Repository: dmlc/dgl Branch: master Commit: 3d16000b4170 Files: 2167 Total size: 15.1 MB Directory structure: gitextract_7pa1_qyp/ ├── .clang-format ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── --work-item--dev-only-.md │ │ ├── bug-report.md │ │ ├── documentation.md │ │ ├── feature-request.md │ │ └── questions-help-support.md │ ├── PULL_REQUEST_TEMPLATE.md │ └── workflows/ │ ├── lint.yml │ └── stale.yml ├── .gitignore ├── .gitmodules ├── .lintrunner.toml ├── CMakeLists.txt ├── CONTRIBUTORS.md ├── Jenkinsfile ├── LICENSE ├── NEWS.md ├── README.md ├── apps/ │ └── life_sci/ │ └── README.md ├── benchmarks/ │ ├── .gitignore │ ├── Jenkinsfile │ ├── README.md │ ├── asv.conf.json │ ├── benchmarks/ │ │ ├── __init__.py │ │ ├── api/ │ │ │ ├── __init__.py │ │ │ ├── bench_add_self_loop.py │ │ │ ├── bench_batch.py │ │ │ ├── bench_builtin_apply_edges.py │ │ │ ├── bench_builtin_apply_edges_hetero.py │ │ │ ├── bench_builtin_multi_update_all.py │ │ │ ├── bench_builtin_update_all_coo.py │ │ │ ├── bench_builtin_update_all_csc.py │ │ │ ├── bench_edge_ids.py │ │ │ ├── bench_edge_subgraph.py │ │ │ ├── bench_find_edges.py │ │ │ ├── bench_format_conversion.py │ │ │ ├── bench_fused_sample_neighbors.py │ │ │ ├── bench_heterograph_construction.py │ │ │ ├── bench_homograph_edge_construction.py │ │ │ ├── bench_homograph_scipy_construction.py │ │ │ ├── bench_in_degrees.py │ │ │ ├── bench_in_edges.py │ │ │ ├── bench_in_subgraph.py │ │ │ ├── bench_khop.py │ │ │ ├── bench_knn_graph.py │ │ │ ├── bench_metis_partition.py │ │ │ ├── bench_nn_graphconv.py │ │ │ ├── bench_nn_heterographconv.py │ │ │ ├── bench_node_subgraph.py │ │ │ ├── bench_random_walk.py │ │ │ ├── bench_readout.py │ │ │ ├── bench_reverse.py │ │ │ ├── bench_sample_neighbors.py │ │ │ ├── bench_to_block.py │ │ │ ├── bench_udf_apply_edges.py │ │ │ ├── bench_udf_multi_update_all.py │ │ │ ├── bench_udf_update_all.py │ │ │ └── bench_unbatch.py │ │ ├── kernel/ │ │ │ ├── __init__.py │ │ │ ├── bench_edgesoftmax.py │ │ │ ├── bench_gsddmm_u_dot_v.py 
│ │ │ ├── bench_gspmm_copy_u.py │ │ │ └── bench_gspmm_u_mul_e_sum.py │ │ ├── model_acc/ │ │ │ ├── __init__.py │ │ │ ├── bench_gat.py │ │ │ ├── bench_gcn.py │ │ │ ├── bench_gcn_udf.py │ │ │ ├── bench_rgcn_base.py │ │ │ ├── bench_rgcn_ns.py │ │ │ ├── bench_sage.py │ │ │ └── bench_sage_ns.py │ │ ├── model_speed/ │ │ │ ├── __init__.py │ │ │ ├── bench_gat.py │ │ │ ├── bench_gat_ns.py │ │ │ ├── bench_gcn_udf.py │ │ │ ├── bench_pinsage.py │ │ │ ├── bench_rgcn_base.py │ │ │ ├── bench_rgcn_hetero_ns.py │ │ │ ├── bench_rgcn_homogeneous_ns.py │ │ │ ├── bench_sage.py │ │ │ ├── bench_sage_ns.py │ │ │ └── bench_sage_unsupervised_ns.py │ │ ├── multigpu/ │ │ │ ├── __init__.py │ │ │ ├── bench_multigpu_rgcn.py │ │ │ ├── bench_multigpu_sage.py │ │ │ └── rgcn_model.py │ │ ├── rgcn.py │ │ └── utils.py │ ├── run.sh │ ├── scripts/ │ │ ├── README.md │ │ ├── build_dgl_asv.sh │ │ ├── fix_ram_info.py │ │ ├── generate_excel.py │ │ ├── install_dgl_asv.sh │ │ ├── publish.sh │ │ ├── replace_branch.py │ │ └── torch_gpu_pip.txt │ └── task.json ├── cmake/ │ ├── modules/ │ │ ├── CUDA.cmake │ │ └── FindMETIS.cmake │ └── util/ │ ├── FindCUDA.cmake │ ├── MshadowUtil.cmake │ └── Util.cmake ├── conda/ │ └── dgl/ │ ├── README.md │ ├── bld.bat │ ├── build.sh │ ├── conda_build_config.yaml │ ├── meta.yaml │ ├── run_test.bat │ └── run_test.sh ├── dgl_sparse/ │ ├── CMakeLists.txt │ ├── build.bat │ ├── build.sh │ ├── find_cmake.py │ ├── include/ │ │ └── sparse/ │ │ ├── dgl_headers.h │ │ ├── elementwise_op.h │ │ ├── matrix_ops.h │ │ ├── reduction.h │ │ ├── sddmm.h │ │ ├── softmax.h │ │ ├── sparse_format.h │ │ ├── sparse_matrix.h │ │ ├── spmm.h │ │ └── spspmm.h │ └── src/ │ ├── cpu/ │ │ └── matrix_ops_impl.cc │ ├── elemenwise_op.cc │ ├── matmul.cc │ ├── matmul.h │ ├── matrix_ops.cc │ ├── matrix_ops_impl.h │ ├── python_binding.cc │ ├── reduction.cc │ ├── sddmm.cc │ ├── softmax.cc │ ├── sparse_format.cc │ ├── sparse_matrix.cc │ ├── sparse_matrix_coalesce.cc │ ├── spmm.cc │ ├── spspmm.cc │ └── utils.h ├── dglgo/ │ 
├── README.md │ ├── dglgo/ │ │ ├── __init__.py │ │ ├── apply_pipeline/ │ │ │ ├── __init__.py │ │ │ ├── graphpred/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gen.py │ │ │ │ └── graphpred.jinja-py │ │ │ ├── nodepred/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gen.py │ │ │ │ └── nodepred.jinja-py │ │ │ └── nodepred_sample/ │ │ │ ├── __init__.py │ │ │ ├── gen.py │ │ │ └── nodepred-ns.jinja-py │ │ ├── cli/ │ │ │ ├── __init__.py │ │ │ ├── apply_cli.py │ │ │ ├── cli.py │ │ │ ├── config_apply_cli.py │ │ │ ├── config_cli.py │ │ │ ├── export_cli.py │ │ │ ├── recipe_cli.py │ │ │ └── train_cli.py │ │ ├── model/ │ │ │ ├── __init__.py │ │ │ ├── edge_encoder/ │ │ │ │ ├── __init__.py │ │ │ │ ├── bilinear.py │ │ │ │ ├── dot.py │ │ │ │ └── ele.py │ │ │ ├── graph_encoder/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gin_ogbg.py │ │ │ │ └── pna.py │ │ │ └── node_encoder/ │ │ │ ├── __init__.py │ │ │ ├── gat.py │ │ │ ├── gcn.py │ │ │ ├── gin.py │ │ │ ├── sage.py │ │ │ └── sgc.py │ │ ├── pipeline/ │ │ │ ├── __init__.py │ │ │ ├── graphpred/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gen.py │ │ │ │ └── graphpred.jinja-py │ │ │ ├── linkpred/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gen.py │ │ │ │ └── linkpred.jinja-py │ │ │ ├── nodepred/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gen.py │ │ │ │ └── nodepred.jinja-py │ │ │ └── nodepred_sample/ │ │ │ ├── __init__.py │ │ │ ├── gen.py │ │ │ └── nodepred-ns.jinja-py │ │ └── utils/ │ │ ├── __init__.py │ │ ├── base_model.py │ │ ├── early_stop.py │ │ ├── enter_config.py │ │ ├── factory.py │ │ └── yaml_dump.py │ ├── recipes/ │ │ ├── __init__.py │ │ ├── graphpred_hiv_gin.yaml │ │ ├── graphpred_hiv_pna.yaml │ │ ├── graphpred_pcba_gin.yaml │ │ ├── linkpred_citation2_sage.yaml │ │ ├── linkpred_collab_sage.yaml │ │ ├── linkpred_cora_sage.yaml │ │ ├── nodepred-ns_arxiv_gcn.yaml │ │ ├── nodepred-ns_product_sage.yaml │ │ ├── nodepred_citeseer_gat.yaml │ │ ├── nodepred_citeseer_gcn.yaml │ │ ├── nodepred_citeseer_sage.yaml │ │ ├── nodepred_cora_gat.yaml │ │ ├── nodepred_cora_gcn.yaml │ │ ├── 
nodepred_cora_sage.yaml │ │ ├── nodepred_pubmed_gat.yaml │ │ ├── nodepred_pubmed_gcn.yaml │ │ └── nodepred_pubmed_sage.yaml │ ├── setup.py │ └── tests/ │ ├── cfg.yml │ ├── run_test.sh │ └── test_pipeline.py ├── docker/ │ ├── Dockerfile.awscli │ ├── Dockerfile.ci_benchmark │ ├── Dockerfile.ci_cpu │ ├── Dockerfile.ci_gpu │ ├── Dockerfile.ci_lint │ ├── README.md │ ├── install/ │ │ ├── conda_env/ │ │ │ ├── kg_cpu.yml │ │ │ ├── kg_gpu.yml │ │ │ ├── mxnet_cpu.yml │ │ │ ├── mxnet_gpu.yml │ │ │ ├── tensorflow_cpu.yml │ │ │ ├── tensorflow_gpu.yml │ │ │ ├── torch_cpu.yml │ │ │ ├── torch_cpu_pip.txt │ │ │ ├── torch_gpu.yml │ │ │ └── torch_gpu_pip.txt │ │ ├── ubuntu_install_antlr.sh │ │ ├── ubuntu_install_build.sh │ │ ├── ubuntu_install_conda.sh │ │ ├── ubuntu_install_core.sh │ │ ├── ubuntu_install_java.sh │ │ ├── ubuntu_install_mxnet_cpu.sh │ │ ├── ubuntu_install_mxnet_gpu.sh │ │ ├── ubuntu_install_python.sh │ │ ├── ubuntu_install_python_package.sh │ │ ├── ubuntu_install_torch.sh │ │ └── ubuntu_install_torch_1.2.0.sh │ └── pods/ │ ├── ci-compile-cpu.yaml │ ├── ci-compile-gpu.yaml │ ├── ci-cpu.yaml │ ├── ci-gpu.yaml │ └── ci-lint.yaml ├── docs/ │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── clean.sh │ ├── migrate-guide-0.5.md │ └── source/ │ ├── _static/ │ │ └── css/ │ │ └── custom.css │ ├── _templates/ │ │ ├── classtemplate.rst │ │ └── graphbolt_classtemplate.rst │ ├── api/ │ │ └── python/ │ │ ├── dgl.DGLGraph.rst │ │ ├── dgl.data.rst │ │ ├── dgl.dataloading.rst │ │ ├── dgl.distributed.rst │ │ ├── dgl.function.rst │ │ ├── dgl.geometry.rst │ │ ├── dgl.graphbolt.rst │ │ ├── dgl.multiprocessing.rst │ │ ├── dgl.ops.rst │ │ ├── dgl.optim.rst │ │ ├── dgl.rst │ │ ├── dgl.sampling.rst │ │ ├── dgl.sparse_v0.rst │ │ ├── index.rst │ │ ├── knn_benchmark.rst │ │ ├── nn-mxnet.rst │ │ ├── nn-pytorch.rst │ │ ├── nn-tensorflow.rst │ │ ├── nn.functional.rst │ │ ├── transforms.rst │ │ └── udf.rst │ ├── conf.py │ ├── contribute.rst │ ├── developer/ │ │ └── ffi.rst │ ├── env_var.rst │ ├── 
faq.rst │ ├── features/ │ │ └── dataset.rst │ ├── gen_dataset_stat.py │ ├── graphtransformer/ │ │ ├── data.rst │ │ ├── index.rst │ │ └── model.rst │ ├── guide/ │ │ ├── data-dataset.rst │ │ ├── data-download.rst │ │ ├── data-loadcsv.rst │ │ ├── data-loadogb.rst │ │ ├── data-process.rst │ │ ├── data-savenload.rst │ │ ├── data.rst │ │ ├── distributed-apis.rst │ │ ├── distributed-hetero.rst │ │ ├── distributed-partition.rst │ │ ├── distributed-preprocessing.rst │ │ ├── distributed-tools.rst │ │ ├── distributed.rst │ │ ├── graph-basic.rst │ │ ├── graph-external.rst │ │ ├── graph-feature.rst │ │ ├── graph-gpu.rst │ │ ├── graph-graphs-nodes-edges.rst │ │ ├── graph-heterogeneous.rst │ │ ├── graph.rst │ │ ├── index.rst │ │ ├── message-api.rst │ │ ├── message-efficient.rst │ │ ├── message-heterograph.rst │ │ ├── message-part.rst │ │ ├── message.rst │ │ ├── minibatch-custom-sampler.rst │ │ ├── minibatch-edge.rst │ │ ├── minibatch-gpu-sampling.rst │ │ ├── minibatch-inference.rst │ │ ├── minibatch-link.rst │ │ ├── minibatch-nn.rst │ │ ├── minibatch-node.rst │ │ ├── minibatch-parallelism.rst │ │ ├── minibatch-sparse.rst │ │ ├── minibatch.rst │ │ ├── mixed_precision.rst │ │ ├── nn-construction.rst │ │ ├── nn-forward.rst │ │ ├── nn-heterograph.rst │ │ ├── nn.rst │ │ ├── training-edge.rst │ │ ├── training-eweight.rst │ │ ├── training-graph.rst │ │ ├── training-link.rst │ │ ├── training-node.rst │ │ └── training.rst │ ├── guide_cn/ │ │ ├── data-dataset.rst │ │ ├── data-download.rst │ │ ├── data-loadogb.rst │ │ ├── data-process.rst │ │ ├── data-savenload.rst │ │ ├── data.rst │ │ ├── distributed-apis.rst │ │ ├── distributed-preprocessing.rst │ │ ├── distributed-tools.rst │ │ ├── distributed.rst │ │ ├── graph-basic.rst │ │ ├── graph-external.rst │ │ ├── graph-feature.rst │ │ ├── graph-gpu.rst │ │ ├── graph-graphs-nodes-edges.rst │ │ ├── graph-heterogeneous.rst │ │ ├── graph.rst │ │ ├── index.rst │ │ ├── message-api.rst │ │ ├── message-efficient.rst │ │ ├── message-heterograph.rst │ │ 
├── message-part.rst │ │ ├── message.rst │ │ ├── minibatch-custom-sampler.rst │ │ ├── minibatch-edge.rst │ │ ├── minibatch-inference.rst │ │ ├── minibatch-link.rst │ │ ├── minibatch-nn.rst │ │ ├── minibatch-node.rst │ │ ├── minibatch.rst │ │ ├── nn-construction.rst │ │ ├── nn-forward.rst │ │ ├── nn-heterograph.rst │ │ ├── nn.rst │ │ ├── training-edge.rst │ │ ├── training-eweight.rst │ │ ├── training-graph.rst │ │ ├── training-link.rst │ │ ├── training-node.rst │ │ └── training.rst │ ├── guide_ko/ │ │ ├── data-dataset.rst │ │ ├── data-download.rst │ │ ├── data-loadogb.rst │ │ ├── data-process.rst │ │ ├── data-savenload.rst │ │ ├── data.rst │ │ ├── distributed-apis.rst │ │ ├── distributed-hetero.rst │ │ ├── distributed-preprocessing.rst │ │ ├── distributed-tools.rst │ │ ├── distributed.rst │ │ ├── graph-basic.rst │ │ ├── graph-external.rst │ │ ├── graph-feature.rst │ │ ├── graph-gpu.rst │ │ ├── graph-graphs-nodes-edges.rst │ │ ├── graph-heterogeneous.rst │ │ ├── graph.rst │ │ ├── index.rst │ │ ├── message-api.rst │ │ ├── message-edge.rst │ │ ├── message-efficient.rst │ │ ├── message-heterograph.rst │ │ ├── message-part.rst │ │ ├── message.rst │ │ ├── minibatch-custom-sampler.rst │ │ ├── minibatch-edge.rst │ │ ├── minibatch-gpu-sampling.rst │ │ ├── minibatch-inference.rst │ │ ├── minibatch-link.rst │ │ ├── minibatch-nn.rst │ │ ├── minibatch-node.rst │ │ ├── minibatch.rst │ │ ├── mixed_precision.rst │ │ ├── nn-construction.rst │ │ ├── nn-forward.rst │ │ ├── nn-heterograph.rst │ │ ├── nn.rst │ │ ├── training-edge.rst │ │ ├── training-graph.rst │ │ ├── training-link.rst │ │ ├── training-node.rst │ │ └── training.rst │ ├── index.rst │ ├── install/ │ │ └── index.rst │ ├── notebooks/ │ │ └── sparse/ │ │ ├── gcn.nblink │ │ ├── graph_diffusion.nblink │ │ ├── graph_transformer.nblink │ │ ├── hgnn.nblink │ │ ├── index.rst │ │ └── quickstart.nblink │ ├── performance.rst │ ├── resources.rst │ └── stochastic_training/ │ ├── index.rst │ ├── link_prediction.nblink │ ├── 
multigpu_node_classification.nblink │ ├── neighbor_sampling_overview.nblink │ ├── node_classification.nblink │ ├── ondisk-dataset-specification.rst │ ├── ondisk-dataset.rst │ ├── ondisk_dataset_heterograph.nblink │ └── ondisk_dataset_homograph.nblink ├── examples/ │ ├── README.md │ ├── advanced/ │ │ └── cugraph/ │ │ ├── graphsage.py │ │ └── rgcn.py │ ├── core/ │ │ ├── Graphormer/ │ │ │ ├── README.md │ │ │ ├── dataset.py │ │ │ ├── main.py │ │ │ └── model.py │ │ ├── gat/ │ │ │ ├── README.md │ │ │ └── train.py │ │ ├── gated_gcn/ │ │ │ ├── README.md │ │ │ └── train.py │ │ ├── graphsage/ │ │ │ └── node_classification.py │ │ └── rgcn/ │ │ ├── README.md │ │ └── hetero_rgcn.py │ ├── distributed/ │ │ ├── graphsage/ │ │ │ ├── README.md │ │ │ ├── node_classification.py │ │ │ ├── node_classification_unsupervised.py │ │ │ └── partition_graph.py │ │ └── rgcn/ │ │ ├── README.md │ │ ├── lp_perf.py │ │ ├── node_classification.py │ │ └── partition_graph.py │ ├── graphbolt/ │ │ ├── README.md │ │ ├── disk_based_feature/ │ │ │ ├── README.md │ │ │ └── node_classification.py │ │ ├── lightning/ │ │ │ ├── README.md │ │ │ └── node_classification.py │ │ ├── link_prediction.py │ │ ├── node_classification.py │ │ ├── pyg/ │ │ │ ├── README.md │ │ │ ├── hetero/ │ │ │ │ └── node_classification.py │ │ │ ├── labor/ │ │ │ │ ├── README.md │ │ │ │ ├── load_dataset.py │ │ │ │ ├── node_classification.py │ │ │ │ └── sage_conv.py │ │ │ ├── link_prediction.py │ │ │ ├── multigpu/ │ │ │ │ └── node_classification.py │ │ │ ├── node_classification.py │ │ │ └── node_classification_advanced.py │ │ ├── quickstart/ │ │ │ ├── README.md │ │ │ ├── link_prediction.py │ │ │ └── node_classification.py │ │ ├── rgcn/ │ │ │ ├── README.md │ │ │ └── hetero_rgcn.py │ │ ├── sparse/ │ │ │ └── graphsage.py │ │ └── temporal_link_prediction.py │ ├── legacy/ │ │ ├── README.md │ │ ├── link_prediction.py │ │ └── node_classification.py │ ├── multigpu/ │ │ ├── README.md │ │ ├── graphbolt/ │ │ │ ├── README.md │ │ │ └── 
node_classification.py │ │ └── node_classification_sage.py │ ├── mxnet/ │ │ ├── README.md │ │ ├── appnp/ │ │ │ ├── README.md │ │ │ └── appnp.py │ │ ├── gat/ │ │ │ ├── README.md │ │ │ ├── gat.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── gcn/ │ │ │ ├── README.md │ │ │ ├── gcn.py │ │ │ ├── gcn_concat.py │ │ │ ├── gcn_mp.py │ │ │ └── train.py │ │ ├── gin/ │ │ │ ├── README.md │ │ │ ├── dataloader.py │ │ │ ├── gin.py │ │ │ ├── main.py │ │ │ └── parser.py │ │ ├── graphsage/ │ │ │ ├── README.md │ │ │ └── main.py │ │ ├── monet/ │ │ │ ├── README.md │ │ │ └── citation.py │ │ ├── rgcn/ │ │ │ ├── README.md │ │ │ ├── entity_classify.py │ │ │ └── model.py │ │ ├── scenegraph/ │ │ │ ├── README.md │ │ │ ├── data/ │ │ │ │ ├── __init__.py │ │ │ │ ├── dataloader.py │ │ │ │ ├── object.py │ │ │ │ ├── prepare_visualgenome.py │ │ │ │ └── relation.py │ │ │ ├── demo_reldn.py │ │ │ ├── model/ │ │ │ │ ├── __init__.py │ │ │ │ ├── faster_rcnn.py │ │ │ │ └── reldn.py │ │ │ ├── train_faster_rcnn.py │ │ │ ├── train_faster_rcnn.sh │ │ │ ├── train_freq_prior.py │ │ │ ├── train_reldn.py │ │ │ ├── train_reldn.sh │ │ │ ├── utils/ │ │ │ │ ├── __init__.py │ │ │ │ ├── build_graph.py │ │ │ │ ├── metric.py │ │ │ │ ├── sampling.py │ │ │ │ └── viz.py │ │ │ ├── validate_reldn.py │ │ │ └── validate_reldn.sh │ │ ├── sgc/ │ │ │ ├── README.md │ │ │ └── sgc.py │ │ ├── tagcn/ │ │ │ ├── README.md │ │ │ ├── tagcn.py │ │ │ └── train.py │ │ └── tree_lstm/ │ │ ├── README.md │ │ ├── train.py │ │ └── tree_lstm.py │ ├── pytorch/ │ │ ├── GATNE-T/ │ │ │ ├── README.md │ │ │ ├── requirements.txt │ │ │ ├── scripts/ │ │ │ │ ├── run_example.sh │ │ │ │ ├── run_example_sparse.sh │ │ │ │ └── run_example_sparse_multi_gpus.sh │ │ │ └── src/ │ │ │ ├── main.py │ │ │ ├── main_sparse.py │ │ │ ├── main_sparse_multi_gpus.py │ │ │ └── utils.py │ │ ├── GNN-FiLM/ │ │ │ ├── README.md │ │ │ ├── data_loader.py │ │ │ ├── main.py │ │ │ └── utils.py │ │ ├── NGCF/ │ │ │ ├── Data/ │ │ │ │ ├── load_amazon-book.sh │ │ │ │ └── load_gowalla.sh │ │ │ ├── 
NGCF/ │ │ │ │ ├── main.py │ │ │ │ ├── model.py │ │ │ │ └── utility/ │ │ │ │ ├── batch_test.py │ │ │ │ ├── helper.py │ │ │ │ ├── load_data.py │ │ │ │ ├── metrics.py │ │ │ │ └── parser.py │ │ │ └── README.md │ │ ├── P-GNN/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── TAHIN/ │ │ │ ├── TAHIN.py │ │ │ ├── data_loader.py │ │ │ ├── main.py │ │ │ ├── readme.md │ │ │ └── utils.py │ │ ├── appnp/ │ │ │ ├── README.md │ │ │ ├── appnp.py │ │ │ └── train.py │ │ ├── argo/ │ │ │ ├── README.md │ │ │ ├── argo.py │ │ │ ├── main.py │ │ │ ├── ogb_example.py │ │ │ └── ogb_example_ARGO.py │ │ ├── arma/ │ │ │ ├── README.md │ │ │ ├── citation.py │ │ │ └── model.py │ │ ├── bgnn/ │ │ │ ├── BGNN.py │ │ │ ├── Readme.md │ │ │ └── run.py │ │ ├── bgrl/ │ │ │ ├── README.md │ │ │ ├── eval_function.py │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── capsule/ │ │ │ ├── DGLDigitCapsule.py │ │ │ ├── DGLRoutingLayer.py │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ └── simple_routing.py │ │ ├── caregnn/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ ├── main_sampling.py │ │ │ ├── model.py │ │ │ ├── model_sampling.py │ │ │ └── utils.py │ │ ├── cluster_gcn/ │ │ │ ├── README.md │ │ │ └── cluster_gcn.py │ │ ├── compGCN/ │ │ │ ├── README.md │ │ │ ├── data_loader.py │ │ │ ├── get_fb15k-237.sh │ │ │ ├── get_wn18rr.sh │ │ │ ├── main.py │ │ │ ├── models.py │ │ │ └── utils.py │ │ ├── correct_and_smooth/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ └── model.py │ │ ├── dagnn/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ └── utils.py │ │ ├── deepergcn/ │ │ │ ├── README.md │ │ │ ├── layers.py │ │ │ ├── main.py │ │ │ ├── models.py │ │ │ └── modules.py │ │ ├── deepwalk/ │ │ │ └── README.md │ │ ├── dgi/ │ │ │ ├── README.md │ │ │ ├── dgi.py │ │ │ ├── gcn.py │ │ │ └── train.py │ │ ├── dgmg/ │ │ │ ├── README.md │ │ │ ├── configure.py │ │ │ ├── cycles.py │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── diffpool/ │ │ │ ├── README.md │ │ │ ├── 
data_utils.py │ │ │ ├── model/ │ │ │ │ ├── __init__.py │ │ │ │ ├── dgl_layers/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── aggregator.py │ │ │ │ │ ├── bundler.py │ │ │ │ │ └── gnn.py │ │ │ │ ├── encoder.py │ │ │ │ ├── loss.py │ │ │ │ ├── model_utils.py │ │ │ │ └── tensorized_layers/ │ │ │ │ ├── __init__.py │ │ │ │ ├── assignment.py │ │ │ │ ├── diffpool.py │ │ │ │ └── graphsage.py │ │ │ └── train.py │ │ ├── dimenet/ │ │ │ ├── README.md │ │ │ ├── config/ │ │ │ │ ├── convert.yaml │ │ │ │ ├── dimenet.yaml │ │ │ │ └── dimenet_pp.yaml │ │ │ ├── convert_tf_ckpt_to_pytorch.py │ │ │ ├── main.py │ │ │ ├── modules/ │ │ │ │ ├── activations.py │ │ │ │ ├── basis_utils.py │ │ │ │ ├── bessel_basis_layer.py │ │ │ │ ├── dimenet.py │ │ │ │ ├── dimenet_pp.py │ │ │ │ ├── embedding_block.py │ │ │ │ ├── envelope.py │ │ │ │ ├── initializers.py │ │ │ │ ├── interaction_block.py │ │ │ │ ├── interaction_pp_block.py │ │ │ │ ├── output_block.py │ │ │ │ ├── output_pp_block.py │ │ │ │ ├── residual_layer.py │ │ │ │ └── spherical_basis_layer.py │ │ │ └── qm9.py │ │ ├── dtgrnn/ │ │ │ ├── README.md │ │ │ ├── dataloading.py │ │ │ ├── dcrnn.py │ │ │ ├── gaan.py │ │ │ ├── model.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── eeg-gcnn/ │ │ │ ├── EEGGraphDataset.py │ │ │ ├── README.md │ │ │ ├── deep_EEGGraphConvNet.py │ │ │ ├── main.py │ │ │ └── shallow_EEGGraphConvNet.py │ │ ├── eges/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ ├── sampler.py │ │ │ └── utils.py │ │ ├── evolveGCN/ │ │ │ ├── README.md │ │ │ ├── dataset.py │ │ │ ├── model.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── gas/ │ │ │ ├── README.md │ │ │ ├── dataloader.py │ │ │ ├── main.py │ │ │ ├── main_sampling.py │ │ │ ├── model.py │ │ │ └── model_sampling.py │ │ ├── gat/ │ │ │ ├── README.md │ │ │ ├── train.py │ │ │ └── train_ppi.py │ │ ├── gatv2/ │ │ │ ├── README.md │ │ │ ├── gatv2.py │ │ │ └── train.py │ │ ├── gcmc/ │ │ │ ├── README.md │ │ │ ├── data.py │ │ │ ├── model.py │ │ │ ├── train.py │ │ │ ├── 
train_sampling.py │ │ │ └── utils.py │ │ ├── gcn/ │ │ │ ├── README.md │ │ │ └── train.py │ │ ├── geniepath/ │ │ │ ├── README.md │ │ │ ├── model.py │ │ │ ├── ppi.py │ │ │ └── pubmed.py │ │ ├── ggnn/ │ │ │ ├── README.md │ │ │ ├── data_utils.py │ │ │ ├── ggnn_gc.py │ │ │ ├── ggnn_ns.py │ │ │ ├── ggsnn.py │ │ │ ├── train_gc.py │ │ │ ├── train_ns.py │ │ │ └── train_path_finding.py │ │ ├── gin/ │ │ │ ├── README.md │ │ │ └── train.py │ │ ├── gnn_explainer/ │ │ │ ├── README.md │ │ │ ├── explain_main.py │ │ │ ├── gnn_subgraph/ │ │ │ │ ├── 1/ │ │ │ │ │ ├── graph.json │ │ │ │ │ ├── model_list.json │ │ │ │ │ ├── subgraph_1.json │ │ │ │ │ └── subgraph_list.json │ │ │ │ └── dataset_list.json │ │ │ ├── models.py │ │ │ └── train_main.py │ │ ├── grace/ │ │ │ ├── README.md │ │ │ ├── aug.py │ │ │ ├── dataset.py │ │ │ ├── eval.py │ │ │ ├── main.py │ │ │ └── model.py │ │ ├── grand/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ └── model.py │ │ ├── graph_matching/ │ │ │ ├── README.md │ │ │ ├── examples.py │ │ │ └── ged.py │ │ ├── graphsage/ │ │ │ ├── README.md │ │ │ ├── advanced/ │ │ │ │ ├── README.md │ │ │ │ ├── model.py │ │ │ │ ├── negative_sampler.py │ │ │ │ └── train_lightning_unsupervised.py │ │ │ ├── lightning/ │ │ │ │ └── node_classification.py │ │ │ ├── link_pred.py │ │ │ ├── load_graph.py │ │ │ ├── node_classification.py │ │ │ └── train_full.py │ │ ├── graphsaint/ │ │ │ ├── README.md │ │ │ ├── config.py │ │ │ ├── modules.py │ │ │ ├── sampler.py │ │ │ ├── train_sampling.py │ │ │ └── utils.py │ │ ├── graphsim/ │ │ │ ├── README.md │ │ │ ├── dataloader.py │ │ │ ├── models.py │ │ │ ├── n_body_sim.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── graphwriter/ │ │ │ ├── README.md │ │ │ ├── graphwriter.py │ │ │ ├── modules.py │ │ │ ├── opts.py │ │ │ ├── prepare_data.sh │ │ │ ├── run.sh │ │ │ ├── test.sh │ │ │ ├── train.py │ │ │ └── utlis.py │ │ ├── gxn/ │ │ │ ├── README.md │ │ │ ├── data_preprocess.py │ │ │ ├── layers.py │ │ │ ├── main.py │ │ │ ├── main_early_stop.py │ │ │ ├── networks.py 
│ │ │ ├── scripts/ │ │ │ │ ├── run_gxn.sh │ │ │ │ └── run_gxn_early_stop.sh │ │ │ └── utils.py │ │ ├── han/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ ├── model_hetero.py │ │ │ ├── train_sampling.py │ │ │ └── utils.py │ │ ├── hardgat/ │ │ │ ├── README.md │ │ │ ├── hgao.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── hgp_sl/ │ │ │ ├── README.md │ │ │ ├── functions.py │ │ │ ├── layers.py │ │ │ ├── main.py │ │ │ ├── networks.py │ │ │ └── utils.py │ │ ├── hgt/ │ │ │ ├── README.md │ │ │ ├── model.py │ │ │ └── train_acm.py │ │ ├── hilander/ │ │ │ ├── PSS/ │ │ │ │ ├── README.md │ │ │ │ ├── Smooth_AP/ │ │ │ │ │ ├── README.md │ │ │ │ │ └── src/ │ │ │ │ │ ├── auxiliaries.py │ │ │ │ │ ├── datasets.py │ │ │ │ │ ├── evaluate.py │ │ │ │ │ ├── evaluate_model.py │ │ │ │ │ ├── finetune_1head.py │ │ │ │ │ ├── get_features.py │ │ │ │ │ ├── losses.py │ │ │ │ │ ├── main.py │ │ │ │ │ └── netlib.py │ │ │ │ ├── __init__.py │ │ │ │ ├── test.sh │ │ │ │ ├── test_subg_inat.py │ │ │ │ ├── train.sh │ │ │ │ └── train_subg_inat.py │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── checkpoint/ │ │ │ │ └── .gitkeep │ │ │ ├── data/ │ │ │ │ └── .gitkeep │ │ │ ├── models/ │ │ │ │ ├── __init__.py │ │ │ │ ├── focal_loss.py │ │ │ │ ├── graphconv.py │ │ │ │ └── lander.py │ │ │ ├── scripts/ │ │ │ │ ├── test_deepglint_hannah.sh │ │ │ │ ├── test_deepglint_imdb.sh │ │ │ │ ├── test_deepglint_imdb_sampled_as_deepglint.sh │ │ │ │ ├── test_inat.sh │ │ │ │ ├── test_inat_train_on_resampled_1_in_6_per_class.sh │ │ │ │ ├── train_deepglint.sh │ │ │ │ ├── train_inat.sh │ │ │ │ └── train_inat_resampled_1_in_6_per_class.sh │ │ │ ├── test.py │ │ │ ├── test_subg.py │ │ │ ├── train.py │ │ │ ├── train_subg.py │ │ │ └── utils/ │ │ │ ├── __init__.py │ │ │ ├── adjacency.py │ │ │ ├── deduce.py │ │ │ ├── density.py │ │ │ ├── evaluate.py │ │ │ ├── faiss_gpu.py │ │ │ ├── faiss_search.py │ │ │ ├── knn.py │ │ │ ├── metrics.py │ │ │ └── misc.py │ │ ├── infograph/ │ │ │ ├── README.md │ │ │ ├── 
evaluate_embedding.py │ │ │ ├── model.py │ │ │ ├── semisupervised.py │ │ │ ├── unsupervised.py │ │ │ └── utils.py │ │ ├── jknet/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ └── model.py │ │ ├── jtnn/ │ │ │ ├── README.md │ │ │ ├── jtnn/ │ │ │ │ ├── __init__.py │ │ │ │ ├── chemutils.py │ │ │ │ ├── datautils.py │ │ │ │ ├── jtmpn.py │ │ │ │ ├── jtnn_dec.py │ │ │ │ ├── jtnn_enc.py │ │ │ │ ├── jtnn_vae.py │ │ │ │ ├── line_profiler_integration.py │ │ │ │ ├── mol_tree.py │ │ │ │ ├── mol_tree_nx.py │ │ │ │ ├── mpn.py │ │ │ │ └── nnutils.py │ │ │ └── vaetrain_dgl.py │ │ ├── label_propagation/ │ │ │ ├── README.md │ │ │ └── main.py │ │ ├── labor/ │ │ │ ├── README.md │ │ │ ├── ladies_sampler.py │ │ │ ├── load_graph.py │ │ │ ├── model.py │ │ │ └── train_lightning.py │ │ ├── lda/ │ │ │ ├── README.md │ │ │ ├── example_20newsgroups.py │ │ │ └── lda_model.py │ │ ├── line_graph/ │ │ │ ├── README.md │ │ │ ├── gnn.py │ │ │ └── train.py │ │ ├── metapath2vec/ │ │ │ ├── README.md │ │ │ ├── download.py │ │ │ ├── metapath2vec.py │ │ │ ├── model.py │ │ │ ├── reading_data.py │ │ │ ├── sampler.py │ │ │ └── test.py │ │ ├── mixhop/ │ │ │ ├── README.md │ │ │ └── main.py │ │ ├── model_zoo/ │ │ │ ├── README.md │ │ │ ├── citation_network/ │ │ │ │ ├── README.md │ │ │ │ ├── conf.py │ │ │ │ ├── models.py │ │ │ │ └── run.py │ │ │ └── geometric/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── coarsening.py │ │ │ ├── coordinate.py │ │ │ ├── grid_graph.py │ │ │ └── mnist.py │ │ ├── monet/ │ │ │ ├── README.md │ │ │ └── citation.py │ │ ├── multigpu/ │ │ │ ├── README.md │ │ │ ├── multi_gpu_graph_prediction.py │ │ │ ├── multi_gpu_link_prediction.py │ │ │ └── multi_gpu_node_classification.py │ │ ├── mvgrl/ │ │ │ ├── README.md │ │ │ ├── graph/ │ │ │ │ ├── dataset.py │ │ │ │ ├── main.py │ │ │ │ ├── model.py │ │ │ │ └── utils.py │ │ │ └── node/ │ │ │ ├── dataset.py │ │ │ ├── main.py │ │ │ ├── main_sample.py │ │ │ └── model.py │ │ ├── node2vec/ │ │ │ ├── README.md │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ └── 
utils.py │ │ ├── ogb/ │ │ │ ├── README.md │ │ │ ├── cluster-gat/ │ │ │ │ ├── README.md │ │ │ │ ├── main.py │ │ │ │ ├── partition_utils.py │ │ │ │ └── sampler.py │ │ │ ├── cluster-sage/ │ │ │ │ ├── README.md │ │ │ │ ├── main.py │ │ │ │ ├── partition_utils.py │ │ │ │ └── sampler.py │ │ │ ├── deepwalk/ │ │ │ │ ├── README.md │ │ │ │ ├── deepwalk.py │ │ │ │ ├── load_dataset.py │ │ │ │ ├── model.py │ │ │ │ ├── reading_data.py │ │ │ │ └── utils.py │ │ │ ├── directional_GSN/ │ │ │ │ ├── README.md │ │ │ │ ├── main.py │ │ │ │ └── preprocessing.py │ │ │ ├── line/ │ │ │ │ ├── README.md │ │ │ │ ├── line.py │ │ │ │ ├── load_dataset.py │ │ │ │ ├── model.py │ │ │ │ ├── reading_data.py │ │ │ │ └── utils.py │ │ │ ├── ngnn/ │ │ │ │ ├── README.md │ │ │ │ └── main.py │ │ │ ├── ngnn_seal/ │ │ │ │ ├── README.md │ │ │ │ ├── main.py │ │ │ │ ├── models.py │ │ │ │ └── utils.py │ │ │ ├── ogbn-arxiv/ │ │ │ │ ├── README.md │ │ │ │ ├── correct_and_smooth.py │ │ │ │ ├── gat.py │ │ │ │ ├── gcn.py │ │ │ │ └── models.py │ │ │ ├── ogbn-mag/ │ │ │ │ ├── README.md │ │ │ │ └── hetero_rgcn.py │ │ │ ├── ogbn-products/ │ │ │ │ ├── gat/ │ │ │ │ │ ├── README.md │ │ │ │ │ ├── gat.py │ │ │ │ │ ├── main.py │ │ │ │ │ └── models.py │ │ │ │ ├── graphsage/ │ │ │ │ │ ├── README.md │ │ │ │ │ └── main.py │ │ │ │ └── mlp/ │ │ │ │ ├── README.md │ │ │ │ ├── mlp.py │ │ │ │ └── models.py │ │ │ ├── ogbn-proteins/ │ │ │ │ ├── README.md │ │ │ │ ├── configure.py │ │ │ │ ├── gat.py │ │ │ │ ├── main_proteins_full_dgl.py │ │ │ │ ├── models.py │ │ │ │ └── utils.py │ │ │ ├── seal_ogbl/ │ │ │ │ ├── README.md │ │ │ │ └── main.py │ │ │ └── sign/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── dataset.py │ │ │ └── sign.py │ │ ├── ogb_lsc/ │ │ │ ├── MAG240M/ │ │ │ │ ├── README.md │ │ │ │ ├── preprocess.py │ │ │ │ ├── train.py │ │ │ │ └── train_multi_gpus.py │ │ │ ├── PCQM4M/ │ │ │ │ ├── README.md │ │ │ │ ├── conv.py │ │ │ │ ├── gnn.py │ │ │ │ ├── main.py │ │ │ │ └── test_inference.py │ │ │ └── README.md │ │ ├── ogc/ │ │ │ ├── 
README.md │ │ │ ├── ogc.py │ │ │ ├── train.py │ │ │ └── utils.py │ │ ├── pagerank.py │ │ ├── pinsage/ │ │ │ ├── README.md │ │ │ ├── builder.py │ │ │ ├── data_utils.py │ │ │ ├── evaluation.py │ │ │ ├── layers.py │ │ │ ├── model.py │ │ │ ├── model_sparse.py │ │ │ ├── process_movielens1m.py │ │ │ ├── process_nowplaying_rs.py │ │ │ └── sampler.py │ │ ├── pointcloud/ │ │ │ ├── bipointnet/ │ │ │ │ ├── ModelNetDataLoader.py │ │ │ │ ├── README.md │ │ │ │ ├── basic.py │ │ │ │ ├── bipointnet2.py │ │ │ │ ├── bipointnet_cls.py │ │ │ │ └── train_cls.py │ │ │ ├── edgeconv/ │ │ │ │ ├── README.md │ │ │ │ ├── main.py │ │ │ │ ├── model.py │ │ │ │ └── modelnet.py │ │ │ ├── pct/ │ │ │ │ ├── ModelNetDataLoader.py │ │ │ │ ├── README.md │ │ │ │ ├── ShapeNet.py │ │ │ │ ├── helper.py │ │ │ │ ├── pct.py │ │ │ │ ├── provider.py │ │ │ │ ├── train_cls.py │ │ │ │ └── train_partseg.py │ │ │ ├── point_transformer/ │ │ │ │ ├── ModelNetDataLoader.py │ │ │ │ ├── README.md │ │ │ │ ├── ShapeNet.py │ │ │ │ ├── helper.py │ │ │ │ ├── point_transformer.py │ │ │ │ ├── provider.py │ │ │ │ ├── train_cls.py │ │ │ │ └── train_partseg.py │ │ │ └── pointnet/ │ │ │ ├── ModelNetDataLoader.py │ │ │ ├── README.md │ │ │ ├── ShapeNet.py │ │ │ ├── pointnet2.py │ │ │ ├── pointnet2_partseg.py │ │ │ ├── pointnet_cls.py │ │ │ ├── pointnet_partseg.py │ │ │ ├── provider.py │ │ │ ├── train_cls.py │ │ │ └── train_partseg.py │ │ ├── rect/ │ │ │ ├── README.md │ │ │ ├── classify.py │ │ │ ├── label_utils.py │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── rgat/ │ │ │ ├── README.md │ │ │ └── train.py │ │ ├── rgcn/ │ │ │ ├── README.md │ │ │ ├── entity.py │ │ │ ├── entity_sample.py │ │ │ ├── entity_sample_multi_gpu.py │ │ │ ├── entity_utils.py │ │ │ ├── experimental/ │ │ │ │ ├── README.md │ │ │ │ ├── entity_classify_dist.py │ │ │ │ ├── get_mag_data.py │ │ │ │ ├── partition_graph.py │ │ │ │ ├── preprocessing_dist_training/ │ │ │ │ │ ├── edges/ │ │ │ │ │ │ ├── identity1/ │ │ │ │ │ │ │ └── sample.csv │ │ │ │ │ │ ├── 
identity2/ │ │ │ │ │ │ │ └── sample.csv │ │ │ │ │ │ └── identity3/ │ │ │ │ │ │ └── sample.csv │ │ │ │ │ ├── metis_creation.py │ │ │ │ │ ├── nodes/ │ │ │ │ │ │ └── order/ │ │ │ │ │ │ └── sample.csv │ │ │ │ │ └── pre_process_dist_training.sh │ │ │ │ ├── verify_mag_partitions.py │ │ │ │ └── write_mag.py │ │ │ ├── link.py │ │ │ └── model.py │ │ ├── rgcn-hetero/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── entity_classify.py │ │ │ ├── entity_classify_heteroAPI.py │ │ │ ├── entity_classify_mb.py │ │ │ ├── model.py │ │ │ └── test_classify.py │ │ ├── rrn/ │ │ │ ├── README.md │ │ │ ├── ckpt/ │ │ │ │ └── rrn-sudoku.pkl │ │ │ ├── rrn.py │ │ │ ├── sudoku.py │ │ │ ├── sudoku_data.py │ │ │ ├── sudoku_solver.py │ │ │ └── train_sudoku.py │ │ ├── sagpool/ │ │ │ ├── README.md │ │ │ ├── grid_search.py │ │ │ ├── grid_search_config.json │ │ │ ├── layer.py │ │ │ ├── main.py │ │ │ ├── network.py │ │ │ └── utils.py │ │ ├── seal/ │ │ │ ├── README.md │ │ │ ├── logger.py │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ ├── sampler.py │ │ │ └── utils.py │ │ ├── sgc/ │ │ │ ├── README.md │ │ │ ├── sgc.py │ │ │ └── sgc_reddit.py │ │ ├── sign/ │ │ │ ├── README.md │ │ │ ├── dataset.py │ │ │ └── sign.py │ │ ├── stgcn_wave/ │ │ │ ├── README.md │ │ │ ├── load_data.py │ │ │ ├── main.py │ │ │ ├── model.py │ │ │ ├── sensors2graph.py │ │ │ └── utils.py │ │ ├── tagcn/ │ │ │ ├── README.md │ │ │ ├── tagcn.py │ │ │ └── train.py │ │ ├── tgn/ │ │ │ └── README.md │ │ ├── tree_lstm/ │ │ │ ├── README.md │ │ │ ├── train.py │ │ │ └── tree_lstm.py │ │ ├── vgae/ │ │ │ ├── README.md │ │ │ ├── input_data.py │ │ │ ├── model.py │ │ │ ├── preprocess.py │ │ │ └── train.py │ │ └── vrgcn/ │ │ ├── README.md │ │ ├── train_cv.py │ │ └── train_cv_multi_gpu.py │ ├── sparse/ │ │ ├── appnp.py │ │ ├── c_and_s.py │ │ ├── gat.py │ │ ├── gcn.py │ │ ├── gcnii.py │ │ ├── graph_transformer.py │ │ ├── han.py │ │ ├── hetero-rgcn.py │ │ ├── hgnn.py │ │ ├── hypergraphatt.py │ │ ├── pagerank.py │ │ ├── sampling/ │ │ │ ├── graphsage.py │ │ │ 
└── ladies.py │ │ ├── sgc.py │ │ ├── sign.py │ │ └── twirls.py │ └── tensorflow/ │ ├── dgi/ │ │ ├── README.md │ │ ├── dgi.py │ │ ├── gcn.py │ │ └── train.py │ ├── gat/ │ │ ├── README.md │ │ ├── gat.py │ │ ├── train.py │ │ └── utils.py │ ├── gcn/ │ │ ├── README.md │ │ ├── gcn.py │ │ ├── gcn_builtin.py │ │ ├── gcn_mp.py │ │ └── train.py │ ├── rgcn/ │ │ ├── README.md │ │ ├── entity_classify.py │ │ ├── model.py │ │ └── utils.py │ └── sgc/ │ ├── README.md │ └── sgc.py ├── graphbolt/ │ ├── CMakeLists.txt │ ├── build.bat │ ├── build.sh │ ├── find_cmake.py │ ├── include/ │ │ └── graphbolt/ │ │ ├── async.h │ │ ├── continuous_seed.h │ │ ├── cuda_ops.h │ │ ├── cuda_sampling_ops.h │ │ ├── fused_csc_sampling_graph.h │ │ ├── fused_sampled_subgraph.h │ │ ├── isin.h │ │ ├── serialize.h │ │ ├── shared_memory.h │ │ └── unique_and_compact.h │ └── src/ │ ├── cache_policy.cc │ ├── cache_policy.h │ ├── circular_queue.h │ ├── cnumpy.cc │ ├── cnumpy.h │ ├── concurrent_id_hash_map.cc │ ├── concurrent_id_hash_map.h │ ├── cuda/ │ │ ├── common.h │ │ ├── cooperative_minibatching_utils.cu │ │ ├── cooperative_minibatching_utils.cuh │ │ ├── cooperative_minibatching_utils.h │ │ ├── cumsum.cu │ │ ├── expand_indptr.cu │ │ ├── extension/ │ │ │ ├── gpu_cache.cu │ │ │ ├── gpu_cache.h │ │ │ ├── gpu_graph_cache.cu │ │ │ ├── gpu_graph_cache.h │ │ │ ├── unique_and_compact.h │ │ │ └── unique_and_compact_map.cu │ │ ├── gather.cu │ │ ├── index_select_csc_impl.cu │ │ ├── index_select_impl.cu │ │ ├── insubgraph.cu │ │ ├── isin.cu │ │ ├── max_uva_threads.cc │ │ ├── max_uva_threads.h │ │ ├── neighbor_sampler.cu │ │ ├── sampling_utils.cu │ │ ├── sort_impl.cu │ │ ├── unique_and_compact_impl.cu │ │ └── utils.h │ ├── expand_indptr.cc │ ├── expand_indptr.h │ ├── feature_cache.cc │ ├── feature_cache.h │ ├── fused_csc_sampling_graph.cc │ ├── index_select.cc │ ├── index_select.h │ ├── io_uring.cc │ ├── io_uring.h │ ├── isin.cc │ ├── macro.h │ ├── partitioned_cache_policy.cc │ ├── partitioned_cache_policy.h │ ├── 
python_binding.cc │ ├── random.cc │ ├── random.h │ ├── serialize.cc │ ├── shared_memory.cc │ ├── shared_memory_helper.cc │ ├── shared_memory_helper.h │ ├── unique_and_compact.cc │ ├── utils.cc │ └── utils.h ├── include/ │ └── dgl/ │ ├── array.h │ ├── array_iterator.h │ ├── aten/ │ │ ├── array_ops.h │ │ ├── coo.h │ │ ├── csr.h │ │ ├── macro.h │ │ ├── spmat.h │ │ └── types.h │ ├── base_heterograph.h │ ├── bcast.h │ ├── env_variable.h │ ├── graph.h │ ├── graph_interface.h │ ├── graph_op.h │ ├── graph_serializer.h │ ├── graph_traversal.h │ ├── immutable_graph.h │ ├── kernel.h │ ├── lazy.h │ ├── nodeflow.h │ ├── packed_func_ext.h │ ├── random.h │ ├── runtime/ │ │ ├── bfloat16.h │ │ ├── c_backend_api.h │ │ ├── c_object_api.h │ │ ├── c_runtime_api.h │ │ ├── config.h │ │ ├── container.h │ │ ├── device_api.h │ │ ├── dlpack_convert.h │ │ ├── module.h │ │ ├── ndarray.h │ │ ├── object.h │ │ ├── packed_func.h │ │ ├── parallel_for.h │ │ ├── registry.h │ │ ├── serializer.h │ │ ├── shared_mem.h │ │ ├── smart_ptr_serializer.h │ │ ├── tensordispatch.h │ │ ├── threading_backend.h │ │ └── util.h │ ├── sampler.h │ ├── sampling/ │ │ ├── negative.h │ │ ├── neighbor.h │ │ └── randomwalks.h │ ├── scheduler.h │ ├── transform.h │ └── zerocopy_serializer.h ├── notebooks/ │ ├── graphbolt/ │ │ └── walkthrough.ipynb │ ├── sparse/ │ │ ├── gcn.ipynb │ │ ├── graph_diffusion.ipynb │ │ ├── graph_transformer.ipynb │ │ ├── hgnn.ipynb │ │ └── quickstart.ipynb │ └── stochastic_training/ │ ├── link_prediction.ipynb │ ├── multigpu_node_classification.ipynb │ ├── neighbor_sampling_overview.ipynb │ ├── node_classification.ipynb │ ├── ondisk_dataset_heterograph.ipynb │ └── ondisk_dataset_homograph.ipynb ├── pyproject.toml ├── python/ │ ├── dgl/ │ │ ├── __init__.py │ │ ├── _api_internal.py │ │ ├── _ffi/ │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── _ctypes/ │ │ │ │ ├── __init__.py │ │ │ │ ├── function.py │ │ │ │ ├── ndarray.py │ │ │ │ ├── object.py │ │ │ │ └── types.py │ │ │ ├── _cy2/ │ │ │ │ └── 
__init__.py │ │ │ ├── _cy3/ │ │ │ │ └── __init__.py │ │ │ ├── _cython/ │ │ │ │ ├── .gitignore │ │ │ │ ├── base.pxi │ │ │ │ ├── core.pyx │ │ │ │ ├── function.pxi │ │ │ │ ├── ndarray.pxi │ │ │ │ └── object.pxi │ │ │ ├── base.py │ │ │ ├── capi.py │ │ │ ├── function.py │ │ │ ├── libinfo.py │ │ │ ├── ndarray.py │ │ │ ├── object.py │ │ │ ├── object_generic.py │ │ │ ├── runtime_ctypes.py │ │ │ └── streams.py │ │ ├── _sparse_ops.py │ │ ├── backend/ │ │ │ ├── __init__.py │ │ │ ├── backend.py │ │ │ ├── mxnet/ │ │ │ │ ├── __init__.py │ │ │ │ ├── sparse.py │ │ │ │ ├── sparse_optim.py │ │ │ │ └── tensor.py │ │ │ ├── pytorch/ │ │ │ │ ├── __init__.py │ │ │ │ ├── sparse.py │ │ │ │ └── tensor.py │ │ │ ├── set_default_backend.py │ │ │ └── tensorflow/ │ │ │ ├── __init__.py │ │ │ ├── sparse.py │ │ │ ├── sparse_optim.py │ │ │ └── tensor.py │ │ ├── base.py │ │ ├── batch.py │ │ ├── container.py │ │ ├── convert.py │ │ ├── core.py │ │ ├── cuda/ │ │ │ ├── __init__.py │ │ │ ├── gpu_cache.py │ │ │ └── nccl.py │ │ ├── data/ │ │ │ ├── __init__.py │ │ │ ├── actor.py │ │ │ ├── adapter.py │ │ │ ├── bitcoinotc.py │ │ │ ├── citation_graph.py │ │ │ ├── cluster.py │ │ │ ├── csv_dataset.py │ │ │ ├── csv_dataset_base.py │ │ │ ├── dgl_dataset.py │ │ │ ├── fakenews.py │ │ │ ├── flickr.py │ │ │ ├── fraud.py │ │ │ ├── gdelt.py │ │ │ ├── geom_gcn.py │ │ │ ├── gindt.py │ │ │ ├── gnn_benchmark.py │ │ │ ├── graph_serialize.py │ │ │ ├── heterograph_serialize.py │ │ │ ├── heterophilous_graphs.py │ │ │ ├── icews18.py │ │ │ ├── karate.py │ │ │ ├── knowledge_graph.py │ │ │ ├── lrgb.py │ │ │ ├── minigc.py │ │ │ ├── movielens.py │ │ │ ├── pattern.py │ │ │ ├── ppi.py │ │ │ ├── qm7b.py │ │ │ ├── qm9.py │ │ │ ├── qm9_edge.py │ │ │ ├── rdf.py │ │ │ ├── reddit.py │ │ │ ├── sbm.py │ │ │ ├── superpixel.py │ │ │ ├── synthetic.py │ │ │ ├── tensor_serialize.py │ │ │ ├── tree.py │ │ │ ├── tu.py │ │ │ ├── utils.py │ │ │ ├── wikics.py │ │ │ ├── yelp.py │ │ │ └── zinc.py │ │ ├── dataloading/ │ │ │ ├── __init__.py │ │ │ ├── base.py 
│ │ │ ├── capped_neighbor_sampler.py │ │ │ ├── cluster_gcn.py │ │ │ ├── dataloader.py │ │ │ ├── graphsaint.py │ │ │ ├── labor_sampler.py │ │ │ ├── negative_sampler.py │ │ │ ├── neighbor_sampler.py │ │ │ ├── shadow.py │ │ │ └── spot_target.py │ │ ├── distgnn/ │ │ │ ├── __init__.py │ │ │ ├── partition/ │ │ │ │ ├── __init__.py │ │ │ │ └── libra_partition.py │ │ │ └── tools/ │ │ │ ├── __init__.py │ │ │ └── tools.py │ │ ├── distributed/ │ │ │ ├── __init__.py │ │ │ ├── constants.py │ │ │ ├── dist_context.py │ │ │ ├── dist_dataloader.py │ │ │ ├── dist_graph.py │ │ │ ├── dist_tensor.py │ │ │ ├── graph_partition_book.py │ │ │ ├── graph_services.py │ │ │ ├── id_map.py │ │ │ ├── kvstore.py │ │ │ ├── nn/ │ │ │ │ ├── __init__.py │ │ │ │ ├── mxnet/ │ │ │ │ │ └── __init__.py │ │ │ │ ├── pytorch/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ └── sparse_emb.py │ │ │ │ └── tensorflow/ │ │ │ │ └── __init__.py │ │ │ ├── optim/ │ │ │ │ ├── __init__.py │ │ │ │ ├── mxnet/ │ │ │ │ │ └── __init__.py │ │ │ │ ├── pytorch/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── sparse_optim.py │ │ │ │ │ └── utils.py │ │ │ │ └── tensorflow/ │ │ │ │ └── __init__.py │ │ │ ├── partition.py │ │ │ ├── role.py │ │ │ ├── rpc.py │ │ │ ├── rpc_client.py │ │ │ ├── rpc_server.py │ │ │ ├── server_state.py │ │ │ ├── shared_mem_utils.py │ │ │ └── standalone_kvstore.py │ │ ├── frame.py │ │ ├── function/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── message.py │ │ │ └── reducer.py │ │ ├── generators.py │ │ ├── geometry/ │ │ │ ├── __init__.py │ │ │ ├── capi.py │ │ │ ├── edge_coarsening.py │ │ │ └── fps.py │ │ ├── global_config.py │ │ ├── graph_index.py │ │ ├── graphbolt/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── dataloader.py │ │ │ ├── datapipes/ │ │ │ │ ├── __init__.py │ │ │ │ ├── utils.py │ │ │ │ └── visualization.py │ │ │ ├── dataset.py │ │ │ ├── external_utils.py │ │ │ ├── feature_fetcher.py │ │ │ ├── feature_store.py │ │ │ ├── impl/ │ │ │ │ ├── __init__.py │ │ │ │ ├── basic_feature_store.py │ │ │ │ ├── 
cooperative_conv.py │ │ │ │ ├── cpu_cached_feature.py │ │ │ │ ├── cpu_feature_cache.py │ │ │ │ ├── fused_csc_sampling_graph.py │ │ │ │ ├── gpu_cached_feature.py │ │ │ │ ├── gpu_feature_cache.py │ │ │ │ ├── gpu_graph_cache.py │ │ │ │ ├── in_subgraph_sampler.py │ │ │ │ ├── legacy_dataset.py │ │ │ │ ├── neighbor_sampler.py │ │ │ │ ├── ondisk_dataset.py │ │ │ │ ├── ondisk_metadata.py │ │ │ │ ├── sampled_subgraph_impl.py │ │ │ │ ├── temporal_neighbor_sampler.py │ │ │ │ ├── torch_based_feature_store.py │ │ │ │ └── uniform_negative_sampler.py │ │ │ ├── internal/ │ │ │ │ ├── __init__.py │ │ │ │ ├── item_sampler_utils.py │ │ │ │ ├── sample_utils.py │ │ │ │ └── utils.py │ │ │ ├── internal_utils.py │ │ │ ├── item_sampler.py │ │ │ ├── itemset.py │ │ │ ├── minibatch.py │ │ │ ├── minibatch_transformer.py │ │ │ ├── negative_sampler.py │ │ │ ├── sampled_subgraph.py │ │ │ ├── sampling_graph.py │ │ │ └── subgraph_sampler.py │ │ ├── heterograph.py │ │ ├── heterograph_index.py │ │ ├── homophily.py │ │ ├── init.py │ │ ├── label_informativeness.py │ │ ├── logging.py │ │ ├── merge.py │ │ ├── mpops/ │ │ │ ├── __init__.py │ │ │ ├── edgewise.py │ │ │ ├── fused.py │ │ │ └── nodewise.py │ │ ├── multiprocessing/ │ │ │ ├── __init__.py │ │ │ └── pytorch.py │ │ ├── ndarray.py │ │ ├── nn/ │ │ │ ├── __init__.py │ │ │ ├── functional/ │ │ │ │ └── __init__.py │ │ │ ├── mxnet/ │ │ │ │ ├── __init__.py │ │ │ │ ├── conv/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agnnconv.py │ │ │ │ │ ├── appnpconv.py │ │ │ │ │ ├── chebconv.py │ │ │ │ │ ├── densechebconv.py │ │ │ │ │ ├── densegraphconv.py │ │ │ │ │ ├── densesageconv.py │ │ │ │ │ ├── edgeconv.py │ │ │ │ │ ├── gatconv.py │ │ │ │ │ ├── gatedgraphconv.py │ │ │ │ │ ├── ginconv.py │ │ │ │ │ ├── gmmconv.py │ │ │ │ │ ├── graphconv.py │ │ │ │ │ ├── nnconv.py │ │ │ │ │ ├── relgraphconv.py │ │ │ │ │ ├── sageconv.py │ │ │ │ │ ├── sgconv.py │ │ │ │ │ └── tagconv.py │ │ │ │ ├── glob.py │ │ │ │ ├── hetero.py │ │ │ │ ├── softmax.py │ │ │ │ └── utils.py │ │ │ ├── pytorch/ │ 
│ │ │ ├── __init__.py │ │ │ │ ├── conv/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── agnnconv.py │ │ │ │ │ ├── appnpconv.py │ │ │ │ │ ├── atomicconv.py │ │ │ │ │ ├── cfconv.py │ │ │ │ │ ├── chebconv.py │ │ │ │ │ ├── cugraph_base.py │ │ │ │ │ ├── cugraph_gatconv.py │ │ │ │ │ ├── cugraph_relgraphconv.py │ │ │ │ │ ├── cugraph_sageconv.py │ │ │ │ │ ├── densechebconv.py │ │ │ │ │ ├── densegraphconv.py │ │ │ │ │ ├── densesageconv.py │ │ │ │ │ ├── dgnconv.py │ │ │ │ │ ├── dotgatconv.py │ │ │ │ │ ├── edgeconv.py │ │ │ │ │ ├── edgegatconv.py │ │ │ │ │ ├── egatconv.py │ │ │ │ │ ├── egnnconv.py │ │ │ │ │ ├── gatconv.py │ │ │ │ │ ├── gatedgcnconv.py │ │ │ │ │ ├── gatedgraphconv.py │ │ │ │ │ ├── gatv2conv.py │ │ │ │ │ ├── gcn2conv.py │ │ │ │ │ ├── ginconv.py │ │ │ │ │ ├── gineconv.py │ │ │ │ │ ├── gmmconv.py │ │ │ │ │ ├── graphconv.py │ │ │ │ │ ├── grouprevres.py │ │ │ │ │ ├── hgtconv.py │ │ │ │ │ ├── nnconv.py │ │ │ │ │ ├── pnaconv.py │ │ │ │ │ ├── relgraphconv.py │ │ │ │ │ ├── sageconv.py │ │ │ │ │ ├── sgconv.py │ │ │ │ │ ├── tagconv.py │ │ │ │ │ └── twirlsconv.py │ │ │ │ ├── explain/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── gnnexplainer.py │ │ │ │ │ ├── pgexplainer.py │ │ │ │ │ └── subgraphx.py │ │ │ │ ├── factory.py │ │ │ │ ├── glob.py │ │ │ │ ├── gt/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── biased_mha.py │ │ │ │ │ ├── degree_encoder.py │ │ │ │ │ ├── egt.py │ │ │ │ │ ├── graphormer.py │ │ │ │ │ ├── lap_pos_encoder.py │ │ │ │ │ ├── path_encoder.py │ │ │ │ │ └── spatial_encoder.py │ │ │ │ ├── hetero.py │ │ │ │ ├── linear.py │ │ │ │ ├── link/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── edgepred.py │ │ │ │ │ ├── transe.py │ │ │ │ │ └── transr.py │ │ │ │ ├── network_emb.py │ │ │ │ ├── softmax.py │ │ │ │ ├── sparse_emb.py │ │ │ │ └── utils.py │ │ │ └── tensorflow/ │ │ │ ├── __init__.py │ │ │ ├── conv/ │ │ │ │ ├── __init__.py │ │ │ │ ├── appnpconv.py │ │ │ │ ├── chebconv.py │ │ │ │ ├── densechebconv.py │ │ │ │ ├── edgeconv.py │ │ │ │ ├── gatconv.py │ │ │ │ ├── ginconv.py │ │ │ │ ├── 
graphconv.py │ │ │ │ ├── relgraphconv.py │ │ │ │ ├── sageconv.py │ │ │ │ └── sgconv.py │ │ │ ├── glob.py │ │ │ ├── hetero.py │ │ │ ├── softmax.py │ │ │ └── utils.py │ │ ├── ops/ │ │ │ ├── __init__.py │ │ │ ├── edge_softmax.py │ │ │ ├── gather_mm.py │ │ │ ├── sddmm.py │ │ │ ├── segment.py │ │ │ └── spmm.py │ │ ├── optim/ │ │ │ ├── __init__.py │ │ │ ├── mxnet/ │ │ │ │ └── __init__.py │ │ │ ├── pytorch/ │ │ │ │ ├── __init__.py │ │ │ │ └── sparse_optim.py │ │ │ └── tensorflow/ │ │ │ └── __init__.py │ │ ├── partition.py │ │ ├── propagate.py │ │ ├── random.py │ │ ├── readout.py │ │ ├── sampling/ │ │ │ ├── __init__.py │ │ │ ├── labor.py │ │ │ ├── negative.py │ │ │ ├── neighbor.py │ │ │ ├── node2vec_randomwalk.py │ │ │ ├── pinsage.py │ │ │ ├── randomwalks.py │ │ │ └── utils.py │ │ ├── sparse/ │ │ │ ├── __init__.py │ │ │ ├── broadcast.py │ │ │ ├── elementwise_op.py │ │ │ ├── elementwise_op_sp.py │ │ │ ├── matmul.py │ │ │ ├── reduction.py │ │ │ ├── sddmm.py │ │ │ ├── softmax.py │ │ │ ├── sparse_matrix.py │ │ │ ├── unary_op.py │ │ │ └── utils.py │ │ ├── storages/ │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── numpy.py │ │ │ ├── pytorch_tensor.py │ │ │ └── tensor.py │ │ ├── subgraph.py │ │ ├── transforms/ │ │ │ ├── __init__.py │ │ │ ├── functional.py │ │ │ ├── module.py │ │ │ └── to_block.py │ │ ├── traversal.py │ │ ├── udf.py │ │ ├── utils/ │ │ │ ├── __init__.py │ │ │ ├── checks.py │ │ │ ├── data.py │ │ │ ├── exception.py │ │ │ ├── filter.py │ │ │ ├── internal.py │ │ │ ├── pin_memory.py │ │ │ └── shared_mem.py │ │ └── view.py │ ├── setup.py │ └── update_version.py ├── readthedocs.yml ├── script/ │ ├── build_dgl.sh │ ├── build_doc.sh │ ├── create_dev_conda_env.sh │ ├── dgl_dev.yml.template │ └── run_pytest.sh ├── src/ │ ├── api/ │ │ ├── api_container.cc │ │ └── api_test.cc │ ├── array/ │ │ ├── arith.h │ │ ├── array.cc │ │ ├── array_arith.cc │ │ ├── array_op.h │ │ ├── check.h │ │ ├── cpu/ │ │ │ ├── array_cumsum.cc │ │ │ ├── array_index_select.cc │ │ │ ├── array_nonzero.cc │ 
│ │ ├── array_op_impl.cc │ │ │ ├── array_pack.cc │ │ │ ├── array_repeat.cc │ │ │ ├── array_scatter.cc │ │ │ ├── array_sort.cc │ │ │ ├── array_utils.h │ │ │ ├── concurrent_id_hash_map.cc │ │ │ ├── concurrent_id_hash_map.h │ │ │ ├── coo_coalesce.cc │ │ │ ├── coo_linegraph.cc │ │ │ ├── coo_remove.cc │ │ │ ├── coo_sort.cc │ │ │ ├── csr_get_data.cc │ │ │ ├── csr_mm.cc │ │ │ ├── csr_remove.cc │ │ │ ├── csr_sort.cc │ │ │ ├── csr_sum.cc │ │ │ ├── csr_to_simple.cc │ │ │ ├── csr_union.cc │ │ │ ├── disjoint_union.cc │ │ │ ├── gather_mm.cc │ │ │ ├── gather_mm.h │ │ │ ├── labor_pick.h │ │ │ ├── labor_sampling.cc │ │ │ ├── negative_sampling.cc │ │ │ ├── rowwise_pick.h │ │ │ ├── rowwise_sampling.cc │ │ │ ├── rowwise_topk.cc │ │ │ ├── sddmm.cc │ │ │ ├── sddmm.h │ │ │ ├── segment_reduce.cc │ │ │ ├── segment_reduce.h │ │ │ ├── spmat_op_impl_coo.cc │ │ │ ├── spmat_op_impl_csr.cc │ │ │ ├── spmm.cc │ │ │ ├── spmm.h │ │ │ ├── spmm_binary_ops.h │ │ │ ├── spmm_blocking_libxsmm.h │ │ │ ├── traversal.cc │ │ │ └── traversal.h │ │ ├── cuda/ │ │ │ ├── array_cumsum.cu │ │ │ ├── array_index_select.cu │ │ │ ├── array_index_select.cuh │ │ │ ├── array_nonzero.cu │ │ │ ├── array_op_impl.cu │ │ │ ├── array_scatter.cu │ │ │ ├── array_sort.cu │ │ │ ├── atomic.cuh │ │ │ ├── bf16.cuh │ │ │ ├── coo2csr.cu │ │ │ ├── coo_sort.cu │ │ │ ├── csr2coo.cu │ │ │ ├── csr_get_data.cu │ │ │ ├── csr_mm.cu │ │ │ ├── csr_sort.cu │ │ │ ├── csr_sum.cu │ │ │ ├── csr_transpose.cc │ │ │ ├── cuda_filter.cu │ │ │ ├── cusparse_dispatcher.cuh │ │ │ ├── disjoint_union.cu │ │ │ ├── fp16.cuh │ │ │ ├── functor.cuh │ │ │ ├── gather_mm.cu │ │ │ ├── ge_spmm.cuh │ │ │ ├── labor_sampling.cu │ │ │ ├── macro.cuh │ │ │ ├── negative_sampling.cu │ │ │ ├── rowwise_sampling.cu │ │ │ ├── rowwise_sampling_prob.cu │ │ │ ├── sddmm.cu │ │ │ ├── sddmm.cuh │ │ │ ├── sddmm_hetero_coo.cu │ │ │ ├── sddmm_hetero_csr.cu │ │ │ ├── segment_reduce.cu │ │ │ ├── segment_reduce.cuh │ │ │ ├── spmat_op_impl_coo.cu │ │ │ ├── spmat_op_impl_csr.cu │ │ │ ├── spmm.cu │ 
│ │ ├── spmm.cuh │ │ │ ├── spmm_hetero.cu │ │ │ ├── utils.cu │ │ │ ├── utils.h │ │ │ └── uvm/ │ │ │ ├── array_index_select_uvm.cu │ │ │ └── array_index_select_uvm.cuh │ │ ├── filter.cc │ │ ├── filter.h │ │ ├── kernel.cc │ │ ├── kernel_decl.h │ │ ├── libra_partition.cc │ │ ├── selector.h │ │ ├── union_partition.cc │ │ ├── uvm_array.cc │ │ └── uvm_array_op.h │ ├── bcast.cc │ ├── c_api_common.cc │ ├── c_api_common.h │ ├── geometry/ │ │ ├── cpu/ │ │ │ └── geometry_op_impl.cc │ │ ├── cuda/ │ │ │ ├── edge_coarsening_impl.cu │ │ │ └── geometry_op_impl.cu │ │ ├── geometry.cc │ │ └── geometry_op.h │ ├── graph/ │ │ ├── creators.cc │ │ ├── gk_ops.cc │ │ ├── graph.cc │ │ ├── graph_apis.cc │ │ ├── graph_op.cc │ │ ├── graph_traversal.cc │ │ ├── heterograph.cc │ │ ├── heterograph.h │ │ ├── heterograph_capi.cc │ │ ├── immutable_graph.cc │ │ ├── metis_partition.cc │ │ ├── nodeflow.cc │ │ ├── pickle.cc │ │ ├── sampler.cc │ │ ├── sampling/ │ │ │ ├── negative/ │ │ │ │ └── global_uniform.cc │ │ │ ├── neighbor/ │ │ │ │ └── neighbor.cc │ │ │ └── randomwalks/ │ │ │ ├── frequency_hashmap.cu │ │ │ ├── frequency_hashmap.cuh │ │ │ ├── get_node_types_cpu.cc │ │ │ ├── get_node_types_gpu.cu │ │ │ ├── metapath_randomwalk.h │ │ │ ├── node2vec.cc │ │ │ ├── node2vec_cpu.cc │ │ │ ├── node2vec_impl.h │ │ │ ├── node2vec_randomwalk.h │ │ │ ├── randomwalk_cpu.cc │ │ │ ├── randomwalk_gpu.cu │ │ │ ├── randomwalk_with_restart_cpu.cc │ │ │ ├── randomwalks.cc │ │ │ ├── randomwalks_cpu.h │ │ │ └── randomwalks_impl.h │ │ ├── serialize/ │ │ │ ├── dglgraph_data.h │ │ │ ├── dglgraph_serialize.cc │ │ │ ├── dglstream.h │ │ │ ├── graph_serialize.cc │ │ │ ├── graph_serialize.h │ │ │ ├── heterograph_data.h │ │ │ ├── heterograph_serialize.cc │ │ │ ├── tensor_serialize.cc │ │ │ └── zerocopy_serializer.cc │ │ ├── shared_mem_manager.cc │ │ ├── shared_mem_manager.h │ │ ├── subgraph.cc │ │ ├── transform/ │ │ │ ├── compact.cc │ │ │ ├── compact.h │ │ │ ├── cpu/ │ │ │ │ ├── kdtree_ndarray_adapter.h │ │ │ │ └── knn.cc │ │ │ ├── 
cuda/ │ │ │ │ ├── cuda_compact_graph.cu │ │ │ │ ├── cuda_map_edges.cuh │ │ │ │ ├── cuda_to_block.cu │ │ │ │ └── knn.cu │ │ │ ├── knn.cc │ │ │ ├── knn.h │ │ │ ├── line_graph.cc │ │ │ ├── metis_partition_hetero.cc │ │ │ ├── partition_hetero.cc │ │ │ ├── remove_edges.cc │ │ │ ├── to_block.cc │ │ │ ├── to_block.h │ │ │ ├── to_simple.cc │ │ │ └── union_partition.cc │ │ ├── traversal.cc │ │ ├── traversal.h │ │ ├── unit_graph.cc │ │ └── unit_graph.h │ ├── partition/ │ │ ├── cuda/ │ │ │ └── partition_op.cu │ │ ├── ndarray_partition.cc │ │ ├── ndarray_partition.h │ │ └── partition_op.h │ ├── random/ │ │ ├── continuous_seed.h │ │ ├── cpu/ │ │ │ ├── choice.cc │ │ │ └── sample_utils.h │ │ └── random.cc │ ├── rpc/ │ │ ├── network/ │ │ │ ├── common.cc │ │ │ ├── common.h │ │ │ ├── communicator.h │ │ │ ├── msg_queue.cc │ │ │ ├── msg_queue.h │ │ │ ├── socket_communicator.cc │ │ │ ├── socket_communicator.h │ │ │ ├── socket_pool.cc │ │ │ ├── socket_pool.h │ │ │ ├── tcp_socket.cc │ │ │ └── tcp_socket.h │ │ ├── rpc.cc │ │ ├── rpc.h │ │ ├── rpc_msg.h │ │ └── server_state.h │ ├── runtime/ │ │ ├── c_object_api.cc │ │ ├── c_runtime_api.cc │ │ ├── config.cc │ │ ├── cpu_device_api.cc │ │ ├── cuda/ │ │ │ ├── cuda_common.h │ │ │ ├── cuda_device_api.cc │ │ │ ├── cuda_hashtable.cu │ │ │ ├── cuda_hashtable.cuh │ │ │ └── gpu_cache.cu │ │ ├── dlpack_convert.cc │ │ ├── dso_module.cc │ │ ├── file_util.cc │ │ ├── file_util.h │ │ ├── meta_data.h │ │ ├── module.cc │ │ ├── module_util.cc │ │ ├── module_util.h │ │ ├── ndarray.cc │ │ ├── object.cc │ │ ├── pack_args.h │ │ ├── parallel_for.cpp │ │ ├── registry.cc │ │ ├── resource_manager.cc │ │ ├── resource_manager.h │ │ ├── runtime_base.h │ │ ├── semaphore_wrapper.cc │ │ ├── semaphore_wrapper.h │ │ ├── shared_mem.cc │ │ ├── system_lib_module.cc │ │ ├── tensordispatch.cc │ │ ├── thread_pool.cc │ │ ├── thread_storage_scope.h │ │ ├── threading_backend.cc │ │ ├── utils.cc │ │ ├── workspace.h │ │ ├── workspace_pool.cc │ │ └── workspace_pool.h │ └── scheduler/ │ 
├── scheduler.cc │ └── scheduler_apis.cc ├── tensoradapter/ │ ├── include/ │ │ ├── tensoradapter.h │ │ └── tensoradapter_exports.h │ └── pytorch/ │ ├── CMakeLists.txt │ ├── build.bat │ ├── build.sh │ ├── find_cmake.py │ └── torch.cpp ├── tests/ │ ├── README.md │ ├── backend/ │ │ ├── __init__.py │ │ ├── backend_unittest.py │ │ ├── mxnet/ │ │ │ └── __init__.py │ │ ├── pytorch/ │ │ │ └── __init__.py │ │ └── tensorflow/ │ │ └── __init__.py │ ├── cpp/ │ │ ├── common.h │ │ ├── graph_index_test.cc │ │ ├── message_queue_test.cc │ │ ├── socket_communicator_test.cc │ │ ├── string_test.cc │ │ ├── test_aten.cc │ │ ├── test_concurrent_id_hash_map.cc │ │ ├── test_csrmm.cc │ │ ├── test_partition.cc │ │ ├── test_rowwise.cc │ │ ├── test_sampler.cc │ │ ├── test_serialize.cc │ │ ├── test_smart_ptr_serialize.cc │ │ ├── test_spmat_coo.cc │ │ ├── test_spmat_csr.cc │ │ ├── test_spmm.cc │ │ ├── test_unit_graph.cc │ │ └── test_zerocopy_serialize.cc │ ├── cugraph/ │ │ ├── cugraph-ops/ │ │ │ ├── test_cugraph_gatconv.py │ │ │ ├── test_cugraph_relgraphconv.py │ │ │ └── test_cugraph_sageconv.py │ │ └── test_basics.py │ ├── dist/ │ │ ├── python/ │ │ │ ├── rpc_basic.py │ │ │ └── run_dist_objects.py │ │ ├── test_dist_objects.py │ │ ├── test_rpc.py │ │ └── utils.py │ ├── distributed/ │ │ ├── test_dist_graph_store.py │ │ ├── test_dist_tensor.py │ │ ├── test_distributed_sampling.py │ │ ├── test_mp_dataloader.py │ │ ├── test_new_kvstore.py │ │ ├── test_partition.py │ │ ├── test_rpc.py │ │ └── utils.py │ ├── examples/ │ │ ├── test_sampling_examples.py │ │ └── test_sparse_examples.py │ ├── go/ │ │ ├── test_model.py │ │ └── test_pipeline.py │ ├── integration/ │ │ └── test_data.py │ ├── lint/ │ │ ├── clangformat_linter.py │ │ ├── lint.py │ │ ├── pip_init.py │ │ ├── pylintrc │ │ └── ufmt_linter.py │ ├── python/ │ │ ├── common/ │ │ │ ├── backend/ │ │ │ │ ├── test_set_default_backend.py │ │ │ │ └── test_tensor.py │ │ │ ├── cuda/ │ │ │ │ └── test_gpu_cache.py │ │ │ ├── data/ │ │ │ │ ├── data/ │ │ │ │ │ ├── 
1.npy │ │ │ │ │ ├── 2.npy │ │ │ │ │ ├── graph_0.9a220622.dgl │ │ │ │ │ └── test_heterophilous_graphs.py │ │ │ │ ├── test_actor.py │ │ │ │ ├── test_data.py │ │ │ │ ├── test_geom_gcn.py │ │ │ │ ├── test_movielens.py │ │ │ │ ├── test_serialize.py │ │ │ │ └── test_utils.py │ │ │ ├── dataloading/ │ │ │ │ └── test_dataloader.py │ │ │ ├── function/ │ │ │ │ └── test_basics.py │ │ │ ├── ops/ │ │ │ │ ├── test_edge_softmax.py │ │ │ │ └── test_ops.py │ │ │ ├── sampling/ │ │ │ │ └── test_sampling.py │ │ │ ├── test_batch-graph.py │ │ │ ├── test_batch-heterograph.py │ │ │ ├── test_convert.py │ │ │ ├── test_ffi.py │ │ │ ├── test_frame.py │ │ │ ├── test_generators.py │ │ │ ├── test_heterograph-apply-edges.py │ │ │ ├── test_heterograph-index.py │ │ │ ├── test_heterograph-kernel.py │ │ │ ├── test_heterograph-misc.py │ │ │ ├── test_heterograph-pickle.py │ │ │ ├── test_heterograph-remove.py │ │ │ ├── test_heterograph-shared-memory.py │ │ │ ├── test_heterograph-specialization.py │ │ │ ├── test_heterograph-update-all.py │ │ │ ├── test_heterograph.py │ │ │ ├── test_homophily.py │ │ │ ├── test_label_informativeness.py │ │ │ ├── test_merge.py │ │ │ ├── test_partition.py │ │ │ ├── test_propagate.py │ │ │ ├── test_random.py │ │ │ ├── test_readout.py │ │ │ ├── test_sparse_ops-csr.py │ │ │ ├── test_subgraph.py │ │ │ ├── test_traversal.py │ │ │ ├── transforms/ │ │ │ │ ├── test_functional-sort.py │ │ │ │ ├── test_to_block.py │ │ │ │ └── test_transform.py │ │ │ └── utils/ │ │ │ ├── test_filter.py │ │ │ └── test_pin_memory.py │ │ ├── mxnet/ │ │ │ ├── ip_config.txt │ │ │ ├── test_geometry.py │ │ │ └── test_nn.py │ │ ├── pytorch/ │ │ │ ├── cuda/ │ │ │ │ └── test_nccl.py │ │ │ ├── dataloading/ │ │ │ │ ├── test_dataloader.py │ │ │ │ └── test_spot_target.py │ │ │ ├── distributed/ │ │ │ │ └── optim/ │ │ │ │ └── test_dist_optim.py │ │ │ ├── geometry/ │ │ │ │ └── test_geometry.py │ │ │ ├── graphbolt/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gb_test_utils.py │ │ │ │ ├── impl/ │ │ │ │ │ ├── __init__.py │ │ │ │ │ 
├── test_basic_feature_store.py │ │ │ │ │ ├── test_cooperative_minibatching_utils.py │ │ │ │ │ ├── test_cpu_cached_feature.py │ │ │ │ │ ├── test_disk_based_feature_store.py │ │ │ │ │ ├── test_feature_cache.py │ │ │ │ │ ├── test_fused_csc_sampling_graph.py │ │ │ │ │ ├── test_gpu_cached_feature.py │ │ │ │ │ ├── test_gpu_graph_cache.py │ │ │ │ │ ├── test_hetero_cached_feature.py │ │ │ │ │ ├── test_in_subgraph_sampler.py │ │ │ │ │ ├── test_legacy_dataset.py │ │ │ │ │ ├── test_negative_sampler.py │ │ │ │ │ ├── test_neighbor_sampler.py │ │ │ │ │ ├── test_ondisk_dataset.py │ │ │ │ │ ├── test_sampled_subgraph_impl.py │ │ │ │ │ └── test_torch_based_feature_store.py │ │ │ │ ├── internal/ │ │ │ │ │ ├── test_sample_utils.py │ │ │ │ │ └── test_utils.py │ │ │ │ ├── test_base.py │ │ │ │ ├── test_dataloader.py │ │ │ │ ├── test_dataset.py │ │ │ │ ├── test_feature_fetcher.py │ │ │ │ ├── test_graphbolt_utils.py │ │ │ │ ├── test_integration.py │ │ │ │ ├── test_item_sampler.py │ │ │ │ ├── test_itemset.py │ │ │ │ ├── test_minibatch.py │ │ │ │ ├── test_subgraph_sampler.py │ │ │ │ └── test_utils.py │ │ │ ├── ip_config.txt │ │ │ ├── mpops/ │ │ │ │ └── test_edgewise.py │ │ │ ├── nn/ │ │ │ │ ├── conv/ │ │ │ │ │ └── test_gatedgcnconv.py │ │ │ │ ├── test_nn.py │ │ │ │ └── test_sparse_emb.py │ │ │ ├── optim/ │ │ │ │ └── test_optim.py │ │ │ ├── sparse/ │ │ │ │ ├── __init__.py │ │ │ │ ├── test_broadcast.py │ │ │ │ ├── test_elementwise_op.py │ │ │ │ ├── test_elementwise_op_sp.py │ │ │ │ ├── test_matmul.py │ │ │ │ ├── test_matrix_op.py │ │ │ │ ├── test_reduction.py │ │ │ │ ├── test_sddmm.py │ │ │ │ ├── test_softmax.py │ │ │ │ ├── test_sparse_matrix.py │ │ │ │ ├── test_unary_op.py │ │ │ │ └── utils.py │ │ │ ├── test_ffi-stream.py │ │ │ ├── test_heterograph-pickle.py │ │ │ ├── test_multiprocessing-ipc.py │ │ │ └── utils/ │ │ │ └── test_pin_memory.py │ │ ├── tensorflow/ │ │ │ ├── test_basic.py │ │ │ └── test_nn.py │ │ └── test_dgl_import.py │ ├── scripts/ │ │ ├── build_dgl.bat │ │ ├── build_dgl.sh │ │ 
├── ci_report/ │ │ │ ├── report.py │ │ │ └── status.py │ │ ├── cugraph_unit_test.sh │ │ ├── task_cpp_unit_test.bat │ │ ├── task_cpp_unit_test.sh │ │ ├── task_dist_test.sh │ │ ├── task_distributed_test.sh │ │ ├── task_example_test.bat │ │ ├── task_example_test.sh │ │ ├── task_go_test.sh │ │ ├── task_lint.sh │ │ ├── task_pytorch_tutorial_test.sh │ │ ├── task_unit_test.bat │ │ └── task_unit_test.sh │ ├── tools/ │ │ ├── pytest_utils.py │ │ ├── test_array_readwriter.py │ │ ├── test_change_etype_to_canonical_etype.py │ │ ├── test_convert_partition.py │ │ ├── test_dist_lookup.py │ │ ├── test_dist_part.py │ │ ├── test_dist_partition_graphbolt.py │ │ ├── test_launch.py │ │ ├── test_parmetis.py │ │ └── test_parmetis_preproc.py │ └── utils/ │ ├── __init__.py │ ├── checks.py │ └── graph_cases.py ├── third_party/ │ └── HugeCTR/ │ └── gpu_cache/ │ ├── ReadMe.md │ ├── include/ │ │ ├── gpu_cache_api.hpp │ │ ├── hash_functions.cuh │ │ ├── nv_gpu_cache.hpp │ │ └── nv_util.h │ └── src/ │ └── nv_gpu_cache.cu ├── tools/ │ ├── README.md │ ├── change_etype_to_canonical_etype.py │ ├── chunk_graph.py │ ├── copy_files.py │ ├── dispatch_data.py │ ├── distgraphlaunch.py │ ├── distpartitioning/ │ │ ├── README.md │ │ ├── array_readwriter/ │ │ │ ├── __init__.py │ │ │ ├── csv.py │ │ │ ├── numpy_array.py │ │ │ ├── parquet.py │ │ │ └── registry.py │ │ ├── constants.py │ │ ├── convert_partition.py │ │ ├── data_proc_pipeline.py │ │ ├── data_shuffle.py │ │ ├── dataset_utils.py │ │ ├── dist_lookup.py │ │ ├── globalids.py │ │ ├── gloo_wrapper.py │ │ ├── parmetis_postprocess.py │ │ ├── parmetis_preprocess.py │ │ ├── parmetis_wrapper.py │ │ └── utils.py │ ├── files.py │ ├── launch.py │ ├── partition_algo/ │ │ ├── base.py │ │ └── random_partition.py │ ├── verification_utils.py │ └── verify_partitions.py └── tutorials/ ├── blitz/ │ ├── .gitignore │ ├── 1_introduction.py │ ├── 2_dglgraph.py │ ├── 3_message_passing.py │ ├── 4_link_predict.py │ ├── 5_graph_classification.py │ ├── 6_load_data.py │ └── 
README.txt ├── cpu/ │ ├── README.txt │ ├── argo_tutorial.py │ └── cpu_best_practises.py ├── models/ │ ├── 1_gnn/ │ │ ├── 1_gcn.py │ │ ├── 4_rgcn.py │ │ ├── 6_line_graph.py │ │ ├── 9_gat.py │ │ └── README.txt │ ├── 2_small_graph/ │ │ ├── 3_tree-lstm.py │ │ └── README.txt │ ├── 3_generative_model/ │ │ ├── 5_dgmg.py │ │ └── README.txt │ ├── 4_old_wines/ │ │ ├── 2_capsule.py │ │ ├── 7_transformer.py │ │ └── README.txt │ └── README.txt ├── multi/ │ ├── 1_graph_classification.py │ ├── 2_node_classification.py │ └── README.txt └── requirements.txt ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ --- Language: Cpp # BasedOnStyle: Google AccessModifierOffset: -1 AlignAfterOpenBracket: AlwaysBreak AlignArrayOfStructures: None AlignConsecutiveAssignments: Enabled: false AcrossEmptyLines: false AcrossComments: false AlignCompound: false PadOperators: true AlignConsecutiveBitFields: Enabled: false AcrossEmptyLines: false AcrossComments: false AlignCompound: false PadOperators: false AlignConsecutiveDeclarations: Enabled: false AcrossEmptyLines: false AcrossComments: false AlignCompound: false PadOperators: false AlignConsecutiveMacros: Enabled: false AcrossEmptyLines: false AcrossComments: false AlignCompound: false PadOperators: false AlignEscapedNewlines: Left AlignOperands: Align AllowAllArgumentsOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: false AllowShortEnumsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: WithoutElse AllowShortLambdasOnASingleLine: All AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: true AlwaysBreakTemplateDeclarations: Yes AttributeMacros: - 
__capability BinPackArguments: true BinPackParameters: true BitFieldColonSpacing: Both BraceWrapping: AfterCaseLabel: false AfterClass: false AfterControlStatement: Never AfterEnum: false AfterExternBlock: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false BeforeCatch: false BeforeElse: false BeforeLambdaBody: false BeforeWhile: false IndentBraces: false SplitEmptyFunction: true SplitEmptyRecord: true SplitEmptyNamespace: true BreakAfterJavaFieldAnnotations: false BreakBeforeBinaryOperators: None BreakBeforeConceptDeclarations: Always BreakBeforeBraces: Attach BreakBeforeTernaryOperators: true BreakConstructorInitializers: BeforeColon BreakInheritanceList: BeforeColon BreakStringLiterals: true ColumnLimit: 80 CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: true DisableFormat: false EmptyLineAfterAccessModifier: Never EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH IfMacros: - KJ_IF_MAYBE IncludeBlocks: Regroup IncludeCategories: - Regex: '^' Priority: 2 SortPriority: 0 CaseSensitive: false - Regex: '^<.*\.h>' Priority: 1 SortPriority: 0 CaseSensitive: false - Regex: '^<.*' Priority: 2 SortPriority: 0 CaseSensitive: false - Regex: '.*' Priority: 3 SortPriority: 0 CaseSensitive: false IncludeIsMainRegex: '([-_](test|unittest))?$' IncludeIsMainSourceRegex: '' IndentAccessModifiers: false IndentCaseBlocks: false IndentCaseLabels: true IndentExternBlock: AfterExternBlock IndentGotoLabels: true IndentPPDirectives: None IndentRequiresClause: true IndentWidth: 2 IndentWrappedFunctionNames: false InsertBraces: false InsertTrailingCommas: None JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: false LambdaBodyIndentation: Signature 
MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBinPackProtocolList: Never ObjCBlockIndentWidth: 2 ObjCBreakBeforeNestedBlockParam: true ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PackConstructorInitializers: NextLine PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakOpenParenthesis: 0 PenaltyBreakString: 1000 PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 PenaltyIndentedWhitespace: 0 PenaltyReturnTypeOnItsOwnLine: 200 PointerAlignment: Left PPIndentWidth: -1 QualifierAlignment: Leave RawStringFormats: - Language: Cpp Delimiters: - cc - CC - cpp - Cpp - CPP - 'c++' - 'C++' CanonicalDelimiter: '' BasedOnStyle: google - Language: TextProto Delimiters: - pb - PB - proto - PROTO EnclosingFunctions: - EqualsProto - EquivToProto - PARSE_PARTIAL_TEXT_PROTO - PARSE_TEST_PROTO - PARSE_TEXT_PROTO - ParseTextOrDie - ParseTextProtoOrDie - ParseTestProto - ParsePartialTestProto CanonicalDelimiter: pb BasedOnStyle: google ReferenceAlignment: Pointer ReflowComments: true RemoveBracesLLVM: false RequiresClausePosition: OwnLine SeparateDefinitionBlocks: Leave ShortNamespaceLines: 1 SortIncludes: CaseSensitive SortJavaStaticImport: Before SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements SpaceBeforeParensOptions: AfterControlStatements: true AfterForeachMacros: true AfterFunctionDefinitionName: false AfterFunctionDeclarationName: false AfterIfMacros: true AfterOverloadedOperator: false AfterRequiresInClause: false AfterRequiresInExpression: false BeforeNonEmptyParentheses: false SpaceBeforeRangeBasedForLoopColon: true 
SpaceBeforeSquareBrackets: false SpaceInEmptyBlock: false SpacesBeforeTrailingComments: 2 SpacesInAngles: Never SpacesInContainerLiterals: true SpacesInLineCommentPrefix: Minimum: 1 Maximum: -1 SpacesInSquareBrackets: false Standard: Auto StatementAttributeLikeMacros: - Q_EMIT StatementMacros: - Q_UNUSED - QT_REQUIRE_VERSION TabWidth: 8 UseTab: Never WhitespaceSensitiveMacros: - BOOST_PP_STRINGIZE - CF_SWIFT_NAME - NS_SWIFT_NAME - PP_STRINGIZE - STRINGIZE ... ================================================ FILE: .github/ISSUE_TEMPLATE/--work-item--dev-only-.md ================================================ --- name: "\U0001F528Work Item (DEV ONLY)" about: Work item issue for tracking progress. Dev team only. title: '' labels: Work Item assignees: '' --- ## 🔨Work Item **IMPORTANT:** * This template is only for dev team to track project progress. For feature request or bug report, please use the corresponding issue templates. * DO NOT create a new work item if the purpose is to fix an existing issue or feature request. We will directly use the issue in the project tracker. Project tracker: https://github.com/orgs/dmlc/projects/2 ## Description ## Depending work items or issues ================================================ FILE: .github/ISSUE_TEMPLATE/bug-report.md ================================================ --- name: "\U0001F41B Bug Report" about: Submit a bug report to help us improve DGL title: '' labels: '' assignees: '' --- ## 🐛 Bug ## To Reproduce Steps to reproduce the behavior: 1. 1. 1. ## Expected behavior ## Environment - DGL Version (e.g., 1.0): - Backend Library & Version (e.g., PyTorch 0.4.1, MXNet/Gluon 1.3): - OS (e.g., Linux): - How you installed DGL (`conda`, `pip`, source): - Build command you used (if compiling from source): - Python version: - CUDA/cuDNN version (if applicable): - GPU models and configuration (e.g. 
V100): - Any other relevant information: ## Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/documentation.md ================================================ --- name: "\U0001F4DA Documentation" about: Report an issue related to docs.dgl.ai title: '' labels: '' assignees: '' --- ## 📚 Documentation ================================================ FILE: .github/ISSUE_TEMPLATE/feature-request.md ================================================ --- name: "\U0001F680Feature Request" about: Submit a proposal/request for a new DGL feature title: '' labels: '' assignees: '' --- ## 🚀 Feature ## Motivation ## Alternatives ## Pitch ## Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/questions-help-support.md ================================================ --- name: "❓Questions/Help/Support" about: Do you need support? We have resources. title: '' labels: '' assignees: '' --- ## ❓ Questions and Help Before proceeding, please note that we recommend using our discussion forum (https://discuss.dgl.ai) for general questions. As a result, this issue will likely be CLOSED shortly. ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ ## Description ## Checklist Please feel free to remove inapplicable items for your PR. - [ ] The PR title starts with [$CATEGORY] (such as [NN], [Model], [Doc], [Feature]) - [ ] I've leveraged the [tools](https://docs.google.com/document/d/1iHyj7zlmygKSk5gBPsqIqL5ASPzJSPREaNT_QdsiYA4/edit) to beautify the Python and C++ code. - [ ] The PR is complete and small; read the [Google eng practice (CL equals to PR)](https://google.github.io/eng-practices/review/developer/small-cls.html) to understand more about small PRs. In DGL, we consider PRs with fewer than 200 lines of core code changes to be small (examples, tests and documentation could be exempted). 
- [ ] All changes have test coverage - [ ] Code is well-documented - [ ] To the best of my knowledge, examples are either not affected by this change, or have been fixed to be compatible with this change - [ ] Related issue is referred in this PR - [ ] If the PR is for a new model/paper, I've updated the example index [here](../examples/README.md). ## Changes ================================================ FILE: .github/workflows/lint.yml ================================================ name: Lint on: [pull_request] jobs: lintrunner: runs-on: ubuntu-latest steps: - name: Pull DGL uses: actions/checkout@v3 with: fetch-depth: 0 - name: Checkout master and HEAD run: | git checkout -t origin/master git checkout ${{ github.event.pull_request.head.sha }} - name: Setup Python uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install requirements run: | python -m pip install --upgrade pip pip install lintrunner --user - name: Initialize lint dependencies run: lintrunner init - name: Run lintrunner on all changed files run: | set +e if ! lintrunner --force-color -m master --tee-json=lint.json; then echo "" echo -e "\e[1m\e[36mYou can reproduce these results locally by using \`lintrunner\`.\e[0m" echo -e "\e[1m\e[36mSee https://github.com/pytorch/pytorch/wiki/lintrunner for setup instructions.\e[0m" exit 1 fi - name: Store annotations if: always() && github.event_name == 'pull_request' # Don't show this as an error; the above step will have already failed. continue-on-error: true run: | # Use jq to massage the JSON lint output into GitHub Actions workflow commands. 
jq --raw-output \ '"::\(if .severity == "advice" or .severity == "disabled" then "warning" else .severity end) file=\(.path),line=\(.line),col=\(.char),title=\(.code) \(.name)::" + (.description | gsub("\\n"; "%0A"))' \ lint.json concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} cancel-in-progress: true ================================================ FILE: .github/workflows/stale.yml ================================================ # This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. # # You can adjust the behavior by modifying this file. # For more information, see: # https://github.com/actions/stale name: Mark stale issues and pull requests on: schedule: - cron: '0 1 * * *' jobs: stale: runs-on: ubuntu-latest permissions: issues: write pull-requests: write steps: - uses: actions/stale@v4.1.0 with: repo-token: ${{ secrets.GITHUB_TOKEN }} days-before-issue-stale: 30 days-before-issue-close: -1 # disable issue close days-before-pr-stale: -1 # disable stale bot on pr days-before-pr-close: -1 # disable stale bot on pr stale-issue-message: 'This issue has been automatically marked as stale due to lack of activity. It will be closed if no further activity occurs. Thank you' close-issue-message: 'This issue is closed due to lack of activity. Feel free to reopen it if you still have questions.' 
stale-issue-label: 'stale-issue' exempt-issue-labels: 'bug:confirmed,feature request,help wanted,Work Item' exempt-all-issue-milestones: true ================================================ FILE: .gitignore ================================================ # IDE .idea # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ dataset/ datasets/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # Whitelist some distribution / package non-related directories !tests/dist # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ examples/pytorch/data/ind.pubmed.y examples/pytorch/data/ind.pubmed.x examples/pytorch/data/ind.pubmed.ty examples/pytorch/data/ind.pubmed.tx examples/pytorch/data/ind.pubmed.test.index examples/pytorch/data/ind.pubmed.graph examples/pytorch/data/ind.pubmed.ally examples/pytorch/data/ind.pubmed.allx examples/pytorch/data/ind.cora.y examples/pytorch/data/ind.cora.x examples/pytorch/data/ind.cora.ty examples/pytorch/data/ind.cora.tx 
examples/pytorch/data/ind.cora.test.index examples/pytorch/data/ind.cora.graph examples/pytorch/data/ind.cora.ally examples/pytorch/data/ind.cora.allx examples/pytorch/data/ind.citeseer.y examples/pytorch/data/ind.citeseer.x examples/pytorch/data/ind.citeseer.ty examples/pytorch/data/ind.citeseer.tx examples/pytorch/data/ind.citeseer.test.index examples/pytorch/data/ind.citeseer.graph examples/pytorch/data/ind.citeseer.ally examples/pytorch/data/ind.citeseer.allx examples/pytorch/.DS_Store examples/.DS_Store examples/pytorch/generative_graph/*.p .DS_Store # data directory _download # CTags & CScope tags cscope.* # Vim *.swp *.swo *.un~ *~ # parameters *.params # vscode .clangd .vscode # asv .asv .ycm_extra_conf.py **.png # model file *.pth ================================================ FILE: .gitmodules ================================================ [submodule "third_party/dmlc-core"] path = third_party/dmlc-core url = https://github.com/dmlc/dmlc-core.git [submodule "third_party/dlpack"] path = third_party/dlpack url = https://github.com/dmlc/dlpack.git [submodule "third_party/googletest"] path = third_party/googletest url = https://github.com/google/googletest.git [submodule "third_party/METIS"] path = third_party/METIS url = https://github.com/KarypisLab/METIS.git [submodule "third_party/nanoflann"] path = third_party/nanoflann url = https://github.com/jlblancoc/nanoflann [submodule "third_party/libxsmm"] path = third_party/libxsmm url = https://github.com/hfp/libxsmm.git [submodule "third_party/pcg"] path = third_party/pcg url = https://github.com/imneme/pcg-cpp.git [submodule "third_party/cccl"] path = third_party/cccl url = https://github.com/NVIDIA/cccl.git [submodule "third_party/liburing"] path = third_party/liburing url = https://github.com/axboe/liburing.git [submodule "third_party/cuco"] path = third_party/cuco url = https://github.com/NVIDIA/cuCollections.git [submodule "third_party/GKlib"] path = third_party/GKlib url = 
https://github.com/KarypisLab/GKlib.git [submodule "third_party/taskflow"] path = third_party/taskflow url = https://github.com/taskflow/taskflow.git [submodule "third_party/tsl_robin_map"] path = third_party/tsl_robin_map url = https://github.com/Tessil/robin-map.git ================================================ FILE: .lintrunner.toml ================================================ # Black + usort [[linter]] code = 'UFMT' include_patterns = [ '**/*.py', ] command = [ 'python3', 'tests/lint/ufmt_linter.py', '--', '@{{PATHSFILE}}' ] exclude_patterns = [ '.github/*', 'build/*', 'cmake/*', 'conda/*', 'docker/*', 'third_party/*', ] init_command = [ 'python3', 'tests/lint/pip_init.py', '--dry-run={{DRYRUN}}', 'black==22.10.0', 'ufmt==2.0.1', 'usort==1.0.5', ] is_formatter = true [[linter]] code = 'CLANGFORMAT' include_patterns = [ '**/*.h', '**/*.c', '**/*.cc', '**/*.cpp', '**/*.cuh', '**/*.cu', ] exclude_patterns = [ 'third_party/**', ] init_command = [ 'python3', 'tests/lint/pip_init.py', '--dry-run={{DRYRUN}}', 'clang-format==15.0.4', ] command = [ 'python3', 'tests/lint/clangformat_linter.py', '--binary=clang-format', '--', '@{{PATHSFILE}}' ] is_formatter = true ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.18) ######################################## # Borrowed and adapted from TVM project ######################################## project(dgl C CXX) message(STATUS "Start configuring project ${PROJECT_NAME}") set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) # cmake utils include(cmake/util/Util.cmake) include(cmake/util/MshadowUtil.cmake) include(cmake/util/FindCUDA.cmake) # Options for building DGL. # NOTE: Please avoid editing this file to change build type. Instead, using # bash script/build_dgl.sh -e -t release to overwrite the value. 
dgl_option(BUILD_TYPE "Type of the build: dev, dogfood or release" "dev") message(STATUS "Build for ${BUILD_TYPE}") dgl_option(USE_CUDA "Build with CUDA" OFF) dgl_option(TORCH_PYTHON_INTERPS "Python interpreter for building sub-components" python3) # Conda build related options. dgl_option(EXTERNAL_DLPACK_PATH "Path to external dlpack" OFF) dgl_option(EXTERNAL_DMLC_PATH "Path to external dmlc-core" OFF) dgl_option(EXTERNAL_DMLC_LIB_PATH "Path to external dmlc-core library" OFF) dgl_option(EXTERNAL_PHMAP_PATH "Path to external parallel-hashmap" OFF) dgl_option(EXTERNAL_NANOFLANN_PATH "Path to use external nanoflann" OFF) dgl_option(EXTERNAL_METIS_PATH "Path to external metis" OFF) dgl_option(EXTERNAL_METIS_LIB_PATH "Path to external metis library" OFF) dgl_option(EXTERNAL_GKLIB_PATH "Path to external gklib" OFF) # Options for building DGL features: "none," "dev," "dogfood," "release," and # "all." # "none" - The feature is OFF for all build types. This is used when # disabling a feature. # "dev" - The feature is ON for dev build. The default build from source # and the build for unit tests are using this build type. # "dogfood" - The major function of this feature is done. The regression and # benchmark framework are using this build type. # "release" - The feature will be build for release. # "all" - The feature is ON for all build types. Equivalent to set ["dev" # "dogfood" "release"]. # NOTE: Please avoid editing this file to change feature options for a local # build. Instead, using bash script/build_dgl.sh -e '-DFEATURE_NAME=ON/OFF' to # overwrite the value. 
dgl_feature_option(
  BUILD_SPARSE
  "Build DGL sparse library"
  "all"
)
dgl_feature_option(
  BUILD_TORCH
  "Build the PyTorch plugin"
  "all"
)
dgl_feature_option(
  USE_EPOLL
  "Build with epoll for socket communicator"
  "all"
)
dgl_feature_option(
  USE_LIBXSMM
  "Build with LIBXSMM library optimization"
  "all"
)
dgl_feature_option(
  USE_OPENMP
  "Build with OpenMP"
  "all"
)
dgl_feature_option(
  BUILD_GRAPHBOLT
  "Build Graphbolt library"
  "all"
)

dgl_feature_option(
  LIBCXX_ENABLE_PARALLEL_ALGORITHMS
  "Enable the parallel algorithms library. This requires the PSTL to be available."
  "none"
)
dgl_feature_option(
  REBUILD_LIBXSMM
  "Clean LIBXSMM build cache at every build"
  "none"
)
dgl_feature_option(
  USE_HDFS
  "Build with HDFS support"
  "none"
)  # Set env HADOOP_HDFS_HOME if needed
dgl_feature_option(
  USE_S3
  "Build with S3 support"
  "none"
)

# Only build C++ tests for unit testing purposes in dev build.
dgl_feature_option(
  BUILD_CPP_TEST
  "Build cpp unittest executables"
  "dev"
)

# Any externally supplied third-party dependency switches the build into
# "external libs" mode. The C++ unit tests include the bundled third_party
# headers directly, so they cannot be built in that mode.
# (EXTERNAL_NANOFLANN_PATH was previously listed twice in this condition; the
# redundant operand has been dropped without changing the result.)
if (EXTERNAL_DLPACK_PATH OR EXTERNAL_DMLC_PATH OR EXTERNAL_NANOFLANN_PATH OR EXTERNAL_METIS_PATH OR EXTERNAL_GKLIB_PATH)
  message(STATUS "Using at least one external library")
  set(USE_EXTERNAL_LIBS ON)
  if (BUILD_CPP_TEST)
    message(FATAL_ERROR "Cannot build cpp unittests with external libraries")
  endif(BUILD_CPP_TEST)
endif()

# Make the project-local find modules (cmake/modules) visible to find_package().
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules)

# Set optimization options for different build types.
if (${BUILD_TYPE} STREQUAL "dev") if (MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /Od") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /Od") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0 -g3 -ggdb") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g3 -ggdb") endif() else() if (MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 /DNDEBUG") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /DNDEBUG") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -DNDEBUG") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -DNDEBUG") endif() endif() if(USE_CUDA) message(STATUS "Build with CUDA support") project(dgl C CXX) include(cmake/modules/CUDA.cmake) message(STATUS "Use external CCCL library for a consistent API and performance.") cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cccl/thrust") cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cccl/cub") cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cccl/libcudacxx/include") endif(USE_CUDA) # initial variables if(NOT MSVC) set(DGL_LINKER_LIBS "dl") endif(NOT MSVC) if(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(DGL_RUNTIME_LINKER_LIBS "") else(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") set(DGL_RUNTIME_LINKER_LIBS "rt") endif(MSVC OR CMAKE_SYSTEM_NAME STREQUAL "Darwin") # Generic compilation options if(MSVC) add_definitions(-DWIN32_LEAN_AND_MEAN) add_definitions(-D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_SCL_SECURE_NO_WARNINGS) add_definitions(-DNOMINMAX) set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS 1) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /bigobj") if(USE_MSVC_MT) foreach(flag_var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO) if(${flag_var} MATCHES "/MD") string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") endif(${flag_var} MATCHES "/MD") endforeach(flag_var) endif() else(MSVC) include(CheckCXXCompilerFlag) set(CMAKE_C_FLAGS "-Wall 
-fPIC ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "-Wall -fPIC ${CMAKE_CXX_FLAGS}") if(NOT APPLE) set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--warn-common ${CMAKE_SHARED_LINKER_FLAGS}") endif(NOT APPLE) endif(MSVC) if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") message(STATUS "Disabling LIBXSMM on ${CMAKE_SYSTEM_PROCESSOR}.") set(USE_LIBXSMM OFF) endif() # Source file lists file(GLOB DGL_SRC src/*.cc src/array/*.cc src/array/cpu/*.cc src/random/*.cc src/random/cpu/*.cc src/runtime/*.cc src/geometry/*.cc src/geometry/cpu/*.cc src/partition/*.cc ) file(GLOB_RECURSE DGL_SRC_1 src/api/*.cc src/graph/*.cc src/scheduler/*.cc ) list(APPEND DGL_SRC ${DGL_SRC_1}) if (NOT MSVC) file(GLOB_RECURSE DGL_RPC_SRC src/rpc/*.cc) else() file(GLOB_RECURSE DGL_RPC_SRC src/rpc/network/*.cc) endif() list(APPEND DGL_SRC ${DGL_RPC_SRC}) if(USE_OPENMP) find_package(OpenMP REQUIRED) list(APPEND DGL_LINKER_LIBS OpenMP::OpenMP_CXX) message(STATUS "Build with OpenMP.") endif(USE_OPENMP) # Configure cuda if(USE_CUDA) file(GLOB_RECURSE DGL_CUDA_SRC src/array/cuda/*.cc src/array/cuda/*.cu src/array/cuda/uvm/*.cc src/array/cuda/uvm/*.cu src/kernel/cuda/*.cc src/kernel/cuda/*.cu src/partition/cuda/*.cu src/runtime/cuda/*.cc src/runtime/cuda/*.cu src/geometry/cuda/*.cu src/graph/transform/cuda/*.cu src/graph/sampling/randomwalks/*.cu ) list(APPEND DGL_SRC ${DGL_CUDA_SRC}) dgl_config_cuda(DGL_LINKER_LIBS) cuda_add_library(dgl SHARED ${DGL_SRC}) else(USE_CUDA) add_library(dgl SHARED ${DGL_SRC}) endif(USE_CUDA) if ((NOT MSVC) AND USE_EPOLL) INCLUDE(CheckIncludeFile) check_include_file("sys/epoll.h" EPOLL_AVAILABLE) if (EPOLL_AVAILABLE) target_compile_definitions(dgl PRIVATE USE_EPOLL) else() message(WARNING "EPOLL is not available on this platform...") endif() endif () # include directories target_include_directories(dgl PRIVATE "include") # check for conda includes if("$ENV{CONDA_BUILD}" STREQUAL "1") set(in_conda_build TRUE) message(STATUS "Conda build environment detected") elseif(DEFINED 
ENV{CONDA_PREFIX}) set(in_conda_prefix TRUE) message(STATUS "Conda environment detected: $ENV{CONDA_PREFIX}") endif() if (USE_CONDA_INCLUDES) if(in_conda_build) message(STATUS "Using Conda build environment includes: $ENV{PREFIX}") target_include_directories(dgl PRIVATE "$ENV{PREFIX}/include" "$ENV{BUILD_PREFIX}/include") elseif(in_conda_prefix) message(STATUS "Using Conda environment includes: $ENV{CONDA_PREFIX}") target_include_directories(dgl PRIVATE "$ENV{CONDA_PREFIX}/include") else() message(FATAL_ERROR "Conda environment not detected") endif() endif() if(EXTERNAL_DLPACK_PATH) message(STATUS "looking for dlpack headers in ${EXTERNAL_DLPACK_PATH}") include_directories(SYSTEM ${EXTERNAL_DLPACK_PATH}) else(EXTERNAL_DLPACK_PATH) target_include_directories(dgl PRIVATE "third_party/dlpack/include") endif(EXTERNAL_DLPACK_PATH) if(EXTERNAL_DMLC_PATH) if (USE_HDFS) message(FATAL_ERROR "Cannot use HDFS and external dmlc-core at the same time") endif() message(STATUS "looking for dmlc headers in ${EXTERNAL_DMLC_PATH}") include_directories(SYSTEM ${EXTERNAL_DMLC_PATH}) if (NOT EXTERNAL_DMLC_LIB_PATH) message(FATAL_ERROR "EXTERNAL_DMLC_LIB_PATH must be set if EXTERNAL_DMLC_PATH is set") endif() message(STATUS "looking for dmlc library in ${EXTERNAL_DMLC_LIB_PATH}") find_package(dmlc REQUIRED HINTS ${EXTERNAL_DMLC_LIB_PATH} ) if(NOT dmlc_FOUND) message(FATAL_ERROR "Failed to find DMLC library") endif() list(APPEND DGL_LINKER_LIBS dmlc::dmlc) else(EXTERNAL_DMLC_PATH) target_include_directories(dgl PRIVATE "third_party/dmlc-core/include") # For serialization if (USE_HDFS) option(DMLC_HDFS_SHARED "dgl has to build with dynamic hdfs library" ON) endif() add_subdirectory("third_party/dmlc-core") list(APPEND DGL_LINKER_LIBS dmlc) set(GOOGLE_TEST 0) # Turn off dmlc-core test endif(EXTERNAL_DMLC_PATH) target_include_directories(dgl PRIVATE "tensoradapter/include") target_include_directories(dgl PRIVATE "third_party/pcg/include") target_include_directories(dgl PRIVATE 
"third_party/tsl_robin_map/include") if(EXTERNAL_NANOFLANN_PATH) include_directories(SYSTEM ${EXTERNAL_NANOFLANN_PATH}) else(EXTERNAL_NANOFLANN_PATH) target_include_directories(dgl PRIVATE "third_party/nanoflann/include") endif(EXTERNAL_NANOFLANN_PATH) if (USE_LIBXSMM) target_compile_definitions(dgl PRIVATE USE_LIBXSMM DGL_CPU_LLC_SIZE=40000000 __BLAS=0) target_include_directories(dgl PRIVATE "third_party/libxsmm/include") message(STATUS "Build with LIBXSMM optimization.") endif() # To compile METIS correct for DGL. add_compile_definitions(IDXTYPEWIDTH=64 REALTYPEWIDTH=32) if (EXTERNAL_METIS_PATH) # To compile METIS correct for DGL. if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /DIDXTYPEWIDTH=64 /DREALTYPEWIDTH=32") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /DIDXTYPEWIDTH=64 /DREALTYPEWIDTH=32") else(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIDXTYPEWIDTH=64 -DREALTYPEWIDTH=32") endif(MSVC) find_package(METIS REQUIRED) message(STATUS "Found METIS library") target_include_directories(dgl SYSTEM PUBLIC ${METIS_INCLUDE_DIR}) list(APPEND DGL_LINKER_LIBS ${METIS_LIBRARIES}) else(EXTERNAL_METIS_PATH) target_include_directories(dgl PRIVATE "third_party/METIS/include") # Compile METIS if(NOT MSVC) set(GKLIB_PATH "${CMAKE_CURRENT_SOURCE_DIR}/third_party/GKlib") include(${GKLIB_PATH}/GKlibSystem.cmake) include_directories(${GKLIB_PATH}) add_library(GKlib ${GKlib_sources}) include_directories("third_party/METIS/include/") add_subdirectory("third_party/METIS/libmetis/") # When building on ubi7, it fails with the following error: # /usr/include/signal.h:156:29: error: unknown type name 'siginfo_t'. # So I(Rui) define _POSIX_C_SOURCE to 200809L for GKlib and metis to avoid the error. 
target_compile_definitions(GKlib PRIVATE _POSIX_C_SOURCE=200809L)
    target_compile_definitions(metis PRIVATE _POSIX_C_SOURCE=200809L)
    list(APPEND DGL_LINKER_LIBS metis GKlib)
  endif(NOT MSVC)
endif(EXTERNAL_METIS_PATH)

# Avoid exposing third-party symbols when using DGL as a library.
if((NOT MSVC) AND (NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin"))
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wl,--exclude-libs,ALL")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--exclude-libs,ALL")
endif()

# Compile gpu_cache
if(USE_CUDA)
  # Manually build gpu_cache because CMake always builds it as shared
  file(GLOB gpu_cache_src
    third_party/HugeCTR/gpu_cache/src/nv_gpu_cache.cu
  )
  cuda_add_library(gpu_cache STATIC ${gpu_cache_src})
  target_include_directories(gpu_cache PRIVATE "third_party/HugeCTR/gpu_cache/include")
  target_include_directories(dgl PRIVATE "third_party/HugeCTR/gpu_cache/include")
  list(APPEND DGL_LINKER_LIBS gpu_cache)
  message(STATUS "Build with HugeCTR GPU embedding cache.")
endif(USE_CUDA)

# support PARALLEL_ALGORITHMS
if (LIBCXX_ENABLE_PARALLEL_ALGORITHMS)
  target_compile_definitions(dgl PRIVATE PARALLEL_ALGORITHMS)
endif(LIBCXX_ENABLE_PARALLEL_ALGORITHMS)

# Link every library collected in DGL_LINKER_LIBS / DGL_RUNTIME_LINKER_LIBS
# above into the dgl shared library.
target_link_libraries(dgl ${DGL_LINKER_LIBS} ${DGL_RUNTIME_LINKER_LIBS})
if(MSVC)
  # After each MSVC build, copy the built dgl library into the parent of the
  # directory it was produced in. Generator expressions are required here
  # because the output location is only known at build time.
  add_custom_command(
    TARGET dgl POST_BUILD COMMAND
    ${CMAKE_COMMAND} -E copy "$<TARGET_FILE:dgl>" "$<TARGET_FILE_DIR:dgl>/..")
endif(MSVC)

# Tensor adapter libraries
# Linking against LibTorch involves linking against a bunch of other libraries
# returned by PyTorch's CMake (e.g. C10 or NVTools). Because CMake caches
# the found libraries in find_library(), often times CMake will look into the libraries
# of the wrong version when I build everything in the same CMake process. As
# a result, I (BarclayII) am launching an individual CMake build for every PyTorch version.
if(BUILD_TORCH) file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR) file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD) if(MSVC) file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/tensoradapter/pytorch/build.bat BUILD_SCRIPT) add_custom_target( tensoradapter_pytorch ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} USE_CUDA=${USE_CUDA} EXTERNAL_DMLC_LIB_PATH=${EXTERNAL_DMLC_LIB_PATH} BINDIR=${BINDIR} cmd /e:on /c ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS} DEPENDS ${BUILD_SCRIPT} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tensoradapter/pytorch) else(MSVC) file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/tensoradapter/pytorch/build.sh BUILD_SCRIPT) add_custom_target( tensoradapter_pytorch ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} USE_CUDA=${USE_CUDA} EXTERNAL_DMLC_LIB_PATH=${EXTERNAL_DMLC_LIB_PATH} BINDIR=${CMAKE_CURRENT_BINARY_DIR} bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS} DEPENDS ${BUILD_SCRIPT} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/tensoradapter/pytorch) endif(MSVC) add_dependencies(dgl tensoradapter_pytorch) endif(BUILD_TORCH) # Installation rules install(TARGETS dgl DESTINATION lib${LIB_SUFFIX}) # Testing if(BUILD_CPP_TEST) message(STATUS "Build with unittest") add_subdirectory(./third_party/googletest) enable_testing() include_directories(${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR}) include_directories("include") include_directories("third_party/dlpack/include") include_directories("third_party/dmlc-core/include") include_directories("third_party/tsl_robin_map/include") include_directories("third_party/libxsmm/include") include_directories("third_party/pcg/include") file(GLOB_RECURSE TEST_SRC_FILES ${PROJECT_SOURCE_DIR}/tests/cpp/*.cc) add_executable(runUnitTests ${TEST_SRC_FILES}) target_link_libraries(runUnitTests gtest gtest_main) target_link_libraries(runUnitTests dgl) add_test(UnitTests runUnitTests) endif(BUILD_CPP_TEST) if(BUILD_SPARSE) message(STATUS 
"Configuring DGL sparse library") file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR) file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD) get_target_property(DGL_INCLUDE_DIRS dgl INCLUDE_DIRECTORIES) message(STATUS "DGL include directories: ${DGL_INCLUDE_DIRS}") message(STATUS "DGL link directories: ${DGL_INCLUDE_DIRS}") if(MSVC) file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/dgl_sparse/build.bat BUILD_SCRIPT) add_custom_target( dgl_sparse ALL ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} USE_CUDA=${USE_CUDA} BINDIR=${BINDIR} INCLUDEDIR="${DGL_INCLUDE_DIRS}" CFLAGS=${CMAKE_C_FLAGS} CXXFLAGS=${CMAKE_CXX_FLAGS} LDFLAGS=${CMAKE_SHARED_LINKER_FLAGS} cmd /e:on /c ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS} DEPENDS ${BUILD_SCRIPT} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/dgl_sparse) else(MSVC) file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/dgl_sparse/build.sh BUILD_SCRIPT) add_custom_target( dgl_sparse ALL ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} USE_CUDA=${USE_CUDA} BINDIR=${CMAKE_CURRENT_BINARY_DIR} INCLUDEDIR="${DGL_INCLUDE_DIRS}" CFLAGS=${CMAKE_C_FLAGS} CXXFLAGS=${CMAKE_CXX_FLAGS} LDFLAGS=${CMAKE_SHARED_LINKER_FLAGS} bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS} DEPENDS ${BUILD_SCRIPT} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/dgl_sparse) endif(MSVC) add_dependencies(dgl_sparse dgl) endif(BUILD_SPARSE) if(BUILD_GRAPHBOLT) message(STATUS "Configuring graphbolt library") string(REPLACE ";" "\\;" CUDA_ARCHITECTURES_ESCAPED "${CUDA_ARCHITECTURES}") file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR) file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD) if(MSVC) file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/graphbolt/build.bat BUILD_SCRIPT) add_custom_target( graphbolt ALL ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} USE_CUDA=${USE_CUDA} BINDIR=${BINDIR} CFLAGS=${CMAKE_C_FLAGS} CXXFLAGS=${CMAKE_CXX_FLAGS} 
CUDAARCHS="${CUDA_ARCHITECTURES_ESCAPED}" LDFLAGS=${CMAKE_SHARED_LINKER_FLAGS} cmd /e:on /c ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS} DEPENDS ${BUILD_SCRIPT} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/graphbolt) else(MSVC) file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/graphbolt/build.sh BUILD_SCRIPT) add_custom_target( graphbolt ALL ${CMAKE_COMMAND} -E env CMAKE_COMMAND=${CMAKE_CMD} CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR} USE_CUDA=${USE_CUDA} USE_LIBURING=${USE_LIBURING} BINDIR=${CMAKE_CURRENT_BINARY_DIR} CFLAGS=${CMAKE_C_FLAGS} CXXFLAGS=${CMAKE_CXX_FLAGS} CUDAARCHS="${CUDA_ARCHITECTURES_ESCAPED}" LDFLAGS=${CMAKE_SHARED_LINKER_FLAGS} bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS} DEPENDS ${BUILD_SCRIPT} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/graphbolt) endif(MSVC) endif(BUILD_GRAPHBOLT) ================================================ FILE: CONTRIBUTORS.md ================================================ ## Contributing to DGL Contribution is always welcomed. A good starting place is the roadmap issue, where you can find our current milestones. All contributions must go through pull requests and be reviewed by the committers. See our [contribution guide](https://docs.dgl.ai/contribute.html) for more details. Once your contribution is accepted and merged, congratulations, you are now a contributor to the DGL project. We will put your name in the list below. 
Contributors ------------ * [Minjie Wang](https://github.com/jermainewang) from AWS * [Da Zheng](https://github.com/zheng-da) from AWS * [Quan Gan](https://github.com/BarclayII) from AWS * [Mufei Li](https://github.com/mufeili) from AWS * [Jinjing Zhou](https://github.com/VoVAllen) from AWS * [Xiang Song](https://github.com/classicsong) from AWS * [Tianjun Xiao](https://github.com/sneakerkg) from AWS * [Tong He](https://github.com/hetong007) from AWS * [Jian Zhang](https://github.com/zhjwy9343) from AWS * [Qipeng Guo](https://github.com/QipengGuo) from AWS * [Xiangkun Hu](https://github.com/HuXiangkun) from AWS * [Ying Rui](https://github.com/Rhett-Ying) from AWS * [Israt Nisa](https://github.com/isratnisa) from AWS * [Zheng Zhang](https://github.com/zzhang-cn) from AWS * [Zihao Ye](https://github.com/yzh119) from University of Washington * [Chao Ma](https://github.com/aksnzhy) * [Qidong](https://github.com/soodoshll) * [Lingfan Yu](https://github.com/lingfanyu) from New York University * [Yu Gai](https://github.com/GaiYu0) from University of California, Berkeley * [Qi Huang]() from New York University * [Dominique LaSalle](https://github.com/nv-dlasalle) from Nvidia * [Pawel Piotrowcz](https://github.com/pawelpiotrowicz) from Intel * [Michal Szarmach](https://github.com/mszarma) from Intel * [Izabela Mazur](https://github.com/IzabelaMazur) from Intel * [Sanchit Misra](https://github.com/sanchit-misra) from Intel * [Andrzej Kotlowski](https://github.com/anko-intel) from Intel * [Sheng Zha](https://github.com/szha) from AWS * [Yifei Ma](https://github.com/yifeim) from AWS * [Yizhi Liu](https://github.com/yzhliu) from AWS * [Kay Liu](https://github.com/kayzliu) from UIC * [Tianqi Zhang](https://github.com/lygztq) from SJTU * [Hengrui Zhang](https://github.com/hengruizhang98) * [Seung Won Min](https://github.com/davidmin7) from UIUC * [@hbsun2113](https://github.com/hbsun2113): GraphSAGE in PyTorch * [Tianyi Zhang](https://github.com/Tiiiger): SGC in PyTorch * [Jun 
Chen](https://github.com/kitaev-chen): GIN in PyTorch * [Aymen Waheb](https://github.com/aymenwah): APPNP in PyTorch * [Chengqiang Lu](https://github.com/geekinglcq): MGCN, SchNet and MPNN in PyTorch * [Gongze Cao](https://github.com/Zardinality): Cluster GCN * [Yicheng Wu](https://github.com/MilkshakeForReal): RotatE in PyTorch * [Hao Xiong](https://github.com/ShawXh): DeepWalk in PyTorch * [Zhi Lin](https://github.com/kira-lin): Integrate FeatGraph into DGL * [Andrew Tsesis](https://github.com/noncomputable): Framework-Agnostic Graph Ops * [Brett Koonce](https://github.com/brettkoonce) * [@giuseppefutia](https://github.com/giuseppefutia) * [@mori97](https://github.com/mori97) * [@xnuohz](https://github.com/xnuohz) * [Hao Jin](https://github.com/haojin2) from Amazon * [Xin Yao](https://github.com/yaox12) from Nvidia * [Abdurrahman Yasar](https://github.com/ayasar70) from Nvidia * [Shaked Brody](https://github.com/shakedbr) from Technion * [Jiahui Liu](https://github.com/paoxiaode) from Nvidia * [Neil Dickson](https://github.com/ndickson-nvidia) from Nvidia * [Chang Liu](https://github.com/chang-l) from Nvidia * [Muhammed Fatih Balin](https://github.com/mfbalin) from Nvidia and Georgia Tech ================================================ FILE: Jenkinsfile ================================================ #!/usr/bin/env groovy // CI tests are executed within Docker containers as the 'root' user. However, // communications between Jenkins nodes are done with the 'ubuntu' user(login // via root is disallowed on AWS EC2 instances). Therefore, we need to change // the file permission to allow 'ubuntu' user to access the files created by // the 'root' user. This is achieved by running 'chmod -R 777 .'. // Summary of Jenkins nodes: // - linux-benchmark-node: Linux CPU node for authentication and lint check. 
//     number of nodes: 1
//     instance type: m5.2xlarge(8 vCPUs, 32 GB memory)
//     number of executors per node: 6
//     number of jobs running on this node per CI run: 3
// - dgl-ci-linux-cpu: Linux CPU node for building and testing.
//     number of nodes: 4
//     instance type: m6i.24xlarge(96 vCPUs, 384 GB memory)
//     number of executors per node: 6
//     number of jobs running on this node per CI run: 8
// - dgl-ci-linux-gpu: Linux GPU node for building and testing.
//     number of nodes: 4
//     instance type: g4dn.4xlarge(16 vCPUs, 64 GB memory, 1 GPU)
//     number of executors per node: 1
//     number of jobs running on this node per CI run: 4
// - dgl-ci-windows-cpu: Windows CPU node for building and testing.
//     number of nodes: 4
//     instance type: m6i.8xlarge(32 vCPUs, 128 GB memory)
//     number of executors per node: 2
//     number of jobs running on this node per CI run: 3

// Comma-separated file patterns of the Linux build artifacts that are stashed
// by the build stages and unstashed by the test stages.
// NOTE: declared without `def` on purpose so that the helper functions below
// can read it.
dgl_linux_libs = 'build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-*-x86_64-linux-gnu.so, build/tensoradapter/pytorch/*.so, build/dgl_sparse/*.so, build/graphbolt/*.so'
// Currently DGL on Windows is not working with Cython yet
dgl_win64_libs = "build\\dgl.dll, build\\runUnitTests.exe, build\\tensoradapter\\pytorch\\*.dll, build\\dgl_sparse\\*.dll, build\\graphbolt\\*.dll"

// Reset the Linux workspace and check out the sources with all submodules.
def init_git() {
  sh "chmod -R 777 ." // Fix permission issue
  sh 'rm -rf *'
  sh "git config --global --add safe.directory '*'"
  checkout scm
  sh 'git submodule update --recursive --init'
}

// Check out the sources with all submodules on a Windows node.
def init_git_win64() {
  checkout scm
  bat 'git submodule update --recursive --init'
}

// pack libraries for later use
//   name: stash name
//   libs: comma-separated file patterns to include in the stash
def pack_lib(name, libs) {
  echo "Packing ${libs} into ${name}"
  stash includes: libs, name: name
}

// unpack libraries saved before
//   name: stash name
//   libs: only used for the log message
def unpack_lib(name, libs) {
  unstash name
  echo "Unpacked ${libs} from ${name}"
}

// Build DGL on Linux for the given device flavor and stash the artifacts as
// "dgl-<dev>-linux".
def build_dgl_linux(dev) {
  init_git()
  sh "bash tests/scripts/build_dgl.sh ${dev}"
  sh 'ls -lh /usr/lib/x86_64-linux-gnu/'
  pack_lib("dgl-${dev}-linux", dgl_linux_libs)
}

// Build DGL on Windows and stash the artifacts as "dgl-<dev>-win64".
def build_dgl_win64(dev) {
  /* Assuming that Windows slaves are already configured with MSBuild VS2017,
   * CMake and Python/pip/setuptools etc. */
  init_git_win64()
  bat "CALL tests\\scripts\\build_dgl.bat"
  pack_lib("dgl-${dev}-win64", dgl_win64_libs)
}

// Run the C++ unit tests on Linux against the stashed "dgl-<dev>-linux" build.
def cpp_unit_test_linux(dev) {
  init_git()
  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
  sh 'bash tests/scripts/task_cpp_unit_test.sh'
}

// Run the C++ unit tests on Windows against the stashed CPU build.
def cpp_unit_test_win64() {
  init_git_win64()
  unpack_lib('dgl-cpu-win64', dgl_win64_libs)
  bat "CALL tests\\scripts\\task_cpp_unit_test.bat"
}

// Run the Python unit tests on Linux (40 minute limit).
def unit_test_linux(backend, dev) {
  init_git()
  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
  timeout(time: 40, unit: 'MINUTES') {
    sh "bash tests/scripts/task_unit_test.sh ${backend} ${dev}"
  }
}

// Run the distributed unit tests on Linux (40 minute limit).
def unit_distributed_linux(backend, dev) {
  init_git()
  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
  timeout(time: 40, unit: 'MINUTES') {
    sh "bash tests/scripts/task_distributed_test.sh ${backend} ${dev}"
  }
}

// Run the cugraph unit tests (15 minute limit). `dev` only selects the stash.
def unit_test_cugraph(backend, dev) {
  init_git()
  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
  timeout(time: 15, unit: 'MINUTES') {
    sh "bash tests/scripts/cugraph_unit_test.sh ${backend}"
  }
}

// Run the Python unit tests on Windows (50 minute limit).
def unit_test_win64(backend, dev) {
  init_git_win64()
  unpack_lib("dgl-${dev}-win64", dgl_win64_libs)
  timeout(time: 50, unit: 'MINUTES') {
    bat "CALL tests\\scripts\\task_unit_test.bat ${backend}"
  }
}

// Run the example tests on Linux (20 minute limit).
// `backend` is accepted for signature parity with the other helpers but is
// not passed to the test script.
def example_test_linux(backend, dev) {
  init_git()
  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
  timeout(time: 20, unit: 'MINUTES') {
    sh "bash tests/scripts/task_example_test.sh ${dev}"
  }
}

// Run the example tests on Windows (20 minute limit).
// `backend` is accepted for signature parity but is not passed to the script.
def example_test_win64(backend, dev) {
  init_git_win64()
  unpack_lib("dgl-${dev}-win64", dgl_win64_libs)
  timeout(time: 20, unit: 'MINUTES') {
    bat "CALL tests\\scripts\\task_example_test.bat ${dev}"
  }
}

// Run the tutorial tests for the given backend against the Linux CPU build
// (20 minute limit).
def tutorial_test_linux(backend) {
  init_git()
  unpack_lib('dgl-cpu-linux', dgl_linux_libs)
  timeout(time: 20, unit: 'MINUTES') {
    sh "bash tests/scripts/task_${backend}_tutorial_test.sh"
  }
}

// Run the DGL-Go tests against the Linux CPU build (20 minute limit).
def go_test_linux() {
  init_git()
  unpack_lib('dgl-cpu-linux', dgl_linux_libs)
  timeout(time: 20, unit: 'MINUTES') {
    sh "bash tests/scripts/task_go_test.sh"
  }
}

// Return true when `name` (a GitHub login) is allowed to trigger CI.
def is_authorized(name) {
  def devs = [
    // System:
    'dgl-bot', 'noreply',
    // Core:
    'Rhett-Ying', 'BarclayII', 'jermainewang', 'mufeili', 'isratnisa',
    'rudongyu', 'classicsong', 'HuXiangkun', 'hetong007', 'kylasa',
    'frozenbugs', 'peizhou001', 'zheng-da', 'czkkkkkk', 'thvasilo',
    // Intern:
    'pyynb', 'az15240', 'BowenYao18', 'kec020', 'Liu-rj',
    // Friends:
    'nv-dlasalle', 'yaox12', 'chang-l', 'Kh4L', 'VibhuJawa', 'kkranen',
    'TristonC', 'mfbalin', 'bgawrych', 'itaraban', 'daniil-sizov',
    'anko-intel', 'Kacper-Pietkun', 'hankaj', 'agrabows', 'DominikaJedynak',
    'RafLit', 'CfromBU',
    // Emeritus:
    'VoVAllen',
  ]
  return (name in devs)
}

// Return true when `name` (a GitHub login) is allowed to launch regression tests.
def is_admin(name) {
  def admins = ['dgl-bot', 'Rhett-Ying', 'BarclayII', 'jermainewang']
  return (name in admins)
}

// Set to true by the 'Regression Test' stage so that the 'CI' stage is skipped.
def regression_test_done = false

pipeline {
  agent any
  triggers {
    issueCommentTrigger('@dgl-bot.*')
  }
  stages {
    // Below 2 stages are to authenticate the change/comment author.
    // Only core developers are allowed to trigger CI.
    // Such authentication protects CI from malicious code which may bring CI instances down.
    stage('Authentication') {
      agent {
        docker {
          label 'linux-benchmark-node'
          image 'dgllib/dgl-ci-lint'
          alwaysPull true
        }
      }
      when { not { triggeredBy 'IssueCommentCause' } }
      steps {
        script {
          def author = env.CHANGE_AUTHOR
          def prOpenTriggerCause = currentBuild.getBuildCauses('jenkins.branch.BranchEventCause')
          def first_run = prOpenTriggerCause && env.BUILD_ID == '1'
          if (author && !is_authorized(author)) {
            pullRequest.comment("Not authorized to trigger CI. Please ask core developer to help trigger via issuing comment: \n - `@dgl-bot`")
            error("Authentication failed.")
          }
          if (first_run) {
            pullRequest.comment('To trigger regression tests: \n - `@dgl-bot run [instance-type] [which tests] [compare-with-branch]`; \n For example: `@dgl-bot run g4dn.4xlarge all dmlc/master` or `@dgl-bot run c5.9xlarge kernel,api dmlc/master`')
          }
        }
      }
    }
    stage('AuthenticationComment') {
      agent {
        docker {
          label 'linux-benchmark-node'
          image 'dgllib/dgl-ci-lint'
          alwaysPull true
        }
      }
      when { triggeredBy 'IssueCommentCause' }
      steps {
        script {
          def author = env.GITHUB_COMMENT_AUTHOR
          if (!is_authorized(author)) {
            pullRequest.comment("Not authorized to trigger CI via issuing comment.")
            error("Authentication failed.")
          }
        }
      }
    }
    stage('Regression Test') {
      agent {
        docker {
          label 'linux-benchmark-node'
          image 'dgllib/dgl-ci-lint'
          alwaysPull true
        }
      }
      when { triggeredBy 'IssueCommentCause' }
      steps {
        checkout scm
        script {
          def comment = env.GITHUB_COMMENT
          def command_lists = comment.split(' ')
          if (command_lists.size() == 1) {
            // CI command, not for regression
            return
          }
          if (command_lists.size() != 5) {
            pullRequest.comment('Cannot run the regression test due to unknown command')
            error('Unknown command')
          }
          def author = env.GITHUB_COMMENT_AUTHOR
          echo("${env.GIT_URL}")
          echo("${env}")
          if (!is_admin(author)) {
            error('Not authorized to launch regression tests')
          }
          dir('benchmark_scripts_repo') {
            checkout([$class: 'GitSCM', branches: [[name: '*/master']],
                      userRemoteConfigs: [[credentialsId: 'github', url: 'https://github.com/dglai/DGL_scripts.git']]])
          }
          sh('cp benchmark_scripts_repo/benchmark/* benchmarks/scripts/')
          // Third token of the comment is the instance type, e.g. "g4dn.4xlarge".
          def instance_type = command_lists[2].replace('.', '')
          pullRequest.comment("Start the Regression test. View at ${RUN_DISPLAY_URL}")
          def prNumber = env.BRANCH_NAME.replace('PR-', '')
          dir('benchmarks/scripts') {
            sh('python3 -m pip install boto3')
            // The PR branch name and the raw comment text are externally
            // controlled. Hand them to the shell through the environment and
            // a non-interpolated command string instead of splicing them into
            // the command line, so that shell metacharacters (or a quote in
            // the comment) cannot change the command that is executed.
            withEnv([
              "GIT_PR_ID=${prNumber}",
              "GIT_URL=${env.GIT_URL}",
              "GIT_BRANCH=${env.CHANGE_BRANCH}",
              "REG_DATA_FOLDER=${env.GIT_COMMIT}_${instance_type}",
              "REG_RUN_CMD=${comment}",
            ]) {
              sh('PYTHONUNBUFFERED=1 python3 run_reg_test.py --data-folder "$REG_DATA_FOLDER" --run-cmd "$REG_RUN_CMD"')
            }
          }
          pullRequest.comment("Finished the Regression test. Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ")
          currentBuild.result = 'SUCCESS'
          regression_test_done = true
        }
      }
    }
    stage('CI') {
      when { expression { !regression_test_done } }
      stages {
        stage('Abort Previous CI') {
          steps {
            script {
              if (env.BRANCH_NAME != "master") {
                // Jenkins will abort an older build if a newer build already
                // passed a higher milestone.
                // https://www.jenkins.io/doc/pipeline/steps/pipeline-milestone-step/
                def buildNumber = env.BUILD_NUMBER as int
                for (int i = 1; i <= buildNumber; i++) {
                  milestone(i)
                }
              }
            }
          }
        }
        stage('Lint Check') {
          agent {
            docker {
              label "linux-benchmark-node"
              image "dgllib/dgl-ci-lint"
              alwaysPull true
            }
          }
          steps {
            init_git()
            sh 'bash tests/scripts/task_lint.sh'
          }
          post {
            always {
              cleanWs disableDeferredWipeout: true, deleteDirs: true
            }
          }
        }
        stage('Build') {
          parallel {
            stage('CPU Build') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-cpu:v240511_1440"
                  args "-u root"
                  alwaysPull true
                }
              }
              steps {
                build_dgl_linux('cpu')
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('GPU Build') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
                  args "-u root"
                  alwaysPull true
                }
              }
              steps {
                // sh "nvidia-smi"
                build_dgl_linux('gpu')
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('PyTorch Cugraph GPU Build') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "rapidsai/cugraph_stable_torch-cuda:11.8-base-ubuntu20.04-py3.10-pytorch2.0.0-rapids23.04"
                  args "-u root"
                  alwaysPull true
                }
              }
              steps {
                build_dgl_linux('cugraph')
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('CPU Build (Win64)') {
              agent { label 'dgl-ci-windows-cpu' }
              steps {
                build_dgl_win64('cpu')
              }
              post {
                always {
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            // Currently we don't have Windows GPU build machines
          }
        }
        stage('Test') {
          parallel {
            stage('C++ CPU') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-cpu:v240511_1440"
                  args "-u root"
                  alwaysPull true
                }
              }
              steps {
                cpp_unit_test_linux('cpu')
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('C++ GPU') {
              agent {
                docker {
                  label "dgl-ci-linux-gpu"
                  image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
                  args "-u root --runtime nvidia"
                  alwaysPull true
                }
              }
              steps {
                cpp_unit_test_linux('gpu')
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('C++ CPU (Win64)') {
              agent { label 'dgl-ci-windows-cpu' }
              steps {
                cpp_unit_test_win64()
              }
              post {
                always {
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('Tensorflow CPU') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-cpu:v230810"
                  args "-u root"
                  alwaysPull true
                }
              }
              stages {
                stage('Tensorflow CPU Unit test') {
                  steps {
                    unit_test_linux('tensorflow', 'cpu')
                  }
                  // Tensorflow is deprecated.
                  when { expression { false } }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('Tensorflow GPU') {
              agent {
                docker {
                  label "dgl-ci-linux-gpu"
                  image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
                  args "-u root --runtime nvidia"
                  alwaysPull true
                }
              }
              stages {
                stage('Tensorflow GPU Unit test') {
                  steps {
                    unit_test_linux('tensorflow', 'gpu')
                  }
                  // Tensorflow does not support cuda 11.6 yet.
                  when { expression { false } }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('Torch CPU') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-cpu:v240511_1440"
                  args "-u root --shm-size=4gb"
                  alwaysPull true
                }
              }
              stages {
                stage('Torch CPU Unit test') {
                  steps {
                    unit_test_linux('pytorch', 'cpu')
                  }
                }
                stage('Torch CPU Example test') {
                  steps {
                    example_test_linux('pytorch', 'cpu')
                  }
                }
                stage('Torch CPU Tutorial test') {
                  steps {
                    tutorial_test_linux('pytorch')
                  }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('Torch CPU (Win64)') {
              agent { label 'dgl-ci-windows-cpu' }
              stages {
                stage('Torch CPU (Win64) Unit test') {
                  steps {
                    unit_test_win64('pytorch', 'cpu')
                  }
                }
                stage('Torch CPU (Win64) Example test') {
                  steps {
                    example_test_win64('pytorch', 'cpu')
                  }
                }
              }
              post {
                always {
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('Torch GPU') {
              agent {
                docker {
                  label "dgl-ci-linux-gpu"
                  image "dgllib/dgl-ci-gpu:cu121_v240511_1440"
                  args "-u root --runtime nvidia --shm-size=8gb"
                  alwaysPull true
                }
              }
              stages {
                stage('Torch GPU Unit test') {
                  steps {
                    sh 'nvidia-smi'
                    unit_test_linux('pytorch', 'gpu')
                  }
                }
                stage('Torch GPU Example test') {
                  steps {
                    example_test_linux('pytorch', 'gpu')
                  }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('Distributed') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-cpu:v240511_1440"
                  args "-u root --shm-size=8gb"
                  alwaysPull true
                }
              }
              stages {
                stage('Distributed Torch CPU Unit test') {
                  steps {
                    unit_distributed_linux('pytorch', 'cpu')
                  }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('PyTorch Cugraph GPU') {
              agent {
                docker {
                  label "dgl-ci-linux-gpu"
                  image "rapidsai/cugraph_stable_torch-cuda:11.8-base-ubuntu20.04-py3.10-pytorch2.0.0-rapids23.04"
                  args "-u root --runtime nvidia --shm-size=8gb"
                  alwaysPull true
                }
              }
              stages {
                stage('PyTorch Cugraph GPU Unit test') {
                  steps {
                    sh 'nvidia-smi'
                    unit_test_cugraph('pytorch', 'cugraph')
                  }
                  // Cugraph is under refactoring. Skip the test for now.
                  when { expression { false } }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
            stage('DGL-Go') {
              agent {
                docker {
                  label "dgl-ci-linux-cpu"
                  image "dgllib/dgl-ci-cpu:v240511_1440"
                  args "-u root"
                  alwaysPull true
                }
              }
              stages {
                stage('DGL-Go CPU test') {
                  steps {
                    go_test_linux()
                  }
                }
              }
              post {
                always {
                  sh "chmod -R 777 ." // Fix permission issue
                  cleanWs disableDeferredWipeout: true, deleteDirs: true
                }
              }
            }
          }
        }
      }
    }
  }
  // After every run: collect the console log, build the report, upload both to
  // S3, and post the status summary on the pull request.
  post {
    always {
      script {
        node("dglci-post-linux") {
          docker.image('dgllib/dgl-ci-awscli:v220418').inside("--pull always --entrypoint=''") {
            sh("rm -rf ci_tmp")
            dir('ci_tmp') {
              sh("curl -k -o cireport.log ${BUILD_URL}consoleText")
              sh("curl -o report.py https://raw.githubusercontent.com/dmlc/dgl/master/tests/scripts/ci_report/report.py")
              sh("curl -o status.py https://raw.githubusercontent.com/dmlc/dgl/master/tests/scripts/ci_report/status.py")
              sh("curl -k -L ${BUILD_URL}wfapi")
              sh("cat status.py")
              // "|| true": a failing report must not fail the post step.
              sh("pytest --html=report.html --self-contained-html report.py || true")
              // First sync uploads only *.log as text/plain; second uploads the rest.
              sh("aws s3 sync ./ s3://dgl-ci-result/${JOB_NAME}/${BUILD_NUMBER}/${BUILD_ID}/logs/ --exclude '*' --include '*.log' --acl public-read --content-type text/plain")
              sh("aws s3 sync ./ s3://dgl-ci-result/${JOB_NAME}/${BUILD_NUMBER}/${BUILD_ID}/logs/ --exclude '*.log' --acl public-read")
              def comment = sh(returnStdout: true, script: "python3 status.py --result ${currentBuild.currentResult}").trim()
              echo(comment)
              if ((env.BRANCH_NAME).startsWith('PR-')) {
                pullRequest.comment(comment)
              }
            }
          }
        }
        node('dgl-ci-windows-cpu') {
          // returnStatus: true keeps a non-zero exit code from failing the build.
          bat(script: "rmvirtualenv ${BUILD_TAG}", returnStatus: true)
        }
      }
    }
  }
}
================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: NEWS.md ================================================ DGL release and change logs ========== Refer to the roadmap issue for the on-going versions and features. 0.2 --- Major release that includes many features, bugfix and performance improvement. Speed of GCN model on Pubmed dataset has been improved by **4.19x**! Speed of RGCN model on Mutag dataset has been improved by **7.35x**! Important new feature: **graph sampling APIs**. 
Update details: # Model examples - [x] TreeLSTM w/ MXNet (PR #279 by @szha ) - [x] GraphSage (@ZiyueHuang ) - [x] Improve GAT model speed (PR #348 by @jermainewang ) # Core system improvement - [x] Immutable CSR graph structure (PR #342 by @zheng-da ) - [x] Finish remaining functionality (Issue #369, PR #404 by @yzh119) - [x] Nodeflow data structure (PR #361 by @zheng-da ) - [x] Neighbor sampler (PR #322 ) - [x] Layer-wise sampler (PR #362 by @GaiYu0 ) - [x] Multi-GPU support by data parallelism (PR #356 #338 by @ylfdq1118 ) - [x] More dataset: - [x] Reddit dataset loader (PR #372 by @ZiyueHuang ) - [x] PPI dataset loader (PR #395 by @sufeidechabei ) - [x] Mini graph classification dataset (PR #364 by @mufeili ) - [x] NN modules (PR #406 by @jermainewang @mufeili) - [x] GraphConv layer - [x] Edge softmax layer - [x] Edge group apply API (PR #358 by @VoVAllen ) - [x] Reversed graph and transform.py module (PR #331 by @mufeili ) - [x] Max readout (PR #341 by @mufeili ) - [x] Random walk APIs (PR #392 by @BarclayII ) # Tutorial/Blog - [x] Batched graph classification in DGL (PR #360 by @mufeili ) - [x] Understanding GAT (@sufeidechabei ) # Project improvement - [x] Python lint check (PR #330 by @jermainewang ) - [x] Win CI (PR #324 by @BarclayII ) - [x] Auto doc build (by @VoVAllen ) - [x] Unify tests for different backends (PR #333 by @BarclayII ) 0.1.3 ----- Bug fix * Compatible with Pytorch v1.0 * Bug fix in networkx graph conversion. 0.1.2 ----- First open release. * Basic graph APIs. * Basic message passing APIs. * Pytorch backend. * MXNet backend. * Optimization using SPMV. * Model examples w/ Pytorch: - GCN - GAT - JTNN - DGMG - Capsule - LGNN - RGCN - Transformer - TreeLSTM * Model examples w/ MXNet: - GCN - GAT - RGCN - SSE ================================================ FILE: README.md ================================================

[![Latest Release](https://img.shields.io/github/v/release/dmlc/dgl)](https://github.com/dmlc/dgl/releases) [![Conda Latest Release](https://anaconda.org/dglteam/dgl/badges/version.svg)](https://anaconda.org/dglteam/dgl) [![Build Status](https://ci.dgl.ai/buildStatus/icon?job=DGL/master)](https://ci.dgl.ai/job/DGL/job/master/) [![Benchmark by ASV](http://img.shields.io/badge/benchmarked%20by-asv-green.svg?style=flat)](https://asv.dgl.ai/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](./LICENSE) [![Twitter](https://img.shields.io/twitter/follow/DGLGraph?style=social)](https://twitter.com/GraphDeep) [Website](https://www.dgl.ai) | [A Blitz Introduction to DGL](https://docs.dgl.ai/tutorials/blitz/index.html) | Documentation ([Latest](https://www.dgl.ai/dgl_docs/)) | [Official Examples](examples/README.md) | [Discussion Forum](https://discuss.dgl.ai) | [Slack Channel](https://join.slack.com/t/deep-graph-library/shared_invite/zt-eb4ict1g-xcg3PhZAFAB8p6dtKuP6xQ) DGL is an easy-to-use, high performance and scalable Python package for deep learning on graphs. DGL is framework agnostic, meaning if a deep graph model is a component of an end-to-end application, the rest of the logic can be implemented in any major framework, such as PyTorch, Apache MXNet or TensorFlow.

DGL v0.4 architecture
Figure: DGL Overall Architecture

## Highlighted Features ### A GPU-ready graph library DGL provides a powerful graph object that can reside on either CPU or GPU. It bundles structural data as well as features for better control. We provide a variety of functions for computing with graph objects including efficient and customizable message passing primitives for Graph Neural Networks. ### A versatile tool for GNN researchers and practitioners The field of graph deep learning is still rapidly evolving and many research ideas emerge by standing on the shoulders of giants. To ease the process, [DGL-Go](https://github.com/dmlc/dgl/tree/master/dglgo) is a command-line interface to get started with training, using and studying state-of-the-art GNNs. DGL collects a rich set of [example implementations](https://github.com/dmlc/dgl/tree/master/examples) of popular GNN models of a wide range of topics. Researchers can [search](https://www.dgl.ai/) for related models to innovate new ideas from or use them as baselines for experiments. Moreover, DGL provides many state-of-the-art [GNN layers and modules](https://docs.dgl.ai/api/python/nn.html) for users to build new model architectures. DGL is one of the preferred platforms for many standard graph deep learning benchmarks including [OGB](https://ogb.stanford.edu/) and [GNNBenchmarks](https://github.com/graphdeeplearning/benchmarking-gnns). ### Easy to learn and use DGL provides plenty of learning materials for all kinds of users from ML researchers to domain experts. The [Blitz Introduction to DGL](https://docs.dgl.ai/tutorials/blitz/index.html) is a 120-minute tour of the basics of graph machine learning. The [User Guide](https://docs.dgl.ai/guide/index.html) explains in more detail the concepts of graphs as well as the training methodology. All of them include code snippets in DGL that are runnable and ready to be plugged into one’s own pipeline. 
### Scalable and efficient It is convenient to train models using DGL on large-scale graphs across **multiple GPUs** or **multiple machines**. DGL extensively optimizes the whole stack to reduce the overhead in communication, memory consumption and synchronization. As a result, DGL can easily scale to billion-sized graphs. Get started with the [tutorials](https://docs.dgl.ai/en/tutorials/dist/index.html) and [user guide](https://docs.dgl.ai/en/latest/guide/distributed.html) for distributed training. See the [system performance note](https://docs.dgl.ai/performance.html) for the comparison with other tools. ## Get Started Users can install DGL from [pip and conda](https://www.dgl.ai/pages/start.html). You can also download GPU enabled DGL docker [containers](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/dgl) (backed by PyTorch) from NVIDIA NGC for both x86 and ARM based linux systems. Advanced users can follow the [instructions](https://docs.dgl.ai/install/index.html#install-from-source) to install from source. For absolute beginners, start with [the Blitz Introduction to DGL](https://docs.dgl.ai/tutorials/blitz/index.html). It covers the basic concepts of common graph machine learning tasks and a step-by-step guide to building Graph Neural Networks (GNNs) to solve them. For acquainted users who wish to learn more, * Experience state-of-the-art GNN models in only two command-lines using [DGL-Go](https://github.com/dmlc/dgl/tree/master/dglgo). * Learn DGL by [example implementations](https://www.dgl.ai/) of popular GNN models. * Read the [User Guide](https://docs.dgl.ai/guide/index.html) ([中文版链接](https://docs.dgl.ai/guide_cn/index.html)), which explains the concepts and usage of DGL in much more detail. 
* Go through the tutorials for advanced features like [stochastic training of GNNs](https://docs.dgl.ai/tutorials/large/index.html), training on [multi-GPU](https://docs.dgl.ai/tutorials/multi/index.html) or [multi-machine](https://docs.dgl.ai/tutorials/dist/index.html). * [Study classical papers](https://docs.dgl.ai/tutorials/models/index.html) on graph machine learning alongside DGL. * Search for the usage of a specific API in the [API reference manual](https://docs.dgl.ai/api/python/index.html), which organizes all DGL APIs by their namespace. All the learning materials are available at our [documentation site](https://docs.dgl.ai/). If you are new to deep learning in general, check out the open source book [Dive into Deep Learning](https://d2l.ai/). ## Community ### Get connected We provide multiple channels to connect you to the community of the DGL developers, users, and the general GNN academic researchers: * Our Slack channel, [click to join](https://join.slack.com/t/deep-graph-library/shared_invite/zt-eb4ict1g-xcg3PhZAFAB8p6dtKuP6xQ) * Our discussion forum: https://discuss.dgl.ai/ * Our [Zhihu blog (in Chinese)](https://www.zhihu.com/column/c_1070749881013936128) * Monthly GNN User Group online seminar ([event link](https://www.eventbrite.com/e/graph-neural-networks-user-group-tickets-137512275919?utm-medium=discovery&utm-campaign=social&utm-content=attendeeshare&aff=escb&utm-source=cp&utm-term=listing) | [past videos](https://www.youtube.com/channel/UCnmuSDY1pTlaFH1WRQElfTg)) Take the survey [here](https://forms.gle/Ej3jHCocACmb49Gp8) and leave any feedback to make DGL better fit for your needs. Thanks! ### DGL-powered projects * DGL-LifeSci: a DGL-based package for various applications in life science with graph neural networks. https://github.com/awslabs/dgl-lifesci * DGL-KE: a high performance, easy-to-use, and scalable package for learning large-scale knowledge graph embeddings. 
https://github.com/awslabs/dgl-ke * Benchmarking GNN: https://github.com/graphdeeplearning/benchmarking-gnns * OGB: a collection of realistic, large-scale, and diverse benchmark datasets for machine learning on graphs. https://ogb.stanford.edu/ * Graph4NLP: an easy-to-use library for R&D at the intersection of Deep Learning on Graphs and Natural Language Processing. https://github.com/graph4ai/graph4nlp * GNN-RecSys: https://github.com/je-dbl/GNN-RecSys * Amazon Neptune ML: a new capability of Neptune that uses Graph Neural Networks (GNNs), a machine learning technique purpose-built for graphs, to make easy, fast, and more accurate predictions using graph data. https://aws.amazon.com/cn/neptune/machine-learning/ * GNNLens2: Visualization tool for Graph Neural Networks. https://github.com/dmlc/GNNLens2 * RNAGlib: A package to facilitate construction, analysis, visualization and machine learning on RNA 2.5D Graphs. Includes a pre-built dataset: https://rnaglib.cs.mcgill.ca * OpenHGNN: Model zoo and benchmarks for Heterogeneous Graph Neural Networks. https://github.com/BUPT-GAMMA/OpenHGNN * TGL: A graph learning framework for large-scale temporal graphs. https://github.com/amazon-research/tgl * gtrick: Bag of Tricks for Graph Neural Networks. https://github.com/sangyx/gtrick * ArangoDB-DGL Adapter: Import [ArangoDB](https://github.com/arangodb/arangodb) graphs into DGL and vice-versa. https://github.com/arangoml/dgl-adapter * DGLD: [DGLD](https://github.com/EagleLab-ZJU/DGLD) is an open-source library for Deep Graph Anomaly Detection based on pytorch and DGL. ### Awesome Papers Using DGL 1. [**Benchmarking Graph Neural Networks**](https://arxiv.org/pdf/2003.00982.pdf), *Vijay Prakash Dwivedi, Chaitanya K. Joshi, Thomas Laurent, Yoshua Bengio, Xavier Bresson* 1. 
[**Open Graph Benchmarks: Datasets for Machine Learning on Graphs**](https://arxiv.org/pdf/2005.00687.pdf), NeurIPS'20, *Weihua Hu, Matthias Fey, Marinka Zitnik, Yuxiao Dong, Hongyu Ren, Bowen Liu, Michele Catasta, Jure Leskovec* 1. [**DropEdge: Towards Deep Graph Convolutional Networks on Node Classification**](https://openreview.net/pdf?id=Hkx1qkrKPr), ICLR'20, *Yu Rong, Wenbing Huang, Tingyang Xu, Junzhou Huang* 1. [**Discourse-Aware Neural Extractive Text Summarization**](https://www.aclweb.org/anthology/2020.acl-main.451/), ACL'20, *Jiacheng Xu, Zhe Gan, Yu Cheng, Jingjing Liu* 1. [**GCC: Graph Contrastive Coding for Graph Neural Network Pre-Training**](https://dl.acm.org/doi/pdf/10.1145/3394486.3403168?casa_token=EClsH2Vc4DcAAAAA:LIB8cbtr6yTDbYuv4cTLwTIYeDq5Y2dhj_ktcWdKpzdPLGeiuL0o8GlcN4QIOnpsAnmGeGVZ), KDD'20, *Jiezhong Qiu, Qibin Chen, Yuxiao Dong, Jing Zhang, Hongxia Yang, Ming Ding, Kuansan Wang, Jie Tang* 1. [**DGL-KE: Training Knowledge Graph Embeddings at Scale**](https://arxiv.org/pdf/2004.08532), SIGIR'20, *Da Zheng, Xiang Song, Chao Ma, Zeyuan Tan, Zihao Ye, Jin Dong, Hao Xiong, Zheng Zhang, George Karypis* 1. [**Improving Graph Neural Network Expressivity via Subgraph Isomorphism Counting**](https://arxiv.org/pdf/2006.09252.pdf), *Giorgos Bouritsas, Fabrizio Frasca, Stefanos Zafeiriou, Michael M. Bronstein* 1. [**INT: An Inequality Benchmark for Evaluating Generalization in Theorem Proving**](https://arxiv.org/pdf/2007.02924.pdf), *Yuhuai Wu, Albert Q. Jiang, Jimmy Ba, Roger Grosse* 1. [**Finding Patient Zero: Learning Contagion Source with Graph Neural Networks**](https://arxiv.org/pdf/2006.11913.pdf), *Chintan Shah, Nima Dehmamy, Nicola Perra, Matteo Chinazzi, Albert-László Barabási, Alessandro Vespignani, Rose Yu* 1. [**FeatGraph: A Flexible and Efficient Backend for Graph Neural Network Systems**](https://arxiv.org/pdf/2008.11359.pdf), SC'20, *Yuwei Hu, Zihao Ye, Minjie Wang, Jiali Yu, Da Zheng, Mu Li, Zheng Zhang, Zhiru Zhang, Yida Wang*
more 11. [**BP-Transformer: Modelling Long-Range Context via Binary Partitioning.**](https://arxiv.org/pdf/1911.04070.pdf), *Zihao Ye, Qipeng Guo, Quan Gan, Xipeng Qiu, Zheng Zhang* 12. [**OptiMol: Optimization of Binding Affinities in Chemical Space for Drug Discovery**](https://www.biorxiv.org/content/biorxiv/early/2020/06/16/2020.05.23.112201.full.pdf), *Jacques Boitreaud, Vincent Mallet, Carlos Oliver, Jérôme Waldispühl* 1. [**JAKET: Joint Pre-training of Knowledge Graph and Language Understanding**](https://arxiv.org/pdf/2010.00796.pdf), *Donghan Yu, Chenguang Zhu, Yiming Yang, Michael Zeng* 1. [**Architectural Implications of Graph Neural Networks**](https://arxiv.org/pdf/2009.00804.pdf), *Zhihui Zhang, Jingwen Leng, Lingxiao Ma, Youshan Miao, Chao Li, Minyi Guo* 1. [**Combining Reinforcement Learning and Constraint Programming for Combinatorial Optimization**](https://arxiv.org/pdf/2006.01610.pdf), *Quentin Cappart, Thierry Moisan, Louis-Martin Rousseau, Isabeau Prémont-Schwarz, and Andre Cire* 1. [**Therapeutics Data Commons: Machine Learning Datasets and Tasks for Therapeutics**](https://arxiv.org/abs/2102.09548) ([code repo](https://github.com/mims-harvard/TDC)), *Kexin Huang, Tianfan Fu, Wenhao Gao, Yue Zhao, Yusuf Roohani, Jure Leskovec, Connor W. Coley, Cao Xiao, Jimeng Sun, Marinka Zitnik* 1. [**Sparse Graph Attention Networks**](https://arxiv.org/abs/1912.00552), *Yang Ye, Shihao Ji* 1. [**On Self-Distilling Graph Neural Network**](https://arxiv.org/pdf/2011.02255.pdf), *Yuzhao Chen, Yatao Bian, Xi Xiao, Yu Rong, Tingyang Xu, Junzhou Huang* 1. [**Learning Robust Node Representations on Graphs**](https://arxiv.org/pdf/2008.11416.pdf), *Xu Chen, Ya Zhang, Ivor Tsang, and Yuangang Pan* 1. [**Recurrent Event Network: Autoregressive Structure Inference over Temporal Knowledge Graphs**](https://arxiv.org/abs/1904.05530), *Woojeong Jin, Meng Qu, Xisen Jin, Xiang Ren* 1. 
[**Graph Neural Ordinary Differential Equations**](https://arxiv.org/abs/1911.07532), *Michael Poli, Stefano Massaroli, Junyoung Park, Atsushi Yamashita, Hajime Asama, Jinkyoo Park* 1. [**FusedMM: A Unified SDDMM-SpMM Kernel for Graph Embedding and Graph Neural Networks**](https://arxiv.org/pdf/2011.06391.pdf), *Md. Khaledur Rahman, Majedul Haque Sujon, Ariful Azad* 1. [**An Efficient Neighborhood-based Interaction Model for Recommendation on Heterogeneous Graph**](https://arxiv.org/pdf/2007.00216.pdf), KDD'20, *Jiarui Jin, Jiarui Qin, Yuchen Fang, Kounianhua Du, Weinan Zhang, Yong Yu, Zheng Zhang, Alexander J. Smola* 1. [**Learning Interaction Models of Structured Neighborhood on Heterogeneous Information Network**](https://arxiv.org/pdf/2011.12683.pdf), *Jiarui Jin, Kounianhua Du, Weinan Zhang, Jiarui Qin, Yuchen Fang, Yong Yu, Zheng Zhang, Alexander J. Smola* 1. [**Graphein - a Python Library for Geometric Deep Learning and Network Analysis on Protein Structures**](https://www.biorxiv.org/content/10.1101/2020.07.15.204701v1), *Arian R. Jamasb, Pietro Lió, Tom L. Blundell* 1. [**Graph Policy Gradients for Large Scale Robot Control**](https://arxiv.org/abs/1907.03822), *Arbaaz Khan, Ekaterina Tolstaya, Alejandro Ribeiro, Vijay Kumar* 1. [**Heterogeneous Molecular Graph Neural Networks for Predicting Molecule Properties**](https://arxiv.org/abs/2009.12710), *Zeren Shui, George Karypis* 1. [**Could Graph Neural Networks Learn Better Molecular Representation for Drug Discovery? A Comparison Study of Descriptor-based and Graph-based Models**](https://assets.researchsquare.com/files/rs-81439/v1_stamped.pdf), *Dejun Jiang, Zhenxing Wu, Chang-Yu Hsieh, Guangyong Chen, Ben Liao, Zhe Wang, Chao Shen, Dongsheng Cao, Jian Wu, Tingjun Hou* 1. [**Principal Neighbourhood Aggregation for Graph Nets**](https://arxiv.org/abs/2004.05718), *Gabriele Corso, Luca Cavalleri, Dominique Beaini, Pietro Liò, Petar Veličković* 1. 
[**Collective Multi-type Entity Alignment Between Knowledge Graphs**](https://dl.acm.org/doi/abs/10.1145/3366423.3380289), *Qi Zhu, Hao Wei, Bunyamin Sisman, Da Zheng, Christos Faloutsos, Xin Luna Dong, Jiawei Han* 1. [**Graph Representation Forecasting of Patient's Medical Conditions: towards A Digital Twin**](https://arxiv.org/abs/2009.08299), *Pietro Barbiero, Ramon Viñas Torné, Pietro Lió* 1. [**Relational Graph Learning on Visual and Kinematics Embeddings for Accurate Gesture Recognition in Robotic Surgery**](https://arxiv.org/abs/2011.01619), *Yong-Hao Long, Jie-Ying Wu, Bo Lu, Yue-Ming Jin, Mathias Unberath, Yun-Hui Liu, Pheng-Ann Heng and Qi Dou* 1. [**Dark Reciprocal-Rank: Boosting Graph-Convolutional Self-Localization Network via Teacher-to-student Knowledge Transfer**](https://arxiv.org/abs/2011.00402), *Takeda Koji, Tanaka Kanji* 1. [**Graph InfoClust: Leveraging Cluster-Level Node Information For Unsupervised Graph Representation Learning**](https://arxiv.org/abs/2009.06946), *Costas Mavromatis, George Karypis* 1. [**GraphSeam: Supervised Graph Learning Framework for Semantic UV Mapping**](https://arxiv.org/abs/2011.13748), *Fatemeh Teimury, Bruno Roy, Juan Sebastian Casallas, David macdonald, Mark Coates* 1. [**Comprehensive Study on Molecular Supervised Learning with Graph Neural Networks**](https://pubs.acs.org/doi/10.1021/acs.jcim.0c00416), *Doyeong Hwang, Soojung Yang, Yongchan Kwon, Kyung Hoon Lee, Grace Lee, Hanseok Jo, Seyeol Yoon, and Seongok Ryu* 1. [**A graph auto-encoder model for miRNA-disease associations prediction**](https://academic.oup.com/bib/advance-article-abstract/doi/10.1093/bib/bbaa240/5929824?redirectedFrom=fulltext), *Zhengwei Li, Jiashu Li, Ru Nie, Zhu-Hong You, Wenzheng Bao* 1. 
[**Graph convolutional regression of cardiac depolarization from sparse endocardial maps**](https://arxiv.org/abs/2009.14068), STACOM 2020 workshop, *Felix Meister, Tiziano Passerini, Chloé Audigier, Èric Lluch, Viorel Mihalef, Hiroshi Ashikaga, Andreas Maier, Henry Halperin, Tommaso Mansi* 1. [**AttnIO: Knowledge Graph Exploration with In-and-Out Attention Flow for Knowledge-Grounded Dialogue**](https://www.aclweb.org/anthology/2020.emnlp-main.280/), EMNLP'20, *Jaehun Jung, Bokyung Son, Sungwon Lyu* 1. [**Learning from Non-Binary Constituency Trees via Tensor Decomposition**](https://github.com/danielecastellana22/tensor-tree-nn), COLING'20, *Daniele Castellana, Davide Bacciu* 1. [**Inducing Alignment Structure with Gated Graph Attention Networks for Sentence Matching**](https://arxiv.org/abs/2010.07668), *Peng Cui, Le Hu, Yuanchao Liu* 1. [**Enhancing Extractive Text Summarization with Topic-Aware Graph Neural Networks**](https://arxiv.org/abs/2010.06253), COLING'20, *Peng Cui, Le Hu, Yuanchao Liu* 1. [**Double Graph Based Reasoning for Document-level Relation Extraction**](https://arxiv.org/abs/2009.13752), EMNLP'20, *Shuang Zeng, Runxin Xu, Baobao Chang, Lei Li* 1. [**Systematic Generalization on gSCAN with Language Conditioned Embedding**](https://arxiv.org/abs/2009.05552), AACL-IJCNLP'20, *Tong Gao, Qi Huang, Raymond J. Mooney* 1. [**Automatic selection of clustering algorithms using supervised graph embedding**](https://arxiv.org/pdf/2011.08225.pdf), *Noy Cohen-Shapira, Lior Rokach* 1. [**Improving Learning to Branch via Reinforcement Learning**](https://openreview.net/forum?id=z4D7-PTxTb), *Haoran Sun, Wenbo Chen, Hui Li, Le Song* 1. [**A Practical Guide to Graph Neural Networks**](https://arxiv.org/pdf/2010.05234.pdf), *Isaac Ronald Ward, Jack Joyner, Casey Lickfold, Stash Rowe, Yulan Guo, Mohammed Bennamoun*, [code](https://github.com/isolabs/gnn-tutorial) 1. 
[**APAN: Asynchronous Propagation Attention Network for Real-time Temporal Graph Embedding**](https://arxiv.org/pdf/2011.11545.pdf), SIGMOD'21, *Xuhong Wang, Ding Lyu, Mengjian Li, Yang Xia, Qi Yang, Xinwen Wang, Xinguang Wang, Ping Cui, Yupu Yang, Bowen Sun, Zhenyu Guo, Junkui Li* 1. [**Uncertainty-Matching Graph Neural Networks to Defend Against Poisoning Attacks**](https://arxiv.org/pdf/2009.14455.pdf), *Uday Shankar Shanthamallu, Jayaraman J. Thiagarajan, Andreas Spanias* 1. [**Computing Graph Neural Networks: A Survey from Algorithms to Accelerators**](https://arxiv.org/pdf/2010.00130.pdf), *Sergi Abadal, Akshay Jain, Robert Guirado, Jorge López-Alonso, Eduard Alarcón* 1. [**NHK_STRL at WNUT-2020 Task 2: GATs with Syntactic Dependencies as Edges and CTC-based Loss for Text Classification**](https://www.aclweb.org/anthology/2020.wnut-1.43.pdf), *Yuki Yasuda, Taichi Ishiwatari, Taro Miyazaki, Jun Goto* 1. [**Relation-aware Graph Attention Networks with Relational Position Encodings for Emotion Recognition in Conversations**](https://www.aclweb.org/anthology/2020.emnlp-main.597.pdf), *Taichi Ishiwatari, Yuki Yasuda, Taro Miyazaki, Jun Goto* 1. [**PGM-Explainer: Probabilistic Graphical Model Explanations for Graph Neural Networks**](https://proceedings.neurips.cc/paper/2020/file/8fb134f258b1f7865a6ab2d935a897c9-Paper.pdf), *Minh N. Vu, My T. Thai* 1. [**A Generalization of Transformer Networks to Graphs**](https://arxiv.org/pdf/2012.09699.pdf), *Vijay Prakash Dwivedi, Xavier Bresson* 1. [**Discourse-Aware Neural Extractive Text Summarization**](https://www.aclweb.org/anthology/2020.acl-main.451.pdf), ACL'20, *Jiacheng Xu, Zhe Gan, Yu Cheng, Jingjing Liu* 1. [**Learning Robust Node Representations on Graphs**](https://arxiv.org/abs/2008.11416), *Xu Chen, Ya Zhang, Ivor Tsang, Yuangang Pan* 1. [**Adaptive Graph Diffusion Networks with Hop-wise Attention**](https://arxiv.org/abs/2012.15024), *Chuxiong Sun, Guoshi Wu* 1. 
[**The Photoswitch Dataset: A Molecular Machine Learning Benchmark for the Advancement of Synthetic Chemistry**](https://arxiv.org/abs/2008.03226), *Aditya R. Thawani, Ryan-Rhys Griffiths, Arian Jamasb, Anthony Bourached, Penelope Jones, William McCorkindale, Alexander A. Aldrick, Alpha A. Lee* 1. [**A community-powered search of machine learning strategy space to find NMR property prediction models**](https://arxiv.org/abs/2008.05994), *Lars A. Bratholm, Will Gerrard, Brandon Anderson, Shaojie Bai, Sunghwan Choi, Lam Dang, Pavel Hanchar, Addison Howard, Guillaume Huard, Sanghoon Kim, Zico Kolter, Risi Kondor, Mordechai Kornbluth, Youhan Lee, Youngsoo Lee, Jonathan P. Mailoa, Thanh Tu Nguyen, Milos Popovic, Goran Rakocevic, Walter Reade, Wonho Song, Luka Stojanovic, Erik H. Thiede, Nebojsa Tijanic, Andres Torrubia, Devin Willmott, Craig P. Butts, David R. Glowacki, Kaggle participants* 1. [**Adaptive Layout Decomposition with Graph Embedding Neural Networks**](http://www.cse.cuhk.edu.hk/~byu/papers/C98-DAC2020-MPL-Selector.pdf), *Wei Li, Jialu Xia, Yuzhe Ma, Jialu Li, Yibo Lin, Bei Yu*, DAC'20 1. [**Transfer Learning with Graph Neural Networks for Optoelectronic Properties of Conjugated Oligomers**](https://aip.scitation.org/doi/10.1063/5.0037863), J. Chem. Phys. 154, *Chee-Kong Lee, Chengqiang Lu, Yue Yu, Qiming Sun, Chang-Yu Hsieh, Shengyu Zhang, Qi Liu, and Liang Shi* 1. [**Jet tagging in the Lund plane with graph networks**](https://link.springer.com/article/10.1007/JHEP03(2021)052), Journal of High Energy Physics 2021, *Frédéric A. Dreyer and Huilin Qu* 1. [**Global Attention Improves Graph Networks Generalization**](https://arxiv.org/abs/2006.07846), *Omri Puny, Heli Ben-Hamu, and Yaron Lipman* 1. [**Learning over Families of Sets -- Hypergraph Representation Learning for Higher Order Tasks**](https://arxiv.org/abs/2101.07773), SDM 2021, *Balasubramaniam Srinivasan, Da Zheng, and George Karypis* 1. 
[**SSFG: Stochastically Scaling Features and Gradients for Regularizing Graph Convolution Networks**](https://arxiv.org/abs/2102.10338), *Haimin Zhang, Min Xu* 1. [**Application and evaluation of knowledge graph embeddings in biomedical data**](https://peerj.com/articles/cs-341/), PeerJ Computer Science 7:e341, *Mona Alshahrani, Maha A. Thafar, Magbubah Essack* 1. [**MoTSE: an interpretable task similarity estimator for small molecular property prediction tasks**](https://www.biorxiv.org/content/10.1101/2021.01.13.426608v2), bioRxiv 2021.01.13.426608, *Han Li, Xinyi Zhao, Shuya Li, Fangping Wan, Dan Zhao, Jianyang Zeng* 1. [**Reinforcement Learning For Data Poisoning on Graph Neural Networks**](https://arxiv.org/abs/2102.06800), *Jacob Dineen, A S M Ahsan-Ul Haque, Matthew Bielskas* 1. [**Generalising Recursive Neural Models by Tensor Decomposition**](https://github.com/danielecastellana22/tensor-tree-nn), IJCNN'20, *Daniele Castellana, Davide Bacciu* 1. [**Tensor Decompositions in Recursive Neural Networks for Tree-Structured Data**](https://github.com/danielecastellana22/tensor-tree-nn), ESANN'20, *Daniele Castellana, Davide Bacciu* 1. [**Combining Self-Organizing and Graph Neural Networks for Modeling Deformable Objects in Robotic Manipulation**](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7806087/), Frontiers in Robotics and AI, *Valencia, Angel J., and Pierre Payeur* 1. [**Joint stroke classification and text line grouping in online handwritten documents with edge pooling attention networks**](https://www.sciencedirect.com/science/article/abs/pii/S0031320321000467), Pattern Recognition, *Jun-Yu Ye, Yan-Ming Zhang, Qing Yang, Cheng-Lin Liu* 1. 
[**Toward Accurate Predictions of Atomic Properties via Quantum Mechanics Descriptors Augmented Graph Convolutional Neural Network: Application of This Novel Approach in NMR Chemical Shifts Predictions**](https://pubs.acs.org/doi/full/10.1021/acs.jpclett.0c02654), The Journal of Physical Chemistry Letters, *Peng Gao, Jie Zhang, Yuzhu Sun, and Jianguo Yu* 1. [**A Graph Neural Network to Model User Comfort in Robot Navigation**](https://arxiv.org/abs/2102.08863), *Pilar Bachiller, Daniel Rodriguez-Criado, Ronit R. Jorvekar, Pablo Bustos, Diego R. Faria, Luis J. Manso* 1. [**Medical Entity Disambiguation Using Graph Neural Networks**](https://arxiv.org/abs/2104.01488), *Alina Vretinaris, Chuan Lei, Vasilis Efthymiou, Xiao Qin, Fatma Özcan* 1. [**Chemistry-informed Macromolecule Graph Representation for Similarity Computation and Supervised Learning**](https://arxiv.org/abs/2103.02565), *Somesh Mohapatra, Joyce An, Rafael Gómez-Bombarelli* 1. [**Characterizing and Forecasting User Engagement with In-app Action Graph: A Case Study of Snapchat**](https://arxiv.org/pdf/1906.00355.pdf), *Yozen Liu, Xiaolin Shi, Lucas Pierce, Xiang Ren* 1. [**GIPA: General Information Propagation Algorithm for Graph Learning**](https://arxiv.org/abs/2105.06035), *Qinkai Zheng, Houyi Li, Peng Zhang, Zhixiong Yang, Guowei Zhang, Xintan Zeng, Yongchao Liu* 1. [**Graph Ensemble Learning over Multiple Dependency Trees for Aspect-level Sentiment Classification**](https://arxiv.org/abs/2103.11794), NAACL'21, *Xiaochen Hou, Peng Qi, Guangtao Wang, Rex Ying, Jing Huang, Xiaodong He, Bowen Zhou* 1. [**Enhancing Scientific Papers Summarization with Citation Graph**](https://arxiv.org/abs/2104.03057), AAAI'21, *Chenxin An, Ming Zhong, Yiran Chen, Danqing Wang, Xipeng Qiu, Xuanjing Huang* 1. 
[**Improving Graph Representation Learning by Contrastive Regularization**](https://arxiv.org/pdf/2101.11525.pdf), *Kaili Ma, Haochen Yang, Han Yang, Tatiana Jin, Pengfei Chen, Yongqiang Chen, Barakeel Fanseu Kamhoua, James Cheng* 1. [**Extract the Knowledge of Graph Neural Networks and Go Beyond it: An Effective Knowledge Distillation Framework**](https://arxiv.org/pdf/2103.02885.pdf), WWW'21, *Cheng Yang, Jiawei Liu, Chuan Shi* 1. [**VIKING: Adversarial Attack on Network Embeddings via Supervised Network Poisoning**](https://arxiv.org/pdf/2102.07164.pdf), PAKDD'21, *Viresh Gupta, Tanmoy Chakraborty* 1. [**Knowledge Graph Embedding using Graph Convolutional Networks with Relation-Aware Attention**](https://arxiv.org/pdf/2102.07200.pdf), *Nasrullah Sheikh, Xiao Qin, Berthold Reinwald, Christoph Miksovic, Thomas Gschwind, Paolo Scotton* 1. [**SLAPS: Self-Supervision Improves Structure Learning for Graph Neural Networks**](https://arxiv.org/pdf/2102.05034.pdf), *Bahare Fatemi, Layla El Asri, Seyed Mehran Kazemi* 1. [**Finding Needles in Heterogeneous Haystacks**](https://homepage.divms.uiowa.edu/~badhikari/assets/doc/papers/CONGCNIAAI2021.pdf), AAAI'21, *Bijaya Adhikari, Liangyue Li, Nikhil Rao, Karthik Subbian* 1. [**RetCL: A Selection-based Approach for Retrosynthesis via Contrastive Learning**](https://arxiv.org/abs/2105.00795), IJCAI 2021, *Hankook Lee, Sungsoo Ahn, Seung-Woo Seo, You Young Song, Eunho Yang, Sung-Ju Hwang, Jinwoo Shin* 1. [**Accurate Prediction of Free Solvation Energy of Organic Molecules via Graph Attention Network and Message Passing Neural Network from Pairwise Atomistic Interactions**](https://arxiv.org/abs/2105.02048), *Ramin Ansari, Amirata Ghorbani* 1. [**DIPS-Plus: The Enhanced Database of Interacting Protein Structures for Interface Prediction**](https://arxiv.org/abs/2106.04362), *Alex Morehead, Chen Chen, Ada Sedova, Jianlin Cheng* 1. 
[**Coreference-Aware Dialogue Summarization**](https://arxiv.org/abs/2106.08556), SIGDIAL'21, *Zhengyuan Liu, Ke Shi, Nancy F. Chen* 1. [**Document Structure aware Relational Graph Convolutional Networks for Ontology Population**](https://arxiv.org/abs/2104.12950), arXiv, *Abhay M Shalghar, Ayush Kumar, Balaji Ganesan, Aswin Kannan, Shobha G* 1. [**Covid-19 Detection from Chest X-ray and Patient Metadata using Graph Convolutional Neural Networks**](https://arxiv.org/abs/2105.09720), *Thosini Bamunu Mudiyanselage, Nipuna Senanayake, Chunyan Ji, Yi Pan, Yanqing Zhang* 1. [**Rossmann-toolbox: a deep learning-based protocol for the prediction and design of cofactor specificity in Rossmann fold proteins**](https://academic.oup.com/bib/advance-article/doi/10.1093/bib/bbab371/6375059), Briefings in Bioinformatics, *Kamil Kaminski, Jan Ludwiczak, Maciej Jasinski, Adriana Bukala, Rafal Madaj, Krzysztof Szczepaniak, Stanislaw Dunin-Horkawicz* 1. [**LGESQL: Line Graph Enhanced Text-to-SQL Model with Mixed Local and Non-Local Relations**](https://arxiv.org/pdf/2106.01093.pdf), ACL'21, *Ruisheng Cao, Lu Chen, Zhi Chen, Yanbin Zhao, Su Zhu, Kai Yu* 1. [**Enhancing Graph Neural Networks via auxiliary training for semi-supervised node classification**](https://www.sciencedirect.com/science/article/pii/S0950705121001477), Knowledge-Based System'21, *Yao Wu, Yu Song, Hong Huang, Fanghua Ye, Xing Xie, Hai Jin* 1. [**Modeling Graph Node Correlations with Neighbor Mixture Models**](https://arxiv.org/pdf/2103.15966.pdf), *Linfeng Liu, Michael C. Hughes, Li-Ping Liu* 1. [**COMBINING PHYSICS AND MACHINE LEARNING FOR NETWORK FLOW ESTIMATION**](https://openreview.net/pdf/9dc2744a465941220de07cf308acf822ec8aaa64.pdf), ICLR'21, *Arlei Silva, Furkan Kocayusufoglu, Saber Jafarpour, Francesco Bullo, Ananthram Swami, Ambuj Singh* 1. 
[**A Classification Method for Academic Resources Based on a Graph Attention Network**](https://www.mdpi.com/1999-5903/13/3/64/htm), Future Internet'21, *Jie Yu, Yaliu Li, Chenle Pan and Junwei Wang* 1. [**Large Graph Convolutional Network Training with GPU-Oriented Data Communication Architecture**](https://arxiv.org/abs/2103.03330), *Seung Won Min, Kun Wu, Sitao Huang, Mert Hidayetoğlu, Jinjun Xiong, Eiman Ebrahimi, Deming Chen, Wen-mei Hwu* 1. [**Graph Attention Multi-Layer Perception**](https://github.com/PKU-DAIR/GAMLP/blob/main/GAMLP.pdf), *Wentao Zhang, Ziqi Yin, Zeang Sheng, Wen Ouyang, Xiaosen Li, Yangyu Tao, Zhi Yang, Bin Cui* 1. [**GNNLens: A Visual Analytics Approach for Prediction Error Diagnosis of Graph Neural Networks**](https://arxiv.org/abs/2011.11048v5), *Zhihua Jin, Yong Wang, Qianwen Wang, Yao Ming, Tengfei Ma, Huamin Qu* 1. [**How Attentive are Graph Attention Networks?**](https://arxiv.org/pdf/2105.14491.pdf), *Shaked Brody, Uri Alon, Eran Yahav*, [code](https://github.com/tech-srl/how_attentive_are_gats) 1. [**SCENE: Reasoning about Traffic Scenes using Heterogeneous Graph Neural Networks**](https://arxiv.org/pdf/2301.03512.pdf), *Thomas Monninger\*, Julian Schmidt\*, Jan Rupprecht, David Raba, Julian Jordan, Daniel Frank, Steffen Staab, Klaus Dietmayer*, [code](https://github.com/schmidt-ju/scene), \*co-first authors
## Contributing Please let us know if you encounter a bug or have any suggestions by [filing an issue](https://github.com/dmlc/dgl/issues). We welcome all contributions from bug fixes to new features and extensions. We expect all contributions to be discussed in the issue tracker and to go through PRs. Please refer to our [contribution guide](https://docs.dgl.ai/contribute.html). ## Cite If you use DGL in a scientific publication, we would appreciate citations to the following paper: ``` @article{wang2019dgl, title={Deep Graph Library: A Graph-Centric, Highly-Performant Package for Graph Neural Networks}, author={Minjie Wang and Da Zheng and Zihao Ye and Quan Gan and Mufei Li and Xiang Song and Jinjing Zhou and Chao Ma and Lingfan Yu and Yu Gai and Tianjun Xiao and Tong He and George Karypis and Jinyang Li and Zheng Zhang}, year={2019}, journal={arXiv preprint arXiv:1909.01315} } ``` ## The Team DGL is developed and maintained by [NYU, NYU Shanghai, AWS Shanghai AI Lab, and AWS MXNet Science Team](https://www.dgl.ai/pages/about.html). ## License DGL uses Apache License 2.0. ================================================ FILE: apps/life_sci/README.md ================================================ # DGL-LifeSci DGL-LifeSci has moved [here](https://github.com/awslabs/dgl-lifesci). 
================================================ FILE: benchmarks/.gitignore ================================================ html results ================================================ FILE: benchmarks/Jenkinsfile ================================================ pipeline { triggers { issueCommentTrigger('@dgl-bot .*') } agent { docker { label 'linux-benchmark-node' image 'dgllib/dgl-ci-lint' alwaysPull true } } stages { stage('Regression Test') { steps { checkout scm script { def commentTriggerCause = currentBuild.getBuildCauses('org.jenkinsci.plugins.pipeline.github.trigger.IssueCommentCause') def prOpenTriggerCause = currentBuild.getBuildCauses('jenkins.branch.BranchEventCause') def realTriggerCause = currentBuild.getBuildCauses() echo("BUILD CAUSE: ${realTriggerCause.toString()}") if (commentTriggerCause) { dir('benchmark_scripts_repo') { checkout([$class: 'GitSCM', branches: [[name: '*/master']], userRemoteConfigs: [[credentialsId: 'github', url: 'https://github.com/dglai/DGL_scripts.git']]]) } sh('cp benchmark_scripts_repo/benchmark/* benchmarks/scripts/') def comment = env.GITHUB_COMMENT def author = env.GITHUB_COMMENT_AUTHOR def authorized_user = ['VoVAllen', 'BarclayII', 'jermainewang', 'zheng-da', 'mufeili'] def isauthorized = author in authorized_user def command_lists = comment.split(' ') def instance_type = command_lists[2].replace('.', "") if (!isauthorized) { error("Not authorized to launch regression tests") } if (command_lists.size() != 5) { pullRequest.comment('Cannot run the regression test due to unknown command') error('Unknown command') } else { pullRequest.comment("Start the Regression test. View at ${RUN_DISPLAY_URL}") } dir('benchmarks/scripts') { sh('python3 -m pip install boto3') sh("PYTHONUNBUFFERED=1 GIT_URL=${env.GIT_URL} GIT_BRANCH=${env.CHANGE_BRANCH} python3 run_reg_test.py --data-folder ${env.GIT_COMMIT}_${instance_type} --run-cmd '${comment}'") } pullRequest.comment("Finished the Regression test. 
Result table is at https://dgl-asv-data.s3-us-west-2.amazonaws.com/${env.GIT_COMMIT}_${instance_type}/results/result.csv. Jenkins job link is ${RUN_DISPLAY_URL}. ") } else { // if (prOpenTriggerCause) { // if (env.BUILD_ID == "1") { // pullRequest.comment('To trigger regression tests: \n - `@dgl-bot run [instance-type] [which tests] [compare-with-branch]`; \n For example: `@dgl-bot run g4dn.4xlarge all dmlc/master` or `@dgl-bot run c5.9xlarge kernel,api dmlc/master`') // } // } echo('Build was not started by a trigger') } // echo("Comment: ${commentTriggerCause.getComment()}") } } post { failure { echo '========Regression execution failed========' } } } } } ================================================ FILE: benchmarks/README.md ================================================ DGL Benchmarks ==== Benchmarking DGL with Airspeed Velocity. Usage --- Before beginning, ensure that airspeed velocity is installed: ```bash pip install asv ``` To run all benchmarks locally, build the project first and then run: ```bash asv run -n -e --python=same --verbose ``` **Due to ASV's restriction, `--python=same` will not write any benchmark results to disk. It does not support specifying branches and commits either. They are only available under ASV's managed environment.** To change the device for benchmarking, set the `DGL_BENCH_DEVICE` environment variable. Allowed values are `"cpu"` or `"gpu"`. ```bash export DGL_BENCH_DEVICE=gpu ``` To select which benchmark to run, use the `--bench` flag. For example, ```bash asv run -n -e --python=same --verbose --bench model_acc.bench_gat ``` Note that OGB datasets need to be downloaded manually to the `/tmp/dataset` folder (e.g. `/tmp/dataset/ogbn-products/`) beforehand. You can do so by running the code below in this folder: ```python from benchmarks.utils import get_ogb_graph get_ogb_graph("ogbn-product") ``` Run in docker locally --- DGL runs all benchmarks automatically in a docker container. 
To run benchmarks in docker locally, * Git commit your local changes. No need to push to remote repository. * To compare commits from different branches, change the `"branches"` list in `asv.conf.json`. The default is `"HEAD"` which is the last commit of the current branch. For example, to compare your proposed changes with the master branch, set it to be `["HEAD", "master"]`. If your workspace is a forked repository, make sure your local master has synced with the upstream. * Use the `publish.sh` script. It accepts two arguments, a name specifying the identity of the test machine and a device name. For example, ```bash bash publish.sh dev-machine gpu ``` The script will output two folders `results` and `html`. The `html` folder contains the generated static web pages. View it by: ```bash asv preview ``` Please see `publish.sh` for more information on how it works and how to modify it according to your needs. Adding a new benchmark suite --- The benchmark folder is organized as follows: ``` |-- benchmarks/ |-- model_acc/ # benchmarks for model accuracy |-- bench_gcn.py |-- bench_gat.py |-- bench_sage.py ... |-- model_speed/ # benchmarks for model training speed |-- bench_gat.py |-- bench_sage.py ... ... # other types of benchmarks |-- html/ # generated html files |-- results/ # generated result files |-- asv.conf.json # asv config file |-- build_dgl_asv.sh # script for building dgl in asv |-- install_dgl_asv.sh # script for installing dgl in asv |-- publish.sh # script for running benchmarks in docker |-- README.md # this readme |-- run.sh # script for calling asv in docker |-- ... # other aux files ``` To add a new benchmark, pick a suitable benchmark type and create a python script under it. We prefer to have the prefix `bench_` in the name. Here is a toy example: ```python # bench_range.py import time from .. 
import utils @utils.benchmark('time') @utils.parametrize('l', [10, 100, 1000]) @utils.parametrize('u', [10, 100, 1000]) def track_time(l, u): t0 = time.time() for i in range(l, u): pass return time.time() - t0 ``` * The main entry point of each benchmark script is a `track_*` function. The function can have arbitrary arguments and must return the benchmark result. * There are two useful decorators: `utils.benchmark` and `utils.parametrize`. * `utils.benchmark` indicates the type of this benchmark. Currently supported types are: `'time'` and `'acc'`. The decorator will perform some necessary setup and finalization steps such as fixing the random seed for the `'acc'` type. * `utils.parametrize` specifies the parameters to test. Multiple parametrize decorators mean benchmarking the combination. * Check out `model_acc/bench_gcn.py` and `model_speed/bench_sage.py`. * ASV's [official guide on writing benchmarks](https://asv.readthedocs.io/en/stable/writing_benchmarks.html) is also very helpful. Tips ---- * Feed flags `-e --verbose` to `asv run` to print out stderr and more information. * When running benchmarks locally (e.g., with `--python=same`), ASV will not write results to disk so `asv publish` will not generate plots. * Try to make your benchmarks compatible with all the versions being tested. * For OGB datasets, put the dataset into `/tmp/dataset/`. ================================================ FILE: benchmarks/asv.conf.json ================================================ { // The version of the config file format. Do not change, unless // you know what you are doing. "version": 1, // The name of the project being benchmarked "project": "dgl", // The project's homepage "project_url": "https://www.dgl.ai", // The URL or local path of the source code repository for the // project being benchmarked "repo": "..", // The Python project's subdirectory in your repo. If missing or // the empty string, the project is assumed to be located at the root // of the repository. 
// "repo_subdir": "python", // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // "build_command": [ "/bin/bash {conf_dir}/scripts/build_dgl_asv.sh" ], "install_command": [ "/bin/bash {conf_dir}/scripts/install_dgl_asv.sh" ], "uninstall_command": [ "return-code=any python -m pip uninstall -y dgl" ], // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). "branches": [ "HEAD" ], // for git // The DVCS being used. If not set, it will be automatically // determined from "repo" by looking at the protocol in the URL // (if remote), or by looking for special directories, such as // ".git" (if local). "dvcs": "git", // The tool to use to create environments. May be "conda", // "virtualenv" or other value depending on the plugins in use. // If missing or the empty string, the tool will be automatically // determined by looking for tools on the PATH environment // variable. "environment_type": "conda", // timeout in seconds for installing any dependencies in environment // defaults to 10 min "install_timeout": 600, // the base URL to show a commit for the project. // "show_commit_url": "http://github.com/owner/project/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["2.7", "3.6"], // The list of conda channel names to be searched for benchmark // dependency packages in the specified order // "conda_channels": ["conda-forge", "defaults"], // The matrix of dependencies to test. Each key is the name of a // package (in PyPI) and the values are version numbers. An empty // list or empty string indicates to just test against the default // (latest) version. null indicates that the package is to not be // installed. 
If the package to be tested is only available from // PyPi, and the 'environment_type' is conda, then you can preface // the package name by 'pip+', and the package will be installed via // pip (with all the conda available packages installed first, // followed by the pip installed packages). // // "matrix": { // "numpy": ["1.6", "1.7"], // "six": ["", null], // test with and without six installed // "pip+emcee": [""], // emcee is only available for install with pip. // }, // Combinations of libraries/python versions can be excluded/included // from the set to test. Each entry is a dictionary containing additional // key-value pairs to include/exclude. // // An exclude entry excludes entries where all values match. The // values are regexps that should match the whole string. // // An include entry adds an environment. Only the packages listed // are installed. The 'python' key is required. The exclude rules // do not apply to includes. // // In addition to package names, the following keys are available: // // - python // Python version, as in the *pythons* variable above. // - environment_type // Environment type, as above. // - sys_platform // Platform, as in sys.platform. Possible values for the common // cases: 'linux2', 'win32', 'cygwin', 'darwin'. // // "exclude": [ // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows // {"environment_type": "conda", "six": null}, // don't run without six on conda // ], // // "include": [ // // additional env for python2.7 // {"python": "2.7", "numpy": "1.8"}, // // additional env if run on windows+conda // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, // ], // The directory (relative to the current directory) that benchmarks are // stored in. If not provided, defaults to "benchmarks" // "benchmark_dir": "benchmarks", // The directory (relative to the current directory) to cache the Python // environments in. 
If not provided, defaults to "env" "env_dir": "env", // The directory (relative to the current directory) that raw benchmark // results are stored in. If not provided, defaults to "results". "results_dir": "results", // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". "html_dir": "html", // The number of characters to retain in the commit hashes. // "hash_length": 8, // `asv` will cache results of the recent builds in each // environment, making them faster to install next time. This is // the number of builds to keep, per environment. // "build_cache_size": 2, // The commits after which the regression search in `asv publish` // should start looking for regressions. Dictionary whose keys are // regexps matching to benchmark names, and values corresponding to // the commit (exclusive) after which to start looking for // regressions. The default is to start from the first commit // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. // // "regressions_first_commits": { // "some_benchmark": "352cdf", // Consider regressions only after this commit // "another_benchmark": null, // Skip regression detection altogether // }, // The thresholds for relative change in results, after which `asv // publish` starts reporting regressions. Dictionary of the same // form as in ``regressions_first_commits``, with values // indicating the thresholds. If multiple entries match, the // maximum is taken. If no entry matches, the default is 5%. 
# --- benchmarks/benchmarks/api/bench_add_self_loop.py ---
@utils.benchmark("time")
@utils.parametrize("graph_name", ["cora", "livejournal"])
@utils.parametrize("format", ["coo"])
def track_time(graph_name, format):
    """Return the mean wall time of one ``add_self_loop()`` call.

    The named graph is fetched in the requested sparse format and moved
    to the benchmark device. Three untimed warm-up calls are followed by
    three timed calls whose total is averaged.
    """
    bench_device = utils.get_bench_device()
    base_graph = utils.get_graph(graph_name, format).to(bench_device)

    # Warm-up: results are discarded.
    for _ in range(3):
        _ = base_graph.add_self_loop()

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(3):
            _ = base_graph.add_self_loop()

    return timer.elapsed_secs / 3


# --- benchmarks/benchmarks/api/bench_batch.py ---
@utils.benchmark("time")
@utils.parametrize("batch_size", [4, 32, 256, 1024])
def track_time(batch_size):
    """Return the mean wall time of ``dgl.batch`` over ``batch_size`` graphs.

    The first ``batch_size`` graphs of QM7b are moved to the benchmark
    device, batched ten times as warm-up, then batched 100 times under
    the timer.
    """
    bench_device = utils.get_bench_device()
    dataset = dgl.data.QM7bDataset()

    # Slicing the dataset yields a tuple whose first entry is the graphs.
    device_graphs = [
        sample.to(bench_device) for sample in dataset[0:batch_size][0]
    ]

    # Warm-up.
    for _ in range(10):
        _ = dgl.batch(device_graphs)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(100):
            _ = dgl.batch(device_graphs)

    return timer.elapsed_secs / 100
# --- benchmarks/benchmarks/api/bench_builtin_apply_edges.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize("graph_name", ["cora", "ogbn-arxiv"])
@utils.parametrize("format", ["coo", "csr"])
@utils.parametrize("feat_size", [8, 128, 512])
@utils.parametrize("reduce_type", ["u->e", "u+v"])
def track_time(graph_name, format, feat_size, reduce_type):
    """Return the mean wall time of ``apply_edges`` with a built-in function.

    ``reduce_type`` selects the built-in: ``"u->e"`` copies the source
    feature to the edge, ``"u+v"`` adds source and destination features.
    Node feature ``"h"`` is random with ``feat_size`` columns.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    feat_shape = (g.num_nodes(), feat_size)
    g.ndata["h"] = torch.randn(feat_shape, device=bench_device)

    builtin_by_name = {
        "u->e": fn.copy_u("h", "x"),
        "u+v": fn.u_add_v("h", "h", "x"),
    }
    edge_func = builtin_by_name[reduce_type]

    # Warm-up.
    for _ in range(3):
        g.apply_edges(edge_func)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            g.apply_edges(edge_func)

    return timer.elapsed_secs / 10
# --- benchmarks/benchmarks/api/bench_builtin_apply_edges_hetero.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize("num_relations", [5, 50, 500])
@utils.parametrize("format", ["coo", "csr"])
@utils.parametrize("feat_size", [8, 128, 512])
@utils.parametrize("reduce_type", ["u->e"])  # , 'e->u'])
def track_time(num_relations, format, feat_size, reduce_type):
    """Return the mean wall time of ``apply_edges`` on a heterograph.

    The graph has node types ``n1``/``n2`` and ``num_relations`` edge
    types ``e_0 .. e_{k-1}``; edge lists cycle through Cora, Pubmed and
    Citeseer. Both node types carry a random feature ``"h"``.

    NOTE(review): ``format`` is parametrized but never applied to the
    graph in this function - confirm whether that is intended.
    """
    bench_device = utils.get_bench_device()

    edge_pool = [
        dgl.data.CoraGraphDataset(verbose=False)[0].edges(),
        dgl.data.PubmedGraphDataset(verbose=False)[0].edges(),
        dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
    ]
    relations = {
        ("n1", "e_{}".format(rel), "n2"): edge_pool[rel % len(edge_pool)]
        for rel in range(num_relations)
    }

    hg = dgl.heterograph(relations).to(bench_device)
    for ntype in ("n1", "n2"):
        hg.nodes[ntype].data["h"] = torch.randn(
            (hg.num_nodes(ntype), feat_size), device=bench_device
        )

    builtin_by_name = {
        "u->e": fn.copy_u("h", "x"),
        # 'e->u': fn.copy_e('h', 'x'),
    }
    edge_func = builtin_by_name[reduce_type]

    # Warm-up.
    for _ in range(3):
        hg.apply_edges(edge_func)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            hg.apply_edges(edge_func)

    return timer.elapsed_secs / 10
# --- benchmarks/benchmarks/api/bench_builtin_multi_update_all.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize("feat_size", [32, 128, 512])
@utils.parametrize("num_relations", [5, 50, 500])
@utils.parametrize("multi_reduce_type", ["sum", "stack"])
def track_time(feat_size, num_relations, multi_reduce_type):
    """Return the mean wall time of ``multi_update_all`` on a heterograph.

    Every relation ``e_i`` (``n1`` -> ``n2``) uses ``copy_u`` as message
    and ``sum`` as reducer; ``multi_reduce_type`` is the cross-relation
    reducer. One untimed call precedes three timed calls.
    """
    bench_device = utils.get_bench_device()

    edge_pool = [
        dgl.data.CoraGraphDataset(verbose=False)[0].edges(),
        dgl.data.PubmedGraphDataset(verbose=False)[0].edges(),
        dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
    ]
    relations = {
        ("n1", "e_{}".format(rel), "n2"): edge_pool[rel % len(edge_pool)]
        for rel in range(num_relations)
    }

    hg = dgl.heterograph(relations).to(bench_device)
    for ntype in ("n1", "n2"):
        hg.nodes[ntype].data["h"] = torch.randn(
            (hg.num_nodes(ntype), feat_size), device=bench_device
        )

    # One (message, reduce) pair per edge type.
    per_relation_funcs = {
        "e_{}".format(rel): (fn.copy_u("h", "m"), fn.sum("m", "h"))
        for rel in range(num_relations)
    }

    # Warm-up.
    hg.multi_update_all(per_relation_funcs, multi_reduce_type)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(3):
            hg.multi_update_all(per_relation_funcs, multi_reduce_type)

    return timer.elapsed_secs / 3
# --- benchmarks/benchmarks/api/bench_builtin_update_all_coo.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize("graph_name", ["ogbn-arxiv"])
@utils.parametrize("format", ["coo"])
@utils.parametrize("feat_size", [4, 32, 256])
@utils.parametrize("msg_type", ["copy_u", "u_mul_e"])
@utils.parametrize("reduce_type", ["sum", "mean", "max"])
def track_time(graph_name, format, feat_size, msg_type, reduce_type):
    """Return the mean wall time of ``update_all`` with built-in functions.

    ``msg_type`` picks the message function and ``reduce_type`` the
    reducer. Node feature ``"h"`` has ``feat_size`` columns; edge
    feature ``"e"`` has one. One warm-up call, then three timed calls.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    g.ndata["h"] = torch.randn(
        (g.num_nodes(), feat_size), device=bench_device
    )
    g.edata["e"] = torch.randn((g.num_edges(), 1), device=bench_device)

    message_funcs = {
        "copy_u": fn.copy_u("h", "x"),
        "u_mul_e": fn.u_mul_e("h", "e", "x"),
    }
    reduce_funcs = {
        "sum": fn.sum("x", "h_new"),
        "mean": fn.mean("x", "h_new"),
        "max": fn.max("x", "h_new"),
    }
    message = message_funcs[msg_type]
    reducer = reduce_funcs[reduce_type]

    # Warm-up.
    g.update_all(message, reducer)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(3):
            g.update_all(message, reducer)

    return timer.elapsed_secs / 3
# --- benchmarks/benchmarks/api/bench_builtin_update_all_csc.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize("graph_name", ["ogbn-arxiv", "reddit", "ogbn-proteins"])
@utils.parametrize("format", ["csc"])
@utils.parametrize("feat_size", [4, 32, 256])
@utils.parametrize("msg_type", ["copy_u", "u_mul_e"])
@utils.parametrize("reduce_type", ["sum", "mean", "max"])
def track_time(graph_name, format, feat_size, msg_type, reduce_type):
    """Return the mean wall time of ``update_all`` with built-in functions.

    Same set-up as the COO variant of this benchmark, but with three
    warm-up calls and ten timed calls.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    g.ndata["h"] = torch.randn(
        (g.num_nodes(), feat_size), device=bench_device
    )
    g.edata["e"] = torch.randn((g.num_edges(), 1), device=bench_device)

    message_funcs = {
        "copy_u": fn.copy_u("h", "x"),
        "u_mul_e": fn.u_mul_e("h", "e", "x"),
    }
    reduce_funcs = {
        "sum": fn.sum("x", "h_new"),
        "mean": fn.mean("x", "h_new"),
        "max": fn.max("x", "h_new"),
    }
    message = message_funcs[msg_type]
    reducer = reduce_funcs[reduce_type]

    # Warm-up.
    for _ in range(3):
        g.update_all(message, reducer)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            g.update_all(message, reducer)

    return timer.elapsed_secs / 10
# --- benchmarks/benchmarks/api/bench_edge_ids.py ---
# NOTE: the original file carries the remark "edge_ids is not supported on
# cuda" next to a disabled `@utils.skip_if_gpu()`; the GPU parametrization
# below is what is actually active.
@utils.benchmark("time", timeout=1200)
@utils.parametrize_cpu("graph_name", ["cora", "livejournal", "friendster"])
@utils.parametrize_gpu("graph_name", ["cora", "livejournal"])
@utils.parametrize("format", ["coo", "csr", "csc"])
@utils.parametrize("fraction", [0.01, 0.1])
@utils.parametrize("return_uv", [True, False])
def track_time(graph_name, format, fraction, return_uv):
    """Return the mean wall time of a batched ``edge_ids`` query.

    A ``fraction`` of the edge IDs is sampled with replacement and turned
    into endpoint pairs through a COO copy of the graph. Those pairs are
    then looked up on the benchmarked graph.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format)
    coo_copy = utils.get_graph(graph_name, "coo")
    g = g.to(bench_device)

    total_edges = g.num_edges()
    sampled = np.random.choice(
        np.arange(total_edges, dtype=np.int64),
        int(total_edges * fraction),
    )
    sampled = torch.tensor(sampled, device="cpu", dtype=torch.int64)
    src, dst = coo_copy.find_edges(sampled)
    # Release the helper graph and the ID tensor before timing.
    del coo_copy, sampled
    src = src.to(bench_device)
    dst = dst.to(bench_device)

    # Warm-up with a single endpoint pair.
    for _ in range(10):
        _ = g.edge_ids(src[0], dst[0])

    # Timed section: full batched query.
    with utils.Timer() as timer:
        for _ in range(3):
            _ = g.edge_ids(src, dst, return_uv=return_uv)

    return timer.elapsed_secs / 3
# --- benchmarks/benchmarks/api/bench_edge_subgraph.py ---
@utils.benchmark("time")
@utils.parametrize("graph_name", ["livejournal", "reddit"])
@utils.parametrize("format", ["coo"])
@utils.parametrize("seed_egdes_num", [500, 5000, 50000])
def track_time(graph_name, format, seed_egdes_num):
    """Return the mean wall time of ``dgl.edge_subgraph``.

    ``seed_egdes_num`` edge IDs are drawn uniformly with replacement and
    used to induce the subgraph. Three warm-up calls, 50 timed calls.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    picked = np.random.randint(0, g.num_edges(), seed_egdes_num)
    picked = torch.from_numpy(picked).to(bench_device)

    # Warm-up.
    for _ in range(3):
        dgl.edge_subgraph(g, picked)

    # Timed section.
    repeats = 50
    with utils.Timer() as timer:
        for _ in range(repeats):
            dgl.edge_subgraph(g, picked)

    return timer.elapsed_secs / repeats


# --- benchmarks/benchmarks/api/bench_find_edges.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize_cpu("graph_name", ["cora", "livejournal", "friendster"])
@utils.parametrize_gpu("graph_name", ["cora", "livejournal"])
@utils.parametrize("format", ["coo"])  # csc is not supported
@utils.parametrize("fraction", [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Return the mean wall time of a batched ``find_edges`` query.

    A ``fraction`` of the edge IDs is sampled with replacement and looked
    up in one call. Warm-up issues scalar and small-tensor queries.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    total_edges = g.num_edges()
    sampled = np.random.choice(
        np.arange(total_edges, dtype=np.int64),
        int(total_edges * fraction),
    )
    sampled = torch.tensor(sampled, device=bench_device, dtype=torch.int64)

    # Warm-up: one scalar query and one growing range query per round.
    for step in range(10):
        _ = g.find_edges(step)
        _ = g.find_edges(
            torch.arange(step * 10, dtype=torch.int64, device=bench_device)
        )

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            _ = g.find_edges(sampled)

    return timer.elapsed_secs / 10
# --- benchmarks/benchmarks/api/bench_format_conversion.py ---
@utils.benchmark("time", timeout=600)
@utils.parametrize_cpu(
    "graph_name", ["cora", "pubmed", "ogbn-arxiv", "livejournal", "friendster"]
)
@utils.parametrize_gpu("graph_name", ["cora", "livejournal"])
@utils.parametrize(
    "format",
    [
        ("coo", "csc"),
        ("csc", "coo"),
        ("coo", "csr"),
        ("csr", "coo"),
        ("csr", "csc"),
        ("csc", "csr"),
    ],
)
def track_time(graph_name, format):
    """Return the mean wall time of converting between sparse formats.

    ``format`` is a ``(source, target)`` pair. The graph is restricted
    to the source format; each timed call requests the target format.
    """
    source_fmt, target_fmt = format
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, source_fmt).to(bench_device)

    if graph_name == "friendster" and format == ("coo", "csr"):
        # Rebuild with row_sorted=True to measure conversion of a COO
        # matrix flagged as sorted. The friendster dataset is already
        # sorted.
        g = dgl.graph(g.edges(), row_sorted=True)
    g = g.formats([source_fmt])

    # Warm-up.
    g.formats([target_fmt])

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            _ = g.formats([target_fmt])

    return timer.elapsed_secs / 10
# --- benchmarks/benchmarks/api/bench_fused_sample_neighbors.py ---
@utils.benchmark("time")
@utils.parametrize_cpu("graph_name", ["livejournal", "reddit"])
@utils.parametrize_gpu("graph_name", ["ogbn-arxiv", "reddit"])
@utils.parametrize("format", ["csr", "csc"])
@utils.parametrize("seed_nodes_num", [200, 5000, 20000])
@utils.parametrize("fanout", [5, 20, 40])
def track_time(graph_name, format, seed_nodes_num, fanout):
    """Return the mean wall time of ``sample_neighbors_fused``.

    The sampling direction follows the format: in-edges for ``"csc"``,
    out-edges otherwise. Seeds are drawn uniformly with replacement.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    if format == "csc":
        direction = "in"
    else:
        direction = "out"

    seeds = np.random.randint(0, g.num_nodes(), seed_nodes_num)
    seeds = torch.from_numpy(seeds).to(bench_device)

    # Warm-up.
    for _ in range(3):
        dgl.sampling.sample_neighbors_fused(
            g, seeds, fanout, edge_dir=direction
        )

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(50):
            dgl.sampling.sample_neighbors_fused(
                g, seeds, fanout, edge_dir=direction
            )

    return timer.elapsed_secs / 50


# --- benchmarks/benchmarks/api/bench_heterograph_construction.py ---
@utils.benchmark("time")
@utils.parametrize("num_relations", [5, 50, 500])
def track_time(num_relations):
    """Return the mean wall time of building a heterograph.

    The graph has ``num_relations`` edge types between ``n1`` and ``n2``;
    edge lists cycle through Cora, Pubmed and Citeseer.
    """
    edge_pool = [
        dgl.data.CoraGraphDataset(verbose=False)[0].edges(),
        dgl.data.PubmedGraphDataset(verbose=False)[0].edges(),
        dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
    ]
    relations = {
        ("n1", "e_{}".format(rel), "n2"): edge_pool[rel % len(edge_pool)]
        for rel in range(num_relations)
    }

    # Warm-up.
    _ = dgl.heterograph(relations)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(3):
            _ = dgl.heterograph(relations)

    return timer.elapsed_secs / 3
# --- benchmarks/benchmarks/api/bench_homograph_edge_construction.py ---
@utils.skip_if_gpu()
@utils.benchmark("time")
@utils.parametrize("size", ["small", "large"])
def track_time(size):
    """Return the mean wall time of ``dgl.graph`` built from an edge list.

    ``size`` selects the input: ``"small"`` uses the Citeseer edges and
    ``"large"`` the LiveJournal edges. Only the selected input is loaded,
    so the small case no longer pays for reading LiveJournal during
    set-up. An unknown ``size`` raises ``KeyError``.
    """
    # Loaders are deferred so that only the requested dataset is read.
    edge_loaders = {
        "small": lambda: dgl.data.CiteseerGraphDataset(verbose=False)[
            0
        ].edges(),
        "large": lambda: utils.get_livejournal().edges(),
    }
    edges = edge_loaders[size]()

    # Warm-up.
    dgl.graph(edges)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            _ = dgl.graph(edges)

    return timer.elapsed_secs / 10


# --- benchmarks/benchmarks/api/bench_homograph_scipy_construction.py ---
@utils.skip_if_gpu()
@utils.benchmark("time")
@utils.parametrize("size", ["small", "large"])
@utils.parametrize("scipy_format", ["coo", "csr"])
def track_time(size, scipy_format):
    """Return the mean wall time of ``dgl.from_scipy``.

    ``size`` selects the source graph (``"small"``: Citeseer,
    ``"large"``: LiveJournal) and ``scipy_format`` the SciPy sparse
    format of its adjacency matrix. Only the selected graph is loaded
    and converted. An unknown ``size`` raises ``KeyError``.
    """
    # Loaders are deferred so that only the requested dataset is read.
    graph_loaders = {
        "small": lambda: dgl.data.CiteseerGraphDataset(verbose=False)[0],
        "large": lambda: utils.get_livejournal(),
    }
    matrix = graph_loaders[size]().adj_external(scipy_fmt=scipy_format)

    # Warm-up.
    dgl.from_scipy(matrix)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(3):
            dgl.from_scipy(matrix)

    return timer.elapsed_secs / 3
# --- benchmarks/benchmarks/api/bench_in_degrees.py ---
@utils.benchmark("time", timeout=1200)
@utils.parametrize_cpu("graph_name", ["cora", "livejournal", "friendster"])
@utils.parametrize_gpu("graph_name", ["cora", "livejournal"])
# in_degrees on coo is not supported on cuda
@utils.parametrize_cpu("format", ["coo", "csc"])
@utils.parametrize_gpu("format", ["csc"])
@utils.parametrize("fraction", [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Return the mean wall time of a batched ``in_degrees`` query.

    A ``fraction`` of the node IDs is sampled with replacement. Warm-up
    uses scalar queries for nodes 0..9.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    total_nodes = g.num_nodes()
    sampled = np.random.choice(
        np.arange(total_nodes, dtype=np.int64),
        int(total_nodes * fraction),
    )
    sampled = torch.tensor(sampled, device=bench_device, dtype=torch.int64)

    # Warm-up.
    for node in range(10):
        _ = g.in_degrees(node)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            _ = g.in_degrees(sampled)

    return timer.elapsed_secs / 10
# --- benchmarks/benchmarks/api/bench_in_edges.py ---
@utils.benchmark("time", timeout=1200)
@utils.parametrize_cpu("graph_name", ["cora", "livejournal", "friendster"])
@utils.parametrize_gpu("graph_name", ["cora", "livejournal"])
# in_edges on coo is not supported on cuda
@utils.parametrize_cpu("format", ["coo", "csc"])
@utils.parametrize_gpu("format", ["csc"])
@utils.parametrize("fraction", [0.01, 0.1])
def track_time(graph_name, format, fraction):
    """Return the mean wall time of a batched ``in_edges`` query.

    A ``fraction`` of the node IDs is sampled with replacement. Warm-up
    uses scalar queries for nodes 0..9.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    total_nodes = g.num_nodes()
    sampled = np.random.choice(
        np.arange(total_nodes, dtype=np.int64),
        int(total_nodes * fraction),
    )
    sampled = torch.tensor(sampled, device=bench_device, dtype=torch.int64)

    # Warm-up.
    for node in range(10):
        _ = g.in_edges(node)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            _ = g.in_edges(sampled)

    return timer.elapsed_secs / 10


# --- benchmarks/benchmarks/api/bench_in_subgraph.py ---
@utils.benchmark("time")
@utils.parametrize("graph_name", ["livejournal", "reddit"])
@utils.parametrize("format", ["csc"])  # coo is not supported
@utils.parametrize("seed_nodes_num", [200, 5000, 20000])
def track_time(graph_name, format, seed_nodes_num):
    """Return the mean wall time of ``dgl.in_subgraph``.

    ``seed_nodes_num`` node IDs are drawn uniformly with replacement.
    Three warm-up calls, 50 timed calls.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    seeds = np.random.randint(0, g.num_nodes(), seed_nodes_num)
    seeds = torch.from_numpy(seeds).to(bench_device)

    # Warm-up.
    for _ in range(3):
        dgl.in_subgraph(g, seeds)

    # Timed section.
    repeats = 50
    with utils.Timer() as timer:
        for _ in range(repeats):
            dgl.in_subgraph(g, seeds)

    return timer.elapsed_secs / repeats
# --- benchmarks/benchmarks/api/bench_khop.py ---
@utils.benchmark("time", timeout=60)
@utils.parametrize("graph_name", ["cora"])
@utils.parametrize("format", ["coo", "csr"])
@utils.parametrize("k", [1, 3, 5])
def track_time(graph_name, format, k):
    """Return the mean wall time of ``dgl.khop_graph`` for hop count ``k``.

    The graph is restricted to ``format``. One warm-up call, ten timed
    calls.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)
    g = g.formats([format])

    # Warm-up.
    dgl.khop_graph(g, k)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(10):
            _ = dgl.khop_graph(g, k)

    return timer.elapsed_secs / 10


# --- benchmarks/benchmarks/api/bench_knn_graph.py ---
@utils.benchmark("time", timeout=60)
@utils.parametrize("k", [8, 64])
@utils.parametrize("size", [1000, 10000])
@utils.parametrize("dim", [4, 32, 256])
@utils.parametrize_cpu(
    "algorithm", ["bruteforce-blas", "bruteforce", "kd-tree", "nn-descent"]
)
@utils.parametrize_gpu(
    "algorithm",
    ["bruteforce-blas", "bruteforce", "bruteforce-sharemem", "nn-descent"],
)
def track_time(size, dim, k, algorithm):
    """Return the mean wall time of ``dgl.knn_graph``.

    Points are ``size`` rows of ``dim`` standard-normal values drawn from
    a ``RandomState`` seeded with 42, so the input is the same on every
    run.
    """
    bench_device = utils.get_bench_device()
    points = np.random.RandomState(42).randn(size, dim)
    points = torch.tensor(points, dtype=torch.float, device=bench_device)

    # Warm-up.
    for _ in range(1):
        dgl.knn_graph(points, k, algorithm=algorithm)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(5):
            dgl.knn_graph(points, k, algorithm=algorithm)

    return timer.elapsed_secs / 5
# --- benchmarks/benchmarks/api/bench_metis_partition.py ---
@utils.skip_if_gpu()
@utils.benchmark("time", timeout=1200)
@utils.parametrize("graph_name", ["reddit"])
@utils.parametrize("k", [2, 4, 8])
def track_time(graph_name, k):
    """Return the mean wall time of a ``k``-way METIS partition.

    The graph is the first item returned by ``utils.process_data``.
    One warm-up call, three timed calls.
    """
    # The device is queried as in the original, although it is not used.
    utils.get_bench_device()
    g = utils.process_data(graph_name)[0]

    # Warm-up.
    _ = dgl.transforms.metis_partition(g, k)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(3):
            _ = dgl.transforms.metis_partition(g, k)

    return timer.elapsed_secs / 3


# --- benchmarks/benchmarks/api/bench_nn_graphconv.py ---
@utils.benchmark("time")
@utils.parametrize("graph_name", ["pubmed", "ogbn-arxiv"])
@utils.parametrize("feat_dim", [4, 32, 256])
@utils.parametrize("aggr_type", ["mean", "gcn", "pool"])
def track_time(graph_name, feat_dim, aggr_type):
    """Return the mean wall time of one ``SAGEConv`` forward pass.

    The layer maps ``feat_dim`` to ``feat_dim`` features with ReLU and
    no bias; ``aggr_type`` is the aggregator. Input features are random.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name).to(bench_device)

    features = torch.randn((g.num_nodes(), feat_dim), device=bench_device)
    layer = SAGEConv(
        feat_dim, feat_dim, aggr_type, activation=F.relu, bias=False
    )
    layer = layer.to(bench_device)

    # Warm-up.
    for _ in range(3):
        layer(g, features)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(50):
            layer(g, features)

    return timer.elapsed_secs / 50
# --- benchmarks/benchmarks/api/bench_nn_heterographconv.py ---
@utils.benchmark("time")
@utils.parametrize("feat_dim", [4, 32, 256])
@utils.parametrize("num_relations", [5, 50, 200])
def track_time(feat_dim, num_relations):
    """Return the mean wall time of one ``HeteroGraphConv`` forward pass.

    The graph has node types ``n1``/``n2`` and ``num_relations`` edge
    types; each relation gets its own mean-aggregating ``SAGEConv``.

    Two defects of the earlier version are fixed here:

    * Input features were keyed by edge type. ``HeteroGraphConv`` looks
      features up by node type, so every relation was skipped and the
      timed call did no convolution work. Features are now keyed by
      ``"n1"`` and ``"n2"``.
    * The graph stayed on the CPU while the module and the features were
      moved to the benchmark device. It is now moved as well.

    Three warm-up passes, 50 timed passes.
    """
    device = utils.get_bench_device()

    candidate_edges = [
        dgl.data.CoraGraphDataset(verbose=False)[0].edges(),
        dgl.data.PubmedGraphDataset(verbose=False)[0].edges(),
        dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
    ]

    relations = {}
    modules = {}
    for i in range(num_relations):
        etype = "e_{}".format(i)
        relations[("n1", etype, "n2")] = candidate_edges[
            i % len(candidate_edges)
        ]
        modules[etype] = SAGEConv(
            feat_dim, feat_dim, "mean", activation=F.relu
        )

    graph = dgl.heterograph(relations).to(device)

    # One feature matrix per node type, as HeteroGraphConv expects.
    feat_dict = {
        ntype: torch.randn(
            (graph.num_nodes(ntype), feat_dim), device=device
        )
        for ntype in graph.ntypes
    }
    conv = HeteroGraphConv(modules).to(device)

    # Warm-up.
    for _ in range(3):
        conv(graph, feat_dict)

    # Timed section.
    with utils.Timer() as t:
        for _ in range(50):
            conv(graph, feat_dict)

    return t.elapsed_secs / 50
# --- benchmarks/benchmarks/api/bench_node_subgraph.py ---
@utils.benchmark("time")
@utils.parametrize("graph_name", ["livejournal", "reddit"])
@utils.parametrize("format", ["coo", "csc"])
@utils.parametrize("seed_nodes_num", [200, 5000, 20000])
def track_time(graph_name, format, seed_nodes_num):
    """Return the mean wall time of ``dgl.node_subgraph``.

    ``seed_nodes_num`` node IDs are drawn uniformly with replacement.
    Three warm-up calls, 50 timed calls.
    """
    bench_device = utils.get_bench_device()
    g = utils.get_graph(graph_name, format).to(bench_device)

    seeds = np.random.randint(0, g.num_nodes(), seed_nodes_num)
    seeds = torch.from_numpy(seeds).to(bench_device)

    # Warm-up.
    for _ in range(3):
        dgl.node_subgraph(g, seeds)

    # Timed section.
    repeats = 50
    with utils.Timer() as timer:
        for _ in range(repeats):
            dgl.node_subgraph(g, seeds)

    return timer.elapsed_secs / repeats


# --- benchmarks/benchmarks/api/bench_random_walk.py ---
def _random_walk(g, seeds, length):
    """Run a plain random walk of ``length`` steps from ``seeds``."""
    return dgl.sampling.random_walk(g, seeds, length=length)


def _node2vec(g, seeds, length):
    """Run a node2vec walk of ``length`` steps with both biases set to 1."""
    return dgl.sampling.node2vec_random_walk(g, seeds, 1, 1, length)


@utils.skip_if_gpu()
@utils.benchmark("time")
@utils.parametrize("graph_name", ["cora", "livejournal", "friendster"])
@utils.parametrize("num_seeds", [10, 100, 1000])
@utils.parametrize("length", [2, 5, 10, 20])
@utils.parametrize("algorithm", ["_random_walk", "_node2vec"])
def track_time(graph_name, num_seeds, length, algorithm):
    """Return the mean wall time of a random-walk sampler on a CSR graph.

    ``algorithm`` is the name of one of the two module-level helpers;
    an unknown name raises ``KeyError``. Five warm-up calls, 50 timed
    calls.
    """
    # The device is queried as in the original, although it is not used.
    utils.get_bench_device()
    g = utils.get_graph(graph_name, "csr")
    start_nodes = torch.randint(0, g.num_nodes(), (num_seeds,))
    print(graph_name, num_seeds, length)

    walkers = {"_random_walk": _random_walk, "_node2vec": _node2vec}
    walk = walkers[algorithm]

    # Warm-up.
    for _ in range(5):
        _ = walk(g, start_nodes, length=length)

    # Timed section.
    with utils.Timer() as timer:
        for _ in range(50):
            _ = walk(g, start_nodes, length=length)

    return timer.elapsed_secs / 50
@utils.benchmark("time")
@utils.parametrize("batch_size", [4, 256, 1024])
@utils.parametrize("feat_size", [16, 128, 512])
@utils.parametrize("readout_op", ["sum", "max", "min", "mean"])
@utils.parametrize("type", ["edge", "node"])
def track_time(batch_size, feat_size, readout_op, type):
    """Time ``dgl.readout_nodes`` / ``dgl.readout_edges`` on a batched graph.

    The first ``batch_size`` graphs of ``QM7bDataset`` are batched, a random
    feature ``"h"`` of width ``feat_size`` is attached to the nodes or edges
    (chosen by ``type``), and the readout with ``op=readout_op`` is timed.

    Returns
    -------
    float
        Average elapsed seconds per readout call over the timed iterations.

    Raises
    ------
    Exception
        If ``type`` is neither ``"node"`` nor ``"edge"``.
    """
    warmup_iters = 10
    timed_iters = 50

    device = utils.get_bench_device()
    dataset = dgl.data.QM7bDataset()
    # prepare graph
    batched = dgl.batch(dataset[0:batch_size][0]).to(device)

    # Attach the feature and pick the matching readout function.
    if type == "node":
        batched.ndata["h"] = torch.randn(
            (batched.num_nodes(), feat_size), device=device
        )
        readout = dgl.readout_nodes
    elif type == "edge":
        batched.edata["h"] = torch.randn(
            (batched.num_edges(), feat_size), device=device
        )
        readout = dgl.readout_edges
    else:
        raise Exception("Unknown type")

    # dry run
    for _ in range(warmup_iters):
        out = readout(batched, "h", op=readout_op)

    # timing
    with utils.Timer() as timer:
        for _ in range(timed_iters):
            out = readout(batched, "h", op=readout_op)

    return timer.elapsed_secs / timed_iters
@utils.benchmark("time", timeout=1200)
@utils.parametrize_cpu("graph_name", ["cora", "livejournal", "friendster"])
@utils.parametrize_gpu("graph_name", ["cora", "livejournal"])
@utils.parametrize("format", ["coo", "csc", "csr"])
def track_time(graph_name, format):
    """Time ``dgl.reverse`` on a graph restricted to a single sparse format.

    Parameters
    ----------
    graph_name : str
        Name passed to ``utils.get_graph``.
    format : str
        Sparse format used both to load the graph and to restrict it via
        ``graph.formats([format])``.

    Returns
    -------
    float
        Average elapsed seconds per ``dgl.reverse`` call.
    """
    timed_iters = 100

    device = utils.get_bench_device()
    loaded = utils.get_graph(graph_name, format).to(device)
    graph = loaded.formats([format])

    # dry run
    dgl.reverse(graph)

    # timing
    with utils.Timer() as timer:
        for _ in range(timed_iters):
            reversed_graph = dgl.reverse(graph)

    return timer.elapsed_secs / timed_iters
@utils.skip_if_gpu()
@utils.benchmark("time", timeout=1200)
@utils.parametrize("graph_name", ["reddit", "ogbn-products"])
@utils.parametrize("num_seed_nodes", [32, 256, 1024, 2048])
@utils.parametrize("fanout", [5, 10, 20])
def track_time(graph_name, num_seed_nodes, fanout):
    """Time ``dgl.to_block`` on neighbor-sampled subgraphs.

    Ten subgraphs are sampled up front (outside the timed region) from
    ``num_seed_nodes`` random seeds each, then each one is converted to a
    block inside the timer.

    Returns
    -------
    float
        Average elapsed seconds per ``dgl.to_block`` call.
    """
    num_subgraphs = 10

    # The returned device is not used by this benchmark.
    device = utils.get_bench_device()
    graph = utils.process_data(graph_name)[0]

    # dry run
    dgl.sampling.sample_neighbors(graph, [1, 2, 3], fanout)

    # Pre-sample the inputs so that only to_block is measured.
    sampled = []
    for _ in range(num_subgraphs):
        seeds = np.random.randint(0, graph.num_nodes(), size=num_seed_nodes)
        sampled.append(dgl.sampling.sample_neighbors(graph, seeds, fanout))

    # timing
    with utils.Timer() as timer:
        for subgraph in sampled:
            block = dgl.to_block(subgraph)

    return timer.elapsed_secs / num_subgraphs
@utils.benchmark("time", timeout=600)
@utils.parametrize("feat_size", [32, 128, 512])
@utils.parametrize("num_relations", [5, 50, 500])
@utils.parametrize("multi_reduce_type", ["sum", "stack"])
def track_time(feat_size, num_relations, multi_reduce_type):
    """Time ``multi_update_all`` with Python UDFs on a multi-relation graph.

    Each relation ``("n1", "e_<i>", "n2")`` reuses the edge list of Cora,
    Pubmed or Citeseer in round-robin order. Every relation uses a message
    UDF that copies the source feature ``"h"`` and a reduce UDF that sums
    the mailbox.

    Returns
    -------
    float
        Average elapsed seconds per ``multi_update_all`` call.
    """
    timed_iters = 3

    device = utils.get_bench_device()
    edge_pool = [
        dgl.data.CoraGraphDataset(verbose=False)[0].edges(),
        dgl.data.PubmedGraphDataset(verbose=False)[0].edges(),
        dgl.data.CiteseerGraphDataset(verbose=False)[0].edges(),
    ]
    relation_edges = {
        ("n1", "e_{}".format(rel), "n2"): edge_pool[rel % len(edge_pool)]
        for rel in range(num_relations)
    }
    graph = dgl.heterograph(relation_edges).to(device)

    # Random input features for both node types ("n1" first, then "n2").
    for ntype in ("n1", "n2"):
        graph.nodes[ntype].data["h"] = torch.randn(
            (graph.num_nodes(ntype), feat_size), device=device
        )

    # A fresh (message, reduce) UDF pair per relation.
    udfs_by_etype = {
        "e_{}".format(rel): (
            lambda edges: {"x": edges.src["h"]},
            lambda nodes: {"h_new": torch.sum(nodes.mailbox["x"], dim=1)},
        )
        for rel in range(num_relations)
    }

    # dry run
    graph.multi_update_all(udfs_by_etype, multi_reduce_type)

    # timing
    with utils.Timer() as timer:
        for _ in range(timed_iters):
            graph.multi_update_all(udfs_by_etype, multi_reduce_type)

    return timer.elapsed_secs / timed_iters
@utils.benchmark("time", timeout=600)
@utils.parametrize("graph_name", ["pubmed", "ogbn-arxiv"])
@utils.parametrize("format", ["coo"])  # only coo supports udf
@utils.parametrize("feat_size", [8, 64, 512])
@utils.parametrize("msg_type", ["copy_u", "u_mul_e"])
@utils.parametrize("reduce_type", ["sum", "mean", "max"])
def track_time(graph_name, format, feat_size, msg_type, reduce_type):
    """Time ``update_all`` with Python message and reduce UDFs.

    Parameters
    ----------
    graph_name : str
        Name passed to ``utils.get_graph``.
    format : str
        Sparse format passed to ``utils.get_graph``.
    feat_size : int
        Width of the random node feature ``"h"``.
    msg_type : str
        ``"copy_u"`` or ``"u_mul_e"``; selects the message UDF.
    reduce_type : str
        ``"sum"``, ``"mean"`` or ``"max"``; selects the reduce UDF.

    Returns
    -------
    float
        Average elapsed seconds per ``update_all`` call.
    """
    timed_iters = 3

    device = utils.get_bench_device()
    graph = utils.get_graph(graph_name, format).to(device)
    graph.ndata["h"] = torch.randn(
        (graph.num_nodes(), feat_size), device=device
    )
    graph.edata["e"] = torch.randn((graph.num_edges(), 1), device=device)

    message_udfs = {
        "copy_u": lambda edges: {"x": edges.src["h"]},
        "u_mul_e": lambda edges: {"x": edges.src["h"] * edges.data["e"]},
    }
    reduce_udfs = {
        "sum": lambda nodes: {"h_new": torch.sum(nodes.mailbox["x"], dim=1)},
        "mean": lambda nodes: {"h_new": torch.mean(nodes.mailbox["x"], dim=1)},
        "max": lambda nodes: {"h_new": torch.max(nodes.mailbox["x"], dim=1)[0]},
    }
    message_fn = message_udfs[msg_type]
    reduce_fn = reduce_udfs[reduce_type]

    # dry run
    graph.update_all(message_fn, reduce_fn)

    # timing
    with utils.Timer() as timer:
        for _ in range(timed_iters):
            graph.update_all(message_fn, reduce_fn)

    return timer.elapsed_secs / timed_iters
def calc_gflops(graph, feat_size, num_heads, time):
    """Return the achieved GFLOPS, rounded to two decimals.

    Parameters
    ----------
    graph : object
        Anything exposing ``num_edges()``.
    feat_size : int
        Feature width; the FLOP count is ``2 * num_edges * feat_size``.
    num_heads : int
        Accepted for signature parity; not used in the computation.
    time : float
        Elapsed seconds for one operation.
    """
    # count both mul and add
    total_flop = 2 * graph.num_edges() * feat_size
    gflops = total_flop / 1000000000 / time
    return round(gflops, 2)
@utils.benchmark("flops", timeout=600)
@utils.parametrize("graph", ["ogbn-arxiv", "reddit", "ogbn-proteins"])
@utils.parametrize("feat_size", [4, 32, 256])
@utils.parametrize("num_heads", [0, 1, 4])
def track_flops(graph, feat_size, num_heads):
    """Measure GFLOPS of ``dgl.ops.u_dot_v`` on a COO graph.

    With ``num_heads == 0`` the node feature is 2-D ``(N, feat_size)``;
    otherwise it is 3-D ``(N, num_heads, feat_size // num_heads)``.

    Returns the value of ``calc_gflops`` for the average time per call.
    """
    warmup_iters = 3
    timed_iters = 10

    device = utils.get_bench_device()
    graph = utils.get_graph(graph, format="coo").to(device)

    num_nodes = graph.num_nodes()
    if num_heads == 0:
        feat_shape = (num_nodes, feat_size)
    else:
        feat_shape = (num_nodes, num_heads, feat_size // num_heads)
    x = torch.randn(*feat_shape, device=device)

    # dry run
    for _ in range(warmup_iters):
        y = dgl.ops.u_dot_v(graph, x, x)

    # timing
    with utils.Timer(device) as timer:
        for _ in range(timed_iters):
            y = dgl.ops.u_dot_v(graph, x, x)

    seconds_per_call = timer.elapsed_secs / timed_iters
    return calc_gflops(graph, feat_size, num_heads, seconds_per_call)
def calc_gflops(graph, feat_size, time):
    """Return the achieved GFLOPS, rounded to two decimals.

    The operation count is ``num_edges * feat_size`` (one operation per edge
    per feature element).

    Parameters
    ----------
    graph : object
        Anything exposing ``num_edges()``.
    feat_size : int
        Feature width.
    time : float
        Elapsed seconds for one operation.
    """
    total_ops = graph.num_edges() * feat_size
    gflops = total_ops / 1000000000 / time
    return round(gflops, 2)
@utils.benchmark("flops", timeout=600)
@utils.parametrize("graph", ["ogbn-arxiv", "reddit", "ogbn-proteins"])
@utils.parametrize("feat_size", [4, 32, 256])
@utils.parametrize("num_heads", [0, 1, 4])
def track_flops(graph, feat_size, num_heads):
    """Measure GFLOPS of ``dgl.ops.u_mul_e_sum`` on a CSC graph.

    With ``num_heads == 0`` node and edge features are both 2-D with width
    ``feat_size``. Otherwise the node feature is
    ``(N, num_heads, feat_size // num_heads)`` and the edge feature is
    ``(E, num_heads, 1)``.

    Returns the value of ``calc_gflops`` for the average time per call.
    """
    warmup_iters = 3
    timed_iters = 10

    device = utils.get_bench_device()
    graph = utils.get_graph(graph, format="csc").to(device)

    if num_heads == 0:
        node_shape = (graph.num_nodes(), feat_size)
        edge_shape = (graph.num_edges(), feat_size)
    else:
        node_shape = (graph.num_nodes(), num_heads, feat_size // num_heads)
        edge_shape = (graph.num_edges(), num_heads, 1)
    # Node features are drawn before edge weights.
    x = torch.randn(*node_shape, device=device)
    w = torch.randn(*edge_shape, device=device)

    # dry run
    for _ in range(warmup_iters):
        y = dgl.ops.u_mul_e_sum(graph, x, w)

    # timing
    with utils.Timer(device) as timer:
        for _ in range(timed_iters):
            y = dgl.ops.u_mul_e_sum(graph, x, w)

    seconds_per_call = timer.elapsed_secs / timed_iters
    return calc_gflops(graph, feat_size, num_heads, seconds_per_call)
def evaluate(model, g, features, labels, mask):
    """Return the classification accuracy (in percent) on the masked nodes.

    The model is switched to eval mode and run once under ``torch.no_grad``;
    the predicted class of each masked row is the index of its largest logit.

    Parameters
    ----------
    model : callable
        Called as ``model(g, features)``; must provide ``eval()``.
    g : object
        Forwarded to ``model`` unchanged.
    features : tensor
        Forwarded to ``model`` unchanged.
    labels : tensor
        Ground-truth labels, indexed by ``mask``.
    mask : tensor
        Index/mask selecting the rows to score.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(g, features)[mask]
        targets = labels[mask]
        predicted = torch.max(masked_logits, dim=1)[1]
        num_correct = torch.sum(predicted == targets).item()
        return num_correct * 1.0 / len(targets) * 100
class GCN(nn.Module):
    """Stack of ``GraphConv`` layers with dropout between consecutive layers.

    Parameters
    ----------
    in_feats : int
        Input feature size.
    n_hidden : int
        Hidden feature size.
    n_classes : int
        Output size of the final layer.
    n_layers : int
        Number of activated layers placed before the output layer.
    activation : callable
        Activation passed to every layer except the output layer.
    dropout : float
        Dropout probability applied to the input of every layer but the first.
    """

    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super(GCN, self).__init__()
        # Widths of the activated layers: in -> hidden, then hidden -> hidden.
        widths = [in_feats, n_hidden] + [n_hidden] * (n_layers - 1)
        stack = [
            GraphConv(fan_in, fan_out, activation=activation)
            for fan_in, fan_out in zip(widths[:-1], widths[1:])
        ]
        # output layer (no activation)
        stack.append(GraphConv(n_hidden, n_classes))
        self.layers = nn.ModuleList(stack)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, g, features):
        """Apply the layers in order; dropout precedes all but the first."""
        h = features
        for position, conv in enumerate(self.layers):
            h = conv(g, h if position == 0 else self.dropout(h))
        return h
class GraphConv(nn.Module):
    """Graph convolution implemented with Python message/reduce UDFs.

    Messages carry the source feature and the source normaliser ``ci``
    (out-degree, clamped to at least 1, raised to -0.5). The reducer sums
    the scaled messages and multiplies by the destination normaliser ``cj``
    (computed the same way from the in-degree). The aggregate is then
    projected with ``weight``, shifted by ``bias`` and optionally activated.

    Parameters
    ----------
    in_dim : int
        Input feature size.
    out_dim : int
        Output feature size.
    activation : callable, optional
        Applied to the projected output when not None.
    """

    def __init__(self, in_dim, out_dim, activation=None):
        super(GraphConv, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.activation = activation
        self.weight = nn.Parameter(torch.Tensor(in_dim, out_dim))
        self.bias = nn.Parameter(torch.Tensor(out_dim))
        # Xavier-normal weight, zero bias.
        nn.init.xavier_normal_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, graph, feat):
        """Aggregate ``feat`` over ``graph`` and apply the linear projection."""
        with graph.local_scope():
            out_deg = graph.out_degrees().float().clamp(min=1)
            graph.ndata["ci"] = torch.pow(out_deg, -0.5)
            in_deg = graph.in_degrees().float().clamp(min=1)
            graph.ndata["cj"] = torch.pow(in_deg, -0.5)
            graph.ndata["h"] = feat
            graph.update_all(self.mfunc, self.rfunc)
            aggregated = graph.ndata["h"]
            projected = torch.matmul(aggregated, self.weight) + self.bias
            if self.activation is None:
                return projected
            return self.activation(projected)

    def mfunc(self, edges):
        """Message UDF: send the source feature and source normaliser."""
        source = edges.src
        return {"m": source["h"], "ci": source["ci"]}

    def rfunc(self, nodes):
        """Reduce UDF: sum the scaled messages, then scale by ``cj``."""
        src_norm = nodes.mailbox["ci"].unsqueeze(2)
        scaled = nodes.mailbox["m"] * src_norm
        dst_norm = nodes.data["cj"].unsqueeze(1)
        return {"h": scaled.sum(1) * dst_norm}
@utils.benchmark("acc", timeout=300)
@utils.parametrize("data", ["cora", "pubmed"])
def track_acc(data):
    """Train the UDF-based GCN for 200 epochs and return its test accuracy.

    The dataset named ``data`` is loaded through ``utils.process_data``. Its
    first graph is moved to the benchmark device and cast with ``.int()``,
    and self-loops are removed and re-added before training.

    Returns
    -------
    float
        Test accuracy as computed by ``evaluate``.
    """
    num_epochs = 200

    dataset = utils.process_data(data)
    device = utils.get_bench_device()

    g = dataset[0].to(device).int()
    node_data = g.ndata
    features = node_data["feat"]
    labels = node_data["label"]
    train_mask = node_data["train_mask"]
    val_mask = node_data["val_mask"]
    test_mask = node_data["test_mask"]
    in_feats = features.shape[1]
    n_classes = dataset.num_classes

    g = dgl.add_self_loop(dgl.remove_self_loop(g))

    # normalization
    norm = torch.pow(g.in_degrees().float(), -0.5)
    norm[torch.isinf(norm)] = 0
    g.ndata["norm"] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(in_feats, 16, n_classes, 1, F.relu, 0.5)
    loss_fcn = torch.nn.CrossEntropyLoss()
    model = model.to(device)
    model.train()

    # optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=5e-4
    )
    for _ in range(num_epochs):
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return evaluate(model, g, features, labels, test_mask)
class EntityClassify(nn.Module):
    """Entity classification class for RGCN

    Parameters
    ----------
    device : int
        Device to run the layer.
    num_nodes : int
        Number of nodes.
    h_dim : int
        Hidden dim size.
    out_dim : int
        Output dim size.
    num_rels : int
        Number of relation types.
    num_bases : int
        Number of bases. If None or negative, ``None`` is forwarded to
        ``RelGraphConv``.
    num_hidden_layers : int
        Number of hidden RelGraphConv Layer
    dropout : float
        Dropout
    use_self_loop : bool
        Use self loop if True, default False.
    layer_norm : bool
        Forwarded as ``layer_norm`` to every ``RelGraphConv`` layer,
        default False.
    """

    def __init__(
        self,
        device,
        num_nodes,
        h_dim,
        out_dim,
        num_rels,
        num_bases=None,
        num_hidden_layers=1,
        dropout=0,
        use_self_loop=False,
        layer_norm=False,
    ):
        super(EntityClassify, self).__init__()
        self.device = device
        self.num_nodes = num_nodes
        self.h_dim = h_dim
        self.out_dim = out_dim
        self.num_rels = num_rels
        # ``num_bases`` defaults to None; comparing None with 0 raises
        # TypeError, so test for None before the sign check.
        if num_bases is None or num_bases < 0:
            self.num_bases = None
        else:
            self.num_bases = num_bases
        self.num_hidden_layers = num_hidden_layers
        self.dropout = dropout
        self.use_self_loop = use_self_loop
        self.layer_norm = layer_norm

        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(
            RelGraphConv(
                self.h_dim,
                self.h_dim,
                self.num_rels,
                "basis",
                self.num_bases,
                activation=F.relu,
                self_loop=self.use_self_loop,
                dropout=self.dropout,
                layer_norm=layer_norm,
            )
        )
        # h2h
        for idx in range(self.num_hidden_layers):
            self.layers.append(
                RelGraphConv(
                    self.h_dim,
                    self.h_dim,
                    self.num_rels,
                    "basis",
                    self.num_bases,
                    activation=F.relu,
                    self_loop=self.use_self_loop,
                    dropout=self.dropout,
                    layer_norm=layer_norm,
                )
            )
        # h2o (no activation, no dropout)
        self.layers.append(
            RelGraphConv(
                self.h_dim,
                self.out_dim,
                self.num_rels,
                "basis",
                self.num_bases,
                activation=None,
                self_loop=self.use_self_loop,
                layer_norm=layer_norm,
            )
        )

    def forward(self, blocks, feats, norm=None):
        """Run the layers over ``blocks``, one block per layer.

        Parameters
        ----------
        blocks : list or None
            One graph/block per layer. Each must provide ``edata["etype"]``
            and ``edata["norm"]``. If None, ``self.g`` is reused for every
            layer.
        feats : tensor
            Input features for the first layer.
        norm : optional
            Not used; the normaliser is read from ``block.edata["norm"]``.

        Returns
        -------
        tensor
            Output of the last layer.
        """
        if blocks is None:
            # full graph training
            # NOTE(review): ``self.g`` is never assigned in this class, so
            # the caller must set it before passing ``blocks=None``.
            blocks = [self.g] * len(self.layers)
        h = feats
        for layer, block in zip(self.layers, blocks):
            block = block.to(self.device)
            h = layer(block, h, block.edata["etype"], block.edata["norm"])
        return h
If None, we then treat certain input feature as an one-hot encoding feature. embed_size : int Output embed size embed_name : str, optional Embed name """ def __init__( self, device, num_nodes, node_tids, num_of_ntype, input_size, embed_size, sparse_emb=False, embed_name="embed", ): super(RelGraphEmbedLayer, self).__init__() self.device = device self.embed_size = embed_size self.embed_name = embed_name self.num_nodes = num_nodes self.sparse_emb = sparse_emb # create weight embeddings for each node for each relation self.embeds = nn.ParameterDict() self.num_of_ntype = num_of_ntype self.idmap = th.empty(num_nodes).long() for ntype in range(num_of_ntype): if input_size[ntype] is not None: input_emb_size = input_size[ntype].shape[1] embed = nn.Parameter(th.Tensor(input_emb_size, self.embed_size)) nn.init.xavier_uniform_(embed) self.embeds[str(ntype)] = embed self.node_embeds = th.nn.Embedding( node_tids.shape[0], self.embed_size, sparse=self.sparse_emb ) nn.init.uniform_(self.node_embeds.weight, -1.0, 1.0) def forward(self, node_ids, node_tids, type_ids, features): """Forward computation Parameters ---------- node_ids : tensor node ids to generate embedding for. node_tids : tensor node type ids features : list of features list of initial features for nodes belong to different node type. If None, the corresponding features is an one-hot encoding feature, else use the features directly as input feature and matmul a projection matrix. 
def evaluate(model, embed_layer, eval_loader, node_feats):
    """Run mini-batch inference and collect logits with their seed ids.

    Both ``model`` and ``embed_layer`` are put in eval mode and the whole
    loop runs under ``th.no_grad``. For each batch the input embeddings are
    built from the first block's source-node data, and the seed ids are read
    from the last block's ``dstdata["type_id"]``.

    Returns
    -------
    tuple of tensor
        ``(eval_logits, eval_seeds)``, each concatenated over all batches on
        the CPU.
    """
    model.eval()
    embed_layer.eval()

    logits_per_batch = []
    seeds_per_batch = []
    with th.no_grad():
        for _, _, blocks in eval_loader:
            th.cuda.empty_cache()
            src = blocks[0].srcdata
            feats = embed_layer(
                src[dgl.NID], src[dgl.NTYPE], src["type_id"], node_feats
            )
            batch_logits = model(blocks, feats)
            logits_per_batch.append(batch_logits.cpu().detach())
            batch_seeds = blocks[-1].dstdata["type_id"]
            seeds_per_batch.append(batch_seeds.cpu().detach())

    return th.cat(logits_per_batch), th.cat(seeds_per_batch)
len(hg.canonical_etypes) node_feats = [] for ntype in hg.ntypes: if len(hg.nodes[ntype].data) == 0 or "feat" not in hg.nodes[ntype].data: node_feats.append(None) else: feat = hg.nodes[ntype].data.pop("feat") node_feats.append(feat.share_memory_()) # get target category id category_id = len(hg.ntypes) for i, ntype in enumerate(hg.ntypes): if ntype == category: category_id = i g = dgl.to_homogeneous(hg) u, v, eid = g.all_edges(form="all") # global norm _, inverse_index, count = th.unique( v, return_inverse=True, return_counts=True ) degrees = count[inverse_index] norm = th.ones(eid.shape[0]) / degrees norm = norm.unsqueeze(1) g.edata["norm"] = norm g.edata["etype"] = g.edata[dgl.ETYPE] g.ndata["type_id"] = g.ndata[dgl.NID] g.ndata["ntype"] = g.ndata[dgl.NTYPE] node_ids = th.arange(g.num_nodes()) # find out the target node ids node_tids = g.ndata[dgl.NTYPE] loc = node_tids == category_id target_nids = node_ids[loc] g = g.formats("csc") sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts) train_loader = dgl.dataloading.DataLoader( g, target_nids[train_idx], sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) test_loader = dgl.dataloading.DataLoader( g, target_nids[test_idx], sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) # node features # None for one-hot feature, if not none, it should be the feature tensor. embed_layer = RelGraphEmbedLayer( device, g.num_nodes(), node_tids, num_of_ntype, node_feats, n_hidden, sparse_emb=True, ) # create model # all model params are in device. 
model = EntityClassify( device, g.num_nodes(), n_hidden, num_classes, num_rels, num_bases=n_bases, num_hidden_layers=n_layers - 2, dropout=dropout, use_self_loop=use_self_loop, layer_norm=False, ) embed_layer = embed_layer.to(device) model = model.to(device) all_params = itertools.chain( model.parameters(), embed_layer.embeds.parameters() ) optimizer = th.optim.Adam(all_params, lr=lr, weight_decay=l2norm) emb_optimizer = th.optim.SparseAdam( list(embed_layer.node_embeds.parameters()), lr=lr ) print("start training...") for epoch in range(n_epochs): model.train() embed_layer.train() for i, sample_data in enumerate(train_loader): input_nodes, output_nodes, blocks = sample_data feats = embed_layer( input_nodes, blocks[0].srcdata["ntype"], blocks[0].srcdata["type_id"], node_feats, ) logits = model(blocks, feats) seed_idx = blocks[-1].dstdata["type_id"] loss = F.cross_entropy(logits, labels[seed_idx]) optimizer.zero_grad() emb_optimizer.zero_grad() loss.backward() optimizer.step() emb_optimizer.step() print("start testing...") test_logits, test_seeds = evaluate( model, embed_layer, test_loader, node_feats ) test_loss = F.cross_entropy(test_logits, labels[test_seeds].cpu()).item() test_acc = th.sum( test_logits.argmax(dim=1) == labels[test_seeds].cpu() ).item() / len(test_seeds) return test_acc ================================================ FILE: benchmarks/benchmarks/model_acc/bench_sage.py ================================================ import dgl import torch import torch.nn as nn import torch.nn.functional as F from dgl.nn.pytorch import SAGEConv from .. 
def evaluate(model, g, features, labels, mask):
    """Return the accuracy, in percent, of ``model`` on the masked nodes.

    The model is switched to evaluation mode (and left in that mode), run
    once on the whole graph without gradient tracking, and its per-node
    arg-max prediction is compared with ``labels`` on the rows selected by
    ``mask``.

    Parameters
    ----------
    model : callable module invoked as ``model(g, features)``.
    g : graph object forwarded untouched to ``model``.
    features : node features forwarded untouched to ``model``.
    labels : tensor of ground-truth class ids, indexable by ``mask``.
    mask : index/boolean tensor selecting the nodes to score.

    Returns
    -------
    float
        ``100 * correct / number_of_selected_nodes``.
    """
    model.eval()
    with torch.no_grad():
        masked_logits = model(g, features)[mask]
        masked_labels = labels[mask]
        # torch.max over dim=1 yields (values, indices); only the class
        # indices are needed.
        predicted = torch.max(masked_logits, dim=1)[1]
        num_correct = torch.sum(predicted == masked_labels).item()
    return num_correct * 1.0 / len(masked_labels) * 100
================================================ FILE: benchmarks/benchmarks/model_acc/bench_sage_ns.py ================================================ import time import dgl import dgl.nn.pytorch as dglnn import torch as th import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torch.utils.data import DataLoader from .. import utils class SAGE(nn.Module): def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h def inference(self, g, x, batch_size, device): """ Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). g : the entire graph. x : the input of entire node set. The inference code is written in a fashion that it could handle any number of nodes and layers. """ # During inference with sampling, multi-layer blocks are very inefficient because # lots of computations in the first few layers are repeated. # Therefore, we compute the representation of all nodes layer by layer. The nodes # on each layer are of course splitted in batches. # TODO: can we standardize this? 
def compute_acc(pred, labels):
    """
    Compute the accuracy of prediction given the labels.

    pred : per-class scores, one row per example; the predicted class is
        the arg-max along dimension 1.
    labels : ground-truth class ids (cast to ``long`` before comparing).

    Returns a 0-dim tensor: the number of matches divided by ``len(pred)``.
    """
    predicted_classes = pred.argmax(dim=1)
    hits = predicted_classes == labels.long()
    return hits.float().sum() / len(pred)
# This avoids creating certain formats in each sub-process, which saves momory and CPU. g.create_formats_() num_epochs = 20 num_hidden = 16 num_layers = 2 fan_out = "5,10" batch_size = 1024 lr = 0.003 dropout = 0.5 num_workers = 4 train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0] # Create PyTorch DataLoader for constructing blocks sampler = dgl.dataloading.MultiLayerNeighborSampler( [int(fanout) for fanout in fan_out.split(",")] ) dataloader = dgl.dataloading.DataLoader( g, train_nid, sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) # Define model and optimizer model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(device) optimizer = optim.Adam(model.parameters(), lr=lr) # dry run one epoch for step, (input_nodes, seeds, blocks) in enumerate(dataloader): # Load the input features as well as output labels # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device) blocks = [block.int().to(device) for block in blocks] batch_inputs = blocks[0].srcdata["features"] batch_labels = blocks[-1].dstdata["labels"] # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() # Training loop for epoch in range(num_epochs): # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. 
for step, (input_nodes, seeds, blocks) in enumerate(dataloader): # Load the input features as well as output labels # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device) blocks = [block.int().to(device) for block in blocks] batch_inputs = blocks[0].srcdata["features"] batch_labels = blocks[-1].dstdata["labels"] # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() test_g = g test_nid = th.nonzero( ~(test_g.ndata["train_mask"] | test_g.ndata["val_mask"]), as_tuple=True )[0] test_acc = evaluate( model, test_g, test_g.ndata["features"], test_g.ndata["labels"], test_nid, batch_size, device, ) return test_acc.item() ================================================ FILE: benchmarks/benchmarks/model_speed/__init__.py ================================================ ================================================ FILE: benchmarks/benchmarks/model_speed/bench_gat.py ================================================ import time import dgl import torch import torch.nn as nn import torch.nn.functional as F from dgl.nn.pytorch import GATConv from .. 
class GAT(nn.Module):
    """Full-graph multi-layer graph attention network built from ``GATConv``.

    The stack holds ``num_layers`` attention layers followed by one output
    layer, so ``heads`` is indexed from ``0`` up to ``num_layers`` (the last
    entry is the head count of the output layer).

    Parameters
    ----------
    num_layers : number of layers applied before the output projection.
    in_dim : input feature size.
    num_hidden : per-head hidden size.
    num_classes : per-head output size of the final layer.
    heads : sequence of head counts, one per layer (output layer last).
    activation : activation passed to every layer except the output layer.
    feat_drop, attn_drop, negative_slope : forwarded to every ``GATConv``.
    residual : residual flag for every layer except the first one.
    """

    def __init__(
        self,
        num_layers,
        in_dim,
        num_hidden,
        num_classes,
        heads,
        activation,
        feat_drop,
        attn_drop,
        negative_slope,
        residual,
    ):
        super(GAT, self).__init__()
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = activation

        # One (in_feats, out_feats, num_heads, residual, activation) tuple
        # per layer, listed in stacking order.
        # The input projection never uses a residual connection.
        layer_specs = [(in_dim, num_hidden, heads[0], False, self.activation)]
        # Multi-head outputs are flattened, hence the
        # ``num_hidden * heads[previous]`` input width of later layers.
        for idx in range(1, num_layers):
            layer_specs.append(
                (
                    num_hidden * heads[idx - 1],
                    num_hidden,
                    heads[idx],
                    residual,
                    self.activation,
                )
            )
        # Output projection: no activation.
        layer_specs.append(
            (num_hidden * heads[-2], num_classes, heads[-1], residual, None)
        )

        for n_in, n_out, n_heads, use_residual, act in layer_specs:
            self.gat_layers.append(
                GATConv(
                    n_in,
                    n_out,
                    n_heads,
                    feat_drop,
                    attn_drop,
                    negative_slope,
                    use_residual,
                    act,
                )
            )

    def forward(self, g, inputs):
        """Return per-node logits: head-averaged output of the last layer."""
        hidden = inputs
        for layer in self.gat_layers[: self.num_layers]:
            # Concatenate the heads so the next layer sees a 2-D tensor.
            hidden = layer(g, hidden).flatten(1)
        # output projection, averaged over its heads
        return self.gat_layers[-1](g, hidden).mean(1)
class GAT(nn.Module):
    """Mini-batch (block-based) graph attention network.

    One ``dglnn.GATConv`` is applied per sampled block. Every layer uses
    ``num_heads`` heads, the same ``dropout`` for features and attention,
    and a negative slope of 0.2; only the last layer has no activation.

    Parameters
    ----------
    in_feats : input feature size.
    num_heads : number of attention heads in every layer.
    n_hidden : per-head hidden size.
    n_classes : per-head output size of the last layer.
    n_layers : total number of layers requested (first + hidden + last).
    activation : activation for all layers but the last.
    dropout : feature/attention dropout probability.
    """

    def __init__(
        self,
        in_feats,
        num_heads,
        n_hidden,
        n_classes,
        n_layers,
        activation,
        dropout=0.0,
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.num_heads = num_heads

        def make_conv(in_dim, out_dim, act):
            # All layers share every hyper-parameter except sizes/activation.
            return dglnn.GATConv(
                in_dim,
                out_dim,
                num_heads=num_heads,
                feat_drop=dropout,
                attn_drop=dropout,
                activation=act,
                negative_slope=0.2,
            )

        # Heads are flattened between layers, so later layers consume
        # ``n_hidden * num_heads`` features.
        flattened_width = n_hidden * num_heads

        self.layers.append(make_conv(in_feats, n_hidden, activation))
        for _ in range(1, n_layers - 1):
            self.layers.append(make_conv(flattened_width, n_hidden, activation))
        self.layers.append(make_conv(flattened_width, n_classes, None))

    def forward(self, blocks, x):
        """Run one layer per block and return per-node log-probabilities."""
        last_index = len(self.layers) - 1
        hidden = x
        for index, (layer, block) in enumerate(zip(self.layers, blocks)):
            hidden = layer(block, hidden)
            if index < last_index:
                # Concatenate heads for every layer but the last.
                hidden = hidden.flatten(1)
        # Average over dimension 1 (the head axis of the last layer's output).
        averaged = hidden.mean(1)
        return averaged.log_softmax(dim=-1)
""" batch_inputs = g.ndata["features"][input_nodes].to(device) batch_labels = g.ndata["labels"][seeds].to(device) return batch_inputs, batch_labels @utils.benchmark("time", 600) @utils.parametrize("data", ["reddit", "ogbn-products"]) def track_time(data): data = utils.process_data(data) device = utils.get_bench_device() g = data[0] g.ndata["features"] = g.ndata["feat"] g.ndata["labels"] = g.ndata["label"] g = g.remove_self_loop().add_self_loop() in_feats = g.ndata["features"].shape[1] n_classes = data.num_classes # Create csr/coo/csc formats before launching training processes with multi-gpu. # This avoids creating certain formats in each sub-process, which saves momory and CPU. g.create_formats_() num_hidden = 16 num_heads = 8 num_layers = 2 fan_out = "10,25" batch_size = 1024 lr = 0.003 dropout = 0.5 num_workers = 4 iter_start = 3 iter_count = 10 train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0] # Create PyTorch DataLoader for constructing blocks sampler = dgl.dataloading.MultiLayerNeighborSampler( [int(fanout) for fanout in fan_out.split(",")] ) dataloader = dgl.dataloading.DataLoader( g, train_nid, sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) # Define model and optimizer model = GAT( in_feats, num_heads, num_hidden, n_classes, num_layers, F.relu, dropout ) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(device) optimizer = optim.Adam(model.parameters(), lr=lr) # Enable dataloader cpu affinitization for cpu devices (no effect on gpu) with dataloader.enable_cpu_affinity(): # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. 
class GraphConv(nn.Module):
    """Graph convolution written with user-defined message/reduce functions.

    Messages are source features scaled by ``out_degree ** -0.5``; the
    reduced sum is scaled by ``in_degree ** -0.5`` (degrees clamped to at
    least 1), then a dense ``h @ weight + bias`` and the optional activation
    are applied.

    Parameters
    ----------
    in_dim : input feature size.
    out_dim : output feature size.
    activation : optional callable applied to the layer output.
    """

    def __init__(self, in_dim, out_dim, activation=None):
        super(GraphConv, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.activation = activation
        weight = nn.Parameter(torch.Tensor(in_dim, out_dim))
        bias = nn.Parameter(torch.Tensor(out_dim))
        self.weight = weight
        self.bias = bias
        # Parameters are allocated uninitialised above; fill them here.
        nn.init.xavier_normal_(weight)
        nn.init.zeros_(bias)

    def forward(self, graph, feat):
        """Return the convolved node features for ``feat`` on ``graph``."""
        with graph.local_scope():
            # Temporary node fields; local_scope discards them on exit.
            out_deg = graph.out_degrees().float().clamp(min=1)
            graph.ndata["ci"] = torch.pow(out_deg, -0.5)
            in_deg = graph.in_degrees().float().clamp(min=1)
            graph.ndata["cj"] = torch.pow(in_deg, -0.5)
            graph.ndata["h"] = feat
            graph.update_all(self.mfunc, self.rfunc)
            aggregated = graph.ndata["h"]
            projected = torch.matmul(aggregated, self.weight) + self.bias
            if self.activation is None:
                return projected
            return self.activation(projected)

    def mfunc(self, edges):
        """Message function: send the source feature and its ``ci`` factor."""
        source = edges.src
        return {"m": source["h"], "ci": source["ci"]}

    def rfunc(self, nodes):
        """Reduce function: weighted sum over the mailbox, scaled by ``cj``."""
        mailbox = nodes.mailbox
        # Broadcast the per-message factor over the feature dimension.
        src_scale = mailbox["ci"].unsqueeze(2)
        summed = (mailbox["m"] * src_scale).sum(1)
        dst_scale = nodes.data["cj"].unsqueeze(1)
        return {"h": summed * dst_scale}
@utils.benchmark("time", timeout=300)
@utils.parametrize("data", ["cora", "pubmed"])
def track_time(data):
    """Benchmark full-graph training of the UDF-based GCN.

    Loads the dataset named by ``data``, trains a GCN built from this
    file's ``GraphConv`` layers for 5 untimed warm-up epochs, then times
    200 further epochs.

    Returns
    -------
    float
        Average seconds per epoch over the 200 timed epochs.
    """
    data = utils.process_data(data)
    device = utils.get_bench_device()

    g = data[0].to(device).int()

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    # val_mask / test_mask are read but not used below.
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]

    in_feats = features.shape[1]
    n_classes = data.num_classes

    # Make sure every node has exactly one self loop.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # normalization
    # NOTE(review): "norm" is stored on the graph, but the GraphConv layer in
    # this file derives its own "ci"/"cj" factors and never reads "norm".
    degs = g.in_degrees().float()
    norm = torch.pow(degs, -0.5)
    norm[torch.isinf(norm)] = 0  # zero-degree nodes would otherwise give inf
    g.ndata["norm"] = norm.unsqueeze(1)

    # create GCN model
    model = GCN(in_feats, 16, n_classes, 1, F.relu, 0.5)
    loss_fcn = torch.nn.CrossEntropyLoss()

    model = model.to(device)
    model.train()

    # optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # dry run: warm-up epochs that are excluded from the measurement
    for epoch in range(5):
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Timed region: only the statements inside this ``with`` are measured.
    with utils.Timer(device) as t:
        for epoch in range(200):
            logits = model(g, features)
            loss = loss_fcn(logits[train_mask], labels[train_mask])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    return t.elapsed_secs / 200
class BagOfWordsPretrained(nn.Module):
    """Bag-of-words text encoder backed by frozen pretrained word vectors.

    Token embeddings are initialised from ``field.vocab.vectors`` and kept
    fixed; only the linear projection to ``hidden_dims`` is trainable.

    Parameters
    ----------
    field : object exposing ``vocab.vectors`` (2-D tensor, one row per
        vocabulary entry), ``vocab.itos``, ``vocab.stoi`` and ``pad_token``
        — presumably a torchtext field; confirm against the caller.
    hidden_dims : int
        Output size of the projection.
    """

    def __init__(self, field, hidden_dims):
        super().__init__()
        vectors = field.vocab.vectors
        input_dims = vectors.shape[1]
        self.emb = nn.Embedding(
            len(field.vocab.itos),
            input_dims,
            padding_idx=field.vocab.stoi[field.pad_token],
        )
        # The weight is a leaf tensor that requires grad, so assigning into
        # it in place (``weight[:] = ...``) raises a RuntimeError; the copy
        # has to happen with gradient tracking disabled.
        with torch.no_grad():
            self.emb.weight.copy_(vectors)
        self.proj = nn.Linear(input_dims, hidden_dims)
        nn.init.xavier_uniform_(self.proj.weight)
        nn.init.constant_(self.proj.bias, 0)
        # Freeze the pretrained embeddings. Done inline because no
        # ``disable_grad`` helper is defined or imported in this file.
        for param in self.emb.parameters():
            param.requires_grad_(False)

    def forward(self, x, length):
        """
        x: (batch_size, max_length) LongTensor
        length: (batch_size,) LongTensor

        Returns the projected mean embedding: the sum of token embeddings
        over dimension 1 divided by ``length``, then passed through
        ``self.proj``.
        """
        x = self.emb(x).sum(1) / length.unsqueeze(1).float()
        return self.proj(x)
class WeightedSAGEConv(nn.Module):
    """GraphSAGE-style convolution with scalar edge weights.

    Source features are transformed by ``Q``, aggregated as an edge-weighted
    sum, divided by the summed edge weight (clamped to at least 1),
    concatenated with the destination features, transformed by ``W`` and
    finally L2-normalised per row.

    Parameters
    ----------
    input_dims : size of the incoming node features.
    hidden_dims : output size of ``Q``.
    output_dims : output size of ``W``.
    act : activation applied after ``Q`` and after ``W``.
    """

    def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu):
        super().__init__()

        self.act = act
        self.Q = nn.Linear(input_dims, hidden_dims)
        self.W = nn.Linear(input_dims + hidden_dims, output_dims)
        self.reset_parameters()
        self.dropout = nn.Dropout(0.5)

    def reset_parameters(self):
        """Xavier-initialise both weight matrices and zero both biases."""
        relu_gain = nn.init.calculate_gain("relu")
        for linear in (self.Q, self.W):
            nn.init.xavier_uniform_(linear.weight, gain=relu_gain)
            nn.init.constant_(linear.bias, 0)

    def forward(self, g, h, weights):
        """
        g : graph
        h : pair ``(h_src, h_dst)`` of source and destination node features
        weights : scalar edge weights
        """
        h_src, h_dst = h
        with g.local_scope():
            g.srcdata["n"] = self.act(self.Q(self.dropout(h_src)))
            g.edata["w"] = weights.float()
            # Weighted sum of transformed neighbour features ...
            g.update_all(fn.u_mul_e("n", "w", "m"), fn.sum("m", "n"))
            # ... and the total incoming edge weight used to normalise it.
            g.update_all(fn.copy_e("w", "m"), fn.sum("m", "ws"))
            neigh_sum = g.dstdata["n"]
            weight_sum = g.dstdata["ws"].unsqueeze(1).clamp(min=1)
            combined = torch.cat([neigh_sum / weight_sum, h_dst], 1)
            z = self.act(self.W(self.dropout(combined)))
            row_norm = z.norm(2, 1, keepdim=True)
            # Replace zero norms by 1 so all-zero rows do not divide by zero.
            one = torch.tensor(1.0).to(row_norm)
            safe_norm = torch.where(row_norm == 0, one, row_norm)
            return z / safe_norm
""" super().__init__() self.convs = nn.ModuleList() for _ in range(n_layers): self.convs.append( WeightedSAGEConv(hidden_dims, hidden_dims, hidden_dims) ) def forward(self, blocks, h): for layer, block in zip(self.convs, blocks): h_dst = h[: block.num_nodes("DST/" + block.ntypes[0])] h = layer(block, (h, h_dst), block.edata["weights"]) return h class LinearProjector(nn.Module): """ Projects each input feature of the graph linearly and sums them up """ def __init__(self, full_graph, ntype, textset, hidden_dims): super().__init__() self.ntype = ntype self.inputs = _init_input_modules( full_graph, ntype, textset, hidden_dims ) def forward(self, ndata): projections = [] for feature, data in ndata.items(): if feature == dgl.NID or feature.endswith("__len"): # This is an additional feature indicating the length of the ``feature`` # column; we shouldn't process this. continue module = self.inputs[feature] if isinstance(module, (BagOfWords, BagOfWordsPretrained)): # Textual feature; find the length and pass it to the textual module. 
class ItemToItemBatchSampler(IterableDataset):
    """Endless stream of (head, tail, negative-tail) item batches.

    Heads are drawn uniformly from the item nodes; the tail of each head is
    the end point of an item -> user -> item random walk, and the negative
    tail is another uniformly drawn item. Heads whose walk ended early
    (tail ``-1``) are dropped, so a batch may hold fewer than ``batch_size``
    triplets.

    Parameters
    ----------
    g : heterogeneous user-item graph.
    user_type, item_type : node type names of users and items.
    batch_size : number of heads drawn per batch.
    """

    def __init__(self, g, user_type, item_type, batch_size):
        self.g = g
        self.user_type = user_type
        self.item_type = item_type
        # Take the first edge type of the metagraph in each direction.
        user_item_etypes = list(g.metagraph()[user_type][item_type])
        self.user_to_item_etype = user_item_etypes[0]
        item_user_etypes = list(g.metagraph()[item_type][user_type])
        self.item_to_user_etype = item_user_etypes[0]
        self.batch_size = batch_size

    def __iter__(self):
        two_hop_path = [self.item_to_user_etype, self.user_to_item_etype]
        shape = (self.batch_size,)
        while True:
            heads = torch.randint(0, self.g.num_nodes(self.item_type), shape)
            traces = dgl.sampling.random_walk(
                self.g, heads, metapath=two_hop_path
            )[0]
            # Column 2 is the node reached after both hops.
            tails = traces[:, 2]
            neg_tails = torch.randint(
                0, self.g.num_nodes(self.item_type), shape
            )
            reached = tails != -1
            yield heads[reached], tails[reached], neg_tails[reached]
def assign_textual_node_features(ndata, textset, ntype):
    """
    Assigns numericalized tokens from a torchtext dataset to given node data.

    The numericalized tokens are stored in ``ndata`` under the same name as
    the field, and the lengths under ``field_name + '__len'``.

    ndata : mapping of node features
        Node data of a block (e.g. ``block.srcdata``). It must contain
        ``dgl.NID`` — the IDs of the nodes in the original graph — which are
        used as indices into ``textset``. It is modified in place.
    textset : torchtext.data.Dataset or None
        A torchtext dataset whose number of examples is the same as that
        of nodes in the original graph. ``None`` means the graph has no
        textual features; the function then does nothing, matching how
        ``_init_input_modules`` treats a missing textset.
    ntype : str
        Node type; currently unused, kept for interface compatibility.
    """
    if textset is None:
        # No textual features: previously this crashed with AttributeError
        # on ``textset.fields``.
        return

    node_ids = ndata[dgl.NID].numpy()

    for field_name, field in textset.fields.items():
        examples = [getattr(textset[i], field_name) for i in node_ids]

        # ``process`` must return a (tokens, lengths) pair here — presumably
        # the field was built with lengths enabled; confirm against the
        # dataset setup.
        tokens, lengths = field.process(examples)

        if not field.batch_first:
            # Transpose so the first axis matches the node order.
            tokens = tokens.t()

        ndata[field_name] = tokens
        ndata[field_name + "__len"] = lengths
class PinSAGECollator(object):
    """Collate functions turning sampled item batches into PinSAGE inputs.

    Parameters
    ----------
    sampler : object providing ``sample_from_item_pairs`` and
        ``sample_blocks``.
    g : the full graph node features are copied from.
    ntype : node type whose features are copied.
    textset : textual dataset forwarded to the feature assignment.
    """

    def __init__(self, sampler, g, ntype, textset):
        self.sampler = sampler
        self.ntype = ntype
        self.g = g
        self.textset = textset

    def _attach_features(self, blocks):
        # Copy node (and textual) features onto the sampled blocks in place.
        assign_features_to_blocks(blocks, self.g, self.textset, self.ntype)
        return blocks

    def collate_train(self, batches):
        """Build ``(pos_graph, neg_graph, blocks)`` from the first batch."""
        heads, tails, neg_tails = batches[0]
        # Construct multilayer neighborhood via PinSAGE...
        sampled = self.sampler.sample_from_item_pairs(heads, tails, neg_tails)
        pos_graph, neg_graph, blocks = sampled
        self._attach_features(blocks)
        return pos_graph, neg_graph, blocks

    def collate_test(self, samples):
        """Build the feature-annotated blocks for a batch of seed node ids."""
        seeds = torch.LongTensor(samples)
        return self._attach_features(self.sampler.sample_blocks(seeds))
@utils.benchmark("time", 1200)
@utils.parametrize("data", ["aifb", "am"])
def track_time(data):
    """Benchmark full-graph R-GCN training on the dataset named ``data``.

    Parameters
    ----------
    data : str
        ``"aifb"`` or ``"am"``; any other value raises ``ValueError``.

    Returns
    -------
    float
        Average wall-clock seconds per epoch over 30 training epochs.
    """
    # Per-dataset hyper-parameters.
    if data == "aifb":
        num_bases = -1
        l2norm = 0.0
    elif data == "am":
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError()

    (
        g,
        num_rels,
        num_classes,
        labels,
        train_idx,
        test_idx,
        target_idx,
    ) = rgcn.load_data(data, get_norm=True)

    num_hidden = 16

    model = rgcn.RGCN(
        g.num_nodes(), num_hidden, num_classes, num_rels, num_bases=num_bases
    )

    device = utils.get_bench_device()
    labels = labels.to(device)
    model = model.to(device)
    g = g.int().to(device)

    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=l2norm
    )

    model.train()
    num_epochs = 30

    # NOTE(review): there is no warm-up epoch and timing uses time.time()
    # directly, whereas the GCN benchmark in this dump wraps its loop in
    # utils.Timer(device); confirm whether device synchronisation is needed.
    t0 = time.time()
    for epoch in range(num_epochs):
        logits = model(g)
        # Keep only the rows selected by target_idx before taking the
        # training subset.
        logits = logits[target_idx]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    t1 = time.time()

    return (t1 - t0) / num_epochs
benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py ================================================ import itertools import time import traceback import dgl import dgl.nn.pytorch as dglnn import torch as th import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torch.utils.data import DataLoader from .. import utils class RelGraphConvLayer(nn.Module): r"""Relational graph convolution layer. Parameters ---------- in_feat : int Input feature size. out_feat : int Output feature size. rel_names : list[str] Relation names. num_bases : int, optional Number of bases. If is none, use number of relations. Default: None. weight : bool, optional True if a linear layer is applied after message passing. Default: True bias : bool, optional True if bias is added. Default: True activation : callable, optional Activation function. Default: None self_loop : bool, optional True to include self loop message. Default: False dropout : float, optional Dropout rate. 
Default: 0.0 """ def __init__( self, in_feat, out_feat, rel_names, num_bases, *, weight=True, bias=True, activation=None, self_loop=False, dropout=0.0 ): super(RelGraphConvLayer, self).__init__() self.in_feat = in_feat self.out_feat = out_feat self.rel_names = rel_names self.num_bases = num_bases self.bias = bias self.activation = activation self.self_loop = self_loop self.conv = dglnn.HeteroGraphConv( { rel: dglnn.GraphConv( in_feat, out_feat, norm="right", weight=False, bias=False ) for rel in rel_names } ) self.use_weight = weight self.use_basis = num_bases < len(self.rel_names) and weight if self.use_weight: if self.use_basis: self.basis = dglnn.WeightBasis( (in_feat, out_feat), num_bases, len(self.rel_names) ) else: self.weight = nn.Parameter( th.Tensor(len(self.rel_names), in_feat, out_feat) ) nn.init.xavier_uniform_( self.weight, gain=nn.init.calculate_gain("relu") ) # bias if bias: self.h_bias = nn.Parameter(th.Tensor(out_feat)) nn.init.zeros_(self.h_bias) # weight for self loop if self.self_loop: self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat)) nn.init.xavier_uniform_( self.loop_weight, gain=nn.init.calculate_gain("relu") ) self.dropout = nn.Dropout(dropout) def forward(self, g, inputs): """Forward computation Parameters ---------- g : DGLGraph Input graph. inputs : dict[str, torch.Tensor] Node feature for each node type. Returns ------- dict[str, torch.Tensor] New node features for each node type. 
""" g = g.local_var() if self.use_weight: weight = self.basis() if self.use_basis else self.weight wdict = { self.rel_names[i]: {"weight": w.squeeze(0)} for i, w in enumerate(th.split(weight, 1, dim=0)) } else: wdict = {} if g.is_block: inputs_src = inputs inputs_dst = { k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items() } else: inputs_src = inputs_dst = inputs hs = self.conv(g, inputs, mod_kwargs=wdict) def _apply(ntype, h): if self.self_loop: h = h + th.matmul(inputs_dst[ntype], self.loop_weight) if self.bias: h = h + self.h_bias if self.activation: h = self.activation(h) return self.dropout(h) return {ntype: _apply(ntype, h) for ntype, h in hs.items()} class RelGraphEmbed(nn.Module): r"""Embedding layer for featureless heterograph.""" def __init__( self, g, device, embed_size, num_nodes, node_feats, embed_name="embed", activation=None, dropout=0.0, ): super(RelGraphEmbed, self).__init__() self.g = g self.device = device self.embed_size = embed_size self.embed_name = embed_name self.activation = activation self.dropout = nn.Dropout(dropout) self.node_feats = node_feats # create weight embeddings for each node for each relation self.embeds = nn.ParameterDict() self.node_embeds = nn.ModuleDict() for ntype in g.ntypes: if node_feats[ntype] is None: sparse_emb = th.nn.Embedding( num_nodes[ntype], embed_size, sparse=True ) nn.init.uniform_(sparse_emb.weight, -1.0, 1.0) self.node_embeds[ntype] = sparse_emb else: input_emb_size = node_feats[ntype].shape[1] embed = nn.Parameter(th.Tensor(input_emb_size, embed_size)) nn.init.xavier_uniform_(embed) self.embeds[ntype] = embed def forward(self, block=None): """Forward computation Parameters ---------- block : DGLGraph, optional If not specified, directly return the full graph with embeddings stored in :attr:`embed_name`. Otherwise, extract and store the embeddings to the block graph and return. Returns ------- DGLGraph The block graph fed with embeddings. 
""" embeds = {} for ntype in block.ntypes: if self.node_feats[ntype] is None: embeds[ntype] = self.node_embeds[ntype](block.nodes(ntype)).to( self.device ) else: embeds[ntype] = ( self.node_feats[ntype][block.nodes(ntype)].to(self.device) @ self.embeds[ntype] ) return embeds class EntityClassify(nn.Module): def __init__( self, g, h_dim, out_dim, num_bases, num_hidden_layers=1, dropout=0, use_self_loop=False, ): super(EntityClassify, self).__init__() self.g = g self.h_dim = h_dim self.out_dim = out_dim self.rel_names = list(set(g.etypes)) self.rel_names.sort() if num_bases < 0 or num_bases > len(self.rel_names): self.num_bases = len(self.rel_names) else: self.num_bases = num_bases self.num_hidden_layers = num_hidden_layers self.dropout = dropout self.use_self_loop = use_self_loop self.layers = nn.ModuleList() # i2h self.layers.append( RelGraphConvLayer( self.h_dim, self.h_dim, self.rel_names, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, weight=False, ) ) # h2h for i in range(self.num_hidden_layers): self.layers.append( RelGraphConvLayer( self.h_dim, self.h_dim, self.rel_names, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, ) ) # h2o self.layers.append( RelGraphConvLayer( self.h_dim, self.out_dim, self.rel_names, self.num_bases, activation=None, self_loop=self.use_self_loop, ) ) def forward(self, h, blocks): for layer, block in zip(self.layers, blocks): h = layer(block, h) return h @utils.benchmark("time", 600) @utils.parametrize("data", ["ogbn-mag"]) def track_time(data): dataset = utils.process_data(data) device = utils.get_bench_device() if data == "ogbn-mag": n_bases = 2 l2norm = 0 else: raise ValueError() fanout = 4 n_layers = 2 batch_size = 1024 n_hidden = 64 dropout = 0.5 use_self_loop = True lr = 0.01 iter_start = 3 iter_count = 10 hg = dataset[0] category = dataset.predict_category num_classes = dataset.num_classes train_mask = hg.nodes[category].data.pop("train_mask") 
train_idx = th.nonzero(train_mask, as_tuple=False).squeeze() labels = hg.nodes[category].data.pop("labels") node_feats = {} num_nodes = {} for ntype in hg.ntypes: node_feats[ntype] = ( hg.nodes[ntype].data["feat"] if "feat" in hg.nodes[ntype].data else None ) num_nodes[ntype] = hg.num_nodes(ntype) embed_layer = RelGraphEmbed(hg, device, n_hidden, num_nodes, node_feats) model = EntityClassify( hg, n_hidden, num_classes, num_bases=n_bases, num_hidden_layers=n_layers - 2, dropout=dropout, use_self_loop=use_self_loop, ) embed_layer = embed_layer.to(device) model = model.to(device) all_params = itertools.chain( model.parameters(), embed_layer.embeds.parameters() ) optimizer = th.optim.Adam(all_params, lr=lr, weight_decay=l2norm) sparse_optimizer = th.optim.SparseAdam( list(embed_layer.node_embeds.parameters()), lr=lr ) sampler = dgl.dataloading.MultiLayerNeighborSampler([fanout] * n_layers) loader = dgl.dataloading.DataLoader( hg, {category: train_idx}, sampler, batch_size=batch_size, shuffle=True, num_workers=4, ) print("start training...") model.train() embed_layer.train() optimizer.zero_grad() sparse_optimizer.zero_grad() # Enable dataloader cpu affinitization for cpu devices (no effect on gpu) with loader.enable_cpu_affinity(): for step, (input_nodes, seeds, blocks) in enumerate(loader): blocks = [blk.to(device) for blk in blocks] seeds = seeds[ category ] # we only predict the nodes with type "category" batch_tic = time.time() emb = embed_layer(blocks[0]) lbl = labels[seeds].to(device) emb = {k: e.to(device) for k, e in emb.items()} logits = model(emb, blocks)[category] loss = F.cross_entropy(logits, lbl) loss.backward() optimizer.step() sparse_optimizer.step() # start timer at before iter_start if step == iter_start - 1: t0 = time.time() elif ( step == iter_count + iter_start - 1 ): # time iter_count iterations break t1 = time.time() return (t1 - t0) / iter_count ================================================ FILE: 
benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py ================================================ import itertools import time import dgl import dgl.nn.pytorch as dglnn import torch as th import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from dgl.nn import RelGraphConv from torch.utils.data import DataLoader from .. import utils class EntityClassify(nn.Module): """Entity classification class for RGCN Parameters ---------- device : int Device to run the layer. num_nodes : int Number of nodes. h_dim : int Hidden dim size. out_dim : int Output dim size. num_rels : int Numer of relation types. num_bases : int Number of bases. If is none, use number of relations. num_hidden_layers : int Number of hidden RelGraphConv Layer dropout : float Dropout use_self_loop : bool Use self loop if True, default False. """ def __init__( self, device, num_nodes, h_dim, out_dim, num_rels, num_bases=None, num_hidden_layers=1, dropout=0, use_self_loop=False, layer_norm=False, ): super(EntityClassify, self).__init__() self.device = device self.num_nodes = num_nodes self.h_dim = h_dim self.out_dim = out_dim self.num_rels = num_rels self.num_bases = None if num_bases < 0 else num_bases self.num_hidden_layers = num_hidden_layers self.dropout = dropout self.use_self_loop = use_self_loop self.layer_norm = layer_norm self.layers = nn.ModuleList() # i2h self.layers.append( RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, layer_norm=layer_norm, ) ) # h2h for idx in range(self.num_hidden_layers): self.layers.append( RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, layer_norm=layer_norm, ) ) # h2o self.layers.append( RelGraphConv( self.h_dim, self.out_dim, self.num_rels, "basis", self.num_bases, activation=None, 
self_loop=self.use_self_loop, layer_norm=layer_norm, ) ) def forward(self, blocks, feats, norm=None): if blocks is None: # full graph training blocks = [self.g] * len(self.layers) h = feats for layer, block in zip(self.layers, blocks): block = block.to(self.device) h = layer(block, h, block.edata["etype"], block.edata["norm"]) return h class RelGraphEmbedLayer(nn.Module): r"""Embedding layer for featureless heterograph. Parameters ---------- device : int Device to run the layer. num_nodes : int Number of nodes. node_tides : tensor Storing the node type id for each node starting from 0 num_of_ntype : int Number of node types input_size : list of int A list of input feature size for each node type. If None, we then treat certain input feature as an one-hot encoding feature. embed_size : int Output embed size embed_name : str, optional Embed name """ def __init__( self, device, num_nodes, node_tids, num_of_ntype, input_size, embed_size, sparse_emb=False, embed_name="embed", ): super(RelGraphEmbedLayer, self).__init__() self.device = device self.embed_size = embed_size self.embed_name = embed_name self.num_nodes = num_nodes self.sparse_emb = sparse_emb # create weight embeddings for each node for each relation self.embeds = nn.ParameterDict() self.num_of_ntype = num_of_ntype self.idmap = th.empty(num_nodes).long() for ntype in range(num_of_ntype): if input_size[ntype] is not None: input_emb_size = input_size[ntype].shape[1] embed = nn.Parameter(th.Tensor(input_emb_size, self.embed_size)) nn.init.xavier_uniform_(embed) self.embeds[str(ntype)] = embed self.node_embeds = th.nn.Embedding( node_tids.shape[0], self.embed_size, sparse=self.sparse_emb ) nn.init.uniform_(self.node_embeds.weight, -1.0, 1.0) def forward(self, node_ids, node_tids, type_ids, features): """Forward computation Parameters ---------- node_ids : tensor node ids to generate embedding for. 
node_tids : tensor node type ids features : list of features list of initial features for nodes belong to different node type. If None, the corresponding features is an one-hot encoding feature, else use the features directly as input feature and matmul a projection matrix. Returns ------- tensor embeddings as the input of the next layer """ tsd_ids = node_ids.to(self.node_embeds.weight.device) embeds = th.empty( node_ids.shape[0], self.embed_size, device=self.device ) for ntype in range(self.num_of_ntype): if features[ntype] is not None: loc = node_tids == ntype embeds[loc] = features[ntype][type_ids[loc]].to( self.device ) @ self.embeds[str(ntype)].to(self.device) else: loc = node_tids == ntype embeds[loc] = self.node_embeds(tsd_ids[loc]).to(self.device) return embeds @utils.benchmark("time", 600) @utils.parametrize("data", ["am", "ogbn-mag"]) def track_time(data): dataset = utils.process_data(data) device = utils.get_bench_device() if data == "am": batch_size = 64 n_bases = 40 l2norm = 5e-4 elif data == "ogbn-mag": batch_size = 1024 n_bases = 2 l2norm = 0 else: raise ValueError() fanouts = [25, 15] n_layers = 2 n_hidden = 64 dropout = 0.5 use_self_loop = True lr = 0.01 num_workers = 4 iter_start = 3 iter_count = 10 hg = dataset[0] category = dataset.predict_category num_classes = dataset.num_classes train_mask = hg.nodes[category].data.pop("train_mask") train_idx = th.nonzero(train_mask, as_tuple=False).squeeze() labels = hg.nodes[category].data.pop("labels").to(device) num_of_ntype = len(hg.ntypes) num_rels = len(hg.canonical_etypes) node_feats = [] for ntype in hg.ntypes: if len(hg.nodes[ntype].data) == 0 or "feat" not in hg.nodes[ntype].data: node_feats.append(None) else: feat = hg.nodes[ntype].data.pop("feat") node_feats.append(feat.share_memory_()) # get target category id category_id = len(hg.ntypes) for i, ntype in enumerate(hg.ntypes): if ntype == category: category_id = i g = dgl.to_homogeneous(hg) u, v, eid = g.all_edges(form="all") # global norm _, 
inverse_index, count = th.unique( v, return_inverse=True, return_counts=True ) degrees = count[inverse_index] norm = th.ones(eid.shape[0]) / degrees norm = norm.unsqueeze(1) g.edata["norm"] = norm g.edata["etype"] = g.edata[dgl.ETYPE] g.ndata["type_id"] = g.ndata[dgl.NID] g.ndata["ntype"] = g.ndata[dgl.NTYPE] node_ids = th.arange(g.num_nodes()) # find out the target node ids node_tids = g.ndata[dgl.NTYPE] loc = node_tids == category_id target_nids = node_ids[loc] train_nids = target_nids[train_idx] g = g.formats("csc") sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts) loader = dgl.dataloading.DataLoader( g, target_nids[train_idx], sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) # node features # None for one-hot feature, if not none, it should be the feature tensor. # embed_layer = RelGraphEmbedLayer( device, g.num_nodes(), node_tids, num_of_ntype, node_feats, n_hidden, sparse_emb=True, ) # create model # all model params are in device. 
model = EntityClassify( device, g.num_nodes(), n_hidden, num_classes, num_rels, num_bases=n_bases, num_hidden_layers=n_layers - 2, dropout=dropout, use_self_loop=use_self_loop, layer_norm=False, ) embed_layer = embed_layer.to(device) model = model.to(device) all_params = itertools.chain( model.parameters(), embed_layer.embeds.parameters() ) optimizer = th.optim.Adam(all_params, lr=lr, weight_decay=l2norm) emb_optimizer = th.optim.SparseAdam( list(embed_layer.node_embeds.parameters()), lr=lr ) print("start training...") model.train() embed_layer.train() # Enable dataloader cpu affinitization for cpu devices (no effect on gpu) with loader.enable_cpu_affinity(): for step, sample_data in enumerate(loader): input_nodes, output_nodes, blocks = sample_data feats = embed_layer( input_nodes, blocks[0].srcdata["ntype"], blocks[0].srcdata["type_id"], node_feats, ) logits = model(blocks, feats) seed_idx = blocks[-1].dstdata["type_id"] loss = F.cross_entropy(logits, labels[seed_idx]) optimizer.zero_grad() emb_optimizer.zero_grad() loss.backward() optimizer.step() emb_optimizer.step() # start timer at before iter_start if step == iter_start - 1: t0 = time.time() elif ( step == iter_count + iter_start - 1 ): # time iter_count iterations break t1 = time.time() return (t1 - t0) / iter_count ================================================ FILE: benchmarks/benchmarks/model_speed/bench_sage.py ================================================ import time import dgl import torch import torch.nn as nn import torch.nn.functional as F from dgl.nn.pytorch import SAGEConv from .. 
import utils class GraphSAGE(nn.Module): def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout, aggregator_type, ): super(GraphSAGE, self).__init__() self.layers = nn.ModuleList() self.dropout = nn.Dropout(dropout) self.activation = activation # input layer self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type)) # hidden layers for i in range(n_layers - 1): self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type)) # output layer self.layers.append( SAGEConv(n_hidden, n_classes, aggregator_type) ) # activation None def forward(self, graph, inputs): h = self.dropout(inputs) for l, layer in enumerate(self.layers): h = layer(graph, h) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h @utils.benchmark("time") @utils.parametrize("data", ["cora", "pubmed"]) def track_time(data): data = utils.process_data(data) device = utils.get_bench_device() num_epochs = 200 g = data[0].to(device) features = g.ndata["feat"] labels = g.ndata["label"] train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) # create model model = GraphSAGE(in_feats, 16, n_classes, 1, F.relu, 0.5, "gcn") loss_fcn = torch.nn.CrossEntropyLoss() model = model.to(device) model.train() # optimizer optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) # dry run for i in range(10): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() # timing t0 = time.time() for epoch in range(num_epochs): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() t1 = time.time() return (t1 - t0) / num_epochs ================================================ FILE: benchmarks/benchmarks/model_speed/bench_sage_ns.py 
================================================ import time import dgl import dgl.nn.pytorch as dglnn import torch as th import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from torch.utils.data import DataLoader from .. import utils class SAGE(nn.Module): def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h def load_subtensor(g, seeds, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. """ batch_inputs = g.ndata["features"][input_nodes].to(device) batch_labels = g.ndata["labels"][seeds].to(device) return batch_inputs, batch_labels @utils.benchmark("time", 600) @utils.parametrize("data", ["reddit", "ogbn-products"]) def track_time(data): data = utils.process_data(data) device = utils.get_bench_device() g = data[0] g.ndata["features"] = g.ndata["feat"] g.ndata["labels"] = g.ndata["label"] in_feats = g.ndata["features"].shape[1] n_classes = data.num_classes # Create csr/coo/csc formats before launching training processes with multi-gpu. # This avoids creating certain formats in each sub-process, which saves momory and CPU. 
g.create_formats_() num_epochs = 20 num_hidden = 16 num_layers = 2 fan_out = "10,25" batch_size = 1024 lr = 0.003 dropout = 0.5 num_workers = 4 iter_start = 3 iter_count = 10 train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0] # Create PyTorch DataLoader for constructing blocks sampler = dgl.dataloading.MultiLayerNeighborSampler( [int(fanout) for fanout in fan_out.split(",")] ) dataloader = dgl.dataloading.DataLoader( g, train_nid, sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) # Define model and optimizer model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(device) optimizer = optim.Adam(model.parameters(), lr=lr) # Enable dataloader cpu affinitization for cpu devices (no effect on gpu) with dataloader.enable_cpu_affinity(): # Training loop avg = 0 iter_tput = [] for step, (input_nodes, seeds, blocks) in enumerate(dataloader): # Load the input features as well as output labels # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device) blocks = [block.int().to(device) for block in blocks] batch_inputs = blocks[0].srcdata["features"] batch_labels = blocks[-1].dstdata["labels"] # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() # start timer at before iter_start if step == iter_start - 1: t0 = time.time() elif ( step == iter_count + iter_start - 1 ): # time iter_count iterations break t1 = time.time() return (t1 - t0) / iter_count ================================================ FILE: benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py ================================================ import time import dgl import dgl.function as fn import dgl.nn.pytorch as dglnn import numpy as np import torch as th import torch.nn as nn import torch.nn.functional as F import 
torch.optim as optim from .. import utils class NegativeSampler(object): def __init__(self, g, k, neg_share=False): self.weights = g.in_degrees().float() ** 0.75 self.k = k self.neg_share = neg_share def __call__(self, g, eids): src, _ = g.find_edges(eids) n = len(src) if self.neg_share and n % self.k == 0: dst = self.weights.multinomial(n, replacement=True) dst = dst.view(-1, 1, self.k).expand(-1, self.k, -1).flatten() else: dst = self.weights.multinomial(n * self.k, replacement=True) src = src.repeat_interleave(self.k) return src, dst def load_subtensor(g, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. """ batch_inputs = g.ndata["features"][input_nodes].to(device) return batch_inputs class SAGE(nn.Module): def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h def load_subtensor(g, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. 
""" batch_inputs = g.ndata["features"][input_nodes].to(device) return batch_inputs class CrossEntropyLoss(nn.Module): def forward(self, block_outputs, pos_graph, neg_graph): with pos_graph.local_scope(): pos_graph.ndata["h"] = block_outputs pos_graph.apply_edges(fn.u_dot_v("h", "h", "score")) pos_score = pos_graph.edata["score"] with neg_graph.local_scope(): neg_graph.ndata["h"] = block_outputs neg_graph.apply_edges(fn.u_dot_v("h", "h", "score")) neg_score = neg_graph.edata["score"] score = th.cat([pos_score, neg_score]) label = th.cat( [th.ones_like(pos_score), th.zeros_like(neg_score)] ).long() loss = F.binary_cross_entropy_with_logits(score, label.float()) return loss @utils.benchmark("time", 600) @utils.parametrize("data", ["reddit"]) @utils.parametrize("num_negs", [2, 8, 32]) @utils.parametrize("batch_size", [1024, 2048, 8192]) def track_time(data, num_negs, batch_size): data = utils.process_data(data) device = utils.get_bench_device() g = data[0] g.ndata["features"] = g.ndata["feat"] g.ndata["labels"] = g.ndata["label"] in_feats = g.ndata["features"].shape[1] n_classes = data.num_classes # Create csr/coo/csc formats before launching training processes with multi-gpu. # This avoids creating certain formats in each sub-process, which saves momory and CPU. g.create_formats_() num_epochs = 2 num_hidden = 16 num_layers = 2 fan_out = "10,25" lr = 0.003 dropout = 0.5 num_workers = 4 num_negs = 2 iter_start = 3 iter_count = 10 n_edges = g.num_edges() train_seeds = np.arange(n_edges) # Create PyTorch DataLoader for constructing blocks sampler = dgl.dataloading.MultiLayerNeighborSampler( [int(fanout) for fanout in fan_out.split(",")] ) sampler = dgl.dataloading.as_edge_prediction_sampler( sampler, exclude="reverse_id", # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2. 
reverse_eids=th.cat( [th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)] ), negative_sampler=NegativeSampler(g, num_negs), ) dataloader = dgl.dataloading.DataLoader( g, train_seeds, sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, ) # Define model and optimizer model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout) model = model.to(device) loss_fcn = CrossEntropyLoss() loss_fcn = loss_fcn.to(device) optimizer = optim.Adam(model.parameters(), lr=lr) # Training loop avg = 0 iter_tput = [] for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate( dataloader ): # Load the input features as well as output labels batch_inputs = load_subtensor(g, input_nodes, device) pos_graph = pos_graph.to(device) neg_graph = neg_graph.to(device) blocks = [block.int().to(device) for block in blocks] # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, pos_graph, neg_graph) optimizer.zero_grad() loss.backward() optimizer.step() # start timer at before iter_start if step == iter_start - 1: t0 = time.time() elif step == iter_count + iter_start - 1: # time iter_count iterations break t1 = time.time() return (t1 - t0) / iter_count ================================================ FILE: benchmarks/benchmarks/multigpu/__init__.py ================================================ ================================================ FILE: benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py ================================================ """ Modeling Relational Data with Graph Convolutional Networks Paper: https://arxiv.org/abs/1703.06103 Code: https://github.com/tkipf/relational-gcn Difference compared to tkipf/relation-gcn * l2norm applied to all weights * remove nodes that won't be touched """ import argparse import gc import logging import time from pathlib import Path from types import SimpleNamespace import dgl import numpy as np import torch as th import 
torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F from dgl.nn import RelGraphConv from torch.multiprocessing import Queue from torch.nn.parallel import DistributedDataParallel from torch.utils.data import DataLoader from .. import utils class EntityClassify(nn.Module): def __init__( self, device, num_nodes, h_dim, out_dim, num_rels, num_bases=None, num_hidden_layers=1, dropout=0, use_self_loop=False, layer_norm=False, ): super(EntityClassify, self).__init__() self.device = th.device(device if device >= 0 else "cpu") self.num_nodes = num_nodes self.h_dim = h_dim self.out_dim = out_dim self.num_rels = num_rels self.num_bases = None if num_bases < 0 else num_bases self.num_hidden_layers = num_hidden_layers self.dropout = dropout self.use_self_loop = use_self_loop self.layer_norm = layer_norm self.layers = nn.ModuleList() # i2h self.layers.append( RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, layer_norm=layer_norm, ) ) # h2h for idx in range(self.num_hidden_layers): self.layers.append( RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, layer_norm=layer_norm, ) ) # h2o self.layers.append( RelGraphConv( self.h_dim, self.out_dim, self.num_rels, "basis", self.num_bases, activation=None, self_loop=self.use_self_loop, layer_norm=layer_norm, ) ) def forward(self, blocks, feats, norm=None): if blocks is None: # full graph training blocks = [self.g] * len(self.layers) h = feats for layer, block in zip(self.layers, blocks): block = block.to(self.device) h = layer(block, h, block.edata["etype"], block.edata["norm"]) return h def gen_norm(g): _, v, eid = g.all_edges(form="all") _, inverse_index, count = th.unique( v, return_inverse=True, return_counts=True ) degrees = count[inverse_index] norm = th.ones(eid.shape[0], device=eid.device) / degrees norm = 
norm.unsqueeze(1) g.edata["norm"] = norm class NeighborSampler: def __init__(self, g, target_idx, fanouts): self.g = g self.target_idx = target_idx self.fanouts = fanouts def sample_blocks(self, seeds): blocks = [] etypes = [] norms = [] ntypes = [] seeds = th.tensor(seeds).long() cur = self.target_idx[seeds] for fanout in self.fanouts: if fanout is None or fanout == -1: frontier = dgl.in_subgraph(self.g, cur) else: frontier = dgl.sampling.sample_neighbors(self.g, cur, fanout) block = dgl.to_block(frontier, cur) gen_norm(block) cur = block.srcdata[dgl.NID] blocks.insert(0, block) return seeds, blocks @utils.thread_wrapped_func def run(proc_id, n_gpus, n_cpus, args, devices, dataset, split, queue=None): from .rgcn_model import RelGraphEmbedLayer dev_id = devices[proc_id] ( g, node_feats, num_of_ntype, num_classes, num_rels, target_idx, train_idx, val_idx, test_idx, labels, ) = dataset labels = labels.cuda(dev_id) if split is not None: train_seed, val_seed, test_seed = split train_idx = train_idx[train_seed] # val_idx = val_idx[val_seed] # test_idx = test_idx[test_seed] fanouts = args.fanout node_tids = g.ndata[dgl.NTYPE] sampler = NeighborSampler(g, target_idx, fanouts) loader = DataLoader( dataset=train_idx.numpy(), batch_size=args.batch_size, collate_fn=sampler.sample_blocks, shuffle=True, num_workers=args.num_workers, ) world_size = n_gpus dist_init_method = "tcp://{master_ip}:{master_port}".format( master_ip="127.0.0.1", master_port="12345" ) backend = "nccl" # using sparse embedding or usig mix_cpu_gpu model (embedding model can not be stored in GPU) if args.dgl_sparse is False: backend = "gloo" print("backend using {}".format(backend)) th.distributed.init_process_group( backend=backend, init_method=dist_init_method, world_size=world_size, rank=dev_id, ) # node features # None for one-hot feature, if not none, it should be the feature tensor. 
# embed_layer = RelGraphEmbedLayer( dev_id, g.num_nodes(), node_tids, num_of_ntype, node_feats, args.n_hidden, dgl_sparse=args.dgl_sparse, ) # create model # all model params are in device. model = EntityClassify( dev_id, g.num_nodes(), args.n_hidden, num_classes, num_rels, num_bases=args.n_bases, num_hidden_layers=args.n_layers - 2, dropout=args.dropout, use_self_loop=args.use_self_loop, layer_norm=args.layer_norm, ) model.cuda(dev_id) model = DistributedDataParallel( model, device_ids=[dev_id], output_device=dev_id ) if args.dgl_sparse: embed_layer.cuda(dev_id) if len(list(embed_layer.parameters())) > 0: embed_layer = DistributedDataParallel( embed_layer, device_ids=[dev_id], output_device=dev_id ) else: if len(list(embed_layer.parameters())) > 0: embed_layer = DistributedDataParallel( embed_layer, device_ids=None, output_device=None ) # optimizer dense_params = list(model.parameters()) if args.node_feats: if n_gpus > 1: dense_params += list(embed_layer.module.embeds.parameters()) else: dense_params += list(embed_layer.embeds.parameters()) optimizer = th.optim.Adam( dense_params, lr=args.lr, weight_decay=args.l2norm ) if args.dgl_sparse: all_params = list(model.parameters()) + list(embed_layer.parameters()) optimizer = th.optim.Adam( all_params, lr=args.lr, weight_decay=args.l2norm ) if n_gpus > 1 and isinstance(embed_layer, DistributedDataParallel): dgl_emb = embed_layer.module.dgl_emb else: dgl_emb = embed_layer.dgl_emb emb_optimizer = ( dgl.optim.SparseAdam(params=dgl_emb, lr=args.sparse_lr, eps=1e-8) if len(dgl_emb) > 0 else None ) else: if n_gpus > 1: embs = list(embed_layer.module.node_embeds.parameters()) else: embs = list(embed_layer.node_embeds.parameters()) emb_optimizer = ( th.optim.SparseAdam(embs, lr=args.sparse_lr) if len(embs) > 0 else None ) # training loop print("start training...") forward_time = [] backward_time = [] train_time = 0 validation_time = 0 test_time = 0 last_val_acc = 0.0 do_test = False if n_gpus > 1 and n_cpus - args.num_workers > 
0: th.set_num_threads(n_cpus - args.num_workers) steps = 0 time_records = [] model.train() embed_layer.train() # Warm up for i, sample_data in enumerate(loader): seeds, blocks = sample_data t0 = time.time() feats = embed_layer( blocks[0].srcdata[dgl.NID], blocks[0].srcdata["ntype"], blocks[0].srcdata["type_id"], node_feats, ) logits = model(blocks, feats) loss = F.cross_entropy(logits, labels[seeds]) t1 = time.time() optimizer.zero_grad() if emb_optimizer is not None: emb_optimizer.zero_grad() loss.backward() if emb_optimizer is not None: emb_optimizer.step() optimizer.step() gc.collect() if i >= 3: break # real time for i, sample_data in enumerate(loader): seeds, blocks = sample_data t0 = time.time() feats = embed_layer( blocks[0].srcdata[dgl.NID], blocks[0].srcdata["ntype"], blocks[0].srcdata["type_id"], node_feats, ) logits = model(blocks, feats) loss = F.cross_entropy(logits, labels[seeds]) t1 = time.time() optimizer.zero_grad() if emb_optimizer is not None: emb_optimizer.zero_grad() loss.backward() if emb_optimizer is not None: emb_optimizer.step() optimizer.step() th.distributed.barrier() t2 = time.time() forward_time.append(t1 - t0) backward_time.append(t2 - t1) time_records.append(t2 - t0) gc.collect() if i >= 10: break if proc_id == 0: queue.put(np.array(time_records)) @utils.skip_if_not_4gpu() @utils.benchmark("time", timeout=600) @utils.parametrize("data", ["am", "ogbn-mag"]) @utils.parametrize("dgl_sparse", [True, False]) def track_time(data, dgl_sparse): # load graph data dataset = utils.process_data(data) args = config() devices = [0, 1, 2, 3] args.dgl_sparse = dgl_sparse args.dataset = dataset ogb_dataset = False if data == "am": args.n_bases = 40 args.l2norm = 5e-4 elif data == "ogbn-mag": args.n_bases = 2 args.l2norm = 0 else: raise ValueError() if ogb_dataset is True: split_idx = dataset.get_idx_split() train_idx = split_idx["train"]["paper"] val_idx = split_idx["valid"]["paper"] test_idx = split_idx["test"]["paper"] hg_orig, labels = dataset[0] 
subgs = {} for etype in hg_orig.canonical_etypes: u, v = hg_orig.all_edges(etype=etype) subgs[etype] = (u, v) subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u) hg = dgl.heterograph(subgs) hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"] labels = labels["paper"].squeeze() num_rels = len(hg.canonical_etypes) num_of_ntype = len(hg.ntypes) num_classes = dataset.num_classes if args.dataset == "ogbn-mag": category = "paper" print("Number of relations: {}".format(num_rels)) print("Number of class: {}".format(num_classes)) print("Number of train: {}".format(len(train_idx))) print("Number of valid: {}".format(len(val_idx))) print("Number of test: {}".format(len(test_idx))) else: # Load from hetero-graph hg = dataset[0] num_rels = len(hg.canonical_etypes) num_of_ntype = len(hg.ntypes) category = dataset.predict_category num_classes = dataset.num_classes train_mask = hg.nodes[category].data.pop("train_mask") test_mask = hg.nodes[category].data.pop("test_mask") labels = hg.nodes[category].data.pop("labels") train_idx = th.nonzero(train_mask, as_tuple=False).squeeze() test_idx = th.nonzero(test_mask, as_tuple=False).squeeze() # AIFB, MUTAG, BGS and AM datasets do not provide validation set split. # Split train set into train and validation if args.validation is set # otherwise use train set as the validation set. 
if args.validation: val_idx = train_idx[: len(train_idx) // 5] train_idx = train_idx[len(train_idx) // 5 :] else: val_idx = train_idx node_feats = [] for ntype in hg.ntypes: if len(hg.nodes[ntype].data) == 0 or args.node_feats is False: node_feats.append(hg.num_nodes(ntype)) else: assert len(hg.nodes[ntype].data) == 1 feat = hg.nodes[ntype].data.pop("feat") node_feats.append(feat.share_memory_()) # get target category id category_id = len(hg.ntypes) for i, ntype in enumerate(hg.ntypes): if ntype == category: category_id = i print("{}:{}".format(i, ntype)) g = dgl.to_homogeneous(hg) g.ndata["ntype"] = g.ndata[dgl.NTYPE] g.ndata["ntype"].share_memory_() g.edata["etype"] = g.edata[dgl.ETYPE] g.edata["etype"].share_memory_() g.ndata["type_id"] = g.ndata[dgl.NID] g.ndata["type_id"].share_memory_() node_ids = th.arange(g.num_nodes()) # find out the target node ids node_tids = g.ndata[dgl.NTYPE] loc = node_tids == category_id target_idx = node_ids[loc] target_idx.share_memory_() train_idx.share_memory_() val_idx.share_memory_() test_idx.share_memory_() # Create csr/coo/csc formats before launching training processes with multi-gpu. # This avoids creating certain formats in each sub-process, which saves momory and CPU. g.create_formats_() n_gpus = len(devices) n_cpus = mp.cpu_count() ctx = mp.get_context("spawn") queue = ctx.Queue() procs = [] num_train_seeds = train_idx.shape[0] num_valid_seeds = val_idx.shape[0] num_test_seeds = test_idx.shape[0] train_seeds = th.randperm(num_train_seeds) valid_seeds = th.randperm(num_valid_seeds) test_seeds = th.randperm(num_test_seeds) tseeds_per_proc = num_train_seeds // n_gpus vseeds_per_proc = num_valid_seeds // n_gpus tstseeds_per_proc = num_test_seeds // n_gpus for proc_id in range(n_gpus): # we have multi-gpu for training, evaluation and testing # so split trian set, valid set and test set into num-of-gpu parts. 
def config():
    """Build the default hyper-parameter set for the multi-GPU RGCN benchmark.

    Returns
    -------
    types.SimpleNamespace
        A freshly created namespace on every call (including a new
        ``fanout`` list), so attributes can be overwritten by the caller
        without leaking into later calls.
    """
    defaults = {
        "dropout": 0,
        "n_hidden": 16,
        "gpu": "0,1,2,3",
        "lr": 1e-2,
        "sparse_lr": 2e-2,
        "n_bases": -1,
        "n_layers": 2,
        "dataset": None,
        "l2norm": 0,
        "fanout": [10, 25],
        "use_self_loop": True,
        "batch_size": 100,
        "layer_norm": False,
        "validation": False,
        "node_feats": False,
        "num_workers": 0,
        "dgl_sparse": False,
    }
    return SimpleNamespace(**defaults)
class SAGE(nn.Module):
    """Stack of mean-aggregator ``SAGEConv`` layers applied to sampled blocks.

    Parameters
    ----------
    in_feats : int
        Size of the input node features.
    n_hidden : int
        Size of every hidden representation.
    n_classes : int
        Size of the output produced by the last layer.
    n_layers : int
        Number of ``SAGEConv`` layers. Values of 1 or less build a single
        ``in_feats -> n_classes`` layer.
    activation : callable
        Applied to the output of every layer except the last one.
    dropout : float
        Dropout probability applied after the activation.
    """

    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        if n_layers > 1:
            self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
            for _ in range(n_layers - 2):
                self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
            self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        else:
            # A one-layer model must map straight to the class dimension.
            # Building the usual input and output layers here would leave
            # forward() (which zips layers with blocks) emitting n_hidden
            # columns with activation and dropout applied.
            self.layers.append(dglnn.SAGEConv(in_feats, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    def forward(self, blocks, x):
        """Run ``x`` through the layers, one sampled block per layer.

        Parameters
        ----------
        blocks : sequence
            One block per layer; iteration stops at the shorter of
            ``self.layers`` and ``blocks``.
        x : tensor
            Input features for the source nodes of the first block.

        Returns
        -------
        tensor
            Output of the last applied layer.
        """
        h = x
        last = len(self.layers) - 1
        for idx, (layer, block) in enumerate(zip(self.layers, blocks)):
            h = layer(block, h)
            if idx != last:
                h = self.activation(h)
                h = self.dropout(h)
        return h
dgl.dataloading.DataLoader( train_g, train_nid, sampler, batch_size=args.batch_size, shuffle=True, drop_last=False, num_workers=args.num_workers, ) # Define model and optimizer model = SAGE( in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout, ) model = model.to(dev_id) if n_gpus > 1: model = DistributedDataParallel( model, device_ids=[dev_id], output_device=dev_id ) loss_fcn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=args.lr) # Training loop for step, (input_nodes, seeds, blocks) in enumerate(dataloader): if proc_id == 0: tic_step = time.time() batch_inputs, batch_labels = load_subtensor( train_nfeat, train_labels, seeds, input_nodes, dev_id ) blocks = [block.int().to(dev_id) for block in blocks] batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() if proc_id == 0: timing_records.append(time.time() - tic_step) if step >= 50: break if n_gpus > 1: th.distributed.barrier() if proc_id == 0: result_queue.put(np.array(timing_records)) @utils.benchmark("time", timeout=600) @utils.skip_if_not_4gpu() @utils.parametrize("data", ["reddit", "ogbn-products"]) def track_time(data): args = SimpleNamespace( num_hidden=16, fan_out="10,25", batch_size=1000, lr=0.003, dropout=0.5, num_layers=2, num_workers=4, ) devices = [0, 1, 2, 3] n_gpus = len(devices) data = utils.process_data(data) g = data[0] n_classes = data.num_classes train_g = val_g = test_g = g # Create csr/coo/csc formats before launching training processes with multi-gpu. # This avoids creating certain formats in each sub-process, which saves momory and CPU. 
class RelGraphEmbedLayer(nn.Module):
    r"""Input embedding layer for a heterograph stored in homogeneous form.

    Node types without input features get a learnable embedding table;
    node types with features get a learnable projection matrix that maps
    the features to ``embed_size``.

    Parameters
    ----------
    dev_id : int
        Device to run the layer on. A negative value selects the CPU.
    num_nodes : int
        Number of nodes.
    node_tids : tensor
        Node type id of each node, starting from 0. Stored nowhere and not
        used by this constructor; kept for interface compatibility.
    num_of_ntype : int
        Number of node types.
    input_size : list of int or tensor
        One entry per node type. An ``int`` is the number of nodes of that
        type and requests an embedding table of that many rows. Anything
        else is treated as that type's feature matrix, whose second
        dimension gives the projection's input size.
    embed_size : int
        Output embedding size.
    dgl_sparse : bool, optional
        If True, use ``dgl.nn.NodeEmbedding`` for the embedding tables,
        otherwise use ``torch.nn.Embedding`` with ``sparse=True``.
    """

    def __init__(
        self,
        dev_id,
        num_nodes,
        node_tids,
        num_of_ntype,
        input_size,
        embed_size,
        dgl_sparse=False,
    ):
        super(RelGraphEmbedLayer, self).__init__()
        self.dev_id = th.device(dev_id if dev_id >= 0 else "cpu")
        self.embed_size = embed_size
        self.num_nodes = num_nodes
        self.dgl_sparse = dgl_sparse

        # Projection matrices for node types that come with features.
        self.embeds = nn.ParameterDict()
        # Embedding tables for featureless node types. A plain dict is used
        # for DGL embeddings so they are not registered as sub-modules.
        self.node_embeds = {} if dgl_sparse else nn.ModuleDict()
        self.num_of_ntype = num_of_ntype

        for ntype in range(num_of_ntype):
            key = str(ntype)
            if isinstance(input_size[ntype], int):
                if dgl_sparse:
                    self.node_embeds[key] = dgl.nn.NodeEmbedding(
                        input_size[ntype],
                        embed_size,
                        name=key,
                        init_func=initializer,
                    )
                else:
                    sparse_emb = th.nn.Embedding(
                        input_size[ntype], embed_size, sparse=True
                    )
                    nn.init.uniform_(sparse_emb.weight, -1.0, 1.0)
                    self.node_embeds[key] = sparse_emb
            else:
                input_emb_size = input_size[ntype].shape[1]
                embed = nn.Parameter(
                    th.empty(input_emb_size, self.embed_size)
                )
                nn.init.xavier_uniform_(embed)
                self.embeds[key] = embed

    @property
    def dgl_emb(self):
        """List of the DGL node embeddings, or ``[]`` when ``dgl_sparse`` is off."""
        if self.dgl_sparse:
            return list(self.node_embeds.values())
        return []

    def forward(self, node_ids, node_tids, type_ids, features):
        """Compute the input embedding of every requested node.

        Parameters
        ----------
        node_ids : tensor
            Ids of the nodes to embed; only its length is used, to size
            the output.
        node_tids : tensor
            Node type id of each entry of ``node_ids``.
        type_ids : tensor
            Per-type node id of each entry, used to index that type's
            embedding table or feature matrix.
        features : list of int or tensor
            One entry per node type. An ``int`` entry selects the embedding
            table of that type; any other entry is indexed as a feature
            matrix and multiplied by the type's projection matrix.

        Returns
        -------
        tensor
            ``(len(node_ids), embed_size)`` embeddings on ``self.dev_id``.
            Rows whose type id is outside ``range(num_of_ntype)`` are left
            uninitialised.
        """
        # The previous implementation also copied ``node_ids`` to the device
        # here and never used the copy; that transfer has been dropped.
        embeds = th.empty(
            node_ids.shape[0], self.embed_size, device=self.dev_id
        )
        for ntype in range(self.num_of_ntype):
            key = str(ntype)
            loc = node_tids == ntype
            if isinstance(features[ntype], int):
                if self.dgl_sparse:
                    embeds[loc] = self.node_embeds[key](
                        type_ids[loc], self.dev_id
                    )
                else:
                    embeds[loc] = self.node_embeds[key](type_ids[loc]).to(
                        self.dev_id
                    )
            else:
                embeds[loc] = features[ntype][type_ids[loc]].to(
                    self.dev_id
                ) @ self.embeds[key].to(self.dev_id)

        return embeds
g.edata[dgl.ETYPE], g.edata["norm"]) return h def load_data(data_name, get_norm=False, inv_target=False): dataset = utils.process_data(data_name) # Load hetero-graph hg = dataset[0] num_rels = len(hg.canonical_etypes) category = dataset.predict_category num_classes = dataset.num_classes labels = hg.nodes[category].data.pop("labels") train_mask = hg.nodes[category].data.pop("train_mask") test_mask = hg.nodes[category].data.pop("test_mask") train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() if get_norm: # Calculate normalization weight for each edge, # 1. / d, d is the degree of the destination node for cetype in hg.canonical_etypes: hg.edges[cetype].data["norm"] = dgl.norm_by_dst( hg, cetype ).unsqueeze(1) edata = ["norm"] else: edata = None # get target category id category_id = hg.ntypes.index(category) g = dgl.to_homogeneous(hg, edata=edata) # Rename the fields as they can be changed by for example DataLoader g.ndata["ntype"] = g.ndata.pop(dgl.NTYPE) g.ndata["type_id"] = g.ndata.pop(dgl.NID) node_ids = torch.arange(g.num_nodes()) # find out the target node ids in g loc = g.ndata["ntype"] == category_id target_idx = node_ids[loc] if inv_target: # Map global node IDs to type-specific node IDs. 
This is required for # looking up type-specific labels in a minibatch inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64) inv_target[target_idx] = torch.arange( 0, target_idx.shape[0], dtype=inv_target.dtype ) return ( g, num_rels, num_classes, labels, train_idx, test_idx, target_idx, inv_target, ) else: return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx ================================================ FILE: benchmarks/benchmarks/utils.py ================================================ import inspect import json import os import pickle import shutil import time import zipfile from functools import partial, reduce, wraps from timeit import default_timer import dgl import numpy as np import pandas import requests import torch from ogb.nodeproppred import DglNodePropPredDataset def _download(url, path, filename): fn = os.path.join(path, filename) if os.path.exists(fn): return os.makedirs(path, exist_ok=True) f_remote = requests.get(url, stream=True) sz = f_remote.headers.get("content-length") assert f_remote.status_code == 200, "fail to open {}".format(url) with open(fn, "wb") as writer: for chunk in f_remote.iter_content(chunk_size=1024 * 1024): writer.write(chunk) print("Download finished.") import traceback from _thread import start_new_thread # GRAPH_CACHE = {} import torch.multiprocessing as mp def thread_wrapped_func(func): """ Wraps a process entry point to make it work with OpenMP. 
""" @wraps(func) def decorated_function(*args, **kwargs): queue = mp.Queue() def _queue_result(): exception, trace, res = None, None, None try: res = func(*args, **kwargs) except Exception as e: exception = e trace = traceback.format_exc() queue.put((res, exception, trace)) start_new_thread(_queue_result, ()) result, exception, trace = queue.get() if exception is None: return result else: assert isinstance(exception, Exception) raise exception.__class__(trace) return decorated_function def get_graph(name, format=None): # global GRAPH_CACHE # if name in GRAPH_CACHE: # return GRAPH_CACHE[name].to(format) if isinstance(format, str): format = [format] # didn't specify format if format is None: format = ["csc", "csr", "coo"] g = None if name == "cora": g = dgl.data.CoraGraphDataset(verbose=False)[0] elif name == "pubmed": g = dgl.data.PubmedGraphDataset(verbose=False)[0] elif name == "livejournal": bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format) if os.path.exists(bin_path): g_list, _ = dgl.load_graphs(bin_path) g = g_list[0] else: g = get_livejournal().formats(format) dgl.save_graphs(bin_path, [g]) elif name == "friendster": bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format) if os.path.exists(bin_path): g_list, _ = dgl.load_graphs(bin_path) g = g_list[0] else: # the original node IDs of friendster are not consecutive, so we compact it g = dgl.compact_graphs(get_friendster()).formats(format) dgl.save_graphs(bin_path, [g]) elif name == "reddit": bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format) if os.path.exists(bin_path): g_list, _ = dgl.load_graphs(bin_path) g = g_list[0] else: g = dgl.data.RedditDataset(self_loop=True)[0].formats(format) dgl.save_graphs(bin_path, [g]) elif name.startswith("ogb"): g = get_ogb_graph(name) else: raise Exception("Unknown dataset") # GRAPH_CACHE[name] = g g = g.formats(format) return g def get_ogb_graph(name): os.symlink("/tmp/dataset/", os.path.join(os.getcwd(), "dataset")) data = 
class OGBDataset:
    """Thin read-only wrapper pairing a graph with its label metadata.

    Parameters
    ----------
    g
        The graph returned for every index.
    num_labels : int
        Number of label classes; exposed as both ``num_labels`` and
        ``num_classes``.
    predict_category : optional
        Node type to predict on, or ``None``.
    """

    def __init__(self, g, num_labels, predict_category=None):
        self._g, self._num_labels = g, num_labels
        self._predict_category = predict_category

    @property
    def num_labels(self):
        """Number of label classes."""
        return self._num_labels

    # Same value under the second name.
    num_classes = num_labels

    @property
    def predict_category(self):
        """Node type to predict on, or ``None``."""
        return self._predict_category

    def __getitem__(self, idx):
        """Return the wrapped graph; ``idx`` is ignored."""
        return self._g
def load_ogb_mag():
    """Load ogbn-mag and wrap it as an ``OGBDataset`` predicting on "paper".

    Every relation of the original graph is kept and a reversed copy named
    ``"rev-" + relation`` is added. Paper features, labels, and boolean
    train/val/test masks are stored on the "paper" nodes.

    Returns
    -------
    OGBDataset
        Wrapper around the heterograph with ``num_classes`` taken from the
        OGB dataset and ``predict_category`` set to ``"paper"``.
    """
    name = "ogbn-mag"
    # Expose /tmp/dataset as ./dataset (presumably the default root used
    # by DglNodePropPredDataset; verify against the ogb docs). The link
    # may already exist, e.g. after another loader ran in this directory,
    # which must not abort the load.
    try:
        os.symlink("/tmp/dataset/", os.path.join(os.getcwd(), "dataset"))
    except FileExistsError:
        pass

    print("load", name)
    dataset = DglNodePropPredDataset(name=name)
    print("finish loading", name)

    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"]["paper"]
    val_idx = split_idx["valid"]["paper"]
    test_idx = split_idx["test"]["paper"]

    hg_orig, labels = dataset[0]
    subgs = {}
    for etype in hg_orig.canonical_etypes:
        u, v = hg_orig.all_edges(etype=etype)
        subgs[etype] = (u, v)
        # Reverse relation: swap endpoints and node types.
        subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u)
    hg = dgl.heterograph(subgs)

    paper_data = hg.nodes["paper"].data
    paper_data["feat"] = hg_orig.nodes["paper"].data["feat"]
    paper_data["labels"] = labels["paper"].squeeze()

    num_papers = hg.num_nodes("paper")
    for key, idx in (
        ("train_mask", train_idx),
        ("val_mask", val_idx),
        ("test_mask", test_idx),
    ):
        mask = torch.zeros((num_papers,), dtype=torch.bool)
        mask[idx] = True
        hg.nodes["paper"].data[key] = mask

    num_classes = dataset.num_classes
    return OGBDataset(hg, num_classes, "paper")
def process_data(name):
    """Instantiate the benchmark dataset registered under ``name``.

    Parameters
    ----------
    name : str
        Dataset key, e.g. ``"cora"``, ``"am"``, ``"reddit"``,
        ``"ogbn-mag"``.

    Returns
    -------
    The dataset object built by the matching loader.

    Raises
    ------
    ValueError
        If ``name`` matches none of the known keys.
    """
    # Loaders are wrapped in lambdas so nothing is constructed (or even
    # looked up) until the matching key is found.
    loaders = (
        ("cora", lambda: dgl.data.CoraGraphDataset()),
        ("pubmed", lambda: dgl.data.PubmedGraphDataset()),
        ("aifb", lambda: dgl.data.AIFBDataset()),
        ("mutag", lambda: dgl.data.MUTAGDataset()),
        ("bgs", lambda: dgl.data.BGSDataset()),
        ("am", lambda: dgl.data.AMDataset()),
        ("reddit", lambda: dgl.data.RedditDataset(self_loop=True)),
        ("ogbn-products", lambda: load_ogb_product()),
        ("ogbn-mag", lambda: load_ogb_mag()),
        ("nowplaying_rs", lambda: load_nowplaying_rs()),
    )
    for key, load in loaders:
        if name == key:
            return load()
    raise ValueError("Invalid dataset name:", name)
def parametrize(param_name, params):
    """Decorator for benchmarking over a set of parameters.

    The decorated function gains two list attributes, ``params`` and
    ``param_names``, each with one slot per argument of the function. The
    slot of ``param_name`` is filled in; slots of arguments that have not
    been parametrized stay ``None``.

    Parameters
    ----------
    param_name : str
        Parameter name. Must be one of the arguments of the decorated
        function.
    params : list[any]
        List of values to benchmark for the given parameter name.
        Recommend to use Python's native object type (e.g., int, str,
        list[int]) because ASV will display them on the plot.

    Raises
    ------
    ValueError
        When applied to a function that has no argument called
        ``param_name``.

    Examples
    --------

    Benchmark function `foo` when argument `x` is equal to 10 or 20.

    .. code::

        @benchmark('time')
        @parametrize('x', [10, 20])
        def foo(x):
            pass

    Benchmark function with multiple parametrizations. It will run the
    function with all possible combinations. The example below generates
    6 benchmarks.

    .. code::

        @benchmark('time')
        @parametrize('x', [10, 20])
        @parametrize('y', [-1, -2, -3])
        def foo(x, y):
            pass

    When using multiple parametrizations, it can have arbitrary order. The
    example below is the same as the above one.

    .. code::

        @benchmark('time')
        @parametrize('y', [-1, -2, -3])
        @parametrize('x', [10, 20])
        def foo(x, y):
            pass
    """

    def _attach(func):
        arg_names = list(inspect.signature(func).parameters)
        # Create the per-argument slots on first use; a later parametrize
        # on the same function reuses them.
        for attr in ("params", "param_names"):
            if getattr(func, attr, None) is None:
                setattr(func, attr, [None] * len(arg_names))
        if param_name not in arg_names:
            raise ValueError("Invalid parameter name:", param_name)
        slot = arg_names.index(param_name)
        func.params[slot] = params
        func.param_names[slot] = param_name
        return func

    return _attach
def skip_if_not_4gpu():
    """Return a decorator that marks a benchmark as skipped without 4 GPUs.

    When the module-level ``GPU_COUNT`` is below 4, the decorated function
    is reported on stdout and its ``benchmark_name`` attribute is set to
    ``"skip_" + func.__name__`` (presumably so the benchmark runner does
    not collect it; confirm against the ASV configuration). With 4 or
    more GPUs the function is returned untouched.
    """

    def _mark(func):
        if GPU_COUNT >= 4:
            return func
        # Not enough GPUs for the multi-GPU benchmarks.
        print("Skip {}".format(func.__name__))
        func.benchmark_name = "skip_" + func.__name__
        return func

    return _mark
class Timer:
    """Context manager measuring the wall time of its body in seconds.

    On the device ``"cuda:0"`` the time is taken from a pair of CUDA
    events; on any other device ``timeit.default_timer`` is used. The
    result is stored in ``elapsed_secs`` when the block exits.

    Parameters
    ----------
    device : str, optional
        Device name. Defaults to the value of ``get_bench_device()``.
    """

    def __init__(self, device=None):
        self.timer = default_timer
        self.device = get_bench_device() if device is None else device

    def __enter__(self):
        if self.device != "cuda:0":
            self.tic = self.timer()
            return self
        self.start_event = torch.cuda.Event(enable_timing=True)
        self.end_event = torch.cuda.Event(enable_timing=True)
        self.start_event.record()
        return self

    def __exit__(self, type, value, traceback):
        if self.device != "cuda:0":
            self.elapsed_secs = self.timer() - self.tic
            return
        self.end_event.record()
        # Wait for the events to be recorded before reading them.
        torch.cuda.synchronize()
        # elapsed_time() is divided by 1e3 to convert it to seconds.
        elapsed_ms = self.start_event.elapsed_time(self.end_event)
        self.elapsed_secs = elapsed_ms / 1e3
asv run -e -v asv publish popd ================================================ FILE: benchmarks/scripts/README.md ================================================ Regression Test Suite ======================== ### Spec of task.json ```json # Note: a test will be run if a name specified below is a substring of the full test name. # The full name of "benchmarks/model_acc/bench_sage_ns.track_acc" is "model_acc.bench_sage_ns.track_acc". A test will be run if its full name contains any keyword. # For example, "model_acc" will run all the tests under the "model_acc" folder # "bench_sage" will run both "bench_sage" and "bench_sage_ns" # "bench_sage." will only run "bench_sage" # "ns" will run any test whose name contains "ns" # "" will run all tests { "c5.9xlarge": { # The instance type to run the test "tests": [ "bench_sage" # The test to be run on this instance ], "env": { "DEVICE": "cpu" # The environment variable passed to publish.sh } }, "g4dn.2xlarge": { ... } } ``` ### Environment variables - `MOUNT_PATH` specifies the directory on the host to be mapped into docker; if set, `MOUNT_PATH` (on the host) is mapped to `/tmp/dataset` (in docker) - `INSTANCE_TYPE` specifies the current instance type - `DGL_REG_CONF` specifies the path to `task.json`, relative to the repo root. If it is specified, `INSTANCE_TYPE` must be specified as well ================================================ FILE: benchmarks/scripts/build_dgl_asv.sh ================================================ #!/bin/bash set -e # Default building only with cpu DEVICE=${DGL_BENCH_DEVICE:-cpu} pip install -r /asv/torch_gpu_pip.txt # build # 'CUDA_TOOLKIT_ROOT_DIR' is always required for sparse build as torch1.13.1+cu116 is installed. CMAKE_VARS="-DUSE_OPENMP=ON -DBUILD_TORCH=ON -DCUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda" if [[ $DEVICE == "gpu" ]]; then CMAKE_VARS="-DUSE_CUDA=ON $CMAKE_VARS" fi mkdir -p build pushd build cmake $CMAKE_VARS .. 
make -j8 popd ================================================ FILE: benchmarks/scripts/fix_ram_info.py ================================================ import json from pathlib import Path def main(): result_dir = Path(__file__).parent / ".." / Path("results/") for per_machine_dir in result_dir.iterdir(): if per_machine_dir.is_dir(): try: machine_json = json.loads( (per_machine_dir / "machine.json").read_text() ) ram = machine_json["ram"] for f in per_machine_dir.glob("*.json"): if f.stem != "machine": result = json.loads(f.read_text()) result_ram = result["params"]["ram"] if result_ram != ram: result["params"]["ram"] = ram print(f"Fix ram in {f}") f.write_text(json.dumps(result)) else: print(f"Skip {f}") except Exception as e: print(e) main() ================================================ FILE: benchmarks/scripts/generate_excel.py ================================================ import json from itertools import product from pathlib import Path import pandas as pd def get_branch_name_from_hash(hash): import subprocess process = subprocess.Popen( ["git", "name-rev", "--name-only", hash], stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) stdout, stderr = process.communicate() if len(stderr) > 0: return hash[:10] else: return stdout.decode("utf-8").strip("\n") def main(): results_path = Path("../results") results_path.is_dir() machines = [f for f in results_path.glob("*") if f.is_dir()] output_results_dict = {} for machine in machines: per_machine_result = {} commit_results_json_paths = [ f for f in machine.glob("*") if f.name != "machine.json" ] for commit in commit_results_json_paths: with commit.open() as f: commit_result = json.load(f) commit_hash = commit_result["commit_hash"] per_commit_result = {} for test_name, result in commit_result["results"].items(): per_commit_result[test_name] = [] if result["result"] is None: for test_args in product(*result["params"]): per_commit_result[test_name].append( {"params": ", ".join(test_args), "result": None} ) else: for 
test_args, performance_number in zip( product(*result["params"]), result["result"] ): per_commit_result[test_name].append( { "params": ", ".join(test_args), "result": performance_number, } ) per_machine_result[commit_hash] = per_commit_result output_results_dict[machine.name] = per_machine_result return output_results_dict def dict_to_csv(output_results_dict): with open("../results/benchmarks.json") as f: benchmark_conf = json.load(f) unit_dict = {} for k, v in benchmark_conf.items(): if k != "version": unit_dict[k] = v["unit"] result_list = [] for machine, per_machine_result in output_results_dict.items(): for commit, test_cases in per_machine_result.items(): branch_name = get_branch_name_from_hash(commit) result_column_name = "number_{}".format(branch_name) # per_commit_result_list = [] for test_case_name, results in test_cases.items(): for result in results: result_list.append( { "test_name": test_case_name, "params": result["params"], "unit": unit_dict[test_case_name], "number": result["result"], "commit": branch_name, "machine": machine, } ) df = pd.DataFrame(result_list) return df def side_by_side_view(df): commits = df["commit"].unique().tolist() full_df = df.loc[df["commit"] == commits[0]] for commit in commits[1:]: per_commit_df = df.loc[df["commit"] == commit] full_df: pd.DataFrame = full_df.merge( per_commit_df, on=["test_name", "params", "machine", "unit"], how="outer", suffixes=( "_{}".format(full_df.iloc[0]["commit"]), "_{}".format(per_commit_df.iloc[0]["commit"]), ), ) full_df = full_df.loc[:, ~full_df.columns.str.startswith("commit")] return full_df output_results_dict = main() df = dict_to_csv(output_results_dict) sbs_df = side_by_side_view(df) sbs_df.to_csv("result.csv") ================================================ FILE: benchmarks/scripts/install_dgl_asv.sh ================================================ #!/bin/bash set -e # install pushd python rm -rf build *.egg-info dist pip uninstall -y dgl python3 setup.py install popd 
================================================ FILE: benchmarks/scripts/publish.sh ================================================ #!/bin/bash # The script launches a docker container to run ASV benchmarks. We use the same docker # image as our CI (i.e., dgllib/dgl-ci-gpu:conda). It performs the following steps: # # 1. Start a docker container of the given machine name. The machine name will be # displayed on the generated website. # 2. Copy `.git` into the container. It allows ASV to determine the repository information # such as commit hash, branches, etc. # 3. Copy this folder into the container including the ASV configuration file `asv.conf.json`. # This means any changes to the files in this folder do not # require a git commit. By contrast, to correctly benchmark your changes to the core # library (e.g., "python/dgl"), you must call git commit first. # 4. It then calls the `run.sh` script inside the container. It will invoke `asv run`. # You can change the command such as specifying the benchmarks to run or adding some flags. # 5. After benchmarking, it copies the generated `results` and `html` folders back to # the host machine. 
# if [ $# -eq 2 ]; then MACHINE=$1 DEVICE=$2 else echo "publish.sh " exit 1 fi WS_ROOT=/asv/dgl docker pull public.ecr.aws/s1o7b3d9/benchmark_test:cu116_v230110 if [ -z "$DGL_REG_CONF" ]; then DOCKER_ENV_OPT="$DOCKER_ENV_OPT" else DOCKER_ENV_OPT=" -e DGL_REG_CONF=$DGL_REG_CONF $DOCKER_ENV_OPT" fi if [ -z "$INSTANCE_TYPE" ]; then DOCKER_ENV_OPT="$DOCKER_ENV_OPT" else DOCKER_ENV_OPT=" -e INSTANCE_TYPE=$INSTANCE_TYPE $DOCKER_ENV_OPT" fi if [ -z "$MOUNT_PATH" ]; then DOCKER_MOUNT_OPT="" else DOCKER_MOUNT_OPT="-v ${MOUNT_PATH}:/tmp/dataset -v ${MOUNT_PATH}/dgl_home/:/root/.dgl/" fi echo $HOME echo "Mount Point: ${DOCKER_MOUNT_OPT}" echo "Env opt: ${DOCKER_ENV_OPT}" echo "DEVICE: ${DEVICE}" if [[ $DEVICE == "cpu" ]]; then docker run --name dgl-reg \ --rm \ $DOCKER_MOUNT_OPT \ $DOCKER_ENV_OPT \ --shm-size="16g" \ --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116_v230110 /bin/bash else docker run --name dgl-reg \ --rm --gpus all \ $DOCKER_MOUNT_OPT \ $DOCKER_ENV_OPT \ --shm-size="16g" \ --hostname=$MACHINE -dit public.ecr.aws/s1o7b3d9/benchmark_test:cu116_v230110 /bin/bash fi pwd docker exec dgl-reg mkdir -p $WS_ROOT docker cp ../../.git dgl-reg:$WS_ROOT docker cp ../ dgl-reg:$WS_ROOT/benchmarks/ docker cp torch_gpu_pip.txt dgl-reg:/asv docker exec $DOCKER_ENV_OPT dgl-reg bash $WS_ROOT/benchmarks/run.sh $DEVICE docker cp dgl-reg:$WS_ROOT/benchmarks/results ../ docker cp dgl-reg:$WS_ROOT/benchmarks/html ../ docker stop dgl-reg ================================================ FILE: benchmarks/scripts/replace_branch.py ================================================ import argparse import json import os import re def json_minify(string, strip_space=True): """ Based on JSON.minify.js: https://github.com/getify/JSON.minify Contributers: - Pradyun S. 
Gedam (conditions and variable names changed) """ tokenizer = re.compile(r'"|(/\*)|(\*/)|(//)|\n|\r') in_string = False in_multi = False in_single = False new_str = [] index = 0 for match in re.finditer(tokenizer, string): if not (in_multi or in_single): tmp = string[index : match.start()] if not in_string and strip_space: # replace white space as defined in standard tmp = re.sub("[ \t\n\r]+", "", tmp) new_str.append(tmp) index = match.end() val = match.group() if val == '"' and not (in_multi or in_single): escaped = re.search(r"(\\)*$", string[: match.start()]) # start of string or unescaped quote character to end string if not in_string or ( escaped is None or len(escaped.group()) % 2 == 0 ): in_string = not in_string index -= 1 # include " character in next catch elif not (in_string or in_multi or in_single): if val == "/*": in_multi = True elif val == "//": in_single = True elif val == "*/" and in_multi and not (in_string or in_single): in_multi = False elif val in "\r\n" and not (in_multi or in_string) and in_single: in_single = False elif not ( (in_multi or in_single) or (val in " \r\n\t" and strip_space) ): new_str.append(val) new_str.append(string[index:]) content = "".join(new_str) content = content.replace(",]", "]") content = content.replace(",}", "}") return content def add_prefix(branch_name): if "/" not in branch_name: return "origin/" + branch_name else: return branch_name def change_branch(branch_str: str): branches = [add_prefix(b) for b in branch_str.split(",")] with open("../asv.conf.json", "r") as f: ss = f.read() config_json = json.loads(json_minify(ss)) config_json["branches"] = branches with open("../asv.conf.json", "w") as f: json.dump(config_json, f) if __name__ == "__main__": if "BRANCH_STR" in os.environ: change_branch(os.environ["BRANCH_STR"]) ================================================ FILE: benchmarks/scripts/torch_gpu_pip.txt ================================================ --find-links 
https://download.pytorch.org/whl/torch_stable.html torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchmetrics pytest nose numpy cython scipy networkx matplotlib nltk requests[security] tqdm awscli torchtext pandas rdflib ogb ================================================ FILE: benchmarks/task.json ================================================ { "r5.16xlarge": { "tests": [ "api.", "kernel.", "model_acc.", "model_speed." ], "env": { "DEVICE": "cpu" } }, "g4dn.2xlarge": { "tests": [ "api.", "kernel.", "model_acc.", "model_speed." ], "env": { "DEVICE": "gpu" } }, "g4dn.12xlarge": { "tests": [ "multigpu." ], "env": { "DEVICE": "gpu" } } } ================================================ FILE: cmake/modules/CUDA.cmake ================================================ # CUDA Module if(USE_CUDA) find_cuda(${USE_CUDA} REQUIRED) else(USE_CUDA) return() endif() ###### Borrowed from MSHADOW project include(CheckCXXCompilerFlag) check_cxx_compiler_flag("-std=c++17" SUPPORT_CXX17) set(dgl_known_gpu_archs "35" "50" "60" "70" "75") set(dgl_cuda_arch_ptx "70") if (CUDA_VERSION_MAJOR GREATER_EQUAL "11") list(APPEND dgl_known_gpu_archs "80" "86") set(dgl_cuda_arch_ptx "80" "86") endif() if (CUDA_VERSION VERSION_GREATER_EQUAL "11.8") list(APPEND dgl_known_gpu_archs "89" "90") set(dgl_cuda_arch_ptx "90") endif() if (CUDA_VERSION VERSION_GREATER_EQUAL "12.0") list(REMOVE_ITEM dgl_known_gpu_archs "35") endif() ################################################################################################ # A function for automatic detection of GPUs installed (if autodetection is enabled) # Usage: # dgl_detect_installed_gpus(out_variable) function(dgl_detect_installed_gpus out_variable) set(CUDA_gpu_detect_output "") if(NOT CUDA_gpu_detect_output) message(STATUS "Running GPU architecture autodetection") set(__cufile ${PROJECT_BINARY_DIR}/detect_cuda_archs.cu) file(WRITE ${__cufile} "" "#include \n" "#include \n" "using namespace std;\n" "int main()\n" "{\n" " int count = 0;\n" " if 
(cudaSuccess != cudaGetDeviceCount(&count)) { return -1; }\n" " if (count == 0) { cerr << \"No cuda devices detected\" << endl; return -1; }\n" " for (int device = 0; device < count; ++device)\n" " {\n" " cudaDeviceProp prop;\n" " if (cudaSuccess == cudaGetDeviceProperties(&prop, device))\n" " std::printf(\"%d.%d \", prop.major, prop.minor);\n" " }\n" " return 0;\n" "}\n") if(MSVC) #find vcvarsall.bat and run it building msvc environment get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY) find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." "${MY_COMPILER_DIR}/../..") execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch native --run ${__cufile} WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out OUTPUT_STRIP_TRAILING_WHITESPACE) else() if(CUDA_LIBRARY_PATH) set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}") endif() execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch native --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH} WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/" RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out OUTPUT_STRIP_TRAILING_WHITESPACE) endif() if(__nvcc_res EQUAL 0) # nvcc outputs text containing line breaks when building with MSVC. # The line below prevents CMake from inserting a variable with line # breaks in the cache message(STATUS "Found GPU arch ${__nvcc_out}") string(REGEX MATCH "([1-9].[0-9])" __nvcc_out "${__nvcc_out}") if(__nvcc_out VERSION_LESS "3.5") # drop support for cc < 3.5 and build for all known archs. message(WARNING "GPU arch less than 3.5 is not supported.") else() set(CUDA_gpu_detect_output ${__nvcc_out} CACHE INTERNAL "Returned GPU architetures from mshadow_detect_gpus tool" FORCE) endif() else() message(WARNING "Running GPU detection script with nvcc failed: ${__nvcc_out}") endif() endif() if(NOT CUDA_gpu_detect_output) message(WARNING "Automatic GPU detection failed. 
Building for all known architectures (${dgl_known_gpu_archs}).") set(${out_variable} ${dgl_known_gpu_archs} PARENT_SCOPE) else() set(${out_variable} ${CUDA_gpu_detect_output} PARENT_SCOPE) endif() endfunction() ################################################################################################ # Function for selecting GPU arch flags for nvcc based on CUDA_ARCH_NAME # Usage: # dgl_select_nvcc_arch_flags(out_variable) function(dgl_select_nvcc_arch_flags out_variable) # List of arch names. Turing and Ada don't have a new major version, so they are not added to default build. set(__archs_names "Kepler" "Maxwell" "Pascal" "Volta" "Turing" "Ampere" "Ada" "Hopper" "All" "Manual") if (NOT CUDA_VERSION VERSION_LESS "12.0") list(REMOVE_ITEM __archs_names "Kepler") endif() set(__archs_name_default "All") if(NOT CMAKE_CROSSCOMPILING) list(APPEND __archs_names "Auto") set(__archs_name_default "Auto") endif() # set CUDA_ARCH_NAME strings (so it will be seen as dropbox in CMake-Gui) set(CUDA_ARCH_NAME ${__archs_name_default} CACHE STRING "Select target NVIDIA GPU achitecture.") set_property( CACHE CUDA_ARCH_NAME PROPERTY STRINGS "" ${__archs_names} ) mark_as_advanced(CUDA_ARCH_NAME) # verify CUDA_ARCH_NAME value if(NOT ";${__archs_names};" MATCHES ";${CUDA_ARCH_NAME};") string(REPLACE ";" ", " __archs_names "${__archs_names}") message(FATAL_ERROR "Only ${__archs_names} architeture names are supported.") endif() if(${CUDA_ARCH_NAME} STREQUAL "Manual") set(CUDA_ARCH_BIN ${dgl_known_gpu_archs} CACHE STRING "Specify 'real' GPU architectures to build binaries for, BIN(PTX) format is supported") set(CUDA_ARCH_PTX ${dgl_cuda_arch_ptx} CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for") mark_as_advanced(CUDA_ARCH_BIN CUDA_ARCH_PTX) else() unset(CUDA_ARCH_BIN CACHE) unset(CUDA_ARCH_PTX CACHE) endif() if(${CUDA_ARCH_NAME} STREQUAL "Kepler") set(__cuda_arch_bin "35") set(__cuda_arch_ptx "35") elseif(${CUDA_ARCH_NAME} STREQUAL "Maxwell") 
set(__cuda_arch_bin "50") set(__cuda_arch_ptx "50") elseif(${CUDA_ARCH_NAME} STREQUAL "Pascal") set(__cuda_arch_bin "60") set(__cuda_arch_ptx "60") elseif(${CUDA_ARCH_NAME} STREQUAL "Volta") set(__cuda_arch_bin "70") set(__cuda_arch_ptx "70") elseif(${CUDA_ARCH_NAME} STREQUAL "Turing") set(__cuda_arch_bin "75") set(__cuda_arch_ptx "75") elseif(${CUDA_ARCH_NAME} STREQUAL "Ampere") set(__cuda_arch_bin "80") set(__cuda_arch_ptx "80") elseif(${CUDA_ARCH_NAME} STREQUAL "Ada") set(__cuda_arch_bin "89") set(__cuda_arch_ptx "89") elseif(${CUDA_ARCH_NAME} STREQUAL "Hopper") set(__cuda_arch_bin "90") set(__cuda_arch_ptx "90") elseif(${CUDA_ARCH_NAME} STREQUAL "All") set(__cuda_arch_bin ${dgl_known_gpu_archs}) set(__cuda_arch_ptx ${dgl_cuda_arch_ptx}) elseif(${CUDA_ARCH_NAME} STREQUAL "Auto") dgl_detect_installed_gpus(__cuda_arch_bin) # if detect successes, __cuda_arch_ptx = __cuda_arch_bin # if detect fails, __cuda_arch_ptx is the latest arch in __cuda_arch_bin list(GET __cuda_arch_bin -1 __cuda_arch_ptx) else() # (${CUDA_ARCH_NAME} STREQUAL "Manual") set(__cuda_arch_bin ${CUDA_ARCH_BIN}) set(__cuda_arch_ptx ${CUDA_ARCH_PTX}) endif() # remove dots and convert to lists string(REGEX REPLACE "\\." "" __cuda_arch_bin "${__cuda_arch_bin}") string(REGEX REPLACE "\\." 
"" __cuda_arch_ptx "${__cuda_arch_ptx}") string(REGEX MATCHALL "[0-9()]+" __cuda_arch_bin "${__cuda_arch_bin}") string(REGEX MATCHALL "[0-9]+" __cuda_arch_ptx "${__cuda_arch_ptx}") mshadow_list_unique(__cuda_arch_bin __cuda_arch_ptx) set(__nvcc_flags "--expt-relaxed-constexpr") set(__nvcc_archs_readable "") set(__archs "") # Tell NVCC to add binaries for the specified GPUs foreach(__arch ${__cuda_arch_bin}) if(__arch MATCHES "([0-9]+)\\(([0-9]+)\\)") # User explicitly specified PTX for the concrete BIN list(APPEND __nvcc_flags -gencode arch=compute_${CMAKE_MATCH_2},code=sm_${CMAKE_MATCH_1}) list(APPEND __nvcc_archs_readable sm_${CMAKE_MATCH_1}) list(APPEND __archs ${CMAKE_MATCH_1}) else() # User didn't explicitly specify PTX for the concrete BIN, we assume PTX=BIN list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=sm_${__arch}) list(APPEND __nvcc_archs_readable sm_${__arch}) list(APPEND __archs ${__arch}) endif() endforeach() # Tell NVCC to add PTX intermediate code for the specified architectures foreach(__arch ${__cuda_arch_ptx}) list(APPEND __nvcc_flags -gencode arch=compute_${__arch},code=compute_${__arch}) list(APPEND __nvcc_archs_readable compute_${__arch}) endforeach() string(REPLACE ";" " " __nvcc_archs_readable "${__nvcc_archs_readable}") set(${out_variable} ${__nvcc_flags} PARENT_SCOPE) set(${out_variable}_readable ${__nvcc_archs_readable} PARENT_SCOPE) set(CUDA_ARCHITECTURES ${__archs} PARENT_SCOPE) endfunction() ################################################################################################ # Config cuda compilation and append CUDA libraries to linker_libs # Usage: # dgl_config_cuda(linker_libs) macro(dgl_config_cuda linker_libs) if(NOT CUDA_FOUND) message(FATAL_ERROR "Cannot find CUDA.") endif() # always set the includedir when cuda is available # avoid global retrigger of cmake include_directories(${CUDA_INCLUDE_DIRS}) add_definitions(-DDGL_USE_CUDA) # NVCC flags # Manually set everything set(CUDA_PROPAGATE_HOST_FLAGS OFF) 
# 0. Add host flags message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "[ \t\n\r]" "," CXX_HOST_FLAGS "${CMAKE_CXX_FLAGS}") if(MSVC AND NOT USE_MSVC_MT) string(CONCAT CXX_HOST_FLAGS ${CXX_HOST_FLAGS} ",/MD") endif() list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "${CXX_HOST_FLAGS}") if(USE_OPENMP) # Needed by CUDA disjoint union source file. list(APPEND CUDA_NVCC_FLAGS "-Xcompiler" "${OpenMP_CXX_FLAGS}") endif(USE_OPENMP) # 1. Add arch flags dgl_select_nvcc_arch_flags(NVCC_FLAGS_ARCH) list(APPEND CUDA_NVCC_FLAGS ${NVCC_FLAGS_ARCH}) # 2. flags in third_party/moderngpu list(APPEND CUDA_NVCC_FLAGS "--expt-extended-lambda;-Wno-deprecated-declarations;-std=c++17") message(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}") list(APPEND ${linker_libs} ${CUDA_CUDART_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_cusparse_LIBRARY}) endmacro() ================================================ FILE: cmake/modules/FindMETIS.cmake ================================================ # Find the METIS includes and library # # This module defines # METIS_INCLUDE_DIR - where to find metis.h # METIS_LIBRARIES - libraries to link against to use METIS. # METIS_FOUND - METIS library was found INCLUDE(FindPackageHandleStandardArgs) FIND_PATH(METIS_INCLUDE_DIR NAMES "metis.h" PATHS ${EXTERNAL_METIS_PATH} ) FIND_LIBRARY(METIS_LIBRARIES NAMES libmetis metis PATHS ${EXTERNAL_METIS_LIB_PATH} ) FIND_PACKAGE_HANDLE_STANDARD_ARGS(METIS DEFAULT_MSG METIS_INCLUDE_DIR METIS_LIBRARIES) MARK_AS_ADVANCED(METIS_LIBRARIES METIS_INCLUDE_DIR) ================================================ FILE: cmake/util/FindCUDA.cmake ================================================ ####################################################### # Enhanced version of find CUDA. 
# # Usage: # find_cuda(${USE_CUDA}) # # - When USE_CUDA=ON, use auto search # # Please use the CMAKE variable CUDA_TOOLKIT_ROOT_DIR to set CUDA directory # # Provide variables: # # - CUDA_FOUND # - CUDA_INCLUDE_DIRS # - CUDA_TOOLKIT_ROOT_DIR # - CUDA_CUDA_LIBRARY # - CUDA_CUDART_LIBRARY # - CUDA_NVRTC_LIBRARY # - CUDA_CUDNN_LIBRARY # - CUDA_CUBLAS_LIBRARY # macro(find_cuda use_cuda) set(__use_cuda ${use_cuda}) if(__use_cuda STREQUAL "ON") include(FindCUDA) endif() # additional libraries if(CUDA_FOUND) if(MSVC) find_library(CUDA_CUDA_LIBRARY cuda ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32) find_library(CUDA_NVRTC_LIBRARY nvrtc ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32) find_library(CUDA_CUDNN_LIBRARY cudnn ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32) find_library(CUDA_CUBLAS_LIBRARY cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32) find_library(CUDA_CURAND_LIBRARY curand ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/Win32) else(MSVC) #find_library(CUDA_CUDA_LIBRARY cuda # PATHS ${CUDA_TOOLKIT_ROOT_DIR} # PATH_SUFFIXES lib lib64 targets/x86_64-linux/lib # NO_DEFAULT_PATH) find_library(CUDA_CUBLAS_LIBRARY cublas ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib) find_library(CUDA_CURAND_LIBRARY curand ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib) endif(MSVC) message(STATUS "Found CUDA_TOOLKIT_ROOT_DIR=" ${CUDA_TOOLKIT_ROOT_DIR}) #message(STATUS "Found CUDA_CUDA_LIBRARY=" ${CUDA_CUDA_LIBRARY}) message(STATUS "Found CUDA_CUDART_LIBRARY=" ${CUDA_CUDART_LIBRARY}) #message(STATUS "Found CUDA_NVRTC_LIBRARY=" ${CUDA_NVRTC_LIBRARY}) #message(STATUS "Found CUDA_CUDNN_LIBRARY=" ${CUDA_CUDNN_LIBRARY}) message(STATUS "Found CUDA_CUBLAS_LIBRARY=" ${CUDA_CUBLAS_LIBRARY}) message(STATUS "Found CUDA_CURAND_LIBRARY=" ${CUDA_CURAND_LIBRARY}) endif(CUDA_FOUND) endmacro(find_cuda) ================================================ FILE: 
cmake/util/MshadowUtil.cmake ================================================ ################################################################################################ # Command alias for debugging messages # Usage: # dmsg(<message>) function(dmsg) message(STATUS ${ARGN}) endfunction() ################################################################################################ # Removes duplicates from list(s) # Usage: # mshadow_list_unique(<list_variable> [<list_variable>] [...]) macro(mshadow_list_unique) foreach(__lst ${ARGN}) if(${__lst}) list(REMOVE_DUPLICATES ${__lst}) endif() endforeach() endmacro() ################################################################################################ # Clears variables from list # Usage: # mshadow_clear_vars(<variables_list>) macro(mshadow_clear_vars) foreach(_var ${ARGN}) unset(${_var}) endforeach() endmacro() ################################################################################################ # Removes duplicates from string # Usage: # mshadow_string_unique(<string_variable>) function(mshadow_string_unique __string) if(${__string}) set(__list ${${__string}}) separate_arguments(__list) list(REMOVE_DUPLICATES __list) foreach(__e ${__list}) set(__str "${__str} ${__e}") endforeach() set(${__string} ${__str} PARENT_SCOPE) endif() endfunction() ################################################################################################ # Prints list element per line # Usage: # mshadow_print_list(<list>) function(mshadow_print_list) foreach(e ${ARGN}) message(STATUS ${e}) endforeach() endfunction() ################################################################################################ # Function merging lists of compiler flags to single string. # Usage: # mshadow_merge_flag_lists(out_variable <list1> [<list2>] [<list3>] ...) 
function(mshadow_merge_flag_lists out_var)
  # Joins the flags of every list variable named in ARGN into one
  # space-separated string and stores it in <out_var> in the caller's scope.
  set(__result "")
  foreach(__list ${ARGN})
    foreach(__flag ${${__list}})
      # Quoted so that a flag containing spaces stays a single argument.
      string(STRIP "${__flag}" __flag)
      set(__result "${__result} ${__flag}")
    endforeach()
  endforeach()
  # Quoted so that an empty result (no flags at all) is still a valid
  # argument; unquoted, string(STRIP) fails with a missing-argument error.
  string(STRIP "${__result}" __result)
  set(${out_var} "${__result}" PARENT_SCOPE)
endfunction()

################################################################################################
# Converts all paths in list to absolute
# Usage:
#   mshadow_convert_absolute_paths(<list_variable>)
function(mshadow_convert_absolute_paths variable)
  # The accumulator must be reset here: a function starts with a copy of the
  # caller's variables, so a caller-side __list would otherwise leak into the
  # result.  (The original reset a misspelled, unused `__dlist`.)
  set(__list "")
  foreach(__s ${${variable}})
    get_filename_component(__abspath ${__s} ABSOLUTE)
    list(APPEND __list ${__abspath})
  endforeach()
  set(${variable} ${__list} PARENT_SCOPE)
endfunction()

################################################################################################
# Reads set of version defines from the header file
# Usage:
#   mshadow_parse_header(<file> <file_var> <define1> <define2> <define3> ..)
# The keywords PARENT_SCOPE and CACHE may appear among the define names; they
# select how each parsed value is stored.
macro(mshadow_parse_header FILENAME FILE_VAR)
  set(vars_regex "")
  set(__parnet_scope OFF)
  set(__add_cache OFF)
  # Build an alternation regex from the define names and pick out the keywords.
  foreach(name ${ARGN})
    if("${name}" STREQUAL "PARENT_SCOPE")
      set(__parnet_scope ON)
    elseif("${name}" STREQUAL "CACHE")
      set(__add_cache ON)
    elseif(vars_regex)
      set(vars_regex "${vars_regex}|${name}")
    else()
      set(vars_regex "${name}")
    endif()
  endforeach()
  if(EXISTS "${FILENAME}")
    file(STRINGS "${FILENAME}" ${FILE_VAR} REGEX "#define[ \t]+(${vars_regex})[ \t]+[0-9]+" )
  else()
    unset(${FILE_VAR})
  endif()
  foreach(name ${ARGN})
    if(NOT "${name}" STREQUAL "PARENT_SCOPE" AND NOT "${name}" STREQUAL "CACHE")
      if(${FILE_VAR})
        if(${FILE_VAR} MATCHES ".+[ \t]${name}[ \t]+([0-9]+).*")
          string(REGEX REPLACE ".+[ \t]${name}[ \t]+([0-9]+).*" "\\1" ${name} "${${FILE_VAR}}")
        else()
          set(${name} "")
        endif()
        if(__add_cache)
          set(${name} ${${name}} CACHE INTERNAL "${name} parsed from ${FILENAME}" FORCE)
        elseif(__parnet_scope)
          set(${name} "${${name}}" PARENT_SCOPE)
        endif()
      else()
        unset(${name} CACHE)
      endif()
    endif()
  endforeach()
endmacro() 
################################################################################################
# Reads single version define from the header file and parses it
# Usage:
#   mshadow_parse_header_single_define(<library> <file> <define>)
# Looks for `#define <define> "<major>.<minor>.<patch>[.<tweak>]"` in <file> and
# sets, in the caller's scope:
#   <library>_VERSION_MAJOR, <library>_VERSION_MINOR, <library>_VERSION_PATCH,
#   <library>_VERSION_TWEAK (only when a fourth component is present) and
#   <library>_VERSION_STRING.
# Nothing is set when the file does not exist or the define is not found.
# Extra arguments are forwarded to the set() calls, as before.
function(mshadow_parse_header_single_define LIBNAME HDR_PATH VARNAME)
  set(${LIBNAME}_H "")
  if(EXISTS "${HDR_PATH}")
    file(STRINGS "${HDR_PATH}" ${LIBNAME}_H REGEX "^#define[ \t]+${VARNAME}[ \t]+\"[^\"]*\".*$" LIMIT_COUNT 1)
  endif()

  if(${LIBNAME}_H)
    # Work on function-local copies first.  set(... PARENT_SCOPE) does not
    # change the local variable, so the original code never saw the TWEAK
    # value and exported a stale or empty VERSION_STRING.
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"([0-9]+).*$" "\\1" __major "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.([0-9]+).*$" "\\1" __minor "${${LIBNAME}_H}")
    string(REGEX REPLACE "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.([0-9]+).*$" "\\1" __patch "${${LIBNAME}_H}")
    set(__version_string "${__major}.${__minor}.${__patch}")

    # append a TWEAK version if it exists:
    set(__tweak "")
    if("${${LIBNAME}_H}" MATCHES "^.*[ \t]${VARNAME}[ \t]+\"[0-9]+\\.[0-9]+\\.[0-9]+\\.([0-9]+).*$")
      set(__tweak "${CMAKE_MATCH_1}")
      set(__version_string "${__version_string}.${__tweak}")
    endif()

    # Export everything to the caller in one place.
    set(${LIBNAME}_VERSION_MAJOR ${__major} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_MINOR ${__minor} ${ARGN} PARENT_SCOPE)
    set(${LIBNAME}_VERSION_PATCH ${__patch} ${ARGN} PARENT_SCOPE)
    if(NOT "${__tweak}" STREQUAL "")
      set(${LIBNAME}_VERSION_TWEAK "${__tweak}" ${ARGN} PARENT_SCOPE)
    endif()
    set(${LIBNAME}_VERSION_STRING "${__version_string}" ${ARGN} PARENT_SCOPE)
  endif()
endfunction()

########################################################################################################
# An option that the user can select. Can accept condition to control when option is available for user. 
# Usage: # mshadow_option( "doc string" [IF ]) function(mshadow_option variable description value) set(__value ${value}) set(__condition "") set(__varname "__value") foreach(arg ${ARGN}) if(arg STREQUAL "IF" OR arg STREQUAL "if") set(__varname "__condition") else() list(APPEND ${__varname} ${arg}) endif() endforeach() unset(__varname) if("${__condition}" STREQUAL "") set(__condition 2 GREATER 1) endif() if(${__condition}) if("${__value}" MATCHES ";") if(${__value}) option(${variable} "${description}" ON) else() option(${variable} "${description}" OFF) endif() elseif(DEFINED ${__value}) if(${__value}) option(${variable} "${description}" ON) else() option(${variable} "${description}" OFF) endif() else() option(${variable} "${description}" ${__value}) endif() else() unset(${variable} CACHE) endif() endfunction() ################################################################################################ # Utility macro for comparing two lists. Used for CMake debugging purposes # Usage: # mshadow_compare_lists( [description]) function(mshadow_compare_lists list1 list2 desc) set(__list1 ${${list1}}) set(__list2 ${${list2}}) list(SORT __list1) list(SORT __list2) list(LENGTH __list1 __len1) list(LENGTH __list2 __len2) if(NOT ${__len1} EQUAL ${__len2}) message(FATAL_ERROR "Lists are not equal. ${__len1} != ${__len2}. ${desc}") endif() foreach(__i RANGE 1 ${__len1}) math(EXPR __index "${__i}- 1") list(GET __list1 ${__index} __item1) list(GET __list2 ${__index} __item2) if(NOT ${__item1} STREQUAL ${__item2}) message(FATAL_ERROR "Lists are not equal. Differ at element ${__index}. 
${desc}") endif() endforeach() endfunction() ################################################################################################ # Command for disabling warnings for different platforms (see below for gcc and VisualStudio) # Usage: # mshadow_warnings_disable( -Wshadow /wd4996 ..,) macro(mshadow_warnings_disable) set(_flag_vars "") set(_msvc_warnings "") set(_gxx_warnings "") foreach(arg ${ARGN}) if(arg MATCHES "^CMAKE_") list(APPEND _flag_vars ${arg}) elseif(arg MATCHES "^/wd") list(APPEND _msvc_warnings ${arg}) elseif(arg MATCHES "^-W") list(APPEND _gxx_warnings ${arg}) endif() endforeach() if(NOT _flag_vars) set(_flag_vars CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif() if(MSVC AND _msvc_warnings) foreach(var ${_flag_vars}) foreach(warning ${_msvc_warnings}) set(${var} "${${var}} ${warning}") endforeach() endforeach() elseif((CMAKE_COMPILER_IS_GNUCXX OR CMAKE_COMPILER_IS_CLANGXX) AND _gxx_warnings) foreach(var ${_flag_vars}) foreach(warning ${_gxx_warnings}) if(NOT warning MATCHES "^-Wno-") string(REPLACE "${warning}" "" ${var} "${${var}}") string(REPLACE "-W" "-Wno-" warning "${warning}") endif() set(${var} "${${var}} ${warning}") endforeach() endforeach() endif() mshadow_clear_vars(_flag_vars _msvc_warnings _gxx_warnings) endmacro() ################################################################################################ # Helper function get current definitions # Usage: # mshadow_get_current_definitions() function(mshadow_get_current_definitions definitions_var) get_property(current_definitions DIRECTORY PROPERTY COMPILE_DEFINITIONS) set(result "") foreach(d ${current_definitions}) list(APPEND result -D${d}) endforeach() mshadow_list_unique(result) set(${definitions_var} ${result} PARENT_SCOPE) endfunction() ################################################################################################ # Helper function get current includes/definitions # Usage: # mshadow_get_current_cflags() function(mshadow_get_current_cflags cflags_var) 
get_property(current_includes DIRECTORY PROPERTY INCLUDE_DIRECTORIES) mshadow_convert_absolute_paths(current_includes) mshadow_get_current_definitions(cflags) foreach(i ${current_includes}) list(APPEND cflags "-I${i}") endforeach() mshadow_list_unique(cflags) set(${cflags_var} ${cflags} PARENT_SCOPE) endfunction() ################################################################################################ # Helper function to parse current linker libs into link directories, libflags and osx frameworks # Usage: # mshadow_parse_linker_libs( ) function(mshadow_parse_linker_libs mshadow_LINKER_LIBS_variable folders_var flags_var frameworks_var) set(__unspec "") set(__debug "") set(__optimized "") set(__framework "") set(__varname "__unspec") # split libs into debug, optimized, unspecified and frameworks foreach(list_elem ${${mshadow_LINKER_LIBS_variable}}) if(list_elem STREQUAL "debug") set(__varname "__debug") elseif(list_elem STREQUAL "optimized") set(__varname "__optimized") elseif(list_elem MATCHES "^-framework[ \t]+([^ \t].*)") list(APPEND __framework -framework ${CMAKE_MATCH_1}) else() list(APPEND ${__varname} ${list_elem}) set(__varname "__unspec") endif() endforeach() # attach debug or optimized libs to unspecified according to current configuration if(CMAKE_BUILD_TYPE MATCHES "Debug") set(__libs ${__unspec} ${__debug}) else() set(__libs ${__unspec} ${__optimized}) endif() set(libflags "") set(folders "") # convert linker libraries list to link flags foreach(lib ${__libs}) if(TARGET ${lib}) list(APPEND folders $) list(APPEND libflags -l${lib}) elseif(lib MATCHES "^-l.*") list(APPEND libflags ${lib}) elseif(IS_ABSOLUTE ${lib}) get_filename_component(name_we ${lib} NAME_WE) get_filename_component(folder ${lib} PATH) string(REGEX MATCH "^lib(.*)" __match ${name_we}) list(APPEND libflags -l${CMAKE_MATCH_1}) list(APPEND folders ${folder}) else() message(FATAL_ERROR "Logic error. 
Need to update cmake script") endif() endforeach() mshadow_list_unique(libflags folders) set(${folders_var} ${folders} PARENT_SCOPE) set(${flags_var} ${libflags} PARENT_SCOPE) set(${frameworks_var} ${__framework} PARENT_SCOPE) endfunction() ################################################################################################ # Helper function to detect Darwin version, i.e. 10.8, 10.9, 10.10, .... # Sets the output variable to the output of `sw_vers -productVersion` on Apple platforms and to an empty string elsewhere. # Usage: # mshadow_detect_darwin_version(<output_var>) function(mshadow_detect_darwin_version output_var) if(APPLE) execute_process(COMMAND /usr/bin/sw_vers -productVersion RESULT_VARIABLE __sw_vers OUTPUT_VARIABLE __sw_vers_out ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) set(${output_var} ${__sw_vers_out} PARENT_SCOPE) else() set(${output_var} "" PARENT_SCOPE) endif() endfunction() ################################################################################################ # Convenient command to set up a source group for IDEs that support this feature (VS, XCode) # Usage: # mshadow_source_group(<group> GLOB[_RECURSE] <globbing_expression>) # NOTE: CAFFE_SOURCE_GROUP is only the internal prefix handed to cmake_parse_arguments. function(mshadow_source_group group) cmake_parse_arguments(CAFFE_SOURCE_GROUP "" "" "GLOB;GLOB_RECURSE" ${ARGN}) if(CAFFE_SOURCE_GROUP_GLOB) file(GLOB srcs1 ${CAFFE_SOURCE_GROUP_GLOB}) source_group(${group} FILES ${srcs1}) endif() if(CAFFE_SOURCE_GROUP_GLOB_RECURSE) file(GLOB_RECURSE srcs2 ${CAFFE_SOURCE_GROUP_GLOB_RECURSE}) source_group(${group} FILES ${srcs2}) endif() endfunction() ================================================ FILE: cmake/util/Util.cmake ================================================ # Defines <variable> as a STRING cache entry with the given value and description, but only when it is not already defined. # NOTE: __dgl_option will not reset existing variables. macro(__dgl_option variable description value) if(NOT DEFINED ${variable}) set(${variable} ${value} CACHE STRING ${description}) endif() endmacro() ####################################################### # An option to specify the build type for a feature. 
# Usage: # dgl_feature_option(<variable> "doc string" "dev" "release") # Each trailing argument is one of "all", "dev", "dogfood" or "release"; other arguments are ignored. # "all" turns the option ON directly; otherwise the option is ON when BUILD_TYPE is one of the listed build types and OFF if not. macro(dgl_feature_option variable description) set(__value "") foreach(arg ${ARGN}) if(arg STREQUAL "all") __dgl_option(${variable} "${description}" ON) elseif(arg STREQUAL "dev" OR arg STREQUAL "dogfood" OR arg STREQUAL "release") list(APPEND __value ${arg}) endif() endforeach() if(${BUILD_TYPE} IN_LIST __value) __dgl_option(${variable} "${description}" ON) else() # NOTE: __dgl_option will not reset existing variables. __dgl_option(${variable} "${description}" OFF) endif() endmacro() ####################################################### # An option that the user can select. Accepts an optional condition that controls when the option is available to the user; when the condition is false the variable is removed from the cache. # Usage: # dgl_option(<variable> "doc string" <value> [IF <condition>]) macro(dgl_option variable description value) set(__value ${value}) set(__condition "") set(__varname "__value") foreach(arg ${ARGN}) if(arg STREQUAL "IF" OR arg STREQUAL "if") set(__varname "__condition") else() list(APPEND ${__varname} ${arg}) endif() endforeach() unset(__varname) if("${__condition}" STREQUAL "") set(__condition 2 GREATER 1) endif() if(${__condition}) if("${__value}" MATCHES ";") if(${__value}) __dgl_option(${variable} "${description}" ON) else() __dgl_option(${variable} "${description}" OFF) endif() elseif(DEFINED ${__value}) if(${__value}) __dgl_option(${variable} "${description}" ON) else() __dgl_option(${variable} "${description}" OFF) endif() else() __dgl_option(${variable} "${description}" "${__value}") endif() else() unset(${variable} CACHE) endif() endmacro() ================================================ FILE: conda/dgl/README.md ================================================ conda recipe === Build the package with `conda build .` ================================================ FILE: conda/dgl/bld.bat ================================================ REM Needs vcvars64.bat to be called git submodule init git submodule update --recursive md build cd build COPY %TEMP%\dgl.dll . 
cd ..\python "%PYTHON%" setup.py install --single-version-externally-managed --record=record.txt || EXIT /B 1 EXIT /B ================================================ FILE: conda/dgl/build.sh ================================================ git submodule init git submodule update --recursive mkdir build cd build cmake -DUSE_CUDA=$USE_CUDA -DUSE_OPENMP=ON -DCUDA_ARCH_NAME=All .. make cd ../python $PYTHON setup.py install --single-version-externally-managed --record=record.txt ================================================ FILE: conda/dgl/conda_build_config.yaml ================================================ python: - 3.8 - 3.9 - 3.10 - 3.11 - 3.12 ================================================ FILE: conda/dgl/meta.yaml ================================================ package: name: dgl{{ environ.get('DGL_PACKAGE_SUFFIX', '') }} version: 2.5{{ environ.get('DGL_VERSION_SUFFIX', '') }} source: git_rev: {{ environ.get('DGL_RELEASE_BRANCH', 'master') }} git_url: https://github.com/dmlc/dgl.git requirements: build: - python {{ python }} - setuptools - cmake - git - cython run: - python - numpy - scipy - networkx - requests - tqdm - psutil build: script_env: - USE_CUDA - CUDA_VER - CACHEDIR - DGL_VERSION_SUFFIX about: home: https://github.com/dmlc/dgl.git license_file: {{ environ.get('SRC_DIR') }}/LICENSE license: Apache ================================================ FILE: conda/dgl/run_test.bat ================================================ set DGLBACKEND=numpy %PYTHON% -c "import dgl" ================================================ FILE: conda/dgl/run_test.sh ================================================ DGLBACKEND=numpy $PYTHON -c 'import dgl' ================================================ FILE: dgl_sparse/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.8) project(dgl_sparse C CXX) # Find PyTorch cmake files and PyTorch versions with the python interpreter $PYTHON_INTERP # ("python3" or "python" if empty) 
if(NOT PYTHON_INTERP) find_program(PYTHON_INTERP NAMES python3 python) endif() message(STATUS "Using Python interpreter: ${PYTHON_INTERP}") file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/find_cmake.py FIND_CMAKE_PY) execute_process( COMMAND ${PYTHON_INTERP} ${FIND_CMAKE_PY} OUTPUT_VARIABLE TORCH_PREFIX_VER OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "find_cmake.py output: ${TORCH_PREFIX_VER}") list(GET TORCH_PREFIX_VER 0 TORCH_PREFIX) list(GET TORCH_PREFIX_VER 1 TORCH_VER) message(STATUS "Configuring for PyTorch ${TORCH_VER}") string(REPLACE "." ";" TORCH_VERSION_LIST ${TORCH_VER}) list(GET TORCH_VERSION_LIST 0 TORCH_VERSION_MAJOR) list(GET TORCH_VERSION_LIST 1 TORCH_VERSION_MINOR) set(SPARSE_LINKER_LIBS "") if(USE_CUDA) add_definitions(-DDGL_USE_CUDA) enable_language(CUDA) endif() # For windows, define NOMINMAX to avoid conflict with std::min/max if(MSVC) add_definitions(-DNOMINMAX) endif() set(Torch_DIR "${TORCH_PREFIX}/Torch") message(STATUS "Setting directory to ${Torch_DIR}") find_package(Torch REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TORCH_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb") set(LIB_DGL_SPARSE_NAME "dgl_sparse_pytorch_${TORCH_VER}") list(APPEND SPARSE_LINKER_LIBS ${TORCH_LIBRARIES}) set(SPARSE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") set(SPARSE_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/include") file(GLOB SPARSE_HEADERS ${SPARSE_INCLUDE}) file(GLOB SPARSE_SRC ${SPARSE_DIR}/*.cc ${SPARSE_DIR}/cpu/*.cc ) if(USE_CUDA) file(GLOB SPARSE_CUDA_SRC ${SPARSE_DIR}/cuda/*.cu ) list(APPEND SPARSE_SRC ${SPARSE_CUDA_SRC}) endif() add_library(${LIB_DGL_SPARSE_NAME} SHARED ${SPARSE_SRC} ${SPARSE_HEADERS}) target_include_directories( ${LIB_DGL_SPARSE_NAME} PRIVATE ${SPARSE_DIR} ${SPARSE_HEADERS}) target_link_libraries(${LIB_DGL_SPARSE_NAME} ${SPARSE_LINKER_LIBS}) target_compile_definitions(${LIB_DGL_SPARSE_NAME} PRIVATE TORCH_VERSION_MAJOR=${TORCH_VERSION_MAJOR}) 
target_compile_definitions(${LIB_DGL_SPARSE_NAME} PRIVATE TORCH_VERSION_MINOR=${TORCH_VERSION_MINOR}) target_include_directories(${LIB_DGL_SPARSE_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/third_party/dmlc-core/include") message(STATUS "DGL include directories: ${DGL_INCLUDE_DIRS}") target_include_directories(${LIB_DGL_SPARSE_NAME} PRIVATE ${DGL_INCLUDE_DIRS}) target_link_directories(${LIB_DGL_SPARSE_NAME} PRIVATE ${DGL_BUILD_DIR} "${DGL_BUILD_DIR}/third_party/dmlc-core") # The Torch CMake configuration only sets up the path for the MKL library when # using the conda distribution. The following is a workaround to address this # when using a standalone installation of MKL. if(DEFINED MKL_LIBRARIES) target_link_directories(${LIB_DGL_SPARSE_NAME} PRIVATE ${MKL_ROOT}/lib/${MKL_ARCH}) endif() if (EXTERNAL_DMLC_LIB_PATH) # external dmlc requires OpenMP link include(FindOpenMP) if(OPENMP_FOUND) set(CMAKE_C_FLAGS "${OpenMP_C_FLAGS} ${CMAKE_C_FLAGS}") set(CMAKE_CXX_FLAGS "${OpenMP_CXX_FLAGS} ${CMAKE_CXX_FLAGS}") endif(OPENMP_FOUND) message(STATUS "looking for dmlc library in ${EXTERNAL_DMLC_LIB_PATH}") find_package(dmlc REQUIRED HINTS ${EXTERNAL_DMLC_LIB_PATH}) target_link_libraries(${LIB_DGL_SPARSE_NAME} dmlc::dmlc dgl) else (EXTERNAL_DMLC_LIB_PATH) target_link_libraries(${LIB_DGL_SPARSE_NAME} dmlc dgl) endif() set(GOOGLE_TEST 0) # Turn off dmlc-core test # Configure dgl_sparse library to use C++17 standard for compatibility with PyTorch set_property(TARGET ${LIB_DGL_SPARSE_NAME} PROPERTY CXX_STANDARD 17) ================================================ FILE: dgl_sparse/build.bat ================================================ REM Helper script to build DGL sparse libraries for PyTorch @ECHO OFF SETLOCAL EnableDelayedExpansion MD "%BINDIR%\dgl_sparse" DEL /S /Q build MD build PUSHD build IF x%1x == xx GOTO single COPY %BINDIR%\third_party\dmlc-core\Release\dmlc.lib %BINDIR% COPY %BINDIR%\Release\dgl.lib %BINDIR% FOR %%X IN (%*) DO ( DEL /S /Q * "%CMAKE_COMMAND%" 
-DDGL_BUILD_DIR=%BINDIR% -DCMAKE_CONFIGURATION_TYPES=Release -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DTORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST% -DDGL_INCLUDE_DIRS=%INCLUDEDIR: =;% -DUSE_CUDA=%USE_CUDA% -DPYTHON_INTERP=%%X .. -G "Visual Studio 16 2019" || EXIT /B 1 msbuild dgl_sparse.sln /m /nr:false || EXIT /B 1 COPY /Y Release\*.dll "%BINDIR%\dgl_sparse" || EXIT /B 1 ) GOTO end :single DEL /S /Q * "%CMAKE_COMMAND%" -DDGL_BUILD_DIR=%BINDIR% -DCMAKE_CONFIGURATION_TYPES=Release -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DTORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST% -DUSE_CUDA=%USE_CUDA% -DDGL_INCLUDE_DIRS=%INCLUDEDIR: =;% .. -G "Visual Studio 16 2019" || EXIT /B 1 msbuild dgl_sparse.sln /m /nr:false || EXIT /B 1 COPY /Y Release\*.dll "%BINDIR%\dgl_sparse" || EXIT /B 1 :end POPD ENDLOCAL ================================================ FILE: dgl_sparse/build.sh ================================================ #!/bin/bash # Helper script to build dgl sparse libraries for PyTorch set -e mkdir -p build mkdir -p $BINDIR/dgl_sparse cd build if [ $(uname) = 'Darwin' ]; then CPSOURCE=*.dylib else CPSOURCE=*.so fi CMAKE_FLAGS="-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_TOOLKIT_ROOT_DIR -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST -DUSE_CUDA=$USE_CUDA -DEXTERNAL_DMLC_LIB_PATH=$EXTERNAL_DMLC_LIB_PATH" # CMake passes in the list of directories separated by spaces. Here we replace them with semicolons. CMAKE_FLAGS="$CMAKE_FLAGS -DDGL_INCLUDE_DIRS=${INCLUDEDIR// /;} -DDGL_BUILD_DIR=$BINDIR" echo $CMAKE_FLAGS if [ $# -eq 0 ]; then $CMAKE_COMMAND $CMAKE_FLAGS .. make -j cp -v $CPSOURCE $BINDIR/dgl_sparse else for PYTHON_INTERP in $@; do TORCH_VER=$($PYTHON_INTERP -c 'import torch; print(torch.__version__.split("+")[0])') mkdir -p $TORCH_VER cd $TORCH_VER $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../.. make -j cp -v $CPSOURCE $BINDIR/dgl_sparse cd .. 
done fi ================================================ FILE: dgl_sparse/find_cmake.py ================================================ import os import torch cmake_prefix_path = getattr( torch.utils, "cmake_prefix_path", os.path.join(os.path.dirname(torch.__file__), "share", "cmake"), ) version = torch.__version__.split("+")[0] print(";".join([cmake_prefix_path, version])) ================================================ FILE: dgl_sparse/include/sparse/dgl_headers.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/dgl_headers.h * @brief DGL headers used in the sparse library. This is a workaround to * avoid the macro naming conflict between dmlc/logging.h and torch logger. This * file includes all the DGL headers used in the sparse library and * undefines logging macros defined in dmlc/logging.h. There are two rules to * use this file. (1) All DGL headers used in the sparse library should be and * only be registered in this file. (2) When including Pytorch headers, this * file should be included in advance. */ #ifndef SPARSE_DGL_HEADERS_H_ #define SPARSE_DGL_HEADERS_H_ #include #include #include #include #include #undef CHECK #undef CHECK_OP #undef CHECK_EQ #undef CHECK_NE #undef CHECK_LE #undef CHECK_LT #undef CHECK_GE #undef CHECK_GT #undef CHECK_NOTNULL #undef DCHECK #undef DCHECK_EQ #undef DCHECK_NE #undef DCHECK_LE #undef DCHECK_LT #undef DCHECK_GE #undef DCHECK_GT #undef DCHECK_NOTNULL #undef VLOG #undef LOG #undef DLOG #undef LOG_IF #endif // SPARSE_DGL_HEADERS_H_ ================================================ FILE: dgl_sparse/include/sparse/elementwise_op.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/elementwise_op.h * @brief DGL C++ sparse elementwise operators. 
*/ #ifndef SPARSE_ELEMENTWISE_OP_H_ #define SPARSE_ELEMENTWISE_OP_H_ #include namespace dgl { namespace sparse { /** * @brief Adds two sparse matrices possibly with different sparsities. * * @param lhs_mat SparseMatrix * @param rhs_mat SparseMatrix * * @return SparseMatrix */ c10::intrusive_ptr SpSpAdd( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat); /** * @brief Multiplies two sparse matrices possibly with different sparsities. * * @param lhs_mat SparseMatrix * @param rhs_mat SparseMatrix * * @return SparseMatrix */ c10::intrusive_ptr SpSpMul( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat); /** * @brief Divides two sparse matrices with the same sparsity. * * @param lhs_mat SparseMatrix * @param rhs_mat SparseMatrix * * @return SparseMatrix */ c10::intrusive_ptr SpSpDiv( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat); } // namespace sparse } // namespace dgl #endif // SPARSE_ELEMENTWISE_OP_H_ ================================================ FILE: dgl_sparse/include/sparse/matrix_ops.h ================================================ /** * Copyright (c) 2023 by Contributors * @file sparse/matrix_ops.h * @brief DGL C++ sparse matrix operators. */ #ifndef SPARSE_MATRIX_OPS_H_ #define SPARSE_MATRIX_OPS_H_ #include #include namespace dgl { namespace sparse { /** * @brief Compute the intersection of two COO matrices. Return the intersection * matrix, and the indices of the intersection in the left-hand-side and * right-hand-side matrices. * * @param lhs The left-hand-side COO matrix. * @param rhs The right-hand-side COO matrix. * * @return A tuple of COO matrix, lhs indices, and rhs indices. */ std::tuple, torch::Tensor, torch::Tensor> COOIntersection( const std::shared_ptr& lhs, const std::shared_ptr& rhs); /** * @brief Compact sparse matrix by removing rows or columns without non-zero * elements in the sparse matrix and relabeling indices of the dimension. 
* * This function serves a dual purpose: it allows you to reorganize the * indices within a specific dimension (rows or columns) of the sparse matrix * and, if needed, place certain 'leading_indices' at the beginning of the * compact dimension. * * @param mat The sparse matrix to be compacted. * @param dim The dimension to compact. Should be 0 or 1. Use 0 for row-wise * compaction and 1 for column-wise compaction. * @param leading_indices An optional tensor containing row or column ids that * should be placed at the beginning of the compact dimension. * * @return A tuple containing the compacted sparse matrix and the index mapping * of the compact dimension from the new index to the original index. */ std::tuple, torch::Tensor> Compact( const c10::intrusive_ptr& mat, int64_t dim, const torch::optional& leading_indices); } // namespace sparse } // namespace dgl #endif // SPARSE_MATRIX_OPS_H_ ================================================ FILE: dgl_sparse/include/sparse/reduction.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/reduction.h * @brief DGL C++ sparse matrix reduction operators. */ #ifndef SPARSE_REDUCTION_H_ #define SPARSE_REDUCTION_H_ #include #include namespace dgl { namespace sparse { /** * @brief Reduces a sparse matrix along the specified sparse dimension. * * @param A The sparse matrix. * @param reduce The reduce operator. Must be either "sum", "smin", "smax", * "smean", or "sprod". * @param dim The sparse dimension to reduce along. Must be either 0 (rows) or * 1 (columns). Optional; defaults to torch::nullopt. 
* * @return Tensor */ torch::Tensor Reduce( const c10::intrusive_ptr& A, const std::string& reduce, const torch::optional& dim = torch::nullopt); inline torch::Tensor ReduceSum( const c10::intrusive_ptr& A, const torch::optional& dim = torch::nullopt) { return Reduce(A, "sum", dim); } inline torch::Tensor ReduceMin( const c10::intrusive_ptr& A, const torch::optional& dim = torch::nullopt) { return Reduce(A, "smin", dim); } inline torch::Tensor ReduceMax( const c10::intrusive_ptr& A, const torch::optional& dim = torch::nullopt) { return Reduce(A, "smax", dim); } inline torch::Tensor ReduceMean( const c10::intrusive_ptr& A, const torch::optional& dim = torch::nullopt) { return Reduce(A, "smean", dim); } inline torch::Tensor ReduceProd( const c10::intrusive_ptr& A, const torch::optional& dim = torch::nullopt) { return Reduce(A, "sprod", dim); } } // namespace sparse } // namespace dgl #endif // SPARSE_REDUCTION_H_ ================================================ FILE: dgl_sparse/include/sparse/sddmm.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/sddmm.h * @brief DGL C++ SDDMM operator. */ #ifndef SPARSE_SDDMM_H_ #define SPARSE_SDDMM_H_ #include #include namespace dgl { namespace sparse { /** * @brief Perform a sampled matrix multiplication of a sparse matrix and two * dense matrices. It calculates `sparse_mat * (mat1 @ mat2)`. The SDDMM can be * batched, where the batch dimension is the last dimension for all input * matrices. * * There are four cases for the input and output matrix shapes: * (1) (n, m), (n, k), (k, m), and (n, m); * (2) (n, m), (n,), and (m,), and (n, m); * (3) (n, m, b), (n, k, b), (k, m, b), and (n, m, b); * (4) (n, m), (n, k, b), (k, m, b), and (n, m, b); * * This function supports autograd for `mat1` and `mat2` but does not support * high order gradient. * * * @param sparse_mat The sparse matrix. * @param mat1 The first dense matrix. * @param mat2 The second dense matrix. 
* * @return SparseMatrix */ c10::intrusive_ptr SDDMM( const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1, torch::Tensor mat2); } // namespace sparse } // namespace dgl #endif // SPARSE_SDDMM_H_ ================================================ FILE: dgl_sparse/include/sparse/softmax.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/softmax.h * @brief DGL C++ Softmax operator */ #ifndef SPARSE_SOFTMAX_H_ #define SPARSE_SOFTMAX_H_ #include namespace dgl { namespace sparse { /** * @brief Apply softmax to the non-zero entries of the sparse matrix on the * dimension dim. dim = 0 or 1 indicates column-wise or row-wise softmax * respectively. * * This function supports autograd for the sparse matrix, but it does not * support higher order gradient. * * @param sparse_mat The sparse matrix * @param dim The dimension to apply softmax * * @return Sparse matrix */ c10::intrusive_ptr Softmax( const c10::intrusive_ptr& sparse_mat, int64_t dim); } // namespace sparse } // namespace dgl #endif // SPARSE_SOFTMAX_H_ ================================================ FILE: dgl_sparse/include/sparse/sparse_format.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/sparse_format.h * @brief DGL C++ sparse format header. */ #ifndef SPARSE_SPARSE_FORMAT_H_ #define SPARSE_SPARSE_FORMAT_H_ // clang-format off #include // clang-format on #include #include #include #include namespace dgl { namespace sparse { /** @brief SparseFormat enumeration. */ enum SparseFormat { kCOO, kCSR, kCSC, kDiag }; /** @brief COO sparse structure. */ struct COO { /** @brief The shape of the matrix. */ int64_t num_rows = 0, num_cols = 0; /** * @brief COO tensor of shape (2, nnz), stacking the row and column indices. */ torch::Tensor indices; /** @brief Whether the row indices are sorted. */ bool row_sorted = false; /** @brief Whether the column indices per row are sorted. 
*/ bool col_sorted = false; }; /** @brief CSR sparse structure. */ struct CSR { /** @brief The dense shape of the matrix. */ int64_t num_rows = 0, num_cols = 0; /** @brief CSR format index pointer array of the matrix. */ torch::Tensor indptr; /** @brief CSR format index array of the matrix. */ torch::Tensor indices; /** @brief Data index tensor. When it is null, assume it is from 0 to NNZ - 1. */ torch::optional value_indices; /** @brief Whether the column indices per row are sorted. */ bool sorted = false; }; struct Diag { /** @brief The dense shape of the matrix. */ int64_t num_rows = 0, num_cols = 0; }; /** @brief Convert an old DGL COO format to a COO in the sparse library. */ std::shared_ptr COOFromOldDGLCOO(const aten::COOMatrix& dgl_coo); /** @brief Convert a COO in the sparse library to an old DGL COO matrix. */ aten::COOMatrix COOToOldDGLCOO(const std::shared_ptr& coo); /** @brief Convert an old DGL CSR format to a CSR in the sparse library. */ std::shared_ptr CSRFromOldDGLCSR(const aten::CSRMatrix& dgl_csr); /** @brief Convert a CSR in the sparse library to an old DGL CSR matrix. */ aten::CSRMatrix CSRToOldDGLCSR(const std::shared_ptr& csr); /** * @brief Convert a COO and its nonzero values to a Torch COO matrix. * @param coo The COO format in the sparse library * @param value Values of the sparse matrix * * @return Torch Sparse Tensor in COO format */ torch::Tensor COOToTorchCOO( const std::shared_ptr& coo, torch::Tensor value); /** @brief Convert a CSR format to COO format. */ std::shared_ptr CSRToCOO(const std::shared_ptr& csr); /** @brief Convert a CSC format to COO format. */ std::shared_ptr CSCToCOO(const std::shared_ptr& csc); /** @brief Convert a COO format to CSR format. */ std::shared_ptr COOToCSR(const std::shared_ptr& coo); /** @brief Convert a CSC format to CSR format. */ std::shared_ptr CSCToCSR(const std::shared_ptr& csc); /** @brief Convert a COO format to CSC format. 
*/ std::shared_ptr COOToCSC(const std::shared_ptr& coo); /** @brief Convert a CSR format to CSC format. */ std::shared_ptr CSRToCSC(const std::shared_ptr& csr); /** @brief Convert a Diag format to COO format. */ std::shared_ptr DiagToCOO( const std::shared_ptr& diag, const c10::TensorOptions& indices_options); /** @brief Convert a Diag format to CSR format. */ std::shared_ptr DiagToCSR( const std::shared_ptr& diag, const c10::TensorOptions& indices_options); /** @brief Convert a Diag format to CSC format. */ std::shared_ptr DiagToCSC( const std::shared_ptr& diag, const c10::TensorOptions& indices_options); /** @brief COO transposition. */ std::shared_ptr COOTranspose(const std::shared_ptr& coo); /** * @brief Sort the COO matrix by row and column indices. * @param coo The COO matrix to sort. * @return A pair of the sorted COO matrix and the permutation indices. */ std::pair, torch::Tensor> COOSort( const std::shared_ptr& coo); } // namespace sparse } // namespace dgl #endif // SPARSE_SPARSE_FORMAT_H_ ================================================ FILE: dgl_sparse/include/sparse/sparse_matrix.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/sparse_matrix.h * @brief DGL C++ sparse matrix header. */ #ifndef SPARSE_SPARSE_MATRIX_H_ #define SPARSE_SPARSE_MATRIX_H_ // clang-format off #include // clang-format on #include #include #include #include #include #include #include namespace dgl { namespace sparse { /** @brief SparseMatrix bound to Python. */ class SparseMatrix : public torch::CustomClassHolder { public: /** * @brief General constructor to construct a sparse matrix for different * sparse formats. At least one of the sparse formats should be provided, * while others could be nullptrs. * * @param coo The COO format. * @param csr The CSR format. * @param csc The CSC format. * @param diag The Diag format. * @param value Value of the sparse matrix. * @param shape Shape of the sparse matrix. 
*/ SparseMatrix( const std::shared_ptr& coo, const std::shared_ptr& csr, const std::shared_ptr& csc, const std::shared_ptr& diag, torch::Tensor value, const std::vector& shape); /** * @brief Construct a SparseMatrix from a COO format. * @param coo The COO format * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromCOOPointer( const std::shared_ptr& coo, torch::Tensor value, const std::vector& shape); /** * @brief Construct a SparseMatrix from a CSR format. * @param csr The CSR format * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromCSRPointer( const std::shared_ptr& csr, torch::Tensor value, const std::vector& shape); /** * @brief Construct a SparseMatrix from a CSC format. * @param csc The CSC format * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromCSCPointer( const std::shared_ptr& csc, torch::Tensor value, const std::vector& shape); /** * @brief Construct a SparseMatrix from a Diag format. * @param diag The Diag format * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromDiagPointer( const std::shared_ptr& diag, torch::Tensor value, const std::vector& shape); /** * @brief Create a SparseMatrix from tensors in COO format. * @param indices COO coordinates with shape (2, nnz). * @param value Values of the sparse matrix. * @param shape Shape of the sparse matrix. * * @return SparseMatrix */ static c10::intrusive_ptr FromCOO( torch::Tensor indices, torch::Tensor value, const std::vector& shape); /** * @brief Create a SparseMatrix from tensors in CSR format. 
* @param indptr Index pointer array of the CSR * @param indices Indices array of the CSR * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromCSR( torch::Tensor indptr, torch::Tensor indices, torch::Tensor value, const std::vector& shape); /** * @brief Create a SparseMatrix from tensors in CSC format. * @param indptr Index pointer array of the CSC * @param indices Indices array of the CSC * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromCSC( torch::Tensor indptr, torch::Tensor indices, torch::Tensor value, const std::vector& shape); /** * @brief Create a SparseMatrix with Diag format. * @param value Values of the sparse matrix * @param shape Shape of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr FromDiag( torch::Tensor value, const std::vector& shape); /** * @brief Create a SparseMatrix by selecting rows or columns based on provided * indices. * * This function allows you to create a new SparseMatrix by selecting specific * rows or columns from the original SparseMatrix based on the provided * indices. The selection can be performed either row-wise or column-wise, * determined by the 'dim' parameter. * * @param dim Select rows (dim=0) or columns (dim=1). * @param ids A tensor containing the indices of the selected rows or columns. * * @return A new SparseMatrix containing the selected rows or columns. * * @note The 'dim' parameter should be either 0 (for row-wise selection) or 1 * (for column-wise selection). * @note The 'ids' tensor should contain valid indices within the range of the * original SparseMatrix's dimensions. */ c10::intrusive_ptr IndexSelect(int64_t dim, torch::Tensor ids); /** * @brief Create a SparseMatrix by selecting a range of rows or columns based * on provided indices. 
* * This function allows you to create a new SparseMatrix by selecting a range * of specific rows or columns from the original SparseMatrix based on the * provided indices. The selection can be performed either row-wise or * column-wise, determined by the 'dim' parameter. * * @param dim Select rows (dim=0) or columns (dim=1). * @param start The starting index (inclusive) of the range. * @param end The ending index (exclusive) of the range. * * @return A new SparseMatrix containing the selected range of rows or * columns. * * @note The 'dim' parameter should be either 0 (for row-wise selection) or 1 * (for column-wise selection). * @note The 'start' and 'end' indices should be valid indices within * the valid range of the original SparseMatrix's dimensions. */ c10::intrusive_ptr RangeSelect( int64_t dim, int64_t start, int64_t end); /** * @brief Create a SparseMatrix by sampling elements based on the specified * dimension and sample count. * * If `ids` is provided, this function samples elements from the specified * set of row or column IDs, resulting in a sparse matrix containing only * the sampled rows or columns. * * @param dim Select rows (dim=0) or columns (dim=1) for sampling. * @param fanout The number of elements to randomly sample from each row or * column. * @param ids An optional tensor containing row or column IDs from which to * sample elements. * @param replace Indicates whether repeated sampling of the same element * is allowed. If True, repeated sampling is allowed; otherwise, it is not * allowed. * @param bias An optional boolean flag indicating whether to enable biasing * during sampling. If True, the values of the sparse matrix will be used as * bias weights, meaning that elements with higher values will be more likely * to be sampled. Otherwise, all elements will be sampled uniformly, * regardless of their value. * * @return A new SparseMatrix with the same shape as the original matrix * containing the sampled elements. 
* * @note If 'replace = false' and there are fewer elements than 'fanout', * all non-zero elements will be sampled. * @note If 'ids' is not provided, the function will sample from * all rows or columns. */ c10::intrusive_ptr Sample( int64_t dim, int64_t fanout, torch::Tensor ids, bool replace, bool bias); /** * @brief Create a SparseMatrix from a SparseMatrix using new values. * @param mat An existing sparse matrix * @param value New values of the sparse matrix * * @return SparseMatrix */ static c10::intrusive_ptr ValLike( const c10::intrusive_ptr& mat, torch::Tensor value); /** @return Value of the sparse matrix. */ inline torch::Tensor value() const { return value_; } /** @return Shape of the sparse matrix. */ inline const std::vector& shape() const { return shape_; } /** @return Number of non-zero values */ inline int64_t nnz() const { return value_.size(0); } /** @return Non-zero value data type */ inline caffe2::TypeMeta dtype() const { return value_.dtype(); } /** @return Device of the sparse matrix */ inline torch::Device device() const { return value_.device(); } /** @return COO of the sparse matrix. The COO is created if not exists. */ std::shared_ptr COOPtr(); /** @return CSR of the sparse matrix. The CSR is created if not exists. */ std::shared_ptr CSRPtr(); /** @return CSC of the sparse matrix. The CSC is created if not exists. */ std::shared_ptr CSCPtr(); /** * @return Diagonal format of the sparse matrix. An error will be raised if * it does not have a diagonal format. */ std::shared_ptr DiagPtr(); /** @brief Check whether this sparse matrix has COO format. */ inline bool HasCOO() const { return coo_ != nullptr; } /** @brief Check whether this sparse matrix has CSR format. */ inline bool HasCSR() const { return csr_ != nullptr; } /** @brief Check whether this sparse matrix has CSC format. */ inline bool HasCSC() const { return csc_ != nullptr; } /** @brief Check whether this sparse matrix has Diag format. 
*/ inline bool HasDiag() const { return diag_ != nullptr; } /** @return {row, col} tensors in the COO format. */ std::tuple COOTensors(); /** @return Stacked row and col tensors in the COO format. */ torch::Tensor Indices(); /** @return {row, col, value_indices} tensors in the CSR format. */ std::tuple> CSRTensors(); /** @return {row, col, value_indices} tensors in the CSC format. */ std::tuple> CSCTensors(); /** @brief Return the transposition of the sparse matrix. It transposes the * first existing sparse format by checking COO, CSR, and CSC. */ c10::intrusive_ptr Transpose() const; /** * @brief Return a new coalesced matrix. * * A coalesced sparse matrix satisfies the following properties: * - the indices of the non-zero elements are unique, * - the indices are sorted in lexicographical order. * * @return A coalesced sparse matrix. */ c10::intrusive_ptr Coalesce(); /** * @brief Return true if this sparse matrix contains duplicate indices. * @return A bool flag. */ bool HasDuplicate(); private: /** @brief Create the COO format for the sparse matrix internally */ void _CreateCOO(); /** @brief Create the CSR format for the sparse matrix internally */ void _CreateCSR(); /** @brief Create the CSC format for the sparse matrix internally */ void _CreateCSC(); // COO/CSC/CSR/Diag pointers. Nullptr indicates non-existence. std::shared_ptr coo_; std::shared_ptr csr_, csc_; std::shared_ptr diag_; // Value of the SparseMatrix torch::Tensor value_; // Shape of the SparseMatrix const std::vector shape_; }; } // namespace sparse } // namespace dgl #endif // SPARSE_SPARSE_MATRIX_H_ ================================================ FILE: dgl_sparse/include/sparse/spmm.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/spmm.h * @brief DGL C++ SpMM operator. 
*/ #ifndef SPARSE_SPMM_H_ #define SPARSE_SPMM_H_ #include #include namespace dgl { namespace sparse { /** * @brief Perform a matrix multiplication of the sparse matrix and dense * matrix. The SpMM can be batched, where the batch dimension is the last * dimension for both sparse and dense matrices. * * There are three cases for sparse, dense, and output matrix shapes: * (1) (n, m), (m, k), and (n, k); * (2) (n, m), (m,), and (n,); * (3) (n, m, b), (m, k, b), and (n, k, b). * * This function supports autograd for both the sparse and dense matrix but does * not support higher order gradient. * * @param sparse_mat The sparse matrix. * @param dense_mat The dense matrix. * * @return Dense matrix. */ torch::Tensor SpMM( const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat); } // namespace sparse } // namespace dgl #endif // SPARSE_SPMM_H_ ================================================ FILE: dgl_sparse/include/sparse/spspmm.h ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse/spspmm.h * @brief DGL C++ SpSpMM operator. */ #ifndef SPARSE_SPSPMM_H_ #define SPARSE_SPSPMM_H_ #include #include namespace dgl { namespace sparse { /** * @brief Perform a sparse-sparse matrix multiplication on matrices with * possibly different sparsities. The two sparse matrices must have * 1-D values. If the first sparse matrix has shape (n, m), the second * sparse matrix must have shape (m, k), and the returned sparse matrix has * shape (n, k). * * This function supports autograd for both sparse matrices but does * not support higher order gradient. * * @param lhs_mat The first sparse matrix of shape (n, m). * @param rhs_mat The second sparse matrix of shape (m, k). * * @return Sparse matrix of shape (n, k). 
*/ c10::intrusive_ptr SpSpMM( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat); } // namespace sparse } // namespace dgl #endif // SPARSE_SPSPMM_H_ ================================================ FILE: dgl_sparse/src/cpu/matrix_ops_impl.cc ================================================ /** * Copyright (c) 2023 by Contributors * @file cpu/matrix_ops_impl.cc * @brief DGL C++ matrix operators. */ #include "./matrix_ops_impl.h" namespace dgl { namespace sparse {} // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/elemenwise_op.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file elementwise_op.cc * @brief DGL C++ sparse elementwise operator implementation. */ #include #include #include #include #include #include "./utils.h" namespace dgl { namespace sparse { using namespace torch::autograd; c10::intrusive_ptr SpSpAdd( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat) { ElementwiseOpSanityCheck(lhs_mat, rhs_mat); if (lhs_mat->HasDiag() && rhs_mat->HasDiag()) { return SparseMatrix::FromDiagPointer( lhs_mat->DiagPtr(), lhs_mat->value() + rhs_mat->value(), lhs_mat->shape()); } auto torch_lhs = COOToTorchCOO(lhs_mat->COOPtr(), lhs_mat->value()); auto torch_rhs = COOToTorchCOO(rhs_mat->COOPtr(), rhs_mat->value()); auto sum = (torch_lhs + torch_rhs).coalesce(); return SparseMatrix::FromCOO(sum.indices(), sum.values(), lhs_mat->shape()); } class SpSpMulAutoGrad : public Function { public: static variable_list forward( AutogradContext* ctx, c10::intrusive_ptr lhs_mat, torch::Tensor lhs_val, c10::intrusive_ptr rhs_mat, torch::Tensor rhs_val); static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs); }; variable_list SpSpMulAutoGrad::forward( AutogradContext* ctx, c10::intrusive_ptr lhs_mat, torch::Tensor lhs_val, c10::intrusive_ptr rhs_mat, torch::Tensor rhs_val) { std::shared_ptr intersection; torch::Tensor 
lhs_indices, rhs_indices; std::tie(intersection, lhs_indices, rhs_indices) = COOIntersection(lhs_mat->COOPtr(), rhs_mat->COOPtr()); auto lhs_intersect_val = lhs_val.index_select(0, lhs_indices); auto rhs_intersect_val = rhs_val.index_select(0, rhs_indices); auto ret_val = lhs_intersect_val * rhs_intersect_val; auto ret_mat = SparseMatrix::FromCOOPointer(intersection, ret_val, lhs_mat->shape()); ctx->saved_data["lhs_require_grad"] = lhs_val.requires_grad(); ctx->saved_data["rhs_require_grad"] = rhs_val.requires_grad(); if (lhs_val.requires_grad()) { ctx->saved_data["lhs_val_shape"] = lhs_val.sizes().vec(); ctx->saved_data["rhs_intersect_lhs"] = SparseMatrix::ValLike(ret_mat, rhs_intersect_val); ctx->saved_data["lhs_indices"] = lhs_indices; } if (rhs_val.requires_grad()) { ctx->saved_data["rhs_val_shape"] = rhs_val.sizes().vec(); ctx->saved_data["lhs_intersect_rhs"] = SparseMatrix::ValLike(ret_mat, lhs_intersect_val); ctx->saved_data["rhs_indices"] = rhs_indices; } return {intersection->indices, ret_val}; } tensor_list SpSpMulAutoGrad::backward( AutogradContext* ctx, tensor_list grad_outputs) { torch::Tensor lhs_val_grad, rhs_val_grad; auto output_grad = grad_outputs[1]; if (ctx->saved_data["lhs_require_grad"].toBool()) { auto rhs_intersect_lhs = ctx->saved_data["rhs_intersect_lhs"].toCustomClass(); const auto& lhs_val_shape = ctx->saved_data["lhs_val_shape"].toIntVector(); auto lhs_indices = ctx->saved_data["lhs_indices"].toTensor(); lhs_val_grad = torch::zeros(lhs_val_shape, output_grad.options()); auto intersect_grad = rhs_intersect_lhs->value() * output_grad; lhs_val_grad.index_put_({lhs_indices}, intersect_grad); } if (ctx->saved_data["rhs_require_grad"].toBool()) { auto lhs_intersect_rhs = ctx->saved_data["lhs_intersect_rhs"].toCustomClass(); const auto& rhs_val_shape = ctx->saved_data["rhs_val_shape"].toIntVector(); auto rhs_indices = ctx->saved_data["rhs_indices"].toTensor(); rhs_val_grad = torch::zeros(rhs_val_shape, output_grad.options()); auto 
intersect_grad = lhs_intersect_rhs->value() * output_grad; rhs_val_grad.index_put_({rhs_indices}, intersect_grad); } return {torch::Tensor(), lhs_val_grad, torch::Tensor(), rhs_val_grad}; } c10::intrusive_ptr SpSpMul( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat) { ElementwiseOpSanityCheck(lhs_mat, rhs_mat); if (lhs_mat->HasDiag() && rhs_mat->HasDiag()) { return SparseMatrix::FromDiagPointer( lhs_mat->DiagPtr(), lhs_mat->value() * rhs_mat->value(), lhs_mat->shape()); } TORCH_CHECK( !lhs_mat->HasDuplicate() && !rhs_mat->HasDuplicate(), "Only support SpSpMul on sparse matrices without duplicate values") auto results = SpSpMulAutoGrad::apply( lhs_mat, lhs_mat->value(), rhs_mat, rhs_mat->value()); const auto& indices = results[0]; const auto& val = results[1]; return SparseMatrix::FromCOO(indices, val, lhs_mat->shape()); } c10::intrusive_ptr SpSpDiv( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat) { ElementwiseOpSanityCheck(lhs_mat, rhs_mat); if (lhs_mat->HasDiag() && rhs_mat->HasDiag()) { return SparseMatrix::FromDiagPointer( lhs_mat->DiagPtr(), lhs_mat->value() / rhs_mat->value(), lhs_mat->shape()); } std::shared_ptr sorted_lhs, sorted_rhs; torch::Tensor lhs_sorted_perm, rhs_sorted_perm; std::tie(sorted_lhs, lhs_sorted_perm) = COOSort(lhs_mat->COOPtr()); std::tie(sorted_rhs, rhs_sorted_perm) = COOSort(rhs_mat->COOPtr()); TORCH_CHECK( !lhs_mat->HasDuplicate() && !rhs_mat->HasDuplicate(), "Only support SpSpDiv on sparse matrices without duplicate values") TORCH_CHECK( torch::equal(sorted_lhs->indices, sorted_rhs->indices), "Cannot divide two COO matrices with different sparsities."); // This is to make sure the return matrix is in the same order as the lhs_mat auto lhs_sorted_rperm = lhs_sorted_perm.argsort(); auto rhs_perm_on_lhs = rhs_sorted_perm.index_select(0, lhs_sorted_rperm); auto lhs_value = lhs_mat->value(); auto rhs_value = rhs_mat->value().index_select(0, rhs_perm_on_lhs); auto ret_val = lhs_value / rhs_value; 
return SparseMatrix::FromCOOPointer( lhs_mat->COOPtr(), ret_val, lhs_mat->shape()); } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/matmul.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file matmul.cc * @brief DGL sparse matrix multiplication functions. */ #include "./matmul.h" // clang-format off #include // clang-format on #include #include #include "./utils.h" namespace dgl { namespace sparse { torch::Tensor SpMMNoAutoGrad( const c10::intrusive_ptr& sparse_mat, torch::Tensor sparse_val, torch::Tensor dense_mat, bool transpose_sparse) { const std::string op = "mul"; const std::string reduce = "sum"; const int64_t out_row = transpose_sparse ? sparse_mat->shape()[1] : sparse_mat->shape()[0]; std::vector shape = {out_row, dense_mat.size(1)}; // Batched SpMM if (sparse_val.dim() >= 2) { shape = {out_row, dense_mat.size(1), sparse_val.size(1)}; } auto ret = torch::zeros(shape, dense_mat.options()); auto dgl_sparse_val = TorchTensorToDGLArray(sparse_val); auto dgl_dense_mat = TorchTensorToDGLArray(dense_mat); auto dgl_ret = TorchTensorToDGLArray(ret); if (!transpose_sparse) { // The format for calculation will be chosen in the following order: CSR, // COO. CSR is created if the sparse matrix only has CSC format. if (sparse_mat->HasCSR() || !sparse_mat->HasCOO()) { // sparse_mat->CSRPtr() will implicitly convert CSC to CSR format if CSR // does not exist. auto csr = CSRToOldDGLCSR(sparse_mat->CSRPtr()); aten::CSRSpMM( op.c_str(), reduce.c_str(), csr, dgl_dense_mat, dgl_sparse_val, dgl_ret, {}); } else { // COO // Use the reverse order of aten::COOSpMM because it calculates A^T @ X. 
auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
      coo = aten::COOTranspose(coo);
      aten::COOSpMM(
          op.c_str(), reduce.c_str(), coo, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    }
  } else {  // transpose_sparse
    // The format for calculation will be chosen in the following order: CSC,
    // COO. CSC is created if the sparse matrix only has CSR format.
    if (sparse_mat->HasCSC() || !sparse_mat->HasCOO()) {
      // sparse_mat->CSCPtr() will implicitly convert CSR to CSC format if CSC
      // does not exist.
      // Use CSC in DGL's CSRSpMM is equivalent as computing A^T @ X.
      auto csc = CSRToOldDGLCSR(sparse_mat->CSCPtr());
      aten::CSRSpMM(
          op.c_str(), reduce.c_str(), csc, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    } else {  // COO
      // aten::COOSpMM calculates A^T @ X, which is exactly what the
      // transposed case needs, so the COO is passed without transposition.
      auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
      aten::COOSpMM(
          op.c_str(), reduce.c_str(), coo, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    }
  }
  return ret;
}

// Sampled dense-dense matrix multiplication on the sparsity pattern of
// `sparse_mat`: one "dot" result per non-zero entry. The values of
// `sparse_mat` are not read here.
torch::Tensor SDDMMNoAutoGrad(
    const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1,
    torch::Tensor mat2_tr) {
  const int64_t out_row = sparse_mat->nnz();
  std::vector shape({out_row});
  // Batched SDDMM
  if (mat1.dim() >= 3) {
    shape.push_back(mat1.size(2));
    // (N, K, B) -> (N, B, K)
    mat1 = mat1.transpose(1, 2);
    // (M, K, B) -> (M, B, K)
    mat2_tr = mat2_tr.transpose(1, 2);
  }
  auto ret = torch::zeros(shape, mat1.options());
  const std::string op = "dot";
  auto dgl_mat1 = TorchTensorToDGLArray(mat1);
  auto dgl_mat2_tr = TorchTensorToDGLArray(mat2_tr);
  auto dgl_ret = TorchTensorToDGLArray(ret);
  // The format for calculation will be chosen in the following order: CSR,
  // COO. CSR is created if the sparse matrix only has CSC format.
  if (sparse_mat->HasCSR() || !sparse_mat->HasCOO()) {
    // sparse_mat->CSRPtr() will implicitly convert CSC to CSR format if CSR
    // does not exist.
    auto csr = CSRToOldDGLCSR(sparse_mat->CSRPtr());
    aten::CSRSDDMM(
        op.c_str(), csr, dgl_mat1, dgl_mat2_tr, dgl_ret, 0 /* Lhs target: u */,
        2 /* rhs target: v */);
  } else {  // COO
    auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
    aten::COOSDDMM(
        op.c_str(), coo, dgl_mat1, dgl_mat2_tr, dgl_ret, 0 /* Lhs target: u */,
        2 /* rhs target: v */);
  }
  return ret;
}

// Combines every non-zero value of `sparse_mat` with the entry of `dense_mat`
// selected by that non-zero's row or column coordinate, using operator `op`.
// The result has one row per non-zero entry.
// NOTE(review): `sparse_val.size(1)` is read unconditionally, so the values
// are presumably expected to be 2-D here; confirm against the callers.
torch::Tensor BroadcastOpNoAutoGrad(
    const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat,
    const std::string& op, int64_t dim) {
  auto sparse_val = sparse_mat->value();
  const int64_t out_row = sparse_mat->nnz();
  const std::vector shape({out_row, sparse_val.size(1)});
  auto ret = torch::zeros(shape, sparse_val.options());

  auto dgl_sparse_val = TorchTensorToDGLArray(sparse_val);
  auto dgl_dense_mat = TorchTensorToDGLArray(dense_mat);
  auto dgl_ret = TorchTensorToDGLArray(ret);
  // Setting dgl_rhs_target to 0 or 2 means using row or column coordinates
  // to access dgl_dense_mat for each edge, respectively.
  auto dgl_rhs_target = dim == 0 ? 2 : 0;

  // The format for calculation will be chosen in the following order: COO,
  // CSR. COO is created if the sparse matrix only has CSC format.
  if (sparse_mat->HasCOO() || !sparse_mat->HasCSR()) {
    // sparse_mat->COOPtr() will implicitly convert CSC to COO format if COO
    // does not exist.
    auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
    aten::COOSDDMM(
        op.c_str(), coo, dgl_sparse_val, dgl_dense_mat, dgl_ret,
        1 /* Lhs target: e */, dgl_rhs_target);
  } else {
    auto csr = CSRToOldDGLCSR(sparse_mat->CSRPtr());
    aten::CSRSDDMM(
        op.c_str(), csr, dgl_sparse_val, dgl_dense_mat, dgl_ret,
        1 /* Lhs target: e */, dgl_rhs_target);
  }
  return ret;
}

// Broadcast subtraction: forwards to BroadcastOpNoAutoGrad with op "sub".
torch::Tensor BroadcastSubNoAutoGrad(
    const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat,
    int64_t dim) {
  return BroadcastOpNoAutoGrad(sparse_mat, dense_mat, "sub", dim);
}

// Broadcast division: forwards to BroadcastOpNoAutoGrad with op "div".
torch::Tensor BroadcastDivNoAutoGrad(
    const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat,
    int64_t dim) {
  return BroadcastOpNoAutoGrad(sparse_mat, dense_mat, "div", dim);
}

// Broadcast multiplication: forwards to BroadcastOpNoAutoGrad with op "mul".
torch::Tensor BroadcastMulNoAutoGrad(
    const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat,
    int64_t dim) {
  return BroadcastOpNoAutoGrad(sparse_mat, dense_mat, "mul", dim);
}

// Sparse-sparse matrix multiplication through aten::CSRMM. A transposed
// operand is supplied by handing its CSC structure to the CSR kernel.
c10::intrusive_ptr SpSpMMNoAutoGrad(
    const c10::intrusive_ptr& lhs_mat, torch::Tensor lhs_val,
    const c10::intrusive_ptr& rhs_mat, torch::Tensor rhs_val,
    bool lhs_transpose, bool rhs_transpose) {
  aten::CSRMatrix lhs_dgl_csr, rhs_dgl_csr;
  if (!lhs_transpose) {
    lhs_dgl_csr = CSRToOldDGLCSR(lhs_mat->CSRPtr());
  } else {
    lhs_dgl_csr = CSRToOldDGLCSR(lhs_mat->CSCPtr());
  }
  if (!rhs_transpose) {
    rhs_dgl_csr = CSRToOldDGLCSR(rhs_mat->CSRPtr());
  } else {
    rhs_dgl_csr = CSRToOldDGLCSR(rhs_mat->CSCPtr());
  }
  auto lhs_dgl_val = TorchTensorToDGLArray(lhs_val);
  auto rhs_dgl_val = TorchTensorToDGLArray(rhs_val);
  // The result shape follows the (possibly transposed) operand shapes.
  const int64_t ret_row =
      lhs_transpose ? lhs_mat->shape()[1] : lhs_mat->shape()[0];
  const int64_t ret_col =
      rhs_transpose ? rhs_mat->shape()[0] : rhs_mat->shape()[1];
  std::vector ret_shape({ret_row, ret_col});
  aten::CSRMatrix ret_dgl_csr;
  runtime::NDArray ret_val;
  std::tie(ret_dgl_csr, ret_val) =
      aten::CSRMM(lhs_dgl_csr, lhs_dgl_val, rhs_dgl_csr, rhs_dgl_val);
  return SparseMatrix::FromCSRPointer(
      CSRFromOldDGLCSR(ret_dgl_csr), DGLArrayToTorchTensor(ret_val),
      ret_shape);
}

}  // namespace sparse
}  // namespace dgl

================================================
FILE: dgl_sparse/src/matmul.h
================================================
/**
 * Copyright (c) 2022 by Contributors
 * @file matmul.h
 * @brief DGL sparse matrix multiplication functions.
 */
#ifndef DGL_SPARSE_MATMUL_H_
#define DGL_SPARSE_MATMUL_H_

#include
#include

#include

namespace dgl {
namespace sparse {

/**
 * @brief Perform a matrix multiplication of the sparse matrix and dense
 * matrix. It uses the sparse formats of `sparse_mat` and non-zero values of
 * `sparse_val` for SpMM. The `sparse_val` must be 1-dimensional. If the sparse
 * matrix has shape (n, m), the dense matrix must have shape (m, k). And
 * the returned dense matrix has shape (n, k).
 *
 * This function does not take care of autograd.
 *
 * @param sparse_mat The sparse matrix.
 * @param sparse_val Non-zero values of the sparse matrix.
 * @param dense_mat The dense matrix.
 * @param transpose_sparse Whether the sparse_mat is transposed.
 *
 * @return Dense tensor.
 */
torch::Tensor SpMMNoAutoGrad(
    const c10::intrusive_ptr& sparse_mat, torch::Tensor sparse_val,
    torch::Tensor dense_mat, bool transpose_sparse);

/**
 * @brief Perform a sampled matrix multiplication of a sparse matrix and two
 * dense matrices. It calculates `(mat1 @ mat2_tr^T) * spy(A)` and does not
 * consider the values of the sparse matrix. For efficiency, `mat2_tr` is the
 * transposition of the matrix to be multiplied. If the sparse matrix has shape
 * (n, m), `mat1` and `mat2_tr` must have shapes of `(n, k)` and `(m,
 * k)` respectively. And the returned tensor has shape
 * `(sparse_matrix->nnz(),)`.
* * This function does not take care of autograd. * * @param sparse_mat The sparse matrix. * @param mat1 The first dense matrix. * @param mat2_tr Transposition of the second matrix. * * @return Dense tensor. */ torch::Tensor SDDMMNoAutoGrad( const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1, torch::Tensor mat2_tr); /** * @brief Broadcast the dense feature to the nonzero entries and then compute * x_e = \phi(x_e, x_v) on the dimension dim, where x_e is the nonzero value, * x_v is the dense feature, and \phi is add, sub, mul, or div. dim = 0 or 1 * means column-wise or row-wise broadcast respectively. * * This function does not take care of autograd. * * @param sparse_mat The sparse matrix with N rows and (nnz, D) nonzero values * @param dense_mat Dense feature of shape (N, D) * @param op Operator, can be add, sub, mul, or div * @param dim The dimension to broadcast. * * @return Dense tensor of shape (nnz, D) */ torch::Tensor BroadcastOpNoAutoGrad( const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat, const std::string& op, int64_t dim); /** * @brief Broadcast the dense feature to the nonzero entries and then compute * x_e = x_e - x_v on the dimension dim, where x_e is the nonzero value, x_v is * the dense feature. dim = 0 or 1 means column-wise or row-wise broadcast * respectively. * * This function does not take care of autograd. * * @param sparse_mat The sparse matrix with N rows and (nnz, D) nonzero values * @param dense_mat Dense feature of shape (N, D) * @param dim The dimension to broadcast. * * @return Dense tensor of shape (nnz, D) */ torch::Tensor BroadcastSubNoAutoGrad( const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat, int64_t dim); /** * @brief Broadcast the dense feature to the nonzero entries and then compute * x_e = x_e / x_v on the dimension dim, where x_e is the nonzero value, x_v is * the dense feature. dim = 0 or 1 means column-wise or row-wise broadcast * respectively. * * This function does not take care of autograd. 
* * @param sparse_mat The sparse matrix with N rows and (nnz, D) nonzero values * @param dense_mat Dense feature of shape (N, D) * @param dim The dimension to broadcast. * * @return Dense tensor of shape (nnz, D) */ torch::Tensor BroadcastDivNoAutoGrad( const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat, int64_t dim); /** * @brief Broadcast the dense feature to the nonzero entries and then compute * x_e = x_e * x_v on the dimension dim, where x_e is the nonzero value, x_v is * the dense feature. dim = 0 or 1 means column-wise or row-wise broadcast * respectively. * * This function does not take care of autograd. * * @param sparse_mat The sparse matrix with N rows and (nnz, D) nonzero values * @param dense_mat Dense feature of shape (N, D) * @param dim The dimension to broadcast. * * @return Dense tensor of shape (nnz, D) */ torch::Tensor BroadcastMulNoAutoGrad( const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat, int64_t dim); /** * @brief Perform a sparse-sparse matrix multiplication with possibly different * sparsities. The two sparse values must have 1-dimensional values. If the * first sparse matrix has shape (n, m), the second sparse matrix must have * shape (m, k), and the returned sparse matrix has shape (n, k). * * This function does not take care of autograd. * * @param lhs_mat The first sparse matrix of shape (n, m). * @param lhs_val Sparse value for the first sparse matrix. * @param rhs_mat The second sparse matrix of shape (m, k). * @param rhs_val Sparse value for the second sparse matrix. * @param lhs_transpose Whether the first matrix is transposed. * @param rhs_transpose Whether the second matrix is transposed. * * @return Sparse matrix of shape (n, k). 
*/
c10::intrusive_ptr SpSpMMNoAutoGrad(
    const c10::intrusive_ptr& lhs_mat, torch::Tensor lhs_val,
    const c10::intrusive_ptr& rhs_mat, torch::Tensor rhs_val,
    bool lhs_transpose, bool rhs_transpose);

}  // namespace sparse
}  // namespace dgl

#endif  // DGL_SPARSE_MATMUL_H_

================================================
FILE: dgl_sparse/src/matrix_ops.cc
================================================
/**
 * Copyright (c) 2023 by Contributors
 * @file matrix_ops.cc
 * @brief DGL C++ matrix operators.
 */
#include
#include

namespace dgl {
namespace sparse {

/**
 * @brief Compute the intersection of two COO matrices. Return the intersection
 * COO matrix, and the indices of the intersection in the left-hand-side and
 * right-hand-side COO matrices.
 *
 * @param lhs The left-hand-side COO matrix.
 * @param rhs The right-hand-side COO matrix.
 *
 * @return A tuple of COO matrix, lhs indices, and rhs indices.
 *
 * @note A coordinate is treated as shared when it occurs more than once in
 * the concatenation of both inputs, so this presumably relies on neither
 * input containing duplicate coordinates; confirm against the callers.
 */
std::tuple, torch::Tensor, torch::Tensor> COOIntersection(
    const std::shared_ptr& lhs, const std::shared_ptr& rhs) {
  // 1. Encode the two COO matrices into arrays of integers.
  // Each (row, col) pair becomes the single key row * num_cols + col.
  auto lhs_arr =
      lhs->indices.index({0}) * lhs->num_cols + lhs->indices.index({1});
  auto rhs_arr =
      rhs->indices.index({0}) * rhs->num_cols + rhs->indices.index({1});
  // 2. Concatenate the two arrays.
  auto arr = torch::cat({lhs_arr, rhs_arr});
  // 3. Unique the concatenated array.
  torch::Tensor unique, inverse, counts;
  std::tie(unique, inverse, counts) =
      torch::unique_dim(arr, 0, false, true, true);
  // 4. Find the indices of the counts greater than 1 in the unique array.
  auto mask = counts > 1;
  // 5. Map the inverse array to the original array to generate indices.
  auto lhs_inverse = inverse.slice(0, 0, lhs_arr.numel());
  auto rhs_inverse = inverse.slice(0, lhs_arr.numel(), arr.numel());
  // map_to_original is reused for both operands: it is filled for lhs and
  // read, then overwritten for rhs and read again. The statement order
  // matters.
  auto map_to_original = torch::empty_like(unique);
  map_to_original.index_put_(
      {lhs_inverse},
      torch::arange(lhs_inverse.numel(), map_to_original.options()));
  auto lhs_indices = map_to_original.index({mask});
  map_to_original.index_put_(
      {rhs_inverse},
      torch::arange(rhs_inverse.numel(), map_to_original.options()));
  auto rhs_indices = map_to_original.index({mask});
  // 6. Decode the indices to get the intersection COO matrix.
  auto ret_arr = unique.index({mask});
  auto ret_indices = torch::stack(
      {ret_arr.floor_divide(lhs->num_cols), ret_arr % lhs->num_cols}, 0);
  auto ret_coo = std::make_shared(
      COO{lhs->num_rows, lhs->num_cols, ret_indices, false, false});
  return {ret_coo, lhs_indices, rhs_indices};
}

/** @brief Return the reverted mapping of a permutation. */
static torch::Tensor RevertPermutation(const torch::Tensor& perm) {
  auto rev_tensor = torch::empty_like(perm);
  // Scatter position i to slot perm[i], i.e. rev_tensor[perm[i]] = i.
  rev_tensor.index_put_(
      {perm}, torch::arange(0, perm.numel(), rev_tensor.options()));
  return rev_tensor;
}

/**
 * @brief Compute the compact indices of row indices and leading indices. Return
 * the compacted indices and the original row indices of compacted indices.
 *
 * @param row The row indices.
 * @param leading_indices The leading indices.
 *
 * @return A tuple of compact indices, original indices.
 *
 * @note Step 4 assigns the labels [0, n_leading_indices) to the leading
 * indices and the remaining labels to every other unique value, which
 * presumably requires the leading indices to be unique; confirm against the
 * callers.
 */
static std::tuple CompactIndices(
    const torch::Tensor& row,
    const torch::optional& leading_indices) {
  torch::Tensor sorted, sort_indices, uniqued, unique_reverse_indices, counts;
  // 1. Sort leading indices and row indices in ascending order.
  int64_t n_leading_indices = 0;
  if (leading_indices.has_value()) {
    n_leading_indices = leading_indices.value().numel();
    std::tie(sorted, sort_indices) =
        torch::cat({leading_indices.value(), row}).sort();
  } else {
    std::tie(sorted, sort_indices) = row.sort();
  }
  // 2. Reverse sort indices.
  auto sort_rev_indices = RevertPermutation(sort_indices);
  // 3. Unique the sorted array.
  std::tie(uniqued, unique_reverse_indices, counts) =
      torch::unique_consecutive(sorted, true);
  // Bring the inverse mapping back into the order of the unsorted input.
  auto reverse_indices = unique_reverse_indices.index({sort_rev_indices});
  auto n_uniqued = uniqued.numel();

  // 4. Relabel the indices and map the inverse array to the original array.
  // -1 marks unique values that have not been given a label yet.
  auto split_indices = torch::full({n_uniqued}, -1, reverse_indices.options());

  // Leading indices come first in the concatenation and take the labels
  // 0 .. n_leading_indices - 1 in their given order.
  split_indices.index_put_(
      {reverse_indices.slice(0, 0, n_leading_indices)},
      torch::arange(0, n_leading_indices, split_indices.options()));

  // All still-unlabelled unique values take the remaining labels.
  split_indices.index_put_(
      {(split_indices == -1).nonzero().view(-1)},
      torch::arange(n_leading_indices, n_uniqued, split_indices.options()));
  // 5. Decode the indices to get the compact indices.
  auto new_row = split_indices.index({reverse_indices.slice(
      0, n_leading_indices, n_leading_indices + row.numel())});
  return {new_row, uniqued.index({RevertPermutation(split_indices)})};
}

// Compact a matrix through its COO tensors: relabel the coordinates of
// dimension `dim` with CompactIndices and rebuild the matrix with the
// shrunken extent on that dimension. Returns {compacted matrix, original
// indices}.
static std::tuple, torch::Tensor> CompactCOO(
    const c10::intrusive_ptr& mat, int64_t dim,
    const torch::optional& leading_indices) {
  torch::Tensor row, col;
  auto coo = mat->COOTensors();
  // `row` always names the coordinates being compacted; for dim != 0 the two
  // roles are swapped on unpacking.
  if (dim == 0)
    std::tie(row, col) = coo;
  else
    std::tie(col, row) = coo;

  torch::Tensor new_row, uniqued;
  std::tie(new_row, uniqued) = CompactIndices(row, leading_indices);

  if (dim == 0) {
    auto ret = SparseMatrix::FromCOO(
        torch::stack({new_row, col}, 0), mat->value(),
        std::vector{uniqued.numel(), mat->shape()[1]});
    return {ret, uniqued};
  } else {
    auto ret = SparseMatrix::FromCOO(
        torch::stack({col, new_row}, 0), mat->value(),
        std::vector{mat->shape()[0], uniqued.numel()});
    return {ret, uniqued};
  }
}

// Compact a matrix through a compressed format. The `indices` array of the
// chosen format holds the coordinates of the dimension being compacted, so
// compacting rows (dim == 0) works on the CSC format and compacting columns
// on the CSR format. Returns {compacted matrix, original indices}.
static std::tuple, torch::Tensor> CompactCSR(
    const c10::intrusive_ptr& mat, int64_t dim,
    const torch::optional& leading_indices) {
  // Despite its name, `csr` holds the CSC structure when dim == 0.
  std::shared_ptr csr;
  if (dim == 0)
    csr = mat->CSCPtr();
  else
    csr = mat->CSRPtr();

  torch::Tensor new_indices, uniqued;
  std::tie(new_indices, uniqued) =
      CompactIndices(csr->indices, leading_indices);

  // Reorder the values when the format carries an explicit value
  // permutation.
  auto ret_value = mat->value();
  if (csr->value_indices.has_value())
    ret_value = mat->value().index_select(0, csr->value_indices.value());
  if (dim == 0) {
    auto ret = SparseMatrix::FromCSC(
        csr->indptr, new_indices, ret_value,
        std::vector{uniqued.numel(), mat->shape()[1]});
    return {ret, uniqued};
  } else {
    auto ret = SparseMatrix::FromCSR(
        csr->indptr, new_indices, ret_value,
        std::vector{mat->shape()[0], uniqued.numel()});
    return {ret, uniqued};
  }
}

// Compact dimension `dim` of `mat`. The COO path is used when the matrix
// already has a COO format, otherwise the compressed-format path is used.
std::tuple, torch::Tensor> Compact(
    const c10::intrusive_ptr& mat, int64_t dim,
    const torch::optional& leading_indices) {
  if (mat->HasCOO()) {
    return CompactCOO(mat, dim, leading_indices);
  }
  return CompactCSR(mat, dim, leading_indices);
}

}  // namespace sparse
}  // namespace dgl

================================================
FILE: dgl_sparse/src/matrix_ops_impl.h
================================================
/**
 * Copyright (c) 2023 by Contributors
 * @file matrix_ops_impl.h
 * @brief DGL C++ sparse matrix operator implementations.
 */
#ifndef DGL_SPARSE_MATRIX_OPS_IMPL_H_
#define DGL_SPARSE_MATRIX_OPS_IMPL_H_

#include
#include

#include
#include

#include "./utils.h"

namespace dgl {
namespace sparse {}  // namespace sparse
}  // namespace dgl

#endif  // DGL_SPARSE_MATRIX_OPS_IMPL_H_

================================================
FILE: dgl_sparse/src/python_binding.cc
================================================
/**
 * Copyright (c) 2022 by Contributors
 * @file python_binding.cc
 * @brief DGL sparse library Python binding.
*/
// clang-format off
#include
// clang-format on

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace dgl {
namespace sparse {

// Registers the SparseMatrix custom class and the free-standing sparse
// operators under the `dgl_sparse` torch library namespace.
TORCH_LIBRARY(dgl_sparse, m) {
  // Methods exposed on the SparseMatrix custom class.
  m.class_("SparseMatrix")
      .def("val", &SparseMatrix::value)
      .def("nnz", &SparseMatrix::nnz)
      .def("device", &SparseMatrix::device)
      .def("shape", &SparseMatrix::shape)
      .def("coo", &SparseMatrix::COOTensors)
      .def("indices", &SparseMatrix::Indices)
      .def("csr", &SparseMatrix::CSRTensors)
      .def("csc", &SparseMatrix::CSCTensors)
      .def("transpose", &SparseMatrix::Transpose)
      .def("coalesce", &SparseMatrix::Coalesce)
      .def("has_duplicate", &SparseMatrix::HasDuplicate)
      .def("is_diag", &SparseMatrix::HasDiag)
      .def("index_select", &SparseMatrix::IndexSelect)
      .def("range_select", &SparseMatrix::RangeSelect)
      .def("sample", &SparseMatrix::Sample);
  // Library-level operators: constructors, element-wise operators,
  // reductions, and matrix multiplications.
  m.def("from_coo", &SparseMatrix::FromCOO)
      .def("from_csr", &SparseMatrix::FromCSR)
      .def("from_csc", &SparseMatrix::FromCSC)
      .def("from_diag", &SparseMatrix::FromDiag)
      .def("spsp_add", &SpSpAdd)
      .def("spsp_mul", &SpSpMul)
      .def("spsp_div", &SpSpDiv)
      .def("reduce", &Reduce)
      .def("sum", &ReduceSum)
      .def("smean", &ReduceMean)
      .def("smin", &ReduceMin)
      .def("smax", &ReduceMax)
      .def("sprod", &ReduceProd)
      .def("val_like", &SparseMatrix::ValLike)
      .def("spmm", &SpMM)
      .def("sddmm", &SDDMM)
      .def("softmax", &Softmax)
      .def("spspmm", &SpSpMM)
      .def("compact", &Compact);
}

}  // namespace sparse
}  // namespace dgl

================================================
FILE: dgl_sparse/src/reduction.cc
================================================
/**
 * Copyright (c) 2022 by Contributors
 * @file reduction.cc
 * @brief DGL C++ sparse matrix reduction operator implementation.
*/ // clang-format off #include // clang-format on #include #include #include #include #include #include namespace dgl { namespace sparse { namespace { torch::Tensor ReduceAlong( const c10::intrusive_ptr& A, const std::string& reduce, int64_t dim) { auto value = A->value(); auto coo = A->COOPtr(); std::string reduce_op; if (reduce == "sum") { reduce_op = "sum"; } else if (reduce == "smin") { reduce_op = "amin"; } else if (reduce == "smax") { reduce_op = "amax"; } else if (reduce == "smean") { reduce_op = "mean"; } else if (reduce == "sprod") { reduce_op = "prod"; } else { TORCH_CHECK(false, "unknown reduce function ", reduce); return torch::Tensor(); } // Create the output tensor with shape // // [A.num_rows if dim == 1 else A.num_cols] + A.val.shape[1:] std::vector output_shape = value.sizes().vec(); std::vector view_dims(output_shape.size(), 1); view_dims[0] = -1; torch::Tensor idx; if (dim == 0) { output_shape[0] = coo->num_cols; idx = coo->indices.index({1}).view(view_dims).expand_as(value); } else if (dim == 1) { output_shape[0] = coo->num_rows; idx = coo->indices.index({0}).view(view_dims).expand_as(value); } torch::Tensor out = torch::zeros(output_shape, value.options()); if (dim == 0) { out.scatter_reduce_(0, idx, value, reduce_op, false); } else if (dim == 1) { out.scatter_reduce_(0, idx, value, reduce_op, false); } return out; } torch::Tensor ReduceAll( const c10::intrusive_ptr& A, const std::string& reduce) { if (reduce == "sum") { return A->value().sum(0); } else if (reduce == "smin") { return A->value().amin(0); } else if (reduce == "smax") { return A->value().amax(0); } else if (reduce == "smean") { return A->value().mean(0); } else if (reduce == "sprod") { return A->value().prod(0); } TORCH_CHECK(false, "unknown reduce function ", reduce); return torch::Tensor(); } } // namespace torch::Tensor Reduce( const c10::intrusive_ptr& A, const std::string& reduce, const torch::optional& dim) { return dim.has_value() ? 
ReduceAlong(A, reduce, dim.value()) : ReduceAll(A, reduce); } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/sddmm.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file sddmm.cc * @brief DGL C++ sparse SDDMM operator implementation. */ #include #include #include #include #include "./matmul.h" #include "./utils.h" namespace dgl { namespace sparse { using namespace torch::autograd; class SDDMMAutoGrad : public Function { public: static torch::Tensor forward( AutogradContext* ctx, const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1, torch::Tensor mat2_tr); static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs); }; void _SDDMMSanityCheck( const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1, torch::Tensor mat2) { bool shape_check = true; shape_check &= mat1.dim() == mat2.dim(); shape_check &= mat1.dim() <= 3; shape_check &= sparse_mat->shape()[0] == mat1.size(0); if (mat1.dim() == 3) { shape_check &= sparse_mat->shape()[1] == mat2.size(1); shape_check &= mat1.size(2) == mat2.size(2); if (sparse_mat->value().dim() > 1) { shape_check &= sparse_mat->value().size(1) == mat1.size(2); } } else { shape_check &= sparse_mat->shape()[1] == mat2.size(mat2.dim() - 1); } if (mat1.dim() >= 2) { shape_check &= mat1.size(1) == mat2.size(0); } if (!shape_check) { std::stringstream error; error << "SDDMM: Invalid input shapes. sparse_mat: " << c10::IntArrayRef(sparse_mat->shape()) << ", sparse_val: " << sparse_mat->value().sizes() << ", mat1: " << mat1.sizes() << ", mat2: " << mat2.sizes() << ". 
Valid input shapes (sparse_mat, mat1, mat2) are: (1) (n, m), " "(n, k), and (k, m); (2) (n, m), (n,), and (m,); (3) (n, m, b), " "(n, k, b) and (k, m, b); (4) " "(n, m), (n, k, b), and (k, m, b)."; TORCH_CHECK(false, error.str()); } TORCH_CHECK( mat1.dtype() == mat2.dtype(), "SDDMM: the two dense matrices should have the same dtype."); TORCH_CHECK( mat1.device() == mat2.device() && sparse_mat->device() == mat2.device(), "SDDMM: the two dense matrices and sparse matrix should on the same " "device."); } torch::Tensor SDDMMAutoGrad::forward( AutogradContext* ctx, const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1, torch::Tensor mat2) { auto mat2_tr = mat2.transpose(0, 1); auto ret = SDDMMNoAutoGrad(sparse_mat, mat1, mat2_tr); torch::Tensor cache_mat1, cache_mat2; if (mat1.requires_grad()) { cache_mat2 = mat2; } if (mat2.requires_grad()) { cache_mat1 = mat1; } ctx->save_for_backward({cache_mat1, cache_mat2}); ctx->saved_data["mat1_requires_grad"] = mat1.requires_grad(); ctx->saved_data["mat2_requires_grad"] = mat2.requires_grad(); ctx->saved_data["sparse_mat"] = sparse_mat; return ret; } tensor_list SDDMMAutoGrad::backward( AutogradContext* ctx, tensor_list grad_outputs) { auto saved = ctx->get_saved_variables(); auto mat1 = saved[0]; auto mat2 = saved[1]; auto sparse_mat = ctx->saved_data["sparse_mat"].toCustomClass(); auto grad = grad_outputs[0]; torch::Tensor mat1_grad, mat2_grad; if (ctx->saved_data["mat1_requires_grad"].toBool()) { // SDDMM(M, A, B) = C. dA = SpMM(dC, B^T) mat1_grad = SpMMNoAutoGrad(sparse_mat, grad, mat2.transpose(0, 1), false); } if (ctx->saved_data["mat2_requires_grad"].toBool()) { // SDDMM(M, A, B) = C. 
dB = SpMM(dC^T, A)^T auto mat2_tr_grad = SpMMNoAutoGrad(sparse_mat, grad, mat1, true); mat2_grad = mat2_tr_grad.transpose(0, 1); } return {torch::Tensor(), mat1_grad, mat2_grad}; } c10::intrusive_ptr SDDMM( const c10::intrusive_ptr& sparse_mat, torch::Tensor mat1, torch::Tensor mat2) { if (mat1.dim() == 1) { mat1 = mat1.view({mat1.size(0), 1}); } if (mat2.dim() == 1) { mat2 = mat2.view({1, mat2.size(0)}); } _SDDMMSanityCheck(sparse_mat, mat1, mat2); auto val = SDDMMAutoGrad::apply(sparse_mat, mat1, mat2); auto sparse_val = sparse_mat->value(); // Broadcast the sparse value in batched SDDMM. if (sparse_val.dim() < val.dim()) { sparse_val = sparse_val.unsqueeze(-1); } val = val * sparse_val; return SparseMatrix::ValLike(sparse_mat, val); } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/softmax.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file softmax.cc * @brief DGL C++ Softmax operator implementation */ #include #include #include #include "./matmul.h" #include "./utils.h" namespace dgl { namespace sparse { using namespace torch::autograd; class SoftmaxAutoGrad : public Function { public: static torch::Tensor forward( AutogradContext* ctx, c10::intrusive_ptr sparse_mat, torch::Tensor sparse_val, int64_t dim); static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs); }; torch::Tensor SoftmaxAutoGrad::forward( AutogradContext* ctx, c10::intrusive_ptr sparse_mat, torch::Tensor sparse_val, int64_t dim) { // Reduce by columns with dim 1. 
auto sparse_val_max = ReduceMax(sparse_mat, dim); auto sparse_val_exp = BroadcastSubNoAutoGrad(sparse_mat, sparse_val_max, dim).exp(); auto sparse_val_sum = ReduceSum(SparseMatrix::ValLike(sparse_mat, sparse_val_exp), dim); auto sparse_score = BroadcastDivNoAutoGrad( SparseMatrix::ValLike(sparse_mat, sparse_val_exp), sparse_val_sum, dim); const bool sparse_requires_grad = sparse_val.requires_grad(); torch::Tensor cache_sparse_score; if (sparse_requires_grad) { cache_sparse_score = sparse_score; } ctx->saved_data["sparse_matrix"] = sparse_mat; ctx->saved_data["sparse_requires_grad"] = sparse_requires_grad; ctx->saved_data["dim"] = dim; ctx->save_for_backward({cache_sparse_score}); return sparse_score; } tensor_list SoftmaxAutoGrad::backward( AutogradContext* ctx, tensor_list grad_outputs) { auto saved = ctx->get_saved_variables(); auto sparse_score = saved[0]; auto output_grad = grad_outputs[0]; auto sparse_mat = ctx->saved_data["sparse_matrix"].toCustomClass(); const bool sparse_requires_grad = ctx->saved_data["sparse_requires_grad"].toBool(); const int64_t dim = ctx->saved_data["dim"].toInt(); torch::Tensor sparse_val_grad; if (sparse_requires_grad) { auto sds = sparse_score * output_grad; auto accum = ReduceSum(SparseMatrix::ValLike(sparse_mat, sds), dim); sparse_val_grad = sds - BroadcastMulNoAutoGrad( SparseMatrix::ValLike(sparse_mat, sparse_score), accum, dim); } return {torch::Tensor(), sparse_val_grad, torch::Tensor()}; } c10::intrusive_ptr Softmax( const c10::intrusive_ptr& sparse_mat, int64_t dim) { auto sparse_val = sparse_mat->value(); bool expand_dim = false; auto new_sparse_mat = sparse_mat; if (sparse_val.dim() == 1) { sparse_val = sparse_val.view({-1, 1}); expand_dim = true; new_sparse_mat = SparseMatrix::ValLike(sparse_mat, sparse_val); } auto new_sparse_val = SoftmaxAutoGrad::apply(new_sparse_mat, sparse_val, dim); if (expand_dim) { new_sparse_val = new_sparse_val.view(-1); } return SparseMatrix::ValLike(sparse_mat, new_sparse_val); } } // 
namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/sparse_format.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse_format.cc * @brief DGL C++ sparse format implementations. */ // clang-format off #include // clang-format on #include #include "./utils.h" namespace dgl { namespace sparse { std::shared_ptr COOFromOldDGLCOO(const aten::COOMatrix& dgl_coo) { auto row = DGLArrayToTorchTensor(dgl_coo.row); auto col = DGLArrayToTorchTensor(dgl_coo.col); TORCH_CHECK(aten::IsNullArray(dgl_coo.data)); auto indices = torch::stack({row, col}); return std::make_shared( COO{dgl_coo.num_rows, dgl_coo.num_cols, indices, dgl_coo.row_sorted, dgl_coo.col_sorted}); } aten::COOMatrix COOToOldDGLCOO(const std::shared_ptr& coo) { auto row = TorchTensorToDGLArray(coo->indices.index({0})); auto col = TorchTensorToDGLArray(coo->indices.index({1})); return aten::COOMatrix( coo->num_rows, coo->num_cols, row, col, aten::NullArray(), coo->row_sorted, coo->col_sorted); } std::shared_ptr CSRFromOldDGLCSR(const aten::CSRMatrix& dgl_csr) { auto indptr = DGLArrayToTorchTensor(dgl_csr.indptr); auto indices = DGLArrayToTorchTensor(dgl_csr.indices); auto value_indices = DGLArrayToOptionalTorchTensor(dgl_csr.data); return std::make_shared( CSR{dgl_csr.num_rows, dgl_csr.num_cols, indptr, indices, value_indices, dgl_csr.sorted}); } aten::CSRMatrix CSRToOldDGLCSR(const std::shared_ptr& csr) { auto indptr = TorchTensorToDGLArray(csr->indptr); auto indices = TorchTensorToDGLArray(csr->indices); auto data = OptionalTorchTensorToDGLArray(csr->value_indices); return aten::CSRMatrix( csr->num_rows, csr->num_cols, indptr, indices, data, csr->sorted); } torch::Tensor COOToTorchCOO( const std::shared_ptr& coo, torch::Tensor value) { torch::Tensor indices = coo->indices; if (value.ndimension() == 2) { return torch::sparse_coo_tensor( indices, value, {coo->num_rows, coo->num_cols, value.size(1)}); } else { 
return torch::sparse_coo_tensor( indices, value, {coo->num_rows, coo->num_cols}); } } std::shared_ptr CSRToCOO(const std::shared_ptr& csr) { auto dgl_csr = CSRToOldDGLCSR(csr); auto dgl_coo = aten::CSRToCOO(dgl_csr, csr->value_indices.has_value()); return COOFromOldDGLCOO(dgl_coo); } std::shared_ptr CSCToCOO(const std::shared_ptr& csc) { auto dgl_csc = CSRToOldDGLCSR(csc); auto dgl_coo = aten::CSRToCOO(dgl_csc, csc->value_indices.has_value()); dgl_coo = aten::COOTranspose(dgl_coo); return COOFromOldDGLCOO(dgl_coo); } std::shared_ptr COOToCSR(const std::shared_ptr& coo) { auto dgl_coo = COOToOldDGLCOO(coo); auto dgl_csr = aten::COOToCSR(dgl_coo); return CSRFromOldDGLCSR(dgl_csr); } std::shared_ptr CSCToCSR(const std::shared_ptr& csc) { auto dgl_csc = CSRToOldDGLCSR(csc); auto dgl_csr = aten::CSRTranspose(dgl_csc); return CSRFromOldDGLCSR(dgl_csr); } std::shared_ptr COOToCSC(const std::shared_ptr& coo) { auto dgl_coo = COOToOldDGLCOO(coo); auto dgl_coo_transpose = aten::COOTranspose(dgl_coo); auto dgl_csc = aten::COOToCSR(dgl_coo_transpose); return CSRFromOldDGLCSR(dgl_csc); } std::shared_ptr CSRToCSC(const std::shared_ptr& csr) { auto dgl_csr = CSRToOldDGLCSR(csr); auto dgl_csc = aten::CSRTranspose(dgl_csr); return CSRFromOldDGLCSR(dgl_csc); } std::shared_ptr DiagToCOO( const std::shared_ptr& diag, const c10::TensorOptions& indices_options) { int64_t nnz = std::min(diag->num_rows, diag->num_cols); auto indices = torch::arange(nnz, indices_options).repeat({2, 1}); return std::make_shared( COO{diag->num_rows, diag->num_cols, indices, true, true}); } std::shared_ptr DiagToCSR( const std::shared_ptr& diag, const c10::TensorOptions& indices_options) { int64_t nnz = std::min(diag->num_rows, diag->num_cols); auto indptr = torch::full(diag->num_rows + 1, nnz, indices_options); auto nnz_range = torch::arange(nnz + 1, indices_options); indptr.index_put_({nnz_range}, nnz_range); auto indices = torch::arange(nnz, indices_options); return std::make_shared( CSR{diag->num_rows, 
diag->num_cols, indptr, indices, torch::optional(), true}); } std::shared_ptr DiagToCSC( const std::shared_ptr& diag, const c10::TensorOptions& indices_options) { int64_t nnz = std::min(diag->num_rows, diag->num_cols); auto indptr = torch::full(diag->num_cols + 1, nnz, indices_options); auto nnz_range = torch::arange(nnz + 1, indices_options); indptr.index_put_({nnz_range}, nnz_range); auto indices = torch::arange(nnz, indices_options); return std::make_shared( CSR{diag->num_cols, diag->num_rows, indptr, indices, torch::optional(), true}); } std::shared_ptr COOTranspose(const std::shared_ptr& coo) { auto dgl_coo = COOToOldDGLCOO(coo); auto dgl_coo_tr = aten::COOTranspose(dgl_coo); return COOFromOldDGLCOO(dgl_coo_tr); } std::pair, torch::Tensor> COOSort( const std::shared_ptr& coo) { auto encoded_coo = coo->indices.index({0}) * coo->num_cols + coo->indices.index({1}); torch::Tensor sorted, perm; std::tie(sorted, perm) = encoded_coo.sort(); auto sorted_coo = std::make_shared( COO{coo->num_rows, coo->num_cols, coo->indices.index_select(1, perm), true, true}); return {sorted_coo, perm}; } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/sparse_matrix.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse_matrix.cc * @brief DGL C++ sparse matrix implementations. 
*/ // clang-format off #include // clang-format on #include #include #include #include #include "./utils.h" namespace dgl { namespace sparse { SparseMatrix::SparseMatrix( const std::shared_ptr& coo, const std::shared_ptr& csr, const std::shared_ptr& csc, const std::shared_ptr& diag, torch::Tensor value, const std::vector& shape) : coo_(coo), csr_(csr), csc_(csc), diag_(diag), value_(value), shape_(shape) { TORCH_CHECK( coo != nullptr || csr != nullptr || csc != nullptr || diag != nullptr, "At least one of CSR/COO/CSC/Diag is required to construct a " "SparseMatrix.") TORCH_CHECK( shape.size() == 2, "The shape of a sparse matrix should be ", "2-dimensional."); // NOTE: Currently all the tensors of a SparseMatrix should on the same // device. Do we allow the graph structure and values are on different // devices? if (coo != nullptr) { TORCH_CHECK(coo->indices.dim() == 2); TORCH_CHECK(coo->indices.size(0) == 2); TORCH_CHECK(coo->indices.size(1) == value.size(0)); TORCH_CHECK(coo->indices.device() == value.device()); } if (csr != nullptr) { TORCH_CHECK(csr->indptr.dim() == 1); TORCH_CHECK(csr->indices.dim() == 1); TORCH_CHECK(csr->indptr.size(0) == shape[0] + 1); TORCH_CHECK(csr->indices.size(0) == value.size(0)); TORCH_CHECK(csr->indptr.device() == value.device()); TORCH_CHECK(csr->indices.device() == value.device()); } if (csc != nullptr) { TORCH_CHECK(csc->indptr.dim() == 1); TORCH_CHECK(csc->indices.dim() == 1); TORCH_CHECK(csc->indptr.size(0) == shape[1] + 1); TORCH_CHECK(csc->indices.size(0) == value.size(0)); TORCH_CHECK(csc->indptr.device() == value.device()); TORCH_CHECK(csc->indices.device() == value.device()); } if (diag != nullptr) { TORCH_CHECK(value.size(0) == std::min(diag->num_rows, diag->num_cols)); } } c10::intrusive_ptr SparseMatrix::FromCOOPointer( const std::shared_ptr& coo, torch::Tensor value, const std::vector& shape) { return c10::make_intrusive( coo, nullptr, nullptr, nullptr, value, shape); } c10::intrusive_ptr SparseMatrix::FromCSRPointer( 
const std::shared_ptr& csr, torch::Tensor value, const std::vector& shape) { return c10::make_intrusive( nullptr, csr, nullptr, nullptr, value, shape); } c10::intrusive_ptr SparseMatrix::FromCSCPointer( const std::shared_ptr& csc, torch::Tensor value, const std::vector& shape) { return c10::make_intrusive( nullptr, nullptr, csc, nullptr, value, shape); } c10::intrusive_ptr SparseMatrix::FromDiagPointer( const std::shared_ptr& diag, torch::Tensor value, const std::vector& shape) { return c10::make_intrusive( nullptr, nullptr, nullptr, diag, value, shape); } c10::intrusive_ptr SparseMatrix::FromCOO( torch::Tensor indices, torch::Tensor value, const std::vector& shape) { auto coo = std::make_shared(COO{shape[0], shape[1], indices, false, false}); return SparseMatrix::FromCOOPointer(coo, value, shape); } c10::intrusive_ptr SparseMatrix::FromCSR( torch::Tensor indptr, torch::Tensor indices, torch::Tensor value, const std::vector& shape) { auto csr = std::make_shared( CSR{shape[0], shape[1], indptr, indices, torch::optional(), false}); return SparseMatrix::FromCSRPointer(csr, value, shape); } c10::intrusive_ptr SparseMatrix::FromCSC( torch::Tensor indptr, torch::Tensor indices, torch::Tensor value, const std::vector& shape) { auto csc = std::make_shared( CSR{shape[1], shape[0], indptr, indices, torch::optional(), false}); return SparseMatrix::FromCSCPointer(csc, value, shape); } c10::intrusive_ptr SparseMatrix::FromDiag( torch::Tensor value, const std::vector& shape) { auto diag = std::make_shared(Diag{shape[0], shape[1]}); return SparseMatrix::FromDiagPointer(diag, value, shape); } c10::intrusive_ptr SparseMatrix::IndexSelect( int64_t dim, torch::Tensor ids) { auto id_array = TorchTensorToDGLArray(ids); bool rowwise = dim == 0; auto csr = rowwise ? 
this->CSRPtr() : this->CSCPtr(); auto slice_csr = dgl::aten::CSRSliceRows(CSRToOldDGLCSR(csr), id_array); auto slice_value = this->value().index_select(0, DGLArrayToTorchTensor(slice_csr.data)); // To prevent potential errors in future conversions to the COO format, // where this array might be used as an initialization array for // constructing COO representations, it is necessary to clear this array. slice_csr.data = dgl::aten::NullArray(); auto ret = CSRFromOldDGLCSR(slice_csr); if (rowwise) { return SparseMatrix::FromCSRPointer( ret, slice_value, {ret->num_rows, ret->num_cols}); } else { return SparseMatrix::FromCSCPointer( ret, slice_value, {ret->num_cols, ret->num_rows}); } } c10::intrusive_ptr SparseMatrix::RangeSelect( int64_t dim, int64_t start, int64_t end) { bool rowwise = dim == 0; auto csr = rowwise ? this->CSRPtr() : this->CSCPtr(); auto slice_csr = dgl::aten::CSRSliceRows(CSRToOldDGLCSR(csr), start, end); auto slice_value = this->value().index_select(0, DGLArrayToTorchTensor(slice_csr.data)); // To prevent potential errors in future conversions to the COO format, // where this array might be used as an initialization array for // constructing COO representations, it is necessary to clear this array. slice_csr.data = dgl::aten::NullArray(); auto ret = CSRFromOldDGLCSR(slice_csr); if (rowwise) { return SparseMatrix::FromCSRPointer( ret, slice_value, {ret->num_rows, ret->num_cols}); } else { return SparseMatrix::FromCSCPointer( ret, slice_value, {ret->num_cols, ret->num_rows}); } } c10::intrusive_ptr SparseMatrix::Sample( int64_t dim, int64_t fanout, torch::Tensor ids, bool replace, bool bias) { bool rowwise = dim == 0; auto id_array = TorchTensorToDGLArray(ids); auto csr = rowwise ? this->CSRPtr() : this->CSCPtr(); // Slicing matrix. auto slice_csr = dgl::aten::CSRSliceRows(CSRToOldDGLCSR(csr), id_array); auto slice_value = this->value().index_select(0, DGLArrayToTorchTensor(slice_csr.data)); // Reset value indices. 
slice_csr.data = dgl::aten::NullArray(); auto prob = bias ? TorchTensorToDGLArray(slice_value) : dgl::aten::NullArray(); auto slice_id = dgl::aten::Range(0, id_array.NumElements(), 64, id_array->ctx); // Sampling all rows on sliced matrix. auto sample_coo = dgl::aten::CSRRowWiseSampling(slice_csr, slice_id, fanout, prob, replace); auto sample_value = slice_value.index_select(0, DGLArrayToTorchTensor(sample_coo.data)); sample_coo.data = dgl::aten::NullArray(); auto ret = COOFromOldDGLCOO(sample_coo); if (!rowwise) ret = COOTranspose(ret); return SparseMatrix::FromCOOPointer( ret, sample_value, {ret->num_rows, ret->num_cols}); } c10::intrusive_ptr SparseMatrix::ValLike( const c10::intrusive_ptr& mat, torch::Tensor value) { TORCH_CHECK( mat->value().size(0) == value.size(0), "The first dimension of ", "the old values and the new values must be the same."); TORCH_CHECK( mat->value().device() == value.device(), "The device of the ", "old values and the new values must be the same."); const auto& shape = mat->shape(); if (mat->HasDiag()) { return SparseMatrix::FromDiagPointer(mat->DiagPtr(), value, shape); } if (mat->HasCOO()) { return SparseMatrix::FromCOOPointer(mat->COOPtr(), value, shape); } if (mat->HasCSR()) { return SparseMatrix::FromCSRPointer(mat->CSRPtr(), value, shape); } TORCH_CHECK(mat->HasCSC(), "Invalid sparse format for ValLike.") return SparseMatrix::FromCSCPointer(mat->CSCPtr(), value, shape); } std::shared_ptr SparseMatrix::COOPtr() { if (coo_ == nullptr) { _CreateCOO(); } return coo_; } std::shared_ptr SparseMatrix::CSRPtr() { if (csr_ == nullptr) { _CreateCSR(); } return csr_; } std::shared_ptr SparseMatrix::CSCPtr() { if (csc_ == nullptr) { _CreateCSC(); } return csc_; } std::shared_ptr SparseMatrix::DiagPtr() { TORCH_CHECK( diag_ != nullptr, "Cannot get Diag sparse format from a non-diagonal sparse matrix"); return diag_; } std::tuple SparseMatrix::COOTensors() { auto coo = COOPtr(); return std::make_tuple(coo->indices.index({0}), 
coo->indices.index({1})); } torch::Tensor SparseMatrix::Indices() { auto coo = COOPtr(); return coo->indices; } std::tuple> SparseMatrix::CSRTensors() { auto csr = CSRPtr(); auto val = value(); return std::make_tuple(csr->indptr, csr->indices, csr->value_indices); } std::tuple> SparseMatrix::CSCTensors() { auto csc = CSCPtr(); return std::make_tuple(csc->indptr, csc->indices, csc->value_indices); } c10::intrusive_ptr SparseMatrix::Transpose() const { auto shape = shape_; std::swap(shape[0], shape[1]); auto value = value_; if (HasDiag()) { return SparseMatrix::FromDiag(value, shape); } else if (HasCOO()) { auto coo = COOTranspose(coo_); return SparseMatrix::FromCOOPointer(coo, value, shape); } else if (HasCSR()) { return SparseMatrix::FromCSCPointer(csr_, value, shape); } else { return SparseMatrix::FromCSRPointer(csc_, value, shape); } } void SparseMatrix::_CreateCOO() { if (HasCOO()) return; if (HasDiag()) { auto indices_options = torch::TensorOptions() .dtype(torch::kInt64) .layout(torch::kStrided) .device(this->device()); coo_ = DiagToCOO(diag_, indices_options); } else if (HasCSR()) { coo_ = CSRToCOO(csr_); } else if (HasCSC()) { coo_ = CSCToCOO(csc_); } else { LOG(FATAL) << "SparseMatrix does not have any sparse format"; } } void SparseMatrix::_CreateCSR() { if (HasCSR()) return; if (HasDiag()) { auto indices_options = torch::TensorOptions() .dtype(torch::kInt64) .layout(torch::kStrided) .device(this->device()); csr_ = DiagToCSR(diag_, indices_options); } else if (HasCOO()) { csr_ = COOToCSR(coo_); } else if (HasCSC()) { csr_ = CSCToCSR(csc_); } else { LOG(FATAL) << "SparseMatrix does not have any sparse format"; } } void SparseMatrix::_CreateCSC() { if (HasCSC()) return; if (HasDiag()) { auto indices_options = torch::TensorOptions() .dtype(torch::kInt64) .layout(torch::kStrided) .device(this->device()); csc_ = DiagToCSC(diag_, indices_options); } else if (HasCOO()) { csc_ = COOToCSC(coo_); } else if (HasCSR()) { csc_ = CSRToCSC(csr_); } else { LOG(FATAL) << 
"SparseMatrix does not have any sparse format"; } } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/sparse_matrix_coalesce.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file sparse_matrix_coalesce.cc * @brief Operators related to sparse matrix coalescing. */ // clang-format off #include // clang-format on #include #include "./utils.h" namespace dgl { namespace sparse { c10::intrusive_ptr SparseMatrix::Coalesce() { auto torch_coo = COOToTorchCOO(this->COOPtr(), this->value()); auto coalesced_coo = torch_coo.coalesce(); return SparseMatrix::FromCOO( coalesced_coo.indices(), coalesced_coo.values(), this->shape()); } bool SparseMatrix::HasDuplicate() { aten::CSRMatrix dgl_csr; if (HasDiag()) { return false; } // The format for calculation will be chosen in the following order: CSR, // CSC. CSR is created if the sparse matrix only has CSC format. if (HasCSR() || !HasCSC()) { dgl_csr = CSRToOldDGLCSR(CSRPtr()); } else { dgl_csr = CSRToOldDGLCSR(CSCPtr()); } return aten::CSRHasDuplicate(dgl_csr); } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/spmm.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file spmm.cc * @brief DGL C++ sparse SpMM operator implementation. 
*/ #include #include #include #include #include #include "./matmul.h" #include "./utils.h" namespace dgl { namespace sparse { using namespace torch::autograd; class SpMMAutoGrad : public Function { public: static torch::Tensor forward( AutogradContext* ctx, c10::intrusive_ptr sparse_mat, torch::Tensor sparse_val, torch::Tensor dense_mat); static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs); }; void _SpMMSanityCheck( c10::intrusive_ptr sparse_mat, torch::Tensor sparse_val, torch::Tensor dense_mat) { const auto& sparse_mat_shape = sparse_mat->shape(); auto val_shape = sparse_val.sizes(); auto dense_shape = dense_mat.sizes(); bool shape_check = true; shape_check &= sparse_mat_shape[1] == dense_shape[0]; shape_check &= val_shape.size() <= 2; shape_check &= val_shape[0] == sparse_mat->nnz(); shape_check &= dense_shape.size() <= 3; if (dense_shape.size() == 3 || val_shape.size() == 2) { shape_check &= dense_shape.size() == val_shape.size() + 1; shape_check &= dense_shape[2] == val_shape[1]; } if (!shape_check) { std::stringstream error; error << "SpMM: Invalid input shapes. sparse_mat: " << c10::IntArrayRef(sparse_mat->shape()) << ", sparse_val: " << sparse_mat->value().sizes() << ", dense_mat: " << dense_mat.sizes() << ". 
Valid input shapes (sparse_mat, dense_mat) are: (1) (n, m) and " "(m, k); (2) (n, m) and (m,); (3) (n, m, b) and (m, k, b)."; TORCH_CHECK(false, error.str()); } TORCH_CHECK( sparse_val.dtype() == dense_mat.dtype(), "SpMM: the non-zero values does not have the same dtype as the dense " "matrix."); TORCH_CHECK( sparse_val.device() == sparse_mat->device() && sparse_val.device() == dense_mat.device(), "SpMM: sparse matrix, non-zero values and the dense matrix should be " "on the same device."); } torch::Tensor SpMMAutoGrad::forward( AutogradContext* ctx, c10::intrusive_ptr sparse_mat, torch::Tensor sparse_val, torch::Tensor dense_mat) { auto ret = SpMMNoAutoGrad(sparse_mat, sparse_val, dense_mat, false); const bool sparse_requires_grad = sparse_val.requires_grad(); const bool dense_requires_grad = dense_mat.requires_grad(); torch::Tensor cache_sparse_val, cache_dense_mat; if (dense_requires_grad) { cache_sparse_val = sparse_val; } if (sparse_requires_grad) { cache_dense_mat = dense_mat; } ctx->saved_data["sparse_matrix"] = sparse_mat; ctx->saved_data["sparse_requires_grad"] = sparse_requires_grad; ctx->saved_data["dense_requires_grad"] = dense_requires_grad; ctx->save_for_backward({cache_sparse_val, cache_dense_mat}); return ret; } tensor_list SpMMAutoGrad::backward( AutogradContext* ctx, tensor_list grad_outputs) { auto saved = ctx->get_saved_variables(); auto sparse_val = saved[0]; auto dense_mat = saved[1]; auto output_grad = grad_outputs[0]; auto sparse_mat = ctx->saved_data["sparse_matrix"].toCustomClass(); const bool sparse_requires_grad = ctx->saved_data["sparse_requires_grad"].toBool(); const bool dense_requires_grad = ctx->saved_data["dense_requires_grad"].toBool(); torch::Tensor dense_mat_grad, sparse_val_grad; if (sparse_requires_grad) { // A @ B = C -> dA = dC @ (B^T) sparse_val_grad = SDDMMNoAutoGrad(sparse_mat, output_grad, dense_mat); } if (dense_requires_grad) { // A @ B = C -> dB = (A^T) @ dC dense_mat_grad = SpMMNoAutoGrad(sparse_mat, sparse_val, 
output_grad, true); } return {torch::Tensor(), sparse_val_grad, dense_mat_grad}; } torch::Tensor SpMM( const c10::intrusive_ptr& sparse_mat, torch::Tensor dense_mat) { _SpMMSanityCheck(sparse_mat, sparse_mat->value(), dense_mat); bool expand_dim = false; if (dense_mat.dim() == 1) { dense_mat = dense_mat.view({-1, 1}); expand_dim = true; } auto ret = SpMMAutoGrad::apply(sparse_mat, sparse_mat->value(), dense_mat); if (expand_dim) { ret = ret.view(-1); } return ret; } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/spspmm.cc ================================================ /** * Copyright (c) 2022 by Contributors * @file spspmm.cc * @brief DGL C++ sparse SpSpMM operator implementation. */ #include #include #include #include #include "./matmul.h" #include "./utils.h" namespace dgl { namespace sparse { using namespace torch::autograd; class SpSpMMAutoGrad : public Function { public: static variable_list forward( AutogradContext* ctx, c10::intrusive_ptr lhs_mat, torch::Tensor lhs_val, c10::intrusive_ptr rhs_mat, torch::Tensor rhs_val); static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs); }; void _SpSpMMSanityCheck( const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat) { const auto& lhs_shape = lhs_mat->shape(); const auto& rhs_shape = rhs_mat->shape(); TORCH_CHECK( lhs_shape[1] == rhs_shape[0], "SpSpMM: the second dim of lhs_mat should be equal to the first dim ", "of the second matrix"); TORCH_CHECK( lhs_mat->value().dim() == 1, "SpSpMM: the value shape of lhs_mat should be 1-D"); TORCH_CHECK( rhs_mat->value().dim() == 1, "SpSpMM: the value shape of rhs_mat should be 1-D"); TORCH_CHECK( lhs_mat->device() == rhs_mat->device(), "SpSpMM: lhs_mat and rhs_mat should be on the same device"); TORCH_CHECK( lhs_mat->dtype() == rhs_mat->dtype(), "SpSpMM: lhs_mat and rhs_mat should have the same dtype"); TORCH_CHECK( !lhs_mat->HasDuplicate(), "SpSpMM does not support lhs_mat 
with duplicate indices. ", "Call A = A.coalesce() to dedup first."); TORCH_CHECK( !rhs_mat->HasDuplicate(), "SpSpMM does not support rhs_mat with duplicate indices. ", "Call A = A.coalesce() to dedup first."); } // Mask select value of `mat` by `sub_mat`. torch::Tensor _CSRMask( const c10::intrusive_ptr& mat, torch::Tensor value, const c10::intrusive_ptr& sub_mat) { auto csr = CSRToOldDGLCSR(mat->CSRPtr()); auto val = TorchTensorToDGLArray(value); auto row = TorchTensorToDGLArray(sub_mat->COOPtr()->indices.index({0})); auto col = TorchTensorToDGLArray(sub_mat->COOPtr()->indices.index({1})); runtime::NDArray ret = aten::CSRGetFloatingData(csr, row, col, val, 0.); return DGLArrayToTorchTensor(ret); } variable_list SpSpMMAutoGrad::forward( AutogradContext* ctx, c10::intrusive_ptr lhs_mat, torch::Tensor lhs_val, c10::intrusive_ptr rhs_mat, torch::Tensor rhs_val) { auto ret_mat = SpSpMMNoAutoGrad(lhs_mat, lhs_val, rhs_mat, rhs_val, false, false); ctx->saved_data["lhs_mat"] = lhs_mat; ctx->saved_data["rhs_mat"] = rhs_mat; ctx->saved_data["ret_mat"] = ret_mat; ctx->saved_data["lhs_require_grad"] = lhs_val.requires_grad(); ctx->saved_data["rhs_require_grad"] = rhs_val.requires_grad(); ctx->save_for_backward({lhs_val, rhs_val}); auto csr = ret_mat->CSRPtr(); auto val = ret_mat->value(); TORCH_CHECK(!csr->value_indices.has_value()); return {csr->indptr, csr->indices, val}; } tensor_list SpSpMMAutoGrad::backward( AutogradContext* ctx, tensor_list grad_outputs) { auto saved = ctx->get_saved_variables(); auto lhs_val = saved[0]; auto rhs_val = saved[1]; auto output_grad = grad_outputs[2]; auto lhs_mat = ctx->saved_data["lhs_mat"].toCustomClass(); auto rhs_mat = ctx->saved_data["rhs_mat"].toCustomClass(); auto ret_mat = ctx->saved_data["ret_mat"].toCustomClass(); torch::Tensor lhs_val_grad, rhs_val_grad; if (ctx->saved_data["lhs_require_grad"].toBool()) { // A @ B = C -> dA = dC @ (B^T) auto lhs_mat_grad = SpSpMMNoAutoGrad(ret_mat, output_grad, rhs_mat, rhs_val, false, true); 
/**
 * @brief Sparse-sparse matrix product for the case where at least one operand
 * reports a diagonal format (HasDiag()).
 *
 * Three cases are handled: Diag @ Diag, Diag @ Sparse and Sparse @ Diag. If
 * neither operand has a diagonal format the TORCH_CHECK at the end fails.
 *
 * @param lhs_mat Left operand.
 * @param rhs_mat Right operand.
 *
 * @return The product as a new sparse matrix.
 */
c10::intrusive_ptr DiagSpSpMM(
    const c10::intrusive_ptr& lhs_mat, const c10::intrusive_ptr& rhs_mat) {
  if (lhs_mat->HasDiag() && rhs_mat->HasDiag()) {
    // Diag @ Diag
    // lhs is (m, n), rhs's column count is p; the result is built as (m, p).
    const int64_t m = lhs_mat->shape()[0];
    const int64_t n = lhs_mat->shape()[1];
    const int64_t p = rhs_mat->shape()[1];
    // Only the first min(m, n, p) diagonal entries of both operands are
    // multiplied together.
    const int64_t common_diag_len = std::min({m, n, p});
    // The result diagonal has min(m, p) entries.
    const int64_t new_diag_len = std::min(m, p);
    auto slice = torch::indexing::Slice(0, common_diag_len);
    auto new_val =
        lhs_mat->value().index({slice}) * rhs_mat->value().index({slice});
    // Zero-pad the tail so the value tensor has new_diag_len entries.
    new_val = torch::constant_pad_nd(
        new_val, {0, new_diag_len - common_diag_len}, 0);
    return SparseMatrix::FromDiag(new_val, {m, p});
  }
  if (lhs_mat->HasDiag() && !rhs_mat->HasDiag()) {
    // Diag @ Sparse
    // Each non-zero of rhs is scaled by the lhs diagonal entry selected by
    // the non-zero's row index.
    // NOTE(review): the result is created with ValLike(rhs_mat, ...), and the
    // lhs values are indexed by rhs row ids; for a non-square diagonal lhs
    // this may yield a wrong shape or an out-of-range index_select -- confirm
    // whether callers guarantee a square diagonal operand here.
    auto row = rhs_mat->Indices().index({0});
    auto val = lhs_mat->value().index_select(0, row) * rhs_mat->value();
    return SparseMatrix::ValLike(rhs_mat, val);
  }
  if (!lhs_mat->HasDiag() && rhs_mat->HasDiag()) {
    // Sparse @ Diag
    // Each non-zero of lhs is scaled by the rhs diagonal entry selected by
    // the non-zero's column index.
    // NOTE(review): same non-square caveat as the Diag @ Sparse branch, since
    // the result is created with ValLike(lhs_mat, ...) -- confirm.
    auto col = lhs_mat->Indices().index({1});
    auto val = rhs_mat->value().index_select(0, col) * lhs_mat->value();
    return SparseMatrix::ValLike(lhs_mat, val);
  }
  // Reached only when neither operand has a diagonal format.
  TORCH_CHECK(
      false,
      "For DiagSpSpMM, at least one of the sparse matries need to have kDiag "
      "format");
  // Placed after a check that always fails; returns an empty pointer.
  return c10::intrusive_ptr();
}
rhs_mat->shape()[1]}); auto indptr = results[0]; auto indices = results[1]; auto value = results[2]; return SparseMatrix::FromCSR(indptr, indices, value, ret_shape); } } // namespace sparse } // namespace dgl ================================================ FILE: dgl_sparse/src/utils.h ================================================ /** * Copyright (c) 2022 by Contributors * @file utils.h * @brief DGL C++ sparse API utilities */ #ifndef DGL_SPARSE_UTILS_H_ #define DGL_SPARSE_UTILS_H_ // clang-format off #include // clang-format on #include #include #include #include namespace dgl { namespace sparse { /** @brief Find a proper sparse format for two sparse matrices. It chooses * COO if anyone of the sparse matrices has COO format. If none of them has * COO, it tries CSR and CSC in the same manner. */ inline static SparseFormat FindAnyExistingFormat( const c10::intrusive_ptr& A, const c10::intrusive_ptr& B) { SparseFormat fmt; if (A->HasCOO() || B->HasCOO()) { fmt = SparseFormat::kCOO; } else if (A->HasCSR() || B->HasCSR()) { fmt = SparseFormat::kCSR; } else { fmt = SparseFormat::kCSC; } return fmt; } /** @brief Check whether two matrices has the same dtype and shape for * elementwise operators. */ inline static void ElementwiseOpSanityCheck( const c10::intrusive_ptr& A, const c10::intrusive_ptr& B) { TORCH_CHECK( A->value().dtype() == B->value().dtype(), "Elementwise operators" " do not support two sparse matrices with different dtypes."); TORCH_CHECK( A->shape()[0] == B->shape()[0] && A->shape()[1] == B->shape()[1], "Elementwise operators do not support two sparse matrices with different" " shapes."); } /** @brief Convert a Torch tensor to a DGL array. */ inline static runtime::NDArray TorchTensorToDGLArray(torch::Tensor tensor) { return runtime::DLPackConvert::FromDLPack(at::toDLPack(tensor.contiguous())); } /** @brief Convert a DGL array to a Torch tensor. 
/** @brief Convert a DGL array to an optional Torch tensor. A null DGL array
 * maps to an empty optional; any other array is converted with
 * DGLArrayToTorchTensor and wrapped.
 */
inline static torch::optional DGLArrayToOptionalTorchTensor(
    runtime::NDArray array) {
  if (!aten::IsNullArray(array)) {
    return torch::make_optional(DGLArrayToTorchTensor(array));
  }
  return torch::optional();
}

Using DGL-Go is as easy as three steps: 1. Use `dgl configure` to pick the task, dataset and model of interest. It generates a configuration file for later use. You could also use `dgl recipe get` to retrieve a configuration file we provided. 1. Use `dgl train` to launch training according to the configuration and see the results. 1. Use `dgl export` to generate a *self-contained, reproducible* Python script for advanced customization, or try the model on custom data stored in CSV format. Next, we will walk through all these steps one-by-one. ## Training GraphSAGE for node classification on Cora Let's use one of the most classical setups -- training a GraphSAGE model for node classification on the Cora citation graph dataset as an example. ### Step 1: `dgl configure` First step, use `dgl configure` to generate a YAML configuration file. ``` dgl configure nodepred --data cora --model sage --cfg cora_sage.yaml ``` Note that `nodepred` is the name of a DGL-Go *pipeline*. For now, you can think of a pipeline as a training task: `nodepred` is for the node multiclass classification task; other options include `linkpred` for the link prediction task and `graphpred` for the graph binary classification task. The command will generate a configuration file `cora_sage.yaml` which includes: * Options for the selected dataset (i.e., `cora` here). * Model hyperparameters (e.g., number of layers, hidden size, etc.). * Training hyperparameters (e.g., learning rate, loss function, etc.). Different choices of task, model and datasets may give very different options, so DGL-Go also adds a comment per option for explanation. At this point you can also change options to explore potential optimizations. The snippet below shows the configuration file generated by the command above. ```yaml version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cpu data: name: cora split_ratio: # Ratio to generate split masks, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test.
Leave blank to use builtin split in original dataset model: name: sage embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 1 # Number of hidden layers. activation: relu # Activation function name under torch.nn.functional dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). general_pipeline: early_stop: patience: 20 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: results # Directory to save the experiment results num_runs: 1 # Number of experiments to run ``` Apart from `dgl configure`, you could also get one of DGL-Go's built-in configuration files (called *recipe*) using `dgl recipe`. There are two sub-commands: ``` dgl recipe list ``` will list the available recipes: ``` ➜ dgl recipe list =============================================================================== | Filename | Pipeline | Dataset | =============================================================================== | graphpred_pcba_gin.yaml | graphpred | ogbg-molpcba | | graphpred_hiv_pna.yaml | graphpred | ogbg-molhiv | | graphpred_hiv_gin.yaml | graphpred | ogbg-molhiv | | linkpred_citation2_sage.yaml | linkpred | ogbl-citation2 | | linkpred_collab_sage.yaml | linkpred | ogbl-collab | | nodepred_citeseer_sage.yaml | nodepred | citeseer | | nodepred_citeseer_gcn.yaml | nodepred | citeseer | | nodepred-ns_arxiv_gcn.yaml | nodepred-ns | ogbn-arxiv | | nodepred_cora_gat.yaml | nodepred | cora | | nodepred_pubmed_sage.yaml | nodepred | pubmed | | linkpred_cora_sage.yaml | linkpred | cora | | nodepred_pubmed_gcn.yaml | nodepred | pubmed | | nodepred_pubmed_gat.yaml | nodepred | pubmed | | nodepred_cora_gcn.yaml | nodepred | 
cora | | nodepred_cora_sage.yaml | nodepred | cora | | nodepred_citeseer_gat.yaml | nodepred | citeseer | | nodepred-ns_product_sage.yaml | nodepred-ns | ogbn-products | =============================================================================== ``` Then use ``` dgl recipe get nodepred_cora_sage.yaml ``` to copy the YAML configuration file to your local folder. ### Step 2: `dgl train` Simply run `dgl train --cfg cora_sage.yaml` will start the training process. ```log ... Epoch 00190 | Loss 1.5225 | TrainAcc 0.9500 | ValAcc 0.6840 Epoch 00191 | Loss 1.5416 | TrainAcc 0.9357 | ValAcc 0.6840 Epoch 00192 | Loss 1.5391 | TrainAcc 0.9357 | ValAcc 0.6840 Epoch 00193 | Loss 1.5257 | TrainAcc 0.9643 | ValAcc 0.6840 Epoch 00194 | Loss 1.5196 | TrainAcc 0.9286 | ValAcc 0.6840 EarlyStopping counter: 12 out of 20 Epoch 00195 | Loss 1.4862 | TrainAcc 0.9643 | ValAcc 0.6760 Epoch 00196 | Loss 1.5142 | TrainAcc 0.9714 | ValAcc 0.6760 Epoch 00197 | Loss 1.5145 | TrainAcc 0.9714 | ValAcc 0.6760 Epoch 00198 | Loss 1.5174 | TrainAcc 0.9571 | ValAcc 0.6760 Epoch 00199 | Loss 1.5235 | TrainAcc 0.9714 | ValAcc 0.6760 Test Accuracy 0.7740 Accuracy across 1 runs: 0.774 ± 0.0 ``` That's all! Basically you only need two commands to train a graph neural network. ### Step 3: `dgl export` for more advanced customization That's not everything yet. You may want to open the hood and invoke deeper customization. DGL-Go can export a **self-contained, reproducible** Python script for you to do anything you like. Try `dgl export --cfg cora_sage.yaml --output script.py`, and you'll get the script used to train the model. Here's the code snippet: ```python ... class GraphSAGE(nn.Module): def __init__(self, data_info: dict, embed_size: int = -1, hidden_size: int = 16, num_layers: int = 1, activation: str = "relu", dropout: float = 0.5, aggregator_type: str = "gcn"): """GraphSAGE model Parameters ---------- data_info : dict The information about the input dataset. 
embed_size : int The dimension of created embedding table. -1 means using original node embedding hidden_size : int Hidden size. num_layers : int Number of hidden layers. dropout : float Dropout rate. activation : str Activation function name under torch.nn.functional aggregator_type : str Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). """ super(GraphSAGE, self).__init__() self.data_info = data_info self.embed_size = embed_size if embed_size > 0: self.embed = nn.Embedding(data_info["num_nodes"], embed_size) in_size = embed_size else: in_size = data_info["in_size"] self.layers = nn.ModuleList() self.dropout = nn.Dropout(dropout) self.activation = getattr(nn.functional, activation) for i in range(num_layers): in_hidden = hidden_size if i > 0 else in_size out_hidden = hidden_size if i < num_layers - \ 1 else data_info["out_size"] self.layers.append( dgl.nn.SAGEConv( in_hidden, out_hidden, aggregator_type)) def forward(self, graph, node_feat, edge_feat=None): if self.embed_size > 0: dgl_warning( "The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size.") h = self.embed.weight else: h = node_feat h = self.dropout(h) for l, layer in enumerate(self.layers): h = layer(graph, h, edge_feat) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h ... def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn): g = data[0] # Only train on the first graph g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) g = g.to(device) node_feat = g.ndata.get('feat', None) edge_feat = g.edata.get('feat', None) label = g.ndata['label'] train_mask, val_mask, test_mask = g.ndata['train_mask'].bool( ), g.ndata['val_mask'].bool(), g.ndata['test_mask'].bool() stopper = EarlyStopping(**pipeline_cfg['early_stop']) val_acc = 0. 
for epoch in range(pipeline_cfg['num_epochs']): model.train() logits = model(g, node_feat, edge_feat) loss = loss_fcn(logits[train_mask], label[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() train_acc = accuracy(logits[train_mask], label[train_mask]) if epoch != 0 and epoch % pipeline_cfg['eval_period'] == 0: val_acc = accuracy(logits[val_mask], label[val_mask]) if stopper.step(val_acc, model): break print("Epoch {:05d} | Loss {:.4f} | TrainAcc {:.4f} | ValAcc {:.4f}". format(epoch, loss.item(), train_acc, val_acc)) stopper.load_checkpoint(model) stopper.close() model.eval() with torch.no_grad(): logits = model(g, node_feat, edge_feat) test_acc = accuracy(logits[test_mask], label[test_mask]) return test_acc def main(run, cfg, data): device = cfg['device'] pipeline_cfg = cfg['general_pipeline'] # create model model = GraphSAGE(**cfg["model"]) model = model.to(device) loss = torch.nn.CrossEntropyLoss() optimizer = torch.optim.Adam( model.parameters(), **pipeline_cfg["optimizer"]) # train test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss) torch.save({'cfg': cfg, 'model': model.state_dict()}, os.path.join(pipeline_cfg["save_path"], 'run_{}.pth'.format(run))) return test_acc if __name__ == '__main__': ... 
# load data data = AsNodePredDataset(CoraGraphDataset()) model_cfg = cfg["model"] cfg["model"]["data_info"] = { "in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1], "out_size": data.num_classes, "num_nodes": data[0].num_nodes() } os.makedirs(cfg['general_pipeline']["save_path"]) all_acc = [] num_runs = 1 for run in range(num_runs): print(f'Run experiment #{run}') test_acc = main(run, cfg, data) print("Test Accuracy {:.4f}".format(test_acc)) all_acc.append(test_acc) avg_acc = np.round(np.mean(all_acc), 6) std_acc = np.round(np.std(all_acc), 6) print(f'Accuracy across {num_runs} runs: {avg_acc} ± {std_acc}') ``` You can see that everything is collected into one Python script which includes the entire `GraphSAGE` model definition, data processing and training loop. Simply running `python script.py` will give you the *exact same* result as you saw with `dgl train`. At this point, you can change any part as you wish such as plugging your own GNN module, changing the loss function and so on. ## Use DGL-Go on your own dataset DGL-Go supports training a model on a custom dataset via DGL's `CSVDataset`. ### Step 1: Prepare your CSV and metadata file. Follow the tutorial at [Loading data from CSV files](https://docs.dgl.ai/en/latest/guide/data-loadcsv.html#guide-data-pipeline-loadcsv) to prepare your dataset. Generally, the dataset folder should include: * At least one CSV file for node data. * At least one CSV file for edge data. * A metadata file called `meta.yaml`. ### Step 2: `dgl configure` with `--data csv` option Run ``` dgl configure nodepred --data csv --model sage --cfg csv_sage.yaml ``` to generate the configuration file. You will see that the file includes a section like the following: ```yaml ... data: name: csv split_ratio: # Ratio to generate split masks, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test.
Leave blank to use builtin split in original dataset data_path: ./ # metadata.yaml, nodes.csv, edges.csv should in this folder ... ``` Fill in the `data_path` option with the path to your dataset folder. If your dataset does not have any native split for training, validation and test sets, you can set the split ratio in the `split_ratio` option, which will generate a random split for you. ### Step 3: `train` the model / `export` the script Then you can do the same as the tutorial above, either train the model by `dgl train --cfg csv_sage.yaml` or use `dgl export --cfg csv_sage.yaml --output script.py` to get the training script. ## FAQ **Q: What are the available options for each command?** A: You can use `--help` for all commands. For example, use `dgl --help` for the general help message; use `dgl configure --help` for the configuration options; use `dgl configure nodepred --help` for the configuration options of the node prediction pipeline. **Q: What exactly is nodepred/linkpred? How many are there?** A: They are called DGL-Go pipelines. A pipeline represents the training methodology for a certain task. Therefore, its naming convention is `<task_name>[-<method_name>]`. For example, `nodepred` trains the selected GNN model for node classification using the full-graph training method, while `nodepred-ns` trains the model for node classification but using neighbor sampling. Currently DGL-Go provides four training pipelines (`nodepred`, `nodepred-ns`, `linkpred`, and `graphpred`). Use `dgl configure --help` to see all the available pipelines. **Q: How to add my model to the official model recipe zoo?** A: Currently not supported. We will enable this feature soon. Please stay tuned! **Q: After training a model on some dataset, how can I apply it to another one?** A: The `save_path` option in the generated configuration file allows you to specify the directory to save the experiment results.
After training, `{save_path}/run_{i}.pth` will be the checkpoint for the i-th run, consisting of the training configuration and trained model state dict. You can then use `dgl apply` as follows. ``` dgl configure-apply X --data Y --cpt {save_path}/run_{i}.pth --cfg Z dgl apply --cfg Z ``` - `X` is the pipeline name as in `dgl configure`. - `Y` is the dataset to apply and can be omitted if you are applying the trained model to the training dataset. - `Z` is the configuration file and a default value will be used if not specified. You can also use `dgl export --cfg Z` to generate a python script for further modification. ================================================ FILE: dglgo/dglgo/__init__.py ================================================ ================================================ FILE: dglgo/dglgo/apply_pipeline/__init__.py ================================================ from .graphpred import ApplyGraphpredPipeline from .nodepred import ApplyNodepredPipeline from .nodepred_sample import ApplyNodepredNsPipeline ================================================ FILE: dglgo/dglgo/apply_pipeline/graphpred/__init__.py ================================================ from .gen import * ================================================ FILE: dglgo/dglgo/apply_pipeline/graphpred/gen.py ================================================ from copy import deepcopy from pathlib import Path from typing import Optional import ruamel.yaml import torch import typer from jinja2 import Template from pydantic import BaseModel, Field from ...utils.factory import ( ApplyPipelineFactory, DataFactory, GraphModelFactory, PipelineBase, ) from ...utils.yaml_dump import deep_convert_dict, merge_comment pipeline_comments = { "batch_size": "Graph batch size", "num_workers": "Number of workers for data loading", "save_path": "Directory to save the inference results", } class ApplyGraphpredPipelineCfg(BaseModel): batch_size: int = 32 num_workers: int = 4 save_path: str = 
@ApplyPipelineFactory.register("graphpred")
class ApplyGraphpredPipeline(PipelineBase):
    """Inference ("apply") pipeline for graph property prediction.

    The pipeline offers two entry points: a CLI callback that writes a YAML
    configuration derived from a training checkpoint (``get_cfg_func``), and
    a renderer that turns such a configuration into a standalone inference
    script (``gen_script``).
    """

    def __init__(self):
        self.pipeline = {"name": "graphpred", "mode": "apply"}

    @classmethod
    def setup_user_cfg_cls(cls):
        """Build the pydantic user-config class and store it on the class.

        The config class is created lazily, at call time, from the datasets
        currently registered for ``graphpred`` in ``DataFactory``.
        """
        from ...utils.enter_config import UserConfig

        class ApplyGraphPredUserConfig(UserConfig):
            data: DataFactory.filter("graphpred").get_pydantic_config() = Field(
                ..., discriminator="name"
            )
            general_pipeline: ApplyGraphpredPipelineCfg = (
                ApplyGraphpredPipelineCfg()
            )

        # NOTE: this assignment replaces the ``user_cfg_cls`` property below
        # with the generated config class at class level, so it must run
        # before ``user_cfg_cls`` is read.
        cls.user_cfg_cls = ApplyGraphPredUserConfig

    @property
    def user_cfg_cls(self):
        return self.__class__.user_cfg_cls

    def get_cfg_func(self):
        """Return the CLI callback that generates the configuration file.

        Returns
        -------
        callable
            A function taking ``data``, ``cfg`` and ``cpt`` options that
            writes the YAML configuration file and prints its location.
        """

        def config(
            data: DataFactory.filter(
                "graphpred"
            ).get_dataset_enum() = typer.Option(None, help="input data name"),
            cfg: Optional[str] = typer.Option(
                None, help="output configuration file path"
            ),
            cpt: str = typer.Option(..., help="input checkpoint file path"),
        ):
            # Training configuration
            # SECURITY NOTE: ``weights_only=False`` unpickles arbitrary
            # objects; only load checkpoints from a trusted source.
            train_cfg = torch.load(cpt, weights_only=False)["cfg"]
            if data is None:
                print("data is not specified, use the training dataset")
                data = train_cfg["data_name"]
            else:
                data = data.name
            if cfg is None:
                cfg = (
                    "_".join(
                        ["apply", "graphpred", data, train_cfg["model_name"]]
                    )
                    + ".yaml"
                )

            self.__class__.setup_user_cfg_cls()
            generated_cfg = {
                "pipeline_name": self.pipeline["name"],
                "pipeline_mode": self.pipeline["mode"],
                "device": train_cfg["device"],
                "data": {"name": data},
                "cpt_path": cpt,
                "general_pipeline": {
                    "batch_size": train_cfg["general_pipeline"][
                        "eval_batch_size"
                    ],
                    "num_workers": train_cfg["general_pipeline"]["num_workers"],
                },
            }
            output_cfg = self.user_cfg_cls(**generated_cfg).dict()
            output_cfg = deep_convert_dict(output_cfg)
            # Not applicable for inference
            output_cfg["data"].pop("split_ratio")
            comment_dict = {
                "device": "Torch device name, e.g., cpu or cuda or cuda:0",
                "cpt_path": "Path to the checkpoint file",
                "general_pipeline": pipeline_comments,
            }
            comment_dict = merge_comment(output_cfg, comment_dict)

            cfg_path = Path(cfg)
            yaml = ruamel.yaml.YAML()
            # Close the file deterministically so the content is flushed
            # before the path is reported to the user.
            with cfg_path.open("w") as f:
                yaml.dump(comment_dict, f)

            print(
                "Configuration file is generated at {}".format(
                    cfg_path.absolute()
                )
            )

        return config

    @classmethod
    def gen_script(cls, user_cfg_dict):
        """Render the standalone inference script for a user configuration.

        Parameters
        ----------
        user_cfg_dict : dict
            The loaded user configuration. It is validated against the
            generated config class and must contain ``cpt_path``.

        Returns
        -------
        str
            Source code of the rendered inference script.
        """
        # Check validation
        cls.setup_user_cfg_cls()
        cls.user_cfg_cls(**user_cfg_dict)

        # Training configuration
        # SECURITY NOTE: ``weights_only=False`` unpickles arbitrary objects;
        # only load checkpoints from a trusted source.
        train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[
            "cfg"
        ]

        # Dict for code rendering
        render_cfg = deepcopy(user_cfg_dict)
        model_name = train_cfg["model_name"]
        model_code = GraphModelFactory.get_source_code(model_name)
        render_cfg["model_code"] = model_code
        render_cfg["model_class_name"] = GraphModelFactory.get_model_class_name(
            model_name
        )
        render_cfg.update(
            DataFactory.get_generated_code_dict(user_cfg_dict["data"]["name"])
        )

        # Dict for defining cfg in the rendered code
        generated_user_cfg = deepcopy(user_cfg_dict)
        generated_user_cfg.pop("pipeline_name")
        generated_user_cfg.pop("pipeline_mode")
        # model arch configuration
        generated_user_cfg["model"] = train_cfg["model"]

        render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}"
        render_cfg["user_cfg"] = user_cfg_dict

        file_current_dir = Path(__file__).resolve().parent
        with open(file_current_dir / "graphpred.jinja-py", "r") as f:
            template = Template(f.read())

        return template.render(**render_cfg)

    @staticmethod
    def get_description() -> str:
        return "Graph classification pipeline for inference on binary classification"
def main():
    """Run inference with a trained graph prediction model and dump a CSV.

    The configuration is embedded by the template renderer as ``cfg``. The
    function checks that the inference dataset is compatible with the one
    used for training, restores the model from the checkpoint, predicts on
    the whole dataset and writes ``output.csv`` into the configured
    ``save_path`` directory.
    """
    {{ user_cfg_str }}

    device = cfg['device']
    # Fall back to CPU when CUDA is not available on this machine.
    if not torch.cuda.is_available():
        device = 'cpu'
    pipeline_cfg = cfg['general_pipeline']

    # load data
    data = AsGraphPredDataset({{data_initialize_code}})
    data_loader = GraphDataLoader(data, batch_size=pipeline_cfg['batch_size'],
                                  num_workers=pipeline_cfg['num_workers'], shuffle=False)

    # validation
    train_data_name = cfg['model']['data_info']['name']
    infer_data_name = cfg['data']['name']
    # Messages use implicit concatenation so that no source indentation
    # leaks into the text.
    if train_data_name.startswith('ogbg-mol'):
        assert infer_data_name.startswith('ogbg-mol'), (
            'Expect the inference data name to start '
            'with ogbg-mol, got {}'.format(infer_data_name))
    else:
        assert train_data_name == infer_data_name, (
            'Expect the training and inference data to '
            'have the same name, got {} and {}'.format(train_data_name, infer_data_name))
    model_node_feat_size = cfg['model']['data_info']['node_feat_size']
    model_edge_feat_size = cfg['model']['data_info']['edge_feat_size']
    data_node_feat_size = data.node_feat_size
    data_edge_feat_size = data.edge_feat_size
    assert model_node_feat_size == data_node_feat_size, (
        'Expect the training data and inference '
        'data to have the same number of input node features, '
        'got {:d} and {:d}'.format(model_node_feat_size, data_node_feat_size))
    assert model_edge_feat_size == data_edge_feat_size, (
        'Expect the training data and inference '
        'data to have the same number of input edge features, '
        'got {:d} and {:d}'.format(model_edge_feat_size, data_edge_feat_size))

    model = {{ model_class_name }}(**cfg['model'])
    # SECURITY NOTE: weights_only=False unpickles arbitrary objects; only
    # load checkpoints from a trusted source.
    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
    pred = infer(device, data_loader, model).detach().cpu()

    # Dump the results
    save_dir = pipeline_cfg["save_path"]
    # exist_ok=True lets the script be re-run with the same output directory.
    os.makedirs(save_dir, exist_ok=True)
    file_path = os.path.join(save_dir, 'output.csv')
    header = ['graph id']
    header.extend(['task_{:d}'.format(i) for i in range(cfg['model']['data_info']['out_size'])])
    # newline='' is what the csv module expects; without it extra blank rows
    # appear on platforms that translate line endings.
    with open(file_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows([
            [i] + pred[i].tolist() for i in range(len(pred))
        ])
    print('Saved inference results to {}'.format(file_path))
self.__class__.setup_user_cfg_cls() generated_cfg = { "pipeline_name": self.pipeline["name"], "pipeline_mode": self.pipeline["mode"], "device": train_cfg["device"], "data": {"name": data}, "cpt_path": cpt, "general_pipeline": {"save_path": "apply_results"}, } output_cfg = self.user_cfg_cls(**generated_cfg).dict() output_cfg = deep_convert_dict(output_cfg) # Not applicable for inference output_cfg["data"].pop("split_ratio") comment_dict = { "device": "Torch device name, e.g., cpu or cuda or cuda:0", "cpt_path": "Path to the checkpoint file", "general_pipeline": { "save_path": "Directory to save the inference results" }, } comment_dict = merge_comment(output_cfg, comment_dict) yaml = ruamel.yaml.YAML() yaml.dump(comment_dict, Path(cfg).open("w")) print( "Configuration file is generated at {}".format( Path(cfg).absolute() ) ) return config @classmethod def gen_script(cls, user_cfg_dict): # Check validation cls.setup_user_cfg_cls() cls.user_cfg_cls(**user_cfg_dict) # Training configuration train_cfg = torch.load(user_cfg_dict["cpt_path"], weights_only=False)[ "cfg" ] # Dict for code rendering render_cfg = deepcopy(user_cfg_dict) model_name = train_cfg["model_name"] model_code = NodeModelFactory.get_source_code(model_name) render_cfg["model_code"] = model_code render_cfg["model_class_name"] = NodeModelFactory.get_model_class_name( model_name ) render_cfg.update( DataFactory.get_generated_code_dict(user_cfg_dict["data"]["name"]) ) # Dict for defining cfg in the rendered code generated_user_cfg = deepcopy(user_cfg_dict) generated_user_cfg["data"].pop("name") generated_user_cfg.pop("pipeline_name") generated_user_cfg.pop("pipeline_mode") # model arch configuration generated_user_cfg["model"] = train_cfg["model"] render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}" render_cfg["user_cfg"] = user_cfg_dict file_current_dir = Path(__file__).resolve().parent with open(file_current_dir / "nodepred.jinja-py", "r") as f: template = Template(f.read()) return 
def main():
    """Run inference with a trained node classifier and save predictions as CSV."""
    {{ user_cfg_str }}

    # Fall back to CPU when CUDA is not available on this machine
    device = cfg['device']
    if not torch.cuda.is_available():
        device = 'cpu'

    # load data
    data = AsNodePredDataset({{data_initialize_code}})
    # validation
    if cfg['model']['embed_size'] > 0:
        # Learned node embeddings are tied to the node set seen in training
        model_num_nodes = cfg['model']['data_info']['num_nodes']
        data_num_nodes = data[0].num_nodes()
        assert model_num_nodes == data_num_nodes, \
            'Training and inference need to be on the same dataset when node embeddings were learned from scratch'
    else:
        model_in_size = cfg['model']['data_info']['in_size']
        data_in_size = data[0].ndata['feat'].shape[1]
        assert model_in_size == data_in_size, \
            'Expect the training data and inference data to have the same number of input node \
            features, got {:d} and {:d}'.format(model_in_size, data_in_size)

    model = {{ model_class_name }}(**cfg['model'])
    # NOTE: weights_only=False unpickles arbitrary objects; only load trusted checkpoints
    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
    logits = infer(device, data, model)
    pred = logits.argmax(dim=1).cpu()

    # Dump the results
    # exist_ok=True lets the script be re-run with the same output directory
    os.makedirs(cfg['general_pipeline']["save_path"], exist_ok=True)
    file_path = os.path.join(cfg['general_pipeline']["save_path"], 'output.csv')
    # newline='' is what the csv module expects, avoiding blank rows on Windows
    with open(file_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['node id', 'predicted label'])
        writer.writerows(enumerate(pred.tolist()))
    print('Saved inference results to {}'.format(file_path))
@classmethod
def gen_script(cls, user_cfg_dict):
    """Render the neighbor-sampling inference script for a user config.

    Parameters
    ----------
    user_cfg_dict : dict
        The user configuration. It is read for ``cpt_path``,
        ``data.name``, ``pipeline_name`` and ``pipeline_mode``.

    Returns
    -------
    str
        The text rendered from the ``nodepred-ns.jinja-py`` template.
    """
    # Instantiating the config class is done only to validate the input;
    # the resulting object is discarded.
    cls.setup_user_cfg_cls()
    cls.user_cfg_cls(**user_cfg_dict)

    # The checkpoint stores the configuration that was used for training.
    # NOTE: weights_only=False unpickles arbitrary objects; the checkpoint
    # must be trusted.
    checkpoint = torch.load(user_cfg_dict["cpt_path"], weights_only=False)
    train_cfg = checkpoint["cfg"]

    # Variables made available to the template
    render_cfg = deepcopy(user_cfg_dict)
    model_name = train_cfg["model_name"]
    render_cfg["model_code"] = NodeModelFactory.get_source_code(model_name)
    render_cfg["model_class_name"] = NodeModelFactory.get_model_class_name(
        model_name
    )
    dataset_name = user_cfg_dict["data"]["name"]
    render_cfg.update(DataFactory.get_generated_code_dict(dataset_name))

    # The ``cfg`` literal embedded in the rendered code: the user config
    # without the pipeline bookkeeping keys, plus the trained model's
    # architecture configuration.
    generated_user_cfg = deepcopy(user_cfg_dict)
    del generated_user_cfg["data"]["name"]
    for bookkeeping_key in ("pipeline_name", "pipeline_mode"):
        del generated_user_cfg[bookkeeping_key]
    generated_user_cfg["model"] = train_cfg["model"]

    render_cfg["user_cfg_str"] = f"cfg = {generated_user_cfg}"
    render_cfg["user_cfg"] = user_cfg_dict

    template_path = Path(__file__).resolve().parent / "nodepred-ns.jinja-py"
    with open(template_path, "r") as f:
        template = Template(f.read())
    return template.render(**render_cfg)
def main():
    """Run inference with a trained node classifier and save predictions as CSV."""
    {{ user_cfg_str }}

    # Fall back to CPU when CUDA is not available on this machine
    device = cfg['device']
    if not torch.cuda.is_available():
        device = 'cpu'

    # load data
    data = AsNodePredDataset({{data_initialize_code}})
    # validation
    if cfg['model']['embed_size'] > 0:
        # Learned node embeddings are tied to the node set seen in training
        model_num_nodes = cfg['model']['data_info']['num_nodes']
        data_num_nodes = data[0].num_nodes()
        assert model_num_nodes == data_num_nodes, \
            'Training and inference need to be on the same dataset when node embeddings were learned from scratch'
    else:
        model_in_size = cfg['model']['data_info']['in_size']
        data_in_size = data[0].ndata['feat'].shape[1]
        assert model_in_size == data_in_size, \
            'Expect the training data and inference data to have the same number of input node \
            features, got {:d} and {:d}'.format(model_in_size, data_in_size)

    model = {{ model_class_name }}(**cfg['model'])
    # NOTE: weights_only=False unpickles arbitrary objects; only load trusted checkpoints
    model.load_state_dict(torch.load(cfg['cpt_path'], weights_only=False, map_location='cpu')['model'])
    logits = infer(device, data, model)
    pred = logits.argmax(dim=1).cpu()

    # Dump the results
    # exist_ok=True lets the script be re-run with the same output directory
    os.makedirs(cfg['general_pipeline']["save_path"], exist_ok=True)
    file_path = os.path.join(cfg['general_pipeline']["save_path"], 'output.csv')
    # newline='' is what the csv module expects, avoiding blank rows on Windows
    with open(file_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['node id', 'predicted label'])
        writer.writerows(enumerate(pred.tolist()))
    print('Saved inference results to {}'.format(file_path))
def export(
    cfg: str = typer.Option("cfg.yaml", help="config yaml file name"),
    output: str = typer.Option("script.py", help="output python file name"),
):
    """Generate a runnable Python script from a configuration file.

    Parameters
    ----------
    cfg : str
        Path of the YAML configuration file to read.
    output : str
        Path of the Python file to write.

    The configuration's ``pipeline_mode`` selects the registry used to look
    up ``pipeline_name``: ``"train"`` uses ``PipelineFactory``, anything else
    uses ``ApplyPipelineFactory``.
    """
    # Use a context manager so the config file handle is closed right away
    with Path(cfg).open("r") as f:
        user_cfg = yaml.safe_load(f)
    pipeline_name = user_cfg["pipeline_name"]
    pipeline_mode = user_cfg["pipeline_mode"]
    if pipeline_mode == "train":
        output_file_content = PipelineFactory.registry[
            pipeline_name
        ].gen_script(user_cfg)
    else:
        output_file_content = ApplyPipelineFactory.registry[
            pipeline_name
        ].gen_script(user_cfg)

    # Normalise the rendered template before writing it out
    f_code = autopep8.fix_code(output_file_content, options={"aggressive": 1})
    f_code = isort.code(f_code)
    with open(output, "w") as f:
        f.write(f_code)
    print(
        "The python script is generated at {}, based on config file {}".format(
            Path(output).absolute(), Path(cfg).absolute()
        )
    )
def train(
    cfg: str = typer.Option("cfg.yaml", help="config yaml file name"),
):
    """Generate the training script for a configuration and run it in-process.

    Parameters
    ----------
    cfg : str
        Path of the YAML configuration file to read.
    """
    # Use a context manager so the config file handle is closed right away
    with Path(cfg).open("r") as f:
        user_cfg = yaml.safe_load(f)
    pipeline_name = user_cfg["pipeline_name"]
    output_file_content = PipelineFactory.registry[pipeline_name].gen_script(
        user_cfg
    )

    f_code = autopep8.fix_code(output_file_content, options={"aggressive": 1})
    f_code = isort.code(f_code)
    code = compile(f_code, "dglgo_tmp.py", "exec")
    # NOTE(security): this executes code rendered from the pipeline's
    # gen_script using values taken from the config file, so only run
    # configuration files you trust.
    # __name__ is set so the script's ``if __name__ == "__main__"`` guard fires.
    exec(code, {"__name__": "__main__"})
class DotPredictor(nn.Module):
    def __init__(
        self,
        in_size: int = -1,
        out_size: int = 1,
        hidden_size: int = 256,
        num_layers: int = 3,
        bias: bool = False,
    ):
        """Elementwise-product model for edge scores

        The elementwise product of the two endpoint representations is passed
        through ``num_layers - 2`` hidden linear layers (each followed by
        ReLU) and one output linear layer, then squashed with a sigmoid.

        Parameters
        ----------
        in_size : int
            Size of the endpoint representations. A non-positive value means
            the inputs already have ``hidden_size`` features.
        out_size : int
            Output size.
        hidden_size : int
            Hidden size.
        num_layers : int
            Controls the depth: ``num_layers - 2`` hidden layers are created.
        bias : bool
            Whether to use bias in the linear layers.
        """
        super(DotPredictor, self).__init__()
        lins_list = []
        # Chain the layer widths: only the first layer consumes in_size, every
        # later layer consumes the previous layer's output. Building every
        # hidden layer as (in_size -> hidden_size) only works when
        # in_size == hidden_size or when there is exactly one hidden layer.
        prev_size = in_size if in_size > 0 else hidden_size
        for _ in range(num_layers - 2):
            lins_list.append(nn.Linear(prev_size, hidden_size, bias=bias))
            lins_list.append(nn.ReLU())
            prev_size = hidden_size
        lins_list.append(nn.Linear(prev_size, out_size, bias=bias))
        self.linear = nn.Sequential(*lins_list)

    def forward(self, h_src, h_dst):
        """Score edges from the representations of their two endpoints.

        Returns the sigmoid of the MLP applied to ``h_src * h_dst``.
        """
        h = h_src * h_dst
        h = self.linear(h)
        h = torch.sigmoid(h)
        return h
class OGBGGIN(nn.Module):
    def __init__(
        self,
        data_info: dict,
        embed_size: int = 300,
        num_layers: int = 5,
        dropout: float = 0.5,
        virtual_node: bool = False,
    ):
        """Graph Isomorphism Network (GIN) variant introduced in baselines
        for OGB graph property prediction datasets

        Parameters
        ----------
        data_info : dict
            The information about the input dataset.
        embed_size : int
            Embedding size.
        num_layers : int
            Number of layers.
        dropout : float
            Dropout rate.
        virtual_node : bool
            Whether to use virtual node.
        """
        super().__init__()
        self.data_info = data_info
        self.embed_size = embed_size
        self.num_layers = num_layers
        self.virtual_node = virtual_node

        # The two OGB molecule datasets use the OGB atom/bond encoders; any
        # other dataset gets plain linear projections of its raw features.
        if data_info["name"] in ("ogbg-molhiv", "ogbg-molpcba"):
            self.node_encoder = AtomEncoder(embed_size)
            per_layer_edge_encoders = [
                BondEncoder(embed_size) for _ in range(num_layers)
            ]
        else:
            self.node_encoder = nn.Linear(
                data_info["node_feat_size"], embed_size
            )
            per_layer_edge_encoders = [
                nn.Linear(data_info["edge_feat_size"], embed_size)
                for _ in range(num_layers)
            ]
        self.edge_encoders = nn.ModuleList(per_layer_edge_encoders)

        convs = []
        for _ in range(num_layers):
            convs.append(GINEConv(MLP(embed_size)))
        self.conv_layers = nn.ModuleList(convs)

        self.dropout = nn.Dropout(dropout)
        self.pool = AvgPooling()
        self.pred = nn.Linear(embed_size, data_info["out_size"])

        if virtual_node:
            # A single virtual-node embedding, initialised to zero
            self.virtual_emb = nn.Embedding(1, embed_size)
            nn.init.constant_(self.virtual_emb.weight.data, 0)
            # One update MLP per layer except the last
            self.mlp_virtual = nn.ModuleList(
                MLP(embed_size) for _ in range(num_layers - 1)
            )
            self.virtual_pool = SumPooling()

    def forward(self, graph, node_feat, edge_feat):
        """Compute graph-level predictions.

        Node features are encoded, refined by the GINE layers (optionally
        exchanging messages with a per-graph virtual node), average-pooled
        per graph and passed through the prediction layer.
        """
        use_virtual = self.virtual_node
        if use_virtual:
            # One copy of the virtual-node embedding per graph in the batch
            virtual_emb = self.virtual_emb.weight.expand(graph.batch_size, -1)

        hn = self.node_encoder(node_feat)

        for layer in range(self.num_layers):
            is_last = layer == self.num_layers - 1

            if use_virtual:
                # messages from virtual nodes to graph nodes
                hn = hn + dgl.broadcast_nodes(graph, virtual_emb)

            he = self.edge_encoders[layer](edge_feat)
            hn = self.conv_layers[layer](graph, hn, he)
            if not is_last:
                hn = F.relu(hn)
            hn = self.dropout(hn)

            if use_virtual and not is_last:
                # messages from graph nodes to virtual nodes
                pooled = self.virtual_pool(graph, hn) + virtual_emb
                virtual_emb = self.mlp_virtual[layer](pooled)
                virtual_emb = self.dropout(F.relu(virtual_emb))

        return self.pred(self.pool(graph, hn))
class MLP(nn.Module):
    def __init__(
        self,
        in_feat_size: int,
        out_feat_size: int,
        num_layers: int = 3,
        decreasing_hidden_size=False,
    ):
        """Multilayer Perceptron (MLP)

        With ``decreasing_hidden_size`` the width is halved (integer
        division) after every layer until the last one, which maps to
        ``out_feat_size``. Otherwise the first layer maps to
        ``out_feat_size`` and all following layers keep that width.
        """
        super().__init__()

        # Work out the (input, output) width of every linear layer first,
        # then build them in that order.
        if decreasing_hidden_size:
            widths = [
                (in_feat_size // 2**depth, in_feat_size // 2 ** (depth + 1))
                for depth in range(num_layers - 1)
            ]
            widths.append(
                (in_feat_size // 2 ** (num_layers - 1), out_feat_size)
            )
        else:
            widths = [(in_feat_size, out_feat_size)]
            widths += [(out_feat_size, out_feat_size)] * (num_layers - 1)

        self.layers = nn.ModuleList(
            nn.Linear(fan_in, fan_out) for fan_in, fan_out in widths
        )
        self.num_layers = num_layers

    def forward(self, h):
        """Apply the layers in order, with ReLU after all but the last."""
        final_index = self.num_layers - 1
        for index, linear in enumerate(self.layers):
            h = linear(h)
            if index == final_index:
                continue
            h = F.relu(h)
        return h
class PNA(nn.Module):
    def __init__(
        self,
        data_info: dict,
        embed_size: int = 80,
        aggregators: str = "mean max min std",
        scalers: str = "identity amplification attenuation",
        dropout: float = 0.3,
        batch_norm: bool = True,
        residual: bool = True,
        num_mlp_layers: int = 1,
        num_layers: int = 4,
        readout: str = "mean",
    ):
        """Principal Neighbourhood Aggregation

        Parameters
        ----------
        data_info : dict
            The information about the input dataset.
        embed_size : int
            Embedding size.
        aggregators : str
            Aggregation function names separated by space, can include mean, max, min, std, sum
        scalers : str
            Scaler function names separated by space, can include identity, amplification, and attenuation
        dropout : float
            Dropout rate.
        batch_norm : bool
            Whether to use batch normalization.
        residual : bool
            Whether to use residual connection.
        num_mlp_layers : int
            Number of MLP layers to use after message aggregation in each PNA layer.
        num_layers : int
            Number of PNA layers.
        readout : str
            Readout for computing graph-level representations, can be 'sum' or 'mean'.
        """
        super(PNA, self).__init__()
        self.data_info = data_info
        self.embed_size = embed_size
        self.dropout = dropout
        self.batch_norm = batch_norm
        self.residual = residual
        self.num_mlp_layers = num_mlp_layers
        self.num_layers = num_layers
        self.readout = readout

        if aggregators is None:
            aggregators = ["mean", "max", "min", "std"]
        else:
            # split() without a separator tolerates repeated, leading and
            # trailing whitespace, which split(" ") turns into empty names.
            aggregators = aggregators.split()
            # An empty selection is still rejected, as it was before.
            assert aggregators and set(aggregators).issubset(
                {"mean", "max", "min", "std", "sum"}
            ), "Expect aggregators to be a subset of ['mean', 'max', 'min', 'std', 'sum'], \
                got {}".format(
                aggregators
            )
        if scalers is None:
            scalers = ["identity", "amplification", "attenuation"]
        else:
            scalers = scalers.split()
            assert scalers and set(scalers).issubset(
                {"identity", "amplification", "attenuation"}
            ), "Expect scalers to be a subset of ['identity', 'amplification', 'attenuation'], \
                got {}".format(
                scalers
            )
        self.aggregators = aggregators
        self.scalers = scalers

        if data_info["name"] in ["ogbg-molhiv", "ogbg-molpcba"]:
            self.node_encoder = AtomEncoder(embed_size)
        else:
            # Handle other datasets
            self.node_encoder = nn.Linear(
                data_info["node_feat_size"], embed_size
            )
        self.conv_layers = nn.ModuleList(
            [
                SimplePNAConv(
                    feat_size=embed_size,
                    aggregators=aggregators,
                    scalers=scalers,
                    delta=data_info["delta"],
                    dropout=dropout,
                    batch_norm=batch_norm,
                    residual=residual,
                    num_mlp_layers=num_mlp_layers,
                )
                for _ in range(num_layers)
            ]
        )

        if readout == "sum":
            self.pool = SumPooling()
        elif readout == "mean":
            self.pool = AvgPooling()
        else:
            raise ValueError(
                "Expect readout to be 'sum' or 'mean', got {}".format(readout)
            )
        self.pred = MLP(
            embed_size, data_info["out_size"], decreasing_hidden_size=True
        )

    def forward(self, graph, node_feat, edge_feat=None):
        """Compute graph-level predictions; ``edge_feat`` is not used."""
        hn = self.node_encoder(node_feat)
        for conv in self.conv_layers:
            hn = conv(graph, hn)
        hg = self.pool(graph, hn)

        return self.pred(hg)
class GAT(nn.Module):
    def __init__(
        self,
        data_info: dict,
        embed_size: int = -1,
        num_layers: int = 2,
        hidden_size: int = 8,
        # The default list is never mutated here; it is kept as a literal so
        # the signature stays exactly as callers and tools see it today.
        heads: List[int] = [8, 8],
        activation: str = "elu",
        feat_drop: float = 0.6,
        attn_drop: float = 0.6,
        negative_slope: float = 0.2,
        residual: bool = False,
    ):
        """Graph Attention Networks

        Parameters
        ----------
        data_info : dict
            The information about the input dataset.
        embed_size : int
            The dimension of created embedding table. -1 means using original node embedding
        num_layers : int
            Number of layers.
        hidden_size : int
            Hidden size.
        heads : List[int]
            Number of attention heads of each layer. Needs at least num_layers entries.
        activation : str
            Activation function.
        feat_drop : float
            Dropout rate for features.
        attn_drop : float
            Dropout rate for attentions.
        negative_slope : float
            Negative slope for leaky relu in GATConv
        residual : bool
            If true, the GATConv will use residual connection
        """
        super(GAT, self).__init__()
        self.data_info = data_info
        self.embed_size = embed_size
        self.num_layers = num_layers
        self.gat_layers = nn.ModuleList()
        self.activation = getattr(torch.nn.functional, activation)

        # Fail early with a clear message instead of a bare IndexError
        # raised from heads[i] halfway through building the layers.
        if len(heads) < num_layers:
            raise IndexError(
                "Expect heads to have at least num_layers={} entries, got {}".format(
                    num_layers, heads
                )
            )

        if embed_size > 0:
            self.embed = nn.Embedding(data_info["num_nodes"], embed_size)
            in_size = embed_size
        else:
            in_size = data_info["in_size"]

        for i in range(num_layers):
            # Hidden layers concatenate their heads (see flatten(1) in
            # forward), so the next layer's input is hidden_size * heads.
            in_hidden = hidden_size * heads[i - 1] if i > 0 else in_size
            out_hidden = (
                hidden_size if i < num_layers - 1 else data_info["out_size"]
            )
            # No activation on the output layer
            layer_activation = None if i == num_layers - 1 else self.activation
            self.gat_layers.append(
                GATConv(
                    in_hidden,
                    out_hidden,
                    heads[i],
                    feat_drop,
                    attn_drop,
                    negative_slope,
                    residual,
                    layer_activation,
                )
            )

    def forward(self, graph, node_feat, edge_feat=None):
        """Compute logits on a full graph; ``edge_feat`` is not used."""
        if self.embed_size > 0:
            dgl_warning(
                "The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size."
            )
            h = self.embed.weight
        else:
            h = node_feat
        for l in range(self.num_layers - 1):
            h = self.gat_layers[l](graph, h).flatten(1)
        # output projection: average over the heads of the last layer
        logits = self.gat_layers[-1](graph, h).mean(1)
        return logits

    def forward_block(self, blocks, node_feat, edge_feat=None):
        """Compute logits layer by layer over ``blocks``, one block per layer.

        ``node_feat`` is used as given; the embedding table is not consulted
        here. ``edge_feat`` is not used.
        """
        h = node_feat
        for l in range(self.num_layers - 1):
            h = self.gat_layers[l](blocks[l], h).flatten(1)
        logits = self.gat_layers[-1](blocks[-1], h).mean(1)
        return logits
""" super().__init__() self.use_edge_weight = use_edge_weight self.data_info = data_info self.embed_size = embed_size self.layers = nn.ModuleList() if embed_size > 0: self.embed = nn.Embedding(data_info["num_nodes"], embed_size) in_size = embed_size else: in_size = data_info["in_size"] for i in range(num_layers): in_hidden = hidden_size if i > 0 else in_size out_hidden = ( hidden_size if i < num_layers - 1 else data_info["out_size"] ) self.layers.append( dgl.nn.GraphConv( in_hidden, out_hidden, norm=norm, allow_zero_in_degree=True ) ) self.dropout = nn.Dropout(p=dropout) self.act = getattr(torch, activation) def forward(self, g, node_feat, edge_feat=None): if self.embed_size > 0: dgl_warning( "The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size." ) h = self.embed.weight else: h = node_feat edge_weight = edge_feat if self.use_edge_weight else None for l, layer in enumerate(self.layers): h = layer(g, h, edge_weight=edge_weight) if l != len(self.layers) - 1: h = self.act(h) h = self.dropout(h) return h def forward_block(self, blocks, node_feat, edge_feat=None): h = node_feat edge_weight = edge_feat if self.use_edge_weight else None for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h, edge_weight=edge_weight) if l != len(self.layers) - 1: h = self.act(h) h = self.dropout(h) return h ================================================ FILE: dglgo/dglgo/model/node_encoder/gin.py ================================================ import torch.nn as nn from dgl.base import dgl_warning from dgl.nn import GINConv class GIN(nn.Module): def __init__( self, data_info: dict, embed_size: int = -1, hidden_size=64, num_layers=3, aggregator_type="sum", ): """Graph Isomophism Networks Edge feature is ignored in this model. Parameters ---------- data_info : dict The information about the input dataset. embed_size : int The dimension of created embedding table. 
class GraphSAGE(nn.Module):
    def __init__(
        self,
        data_info: dict,
        embed_size: int = -1,
        hidden_size: int = 16,
        num_layers: int = 1,
        activation: str = "relu",
        dropout: float = 0.5,
        aggregator_type: str = "gcn",
    ):
        """GraphSAGE model

        Parameters
        ----------
        data_info : dict
            The information about the input dataset. ``"out_size"`` is read
            for the last layer, ``"in_size"`` when no embedding table is
            created and ``"num_nodes"`` when one is.
        embed_size : int
            The dimension of created embedding table. A value that is not
            positive (default -1) means using original node embedding.
        hidden_size : int
            Hidden size.
        num_layers : int
            Number of ``SAGEConv`` layers to stack.
        activation : str
            Activation function name under torch.nn.functional
        dropout : float
            Dropout rate.
        aggregator_type : str
            Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``).
        """
        super(GraphSAGE, self).__init__()
        self.data_info = data_info
        self.embed_size = embed_size

        learn_embedding = embed_size > 0
        if learn_embedding:
            self.embed = nn.Embedding(data_info["num_nodes"], embed_size)
        in_size = embed_size if learn_embedding else data_info["in_size"]

        self.layers = nn.ModuleList()
        self.dropout = nn.Dropout(dropout)
        self.activation = getattr(nn.functional, activation)

        final_depth = num_layers - 1
        for depth in range(num_layers):
            # First layer reads the input width; last layer writes the
            # task output width; everything in between is hidden_size.
            fan_in = in_size if depth == 0 else hidden_size
            fan_out = (
                data_info["out_size"] if depth == final_depth else hidden_size
            )
            self.layers.append(
                dgl.nn.SAGEConv(fan_in, fan_out, aggregator_type)
            )

    def forward(self, graph, node_feat, edge_feat=None):
        """Apply every layer to the same ``graph``.

        Dropout is applied to the input representation before the first
        layer. ``edge_feat`` is passed to each layer as its third positional
        argument.
        """
        if self.embed_size > 0:
            dgl_warning(
                "The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size."
            )
            hidden = self.embed.weight
        else:
            hidden = node_feat

        hidden = self.dropout(hidden)
        final_idx = len(self.layers) - 1
        for idx, conv in enumerate(self.layers):
            hidden = conv(graph, hidden, edge_feat)
            # No activation/dropout after the output layer.
            if idx < final_idx:
                hidden = self.dropout(self.activation(hidden))
        return hidden

    def forward_block(self, blocks, node_feat, edge_feat=None):
        """Apply layer ``i`` to ``blocks[i]``, starting from ``node_feat``.

        Unlike ``forward``, no dropout is applied to the input features.
        """
        hidden = node_feat
        final_idx = len(self.layers) - 1
        for idx, (conv, block) in enumerate(zip(self.layers, blocks)):
            hidden = conv(block, hidden, edge_feat)
            if idx < final_idx:
                hidden = self.dropout(self.activation(hidden))
        return hidden
def forward(self, g, node_feat, edge_feat=None):
    """Run the wrapped SGConv layer on graph ``g``.

    When ``embed_size`` is positive the learned embedding table replaces
    ``node_feat`` and a warning is emitted. ``edge_feat`` is accepted but
    never used.
    """
    if not self.embed_size > 0:
        return self.sgc(g, node_feat)

    dgl_warning(
        "The embedding for node feature is used, and input node_feat is ignored, due to the provided embed_size."
    )
    return self.sgc(g, self.embed.weight)
def get_cfg_func(self):
    """Return the callback that writes a commented default config file.

    The returned ``config`` function is declared with ``typer.Option``
    defaults. It deliberately has no docstring of its own, because CLI
    frameworks commonly surface a callback's docstring as help text.
    """

    def config(
        data: DataFactory.filter(
            "graphpred"
        ).get_dataset_enum() = typer.Option(..., help="input data name"),
        cfg: Optional[str] = typer.Option(
            None, help="output configuration path"
        ),
        model: GraphModelFactory.get_model_enum() = typer.Option(
            ..., help="Model name"
        ),
    ):
        self.__class__.setup_user_cfg_cls()
        generated_cfg = {
            "pipeline_name": self.pipeline["name"],
            "pipeline_mode": self.pipeline["mode"],
            "device": "cpu",
            "data": {"name": data.name},
            "model": {"name": model.value},
            "general_pipeline": {},
        }
        # Round-trip through the user config class and dump it as a dict
        # (presumably this fills in the defaults -- confirm in UserConfig).
        output_cfg = self.user_cfg_cls(**generated_cfg).dict()
        output_cfg = deep_convert_dict(output_cfg)
        comment_dict = {
            "device": "Torch device name, e.g., cpu or cuda or cuda:0",
            "data": {
                "split_ratio": "Ratio to generate data split, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. Leave blank to use builtin split in original dataset"
            },
            "general_pipeline": pipeline_comments,
            "model": GraphModelFactory.get_constructor_doc_dict(
                model.value
            ),
        }
        comment_dict = merge_comment(output_cfg, comment_dict)

        yaml = ruamel.yaml.YAML()
        if cfg is None:
            cfg = "_".join(["graphpred", data.value, model.value]) + ".yaml"
        out_path = Path(cfg)
        # Close the handle deterministically; the file object used to be
        # passed inline to dump() and was never closed.
        with out_path.open("w") as stream:
            yaml.dump(comment_dict, stream)
        print(
            "Configuration file is generated at {}".format(
                out_path.absolute()
            )
        )

    return config
def train(device, loader, model, criterion, optimizer):
    """Run one optimisation pass over every batch produced by ``loader``.

    Each batch is a ``(graph, labels)`` pair. Node and edge inputs are read
    from the ``'feat'`` entries of ``ndata`` / ``edata``. Targets that are
    NaN are treated as unlabeled and excluded from the loss.
    """
    model.train()
    progress = tqdm(loader, desc="Iteration")
    for batched_graph, targets in progress:
        batched_graph = batched_graph.to(device)
        targets = targets.to(device)

        pred = model(
            batched_graph,
            batched_graph.ndata['feat'],
            batched_graph.edata['feat'],
        )

        optimizer.zero_grad()
        # NaN never compares equal to itself, so this mask keeps exactly
        # the labeled entries.
        labeled = targets == targets
        loss = criterion(pred.float()[labeled], targets.float()[labeled])
        loss.backward()
        optimizer.step()
def evaluate(device, loader, model):
    """Score ``model`` on every batch of ``loader`` and return the metric.

    Predictions are computed under ``torch.no_grad()``. Labels are reshaped
    to the prediction shape, and both are moved to CPU, concatenated along
    dim 0 and handed to ``calc_metric`` as NumPy arrays.
    """
    model.eval()
    truths, preds = [], []
    for batched_graph, targets in tqdm(loader, desc="Iteration"):
        batched_graph = batched_graph.to(device)
        targets = targets.to(device)
        nfeat = batched_graph.ndata['feat']
        efeat = batched_graph.edata['feat']
        with torch.no_grad():
            scores = model(batched_graph, nfeat, efeat)
        truths.append(targets.view(scores.shape).detach().cpu())
        preds.append(scores.detach().cpu())

    truth_array = torch.cat(truths, dim=0).numpy()
    pred_array = torch.cat(preds, dim=0).numpy()
    return calc_metric(truth_array, pred_array)
if __name__ == '__main__':
    {{ user_cfg_str }}
    # Fall back to CPU when the requested device cannot be CUDA.
    if not torch.cuda.is_available():
        cfg['device'] = 'cpu'

    # load data
    data = AsGraphPredDataset({{data_initialize_code}})
    # Dataset-derived sizes the model constructor receives as data_info.
    cfg["model"]["data_info"] = {
        "name": cfg["data_name"],
        "node_feat_size": data.node_feat_size,
        "edge_feat_size": data.edge_feat_size,
        "out_size": data.num_tasks
    }
    if cfg["model_name"] == 'pna':
        # PNA additionally receives "delta": the mean of log(in_degree + 1)
        # over all nodes of the training graphs.
        in_deg = torch.cat([g.in_degrees() for (g, _) in data[data.train_idx]])
        cfg["model"]["data_info"]["delta"] = torch.mean(torch.log(in_deg + 1)).item()

    # exist_ok=True so that re-running the script with the same save_path
    # does not abort with FileExistsError before any training happens.
    os.makedirs(cfg['general_pipeline']["save_path"], exist_ok=True)

    all_run_metrics = []
    num_runs = {{ user_cfg.general_pipeline.num_runs }}
    for run in range(num_runs):
        print('Run experiment {:d}'.format(run))
        test_metric = main(run, cfg, data)
        all_run_metrics.append(test_metric)
    avg_metric = np.round(np.mean(all_run_metrics), 6)
    std_metric = np.round(np.std(all_run_metrics), 6)
    print('Test Metric across {:d} runs: {:.6f} ± {:.6f}'.format(
        num_runs, avg_metric, std_metric))
# Default values for the "general_pipeline" section of a link prediction
# config. Used as the default of LinkPredUserConfig.general_pipeline.
class LinkpredPipelineCfg(BaseModel):
    # The intermediate hidden size between node model and edge model.
    hidden_size: int = 256
    # Edge batch size when evaluating.
    eval_batch_size: int = 32769
    # Edge batch size when training.
    train_batch_size: int = 32769
    # Number of training epochs.
    num_epochs: int = 200
    # Interval epochs between evaluations.
    eval_period: int = 5
    # Optimizer settings; gen_script pops "name" from its copy and the
    # remaining entries are unpacked as optimizer keyword arguments.
    optimizer: dict = {"name": "Adam", "lr": 0.005}
    # Name of the loss class; the template renders it as torch.nn.<loss>().
    loss: str = "BCELoss"
    # Directory to save the experiment results.
    save_path: str = "results"
    # Number of experiments to run.
    num_runs: int = 1
def get_cfg_func(self):
    """Return the callback that writes a commented default config file.

    The returned ``config`` function is declared with ``typer.Option``
    defaults. It deliberately has no docstring of its own, because CLI
    frameworks commonly surface a callback's docstring as help text.
    """

    def config(
        data: DataFactory.filter(
            "linkpred"
        ).get_dataset_enum() = typer.Option(..., help="input data name"),
        cfg: str = typer.Option(
            "cfg.yaml", help="output configuration path"
        ),
        node_model: NodeModelFactory.get_model_enum() = typer.Option(
            ..., help="Model name"
        ),
        edge_model: EdgeModelFactory.get_model_enum() = typer.Option(
            ..., help="Model name"
        ),
        neg_sampler: NegativeSamplerFactory.get_model_enum() = typer.Option(
            "persource", help="Negative sampler name"
        ),
    ):
        self.__class__.setup_user_cfg_cls()
        generated_cfg = {
            "pipeline_name": self.pipeline["name"],
            "pipeline_mode": self.pipeline["mode"],
            "device": "cpu",
            "data": {"name": data.name},
            "neg_sampler": {"name": neg_sampler.value},
            "node_model": {"name": node_model.value},
            "edge_model": {"name": edge_model.value},
        }
        # Round-trip through the user config class and dump it as a dict
        # (presumably this fills in the defaults -- confirm in UserConfig).
        output_cfg = self.user_cfg_cls(**generated_cfg).dict()
        output_cfg = deep_convert_dict(output_cfg)
        comment_dict = {
            "device": "Torch device name, e.g., cpu or cuda or cuda:0",
            "general_pipeline": pipeline_comments,
            "node_model": NodeModelFactory.get_constructor_doc_dict(
                node_model.value
            ),
            "edge_model": EdgeModelFactory.get_constructor_doc_dict(
                edge_model.value
            ),
            "neg_sampler": NegativeSamplerFactory.get_constructor_doc_dict(
                neg_sampler.value
            ),
            "data": {
                "split_ratio": "List of float, e.q. [0.8, 0.1, 0.1]. Split ratios for training, validation and test sets. Must sum to one. Leave blank to use builtin split in original dataset",
                "neg_ratio": "Int, e.q. 2. Indicate how much negative samples to be sampled per positive samples. Leave blank to use builtin split in original dataset",
            },
        }
        comment_dict = merge_comment(output_cfg, comment_dict)

        # The option defaults to "cfg.yaml", so this fallback only applies
        # when the callback is invoked with cfg=None explicitly.
        if cfg is None:
            cfg = (
                "_".join(
                    [
                        "linkpred",
                        data.value,
                        node_model.value,
                        edge_model.value,
                    ]
                )
                + ".yaml"
            )
        yaml = ruamel.yaml.YAML()
        out_path = Path(cfg)
        # Close the handle deterministically; the file object used to be
        # passed inline to dump() and was never closed.
        with out_path.open("w") as stream:
            yaml.dump(comment_dict, stream)
        print(
            "Configuration file is generated at {}".format(
                out_path.absolute()
            )
        )

    return config
def calc_hitsk(y_pred_pos, y_pred_neg, k):
    """Return Hits@k: the share of positive scores above the k-th best negative.

    Parameters
    ----------
    y_pred_pos : torch.Tensor
        Scores predicted for positive edges.
    y_pred_neg : torch.Tensor
        Scores predicted for negative edges; flattened before ranking.
    k : int
        Rank of the negative score used as the threshold.

    Returns
    -------
    float
        Fraction of entries in ``y_pred_pos`` strictly greater than the
        k-th largest negative score, or ``1.0`` when fewer than ``k``
        negative scores are available.
    """
    neg_scores = y_pred_neg.flatten()
    # torch.topk raises when k exceeds the number of elements. With fewer
    # than k negatives no positive can be ranked below k negatives, so
    # report a perfect score (same convention as the OGB Hits@K evaluator).
    if neg_scores.numel() < k:
        return 1.0
    kth_score_in_negative_edges = torch.topk(neg_scores, k)[0][-1]
    hits = (y_pred_pos > kth_score_in_negative_edges).float().mean()
    return hits.item()
device, data, model, optimizer, loss_fcn): train_g = data.train_graph train_g = train_g.to(device) node_feat = train_g.ndata['feat'] train_src, train_dst = train_g.edges() for epoch in range(pipeline_cfg['num_epochs']): model.train() eid_dataloader = DataLoader(range(train_g.num_edges()), batch_size = pipeline_cfg["train_batch_size"], shuffle=True) for eids in eid_dataloader: h = model.node_model(train_g, node_feat) eids = eids.to(device) src, dst = train_src[eids], train_dst[eids] pos_score = model.edge_model(h[src], h[dst]) neg_src, neg_dst = model.neg_sampler(train_g, eids) neg_score = model.edge_model(h[neg_src], h[neg_dst]) loss = loss_fcn(torch.cat([pos_score, neg_score]), torch.cat( [torch.ones_like(pos_score), torch.zeros_like(neg_score)])) optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) optimizer.step() with torch.no_grad(): model.eval() val_neg_edges = data.val_edges[1] val_neg_score = model.inference(train_g, node_feat, val_neg_edges) train_hits = calc_hitsk(pos_score, val_neg_score, k=50) print("Epoch {:05d} | Loss {:.4f} | Train Hits@50 {:.4f}".format(epoch, loss, train_hits)) if epoch != 0 and epoch % pipeline_cfg['eval_period'] == 0: with torch.no_grad(): model.eval() val_pos_edge, val_neg_edges = data.val_edges pos_result = model.inference(train_g, node_feat, val_pos_edge) neg_result = model.inference(train_g, node_feat, val_neg_edges) val_hits = calc_hitsk(pos_result, neg_result, k=50) print("Epoch {:05d} | Val Hits@50 {:.4f}".format(epoch, val_hits)) with torch.no_grad(): model.eval() test_pos_edge, test_neg_edges = data.test_edges pos_result = model.inference(train_g, node_feat, test_pos_edge) neg_result = model.inference(train_g, node_feat, test_neg_edges) test_hits = calc_hitsk(pos_result, neg_result, k=50) print("Test Hits@50 {:.4f}".format(test_hits)) return test_hits def main(run, cfg, data): device = cfg['device'] pipeline_cfg = cfg['general_pipeline'] node_model = 
{{node_model_class_name}}(**cfg["node_model"]) edge_model = {{edge_model_class_name}}(**cfg["edge_model"]) neg_sampler = dgl.dataloading.negative_sampler.{{ neg_sampler_name }}(**cfg["neg_sampler"]) model = Model(node_model, edge_model, neg_sampler, pipeline_cfg["eval_batch_size"]) model = model.to(device) loss = torch.nn.{{ loss }}() optimizer = torch.optim.Adam(model.parameters(), **pipeline_cfg["optimizer"]) test_hits = train(cfg, pipeline_cfg, device, data, model, optimizer, loss) cpt_path = os.path.join(pipeline_cfg["save_path"], 'run_{}.pth'.format(run)) torch.save({'cfg': cfg, 'model': model.state_dict()}, cpt_path) print('Saved training checkpoint to {}'.format(cpt_path)) return test_hits if __name__ == '__main__': {{user_cfg_str}} if not torch.cuda.is_available(): cfg['device'] = 'cpu' # load data data = AsLinkPredDataset({{ data_initialize_code }}) nmodel_cfg = cfg["node_model"] pipeline_cfg = cfg['general_pipeline'] if 'feat' not in data[0].ndata: assert nmodel_cfg["embed_size"] > 0, "Need to specify embed size if graph doesn't have feat in ndata" cfg["node_model"]["data_info"] = { "in_size": nmodel_cfg['embed_size'] if nmodel_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1], "out_size": pipeline_cfg['hidden_size'], "num_nodes": data[0].num_nodes() } cfg["edge_model"]["data_info"] = { "in_size": pipeline_cfg['hidden_size'], "out_size": 1 # output each edge score } os.makedirs(pipeline_cfg["save_path"]) all_acc = [] num_runs = {{ user_cfg.general_pipeline.num_runs }} for run in range(num_runs): print(f'Run experiment #{run}') test_acc = main(run, cfg, data) print("Test Hits@50 {:.4f}".format(test_acc)) all_acc.append(test_acc) avg_acc = np.round(np.mean(all_acc), 6) std_acc = np.round(np.std(all_acc), 6) print(f'Test Hits@50 across {num_runs} runs: {avg_acc} ± {std_acc}') ================================================ FILE: dglgo/dglgo/pipeline/nodepred/__init__.py ================================================ from .gen import * 
def get_cfg_func(self):
    """Return the callback that writes a commented default config file.

    The returned ``config`` function is declared with ``typer.Option``
    defaults. It deliberately has no docstring of its own, because CLI
    frameworks commonly surface a callback's docstring as help text.
    """

    def config(
        data: DataFactory.filter(
            "nodepred"
        ).get_dataset_enum() = typer.Option(..., help="input data name"),
        cfg: Optional[str] = typer.Option(
            None, help="output configuration path"
        ),
        model: NodeModelFactory.get_model_enum() = typer.Option(
            ..., help="Model name"
        ),
    ):
        self.__class__.setup_user_cfg_cls()
        generated_cfg = {
            "pipeline_name": self.pipeline["name"],
            "pipeline_mode": self.pipeline["mode"],
            "device": "cpu",
            "data": {"name": data.name},
            "model": {"name": model.value},
            "general_pipeline": {},
        }
        # Round-trip through the user config class and dump it as a dict
        # (presumably this fills in the defaults -- confirm in UserConfig).
        output_cfg = self.user_cfg_cls(**generated_cfg).dict()
        output_cfg = deep_convert_dict(output_cfg)
        comment_dict = {
            "device": "Torch device name, e.g., cpu or cuda or cuda:0",
            "data": {
                "split_ratio": "Ratio to generate split masks, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. Leave blank to use builtin split in original dataset"
            },
            "general_pipeline": pipeline_comments,
            "model": NodeModelFactory.get_constructor_doc_dict(model.value),
        }
        comment_dict = merge_comment(output_cfg, comment_dict)

        yaml = ruamel.yaml.YAML()
        if cfg is None:
            cfg = "_".join(["nodepred", data.value, model.value]) + ".yaml"
        out_path = Path(cfg)
        # Close the handle deterministically; the file object used to be
        # passed inline to dump() and was never closed.
        with out_path.open("w") as stream:
            yaml.dump(comment_dict, stream)
        print(
            "Configuration file is generated at {}".format(
                out_path.absolute()
            )
        )

    return config
generated_user_cfg["data"].pop("name") generated_user_cfg.pop("pipeline_name") generated_user_cfg.pop("pipeline_mode") generated_user_cfg["model_name"] = generated_user_cfg["model"].pop( "name" ) generated_user_cfg["general_pipeline"]["optimizer"].pop("name") generated_train_cfg = copy.deepcopy(user_cfg_dict["general_pipeline"]) generated_train_cfg["optimizer"].pop("name") if user_cfg_dict["data"].get("split_ratio", None) is not None: render_cfg["data_initialize_code"] = "{}, split_ratio={}".format( render_cfg["data_initialize_code"], user_cfg_dict["data"]["split_ratio"], ) render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}" render_cfg["user_cfg"] = user_cfg_dict return template.render(**render_cfg) @staticmethod def get_description() -> str: return "Node classification pipeline for training" ================================================ FILE: dglgo/dglgo/pipeline/nodepred/nodepred.jinja-py ================================================ import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import dgl import os from dgl.data import AsNodePredDataset {{ data_import_code }} {{ model_code }} {% if user_cfg.general_pipeline.early_stop %} class EarlyStopping: def __init__(self, patience: int = -1, checkpoint_path: str = 'checkpoint.pth'): self.patience = patience self.checkpoint_path = checkpoint_path self.counter = 0 self.best_score = None self.early_stop = False def step(self, acc, model): score = acc if self.best_score is None: self.best_score = score self.save_checkpoint(model) elif score < self.best_score: self.counter += 1 print(f'EarlyStopping counter: {self.counter} out of {self.patience}') if self.counter >= self.patience: self.early_stop = True else: self.best_score = score self.save_checkpoint(model) self.counter = 0 return self.early_stop def save_checkpoint(self, model): '''Save model when validation loss decreases.''' torch.save(model.state_dict(), self.checkpoint_path) def load_checkpoint(self, model): 
def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn):
    """Train ``model`` on the first graph of ``data`` and return test accuracy.

    Self loops are removed and re-added before training. When early stopping
    is enabled, the weights restored before testing are those of the best
    validation score seen so far.
    """
    g = data[0]  # Only train on the first graph
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    g = g.to(device)

    node_feat = g.ndata.get('feat', None)
    edge_feat = g.edata.get('feat', None)
    label = g.ndata['label']
    train_mask, val_mask, test_mask = g.ndata['train_mask'].bool(), g.ndata['val_mask'].bool(), g.ndata['test_mask'].bool()

    {% if user_cfg.general_pipeline.early_stop %}
    stopper = EarlyStopping(**pipeline_cfg['early_stop'])
    {% endif %}

    val_acc = 0.
    for epoch in range(pipeline_cfg['num_epochs']):
        model.train()
        logits = model(g, node_feat, edge_feat)
        loss = loss_fcn(logits[train_mask], label[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(logits[train_mask], label[train_mask])
        if epoch != 0 and epoch % pipeline_cfg['eval_period'] == 0:
            # Validation reuses the logits of the training forward pass.
            val_acc = accuracy(logits[val_mask], label[val_mask])
            {% if user_cfg.general_pipeline.early_stop %}
            if stopper.step(val_acc, model):
                break
            {% endif %}
        print("Epoch {:05d} | Loss {:.4f} | TrainAcc {:.4f} | ValAcc {:.4f}".
              format(epoch, loss.item(), train_acc, val_acc))

    {% if user_cfg.general_pipeline.early_stop %}
    # A checkpoint exists only if at least one evaluation step ran (it does
    # not when num_epochs <= eval_period). Without this guard the script
    # crashed on the missing file, or loaded a stale one left at that path.
    if stopper.best_score is not None:
        stopper.load_checkpoint(model)
        stopper.close()
    {% endif %}

    model.eval()
    with torch.no_grad():
        logits = model(g, node_feat, edge_feat)
        test_acc = accuracy(logits[test_mask], label[test_mask])
    return test_acc
class SamplerConfig(BaseModel):
    """Settings of the neighbor sampler used by the ``nodepred-ns`` pipeline.

    Unknown keys are rejected (``extra = "forbid"``).
    """

    # Discriminator; "neighbor" is the only accepted sampler name.
    name: Literal["neighbor"]
    # Neighbors to sample per GNN layer; the i-th entry is for the i-th layer.
    fan_out: List[int] = [5, 10]
    # Batch size of seed nodes in the training stage.
    batch_size: int = Field(64, description="Batch size")
    # Number of data-loading workers in the training stage.
    num_workers: int = 4
    # Batch size of seed nodes in the evaluation stage.
    eval_batch_size: int = 1024
    # Number of data-loading workers in the evaluation stage.
    eval_num_workers: int = 4

    class Config:
        extra = "forbid"
@PipelineFactory.register("nodepred-ns")
class NodepredNsPipeline(PipelineBase):
    """Neighbor-sampling node classification pipeline (``nodepred-ns``).

    ``get_cfg_func`` returns the CLI callback that writes a commented YAML
    configuration file; ``gen_script`` renders ``nodepred-ns.jinja-py`` into
    a training script.
    """

    def __init__(self):
        self.pipeline = {"name": "nodepred-ns", "mode": "train"}
        self.default_cfg = None

    @classmethod
    def setup_user_cfg_cls(cls):
        """Build the pydantic user-config model and store it on the class."""
        from ...utils.enter_config import UserConfig

        class NodePredUserConfig(UserConfig):
            eval_device: DeviceEnum = Field("cpu")
            data: DataFactory.filter(
                "nodepred-ns"
            ).get_pydantic_config() = Field(..., discriminator="name")
            # Only models that implement ``forward_block`` can be trained
            # on sampled blocks.
            model: NodeModelFactory.filter(
                lambda cls: hasattr(cls, "forward_block")
            ).get_pydantic_model_config() = Field(..., discriminator="name")
            general_pipeline: NodepredNSPipelineCfg

        # Rebinding the class attribute replaces the property defined below.
        cls.user_cfg_cls = NodePredUserConfig

    @property
    def user_cfg_cls(self):
        return self.__class__.user_cfg_cls

    def get_cfg_func(self):
        """Return the CLI callback that generates a default config file."""

        def config(
            data: DataFactory.filter(
                "nodepred-ns"
            ).get_dataset_enum() = typer.Option(..., help="input data name"),
            cfg: Optional[str] = typer.Option(
                None, help="output configuration path"
            ),
            model: NodeModelFactory.filter(
                lambda cls: hasattr(cls, "forward_block")
            ).get_model_enum() = typer.Option(..., help="Model name"),
        ):
            self.__class__.setup_user_cfg_cls()
            generated_cfg = {
                "pipeline_name": self.pipeline["name"],
                "pipeline_mode": self.pipeline["mode"],
                "device": "cpu",
                "data": {"name": data.name},
                "model": {"name": model.value},
                "general_pipeline": {"sampler": {"name": "neighbor"}},
            }
            output_cfg = self.user_cfg_cls(**generated_cfg).dict()
            output_cfg = deep_convert_dict(output_cfg)
            comment_dict = {
                "device": "Torch device name, e.g., cpu or cuda or cuda:0",
                "data": {
                    "split_ratio": "Ratio to generate split masks, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. Leave blank to use builtin split in original dataset"
                },
                "general_pipeline": pipeline_comments,
                "model": NodeModelFactory.get_constructor_doc_dict(model.value),
            }
            comment_dict = merge_comment(output_cfg, comment_dict)

            # truncate length fan_out to be the same as num_layers in model
            if "num_layers" in comment_dict["model"]:
                comment_dict["general_pipeline"]["sampler"]["fan_out"] = [
                    5,
                    10,
                    15,
                    15,
                    15,
                ][: int(comment_dict["model"]["num_layers"])]

            if cfg is None:
                cfg = (
                    "_".join(["nodepred-ns", data.value, model.value])
                    + ".yaml"
                )
            # ``yaml_writer`` avoids shadowing the imported ``yaml`` module;
            # the ``with`` block closes the file the original code leaked.
            yaml_writer = ruamel.yaml.YAML()
            with Path(cfg).open("w") as cfg_file:
                yaml_writer.dump(comment_dict, cfg_file)
            print(
                "Configuration file is generated at {}".format(
                    Path(cfg).absolute()
                )
            )

        return config

    @staticmethod
    def gen_script(user_cfg_dict):
        """Render the training script for ``user_cfg_dict``.

        Parameters
        ----------
        user_cfg_dict : dict
            User configuration (as loaded from the YAML file).

        Returns
        -------
        str
            Source code of the generated training script.

        Raises
        ------
        AssertionError
            If the model's ``num_layers`` differs from the length of the
            sampler's ``fan_out``.
        """
        template_filename = (
            Path(__file__).resolve().parent / "nodepred-ns.jinja-py"
        )
        with open(template_filename, "r") as f:
            template = Template(f.read())

        # Validation only: pydantic raises on a bad ``general_pipeline``.
        NodepredNSPipelineCfg(**user_cfg_dict["general_pipeline"])

        if "num_layers" in user_cfg_dict["model"]:
            fan_out = user_cfg_dict["general_pipeline"]["sampler"]["fan_out"]
            # Raised explicitly so the check survives ``python -O`` while the
            # exception type stays the one the ``assert`` used to raise.
            if user_cfg_dict["model"]["num_layers"] != len(fan_out):
                raise AssertionError(
                    "The num_layers in model config should be the same as the length of fan_out in sampler. For example, if num_layers is 1, the fan_out cannot be [5, 10]"
                )

        model_name = user_cfg_dict["model"]["name"]
        render_cfg = copy.deepcopy(user_cfg_dict)
        render_cfg["model_code"] = NodeModelFactory.get_source_code(model_name)
        render_cfg["model_class_name"] = NodeModelFactory.get_model_class_name(
            model_name
        )
        render_cfg.update(
            DataFactory.get_generated_code_dict(
                user_cfg_dict["data"]["name"], '**cfg["data"]'
            )
        )

        # ``generated_user_cfg`` becomes the literal ``cfg = {...}`` embedded
        # in the script, so keys the script does not consume are removed.
        generated_user_cfg = copy.deepcopy(user_cfg_dict)
        generated_user_cfg["data"].pop("split_ratio", None)
        generated_user_cfg["data_name"] = generated_user_cfg["data"].pop("name")
        generated_user_cfg.pop("pipeline_name")
        generated_user_cfg.pop("pipeline_mode")
        generated_user_cfg["model_name"] = generated_user_cfg["model"].pop(
            "name"
        )
        generated_user_cfg["general_pipeline"]["optimizer"].pop("name")

        if user_cfg_dict["data"].get("split_ratio", None) is not None:
            render_cfg["data_initialize_code"] = "{}, split_ratio={}".format(
                render_cfg["data_initialize_code"],
                user_cfg_dict["data"]["split_ratio"],
            )
        render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}"
        render_cfg["user_cfg"] = user_cfg_dict
        # The script is only returned. The original opened "output.py" for
        # writing here without using it, which truncated or created a stray
        # file in the current working directory.
        return template.render(**render_cfg)

    @staticmethod
    def get_description() -> str:
        return "Node classification neighbor sampling pipeline for training"
def load_subtensor(nfeat, labels, seeds, input_nodes, device):
    """
    Gather the mini-batch inputs and targets and move them to ``device``.

    Features are taken at ``input_nodes`` and labels at ``seeds``; the pair
    ``(features, labels)`` is returned.
    """
    selected_feats = nfeat[input_nodes]
    selected_labels = labels[seeds]
    return selected_feats.to(device), selected_labels.to(device)
""" model.eval() eval_model = model.to(eval_device) g = g.to(eval_device) nfeat = nfeat.to(eval_device) with torch.no_grad(): y = eval_model(g, nfeat) model.train() return accuracy(y[val_nid], labels[val_nid].to(y.device)) def accuracy(logits, labels): _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) def train(cfg, pipeline_cfg, device, data, model, optimizer, loss_fcn): g = data[0] # Only train on the first graph g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) train_g = val_g = test_g = g train_nfeat = val_nfeat = test_nfeat = train_g.ndata['feat'] train_labels = val_labels = test_labels = train_g.ndata['label'] train_nid = torch.nonzero(train_g.ndata['train_mask'], as_tuple=True)[0] val_nid = torch.nonzero(val_g.ndata['val_mask'], as_tuple=True)[0] test_nid = torch.nonzero(~(test_g.ndata['train_mask'] | test_g.ndata['val_mask']), as_tuple=True)[0] sampler = dgl.dataloading.MultiLayerNeighborSampler( [int(fanout) for fanout in pipeline_cfg["sampler"]["fan_out"]]) dataloader = dgl.dataloading.NodeDataLoader( train_g, train_nid, sampler, device=device, batch_size=pipeline_cfg["sampler"]["batch_size"], shuffle=True, drop_last=False, num_workers=pipeline_cfg["sampler"]["num_workers"]) {% if user_cfg.early_stop %} stopper = EarlyStopping(pipeline_cfg['patience'], pipeline_cfg['checkpoint_path']) {% endif %} val_acc = 0. 
for epoch in range(pipeline_cfg['num_epochs']): model.train() model = model.to(device) for step, (input_nodes, seeds, subgs) in enumerate(dataloader): # Load the input features as well as output labels batch_inputs, batch_labels = load_subtensor(train_nfeat, train_labels, seeds, input_nodes, device) subgs = [subg.int().to(device) for subg in subgs] batch_pred = model.forward_block(subgs, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() train_acc = accuracy(batch_pred, batch_labels) print("Epoch {:05d} | Step {:05d} | Loss {:.4f} | TrainAcc {:.4f}". format(epoch, step, loss.item(), train_acc)) if epoch % pipeline_cfg["eval_period"] == 0 and epoch != 0: val_acc = evaluate(model, val_g, val_nfeat, val_labels, val_nid, cfg["eval_device"]) print('Eval Acc {:.4f}'.format(val_acc)) {% if user_cfg.early_stop %} if stopper.step(val_acc, model): break {% endif %} {% if user_cfg.early_stop %} stopper.load_checkpoint(model) stopper.close() {% endif %} model.eval() with torch.no_grad(): test_acc = evaluate(model, test_g, test_nfeat, test_labels, test_nid, cfg["eval_device"]) return test_acc def main(run, cfg, data): device = cfg['device'] pipeline_cfg = cfg["general_pipeline"] model = {{ model_class_name }}(**cfg["model"]) model = model.to(device) loss = torch.nn.{{ user_cfg.general_pipeline.loss }}() optimizer = torch.optim.{{ user_cfg.general_pipeline.optimizer.name }}(model.parameters(), **pipeline_cfg["optimizer"]) test_acc = train(cfg, pipeline_cfg, device, data, model, optimizer, loss) cpt_path = os.path.join(pipeline_cfg["save_path"], 'run_{}.pth'.format(run)) torch.save({'cfg': cfg, 'model': model.state_dict()}, cpt_path) print('Saved training checkpoint to {}'.format(cpt_path)) return test_acc if __name__ == '__main__': {{ user_cfg_str }} if not torch.cuda.is_available(): cfg['device'] = 'cpu' # load data data = AsNodePredDataset({{data_initialize_code}}) model_cfg = cfg["model"] cfg["model"]["data_info"] = 
{ "in_size": model_cfg['embed_size'] if model_cfg['embed_size'] > 0 else data[0].ndata['feat'].shape[1], "out_size": data.num_classes, "num_nodes": data[0].num_nodes() } os.makedirs(cfg['general_pipeline']["save_path"]) all_acc = [] num_runs = {{ user_cfg.general_pipeline.num_runs }} for run in range(num_runs): print(f'Run experiment #{run}') test_acc = main(run, cfg, data) print("Test Accuracy {:.4f}".format(test_acc)) all_acc.append(test_acc) avg_acc = np.round(np.mean(all_acc), 6) std_acc = np.round(np.std(all_acc), 6) print(f'Accuracy across {num_runs} runs: {avg_acc} ± {std_acc}') ================================================ FILE: dglgo/dglgo/utils/__init__.py ================================================ from .factory import * ================================================ FILE: dglgo/dglgo/utils/base_model.py ================================================ import copy import enum from enum import Enum, IntEnum from typing import Optional from jinja2 import Template from pydantic import ( BaseModel as PydanticBaseModel, create_model, create_model, Field, ) class DeviceEnum(str, Enum): cpu = "cpu" cuda = "cuda" class DGLBaseModel(PydanticBaseModel): class Config: extra = "allow" use_enum_values = True @classmethod def with_fields(cls, model_name, **field_definitions): return create_model(model_name, __base__=cls, **field_definitions) def get_literal_value(type_): if hasattr(type_, "__values__"): name = type_.__values__[0] elif hasattr(type_, "__args__"): name = type_.__args__[0] return name def extract_name(union_type): name_dict = {} for t in union_type.__args__: type_ = t.__fields__["name"].type_ name = get_literal_value(type_) name_dict[name] = name return enum.Enum("Choice", name_dict) class EarlyStopConfig(DGLBaseModel): patience: int = 20 checkpoint_path: str = "checkpoint.pth" ================================================ FILE: dglgo/dglgo/utils/early_stop.py ================================================ import torch class EarlyStopping: 
class PipelineConfig(DGLBaseModel):
    """Generic training-pipeline settings with their default values.

    NOTE(review): no use of this model is visible in this module; the
    per-field notes below only restate the declarations -- confirm the
    intended consumers before relying on them.
    """

    # Optional integer, defaults to -1.
    node_embed_size: Optional[int] = -1
    # Optional dict of early-stopping settings; no explicit default is given.
    early_stop: Optional[dict]
    num_epochs: int = 200
    eval_period: int = 5
    # Optimizer settings; "name" is stored next to the remaining options.
    optimizer: dict = {"name": "Adam", "lr": 0.005}
    loss: str = "CrossEntropyLoss"
class DataFactoryClass:
    """Registry of datasets and of the code snippets needed to load them.

    Each entry stores the import statement, the constructor expression and
    optional extra constructor arguments of one dataset, together with the
    pipelines allowed to use it.
    """

    def __init__(self):
        self.registry = {}
        self.pipeline_name = None
        self.pipeline_allowed = {}

    def register(
        self,
        name: str,
        import_code: str,
        class_name: str,
        allowed_pipeline: List[str],
        extra_args=None,
    ):
        """Register a dataset and return ``self``.

        Parameters
        ----------
        name : str
            Dataset name used in configuration files.
        import_code : str
            Import statement emitted into generated scripts.
        class_name : str
            Constructor expression; may contain ``{}`` as a placeholder for
            the constructor arguments.
        allowed_pipeline : list of str
            Names of the pipelines that may use this dataset.
        extra_args : dict, optional
            Extra configuration fields and their default values.
        """
        self.registry[name] = {
            "name": name,
            "import_code": import_code,
            "class_name": class_name,
            # Always store a private copy: the former ``extra_args={}``
            # default made every dataset share (and mutate) one dict.
            "extra_args": dict(extra_args) if extra_args else {},
        }
        for pipeline in allowed_pipeline:
            self.pipeline_allowed.setdefault(pipeline, []).append(name)
        return self

    def get_dataset_enum(self):
        """Return an ``Enum`` whose members are the registered datasets."""
        return enum.Enum(
            "DatasetName", {v["name"]: k for k, v in self.registry.items()}
        )

    def get_dataset_classname(self, name):
        return self.registry[name]["class_name"]

    def get_constructor_arg_type(self, model_name):
        # NOTE(review): registry entries are plain dicts, so this inspects
        # ``dict.__init__`` rather than a dataset constructor -- confirm the
        # intent before using the result.
        sigs = inspect.signature(self.registry[model_name].__init__)
        return {k: param.annotation for k, param in sigs.parameters.items()}

    def get_pydantic_config(self):
        """Return the ``Union`` of the pydantic configs of all datasets."""
        dataset_list = []
        for entry in self.registry.values():
            dataset_name = entry["name"]
            # Work on a filtered copy; the original deleted "name" from the
            # registered ``extra_args`` dict itself.
            fields = {
                k: v for k, v in entry["extra_args"].items() if k != "name"
            }
            base = self.get_base_class(dataset_name, self.pipeline_name)
            dataset_list.append(
                create_model(
                    f"{dataset_name}Config",
                    **fields,
                    __base__=base,
                )
            )

        output = dataset_list[0]
        for d in dataset_list[1:]:
            output = Union[output, d]
        return output

    def get_import_code(self, name):
        return self.registry[name]["import_code"]

    def get_extra_args(self, name):
        return self.registry[name]["extra_args"]

    def get_class_name(self, name):
        return self.registry[name]["class_name"]

    def get_generated_code_dict(self, name, args='**cfg["data"]'):
        """Return the import and initialization code of dataset ``name``.

        ``args`` is substituted into the constructor expression when the
        dataset declares extra arguments; previously the parameter was
        ignored and its default value was always used.
        """
        entry = self.registry[name]
        data_initialize_code = entry["class_name"]
        if len(entry["extra_args"]) > 0:
            data_initialize_code = data_initialize_code.format(args)
        return {
            "data_import_code": entry["import_code"],
            "data_initialize_code": data_initialize_code,
        }

    def filter(self, pipeline_name):
        """Return a new factory limited to the datasets of ``pipeline_name``."""
        allowed_name = set(self.pipeline_allowed[pipeline_name])
        d = DataFactoryClass()
        d.registry = {
            k: v for k, v in self.registry.items() if k in allowed_name
        }
        d.pipeline_name = pipeline_name
        return d

    @staticmethod
    def get_base_class(dataset_name, pipeline_name):
        """Return the pydantic base model of a dataset configuration.

        Link prediction configs additionally accept ``neg_ratio``.
        """
        if pipeline_name == "linkpred":

            class EdgeBase(DGLBaseModel):
                name: Literal[dataset_name]
                split_ratio: Optional[Tuple[float, float, float]] = None
                neg_ratio: Optional[int] = None

            return EdgeBase
        else:

            class NodeBase(DGLBaseModel):
                name: Literal[dataset_name]
                split_ratio: Optional[Tuple[float, float, float]] = None

            return NodeBase
class_name="PubmedGraphDataset()", allowed_pipeline=["nodepred", "nodepred-ns", "linkpred"], ) DataFactory.register( "csv", import_code="from dgl.data import CSVDataset", extra_args={"data_path": "./"}, class_name="CSVDataset({})", allowed_pipeline=["nodepred", "nodepred-ns", "linkpred", "graphpred"], ) DataFactory.register( "reddit", import_code="from dgl.data import RedditDataset", class_name="RedditDataset()", allowed_pipeline=["nodepred", "nodepred-ns", "linkpred"], ) DataFactory.register( "co-buy-computer", import_code="from dgl.data import AmazonCoBuyComputerDataset", class_name="AmazonCoBuyComputerDataset()", allowed_pipeline=["nodepred", "nodepred-ns", "linkpred"], ) DataFactory.register( "ogbn-arxiv", import_code="from ogb.nodeproppred import DglNodePropPredDataset", extra_args={}, class_name="DglNodePropPredDataset('ogbn-arxiv')", allowed_pipeline=["nodepred", "nodepred-ns", "linkpred"], ) DataFactory.register( "ogbn-products", import_code="from ogb.nodeproppred import DglNodePropPredDataset", extra_args={}, class_name="DglNodePropPredDataset('ogbn-products')", allowed_pipeline=["nodepred", "nodepred-ns", "linkpred"], ) DataFactory.register( "ogbl-collab", import_code="from ogb.linkproppred import DglLinkPropPredDataset", extra_args={}, class_name="DglLinkPropPredDataset('ogbl-collab')", allowed_pipeline=["linkpred"], ) DataFactory.register( "ogbl-citation2", import_code="from ogb.linkproppred import DglLinkPropPredDataset", extra_args={}, class_name="DglLinkPropPredDataset('ogbl-citation2')", allowed_pipeline=["linkpred"], ) DataFactory.register( "ogbg-molhiv", import_code="from ogb.graphproppred import DglGraphPropPredDataset", extra_args={}, class_name="DglGraphPropPredDataset(name='ogbg-molhiv')", allowed_pipeline=["graphpred"], ) DataFactory.register( "ogbg-molpcba", import_code="from ogb.graphproppred import DglGraphPropPredDataset", extra_args={}, class_name="DglGraphPropPredDataset(name='ogbg-molpcba')", allowed_pipeline=["graphpred"], ) class 
PipelineFactory: """The factory class for creating executors""" registry: Dict[str, PipelineBase] = {} default_config_registry = {} """ Internal registry for available executors """ @classmethod def register(cls, name: str) -> Callable: def inner_wrapper(wrapped_class) -> Callable: if name in cls.registry: logger.warning( "Executor %s already exists. Will replace it", name ) cls.registry[name] = wrapped_class() return wrapped_class return inner_wrapper @classmethod def register_default_config_generator(cls, name: str) -> Callable: def inner_wrapper(wrapped_class) -> Callable: if name in cls.registry: logger.warning( "Executor %s already exists. Will replace it", name ) cls.default_config_registry[name] = wrapped_class return wrapped_class return inner_wrapper @classmethod def call_default_config_generator( cls, generator_name, model_name, dataset_name ): return cls.default_config_registry[generator_name]( model_name, dataset_name ) @classmethod def call_generator(cls, generator_name, cfg): return cls.registry[generator_name](cfg) @classmethod def get_pipeline_enum(cls): enum_class = enum.Enum( "PipelineName", {k: k for k, v in cls.registry.items()} ) return enum_class class ApplyPipelineFactory: """The factory class for creating executors for inference""" registry: Dict[str, PipelineBase] = {} """ Internal registry for available executors """ @classmethod def register(cls, name: str) -> Callable: def inner_wrapper(wrapped_class) -> Callable: if name in cls.registry: logger.warning( "Executor %s already exists. 
Will replace it", name ) cls.registry[name] = wrapped_class() return wrapped_class return inner_wrapper model_dir = Path(__file__).parent.parent / "model" class ModelFactory: """The factory class for creating executors""" def __init__(self): self.registry = {} self.code_registry = {} """ Internal registry for available executors """ def get_model_enum(self): enum_class = enum.Enum( "ModelName", {k: k for k, v in self.registry.items()} ) return enum_class def register(self, model_name: str) -> Callable: def inner_wrapper(wrapped_class) -> Callable: if model_name in self.registry: logger.warning( "Executor %s already exists. Will replace it", model_name ) self.registry[model_name] = wrapped_class # code_filename = model_dir / filename code_filename = Path(inspect.getfile(wrapped_class)) self.code_registry[model_name] = code_filename.read_text() return wrapped_class return inner_wrapper def get_source_code(self, model_name): return self.code_registry[model_name] def get_constructor_default_args(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) default_map = {} for k, param in dict(sigs.parameters).items(): default_map[k] = param.default return default_map def get_pydantic_constructor_arg_type(self, model_name: str): model_enum = self.get_model_enum() arg_dict = self.get_constructor_default_args(model_name) type_annotation_dict = {} # type_annotation_dict["name"] = Literal[""] exempt_keys = ["self", "in_size", "out_size", "data_info"] for k, param in arg_dict.items(): if k not in exempt_keys: type_annotation_dict[k] = arg_dict[k] class Base(DGLBaseModel): name: Literal[model_name] return create_model( f"{model_name.upper()}ModelConfig", **type_annotation_dict, __base__=Base, ) def get_constructor_doc_dict(self, name): model_class = self.registry[name] docs = inspect.getdoc(model_class.__init__) param_docs = docscrape.NumpyDocString(docs) param_docs_dict = {} for param in param_docs["Parameters"]: param_docs_dict[param.name] = param.desc[0] 
return param_docs_dict def get_pydantic_model_config(self): model_list = [] for k in self.registry: model_list.append(self.get_pydantic_constructor_arg_type(k)) output = model_list[0] for m in model_list[1:]: output = Union[output, m] return output def get_model_class_name(self, model_name): return self.registry[model_name].__name__ def get_constructor_arg_type(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) type_annotation_dict = {} for k, param in dict(sigs.parameters).items(): type_annotation_dict[k] = param.annotation return type_annotation_dict def filter(self, filter_func): new_fac = ModelFactory() for name in self.registry: if filter_func(self.registry[name]): new_fac.registry[name] = self.registry[name] new_fac.code_registry[name] = self.code_registry[name] return new_fac class SamplerFactory: """The factory class for creating executors""" def __init__(self): self.registry = {} def get_model_enum(self): enum_class = enum.Enum( "NegativeSamplerName", {k: k for k, v in self.registry.items()} ) return enum_class def register(self, sampler_name: str) -> Callable: def inner_wrapper(wrapped_class) -> Callable: if sampler_name in self.registry: logger.warning( "Sampler %s already exists. 
Will replace it", sampler_name ) self.registry[sampler_name] = wrapped_class return wrapped_class return inner_wrapper def get_constructor_default_args(self, sampler_name): sigs = inspect.signature(self.registry[sampler_name].__init__) default_map = {} for k, param in dict(sigs.parameters).items(): default_map[k] = param.default return default_map def get_pydantic_constructor_arg_type(self, sampler_name: str): model_enum = self.get_model_enum() arg_dict = self.get_constructor_default_args(sampler_name) type_annotation_dict = {} # type_annotation_dict["name"] = Literal[""] exempt_keys = ["self", "in_size", "out_size", "redundancy"] for k, param in arg_dict.items(): if k not in exempt_keys or param is None: if k == "k" or k == "redundancy": type_annotation_dict[k] = 3 else: type_annotation_dict[k] = arg_dict[k] class Base(DGLBaseModel): name: Literal[sampler_name] return create_model( f"{sampler_name.upper()}SamplerConfig", **type_annotation_dict, __base__=Base, ) def get_pydantic_model_config(self): model_list = [] for k in self.registry: model_list.append(self.get_pydantic_constructor_arg_type(k)) output = model_list[0] for m in model_list[1:]: output = Union[output, m] return output def get_model_class_name(self, model_name): return self.registry[model_name].__name__ def get_constructor_arg_type(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) type_annotation_dict = {} for k, param in dict(sigs.parameters).items(): type_annotation_dict[k] = param.annotation return type_annotation_dict def get_constructor_doc_dict(self, name): model_class = self.registry[name] docs = inspect.getdoc(model_class) param_docs = docscrape.NumpyDocString(docs) param_docs_dict = {} for param in param_docs["Parameters"]: param_docs_dict[param.name] = param.desc[0] return param_docs_dict NegativeSamplerFactory = SamplerFactory() NegativeSamplerFactory.register("global")(GlobalUniform) NegativeSamplerFactory.register("persource")(PerSourceUniform) 
import collections.abc


def merge_comment(d, comment_dict, column=30):
    """Attach end-of-line YAML comments from ``comment_dict`` onto ``d``.

    ``comment_dict`` mirrors the layout of ``d``: a non-mapping value is the
    comment text for that key, and a mapping value is applied recursively to
    ``d[key]``.  A key missing from ``d`` gets a fresh ``CommentedMap``
    before recursing.

    Parameters
    ----------
    d
        Mapping that provides ``yaml_add_eol_comment(comment, key=, column=)``;
        it is modified in place.
    comment_dict : Mapping
        Possibly nested mapping of key -> comment text.
    column : int
        Column at which comments are placed.  It is applied at every nesting
        level.

    Returns
    -------
    The same ``d`` object that was passed in.
    """
    for k, v in comment_dict.items():
        if isinstance(v, collections.abc.Mapping):
            # Only build a new CommentedMap when the key is really absent.
            child = d[k] if k in d else CommentedMap()
            # Forward ``column`` so nested comments align with the caller's
            # choice rather than the default.
            d[k] = merge_comment(child, v, column=column)
        else:
            d.yaml_add_eol_comment(v, key=k, column=column)
    return d
general_pipeline: num_runs: 10 # Number of experiments to run train_batch_size: 32 # Graph batch size when training eval_batch_size: 32 # Graph batch size when evaluating num_workers: 4 # Number of workers for data loading optimizer: name: Adam lr: 0.001 weight_decay: 0 lr_scheduler: name: StepLR step_size: 100 gamma: 1 loss: BCEWithLogitsLoss metric: roc_auc_score num_epochs: 100 # Number of training epochs save_path: "results" # Directory to save the experiment results ================================================ FILE: dglgo/recipes/graphpred_hiv_pna.yaml ================================================ version: 0.0.2 pipeline_name: graphpred pipeline_mode: train device: cuda:0 # Torch device name, e.q. cpu or cuda or cuda:0 data: name: ogbg-molhiv split_ratio: # Ratio to generate data split, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. Leave blank to use builtin split in original dataset model: name: pna embed_size: 80 # Embedding size. aggregators: mean max min std # Aggregation function names separated by space, can include mean, max, min, std, sum scalers: identity amplification attenuation # Scaler function names separated by space, can include identity, amplification, and attenuation dropout: 0.3 # Dropout rate. batch_norm: true # Whether to use batch normalization. residual: true # Whether to use residual connection. num_mlp_layers: 1 # Number of MLP layers to use after message aggregation in each PNA layer. num_layers: 4 # Number of PNA layers. readout: mean # Readout for computing graph-level representations, can be 'sum' or 'mean'. 
general_pipeline: num_runs: 10 # Number of experiments to run train_batch_size: 128 # Graph batch size when training eval_batch_size: 128 # Graph batch size when evaluating num_workers: 4 # Number of workers for data loading optimizer: name: Adam lr: 0.01 weight_decay: 0.000003 lr_scheduler: name: ReduceLROnPlateau mode: max factor: 0.5 patience: 20 verbose: true loss: BCEWithLogitsLoss metric: roc_auc_score num_epochs: 200 # Number of training epochs save_path: "results" # Directory to save the experiment results ================================================ FILE: dglgo/recipes/graphpred_pcba_gin.yaml ================================================ version: 0.0.2 pipeline_name: graphpred pipeline_mode: train device: cuda:0 # Torch device name, e.q. cpu or cuda or cuda:0 data: name: ogbg-molpcba split_ratio: # Ratio to generate data split, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. Leave blank to use builtin split in original dataset model: name: gin embed_size: 300 # Embedding size. num_layers: 5 # Number of layers. dropout: 0.5 # Dropout rate. virtual_node: true # Whether to use virtual node. general_pipeline: num_runs: 10 # Number of experiments to run train_batch_size: 32 # Graph batch size when training eval_batch_size: 32 # Graph batch size when evaluating num_workers: 4 # Number of workers for data loading optimizer: name: Adam lr: 0.001 weight_decay: 0 lr_scheduler: name: StepLR step_size: 100 gamma: 1 loss: BCEWithLogitsLoss metric: average_precision_score num_epochs: 100 # Number of training epochs save_path: "results" # Directory to save the experiment results ================================================ FILE: dglgo/recipes/linkpred_citation2_sage.yaml ================================================ version: 0.0.2 pipeline_name: linkpred pipeline_mode: train device: cpu data: name: ogbl-citation2 split_ratio: # List of float, e.q. [0.8, 0.1, 0.1]. Split ratios for training, validation and test sets. Must sum to one. 
Leave blank to use builtin split in original dataset neg_ratio: # Int, e.g. 2. Indicate how many negative samples to be sampled per positive samples. Leave blank to use builtin split in original dataset node_model: name: sage embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 1 # Number of hidden layers. activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). edge_model: name: ele hidden_size: 64 # Hidden size. num_layers: 2 # Number of hidden layers. bias: true # Whether to use bias in the linear layer. neg_sampler: name: persource k: 3 # The number of negative samples per edge. general_pipeline: hidden_size: 256 # The intermediate hidden size between node model and edge model eval_batch_size: 32769 # Edge batch size when evaluating train_batch_size: 32769 # Edge batch size when training num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 loss: BCELoss save_path: "results" # Directory to save the experiment results num_runs: 1 # Number of experiments to run ================================================ FILE: dglgo/recipes/linkpred_collab_sage.yaml ================================================ version: 0.0.2 pipeline_name: linkpred pipeline_mode: train device: cpu data: name: ogbl-collab split_ratio: # List of float, e.g. [0.8, 0.1, 0.1]. Split ratios for training, validation and test sets. Must sum to one. Leave blank to use builtin split in original dataset neg_ratio: # Int, e.g. 2. Indicate how many negative samples to be sampled per positive samples. Leave blank to use builtin split in original dataset node_model: name: sage embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 1 # Number of hidden layers. 
activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). edge_model: name: ele hidden_size: 64 # Hidden size. num_layers: 2 # Number of hidden layers. bias: true # Whether to use bias in the linear layer. neg_sampler: name: persource k: 3 # The number of negative samples per edge. general_pipeline: hidden_size: 256 # The intermediate hidden size between node model and edge model eval_batch_size: 32769 # Edge batch size when evaluating train_batch_size: 32769 # Edge batch size when training num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 loss: BCELoss save_path: "results" # Directory to save the experiment results num_runs: 1 # Number of experiments to run ================================================ FILE: dglgo/recipes/linkpred_cora_sage.yaml ================================================ version: 0.0.2 pipeline_name: linkpred pipeline_mode: train device: cuda data: name: cora split_ratio: [0.8, 0.1, 0.1] # List of float, e.g. [0.8, 0.1, 0.1]. Split ratios for training, validation and test sets. Must sum to one. Leave blank to use builtin split in original dataset neg_ratio: 3 # Int, e.g. 2. Indicate how many negative samples to be sampled per positive samples. Leave blank to use builtin split in original dataset node_model: name: sage embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 32 # Hidden size. num_layers: 2 # Number of hidden layers. activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). edge_model: name: ele hidden_size: 64 # Hidden size. num_layers: 2 # Number of hidden layers. bias: true # Whether to use bias in the linear layer. neg_sampler: name: persource k: 3 # The number of negative samples per edge. 
general_pipeline: hidden_size: 256 # The intermediate hidden size between node model and edge model eval_batch_size: 32769 # Edge batch size when evaluating train_batch_size: 32769 # Edge batch size when training num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 loss: BCELoss save_path: "results" # Directory to save the experiment results num_runs: 1 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred-ns_arxiv_gcn.yaml ================================================ # Accuracy across 5 runs: 0.593288 ± 0.006103 version: 0.0.2 pipeline_name: nodepred-ns pipeline_mode: train device: 'cuda:0' eval_device: 'cpu' data: name: ogbn-arxiv model: name: gcn embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 256 # Hidden size. num_layers: 2 # Number of layers. norm: both # GCN normalization type. Can be 'both', 'right', 'left', 'none'. activation: relu # Activation function. dropout: 0.5 # Dropout rate. use_edge_weight: false # If true, scale the messages by edge weights. general_pipeline: sampler: name: neighbor fan_out: - 5 - 10 batch_size: 1024 num_workers: 4 eval_batch_size: 10240 eval_num_workers: 4 num_epochs: 20 # Number of training epochs eval_period: 1 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 weight_decay: 0.0 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 5 ================================================ FILE: dglgo/recipes/nodepred-ns_product_sage.yaml ================================================ # Accuracy across 1 runs: 0.796911 version: 0.0.2 pipeline_name: nodepred-ns pipeline_mode: train device: cuda eval_device: cpu data: name: ogbn-products split_ratio: # Ratio to generate split masks, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. 
Leave blank to use builtin split in original dataset model: name: sage embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 256 # Hidden size. num_layers: 3 # Number of hidden layers. activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). general_pipeline: sampler: name: neighbor fan_out: - 5 - 10 - 15 batch_size: 1000 num_workers: 4 eval_batch_size: 10000 eval_num_workers: 4 early_stop: patience: 20 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 20 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 weight_decay: 0.0 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 5 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_citeseer_gat.yaml ================================================ # Accuracy across 10 runs: 0.7097 ± 0.006914 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: citeseer model: name: gat embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding num_layers: 2 # Number of layers. hidden_size: 8 # Hidden size. heads: - 8 - 1 activation: elu # Activation function. feat_drop: 0.6 # Dropout rate for features. attn_drop: 0.6 # Dropout rate for attentions. 
negative_slope: 0.2 residual: false # If true, the GATConv will use residual connection general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_citeseer_gcn.yaml ================================================ # Accuracy across 10 runs: 0.6852 ± 0.008875 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: citeseer model: name: gcn embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 2 # Number of layers. norm: both # GCN normalization type. Can be 'both', 'right', 'left', 'none'. activation: relu # Activation function. dropout: 0.5 # Dropout rate. use_edge_weight: false # If true, scale the messages by edge weights. general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_citeseer_sage.yaml ================================================ # Accuracy across 10 runs: 0.6994 ± 0.004005 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: citeseer model: name: sage embed_size: -1 # The dimension of created embedding table. 
-1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 2 # Number of layers. activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_cora_gat.yaml ================================================ # Accuracy across 10 runs: 0.8208 ± 0.00663 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: cora model: name: gat embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding num_layers: 2 # Number of layers. hidden_size: 8 # Hidden size. heads: - 8 - 1 activation: elu # Activation function. feat_drop: 0.6 # Dropout rate for features. attn_drop: 0.6 # Dropout rate for attentions. 
negative_slope: 0.2 residual: false # If true, the GATConv will use residual connection general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_cora_gcn.yaml ================================================ # Accuracy across 10 runs: 0.802 ± 0.005329 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: cora model: name: gcn embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 2 # Number of layers. norm: both # GCN normalization type. Can be 'both', 'right', 'left', 'none'. activation: relu # Activation function. dropout: 0.5 # Dropout rate. use_edge_weight: false # If true, scale the messages by edge weights. general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_cora_sage.yaml ================================================ # Accuracy across 10 runs: 0.8163 ± 0.006856 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: cora model: name: sage embed_size: -1 # The dimension of created embedding table. 
-1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 2 # Number of layers. activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_pubmed_gat.yaml ================================================ # Accuracy across 10 runs: 0.7788 ± 0.002227 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: pubmed model: name: gat embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding num_layers: 2 # Number of layers. hidden_size: 8 # Hidden size. heads: - 8 - 8 activation: elu # Activation function. feat_drop: 0.6 # Dropout rate for features. attn_drop: 0.6 # Dropout rate for attentions. 
negative_slope: 0.2 residual: false # If true, the GATConv will use residual connection general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.005 weight_decay: 0.001 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_pubmed_gcn.yaml ================================================ # Accuracy across 10 runs: 0.7826 ± 0.004317 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: pubmed model: name: gcn embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 2 # Number of layers. norm: both # GCN normalization type. Can be 'both', 'right', 'left', 'none'. activation: relu # Activation function. dropout: 0.5 # Dropout rate. use_edge_weight: false # If true, scale the messages by edge weights. general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/recipes/nodepred_pubmed_sage.yaml ================================================ # Accuracy across 10 runs: 0.7819 ± 0.003176 version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cuda:0 data: name: pubmed model: name: sage embed_size: -1 # The dimension of created embedding table. 
-1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 2 # Number of layers. activation: relu dropout: 0.5 # Dropout rate. aggregator_type: gcn # Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). general_pipeline: early_stop: patience: 100 # Steps before early stop checkpoint_path: checkpoint.pth # Early stop checkpoint model file path num_epochs: 200 # Number of training epochs eval_period: 5 # Interval epochs between evaluations optimizer: name: Adam lr: 0.01 weight_decay: 0.0005 loss: CrossEntropyLoss save_path: "results" # Directory to save the experiment results num_runs: 10 # Number of experiments to run ================================================ FILE: dglgo/setup.py ================================================ #!/usr/bin/env python from setuptools import find_packages, setup setup( name="dglgo", version="0.0.2", description="DGL", author="DGL Team", author_email="wmjlyjemaine@gmail.com", packages=find_packages(), install_requires=[ "typer>=0.4.0", "isort>=5.10.1", "autopep8>=1.6.0", "numpydoc>=1.1.0", "pydantic>=1.9.0", "ruamel.yaml>=0.17.20", "PyYAML>=5.1", "ogb>=1.3.3", "rdkit-pypi", "scikit-learn>=0.20.0", ], package_data={"": ["./*"]}, include_package_data=True, license="APACHE", entry_points={"console_scripts": ["dgl = dglgo.cli.cli:main"]}, url="https://github.com/dmlc/dgl", ) ================================================ FILE: dglgo/tests/cfg.yml ================================================ version: 0.0.2 pipeline_name: nodepred pipeline_mode: train device: cpu data: name: cora split_ratio: # Ratio to generate split masks, for example set to [0.8, 0.1, 0.1] for 80% train/10% val/10% test. Leave blank to use builtin split in original dataset model: name: sage embed_size: -1 # The dimension of created embedding table. -1 means using original node embedding hidden_size: 16 # Hidden size. num_layers: 1 # Number of hidden layers. 
@pytest.mark.parametrize("spec", exps)
def test_train(spec):
    """Generate a config with ``dgl config`` and then train from it.

    Both CLI invocations run under ``spec.timeout``, must leave stderr
    empty, and have their decoded stdout printed.
    """
    cfg_path = "/tmp/test.yaml"
    commands = (
        [
            "dgl",
            "config",
            spec.pipeline,
            "--data",
            spec.dataset,
            "--model",
            spec.model,
            "--cfg",
            cfg_path,
        ],
        ["dgl", "train", "--cfg", cfg_path],
    )
    # The config step must come first: it writes the file the train step reads.
    for command in commands:
        completed = subprocess.run(
            command, timeout=spec.timeout, capture_output=True
        )
        stderr = completed.stderr
        assert not stderr, "Found error message: {}".format(stderr)
        print(completed.stdout.decode("utf-8"))
"recipe", "copy", "--dir", TEST_RECIPE_FOLDER], timeout=15, capture_output=True, ) @pytest.mark.parametrize( "file", [str(f) for f in Path(TEST_RECIPE_FOLDER).glob("*.yaml")] ) def test_recipe(file, setup_recipe_folder): print("DGL enter train {}".format(file)) try: run = subprocess.run( ["dgl", "train", "--cfg", file], timeout=5, capture_output=True ) sh_stdout, sh_stderr = run.stdout, run.stderr except subprocess.TimeoutExpired as e: sh_stdout = e.stdout sh_stderr = e.stderr if sh_stderr is not None and len(sh_stderr) != 0: error_str = sh_stderr.decode("utf-8") lines = error_str.split("\n") for line in lines: line = line.strip() if ( line.startswith("WARNING") or line.startswith("Aborted") or line.startswith("0%") ): continue else: assert len(line) == 0, error_str print("{} stdout: {}".format(file, sh_stdout)) print("{} stderr: {}".format(file, sh_stderr)) # test_recipe( , None) ================================================ FILE: docker/Dockerfile.awscli ================================================ # Using the Ubuntu image (our OS) FROM ubuntu:latest # Update package manager (apt-get) # and install (with the yes flag `-y`) # Python and Pip RUN apt-get update && apt-get install -y \ python3.8 \ python3-pip RUN apt-get install -y \ unzip \ curl \ && apt-get clean \ && curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \ && unzip awscliv2.zip \ && ./aws/install \ && rm -rf \ awscliv2.zip RUN pip install pytest pytest-html requests ================================================ FILE: docker/Dockerfile.ci_benchmark ================================================ # CI docker GPU env FROM nvidia/cuda:11.6.0-cudnn8-devel-ubuntu20.04 ENV TZ=US RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update --fix-missing COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh RUN bash /install/ubuntu_install_core.sh COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh 
RUN bash /install/ubuntu_install_build.sh # python COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh RUN bash /install/ubuntu_install_conda.sh ENV CONDA_ALWAYS_YES="true" ENV CONDA_ALWAYS_YES= # Environment variables ENV PATH=/usr/local/nvidia/bin:${PATH} ENV PATH=/usr/local/cuda/bin:${PATH} ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH} ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH} ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH} ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} ENV TF_FORCE_GPU_ALLOW_GROWTH=true ================================================ FILE: docker/Dockerfile.ci_cpu ================================================ # CI docker CPU env # Adapted from github.com/dmlc/tvm/docker/Dockerfile.ci_cpu FROM ubuntu:20.04 ENV TZ=US RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update --fix-missing COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh RUN bash /install/ubuntu_install_core.sh COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh RUN bash /install/ubuntu_install_build.sh # tcmalloc RUN apt-get install -y libgoogle-perftools4 ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4:$LD_PRELOAD # python COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh RUN bash /install/ubuntu_install_conda.sh ENV CONDA_ALWAYS_YES="true" COPY install/conda_env/torch_cpu.yml /install/conda_env/torch_cpu.yml COPY install/conda_env/torch_cpu_pip.txt /install/conda_env/torch_cpu_pip.txt RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/torch_cpu.yml"] COPY install/conda_env/tensorflow_cpu.yml /install/conda_env/tensorflow_cpu.yml RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/tensorflow_cpu.yml"] COPY install/conda_env/mxnet_cpu.yml /install/conda_env/mxnet_cpu.yml RUN ["/bin/bash", "-i", "-c", "conda 
env create -f /install/conda_env/mxnet_cpu.yml"] ENV CONDA_ALWAYS_YES= # SSH RUN ["/bin/bash", "-i", "-c", "ssh-keygen -f ~/.ssh/id_rsa -N ''"] RUN ["/bin/bash", "-i", "-c", "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys"] ENTRYPOINT service ssh restart && bash ================================================ FILE: docker/Dockerfile.ci_gpu ================================================ # CI docker GPU env FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 ENV TZ=US RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh RUN bash /install/ubuntu_install_core.sh COPY install/ubuntu_install_build.sh /install/ubuntu_install_build.sh RUN bash /install/ubuntu_install_build.sh # python COPY install/ubuntu_install_conda.sh /install/ubuntu_install_conda.sh RUN bash /install/ubuntu_install_conda.sh ENV CONDA_ALWAYS_YES="true" COPY install/conda_env/torch_gpu.yml /install/conda_env/torch_gpu.yml COPY install/conda_env/torch_gpu_pip.txt /install/conda_env/torch_gpu_pip.txt RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/torch_gpu.yml"] COPY install/conda_env/tensorflow_gpu.yml /install/conda_env/tensorflow_gpu.yml RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/tensorflow_gpu.yml"] COPY install/conda_env/mxnet_gpu.yml /install/conda_env/mxnet_gpu.yml RUN ["/bin/bash", "-i", "-c", "conda env create -f /install/conda_env/mxnet_gpu.yml"] ENV CONDA_ALWAYS_YES= # Environment variables ENV PATH=/usr/local/nvidia/bin:${PATH} ENV PATH=/usr/local/cuda/bin:${PATH} ENV CPLUS_INCLUDE_PATH=/usr/local/cuda/include:${CPLUS_INCLUDE_PATH} ENV C_INCLUDE_PATH=/usr/local/cuda/include:${C_INCLUDE_PATH} ENV LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LIBRARY_PATH} ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} ENV TF_FORCE_GPU_ALLOW_GROWTH=true ================================================ FILE: 
docker/Dockerfile.ci_lint ================================================ # CI docker for lint # Adapted from github.com/dmlc/tvm/docker/Dockerfile.ci_lint FROM ubuntu:18.04 ENV DEBIAN_FRONTEND=noninteractive COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh RUN bash /install/ubuntu_install_core.sh COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh RUN bash /install/ubuntu_install_python.sh RUN apt-get install -y doxygen graphviz RUN pip3 install cpplint==1.3.0 pylint==2.7.0 mypy ================================================ FILE: docker/README.md ================================================ ## Build docker image for CI ### CPU image ```bash docker build -t dgl-cpu -f Dockerfile.ci_cpu . ``` ### GPU image ```bash docker build -t dgl-gpu -f Dockerfile.ci_gpu . ``` ### Lint image ```bash docker build -t dgl-lint -f Dockerfile.ci_lint . ``` ### CPU image for kg ```bash wget https://data.dgl.ai/dataset/FB15k.zip -P install/ docker build -t dgl-cpu:torch-1.2.0 -f Dockerfile.ci_cpu_torch_1.2.0 . ``` ### GPU image for kg ```bash wget https://data.dgl.ai/dataset/FB15k.zip -P install/ docker build -t dgl-gpu:torch-1.2.0 -f Dockerfile.ci_gpu_torch_1.2.0 . 
``` ================================================ FILE: docker/install/conda_env/kg_cpu.yml ================================================ name: kg-ci dependencies: - python=3.6.9 - pip - pip: - torch - torchvision - mxnet - pytest - nose - numpy - cython - scipy - networkx - matplotlib - nltk - requests[security] - tqdm ================================================ FILE: docker/install/conda_env/kg_gpu.yml ================================================ name: kg-ci dependencies: - python=3.6.9 - pip - pip: - torch - torchvision - mxnet-cu101 - pytest - nose - numpy - cython - scipy - networkx - matplotlib - nltk - requests[security] - tqdm ================================================ FILE: docker/install/conda_env/mxnet_cpu.yml ================================================ name: mxnet-ci dependencies: - python=3.7.0 - pip - pip: - mxnet==1.6.0 - pytest - nose - numpy - cython==0.29 - scipy - networkx - matplotlib - nltk - requests[security] - tqdm - psutil - pyyaml - pydantic - pandas - rdflib - ogb ================================================ FILE: docker/install/conda_env/mxnet_gpu.yml ================================================ name: mxnet-ci dependencies: - python=3.7.0 - pip - pip: - mxnet-cu101==1.7.0 - pytest - nose - numpy - cython==0.29 - scipy - networkx - matplotlib - nltk - requests[security] - tqdm - psutil - pyyaml - pydantic - pandas - rdflib - ogb ================================================ FILE: docker/install/conda_env/tensorflow_cpu.yml ================================================ name: tensorflow-ci dependencies: - python=3.7 - pip - pip: - tensorflow==2.3.0 - pytest - nose - numpy - cython==0.29 - scipy - networkx - matplotlib - nltk - requests[security] - tqdm - psutil - pyyaml - pydantic - pandas - rdflib - ogb ================================================ FILE: docker/install/conda_env/tensorflow_gpu.yml ================================================ name: tensorflow-ci dependencies: - python=3.7.0 - 
pip - pip: - tensorflow==2.3.0 - pytest - nose - numpy - cython==0.29 - scipy - networkx - matplotlib - nltk - requests[security] - tqdm - psutil - pyyaml - pydantic - pandas - rdflib - ogb ================================================ FILE: docker/install/conda_env/torch_cpu.yml ================================================ name: pytorch-ci dependencies: - python=3.10 - pip - pip: - --find-links https://download.pytorch.org/whl/torch_stable.html - --requirement torch_cpu_pip.txt ================================================ FILE: docker/install/conda_env/torch_cpu_pip.txt ================================================ --find-links https://download.pytorch.org/whl/torch_stable.html cython filelock matplotlib networkx nltk nose numpy ogb pandas psutil pyarrow pydantic pytest pyyaml rdflib requests[security]==2.28 scikit-learn scipy torch==2.3.0+cpu torcheval torchmetrics torch_geometric tqdm ================================================ FILE: docker/install/conda_env/torch_gpu.yml ================================================ name: pytorch-ci dependencies: - python=3.10 - pip - pip: - --find-links https://download.pytorch.org/whl/torch_stable.html - --requirement torch_gpu_pip.txt ================================================ FILE: docker/install/conda_env/torch_gpu_pip.txt ================================================ --find-links https://download.pytorch.org/whl/torch_stable.html cython matplotlib networkx nltk nose numpy ogb pandas psutil pydantic pytest pyyaml rdflib requests[security]==2.28 scikit-learn scipy torch==2.3.0+cu121 torcheval torchmetrics torch_geometric tqdm ================================================ FILE: docker/install/ubuntu_install_antlr.sh ================================================ #!/bin/bash set -e set -u set -o pipefail cd /usr/local/lib wget -q https://www.antlr.org/download/antlr-4.7.1-complete.jar cd - ================================================ FILE: docker/install/ubuntu_install_build.sh 
================================================ # Install cmake with minimum required version. version=3.18 build=0 mkdir ~/temp cd ~/temp wget https://cmake.org/files/v$version/cmake-$version.$build-Linux-x86_64.sh sudo mkdir /opt/cmake sudo sh cmake-$version.$build-Linux-x86_64.sh --prefix=/opt/cmake --skip-license sudo ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake cd ~ rm -rf ~/temp ================================================ FILE: docker/install/ubuntu_install_conda.sh ================================================ #!/bin/sh export LANG=C.UTF-8 LC_ALL=C.UTF-8 export PATH=/opt/conda/bin:$PATH apt-get update --fix-missing && \ apt-get install -y wget bzip2 ca-certificates curl git && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \ /bin/bash ~/miniconda.sh -b -p /opt/conda && \ rm ~/miniconda.sh && \ /opt/conda/bin/conda clean -tipy && \ ln -s /opt/conda/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \ echo ". 
/opt/conda/etc/profile.d/conda.sh" >> ~/.bashrc && \ echo "conda activate base" >> ~/.bashrc export TINI_VERSION=v0.16.1 source ~/.bashrc ================================================ FILE: docker/install/ubuntu_install_core.sh ================================================ # install libraries for building c++ core on ubuntu apt update && apt install -y --no-install-recommends --force-yes \ apt-utils git build-essential make wget unzip sudo \ libz-dev libxml2-dev libopenblas-dev libopencv-dev \ graphviz graphviz-dev libgraphviz-dev ca-certificates \ systemd vim openssh-client openssh-server ================================================ FILE: docker/install/ubuntu_install_java.sh ================================================ #!/bin/bash set -o errexit -o nounset set -o pipefail apt-get update && apt-get install -y openjdk-8-jdk maven test -d "/usr/lib/jvm/java-8-openjdk-amd64/jre" echo "export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/jre" >> /etc/profile ================================================ FILE: docker/install/ubuntu_install_mxnet_cpu.sh ================================================ pip3 install mxnet ================================================ FILE: docker/install/ubuntu_install_mxnet_gpu.sh ================================================ pip3 install mxnet-cu90 ================================================ FILE: docker/install/ubuntu_install_python.sh ================================================ # install python and pip, don't modify this, modify install_python_package.sh apt-get update apt-get install -y python-dev python3-dev # install pip cd /tmp && wget https://bootstrap.pypa.io/get-pip.py python2 get-pip.py python3 get-pip.py # santiy check python2 --version python3 --version pip2 --version pip3 --version ================================================ FILE: docker/install/ubuntu_install_python_package.sh ================================================ # install libraries for python package on ubuntu #pip2 install 
nose numpy cython scipy networkx matplotlib nltk requests[security] tqdm pip3 install nose numpy cython scipy networkx matplotlib nltk requests[security] tqdm ================================================ FILE: docker/install/ubuntu_install_torch.sh ================================================ #!/bin/bash # install torch pip2 install torch==1.0.1 torchvision==0.2.2 pip3 install torch==1.0.1 torchvision==0.2.2 ================================================ FILE: docker/install/ubuntu_install_torch_1.2.0.sh ================================================ #!/bin/bash # install torch pip3 install torch==1.2.0+cu92 torchvision==0.4.0+cu92 -f https://download.pytorch.org/whl/torch_stable.html ================================================ FILE: docker/pods/ci-compile-cpu.yaml ================================================ apiVersion: v1 kind: Pod spec: securityContext: runAsUser: 0 containers: - name: dgl-ci-cpu-compile image: dgllib/dgl-ci-cpu:cu101_v220123 imagePullPolicy: Always tty: true resources: requests: cpu: 16 # affinity: # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: beta.kubernetes.io/instance-type # operator: In # values: # - c5.9xlarge ================================================ FILE: docker/pods/ci-compile-gpu.yaml ================================================ apiVersion: v1 kind: Pod spec: securityContext: runAsUser: 0 containers: - name: dgl-ci-gpu-compile image: dgllib/dgl-ci-gpu:cu101_v220123 imagePullPolicy: Always tty: true resources: requests: cpu: 32 # affinity: # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: # nodeSelectorTerms: # - matchExpressions: # - key: beta.kubernetes.io/instance-type # operator: In # values: # - c5.9xlarge ================================================ FILE: docker/pods/ci-cpu.yaml ================================================ apiVersion: v1 kind: Pod spec: securityContext: runAsUser: 0 containers: - name: 
dgl-ci-cpu image: dgllib/dgl-ci-cpu:cu101_v220217 imagePullPolicy: Always tty: true resources: requests: cpu: 16 volumeMounts: # - name: persistent-storage # mountPath: /tmp/dataset - name: dshm mountPath: /dev/shm volumes: # - name: persistent-storage # persistentVolumeClaim: # claimName: ogb-efs-claim - name: dshm emptyDir: medium: Memory ================================================ FILE: docker/pods/ci-gpu.yaml ================================================ apiVersion: v1 kind: Pod spec: securityContext: runAsUser: 0 containers: - name: dgl-ci-gpu image: dgllib/dgl-ci-gpu:cu101_v220217 imagePullPolicy: Always tty: true resources: limits: nvidia.com/gpu: 1 # requesting 1 GPU volumeMounts: - name: dshm mountPath: /dev/shm volumes: - name: dshm emptyDir: medium: Memory affinity: nodeAffinity: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - key: beta.kubernetes.io/instance-type operator: In values: - g4dn.2xlarge ================================================ FILE: docker/pods/ci-lint.yaml ================================================ apiVersion: v1 kind: Pod spec: securityContext: runAsUser: 0 containers: - name: dgl-ci-lint image: dgllib/dgl-ci-lint imagePullPolicy: Always tty: true resources: requests: cpu: 1 serviceAccountName: dglciuser ================================================ FILE: docs/.gitignore ================================================ build # tutorials are auto-generated source/tutorials source/new-tutorial source/generated ================================================ FILE: docs/Makefile ================================================ # Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". 
help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile mxnet: @echo "##################################################################" @echo "# #" @echo "# Step 1: Building MXNet tutorials #" @echo "# #" @echo "##################################################################" @DGLBACKEND=mxnet $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) pytorch: @echo "##################################################################" @echo "# #" @echo "# Step 2: Building PyTorch tutorials #" @echo "# #" @echo "##################################################################" @DGLBACKEND=pytorch $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) tensorflow: @echo "##################################################################" @echo "# #" @echo "# Step 3: Building Tensorflow tutorials #" @echo "# #" @echo "##################################################################" @DGLBACKEND=tensorflow $(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) html-noexec: $(SPHINXBUILD) -D plot_gallery=0 -b html "$(SOURCEDIR)" "$(BUILDDIR)/html" @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." html: Makefile mxnet pytorch tensorflow # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) ================================================ FILE: docs/README.md ================================================ DGL document and tutorial folder ================================ To build the doc: - Create the developer conda environment using the script [here](../script/create_dev_conda_env.sh). - Activate the developer conda environment. - Build DGL from source using the script [here](../script/build_dgl.sh). - Build the doc using the script [here](../script/build_doc.sh). 
To render locally: ``` cd build/html python3 -m http.server 8000 ``` ================================================ FILE: docs/clean.sh ================================================ #!/bin/sh make clean rm -rf build rm -rf source/tutorials rm -rf source/generated ================================================ FILE: docs/migrate-guide-0.5.md ================================================ # Migration Guide for DGL 0.5 ## Breaking changes The following changes may break existing code if the related APIs are used. Note that **most of the removed APIs have quite rare use cases** and have quite easy replacements. 1. DGLGraph now requires the graph structure and feature data to have the same device placement. If the given node/edge feature tensors are on a different device from the graph’s, dgl.ndata and dgl.edata will raise an error as follows: ```bash dgl._ffi.base.DGLError: Cannot assign node feature "x" on device cpu to a graph on device cuda:0. Call DGLGraph.to() to copy the graph to the same device. ``` To fix it, copy either the graph (using the `DGLGraph.to` API) or the feature tensors to the same device. 1. Changes to `dgl.graph`: * No longer accept SciPy matrix/NetworkX graph as the input data. Use `dgl.from_scipy`/`dgl.from_networkx` instead. * `ntype` and `etype` are removed from the arguments. To construct graphs with named node/edge types, use `dgl.heterograph`. ```python g = dgl.heterograph({('user', 'follows', 'user') : ...}) ``` * `validate` is removed from the arguments. DGL now always checks whether the num_nodes is greater than the largest node ID if specified. 1. `dgl.bipartite` is removed. * To create a uni-directional bipartite graph, use `dgl.heterograph`. E.g., ```python g = dgl.heterograph({('user', 'rates', 'movie'): ...}) ``` * To create a uni-directional bipartite graph from a SciPy matrix, use the new API `dgl.bipartite_from_scipy`.
* To create a uni-directional bipartite graph from a NetworkX graph, use the new API `dgl.bipartite_from_networkx`. 1. Changes to `dgl.heterograph`: * No longer accept SciPy matrix/NetworkX graph as the input data. Use the `from_*` APIs to create graphs first and then pass their edges to the `dgl.heterograph` API. E.g., ```python nx_g = ... # some networkx graph spmat = ... # some scipy matrix g1 = dgl.from_networkx(nx_g) g2 = dgl.bipartite_from_scipy(spmat) g = dgl.heterograph({('user', 'follows', 'user') : g1.edges(), ('user', 'rates', 'movie') : g2.edges()}) ``` 1. `dgl.hetero_from_relations` is removed. Use `dgl.heterograph` instead. 1. From 0.5, subgraphs extracted via DGL APIs automatically inherit node and edge features from the parent graph. DGL also saves the original node/edge IDs in `subg.ndata[dgl.NID]` and `subg.edata[dgl.EID]` if nodes/edges are relabeled. This new behavior makes the following `DGLGraph` methods useless and we thus remove them: * `DGLGraph.parent`, `DGLGraph.parent_nid`, `DGLGraph.parent_eid`, `DGLGraph.map_to_subgraph_nid`, `DGLGraph.copy_from_parent`, `DGLGraph.copy_to_parent` and `DGLGraph.detach_parent`. 1. Other removed DGLGraph APIs: * `DGLGraph.from_networkx`. Use `dgl.from_networkx` to construct a DGLGraph from a NetworkX graph. * `DGLGraph.from_scipy_sparse_matrix`. Use `dgl.from_scipy` to construct a DGLGraph from a SciPy matrix. * `DGLGraph.register_apply_node_func`, `DGLGraph.register_apply_edge_func`, `DGLGraph.register_message_func` and `DGLGraph.register_reduce_func`. Please specify them directly as the arguments of the message passing APIs. ```python g = ... # some graph # before 0.5 g.register_message_func(mfunc) g.register_reduce_func(rfunc) g.update_all() # starting from 0.5 g.update_all(mfunc, rfunc) ``` * `DGLGraph.group_apply_edges`. To normalize edge weights within the neighborhood of each destination node, use `dgl.nn.edge_softmax`.
To normalize edge weights within the neighborhood of each source node, use `dgl.reverse` first before the edge softmax. * `DGLGraph.send` and `DGLGraph.recv`. There are rarely any cases where send and recv must be invoked separately. Use `DGLGraph.send_and_recv` or `DGLGraph.update_all` for message passing. * `DGLGraph.multi_recv`, `DGLGraph.multi_pull`, `DGLGraph.multi_send_and_recv`. To perform message passing on a part of the nodes and edges, use `dgl.node_subgraph` or `dgl.edge_subgraph` to extract the subset first and then call `DGLGraph.multi_update_all`. * `DGLGraph.clear`. Use `dgl.graph(([], []))` to create a new empty graph. * `DGLGraph.subgraphs`. Use `DGLGraph.subgraph`. * `DGLGraph.batch_num_nodes` and `DGLGraph.batch_num_edges` are now functions that accept node/edge type as the only argument for getting batching information of a heterograph. * `DGLGraph.flatten`. To create a new graph without batching information, use `new_g = dgl.graph(old_g.edges())`. 1. The reduce function `dgl.function.prod` is removed. 1. `dgl.add_self_loop` will NOT remove existing self loops automatically. It is recommended to call `dgl.remove_self_loop` before invoking `dgl.add_self_loop`. ## Deprecations These changes will not break old code but will emit deprecation warnings. ### Core APIs 1. Creating a graph using `dgl.DGLGraph(data)` is deprecated. Use `dgl.graph(data)`. 1. Deprecated `DGLGraph` methods: - `DGLGraph.to_networkx` -> `dgl.to_networkx` - `DGLGraph.readonly` and `DGLGraph.is_readonly`. Before 0.5, this flag is a hint for more efficient implementation. From 0.5, the efficiency issue has been resolved so they become useless. - `DGLGraph.__len__` -> `DGLGraph.number_of_nodes` - `dgl.DGLGraph.__contains__` -> `DGLGraph.has_nodes` - `DGLGraph.add_node` -> `DGLGraph.add_nodes` - `DGLGraph.add_edge` -> `DGLGraph.add_edges` - `DGLGraph.has_node` -> `DGLGraph.has_nodes` - `DGLGraph.has_edge_between` -> `DGLGraph.has_edges_between` - `DGLGraph.edge_id` -> `dgl.DGLGraph.edge_ids`.
- `DGLGraph.in_degree` -> `dgl.DGLGraph.in_degrees`. - `DGLGraph.out_degree` -> `dgl.DGLGraph.out_degrees`. 1. `dgl.to_simple_graph` -> `dgl.to_simple`. 1. `dgl.to_homo` -> `dgl.to_homogeneous`. 1. `dgl.to_hetero` -> `dgl.to_heterogeneous`. 1. `dgl.as_heterograph` and `dgl.as_immutable_graph` are deprecated as `dgl.DGLGraph` and `dgl.DGLHeteroGraph` are now merged. 1. `dgl.batch_hetero` -> `dgl.batch` 1. `dgl.unbatch_hetero` -> `dgl.unbatch` 1. The `node_attrs` / `edge_attrs` arguments of `dgl.batch` are renamed to `ndata` / `edata`. 1. The arguments `share_ndata` and `share_edata` of `dgl.reverse` are renamed to `copy_ndata` and `copy_edata`. ### Dataset APIs For all the current datasets, their class attributes such as `graph`, `feat`, etc. are deprecated. The recommended usage is to get them from each sample: ```python # Before 0.5 dataset = dgl.data.CoraFull() g = dataset.graph feat = dataset.feat ... # From 0.5 dataset = dgl.data.CoraFullDataset() # in 0.5, all the classes have a "Dataset" in the name. g = dataset[0] # is directly a DGLGraph object feat = g.ndata['feat'] ... ``` **Other changes** * ``dgl.data.SST`` is deprecated and replaced by ``dgl.data.SSTDataset``. The attribute ``trees`` is deprecated and replaced by ``__getitem__``.
The attribute ``num_vocabs`` is deprecated and replaced by ``vocab_size`` ================================================ FILE: docs/source/_static/css/custom.css ================================================ .wy-table-responsive table td, .wy-table-responsive table th { white-space: normal; } .wy-table-bordered-all, .rst-content table.docutils { border: none; } .wy-table-bordered-all td, .rst-content table.docutils td { border: none; } .wy-table td, .rst-content table.docutils td, .rst-content table.field-list td, .wy-table th, .rst-content table.docutils th, .rst-content table.field-list th { padding: 14px; } ================================================ FILE: docs/source/_templates/classtemplate.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: {{ module }} {{ name | underline}} .. autoclass:: {{ name }} :show-inheritance: :members: __getitem__, __len__, collate_fn, forward, reset_parameters, rel_emb, rel_project, explain_node, explain_graph, train_step, train_step_node ================================================ FILE: docs/source/_templates/graphbolt_classtemplate.rst ================================================ .. role:: hidden :class: hidden-section .. currentmodule:: {{ module }} {{ name | underline}} .. autoclass:: {{ name }} :show-inheritance: :members: :member-order: groupwise ================================================ FILE: docs/source/api/python/dgl.DGLGraph.rst ================================================ .. _apigraph: dgl.DGLGraph ===================================================== .. currentmodule:: dgl .. class:: DGLGraph Class for storing graph structure and node/edge feature data. There are a few ways to create a DGLGraph: * To create a homogeneous graph from Tensor data, use :func:`dgl.graph`. * To create a heterogeneous graph from Tensor data, use :func:`dgl.heterograph`. * To create a graph from other data sources, use ``dgl.*`` create ops. 
See :ref:`api-graph-create-ops`. Read the user guide chapter :ref:`guide-graph` for an in-depth explanation about its usage. Querying metagraph structure ---------------------------- Methods for getting information about the node and edge types. They are typically useful when the graph is heterogeneous. .. autosummary:: :toctree: ../../generated/ DGLGraph.ntypes DGLGraph.etypes DGLGraph.srctypes DGLGraph.dsttypes DGLGraph.canonical_etypes DGLGraph.metagraph DGLGraph.to_canonical_etype .. _apigraph-querying-graph-structure: Querying graph structure ------------------------ Methods for getting information about the graph structure such as capacity, connectivity, neighborhood, etc. .. autosummary:: :toctree: ../../generated/ DGLGraph.num_nodes DGLGraph.number_of_nodes DGLGraph.num_edges DGLGraph.number_of_edges DGLGraph.num_src_nodes DGLGraph.number_of_src_nodes DGLGraph.num_dst_nodes DGLGraph.number_of_dst_nodes DGLGraph.is_unibipartite DGLGraph.is_multigraph DGLGraph.is_homogeneous DGLGraph.has_nodes DGLGraph.has_edges_between DGLGraph.predecessors DGLGraph.successors DGLGraph.edge_ids DGLGraph.find_edges DGLGraph.in_edges DGLGraph.out_edges DGLGraph.in_degrees DGLGraph.out_degrees Querying and manipulating sparse format --------------------------------------- Methods for getting or manipulating the internal storage formats of a ``DGLGraph``. .. autosummary:: :toctree: ../../generated/ DGLGraph.formats DGLGraph.create_formats_ Querying and manipulating node/edge ID type ------------------------------------------- Methods for getting or manipulating the data type for storing structure-related data such as node and edge IDs. .. autosummary:: :toctree: ../../generated/ DGLGraph.idtype DGLGraph.long DGLGraph.int Using Node/edge features ------------------------ Methods for getting or setting node and edge features and for querying their feature schemes. ..
autosummary:: :toctree: ../../generated/ DGLGraph.nodes DGLGraph.ndata DGLGraph.edges DGLGraph.edata DGLGraph.node_attr_schemes DGLGraph.edge_attr_schemes DGLGraph.srcnodes DGLGraph.dstnodes DGLGraph.srcdata DGLGraph.dstdata Transforming graph ------------------ Methods for generating a new graph by transforming the current ones. Most of them are alias of the :ref:`api-subgraph-extraction` and :ref:`api-transform` under the ``dgl`` namespace. .. autosummary:: :toctree: ../../generated/ DGLGraph.subgraph DGLGraph.edge_subgraph DGLGraph.node_type_subgraph DGLGraph.edge_type_subgraph DGLGraph.__getitem__ DGLGraph.line_graph DGLGraph.reverse DGLGraph.add_self_loop DGLGraph.remove_self_loop DGLGraph.to_simple DGLGraph.to_cugraph DGLGraph.reorder_graph Adjacency and incidence matrix --------------------------------- Methods for getting the adjacency and the incidence matrix of the graph. .. autosummary:: :toctree: ../../generated/ DGLGraph.adj DGLGraph.adjacency_matrix DGLGraph.adj_tensors DGLGraph.adj_external DGLGraph.inc DGLGraph.incidence_matrix Computing with DGLGraph ----------------------------- Methods for performing message passing, applying functions on node/edge features, etc. .. autosummary:: :toctree: ../../generated/ DGLGraph.apply_nodes DGLGraph.apply_edges DGLGraph.send_and_recv DGLGraph.pull DGLGraph.push DGLGraph.update_all DGLGraph.multi_update_all DGLGraph.prop_nodes DGLGraph.prop_edges DGLGraph.filter_nodes DGLGraph.filter_edges Querying and manipulating batch information ---------------------------------------------- Methods for getting/setting the batching information if the current graph is a batched graph generated from :func:`dgl.batch`. They are also widely used in the :ref:`api-batch`. .. autosummary:: :toctree: ../../generated/ DGLGraph.batch_size DGLGraph.batch_num_nodes DGLGraph.batch_num_edges DGLGraph.set_batch_num_nodes DGLGraph.set_batch_num_edges Mutating topology ----------------- Methods for mutating the graph structure *in-place*. 
.. autosummary:: :toctree: ../../generated/ DGLGraph.add_nodes DGLGraph.add_edges DGLGraph.remove_nodes DGLGraph.remove_edges Device Control -------------- Methods for getting or changing the device on which the graph is hosted. .. autosummary:: :toctree: ../../generated/ DGLGraph.to DGLGraph.device DGLGraph.cpu DGLGraph.pin_memory_ DGLGraph.unpin_memory_ DGLGraph.is_pinned Misc ---- Other utility methods. .. autosummary:: :toctree: ../../generated/ DGLGraph.local_scope ================================================ FILE: docs/source/api/python/dgl.data.rst ================================================ .. _apidata: dgl.data ========= .. currentmodule:: dgl.data .. automodule:: dgl.data Base Class --------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst DGLDataset CSVDataset Node Prediction Datasets --------------------------------------- Datasets for node classification/regression tasks .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst SSTDataset KarateClubDataset CoraGraphDataset CiteseerGraphDataset PubmedGraphDataset CoraFullDataset AIFBDataset MUTAGDataset BGSDataset AMDataset AmazonCoBuyComputerDataset AmazonCoBuyPhotoDataset CoauthorCSDataset CoauthorPhysicsDataset PPIDataset RedditDataset SBMMixtureDataset FraudDataset FraudYelpDataset FraudAmazonDataset BAShapeDataset BACommunityDataset TreeCycleDataset TreeGridDataset WikiCSDataset FlickrDataset YelpDataset PATTERNDataset CLUSTERDataset ChameleonDataset SquirrelDataset ActorDataset CornellDataset TexasDataset WisconsinDataset RomanEmpireDataset AmazonRatingsDataset MinesweeperDataset TolokersDataset QuestionsDataset MovieLensDataset VOCSuperpixelsDataset COCOSuperpixelsDataset Edge Prediction Datasets --------------------------------------- Datasets for edge classification/regression and link prediction .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst FB15k237Dataset FB15kDataset WN18Dataset BitcoinOTCDataset ICEWS18Dataset GDELTDataset Graph Prediction Datasets --------------------------------------- Datasets for graph classification/regression tasks .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst QM7bDataset QM9Dataset QM9EdgeDataset MiniGCDataset TUDataset LegacyTUDataset GINDataset FakeNewsDataset BA2MotifDataset ZINCDataset PeptidesStructuralDataset PeptidesFunctionalDataset MNISTSuperPixelDataset CIFAR10SuperPixelDataset Dataset adapters ------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst AsNodePredDataset AsLinkPredDataset AsGraphPredDataset Utilities ----------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst utils.get_download_dir utils.download utils.check_sha1 utils.extract_archive utils.split_dataset utils.load_labels utils.save_info utils.load_info utils.add_nodepred_split utils.mask_nodes_by_property utils.add_node_property_split utils.Subset ================================================ FILE: docs/source/api/python/dgl.dataloading.rst ================================================ .. _api-dataloading: dgl.dataloading ================================= .. currentmodule:: dgl.dataloading The ``dgl.dataloading`` package provides two primitives to compose a data pipeline for loading from graph data. ``Sampler`` represents algorithms to generate subgraph samples from the original graph, and ``DataLoader`` represents the iterable over these samples. DGL provides a number of built-in samplers that subclass :class:`~dgl.dataloading.Sampler`. Creating new samplers follow the same paradigm. Read our user guide chapter :ref:`guide-minibatch` for more examples and explanations. The entire package only works for PyTorch backend. DataLoaders ----------- .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst DataLoader GraphDataLoader .. _api-dataloading-neighbor-sampling: Samplers -------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst Sampler NeighborSampler LaborSampler MultiLayerFullNeighborSampler ClusterGCNSampler ShaDowKHopSampler SAINTSampler Sampler Transformations ----------------------- .. autosummary:: :toctree: ../../generated/ as_edge_prediction_sampler BlockSampler .. _api-dataloading-negative-sampling: Negative Samplers for Link Prediction ------------------------------------- .. currentmodule:: dgl.dataloading.negative_sampler .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst Uniform PerSourceUniform GlobalUniform Utility Class and Functions for Feature Prefetching --------------------------------------------------- .. currentmodule:: dgl.dataloading.base .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst set_node_lazy_features set_edge_lazy_features set_src_lazy_features set_dst_lazy_features LazyFeature ================================================ FILE: docs/source/api/python/dgl.distributed.rst ================================================ .. _api-distributed: dgl.distributed ================================= .. currentmodule:: dgl.distributed DGL distributed module contains classes and functions to support distributed Graph Neural Network training and inference on a cluster of machines. This includes a few submodules: * distributed data structures including distributed graph, distributed tensor and distributed embeddings. * distributed sampling. * distributed workload split at runtime. * graph partition. Initialization --------------- .. autosummary:: :toctree: ../../generated/ initialize Distributed Graph ----------------- .. 
autoclass:: DistGraph :members: ndata, edata, idtype, device, ntypes, etypes, number_of_nodes, number_of_edges, node_attr_schemes, edge_attr_schemes, rank, find_edges, get_partition_book, barrier, local_partition, num_nodes, num_edges, get_node_partition_policy, get_edge_partition_policy, get_etype_id, get_ntype_id, nodes, edges, out_degrees, in_degrees Distributed Tensor ------------------ .. autoclass:: DistTensor :members: part_policy, shape, dtype, name Distributed Node Embedding -------------------------- .. autoclass:: DistEmbedding Distributed embedding optimizer ------------------------------- .. autoclass:: dgl.distributed.optim.SparseAdagrad :members: step, save, load .. autoclass:: dgl.distributed.optim.SparseAdam :members: step, save, load Distributed workload split -------------------------- .. autosummary:: :toctree: ../../generated/ node_split edge_split Distributed Sampling -------------------- Distributed DataLoader `````````````````````` .. autoclass:: NodeCollator .. autoclass:: EdgeCollator .. autoclass:: DistDataLoader .. autoclass:: DistNodeDataLoader .. autoclass:: DistEdgeDataLoader .. _api-distributed-sampling-ops: Distributed Graph Sampling Operators ``````````````````````````````````````` .. autosummary:: :toctree: ../../generated/ sample_neighbors sample_etype_neighbors find_edges in_subgraph Partition --------- Graph partition book ```````````````````` .. autoclass:: GraphPartitionBook :members: shared_memory, num_partitions, metadata, nid2partid, eid2partid, partid2nids, partid2eids, nid2localnid, eid2localeid, partid, map_to_per_ntype, map_to_per_etype, map_to_homo_nid, map_to_homo_eid, canonical_etypes .. autoclass:: PartitionPolicy :members: policy_str, part_id, partition_book, to_local, to_partid, get_part_size, get_size Split and Load Partitions ```````````````````````````` ..
autosummary:: :toctree: ../../generated/ load_partition load_partition_feats load_partition_book partition_graph dgl_partition_to_graphbolt ================================================ FILE: docs/source/api/python/dgl.function.rst ================================================ .. _apifunction: .. currentmodule:: dgl.function dgl.function ================================== This subpackage hosts all the **built-in functions** provided by DGL. Built-in functions are DGL's recommended way to express different types of :ref:`guide-message-passing` computation (i.e., via :func:`~dgl.DGLGraph.update_all`) or computing edge-wise features from node-wise features (i.e., via :func:`~dgl.DGLGraph.apply_edges`). Built-in functions describe the node-wise and edge-wise computation in a symbolic way without any actual computation, so DGL can analyze and map them to efficient low-level kernels. Here are some examples: .. code:: python import dgl import dgl.function as fn import torch as th g = ... # create a DGLGraph g.ndata['h'] = th.randn((g.num_nodes(), 10)) # each node has feature size 10 g.edata['w'] = th.randn((g.num_edges(), 1)) # each edge has feature size 1 # collect features from source nodes and aggregate them in destination nodes g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h_sum')) # multiply source node features with edge weights and aggregate them in destination nodes g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.max('m', 'h_max')) # compute edge embedding by multiplying source and destination node embeddings g.apply_edges(fn.u_mul_v('h', 'h', 'w_new')) ``fn.copy_u``, ``fn.u_mul_e``, ``fn.u_mul_v`` are built-in message functions, while ``fn.sum`` and ``fn.max`` are built-in reduce functions. DGL's convention is to use ``u``, ``v`` and ``e`` to represent source nodes, destination nodes, and edges, respectively. For example, ``copy_u`` tells DGL to copy the source node data as the messages; ``u_mul_e`` tells DGL to multiply source node features with edge features. 
To define a unary message function (e.g. ``copy_u``), specify one input feature name and one output message name. To define a binary message function (e.g. ``u_mul_e``), specify two input feature names and one output message name. During the computation, the message function will read the data under the given names, perform computation, and return the output using the output name. For example, the above ``fn.u_mul_e('h', 'w', 'm')`` is the same as the following user-defined function: .. code:: python def udf_u_mul_e(edges): return {'m' : edges.src['h'] * edges.data['w']} To define a reduce function, one input message name and one output node feature name need to be specified. For example, the above ``fn.max('m', 'h_max')`` is the same as the following user-defined function: .. code:: python def udf_max(nodes): return {'h_max' : th.max(nodes.mailbox['m'], 1)[0]} All binary message functions support **broadcasting**, a mechanism for extending element-wise operations to tensor inputs with different shapes. DGL generally follows the standard broadcasting semantics of `NumPy <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ and `PyTorch <https://pytorch.org/docs/stable/notes/broadcasting.html>`_. Below are some examples: .. code:: python import dgl import dgl.function as fn import torch as th g = ...
# create a DGLGraph # case 1 g.ndata['h'] = th.randn((g.num_nodes(), 10)) g.edata['w'] = th.randn((g.num_edges(), 1)) # OK, valid broadcasting between feature shapes (10,) and (1,) g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h_new')) g.ndata['h_new'] # shape: (g.num_nodes(), 10) # case 2 g.ndata['h'] = th.randn((g.num_nodes(), 5, 10)) g.edata['w'] = th.randn((g.num_edges(), 10)) # OK, valid broadcasting between feature shapes (5, 10) and (10,) g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h_new')) g.ndata['h_new'] # shape: (g.num_nodes(), 5, 10) # case 3 g.ndata['h'] = th.randn((g.num_nodes(), 5, 10)) g.edata['w'] = th.randn((g.num_edges(), 5)) # NOT OK, invalid broadcasting between feature shapes (5, 10) and (5,) # shapes are aligned from the right g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h_new')) # case 4 g.ndata['h1'] = th.randn((g.num_nodes(), 1, 10)) g.ndata['h2'] = th.randn((g.num_nodes(), 5, 1)) # OK, valid broadcasting between feature shapes (1, 10) and (5, 1) g.apply_edges(fn.u_add_v('h1', 'h2', 'x')) # apply_edges also supports broadcasting g.edata['x'] # shape: (g.num_edges(), 5, 10) # case 5 g.ndata['h1'] = th.randn((g.num_nodes(), 1, 10, 128)) g.ndata['h2'] = th.randn((g.num_nodes(), 5, 1, 128)) # OK, u_dot_v supports broadcasting but requires the last dimension to match g.apply_edges(fn.u_dot_v('h1', 'h2', 'x')) g.edata['x'] # shape: (g.num_edges(), 5, 10, 1) .. _api-built-in: DGL Built-in Function ------------------------- Here is a cheatsheet of all the DGL built-in functions.
+-------------------------+-----------------------------------------------------------------+-----------------------+ | Category | Functions | Memo | +=========================+=================================================================+=======================+ | Unary message function | ``copy_u`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``copy_e`` | | +-------------------------+-----------------------------------------------------------------+-----------------------+ | Binary message function | ``u_add_v``, ``u_sub_v``, ``u_mul_v``, ``u_div_v``, ``u_dot_v`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``u_add_e``, ``u_sub_e``, ``u_mul_e``, ``u_div_e``, ``u_dot_e`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``v_add_u``, ``v_sub_u``, ``v_mul_u``, ``v_div_u``, ``v_dot_u`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``v_add_e``, ``v_sub_e``, ``v_mul_e``, ``v_div_e``, ``v_dot_e`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``e_add_u``, ``e_sub_u``, ``e_mul_u``, ``e_div_u``, ``e_dot_u`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``e_add_v``, ``e_sub_v``, ``e_mul_v``, ``e_div_v``, ``e_dot_v`` | | +-------------------------+-----------------------------------------------------------------+-----------------------+ | Reduce function | ``max`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``min`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``sum`` | | | +-----------------------------------------------------------------+-----------------------+ | | ``mean`` | | 
+-------------------------+-----------------------------------------------------------------+-----------------------+ Message functions ----------------- .. autosummary:: :toctree: ../../generated/ copy_u copy_e u_add_v u_sub_v u_mul_v u_div_v u_add_e u_sub_e u_mul_e u_div_e v_add_u v_sub_u v_mul_u v_div_u v_add_e v_sub_e v_mul_e v_div_e e_add_u e_sub_u e_mul_u e_div_u e_add_v e_sub_v e_mul_v e_div_v u_dot_v u_dot_e v_dot_e v_dot_u e_dot_u e_dot_v Reduce functions ---------------- .. autosummary:: :toctree: ../../generated/ sum max min mean ================================================ FILE: docs/source/api/python/dgl.geometry.rst ================================================ .. _api-geometry: dgl.geometry ================================= .. automodule:: dgl.geometry .. _api-geometry-farthest-point-sampler: Farthest Point Sampler ----------- Farthest point sampling is a greedy algorithm that samples from a point cloud data iteratively. It starts from a random single sample of point. In each iteration, it samples from the rest points that is the farthest from the set of sampled points. .. autoclass:: farthest_point_sampler .. _api-geometry-neighbor-matching: Neighbor Matching ----------------------------- Neighbor matching is an important module in the Graclus clustering algorithm. .. autoclass:: neighbor_matching ================================================ FILE: docs/source/api/python/dgl.graphbolt.rst ================================================ .. _apibackend: 🆕 dgl.graphbolt ================================= .. currentmodule:: dgl.graphbolt **dgl.graphbolt** is a dataloading framework for GNNs that provides well-defined APIs for each stage of the data pipeline and multiple standard implementations. Dataset ------- A dataset is a collection of graph structure data, feature data and tasks. .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst Dataset OnDiskDataset BuiltinDataset LegacyDataset Task Graph ----- A graph is a collection of nodes and edges. It can be a homogeneous graph or a heterogeneous graph. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst SamplingGraph FusedCSCSamplingGraph Feature and FeatureStore ------------------------ A feature is a collection of data(tensor, array). A feature store is a collection of features. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst Feature FeatureStore BasicFeatureStore TorchBasedFeature TorchBasedFeatureStore DiskBasedFeature CPUCachedFeature GPUCachedFeature DataLoader ---------- A dataloader is for iterating over a dataset and generate mini-batches. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst DataLoader ItemSet ------- An item set is an iterable collection of items. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst ItemSet HeteroItemSet ItemSampler ----------- An item sampler is for sampling items from an item set. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst ItemSampler DistributedItemSampler MiniBatch --------- A mini-batch is a collection of sampled subgraphs and their corresponding features. It is the basic unit for training a GNN model. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst MiniBatch MiniBatchTransformer NegativeSampler --------------- A negative sampler is for sampling negative items from mini-batches. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst NegativeSampler UniformNegativeSampler SubgraphSampler --------------- A subgraph sampler is for sampling subgraphs from a graph. .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst SubgraphSampler SampledSubgraph NeighborSampler LayerNeighborSampler TemporalNeighborSampler TemporalLayerNeighborSampler SampledSubgraphImpl FusedSampledSubgraphImpl InSubgraphSampler FeatureFetcher -------------- A feature fetcher is for fetching features from a feature store. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst FeatureFetcher CopyTo ------ This datapipe is for copying data to a device. .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: graphbolt_classtemplate.rst CopyTo Utilities --------- .. autosummary:: :toctree: ../../generated/ :nosignatures: cpu_cached_feature gpu_cached_feature fused_csc_sampling_graph load_from_shared_memory from_dglgraph etype_str_to_tuple etype_tuple_to_str isin seed index_select expand_indptr indptr_edge_ids add_reverse_edges exclude_seed_edges compact_csc_format unique_and_compact unique_and_compact_csc_formats numpy_save_aligned ================================================ FILE: docs/source/api/python/dgl.multiprocessing.rst ================================================ .. _apimultiprocessing: dgl.multiprocessing =================== This is a minimal wrapper of Python's native :mod:`multiprocessing` module. It modifies the :class:`multiprocessing.Process` class to make forking work with OpenMP in the DGL core library. The API usage is exactly the same as the native module, so DGL does not provide additional documentation. In addition, if your backend is PyTorch, this module will also be compatible with :mod:`torch.multiprocessing` module. .. currentmodule:: dgl.multiprocessing.pytorch .. autosummary:: :toctree: ../../generated/ call_once_and_share shared_tensor ================================================ FILE: docs/source/api/python/dgl.ops.rst ================================================ .. _apibackend: .. 
currentmodule:: dgl.ops dgl.ops ================================== Frame-agnostic operators for message passing on graphs. GSpMM functions --------------- Generalized Sparse-Matrix Dense-Matrix Multiplication functions. It *fuses* two steps into one kernel. 1. Compute messages by add/sub/mul/div of source node and edge features, or copy node features to edges. 2. Aggregate the messages by sum/max/min/mean as the features on destination nodes. Our implementation supports tensors on CPU/GPU in PyTorch/MXNet/Tensorflow as input. All operators are equipped with autograd (computing the input gradients given the output gradient) and broadcasting (if the feature shapes of the operands do not match, we first broadcast them to the same shape and then apply the binary operators). Our broadcast semantics follow NumPy; please see https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html for more details. What we mean by *fuses* is that the messages are not materialized on edges; instead, we compute the result on destination nodes directly, thus saving memory cost. The space complexity of GSpMM operators is :math:`O(|N|D)` where :math:`|N|` refers to the number of nodes in the graph, and :math:`D` refers to the feature size (:math:`D=\prod_{i=1}^{k}D_i` if your feature is a multi-dimensional tensor with :math:`k` dimensions).
The following is an example showing how GSpMM works (we use PyTorch as the backend here, you can enjoy the same convenience on other frameworks by similar usage): >>> import dgl >>> import torch as th >>> import dgl.ops as F >>> g = dgl.graph(([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])) # 3 nodes, 6 edges >>> x = th.ones(3, 2, requires_grad=True) >>> x tensor([[1., 1.], [1., 1.], [1., 1.]], requires_grad=True) >>> y = th.arange(1, 13).float().view(6, 2).requires_grad_() tensor([[ 1., 2.], [ 3., 4.], [ 5., 6.], [ 7., 8.], [ 9., 10.], [11., 12.]], requires_grad=True) >>> out_1 = F.u_mul_e_sum(g, x, y) >>> out_1 # (10, 12) = ((1, 1) * (3, 4)) + ((1, 1) * (7, 8)) tensor([[ 1., 2.], [10., 12.], [25., 28.]], grad_fn=) >>> out_1.sum().backward() >>> x.grad tensor([[12., 15.], [18., 20.], [12., 13.]]) >>> y.grad tensor([[1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]]) >>> out_2 = F.copy_u_sum(g, x) >>> out_2 tensor([[1., 1.], [2., 2.], [3., 3.]], grad_fn=) >>> out_3 = F.u_add_e_max(g, x, y) >>> out_3 tensor([[ 2., 3.], [ 8., 9.], [12., 13.]], grad_fn=) >>> y1 = th.rand(6, 4, 2, requires_grad=True) # test broadcast >>> F.u_mul_e_sum(g, x, y1).shape # (2,), (4, 2) -> (4, 2) torch.Size([3, 4, 2]) For all operators, the input graph could either be a homogeneous or a bipartite graph. .. autosummary:: :toctree: ../../generated/ gspmm u_add_e_sum u_sub_e_sum u_mul_e_sum u_div_e_sum u_add_e_max u_sub_e_max u_mul_e_max u_div_e_max u_add_e_min u_sub_e_min u_mul_e_min u_div_e_min u_add_e_mean u_sub_e_mean u_mul_e_mean u_div_e_mean copy_u_sum copy_e_sum copy_u_max copy_e_max copy_u_min copy_e_min copy_u_mean copy_e_mean GSDDMM functions ---------------- Generalized Sampled Dense-Dense Matrix Multiplication. It computes edge features by add/sub/mul/div/dot features on source/destination nodes or edges. Like GSpMM, our implementation supports tensors on CPU/GPU in PyTorch/MXNet/Tensorflow as input. All operators are equipped with autograd and broadcasting. 
The memory cost of GSDDMM is :math:`O(|E|D)` where :math:`|E|` refers to the number of edges in the graph while :math:`D` refers to the feature size. Note that we support ``dot`` operator, which semantically is the same as reduce the last dimension by sum to the result of ``mul`` operator. However, the ``dot`` is more memory efficient because it *fuses* ``mul`` and sum reduction, which is critical in the cases while the feature size on last dimension is non-trivial (e.g. multi-head attention in Transformer-like models). The following is an example showing how GSDDMM works: >>> import dgl >>> import torch as th >>> import dgl.ops as F >>> g = dgl.graph(([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])) # 3 nodes, 6 edges >>> x = th.ones(3, 2, requires_grad=True) >>> x tensor([[1., 1.], [1., 1.], [1., 1.]], requires_grad=True) >>> y = th.arange(1, 7).float().view(3, 2).requires_grad_() >>> y tensor([[1., 2.], [3., 4.], [5., 6.]], requires_grad=True) >>> e = th.ones(6, 1, 2, requires_grad=True) * 2 tensor([[[2., 2.]], [[2., 2.]], [[2., 2.]], [[2., 2.]], [[2., 2.]], [[2., 2.]]], grad_fn=) >>> out1 = F.u_div_v(g, x, y) tensor([[1.0000, 0.5000], [0.3333, 0.2500], [0.2000, 0.1667], [0.3333, 0.2500], [0.2000, 0.1667], [0.2000, 0.1667]], grad_fn=) >>> out1.sum().backward() >>> x.grad tensor([[1.5333, 0.9167], [0.5333, 0.4167], [0.2000, 0.1667]]) >>> y.grad tensor([[-1.0000, -0.2500], [-0.2222, -0.1250], [-0.1200, -0.0833]]) >>> out2 = F.e_sub_v(g, e, y) >>> out2 tensor([[[ 1., 0.]], [[-1., -2.]], [[-3., -4.]], [[-1., -2.]], [[-3., -4.]], [[-3., -4.]]], grad_fn=) >>> out3 = F.copy_v(g, y) >>> out3 tensor([[1., 2.], [3., 4.], [5., 6.], [3., 4.], [5., 6.], [5., 6.]], grad_fn=) >>> out4 = F.u_dot_v(g, x, y) >>> out4 # the last dimension was reduced to size 1. tensor([[ 3.], [ 7.], [11.], [ 7.], [11.], [11.]], grad_fn=) .. 
autosummary:: :toctree: ../../generated/ gsddmm u_add_v u_sub_v u_mul_v u_dot_v u_div_v u_add_e u_sub_e u_mul_e u_dot_e u_div_e e_add_v e_sub_v e_mul_v e_dot_v e_div_v v_add_u v_sub_u v_mul_u v_dot_u v_div_u e_add_u e_sub_u e_mul_u e_dot_u e_div_u v_add_e v_sub_e v_mul_e v_dot_e v_div_e copy_u copy_v Like GSpMM, GSDDMM operators support both homogeneous and bipartite graphs. Segment Reduce Module --------------------- DGL provides operators to reduce a value tensor along the first dimension by segments. .. autosummary:: :toctree: ../../generated/ segment_reduce GatherMM and SegmentMM Module ----------------------------- SegmentMM: DGL provides operators to perform matrix multiplication according to segments. GatherMM: DGL provides operators to gather data according to the given indices and perform matrix multiplication. .. autosummary:: :toctree: ../../generated/ gather_mm segment_mm Supported Data types -------------------- Operators defined in ``dgl.ops`` support floating point data types, i.e. the operands must be ``half`` (``float16``) /``float``/``double`` tensors. The input tensors must have the same data type (if one input tensor has type float16 and the other input tensor has data type float32, the user must convert one of them to align with the other one). ``float16`` data type support is disabled by default as it has a minimum GPU compute capability requirement of ``sm_53`` (Pascal, Volta, Turing and Ampere architectures). Users can enable float16 for mixed precision training by compiling DGL from source (see the :doc:`Mixed Precision Training </guide/mixed_precision>` tutorial for details). Relation with Message Passing APIs ---------------------------------- ``dgl.update_all`` and ``dgl.apply_edges`` calls with built-in message/reduce functions would be dispatched into function calls of operators defined in ``dgl.ops``: >>> import dgl >>> import torch as th >>> import dgl.ops as F >>> import dgl.function as fn >>> g = dgl.rand_graph(100, 1000) # create a DGLGraph with 100 nodes and 1000 edges.
>>> x = th.rand(100, 20) # node features. >>> e = th.rand(1000, 20) >>> >>> # dgl.update_all + builtin functions >>> g.srcdata['x'] = x # srcdata is the same as ndata for graphs with one node type. >>> g.edata['e'] = e >>> g.update_all(fn.u_mul_e('x', 'e', 'm'), fn.sum('m', 'y')) >>> y = g.dstdata['y'] # dstdata is the same as ndata for graphs with one node type. >>> >>> # use GSpMM operators defined in dgl.ops directly >>> y = F.u_mul_e_sum(g, x, e) >>> >>> # dgl.apply_edges + builtin functions >>> g.srcdata['x'] = x >>> g.dstdata['y'] = y >>> g.apply_edges(fn.u_dot_v('x', 'y', 'z')) >>> z = g.edata['z'] >>> >>> # use GSDDMM operators defined in dgl.ops directly >>> z = F.u_dot_v(g, x, y) It is up to the user to decide whether to use message-passing APIs or GSpMM/GSDDMM operators; both have the same efficiency. Programs written with message-passing APIs look more DGL-style, but in some cases calling GSpMM/GSDDMM operators is more concise. Note that on PyTorch all operators defined in ``dgl.ops`` support higher-order gradients, and so do the message passing APIs, because they depend entirely on these operators. ================================================ FILE: docs/source/api/python/dgl.optim.rst ================================================ .. _apioptim: dgl.optim ========= .. automodule:: dgl.optim Node embedding optimizer ------------------------- .. currentmodule:: dgl.optim.pytorch .. autoclass:: SparseAdagrad .. autoclass:: SparseAdam ================================================ FILE: docs/source/api/python/dgl.rst ================================================ .. _apidgl: dgl ============================= .. currentmodule:: dgl .. automodule:: dgl .. _api-graph-create-ops: Graph Create Ops ------------------------- Operators for constructing :class:`DGLGraph` from raw data formats. ..
autosummary:: :toctree: ../../generated/ graph heterograph from_cugraph from_scipy from_networkx bipartite_from_scipy bipartite_from_networkx rand_graph rand_bipartite knn_graph segmented_knn_graph radius_graph create_block block_to_graph merge .. _api-subgraph-extraction: Subgraph Extraction Ops ------------------------------------- Operators for extracting and returning subgraphs. .. autosummary:: :toctree: ../../generated/ node_subgraph edge_subgraph node_type_subgraph edge_type_subgraph in_subgraph out_subgraph khop_in_subgraph khop_out_subgraph .. _api-transform: Graph Transform Ops ---------------------------------- Operators for generating new graphs by manipulating the structure of the existing ones. .. autosummary:: :toctree: ../../generated/ add_edges add_nodes add_reverse_edges add_self_loop adj_product_graph adj_sum_graph compact_graphs khop_adj khop_graph knn_graph laplacian_lambda_max line_graph metapath_reachable_graph metis_partition metis_partition_assignment norm_by_dst partition_graph_with_halo radius_graph remove_edges remove_nodes remove_self_loop reorder_graph reverse segmented_knn_graph sort_csr_by_tag sort_csc_by_tag to_bidirected to_bidirected_stale to_block to_cugraph to_double to_float to_half to_heterogeneous to_homogeneous to_networkx to_simple to_simple_graph .. _api-positional-encoding: Graph Positional Encoding Ops: ----------------------------------------- Operators for generating positional encodings of each node. .. autosummary:: :toctree: ../../generated random_walk_pe lap_pe double_radius_node_labeling shortest_dist svd_pe .. _api-partition: Graph Partition Utilities ------------------------- .. autosummary:: :toctree: ../../generated/ metis_partition metis_partition_assignment partition_graph_with_halo .. 
_api-batch: Batching and Reading Out Ops ------------------------------- Operators for batching multiple graphs into one for batch processing and operators for computing graph-level representation for both single and batched graphs. .. autosummary:: :toctree: ../../generated/ batch unbatch slice_batch readout_nodes readout_edges sum_nodes sum_edges mean_nodes mean_edges max_nodes max_edges softmax_nodes softmax_edges broadcast_nodes broadcast_edges topk_nodes topk_edges Adjacency Related Utilities ------------------------------- Utilities for computing the adjacency matrix and the Laplacian matrix. .. autosummary:: :toctree: ../../generated/ khop_adj laplacian_lambda_max Graph Traversal & Message Propagation ------------------------------------------ DGL implements graph traversal algorithms as Python generators, which return the visited set of nodes or edges (as an ID tensor) at each iteration. The naming convention is ``<algorithm>_[nodes|edges]_generator``. An example usage is as follows. .. code:: python g = ... # some DGLGraph for nodes in dgl.bfs_nodes_generator(g, 0): do_something(nodes) .. autosummary:: :toctree: ../../generated/ bfs_nodes_generator bfs_edges_generator topological_nodes_generator dfs_edges_generator dfs_labeled_edges_generator DGL provides APIs to perform message passing following graph traversal order. ``prop_nodes_XXX`` calls traversal algorithm ``XXX`` and triggers :func:`~DGLGraph.pull()` on the visited node set at each iteration. ``prop_edges_YYY`` applies traversal algorithm ``YYY`` and triggers :func:`~DGLGraph.send_and_recv()` on the visited edge set at each iteration. .. autosummary:: :toctree: ../../generated/ prop_nodes prop_nodes_bfs prop_nodes_topo prop_edges prop_edges_dfs Homophily Measures ------------------------- Utilities for measuring homophily of a graph ..
autosummary:: :toctree: ../../generated/ edge_homophily node_homophily linkx_homophily adjusted_homophily Label Informativeness Measures ------------------------------ Utilities for measuring label informativeness of a graph .. autosummary:: :toctree: ../../generated/ edge_label_informativeness node_label_informativeness Utilities ----------------------------------------------- Other utilities for controlling randomness, saving and loading graphs, setting and getting runtime configurations, applying the same function to every element in a container, etc. .. autosummary:: :toctree: ../../generated/ seed save_graphs load_graphs apply_each use_libxsmm is_libxsmm_enabled ================================================ FILE: docs/source/api/python/dgl.sampling.rst ================================================ .. _api-sampling: dgl.sampling ================================= .. automodule:: dgl.sampling Random walk ------------------------------ .. autosummary:: :toctree: ../../generated/ random_walk node2vec_random_walk pack_traces Neighbor sampling --------------------------- .. autosummary:: :toctree: ../../generated/ sample_neighbors sample_labors sample_neighbors_biased select_topk PinSAGESampler Negative sampling ----------------- .. autosummary:: :toctree: ../../generated/ global_uniform_negative_sampling ================================================ FILE: docs/source/api/python/dgl.sparse_v0.rst ================================================ .. _apibackend: dgl.sparse ================================= `dgl.sparse` is a library for sparse operators that are commonly used in GNN models. Sparse matrix class ------------------------- .. currentmodule:: dgl.sparse ..
class:: SparseMatrix A SparseMatrix can be created from Coordinate format indices using the :func:`spmatrix` constructor: >>> indices = torch.tensor([[1, 1, 2], >>> [2, 4, 3]]) >>> A = dglsp.spmatrix(indices) SparseMatrix(indices=tensor([[1, 1, 2], [2, 4, 3]]), values=tensor([1., 1., 1.]), shape=(3, 5), nnz=3) Creation Ops ```````` .. autosummary:: :toctree: ../../generated/ spmatrix val_like from_coo from_csr from_csc diag identity Attributes and methods `````````````````````` .. autosummary:: :toctree: ../../generated/ SparseMatrix.shape SparseMatrix.nnz SparseMatrix.dtype SparseMatrix.device SparseMatrix.val SparseMatrix.row SparseMatrix.col SparseMatrix.indices SparseMatrix.coo SparseMatrix.csr SparseMatrix.csc SparseMatrix.coalesce SparseMatrix.has_duplicate SparseMatrix.to_dense SparseMatrix.to SparseMatrix.cuda SparseMatrix.cpu SparseMatrix.float SparseMatrix.double SparseMatrix.int SparseMatrix.long SparseMatrix.transpose SparseMatrix.t SparseMatrix.T SparseMatrix.neg SparseMatrix.reduce SparseMatrix.sum SparseMatrix.smax SparseMatrix.smin SparseMatrix.smean SparseMatrix.softmax Operators --------- .. currentmodule:: dgl.sparse Elementwise Operators ```````` .. autosummary:: :toctree: ../../generated/ add sub mul div power Matrix Multiplication ```````` .. autosummary:: :toctree: ../../generated/ matmul spmm bspmm spspmm sddmm bsddmm Non-linear activation functions ```````` .. autosummary:: :toctree: ../../generated/ softmax Broadcast operators ```````` .. autosummary:: :toctree: ../../generated/ sp_broadcast_v sp_add_v sp_sub_v sp_mul_v sp_div_v ================================================ FILE: docs/source/api/python/index.rst ================================================ API Reference ============= .. 
toctree:: :maxdepth: 2 dgl dgl.data dgl.dataloading dgl.DGLGraph dgl.distributed dgl.function nn-pytorch nn-tensorflow nn-mxnet dgl.ops dgl.sampling udf transforms ================================================ FILE: docs/source/api/python/knn_benchmark.rst ================================================ .. _knn_benchmark: Benchmark the performance of KNN algorithms =========================================== In this doc, we benchmark the performance of multiple K-Nearest Neighbor algorithms implemented by :func:`dgl.knn_graph`. Given a dataset of ``N`` samples with ``D`` dimensions, the common use case of KNN algorithms in graph learning is to build a KNN graph by finding the ``K`` nearest neighbors for each of the ``N`` samples among the dataset. Empirically, the three parameters, ``N``, ``D``, and ``K``, all have an impact on the computation cost. To benchmark the algorithms, we pick a few representative datasets to cover most common scenarios: * A synthetic dataset with mixed Gaussian samples: ``N = 1000``, ``D = 3``. * A point cloud sample from ModelNet: ``N = 10000``, ``D = 3``. * Subsets of MNIST - A small subset: ``N = 1000``, ``D = 784`` - A medium subset: ``N = 10000``, ``D = 784`` - A large subset: ``N = 50000``, ``D = 784`` Some notes: * ``bruteforce-sharemem`` is an optimized implementation of ``bruteforce`` on GPU. * ``kd-tree`` is currently only implemented on CPU. * ``bruteforce-blas`` conducts matrix multiplication, thus is memory inefficient. * ``nn-descent`` is an approximate algorithm, and we also report the recall rate of its result. Results ------- In this section, we show the runtime and recall rate (where applicable) for the algorithms under various scenarios. The experiments are run on an Amazon EC2 P3.2xlarge instance. This instance has 8 vCPUs with 61GB RAM, and one Tesla V100 GPU with 16GB RAM. In terms of the environment, we obtain the numbers with DGL==0.7.0(`64d0f3f <https://github.com/dmlc/dgl/commit/64d0f3f>`_), PyTorch==1.8.1, CUDA==11.1 on Ubuntu 18.04.5 LTS.
* **Mixed Gaussian:** +---------------------+------------------+-------------------+------------------+------------------+ | Model | CPU | GPU | | +------------------+-------------------+------------------+------------------+ | | K = 8 | K = 64 | K = 8 | K = 64 | +=====================+==================+===================+==================+==================+ | bruteforce-blas | 0.010 | 0.011 | 0.002 | 0.003 | +---------------------+------------------+-------------------+------------------+------------------+ | kd-tree | 0.004 | 0.006 | n/a | n/a | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce | 0.004 | 0.006 | 0.126 | 0.009 | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce-sharemem | n/a | n/a | 0.002 | 0.003 | +---------------------+------------------+-------------------+------------------+------------------+ | nn-descent | 0.014 (R: 0.985) | 0.148 (R: 1.000) | 0.016 (R: 0.973) | 0.077 (R: 1.000) | +---------------------+------------------+-------------------+------------------+------------------+ * **Point Cloud** +---------------------+------------------+-------------------+------------------+------------------+ | Model | CPU | GPU | | +------------------+-------------------+------------------+------------------+ | | K = 8 | K = 64 | K = 8 | K = 64 | +=====================+==================+===================+==================+==================+ | bruteforce-blas | 0.359 | 0.432 | 0.010 | 0.010 | +---------------------+------------------+-------------------+------------------+------------------+ | kd-tree | 0.007 | 0.026 | n/a | n/a | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce | 0.074 | 0.167 | 0.008 | 0.039 | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce-sharemem | n/a | n/a | 0.004 | 
0.017 | +---------------------+------------------+-------------------+------------------+------------------+ | nn-descent | 0.161 (R: 0.977) | 1.345 (R: 0.999) | 0.086 (R: 0.966) | 0.445 (R: 0.999) | +---------------------+------------------+-------------------+------------------+------------------+ * **Small MNIST** +---------------------+------------------+-------------------+------------------+------------------+ | Model | CPU | GPU | | +------------------+-------------------+------------------+------------------+ | | K = 8 | K = 64 | K = 8 | K = 64 | +=====================+==================+===================+==================+==================+ | bruteforce-blas | 0.014 | 0.015 | 0.002 | 0.002 | +---------------------+------------------+-------------------+------------------+------------------+ | kd-tree | 0.179 | 0.182 | n/a | n/a | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce | 0.173 | 0.228 | 0.123 | 0.170 | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce-sharemem | n/a | n/a | 0.045 | 0.054 | +---------------------+------------------+-------------------+------------------+------------------+ | nn-descent | 0.060 (R: 0.878) | 1.077 (R: 0.999) | 0.030 (R: 0.952) | 0.457 (R: 0.999) | +---------------------+------------------+-------------------+------------------+------------------+ * **Medium MNIST** +---------------------+------------------+-------------------+------------------+------------------+ | Model | CPU | GPU | | +------------------+-------------------+------------------+------------------+ | | K = 8 | K = 64 | K = 8 | K = 64 | +=====================+==================+===================+==================+==================+ | bruteforce-blas | 0.897 | 0.970 | 0.019 | 0.023 | +---------------------+------------------+-------------------+------------------+------------------+ | kd-tree | 18.902 | 18.928 | n/a | 
n/a | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce | 14.495 | 17.652 | 2.058 | 2.588 | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce-sharemem | n/a | n/a | 2.257 | 2.524 | +---------------------+------------------+-------------------+------------------+------------------+ | nn-descent | 0.804 (R: 0.755) | 14.108 (R: 0.999) | 0.158 (R: 0.900) | 1.794 (R: 0.999) | +---------------------+------------------+-------------------+------------------+------------------+ * **Large MNIST** +---------------------+------------------+-------------------+------------------+------------------+ | Model | CPU | GPU | | +------------------+-------------------+------------------+------------------+ | | K = 8 | K = 64 | K = 8 | K = 64 | +=====================+==================+===================+==================+==================+ | bruteforce-blas | 21.829 | 22.135 | Out of Memory | Out of Memory | +---------------------+------------------+-------------------+------------------+------------------+ | kd-tree | 542.688 | 573.379 | n/a | n/a | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce | 373.823 | 432.963 | 10.317 | 12.639 | +---------------------+------------------+-------------------+------------------+------------------+ | bruteforce-sharemem | n/a | n/a | 53.133 | 58.419 | +---------------------+------------------+-------------------+------------------+------------------+ | nn-descent | 4.995 (R: 0.658) | 75.487 (R: 0.999) | 1.478 (R: 0.860) | 15.698 (R: 0.999)| +---------------------+------------------+-------------------+------------------+------------------+ Conclusion ---------- - As long as you have enough memory, ``bruteforce-blas`` is the default algorithm to go with. - Specifically, when ``D`` is small and the data is on CPU, ``kd-tree`` is the best algorithm. 
================================================ FILE: docs/source/api/python/nn-mxnet.rst ================================================ .. _apinn-mxnet: dgl.nn (MXNet) ================ Conv Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.mxnet.conv.GraphConv ~dgl.nn.mxnet.conv.RelGraphConv ~dgl.nn.mxnet.conv.TAGConv ~dgl.nn.mxnet.conv.GATConv ~dgl.nn.mxnet.conv.EdgeConv ~dgl.nn.mxnet.conv.SAGEConv ~dgl.nn.mxnet.conv.SGConv ~dgl.nn.mxnet.conv.APPNPConv ~dgl.nn.mxnet.conv.GINConv ~dgl.nn.mxnet.conv.GatedGraphConv ~dgl.nn.mxnet.conv.GMMConv ~dgl.nn.mxnet.conv.ChebConv ~dgl.nn.mxnet.conv.AGNNConv ~dgl.nn.mxnet.conv.NNConv Dense Conv Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.mxnet.conv.DenseGraphConv ~dgl.nn.mxnet.conv.DenseSAGEConv ~dgl.nn.mxnet.conv.DenseChebConv Global Pooling Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.mxnet.glob.SumPooling ~dgl.nn.mxnet.glob.AvgPooling ~dgl.nn.mxnet.glob.MaxPooling ~dgl.nn.mxnet.glob.SortPooling ~dgl.nn.mxnet.glob.GlobalAttentionPooling ~dgl.nn.mxnet.glob.Set2Set Heterogeneous Learning Modules ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.mxnet.HeteroGraphConv Utility Modules ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.mxnet.utils.Sequential ================================================ FILE: docs/source/api/python/nn-pytorch.rst ================================================ .. _apinn-pytorch: dgl.nn (PyTorch) ================ Conv Layers ---------------------------------------- .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.conv.GraphConv ~dgl.nn.pytorch.conv.EdgeWeightNorm ~dgl.nn.pytorch.conv.RelGraphConv ~dgl.nn.pytorch.conv.TAGConv ~dgl.nn.pytorch.conv.GATConv ~dgl.nn.pytorch.conv.GATv2Conv ~dgl.nn.pytorch.conv.EGATConv ~dgl.nn.pytorch.conv.EdgeGATConv ~dgl.nn.pytorch.conv.EdgeConv ~dgl.nn.pytorch.conv.SAGEConv ~dgl.nn.pytorch.conv.SGConv ~dgl.nn.pytorch.conv.APPNPConv ~dgl.nn.pytorch.conv.GINConv ~dgl.nn.pytorch.conv.GINEConv ~dgl.nn.pytorch.conv.GatedGraphConv ~dgl.nn.pytorch.conv.GatedGCNConv ~dgl.nn.pytorch.conv.GMMConv ~dgl.nn.pytorch.conv.ChebConv ~dgl.nn.pytorch.conv.AGNNConv ~dgl.nn.pytorch.conv.NNConv ~dgl.nn.pytorch.conv.AtomicConv ~dgl.nn.pytorch.conv.CFConv ~dgl.nn.pytorch.conv.DotGatConv ~dgl.nn.pytorch.conv.TWIRLSConv ~dgl.nn.pytorch.conv.TWIRLSUnfoldingAndAttention ~dgl.nn.pytorch.conv.GCN2Conv ~dgl.nn.pytorch.conv.HGTConv ~dgl.nn.pytorch.conv.GroupRevRes ~dgl.nn.pytorch.conv.EGNNConv ~dgl.nn.pytorch.conv.PNAConv ~dgl.nn.pytorch.conv.DGNConv CuGraph Conv Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.conv.CuGraphRelGraphConv ~dgl.nn.pytorch.conv.CuGraphGATConv ~dgl.nn.pytorch.conv.CuGraphSAGEConv Dense Conv Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.conv.DenseGraphConv ~dgl.nn.pytorch.conv.DenseSAGEConv ~dgl.nn.pytorch.conv.DenseChebConv Global Pooling Layers ---------------------------------------- .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.glob.SumPooling ~dgl.nn.pytorch.glob.AvgPooling ~dgl.nn.pytorch.glob.MaxPooling ~dgl.nn.pytorch.glob.SortPooling ~dgl.nn.pytorch.glob.WeightAndSum ~dgl.nn.pytorch.glob.GlobalAttentionPooling ~dgl.nn.pytorch.glob.Set2Set ~dgl.nn.pytorch.glob.SetTransformerEncoder ~dgl.nn.pytorch.glob.SetTransformerDecoder Score Modules for Link Prediction and Knowledge Graph Completion ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.link.EdgePredictor ~dgl.nn.pytorch.link.TransE ~dgl.nn.pytorch.link.TransR Heterogeneous Learning Modules ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.HeteroGraphConv ~dgl.nn.pytorch.HeteroLinear ~dgl.nn.pytorch.HeteroEmbedding ~dgl.nn.pytorch.TypedLinear Utility Modules ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.utils.Sequential ~dgl.nn.pytorch.utils.WeightBasis ~dgl.nn.pytorch.factory.KNNGraph ~dgl.nn.pytorch.factory.SegmentedKNNGraph ~dgl.nn.pytorch.factory.RadiusGraph ~dgl.nn.pytorch.utils.JumpingKnowledge ~dgl.nn.pytorch.sparse_emb.NodeEmbedding ~dgl.nn.pytorch.explain.GNNExplainer ~dgl.nn.pytorch.explain.HeteroGNNExplainer ~dgl.nn.pytorch.explain.SubgraphX ~dgl.nn.pytorch.explain.HeteroSubgraphX ~dgl.nn.pytorch.explain.PGExplainer ~dgl.nn.pytorch.explain.HeteroPGExplainer ~dgl.nn.pytorch.utils.LabelPropagation ~dgl.nn.pytorch.utils.LaplacianPosEnc Network Embedding Modules ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.DeepWalk ~dgl.nn.pytorch.MetaPath2Vec Utility Modules for Graph Transformer ---------------------------------------- .. 
autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.pytorch.gt.DegreeEncoder ~dgl.nn.pytorch.gt.LapPosEncoder ~dgl.nn.pytorch.gt.PathEncoder ~dgl.nn.pytorch.gt.SpatialEncoder ~dgl.nn.pytorch.gt.SpatialEncoder3d ~dgl.nn.pytorch.gt.BiasedMHA ~dgl.nn.pytorch.gt.GraphormerLayer ~dgl.nn.pytorch.gt.EGTLayer ================================================ FILE: docs/source/api/python/nn-tensorflow.rst ================================================ .. _apinn-tensorflow: dgl.nn (TensorFlow) =================== Conv Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.tensorflow.conv.GraphConv ~dgl.nn.tensorflow.conv.RelGraphConv ~dgl.nn.tensorflow.conv.GATConv ~dgl.nn.tensorflow.conv.SAGEConv ~dgl.nn.tensorflow.conv.ChebConv ~dgl.nn.tensorflow.conv.SGConv ~dgl.nn.tensorflow.conv.APPNPConv ~dgl.nn.tensorflow.conv.GINConv Global Pooling Layers ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.tensorflow.glob.SumPooling ~dgl.nn.tensorflow.glob.AvgPooling ~dgl.nn.tensorflow.glob.MaxPooling ~dgl.nn.tensorflow.glob.SortPooling ~dgl.nn.tensorflow.glob.GlobalAttentionPooling Heterogeneous Learning Modules ---------------------------------------- .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst ~dgl.nn.tensorflow.HeteroGraphConv ================================================ FILE: docs/source/api/python/nn.functional.rst ================================================ .. _apinn-functional: dgl.nn.functional ================= .. automodule:: dgl.nn.functional .. autosummary:: :toctree: ../../generated/ edge_softmax ================================================ FILE: docs/source/api/python/transforms.rst ================================================ .. _apitransform-namespace: dgl.transforms ============== ..
currentmodule:: dgl.transforms .. automodule:: dgl.transforms .. autosummary:: :toctree: ../../generated/ :nosignatures: :template: classtemplate.rst BaseTransform Compose AddSelfLoop RemoveSelfLoop AddReverse ToSimple LineGraph KHopGraph AddMetaPaths GCNNorm PPR HeatKernel GDC NodeShuffle DropNode DropEdge AddEdge RandomWalkPE LapPE FeatMask RowFeatNormalizer SIGNDiffusion ToLevi SVDPE ================================================ FILE: docs/source/api/python/udf.rst ================================================ .. _apiudf: User-defined Functions ================================================== .. currentmodule:: dgl.udf User-defined functions (UDFs) allow arbitrary computation in message passing (see :ref:`guide-message-passing`) and edge feature update with :func:`~dgl.DGLGraph.apply_edges`. They bring more flexibility when :ref:`apifunction` cannot realize a desired computation. Edge-wise User-defined Function ------------------------------- One can use an edge-wise user defined function for a message function in message passing or a function to apply in :func:`~dgl.DGLGraph.apply_edges`. It takes a batch of edges as input and returns messages (in message passing) or features (in :func:`~dgl.DGLGraph.apply_edges`) for each edge. The function may combine the features of the edges and their end nodes in computation. Formally, it takes the following form .. code:: def edge_udf(edges): """ Parameters ---------- edges : EdgeBatch A batch of edges. Returns ------- dict[str, tensor] The messages or edge features generated. It maps a message/feature name to the corresponding messages/features of all edges in the batch. The order of the messages/features is the same as the order of the edges in the input argument. """ DGL generates :class:`~dgl.udf.EdgeBatch` instances internally, which expose the following interface for defining ``edge_udf``. .. 
autosummary:: :toctree: ../../generated/ EdgeBatch.src EdgeBatch.dst EdgeBatch.data EdgeBatch.edges EdgeBatch.batch_size Node-wise User-defined Function ------------------------------- One can use a node-wise user defined function for a reduce function in message passing. It takes a batch of nodes as input and returns the updated features for each node. It may combine the current node features and the messages nodes received. Formally, it takes the following form .. code:: def node_udf(nodes): """ Parameters ---------- nodes : NodeBatch A batch of nodes. Returns ------- dict[str, tensor] The updated node features. It maps a feature name to the corresponding features of all nodes in the batch. The order of the nodes is the same as the order of the nodes in the input argument. """ DGL generates :class:`~dgl.udf.NodeBatch` instances internally, which expose the following interface for defining ``node_udf``. .. autosummary:: :toctree: ../../generated/ NodeBatch.data NodeBatch.mailbox NodeBatch.nodes NodeBatch.batch_size Degree Bucketing for Message Passing with User Defined Functions ---------------------------------------------------------------- DGL employs a degree-bucketing mechanism for message passing with UDFs. It groups nodes with a same in-degree and invokes message passing for each group of nodes. As a result, one shall not make any assumptions about the batch size of :class:`~dgl.udf.NodeBatch` instances. For a batch of nodes, DGL stacks the incoming messages of each node along the second dimension, ordered by edge ID. An example goes as follows: .. code:: python >>> import dgl >>> import torch >>> import dgl.function as fn >>> g = dgl.graph(([1, 3, 5, 0, 4, 2, 3, 3, 4, 5], [1, 1, 0, 0, 1, 2, 2, 0, 3, 3])) >>> g.edata['eid'] = torch.arange(10) >>> def reducer(nodes): ... print(nodes.mailbox['eid']) ... 
return {'n': nodes.mailbox['eid'].sum(1)} >>> g.update_all(fn.copy_e('eid', 'eid'), reducer) tensor([[5, 6], [8, 9]]) tensor([[3, 7, 2], [0, 1, 4]]) Essentially, node #2 and node #3 are grouped into one bucket with in-degree of 2, and node #0 and node #1 are grouped into one bucket with in-degree of 3. Within each bucket, the edges are ordered by the edge IDs for each node. ================================================ FILE: docs/source/conf.py ================================================ # -*- coding: utf-8 -*- # # Configuration file for the Sphinx documentation builder. # # This file does only contain a selection of the most common options. For a # full list see the documentation: # http://www.sphinx-doc.org/en/master/config # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath("../../python")) # -- Project information ----------------------------------------------------- project = "DGL" copyright = "2018, DGL Team" author = "DGL Team" import dgl version = dgl.__version__ release = dgl.__version__ dglbackend = os.environ.get("DGLBACKEND", "pytorch") # -- General configuration --------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
# Sphinx extensions enabled for the DGL documentation build.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx.ext.graphviz",
    "sphinxemoji.sphinxemoji",
    "sphinx_gallery.gen_gallery",
    "sphinx_copybutton",
    "nbsphinx",
    "nbsphinx_link",
]

# Do not run notebooks on non-pytorch backends
if dglbackend != "pytorch":
    nbsphinx_execute = "never"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = [".rst", ".md"]

# The master toctree document.
master_doc = "index"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
#
# Sphinx >= 5 no longer accepts ``None`` here: it emits a configuration
# warning and falls back to "en", which fails warnings-as-errors builds.
# The language code is therefore spelled out explicitly.
language = "en"

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = [
    "tutorials/**/*.ipynb",
    "tutorials/**/*.py",
]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_css_files = ["css/custom.css"]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "dgldoc"

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "dgl.tex", "DGL Documentation", "DGL Team", "manual"),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "dgl", "DGL Documentation", [author], 1)]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "dgl",
        "DGL Documentation",
        author,
        "dgl",
        "Library for deep learning on graphs.",
        "Miscellaneous",
    ),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project # The unique identifier of the text. This can be a ISBN number # or the project homepage. # # epub_identifier = '' # A unique identification for the text. # # epub_uid = '' # A list of files that should not be packed into the epub file. epub_exclude_files = ["search.html"] # -- Extension configuration ------------------------------------------------- autosummary_generate = True autodoc_member_order = "alphabetical" # Skip the following members. autodoc_mock_imports = ["dgl.nn.mxnet", "dgl.nn.tensorflow"] intersphinx_mapping = { "python": ( "https://docs.python.org/{.major}".format(sys.version_info), None, ), "numpy": ("http://docs.scipy.org/doc/numpy/", None), "scipy": ("http://docs.scipy.org/doc/scipy/reference", None), "matplotlib": ("http://matplotlib.org/", None), "networkx": ("https://networkx.github.io/documentation/stable", None), } # sphinx gallery configurations from sphinx_gallery.sorting import FileNameSortKey examples_dirs = [ "../../tutorials/blitz", "../../tutorials/dist", "../../tutorials/models", "../../tutorials/multi", "../../tutorials/cpu", ] # path to find sources gallery_dirs = [ "tutorials/blitz/", "tutorials/dist/", "tutorials/models/", "tutorials/multi/", "tutorials/cpu", ] # path to generate docs if dglbackend != "pytorch": examples_dirs = [] gallery_dirs = [] reference_url = { "dgl": None, "numpy": "http://docs.scipy.org/doc/numpy/", "scipy": "http://docs.scipy.org/doc/scipy/reference", "matplotlib": "http://matplotlib.org/", "networkx": "https://networkx.github.io/documentation/stable", } sphinx_gallery_conf = { "backreferences_dir": "generated/backreferences", "doc_module": ("dgl", "numpy"), "examples_dirs": examples_dirs, "gallery_dirs": gallery_dirs, "within_subsection_order": FileNameSortKey, "filename_pattern": ".py", "download_all_examples": False, } # Compatibility for different backend when builds tutorials if dglbackend == "mxnet": sphinx_gallery_conf["filename_pattern"] = "/*(?<=mx)\.py" if dglbackend ==
"pytorch": sphinx_gallery_conf["filename_pattern"] = "/*(?<!mx)\.py" copybutton_prompt_text = r">>> |\.\.\. " copybutton_prompt_is_regexp = True ================================================ FILE: docs/source/contribute.rst ================================================ Contribute to DGL ================= Any contribution to DGL is welcome. This guide covers everything about how to contribute to DGL. General development process --------------------------- A non-exhaustive list of contribution types is as follows: * New features and enhancements (`example `__). * New NN Modules (`example `__). * Bug fixes (`example `__). * Documentation improvements (`example `__). * New models and examples (`example `__). For features and bug fixes, we recommend first raising an `issue `__ using the corresponding issue template, so that the change can be fully discussed with the community before implementation. For documentation improvements and new models, we suggest posting a thread in our `discussion forum `__. Before development, please first read the following sections about coding styles and testing. All changes need to be reviewed in the form of a `pull request `__. Our `committers `__ (who have write permission on the repository) will review the code and suggest the necessary changes. The PR can be merged once the reviewers approve the changes. Git setup (for developers) -------------------------- First, fork the DGL GitHub repository. Suppose the forked repo is ``https://github.com/username/dgl``. Clone your forked repository locally: .. code-block:: bash git clone --recursive https://github.com/username/dgl.git Set up the upstream to the DGL official repository: .. code-block:: bash git remote add upstream https://github.com/dmlc/dgl.git You can verify the remote settings by typing ``git remote -v``: ..
code-block:: bash origin https://github.com/username/dgl.git (fetch) origin https://github.com/username/dgl.git (push) upstream https://github.com/dmlc/dgl.git (fetch) upstream https://github.com/dmlc/dgl.git (push) During development, we suggest working on a branch other than master. .. code-block:: bash git branch working-branch git checkout working-branch Once the changes are done, `create a pull request `__ so we can review your code. Once the pull request is merged, update your forked repository and delete your working branch: .. code-block:: bash git checkout master git pull upstream master git push origin master # update your forked repo git branch -D working-branch # the local branch could be deleted Coding styles ------------- For Python code, we generally follow the `PEP8 style guide `__. Python comments follow `NumPy style python docstrings `__. For C++ code, we generally follow the `Google C++ style guide `__. C++ comments should be `Doxygen compatible `__. A coding style check is mandatory for every pull request. To ease development, please run it locally first (this requires cpplint and pylint to be installed): .. code-block:: bash bash tests/scripts/task_lint.sh The Python code style configuration file is ``tests/lint/pylintrc``. We tweak it a little bit from the standard. For example, the following variable names are accepted: * ``i,j,k``: for loop variables * ``u,v``: for representing nodes * ``e``: for representing edges * ``g``: for representing graph * ``fn``: for representing functions * ``n,m``: for representing sizes * ``w,x,y``: for representing weight, input, output tensors * ``_``: for unused variables Contributing New Models as Examples ----------------------------------- To contribute a new model within a specific supported tensor framework (e.g. PyTorch, or MXNet), simply 1.
Make a directory with the name of your model (say ``awesome-gnn``) within the directory ``examples/${DGLBACKEND}`` where ``${DGLBACKEND}`` refers to the framework name. 2. Populate it with your work, along with a README. Make a pull request once you are done. Your README should contain at least these: * Instructions for running your program. * The performance results, such as speed or accuracy or any metric, along with comparisons against some alternative implementations (if available). * Your performance metric does not have to beat others' implementation; they are just a signal of your code being *likely* correct. * Your speed also does not have to surpass others'. * However, better numbers are always welcomed. 3. The committers will review it, suggesting or making changes as necessary. 4. Resolve the suggestions and reviews, and go back to step 3 until approved. 5. Merge it and enjoy your day. Data hosting ```````````` One often wishes to upload a dataset when contributing a new runnable model example, especially when covering a new field not in our existing examples. Uploading data file into the Git repository directly is a **bad idea** because we do not want the cloners to always download the dataset no matter what. Instead, we strongly suggest the data files be hosted on a permanent cloud storage service (e.g. DropBox, Amazon S3, Baidu, Google Drive, etc.). One can either * Make your scripts automatically download your data if possible (e.g. when using Amazon S3), or * Clearly state the instructions of downloading your dataset (e.g. when using Baidu, where auto-downloading is hard). If you have trouble doing so (e.g. you cannot find a permanent cloud storage), feel free to post in our `discussion forum `__. Depending on the commonality of the contributed task, model, or dataset, we (the DGL team) would migrate your dataset to the official DGL Dataset Repository on Amazon S3. 
If you wish to host a particular dataset, you can either * DIY: make changes in the ``dgl.data`` module; see our :ref:`dataset APIs <apidata>` for more details, or, * Post in our `discussion forum `__ (again). Currently, all the datasets of DGL model examples are hosted on Amazon S3. Contributing Core Features -------------------------- We call a feature that goes into the Python ``dgl`` package a *core feature*. Since DGL supports multiple tensor frameworks, contributing a core feature is no easy job. However, we do **NOT** require knowledge of all tensor frameworks. Instead, 1. Before making a pull request, please make sure your code is covered by unit tests on **at least one** supported framework; see the `Building and Testing`_ section for details. 2. Once you have done that, make a pull request and summarize your changes, and wait for the CI to finish. 3. If the CI fails on a tensor platform that you are unfamiliar with (which is often the case), please refer to the `Supporting Multiple Platforms`_ section. 4. The committers will review it, suggesting or making changes as necessary. 5. Resolve the suggestions and reviews, and go back to step 3 until approved. 6. Merge it and enjoy your day. Supporting Multiple Platforms ````````````````````````````` This is the hard one, but you don't have to know PyTorch AND MXNet (maybe AND Tensorflow, AND Chainer, etc., in the future) to do so. The rule of thumb in supporting multiple platforms is simple: * In the ``dgl`` Python package, **always** avoid using framework-specific operators (*including array indexing!*) directly. Use the wrappers in ``dgl.backend`` or ``numpy`` arrays instead. * If you have trouble doing so (either because ``dgl.backend`` does not cover the necessary operator, or you don't have a GPU, or for whatever reason), please label your PR with the ``backend support`` tag, and one or more DGL team members who understand CPU AND GPU AND PyTorch AND MXNet (AND Tensorflow AND Chainer AND etc.)
will look into it. Building and Testing ```````````````````` To build DGL locally, follow the steps described in :ref:`Install from source <install-from-source>`. However, to ease development, we suggest NOT installing DGL but working directly in the source tree. To achieve this, export the following environment variables: .. code-block:: bash export DGL_HOME=/path/to/your/dgl/clone export DGL_LIBRARY_PATH=$DGL_HOME/build export PYTHONPATH=$PYTHONPATH:$DGL_HOME/python If you are working on a performance-critical part, you may want to turn on the Cython build: .. code-block:: bash cd python python setup.py build_ext --inplace You can test the build by running the following command and checking that it prints the path of your local clone. .. code-block:: bash python -c 'import dgl; print(dgl.__path__)' Unit tests ~~~~~~~~~~ Currently, we use ``nose`` for unit tests. The organization goes as follows: * ``backend``: Additional unified tensor interface for supported frameworks. The functions there are only used in unit tests, not DGL itself. Note that the code there is not a unit test by itself. The additional backend can be imported with .. code-block:: python import backend The additional backend contains the following files: - ``backend/backend_unittest.py``: stub file for all additional tensor functions. - ``backend/${DGLBACKEND}/__init__.py``: implementations of the stubs for the backend ``${DGLBACKEND}``. - ``backend/__init__.py``: when imported, it replaces the stub implementations with the framework-specific code, depending on the selected backend. It also changes the signature of some existing backend functions to automatically select dtypes and contexts. * ``compute``: All framework-agnostic computation-related unit tests go there. Anything inside should not depend on a specific tensor library. Tensor functions not provided in the DGL unified tensor interface (i.e. ``dgl.backend``) should go into the ``backend`` directory. * ``${DGLBACKEND}`` (e.g.
``pytorch`` and ``mxnet``): All framework-specific computation-related unit tests go there. * ``graph_index``: All unit tests for C++ graph structure implementation go there. The Python API being tested in this directory, if any, should be as minimal as possible (usually simple wrappers of corresponding C++ functions). * ``lint``: Pylint-related files. * ``scripts``: Automated test scripts for CI. To run unit tests, run .. code-block:: bash sh tests/scripts/task_unit_test.sh <your-backend> where ``<your-backend>`` can be any supported backend (i.e. ``pytorch`` or ``mxnet``). Contributing Documentations --------------------------- If the change is about document improvement, we suggest (and strongly suggest if you change the runnable code there) building the documentation and rendering it locally before making a pull request. Building Docs Locally ````````````````````` In general building the docs locally involves the following: 1. Install ``sphinx``, ``sphinx-gallery``, and ``sphinx_rtd_theme``. 2. You need both PyTorch and MXNet because our tutorial contains code from both frameworks. This does *not* require knowledge of coding with both frameworks, though. 3. Run the following: .. code-block:: bash cd docs ./clean.sh make html cd build/html python3 -m http.server 8080 4. Open ``http://localhost:8080`` and enjoy your work. See `here `__ for more details. Contributing Editorial Changes via GitHub Web Interface ``````````````````````````````````````````````````````` If one is only changing the wording (i.e. not touching the runnable code at all), one can do so without using the Git CLI: 1. Make your fork by clicking on the **Fork** button in the DGL main repository web page. 2. Make whatever changes in the web interface *within your own fork*. You can usually tell if you are inside your own fork or in the main repository by checking whether you can commit to the ``master`` branch: if you cannot, you are in the wrong place. 3. Once done, make a pull request (on the web interface). 4.
The committers will review it, suggesting or making changes as necessary. 5. Resolve the suggestions and reviews, and go back to step 4 until approved. 6. Merge it and enjoy your day. Contributing Code Changes ````````````````````````` When changing code, please make sure to build it locally and see if it fails. ================================================ FILE: docs/source/developer/ffi.rst ================================================ .. currentmodule:: dgl DGL Foreign Function Interface (FFI) ==================================== We all like Python because it is easy to manipulate. We all like C because it is fast, reliable and typed. To have the merits of both ends, DGL is mostly written in Python for quick prototyping, while the performance-critical parts are lowered to C. Thus, DGL developers frequently need to write a C routine and expose it to Python, via a mechanism called *Foreign Function Interface (FFI)*. There are many FFI solutions out there. In DGL, we want to keep it simple, intuitive and efficient for critical use cases. That's why when we came across the FFI solution in the TVM project, we immediately fell for it. It exploits the idea of functional programming so that it exposes only a dozen C APIs and new APIs can be built upon it. We decided to borrow the idea (shamelessly). For example, defining a C API that is exposed to Python takes only a few lines of code: .. code:: c++ // file: calculator.cc (put it in dgl/src folder) #include #include using namespace dgl::runtime; DGL_REGISTER_GLOBAL("calculator.MyAdd") .set_body([] (DGLArgs args, DGLRetValue* rv) { int a = args[0]; int b = args[1]; *rv = a + b; }); Compile and build the library. On the python side, create a ``calculator.py`` file under ``dgl/python/dgl/`` ..
code:: python # file: calculator.py from ._ffi.function import _init_api def add(a, b): # MyAdd has been registered via `_init_api` call below return MyAdd(a, b) _init_api("dgl.calculator") The trick is that the FFI system first masks the type information of the function arguments, so all the C function calls can go through one C API (``DGLFuncCall``). The type information is retrieved in the function body by static conversion, and a runtime type check is performed to make sure that the type conversion is correct. The overhead of such back-and-forth is negligible as long as the function call is not too light (the above example is actually a bad one). TVM's `PackedFunc document `_ has more details. Defining new types ------------------ ``DGLArgs`` and ``DGLRetValue`` only support a limited number of types: * Numerical values: int, float, double, ... * string * Function (in the form of PackedFunc) * NDArray Though limited, the above type system is very powerful because it supports functions as first-class citizens. For example, if you want to return multiple values, you can return a PackedFunc which returns each value given an integer index. However, in many cases, new types are still desired to ease the development process: * The argument/return value is a composition of collections (e.g. dictionary of dictionary of list). * Sometimes we just want to have a notion of "structure" (e.g. given an apple, get its color by ``apple.color``). To achieve this, we introduce the Object type system. For example, to define a new type ``Calculator``: .. code:: c++ // file: calculator.cc #include using namespace runtime; class CalculatorObject : public Object { public: std::string brand; int price; void VisitAttrs(AttrVisitor *v) final { v->Visit("brand", &brand); v->Visit("price", &price); } static constexpr const char* _type_key = "Calculator"; DGL_DECLARE_OBJECT_TYPE_INFO(CalculatorObject, Object); }; // This is to define a reference class (the wrapper of an object shared pointer).
// A minimal implementation is as follows, but you could define extra methods. class Calculator : public ObjectRef { public: const CalculatorObject* operator->() const { return static_cast<const CalculatorObject*>(obj_.get()); } using ContainerType = CalculatorObject; }; DGL_REGISTER_GLOBAL("calculator.CreateCalculator") .set_body([] (DGLArgs args, DGLRetValue* rv) { std::string brand = args[0]; int price = args[1]; auto o = std::make_shared<CalculatorObject>(); o->brand = brand; o->price = price; *rv = o; }); On the python side: .. code:: python # file: calculator.py from dgl._ffi.object import register_object, ObjectBase from ._ffi.function import _init_api @register_object class Calculator(ObjectBase): @staticmethod def create(brand, price): # invoke a C API, the return value is of `Calculator` type return CreateCalculator(brand, price) _init_api("dgl.calculator") We can then simply create a ``Calculator`` object by: .. code:: python calc = Calculator.create("casio", 100) What is nice about this object is that it defines a visitor pattern that is essentially a reflection mechanism to get its internal attributes. For example, you can print the calculator's brand and price by simply accessing its attributes. .. code:: python print(calc.brand) print(calc.price) The reflection is indeed a little bit slow due to the string key lookup. To speed it up, you could define an attribute access API: .. code:: c++ // file: calculator.cc DGL_REGISTER_GLOBAL("calculator.CalculatorGetBrand") .set_body([] (DGLArgs args, DGLRetValue* rv) { Calculator calc = args[0]; *rv = calc->brand; }); Containers ---------- Containers are also objects. For example, the C API below accepts a list of integers and returns their sum: .. code:: c++ // in file: calculator.cc #include using namespace runtime; DGL_REGISTER_GLOBAL("calculator.Sum") .set_body([] (DGLArgs args, DGLRetValue* rv) { // All the DGL supported values are represented as a ValueObject, which // contains a data field.
List<Value> values = args[0]; int sum = 0; for (int i = 0; i < values.size(); ++i) { sum += static_cast<int>(values[i]->data); } *rv = sum; }); Invoking this API is simple -- just pass a python list of integers. DGL FFI will automatically convert python list/tuple/dictionary to the corresponding object type. .. code:: python # in file: calculator.py from ._ffi.function import _init_api Sum([0, 1, 2, 3, 4, 5]) _init_api("dgl.calculator") The elements in the containers can be any objects, which allows the containers to be composed. Below is an API that accepts a list of calculators and prints out their prices: .. code:: c++ // in file: calculator.cc #include #include using namespace runtime; DGL_REGISTER_GLOBAL("calculator.PrintCalculators") .set_body([] (DGLArgs args, DGLRetValue* rv) { List<Calculator> calcs = args[0]; for (int i = 0; i < calcs.size(); ++i) { std::cout << calcs[i]->price << std::endl; } }); Please note that containers are NOT meant for passing a large collection of items from/to C APIs. It will be quite slow in these cases. It is recommended to benchmark first. As an alternative, use NDArray for a large collection of numerical values and use ``dgl.batch`` to batch a lot of ``DGLGraph``'s into a single ``DGLGraph``. ================================================ FILE: docs/source/env_var.rst ================================================ Environment Variables ===================== Global Configurations --------------------- * ``DGLDEFAULTDIR``: * Values: String (default=``"${HOME}/.dgl"``) * The directory to save the DGL configuration files. * ``DGL_LOG_DEBUG``: * Values: Set to ``"1"`` to enable debug level logging for DGL * Enable debug level logging for DGL Backend Options --------------- * ``DGLBACKEND``: * Values: String (default='pytorch') * The backend deep learning framework for DGL. * Choices: * 'pytorch': use PyTorch as the backend implementation. * 'tensorflow': use TensorFlow as the backend implementation. * 'mxnet': use Apache MXNet as the backend implementation.
Data Repository --------------- * ``DGL_REPO``: * Values: String (default='https://data.dgl.ai/') * The repository url to be used for DGL datasets and pre-trained models. * Suggested values: * 'https://data.dgl.ai/': DGL repo for Global Region. * 'https://dgl-data.s3.cn-north-1.amazonaws.com.cn/': DGL repo for Mainland China * ``DGL_DOWNLOAD_DIR``: * Values: String (default=``"${HOME}/.dgl"``) * The local directory to cache the downloaded data. ================================================ FILE: docs/source/faq.rst ================================================ Frequently Asked Questions (FAQ) ================================ For frequently asked questions, refer to `this post `__. ================================================ FILE: docs/source/features/dataset.rst ================================================ Dataset (Temporary) .. table:: +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ | Datset Name | Usage |# of graphs|Avg. # of nodes|Avg. 
# of edges| Node field |Edge field |Temporal| +================+==========================================================+===========+===============+===============+============================================+===========+========+ |BitcoinOTC |BitcoinOTC() | 136| 6005.00| 21209.98| |h |True | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Cora |CitationGraphDataset('cora') | 1| 2708.00| 10556.00|train_mask, val_mask, test_mask, label, feat| |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Citeseer |CitationGraphDataset('citeseer') | 1| 3327.00| 9228.00|train_mask, val_mask, test_mask, label, feat| |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |PubMed |CitationGraphDataset('pubmed') | 1| 19717.00| 88651.00|train_mask, val_mask, test_mask, label, feat| |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |QM7b |QM7b() | 7211| 15.42| 244.95| |h |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Reddit |RedditDataset() | 1| 232965.00| 114615892.00|train_mask, val_mask, test_mask, feat, label| |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |ENZYMES |TUDataset('ENZYMES') | 600| 32.63| 
124.27|node_labels, node_attr | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |DD |TUDataset('DD') | 1178| 284.32| 1431.32|node_labels | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |COLLAB |TUDataset('COLLAB') | 5000| 74.49| 9830.00| | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |MUTAG |TUDataset('MUTAG') | 188| 17.93| 39.59|node_labels |edge_labels|False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |PROTEINS |TUDataset('PROTEINS') | 1113| 39.06| 145.63|node_labels, node_attr | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |PPI |PPIDataset('train')/PPIDataset('valid')/PPIDataset('test')| 20| 2245.30| 63563.70|feat | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |KarateClub |KarateClub() | 1| 34.00| 156.00|label | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Amazon computer |AmazonCoBuy('computers') | 1| 13752.00| 574418.00|feat, label | |False | 
+----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Amazon photo |AmazonCoBuy('photo') | 1| 7650.00| 287326.00|feat, label | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Coauthor cs |Coauthor('cs') | 1| 18333.00| 327576.00|feat, label | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |Coauthor physics|Coauthor('physics') | 1| 34493.00| 991848.00|feat, label | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |GDELT |GDELT('train')/GDELT('valid')/GDELT('test') | 2304| 23033.00| 811333.15| |rel_type |True | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |ICEWS18 |ICEWS18('train')/ICEWS18('valid')/ICEWS18('test') | 240| 23033.00| 192640.22| |rel_type |True | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ |CoraFull |CoraFull() | 1| 19793.00| 130622.00|feat, label | |False | +----------------+----------------------------------------------------------+-----------+---------------+---------------+--------------------------------------------+-----------+--------+ ================================================ FILE: docs/source/gen_dataset_stat.py ================================================ import numpy as np import 
pandas as pd from dgl import DGLGraph # from dgl.data.qm9 import QM9 from dgl.data import CitationGraphDataset, PPIDataset, RedditDataset, TUDataset from dgl.data.bitcoinotc import BitcoinOTC from dgl.data.gdelt import GDELT from dgl.data.gindt import GINDataset from dgl.data.gnn_benchmark import AmazonCoBuy, Coauthor, CoraFull from dgl.data.icews18 import ICEWS18 from dgl.data.karate import KarateClub from dgl.data.qm7b import QM7b from pytablewriter import MarkdownTableWriter, RstGridTableWriter ds_list = { "BitcoinOTC": "BitcoinOTC()", "Cora": "CitationGraphDataset('cora')", "Citeseer": "CitationGraphDataset('citeseer')", "PubMed": "CitationGraphDataset('pubmed')", "QM7b": "QM7b()", "Reddit": "RedditDataset()", "ENZYMES": "TUDataset('ENZYMES')", "DD": "TUDataset('DD')", "COLLAB": "TUDataset('COLLAB')", "MUTAG": "TUDataset('MUTAG')", "PROTEINS": "TUDataset('PROTEINS')", "PPI": "PPIDataset('train')/PPIDataset('valid')/PPIDataset('test')", # "Cora Binary": "CitationGraphDataset('cora_binary')", "KarateClub": "KarateClub()", "Amazon computer": "AmazonCoBuy('computers')", "Amazon photo": "AmazonCoBuy('photo')", "Coauthor cs": "Coauthor('cs')", "Coauthor physics": "Coauthor('physics')", "GDELT": "GDELT('train')/GDELT('valid')/GDELT('test')", "ICEWS18": "ICEWS18('train')/ICEWS18('valid')/ICEWS18('test')", "CoraFull": "CoraFull()", } writer = RstGridTableWriter() # writer = MarkdownTableWriter() extract_graph = lambda g: g if isinstance(g, DGLGraph) else g[0] stat_list = [] for k, v in ds_list.items(): print(k, " ", v) ds = eval(v.split("/")[0]) num_nodes = [] num_edges = [] for i in range(len(ds)): g = extract_graph(ds[i]) num_nodes.append(g.num_nodes()) num_edges.append(g.num_edges()) gg = extract_graph(ds[0]) dd = { "Datset Name": k, "Usage": v, "# of graphs": len(ds), "Avg. # of nodes": np.mean(num_nodes), "Avg. 
# of edges": np.mean(num_edges), "Node field": ", ".join(list(gg.ndata.keys())), "Edge field": ", ".join(list(gg.edata.keys())), # "Graph field": ', '.join(ds[0][0].gdata.keys()) if hasattr(ds[0][0], "gdata") else "", "Temporal": hasattr(ds, "is_temporal"), } stat_list.append(dd) print(dd.keys()) df = pd.DataFrame(stat_list) df = df.reindex(columns=dd.keys()) writer.from_dataframe(df) writer.write_table() ================================================ FILE: docs/source/graphtransformer/data.rst ================================================ Prepare Data ============ In this section, we will prepare the data for the Graphormer model introduced before. We can use any dataset containing :class:`~dgl.DGLGraph` objects and standard PyTorch dataloader to feed the data to the model. The key is to define a collate function to group features of multiple graphs into batches. We show an example of the collate function as follows: .. code:: python def collate(graphs): # compute shortest path features, can be done in advance for g in graphs: spd, path = dgl.shortest_dist(g, root=None, return_paths=True) g.ndata["spd"] = spd g.ndata["path"] = path num_graphs = len(graphs) num_nodes = [g.num_nodes() for g in graphs] max_num_nodes = max(num_nodes) attn_mask = th.zeros(num_graphs, max_num_nodes, max_num_nodes) node_feat = [] in_degree, out_degree = [], [] path_data = [] # Since shortest_dist returns -1 for unreachable node pairs and padded # nodes are unreachable to others, distance relevant to padded nodes # use -1 padding as well. dist = -th.ones( (num_graphs, max_num_nodes, max_num_nodes), dtype=th.long ) for i in range(num_graphs): # A binary mask where invalid positions are indicated by True. # Avoid the case where all positions are invalid. 
attn_mask[i, :, num_nodes[i] + 1 :] = 1 # +1 to distinguish padded non-existing nodes from real nodes node_feat.append(graphs[i].ndata["feat"] + 1) # 0 for padding in_degree.append( th.clamp(graphs[i].in_degrees() + 1, min=0, max=512) ) out_degree.append( th.clamp(graphs[i].out_degrees() + 1, min=0, max=512) ) # Path padding to make all paths to the same length "max_len". path = graphs[i].ndata["path"] path_len = path.size(dim=2) # shape of shortest_path: [n, n, max_len] max_len = 5 if path_len >= max_len: shortest_path = path[:, :, :max_len] else: p1d = (0, max_len - path_len) # Use the same -1 padding as shortest_dist for # invalid edge IDs. shortest_path = th.nn.functional.pad(path, p1d, "constant", -1) pad_num_nodes = max_num_nodes - num_nodes[i] p3d = (0, 0, 0, pad_num_nodes, 0, pad_num_nodes) shortest_path = th.nn.functional.pad(shortest_path, p3d, "constant", -1) # +1 to distinguish padded non-existing edges from real edges edata = graphs[i].edata["feat"] + 1 # shortest_dist pads non-existing edges (at the end of shortest # paths) with edge IDs -1, and th.zeros(1, edata.shape[1]) stands # for all padded edge features. edata = th.cat( (edata, th.zeros(1, edata.shape[1]).to(edata.device)), dim=0 ) path_data.append(edata[shortest_path]) dist[i, : num_nodes[i], : num_nodes[i]] = graphs[i].ndata["spd"] # node feat padding node_feat = th.nn.utils.rnn.pad_sequence(node_feat, batch_first=True) # degree padding in_degree = th.nn.utils.rnn.pad_sequence(in_degree, batch_first=True) out_degree = th.nn.utils.rnn.pad_sequence(out_degree, batch_first=True) return ( node_feat, in_degree, out_degree, attn_mask, th.stack(path_data), dist, ) In this example, we also omit details like the addition of a virtual node. For more details, please refer to the `Graphormer example `_. 
================================================ FILE: docs/source/graphtransformer/index.rst ================================================ 🆕 Tutorial: Graph Transformer =============================== This tutorial introduces the **graph transformer** (:mod:`~dgl.nn.gt`) module, which is a set of utility modules for building and training graph transformer models. .. toctree:: :maxdepth: 2 :titlesonly: model data ================================================ FILE: docs/source/graphtransformer/model.rst ================================================ Build Model =========== **GraphTransformer** is a graph neural network that uses multi-head self-attention (sparse or dense) to encode the graph structure and node features. It is a generalization of the `Transformer `_ architecture to arbitrary graphs. In this tutorial, we will show how to build a graph transformer model with DGL using the `Graphormer `_ model as an example. Graphormer is a Transformer model designed for graph-structured data, which encodes the structural information of a graph into the standard Transformer. Specifically, Graphormer utilizes degree encoding to measure the importance of nodes, and spatial and path encoding to measure the relation between node pairs. The degree encoding and the node features serve as input to Graphormer, while the spatial and path encoding act as bias terms in the self-attention module. Degree Encoding ------------------- The degree encoder is a learnable embedding layer that encodes the degree of each node into a vector. It takes as input the batched input and output degrees of graph nodes, and outputs the degree embeddings of the nodes. .. code:: python degree_encoder = dgl.nn.DegreeEncoder( max_degree=8, # the maximum degree to cut off embedding_dim=512 # the dimension of the degree embedding ) Path Encoding ------------- The path encoder encodes the edge features on the shortest path between two nodes to get attention bias for the self-attention module.
It takes as input the batched edge features along the shortest paths and outputs the attention bias based on path encoding. .. code:: python path_encoder = PathEncoder( max_len=5, # the maximum length of the shortest path feat_dim=512, # the dimension of the edge feature num_heads=8, # the number of attention heads ) Spatial Encoding ---------------- The spatial encoder encodes the shortest distance between two nodes to get attention bias for the self-attention module. It takes as input the shortest distance between two nodes and outputs the attention bias based on spatial encoding. .. code:: python spatial_encoder = SpatialEncoder( max_dist=5, # the maximum distance between two nodes num_heads=8, # the number of attention heads ) Graphormer Layer ---------------- The Graphormer layer is like a Transformer encoder layer with the Multi-head Attention part replaced with :class:`~dgl.nn.BiasedMHA`. It takes in not only the input node features, but also the attention bias computed above, and outputs the updated node features. We can stack multiple Graphormer layers as a list just like implementing a Transformer encoder in PyTorch. .. code:: python layers = th.nn.ModuleList([ GraphormerLayer( feat_size=512, # the dimension of the input node features hidden_size=1024, # the dimension of the hidden layer num_heads=8, # the number of attention heads dropout=0.1, # the dropout rate activation=th.nn.ReLU(), # the activation function norm_first=False, # whether to put the normalization before attention and feedforward ) for _ in range(6) ]) Model Forward ------------- Grouping the modules above defines the primary components of the Graphormer model. We can then define the forward process as follows: ..
code:: python node_feat, in_degree, out_degree, attn_mask, path_data, dist = \ next(iter(dataloader)) # we will use the first batch as an example num_graphs, max_num_nodes, _ = node_feat.shape deg_emb = degree_encoder(th.stack((in_degree, out_degree))) # node feature + degree encoding as input node_feat = node_feat + deg_emb # spatial encoding and path encoding serve as attention bias path_encoding = path_encoder(dist, path_data) spatial_encoding = spatial_encoder(dist) attn_bias[:, 1:, 1:, :] = path_encoding + spatial_encoding # graphormer layers for layer in layers: x = layer( x, attn_mask=attn_mask, attn_bias=attn_bias, ) For simplicity, we omit some details in the forward process. For the complete implementation, please refer to the `Graphormer example `_. You can also explore other `utility modules `_ to customize your own graph transformer model. In the next section, we will show how to prepare the data for training. ================================================ FILE: docs/source/guide/data-dataset.rst ================================================ .. _guide-data-pipeline-dataset: 4.1 DGLDataset class -------------------- :ref:`(中文版) ` :class:`~dgl.data.DGLDataset` is the base class for processing, loading and saving graph datasets defined in :ref:`apidata`. It implements the basic pipeline for processing graph data. The following flow chart shows how the pipeline works. To process a graph dataset located in a remote server or local disk, one can define a class, say ``MyDataset``, inheriting from :class:`dgl.data.DGLDataset`. The template of ``MyDataset`` is as follows. .. figure:: https://data.dgl.ai/asset/image/userguide_data_flow.png :align: center Flow chart for graph data input pipeline defined in class DGLDataset. .. code:: from dgl.data import DGLDataset class MyDataset(DGLDataset): """ Template for customizing graph datasets in DGL. 
Parameters ---------- url : str URL to download the raw dataset raw_dir : str Specifying the directory that will store the downloaded data or the directory that already stores the input data. Default: ~/.dgl/ save_dir : str Directory to save the processed dataset. Default: the value of `raw_dir` force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information """ def __init__(self, url=None, raw_dir=None, save_dir=None, force_reload=False, verbose=False): super(MyDataset, self).__init__(name='dataset_name', url=url, raw_dir=raw_dir, save_dir=save_dir, force_reload=force_reload, verbose=verbose) def download(self): # download raw data to local disk pass def process(self): # process raw data to graphs, labels, splitting masks pass def __getitem__(self, idx): # get one example by index pass def __len__(self): # number of data examples pass def save(self): # save processed data to directory `self.save_path` pass def load(self): # load processed data from directory `self.save_path` pass def has_cache(self): # check whether there are processed data in `self.save_path` pass :class:`~dgl.data.DGLDataset` class has abstract functions ``process()``, ``__getitem__(idx)`` and ``__len__()`` that must be implemented in the subclass. DGL also recommends implementing saving and loading as well, since they can save significant time for processing large datasets, and there are several APIs making it easy (see :ref:`guide-data-pipeline-savenload`). Note that the purpose of :class:`~dgl.data.DGLDataset` is to provide a standard and convenient way to load graph data. One can store graphs, features, labels, masks and basic information about the dataset, such as number of classes, number of labels, etc. Operations such as sampling, partition or feature normalization are done outside of the :class:`~dgl.data.DGLDataset` subclass. The rest of this chapter shows the best practices to implement the functions in the pipeline. 
================================================ FILE: docs/source/guide/data-download.rst ================================================ .. _guide-data-pipeline-download: 4.2 Download raw data (optional) -------------------------------- :ref:`(中文版) ` If a dataset is already in local disk, make sure it’s in directory ``raw_dir``. If one wants to run the code anywhere without bothering to download and move data to the right directory, one can do it automatically by implementing function ``download()``. If the dataset is a zip file, make ``MyDataset`` inherit from :class:`dgl.data.DGLBuiltinDataset` class, which handles the zip file extraction for us. Otherwise, one needs to implement ``download()`` like in :class:`~dgl.data.QM7bDataset`: .. code:: import os from dgl.data.utils import download def download(self): # path to store the file file_path = os.path.join(self.raw_dir, self.name + '.mat') # download file download(self.url, path=file_path) The above code downloads a .mat file to directory ``self.raw_dir``. If the file is a .gz, .tar, .tar.gz or .tgz file, use :func:`~dgl.data.utils.extract_archive` function to extract. The following code shows how to download a .gz file in :class:`~dgl.data.BitcoinOTCDataset`: .. code:: from dgl.data.utils import download, check_sha1 def download(self): # path to store the file # make sure to use the same suffix as the original file name's gz_file_path = os.path.join(self.raw_dir, self.name + '.csv.gz') # download file download(self.url, path=gz_file_path) # check SHA-1 if not check_sha1(gz_file_path, self._sha1_str): raise UserWarning('File {} is downloaded but the content hash does not match.' 'The repo may be outdated or download may be incomplete. ' 'Otherwise you can create an issue for it.'.format(self.name + '.csv.gz')) # extract file to directory `self.name` under `self.raw_dir` self._extract_gz(gz_file_path, self.raw_path) The above code will extract the file into directory ``self.name`` under ``self.raw_dir``. 
If the class inherits from :class:`dgl.data.DGLBuiltinDataset` to handle zip file, it will extract the file into directory ``self.name`` as well. Optionally, one can check SHA-1 string of the downloaded file as the example above does, in case the author changed the file in the remote server some day. ================================================ FILE: docs/source/guide/data-loadcsv.rst ================================================ .. _guide-data-pipeline-loadcsv: 4.6 Loading data from CSV files ---------------------------------------------- Comma Separated Value (CSV) is a widely used data storage format. DGL provides :class:`~dgl.data.CSVDataset` for loading and parsing graph data stored in CSV format. To create a ``CSVDataset`` object: .. code:: python import dgl ds = dgl.data.CSVDataset('/path/to/dataset') The returned ``ds`` object is a standard :class:`~dgl.data.DGLDataset`. For example, one can get graph samples using ``__getitem__`` as well as node/edge features using ``ndata``/``edata``. .. code:: python # A demonstration of how to use the loaded dataset. The feature names # may vary depending on the CSV contents. g = ds[0] # get the graph label = g.ndata['label'] feat = g.ndata['feat'] Data folder structure ~~~~~~~~~~~~~~~~~~~~~ .. code:: /path/to/dataset/ |-- meta.yaml # metadata of the dataset |-- edges_0.csv # edge data including src_id, dst_id, feature, label and so on |-- ... # you can have as many CSVs for edge data as you want |-- nodes_0.csv # node data including node_id, feature, label and so on |-- ... # you can have as many CSVs for node data as you want |-- graphs.csv # graph-level features Node/edge/graph-level data are stored in CSV files. ``meta.yaml`` is a metadata file specifying where to read nodes/edges/graphs data and how to parse them to construct the dataset object. 
A minimal data folder contains one ``meta.yaml`` and two CSVs, one for node data and one for edge data, in which case the dataset contains only a single graph with no graph-level data. Dataset of a single feature-less graph ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When the dataset contains only one graph with no node or edge features, there need only three files in the data folder: ``meta.yaml``, one CSV for node IDs and one CSV for edges: .. code:: ./mini_featureless_dataset/ |-- meta.yaml |-- nodes.csv |-- edges.csv ``meta.yaml`` contains the following information: .. code:: yaml dataset_name: mini_featureless_dataset edge_data: - file_name: edges.csv node_data: - file_name: nodes.csv ``nodes.csv`` lists the node IDs under the ``node_id`` field: .. code:: node_id 0 1 2 3 4 ``edges.csv`` lists all the edges in two columns (``src_id`` and ``dst_id``) specifying the source and destination node ID of each edge: .. code:: src_id,dst_id 4,4 4,1 3,0 4,1 4,0 1,2 1,3 3,3 1,1 4,1 After loaded, the dataset has one graph without any features: .. code:: python >>> import dgl >>> dataset = dgl.data.CSVDataset('./mini_featureless_dataset') >>> g = dataset[0] # only one graph >>> print(g) Graph(num_nodes=5, num_edges=10, ndata_schemes={} edata_schemes={}) .. note:: Non-integer node IDs are allowed. When constructing the graph, ``CSVDataset`` will map each raw ID to an integer ID starting from zero. If the node IDs are already distinct integers from 0 to ``num_nodes-1``, no mapping is applied. .. note:: Edges are always directed. To have both directions, add reversed edges in the edge CSV file or use :class:`~dgl.transforms.AddReverse` to transform the loaded graph. A graph without any feature is often of less interest. In the next example, we will show how to load and parse node or edge features. 
Dataset of a single graph with features and labels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ When the dataset contains a single graph with node or edge features and labels, there still need only three files in the data folder: ``meta.yaml``, one CSV for node IDs and one CSV for edges: .. code:: ./mini_feature_dataset/ |-- meta.yaml |-- nodes.csv |-- edges.csv ``meta.yaml``: .. code:: yaml dataset_name: mini_feature_dataset edge_data: - file_name: edges.csv node_data: - file_name: nodes.csv ``edges.csv`` with five synthetic edge data (``label``, ``train_mask``, ``val_mask``, ``test_mask``, ``feat``): .. code:: src_id,dst_id,label,train_mask,val_mask,test_mask,feat 4,0,2,False,True,True,"0.5477868606453535, 0.4470617033458436, 0.936706701616337" 4,0,0,False,False,True,"0.9794634290792008, 0.23682038840665198, 0.049629338970987646" 0,3,1,True,True,True,"0.8586722047523594, 0.5746912787380253, 0.6462162561249654" 0,1,2,True,False,False,"0.2730008213674695, 0.5937484188166621, 0.765544096939567" 0,2,1,True,True,True,"0.45441619816038514, 0.1681403185591509, 0.9952376085297715" 0,0,0,False,False,False,"0.4197669213305396, 0.849983324532477, 0.16974127573016262" 2,2,1,False,True,True,"0.5495035052928215, 0.21394654203489705, 0.7174910641836348" 1,0,2,False,True,False,"0.008790817766266334, 0.4216530595907526, 0.529195480661293" 3,0,0,True,True,True,"0.6598715708878852, 0.1932390907048961, 0.9774471538377553" 4,0,1,False,False,False,"0.16846068931179736, 0.41516080644186737, 0.002158116134429955" ``nodes.csv`` with five synthetic node data (``label``, ``train_mask``, ``val_mask``, ``test_mask``, ``feat``): .. 
code:: node_id,label,train_mask,val_mask,test_mask,feat 0,1,False,True,True,"0.07816474278491703, 0.9137336384979067, 0.4654086994009452" 1,1,True,True,True,"0.05354099924658973, 0.8753101998792645, 0.33929432608774135" 2,1,True,False,True,"0.33234211884156384, 0.9370522452510665, 0.6694943496824788" 3,0,False,True,False,"0.9784264442230887, 0.22131880861864428, 0.3161154827254189" 4,1,True,True,False,"0.23142237259162102, 0.8715767748481147, 0.19117861103555467" Once loaded, the dataset has one graph. Node/edge features are stored in ``ndata`` and ``edata`` with the same column names. The example demonstrates how to specify a vector-shaped feature using a comma-separated list enclosed by double quotes ``"..."``. .. code:: python >>> import dgl >>> dataset = dgl.data.CSVDataset('./mini_feature_dataset') >>> g = dataset[0] # only one graph >>> print(g) Graph(num_nodes=5, num_edges=10, ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'feat': Scheme(shape=(3,), dtype=torch.float64)} edata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'train_mask': Scheme(shape=(), dtype=torch.bool), 'val_mask': Scheme(shape=(), dtype=torch.bool), 'test_mask': Scheme(shape=(), dtype=torch.bool), 'feat': Scheme(shape=(3,), dtype=torch.float64)}) .. note:: By default, ``CSVDataset`` assumes all feature data to be numerical values (e.g., int, float, bool or list) and missing values are not allowed. Users can provide a custom data parser for these cases. See `Custom Data Parser`_ for more details. Dataset of a single heterogeneous graph ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ One can specify multiple node and edge CSV files (each for one type) to represent a heterogeneous graph. Here is an example dataset with two node types and two edge types: .. 
code:: ./mini_hetero_dataset/ |-- meta.yaml |-- nodes_0.csv |-- nodes_1.csv |-- edges_0.csv |-- edges_1.csv The ``meta.yaml`` specifies the node type name (using ``ntype``) and edge type name (using ``etype``) of each CSV file. The edge type name is a string triplet containing the source node type name, relation name and the destination node type name. .. code:: yaml dataset_name: mini_hetero_dataset edge_data: - file_name: edges_0.csv etype: [user, follow, user] - file_name: edges_1.csv etype: [user, like, item] node_data: - file_name: nodes_0.csv ntype: user - file_name: nodes_1.csv ntype: item The node and edge CSV files follow the same format as in homogeneous graphs. Here are some synthetic data for demonstration purposes: ``edges_0.csv`` and ``edges_1.csv``: .. code:: src_id,dst_id,label,feat 4,4,1,"0.736833152378035,0.10522806046048205,0.9418796835016118" 3,4,2,"0.5749339182767451,0.20181320245665535,0.490938012147181" 1,4,2,"0.7697294432580938,0.49397782380750765,0.10864079337442234" 0,4,0,"0.1364240150959487,0.1393107840629273,0.7901988878812207" 2,3,1,"0.42988138237505735,0.18389137408509248,0.18431292077750894" 0,4,2,"0.8613368738351794,0.67985810014162,0.6580438064356824" 2,4,1,"0.6594951663841697,0.26499036865016423,0.7891429392727503" 4,1,0,"0.36649684241348557,0.9511783938523962,0.8494919263589972" 1,1,2,"0.698592283371875,0.038622249776255946,0.5563827995742111" 0,4,1,"0.5227112950269823,0.3148264185956532,0.47562693094002173" ``nodes_0.csv`` and ``nodes_1.csv``: .. code:: node_id,label,feat 0,2,"0.5400687466285844,0.7588441197954202,0.4268254673041745" 1,1,"0.08680051341900807,0.11446843700743892,0.7196969604886617" 2,2,"0.8964389655603473,0.23368113896545695,0.8813472954005022" 3,1,"0.5454703921677284,0.7819383771535038,0.3027939452162367" 4,1,"0.5365210052235699,0.8975240205792763,0.7613943085507672" After loaded, the dataset has one heterograph with features and labels: .. 
code:: python >>> import dgl >>> dataset = dgl.data.CSVDataset('./mini_hetero_dataset') >>> g = dataset[0] # only one graph >>> print(g) Graph(num_nodes={'item': 5, 'user': 5}, num_edges={('user', 'follow', 'user'): 10, ('user', 'like', 'item'): 10}, metagraph=[('user', 'user', 'follow'), ('user', 'item', 'like')]) >>> g.nodes['user'].data {'label': tensor([2, 1, 2, 1, 1]), 'feat': tensor([[0.5401, 0.7588, 0.4268], [0.0868, 0.1145, 0.7197], [0.8964, 0.2337, 0.8813], [0.5455, 0.7819, 0.3028], [0.5365, 0.8975, 0.7614]], dtype=torch.float64)} >>> g.edges['like'].data {'label': tensor([1, 2, 2, 0, 1, 2, 1, 0, 2, 1]), 'feat': tensor([[0.7368, 0.1052, 0.9419], [0.5749, 0.2018, 0.4909], [0.7697, 0.4940, 0.1086], [0.1364, 0.1393, 0.7902], [0.4299, 0.1839, 0.1843], [0.8613, 0.6799, 0.6580], [0.6595, 0.2650, 0.7891], [0.3665, 0.9512, 0.8495], [0.6986, 0.0386, 0.5564], [0.5227, 0.3148, 0.4756]], dtype=torch.float64)} Dataset of multiple graphs ~~~~~~~~~~~~~~~~~~~~~~~~~~ When there are multiple graphs, one can include an additional CSV file for storing graph-level features. Here is an example: .. code:: ./mini_multi_dataset/ |-- meta.yaml |-- nodes.csv |-- edges.csv |-- graphs.csv Accordingly, the ``meta.yaml`` should include an extra ``graph_data`` key to tell which CSV file to load graph-level features from. .. code:: yaml dataset_name: mini_multi_dataset edge_data: - file_name: edges.csv node_data: - file_name: nodes.csv graph_data: file_name: graphs.csv To distinguish nodes and edges of different graphs, ``nodes.csv`` and ``edges.csv`` must contain an extra column ``graph_id``: ``edges.csv``: .. 
code:: graph_id,src_id,dst_id,feat 0,0,4,"0.39534097273254654,0.9422093637539785,0.634899790318452" 0,3,0,"0.04486384200747007,0.6453746567017163,0.8757520744192612" 0,3,2,"0.9397636966928355,0.6526403892728874,0.8643238446466464" 0,1,1,"0.40559906615287566,0.9848072295736628,0.493888090726854" 0,4,1,"0.253458867276219,0.9168191778828504,0.47224962583565544" 0,0,1,"0.3219496197945605,0.3439899477636117,0.7051530741717352" 0,2,1,"0.692873149428549,0.4770019763881086,0.21937428942781778" 0,4,0,"0.620118223673067,0.08691420300562658,0.86573472329756" 0,2,1,"0.00743445923710373,0.5251800239734318,0.054016385555202384" 0,4,1,"0.6776417760682221,0.7291568018841328,0.4523600060547709" 1,1,3,"0.6375445528248924,0.04878384701995819,0.4081642382536248" 1,0,4,"0.776002616178397,0.8851294998284638,0.7321742043493028" 1,1,0,"0.0928555079874982,0.6156748364694707,0.6985674921582508" 1,0,2,"0.31328748118329997,0.8326121496142408,0.04133991340612775" 1,1,0,"0.36786902637778773,0.39161865931662243,0.9971749359397111" 1,1,1,"0.4647410679872376,0.8478810655406659,0.6746269314422184" 1,0,2,"0.8117650553546695,0.7893727601272978,0.41527155506593394" 1,1,3,"0.40707309111756307,0.2796588354307046,0.34846782265758314" 1,1,0,"0.18626464175355095,0.3523777809254057,0.7863421810531344" 1,3,0,"0.28357022069634585,0.13774964202156292,0.5913335505943637" ``nodes.csv``: .. 
code:: graph_id,node_id,feat 0,0,"0.5725330322207948,0.8451870383322376,0.44412796119211184" 0,1,"0.6624186423087752,0.6118386331195641,0.7352138669985214" 0,2,"0.7583372765843964,0.15218126307872892,0.6810484348765842" 0,3,"0.14627522432017592,0.7457985352827006,0.1037097085190507" 0,4,"0.49037522512771525,0.8778998699783784,0.0911194482288028" 1,0,"0.11158102039672668,0.08543289788089736,0.6901745368284345" 1,1,"0.28367647637469273,0.07502571020414439,0.01217200152200748" 1,2,"0.2472495901894738,0.24285506608575758,0.6494437360242048" 1,3,"0.5614197853127827,0.059172654879085296,0.4692371689047904" 1,4,"0.17583413999295983,0.5191278830882644,0.8453123358491914" The ``graphs.csv`` contains a ``graph_id`` column and arbitrary number of feature columns. The example dataset here has two graphs, each with a ``feat`` and a ``label`` graph-level data. .. code:: graph_id,feat,label 0,"0.7426272601929126,0.5197462471155317,0.8149104951283953",0 1,"0.534822233529295,0.2863627767733977,0.1154897249106891",0 After loaded, the dataset has multiple homographs with features and labels: .. code:: python >>> import dgl >>> dataset = dgl.data.CSVDataset('./mini_multi_dataset') >>> print(len(dataset)) 2 >>> graph0, data0 = dataset[0] >>> print(graph0) Graph(num_nodes=5, num_edges=10, ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float64)} edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float64)}) >>> print(data0) {'feat': tensor([0.7426, 0.5197, 0.8149], dtype=torch.float64), 'label': tensor(0)} >>> graph1, data1 = dataset[1] >>> print(graph1) Graph(num_nodes=5, num_edges=10, ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float64)} edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float64)}) >>> print(data1) {'feat': tensor([0.5348, 0.2864, 0.1155], dtype=torch.float64), 'label': tensor(0)} If there is a single feature column in ``graphs.csv``, ``data0`` will directly be a tensor for the feature. 
Custom Data Parser ~~~~~~~~~~~~~~~~~~ By default, ``CSVDataset`` assumes that all the stored node-/edge-/graph- level data are numerical values. Users can provide custom ``DataParser`` to ``CSVDataset`` to handle more complex data type. A ``DataParser`` needs to implement the ``__call__`` method which takes in the :class:`pandas.DataFrame` object created from CSV file and should return a dictionary of parsed feature data. The parsed feature data will be saved to the ``ndata`` and ``edata`` of the corresponding ``DGLGraph`` object, and thus must be tensors or numpy arrays. Below shows an example ``DataParser`` which converts string type labels to integers: Given a dataset as follows, .. code:: ./customized_parser_dataset/ |-- meta.yaml |-- nodes.csv |-- edges.csv ``meta.yaml``: .. code:: yaml dataset_name: customized_parser_dataset edge_data: - file_name: edges.csv node_data: - file_name: nodes.csv ``edges.csv``: .. code:: src_id,dst_id,label 4,0,positive 4,0,negative 0,3,positive 0,1,positive 0,2,negative 0,0,positive 2,2,negative 1,0,positive 3,0,negative 4,0,positive ``nodes.csv``: .. code:: node_id,label 0,positive 1,negative 2,positive 3,negative 4,positive To parse the string type labels, one can define a ``DataParser`` class as follows: .. code:: python import numpy as np import pandas as pd class MyDataParser: def __call__(self, df: pd.DataFrame): parsed = {} for header in df: if 'Unnamed' in header: # Handle Unnamed column print("Unnamed column is found. Ignored...") continue dt = df[header].to_numpy().squeeze() if header == 'label': dt = np.array([1 if e == 'positive' else 0 for e in dt]) parsed[header] = dt return parsed Create a ``CSVDataset`` using the defined ``DataParser``: .. code:: python >>> import dgl >>> dataset = dgl.data.CSVDataset('./customized_parser_dataset', ... ndata_parser=MyDataParser(), ... 
edata_parser=MyDataParser()) >>> print(dataset[0].ndata['label']) tensor([1, 0, 1, 0, 1]) >>> print(dataset[0].edata['label']) tensor([1, 0, 1, 1, 0, 1, 0, 1, 0, 1]) .. note:: To specify different ``DataParser``\s for different node/edge types, pass a dictionary to ``ndata_parser`` and ``edata_parser``, where the key is the type name (a single string for node type; a string triplet for edge type) and the value is the ``DataParser`` to use. Full YAML Specification ~~~~~~~~~~~~~~~~~~~~~~~ ``CSVDataset`` allows more flexible control over the loading and parsing process. For example, one can change the ID column names via ``meta.yaml``. The example below lists all the supported keys. .. code:: yaml version: 1.0.0 dataset_name: some_complex_data separator: ',' # CSV separator symbol. Default: ',' edge_data: - file_name: edges_0.csv etype: [user, follow, user] src_id_field: src_id # Column name for source node IDs. Default: src_id dst_id_field: dst_id # Column name for destination node IDs. Default: dst_id - file_name: edges_1.csv etype: [user, like, item] src_id_field: src_id dst_id_field: dst_id node_data: - file_name: nodes_0.csv ntype: user node_id_field: node_id # Column name for node IDs. Default: node_id - file_name: nodes_1.csv ntype: item node_id_field: node_id # Column name for node IDs. Default: node_id graph_data: file_name: graphs.csv graph_id_field: graph_id # Column name for graph IDs. Default: graph_id Top-level ^^^^^^^^^^^^^^ At the top level, only 6 keys are available: - ``version``: Optional. String. It specifies which version of ``meta.yaml`` is used. More features may be added in the future. - ``dataset_name``: Required. String. It specifies the dataset name. - ``separator``: Optional. String. It specifies how to parse data in CSV files. Default: ``','``. - ``edge_data``: Required. List of ``EdgeData``. Meta data for parsing edge CSV files. - ``node_data``: Required. List of ``NodeData``. Meta data for parsing node CSV files. - ``graph_data``: Optional. 
``GraphData``. Meta data for parsing the graph CSV file. ``EdgeData`` ^^^^^^^^^^^^^^^^^^^^^^ There are 4 keys: - ``file_name``: Required. String. The CSV file to load data from. - ``etype``: Optional. List of strings. Edge type name in string triplet: [source node type, relation type, destination node type]. - ``src_id_field``: Optional. String. Which column to read for source node IDs. Default: ``src_id``. - ``dst_id_field``: Optional. String. Which column to read for destination node IDs. Default: ``dst_id``. ``NodeData`` ^^^^^^^^^^^^^^^^^^^^^^ There are 3 keys: - ``file_name``: Required. String. The CSV file to load data from. - ``ntype``: Optional. String. Node type name. - ``node_id_field``: Optional. String. Which column to read for node IDs. Default: ``node_id``. ``GraphData`` ^^^^^^^^^^^^^^^^^^^^^^ There are 2 keys: - ``file_name``: Required. String. The CSV file to load data from. - ``graph_id_field``: Optional. String. Which column to read for graph IDs. Default: ``graph_id``. ================================================ FILE: docs/source/guide/data-loadogb.rst ================================================ .. _guide-data-pipeline-loadogb: 4.5 Loading OGB datasets using ``ogb`` package ---------------------------------------------- :ref:`(中文版) ` `Open Graph Benchmark (OGB) `__ is a collection of benchmark datasets. The official OGB package `ogb `__ provides APIs for downloading and processing OGB datasets into :class:`dgl.DGLGraph` objects. This section introduces their basic usage. First install the ``ogb`` package using pip: .. code:: pip install ogb The following code shows how to load datasets for *Graph Property Prediction* tasks. .. 
code:: # Load Graph Property Prediction datasets in OGB import dgl import torch from ogb.graphproppred import DglGraphPropPredDataset from dgl.dataloading import GraphDataLoader def _collate_fn(batch): # batch is a list of tuple (graph, label) graphs = [e[0] for e in batch] g = dgl.batch(graphs) labels = [e[1] for e in batch] labels = torch.stack(labels, 0) return g, labels # load dataset dataset = DglGraphPropPredDataset(name='ogbg-molhiv') split_idx = dataset.get_idx_split() # dataloader train_loader = GraphDataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True, collate_fn=_collate_fn) valid_loader = GraphDataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False, collate_fn=_collate_fn) test_loader = GraphDataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False, collate_fn=_collate_fn) Loading *Node Property Prediction* datasets is similar, but note that there is only one graph object in this kind of dataset. .. code:: # Load Node Property Prediction datasets in OGB from ogb.nodeproppred import DglNodePropPredDataset dataset = DglNodePropPredDataset(name='ogbn-proteins') split_idx = dataset.get_idx_split() # there is only one graph in Node Property Prediction datasets g, labels = dataset[0] # get split labels train_label = dataset.labels[split_idx['train']] valid_label = dataset.labels[split_idx['valid']] test_label = dataset.labels[split_idx['test']] *Link Property Prediction* datasets also contain one graph per dataset. .. code:: # Load Link Property Prediction datasets in OGB from ogb.linkproppred import DglLinkPropPredDataset dataset = DglLinkPropPredDataset(name='ogbl-ppa') split_edge = dataset.get_edge_split() graph = dataset[0] print(split_edge['train'].keys()) print(split_edge['valid'].keys()) print(split_edge['test'].keys()) ================================================ FILE: docs/source/guide/data-process.rst ================================================ .. 
_guide-data-pipeline-process: 4.3 Process data ---------------- :ref:`(中文版) ` One can implement the data processing code in function ``process()``, and it assumes that the raw data is located in ``self.raw_dir`` already. There are typically three types of tasks in machine learning on graphs: graph classification, node classification, and link prediction. This section will show how to process datasets related to these tasks. The section focuses on the standard way to process graphs, features and masks. It will use builtin datasets as examples and skip the implementations for building graphs from files, but add links to the detailed implementations. Please refer to :ref:`guide-graph-external` to see a complete guide on how to build graphs from external sources. Processing Graph Classification datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Graph classification datasets are almost the same as most datasets in typical machine learning tasks, where mini-batch training is used. So one can process the raw data to a list of :class:`dgl.DGLGraph` objects and a list of label tensors. In addition, if the raw data has been split into several files, one can add a parameter ``split`` to load specific part of the data. Take :class:`~dgl.data.QM7bDataset` as example: .. 
code:: from dgl.data import DGLDataset class QM7bDataset(DGLDataset): _url = 'http://deepchem.io.s3-website-us-west-1.amazonaws.com/' \ 'datasets/qm7b.mat' _sha1_str = '4102c744bb9d6fd7b40ac67a300e49cd87e28392' def __init__(self, raw_dir=None, force_reload=False, verbose=False): super(QM7bDataset, self).__init__(name='qm7b', url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): mat_path = self.raw_path + '.mat' # process data to a list of graphs and a list of labels self.graphs, self.label = self._load_graph(mat_path) def __getitem__(self, idx): """ Get graph and label by index Parameters ---------- idx : int Item index Returns ------- (dgl.DGLGraph, Tensor) """ return self.graphs[idx], self.label[idx] def __len__(self): """Number of graphs in the dataset""" return len(self.graphs) In ``process()``, the raw data is processed to a list of graphs and a list of labels. One must implement ``__getitem__(idx)`` and ``__len__()`` for iteration. DGL recommends making ``__getitem__(idx)`` return a tuple ``(graph, label)`` as above. Please check the `QM7bDataset source code `__ for details of ``self._load_graph()`` and ``__getitem__``. One can also add properties to the class to indicate some useful information of the dataset. In :class:`~dgl.data.QM7bDataset`, one can add a property ``num_tasks`` to indicate the total number of prediction tasks in this multi-task dataset: .. code:: @property def num_tasks(self): """Number of labels for each graph, i.e. number of prediction tasks.""" return 14 After all these coding, one can finally use :class:`~dgl.data.QM7bDataset` as follows: .. 
code:: import dgl import torch from dgl.dataloading import GraphDataLoader # load data dataset = QM7bDataset() num_tasks = dataset.num_tasks # create dataloaders dataloader = GraphDataLoader(dataset, batch_size=1, shuffle=True) # training for epoch in range(100): for g, labels in dataloader: # your training code here pass A complete guide for training graph classification models can be found in :ref:`guide-training-graph-classification`. For more examples of graph classification datasets, please refer to DGL's builtin graph classification datasets: * :ref:`gindataset` * :ref:`minigcdataset` * :ref:`qm7bdata` * :ref:`tudata` Processing Node Classification datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Unlike graph classification, node classification is typically performed on a single graph. As such, splits of the dataset are on the nodes of the graph. DGL recommends using node masks to specify the splits. The section uses builtin dataset `CitationGraphDataset `__ as an example. In addition, DGL recommends re-arranging the nodes and edges so that nodes close to each other have IDs in a close range. This can improve locality when accessing a node's neighbors, which may benefit follow-up computation and analysis conducted on the graph. DGL provides an API called :func:`dgl.reorder_graph` for this purpose. Please refer to the ``process()`` part of the example below for more details. .. 
code:: from dgl.data import DGLBuiltinDataset from dgl.data.utils import _get_dgl_url class CitationGraphDataset(DGLBuiltinDataset): _urls = { 'cora_v2' : 'dataset/cora_v2.zip', 'citeseer' : 'dataset/citeseer.zip', 'pubmed' : 'dataset/pubmed.zip', } def __init__(self, name, raw_dir=None, force_reload=False, verbose=True): assert name.lower() in ['cora', 'citeseer', 'pubmed'] if name.lower() == 'cora': name = 'cora_v2' url = _get_dgl_url(self._urls[name]) super(CitationGraphDataset, self).__init__(name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): # Skip some processing code # === data processing skipped === # build graph g = dgl.graph(graph) # splitting masks g.ndata['train_mask'] = train_mask g.ndata['val_mask'] = val_mask g.ndata['test_mask'] = test_mask # node labels g.ndata['label'] = torch.tensor(labels) # node features g.ndata['feat'] = torch.tensor(_preprocess_features(features), dtype=F.data_type_dict['float32']) self._num_tasks = onehot_labels.shape[1] self._labels = labels # reorder graph to obtain better locality. self._g = dgl.reorder_graph(g) def __getitem__(self, idx): assert idx == 0, "This dataset has only one graph" return self._g def __len__(self): return 1 For brevity, this section skips some code in ``process()`` to highlight the key part for processing node classification dataset: splitting masks. Node features and node labels are stored in ``g.ndata``. For detailed implementation, please refer to `CitationGraphDataset source code `__. Note that the implementations of ``__getitem__(idx)`` and ``__len__()`` are changed as well, since there is often only one graph for node classification tasks. The masks are ``bool tensors`` in PyTorch and TensorFlow, and ``float tensors`` in MXNet. The section uses a subclass of ``CitationGraphDataset``, :class:`dgl.data.CiteseerGraphDataset`, to show the usage of it: .. 
code:: # load data dataset = CiteseerGraphDataset(raw_dir='') graph = dataset[0] # get split masks train_mask = graph.ndata['train_mask'] val_mask = graph.ndata['val_mask'] test_mask = graph.ndata['test_mask'] # get node features feats = graph.ndata['feat'] # get labels labels = graph.ndata['label'] A complete guide for training node classification models can be found in :ref:`guide-training-node-classification`. For more examples of node classification datasets, please refer to DGL's builtin datasets: * :ref:`citationdata` * :ref:`corafulldata` * :ref:`amazoncobuydata` * :ref:`coauthordata` * :ref:`karateclubdata` * :ref:`ppidata` * :ref:`redditdata` * :ref:`sbmdata` * :ref:`sstdata` * :ref:`rdfdata` Processing dataset for Link Prediction datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The processing of link prediction datasets is similar to that of node classification datasets, as there is often only one graph in the dataset. The section uses builtin dataset `KnowledgeGraphDataset `__ as an example, and still skips the detailed data processing code to highlight the key part for processing link prediction datasets: .. 
code:: # Example for creating Link Prediction datasets class KnowledgeGraphDataset(DGLBuiltinDataset): def __init__(self, name, reverse=True, raw_dir=None, force_reload=False, verbose=True): self._name = name self.reverse = reverse url = _get_dgl_url('dataset/') + '{}.tgz'.format(name) super(KnowledgeGraphDataset, self).__init__(name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): # Skip some processing code # === data processing skipped === # splitting mask g.edata['train_mask'] = train_mask g.edata['val_mask'] = val_mask g.edata['test_mask'] = test_mask # edge type g.edata['etype'] = etype # node type g.ndata['ntype'] = ntype self._g = g def __getitem__(self, idx): assert idx == 0, "This dataset has only one graph" return self._g def __len__(self): return 1 As shown in the code, it adds splitting masks into ``edata`` field of the graph. Check `KnowledgeGraphDataset source code `__ to see the complete code. The following code uses a subclass of ``KnowledgeGraphDataset``, :class:`dgl.data.FB15k237Dataset`, to show the usage of it: .. code:: from dgl.data import FB15k237Dataset # load data dataset = FB15k237Dataset() graph = dataset[0] # get training mask train_mask = graph.edata['train_mask'] train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() src, dst = graph.edges(train_idx) # get edge types in training set rel = graph.edata['etype'][train_idx] A complete guide for training link prediction models can be found in :ref:`guide-training-link-prediction`. For more examples of link prediction datasets, please refer to DGL's builtin datasets: * :ref:`kgdata` * :ref:`bitcoinotcdata` ================================================ FILE: docs/source/guide/data-savenload.rst ================================================ .. _guide-data-pipeline-savenload: 4.4 Save and load data ---------------------- :ref:`(中文版) ` DGL recommends implementing saving and loading functions to cache the processed data in local disk. 
This saves a lot of data processing time in most cases. DGL provides four functions to make things simple: - :func:`dgl.save_graphs` and :func:`dgl.load_graphs`: save/load DGLGraph objects and labels to/from local disk. - :func:`dgl.data.utils.save_info` and :func:`dgl.data.utils.load_info`: save/load useful information of the dataset (python ``dict`` object) to/from local disk. The following example shows how to save and load a list of graphs and dataset information. .. code:: import os from dgl import save_graphs, load_graphs from dgl.data.utils import makedirs, save_info, load_info def save(self): # save graphs and labels graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') save_graphs(graph_path, self.graphs, {'labels': self.labels}) # save other information in python dict info_path = os.path.join(self.save_path, self.mode + '_info.pkl') save_info(info_path, {'num_classes': self.num_classes}) def load(self): # load processed data from directory `self.save_path` graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') self.graphs, label_dict = load_graphs(graph_path) self.labels = label_dict['labels'] info_path = os.path.join(self.save_path, self.mode + '_info.pkl') self.num_classes = load_info(info_path)['num_classes'] def has_cache(self): # check whether there are processed data in `self.save_path` graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') info_path = os.path.join(self.save_path, self.mode + '_info.pkl') return os.path.exists(graph_path) and os.path.exists(info_path) Note that there are cases not suitable to save processed data. For example, in the builtin dataset :class:`~dgl.data.GDELTDataset`, the processed data is quite large, so it’s more effective to process each data example in ``__getitem__(idx)``. ================================================ FILE: docs/source/guide/data.rst ================================================ .. 
_guide-data-pipeline: Chapter 4: Graph Data Pipeline ============================== :ref:`(中文版) ` DGL implements many commonly used graph datasets in :ref:`apidata`. They follow a standard pipeline defined in class :class:`dgl.data.DGLDataset`. DGL highly recommends processing graph data into a :class:`dgl.data.DGLDataset` subclass, as the pipeline provides a simple and clean solution for loading, processing and saving graph data. Roadmap ------- This chapter introduces how to create a custom DGL-Dataset. The following sections explain how the pipeline works, and show how to implement each component of it. * :ref:`guide-data-pipeline-dataset` * :ref:`guide-data-pipeline-download` * :ref:`guide-data-pipeline-process` * :ref:`guide-data-pipeline-savenload` * :ref:`guide-data-pipeline-loadogb` * :ref:`guide-data-pipeline-loadcsv` .. toctree:: :maxdepth: 1 :hidden: :glob: data-dataset data-download data-process data-savenload data-loadogb data-loadcsv ================================================ FILE: docs/source/guide/distributed-apis.rst ================================================ .. _guide-distributed-apis: 7.3 Programming APIs ----------------------------------- :ref:`(中文版) ` This section covers the core Python components commonly used in a training script. DGL provides three distributed data structures and various APIs for initialization, distributed sampling and workload split. * :class:`~dgl.distributed.DistGraph` for accessing the structure and features of a graph stored in a distributed fashion. * :class:`~dgl.distributed.DistTensor` for accessing a node/edge feature tensor that is partitioned across machines. * :class:`~dgl.distributed.DistEmbedding` for accessing a learnable node/edge embedding tensor that is partitioned across machines. Initialization of the DGL distributed module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :func:`dgl.distributed.initialize` initializes the distributed module. 
If invoked by a trainer, this API creates sampler processes and builds connections with graph servers; if invoked by graph server, this API starts a service loop to listen to trainer/sampler requests. The API *must* be called before :func:`torch.distributed.init_process_group` and any other ``dgl.distributed`` APIs as shown in the order below: .. code:: python dgl.distributed.initialize('ip_config.txt') th.distributed.init_process_group(backend='gloo') .. note:: If the training script contains user-defined functions (UDFs) that have to be invoked on the servers (see the section of DistTensor and DistEmbedding for more details), these UDFs have to be declared before :func:`~dgl.distributed.initialize`. Distributed graph ~~~~~~~~~~~~~~~~~ :class:`~dgl.distributed.DistGraph` is a Python class to access the graph structure and node/edge features in a cluster of machines. Each machine is responsible for one and only one partition. It loads the partition data (the graph structure and the node data and edge data in the partition) and makes it accessible to all trainers in the cluster. :class:`~dgl.distributed.DistGraph` provides a small subset of :class:`~dgl.DGLGraph` APIs for data access. Distributed mode vs. standalone mode ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :class:`~dgl.distributed.DistGraph` can run in two modes: *distributed mode* and *standalone mode*. When a user executes a training script in a Python command line or Jupyter Notebook, it runs in a standalone mode. That is, it runs all computation in a single process and does not communicate with any other processes. Thus, the standalone mode requires the input graph to have only one partition. This mode is mainly used for development and testing (e.g., develop and run the code in Jupyter Notebook). When a user executes a training script with a launch script (see the section of launch script), :class:`~dgl.distributed.DistGraph` runs in the distributed mode. 
The launch tool starts servers (node/edge feature access and graph sampling) behind the scene and loads the partition data in each machine automatically. :class:`~dgl.distributed.DistGraph` connects with the servers in the cluster of machines and access them through the network. DistGraph creation ^^^^^^^^^^^^^^^^^^ In the distributed mode, the creation of :class:`~dgl.distributed.DistGraph` requires the graph name given during graph partitioning. The graph name identifies the graph loaded in the cluster. .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name') When running in the standalone mode, it loads the graph data in the local machine. Therefore, users need to provide the partition configuration file, which contains all information about the input graph. .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name', part_config='data/graph_name.json') .. note:: DGL only allows one single ``DistGraph`` object. The behavior of destroying a DistGraph and creating a new one is undefined. Accessing graph structure ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :class:`~dgl.distributed.DistGraph` provides a set of APIs to access the graph structure. Currently, most APIs provide graph information, such as the number of nodes and edges. The main use case of DistGraph is to run sampling APIs to support mini-batch training (see `Distributed sampling`_). .. code:: python print(g.num_nodes()) Access node/edge data ^^^^^^^^^^^^^^^^^^^^^ Like :class:`~dgl.DGLGraph`, :class:`~dgl.distributed.DistGraph` provides ``ndata`` and ``edata`` to access data in nodes and edges. The difference is that ``ndata``/``edata`` in :class:`~dgl.distributed.DistGraph` returns :class:`~dgl.distributed.DistTensor`, instead of the tensor of the underlying framework. Users can also assign a new :class:`~dgl.distributed.DistTensor` to :class:`~dgl.distributed.DistGraph` as node data or edge data. .. 
code:: python g.ndata['train_mask'] # g.ndata['train_mask'][0] # tensor([1], dtype=torch.uint8) Distributed Tensor ~~~~~~~~~~~~~~~~~~~~~ As mentioned earlier, DGL shards node/edge features and stores them in a cluster of machines. DGL provides distributed tensors with a tensor-like interface to access the partitioned node/edge features in the cluster. In the distributed setting, DGL only supports dense node/edge features. :class:`~dgl.distributed.DistTensor` manages the dense tensors partitioned and stored in multiple machines. Right now, a distributed tensor has to be associated with nodes or edges of a graph. In other words, the number of rows in a DistTensor has to be the same as the number of nodes or the number of edges in a graph. The following code creates a distributed tensor. In addition to the shape and dtype for the tensor, a user can also provide a unique tensor name. This name is useful if a user wants to reference a persistent distributed tensor (the one exists in the cluster even if the :class:`~dgl.distributed.DistTensor` object disappears). .. code:: python tensor = dgl.distributed.DistTensor((g.num_nodes(), 10), th.float32, name='test') .. note:: :class:`~dgl.distributed.DistTensor` creation is a synchronized operation. All trainers have to invoke the creation and the creation succeeds only when all trainers call it. A user can add a :class:`~dgl.distributed.DistTensor` to a :class:`~dgl.distributed.DistGraph` object as one of the node data or edge data. .. code:: python g.ndata['feat'] = tensor .. note:: The node data name and the tensor name do not have to be the same. The former identifies node data from :class:`~dgl.distributed.DistGraph` (in the trainer process) while the latter identifies a distributed tensor in DGL servers. :class:`~dgl.distributed.DistTensor` has the same APIs as regular tensors to access its metadata, such as the shape and dtype. 
It also supports indexed reads and writes but does not support computation operators, such as sum and mean. .. code:: python data = g.ndata['feat'][[1, 2, 3]] print(data) g.ndata['feat'][[3, 4, 5]] = data .. note:: Currently, DGL does not provide protection for concurrent writes from multiple trainers when a machine runs multiple servers. This may result in data corruption. One way to avoid concurrent writes to the same row of data is to run one server process on a machine. Distributed DistEmbedding ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL provides :class:`~dgl.distributed.DistEmbedding` to support transductive models that require node embeddings. Creating distributed embeddings is very similar to creating distributed tensors. .. code:: python def initializer(shape, dtype): arr = th.zeros(shape, dtype=dtype) arr.uniform_(-1, 1) return arr emb = dgl.distributed.DistEmbedding(g.num_nodes(), 10, init_func=initializer) Internally, distributed embeddings are built on top of distributed tensors, and, thus, has very similar behaviors to distributed tensors. For example, when embeddings are created, they are sharded and stored across all machines in the cluster. It can be uniquely identified by a name. .. note:: The initializer function is invoked in the server process. Therefore, it has to be declared before :class:`dgl.distributed.initialize`. Because the embeddings are part of the model, a user has to attach them to an optimizer for mini-batch training. Currently, DGL provides a sparse Adagrad optimizer :class:`~dgl.distributed.SparseAdagrad` (DGL will add more optimizers for sparse embeddings later). Users need to collect all distributed embeddings from a model and pass them to the sparse optimizer. If a model has both node embeddings and regular dense model parameters and users want to perform sparse updates on the embeddings, they need to create two optimizers, one for node embeddings and the other for dense model parameters, as shown in the code below: .. 
code:: python sparse_optimizer = dgl.distributed.SparseAdagrad([emb], lr=lr1) optimizer = th.optim.Adam(model.parameters(), lr=lr2) feats = emb(nids) loss = model(feats) loss.backward() optimizer.step() sparse_optimizer.step() .. note:: :class:`~dgl.distributed.DistEmbedding` does not inherit :class:`torch.nn.Module`, so we recommend using it outside of your own NN module. Distributed sampling ~~~~~~~~~~~~~~~~~~~~ DGL provides two levels of APIs for sampling nodes and edges to generate mini-batches (see the section of mini-batch training). The low-level APIs require users to write code to explicitly define how a layer of nodes are sampled (e.g., using :func:`dgl.sampling.sample_neighbors` ). The high-level sampling APIs implement a few popular sampling algorithms for node classification and link prediction tasks (e.g., :class:`~dgl.dataloading.NodeDataLoader` and :class:`~dgl.dataloading.EdgeDataLoader` ). The distributed sampling module follows the same design and provides two levels of sampling APIs. For the lower-level sampling API, it provides :func:`~dgl.distributed.sample_neighbors` for distributed neighborhood sampling on :class:`~dgl.distributed.DistGraph`. In addition, DGL provides a distributed DataLoader (:class:`~dgl.distributed.DistDataLoader` ) for distributed sampling. The distributed DataLoader has the same interface as Pytorch DataLoader except that users cannot specify the number of worker processes when creating a dataloader. The worker processes are created in :func:`dgl.distributed.initialize`. .. note:: When running :func:`dgl.distributed.sample_neighbors` on :class:`~dgl.distributed.DistGraph`, the sampler cannot run in Pytorch DataLoader with multiple worker processes. The main reason is that Pytorch DataLoader creates new sampling worker processes in every epoch, which leads to creating and destroying :class:`~dgl.distributed.DistGraph` objects many times. 
When using the low-level API, the sampling code is similar to single-process sampling. The only difference is that users need to use :func:`dgl.distributed.sample_neighbors` and :class:`~dgl.distributed.DistDataLoader`. .. code:: python def sample_blocks(seeds): seeds = th.LongTensor(np.asarray(seeds)) blocks = [] for fanout in [10, 25]: frontier = dgl.distributed.sample_neighbors(g, seeds, fanout, replace=True) block = dgl.to_block(frontier, seeds) seeds = block.srcdata[dgl.NID] blocks.insert(0, block) return blocks dataloader = dgl.distributed.DistDataLoader(dataset=train_nid, batch_size=batch_size, collate_fn=sample_blocks, shuffle=True) for batch in dataloader: ... The high-level sampling APIs (:class:`~dgl.dataloading.NodeDataLoader` and :class:`~dgl.dataloading.EdgeDataLoader` ) has distributed counterparts (:class:`~dgl.distributed.DistNodeDataLoader` and :class:`~dgl.distributed.DistEdgeDataLoader`). The code is exactly the same as single-process sampling otherwise. .. code:: python sampler = dgl.sampling.MultiLayerNeighborSampler([10, 25]) dataloader = dgl.distributed.DistNodeDataLoader(g, train_nid, sampler, batch_size=batch_size, shuffle=True) for batch in dataloader: ... Split workloads ~~~~~~~~~~~~~~~~~~ To train a model, users first need to split the dataset into training, validation and test sets. For distributed training, this step is usually done before we invoke :func:`dgl.distributed.partition_graph` to partition a graph. We recommend to store the data split in boolean arrays as node data or edge data. For node classification tasks, the length of these boolean arrays is the number of nodes in a graph and each of their elements indicates the existence of a node in a training/validation/test set. Similar boolean arrays should be used for link prediction tasks. 
:func:`dgl.distributed.partition_graph` splits these boolean arrays (because they are stored as the node data or edge data of the graph) based on the graph partitioning result and stores them with graph partitions. During distributed training, users need to assign training nodes/edges to each trainer. Similarly, we also need to split the validation and test set in the same way. DGL provides :func:`~dgl.distributed.node_split` and :func:`~dgl.distributed.edge_split` to split the training, validation and test set at runtime for distributed training. The two functions take the boolean arrays constructed before graph partitioning as input, split them and return a portion for the local trainer. By default, they ensure that all portions have the same number of nodes/edges. This is important for synchronous SGD, which assumes each trainer has the same number of mini-batches. The example below splits the training set and returns a subset of nodes for the local process. .. code:: python train_nids = dgl.distributed.node_split(g.ndata['train_mask']) ================================================ FILE: docs/source/guide/distributed-hetero.rst ================================================ .. _guide-distributed-hetero: 7.5 Heterogeneous Graph Under The Hood -------------------------------------------- The chapter covers the implementation details of distributed heterogeneous graph. They are transparent to users in most scenarios but could be useful for advanced customization. In DGL, a node or edge in a heterogeneous graph has a unique ID in its own node type or edge type. Therefore, DGL can identify a node or an edge with a tuple: ``(node/edge type, type-wise ID)``. We call IDs of such form as **heterogeneous IDs**. To partition a heterogeneous graph for distributed training, DGL converts it to a homogeneous graph so that we can reuse the partitioning algorithms designed for homogeneous graphs.
Each node/edge is thus uniquely mapped to an integer ID in a consecutive ID range (e.g., from 0 to the total number of nodes of all types). We call the IDs after conversion as **homogeneous IDs**. Below is an illustration of the ID conversion process. Here, the graph has two types of nodes (:math:`T0` and :math:`T1` ), and four types of edges (:math:`R0`, :math:`R1`, :math:`R2`, :math:`R3` ). There are a total of 400 nodes in the graph and each type has 200 nodes. Nodes of :math:`T0` have IDs in [0,200), while nodes of :math:`T1` have IDs in [200, 400). In this example, if we use a tuple to identify the nodes, nodes of :math:`T0` are identified as (T0, type-wise ID), where type-wise ID falls in [0, 200); nodes of :math:`T1` are identified as (T1, type-wise ID), where type-wise ID also falls in [0, 200). .. figure:: https://data.dgl.ai/tutorial/hetero/heterograph_ids.png :alt: Imgur ID Conversion Utilities ^^^^^^^^^^^^^^^^^^^^^^^^ During Preprocessing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The steps of :ref:`Parallel Processing Pipeline ` all use heterogeneous IDs for their inputs and outputs. Nevertheless, some steps such as ParMETIS partitioning are easier to be implemented using homogeneous IDs, thus requiring a utility to perform ID conversion. The code below implements a simple ``IDConverter`` using the metadata information in the metadata JSON from the chunked graph data format. It starts from some node type :math:`A` as node type 0, then assigns all its nodes with IDs in range :math:`[0, |V_A|)`. It then moves to the next node type :math:`B` as node type 1 and assigns all its nodes with IDs in range :math:`[|V_A|, |V_A|+|V_B|)`. ..
code:: python from bisect import bisect_right import numpy as np class IDConverter: def __init__(self, meta): # meta is the JSON object loaded from metadata.json self.node_type = meta['node_type'] self.edge_type = meta['edge_type'] self.ntype2id_map = {ntype : i for i, ntype in enumerate(self.node_type)} self.etype2id_map = {etype : i for i, etype in enumerate(self.edge_type)} self.num_nodes = [sum(ns) for ns in meta['num_nodes_per_chunk']] self.num_edges = [sum(ns) for ns in meta['num_edges_per_chunk']] self.nid_offset = np.cumsum([0] + self.num_nodes) self.eid_offset = np.cumsum([0] + self.num_edges) def ntype2id(self, ntype): """From node type name to node type ID""" return self.ntype2id_map[ntype] def etype2id(self, etype): """From edge type name to edge type ID""" return self.etype2id_map[etype] def id2ntype(self, id): """From node type ID to node type name""" return self.node_type[id] def id2etype(self, id): """From edge type ID to edge type name""" return self.edge_type[id] def nid_het2hom(self, ntype, id): """From heterogeneous node ID to homogeneous node ID""" tid = self.ntype2id(ntype) if id < 0 or id >= self.num_nodes[tid]: raise ValueError(f'Invalid node ID of type {ntype}. Must be within range [0, {self.num_nodes[tid]})') return self.nid_offset[tid] + id def nid_hom2het(self, id): """From homogeneous node ID to heterogeneous node ID""" if id < 0 or id >= self.nid_offset[-1]: raise ValueError(f'Invalid homogeneous node ID. Must be within range [0, {self.nid_offset[-1]})') # bisect_right so that an ID equal to a type's starting offset maps to that type tid = bisect_right(self.nid_offset, id) - 1 # Return a pair (node_type, type_wise_id) return self.id2ntype(tid), id - self.nid_offset[tid] def eid_het2hom(self, etype, id): """From heterogeneous edge ID to homogeneous edge ID""" tid = self.etype2id(etype) if id < 0 or id >= self.num_edges[tid]: raise ValueError(f'Invalid edge ID of type {etype}.
Must be within range [0, {self.num_edges[tid]})') return self.eid_offset[tid] + id def eid_hom2het(self, id): """From homogeneous edge ID to heterogeneous edge ID""" if id < 0 or id >= self.eid_offset[-1]: raise ValueError(f'Invalid homogeneous edge ID. Must be within range [0, {self.eid_offset[-1]})') # bisect_right so that an ID equal to a type's starting offset maps to that type tid = bisect_right(self.eid_offset, id) - 1 # Return a pair (edge_type, type_wise_id) return self.id2etype(tid), id - self.eid_offset[tid] After Partition Loading ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ After the partitions are loaded into trainer or server processes, the loaded :class:`~dgl.distributed.GraphPartitionBook` provides utilities for conversion between homogeneous IDs and heterogeneous IDs. * :func:`~dgl.distributed.GraphPartitionBook.map_to_per_ntype`: convert a homogeneous node ID to type-wise ID and node type ID. * :func:`~dgl.distributed.GraphPartitionBook.map_to_per_etype`: convert a homogeneous edge ID to type-wise ID and edge type ID. * :func:`~dgl.distributed.GraphPartitionBook.map_to_homo_nid`: convert type-wise ID and node type to a homogeneous node ID. * :func:`~dgl.distributed.GraphPartitionBook.map_to_homo_eid`: convert type-wise ID and edge type to a homogeneous edge ID. Because all DGL's low-level :ref:`distributed graph sampling operators ` use homogeneous IDs, DGL internally converts the heterogeneous IDs specified by users to homogeneous IDs before invoking sampling operators. Below shows an example of sampling a subgraph by :func:`~dgl.distributed.sample_neighbors` from nodes of type ``"paper"``. It first performs ID conversion, and after getting the sampled subgraph, converts the homogeneous node/edge IDs back to heterogeneous ones. .. code:: python gpb = g.get_partition_book() # We need to map the type-wise node IDs to homogeneous IDs. cur = gpb.map_to_homo_nid(seeds, 'paper') # For a heterogeneous input graph, the returned frontier is stored in # the homogeneous graph format.
frontier = dgl.distributed.sample_neighbors(g, cur, fanout, replace=False) block = dgl.to_block(frontier, cur) cur = block.srcdata[dgl.NID] block.edata[dgl.EID] = frontier.edata[dgl.EID] # Map the homogeneous edge Ids to their edge type. block.edata[dgl.ETYPE], block.edata[dgl.EID] = gpb.map_to_per_etype(block.edata[dgl.EID]) # Map the homogeneous node Ids to their node types and per-type Ids. block.srcdata[dgl.NTYPE], block.srcdata[dgl.NID] = gpb.map_to_per_ntype(block.srcdata[dgl.NID]) block.dstdata[dgl.NTYPE], block.dstdata[dgl.NID] = gpb.map_to_per_ntype(block.dstdata[dgl.NID]) Note that getting node/edge types from type IDs is simple -- just getting them from the ``ntypes`` attributes of a ``DistGraph``, i.e., ``g.ntypes[node_type_id]``. Access distributed graph data ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The :class:`~dgl.distributed.DistGraph` class supports similar interface as :class:`~dgl.DGLGraph`. Below shows an example of getting the feature data of nodes 0, 10, 20 of type :math:`T0`. When accessing data in :class:`~dgl.distributed.DistGraph`, a user needs to use type-wise IDs and corresponding node types or edge types. .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name', part_config='data/graph_name.json') feat = g.nodes['T0'].data['feat'][[0, 10, 20]] A user can create distributed tensors and distributed embeddings for a particular node type or edge type. Distributed tensors and embeddings are split and stored in multiple machines. To create one, a user needs to specify how it is partitioned with :class:`~dgl.distributed.PartitionPolicy`. By default, DGL chooses the right partition policy based on the size of the first dimension. However, if multiple node types or edge types have the same number of nodes or edges, DGL cannot determine the partition policy automatically. A user needs to explicitly specify the partition policy. 
Below shows an example of creating a distributed tensor for node type :math:`T0` by using the partition policy for :math:`T0` and store it as node data of :math:`T0`. .. code:: python g.nodes['T0'].data['feat1'] = dgl.distributed.DistTensor( (g.num_nodes('T0'), 1), th.float32, 'feat1', part_policy=g.get_node_partition_policy('T0')) The partition policies used for creating distributed tensors and embeddings are initialized when a heterogeneous graph is loaded into the graph server. A user cannot create a new partition policy at runtime. Therefore, a user can only create distributed tensors or embeddings for a node type or edge type. Accessing distributed tensors and embeddings also requires type-wise IDs. ================================================ FILE: docs/source/guide/distributed-partition.rst ================================================ .. _guide-distributed-partition: 7.4 Advanced Graph Partitioning --------------------------------------- The chapter covers some of the advanced topics for graph partitioning. METIS partition algorithm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `METIS `__ is a state-of-the-art graph partitioning algorithm that can generate partitions with minimal number of cross-partition edges, making it suitable for distributed message passing where the amount of network communication is proportional to the number of cross-partition edges. DGL has integrated METIS as the default partitioning algorithm in its :func:`dgl.distributed.partition_graph` API. Output format ~~~~~~~~~~~~~~~~~~~~~~~~~~ Regardless of the partitioning algorithm in use, the partitioned results are stored in data files organized as follows: .. 
code-block:: none data_root_dir/ |-- graph_name.json # partition configuration file in JSON |-- part0/ # data for partition 0 | |-- node_feats.dgl # node features stored in binary format | |-- edge_feats.dgl # edge features stored in binary format | |-- graph.dgl # graph structure of this partition stored in binary format | |-- part1/ # data for partition 1 | |-- node_feats.dgl | |-- edge_feats.dgl | |-- graph.dgl | |-- ... # data for other partitions When distributed to a cluster, the metadata JSON should be copied to all the machines while the ``partX`` folders should be dispatched accordingly. DGL provides a :func:`dgl.distributed.load_partition` function to load one partition for inspection. .. code:: python >>> import dgl >>> # load partition 0 >>> part_data = dgl.distributed.load_partition('data_root_dir/graph_name.json', 0) >>> g, nfeat, efeat, partition_book, graph_name, ntypes, etypes = part_data # unpack >>> print(g) Graph(num_nodes=966043, num_edges=34270118, ndata_schemes={'orig_id': Scheme(shape=(), dtype=torch.int64), 'part_id': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), 'inner_node': Scheme(shape=(), dtype=torch.int32)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'inner_edge': Scheme(shape=(), dtype=torch.int8), 'orig_id': Scheme(shape=(), dtype=torch.int64)}) As mentioned in the `ID mapping`_ section, each partition carries auxiliary information saved as ndata or edata such as original node/edge IDs, partition IDs, etc. Each partition not only saves nodes/edges it owns, but also includes node/edges that are adjacent to the partition (called **HALO** nodes/edges). The ``inner_node`` and ``inner_edge`` indicate whether a node/edge truly belongs to the partition (value is ``True``) or is a HALO node/edge (value is ``False``). The :func:`~dgl.distributed.load_partition` function loads all data at once.
Users can load features or the partition book using the :func:`dgl.distributed.load_partition_feats` and :func:`dgl.distributed.load_partition_book` APIs respectively. Parallel METIS partitioning ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For massive graphs where parallel preprocessing is desired, DGL supports `ParMETIS `__ as one of the choices of partitioning algorithms. .. note:: Because ParMETIS does not support heterogeneous graphs, users need to conduct ID conversion before and after running ParMETIS. Check out chapter :ref:`guide-distributed-hetero` for explanation. .. note:: Please make sure that the input graph to ParMETIS does not have duplicate edges (or parallel edges) and self-loop edges. ParMETIS Installation ^^^^^^^^^^^^^^^^^^^^^^ ParMETIS requires METIS and GKLib. Please follow the instructions `here `__ to compile and install GKLib. To compile and install METIS, please follow the instructions below to clone METIS with GIT and compile it with int64 support. .. code-block:: bash git clone https://github.com/KarypisLab/METIS.git make config shared=1 cc=gcc prefix=~/local i64=1 make install For now, we need to compile and install ParMETIS manually. We clone the DGL branch of ParMETIS as follows: .. code-block:: bash git clone --branch dgl https://github.com/KarypisLab/ParMETIS.git Then compile and install ParMETIS. .. code-block:: bash make config cc=mpicc prefix=~/local make install Before running ParMETIS, we need to set two environment variables: ``PATH`` and ``LD_LIBRARY_PATH``. .. code-block:: bash export PATH=$PATH:$HOME/local/bin export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/local/lib/ Input format ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. note:: As a prerequisite, read chapter :ref:`guide-distributed-hetero` to understand how DGL organizes heterogeneous graphs for distributed training. The input graph for ParMETIS is stored in three files with the following names: ``xxx_nodes.txt``, ``xxx_edges.txt`` and ``xxx_stats.txt``, where ``xxx`` is a graph name.
Each row in ``xxx_nodes.txt`` stores the information of a node. Row ID is also the *homogeneous* ID of a node, e.g., row 0 is for node 0; row 1 is for node 1, etc. Each row has the following format: .. code-block:: none <node_type> <node_weight_list> <type_wise_node_id> All fields are separated by whitespace: * ``<node_type>`` is an integer starting from 0. Each node type is mapped to an integer. For a homogeneous graph, its value is always 0. * ``<node_weight_list>`` are integers (separated by whitespace) that indicate the node weights used by ParMETIS to balance graph partitions. For homogeneous graphs, the list has only one integer while for heterogeneous graphs with :math:`T` node types, the list should have :math:`T` integers. If the node belongs to node type :math:`t`, then all the integers except the :math:`t^{th}` one are zero; the :math:`t^{th}` integer is the weight of that node. ParMETIS will try to balance the total node weight of each partition. For heterogeneous graph, it will try to distribute nodes of the same type to all partitions. The recommended node weights are 1 for balancing the number of nodes in each partition or node degrees for balancing the number of edges in each partition. * ``<type_wise_node_id>`` is an integer representing the node ID in its own type. Below shows an example of a node file for a heterogeneous graph with two node types. Node type 0 has three nodes; node type 1 has four nodes. It uses two node weights to ensure that ParMETIS will generate partitions with roughly the same number of nodes for type 0 and the same number of nodes for type 1. .. code-block:: none 0 1 0 0 0 1 0 1 0 1 0 2 1 0 1 0 1 0 1 1 1 0 1 2 1 0 1 3 Similarly, each row in ``xxx_edges.txt`` stores the information of an edge. Row ID is also the *homogeneous* ID of an edge, e.g., row 0 is for edge 0; row 1 is for edge 1, etc. Each row has the following format: .. code-block:: none <src_node_id> <dst_node_id> <type_wise_edge_id> <edge_type> All fields are separated by whitespace: * ``<src_node_id>`` is the *homogeneous* ID of the source node. * ``<dst_node_id>`` is the *homogeneous* ID of the destination node.
* ``<type_wise_edge_id>`` is the edge ID for the edge type. * ``<edge_type>`` is an integer starting from 0. Each edge type is mapped to an integer. For a homogeneous graph, its value is always 0. ``xxx_stats.txt`` stores some basic statistics of the graph. It has only one line with three fields separated by whitespace: .. code-block:: none <num_nodes> <num_edges> <total_node_weights> * ``num_nodes`` stores the total number of nodes regardless of node types. * ``num_edges`` stores the total number of edges regardless of edge types. * ``total_node_weights`` stores the number of node weights in the node file. Run ParMETIS and output format ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ParMETIS contains a command called ``pm_dglpart``, which loads the graph stored in the three files from the machine where ``pm_dglpart`` is invoked, distributes data to all machines in the cluster and invokes ParMETIS to partition the graph. When it completes, it generates three files for each partition: ``p-xxx_nodes.txt``, ``p-xxx_edges.txt``, ``p-xxx_stats.txt``. .. note:: ParMETIS reassigns IDs to nodes during the partitioning. After ID reassignment, the nodes in a partition are assigned with contiguous IDs; furthermore, the nodes of the same type are assigned with contiguous IDs. ``p-xxx_nodes.txt`` stores the node data of the partition. Each row represents a node with the following fields: .. code-block:: none <node_id> <node_type> <node_weight_list> <type_wise_node_id> * ``<node_id>`` is the *homogeneous* node ID after ID reassignment. * ``<node_type>`` is the node type ID. * ``<node_weight_list>`` is the node weight used by ParMETIS (copied from the input file). * ``<type_wise_node_id>`` is an integer representing the node ID in its own type. ``p-xxx_edges.txt`` stores the edge data of the partition. Each row represents an edge with the following fields: .. code-block:: none <src_id> <dst_id> <orig_src_id> <orig_dst_id> <type_wise_edge_id> <edge_type> * ``<src_id>`` is the *homogeneous* ID of the source node after ID reassignment. * ``<dst_id>`` is the *homogeneous* ID of the destination node after ID reassignment. * ``<orig_src_id>`` is the *homogeneous* ID of the source node in the input graph. * ``<orig_dst_id>`` is the *homogeneous* ID of the destination node in the input graph.
* ``<type_wise_edge_id>`` is the edge ID in its own type. * ``<edge_type>`` is the edge type ID. When invoking ``pm_dglpart``, the three input files: ``xxx_nodes.txt``, ``xxx_edges.txt``, ``xxx_stats.txt`` should be located in the directory where ``pm_dglpart`` runs. The following command runs four ParMETIS processes to partition the graph named ``xxx`` into eight partitions (each process handles two partitions). .. code-block:: bash mpirun -np 4 pm_dglpart xxx 2 The output files from ParMETIS then need to be converted to the :ref:`partition assignment format ` in order to run subsequent preprocessing steps. ================================================ FILE: docs/source/guide/distributed-preprocessing.rst ================================================ .. _guide-distributed-preprocessing: 7.1 Data Preprocessing ------------------------------------------ Before launching training jobs, DGL requires the input data to be partitioned and distributed to the target machines. In order to handle different scales of graphs, DGL provides 2 partitioning approaches: * A partitioning API for graphs that can fit in a single machine memory. * A distributed partition pipeline for graphs beyond a single machine capacity. 7.1.1 Partitioning API ^^^^^^^^^^^^^^^^^^^^^^ For relatively small graphs, DGL provides a partitioning API :func:`~dgl.distributed.partition_graph` that partitions an in-memory :class:`~dgl.DGLGraph` object. It supports multiple partitioning algorithms such as random partitioning and `Metis `__. The benefit of Metis partitioning is that it can generate partitions with minimal edge cuts to reduce network communication for distributed training and inference. DGL uses the latest version of Metis with the options optimized for the real-world graphs with power-law distribution. After partitioning, the API constructs the partitioned results in a format that is easy to load during the training. For example, .. code-block:: python import dgl g = ...
# create or load a DGLGraph object dgl.distributed.partition_graph(g, 'mygraph', 2, 'data_root_dir') will output the following data files. .. code-block:: none data_root_dir/ |-- mygraph.json # metadata JSON. File name is the given graph name. |-- part0/ # data for partition 0 | |-- node_feats.dgl # node features stored in binary format | |-- edge_feats.dgl # edge features stored in binary format | |-- graph.dgl # graph structure of this partition stored in binary format | |-- part1/ # data for partition 1 |-- node_feats.dgl |-- edge_feats.dgl |-- graph.dgl Chapter :ref:`guide-distributed-partition` covers more details about the partition format. To distribute the partitions to a cluster, users can either save the data in some shared folder accessible by all machines, or copy the metadata JSON as well as the corresponding partition folder ``partX`` to the X^th machine. Using :func:`~dgl.distributed.partition_graph` requires an instance with large enough CPU RAM to hold the entire graph structure and features, which may not be viable for graphs with hundreds of billions of edges or large features. We describe how to use the *parallel data preparation pipeline* for such cases next. Load balancing ~~~~~~~~~~~~~~ When partitioning a graph, by default, METIS only balances the number of nodes in each partition. This can result in a suboptimal configuration, depending on the task at hand. For example, in the case of semi-supervised node classification, a trainer performs computation on a subset of labeled nodes in a local partition. A partitioning that only balances nodes in a graph (both labeled and unlabeled) may end up with computational load imbalance. To get a balanced workload in each partition, the partition API allows balancing between partitions with respect to the number of nodes in each node type, by specifying ``balance_ntypes`` in :func:`~dgl.distributed.partition_graph`.
Users can take advantage of this and treat nodes in the training set, validation set and test set as different node types. The following example treats nodes inside the training set and outside the training set as two types of nodes: .. code:: python dgl.distributed.partition_graph(g, 'graph_name', 4, '/tmp/test', balance_ntypes=g.ndata['train_mask']) In addition to balancing the node types, :func:`dgl.distributed.partition_graph` also allows balancing between in-degrees of nodes of different node types by specifying ``balance_edges``. This balances the number of edges incident to the nodes of different types. ID mapping ~~~~~~~~~~~~~ After partitioning, :func:`~dgl.distributed.partition_graph` remaps node and edge IDs so that nodes of the same partition are arranged together (in a consecutive ID range), making it easier to store partitioned node/edge features. The API also automatically shuffles the node/edge features according to the new IDs. However, some downstream tasks may want to recover the original node/edge IDs (such as extracting the computed node embeddings for later use). For such cases, pass ``return_mapping=True`` to :func:`~dgl.distributed.partition_graph`, which makes the API return the ID mappings between the remapped node/edge IDs and their original ones. For a homogeneous graph, it returns two vectors. The first vector maps every new node ID to its original ID; the second vector maps every new edge ID to its original ID. For a heterogeneous graph, it returns two dictionaries of vectors. The first dictionary contains the mapping for each node type; the second dictionary contains the mapping for each edge type. .. code:: python node_map, edge_map = dgl.distributed.partition_graph(g, 'graph_name', 4, '/tmp/test', balance_ntypes=g.ndata['train_mask'], return_mapping=True) # Let's assume that node_emb is saved from the distributed training.
orig_node_emb = th.zeros(node_emb.shape, dtype=node_emb.dtype) orig_node_emb[node_map] = node_emb Load partitioned graphs ^^^^^^^^^^^^^^^^^^^^^^^ DGL provides a :func:`dgl.distributed.load_partition` function to load one partition for inspection. .. code:: python >>> import dgl >>> # load partition 0 >>> part_data = dgl.distributed.load_partition('data_root_dir/graph_name.json', 0) >>> g, nfeat, efeat, partition_book, graph_name, ntypes, etypes = part_data # unpack >>> print(g) Graph(num_nodes=966043, num_edges=34270118, ndata_schemes={'orig_id': Scheme(shape=(), dtype=torch.int64), 'part_id': Scheme(shape=(), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64), 'inner_node': Scheme(shape=(), dtype=torch.int32)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'inner_edge': Scheme(shape=(), dtype=torch.int8), 'orig_id': Scheme(shape=(), dtype=torch.int64)}) As mentioned in the `ID mapping`_ section, each partition carries auxiliary information saved as ndata or edata such as original node/edge IDs, partition IDs, etc. Each partition not only saves nodes/edges it owns, but also includes nodes/edges that are adjacent to the partition (called **HALO** nodes/edges). The ``inner_node`` and ``inner_edge`` fields indicate whether a node/edge truly belongs to the partition (value is ``True``) or is a HALO node/edge (value is ``False``). The :func:`~dgl.distributed.load_partition` function loads all data at once. Users can load features or the partition book using the :func:`dgl.distributed.load_partition_feats` and :func:`dgl.distributed.load_partition_book` APIs respectively. 7.1.2 Distributed Graph Partitioning Pipeline ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To handle massive graph data that cannot fit in the CPU RAM of a single machine, DGL utilizes data chunking and parallel processing to reduce memory footprint and running time. The figure below illustrates the pipeline: ..
figure:: https://data.dgl.ai/asset/image/guide_7_distdataprep.png * The pipeline takes input data stored in *Chunked Graph Format* and produces and dispatches data partitions to the target machines. * **Step.1 Graph Partitioning:** It calculates the ownership of each partition and saves the results as a set of files called *partition assignment*. To speedup the step, some algorithms (e.g., ParMETIS) support parallel computing using multiple machines. * **Step.2 Data Dispatching:** Given the partition assignment, the step then physically partitions the graph data and dispatches them to the machines user specified. It also converts the graph data into formats that are suitable for distributed training and evaluation. The whole pipeline is modularized so that each step can be invoked individually. For example, users can replace Step.1 with some custom graph partition algorithm as long as it produces partition assignment files correctly. .. _guide-distributed-prep-chunk: Chunked Graph Format ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To run the pipeline, DGL requires the input graph to be stored in multiple data chunks. Each data chunk is the unit of data preprocessing and thus should fit into CPU RAM. In this section, we use the MAG240M-LSC data from `Open Graph Benchmark `__ as an example to describe the overall design, followed by a formal specification and tips for creating data in such format. Example: MAG240M-LSC ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The MAG240M-LSC graph is a heterogeneous academic graph extracted from the Microsoft Academic Graph (MAG), whose schema diagram is illustrated below: .. figure:: https://data.dgl.ai/asset/image/guide_7_mag240m.png Its raw data files are organized as follows: .. 
code-block:: none /mydata/MAG240M-LSC/ |-- meta.pt # A dictionary of the number of nodes for each type saved by torch.save, | # as well as num_classes |-- processed/ |-- author___affiliated_with___institution/ | |-- edge_index.npy # graph, 713 MB | |-- paper/ | |-- node_feat.npy # feature, 187 GB, (numpy memmap format) | |-- node_label.npy # label, 974 MB | |-- node_year.npy # year, 974 MB | |-- paper___cites___paper/ | |-- edge_index.npy # graph, 21 GB | |-- author___writes___paper/ |-- edge_index.npy # graph, 6 GB The graph has three node types (``"paper"``, ``"author"`` and ``"institution"``), three edge types/relations (``"cites"``, ``"writes"`` and ``"affiliated_with"``). The ``"paper"`` nodes have three attributes (``"feat"``, ``"label"``, ``"year"``), while other types of nodes and edges are featureless. Below are the data files when the graph is stored in DGL Chunked Graph Format: .. code-block:: none /mydata/MAG240M-LSC_chunked/ |-- metadata.json # metadata json file |-- edges/ # stores edge ID data | |-- writes-part1.csv | |-- writes-part2.csv | |-- affiliated_with-part1.csv | |-- affiliated_with-part2.csv | |-- cites-part1.csv | |-- cites-part2.csv | |-- node_data/ # stores node feature data |-- paper-feat-part1.npy |-- paper-feat-part2.npy |-- paper-label-part1.npy |-- paper-label-part2.npy |-- paper-year-part1.npy |-- paper-year-part2.npy All the data files are chunked into two parts, including the edges of each relation (e.g., writes, affiliated_with, cites) and node features. If the graph has edge features, they will be chunked into multiple files too. All ID data are stored in CSV (we will illustrate the contents soon) while node features are stored in numpy arrays. The ``metadata.json`` stores all the metadata information such as file names and chunk sizes (e.g., number of nodes, number of edges). ..
code-block:: python { "graph_name" : "MAG240M-LSC", # given graph name "node_type": ["author", "paper", "institution"], "num_nodes_per_chunk": [ [61191556, 61191556], # number of author nodes per chunk [61191553, 61191552], # number of paper nodes per chunk [12861, 12860] # number of institution nodes per chunk ], # The edge type name is a colon-joined string of source, edge, and destination type. "edge_type": [ "author:writes:paper", "author:affiliated_with:institution", "paper:cites:paper" ], "num_edges_per_chunk": [ [193011360, 193011360], # number of author:writes:paper edges per chunk [22296293, 22296293], # number of author:affiliated_with:institution edges per chunk [648874463, 648874463] # number of paper:cites:paper edges per chunk ], "edges" : { "author:writes:paper" : { # edge type "format" : {"name": "csv", "delimiter": " "}, # The list of paths. Can be relative or absolute. "data" : ["edges/writes-part1.csv", "edges/writes-part2.csv"] }, "author:affiliated_with:institution" : { "format" : {"name": "csv", "delimiter": " "}, "data" : ["edges/affiliated_with-part1.csv", "edges/affiliated_with-part2.csv"] }, "paper:cites:paper" : { "format" : {"name": "csv", "delimiter": " "}, "data" : ["edges/cites-part1.csv", "edges/cites-part2.csv"] } }, "node_data" : { "paper": { # node type "feat": { # feature key "format": {"name": "numpy"}, "data": ["node_data/paper-feat-part1.npy", "node_data/paper-feat-part2.npy"] }, "label": { # feature key "format": {"name": "numpy"}, "data": ["node_data/paper-label-part1.npy", "node_data/paper-label-part2.npy"] }, "year": { # feature key "format": {"name": "numpy"}, "data": ["node_data/paper-year-part1.npy", "node_data/paper-year-part2.npy"] } } }, "edge_data" : {} # MAG240M-LSC does not have edge features } There are three parts in ``metadata.json``: * Graph schema information and chunk sizes, e.g., ``"node_type"`` , ``"num_nodes_per_chunk"``, etc. * Edge index data under key ``"edges"``. 
* Node/edge feature data under keys ``"node_data"`` and ``"edge_data"``. The edge index files contain edges in the form of node ID pairs: .. code-block:: bash # writes-part1.csv 0 0 0 1 0 20 0 29 0 1203 ... Specification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In general, a chunked graph data folder just needs a ``metadata.json`` and a bunch of data files. The folder structure in the MAG240M-LSC example is not a strict requirement as long as ``metadata.json`` contains valid file paths. ``metadata.json`` top-level keys: * ``graph_name``: String. Unique name used by :class:`dgl.distributed.DistGraph` to load graph. * ``node_type``: List of string. Node type names. * ``num_nodes_per_chunk``: List of list of integer. For graphs with :math:`T` node types stored in :math:`P` chunks, the value contains :math:`T` integer lists. Each list contains :math:`P` integers, which specify the number of nodes in each chunk. * ``edge_type``: List of string. Edge type names in the form of ``::``. * ``num_edges_per_chunk``: List of list of integer. For graphs with :math:`R` edge types stored in :math:`P` chunks, the value contains :math:`R` integer lists. Each list contains :math:`P` integers, which specify the number of edges in each chunk. * ``edges``: Dict of ``ChunkFileSpec``. Edge index files. Dictionary keys are edge type names in the form of ``::``. * ``node_data``: Dict of ``ChunkFileSpec``. Data files that store node attributes could have arbitrary number of files regardless of ``num_parts``. Dictionary keys are node type names. * ``edge_data``: Dict of ``ChunkFileSpec``. Data files that store edge attributes could have arbitrary number of files regardless of ``num_parts``. Dictionary keys are edge type names in the form of ``::``. ``ChunkFileSpec`` has two keys: * ``format``: File format. Depending on the format ``name``, users can configure more details about how to parse each data file. - ``"csv"``: CSV file. Use the ``delimiter`` key to specify delimiter in use. 
- ``"numpy"``: NumPy array binary file created by :func:`numpy.save`. - ``"parquet"``: parquet table binary file created by :func:`pyarrow.parquet.write_table`. * ``data``: List of string. File path to each data chunk. Paths can be relative or absolute. Tips for making chunked graph data ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Depending on the raw data, the implementation could include: * Construct graphs out of non-structured data such as texts or tabular data. * Augment or transform the input graph structure or features. E.g., adding reverse or self-loop edges, normalizing features, etc. * Chunk the input graph structure and features into multiple data files so that each one can fit in CPU RAM for subsequent preprocessing steps. To avoid running into out-of-memory errors, it is recommended to process graph structures and feature data separately. Processing one chunk at a time can also reduce the maximal runtime memory footprint. As an example, DGL provides a `tools/chunk_graph.py `_ script that chunks an in-memory feature-less :class:`~dgl.DGLGraph` and feature tensors stored in :class:`numpy.memmap`. .. _guide-distributed-prep-partition: Step.1 Graph Partitioning ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This step reads the chunked graph data and calculates which partition each node should belong to. The results are saved in a set of *partition assignment files*. For example, to randomly partition MAG240M-LSC into two parts, run the ``partition_algo/random_partition.py`` script in the ``tools`` folder: .. code-block:: bash python /my/repo/dgl/tools/partition_algo/random_partition.py --in_dir /mydata/MAG240M-LSC_chunked --out_dir /mydata/MAG240M-LSC_2parts --num_partitions 2 The script outputs files as follows: .. code-block:: none MAG240M-LSC_2parts/ |-- paper.txt |-- author.txt |-- institution.txt Each file stores the partition assignment of the corresponding node type. The contents are the partition ID of each node stored in lines, i.e., line i is the partition ID of node i.
.. code-block:: bash # paper.txt 0 1 1 0 0 1 0 ... Despite its simplicity, random partitioning may result in frequent cross-machine communication. Check out chapter :ref:`guide-distributed-partition` for more advanced options. Step.2 Data Dispatching ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ DGL provides a ``dispatch_data.py`` script to physically partition the data and dispatch partitions to each training machine. It will also convert the data once again to data objects that can be loaded by DGL training processes efficiently. The entire step can be further accelerated using multi-processing. .. code-block:: bash python /myrepo/dgl/tools/dispatch_data.py \ --in-dir /mydata/MAG240M-LSC_chunked/ \ --partitions-dir /mydata/MAG240M-LSC_2parts/ \ --out-dir data/MAG_LSC_partitioned \ --ip-config ip_config.txt * ``--in-dir`` specifies the path to the folder of the input chunked graph data. * ``--partitions-dir`` specifies the path to the partition assignment folder produced by Step.1. * ``--out-dir`` specifies the path where the data partitions are stored on each machine. * ``--ip-config`` specifies the IP configuration file of the cluster. An example IP configuration file is as follows: .. code-block:: bash 172.31.19.1 172.31.23.205 As a counterpart of ``return_mapping=True`` in :func:`~dgl.distributed.partition_graph`, the :ref:`distributed partitioning pipeline <guide-distributed-preprocessing>` provides two arguments in ``dispatch_data.py`` to save the original node/edge IDs to disk. * ``--save-orig-nids`` saves the original node IDs into files. * ``--save-orig-eids`` saves the original edge IDs into files. Specifying the two options will create two files ``orig_nids.dgl`` and ``orig_eids.dgl`` under each partition folder. .. code-block:: none data_root_dir/ |-- graph_name.json # partition configuration file in JSON |-- part0/ # data for partition 0 | |-- orig_nids.dgl # original node IDs | |-- orig_eids.dgl # original edge IDs | |-- ...
# other data such as graph and node/edge feats | |-- part1/ # data for partition 1 | |-- orig_nids.dgl | |-- orig_eids.dgl | |-- ... | |-- ... # data for other partitions The two files store the original IDs as a dictionary of tensors, where keys are node/edge type names and values are ID tensors. Users can use the :func:`dgl.data.load_tensors` utility to load them: .. code:: python # Load the original IDs for the nodes in partition 0. orig_nids_0 = dgl.data.load_tensors('/path/to/data/part0/orig_nids.dgl') # Get the original node IDs for node type 'user' user_orig_nids_0 = orig_nids_0['user'] # Load the original IDs for the edges in partition 0. orig_eids_0 = dgl.data.load_tensors('/path/to/data/part0/orig_eids.dgl') # Get the original edge IDs for edge type 'like' like_orig_eids_0 = orig_eids_0['like'] During data dispatching, DGL assumes that the combined CPU RAM of the cluster is able to hold the entire graph data. Node ownership is determined by the result of the partitioning algorithm, whereas each edge is owned by the owner of its destination node. ================================================ FILE: docs/source/guide/distributed-tools.rst ================================================ .. _guide-distributed-tools: 7.2 Tools for launching distributed training/inference ------------------------------------------------------ DGL provides a launching script ``launch.py`` under `dgl/tools `__ to launch a distributed training job in a cluster. This script makes the following assumptions: * The partitioned data and the training script have been provisioned to the cluster or a shared storage (e.g., NFS) accessible to all the worker machines. * The machine that invokes ``launch.py`` has passwordless ssh access to all other machines. The launching machine must be one of the worker machines. Below is an example of launching a distributed training job in a cluster. ..
code:: bash python3 tools/launch.py \ --workspace /my/workspace/ \ --num_trainers 2 \ --num_samplers 4 \ --num_servers 1 \ --part_config data/mygraph.json \ --ip_config ip_config.txt \ "python3 my_train_script.py" The argument specifies the workspace path, where to find the partition metadata JSON and machine IP configurations, how many trainer, sampler, and server processes to be launched on each machine. The last argument is the command to launch which is usually the model training/evaluation script. Each line of ``ip_config.txt`` is the IP address of a machine in the cluster. Optionally, the IP address can be followed by a network port (default is ``30050``). A typical example is as follows: .. code:: none 172.31.19.1 172.31.23.205 172.31.29.175 172.31.16.98 The workspace specified in the launch script is the working directory in the machines, which contains the training script, the IP configuration file, the partition configuration file as well as the graph partitions. All paths of the files should be specified as relative paths to the workspace. The launch script creates a specified number of training jobs (``--num_trainers``) on each machine. In addition, users need to specify the number of sampler processes for each trainer (``--num_samplers``). ================================================ FILE: docs/source/guide/distributed.rst ================================================ .. _guide-distributed: Chapter 7: Distributed Training ===================================== :ref:`(中文版) ` .. note:: Distributed training is only available for PyTorch backend. DGL adopts a fully distributed approach that distributes both data and computation across a collection of computation resources. In the context of this section, we will assume a cluster setting (i.e., a group of machines). DGL partitions a graph into subgraphs and each machine in a cluster is responsible for one subgraph (partition). 
DGL runs an identical training script on all machines in the cluster to parallelize the computation and runs servers on the same machines to serve partitioned data to the trainers. For the training script, DGL provides distributed APIs that are similar to the ones for mini-batch training. This makes distributed training require only small code modifications from mini-batch training on a single machine. Below shows an example of training GraphSage in a distributed fashion. The notable code modifications are: 1) initialization of DGL's distributed module, 2) create a distributed graph object, and 3) split the training set and calculate the nodes for the local process. The rest of the code, including sampler creation, model definition, training loops are the same as :ref:`mini-batch training `. .. code:: python import dgl from dgl.dataloading import NeighborSampler from dgl.distributed import DistGraph, DistDataLoader, node_split import torch as th # initialize distributed contexts dgl.distributed.initialize('ip_config.txt') th.distributed.init_process_group(backend='gloo') # load distributed graph g = DistGraph('graph_name', 'part_config.json') pb = g.get_partition_book() # get training workload, i.e., training node IDs train_nid = node_split(g.ndata['train_mask'], pb, force_even=True) # Create sampler sampler = NeighborSampler(g, [10,25], dgl.distributed.sample_neighbors, device) dataloader = DistDataLoader( dataset=train_nid.numpy(), batch_size=batch_size, collate_fn=sampler.sample_blocks, shuffle=True, drop_last=False) # Define model and optimizer model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout) model = th.nn.parallel.DistributedDataParallel(model) loss_fcn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=args.lr) # training loop for epoch in range(args.num_epochs): with model.join(): for step, blocks in enumerate(dataloader): batch_inputs, batch_labels = load_subtensor(g, blocks[0].srcdata[dgl.NID], 
blocks[-1].dstdata[dgl.NID]) batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() DGL implements a few distributed components to support distributed training. The figure below shows the components and their interactions. .. figure:: https://data.dgl.ai/asset/image/distributed.png :alt: Imgur Specifically, DGL's distributed training has three types of interacting processes: *server*, *sampler* and *trainer*. * **Servers** store graph partitions which includes both structure data and node/edge features. They provide services such as sampling, getting or updating node/edge features. Note that each machine may run multiple server processes simultaneously to increase service throughput. One of them is *main server* in charge of data loading and sharing data via shared memory with *backup servers* that provide services. * **Sampler processes** interact with the servers and sample nodes and edges to generate mini-batches for training. * **Trainers** are in charge of training networks on mini-batches. They utilize APIs such as :class:`~dgl.distributed.DistGraph` to access partitioned graph data, :class:`~dgl.distributed.DistEmbedding` and :class:`~dgl.distributed.DistTensor` to access node/edge features/embeddings and :class:`~dgl.distributed.DistDataLoader` to interact with samplers to get mini-batches. Trainers communicate gradients among each other using PyTorch's native ``DistributedDataParallel`` paradigm. Besides Python APIs, DGL also provides `tools `__ for provisioning graph data and processes to the entire cluster. Having the distributed components in mind, the rest of the section will cover the following distributed components: * :ref:`guide-distributed-preprocessing` * :ref:`guide-distributed-tools` * :ref:`guide-distributed-apis` For more advanced users who are interested in more details: * :ref:`guide-distributed-partition` * :ref:`guide-distributed-hetero` .. 
toctree:: :maxdepth: 1 :hidden: :glob: distributed-preprocessing distributed-tools distributed-apis distributed-partition distributed-hetero ================================================ FILE: docs/source/guide/graph-basic.rst ================================================ .. _guide-graph-basic: 1.1 Some Basic Definitions about Graphs (Graphs 101) ---------------------------------------------------- :ref:`(中文版)` A graph :math:`G=(V, E)` is a structure used to represent entities and their relations. It consists of two sets -- the set of nodes :math:`V` (also called vertices) and the set of edges :math:`E` (also called arcs). An edge :math:`(u, v) \in E` connecting a pair of nodes :math:`u` and :math:`v` indicates that there is a relation between them. The relation can either be undirected, e.g., capturing symmetric relations between nodes, or directed, capturing asymmetric relations. For example, if a graph is used to model the friendships relations of people in a social network, then the edges will be undirected as friendship is mutual; however, if the graph is used to model how people follow each other on Twitter, then the edges are directed. Depending on the edges' directionality, a graph can be *directed* or *undirected*. Graphs can be *weighted* or *unweighted*. In a weighted graph, each edge is associated with a scalar weight. For example, such weights might represent lengths or connectivity strengths. Graphs can also be either *homogeneous* or *heterogeneous*. In a homogeneous graph, all the nodes represent instances of the same type and all the edges represent relations of the same type. For instance, a social network is a graph consisting of people and their connections, representing the same entity type. In contrast, in a heterogeneous graph, the nodes and edges can be of different types. 
For instance, the graph encoding a marketplace will have buyer, seller, and product nodes that are connected via wants-to-buy, has-bought, is-customer-of, and is-selling edges. The bipartite graph is a special, commonly-used type of heterogeneous graph, where edges exist between nodes of two different types. For example, in a recommender system, one can use a bipartite graph to represent the interactions between users and items. For working with heterogeneous graphs in DGL, see :ref:`guide-graph-heterogeneous`. Multigraphs are graphs that can have multiple (directed) edges between the same pair of nodes, including self loops. For instance, two authors can coauthor a paper in different years, resulting in edges with different features. ================================================ FILE: docs/source/guide/graph-external.rst ================================================ .. _guide-graph-external: 1.4 Creating Graphs from External Sources ----------------------------------------- :ref:`(中文版)` The options to construct a :class:`~dgl.DGLGraph` from external sources include: - Conversion from external python libraries for graphs and sparse matrices (NetworkX and SciPy). - Loading graphs from disk. The section does not cover functions that generate graphs by transforming from other graphs. See the API reference manual for an overview of them. Creating Graphs from External Libraries ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The following code snippet is an example for creating a graph from a SciPy sparse matrix and a NetworkX graph. .. 
code:: >>> import dgl >>> import torch as th >>> import scipy.sparse as sp >>> spmat = sp.rand(100, 100, density=0.05) # 5% nonzero entries >>> dgl.from_scipy(spmat) # from SciPy Graph(num_nodes=100, num_edges=500, ndata_schemes={} edata_schemes={}) >>> import networkx as nx >>> nx_g = nx.path_graph(5) # a chain 0-1-2-3-4 >>> dgl.from_networkx(nx_g) # from networkx Graph(num_nodes=5, num_edges=8, ndata_schemes={} edata_schemes={}) Note that when constructing from the `nx.path_graph(5)`, the resulting :class:`~dgl.DGLGraph` has 8 edges instead of 4. This is because `nx.path_graph(5)` constructs an undirected NetworkX graph :class:`networkx.Graph` while a :class:`~dgl.DGLGraph` is always directed. In converting an undirected NetworkX graph into a :class:`~dgl.DGLGraph`, DGL internally converts undirected edges to two directed edges. Using directed NetworkX graphs :class:`networkx.DiGraph` can avoid such behavior. .. code:: >>> nxg = nx.DiGraph([(2, 1), (1, 2), (2, 3), (0, 0)]) >>> dgl.from_networkx(nxg) Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={}) .. note:: DGL internally converts SciPy matrices and NetworkX graphs to tensors to construct graphs. Hence, these construction methods are not meant for performance critical parts. See APIs: :func:`dgl.from_scipy`, :func:`dgl.from_networkx`. Loading Graphs from Disk ^^^^^^^^^^^^^^^^^^^^^^^^ There are many data formats for storing graphs and it isn't possible to enumerate every option. Thus, this section only gives some general pointers on certain common ones. Comma Separated Values (CSV) """""""""""""""""""""""""""" One very common format is CSV, which stores nodes, edges, and their features in a tabular format: .. table:: nodes.csv +-----------+ |age, title | +===========+ |43, 1 | +-----------+ |23, 3 | +-----------+ |... | +-----------+ .. table:: edges.csv +-----------------+ |src, dst, weight | +=================+ |0, 1, 0.4 | +-----------------+ |0, 3, 0.9 | +-----------------+ |... 
| +-----------------+ There are known Python libraries (e.g. pandas) for loading this type of data into python objects (e.g., :class:`numpy.ndarray`), which can then be used to construct a DGLGraph. If the backend framework also provides utilities to save/load tensors from disk (e.g., :func:`torch.save`, :func:`torch.load`), one can follow the same principle to build a graph. See also: `Tutorial for loading a Karate Club Network from edge pairs CSV `_. JSON/GML Format """"""""""""""" Though not particularly fast, NetworkX provides many utilities to parse `a variety of data formats `_ which indirectly allows DGL to create graphs from these sources. DGL Binary Format """"""""""""""""" DGL provides APIs to save and load graphs from disk stored in binary format. Apart from the graph structure, the APIs also handle feature data and graph-level label data. DGL also supports checkpointing graphs directly to S3 or HDFS. The reference manual provides more details about the usage. See APIs: :func:`dgl.save_graphs`, :func:`dgl.load_graphs`. ================================================ FILE: docs/source/guide/graph-feature.rst ================================================ .. _guide-graph-feature: 1.3 Node and Edge Features -------------------------- :ref:`(中文版)` The nodes and edges of a :class:`~dgl.DGLGraph` can have several user-defined named features for storing graph-specific properties of the nodes and edges. These features can be accessed via the :py:attr:`~dgl.DGLGraph.ndata` and :py:attr:`~dgl.DGLGraph.edata` interface. For example, the following code creates two node features (named ``'x'`` and ``'y'`` in line 8 and 15) and one edge feature (named ``'x'`` in line 9). .. 
code-block:: python :linenos: >>> import dgl >>> import torch as th >>> g = dgl.graph(([0, 0, 1, 5], [1, 2, 2, 0])) # 6 nodes, 4 edges >>> g Graph(num_nodes=6, num_edges=4, ndata_schemes={} edata_schemes={}) >>> g.ndata['x'] = th.ones(g.num_nodes(), 3) # node feature of length 3 >>> g.edata['x'] = th.ones(g.num_edges(), dtype=th.int32) # scalar integer feature >>> g Graph(num_nodes=6, num_edges=4, ndata_schemes={'x' : Scheme(shape=(3,), dtype=torch.float32)} edata_schemes={'x' : Scheme(shape=(,), dtype=torch.int32)}) >>> # different names can have different shapes >>> g.ndata['y'] = th.randn(g.num_nodes(), 5) >>> g.ndata['x'][1] # get node 1's feature tensor([1., 1., 1.]) >>> g.edata['x'][th.tensor([0, 3])] # get features of edge 0 and 3 tensor([1, 1], dtype=torch.int32) Important facts about the :py:attr:`~dgl.DGLGraph.ndata`/:py:attr:`~dgl.DGLGraph.edata` interface: - Only features of numerical types (e.g., float, double, and int) are allowed. They can be scalars, vectors or multi-dimensional tensors. - Each node feature has a unique name and each edge feature has a unique name. The features of nodes and edges can have the same name. (e.g., 'x' in the above example). - A feature is created via tensor assignment, which assigns a feature to each node/edge in the graph. The leading dimension of that tensor must be equal to the number of nodes/edges in the graph. You cannot assign a feature to a subset of the nodes/edges in the graph. - Features of the same name must have the same dimensionality and data type. - The feature tensor is in row-major layout -- each row-slice stores the feature of one node or edge (e.g., see lines 16 and 18 in the above example). For weighted graphs, one can store the weights as an edge feature as below. .. 
code-block:: python >>> # edges 0->1, 0->2, 0->3, 1->3 >>> edges = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3]) >>> weights = th.tensor([0.1, 0.6, 0.9, 0.7]) # weight of each edge >>> g = dgl.graph(edges) >>> g.edata['w'] = weights # give it a name 'w' >>> g Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={'w' : Scheme(shape=(,), dtype=torch.float32)}) See APIs: :py:attr:`~dgl.DGLGraph.ndata`, :py:attr:`~dgl.DGLGraph.edata`. ================================================ FILE: docs/source/guide/graph-gpu.rst ================================================ .. _guide-graph-gpu: 1.6 Using DGLGraph on a GPU --------------------------- :ref:`(中文版)` One can create a :class:`~dgl.DGLGraph` on a GPU by passing two GPU tensors during construction. Another approach is to use the :func:`~dgl.DGLGraph.to` API to copy a :class:`~dgl.DGLGraph` to a GPU, which copies the graph structure as well as the feature data to the given device. .. code:: >>> import dgl >>> import torch as th >>> u, v = th.tensor([0, 1, 2]), th.tensor([2, 3, 4]) >>> g = dgl.graph((u, v)) >>> g.ndata['x'] = th.randn(5, 3) # original feature is on CPU >>> g.device device(type='cpu') >>> cuda_g = g.to('cuda:0') # accepts any device objects from backend framework >>> cuda_g.device device(type='cuda', index=0) >>> cuda_g.ndata['x'].device # feature data is copied to GPU too device(type='cuda', index=0) >>> # A graph constructed from GPU tensors is also on GPU >>> u, v = u.to('cuda:0'), v.to('cuda:0') >>> g = dgl.graph((u, v)) >>> g.device device(type='cuda', index=0) Any operations involving a GPU graph are performed on a GPU. Thus, they require all tensor arguments to be placed on GPU already and the results (graph or tensor) will be on GPU too. Furthermore, a GPU graph only accepts feature data on a GPU. .. 
code:: >>> cuda_g.in_degrees() tensor([0, 0, 1, 1, 1], device='cuda:0') >>> cuda_g.in_edges([2, 3, 4]) # ok for non-tensor type arguments (tensor([0, 1, 2], device='cuda:0'), tensor([2, 3, 4], device='cuda:0')) >>> cuda_g.in_edges(th.tensor([2, 3, 4]).to('cuda:0')) # tensor type must be on GPU (tensor([0, 1, 2], device='cuda:0'), tensor([2, 3, 4], device='cuda:0')) >>> cuda_g.ndata['h'] = th.randn(5, 4) # ERROR! feature must be on GPU too! DGLError: Cannot assign node feature "h" on device cpu to a graph on device cuda:0. Call DGLGraph.to() to copy the graph to the same device. ================================================ FILE: docs/source/guide/graph-graphs-nodes-edges.rst ================================================ .. _guide-graph-graphs-nodes-edges: 1.2 Graphs, Nodes, and Edges ---------------------------- :ref:`(中文版)` DGL represents each node by a unique integer, called its node ID, and each edge by a pair of integers corresponding to the IDs of its end nodes. DGL assigns to each edge a unique integer, called its **edge ID**, based on the order in which it was added to the graph. The numbering of node and edge IDs starts from 0. In DGL, all the edges are directed, and an edge :math:`(u, v)` indicates that the direction goes from node :math:`u` to node :math:`v`. To specify multiple nodes, DGL uses a 1-D integer tensor (i.e., PyTorch's tensor, TensorFlow's Tensor, or MXNet's ndarray) of node IDs. DGL calls this format "node-tensors". To specify multiple edges, it uses a tuple of node-tensors :math:`(U, V)`. :math:`(U[i], V[i])` decides an edge from :math:`U[i]` to :math:`V[i]`. One way to create a :class:`~dgl.DGLGraph` is to use the :func:`dgl.graph` method, which takes as input a set of edges. DGL also supports creating graphs from other data sources, see :ref:`guide-graph-external`. 
The following code snippet uses the :func:`dgl.graph` method to create a :class:`~dgl.DGLGraph` corresponding to the four-node graph shown below and illustrates some of its APIs for querying the graph's structure. .. figure:: https://data.dgl.ai/asset/image/user_guide_graphch_1.png :height: 200px :width: 300px :align: center .. code:: >>> import dgl >>> import torch as th >>> # edges 0->1, 0->2, 0->3, 1->3 >>> u, v = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3]) >>> g = dgl.graph((u, v)) >>> print(g) # number of nodes are inferred from the max node IDs in the given edges Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={}) >>> # Node IDs >>> print(g.nodes()) tensor([0, 1, 2, 3]) >>> # Edge end nodes >>> print(g.edges()) (tensor([0, 0, 0, 1]), tensor([1, 2, 3, 3])) >>> # Edge end nodes and edge IDs >>> print(g.edges(form='all')) (tensor([0, 0, 0, 1]), tensor([1, 2, 3, 3]), tensor([0, 1, 2, 3])) >>> # If the node with the largest ID is isolated (meaning no edges), >>> # then one needs to explicitly set the number of nodes >>> g = dgl.graph((u, v), num_nodes=8) For an undirected graph, one needs to create edges for both directions. :func:`dgl.to_bidirected` can be helpful in this case, which converts a graph into a new one with edges for both directions. .. code:: >>> bg = dgl.to_bidirected(g) >>> bg.edges() (tensor([0, 0, 0, 1, 1, 2, 3, 3]), tensor([1, 2, 3, 0, 3, 0, 0, 1])) .. note:: Tensor types are generally preferred throughout DGL APIs due to their efficient internal storage in C and explicit data type and device context information. However, most DGL APIs do support python iterable (e.g., list) or numpy.ndarray as arguments for quick prototyping. DGL can use either :math:`32`- or :math:`64`-bit integers to store the node and edge IDs. The data types for the node and edge IDs should be the same. By using :math:`64` bits, DGL can handle graphs with up to :math:`2^{63} - 1` nodes or edges. 
However, if a graph contains less than :math:`2^{31} - 1` nodes or edges, one should use :math:`32`-bit integers as it leads to better speed and requires less memory. DGL provides methods for making such conversions. See below for an example. .. code:: >>> edges = th.tensor([2, 5, 3]), th.tensor([3, 5, 0]) # edges 2->3, 5->5, 3->0 >>> g64 = dgl.graph(edges) # DGL uses int64 by default >>> print(g64.idtype) torch.int64 >>> g32 = dgl.graph(edges, idtype=th.int32) # create a int32 graph >>> g32.idtype torch.int32 >>> g64_2 = g32.long() # convert to int64 >>> g64_2.idtype torch.int64 >>> g32_2 = g64.int() # convert to int32 >>> g32_2.idtype torch.int32 See APIs: :func:`dgl.graph`, :func:`dgl.DGLGraph.nodes`, :func:`dgl.DGLGraph.edges`, :func:`dgl.to_bidirected`, :func:`dgl.DGLGraph.int`, :func:`dgl.DGLGraph.long`, and :py:attr:`dgl.DGLGraph.idtype`. ================================================ FILE: docs/source/guide/graph-heterogeneous.rst ================================================ .. _guide-graph-heterogeneous: 1.5 Heterogeneous Graphs ------------------------ :ref:`(中文版)` A heterogeneous graph can have nodes and edges of different types. Nodes/Edges of different types have independent ID space and feature storage. For example in the figure below, the user and game node IDs both start from zero and they have different features. .. figure:: https://data.dgl.ai/asset/image/user_guide_graphch_2.png An example heterogeneous graph with two types of nodes (user and game) and two types of edges (follows and plays). Creating a Heterogeneous Graph ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In DGL, a heterogeneous graph (heterograph for short) is specified with a series of graphs as below, one per relation. Each relation is a string triplet ``(source node type, edge type, destination node type)``. Since relations disambiguate the edge types, DGL calls them canonical edge types. The following code snippet is an example for creating a heterogeneous graph in DGL. .. 
code:: >>> import dgl >>> import torch as th >>> # Create a heterograph with 3 node types and 3 edges types. >>> graph_data = { ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... } >>> g = dgl.heterograph(graph_data) >>> g.ntypes ['disease', 'drug', 'gene'] >>> g.etypes ['interacts', 'interacts', 'treats'] >>> g.canonical_etypes [('drug', 'interacts', 'drug'), ('drug', 'interacts', 'gene'), ('drug', 'treats', 'disease')] Note that homogeneous and bipartite graphs are just special heterogeneous graphs with one relation. .. code:: >>> # A homogeneous graph >>> dgl.heterograph({('node_type', 'edge_type', 'node_type'): (u, v)}) >>> # A bipartite graph >>> dgl.heterograph({('source_type', 'edge_type', 'destination_type'): (u, v)}) The *metagraph* associated with a heterogeneous graph is the schema of the graph. It specifies type constraints on the sets of nodes and edges between the nodes. A node :math:`u` in a metagraph corresponds to a node type in the associated heterograph. An edge :math:`(u, v)` in a metagraph indicates that there are edges from nodes of type :math:`u` to nodes of type :math:`v` in the associated heterograph. .. code:: >>> g Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4}, num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1}, metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')]) >>> g.metagraph().edges() OutMultiEdgeDataView([('drug', 'drug'), ('drug', 'gene'), ('drug', 'disease')]) See APIs: :func:`dgl.heterograph`, :py:attr:`~dgl.DGLGraph.ntypes`, :py:attr:`~dgl.DGLGraph.etypes`, :py:attr:`~dgl.DGLGraph.canonical_etypes`, :py:attr:`~dgl.DGLGraph.metagraph`. 
Working with Multiple Types ^^^^^^^^^^^^^^^^^^^^^^^^^^^ When multiple node/edge types are introduced, users need to specify the particular node/edge type when invoking a DGLGraph API for type-specific information. In addition, nodes/edges of different types have separate IDs. .. code:: >>> # Get the number of all nodes in the graph >>> g.num_nodes() 10 >>> # Get the number of drug nodes >>> g.num_nodes('drug') 3 >>> # Nodes of different types have separate IDs, >>> # hence not well-defined without a type specified >>> g.nodes() DGLError: Node type name must be specified if there are more than one node types. >>> g.nodes('drug') tensor([0, 1, 2]) To set/get features for a specific node/edge type, DGL provides two new types of syntax -- `g.nodes['node_type'].data['feat_name']` and `g.edges['edge_type'].data['feat_name']`. .. code:: >>> # Set/get feature 'hv' for nodes of type 'drug' >>> g.nodes['drug'].data['hv'] = th.ones(3, 1) >>> g.nodes['drug'].data['hv'] tensor([[1.], [1.], [1.]]) >>> # Set/get feature 'he' for edge of type 'treats' >>> g.edges['treats'].data['he'] = th.zeros(1, 1) >>> g.edges['treats'].data['he'] tensor([[0.]]) If the graph only has one node/edge type, there is no need to specify the node/edge type. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'is similar', 'drug'): (th.tensor([0, 1]), th.tensor([2, 3])) ... }) >>> g.nodes() tensor([0, 1, 2, 3]) >>> # To set/get feature with a single type, no need to use the new syntax >>> g.ndata['hv'] = th.ones(4, 1) .. note:: When the edge type uniquely determines the types of source and destination nodes, one can just use one string instead of a string triplet to specify the edge type. For example, for a heterograph with two relations ``('user', 'plays', 'game')`` and ``('user', 'likes', 'game')``, it is safe to just use ``'plays'`` or ``'likes'`` to refer to the two relations. 
Loading Heterographs from Disk ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Comma Separated Values (CSV) """""""""""""""""""""""""""" A common way to store a heterograph is to store nodes and edges of different types in different CSV files. An example is as follows. .. code:: # data folder data/ |-- drug.csv # drug nodes |-- gene.csv # gene nodes |-- disease.csv # disease nodes |-- drug-interact-drug.csv # drug-drug interaction edges |-- drug-interact-gene.csv # drug-gene interaction edges |-- drug-treat-disease.csv # drug-treat-disease edges Similar to the case of homogeneous graphs, one can use packages like Pandas to parse CSV files into numpy arrays or framework tensors, build a relation dictionary and construct a heterograph from that. The approach also applies to other popular formats like GML/JSON. DGL Binary Format """"""""""""""""" DGL provides :func:`dgl.save_graphs` and :func:`dgl.load_graphs` respectively for saving heterogeneous graphs in binary format and loading them from binary format. Edge Type Subgraph ^^^^^^^^^^^^^^^^^^ One can create a subgraph of a heterogeneous graph by specifying the relations to retain, with features copied if any. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... }) >>> g.nodes['drug'].data['hv'] = th.ones(3, 1) >>> # Retain relations ('drug', 'interacts', 'drug') and ('drug', 'treats', 'disease') >>> # All nodes for 'drug' and 'disease' will be retained >>> eg = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'), ... 
('drug', 'treats', 'disease')]) >>> eg Graph(num_nodes={'disease': 3, 'drug': 3}, num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'treats', 'disease'): 1}, metagraph=[('drug', 'drug', 'interacts'), ('drug', 'disease', 'treats')]) >>> # The associated features will be copied as well >>> eg.nodes['drug'].data['hv'] tensor([[1.], [1.], [1.]]) Converting Heterogeneous Graphs to Homogeneous Graphs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Heterographs provide a clean interface for managing nodes/edges of different types and their associated features. This is particularly helpful when: 1. The features for nodes/edges of different types have different data types or sizes. 2. We want to apply different operations to nodes/edges of different types. If the above conditions do not hold and one does not want to distinguish node/edge types in modeling, then DGL allows converting a heterogeneous graph to a homogeneous graph with :func:`dgl.DGLGraph.to_homogeneous` API. It proceeds as follows: 1. Relabels nodes/edges of all types using consecutive integers starting from 0 2. Merges the features across node/edge types specified by the user. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... 
('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))}) >>> g.nodes['drug'].data['hv'] = th.zeros(3, 1) >>> g.nodes['disease'].data['hv'] = th.ones(3, 1) >>> g.edges['interacts'].data['he'] = th.zeros(2, 1) >>> g.edges['treats'].data['he'] = th.zeros(1, 2) >>> # By default, it does not merge any features >>> hg = dgl.to_homogeneous(g) >>> 'hv' in hg.ndata False >>> # Copy edge features >>> # For feature copy, it expects features to have >>> # the same size and dtype across node/edge types >>> hg = dgl.to_homogeneous(g, edata=['he']) DGLError: Cannot concatenate column ‘he’ with shape Scheme(shape=(2,), dtype=torch.float32) and shape Scheme(shape=(1,), dtype=torch.float32) >>> # Copy node features >>> hg = dgl.to_homogeneous(g, ndata=['hv']) >>> hg.ndata['hv'] tensor([[1.], [1.], [1.], [0.], [0.], [0.]]) The original node/edge types and type-specific IDs are stored in :py:attr:`~dgl.DGLGraph.ndata` and :py:attr:`~dgl.DGLGraph.edata`. .. code:: >>> # Order of node types in the heterograph >>> g.ntypes ['disease', 'drug'] >>> # Original node types >>> hg.ndata[dgl.NTYPE] tensor([0, 0, 0, 1, 1, 1]) >>> # Original type-specific node IDs >>> hg.ndata[dgl.NID] tensor([0, 1, 2, 0, 1, 2]) >>> # Order of edge types in the heterograph >>> g.etypes ['interacts', 'treats'] >>> # Original edge types >>> hg.edata[dgl.ETYPE] tensor([0, 0, 1]) >>> # Original type-specific edge IDs >>> hg.edata[dgl.EID] tensor([0, 1, 0]) For modeling purposes, one may want to group some relations together and apply the same operation to them. To address this need, one can first take an edge type subgraph of the heterograph and then convert the subgraph to a homogeneous graph. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... 
}) >>> sub_g = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'), ... ('drug', 'interacts', 'gene')]) >>> h_sub_g = dgl.to_homogeneous(sub_g) >>> h_sub_g Graph(num_nodes=7, num_edges=4, ...) ================================================ FILE: docs/source/guide/graph.rst ================================================ .. _guide-graph: Chapter 1: Graph ====================== :ref:`(中文版)` Graphs express entities (nodes) along with their relations (edges), and both nodes and edges can be typed (e.g., ``"user"`` and ``"item"`` are two different types of nodes). DGL provides a graph-centric programming abstraction with its core data structure -- :class:`~dgl.DGLGraph`. :class:`~dgl.DGLGraph` provides its interface to handle a graph's structure, its node/edge features, and the resulting computations that can be performed using these components. Roadmap ------- The chapter starts with a brief introduction to graph definitions in 1.1 and then introduces some core concepts of :class:`~dgl.DGLGraph`: * :ref:`guide-graph-basic` * :ref:`guide-graph-graphs-nodes-edges` * :ref:`guide-graph-feature` * :ref:`guide-graph-external` * :ref:`guide-graph-heterogeneous` * :ref:`guide-graph-gpu` .. toctree:: :maxdepth: 1 :hidden: :glob: graph-basic graph-graphs-nodes-edges graph-feature graph-external graph-heterogeneous graph-gpu ================================================ FILE: docs/source/guide/index.rst ================================================ User Guide ========== .. toctree:: :maxdepth: 2 :titlesonly: graph message nn data training minibatch distributed mixed_precision ================================================ FILE: docs/source/guide/message-api.rst ================================================ .. _guide-message-passing-api: 2.1 Built-in Functions and Message Passing APIs ----------------------------------------------- :ref:`(中文版) ` In DGL, **message function** takes a single argument ``edges``, which is an :class:`~dgl.udf.EdgeBatch` instance. 
During message passing, DGL generates it internally to represent a batch of edges. It has three members ``src``, ``dst`` and ``data`` to access features of source nodes, destination nodes, and edges, respectively. **reduce function** takes a single argument ``nodes``, which is a :class:`~dgl.udf.NodeBatch` instance. During message passing, DGL generates it internally to represent a batch of nodes. It has a member ``mailbox`` to access the messages received for the nodes in the batch. Some of the most common reduce operations include ``sum``, ``max``, ``min``, etc. **update function** takes a single argument ``nodes`` as described above. This function operates on the aggregation result from ``reduce function``, typically combining it with a node’s original feature at the last step and saving the result as a node feature. DGL has implemented commonly used message functions and reduce functions as **built-in** in the namespace ``dgl.function``. In general, DGL suggests using built-in functions **whenever possible** since they are heavily optimized and automatically handle dimension broadcasting. If your message passing functions cannot be implemented with built-ins, you can implement user-defined message/reduce functions (a.k.a. **UDF**). Built-in message functions can be unary or binary. DGL supports ``copy`` for unary. For binary funcs, DGL supports ``add``, ``sub``, ``mul``, ``div``, ``dot``. The naming convention for message built-in funcs is that ``u`` represents ``src`` nodes, ``v`` represents ``dst`` nodes, and ``e`` represents ``edges``. The parameters for those functions are strings indicating the input and output field names for the corresponding nodes and edges. The list of supported built-in functions can be found in :ref:`api-built-in`. For example, to add the ``hu`` feature from src nodes and ``hv`` feature from dst nodes then save the result on the edge at ``he`` field, one can use built-in function ``dgl.function.u_add_v('hu', 'hv', 'he')``.
This is equivalent to the Message UDF: .. code:: def message_func(edges): return {'he': edges.src['hu'] + edges.dst['hv']} Built-in reduce functions support operations ``sum``, ``max``, ``min``, and ``mean``. Reduce functions usually have two parameters, one for field name in ``mailbox``, one for field name in node features, both of which are strings. For example, ``dgl.function.sum('m', 'h')`` is equivalent to the Reduce UDF that sums up the message ``m``: .. code:: import torch def reduce_func(nodes): return {'h': torch.sum(nodes.mailbox['m'], dim=1)} For advanced usage of UDF, see :ref:`apiudf`. It is also possible to invoke only edge-wise computation by :meth:`~dgl.DGLGraph.apply_edges` without invoking message passing. :meth:`~dgl.DGLGraph.apply_edges` takes a message function as its parameter and by default updates the features of all edges. For example: .. code:: import dgl.function as fn graph.apply_edges(fn.u_add_v('el', 'er', 'e')) For message passing, :meth:`~dgl.DGLGraph.update_all` is a high-level API that merges message generation, message aggregation and node update in a single call, which leaves room for optimization as a whole. The parameters for :meth:`~dgl.DGLGraph.update_all` are a message function, a reduce function and an update function. One can call the update function outside of ``update_all`` and not specify it when invoking :meth:`~dgl.DGLGraph.update_all`. DGL recommends this approach since the update function can usually be written as pure tensor operations to make the code concise. For example: .. code:: def update_all_example(graph): # store the result in graph.ndata['ft'] graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) # Call update function outside of update_all final_ft = graph.ndata['ft'] * 2 return final_ft This call will generate the messages ``m`` by multiplying src node features ``ft`` and edge features ``a``, sum up the messages ``m`` to update node features ``ft``, and finally multiply ``ft`` by 2 to get the result ``final_ft``.
After the call, DGL will clean the intermediate messages ``m``. The math formula for the above function is: .. math:: {final\_ft}_i = 2 * \sum_{j\in\mathcal{N}(i)} ({ft}_j * a_{ji}) DGL's built-in functions support floating point data types, i.e. the feature must be ``half`` (``float16``) /``float``/``double`` tensors. ``float16`` data type support is disabled by default as it has a minimum GPU compute capability requirement of ``sm_53`` (Pascal, Volta, Turing and Ampere architectures). Users can enable float16 for mixed precision training by compiling DGL from source (see :doc:`Mixed Precision Training ` tutorial for details). ================================================ FILE: docs/source/guide/message-efficient.rst ================================================ .. _guide-message-passing-efficient: 2.2 Writing Efficient Message Passing Code ------------------------------------------ :ref:`(中文版) ` DGL optimizes memory consumption and computing speed for message passing. A common practice to leverage those optimizations is to construct one's own message passing functionality as a combination of :meth:`~dgl.DGLGraph.update_all` calls with built-in functions as parameters. Besides that, considering that the number of edges is much larger than the number of nodes for some graphs, avoiding unnecessary memory copy from nodes to edges is beneficial. For some cases like :class:`~dgl.nn.pytorch.conv.GATConv`, where it is necessary to save messages on the edges, one needs to call :meth:`~dgl.DGLGraph.apply_edges` with built-in functions. Sometimes the messages on the edges can be high dimensional, which is memory consuming. DGL recommends keeping the dimension of edge features as low as possible. Here’s an example on how to achieve this by splitting operations on the edges to nodes. The approach does the following: concatenate the ``src`` feature and ``dst`` feature, then apply a linear layer, i.e. :math:`W\times (u || v)`.
The ``src`` and ``dst`` feature dimension is high, while the linear layer output dimension is low. A straightforward implementation would be like: .. code:: import torch import torch.nn as nn linear = nn.Parameter(torch.FloatTensor(size=(node_feat_dim * 2, out_dim))) def concat_message_function(edges): return {'cat_feat': torch.cat([edges.src['feat'], edges.dst['feat']], dim=1)} g.apply_edges(concat_message_function) g.edata['out'] = g.edata['cat_feat'] @ linear The suggested implementation splits the linear operation into two: one applies on ``src`` feature, the other applies on ``dst`` feature. It then adds the output of the linear operations on the edges at the final stage, i.e. performing :math:`W_l\times u + W_r \times v`. This is because :math:`W \times (u||v) = W_l \times u + W_r \times v`, where :math:`W_l` and :math:`W_r` are the left and the right half of the matrix :math:`W`, respectively: .. code:: import dgl.function as fn linear_src = nn.Parameter(torch.FloatTensor(size=(node_feat_dim, out_dim))) linear_dst = nn.Parameter(torch.FloatTensor(size=(node_feat_dim, out_dim))) out_src = g.ndata['feat'] @ linear_src out_dst = g.ndata['feat'] @ linear_dst g.srcdata.update({'out_src': out_src}) g.dstdata.update({'out_dst': out_dst}) g.apply_edges(fn.u_add_v('out_src', 'out_dst', 'out')) The above two implementations are mathematically equivalent. The latter one is more efficient because it does not need to save feat_src and feat_dst on edges, which is not memory-efficient. Plus, addition could be optimized with DGL’s built-in function :func:`~dgl.function.u_add_v`, which further speeds up computation and reduces the memory footprint. ================================================ FILE: docs/source/guide/message-heterograph.rst ================================================ ..
_guide-message-passing-heterograph: 2.5 Message Passing on Heterogeneous Graph ------------------------------------------ :ref:`(中文版) ` Heterogeneous graphs (:ref:`guide-graph-heterogeneous`), or heterographs for short, are graphs that contain different types of nodes and edges. The different types of nodes and edges tend to have different types of attributes that are designed to capture the characteristics of each node and edge type. Within the context of graph neural networks, depending on their complexity, certain node and edge types might need to be modeled with representations that have a different number of dimensions. The message passing on heterographs can be split into two parts: 1. Message computation and aggregation for each relation r. 2. Reduction that merges the aggregation results from all relations for each node type. DGL’s interface to call message passing on heterographs is :meth:`~dgl.DGLGraph.multi_update_all`. :meth:`~dgl.DGLGraph.multi_update_all` takes a dictionary containing the parameters for :meth:`~dgl.DGLGraph.update_all` within each relation using relation as the key, and a string representing the cross type reducer. The reducer can be one of ``sum``, ``min``, ``max``, ``mean``, ``stack``. Here’s an example: .. code:: import dgl.function as fn for c_etype in G.canonical_etypes: srctype, etype, dsttype = c_etype Wh = self.weight[etype](feat_dict[srctype]) # Save it in graph for message passing G.nodes[srctype].data['Wh_%s' % etype] = Wh # Specify per-relation message passing functions: (message_func, reduce_func). # Note that the results are saved to the same destination feature 'h', which # hints the type wise reducer for aggregation. funcs[etype] = (fn.copy_u('Wh_%s' % etype, 'm'), fn.mean('m', 'h')) # Trigger message passing of multiple types. 
G.multi_update_all(funcs, 'sum') # return the updated node feature dictionary return {ntype : G.nodes[ntype].data['h'] for ntype in G.ntypes} ================================================ FILE: docs/source/guide/message-part.rst ================================================ .. _guide-message-passing-part: 2.3 Apply Message Passing On Part Of The Graph ---------------------------------------------- :ref:`(中文版) ` If one only wants to update part of the nodes in the graph, the practice is to create a subgraph by providing the IDs for the nodes to include in the update, then call :meth:`~dgl.DGLGraph.update_all` on the subgraph. For example: .. code:: nid = [0, 2, 3, 6, 7, 9] sg = g.subgraph(nid) sg.update_all(message_func, reduce_func, apply_node_func) This is a common usage in mini-batch training. Check :ref:`guide-minibatch` for more detailed usages. ================================================ FILE: docs/source/guide/message.rst ================================================ .. _guide-message-passing: Chapter 2: Message Passing ========================== :ref:`(中文版) ` Message Passing Paradigm ------------------------ Let :math:`x_v\in\mathbb{R}^{d_1}` be the feature for node :math:`v`, and :math:`w_{e}\in\mathbb{R}^{d_2}` be the feature for edge :math:`({u}, {v})`. The **message passing paradigm** defines the following node-wise and edge-wise computation at step :math:`t+1`: .. math:: \text{Edge-wise: } m_{e}^{(t+1)} = \phi \left( x_v^{(t)}, x_u^{(t)}, w_{e}^{(t)} \right) , ({u}, {v},{e}) \in \mathcal{E}. .. math:: \text{Node-wise: } x_v^{(t+1)} = \psi \left(x_v^{(t)}, \rho\left(\left\lbrace m_{e}^{(t+1)} : ({u}, {v},{e}) \in \mathcal{E} \right\rbrace \right) \right). 
In the above equations, :math:`\phi` is a **message function** defined on each edge to generate a message by combining the edge feature with the features of its incident nodes; :math:`\psi` is an **update function** defined on each node to update the node feature by aggregating its incoming messages using the **reduce function** :math:`\rho`. Roadmap ------- This chapter introduces DGL's message passing APIs, and how to efficiently use them on both nodes and edges. The last section of it explains how to implement message passing on heterogeneous graphs. * :ref:`guide-message-passing-api` * :ref:`guide-message-passing-efficient` * :ref:`guide-message-passing-part` * :ref:`guide-message-passing-heterograph` .. toctree:: :maxdepth: 1 :hidden: :glob: message-api message-efficient message-part message-heterograph ================================================ FILE: docs/source/guide/minibatch-custom-sampler.rst ================================================ .. _guide-minibatch-customizing-neighborhood-sampler: 6.4 Implementing Custom Graph Samplers ---------------------------------------------- Implementing custom samplers involves subclassing the :class:`dgl.graphbolt.SubgraphSampler` base class and implementing its abstract :attr:`sample_subgraphs` method. The :attr:`sample_subgraphs` method should take in seed nodes which are the nodes to sample neighbors from: .. code:: python def sample_subgraphs(self, seed_nodes): return input_nodes, sampled_subgraphs The method should return the input node IDs list and a list of subgraphs. Each subgraph is a :class:`~dgl.graphbolt.SampledSubgraph` object. Any other data that are required during sampling such as the graph structure, fanout size, etc. should be passed to the sampler via the constructor. The code below implements a classical neighbor sampler: .. 
code:: python @functional_datapipe("customized_sample_neighbor") class CustomizedNeighborSampler(dgl.graphbolt.SubgraphSampler): def __init__(self, datapipe, graph, fanouts): super().__init__(datapipe) self.graph = graph self.fanouts = fanouts def sample_subgraphs(self, seed_nodes): subgs = [] for fanout in reversed(self.fanouts): # Sample a fixed number of neighbors of the current seed nodes. input_nodes, sg = g.sample_neighbors(seed_nodes, fanout) subgs.insert(0, sg) seed_nodes = input_nodes return input_nodes, subgs To use this sampler with :class:`~dgl.graphbolt.DataLoader`: .. code:: python datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True) datapipe = datapipe.customized_sample_neighbor(g, [10, 10]) # 2 layers. datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) for data in dataloader: input_features = data.node_features["feat"] output_labels = data.labels output_predictions = model(data.blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() Sampler for Heterogeneous Graphs ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To write a sampler for heterogeneous graphs, one needs to be aware that the argument `graph` is a heterogeneous graph while `seeds` could be a dictionary of ID tensors. Most of DGL's graph sampling operators (e.g., the ``sample_neighbors`` and ``to_block`` functions in the above example) can work on heterogeneous graph natively, so many samplers are automatically ready for heterogeneous graph. For example, the above ``CustomizedNeighborSampler`` can be used on heterogeneous graphs: .. 
code:: python import dgl.graphbolt as gb hg = gb.FusedCSCSamplingGraph() train_set = item_set = gb.HeteroItemSet( { "user": gb.ItemSet( (torch.arange(0, 5), torch.arange(5, 10)), names=("seeds", "labels"), ), "item": gb.ItemSet( (torch.arange(5, 10), torch.arange(10, 15)), names=("seeds", "labels"), ), } ) datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True) datapipe = datapipe.customized_sample_neighbor(g, [10, 10]) # 2 layers. datapipe = datapipe.fetch_feature( feature, node_feature_keys={"user": ["feat"], "item": ["feat"]} ) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) for data in dataloader: input_features = { ntype: data.node_features[(ntype, "feat")] for ntype in data.blocks[0].srctypes } output_labels = data.labels["user"] output_predictions = model(data.blocks, input_features)["user"] loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() Exclude Edges After Sampling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In some cases, we may want to exclude seed edges from the sampled subgraph. For example, in link prediction tasks, we want to exclude the edges in the training set from the sampled subgraph to prevent information leakage. To do so, we need to add an additional datapipe right after sampling as follows: .. code:: python datapipe = datapipe.customized_sample_neighbor(g, [10, 10]) # 2 layers. datapipe = datapipe.transform(gb.exclude_seed_edges) Please check the API page of :func:`~dgl.graphbolt.exclude_seed_edges` for more details. The above API is based on :meth:`~dgl.graphbolt.SampledSubgraph.exclude_edges`. If you want to exclude edges from the sampled subgraph based on some other criteria, you could write your own transform function. Please check the method for reference. You could also refer to examples in `Link Prediction `__.
================================================ FILE: docs/source/guide/minibatch-edge.rst ================================================ .. _guide-minibatch-edge-classification-sampler: 6.2 Training GNN for Edge Classification with Neighborhood Sampling ---------------------------------------------------------------------- :ref:`(中文版) ` Training for edge classification/regression is somewhat similar to that of node classification/regression with several notable differences. Define a neighborhood sampler and data loader ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can use the :ref:`same neighborhood samplers as node classification `. .. code:: python datapipe = datapipe.sample_neighbor(g, [10, 10]) # Or equivalently datapipe = dgl.graphbolt.NeighborSampler(datapipe, g, [10, 10]) The code for defining a data loader is also the same as that of node classification. The only difference is that it iterates over the edges(namely, node pairs) in the training set instead of the nodes. .. code:: python import dgl.graphbolt as gb device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') g = gb.SamplingGraph() seeds = torch.arange(0, 1000).reshape(-1, 2) labels = torch.randint(0, 2, (5,)) train_set = gb.ItemSet((seeds, labels), names=("seeds", "labels")) datapipe = gb.ItemSampler(train_set, batch_size=128, shuffle=True) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers. # Or equivalently: # datapipe = gb.NeighborSampler(datapipe, g, [10, 10]) datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) Iterating over the DataLoader will yield :class:`~dgl.graphbolt.MiniBatch` which contains a list of specially created graphs representing the computation dependencies on each layer. You can access the *message flow graphs* (MFGs) via `mini_batch.blocks`. .. code:: python mini_batch = next(iter(dataloader)) print(mini_batch.blocks) .. 
note:: See the :doc:`Stochastic Training Tutorial <../notebooks/stochastic_training/neighbor_sampling_overview.nblink>` for the concept of message flow graphs. If you wish to develop your own neighborhood sampler or you want a more detailed explanation of the concept of MFGs, please refer to :ref:`guide-minibatch-customizing-neighborhood-sampler`. .. _guide-minibatch-edge-classification-sampler-exclude: Removing edges in the minibatch from the original graph for neighbor sampling ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ When training edge classification models, sometimes you wish to remove the edges appearing in the training data from the computation dependency as if they never existed. Otherwise, the model will “know” the fact that an edge exists between the two nodes, and potentially use it to its advantage. Therefore in edge classification you sometimes would like to exclude the seed edges as well as their reverse edges from the sampled minibatch. You can use :func:`~dgl.graphbolt.exclude_seed_edges` together with :class:`~dgl.graphbolt.MiniBatchTransformer` to achieve this. .. code:: python import dgl.graphbolt as gb from functools import partial device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') g = gb.SamplingGraph() seeds = torch.arange(0, 1000).reshape(-1, 2) labels = torch.randint(0, 2, (5,)) train_set = gb.ItemSet((seeds, labels), names=("seeds", "labels")) datapipe = gb.ItemSampler(train_set, batch_size=128, shuffle=True) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers.
exclude_seed_edges = partial(gb.exclude_seed_edges, include_reverse_edges=True) datapipe = datapipe.transform(exclude_seed_edges) datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) Adapt your model for minibatch training ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The edge classification model usually consists of two parts: - One part that obtains the representation of incident nodes. - The other part that computes the edge score from the incident node representations. The former part is exactly the same as :ref:`that from node classification ` and we can simply reuse it. The input is still the list of MFGs generated from a data loader provided by DGL, as well as the input features. .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dglnn.GraphConv(in_features, hidden_features) self.conv2 = dglnn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x The input to the latter part is usually the output from the former part, as well as the subgraph(node pairs) of the original graph induced by the edges in the minibatch. The subgraph is yielded from the same data loader. The following code shows an example of predicting scores on the edges by concatenating the incident node features and projecting it with a dense layer. .. code:: python class ScorePredictor(nn.Module): def __init__(self, num_classes, in_features): super().__init__() self.W = nn.Linear(2 * in_features, num_classes) def forward(self, seeds, x): src_x = x[seeds[:, 0]] dst_x = x[seeds[:, 1]] data = torch.cat([src_x, dst_x], 1) return self.W(data) The entire model will take the list of MFGs and the edges generated by the data loader, as well as the input node features as follows: .. 
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes): super().__init__() self.gcn = StochasticTwoLayerGCN( in_features, hidden_features, out_features) self.predictor = ScorePredictor(num_classes, out_features) def forward(self, blocks, x, seeds): x = self.gcn(blocks, x) return self.predictor(seeds, x) DGL ensures that the nodes in the edge subgraph are the same as the output nodes of the last MFG in the generated list of MFGs. Training Loop ~~~~~~~~~~~~~ The training loop is very similar to node classification. You can iterate over the dataloader and get a subgraph induced by the edges in the minibatch, as well as the list of MFGs necessary for computing their incident node representations. .. code:: python import torch.nn.functional as F model = Model(in_features, hidden_features, out_features, num_classes) model = model.to(device) opt = torch.optim.Adam(model.parameters()) for data in dataloader: blocks = data.blocks x = data.edge_features("feat") y_hat = model(data.blocks, x, data.compacted_seeds) loss = F.cross_entropy(data.labels, y_hat) opt.zero_grad() loss.backward() opt.step() For heterogeneous graphs ~~~~~~~~~~~~~~~~~~~~~~~~ The models computing the node representations on heterogeneous graphs can also be used for computing incident node representations for edge classification/regression. ..
code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x For score prediction, the only implementation difference between the homogeneous graph and the heterogeneous graph is that we are looping over the edge types. .. code:: python class ScorePredictor(nn.Module): def __init__(self, num_classes, in_features): super().__init__() self.W = nn.Linear(2 * in_features, num_classes) def forward(self, seeds, x): scores = {} for etype in seeds.keys(): src, dst = seeds[etype].T data = torch.cat([x[etype][src], x[etype][dst]], 1) scores[etype] = self.W(data) return scores class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes, etypes): super().__init__() self.rgcn = StochasticTwoLayerRGCN( in_features, hidden_features, out_features, etypes) self.pred = ScorePredictor(num_classes, out_features) def forward(self, seeds, blocks, x): x = self.rgcn(blocks, x) return self.pred(seeds, x) Data loader definition is almost identical to that of homogeneous graph. The only difference is that the train_set is now an instance of :class:`~dgl.graphbolt.HeteroItemSet` instead of :class:`~dgl.graphbolt.ItemSet`. .. 
code:: python import dgl.graphbolt as gb device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') g = gb.SamplingGraph() seeds = torch.arange(0, 1000).reshape(-1, 2) labels = torch.randint(0, 3, (1000,)) seeds_labels = { "user:like:item": gb.ItemSet( (seeds, labels), names=("seeds", "labels") ), "user:follow:user": gb.ItemSet( (seeds, labels), names=("seeds", "labels") ), } train_set = gb.HeteroItemSet(seeds_labels) datapipe = gb.ItemSampler(train_set, batch_size=128, shuffle=True) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers. datapipe = datapipe.fetch_feature( feature, node_feature_keys={"item": ["feat"], "user": ["feat"]} ) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) Things become a little different if you wish to exclude the reverse edges on heterogeneous graphs. On heterogeneous graphs, reverse edges usually have a different edge type from the edges themselves, in order to differentiate the “forward” and “backward” relationships (e.g. ``follow`` and ``followed_by`` are reverse relations of each other, ``like`` and ``liked_by`` are reverse relations of each other, etc.). If each edge in a type has a reverse edge with the same ID in another type, you can specify the mapping between edge types and their reverse types. The way to exclude the edges in the minibatch as well as their reverse edges then goes as follows. .. code:: python exclude_seed_edges = partial( gb.exclude_seed_edges, include_reverse_edges=True, reverse_etypes_mapping={ "user:like:item": "item:liked_by:user", "user:follow:user": "user:followed_by:user", }, ) datapipe = datapipe.transform(exclude_seed_edges) The training loop is again almost the same as that on homogeneous graph, except for the implementation of ``compute_loss`` that will take in two dictionaries of node types and predictions here. .. 
code:: python import torch.nn.functional as F model = Model(in_features, hidden_features, out_features, num_classes, etypes) model = model.to(device) opt = torch.optim.Adam(model.parameters()) for data in dataloader: blocks = data.blocks x = data.edge_features(("user:like:item", "feat")) y_hat = model(data.blocks, x, data.compacted_seeds) loss = F.cross_entropy(data.labels, y_hat) opt.zero_grad() loss.backward() opt.step() ================================================ FILE: docs/source/guide/minibatch-gpu-sampling.rst ================================================ .. _guide-minibatch-gpu-sampling: 6.8 Using GPU for Neighborhood Sampling --------------------------------------- .. note:: GraphBolt does not support GPU-based neighborhood sampling yet. So this guide is utilizing :class:`~dgl.dataloading.DataLoader` for illustration. DGL since 0.7 has been supporting GPU-based neighborhood sampling, which has a significant speed advantage over CPU-based neighborhood sampling. If you estimate that your graph can fit onto GPU and your model does not take a lot of GPU memory, then it is best to put the graph onto GPU memory and use GPU-based neighbor sampling. For example, `OGB Products `_ has 2.4M nodes and 61M edges. The graph takes less than 1GB since the memory consumption of a graph depends on the number of edges. Therefore it is entirely possible to fit the whole graph onto GPU. Using GPU-based neighborhood sampling in DGL data loaders ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ One can use GPU-based neighborhood sampling with DGL data loaders via: * Put the graph onto GPU. * Put the ``train_nid`` onto GPU. * Set ``device`` argument to a GPU device. * Set ``num_workers`` argument to 0, because CUDA does not allow multiple processes accessing the same context. All the other arguments for the :class:`~dgl.dataloading.DataLoader` can be the same as the other user guides and tutorials. .. 
code:: python g = g.to('cuda:0') train_nid = train_nid.to('cuda:0') dataloader = dgl.dataloading.DataLoader( g, # The graph must be on GPU. train_nid, # train_nid must be on GPU. sampler, device=torch.device('cuda:0'), # The device argument must be GPU. num_workers=0, # Number of workers must be 0. batch_size=1000, drop_last=False, shuffle=True) .. note:: GPU-based neighbor sampling also works for custom neighborhood samplers as long as (1) your sampler is subclassed from :class:`~dgl.dataloading.BlockSampler`, and (2) your sampler entirely works on GPU. Using CUDA UVA-based neighborhood sampling in DGL data loaders ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. note:: New feature introduced in DGL 0.8. For the case where the graph is too large to fit onto the GPU memory, we introduce the CUDA UVA (Unified Virtual Addressing)-based sampling, in which GPUs perform the sampling on the graph pinned in CPU memory via zero-copy access. You can enable UVA-based neighborhood sampling in DGL data loaders via: * Put the ``train_nid`` onto GPU. * Set ``device`` argument to a GPU device. * Set ``num_workers`` argument to 0, because CUDA does not allow multiple processes accessing the same context. * Set ``use_uva=True``. All the other arguments for the :class:`~dgl.dataloading.DataLoader` can be the same as the other user guides and tutorials. .. code:: python train_nid = train_nid.to('cuda:0') dataloader = dgl.dataloading.DataLoader( g, train_nid, # train_nid must be on GPU. sampler, device=torch.device('cuda:0'), # The device argument must be GPU. num_workers=0, # Number of workers must be 0. batch_size=1000, drop_last=False, shuffle=True, use_uva=True) # Set use_uva=True UVA-based sampling is the recommended solution for mini-batch training on large graphs, especially for multi-GPU training. .. 
note:: To use UVA-based sampling in multi-GPU training, you should first materialize all the necessary sparse formats of the graph before spawning training processes. Refer to our `GraphSAGE example `_ for more details. UVA and GPU support for PinSAGESampler/RandomWalkNeighborSampler ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PinSAGESampler and RandomWalkNeighborSampler support UVA and GPU sampling. You can enable them via: * Pin the graph (for UVA sampling) or put the graph onto GPU (for GPU sampling). * Put the ``train_nid`` onto GPU. .. code:: python g = dgl.heterograph({ ('item', 'bought-by', 'user'): ([0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 2, 3, 2, 3]), ('user', 'bought', 'item'): ([0, 1, 0, 1, 2, 3, 2, 3], [0, 0, 1, 1, 2, 2, 3, 3])}) # UVA setup # g.create_formats_() # g.pin_memory_() # GPU setup device = torch.device('cuda:0') g = g.to(device) sampler1 = dgl.sampling.PinSAGESampler(g, 'item', 'user', 4, 0.5, 3, 2) sampler2 = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2, ['bought-by', 'bought']) train_nid = torch.tensor([0, 2], dtype=g.idtype, device=device) sampler1(train_nid) sampler2(train_nid) Using GPU-based neighbor sampling with DGL functions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can build your own GPU sampling pipelines with the following functions that support operating on GPU: * :func:`dgl.sampling.sample_neighbors` * :func:`dgl.sampling.random_walk` Subgraph extraction ops: * :func:`dgl.node_subgraph` * :func:`dgl.edge_subgraph` * :func:`dgl.in_subgraph` * :func:`dgl.out_subgraph` Graph transform ops for subgraph construction: * :func:`dgl.to_block` * :func:`dgl.compact_graph` ================================================ FILE: docs/source/guide/minibatch-inference.rst ================================================ .. 
_guide-minibatch-inference: 6.7 Exact Offline Inference on Large Graphs ------------------------------------------------------ :ref:`(中文版) ` Both subgraph sampling and neighborhood sampling are to reduce the memory and time consumption for training GNNs with GPUs. When performing inference it is usually better to truly aggregate over all neighbors instead to get rid of the randomness introduced by sampling. However, full-graph forward propagation is usually infeasible on GPU due to limited memory, and slow on CPU due to slow computation. This section introduces the methodology of full-graph forward propagation with limited GPU memory via minibatch and neighborhood sampling. The inference algorithm is different from the training algorithm, as the representations of all nodes should be computed layer by layer, starting from the first layer. Specifically, for a particular layer, we need to compute the output representations of all nodes from this GNN layer in minibatches. The consequence is that the inference algorithm will have an outer loop iterating over the layers, and an inner loop iterating over the minibatches of nodes. In contrast, the training algorithm has an outer loop iterating over the minibatches of nodes, and an inner loop iterating over the layers for both neighborhood sampling and message passing. The following animation shows what the computation would look like (note that for every layer only the first three minibatches are drawn). .. figure:: https://data.dgl.ai/asset/image/guide_6_6_0.gif :alt: Imgur Implementing Offline Inference ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Consider the two-layer GCN we have mentioned in Section 6.1 :ref:`guide-minibatch-node-classification-model`. The way to implement offline inference still involves using :class:`~dgl.graphbolt.NeighborSampler`, but sampling for only one layer at a time. ..
code:: python datapipe = gb.ItemSampler(all_nodes_set, batch_size=1024, shuffle=True) datapipe = datapipe.sample_neighbor(g, [-1]) # 1 layers. datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) Note that offline inference is implemented as a method of the GNN module because the computation on one layer depends on how messages are aggregated and combined as well. .. code:: python class SAGE(nn.Module): def __init__(self, in_size, hidden_size, out_size): super().__init__() self.layers = nn.ModuleList() # Three-layer GraphSAGE-mean. self.layers.append(dglnn.SAGEConv(in_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, out_size, "mean")) self.dropout = nn.Dropout(0.5) self.hidden_size = hidden_size self.out_size = out_size def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) hidden_x = self.dropout(hidden_x) return hidden_x def inference(self, graph, features, dataloader, device): """ Offline inference with this module """ feature = features.read("node", None, "feat") # Compute representations layer by layer for layer_idx, layer in enumerate(self.layers): is_last_layer = layer_idx == len(self.layers) - 1 y = torch.empty( graph.total_num_nodes, self.out_size if is_last_layer else self.hidden_size, dtype=torch.float32, device=buffer_device, pin_memory=pin_memory, ) feature = feature.to(device) for step, data in tqdm(enumerate(dataloader)): x = feature[data.input_nodes] hidden_x = layer(data.blocks[0], x) # len(blocks) = 1 if not is_last_layer: hidden_x = F.relu(hidden_x) hidden_x = self.dropout(hidden_x) # By design, our output nodes are contiguous. 
y[ data.seeds[0] : data.seeds[-1] + 1 ] = hidden_x.to(device) feature = y return y Note that for the purpose of computing the evaluation metric on the validation set for model selection, we usually don’t have to compute exact offline inference. The reason is that we need to compute the representation for every single node on every single layer, which is usually very costly especially in the semi-supervised regime with a lot of unlabeled data. Neighborhood sampling will work fine for model selection and validation. One can see `GraphSAGE `__ and `RGCN `__ for examples of offline inference. ================================================ FILE: docs/source/guide/minibatch-link.rst ================================================ .. _guide-minibatch-link-classification-sampler: 6.3 Training GNN for Link Prediction with Neighborhood Sampling -------------------------------------------------------------------- :ref:`(中文版) ` Define a data loader with neighbor and negative sampling ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can still use the same data loader as the one in node/edge classification. The only difference is that you need to add an additional stage `negative sampling` before the neighbor sampling stage. The following data loader will pick 5 negative destination nodes uniformly for each source node of an edge. .. code:: python datapipe = datapipe.sample_uniform_negative(graph, 5) The whole data loader pipeline is as follows: .. code:: python datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=True) datapipe = datapipe.sample_uniform_negative(graph, 5) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers. datapipe = datapipe.transform(gb.exclude_seed_edges) datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) For the details about the builtin uniform negative sampler please see :class:`~dgl.graphbolt.UniformNegativeSampler`.
You can also give your own negative sampler function, as long as it inherits from :class:`~dgl.graphbolt.NegativeSampler` and overrides the :meth:`~dgl.graphbolt.NegativeSampler._sample_with_etype` method, which takes in the node pairs in the minibatch and returns the negative node pairs. The following gives an example of a custom negative sampler that samples negative destination nodes according to a probability distribution proportional to a power of degrees. .. code:: python @functional_datapipe("customized_sample_negative") class CustomizedNegativeSampler(dgl.graphbolt.NegativeSampler): def __init__(self, datapipe, k, node_degrees): super().__init__(datapipe, k) # caches the probability distribution self.weights = node_degrees ** 0.75 self.k = k def _sample_with_etype(self, seeds, etype=None): src, _ = seeds.T src = src.repeat_interleave(self.k) dst = self.weights.multinomial(len(src), replacement=True) return src, dst datapipe = datapipe.customized_sample_negative(5, node_degrees) Define a GraphSAGE model for minibatch training ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ..
code:: python class SAGE(nn.Module): def __init__(self, in_size, hidden_size): super().__init__() self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.hidden_size = hidden_size self.predictor = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1), ) def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) return hidden_x When a negative sampler is provided, the data loader will generate positive and negative node pairs for each minibatch besides the *Message Flow Graphs* (MFGs). Use `compacted_seeds` and `labels` to get compact node pairs and corresponding labels. Training loop ~~~~~~~~~~~~~ The training loop simply involves iterating over the data loader and feeding in the graphs as well as the input features to the model defined above. .. code:: python optimizer = torch.optim.Adam(model.parameters(), lr=0.01) for epoch in tqdm.trange(args.epochs): model.train() total_loss = 0 start_epoch_time = time.time() for step, data in enumerate(dataloader): # Unpack MiniBatch. compacted_seeds = data.compacted_seeds.T labels = data.labels node_feature = data.node_features["feat"] # Convert sampled subgraphs to DGL blocks. blocks = data.blocks # Get the embeddings of the input nodes. y = model(blocks, node_feature) logits = model.predictor( y[compacted_seeds[0]] * y[compacted_seeds[1]] ).squeeze() # Compute loss. 
loss = F.binary_cross_entropy_with_logits(logits, labels) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() end_epoch_time = time.time() DGL provides the `unsupervised learning GraphSAGE `__ that shows an example of link prediction on homogeneous graphs. For heterogeneous graphs ~~~~~~~~~~~~~~~~~~~~~~~~ The previous model could be easily extended to heterogeneous graphs. The only difference is that you need to use :class:`~dgl.nn.HeteroGraphConv` to wrap :class:`~dgl.nn.SAGEConv` according to edge types. .. code:: python class SAGE(nn.Module): def __init__(self, in_size, hidden_size): super().__init__() self.layers = nn.ModuleList() self.layers.append(dglnn.HeteroGraphConv({ rel : dglnn.SAGEConv(in_size, hidden_size, "mean") for rel in rel_names })) self.layers.append(dglnn.HeteroGraphConv({ rel : dglnn.SAGEConv(hidden_size, hidden_size, "mean") for rel in rel_names })) self.layers.append(dglnn.HeteroGraphConv({ rel : dglnn.SAGEConv(hidden_size, hidden_size, "mean") for rel in rel_names })) self.hidden_size = hidden_size self.predictor = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1), ) def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) return hidden_x Data loader definition is also very similar to that for homogeneous graph. The only difference is that you need to give edge types for feature fetching. .. code:: python datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=True) datapipe = datapipe.sample_uniform_negative(graph, 5) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers. 
datapipe = datapipe.transform(gb.exclude_seed_edges) datapipe = datapipe.fetch_feature( feature, node_feature_keys={"user": ["feat"], "item": ["feat"]} ) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) If you want to give your own negative sampling function, just inherit from the :class:`~dgl.graphbolt.NegativeSampler` class and override the :meth:`~dgl.graphbolt.NegativeSampler._sample_with_etype` method. .. code:: python @functional_datapipe("customized_sample_negative") class CustomizedNegativeSampler(dgl.graphbolt.NegativeSampler): def __init__(self, datapipe, k, node_degrees): super().__init__(datapipe, k) # caches the probability distribution self.weights = { etype: node_degrees[etype] ** 0.75 for etype in node_degrees } self.k = k def _sample_with_etype(self, seeds, etype): src, _ = seeds.T src = src.repeat_interleave(self.k) dst = self.weights[etype].multinomial(len(src), replacement=True) return src, dst datapipe = datapipe.customized_sample_negative(5, node_degrees) For heterogeneous graphs, node pairs are grouped by edge types. The training loop is again almost the same as that on homogeneous graph, except for computing loss on specific edge type. .. code:: python optimizer = torch.optim.Adam(model.parameters(), lr=0.01) category = "user" for epoch in tqdm.trange(args.epochs): model.train() total_loss = 0 start_epoch_time = time.time() for step, data in enumerate(dataloader): # Unpack MiniBatch. compacted_seeds = data.compacted_seeds labels = data.labels node_features = { ntype: data.node_features[(ntype, "feat")] for ntype in data.blocks[0].srctypes } # Convert sampled subgraphs to DGL blocks. blocks = data.blocks # Get the embeddings of the input nodes. y = model(blocks, node_feature) logits = model.predictor( y[category][compacted_pairs[category][:, 0]] * y[category][compacted_pairs[category][:, 1]] ).squeeze() # Compute loss. 
loss = F.binary_cross_entropy_with_logits(logits, labels[category]) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() end_epoch_time = time.time() ================================================ FILE: docs/source/guide/minibatch-nn.rst ================================================ .. _guide-minibatch-custom-gnn-module: 6.6 Implementing Custom GNN Module for Mini-batch Training ------------------------------------------------------------- :ref:`(中文版) ` .. note:: :doc:`This tutorial ` has similar content to this section for the homogeneous graph case. If you are familiar with how to write a custom GNN module for updating the entire graph for homogeneous or heterogeneous graphs (see :ref:`guide-nn`), the code for computing on MFGs is similar, with the exception that the nodes are divided into input nodes and output nodes. For example, consider the following custom graph convolution module code. Note that it is not necessarily among the most efficient implementations - it only serves as an example of what a custom GNN module could look like. .. code:: python class CustomGraphConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) def forward(self, g, h): with g.local_scope(): g.ndata['h'] = h g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) return self.W(torch.cat([g.ndata['h'], g.ndata['h_neigh']], 1)) If you have a custom message passing NN module for the full graph, and you would like to make it work for MFGs, you only need to rewrite the forward function as follows. Note that the corresponding statements from the full-graph implementation are commented; you can compare the original statements with the new statements. ..
code:: python class CustomGraphConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) # h is now a pair of feature tensors for input and output nodes, instead of # a single feature tensor. # def forward(self, g, h): def forward(self, block, h): # with g.local_scope(): with block.local_scope(): # g.ndata['h'] = h h_src = h h_dst = h[:block.number_of_dst_nodes()] block.srcdata['h'] = h_src block.dstdata['h'] = h_dst # g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) block.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) # return self.W(torch.cat([g.ndata['h'], g.ndata['h_neigh']], 1)) return self.W(torch.cat( [block.dstdata['h'], block.dstdata['h_neigh']], 1)) In general, you need to do the following to make your NN module work for MFGs. - Obtain the features for output nodes from the input features by slicing the first few rows. The number of rows can be obtained by :meth:`block.number_of_dst_nodes `. - Replace :attr:`g.ndata ` with either :attr:`block.srcdata ` for features on input nodes or :attr:`block.dstdata ` for features on output nodes, if the original graph has only one node type. - Replace :attr:`g.nodes ` with either :attr:`block.srcnodes ` for features on input nodes or :attr:`block.dstnodes ` for features on output nodes, if the original graph has multiple node types. - Replace :meth:`g.num_nodes ` with either :meth:`block.number_of_src_nodes ` or :meth:`block.number_of_dst_nodes ` for the number of input nodes or output nodes respectively. Heterogeneous graphs ~~~~~~~~~~~~~~~~~~~~ For heterogeneous graph the way of writing custom GNN modules is similar. For instance, consider the following module that work on full graph. .. 
code:: python class CustomHeteroGraphConv(nn.Module): def __init__(self, g, in_feats, out_feats): super().__init__() self.Ws = nn.ModuleDict() self.Vs = nn.ModuleDict() for etype in g.canonical_etypes: utype, _, vtype = etype self.Ws[etype] = nn.Linear(in_feats[utype], out_feats[vtype]) for ntype in g.ntypes: self.Vs[ntype] = nn.Linear(in_feats[ntype], out_feats[ntype]) def forward(self, g, h): with g.local_scope(): for ntype in g.ntypes: g.nodes[ntype].data['h_dst'] = self.Vs[ntype](h[ntype]) g.nodes[ntype].data['h_src'] = h[ntype] for etype in g.canonical_etypes: utype, _, vtype = etype g.update_all( fn.copy_u('h_src', 'm'), fn.mean('m', 'h_neigh'), etype=etype) g.nodes[vtype].data['h_dst'] = g.nodes[vtype].data['h_dst'] + \ self.Ws[etype](g.nodes[vtype].data['h_neigh']) return {ntype: g.nodes[ntype].data['h_dst'] for ntype in g.ntypes} For ``CustomHeteroGraphConv``, the principle is to replace ``g.nodes`` with ``g.srcnodes`` or ``g.dstnodes`` depending on whether the features serve for input or output. .. code:: python class CustomHeteroGraphConv(nn.Module): def __init__(self, g, in_feats, out_feats): super().__init__() self.Ws = nn.ModuleDict() self.Vs = nn.ModuleDict() for etype in g.canonical_etypes: utype, _, vtype = etype self.Ws[etype] = nn.Linear(in_feats[utype], out_feats[vtype]) for ntype in g.ntypes: self.Vs[ntype] = nn.Linear(in_feats[ntype], out_feats[ntype]) def forward(self, g, h): with g.local_scope(): for ntype in g.ntypes: h_src, h_dst = h[ntype] g.dstnodes[ntype].data['h_dst'] = self.Vs[ntype](h_dst) g.srcnodes[ntype].data['h_src'] = h_src for etype in g.canonical_etypes: utype, _, vtype = etype g.update_all( fn.copy_u('h_src', 'm'), fn.mean('m', 'h_neigh'), etype=etype) g.dstnodes[vtype].data['h_dst'] = \ g.dstnodes[vtype].data['h_dst'] + \ self.Ws[etype](g.dstnodes[vtype].data['h_neigh']) return {ntype: g.dstnodes[ntype].data['h_dst'] for ntype in g.ntypes} Writing modules that work on homogeneous graphs, bipartite graphs, and MFGs
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ All message passing modules in DGL work on homogeneous graphs, unidirectional bipartite graphs (that have two node types and one edge type), and an MFG with one edge type. Essentially, the input graph and feature of a builtin DGL neural network module must satisfy one of the following cases. - If the input feature is a pair of tensors, then the input graph must be unidirectional bipartite. - If the input feature is a single tensor and the input graph is an MFG, DGL will automatically set the feature on the output nodes as the first few rows of the input node features. - If the input feature is a single tensor and the input graph is not an MFG, then the input graph must be homogeneous. For example, the following is simplified from the PyTorch implementation of :class:`dgl.nn.pytorch.SAGEConv` (also available in MXNet and Tensorflow) (removing normalization and dealing with only mean aggregation etc.). .. code:: python import dgl.function as fn class SAGEConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) def forward(self, g, h): if isinstance(h, tuple): h_src, h_dst = h elif g.is_block: h_src = h h_dst = h[:g.number_of_dst_nodes()] else: h_src = h_dst = h g.srcdata['h'] = h_src g.dstdata['h'] = h_dst g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h_neigh')) return F.relu( self.W(torch.cat([g.dstdata['h'], g.dstdata['h_neigh']], 1))) :ref:`guide-nn` also provides a walkthrough on :class:`dgl.nn.pytorch.SAGEConv`, which works on unidirectional bipartite graphs, homogeneous graphs, and MFGs. ================================================ FILE: docs/source/guide/minibatch-node.rst ================================================ ..
_guide-minibatch-node-classification-sampler: 6.1 Training GNN for Node Classification with Neighborhood Sampling ----------------------------------------------------------------------- :ref:`(中文版) ` To train your model stochastically, you need to do the following: - Define a neighborhood sampler. - Adapt your model for minibatch training. - Modify your training loop. The following sub-subsections address these steps one by one. Define a neighborhood sampler and data loader ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL provides several neighborhood sampler classes that generate the computation dependencies needed for each layer given the nodes we wish to compute on. The simplest neighborhood sampler is :class:`~dgl.graphbolt.NeighborSampler` or the equivalent function-like interface :func:`~dgl.graphbolt.sample_neighbor` which makes the node gather messages from its neighbors. To use a sampler provided by DGL, one also needs to combine it with :class:`~dgl.graphbolt.DataLoader`, which iterates over a set of indices (nodes in this case) in minibatches. For example, the following code creates a DataLoader that iterates over the training node ID set of ``ogbn-arxiv`` in batches, putting the list of generated MFGs onto GPU. .. code:: python import dgl import dgl.graphbolt as gb import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') dataset = gb.BuiltinDataset("ogbn-arxiv").load() g = dataset.graph feature = dataset.feature train_set = dataset.tasks[0].train_set datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers.
# Or equivalently: # datapipe = gb.NeighborSampler(datapipe, g, [10, 10]) datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) Iterating over the DataLoader will yield :class:`~dgl.graphbolt.MiniBatch` which contains a list of specially created graphs representing the computation dependencies on each layer. In order to train with DGL, you can access the *message flow graphs* (MFGs) by calling `mini_batch.blocks`. .. code:: python mini_batch = next(iter(dataloader)) print(mini_batch.blocks) .. note:: See the `Stochastic Training Tutorial <../notebooks/stochastic_training/neighbor_sampling_overview.nblink>`__ for the concept of message flow graph. If you wish to develop your own neighborhood sampler or you want a more detailed explanation of the concept of MFGs, please refer to :ref:`guide-minibatch-customizing-neighborhood-sampler`. .. _guide-minibatch-node-classification-model: Adapt your model for minibatch training ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If your message passing modules are all provided by DGL, the changes required to adapt your model to minibatch training is minimal. Take a multi-layer GCN as an example. If your model on full graph is implemented as follows: .. code:: python class TwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dglnn.GraphConv(in_features, hidden_features) self.conv2 = dglnn.GraphConv(hidden_features, out_features) def forward(self, g, x): x = F.relu(self.conv1(g, x)) x = F.relu(self.conv2(g, x)) return x Then all you need is to replace ``g`` with ``blocks`` generated above. .. 
code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x The DGL ``GraphConv`` modules above accepts an element in ``blocks`` generated by the data loader as an argument. :ref:`The API reference of each NN module ` will tell you whether it supports accepting a MFG as an argument. If you wish to use your own message passing module, please refer to :ref:`guide-minibatch-custom-gnn-module`. Training Loop ~~~~~~~~~~~~~ The training loop simply consists of iterating over the dataset with the customized batching iterator. During each iteration that yields :class:`~dgl.graphbolt.MiniBatch`, we: 1. Access the node features corresponding to the input nodes via ``data.node_features["feat"]``. These features are already moved to the target device (CPU or GPU) by the data loader. 2. Access the node labels corresponding to the output nodes via ``data.labels``. These labels are already moved to the target device (CPU or GPU) by the data loader. 3. Feed the list of MFGs and the input node features to the multilayer GNN and get the outputs. 4. Compute the loss and backpropagate. .. code:: python model = StochasticTwoLayerGCN(in_features, hidden_features, out_features) model = model.to(device) opt = torch.optim.Adam(model.parameters()) for data in dataloader: input_features = data.node_features["feat"] output_labels = data.labels output_predictions = model(data.blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() DGL provides an end-to-end stochastic training example `GraphSAGE implementation `__. 
For heterogeneous graphs ~~~~~~~~~~~~~~~~~~~~~~~~ Training a graph neural network for node classification on heterogeneous graph is similar. For instance, we have previously seen :ref:`how to train a 2-layer RGCN on full graph `. The code for RGCN implementation on minibatch training looks very similar to that (with self-loops, non-linearity and basis decomposition removed for simplicity): .. code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x The samplers provided by DGL also support heterogeneous graphs. For example, one can still use the provided :class:`~dgl.graphbolt.NeighborSampler` class and :class:`~dgl.graphbolt.DataLoader` class for stochastic training. The only difference is that the itemset is now an instance of :class:`~dgl.graphbolt.HeteroItemSet` which is a dictionary of node types to node IDs. .. code:: python device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') dataset = gb.BuiltinDataset("ogbn-mag").load() g = dataset.graph feature = dataset.feature train_set = dataset.tasks[0].train_set datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers. # Or equivalently: # datapipe = gb.NeighborSampler(datapipe, g, [10, 10]) # For heterogeneous graphs, we need to specify the node feature keys # for each node type. 
datapipe = datapipe.fetch_feature( feature, node_feature_keys={"author": ["feat"], "paper": ["feat"]} ) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) The training loop is almost the same as that of homogeneous graphs, except for the implementation of ``compute_loss`` that will take in two dictionaries of node types and predictions here. .. code:: python model = StochasticTwoLayerRGCN(in_features, hidden_features, out_features, etypes) model = model.to(device) opt = torch.optim.Adam(model.parameters()) for data in dataloader: # For heterogeneous graphs, we need to specify the node types and # feature name when accessing the node features. So does the labels. input_features = { "author": data.node_features[("author", "feat")], "paper": data.node_features[("paper", "feat")] } output_labels = data.labels["paper"] output_predictions = model(data.blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() DGL provides an end-to-end stochastic training example `RGCN implementation `__. ================================================ FILE: docs/source/guide/minibatch-parallelism.rst ================================================ .. _guide-minibatch-parallelism: 6.9 Data Loading Parallelism ----------------------- In minibatch training of GNNs, we usually need to cover several stages to generate a minibatch, including: * Iterate over item set and generate minibatch seeds in batch size. * Sample negative items for each seed from graph. * Sample neighbors for each seed from graph. * Exclude seed edges from the sampled subgraphs. * Fetch node and edge features for the sampled subgraphs. * Copy the MiniBatches to the target device. .. code:: python datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=True) datapipe = datapipe.sample_uniform_negative(g, 5) datapipe = datapipe.sample_neighbor(g, [10, 10]) # 2 layers. 
datapipe = datapipe.transform(gb.exclude_seed_edges) datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) All these stages are implemented in separate `IterableDataPipe `__ and stacked together with `PyTorch DataLoader `__. This design allows us to easily customize the data loading process by chaining different data pipes together. For example, if we want to sample negative items for each seed from graph, we can simply chain the :class:`~dgl.graphbolt.NegativeSampler` after the :class:`~dgl.graphbolt.ItemSampler`. But simply chaining data pipes together incurs performance overheads as various hardware resources such as CPU, GPU, PCIe, etc. are utilized by different stages. As a result, the data loading mechanism is optimized to minimize the overheads and achieve the best performance. Specifically, GraphBolt wraps the data pipes before ``fetch_feature`` with multiprocessing which enables multiple processes to run in parallel. As for the ``fetch_feature`` data pipe, we keep it running in the main process to avoid data movement overheads between processes. What's more, in order to overlap the data movement and model computation, we wrap data pipes before ``copy_to`` with `torchdata.datapipes.iter.Prefetcher `__ which prefetches elements from previous data pipes and puts them into a buffer. Such prefetching is totally transparent to users and requires no extra code. It brings a significant performance boost to minibatch training of GNNs. Please refer to the source code of :class:`~dgl.graphbolt.DataLoader` for more details. ================================================ FILE: docs/source/guide/minibatch-sparse.rst ================================================ .. _guide-minibatch-sparse: 6.5 Training GNN with DGL sparse --------------------------------- This tutorial demonstrates how to use the DGL sparse library to sample on a graph and train a model.
It trains and tests a GraphSAGE model using the sparse sample and compact operators to sample a submatrix from the whole matrix. Training GNN with DGL sparse is quite similar to :ref:`guide-minibatch-node-classification-sampler`. The major difference is the customized sampler and the matrix that represents the graph. We have customized one sampler in :ref:`guide-minibatch-customizing-neighborhood-sampler`. In this tutorial, we will customize another sampler with the DGL sparse library as shown below. .. code:: python @functional_datapipe("sample_sparse_neighbor") class SparseNeighborSampler(SubgraphSampler): def __init__(self, datapipe, matrix, fanouts): super().__init__(datapipe) self.matrix = matrix # Convert fanouts to a list of tensors. self.fanouts = [] for fanout in fanouts: if not isinstance(fanout, torch.Tensor): fanout = torch.LongTensor([int(fanout)]) self.fanouts.insert(0, fanout) def sample_subgraphs(self, seeds): sampled_matrices = [] src = seeds ##################################################################### # (HIGHLIGHT) Using the sparse sample operator to perform random # sampling on the neighboring nodes of the seed nodes. The sparse # compact operator is then employed to compact and relabel the sampled # matrix, resulting in the sampled matrix and the relabel index. ##################################################################### for fanout in self.fanouts: # Sample neighbors. sampled_matrix = self.matrix.sample(1, fanout, ids=src).coalesce() # Compact the sampled matrix. compacted_mat, row_ids = sampled_matrix.compact(0) sampled_matrices.insert(0, compacted_mat) src = row_ids return src, sampled_matrices Another major difference is the matrix that represents the graph. Previously we used :class:`~dgl.graphbolt.FusedCSCSamplingGraph` for sampling. In this tutorial, we use :class:`~dgl.sparse.SparseMatrix` to represent the graph. .. code:: python dataset = gb.BuiltinDataset("ogbn-products").load() g = dataset.graph # Create sparse.
N = g.num_nodes A = dglsp.from_csc(g.csc_indptr, g.indices, shape=(N, N)) The remaining code is almost same as node classification tutorial. To use this sampler with :class:`~dgl.graphbolt.DataLoader`: .. code:: python datapipe = gb.ItemSampler(ids, batch_size=1024) # Customize graphbolt sampler by sparse. datapipe = datapipe.sample_sparse_neighbor(A, fanouts) # Use grapbolt to fetch features. datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"]) datapipe = datapipe.copy_to(device) dataloader = gb.DataLoader(datapipe) Model definition is shown below: .. code:: python class SAGEConv(nn.Module): r"""GraphSAGE layer from `Inductive Representation Learning on Large Graphs `__ """ def __init__( self, in_feats, out_feats, ): super(SAGEConv, self).__init__() self._in_src_feats, self._in_dst_feats = in_feats, in_feats self._out_feats = out_feats self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=False) self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=True) self.reset_parameters() def reset_parameters(self): gain = nn.init.calculate_gain("relu") nn.init.xavier_uniform_(self.fc_self.weight, gain=gain) nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain) def forward(self, A, feat): feat_src = feat feat_dst = feat[: A.shape[1]] # Aggregator type: mean. srcdata = self.fc_neigh(feat_src) # Divided by degree. D_hat = dglsp.diag(A.sum(0)) ** -1 A_div = A @ D_hat # Conv neighbors. dstdata = A_div.T @ srcdata rst = self.fc_self(feat_dst) + dstdata return rst class SAGE(nn.Module): def __init__(self, in_size, hid_size, out_size): super().__init__() self.layers = nn.ModuleList() # Three-layer GraphSAGE-gcn. 
self.layers.append(SAGEConv(in_size, hid_size)) self.layers.append(SAGEConv(hid_size, hid_size)) self.layers.append(SAGEConv(hid_size, out_size)) self.dropout = nn.Dropout(0.5) self.hid_size = hid_size self.out_size = out_size def forward(self, sampled_matrices, x): hidden_x = x for layer_idx, (layer, sampled_matrix) in enumerate( zip(self.layers, sampled_matrices) ): hidden_x = layer(sampled_matrix, hidden_x) if layer_idx != len(self.layers) - 1: hidden_x = F.relu(hidden_x) hidden_x = self.dropout(hidden_x) return hidden_x Launch training: .. code:: python features = dataset.feature # Create GraphSAGE model. in_size = features.size("node", None, "feat")[0] num_classes = dataset.tasks[0].metadata["num_classes"] out_size = num_classes model = SAGE(in_size, 256, out_size).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) for epoch in range(10): model.train() total_loss = 0 for it, data in enumerate(dataloader): node_feature = data.node_features["feat"].float() blocks = data.sampled_subgraphs y = data.labels y_hat = model(blocks, node_feature) loss = F.cross_entropy(y_hat, y) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() For more details, please refer to the `full example `__. ================================================ FILE: docs/source/guide/minibatch.rst ================================================ .. _guide-minibatch: Chapter 6: Stochastic Training on Large Graphs ======================================================= :ref:`(中文版) ` If we have a massive graph with, say, millions or even billions of nodes or edges, usually full-graph training as described in :ref:`guide-training` would not work. Consider an :math:`L`-layer graph convolutional network with hidden state size :math:`H` running on an :math:`N`-node graph. Storing the intermediate hidden states requires :math:`O(NLH)` memory, easily exceeding one GPU’s capacity with large :math:`N`. 
This section provides a way to perform stochastic minibatch training, where we do not have to fit the feature of all the nodes into GPU. Overview of Neighborhood Sampling Approaches -------------------------------------------- Neighborhood sampling methods generally work as the following. For each gradient descent step, we select a minibatch of nodes whose final representations at the :math:`L`-th layer are to be computed. We then take all or some of their neighbors at the :math:`L-1` layer. This process continues until we reach the input. This iterative process builds the dependency graph starting from the output and working backwards to the input, as the figure below shows: .. figure:: https://data.dgl.ai/asset/image/guide_6_0_0.png :alt: Imgur With this, one can save the workload and computation resources for training a GNN on a large graph. DGL provides a few neighborhood samplers and a pipeline for training a GNN with neighborhood sampling, as well as ways to customize your sampling strategies. Roadmap ----------- The chapter starts with sections for training GNNs stochastically under different scenarios. * :ref:`guide-minibatch-node-classification-sampler` * :ref:`guide-minibatch-edge-classification-sampler` * :ref:`guide-minibatch-link-classification-sampler` The remaining sections cover more advanced topics, suitable for those who wish to develop new sampling algorithms, new GNN modules compatible with mini-batch training and understand how evaluation and inference can be conducted in mini-batches. * :ref:`guide-minibatch-customizing-neighborhood-sampler` * :ref:`guide-minibatch-sparse` * :ref:`guide-minibatch-custom-gnn-module` * :ref:`guide-minibatch-inference` The following are performance tips for implementing and using neighborhood sampling: * :ref:`guide-minibatch-gpu-sampling` * :ref:`guide-minibatch-parallelism` .. 
toctree:: :maxdepth: 1 :hidden: :glob: minibatch-node minibatch-edge minibatch-link minibatch-custom-sampler minibatch-sparse minibatch-nn minibatch-inference minibatch-gpu-sampling minibatch-parallelism ================================================ FILE: docs/source/guide/mixed_precision.rst ================================================ .. _guide-mixed_precision: Chapter 8: Mixed Precision Training =================================== DGL is compatible with the `PyTorch Automatic Mixed Precision (AMP) package `_ for mixed precision training, thus saving both training time and GPU/CPU memory consumption. This feature requires DGL 0.9+, and DGL 1.1+ for CPU bfloat16. Message-Passing with Half Precision ----------------------------------- DGL allows message-passing on ``float16 (fp16)`` / ``bfloat16 (bf16)`` features for both UDFs (User Defined Functions) and built-in functions (e.g., ``dgl.function.sum``, ``dgl.function.copy_u``). .. note:: Please check bfloat16 support via ``torch.cuda.is_bf16_supported()`` before using it. Typically it requires CUDA >= 11.0 and GPU compute capability >= 8.0. The following example shows how to use DGL's message-passing APIs on half-precision features: >>> import torch >>> import dgl >>> import dgl.function as fn >>> dev = torch.device('cuda') >>> g = dgl.rand_graph(30, 100).to(dev) # Create a graph on GPU w/ 30 nodes and 100 edges. >>> g.ndata['h'] = torch.rand(30, 16).to(dev).half() # Create fp16 node features. >>> g.edata['w'] = torch.rand(100, 1).to(dev).half() # Create fp16 edge features. >>> # Use DGL's built-in functions for message passing on fp16 features. >>> g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'x')) >>> g.ndata['x'].dtype torch.float16 >>> g.apply_edges(fn.u_dot_v('h', 'x', 'hx')) >>> g.edata['hx'].dtype torch.float16 >>> # Use UDFs for message passing on fp16 features. >>> def message(edges): ... return {'m': edges.src['h'] * edges.data['w']} ... >>> def reduce(nodes): ...
return {'y': torch.sum(nodes.mailbox['m'], 1)} ... >>> def dot(edges): ... return {'hy': (edges.src['h'] * edges.dst['y']).sum(-1, keepdims=True)} ... >>> g.update_all(message, reduce) >>> g.ndata['y'].dtype torch.float16 >>> g.apply_edges(dot) >>> g.edata['hy'].dtype torch.float16 End-to-End Mixed Precision Training ----------------------------------- DGL relies on PyTorch's AMP package for mixed precision training, and the user experience is exactly the same as `PyTorch's `_. By wrapping the forward pass with ``torch.amp.autocast()``, PyTorch automatically selects the appropriate datatype for each op and tensor. Half precision tensors are memory efficient, most operators on half precision tensors are faster as they leverage GPU tensorcores and CPU special instructon set. .. code:: import torch.nn.functional as F from torch.amp import autocast def forward(device_type, g, feat, label, mask, model, amp_dtype): amp_enabled = amp_dtype in (torch.float16, torch.bfloat16) with autocast(device_type, enabled=amp_enabled, dtype=amp_dtype): logit = model(g, feat) loss = F.cross_entropy(logit[mask], label[mask]) return loss Small Gradients in ``float16`` format have underflow problems (flush to zero). PyTorch provides a ``GradScaler`` module to address this issue. It multiplies the loss by a factor and invokes backward pass on the scaled loss to prevent the underflow problem. It then unscales the computed gradients before the optimizer updates the parameters. The scale factor is determined automatically. Note that ``bfloat16`` doesn't require a ``GradScaler``. .. code:: from torch.cuda.amp import GradScaler scaler = GradScaler() def backward(scaler, loss, optimizer): scaler.scale(loss).backward() scaler.step(optimizer) scaler.update() The following example trains a 3-layer GAT on the Reddit dataset (w/ 114 million edges). Pay attention to the differences in the code when AMP is activated or not. .. 
code:: import torch import torch.nn as nn import dgl from dgl.data import RedditDataset from dgl.nn import GATConv from dgl.transforms import AddSelfLoop amp_dtype = torch.bfloat16 # or torch.float16 class GAT(nn.Module): def __init__(self, in_feats, n_hidden, n_classes, heads): super().__init__() self.layers = nn.ModuleList() self.layers.append(GATConv(in_feats, n_hidden, heads[0], activation=F.elu)) self.layers.append(GATConv(n_hidden * heads[0], n_hidden, heads[1], activation=F.elu)) self.layers.append(GATConv(n_hidden * heads[1], n_classes, heads[2], activation=F.elu)) def forward(self, g, h): for l, layer in enumerate(self.layers): h = layer(g, h) if l != len(self.layers) - 1: h = h.flatten(1) else: h = h.mean(1) return h # Data loading transform = AddSelfLoop() data = RedditDataset(transform) device_type = 'cuda' # or 'cpu' dev = torch.device(device_type) g = data[0] g = g.int().to(dev) train_mask = g.ndata['train_mask'] feat = g.ndata['feat'] label = g.ndata['label'] in_feats = feat.shape[1] n_hidden = 256 n_classes = data.num_classes heads = [1, 1, 1] model = GAT(in_feats, n_hidden, n_classes, heads) model = model.to(dev) model.train() # Create optimizer optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) for epoch in range(100): optimizer.zero_grad() loss = forward(device_type, g, feat, label, train_mask, model, amp_dtype) if amp_dtype == torch.float16: # Backprop w/ gradient scaling backward(scaler, loss, optimizer) else: loss.backward() optimizer.step() print('Epoch {} | Loss {}'.format(epoch, loss.item())) On a NVIDIA V100 (16GB) machine, training this model without fp16 consumes 15.2GB GPU memory; with fp16 turned on, the training consumes 12.8G GPU memory, the loss converges to similar values in both settings. If we change the number of heads to ``[2, 2, 2]``, training without fp16 triggers GPU OOM(out-of-memory) issue while training with fp16 consumes 15.7G GPU memory. 
BFloat16 CPU example ----------------------------------- DGL supports running training in the bfloat16 data type on the CPU. This data type doesn't require any CPU feature and can improve the performance of a memory-bound model. Starting with Intel Xeon 4th Generation, which has the `AMX `_ instruction set, bfloat16 should significantly improve training and inference performance without huge code changes. Here is an example of simple GCN bfloat16 training: .. code:: import torch import torch.nn as nn import torch.nn.functional as F import dgl from dgl.data import CiteseerGraphDataset from dgl.nn import GraphConv from dgl.transforms import AddSelfLoop class GCN(nn.Module): def __init__(self, in_size, hid_size, out_size): super().__init__() self.layers = nn.ModuleList() # two-layer GCN self.layers.append( GraphConv(in_size, hid_size, activation=F.relu) ) self.layers.append(GraphConv(hid_size, out_size)) self.dropout = nn.Dropout(0.5) def forward(self, g, features): h = features for i, layer in enumerate(self.layers): if i != 0: h = self.dropout(h) h = layer(g, h) return h # Data loading transform = AddSelfLoop() data = CiteseerGraphDataset(transform=transform) g = data[0] g = g.int() train_mask = g.ndata['train_mask'] feat = g.ndata['feat'] label = g.ndata['label'] in_size = feat.shape[1] hid_size = 16 out_size = data.num_classes model = GCN(in_size, hid_size, out_size) # Convert model and graph to bfloat16 g = dgl.to_bfloat16(g) feat = feat.to(dtype=torch.bfloat16) model = model.to(dtype=torch.bfloat16) model.train() # Create optimizer optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) loss_fcn = nn.CrossEntropyLoss() for epoch in range(100): logits = model(g, feat) loss = loss_fcn(logits[train_mask], label[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() print('Epoch {} | Loss {}'.format(epoch, loss.item())) The only difference from ordinary training is the model and graph conversion before training/inference. ..
code:: g = dgl.to_bfloat16(g) feat = feat.to(dtype=torch.bfloat16) model = model.to(dtype=torch.bfloat16) DGL is still improving its half-precision support and the compute kernel's performance is far from optimal, please stay tuned to our future updates. ================================================ FILE: docs/source/guide/nn-construction.rst ================================================ .. _guide-nn-construction: 3.1 DGL NN Module Construction Function --------------------------------------- :ref:`(中文版) ` The construction function performs the following steps: 1. Set options. 2. Register learnable parameters or submodules. 3. Reset parameters. .. code:: import torch.nn as nn from dgl.utils import expand_as_pair class SAGEConv(nn.Module): def __init__(self, in_feats, out_feats, aggregator_type, bias=True, norm=None, activation=None): super(SAGEConv, self).__init__() self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._aggre_type = aggregator_type self.norm = norm self.activation = activation In construction function, one first needs to set the data dimensions. For general PyTorch module, the dimensions are usually input dimension, output dimension and hidden dimensions. For graph neural networks, the input dimension can be split into source node dimension and destination node dimension. Besides data dimensions, a typical option for graph neural network is aggregation type (``self._aggre_type``). Aggregation type determines how messages on different edges are aggregated for a certain destination node. Commonly used aggregation types include ``mean``, ``sum``, ``max``, ``min``. Some modules may apply more complicated aggregation like an ``lstm``. ``norm`` here is a callable function for feature normalization. In the SAGEConv paper, such normalization can be l2 normalization: :math:`h_v = h_v / \lVert h_v \rVert_2`. .. 
code:: # aggregator type: mean, pool, lstm, gcn if aggregator_type not in ['mean', 'pool', 'lstm', 'gcn']: raise KeyError('Aggregator type {} not supported.'.format(aggregator_type)) if aggregator_type == 'pool': self.fc_pool = nn.Linear(self._in_src_feats, self._in_src_feats) if aggregator_type == 'lstm': self.lstm = nn.LSTM(self._in_src_feats, self._in_src_feats, batch_first=True) if aggregator_type in ['mean', 'pool', 'lstm']: self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=bias) self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=bias) self.reset_parameters() Register parameters and submodules. In SAGEConv, submodules vary according to the aggregation type. Those modules are pure PyTorch nn modules like ``nn.Linear``, ``nn.LSTM``, etc. At the end of construction function, weight initialization is applied by calling ``reset_parameters()``. .. code:: def reset_parameters(self): """Reinitialize learnable parameters.""" gain = nn.init.calculate_gain('relu') if self._aggre_type == 'pool': nn.init.xavier_uniform_(self.fc_pool.weight, gain=gain) if self._aggre_type == 'lstm': self.lstm.reset_parameters() if self._aggre_type != 'gcn': nn.init.xavier_uniform_(self.fc_self.weight, gain=gain) nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain) ================================================ FILE: docs/source/guide/nn-forward.rst ================================================ .. _guide-nn-forward: 3.2 DGL NN Module Forward Function ---------------------------------- :ref:`(中文版) ` In NN module, ``forward()`` function does the actual message passing and computation. Compared with PyTorch’s NN module which usually takes tensors as the parameters, DGL NN module takes an additional parameter :class:`dgl.DGLGraph`. The workload for ``forward()`` function can be split into three parts: - Graph checking and graph type specification. - Message passing. - Feature update. 
The rest of the section takes a deep dive into the ``forward()`` function in the SAGEConv example. Graph checking and graph type specification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: def forward(self, graph, feat): with graph.local_scope(): # Specify graph type then expand input feature according to graph type feat_src, feat_dst = expand_as_pair(feat, graph) ``forward()`` needs to handle many corner cases on the input that can lead to invalid values in computing and message passing. One typical check in conv modules like :class:`~dgl.nn.pytorch.conv.GraphConv` is to verify that the input graph has no 0-in-degree nodes. When a node has 0 in-degree, the ``mailbox`` will be empty and the reduce function will produce all-zero values. This may cause silent regression in model performance. However, in the :class:`~dgl.nn.pytorch.conv.SAGEConv` module, the aggregated representation will be concatenated with the original node feature, so the output of ``forward()`` will not be all-zero. No such check is needed in this case. A DGL NN module should be reusable across different types of graph input including: homogeneous graph, heterogeneous graph (:ref:`guide-graph-heterogeneous`), subgraph block (:ref:`guide-minibatch`). The math formulas for SAGEConv are: .. math:: h_{\mathcal{N}(dst)}^{(l+1)} = \mathrm{aggregate} \left(\{h_{src}^{(l)}, \forall src \in \mathcal{N}(dst) \}\right) .. math:: h_{dst}^{(l+1)} = \sigma \left(W \cdot \mathrm{concat} (h_{dst}^{(l)}, h_{\mathcal{N}(dst)}^{(l+1)}) + b \right) .. math:: h_{dst}^{(l+1)} = \mathrm{norm}(h_{dst}^{(l+1)}) One needs to specify the source node feature ``feat_src`` and destination node feature ``feat_dst`` according to the graph type. :meth:`~dgl.utils.expand_as_pair` is a function that specifies the graph type and expands ``feat`` into ``feat_src`` and ``feat_dst``. The detail of this function is shown below. .. 
code:: def expand_as_pair(input_, g=None): if isinstance(input_, tuple): # Bipartite graph case return input_ elif g is not None and g.is_block: # Subgraph block case if isinstance(input_, Mapping): input_dst = { k: F.narrow_row(v, 0, g.number_of_dst_nodes(k)) for k, v in input_.items()} else: input_dst = F.narrow_row(input_, 0, g.number_of_dst_nodes()) return input_, input_dst else: # Homogeneous graph case return input_, input_ For homogeneous whole graph training, source nodes and destination nodes are the same. They are all the nodes in the graph. For the heterogeneous case, the graph can be split into several bipartite graphs, one for each relation. The relations are represented as ``(src_type, edge_type, dst_type)``. When ``expand_as_pair`` identifies that the input feature ``feat`` is a tuple, it will treat the graph as bipartite. The first element in the tuple will be the source node feature and the second element will be the destination node feature. In mini-batch training, the computing is applied on a subgraph sampled based on a bunch of destination nodes. The subgraph is called a ``block`` in DGL. In the block creation phase, ``dst nodes`` are in the front of the node list. One can find the ``feat_dst`` by the index ``[0:g.number_of_dst_nodes()]``. After determining ``feat_src`` and ``feat_dst``, the computation for the above three graph types is the same. Message passing and reducing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
code:: import dgl.function as fn import torch.nn.functional as F from dgl.utils import check_eq_shape if self._aggre_type == 'mean': graph.srcdata['h'] = feat_src graph.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'neigh')) h_neigh = graph.dstdata['neigh'] elif self._aggre_type == 'gcn': check_eq_shape(feat) graph.srcdata['h'] = feat_src graph.dstdata['h'] = feat_dst graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'neigh')) # divide in_degrees degs = graph.in_degrees().to(feat_dst) h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) / (degs.unsqueeze(-1) + 1) elif self._aggre_type == 'pool': graph.srcdata['h'] = F.relu(self.fc_pool(feat_src)) graph.update_all(fn.copy_u('h', 'm'), fn.max('m', 'neigh')) h_neigh = graph.dstdata['neigh'] else: raise KeyError('Aggregator type {} not recognized.'.format(self._aggre_type)) # GraphSAGE GCN does not require fc_self. if self._aggre_type == 'gcn': rst = self.fc_neigh(h_neigh) else: rst = self.fc_self(h_self) + self.fc_neigh(h_neigh) The code actually does message passing and reducing computing. This part of code varies module by module. Note that all the message passing in the above code are implemented using :meth:`~dgl.DGLGraph.update_all` API and ``built-in`` message/reduce functions to fully utilize DGL’s performance optimization as described in :ref:`guide-message-passing-efficient`. Update feature after reducing for output ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: # activation if self.activation is not None: rst = self.activation(rst) # normalization if self.norm is not None: rst = self.norm(rst) return rst The last part of ``forward()`` function is to update the feature after the ``reduce function``. Common update operations are applying activation function and normalization according to the option set in the object construction phase. ================================================ FILE: docs/source/guide/nn-heterograph.rst ================================================ .. 
_guide-nn-heterograph: 3.3 Heterogeneous GraphConv Module ------------------------------------ :ref:`(中文版) ` :class:`~dgl.nn.pytorch.HeteroGraphConv` is a module-level encapsulation to run DGL NN module on heterogeneous graphs. The implementation logic is the same as message passing level API :meth:`~dgl.DGLGraph.multi_update_all`, including: - DGL NN module within each relation :math:`r`. - Reduction that merges the results on the same node type from multiple relations. This can be formulated as: .. math:: h_{dst}^{(l+1)} = \underset{r\in\mathcal{R}, r_{dst}=dst}{AGG} (f_r(g_r, h_{r_{src}}^l, h_{r_{dst}}^l)) where :math:`f_r` is the NN module for each relation :math:`r`, :math:`AGG` is the aggregation function. HeteroGraphConv implementation logic: ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: import torch.nn as nn class HeteroGraphConv(nn.Module): def __init__(self, mods, aggregate='sum'): super(HeteroGraphConv, self).__init__() self.mods = nn.ModuleDict(mods) if isinstance(aggregate, str): # An internal function to get common aggregation functions self.agg_fn = get_aggregate_fn(aggregate) else: self.agg_fn = aggregate The heterograph convolution takes a dictionary ``mods`` that maps each relation to an nn module and sets the function that aggregates results on the same node type from multiple relations. .. code:: def forward(self, g, inputs, mod_args=None, mod_kwargs=None): if mod_args is None: mod_args = {} if mod_kwargs is None: mod_kwargs = {} outputs = {nty : [] for nty in g.dsttypes} Besides input graph and input tensors, the ``forward()`` function takes two additional dictionary parameters ``mod_args`` and ``mod_kwargs``. These two dictionaries have the same keys as ``self.mods``. They are used as customized parameters when calling their corresponding NN modules in ``self.mods`` for different types of relations. An output dictionary is created to hold output tensor for each destination type ``nty`` . 
Note that the value for each ``nty`` is a list, indicating a single node type may get multiple outputs if more than one relation has ``nty`` as the destination type. ``HeteroGraphConv`` will perform a further aggregation on the lists. .. code:: if g.is_block: src_inputs = inputs dst_inputs = {k: v[:g.number_of_dst_nodes(k)] for k, v in inputs.items()} else: src_inputs = dst_inputs = inputs for stype, etype, dtype in g.canonical_etypes: rel_graph = g[stype, etype, dtype] if rel_graph.num_edges() == 0: continue if stype not in src_inputs or dtype not in dst_inputs: continue dstdata = self.mods[etype]( rel_graph, (src_inputs[stype], dst_inputs[dtype]), *mod_args.get(etype, ()), **mod_kwargs.get(etype, {})) outputs[dtype].append(dstdata) The input ``g`` can be a heterogeneous graph or a subgraph block from a heterogeneous graph. As in an ordinary NN module, the ``forward()`` function needs to handle different input graph types separately. Each relation is represented as a ``canonical_etype``, which is ``(stype, etype, dtype)``. Using ``canonical_etype`` as the key, one can extract out a bipartite graph ``rel_graph``. For a bipartite graph, the input feature will be organized as a tuple ``(src_inputs[stype], dst_inputs[dtype])``. The NN module for each relation is called and the output is saved. To avoid unnecessary calls, relations with no edges, or whose source or destination node type is missing from the inputs, are skipped. .. code:: rsts = {} for nty, alist in outputs.items(): if len(alist) != 0: rsts[nty] = self.agg_fn(alist, nty) Finally, the results on the same destination node type from multiple relations are aggregated using the ``self.agg_fn`` function. Examples can be found in the API Doc for :class:`~dgl.nn.pytorch.HeteroGraphConv`. ================================================ FILE: docs/source/guide/nn.rst ================================================ .. 
_guide-nn: Chapter 3: Building GNN Modules =============================== :ref:`(中文版) ` DGL NN module consists of building blocks for GNN models. An NN module inherits from `Pytorch’s NN Module `__, `MXNet Gluon’s NN Block `__ and `TensorFlow’s Keras Layer `__, depending on the DNN framework backend in use. In a DGL NN module, the parameter registration in construction function and tensor operation in forward function are the same with the backend framework. In this way, DGL code can be seamlessly integrated into the backend framework code. The major difference lies in the message passing operations that are unique in DGL. DGL has integrated many commonly used :ref:`apinn-pytorch-conv`, :ref:`apinn-pytorch-dense-conv`, :ref:`apinn-pytorch-pooling`, and :ref:`apinn-pytorch-util`. We welcome your contribution! This chapter takes :class:`~dgl.nn.pytorch.conv.SAGEConv` with Pytorch backend as an example to introduce how to build a custom DGL NN Module. Roadmap ------- * :ref:`guide-nn-construction` * :ref:`guide-nn-forward` * :ref:`guide-nn-heterograph` .. toctree:: :maxdepth: 1 :hidden: :glob: nn-construction nn-forward nn-heterograph ================================================ FILE: docs/source/guide/training-edge.rst ================================================ .. _guide-training-edge-classification: 5.2 Edge Classification/Regression --------------------------------------------- :ref:`(中文版) ` Sometimes you wish to predict the attributes on the edges of the graph. In that case, you would like to have an *edge classification/regression* model. Here we generate a random graph for edge prediction as a demonstration. .. 
code:: python src = np.random.randint(0, 100, 500) dst = np.random.randint(0, 100, 500) # make it symmetric edge_pred_graph = dgl.graph((np.concatenate([src, dst]), np.concatenate([dst, src]))) # synthetic node and edge features, as well as edge labels edge_pred_graph.ndata['feature'] = torch.randn(100, 10) edge_pred_graph.edata['feature'] = torch.randn(1000, 10) edge_pred_graph.edata['label'] = torch.randn(1000) # synthetic train-validation-test splits edge_pred_graph.edata['train_mask'] = torch.zeros(1000, dtype=torch.bool).bernoulli(0.6) Overview ~~~~~~~~ From the previous section you have learned how to do node classification with a multilayer GNN. The same technique can be applied for computing a hidden representation of any node. The prediction on edges can then be derived from the representation of their incident nodes. The most common case of computing the prediction on an edge is to express it as a parameterized function of the representation of its incident nodes, and optionally the features on the edge itself. Model Implementation Difference from Node Classification ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Assuming that you compute the node representation with the model from the previous section, you only need to write another component that computes the edge prediction with the :meth:`~dgl.DGLGraph.apply_edges` method. For instance, if you would like to compute a score for each edge for edge regression, the following code computes the dot product of incident node representations on each edge. .. code:: python import dgl.function as fn class DotProductPredictor(nn.Module): def forward(self, graph, h): # h contains the node representations computed from the GNN defined # in the node classification section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score')) return graph.edata['score'] One can also write a prediction function that predicts a vector for each edge with an MLP. 
Such a vector can be used in further downstream tasks, e.g. as logits of a categorical distribution. .. code:: python class MLPPredictor(nn.Module): def __init__(self, in_features, out_classes): super().__init__() self.W = nn.Linear(in_features * 2, out_classes) def apply_edges(self, edges): h_u = edges.src['h'] h_v = edges.dst['h'] score = self.W(torch.cat([h_u, h_v], 1)) return {'score': score} def forward(self, graph, h): # h contains the node representations computed from the GNN defined # in the node classification section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(self.apply_edges) return graph.edata['score'] Training loop ~~~~~~~~~~~~~ Given the node representation computation model and an edge predictor model, we can easily write a full-graph training loop where we compute the prediction on all edges. The following example takes ``SAGE`` in the previous section as the node representation computation model and ``DotProductPredictor`` as an edge predictor model. .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.sage = SAGE(in_features, hidden_features, out_features) self.pred = DotProductPredictor() def forward(self, g, x): h = self.sage(g, x) return self.pred(g, h) In this example, we also assume that the training/validation/test edge sets are identified by boolean masks on edges. This example also does not include early stopping and model saving. .. code:: python node_features = edge_pred_graph.ndata['feature'] edge_label = edge_pred_graph.edata['label'] train_mask = edge_pred_graph.edata['train_mask'] model = Model(10, 20, 5) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): pred = model(edge_pred_graph, node_features) loss = ((pred[train_mask] - edge_label[train_mask]) ** 2).mean() opt.zero_grad() loss.backward() opt.step() print(loss.item()) .. 
_guide-training-edge-classification-heterogeneous-graph: Heterogeneous graph ~~~~~~~~~~~~~~~~~~~ Edge classification on heterogeneous graphs is not very different from that on homogeneous graphs. If you wish to perform edge classification on one edge type, you only need to compute the node representation for all node types, and predict on that edge type with :meth:`~dgl.DGLGraph.apply_edges` method. For example, to make ``DotProductPredictor`` work on one edge type of a heterogeneous graph, you only need to specify the edge type in ``apply_edges`` method. .. code:: python class HeteroDotProductPredictor(nn.Module): def forward(self, graph, h, etype): # h contains the node representations for each edge type computed from # the GNN for heterogeneous graphs defined in the node classification # section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h # assigns 'h' of all node types in one shot graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype) return graph.edges[etype].data['score'] You can similarly write a ``HeteroMLPPredictor``. .. code:: python class HeteroMLPPredictor(nn.Module): def __init__(self, in_features, out_classes): super().__init__() self.W = nn.Linear(in_features * 2, out_classes) def apply_edges(self, edges): h_u = edges.src['h'] h_v = edges.dst['h'] score = self.W(torch.cat([h_u, h_v], 1)) return {'score': score} def forward(self, graph, h, etype): # h contains the node representations for each edge type computed from # the GNN for heterogeneous graphs defined in the node classification # section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h # assigns 'h' of all node types in one shot graph.apply_edges(self.apply_edges, etype=etype) return graph.edges[etype].data['score'] The end-to-end model that predicts a score for each edge on a single edge type will look like this: .. 
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroDotProductPredictor() def forward(self, g, x, etype): h = self.sage(g, x) return self.pred(g, h, etype) Using the model simply involves feeding the model a dictionary of node types and features. .. code:: python model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] label = hetero_graph.edges['click'].data['label'] train_mask = hetero_graph.edges['click'].data['train_mask'] node_features = {'user': user_feats, 'item': item_feats} Then the training loop looks almost the same as that in homogeneous graph. For instance, if you wish to predict the edge labels on edge type ``click``, then you can simply do .. code:: python opt = torch.optim.Adam(model.parameters()) for epoch in range(10): pred = model(hetero_graph, node_features, 'click') loss = ((pred[train_mask] - label[train_mask]) ** 2).mean() opt.zero_grad() loss.backward() opt.step() print(loss.item()) Predicting Edge Type of an Existing Edge on a Heterogeneous Graph ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sometimes you may want to predict which type an existing edge belongs to. For instance, given the :ref:`heterogeneous graph example `, your task is given an edge connecting a user and an item, to predict whether the user would ``click`` or ``dislike`` an item. This is a simplified version of rating prediction, which is common in recommendation literature. You can use a heterogeneous graph convolution network to obtain the node representations. For instance, you can still use the :ref:`RGCN defined previously ` for this purpose. 
To predict the type of an edge, you can simply repurpose the ``HeteroDotProductPredictor`` above so that it takes in another graph with only one edge type that “merges” all the edge types to be predicted, and emits the score of each type for every edge. In the example here, you will need a graph that has two node types ``user`` and ``item``, and one single edge type that “merges” all the edge types from ``user`` to ``item``, i.e. ``click`` and ``dislike``. This can be conveniently created using the following syntax: .. code:: python dec_graph = hetero_graph['user', :, 'item'] which returns a heterogeneous graph with node types ``user`` and ``item``, as well as a single edge type combining all edge types in between, i.e. ``click`` and ``dislike``. Since the statement above also returns the original edge types as a feature named ``dgl.ETYPE``, we can use that as labels. .. code:: python edge_label = dec_graph.edata[dgl.ETYPE] Given the graph above as input to the edge type predictor module, you can write your predictor module as follows. .. code:: python class HeteroMLPPredictor(nn.Module): def __init__(self, in_dims, n_classes): super().__init__() self.W = nn.Linear(in_dims * 2, n_classes) def apply_edges(self, edges): x = torch.cat([edges.src['h'], edges.dst['h']], 1) y = self.W(x) return {'score': y} def forward(self, graph, h): # h contains the node representations for each edge type computed from # the GNN for heterogeneous graphs defined in the node classification # section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h # assigns 'h' of all node types in one shot graph.apply_edges(self.apply_edges) return graph.edata['score'] The model that combines the node representation module and the edge type predictor module is the following: .. 
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroMLPPredictor(out_features, len(rel_names)) def forward(self, g, x, dec_graph): h = self.sage(g, x) return self.pred(dec_graph, h) The training loop is then simply the following: .. code:: python model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] node_features = {'user': user_feats, 'item': item_feats} opt = torch.optim.Adam(model.parameters()) for epoch in range(10): logits = model(hetero_graph, node_features, dec_graph) loss = F.cross_entropy(logits, edge_label) opt.zero_grad() loss.backward() opt.step() print(loss.item()) DGL provides `Graph Convolutional Matrix Completion `__ as an example of rating prediction, which is formulated by predicting the type of an existing edge on a heterogeneous graph. The node representation module in the `model implementation file `__ is called ``GCMCLayer``. The edge type predictor module is called ``BiDecoder``. Both of them are more complicated than the setting described here. ================================================ FILE: docs/source/guide/training-eweight.rst ================================================ .. _guide-training-eweight: 5.5 Use of Edge Weights ---------------------------------- :ref:`(中文版) ` In a weighted graph, each edge is associated with a semantically meaningful scalar weight. For example, the edge weights can be connectivity strengths or confidence scores. Naturally, one may want to utilize edge weights in model development. Message Passing with Edge Weights ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Most graph neural networks (GNNs) integrate the graph topology information in forward computation by and only by the message passing mechanism. 
A message passing operation can be viewed as a function that takes an adjacency matrix and additional input features as input arguments. For an unweighted graph, the entries in the adjacency matrix can be zero or one, where a one-valued entry indicates an edge. If this graph is weighted, the non-zero entries can take arbitrary scalar values. This is equivalent to multiplying each message by its corresponding edge weight as in `GAT `__. With DGL, one can achieve this by: - Saving the edge weights as an edge feature - Multiplying the original message by the edge feature in the message function Consider the message passing example with DGL below. .. code:: import dgl.function as fn # Suppose graph.ndata['ft'] stores the input node features graph.update_all(fn.copy_u('ft', 'm'), fn.sum('m', 'ft')) One can modify it for edge weight support as follows. .. code:: import dgl.function as fn # Save edge weights as an edge feature, which is a tensor of shape (E, *) # E is the number of edges graph.edata['w'] = eweight # Suppose graph.ndata['ft'] stores the input node features graph.update_all(fn.u_mul_e('ft', 'w', 'm'), fn.sum('m', 'ft')) Using NN Modules with Edge Weights ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ One can modify an NN module for edge weight support by modifying all message passing operations in it. The following code snippet is an example for NN module supporting edge weights. .. 
code:: import dgl.function as fn import torch.nn as nn class GNN(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.linear = nn.Linear(in_feats, out_feats) def forward(self, g, feat, edge_weight=None): with g.local_scope(): g.ndata['ft'] = self.linear(feat) if edge_weight is None: msg_func = fn.copy_u('ft', 'm') else: g.edata['w'] = edge_weight msg_func = fn.u_mul_e('ft', 'w', 'm') g.update_all(msg_func, fn.sum('m', 'ft')) return g.ndata['ft'] DGL's built-in NN modules support edge weights if they take an optional :attr:`edge_weight` argument in the forward function. One may need to normalize raw edge weights. In this regard, DGL provides :func:`~dgl.nn.pytorch.conv.EdgeWeightNorm`. ================================================ FILE: docs/source/guide/training-graph.rst ================================================ .. _guide-training-graph-classification: 5.4 Graph Classification ---------------------------------- :ref:`(中文版) ` Instead of a big single graph, sometimes one might have the data in the form of multiple graphs, for example a list of different types of communities of people. By characterizing the friendship among people in the same community by a graph, one can get a list of graphs to classify. In this scenario, a graph classification model could help identify the type of the community, i.e. to classify each graph based on the structure and overall information. Overview ~~~~~~~~ The major difference between graph classification and node classification or link prediction is that the prediction result characterizes the property of the entire input graph. One can perform the message passing over nodes/edges just like the previous tasks, but also needs to retrieve a graph-level representation. The graph classification pipeline proceeds as follows: .. 
figure:: https://data.dgl.ai/tutorial/batch/graph_classifier.png :alt: Graph Classification Process Graph Classification Process From left to right, the common practice is: - Prepare a batch of graphs - Perform message passing on the batched graphs to update node/edge features - Aggregate node/edge features into graph-level representations - Classify graphs based on graph-level representations Batch of Graphs ^^^^^^^^^^^^^^^ Usually a graph classification task trains on a lot of graphs, and it will be very inefficient to use only one graph at a time when training the model. Borrowing the idea of mini-batch training from common deep learning practice, one can build a batch of multiple graphs and send them together for one training iteration. In DGL, one can build a single batched graph from a list of graphs. This batched graph can be simply used as a single large graph, with connected components corresponding to the original small graphs. .. figure:: https://data.dgl.ai/tutorial/batch/batch.png :alt: Batched Graph Batched Graph The following example calls :func:`dgl.batch` on a list of graphs. A batched graph is a single graph, while it also carries information about the list. .. code:: python import dgl import torch as th g1 = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 3]))) g2 = dgl.graph((th.tensor([0, 0, 0, 1]), th.tensor([0, 1, 2, 0]))) bg = dgl.batch([g1, g2]) bg # Graph(num_nodes=7, num_edges=7, # ndata_schemes={} # edata_schemes={}) bg.batch_size # 2 bg.batch_num_nodes() # tensor([4, 3]) bg.batch_num_edges() # tensor([3, 4]) bg.edges() # (tensor([0, 1, 2, 4, 4, 4, 5], tensor([1, 2, 3, 4, 5, 6, 4])) Please note that most dgl transformation functions will discard the batch information. In order to maintain such information, please use :func:`dgl.DGLGraph.set_batch_num_nodes` and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph. 
Graph Readout ^^^^^^^^^^^^^ Every graph in the data may have its unique structure, as well as its node and edge features. In order to make a single prediction, one usually aggregates and summarizes over the possibly abundant information. This type of operation is named *readout*. Common readout operations include summation, average, maximum or minimum over all node or edge features. Given a graph :math:`g`, one can define the average node feature readout as .. math:: h_g = \frac{1}{|\mathcal{V}|}\sum_{v\in \mathcal{V}}h_v where :math:`h_g` is the representation of :math:`g`, :math:`\mathcal{V}` is the set of nodes in :math:`g`, :math:`h_v` is the feature of node :math:`v`. DGL provides built-in support for common readout operations. For example, :func:`dgl.mean_nodes` implements the above readout operation. Once :math:`h_g` is available, one can pass it through an MLP layer for classification output. Writing Neural Network Model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The input to the model is the batched graph with node and edge features. Computation on a Batched Graph ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ First, different graphs in a batch are entirely separated, i.e. no edges between any two graphs. With this nice property, all message passing functions still have the same results. Second, the readout function on a batched graph will be conducted over each graph separately. Assuming the batch size is :math:`B` and the feature to be aggregated has dimension :math:`D`, the shape of the readout result will be :math:`(B, D)`. .. code:: python import dgl import torch g1 = dgl.graph(([0, 1], [1, 0])) g1.ndata['h'] = torch.tensor([1., 2.]) g2 = dgl.graph(([0, 1], [1, 2])) g2.ndata['h'] = torch.tensor([1., 2., 3.]) dgl.readout_nodes(g1, 'h') # tensor([3.]) # 1 + 2 bg = dgl.batch([g1, g2]) dgl.readout_nodes(bg, 'h') # tensor([3., 6.]) # [1 + 2, 1 + 2 + 3] Finally, each node/edge feature in a batched graph is obtained by concatenating the corresponding features from all graphs in order. .. 
code:: python bg.ndata['h'] # tensor([1., 2., 1., 2., 3.]) Model Definition ^^^^^^^^^^^^^^^^ Being aware of the above computation rules, one can define a model as follows. .. code:: python import dgl.nn.pytorch as dglnn import torch.nn as nn class Classifier(nn.Module): def __init__(self, in_dim, hidden_dim, n_classes): super(Classifier, self).__init__() self.conv1 = dglnn.GraphConv(in_dim, hidden_dim) self.conv2 = dglnn.GraphConv(hidden_dim, hidden_dim) self.classify = nn.Linear(hidden_dim, n_classes) def forward(self, g, h): # Apply graph convolution and activation. h = F.relu(self.conv1(g, h)) h = F.relu(self.conv2(g, h)) with g.local_scope(): g.ndata['h'] = h # Calculate graph representation by average readout. hg = dgl.mean_nodes(g, 'h') return self.classify(hg) Training Loop ~~~~~~~~~~~~~ Data Loading ^^^^^^^^^^^^ Once the model is defined, one can start training. Since graph classification deals with lots of relatively small graphs instead of a big single one, one can train efficiently on stochastic mini-batches of graphs, without the need to design sophisticated graph sampling algorithms. Assume that one has a graph classification dataset as introduced in :ref:`guide-data-pipeline`. .. code:: python import dgl.data dataset = dgl.data.GINDataset('MUTAG', False) Each item in the graph classification dataset is a pair of a graph and its label. One can speed up the data loading process by taking advantage of the GraphDataLoader to iterate over the dataset of graphs in mini-batches. .. code:: python from dgl.dataloading import GraphDataLoader dataloader = GraphDataLoader( dataset, batch_size=1024, drop_last=False, shuffle=True) Training loop then simply involves iterating over the dataloader and updating the model. .. 
code:: python import torch.nn.functional as F # Only an example, 7 is the input feature size model = Classifier(7, 20, 5) opt = torch.optim.Adam(model.parameters()) for epoch in range(20): for batched_graph, labels in dataloader: feats = batched_graph.ndata['attr'] logits = model(batched_graph, feats) loss = F.cross_entropy(logits, labels) opt.zero_grad() loss.backward() opt.step() For an end-to-end example of graph classification, see `DGL's GIN example `__. The training loop is inside the function ``train`` in `main.py `__. The model implementation is inside `gin.py `__ with more components such as using :class:`dgl.nn.pytorch.GINConv` (also available in MXNet and Tensorflow) as the graph convolution layer, batch normalization, etc. Heterogeneous graph ~~~~~~~~~~~~~~~~~~~ Graph classification with heterogeneous graphs is a little different from that with homogeneous graphs. In addition to graph convolution modules compatible with heterogeneous graphs, one also needs to aggregate over the nodes of different types in the readout function. The following shows an example of summing up the average of node representations for each node type. .. 
code:: python class RGCN(nn.Module): def __init__(self, in_feats, hid_feats, out_feats, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(in_feats, hid_feats) for rel in rel_names}, aggregate='sum') self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats) for rel in rel_names}, aggregate='sum') def forward(self, graph, inputs): # inputs is features of nodes h = self.conv1(graph, inputs) h = {k: F.relu(v) for k, v in h.items()} h = self.conv2(graph, h) return h class HeteroClassifier(nn.Module): def __init__(self, in_dim, hidden_dim, n_classes, rel_names): super().__init__() self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names) self.classify = nn.Linear(hidden_dim, n_classes) def forward(self, g): h = g.ndata['feat'] h = self.rgcn(g, h) with g.local_scope(): g.ndata['h'] = h # Calculate graph representation by average readout. hg = 0 for ntype in g.ntypes: hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype) return self.classify(hg) The rest of the code is not different from that for homogeneous graphs. .. code:: python # etypes is the list of edge types as strings. model = HeteroClassifier(10, 20, 5, etypes) opt = torch.optim.Adam(model.parameters()) for epoch in range(20): for batched_graph, labels in dataloader: logits = model(batched_graph) loss = F.cross_entropy(logits, labels) opt.zero_grad() loss.backward() opt.step() ================================================ FILE: docs/source/guide/training-link.rst ================================================ .. _guide-training-link-prediction: 5.3 Link Prediction --------------------------- :ref:`(中文版) ` In some other settings you may want to predict whether an edge exists between two given nodes or not. Such task is called a *link prediction* task. 
Overview ~~~~~~~~ A GNN-based link prediction model represents the likelihood of connectivity between two nodes :math:`u` and :math:`v` as a function of :math:`\boldsymbol{h}_u^{(L)}` and :math:`\boldsymbol{h}_v^{(L)}`, their node representations computed from the multi-layer GNN. .. math:: y_{u,v} = \phi(\boldsymbol{h}_u^{(L)}, \boldsymbol{h}_v^{(L)}) In this section we refer to :math:`y_{u,v}` as the *score* between node :math:`u` and node :math:`v`. Training a link prediction model involves comparing the scores between nodes connected by an edge against the scores between an arbitrary pair of nodes. For example, given an edge connecting :math:`u` and :math:`v`, we encourage the score between node :math:`u` and :math:`v` to be higher than the score between node :math:`u` and a sampled node :math:`v'` from an arbitrary *noise* distribution :math:`v' \sim P_n(v)`. Such a methodology is called *negative sampling*. There are lots of loss functions that can achieve the behavior above if minimized. A non-exhaustive list includes: - Cross-entropy loss: :math:`\mathcal{L} = - \log \sigma (y_{u,v}) - \sum_{v_i \sim P_n(v), i=1,\dots,k}\log \left[ 1 - \sigma (y_{u,v_i})\right]` - BPR loss: :math:`\mathcal{L} = \sum_{v_i \sim P_n(v), i=1,\dots,k} - \log \sigma (y_{u,v} - y_{u,v_i})` - Margin loss: :math:`\mathcal{L} = \sum_{v_i \sim P_n(v), i=1,\dots,k} \max(0, M - y_{u, v} + y_{u, v_i})`, where :math:`M` is a constant hyperparameter. You may find this idea familiar if you know what `implicit feedback `__ or `noise-contrastive estimation `__ is. The neural network model to compute the score between :math:`u` and :math:`v` is identical to the edge regression model described :ref:`above `. Here is an example of using dot product to compute the scores on edges. .. code:: python class DotProductPredictor(nn.Module): def forward(self, graph, h): # h contains the node representations computed from the GNN defined # in the node classification section (Section 5.1).
with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score')) return graph.edata['score'] Training loop ~~~~~~~~~~~~~ Because our score prediction model operates on graphs, we need to express the negative examples as another graph. The graph will contain all negative node pairs as edges. The following shows an example of expressing negative examples as a graph. Each edge :math:`(u,v)` gets :math:`k` negative examples :math:`(u,v_i)` where :math:`v_i` is sampled from a uniform distribution. .. code:: python def construct_negative_graph(graph, k): src, dst = graph.edges() neg_src = src.repeat_interleave(k) neg_dst = torch.randint(0, graph.num_nodes(), (len(src) * k,)) return dgl.graph((neg_src, neg_dst), num_nodes=graph.num_nodes()) The model that predicts edge scores is the same as that of edge classification/regression. .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.sage = SAGE(in_features, hidden_features, out_features) self.pred = DotProductPredictor() def forward(self, g, neg_g, x): h = self.sage(g, x) return self.pred(g, h), self.pred(neg_g, h) The training loop then repeatedly constructs the negative graph and computes loss. .. code:: python def compute_loss(pos_score, neg_score): # Margin loss n_edges = pos_score.shape[0] return (1 - pos_score + neg_score.view(n_edges, -1)).clamp(min=0).mean() node_features = graph.ndata['feat'] n_features = node_features.shape[1] k = 5 model = Model(n_features, 100, 100) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): negative_graph = construct_negative_graph(graph, k) pos_score, neg_score = model(graph, negative_graph, node_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() print(loss.item()) After training, the node representation can be obtained via .. 
code:: python node_embeddings = model.sage(graph, node_features) There are multiple ways of using the node embeddings. Examples include training downstream classifiers, or doing nearest neighbor search or maximum inner product search for relevant entity recommendation. Heterogeneous graphs ~~~~~~~~~~~~~~~~~~~~ Link prediction on heterogeneous graphs is not very different from that on homogeneous graphs. The following assumes that we are predicting on one edge type, and it is easy to extend it to multiple edge types. For example, you can reuse the ``HeteroDotProductPredictor`` :ref:`above ` for computing the scores of the edges of an edge type for link prediction. .. code:: python class HeteroDotProductPredictor(nn.Module): def forward(self, graph, h, etype): # h contains the node representations for each node type computed from # the GNN defined in the previous section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype) return graph.edges[etype].data['score'] To perform negative sampling, one can construct a negative graph for the edge type you are performing link prediction on as well. .. code:: python def construct_negative_graph(graph, k, etype): utype, _, vtype = etype src, dst = graph.edges(etype=etype) neg_src = src.repeat_interleave(k) neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,)) return dgl.heterograph( {etype: (neg_src, neg_dst)}, num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes}) The model is a bit different from that in edge classification on heterogeneous graphs since you need to specify edge type where you perform link prediction. .. 
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroDotProductPredictor() def forward(self, g, neg_g, x, etype): h = self.sage(g, x) return self.pred(g, h, etype), self.pred(neg_g, h, etype) The training loop is similar to that of homogeneous graphs. .. code:: python def compute_loss(pos_score, neg_score): # Margin loss n_edges = pos_score.shape[0] return (1 - pos_score + neg_score.view(n_edges, -1)).clamp(min=0).mean() k = 5 model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] node_features = {'user': user_feats, 'item': item_feats} opt = torch.optim.Adam(model.parameters()) for epoch in range(10): negative_graph = construct_negative_graph(hetero_graph, k, ('user', 'click', 'item')) pos_score, neg_score = model(hetero_graph, negative_graph, node_features, ('user', 'click', 'item')) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() print(loss.item()) ================================================ FILE: docs/source/guide/training-node.rst ================================================ .. _guide-training-node-classification: 5.1 Node Classification/Regression -------------------------------------------------- :ref:`(中文版) ` One of the most popular and widely adopted tasks for graph neural networks is node classification, where each node in the training/validation/test set is assigned a ground truth category from a set of predefined categories. Node regression is similar, where each node in the training/validation/test set is assigned a ground truth number. 
Overview ~~~~~~~~ To classify nodes, a graph neural network performs the message passing discussed in :ref:`guide-message-passing` to utilize not only the node’s own features, but also the features of its neighboring nodes and edges. Message passing can be repeated for multiple rounds to incorporate information from a larger neighborhood. Writing neural network model ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL provides a few built-in graph convolution modules that can perform one round of message passing. In this guide, we choose :class:`dgl.nn.pytorch.SAGEConv` (also available in MXNet and TensorFlow), the graph convolution module for GraphSAGE. Usually for deep learning models on graphs we need a multi-layer graph neural network, where we do multiple rounds of message passing. This can be achieved by stacking graph convolution modules as follows. .. code:: python # Construct a two-layer GNN model import dgl.nn as dglnn import torch.nn as nn import torch.nn.functional as F class SAGE(nn.Module): def __init__(self, in_feats, hid_feats, out_feats): super().__init__() self.conv1 = dglnn.SAGEConv( in_feats=in_feats, out_feats=hid_feats, aggregator_type='mean') self.conv2 = dglnn.SAGEConv( in_feats=hid_feats, out_feats=out_feats, aggregator_type='mean') def forward(self, graph, inputs): # inputs are features of nodes h = self.conv1(graph, inputs) h = F.relu(h) h = self.conv2(graph, h) return h Note that you can use the model above for not only node classification, but also obtaining hidden node representations for other downstream tasks such as :ref:`guide-training-edge-classification`, :ref:`guide-training-link-prediction`, or :ref:`guide-training-graph-classification`. For a complete list of built-in graph convolution modules, please refer to :ref:`apinn`. For more details on how DGL neural network modules work and how to write a custom neural network module with message passing, please refer to the example in :ref:`guide-nn`.
Training loop ~~~~~~~~~~~~~ Training on the full graph simply involves a forward propagation of the model defined above, and computing the loss by comparing the prediction against ground truth labels on the training nodes. This section uses a DGL built-in dataset :class:`dgl.data.CiteseerGraphDataset` to show a training loop. The node features and labels are stored on its graph instance, and the training-validation-test split is also stored on the graph as boolean masks. This is similar to what you have seen in :ref:`guide-data-pipeline`. .. code:: python node_features = graph.ndata['feat'] node_labels = graph.ndata['label'] train_mask = graph.ndata['train_mask'] valid_mask = graph.ndata['val_mask'] test_mask = graph.ndata['test_mask'] n_features = node_features.shape[1] n_labels = int(node_labels.max().item() + 1) The following is an example of evaluating your model by accuracy. .. code:: python def evaluate(model, graph, features, labels, mask): model.eval() with torch.no_grad(): logits = model(graph, features) logits = logits[mask] labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) You can then write your training loop as follows. .. code:: python model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): model.train() # forward propagation by using all nodes logits = model(graph, node_features) # compute loss loss = F.cross_entropy(logits[train_mask], node_labels[train_mask]) # compute validation accuracy acc = evaluate(model, graph, node_features, node_labels, valid_mask) # backward propagation opt.zero_grad() loss.backward() opt.step() print(loss.item()) # Save model if necessary. Omitted in this example. `GraphSAGE `__ provides an end-to-end homogeneous graph node classification example.
You could see the corresponding model implementation is in the ``GraphSAGE`` class in the example with adjustable number of layers, dropout probabilities, and customizable aggregation functions and nonlinearities. .. _guide-training-rgcn-node-classification: Heterogeneous graph ~~~~~~~~~~~~~~~~~~~ If your graph is heterogeneous, you may want to gather message from neighbors along all edge types. You can use the module :class:`dgl.nn.pytorch.HeteroGraphConv` (also available in MXNet and Tensorflow) to perform message passing on all edge types, then combining different graph convolution modules for each edge type. The following code will define a heterogeneous graph convolution module that first performs a separate graph convolution on each edge type, then sums the message aggregations on each edge type as the final result for all node types. .. code:: python # Define a Heterograph Conv model class RGCN(nn.Module): def __init__(self, in_feats, hid_feats, out_feats, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(in_feats, hid_feats) for rel in rel_names}, aggregate='sum') self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats) for rel in rel_names}, aggregate='sum') def forward(self, graph, inputs): # inputs are features of nodes h = self.conv1(graph, inputs) h = {k: F.relu(v) for k, v in h.items()} h = self.conv2(graph, h) return h ``dgl.nn.HeteroGraphConv`` takes in a dictionary of node types and node feature tensors as input, and returns another dictionary of node types and node features. So given that we have the user and item features in the :ref:`heterogeneous graph example `. .. 
code:: python model = RGCN(n_hetero_features, 20, n_user_classes, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] labels = hetero_graph.nodes['user'].data['label'] train_mask = hetero_graph.nodes['user'].data['train_mask'] One can simply perform a forward propagation as follows: .. code:: python node_features = {'user': user_feats, 'item': item_feats} h_dict = model(hetero_graph, {'user': user_feats, 'item': item_feats}) h_user = h_dict['user'] h_item = h_dict['item'] Training loop is the same as the one for homogeneous graph, except that now you have a dictionary of node representations from which you compute the predictions. For instance, if you are only predicting the ``user`` nodes, you can just extract the ``user`` node embeddings from the returned dictionary: .. code:: python opt = torch.optim.Adam(model.parameters()) for epoch in range(5): model.train() # forward propagation by using all nodes and extracting the user embeddings logits = model(hetero_graph, node_features)['user'] # compute loss loss = F.cross_entropy(logits[train_mask], labels[train_mask]) # Compute validation accuracy. Omitted in this example. # backward propagation opt.zero_grad() loss.backward() opt.step() print(loss.item()) # Save model if necessary. Omitted in the example. DGL provides an end-to-end example of `RGCN `__ for node classification. You can see the definition of heterogeneous graph convolution in ``RelGraphConvLayer`` in the `model implementation file `__. ================================================ FILE: docs/source/guide/training.rst ================================================ .. 
_guide-training: Chapter 5: Training Graph Neural Networks ===================================================== :ref:`(中文版) ` Overview -------- This chapter discusses how to train a graph neural network for node classification, edge classification, link prediction, and graph classification for small graph(s), using the message passing methods introduced in :ref:`guide-message-passing` and the neural network modules introduced in :ref:`guide-nn`. This chapter assumes that your graph as well as all of its node and edge features can fit into GPU memory; see :ref:`guide-minibatch` if they cannot. The following text assumes that the graph(s) and node/edge features are already prepared. If you plan to use the dataset DGL provides or other compatible ``DGLDataset`` as described in :ref:`guide-data-pipeline`, you can get the graph for a single-graph dataset with something like .. code:: python import dgl dataset = dgl.data.CiteseerGraphDataset() graph = dataset[0] Note: In this chapter we will use PyTorch as backend. .. _guide-training-heterogeneous-graph-example: Heterogeneous Graphs ~~~~~~~~~~~~~~~~~~~~ Sometimes you would like to work on heterogeneous graphs. Here we take a synthetic heterogeneous graph as an example for demonstrating node classification, edge classification, and link prediction tasks. The synthetic heterogeneous graph ``hetero_graph`` has these edge types: - ``('user', 'follow', 'user')`` - ``('user', 'followed-by', 'user')`` - ``('user', 'click', 'item')`` - ``('item', 'clicked-by', 'user')`` - ``('user', 'dislike', 'item')`` - ``('item', 'disliked-by', 'user')`` ..
code:: python import numpy as np import torch n_users = 1000 n_items = 500 n_follows = 3000 n_clicks = 5000 n_dislikes = 500 n_hetero_features = 10 n_user_classes = 5 n_max_clicks = 10 follow_src = np.random.randint(0, n_users, n_follows) follow_dst = np.random.randint(0, n_users, n_follows) click_src = np.random.randint(0, n_users, n_clicks) click_dst = np.random.randint(0, n_items, n_clicks) dislike_src = np.random.randint(0, n_users, n_dislikes) dislike_dst = np.random.randint(0, n_items, n_dislikes) hetero_graph = dgl.heterograph({ ('user', 'follow', 'user'): (follow_src, follow_dst), ('user', 'followed-by', 'user'): (follow_dst, follow_src), ('user', 'click', 'item'): (click_src, click_dst), ('item', 'clicked-by', 'user'): (click_dst, click_src), ('user', 'dislike', 'item'): (dislike_src, dislike_dst), ('item', 'disliked-by', 'user'): (dislike_dst, dislike_src)}) hetero_graph.nodes['user'].data['feature'] = torch.randn(n_users, n_hetero_features) hetero_graph.nodes['item'].data['feature'] = torch.randn(n_items, n_hetero_features) hetero_graph.nodes['user'].data['label'] = torch.randint(0, n_user_classes, (n_users,)) hetero_graph.edges['click'].data['label'] = torch.randint(1, n_max_clicks, (n_clicks,)).float() # randomly generate training masks on user nodes and click edges hetero_graph.nodes['user'].data['train_mask'] = torch.zeros(n_users, dtype=torch.bool).bernoulli(0.6) hetero_graph.edges['click'].data['train_mask'] = torch.zeros(n_clicks, dtype=torch.bool).bernoulli(0.6) Roadmap ------------ The chapter has the following sections; the first four each cover one type of graph learning task. * :ref:`guide-training-node-classification` * :ref:`guide-training-edge-classification` * :ref:`guide-training-link-prediction` * :ref:`guide-training-graph-classification` * :ref:`guide-training-eweight` ..
toctree:: :maxdepth: 1 :hidden: :glob: training-node training-edge training-link training-graph training-eweight ================================================ FILE: docs/source/guide_cn/data-dataset.rst ================================================ .. _guide_cn-data-pipeline-dataset: 4.1 DGLDataset类 -------------------- :ref:`(English Version) ` :class:`~dgl.data.DGLDataset` 是处理、导入和保存 :ref:`apidata` 中定义的图数据集的基类。 它实现了用于处理图数据的基本模版。下面的流程图展示了这个模版的工作方式。 .. figure:: https://data.dgl.ai/asset/image/userguide_data_flow.png :align: center 在类DGLDataset中定义的图数据处理模版的流程图。 为了处理位于远程服务器或本地磁盘上的图数据集,下面的例子中定义了一个类,称为 ``MyDataset``, 它继承自 :class:`dgl.data.DGLDataset`。 .. code:: from dgl.data import DGLDataset class MyDataset(DGLDataset): """ 用于在DGL中自定义图数据集的模板: Parameters ---------- url : str 下载原始数据集的url。 raw_dir : str 指定下载数据的存储目录或已下载数据的存储目录。默认: ~/.dgl/ save_dir : str 处理完成的数据集的保存目录。默认:raw_dir指定的值 force_reload : bool 是否重新导入数据集。默认:False verbose : bool 是否打印进度信息。 """ def __init__(self, url=None, raw_dir=None, save_dir=None, force_reload=False, verbose=False): super(MyDataset, self).__init__(name='dataset_name', url=url, raw_dir=raw_dir, save_dir=save_dir, force_reload=force_reload, verbose=verbose) def download(self): # 将原始数据下载到本地磁盘 pass def process(self): # 将原始数据处理为图、标签和数据集划分的掩码 pass def __getitem__(self, idx): # 通过idx得到与之对应的一个样本 pass def __len__(self): # 数据样本的数量 pass def save(self): # 将处理后的数据保存至 `self.save_path` pass def load(self): # 从 `self.save_path` 导入处理后的数据 pass def has_cache(self): # 检查在 `self.save_path` 中是否存有处理后的数据 pass :class:`~dgl.data.DGLDataset` 类有抽象函数 ``process()``, ``__getitem__(idx)`` 和 ``__len__()``。子类必须实现这些函数。同时DGL也建议实现保存和导入函数, 因为对于处理后的大型数据集,这么做可以节省大量的时间, 并且有多个已有的API可以简化此操作(请参阅 :ref:`guide_cn-data-pipeline-savenload`)。 请注意, :class:`~dgl.data.DGLDataset` 的目的是提供一种标准且方便的方式来导入图数据。 用户可以存储有关数据集的图、特征、标签、掩码,以及诸如类别数、标签数等基本信息。 诸如采样、划分或特征归一化等操作建议在 :class:`~dgl.data.DGLDataset` 子类之外完成。 本章的后续部分展示了实现这些函数的最佳实践。 ================================================ FILE: 
docs/source/guide_cn/data-download.rst ================================================ .. _guide_cn-data-pipeline-download: 4.2 下载原始数据(可选) -------------------------------- :ref:`(English Version) ` 如果用户的数据集已经在本地磁盘中,请确保它被存放在目录 ``raw_dir`` 中。 如果用户想在任何地方运行代码而又不想自己下载数据并将其移动到正确的目录中,则可以通过实现函数 ``download()`` 来自动完成。 如果数据集是一个zip文件,可以直接继承 :class:`dgl.data.DGLBuiltinDataset` 类。后者支持解压缩zip文件。 否则用户需要自己实现 ``download()``,具体可以参考 :class:`~dgl.data.QM7bDataset` 类: .. code:: import os from dgl.data.utils import download def download(self): # 存储文件的路径 file_path = os.path.join(self.raw_dir, self.name + '.mat') # 下载文件 download(self.url, path=file_path) 上面的代码将一个.mat文件下载到目录 ``self.raw_dir``。如果文件是.gz、.tar、.tar.gz或.tgz文件,请使用 :func:`~dgl.data.utils.extract_archive` 函数进行解压缩。以下代码展示了如何在 :class:`~dgl.data.BitcoinOTCDataset` 类中下载一个.gz文件: .. code:: from dgl.data.utils import download, check_sha1 def download(self): # 存储文件的路径,请确保使用与原始文件名相同的后缀 gz_file_path = os.path.join(self.raw_dir, self.name + '.csv.gz') # 下载文件 download(self.url, path=gz_file_path) # 检查 SHA-1 if not check_sha1(gz_file_path, self._sha1_str): raise UserWarning('File {} is downloaded but the content hash does not match.' 'The repo may be outdated or download may be incomplete. ' 'Otherwise you can create an issue for it.'.format(self.name + '.csv.gz')) # 将文件解压缩到目录self.raw_dir下的self.name目录中 self._extract_gz(gz_file_path, self.raw_path) 上面的代码会将文件解压缩到 ``self.raw_dir`` 下的目录 ``self.name`` 中。 如果该类继承自 :class:`dgl.data.DGLBuiltinDataset` 来处理zip文件, 则它也会将文件解压缩到目录 ``self.name`` 中。 一个可选项是用户可以按照上面的示例检查下载后文件的SHA-1字符串,以防作者在远程服务器上更改了文件。 ================================================ FILE: docs/source/guide_cn/data-loadogb.rst ================================================ .. _guide_cn-data-pipeline-loadogb: 4.5 使用ogb包导入OGB数据集 ---------------------------------------------- :ref:`(English Version) ` `Open Graph Benchmark (OGB) `__ 是一个图深度学习的基准数据集。 官方的 `ogb `__ 包提供了用于下载和处理OGB数据集到 :class:`dgl.data.DGLGraph` 对象的API。本节会介绍它们的基本用法。 首先使用pip安装ogb包: .. 
code:: pip install ogb 以下代码显示了如何为 *Graph Property Prediction* 任务加载数据集。 .. code:: # 载入OGB的Graph Property Prediction数据集 import dgl import torch from ogb.graphproppred import DglGraphPropPredDataset from dgl.dataloading import GraphDataLoader def _collate_fn(batch): # 小批次是一个元组(graph, label)列表 graphs = [e[0] for e in batch] g = dgl.batch(graphs) labels = [e[1] for e in batch] labels = torch.stack(labels, 0) return g, labels # 载入数据集 dataset = DglGraphPropPredDataset(name='ogbg-molhiv') split_idx = dataset.get_idx_split() # dataloader train_loader = GraphDataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True, collate_fn=_collate_fn) valid_loader = GraphDataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False, collate_fn=_collate_fn) test_loader = GraphDataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False, collate_fn=_collate_fn) 加载 *Node Property Prediction* 数据集类似,但要注意的是这种数据集只有一个图对象。 .. code:: # 载入OGB的Node Property Prediction数据集 from ogb.nodeproppred import DglNodePropPredDataset dataset = DglNodePropPredDataset(name='ogbn-proteins') split_idx = dataset.get_idx_split() # there is only one graph in Node Property Prediction datasets # 在Node Property Prediction数据集里只有一个图 g, labels = dataset[0] # 获取划分的标签 train_label = dataset.labels[split_idx['train']] valid_label = dataset.labels[split_idx['valid']] test_label = dataset.labels[split_idx['test']] 每个 *Link Property Prediction* 数据集也只包括一个图。 .. code:: # 载入OGB的Link Property Prediction数据集 from ogb.linkproppred import DglLinkPropPredDataset dataset = DglLinkPropPredDataset(name='ogbl-ppa') split_edge = dataset.get_edge_split() graph = dataset[0] print(split_edge['train'].keys()) print(split_edge['valid'].keys()) print(split_edge['test'].keys()) ================================================ FILE: docs/source/guide_cn/data-process.rst ================================================ .. 
_guide_cn-data-pipeline-process: 4.3 处理数据 ---------------- :ref:`(English Version) ` 用户可以在 ``process()`` 函数中实现数据处理。该函数假定原始数据已经位于 ``self.raw_dir`` 目录中。 图上的机器学习任务通常有三种类型:整图分类、节点分类和链接预测。本节将展示如何处理与这些任务相关的数据集。 本节重点介绍了处理图、特征和划分掩码的标准方法。用户指南将以内置数据集为例,并跳过从文件构建图的实现。 用户可以参考 :ref:`guide_cn-graph-external` 以查看如何从外部数据源构建图的完整指南。 处理整图分类数据集 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 整图分类数据集与用小批次训练的典型机器学习任务中的大多数数据集类似。 因此,需要将原始数据处理为 :class:`dgl.DGLGraph` 对象的列表和标签张量的列表。 此外,如果原始数据已被拆分为多个文件,则可以添加参数 ``split`` 以导入数据的特定部分。 下面以 :class:`~dgl.data.QM7bDataset` 为例: .. code:: from dgl.data import DGLDataset class QM7bDataset(DGLDataset): _url = 'http://deepchem.io.s3-website-us-west-1.amazonaws.com/' \ 'datasets/qm7b.mat' _sha1_str = '4102c744bb9d6fd7b40ac67a300e49cd87e28392' def __init__(self, raw_dir=None, force_reload=False, verbose=False): super(QM7bDataset, self).__init__(name='qm7b', url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): mat_path = self.raw_path + '.mat' # 将数据处理为图列表和标签列表 self.graphs, self.label = self._load_graph(mat_path) def __getitem__(self, idx): """ 通过idx获取对应的图和标签 Parameters ---------- idx : int Item index Returns ------- (dgl.DGLGraph, Tensor) """ return self.graphs[idx], self.label[idx] def __len__(self): """数据集中图的数量""" return len(self.graphs) 函数 ``process()`` 将原始数据处理为图列表和标签列表。用户必须实现 ``__getitem__(idx)`` 和 ``__len__()`` 以进行迭代。 DGL建议让 ``__getitem__(idx)`` 返回如上面代码所示的元组 ``(图,标签)``。 用户可以参考 `QM7bDataset源代码 `__ 以获得 ``self._load_graph()`` 和 ``__getitem__`` 的详细信息。 用户还可以向类添加属性以指示一些有用的数据集信息。在 :class:`~dgl.data.QM7bDataset` 中, 用户可以添加属性 ``num_tasks`` 来指示此多任务数据集中的预测任务总数: .. code:: @property def num_tasks(self): """每个图的标签数,即预测任务数。""" return 14 在编写完这些代码之后,用户可以按如下所示的方式来使用 :class:`~dgl.data.QM7bDataset`: .. 
code:: import dgl import torch from dgl.dataloading import GraphDataLoader # 数据导入 dataset = QM7bDataset() num_tasks = dataset.num_tasks # 创建 dataloaders dataloader = GraphDataLoader(dataset, batch_size=1, shuffle=True) # 训练 for epoch in range(100): for g, labels in dataloader: # 用户自己的训练代码 pass 训练整图分类模型的完整指南可以在 :ref:`guide_cn-training-graph-classification` 中找到。 有关整图分类数据集的更多示例,用户可以参考 :ref:`guide_cn-training-graph-classification`: * :ref:`gindataset` * :ref:`minigcdataset` * :ref:`qm7bdata` * :ref:`tudata` 处理节点分类数据集 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 与整图分类不同,节点分类通常在单个图上进行。因此数据集的划分是在图的节点集上进行。 DGL建议使用节点掩码来指定数据集的划分。 本节以内置数据集 `CitationGraphDataset `__ 为例: 此外,DGL推荐重新排列图的节点/边,使得相邻节点/边的ID位于邻近区间内。这个过程 可以提高节点/边的邻居的局部性,为后续在图上进行的计算与分析的性能改善提供可能。 DGL提供了名为 :func:`dgl.reorder_graph` 的API用于此优化。更多细节,请参考 下面例子中的 ``process()`` 的部分。 .. code:: from dgl.data import DGLBuiltinDataset from dgl.data.utils import _get_dgl_url class CitationGraphDataset(DGLBuiltinDataset): _urls = { 'cora_v2' : 'dataset/cora_v2.zip', 'citeseer' : 'dataset/citeseer.zip', 'pubmed' : 'dataset/pubmed.zip', } def __init__(self, name, raw_dir=None, force_reload=False, verbose=True): assert name.lower() in ['cora', 'citeseer', 'pubmed'] if name.lower() == 'cora': name = 'cora_v2' url = _get_dgl_url(self._urls[name]) super(CitationGraphDataset, self).__init__(name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): # 跳过一些处理的代码 # === 跳过数据处理 === # 构建图 g = dgl.graph(graph) # 划分掩码 g.ndata['train_mask'] = train_mask g.ndata['val_mask'] = val_mask g.ndata['test_mask'] = test_mask # 节点的标签 g.ndata['label'] = torch.tensor(labels) # 节点的特征 g.ndata['feat'] = torch.tensor(_preprocess_features(features), dtype=F.data_type_dict['float32']) self._num_tasks = onehot_labels.shape[1] self._labels = labels # 重排图以获得更优的局部性 self._g = dgl.reorder_graph(g) def __getitem__(self, idx): assert idx == 0, "这个数据集里只有一个图" return self._g def __len__(self): return 1 为简便起见,这里省略了 ``process()`` 
中的一些代码,以突出展示用于处理节点分类数据集的关键部分:划分掩码。 节点特征和节点的标签被存储在 ``g.ndata`` 中。详细的实现请参考 `CitationGraphDataset源代码 `__ 。 请注意,这里 ``__getitem__(idx)`` 和 ``__len__()`` 的实现也发生了变化, 这是因为节点分类任务通常只用一个图。掩码在PyTorch和TensorFlow中是bool张量,在MXNet中是float张量。 下面使用 :class:`dgl.data.CitationGraphDataset` 的子类 :class:`dgl.data.CiteseerGraphDataset` 来演示如何使用用于节点分类的数据集: .. code:: # 导入数据 dataset = CiteseerGraphDataset(raw_dir='') graph = dataset[0] # 获取划分的掩码 train_mask = graph.ndata['train_mask'] val_mask = graph.ndata['val_mask'] test_mask = graph.ndata['test_mask'] # 获取节点特征 feats = graph.ndata['feat'] # 获取标签 labels = graph.ndata['label'] :ref:`guide_cn-training-node-classification` 提供了训练节点分类模型的完整指南。 有关节点分类数据集的更多示例,用户可以参考以下内置数据集: * :ref:`citationdata` * :ref:`corafulldata` * :ref:`amazoncobuydata` * :ref:`coauthordata` * :ref:`karateclubdata` * :ref:`ppidata` * :ref:`redditdata` * :ref:`sbmdata` * :ref:`sstdata` * :ref:`rdfdata` 处理链接预测数据集 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 链接预测数据集的处理与节点分类相似,数据集中通常只有一个图。 本节以内置的数据集 `KnowledgeGraphDataset `__ 为例,同时省略了详细的数据处理代码以突出展示处理链接预测数据集的关键部分: .. code:: # 创建链接预测数据集示例 class KnowledgeGraphDataset(DGLBuiltinDataset): def __init__(self, name, reverse=True, raw_dir=None, force_reload=False, verbose=True): self._name = name self.reverse = reverse url = _get_dgl_url('dataset/') + '{}.tgz'.format(name) super(KnowledgeGraphDataset, self).__init__(name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): # 跳过一些处理的代码 # === 跳过数据处理 === # 划分掩码 g.edata['train_mask'] = train_mask g.edata['val_mask'] = val_mask g.edata['test_mask'] = test_mask # 边类型 g.edata['etype'] = etype # 节点类型 g.ndata['ntype'] = ntype self._g = g def __getitem__(self, idx): assert idx == 0, "这个数据集只有一个图" return self._g def __len__(self): return 1 如代码所示,图的 ``edata`` 存储了划分掩码。在 `KnowledgeGraphDataset 源代码 `__ 中可以查看完整的代码。下面使用 ``KnowledgeGraphDataset`` 的子类 :class:`dgl.data.FB15k237Dataset` 来演示如何使用用于链接预测的数据集: ..
code:: from dgl.data import FB15k237Dataset # 导入数据 dataset = FB15k237Dataset() graph = dataset[0] # 获取训练集掩码 train_mask = graph.edata['train_mask'] train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() src, dst = graph.edges(train_idx) # 获取训练集中的边类型 rel = graph.edata['etype'][train_idx] 有关训练链接预测模型的完整指南,请参见 :ref:`guide_cn-training-link-prediction`。 有关链接预测数据集的更多示例,请参考DGL的内置数据集: * :ref:`kgdata` * :ref:`bitcoinotcdata` ================================================ FILE: docs/source/guide_cn/data-savenload.rst ================================================ .. _guide_cn-data-pipeline-savenload: 4.4 保存和加载数据 ---------------------- :ref:`(English Version) ` DGL建议用户实现保存和加载数据的函数,将处理后的数据缓存在本地磁盘中。 这样在多数情况下可以帮用户节省大量的数据处理时间。DGL提供了4个函数让任务变得简单。 - :func:`dgl.save_graphs` 和 :func:`dgl.load_graphs`: 保存DGLGraph对象和标签到本地磁盘和从本地磁盘读取它们。 - :func:`dgl.data.utils.save_info` 和 :func:`dgl.data.utils.load_info`: 将数据集的有用信息(python dict对象)保存到本地磁盘和从本地磁盘读取它们。 下面的示例显示了如何保存和读取图和数据集信息的列表。 .. code:: import os from dgl import save_graphs, load_graphs from dgl.data.utils import makedirs, save_info, load_info def save(self): # 保存图和标签 graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') save_graphs(graph_path, self.graphs, {'labels': self.labels}) # 在Python字典里保存其他信息 info_path = os.path.join(self.save_path, self.mode + '_info.pkl') save_info(info_path, {'num_classes': self.num_classes}) def load(self): # 从目录 `self.save_path` 里读取处理过的数据 graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') self.graphs, label_dict = load_graphs(graph_path) self.labels = label_dict['labels'] info_path = os.path.join(self.save_path, self.mode + '_info.pkl') self.num_classes = load_info(info_path)['num_classes'] def has_cache(self): # 检查在 `self.save_path` 里是否有处理过的数据文件 graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') info_path = os.path.join(self.save_path, self.mode + '_info.pkl') return os.path.exists(graph_path) and os.path.exists(info_path) 
请注意:有些情况下不适合保存处理过的数据。例如,在内置数据集 :class:`~dgl.data.GDELTDataset` 中, 处理过的数据比较大。所以这个时候,在 ``__getitem__(idx)`` 中处理每个数据实例是更高效的方法。 ================================================ FILE: docs/source/guide_cn/data.rst ================================================ .. _guide_cn-data-pipeline: 第4章:图数据处理管道 ============================== :ref:`(English Version) ` DGL在 :ref:`apidata` 里实现了很多常用的图数据集。它们遵循了由 :class:`dgl.data.DGLDataset` 类定义的标准的数据处理管道。 DGL推荐用户将图数据处理为 :class:`dgl.data.DGLDataset` 的子类。该类为导入、处理和保存图数据提供了简单而干净的解决方案。 本章路线图 ----------- 本章介绍了如何为用户自己的图数据创建一个DGL数据集。以下内容说明了管道的工作方式,并展示了如何实现管道的每个组件。 * :ref:`guide_cn-data-pipeline-dataset` * :ref:`guide_cn-data-pipeline-download` * :ref:`guide_cn-data-pipeline-process` * :ref:`guide_cn-data-pipeline-savenload` * :ref:`guide_cn-data-pipeline-loadogb` .. toctree:: :maxdepth: 1 :hidden: :glob: data-dataset data-download data-process data-savenload data-loadogb ================================================ FILE: docs/source/guide_cn/distributed-apis.rst ================================================ .. _guide_cn-distributed-apis: 7.2 分布式计算的API -------------------- :ref:`(English Version) ` 本节介绍了在训练脚本中使用的分布式计算API。DGL提供了三种分布式数据结构和多种API,用于初始化、分布式采样和数据分割。 对于分布式训练/推断,DGL提供了三种分布式数据结构:用于分布式图的 :class:`~dgl.distributed.DistGraph`、 用于分布式张量的 :class:`~dgl.distributed.DistTensor` 和用于分布式可学习嵌入的 :class:`~dgl.distributed.DistEmbedding`。 DGL分布式模块的初始化 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :func:`~dgl.distributed.initialize` 可以用于初始化分布式模块。当训练脚本在训练器模式下运行时, 这个API会与DGL服务器建立连接并创建采样器进程。当脚本在服务器模式下运行时,这个API将运行服务器代码, 直到训练任务结束。必须在DGL的任何其他分布式API之前,调用此API。在使用PyTorch时,必须在 ``torch.distributed.init_process_group`` 之前调用 :func:`~dgl.distributed.initialize`。 通常,初始化API应按以下顺序调用: .. 
code:: python dgl.distributed.initialize('ip_config.txt') th.distributed.init_process_group(backend='gloo') **Note**: 如果训练脚本里包含需要在服务器(细节内容可以在下面的DistTensor和DistEmbedding章节里查看)上调用的用户自定义函数(UDF), 这些UDF必须在 :func:`~dgl.distributed.initialize` 之前被声明。 分布式图 ~~~~~~~~~~~~~~~~~ :class:`~dgl.distributed.DistGraph` 是一个Python类,用于访问计算机集群中的图结构和节点/边特征。每台计算机负责一个且只负责一个分区。 它加载分区数据(包括分区中的图结构、节点数据和边数据),并使集群中的所有训练器均可访问它们。 :class:`~dgl.distributed.DistGraph` 提供了一小部分 :class:`~dgl.DGLGraph` 的API以方便数据访问。 **Note**: :class:`~dgl.distributed.DistGraph` 当前仅支持一种节点类型和一种边类型的图。 分布式模式与独立模式 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :class:`~dgl.distributed.DistGraph` 可以在两种模式下运行:分布式模式和独立模式。 当用户在Python命令行或Jupyter Notebook中执行训练脚本时,它将以独立模式运行。也就是说,它在单个进程中运行所有计算, 并且不与任何其他进程通信。因此,独立模式要求输入图仅具有一个分区。此模式主要用于开发和测试 (例如,在Jupyter Notebook中开发和运行代码)。当用户使用启动脚本执行训练脚本时(请参见启动脚本部分), :class:`~dgl.distributed.DistGraph` 将以分布式模式运行。启动脚本在后台启动服务器(包括访问节点/边特征和图采样), 并将分区数据自动加载到每台计算机中。:class:`~dgl.distributed.DistGraph` 与集群中的服务器连接并通过网络访问它们。 创建DistGraph ^^^^^^^^^^^^^^^^^^ 在分布式模式下,:class:`~dgl.distributed.DistGraph` 的创建需要(定义)在图划分期间的图名称。 图名称标识了集群中所需加载的图。 .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name') 在独立模式下运行时,DistGraph将图数据加载到本地计算机中。因此,用户需要提供分区配置文件,其中包含有关输入图的所有信息。 .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name', part_config='data/graph_name.json') **Note**: 在当前实现中,DGL仅允许创建单个DistGraph对象。销毁DistGraph并创建一个新DistGraph的行为没有被定义。 访问图结构 ^^^^^^^^^^^^^^^^^^^^^^ :class:`~dgl.distributed.DistGraph` 提供了几个API来访问图结构。当前,它们主要被用来提供图信息,例如节点和边的数量。 主要应用场景是运行采样API以支持小批量训练(请参阅下文里分布式图采样部分)。 .. code:: python print(g.num_nodes()) 访问节点/边数据 ^^^^^^^^^^^^^^^^^^^^^ 与 :class:`~dgl.DGLGraph` 一样, :class:`~dgl.distributed.DistGraph` 也提供了 ``ndata`` 和 ``edata`` 来访问节点和边中的数据。它们的区别在于 :class:`~dgl.distributed.DistGraph` 中的 ``ndata`` / ``edata`` 返回的是 :class:`~dgl.distributed.DistTensor`, 而不是底层框架里的张量。用户还可以将新的 :class:`~dgl.distributed.DistTensor` 分配给 :class:`~dgl.distributed.DistGraph` 作为节点数据或边数据。 .. 
code:: python g.ndata['train_mask'] g.ndata['train_mask'][0] tensor([1], dtype=torch.uint8) 分布式张量 ~~~~~~~~~~~~~~~~~ 如前所述,在分布式模式下,DGL会划分节点和边特征,并将它们存储在计算机集群中。 DGL为分布式张量提供了类似于单机普通张量的接口,以访问群集中的分区节点和边特征。 在分布式设置中,DGL仅支持密集节点和边特征,暂不支持稀疏节点和边特征。 :class:`~dgl.distributed.DistTensor` 管理在多个计算机中被划分和存储的密集张量。 目前,分布式张量必须与图的节点或边相关联。换句话说,DistTensor中的行数必须与图中的节点数或边数相同。 以下代码创建一个分布式张量。 除了张量的形状和数据类型之外,用户还可以提供唯一的张量名称。 如果用户要引用一个固定的分布式张量(即使 :class:`~dgl.distributed.DistTensor` 对象消失,该名称仍存在于群集中), 则(使用这样的)名称就很有用。 .. code:: python tensor = dgl.distributed.DistTensor((g.num_nodes(), 10), th.float32, name='test') **Note**: :class:`~dgl.distributed.DistTensor` 的创建是一个同步操作。所有训练器都必须调用创建, 并且只有当所有训练器都调用它时,此创建过程才能成功。 用户可以将 :class:`~dgl.distributed.DistTensor` 作为节点数据或边数据之一添加到 :class:`~dgl.distributed.DistGraph` 对象。 .. code:: python g.ndata['feat'] = tensor **Note**: 节点数据名称和张量名称不必相同。前者在 :class:`~dgl.distributed.DistGraph` 中标识节点数据(在训练器进程中), 而后者则标识DGL服务器中的分布式张量。 :class:`~dgl.distributed.DistTensor` 提供了一些功能。它具有与常规张量相同的API,用于访问其元数据, 例如形状和数据类型。:class:`~dgl.distributed.DistTensor` 支持索引读取和写入, 但不支持一些计算运算符,例如求和以及求均值。 .. code:: python data = g.ndata['feat'][[1, 2, 3]] print(data) g.ndata['feat'][[3, 4, 5]] = data **Note**: 当前,当一台机器运行多个服务器时,DGL不提供对来自多个训练器的并发写入的保护。 这可能会导致数据损坏。 分布式嵌入 ~~~~~~~~~~~~~~~~~~~~~ DGL提供 :class:`~dgl.distributed.DistEmbedding` 以支持需要节点嵌入的直推(transductive)模型。 分布式嵌入的创建与分布式张量的创建非常相似。 .. code:: python def initializer(shape, dtype): arr = th.zeros(shape, dtype=dtype) arr.uniform_(-1, 1) return arr emb = dgl.distributed.DistEmbedding(g.num_nodes(), 10, init_func=initializer) 在内部,分布式嵌入建立在分布式张量之上,因此,其行为与分布式张量非常相似。 例如,创建嵌入时,DGL会将它们分片并存储在集群中的所有计算机上。(分布式嵌入)可以通过名称唯一标识。 **Note**: 初始化函数(如上例中的 ``initializer``)是在服务器进程中被调用的。因此,必须在调用 :func:`~dgl.distributed.initialize` 之前声明该初始化函数。 因为嵌入是模型的一部分,所以用户必须将其附加到优化器上以进行小批量训练。当前, DGL提供了一个稀疏的Adagrad优化器 :class:`~dgl.distributed.SparseAdagrad` (DGL以后将为稀疏嵌入添加更多的优化器)。 用户需要从模型中收集所有分布式嵌入,并将它们传递给稀疏优化器。如果模型同时具有节点嵌入和规则的密集模型参数, 并且用户希望对嵌入执行稀疏更新,则需要创建两个优化器,一个用于节点嵌入,另一个用于密集模型参数,如以下代码所示: ..
code:: python sparse_optimizer = dgl.distributed.SparseAdagrad([emb], lr=lr1) optimizer = th.optim.Adam(model.parameters(), lr=lr2) feats = emb(nids) loss = model(feats) loss.backward() optimizer.step() sparse_optimizer.step() **Note**: :class:`~dgl.distributed.DistEmbedding` 不是PyTorch的nn模块,因此用户无法从nn模块的参数访问它。 分布式采样 ~~~~~~~~~~~~~~~~~~~~ DGL提供了两个级别的API,用于对节点和边进行采样以生成小批次训练数据(请参阅小批次训练的章节)。 底层API要求用户编写代码以明确定义如何对节点层进行采样(例如,使用 :func:`dgl.sampling.sample_neighbors` )。 高层采样API为节点分类和链接预测任务实现了一些流行的采样算法(例如 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 和 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` )。 分布式采样模块遵循相同的设计,也提供两个级别的采样API。对于底层的采样API,它为 :class:`~dgl.distributed.DistGraph` 上的分布式邻居采样提供了 :func:`~dgl.distributed.sample_neighbors`。另外,DGL提供了用于分布式采样的分布式数据加载器( :class:`~dgl.distributed.DistDataLoader`)。除了用户在创建数据加载器时无法指定工作进程的数量, 分布式数据加载器具有与PyTorch DataLoader相同的接口。其中的工作进程(worker)在 :func:`dgl.distributed.initialize` 中创建。 **Note**: 在 :class:`~dgl.distributed.DistGraph` 上运行 :func:`dgl.distributed.sample_neighbors` 时, 采样器无法在具有多个工作进程的PyTorch DataLoader中运行。主要原因是PyTorch DataLoader在每个训练周期都会创建新的采样工作进程, 从而导致多次创建和删除 :class:`~dgl.distributed.DistGraph` 对象。 使用底层API时,采样代码类似于单进程采样。唯一的区别是用户需要使用 :func:`dgl.distributed.sample_neighbors` 和 :class:`~dgl.distributed.DistDataLoader`。 .. code:: python def sample_blocks(seeds): seeds = th.LongTensor(np.asarray(seeds)) blocks = [] for fanout in [10, 25]: frontier = dgl.distributed.sample_neighbors(g, seeds, fanout, replace=True) block = dgl.to_block(frontier, seeds) seeds = block.srcdata[dgl.NID] blocks.insert(0, block) return blocks dataloader = dgl.distributed.DistDataLoader(dataset=train_nid, batch_size=batch_size, collate_fn=sample_blocks, shuffle=True) for batch in dataloader: ... :class:`~dgl.dataloading.pytorch.NodeDataLoader` 和 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 有分布式的版本 :class:`~dgl.dataloading.pytorch.DistNodeDataLoader` 和 :class:`~dgl.dataloading.pytorch.DistEdgeDataLoader` 。使用 时分布式采样代码与单进程采样几乎完全相同。 .. 
code:: python sampler = dgl.sampling.MultiLayerNeighborSampler([10, 25]) dataloader = dgl.sampling.DistNodeDataLoader(g, train_nid, sampler, batch_size=batch_size, shuffle=True) for batch in dataloader: ... 分割数据集 ~~~~~~~~~~~~~~~ 用户需要分割训练集,以便每个训练器都可以使用自己的训练集子集。同样,用户还需要以相同的方式分割验证和测试集。 对于分布式训练和评估,推荐的方法是使用布尔数组表示训练、验证和测试集。对于节点分类任务, 这些布尔数组的长度是图中节点的数量,并且它们的每个元素都表示训练/验证/测试集中是否存在对应节点。 链接预测任务也应使用类似的布尔数组。 DGL提供了 :func:`~dgl.distributed.node_split` 和 :func:`~dgl.distributed.edge_split` 函数来在运行时拆分训练、验证和测试集,以进行分布式训练。这两个函数将布尔数组作为输入,对其进行拆分,并向本地训练器返回一部分。 默认情况下,它们确保所有部分都具有相同数量的节点和边。这对于同步SGD非常重要, 因为同步SGD会假定每个训练器具有相同数量的小批次。 下面的示例演示了训练集拆分,并向本地进程返回节点的子集。 .. code:: python train_nids = dgl.distributed.node_split(g.ndata['train_mask']) ================================================ FILE: docs/source/guide_cn/distributed-preprocessing.rst ================================================ .. _guide_cn-distributed-preprocessing: 7.1 分布式训练所需的图数据预处理 ------------------------------------------ :ref:`(English Version) ` DGL要求预处理图数据以进行分布式训练,这包括两个步骤:1)将一张图划分为多张子图(分区),2)为节点和边分配新的ID。 DGL提供了一个API以执行这两个步骤。该API支持随机划分和一个基于 `Metis `__ 的划分。Metis划分的好处在于, 它可以用最少的边分割以生成分区,从而减少了用于分布式训练和推理的网络通信。DGL使用最新版本的Metis, 并针对真实世界中具有幂律分布的图进行了优化。在图划分后,API以易于在训练期间加载的格式构造划分结果。 **Note**: 图划分API当前在一台机器上运行。 因此如果一张图很大,用户将需要一台大内存的机器来对图进行划分。 未来DGL将支持分布式图划分。 默认情况下,为了在分布式训练/推理期间定位节点/边,API将新ID分配给输入图的节点和边。 分配ID后,该API会相应地打乱所有节点数据和边数据。在训练期间,用户只需使用新的节点和边的ID。 与此同时,用户仍然可以通过 ``g.ndata['orig_id']`` 和 ``g.edata['orig_id']`` 获取原始ID。 其中 ``g`` 是 ``DistGraph`` 对象(详细解释,请参见 :ref:`guide_cn-distributed-apis`)。 DGL将图划分结果存储在输出目录中的多个文件中。输出目录里始终包含一个名为xxx.json的JSON文件,其中xxx是提供给划分API的图的名称。 JSON文件包含所有划分的配置。如果该API没有为节点和边分配新ID,它将生成两个额外的NumPy文件:`node_map.npy` 和 `edge_map.npy`。 它们存储节点和边ID与分区ID之间的映射。对于具有十亿级数量节点和边的图,两个文件中的NumPy数组会很大, 这是因为图中的每个节点和边都对应一个条目。在每个分区的文件夹内,有3个文件以DGL格式存储分区数据。 `graph.dgl` 存储分区的图结构以及节点和边上的一些元数据。`node_feats.dgl` 和 `edge_feats.dgl` 存储属于该分区的节点和边的所有特征。 ..
code-block:: none data_root_dir/ |-- xxx.json # JSON中的分区配置文件 |-- node_map.npy # 存储在NumPy数组中的每个节点的分区ID(可选) |-- edge_map.npy # 存储在NumPy数组中的每个边的分区ID(可选) |-- part0/ # 分区0的数据 |-- node_feats.dgl # 以二进制格式存储的节点特征 |-- edge_feats.dgl # 以二进制格式存储的边特征 |-- graph.dgl # 以二进制格式存储的子图结构 |-- part1/ # 分区1的数据 |-- node_feats.dgl |-- edge_feats.dgl |-- graph.dgl 负载均衡 ~~~~~~~~~~~~~~ 在对图进行划分时,默认情况下,Metis仅平衡每个子图中的节点数。根据当前的任务情况,这可能带来非最优的配置。 例如,在半监督节点分类的场景里,训练器会对局部分区中带标签节点的子集进行计算。 一个仅平衡图中节点(带标签和未带标签)的划分可能会导致计算负载不平衡。为了在每个分区中获得平衡的工作负载, 划分API通过在 :func:`dgl.distributed.partition_graph` 中指定 ``balance_ntypes`` 在每个节点类型中的节点数上实现分区间的平衡。用户可以利用这一点将训练集、验证集和测试集中的节点看作不同类型的节点。 以下示例将训练集内和训练集外的节点看作两种类型的节点: .. code:: python dgl.distributed.partition_graph(g, 'graph_name', 4, '/tmp/test', balance_ntypes=g.ndata['train_mask']) 除了平衡节点的类型之外, :func:`dgl.distributed.partition_graph` 还允许通过指定 ``balance_edges`` 来平衡每个类型节点在子图中的入度。这平衡了不同类型节点的连边数量。 **Note**: 传给 :func:`dgl.distributed.partition_graph` 的图名称是一个重要的参数。 :class:`dgl.distributed.DistGraph` 使用该名称来识别一个分布式的图。一个有效的图名称应该仅包含字母和下划线。 ================================================ FILE: docs/source/guide_cn/distributed-tools.rst ================================================ .. 
_guide_cn-distributed-tools: 7.3 运行分布式训练/推断所需的工具 ------------------------------------------------------ :ref:`(English Version) ` DGL提供了两个脚本来帮助用户进行分布式训练: * *tools/copy_files.py* 用于将图分区复制到集群, * *tools/launch.py* 用于在机器集群中启动分布式训练任务。 *copy_files.py* 将计算机(对图进行分区的计算机)中的分区数据和相关文件(例如,训练脚本) 复制到(负责分布式训练的)机器集群上。在这些机器上,分布式训练将需要用到这些分区。该脚本包含五个参数: * ``--part_config`` 指定分区配置文件,该文件包含本地计算机中分区数据的信息。 * ``--ip_config`` 指定集群的IP配置文件。 * ``--workspace`` 指定训练机器中存储与分布式训练有关的所有数据的目录。 * ``--rel_data_path`` 指定工作空间目录下存储分区数据的相对路径。 * ``--script_folder`` 指定工作空间目录下存储用户的训练脚本的相对路径。 **Note**: *copy_files.py* 会根据IP配置文件找到对应的计算机来存储图分区。因此,copy_files.py和launch.py应该使用相同的IP配置文件。 DGL提供了用于启动集群中的分布式训练任务的tools/launch.py。该脚本有以下假设: * 分区数据和训练脚本都已被复制到集群或存在集群中所有计算机均可访问的全局存储空间(例如NFS)。 * 主计算机(执行启动脚本的计算机)具有对集群内所有其他计算机的无密码ssh访问权限。 **Note**: 必须在集群中的一台计算机上调用启动脚本。 下面展示了在集群中启动分布式训练任务的示例。 .. code:: none python3 tools/launch.py \ --workspace ~graphsage/ \ --num_trainers 2 \ --num_samplers 4 \ --num_servers 1 \ --part_config data/ogb-product.json \ --ip_config ip_config.txt \ "python3 code/train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 5 --batch-size 1000 --lr 0.1 --num_workers 4" 配置文件 *ip_config.txt* 包含了集群中计算机的IP地址。*ip_config.txt* 的典型示例如下: .. code:: none 172.31.19.1 172.31.23.205 172.31.29.175 172.31.16.98 每行是一个计算机的IP地址。IP地址后面还可以有一个端口,用来指定不同训练器之间的网络通信所使用的端口。 如果未提供具体端口,则默认值为 ``30050``。 启动脚本中指定的工作空间(--workspace)是计算机中的工作目录,里面保存了训练脚本、IP配置文件、分区配置文件以及图分区。 文件的所有路径都应指定为工作空间的相对路径。 启动脚本会在每台计算机上创建指定数量的训练任务(``--num_trainers``)。另外, 用户需要为每个训练器指定采样器进程的数量(``--num_samplers``)。 采样器进程的数量必须匹配 :func:`~dgl.distributed.initialize` 中指定的工作进程的数量。 ================================================ FILE: docs/source/guide_cn/distributed.rst ================================================ ..
_guide_cn-distributed: 第7章:分布式训练 ===================================== :ref:`(English Version) ` DGL采用完全分布式的方法,可将数据和计算同时分布在一组计算资源中。在本节中, 我们默认使用一个集群的环境设置(即一组机器)。DGL会将一张图划分为多张子图, 集群中的每台机器各自负责一张子图(分区)。为了并行化计算,DGL在集群所有机器上运行相同的训练脚本, 并在同样的机器上运行服务器以将分区数据提供给训练器。 对于训练脚本,DGL提供了分布式的API。它们与小批次训练的API相似。用户仅需对单机小批次训练的代码稍作修改就可实现分布式训练。 以下代码给出了一个用分布式方式训练GraphSage的示例。仅有的代码修改出现在第4-7行:1)初始化DGL的分布式模块,2)创建分布式图对象,以及 3)拆分训练集,并计算本地进程的节点。其余代码保持不变,与 :ref:`mini_cn-batch training ` 类似, 包括:创建采样器,模型定义,模型训练的循环。 .. code:: python import dgl import torch as th dgl.distributed.initialize('ip_config.txt') th.distributed.init_process_group(backend='gloo') g = dgl.distributed.DistGraph('graph_name', 'part_config.json') pb = g.get_partition_book() train_nid = dgl.distributed.node_split(g.ndata['train_mask'], pb, force_even=True) # 创建采样器 sampler = NeighborSampler(g, [10,25], dgl.distributed.sample_neighbors, device) dataloader = DistDataLoader( dataset=train_nid.numpy(), batch_size=batch_size, collate_fn=sampler.sample_blocks, shuffle=True, drop_last=False) # 定义模型和优化器 model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout) model = th.nn.parallel.DistributedDataParallel(model) loss_fcn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=args.lr) # 模型训练的循环 for epoch in range(args.num_epochs): for step, blocks in enumerate(dataloader): batch_inputs, batch_labels = load_subtensor(g, blocks[0].srcdata[dgl.NID], blocks[-1].dstdata[dgl.NID]) batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() 在一个集群的机器上运行训练脚本时,DGL提供了一些工具,可将数据复制到集群的计算机上,并在所有机器上启动训练任务。 **Note**: 当前版本的分布式训练API仅支持PyTorch后端。 **Note**: 当前版本的实现仅支持具有一种节点类型和一种边类型的图。 DGL实现了一些分布式组件以支持分布式训练,下图显示了这些组件及它们间的相互作用。 .. 
figure:: https://data.dgl.ai/asset/image/distributed.png :alt: Imgur 具体来说,DGL的分布式训练具有三种类型的交互进程: *服务器*, *采样器* 和 *训练器*。 * *服务器进程* 在存储图分区数据(这包括图结构和节点/边特征)的每台计算机上运行。 这些服务器一起工作以将图数据提供给训练器。请注意,一台机器可能同时运行多个服务器进程,以并行化计算和网络通信。 * *采样器进程* 与服务器进行交互,并对节点和边采样以生成用于训练的小批次数据。 * *训练器进程* 包含多个与服务器交互的类。它用 :class:`~dgl.distributed.DistGraph` 来获取被划分的图分区数据, 用 :class:`~dgl.distributed.DistEmbedding` 和 :class:`~dgl.distributed.DistTensor` 来获取节点/边特征/嵌入,用 :class:`~dgl.distributed.dist_dataloader.DistDataLoader` 与采样器进行交互以获得小批次数据。 在初步了解了分布式组件后,本章的剩余部分将介绍以下分布式组件: * :ref:`guide_cn-distributed-preprocessing` * :ref:`guide_cn-distributed-apis` * :ref:`guide_cn-distributed-tools` .. toctree:: :maxdepth: 1 :hidden: :glob: distributed-preprocessing distributed-apis distributed-tools ================================================ FILE: docs/source/guide_cn/graph-basic.rst ================================================ .. _guide_cn-graph-basic: 1.1 关于图的基本概念 ----------------- :ref:`(English Version) ` 图是用以表示实体及其关系的结构,记为 :math:`G=(V, E)` 。图由两个集合组成,一是节点的集合 :math:`V` ,一个是边的集合 :math:`E` 。 在边集 :math:`E` 中,一条边 :math:`(u, v)` 连接一对节点 :math:`u` 和 :math:`v` ,表明两节点间存在关系。关系可以是无向的, 如描述节点之间的对称关系;也可以是有向的,如描述非对称关系。例如,若用图对社交网络中人们的友谊关系进行建模,因为友谊是相互的,则边是无向的; 若用图对Twitter用户的关注行为进行建模,则边是有向的。图可以是 *有向的* 或 *无向的* ,这取决于图中边的方向性。 图可以是 *加权的* 或 *未加权的* 。在加权图中,每条边都与一个标量权重值相关联。例如,该权重可以表示长度或连接的强度。 图可以是 *同构的* 或是 *异构的* 。在同构图中,所有节点表示同一类型的实体,所有边表示同一类型的关系。 例如,社交网络的图由表示同一实体类型的人及其相互之间的社交关系组成。 相对地,在异构图中,节点和边的类型可以是不同的。例如,编码市场的图可以有表示"顾客"、"商家"和"商品"的节点, 它们通过“想购买”、“已经购买”、“是顾客”和“正在销售”的边互相连接。二分图是一类特殊的、常用的异构图, 其中的边连接两类不同类型的节点。例如,在推荐系统中,可以使用二分图表示"用户"和"物品"之间的关系。想了解更多信息,读者可参考 :ref:`guide_cn-graph-heterogeneous`。 在多重图中,同一对节点之间可以有多条(有向)边,包括自循环的边。例如,两名作者可以在不同年份共同署名文章, 这就带来了具有不同特征的多条边。 ================================================ FILE: docs/source/guide_cn/graph-external.rst ================================================ .. 
_guide_cn-graph-external: 1.4 从外部源创建图 --------------- :ref:`(English Version)` 可以从外部来源构造一个 :class:`~dgl.DGLGraph` 对象,包括: - 从用于图和稀疏矩阵的外部Python库(NetworkX 和 SciPy)创建而来。 - 从磁盘加载图数据。 本节不涉及通过转换其他图来生成图的函数,相关概述请阅读API参考手册。 从外部库创建图 ^^^^^^^^^^^ 以下代码片段为从SciPy稀疏矩阵和NetworkX图创建DGL图的示例。 .. code:: >>> import dgl >>> import torch as th >>> import scipy.sparse as sp >>> spmat = sp.rand(100, 100, density=0.05) # 5%非零项 >>> dgl.from_scipy(spmat) # 来自SciPy Graph(num_nodes=100, num_edges=500, ndata_schemes={} edata_schemes={}) >>> import networkx as nx >>> nx_g = nx.path_graph(5) # 一条链路0-1-2-3-4 >>> dgl.from_networkx(nx_g) # 来自NetworkX Graph(num_nodes=5, num_edges=8, ndata_schemes={} edata_schemes={}) 注意,当使用 `nx.path_graph(5)` 进行创建时, :class:`~dgl.DGLGraph` 对象有8条边,而非4条。 这是由于 `nx.path_graph(5)` 构建了一个无向的NetworkX图 :class:`networkx.Graph` ,而 :class:`~dgl.DGLGraph` 的边总是有向的。 所以当将无向的NetworkX图转换为 :class:`~dgl.DGLGraph` 对象时,DGL会在内部将1条无向边转换为2条有向边。 使用有向的NetworkX图 :class:`networkx.DiGraph` 可避免该行为。 .. code:: >>> nxg = nx.DiGraph([(2, 1), (1, 2), (2, 3), (0, 0)]) >>> dgl.from_networkx(nxg) Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={}) .. note:: DGL在内部将SciPy矩阵和NetworkX图转换为张量来创建图。因此,这些构建方法并不适用于重视性能的场景。 相关API: :func:`dgl.from_scipy`、 :func:`dgl.from_networkx`。 从磁盘加载图 ^^^^^^^^^^ 有多种文件格式可储存图,所以这里难以枚举所有选项。本节仅给出一些常见格式的一般情况。 逗号分隔值(CSV) """""""""""""" CSV是一种常见的格式,以表格格式储存节点、边及其特征: .. table:: nodes.csv +-----------+ |age, title | +===========+ |43, 1 | +-----------+ |23, 3 | +-----------+ |... | +-----------+ .. table:: edges.csv +-----------------+ |src, dst, weight | +=================+ |0, 1, 0.4 | +-----------------+ |0, 3, 0.9 | +-----------------+ |... 
| +-----------------+ 许多知名Python库(如Pandas)可以将该类型数据加载到python对象(如 :class:`numpy.ndarray`)中, 进而使用这些对象来构建DGLGraph对象。如果后端框架也提供了从磁盘中保存或加载张量的工具(如 :func:`torch.save`, :func:`torch.load` ), 可以遵循相同的原理来构建图。 另见: `从成对的边 CSV 文件中加载 Karate Club Network 的教程 `_。 JSON/GML 格式 """""""""""" 如果对速度不太关注的话,读者可以使用NetworkX提供的工具来解析 `各种数据格式 `_, DGL可以间接地从这些来源创建图。 DGL 二进制格式 """""""""""" DGL提供了API以从磁盘中加载或向磁盘里保存二进制格式的图。除了图结构,API也能处理特征数据和图级别的标签数据。 DGL也支持直接从S3/HDFS中加载或向S3/HDFS保存图。参考手册提供了该用法的更多细节。 相关API: :func:`dgl.save_graphs`、 :func:`dgl.load_graphs`。 ================================================ FILE: docs/source/guide_cn/graph-feature.rst ================================================ .. _guide_cn-graph-feature: 1.3 节点和边的特征 --------------- :ref:`(English Version)` :class:`~dgl.DGLGraph` 对象的节点和边可具有多个用户定义的、可命名的特征,以储存图的节点和边的属性。 通过 :py:attr:`~dgl.DGLGraph.ndata` 和 :py:attr:`~dgl.DGLGraph.edata` 接口可访问这些特征。 例如,以下代码创建了2个节点特征(分别在第8、15行命名为 ``'x'`` 、 ``'y'`` )和1个边特征(在第9行命名为 ``'x'`` )。 .. code-block:: python :linenos: >>> import dgl >>> import torch as th >>> g = dgl.graph(([0, 0, 1, 5], [1, 2, 2, 0])) # 6个节点,4条边 >>> g Graph(num_nodes=6, num_edges=4, ndata_schemes={} edata_schemes={}) >>> g.ndata['x'] = th.ones(g.num_nodes(), 3) # 长度为3的节点特征 >>> g.edata['x'] = th.ones(g.num_edges(), dtype=th.int32) # 标量整型特征 >>> g Graph(num_nodes=6, num_edges=4, ndata_schemes={'x' : Scheme(shape=(3,), dtype=torch.float32)} edata_schemes={'x' : Scheme(shape=(,), dtype=torch.int32)}) >>> # 不同名称的特征可以具有不同形状 >>> g.ndata['y'] = th.randn(g.num_nodes(), 5) >>> g.ndata['x'][1] # 获取节点1的特征 tensor([1., 1., 1.]) >>> g.edata['x'][th.tensor([0, 3])] # 获取边0和3的特征 tensor([1, 1], dtype=torch.int32) 关于 :py:attr:`~dgl.DGLGraph.ndata` 和 :py:attr:`~dgl.DGLGraph.edata` 接口的重要说明: - 仅允许使用数值类型(如单精度浮点型、双精度浮点型和整型)的特征。这些特征可以是标量、向量或多维张量。 - 每个节点特征具有唯一名称,每个边特征也具有唯一名称。节点和边的特征可以具有相同的名称(如上述示例代码中的 ``'x'`` )。 - 通过张量分配创建特征时,DGL会将特征赋给图中的每个节点和每条边。该张量的第一维必须与图中节点或边的数量一致。 不能将特征赋给图中节点或边的子集。 - 相同名称的特征必须具有相同的维度和数据类型。 - 
特征张量使用"行优先"的原则,即每个行切片储存1个节点或1条边的特征(参考上述示例代码的第16和18行)。 对于加权图,用户可以将权重储存为一个边特征,如下。 .. code-block:: python >>> # 边 0->1, 0->2, 0->3, 1->3 >>> edges = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3]) >>> weights = th.tensor([0.1, 0.6, 0.9, 0.7]) # 每条边的权重 >>> g = dgl.graph(edges) >>> g.edata['w'] = weights # 将其命名为 'w' >>> g Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={'w' : Scheme(shape=(,), dtype=torch.float32)}) 相关API: :py:attr:`~dgl.DGLGraph.ndata`、 :py:attr:`~dgl.DGLGraph.edata`。 ================================================ FILE: docs/source/guide_cn/graph-gpu.rst ================================================ .. _guide_cn-graph-gpu: 1.6 在GPU上使用DGLGraph ---------------------- :ref:`(English Version)` 用户可以通过在构造过程中传入两个GPU张量来创建GPU上的 :class:`~dgl.DGLGraph` 。 另一种方法是使用 :func:`~dgl.DGLGraph.to` API将 :class:`~dgl.DGLGraph` 复制到GPU,这会将图结构和特征数据都拷贝到指定的设备。 .. code:: >>> import dgl >>> import torch as th >>> u, v = th.tensor([0, 1, 2]), th.tensor([2, 3, 4]) >>> g = dgl.graph((u, v)) >>> g.ndata['x'] = th.randn(5, 3) # 原始特征在CPU上 >>> g.device device(type='cpu') >>> cuda_g = g.to('cuda:0') # 接受来自后端框架的任何设备对象 >>> cuda_g.device device(type='cuda', index=0) >>> cuda_g.ndata['x'].device # 特征数据也拷贝到了GPU上 device(type='cuda', index=0) >>> # 由GPU张量构造的图也在GPU上 >>> u, v = u.to('cuda:0'), v.to('cuda:0') >>> g = dgl.graph((u, v)) >>> g.device device(type='cuda', index=0) 任何涉及GPU图的操作都是在GPU上运行的。因此,这要求所有张量参数都已经放在GPU上,其结果(图或张量)也将在GPU上。 此外,GPU图只接受GPU上的特征数据。 .. code:: >>> cuda_g.in_degrees() tensor([0, 0, 1, 1, 1], device='cuda:0') >>> cuda_g.in_edges([2, 3, 4]) # 可以接受非张量类型的参数 (tensor([0, 1, 2], device='cuda:0'), tensor([2, 3, 4], device='cuda:0')) >>> cuda_g.in_edges(th.tensor([2, 3, 4]).to('cuda:0')) # 张量类型的参数必须在GPU上 (tensor([0, 1, 2], device='cuda:0'), tensor([2, 3, 4], device='cuda:0')) >>> cuda_g.ndata['h'] = th.randn(5, 4) # ERROR! 特征也必须在GPU上! DGLError: Cannot assign node feature "h" on device cpu to a graph on device cuda:0. 
Call DGLGraph.to() to copy the graph to the same device. ================================================ FILE: docs/source/guide_cn/graph-graphs-nodes-edges.rst ================================================ .. _guide_cn-graph-graphs-nodes-edges: 1.2 图、节点和边 -------------- :ref:`(English Version)` DGL使用一个唯一的整数来表示一个节点,称为点ID;并用对应的两个端点ID表示一条边。同时,DGL也会根据边被添加的顺序, 给每条边分配一个唯一的整数编号,称为边ID。节点和边的ID都是从0开始构建的。在DGL的图里,所有的边都是有方向的, 即边 :math:`(u, v)` 表示它是从节点 :math:`u` 指向节点 :math:`v` 的。 对于多个节点,DGL使用一个一维的整型张量(如,PyTorch的Tensor类,TensorFlow的Tensor类或MXNet的ndarray类)来保存图的点ID, DGL称之为"节点张量"。为了指代多条边,DGL使用一个包含2个节点张量的元组 :math:`(U, V)` ,其中,用 :math:`(U[i], V[i])` 指代一条 :math:`U[i]` 到 :math:`V[i]` 的边。 创建一个 :class:`~dgl.DGLGraph` 对象的一种方法是使用 :func:`dgl.graph` 函数。它接受一个边的集合作为输入。DGL也支持从其他的数据源来创建图对象。 读者可参考 :ref:`guide_cn-graph-external`。 下面的代码段使用了 :func:`dgl.graph` 函数来构建一个 :class:`~dgl.DGLGraph` 对象,对应着下图所示的包含4个节点的图。 其中一些代码演示了查询图结构的部分API的使用方法。 .. figure:: https://data.dgl.ai/asset/image/user_guide_graphch_1.png :height: 200px :width: 300px :align: center .. code:: >>> import dgl >>> import torch as th >>> # 边 0->1, 0->2, 0->3, 1->3 >>> u, v = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3]) >>> g = dgl.graph((u, v)) >>> print(g) # 图中节点的数量是DGL通过给定的图的边列表中最大的点ID推断所得出的 Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={}) >>> # 获取节点的ID >>> print(g.nodes()) tensor([0, 1, 2, 3]) >>> # 获取边的对应端点 >>> print(g.edges()) (tensor([0, 0, 0, 1]), tensor([1, 2, 3, 3])) >>> # 获取边的对应端点和边ID >>> print(g.edges(form='all')) (tensor([0, 0, 0, 1]), tensor([1, 2, 3, 3]), tensor([0, 1, 2, 3])) >>> # 如果具有最大ID的节点没有边,在创建图的时候,用户需要明确地指明节点的数量。 >>> g = dgl.graph((u, v), num_nodes=8) 对于无向的图,用户需要为每条边都创建两个方向的边。可以使用 :func:`dgl.to_bidirected` 函数来实现这个目的。 如下面的代码段所示,这个函数可以把原图转换成一个包含反向边的图。 .. code:: >>> bg = dgl.to_bidirected(g) >>> bg.edges() (tensor([0, 0, 0, 1, 1, 2, 3, 3]), tensor([1, 2, 3, 0, 3, 0, 0, 1])) .. 
note:: 由于Tensor类内部使用C来存储,且显性定义了数据类型以及存储的设备信息,DGL推荐使用Tensor作为DGL API的输入。 不过大部分的DGL API也支持Python的可迭代类型(比如列表)或numpy.ndarray类型作为API的输入,方便用户快速进行开发验证。 DGL支持使用 :math:`32` 位或 :math:`64` 位的整数作为节点ID和边ID。节点和边ID的数据类型必须一致。如果使用 :math:`64` 位整数, DGL可以处理最多 :math:`2^{63} - 1` 个节点或边。不过,如果图里的节点或者边的数量小于 :math:`2^{31} - 1` ,用户最好使用 :math:`32` 位整数。 这样不仅能提升速度,还能减少内存的使用。DGL提供了进行数据类型转换的方法,如下例所示。 .. code:: >>> edges = th.tensor([2, 5, 3]), th.tensor([3, 5, 0]) # 边:2->3, 5->5, 3->0 >>> g64 = dgl.graph(edges) # DGL默认使用int64 >>> print(g64.idtype) torch.int64 >>> g32 = dgl.graph(edges, idtype=th.int32) # 使用int32构建图 >>> g32.idtype torch.int32 >>> g64_2 = g32.long() # 转换成int64 >>> g64_2.idtype torch.int64 >>> g32_2 = g64.int() # 转换成int32 >>> g32_2.idtype torch.int32 相关API::func:`dgl.graph`、 :func:`dgl.DGLGraph.nodes`、 :func:`dgl.DGLGraph.edges`、 :func:`dgl.to_bidirected`、 :func:`dgl.DGLGraph.int`、 :func:`dgl.DGLGraph.long` 和 :py:attr:`dgl.DGLGraph.idtype`。 ================================================ FILE: docs/source/guide_cn/graph-heterogeneous.rst ================================================ .. _guide_cn-graph-heterogeneous: 1.5 异构图 --------- :ref:`(English Version)` 相比同构图,异构图里可以有不同类型的节点和边。这些不同类型的节点和边具有独立的ID空间和特征。 例如在下图中,"用户"和"游戏"节点的ID都是从0开始的,而且两种节点具有不同的特征。 .. figure:: https://data.dgl.ai/asset/image/user_guide_graphch_2.png 一个异构图示例。该图具有两种类型的节点("用户"和"游戏")和两种类型的边("关注"和"玩")。 创建异构图 ^^^^^^^^ 在DGL中,一个异构图由一系列子图构成,一个子图对应一种关系。每个关系由一个字符串三元组 定义 ``(源节点类型, 边类型, 目标节点类型)`` 。由于这里的关系定义消除了边类型的歧义,DGL称它们为规范边类型。 下面的代码是一个在DGL中创建异构图的示例。 .. code:: >>> import dgl >>> import torch as th >>> # 创建一个具有3种节点类型和3种边类型的异构图 >>> graph_data = { ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... 
} >>> g = dgl.heterograph(graph_data) >>> g.ntypes ['disease', 'drug', 'gene'] >>> g.etypes ['interacts', 'interacts', 'treats'] >>> g.canonical_etypes [('drug', 'interacts', 'drug'), ('drug', 'interacts', 'gene'), ('drug', 'treats', 'disease')] 注意,同构图和二分图只是一种特殊的异构图,它们只包括一种关系。 .. code:: >>> # 一个同构图 >>> dgl.heterograph({('node_type', 'edge_type', 'node_type'): (u, v)}) >>> # 一个二分图 >>> dgl.heterograph({('source_type', 'edge_type', 'destination_type'): (u, v)}) 与异构图相关联的 *metagraph* 就是图的模式。它指定节点集和节点之间的边的类型约束。 *metagraph* 中的一个节点 :math:`u` 对应于相关异构图中的一个节点类型。 *metagraph* 中的边 :math:`(u,v)` 表示在相关异构图中存在从 :math:`u` 型节点到 :math:`v` 型节点的边。 .. code:: >>> g Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4}, num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1}, metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')]) >>> g.metagraph().edges() OutMultiEdgeDataView([('drug', 'drug'), ('drug', 'gene'), ('drug', 'disease')]) 相关API: :func:`dgl.heterograph`、 :py:attr:`~dgl.DGLGraph.ntypes`、 :py:attr:`~dgl.DGLGraph.etypes`、 :py:attr:`~dgl.DGLGraph.canonical_etypes`、 :py:attr:`~dgl.DGLGraph.metagraph`。 使用多种类型 ^^^^^^^^^^ 当引入多种节点和边类型后,用户在调用DGLGraph API以获取特定类型的信息时,需要指定具体的节点和边类型。此外,不同类型的节点和边具有单独的ID。 .. code:: >>> # 获取图中所有节点的数量 >>> g.num_nodes() 10 >>> # 获取drug节点的数量 >>> g.num_nodes('drug') 3 >>> # 不同类型的节点有单独的ID。因此,没有指定节点类型就没有明确的返回值。 >>> g.nodes() DGLError: Node type name must be specified if there are more than one node types. >>> g.nodes('drug') tensor([0, 1, 2]) 为了设置/获取特定节点和边类型的特征,DGL提供了两种新类型的语法: `g.nodes['node_type'].data['feat_name']` 和 `g.edges['edge_type'].data['feat_name']` 。 .. code:: >>> # 设置/获取"drug"类型的节点的"hv"特征 >>> g.nodes['drug'].data['hv'] = th.ones(3, 1) >>> g.nodes['drug'].data['hv'] tensor([[1.], [1.], [1.]]) >>> # 设置/获取"treats"类型的边的"he"特征 >>> g.edges['treats'].data['he'] = th.zeros(1, 1) >>> g.edges['treats'].data['he'] tensor([[0.]]) 如果图里只有一种节点或边类型,则不需要指定节点或边的类型。 .. 
code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'is similar', 'drug'): (th.tensor([0, 1]), th.tensor([2, 3])) ... }) >>> g.nodes() tensor([0, 1, 2, 3]) >>> # 设置/获取单一类型的节点或边特征,不必使用新的语法 >>> g.ndata['hv'] = th.ones(4, 1) .. note:: 当边类型唯一地确定了源节点和目标节点的类型时,用户可以只使用一个字符串而不是字符串三元组来指定边类型。例如, 对于具有两个关系 ``('user', 'plays', 'game')`` 和 ``('user', 'likes', 'game')`` 的异构图, 只使用 ``'plays'`` 或 ``'likes'`` 来指代这两个关系是可以的。 从磁盘加载异构图 ^^^^^^^^^^^^^ 逗号分隔值(CSV) """""""""""""" 一种存储异构图的常见方法是在不同的CSV文件中存储不同类型的节点和边。下面是一个例子。 .. code:: # 数据文件夹 data/ |-- drug.csv # drug节点 |-- gene.csv # gene节点 |-- disease.csv # disease节点 |-- drug-interact-drug.csv # drug-drug相互作用边 |-- drug-interact-gene.csv # drug-gene相互作用边 |-- drug-treat-disease.csv # drug-disease治疗边 与同构图的情况类似,用户可以使用像Pandas这样的包先将CSV文件解析为numpy数组或框架张量,再构建一个关系字典,并用它构造一个异构图。 这种方法也适用于其他流行的文件格式,比如GML或JSON。 DGL二进制格式 """"""""""" DGL提供了 :func:`dgl.save_graphs` 和 :func:`dgl.load_graphs` 函数,分别用于以二进制格式保存异构图和加载它们。 边类型子图 ^^^^^^^^ 用户可以通过指定要保留的关系来创建异构图的子图,相关的特征也会被拷贝。 .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... }) >>> g.nodes['drug'].data['hv'] = th.ones(3, 1) >>> # 保留关系 ('drug', 'interacts', 'drug') 和 ('drug', 'treats', 'disease') 。 >>> # 'drug' 和 'disease' 类型的节点也会被保留 >>> eg = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'), ... ('drug', 'treats', 'disease')]) >>> eg Graph(num_nodes={'disease': 3, 'drug': 3}, num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'treats', 'disease'): 1}, metagraph=[('drug', 'drug', 'interacts'), ('drug', 'disease', 'treats')]) >>> # 相关的特征也会被拷贝 >>> eg.nodes['drug'].data['hv'] tensor([[1.], [1.], [1.]]) 将异构图转化为同构图 ^^^^^^^^^^^^^^^^ 异构图为管理不同类型的节点和边及其相关特征提供了一个清晰的接口。这在以下情况下尤其有用: 1. 不同类型的节点和边的特征具有不同的数据类型或大小。 2.
用户希望对不同类型的节点和边应用不同的操作。 如果上述情况不适用,并且用户不希望在建模中区分节点和边的类型,则DGL允许使用 :func:`dgl.DGLGraph.to_homogeneous` API将异构图转换为同构图。 具体行为如下: 1. 用从0开始的连续整数重新标记所有类型的节点和边。 2. 对所有的节点和边合并用户指定的特征。 .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))}) >>> g.nodes['drug'].data['hv'] = th.zeros(3, 1) >>> g.nodes['disease'].data['hv'] = th.ones(3, 1) >>> g.edges['interacts'].data['he'] = th.zeros(2, 1) >>> g.edges['treats'].data['he'] = th.zeros(1, 2) >>> # 默认情况下不进行特征合并 >>> hg = dgl.to_homogeneous(g) >>> 'hv' in hg.ndata False >>> # 拷贝边的特征 >>> # 对于要拷贝的特征,DGL假定不同类型的节点或边的需要合并的特征具有相同的大小和数据类型 >>> hg = dgl.to_homogeneous(g, edata=['he']) DGLError: Cannot concatenate column ‘he’ with shape Scheme(shape=(2,), dtype=torch.float32) and shape Scheme(shape=(1,), dtype=torch.float32) >>> # 拷贝节点特征 >>> hg = dgl.to_homogeneous(g, ndata=['hv']) >>> hg.ndata['hv'] tensor([[1.], [1.], [1.], [0.], [0.], [0.]]) 原始的节点或边的类型和对应的ID被存储在 :py:attr:`~dgl.DGLGraph.ndata` 和 :py:attr:`~dgl.DGLGraph.edata` 中。 .. code:: >>> # 异构图中节点类型的顺序 >>> g.ntypes ['disease', 'drug'] >>> # 原始节点类型 >>> hg.ndata[dgl.NTYPE] tensor([0, 0, 0, 1, 1, 1]) >>> # 原始的特定类型节点ID >>> hg.ndata[dgl.NID] tensor([0, 1, 2, 0, 1, 2]) >>> # 异构图中边类型的顺序 >>> g.etypes ['interacts', 'treats'] >>> # 原始边类型 >>> hg.edata[dgl.ETYPE] tensor([0, 0, 1]) >>> # 原始的特定类型边ID >>> hg.edata[dgl.EID] tensor([0, 1, 0]) 出于建模的目的,用户可能需要将一些关系合并,并对它们应用相同的操作。为了实现这一目的,可以先抽取异构图的边类型子图,然后将该子图转换为同构图。 .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... }) >>> sub_g = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'), ... ('drug', 'interacts', 'gene')]) >>> h_sub_g = dgl.to_homogeneous(sub_g) >>> h_sub_g Graph(num_nodes=7, num_edges=4, ...) 
================================================ FILE: docs/source/guide_cn/graph.rst ================================================ .. _guide_cn-graph: 第1章:图 ============= :ref:`(English Version)` 图表示实体(节点)和它们的关系(边),其中节点和边可以是有类型的 (例如,``"用户"`` 和 ``"物品"`` 是两种不同类型的节点)。 DGL通过其核心数据结构 :class:`~dgl.DGLGraph` 提供了一个以图为中心的编程抽象。 :class:`~dgl.DGLGraph` 提供了接口以处理图的结构、节点/边 的特征,以及使用这些组件可以执行的计算。 本章路线图 -------------- 本章首先简要介绍了图的定义(见1.1节),然后介绍了一些 :class:`~dgl.DGLGraph` 相关的核心概念: * :ref:`guide_cn-graph-basic` * :ref:`guide_cn-graph-graphs-nodes-edges` * :ref:`guide_cn-graph-feature` * :ref:`guide_cn-graph-external` * :ref:`guide_cn-graph-heterogeneous` * :ref:`guide_cn-graph-gpu` .. toctree:: :maxdepth: 1 :hidden: :glob: graph-basic graph-graphs-nodes-edges graph-feature graph-external graph-heterogeneous graph-gpu ================================================ FILE: docs/source/guide_cn/index.rst ================================================ 用户指南【包含过时信息】 =================== .. toctree:: :maxdepth: 2 :titlesonly: graph message nn data training minibatch distributed 2020年9月,DGL社区的一群热心贡献者把DGL用户指南译成了中文,方便广大中文用户群学习和使用DGL。 特此致谢下述贡献者: .. 
list-table:: :widths: 20 20 20 :header-rows: 1 * - 章节 - 个人姓名/昵称 - 个人链接 * - :ref:`guide_cn-graph` - 张怀文/Huaiwen Zhang - https://github.com/huaiwen * - :ref:`guide_cn-graph-basic` - 沈成 / mlsoar - https://github.com/mlsoar * - :ref:`guide_cn-graph-graphs-nodes-edges` - 张建 / zhjwy9343 - https://github.com/zhjwy9343 * - :ref:`guide_cn-graph-feature` - 沈成 / mlsoar - https://github.com/mlsoar * - :ref:`guide_cn-graph-external` - 沈成 / mlsoar - https://github.com/mlsoar * - :ref:`guide_cn-graph-heterogeneous` - 张怀文/Huaiwen Zhang - https://github.com/huaiwen * - :ref:`guide_cn-message-passing`, - 黄崟/Brook Huang - https://github.com/brookhuang16211 * - :ref:`guide_cn-message-passing-api` - 黄崟/Brook Huang - https://github.com/brookhuang16211 * - :ref:`guide_cn-message-passing-efficient` - 黄崟/Brook Huang - https://github.com/brookhuang16211 * - :ref:`guide_cn-message-passing-part` - 陈知雨/Zhiyu Chen - https://www.zhiyuchen.com * - :ref:`guide_cn-message-passing-edge` - 陈知雨/Zhiyu Chen - https://www.zhiyuchen.com * - :ref:`guide_cn-message-passing-heterograph` - 陈知雨/Zhiyu Chen - https://www.zhiyuchen.com * - :ref:`guide_cn-nn` - 陈知雨/Zhiyu Chen - https://www.zhiyuchen.com * - :ref:`guide_cn-nn-construction` - 陈知雨/Zhiyu Chen - https://www.zhiyuchen.com * - :ref:`guide_cn-nn-forward` - 栩栩的夏天 - * - :ref:`guide_cn-nn-heterograph` - 栩栩的夏天 - * - :ref:`guide_cn-data-pipeline` - 吴紫薇/ Maggie Wu - https://github.com/hhhiddleston * - :ref:`guide_cn-data-pipeline-dataset` - 吴紫薇/ Maggie Wu - https://github.com/hhhiddleston * - :ref:`guide_cn-data-pipeline-download` - 吴紫薇/ Maggie Wu - https://github.com/hhhiddleston * - :ref:`guide_cn-data-pipeline-process` - 吴紫薇/ Maggie Wu - https://github.com/hhhiddleston * - :ref:`guide_cn-data-pipeline-savenload` - 王建民/DrugAI - https://github.com/AspirinCode * - :ref:`guide_cn-data-pipeline-loadogb` - 王建民/DrugAI - https://github.com/AspirinCode * - :ref:`guide_cn-training` - 王建民/DrugAI - https://github.com/AspirinCode * - 
:ref:`guide_cn-training-node-classification`, - 王建民/DrugAI - https://github.com/AspirinCode * - :ref:`guide_cn-training-edge-classification` - 徐东辉/DonghuiXu - https://github.com/rewonderful * - :ref:`guide_cn-training-link-prediction` - 徐东辉/DonghuiXu - https://github.com/rewonderful * - :ref:`guide_cn-training-graph-classification` - 莫佳帅子/Molasses - https://github.com/sleeplessai * - :ref:`guide_cn-minibatch` - 莫佳帅子/Molasses - https://github.com/sleeplessai * - :ref:`guide_cn-minibatch-node-classification-sampler` - 孟凡荣/kevin-meng - https://github.com/kevin-meng * - :ref:`guide_cn-minibatch-edge-classification-sampler` - 莫佳帅子/Molasses - https://github.com/sleeplessai * - :ref:`guide_cn-minibatch-link-classification-sampler` - 孟凡荣/kevin-meng - https://github.com/kevin-meng * - :ref:`guide_cn-minibatch-customizing-neighborhood-sampler` - 孟凡荣/kevin-meng - https://github.com/kevin-meng * - :ref:`guide_cn-minibatch-custom-gnn-module` - 胡骏 - https://github.com/CrawlScript * - :ref:`guide_cn-minibatch-inference` - 胡骏 - https://github.com/CrawlScript * - :ref:`guide_cn-distributed` - 宋怡然/Yiran Song - https://github.com/rr-Yiran * - :ref:`guide_cn-distributed-preprocessing` - 宋怡然/Yiran Song - https://github.com/rr-Yiran * - :ref:`guide_cn-distributed-apis` - 李庆标/Qingbiao Li - https://qingbiaoli.github.io/ * - :ref:`guide_cn-distributed-tools` - 李庆标/Qingbiao Li - https://qingbiaoli.github.io/ ================================================ FILE: docs/source/guide_cn/message-api.rst ================================================ .. 
_guide_cn-message-passing-api: 2.1 内置函数和消息传递API ---------------------- :ref:`(English Version) ` 在DGL中,**消息函数** 接受一个参数 ``edges``,这是一个 :class:`~dgl.udf.EdgeBatch` 的实例, 在消息传递时,它被DGL在内部生成以表示一批边。 ``edges`` 有 ``src``、 ``dst`` 和 ``data`` 共3个成员属性, 分别用于访问源节点、目标节点和边的特征。 **聚合函数** 接受一个参数 ``nodes``,这是一个 :class:`~dgl.udf.NodeBatch` 的实例, 在消息传递时,它被DGL在内部生成以表示一批节点。 ``nodes`` 的成员属性 ``mailbox`` 可以用来访问节点收到的消息。 一些最常见的聚合操作包括 ``sum``、``max``、``min`` 等。 **更新函数** 接受一个如上所述的参数 ``nodes``。此函数对 ``聚合函数`` 的聚合结果进行操作, 通常在消息传递的最后一步将其与节点的特征相结合,并将输出作为节点的新特征。 DGL在命名空间 ``dgl.function`` 中实现了常用的消息函数和聚合函数作为 **内置函数**。 一般来说,DGL建议 **尽可能** 使用内置函数,因为它们经过了大量优化,并且可以自动处理维度广播。 如果用户的消息传递函数无法用内置函数实现,则可以实现自己的消息或聚合函数(也称为 **用户定义函数** )。 内置消息函数可以是一元函数或二元函数。对于一元函数,DGL支持 ``copy`` 函数。对于二元函数, DGL现在支持 ``add``、 ``sub``、 ``mul``、 ``div``、 ``dot`` 函数。消息的内置函数的命名约定是 ``u`` 表示 ``源`` 节点, ``v`` 表示 ``目标`` 节点,``e`` 表示 ``边``。这些函数的参数是字符串,指示相应节点和边的输入和输出特征字段名。 关于内置函数的列表,请参见 :ref:`api-built-in`。例如,要对源节点的 ``hu`` 特征和目标节点的 ``hv`` 特征求和, 然后将结果保存在边的 ``he`` 特征上,用户可以使用内置函数 ``dgl.function.u_add_v('hu', 'hv', 'he')``。 而以下用户定义消息函数与此内置函数等价。 .. code:: def message_func(edges): return {'he': edges.src['hu'] + edges.dst['hv']} DGL支持内置的聚合函数 ``sum``、 ``max``、 ``min`` 和 ``mean`` 操作。 聚合函数通常有两个参数,它们的类型都是字符串。一个用于指定 ``mailbox`` 中的字段名,一个用于指示目标节点特征的字段名, 例如, ``dgl.function.sum('m', 'h')`` 等价于如下所示的对接收到消息求和的用户定义函数: .. code:: import torch def reduce_func(nodes): return {'h': torch.sum(nodes.mailbox['m'], dim=1)} 关于用户定义函数的进阶用法,参见 :ref:`apiudf`。 在DGL中,也可以在不涉及消息传递的情况下,通过 :meth:`~dgl.DGLGraph.apply_edges` 单独调用逐边计算。 :meth:`~dgl.DGLGraph.apply_edges` 的参数是一个消息函数。并且在默认情况下,这个接口将更新所有的边。例如: .. code:: import dgl.function as fn graph.apply_edges(fn.u_add_v('el', 'er', 'e')) 对于消息传递, :meth:`~dgl.DGLGraph.update_all` 是一个高级API。它在单个API调用里合并了消息生成、 消息聚合和节点特征更新,这为从整体上进行系统优化提供了空间。 :meth:`~dgl.DGLGraph.update_all` 的参数是一个消息函数、一个聚合函数和一个更新函数。 更新函数是一个可选择的参数,用户也可以不使用它,而是在 ``update_all`` 执行完后直接对节点特征进行操作。 由于更新函数通常可以用纯张量操作实现,所以DGL不推荐在 ``update_all`` 中指定更新函数。例如: .. 
code:: def update_all_example(graph): # 在graph.ndata['ft']中存储结果 graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) # 在update_all外调用更新函数 final_ft = graph.ndata['ft'] * 2 return final_ft 此调用通过将源节点特征 ``ft`` 与边特征 ``a`` 相乘生成消息 ``m``, 然后对所有消息求和来更新节点特征 ``ft``,再将 ``ft`` 乘以2得到最终结果 ``final_ft``。 调用后,中间消息 ``m`` 将被清除。上述函数的数学公式为: .. math:: {final\_ft}_i = 2 * \sum_{j\in\mathcal{N}(i)} ({ft}_j * a_{ij}) ================================================ FILE: docs/source/guide_cn/message-efficient.rst ================================================ .. _guide_cn-message-passing-efficient: 2.2 编写高效的消息传递代码 ---------------------- :ref:`(English Version) ` DGL优化了消息传递的内存消耗和计算速度。利用这些优化的一个常见实践是通过基于内置函数的 :meth:`~dgl.DGLGraph.update_all` 来开发消息传递功能。 除此之外,考虑到某些图边的数量远远大于节点的数量,DGL建议避免不必要的从点到边的内存拷贝。对于某些情况,比如 :class:`~dgl.nn.pytorch.conv.GATConv`,计算必须在边上保存消息, 那么用户就需要调用基于内置函数的 :meth:`~dgl.DGLGraph.apply_edges`。有时边上的消息可能是高维的,这会非常消耗内存。 DGL建议用户尽量减少边的特征维数。 下面是一个如何通过对节点特征降维来减少消息维度的示例。该做法执行以下操作:拼接 ``源`` 节点和 ``目标`` 节点特征, 然后应用一个线性层,即 :math:`W\times (u || v)`。 ``源`` 节点和 ``目标`` 节点特征维数较高,而线性层输出维数较低。 一个直截了当的实现方式如下: .. code:: import torch import torch.nn as nn linear = nn.Parameter(torch.FloatTensor(size=(node_feat_dim * 2, out_dim))) def concat_message_function(edges): return {'cat_feat': torch.cat([edges.src['feat'], edges.dst['feat']], dim=1)} g.apply_edges(concat_message_function) g.edata['out'] = g.edata['cat_feat'] @ linear 建议的实现是将线性操作分成两部分,一个应用于 ``源`` 节点特征,另一个应用于 ``目标`` 节点特征。 在最后一个阶段,在边上将以上两部分线性操作的结果相加,即执行 :math:`W_l\times u + W_r \times v`, 因为 :math:`W \times (u||v) = W_l \times u + W_r \times v`,其中 :math:`W_l` 和 :math:`W_r` 分别是矩阵 :math:`W` 的左半部分和右半部分: .. 
code:: import dgl.function as fn linear_src = nn.Parameter(torch.FloatTensor(size=(node_feat_dim, out_dim))) linear_dst = nn.Parameter(torch.FloatTensor(size=(node_feat_dim, out_dim))) out_src = g.ndata['feat'] @ linear_src out_dst = g.ndata['feat'] @ linear_dst g.srcdata.update({'out_src': out_src}) g.dstdata.update({'out_dst': out_dst}) g.apply_edges(fn.u_add_v('out_src', 'out_dst', 'out')) 以上两个实现在数学上是等价的。后一种方法效率高得多,因为不需要在边上保存feat_src和feat_dst, 从内存角度来说是高效的。另外,加法可以通过DGL的内置函数 ``u_add_v`` 进行优化,从而进一步加快计算速度并节省内存占用。 ================================================ FILE: docs/source/guide_cn/message-heterograph.rst ================================================ .. _guide_cn-message-passing-heterograph: 2.5 在异构图上进行消息传递 ---------------------- :ref:`(English Version) ` 异构图(参考用户指南 :ref:`1.5 异构图 ` )是包含不同类型的节点和边的图。 不同类型的节点和边常常具有不同类型的属性。这些属性旨在刻画每一种节点和边的特征。在使用图神经网络时,根据其复杂性, 可能需要使用不同维度的表示来对不同类型的节点和边进行建模。 异构图上的消息传递可以分为两个部分: 1. 对每个关系计算和聚合消息。 2. 对每个结点聚合来自不同关系的消息。 在DGL中,对异构图进行消息传递的接口是 :meth:`~dgl.DGLGraph.multi_update_all`。 :meth:`~dgl.DGLGraph.multi_update_all` 接受一个字典。这个字典的每一个键值对里,键是一种关系, 值是这种关系对应 :meth:`~dgl.DGLGraph.update_all` 的参数。 :meth:`~dgl.DGLGraph.multi_update_all` 还接受一个字符串来表示跨类型整合函数,来指定整合不同关系聚合结果的方式。 这个整合方式可以是 ``sum``、 ``min``、 ``max``、 ``mean`` 和 ``stack`` 中的一个。以下是一个例子: .. code:: import dgl.function as fn for c_etype in G.canonical_etypes: srctype, etype, dsttype = c_etype Wh = self.weight[etype](feat_dict[srctype]) # 把它存在图中用来做消息传递 G.nodes[srctype].data['Wh_%s' % etype] = Wh # 指定每个关系的消息传递函数:(message_func, reduce_func). # 注意结果保存在同一个目标特征“h”,说明聚合是逐类进行的。 funcs[etype] = (fn.copy_u('Wh_%s' % etype, 'm'), fn.mean('m', 'h')) # 将每个类型消息聚合的结果相加。 G.multi_update_all(funcs, 'sum') # 返回更新过的节点特征字典 return {ntype : G.nodes[ntype].data['h'] for ntype in G.ntypes} ================================================ FILE: docs/source/guide_cn/message-part.rst ================================================ .. 
_guide_cn-message-passing-part: 2.3 在图的一部分上进行消息传递 ------------------------- :ref:`(English Version) ` 如果用户只想更新图中的部分节点,可以先通过想要囊括的节点编号创建一个子图, 然后在子图上调用 :meth:`~dgl.DGLGraph.update_all` 方法。例如: .. code:: nid = [0, 2, 3, 6, 7, 9] sg = g.subgraph(nid) sg.update_all(message_func, reduce_func, apply_node_func) 这是小批量训练中的常见用法。更多详细用法请参考用户指南 :ref:`guide_cn-minibatch`。 ================================================ FILE: docs/source/guide_cn/message.rst ================================================ .. _guide_cn-message-passing: 第2章:消息传递范式 =========================== :ref:`(English Version) ` 消息传递是实现GNN的一种通用框架和编程范式。它从聚合与更新的角度归纳总结了多种GNN模型的实现。 消息传递范式 ---------------------- 假设节点 :math:`v` 上的特征为 :math:`x_v\in\mathbb{R}^{d_1}`,边 :math:`({u}, {v})` 上的特征为 :math:`w_{e}\in\mathbb{R}^{d_2}`。 **消息传递范式** 定义了以下逐节点和边上的计算: .. math:: \text{边上计算: } m_{e}^{(t+1)} = \phi \left( x_v^{(t)}, x_u^{(t)}, w_{e}^{(t)} \right) , ({u}, {v},{e}) \in \mathcal{E}. .. math:: \text{点上计算: } x_v^{(t+1)} = \psi \left(x_v^{(t)}, \rho\left(\left\lbrace m_{e}^{(t+1)} : ({u}, {v},{e}) \in \mathcal{E} \right\rbrace \right) \right). 在上面的等式中, :math:`\phi` 是定义在每条边上的消息函数,它通过将边上特征与其两端节点的特征相结合来生成消息。 **聚合函数** :math:`\rho` 会聚合节点接受到的消息。 **更新函数** :math:`\psi` 会结合聚合后的消息和节点本身的特征来更新节点的特征。 本章路线图 -------------------- 本章首先介绍了DGL的消息传递API。然后讲解了如何高效地在点和边上使用这些API。本章的最后一节解释了如何在异构图上实现消息传递。 * :ref:`guide_cn-message-passing-api` * :ref:`guide_cn-message-passing-efficient` * :ref:`guide_cn-message-passing-part` * :ref:`guide_cn-message-passing-heterograph` .. toctree:: :maxdepth: 1 :hidden: :glob: message-api message-efficient message-part message-heterograph ================================================ FILE: docs/source/guide_cn/minibatch-custom-sampler.rst ================================================ .. 
_guide_cn-minibatch-customizing-neighborhood-sampler: 6.4 定制用户自己的邻居采样器 ---------------------------------------------- :ref:`(English Version) ` 虽然DGL提供了一些邻居采样器,但有时用户还是希望编写自己的采样器。 本节会说明如何编写用户自己的采样器并将其加入到GNN的训练框架中。 回想一下在 `How Powerful are Graph Neural Networks `__ 的论文中,消息传递的定义是: .. math:: \begin{gathered} \boldsymbol{a}_v^{(l)} = \rho^{(l)} \left( \left\lbrace \boldsymbol{h}_u^{(l-1)} : u \in \mathcal{N} \left( v \right) \right\rbrace \right) \\ \boldsymbol{h}_v^{(l)} = \phi^{(l)} \left( \boldsymbol{h}_v^{(l-1)}, \boldsymbol{a}_v^{(l)} \right) \end{gathered} 其中, :math:`\rho^{(l)}` 和 :math:`\phi^{(l)}` 分别是可自定义的消息函数与聚合函数, :math:`\mathcal{N}(v)` 为有向图 :math:`\mathcal{G}` 上的节点 :math:`v` 的前驱节点(或无向图中的邻居)。 以下图为例,假设红色节点为需要更新的目标节点: .. figure:: https://data.dgl.ai/asset/image/guide_6_4_0.png :alt: Imgur 消息传递需要聚集其邻居(绿色节点)的节点特征,如下图所示: .. figure:: https://data.dgl.ai/asset/image/guide_6_4_1.png :alt: Imgur 理解邻居采样的工作原理 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 在介绍DGL中邻居采样的用法之前,这里先解释一下邻居采样的工作原理。下文继续使用上述的例子。 首先定义一个如上图所示的DGLGraph。 .. code:: python import torch import dgl src = torch.LongTensor( [0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 9, 10, 1, 2, 3, 3, 3, 4, 5, 5, 6, 5, 8, 6, 8, 9, 8, 11, 11, 10, 11]) dst = torch.LongTensor( [1, 2, 3, 3, 3, 4, 5, 5, 6, 5, 8, 6, 8, 9, 8, 11, 11, 10, 11, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 9, 10]) g = dgl.graph((src, dst)) 该例子的目标是计算单个节点(节点8)的输出。DGL将需要计算GNN输出的节点称为 *种子节点* 。 找出消息传递的依赖 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 假设要使用2层GNN计算种子节点8(红色点)的输出: .. figure:: https://data.dgl.ai/asset/image/guide_6_4_2.png :alt: Imgur 其消息传递的计算公式如下: .. 
math:: \begin{gathered} \boldsymbol{a}_8^{(2)} = \rho^{(2)} \left( \left\lbrace \boldsymbol{h}_u^{(1)} : u \in \mathcal{N} \left( 8 \right) \right\rbrace \right) = \rho^{(2)} \left( \left\lbrace \boldsymbol{h}_4^{(1)}, \boldsymbol{h}_5^{(1)}, \boldsymbol{h}_7^{(1)}, \boldsymbol{h}_{11}^{(1)} \right\rbrace \right) \\ \boldsymbol{h}_8^{(2)} = \phi^{(2)} \left( \boldsymbol{h}_8^{(1)}, \boldsymbol{a}_8^{(2)} \right) \end{gathered} 从公式中可以看出,要计算 :math:`\boldsymbol{h}_8^{(2)}`,需要下图中的来自节点4、5、7和11(绿色点)的消息。 .. figure:: https://data.dgl.ai/asset/image/guide_6_4_3.png :alt: Imgur 上图中隐去了和计算不相关的边,仅仅保留了输出节点所需要收集消息的边。DGL称它们为红色节点8在第二个GNN层的 *边界子图*。 DGL实现了多个可用于生成边界的函数。例如, :func:`dgl.in_subgraph()` 是一个生成子图的函数,该子图包括初始图中的所有节点和指定节点的入边。 用户可以将其用作沿所有入边传递消息的边界。 .. code:: python frontier = dgl.in_subgraph(g, [8]) print(frontier.all_edges()) 想了解更多的相关函数,用户可以参考 :ref:`api-subgraph-extraction` 和 :ref:`api-sampling`。 在DGL中,任何具有与初始图相同的节点的图都可以用作边界。这点在之后的 :ref:`guide_cn-minibatch-customizing-neighborhood-sampler-impl` 章节中也会提到。 多层小批量消息传递的二分计算图 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 从上图中可以看到,从 :math:`\boldsymbol{h}_\cdot^{(1)}` 计算 :math:`\boldsymbol{h}_8^{(2)}` 只需要节点4, 5, 7, 8和11(绿色和红色节点)作为输入。 原图上的其他节点是不参与计算的,因此直接在边界子图上执行消息传递有很大开销。 因此,DGL对边界子图做了一个转换,把它的计算依赖关系变成了一个小的二分图。 DGL称这种仅包含必要的输入节点和输出节点的二分图为一个 *块* (block)。 下图显示了以节点8为种子节点时第二个GNN层所需的块。 .. figure:: https://data.dgl.ai/asset/image/guide_6_4_4.png :alt: Imgur 请注意,输出节点也出现在输入节点中。原因是消息传递后的特征组合需要前一层的输出节点表示 (即 :math:`\phi^{(2)}`)。 DGL提供了 :func:`dgl.to_block` 以将任何边界转换为块。其中第一个参数指定边界, 第二个参数指定输出节点。例如,可以使用以下代码将上述边界转换为输出节点为8的块。 .. code:: python output_nodes = torch.LongTensor([8]) block = dgl.to_block(frontier, output_nodes) 要查找给定节点类型的输入节点和输出节点的数量,可以使用 :meth:`dgl.DGLGraph.number_of_src_nodes` 和 :meth:`dgl.DGLGraph.number_of_dst_nodes` 方法。 .. 
code:: python num_input_nodes, num_output_nodes = block.number_of_src_nodes(), block.number_of_dst_nodes() print(num_input_nodes, num_output_nodes) 可以通过 :attr:`dgl.DGLGraph.srcdata` 和 :attr:`dgl.DGLGraph.srcnodes` 访问该块的输入节点特征, 并且可以通过 :attr:`dgl.DGLGraph.dstdata` 和 :attr:`dgl.DGLGraph.dstnodes` 访问其输出节点特征。 ``srcdata``/``dstdata`` 和 ``srcnodes``/``dstnodes`` 的语法与常规图中的 :attr:`dgl.DGLGraph.ndata` 和 :attr:`dgl.DGLGraph.nodes` 相同。 .. code:: python block.srcdata['h'] = torch.randn(num_input_nodes, 5) block.dstdata['h'] = torch.randn(num_output_nodes, 5) 如果是从图中得到的边界,再由边界转换成块,则可以通过以下方式直接读取块的输入和输出节点的特征。 .. code:: python print(block.srcdata['x']) print(block.dstdata['y']) .. raw:: html
:: 用户可以通过 ``dgl.NID`` 得到块中输入节点和输出节点的初始节点ID,可以通过 ``dgl.EID`` 得到边ID到输入边界中边的初始ID的映射。 .. raw:: html
**输出节点** DGL确保块的输出节点将始终出现在输入节点中。如下代码所演示的,在输入节点中,输出节点的ID位于其它节点之前。 .. code:: python input_nodes = block.srcdata[dgl.NID] output_nodes = block.dstdata[dgl.NID] assert torch.equal(input_nodes[:len(output_nodes)], output_nodes) 因此,在用多层图神经网络时,中间某一层对应的边界需要包含该层及所有后续层计算涉及边的目标节点。例如,考虑以下边界 .. figure:: https://data.dgl.ai/asset/image/guide_6_4_5.png :alt: Imgur 其中红色和绿色节点(即节点4、5、7、8和11)都是后续图神经网络层计算中某条边的目标节点。 以下代码由于输出节点未覆盖所有这些节点,将会报错。 .. code:: python dgl.to_block(frontier2, torch.LongTensor([4, 5])) # ERROR 但是,输出节点可以比以上节点包含更多节点。下例的输出节点包含了没有入边的孤立节点。 输入节点和输出节点将同时包含这些孤立节点。 .. code:: python # 节点3是一个孤立节点,没有任何指向它的边. block3 = dgl.to_block(frontier2, torch.LongTensor([4, 5, 7, 8, 11, 3])) print(block3.srcdata[dgl.NID]) print(block3.dstdata[dgl.NID]) 异构图上的采样 ^^^^^^^^^^^^^^^^^^^^ 块也可用于异构图。假设有如下的边界: .. code:: python hetero_frontier = dgl.heterograph({ ('user', 'follow', 'user'): ([1, 3, 7], [3, 6, 8]), ('user', 'play', 'game'): ([5, 5, 4], [6, 6, 2]), ('game', 'played-by', 'user'): ([2], [6]) }, num_nodes_dict={'user': 10, 'game': 10}) 可以创建一个如下的块,块的输出节点为 ``User`` 节点3、6、8和 ``Game`` 节点2、6。 .. code:: python hetero_block = dgl.to_block(hetero_frontier, {'user': [3, 6, 8], 'game': [2, 6]}) 对于这个块,用户可以按节点类型来获取输入节点和输出节点: .. code:: python # 输入的User和Game节点 print(hetero_block.srcnodes['user'].data[dgl.NID], hetero_block.srcnodes['game'].data[dgl.NID]) # 输出的User和Game节点 print(hetero_block.dstnodes['user'].data[dgl.NID], hetero_block.dstnodes['game'].data[dgl.NID]) .. _guide_cn-minibatch-customizing-neighborhood-sampler-impl: 实现一个自定义邻居采样器 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 前面章节里给出了以下用在节点分类任务的邻居采样器。 .. 
code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) 想实现自定义的邻居采样策略,用户可以将采样器对象替换为自定义的采样器对象。 为此,先来看一下 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 的父类 :class:`~dgl.dataloading.dataloader.BlockSampler`。 :class:`~dgl.dataloading.dataloader.BlockSampler` 负责使用 :meth:`~dgl.dataloading.dataloader.BlockSampler.sample_blocks` 方法从最后一层开始生成一个块的列表。 ``sample_blocks`` 的默认实现是向后迭代,生成边界,并将其转换为块。 因此,对于邻居采样,**用户仅需要实现**\ :meth:`~dgl.dataloading.dataloader.BlockSampler.sample_frontier`\ **方法**。 给定GNN层、初始图和要计算表示的节点,该方法负责为它们生成边界。 同时,用户还必须将GNN的层数传递给父类。 例如, :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 的实现如下。 .. code:: python class MultiLayerFullNeighborSampler(dgl.dataloading.BlockSampler): def __init__(self, n_layers): super().__init__(n_layers) def sample_frontier(self, block_id, g, seed_nodes): frontier = dgl.in_subgraph(g, seed_nodes) return frontier :class:`dgl.dataloading.neighbor.MultiLayerNeighborSampler` 是一个更复杂的邻居采样器类,它允许用户为每个节点采样部分邻居节点以汇聚信息,如下所示。 .. code:: python class MultiLayerNeighborSampler(dgl.dataloading.BlockSampler): def __init__(self, fanouts): super().__init__(len(fanouts)) self.fanouts = fanouts def sample_frontier(self, block_id, g, seed_nodes): fanout = self.fanouts[block_id] if fanout is None: frontier = dgl.in_subgraph(g, seed_nodes) else: frontier = dgl.sampling.sample_neighbors(g, seed_nodes, fanout) return frontier 虽然上面的函数可以生成边界,但是任何拥有与初始图相同节点的图都可用作边界。 例如,如果要以某种概率将种子节点的入边随机剔除,则可以按照以下方式简单地定义采样器: .. 
code:: python class MultiLayerDropoutSampler(dgl.dataloading.BlockSampler): def __init__(self, p, num_layers): super().__init__(num_layers) self.p = p def sample_frontier(self, block_id, g, seed_nodes, *args, **kwargs): # 获取 `seed_nodes` 的所有入边 src, dst = dgl.in_subgraph(g, seed_nodes).all_edges() # 以概率p随机选择边 mask = torch.zeros_like(src).bernoulli_(self.p).bool() src = src[mask] dst = dst[mask] # 返回一个与初始图有相同节点的边界 frontier = dgl.graph((src, dst), num_nodes=g.num_nodes()) return frontier def __len__(self): return self.num_layers 在实现自定义采样器后,用户可以创建一个数据加载器。这个数据加载器使用用户自定义的采样器, 并且遍历种子节点生成一系列的块。 .. code:: python sampler = MultiLayerDropoutSampler(0.5, 2) dataloader = dgl.dataloading.NodeDataLoader( g, train_nids, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) model = StochasticTwoLayerRGCN(in_features, hidden_features, out_features) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, output_nodes, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] input_features = blocks[0].srcdata # 返回一个字典 output_labels = blocks[-1].dstdata # 返回一个字典 output_predictions = model(blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() 异构图上自定义采样器 ^^^^^^^^^^^^^^^^^^^^ 为异构图生成边界与为同构图生成边界没有什么不同。只要使返回的图具有与初始图相同的节点, 就可以正常工作。例如,可以重写上面的 ``MultiLayerDropoutSampler`` 以遍历所有的边类型, 以便它也可以在异构图上使用。 .. 
code:: python class MultiLayerDropoutSampler(dgl.dataloading.BlockSampler): def __init__(self, p, num_layers): super().__init__(num_layers) self.p = p def sample_frontier(self, block_id, g, seed_nodes, *args, **kwargs): # 获取 `seed_nodes` 的所有入边 sg = dgl.in_subgraph(g, seed_nodes) new_edges_masks = {} # 遍历所有边的类型 for etype in sg.canonical_etypes: edge_mask = torch.zeros(sg.num_edges(etype)) edge_mask.bernoulli_(self.p) new_edges_masks[etype] = edge_mask.bool() # 返回一个与初始图有相同节点的图作为边界 frontier = dgl.edge_subgraph(sg, new_edges_masks, relabel_nodes=False) return frontier def __len__(self): return self.num_layers ================================================ FILE: docs/source/guide_cn/minibatch-edge.rst ================================================ .. _guide_cn-minibatch-edge-classification-sampler: 6.2 针对边分类任务的邻居采样训练方法 ---------------------------------------------------------------------- :ref:`(English Version) ` 边分类/回归的训练与节点分类/回归的训练类似,但还是有一些明显的区别。 定义邻居采样器和数据加载器 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 用户可以使用 :ref:`和节点分类一样的邻居采样器 `。 .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) 想要用DGL提供的邻居采样器做边分类,需要将其与 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 结合使用。 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 以小批次的形式对一组边进行迭代, 从而产生包含边小批次的子图以及供下文中模块使用的 ``块``。 例如,以下代码创建了一个PyTorch数据加载器,该PyTorch数据加载器以批的形式迭代训练边ID的数组 ``train_eids``,并将生成的块列表放到GPU上。 .. 
code:: python dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 有关DGL的内置采样器的完整列表,用户可以参考 :ref:`neighborhood sampler API reference `。 如果用户希望开发自己的邻居采样器,或者想要对块的概念有更详细的了解,请参考 :ref:`guide_cn-minibatch-customizing-neighborhood-sampler`。 小批次邻居采样训练时删边 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 用户在训练边分类模型时,有时希望从计算依赖中删除出现在训练数据中的边,就好像这些边根本不存在一样。 否则,模型将 "知道" 两个节点之间存在边的联系,并有可能利用这点 "作弊" 。 因此,在基于邻居采样的边分类中,用户有时会希望从采样得到的小批次图中删去部分边及其对应的反向边。 用户可以在实例化 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 时设置 ``exclude='reverse_id'``,同时将边ID映射到其反向边ID。 通常这样做会导致采样过程变慢很多,这是因为DGL要定位并删除包含在小批次中的反向边。 .. code:: python n_edges = g.num_edges() dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, # 下面的两个参数专门用于在邻居采样时删除小批次的一些边和它们的反向边 exclude='reverse_id', reverse_eids=torch.cat([ torch.arange(n_edges // 2, n_edges), torch.arange(0, n_edges // 2)]), batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 调整模型以适用小批次训练 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 边分类模型通常由两部分组成: - 获取边两端节点的表示。 - 用边两端节点表示为每个类别打分。 第一部分与 :ref:`随机批次训练节点分类 ` 完全相同,用户可以简单地复用它。输入仍然是DGL的数据加载器生成的块列表和输入特征。 .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dglnn.GraphConv(in_features, hidden_features) self.conv2 = dglnn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x 第二部分的输入通常是前一部分的输出,以及由小批次边导出的原始图的子图。 子图是从相同的数据加载器产生的。用户可以调用 :meth:`dgl.DGLGraph.apply_edges` 计算边子图中边的得分。 以下代码片段实现了通过合并边两端节点的特征并将其映射到全连接层来预测边的得分。 .. 
code:: python class ScorePredictor(nn.Module): def __init__(self, num_classes, in_features): super().__init__() self.W = nn.Linear(2 * in_features, num_classes) def apply_edges(self, edges): data = torch.cat([edges.src['x'], edges.dst['x']], 1) return {'score': self.W(data)} def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x edge_subgraph.apply_edges(self.apply_edges) return edge_subgraph.edata['score'] 模型接受数据加载器生成的块列表、边子图以及输入节点特征进行前向传播,如下所示: .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes): super().__init__() self.gcn = StochasticTwoLayerGCN( in_features, hidden_features, out_features) self.predictor = ScorePredictor(num_classes, out_features) def forward(self, edge_subgraph, blocks, x): x = self.gcn(blocks, x) return self.predictor(edge_subgraph, x) DGL保证边子图中的节点与生成的块列表中最后一个块的输出节点相同。 模型的训练 ~~~~~~~~~~~~~ 模型的训练与节点分类的随机批次训练的情况非常相似。用户可以遍历数据加载器以获得由小批次边组成的子图, 以及计算其两端节点表示所需的块列表。 .. code:: python model = Model(in_features, hidden_features, out_features, num_classes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, edge_subgraph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] edge_subgraph = edge_subgraph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] edge_labels = edge_subgraph.edata['labels'] edge_predictions = model(edge_subgraph, blocks, input_features) loss = compute_loss(edge_labels, edge_predictions) opt.zero_grad() loss.backward() opt.step() 异构图上的模型训练 ~~~~~~~~~~~~~~~~~~~~~~~~ 在异构图上,计算节点表示的模型也可以用于计算边分类/回归所需的两端节点的表示。 .. 
code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x 在同构图和异构图上做评分预测时,代码实现的唯一不同在于调用 :meth:`~dgl.DGLGraph.apply_edges` 时需要在特定类型的边上进行迭代。 .. code:: python class ScorePredictor(nn.Module): def __init__(self, num_classes, in_features): super().__init__() self.W = nn.Linear(2 * in_features, num_classes) def apply_edges(self, edges): data = torch.cat([edges.src['x'], edges.dst['x']], 1) return {'score': self.W(data)} def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x for etype in edge_subgraph.canonical_etypes: edge_subgraph.apply_edges(self.apply_edges, etype=etype) return edge_subgraph.edata['score'] class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes, etypes): super().__init__() self.rgcn = StochasticTwoLayerRGCN( in_features, hidden_features, out_features, etypes) self.pred = ScorePredictor(num_classes, out_features) def forward(self, edge_subgraph, blocks, x): x = self.rgcn(blocks, x) return self.pred(edge_subgraph, x) 数据加载器的定义也与节点分类的非常相似。唯一的区别是用户需要使用 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 而不是 :class:`~dgl.dataloading.pytorch.NodeDataLoader`, 并且提供边类型和边ID张量的字典,而不是节点类型和节点ID张量的字典。 .. 
code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 如果用户希望删除异构图中的反向边,情况会有所不同。在异构图上, 反向边通常具有与正向边本身不同的边类型,以便区分 ``向前`` 和 ``向后`` 关系。 例如,``关注`` 和 ``被关注`` 是一对相反的关系, ``购买`` 和 ``被买下`` 也是一对相反的关系。 如果一个类型中的每个边都有一个与之对应的ID相同、属于另一类型的反向边, 则用户可以指定边类型及其反向边类型之间的映射。删除小批次中的边及其反向边的方法如下。 .. code:: python dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, # 下面的两个参数专门用于在邻居采样时删除小批次的一些边和它们的反向边 exclude='reverse_types', reverse_etypes={'follow': 'followed by', 'followed by': 'follow', 'purchase': 'purchased by', 'purchased by': 'purchase'}, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 除了 ``compute_loss`` 的代码实现有所不同,异构图的训练循环与同构图中的训练循环几乎相同, 计算损失函数接受节点类型和预测的两个字典。 .. code:: python model = Model(in_features, hidden_features, out_features, num_classes, etypes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, edge_subgraph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] edge_subgraph = edge_subgraph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] edge_labels = edge_subgraph.edata['labels'] edge_predictions = model(edge_subgraph, blocks, input_features) loss = compute_loss(edge_labels, edge_predictions) opt.zero_grad() loss.backward() opt.step() `GCMC `__ 是一个在二分图上做边分类的代码示例。 ================================================ FILE: docs/source/guide_cn/minibatch-inference.rst ================================================ .. 
_guide_cn-minibatch-inference: 6.6 超大图上的精准离线推断 ------------------------------------------------------ :ref:`(English Version) ` 子图采样和邻居采样都是为了减少用GPU训练GNN模型的内存和时间消耗。在进行推断时, 通常更好的方法是将所有邻居进行真正的聚合,以避免采样所带来的随机性。 然而,在GPU上进行全图前向传播通常由于显存大小的限制而不可行,而在CPU上进行则计算速度很慢。 本节介绍了在GPU显存有限的情况下通过小批次处理和邻居采样实现全图前向传播的方法。 推断算法不同于训练算法,因为需要从第一层开始对节点表示逐层计算。具体来说,对于一个指定的层, 需要以小批次的方式计算这个GNN层所有节点的输出表示。其结果是,推断算法将包含一个外循环以迭代执行各层, 和一个内循环以迭代处理各个节点小批次。相比之下,训练算法有一个外循环以迭代处理各个节点小批次, 和一个内循环以迭代执行各层(包含邻居采样和消息传递)。 下面的动画展示了计算的过程(注意,每层只展示前3个小批次): .. figure:: https://data.dgl.ai/asset/image/guide_6_6_0.gif :alt: Imgur 实现离线推断 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 这里以6.1节中 :ref:`guide_cn-minibatch-node-classification-model` 提到的两层GCN为例。实现离线推断的方法依然需要使用 ``MultiLayerFullNeighborSampler``, 但它每次只为一层进行采样。注意,这里的离线推断被实现为GNN模块的一个方法, 这是因为它对一层的计算依赖于消息的聚合和结合。 .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.hidden_features = hidden_features self.out_features = out_features self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) self.n_layers = 2 def forward(self, blocks, x): x_dst = x[:blocks[0].number_of_dst_nodes()] x = F.relu(self.conv1(blocks[0], (x, x_dst))) x_dst = x[:blocks[1].number_of_dst_nodes()] x = F.relu(self.conv2(blocks[1], (x, x_dst))) return x def inference(self, g, x, batch_size, device): """ 用该模块进行离线推断 """ # 逐层计算表示 for l, layer in enumerate([self.conv1, self.conv2]): y = torch.zeros(g.num_nodes(), self.hidden_features if l != self.n_layers - 1 else self.out_features) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) dataloader = dgl.dataloading.NodeDataLoader( g, torch.arange(g.num_nodes()), sampler, batch_size=batch_size, shuffle=True, drop_last=False) # 在一层中,依批次对节点进行迭代 for input_nodes, output_nodes, blocks in dataloader: block = blocks[0] # 将必要输入节点的特征复制到GPU上 h = x[input_nodes].to(device) # 计算输出,注意计算方法是一样的,但只对一层进行计算 h_dst = 
h[:block.number_of_dst_nodes()] h = F.relu(layer(block, (h, h_dst))) # 将输出复制回CPU y[output_nodes] = h.cpu() x = y return y 注意,如果以模型选择为目的在验证集上计算评价指标,则通常不需要进行计算精确的离线推断。 原因是这需要为每一层上的每个节点计算表示,会非常消耗资源,尤其是在包含大量未标记数据的半监督系统中。 邻居采样在这个时候可以更好地发挥作用。 对于离线推断的示例,用户可以参照 `GraphSAGE `__ 和 `RGCN `__。 ================================================ FILE: docs/source/guide_cn/minibatch-link.rst ================================================ .. _guide_cn-minibatch-link-classification-sampler: 6.3 针对链接预测任务的邻居采样训练方法 -------------------------------------------------------------------- :ref:`(English Version) ` 结合负采样来定义邻居采样器和数据加载器 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 用户仍然可以使用与节点/边分类中相同的邻居采样器。 .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) DGL中的 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 还支持生成用于链接预测的负样本。为此,用户需要定义负采样函数。例如, :class:`~dgl.dataloading.negative_sampler.Uniform` 函数是基于均匀分布的采样函数,它对于每个边的源节点,采样 ``k`` 个负样本的目标节点。 以下数据加载器将为每个边的源节点均匀采样5个负样本的目标节点。 .. code:: python dataloader = dgl.dataloading.EdgeDataLoader( g, train_seeds, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(5), batch_size=args.batch_size, shuffle=True, drop_last=False, pin_memory=True, num_workers=args.num_workers) 关于内置的负采样方法,用户可以参考 :ref:`api-dataloading-negative-sampling`。 用户还可以自定义负采样函数,它应当以原图 ``g`` 和小批量的边ID数组 ``eid`` 作为入参, 并返回源节点ID数组和目标节点ID数组。 下面给出了一个自定义的负采样方法的示例,该采样方法根据与节点的度的幂成正比的概率分布对负样本目标节点进行采样。 .. 
code:: python class NegativeSampler(object): def __init__(self, g, k): # 缓存概率分布 self.weights = g.in_degrees().float() ** 0.75 self.k = k def __call__(self, g, eids): src, _ = g.find_edges(eids) src = src.repeat_interleave(self.k) dst = self.weights.multinomial(len(src), replacement=True) return src, dst dataloader = dgl.dataloading.EdgeDataLoader( g, train_seeds, sampler, negative_sampler=NegativeSampler(g, 5), batch_size=args.batch_size, shuffle=True, drop_last=False, pin_memory=True, num_workers=args.num_workers) 调整模型以进行小批次训练 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 如 :ref:`guide_cn-training-link-prediction` 中所介绍的, 用户可以通过比较边(正样本)与不存在的边(负样本)的得分来训练链路模型。用户可以重用在边分类/回归中的节点表示模型, 来计算边的分数。 .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x 对于得分的预测,只需要预测每个边的标量分数而不是类别的概率分布, 因此本示例说明了如何使用边的两个端点的向量的点积来计算分数。 .. code:: python class ScorePredictor(nn.Module): def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x edge_subgraph.apply_edges(dgl.function.u_dot_v('x', 'x', 'score')) return edge_subgraph.edata['score'] 使用负采样方法后,DGL的数据加载器将为每个小批次生成三项: - 一个正样本图,其中包含采样得到的小批次内所有的边。 - 一个负样本图,其中包含由负采样方法生成的所有不存在的边。 - 邻居采样方法生成的块的列表。 因此,可以如下定义链接预测模型,该模型的输入包括上述三项以及输入的特征。 .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.gcn = StochasticTwoLayerGCN( in_features, hidden_features, out_features) self.predictor = ScorePredictor() def forward(self, positive_graph, negative_graph, blocks, x): x = self.gcn(blocks, x) pos_score = self.predictor(positive_graph, x) neg_score = self.predictor(negative_graph, x) return pos_score, neg_score 模型的训练 ~~~~~~~~~~~~~ 训练循环通过数据加载器去遍历数据,将得到的图和输入特征传入上述模型。 ..
code:: python model = Model(in_features, hidden_features, out_features) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, positive_graph, negative_graph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] positive_graph = positive_graph.to(torch.device('cuda')) negative_graph = negative_graph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] pos_score, neg_score = model(positive_graph, negative_graph, blocks, input_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() DGL提供了在同构图上做链路预测的一个示例: `无监督学习GraphSAGE `__。 异构图上的随机批次训练 ~~~~~~~~~~~~~~~~~~~~~~~~ 计算异构图上的节点表示的模型也可以用于计算边分类/回归中的边两端节点的表示。 .. code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x 对于得分的预测,同构图和异构图之间唯一的实现差异是后者需要用 :meth:`dgl.DGLGraph.apply_edges` 来遍历所有的边类型。 .. 
code:: python class ScorePredictor(nn.Module): def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x for etype in edge_subgraph.canonical_etypes: edge_subgraph.apply_edges( dgl.function.u_dot_v('x', 'x', 'score'), etype=etype) return edge_subgraph.edata['score'] class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes, etypes): super().__init__() self.rgcn = StochasticTwoLayerRGCN( in_features, hidden_features, out_features, etypes) self.pred = ScorePredictor() def forward(self, positive_graph, negative_graph, blocks, x): x = self.rgcn(blocks, x) pos_score = self.pred(positive_graph, x) neg_score = self.pred(negative_graph, x) return pos_score, neg_score 数据加载器的定义也与边分类/回归里的定义非常相似。唯一的区别是用户需要提供负采样方法, 并且提供边类型和边ID张量的字典,而不是节点类型和节点ID张量的字典。 .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(5), batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 如果用户想自定义负采样函数,那么该函数应以初始图以及由边类型和边ID张量构成的字典作为输入。 它返回以边类型为键、源节点-目标节点数组对为值的字典。示例如下所示: .. code:: python class NegativeSampler(object): def __init__(self, g, k): # 缓存概率分布 self.weights = { etype: g.in_degrees(etype=etype).float() ** 0.75 for _, etype, _ in g.canonical_etypes } self.k = k def __call__(self, g, eids_dict): result_dict = {} for etype, eids in eids_dict.items(): src, _ = g.find_edges(eids, etype=etype) src = src.repeat_interleave(self.k) dst = self.weights[etype].multinomial(len(src), replacement=True) result_dict[etype] = (src, dst) return result_dict 随后,需要向数据载入器提供边类型和对应边ID的字典,以及负采样器。示例如下所示: .. 
code:: python train_eid_dict = { etype: g.edges(etype=etype, form='eid') for etype in g.etypes} dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, negative_sampler=NegativeSampler(g, 5), batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 异构图上的随机批次模型训练与同构图中的训练几乎相同,不同之处在于, ``compute_loss`` 是以边类型字典和预测结果字典作为输入。 .. code:: python model = Model(in_features, hidden_features, out_features, num_classes, etypes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, positive_graph, negative_graph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] positive_graph = positive_graph.to(torch.device('cuda')) negative_graph = negative_graph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] pos_score, neg_score = model(positive_graph, negative_graph, blocks, input_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() ================================================ FILE: docs/source/guide_cn/minibatch-nn.rst ================================================ .. _guide_cn-minibatch-custom-gnn-module: 6.5 为小批次训练实现定制化的GNN模块 ------------------------------------------------------------- :ref:`(English Version) ` 如果用户熟悉如何定制用于更新整个同构图或异构图的GNN模块(参见 :ref:`guide_cn-nn`),那么在块上计算的代码也是类似的,区别只在于节点被划分为输入节点和输出节点。 以下面的自定义图卷积模块代码为例。注意,该代码并不一定是最高效的实现, 此处只是将其作为自定义GNN模块的一个示例。 .. code:: python class CustomGraphConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) def forward(self, g, h): with g.local_scope(): g.ndata['h'] = h g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) return self.W(torch.cat([g.ndata['h'], g.ndata['h_neigh']], 1)) 如果用户已有一个用于整个图的自定义消息传递模块,并且想将其用于块,则只需要按照如下的方法重写forward函数。 注意,以下代码在注释里保留了整图实现的语句,用户可以将用于块的语句和原先用于整图的语句进行比较。 ..
code:: python class CustomGraphConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) # h现在是输入和输出节点的特征张量对,而不是一个单独的特征张量 # def forward(self, g, h): def forward(self, block, h): # with g.local_scope(): with block.local_scope(): # g.ndata['h'] = h h_src = h h_dst = h[:block.number_of_dst_nodes()] block.srcdata['h'] = h_src block.dstdata['h'] = h_dst # g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) block.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) # return self.W(torch.cat([g.ndata['h'], g.ndata['h_neigh']], 1)) return self.W(torch.cat( [block.dstdata['h'], block.dstdata['h_neigh']], 1)) 通常,需要对用于整图的GNN模块进行如下调整以将其用于块作为输入的情况: - 切片取输入特征的前几行,得到输出节点的特征。切片行数可以通过 :meth:`block.number_of_dst_nodes ` 获得。 - 如果原图只包含一种节点类型,对输入节点特征,将 :attr:`g.ndata ` 替换为 :attr:`block.srcdata `;对于输出节点特征,将 :attr:`g.ndata ` 替换为 :attr:`block.dstdata `。 - 如果原图包含多种节点类型,对于输入节点特征,将 :attr:`g.nodes ` 替换为 :attr:`block.srcnodes `;对于输出节点特征,将 :attr:`g.nodes ` 替换为 :attr:`block.dstnodes `。 - 对于输入节点数量,将 :meth:`g.num_nodes ` 替换为 :meth:`block.number_of_src_nodes ` ; 对于输出节点数量,将 :meth:`g.num_nodes ` 替换为 :meth:`block.number_of_dst_nodes ` 。 异构图上的模型定制 ~~~~~~~~~~~~~~~~~~~~ 为异构图修改GNN模块的方法是类似的。例如,以下面用于全图的GNN模块为例: .. 
code:: python class CustomHeteroGraphConv(nn.Module): def __init__(self, g, in_feats, out_feats): super().__init__() self.Ws = nn.ModuleDict() for etype in g.canonical_etypes: utype, _, vtype = etype self.Ws[etype] = nn.Linear(in_feats[utype], out_feats[vtype]) for ntype in g.ntypes: self.Vs[ntype] = nn.Linear(in_feats[ntype], out_feats[ntype]) def forward(self, g, h): with g.local_scope(): for ntype in g.ntypes: g.nodes[ntype].data['h_dst'] = self.Vs[ntype](h[ntype]) g.nodes[ntype].data['h_src'] = h[ntype] for etype in g.canonical_etypes: utype, _, vtype = etype g.update_all( fn.copy_u('h_src', 'm'), fn.mean('m', 'h_neigh'), etype=etype) g.nodes[vtype].data['h_dst'] = g.nodes[vtype].data['h_dst'] + \ self.Ws[etype](g.nodes[vtype].data['h_neigh']) return {ntype: g.nodes[ntype].data['h_dst'] for ntype in g.ntypes} 对于 ``CustomHeteroGraphConv``,原则是将 ``g.nodes`` 替换为 ``g.srcnodes`` 或 ``g.dstnodes`` (根据需要输入还是输出节点的特征来选择)。 .. code:: python class CustomHeteroGraphConv(nn.Module): def __init__(self, g, in_feats, out_feats): super().__init__() self.Ws = nn.ModuleDict() for etype in g.canonical_etypes: utype, _, vtype = etype self.Ws[etype] = nn.Linear(in_feats[utype], out_feats[vtype]) for ntype in g.ntypes: self.Vs[ntype] = nn.Linear(in_feats[ntype], out_feats[ntype]) def forward(self, g, h): with g.local_scope(): for ntype in g.ntypes: h_src, h_dst = h[ntype] g.dstnodes[ntype].data['h_dst'] = self.Vs[ntype](h[ntype]) g.srcnodes[ntype].data['h_src'] = h[ntype] for etype in g.canonical_etypes: utype, _, vtype = etype g.update_all( fn.copy_u('h_src', 'm'), fn.mean('m', 'h_neigh'), etype=etype) g.dstnodes[vtype].data['h_dst'] = \ g.dstnodes[vtype].data['h_dst'] + \ self.Ws[etype](g.dstnodes[vtype].data['h_neigh']) return {ntype: g.dstnodes[ntype].data['h_dst'] for ntype in g.ntypes} 实现能够处理同构图、二分图和块的模块 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL中所有的消息传递模块(参见 :ref:`apinn`)都能够处理同构图、 单向二分图(包含两种节点类型和一种边类型)和包含一种边类型的块。 
本质上,内置的DGL神经网络模块的输入图及特征必须满足下列情况之一: - 如果输入特征是一个张量对,则输入图必须是一个单向二分图 - 如果输入特征是一个单独的张量且输入图是一个块,则DGL会自动将输入节点特征前一部分设为输出节点的特征。 - 如果输入特征是一个单独的张量且输入图不是块,则输入图必须是同构图。 例如,下面的代码是 :class:`dgl.nn.pytorch.SAGEConv` 的简化版(DGL同样支持它在MXNet和TensorFlow后端里的实现)。 代码里移除了归一化,且只考虑平均聚合函数的情况。 .. code:: python import dgl.function as fn class SAGEConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) def forward(self, g, h): if isinstance(h, tuple): h_src, h_dst = h elif g.is_block: h_src = h h_dst = h[:g.number_of_dst_nodes()] else: h_src = h_dst = h g.srcdata['h'] = h_src g.dstdata['h'] = h_dst g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h_neigh')) return F.relu( self.W(torch.cat([g.dstdata['h'], g.dstdata['h_neigh']], 1))) :ref:`guide_cn-nn` 提供了对 :class:`dgl.nn.pytorch.SAGEConv` 代码的详细解读, 其适用于单向二分图、同构图和块。 ================================================ FILE: docs/source/guide_cn/minibatch-node.rst ================================================ .. _guide_cn-minibatch-node-classification-sampler: 6.1 针对节点分类任务的邻居采样训练方法 ----------------------------------------------------------------------- :ref:`(English Version) ` 为了随机(批次)训练模型,需要进行以下操作: - 定义邻居采样器。 - 调整模型以进行小批次训练。 - 修改模型训练循环部分。 以下小节将逐一介绍这些步骤。 定义邻居采样器和数据加载器 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL提供了几个邻居采样类,这些类会生成需计算的节点在每一层计算时所需的依赖图。 最简单的邻居采样器是 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler`,它可获取节点的所有邻居。 要使用DGL提供的采样器,还需要将其与 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 结合使用,后者可以以小批次的形式对一个节点的集合进行迭代。 例如,以下代码创建了一个PyTorch的 DataLoader,它分批迭代训练节点ID数组 ``train_nids``, 并将生成的子图列表放到GPU上。 .. code:: python import dgl import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.NodeDataLoader( g, train_nids, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 对DataLoader进行迭代,将会创建一个特定图的列表,这些图表示每层的计算依赖。在DGL中称之为 *块*。 .. 
code:: python input_nodes, output_nodes, blocks = next(iter(dataloader)) print(blocks) 上面的dataloader一次迭代会生成三个输出。 ``input_nodes`` 代表计算 ``output_nodes`` 的表示所需的节点。 ``块`` 包含了每个GNN层要计算哪些节点表示作为输出,要将哪些节点表示作为输入,以及来自输入节点的表示如何传播到输出节点。 完整的内置采样方法清单,用户可以参考 :ref:`neighborhood sampler API reference `。 如果用户希望编写自己的邻居采样器,或者想要关于块的更深入的介绍,读者可以参考 :ref:`guide_cn-minibatch-customizing-neighborhood-sampler`。 .. _guide_cn-minibatch-node-classification-model: 调整模型以进行小批次训练 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 如果用户的消息传递模块全使用的是DGL内置模块,则模型在进行小批次训练时只需做很小的调整。 以多层GCN为例。如果用户模型在全图上是按以下方式实现的: .. code:: python class TwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dglnn.GraphConv(in_features, hidden_features) self.conv2 = dglnn.GraphConv(hidden_features, out_features) def forward(self, g, x): x = F.relu(self.conv1(g, x)) x = F.relu(self.conv2(g, x)) return x 然后,用户所需要做的就是用上面生成的块( ``block`` )来替换图( ``g`` )。 .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x 上面的DGL ``GraphConv`` 模块接受的一个参数是数据加载器生成的 ``块`` 中的一个元素。 用户可以查阅 :ref:`NN模块的API参考 ` 来查看DGL的内置模型模块是否支持接受 ``块`` 作为参数。 如果希望使用自定义的消息传递模块,用户可以参考 :ref:`guide_cn-minibatch-custom-gnn-module`。 模型的训练 ~~~~~~~~~~~~~ 这里的模型的训练循环仅包含使用定制的批处理迭代器遍历数据集的内容。在每个生成块列表的迭代中: 1. 将与输入节点相对应的节点特征加载到GPU上。节点特征可以存储在内存或外部存储中。 请注意,用户只需要加载输入节点的特征,而不是像整图训练那样加载所有节点的特征。 如果特征存储在 ``g.ndata`` 中,则可以通过 ``blocks[0].srcdata`` 来加载第一个块的输入节点的特征, 这些节点是计算节点最终表示所需的所有必需的节点。 2. 将块列表和输入节点特征传入多层GNN并获取输出。 3. 将与输出节点相对应的节点标签加载到GPU上。同样,节点标签可以存储在内存或外部存储器中。 再次提醒下,用户只需要加载输出节点的标签,而不是像整图训练那样加载所有节点的标签。 如果特征存储在 ``g.ndata`` 中,则可以通过访问 ``blocks[-1].dstdata`` 中的特征来加载标签, 它是最后一个块的输出节点的特征,这些节点与用户希望计算最终表示的节点相同。 4. 计算损失并反向传播。 .. 
code:: python model = StochasticTwoLayerGCN(in_features, hidden_features, out_features) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, output_nodes, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] input_features = blocks[0].srcdata['features'] output_labels = blocks[-1].dstdata['label'] output_predictions = model(blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() DGL提供了一个端到端的随机批次训练示例 `GraphSAGE的实现 `__。 异构图上模型的训练 ~~~~~~~~~~~~~~~~~~~~~~~~ 在异构图上训练图神经网络进行节点分类的方法也是类似的。 例如,在 :ref:`guide_cn-training-rgcn-node-classification` 中介绍了如何在整图上训练一个2层的RGCN模型。 RGCN小批次训练的代码与它非常相似(为简单起见,这里删除了自环、非线性和基分解): .. code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x DGL提供的一些采样方法也支持异构图。例如,用户仍然可以使用 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 类和 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 类进行随机批次训练。 对于全邻居采样,唯一的区别是用户需要为训练集指定节点类型和节点ID的字典。 .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.NodeDataLoader( g, train_nid_dict, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 模型的训练与同构图几乎相同。不同之处在于, ``compute_loss`` 的实现会包含两个字典:节点类型和预测结果。 .. 
code:: python model = StochasticTwoLayerRGCN(in_features, hidden_features, out_features, etypes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, output_nodes, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] input_features = blocks[0].srcdata # returns a dict output_labels = blocks[-1].dstdata # returns a dict output_predictions = model(blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() DGL提供了端到端随机批次训练的 `RGCN的实现 `__。 ================================================ FILE: docs/source/guide_cn/minibatch.rst ================================================ .. _guide_cn-minibatch: 第6章:在大图上的随机(批次)训练 ======================================================= :ref:`(English Version) ` 如果用户有包含数百万甚至数十亿个节点或边的大图,通常无法进行 :ref:`guide_cn-training` 中所述的全图训练。考虑在一个有 :math:`N` 个节点的图上运行的、隐层大小为 :math:`H` 的 :math:`L` 层图卷积网络, 存储隐层表示需要 :math:`O(NLH)` 的内存空间,当 :math:`N` 较大时,这很容易超过一块GPU的显存限制。 本章介绍了一种在大图上进行随机小批次训练的方法,可以让用户不用一次性把所有节点特征拷贝到GPU上。 邻居采样方法概述 -------------------------------------------- 邻居节点采样的工作流程通常如下:每次梯度下降,选择一个小批次的图节点, 其最终表示将在神经网络的第 :math:`L` 层进行计算,然后在网络的第 :math:`L-1` 层选择该批次节点的全部或部分邻居节点。 重复这个过程,直到到达输入层。这个迭代过程会构建计算的依赖关系图,从输出开始,一直到输入,如下图所示: .. figure:: https://data.dgl.ai/asset/image/guide_6_0_0.png :alt: Imgur 该方法能节省在大图上训练图神经网络的开销和计算资源。 DGL实现了一些邻居节点采样的方法和使用邻居节点采样训练图神经网络的管道,同时也支持让用户自定义采样策略。 本章路线图 ----------- 本章的前半部分介绍了不同场景下如何进行随机训练的方法。 * :ref:`guide_cn-minibatch-node-classification-sampler` * :ref:`guide_cn-minibatch-edge-classification-sampler` * :ref:`guide_cn-minibatch-link-classification-sampler` 本章余下的小节介绍了更多的高级主题,面向那些想要开发新的采样算法、 想要实现与小批次训练兼容的图神经网络模块、以及想要了解如何在小批次数据上进行评估和推理模型的用户。 * :ref:`guide_cn-minibatch-customizing-neighborhood-sampler` * :ref:`guide_cn-minibatch-custom-gnn-module` * :ref:`guide_cn-minibatch-inference` .. 
toctree:: :maxdepth: 1 :hidden: :glob: minibatch-node minibatch-edge minibatch-link minibatch-custom-sampler minibatch-nn minibatch-inference ================================================ FILE: docs/source/guide_cn/nn-construction.rst ================================================ .. _guide_cn-nn-construction: 3.1 DGL NN模块的构造函数 ----------------------------- :ref:`(English Version) ` 构造函数完成以下几个任务: 1. 设置选项。 2. 注册可学习的参数或者子模块。 3. 初始化参数。 .. code:: import torch.nn as nn from dgl.utils import expand_as_pair class SAGEConv(nn.Module): def __init__(self, in_feats, out_feats, aggregator_type, bias=True, norm=None, activation=None): super(SAGEConv, self).__init__() self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._aggre_type = aggregator_type self.norm = norm self.activation = activation 在构造函数中,用户首先需要设置数据的维度。对于一般的PyTorch模块,维度通常包括输入的维度、输出的维度和隐层的维度。 对于图神经网络,输入维度可被分为源节点特征维度和目标节点特征维度。 除了数据维度,图神经网络的一个典型选项是聚合类型(``self._aggre_type``)。对于特定目标节点,聚合类型决定了如何聚合不同边上的信息。 常用的聚合类型包括 ``mean``、 ``sum``、 ``max`` 和 ``min``。一些模块可能会使用更加复杂的聚合函数,比如 ``lstm``。 上面代码里的 ``norm`` 是用于特征归一化的可调用函数。在SAGEConv论文里,归一化可以是L2归一化: :math:`h_v = h_v / \lVert h_v \rVert_2`。 .. code:: # 聚合类型:mean、pool、lstm、gcn if aggregator_type not in ['mean', 'pool', 'lstm', 'gcn']: raise KeyError('Aggregator type {} not supported.'.format(aggregator_type)) if aggregator_type == 'pool': self.fc_pool = nn.Linear(self._in_src_feats, self._in_src_feats) if aggregator_type == 'lstm': self.lstm = nn.LSTM(self._in_src_feats, self._in_src_feats, batch_first=True) if aggregator_type in ['mean', 'pool', 'lstm']: self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=bias) self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=bias) self.reset_parameters() 注册参数和子模块。在SAGEConv中,子模块根据聚合类型而有所不同。这些模块是纯PyTorch NN模块,例如 ``nn.Linear``、 ``nn.LSTM`` 等。 构造函数的最后调用了 ``reset_parameters()`` 进行权重初始化。 .. 
code:: def reset_parameters(self): """重新初始化可学习的参数""" gain = nn.init.calculate_gain('relu') if self._aggre_type == 'pool': nn.init.xavier_uniform_(self.fc_pool.weight, gain=gain) if self._aggre_type == 'lstm': self.lstm.reset_parameters() if self._aggre_type != 'gcn': nn.init.xavier_uniform_(self.fc_self.weight, gain=gain) nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain) ================================================ FILE: docs/source/guide_cn/nn-forward.rst ================================================ .. _guide_cn-nn-forward: 3.2 编写DGL NN模块的forward函数 --------------------------------- :ref:`(English Version) ` 在NN模块中, ``forward()`` 函数执行了实际的消息传递和计算。与通常以张量为参数的PyTorch NN模块相比, DGL NN模块额外增加了1个参数 :class:`dgl.DGLGraph`。``forward()`` 函数的内容一般可以分为3项操作: - 检测输入图对象是否符合规范。 - 消息传递和聚合。 - 聚合后,更新特征作为输出。 下文展示了SAGEConv示例中的 ``forward()`` 函数。 输入图对象的规范检测 ~~~~~~~~~~~~~~~~~~~~~ .. code:: def forward(self, graph, feat): with graph.local_scope(): # 指定图类型,然后根据图类型扩展输入特征 feat_src, feat_dst = expand_as_pair(feat, graph) ``forward()`` 函数需要处理输入的许多极端情况,这些情况可能导致计算和消息传递中的值无效。 比如在 :class:`~dgl.nn.pytorch.conv.GraphConv` 等conv模块中,DGL会检查输入图中是否有入度为0的节点。 当1个节点入度为0时, ``mailbox`` 将为空,并且聚合函数的输出值全为0, 这可能会导致模型性能不佳。但是,在 :class:`~dgl.nn.pytorch.conv.SAGEConv` 模块中,被聚合的特征将会与节点的初始特征拼接起来, ``forward()`` 函数的输出不会全为0。在这种情况下,无需进行此类检验。 DGL NN模块可在不同类型的图输入中重复使用,包括:同构图、异构图(:ref:`guide_cn-graph-heterogeneous`)和子图块(:ref:`guide_cn-minibatch`)。 SAGEConv的数学公式如下: .. math:: h_{\mathcal{N}(dst)}^{(l+1)} = \mathrm{aggregate} \left(\{h_{src}^{l}, \forall src \in \mathcal{N}(dst) \}\right) .. math:: h_{dst}^{(l+1)} = \sigma \left(W \cdot \mathrm{concat} (h_{dst}^{l}, h_{\mathcal{N}(dst)}^{l+1}) + b \right) .. math:: h_{dst}^{(l+1)} = \mathrm{norm}(h_{dst}^{l+1}) 源节点特征 ``feat_src`` 和目标节点特征 ``feat_dst`` 需要根据图类型被指定。 用于指定图类型并将 ``feat`` 扩展为 ``feat_src`` 和 ``feat_dst`` 的函数是 :meth:`~dgl.utils.expand_as_pair`。 该函数的细节如下所示。 .. 
code:: def expand_as_pair(input_, g=None): if isinstance(input_, tuple): # 二分图的情况 return input_ elif g is not None and g.is_block: # 子图块的情况 if isinstance(input_, Mapping): input_dst = { k: F.narrow_row(v, 0, g.number_of_dst_nodes(k)) for k, v in input_.items()} else: input_dst = F.narrow_row(input_, 0, g.number_of_dst_nodes()) return input_, input_dst else: # 同构图的情况 return input_, input_ 对于同构图上的全图训练,源节点和目标节点相同,它们都是图中的所有节点。 在异构图的情况下,图可以分为几个二分图,每种关系对应一个。关系表示为 ``(src_type, edge_type, dst_type)``。 当输入特征 ``feat`` 是1个元组时,图将会被视为二分图。元组中的第1个元素为源节点特征,第2个元素为目标节点特征。 在小批次训练中,计算应用于给定的一堆目标节点所采样的子图。子图在DGL中称为区块(``block``)。 在区块创建的阶段,``dst nodes`` 位于节点列表的最前面。通过索引 ``[0:g.number_of_dst_nodes()]`` 可以找到 ``feat_dst``。 确定 ``feat_src`` 和 ``feat_dst`` 之后,以上3种图类型的计算方法是相同的。 消息传递和聚合 ~~~~~~~~~~~~~~~~~ .. code:: import dgl.function as fn import torch.nn.functional as F from dgl.utils import check_eq_shape if self._aggre_type == 'mean': graph.srcdata['h'] = feat_src graph.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'neigh')) h_neigh = graph.dstdata['neigh'] elif self._aggre_type == 'gcn': check_eq_shape(feat) graph.srcdata['h'] = feat_src graph.dstdata['h'] = feat_dst graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'neigh')) # 除以入度 degs = graph.in_degrees().to(feat_dst) h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) / (degs.unsqueeze(-1) + 1) elif self._aggre_type == 'pool': graph.srcdata['h'] = F.relu(self.fc_pool(feat_src)) graph.update_all(fn.copy_u('h', 'm'), fn.max('m', 'neigh')) h_neigh = graph.dstdata['neigh'] else: raise KeyError('Aggregator type {} not recognized.'.format(self._aggre_type)) # GraphSAGE中gcn聚合不需要fc_self if self._aggre_type == 'gcn': rst = self.fc_neigh(h_neigh) else: rst = self.fc_self(h_self) + self.fc_neigh(h_neigh) 上面的代码执行了消息传递和聚合的计算。这部分代码会因模块而异。请注意,代码中的所有消息传递均使用 :meth:`~dgl.DGLGraph.update_all` API和 DGL内置的消息/聚合函数来实现,以充分利用 :ref:`guide_cn-message-passing-efficient` 里所介绍的性能优化。 聚合后,更新特征作为输出 ~~~~~~~~~~~~~~~~~~~~~~~~~~ ..
code:: # 激活函数 if self.activation is not None: rst = self.activation(rst) # 归一化 if self.norm is not None: rst = self.norm(rst) return rst ``forward()`` 函数的最后一部分是在完成消息聚合后更新节点的特征。 常见的更新操作是根据构造函数中设置的选项来应用激活函数和进行归一化。 ================================================ FILE: docs/source/guide_cn/nn-heterograph.rst ================================================ .. _guide_cn-nn-heterograph: 3.3 异构图上的GraphConv模块 -------------------------------- :ref:`(English Version) ` DGL提供了 :class:`~dgl.nn.pytorch.HeteroGraphConv`,用于定义异构图上GNN模块。 实现逻辑与消息传递级别的API :meth:`~dgl.DGLGraph.multi_update_all` 相同,它包括: - 每个关系上的DGL NN模块。 - 聚合来自不同关系上的结果。 其数学定义为: .. math:: h_{dst}^{(l+1)} = \underset{r\in\mathcal{R}, r_{dst}=dst}{AGG} (f_r(g_r, h_{r_{src}}^l, h_{r_{dst}}^l)) 其中 :math:`f_r` 是对应每个关系 :math:`r` 的NN模块,:math:`AGG` 是聚合函数。 HeteroGraphConv的实现逻辑 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: import torch.nn as nn class HeteroGraphConv(nn.Module): def __init__(self, mods, aggregate='sum'): super(HeteroGraphConv, self).__init__() self.mods = nn.ModuleDict(mods) if isinstance(aggregate, str): # 获取聚合函数的内部函数 self.agg_fn = get_aggregate_fn(aggregate) else: self.agg_fn = aggregate 异构图的卷积操作接受一个字典类型参数 ``mods``。这个字典的键为关系名,值为作用在该关系上NN模块对象。参数 ``aggregate`` 则指定了如何聚合来自不同关系的结果。 .. code:: def forward(self, g, inputs, mod_args=None, mod_kwargs=None): if mod_args is None: mod_args = {} if mod_kwargs is None: mod_kwargs = {} outputs = {nty : [] for nty in g.dsttypes} 除了输入图和输入张量,``forward()`` 函数还使用2个额外的字典参数 ``mod_args`` 和 ``mod_kwargs``。 这2个字典与 ``self.mods`` 具有相同的键,值则为对应NN模块的自定义参数。 ``forward()`` 函数的输出结果也是一个字典类型的对象。其键为 ``nty``,其值为每个目标节点类型 ``nty`` 的输出张量的列表, 表示来自不同关系的计算结果。``HeteroGraphConv`` 会对这个列表进一步聚合,并将结果返回给用户。 .. 
code:: if g.is_block: src_inputs = inputs dst_inputs = {k: v[:g.number_of_dst_nodes(k)] for k, v in inputs.items()} else: src_inputs = dst_inputs = inputs for stype, etype, dtype in g.canonical_etypes: rel_graph = g[stype, etype, dtype] if rel_graph.num_edges() == 0: continue if stype not in src_inputs or dtype not in dst_inputs: continue dstdata = self.mods[etype]( rel_graph, (src_inputs[stype], dst_inputs[dtype]), *mod_args.get(etype, ()), **mod_kwargs.get(etype, {})) outputs[dtype].append(dstdata) 输入 ``g`` 可以是异构图或来自异构图的子图区块。和普通的NN模块一样,``forward()`` 函数需要分别处理不同的输入图类型。 上述代码中的for循环为处理异构图计算的主要逻辑。首先我们遍历图中所有的关系(通过调用 ``canonical_etypes``)。 通过关系名,我们可以使用g[ ``stype, etype, dtype`` ]的语法将只包含该关系的子图( ``rel_graph`` )抽取出来。 对于二分图,输入特征将被组织为元组 ``(src_inputs[stype], dst_inputs[dtype])``。 接着调用用户预先注册在该关系上的NN模块,并将结果保存在outputs字典中。 .. code:: rsts = {} for nty, alist in outputs.items(): if len(alist) != 0: rsts[nty] = self.agg_fn(alist, nty) 最后,``HeteroGraphConv`` 会调用用户注册的 ``self.agg_fn`` 函数聚合来自多个关系的结果。 读者可以在API文档中找到 :class:`~dgl.nn.pytorch.HeteroGraphConv` 的示例。 ================================================ FILE: docs/source/guide_cn/nn.rst ================================================ .. _guide_cn-nn: 第3章:构建图神经网络(GNN)模块 =================================== :ref:`(English Version) ` DGL NN模块是用户构建GNN模型的基本模块。 DGL NN模块的父类取决于DGL所使用的后端深度神经网络框架。对于PyTorch后端, 它应该继承 `PyTorch的NN模块 `__;对于MXNet后端,它应该继承 `MXNet Gluon的NN块 `__; 对于TensorFlow后端,它应该继承 `Tensorflow的Keras层 `__。 在DGL NN模块中,构造函数中的参数注册和前向传播函数中使用的张量操作与后端框架一样。这种方式使得DGL的代码可以无缝嵌入到后端框架的代码中。 DGL和这些深度神经网络框架的主要差异是其独有的消息传递操作。 DGL已经集成了很多常用的 :ref:`apinn-pytorch-conv`、 :ref:`apinn-pytorch-dense-conv`、 :ref:`apinn-pytorch-pooling` 和 :ref:`apinn-pytorch-util`。欢迎给DGL贡献更多的模块! 本章将使用PyTorch作为后端,用 :class:`~dgl.nn.pytorch.conv.SAGEConv` 作为例子来介绍如何构建用户自己的DGL NN模块。 本章路线图 ------------ * :ref:`guide_cn-nn-construction` * :ref:`guide_cn-nn-forward` * :ref:`guide_cn-nn-heterograph` ..
toctree:: :maxdepth: 1 :hidden: :glob: nn-construction nn-forward nn-heterograph ================================================ FILE: docs/source/guide_cn/training-edge.rst ================================================ .. _guide_cn-training-edge-classification: 5.2 边分类/回归 --------------------------------------------- :ref:`(English Version) ` 有时用户希望预测图中边的属性值,这种情况下,用户需要构建一个边分类/回归的模型。 以下代码生成了一个随机图用于演示边分类/回归。 .. code:: python src = np.random.randint(0, 100, 500) dst = np.random.randint(0, 100, 500) # 同时建立反向边 edge_pred_graph = dgl.graph((np.concatenate([src, dst]), np.concatenate([dst, src]))) # 建立点和边特征,以及边的标签 edge_pred_graph.ndata['feature'] = torch.randn(100, 10) edge_pred_graph.edata['feature'] = torch.randn(1000, 10) edge_pred_graph.edata['label'] = torch.randn(1000) # 进行训练、验证和测试集划分 edge_pred_graph.edata['train_mask'] = torch.zeros(1000, dtype=torch.bool).bernoulli(0.6) 概述 ~~~~~~~~ 上一节介绍了如何使用多层GNN进行节点分类。同样的方法也可以被用于计算任何节点的隐藏表示。 并从边的两个端点的表示,通过计算得出对边属性的预测。 对一条边计算预测值最常见的情况是将预测表示为一个函数,函数的输入为两个端点的表示, 输入还可以包括边自身的特征。 与节点分类在模型实现上的差别 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 如果用户使用上一节中的模型计算了节点的表示,那么用户只需要再编写一个用 :meth:`~dgl.DGLGraph.apply_edges` 方法计算边预测的组件即可进行边分类/回归任务。 例如,对于边回归任务,如果用户想为每条边计算一个分数,可按下面的代码对每一条边计算它的两端节点隐藏表示的点积来作为分数。 .. code:: python import dgl.function as fn class DotProductPredictor(nn.Module): def forward(self, graph, h): # h是从5.1节的GNN模型中计算出的节点表示 with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score')) return graph.edata['score'] 用户也可以使用MLP(多层感知机)对每条边生成一个向量表示(例如,作为一个未经过归一化的类别的分布), 并在下游任务中使用。 .. 
code:: python class MLPPredictor(nn.Module): def __init__(self, in_features, out_classes): super().__init__() self.W = nn.Linear(in_features * 2, out_classes) def apply_edges(self, edges): h_u = edges.src['h'] h_v = edges.dst['h'] score = self.W(torch.cat([h_u, h_v], 1)) return {'score': score} def forward(self, graph, h): # h是从5.1节的GNN模型中计算出的节点表示 with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(self.apply_edges) return graph.edata['score'] 模型的训练 ~~~~~~~~~~~~~ 给定计算节点和边上表示的模型后,用户可以轻松地编写在所有边上进行预测的全图训练代码。 以下代码用了 :ref:`guide_cn-message-passing` 中定义的 ``SAGE`` 作为节点表示计算模型以及前一小节中定义的 ``DotProductPredictor`` 作为边预测模型。 .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.sage = SAGE(in_features, hidden_features, out_features) self.pred = DotProductPredictor() def forward(self, g, x): h = self.sage(g, x) return self.pred(g, h) 在训练模型时可以使用布尔掩码区分训练、验证和测试数据集。该例子里省略了训练早停和模型保存部分的代码。 .. code:: python node_features = edge_pred_graph.ndata['feature'] edge_label = edge_pred_graph.edata['label'] train_mask = edge_pred_graph.edata['train_mask'] model = Model(10, 20, 5) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): pred = model(edge_pred_graph, node_features) loss = ((pred[train_mask] - edge_label[train_mask]) ** 2).mean() opt.zero_grad() loss.backward() opt.step() print(loss.item()) .. _guide_cn-training-edge-classification-heterogeneous-graph: 异构图上的边预测模型的训练 ~~~~~~~~~~~~~~~~~~~~~~~~~ 例如想在某一特定类型的边上进行分类任务,用户只需要计算所有节点类型的节点表示, 然后同样通过调用 :meth:`~dgl.DGLGraph.apply_edges` 方法计算预测值即可。 唯一的区别是在调用 ``apply_edges`` 时需要指定边的类型。 .. code:: python class HeteroDotProductPredictor(nn.Module): def forward(self, graph, h, etype): # h是从5.1节中对每种类型的边所计算的节点表示 with graph.local_scope(): graph.ndata['h'] = h #一次性为所有节点类型的 'h'赋值 graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype) return graph.edges[etype].data['score'] 同样地,用户也可以编写一个 ``HeteroMLPPredictor``。 ..
code:: python class MLPPredictor(nn.Module): def __init__(self, in_features, out_classes): super().__init__() self.W = nn.Linear(in_features * 2, out_classes) def apply_edges(self, edges): h_u = edges.src['h'] h_v = edges.dst['h'] score = self.W(torch.cat([h_u, h_v], 1)) return {'score': score} def forward(self, graph, h, etype): # h是从5.1节中对异构图的每种类型的边所计算的节点表示 with graph.local_scope(): graph.ndata['h'] = h #一次性为所有节点类型的 'h'赋值 graph.apply_edges(self.apply_edges, etype=etype) return graph.edges[etype].data['score'] 在某种类型的边上为每一条边预测的端到端模型的定义如下所示: .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroDotProductPredictor() def forward(self, g, x, etype): h = self.sage(g, x) return self.pred(g, h, etype) 使用模型时只需要简单地向模型提供一个包含节点类型和数据特征的字典。 .. code:: python model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] label = hetero_graph.edges['click'].data['label'] train_mask = hetero_graph.edges['click'].data['train_mask'] node_features = {'user': user_feats, 'item': item_feats} 训练部分和同构图的训练基本一致。例如,如果用户想预测边类型为 ``click`` 的边的标签,只需要按下例编写代码。 .. 
code:: python opt = torch.optim.Adam(model.parameters()) for epoch in range(10): pred = model(hetero_graph, node_features, 'click') loss = ((pred[train_mask] - label[train_mask]) ** 2).mean() opt.zero_grad() loss.backward() opt.step() print(loss.item()) 在异构图中预测已有边的类型 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 预测图中已经存在的边属于哪个类型是一个非常常见的任务类型。例如,根据 :ref:`本章的异构图样例数据 `, 用户的任务是给定一条连接 ``user`` 节点和 ``item`` 节点的边,预测它的类型是 ``click`` 还是 ``dislike``。 这个例子是评分预测的一个简化版本,在推荐场景中很常见。 边类型预测的第一步仍然是计算节点表示。可以通过类似 :ref:`节点分类的RGCN模型 ` 这一章中提到的图卷积网络获得。第二步是计算边上的预测值。 在这里可以复用上述提到的 ``HeteroDotProductPredictor``。 这里需要注意的是输入的图数据不能包含边的类型信息, 因此需要将所要预测的边类型(如 ``click`` 和 ``dislike``)合并成一种边的图, 并为每条边计算出每种边类型的可能得分。下面的例子使用一个拥有 ``user`` 和 ``item`` 两种节点类型和一种边类型的图。该边类型是通过合并所有从 ``user`` 到 ``item`` 的边类型(如 ``like`` 和 ``dislike``)得到。 用户可以很方便地用关系切片的方式创建这个图。 .. code:: python dec_graph = hetero_graph['user', :, 'item'] 这个方法会返回一个异构图,它具有 ``user`` 和 ``item`` 两种节点类型, 以及把它们之间的所有边的类型进行合并后的单一边类型。 由于上面这行代码将原来的边类型存成边特征 ``dgl.ETYPE``,用户可以将它作为标签使用。 .. code:: python edge_label = dec_graph.edata[dgl.ETYPE] 将上述图作为边类型预测模块的输入,用户可以按如下方式编写预测模块: .. code:: python class HeteroMLPPredictor(nn.Module): def __init__(self, in_dims, n_classes): super().__init__() self.W = nn.Linear(in_dims * 2, n_classes) def apply_edges(self, edges): x = torch.cat([edges.src['h'], edges.dst['h']], 1) y = self.W(x) return {'score': y} def forward(self, graph, h): # h是从5.1节中对异构图的每种类型的边所计算的节点表示 with graph.local_scope(): graph.ndata['h'] = h #一次性为所有节点类型的 'h'赋值 graph.apply_edges(self.apply_edges) return graph.edata['score'] 结合了节点表示模块和边类型预测模块的模型如下所示: .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroMLPPredictor(out_features, len(rel_names)) def forward(self, g, x, dec_graph): h = self.sage(g, x) return self.pred(dec_graph, h) 训练部分如下所示: .. 
code:: python model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] node_features = {'user': user_feats, 'item': item_feats} opt = torch.optim.Adam(model.parameters()) for epoch in range(10): logits = model(hetero_graph, node_features, dec_graph) loss = F.cross_entropy(logits, edge_label) opt.zero_grad() loss.backward() opt.step() print(loss.item()) 读者可以进一步参考 `Graph Convolutional Matrix Completion `__ 这一示例来了解如何预测异构图中的边类型。 `模型实现文件中 `__ 的节点表示模块称作 ``GCMCLayer``。边类型预测模块称作 ``BiDecoder``。 虽然这两个模块都比上述的示例代码要复杂,但其基本思想和本章描述的流程是一致的。 ================================================ FILE: docs/source/guide_cn/training-eweight.rst ================================================ .. _guide_cn-training-eweight: 5.5 使用边权重 ---------------------------------- :ref:`(English Version) ` 在一个加权图里,每条边都有一个有意义的标量权重。例如,边权重可以是连接强度或者信心指数。 人们自然会想要在模型开发中使用它们。 使用边权重的消息传递 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 大部分图神经网络在前馈计算中仅通过消息传递引入图结构信息。一个消息传递运算可以视为一个函数。 这个函数的输入变量是一个邻接矩阵和其他输入特征。对于一个不带权重的图,邻接矩阵里的元素不是零就是一。 值为一的元素表示一条边。对于一个加权图,非零的元素可以取任意标量值。这等价于把每条消息和对应的边权重相乘, 即`图注意力网络 `__中的做法。 在DGL里可以通过以下步骤实现这一需求: - 把边权重保存为一个边特征 - 在消息函数里,用保存的边特征与对应边的原始消息相乘 考虑以下基于DGL的消息传递示例: .. code:: import dgl.function as fn # 假定graph.ndata['ft']存储了输入节点特征 graph.update_all(fn.copy_u('ft', 'm'), fn.sum('m', 'ft')) 可以将其按以下方式修改以支持边权重: .. code:: import dgl.function as fn # 将边权重保存为一个边特征。边权重是一个形状为(E, *)的张量。 # E是边的数量 graph.edata['w'] = eweight # 假定graph.ndata['ft']存储了输入节点特征 graph.update_all(fn.u_mul_e('ft', 'w', 'm'), fn.sum('m', 'ft')) 在NN模块中使用边权重 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 用户可以通过修改NN模块中所有的消息传递操作来给NN模块增加边权重支持。以下代码块提供了一个例子。 .. 
code:: import dgl.function as fn import torch.nn as nn class GNN(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.linear = nn.Linear(in_feats, out_feats) def forward(self, g, feat, edge_weight=None): with g.local_scope(): g.ndata['ft'] = self.linear(feat) if edge_weight is None: msg_func = fn.copy_u('ft', 'm') else: g.edata['w'] = edge_weight msg_func = fn.u_mul_e('ft', 'w', 'm') g.update_all(msg_func, fn.sum('m', 'ft')) return g.ndata['ft'] DGL内置的NN模块如果在forward函数中支持一个可选的:attr:`edge_weight`变量,那么它们已经支持了边权重。 用户可能会需要标准化原始边权重。DGL提供了一个满足这个功能的函数 :func:`~dgl.nn.pytorch.conv.EdgeWeightNorm`。 ================================================ FILE: docs/source/guide_cn/training-graph.rst ================================================ .. _guide_cn-training-graph-classification: 5.4 整图分类 ---------------------------------- :ref:`(English Version) ` 许多场景中的图数据是由多个图组成,而不是单个的大图数据。例如不同类型的人群社区。 通过用图刻画同一社区里人与人间的友谊,可以得到多张用于分类的图。 在这个场景里,整图分类模型可以识别社区的类型,即根据结构和整体信息对图进行分类。 概述 ~~~~~~~~ 整图分类与节点分类或链接预测的主要区别是:预测结果刻画了整个输入图的属性。 与之前的任务类似,用户还是在节点或边上进行消息传递。但不同的是,整图分类任务还需要得到整个图的表示。 整图分类的处理流程如下图所示: .. figure:: https://data.dgl.ai/tutorial/batch/graph_classifier.png :alt: Graph Classification Process 整图分类流程 从左至右,一般流程是: - 准备一个批次的图; - 在这个批次的图上进行消息传递以更新节点或边的特征; - 将一张图里的节点或边特征聚合成整张图的图表示; - 根据任务设计分类层。 批次的图 ^^^^^^^^^^^^^^^ 整图分类任务通常需要在很多图上进行训练。如果用户在训练模型时一次仅使用一张图,训练效率会很低。 借用深度学习实践中常用的小批次训练方法,用户可将多张图组成一个批次,在整个图批次上进行一次训练迭代。 使用DGL,用户可将一系列的图建立成一个图批次。一个图批次可以被看作是一张大图,图中的每个连通子图对应一张原始小图。 .. figure:: https://data.dgl.ai/tutorial/batch/batch.png :alt: Batched Graph 批次化的图 需要注意,DGL里对图进行变换的函数会去掉图上的批次信息。用户可以通过 :func:`dgl.DGLGraph.set_batch_num_nodes` 和 :func:`dgl.DGLGraph.set_batch_num_edges` 两个函数在变换后的图上重新加入批次信息。 图读出 ^^^^^^^^^^^^^ 数据集中的每一张图都有它独特的结构和节点与边的特征。为了完成单个图的预测,通常会聚合并汇总单个图尽可能多的信息。 这类操作叫做“读出”。常见的聚合方法包括:对所有节点或边特征求和、取平均值、逐元素求最大值或最小值。 给定一张图 :math:`g`,对它所有节点特征取平均值的聚合读出公式如下: .. 
math:: h_g = \frac{1}{|\mathcal{V}|}\sum_{v\in \mathcal{V}}h_v 其中,:math:`h_g` 是图 :math:`g` 的表征, :math:`\mathcal{V}` 是图 :math:`g` 中节点的集合, :math:`h_v` 是节点 :math:`v` 的特征。 DGL内置了常见的图读出函数,例如 :func:`dgl.mean_nodes` 就实现了上述的平均值读出计算。 在得到 :math:`h_g` 后,用户可将其传给一个多层感知机(MLP)来获得分类输出。 编写神经网络模型 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 模型的输入是带节点和边特征的批次化图。需要注意的是批次化图中的节点和边属性没有批次大小对应的维度。 模型中应特别注意以下几点。 批次化图上的计算 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 首先,一个批次中不同的图是完全分开的,即任意两个图之间没有边连接。 根据这个良好的性质,所有消息传递函数(的计算)仍然具有相同的结果。 其次,读出函数会分别作用在图批次中的每张图上。假设批次大小为 :math:`B`,要聚合的特征大小为 :math:`D`, 则图读出的张量形状为 :math:`(B, D)`。 .. code:: python import dgl import torch g1 = dgl.graph(([0, 1], [1, 0])) g1.ndata['h'] = torch.tensor([1., 2.]) g2 = dgl.graph(([0, 1], [1, 2])) g2.ndata['h'] = torch.tensor([1., 2., 3.]) dgl.readout_nodes(g1, 'h') # tensor([3.]) # 1 + 2 bg = dgl.batch([g1, g2]) dgl.readout_nodes(bg, 'h') # tensor([3., 6.]) # [1 + 2, 1 + 2 + 3] 最后,批次化图中的每个节点或边特征张量均通过将所有图上的相应特征拼接得到。 .. code:: python bg.ndata['h'] # tensor([1., 2., 1., 2., 3.]) 模型定义 ^^^^^^^^^^^^^^^^ 了解了上述计算规则后,用户可以定义一个非常简单的模型。 .. code:: python import dgl.nn.pytorch as dglnn import torch.nn as nn class Classifier(nn.Module): def __init__(self, in_dim, hidden_dim, n_classes): super(Classifier, self).__init__() self.conv1 = dglnn.GraphConv(in_dim, hidden_dim) self.conv2 = dglnn.GraphConv(hidden_dim, hidden_dim) self.classify = nn.Linear(hidden_dim, n_classes) def forward(self, g, h): # 应用图卷积和激活函数 h = F.relu(self.conv1(g, h)) h = F.relu(self.conv2(g, h)) with g.local_scope(): g.ndata['h'] = h # 使用平均读出计算图表示 hg = dgl.mean_nodes(g, 'h') return self.classify(hg) 模型的训练 ~~~~~~~~~~~~~ 数据加载 ^^^^^^^^^^^^ 模型定义完成后,用户就可以开始训练模型。由于整图分类处理的是很多相对较小的图,而不是一个大图, 因此通常可以在随机抽取的小批次图上进行高效的训练,而无需设计复杂的图采样算法。 以下例子中使用了 :ref:`guide_cn-data-pipeline` 中的整图分类数据集。 .. code:: python import dgl.data dataset = dgl.data.GINDataset('MUTAG', False) 整图分类数据集里的每个数据点是一个图和它对应标签的元组。为提升数据加载速度, 用户可以调用GraphDataLoader,从而以小批次遍历整个图数据集。 ..
code:: python from dgl.dataloading import GraphDataLoader dataloader = GraphDataLoader( dataset, batch_size=1024, drop_last=False, shuffle=True) 训练过程包括遍历dataloader和更新模型参数的部分。 .. code:: python import torch.nn.functional as F # 这仅是个例子,特征尺寸是7 model = Classifier(7, 20, 5) opt = torch.optim.Adam(model.parameters()) for epoch in range(20): for batched_graph, labels in dataloader: feats = batched_graph.ndata['attr'] logits = model(batched_graph, feats) loss = F.cross_entropy(logits, labels) opt.zero_grad() loss.backward() opt.step() DGL实现了一个整图分类的样例: `DGL的GIN样例 `__。 模型训练的代码请参考位于 `main.py `__ 源文件中的 ``train`` 函数。 模型实现位于 `gin.py `__ , 其中使用了更多的模块组件,例如使用 :class:`dgl.nn.pytorch.GINConv` 模块作为图卷积层(DGL同样支持它在MXNet和TensorFlow后端里的实现)、批量归一化等。 异构图上的整图分类模型的训练 ~~~~~~~~~~~~~~~~~~~ 在异构图上做整图分类和在同构图上做整图分类略有不同。用户除了需要使用异构图卷积模块,还需要在读出函数中聚合不同类别的节点。 以下代码演示了如何对每种节点类型的节点表示取平均值并求和。 .. code:: python class RGCN(nn.Module): def __init__(self, in_feats, hid_feats, out_feats, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(in_feats, hid_feats) for rel in rel_names}, aggregate='sum') self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats) for rel in rel_names}, aggregate='sum') def forward(self, graph, inputs): # inputs是节点的特征 h = self.conv1(graph, inputs) h = {k: F.relu(v) for k, v in h.items()} h = self.conv2(graph, h) return h class HeteroClassifier(nn.Module): def __init__(self, in_dim, hidden_dim, n_classes, rel_names): super().__init__() self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names) self.classify = nn.Linear(hidden_dim, n_classes) def forward(self, g): h = g.ndata['feat'] h = self.rgcn(g, h) with g.local_scope(): g.ndata['h'] = h # 通过平均读出值来计算单图的表征 hg = 0 for ntype in g.ntypes: hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype) return self.classify(hg) 剩余部分的训练代码和同构图代码相同。 .. 
code:: python # etypes是一个列表,元素是字符串类型的边类型 model = HeteroClassifier(10, 20, 5, etypes) opt = torch.optim.Adam(model.parameters()) for epoch in range(20): for batched_graph, labels in dataloader: logits = model(batched_graph) loss = F.cross_entropy(logits, labels) opt.zero_grad() loss.backward() opt.step() ================================================ FILE: docs/source/guide_cn/training-link.rst ================================================ .. _guide_cn-training-link-prediction: 5.3 链接预测 --------------------------- :ref:`(English Version) ` 在某些场景中,用户可能希望预测给定节点之间是否存在边,这样的任务称作 **链接预测** 任务。 概述 ~~~~~~~~ 基于GNN的链接预测模型的基本思想是通过使用所需预测的节点对 :math:`u`, :math:`v` 的节点表示 :math:`\boldsymbol{h}_u^{(L)}` 和 :math:`\boldsymbol{h}_v^{(L)}`,计算它们之间存在链接可能性的得分 :math:`y_{u,v}`。 其中 :math:`\boldsymbol{h}_u^{(L)}` 和 :math:`\boldsymbol{h}_v^{(L)}` 由多层GNN计算得出。 .. math:: y_{u,v} = \phi(\boldsymbol{h}_u^{(L)}, \boldsymbol{h}_v^{(L)}) 本节把节点 :math:`u` 和 :math:`v` 之间存在连接可能性的 *得分* 记作 :math:`y_{u,v}`。 训练一个链接预测模型涉及到比对两个相连接节点之间的得分与任意一对节点之间的得分的差异。 例如,给定一条连接 :math:`u` 和 :math:`v` 的边,一个好的模型希望 :math:`u` 和 :math:`v` 之间的得分要高于 :math:`u` 和从一个任意的噪声分布 :math:`v' \sim P_n(v)` 中所采样的节点 :math:`v'` 之间的得分。 这样的方法称作 *负采样*。 许多损失函数都可以实现上述目标,包括但不限于: - 交叉熵损失: :math:`\mathcal{L} = - \log \sigma (y_{u,v}) - \sum_{v_i \sim P_n(v), i=1,\dots,k}\log \left[ 1 - \sigma (y_{u,v_i})\right]` - 贝叶斯个性化排序损失: :math:`\mathcal{L} = \sum_{v_i \sim P_n(v), i=1,\dots,k} - \log \sigma (y_{u,v} - y_{u,v_i})` - 间隔损失: :math:`\mathcal{L} = \sum_{v_i \sim P_n(v), i=1,\dots,k} \max(0, M - y_{u, v} + y_{u, v_i})`, 其中 :math:`M` 是常数项超参数。 如果用户熟悉 `implicit feedback `__ 和 `noise-contrastive estimation `__ , 可能会发现这些工作的想法都很类似。 计算 :math:`u` 和 :math:`v` 之间分数的神经网络模型与 :ref:`guide_cn-training-edge-classification` 中所述的边回归模型相同。 下面是使用点积计算边得分的例子。 ..
code:: python class DotProductPredictor(nn.Module): def forward(self, graph, h): # h是从5.1节的GNN模型中计算出的节点表示 with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score')) return graph.edata['score'] 模型的训练 ~~~~~~~~~~~~~ 因为上述的得分预测模型在图上进行计算,用户需要将负采样的样本表示为另外一个图, 其中包含所有负采样的节点对作为边。 下面的例子展示了将负采样的样本表示为一个图。每一条边 :math:`(u,v)` 都有 :math:`k` 个对应的负采样样本 :math:`(u,v_i)`,其中 :math:`v_i` 是从均匀分布中采样的。 .. code:: python def construct_negative_graph(graph, k): src, dst = graph.edges() neg_src = src.repeat_interleave(k) neg_dst = torch.randint(0, graph.num_nodes(), (len(src) * k,)) return dgl.graph((neg_src, neg_dst), num_nodes=graph.num_nodes()) 预测边得分的模型和边分类/回归模型中的预测边得分模型相同。 .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.sage = SAGE(in_features, hidden_features, out_features) self.pred = DotProductPredictor() def forward(self, g, neg_g, x): h = self.sage(g, x) return self.pred(g, h), self.pred(neg_g, h) 训练的循环部分里会重复构建负采样图并计算损失函数值。 .. code:: python def compute_loss(pos_score, neg_score): # 间隔损失 n_edges = pos_score.shape[0] return (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean() node_features = graph.ndata['feat'] n_features = node_features.shape[1] k = 5 model = Model(n_features, 100, 100) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): negative_graph = construct_negative_graph(graph, k) pos_score, neg_score = model(graph, negative_graph, node_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() print(loss.item()) 训练后,节点表示可以通过以下代码获取。 .. 
code:: python node_embeddings = model.sage(graph, node_features) (实际应用中),有着许多使用节点嵌入的方法,例如,训练下游任务的分类器,或为相关实体推荐进行最近邻搜索或最大内积搜索。 异构图上的链接预测模型的训练 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 异构图上的链接预测和同构图上的链接预测没有太大区别。下文是在一种边类型上进行预测, 用户可以很容易地将其拓展为对多种边类型上进行预测。 例如,为某一种边类型,用户可以重复使用 :ref:`guide_cn-training-edge-classification-heterogeneous-graph` 里的 ``HeteroDotProductPredictor`` 来计算节点间存在连接可能性的得分。 .. code:: python class HeteroDotProductPredictor(nn.Module): def forward(self, graph, h, etype): # h是从5.1节中对异构图的每种类型的边所计算的节点表示 with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype) return graph.edges[etype].data['score'] 要执行负采样,用户可以对要进行链接预测的边类型构造一个负采样图。 .. code:: python def construct_negative_graph(graph, k, etype): utype, _, vtype = etype src, dst = graph.edges(etype=etype) neg_src = src.repeat_interleave(k) neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,)) return dgl.heterograph( {etype: (neg_src, neg_dst)}, num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes}) 该模型与异构图上边分类的模型有些不同,因为用户需要指定在哪种边类型上进行链接预测。 .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroDotProductPredictor() def forward(self, g, neg_g, x, etype): h = self.sage(g, x) return self.pred(g, h, etype), self.pred(neg_g, h, etype) 训练的循环部分和同构图时一致。 .. 
code:: python def compute_loss(pos_score, neg_score): # 间隔损失 n_edges = pos_score.shape[0] return (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean() k = 5 model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] node_features = {'user': user_feats, 'item': item_feats} opt = torch.optim.Adam(model.parameters()) for epoch in range(10): negative_graph = construct_negative_graph(hetero_graph, k, ('user', 'click', 'item')) pos_score, neg_score = model(hetero_graph, negative_graph, node_features, ('user', 'click', 'item')) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() print(loss.item()) ================================================ FILE: docs/source/guide_cn/training-node.rst ================================================ .. _guide_cn-training-node-classification: 5.1 节点分类/回归 -------------------------------------------------- :ref:`(English Version) ` 对于图神经网络来说,最常见和被广泛使用的任务之一就是节点分类。 图数据中的训练、验证和测试集中的每个节点都具有从一组预定义的类别中分配的一个类别,即正确的标注。 节点回归任务也类似,训练、验证和测试集中的每个节点都被标注了一个正确的数字。 概述 ~~~~~~~~ 为了对节点进行分类,图神经网络执行了 :ref:`guide_cn-message-passing` 中介绍的消息传递机制,利用节点自身的特征和其邻节点及边的特征来计算节点的隐藏表示。 消息传递可以重复多轮,以利用更大范围的邻居信息。 编写神经网络模型 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL提供了一些内置的图卷积模块,可以完成一轮消息传递计算。 本章中选择 :class:`dgl.nn.pytorch.SAGEConv` 作为演示的样例代码(针对MXNet和TensorFlow后端也有对应的模块), 它是GraphSAGE模型中使用的图卷积模块。 对于图上的深度学习模型,通常需要一个多层的图神经网络,并在这个网络中要进行多轮的信息传递。 可以通过堆叠图卷积模块来实现这种网络架构,具体如下所示。 ..
code:: python # 构建一个2层的GNN模型 import dgl.nn as dglnn import torch.nn as nn import torch.nn.functional as F class SAGE(nn.Module): def __init__(self, in_feats, hid_feats, out_feats): super().__init__() # 实例化SAGEConve,in_feats是输入特征的维度,out_feats是输出特征的维度,aggregator_type是聚合函数的类型 self.conv1 = dglnn.SAGEConv( in_feats=in_feats, out_feats=hid_feats, aggregator_type='mean') self.conv2 = dglnn.SAGEConv( in_feats=hid_feats, out_feats=out_feats, aggregator_type='mean') def forward(self, graph, inputs): # 输入是节点的特征 h = self.conv1(graph, inputs) h = F.relu(h) h = self.conv2(graph, h) return h 请注意,这个模型不仅可以做节点分类,还可以为其他下游任务获取隐藏节点表示,如: :ref:`guide_cn-training-edge-classification`、 :ref:`guide_cn-training-link-prediction` 和 :ref:`guide_cn-training-graph-classification`。 关于DGL内置图卷积模块的完整列表,读者可以参考 :ref:`apinn`。 有关DGL神经网络模块如何工作,以及如何编写一个自定义的带有消息传递的GNN模块的更多细节,请参考 :ref:`guide_cn-nn` 中的例子。 模型的训练 ~~~~~~~~~~~~~ 全图(使用所有的节点和边的特征)上的训练只需要使用上面定义的模型进行前向传播计算,并通过在训练节点上比较预测和真实标签来计算损失,从而完成后向传播。 本节使用DGL内置的数据集 :class:`dgl.data.CiteseerGraphDataset` 来展示模型的训练。 节点特征和标签存储在其图上,训练、验证和测试的分割也以布尔掩码的形式存储在图上。这与在 :ref:`guide_cn-data-pipeline` 中的做法类似。 .. code:: python node_features = graph.ndata['feat'] node_labels = graph.ndata['label'] train_mask = graph.ndata['train_mask'] valid_mask = graph.ndata['val_mask'] test_mask = graph.ndata['test_mask'] n_features = node_features.shape[1] n_labels = int(node_labels.max().item() + 1) 下面是通过使用准确性来评估模型的一个例子。 .. code:: python def evaluate(model, graph, features, labels, mask): model.eval() with torch.no_grad(): logits = model(graph, features) logits = logits[mask] labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) 用户可以按如下方式实现模型的训练。 .. 
code:: python model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): model.train() # 使用所有节点(全图)进行前向传播计算 logits = model(graph, node_features) # 计算损失值 loss = F.cross_entropy(logits[train_mask], node_labels[train_mask]) # 计算验证集的准确度 acc = evaluate(model, graph, node_features, node_labels, valid_mask) # 进行反向传播计算 opt.zero_grad() loss.backward() opt.step() print(loss.item()) # 如果需要的话,保存训练好的模型。本例中省略。 `DGL的GraphSAGE样例 `__ 提供了一个端到端的同构图节点分类的例子。用户可以在 ``GraphSAGE`` 类中看到模型实现的细节。 这个模型具有可调节的层数、dropout概率,以及可定制的聚合函数和非线性函数。 .. _guide_cn-training-rgcn-node-classification: 异构图上的节点分类模型的训练 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 如果图是异构的,用户可能希望沿着所有边类型从邻居那里收集消息。 用户可以使用 :class:`dgl.nn.pytorch.HeteroGraphConv` 模块(针对MXNet和TensorFlow后端也有对应的模块)在所有边类型上执行消息传递, 并为每种边类型使用一种图卷积模块。 下面的代码定义了一个异构图卷积模块。模块首先对每种边类型进行单独的图卷积计算,然后将每种边类型上的消息聚合结果再相加, 并作为所有节点类型的最终结果。 .. code:: python # Define a Heterograph Conv model class RGCN(nn.Module): def __init__(self, in_feats, hid_feats, out_feats, rel_names): super().__init__() # 实例化HeteroGraphConv,in_feats是输入特征的维度,out_feats是输出特征的维度,aggregate是聚合函数的类型 self.conv1 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(in_feats, hid_feats) for rel in rel_names}, aggregate='sum') self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats) for rel in rel_names}, aggregate='sum') def forward(self, graph, inputs): # 输入是节点的特征字典 h = self.conv1(graph, inputs) h = {k: F.relu(v) for k, v in h.items()} h = self.conv2(graph, h) return h ``dgl.nn.HeteroGraphConv`` 接收一个节点类型和节点特征张量的字典作为输入,并返回另一个节点类型和节点特征的字典。 本章的 :ref:`guide_cn-training-heterogeneous-graph-example` 中已经有了 ``user`` 和 ``item`` 的特征,用户可用如下代码获取。 ..
code:: python model = RGCN(n_hetero_features, 20, n_user_classes, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] labels = hetero_graph.nodes['user'].data['label'] train_mask = hetero_graph.nodes['user'].data['train_mask'] 然后,用户可以简单地按如下形式进行前向传播计算: .. code:: python node_features = {'user': user_feats, 'item': item_feats} h_dict = model(hetero_graph, {'user': user_feats, 'item': item_feats}) h_user = h_dict['user'] h_item = h_dict['item'] 异构图上模型的训练和同构图的模型训练是一样的,只是这里使用了一个包括节点表示的字典来计算预测值。 例如,如果只预测 ``user`` 节点的类别,用户可以从返回的字典中提取 ``user`` 的节点嵌入。 .. code:: python opt = torch.optim.Adam(model.parameters()) for epoch in range(5): model.train() # 使用所有节点的特征进行前向传播计算,并提取输出的user节点嵌入 logits = model(hetero_graph, node_features)['user'] # 计算损失值 loss = F.cross_entropy(logits[train_mask], labels[train_mask]) # 计算验证集的准确度。在本例中省略。 # 进行反向传播计算 opt.zero_grad() loss.backward() opt.step() print(loss.item()) # 如果需要的话,保存训练好的模型。本例中省略。 DGL提供了一个用于节点分类的RGCN的端到端的例子 `RGCN `__ 。用户可以在 `RGCN模型实现文件 `__ 中查看异构图卷积 ``RelGraphConvLayer`` 的具体定义。 ================================================ FILE: docs/source/guide_cn/training.rst ================================================ .. _guide_cn-training: 第5章:训练图神经网络 ===================================================== :ref:`(English Version) ` 概述 -------- 本章通过使用 :ref:`guide_cn-message-passing` 中介绍的消息传递方法和 :ref:`guide_cn-nn` 中介绍的图神经网络模块, 讲解了如何对小规模的图数据进行节点分类、边分类、链接预测和整图分类的图神经网络的训练。 本章假设用户的图以及所有的节点和边特征都能存进GPU。对于无法全部载入的情况,请参考用户指南的 :ref:`guide_cn-minibatch`。 后续章节的内容均假设用户已经准备好了图和节点/边的特征数据。如果用户希望使用DGL提供的数据集或其他兼容 ``DGLDataset`` 的数据(如 :ref:`guide_cn-data-pipeline` 所述), 可以使用类似以下代码的方法获取单个图数据集的图数据。 .. code:: python import dgl dataset = dgl.data.CiteseerGraphDataset() graph = dataset[0] 注意: 本章代码使用PyTorch作为DGL的后端框架。 .. 
_guide_cn-training-heterogeneous-graph-example: 异构图训练的样例数据 ~~~~~~~~~~~~~~~~~~~~~~~~~ 有时用户会想在异构图上进行图神经网络的训练。本章会以下面代码所创建的一个异构图为例,来演示如何进行节点分类、边分类和链接预测的训练。 这个 ``hetero_graph`` 异构图有以下这些边的类型: - ``('user', 'follow', 'user')`` - ``('user', 'followed-by', 'user')`` - ``('user', 'click', 'item')`` - ``('item', 'clicked-by', 'user')`` - ``('user', 'dislike', 'item')`` - ``('item', 'disliked-by', 'user')`` .. code:: python import numpy as np import torch n_users = 1000 n_items = 500 n_follows = 3000 n_clicks = 5000 n_dislikes = 500 n_hetero_features = 10 n_user_classes = 5 n_max_clicks = 10 follow_src = np.random.randint(0, n_users, n_follows) follow_dst = np.random.randint(0, n_users, n_follows) click_src = np.random.randint(0, n_users, n_clicks) click_dst = np.random.randint(0, n_items, n_clicks) dislike_src = np.random.randint(0, n_users, n_dislikes) dislike_dst = np.random.randint(0, n_items, n_dislikes) hetero_graph = dgl.heterograph({ ('user', 'follow', 'user'): (follow_src, follow_dst), ('user', 'followed-by', 'user'): (follow_dst, follow_src), ('user', 'click', 'item'): (click_src, click_dst), ('item', 'clicked-by', 'user'): (click_dst, click_src), ('user', 'dislike', 'item'): (dislike_src, dislike_dst), ('item', 'disliked-by', 'user'): (dislike_dst, dislike_src)}) hetero_graph.nodes['user'].data['feature'] = torch.randn(n_users, n_hetero_features) hetero_graph.nodes['item'].data['feature'] = torch.randn(n_items, n_hetero_features) hetero_graph.nodes['user'].data['label'] = torch.randint(0, n_user_classes, (n_users,)) hetero_graph.edges['click'].data['label'] = torch.randint(1, n_max_clicks, (n_clicks,)).float() # 在user类型的节点和click类型的边上随机生成训练集的掩码 hetero_graph.nodes['user'].data['train_mask'] = torch.zeros(n_users, dtype=torch.bool).bernoulli(0.6) hetero_graph.edges['click'].data['train_mask'] = torch.zeros(n_clicks, dtype=torch.bool).bernoulli(0.6) 本章路线图 ------------ 本章共有四节,每节对应一种图学习任务。 * :ref:`guide_cn-training-node-classification` * 
:ref:`guide_cn-training-edge-classification` * :ref:`guide_cn-training-link-prediction` * :ref:`guide_cn-training-graph-classification` * :ref:`guide_cn-training-eweight` .. toctree:: :maxdepth: 1 :hidden: :glob: training-node training-edge training-link training-graph training-eweight ================================================ FILE: docs/source/guide_ko/data-dataset.rst ================================================ .. _guide_ko-data-pipeline-dataset: 4.1 DGLDataset 클래스 -------------------- :ref:`(English Version) ` :class:`~dgl.data.DGLDataset` 는 :ref:`apidata` 에서 정의된 그래프 데이터셋을 프로세싱하고, 로딩하고 저장하기 위한 기본 클래스이다. 이는 그래프 데이터를 서치하는 기본 파이프라인을 구현한다. 아래 순서도는 파이프라인이 어떻게 동작하는지를 보여준다. .. figure:: https://data.dgl.ai/asset/image/userguide_data_flow.png :align: center DGLDataset 클래스에 정의된 그래프 데이터 입력 파이프라인에 대한 순서도 원격 또는 로컬 디스크에 있는 그래프 데이터셋을 처리하기 위해서, :class:`dgl.data.DGLDataset` 를 상속해서 클래스를 정의한다. 예로, ``MyDataset`` 이라고 하자. ``MyDataset`` 템플릿은 다음과 같다. .. code:: from dgl.data import DGLDataset class MyDataset(DGLDataset): """ Template for customizing graph datasets in DGL. Parameters ---------- url : str URL to download the raw dataset raw_dir : str Specifying the directory that will store the downloaded data or the directory that already stores the input data. Default: ~/.dgl/ save_dir : str Directory to save the processed dataset. Default: the value of `raw_dir` force_reload : bool Whether to reload the dataset.
Default: False verbose : bool Whether to print out progress information """ def __init__(self, url=None, raw_dir=None, save_dir=None, force_reload=False, verbose=False): super(MyDataset, self).__init__(name='dataset_name', url=url, raw_dir=raw_dir, save_dir=save_dir, force_reload=force_reload, verbose=verbose) def download(self): # download raw data to local disk pass def process(self): # process raw data to graphs, labels, splitting masks pass def __getitem__(self, idx): # get one example by index pass def __len__(self): # number of data examples pass def save(self): # save processed data to directory `self.save_path` pass def load(self): # load processed data from directory `self.save_path` pass def has_cache(self): # check whether there are processed data in `self.save_path` pass :class:`~dgl.data.DGLDataset` 클래스에는 서브클래스에서 꼭 구현되어야 하는 함수들 ``process()`` , ``__getitem__(idx)`` 와 ``__len__()`` 이 있다. 또한 DGL은 저장과 로딩을 구현하는 것을 권장하는데, 그 이유는 큰 데이터셋 처리 시간을 많이 줄일 수 있고, 이를 쉽게 구현하는데 필요한 API들이 있기 때문이다. (:ref:`guide_ko-data-pipeline-savenload` 참고) :class:`~dgl.data.DGLDataset` 의 목적은 그래프 데이터 로드에 필요한 편리하고 표준적인 방법을 제공하는 것이다. 그래프, 피쳐, 레이블, 그리고 데이터셋에 대한 기본적인 정보 (클래스 개수, 레이블 개수 등)을 저장할 수 있다. 샘플링, 파티셔닝 또는 피쳐 normalization과 같은 작업은 :class:`~dgl.data.DGLDataset` 의 서브클래스 밖에서 수행된다. 이 장의 나머지에서는 파이프라인에서 함수를 구현하는 best practice들을 소개한다. ================================================ FILE: docs/source/guide_ko/data-download.rst ================================================ .. _guide_ko-data-pipeline-download: 4.2 Raw 데이터 다운로드하기 (optional) --------------------------------- :ref:`(English Version) ` 로컬 디스크에 데이터셋이 이미 존재한다면, ``raw_dir`` 디렉토리에 있어야 한다. 만약 데이터를 다운로드하고 특정 디렉토리에 옮기는 일을 직접 수행하지 않고 코드를 어디서나 실행하고 싶다면, ``download()`` 를 구현해서 이를 자동화할 수 있다. 데이터셋이 zip 파일 포멧인 경우, zip 파일 추출을 자동으로 해주는 :class:`dgl.data.DGLBuiltinDataset` 클래스를 상속해서 ``MyDataset`` 클래스를 만들자. 그렇지 않은 경우 :class:`~dgl.data.QM7bDataset` 처럼 ``download()`` 함수를 직접 구현한다: ..
code:: import os from dgl.data.utils import download def download(self): # path to store the file file_path = os.path.join(self.raw_dir, self.name + '.mat') # download file download(self.url, path=file_path) 위 코드는 .mat 파일을 ``self.raw_dir`` 디렉토리에 다운로드한다. 만약 파일 포멧이 .gz, .tar, .tar.gz 또는 .tgz 이라면, :func:`~dgl.data.utils.extract_archive` 함수로 파일들을 추출하자. 다음 코드는 :class:`~dgl.data.BitcoinOTCDataset` 에서 .gz 파일을 다운로드하는 예이다: .. code:: from dgl.data.utils import download, check_sha1 def download(self): # path to store the file # make sure to use the same suffix as the original file name's gz_file_path = os.path.join(self.raw_dir, self.name + '.csv.gz') # download file download(self.url, path=gz_file_path) # check SHA-1 if not check_sha1(gz_file_path, self._sha1_str): raise UserWarning('File {} is downloaded but the content hash does not match.' 'The repo may be outdated or download may be incomplete. ' 'Otherwise you can create an issue for it.'.format(self.name + '.csv.gz')) # extract file to directory `self.name` under `self.raw_dir` self._extract_gz(gz_file_path, self.raw_path) 위 코드는 ``self.raw_dir`` 디렉토리 아래의 ``self.name`` 서브 디렉토리에 파일을 추출한다. 만약 zip 파일을 다루기 위해서 :class:`dgl.data.DGLBuiltinDataset` 를 상속해서 사용했다면, 파일들은 자동으로 ``self.name`` 디렉토리로 추출될 것이다. 추가적으로, 다운로드한 파일에 대한 SHA-1 값 검증을 수행해서 파일이 변경되었는지 확인하는 것도 위 예제처럼 구현할 수 있다. ================================================ FILE: docs/source/guide_ko/data-loadogb.rst ================================================ .. _guide_ko-data-pipeline-loadogb: 4.5 ``ogb`` 패키지를 사용해서 OGB 데이터셋들 로드하기 ------------------------------------------- :ref:`(English Version) ` `Open Graph Benchmark (OGB) `__ 은 벤치마킹 데이터셋의 모음이다. 공식 OGB 패키지 `ogb `__ 는 OGB 데이터셋들을 다운로드해서 :class:`dgl.DGLGraph` 객체로 프로세싱하는 API들을 제공한다. 이 절은 기본적인 사용법을 설명한다. 우선 ogb 패키지를 pip 명령으로 설치한다. .. code:: pip install ogb 다음 코드는 *Graph Property Prediction* 테스크를 위한 데이터셋 로딩 방법을 보여준다. ..
code:: # Load Graph Property Prediction datasets in OGB import dgl import torch from ogb.graphproppred import DglGraphPropPredDataset from dgl.dataloading import GraphDataLoader def _collate_fn(batch): # batch is a list of tuple (graph, label) graphs = [e[0] for e in batch] g = dgl.batch(graphs) labels = [e[1] for e in batch] labels = torch.stack(labels, 0) return g, labels # load dataset dataset = DglGraphPropPredDataset(name='ogbg-molhiv') split_idx = dataset.get_idx_split() # dataloader train_loader = GraphDataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True, collate_fn=_collate_fn) valid_loader = GraphDataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False, collate_fn=_collate_fn) test_loader = GraphDataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False, collate_fn=_collate_fn) *Node Property Prediction* 데이터셋을 로딩하는 것이 비슷하지만, 이런 종류의 데이터셋은 오직 한 개의 그래프 객체만 존재한다는 것이 다름을 유의하자. .. code:: # Load Node Property Prediction datasets in OGB from ogb.nodeproppred import DglNodePropPredDataset dataset = DglNodePropPredDataset(name='ogbn-proteins') split_idx = dataset.get_idx_split() # there is only one graph in Node Property Prediction datasets g, labels = dataset[0] # get split labels train_label = dataset.labels[split_idx['train']] valid_label = dataset.labels[split_idx['valid']] test_label = dataset.labels[split_idx['test']] *Link Property Prediction* 데이터셋 역시 데이터셋에 한개의 그래프를 갖고 있다. .. code:: # Load Link Property Prediction datasets in OGB from ogb.linkproppred import DglLinkPropPredDataset dataset = DglLinkPropPredDataset(name='ogbl-ppa') split_edge = dataset.get_edge_split() graph = dataset[0] print(split_edge['train'].keys()) print(split_edge['valid'].keys()) print(split_edge['test'].keys()) ================================================ FILE: docs/source/guide_ko/data-process.rst ================================================ .. 
_guide_ko-data-pipeline-process: 4.3 데이터 프로세싱 --------------- :ref:`(English Version) ` 데이터 프로세싱 코드를 ``process()`` 함수에 구현할 수 있으며, 이때 처리되지 않은 데이터는 ``self.raw_dir`` 디렉토리에 있어야 한다. 그래프 머신러닝에는 일반적으로 3가지 종류의 일이 있다: 그래프 분류, 노드 분류, 그리고 링크 예측. 이 절에서는 이 일들에 관련된 데이터셋 처리 방법을 설명한다. 이 절에서 그래프들, 피쳐들, 그리고 마스크들을 처리하는 표준 방법에 집중해서 알아본다. 빌트인 데이터셋을 예제로 사용할 것이고, 파일로 부터 그래프를 만드는 방법은 생략한다. 하지만, 이와 관련된 구현에 대한 링크를 제공할 것이다. 외부 소스들로 부터 그래프를 만드는 방법에 대한 완벽한 가이드는 :ref:`guide_ko-graph-external` 를 참고하자. 그래프 분류 데이터셋 프로세싱 ~~~~~~~~~~~~~~~~~~~~~~ 그래프 분류 데이터셋은 미니-배치 학습이 사용되는 전형적인 머신러닝 테스크에서 사용되는 데이터셋과 거의 동일하다. 즉, 처리되지 않은 데이터는 :class:`dgl.DGLGraph` 객체들의 리스트와 레이블 텐서들의 리스트로 변환하면 된다. 또한, 만약 처리되지 않은 데이터가 여러 파일들로 나눠져 있을 경우에는, 데이터의 특정 부분을 로드하기 위해서 ``split`` 파라메터를 더할 수 있다. :class:`~dgl.data.QM7bDataset` 를 예로 살펴보자: .. code:: from dgl.data import DGLDataset class QM7bDataset(DGLDataset): _url = 'http://deepchem.io.s3-website-us-west-1.amazonaws.com/' \ 'datasets/qm7b.mat' _sha1_str = '4102c744bb9d6fd7b40ac67a300e49cd87e28392' def __init__(self, raw_dir=None, force_reload=False, verbose=False): super(QM7bDataset, self).__init__(name='qm7b', url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): mat_path = self.raw_path + '.mat' # process data to a list of graphs and a list of labels self.graphs, self.label = self._load_graph(mat_path) def __getitem__(self, idx): """ Get graph and label by index Parameters ---------- idx : int Item index Returns ------- (dgl.DGLGraph, Tensor) """ return self.graphs[idx], self.label[idx] def __len__(self): """Number of graphs in the dataset""" return len(self.graphs) ``process()`` 함수에서 처리되지 않은 데이터는 그래프들의 리스트와 레이블들의 리스트로 변환된다. Iteration을 위해서 ``__getitem__(idx)`` 와 ``__len__()`` 를 구현해야 한다. 위의 예제에서와 같이, DGL에서는 ``__getitem__(idx)`` 가 ``(graph, label)`` tuple을 리턴하도록 권장한다. ``self._load_graph()`` 와 ``__getitem__`` 함수의 구체적인 구현은 `QM7bDataset source code `__ 를 확인하자. 데이터셋의 유용한 정보들을 지정하기 위해서 클래스에 프로퍼티들을 추가하는 것이 가능하다. 
:class:`~dgl.data.QM7bDataset` 에 이 멀티 테스크 데이터셋의 예측 테스크의 총 개수를 지정하기 위해 ``num_tasks`` 라는 프로퍼티를 추가할 수 있다. .. code:: @property def num_tasks(self): """Number of labels for each graph, i.e. number of prediction tasks.""" return 14 구현 코드를 마친 후에, :class:`~dgl.data.QM7bDataset` 를 다음과 같이 사용한다. .. code:: import dgl import torch from dgl.dataloading import GraphDataLoader # load data dataset = QM7bDataset() num_tasks = dataset.num_tasks # create dataloaders dataloader = GraphDataLoader(dataset, batch_size=1, shuffle=True) # training for epoch in range(100): for g, labels in dataloader: # your training code here pass 그래프 분류 모델 학습에 대한 전체 가이드는 :ref:`guide_ko-training-graph-classification` 를 참고하자. DGL의 빌트인 그래프 분류 데이터셋을 참고하면 그래프 분류 데이터셋의 더 많은 예들을 확인할 수 있다. * :ref:`gindataset` * :ref:`minigcdataset` * :ref:`qm7bdata` * :ref:`tudata` 노드 분류 데이터셋 프로세싱 ~~~~~~~~~~~~~~~~~~~~ 그래프 분류와는 다르게 노드 분류는 일반적으로 단일 그래프에서 이뤄진다. 따라서, 데이터셋의 분할(split)은 그래프 노드에서 일어난다. DGL은 노드 마스크를 사용해서 분할을 지정하는 것을 권장한다. 이 절에서는 빌트인 데이터셋 `CitationGraphDataset `__ 을 예로 들겠다. 추가로, DGL은 서로 가까운 노드들이 가까운 범위의 ID값을 갖도록 노드들과 에지들을 재배열하는 것을 권장한다. 이 절차는 노드의 neighbor들에 대한 접근성을 향상시켜서, 이후의 연산 및 그래프에 대한 분석을 빠르게 하기 위함이다. 이를 위해서 DGL은 :func:`dgl.reorder_graph` API를 제공한다. 더 자세한 내용은 다음 예제의 ``process()`` 를 참고하자. ..
code:: from dgl.data import DGLBuiltinDataset from dgl.data.utils import _get_dgl_url class CitationGraphDataset(DGLBuiltinDataset): _urls = { 'cora_v2' : 'dataset/cora_v2.zip', 'citeseer' : 'dataset/citeseer.zip', 'pubmed' : 'dataset/pubmed.zip', } def __init__(self, name, raw_dir=None, force_reload=False, verbose=True): assert name.lower() in ['cora', 'citeseer', 'pubmed'] if name.lower() == 'cora': name = 'cora_v2' url = _get_dgl_url(self._urls[name]) super(CitationGraphDataset, self).__init__(name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): # Skip some processing code # === data processing skipped === # build graph g = dgl.graph(graph) # splitting masks g.ndata['train_mask'] = train_mask g.ndata['val_mask'] = val_mask g.ndata['test_mask'] = test_mask # node labels g.ndata['label'] = torch.tensor(labels) # node features g.ndata['feat'] = torch.tensor(_preprocess_features(features), dtype=F.data_type_dict['float32']) self._num_tasks = onehot_labels.shape[1] self._labels = labels # reorder graph to obtain better locality. self._g = dgl.reorder_graph(g) def __getitem__(self, idx): assert idx == 0, "This dataset has only one graph" return self._g def __len__(self): return 1 분류 데이터셋 프로세싱 코드의 중요한 부분(마스크 분할하기)을 강조하기 위해서 ``process()`` 함수의 코드 일부는 생략해서 간략하게 만들었다. 일반적으로 노드 분류 테스크에서 하나의 그래프만 사용되기 때문에, ``__getitem__(idx)`` 와 ``__len__()`` 함수 구현이 바뀐 점을 알아두자. 마스크는 PyTorch와 TensorFlow에서는 ``bool tensors`` 이고 MXNet에서는 ``float tensors`` 이다. 다음 예는 ``CitationGraphDataset`` 의 서브 클래스인 :class:`dgl.data.CiteseerGraphDataset` 를 사용하는 방법이다. .. code:: # load data dataset = CiteseerGraphDataset(raw_dir='') graph = dataset[0] # get split masks train_mask = graph.ndata['train_mask'] val_mask = graph.ndata['val_mask'] test_mask = graph.ndata['test_mask'] # get node features feats = graph.ndata['feat'] # get labels labels = graph.ndata['label'] 노드 분류 모델에 대한 전체 가이드는 :ref:`guide_ko-training-node-classification` 를 참고하자. 
DGL의 빌트인 데이터셋들은 노드 분류 데이터셋의 여러 예제들을 포함하고 있다. * :ref:`citationdata` * :ref:`corafulldata` * :ref:`amazoncobuydata` * :ref:`coauthordata` * :ref:`karateclubdata` * :ref:`ppidata` * :ref:`redditdata` * :ref:`sbmdata` * :ref:`sstdata` * :ref:`rdfdata` 링크 예측 데이터셋 프로세싱 ~~~~~~~~~~~~~~~~~~~~ 링크 예측 데이터셋을 프로세싱하는 것은 주로 데이터셋에 하나의 그래프만 있기 때문에, 노드 분류의 경우와 비슷하다. 예제로 `KnowledgeGraphDataset `__ 빌트인 데이터셋을 사용하는데, 링크 예측 데이터셋 프로세싱의 주요 부분을 강조하기 위해서 자세한 데이터 프로세싱 코드는 생략했다. .. code:: # Example for creating Link Prediction datasets class KnowledgeGraphDataset(DGLBuiltinDataset): def __init__(self, name, reverse=True, raw_dir=None, force_reload=False, verbose=True): self._name = name self.reverse = reverse url = _get_dgl_url('dataset/') + '{}.tgz'.format(name) super(KnowledgeGraphDataset, self).__init__(name, url=url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose) def process(self): # Skip some processing code # === data processing skipped === # splitting mask g.edata['train_mask'] = train_mask g.edata['val_mask'] = val_mask g.edata['test_mask'] = test_mask # edge type g.edata['etype'] = etype # node type g.ndata['ntype'] = ntype self._g = g def __getitem__(self, idx): assert idx == 0, "This dataset has only one graph" return self._g def __len__(self): return 1 위 코드에서 볼 수 있듯이 분할 마스크들을 그래프의 ``edata`` 필드에 추가한다. 전체 구현은 `KnowledgeGraphDataset 소스 코드 `__ 를 참고하자. .. code:: from dgl.data import FB15k237Dataset # load data dataset = FB15k237Dataset() graph = dataset[0] # get training mask train_mask = graph.edata['train_mask'] train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() src, dst = graph.edges(train_idx) # get edge types in training set rel = graph.edata['etype'][train_idx] 링크 예측 모델에 대한 전체 가이드는 :ref:`guide_ko-training-link-prediction` 에 있다. DGL의 빌트인 데이터셋들은 링크 예측 데이터셋의 여러 예제들을 포함하고 있다. * :ref:`kgdata` * :ref:`bitcoinotcdata` ================================================ FILE: docs/source/guide_ko/data-savenload.rst ================================================ ..
_guide_ko-data-pipeline-savenload: 4.4 데이터 저장과 로딩 ------------------ :ref:`(English Version) ` DGL에서는 프로세싱된 데이터를 로컬 디스크에 임시로 저장하기 위해 저장 및 로딩 함수를 구현할 것을 권장한다. 이는 대부분의 경우에 데이터 프로세싱 시간을 상당히 절약할 수 있게한다. DGL은 이를 간단하게 구현하기 위한 4가지 함수를 제공한다: - :func:`dgl.save_graphs` 와 :func:`dgl.load_graphs` : DGLGraph 객체와 레이블을 로컬 디스크로 저장/로딩함 - :func:`dgl.data.utils.save_info` 와 :func:`dgl.data.utils.load_info` : 데이터셋에 대한 유용한 정보(python의 ``dict`` 객체)를 로컬 디스크로 저장/로딩함 다음 예는 그래프들의 리스트와 데이터셋 정보를 저장하는 것을 보여준다. .. code:: import os from dgl import save_graphs, load_graphs from dgl.data.utils import makedirs, save_info, load_info def save(self): # save graphs and labels graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') save_graphs(graph_path, self.graphs, {'labels': self.labels}) # save other information in python dict info_path = os.path.join(self.save_path, self.mode + '_info.pkl') save_info(info_path, {'num_classes': self.num_classes}) def load(self): # load processed data from directory `self.save_path` graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') self.graphs, label_dict = load_graphs(graph_path) self.labels = label_dict['labels'] info_path = os.path.join(self.save_path, self.mode + '_info.pkl') self.num_classes = load_info(info_path)['num_classes'] def has_cache(self): # check whether there are processed data in `self.save_path` graph_path = os.path.join(self.save_path, self.mode + '_dgl_graph.bin') info_path = os.path.join(self.save_path, self.mode + '_info.pkl') return os.path.exists(graph_path) and os.path.exists(info_path) 단, 프로세싱된 데이터를 저장하는 것이 적합하지 않은 경우도 있다. 예를 들어, 빌트인 데이터셋 중 :class:`~dgl.data.GDELTDataset` 의 경우 프로세스된 데이터가 굉장히 크기 때문에 ``__getitem__(idx)`` 에서 각 데이터 예제들을 처리하는 것이 더 효율적이다. ================================================ FILE: docs/source/guide_ko/data.rst ================================================ .. 
_guide_ko-data-pipeline: 4장: 그래프 데이터 파이프라인 ====================== :ref:`(English Version) ` DGL은 :ref:`apidata` 에서 일반적으로 많이 사용되는 그래프 데이터셋을 구현하고 있다. 이것들은 :class:`dgl.data.DGLDataset` 클래스에서 정의하고 있는 표준 파이프라인을 따른다. DGL은 :class:`dgl.data.DGLDataset` 의 서브클래스로 그래프 데이터 프로세싱하는 것을 강하게 권장한다. 이는 파이프라인이 그래프 데이터를 로딩하고, 처리하고, 저장하는데 대한 간단하고 깔끔한 방법을 제공하기 때문이다. 로드맵 ---- 이 장은 커스텀 DGL-Dataset를 만드는 방법을 소개한다. 이를 위해 다음 절들에서 파이프라인이 어떻게 동작하는지 설명하고, 각 파이프라인의 컴포넌트를 구현하는 방법을 보여준다. * :ref:`guide_ko-data-pipeline-dataset` * :ref:`guide_ko-data-pipeline-download` * :ref:`guide_ko-data-pipeline-process` * :ref:`guide_ko-data-pipeline-savenload` * :ref:`guide_ko-data-pipeline-loadogb` .. toctree:: :maxdepth: 1 :hidden: :glob: data-dataset data-download data-process data-savenload data-loadogb ================================================ FILE: docs/source/guide_ko/distributed-apis.rst ================================================ .. _guide_ko-distributed-apis: 7.2 분산 APIs -------------------- :ref:`(English Version) ` 이 절은 학습 스크립트에 사용할 분산 API들을 다룬다. DGL은 초기화, 분산 샘플링, 그리고 워크로드 분할(split)을 위한 세가지 분산 데이터 구조와 다양한 API들을 제공한다. 분산 학습/추론에 사용되는 세가지 분산 자료 구조는 분산 그래프를 위한 :class:`~dgl.distributed.DistGraph` , 분산 텐서를 위한 :class:`~dgl.distributed.DistTensor` , 그리고 분산 learnable 임베딩을 위한 :class:`~dgl.distributed.DistEmbedding` 이다. DGL 분산 모듈 초기화 ~~~~~~~~~~~~~~~~ :func:`~dgl.distributed.initialize` 은 분산 모듈을 초기화한다. 학습 스크립트가 학습 모드로 수행되면, 이 API는 DGL 서버들간의 연결을 만들고, 샘플러 프로세스들을 생성한다; 스크립트가 서버 모드로 실행되면, 이 API는 서버 코드를 실행하고 절대로 리턴되지 않는다. 이 API는 어떤 DGL 분산 API들 보다 먼저 호출되어야 한다. PyTorch와 함께 사용될 때, :func:`~dgl.distributed.initialize` 는 ``torch.distributed.init_process_group`` 전에 호출되어야 한다. 일반적으로 초기화 API들은 다음 순서로 실행된다. .. code:: python dgl.distributed.initialize('ip_config.txt') th.distributed.init_process_group(backend='gloo') Distributed 그래프 ~~~~~~~~~~~~~~~~~ :class:`~dgl.distributed.DistGraph` 는 클러스터에서 그래프 구조와 노드/에지 피쳐들을 접근하기 위한 Python 클래스이다. 각 컴퓨터는 단 하나의 파티션을 담당한다. 
이 클래스는 파티션 데이터(그 파티션의 그래프 구조, 노드 데이터와 에지 데이터)를 로드하고, 클러스터의 모든 트레이너들이 접근할 수 있도록 만들어 준다. :class:`~dgl.distributed.DistGraph` 는 데이터 접근을 위한 :class:`~dgl.DGLGraph` API들의 작은 서브셋을 지원한다. **Note**: :class:`~dgl.distributed.DistGraph` 는 현재 한 개의 노드 타입과 한 개의 에지 타입만을 지원한다. 분산 모드 vs. 단독(standalone) 모드 ^^^^^^^^^^^^^^^^^^ :class:`~dgl.distributed.DistGraph` 는 두가지 모드로 실행된다: 분산 모드와 단독 모드. 사용자가 학습 스크립트를 Python 명령행이나 Jupyter notebook에서 실행하면, 단독 모드로 수행된다. 즉, 모든 계산이 단일 프로세스에서 수행되고, 다른 어떤 프로세스들과의 통신이 없다. 따라서, 단독 모드에서는 입력 그래프가 한 개의 파티션이다. 이 모드는 주로 개발 및 테스트를 위해서 사용된다 (즉, Jupyter notebook에서 코드를 개발하고 수행할 때). 학습 스크립트가 launch 스크립트를 사용해서 실행되면 (launch 스크립트 섹션 참조), :class:`~dgl.distributed.DistGraph` 가 분산 모드로 동작한다. Launch 툴은 자동으로 (노드/에지 피쳐 접근 및 그래프 샘플링을 하는) 서버들을 구동하고, 클러스터의 각 컴퓨터에 파티션 데이터를 자동으로 로드한다. :class:`~dgl.distributed.DistGraph` 는 클러스터의 서버들과 네트워크를 통해서 연결한다. DistGraph 생성 ^^^^^^^^^^^^^ 분산 모드에서는, :class:`~dgl.distributed.DistGraph` 를 생성할 때 파티션에서 사용된 그래프 이름이 필요하다. 그래프 이름은 클러스터에서 로드될 그래프를 지정한다. .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name') 단독 모드로 수행될 때, 로컬 머신의 그래프 데이터를 로드한다. 따라서, 사용자는 입력 그래프에 대한 모든 정보를 담고 있는 파티션 설정 파일을 제공해야 한다. .. code:: python import dgl g = dgl.distributed.DistGraph('graph_name', part_config='data/graph_name.json') **Note**: DGL의 현재 구현은 `DistGraph` 객체를 한 개만 만들 수 있다. `DistGraph` 를 없애고 새로운 것을 다시 만드는 것은 정의되어 있지 않다. 그래프 구조 접근 ^^^^^^^^^^^^ :class:`~dgl.distributed.DistGraph` 는 그래프 구조 접근을 위한 적은 수의 API들을 갖고 있다. 현재 대부분 API들은 노드 및 에지 수와 같은 그래프 정보를 제공한다. DistGraph의 주요 사용 케이스는 미니-배치 학습을 지원하기 위한 샘플링 API를 수행하는 것이다. (분산 그래프 샘플링은 섹션 참조) .. code:: python print(g.num_nodes()) 노드/에지 데이터 접근 ^^^^^^^^^^^^^^^^ :class:`~dgl.DGLGraph` 처럼 :class:`~dgl.distributed.DistGraph` 는 노드와 에지의 데이터 접근을 위해서 ``ndata`` 와 ``edata`` 를 제공한다. 차이점은 :class:`~dgl.distributed.DistGraph` 의 ``ndata`` / ``edata`` 는 사용되는 프레임워크의 텐서 대신 :class:`~dgl.distributed.DistTensor` 를 리턴한다는 것이다. 사용자는 새로운 :class:`~dgl.distributed.DistTensor` 를 :class:`~dgl.distributed.DistGraph` 노드 데이터 또는 에지 데이터로서 할당할 수 있다. .. 
code:: python g.ndata['train_mask'] # g.ndata['train_mask'][0] # tensor([1], dtype=torch.uint8) 분산 텐서(Distributed Tensor) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 앞에서 언급했듯이, DGL은 노드/에지 피쳐들을 샤드(shard)해서, 머신들의 클러스터에 이것들을 저장한다. DGL은 클러스터에서 파티션된 노드/에지 피쳐들을 접근하기 위해서 tensor-like 인터페이스를 갖는 분산 텐서를 제공한다. 분산 세팅에서 DGL은 덴스 노드/에지 피쳐들만 지원한다. :class:`~dgl.distributed.DistTensor` 는 파티션되어 여러 머신들에 저장되어 있는 덴스 텐서들을 관리한다. 지금은 분산 텐서는 그래프의 노드 또는 에지와 연결되어 있어야만 한다. 다르게 말하자면, `DistTensor` 의 행 개수는 그래프의 노드 개수 또는 에지의 개수와 같아야만 한다. 아래 코드는 분산 텐서를 생성하고 있다. `shape` 과 `dtype` 뿐만아니라, 유일한 텐서 이름을 지정할 수 있다. 사용자가 영속적인 분산 텐서를 참고하고자 할 경우 이 이름은 유용하다 (즉, :class:`~dgl.distributed.DistTensor` 객체가 사라져도 클러스터에 존재하는 텐서). .. code:: python tensor = dgl.distributed.DistTensor((g.num_nodes(), 10), th.float32, name='test') **Note**: :class:`~dgl.distributed.DistTensor` 생성은 동기화 수행이다. 모든 트레이너들은 생성을 실행해야하고, 모든 트레이너가 이를 호출한 경우에만 생성이 완료된다. 사용자는 :class:`~dgl.distributed.DistTensor` 를 노드 데이터 또는 에지 데이터의 하나로서 :class:`~dgl.distributed.DistGraph` 객체에 추가할 수 있다. .. code:: python g.ndata['feat'] = tensor **Note**: 노드 데이터 이름과 텐서 이름이 같을 필요는 없다. 전자는 :class:`~dgl.distributed.DistGraph` 로부터 노드 데이터를 구별하고(트레이너 프로세스에서), 후자는 DGL 서버들에서 분산 텐서를 구별하는데 사용된다. :class:`~dgl.distributed.DistTensor` 는 적은 수의 함수들을 제공한다. 이는 일반 텐서가 `shape` 또는 `dtype` 과 같은 메타데이터를 접근하는 것과 같은 API들이다. :class:`~dgl.distributed.DistTensor` 는 인덱스를 사용한 읽기와 쓰기를 지원하지만, `sum` 또는 `mean` 과 같은 연산 오퍼레이터는 지원하지 않는다. .. code:: python data = g.ndata['feat'][[1, 2, 3]] print(data) g.ndata['feat'][[3, 4, 5]] = data **Note**: 현재 DGL은 한 머신이 여러 서버들을 수행할 때, 다중의 서버들이 동시에 쓰기를 수행하는 경우에 대한 보호를 지원하지 않는다. 이 경우 데이터 깨짐(data corruption)이 발생할 수 있다. 같은 행의 데이터에 동시 쓰기를 방지하는 방법 중 하나는 한 머신에서 한 개의 서버 프로세스만 실행하는 것이다. 분산 DistEmbedding ~~~~~~~~~~~~~~~~~ DGL은 노드 임베딩들을 필요로 하는 변환 모델(transductive models)을 지원하기 위해서 :class:`~dgl.distributed.DistEmbedding` 를 제공한다. 분산 임베딩을 생성하는 것은 분산 텐서를 생성하는 것과 비슷하다. ..
code:: python def initializer(shape, dtype): arr = th.zeros(shape, dtype=dtype) arr.uniform_(-1, 1) return arr emb = dgl.distributed.DistEmbedding(g.num_nodes(), 10, init_func=initializer) 내부적으로는 분산 임베딩은 분산 텐서를 사용해서 만들어진다. 따라서, 분산 텐서와 비슷하게 동작한다. 예를 들어, 임베딩이 만들어지면, 그것들은 클러스터의 여러 머신들에 나눠져서(shard) 저장된다. 이는 이름을 통해서 고유하게 식별될 수 있다. **Note**: 초기화 함수가 서버 프로세스에서 호출된다. 따라서, :class:`~dgl.distributed.initialize` 전에 선언되어야 한다. 임베딩은 모델의 일부이기 때문에, 미니배치 학습을 위해서 이를 optimizer에 붙여줘야 한다. 현재는, DGL은 sparse Adagrad optimizer, :class:`~dgl.distributed.SparseAdagrad` 를 지원한다 (DGL은 sparse 임베딩을 위한 더 많은 optimizer들을 추가할 예정이다). 사용자는 모델로부터 모든 분산 임베딩을 수집하고, 이를 sparse optimizer에 전달해야 한다. 만약 모델이 노드 임베딩과 정상적인 dense 모델 파라메터들을 갖고, 사용자가 임베딩들에 sparse 업데이트를 수행하고 싶은 경우, optimizer 두 개를 만들어야 한다. 하나는 노드 임베딩을 위한 것이고, 다른 하나는 dense model 파라메터들을 위한 것이다. 다음 코드를 보자. .. code:: python sparse_optimizer = dgl.distributed.SparseAdagrad([emb], lr=lr1) optimizer = th.optim.Adam(model.parameters(), lr=lr2) feats = emb(nids) loss = model(feats) loss.backward() optimizer.step() sparse_optimizer.step() **Note**: :class:`~dgl.distributed.DistEmbedding` 는 PyTorch nn 모듈이 아니다. 따라서, PyTorch nn 모듈의 파라메터들을 통해서 접근할 수 없다. 분산 샘플링 ~~~~~~~~ DGL은 미니-배치를 생성하기 위해 노드 및 에지 샘플링을 하는 두 수준의 API를 제공한다 (미니-배치 학습 섹션 참조). Low-level API는 노드들의 레이어가 어떻게 샘플링될지를 명시적으로 정의하는 코드를 직접 작성해야한다 (예를 들면, :func:`dgl.sampling.sample_neighbors` 사용해서). High-level API는 노드 분류 및 링크 예측(예, :class:`~dgl.dataloading.pytorch.NodeDataLoader` 와 :class:`~dgl.dataloading.pytorch.EdgeDataLoader`) 에 사용되는 몇 가지 유명한 샘플링 알고리즘을 구현하고 있다. 분산 샘플링 모듈도 같은 디자인을 따르고 있고, 두 level의 샘플링 API를 제공한다. Low-level 샘플링 API의 경우, :class:`~dgl.distributed.DistGraph` 에 대한 분산 이웃 샘플링을 위해 :func:`~dgl.distributed.sample_neighbors` 가 있다. 또한, DGL은 분산 샘플링을 위해 분산 데이터 로더, :class:`~dgl.distributed.DistDataLoader` 를 제공한다. 분산 DataLoader는 PyTorch DataLoader와 같은 인터페이스를 갖는데, 다른 점은 사용자가 데이터 로더를 생성할 때 worker 프로세스의 개수를 지정할 수 없다는 점이다. Worker 프로세스들은 :func:`dgl.distributed.initialize` 에서 만들어진다.
**Note**: :class:`~dgl.distributed.DistGraph` 에 :func:`dgl.distributed.sample_neighbors` 를 실행할 때, 샘플러는 다중의 worker 프로세스를 갖는 PyTorch DataLoader에서 실행될 수 없다. 주요 이유는 PyTorch DataLoader는 매 epoch 마다 새로운 샘플링 worker 프로세스는 생성하는데, 이는 :class:`~dgl.distributed.DistGraph` 객체들을 여러번 생성하고 삭제하게하기 때문이다. Low-level API를 사용할 때, 샘플링 코드는 단일 프로세스 샘플링과 비슷하다. 유일한 차이점은 사용자가 :func:`dgl.distributed.sample_neighbors` 와 :class:`~dgl.distributed.DistDataLoader` 를 사용한다는 것이다. .. code:: python def sample_blocks(seeds): seeds = th.LongTensor(np.asarray(seeds)) blocks = [] for fanout in [10, 25]: frontier = dgl.distributed.sample_neighbors(g, seeds, fanout, replace=True) block = dgl.to_block(frontier, seeds) seeds = block.srcdata[dgl.NID] blocks.insert(0, block) return blocks dataloader = dgl.distributed.DistDataLoader(dataset=train_nid, batch_size=batch_size, collate_fn=sample_blocks, shuffle=True) for batch in dataloader: ... 동일한 high-level 샘플링 API들(:class:`~dgl.dataloading.pytorch.NodeDataLoader` 와 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` )이 :class:`~dgl.DGLGraph` 와 :class:`~dgl.distributed.DistGraph` 에 대해서 동작한다. :class:`~dgl.dataloading.pytorch.NodeDataLoader` 과 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 를 사용할 때, 분산 샘플링 코드는 싱글-프로세스 샘플링 코드와 정확하게 같다. .. code:: python sampler = dgl.sampling.MultiLayerNeighborSampler([10, 25]) dataloader = dgl.sampling.DistNodeDataLoader(g, train_nid, sampler, batch_size=batch_size, shuffle=True) for batch in dataloader: ... 워크로드 나누기(Split workloads) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 모델을 학습하기 위해서, 사용자는 우선 데이터를 학습, 검증 그리고 테스트 셋으로 나눠야한다. 분산 학습에서는, 이 단계가 보통은 그래프를 파터션하기 위해 :func:`dgl.distributed.partition_graph` 를 호출하기 전에 일어난다. 우리는 데이터 split를 노드 데이 또는 에지 데이터로서 boolean array들에 저장하는 것을 권장한다. 노드 분류 테스크의 경우에 이 boolean array들의 길이는 그래프의 노드의 개수와 같고, 각 원소들은 노드가 학습/검증/테스트 셋에 속하는지를 지정한다. 링크 예측 테스크에도 비슷한 boolean array들을 사용해야 한다. :func:`dgl.distributed.partition_graph` 는 그래프 파티션 결과에 따라서 이 boolean array들을 나누고, 이를 그래프 파타션과 함께 저장한다. 
분산 학습을 수행하는 동안에 사용자는 학습 노드들/에지들을 각 트레이너에게 할당해야 한다. 비슷하게, 검증 및 테스트 셋도 같은 방법으로 나눠야만 한다. DGL은 분산학습이 수행될 때 학습, 검증, 테스트 셋을 나누는 :func:`~dgl.distributed.node_split` 와 :func:`~dgl.distributed.edge_split` 를 제공한다. 이 두 함수는 그래프 파티셔닝 전에 생성된 boolean array들을 입력으로 받고, 그것들을 나누고 나눠진 부분을 로컬 트레이너에게 리턴한다. 기본 설정으로는 모든 부분들이 같은 개수의 노드와 에지를 갖도록 해준다. 이는 각 트레이너가 같은 크기의 미니-배치들을 갖는다고 가정하는 synchronous SGD에서 중요하다. 아래 예제는 학습 셋을 나누고, 노드들의 서브셋을 로컬 프로세스에 리턴한다. .. code:: python train_nids = dgl.distributed.node_split(g.ndata['train_mask']) ================================================ FILE: docs/source/guide_ko/distributed-hetero.rst ================================================ .. _guide_ko-distributed-hetero: 7.3 분산 heterogeneous 그래프 학습하기 --------------------------------- :ref:`(English Version) ` DGL v0.6.0은 heterogeneous 그래프들을 위한 분산 학습을 실험적으로 지원한다. DGL에서 heterogeneous 그래프의 노드와 에지는 그 노드 타입 및 에지 타입에서 고유한 ID를 갖는다. DGL은 노드/에지 타입과 타입별 ID의 tuple을 사용해서 노드 및 에지를 지정한다. 분산 학습에서는 노드/에지 타입과 타입별 ID의 tuple과 더불어서 노드 또는 에지는 homogeneous ID를 통해서 지정될 수 있다. Homogeneous ID는 노드 타입이나 에지 타입과 관련없이 고유하다. DGL은 같은 타입의 모든 노드들이 연속된 homogeneous ID값들을 갖도록 노드와 에지를 정렬한다. 아래 그림은 homogeneous ID 할당을 보여주는 heterogeneous 그래프의 adjacency matrix이다. 여기서 그래프는 두가지 노드 타입( `T0` 와 `T1` )을, 네가지 에지 타입(`R0` , `R1` , `R2` , `R3` )을 갖는다. 그래프는 총 400개의 노드를 갖고, 각 타입은 200개 노드를 갖는다. `T0` 의 노드들은 [0,200)의 ID를 갖고, `T1` 의 노드들은 [200, 400)의 ID 값을 갖는다. 여기서 만약 tuple을 사용해서 노드를 구분한다면, `T0` 의 노드들은 (T0, type-wise ID)로 지정될 수 있다. 여기서 type-wise ID는 [0,200)에 속한다; `T1` 의 노드들은 (T1, type-wise ID)로 지정되고, type-wise ID는 [0, 200)에 속한다. .. figure:: https://data.dgl.ai/tutorial/hetero/heterograph_ids.png :alt: Imgur 7.3.1 분산 그래프 데이터 접근하기 ^^^^^^^^^^^^^^^^^^^^^^^^^^ 분산 학습을 위해 :class:`~dgl.distributed.DistGraph` 은 :class:`~dgl.DGLGraph` 에서 heterogeneous 그래프 API를 지원한다. 아래 코드는 `T0` 의 노드 데이터를 type-wise 노드 ID를 사용해서 얻는 것을 보여준다. :class:`~dgl.distributed.DistGraph` 의 데이터를 접근할 때, 사용자는 type-wise ID와 연관된 노드 타입 또는 에지 타입을 사용해야 한다. ..
code:: python import dgl g = dgl.distributed.DistGraph('graph_name', part_config='data/graph_name.json') feat = g.nodes['T0'].data['feat'][type_wise_ids] 사용자는 특정 노드 타입 또는 에지 타입에 대한 분산 텐서 및 분산 임베딩을 생성할 수 있다. 분산 텐서들과 분산 임베딩들은 여러 머신에 나눠져서 저장된다. 만들 때는 :class:`~dgl.distributed.PartitionPolicy` 로 파티션을 어떻게 할지를 명시해야 한다. 기본 설정으로 DGL은 첫 차원 값의 크기를 기반으로 적절한 파티션 정책을 선택한다. 하지만, 다중 노드 타입 또는 에지 타입이 같은 수의 노드 또는 에지를 갖는 다면, DGL은 파티션 정책을 자동으로 결정할 수 없고, 사용자는 직접 파티션 정책을 지정해야 한다. 아래 코드는 노드 타입 `T0` 의 분산 텐서를 `T0` 를 위한 파티션 정책을 사용해서 생성하고, 이를 `T0` 의 노드 데이터로 저장한다. .. code:: python g.nodes['T0'].data['feat1'] = dgl.distributed.DistTensor((g.num_nodes('T0'), 1), th.float32, 'feat1', part_policy=g.get_node_partition_policy('T0')) 분산 텐서 및 분산 임베딩을 만들기 위한 파티션 정책은 heterogeneous 그래프가 그래프 서버에 로드될 때 초기화된다. 사용자는 새로운 파티션 정책을 실행 중에 생성할 수 없다. 따라서, 사용자는 노드 타입 이나 에지 타입에 대한 분산 텐서 또는 분산 임베딩 만을 만들 수 있다. 7.3.2 분산 샘플링 ^^^^^^^^^^^^^^ DGL v0.6은 분산 샘플링에서 homogeneous ID를 사용한다. **Note**: 이는 앞으로 릴리즈에서 바뀔 수도 있다. DGL은 homogeneous ID와 type-wise ID 간에 노드 ID와 에지 ID를 변환하는 네 개의 API를 제공한다. * :func:`~dgl.distributed.GraphPartitionBook.map_to_per_ntype` : homogeneous 노드 ID를 type-wise ID와 노드 타입 ID로 변환한다. * :func:`~dgl.distributed.GraphPartitionBook.map_to_per_etype` : homogeneous 에지 ID를 type-wise ID와 에지 타입 ID로 변환한다. * :func:`~dgl.distributed.GraphPartitionBook.map_to_homo_nid` : type-wise ID와 노드 타입을 homogeneous 노드 ID로 변환한다. * :func:`~dgl.distributed.GraphPartitionBook.map_to_homo_eid` : type-wise ID와 에지 타입을 homogeneous 에지 ID로 변환한다. 다음 예제는 `paper` 라는 노드 타입을 갖는 heterogeneous 그래프로부터 :func:`~dgl.distributed.sample_neighbors` 를 사용해서 서브 그래프를 샘플링한다. 이는 우선 type-wise 노드 ID들을 homogeneous 노드 ID들로 변환한다. 시드 노드들로 서브 그래프를 샘플링 한 다음, homogeneous 노드 ID들과 에지 ID들을 type-wise ID들로 바꾸고, 타입 ID를 노드 데이터와 에지 데이터에 저장한다. .. code:: python gpb = g.get_partition_book() # We need to map the type-wise node IDs to homogeneous IDs. 
cur = gpb.map_to_homo_nid(seeds, 'paper') # For a heterogeneous input graph, the returned frontier is stored in # the homogeneous graph format. frontier = dgl.distributed.sample_neighbors(g, cur, fanout, replace=False) block = dgl.to_block(frontier, cur) cur = block.srcdata[dgl.NID] block.edata[dgl.EID] = frontier.edata[dgl.EID] # Map the homogeneous edge Ids to their edge type. block.edata[dgl.ETYPE], block.edata[dgl.EID] = gpb.map_to_per_etype(block.edata[dgl.EID]) # Map the homogeneous node Ids to their node types and per-type Ids. block.srcdata[dgl.NTYPE], block.srcdata[dgl.NID] = gpb.map_to_per_ntype(block.srcdata[dgl.NID]) block.dstdata[dgl.NTYPE], block.dstdata[dgl.NID] = gpb.map_to_per_ntype(block.dstdata[dgl.NID]) 노드/에지 타입 ID를 위해서, 사용자는 노드/에지 타입을 검색할 수 있다. 예를 들어, `g.ntypes[node_type_id]` . 노드/에지 타입들과 type-wise ID들을 사용해서, 사용자는 미니배치 계산을 위해서 `DistGraph` 로부터 노드/에지 데이터를 검색할 수 있다. ================================================ FILE: docs/source/guide_ko/distributed-preprocessing.rst ================================================ .. _guide_ko-distributed-preprocessing: 7.1 분산 학습을 위한 전처리 --------------------- :ref:`(English Version) ` DGL의 분산 학습을 사용하기 위해서는 그래프 데이터에 대한 전처리가 필요하다. 이 전처리는 두 단계로 구성된다: 1) 그래프를 서브 그래프들로 파티션하기, 2) 노드/에지들에 새로운 ID를 부여하기. 상대적으로 작은 그래프들의 경우, DGL이 제공하는 파티셔닝 API :func:`dgl.distributed.partition_graph` 를 사용해서 위 두 단계를 수행할 수 있다. 이 API는 한 컴퓨터에서 수행된다. 따라서, 그래프가 큰 경우, 이 API를 사용하고 싶다면 큰 컴퓨터를 사용해야 한다. 이 API과 더불어, 여기서는 큰 그래프를 컴퓨터들의 클러스터에서 파티션을 하는 솔루션을 소개한다. (7.1.1 절을 보라) :func:`dgl.distributed.partition_graph` 는 랜덤 파티션과 `Metis `__ 기반의 파티셔닝을 모두 지원한다. Metis 파티셔닝의 장점은 최소의 에지 컷(edge cut)을 갖는 파티션들을 만들 수 있다는 것이다. 이는 분산 학습 및 추론에서 네트워크 통신을 줄여준다. DGL은 최신 버전의 Metis은 실제(real world)에서 거듭 제곱 법칙의 분포를 갖는 그래프에 최적화되어 있다. 파타셔닝 후, API는 학습시 쉽게 로딩될 수 있는 형태로 파티션된 결과를 만든다. 기본 설정으로 파티션 API는 분산 학습/추론이 실행될 때 노드/에지를 구별하는 것을 돕기 위해서 입력 그래프의 노드와 에지에 새로운 ID를 부여한다. ID를 할당한 후, 파티션 API은 모든 노드 데이터와 에지 데이터를 섞는다. 파티션된 서브 그래프를 생선한 후, 각 서브 그래프는 ``DGLGraph`` 객체로 저장된다. 
섞기 전의 원본 노드/에지 ID들은 서브 그래프들의 노드/에지 데이터의 `orig_id` 필드에 저장된다. 서브 그래프의 노드 데이터 `dgl.NID` 와 에지 데이터 `dgl.EID` 는 노드/에지들이 reshuffle 후의 전체 그래프의 새로운 노드/에지 ID를 저장한다. 학습이 실행되는 동안, 사용자는 새로운 노드/에지 ID만을 사용한다. 파티션된 결과는 출력 디렉토리의 여러 파일로 저장된다. 이는 한개의 JSON 파일을 포함하는데, 파일 이름은 xxx.json 형태이고, xxx는 파티션 API에 사용된 그래프 이름이다. JSON 파일은 모든 파티션 설정들을 갖는다. 만약 파티션 API가 새로운 ID를 노드와 에지에 할당하지 않은 경우에는, 추가적으로 두 개의 Numpy 파일; `node_map.npy` 와 `edge_map.npy` 를 생성하는데, 이는 노드/에지 ID와 파티션 ID의 매핑을 저장한다. 만약 그래프에 수십억 개의 노드와 에지가 있다면, 두 파일의 Numpy array는 커질 것이다. 그 이유는 그래프의 각 노드 및 에지에 대해서 하나의 엔트리를 갖기 때문이다. 각 파티션에 대한 폴더에는 DGL 포멧으로 파티션 데이터를 저장하는 세 개의 파일이 있다. `graph.dgl` 은 파티션의 그래프 구조와 노드 및 에지에 대한 메타 데이터를 저장하고 있고, `node_feats.dgl` 과 `edge_feats.dgl` 은 파티션에 속하는 노드와 에지의 모든 피쳐들을 저장하고 있다. .. code-block:: none data_root_dir/ |-- xxx.json # partition configuration file in JSON |-- node_map.npy # partition id of each node stored in a numpy array (optional) |-- edge_map.npy # partition id of each edge stored in a numpy array (optional) |-- part0/ # data for partition 0 |-- node_feats.dgl # node features stored in binary format |-- edge_feats.dgl # edge features stored in binary format |-- graph.dgl # graph structure of this partition stored in binary format |-- part1/ |-- node_feats.dgl |-- edge_feats.dgl |-- graph.dgl 로드 밸런싱 ~~~~~~~~ 그래프를 파티셔닝할 때, Metis의 기본 설정은 각 파티션의 노드 수에 대해서 균형을 맞춘다. 그 결과 주어진 테스크에 따라서 최적이지 않은 구성(suboptimal configuration)이 될 수 있다. 예를 들어, semi-supervised 노드 분류의 경우, 트레이너는 로컬 파티션의 레이블이 있는 노드들의 서브셋에 대해서 계산을 수행한다. 그래프의 노드들(레이블이 있는 것과 없는 모든 노드)에 균형을 맞추는 파티셔닝은 계산적인 로드(computational load)가 불균형하게 될 수 있다. 각 파티션에 균형잡힌 워크로드를 얻기 위해서 파티션 API는 각 노드 타입에 대한 노드 수를 고려해서 파티션들에 대한 균형을 만드는 것을 지원한다. 이는 :func:`dgl.distributed.partition_graph` 에서 ``balance_ntypes`` 를 설정하는 것으로 가능하다. 사용자들은 이 기능을 활용해서, 학습 셋, 검증 셋, 그리고 테스트 셋에 다른 노드 타입들이 포함된 것을 고려하게 할 수 있다. 아래 코드는 학습 셋 내에서 그리고 학습 셋 외에 두 가지 노드 타입이 있다는 것을 고려한 코드 예제이다. ..
code:: python dgl.distributed.partition_graph(g, 'graph_name', 4, '/tmp/test', balance_ntypes=g.ndata['train_mask']) 노드 타입 균형을 맞추는 것에 더해서, :func:`dgl.distributed.partition_graph` 는 ``balance_edges`` 설정을 통해서 다른 노드 타입들의 노드들의 in-degree들 사이의 균형을 잡는 것을 지원한다. 이는 다른 타입의 노드들에 부속되는 에지들의 개수에 대한 균형을 만든다. **Note**: :func:`dgl.distributed.partition_graph` 에 전달되는 그래프 이름은 중요한 인자이다. 그 그래프 이름은 :class:`dgl.distributed.DistGraph` 가 분산 그래프를 지정하는데 사용된다. 그래프 이름은 알파벳 문자들과 밑줄 기호만으로 구성되어야 한다. ID 매핑 ~~~~~~ :func:`dgl.distributed.partition_graph` 는 파티셔닝을 하는 과정에서 노드 ID와 에지 ID를 섞고, 노드 데이터와 에지 데이터도 그에 따라서 섞어준다. 학습이 끝나면, 다운스트림 과제를 위해서 계산된 노드 임베딩들을 저장할 필요가 있다. 따라서, 저장된 노드 임베딩을 원본 ID에 따라서 다시 섞어야한다. `return_mapping=True` 인 경우, :func:`dgl.distributed.partition_graph` 는 섞인 노드/에지 ID와 그것들의 원본 ID 사이의 매핑을 리턴한다. Homogeneous 그래프의 경우, 두 벡터를 리턴한다. 첫번째 벡터는 모든 섞인 노드 ID와 그것의 원본 ID 매핑을, 두번째 벡터는 모든 섞인 에지 ID와 그것의 원본 ID 매핑이다. Heterogeneous 그래프의 경우에는 벡터들의 dictionary 두 개가 리턴된다. 첫번째 dictionary는 각 노드 타입에 대한 매핑을, 두번째 dictionary는 각 에지 타입에 대한 매핑이다. .. code:: python node_map, edge_map = dgl.distributed.partition_graph(g, 'graph_name', 4, '/tmp/test', balance_ntypes=g.ndata['train_mask'], return_mapping=True) # Let's assume that node_emb is saved from the distributed training. orig_node_emb = th.zeros(node_emb.shape, dtype=node_emb.dtype) orig_node_emb[node_map] = node_emb 7.1.1 분산 파티셔닝 ^^^^^^^^^^^^^^^^ 큰 그래프를 위해서 DGL은 `ParMetis `__ 을 사용해서 컴퓨터들의 클러스터에서 그래프를 파티셔닝한다. 이 솔루션은 사용자가 ParMETIS에 맞도록 데이터를 준비하고, ParMETIS에 의해 만들어질 파티션들을 위한 :class:`dgl.DGLGraph` 를 만들기 위해서 DGL 스크립트 `tools/convert_partition.py` 를 사용해야 한다. **Note**: `convert_partition.py` 는 `pyarrow` 패키지를 사용해서 csv 파일을 로드한다. `pyarrow` 를 설치하자. ParMETIS 설치 ~~~~~~~~~~~~ ParMETIS는 METIS와 GKLib을 필요로 한다. GKLib 컴파일과 설치는 `here `__ 에 있는 설명을 참고하자. METIS 컴파일과 설치는 아래 설명을 따라 GIT에서 METIS를 클론하고 int64 지원을 활성화해서 컴파일한다. .. code-block:: none git clone https://github.com/KarypisLab/METIS.git make config shared=1 cc=gcc prefix=~/local i64=1 make install 여기서부터는 ParMETIS를 직접 컴파일하고 설치하는 것이 필요하다.
아래 명령을 사용해서 ParMETIS의 DGL 브랜치를 클론한다. .. code-block:: none git clone --branch dgl https://github.com/KarypisLab/ParMETIS.git 그리고, ParMETIS를 컴파일하고 설치한다. .. code-block:: none make config cc=mpicc prefix=~/local make install ParMETIS를 실행하기 전에, 두 환경 변수들, `PATH`와 `LD_LIBRARY_PATH`을 설정해야 한다: .. code-block:: none export PATH=$PATH:$HOME/local/bin export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HOME/local/lib/ ParMETIS를 위한 입력 포멧 ~~~~~~~~~~~~~~~~~~~~~ ParMETIS의 입력 그래프는 다음 이름들을 사용해서 세 개의 파일들에 저장된다: `xxx_nodes.txt` , `xxx_edges.txt` 와 `xxx_stats.txt`. 여기서 `xxx` 는 그래프 이름이다. `xxx_nodes.txt` 의 각 행은 다음 형식으로 노드에 대한 정보를 담고 있다. .. code-block:: none ... 모든 필드들은 공백 문자로 구분된다. * `` 은 정수 값이다. Homogeneous 그래프에서는 항상 0이고, heterogenous 그래프에서는 그 값이 각 노드의 타입을 의미한다. * ``, ``, 등은 정수 값들인데, ParMETIS가 그래프 파티션들의 균형을 맞출 때 노드 가중치로 사용하는 값들이다. 사용자가 노드 가중치를 명시하지 않는 경우, ParMETIS는 각 파티션의 노드 수에 대한 균형을 고려해서 파티션을 나눈다 (좋은 학습 속도를 얻기 위해서는 그래프 파티션들의 균헝을 맞추는 것이 중요하다). 하지만, 이 기본 전략은 많은 use case들에 충분하지 않을 수 있다. 예를 들어, heterogeneous 그래프의 경우, 우리는 모든 파티션들이 각 노드 타입별로 비슷한 개수의 노드들을 갖도록 그래프에 대한 파티션을 나누고 싶다. 아래 토이 예제는 노드 가중치를 사용해서 다른 테입들의 노드 개수의 균형을 맞추것을 어떻게 하는지 보여준다. * `` 은 노드 타입에서의 노드 ID를 표현하는 정수 값이다. DGL에서 각 타입의 노드들은 0부터 시작하는 ID가 부여된다. Homogeneous 그래프에서 이 필드는 노드 ID의 값도 동일하다. * `` 는 선택적인 필드들이다. 이는 임의의 값을 저장하는데 사용될 수 있으며, ParMETIS는 이 필드들을 사용하지 않는다. 잠재적으로는 homogenous 그래프들의 경우 노드 피쳐들과 에지 피쳐들을 이 필드에 저장할 수 있다. * 행(row) ID는 그래프의 *homogeneous* ID를 의미한다 (모든 노드에 고유한 ID가 할당된다). 같은 타입의 모든 노드들에 ID는 연속된 값으로 부여된다. 즉, 같은 타입의 노드들은 `xxx_notes.txt` 파일에 함께 저장되어야 한다. 다음은 두 노드 타입을 갖는 heterogenous 그래프의 노트 파일 예이다. 노드 타입 0은 세 개의 노드를 갖고 있고, 노드 타입 1은 네 개의 노드들을 갖는다. 두 노드 가중치를 사용해서 ParMETIS느 노드 타입 0에 속한 노드 개수와 노드 타입 1에 속한 노드 개수가 대략 같도록 파티션 나눈다. .. code-block:: none 0 1 0 0 0 1 0 1 0 1 0 2 1 0 1 0 1 0 1 1 1 0 1 2 1 0 1 3 비슷하게, `xxx_edges.txt` 의 각 행은 아래 형식으로 에지에 대한 정보를 저장한다. .. code-block:: none 모든 필드들은 공백 문자로 구분된다. * `` 는 소스 노드의 *homogeneous* ID이다. * `` 는 목적지 노드의 *homogeneous* ID이다. * `` 는 에지 타입에 대한 에지 ID이다. * `` 는 선택적인 필드들이다. 
임의의 값을 저장하는데 사용할 수 있는데, ParMETIS는 이 필드를 사용하지 않는다. **Note**: 에지 파일에 중복된 에지나 셀프-룹을 갖는 에지가 없어야 한다. `xxx_stats.txt` 는 그래프에 대한 기본적인 통계들을 저장한다. 이 파일은 공백으로 구분되는 세 필드들로 구성된 단 한 줄만 갖는다. .. code-block:: none * `num_nodes` 는 노드 타입을 상관하지 않고 전체 노드 수를 저장한다. * `num_edges` 는 에지 타입을 상관하지 않고 전체 에지 수를 저장한다. * `num_node_weights` 는 노드 파일의 노드 가중치 수를 저장한다. ParMETIS 실행하기 및 결과 포멧들 ~~~~~~~~~~~~~~~~~~~~~~~~~~ ParMETIS는 `pm_dglpart` 명령이 실행된 머신에서 세 파일들에 저장된 그래프를 로드하고, 클러스터의 모든 머신에 데이터를 분산하고, ParMETIS를 실행해서 그래프의 파티션을 나누는 명령 `pm_dglpart` 을 포함하고 있다. 이 명령의 수행이 완료되면, 각 파타션에 대해서 세 개의 파일이 생성된다: `p-xxx_nodes.txt`, `p-xxx_edges.txt`, `p-xxx_stats.txt` **Note**: ParMETIS는 파티셔닝을 수행하면서 노드들에 ID를 재할당한다. ID 재할당이 끝나면, 한 파티션의 노드들은 연속된 ID값을 갖는다; 더 나아가, 같은 타입의 노드들은 연속된 ID들을 부여 받는다. `p-xxx_nodes.txt` 는 파티션의 노드 데이터를 저장한다. 각 행은 한 노드에 대한 다음 정보들을 담고 있다. .. code-block:: none ... * `` 는 ID 재할당 후의 *homogeneous* 노드 ID이다. * `` 는 노드 타입이다. * `` 는 ParMETIS가 사용하는 노드 가중치이다. * `` 는 입력 heterogeneous 그래프의 특정 노드 티입에 대한 원본 노드 ID이다. * `` 는 선택적인 필드들로 입력 노드 파일에서 임의의 값을 갖는다. `p-xxx_edges.txt` 는 파티션의 에지 데이터를 저장한다. 각 행은 한 에지에 대한 다음 정보를 담고 있다. .. code-block:: none * `` 는 ID 재할당 후의 소스 노드의 *homogeneous* ID이다. * `` 는 ID 재할당 후의 목적지 노드의 *homogeneous* ID이다. * `` 는 입력 그래프의 소스 노드에 대한 *homogeneous* ID이다. * `` 는 입력 그래프의 목적지 노드에 대한 *homogeneous* ID이다. * `` 는 입력 그래프의 특정 에지 타입에 대한 에지 ID이다. * `` 은 에지 타입이다. * `` 는 선택적인 필드들로 입력 에지 파일에서 임의의 에지 속성 값을 갖는다. `pm_dglpart` 이 실행된 때, 세 입력 파일들(`xxx_nodes.txt`, `xxx_edges.txt`, `xxx_stats.txt`)은 `pm_dglpart` 명령이 실행된 디렉토리와 같은 곳에 있어야 한다. 다음 명령은 네 개의 ParMETIS 프로세스를 실행해서, `xxx` 라는 이름의 그래프를 8개의 파티션으로 나눈다 (각 프로세스는 2개의 파티션을 담당한다). .. code-block:: none mpirun -np 4 pm_dglpart xxx 2 ParMETIS 결과들을 DGLGraph로 변환하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL은 `convert_partition.py` 라는 스크립트를 제공한다. 이는 `tool` 디렉토리에 있는데, 파티션 파일들에 있는 데이터를 :class:`dgl.DGLGraph` 객체로 변환하고 파일들에 저장하는 역할을 한다. **Note** `convert_partition.py` 는 단일 머신에서 실행된다. 향후, 우리는 이를 확장해서 여러 머신들에 걸쳐서 데이터를 병렬로 변환하도록 만들 것이다. **Note**: csv 파일로 저장된 데이터를 로딩하기 위해서 `pyarrow` 패키지를 설치하자. 
`convert_partition.py` 는 다음 인자들을 받는다: * `--input-dir INPUT_DIR` 는 ParMETIS가 생성한 파티션 파일들이 있는 디렉토리를 지정한다. * `--graph-name GRAPH_NAME` 는 그래프 이름을 지정한다. * `--schema SCHEMA` 는 입력 heterogeneous 그래프의 스키마를 명시하는 파일이다. 스키마 파일은 JSON 파일로서, 노드 타입들과 에지 타입들을 나열하고, 또한 각 노드 타입 및 에지 타입에 대한 homogeneous ID의 범위를 포함한다. * `--num-parts NUM_PARTS` 는 파티션의 개수를 명시한다. * `--num-node-weights NUM_NODE_WEIGHTS` 는 ParMETIS가 파티션들의 균형을 위해서 사용한 노드 가중치의 개수를 지정한다. * `[--workspace WORKSPACE]` 는 선택적인 인자로, 중간 결과들을 저장할 workspace 디렉토리를 지정한다. * `[--node-attr-dtype NODE_ATTR_DTYPE]` 는 선택적인 인자로, 노드 파일들의 나머지 필드인 `` 에 저장된 노드 속성들의 데이터 타입을 명시한다. * `[--edge-attr-dtype EDGE_ATTR_DTYPE]` 는 선택적인 인자로, 에지 파일들의 나머지 필드인 `` 에 저장된 에지 속성들의 데이터 타입을 명시한다. * `--output OUTPUT` 는 파티션 결과들이 저장될 출력 디렉토리를 지정한다. `convert_partition.py` 의 결과 파일들은 다음과 같다: .. code-block:: none data_root_dir/ |-- xxx.json # partition configuration file in JSON |-- part0/ # data for partition 0 |-- node_feats.dgl # node features stored in binary format (optional) |-- edge_feats.dgl # edge features stored in binary format (optional) |-- graph.dgl # graph structure of this partition stored in binary format |-- part1/ |-- node_feats.dgl |-- edge_feats.dgl |-- graph.dgl **Note**: 노드 속성 또는 에지 속성의 데이터 타입이 명시된다면, `convert_partition.py` 는 모든 타입의 모든 노드들 및 에지들이 꼭 이 속성들을 갖는다고 가정한다. 따라서, 다른 타입의 노드들이나 에지들이 서로 다른 개수의 속성을 갖는다면, 사용자는 이를 직접 만들어야 한다. 다음은 `convert_partition.py` 를 위한 OGBN-MAG의 스키마 예제이다. 이는 두 필드를 갖는다: `nid` 와 `eid`. `nid` 안에는, 모든 노드 타입들이 나열되어 있고, 각 노드 타입에 대한 homogeneous ID 범위도 포함되어 있다; `eid` 안에는, 모든 에지 타입들이 나열되어 있고, 각 에지 타입에 대한 homogeneous ID 범위도 포함되어 있다. .. 
code-block:: none { "nid": { "author": [ 0, 1134649 ], "field_of_study": [ 1134649, 1194614 ], "institution": [ 1194614, 1203354 ], "paper": [ 1203354, 1939743 ] }, "eid": { "affiliated_with": [ 0, 1043998 ], "writes": [ 1043998, 8189658 ], "rev-has_topic": [ 8189658, 15694736 ], "rev-affiliated_with": [ 15694736, 16738734 ], "cites": [ 16738734, 22155005 ], "has_topic": [ 22155005, 29660083 ], "rev-cites": [ 29660083, 35076354 ], "rev-writes": [ 35076354, 42222014 ] } } 아래 코드는 스키마 파일을 만드는 예제이다. .. code-block:: none nid_ranges = {} eid_ranges = {} for ntype in hg.ntypes: ntype_id = hg.get_ntype_id(ntype) nid = th.nonzero(g.ndata[dgl.NTYPE] == ntype_id, as_tuple=True)[0] nid_ranges[ntype] = [int(nid[0]), int(nid[-1] + 1)] for etype in hg.etypes: etype_id = hg.get_etype_id(etype) eid = th.nonzero(g.edata[dgl.ETYPE] == etype_id, as_tuple=True)[0] eid_ranges[etype] = [int(eid[0]), int(eid[-1] + 1)] with open('mag.json', 'w') as outfile: json.dump({'nid': nid_ranges, 'eid': eid_ranges}, outfile, indent=4) Heterogeneous 그래프에 대한 노드/에지 피처들 생성하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ `convert_partition.py` 이 만든 :class:`dgl.DGLGraph` 아웃풋은 heterogeneous 그래프 파티션들을 homogeneous 그래프로 저장한다. 노드 데이터는 `orig_id` 라는 필드를 갖는데, 이는 원본 heterogeneous 그래프의 특정 노드 타입의 노드 ID들을 저장하고, `NTYPE` 의 필드는 노드 타입을 저장한다. 추가로, 이는 `inner_node` 라는 노드 데이터를 저장하는데, 이는 그래프 파티션의 노드가 파티션이 할당되어 있는지 여부를 알려준다. 만약 어떤 노드가 파티션에 할당되었다면, `inner_node` 는 1을 갖고, 반대의 경우에는 0을 갖는다. **Note**: 그래프 파티션은 몇 개의 HALO 노드들을 포함하는데, 이는 다른 파티션에 할당된 것지만, 이 그래프 파티션의 몇 개의 에지와 연결되어 있는 것들이다. 이 정보를 사용해서, 우리는 별도로 각 노드 타입에 대한 노드 피쳐들을 구성할 수 있으며, 이들을 `/` 를 키로 갖고 값은 노드 피쳐 벡터인 dictionary에 저장할 수 있다. 아래 코드는 노드 피쳐 dictionary를 구성하는 방법을 보여준다. 텐서들의 dictionary가 만들어지면, 이는 파일에 저장된다. .. 
code-block:: none node_data = {} for ntype in hg.ntypes: local_node_idx = th.logical_and(part.ndata['inner_node'].bool(), part.ndata[dgl.NTYPE] == hg.get_ntype_id(ntype)) local_nodes = part.ndata['orig_id'][local_node_idx].numpy() for name in hg.nodes[ntype].data: node_data[ntype + '/' + name] = hg.nodes[ntype].data[name][local_nodes] dgl.data.utils.save_tensors(metadata['part-{}'.format(part_id)]['node_feats'], node_data) 에지 피쳐도 비슷한 방법으로 구성할 수 있다. 차이점은 :class:`dgl.DGLGraph` 의 모든 에지들이 파티션에 포함된다는 점이다. 그래서, 구성 방법은 더 간단하다. .. code-block:: none edge_data = {} for etype in hg.etypes: local_edges = subg.edata['orig_id'][subg.edata[dgl.ETYPE] == hg.get_etype_id(etype)] for name in hg.edges[etype].data: edge_data[etype + '/' + name] = hg.edges[etype].data[name][local_edges] dgl.data.utils.save_tensors(metadata['part-{}'.format(part_id)]['edge_feats'], edge_data) ================================================ FILE: docs/source/guide_ko/distributed-tools.rst ================================================ .. _guide_ko-distributed-tools: 7.4 분산 학습/추론을 런칭하기 위한 툴들 ------------------------------- :ref:`(English Version) ` DGL은 분산 학습을 돕는 두 스크립트들을 제공한다. * *tools/copy_files.py* : 그래프 파티션들을 하나의 그래프로 복사 * *tools/launch.py* : 머신들의 클러스터에서 분산 학습 잡을 시작 *copy_files.py* 는 (그래프가 파티션이 수행된) 한 머신의 파타션된 데이터와 관련 파일들(예, 학습 스크립트)을 (분산 학습이 수행 될) 클러스터에 복사한다. 스크립트는 한 파티션을 해당 파티션을 사용해서 분산 학습 잡이 실행될 머신에 복사한다. 스크립트는 네 개의 인자를 사용한다. * ``--part_config`` 는 로컬 머신의 파티션된 데이터에 대한 정보를 저장하는 파티션 설정 파일을 지정한다. * ``--ip_config`` 는 클러스터의 IP 설정 파일을 지정한다. * ``--workspace`` 는 분산 학습에 관련된 모든 데이터가 저장될 학습 머신의 디렉토리를 지정한다. * ``--rel_data_path`` 는 파티션된 데이터가 저장될 workspace 디렉토리 아래 상대 경로를 지정한다. * ``--script_folder`` 는 사용자의 학습 스크립트가 저장될 workspace 디렉토리 아래 상대 경로를 지정한다. **Note**: *copy_files.py* 는 IP 설정 파일을 기반으로 파티션을 저장할 머신을 찾는다. 따라서, 같은 IP 설정 파일이 *copy_files.py* 과 *launch.py* 에 사용되어야 한다. DGL은 클러스터에서 분산 학습 잡을 시작하기 위해서 *tools/launch.py* 를 제공한다. 이 스크립트는 다음을 가정한다. 
* 파티션된 데이터와 학습 스크립트는 클러스터 또는 클러스터의 모든 머신이 접근 가능한 글로벌 스토리지(예, NFS)로 복사된다. * (런치 스크립트가 실행되는) 마스터 머신은 다른 모든 머신에 패스워드 없이(passwordless) ssh 접근을 할 수 있다. **Note**: 런치 스크립트는 클러스터의 머신 중에 하나에서 실행되어야 한다. 다음은 클러스터에서 분산 학습 잡을 수행하는 예를 보여준다. .. code:: none python3 tools/launch.py \ --workspace ~graphsage/ \ --num_trainers 2 \ --num_samplers 4 \ --num_servers 1 \ --part_config data/ogb-product.json \ --ip_config ip_config.txt \ "python3 code/train_dist.py --graph-name ogb-product --ip_config ip_config.txt --num-epochs 5 --batch-size 1000 --lr 0.1 --num_workers 4" 설정 파일 *ip_config.txt* 은 클러스터의 머신들의 IP 주소들을 저장한다. *ip_config.txt* 의 전형적인 예는 다음과 같다: .. code:: none 172.31.19.1 172.31.23.205 172.31.29.175 172.31.16.98 각 줄은 한 머신의 IP 주소이다. 선택적으로 IP 주소 뒤에 트레이너들의 네트워크 통신에 사용될 포트 번호도 지정할 수 있다. 포트 번호가 지정되지 않은 경우 기본 값인 ``30050`` 이 사용된다. 런치 스크립트에서 지정된 workspace는 머신들의 작업 디렉토리로, 학습 스크립트, IP 설정 파일, 파티션 설정 파일 그리고 그래프 파티션들이 저장되는 위치이다. 파일들의 모든 경로들은 workspace의 상대 경로로 지정되어야 한다. 런치 스크립트는 한 머신에서 지정된 수의 학습 잡(``--num_trainers`` )을 생성한다. 또한, 사용자는 각 트레이너에 대한 샘플러 프로세스의 개수(``--num_samplers``)를 정해야 한다. 샘플러 프로세스의 개수는 :func:`~dgl.distributed.initialize` 에서 명시된 worker 프로세스의 개수와 같아야 한다. ================================================ FILE: docs/source/guide_ko/distributed.rst ================================================ .. _guide_ko-distributed: 7장: 분산 학습 =========== :ref:`(English Version) ` DGL은 데이터와 연산을 컴퓨터 리소스들의 집합들에 분산하는 완전한 분산 방식을 채택하고 있다. 이 절에서는 클러스터 설정(컴퓨터들의 그룹)을 가정하고 있다. DGL은 그래프를 서브 그래프들로 나누고, 클러스터의 각 컴퓨터는 한개의 서브 그래프 (또는 파티션)에 대해 책임을 진다. DGL은 클러스터의 모든 컴퓨터에서 동일한 학습 스크립트를 실행해서 계산을 병렬화시키고, trainer에게 파티션된 데이터를 제공하기 위해서 같은 컴퓨터에서 서버들을 실행한다. 학습 스크립트를 위해서 DGL은 미니-배치 학습과 비슷한 분산 API를 제공한다. 이는 단일 컴퓨터에서 미니-배치 학습을 수행하는 코드를 아주 조금만 수정하면 되게 해준다. 아래 코드는 GraphSAGE를 분산 형태로 학습하는 예제이다. 유일한 코드 변경은 4-7 라인이다: 1) DGL의 분산 모듈 초기화하기, 2) 분산 그래프 객체 생성하기, 3) 학습 셋을 나누고 로컬 프로세스를 위해서 노드들을 계산하기. 샘플러 생성, 모델 정의, 학습 루프와 같은 나머지 코드는 :ref:`mini-batch training ` 과 같다. ..
code:: python import dgl import torch as th dgl.distributed.initialize('ip_config.txt') th.distributed.init_process_group(backend='gloo') g = dgl.distributed.DistGraph('graph_name', 'part_config.json') pb = g.get_partition_book() train_nid = dgl.distributed.node_split(g.ndata['train_mask'], pb, force_even=True) # Create sampler sampler = NeighborSampler(g, [10,25], dgl.distributed.sample_neighbors, device) dataloader = DistDataLoader( dataset=train_nid.numpy(), batch_size=batch_size, collate_fn=sampler.sample_blocks, shuffle=True, drop_last=False) # Define model and optimizer model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout) model = th.nn.parallel.DistributedDataParallel(model) loss_fcn = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=args.lr) # training loop for epoch in range(args.num_epochs): for step, blocks in enumerate(dataloader): batch_inputs, batch_labels = load_subtensor(g, blocks[0].srcdata[dgl.NID], blocks[-1].dstdata[dgl.NID]) batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() 컴퓨터들의 클러스터에서 학습 스크립트를 수행할 때, DGL은 데이터를 클러스터의 컴퓨터들에 복사하고 모든 컴퓨터에서 학습 잡을 실행하는 도구들을 제공한다. **Note**: 현재 분산 학습 API는 PyTorch 백앤드만 지원한다. DGL은 분산 학습을 지원하기 위해서 몇 가지 분산 컴포넌트를 구현하고 있다. 아래 그림은 컴포넌트들과 그것들의 인터엑션을 보여준다. .. figure:: https://data.dgl.ai/asset/image/distributed.png :alt: Imgur 특히, DGL의 분산 학습은 3가지 종류의 프로세스들을 갖는다: *서버*, *샘플러*, 그리고 *트레이너* * 서버 프로세스는 그래프 파티션(그래프 구조와 노드/에지 피처를 포함)을 저장하고 있는 각 컴퓨터에서 실행된다. 이 서버들은 함께 작동하면서 그래프 데이터를 트레이너에게 제공한다. 한 컴퓨터는 여러 서버 프로세스들을 동시에 수행하면서 연산과 네트워크 통신을 병렬화 한다. * 샘플러 프로세스들은 서버들과 상호작용을 하면서, 학습에 사용될 미니-배치를 만들기 위해서 노드와 에지를 샘플링한다. * 트레이너들은 서버들과 상호작용을 하기 위한 여러 클래스를 포함하고 있다. 파티션된 그래프 데이터를 접근하기 위한 :class:`~dgl.distributed.DistGraph` , 노드/에지의 피쳐/임베딩을 접근하기 위한 :class:`~dgl.distributed.DistEmbedding` 와 :class:`~dgl.distributed.DistTensor` 를 갖는다. 미니-배치를 얻기 위해서 샘플러와 상호작용을 하는 :class:`~dgl.distributed.dist_dataloader.DistDataLoader` 가 있다. 
분산 컴포넌트들을 염두에 두고, 이 절의 나머지에서는 다음과 같은 분산 컴포넌트들을 다룬다. * :ref:`guide_ko-distributed-preprocessing` * :ref:`guide_ko-distributed-apis` * :ref:`guide_ko-distributed-hetero` * :ref:`guide_ko-distributed-tools` .. toctree:: :maxdepth: 1 :hidden: :glob: distributed-preprocessing distributed-apis distributed-hetero distributed-tools ================================================ FILE: docs/source/guide_ko/graph-basic.rst ================================================ .. _guide_ko-graph-basic: 1.1 그래프에 대한 몇가지 기본적인 정의 (그래프 101) ---------------------------------------------------- :ref:`(English Version)` 그래프 :math:`G=(V, E)` 는 엔티티들과 그것들의 관계를 표현하기 위한 자료 구조이다. 그래프는 노드들의 집합(또는 버틱스들) :math:`V` 과 에지들의 집합(또는 아크들) :math:`E` , 두개의 집합으로 구성된다. 두 노드 :math:`u` 와 :math:`v` 의 쌍을 연결하는 에지 :math:`(u, v) \in E` 는 이들 사이에 관계가 있음을 나타낸다. 이 관계는 노드들간의 대칭적인 관계를 표현하는 것과 같이 방향성이 없거나, 비대칭적인 관계를 표현하기 위해서 방향성을 가질 수 있다. 예를 들어, 소셜 네트워크에서 사람들 간의 친구 관계 모델링에 그래프를 사용한다면, 친구 관계는 양방향이기 때문에 에지는 방향성이 없을 것이다. 하지만, 그래프가 트위터의 팔로우 관계를 모델링하는데 사용된다면, 에지는 방향성이 있다. 에지의 방향성에 따라서, 그래프는 *방향성(directed)* 또는 *비방향성(undirected)* 이 된다. 그래프는 *가중치를 갖거나(weighted)* , *가중치를 갖지 않는다(unweighted)*. 가중치 그래프에서 각 에지는 스칼라 가중치와 연결된다. 예를 들어, 가중치는 길이 또는 연결 강도를 의미할 수 있다. 그래프는 *동종(homogeneous)* 또는 *이종(heterogeneous)* 일 수 있다. 동종 그래프(homogeneous graph)에서 모든 노드들은 같은 타입의 인스턴스를 표현하고, 모든 에지들도 같은 타입의 관계를 나타낸다. 예를 들어, 소셜 네트워크는 사람들과 그들의 연결로 구성된 그래프이고, 이들은 모두 같은 타입을 갖는다. 그와 반대로 이종 그래프(heterogeneous graph)에서는 노드들과 에지들이 여러 타입을 갖는다. 예를 들어, 마켓플레이스를 인코딩한 그래프는 구매자, 판매자, 그리고 상품 노드들이 구입-원함(want-to-buy), 구입했음(has-bought), ~의-고객(is-customer-of), 그리고 ~을-판매함(is-selling) 에지로 연결되어 있다. 이분 그래프(bipartite graph)는 이종 그래프의 특별한 형태로 흔히 사용되는 그래프 타입으로, 에지는 서로 다른 두 타입의 노드를 연결한다. 예를 들어, 추천 시스템에서 이분 그래프를 사용해서 사용자들과 아이템들의 상호관계를 표현할 수 있다. DGL에서 이종 그래프를 어떻게 사용하는지는 :ref:`guide_ko-graph-heterogeneous` 를 참고하자. 다중 그래프(multigraph)는 자체 루프(self loop)를 포함한 노드들의 같은 쌍들 사이에 (방향성이 있는) 여러 에지들을 갖는 그래프이다. 예를 들어, 두 저자가 서로 다른 해에 공동 저작을 했다면, 다른 피처들을 갖는 여러 에지가 만들어진다.
================================================ FILE: docs/source/guide_ko/graph-external.rst ================================================ .. _guide_ko-graph-external: 1.4 외부 소스를 사용한 그래프 생성하기 ----------------------------------------- :ref:`(English Version)` 외부 소스들로부터 :class:`~dgl.DGLGraph` 를 만드는 옵션들: - 그래프 및 회소 행렬을 위한 python 라이브러리(NetworkX 및 SciPy)로부터 변환하기 - 디스크에서 그래프를 로딩하기 이 절에서는 다른 그래프를 변환해서 그래프를 생성하는 함수들은 다루지 않겠다. 그 방법들에 대한 소개는 매뉴얼의 API를 참조하자. 외부 라이브러리를 사용해서 그래프 생성하기 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 아래 코드는 SciPy 희소행렬과 NetworkX 그래프로부터 그래프를 생성하는 예제이다. .. code:: >>> import dgl >>> import torch as th >>> import scipy.sparse as sp >>> spmat = sp.rand(100, 100, density=0.05) # 5% nonzero entries >>> dgl.from_scipy(spmat) # from SciPy Graph(num_nodes=100, num_edges=500, ndata_schemes={} edata_schemes={}) >>> import networkx as nx >>> nx_g = nx.path_graph(5) # a chain 0-1-2-3-4 >>> dgl.from_networkx(nx_g) # from networkx Graph(num_nodes=5, num_edges=8, ndata_schemes={} edata_schemes={}) `nx.path_graph(5)` 로부터 만들면 생성된 :class:`~dgl.DGLGraph` 는 4개가 아니라 8개의 에지를 갖는 점을 유의하자. 이유는 `nx.path_graph(5)` 는 방향이 없는 NetworkX 그래프 :class:`networkx.Graph` 를 만드는데, :class:`~dgl.DGLGraph` 는 항상 방향이 있는 그래프이기 때문이다. 방향이 없는 NetworkX 그래프를 :class:`~dgl.DGLGraph` 로 변환하면, DGL은 내부적으로 방향이 없는 에지를 두개의 방향이 있는 에지로 변환한다. :class:`networkx.DiGraph` 를 사용하면 이런 현상을 피할 수 있다. .. code:: >>> nxg = nx.DiGraph([(2, 1), (1, 2), (2, 3), (0, 0)]) >>> dgl.from_networkx(nxg) Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={}) .. note:: 내부적으로 DGL은 SciPy 행렬과 NetworkX 그래프를 텐서로 변환해서 그래프를 만든다. 따라서, 이 생성 방법은 성능이 중요한 곳에 사용되기 적합하지 않다. 참고할 API들: :func:`dgl.from_scipy` , :func:`dgl.from_networkx` . 디스크에서 그래프 로딩하기 ^^^^^^^^^^^^^^^^^^^ 그래프를 저장하기 위한 여러 데이터 포멧들이 있는데, 모든 옵션들을 나열하기는 불가능하다. 그래서 이 절에서는 공통적인 것들에 대한 일반적인 참조만 소개한다. Comma Separated Values (CSV) """""""""""""""""""""""""""" 아주 일반적인 포멧으로 CSV가 사용된다. 이는 노드, 에치, 그리고 그것들의 피처들을 테이블 형태로 저장한다. .. 
table:: nodes.csv +-----------+ |age, title | +===========+ |43, 1 | +-----------+ |23, 3 | +-----------+ |... | +-----------+ .. table:: edges.csv +-----------------+ |src, dst, weight | +=================+ |0, 1, 0.4 | +-----------------+ |0, 3, 0.9 | +-----------------+ |... | +-----------------+ 잘 알려진 Python 라이브러리들(예, pandas)을 사용해서 이 형태의 데이터를 python 객체(예, :class:`numpy.ndarray` )로 로딩하고, 이를 DGLGraph로 변환하는데 사용할 수 있다. 만약 백엔드 프레임워크가 디스크에서 텐서를 저장하고/읽는 기능(예, :func:`torch.save` , :func:`torch.load` )을 제공한다면, 그래프를 만드는데 이용할 수 있다. 함께 참조하기: `Tutorial for loading a Karate Club Network from edge pairs CSV `_. JSON/GML 포멧 """""""""""" 특별히 빠르지는 않지만 NetworkX는 `다양한 데이터 포멧 `_ 을 파싱하는 유틸리티들을 제공하는데, 이를 통해서 DGL 그래프를 만들 수 있다. DGL 바이너리 포멧 """""""""""""" DGL은 디스크에 그래프를 바이너리 형태로 저장하고 로딩하는 API들을 제공한다. 그래프 구조와 더불어, API들은 피처 데이터와 그래프 수준의 레이블 데이터도 다룰 수 있다. DGL은 그래프를 직접 S3 또는 HDFS에 체크포인트를 할 수 있는 기능을 제공한다. 러퍼런스 메뉴얼에 자세한 내용이 있으니 참고하자. 참고할 API들: :func:`dgl.save_graphs` , :func:`dgl.load_graphs` ================================================ FILE: docs/source/guide_ko/graph-feature.rst ================================================ .. _guide_ko-graph-feature: 1.3 노드와 에지의 피처 -------------------------- :ref:`(English Version)` 노드들과 에지들의 그래프별 속성을 저장하기 위해서, :class:`~dgl.DGLGraph` 의 노드들과 에지들은 이름을 갖는 사용자 정의 피쳐를 갖을 수 있다. :py:attr:`~dgl.DGLGraph.ndata` 와 :py:attr:`~dgl.DGLGraph.edata` 인터페이스를 이용해서 이 피쳐들을 접근할 수 있다. 예를 들어, 아래 코드는 두 노드에 대한 피쳐를 생성하고(라인 8과 15에서 ``'x'`` 와 ``'y'`` 이름 피처), 한개의 에지 피처(라인 9에서 ``'x'`` 이름 피처)를 생성한다. .. 
code-block:: python :linenos: >>> import dgl >>> import torch as th >>> g = dgl.graph(([0, 0, 1, 5], [1, 2, 2, 0])) # 6 nodes, 4 edges >>> g Graph(num_nodes=6, num_edges=4, ndata_schemes={} edata_schemes={}) >>> g.ndata['x'] = th.ones(g.num_nodes(), 3) # node feature of length 3 >>> g.edata['x'] = th.ones(g.num_edges(), dtype=th.int32) # scalar integer feature >>> g Graph(num_nodes=6, num_edges=4, ndata_schemes={'x' : Scheme(shape=(3,), dtype=torch.float32)} edata_schemes={'x' : Scheme(shape=(,), dtype=torch.int32)}) >>> # different names can have different shapes >>> g.ndata['y'] = th.randn(g.num_nodes(), 5) >>> g.ndata['x'][1] # get node 1's feature tensor([1., 1., 1.]) >>> g.edata['x'][th.tensor([0, 3])] # get features of edge 0 and 3 tensor([1, 1], dtype=torch.int32) :py:attr:`~dgl.DGLGraph.ndata`/:py:attr:`~dgl.DGLGraph.edata` 인터페이스의 중요한 사실들: - 숫자 타입(예, float, double, int)의 피처들만 허용된다. 피처는 스칼라, 벡터, 또는 다차원 텐서가 가능하다. - 각 노드 피처는 고유한 이름을 갖고, 각 에지 피쳐도 고유한 이름을 갖는다. 노드와 에지의 피쳐는 같은 이름을 갖을 수 있다. (예, 위 예의 'x') - 턴서 할당으로 피처가 만들어진다. 즉, 피처를 그래프의 각 노드/에지에 할당하는 것이다. 텐서의 첫번째 차원은 그래프의 노드/에지들의 개수와 같아야 한다. 그래프의 노드/에지의 일부에만 피쳐를 할당하는 것은 불가능하다. - 같은 이름의 피처들은 같은 차원 및 같은 타입을 갖아야 한다. - 피처 텐서는 행 위주(row-major)의 레이아웃을 따른다. 각 행-슬라이스는 한 노드 또는 이제의 피처를 저장한다. (아래 예제의 16줄 및 18줄을 보자) 가중치 그래프인 경우, 에지 피처로 가중치를 저장할 수 있다. .. code-block:: python >>> # edges 0->1, 0->2, 0->3, 1->3 >>> edges = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3]) >>> weights = th.tensor([0.1, 0.6, 0.9, 0.7]) # weight of each edge >>> g = dgl.graph(edges) >>> g.edata['w'] = weights # give it a name 'w' >>> g Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={'w' : Scheme(shape=(,), dtype=torch.float32)}) 참고할 API들: :py:attr:`~dgl.DGLGraph.ndata` , :py:attr:`~dgl.DGLGraph.edata` ================================================ FILE: docs/source/guide_ko/graph-gpu.rst ================================================ .. 
_guide_ko-graph-gpu: 1.6 GPU에서 DGLGraph 사용하기 -------------------------- :ref:`(English Version)` 그래프 생성시, 두 GPU 텐서를 전달해서 GPU에 위치한 :class:`~dgl.DGLGraph` 를 만들 수 있다. 다른 방법으로는 :func:`~dgl.DGLGraph.to` API를 사용해서 :class:`~dgl.DGLGraph` 를 GPU로 복사할 수 있다. 이는 그래프 구조와 피처 데이터를 함께 복사한다. .. code:: >>> import dgl >>> import torch as th >>> u, v = th.tensor([0, 1, 2]), th.tensor([2, 3, 4]) >>> g = dgl.graph((u, v)) >>> g.ndata['x'] = th.randn(5, 3) # original feature is on CPU >>> g.device device(type='cpu') >>> cuda_g = g.to('cuda:0') # accepts any device objects from backend framework >>> cuda_g.device device(type='cuda', index=0) >>> cuda_g.ndata['x'].device # feature data is copied to GPU too device(type='cuda', index=0) >>> # A graph constructed from GPU tensors is also on GPU >>> u, v = u.to('cuda:0'), v.to('cuda:0') >>> g = dgl.graph((u, v)) >>> g.device device(type='cuda', index=0) GPU 그래프에 대한 모든 연산은 GPU에서 수행된다. 따라서, 모든 텐서 인자들이 GPU에 이미 존재해야하며, 연산 결과(그래프 또는 텐서) 역시 GPU에 저장된다. 더 나아가, GPU 그래프는 GPU에 있는 피쳐 데이터만 받아들인다. .. code:: >>> cuda_g.in_degrees() tensor([0, 0, 1, 1, 1], device='cuda:0') >>> cuda_g.in_edges([2, 3, 4]) # ok for non-tensor type arguments (tensor([0, 1, 2], device='cuda:0'), tensor([2, 3, 4], device='cuda:0')) >>> cuda_g.in_edges(th.tensor([2, 3, 4]).to('cuda:0')) # tensor type must be on GPU (tensor([0, 1, 2], device='cuda:0'), tensor([2, 3, 4], device='cuda:0')) >>> cuda_g.ndata['h'] = th.randn(5, 4) # ERROR! feature must be on GPU too! DGLError: Cannot assign node feature "h" on device cpu to a graph on device cuda:0. Call DGLGraph.to() to copy the graph to the same device. ================================================ FILE: docs/source/guide_ko/graph-graphs-nodes-edges.rst ================================================ .. _guide_ko-graph-graphs-nodes-edges: 1.2 그래프, 노드, 그리고 에지 ---------------------------- :ref:`(English Version)` DGL은 각 노드에 고유한 번호를 부여하는데 이를 노드 ID라고 하고, 각 에지에는 연결된 노드의 ID들에 해당하는 번호 쌍으로 표현된다. 
DGL은 각 에지에 고유한 번호를 부여하고, 이를 **에지 ID**라고 하며, 그래프에 추가된 순서에 따라 번호가 부여된다. 노드와 에지 ID의 번호는 0부터 시작한다. DGL에서는 모든 에지는 방향을 갖고, 에지 :math:`(u,v)` 는 노드 :math:`u` 에서 노드 :math:`v` 로 이어진 방향을 나타낸다. 여러 노드를 표현하기 위해서 DGL는 노드 ID로 1차원 정수 텐서를 사용한다. (PyTorch의 tensor, TensorFlow의 Tensor, 또는 MXNet의 ndarry) DGL은 이 포멧을 "노드-텐서"라고 부른다. DGL에서 에지들은 노드-텐서의 튜플 :math:`(U, V)` 로 표현된다. :math:`(U[i], V[i])` 는 :math:`U[i]` 에서 :math:`V[i]` 로의 에지이다. :class:`~dgl.DGLGraph` 를 만드는 방법 중의 하나는 :func:`dgl.graph` 메소드를 사용하는 것이다. 이는 에지 집합을 입력으로 받는다. 또한 DGL은 다른 데이터 소스로부터 그래프들을 생성하는 것도 지원한다. :ref:`guide_ko-graph-external` 참고하자. 다음 코드는 아래와 같은 4개의 노드를 갖는 그래프를 :func:`dgl.graph` 를 사용해서 :class:`~dgl.DGLGraph` 만들고, 그래프 구조를 쿼리하는 API들을 보여준다. .. figure:: https://data.dgl.ai/asset/image/user_guide_graphch_1.png :height: 200px :width: 300px :align: center .. code:: >>> import dgl >>> import torch as th >>> # edges 0->1, 0->2, 0->3, 1->3 >>> u, v = th.tensor([0, 0, 0, 1]), th.tensor([1, 2, 3, 3]) >>> g = dgl.graph((u, v)) >>> print(g) # number of nodes are inferred from the max node IDs in the given edges Graph(num_nodes=4, num_edges=4, ndata_schemes={} edata_schemes={}) >>> # Node IDs >>> print(g.nodes()) tensor([0, 1, 2, 3]) >>> # Edge end nodes >>> print(g.edges()) (tensor([0, 0, 0, 1]), tensor([1, 2, 3, 3])) >>> # Edge end nodes and edge IDs >>> print(g.edges(form='all')) (tensor([0, 0, 0, 1]), tensor([1, 2, 3, 3]), tensor([0, 1, 2, 3])) >>> # If the node with the largest ID is isolated (meaning no edges), >>> # then one needs to explicitly set the number of nodes >>> g = dgl.graph((u, v), num_nodes=8) 비방향성 그래프를 만들기 위해서는 양방향에 대한 에지들을 만들어야 한다. :func:`dgl.to_bidirected` 함수를 사용하면, 그래프를 양방향의 에지를 갖는 그래프로 변환할 수 있다. .. code:: >>> bg = dgl.to_bidirected(g) >>> bg.edges() (tensor([0, 0, 0, 1, 1, 2, 3, 3]), tensor([1, 2, 3, 0, 3, 0, 0, 1])) .. note:: DGL API에서는 일반적으로 텐서 타입이 사용된다. 이는 C 언어에서 효율적으로 저장되는 특징과, 명시적인 데이터 타입, 그리고 디바이스 컨택스트 정보 때문이다. 하지만, 빠른 프로토타입 개발을 지원하기 위해서, 대부분 DGL API는 파이선 iterable (예 list) 및 numpy.array를 함수 인자로 지원하고 있다. 
DGL은 노드 및 에지 ID를 저장하는데 :math:`32` 비트 또는 :math:`64` 비트 정수를 사용할 수 있다. 노드와 에지 ID의 데이터 타입은 같아야 한다. :math:`64` 비트를 사용하면 DGL은 노드 또는 에지를 :math:`2^{64} - 1` 개까지 다룰 수 있다. 하지만 그래프의 노드 또는 에지가 :math:`2^{31} - 1` 개 이하인 경우에는 :math:`32` 비트 정수를 사용해야한다. 이유는 속도도 빠르고 저장공간도 적게 사용하기 때문이다. DGL은 이 변환을 위한 방법들을 제공한다. 아래 예제를 보자. .. code:: >>> edges = th.tensor([2, 5, 3]), th.tensor([3, 5, 0]) # edges 2->3, 5->5, 3->0 >>> g64 = dgl.graph(edges) # DGL uses int64 by default >>> print(g64.idtype) torch.int64 >>> g32 = dgl.graph(edges, idtype=th.int32) # create a int32 graph >>> g32.idtype torch.int32 >>> g64_2 = g32.long() # convert to int64 >>> g64_2.idtype torch.int64 >>> g32_2 = g64.int() # convert to int32 >>> g32_2.idtype torch.int32 참고할 API들: :func:`dgl.graph` , :func:`dgl.DGLGraph.nodes` , :func:`dgl.DGLGraph.edges` , :func:`dgl.to_bidirected` , :func:`dgl.DGLGraph.int` , :func:`dgl.DGLGraph.long` , 그리고 :py:attr:`dgl.DGLGraph.idtype` ================================================ FILE: docs/source/guide_ko/graph-heterogeneous.rst ================================================ .. _guide_ko-graph-heterogeneous: 1.5 이종 그래프 (Heterogeneous Graph) ---------------------------------- :ref:`(English Version)` 이종 그래프는 다른 타입의 노드와 에지를 갖는다. 다른 타입의 노드/에지는 독립적인 ID 공간과 피처 저장소를 갖는다. 아래 그램의 예를 보면, user와 game 노드 ID는 모두 0부터 시작하고, 서로 다른 피처들을 갖고 있다. .. figure:: https://data.dgl.ai/asset/image/user_guide_graphch_2.png 두 타입의 노드(user와 game)와 두 타입의 에지(follows와 plays)를 갖는 이종 그래프 예 이종 그래프 생성하기 ^^^^^^^^^^^^^^^ DGL에서 이종 그래프(짧게 heterograph)는 관계당 하나의 그래프들의 시리즈로 표현된다. 각 관계는 문자열 트리플 ``(source node type, edge type, destination node type)`` 이다. 관계가 에지 타입을 명확하게 하기 때문에, DGL은 이것들을 캐노니컬(canonical) 에지 타입이라고 한다. 아래 코드는 DGL에서 이종 그래프를 만드는 예제이다. .. code:: >>> import dgl >>> import torch as th >>> # Create a heterograph with 3 node types and 3 edges types. >>> graph_data = { ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... 
('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... } >>> g = dgl.heterograph(graph_data) >>> g.ntypes ['disease', 'drug', 'gene'] >>> g.etypes ['interacts', 'interacts', 'treats'] >>> g.canonical_etypes [('drug', 'interacts', 'drug'), ('drug', 'interacts', 'gene'), ('drug', 'treats', 'disease')] 동종(homogeneous) 및 이분(bipartite) 그래프는 하나의 관계를 갖는 특별한 이종 그래프일 뿐임을 알아두자. .. code:: >>> # A homogeneous graph >>> dgl.heterograph({('node_type', 'edge_type', 'node_type'): (u, v)}) >>> # A bipartite graph >>> dgl.heterograph({('source_type', 'edge_type', 'destination_type'): (u, v)}) 이종 그래프와 연관된 *메타그래프(metagraph)* 는 그래프의 스키마이다. 이것은 노드들과 노드간의 에지들의 집합에 대한 타입 제약 조건을 지정한다. 메타그래프의 노드 :math:`u` 는 연관된 이종 그래프의 노드 타입에 해당한다. 메타그래프의 에지 :math:`(u,v)` 는 연관된 이종 그래프의 노드 타입 :math:`u` 와 노드 타입 :math:`v` 간에 에지가 있다는 것을 알려준다. .. code:: >>> g Graph(num_nodes={'disease': 3, 'drug': 3, 'gene': 4}, num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'interacts', 'gene'): 2, ('drug', 'treats', 'disease'): 1}, metagraph=[('drug', 'drug', 'interacts'), ('drug', 'gene', 'interacts'), ('drug', 'disease', 'treats')]) >>> g.metagraph().edges() OutMultiEdgeDataView([('drug', 'drug'), ('drug', 'gene'), ('drug', 'disease')]) 참고할 API들: :func:`dgl.heterograph` , :py:attr:`~dgl.DGLGraph.ntypes` , :py:attr:`~dgl.DGLGraph.etypes` , :py:attr:`~dgl.DGLGraph.canonical_etypes` , :py:attr:`~dgl.DGLGraph.metagraph` 다양한 타입을 다루기 ^^^^^^^^^^^^^^^ 노드와 에지가 여러 타입이 사용되는 경우, 타입 관련된 정보를 위한 DGLGraph API를 호출할 때는 노드/에지의 타입을 명시해야한다. 추가로 다른 타입의 노드/에지는 별도의 ID를 갖는다. .. code:: >>> # Get the number of all nodes in the graph >>> g.num_nodes() 10 >>> # Get the number of drug nodes >>> g.num_nodes('drug') 3 >>> # Nodes of different types have separate IDs, >>> # hence not well-defined without a type specified >>> g.nodes() DGLError: Node type name must be specified if there are more than one node types. 
>>> g.nodes('drug') tensor([0, 1, 2]) 특정 노드/에지 타입에 대한 피쳐를 설정하고 얻을 때, DGL은 두가지 새로운 형태의 문법을 제공한다 -- `g.nodes['node_type'].data['feat_name']`와 `g.edges['edge_type'].data['feat_name']`. .. code:: >>> # Set/get feature 'hv' for nodes of type 'drug' >>> g.nodes['drug'].data['hv'] = th.ones(3, 1) >>> g.nodes['drug'].data['hv'] tensor([[1.], [1.], [1.]]) >>> # Set/get feature 'he' for edge of type 'treats' >>> g.edges['treats'].data['he'] = th.zeros(1, 1) >>> g.edges['treats'].data['he'] tensor([[0.]]) 만약 그래프가 오직 한개의 노드/에지 타입을 갖는다면, 노드/에지 타입을 명시할 필요가 없다. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'is similar', 'drug'): (th.tensor([0, 1]), th.tensor([2, 3])) ... }) >>> g.nodes() tensor([0, 1, 2, 3]) >>> # To set/get feature with a single type, no need to use the new syntax >>> g.ndata['hv'] = th.ones(4, 1) .. note:: 에지 타입이 목적지와 도착지 노드의 타입을 고유하게 결정할 수 있다면, 에지 타입을 명시할 때 문자 트리플 대신 한 문자만들 사용할 수 있다. 예를 듬녀, 두 관계 ``('user', 'plays', 'game')`` and ``('user', 'likes', 'game')``를 갖는 이종 그래프가 있을 때, 두 관계를 지정하기 위해서 단지 ``'plays'`` 또는 ``'likes'`` 를 사용해도 된다. 디스크에서 이종 그래프 로딩하기 ^^^^^^^^^^^^^^^^^^^^^^^ Comma Separated Values (CSV) """""""""""""""""""""""""""" 이종 그래프를 저장하는 일반적인 방법은 다른 타입의 노드와 에지를 서로 다른 CSV 파일에 저장하는 것이다. 예를들면 다음과 같다. .. code:: # data folder data/ |-- drug.csv # drug nodes |-- gene.csv # gene nodes |-- disease.csv # disease nodes |-- drug-interact-drug.csv # drug-drug interaction edges |-- drug-interact-gene.csv # drug-gene interaction edges |-- drug-treat-disease.csv # drug-treat-disease edges 동종 그래프의 경우와 동일하게, Pandas와 같은 패키지들을 사용해서 CSV 파일들을 파싱하고, 이를 numpy 배열 또는 프레임워크의 텐서들에 저장하고, 관계 사전을 만들고, 이를 이용해서 이종 그래프를 생성할 수 있다. 이 방법은 GML/JSON과 같은 다른 유명한 포멧들에도 동일하게 적용된다. DGL 바이너리 포멧 """""""""""""" DGL은 이종 그래프를 바이너리 포멧으로 저장하고 읽기 위한 함수 :func:`dgl.save_graphs` 와 :func:`dgl.load_graphs` 를 제공한다. 에지 타입 서브그래프 ^^^^^^^^^^^^^^^ 보존하고 싶은 관계를 명시하고, 피처가 있을 경우는 이를 복사하면서 이종 그래프의 서브그래프를 생성할 수 있다. .. 
code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... }) >>> g.nodes['drug'].data['hv'] = th.ones(3, 1) >>> # Retain relations ('drug', 'interacts', 'drug') and ('drug', 'treats', 'disease') >>> # All nodes for 'drug' and 'disease' will be retained >>> eg = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'), ... ('drug', 'treats', 'disease')]) >>> eg Graph(num_nodes={'disease': 3, 'drug': 3}, num_edges={('drug', 'interacts', 'drug'): 2, ('drug', 'treats', 'disease'): 1}, metagraph=[('drug', 'drug', 'interacts'), ('drug', 'disease', 'treats')]) >>> # The associated features will be copied as well >>> eg.nodes['drug'].data['hv'] tensor([[1.], [1.], [1.]]) 이종 그래프를 동종 그래프로 변환하기 ^^^^^^^^^^^^^^^^^^^^^^^^^^^ 이종 그래프는 다른 타입의 노드/에지와 그것들에 연관된 피쳐들을 관리하는데 깔끔한 인터페이스를 제공한다. 이것을 아래의 경우 특히 유용하다. 1. 다른 타입의 노드/에지에 대한 피쳐가 다른 데이터 타입 또는 크기를 갖는다. 2. 다른 타입의 노드/에지에 다른 연산을 적용하고 싶다. 만약 위 조건을 만족하지 않고 모델링에서 노드/에지 타입의 구별이 필요하지 않는다면, DGL의 :func:`dgl.DGLGraph.to_homogeneous` API를 이용해서 이종 그래프를 동종 그래프로 변환할 수 있다. 이 변환은 다음 절처로 이뤄진다. 1. 모든 타입의 노드/에지를 0부터 시작하는 정수로 레이블을 다시 부여한다. 2. 사용자가 지정한 노드/에지 타입들에 걸쳐서 피쳐들을 합친다. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... 
('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))}) >>> g.nodes['drug'].data['hv'] = th.zeros(3, 1) >>> g.nodes['disease'].data['hv'] = th.ones(3, 1) >>> g.edges['interacts'].data['he'] = th.zeros(2, 1) >>> g.edges['treats'].data['he'] = th.zeros(1, 2) >>> # By default, it does not merge any features >>> hg = dgl.to_homogeneous(g) >>> 'hv' in hg.ndata False >>> # Copy edge features >>> # For feature copy, it expects features to have >>> # the same size and dtype across node/edge types >>> hg = dgl.to_homogeneous(g, edata=['he']) DGLError: Cannot concatenate column ‘he’ with shape Scheme(shape=(2,), dtype=torch.float32) and shape Scheme(shape=(1,), dtype=torch.float32) >>> # Copy node features >>> hg = dgl.to_homogeneous(g, ndata=['hv']) >>> hg.ndata['hv'] tensor([[1.], [1.], [1.], [0.], [0.], [0.]]) 원래의 노드/에지 타입과 타입별 ID들은 :py:attr:`~dgl.DGLGraph.ndata` 와 :py:attr:`~dgl.DGLGraph.edata` 에 저장된다. .. code:: >>> # Order of node types in the heterograph >>> g.ntypes ['disease', 'drug'] >>> # Original node types >>> hg.ndata[dgl.NTYPE] tensor([0, 0, 0, 1, 1, 1]) >>> # Original type-specific node IDs >>> hg.ndata[dgl.NID] tensor([0, 1, 2, 0, 1, 2]) >>> # Order of edge types in the heterograph >>> g.etypes ['interacts', 'treats'] >>> # Original edge types >>> hg.edata[dgl.ETYPE] tensor([0, 0, 1]) >>> # Original type-specific edge IDs >>> hg.edata[dgl.EID] tensor([0, 1, 0]) 모델링 목적으로, 특정 관계들을 모아서 그룹으로 만들고, 그것들에 같은 연산을 적용하고 싶은 경우가 있다. 이를 위해서, 우선 이종 그래프의 에지 타입 서브그래프를 추출하고, 그리고 그 서브그래프를 동종 그래프로 변환한다. .. code:: >>> g = dgl.heterograph({ ... ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])), ... ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])), ... ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])) ... }) >>> sub_g = dgl.edge_type_subgraph(g, [('drug', 'interacts', 'drug'), ... ('drug', 'interacts', 'gene')]) >>> h_sub_g = dgl.to_homogeneous(sub_g) >>> h_sub_g Graph(num_nodes=7, num_edges=4, ...) 
================================================ FILE: docs/source/guide_ko/graph.rst ================================================ .. _guide_ko-graph: 1장: 그래프 ========= :ref:`(English version)` 그래프는 엔티티들(entity 또는 노드들)과 노드들간의 관계(에지)로 표현되며, 노드와 에지들은 타입을 가질 수 있다. (예를 들어, ``"user"`` 와 ``"item"`` 은 서로 다른 타입의 노드들이다.) DGL은 :class:`~dgl.DGLGraph` 를 핵심 자료 구조로 갖는 그래프-중심의 프로그래밍 추상화를 제공한다. :class:`~dgl.DGLGraph` 는 그래프의 구조, 그 그래프의 노드 및 에지 피처들과 이 컴포넌트들을 사용해서 수행된 연산 결과를 다루는데 필요한 인터페이스를 제공한다. 로드맵 ------- 이 장은 1.1절의 그래프 정의에 대한 간단한 소개를 시작으로 :class:`~dgl.DGLGraph` 의 몇가지 핵심 개념을 소개한다. * :ref:`guide_ko-graph-basic` * :ref:`guide_ko-graph-graphs-nodes-edges` * :ref:`guide_ko-graph-feature` * :ref:`guide_ko-graph-external` * :ref:`guide_ko-graph-heterogeneous` * :ref:`guide_ko-graph-gpu` .. toctree:: :maxdepth: 1 :hidden: :glob: graph-basic graph-graphs-nodes-edges graph-feature graph-external graph-heterogeneous graph-gpu ================================================ FILE: docs/source/guide_ko/index.rst ================================================ 사용자 가이드[시대에 뒤처진] ===================== .. toctree:: :maxdepth: 2 :titlesonly: graph message nn data training minibatch distributed mixed_precision 이 한글 버전 DGL 사용자 가이드는 2021년 11월 기준의 영문 :ref:`(User Guide) ` 을 Amazon Machine Learning Solutions Lab의 김무현 Principal Data Scientist가 번역한 것입니다. 오류 및 질문은 `muhyun@amazon.com` 으로 보내주세요. ================================================ FILE: docs/source/guide_ko/message-api.rst ================================================ .. _guide_ko-message-passing-api: 2.1 빌트인 함수 및 메시지 전달 API들 ----------------------------- :ref:`(English Version) ` DGL에서 **메시지 함수** 는 한개의 인자 ``edges`` 를 갖는데, 이는 :class:`~dgl.udf.EdgeBatch` 의 객체이다. 메시지 전달이 실행되는 동안 DGL은 에지 배치를 표현하기 위해서 이 객체를 내부적으로 생성한다. 이것은 3개의 멤버, ``src`` , ``dst`` , 그리고 ``data`` 를 갖고, 이는 각각 소스 노드, 목적지 노드, 그리고 에지의 피쳐를 의미한다. **축약 함수(reduce function)** 는 한개의 인자 ``nodes`` 를 갖는데, 이는 :class:`~dgl.udf.NodeBatch` 의 객체이다.
메시지 전달이 실행되는 동안 DGL은 노드 배치를 표현하기 위해서 이 객체를 내부적으로 생성한다. 이 객체는 ``mailbox`` 라는 맴버를 갖는데, 이는 배치에 속한 노드들에게 전달된 메시지들을 접근 방법을 제공한다. 가장 흔한 축약 함수로는 ``sum`` , ``max`` , ``min`` 등이 있다. **업데이트 함수** 는 위에서 언급한 ``nodes`` 를 한개의 인자로 갖는다. 이 함수는 ``축약 함수`` 의 집계 결과에 적용되는데, 보통은 마지막 스탭에서 노드의 원래 피처와 이 결과와 결합하고, 그 결과를 노드의 피처로 저장한다. DGL은 일반적으로 사용되는 메시지 전달 함수들과 축약 함수들을 ``dgl.function`` 네임스패이스에 **빌트인** 으로 구현하고 있다. 일반적으로, **가능한 경우라면 항상** DLG의 빌트인 함수를 사용하는 것을 권장하는데, 그 이유는 이 함수들은 가장 최적화된 형태로 구현되어 있고, 차원 브로드캐스팅을 자동으로 해주기 때문이다. 만약 여러분의 메시지 전달 함수가 빌트인 함수로 구현이 불가능하다면, 사용자 정의 메시지/축소 함수를 직접 구현할 수 있다. 이를 **UDF** 라고 한다. 빌트인 메시지 함수들은 단항(unary) 또는 이상(binary)이다. 단항의 경우 DGL은 ``copy`` 를 지원한다. 이항 함수로 DGL은 ``add`` , ``sub`` , ``mul`` , ``div`` , 그리고 ``dot`` 를 지원한다. 빌트인 메시지 함수의 이름 규칙은 다음과 같다. ``u`` 는 ``src`` 노드를, ``v`` 는 ``dst`` 노드를 그리고 ``e`` 는 ``edges`` 를 의미한다. 이 함수들에 대한 파라미터들은 관련된 노드와 에지의 입력과 출력 필드 이름을 지칭하는 문자열이다. 지원되는 빌트인 함수의 목록은 :ref:`api-built-in` 을 참고하자. 한가지 예를 들면, 소스 노드의 ``hu`` 피처와 목적지 노드의 ``hv`` 피처를 더해서 그 결과를 에지의 ``he`` 필드에 저장하는 것을 빌트인 함수 ``dgl.function.u_add_v('hu', 'hv', 'he')`` 를 사용해서 구현할 수 있다. 이와 동일한 기능을 하는 메시지 UDF는 다음과 같다. .. code:: def message_func(edges): return {'he': edges.src['hu'] + edges.dst['hv']} 빌트인 축약 함수는 ``sum``, ``max``, ``min`` 그리고 ``mean`` 연산을 지원한다. 보통 축약 함수는 두개의 파라메터를 갖는데, 하나는 ``mailbox`` 의 필드 이름이고, 다른 하나는 노드 피처의 필드 이름이다. 이는 모두 문자열이다. 예를 들어, ``dgl.function.sum('m', 'h')`` 는 메시지 ``m`` 을 합하는 아래 축약 UDF와 같다. .. code:: import torch def reduce_func(nodes): return {'h': torch.sum(nodes.mailbox['m'], dim=1)} UDF의 고급 사용법을 더 알고 싶으면 :ref:`apiudf` 를 참고하자. :meth:`~dgl.DGLGraph.apply_edges` 를 사용해서 메시지 전달 함수를 호출하지 않고 에지별 연산만 호출하는 것도 가능하다. :meth:`~dgl.DGLGraph.apply_edges` 는 파라미터로 메시지 함수를 받는데, 기본 설정으로는 모든 에지의 피쳐를 업데이트한다. 다음 예를 살펴보자. .. code:: import dgl.function as fn graph.apply_edges(fn.u_add_v('el', 'er', 'e')) 메시지 전달을 위한 :meth:`~dgl.DGLGraph.update_all` 는 하이레벨 API로 메시지 생성, 메시지 병합 그리고 노드 업데이트를 단일 호출로 합쳤는데, 전반적으로 최적화할 여지가 남아있다. 
:meth:`~dgl.DGLGraph.update_all` 의 파라메터들은 메시지 함수, 축약 함수, 그리고 업데이트 함수이다. :meth:`~dgl.DGLGraph.update_all` 를 호출할 때 업데이트 함수를 지정하지 않는 경우, 업데이트 함수는 ``update_all`` 밖에서 수행될 수 있다. DGL은 이 방법을 권장하는데, 업데이트 함수는 코드를 간결하게 만들기 위해서 보통은 순수 텐서 연산으로 구현되어 있기 때문이다. 예를 들면, 다음과 같다. .. code:: def update_all_example(graph): # store the result in graph.ndata['ft'] graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) # Call update function outside of update_all final_ft = graph.ndata['ft'] * 2 return final_ft 이 함수는 소스 노드의 피처 ``ft`` 와 에지 피처 ``a`` 를 곱해서 메시지 ``m`` 을 생성하고, 메시지 ``m`` 들을 더해서 노드 피처 ``ft`` 를 업데이트하고, 마지막으로 ``final_ft`` 결과를 구하기 위해서 ``ft`` 에 2를 곱하고 있다. 호출이 완료되면 DGL은 중간에 사용된 메시지들 ``m`` 을 제거한다. 위 함수를 수학 공식으로 표현하면 다음과 같다. .. math:: {final\_ft}_i = 2 * \sum_{j\in\mathcal{N}(i)} ({ft}_j * a_{ij}) DGL의 빌트인 함수는 부동소수점 데이터 타입을 지원한다. 즉, 피쳐들은 반드시 ``half`` (``float16``), ``float``, 또는 ``double`` 텐서여야만 한다. ``float16`` 데이터 타입에 대한 지원은 기본 설정에서는 비활성화되어 있다. 그 이유는 이를 지원하기 위해서는 ``sm_53`` (Pascal, Volta, Turing, 그리고 Ampere 아키텍처)와 같은 최소한의 GPU 컴퓨팅 능력이 요구되기 때문이다. 사용자는 DGL 소스 컴파일을 통해서 mixed precision training을 위해서 float16을 활성화시킬 수 있다. (자세한 내용은 :doc:`Mixed Precision Training ` 튜토리얼 참고) ================================================ FILE: docs/source/guide_ko/message-edge.rst ================================================ .. _guide_ko-message-passing-edge: 2.4 메시지 전달에 에지 가중치 적용하기 ----------------------------- :ref:`(English Version) ` `GAT `__ 또는 일부 `GCN 변형 `__ 에서 사용되는 것처럼 메시지 병합 이전에 에지의 가중치를 적용하는 것은 GNN 모델링에서 흔하게 사용되는 기법이다. DGL은 이를 다음과 같은 방법으로 지원하고 있다. - 가중치를 에지 피쳐로 저장 - 메시지 함수에서 에지 피쳐를 소스 노드의 피쳐와 곱하기 예를 들면, .. code:: import dgl.function as fn # Suppose eweight is a tensor of shape (E, *), where E is the number of edges. graph.edata['a'] = eweight graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft')) 이 예제는 eweight를 에지 가중치로 사용하고 있다. 에지 가중치는 보통은 스칼라 값을 갖는다.
================================================ FILE: docs/source/guide_ko/message-efficient.rst ================================================ .. _guide_ko-message-passing-efficient: 2.2 효율적인 메시지 전달 코드 작성 방법 ------------------------------ :ref:`(English Version) ` DGL은 메시지 전달에 대한 메모리 사용과 연산 속드를 최적화하고 있다. 이 최적화들을 활용하는 일반적으로 사용되는 방법은 직접 메시지 전달 함수를 만들어서 이를 :meth:`~dgl.DGLGraph.update_all` 호출시 빌트인 함수와 함께 파라메터로 사용하는 것이다. 만약 그래프의 에지들의 수가 노드들의 수보다 훨씬 많은 경우에는 노드에서 에지로의 불필요한 메모리 복사를 피하는 것이 도움이 된다. 에지에 메시지를 저장할 필요가 있는 :class:`~dgl.nn.pytorch.conv.GATConv` 와 같은 경우에는 빌트인 함수를 사용해서 :meth:`~dgl.DGLGraph.apply_edges` 를 호출해야 한다. 때로는 에지에 저장할 메시지의 차원이 너무 커서 메모리를 많이 차지하기도 한다. DGL에서는 가능한 에지 피쳐의 차원을 낮추는 것을 권장한다. 에지에 대한 연산을 노드로 분할하여 이를 달성하는 방법에 대한 예제이다. 이 방법은 다음과 같다. ``src`` 피쳐와 ``dst`` 피쳐를 연결하고, 선형 레이어 :math:`W\times (u || v)`를 적용하는 경우를 들어보자. ``src``와 ``dst`` 피처 차원은 매우 높은 반면에 선형 레이어의 결과 차원은 낮다고 가정하자. 이 예제를 직관적으로 구현하면 다음과 같다. .. code:: import torch import torch.nn as nn linear = nn.Parameter(torch.FloatTensor(size=(node_feat_dim * 2, out_dim))) def concat_message_function(edges): return {'cat_feat': torch.cat([edges.src['feat'], edges.dst['feat']], dim=1)} g.apply_edges(concat_message_function) g.edata['out'] = g.edata['cat_feat'] @ linear 제안하는 구현은 이 선형 연산을 두개로 나누는 것이다. 하나는 ``src`` 피처에 적용하고, 다른 하나는 ``dst`` 피쳐에 적용한다. 그 후, 에지에 대한 두 선형 연산의 결과를 마지막 단계에서 더한다. 즉, :math:`W_l\times u + W_r \times v` 를 실행하는 것이다. :math:`W` 행렬의 왼쪽 반과 오른쪽 반이 각각 :math:`W_l` 와 :math:`W_r` 일 때, :math:`W \times (u||v) = W_l \times u + W_r \times v` 가 성립하기 때문에 가능하다. .. code:: import dgl.function as fn linear_src = nn.Parameter(torch.FloatTensor(size=(node_feat_dim, out_dim))) linear_dst = nn.Parameter(torch.FloatTensor(size=(node_feat_dim, out_dim))) out_src = g.ndata['feat'] @ linear_src out_dst = g.ndata['feat'] @ linear_dst g.srcdata.update({'out_src': out_src}) g.dstdata.update({'out_dst': out_dst}) g.apply_edges(fn.u_add_v('out_src', 'out_dst', 'out')) 위 두 구현은 수학적으로 동일하다. 
후자가 더 효율적인데, 그 이유는 feat_src와 feat_dst를 에지에 저장하는 메모리 비효율적인 과정이 필요 없기 때문이다. 추가로, 합은 연산속도가 더 빠르고 메모리 사용량을 줄인 DGL의 빌트인 함수 ``u_add_v`` 를 사용하면 최적화될 수 있다. ================================================ FILE: docs/source/guide_ko/message-heterograph.rst ================================================ .. _guide_ko-message-passing-heterograph: 2.5 이종 그래프에서의 메시지 전달 -------------------------- :ref:`(English Version) ` 이종 그래프 ( :ref:`guide_ko-graph-heterogeneous` ) 또는 헤테로그래프는 여러 타입의 노드와 에지를 갖는 그래프이다. 서로 다른 타입의 노드와 에지는 각 노드 타입과 에지 타입의 특징을 표현하도록 설계된 서로 다른 타입의 속성들을 갖는 경우가 많다. 복잡한 그래프 뉴럴 네트워크들에서 어떤 노드나 에지 타입들은 다른 차원들을 갖게 모델링 되기도 한다. 이종 그래프에서 메시지 전달은 두 파트로 나뉜다: 1. 각 관계(relation) r에 대한, 메시지 연산과 집계(aggregation) 2. 각 노드 타입에 대한 모든 관계의 집계 결과를 합치는 축약(reduction) 이종 그래프에서 메시지 전달을 담당하는 DGL 인터페이스는 :meth:`~dgl.DGLGraph.multi_update_all` 이다. :meth:`~dgl.DGLGraph.multi_update_all` 는 관계를 키로 하고 그 관계에 대한 :meth:`~dgl.DGLGraph.update_all` 의 파라메터들을 값으로 갖는 사전(dictionary)과, 크로스 타입 리듀서(cross type reducer)를 나타내는 문자열을 인자로 받는다. Reducer는 ``sum``, ``min``, ``max``, ``mean``, ``stack`` 중에 하나가 된다. 예제는 다음과 같다. .. code:: import dgl.function as fn for c_etype in G.canonical_etypes: srctype, etype, dsttype = c_etype Wh = self.weight[etype](feat_dict[srctype]) # Save it in graph for message passing G.nodes[srctype].data['Wh_%s' % etype] = Wh # Specify per-relation message passing functions: (message_func, reduce_func). # Note that the results are saved to the same destination feature 'h', which # hints the type wise reducer for aggregation. funcs[etype] = (fn.copy_u('Wh_%s' % etype, 'm'), fn.mean('m', 'h')) # Trigger message passing of multiple types. G.multi_update_all(funcs, 'sum') # return the updated node feature dictionary return {ntype : G.nodes[ntype].data['h'] for ntype in G.ntypes} ================================================ FILE: docs/source/guide_ko/message-part.rst ================================================ ..
_guide_ko-message-passing-part: 2.3 그래프 일부에 메시지 전달 적용하기 ------------------------------ :ref:`(English Version) ` 그래프 노드의 일부만 업데이트를 하기 원하는 경우, 업데이트를 하고 싶은 노드들의 ID를 사용해서 서브그래프를 만든 후, 그 서브그래프에 :meth:`~dgl.DGLGraph.update_all` 를 호출하는 방법으로 가능하다. .. code:: nid = [0, 2, 3, 6, 7, 9] sg = g.subgraph(nid) sg.update_all(message_func, reduce_func, apply_node_func) 이는 미니-배치 학습에서 흔히 사용되는 방법이다. 자세한 사용법은 :ref:`guide_ko-minibatch` 를 참고하자. ================================================ FILE: docs/source/guide_ko/message.rst ================================================ .. _guide_ko-message-passing: 2장: 메시지 전달(Message Passing) ============================= :ref:`(English Version) ` 메시지 전달 패러다임(Message Passing Paradigm) ----------------------------------------- :math:`x_v\in\mathbb{R}^{d_1}` 이 노드 :math:`v` 의 피처이고, :math:`w_{e}\in\mathbb{R}^{d_2}` 가 에지 :math:`({u}, {v})` 의 피처라고 하자. **메시지 전달 패러다임** 은 :math:`t+1` 단계에서 노드별(node-wise) 그리고 에지별(edge-wise)의 연산을 다음과 같이 정의한다: .. math:: \text{에지별: } m_{e}^{(t+1)} = \phi \left( x_v^{(t)}, x_u^{(t)}, w_{e}^{(t)} \right) , ({u}, {v},{e}) \in \mathcal{E}. .. math:: \text{노드별: } x_v^{(t+1)} = \psi \left(x_v^{(t)}, \rho\left(\left\lbrace m_{e}^{(t+1)} : ({u}, {v},{e}) \in \mathcal{E} \right\rbrace \right) \right). 위 수식에서 :math:`\phi` 는 각 에지에 대한 **메시지 함수** 로서 에지의 부속 노드(incident node)들의 피처를 그 에지 피처와 합쳐서 메시지를 만드는 역할을 수행한다. :math:`\psi` 는 각 노드에 대한 **업데이트 함수** 로, **축약 함수(reduce function)** :math:`\rho` 를 사용해서 전달된 메시지들을 통합하는 방식으로 노드의 피처를 업데이트한다. 로드맵 ---- 이 장은 DGL의 메시지 전달 API들과, 노드와 에지에 효율적으로 적용하는 방법을 소개한다. 마지막 절에서는 이종 그래프에 메시지 전달을 어떻게 구현하는지 설명한다. * :ref:`guide_ko-message-passing-api` * :ref:`guide_ko-message-passing-efficient` * :ref:`guide_ko-message-passing-part` * :ref:`guide_ko-message-passing-edge` * :ref:`guide_ko-message-passing-heterograph` ..
toctree:: :maxdepth: 1 :hidden: :glob: message-api message-efficient message-part message-edge message-heterograph ================================================ FILE: docs/source/guide_ko/minibatch-custom-sampler.rst ================================================ .. _guide_ko-minibatch-customizing-neighborhood-sampler: 6.4 이웃 샘플러 커스터마이징하기 ------------------------- :ref:`(English Version) ` DGL이 여러 이웃 샘플링 방법들을 제공하지만, 샘플링 방법을 직접 만들어야할 경우도 있다. 이 절에서는 샘플링 방법을 직접 만드는 방법과 stochastic GNN 학습 프레임워크에서 사용하는 방법을 설명한다. `그래프 뉴럴 네트워크가 얼마나 강력한가(How Powerful are Graph Neural Networks) `__ 에서 설명했듯이, 메시지 전달은 다음과 같이 정의된다. .. math:: \begin{gathered} \boldsymbol{a}_v^{(l)} = \rho^{(l)} \left( \left\lbrace \boldsymbol{h}_u^{(l-1)} : u \in \mathcal{N} \left( v \right) \right\rbrace \right) \\ \boldsymbol{h}_v^{(l)} = \phi^{(l)} \left( \boldsymbol{h}_v^{(l-1)}, \boldsymbol{a}_v^{(l)} \right) \end{gathered} 여기서, :math:`\rho^{(l)}` 와 :math:`\phi^{(l)}` 는 파라메터를 갖는 함수이고, :math:`\mathcal{N}(v)`는 그래프 :math:`\mathcal{G}` 에 속한 노드 :math:`v` 의 선행 노드(predecessor)들 (또는 방향성 그래프의 경우 *이웃 노드들*)의 집합을 의미한다. 아래 그래프의 빨간색 노드를 업데이트하는 메시지 전달을 수행하기 위해서는, .. figure:: https://data.dgl.ai/asset/image/guide_6_4_0.png :alt: Imgur 아래 그림의 녹색으로 표시된 이웃 노드들의 노드 피쳐들을 합쳐야한다(aggregate). .. figure:: https://data.dgl.ai/asset/image/guide_6_4_1.png :alt: Imgur 이웃 샘플링 직접 해보기 ~~~~~~~~~~~~~~~~~~ 우선 위 그림의 그래프를 DGL 그래프로 정의한다. .. code:: python import torch import dgl src = torch.LongTensor( [0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 9, 10, 1, 2, 3, 3, 3, 4, 5, 5, 6, 5, 8, 6, 8, 9, 8, 11, 11, 10, 11]) dst = torch.LongTensor( [1, 2, 3, 3, 3, 4, 5, 5, 6, 5, 8, 6, 8, 9, 8, 11, 11, 10, 11, 0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 7, 8, 9, 10]) g = dgl.graph((src, dst)) 그리고 노드 한개에 대한 결과를 계산하기 위해서 멀티-레이어 메시지 전달을 어떻게 수행할지를 고려하자. 메시지 전달 의존성 찾기 ^^^^^^^^^^^^^^^^^ 아래 그래프에서 2-레이어 GNN을 사용해서 시드 노드 8의 결과를 계산하는 것을 생각해보자. .. figure:: https://data.dgl.ai/asset/image/guide_6_4_2.png :alt: Imgur 공식은 다음과 같다. .. 
math:: \begin{gathered} \boldsymbol{a}_8^{(2)} = \rho^{(2)} \left( \left\lbrace \boldsymbol{h}_u^{(1)} : u \in \mathcal{N} \left( 8 \right) \right\rbrace \right) = \rho^{(2)} \left( \left\lbrace \boldsymbol{h}_4^{(1)}, \boldsymbol{h}_5^{(1)}, \boldsymbol{h}_7^{(1)}, \boldsymbol{h}_{11}^{(1)} \right\rbrace \right) \\ \boldsymbol{h}_8^{(2)} = \phi^{(2)} \left( \boldsymbol{h}_8^{(1)}, \boldsymbol{a}_8^{(2)} \right) \end{gathered} 이 공식에 따르면, :math:`\boldsymbol{h}_8^{(2)}` 을 계산하기 위해서는 아래 그림에서와 같이 (녹색으로 표시된) 노드 4,5,7 그리고 11번에서 에지을 따라서 메시지를 수집하는 것이 필요하다. .. figure:: https://data.dgl.ai/asset/image/guide_6_4_3.png :alt: Imgur 이 그래프는 원본 그래프의 모든 노드들을 포함하고 있지만, 특정 출력 노드들에 메시지를 전달할 에지들만을 포함하고 있다. 이런 그래프를 빨간색 노드 8에 대한 두번째 GNN 레이어에 대한 *프론티어(frontier)* 라고 부른다. 프론티어들을 생성하는데 여러 함수들이 사용된다. 예를 들어, :func:`dgl.in_subgraph()` 는 원본 그래프의 모든 노드를 포함하지만, 특정 노드의 진입 에지(incoming edge)들만 포함하는 서브 그래프를 유도하는 함수이다. .. code:: python frontier = dgl.in_subgraph(g, [8]) print(frontier.all_edges()) 전체 구현은 :ref:`api-subgraph-extraction` 와 :ref:`api-sampling` 를 참고하자. 기술적으로는 원본 그래프와 같은 노들들 집합을 잡는 어떤 그래프도 프로티어가 될 수 있다. 이는 :ref:`guide_ko-minibatch-customizing-neighborhood-sampler-impl` 에 대한 기반이다. 멀티-레이어 미니배치 메시지 전달을 위한 이분 구조(Bipartite Structure) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 하지만, :math:`\boldsymbol{h}_\cdot^{(1)}` 로부터 단순히 :math:`\boldsymbol{h}_8^{(2)}` 를 계산하는 것은 프론티어에서 메시지 전달을 계산하는 방식으로 할 수 없다. 그 이유는, 여전히 프론티어가 원본 그래프의 모든 노드를 포함하고 있기 때문이다. 이 그래프의 경우, (녹색과 빨간색 노드들) 4, 5, 7, 8, 11 노드들만이 입력으로 필요하고, 출력으로는 (빨간색 노드) 노드 8번이 필요하다. 입력과 출력의 노드 개수가 다르기 때문에, 작은 이분-구조(bipartite-structured) 그래프에서 메시지 전달을 수행할 필요가 있다. 아래 그림은 노드 8에 대해서 2번째 GNN 레이어의 MFG를 보여준다. .. figure:: https://data.dgl.ai/asset/image/guide_6_4_4.png :alt: Imgur .. note:: Message Flow Graph에 대한 개념은 :doc:`Stochastic Training Tutorial ` 참고하자. 목적지 노드들이 소스 노드에도 등장한다는 점을 유의하자. 그 이유는 메시지 전달(예를 들어, :math:`\phi^{(2)}` )이 수행된 후에 이전 레이어의 목적지 노드들의 representation들이 피처를 합치는데 사용되기 때문이다. DGL은 임의의 프론티어를 MFG로 변환하는 :func:`dgl.to_block` 함수를 제공한다. 
이 함수의 첫번째 인자는 프론티어이고, 두번째 인자는 목적지 노드들이다. 예를 들어, 위 프론티어는 목적지 노드 8에 대한 MFG로 전환하는 코드는 다음과 같다. .. code:: python dst_nodes = torch.LongTensor([8]) block = dgl.to_block(frontier, dst_nodes) :meth:`dgl.DGLGraph.number_of_src_nodes` 와 :meth:`dgl.DGLGraph.number_of_dst_nodes` 메소스들 사용해서 특정 노트 타입의 소스 노드 및 목적지 노드의 수를 알아낼 수 있다. .. code:: python num_src_nodes, num_dst_nodes = block.number_of_src_nodes(), block.number_of_dst_nodes() print(num_src_nodes, num_dst_nodes) :attr:`dgl.DGLGraph.srcdata` 와 :attr:`dgl.DGLGraph.srcnodes` 같은 멤머를 통해서 MFG의 소스 노드 피쳐들을 접근할 수 있고, :attr:`dgl.DGLGraph.dstdata` 와 :attr:`dgl.DGLGraph.dstnodes` 를 통해서는 목적지 노드의 피쳐들을 접근할 수 있다. ``srcdata`` / ``dstdata`` 와 ``srcnodes`` / ``dstnodes`` 의 사용법은 일반 그래프에 사용하는 :attr:`dgl.DGLGraph.ndata` 와 :attr:`dgl.DGLGraph.nodes` 와 동일하다. .. code:: python block.srcdata['h'] = torch.randn(num_src_nodes, 5) block.dstdata['h'] = torch.randn(num_dst_nodes, 5) 만약 MFG가 프론티어에서 만들어졌다면, 즉 프래프에서 만들어졌다면, MFG의 소스 및 목적지 노드의 피쳐는 다음과 같이 직접 읽을 수 있다. .. code:: python print(block.srcdata['x']) print(block.dstdata['y']) .. note:: MFG에서의 소스 노드와 목적지 노드의 원본의 노드 ID는 ``dgl.NID`` 피쳐에 저장되어 있고, MFG의 에지 ID들와 프론티어의 에지 ID 사이의 매핑은 ``dgl.EID`` 에 있다. DGL에서는 MFG의 목적지 노드들이 항상 소스 노드에도 있도록 하고 있다. 다음 코드에서 알수 있듯이, 목적지 노드들은 소스 노드들에서 늘 먼저 위치한다. .. code:: python src_nodes = block.srcdata[dgl.NID] dst_nodes = block.dstdata[dgl.NID] assert torch.equal(src_nodes[:len(dst_nodes)], dst_nodes) 그 결과, 목적지 노드들은 프론티어의 에지들의 목적지인 모든 노들들을 포함해야 한다. 예를 들어, 아래 프론티어를 생각해 보자. .. figure:: https://data.dgl.ai/asset/image/guide_6_4_5.png :alt: Imgur 여기서 빨간 노드와 녹색 노드들 (즉, 4, 5, 7, 8 그리고 11번 노드)는 에지의 목적지가 되는 노드들이다. 이 경우, 아래 코드는 에러를 발생시키는데, 이유는 목적지 노드 목록이 이들 노드를 모두 포함하지 않기 때문이다. .. code:: python dgl.to_block(frontier2, torch.LongTensor([4, 5])) # ERROR 하지만, 목적지 노드들은 위 보다 더 많은 노드들을 포함할 수 있다. 이 예제의 경우, 어떤 에지도 연결되지 않은 고립된 노드들(isolated node)이 있고, 이 고립 노드들은 소스 노드와 목적지 노드 모두에 포함될 수 있다. .. code:: python # Node 3 is an isolated node that do not have any edge pointing to it. 
block3 = dgl.to_block(frontier2, torch.LongTensor([4, 5, 7, 8, 11, 3])) print(block3.srcdata[dgl.NID]) print(block3.dstdata[dgl.NID]) Heterogeneous 그래프들 ^^^^^^^^^^^^^^^^^^^^ MFG들은 heterogeneous 그래프에도 적용된다. 다음 프론티어를 예로 들어보자. .. code:: python hetero_frontier = dgl.heterograph({ ('user', 'follow', 'user'): ([1, 3, 7], [3, 6, 8]), ('user', 'play', 'game'): ([5, 5, 4], [6, 6, 2]), ('game', 'played-by', 'user'): ([2], [6]) }, num_nodes_dict={'user': 10, 'game': 10}) 목적지 노드들 User #3, #6, #8 그리고 Game #2, #6을 포함한 MFG를 생성한다. .. code:: python hetero_block = dgl.to_block(hetero_frontier, {'user': [3, 6, 8], 'game': [2, 6]}) 소스 노드들과 목적지 노드들을 타입별로 얻을 수 있다. .. code:: python # source users and games print(hetero_block.srcnodes['user'].data[dgl.NID], hetero_block.srcnodes['game'].data[dgl.NID]) # destination users and games print(hetero_block.dstnodes['user'].data[dgl.NID], hetero_block.dstnodes['game'].data[dgl.NID]) .. _guide_ko-minibatch-customizing-neighborhood-sampler-impl: 커스텀 이웃 샘플러 구현하기 ~~~~~~~~~~~~~~~~~~~~ 아래 코드는 노드 분류를 위한 이웃 샘플링을 수행한다는 것을 떠올려 보자. .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) 이웃 샘플링 전략을 직접 구현하기 위해서는 ``sampler`` 를 직접 구현한 내용으로 바꾸기만 하면 된다. 이를 살펴보기 위해서, 우선 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 의 부모 클래스인 :class:`~dgl.dataloading.dataloader.BlockSampler` 를 살펴보자. :class:`~dgl.dataloading.dataloader.BlockSampler` 클래스는 :meth:`~dgl.dataloading.dataloader.BlockSampler.sample_blocks` 메소드를 통해서 마지막 레이어로부터 시작하는 MFG들의 리스트를 만들어내는 역할을 한다. ``sample_blocks`` 의 기본 구현은 마지막 레이어부터 역순으로 iterate하면서 프론티어들을 생성하고 그것들을 MFG들로 변환한다. 따라서, 이웃 샘플링을 하기 위해서 단지 :meth:`~dgl.dataloading.dataloader.BlockSampler.sample_frontier` **메소드** 를 **구현하기만 하면 된다**. 어떤 레이어를 위한 프론티어를 생성할 것인지, 원본 그래프, representation들을 계산할 노드들이 주어지면, 이 메소드는 그것들을 위한 프론티어를 생성하는 것을 담당한다. GNN 레이어 수를 상위 클래스에 전달해야 한다. 예를 들어, :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 구현은 다음과 같다. ..
code:: python class MultiLayerFullNeighborSampler(dgl.dataloading.BlockSampler): def __init__(self, n_layers): super().__init__(n_layers) def sample_frontier(self, block_id, g, seed_nodes): frontier = dgl.in_subgraph(g, seed_nodes) return frontier :class:`dgl.dataloading.neighbor.MultiLayerNeighborSampler` 는 더 복잡한 이웃 샘플러로, 각 노들에 대해서 메시지를 수집할 적은 수의 이웃 노드들을 샘플하는 기능을 하는데, 구현은 다음과 같다. .. code:: python class MultiLayerNeighborSampler(dgl.dataloading.BlockSampler): def __init__(self, fanouts): super().__init__(len(fanouts)) self.fanouts = fanouts def sample_frontier(self, block_id, g, seed_nodes): fanout = self.fanouts[block_id] if fanout is None: frontier = dgl.in_subgraph(g, seed_nodes) else: frontier = dgl.sampling.sample_neighbors(g, seed_nodes, fanout) return frontier 위의 함수는 프론티어를 생성하지만, 원본 그래프와 같은 노들을 갖는 어떤 그래프도 프론티어로 사용될 수 있다. 예를 들어, 주어진 확률에 따라서 시드 노드들에 연결되는 인바운드 에지를 임의로 삭제하기를 원한다면, 다음과 같이 샘플러를 정의할 수 있다. .. code:: python class MultiLayerDropoutSampler(dgl.dataloading.BlockSampler): def __init__(self, p, num_layers): super().__init__(num_layers) self.p = p def sample_frontier(self, block_id, g, seed_nodes, *args, **kwargs): # Get all inbound edges to `seed_nodes` src, dst = dgl.in_subgraph(g, seed_nodes).all_edges() # Randomly select edges with a probability of p mask = torch.zeros_like(src).bernoulli_(self.p) src = src[mask] dst = dst[mask] # Return a new graph with the same nodes as the original graph as a # frontier frontier = dgl.graph((src, dst), num_nodes=g.num_nodes()) return frontier def __len__(self): return self.num_layers 샘플러를 직접 구현한 다음에는, 그 샘플러를 사용하는 데이터 로더를 생성하고, 예전과 같이 시드 노드들을 iterate하면서 MFG들의 리스트를 만들게 한다. .. 
code:: python sampler = MultiLayerDropoutSampler(0.5, 2) dataloader = dgl.dataloading.NodeDataLoader( g, train_nids, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) model = StochasticTwoLayerRGCN(in_features, hidden_features, out_features) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] input_features = blocks[0].srcdata # returns a dict output_labels = blocks[-1].dstdata # returns a dict output_predictions = model(blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() Heterogeneous 그래프들 ^^^^^^^^^^^^^^^^^^^^ Heterogeneous 그래프에 대한 프론티어를 생성하는 것은 homogeneous 그래프의 경우와 동일하다. 리턴된 그래프가 원본 그래프와 같은 노드들을 갖도록 하면, 나머지는 그대로 동작할 것이다. 예를 들어, 위 ``MultiLayerDropoutSampler`` 를 재작성해서 모든 에지 타입들을 iterate 해서, heterogeneous 그래프에도 작동하게 만들 수 있다. .. code:: python class MultiLayerDropoutSampler(dgl.dataloading.BlockSampler): def __init__(self, p, num_layers): super().__init__(num_layers) self.p = p def sample_frontier(self, block_id, g, seed_nodes, *args, **kwargs): # Get all inbound edges to `seed_nodes` sg = dgl.in_subgraph(g, seed_nodes) new_edges_masks = {} # Iterate over all edge types for etype in sg.canonical_etypes: edge_mask = torch.zeros(sg.num_edges(etype)) edge_mask.bernoulli_(self.p) new_edges_masks[etype] = edge_mask.bool() # Return a new graph with the same nodes as the original graph as a # frontier frontier = dgl.edge_subgraph(new_edges_masks, relabel_nodes=False) return frontier def __len__(self): return self.num_layers ================================================ FILE: docs/source/guide_ko/minibatch-edge.rst ================================================ .. _guide_ko-minibatch-edge-classification-sampler: 6.2 이웃 샘플링을 사용한 에지 분류 GNN 모델 학습하기 ----------------------------------------- :ref:`(English Version) ` 에지 분류/리그레션 모델을 학습하는 것은 몇 가지 눈에 띄는 차이점이 있지만 노드 분류/리그레션과 어느정도 비슷하다. 
이웃 샘플러 및 데이터 로더 정의하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ :ref:`노드 분류에서 사용한 것과 같은 이웃 샘플러` 를 사용할 수 있다. .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) 에지 분류에 DGL이 제공하는 이웃 샘플러를 사용하려면, 미니-배치의 에지들의 집합을 iterate 하는 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 와 함께 사용해야한다. 이것은 아래 모듈에서 사용될 에지 미니-배치로부터 만들어질 서브 그래프와 *message flow graph* (MFG)들을 리턴한다. 다음 코드 예제는 PyTorch DataLoader를 만든다. 이는 베치들에 있는 학습 에지 ID 배열 :math:`train_eids` 들을 iterate 하고, 생성된 MFG들의 리스트를 GPU로 옮겨놓는다. .. code:: python dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) .. note:: Message flow graph의 개념은 :doc:`Stochastic Training Tutorial ` 를 참고하자. 빌트인으로 지원되는 샘플러들에 대한 전체 목록은 :ref:`neighborhood sampler API reference ` 에 있다. :ref:`guide_ko-minibatch-customizing-neighborhood-sampler` 에는 여러분만의 이웃 샘플러 만드는 방법과 MFG 개념에 대한 보다 상세한 설명을 담고 있다. 이웃 샘플링을 위해서 원본 그래프에서 미니 배치의 에지들 제거하기 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 에지 분류 모델을 학습할 때, 때로는 computation dependency에서 학습 데이터에 있는 에지들을 존재하지 않았던 것처럼 만들기 위해 제거하는 것이 필요하다. 그렇지 않으면, 모델은 두 노드들 사이에 에지가 존재한다는 사실을 *인지* 할 것이고, 이 정보를 학습에 잠재적으로 이용할 수 있기 때문이다. 따라서, 에지 분류의 경우 때로는 이웃 샘플링은 미니-배치안에 샘플된 에지들 및 undirected 그래프인 경우 샘플된 에지의 역방향 에지들도 원본 그래프에서 삭제하기도 한다. :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 객체를 만들 때, ``exclude='reverse_id'`` 를 에지 ID와 그와 연관된 reverse 에지 ID들의 매핑 정보와 함께 지정할 수 있다. .. code:: python n_edges = g.num_edges() dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, # The following two arguments are specifically for excluding the minibatch # edges and their reverse edges from the original graph for neighborhood # sampling. 
exclude='reverse_id', reverse_eids=torch.cat([ torch.arange(n_edges // 2, n_edges), torch.arange(0, n_edges // 2)]), batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 모델을 미니-배치 학습에 맞게 만들기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 에지 분류 모델은 보통은 다음과 같이 두 부분으로 구성된다: - 첫번째는 부속 노드(incident node)들의 representation을 얻는 부분 - 두번째는 부속 노드의 representation들로부터 에지 점수를 계산하는 부분 첫번째 부분은 :ref:`노드 분류` 와 완전히 동일하기에, 단순하게 이를 재사용할 수 있다. 입력 DGL에서 제공하는 데이터 로더가 만들어 낸 MFG들의 리스트와 입력 피쳐들이 된다. .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dglnn.GraphConv(in_features, hidden_features) self.conv2 = dglnn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x 두번째 부분에 대한 입력은 보통은 이전 부분의 출력과 미니배치의 에지들에 의해서 유도된 원본 그래프의 서브 그래프가 된다. 서브 그래프는 같은 데이터 로더에서 리턴된다. :meth:`dgl.DGLGraph.apply_edges` 를 사용해서 에지 서브 그래프를 사용해서 에지들의 점수를 계산한다. 다음 코드는 부속 노드 피처들을 연결하고, 이를 dense 레이어에 입력해서 얻은 결과로 에지들의 점수를 예측하는 예를 보여준다. .. code:: python class ScorePredictor(nn.Module): def __init__(self, num_classes, in_features): super().__init__() self.W = nn.Linear(2 * in_features, num_classes) def apply_edges(self, edges): data = torch.cat([edges.src['x'], edges.dst['x']], 1) return {'score': self.W(data)} def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x edge_subgraph.apply_edges(self.apply_edges) return edge_subgraph.edata['score'] 전체 모델은 아래와 같이 데이터 로더로부터 얻은 MFG들의 리스트와 에지 서브 그래프, 그리고 입력 노드 피쳐들을 사용한다. .. 
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes): super().__init__() self.gcn = StochasticTwoLayerGCN( in_features, hidden_features, out_features) self.predictor = ScorePredictor(num_classes, out_features) def forward(self, edge_subgraph, blocks, x): x = self.gcn(blocks, x) return self.predictor(edge_subgraph, x) DGL에서는 에지 서브 그래프의 노드들이 MFG들의 리스트에서 마지막 MFG의 출력 노드들과 동일하도록 확인한다. 학습 룹 ~~~~~ 학습 룹은 노드 분류의 학습 룹과 비슷하다. 데이터 로더를 iterate해서, 미니배치의 에지들에 의해서 유도된 서브 그래프와 에지들의 부속 노드(incident node)들의 representation들을 계산하기 위한 MFG들의 목록을 얻는다. .. code:: python model = Model(in_features, hidden_features, out_features, num_classes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, edge_subgraph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] edge_subgraph = edge_subgraph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] edge_labels = edge_subgraph.edata['labels'] edge_predictions = model(edge_subgraph, blocks, input_features) loss = compute_loss(edge_labels, edge_predictions) opt.zero_grad() loss.backward() opt.step() Heterogeneous 그래프의 경우 ~~~~~~~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프들의 노드 representation들을 계산하는 모델은 에지 분류/리그레션을 위한 부속 노드 representation들을 구하는데 사용될 수 있다. .. code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x 점수를 예측하기 위한 homogeneous 그래프와 heterogeneous 그래프간의 유일한 구현상의 차이점은 :meth:`~dgl.DGLGraph.apply_edges` 를 호출할 때 에지 타입들을 사용한다는 점이다. .. 
code:: python class ScorePredictor(nn.Module): def __init__(self, num_classes, in_features): super().__init__() self.W = nn.Linear(2 * in_features, num_classes) def apply_edges(self, edges): data = torch.cat([edges.src['x'], edges.dst['x']], 1) return {'score': self.W(data)} def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x for etype in edge_subgraph.canonical_etypes: edge_subgraph.apply_edges(self.apply_edges, etype=etype) return edge_subgraph.edata['score'] class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes, etypes): super().__init__() self.rgcn = StochasticTwoLayerRGCN( in_features, hidden_features, out_features, etypes) self.pred = ScorePredictor(num_classes, out_features) def forward(self, edge_subgraph, blocks, x): x = self.rgcn(blocks, x) return self.pred(edge_subgraph, x) 데이터 로더 구현도 노드 분류을 위한 것과 아주 비슷하다. 유일한 차이점은 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 대신에 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 를 사용하고, 노드 타입과 노드 ID 텐서들의 사전 대신에 에지 타입과 에지 ID 텐서들의 사전을 사용한다는 것이다. .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 만약 heterogeneous 그래프에서 역방향의 에지를 배제하고자 한다면 약간 달라진다. Heterogeneous 그래프에서 역방향 에지들은 에지와는 다른 에지 타입을 갖는 것이 보통이다. 이는 “forward”와 “backward” 관계들을 구분직기 위해서이다. (즉, ``follow`` 와 ``followed by`` 는 서로 역 관계이고, ``purchase`` 와 ``purchased by`` 는 서로 역 관계인 것 처럼) 만약 어떤 타입의 에지들이 다른 타입의 같은 ID를 갖는 역방향 에지를 갖는다면, 에지 타입들과 그것들의 반대 타입간의 매핑을 명시할 수 있다. 미니배치에서 에지들과 그것들의 역방향 에지를 배제하는 것은 다음과 같다. .. code:: python dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, # The following two arguments are specifically for excluding the minibatch # edges and their reverse edges from the original graph for neighborhood # sampling. 
exclude='reverse_types', reverse_etypes={'follow': 'followed by', 'followed by': 'follow', 'purchase': 'purchased by', 'purchased by': 'purchase'}, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 학습 룹은 ``compute_loss`` 의 구현이 노드 타입들과 예측 값에 대한 두 사전들을 인자로 받는다는 점을 제외하면, homogeneous 그래프의 학습 룹 구현과 거의 같다. .. code:: python model = Model(in_features, hidden_features, out_features, num_classes, etypes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, edge_subgraph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] edge_subgraph = edge_subgraph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] edge_labels = edge_subgraph.edata['labels'] edge_predictions = model(edge_subgraph, blocks, input_features) loss = compute_loss(edge_labels, edge_predictions) opt.zero_grad() loss.backward() opt.step() `GCMC `__ 은 이분 그래프(bipartite graph)에 대한 에지 분류 예제이다. ================================================ FILE: docs/source/guide_ko/minibatch-gpu-sampling.rst ================================================ .. _guide_ko-minibatch-gpu-sampling: 6.7 이웃 샘플링에 GPU 사용하기 ------------------------ :ref:`(English Version) ` DGL 0.7부터 GPU 기반의 이웃 샘플링을 지원하는데, 이는 CPU 기반의 이웃 샘플링에 비해서 상당한 속도 향상을 가져다 준다. 만약 다루는 그래프와 피쳐들이 GPU에 들어갈 수 있는 크기이고, 모델이 너무 많은 GPU 메모리를 차지하지 않는다면, GPU 메모리에 올려서 GPU 기반의 이웃 샘플링을 하는 것이 최선의 방법이다. 예를 들어, `OGB Products `__ 는 2.4M 노드들과 61M 에지들을 갖고, 각 노드는 100 차원의 피쳐를 갖는다. 노드 피쳐들을 모두 합해서 1GB 미만의 메모리를 차지하고, 그래프는 약 1GB 보다 적은 메모리를 사용한다. 그래프의 메모리 요구량은 에지의 개수에 관련이 있다. 따라서, 전체 그래프를 GPU에 로딩하는 것이 가능하다. .. note:: 이 기능은 실험적인 것으로 개발이 진행 중이다. 추가 업데이트를 지켜보자. DGL 데이터 로더에서 GPU 기반의 이웃 샘플링 사용하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL 데이터 로더에서 GPU 기반의 이웃 샘플링은 다음 방법으로 사용할 수 있다. * 그래프를 GPU에 넣기 * ``num_workers`` 인자를 0으로 설정하기. CUDA는 같은 context를 사용하는 멀티 프로세스를 지원하지 않기 때문이다. * ``device`` 인자를 GPU 디바이스로 설정하기 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 의 다른 모든 인자들은 다른 가이드와 튜토리얼에서 사용한 것과 같다. ..
code:: python g = g.to('cuda:0') dataloader = dgl.dataloading.NodeDataLoader( g, # The graph must be on GPU. train_nid, sampler, device=torch.device('cuda:0'), # The device argument must be GPU. num_workers=0, # Number of workers must be 0. batch_size=1000, drop_last=False, shuffle=True) GPU 기반의 이웃 샘플링은 커스텀 이웃 샘플러가 두가지 조건을 충족하면 동작한다. (1) 커스텀 샘플러가 :class:`~dgl.dataloading.BlockSampler` 의 서브 클래스이고, (2) 샘플러가 GPU에서 완전하게 동작한다. .. note:: 현재는 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 와 heterogeneous 그래프는 지원하지 않는다. GPU 기반의 이웃 샘플러를 DGL 함수와 함께 사용하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 다음 함수들은 GPU에서 작동을 지원한다. * :func:`dgl.sampling.sample_neighbors` * 균일 샘플링(uniform sampling)만 지원함. non-uniform샘플링은 CPU에서만 동작함. 위 함수들 이외의 GPU에서 동작하는 함수들은 :func:`dgl.to_block` 를 참고하자. ================================================ FILE: docs/source/guide_ko/minibatch-inference.rst ================================================ .. _guide_ko-minibatch-inference: 6.6 큰 그래프들에 대핸 정확한 오프라인 추론 --------------------------------- :ref:`(English Version) ` GPU를 사용해서 GNN을 학습하는데 메모리와 걸리는 시간을 줄이기 위해서 서브 샘플링과 이웃 샘플링이 모두 사용된다. 추론을 수행할 때 보통은 샘플링으로 발생할 수 있는 임의성을 제거하기 위해서 전체 이웃들에 대해서 aggretate하는 것이 더 좋다. 하지만, GPU 메모리 제약이나, CPU의 느린 속도 때문에 전체 그래프에 대한 forward propagagtion을 수행하는 것은 쉽지 않다. 이 절은 미니배치와 이웃 샘플링을 통해서 제한적인 GPU를 사용한 전체 그래프 forward propagation의 방법을 소개한다. 추론 알고리즘은 학습 알고리즘과는 다른데, 추론 알고리즘은 첫번째 레이어부터 시작해서 각 레이이별로 모든 노드의 representation들을 계산해야하기 때문이다. 특히, 특정 레이어의 경우에 우리는 미니배치의 모든 노드들에 대해서 이 레이어의 출력 representation을 계산해야한다. 그 결과, 추론 알고리즘은 모든 레이어들 iterate하는 outer 룹과 노들들의 미니배치를 iterate하는 inner 룹을 갖는다. 반면, 학습 알고리즘은 노드들의 미니배치를 iterate하는 outer 룹과, 이웃 샘플링과 메시지 전달을 위한 레이어들을 iterate하는 inner 룹을 갖는다. 아래 애니매이션은 이 연산이 어떻게 일어나는지를 보여주고 있다 (각 레이어에 대해서 첫 3개의 미니배치만 표현되고 있음을 주의하자) .. figure:: https://data.dgl.ai/asset/image/guide_6_6_0.gif :alt: Imgur 오프라인 추론 구현하기 ~~~~~~~~~~~~~~~~ 6.1 :ref:`guide_ko-minibatch-node-classification-model` 에서 다룬 2-레이어 GCN을 생각해 보자. 
오프라인 추론을 구현하는 방법은 여전히 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 를 사용하지만, 한번에 하나의 레이어에 대한 샘플링을 수행한다. 하나의 레이어에 대한 계산은 메시지들어 어떻게 aggregate되고 합쳐지는지에 의존하기 때문에 오프라인 추론은 GNN 모듈의 메소드로 구현된다는 점을 주목하자. .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.hidden_features = hidden_features self.out_features = out_features self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) self.n_layers = 2 def forward(self, blocks, x): x_dst = x[:blocks[0].number_of_dst_nodes()] x = F.relu(self.conv1(blocks[0], (x, x_dst))) x_dst = x[:blocks[1].number_of_dst_nodes()] x = F.relu(self.conv2(blocks[1], (x, x_dst))) return x def inference(self, g, x, batch_size, device): """ Offline inference with this module """ # Compute representations layer by layer for l, layer in enumerate([self.conv1, self.conv2]): y = torch.zeros(g.num_nodes(), self.hidden_features if l != self.n_layers - 1 else self.out_features) sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) dataloader = dgl.dataloading.NodeDataLoader( g, torch.arange(g.num_nodes()), sampler, batch_size=batch_size, shuffle=True, drop_last=False) # Within a layer, iterate over nodes in batches for input_nodes, output_nodes, blocks in dataloader: block = blocks[0] # Copy the features of necessary input nodes to GPU h = x[input_nodes].to(device) # Compute output. Note that this computation is the same # but only for a single layer. h_dst = h[:block.number_of_dst_nodes()] h = F.relu(layer(block, (h, h_dst))) # Copy to output back to CPU. y[output_nodes] = h.cpu() x = y return y 모델 선택을 위해서 검증 데이터셋에 평가 metric을 계산하는 목적으로 정확한 오프라인 추론을 계산할 필요가 없다는 점을 주목하자. 모든 레이어에 대해서 모든 노드들의 representation을 계산하는 것이 필요한데, 이것은 레이블이 없는 데이터가 많은 semi-supervised 영역에서는 아주 많은 리소스를 필요로하기 때문이다. 이웃 샘플링은 모델 선택 및 평가 목적으로는 충분하다. 오프라인 추론의 예들로 `GraphSAGE `__ 및 `RGCN `__ 를 참고하자. 
================================================ FILE: docs/source/guide_ko/minibatch-link.rst ================================================ .. _guide_ko-minibatch-link-classification-sampler: 6.3 이웃 샘플링을 사용한 링크 예측 GNN 모델 학습하기 ----------------------------------------- :ref:`(English Version) ` Negative 샘플링을 사용한 이웃 샘플러 및 데이터 로더 정의하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 노드/에지 분류에서 사용한 이웃 샘플러를 그대로 사용하는 것이 가능하다. .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) DGL의 :class:`~dgl.dataloading.pytorch.EdgeDataLoader` 는 링크 예측를 위한 negative 샘플 생성을 지원한다. 이를 사용하기 위해서는, negative 샘플링 함수를 제공해야한다. :class:`~dgl.dataloading.negative_sampler.Uniform` 은 uniform 샘플링을 해주는 함수이다. 에지의 각 소스 노드에 대해서,이 함수는 ``k`` 개의 negative 목적지 노드들을 샘플링한다. 아래 코드는 에지의 각 소스 노드에 대해서 5개의 negative 목적지 노드를 균등하게 선택한다. .. code:: python dataloader = dgl.dataloading.EdgeDataLoader( g, train_seeds, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(5), batch_size=args.batch_size, shuffle=True, drop_last=False, pin_memory=True, num_workers=args.num_workers) 빌드인 negative 샘플러들은 :ref:`api-dataloading-negative-sampling` 에서 확인하자. 직접 만든 negative 샘플러 함수를 사용할 수도 있다. 이 함수는 원본 그래프 ``g`` 와, 미니배치 에지 ID 배열 ``eid`` 를 받아서 소스 ID 배열과 목적지 ID 배열의 쌍을 리턴해야 한다. 아래 코드 예제는 degree의 거듭제곱에 비례하는 확률 분포에 따라서 negative 목적지 노드들을 샘플링하는 custom negative 샘플러다. .. 
code:: python class NegativeSampler(object): def __init__(self, g, k): # caches the probability distribution self.weights = g.in_degrees().float() ** 0.75 self.k = k def __call__(self, g, eids): src, _ = g.find_edges(eids) src = src.repeat_interleave(self.k) dst = self.weights.multinomial(len(src), replacement=True) return src, dst dataloader = dgl.dataloading.EdgeDataLoader( g, train_seeds, sampler, negative_sampler=NegativeSampler(g, 5), batch_size=args.batch_size, shuffle=True, drop_last=False, pin_memory=True, num_workers=args.num_workers) 모델을 미니-배치 학습에 맞게 만들기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :ref:`guide_ko-training-link-prediction` 에서 설명한 것처럼, 링크 예측은 (positive 예제인) 에지의 점수와 존재하지 않는 에지(즉, negative 예제)의 점수를 비교하는 것을 통해서 학습될 수 있다. 에지들의 점수를 계산하기 위해서, 에지 분류/리그레션에서 사용했던 노드 representation 계산 모델을 재사용한다. .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x 점수 예측을 위해서 확률 분포 대신 각 에지의 scalar 점수를 예측하기만 하면되기 때문에, 이 예제는 부속 노드 representation들의 dot product로 점수를 계산하는 방법을 사용한다. .. code:: python class ScorePredictor(nn.Module): def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x edge_subgraph.apply_edges(dgl.function.u_dot_v('x', 'x', 'score')) return edge_subgraph.edata['score'] Negative 샘플러가 지정되면, DGL의 데이터 로더는 미니배치 마다 다음 3가지 아이템들을 만들어낸다. - 샘플된 미니배치에 있는 모든 에지를 포함한 postive 그래프 - Negative 샘플러가 생성한 존재하지 않는 에지 모두를 포함한 negative 그래프 - 이웃 샘플러가 생성한 *message flow graph* (MFG)들의 리스트 이제 3가지 아이템와 입력 피쳐들을 받는 링크 예측 모델을 다음과 같이 정의할 수 있다. .. 
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.gcn = StochasticTwoLayerGCN( in_features, hidden_features, out_features) self.predictor = ScorePredictor() def forward(self, positive_graph, negative_graph, blocks, x): x = self.gcn(blocks, x) pos_score = self.predictor(positive_graph, x) neg_score = self.predictor(negative_graph, x) return pos_score, neg_score 학습 룹 ~~~~~ 학습 룹은 데이터 로더를 iterate하고, 그래프들과 입력 피쳐들을 위에서 정의한 모델에 입력하는 것일 뿐이다. .. code:: python def compute_loss(pos_score, neg_score): # an example hinge loss n = pos_score.shape[0] return (neg_score.view(n, -1) - pos_score.view(n, -1) + 1).clamp(min=0).mean() model = Model(in_features, hidden_features, out_features) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, positive_graph, negative_graph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] positive_graph = positive_graph.to(torch.device('cuda')) negative_graph = negative_graph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] pos_score, neg_score = model(positive_graph, negative_graph, blocks, input_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() DGL에서는 homogeneous 그래프들에 대한 링크 예측의 예제로 `unsupervised learning GraphSAGE `__ 를 제공한다. Heterogeneous 그래프의 경우 ~~~~~~~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프들의 노드 representation들을 계산하는 모델은 에지 분류/리그레션을 위한 부속 노드 representation들을 구하는데 사용될 수 있다. ..
code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x 점수를 예측하기 위한 homogeneous 그래프와 heterogeneous 그래프간의 유일한 구현상의 차이점은 :meth:`dgl.DGLGraph.apply_edges` 를 호출할 때 에지 타입들을 사용한다는 점이다. .. code:: python class ScorePredictor(nn.Module): def forward(self, edge_subgraph, x): with edge_subgraph.local_scope(): edge_subgraph.ndata['x'] = x for etype in edge_subgraph.canonical_etypes: edge_subgraph.apply_edges( dgl.function.u_dot_v('x', 'x', 'score'), etype=etype) return edge_subgraph.edata['score'] class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, num_classes, etypes): super().__init__() self.rgcn = StochasticTwoLayerRGCN( in_features, hidden_features, out_features, etypes) self.pred = ScorePredictor() def forward(self, positive_graph, negative_graph, blocks, x): x = self.rgcn(blocks, x) pos_score = self.pred(positive_graph, x) neg_score = self.pred(negative_graph, x) return pos_score, neg_score 데이터 로더 구현도 노드 분류을 위한 것과 아주 비슷하다. 유일한 차이점은 negative 샘플러를 사용하며, 노드 타입과 노드 ID 텐서들의 사전 대신에 에지 타입과 에지 ID 텐서들의 사전을 사용한다는 것이다. .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, negative_sampler=dgl.dataloading.negative_sampler.Uniform(5), batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 만약 직접 만든 negative 샘플링 함수를 사용하기를 원한다면, 그 함수는 원본 그래프, 에지 타입과 에지 ID 텐서들의 dictionary를 인자로 받아야하고, 에지 타입들과 소스-목적지 배열 쌍의 dictionary를 리턴해야한다. 다음은 예제 함수이다. .. 
code:: python class NegativeSampler(object): def __init__(self, g, k): # caches the probability distribution self.weights = { etype: g.in_degrees(etype=etype).float() ** 0.75 for etype in g.canonical_etypes} self.k = k def __call__(self, g, eids_dict): result_dict = {} for etype, eids in eids_dict.items(): src, _ = g.find_edges(eids, etype=etype) src = src.repeat_interleave(self.k) dst = self.weights[etype].multinomial(len(src), replacement=True) result_dict[etype] = (src, dst) return result_dict 다음으로는 에지 타입들와 에지 ID들의 dictionary와 negative 샘플러를 데이터 로더에 전달한다. 예를 들면, 아래 코드는 heterogeneous 그래프의 모든 에지들을 iterate하는 예이다. .. code:: python train_eid_dict = { etype: g.edges(etype=etype, form='eid') for etype in g.canonical_etypes} dataloader = dgl.dataloading.EdgeDataLoader( g, train_eid_dict, sampler, negative_sampler=NegativeSampler(g, 5), batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 학습 룹은 ``compute_loss`` 의 구현이 노드 타입들과 예측 값에 대한 두 사전들을 인자로 받는다는 점을 제외하면, homogeneous 그래프의 학습 룹 구현과 거의 같다. .. code:: python model = Model(in_features, hidden_features, out_features, num_classes, etypes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, positive_graph, negative_graph, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] positive_graph = positive_graph.to(torch.device('cuda')) negative_graph = negative_graph.to(torch.device('cuda')) input_features = blocks[0].srcdata['features'] pos_score, neg_score = model(positive_graph, negative_graph, blocks, input_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() ================================================ FILE: docs/source/guide_ko/minibatch-nn.rst ================================================ .. 
_guide_ko-minibatch-custom-gnn-module: 6.5 미니-배치 학습을 위한 커스텀 GNN 모듈 구현하기 ---------------------------------------- :ref:`(English Version) ` Homogeneous 그래프나 heterogeneous 그래프를 대상으로 전체 그래프를 업데이트하는 커스텀 GNN 모듈을 만드는 것에 익숙하다면, MFG에 대한 연산을 구현하는 코드도 비슷하다는 것을 알 수 있다. 차이점은 노드들이 입력 노드와 출력 노드로 나뉜다는 것 뿐이다. 커스텀 graph convolution 모듈을 예로 들자. 이 코드는 단지 커스텀 GNN 모듈이 어떻게 동작하는지 보여주기 위함이지, 가장 효율적인 구현이 아님을 주의하자. .. code:: python class CustomGraphConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) def forward(self, g, h): with g.local_scope(): g.ndata['h'] = h g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) return self.W(torch.cat([g.ndata['h'], g.ndata['h_neigh']], 1)) 전체 그래프에 대한 커스텀 메시지 전달 NN 모듈이 있고, 이를 MFG에서 작동하도록 만들고 싶다면, 다음과 같이 forward 함수를 다시 작성하는 것만이 필요하다. 전체 그래프에 대한 구현은 주석 처리를 했으니, 새로운 코드들과 비교해 보자. .. code:: python class CustomGraphConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) # h is now a pair of feature tensors for input and output nodes, instead of # a single feature tensor. # def forward(self, g, h): def forward(self, block, h): # with g.local_scope(): with block.local_scope(): # g.ndata['h'] = h h_src = h h_dst = h[:block.number_of_dst_nodes()] block.srcdata['h'] = h_src block.dstdata['h'] = h_dst # g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) block.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'h_neigh')) # return self.W(torch.cat([g.ndata['h'], g.ndata['h_neigh']], 1)) return self.W(torch.cat( [block.dstdata['h'], block.dstdata['h_neigh']], 1)) 일반적으로, 직접 구현한 NN 모듈이 MFG에서 동작하게 만들기 위해서는 다음과 같은 것을 해야한다. - 첫 몇 행들(row)을 잘라서 입력 피쳐들로부터 출력 노드의 피처를 얻는다. 행의 개수는 :meth:`block.number_of_dst_nodes ` 로 얻는다. - 원본 그래프가 한 하나의 노드 타입을 갖는 경우, :attr:`g.ndata ` 를 입력 노드의 피쳐의 경우 :attr:`block.srcdata ` 로 또는 출력 노드의 피쳐의 경우 :attr:`block.dstdata ` 로 교체한다. 
- 원본 그래프가 여러 종류의 노드 타입을 갖는 경우, :attr:`g.nodes ` 를 입력 노드의 피쳐의 경우 :attr:`block.srcnodes ` 로 또는 출력 노드의 피처의 경우 :attr:`block.dstnodes ` 로 교체한다. - :meth:`g.num_nodes ` 를 입력 노드의 개수는 :meth:`block.number_of_src_nodes ` 로 출력 노드의 개수는 :meth:`block.number_of_dst_nodes ` 로 각각 교체한다. Heterogeneous 그래프들 ~~~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프의 경우도 커스텀 GNN 모듈을 만드는 것은 비슷하다. 예를 들어, 전체 그래프에 적용되는 다음 모듈을 예로 들어보자. .. code:: python class CustomHeteroGraphConv(nn.Module): def __init__(self, g, in_feats, out_feats): super().__init__() self.Ws = nn.ModuleDict() for etype in g.canonical_etypes: utype, _, vtype = etype self.Ws[etype] = nn.Linear(in_feats[utype], out_feats[vtype]) for ntype in g.ntypes: self.Vs[ntype] = nn.Linear(in_feats[ntype], out_feats[ntype]) def forward(self, g, h): with g.local_scope(): for ntype in g.ntypes: g.nodes[ntype].data['h_dst'] = self.Vs[ntype](h[ntype]) g.nodes[ntype].data['h_src'] = h[ntype] for etype in g.canonical_etypes: utype, _, vtype = etype g.update_all( fn.copy_u('h_src', 'm'), fn.mean('m', 'h_neigh'), etype=etype) g.nodes[vtype].data['h_dst'] = g.nodes[vtype].data['h_dst'] + \ self.Ws[etype](g.nodes[vtype].data['h_neigh']) return {ntype: g.nodes[ntype].data['h_dst'] for ntype in g.ntypes} ``CustomHeteroGraphConv`` 에서의 원칙은 ``g.nodes`` 를 대상 피쳐가 입력 노드의 것인지 출력 노드의 것인지에 따라서 ``g.srcnodes`` 또는 ``g.dstnodes`` 바꾸는 것이다. .. 
code:: python class CustomHeteroGraphConv(nn.Module): def __init__(self, g, in_feats, out_feats): super().__init__() self.Ws = nn.ModuleDict() for etype in g.canonical_etypes: utype, _, vtype = etype self.Ws[etype] = nn.Linear(in_feats[utype], out_feats[vtype]) for ntype in g.ntypes: self.Vs[ntype] = nn.Linear(in_feats[ntype], out_feats[ntype]) def forward(self, g, h): with g.local_scope(): for ntype in g.ntypes: h_src, h_dst = h[ntype] g.dstnodes[ntype].data['h_dst'] = self.Vs[ntype](h[ntype]) g.srcnodes[ntype].data['h_src'] = h[ntype] for etype in g.canonical_etypes: utype, _, vtype = etype g.update_all( fn.copy_u('h_src', 'm'), fn.mean('m', 'h_neigh'), etype=etype) g.dstnodes[vtype].data['h_dst'] = \ g.dstnodes[vtype].data['h_dst'] + \ self.Ws[etype](g.dstnodes[vtype].data['h_neigh']) return {ntype: g.dstnodes[ntype].data['h_dst'] for ntype in g.ntypes} Homogeneous 그래프, 이분 그래프(bipartite graph), 그리고 MFG를 위한 모듈 작성하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL의 모든 메시지 전달 모듈들은 homogeneous 그래프, 단방향 이분 그래프 (unidirectional bipartite graphs, 두개 노드 타입을 갖고, 하나의 에지 타입을 가짐), 그리고 하나의 에지 타입을 갖는 MFG에서 동작한다. 기본적으로 DGL 빌트인 뉴럴 네트워크 모듈의 입력 그래프와 피쳐는 아래 경우들 중에 하나를 만족해야 한다. - 입력 피쳐가 텐서들의 쌍인 경우, 입력 그래프는 단방향 이분(unidirectional bipartite) 그래프이어야 한다. - 입력 피쳐가 단일 텐서이고 입력 그래프가 MFG인 경우, DGL은 자동으로 출력 노드의 피쳐를 입력 노드 피처의 첫 몇개의 행으로 정의한다. - 입력 피쳐가 단일 텐서이고 입력 그래프가 MFG가 아닌 경우, 입력 그래프는 반드시 homogeneous여야 한다. 다음 코드는 :class:`dgl.nn.pytorch.SAGEConv` 을 PyTorch로 단순하게 구현한 것이다. (MXNet이나 TensorFlow 버전도 제공함.) (이 코드는 normalization이 제거되어 있고, mean aggregation만 사용한다.) ..
code:: python import dgl.function as fn class SAGEConv(nn.Module): def __init__(self, in_feats, out_feats): super().__init__() self.W = nn.Linear(in_feats * 2, out_feats) def forward(self, g, h): if isinstance(h, tuple): h_src, h_dst = h elif g.is_block: h_src = h h_dst = h[:g.number_of_dst_nodes()] else: h_src = h_dst = h g.srcdata['h'] = h_src g.dstdata['h'] = h_dst g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h_neigh')) return F.relu( self.W(torch.cat([g.dstdata['h'], g.dstdata['h_neigh']], 1))) :ref:`guide_ko-nn` 은 단방향 이분 그래프, homogeneous 그래프와 MFG에 적용되는 :class:`dgl.nn.pytorch.SAGEConv` 를 자세히 다루고 있다. ================================================ FILE: docs/source/guide_ko/minibatch-node.rst ================================================ .. _guide_ko-minibatch-node-classification-sampler: 6.1 이웃 샘플링을 사용한 노드 분류 GNN 모델 학습하기 ----------------------------------------- :ref:`(English Version) ` Stochastic 학습이 되도록 모델을 만들기 위해서는, 다음과 같은 것이 필요하다. - 이웃 샘플러 정의하기 - 미니 배치 학습이 되도록 모델을 변경하기 - 학습 룹 고치기 이제, 이 단계를 어떻게 구현하는지 하나씩 살펴보자. 이웃 샘플러 및 데이터 로더 정의하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ DGL은 계산하기를 원하는 노드들에 대해서 각 레이어에서 필요한 computation dependency들을 생성하는 몇 가지 이웃 샘플러 클래스들을 가지고 있다. 가장 단순한 이웃 샘플러는 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 로, 노드가 그 노드의 모든 이웃들로부터 메시지를 수집하도록 해준다. DGL의 샘플러를 사용하기 위해서는 이를 미니배치에 있는 노드들의 집합을 iterate하는 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 와 합쳐야한다. 다음 예제 코드는 배치들의 학습 노드 ID 배열 ``train_nids`` 를 iterate하고, 생성된 MFG(Message Flow Graph)들의 목록을 GPU로 옮기는 PyTorch DataLoader를 만든다. .. code:: python import dgl import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.NodeDataLoader( g, train_nids, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) DataLoader를 iterate 하면서 각 레이어에 대한 computation dependency들을 대표하도록 특별하게 생성된 그래프들의 리스트를 얻을 수 있다. DGL에서 이것들을 *message flow graph* (MFG) 라고 부른다. ..
code:: python input_nodes, output_nodes, blocks = next(iter(dataloader)) print(blocks) Iterator는 매번 세개의 아이템을 생성한다. ``input_nodes`` 는 ``output_nodes`` 의 representation을 계산하는데 필요한 노드들을 담고 있다. ``block`` 은 그것의 노드가 출력으로 계산되어야 할 각 GNN 레이어에 대해 어떤 노드 representation들이 입력으로 필요한지, 입력 노드들의 representation들이 출력 노드로 어떻게 전파되어야 하는지를 설명한다. .. note:: Message flow graph의 개념은 :doc:`Stochastic Training Tutorial ` 을 참고하자. 지원되는 빌드인 샘플러들의 전체 목록은 :ref:`neighborhood sampler API reference ` 에서 찾아볼 수 있다. :ref:`guide_ko-minibatch-customizing-neighborhood-sampler` 에는 여러분만의 이웃 샘플러 만드는 방법과 MFG 개념에 대한 보다 상세한 설명을 담고 있다. .. _guide_ko-minibatch-node-classification-model: 모델을 미니-배치 학습에 맞게 만들기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ 만약 DGL에서 제공하는 메시지 전달 모듈만을 사용하고 있다면, 모델을 미니-배치 학습에 맞도록 수정할 것은 적다. 멀티-레이어 GCN을 예로 들어보자. 그래프 전체에 대한 모델 구현은 아래와 같다. .. code:: python class TwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dglnn.GraphConv(in_features, hidden_features) self.conv2 = dglnn.GraphConv(hidden_features, out_features) def forward(self, g, x): x = F.relu(self.conv1(g, x)) x = F.relu(self.conv2(g, x)) return x 이 때, 변경해야할 것은 ``g`` 를 앞에서 생성된 ``block`` 로 교체하는 것이 전부이다. .. code:: python class StochasticTwoLayerGCN(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.conv1 = dgl.nn.GraphConv(in_features, hidden_features) self.conv2 = dgl.nn.GraphConv(hidden_features, out_features) def forward(self, blocks, x): x = F.relu(self.conv1(blocks[0], x)) x = F.relu(self.conv2(blocks[1], x)) return x 위 DGL ``GraphConv`` 모듈들은 데이터 로더가 생성한 ``block`` 의 원소를 argument로 받는다. :ref:`The API reference of each NN module ` 는 모듈이 MFG를 argument로 받을 수 있는지 없는지를 알려주고 있다. 만약 여러분 자신의 메시지 전달 모듈을 사용하고 싶다면, :ref:`guide_ko-minibatch-custom-gnn-module` 를 참고하자. 학습 룹 ~~~~~ 단순하게 학습 룹은 커스터마이징된 배치 iterator를 사용해서 데이터셋을 iterating하는 것으로 구성된다. MFG들의 리스트를 반환하는 매 iteration마다, 다음과 같은 일을 한다. 1. 입력 노드들의 노드 피처들을 GPU로 로딩한다. 
노드 피쳐들은 메모리나 외부 저장소에 저장되어 있을 수 있다. 그래프 전체 학습에서 모든 노드들의 피처를 로드하는 것과는 다르게, 입력 노드들의 피처만 로드하면 된다는 점을 유의하자. 만약 피쳐들이 ``g.ndata`` 에 저장되어 있다면, 그 피쳐들은 ``blocks[0].srcdata`` 에 저장된 피쳐들, 즉 첫번째 MFG의 소스 노드들의 피처들을 접근해서 로드될 수 있다. 여기서 노드들은 최종 representation을 계산하는데 필요한 모든 노드들을 의미한다. 2. MFG들의 리스트 및 입력 노드 피쳐들을 멀티-레이어 GNN에 입력해서 결과를 얻는다. 3. 출력 노드에 해당하는 노드 레이블을 GPU에 로드한다. 비슷하게, 노드 레이블은 메모리나 외부 저장소에 저장되어 있을 수 있다. 역시, 그래프 전체 학습에서 모든 노드들의 레이블을 로드하는 것과는 다르게, 출력 노드들의 레이블만 로드한다는 점을 알아두자. 피처가 ``g.ndata`` 에 저장되어 있다면, 레이블은 ``blocks[-1].dstdata`` 의 피쳐들 즉, 마지막 MFG의 목적지 노드들의 피쳐들을 접근해서 로드될 수 있다. 이것들은 최종 representation을 계산할 노드들과 같다. 4. loss를 계산한 후, backpropagate를 수행한다. .. code:: python model = StochasticTwoLayerGCN(in_features, hidden_features, out_features) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, output_nodes, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] input_features = blocks[0].srcdata['features'] output_labels = blocks[-1].dstdata['label'] output_predictions = model(blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() DGL에서는 end-to-end stochastic 학습 예제인 `GraphSAGE implementation `__ 를 제공한다. Heterogeneous 그래프의 경우 ~~~~~~~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프에 대한 노드 분류 그래프 뉴럴 네트워크를 학습하는 것은 간단하다. :ref:`how to train a 2-layer RGCN on full graph ` 를 예로 들어보자. 미니-배치 학습을 하는 RGCN 구현 코드는 이 예제와 매우 비슷하다. (간단하게 하기 위해서 self-loop, non-linearity와 기본적인 decomposition은 제거했다.) .. 
code:: python class StochasticTwoLayerRGCN(nn.Module): def __init__(self, in_feat, hidden_feat, out_feat, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(in_feat, hidden_feat, norm='right') for rel in rel_names }) self.conv2 = dglnn.HeteroGraphConv({ rel : dglnn.GraphConv(hidden_feat, out_feat, norm='right') for rel in rel_names }) def forward(self, blocks, x): x = self.conv1(blocks[0], x) x = self.conv2(blocks[1], x) return x 또한, DGL이 제공하는 일부 샘플러들은 heterogeneous 그래프를 지원한다. 예를 들어, 제공되는 :class:`~dgl.dataloading.neighbor.MultiLayerFullNeighborSampler` 클래스 및 :class:`~dgl.dataloading.pytorch.NodeDataLoader` 클래스를 stochastic 학습에도 여전히 사용할 수 있다. 전체 이웃 샘플링에서 다른 점은 학습 셋에 노드 타입들과 노드 ID들의 사전을 명시해야한다는 것 뿐이다. .. code:: python sampler = dgl.dataloading.MultiLayerFullNeighborSampler(2) dataloader = dgl.dataloading.NodeDataLoader( g, train_nid_dict, sampler, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) 학습 룹은 homogeneous 그래프에 대한 학습 룹이랑 거의 유사하다. 다른 점은 ``compute_loss`` 의 구현에서 노드 타입들와 예측 결과라는 두개의 dictionary들을 인자로 받는다는 것이다. .. code:: python model = StochasticTwoLayerRGCN(in_features, hidden_features, out_features, etypes) model = model.cuda() opt = torch.optim.Adam(model.parameters()) for input_nodes, output_nodes, blocks in dataloader: blocks = [b.to(torch.device('cuda')) for b in blocks] input_features = blocks[0].srcdata # returns a dict output_labels = blocks[-1].dstdata # returns a dict output_predictions = model(blocks, input_features) loss = compute_loss(output_labels, output_predictions) opt.zero_grad() loss.backward() opt.step() End-to-end stochastic 학습 예제는 `RGCN implementation `__ 를 참고하자. ================================================ FILE: docs/source/guide_ko/minibatch.rst ================================================ .. 
_guide_ko-minibatch: 6장: 큰 그래프에 대한 stochastic 학습 =============================== :ref:`(English Version) ` 만약 수백만, 수십억개의 노드들 또는 에지들을 갖는 큰 그래프인 경우에는 :ref:`guide_ko-training` 에서 소개한 그래프 전체를 사용한 학습을 적용하기 어려울 것이다. Hidden state 크기가 :math:`H` 인 노드가 :math:`N` 개인 그래프에 :math:`L` -레이어의 graph convolutional network를 생각해보자. 중간 hidden 상태를 저장하는데 :math:`O(NLH)` 메모리가 필요하고, :math:`N` 이 큰 경우 GPU 하나의 용량을 훨씬 넘을 것이다. 이 절에서 모든 노드들의 피쳐를 GPU에 올려야할 필요가 없는 stochastic 미니-배치 학습을 수행하는 법을 알아본다. 이웃 샘플링(Neighborhood Sampling) 방법 개요 --------------------------------------- 이웃 샘플링 방법은 일반적으로 다음과 같다. 각 gradient descent 단계마다, :math:`L` 번째 레이어의 최종 representation이 계산되어야 할 노드들의 미니 배치를 선택한다. 그 다음으로 :math:`L-1` 레이어에서 그것들의 이웃 전체 또는 일부를 선택한다. 이 절차는 모델의 입력에 이를 때까지 반복된다. 이 반복 프로세스는 출력에서 시작해서 거꾸로 입력까지의 의존성 그래프(dependency graph)를 생성하며, 이를 시각화하면 다음과 같다: .. figure:: https://data.dgl.ai/asset/image/guide_6_0_0.png :alt: Imgur 이를 사용하면, 큰 그래프에 대한 GNN 모델을 학습하는데 필요한 워크로드 및 연산 자원을 절약할 수 있다. DGL은 이웃 샘플링을 사용한 GNN 학습을 위한 몇 가지 이웃 샘플러들과 파이프라인을 제공한다. 또한, 샘플링 전략을 커스터마이징하는 방법도 지원한다. 로드맵 ---- 이 장은 GNN을 stochastical하게 학습하는 여러 시나리오들로 시작한다. * :ref:`guide_ko-minibatch-node-classification-sampler` * :ref:`guide_ko-minibatch-edge-classification-sampler` * :ref:`guide_ko-minibatch-link-classification-sampler` 이 후 절들에서는 새로운 샘플링 알고리즘들, 미니-배치 학습과 호환되는 새로운 GNN 모듈을 만들고자 하거나, 검증과 추론이 미니-배치에서 어떻게 수행되는지 이해하고 싶은 분들을 위한 보다 고급 토픽들을 다룬다. * :ref:`guide_ko-minibatch-customizing-neighborhood-sampler` * :ref:`guide_ko-minibatch-custom-gnn-module` * :ref:`guide_ko-minibatch-inference` 마지막으로 이웃 샘플링을 구현하고 사용하는데 대한 성능 팁을 알아본다. * :ref:`guide_ko-minibatch-gpu-sampling` .. toctree:: :maxdepth: 1 :hidden: :glob: minibatch-node minibatch-edge minibatch-link minibatch-custom-sampler minibatch-nn minibatch-inference minibatch-gpu-sampling ================================================ FILE: docs/source/guide_ko/mixed_precision.rst ================================================ ..
_guide_ko-mixed_precision: 8장: Mixed Precision 학습 ======================= :ref:`(English Version) ` DGL은 mixed precision 학습을 위해서 `PyTorch's automatic mixed precision package `_ 와 호환된다. 따라서, 학습 시간 및 GPU 메모리 사용량을 절약할 수 있다. Half precision을 사용한 메시지 전달 ------------------------------ fp16을 지원하는 DGL은 UDF(User Defined Function)이나 빌트인 함수(예, ``dgl.function.sum``, ``dgl.function.copy_u``)를 사용해서 ``float16`` 피쳐에 대한 메시지 전달을 허용한다. 다음 예제는 DGL 메시지 전달 API를 half-precision 피쳐들에 사용하는 방법을 보여준다. >>> import torch >>> import dgl >>> import dgl.function as fn >>> g = dgl.rand_graph(30, 100).to(0) # Create a graph on GPU w/ 30 nodes and 100 edges. >>> g.ndata['h'] = torch.rand(30, 16).to(0).half() # Create fp16 node features. >>> g.edata['w'] = torch.rand(100, 1).to(0).half() # Create fp16 edge features. >>> # Use DGL's built-in functions for message passing on fp16 features. >>> g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'x')) >>> g.ndata['x'][0] tensor([0.3391, 0.2208, 0.7163, 0.6655, 0.7031, 0.5854, 0.9404, 0.7720, 0.6562, 0.4028, 0.6943, 0.5908, 0.9307, 0.5962, 0.7827, 0.5034], device='cuda:0', dtype=torch.float16) >>> g.apply_edges(fn.u_dot_v('h', 'x', 'hx')) >>> g.edata['hx'][0] tensor([5.4570], device='cuda:0', dtype=torch.float16) >>> # Use UDF(User Defined Functions) for message passing on fp16 features. >>> def message(edges): ... return {'m': edges.src['h'] * edges.data['w']} ... >>> def reduce(nodes): ... return {'y': torch.sum(nodes.mailbox['m'], 1)} ... >>> def dot(edges): ... return {'hy': (edges.src['h'] * edges.dst['y']).sum(-1, keepdims=True)} ... 
>>> g.update_all(message, reduce) >>> g.ndata['y'][0] tensor([0.3394, 0.2209, 0.7168, 0.6655, 0.7026, 0.5854, 0.9404, 0.7720, 0.6562, 0.4028, 0.6943, 0.5908, 0.9307, 0.5967, 0.7827, 0.5039], device='cuda:0', dtype=torch.float16) >>> g.apply_edges(dot) >>> g.edata['hy'][0] tensor([5.4609], device='cuda:0', dtype=torch.float16) End-to-End Mixed Precision 학습 ------------------------------ DGL은 PyTorch의 AMP package를 사용해서 mixed precision 학습을 구현하고 있어서, 사용 방법은 `PyTorch의 것 `_ 과 동일하다. GNN 모델의 forward 패스(loss 계산 포함)를 ``torch.cuda.amp.autocast()`` 로 래핑하면 PyTorch는 각 op 및 텐서에 대해서 적절한 데이터 타입을 자동으로 선택한다. Half precision 텐서는 메모리 효율적이고, half precision 텐서에 대한 대부분 연산들은 GPU tensorcore들을 활용하기 때문에 더 빠르다. ``float16`` 포맷의 작은 gradient들은 언더플로우(underflow) 문제를 갖는데 (0이 되어버림), PyTorch는 이를 해결하기 위해서 ``GradScaler`` 모듈을 제공한다. ``GradScaler`` 는 loss 값에 factor를 곱하고, 이 scaled loss에 backward pass를 수행한다. 그리고 파라메터들을 업데이트하는 optimizer를 수행하기 전에 unscale 한다. 다음은 3-레이어 GAT를 Reddit 데이터셋(1억 1400만개의 에지를 갖는)에 학습을 하는 스크립트이다. ``use_fp16`` 가 활성화/비활성화되었을 때의 코드 차이를 살펴보자. ..
code:: import torch import torch.nn as nn import torch.nn.functional as F from torch.cuda.amp import autocast, GradScaler import dgl from dgl.data import RedditDataset from dgl.nn import GATConv use_fp16 = True class GAT(nn.Module): def __init__(self, in_feats, n_hidden, n_classes, heads): super().__init__() self.layers = nn.ModuleList() self.layers.append(GATConv(in_feats, n_hidden, heads[0], activation=F.elu)) self.layers.append(GATConv(n_hidden * heads[0], n_hidden, heads[1], activation=F.elu)) self.layers.append(GATConv(n_hidden * heads[1], n_classes, heads[2], activation=F.elu)) def forward(self, g, h): for l, layer in enumerate(self.layers): h = layer(g, h) if l != len(self.layers) - 1: h = h.flatten(1) else: h = h.mean(1) return h # Data loading data = RedditDataset() device = torch.device(0) g = data[0] g = dgl.add_self_loop(g) g = g.int().to(device) train_mask = g.ndata['train_mask'] features = g.ndata['feat'] labels = g.ndata['label'] in_feats = features.shape[1] n_hidden = 256 n_classes = data.num_classes n_edges = g.num_edges() heads = [1, 1, 1] model = GAT(in_feats, n_hidden, n_classes, heads) model = model.to(device) # Create optimizer optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) # Create gradient scaler scaler = GradScaler() for epoch in range(100): model.train() optimizer.zero_grad() # Wrap forward pass with autocast with autocast(enabled=use_fp16): logits = model(g, features) loss = F.cross_entropy(logits[train_mask], labels[train_mask]) if use_fp16: # Backprop w/ gradient scaling scaler.scale(loss).backward() scaler.step(optimizer) scaler.update() else: loss.backward() optimizer.step() print('Epoch {} | Loss {}'.format(epoch, loss.item())) NVIDIA V100 (16GB) 한개를 갖는 컴퓨터에서, 이 모델을 fp16을 사용하지 않고 학습할 때는 15.2GB GPU 메모리가 사용되는데, fp16을 활성화하면, 학습에 12.8G GPU 메모리가 사용된며, 두 경우 loss가 비슷한 값으로 수렴한다. 만약 head의 갯수를 ``[2, 2, 2]`` 로 바꾸면, fp16를 사용하지 않는 학습은 GPU OOM(out-of-memory) 이슈가 생길 것이지만, fp16를 사용한 학습은 15.7G GPU 메모리를 사용하면서 수행된다. 
DGL은 half-precision 지원을 계속 향상하고 있고, 연산 커널의 성능은 아직 최적은 아니다. 앞으로의 업데이트를 계속 지켜보자. ================================================ FILE: docs/source/guide_ko/nn-construction.rst ================================================ .. _guide_ko-nn-construction: 3.1 DGL NN 모듈 생성 함수 --------------------- :ref:`(English Version) ` 생성 함수는 다음 단계들을 수행한다: 1. 옵션 설정 2. 학습할 파라메터 또는 서브모듈 등록 3. 파라메터 리셋 .. code:: import torch.nn as nn from dgl.utils import expand_as_pair class SAGEConv(nn.Module): def __init__(self, in_feats, out_feats, aggregator_type, bias=True, norm=None, activation=None): super(SAGEConv, self).__init__() self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._aggre_type = aggregator_type self.norm = norm self.activation = activation 생성 함수를 만들 때 데이터 차원을 지정해야 한다. 일반적인 PyTorch 모듈의 경우에는 차원이란 보통은 입력 차원, 출력 차원, 그리고 은닉(hidden) 차원을 의미하는데, 그래프 뉴럴 네트워크의 경우 입력 차원은 소스 노드의 차원과 목적지 노드의 차원으로 나뉜다. 데이터 차원들 이외의 전형적인 그래프 뉴럴 네트워크의 옵션으로 aggregation 타입( ``self._aggre_type`` )이 있다. Aggregation 타입은 특정 목적지 노드에 대해서 관련된 여러 에지의 메시지들이 어떻게 집합되어야 하는지를 결정한다. 흔히 사용되는 aggregation 타입으로는 ``mean`` , ``sum`` , ``max`` , ``min`` 이 있으며, 어떤 모듈은 ``lstm`` 과 같이 좀더 복잡한 aggregation을 적용하기도 한다. 여기서 ``norm`` 은 피처 normalization을 위해서 호출될 수 있는 함수이다. SAGEConv 페이퍼에서는 l2 normalization, :math:`h_v = h_v / \lVert h_v \rVert_2` 이 normalization으로 사용되고 있다. .. code:: # aggregator type: mean, pool, lstm, gcn if aggregator_type not in ['mean', 'pool', 'lstm', 'gcn']: raise KeyError('Aggregator type {} not supported.'.format(aggregator_type)) if aggregator_type == 'pool': self.fc_pool = nn.Linear(self._in_src_feats, self._in_src_feats) if aggregator_type == 'lstm': self.lstm = nn.LSTM(self._in_src_feats, self._in_src_feats, batch_first=True) if aggregator_type in ['mean', 'pool', 'lstm']: self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=bias) self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=bias) self.reset_parameters() 다음으로는 파라메터들과 서브모듈들을 등록한다.
SAGEConv의 경우에는 서브모듈은 aggregation 타입에 따라 달라진다. 그 모듈들은 ``nn.Linear`` , ``nn.LSTM`` 등과 같은 순수한 PyTorch nn 모듈이다. 생성 함수의 마지막에는 ``reset_parameters()`` 호출로 가중치들을 초기화한다. .. code:: def reset_parameters(self): """Reinitialize learnable parameters.""" gain = nn.init.calculate_gain('relu') if self._aggre_type == 'pool': nn.init.xavier_uniform_(self.fc_pool.weight, gain=gain) if self._aggre_type == 'lstm': self.lstm.reset_parameters() if self._aggre_type != 'gcn': nn.init.xavier_uniform_(self.fc_self.weight, gain=gain) nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain) ================================================ FILE: docs/source/guide_ko/nn-forward.rst ================================================ .. _guide_ko-nn-forward: 3.2 DGL NN 모듈의 Forward 함수 --------------------------- :ref:`(English Version) ` NN 모듈에서 ``forward()`` 함수는 실제 메시지 전달과 연산을 수행한다. 일반적으로 텐서들을 파라메터로 받는 PyTorch의 NN 모듈과 비교하면, DGL NN 모듈은 :class:`dgl.DGLGraph` 를 추가 파라메터로 받는다. ``forward()`` 함수는 3단계로 수행된다. - 그래프 체크 및 그래프 타입 명세화 - 메시지 전달 - 피쳐 업데이트 이 절에서는 SAGEConv에서 사용되는 ``forward()`` 함수를 자세하게 살펴보겠다. 그래프 체크와 그래프 타입 명세화(graph type specification) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: def forward(self, graph, feat): with graph.local_scope(): # Specify graph type then expand input feature according to graph type feat_src, feat_dst = expand_as_pair(feat, graph) ``forward()`` 는 계산 및 메시지 전달 과정에서 유효하지 않은 값을 만들 수 있는 여러 특별한 케이스들을 다룰 수 있어야 한다. :class:`~dgl.nn.pytorch.conv.GraphConv` 와 같은 그래프 conv 모듈에서 수행하는 가장 전형적인 점검은 입력 그래프가 in-degree가 0인 노드를 갖지 않는지 확인하는 것이다. in-degree가 0인 경우에, ``mailbox`` 에 아무것도 없게 되고, 축약 함수는 모두 0인 값을 만들어낼 것이다. 이는 잠재적인 모델 성능 문제를 일으킬 수도 있다. 하지만, :class:`~dgl.nn.pytorch.conv.SAGEConv` 모듈의 경우, aggregated representation은 원래의 노드 피쳐와 연결(concatenated)되므로 ``forward()`` 의 결과가 모두 0이 되지는 않는다. 따라서 이런 체크가 필요 없다. DGL NN 모듈은 단종 그래프, 이종 그래프(:ref:`guide_ko-graph-heterogeneous`), 서브그래프 블록(:ref:`guide_ko-minibatch` ) 등 여러 종류의 그래프 입력에 걸쳐서 재사용될 수 있다. SAGEConv의 수학 공식은 다음과 같다: ..
math:: h_{\mathcal{N}(dst)}^{(l+1)} = \mathrm{aggregate} \left(\{h_{src}^{l}, \forall src \in \mathcal{N}(dst) \}\right) .. math:: h_{dst}^{(l+1)} = \sigma \left(W \cdot \mathrm{concat} (h_{dst}^{l}, h_{\mathcal{N}(dst)}^{l+1}) + b \right) .. math:: h_{dst}^{(l+1)} = \mathrm{norm}(h_{dst}^{l+1}) 그래프 타입에 따라서 소스 노드 피쳐(``feat_src``)와 목적지 노드 피쳐(``feat_dst``)를 명시해야 한다. :meth:`~dgl.utils.expand_as_pair` 는 명시된 그래프 타입에 따라 ``feat`` 를 ``feat_src`` 와 ``feat_dst`` 로 확장하는 함수이다. 이 함수의 동작은 다음과 같다. .. code:: def expand_as_pair(input_, g=None): if isinstance(input_, tuple): # Bipartite graph case return input_ elif g is not None and g.is_block: # Subgraph block case if isinstance(input_, Mapping): input_dst = { k: F.narrow_row(v, 0, g.number_of_dst_nodes(k)) for k, v in input_.items()} else: input_dst = F.narrow_row(input_, 0, g.number_of_dst_nodes()) return input_, input_dst else: # Homogeneous graph case return input_, input_ homogeneous 그래프 전체를 학습시키는 경우, 소스 노드와 목적지 노드들의 타입이 같다. 이것들은 그래프의 전체 노드들이다. Heterogeneous 그래프의 경우, 그래프는 여러 이분 그래프로 나뉠 수 있다. 즉, 각 관계당 하나의 그래프로. 관계는 ``(src_type, edge_type, dst_dtype)`` 로 표현된다. 입력 피쳐 ``feat`` 가 tuple 이라고 확인되면, 이 함수는 그 그래프는 이분 그래프로 취급한다. Tuple의 첫번째 요소는 소스 노드 피처이고, 두번째는 목적지 노드의 피처이다. 미니-배치 학습의 경우, 연산이 여러 목적지 노드들을 기반으로 샘플된 서브 그래프에 적용된다. DGL에서 서브 그래프는 ``block`` 이라고 한다. 블록이 생성되는 단계에서, ``dst_nodes`` 가 노드 리스트의 앞에 놓이게 된다. ``[0:g.number_of_dst_nodes()]`` 인덱스를 이용해서 ``feat_dst`` 를 찾아낼 수 있다. ``feat_src`` 와 ``feat_dst`` 가 정해진 후에는, 세가지 그래프 타입들에 대한 연산은 모두 동일하다. 메시지 전달과 축약 ~~~~~~~~~~~~~~ .. 
code:: import dgl.function as fn import torch.nn.functional as F from dgl.utils import check_eq_shape if self._aggre_type == 'mean': graph.srcdata['h'] = feat_src graph.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'neigh')) h_neigh = graph.dstdata['neigh'] elif self._aggre_type == 'gcn': check_eq_shape(feat) graph.srcdata['h'] = feat_src graph.dstdata['h'] = feat_dst graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'neigh')) # divide in_degrees degs = graph.in_degrees().to(feat_dst) h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) / (degs.unsqueeze(-1) + 1) elif self._aggre_type == 'pool': graph.srcdata['h'] = F.relu(self.fc_pool(feat_src)) graph.update_all(fn.copy_u('h', 'm'), fn.max('m', 'neigh')) h_neigh = graph.dstdata['neigh'] else: raise KeyError('Aggregator type {} not recognized.'.format(self._aggre_type)) # GraphSAGE GCN does not require fc_self. if self._aggre_type == 'gcn': rst = self.fc_neigh(h_neigh) else: rst = self.fc_self(h_self) + self.fc_neigh(h_neigh) 이 코드는 실제로 메시지 전달과 축약 연산을 실행하고 있다. 이 부분의 코드는 모듈에 따라 다르게 구현된다. 이 코드의 모든 메시지 전달은 :meth:`~dgl.DGLGraph.update_all` API와 ``built-in`` 메시지/축약 함수들로 구현되어 있는데, 이는 :ref:`guide_ko-message-passing-efficient` 에서 설명된 DGL의 성능 최적화를 모두 활용하기 위해서이다. 출력값을 위한 축약 후 피쳐 업데이트 ~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: # activation if self.activation is not None: rst = self.activation(rst) # normalization if self.norm is not None: rst = self.norm(rst) return rst ``forward()`` 함수의 마지막 부분은 ``reduce function`` 다음에 피쳐를 업데이트하는 것이다. 일반적인 업데이트 연산들은 활성화 함수를 적용하고, 객체 생성 단계에서 설정된 옵션에 따라 normalization을 수행한다. ================================================ FILE: docs/source/guide_ko/nn-heterograph.rst ================================================ .. _guide_ko-nn-heterograph: 3.3 Heterogeneous GraphConv 모듈 ------------------------------- :ref:`(English Version) ` :class:`~dgl.nn.pytorch.HeteroGraphConv` 는 heterogeneous 그래프들에 DGL NN 모듈을 적용하기 위한 모듈 수준의 인캡슐레이션이다. 
메시지 전달 API :meth:`~dgl.DGLGraph.multi_update_all` 와 같은 로직으로 구현되어 있고, 이는 다음을 포함한다. - :math:`r` 관계에 대한 DGL NN 모듈 - 한 노드에 연결된 여러 관계로부터 얻은 결과를 통합하는 축약(reduction) 이는 다음과 같이 공식으로 표현된다: .. math:: h_{dst}^{(l+1)} = \underset{r\in\mathcal{R}, r_{dst}=dst}{AGG} (f_r(g_r, h_{r_{src}}^l, h_{r_{dst}}^l)) , 여기서 :math:`f_r` 는 각 :math:`r` 관계에 대한 NN 모듈이고, :math:`AGG` 는 aggregation 함수이다. HeteroGraphConv 구현 로직: ~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: import torch.nn as nn class HeteroGraphConv(nn.Module): def __init__(self, mods, aggregate='sum'): super(HeteroGraphConv, self).__init__() self.mods = nn.ModuleDict(mods) if isinstance(aggregate, str): # An internal function to get common aggregation functions self.agg_fn = get_aggregate_fn(aggregate) else: self.agg_fn = aggregate Heterograph convolution은 각 관계를 NN 모듈에 매핑하는 ``mods`` 사전을 인자로 받고, 한 노드에 대한 여러 관계들의 결과를 집계하는 함수를 설정한다. .. code:: def forward(self, g, inputs, mod_args=None, mod_kwargs=None): if mod_args is None: mod_args = {} if mod_kwargs is None: mod_kwargs = {} outputs = {nty : [] for nty in g.dsttypes} 입력 그래프와 입력 텐서들과 더불어, ``forward()`` 함수는 두가지 추가적인 파라메터들, ``mod_args`` 와 ``mod_kwargs`` 을 받는다. 이것들은 ``self.mods`` 안에서, 다른 종류의 관계에 연관된 NN 모듈을 수행할 때, 커스터마이즈된 파라메터들로써 사용된다. 각 목적지 타입 ``nty`` 에 대한 결과 텐서를 저장하기 위해서 결과 사전(output dictionary)가 생성된다. 각 ``nty`` 에 대한 값은 리스트이다. 이는 ``nty`` 를 목적 타입으로 갖을 관계가 여러개가 있는 경우, 단일 노드 타입이 여러 아웃풋들을 갖을 수 있음을 의미한다. ``HeteroGraphConv`` 는 이 리스트들에 대해서 추가적인 aggregation을 수행할 것이다. .. 
code:: if g.is_block: src_inputs = inputs dst_inputs = {k: v[:g.number_of_dst_nodes(k)] for k, v in inputs.items()} else: src_inputs = dst_inputs = inputs for stype, etype, dtype in g.canonical_etypes: rel_graph = g[stype, etype, dtype] if rel_graph.num_edges() == 0: continue if stype not in src_inputs or dtype not in dst_inputs: continue dstdata = self.mods[etype]( rel_graph, (src_inputs[stype], dst_inputs[dtype]), *mod_args.get(etype, ()), **mod_kwargs.get(etype, {})) outputs[dtype].append(dstdata) 입력 그래프 ``g`` 는 heterogeneous 그래프 또는 heterogeneous 그래프의 서브그래프 블록일 수 있다. 보통의 NN 모듈처럼, ``forward()`` 함수는 다양한 입력 그래프 타입들을 별로도 다룰 수 있어야 한다. 각 관계는 ``(stype, etype, dtype)`` 인 ``canonical_etype`` 으로 표현된다. ``canonical_etype`` 을 키로 사용해서, 이분 그래프(bipartite graph)인 ``rel_graph`` 를 추출할 수 있다. 이분 그래프에서 입력 피쳐는 ``(src_inputs[stype], dst_inputs[dtype])`` 로 구성된다. 각 관계에 대한 NN 모듈이 호출되고, 결과는 저장된다. .. code:: rsts = {} for nty, alist in outputs.items(): if len(alist) != 0: rsts[nty] = self.agg_fn(alist, nty) 마지막으로 한 목적 노드 타입에 대해 여러 관계로 부터 얻어진 결과들은 ``self.agg_fn`` 를 통해서 집계된다. :class:`~dgl.nn.pytorch.HeteroGraphConv` 의 API DOC에서 관련 예제들이 있다. ================================================ FILE: docs/source/guide_ko/nn.rst ================================================ .. _guide_ko-nn: 3장: GNN 모듈 만들기 ================= :ref:`(English Version) ` DGL NN 모듈은 GNN 모델을 만드는데 필요한 빌딩 블록들로 구성되어 있다. NN 모듈은 백엔드로 사용되는 DNN 프레임워크에 따라 `Pytorch’s NN Module `__ , `MXNet Gluon’s NN Block `__ 그리고 `TensorFlow’s Keras Layer `__ 를 상속한다. DGL NN 모듈에서, 생성 함수에서의 파라메터 등록과 forward 함수에서 텐서 연산은 백엔드 프레임워크의 것과 동일하다. 이런 방식의 구현덕에 DGL 코드는 백엔드 프레임워크 코드와 원활하게 통합될 수 있다. 주요 차이점은 DGL 고유의 메시지 전달 연산에 존재한다. DGL은 일반적으로 많이 사용되는 :ref:`apinn-pytorch-conv` , :ref:`apinn-pytorch-dense-conv` , :ref:`apinn-pytorch-pooling` 와 :ref:`apinn-pytorch-util` 를 포함하고 있고. 여러분의 기여를 환영한다. 이 장에서는 PyTorch 백엔드를 사용한 :class:`~dgl.nn.pytorch.conv.SAGEConv` 를 예제로 커스텀 DGL NN 모듈을 만드는 방법을 소개한다. 
로드맵 ---- * :ref:`guide_ko-nn-construction` * :ref:`guide_ko-nn-forward` * :ref:`guide_ko-nn-heterograph` .. toctree:: :maxdepth: 1 :hidden: :glob: nn-construction nn-forward nn-heterograph ================================================ FILE: docs/source/guide_ko/training-edge.rst ================================================ .. _guide_ko-training-edge-classification: 5.2 에지 분류 및 리그레션(Regression) -------------------------------- :ref:`(English Version) ` 때론 그래프의 에지들의 속성을 예측을 원하는 경우가 있다. 이를 위해서 *에지 분류/리그레션* 모델을 만들고자 한다. 우선, 예제로 사용할 에지 예측을 위한 임의의 그래프를 만든다. .. code:: python src = np.random.randint(0, 100, 500) dst = np.random.randint(0, 100, 500) # make it symmetric edge_pred_graph = dgl.graph((np.concatenate([src, dst]), np.concatenate([dst, src]))) # synthetic node and edge features, as well as edge labels edge_pred_graph.ndata['feature'] = torch.randn(100, 10) edge_pred_graph.edata['feature'] = torch.randn(1000, 10) edge_pred_graph.edata['label'] = torch.randn(1000) # synthetic train-validation-test splits edge_pred_graph.edata['train_mask'] = torch.zeros(1000, dtype=torch.bool).bernoulli(0.6) 개요 ~~~~~~~~~ 앞 절에서 우리는 멀티 레이어 GNN을 사용해서 노드 분류하는 방법을 알아봤다. 임의의 노드에 대한 hidden representation을 계산하기 위해서 같은 기법을 적용한다. 그러면 에지들에 대한 예측은 그것들의 부속 노드들의 representation들로 부터 도출할 수 있다. 에지에 대한 예측을 계산하는 가장 일반적인 방법은 그 에지의 부속 노드들의 representation들과 부수적으로 그 에지에 대한 피쳐들의 parameterized 함수로 표현하는 것이다. 노드 분류 모델과 구현상의 차이점 ~~~~~~~~~~~~~~~~~~~~~~~~ 이전 절에서 만든 모델을 사용해서 노드 representation을 계산한다고 가정하면, :meth:`~dgl.DGLGraph.apply_edges` 메소드로 에지 예측을 계산하는 컴포넌트만 작성하면 된다. 예를 들어, 에지 리그레션을 위해서 각 에지에 대한 점수를 계산하고자 한다면, 아래 코드와 같이 각 에지에 대한 부속 노드의 representation들의 dot product를 계산하면 된다. .. code:: python import dgl.function as fn class DotProductPredictor(nn.Module): def forward(self, graph, h): # h contains the node representations computed from the GNN defined # in the node classification section (Section 5.1). 
with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score')) return graph.edata['score'] 또한 MLP를 사용해서 각 에지에 대한 벡터 값을 예측하는 함수를 작성할 수도 있다. 이 벡터 값은 미래의 다운스트림 테스크들에 사용될 수 있다. 예를 들어, 범주형 분류의 logit으로 사용할 수 있다. .. code:: python class MLPPredictor(nn.Module): def __init__(self, in_features, out_classes): super().__init__() self.W = nn.Linear(in_features * 2, out_classes) def apply_edges(self, edges): h_u = edges.src['h'] h_v = edges.dst['h'] score = self.W(torch.cat([h_u, h_v], 1)) return {'score': score} def forward(self, graph, h): # h contains the node representations computed from the GNN defined # in the node classification section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(self.apply_edges) return graph.edata['score'] 학습 룹(loop) ~~~~~~~~~~~ 노드 representation 계산 모델과 에지 예측 모델을 만들었다면, 모든 에지들에 대한 예측값을 계산하는 전체 그래프를 이용한 학습 룹을 작성할 수 있다. 노드 representation 계산 모델로 ``SAGE`` 를, 에지 예측 모델로 ``DotProductPredictor`` 를 사용한다. .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.sage = SAGE(in_features, hidden_features, out_features) self.pred = DotProductPredictor() def forward(self, g, x): h = self.sage(g, x) return self.pred(g, h) 이 예제에서 학습/검증/테스트 에지 셋이 에지의 이진 마스크로 구분된다고 가정한다. 또한 early stopping이나 모델 저장은 포함하지 않는다. .. code:: python node_features = edge_pred_graph.ndata['feature'] edge_label = edge_pred_graph.edata['label'] train_mask = edge_pred_graph.edata['train_mask'] model = Model(10, 20, 5) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): pred = model(edge_pred_graph, node_features) loss = ((pred[train_mask] - edge_label[train_mask]) ** 2).mean() opt.zero_grad() loss.backward() opt.step() print(loss.item()) .. _guide_ko-training-edge-classification-heterogeneous-graph: Heterogeneous 그래프 ~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프들에 대한 에지 분류는 homogeneous 그래프와 크게 다르지 않다.
하나의 에지 타입에 대해서 에지 분류를 수행하고자 한다면, 모든 노드 타입에 대한 노드 representation을 구하고, :meth:`~dgl.DGLGraph.apply_edges` 메소드를 사용해서 해당 에지 타입에 대해 예측하면 된다. 예를 들면, heterogeneous 그래프의 하나의 에지 타입에 대해 동작하는 ``DotProductPredictor`` 를 작성하고자 한다면, ``apply_edges`` 메소드에 해당 에지 타입을 명시하기만 하면 된다. .. code:: python class HeteroDotProductPredictor(nn.Module): def forward(self, graph, h, etype): # h contains the node representations for each edge type computed from # the GNN for heterogeneous graphs defined in the node classification # section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h # assigns 'h' of all node types in one shot graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype) return graph.edges[etype].data['score'] 비슷하게 ``HeteroMLPPredictor`` 를 작성할 수 있다. .. code:: python class HeteroMLPPredictor(nn.Module): def __init__(self, in_features, out_classes): super().__init__() self.W = nn.Linear(in_features * 2, out_classes) def apply_edges(self, edges): h_u = edges.src['h'] h_v = edges.dst['h'] score = self.W(torch.cat([h_u, h_v], 1)) return {'score': score} def forward(self, graph, h, etype): # h contains the node representations for each edge type computed from # the GNN for heterogeneous graphs defined in the node classification # section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h # assigns 'h' of all node types in one shot graph.apply_edges(self.apply_edges, etype=etype) return graph.edges[etype].data['score'] 특정 타입의 에지에 대해서, 각 에지의 점수를 예측하는 end-to-end 모델은 다음과 같다: .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroDotProductPredictor() def forward(self, g, x, etype): h = self.sage(g, x) return self.pred(g, h, etype) 모델을 사용하는 방법은 노드 타입과 피쳐들에 대한 사전을 모델에 간단하게 입력하면 된다. ..
code:: python model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] label = hetero_graph.edges['click'].data['label'] train_mask = hetero_graph.edges['click'].data['train_mask'] node_features = {'user': user_feats, 'item': item_feats} 학습 룹은 homogeneous 그래프의 것과 거의 유사하다. 예를 들어, 에지 타입 ``click`` 에 대한 에지 레이블을 예측하는 것은 다음과 같이 간단히 구현된다. .. code:: python opt = torch.optim.Adam(model.parameters()) for epoch in range(10): pred = model(hetero_graph, node_features, 'click') loss = ((pred[train_mask] - label[train_mask]) ** 2).mean() opt.zero_grad() loss.backward() opt.step() print(loss.item()) Heterogeneous 그래프의 에지들에 대한 에지 타입 예측하기 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 주어진 에지의 타입을 예측하는 일도 종종 하게된다. :ref:`heterogeneous 그래프 예제 ` 에서는 user와 item을 연결하는 에지가 주어졌을 때, user가 ``click`` 을 선택할지, ``dislike`` 를 선택할지를 예측하고 있다. 이는 추천에서 흔히 쓰이는 평가 예측의 간략한 버전이다. 노드 representation을 얻기 위해서 heterogeneous graph convolution 네트워크를 사용할 수 있다. 이를 위해서 :ref:`이전에 정의한 RGCN ` 를 사용하는 것도 가능하다. 에지 타입을 예측하기 위해서 ``HeteroDotProductPredictor`` 의 용도를 간단히 변경해서 예측할 모든 에지 타입을 “병합“하고 모든 에지들의 각 타입에 대한 점수를 내보내는 하나의 에지 타입만 있는 다른 그래프를 취하게하면 된다. 이 예제에 적용해보면, ``user`` 와 ``item`` 두 노트 타입을 갖으며 ``user`` 와 ``item`` 에 대한 ``click`` 이나 ``dislike`` 같은 모든 에지 타입을 병합하는 단일 에지 타입을 갖는 그래프가 필요하다. 다음 문장으로 간단하게 생성할 수 있다. .. code:: python dec_graph = hetero_graph['user', :, 'item'] 이 함수는 ``user`` 와 ``item`` 을 노드 타입으로 갖고, 두 노드 타입을 연결하고 있는 모든 에지 타입(예, ``click`` 와 ``dislike`` )을 합친 단일 에지 타입을 갖는 heterogeneous 그래프를 리턴한다. 위 코드는 원래의 에지 타입을 ``dgl.ETYPE`` 이라는 이름의 피처로 리턴하기 때문에, 이를 레이블로 사용할 수 있다. .. code:: python edge_label = dec_graph.edata[dgl.ETYPE] 에지 타입 예측 모듈의 입력으로 위 그래프를 사용해서 예측 모델을 다음과 같이 작성한다. .. 
code:: python class HeteroMLPPredictor(nn.Module): def __init__(self, in_dims, n_classes): super().__init__() self.W = nn.Linear(in_dims * 2, n_classes) def apply_edges(self, edges): x = torch.cat([edges.src['h'], edges.dst['h']], 1) y = self.W(x) return {'score': y} def forward(self, graph, h): # h contains the node representations for each edge type computed from # the GNN for heterogeneous graphs defined in the node classification # section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h # assigns 'h' of all node types in one shot graph.apply_edges(self.apply_edges) return graph.edata['score'] 노드 representation 모듈과 에지 타입 예측 모듈을 합친 모델은 다음과 같다. .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroMLPPredictor(out_features, len(rel_names)) def forward(self, g, x, dec_graph): h = self.sage(g, x) return self.pred(dec_graph, h) 학습 룹은 아래와 같이 간단하다. .. code:: python model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] node_features = {'user': user_feats, 'item': item_feats} opt = torch.optim.Adam(model.parameters()) for epoch in range(10): logits = model(hetero_graph, node_features, dec_graph) loss = F.cross_entropy(logits, edge_label) opt.zero_grad() loss.backward() opt.step() print(loss.item()) DGL은 heterogeneous 그래프의 에지들에 대한 타입을 예측하는 문제인 평가 예측 예제로 `Graph Convolutional Matrix Completion `__ 를 제공한다. `모델 구현 파일 `__ 에 있는 노드 representation 모듈은 ``GCMCLayer`` 라고 불린다. 이 둘은 여기서 설명하기에는 너무 복잡하니 자세한 설명은 생략한다. ================================================ FILE: docs/source/guide_ko/training-graph.rst ================================================ .. _guide_ko-training-graph-classification: 5.4 그래프 분류 ------------ :ref:`(English Version) ` 데이터가 커다란 하나의 그래프가 아닌 여러 그래프로 구성된 경우도 종종 있다. 
예를 들면, 사람들의 커뮤니티의 여러 종류 목록 같은 것을 들 수 있다. 같은 커뮤니티에 있는 사람들의 친목 관계를 그래프로 특징을 지어본다면, 분류할 수 있는 그래프들의 리스트를 만들 수 있다. 이 상황에서 그래프 분류 모델을 이용해서 커뮤니티의 종류를 구별해볼 수 있다. 개요 ~~~~~~~~~ 그래프 분류가 노드 분류나 링크 예측 문제와 주요 차이점은 예측 결과가 전체 입력 그래프의 특성을 나타낸다는 것이다. 이전 문제들과 똑같이 노드들이나 에지들에 대해서 메시지 전달을 수행하지만, 그래프 수준의 representation을 찾아내야한다. 그래프 분류 파이프라인은 다음과 같다: .. figure:: https://data.dgl.ai/tutorial/batch/graph_classifier.png :alt: Graph Classification Process 그래프 분류 프로세스 일반적인 방법은 (왼쪽부터 오른쪽으로 진행): - 그래프들의 배치를 준비한다 - 그래프들의 배치에 메시지 전달을 수행해서 노드/에지 피쳐를 업데이트한다 - 노드/에지 피쳐들을 모두 합쳐서 그래프 수준의 representation들을 만든다 - 그래프 수준의 representation들을 사용해서 그래프들을 분류한다 그래프들의 배치(batch) ^^^^^^^^^^^^^^^^^^ 보통의 경우 그래프 분류 문제는 많은 수의 그래프를 사용해서 학습하기 때문에, 모델을 학습할 때 그래프를 한개씩 사용하는 것은 굉장히 비효율적이다. 일반적 딥러닝에서 사용되는 미니-배치 학습의 아이디어를 발려와서, 그래프들의 배치를 만들어서 한번의 학습 이터레이션에 사용하는 것이 가능하다. DGL는 그래프들의 리스트로부터 하나의 배치 그래프(batched graph)를 생성할 수 있다. 단순하게, 이 배치 그래프는 원래의 작은 그래프들을 연결하는 컴포넌트를 가지고 있는 하나의 큰 그래프로 사용된다. .. figure:: https://data.dgl.ai/tutorial/batch/batch.png :alt: Batched Graph 배치 그래프(Batched Graph) 다음 코드 예제는 그래프들의 목록에 :func:`dgl.batch` 를 호출한다. 배치 그래프는 하나의 그래프이자, 그 리스트에 대한 정보를 담고 있다. .. code:: python import dgl import torch as th g1 = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 3]))) g2 = dgl.graph((th.tensor([0, 0, 0, 1]), th.tensor([0, 1, 2, 0]))) bg = dgl.batch([g1, g2]) bg # Graph(num_nodes=7, num_edges=7, # ndata_schemes={} # edata_schemes={}) bg.batch_size # 2 bg.batch_num_nodes() # tensor([4, 3]) bg.batch_num_edges() # tensor([3, 4]) bg.edges() # (tensor([0, 1, 2, 4, 4, 4, 5], tensor([1, 2, 3, 4, 5, 6, 4])) 대부분의 DGL 변환 함수들은 배치 정보를 버린다는 점을 주의하자. 이 정보를 유지하기 위해서, 변환된 그래프에 :func:`dgl.DGLGraph.set_batch_num_nodes` 와 :func:`dgl.DGLGraph.set_batch_num_edges` 를 사용한다. 그래프 리드아웃(readout) ^^^^^^^^^^^^^^^^^^^^ 모든 그래프는 노드와 에지의 피쳐들과 더불어 유일한 구조를 지니고 있다. 하나의 예측을 만들어내기 위해서, 보통은 아마도 풍부한 정보들을 합치고 요약한다. 이런 종류의 연산을 *리드아웃(readout)* 이라고 부른다. 흔히 쓰이는 리드아웃 연산들은 모든 노드 또는 에지 피쳐들에 대한 합(summation), 평균, 최대 또는 최소들이 있다. 
그래프 :math:`g` 에 대해서, 평균 노드 피처 리드아웃은 아래와 같이 정의된다. .. math:: h_g = \frac{1}{|\mathcal{V}|}\sum_{v\in \mathcal{V}}h_v 여기서 :math:`h_g` 는 :math:`g` 에 대한 representation이고, :math:`\mathcal{V}` 는 :math:`g` 의 노드들의 집합, 그리고 :math:`h_v` 는 노드 :math:`v` 의 피쳐이다. DGL은 많이 쓰이는 리드아웃 연산들을 빌트인 함수로 지원한다. 예를 들어, :func:`dgl.mean_nodes` 는 위의 리드아웃 연산을 구현하고 있다. :math:`h_g` 가 구해진 후, 이를 MLP 레이어에 전달해서 분류 결과를 얻는다. 뉴럴 네트워크 모델 작성하기 ~~~~~~~~~~~~~~~~~~~~ 모델에 대한 입력은 노드와 에지의 피쳐들을 갖는 배치 그래프이다. 배치 그래프에 연산하기 ^^^^^^^^^^^^^^^^ 첫째로, 배치 그래프에 있는 그래프들은 완전히 분리되어 있다. 즉, 두 그래프들 사이에 에지가 존재하지 않는다. 이런 멋진 성질 덕에, 모든 메시지 전달 함수는 여전히 같은 결과를 만들어낸다. (즉 그래프 간의 간섭이 없다) 두번째로, 배치 그래프에 대한 리드아웃 함수는 각 그래프에 별도로 수행된다. 배치 크기가 :math:`B` 이고 합쳐진 피쳐(aggregated feature)의 차원이 :math:`D` 인 경우, 리드아웃 결과의 shape은 :math:`(B, D)` 가 된다. .. code:: python import dgl import torch g1 = dgl.graph(([0, 1], [1, 0])) g1.ndata['h'] = torch.tensor([1., 2.]) g2 = dgl.graph(([0, 1], [1, 2])) g2.ndata['h'] = torch.tensor([1., 2., 3.]) dgl.readout_nodes(g1, 'h') # tensor([3.]) # 1 + 2 bg = dgl.batch([g1, g2]) dgl.readout_nodes(bg, 'h') # tensor([3., 6.]) # [1 + 2, 1 + 2 + 3] 마지막으로, 배치 그래프의 각 노드/에지 피쳐는 모든 그래프의 노드와 에지 피쳐들을 순서대로 연결해서 얻는다. .. code:: python bg.ndata['h'] # tensor([1., 2., 1., 2., 3.]) 모델 정의하기 ^^^^^^^^^ 위 연산 규칙을 염두에 두고, 모델을 다음과 같이 정의한다. .. code:: python import dgl.nn.pytorch as dglnn import torch.nn as nn class Classifier(nn.Module): def __init__(self, in_dim, hidden_dim, n_classes): super(Classifier, self).__init__() self.conv1 = dglnn.GraphConv(in_dim, hidden_dim) self.conv2 = dglnn.GraphConv(hidden_dim, hidden_dim) self.classify = nn.Linear(hidden_dim, n_classes) def forward(self, g, h): # Apply graph convolution and activation. h = F.relu(self.conv1(g, h)) h = F.relu(self.conv2(g, h)) with g.local_scope(): g.ndata['h'] = h # Calculate graph representation by average readout. hg = dgl.mean_nodes(g, 'h') return self.classify(hg) 학습 룹 ~~~~~ 데이터 로딩 ^^^^^^^^ 모델이 정의되었다면, 학습을 시작할 수 있다.
그래프 분류는 커다란 그래프 한개가 아니라 상대적으로 작은 그래프를 많이 다루기 때문에, 복잡한 그래프 샘플링 알고리즘을 사용하지 않고 그래프들의 stochastic 미니-배치를 사용해서 효과적으로 학습을 수행할 수 있다. :ref:`guide_ko-data-pipeline` 에서 소개한 그래프 분류 데이터셋을 사용하자. .. code:: python import dgl.data dataset = dgl.data.GINDataset('MUTAG', False) 그래프 분류 데이터셋의 각 아이템은 한개의 그래프와 그 그래프의 레이블 쌍이다. 데이터 로딩 프로세스를 빠르게 하기 위해서 GraphDataLoader의 장점을 사용해 그래프들의 데이터셋을 미니-배치 단위로 iterate한다. .. code:: python from dgl.dataloading import GraphDataLoader dataloader = GraphDataLoader( dataset, batch_size=1024, drop_last=False, shuffle=True) 학습 룹은 데이터로더를 iterate하면서 모델을 업데이트하는 것일 뿐이다. .. code:: python import torch.nn.functional as F # Only an example, 7 is the input feature size model = Classifier(7, 20, 5) opt = torch.optim.Adam(model.parameters()) for epoch in range(20): for batched_graph, labels in dataloader: feats = batched_graph.ndata['attr'] logits = model(batched_graph, feats) loss = F.cross_entropy(logits, labels) opt.zero_grad() loss.backward() opt.step() `DGL's GIN example `__ 의 end-to-end 그래프 분류 예를 참고하자. 이 학습 룹은 `main.py `__ 의 `train` 함수안에 있다. 모델의 구현은 `gin.py `__ 에 있고, :class:`dgl.nn.pytorch.GINConv` (MXNet 및 Tensorflow 버전도 있음)와 같은 컴포넌트들과 graph convolution layer와 배치 normalization 등이 적용되어 있다. Heterogeneous 그래프 ~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프들에 대한 그래프 분류는 homogeneous 그래프의 경우와는 약간 차이가 있다. Heterogeneous 그래프와 호환되는 graph convolution 모듈에 더해서, 리드아웃 함수에서 다른 종류의 노드들에 대한 aggregate를 해야한다. 다음 코드는 각 노트 타입에 대해서 노드 representation을 평균을 합산하는 예제이다. .. 
code:: python class RGCN(nn.Module): def __init__(self, in_feats, hid_feats, out_feats, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(in_feats, hid_feats) for rel in rel_names}, aggregate='sum') self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats) for rel in rel_names}, aggregate='sum') def forward(self, graph, inputs): # inputs is features of nodes h = self.conv1(graph, inputs) h = {k: F.relu(v) for k, v in h.items()} h = self.conv2(graph, h) return h class HeteroClassifier(nn.Module): def __init__(self, in_dim, hidden_dim, n_classes, rel_names): super().__init__() self.rgcn = RGCN(in_dim, hidden_dim, hidden_dim, rel_names) self.classify = nn.Linear(hidden_dim, n_classes) def forward(self, g): h = g.ndata['feat'] h = self.rgcn(g, h) with g.local_scope(): g.ndata['h'] = h # Calculate graph representation by average readout. hg = 0 for ntype in g.ntypes: hg = hg + dgl.mean_nodes(g, 'h', ntype=ntype) return self.classify(hg) 나머지 코드는 homogeneous 그래프의 경우와 다르지 않다. .. code:: python # etypes is the list of edge types as strings. model = HeteroClassifier(10, 20, 5, etypes) opt = torch.optim.Adam(model.parameters()) for epoch in range(20): for batched_graph, labels in dataloader: logits = model(batched_graph) loss = F.cross_entropy(logits, labels) opt.zero_grad() loss.backward() opt.step() ================================================ FILE: docs/source/guide_ko/training-link.rst ================================================ .. _guide_ko-training-link-prediction: 5.3 링크 예측 ----------- :ref:`(English Version) ` 어떤 두 노드들 사이에 에지가 존재하는지 아닌지를 예측하고 싶은 경우가 있고, 이를 *링크 예측 과제* 라고 한다. 개요 ~~~~~~~~~ GNN 기반의 링크 예측 모델은 두 노드 :math:`u` 와 :math:`v` 간의 연결 가능도(likelihood)를 :math:`\boldsymbol{h}_u^{(L)}` 와 :math:`\boldsymbol{h}_v^{(L)}` 의 함수로 표현하는데, 여기서 :math:`\boldsymbol{h}_u^{(L)}` 와 :math:`\boldsymbol{h}_v^{(L)}` 는 멀티-레이어 GNN을 통해서 계산된 노드 representation이다. .. math:: y_{u,v} = \phi(\boldsymbol{h}_u^{(L)}, \boldsymbol{h}_v^{(L)}) :math:`y_{u,v}` 는 노드 :math:`u` 와 :math:`v` 사이의 점수를 뜻한다.
링크 예측 모델을 학습시키는 것은 에지로 연결된 두 노드들에 대한 점수와 임의의 두 노드 쌍에 대한 점수를 비교하면서 이뤄진다. 예를 들어, 노드 :math:`u` 와 :math:`v` 사이에 에지가 존재하는 경우 노드 :math:`u` 와 :math:`v` 사이의 점수가 노드 :math:`u` 와 임의의 *노이즈* 분포 :math:`v' \sim P_n(v)` 에 따라 샘플링된 노드 :math:`v'` 간의 점수보다 높도록 하는 학습이다. 위를 달성하기 위한 다양한 loss 함수가 있다. 몇 가지 예는 다음과 같다: - Cross-entropy loss: :math:`\mathcal{L} = - \log \sigma (y_{u,v}) - \sum_{v_i \sim P_n(v), i=1,\dots,k}\log \left[ 1 - \sigma (y_{u,v_i})\right]` - BPR loss: :math:`\mathcal{L} = \sum_{v_i \sim P_n(v), i=1,\dots,k} - \log \sigma (y_{u,v} - y_{u,v_i})` - Margin loss: :math:`\mathcal{L} = \sum_{v_i \sim P_n(v), i=1,\dots,k} \max(0, M - y_{u, v} + y_{u, v_i})`, 여기서 :math:`M` 은 상수 하이퍼-파라메터이다. `implicit feedback `__ 이나 `noise-contrastive estimation `__ 를 알고 있다면, 이 아이디어는 친숙할 것이다. :math:`u` 와 :math:`v` 사이의 점수를 계산하는 뉴럴 네트워크 모델은 :ref:`위에서 설명한 ` 에지 리그레션 모델과 동일하다. 다음은 dot product를 사용해서 에지들의 점수를 계산하는 예제이다. .. code:: python class DotProductPredictor(nn.Module): def forward(self, graph, h): # h contains the node representations computed from the GNN defined # in the node classification section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score')) return graph.edata['score'] 학습 룹 ~~~~~ 점수를 예측하는 모델은 그래프들에 적용되기 때문에, 네가티브 샘플들은 별도의 그래프로 표현되어야 한다. 즉, 그것은 에지들이 모두 네가티브 노드들의 쌍들로만 구성된 그래프이다. 아래 코드는 네가티브 샘플들로 구성된 그래프를 만드는 예제이다. 각 에지 :math:`(u,v)` 는 :math:`k` 개의 네가티브 샘플들 :math:`(u,v_i)` 을 갖는다. 여기서 :math:`v_i` 는 균등 분포에서 샘플링된다. .. code:: python def construct_negative_graph(graph, k): src, dst = graph.edges() neg_src = src.repeat_interleave(k) neg_dst = torch.randint(0, graph.num_nodes(), (len(src) * k,)) return dgl.graph((neg_src, neg_dst), num_nodes=graph.num_nodes()) 에지 점수를 예측하는 모델은 에지 분류 또는 에지 리그레션 모델과 같다. ..
code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features): super().__init__() self.sage = SAGE(in_features, hidden_features, out_features) self.pred = DotProductPredictor() def forward(self, g, neg_g, x): h = self.sage(g, x) return self.pred(g, h), self.pred(neg_g, h) 그런 다음, 학습 룹은 반복적으로 네가티브 그래프를 만들고 loss를 계산한다. .. code:: python def compute_loss(pos_score, neg_score): # Margin loss n_edges = pos_score.shape[0] return (1 - pos_score + neg_score.view(n_edges, -1)).clamp(min=0).mean() node_features = graph.ndata['feat'] n_features = node_features.shape[1] k = 5 model = Model(n_features, 100, 100) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): negative_graph = construct_negative_graph(graph, k) pos_score, neg_score = model(graph, negative_graph, node_features) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() print(loss.item()) 학습이 종료되면, 노드 representation은 다음과 같이 얻을 수 있다: .. code:: python node_embeddings = model.sage(graph, node_features) 노드 임베딩을 사용하는 방법은 여러가지가 있다. 몇가지 예를 들면, 다운스트림 분류기 학습, 관련된 엔터티 추천을 위한 nearest neighbor search 또는 maximum inner product search와 같은 것이 있다. Heterogeneous 그래프들 ~~~~~~~~~~~~~~~~~~~~ Heterogeneous 그래프에서의 링크 예측은 homogeneous 그래프에서의 링크 예측과 많이 다르지 않다. 다음 예제는 하나의 에지 타입에 대해서 예측을 수행한다고 가정하고 있는데, 이를 여러 에지 타입으로 확장하는 것은 쉽다. 링크 예측을 위해서 :ref:`앞에서 ` 의 ``HeteroDotProductPredictor`` 를 재활용해서 한 에지 타입에 대한 에지의 점수를 계산할 수 있다. .. code:: python class HeteroDotProductPredictor(nn.Module): def forward(self, graph, h, etype): # h contains the node representations for each node type computed from # the GNN defined in the previous section (Section 5.1). with graph.local_scope(): graph.ndata['h'] = h graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype) return graph.edges[etype].data['score'] 네가티브 샘플링을 수행하기 위해서, 링크 예측을 수행할 에지 타입에 대한 네가티브 그래프를 생성하면 된다. ..
code:: python def construct_negative_graph(graph, k, etype): utype, _, vtype = etype src, dst = graph.edges(etype=etype) neg_src = src.repeat_interleave(k) neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,)) return dgl.heterograph( {etype: (neg_src, neg_dst)}, num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes}) 모델을 heterogeneous 그래프들에서 에지 분류하는 모델과는 약간 다른데, 그 이유는 링크 예측을 할 때 에지 타입을 지정해야하기 때문이다. .. code:: python class Model(nn.Module): def __init__(self, in_features, hidden_features, out_features, rel_names): super().__init__() self.sage = RGCN(in_features, hidden_features, out_features, rel_names) self.pred = HeteroDotProductPredictor() def forward(self, g, neg_g, x, etype): h = self.sage(g, x) return self.pred(g, h, etype), self.pred(neg_g, h, etype) 학습 룹은 homogeneous 그래프에 대한 학습 룹과 비슷하다. .. code:: python def compute_loss(pos_score, neg_score): # Margin loss n_edges = pos_score.shape[0] return (1 - pos_score + neg_score.view(n_edges, -1)).clamp(min=0).mean() k = 5 model = Model(10, 20, 5, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] node_features = {'user': user_feats, 'item': item_feats} opt = torch.optim.Adam(model.parameters()) for epoch in range(10): negative_graph = construct_negative_graph(hetero_graph, k, ('user', 'click', 'item')) pos_score, neg_score = model(hetero_graph, negative_graph, node_features, ('user', 'click', 'item')) loss = compute_loss(pos_score, neg_score) opt.zero_grad() loss.backward() opt.step() print(loss.item()) ================================================ FILE: docs/source/guide_ko/training-node.rst ================================================ .. _guide_ko-training-node-classification: 5.1 노드 분류/리그래션(Regression) -------------------------------------------------- :ref:`(English Version) ` 가장 유명하고 널리 적용되고 있는 그래프 뉴럴 네트워크 중에 하나가 노드 분류이다. 학습/검증/테스트 셋의 각 노드는 미리 정해진 카테로기들로 중에 하나를 ground truth 카테고리로 분류되어 있다. 
노드 regression도 비슷하다. 학습/검증/테스트 셋의 각 노드에 ground truth 수가 할당되어 있다. 개요 ~~~~~~ 노드를 분류하기 위해서 그래프 뉴럴 네트워크는 :ref:`guide_ko-message-passing` 에서 소개한 메시지 전달 방법을 수행해서 노드 자신의 피쳐 뿐만 아니라 그 노드의 이웃 노드 및 에지의 피쳐도 함께 활용한다. 메시지 전달은 여러 회 반복해서 더 큰 범위의 이웃들에 대한 정보를 활용할 수 있다. 뉴럴 네트워크 모델 작성하기 ~~~~~~~~~~~~~~~~~~~~ DGL은 한 차례 메시지 전달을 수행하는 몇 가지 빌트인 graph convolution 모듈을 제공한다. 여기서 우리는 GraphSAGE에서 사용되는 graph convolution 모듈인 :class:`dgl.nn.pytorch.SAGEConv` (MXNet과 TensorFlow에서도 사용 가능)를 사용한다. 보통 그래프에 대한 딥러닝 모델에서는 메시지 전달이 여러 번 수행되는 멀티-레이어 그래프 뉴럴 네트워크가 필요하다. 이는 다음 코드처럼 graph convolution 모듈들을 쌓아서 구현할 수 있다. .. code:: python # Contruct a two-layer GNN model import dgl.nn as dglnn import torch.nn as nn import torch.nn.functional as F class SAGE(nn.Module): def __init__(self, in_feats, hid_feats, out_feats): super().__init__() self.conv1 = dglnn.SAGEConv( in_feats=in_feats, out_feats=hid_feats, aggregator_type='mean') self.conv2 = dglnn.SAGEConv( in_feats=hid_feats, out_feats=out_feats, aggregator_type='mean') def forward(self, graph, inputs): # inputs are features of nodes h = self.conv1(graph, inputs) h = F.relu(h) h = self.conv2(graph, h) return h 위 모델은 노드 분류 뿐만 아니라, :ref:`guide_ko-training-edge-classification` , :ref:`guide_ko-training-link-prediction` , 또는 :ref:`guide_ko-training-graph-classification` 와 같은 다른 다운스트림 테스크들을 위한 히든 노드 표현을 구하기 위해서 사용될 수 있음을 알아두자. 빌트인 graph convolution 모듈의 전체 목록은 :ref:`apinn` 를 참고하자. DGL 뉴럴 네트워크 모듈이 어떻게 동작하는지 그리고 메시지 전달을 활용한 커스텀 뉴럴 네트워크 모듈을 작성하는 방법은 :ref:`guide_ko-nn` 에 있는 예제들을 참고하자. 학습 룹(loop) ~~~~~~~~~~~ 전체 그래프를 이용한 학습은 단지 위에서 정의된 모델에 forward propagation 그리고 학습 노드들의 groud truth 레이블과 예측을 비교해서 loss를 계산하는 것으로 구성된다. 이 절은 빌드인 데이터셋 :class:`dgl.data.CiteseerGraphDataset` 을 사용해서 학습 룹을 설명한다. 노드 피처 및 레이블은 각 그래프 인스턴스에 저장되어 있고, 학습-검증-테스트 분할 또한 그래프에 이진 마스크로서 저장되어 있다. 이는 :ref:`guide_ko-data-pipeline` 에서 본것과 비슷하다. .. 
code:: python node_features = graph.ndata['feat'] node_labels = graph.ndata['label'] train_mask = graph.ndata['train_mask'] valid_mask = graph.ndata['val_mask'] test_mask = graph.ndata['test_mask'] n_features = node_features.shape[1] n_labels = int(node_labels.max().item() + 1) 다음은 정확도(accuracy)로 모델을 평가하는 예제 코드이다. .. code:: python def evaluate(model, graph, features, labels, mask): model.eval() with torch.no_grad(): logits = model(graph, features) logits = logits[mask] labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) 그리고, 학습 룹은 다음과 같이 작성할 수 있다. .. code:: python model = SAGE(in_feats=n_features, hid_feats=100, out_feats=n_labels) opt = torch.optim.Adam(model.parameters()) for epoch in range(10): model.train() # forward propagation by using all nodes logits = model(graph, node_features) # compute loss loss = F.cross_entropy(logits[train_mask], node_labels[train_mask]) # compute validation accuracy acc = evaluate(model, graph, node_features, node_labels, valid_mask) # backward propagation opt.zero_grad() loss.backward() opt.step() print(loss.item()) # Save model if necessary. Omitted in this example. `GraphSAGE `__ 는 end-to-end homogeneous 그래프 노드 분류 예제를 제공한다. 해당 모델은 ``GraphSAGE`` 클래스에 구현되어 있고, 조정가능 한 레이어 수, dropout 확률들, 그리고 커스터마이징이 가능한 aggregation 함수 및 비선형성 등의 예제가 포함되어 있다. .. _guide_ko-training-rgcn-node-classification: Heterogeneous 그래프 ~~~~~~~~~~~~~~~~~~ 만약 그래프가 heterogeneous(이종)이라면, 여러분은 노드의 모든 에지 타입에 대한 이웃들로부터 메시지를 수집하기를 원할 것이다. 모든 에지 종류에 대해서 각 에지 타입별로 서로 다른 graph convolution 모듈을 사용한 메시지 전달을 수행하는 것은, :class:`dgl.nn.pytorch.HeteroGraphConv` (MXNet과 Tensorflow에서도 제공함) 모듈을 사용해서 가능하다. 아래 코드는 heterogeneous graph convolution을 정의하는데, 이는 각 에지 타입에 따라 별도의 graph convolution을 수행하고, 모든 노드 타입들에 대한 결과로서 각 에지 타입에 대한 메시지 aggregation 값들을 합하는 일을 수행한다. .. 
code:: python # Define a Heterograph Conv model class RGCN(nn.Module): def __init__(self, in_feats, hid_feats, out_feats, rel_names): super().__init__() self.conv1 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(in_feats, hid_feats) for rel in rel_names}, aggregate='sum') self.conv2 = dglnn.HeteroGraphConv({ rel: dglnn.GraphConv(hid_feats, out_feats) for rel in rel_names}, aggregate='sum') def forward(self, graph, inputs): # inputs are features of nodes h = self.conv1(graph, inputs) h = {k: F.relu(v) for k, v in h.items()} h = self.conv2(graph, h) return h ``dgl.nn.HeteroGraphConv`` 는 노드 타입들과 노드 피쳐 텐서들의 사전을 입력으로 받고, 노드 타입과 노드 피쳐의 다른 사전을 리턴한다. 여기서 사용되는 데이터셋은 이미 user 및 item 피쳐를 가지고 있고, 이는 :ref:`heterogeneous graph example ` 에서 확인할 수 있다. .. code:: python model = RGCN(n_hetero_features, 20, n_user_classes, hetero_graph.etypes) user_feats = hetero_graph.nodes['user'].data['feature'] item_feats = hetero_graph.nodes['item'].data['feature'] labels = hetero_graph.nodes['user'].data['label'] train_mask = hetero_graph.nodes['user'].data['train_mask'] Forward propagation을 다음과 같이 단순하게 실행된다. .. code:: python node_features = {'user': user_feats, 'item': item_feats} h_dict = model(hetero_graph, {'user': user_feats, 'item': item_feats}) h_user = h_dict['user'] h_item = h_dict['item'] 학습 룹은 예측을 계산할 노드 representation들의 사전을 사용하는 것을 제외하고는 homogeneous graph의 학습 룹과 동일하다. 예를 들어, ``user`` 노드 만을 예측하고 싶다면, 단지 리턴된 사전에서 ``user`` 노드 임베딩을 추출하면 된다. .. code:: python opt = torch.optim.Adam(model.parameters()) for epoch in range(5): model.train() # forward propagation by using all nodes and extracting the user embeddings logits = model(hetero_graph, node_features)['user'] # compute loss loss = F.cross_entropy(logits[train_mask], labels[train_mask]) # Compute validation accuracy. Omitted in this example. # backward propagation opt.zero_grad() loss.backward() opt.step() print(loss.item()) # Save model if necessary. Omitted in the example. DGL은 `RGCN `__ 의 end-to-end 예제를 제공한다. 
Heterogeneous graph convolution의 정의는 `모델 구현 파일 `__ ``RelGraphConvLayer`` 에서 확인할 수 있다. ================================================ FILE: docs/source/guide_ko/training.rst ================================================ .. _guide_ko-training: 5장: 그래프 뉴럴 네트워크 학습하기 ========================== :ref:`(English Version) ` 개요 ---------------- 이 장에서는 :ref:`guide_ko-message-passing` 에서 소개한 메시지 전달 방법과 :ref:`guide_ko-nn` 에서 소개한 뉴럴 네트워크 모듈을 사용해서 작은 그래프들에 대한 노드 분류, 에지 분류, 링크 예측, 그리고 그래프 분류를 위한 그래프 뉴럴 네트워크를 학습하는 방법에 대해서 알아본다. 여기서는 그래프 및 노드 및 에지 피쳐들이 GPU 메모리에 들어갈 수 있는 크기라고 가정한다. 만약 그렇지 않다면, :ref:`guide_ko-minibatch` 를 참고하자. 그리고, 그래프와 노드/에지 피쳐들은 이미 프로세싱되어 있다고 가정한다. 만약 DGL에서 제공되는 데이터셋 또는 :ref:`guide_ko-data-pipeline` 에서 소개한 ``DGLDataset`` 과 호환되는 다른 데이터셋을 사용할 계획이라면, 다음과 같이 단일-그래프 데이터셋을 위한 그래프를 얻을 수 있다. .. code:: python import dgl dataset = dgl.data.CiteseerGraphDataset() graph = dataset[0] 주의: 이 장의 예제들은 PyTorch를 백엔드로 사용한다. .. _guide_ko-training-heterogeneous-graph-example: Heterogeneous 그래프 ~~~~~~~~~~~~~~~~~~ 때로는 heterogeneous 그래프를 사용할 경우도 있다. 노드 분류, 에지 분류, 그리고 링크 예측 과제들의 예제를 위해서 임의로 만든 heterogeneous 그래프를 사용하겠다. 임의로 생성한 heterogeneous 그래프 ``hetero_graph`` 는 다음과 같은 에지 타입을 갖는다: - ``('user', 'follow', 'user')`` - ``('user', 'followed-by', 'user')`` - ``('user', 'click', 'item')`` - ``('item', 'clicked-by', 'user')`` - ``('user', 'dislike', 'item')`` - ``('item', 'disliked-by', 'user')`` .. 
code:: python import numpy as np import torch n_users = 1000 n_items = 500 n_follows = 3000 n_clicks = 5000 n_dislikes = 500 n_hetero_features = 10 n_user_classes = 5 n_max_clicks = 10 follow_src = np.random.randint(0, n_users, n_follows) follow_dst = np.random.randint(0, n_users, n_follows) click_src = np.random.randint(0, n_users, n_clicks) click_dst = np.random.randint(0, n_items, n_clicks) dislike_src = np.random.randint(0, n_users, n_dislikes) dislike_dst = np.random.randint(0, n_items, n_dislikes) hetero_graph = dgl.heterograph({ ('user', 'follow', 'user'): (follow_src, follow_dst), ('user', 'followed-by', 'user'): (follow_dst, follow_src), ('user', 'click', 'item'): (click_src, click_dst), ('item', 'clicked-by', 'user'): (click_dst, click_src), ('user', 'dislike', 'item'): (dislike_src, dislike_dst), ('item', 'disliked-by', 'user'): (dislike_dst, dislike_src)}) hetero_graph.nodes['user'].data['feature'] = torch.randn(n_users, n_hetero_features) hetero_graph.nodes['item'].data['feature'] = torch.randn(n_items, n_hetero_features) hetero_graph.nodes['user'].data['label'] = torch.randint(0, n_user_classes, (n_users,)) hetero_graph.edges['click'].data['label'] = torch.randint(1, n_max_clicks, (n_clicks,)).float() # randomly generate training masks on user nodes and click edges hetero_graph.nodes['user'].data['train_mask'] = torch.zeros(n_users, dtype=torch.bool).bernoulli(0.6) hetero_graph.edges['click'].data['train_mask'] = torch.zeros(n_clicks, dtype=torch.bool).bernoulli(0.6) 로드맵 ---- 이 장은 그래프 학습 테스크를 설명하기 위해서 4개의 절로 구성되어 있다. * :ref:`guide_ko-training-node-classification` * :ref:`guide_ko-training-edge-classification` * :ref:`guide_ko-training-link-prediction` * :ref:`guide_ko-training-graph-classification` .. toctree:: :maxdepth: 1 :hidden: :glob: training-node training-edge training-link training-graph ================================================ FILE: docs/source/index.rst ================================================ .. 
DGL documentation master file, created by sphinx-quickstart on Fri Oct 5 14:18:01 2018. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Welcome to Deep Graph Library Tutorials and Documentation ========================================================= .. toctree:: :maxdepth: 1 :caption: Get Started :hidden: :glob: install/index tutorials/blitz/index .. toctree:: :maxdepth: 2 :caption: Advanced Materials :hidden: :titlesonly: :glob: stochastic_training/index guide/index guide_cn/index guide_ko/index graphtransformer/index notebooks/sparse/index tutorials/cpu/index tutorials/multi/index tutorials/dist/index tutorials/models/index .. toctree:: :maxdepth: 2 :caption: API Reference :hidden: :glob: api/python/dgl api/python/dgl.data api/python/dgl.dataloading api/python/dgl.DGLGraph api/python/dgl.distributed api/python/dgl.function api/python/dgl.geometry api/python/dgl.graphbolt api/python/nn-pytorch api/python/nn.functional api/python/dgl.ops api/python/dgl.optim api/python/dgl.sampling api/python/dgl.sparse_v0 api/python/dgl.multiprocessing api/python/transforms api/python/udf .. toctree:: :maxdepth: 1 :caption: Notes :hidden: :glob: contribute developer/ffi performance .. toctree:: :maxdepth: 1 :caption: Misc :hidden: :glob: faq env_var resources Deep Graph Library (DGL) is a Python package built for easy implementation of graph neural network model family, on top of existing DL frameworks (currently supporting PyTorch, MXNet and TensorFlow). It offers a versatile control of message passing, speed optimization via auto-batching and highly tuned sparse matrix kernels, and multi-GPU/CPU training to scale to graphs of hundreds of millions of nodes and edges. Getting Started --------------- For absolute beginners, start with the :doc:`Blitz Introduction to DGL `. It covers the basic concepts of common graph machine learning tasks and a step-by-step on building Graph Neural Networks (GNNs) to solve them. 
For users already acquainted with DGL who wish to learn more advanced usage, * `Learn DGL by examples `_. * Read the :doc:`User Guide` (:doc:`中文版链接`), which explains the concepts and usage of DGL in much more detail. * Go through the tutorials for :doc:`Stochastic Training of GNNs `, which cover the basic steps for training GNNs on large graphs in mini-batches. * :doc:`Study classical papers ` on graph machine learning alongside DGL. * Search for the usage of a specific API in the :doc:`API reference manual `, which organizes all DGL APIs by their namespace. Contribution ------------- DGL is free software; you can redistribute it and/or modify it under the terms of the Apache License 2.0. We welcome contributions. Join us on `GitHub `_ and check out our :doc:`contribution guidelines `. Index ----- * :ref:`genindex` ================================================ FILE: docs/source/install/index.rst ================================================ Install and Setup ================= System requirements ------------------- DGL works with the following operating systems: * Ubuntu 20.04+ * CentOS 8+ (although gcc 9 is needed) * RHEL 8+ * macOS X * Windows 10 DGL requires Python version 3.7, 3.8, 3.9, 3.10, or 3.11. DGL supports multiple tensor libraries as backends, e.g., PyTorch, MXNet. For requirements on backends and how to select one, see :ref:`backends`. Starting at version 0.3, DGL is separated into CPU and CUDA builds. The builds share the same Python package name. If you install DGL with a CUDA 9 build after you install the CPU build, then the CPU build is overwritten. Install from Conda or Pip ------------------------- We recommend installing DGL with ``conda`` or ``pip``. Check out the instructions on the `Get Started page `_. .. note:: For Windows users: you will need to install `Visual C++ 2015 Redistributable `_. .. _install-from-source: Install from source ------------------- Download the source files from GitHub. ..
code:: bash git clone --recurse-submodules https://github.com/dmlc/dgl.git (Optional) Clone the repository first, and then run the following: .. code:: bash git submodule update --init --recursive Linux ````` Install the system packages for building the shared library. For Debian and Ubuntu users, run: .. code:: bash sudo apt-get update sudo apt-get install -y build-essential python3-dev make cmake For Fedora/RHEL/CentOS users, run: .. code:: bash sudo yum install -y gcc-c++ python3-devel make cmake To create a Conda environment for CPU development, run: .. code:: bash bash script/create_dev_conda_env.sh -c To create a Conda environment for GPU development, run: .. code:: bash bash script/create_dev_conda_env.sh -g 11.7 To further configure the conda environment, run the following command for more details: .. code:: bash bash script/create_dev_conda_env.sh -h To build the shared library for CPU development, run: .. code:: bash bash script/build_dgl.sh -c To build the shared library for GPU development, run: .. code:: bash bash script/build_dgl.sh -g To further build the shared library, run the following command for more details: .. code:: bash bash script/build_dgl.sh -h Finally, install the Python binding. .. code:: bash cd python python setup.py install # Build Cython extension python setup.py build_ext --inplace macOS ````` Installation on macOS is similar to Linux. But macOS users need to install build tools like clang, GNU Make, and cmake first. These installation steps were tested on macOS X with clang 10.0.0, GNU Make 3.81, and cmake 3.13.1. Tools like clang and GNU Make are packaged in **Command Line Tools** for macOS. To install, run the following: .. code:: bash xcode-select --install To install other needed packages like cmake, we recommend first installing **Homebrew**, which is a popular package manager for macOS. To learn more, see the `Homebrew website `_. After you install Homebrew, install cmake. .. 
code:: bash brew install cmake Go to the root directory of the DGL repository, build a shared library, and install the Python binding for DGL. .. code:: bash mkdir build cd build cmake -DUSE_OPENMP=off -DUSE_LIBXSMM=OFF .. make -j4 cd ../python python setup.py install # Build Cython extension python setup.py build_ext --inplace Windows ``````` You can build DGL with MSBuild. With `MS Build Tools `_ and `CMake on Windows `_ installed, run the following in VS2019 x64 Native tools command prompt. * CPU only build:: MD build CD build cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DCMAKE_CONFIGURATION_TYPES="Release" -DDMLC_FORCE_SHARED_CRT=ON .. -G "Visual Studio 16 2019" msbuild dgl.sln /m CD ..\python python setup.py install * CUDA build:: MD build CD build cmake -DCMAKE_CXX_FLAGS="/DDGL_EXPORTS" -DCMAKE_CONFIGURATION_TYPES="Release" -DDMLC_FORCE_SHARED_CRT=ON -DUSE_CUDA=ON .. -G "Visual Studio 16 2019" msbuild dgl.sln /m CD ..\python python setup.py install .. _backends: Working with different backends ------------------------------- DGL supports PyTorch, MXNet and TensorFlow backends. DGL chooses the backend based on the following options (from high priority to low priority): * Use the ``DGLBACKEND`` environment variable: - You can use ``DGLBACKEND=[BACKEND] python gcn.py ...`` to specify the backend - Or ``export DGLBACKEND=[BACKEND]`` to set the global environment variable * Modify the ``config.json`` file under "~/.dgl": - You can use ``python -m dgl.backend.set_default_backend [BACKEND]`` to set the default backend Currently BACKEND can be chosen from mxnet, pytorch, tensorflow. PyTorch backend ``````````````` Export ``DGLBACKEND`` as ``pytorch`` to specify PyTorch backend. The required PyTorch version is 1.12.0 or later. See `pytorch.org `_ for installation instructions. MXNet backend ````````````` Export ``DGLBACKEND`` as ``mxnet`` to specify MXNet backend. The required MXNet version is 1.6 or later. See `mxnet.apache.org `_ for installation instructions.
MXNet uses uint32 as the default data type for integer tensors, which only supports graphs of size smaller than 2^32. To enable large graph training, *build* MXNet with ``USE_INT64_TENSOR_SIZE=1`` flag. See `this FAQ `_ for more information. MXNet 1.5 and later has an option to enable Numpy shape mode for ``NDArray`` objects; some DGL models need this mode to be enabled to run correctly. However, this mode may not be compatible with pretrained model parameters saved with this mode disabled, e.g. pretrained models from GluonCV and GluonNLP. By setting ``DGL_MXNET_SET_NP_SHAPE``, users can switch this mode on or off. TensorFlow backend `````````````````` Export ``DGLBACKEND`` as ``tensorflow`` to specify TensorFlow backend. The required TensorFlow version is 2.3.0 or later. See `tensorflow.org `_ for installation instructions. In addition, DGL will set ``TF_FORCE_GPU_ALLOW_GROWTH`` to ``true`` to prevent TensorFlow from taking over the whole GPU memory. ================================================ FILE: docs/source/notebooks/sparse/gcn.nblink ================================================ { "path": "../../../../notebooks/sparse/gcn.ipynb" } ================================================ FILE: docs/source/notebooks/sparse/graph_diffusion.nblink ================================================ { "path": "../../../../notebooks/sparse/graph_diffusion.ipynb" } ================================================ FILE: docs/source/notebooks/sparse/graph_transformer.nblink ================================================ { "path": "../../../../notebooks/sparse/graph_transformer.ipynb" } ================================================ FILE: docs/source/notebooks/sparse/hgnn.nblink ================================================ { "path": "../../../../notebooks/sparse/hgnn.ipynb" } ================================================ FILE: docs/source/notebooks/sparse/index.rst ================================================ Tutorials: dgl.sparse ========================= The tutorial set
cover the basic usage of DGL's sparse matrix class and operators. You can begin with "Quickstart" and "Building a Graph Convolutional Network Using Sparse Matrices". The rest of the tutorials demonstrate the usage by end-to-end examples. All the tutorials are written in Jupyter Notebook and can be played on Google Colab. .. toctree:: :maxdepth: 3 :titlesonly: quickstart.nblink gcn.nblink graph_diffusion.nblink hgnn.nblink graph_transformer.nblink ================================================ FILE: docs/source/notebooks/sparse/quickstart.nblink ================================================ { "path": "../../../../notebooks/sparse/quickstart.ipynb" } ================================================ FILE: docs/source/performance.rst ================================================ Performance Benchmarks ====================== Integrated Benchmarks --------------------- DGL continuously evaluates the speed of its core APIs, kernels as well as the training speed of the state-of-the-art GNN models. The benchmark code is available at `the main repository `_. They are triggered for every nightly-built version and the results are published to `https://asv.dgl.ai/ `_. v0.6 Benchmarks --------------- To understand the performance gain of DGL v0.6, we re-evaluated it on the v0.5 benchmarks plus some new ones for graph classification tasks against the updated baselines. The results are available in `a standalone repository `_. v0.5 Benchmarks --------------- Check out our paper `Deep Graph Library: A Graph-Centric, Highly-Performant Package for Graph Neural Networks `_. v0.4.3 Benchmarks ------------------ **Microbenchmark on speed and memory usage**: While leaving tensor and autograd functions to backend frameworks (e.g. PyTorch, MXNet, and TensorFlow), DGL aggressively optimizes storage and computation with its own kernels. Here's a comparison to another popular package -- PyTorch Geometric (PyG). 
The short story is that raw speed is similar, but DGL has much better memory management. +----------+--------------+-----------------+-------------------------+-------------------------+ | Dataset | Model | Accuracy | Time | Memory | | | | +------------+------------+------------+------------+ | | | | PyG | DGL | PyG | DGL | +==========+==============+=================+============+============+============+============+ | Cora | GCN | 81.31 ± 0.88 | **0.478** | 0.666 | 1.1 | 1.1 | + +--------------+-----------------+------------+------------+------------+------------+ | | GAT | 83.98 ± 0.52 | 1.608 | **1.399** | 1.2 | **1.1** | +----------+--------------+-----------------+------------+------------+------------+------------+ | CiteSeer | GCN | 70.98 ± 0.68 | **0.490** | 0.674 | 1.1 | 1.1 | + +--------------+-----------------+------------+------------+------------+------------+ | | GAT | 69.96 ± 0.53 | 1.606 | **1.399** | 1.3 | **1.1** | +----------+--------------+-----------------+------------+------------+------------+------------+ | PubMed | GCN | 79.00 ± 0.41 | **0.491** | 0.690 | 1.1 | 1.1 | + +--------------+-----------------+------------+------------+------------+------------+ | | GAT | 77.65 ± 0.32 | 1.946 | **1.393** | 1.6 | **1.1** | +----------+--------------+-----------------+------------+------------+------------+------------+ | Reddit | GCN | 93.46 ± 0.06 | OOM | **28.6** | OOM | **11.7** | +----------+--------------+-----------------+------------+------------+------------+------------+ | Reddit-S | GCN | N/A | 29.12 | **9.44** | 15.7 | **3.6** | +----------+--------------+-----------------+------------+------------+------------+------------+ Table: Training time (in seconds) for 200 epochs and memory consumption (GB) Here is another comparison of DGL on TensorFlow backend with other TF-based GNN tools (training time in seconds for one epoch): +---------+-------+--------+----------+--------------+ | Dataset | Model | DGL | GraphNet | tf_geometric |
+=========+=======+========+==========+==============+ | Cora | GCN | 0.0148 | 0.0152 | 0.0192 | +---------+-------+--------+----------+--------------+ | Reddit | GCN | 0.1095 | OOM | OOM | +---------+-------+--------+----------+--------------+ | PubMed | GCN | 0.0156 | 0.0553 | 0.0185 | +---------+-------+--------+----------+--------------+ | PPI | GCN | 0.09 | 0.16 | 0.21 | +---------+-------+--------+----------+--------------+ | Cora | GAT | 0.0442 | n/a | 0.058 | +---------+-------+--------+----------+--------------+ | PPI | GAT | 0.398 | n/a | 0.752 | +---------+-------+--------+----------+--------------+ High memory utilization allows DGL to push the limit of single-GPU performance, as seen in the images below. .. image:: http://data.dgl.ai/asset/image/DGLvsPyG-time1.png .. image:: http://data.dgl.ai/asset/image/DGLvsPyG-time2.png **Scalability**: DGL has fully leveraged multiple GPUs in both one machine and clusters for increasing training speed, and has better performance than alternatives, as seen in the images below. .. image:: http://data.dgl.ai/asset/image/one-four-GPUs.png .. image:: http://data.dgl.ai/asset/image/one-four-GPUs-DGLvsGraphVite.png .. image:: http://data.dgl.ai/asset/image/one-fourMachines.png **Further reading**: Detailed comparison of DGL and other alternatives can be found `here <https://arxiv.org/abs/1909.01315>`_. ================================================ FILE: docs/source/resources.rst ================================================ Resources ========= * If you are new to deep learning, `Dive into Deep Learning `__ is a nice book to start with.
* `PyTorch tutorials `__ * Thomas Kipf's `blog on Graph Convolutional Networks `__ ================================================ FILE: docs/source/stochastic_training/index.rst ================================================ 🆕 Stochastic Training of GNNs with GraphBolt ============================================= GraphBolt is a data loading framework for GNNs with high flexibility and scalability. It is built on top of DGL and PyTorch. This tutorial introduces how to enable stochastic training of GNNs with GraphBolt. Overview ^^^^^^^^ .. image:: ../_static/graphbolt_overview.jpg :width: 700 :alt: GraphBolt Overview GraphBolt integrates seamlessly with the PyTorch `datapipe `_, relying on the unified "MiniBatch" data structure to connect processing stages. It streamlines data loading and preprocessing for GNN training, validation, and testing. By default, GraphBolt provides a collection of built-in datasets and exceptionally efficient implementations of datapipes for common scenarios, which can be summarized as follows: 1. **Item Sampler:** Randomly selects a subset (nodes, edges, graphs) from the entire training set as an initial mini-batch for downstream computation. 2. **Negative Sampler:** Specially designed for link prediction tasks, it generates non-existing edges as negative examples for training. 3. **Subgraph Sampler:** Generates subgraphs based on the input nodes/edges for computation. 4. **Feature Fetcher:** Fetches related node/edge features from the dataset for the given input. By exposing the entire data loading process as a pipeline, GraphBolt provides significant flexibility and customization opportunities. Users can easily substitute any stage with their own implementations. Additionally, users can benefit from the optimized scheduling strategy for datapipes, even with customized stages. In summary, GraphBolt offers the following benefits: 1. A flexible, pipelined framework for GNN data loading and preprocessing. 2.
Highly efficient canonical implementations. 3. Efficient scheduling. Scenarios ^^^^^^^^^ .. toctree:: :maxdepth: 1 neighbor_sampling_overview.nblink node_classification.nblink link_prediction.nblink multigpu_node_classification.nblink ondisk-dataset.rst ================================================ FILE: docs/source/stochastic_training/link_prediction.nblink ================================================ { "path": "../../../notebooks/stochastic_training/link_prediction.ipynb" } ================================================ FILE: docs/source/stochastic_training/multigpu_node_classification.nblink ================================================ { "path": "../../../notebooks/stochastic_training/multigpu_node_classification.ipynb" } ================================================ FILE: docs/source/stochastic_training/neighbor_sampling_overview.nblink ================================================ { "path": "../../../notebooks/stochastic_training/neighbor_sampling_overview.ipynb" } ================================================ FILE: docs/source/stochastic_training/node_classification.nblink ================================================ { "path": "../../../notebooks/stochastic_training/node_classification.ipynb" } ================================================ FILE: docs/source/stochastic_training/ondisk-dataset-specification.rst ================================================ .. _stochastic_training-ondisk-dataset-specification: YAML specification ================== This document describes the YAML specification of ``metadata.yaml`` file for ``OnDiskDataset``. ``metadata.yaml`` file is used to specify the dataset information, including the graph structure, feature data and tasks. ..
code:: yaml dataset_name: graph: nodes: - type: num: - type: num: edges: - type: format: path: - type: format: path: feature_data: - domain: node type: name: format: in_memory: path: - domain: node type: name: format: in_memory: path: - domain: edge type: name: format: in_memory: path: - domain: edge type: name: format: in_memory: path: tasks: - name: num_classes: train_set: - type: data: - name: format: in_memory: path: - name: format: in_memory: path: validation_set: - type: data: - name: format: in_memory: path: - name: format: in_memory: path: test_set: - type: data: - name: format: in_memory: path: - name: format: in_memory: path: ``dataset_name`` ---------------- The ``dataset_name`` field is used to specify the name of the dataset. It is user-defined. ``graph`` --------- The ``graph`` field is used to specify the graph structure. It has two fields: ``nodes`` and ``edges``. - ``nodes``: ``list`` The ``nodes`` field is used to specify the number of nodes for each node type. It is a list of ``node`` objects. Each ``node`` object has two fields: ``type`` and ``num``. - ``type``: ``string``, optional The ``type`` field is used to specify the node type. It is ``null`` for homogeneous graphs. For heterogeneous graphs, it is the node type. - ``num``: ``int`` The ``num`` field is used to specify the number of nodes for the node type. It is mandatory for both homogeneous graphs and heterogeneous graphs. - ``edges``: ``list`` The ``edges`` field is used to specify the edges. It is a list of ``edge`` objects. Each ``edge`` object has three fields: ``type``, ``format`` and ``path``. - ``type``: ``string``, optional The ``type`` field is used to specify the edge type. It is ``null`` for homogeneous graphs. For heterogeneous graphs, it is the edge type. - ``format``: ``string`` The ``format`` field is used to specify the format of the edge data. It can be ``csv`` or ``numpy``. If it is ``csv``, no ``index`` and ``header`` fields are needed.
If it is ``numpy``, the array must have shape ``(2, num_edges)``. ``numpy`` format is recommended for large graphs. - ``path``: ``string`` The ``path`` field is used to specify the path of the edge data. It is relative to the directory of ``metadata.yaml`` file. ``feature_data`` ---------------- The ``feature_data`` field is used to specify the feature data. It is a list of ``feature`` objects. Each ``feature`` object has five canonical fields: ``domain``, ``type``, ``name``, ``format`` and ``path``. Any other fields will be passed to the ``Feature.metadata`` object. - ``domain``: ``string`` The ``domain`` field is used to specify the domain of the feature data. It can be either ``node`` or ``edge``. - ``type``: ``string``, optional The ``type`` field is used to specify the type of the feature data. It is ``null`` for homogeneous graphs. For heterogeneous graphs, it is the node or edge type. - ``name``: ``string`` The ``name`` field is used to specify the name of the feature data. It is user-defined. - ``format``: ``string`` The ``format`` field is used to specify the format of the feature data. It can be either ``numpy`` or ``torch``. - ``in_memory``: ``bool``, optional The ``in_memory`` field is used to specify whether the feature data is loaded into memory. It can be either ``true`` or ``false``. Default is ``true``. - ``path``: ``string`` The ``path`` field is used to specify the path of the feature data. It is relative to the directory of ``metadata.yaml`` file. ``tasks`` --------- The ``tasks`` field is used to specify the tasks. It is a list of ``task`` objects. Each ``task`` object has at least three fields: ``train_set``, ``validation_set``, ``test_set``. And you are free to add other fields such as ``num_classes`` and all these fields will be passed to the ``Task.metadata`` object. - ``name``: ``string``, optional The ``name`` field is used to specify the name of the task. It is user-defined.
- ``num_classes``: ``int``, optional The ``num_classes`` field is used to specify the number of classes of the task. - ``train_set``: ``list`` The ``train_set`` field is used to specify the training set. It is a list of ``set`` objects. Each ``set`` object has two fields: ``type`` and ``data``. - ``type``: ``string``, optional The ``type`` field is used to specify the node/edge type of the set. It is ``null`` for homogeneous graphs. For heterogeneous graphs, it is the node or edge type. - ``data``: ``list`` The ``data`` field is used to load ``train_set``. It is a list of ``data`` objects. Each ``data`` object has four fields: ``name``, ``format``, ``in_memory`` and ``path``. - ``name``: ``string`` The ``name`` field is used to specify the name of the data. It is mandatory and used to specify the data fields of ``MiniBatch`` for sampling. It can be either ``seeds``, ``labels`` or ``indexes``. If any other name is used, it will be added into the ``MiniBatch`` data fields. - ``format``: ``string`` The ``format`` field is used to specify the format of the data. It can be either ``numpy`` or ``torch``. - ``in_memory``: ``bool``, optional The ``in_memory`` field is used to specify whether the data is loaded into memory. It can be either ``true`` or ``false``. Default is ``true``. - ``path``: ``string`` The ``path`` field is used to specify the path of the data. It is relative to the directory of ``metadata.yaml`` file. - ``validation_set``: ``list`` - ``test_set``: ``list`` The ``validation_set`` and ``test_set`` fields are used to specify the validation set and test set respectively. They are similar to the ``train_set`` field. ================================================ FILE: docs/source/stochastic_training/ondisk-dataset.rst ================================================ .. 
_stochastic_training-ondisk-dataset: Composing OnDiskDataset from raw data ===================================== This tutorial shows how to compose :class:`~dgl.graphbolt.OnDiskDataset` from raw data. A full specification of ``metadata.yaml`` is also provided. **GraphBolt** provides the ``OnDiskDataset`` class to help users organize plain data of graph structure, feature data and tasks. ``OnDiskDataset`` is also designed to efficiently handle large graphs and features that do not fit into memory by storing them on disk. .. toctree:: :maxdepth: 1 :glob: ondisk_dataset_homograph.nblink ondisk_dataset_heterograph.nblink ondisk-dataset-specification.rst ================================================ FILE: docs/source/stochastic_training/ondisk_dataset_heterograph.nblink ================================================ { "path": "../../../notebooks/stochastic_training/ondisk_dataset_heterograph.ipynb" } ================================================ FILE: docs/source/stochastic_training/ondisk_dataset_homograph.nblink ================================================ { "path": "../../../notebooks/stochastic_training/ondisk_dataset_homograph.ipynb" } ================================================ FILE: examples/README.md ================================================ # Official DGL Examples and Modules The folder contains example implementations of selected research papers related to Graph Neural Networks. Note that the examples may not work with incompatible DGL versions. * For examples working with the latest master (or the latest [nightly build](https://www.dgl.ai/pages/start.html)), check out https://github.com/dmlc/dgl/tree/master/examples. * For examples working with a certain release, check out `https://github.com/dmlc/dgl/tree/<release_version>/examples` (E.g., https://github.com/dmlc/dgl/tree/0.5.x/examples) To quickly locate the examples of your interest, search for the tagged keywords or use the search tool on [dgl.ai](https://www.dgl.ai/). ## 2024 - Lin et al.
ARGO: An Auto-Tuning Runtime System for Scalable GNN Training on Multi-Core Processor. [Paper link](https://arxiv.org/abs/2402.03671) - Example code: [PyTorch](https://github.com/dmlc/dgl/tree/master/examples/pytorch/argo) - Tags: semi-supervised node classification ## 2023 - Zheng Wang et al. From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited. [Paper link](https://arxiv.org/abs/2309.13599) - Example code: [PyTorch](../examples/pytorch/ogc) - Tags: semi-supervised node classification ## 2022 - Balin et al. Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs. [Paper link](https://arxiv.org/abs/2210.13339) - Example code: [PyTorch](../examples/labor/train_lightning.py) - Tags: node classification, weighted graphs, sampling ## 2021 - Mallet et al. Learning Protein and Small Molecule binding sites in RNA molecules with 2.5D graphs. [Paper link](https://academic.oup.com/bioinformatics/article/38/5/1458/6462185?login=true) - Example code: [PyTorch](https://jwgitlab.cs.mcgill.ca/cgoliver/rnaglib) - Tags: semi-supervised node classification - Xing et al. Learning Hierarchical Graph Neural Networks for Image Clustering. - Example code: [PyTorch](../examples/pytorch/hilander) - Tags: clustering - Ivanov et al. Boost then Convolve: Gradient Boosting Meets Graph Neural Networks. [Paper link](https://openreview.net/forum?id=ebS5NUfoMKL). - Example code: [PyTorch](../examples/pytorch/bgnn) - Tags: semi-supervised node classification, tabular data, GBDT - Huang et al. Combining Label Propagation and Simple Models Out-performs Graph Neural Networks. [Paper link](https://arxiv.org/abs/2010.13993). - Example code: [PyTorch](../examples/pytorch/correct_and_smooth) - Tags: efficiency, node classification, label propagation - Zhao et al. Point Transformer. [Paper link](http://arxiv.org/abs/2012.09164).
- Example code: [PyTorch](../examples/pytorch/pointcloud/point_transformer) - Tags: point cloud classification, point cloud part-segmentation - Guo et al. PCT: Point cloud transformer. [Paper link](http://arxiv.org/abs/2012.09688). - Example code: [PyTorch](../examples/pytorch/pointcloud/pct) - Tags: point cloud classification, point cloud part-segmentation - Brody et al. How Attentive are Graph Attention Networks? [Paper link](https://arxiv.org/abs/2105.14491). - Example code: [PyTorch](../examples/pytorch/gatv2) - Tags: graph attention, gat, gatv2, attention - Thakoor et al. Large-Scale Representation Learning on Graphs via Bootstrapping. [Paper link](https://arxiv.org/abs/2102.06514). - Example code: [PyTorch](../examples/pytorch/bgrl) - Tags: contrastive learning for node classification. - Bouritsas et al. Improving Graph Neural Network Expressivity via Subgraph Isomorphism Counting. [Paper link](https://arxiv.org/abs/2006.09252). - Example code: [PyTorch](../examples/pytorch/ogb/directional_GSN) - Tags: subgraph isomorphism counting, graph classification. - Song et al. Network In Graph Neural Network. [Paper link](https://arxiv.org/abs/2111.11638). - Example code: [PyTorch](../examples/pytorch/ogb/ngnn) - Tags: model-agnostic methodology, link prediction, open graph benchmark. - Qin et al. BiPointNet: Binary Neural Network for Point Clouds. [Paper link](https://openreview.net/forum?id=9QLRCVysdlO) - Example code: [PyTorch](../examples/pytorch/pointcloud/bipointnet) - Tags: point cloud classification, network binarization. ## 2020 - Wagh et al. EEG-GCNN: Augmenting Electroencephalogram-based Neurological Disease Diagnosis using a Domain-guided Graph Convolutional Neural Network. [Paper link](http://proceedings.mlr.press/v136/wagh20a.html). 
- Example code: [PyTorch](../examples/pytorch/eeg-gcnn) - Tags: graph classification, eeg representation learning, brain activity, graph convolution, neurological disease classification, large dataset, edge weights, node features, fully-connected graph, graph neural network - Wang et al. Network Embedding with Completely-imbalanced Labels. [Paper link](https://ieeexplore.ieee.org/document/8979355). - Example code: [PyTorch](../examples/pytorch/rect) - Tags: node classification, network embedding, completely-imbalanced labels - Hassani and Khasahmadi. Contrastive Multi-View Representation Learning on Graphs. [Paper link](https://arxiv.org/abs/2006.05582). - Example code: [PyTorch](../examples/pytorch/mvgrl) - Tags: graph diffusion, self-supervised learning - Zhu et al. Deep Graph Contrastive Representation Learning. [Paper link](https://arxiv.org/abs/2006.04131). - Example code: [PyTorch](../examples/pytorch/grace) - Tags: contrastive learning for node classification. - Feng et al. Graph Random Neural Network for Semi-Supervised Learning on Graphs. [Paper link](https://arxiv.org/abs/2005.11079). - Example code: [PyTorch](../examples/pytorch/grand) - Tags: semi-supervised node classification, simplifying graph convolution, data augmentation - Hu et al. Heterogeneous Graph Transformer. [Paper link](https://arxiv.org/abs/2003.01332). - Example code: [PyTorch](../examples/pytorch/hgt) - Tags: dynamic heterogeneous graph, large-scale, node classification, link prediction - Chen. Graph Convolutional Networks for Graphs with Multi-Dimensionally Weighted Edges. [Paper link](https://cims.nyu.edu/~chenzh/files/GCN_with_edge_weights.pdf). - Example code: [PyTorch on ogbn-proteins](../examples/pytorch/ogb/ogbn-proteins) - Tags: node classification, weighted graphs, OGB - Frasca et al. SIGN: Scalable Inception Graph Neural Networks. [Paper link](https://arxiv.org/abs/2004.11198). 
- Example code: [PyTorch on ogbn-arxiv/products/mag](../examples/pytorch/ogb/sign), [PyTorch](../examples/pytorch/sign) - Tags: node classification, OGB, large-scale, heterogeneous graph - Hu et al. Strategies for Pre-training Graph Neural Networks. [Paper link](https://arxiv.org/abs/1905.12265). - Example code: [Molecule embedding](https://github.com/awslabs/dgl-lifesci/tree/master/examples/molecule_embeddings), [PyTorch for custom data](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/csv_data_configuration) - Tags: molecules, graph classification, unsupervised learning, self-supervised learning, molecular property prediction - Marc Brockschmidt. GNN-FiLM: Graph Neural Networks with Feature-wise Linear Modulation. [Paper link](https://arxiv.org/abs/1906.12192). - Example code: [PyTorch](../examples/pytorch/GNN-FiLM) - Tags: multi-relational graphs, hypernetworks, GNN architectures - Li, Maosen, et al. Graph Cross Networks with Vertex Infomax Pooling. [Paper link](https://arxiv.org/abs/2010.01804). - Example code: [PyTorch](../examples/pytorch/gxn) - Tags: pooling, graph classification - Liu et al. Towards Deeper Graph Neural Networks. [Paper link](https://arxiv.org/abs/2007.09296). - Example code: [PyTorch](../examples/pytorch/dagnn) - Tags: over-smoothing, node classification - Klicpera et al. Directional Message Passing for Molecular Graphs. [Paper link](https://arxiv.org/abs/2003.03123). - Example code: [PyTorch](../examples/pytorch/dimenet) - Tags: molecules, molecular property prediction, quantum chemistry - Rossi et al. Temporal Graph Networks For Deep Learning on Dynamic Graphs. [Paper link](https://arxiv.org/abs/2006.10637). - Example code: [Pytorch](../examples/pytorch/tgn) - Tags: temporal, node classification - Vashishth, Shikhar, et al. Composition-based Multi-Relational Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1911.03082). 
- Example code: [PyTorch](../examples/pytorch/compGCN) - Tags: multi-relational graphs, graph neural network - Li et al. DeeperGCN: All You Need to Train Deeper GCNs. [Paper link](https://arxiv.org/abs/2006.07739). - Example code: [PyTorch](../examples/pytorch/deepergcn) - Tags: over-smoothing, deeper gnn, OGB - Bi, Ye, et al. A Heterogeneous Information Network based Cross Domain Insurance Recommendation System for Cold Start Users. [Paper link](https://arxiv.org/abs/2007.15293). - Example code: [PyTorch](../examples/pytorch/TAHIN) - Tags: cross-domain recommendation, graph neural network - Fu X, Zhang J, Meng Z, et al. MAGNN: metapath aggregated graph neural network for heterogeneous graph embedding. [Paper link](https://dl.acm.org/doi/abs/10.1145/3366423.3380297). - Example code: [OpenHGNN](https://github.com/BUPT-GAMMA/OpenHGNN/tree/main/openhgnn/output/MAGNN) - Tags: Heterogeneous graph, Graph neural network, Graph embedding - Zhao J, Wang X, et al. Network Schema Preserving Heterogeneous Information Network Embedding. [Paper link](https://www.ijcai.org/Proceedings/2020/0190.pdf). - Example code: [OpenHGNN](https://github.com/BUPT-GAMMA/OpenHGNN/tree/main/openhgnn/output/NSHE) - Tags: Heterogeneous graph, Graph neural network, Graph embedding, Network Schema - Dou Y, Liu Z, et al. Enhancing Graph Neural Network-based Fraud Detectors against Camouflaged Fraudsters. [Paper link](https://arxiv.org/abs/2008.08692). - Example code: [PyTorch](../examples/pytorch/caregnn) - Tags: Multi-relational graph, Graph neural network, Fraud detection, Reinforcement learning, Node classification - Zhang et al. Labeling Trick: A Theory of Using Graph Neural Networks for Multi-Node Representation Learning. [Paper link](https://arxiv.org/pdf/2010.16103.pdf). - Example code: [PyTorch](../examples/pytorch/ogb/seal_ogbl) - Tags: link prediction, labeling trick, OGB ## 2019 - Sun et al.
InfoGraph: Unsupervised and Semi-supervised Graph-Level Representation Learning via Mutual Information Maximization. [Paper link](https://arxiv.org/abs/1908.01000). - Example code: [PyTorch](../examples/pytorch/infograph) - Tags: semi-supervised graph regression, unsupervised graph classification - Bianchi et al. Graph Neural Networks with Convolutional ARMA Filters. [Paper link](https://arxiv.org/abs/1901.01343). - Example code: [PyTorch](../examples/pytorch/arma) - Tags: node classification - Klicpera et al. Predict then Propagate: Graph Neural Networks meet Personalized PageRank. [Paper link](https://arxiv.org/abs/1810.05997). - Example code: [PyTorch](../examples/pytorch/appnp), [MXNet](../examples/mxnet/appnp) - Tags: node classification - Chiang et al. Cluster-GCN: An Efficient Algorithm for Training Deep and Large Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1905.07953). - Example code: [PyTorch](../examples/pytorch/cluster_gcn), [PyTorch-based GraphSAGE variant on OGB](../examples/pytorch/ogb/cluster-sage), [PyTorch-based GAT variant on OGB](../examples/pytorch/ogb/cluster-gat) - Tags: graph partition, node classification, large-scale, OGB, sampling - Veličković et al. Deep Graph Infomax. [Paper link](https://arxiv.org/abs/1809.10341). - Example code: [PyTorch](../examples/pytorch/dgi), [TensorFlow](../examples/tensorflow/dgi) - Tags: unsupervised learning, node classification - Ying et al. Hierarchical Graph Representation Learning with Differentiable Pooling. [Paper link](https://arxiv.org/abs/1806.08804). - Example code: [PyTorch](../examples/pytorch/diffpool) - Tags: pooling, graph classification, graph coarsening - Cen et al. Representation Learning for Attributed Multiplex Heterogeneous Network. [Paper link](https://arxiv.org/abs/1905.01669v2). - Example code: [PyTorch](../examples/pytorch/GATNE-T) - Tags: heterogeneous graph, link prediction, large-scale - Xu et al. How Powerful are Graph Neural Networks? 
[Paper link](https://arxiv.org/abs/1810.00826). - Example code: [PyTorch on graph classification](../examples/pytorch/gin), [PyTorch on node classification](../examples/pytorch/model_zoo/citation_network), [PyTorch on ogbg-ppa](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/ogbg_ppa), [MXNet](../examples/mxnet/gin) - Tags: graph classification, node classification, OGB - Koncel-Kedziorski et al. Text Generation from Knowledge Graphs with Graph Transformers. [Paper link](https://arxiv.org/abs/1904.02342). - Example code: [PyTorch](../examples/pytorch/graphwriter) - Tags: knowledge graph, text generation - Wang et al. Heterogeneous Graph Attention Network. [Paper link](https://arxiv.org/abs/1903.07293). - Example code: [PyTorch](../examples/pytorch/han), [OpenHGNN](https://github.com/BUPT-GAMMA/OpenHGNN/tree/main/openhgnn/output/HAN) - Tags: heterogeneous graph, node classification - Chen et al. Supervised Community Detection with Line Graph Neural Networks. [Paper link](https://arxiv.org/abs/1705.08415). - Example code: [PyTorch](../examples/pytorch/line_graph) - Tags: line graph, community detection - Wu et al. Simplifying Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1902.07153). - Example code: [PyTorch](../examples/pytorch/sgc), [MXNet](../examples/mxnet/sgc) - Tags: node classification - Wang et al. Dynamic Graph CNN for Learning on Point Clouds. [Paper link](https://arxiv.org/abs/1801.07829). - Example code: [PyTorch](../examples/pytorch/pointcloud/edgeconv) - Tags: point cloud classification - Zhang et al. Graphical Contrastive Losses for Scene Graph Parsing. [Paper link](https://arxiv.org/abs/1903.02728). - Example code: [MXNet](../examples/mxnet/scenegraph) - Tags: scene graph extraction - Lee et al. Set Transformer: A Framework for Attention-based Permutation-Invariant Neural Networks. [Paper link](https://arxiv.org/abs/1810.00825). 
- Pooling module: [PyTorch encoder](https://docs.dgl.ai/api/python/nn.pytorch.html#settransformerencoder), [PyTorch decoder](https://docs.dgl.ai/api/python/nn.pytorch.html#settransformerdecoder) - Tags: graph classification - Coley et al. A graph-convolutional neural network model for the prediction of chemical reactivity. [Paper link](https://pubs.rsc.org/en/content/articlelanding/2019/sc/c8sc04228d#!divAbstract). - Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/reaction_prediction/rexgen_direct) - Tags: molecules, reaction prediction - Lu et al. Molecular Property Prediction: A Multilevel Quantum Interactions Modeling Perspective. [Paper link](https://arxiv.org/abs/1906.11081). - Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/alchemy) - Tags: molecules, quantum chemistry - Xiong et al. Pushing the Boundaries of Molecular Representation for Drug Discovery with the Graph Attention Mechanism. [Paper link](https://pubs.acs.org/doi/10.1021/acs.jmedchem.9b00959). - Example code: [PyTorch (with attention visualization)](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/pubchem_aromaticity), [PyTorch for custom data](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/csv_data_configuration) - Tags: molecules, molecular property prediction - Sun et al. RotatE: Knowledge Graph Embedding by Relational Rotation in Complex Space. [Paper link](https://arxiv.org/pdf/1902.10197.pdf). - Example code: [PyTorch](https://github.com/awslabs/dgl-ke/tree/master/examples), [PyTorch for custom data](https://aws-dglke.readthedocs.io/en/latest/commands.html) - Tags: knowledge graph - Abu-El-Haija et al. MixHop: Higher-Order Graph Convolutional Architectures via Sparsified Neighborhood Mixing. [Paper link](https://arxiv.org/abs/1905.00067). 
- Example code: [PyTorch](../examples/pytorch/mixhop) - Tags: node classification - Lee, Junhyun, et al. Self-Attention Graph Pooling. [Paper link](https://arxiv.org/abs/1904.08082). - Example code: [PyTorch](../examples/pytorch/sagpool) - Tags: graph classification, pooling - Zhang, Zhen, et al. Hierarchical Graph Pooling with Structure Learning. [Paper link](https://arxiv.org/abs/1911.05954). - Example code: [PyTorch](../examples/pytorch/hgp_sl) - Tags: graph classification, pooling - Gao, Hongyang, et al. Graph Representation Learning via Hard and Channel-Wise Attention Networks. [Paper link](https://arxiv.org/abs/1907.04652). - Example code: [PyTorch](../examples/pytorch/hardgat) - Tags: node classification, graph attention - Wang, Xiang, et al. Neural Graph Collaborative Filtering. [Paper link](https://arxiv.org/abs/1905.08108). - Example code: [PyTorch](../examples/pytorch/NGCF) - Tags: Collaborative Filtering, recommender system, Graph Neural Network - Ying, Rex, et al. GNNExplainer: Generating Explanations for Graph Neural Networks. [Paper link](https://arxiv.org/abs/1903.03894). - Example code: [PyTorch](../examples/pytorch/gnn_explainer) - Tags: Graph Neural Network, Explainability - Zhang C, Song D, et al. Heterogeneous graph neural network. [Paper link](https://dl.acm.org/doi/abs/10.1145/3292500.3330961). - Example code: [OpenHGNN](https://github.com/BUPT-GAMMA/OpenHGNN/tree/main/openhgnn/output/HetGNN) - Tags: Heterogeneous graph, Graph neural network, Graph embedding - Yun S, Jeong M, et al. Graph transformer networks. [Paper link](https://arxiv.org/abs/1911.06455). - Example code: [OpenHGNN](https://github.com/BUPT-GAMMA/OpenHGNN/tree/main/openhgnn/output/GTN) - Tags: Heterogeneous graph, Graph neural network, Graph structure - Li A, Qin Z, et al. Spam Review Detection with Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1908.10679).
- Example code: [PyTorch](../examples/pytorch/gas) - Tags: Fraud detection, Heterogeneous graph, Edge classification, Graph attention - Liu Z, et al. Geniepath: Graph neural networks with adaptive receptive paths. [Paper link](https://arxiv.org/abs/1802.00910). - Example code: [PyTorch](../examples/pytorch/geniepath) - Tags: Fraud detection, Node classification, Graph attention, LSTM, Adaptive receptive fields - You J, et al. Position-aware graph neural networks. [Paper link](https://arxiv.org/abs/1906.04817). - Example code: [PyTorch](../examples/pytorch/P-GNN) - Tags: Positional encoding, Link prediction, Link-pair prediction ## 2018 - Li et al. Learning Deep Generative Models of Graphs. [Paper link](https://arxiv.org/abs/1803.03324). - Example code: [PyTorch example for cycles](../examples/pytorch/dgmg), [PyTorch example for molecules](https://github.com/awslabs/dgl-lifesci/tree/master/examples/generative_models/dgmg) - Tags: generative models, autoregressive models, molecules - Veličković et al. Graph Attention Networks. [Paper link](https://arxiv.org/abs/1710.10903). - Example code: [PyTorch](../examples/pytorch/gat), [PyTorch on ogbn-arxiv](../examples/pytorch/ogb/ogbn-arxiv), [PyTorch on ogbn-products](../examples/pytorch/ogb/ogbn-products), [TensorFlow](../examples/tensorflow/gat), [MXNet](../examples/mxnet/gat) - Tags: node classification, OGB - Jin et al. Junction Tree Variational Autoencoder for Molecular Graph Generation. [Paper link](https://arxiv.org/abs/1802.04364). - Example code: [PyTorch](../examples/pytorch/jtnn) - Tags: generative models, molecules, VAE - Thekumparampil et al. Attention-based Graph Neural Network for Semi-supervised Learning. [Paper link](https://arxiv.org/abs/1803.03735). - Example code: [PyTorch](../examples/pytorch/model_zoo/citation_network) - Tags: node classification - Ying et al. Graph Convolutional Neural Networks for Web-Scale Recommender Systems. [Paper link](https://arxiv.org/abs/1806.01973). 
- Example code: [PyTorch](../examples/pytorch/pinsage) - Tags: recommender system, large-scale, sampling - Berg Palm et al. Recurrent Relational Networks. [Paper link](https://arxiv.org/abs/1711.08028). - Example code: [PyTorch](../examples/pytorch/rrn) - Tags: sudoku solving - Yu et al. Spatio-Temporal Graph Convolutional Networks: A Deep Learning Framework for Traffic Forecasting. [Paper link](https://arxiv.org/abs/1709.04875v4). - Example code: [PyTorch](../examples/pytorch/stgcn_wave) - Tags: spatio-temporal, traffic forecasting - Zhang et al. An End-to-End Deep Learning Architecture for Graph Classification. [Paper link](https://www.cse.wustl.edu/~ychen/public/DGCNN.pdf). - Pooling module: [PyTorch](https://docs.dgl.ai/api/python/nn.pytorch.html#sortpooling), [TensorFlow](https://docs.dgl.ai/api/python/nn.tensorflow.html#sortpooling), [MXNet](https://docs.dgl.ai/api/python/nn.mxnet.html#sortpooling) - Tags: graph classification - Zhang et al. Link Prediction Based on Graph Neural Networks. [Paper link](https://papers.nips.cc/paper/2018/file/53f0d7c537d99b3824f0f99d62ea2428-Paper.pdf). - Example code: [PyTorch](../examples/pytorch/seal) - Tags: link prediction, sampling - Xu et al. Representation Learning on Graphs with Jumping Knowledge Networks. [Paper link](https://arxiv.org/abs/1806.03536). - Example code: [PyTorch](../examples/pytorch/jknet) - Tags: message passing, neighborhood - Zhang et al. GaAN: Gated Attention Networks for Learning on Large and Spatiotemporal Graphs. [Paper link](https://arxiv.org/abs/1803.07294). - Example code: [pytorch](../examples/pytorch/dtgrnn) - Tags: Static discrete temporal graph, traffic forecasting - Feng et al. Hypergraph Neural Networks. [Paper link](https://arxiv.org/abs/1809.09401). - Example code: [pytorch](../examples/sparse/hgnn) - Tags: hypergraph ## 2017 - Kipf and Welling. Semi-Supervised Classification with Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1609.02907). 
- Example code: [PyTorch](../examples/pytorch/gcn), [PyTorch on ogbn-arxiv](../examples/pytorch/ogb/ogbn-arxiv), [PyTorch on ogbl-ppa](https://github.com/awslabs/dgl-lifesci/tree/master/examples/link_prediction/ogbl-ppa), [PyTorch on ogbg-ppa](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/ogbg_ppa), [TensorFlow](../examples/tensorflow/gcn), [MXNet](../examples/mxnet/gcn) - Tags: node classification, link prediction, graph classification, OGB - Sabour et al. Dynamic Routing Between Capsules. [Paper link](https://arxiv.org/abs/1710.09829). - Example code: [PyTorch](../examples/pytorch/capsule) - Tags: image classification - van den Berg et al. Graph Convolutional Matrix Completion. [Paper link](https://arxiv.org/abs/1706.02263). - Example code: [PyTorch](../examples/pytorch/gcmc) - Tags: matrix completion, recommender system, link prediction, bipartite graphs - Hamilton et al. Inductive Representation Learning on Large Graphs. [Paper link](https://cs.stanford.edu/people/jure/pubs/graphsage-nips17.pdf). - Example code: [PyTorch](../examples/pytorch/graphsage), [PyTorch on ogbn-products](../examples/pytorch/ogb/ogbn-products), [PyTorch on ogbn-mag](../examples/pytorch/ogb/ogbn-mag), [PyTorch on ogbl-ppa](https://github.com/awslabs/dgl-lifesci/tree/master/examples/link_prediction/ogbl-ppa), [MXNet](../examples/mxnet/graphsage) - Tags: node classification, sampling, unsupervised learning, link prediction, OGB - Dong et al. metapath2vec: Scalable Representation Learning for Heterogeneous Networks. [Paper link](https://dl.acm.org/doi/10.1145/3097983.3098036). - Example code: [PyTorch](../examples/pytorch/metapath2vec) - Tags: heterogeneous graph, network embedding, large-scale, node classification - Du et al. Topology Adaptive Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1710.10370). - Example code: [PyTorch](../examples/pytorch/tagcn), [MXNet](../examples/mxnet/tagcn) - Tags: node classification - Qi et al. 
PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation. [Paper link](https://arxiv.org/abs/1612.00593). - Example code: [PyTorch](../examples/pytorch/pointcloud/pointnet) - Tags: point cloud classification, point cloud part-segmentation - Qi et al. PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space. [Paper link](https://arxiv.org/abs/1706.02413). - Example code: [PyTorch](../examples/pytorch/pointcloud/pointnet) - Tags: point cloud classification - Schlichtkrull. Modeling Relational Data with Graph Convolutional Networks. [Paper link](https://arxiv.org/abs/1703.06103). - Example code: [PyTorch example using homogeneous DGLGraphs](../examples/pytorch/rgcn), [PyTorch](../examples/pytorch/rgcn-hetero), [TensorFlow](../examples/tensorflow/rgcn), [MXNet](../examples/mxnet/rgcn) - Tags: node classification, link prediction, heterogeneous graph, sampling - Vaswani et al. Attention Is All You Need. [Paper link](https://arxiv.org/abs/1706.03762). - Example code: [PyTorch](../examples/pytorch/transformer) - Tags: machine translation - Gilmer et al. Neural Message Passing for Quantum Chemistry. [Paper link](https://arxiv.org/abs/1704.01212). - Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/alchemy), [PyTorch for custom data](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/csv_data_configuration) - Tags: molecules, quantum chemistry - Gomes et al. Atomic Convolutional Networks for Predicting Protein-Ligand Binding Affinity. [Paper link](https://arxiv.org/abs/1703.10603). - Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/binding_affinity_prediction) - Tags: binding affinity prediction, molecules, proteins - Schütt et al. SchNet: A continuous-filter convolutional neural network for modeling quantum interactions. [Paper link](https://arxiv.org/abs/1706.08566). 
- Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/alchemy) - Tags: molecules, quantum chemistry - Li et al. Diffusion Convolutional Recurrent Neural Network: Data-Driven Traffic Forecasting. [Paper link](https://arxiv.org/abs/1707.01926). - Example code: [PyTorch](../examples/pytorch/dtgrnn) - Tags: Static discrete temporal graph, traffic forecasting ## 2016 - Li et al. Gated Graph Sequence Neural Networks. [Paper link](https://arxiv.org/abs/1511.05493). - Example code: [PyTorch](../examples/pytorch/ggnn) - Tags: question answering - Defferrard et al. Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering. [Paper link](https://arxiv.org/abs/1606.09375). - Example code: [PyTorch on image classification](../examples/pytorch/model_zoo/geometric), [PyTorch on node classification](../examples/pytorch/model_zoo/citation_network) - Tags: image classification, graph classification, node classification - Monti et al. Geometric deep learning on graphs and manifolds using mixture model CNNs. [Paper link](https://arxiv.org/abs/1611.08402). - Example code: [PyTorch on image classification](../examples/pytorch/model_zoo/geometric), [PyTorch on node classification](../examples/pytorch/monet), [MXNet on node classification](../examples/mxnet/monet) - Tags: image classification, graph classification, node classification - Kearnes et al. Molecular Graph Convolutions: Moving Beyond Fingerprints. [Paper link](https://arxiv.org/abs/1603.00856). - Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/moleculenet), [PyTorch for custom data](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/csv_data_configuration) - Tags: molecular property prediction - Trouillon et al. Complex Embeddings for Simple Link Prediction. [Paper link](http://proceedings.mlr.press/v48/trouillon16.pdf).
- Example code: [PyTorch](https://github.com/awslabs/dgl-ke/tree/master/examples), [PyTorch for custom data](https://aws-dglke.readthedocs.io/en/latest/commands.html) - Tags: knowledge graph - Kipf and Welling. Variational Graph Auto-Encoders. [Paper link](https://arxiv.org/abs/1611.07308). - Example code: [PyTorch](../examples/pytorch/vgae) - Tags: link prediction ## 2015 - Tang et al. LINE: Large-scale Information Network Embedding. [Paper link](https://arxiv.org/abs/1503.03578). - Example code: [PyTorch on OGB](../examples/pytorch/ogb/line) - Tags: network embedding, transductive learning, OGB, link prediction - Sheng Tai et al. Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks. [Paper link](https://arxiv.org/abs/1503.00075). - Example code: [PyTorch](../examples/pytorch/tree_lstm), [MXNet](../examples/mxnet/tree_lstm) - Tags: sentiment classification - Vinyals et al. Order Matters: Sequence to sequence for sets. [Paper link](https://arxiv.org/abs/1511.06391). - Pooling module: [PyTorch](https://docs.dgl.ai/api/python/nn.pytorch.html#set2set), [MXNet](https://docs.dgl.ai/api/python/nn.mxnet.html#set2set) - Tags: graph classification - Lin et al. Learning Entity and Relation Embeddings for Knowledge Graph Completion. [Paper link](https://www.aaai.org/ocs/index.php/AAAI/AAAI15/paper/viewPaper/9571). - Example code: [PyTorch](https://github.com/awslabs/dgl-ke/tree/master/examples), [PyTorch for custom data](https://aws-dglke.readthedocs.io/en/latest/commands.html) - Tags: knowledge graph - Yang et al. Embedding Entities and Relations for Learning and Inference in Knowledge Bases. [Paper link](https://arxiv.org/abs/1412.6575). - Example code: [PyTorch](https://github.com/awslabs/dgl-ke/tree/master/examples), [PyTorch for custom data](https://aws-dglke.readthedocs.io/en/latest/commands.html) - Tags: knowledge graph - Duvenaud et al. Convolutional Networks on Graphs for Learning Molecular Fingerprints.
[Paper link](https://arxiv.org/abs/1509.09292). - Example code: [PyTorch](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/moleculenet), [PyTorch for custom data](https://github.com/awslabs/dgl-lifesci/tree/master/examples/property_prediction/csv_data_configuration) - Tags: molecules, molecular property prediction ## 2014 - Perozzi et al. DeepWalk: Online Learning of Social Representations. [Paper link](https://arxiv.org/abs/1403.6652). - Example code: [PyTorch on OGB](../examples/pytorch/ogb/deepwalk) - Tags: network embedding, transductive learning, OGB, link prediction - Fischer et al. A Hausdorff Heuristic for Efficient Computation of Graph Edit Distance. [Paper link](https://link.springer.com/chapter/10.1007/978-3-662-44415-3_9). - Example code: [PyTorch](../examples/pytorch/graph_matching) - Tags: graph edit distance, graph matching ## 2013 - Bordes et al. Translating Embeddings for Modeling Multi-relational Data. [Paper link](https://proceedings.neurips.cc/paper/2013/file/1cecc7a77928ca8133fa24680a88d2f9-Paper.pdf). - Example code: [PyTorch](https://github.com/awslabs/dgl-ke/tree/master/examples), [PyTorch for custom data](https://aws-dglke.readthedocs.io/en/latest/commands.html) - Tags: knowledge graph ## 2011 - Fankhauser et al. Speeding Up Graph Edit Distance Computation through Fast Bipartite Matching. [Paper link](https://link.springer.com/chapter/10.1007/978-3-642-20844-7_11). - Example code: [PyTorch](../examples/pytorch/graph_matching) - Tags: graph edit distance, graph matching - Nickel et al. A Three-Way Model for Collective Learning on Multi-Relational Data. [Paper link](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.383.2015&rep=rep1&type=pdf). - Example code: [PyTorch](https://github.com/awslabs/dgl-ke/tree/master/examples), [PyTorch for custom data](https://aws-dglke.readthedocs.io/en/latest/commands.html) - Tags: knowledge graph ## 2010 - Hoffman et al. Online Learning for Latent Dirichlet Allocation. 
[Paper link](https://papers.nips.cc/paper/2010/file/71f6278d140af599e06ad9bf1ba03cb0-Paper.pdf). - Example code: [PyTorch](../examples/pytorch/lda) - Tags: sklearn, decomposition, latent Dirichlet allocation ## 2009 - Riesen et al. Speeding Up Graph Edit Distance Computation with a Bipartite Heuristic. [Paper link](https://core.ac.uk/download/pdf/33054885.pdf). - Example code: [PyTorch](../examples/pytorch/graph_matching) - Tags: graph edit distance, graph matching ## 2006 - Neuhaus et al. Fast Suboptimal Algorithms for the Computation of Graph Edit Distance. [Paper link](https://link.springer.com/chapter/10.1007/11815921_17). - Example code: [PyTorch](../examples/pytorch/graph_matching) - Tags: graph edit distance, graph matching ## 2002 - Zhu & Ghahramani. Learning from Labeled and Unlabeled Data with Label Propagation. [Paper link](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.14.3864&rep=rep1&type=pdf). - Example code: [PyTorch](../examples/pytorch/label_propagation) - Tags: node classification, label propagation ## 1998 - Page et al. The PageRank Citation Ranking: Bringing Order to the Web. [Paper link](http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.38.5427). 
- Example code: [PyTorch](../examples/pytorch/pagerank.py) - Tags: PageRank ================================================ FILE: examples/advanced/cugraph/graphsage.py ================================================ import argparse import torch import torch.nn as nn import torch.nn.functional as F import torchmetrics.functional as MF import tqdm from dgl.data import AsNodePredDataset from dgl.dataloading import ( DataLoader, MultiLayerFullNeighborSampler, NeighborSampler, ) from dgl.nn import CuGraphSAGEConv from ogb.nodeproppred import DglNodePropPredDataset class SAGE(nn.Module): def __init__(self, in_size, hid_size, out_size): super().__init__() self.layers = nn.ModuleList() # three-layer GraphSAGE-mean self.layers.append(CuGraphSAGEConv(in_size, hid_size, "mean")) self.layers.append(CuGraphSAGEConv(hid_size, hid_size, "mean")) self.layers.append(CuGraphSAGEConv(hid_size, out_size, "mean")) self.dropout = nn.Dropout(0.5) self.hid_size = hid_size self.out_size = out_size def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = F.relu(h) h = self.dropout(h) return h def inference(self, g, device, batch_size): """Conduct layer-wise inference to get all the node embeddings.""" feat = g.ndata["feat"] sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) dataloader = DataLoader( g, torch.arange(g.num_nodes()).to(g.device), sampler, device=device, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=0, ) buffer_device = torch.device("cpu") pin_memory = buffer_device != device for l, layer in enumerate(self.layers): y = torch.empty( g.num_nodes(), self.hid_size if l != len(self.layers) - 1 else self.out_size, device=buffer_device, pin_memory=pin_memory, ) feat = feat.to(device) for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): x = feat[input_nodes] h = layer(blocks[0], x) # len(blocks) = 1 if l != len(self.layers) - 1: h = 
F.relu(h) h = self.dropout(h) # by design, our output nodes are contiguous y[output_nodes[0] : output_nodes[-1] + 1] = h.to(buffer_device) feat = y return y def evaluate(model, graph, dataloader): model.eval() ys = [] y_hats = [] for it, (input_nodes, output_nodes, blocks) in enumerate(dataloader): with torch.no_grad(): x = blocks[0].srcdata["feat"] ys.append(blocks[-1].dstdata["label"]) y_hats.append(model(blocks, x)) num_classes = y_hats[0].shape[1] return MF.accuracy( torch.cat(y_hats), torch.cat(ys), task="multiclass", num_classes=num_classes, ) def layerwise_infer(device, graph, nid, model, batch_size): model.eval() with torch.no_grad(): pred = model.inference( graph, device, batch_size ) # pred in buffer_device pred = pred[nid] label = graph.ndata["label"][nid].to(pred.device) num_classes = pred.shape[1] return MF.accuracy( pred, label, task="multiclass", num_classes=num_classes ) def train(args, device, g, dataset, model): # create sampler & dataloader train_idx = dataset.train_idx.to(device) val_idx = dataset.val_idx.to(device) sampler = NeighborSampler( [10, 10, 10], # fanout for [layer-0, layer-1, layer-2] prefetch_node_feats=["feat"], prefetch_labels=["label"], ) use_uva = args.mode == "mixed" train_dataloader = DataLoader( g, train_idx, sampler, device=device, batch_size=1024, shuffle=True, drop_last=False, num_workers=0, use_uva=use_uva, ) val_dataloader = DataLoader( g, val_idx, sampler, device=device, batch_size=1024, shuffle=True, drop_last=False, num_workers=0, use_uva=use_uva, ) opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4) for epoch in range(10): model.train() total_loss = 0 for it, (input_nodes, output_nodes, blocks) in enumerate( train_dataloader ): x = blocks[0].srcdata["feat"] y = blocks[-1].dstdata["label"] y_hat = model(blocks, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() loss.backward() opt.step() total_loss += loss.item() acc = evaluate(model, g, val_dataloader) print( "Epoch {:05d} | Loss {:.4f} | Accuracy 
{:.4f} ".format( epoch, total_loss / (it + 1), acc.item() ) ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--mode", default="mixed", choices=["mixed", "puregpu"], help="Training mode. 'mixed' for CPU-GPU mixed training, " "'puregpu' for pure-GPU training.", ) args = parser.parse_args() if not torch.cuda.is_available(): args.mode = "cpu" print(f"Training in {args.mode} mode.") # load and preprocess dataset print("Loading data") dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) g = dataset[0] g = g.to("cuda" if args.mode == "puregpu" else "cpu") device = torch.device("cpu" if args.mode == "cpu" else "cuda") # create GraphSAGE model in_size = g.ndata["feat"].shape[1] out_size = dataset.num_classes model = SAGE(in_size, 256, out_size).to(device) # model training print("Training...") train(args, device, g, dataset, model) # test the model print("Testing...") acc = layerwise_infer(device, g, dataset.test_idx, model, batch_size=4096) print("Test Accuracy {:.4f}".format(acc.item())) ================================================ FILE: examples/advanced/cugraph/rgcn.py ================================================ """ [RGCN: Relational Graph Convolutional Networks] (https://arxiv.org/abs/1703.06103) This example showcases the usage of `CuGraphRelGraphConv` via the entity classification problem in the RGCN paper with mini-batch training. It offers a 1.5~2x speed-up over `RelGraphConv` on cuda devices and only requires minimal code changes from the current `entity_sample.py` example. 
""" import argparse import dgl import torch import torch.nn as nn import torch.nn.functional as F from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from dgl.dataloading import DataLoader, MultiLayerNeighborSampler from dgl.nn import CuGraphRelGraphConv from torchmetrics.functional import accuracy class RGCN(nn.Module): def __init__(self, num_nodes, h_dim, out_dim, num_rels, num_bases): super().__init__() self.emb = nn.Embedding(num_nodes, h_dim) # two-layer RGCN self.conv1 = CuGraphRelGraphConv( h_dim, h_dim, num_rels, regularizer="basis", num_bases=num_bases, self_loop=True, apply_norm=True, ) self.conv2 = CuGraphRelGraphConv( h_dim, out_dim, num_rels, regularizer="basis", num_bases=num_bases, self_loop=True, apply_norm=True, ) def forward(self, g, fanouts=[None, None]): x = self.emb(g[0].srcdata[dgl.NID]) h = F.relu(self.conv1(g[0], x, g[0].edata[dgl.ETYPE], fanouts[0])) h = self.conv2(g[1], h, g[1].edata[dgl.ETYPE], fanouts[1]) return h def evaluate(model, labels, dataloader, inv_target): model.eval() eval_logits = [] eval_seeds = [] with torch.no_grad(): for _, output_nodes, blocks in dataloader: output_nodes = inv_target[output_nodes.type(torch.int64)] logits = model(blocks) eval_logits.append(logits.cpu().detach()) eval_seeds.append(output_nodes.cpu().detach()) num_classes = eval_logits[0].shape[1] eval_logits = torch.cat(eval_logits) eval_seeds = torch.cat(eval_seeds) return accuracy( eval_logits.argmax(dim=1), labels[eval_seeds].cpu(), task="multiclass", num_classes=num_classes, ).item() def train(device, g, target_idx, labels, train_mask, model, fanouts): # Define train idx, loss function and optimizer. train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() loss_fcn = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) # Construct sampler and dataloader. 
sampler = MultiLayerNeighborSampler(fanouts) train_loader = DataLoader( g, target_idx[train_idx].type(g.idtype), sampler, device=device, batch_size=100, shuffle=True, ) # No separate validation subset, use train index instead for validation. val_loader = DataLoader( g, target_idx[train_idx].type(g.idtype), sampler, device=device, batch_size=100, shuffle=False, ) for epoch in range(50): model.train() total_loss = 0 for it, (_, output_nodes, blocks) in enumerate(train_loader): output_nodes = inv_target[output_nodes.type(torch.int64)] logits = model(blocks, fanouts=fanouts) loss = loss_fcn(logits, labels[output_nodes]) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() acc = evaluate(model, labels, val_loader, inv_target) print( f"Epoch {epoch:05d} | Loss {total_loss / (it+1):.4f} | " f"Val. Accuracy {acc:.4f}" ) if __name__ == "__main__": parser = argparse.ArgumentParser( description="RGCN for entity classification with sampling" ) parser.add_argument( "--dataset", type=str, default="aifb", choices=["aifb", "mutag", "bgs", "am"], ) args = parser.parse_args() device = torch.device("cuda") print(f"Training with DGL CuGraphRelGraphConv module with sampling.") # Load and preprocess dataset. if args.dataset == "aifb": data = AIFBDataset() elif args.dataset == "mutag": data = MUTAGDataset() elif args.dataset == "bgs": data = BGSDataset() elif args.dataset == "am": data = AMDataset() else: raise ValueError(f"Unknown dataset: {args.dataset}") hg = data[0].to(device) num_rels = len(hg.canonical_etypes) category = data.predict_category labels = hg.nodes[category].data.pop("labels") train_mask = hg.nodes[category].data.pop("train_mask") test_mask = hg.nodes[category].data.pop("test_mask") # Find target category and node id. 
category_id = hg.ntypes.index(category) g = dgl.to_homogeneous(hg) node_ids = torch.arange(g.num_nodes()).to(device) target_idx = node_ids[g.ndata[dgl.NTYPE] == category_id] g.ndata["ntype"] = g.ndata.pop(dgl.NTYPE) g.ndata["type_id"] = g.ndata.pop(dgl.NID) # Find the mapping from global node IDs to type-specific node IDs. inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64).to(device) inv_target[target_idx] = torch.arange( 0, target_idx.shape[0], dtype=inv_target.dtype ).to(device) # Create RGCN model. in_size = g.num_nodes() # featureless with one-hot encoding out_size = data.num_classes num_bases = 20 fanouts = [4, 4] model = RGCN(in_size, 16, out_size, num_rels, num_bases).to(device) train( device, g, target_idx, labels, train_mask, model, fanouts, ) test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() test_sampler = MultiLayerNeighborSampler([-1, -1]) test_loader = DataLoader( g, target_idx[test_idx].type(g.idtype), test_sampler, device=device, batch_size=32, shuffle=False, ) acc = evaluate(model, labels, test_loader, inv_target) print(f"Test accuracy {acc:.4f}") ================================================ FILE: examples/core/Graphormer/README.md ================================================ Graphormer ============================== ## Introduction * Graphormer is a Transformer model designed for graph-structured data, which encodes the structural information of a graph into the standard Transformer. Specifically, Graphormer utilizes Degree Encoding to measure the importance of nodes, Spatial Encoding and Path Encoding to measure the relation between node pairs. The former plus the node features serve as input to Graphormer, while the latter acts as bias terms in the self-attention module. 
* paper link: [https://arxiv.org/abs/2106.05234](https://arxiv.org/abs/2106.05234) ## Requirements - accelerate - transformers - ogb ## Dataset Task: Graph Property Prediction | Dataset | #Graphs | #Node Feats | #Edge Feats | Metric | | :---------: | :-----: | :---------: | :---------: | :-----: | | ogbg-molhiv | 41,127 | 9 | 3 | ROC-AUC | How to run ---------- ```bash accelerate launch --multi_gpu --mixed_precision=fp16 main.py ``` > **_NOTE:_** The script will automatically download weights pre-trained on PCQM4Mv2. To reproduce the same result, set the total batch size to 64. ## Summary * ogbg-molhiv (pretrained on PCQM4Mv2): ~0.791 ================================================ FILE: examples/core/Graphormer/dataset.py ================================================ """ This file contains the MolHIVDataset class, which handles data preprocessing (computing required graph features, converting graphs to tensors) of the ogbg-molhiv dataset. """ import torch as th import torch.nn.functional as F from dgl import shortest_dist from ogb.graphproppred import DglGraphPropPredDataset from torch.nn.utils.rnn import pad_sequence class MolHIVDataset(th.utils.data.Dataset): def __init__(self): dataset = DglGraphPropPredDataset(name="ogbg-molhiv") split_idx = dataset.get_idx_split() # Compute the shortest path distances and their corresponding paths # of all graphs during preprocessing. for g, label in dataset: spd, path = shortest_dist(g, root=None, return_paths=True) g.ndata["spd"] = spd g.ndata["path"] = path self.train, self.val, self.test = ( dataset[split_idx["train"]], dataset[split_idx["valid"]], dataset[split_idx["test"]], ) def collate(self, samples): # To match Graphormer's input style, all graph features should be # padded to the same size. Keep in mind that different graphs may # have varying feature sizes since they have different number of # nodes, so they will be aligned with the graph having the maximum # number of nodes. 
graphs, labels = map(list, zip(*samples)) labels = th.stack(labels) num_graphs = len(graphs) num_nodes = [g.num_nodes() for g in graphs] max_num_nodes = max(num_nodes) # Graphormer adds a virual node to the graph, which is connected to # all other nodes and supposed to represent the graph embedding. So # here +1 is for the virtual node. attn_mask = th.zeros(num_graphs, max_num_nodes + 1, max_num_nodes + 1) node_feat = [] in_degree, out_degree = [], [] path_data = [] # Since shortest_dist returns -1 for unreachable node pairs and padded # nodes are unreachable to others, distance relevant to padded nodes # use -1 padding as well. dist = -th.ones( (num_graphs, max_num_nodes, max_num_nodes), dtype=th.long ) for i in range(num_graphs): # A binary mask where invalid positions are indicated by True. attn_mask[i, :, num_nodes[i] + 1 :] = 1 # +1 to distinguish padded non-existing nodes from real nodes node_feat.append(graphs[i].ndata["feat"] + 1) in_degree.append( th.clamp(graphs[i].in_degrees() + 1, min=0, max=512) ) out_degree.append( th.clamp(graphs[i].out_degrees() + 1, min=0, max=512) ) # Path padding to make all paths to the same length "max_len". path = graphs[i].ndata["path"] path_len = path.size(dim=2) # shape of shortest_path: [n, n, max_len] max_len = 5 if path_len >= max_len: shortest_path = path[:, :, :max_len] else: p1d = (0, max_len - path_len) # Use the same -1 padding as shortest_dist for # invalid edge IDs. shortest_path = F.pad(path, p1d, "constant", -1) pad_num_nodes = max_num_nodes - num_nodes[i] p3d = (0, 0, 0, pad_num_nodes, 0, pad_num_nodes) shortest_path = F.pad(shortest_path, p3d, "constant", -1) # +1 to distinguish padded non-existing edges from real edges edata = graphs[i].edata["feat"] + 1 # shortest_dist pads non-existing edges (at the end of shortest # paths) with edge IDs -1, and th.zeros(1, edata.shape[1]) stands # for all padded edge features. 
edata = th.cat( (edata, th.zeros(1, edata.shape[1]).to(edata.device)), dim=0 ) path_data.append(edata[shortest_path]) dist[i, : num_nodes[i], : num_nodes[i]] = graphs[i].ndata["spd"] # node feat padding node_feat = pad_sequence(node_feat, batch_first=True) # degree padding in_degree = pad_sequence(in_degree, batch_first=True) out_degree = pad_sequence(out_degree, batch_first=True) return ( labels.reshape(num_graphs, -1), attn_mask, node_feat, in_degree, out_degree, th.stack(path_data), dist, ) ================================================ FILE: examples/core/Graphormer/main.py ================================================ """ This script finetunes and tests a Graphormer model (pretrained on PCQM4Mv2) for graph classification on ogbg-molhiv dataset. Paper: [Do Transformers Really Perform Bad for Graph Representation?] (https://arxiv.org/abs/2106.05234) This flowchart describes the main functional sequence of the provided example. main │ └───> train_val_pipeline │ ├───> Load and preprocess dataset │ ├───> Download pretrained model │ ├───> train_epoch │ │ │ └───> Graphormer.forward │ └───> evaluate_network │ └───> Graphormer.inference """ import argparse import random import torch as th import torch.nn as nn from accelerate import Accelerator from dataset import MolHIVDataset from dgl.data import download from dgl.dataloading import GraphDataLoader from model import Graphormer from ogb.graphproppred import Evaluator from transformers.optimization import ( AdamW, get_polynomial_decay_schedule_with_warmup, ) # Instantiate an accelerator object to support distributed # training and inference. 
accelerator = Accelerator() def train_epoch(model, optimizer, data_loader, lr_scheduler): model.train() epoch_loss = 0 list_scores = [] list_labels = [] loss_fn = nn.BCEWithLogitsLoss() for ( batch_labels, attn_mask, node_feat, in_degree, out_degree, path_data, dist, ) in data_loader: optimizer.zero_grad() device = accelerator.device batch_scores = model( node_feat.to(device), in_degree.to(device), out_degree.to(device), path_data.to(device), dist.to(device), attn_mask=attn_mask, ) loss = loss_fn(batch_scores, batch_labels.float()) accelerator.backward(loss) optimizer.step() lr_scheduler.step() epoch_loss += loss.item() list_scores.append(batch_scores) list_labels.append(batch_labels) # Release GPU memory. del ( batch_labels, batch_scores, loss, attn_mask, node_feat, in_degree, out_degree, path_data, dist, ) th.cuda.empty_cache() epoch_loss /= len(data_loader) evaluator = Evaluator(name="ogbg-molhiv") epoch_auc = evaluator.eval( {"y_pred": th.cat(list_scores), "y_true": th.cat(list_labels)} )["rocauc"] return epoch_loss, epoch_auc def evaluate_network(model, data_loader): model.eval() epoch_loss = 0 loss_fn = nn.BCEWithLogitsLoss() with th.no_grad(): list_scores = [] list_labels = [] for ( batch_labels, attn_mask, node_feat, in_degree, out_degree, path_data, dist, ) in data_loader: device = accelerator.device batch_scores = model( node_feat.to(device), in_degree.to(device), out_degree.to(device), path_data.to(device), dist.to(device), attn_mask=attn_mask, ) # Gather all predictions and targets. 
all_predictions, all_targets = accelerator.gather_for_metrics( (batch_scores, batch_labels) ) loss = loss_fn(all_predictions, all_targets.float()) epoch_loss += loss.item() list_scores.append(all_predictions) list_labels.append(all_targets) epoch_loss /= len(data_loader) evaluator = Evaluator(name="ogbg-molhiv") epoch_auc = evaluator.eval( {"y_pred": th.cat(list_scores), "y_true": th.cat(list_labels)} )["rocauc"] return epoch_loss, epoch_auc def train_val_pipeline(params): dataset = MolHIVDataset() accelerator.print( f"train, test, val sizes: {len(dataset.train)}, " f"{len(dataset.test)}, {len(dataset.val)}." ) accelerator.print("Finished loading.") train_loader = GraphDataLoader( dataset.train, batch_size=params.batch_size, shuffle=True, collate_fn=dataset.collate, pin_memory=True, num_workers=16, ) val_loader = GraphDataLoader( dataset.val, batch_size=params.batch_size, shuffle=False, collate_fn=dataset.collate, pin_memory=True, num_workers=16, ) test_loader = GraphDataLoader( dataset.test, batch_size=params.batch_size, shuffle=False, collate_fn=dataset.collate, pin_memory=True, num_workers=16, ) # Load pre-trained model. download(url="https://data.dgl.ai/pre_trained/graphormer_pcqm.pth") model = Graphormer() state_dict = th.load("graphormer_pcqm.pth") model.load_state_dict(state_dict) model.reset_output_layer_parameters() num_epochs = 16 total_updates = 33000 * num_epochs / params.batch_size # Use warmup schedule to avoid overfitting at the very beginning # of training, the ratio 0.16 is the same as the paper. warmup_updates = total_updates * 0.16 optimizer = AdamW(model.parameters(), lr=1e-4, eps=1e-8, weight_decay=0) lr_scheduler = get_polynomial_decay_schedule_with_warmup( optimizer, num_warmup_steps=warmup_updates, num_training_steps=total_updates, lr_end=1e-9, power=1.0, ) epoch_train_AUCs, epoch_val_AUCs, epoch_test_AUCs = [], [], [] # Pass all objects relevant to training to the prepare() method as required # by Accelerate. 
( model, optimizer, train_loader, val_loader, test_loader, lr_scheduler, ) = accelerator.prepare( model, optimizer, train_loader, val_loader, test_loader, lr_scheduler ) for epoch in range(num_epochs): epoch_train_loss, epoch_train_auc = train_epoch( model, optimizer, train_loader, lr_scheduler ) epoch_val_loss, epoch_val_auc = evaluate_network(model, val_loader) epoch_test_loss, epoch_test_auc = evaluate_network(model, test_loader) epoch_train_AUCs.append(epoch_train_auc) epoch_val_AUCs.append(epoch_val_auc) epoch_test_AUCs.append(epoch_test_auc) accelerator.print( f"Epoch={epoch + 1} | train_AUC={epoch_train_auc:.3f} | " f"val_AUC={epoch_val_auc:.3f} | test_AUC={epoch_test_auc:.3f}" ) # Return test and train AUCs with best val AUC. index = epoch_val_AUCs.index(max(epoch_val_AUCs)) val_auc = epoch_val_AUCs[index] train_auc = epoch_train_AUCs[index] test_auc = epoch_test_AUCs[index] accelerator.print("Test ROCAUC: {:.4f}".format(test_auc)) accelerator.print("Val ROCAUC: {:.4f}".format(val_auc)) accelerator.print("Train ROCAUC: {:.4f}".format(train_auc)) accelerator.print("Best epoch index: {:.4f}".format(index)) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--seed", default=1, type=int, help="Please give a value for random seed", ) parser.add_argument( "--batch_size", default=16, type=int, help="Please give a value for batch_size", ) args = parser.parse_args() # Set manual seed to bind the order of training data to the random seed. random.seed(args.seed) th.manual_seed(args.seed) if th.cuda.is_available(): th.cuda.manual_seed(args.seed) train_val_pipeline(args) ================================================ FILE: examples/core/Graphormer/model.py ================================================ """ This file defines the Graphormer model, which utilizes DegreeEncoder, SpatialEncoder, PathEncoder and GraphormerLayer from DGL build-in modules. 
""" import torch as th import torch.nn as nn from dgl.nn import DegreeEncoder, GraphormerLayer, PathEncoder, SpatialEncoder class Graphormer(nn.Module): def __init__( self, num_classes=1, edge_dim=3, num_atoms=4608, max_degree=512, num_spatial=511, multi_hop_max_dist=5, num_encoder_layers=12, embedding_dim=768, ffn_embedding_dim=768, num_attention_heads=32, dropout=0.1, pre_layernorm=True, activation_fn=nn.GELU(), ): super().__init__() self.dropout = nn.Dropout(p=dropout) self.embedding_dim = embedding_dim self.num_heads = num_attention_heads self.atom_encoder = nn.Embedding( num_atoms + 1, embedding_dim, padding_idx=0 ) self.graph_token = nn.Embedding(1, embedding_dim) self.degree_encoder = DegreeEncoder( max_degree=max_degree, embedding_dim=embedding_dim ) self.path_encoder = PathEncoder( max_len=multi_hop_max_dist, feat_dim=edge_dim, num_heads=num_attention_heads, ) self.spatial_encoder = SpatialEncoder( max_dist=num_spatial, num_heads=num_attention_heads ) self.graph_token_virtual_distance = nn.Embedding(1, num_attention_heads) self.emb_layer_norm = nn.LayerNorm(self.embedding_dim) self.layers = nn.ModuleList([]) self.layers.extend( [ GraphormerLayer( feat_size=self.embedding_dim, hidden_size=ffn_embedding_dim, num_heads=num_attention_heads, dropout=dropout, activation=activation_fn, norm_first=pre_layernorm, ) for _ in range(num_encoder_layers) ] ) # map graph_rep to num_classes self.lm_head_transform_weight = nn.Linear( self.embedding_dim, self.embedding_dim ) self.layer_norm = nn.LayerNorm(self.embedding_dim) self.activation_fn = activation_fn self.embed_out = nn.Linear(self.embedding_dim, num_classes, bias=False) self.lm_output_learned_bias = nn.Parameter(th.zeros(num_classes)) def reset_output_layer_parameters(self): self.lm_output_learned_bias = nn.Parameter(th.zeros(1)) self.embed_out.reset_parameters() def forward( self, node_feat, in_degree, out_degree, path_data, dist, attn_mask=None, ): num_graphs, max_num_nodes, _ = node_feat.shape deg_emb = 
self.degree_encoder(th.stack((in_degree, out_degree))) # node feature + degree encoding as input node_feat = self.atom_encoder(node_feat.int()).sum(dim=-2) node_feat = node_feat + deg_emb graph_token_feat = self.graph_token.weight.unsqueeze(0).repeat( num_graphs, 1, 1 ) x = th.cat([graph_token_feat, node_feat], dim=1) # spatial encoding and path encoding serve as attention bias attn_bias = th.zeros( num_graphs, max_num_nodes + 1, max_num_nodes + 1, self.num_heads, device=dist.device, ) path_encoding = self.path_encoder(dist, path_data) spatial_encoding = self.spatial_encoder(dist) attn_bias[:, 1:, 1:, :] = path_encoding + spatial_encoding # spatial encoding of the virtual node t = self.graph_token_virtual_distance.weight.reshape( 1, 1, self.num_heads ) # Since the virtual node comes first, the spatial encodings between it # and other nodes will fill the 1st row and 1st column (omit num_graphs # and num_heads dimensions) of attn_bias matrix by broadcasting. attn_bias[:, 1:, 0, :] = attn_bias[:, 1:, 0, :] + t attn_bias[:, 0, :, :] = attn_bias[:, 0, :, :] + t x = self.emb_layer_norm(x) for layer in self.layers: x = layer( x, attn_mask=attn_mask, attn_bias=attn_bias, ) graph_rep = x[:, 0, :] graph_rep = self.layer_norm( self.activation_fn(self.lm_head_transform_weight(graph_rep)) ) graph_rep = self.embed_out(graph_rep) + self.lm_output_learned_bias return graph_rep ================================================ FILE: examples/core/gat/README.md ================================================ Graph Attention Networks (GAT) ============ - Paper link: [https://arxiv.org/abs/1710.10903](https://arxiv.org/abs/1710.10903) - Author's code repo (tensorflow implementation): [https://github.com/PetarV-/GAT](https://github.com/PetarV-/GAT). - Popular pytorch implementation: [https://github.com/Diego999/pyGAT](https://github.com/Diego999/pyGAT). 
How to run ------- Run with the following for multiclass node classification (available datasets: "cora", "citeseer", "pubmed") ```bash python3 train.py --dataset cora ``` > **_NOTE:_** Users may occasionally run into low accuracy issue (e.g., test accuracy < 0.8) due to overfitting. This can be resolved by adding Early Stopping or reducing maximum number of training epochs. Summary ------- * cora: ~0.821 * citeseer: ~0.710 * pubmed: ~0.780 ================================================ FILE: examples/core/gat/train.py ================================================ import argparse import time import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F from dgl import AddSelfLoop from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset class GAT(nn.Module): def __init__(self, in_size, hid_size, out_size, heads): super().__init__() self.gat_layers = nn.ModuleList() # two-layer GAT self.gat_layers.append( dglnn.GATConv( in_size, hid_size, heads[0], feat_drop=0.6, attn_drop=0.6, activation=F.elu, ) ) self.gat_layers.append( dglnn.GATConv( hid_size * heads[0], out_size, heads[1], feat_drop=0.6, attn_drop=0.6, activation=None, ) ) def forward(self, g, inputs): h = inputs for i, layer in enumerate(self.gat_layers): h = layer(g, h) if i == len(self.gat_layers) - 1: # last layer h = h.mean(1) else: # other layer(s) h = h.flatten(1) return h def evaluate(g, features, labels, mask, model): model.eval() with torch.no_grad(): logits = model(g, features) logits = logits[mask] labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) def train(g, features, labels, masks, model, num_epochs): # Define train/val samples, loss function and optimizer train_mask = masks[0] val_mask = masks[1] loss_fcn = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=5e-3, weight_decay=5e-4) for epoch in range(num_epochs): t0 = time.time() 
model.train() logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() acc = evaluate(g, features, labels, val_mask, model) t1 = time.time() print( "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} | Time {:.4f}".format( epoch, loss.item(), acc, t1 - t0 ) ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--dataset", type=str, default="cora", help="Dataset name ('cora', 'citeseer', 'pubmed').", ) parser.add_argument( "--num_epochs", type=int, default=200, help="Number of epochs for train.", ) parser.add_argument( "--num_gpus", type=int, default=0, help="Number of GPUs used for train and evaluation.", ) args = parser.parse_args() print(f"Training with DGL built-in GATConv module.") # Load and preprocess dataset transform = ( AddSelfLoop() ) # by default, it will first remove self-loops to prevent duplication if args.dataset == "cora": data = CoraGraphDataset(transform=transform) elif args.dataset == "citeseer": data = CiteseerGraphDataset(transform=transform) elif args.dataset == "pubmed": data = PubmedGraphDataset(transform=transform) else: raise ValueError("Unknown dataset: {}".format(args.dataset)) g = data[0] if args.num_gpus > 0 and torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") g = g.int().to(device) features = g.ndata["feat"] labels = g.ndata["label"] masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"] # Create GAT model in_size = features.shape[1] out_size = data.num_classes model = GAT(in_size, 8, out_size, heads=[8, 1]).to(device) print("Training...") train(g, features, labels, masks, model, args.num_epochs) print("Testing...") acc = evaluate(g, features, labels, masks[2], model) print("Test accuracy {:.4f}".format(acc)) ================================================ FILE: examples/core/gated_gcn/README.md ================================================ Gated Graph ConvNet 
(GatedGCN) ============================== * paper link: [https://arxiv.org/abs/2003.00982.pdf](https://arxiv.org/abs/2003.00982.pdf) ## Dataset Task: Graph Property Prediction | Dataset | #Graphs | #Node Feats | #Edge Feats | Metric | | :---------: | :-----: | :---------: | :---------: | :-----: | | ogbg-molhiv | 41,127 | 9 | 3 | ROC-AUC | How to run ---------- ```bash python3 train.py --dataset ogbg-molhiv --num_gpus 0 --num_epochs 50 ``` ## Summary * ogbg-molhiv: ~0.781 ================================================ FILE: examples/core/gated_gcn/train.py ================================================ """ Gated Graph Convolutional Network module for graph classification tasks """ import argparse import time import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim from dgl.dataloading import GraphDataLoader from dgl.nn.pytorch import GatedGCNConv from dgl.nn.pytorch.glob import AvgPooling from ogb.graphproppred import DglGraphPropPredDataset, Evaluator from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder class GatedGCN(nn.Module): def __init__( self, hid_dim, out_dim, num_layers, dropout=0.2, batch_norm=True, residual=True, activation=F.relu, ): super(GatedGCN, self).__init__() self.num_layers = num_layers self.dropout = dropout self.node_encoder = AtomEncoder(hid_dim) self.edge_encoder = BondEncoder(hid_dim) self.layers = nn.ModuleList() for _ in range(self.num_layers): layer = GatedGCNConv( input_feats=hid_dim, edge_feats=hid_dim, output_feats=hid_dim, dropout=dropout, batch_norm=batch_norm, residual=residual, activation=activation, ) self.layers.append(layer) self.pooling = AvgPooling() self.output = nn.Linear(hid_dim, out_dim) def forward(self, g, node_feat, edge_feat): # Encode node and edge feature. hv = self.node_encoder(node_feat) he = self.edge_encoder(edge_feat) # GatedGCNConv layers. for layer in self.layers: hv, he = layer(g, hv, he) # Output project. 
h_g = self.pooling(g, hv) return self.output(h_g) def train(model, device, data_loader, opt, loss_fn): model.train() train_loss = [] for g, labels in data_loader: g = g.to(device) labels = labels.to(torch.float32).to(device) logits = model(g, g.ndata["feat"], g.edata["feat"]) loss = loss_fn(logits, labels) opt.zero_grad() loss.backward() opt.step() train_loss.append(loss.item()) return sum(train_loss) / len(train_loss) @torch.no_grad() def evaluate(model, device, data_loader, evaluator): model.eval() y_true, y_pred = [], [] for g, labels in data_loader: g = g.to(device) logits = model(g, g.ndata["feat"], g.edata["feat"]) y_true.append(labels.detach().cpu()) y_pred.append(logits.detach().cpu()) y_true = torch.cat(y_true, dim=0).numpy() y_pred = torch.cat(y_pred, dim=0).numpy() return evaluator.eval({"y_true": y_true, "y_pred": y_pred})["rocauc"] if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--dataset", type=str, default="ogbg-molhiv", help="Dataset name ('ogbg-molhiv', 'ogbg-molbace', 'ogbg-molmuv').", ) parser.add_argument( "--num_epochs", type=int, default=200, help="Number of epochs for train.", ) parser.add_argument( "--num_gpus", type=int, default=0, help="Number of GPUs used for train and evaluation.", ) args = parser.parse_args() print("Training with DGL built-in GATConv module.") # Load ogb dataset & evaluator. dataset = DglGraphPropPredDataset(name=args.dataset) evaluator = Evaluator(name=args.dataset) if args.num_gpus > 0 and torch.cuda.is_available(): device = torch.device("cuda") else: device = torch.device("cpu") n_classes = dataset.num_tasks split_idx = dataset.get_idx_split() train_loader = GraphDataLoader( dataset[split_idx["train"]], batch_size=32, shuffle=True, ) valid_loader = GraphDataLoader(dataset[split_idx["valid"]], batch_size=32) test_loader = GraphDataLoader(dataset[split_idx["test"]], batch_size=32) # Load model. 
model = GatedGCN(hid_dim=256, out_dim=n_classes, num_layers=8).to(device) print(model) opt = optim.Adam(model.parameters(), lr=0.01) loss_fn = nn.BCEWithLogitsLoss() print("---------- Training ----------") for epoch in range(args.num_epochs): # Kick off training. t0 = time.time() loss = train(model, device, train_loader, opt, loss_fn) t1 = time.time() # Evaluate the prediction. val_acc = evaluate(model, device, valid_loader, evaluator) print( f"Epoch {epoch:05d} | Loss {loss:.4f} | Accuracy {val_acc:.4f} | " f"Time {t1 - t0:.4f}" ) acc = evaluate(model, device, test_loader, evaluator) print(f"Test accuracy {acc:.4f}") ================================================ FILE: examples/core/graphsage/node_classification.py ================================================ """ This script trains and tests a GraphSAGE model based on the information of a full graph. This flowchart describes the main functional sequence of the provided example. main │ ├───> Load and preprocess full dataset │ ├───> Instantiate SAGE model │ ├───> train │ │ │ └───> Training loop │ │ │ └───> SAGE.forward └───> test │ └───> Evaluate the model """ import argparse import time import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F from dgl import AddSelfLoop from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset class SAGE(nn.Module): def __init__(self, in_size, hidden_size, out_size): super().__init__() self.layers = nn.ModuleList() # Two-layer GraphSAGE-gcn. 
self.layers.append(dglnn.SAGEConv(in_size, hidden_size, "gcn")) self.layers.append(dglnn.SAGEConv(hidden_size, out_size, "gcn")) self.dropout = nn.Dropout(0.5) def forward(self, graph, x): hidden_x = x for layer_idx, layer in enumerate(self.layers): hidden_x = layer(graph, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) hidden_x = self.dropout(hidden_x) return hidden_x def evaluate(g, features, labels, mask, model): model.eval() with torch.no_grad(): logits = model(g, features) logits = logits[mask] labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) def train(g, features, labels, masks, model): # Define train/val samples, loss function and optimizer. train_mask, val_mask = masks loss_fcn = nn.CrossEntropyLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) # Training loop. for epoch in range(200): t0 = time.time() model.train() logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() t1 = time.time() acc = evaluate(g, features, labels, val_mask, model) print( f"Epoch {epoch:05d} | Loss {loss.item():.4f} | Accuracy {acc:.4f} | " f"Time {t1 - t0:.4f}" ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="GraphSAGE") parser.add_argument( "--dataset", type=str, default="cora", help="Dataset name ('cora', 'citeseer', 'pubmed')", ) args = parser.parse_args() print(f"Training with DGL built-in GraphSage module") ##################################################################### # (HIGHLIGHT) Node classification task is a supervise learning task # in which the model try to predict the label of a certain node. # In this example, graph sage algorithm is applied to this task. # A good accuracy can be achieved after a few steps of training. # # First, the whole graph is loaded and transformed. 
Then the training # process is performed on a model which is composed of 2 GraphSAGE-gcn # layer. Finally, the performance of the model is evaluated on test set. ##################################################################### # Load and preprocess dataset. transform = ( AddSelfLoop() ) # By default, it will first remove self-loops to prevent duplication. if args.dataset == "cora": data = CoraGraphDataset(transform=transform) elif args.dataset == "citeseer": data = CiteseerGraphDataset(transform=transform) elif args.dataset == "pubmed": data = PubmedGraphDataset(transform=transform) else: raise ValueError(f"Unknown dataset: {args.dataset}") g = data[0] device = torch.device("cuda" if torch.cuda.is_available() else "cpu") g = g.int().to(device) features = g.ndata["feat"] labels = g.ndata["label"] masks = (g.ndata["train_mask"], g.ndata["val_mask"]) # Create GraphSAGE model. in_size = features.shape[1] out_size = data.num_classes model = SAGE(in_size, 16, out_size).to(device) # Model training. print("Training...") train(g, features, labels, masks, model) # Test the model. print("Testing...") acc = evaluate(g, features, labels, g.ndata["test_mask"], model) print(f"Test accuracy {acc:.4f}") ================================================ FILE: examples/core/rgcn/README.md ================================================ # Node classification on heterogeneous graph with RGCN This example aims to demonstrate how to run node classification task on heterogeneous graph with **DGL**. Models are not tuned to achieve the best accuracy yet. ## Run on `ogbn-mag` dataset In the preprocess stage, reverse edges are added and duplicate edges are removed. Feature data of `author` and `institution` node types are generated dynamically with embedding layer. 

### Sample on CPU and train/infer on CPU

```
python3 hetero_rgcn.py --dataset ogbn-mag
```

### Sample on CPU and train/infer on GPU

```
python3 hetero_rgcn.py --dataset ogbn-mag --num_gpus 1
```

### Resource usage and time cost

The results below were roughly collected on an AWS EC2 **g4dn.metal**, 384GB RAM, 96 vCPUs(Cascade Lake P-8259L), 8 NVIDIA T4 GPUs(16GB RAM). CPU RAM usage is the peak value of the `used` field of the `free` command, which is only a rough estimate. Please refer to `RSS`/`USS`/`PSS` for more accurate numbers. GPU RAM usage is the peak value recorded by the `nvidia-smi` command.

| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~1.1GB       | ~7GB          | 0           | 0GB           | ~233s                    |
| ~1.1GB       | ~5GB          | 1           | 4.5GB         | ~73.6s                   |

### Accuracies

```
Epoch: 01, Loss: 2.3386, Valid: 47.67%, Test: 46.96%
Epoch: 02, Loss: 1.5563, Valid: 47.66%, Test: 47.02%
Epoch: 03, Loss: 1.1557, Valid: 46.58%, Test: 45.42%
Test accuracy 45.3850
```

## Run on `ogb-lsc-mag240m` dataset

In the preprocess stage, reverse edges are added and duplicate edges are removed. What's more, feature data are generated in advance for `author` and `institution` node types via message passing.
Since such preprocessing will usually take a long time, we also offer the above files for download: * [`paper-feat.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/paper-feat.npy) * [`author-feat.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/author-feat.npy) * [`inst-feat.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/inst-feat.npy) * [`hetero-graph.dgl`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/hetero-graph.dgl) ### Sample on CPU and train/infer on CPU ``` python3 hetero_rgcn.py --dataset ogb-lsc-mag240m ``` ### Sample on CPU and train/infer on GPU ``` python3 hetero_rgcn.py --dataset ogb-lsc-mag240m --num_gpus 1 ``` ### Resource usage and time cost Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 96 vCPUs(Cascade Lake P-8259L), 8 NVIDIA T4 GPUs(16GB RAM). CPU RAM usage is the peak value of `used` field of `free` command which is a bit rough. Please refer to `RSS`/`USS`/`PSS` which are more accurate. GPU RAM usage is the peak value recorded by `nvidia-smi` command. | Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) | | ------------ | ------------- | ----------- | ------------- | ------------------------ | | ~404GB | ~72GB | 0 | 0GB | ~325s | | ~404GB | ~61GB | 1 | 14GB | ~178s | ### Accuracies ``` Epoch: 01, Loss: 2.0798, Valid: 52.04% Epoch: 02, Loss: 1.8652, Valid: 54.51% Epoch: 03, Loss: 1.8175, Valid: 53.71% ``` ================================================ FILE: examples/core/rgcn/hetero_rgcn.py ================================================ """ This script, `hetero_rgcn.py`, trains and tests a Relational Graph Convolutional Network (R-GCN) model for node classification on the Open Graph Benchmark (OGB) dataset "ogbn-mag". 
For more details on "ogbn-mag", please refer to the node property prediction page of the OGB website: (https://ogb.stanford.edu/docs/nodeprop/)
def extract_embed(node_embed, input_nodes):
    """Look up embeddings for every node type in the batch except "paper".

    Parameters
    ----------
    node_embed : callable
        Embedding layer. It is called once with a dictionary that maps each
        remaining node type to its node IDs.
    input_nodes : dict
        Node IDs of the current mini-batch, keyed by node type.

    Returns
    -------
    object
        Whatever ``node_embed`` returns for the filtered dictionary.
    """
    # "paper" IDs are dropped before the lookup; every other node type is
    # forwarded to the embedding layer unchanged.
    non_paper_ids = {}
    for ntype in input_nodes:
        if ntype == "paper":
            continue
        non_paper_ids[ntype] = input_nodes[ntype]
    return node_embed(non_paper_ids)
class RelGraphConvLayer(nn.Module):
    """Relational graph convolution layer for heterogeneous graphs.

    For every relation the layer aggregates neighbor messages with a
    relation-specific weight matrix, and for every node type it adds a
    linear transformation of the destination nodes' own features
    (``loop_weights``). An optional activation and dropout follow.

    Parameters
    ----------
    in_size : int
        Input feature size.
    out_size : int
        Output feature size.
    ntypes : list[str]
        Node type names; one self-loop ``nn.Linear`` is created per type.
    relation_names : list[str]
        Relation (edge type) names; one ``GraphConv`` and one weight
        ``nn.Linear`` are created per relation.
    activation : callable, optional
        Activation applied to the output of each node type. Skipped when
        falsy.
    dropout : float, optional
        Dropout probability applied to the output. Default: 0.0.
    """

    def __init__(
        self,
        in_size,
        out_size,
        ntypes,
        relation_names,
        activation=None,
        dropout=0.0,
    ):
        super(RelGraphConvLayer, self).__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.ntypes = ntypes
        self.relation_names = relation_names
        self.activation = activation

        ########################################################################
        # (HIGHLIGHT) HeteroGraphConv is a graph convolution operator over
        # heterogeneous graphs. A dictionary is passed where the key is the
        # relation name and the value is the instance of GraphConv. norm="right"
        # is to divide the aggregated messages by each node's in-degrees, which
        # is equivalent to averaging the received messages. weight=False and
        # bias=False as we will use our own weight matrices defined later.
        ########################################################################
        self.conv = dglnn.HeteroGraphConv(
            {
                rel: dglnn.GraphConv(
                    in_size, out_size, norm="right", weight=False, bias=False
                )
                for rel in relation_names
            }
        )

        # Create a separate Linear layer for each relationship. Each
        # relationship has its own weights which will be applied to the node
        # features before performing convolution.
        self.weight = nn.ModuleDict(
            {
                rel_name: nn.Linear(in_size, out_size, bias=False)
                for rel_name in self.relation_names
            }
        )

        # Create a separate Linear layer for each node type.
        # loop_weights are used to update the output embedding of each target
        # node based on its own features, thereby allowing the model to refine
        # the node representations. Note that this does not imply the existence
        # of self-loop edges in the graph. It is similar to residual connection.
        # NOTE: this dictionary is built exactly once; building it a second
        # time would only discard the first set of freshly created layers.
        self.loop_weights = nn.ModuleDict(
            {
                ntype: nn.Linear(in_size, out_size, bias=True)
                for ntype in self.ntypes
            }
        )

        self.dropout = nn.Dropout(dropout)
        # Initialize parameters of the model.
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize every relation weight and self-loop weight."""
        for layer in self.weight.values():
            layer.reset_parameters()

        for layer in self.loop_weights.values():
            layer.reset_parameters()

    def forward(self, g, inputs):
        """
        Parameters
        ----------
        g : DGLGraph
            Input graph.
        inputs : dict[str, torch.Tensor]
            Node feature for each node type.

        Returns
        -------
        dict[str, torch.Tensor]
            New node features for each node type.
        """
        # Work on a local-scope handle of the graph (not a deep copy) so that
        # features written during this call do not leak into the caller's
        # graph object.
        g = g.local_var()

        # Create a dictionary of weights for each relationship. The weights
        # are retrieved from the Linear layers defined earlier. nn.Linear
        # stores its weight as (out_size, in_size), hence the transpose.
        weight_dict = {
            rel_name: {"weight": self.weight[rel_name].weight.T}
            for rel_name in self.relation_names
        }

        # Create a dictionary of node features for the destination nodes in
        # the graph. We slice the node features according to the number of
        # destination nodes of each type. This is necessary because the
        # self-loop term is computed only on the destination nodes' features,
        # so the sliced tensors must line up with the convolution output.
        inputs_dst = {
            k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items()
        }

        # Apply the convolution operation on the graph. mod_kwargs are
        # additional arguments for each relation function defined in the
        # HeteroGraphConv. In this case, it's the weights for each relation.
        hs = self.conv(g, inputs, mod_kwargs=weight_dict)

        def _apply(ntype, h):
            # Add the `loop_weights` projection of the node's own input
            # features, effectively acting as a residual connection.
            h = h + self.loop_weights[ntype](inputs_dst[ntype])
            if self.activation:
                h = self.activation(h)
            return self.dropout(h)

        # Apply the function defined above for each node type. This adds the
        # self-loop term, then applies the activation function and dropout.
        return {ntype: _apply(ntype, h) for ntype, h in hs.items()}
def extract_node_features(name, g, input_nodes, node_embed, feats, device):
    """Gather the input features of one mini-batch, keyed by node type.

    Parameters
    ----------
    name : str
        Dataset name. "ogbn-mag" selects the embedding-based path; any other
        value selects the raw-feature path.
    g : DGLGraph
        Graph holding the "paper" features under ``g.ndata["feat"]``; only
        read on the "ogbn-mag" path.
    input_nodes : dict
        Input node IDs of the batch, keyed by node type.
    node_embed : callable
        Embedding layer for the non-"paper" node types; only used on the
        "ogbn-mag" path.
    feats : dict
        Per-node-type feature storage; only read on the raw-feature path.
    device : str or torch.device
        Device on which the returned tensors are placed.

    Returns
    -------
    dict
        Feature tensors on ``device``, keyed by node type.
    """
    if name != "ogbn-mag":
        gathered = {}
        for ntype in input_nodes:
            ids = input_nodes[ntype].cpu()
            gathered[ntype] = feats[ntype][ids].to(device)
        # Original feature data are stored in float16 while model weights are
        # float32, so the features are converted to float32.
        # [TODO] Enable mixed precision training on GPU.
        return {ntype: feat.float() for ntype, feat in gathered.items()}

    # Learnable embeddings for every node type except "paper" ...
    node_features = extract_embed(node_embed, input_nodes)
    # ... plus the raw "paper" features stored on the graph.
    paper_ids = input_nodes["paper"].cpu()
    node_features.update({"paper": g.ndata["feat"]["paper"][paper_ids]})
    return {ntype: feat.to(device) for ntype, feat in node_features.items()}
for epoch in range(3): num_train = split_idx["train"][category].shape[0] t0 = time.time() model.train() total_loss = 0 for input_nodes, seeds, blocks in tqdm( train_loader, desc=f"Epoch {epoch:02d}" ): # Move the input data onto the device. blocks = [blk.to(device) for blk in blocks] # We only predict the nodes with type "category". seeds = seeds[category] batch_size = seeds.shape[0] # Extract the node features from embedding layer or raw features. node_features = extract_node_features( dataset, g, input_nodes, node_embed, feats, device ) lbl = labels[seeds.cpu()].to(device) # Reset gradients. optimizer.zero_grad() # Generate predictions. logits = model(node_features, blocks)[category] y_hat = logits.log_softmax(dim=-1) loss = F.nll_loss(y_hat, lbl) loss.backward() optimizer.step() total_loss += loss.item() * batch_size t1 = time.time() loss = total_loss / num_train # Evaluate the model on the val/test set. valid_acc = evaluate( dataset, g, feats, model, node_embed, labels, device, split_idx["valid"], ) test_key = "test" if dataset == "ogbn-mag" else "test-dev" test_acc = evaluate( dataset, g, feats, model, node_embed, labels, device, split_idx[test_key], save_test_submission=(dataset == "ogb-lsc-mag240m"), ) print( f"Epoch: {epoch +1 :02d}, " f"Loss: {loss:.4f}, " f"Valid: {100 * valid_acc:.2f}%, " f"Test: {100 * test_acc:.2f}%, " f"Time {t1 - t0:.4f}" ) @torch.no_grad() def evaluate( dataset, g, feats, model, node_embed, labels, device, idx, save_test_submission=False, ): # Switches the model to evaluation mode. model.eval() category = "paper" if dataset == "ogbn-mag": evaluator = Evaluator(name="ogbn-mag") else: evaluator = MAG240MEvaluator() sampler = dgl.dataloading.MultiLayerNeighborSampler([25, 10], fused=False) dataloader = dgl.dataloading.DataLoader( g, idx, sampler, batch_size=4096, shuffle=False, num_workers=0, device=device, ) # To store the predictions. 
def main(args):
    """Train an R-GCN node classifier and report its test accuracy.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line arguments. ``dataset``, ``num_gpus`` and
        ``num_workers`` are read here; the whole namespace is forwarded to
        ``prepare_data``.
    """
    device = (
        "cuda:0" if torch.cuda.is_available() and args.num_gpus > 0 else "cpu"
    )

    # Prepare the data.
    g, labels, num_classes, split_idx, train_loader, feats = prepare_data(
        args, device
    )

    feat_size = 128 if args.dataset == "ogbn-mag" else 768

    # Create the embedding layer and move it to the appropriate device.
    # Only "ogbn-mag" needs learnable embeddings; for the other dataset the
    # features of all node types are read from `feats`.
    embed_layer = None
    if args.dataset == "ogbn-mag":
        embed_layer = rel_graph_embed(g, feat_size).to(device)
        print(
            "Number of embedding parameters: "
            f"{sum(p.numel() for p in embed_layer.parameters())}"
        )

    # Initialize the entity classification model.
    model = EntityClassify(g, feat_size, num_classes).to(device)

    print(
        "Number of model parameters: "
        f"{sum(p.numel() for p in model.parameters())}"
    )

    # Explicitly re-initialize all parameters before training. Only a missing
    # `reset_parameters()` API (old PyTorch versions) is tolerated; any other
    # failure is a real error and must not be silently swallowed.
    try:
        if embed_layer is not None:
            embed_layer.reset_parameters()
        model.reset_parameters()
    except AttributeError:
        # Old pytorch version doesn't support reset_parameters() API. The
        # parameters then keep the values assigned at construction time.
        pass

    # `itertools.chain()` makes several iterables act as one big iterable.
    # Here it yields the parameters of both the model and the embed_layer
    # (when present) so that a single optimizer updates all of them.
    all_params = itertools.chain(
        model.parameters(),
        [] if embed_layer is None else embed_layer.parameters(),
    )
    optimizer = torch.optim.Adam(all_params, lr=0.01)

    # `expected_max` is the number of physical cores on your machine.
    # The `logical` parameter, when set to False, ensures that the count
    # returned is the number of physical cores instead of logical cores
    # (which could be higher due to technologies like Hyper-Threading).
    # psutil returns None when the count cannot be determined; in that case
    # the sanity check is skipped instead of crashing on `int(None)`.
    expected_max = psutil.cpu_count(logical=False)
    if expected_max is not None and args.num_workers >= int(expected_max):
        print(
            "[ERROR] You specified num_workers are larger than physical "
            f"cores, please set any number less than {int(expected_max)}",
            file=sys.stderr,
        )

    train(
        args.dataset,
        g,
        feats,
        model,
        embed_layer,
        optimizer,
        train_loader,
        split_idx,
        labels,
        device,
    )

    print("Testing...")

    # The two datasets name their held-out split differently.
    test_key = "test" if args.dataset == "ogbn-mag" else "test-dev"
    test_acc = evaluate(
        args.dataset,
        g,
        feats,
        model,
        embed_layer,
        labels,
        device,
        split_idx[test_key],
        save_test_submission=(args.dataset == "ogb-lsc-mag240m"),
    )
    print(f"Test accuracy {test_acc*100:.4f}")
We assume that all servers are under a subnet with the IP range `192.168.0.0` to `192.168.255.255`. The exports configuration needs to be modified to
Users need to set their own IP configuration file `ip_config.txt` before training. For example, if we have four machines in the current cluster, the IP configuration could look like this:
In order to run with `GraphBolt`, we need to partition the graph into `GraphBolt` data formats. Please note that both `DGL` and `GraphBolt` partitions are saved together.
Compared to `DGL`, `GraphBolt`'s sampler works faster (sample time reduced to **80%** and **77%** for `ogbn-products` and `ogbn-papers100M` respectively). `Min` and `Max` are statistics over all trainers on all nodes (machines). As for RAM usage, the shared memory usage (measured by the **shared** field of the `free` command) decreases due to the smaller graph partitions in `GraphBolt`, though the peak memory used by processes (measured by the **used** field of the `free` command) does not decrease.
`ogbn-products` | Data Formats | Sample Time Per Epoch (CPU) | Test Accuracy (10 epochs) | shared | used (peak) | | ------------ | --------------------------- | -------------------------------- | ----- | ---- | | DGL | Min: 1.2884s, Max: 1.4159s | Min: 64.38%, Max: 70.42% | 2.4GB | 7.8GB| | GraphBolt | Min: 1.0589s, Max: 1.1400s | Min: 61.68%, Max: 71.23% | 1.1GB | 7.8GB| `ogbn-papers100M` | Data Formats | Sample Time Per Epoch (CPU) | Test Accuracy (10 epochs) | shared | used (peak) | | ------------ | --------------------------- | -------------------------------- | ----- | ---- | | DGL | Min: 5.5570s, Max: 6.1900s | Min: 29.12%, Max: 34.33% | 84GB | 43GB | | GraphBolt | Min: 4.5046s, Max: 4.7718s | Min: 29.11%, Max: 33.49% | 67GB | 43GB | ================================================ FILE: examples/distributed/graphsage/node_classification.py ================================================ import argparse import socket import time import dgl import dgl.distributed import dgl.nn.pytorch as dglnn import numpy as np import torch as th import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import tqdm class DistSAGE(nn.Module): """ SAGE model for distributed train and evaluation. Parameters ---------- in_feats : int Feature dimension. n_hidden : int Hidden layer dimension. n_classes : int Number of classes. n_layers : int Number of layers. activation : callable Activation function. dropout : float Dropout value. 
""" def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for _ in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, blocks, x): """ Forward function. Parameters ---------- blocks : List[DGLBlock] Sampled blocks. x : DistTensor Feature data. """ h = x for i, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if i != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h def inference(self, g, x, batch_size, device): """ Distributed layer-wise inference with the GraphSAGE model on full neighbors. Parameters ---------- g : DistGraph Input Graph for inference. x : DistTensor Node feature data of input graph. Returns ------- DistTensor Inference results. """ # Split nodes to each trainer. nodes = dgl.distributed.node_split( np.arange(g.num_nodes()), g.get_partition_book(), force_even=True, ) for i, layer in enumerate(self.layers): # Create DistTensor to save forward results. if i == len(self.layers) - 1: out_dim = self.n_classes name = "h_last" else: out_dim = self.n_hidden name = "h" y = dgl.distributed.DistTensor( (g.num_nodes(), out_dim), th.float32, name, persistent=True, ) print(f"|V|={g.num_nodes()}, inference batch size: {batch_size}") # `-1` indicates all inbound edges will be inlcuded, namely, full # neighbor sampling. 
def compute_acc(pred, labels):
    """
    Compute the fraction of rows whose arg-max class equals the label.

    Parameters
    ----------
    pred : torch.Tensor
        Per-class scores; the predicted class is the arg-max along dim 1.
    labels : torch.Tensor
        Ground-truth labels; cast to ``long`` before the comparison.

    Returns
    -------
    torch.Tensor
        Scalar tensor: number of correct predictions divided by ``len(pred)``.
    """
    target = labels.long()
    predicted_class = th.argmax(pred, dim=1)
    num_correct = (predicted_class == target).float().sum()
    return num_correct / len(pred)
data : Packed Data Packed data includes train/val/test IDs, feature dimension, number of classes, graph. """ train_nid, val_nid, test_nid, in_feats, n_classes, g = data sampler = dgl.dataloading.NeighborSampler( [int(fanout) for fanout in args.fan_out.split(",")] ) dataloader = dgl.distributed.DistNodeDataLoader( g, train_nid, sampler, batch_size=args.batch_size, shuffle=True, drop_last=False, ) model = DistSAGE( in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout, ) model = model.to(device) if args.num_gpus == 0: model = th.nn.parallel.DistributedDataParallel(model) else: model = th.nn.parallel.DistributedDataParallel( model, device_ids=[device], output_device=device ) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr) # Training loop. iter_tput = [] epoch = 0 epoch_time = [] test_acc = 0.0 for _ in range(args.num_epochs): epoch += 1 tic = time.time() # Various time statistics. sample_time = 0 forward_time = 0 backward_time = 0 update_time = 0 num_seeds = 0 num_inputs = 0 start = time.time() step_time = [] with model.join(): for step, (input_nodes, seeds, blocks) in enumerate(dataloader): tic_step = time.time() sample_time += tic_step - start # Slice feature and label. batch_inputs = g.ndata["features"][input_nodes] batch_labels = g.ndata["labels"][seeds].long() num_seeds += len(blocks[-1].dstdata[dgl.NID]) num_inputs += len(blocks[0].srcdata[dgl.NID]) # Move to target device. blocks = [block.to(device) for block in blocks] batch_inputs = batch_inputs.to(device) batch_labels = batch_labels.to(device) # Compute loss and prediction. 
def main(args):
    """
    Set up the distributed context, split the nodes, then train and evaluate.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments. Reads ``ip_config``, ``use_graphbolt``,
        ``backend``, ``graph_name``, ``part_config``, ``num_gpus`` and
        ``n_classes``; the full namespace is forwarded to ``run``.
    """
    hostname = socket.gethostname()

    print(f"{hostname}: Initializing DistDGL.")
    dgl.distributed.initialize(args.ip_config, use_graphbolt=args.use_graphbolt)

    print(f"{hostname}: Initializing PyTorch process group.")
    th.distributed.init_process_group(backend=args.backend)

    print(f"{hostname}: Initializing DistGraph.")
    g = dgl.distributed.DistGraph(args.graph_name, part_config=args.part_config)
    print(f"Rank of {hostname}: {g.rank()}")

    # Split train/val/test IDs for each trainer. When the partitions carry a
    # "trainer_id" node feature it is forwarded to node_split as well.
    pb = g.get_partition_book()
    split_kwargs = {"force_even": True}
    if "trainer_id" in g.ndata:
        split_kwargs["node_trainer_ids"] = g.ndata["trainer_id"]
    train_nid, val_nid, test_nid = [
        dgl.distributed.node_split(g.ndata[mask_name], pb, **split_kwargs)
        for mask_name in ("train_mask", "val_mask", "test_mask")
    ]

    # Report how many of the assigned IDs live in the local partition.
    local_nid = pb.partid2nids(pb.partid).detach().numpy()
    num_train_local, num_val_local, num_test_local = [
        len(np.intersect1d(nid.numpy(), local_nid))
        for nid in (train_nid, val_nid, test_nid)
    ]
    print(
        f"part {g.rank()}, train: {len(train_nid)} (local: {num_train_local}), "
        f"val: {len(val_nid)} (local: {num_val_local}), "
        f"test: {len(test_nid)} (local: {num_test_local})"
    )
    del local_nid

    # Pick the device: CPU, or one GPU chosen round-robin by trainer rank.
    if args.num_gpus == 0:
        device = th.device("cpu")
    else:
        device = th.device(f"cuda:{g.rank() % args.num_gpus}")

    # A value of 0 means "infer the class count from the non-NaN labels".
    n_classes = args.n_classes
    if n_classes == 0:
        all_labels = g.ndata["labels"][np.arange(g.num_nodes())]
        labelled = th.logical_not(th.isnan(all_labels))
        n_classes = len(th.unique(all_labels[labelled]))
        del all_labels, labelled
    print(f"Number of classes: {n_classes}")

    # Pack data.
    in_feats = g.ndata["features"].shape[1]
    data = (train_nid, val_nid, test_nid, in_feats, n_classes, g)

    # Train and evaluate.
    epoch_time, test_acc = run(args, device, data)
    print(
        f"Summary of node classification(GraphSAGE): GraphName "
        f"{args.graph_name} | TrainEpochTime(mean) {epoch_time:.4f} "
        f"| TestAccuracy {test_acc:.4f}"
    )
class DistSAGE(nn.Module):
    """GraphSAGE model built from mean-aggregating ``SAGEConv`` layers."""

    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        # Layer widths: input -> hidden, (n_layers - 2) hidden -> hidden
        # layers, then hidden -> output. A non-positive repeat count yields
        # no middle layers, so at least two layers are always created.
        widths = (
            [in_feats, n_hidden] + [n_hidden] * (n_layers - 2) + [n_classes]
        )
        self.layers = nn.ModuleList(
            [
                dglnn.SAGEConv(fan_in, fan_out, "mean")
                for fan_in, fan_out in zip(widths, widths[1:])
            ]
        )
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    def forward(self, blocks, x):
        """
        Apply the layers to the sampled blocks, one layer per block.

        Activation and dropout follow every layer except the last entry of
        ``self.layers``.
        """
        last_idx = len(self.layers) - 1
        hidden = x
        for idx, (conv, blk) in enumerate(zip(self.layers, blocks)):
            hidden = conv(blk, hidden)
            if idx != last_idx:
                hidden = self.dropout(self.activation(hidden))
        return hidden

    def inference(self, g, x, batch_size, device):
        """
        Layer-wise inference with full neighborhoods (no neighbor sampling).

        g : the entire graph.
        x : the input of entire node set.
        batch_size : number of seed nodes per mini-batch.
        device : device the per-batch computation runs on.

        Returns the distributed tensor holding the output of the last layer.
        """
        # Running multi-layer blocks per seed node would recompute the early
        # layers many times, so the representation of all nodes is computed
        # one layer at a time, with the nodes of each layer split in batches.
        seed_nodes = dgl.distributed.node_split(
            np.arange(g.num_nodes()),
            g.get_partition_book(),
            force_even=True,
        )
        out = dgl.distributed.DistTensor(
            (g.num_nodes(), self.n_hidden),
            th.float32,
            "h",
            persistent=True,
        )
        final_idx = len(self.layers) - 1
        for idx, conv in enumerate(self.layers):
            is_final = idx == final_idx
            if is_final:
                # The last layer has its own width, hence its own buffer.
                out = dgl.distributed.DistTensor(
                    (g.num_nodes(), self.n_classes),
                    th.float32,
                    "h_last",
                    persistent=True,
                )

            # A fanout of -1 selects every inbound edge.
            full_sampler = dgl.dataloading.NeighborSampler([-1])
            loader = dgl.distributed.DistNodeDataLoader(
                g,
                seed_nodes,
                full_sampler,
                batch_size=batch_size,
                shuffle=False,
                drop_last=False,
            )

            for input_nodes, output_nodes, blocks in tqdm.tqdm(loader):
                blk = blocks[0].to(device)
                feats = x[input_nodes].to(device)
                feats_dst = feats[: blk.number_of_dst_nodes()]
                hidden = conv(blk, (feats, feats_dst))
                if not is_final:
                    hidden = self.dropout(self.activation(hidden))
                # Results are written back from a CPU copy.
                out[output_nodes] = hidden.cpu()

            # The output of this layer is the input of the next one.
            x = out
            g.barrier()
        return out

    @contextmanager
    def join(self):
        """No-op stand-in for ``join`` so standalone runs can use ``with``."""
        yield
def _verify_edge_exclusion(blocks, pos_graph, exclude):
    """Assert that every sampled block respects the edge-exclusion mode."""
    seed_eids = pos_graph.edata[dgl.EID]
    for block in blocks:
        sampled_eids = block.edata[dgl.EID]
        contains_seed_edges = th.any(th.isin(sampled_eids, seed_eids))
        if exclude is None:
            assert contains_seed_edges
        elif exclude in ("self", "reverse_id"):
            assert not contains_seed_edges
        else:
            raise ValueError(f"Unsupported exclude type: {exclude}")


def run(args, device, data):
    """
    Train DistSAGE with an edge-prediction loss, then evaluate the embeddings.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments.
    device : torch.device
        Device the model and the mini-batches are moved to.
    data : tuple
        ``(train_eids, train_nids, in_feats, g, global_train_nid,
        global_valid_nid, global_test_nid, labels)``.
    """
    # Unpack data
    (
        train_eids,
        train_nids,
        in_feats,
        g,
        global_train_nid,
        global_valid_nid,
        global_test_nid,
        labels,
    ) = data

    # Create sampler
    neg_sampler = dgl.dataloading.negative_sampler.Uniform(args.num_negs)
    sampler = dgl.dataloading.NeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(",")]
    )

    # Create dataloader
    exclude = "reverse_id" if args.remove_edge else None
    reverse_eids = th.arange(g.num_edges()) if args.remove_edge else None
    dataloader = dgl.distributed.DistEdgeDataLoader(
        g,
        train_eids,
        sampler,
        negative_sampler=neg_sampler,
        exclude=exclude,
        reverse_eids=reverse_eids,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
    )

    # Define model and optimizer
    model = DistSAGE(
        in_feats,
        args.num_hidden,
        args.num_hidden,
        args.num_layers,
        F.relu,
        args.dropout,
    )
    model = model.to(device)
    if not args.standalone:
        if args.num_gpus == -1:
            model = th.nn.parallel.DistributedDataParallel(model)
        else:
            dev_id = g.rank() % args.num_gpus
            model = th.nn.parallel.DistributedDataParallel(
                model, device_ids=[dev_id], output_device=dev_id
            )
    loss_fcn = CrossEntropyLoss()
    loss_fcn = loss_fcn.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Training loop
    for epoch in range(args.num_epochs):
        num_seeds = 0
        num_inputs = 0

        step_time = []
        sample_t = []
        feat_copy_t = []
        forward_t = []
        backward_t = []
        update_t = []
        iter_tput = []

        start = time.time()
        with model.join():
            # Loop over the dataloader to sample the computation dependency
            # graph as a list of blocks.
            for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(
                dataloader
            ):
                if args.debug:
                    # Verify exclude_edges functionality.
                    _verify_edge_exclusion(blocks, pos_graph, exclude)

                tic_step = time.time()
                sample_t.append(tic_step - start)

                # Count the input nodes of this mini-batch so that the
                # "#inputs" figure of the epoch summary is meaningful.
                num_inputs += len(blocks[0].srcdata[dgl.NID])

                copy_t = time.time()
                pos_graph = pos_graph.to(device)
                neg_graph = neg_graph.to(device)
                blocks = [block.to(device) for block in blocks]
                batch_inputs = load_subtensor(g, input_nodes, device)
                copy_time = time.time()
                feat_copy_t.append(copy_time - copy_t)

                # Compute loss and prediction
                batch_pred = model(blocks, batch_inputs)
                loss = loss_fcn(batch_pred, pos_graph, neg_graph)
                forward_end = time.time()
                optimizer.zero_grad()
                loss.backward()
                compute_end = time.time()
                forward_t.append(forward_end - copy_time)
                backward_t.append(compute_end - forward_end)

                # Aggregate gradients in multiple nodes.
                optimizer.step()
                update_t.append(time.time() - compute_end)

                pos_edges = pos_graph.num_edges()
                step_t = time.time() - start
                step_time.append(step_t)
                iter_tput.append(pos_edges / step_t)
                num_seeds += pos_edges
                if step % args.log_every == 0:
                    # The first three steps are skipped as warm-up; until
                    # more steps exist, average what has been recorded
                    # instead of taking the mean of an empty slice (nan).
                    warmed_up = iter_tput[3:]
                    mean_tput = np.mean(warmed_up if warmed_up else iter_tput)
                    print(
                        "[{}] Epoch {:05d} | Step {:05d} | Loss {:.4f} | Speed "
                        "(samples/sec) {:.4f} | time {:.3f}s | sample {:.3f} | "
                        "copy {:.3f} | forward {:.3f} | backward {:.3f} | "
                        "update {:.3f}".format(
                            g.rank(),
                            epoch,
                            step,
                            loss.item(),
                            mean_tput,
                            np.sum(step_time[-args.log_every :]),
                            np.sum(sample_t[-args.log_every :]),
                            np.sum(feat_copy_t[-args.log_every :]),
                            np.sum(forward_t[-args.log_every :]),
                            np.sum(backward_t[-args.log_every :]),
                            np.sum(update_t[-args.log_every :]),
                        )
                    )
                start = time.time()

        print(
            "[{}]Epoch Time(s): {:.4f}, sample: {:.4f}, data copy: {:.4f}, "
            "forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #seeds: {}, "
            "#inputs: {}".format(
                g.rank(),
                np.sum(step_time),
                np.sum(sample_t),
                np.sum(feat_copy_t),
                np.sum(forward_t),
                np.sum(backward_t),
                np.sum(update_t),
                num_seeds,
                num_inputs,
            )
        )

    # evaluate the embedding using LogisticRegression
    pred = generate_emb(
        model if args.standalone else model.module,
        g,
        g.ndata["features"],
        args.batch_size_eval,
        device,
    )
    if g.rank() == 0:
        eval_acc, test_acc = compute_acc(
            pred, labels, global_train_nid, global_valid_nid, global_test_nid
        )
        print("eval acc {:.4f}; test acc {:.4f}".format(eval_acc, test_acc))

    if args.standalone:
        # save features into file
        th.save(pred, "emb.pt")
    else:
        # sync for eval and test
        th.distributed.barrier()
        g._client.barrier()

        # save features into file (only one trainer writes it)
        if g.rank() == 0:
            th.save(pred, "emb.pt")
train_eids = dgl.distributed.edge_split( th.ones((g.num_edges(),), dtype=th.bool), g.get_partition_book(), force_even=True, ) train_nids = dgl.distributed.node_split( th.ones((g.num_nodes(),), dtype=th.bool), g.get_partition_book() ) global_train_nid = th.LongTensor( np.nonzero(g.ndata["train_mask"][np.arange(g.num_nodes())]) ) global_valid_nid = th.LongTensor( np.nonzero(g.ndata["val_mask"][np.arange(g.num_nodes())]) ) global_test_nid = th.LongTensor( np.nonzero(g.ndata["test_mask"][np.arange(g.num_nodes())]) ) labels = g.ndata["labels"][np.arange(g.num_nodes())] if args.num_gpus == -1: device = th.device("cpu") else: dev_id = g.rank() % args.num_gpus device = th.device("cuda:" + str(dev_id)) # Pack data in_feats = g.ndata["features"].shape[1] global_train_nid = global_train_nid.squeeze() global_valid_nid = global_valid_nid.squeeze() global_test_nid = global_test_nid.squeeze() print("number of train {}".format(global_train_nid.shape[0])) print("number of valid {}".format(global_valid_nid.shape[0])) print("number of test {}".format(global_test_nid.shape[0])) data = ( train_eids, train_nids, in_feats, g, global_train_nid, global_valid_nid, global_test_nid, labels, ) run(args, device, data) print("parent ends") if __name__ == "__main__": parser = argparse.ArgumentParser(description="GCN") parser.add_argument("--graph_name", type=str, help="graph name") parser.add_argument("--id", type=int, help="the partition id") parser.add_argument( "--ip_config", type=str, help="The file for IP configuration" ) parser.add_argument( "--part_config", type=str, help="The path to the partition config file" ) parser.add_argument("--n_classes", type=int, help="the number of classes") parser.add_argument( "--num_gpus", type=int, default=-1, help="the number of GPU device. 
def load_ogb(name, root="dataset"):
    """
    Load an ogbn dataset and attach features, labels and split masks.

    Returns the graph and the number of distinct non-NaN label values.
    The graph gets the node data "features", "labels", "train_mask",
    "val_mask" and "test_mask".
    """
    dataset = DglNodePropPredDataset(name=name, root=root)
    split = dataset.get_idx_split()
    graph, labels = dataset[0]
    # Keep only the first label column.
    labels = labels[:, 0]

    graph.ndata["features"] = graph.ndata.pop("feat")
    graph.ndata["labels"] = labels
    num_labels = len(th.unique(labels[~th.isnan(labels)]))

    # Turn the node IDs of the training, validation, and test set into
    # boolean masks over all nodes.
    split_ids = {
        "train_mask": split["train"],
        "val_mask": split["valid"],
        "test_mask": split["test"],
    }
    masks = {}
    for mask_name, node_ids in split_ids.items():
        mask = th.zeros((graph.num_nodes(),), dtype=th.bool)
        mask[node_ids] = True
        masks[mask_name] = mask
    for mask_name, mask in masks.items():
        graph.ndata[mask_name] = mask

    return graph, num_labels
The trainer ids are stored\ in the node feature 'trainer_id'", ) argparser.add_argument( "--output", type=str, default="data", help="Output path of partitioned graph.", ) argparser.add_argument( "--use_graphbolt", action="store_true", help="Use GraphBolt for distributed train.", ) args = argparser.parse_args() start = time.time() if args.dataset == "reddit": g, _ = load_reddit() elif args.dataset in ["ogbn-products", "ogbn-papers100M"]: g, _ = load_ogb(args.dataset) else: raise RuntimeError(f"Unknown dataset: {args.dataset}") print( "Load {} takes {:.3f} seconds".format(args.dataset, time.time() - start) ) print("|V|={}, |E|={}".format(g.num_nodes(), g.num_edges())) print( "train: {}, valid: {}, test: {}".format( th.sum(g.ndata["train_mask"]), th.sum(g.ndata["val_mask"]), th.sum(g.ndata["test_mask"]), ) ) if args.balance_train: balance_ntypes = g.ndata["train_mask"] else: balance_ntypes = None if args.undirected: sym_g = dgl.to_bidirected(g, readonly=True) for key in g.ndata: sym_g.ndata[key] = g.ndata[key] g = sym_g dgl.distributed.partition_graph( g, args.dataset, args.num_parts, args.output, part_method=args.part_method, balance_ntypes=balance_ntypes, balance_edges=args.balance_edges, num_trainers_per_machine=args.num_trainers_per_machine, use_graphbolt=args.use_graphbolt, ) ================================================ FILE: examples/distributed/rgcn/README.md ================================================ ## Distributed training This is an example of training RGCN node classification in a distributed fashion. Currently, the example train RGCN graphs with input node features. Before training, install python libs by pip: ```bash pip3 install ogb pyarrow ``` To train RGCN, it has four steps: ### Step 0: Setup a Distributed File System * You may skip this step if your cluster already has folder(s) synchronized across machines. To perform distributed training, files and codes need to be accessed across multiple machines. 
A distributed file system would perfectly handle the job (e.g., NFS, Ceph). #### Server side setup Here is an example of how to set up NFS. First, install essential libs on the storage server ```bash sudo apt-get install nfs-kernel-server ``` Below we assume the user account is `ubuntu` and we create a directory of `workspace` in the home directory. ```bash mkdir -p /home/ubuntu/workspace ``` We assume that all the servers are under a subnet with ip range `192.168.0.0` to `192.168.255.255`. The exports configuration needs to be modified to ```bash sudo vim /etc/exports # add the following line /home/ubuntu/workspace 192.168.0.0/16(rw,sync,no_subtree_check) ``` The server's internal ip can be checked via `ifconfig` or `ip`. If the ip does not begin with `192.168`, then you may use ```bash # for ip range 10.0.0.0 - 10.255.255.255 /home/ubuntu/workspace 10.0.0.0/8(rw,sync,no_subtree_check) # for ip range 172.16.0.0 - 172.31.255.255 /home/ubuntu/workspace 172.16.0.0/12(rw,sync,no_subtree_check) ``` Then restart NFS, and the setup on the server side is finished. ``` sudo systemctl restart nfs-kernel-server ``` For configuration details, please refer to [NFS ArchWiki](https://wiki.archlinux.org/index.php/NFS). #### Client side setup To use NFS, clients also need to install essential packages ``` sudo apt-get install nfs-common ``` You can either mount the NFS manually (replace `NFS_SERVER_IP` with the IP of the storage server) ``` mkdir -p /home/ubuntu/workspace sudo mount -t nfs NFS_SERVER_IP:/home/ubuntu/workspace /home/ubuntu/workspace ``` or edit the fstab so the folder will be mounted automatically ``` # vim /etc/fstab ## append the following line to the file NFS_SERVER_IP:/home/ubuntu/workspace /home/ubuntu/workspace nfs defaults 0 0 ``` Then run `mount -a`. Now go to `/home/ubuntu/workspace` and clone the DGL GitHub repository. ### Step 1: set IP configuration file. Users need to set their own IP configuration file `ip_config.txt` before training.
For example, if we have two machines in the current cluster, the IP configuration could look like this: ```bash 172.31.0.1 172.31.0.2 ``` Users need to make sure that the master node (node-0) has the right permission to ssh to all the other nodes without password authentication. [This link](https://linuxize.com/post/how-to-setup-passwordless-ssh-login/) provides instructions for setting up passwordless SSH login. ### Step 2: partition the graph. The example provides a script to partition some builtin graphs such as the ogbn-mag graph. If we want to train RGCN on 2 machines, we need to partition the graph into 2 parts. In this example, we partition the ogbn-mag graph into 2 parts with Metis. The partitions are balanced with respect to the number of nodes, the number of edges and the number of labelled nodes. ```bash python3 partition_graph.py --dataset ogbn-mag --num_parts 2 --balance_train --balance_edges ``` If we want to train RGCN with `GraphBolt`, we need to append `--use_graphbolt` to generate partitions in `GraphBolt` format. ```bash python3 partition_graph.py --dataset ogbn-mag --num_parts 2 --balance_train --balance_edges --use_graphbolt ``` If we have already partitioned the graph into `DGL` format, just convert the partitions directly like below: ``` python3 -c "import dgl; dgl.distributed.dgl_partition_to_graphbolt('data/ogbn-mag.json')" ``` ### Step 3: Launch distributed jobs DGL provides a script to launch the training job in the cluster. `part_config` and `ip_config` specify relative paths to the path of the workspace. The command below launches 4 training processes on each machine as we'd like to utilize 4 GPUs for training.
```bash python3 ~/workspace/dgl/tools/launch.py \ --workspace ~/workspace/dgl/examples/distributed/rgcn/ \ --num_trainers 4 \ --num_servers 2 \ --num_samplers 0 \ --part_config data/ogbn-mag.json \ --ip_config ip_config.txt \ "python3 node_classification.py --graph-name ogbn-mag --dataset ogbn-mag --fanout='25,25' --batch-size 1024 --n-hidden 64 --lr 0.01 --eval-batch-size 1024 --low-mem --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --layer-norm --ip-config ip_config.txt --num_gpus 4" ``` If we want to train RGCN with `GraphBolt`, we need to append `--use_graphbolt`. ```bash python3 ~/workspace/dgl/tools/launch.py \ --workspace ~/workspace/dgl/examples/distributed/rgcn/ \ --num_trainers 4 \ --num_servers 2 \ --num_samplers 0 \ --part_config data/ogbn-mag.json \ --ip_config ip_config.txt \ "python3 node_classification.py --graph-name ogbn-mag --dataset ogbn-mag --fanout='25,25' --batch-size 1024 --n-hidden 64 --lr 0.01 --eval-batch-size 1024 --low-mem --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --layer-norm --ip-config ip_config.txt --num_gpus 4 --use_graphbolt" ``` **Note:** if you are using conda or other virtual environments on the remote machines, you need to replace `python3` in the command string (i.e. the last argument) with the path to the Python interpreter in that environment. ## Comparison between `DGL` and `GraphBolt` ### Partition sizes Compared to `DGL`, `GraphBolt` partitions are reduced to **19%** for `ogbn-mag`. `ogbn-mag` | Data Formats | File Name | Part 0 | Part 1 | | ------------ | ---------------------------- | ------ | ------ | | DGL | graph.dgl | 714MB | 716MB | | GraphBolt | fused_csc_sampling_graph.pt | 137MB | 136MB | ### Performance Compared to `DGL`, `GraphBolt`'s sampler works faster(reduced to **16%** `ogbn-mag`). `Min` and `Max` are statistics of all trainers on all nodes(machines). 
As for RAM usage, the shared memory(measured by **shared** field of `free` command) usage decreases due to smaller graph partitions in `GraphBolt`. The peak memory used by processes(measured by **used** field of `free` command) decreases as well. `ogbn-mag` | Data Formats | Sample Time Per Epoch (CPU) | Test Accuracy (3 epochs) | shared | used (peak) | CPU Util | | ------------ | --------------------------- | ------------------------- | ----- | ---- | ----- | | DGL | Min: 48.2s, Max: 91.4s | 42.76% | 1.3GB | 9.2GB| 10.4% | | GraphBolt | Min: 9.2s, Max: 11.9s | 42.46% | 742MB | 5.9GB| 18.1% | ## Demonstrate and profile sampling for Link Prediction task ### DGL ``` python3 ~/workspace/dgl/tools/launch.py \ --workspace ~/workspace/dgl/examples/distributed/rgcn/ \ --num_trainers 4 \ --num_servers 2 \ --num_samplers 0 \ --part_config ~/data/ogbn_mag_lp/ogbn-mag.json \ --ip_config ~/workspace/ip_config.txt \ "python3 lp_perf.py --fanout='25,25' --batch-size 1024 --n-epochs 1 --graph-name ogbn-mag --ip-config ~/workspace/ip_config.txt --num_gpus 4 --remove_edge" ``` ### GraphBolt In order to sample with `GraphBolt`, we need to convert partitions into `GraphBolt` formats with below command. ``` python3 -c "import dgl;dgl.distributed.dgl_partition_to_graphbolt('/home/ubuntu/workspace/data/ogbn_mag_lp/ogbn-mag.json', store_eids=True, graph_formats='coo')" ``` Then train with appended `--use_graphbolt`. ``` python3 ~/workspace/dgl/tools/launch.py \ --workspace ~/workspace/dgl/examples/distributed/rgcn/ \ --num_trainers 4 \ --num_servers 2 \ --num_samplers 0 \ --part_config ~/data/ogbn_mag_lp/ogbn-mag.json \ --ip_config ~/workspace/ip_config.txt \ "python3 lp_perf.py --fanout='25,25' --batch-size 1024 --n-epochs 1 --graph-name ogbn-mag --ip-config ~/workspace/ip_config.txt --num_gpus 4 --remove_edge --use_graphbolt" ``` ### Partition sizes Compared to `DGL`, `GraphBolt` partitions are reduced to **72%** for `ogbn-mag`. 
def _mean_after_warmup(times, warmup):
    """Average ``times`` while ignoring the first ``warmup`` measurements.

    When there are not yet more than ``warmup`` measurements, all available
    measurements are averaged instead, so short runs report a real number
    rather than the NaN (and "Mean of empty slice" warning) produced by
    ``np.mean`` on an empty slice. NaN is returned only if ``times`` is empty.
    """
    steady = times[warmup:] or times
    if not steady:
        return float("nan")
    return float(np.mean(steady))


def run(args, g, train_eids):
    """Profile distributed edge sampling for link prediction.

    Builds a ``DistEdgeDataLoader`` with a uniform negative sampler and a
    multi-layer neighbor sampler, iterates over it for ``args.n_epochs``
    epochs and prints the sampling time on every rank. No model is trained.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``fanout``, ``prob_or_mask``, ``remove_edge``, ``batch_size``,
        ``n_epochs`` and ``debug``.
    g : DistGraph
        The distributed graph to sample from.
    train_eids : dict
        Seed edge IDs passed to the data loader, keyed by canonical edge type.
    """
    # Number of leading iterations skipped when averaging, to hide warm-up.
    step_warmup = 10
    epoch_warmup = 100

    fanouts = [int(fanout) for fanout in args.fanout.split(",")]
    neg_sampler = dgl.dataloading.negative_sampler.Uniform(3)
    prob = args.prob_or_mask
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        fanouts,
        prob=prob,
    )

    exclude = None
    reverse_etypes = None
    if args.remove_edge:
        exclude = "reverse_types"
        # add reverse edge types mapping.
        reverse_etypes = {
            ("author", "affiliated_with", "institution"): (
                "institution",
                "rev-affiliated_with",
                "author",
            ),
            ("author", "writes", "paper"): ("paper", "rev-writes", "author"),
            ("paper", "has_topic", "field_of_study"): (
                "field_of_study",
                "rev-has_topic",
                "paper",
            ),
            ("paper", "cites", "paper"): ("paper", "rev-cites", "paper"),
            ("institution", "rev-affiliated_with", "author"): (
                "author",
                "affiliated_with",
                "institution",
            ),
            ("paper", "rev-writes", "author"): ("author", "writes", "paper"),
            ("field_of_study", "rev-has_topic", "paper"): (
                "paper",
                "has_topic",
                "field_of_study",
            ),
            ("paper", "rev-cites", "paper"): ("paper", "cites", "paper"),
        }

    dataloader = dgl.dataloading.DistEdgeDataLoader(
        g,
        train_eids,
        sampler,
        negative_sampler=neg_sampler,
        exclude=exclude,
        reverse_etypes=reverse_etypes,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
    )

    for epoch in range(args.n_epochs):
        sample_times = []
        tic = time.time()
        epoch_tic = time.time()
        for step, sample_data in enumerate(dataloader):
            input_nodes, pos_graph, neg_graph, blocks = sample_data

            if args.debug:
                # Verify prob/mask values: every sampled edge must carry a
                # strictly positive prob/mask entry.
                for block in blocks:
                    for c_etype in block.canonical_etypes:
                        homo_eids = block.edges[c_etype].data[dgl.EID]
                        assert th.all(
                            g.edges[c_etype].data[prob][homo_eids] > 0
                        )
                # Verify exclude_edges functionality.
                current_eids = blocks[-1].edata[dgl.EID]
                seed_eids = pos_graph.edata[dgl.EID]
                if exclude is None:
                    assert th.any(th.isin(current_eids, seed_eids))
                elif exclude == "self":
                    assert not th.any(th.isin(current_eids, seed_eids))
                elif exclude == "reverse_id":
                    assert not th.any(th.isin(current_eids, seed_eids))
                elif exclude == "reverse_types":
                    for src_type, etype, dst_type in pos_graph.canonical_etypes:
                        reverse_etype = reverse_etypes[
                            (src_type, etype, dst_type)
                        ]
                        seed_eids = pos_graph.edges[etype].data[dgl.EID]
                        # Seed edges must be absent from both the edge type
                        # itself and its reverse edge type in the last block.
                        if (src_type, etype, dst_type) in blocks[
                            -1
                        ].canonical_etypes:
                            assert not th.any(
                                th.isin(
                                    blocks[-1].edges[etype].data[dgl.EID],
                                    seed_eids,
                                )
                            )
                        if reverse_etype in blocks[-1].canonical_etypes:
                            assert not th.any(
                                th.isin(
                                    blocks[-1]
                                    .edges[reverse_etype]
                                    .data[dgl.EID],
                                    seed_eids,
                                )
                            )
                else:
                    raise ValueError(f"Unsupported exclude type: {exclude}")

            sample_times.append(time.time() - tic)
            if step % 10 == 0:
                print(
                    f"[{g.rank()}]Epoch {epoch} | Step {step} | Sample Time {_mean_after_warmup(sample_times, step_warmup):.4f}"
                )
            tic = time.time()
        print(
            f"[{g.rank()}]Epoch {epoch} | Total time {time.time() - epoch_tic} | Sample Time {_mean_after_warmup(sample_times, epoch_warmup):.4f}"
        )
        g.barrier()
for c_etype in g.canonical_etypes: shape = (g.num_edges(etype=c_etype),) g.edges[c_etype].data["prob"] = dgl.distributed.DistTensor( shape, th.float32, init_func=rand_init_prob, part_policy=g.get_edge_partition_policy(c_etype), ) g.edges[c_etype].data["mask"] = dgl.distributed.DistTensor( shape, th.float32, init_func=rand_init_mask, part_policy=g.get_edge_partition_policy(c_etype), ) pb = g.get_partition_book() c_etype = ("author", "writes", "paper") train_eids = dgl.distributed.edge_split( th.ones((g.num_edges(etype=c_etype),), dtype=th.bool), g.get_partition_book(), etype=c_etype, force_even=True, ) train_eids = {c_etype: train_eids} local_eids = pb.partid2eids(pb.partid, c_etype).detach().numpy() print( "part {}, train: {} (local: {})".format( g.rank(), len(train_eids[c_etype]), len(np.intersect1d(train_eids[c_etype].numpy(), local_eids)), ) ) run( args, g, train_eids, ) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Sampling Performance Profiling For Link Prediction Tasks" ) parser.add_argument("--graph-name", type=str, help="graph name") parser.add_argument( "--ip-config", type=str, help="The file for IP configuration" ) parser.add_argument( "--num_gpus", type=int, default=-1, help="the number of GPU device. Use -1 for CPU training", ) parser.add_argument( "-e", "--n-epochs", type=int, default=5, help="number of training epochs", ) parser.add_argument( "--fanout", type=str, default="4, 4", help="Fan-out of neighbor sampling.", ) parser.add_argument( "--batch-size", type=int, default=100, help="Mini-batch size. 
class RelGraphConvLayer(nn.Module):
    r"""Relational graph convolution layer.

    Each relation gets a ``GraphConv`` (``norm="right"``, no internal weight
    or bias) wrapped in a ``HeteroGraphConv``; the per-relation weight is
    supplied at call time, either from a basis decomposition or from an
    independent weight matrix per relation.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    rel_names : list[str]
        Relation names.
    num_bases : int or None
        Number of bases. If None, or not smaller than the number of
        relations, one independent weight matrix per relation is used.
    weight : bool, optional
        True if a linear layer is applied after message passing. Default: True
    bias : bool, optional
        True if bias is added. Default: True
    activation : callable, optional
        Activation function. Default: None
    self_loop : bool, optional
        True to include self loop message. Default: False
    dropout : float, optional
        Dropout rate. Default: 0.0
    """

    def __init__(
        self,
        in_feat,
        out_feat,
        rel_names,
        num_bases,
        *,
        weight=True,
        bias=True,
        activation=None,
        self_loop=False,
        dropout=0.0
    ):
        super().__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.rel_names = rel_names
        self.num_bases = num_bases
        self.bias = bias
        self.activation = activation
        self.self_loop = self_loop

        self.conv = dglnn.HeteroGraphConv(
            {
                rel: dglnn.GraphConv(
                    in_feat, out_feat, norm="right", weight=False, bias=False
                )
                for rel in rel_names
            }
        )

        self.use_weight = weight
        # ``num_bases=None`` means "no basis decomposition"; comparing None
        # with an int would raise TypeError, so it is checked explicitly.
        self.use_basis = bool(
            weight
            and num_bases is not None
            and num_bases < len(self.rel_names)
        )
        if self.use_weight:
            if self.use_basis:
                self.basis = dglnn.WeightBasis(
                    (in_feat, out_feat), num_bases, len(self.rel_names)
                )
            else:
                self.weight = nn.Parameter(
                    th.Tensor(len(self.rel_names), in_feat, out_feat)
                )
                nn.init.xavier_uniform_(
                    self.weight, gain=nn.init.calculate_gain("relu")
                )

        # bias
        if bias:
            self.h_bias = nn.Parameter(th.Tensor(out_feat))
            nn.init.zeros_(self.h_bias)

        # weight for self loop
        if self.self_loop:
            self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat))
            nn.init.xavier_uniform_(
                self.loop_weight, gain=nn.init.calculate_gain("relu")
            )

        self.dropout = nn.Dropout(dropout)

    def forward(self, g, inputs):
        """Forward computation

        Parameters
        ----------
        g : DGLGraph
            Input graph.
        inputs : dict[str, torch.Tensor]
            Node feature for each node type.

        Returns
        -------
        dict[str, torch.Tensor]
            New node features for each node type.
        """
        g = g.local_var()
        if self.use_weight:
            weight = self.basis() if self.use_basis else self.weight
            # One (in_feat, out_feat) matrix per relation, keyed by name.
            wdict = {
                self.rel_names[i]: {"weight": w.squeeze(0)}
                for i, w in enumerate(th.split(weight, 1, dim=0))
            }
        else:
            wdict = {}

        if g.is_block:
            # The self-loop term only applies to destination nodes, which
            # are the leading rows of each input tensor in a block.
            inputs_dst = {
                k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items()
            }
        else:
            inputs_dst = inputs

        hs = self.conv(g, inputs, mod_kwargs=wdict)

        def _apply(ntype, h):
            if self.self_loop:
                h = h + th.matmul(inputs_dst[ntype], self.loop_weight)
            if self.bias:
                h = h + self.h_bias
            if self.activation:
                h = self.activation(h)
            return self.dropout(h)

        return {ntype: _apply(ntype, h) for ntype, h in hs.items()}
class DistEmbedLayer(nn.Module):
    r"""Input layer producing one embedding per node of a heterograph.

    Node types that carry ``feat_name`` data are projected with a linear
    layer; every other node type gets a learnable embedding table.

    Parameters
    ----------
    dev_id : int
        Device the outputs are moved to.
    g : DistGraph
        Training graph.
    embed_size : int
        Output embedding size.
    sparse_emb : bool
        Whether to use sparse embeddings. Default: False
    dgl_sparse_emb : bool
        Whether to use DGL distributed sparse embeddings (only consulted
        when ``sparse_emb`` is set). Default: False
    feat_name : str, optional
        Name of the node data holding input features.
    embed_name : str, optional
        Name prefix of the DGL distributed embeddings.
    """

    def __init__(
        self,
        dev_id,
        g,
        embed_size,
        sparse_emb=False,
        dgl_sparse_emb=False,
        feat_name="feat",
        embed_name="node_emb",
    ):
        super().__init__()
        self.dev_id = dev_id
        self.embed_size = embed_size
        self.embed_name = embed_name
        self.feat_name = feat_name
        self.sparse_emb = sparse_emb
        self.g = g

        self.ntype_id_map = {}
        for ntype in g.ntypes:
            self.ntype_id_map[g.get_ntype_id(ntype)] = ntype

        # Linear projections for node types that come with features.
        self.node_projs = nn.ModuleDict()
        for ntype in g.ntypes:
            if feat_name not in g.nodes[ntype].data:
                continue
            in_dim = g.nodes[ntype].data[feat_name].shape[1]
            proj = nn.Linear(in_dim, embed_size)
            nn.init.xavier_uniform_(proj.weight)
            self.node_projs[ntype] = proj
            print("node {} has data {}".format(ntype, feat_name))

        # We only create embeddings for nodes without node features.
        featureless = [
            ntype for ntype in g.ntypes if feat_name not in g.nodes[ntype].data
        ]
        if sparse_emb and dgl_sparse_emb:
            # DGL distributed embeddings are not torch modules, so they are
            # kept in a plain dict.
            self.node_embeds = {}
            for ntype in featureless:
                part_policy = g.get_node_partition_policy(ntype)
                self.node_embeds[ntype] = dgl.distributed.DistEmbedding(
                    g.num_nodes(ntype),
                    self.embed_size,
                    embed_name + "_" + ntype,
                    init_emb,
                    part_policy,
                )
        else:
            self.node_embeds = nn.ModuleDict()
            for ntype in featureless:
                if sparse_emb:
                    table = th.nn.Embedding(
                        g.num_nodes(ntype),
                        self.embed_size,
                        sparse=self.sparse_emb,
                    )
                else:
                    table = th.nn.Embedding(
                        g.num_nodes(ntype), self.embed_size
                    )
                self.node_embeds[ntype] = table
                nn.init.uniform_(table.weight, -1.0, 1.0)

    def forward(self, node_ids):
        """Look up the input embeddings of the given nodes.

        Parameters
        ----------
        node_ids : dict of Tensor
            Node ids to generate embeddings for, keyed by node type.

        Returns
        -------
        dict of Tensor
            Embeddings per node type, moved to ``dev_id``.
        """
        embeds = {}
        for ntype in node_ids:
            ids = node_ids[ntype]
            node_data = self.g.nodes[ntype].data
            if self.feat_name in node_data:
                feats = node_data[self.feat_name][ids].to(self.dev_id)
                embeds[ntype] = self.node_projs[ntype](feats)
            else:
                embeds[ntype] = self.node_embeds[ntype](ids).to(self.dev_id)
        return embeds
g.num_nodes("paper")) test_seeds.append(seeds.cpu().detach()) test_logits = th.cat(test_logits) test_seeds = th.cat(test_seeds) global_results[test_seeds] = test_logits.argmax(dim=1) g.barrier() if g.rank() == 0: return compute_acc( global_results[all_val_nid], labels[all_val_nid] ), compute_acc(global_results[all_test_nid], labels[all_test_nid]) else: return -1, -1 def run(args, device, data): ( g, num_classes, train_nid, val_nid, test_nid, labels, all_val_nid, all_test_nid, ) = data fanouts = [int(fanout) for fanout in args.fanout.split(",")] val_fanouts = [int(fanout) for fanout in args.validation_fanout.split(",")] sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts) dataloader = dgl.distributed.DistNodeDataLoader( g, {"paper": train_nid}, sampler, batch_size=args.batch_size, shuffle=True, drop_last=False, ) valid_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts) valid_dataloader = dgl.distributed.DistNodeDataLoader( g, {"paper": val_nid}, valid_sampler, batch_size=args.batch_size, shuffle=False, drop_last=False, ) test_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts) test_dataloader = dgl.distributed.DistNodeDataLoader( g, {"paper": test_nid}, test_sampler, batch_size=args.eval_batch_size, shuffle=False, drop_last=False, ) embed_layer = DistEmbedLayer( device, g, args.n_hidden, sparse_emb=args.sparse_embedding, dgl_sparse_emb=args.dgl_sparse, feat_name="feat", ) model = EntityClassify( device, args.n_hidden, num_classes, g.etypes, num_bases=args.n_bases, num_hidden_layers=args.n_layers - 2, dropout=args.dropout, use_self_loop=args.use_self_loop, layer_norm=args.layer_norm, ) model = model.to(device) if not args.standalone: if args.num_gpus == -1: model = DistributedDataParallel(model) # If there are dense parameters in the embedding layer # or we use Pytorch saprse embeddings. 
if len(embed_layer.node_projs) > 0 or not args.dgl_sparse: embed_layer = DistributedDataParallel(embed_layer) else: dev_id = g.rank() % args.num_gpus model = DistributedDataParallel( model, device_ids=[dev_id], output_device=dev_id ) # If there are dense parameters in the embedding layer # or we use Pytorch saprse embeddings. if len(embed_layer.node_projs) > 0 or not args.dgl_sparse: embed_layer = embed_layer.to(device) embed_layer = DistributedDataParallel( embed_layer, device_ids=[dev_id], output_device=dev_id ) if args.sparse_embedding: if args.dgl_sparse and args.standalone: emb_optimizer = dgl.distributed.optim.SparseAdam( list(embed_layer.node_embeds.values()), lr=args.sparse_lr ) print( "optimize DGL sparse embedding:", embed_layer.node_embeds.keys() ) elif args.dgl_sparse: emb_optimizer = dgl.distributed.optim.SparseAdam( list(embed_layer.module.node_embeds.values()), lr=args.sparse_lr ) print( "optimize DGL sparse embedding:", embed_layer.module.node_embeds.keys(), ) elif args.standalone: emb_optimizer = th.optim.SparseAdam( list(embed_layer.node_embeds.parameters()), lr=args.sparse_lr ) print("optimize Pytorch sparse embedding:", embed_layer.node_embeds) else: emb_optimizer = th.optim.SparseAdam( list(embed_layer.module.node_embeds.parameters()), lr=args.sparse_lr, ) print( "optimize Pytorch sparse embedding:", embed_layer.module.node_embeds, ) dense_params = list(model.parameters()) if args.standalone: dense_params += list(embed_layer.node_projs.parameters()) print("optimize dense projection:", embed_layer.node_projs) else: dense_params += list(embed_layer.module.node_projs.parameters()) print("optimize dense projection:", embed_layer.module.node_projs) optimizer = th.optim.Adam( dense_params, lr=args.lr, weight_decay=args.l2norm ) else: all_params = list(model.parameters()) + list(embed_layer.parameters()) optimizer = th.optim.Adam( all_params, lr=args.lr, weight_decay=args.l2norm ) # training loop print("start training...") for epoch in 
range(args.n_epochs): tic = time.time() sample_time = 0 copy_time = 0 forward_time = 0 backward_time = 0 update_time = 0 number_train = 0 number_input = 0 step_time = [] iter_t = [] sample_t = [] feat_copy_t = [] forward_t = [] backward_t = [] update_t = [] iter_tput = [] start = time.time() # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. step_time = [] for step, sample_data in enumerate(dataloader): input_nodes, seeds, blocks = sample_data seeds = seeds["paper"] number_train += seeds.shape[0] number_input += np.sum( [blocks[0].num_src_nodes(ntype) for ntype in blocks[0].ntypes] ) tic_step = time.time() sample_time += tic_step - start sample_t.append(tic_step - start) feats = embed_layer(input_nodes) label = labels[seeds].to(device) copy_time = time.time() feat_copy_t.append(copy_time - tic_step) # forward logits = model(blocks, feats) assert len(logits) == 1 logits = logits["paper"] loss = F.cross_entropy(logits, label) forward_end = time.time() # backward optimizer.zero_grad() if args.sparse_embedding: emb_optimizer.zero_grad() loss.backward() compute_end = time.time() forward_t.append(forward_end - copy_time) backward_t.append(compute_end - forward_end) # Update model parameters optimizer.step() if args.sparse_embedding: emb_optimizer.step() update_t.append(time.time() - compute_end) step_t = time.time() - start step_time.append(step_t) train_acc = th.sum(logits.argmax(dim=1) == label).item() / len( seeds ) if step % args.log_every == 0: print( "[{}] Epoch {:05d} | Step {:05d} | Train acc {:.4f} | Loss {:.4f} | time {:.3f} s" "| sample {:.3f} | copy {:.3f} | forward {:.3f} | backward {:.3f} | update {:.3f}".format( g.rank(), epoch, step, train_acc, loss.item(), np.sum(step_time[-args.log_every :]), np.sum(sample_t[-args.log_every :]), np.sum(feat_copy_t[-args.log_every :]), np.sum(forward_t[-args.log_every :]), np.sum(backward_t[-args.log_every :]), np.sum(update_t[-args.log_every :]), ) ) start = time.time() 
def main(args):
    """Initialize the distributed runtime, split the paper nodes and train.

    Sets up DGL's distributed context (and the torch process group unless
    running standalone), loads the ``DistGraph``, derives the per-trainer
    train/val/test node splits of the "paper" node type, gathers the global
    labels and validation/test node ids, and hands everything to ``run``.
    """
    dgl.distributed.initialize(args.ip_config, use_graphbolt=args.use_graphbolt)
    if not args.standalone:
        # CPU training uses `gloo`; so does the DGL sparse-embedding setup,
        # because `nccl` is not fully supported in DistDGL's sparse optimizer.
        needs_gloo = args.num_gpus == -1 or (
            args.sparse_embedding and args.dgl_sparse
        )
        th.distributed.init_process_group(
            backend="gloo" if needs_gloo else "nccl"
        )

    g = dgl.distributed.DistGraph(args.graph_name, part_config=args.conf_path)
    print("rank:", g.rank())

    pb = g.get_partition_book()
    split_kwargs = {"ntype": "paper", "force_even": True}
    if "trainer_id" in g.nodes["paper"].data:
        # Honor the trainer assignment stored with the partitioned graph.
        split_kwargs["node_trainer_ids"] = g.nodes["paper"].data["trainer_id"]
    train_nid, val_nid, test_nid = (
        dgl.distributed.node_split(
            g.nodes["paper"].data[mask_name], pb, **split_kwargs
        )
        for mask_name in ("train_mask", "val_mask", "test_mask")
    )

    local_nid = pb.partid2nids(pb.partid, "paper").detach().numpy()
    summary = [g.rank()]
    for nid in (train_nid, val_nid, test_nid):
        summary.append(len(nid))
        summary.append(len(np.intersect1d(nid.numpy(), local_nid)))
    print(
        "part {}, train: {} (local: {}), val: {} (local: {}), test: {} (local: {})".format(
            *summary
        )
    )

    if args.num_gpus == -1:
        device = th.device("cpu")
    else:
        device = th.device("cuda:" + str(g.rank() % args.num_gpus))

    # Pull the full label / mask tensors of all paper nodes.
    paper_ids = np.arange(g.num_nodes("paper"))
    labels = g.nodes["paper"].data["labels"][paper_ids]
    val_mask = g.nodes["paper"].data["val_mask"][paper_ids]
    all_val_nid = th.LongTensor(np.nonzero(val_mask)).squeeze()
    test_mask = g.nodes["paper"].data["test_mask"][paper_ids]
    all_test_nid = th.LongTensor(np.nonzero(test_mask)).squeeze()

    # Only non-negative labels are counted as classes.
    n_classes = len(th.unique(labels[labels >= 0]))
    print("#classes:", n_classes)

    data = (
        g,
        n_classes,
        train_nid,
        val_nid,
        test_nid,
        labels,
        all_val_nid,
        all_test_nid,
    )
    run(args, device, data)
Use -1 for CPU training", ) parser.add_argument( "--dropout", type=float, default=0, help="dropout probability" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden units" ) parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--sparse-lr", type=float, default=1e-2, help="sparse lr rate" ) parser.add_argument( "--n-bases", type=int, default=-1, help="number of filter weight matrices, default: -1 [use all]", ) parser.add_argument( "--n-layers", type=int, default=2, help="number of propagation rounds" ) parser.add_argument( "-e", "--n-epochs", type=int, default=50, help="number of training epochs", ) parser.add_argument( "-d", "--dataset", type=str, required=True, help="dataset to use" ) parser.add_argument("--l2norm", type=float, default=0, help="l2 norm coef") parser.add_argument( "--relabel", default=False, action="store_true", help="remove untouched nodes and relabel", ) parser.add_argument( "--fanout", type=str, default="4, 4", help="Fan-out of neighbor sampling.", ) parser.add_argument( "--validation-fanout", type=str, default=None, help="Fan-out of neighbor sampling during validation.", ) parser.add_argument( "--use-self-loop", default=False, action="store_true", help="include self feature as a special relation", ) parser.add_argument( "--batch-size", type=int, default=100, help="Mini-batch size. " ) parser.add_argument( "--eval-batch-size", type=int, default=128, help="Mini-batch size. 
def load_ogb(dataset):
    """Load an OGB node-property dataset as a heterograph ready to partition.

    Only ``"ogbn-mag"`` is supported. A reverse edge type (``"rev-" + etype``)
    is added for every edge type, and the "paper" nodes receive their
    features, labels and boolean train/val/test masks.

    Parameters
    ----------
    dataset : str
        Name of the dataset.

    Returns
    -------
    DGLGraph
        The heterograph with "feat", "train_mask", "val_mask", "test_mask"
        and "labels" stored on the "paper" nodes.

    Raises
    ------
    ValueError
        If ``dataset`` is not ``"ogbn-mag"``.
    """
    if dataset != "ogbn-mag":
        # Raising a bare string is itself a TypeError; raise a real exception.
        raise ValueError(
            "Do not support other ogbn datasets. Got: {!r}".format(dataset)
        )

    dataset = DglNodePropPredDataset(name=dataset)
    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"]["paper"]
    val_idx = split_idx["valid"]["paper"]
    test_idx = split_idx["test"]["paper"]
    hg_orig, labels = dataset[0]

    # Rebuild the graph with an explicit reverse edge type per relation.
    subgs = {}
    for etype in hg_orig.canonical_etypes:
        u, v = hg_orig.all_edges(etype=etype)
        subgs[etype] = (u, v)
        subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u)
    hg = dgl.heterograph(subgs)
    hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"]
    paper_labels = labels["paper"].squeeze()

    print("Number of relations: {}".format(len(hg.canonical_etypes)))
    print("Number of class: {}".format(dataset.num_classes))
    print("Number of train: {}".format(len(train_idx)))
    print("Number of valid: {}".format(len(val_idx)))
    print("Number of test: {}".format(len(test_idx)))

    # Turn the index splits into boolean masks over all paper nodes.
    num_papers = hg.num_nodes("paper")
    for mask_name, idx in (
        ("train_mask", train_idx),
        ("val_mask", val_idx),
        ("test_mask", test_idx),
    ):
        mask = th.zeros((num_papers,), dtype=th.bool)
        mask[idx] = True
        hg.nodes["paper"].data[mask_name] = mask
    hg.nodes["paper"].data["labels"] = paper_labels
    return hg
The trainer ids are stored\ in the node feature 'trainer_id'", ) argparser.add_argument( "--output", type=str, default="data", help="Output path of partitioned graph.", ) argparser.add_argument( "--use_graphbolt", action="store_true", help="Use GraphBolt for distributed train.", ) args = argparser.parse_args() start = time.time() g = load_ogb(args.dataset) print( "load {} takes {:.3f} seconds".format(args.dataset, time.time() - start) ) print("|V|={}, |E|={}".format(g.num_nodes(), g.num_edges())) print( "train: {}, valid: {}, test: {}".format( th.sum(g.nodes["paper"].data["train_mask"]), th.sum(g.nodes["paper"].data["val_mask"]), th.sum(g.nodes["paper"].data["test_mask"]), ) ) if args.balance_train: balance_ntypes = {"paper": g.nodes["paper"].data["train_mask"]} else: balance_ntypes = None dgl.distributed.partition_graph( g, args.dataset, args.num_parts, args.output, part_method=args.part_method, balance_ntypes=balance_ntypes, balance_edges=args.balance_edges, num_trainers_per_machine=args.num_trainers_per_machine, use_graphbolt=args.use_graphbolt, ) ================================================ FILE: examples/graphbolt/README.md ================================================ ## How to run the code? ```bash python link_prediction.py ``` Results (10 epochs): ``` Valid MRR 0.7040 Test MRR 0.7043 ``` ================================================ FILE: examples/graphbolt/disk_based_feature/README.md ================================================ ## Overview This project demonstrates how to use GraphBolt to train and evaluate a GraphSAGE model for node classification task on large graphs, where node features are on-disk and fetched using `DiskBasedFeature`. 
GraphBolt utilizes various in-house implemented caching policy algorithms such as [SIEVE](https://cachemon.github.io/SIEVE-website/), [S3-FIFO](https://s3fifo.com), LRU and [CLOCK](https://people.csail.mit.edu/saltzer/Multics/MHP-Saltzer-060508/bookcases/M00s/M0104%20074-12%29.PDF) to cache frequently required features and io_uring to fetch cache-missed features from disk. The SIEVE algorithm is the default option. # Node classification task This example demonstrates how to run node classification task with **GraphBolt.DiskBasedFeature**. All results are collected on an AWS EC2 g5.8xlarge instance with 128GB RAM, 32 cores, a 24GB A10G GPU and an instance storage of 250K IOPS. ## Run on `ogbn-papers100M` dataset | Dataset | Graph Size | Feature Size | Feature Dim | | :-------------: | :--------: | :----------: | :---------: | | ogbn-papers100M | 13 GB | 53 GB | 128 | ## Results with various caching policies This part trains a three-layer GraphSAGE model for 3 epochs on `ogbn-papers100M` dataset with 10GB CPU cache, using neighbor sampling. ### Run default SIEVE policy Instruction: ``` python node_classification.py --gpu-cache-size-in-gigabytes=0 --cpu-cache-size-in-gigabytes=10 --dataset=ogbn-papers100M --epochs=3 ``` Result: ``` Training: 1178it [03:00, 6.53it/s, num_nodes=671260, gpu_cache_miss=1, cpu_cache_miss=0.0578] Evaluating: 123it [00:16, 7.47it/s, num_nodes=624816, gpu_cache_miss=1, cpu_cache_miss=0.0569] Epoch 00, Loss: 1.4173, Approx. Train: 0.5787, Approx. Val: 0.6353, Time: 180.33928060531616s Training: 1178it [01:39, 11.79it/s, num_nodes=648380, gpu_cache_miss=1, cpu_cache_miss=0.0451] Evaluating: 123it [00:15, 7.90it/s, num_nodes=625373, gpu_cache_miss=1, cpu_cache_miss=0.0451] Epoch 01, Loss: 1.1446, Approx. Train: 0.6386, Approx. 
Val: 0.6382, Time: 99.92613315582275s Training: 1178it [01:36, 12.15it/s, num_nodes=674194, gpu_cache_miss=1, cpu_cache_miss=0.0408] Evaluating: 123it [00:15, 8.08it/s, num_nodes=628233, gpu_cache_miss=1, cpu_cache_miss=0.0409] Epoch 02, Loss: 1.0975, Approx. Train: 0.6507, Approx. Val: 0.6535, Time: 96.95083212852478s ``` ### Performance Comparison on four caching polices Below results demonstrate the epoch time with four different caching policies. | Policy | Epoch 1 (s) | Epoch 2 (s) | Epoch 3 (s) | | :-----: | :---------: | :---------: | :---------: | | SIEVE | 180.339 | 99.926 | 96.951 | | S3-FiFO | 181.438 | 110.054 | 108.310 | | LRU | 194.583 | 138.352 | 138.369 | | CLOCK | 188.915 | 129.372 | 129.388 | ## Results with Layer-Neighbor Sampling This part trains a three-layer GraphSAGE model for 3 epochs on `ogbn-papers100M` dataset with 10GB CPU cache, using Layer-Neighbor Sampling and default SIEVE policy. ### Run default `--batch-dependency=1` Instruction: ``` python node_classification.py --gpu-cache-size-in-gigabytes=0 --cpu-cache-size-in-gigabytes=10 --dataset=ogbn-papers100M --sample-mode=sample_layer_neighbor --batch-dependency=1 --epochs=3 ``` Result: ``` Training: 1178it [02:51, 6.88it/s, num_nodes=463495, gpu_cache_miss=1, cpu_cache_miss=0.0774] Evaluating: 123it [00:15, 7.94it/s, num_nodes=465592, gpu_cache_miss=1, cpu_cache_miss=0.0762] Epoch 00, Loss: 1.4173, Approx. Train: 0.5774, Approx. Val: 0.6300, Time: 171.11454963684082s Training: 1178it [01:34, 12.43it/s, num_nodes=474446, gpu_cache_miss=1, cpu_cache_miss=0.0604] Evaluating: 123it [00:14, 8.45it/s, num_nodes=462042, gpu_cache_miss=1, cpu_cache_miss=0.0603] Epoch 01, Loss: 1.1463, Approx. Train: 0.6384, Approx. Val: 0.6395, Time: 94.7821741104126s Training: 1178it [01:31, 12.82it/s, num_nodes=479331, gpu_cache_miss=1, cpu_cache_miss=0.0545] Evaluating: 123it [00:14, 8.67it/s, num_nodes=463628, gpu_cache_miss=1, cpu_cache_miss=0.0546] Epoch 02, Loss: 1.1000, Approx. Train: 0.6501, Approx. 
Val: 0.6516, Time: 91.8746063709259s ``` ### Performance Comparison on different `--batch-dependency` | batch-dependency | Epoch 1 (s) | Epoch 2 (s) | Epoch 3 (s) | | :--------------: | :---------: | :---------: | :---------: | | 1 | 171.114 | 94.782 | 91.875 | | 64 | 144.241 | 78.749 | 75.270 | | 4096 | 92.494 | 56.111 | 57.647 | ### Effect of `--layer-dependency` Below results demonstrate the effect of enabling `--layer-dependency` on epoch time when setting `--batch-dependency=1`. | layer-dependency | Epoch 1 (s) | Epoch 2 (s) | Epoch 3 (s) | | :--------------: | :---------: | :---------: | :---------: | | False | 171.114 | 94.782 | 91.875 | | True | 159.625 | 86.209 | 83.171 | ## Compared to In-mem Performance This part trains a three-layer GraphSAGE model for 3 epochs on `ogbn-papers100M` dataset with 20GB CPU cache and 5GB GPU cache, using neighbor sampling. We compare it to the in-mem performance with 5GB GPU cache. Following result demonstrates that with sufficient cache memory, the performance of DiskBasedFeature is not bottlenecked by the cache itself and comparable with in-memory feature stores. Note that the first epoch of training initiates the cache, thus taking longer time. 
Instruction: ``` python node_classification.py --gpu-cache-size-in-gigabytes=5 --cpu-cache-size-in-gigabytes=20 --dataset=ogbn-papers100M --epochs=3 ``` Result: | Feature Store | Epoch 1 (s) | Epoch 2 (s) | Epoch 3 (s) | | :--------------: | :---------: | :---------: | :---------: | | DiskBasedFeature | 143.761 | 32.018 | 31.889 | | In-memory | 28.861 | 28.330 | 28.305 | ================================================ FILE: examples/graphbolt/disk_based_feature/node_classification.py ================================================ """ This example references examples/graphbolt/pyg/labor/node_classification.py """ import argparse import time from copy import deepcopy import dgl.graphbolt as gb import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F from tqdm import tqdm def accuracy(out, labels): assert out.ndim == 2 assert out.size(0) == labels.size(0) assert labels.ndim == 1 or (labels.ndim == 2 and labels.size(1) == 1) labels = labels.flatten() predictions = torch.argmax(out, 1) return (labels == predictions).sum(dtype=torch.float64) / labels.size(0) class SAGE(nn.Module): def __init__(self, in_size, hidden_size, out_size, num_layers, dropout): super().__init__() self.layers = nn.ModuleList() # Three-layer GraphSAGE-mean. self.layers.append(dglnn.SAGEConv(in_size, hidden_size, "mean")) for _ in range(num_layers - 2): self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, out_size, "mean")) self.dropout = nn.Dropout(dropout) self.hidden_size = hidden_size self.out_size = out_size # Set the dtype for the layers manually. 
self.set_layer_dtype(torch.float32) def set_layer_dtype(self, _dtype): for layer in self.layers: for param in layer.parameters(): param.data = param.data.to(_dtype) def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) hidden_x = self.dropout(hidden_x) return hidden_x def inference(self, graph, features, dataloader, storage_device): """Conduct layer-wise inference to get all the node embeddings.""" pin_memory = storage_device == "pinned" buffer_device = torch.device("cpu" if pin_memory else storage_device) for layer_idx, layer in enumerate(self.layers): is_last_layer = layer_idx == len(self.layers) - 1 y = torch.empty( graph.total_num_nodes, self.out_size if is_last_layer else self.hidden_size, dtype=torch.float32, device=buffer_device, pin_memory=pin_memory, ) for data in tqdm(dataloader): # len(blocks) = 1 hidden_x = layer(data.blocks[0], data.node_features["feat"]) if not is_last_layer: hidden_x = F.relu(hidden_x) hidden_x = self.dropout(hidden_x) # By design, our output nodes are contiguous. y[data.seeds[0] : data.seeds[-1] + 1] = hidden_x.to( buffer_device ) if not is_last_layer: features.update("node", None, "feat", y) return y def create_dataloader( graph, features, itemset, batch_size, fanout, device, job ): # Initialize an ItemSampler to sample mini-batches from the dataset. datapipe = gb.ItemSampler( itemset, batch_size=batch_size, shuffle=(job == "train"), drop_last=(job == "train"), ) # Copy the data to the specified device. if args.graph_device != "cpu": datapipe = datapipe.copy_to(device=device) # Sample neighbors for each node in the mini-batch. 
kwargs = ( { # Layer dependency makes it so that the sampled neighborhoods across layers # become correlated, reducing the total number of sampled unique nodes in a # minibatch, thus reducing the amount of feature data requested. "layer_dependency": args.layer_dependency, # Batch dependency makes it so that the sampled neighborhoods across minibatches # become correlated, reducing the total number of sampled unique nodes across # minibatches, thus increasing temporal locality and reducing cache miss rates. "batch_dependency": args.batch_dependency, } if args.sample_mode == "sample_layer_neighbor" else {} ) datapipe = getattr(datapipe, args.sample_mode)( graph, fanout if job != "infer" else [-1], overlap_fetch=args.overlap_graph_fetch, **kwargs, ) # Copy the data to the specified device. if args.feature_device != "cpu": datapipe = datapipe.copy_to(device=device) # Fetch node features for the sampled subgraph. datapipe = datapipe.fetch_feature( features, node_feature_keys=["feat"], overlap_fetch=args.overlap_feature_fetch, ) # Copy the data to the specified device. if args.feature_device == "cpu": datapipe = datapipe.copy_to(device=device) # Create and return a DataLoader to handle data loading. 
return gb.DataLoader(datapipe, num_workers=args.num_workers) def train_step(minibatch, optimizer, model, loss_fn): node_features = minibatch.node_features["feat"] labels = minibatch.labels optimizer.zero_grad() out = model(minibatch.blocks, node_features) loss = loss_fn(out, labels) num_correct = accuracy(out, labels) * labels.size(0) loss.backward() optimizer.step() return loss.detach(), num_correct, labels.size(0) def train_helper( dataloader, model, optimizer, loss_fn, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, device, ): model.train() # Set the model to training mode total_loss = torch.zeros(1, device=device) # Accumulator for the total loss # Accumulator for the total number of correct predictions total_correct = torch.zeros(1, dtype=torch.float64, device=device) total_samples = 0 # Accumulator for the total number of samples processed num_batches = 0 # Counter for the number of mini-batches processed start = time.time() dataloader = tqdm(dataloader, "Training") for step, minibatch in enumerate(dataloader): loss, num_correct, num_samples = train_step( minibatch, optimizer, model, loss_fn ) total_loss += loss total_correct += num_correct total_samples += num_samples num_batches += 1 if step % 25 == 0: # log every 25 steps for performance. 
dataloader.set_postfix( { "num_nodes": minibatch.node_ids().size(0), "gpu_cache_miss": gpu_cache_miss_rate_fn(), "cpu_cache_miss": cpu_cache_miss_rate_fn(), } ) train_loss = total_loss / num_batches train_acc = total_correct / total_samples end = time.time() return train_loss, train_acc, end - start def train( train_dataloader, valid_dataloader, model, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, device, ): optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) loss_fn = nn.CrossEntropyLoss() best_model = None best_model_acc = 0 best_model_epoch = -1 for epoch in range(args.epochs): train_loss, train_acc, duration = train_helper( train_dataloader, model, optimizer, loss_fn, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, device, ) val_acc = evaluate( model, valid_dataloader, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, device, ) if val_acc > best_model_acc: best_model_acc = val_acc best_model = deepcopy(model.state_dict()) best_model_epoch = epoch print( f"Epoch {epoch:02d}, Loss: {train_loss.item():.4f}, " f"Approx. Train: {train_acc.item():.4f}, " f"Approx. 
Val: {val_acc.item():.4f}, " f"Time: {duration}s" ) if best_model_epoch + args.early_stopping_patience < epoch: break return best_model @torch.no_grad() def layerwise_infer( args, graph, features, itemsets, all_nodes_set, model, ): model.eval() dataloader = create_dataloader( graph=graph, features=features, itemset=all_nodes_set, batch_size=args.batch_size, fanout=[-1], device=args.device, job="infer", ) pred = model.inference(graph, features, dataloader, args.feature_device) metrics = {} for split_name, itemset in itemsets.items(): nid, labels = itemset[:] acc = accuracy( pred[nid.to(pred.device)], labels.to(pred.device), ) metrics[split_name] = acc.item() return metrics def evaluate_step(minibatch, model): node_features = minibatch.node_features["feat"] labels = minibatch.labels out = model(minibatch.blocks, node_features) num_correct = accuracy(out, labels) * labels.size(0) return num_correct, labels.size(0) @torch.no_grad() def evaluate( model, dataloader, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, device, ): model.eval() total_correct = torch.zeros(1, dtype=torch.float64, device=device) total_samples = 0 val_dataloader_tqdm = tqdm(dataloader, "Evaluating") for step, minibatch in enumerate(val_dataloader_tqdm): num_correct, num_samples = evaluate_step(minibatch, model) total_correct += num_correct total_samples += num_samples if step % 25 == 0: val_dataloader_tqdm.set_postfix( { "num_nodes": minibatch.node_ids().size(0), "gpu_cache_miss": gpu_cache_miss_rate_fn(), "cpu_cache_miss": cpu_cache_miss_rate_fn(), } ) return total_correct / total_samples def parse_args(): parser = argparse.ArgumentParser( description="Which dataset are you going to use?" ) parser.add_argument( "--epochs", type=int, default=9999999, help="Number of training epochs." 
) parser.add_argument( "--lr", type=float, default=0.001, help="Learning rate for optimization.", ) parser.add_argument("--num-hidden", type=int, default=256) parser.add_argument("--dropout", type=float, default=0.2) parser.add_argument( "--batch-size", type=int, default=1024, help="Batch size for training." ) parser.add_argument( "--num-workers", type=int, default=0, help="Number of workers for data loading.", ) parser.add_argument( "--dataset", type=str, default="ogbn-products", choices=[ "ogbn-arxiv", "ogbn-products", "ogbn-papers100M", "igb-hom-tiny", "igb-hom-small", "igb-hom-medium", "igb-hom-large", "igb-hom", ], ) parser.add_argument("--root", type=str, default="datasets") parser.add_argument( "--fanout", type=str, default="10,10,10", help="Fan-out of neighbor sampling. len(fanout) determines the number of" " GNN layers in your model. Default: 10,10,10", ) parser.add_argument( "--mode", default="pinned-pinned-cuda", choices=[ "cpu-cpu-cpu", "cpu-cpu-cuda", "cpu-pinned-cuda", "pinned-pinned-cuda", "cuda-pinned-cuda", "cuda-cuda-cuda", ], help="Graph storage - feature storage - Train device: 'cpu' for CPU and" " RAM, 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.", ) parser.add_argument("--layer-dependency", action="store_true") parser.add_argument("--batch-dependency", type=int, default=1) parser.add_argument( "--cpu-feature-cache-policy", type=str, default=None, choices=["s3-fifo", "sieve", "lru", "clock"], help="The cache policy for the CPU feature cache.", ) parser.add_argument( "--cpu-cache-size-in-gigabytes", type=float, default=0, help="The capacity of the CPU cache in GiB.", ) parser.add_argument( "--gpu-cache-size-in-gigabytes", type=float, default=0, help="The capacity of the GPU cache in GiB.", ) parser.add_argument("--early-stopping-patience", type=int, default=25) parser.add_argument( "--sample-mode", default="sample_neighbor", choices=["sample_neighbor", "sample_layer_neighbor"], help="The sampling function when doing 
layerwise sampling.", ) parser.add_argument("--precision", type=str, default="high") parser.add_argument("--enable-inference", action="store_true") return parser.parse_args() def main(): torch.set_float32_matmul_precision(args.precision) if not torch.cuda.is_available(): args.mode = "cpu-cpu-cpu" print(f"Training in {args.mode} mode.") args.graph_device, args.feature_device, args.device = args.mode.split("-") args.overlap_feature_fetch = args.feature_device == "pinned" args.overlap_graph_fetch = args.graph_device == "pinned" """ Load and preprocess on-disk dataset. We inspect the in_memory field of the feature_data in the YAML file and modify it to False. This will make sure the feature_data is loaded as DiskBasedFeature. """ print("Loading data...") disk_based_feature_keys = None if args.cpu_cache_size_in_gigabytes > 0: disk_based_feature_keys = [("node", None, "feat")] dataset = gb.BuiltinDataset(args.dataset, root=args.root) if disk_based_feature_keys is None: disk_based_feature_keys = set() for feature in dataset.yaml_data["feature_data"]: feature_key = (feature["domain"], feature["type"], feature["name"]) # Set the in_memory setting to False without modifying YAML file. if feature_key in disk_based_feature_keys: feature["in_memory"] = False dataset = dataset.load() # Move the dataset to the selected storage. graph = ( dataset.graph.pin_memory_() if args.graph_device == "pinned" else dataset.graph.to(args.graph_device) ) features = ( dataset.feature.pin_memory_() if args.feature_device == "pinned" else dataset.feature.to(args.feature_device) ) train_set = dataset.tasks[0].train_set valid_set = dataset.tasks[0].validation_set test_set = dataset.tasks[0].test_set all_nodes_set = dataset.all_nodes_set args.fanout = list(map(int, args.fanout.split(","))) num_classes = dataset.tasks[0].metadata["num_classes"] """ If the CPU cache size is greater than 0, we wrap the DiskBasedFeature to be a CPUCachedFeature. 
This internally manages the CPU feature cache by the specified cache replacement policy. This will reduce the amount of data transferred during disk read operations for this feature. Note: It is advised to set the CPU cache size to be at least 4 times the number of sampled nodes in a mini-batch, otherwise the feature fetcher might get into a deadlock, causing a hang. """ if args.cpu_cache_size_in_gigabytes > 0 and isinstance( features[("node", None, "feat")], gb.DiskBasedFeature ): features[("node", None, "feat")] = gb.cpu_cached_feature( features[("node", None, "feat")], int(args.cpu_cache_size_in_gigabytes * 1024 * 1024 * 1024), args.cpu_feature_cache_policy, args.feature_device == "pinned", ) cpu_cached_feature = features[("node", None, "feat")] cpu_cache_miss_rate_fn = lambda: cpu_cached_feature.miss_rate else: cpu_cache_miss_rate_fn = lambda: 1 """ If the GPU cache size is greater than 0, we wrap the underlying feature store to be a GPUCachedFeature. This will reduce the amount of data transferred during host-to-device copy operations for this feature. 
""" if args.gpu_cache_size_in_gigabytes > 0 and args.feature_device != "cuda": features[("node", None, "feat")] = gb.gpu_cached_feature( features[("node", None, "feat")], int(args.gpu_cache_size_in_gigabytes * 1024 * 1024 * 1024), ) gpu_cached_feature = features[("node", None, "feat")] gpu_cache_miss_rate_fn = lambda: gpu_cached_feature.miss_rate else: gpu_cache_miss_rate_fn = lambda: 1 train_dataloader, valid_dataloader = ( create_dataloader( graph=graph, features=features, itemset=itemset, batch_size=args.batch_size, fanout=args.fanout, device=args.device, job=job, ) for itemset, job in zip([train_set, valid_set], ["train", "evaluate"]) ) in_channels = features.size("node", None, "feat")[0] model = SAGE( in_channels, args.num_hidden, num_classes, len(args.fanout), args.dropout, ).to(args.device) assert len(args.fanout) == len(model.layers) best_model = train( train_dataloader, valid_dataloader, model, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, args.device, ) model.load_state_dict(best_model) if args.enable_inference: # Test the model. print("Testing...") itemsets = {"train": train_set, "val": valid_set, "test": test_set} final_acc = layerwise_infer( args, graph, features, itemsets, all_nodes_set, model, ) print("Final accuracy values:") print(final_acc) if __name__ == "__main__": args = parse_args() print(args) main() ================================================ FILE: examples/graphbolt/lightning/README.md ================================================ # Node classification on homogeneous graph with GraphSAGE ## Run on `ogbn-products` dataset ### Command ``` python3 node_classification.py ``` ### Results ``` Valid Accuracy: 0.907 ``` ================================================ FILE: examples/graphbolt/lightning/node_classification.py ================================================ """ This flowchart describes the main functional sequence of the provided example. 
main │ ├───> Instantiate DataModule │ │ │ └───> Load dataset │ │ │ └───> Create train and valid dataloader[HIGHLIGHT] │ │ │ └───> ItemSampler (Distribute data to minibatchs) │ │ │ └───> sample_neighbor or sample_layer_neighbor (Sample a subgraph for a minibatch) │ │ │ └───> fetch_feature (Fetch features for the sampled subgraph) │ ├───> Instantiate GraphSAGE model │ │ │ ├───> SAGEConvLayer (input to hidden) │ │ │ └───> SAGEConvLayer (hidden to hidden) │ │ │ └───> SAGEConvLayer (hidden to output) │ │ │ └───> DropoutLayer │ └───> Run │ │ └───> Trainer[HIGHLIGHT] │ ├───> SAGE.forward (GraphSAGE model forward pass) │ └───> Validate """ import argparse import dgl.graphbolt as gb import dgl.nn.pytorch as dglnn import torch import torch.nn as nn import torch.nn.functional as F from pytorch_lightning import LightningDataModule, LightningModule, Trainer from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint from torchmetrics import Accuracy class SAGE(LightningModule): def __init__(self, in_feats, n_hidden, n_classes): super().__init__() self.save_hyperparameters() self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(0.5) self.n_hidden = n_hidden self.n_classes = n_classes self.train_acc = Accuracy(task="multiclass", num_classes=n_classes) self.val_acc = Accuracy(task="multiclass", num_classes=n_classes) def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = F.relu(h) h = self.dropout(h) return h def log_node_and_edge_counts(self, blocks): node_counts = [block.num_src_nodes() for block in blocks] + [ blocks[-1].num_dst_nodes() ] edge_counts = [block.num_edges() for block in blocks] for i, c in enumerate(node_counts): self.log( f"num_nodes/{i}", float(c), prog_bar=True, 
on_step=True, on_epoch=False, ) if i < len(edge_counts): self.log( f"num_edges/{i}", float(edge_counts[i]), prog_bar=True, on_step=True, on_epoch=False, ) def training_step(self, batch, batch_idx): blocks = [block.to("cuda") for block in batch.blocks] x = batch.node_features["feat"] y = batch.labels.to("cuda") y_hat = self(blocks, x) loss = F.cross_entropy(y_hat, y) self.train_acc(torch.argmax(y_hat, 1), y) self.log( "train_acc", self.train_acc, prog_bar=True, on_step=True, on_epoch=False, ) self.log_node_and_edge_counts(blocks) return loss def validation_step(self, batch, batch_idx): blocks = [block.to("cuda") for block in batch.blocks] x = batch.node_features["feat"] y = batch.labels.to("cuda") y_hat = self(blocks, x) self.val_acc(torch.argmax(y_hat, 1), y) self.log( "val_acc", self.val_acc, prog_bar=True, on_step=False, on_epoch=True, sync_dist=True, ) self.log_node_and_edge_counts(blocks) def configure_optimizers(self): optimizer = torch.optim.Adam( self.parameters(), lr=0.001, weight_decay=5e-4 ) return optimizer class DataModule(LightningDataModule): def __init__(self, dataset, fanouts, batch_size, num_workers): super().__init__() self.fanouts = fanouts self.batch_size = batch_size self.num_workers = num_workers self.feature_store = dataset.feature self.graph = dataset.graph self.train_set = dataset.tasks[0].train_set self.valid_set = dataset.tasks[0].validation_set self.num_classes = dataset.tasks[0].metadata["num_classes"] def create_dataloader(self, node_set, is_train): datapipe = gb.ItemSampler( node_set, batch_size=self.batch_size, shuffle=True, drop_last=True, ) sampler = ( datapipe.sample_layer_neighbor if is_train else datapipe.sample_neighbor ) datapipe = sampler(self.graph, self.fanouts) datapipe = datapipe.fetch_feature(self.feature_store, ["feat"]) dataloader = gb.DataLoader(datapipe, num_workers=self.num_workers) return dataloader ######################################################################## # (HIGHLIGHT) The 'train_dataloader' and 
'val_dataloader' hooks are # essential components of the Lightning framework, defining how data is # loaded during training and validation. In this example, we utilize a # specialized 'graphbolt dataloader', which are concatenated by a series # of datapipes, for these purposes. ######################################################################## def train_dataloader(self): return self.create_dataloader(self.train_set, is_train=True) def val_dataloader(self): return self.create_dataloader(self.valid_set, is_train=False) if __name__ == "__main__": parser = argparse.ArgumentParser( description="GNN baselines on ogbn-products data with GraphBolt" ) parser.add_argument( "--num_gpus", type=int, default=1, help="number of GPUs used for computing (default: 1)", ) parser.add_argument( "--batch_size", type=int, default=1024, help="input batch size for training (default: 1024)", ) parser.add_argument( "--epochs", type=int, default=40, help="number of epochs to train (default: 40)", ) parser.add_argument( "--num_workers", type=int, default=0, help="number of workers (default: 0)", ) args = parser.parse_args() dataset = gb.BuiltinDataset("ogbn-products").load() datamodule = DataModule( dataset, [10, 10, 10], args.batch_size, args.num_workers, ) in_size = dataset.feature.size("node", None, "feat")[0] model = SAGE(in_size, 256, datamodule.num_classes) # Train. checkpoint_callback = ModelCheckpoint(monitor="val_acc", mode="max") early_stopping_callback = EarlyStopping(monitor="val_acc", mode="max") ######################################################################## # (HIGHLIGHT) The `Trainer` is the key Class in lightning, which automates # everything after defining `LightningDataModule` and # `LightningDataModule`. More details can be found in # https://lightning.ai/docs/pytorch/stable/common/trainer.html. 
######################################################################## trainer = Trainer( accelerator="gpu", devices=args.num_gpus, max_epochs=args.epochs, callbacks=[checkpoint_callback, early_stopping_callback], ) trainer.fit(model, datamodule=datamodule) ================================================ FILE: examples/graphbolt/link_prediction.py ================================================ """ This script trains and tests a GraphSAGE model for link prediction on large graphs using graphbolt dataloader. Paper: [Inductive Representation Learning on Large Graphs] (https://arxiv.org/abs/1706.02216) Unlike previous dgl examples, we've utilized the newly defined dataloader from GraphBolt. This example will help you grasp how to build an end-to-end training pipeline using GraphBolt. While node classification predicts labels for nodes based on their local neighborhoods, link prediction assesses the likelihood of an edge existing between two nodes, necessitating different sampling strategies that account for pairs of nodes and their joint neighborhoods. TODO: Add the link_prediction.py example to core/graphsage. Before reading this example, please familiar yourself with graphsage link prediction by reading the example in the `examples/core/graphsage/link_prediction.py` If you want to train graphsage on a large graph in a distributed fashion, read the example in the `examples/distributed/graphsage/`. This flowchart describes the main functional sequence of the provided example. 
main │ ├───> OnDiskDataset pre-processing │ ├───> Instantiate SAGE model │ ├───> train │ │ │ ├───> Get graphbolt dataloader (HIGHLIGHT) │ │ │ └───> Training loop │ │ │ ├───> SAGE.forward │ │ │ └───> Validation set evaluation │ └───> Test set evaluation """ import argparse import time from functools import partial import dgl.graphbolt as gb import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F import tqdm from torchmetrics.retrieval import RetrievalMRR class SAGE(nn.Module): def __init__(self, in_size, hidden_size): super().__init__() self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.hidden_size = hidden_size self.predictor = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1), ) def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) return hidden_x def inference(self, graph, features, dataloader, storage_device): """Conduct layer-wise inference to get all the node embeddings.""" pin_memory = storage_device == "pinned" buffer_device = torch.device("cpu" if pin_memory else storage_device) print("Start node embedding inference.") for layer_idx, layer in enumerate(self.layers): is_last_layer = layer_idx == len(self.layers) - 1 y = torch.empty( graph.total_num_nodes, self.hidden_size, dtype=torch.float32, device=buffer_device, pin_memory=pin_memory, ) for data in tqdm.tqdm(dataloader): # len(blocks) = 1 hidden_x = layer(data.blocks[0], data.node_features["feat"]) if not is_last_layer: hidden_x = F.relu(hidden_x) # By design, our seed nodes are contiguous. 
y[data.seeds[0] : data.seeds[-1] + 1] = hidden_x.to( buffer_device, non_blocking=True ) if not is_last_layer: features.update("node", None, "feat", y) return y def create_dataloader(args, graph, features, itemset, is_train=True): """Get a GraphBolt version of a dataloader for link prediction tasks. This function demonstrates how to utilize functional forms of datapipes in GraphBolt. Alternatively, you can create a datapipe using its class constructor. For a more detailed tutorial, please read the examples in `dgl/notebooks/graphbolt/walkthrough.ipynb`. """ ############################################################################ # [Input]: # 'itemset': The current dataset. # 'args.batch_size': Specify the number of samples to be processed together, # referred to as a 'mini-batch'. (The term 'mini-batch' is used here to # indicate a subset of the entire dataset that is processed together. This # is in contrast to processing the entire dataset, known as a 'full batch'.) # 'is_train': Determining if data should be shuffled. (Shuffling is # generally used only in training to improve model generalization. It's # not used in validation and testing as the focus there is to evaluate # performance rather than to learn from the data.) # [Output]: # An ItemSampler object for handling mini-batch sampling. # [Role]: # Initialize the ItemSampler to sample mini-batche from the dataset. ############################################################################ datapipe = gb.ItemSampler( itemset, batch_size=args.train_batch_size if is_train else args.eval_batch_size, shuffle=is_train, ) ############################################################################ # [Input]: # 'device': The device to copy the data to. # [Output]: # A CopyTo object to copy the data to the specified device. Copying here # ensures that the rest of the operations run on the GPU. 
############################################################################ if args.storage_device != "cpu": datapipe = datapipe.copy_to(device=args.device) ############################################################################ # [Input]: # 'args.neg_ratio': Specify the ratio of negative to positive samples. # (E.g., if neg_ratio is 1, for each positive sample there will be 1 # negative sample.) # 'graph': The overall network topology for negative sampling. # [Output]: # A UniformNegativeSampler object that will handle the generation of # negative samples for link prediction tasks. # [Role]: # Initialize the UniformNegativeSampler for negative sampling in link # prediction. # [Note]: # If 'is_train' is False, the UniformNegativeSampler will not be used. # Since, in validation and testing, the itemset already contains the # negative edges information. ############################################################################ if is_train: datapipe = datapipe.sample_uniform_negative(graph, args.neg_ratio) ############################################################################ # [Input]: # 'datapipe' is either 'ItemSampler' or 'UniformNegativeSampler' depending # on whether training is needed ('is_train'), # 'graph': The network topology for sampling. # 'args.fanout': Number of neighbors to sample per node. # [Output]: # A NeighborSampler object to sample neighbors. # [Role]: # Initialize a neighbor sampler for sampling the neighborhoods of nodes. ############################################################################ datapipe = datapipe.sample_neighbor( graph, args.fanout if is_train else [-1], overlap_fetch=args.storage_device == "pinned", asynchronous=args.storage_device != "cpu", ) ############################################################################ # [Input]: # 'gb.exclude_seed_edges': Function to exclude seed edges, optionally # including their reverse edges, from the sampled subgraphs in the # minibatch. 
# [Output]: # A MiniBatchTransformer object with excluded seed edges. # [Role]: # During the training phase of link prediction, negative edges are # sampled. It's essential to exclude the seed edges from the process # to ensure that positive samples are not inadvertently included within # the negative samples. ############################################################################ if is_train and args.exclude_edges: datapipe = datapipe.exclude_seed_edges( include_reverse_edges=True, asynchronous=args.storage_device != "cpu", ) ############################################################################ # [Input]: # 'features': The node features. # 'node_feature_keys': The node feature keys (list) to be fetched. # [Output]: # A FeatureFetcher object to fetch node features. # [Role]: # Initialize a feature fetcher for fetching features of the sampled # subgraphs. ############################################################################ datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"]) ############################################################################ # [Input]: # 'device': The device to copy the data to. # [Output]: # A CopyTo object to copy the data to the specified device. ############################################################################ if args.storage_device == "cpu": datapipe = datapipe.copy_to(device=args.device) ############################################################################ # [Input]: # 'datapipe': The datapipe object to be used for data loading. # 'args.num_workers': The number of processes to be used for data loading. # [Output]: # A DataLoader object to handle data loading. # [Role]: # Initialize a multi-process dataloader to load the data in parallel. ############################################################################ dataloader = gb.DataLoader( datapipe, num_workers=args.num_workers, ) # Return the fully-initialized DataLoader object. 
return dataloader @torch.no_grad() def compute_mrr(args, model, node_emb, seeds, labels, indexes): """Compute the Mean Reciprocal Rank (MRR) for given source and destination nodes. This function computes the MRR for a set of node pairs, dividing the task into batches to handle potentially large graphs. """ preds = torch.empty(seeds.shape[0], device=indexes.device) mrr = RetrievalMRR() seeds_src, seeds_dst = seeds.T # The constant number is 1001, due to negtive ratio in the `ogbl-citation2` # dataset is 1000. eval_size = args.eval_batch_size * 1001 # Loop over node pairs in batches. for start in tqdm.trange(0, seeds_src.shape[0], eval_size, desc="Evaluate"): end = min(start + eval_size, seeds_src.shape[0]) # Fetch embeddings for current batch of source and destination nodes. h_src = node_emb[seeds_src[start:end]].to(args.device) h_dst = node_emb[seeds_dst[start:end]].to(args.device) # Compute prediction scores using the model. pred = model.predictor(h_src * h_dst).squeeze() preds[start:end] = pred return mrr(preds, labels, indexes=indexes) @torch.no_grad() def evaluate(args, model, graph, features, all_nodes_set, valid_set, test_set): """Evaluate the model on validation and test sets.""" model.eval() dataloader = create_dataloader( args, graph, features, all_nodes_set, is_train=False ) # Compute node embeddings for the entire graph. node_emb = model.inference(graph, features, dataloader, args.storage_device) results = [] # Loop over both validation and test sets. for split in [valid_set, test_set]: # Unpack the item set. seeds = split._items[0].to(node_emb.device) labels = split._items[1].to(node_emb.device) indexes = split._items[2].to(node_emb.device) # Compute MRR values for the current split. 
def train(args, model, graph, features, train_set):
    """Train ``model`` for link prediction on ``train_set``.

    Parameters
    ----------
    args : argparse.Namespace
        Run configuration; ``lr``, ``epochs`` and ``early_stop`` are read
        here, the rest is forwarded to ``create_dataloader``.
    model
        Module called as ``model(blocks, node_feature)`` and exposing a
        ``predictor`` sub-module that scores element-wise products of
        node embeddings.
    graph, features, train_set
        Forwarded unchanged to ``create_dataloader``.

    Notes
    -----
    One summary line (mean loss and wall-clock time) is printed per epoch.
    An epoch that yields no mini-batch reports a loss of 0 instead of
    failing on an unbound step counter.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    dataloader = create_dataloader(args, graph, features, train_set)

    for epoch in range(args.epochs):
        model.train()
        total_loss = 0
        # Counted explicitly so the epoch summary never depends on the loop
        # variable, which stays unbound when the dataloader is empty.
        num_steps = 0
        start_epoch_time = time.time()
        for step, data in tqdm.tqdm(enumerate(dataloader)):
            # Seed pairs, re-indexed into the mini-batch's compacted node
            # ids, and their positive/negative labels.
            compacted_seeds = data.compacted_seeds.T
            labels = data.labels
            node_feature = data.node_features["feat"]
            blocks = data.blocks

            # Embeddings of the seed nodes.
            y = model(blocks, node_feature)
            logits = model.predictor(
                y[compacted_seeds[0]] * y[compacted_seeds[1]]
            ).squeeze()

            # Compute loss and take one optimization step.
            loss = F.binary_cross_entropy_with_logits(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_steps = step + 1
            # `early_stop == 0` never matches, i.e. no early stop.
            if num_steps == args.early_stop:
                break

        end_epoch_time = time.time()
        print(
            f"Epoch {epoch:05d} | "
            f"Loss {(total_loss) / max(num_steps, 1):.4f} | "
            f"Time {(end_epoch_time - start_epoch_time):.4f} s"
        )
def main(args):
    """Run the link-prediction example end to end.

    Parses ``args.mode`` into a storage placement and a compute device,
    loads the ``ogbl-citation2`` dataset, moves it to the chosen storage,
    trains a ``SAGE`` model and prints the validation and test MRR.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options. The namespace is updated in place:
        ``storage_device`` and ``device`` are added, ``mode`` may be forced
        to ``"cpu-cpu"``, and ``fanout`` is converted from a comma-separated
        string to a list of ints.
    """
    # Without CUDA every mode degrades to CPU storage and CPU training.
    if not torch.cuda.is_available():
        args.mode = "cpu-cpu"
    print(f"Training in {args.mode} mode.")
    args.storage_device, args.device = args.mode.split("-")
    args.device = torch.device(args.device)

    # Load and preprocess dataset.
    print("Loading data")
    dataset = gb.BuiltinDataset("ogbl-citation2").load()

    # Move the dataset to the selected storage.
    if args.storage_device == "pinned":
        graph = dataset.graph.pin_memory_()
        features = dataset.feature.pin_memory_()
    else:
        graph = dataset.graph.to(args.storage_device)
        features = dataset.feature.to(args.storage_device)

    task = dataset.tasks[0]
    train_set = task.train_set
    args.fanout = [int(num_neighbors) for num_neighbors in args.fanout.split(",")]

    in_size = features.size("node", None, "feat")[0]
    hidden_channels = 256
    model = SAGE(in_size, hidden_channels).to(args.device)

    # Model training.
    print("Training...")
    train(args, model, graph, features, train_set)

    # Test the model.
    print("Testing...")
    test_set = task.test_set
    valid_set = task.validation_set
    all_nodes_set = dataset.all_nodes_set
    valid_mrr, test_mrr = evaluate(
        args, model, graph, features, all_nodes_set, valid_set, test_set
    )
    print(
        f"Validation MRR {valid_mrr.item():.4f}, "
        f"Test MRR {test_mrr.item():.4f}"
    )
def create_dataloader(
    graph,
    features,
    itemset,
    batch_size,
    fanout,
    device,
    num_workers,
    job,
    storage_device=None,
    sample_mode=None,
):
    """
    [HIGHLIGHT]
    Get a GraphBolt version of a dataloader for node classification tasks.
    This function demonstrates how to utilize functional forms of datapipes in
    GraphBolt. For a more detailed tutorial, please read the examples in
    `dgl/notebooks/graphbolt/walkthrough.ipynb`.
    Alternatively, you can create a datapipe using its class constructor.

    Parameters
    ----------
    graph
        The network topology used for neighbor sampling.
    features
        The feature store from which the "feat" node feature is fetched.
    itemset
        The current dataset (e.g. `train_set` or `valid_set`).
    batch_size : int
        Number of items per mini-batch.
    fanout : list
        Number of neighbors to sample per node, one entry per layer. It is
        ignored when `job` is "infer", where [-1] (all neighbors, a single
        layer) is used instead.
    device
        The device the mini-batches are copied to.
    num_workers : int
        The number of processes to be used for data loading.
    job : one of ["train", "evaluate", "infer"]
        The stage where dataloader is created, with options "train",
        "evaluate" and "infer". Items are shuffled only for "train".
    storage_device : str, optional
        Where the dataset is stored ("cpu", "pinned" or "cuda"). When
        omitted, the module-level `args.storage_device` is used, which is
        only available when this file runs as a script.
    sample_mode : str, optional
        Name of the datapipe method used for sampling ("sample_neighbor" or
        "sample_layer_neighbor"). When omitted, the module-level
        `args.sample_mode` is used.

    Returns
    -------
    gb.DataLoader
        The dataloader wrapping the assembled datapipe.
    """
    # Fall back to the script-level configuration so that existing callers,
    # which do not pass these two options, keep working unchanged.
    if storage_device is None:
        storage_device = args.storage_device
    if sample_mode is None:
        sample_mode = args.sample_mode

    ############################################################################
    # [Step-1]:
    # gb.ItemSampler()
    # [Role]:
    # Split the item set into mini-batches of `batch_size` items. Shuffling is
    # used only in training; validation and testing only measure performance,
    # so the visiting order is irrelevant there.
    ############################################################################
    datapipe = gb.ItemSampler(
        itemset, batch_size=batch_size, shuffle=(job == "train")
    )

    ############################################################################
    # [Step-2]:
    # self.copy_to()
    # [Role]:
    # When the dataset is not stored in plain CPU memory, copy the seeds to
    # `device` now so that the rest of the operations run on the GPU.
    ############################################################################
    if storage_device != "cpu":
        datapipe = datapipe.copy_to(device=device)

    ############################################################################
    # [Step-3]:
    # self.sample_neighbor() or self.sample_layer_neighbor()
    # [Role]:
    # Sample the neighborhoods of the seed nodes. In training or validation
    # the length of `fanout` should equal the number of layers in the model.
    # In inference [-1] is used, i.e. all neighbors of a node are sampled.
    ############################################################################
    datapipe = getattr(datapipe, sample_mode)(
        graph,
        fanout if job != "infer" else [-1],
        overlap_fetch=storage_device == "pinned",
        asynchronous=storage_device != "cpu",
    )

    ############################################################################
    # [Step-4]:
    # self.fetch_feature()
    # [Role]:
    # Fetch the "feat" node feature of the sampled subgraphs.
    ############################################################################
    datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])

    ############################################################################
    # [Step-5]:
    # self.copy_to()
    # [Role]:
    # With CPU storage all previous steps ran on the CPU, so the finished
    # mini-batch is copied to `device` here.
    ############################################################################
    if storage_device == "cpu":
        datapipe = datapipe.copy_to(device=device)

    ############################################################################
    # [Step-6]:
    # gb.DataLoader()
    # [Role]:
    # Wrap the datapipe in a dataloader that uses `num_workers` processes.
    ############################################################################
    dataloader = gb.DataLoader(datapipe, num_workers=num_workers)

    # Return the fully-initialized DataLoader object.
    return dataloader
def train(args, graph, features, train_set, valid_set, num_classes, model):
    """Train ``model`` for node classification, validating after each epoch.

    Parameters
    ----------
    args : argparse.Namespace
        Run configuration; ``lr``, ``batch_size``, ``fanout``, ``device``,
        ``num_workers`` and ``epochs`` are read here.
    graph, features
        Topology and feature store handed to the dataloader.
    train_set, valid_set
        Item sets used for optimization and for the per-epoch evaluation.
    num_classes : int
        Number of target classes, forwarded to ``evaluate``.
    model
        Module called as ``model(blocks, x)`` returning class logits.

    Notes
    -----
    One line with mean loss, validation accuracy and training time is
    printed per epoch. An epoch without any mini-batch reports a loss of 0
    instead of failing on an unbound step counter.
    """
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=5e-4
    )
    dataloader = create_dataloader(
        graph=graph,
        features=features,
        itemset=train_set,
        batch_size=args.batch_size,
        fanout=args.fanout,
        device=args.device,
        num_workers=args.num_workers,
        job="train",
    )

    for epoch in range(args.epochs):
        t0 = time.time()
        model.train()
        total_loss = 0
        # Counted explicitly so the summary below does not rely on the loop
        # variable, which stays unbound when the dataloader is empty.
        num_steps = 0
        for step, data in tqdm(enumerate(dataloader), "Training"):
            # The input features from the source nodes in the first layer's
            # computation graph.
            x = data.node_features["feat"]

            # The ground truth labels from the destination nodes
            # in the last layer's computation graph.
            y = data.labels

            y_hat = model(data.blocks, x)

            # Compute loss.
            loss = F.cross_entropy(y_hat, y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            num_steps = step + 1

        # Only the optimization part is timed; evaluation is excluded.
        t1 = time.time()
        # Evaluate the model.
        acc = evaluate(args, model, graph, features, valid_set, num_classes)
        print(
            f"Epoch {epoch:05d} | Loss {total_loss / max(num_steps, 1):.4f} | "
            f"Accuracy {acc.item():.4f} | Time {t1 - t0:.4f}"
        )
def main(args):
    """Run the node-classification example end to end.

    Resolves ``args.mode`` into a storage placement and a compute device,
    loads the requested builtin dataset, moves it to the chosen storage,
    trains a three-layer ``SAGE`` model and prints the test accuracy obtained
    by layer-wise inference.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options. Updated in place: ``storage_device``
        and ``device`` are added, ``mode`` may be forced to ``"cpu-cpu"``,
        and ``fanout`` becomes a list of ints.
    """
    # Without CUDA every mode degrades to CPU storage and CPU training.
    if not torch.cuda.is_available():
        args.mode = "cpu-cpu"
    print(f"Training in {args.mode} mode.")
    storage_name, device_name = args.mode.split("-")
    args.storage_device = storage_name
    args.device = torch.device(device_name)

    # Load and preprocess dataset.
    print("Loading data...")
    dataset = gb.BuiltinDataset(args.dataset).load()

    # Move the dataset to the selected storage.
    keep_pinned = args.storage_device == "pinned"
    if keep_pinned:
        graph = dataset.graph.pin_memory_()
        features = dataset.feature.pin_memory_()
    else:
        graph = dataset.graph.to(args.storage_device)
        features = dataset.feature.to(args.storage_device)

    train_set = dataset.tasks[0].train_set
    valid_set = dataset.tasks[0].validation_set
    test_set = dataset.tasks[0].test_set
    all_nodes_set = dataset.all_nodes_set

    args.fanout = [int(num_neighbors) for num_neighbors in args.fanout.split(",")]

    num_classes = dataset.tasks[0].metadata["num_classes"]
    in_size = features.size("node", None, "feat")[0]
    model = SAGE(in_size, 256, num_classes)
    # One fanout entry is consumed per model layer.
    assert len(args.fanout) == len(model.layers)
    model = model.to(args.device)

    # Model training.
    print("Training...")
    train(args, graph, features, train_set, valid_set, num_classes, model)

    # Test the model.
    print("Testing...")
    test_acc = layerwise_infer(
        args, graph, features, test_set, all_nodes_set, model, num_classes
    )
    print(f"Test accuracy {test_acc.item():.4f}")
def accuracy(out, labels):
    """Return the fraction of rows in ``out`` whose argmax equals the label.

    Parameters
    ----------
    out : torch.Tensor
        Two-dimensional score tensor with one row per sample.
    labels : torch.Tensor
        Target class indices, either one-dimensional or a single column; one
        entry per row of ``out``.

    Returns
    -------
    torch.Tensor
        Zero-dimensional ``float64`` tensor holding the accuracy.
    """
    assert out.ndim == 2
    num_samples = labels.size(0)
    assert out.size(0) == num_samples
    assert labels.ndim == 1 or (labels.ndim == 2 and labels.size(1) == 1)
    flat_labels = labels.flatten()
    predicted = out.argmax(dim=1)
    # Accumulate the hit count in float64, as the callers sum it over batches.
    num_hits = flat_labels.eq(predicted).sum(dtype=torch.float64)
    return num_hits / num_samples
class RelGraphConvLayer(nn.Module):
    """One relational graph-convolution layer over a heterogeneous subgraph.

    For every edge type the source features are mean-aggregated onto the
    destination nodes and then projected by that edge type's own linear
    map; contributions arriving at the same destination node type are
    summed. A per-node-type linear map of the destination nodes' own
    features is then added, followed by the activation and dropout.
    """

    def __init__(
        self,
        in_size,
        out_size,
        ntypes,
        etypes,
        activation,
        dropout=0.0,
    ):
        """Create the per-edge-type and per-node-type sub-modules.

        Parameters
        ----------
        in_size, out_size : int
            Input and output feature sizes of every linear map.
        ntypes, etypes : iterable of str
            Node type and edge type names used as module-dict keys.
        activation : callable
            Applied to the combined features before dropout.
        dropout : float
            Dropout probability.
        """
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.activation = activation

        # Message passing per relationship. PyG's SimpleConv has no weights;
        # it only propagates and aggregates (mean) the source features.
        self.convs = nn.ModuleDict()
        for etype in etypes:
            self.convs[etype] = SimpleConv(aggr="mean")

        # One bias-free projection per relationship, applied to the
        # aggregated messages of that relationship.
        self.weight = nn.ModuleDict()
        for etype in etypes:
            self.weight[etype] = nn.Linear(in_size, out_size, bias=False)

        # One projection per node type, applied to a target node's own
        # features. It plays the role of a residual connection and does not
        # imply that the graph contains self-loop edges.
        self.loop_weights = nn.ModuleDict()
        for ntype in ntypes:
            self.loop_weights[ntype] = nn.Linear(in_size, out_size, bias=True)

        self.dropout = nn.Dropout(dropout)

    def forward(self, subgraph, x):
        """Compute the layer output for the destination nodes of ``subgraph``.

        Parameters
        ----------
        subgraph
            Sampled subgraph exposing ``to_pyg``.
        x : dict
            Node features keyed by node type.

        Returns
        -------
        dict
            Output features keyed by node type, containing only the node
            types that are the destination of at least one edge type.
        """
        # `to_pyg` yields the source features, the destination-node slice of
        # the features, and per-edge-type edge indices and sizes. Working on
        # the destination slice keeps the shapes of the aggregated messages
        # and of the self contribution aligned.
        (src_feats, dst_feats), edge_index, size = subgraph.to_pyg(x)

        aggregated = {}
        for etype in edge_index:
            src_ntype, _, dst_ntype = gb.etype_str_to_tuple(etype)
            # h_dst is unused in SimpleConv.
            message = self.convs[etype](
                (src_feats[src_ntype], dst_feats[dst_ntype]),
                edge_index[etype],
                size=size[etype],
            )
            message = self.weight[etype](message)
            if dst_ntype not in aggregated:
                aggregated[dst_ntype] = message
            else:
                aggregated[dst_ntype] += message

        # Add the projected own features of each destination node, then apply
        # the activation function and dropout.
        return {
            ntype: self.dropout(
                self.activation(
                    summed + self.loop_weights[ntype](dst_feats[ntype])
                )
            )
            for ntype, summed in aggregated.items()
        }
@torch.compile
def train_step(minibatch, optimizer, model, loss_fn):
    """Run one optimization step on ``minibatch`` for the "paper" category.

    Parameters
    ----------
    minibatch
        Mini-batch providing ``node_features`` (keyed by
        ``(node type, feature name)``), ``labels`` (keyed by node type) and
        ``sampled_subgraphs``.
    optimizer
        Optimizer whose gradients are reset and which is stepped here.
    model
        Called as ``model(sampled_subgraphs, node_features)``; its output is
        indexed by node type.
    loss_fn
        Callable computing the loss from ``(out, labels)``.

    Returns
    -------
    tuple
        ``(loss, num_correct, num_samples)``: the detached loss, a
        placeholder zero tensor for the number of correct predictions (see
        the note in the body), and the number of labels in the batch.
    """
    category = "paper"
    # Keep only the "feat" feature of every node type, cast to float32.
    node_features = {
        ntype: feat.float()
        for (ntype, name), feat in minibatch.node_features.items()
        if name == "feat"
    }
    labels = minibatch.labels[category].long()
    optimizer.zero_grad()
    out = model(minibatch.sampled_subgraphs, node_features)[category]
    loss = loss_fn(out, labels)
    # The training accuracy computation is disabled because of
    # https://github.com/pytorch/pytorch/issues/133942
    # num_correct = accuracy(out, labels) * labels.size(0)
    # A zero tensor is returned in its place, so any accuracy derived from
    # this value is 0.
    num_correct = torch.zeros(1, dtype=torch.float64, device=out.device)
    loss.backward()
    optimizer.step()
    # Detach so callers can accumulate the loss without keeping the graph.
    return loss.detach(), num_correct, labels.size(0)
def train(
    train_dataloader,
    valid_dataloader,
    model,
    gpu_cache_miss_rate_fn,
    cpu_cache_miss_rate_fn,
    device,
):
    """Train ``model`` for ``args.epochs`` epochs, validating after each one.

    Parameters
    ----------
    train_dataloader, valid_dataloader
        Dataloaders for the training and validation item sets.
    model
        The module to optimize.
    gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn : callable
        Zero-argument callables forwarded to ``train_helper`` and
        ``evaluate``.
    device
        Forwarded to ``train_helper`` and ``evaluate``.

    Notes
    -----
    The learning rate and the number of epochs come from the module-level
    ``args``. One summary line is printed per epoch.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    loss_fn = nn.CrossEntropyLoss()
    # Trailing arguments shared by the training and evaluation helpers.
    shared_args = (gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, device)

    epoch = 0
    while epoch < args.epochs:
        train_loss, train_acc, duration = train_helper(
            train_dataloader, model, optimizer, loss_fn, *shared_args
        )
        val_acc = evaluate(model, valid_dataloader, *shared_args)
        summary = (
            f"Epoch: {epoch:02d}, Loss: {train_loss.item():.4f}, "
            f"Approx. Train: {train_acc.item():.4f}, "
            f"Approx. Val: {val_acc.item():.4f}, "
            f"Time: {duration}s"
        )
        print(summary)
        epoch += 1
Default: 25,10", ) parser.add_argument( "--mode", default="pinned-pinned-cuda", choices=[ "cpu-cpu-cpu", "cpu-cpu-cuda", "cpu-pinned-cuda", "pinned-pinned-cuda", "cuda-pinned-cuda", "cuda-cuda-cuda", ], help="Graph storage - feature storage - Train device: 'cpu' for CPU and RAM," " 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.", ) parser.add_argument( "--sample-mode", default="sample_neighbor", choices=["sample_neighbor", "sample_layer_neighbor"], help="The sampling function when doing layerwise sampling.", ) parser.add_argument( "--cpu-feature-cache-policy", type=str, default=None, choices=["s3-fifo", "sieve", "lru", "clock"], help="The cache policy for the CPU feature cache.", ) parser.add_argument( "--cpu-cache-size", type=float, default=0, help="The capacity of the CPU feature cache in GiB.", ) parser.add_argument( "--gpu-cache-size", type=float, default=0, help="The capacity of the GPU feature cache in GiB.", ) parser.add_argument( "--num-gpu-cached-edges", type=int, default=0, help="The number of edges to be cached from the graph on the GPU.", ) parser.add_argument( "--gpu-graph-caching-threshold", type=int, default=1, help="The number of accesses after which a vertex neighborhood will be cached.", ) parser.add_argument("--precision", type=str, default="high") return parser.parse_args() def main(): torch.set_float32_matmul_precision(args.precision) if not torch.cuda.is_available(): args.mode = "cpu-cpu-cpu" print(f"Training in {args.mode} mode.") args.graph_device, args.feature_device, args.device = args.mode.split("-") args.overlap_feature_fetch = args.feature_device == "pinned" args.overlap_graph_fetch = args.graph_device == "pinned" # Load dataset. dataset = gb.BuiltinDataset(args.dataset).load() print("Dataset loaded") # Move the dataset to the selected storage. 
graph = ( dataset.graph.pin_memory_() if args.graph_device == "pinned" else dataset.graph.to(args.graph_device) ) features = ( dataset.feature.pin_memory_() if args.feature_device == "pinned" else dataset.feature.to(args.feature_device) ) train_set = dataset.tasks[0].train_set valid_set = dataset.tasks[0].validation_set test_set = dataset.tasks[0].test_set args.fanout = list(map(int, args.fanout.split(","))) num_classes = dataset.tasks[0].metadata["num_classes"] num_etypes = len(graph.num_edges) feats_on_disk = { k: features[k] for k in features.keys() if k[2] == "feat" and isinstance(features[k], gb.DiskBasedFeature) } if args.cpu_cache_size > 0 and len(feats_on_disk) > 0: cached_features = gb.cpu_cached_feature( feats_on_disk, int(args.cpu_cache_size * (2**30)), args.cpu_feature_cache_policy, args.feature_device == "pinned", ) for k, cpu_cached_feature in cached_features.items(): features[k] = cpu_cached_feature cpu_cache_miss_rate_fn = lambda: cpu_cached_feature.miss_rate else: cpu_cache_miss_rate_fn = lambda: 1 if args.gpu_cache_size > 0 and args.feature_device != "cuda": feats = {k: features[k] for k in features.keys() if k[2] == "feat"} cached_features = gb.gpu_cached_feature( feats, int(args.gpu_cache_size * (2**30)), ) for k, gpu_cached_feature in cached_features.items(): features[k] = gpu_cached_feature gpu_cache_miss_rate_fn = lambda: gpu_cached_feature.miss_rate else: gpu_cache_miss_rate_fn = lambda: 1 train_dataloader, valid_dataloader, test_dataloader = ( create_dataloader( graph=graph, features=features, itemset=itemset, batch_size=args.batch_size, fanout=[ torch.full((num_etypes,), fanout) for fanout in args.fanout ], device=args.device, job=job, ) for itemset, job in zip( [train_set, valid_set, test_set], ["train", "evaluate", "evaluate"] ) ) feat_size = features.size("node", "paper", "feat")[0] hidden_channels = args.num_hidden # Initialize the entity classification model. 
model = EntityClassify( graph, feat_size, hidden_channels, num_classes, len(args.fanout) ).to(args.device) print( "Number of model parameters: " f"{sum(p.numel() for p in model.parameters())}" ) train( train_dataloader, valid_dataloader, model, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, args.device, ) # Labels are currently unavailable for mag240M so the test acc will be 0. print("Testing...") test_acc = evaluate( model, test_dataloader, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, args.device, ) print(f"Test accuracy {test_acc.item():.4f}") if __name__ == "__main__": args = parse_args() main() ================================================ FILE: examples/graphbolt/pyg/labor/README.md ================================================ Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs ============ - Paper link: [NeurIPS 2023](https://papers.nips.cc/paper_files/paper/2023/hash/51f9036d5e7ae822da8f6d4adda1fb39-Abstract-Conference.html) This is an official Labor sampling example to showcase the use of [dgl.graphbolt.LayerNeighborSampler](https://docs.dgl.ai/en/latest/generated/dgl.graphbolt.LayerNeighborSampler.html). This sampler has 2 parameters, `layer_dependency=[False|True]` and `batch_dependency=k`, where k is any nonnegative integer. We use early stopping so that the final accuracy numbers are reported with a fairly well converged model. Additional contributions to improve the validation accuracy are welcome, and hence hopefully also improving the test accuracy. ### layer_dependency Enabling this parameter by the command line option `--layer-dependency` makes it so that the random variates for sampling are identical across layers. This ensures that the same vertex gets the same neighborhood in each layer. ### batch_dependency This method is proposed in Section 3.2 of [Cooperative Minibatching in Graph Neural Networks](https://arxiv.org/pdf/2310.12403), where it is denoted as kappa.
It makes the random variates used across minibatches dependent, thus increasing temporal locality. When used with a cache, the increase in temporal locality can be observed by monitoring the drop in the cache miss rate with higher values of the batch dependency parameter, speeding up embedding transfers to the GPU. ### Performance The training and evaluation steps are always compiled with `torch.compile` for best performance, so no separate `--torch-compile` option is needed. If your GPU has spare memory, consider using `--mode=cuda-cuda-cuda` to move the whole dataset to the GPU. If not, consider using `--mode=cuda-pinned-cuda --num-gpu-cached-features=N` to keep the graph on the GPU and features in system RAM with `N` of the node features cached on the GPU. If you cannot even fit the graph on the GPU, then consider using `--mode=pinned-pinned-cuda --num-gpu-cached-features=N`. Finally, you can use `--mode=cpu-pinned-cuda --num-gpu-cached-features=N` to perform the sampling operation on the CPU. ### Examples We use `--num-gpu-cached-features=500000` to cache 500k of the node embeddings for the `ogbn-products` dataset (default). Check the command line arguments to see which other datasets can be run. When running with the yelp dataset, using `--dropout=0` gives better final validation and test accuracy. Example run with batch_dependency=1, cache miss rate is 62%: ```bash python node_classification.py --num-gpu-cached-features=500000 --batch-dependency=1 Training in pinned-pinned-cuda mode. Loading data... The dataset is already preprocessed.
Training: 192it [00:03, 54.34it/s, num_nodes=250479, cache_miss=0.221] Evaluating: 39it [00:00, 84.66it/s, num_nodes=135142, cache_miss=0.226] Epoch 00, Loss: 1.1288, Approx. Train: 0.6993, Approx. Val: 0.8607, Time: 3.5339605808258057s ``` Example run with layer_dependency=True, # sampled nodes is 190k vs 250k without this option: ```bash python node_classification.py --num-gpu-cached-features=500000 --layer-dependency Training in pinned-pinned-cuda mode. Loading data... The dataset is already preprocessed. Training: 192it [00:03, 54.03it/s, num_nodes=191259, cache_miss=0.626] Evaluating: 39it [00:00, 79.49it/s, num_nodes=108720, cache_miss=0.627] Epoch 00, Loss: 1.1495, Approx. Train: 0.6932, Approx. Val: 0.8586, Time: 3.5540308952331543s ``` Example run with the original GraphSAGE sampler (Neighbor Sampler), # sampled nodes is 520k, more than 2x higher than Labor sampler. ```bash python node_classification.py --num-gpu-cached-features=500000 --sample-mode=sample_neighbor Training in pinned-pinned-cuda mode. Loading data... The dataset is already preprocessed. Training: 192it [00:04, 45.60it/s, num_nodes=517522, cache_miss=0.563] Evaluating: 39it [00:00, 77.53it/s, num_nodes=255686, cache_miss=0.565] Epoch 00, Loss: 1.1152, Approx. Train: 0.7015, Approx. 
def load_dataset(dataset_name, disk_based_feature_keys=None):
    """Load a dataset by name and report whether its task is multilabel.

    Names handled by ``load_dgl`` are delegated to it. Any other name is
    opened as a ``gb.BuiltinDataset``; before loading, every feature entry
    whose ``(domain, type, name)`` key is listed in
    ``disk_based_feature_keys`` gets ``in_memory`` set to ``False`` in the
    dataset's in-memory YAML data (the YAML file itself is not modified).

    Args:
        dataset_name: Name of the dataset. A name containing ``"mag240M"``
            is mapped to ``"ogb-lsc-mag240m"``.
        disk_based_feature_keys: Optional collection of
            ``(domain, type, name)`` keys to keep on disk.

    Returns:
        A ``(dataset, multilabel)`` tuple.
    """
    legacy_names = ["reddit", "cora", "citeseer", "pubmed", "yelp", "flickr"]
    if dataset_name in legacy_names:
        dataset, multilabel = load_dgl(dataset_name)
        return dataset, multilabel

    if "mag240M" in dataset_name:
        dataset_name = "ogb-lsc-mag240m"
    builtin = gb.BuiltinDataset(dataset_name)
    on_disk_keys = (
        set() if disk_based_feature_keys is None else disk_based_feature_keys
    )
    for entry in builtin.yaml_data["feature_data"]:
        key = (entry["domain"], entry["type"], entry["name"])
        if key in on_disk_keys:
            # Only the parsed YAML data is changed, not the file on disk.
            entry["in_memory"] = False
    return builtin.load(), False
def accuracy(out, labels):
    """Return the fraction of rows of ``out`` whose argmax equals the label.

    Args:
        out: 2-D tensor with one row of class scores per sample.
        labels: Tensor of class indices, either 1-D or 2-D with a single
            column, with as many rows as ``out``.

    Returns:
        A 0-dim ``torch.float64`` tensor holding the accuracy.
    """
    assert out.ndim == 2
    assert out.size(0) == labels.size(0)
    assert labels.ndim == 1 or (labels.ndim == 2 and labels.size(1) == 1)
    num_samples = labels.size(0)
    predicted = out.argmax(dim=1)
    hits = labels.flatten() == predicted
    # Summing in float64 keeps the result a tensor, as in the original.
    return hits.sum(dtype=torch.float64) / num_samples
def create_dataloader(
    graph, features, itemset, batch_size, fanout, device, job
):
    """Assemble the GraphBolt datapipe for one job and wrap it in a DataLoader.

    The pipeline is: item sampling -> neighbor sampling -> feature fetching.
    Exactly one ``copy_to(device)`` stage is inserted: before sampling when
    the graph is not stored on the CPU, otherwise before feature fetching
    when the features are not stored on the CPU, otherwise at the very end.

    The function reads the module-level ``args`` namespace (graph/feature
    devices, sampling mode and its dependency options, overlap flags and the
    number of workers).

    Args:
        graph: Graph passed to the sampling stage.
        features: Feature store passed to ``fetch_feature``.
        itemset: Items from which mini-batches are drawn.
        batch_size: Mini-batch size.
        fanout: Per-layer fanout; ignored (replaced by ``[-1]``) when
            ``job == "infer"``.
        device: Target device of the copy stage.
        job: ``"train"`` enables shuffling and dropping the last batch;
            ``"infer"`` forces the ``[-1]`` fanout.

    Returns:
        A ``gb.DataLoader`` over the assembled datapipe.
    """
    is_training = job == "train"
    pipe = gb.ItemSampler(
        itemset,
        batch_size=batch_size,
        shuffle=is_training,
        drop_last=is_training,
    )

    # Tracks whether the single copy stage has already been placed.
    on_device = False
    if args.graph_device != "cpu":
        pipe = pipe.copy_to(device=device)
        on_device = True

    # The dependency options only exist for the layer-neighbor sampler.
    sampler_options = {}
    if args.sample_mode == "sample_layer_neighbor":
        sampler_options["layer_dependency"] = args.layer_dependency
        sampler_options["batch_dependency"] = args.batch_dependency
    sample = getattr(pipe, args.sample_mode)
    pipe = sample(
        graph,
        [-1] if job == "infer" else fanout,
        overlap_fetch=args.overlap_graph_fetch,
        asynchronous=args.graph_device != "cpu",
        **sampler_options,
    )

    if args.feature_device != "cpu" and not on_device:
        pipe = pipe.copy_to(device=device)
        on_device = True

    pipe = pipe.fetch_feature(
        features,
        node_feature_keys=["feat"],
        overlap_fetch=args.overlap_feature_fetch,
    )

    if not on_device:
        pipe = pipe.copy_to(device=device)

    return gb.DataLoader(pipe, num_workers=args.num_workers)
def train(
    train_dataloader,
    valid_dataloader,
    model,
    multilabel,
    eval_fn,
    gpu_cache_miss_rate_fn,
    cpu_cache_miss_rate_fn,
    device,
):
    """Train ``model`` with early stopping and return the best weights.

    After every epoch the model is evaluated on the validation set. A deep
    copy of the ``state_dict`` is kept for the epoch with the highest
    validation score seen so far, and training stops once no improvement has
    been observed for more than ``args.early_stopping_patience`` epochs.

    The returned value is always a valid ``state_dict``: the first evaluated
    epoch is recorded unconditionally (so a validation score that never
    rises above 0 no longer leaves the result as ``None``), and if no epoch
    runs at all the current weights are returned.

    Args:
        train_dataloader: Dataloader yielding training mini-batches.
        valid_dataloader: Dataloader yielding validation mini-batches.
        model: The model to optimize in place.
        multilabel: Selects ``BCEWithLogitsLoss`` when true, otherwise
            ``CrossEntropyLoss``.
        eval_fn: Metric function forwarded to the train/evaluate helpers.
        gpu_cache_miss_rate_fn: Callable reporting the GPU cache miss rate.
        cpu_cache_miss_rate_fn: Callable reporting the CPU cache miss rate.
        device: Device on which the accumulators are allocated.

    Returns:
        A deep copy of the ``state_dict`` of the best model.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    loss_fn = nn.BCEWithLogitsLoss() if multilabel else nn.CrossEntropyLoss()
    best_model = None
    best_model_acc = 0
    best_model_epoch = -1

    for epoch in range(args.epochs):
        train_loss, train_acc, duration = train_helper(
            train_dataloader,
            model,
            optimizer,
            loss_fn,
            multilabel,
            eval_fn,
            gpu_cache_miss_rate_fn,
            cpu_cache_miss_rate_fn,
            device,
        )
        val_acc = evaluate(
            model,
            valid_dataloader,
            eval_fn,
            gpu_cache_miss_rate_fn,
            cpu_cache_miss_rate_fn,
            device,
        )
        # ``best_model is None`` guarantees that a checkpoint exists even
        # when the validation score never exceeds the initial 0.
        if best_model is None or val_acc > best_model_acc:
            best_model_acc = val_acc
            best_model = deepcopy(model.state_dict())
            best_model_epoch = epoch
        print(
            f"Epoch {epoch:02d}, Loss: {train_loss.item():.4f}, "
            f"Approx. Train: {train_acc.item():.4f}, "
            f"Approx. Val: {val_acc.item():.4f}, "
            f"Time: {duration}s"
        )
        if best_model_epoch + args.early_stopping_patience < epoch:
            break

    if best_model is None:
        # No epoch was run (e.g. --epochs=0): fall back to the current
        # weights so the caller can still call ``load_state_dict``.
        best_model = deepcopy(model.state_dict())
    return best_model
) parser.add_argument( "--lr", type=float, default=0.001, help="Learning rate for optimization.", ) parser.add_argument("--num-hidden", type=int, default=256) parser.add_argument("--dropout", type=float, default=0.5) parser.add_argument( "--batch-size", type=int, default=1024, help="Batch size for training." ) parser.add_argument( "--num-workers", type=int, default=0, help="Number of workers for data loading.", ) parser.add_argument( "--dataset", type=str, default="ogbn-products", choices=[ "ogbn-arxiv", "ogbn-products", "ogbn-papers100M", "igb-hom-tiny", "igb-hom-small", "igb-hom-medium", "igb-hom-large", "igb-hom", "reddit", "yelp", "flickr", ], ) parser.add_argument( "--fanout", type=str, default="10,10,10", help="Fan-out of neighbor sampling. len(fanout) determines the number of" " GNN layers in your model. Default: 10,10,10", ) parser.add_argument( "--mode", default="pinned-pinned-cuda", choices=[ "cpu-cpu-cpu", "cpu-cpu-cuda", "cpu-pinned-cuda", "pinned-pinned-cuda", "cuda-pinned-cuda", "cuda-cuda-cuda", ], help="Graph storage - feature storage - Train device: 'cpu' for CPU and" " RAM, 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.", ) parser.add_argument("--layer-dependency", action="store_true") parser.add_argument("--batch-dependency", type=int, default=1) parser.add_argument( "--cpu-feature-cache-policy", type=str, default=None, choices=["s3-fifo", "sieve", "lru", "clock"], help="The cache policy for the CPU feature cache.", ) parser.add_argument( "--num-cpu-cached-features", type=int, default=0, help="The capacity of the CPU cache, the number of features to store.", ) parser.add_argument( "--num-gpu-cached-features", type=int, default=0, help="The capacity of the GPU cache, the number of features to store.", ) parser.add_argument("--early-stopping-patience", type=int, default=25) parser.add_argument( "--sample-mode", default="sample_layer_neighbor", choices=["sample_neighbor", "sample_layer_neighbor"], help="The sampling function when 
doing layerwise sampling.", ) parser.add_argument( "--sage-model-variant", default="custom", choices=["custom", "original"], help="The custom SAGE GNN model provides higher accuracy with lower" " runtime performance.", ) parser.add_argument("--precision", type=str, default="high") return parser.parse_args() def main(): torch.set_float32_matmul_precision(args.precision) if not torch.cuda.is_available(): args.mode = "cpu-cpu-cpu" print(f"Training in {args.mode} mode.") args.graph_device, args.feature_device, args.device = args.mode.split("-") args.overlap_feature_fetch = args.feature_device == "pinned" args.overlap_graph_fetch = args.graph_device == "pinned" # Load and preprocess dataset. print("Loading data...") disk_based_feature_keys = None if args.num_cpu_cached_features > 0: disk_based_feature_keys = [("node", None, "feat")] dataset, multilabel = load_dataset(args.dataset, disk_based_feature_keys) # Move the dataset to the selected storage. graph = ( dataset.graph.pin_memory_() if args.graph_device == "pinned" else dataset.graph.to(args.graph_device) ) features = ( dataset.feature.pin_memory_() if args.feature_device == "pinned" else dataset.feature.to(args.feature_device) ) train_set = dataset.tasks[0].train_set valid_set = dataset.tasks[0].validation_set test_set = dataset.tasks[0].test_set all_nodes_set = dataset.all_nodes_set args.fanout = list(map(int, args.fanout.split(","))) num_classes = dataset.tasks[0].metadata["num_classes"] feature_index_device = ( args.feature_device if args.feature_device != "pinned" else None ) feature_num_bytes = ( features[("node", None, "feat")] # Read a single row to query its size in bytes. 
.read(torch.zeros(1, device=feature_index_device).long()).nbytes ) if args.num_cpu_cached_features > 0 and isinstance( features[("node", None, "feat")], gb.DiskBasedFeature ): features[("node", None, "feat")] = gb.cpu_cached_feature( features[("node", None, "feat")], args.num_cpu_cached_features * feature_num_bytes, args.cpu_feature_cache_policy, args.feature_device == "pinned", ) cpu_cached_feature = features[("node", None, "feat")] cpu_cache_miss_rate_fn = lambda: cpu_cached_feature.miss_rate else: cpu_cache_miss_rate_fn = lambda: 1 if args.num_gpu_cached_features > 0 and args.feature_device != "cuda": features[("node", None, "feat")] = gb.gpu_cached_feature( features[("node", None, "feat")], args.num_gpu_cached_features * feature_num_bytes, ) gpu_cached_feature = features[("node", None, "feat")] gpu_cache_miss_rate_fn = lambda: gpu_cached_feature.miss_rate else: gpu_cache_miss_rate_fn = lambda: 1 train_dataloader, valid_dataloader = ( create_dataloader( graph=graph, features=features, itemset=itemset, batch_size=args.batch_size, fanout=args.fanout, device=args.device, job=job, ) for itemset, job in zip([train_set, valid_set], ["train", "evaluate"]) ) in_channels = features.size("node", None, "feat")[0] model = GraphSAGE( in_channels, args.num_hidden, num_classes, len(args.fanout), args.dropout, args.sage_model_variant, ).to(args.device) assert len(args.fanout) == len(model.layers) eval_fn = ( partial( # TODO @mfbalin: Find an implementation that does not synchronize. MF.f1_score, task="multilabel", num_labels=num_classes, validate_args=False, ) if multilabel else accuracy ) best_model = train( train_dataloader, valid_dataloader, model, multilabel, eval_fn, gpu_cache_miss_rate_fn, cpu_cache_miss_rate_fn, args.device, ) model.load_state_dict(best_model) # Test the model. 
class SAGEConv(MessagePassing):
    r"""A variant of the GraphSAGE operator from the `"Inductive
    Representation Learning on Large Graphs"
    <https://arxiv.org/abs/1706.02216>`_ paper.

    .. math::
        \mathbf{x}^{\prime}_i = \mathbf{W}_1 \mathbf{x}_i + \mathbf{W}_2 \cdot
        \mathrm{mean}_{j \in \mathcal{N(i)}} \mathbf{x}_j

    If :obj:`project = True`, then :math:`\mathbf{x}_j` will first get
    projected via

    .. math::
        \mathbf{x}_j \leftarrow \sigma ( \mathbf{W}_3 \mathbf{x}_j +
        \mathbf{b})

    as described in Eq. (3) of the paper. In this implementation
    :math:`\sigma` is GELU, the projection is enabled by default, and the
    root transformation applied to the target features has no bias.

    Args:
        in_channels (int or tuple): Size of each input sample, or :obj:`-1` to
            derive the size from the first input(s) to the forward method.
            A tuple corresponds to the sizes of source and target
            dimensionalities.
        out_channels (int): Size of each output sample.
        aggr (str or Aggregation, optional): The aggregation scheme to use.
            Any aggregation of :obj:`torch_geometric.nn.aggr` can be used,
            *e.g.*, :obj:`"mean"`, :obj:`"max"`, or :obj:`"lstm"`.
            (default: :obj:`"mean"`)
        project (bool, optional): If set to :obj:`True`, the layer will apply a
            linear transformation followed by an activation function before
            aggregation (as described in Eq. (3) of the paper).
            (default: :obj:`True`)
        bias (bool, optional): If set to :obj:`False`, the transformation of
            the aggregated neighborhood will not learn an additive bias.
            (default: :obj:`True`)
        **kwargs (optional): Additional arguments of
            :class:`torch_geometric.nn.conv.MessagePassing`.

    Raises:
        ValueError: If :obj:`project` is :obj:`True` and the source input
            size is not positive (lazy initialization is not supported).

    Shapes:
        - **inputs:**
          node features :math:`(|\mathcal{V}|, F_{in})` or
          :math:`((|\mathcal{V_s}|, F_{s}), (|\mathcal{V_t}|, F_{t}))`
          if bipartite,
          edge indices :math:`(2, |\mathcal{E}|)`
        - **outputs:** node features :math:`(|\mathcal{V}|, F_{out})` or
          :math:`(|\mathcal{V_t}|, F_{out})` if bipartite
    """

    def __init__(
        self,
        in_channels: Union[int, Tuple[int, int]],
        out_channels: int,
        aggr: Optional[Union[str, List[str], Aggregation]] = "mean",
        project: bool = True,
        bias: bool = True,
        **kwargs,
    ):
        # Keep the constructor arguments as given; ``__repr__`` prints them.
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.project = project

        # Normalize to a (source, target) pair of input sizes.
        if isinstance(in_channels, int):
            in_channels = (in_channels, in_channels)

        # For the LSTM aggregation, default its input and output sizes to the
        # source feature size unless the caller provided them.
        if aggr == "lstm":
            kwargs.setdefault("aggr_kwargs", {})
            kwargs["aggr_kwargs"].setdefault("in_channels", in_channels[0])
            kwargs["aggr_kwargs"].setdefault("out_channels", in_channels[0])

        super().__init__(aggr, **kwargs)

        if self.project:
            if in_channels[0] <= 0:
                raise ValueError(
                    f"'{self.__class__.__name__}' does not "
                    f"support lazy initialization with "
                    f"`project=True`"
                )
            # Pre-aggregation projection of the source features.
            self.lin = Linear(in_channels[0], in_channels[0], bias=True)

        # A multi-aggregation reports its own output width.
        if isinstance(self.aggr_module, MultiAggregation):
            aggr_out_channels = self.aggr_module.get_out_channels(
                in_channels[0]
            )
        else:
            aggr_out_channels = in_channels[0]

        # ``lin_l`` transforms the aggregated neighborhood, ``lin_r`` the
        # target (root) features; only ``lin_l`` honors the ``bias`` flag.
        self.lin_l = Linear(aggr_out_channels, out_channels, bias=bias)
        self.lin_r = Linear(in_channels[1], out_channels, bias=False)

        self.reset_parameters()

    def reset_parameters(self):
        """Reset the parameters of the base class and of all linear layers."""
        super().reset_parameters()
        if self.project:
            self.lin.reset_parameters()
        self.lin_l.reset_parameters()
        self.lin_r.reset_parameters()

    def forward(
        self,
        x: Union[Tensor, OptPairTensor],
        edge_index: Adj,
        size: Size = None,
    ) -> Tensor:
        """Compute the layer output for the given features and edges.

        Args:
            x: A single feature tensor, or a ``(source, target)`` pair whose
                target entry may be ``None``.
            edge_index: Connectivity forwarded to ``propagate``.
            size: Optional size forwarded to ``propagate``.

        Returns:
            The transformed aggregated neighborhood, plus the transformed
            target features when they are available.
        """
        # A single tensor serves as both source and target features.
        if isinstance(x, Tensor):
            x = (x, x)

        # Project the source features and apply GELU before aggregation.
        if self.project and hasattr(self, "lin"):
            x = (F.gelu(self.lin(x[0])), x[1])

        # propagate_type: (x: OptPairTensor)
        AX = self.propagate(edge_index, x=x, size=size)
        out = self.lin_l(AX)

        # Add the root term only when target features were provided.
        x_r = x[1]
        if x_r is not None:
            out = out + self.lin_r(x_r)

        return out

    def message(self, x_j: Tensor) -> Tensor:
        """Return the source features unchanged as the message."""
        return x_j

    def message_and_aggregate(self, adj_t: Adj, x: OptPairTensor) -> Tensor:
        """Aggregate the source features with one sparse matrix product.

        For a ``SparseTensor`` adjacency the stored values are dropped before
        the product.
        """
        if isinstance(adj_t, SparseTensor):
            adj_t = adj_t.set_value(None, layout=None)
        return spmm(adj_t, x[0], reduce=self.aggr)

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}({self.in_channels}, "
            f"{self.out_channels}, aggr={self.aggr})"
        )
class GraphSAGE(torch.nn.Module):
    #####################################################################
    # (HIGHLIGHT) Define the GraphSAGE model architecture.
    #
    # - This class inherits from `torch.nn.Module`.
    # - `n_layers` SAGEConv layers from PyG are stacked, followed by an MLP
    #   `predictor` that maps a hidden vector to a single score.
    # - `forward` handles sampled mini-batches; `inference` computes the
    #   embeddings of all nodes one layer at a time.
    #####################################################################
    def __init__(self, in_size, hidden_size, n_layers):
        super().__init__()
        # widths[k] -> widths[k + 1] is the shape of the k-th convolution.
        widths = [in_size] + [hidden_size] * n_layers
        self.layers = torch.nn.ModuleList(
            [
                SAGEConv(fan_in, fan_out)
                for fan_in, fan_out in zip(widths, widths[1:])
            ]
        )
        self.hidden_size = hidden_size
        self.predictor = torch.nn.Sequential(
            torch.nn.Linear(hidden_size, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, 1),
        )

    def forward(self, subgraphs, x):
        """Apply one convolution per sampled subgraph and return the result.

        ReLU is applied after every layer except the one paired with the
        last subgraph.
        """
        last_idx = len(subgraphs) - 1
        h = x
        for idx, (conv, subgraph) in enumerate(zip(self.layers, subgraphs)):
            #####################################################################
            # (HIGHLIGHT) Convert given features to be consumed by a PyG layer.
            #
            # PyG layers have two modes, bipartite and normal. `to_pyg`
            # slices the features into src and dst parts so the layer runs
            # in the more efficient bipartite mode.
            #####################################################################
            h, edge_index, size = subgraph.to_pyg(h)
            h = conv(h, edge_index, size=size)
            if idx != last_idx:
                h = F.relu(h)
        return h

    def inference(self, graph, features, dataloader, storage_device):
        """Conduct layer-wise inference to get all the node embeddings.

        For every layer a buffer with one row per graph node is filled
        batch by batch; for all but the last layer that buffer then
        replaces the "feat" node feature so the next layer reads it.
        """
        pin_memory = storage_device == "pinned"
        buffer_device = torch.device("cpu" if pin_memory else storage_device)
        num_layers = len(self.layers)
        for layer_idx, layer in enumerate(self.layers):
            is_last_layer = layer_idx + 1 == num_layers

            y = torch.empty(
                graph.total_num_nodes,
                self.hidden_size,
                dtype=torch.float32,
                device=buffer_device,
                pin_memory=pin_memory,
            )
            for data in tqdm(dataloader, "Inferencing"):
                # Each batch carries exactly one sampled subgraph here.
                subgraph = data.sampled_subgraphs[0]
                h, edge_index, size = subgraph.to_pyg(
                    data.node_features["feat"]
                )
                hidden_x = layer(h, edge_index, size=size)
                if not is_last_layer:
                    hidden_x = F.relu(hidden_x)
                # By design, our output nodes are contiguous, so the first
                # and last seed delimit the destination rows.
                first_seed, last_seed = data.seeds[0], data.seeds[-1]
                y[first_seed : last_seed + 1] = hidden_x.to(buffer_device)
            if not is_last_layer:
                features.update("node", None, "feat", y)

        return y
def compute_predictions(model, node_emb, seeds, device):
    """Score every (source, destination) pair listed in ``seeds``.

    The pairs are processed in fixed-size windows so that only one window
    of embeddings has to live on ``device`` at a time.

    Args:
        model: Model passed through to ``predictions_step``.
        node_emb: Embedding table indexed by node ID.
        seeds: Tensor whose rows are (source, destination) node IDs.
        device: Device on which the scores are computed and returned.

    Returns:
        A 1-D tensor on ``device`` with one score per row of ``seeds``.
    """
    num_pairs = seeds.shape[0]
    preds = torch.empty(num_pairs, device=device)
    seeds_src, seeds_dst = seeds.T

    # 1001 = 1 positive + 1000 negatives per query in `ogbl-citation2`.
    window_size = args.eval_batch_size * 1001

    for lo in trange(0, num_pairs, window_size, desc="Evaluate"):
        hi = min(lo + window_size, num_pairs)
        window = slice(lo, hi)

        # Gather the embeddings of this window and move them to `device`.
        h_src = node_emb[seeds_src[window]].to(device, non_blocking=True)
        h_dst = node_emb[seeds_dst[window]].to(device, non_blocking=True)

        preds[window] = predictions_step(model, h_src, h_dst)
    return preds
def train_helper(dataloader, model, optimizer, device):
    """Run one pass over ``dataloader`` and report the mean loss and time.

    Args:
        dataloader: Iterable of mini-batches handed to ``train_step``.
        model: Model being trained.
        optimizer: Optimizer stepped by ``train_step``.
        device: Device on which the running loss sum is kept.

    Returns:
        ``(mean_loss, seconds)`` where ``mean_loss`` is a one-element
        tensor weighted by the number of samples of each batch.
    """
    model.train()  # Switch the model to training mode.
    loss_sum = torch.zeros(1, device=device)  # Sample-weighted loss sum.
    samples_seen = 0  # Number of samples processed so far.
    tic = time.time()
    progress = tqdm(enumerate(dataloader), "Training")
    for step, minibatch in progress:
        loss, num_samples = train_step(minibatch, optimizer, model)
        loss_sum += loss * num_samples
        samples_seen += num_samples
        # `early_stop == 0` never matches because `step + 1` starts at 1.
        if args.early_stop == step + 1:
            break
    mean_loss = loss_sum / samples_seen
    toc = time.time()
    return mean_loss, toc - tic
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: If the token is not a known spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in {"true", "t", "yes", "y", "1", "on"}:
        return True
    # The empty string stays falsy, as it was with the former `type=bool`.
    if token in {"false", "f", "no", "n", "0", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"Expected a boolean value, got {value!r}."
    )


def parse_args():
    """Parse the command-line options of the link prediction example.

    Returns:
        argparse.Namespace: The parsed options. ``exclude_edges`` is a real
        boolean: ``--exclude-edges False`` (or ``0``/``no``/``off``) turns
        it off, ``--exclude-edges`` alone or with a truthy value turns it
        on, and omitting the flag keeps the default ``True``.
    """
    parser = argparse.ArgumentParser(
        description="Which dataset are you going to use?"
    )
    parser.add_argument(
        "--epochs", type=int, default=10, help="Number of training epochs."
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.003,
        help="Learning rate for optimization.",
    )
    parser.add_argument("--neg-ratio", type=int, default=1)
    parser.add_argument("--train-batch-size", type=int, default=512)
    parser.add_argument("--eval-batch-size", type=int, default=1024)
    parser.add_argument(
        "--batch-size", type=int, default=1024, help="Batch size for training."
    )
    parser.add_argument(
        "--num-workers",
        type=int,
        default=0,
        help="Number of workers for data loading.",
    )
    parser.add_argument(
        "--early-stop",
        type=int,
        default=0,
        help="0 means no early stop, otherwise stop at the input-th step",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="ogbl-citation2",
        choices=["ogbl-citation2"],
        help="The dataset we can use for link prediction. Currently"
        " only ogbl-citation2 dataset is supported.",
    )
    parser.add_argument(
        "--fanout",
        type=str,
        default="10,10,10",
        help="Fan-out of neighbor sampling. It is IMPORTANT to keep len(fanout)"
        " identical with the number of layers in your model. Default: 10,10,10",
    )
    parser.add_argument(
        "--exclude-edges",
        # `nargs="?"` + `const=True` also accepts the bare flag, while an
        # explicit value is parsed by `_str2bool` instead of `bool`.
        type=_str2bool,
        nargs="?",
        const=True,
        default=True,
        help="Whether to exclude reverse edges during sampling. Default: True",
    )
    parser.add_argument(
        "--mode",
        default="pinned-pinned-cuda",
        choices=[
            "cpu-cpu-cpu",
            "cpu-cpu-cuda",
            "cpu-pinned-cuda",
            "pinned-pinned-cuda",
            "cuda-pinned-cuda",
            "cuda-cuda-cuda",
        ],
        help="Graph storage - feature storage - Train device: 'cpu' for CPU and RAM,"
        " 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.",
    )
    parser.add_argument(
        "--gpu-cache-size",
        type=int,
        default=0,
        help="The capacity of the GPU cache in bytes.",
    )
    parser.add_argument(
        "--sample-mode",
        default="sample_neighbor",
        choices=["sample_neighbor", "sample_layer_neighbor"],
        help="The sampling function when doing layerwise sampling.",
    )
    parser.add_argument("--precision", type=str, default="high")
    return parser.parse_args()
if __name__ == "__main__":
    # `args` is intentionally bound at module level instead of being passed
    # around: `create_dataloader`, `compute_predictions`, `train_helper`,
    # `train`, `evaluate` and `main` all read it as a global.
    args = parse_args()
    main()
This combination offers an efficient alternative to traditional Deep Graph Library (DGL) methods, highlighting adaptability and scalability in handling large-scale graph data for diverse real-world applications. Key Features: - Implements the GraphSAGE model, a scalable GNN, for node classification on large graphs. - Utilizes GraphBolt, an efficient framework for large-scale graph data processing. - Integrates with PyTorch Geometric for building and training the GraphSAGE model. - The script is well-documented, providing clear explanations at each step. This flowchart describes the main functional sequence of the provided example. main: main │ ├───> Load and preprocess dataset (GraphBolt) │ │ │ └───> Utilize GraphBolt's BuiltinDataset for dataset handling │ ├───> Instantiate the SAGE model (PyTorch Geometric) │ │ │ └───> Define the GraphSAGE model architecture │ ├───> Train the model │ │ │ ├───> Mini-Batch Processing with GraphBolt │ │ │ │ │ └───> Efficient handling of mini-batches using GraphBolt's utilities │ │ │ └───> Training Loop │ │ │ ├───> Forward and backward passes │ │ │ └───> Parameters optimization │ └───> Evaluate the model │ └───> Performance assessment on validation and test datasets │ └───> Accuracy and other relevant metrics calculation """ import argparse import os import time import dgl.graphbolt as gb import torch # For torch.compile until https://github.com/pytorch/pytorch/issues/121197 is # resolved. 
def accuracy(out, labels):
    """Return the fraction of rows whose arg-max class equals the label.

    Args:
        out: 2-D tensor of per-class scores, one row per sample.
        labels: Class indices, either 1-D or a single-column 2-D tensor,
            with as many entries as ``out`` has rows.

    Returns:
        A 0-dim ``torch.float64`` tensor holding the accuracy.
    """
    assert out.ndim == 2
    assert out.size(0) == labels.size(0)
    labels_is_column = labels.ndim == 2 and labels.size(1) == 1
    assert labels.ndim == 1 or labels_is_column

    flat_labels = labels.flatten()
    predicted = out.argmax(dim=1)
    # Sum in float64 so the subsequent division is done in double precision.
    num_hits = (flat_labels == predicted).sum(dtype=torch.float64)
    return num_hits / flat_labels.size(0)
def create_dataloader(
    args, graph, features, itemset, batch_size, fanout, device, job
):
    """Assemble the GraphBolt datapipe for one job and wrap it in a loader.

    The pipeline is: distributed item sampling -> neighbor sampling ->
    feature fetching. The mini-batch is copied to ``device`` exactly once,
    at the earliest stage whose storage is not on the CPU (before sampling
    if the graph is off-CPU, else before fetching if the features are
    off-CPU, else after fetching).

    Args:
        args: Parsed options; reads ``graph_device``, ``feature_device``,
            ``sample_mode``, the overlap/caching/cooperative settings and
            ``num_workers``.
        graph: Graph to sample neighbors from.
        features: Feature store providing the "feat" node feature.
        itemset: Items to draw mini-batches from.
        batch_size: Number of items per mini-batch.
        fanout: Per-layer fanout; replaced by ``[-1]`` when ``job`` is
            ``"infer"``.
        device: Target device of the copy stage.
        job: ``"train"`` enables shuffling and dropping the last batch.

    Returns:
        A ``gb.DataLoader`` over the assembled datapipe.
    """
    is_training = job == "train"
    graph_off_cpu = args.graph_device != "cpu"
    features_off_cpu = args.feature_device != "cpu"

    # Draw mini-batches of items, split across the participating ranks.
    datapipe = gb.DistributedItemSampler(
        itemset,
        batch_size=batch_size,
        shuffle=is_training,
        drop_last=is_training,
        drop_uneven_inputs=True,
    )

    already_copied = False
    if graph_off_cpu:
        datapipe = datapipe.copy_to(device=device)
        already_copied = True

    # `args.sample_mode` names the sampling method of the datapipe
    # (`sample_neighbor` or `sample_layer_neighbor`).
    sample = getattr(datapipe, args.sample_mode)
    datapipe = sample(
        graph,
        [-1] if job == "infer" else fanout,
        overlap_fetch=args.overlap_graph_fetch,
        num_gpu_cached_edges=args.num_gpu_cached_edges,
        gpu_cache_threshold=args.gpu_graph_caching_threshold,
        cooperative=args.cooperative,
        asynchronous=graph_off_cpu,
    )

    if features_off_cpu and not already_copied:
        datapipe = datapipe.copy_to(device=device)
        already_copied = True

    # Attach the "feat" node feature of the sampled nodes.
    datapipe = datapipe.fetch_feature(
        features,
        node_feature_keys=["feat"],
        overlap_fetch=args.overlap_feature_fetch,
        cooperative=args.cooperative,
    )

    if not already_copied:
        datapipe = datapipe.copy_to(device=device)

    return gb.DataLoader(datapipe, num_workers=args.num_workers)
def train_helper(rank, dataloader, model, optimizer, loss_fn, device):
    """Train for one epoch on this rank and reduce the statistics.

    Args:
        rank: Rank of the calling process; only rank 0 shows a progress bar.
        dataloader: Iterable of mini-batches handed to ``train_step``.
        model: Model being trained.
        optimizer: Optimizer stepped by ``train_step``.
        loss_fn: Loss function forwarded to ``train_step``.
        device: Device on which the running sums are kept.

    Returns:
        ``(train_loss, train_acc, seconds)``. The loss is averaged over the
        number of batches and the accuracy over the number of samples, both
        through ``weighted_reduce``.
    """
    model.train()  # Switch the model to training mode.
    loss_sum = torch.zeros(1, device=device)
    # Kept in float64 to match the dtype produced by `accuracy`.
    correct_sum = torch.zeros(1, dtype=torch.float64, device=device)
    samples_seen = 0
    batches_seen = 0
    tic = time.time()

    batches = dataloader
    if rank == 0:
        batches = tqdm(dataloader, "Training")

    for minibatch in batches:
        loss, num_correct, num_samples = train_step(
            minibatch, optimizer, model, loss_fn
        )
        loss_sum += loss
        correct_sum += num_correct
        samples_seen += num_samples
        batches_seen += 1

    train_loss = weighted_reduce(loss_sum, batches_seen)
    train_acc = weighted_reduce(correct_sum, samples_seen)
    toc = time.time()
    return train_loss, train_acc, toc - tic
def parse_args():
    """Parse the command-line options of the multi-GPU example.

    The options are declared in a table and registered in that order, so
    the generated ``--help`` output lists them top to bottom.

    Returns:
        argparse.Namespace: The parsed options.
    """
    option_table = [
        (
            "--epochs",
            dict(type=int, default=10, help="Number of training epochs."),
        ),
        (
            "--lr",
            dict(
                type=float,
                default=0.003,
                help="Learning rate for optimization.",
            ),
        ),
        (
            "--batch-size",
            dict(type=int, default=1024, help="Batch size for training."),
        ),
        (
            "--num-workers",
            dict(
                type=int,
                default=0,
                help="Number of workers for data loading.",
            ),
        ),
        (
            "--dataset",
            dict(
                type=str,
                default="ogbn-products",
                choices=[
                    "ogbn-arxiv",
                    "ogbn-products",
                    "ogbn-papers100M",
                    "igb-hom-tiny",
                    "igb-hom-small",
                    "igb-hom-medium",
                    "igb-hom-large",
                    "igb-hom",
                ],
                help="The dataset we can use for node classification example. Currently"
                " ogbn-products, ogbn-arxiv, ogbn-papers100M and"
                " igb-hom-[tiny|small|medium|large] and igb-hom datasets are supported.",
            ),
        ),
        (
            "--fanout",
            dict(
                type=str,
                default="10,10,10",
                help="Fan-out of neighbor sampling. It is IMPORTANT to keep len(fanout)"
                " identical with the number of layers in your model. Default: 10,10,10",
            ),
        ),
        (
            "--mode",
            dict(
                default="pinned-pinned-cuda",
                choices=[
                    "pinned-pinned-cuda",
                    "cuda-pinned-cuda",
                    "cuda-cuda-cuda",
                ],
                help="Graph storage - feature storage - Train device: 'cpu' for CPU and RAM,"
                " 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.",
            ),
        ),
        (
            "--gpu-cache-size",
            dict(
                type=int,
                default=0,
                help="The capacity of the GPU cache in bytes.",
            ),
        ),
        (
            "--sample-mode",
            dict(
                default="sample_neighbor",
                choices=["sample_neighbor", "sample_layer_neighbor"],
                help="The sampling function when doing layerwise sampling.",
            ),
        ),
        (
            "--num-gpu-cached-edges",
            dict(
                type=int,
                default=0,
                help="The number of edges to be cached from the graph on the GPU.",
            ),
        ),
        (
            "--gpu-graph-caching-threshold",
            dict(
                type=int,
                default=1,
                help="The number of accesses after which a vertex neighborhood will be cached.",
            ),
        ),
        ("--precision", dict(type=str, default="medium")),
        (
            "--cooperative",
            dict(
                action="store_true",
                help="Enables Cooperative Minibatching from arXiv:2310.12403.",
            ),
        ),
    ]

    parser = argparse.ArgumentParser(
        description="Which dataset are you going to use?"
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)
    return parser.parse_args()
if __name__ == "__main__":
    # Entry point: parse the options, load the dataset once in the parent
    # process and start one training process per visible GPU.
    args = parse_args()
    if not torch.cuda.is_available():
        print("Multi-GPU training requires GPUs.")
        # Same exit status as before, without relying on the `exit` helper
        # that only exists when the `site` module is loaded.
        raise SystemExit(0)

    torch.set_float32_matmul_precision(args.precision)

    # Load and preprocess dataset.
    print("Loading data...")
    dataset = gb.BuiltinDataset(args.dataset).load()

    world_size = torch.cuda.device_count()

    # Thread limiting to avoid resource competition. The integer division
    # yields 0 on machines with fewer than 2 * world_size CPUs, and
    # OMP_NUM_THREADS must be a positive integer, so clamp to at least 1.
    omp_threads = max(1, mp.cpu_count() // 2 // world_size)
    os.environ["OMP_NUM_THREADS"] = str(omp_threads)

    mp.set_sharing_strategy("file_system")
    mp.spawn(
        run,
        args=(world_size, args, dataset),
        nprocs=world_size,
        join=True,
    )
class GraphSAGE(torch.nn.Module):
    #####################################################################
    # (HIGHLIGHT) Define the GraphSAGE model architecture.
    #
    # - This class inherits from `torch.nn.Module`.
    # - Three convolutional layers are created using the SAGEConv class
    #   from PyG: in_size -> hidden_size -> hidden_size -> out_size.
    # - 'in_size', 'hidden_size', 'out_size' are the sizes of
    #   the input, hidden, and output features, respectively.
    # - The forward method defines the computation performed at every call.
    # - It's adopted from the official PyG example which can be found at
    # https://github.com/pyg-team/pytorch_geometric/blob/master/examples/ogbn_products_sage.py
    #####################################################################
    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        widths = [in_size, hidden_size, hidden_size, out_size]
        self.layers = torch.nn.ModuleList(
            [
                SAGEConv(fan_in, fan_out)
                for fan_in, fan_out in zip(widths, widths[1:])
            ]
        )

    def forward(self, x, edge_index):
        """Run all layers on one graph; ReLU and dropout follow every layer
        except the last."""
        last_idx = len(self.layers) - 1
        for idx, conv in enumerate(self.layers):
            x = conv(x, edge_index)
            if idx == last_idx:
                continue
            x = x.relu()
            x = F.dropout(x, p=0.5, training=self.training)
        return x

    def inference(self, dataloader, x_all, device):
        """Conduct layer-wise inference to get all the node embeddings.

        Each layer is applied to every mini-batch of ``dataloader``; the
        per-batch outputs are moved to the CPU and concatenated to form the
        input of the next layer.
        """
        last_idx = len(self.layers) - 1
        for idx, conv in tqdm(enumerate(self.layers), "inference"):
            outputs = []
            for minibatch in dataloader:
                # Call `to_pyg_data` to convert GB Minibatch to PyG Data.
                pyg_data = minibatch.to_pyg_data()
                node_ids = pyg_data.n_id.to("cpu")
                h = x_all[node_ids].to(device)
                h = conv(h, pyg_data.edge_index)
                # Keep only the leading `batch_size` rows of the output.
                h = h[: pyg_data.batch_size]
                if idx != last_idx:
                    h = h.relu()
                outputs.append(h.cpu())
            x_all = torch.cat(outputs, dim=0)
        return x_all
dataloader = gb.DataLoader(datapipe, num_workers=0) return dataloader def train(model, dataloader, optimizer): model.train() # Set the model to training mode total_loss = 0 # Accumulator for the total loss total_correct = 0 # Accumulator for the total number of correct predictions total_samples = 0 # Accumulator for the total number of samples processed num_batches = 0 # Counter for the number of mini-batches processed for _, minibatch in tqdm(enumerate(dataloader), "training"): ##################################################################### # (HIGHLIGHT) Convert GraphBolt MiniBatch to PyG Data class. # # Call `MiniBatch.to_pyg_data()` and it will return a PyG Data class # with necessary data and information. ##################################################################### pyg_data = minibatch.to_pyg_data() optimizer.zero_grad() out = model(pyg_data.x, pyg_data.edge_index)[: pyg_data.y.shape[0]] y = pyg_data.y loss = F.cross_entropy(out, y) loss.backward() optimizer.step() total_loss += float(loss) total_correct += int(out.argmax(dim=-1).eq(y).sum()) total_samples += y.shape[0] num_batches += 1 avg_loss = total_loss / num_batches avg_accuracy = total_correct / total_samples return avg_loss, avg_accuracy @torch.no_grad() def evaluate(model, dataloader, num_classes): model.eval() y_hats = [] ys = [] for _, minibatch in tqdm(enumerate(dataloader), "evaluating"): pyg_data = minibatch.to_pyg_data() out = model(pyg_data.x, pyg_data.edge_index)[: pyg_data.y.shape[0]] y = pyg_data.y y_hats.append(out) ys.append(y) return MF.accuracy( torch.cat(y_hats), torch.cat(ys), task="multiclass", num_classes=num_classes, ) @torch.no_grad() def layerwise_infer( model, infer_dataloader, test_set, feature, num_classes, device ): model.eval() features = feature.read("node", None, "feat") pred = model.inference(infer_dataloader, features, device) pred = pred[test_set._items[0]] label = test_set._items[1].to(pred.device) return MF.accuracy( pred, label, task="multiclass", 
num_classes=num_classes, ) def main(): parser = argparse.ArgumentParser( description="Which dataset are you going to use?" ) parser.add_argument( "--dataset", type=str, default="ogbn-products", help='Name of the dataset to use (e.g., "ogbn-products", "ogbn-arxiv")', ) parser.add_argument( "--epochs", type=int, default=10, help="Number of training epochs." ) parser.add_argument( "--batch-size", type=int, default=1024, help="Batch size for training." ) args = parser.parse_args() device = torch.device("cuda" if torch.cuda.is_available() else "cpu") dataset_name = args.dataset dataset = gb.BuiltinDataset(dataset_name).load() graph = dataset.graph feature = dataset.feature.pin_memory_() train_set = dataset.tasks[0].train_set valid_set = dataset.tasks[0].validation_set test_set = dataset.tasks[0].test_set all_nodes_set = dataset.all_nodes_set num_classes = dataset.tasks[0].metadata["num_classes"] train_dataloader = create_dataloader( train_set, graph, feature, args.batch_size, [5, 10, 15], device, job="train", ) valid_dataloader = create_dataloader( valid_set, graph, feature, args.batch_size, [5, 10, 15], device, job="evaluate", ) infer_dataloader = create_dataloader( all_nodes_set, graph, feature, 4 * args.batch_size, [-1], device, job="infer", ) in_channels = feature.size("node", None, "feat")[0] hidden_channels = 256 model = GraphSAGE(in_channels, hidden_channels, num_classes).to(device) optimizer = torch.optim.Adam(model.parameters(), lr=0.003) for epoch in range(args.epochs): train_loss, train_accuracy = train(model, train_dataloader, optimizer) valid_accuracy = evaluate(model, valid_dataloader, num_classes) print( f"Epoch {epoch}, Train Loss: {train_loss:.4f}, " f"Train Accuracy: {train_accuracy:.4f}, " f"Valid Accuracy: {valid_accuracy:.4f}" ) test_accuracy = layerwise_infer( model, infer_dataloader, test_set, feature, num_classes, device ) print(f"Test Accuracy: {test_accuracy:.4f}") if __name__ == "__main__": main() 
================================================
FILE: examples/graphbolt/pyg/node_classification_advanced.py
================================================
"""
This script demonstrates node classification with GraphSAGE on large graphs,
merging GraphBolt (GB) and PyTorch Geometric (PyG). GraphBolt efficiently
manages data loading for large datasets, crucial for mini-batch processing.
Post data loading, PyG's user-friendly framework takes over for training,
showcasing seamless integration with GraphBolt. This combination offers an
efficient alternative to traditional Deep Graph Library (DGL) methods,
highlighting adaptability and scalability in handling large-scale graph data
for diverse real-world applications.



Key Features:
- Implements the GraphSAGE model, a scalable GNN, for node classification on
  large graphs.
- Utilizes GraphBolt, an efficient framework for large-scale graph data
  processing.
- Integrates with PyTorch Geometric for building and training the GraphSAGE
  model.
- The script is well-documented, providing clear explanations at each step.

This flowchart describes the main functional sequence of the provided example.
main:

main
│
├───> Load and preprocess dataset (GraphBolt)
│     │
│     └───> Utilize GraphBolt's BuiltinDataset for dataset handling
│
├───> Instantiate the SAGE model (PyTorch Geometric)
│     │
│     └───> Define the GraphSAGE model architecture
│
├───> Train the model
│     │
│     ├───> Mini-Batch Processing with GraphBolt
│     │     │
│     │     └───> Efficient handling of mini-batches using GraphBolt's utilities
│     │
│     └───> Training Loop
│           │
│           ├───> Forward and backward passes
│           │
│           └───> Parameters optimization
│
└───> Evaluate the model
      │
      └───> Performance assessment on validation and test datasets
            │
            └───> Accuracy and other relevant metrics calculation


"""

import argparse
import time

import dgl.graphbolt as gb
import torch

# For torch.compile until https://github.com/pytorch/pytorch/issues/121197 is
# resolved.
import torch._inductor.codecache

torch._dynamo.config.cache_size_limit = 32

import torch.nn.functional as F
from torch_geometric.nn import SAGEConv
from tqdm import tqdm


def accuracy(out, labels):
    """Return the fraction of rows of `out` whose argmax equals the label.

    `out` must be 2-D with one row per label; `labels` may be 1-D or a
    single-column 2-D tensor. The result is a float64 tensor.
    """
    assert out.ndim == 2
    assert out.size(0) == labels.size(0)
    assert labels.ndim == 1 or (labels.ndim == 2 and labels.size(1) == 1)
    labels = labels.flatten()
    predictions = torch.argmax(out, 1)
    return (labels == predictions).sum(dtype=torch.float64) / labels.size(0)


class GraphSAGE(torch.nn.Module):
    #####################################################################
    # (HIGHLIGHT) Define the GraphSAGE model architecture.
    #
    # - This class inherits from `torch.nn.Module`.
    # - `n_layers` convolutional layers are created using the SAGEConv class
    #   from PyG.
    # - 'in_size', 'hidden_size', 'out_size' are the sizes of
    #   the input, hidden, and output features, respectively.
    # - The forward method defines the computation performed at every call.
    #####################################################################
    def __init__(self, in_size, hidden_size, out_size, n_layers):
        super(GraphSAGE, self).__init__()
        self.layers = torch.nn.ModuleList()
        # Layer i maps sizes[i] -> sizes[i + 1].
        sizes = [in_size] + [hidden_size] * (n_layers - 1) + [out_size]
        for i in range(n_layers):
            self.layers.append(SAGEConv(sizes[i], sizes[i + 1]))
        # Kept for sizing the output buffers in `inference`.
        self.hidden_size = hidden_size
        self.out_size = out_size

    def forward(self, subgraphs, x):
        """Apply one layer per sampled subgraph, with ReLU between layers."""
        h = x
        for i, (layer, subgraph) in enumerate(zip(self.layers, subgraphs)):
            #####################################################################
            # (HIGHLIGHT) Convert given features to be consumed by a PyG layer.
            #
            #   PyG layers have two modes, bipartite and normal. We slice the
            #   given features to get src and dst features to use the PyG layers
            #   in the more efficient bipartite mode.
            #####################################################################
            h, edge_index, size = subgraph.to_pyg(h)
            h = layer(h, edge_index, size=size)
            if i != len(subgraphs) - 1:
                h = F.relu(h)
        return h

    def inference(self, graph, features, dataloader, storage_device):
        """Conduct layer-wise inference to get all the node embeddings."""
        # "pinned" means the buffer lives in pinned CPU memory.
        pin_memory = storage_device == "pinned"
        buffer_device = torch.device("cpu" if pin_memory else storage_device)

        for layer_idx, layer in enumerate(self.layers):
            is_last_layer = layer_idx == len(self.layers) - 1

            # Output buffer with one row per node of the graph.
            y = torch.empty(
                graph.total_num_nodes,
                self.out_size if is_last_layer else self.hidden_size,
                dtype=torch.float32,
                device=buffer_device,
                pin_memory=pin_memory,
            )
            for data in tqdm(dataloader, "Inferencing"):
                # len(data.sampled_subgraphs) = 1
                h, edge_index, size = data.sampled_subgraphs[0].to_pyg(
                    data.node_features["feat"]
                )
                hidden_x = layer(h, edge_index, size=size)
                if not is_last_layer:
                    hidden_x = F.relu(hidden_x)
                # By design, our output nodes are contiguous.
                y[data.seeds[0] : data.seeds[-1] + 1] = hidden_x.to(
                    buffer_device
                )
            if not is_last_layer:
                # The next layer reads this layer's output as the "feat"
                # node feature; note that this overwrites the stored feature.
                features.update("node", None, "feat", y)

        return y


def create_dataloader(
    graph, features, itemset, batch_size, fanout, device, job
):
    #####################################################################
    # (HIGHLIGHT) Create a data loader for efficiently loading graph data.
    #
    # - 'ItemSampler' samples mini-batches of node IDs from the dataset.
    # - 'CopyTo' copies the fetched data to the specified device.
    # - 'sample_neighbor' performs neighbor sampling on the graph.
    # - 'FeatureFetcher' fetches node features based on the sampled subgraph.
    #
    # NOTE: besides its parameters, this function reads the module-level
    # `args` (graph_device, feature_device, sample_mode, overlap and caching
    # options, num_workers).
    #####################################################################
    # Create a datapipe for mini-batch sampling with a specific neighbor fanout.
    # Here, [10, 10, 10] specifies the number of neighbors sampled for each node at each layer.
    # We're using `sample_neighbor` for consistency with DGL's sampling API.
    # Note: GraphBolt offers additional sampling methods, such as `sample_layer_neighbor`,
    # which could provide further optimization and efficiency for GNN training.
    # Users are encouraged to explore these advanced features for potentially improved performance.

    # Initialize an ItemSampler to sample mini-batches from the dataset.
    datapipe = gb.ItemSampler(
        itemset,
        batch_size=batch_size,
        shuffle=(job == "train"),
        drop_last=(job == "train"),
    )
    # The copy to `device` is inserted exactly once, at the earliest stage
    # whose storage is not on the CPU; `need_copy` tracks whether it is done.
    need_copy = True
    # Copy the data to the specified device.
    if args.graph_device != "cpu" and need_copy:
        datapipe = datapipe.copy_to(device=device)
        need_copy = False
    # Sample neighbors for each node in the mini-batch.
    datapipe = getattr(datapipe, args.sample_mode)(
        graph,
        fanout if job != "infer" else [-1],
        overlap_fetch=args.overlap_graph_fetch,
        num_gpu_cached_edges=args.num_gpu_cached_edges,
        gpu_cache_threshold=args.gpu_graph_caching_threshold,
        asynchronous=args.graph_device != "cpu",
    )
    # Copy the data to the specified device.
    if args.feature_device != "cpu" and need_copy:
        datapipe = datapipe.copy_to(device=device)
        need_copy = False
    # Fetch node features for the sampled subgraph.
    datapipe = datapipe.fetch_feature(
        features,
        node_feature_keys=["feat"],
        overlap_fetch=args.overlap_feature_fetch,
    )
    # Copy the data to the specified device.
    if need_copy:
        datapipe = datapipe.copy_to(device=device)
    # Create and return a DataLoader to handle data loading.
    return gb.DataLoader(datapipe, num_workers=args.num_workers)


@torch.compile
def train_step(minibatch, optimizer, model, loss_fn):
    """Run one optimization step on `minibatch`.

    Returns the detached loss, the number of correct predictions and the
    number of labels in the mini-batch.
    """
    node_features = minibatch.node_features["feat"]
    labels = minibatch.labels
    optimizer.zero_grad()
    out = model(minibatch.sampled_subgraphs, node_features)
    loss = loss_fn(out, labels)
    # `accuracy` returns a ratio; scale it back to a count.
    num_correct = accuracy(out, labels) * labels.size(0)
    loss.backward()
    optimizer.step()
    return loss.detach(), num_correct, labels.size(0)


def train_helper(dataloader, model, optimizer, loss_fn, device):
    """Train for one pass over `dataloader`.

    Returns the mean loss per mini-batch, the accuracy over all samples
    (both as tensors on `device`) and the elapsed wall-clock time in seconds.
    """
    model.train()  # Set the model to training mode
    total_loss = torch.zeros(1, device=device)  # Accumulator for the total loss
    # Accumulator for the total number of correct predictions
    total_correct = torch.zeros(1, dtype=torch.float64, device=device)
    total_samples = 0  # Accumulator for the total number of samples processed
    num_batches = 0  # Counter for the number of mini-batches processed
    start = time.time()
    for minibatch in tqdm(dataloader, "Training"):
        loss, num_correct, num_samples = train_step(
            minibatch, optimizer, model, loss_fn
        )
        total_loss += loss
        total_correct += num_correct
        total_samples += num_samples
        num_batches += 1
    train_loss = total_loss / num_batches
    train_acc = total_correct / total_samples
    end = time.time()
    return train_loss, train_acc, end - start


def train(train_dataloader, valid_dataloader, model, device):
    #####################################################################
    # (HIGHLIGHT) Train the model for `args.epochs` epochs.
    #
    # - Each epoch iterates over the training data loader via
    #   `train_helper`, which performs a forward pass, computes the loss and
    #   updates the model parameters for every mini-batch.
    # - After each epoch the model is evaluated on the validation data
    #   loader and the loss, accuracies and epoch duration are printed.
    #
    # Parameters:
    #   train_dataloader: DataLoader that provides training mini-batches.
    #   valid_dataloader: DataLoader that provides validation mini-batches.
    #   model: The GraphSAGE model.
    #   device: The device (CPU/GPU) to run the training on.
    #
    # The optimizer (Adam) and the loss function (cross entropy) are created
    # here; the learning rate and epoch count come from the module-level
    # `args`.
    #####################################################################
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    loss_fn = torch.nn.CrossEntropyLoss()

    for epoch in range(args.epochs):
        train_loss, train_acc, duration = train_helper(
            train_dataloader, model, optimizer, loss_fn, device
        )
        val_acc = evaluate(model, valid_dataloader, device)
        print(
            f"Epoch {epoch:02d}, Loss: {train_loss.item():.4f}, "
            f"Approx. Train: {train_acc.item():.4f}, "
            f"Approx. Val: {val_acc.item():.4f}, "
            f"Time: {duration}s"
        )


@torch.no_grad()
def layerwise_infer(args, graph, features, test_set, all_nodes_set, model):
    """Run layer-wise inference over all nodes and score it on `test_set`."""
    model.eval()
    dataloader = create_dataloader(
        graph=graph,
        features=features,
        itemset=all_nodes_set,
        batch_size=4 * args.batch_size,
        fanout=[-1],
        device=args.device,
        job="infer",
    )
    pred = model.inference(graph, features, dataloader, args.feature_device)
    # `test_set._items` is indexed as (node ids, labels) here.
    pred = pred[test_set._items[0]]
    label = test_set._items[1].to(pred.device)

    return accuracy(pred, label)


@torch.compile
def evaluate_step(minibatch, model):
    """Return (number of correct predictions, number of labels)."""
    node_features = minibatch.node_features["feat"]
    labels = minibatch.labels
    out = model(minibatch.sampled_subgraphs, node_features)
    num_correct = accuracy(out, labels) * labels.size(0)
    return num_correct, labels.size(0)


@torch.no_grad()
def evaluate(model, dataloader, device):
    """Return the accuracy of `model` over `dataloader` as a tensor."""
    model.eval()
    total_correct = torch.zeros(1, dtype=torch.float64, device=device)
    total_samples = 0
    for minibatch in tqdm(dataloader, "Evaluating"):
        num_correct, num_samples = evaluate_step(minibatch, model)
        total_correct += num_correct
        total_samples += num_samples

    return total_correct / total_samples


def parse_args():
    """Parse the command-line options of this example."""
    parser = argparse.ArgumentParser(
        description="Which dataset are you going to use?"
    )
    parser.add_argument(
        "--epochs", type=int, default=10, help="Number of training epochs."
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.003,
        help="Learning rate for optimization.",
    )
    parser.add_argument(
        "--batch-size", type=int, default=1024, help="Batch size for training."
    )
    parser.add_argument(
        "--num-workers",
        type=int,
        default=0,
        help="Number of workers for data loading.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="ogbn-products",
        choices=[
            "ogbn-arxiv",
            "ogbn-products",
            "ogbn-papers100M",
            "igb-hom-tiny",
            "igb-hom-small",
            "igb-hom-medium",
            "igb-hom-large",
            "igb-hom",
        ],
        help="The dataset we can use for node classification example. Currently"
        " ogbn-products, ogbn-arxiv, ogbn-papers100M and"
        " igb-hom-[tiny|small|medium|large] and igb-hom datasets are supported.",
    )
    parser.add_argument(
        "--fanout",
        type=str,
        default="10,10,10",
        help="Fan-out of neighbor sampling. It is IMPORTANT to keep len(fanout)"
        " identical with the number of layers in your model. Default: 10,10,10",
    )
    parser.add_argument(
        "--mode",
        default="pinned-pinned-cuda",
        choices=[
            "cpu-cpu-cpu",
            "cpu-cpu-cuda",
            "cpu-pinned-cuda",
            "pinned-pinned-cuda",
            "cuda-pinned-cuda",
            "cuda-cuda-cuda",
        ],
        help="Graph storage - feature storage - Train device: 'cpu' for CPU and RAM,"
        " 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.",
    )
    parser.add_argument(
        "--gpu-cache-size",
        type=int,
        default=0,
        help="The capacity of the GPU cache in bytes.",
    )
    parser.add_argument(
        "--sample-mode",
        default="sample_neighbor",
        choices=["sample_neighbor", "sample_layer_neighbor"],
        help="The sampling function when doing layerwise sampling.",
    )
    parser.add_argument(
        "--num-gpu-cached-edges",
        type=int,
        default=0,
        help="The number of edges to be cached from the graph on the GPU.",
    )
    parser.add_argument(
        "--gpu-graph-caching-threshold",
        type=int,
        default=1,
        help="The number of accesses after which a vertex neighborhood will be cached.",
    )
    parser.add_argument("--precision", type=str, default="high")
    return parser.parse_args()


def main():
    # NOTE: `args` is the module-level namespace assigned in the
    # `if __name__ == "__main__"` section below.
    torch.set_float32_matmul_precision(args.precision)
    if not torch.cuda.is_available():
        args.mode = "cpu-cpu-cpu"
    print(f"Training in {args.mode} mode.")
    args.graph_device, args.feature_device, args.device = args.mode.split("-")
    # Overlapped fetching is enabled only for pinned storage.
    args.overlap_feature_fetch = args.feature_device == "pinned"
    args.overlap_graph_fetch = args.graph_device == "pinned"

    # Load and preprocess dataset.
    print("Loading data...")
    dataset = gb.BuiltinDataset(args.dataset).load()

    # Move the dataset to the selected storage.
    graph = (
        dataset.graph.pin_memory_()
        if args.graph_device == "pinned"
        else dataset.graph.to(args.graph_device)
    )
    features = (
        dataset.feature.pin_memory_()
        if args.feature_device == "pinned"
        else dataset.feature.to(args.feature_device)
    )

    train_set = dataset.tasks[0].train_set
    valid_set = dataset.tasks[0].validation_set
    test_set = dataset.tasks[0].test_set
    all_nodes_set = dataset.all_nodes_set
    # "10,10,10" -> [10, 10, 10]
    args.fanout = list(map(int, args.fanout.split(",")))

    num_classes = dataset.tasks[0].metadata["num_classes"]

    # Wrap the "feat" node feature in a GPU cache when a cache size is given
    # and the features are not already stored on the GPU.
    if args.gpu_cache_size > 0 and args.feature_device != "cuda":
        features._features[("node", None, "feat")] = gb.gpu_cached_feature(
            features._features[("node", None, "feat")],
            args.gpu_cache_size,
        )

    train_dataloader, valid_dataloader = (
        create_dataloader(
            graph=graph,
            features=features,
            itemset=itemset,
            batch_size=args.batch_size,
            fanout=args.fanout,
            device=args.device,
            job=job,
        )
        for itemset, job in zip([train_set, valid_set], ["train", "evaluate"])
    )

    in_channels = features.size("node", None, "feat")[0]
    hidden_channels = 256
    # One model layer per fanout entry.
    model = GraphSAGE(
        in_channels, hidden_channels, num_classes, len(args.fanout)
    ).to(args.device)
    assert len(args.fanout) == len(model.layers)

    train(train_dataloader, valid_dataloader, model, args.device)

    # Test the model.
    print("Testing...")
    test_acc = layerwise_infer(
        args,
        graph,
        features,
        test_set,
        all_nodes_set,
        model,
    )
    print(f"Test accuracy {test_acc.item():.4f}")


if __name__ == "__main__":
    args = parse_args()
    main()


================================================
FILE: examples/graphbolt/quickstart/README.md
================================================
# Graphbolt Quickstart Tutorial

Graphbolt provides all you need to create a dataloader to train a Graph Neural Networks.

## Examples - The [node_classification.py](https://github.com/dmlc/dgl/blob/master/examples/graphbolt/quickstart/node_classification.py) shows how to create a Graphbolt dataloader to train a 2 layer Graph Convolutional Networks node classification model. - The [link_prediction.py](https://github.com/dmlc/dgl/blob/master/examples/graphbolt/quickstart/link_prediction.py) shows how to create a Graphbolt dataloader to train a 2 layer GraphSage link prediction model. ================================================ FILE: examples/graphbolt/quickstart/link_prediction.py ================================================ """ This example shows how to create a GraphBolt dataloader to sample and train a link prediction model with the Cora dataset. Disclaimer: Please note that the test edges are not excluded from the original graph in the dataset, which could lead to data leakage. We are ignoring this issue for this example because we are focused on demonstrating usability. """ import dgl.graphbolt as gb import torch import torch.nn as nn import torch.nn.functional as F from dgl.nn import SAGEConv from torcheval.metrics import BinaryAUROC ############################################################################ # (HIGHLIGHT) Create a single process dataloader with dgl graphbolt package. ############################################################################ def create_dataloader(dataset, device, is_train=True): # The second of two tasks in the dataset is link prediction. task = dataset.tasks[1] itemset = task.train_set if is_train else task.test_set # Sample seed edges from the itemset. datapipe = gb.ItemSampler(itemset, batch_size=256) # Copy the mini-batch to the designated device for sampling and training. datapipe = datapipe.copy_to(device) if is_train: # Sample negative edges for the seed edges. datapipe = datapipe.sample_uniform_negative( dataset.graph, negative_ratio=1 ) # Sample neighbors for the seed nodes. 
datapipe = datapipe.sample_neighbor(dataset.graph, fanouts=[4, 2]) # Exclude seed edges from the subgraph. datapipe = datapipe.transform(gb.exclude_seed_edges) else: # Sample neighbors for the seed nodes. datapipe = datapipe.sample_neighbor(dataset.graph, fanouts=[-1, -1]) # Fetch features for sampled nodes. datapipe = datapipe.fetch_feature( dataset.feature, node_feature_keys=["feat"] ) # Initiate the dataloader for the datapipe. return gb.DataLoader(datapipe) class GraphSAGE(nn.Module): def __init__(self, in_size, hidden_size=16): super().__init__() self.layers = nn.ModuleList() self.layers.append(SAGEConv(in_size, hidden_size, "mean")) self.layers.append(SAGEConv(hidden_size, hidden_size, "mean")) self.predictor = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1), ) def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) return hidden_x @torch.no_grad() def evaluate(model, dataset, device): model.eval() dataloader = create_dataloader(dataset, device, is_train=False) logits = [] labels = [] for step, data in enumerate(dataloader): # Get node pairs with labels for loss calculation. compacted_seeds = data.compacted_seeds.T label = data.labels # The features of sampled nodes. x = data.node_features["feat"] # Forward. y = model(data.blocks, x) logit = ( model.predictor( y[compacted_seeds[0].long()] * y[compacted_seeds[1].long()] ) .squeeze() .detach() ) logits.append(logit) labels.append(label) logits = torch.cat(logits, dim=0) labels = torch.cat(labels, dim=0) # Compute the AUROC score. 
metric = BinaryAUROC() metric.update(logits, labels) score = metric.compute().item() print(f"AUC: {score:.3f}") def train(model, dataset, device): dataloader = create_dataloader(dataset, device) optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) for epoch in range(10): model.train() total_loss = 0 ######################################################################## # (HIGHLIGHT) Iterate over the dataloader and train the model with all # mini-batches. ######################################################################## for step, data in enumerate(dataloader): # Get node pairs with labels for loss calculation. compacted_seeds = data.compacted_seeds.T labels = data.labels # The features of sampled nodes. x = data.node_features["feat"] # Forward. y = model(data.blocks, x) logits = model.predictor( y[compacted_seeds[0].long()] * y[compacted_seeds[1].long()] ).squeeze() # Compute loss. loss = F.binary_cross_entropy_with_logits(logits, labels.float()) # Backward. optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() print(f"Epoch {epoch:03d} | Loss {total_loss / (step + 1):.3f}") if __name__ == "__main__": device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(f"Training in {device} mode.") # Load and preprocess dataset. print("Loading data...") dataset = gb.BuiltinDataset("cora").load() # If a CUDA device is selected, we pin the graph and the features so that # the GPU can access them. if device == torch.device("cuda:0"): dataset.graph.pin_memory_() dataset.feature.pin_memory_() in_size = dataset.feature.size("node", None, "feat")[0] model = GraphSAGE(in_size).to(device) # Model training. print("Training...") train(model, dataset, device) # Test the model. 
print("Testing...") evaluate(model, dataset, device) ================================================ FILE: examples/graphbolt/quickstart/node_classification.py ================================================ """ This example shows how to create a GraphBolt dataloader to sample and train a node classification model with the Cora dataset. """ import dgl.graphbolt as gb import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F import torchmetrics.functional as MF ############################################################################ # (HIGHLIGHT) Create a single process dataloader with dgl graphbolt package. ############################################################################ def create_dataloader(dataset, itemset, device): # Sample seed nodes from the itemset. datapipe = gb.ItemSampler(itemset, batch_size=16) # Copy the mini-batch to the designated device for sampling and training. datapipe = datapipe.copy_to(device) # Sample neighbors for the seed nodes. datapipe = datapipe.sample_neighbor(dataset.graph, fanouts=[4, 2]) # Fetch features for sampled nodes. datapipe = datapipe.fetch_feature( dataset.feature, node_feature_keys=["feat"] ) # Initiate the dataloader for the datapipe. 
return gb.DataLoader(datapipe) class GCN(nn.Module): def __init__(self, in_size, out_size, hidden_size=16): super().__init__() self.layers = nn.ModuleList() self.layers.append(dglnn.GraphConv(in_size, hidden_size)) self.layers.append(dglnn.GraphConv(hidden_size, out_size)) def forward(self, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) return hidden_x @torch.no_grad() def evaluate(model, dataset, itemset, device): model.eval() y = [] y_hats = [] dataloader = create_dataloader(dataset, itemset, device) for step, data in enumerate(dataloader): x = data.node_features["feat"] y.append(data.labels) y_hats.append(model(data.blocks, x)) return MF.accuracy( torch.cat(y_hats), torch.cat(y), task="multiclass", num_classes=dataset.tasks[0].metadata["num_classes"], ) def train(model, dataset, device): # The first of two tasks in the dataset is node classification. task = dataset.tasks[0] dataloader = create_dataloader(dataset, task.train_set, device) optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) for epoch in range(10): model.train() total_loss = 0 ######################################################################## # (HIGHLIGHT) Iterate over the dataloader and train the model with all # mini-batches. ######################################################################## for step, data in enumerate(dataloader): # The features of sampled nodes. x = data.node_features["feat"] # The ground truth labels of the seed nodes. y = data.labels # Forward. y_hat = model(data.blocks, x) # Compute loss. loss = F.cross_entropy(y_hat, y) # Backward. optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() # Evaluate the model. 
val_acc = evaluate(model, dataset, task.validation_set, device) test_acc = evaluate(model, dataset, task.test_set, device) print( f"Epoch {epoch:03d} | Loss {total_loss / (step + 1):.3f} | " f"Val Acc {val_acc.item():.3f} | Test Acc {test_acc.item():.3f}" ) if __name__ == "__main__": device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") print(f"Training in {device} mode.") # Load and preprocess dataset. print("Loading data...") dataset = gb.BuiltinDataset("cora").load() # If a CUDA device is selected, we pin the graph and the features so that # the GPU can access them. if device == torch.device("cuda:0"): dataset.graph.pin_memory_() dataset.feature.pin_memory_() in_size = dataset.feature.size("node", None, "feat")[0] out_size = dataset.tasks[0].metadata["num_classes"] model = GCN(in_size, out_size).to(device) # Model training. print("Training...") train(model, dataset, device) ================================================ FILE: examples/graphbolt/rgcn/README.md ================================================ # Node classification on heterogeneous graph with RGCN This example aims to demonstrate how to run node classification task on heterogeneous graph with **GraphBolt**. Models are not tuned to achieve the best accuracy yet. ## Run on `ogbn-mag` dataset ### Sample on CPU and train/infer on CPU ``` python3 hetero_rgcn.py --dataset ogbn-mag ``` ### Sample on CPU and train/infer on GPU ``` python3 hetero_rgcn.py --dataset ogbn-mag --num_gpus 1 ``` ### Resource usage and time cost Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 96 vCPUs(Cascade Lake P-8259L), 8 NVIDIA T4 GPUs(16GB RAM). CPU RAM usage is the peak value of `used` field of `free` command which is a bit rough. Please refer to `RSS`/`USS`/`PSS` which are more accurate. GPU RAM usage is the peak value recorded by `nvidia-smi` command. 
| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~1.1GB       | ~5.3GB        | 0           | 0GB           | ~230s                    |
| ~1.1GB       | ~3GB          | 1           | 3.87GB        | ~64.6s                   |

### Accuracies
```
Epoch: 01, Loss: 2.3434, Valid accuracy: 48.23%
Epoch: 02, Loss: 1.5646, Valid accuracy: 48.49%
Epoch: 03, Loss: 1.1633, Valid accuracy: 45.79%
Test accuracy 44.6792
```

## Run on `ogb-lsc-mag240m` dataset

### Sample on CPU and train/infer on CPU
```
python3 hetero_rgcn.py --dataset ogb-lsc-mag240m
```

### Sample on CPU and train/infer on GPU
```
python3 hetero_rgcn.py --dataset ogb-lsc-mag240m --num_gpus 1
```

### Resource usage and time cost
The results below were roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 96 vCPUs(Cascade Lake P-8259L), 8 NVIDIA T4 GPUs(16GB RAM). CPU RAM usage is the peak value of the `used` field of the `free` command, which is a bit rough. Please refer to `RSS`/`USS`/`PSS`, which are more accurate. GPU RAM usage is the peak value recorded by the `nvidia-smi` command.

> **note:**
`buffer/cache` is heavily used during training; it is about 300GB. If more RAM is available, more `buffer/cache` will be consumed, as the graph size is about 55GB and the feature data is about 350GB.
One more thing: the first epoch is quite slow because `buffer/cache` is not ready yet. For GPU training, the first epoch takes **1030s**.
Even in the following epochs, time consumption varies.

| Dataset Size | CPU RAM Usage | Num of GPUs | GPU RAM Usage | Time Per Epoch(Training) |
| ------------ | ------------- | ----------- | ------------- | ------------------------ |
| ~404GB       | ~67GB         | 0           | 0GB           | ~248s                    |
| ~404GB       | ~60GB         | 1           | 15GB          | ~166s                    |

### Accuracies
```
Epoch: 01, Loss: 2.1432, Valid accuracy: 50.21%
Epoch: 02, Loss: 1.9267, Valid accuracy: 50.77%
Epoch: 03, Loss: 1.8797, Valid accuracy: 53.38%
```


================================================
FILE: examples/graphbolt/rgcn/hetero_rgcn.py
================================================
"""
This script is a GraphBolt counterpart of
``/examples/core/rgcn/hetero_rgcn.py``. It demonstrates how to use GraphBolt
to train a R-GCN model for node classification on the Open Graph Benchmark
(OGB) dataset "ogbn-mag" and "ogb-lsc-mag240m". For more details on "ogbn-mag",
please refer to the OGB website: (https://ogb.stanford.edu/docs/nodeprop/). For
more details on "ogb-lsc-mag240m", please refer to the OGB website:
(https://ogb.stanford.edu/docs/lsc/mag240m/).

Paper [Modeling Relational Data with Graph Convolutional Networks]
(https://arxiv.org/abs/1703.06103).

This example highlights the user experience of GraphBolt while the model and
training/evaluation procedures are almost identical to the original DGL
implementation. Please refer to the original DGL implementation for more
details.

This flowchart describes the main functional sequence of the provided example.
main
│
├───> load_dataset
│     │
│     └───> Load dataset
│
├───> rel_graph_embed [HIGHLIGHT]
│     │
│     └───> Generate graph embeddings
│
├───> Instantiate RGCN model
│     │
│     ├───> RelGraphConvLayer (input to hidden)
│     │
│     └───> RelGraphConvLayer (hidden to output)
│
└───> run
      │
      │
      └───> Training loop
            │
            ├───> EntityClassify.forward (RGCN model forward pass)
            │
            └───> validate and test
                  │
                  └───> EntityClassify.evaluate
"""

import argparse
import itertools
import sys
import time

import dgl
import dgl.graphbolt as gb
import dgl.nn as dglnn
import psutil
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import HeteroEmbedding
from ogb.lsc import MAG240MEvaluator
from ogb.nodeproppred import Evaluator
from tqdm import tqdm


def load_dataset(dataset_name):
    """Load the dataset and return the graph, features, train/valid/test sets
    and the number of classes.

    Here, we use `gb.BuiltinDataset` to load the dataset. The graph and the
    feature store are read from the dataset itself; the train/valid/test sets
    and the number of classes are read from its first task (`tasks[0]`).
    """
    dataset = gb.BuiltinDataset(dataset_name).load()
    print(f"Loaded dataset: {dataset.tasks[0].metadata['name']}")

    graph = dataset.graph
    features = dataset.feature
    train_set = dataset.tasks[0].train_set
    valid_set = dataset.tasks[0].validation_set
    test_set = dataset.tasks[0].test_set
    num_classes = dataset.tasks[0].metadata["num_classes"]

    return (
        graph,
        features,
        train_set,
        valid_set,
        test_set,
        num_classes,
    )


def create_dataloader(
    name,
    graph,
    features,
    item_set,
    device,
    batch_size,
    fanouts,
    shuffle,
    num_workers,
):
    """Create a GraphBolt dataloader for training, validation or testing.

    The pipeline is: ItemSampler -> copy_to(device) -> sample_neighbor ->
    fetch_feature -> DataLoader.

    NOTE: besides its parameters, this function reads `args.overlap_graph_fetch`
    and `args.asynchronous` from the module-level `args` namespace, which is
    created in the `__main__` guard and filled in by `main()`.
    """

    ###########################################################################
    # Initialize the ItemSampler to sample mini-batches from the dataset.
    # `item_set`:
    #   The set of items to sample from. This is typically the
    #   training, validation or test set.
    # `batch_size`:
    #   The number of nodes to sample in each mini-batch.
    # `shuffle`:
    #   Whether to shuffle the items in the dataset before sampling.
    datapipe = gb.ItemSampler(item_set, batch_size=batch_size, shuffle=shuffle)

    # Move the mini-batch to the appropriate device.
    # `device`:
    #   The device to move the mini-batch to.
    datapipe = datapipe.copy_to(device)

    # Sample neighbors for each seed node in the mini-batch.
    # `graph`:
    #   The graph(FusedCSCSamplingGraph) from which to sample neighbors.
    # `fanouts`:
    #   The number of neighbors to sample for each node in each layer.
    # `args` below is the module-level namespace, not a parameter.
    datapipe = datapipe.sample_neighbor(
        graph,
        fanouts=fanouts,
        overlap_fetch=args.overlap_graph_fetch,
        asynchronous=args.asynchronous,
    )

    # Fetch the features for each node in the mini-batch.
    # `features`:
    #   The feature store from which to fetch the features.
    # `node_feature_keys`:
    #   The node features to fetch. This is a dictionary where the keys are
    #   node types and the values are lists of feature names.
    node_feature_keys = {"paper": ["feat"]}
    if name == "ogb-lsc-mag240m":
        # For this dataset "author" and "institution" features are fetched
        # from the feature store as well.
        node_feature_keys["author"] = ["feat"]
        node_feature_keys["institution"] = ["feat"]
    datapipe = datapipe.fetch_feature(features, node_feature_keys)

    # Create a DataLoader from the datapipe.
    # `num_workers`:
    #   The number of worker processes to use for data loading.
    return gb.DataLoader(datapipe, num_workers=num_workers)


def extract_embed(node_embed, input_nodes):
    """Look up embeddings for every node type in `input_nodes` except "paper".

    `node_embed` is called with a dict that maps each remaining node type to
    its entry in `input_nodes`; the result of that call is returned as is.
    """
    emb = node_embed(
        {ntype: input_nodes[ntype] for ntype in input_nodes if ntype != "paper"}
    )
    return emb


def extract_node_features(name, block, data, node_embed, device):
    """Extract the node features from embedding layer or raw features."""
    if name == "ogbn-mag":
        # IDs of the block's source nodes, per node type, moved to `device`.
        input_nodes = {
            k: v.to(device) for k, v in block.srcdata[dgl.NID].items()
        }
        # Extract node embeddings for the input nodes.
        node_features = extract_embed(node_embed, input_nodes)
        # Add the batch's raw "paper" features. Corresponds to the content
        # in the function `rel_graph_embed` comment.
node_features.update( {"paper": data.node_features[("paper", "feat")].to(device)} ) else: node_features = { ntype: data.node_features[(ntype, "feat")] for ntype in block.srctypes } # Original feature data are stored in float16 while model weights are # float32, so we need to convert the features to float32. node_features = { k: v.to(device).float() for k, v in node_features.items() } return node_features def rel_graph_embed(graph, embed_size): """Initialize a heterogenous embedding layer for all node types in the graph, except for the "paper" node type. The function constructs a dictionary 'node_num', where the keys are node types (ntype) and the values are the number of nodes for each type. This dictionary is used to create a HeteroEmbedding instance. (HIGHLIGHT) A HeteroEmbedding instance holds separate embedding layers for each node type, each with its own feature space of dimensionality (node_num[ntype], embed_size), where 'node_num[ntype]' is the number of nodes of type 'ntype' and 'embed_size' is the embedding dimension. The "paper" node type is specifically excluded, possibly because these nodes might already have predefined feature representations, and therefore, do not require an additional embedding layer. Parameters ---------- graph : FusedCSCSamplingGraph The graph for which to create the heterogenous embedding layer. embed_size : int The size of the embedding vectors. Returns -------- HeteroEmbedding A heterogenous embedding layer for all node types in the graph, except for the "paper" node type. """ node_num = {} node_type_to_id = graph.node_type_to_id node_type_offset = graph.node_type_offset for ntype, ntype_id in node_type_to_id.items(): # Skip the "paper" node type. 
if ntype == "paper": continue node_num[ntype] = ( node_type_offset[ntype_id + 1] - node_type_offset[ntype_id] ) print(f"node_num for rel_graph_embed: {node_num}") return HeteroEmbedding(node_num, embed_size) class RelGraphConvLayer(nn.Module): def __init__( self, in_size, out_size, ntypes, relation_names, activation=None, dropout=0.0, ): super(RelGraphConvLayer, self).__init__() self.in_size = in_size self.out_size = out_size self.ntypes = ntypes self.relation_names = relation_names self.activation = activation ######################################################################## # (HIGHLIGHT) HeteroGraphConv is a graph convolution operator over # heterogeneous graphs. A dictionary is passed where the key is the # relation name and the value is the instance of GraphConv. norm="right" # is to divide the aggregated messages by each node’s in-degrees, which # is equivalent to averaging the received messages. weight=False and # bias=False as we will use our own weight matrices defined later. ######################################################################## self.conv = dglnn.HeteroGraphConv( { rel: dglnn.GraphConv( in_size, out_size, norm="right", weight=False, bias=False ) for rel in relation_names } ) # Create a separate Linear layer for each relationship. Each # relationship has its own weights which will be applied to the node # features before performing convolution. self.weight = nn.ModuleDict( { rel_name: nn.Linear(in_size, out_size, bias=False) for rel_name in self.relation_names } ) # Create a separate Linear layer for each node type. # loop_weights are used to update the output embedding of each target node # based on its own features, thereby allowing the model to refine the node # representations. Note that this does not imply the existence of self-loop # edges in the graph. It is similar to residual connection. 
self.loop_weights = nn.ModuleDict( { ntype: nn.Linear(in_size, out_size, bias=True) for ntype in self.ntypes } ) self.loop_weights = nn.ModuleDict( { ntype: nn.Linear(in_size, out_size, bias=True) for ntype in self.ntypes } ) self.dropout = nn.Dropout(dropout) # Initialize parameters of the model. self.reset_parameters() def reset_parameters(self): for layer in self.weight.values(): layer.reset_parameters() for layer in self.loop_weights.values(): layer.reset_parameters() def forward(self, g, inputs): """ Parameters ---------- g : DGLGraph Input graph. inputs : dict[str, torch.Tensor] Node feature for each node type. Returns ------- dict[str, torch.Tensor] New node features for each node type. """ # Create a deep copy of the graph g with features saved in local # frames to prevent side effects from modifying the graph. g = g.local_var() # Create a dictionary of weights for each relationship. The weights # are retrieved from the Linear layers defined earlier. weight_dict = { rel_name: {"weight": self.weight[rel_name].weight.T} for rel_name in self.relation_names } # Create a dictionary of node features for the destination nodes in # the graph. We slice the node features according to the number of # destination nodes of each type. This is necessary because when # incorporating the effect of self-loop edges, we perform computations # only on the destination nodes' features. By doing so, we ensure the # feature dimensions match and prevent any misuse of incorrect node # features. inputs_dst = { k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items() } # Apply the convolution operation on the graph. mod_kwargs are # additional arguments for each relation function defined in the # HeteroGraphConv. In this case, it's the weights for each relation. hs = self.conv(g, inputs, mod_kwargs=weight_dict) def _apply(ntype, h): # Apply the `loop_weight` to the input node features, effectively # acting as a residual connection. 
This allows the model to refine # node embeddings based on its current features. h = h + self.loop_weights[ntype](inputs_dst[ntype]) if self.activation: h = self.activation(h) return self.dropout(h) # Apply the function defined above for each node type. This will update # the node features using the `loop_weights`, apply the activation # function and dropout. return {ntype: _apply(ntype, h) for ntype, h in hs.items()} class EntityClassify(nn.Module): def __init__(self, graph, in_size, out_size): super(EntityClassify, self).__init__() self.in_size = in_size self.hidden_size = 64 self.out_size = out_size # Generate and sort a list of unique edge types from the input graph. # eg. ['writes', 'cites'] etypes = list(graph.edge_type_to_id.keys()) etypes = [gb.etype_str_to_tuple(etype)[1] for etype in etypes] self.relation_names = etypes self.relation_names.sort() self.dropout = 0.5 ntypes = list(graph.node_type_to_id.keys()) self.layers = nn.ModuleList() # First layer: transform input features to hidden features. Use ReLU # as the activation function and apply dropout for regularization. self.layers.append( RelGraphConvLayer( self.in_size, self.hidden_size, ntypes, self.relation_names, activation=F.relu, dropout=self.dropout, ) ) # Second layer: transform hidden features to output features. No # activation function is applied at this stage. self.layers.append( RelGraphConvLayer( self.hidden_size, self.out_size, ntypes, self.relation_names, activation=None, ) ) def reset_parameters(self): # Reset the parameters of each layer. for layer in self.layers: layer.reset_parameters() def forward(self, blocks, h): for layer, block in zip(self.layers, blocks): h = layer(block, h) return h @torch.no_grad() def evaluate( name, g, model, node_embed, device, item_set, features, num_workers, ): # Switches the model to evaluation mode. model.eval() category = "paper" # An evaluator for the dataset. 
# (evaluate, continued) Pick the evaluator that matches the dataset name.
    if name == "ogbn-mag":
        evaluator = Evaluator(name=name)
    else:
        evaluator = MAG240MEvaluator()

    # One fanout entry per edge type for each of the two layers.
    num_etype = len(g.num_edges)
    data_loader = create_dataloader(
        name,
        g,
        features,
        item_set,
        device,
        batch_size=4096,
        fanouts=[torch.full((num_etype,), 25), torch.full((num_etype,), 10)],
        shuffle=False,
        num_workers=num_workers,
    )

    # To store the predictions.
    y_hats = list()
    y_true = list()

    for data in tqdm(data_loader, desc="Inference"):
        # Convert MiniBatch to DGL Blocks and move them to the target device.
        blocks = [block.to(device) for block in data.blocks]
        node_features = extract_node_features(
            name, blocks[0], data, node_embed, device
        )

        # Generate predictions.
        logits = model(blocks, node_features)
        logits = logits[category]

        # Apply log-softmax to the logits and get the prediction by selecting
        # the argmax.
        y_hat = logits.log_softmax(dim=-1).argmax(dim=1, keepdims=True)
        y_hats.append(y_hat.cpu())
        y_true.append(data.labels[category].long())

    y_pred = torch.cat(y_hats, dim=0)
    y_true = torch.cat(y_true, dim=0)
    y_true = torch.unsqueeze(y_true, 1)

    # For "ogb-lsc-mag240m" both tensors are flattened to 1-D before being
    # passed to the evaluator.
    if name == "ogb-lsc-mag240m":
        y_pred = y_pred.view(-1)
        y_true = y_true.view(-1)

    return evaluator.eval({"y_true": y_true, "y_pred": y_pred})["acc"]


def train(
    name,
    g,
    model,
    node_embed,
    optimizer,
    train_set,
    valid_set,
    device,
    features,
    num_workers,
    num_epochs,
):
    """Train `model` for `num_epochs` epochs on `train_set`.

    After every epoch the model is evaluated on `valid_set` and the epoch
    number, the seed-weighted average loss, the validation accuracy and the
    training time of the epoch are printed. Nothing is returned.
    """
    print("Start to train...")
    category = "paper"

    num_etype = len(g.num_edges)
    data_loader = create_dataloader(
        name,
        g,
        features,
        train_set,
        device,
        batch_size=1024,
        fanouts=[torch.full((num_etype,), 25), torch.full((num_etype,), 10)],
        shuffle=True,
        num_workers=num_workers,
    )

    # Typically, the best Validation performance is obtained after
    # the 1st or 2nd epoch. This is why the max epoch is set to 3.
    for epoch in range(num_epochs):
        num_train = len(train_set)
        t0 = time.time()
        model.train()

        total_loss = 0

        for data in tqdm(data_loader, desc=f"Training~Epoch {epoch + 1:02d}"):
            # Convert MiniBatch to DGL Blocks and move them to the target
            # device.
            blocks = [block.to(device) for block in data.blocks]

            # Fetch the number of seed nodes in the batch.
            num_seeds = blocks[-1].num_dst_nodes(category)

            # Extract the node features from embedding layer or raw features.
            node_features = extract_node_features(
                name, blocks[0], data, node_embed, device
            )

            # Reset gradients.
            optimizer.zero_grad()

            # Generate predictions.
            logits = model(blocks, node_features)[category]

            y_hat = logits.log_softmax(dim=-1)
            loss = F.nll_loss(y_hat, data.labels[category].long())
            loss.backward()
            optimizer.step()

            # Weight the batch loss by its seed count so that dividing by
            # `num_train` below gives a per-seed average.
            total_loss += loss.item() * num_seeds

        t1 = time.time()
        loss = total_loss / num_train

        # Evaluate the model on the validation set.
        print("Evaluating the model on the validation set.")
        valid_acc = evaluate(
            name, g, model, node_embed, device, valid_set, features, num_workers
        )
        print("Finish evaluating on validation set.")

        print(
            f"Epoch: {epoch + 1:02d}, "
            f"Loss: {loss:.4f}, "
            f"Valid accuracy: {100 * valid_acc:.2f}%, "
            f"Time {t1 - t0:.4f}"
        )


def main(args):
    """Load the dataset, build the model, train it and report test accuracy."""
    # Use CUDA only when GPUs were requested and CUDA is available.
    device = torch.device(
        "cuda" if args.num_gpus > 0 and torch.cuda.is_available() else "cpu"
    )

    # Load dataset.
    (
        g,
        features,
        train_set,
        valid_set,
        test_set,
        num_classes,
    ) = load_dataset(args.dataset)

    # Move the dataset to the pinned memory to enable GPU access.
    # These two flags are stored on `args` because `create_dataloader` reads
    # them from there.
    args.overlap_graph_fetch = False
    args.asynchronous = False
    if device == torch.device("cuda"):
        g = g.pin_memory_()
        features = features.pin_memory_()
        # Enable optimizations for sampling on the GPU.
        args.overlap_graph_fetch = True
        args.asynchronous = True

    feat_size = features.size("node", "paper", "feat")[0]

    # As `ogb-lsc-mag240m` is a large dataset, features of `author` and
    # `institution` are generated in advance and stored in the feature store.
    # For `ogbn-mag`, we generate the features on the fly.
    embed_layer = None
    if args.dataset == "ogbn-mag":
        # Create the embedding layer and move it to the appropriate device.
embed_layer = rel_graph_embed(g, feat_size).to(device) print( "Number of embedding parameters: " f"{sum(p.numel() for p in embed_layer.parameters())}" ) # Initialize the entity classification model. model = EntityClassify(g, feat_size, num_classes).to(device) print( "Number of model parameters: " f"{sum(p.numel() for p in model.parameters())}" ) if embed_layer is not None: embed_layer.reset_parameters() model.reset_parameters() # `itertools.chain()` is a function in Python's itertools module. # It is used to flatten a list of iterables, making them act as # one big iterable. # In this context, the following code is used to create a single # iterable over the parameters of both the model and the embed_layer, # which is passed to the optimizer. The optimizer then updates all # these parameters during the training process. all_params = itertools.chain( model.parameters(), [] if embed_layer is None else embed_layer.parameters(), ) optimizer = torch.optim.Adam(all_params, lr=0.01) expected_max = int(psutil.cpu_count(logical=False)) if args.num_workers >= expected_max: print( "[ERROR] You specified num_workers are larger than physical" f"cores, please set any number less than {expected_max}", file=sys.stderr, ) train( args.dataset, g, model, embed_layer, optimizer, train_set, valid_set, device, features, args.num_workers, args.num_epochs, ) print("Testing...") test_acc = evaluate( args.dataset, g, model, embed_layer, device, test_set, features, args.num_workers, ) print(f"Test accuracy {test_acc*100:.4f}") if __name__ == "__main__": parser = argparse.ArgumentParser(description="GraphBolt RGCN") parser.add_argument( "--dataset", type=str, default="ogbn-mag", choices=["ogbn-mag", "ogb-lsc-mag240m"], help="Dataset name. 
Possible values: ogbn-mag, ogb-lsc-mag240m", ) parser.add_argument("--num_epochs", type=int, default=3) parser.add_argument("--num_workers", type=int, default=0) parser.add_argument("--num_gpus", type=int, default=1) args = parser.parse_args() main(args) ================================================ FILE: examples/graphbolt/sparse/graphsage.py ================================================ """ This script demonstrate how to use dgl sparse library to sample on graph and train model. It trains and tests a GraphSAGE model using the sparse sample and compact operators to sample submatrix from the whole matrix. This flowchart describes the main functional sequence of the provided example. main │ ├───> Load and preprocess full dataset │ ├───> Instantiate SAGE model │ ├───> train │ │ │ └───> Training loop │ │ │ ├───> Sample submatrix │ │ │ └───> SAGE.forward └───> test │ ├───> Sample submatrix │ └───> Evaluate the model """ import argparse from functools import partial import dgl.graphbolt as gb import dgl.sparse as dglsp import torch import torch.nn as nn import torch.nn.functional as F import torchmetrics.functional as MF from dgl.graphbolt.subgraph_sampler import SubgraphSampler from torch.utils.data import functional_datapipe from tqdm import tqdm class SAGEConv(nn.Module): r"""GraphSAGE layer from `Inductive Representation Learning on Large Graphs `__ """ def __init__( self, in_feats, out_feats, ): super(SAGEConv, self).__init__() self._in_src_feats, self._in_dst_feats = in_feats, in_feats self._out_feats = out_feats self.fc_neigh = nn.Linear(self._in_src_feats, out_feats, bias=False) self.fc_self = nn.Linear(self._in_dst_feats, out_feats, bias=True) self.reset_parameters() def reset_parameters(self): gain = nn.init.calculate_gain("relu") nn.init.xavier_uniform_(self.fc_self.weight, gain=gain) nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain) def forward(self, A, feat): feat_src = feat feat_dst = feat[: A.shape[1]] # Aggregator type: mean. 
# (SAGEConv.forward, continued) Project the source features, then average them.
        srcdata = self.fc_neigh(feat_src)
        # Divided by degree.
        D_hat = dglsp.diag(A.sum(0)) ** -1
        A_div = A @ D_hat
        # Conv neighbors.
        dstdata = A_div.T @ srcdata

        # Combine the transformed destination features with the aggregate.
        rst = self.fc_self(feat_dst) + dstdata
        return rst


class SAGE(nn.Module):
    """Stack of three `SAGEConv` layers with ReLU and dropout in between."""

    def __init__(self, in_size, hid_size, out_size):
        super().__init__()
        self.layers = nn.ModuleList()
        # Three-layer GraphSAGE-gcn.
        self.layers.append(SAGEConv(in_size, hid_size))
        self.layers.append(SAGEConv(hid_size, hid_size))
        self.layers.append(SAGEConv(hid_size, out_size))
        self.dropout = nn.Dropout(0.5)
        self.hid_size = hid_size
        self.out_size = out_size

    def forward(self, sampled_matrices, x):
        """Apply layer i to `sampled_matrices[i]`; ReLU and dropout follow
        every layer except the last one."""
        hidden_x = x
        for layer_idx, (layer, sampled_matrix) in enumerate(
            zip(self.layers, sampled_matrices)
        ):
            hidden_x = layer(sampled_matrix, hidden_x)
            if layer_idx != len(self.layers) - 1:
                hidden_x = F.relu(hidden_x)
                hidden_x = self.dropout(hidden_x)
        return hidden_x


@functional_datapipe("sample_sparse_neighbor")
class SparseNeighborSampler(SubgraphSampler):
    """Subgraph sampler that samples from a sparse matrix; registered on
    datapipes under the name `sample_sparse_neighbor`."""

    def __init__(self, datapipe, matrix, fanouts):
        super().__init__(datapipe)
        self.matrix = matrix
        # Convert fanouts to a list of tensors.
        # Each fanout is inserted at the front, so `self.fanouts` ends up in
        # the reverse of the order given by the caller.
        self.fanouts = []
        for fanout in fanouts:
            if not isinstance(fanout, torch.Tensor):
                fanout = torch.LongTensor([int(fanout)])
            self.fanouts.insert(0, fanout)

    def sample_subgraphs(self, seeds, seeds_timestamp=None):
        """Return `(input node ids, list of sampled matrices)` for `seeds`.

        `seeds_timestamp` is accepted but not used by this sampler.
        """
        sampled_matrices = []
        src = seeds.long()

        #####################################################################
        # (HIGHLIGHT) Using the sparse sample operator to perform random
        # sampling on the neighboring nodes of the seed nodes. The sparse
        # compact operator is then employed to compact and relabel the sampled
        # matrix, resulting in the sampled matrix and the relabel index.
        #####################################################################
        for fanout in self.fanouts:
            # Sample neighbors.
            sampled_matrix = self.matrix.sample(1, fanout, ids=src).coalesce()
            # Compact the sampled matrix.
            compacted_mat, row_ids = sampled_matrix.compact(0)
            # Prepend, so the matrix sampled last comes first in the list.
            sampled_matrices.insert(0, compacted_mat)
            # The rows kept by this hop become the seeds of the next hop.
            src = row_ids

        return src, sampled_matrices


############################################################################
# (HIGHLIGHT) Create a multi-process dataloader with dgl graphbolt package.
############################################################################
def create_dataloader(A, fanouts, ids, features, device):
    """Build the pipeline ItemSampler -> sample_sparse_neighbor ->
    fetch_feature -> copy_to(device) -> DataLoader and return the loader."""
    datapipe = gb.ItemSampler(ids, batch_size=1024)
    # Customize graphbolt sampler by sparse.
    datapipe = datapipe.sample_sparse_neighbor(A, fanouts)
    # Use graphbolt to fetch features.
    datapipe = datapipe.fetch_feature(features, node_feature_keys=["feat"])
    datapipe = datapipe.copy_to(device)
    dataloader = gb.DataLoader(datapipe)
    return dataloader


def evaluate(model, dataloader, num_classes):
    """Run `model` over `dataloader` without gradients and return the
    multiclass accuracy computed by torchmetrics."""
    model.eval()
    ys = []
    y_hats = []
    for it, data in tqdm(enumerate(dataloader), "Evaluating"):
        with torch.no_grad():
            node_feature = data.node_features["feat"].float()
            blocks = data.sampled_subgraphs
            y = data.labels
            ys.append(y)
            y_hats.append(model(blocks, node_feature))

    return MF.accuracy(
        torch.cat(y_hats),
        torch.cat(ys),
        task="multiclass",
        num_classes=num_classes,
    )


def validate(device, dataset, model, num_classes):
    """Evaluate `model` on the test set of `dataset.tasks[0]`.

    NOTE: `A` and `features` are not parameters here. They resolve to the
    module-level names assigned in the `__main__` block, so this function only
    works when the file is run as a script.
    """
    test_set = dataset.tasks[0].test_set
    test_dataloader = create_dataloader(
        A, [10, 10, 10], test_set, features, device
    )
    acc = evaluate(model, test_dataloader, num_classes)
    return acc


def train(device, A, features, dataset, num_classes, model):
    """Train `model` for 10 epochs, printing the mean loss and the validation
    accuracy after each epoch."""
    # Create sampler & dataloader.
    train_set = dataset.tasks[0].train_set
    train_dataloader = create_dataloader(
        A, [10, 10, 10], train_set, features, device
    )

    valid_set = dataset.tasks[0].validation_set
    val_dataloader = create_dataloader(
        A, [10, 10, 10], valid_set, features, device
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)

    for epoch in range(10):
        model.train()
        total_loss = 0
        for it, data in tqdm(enumerate(train_dataloader), "Training"):
            node_feature = data.node_features["feat"].float()
            blocks = data.sampled_subgraphs
            y = data.labels
            y_hat = model(blocks, node_feature)
            loss = F.cross_entropy(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        acc = evaluate(model, val_dataloader, num_classes)
        # `it` is the index of the last batch, so `it + 1` is the batch count.
        print(
            "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
                epoch, total_loss / (it + 1), acc.item()
            )
        )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GraphSAGE")
    parser.add_argument(
        "--mode",
        default="gpu",
        choices=["cpu", "gpu"],
        help="Training mode. 'cpu' for CPU training, 'gpu' for GPU training.",
    )
    args = parser.parse_args()
    # Fall back to CPU when CUDA is not available.
    if not torch.cuda.is_available():
        args.mode = "cpu"
    print(f"Training in {args.mode} mode.")

    #####################################################################
    # (HIGHLIGHT) This example implements a graphSAGE algorithm by sparse
    # operators, which involves sampling a subgraph from a full graph and
    # conducting training.
    #
    # First, the whole graph is loaded onto the CPU or GPU and transformed
    # to sparse matrix. To obtain the training subgraph, it samples three
    # submatrices by seed nodes, which contains their randomly sampled
    # 1-hop, 2-hop, and 3-hop neighbors. Then, the features of the
    # subgraph are input to the network for training.
    #####################################################################

    # Load and preprocess dataset.
    print("Loading data")
    device = torch.device("cpu" if args.mode == "cpu" else "cuda")
    dataset = gb.BuiltinDataset("ogbn-products").load()
    g = dataset.graph
    # `features` (and `A` below) are also read as globals by `validate`.
    features = dataset.feature

    # Create GraphSAGE model.
    in_size = features.size("node", None, "feat")[0]
    num_classes = dataset.tasks[0].metadata["num_classes"]
    out_size = num_classes
    model = SAGE(in_size, 256, out_size).to(device)

    # Create sparse.
    N = g.num_nodes
    A = dglsp.from_csc(g.csc_indptr.long(), g.indices.long(), shape=(N, N))

    # Model training.
    print("Training...")
    train(device, A, features, dataset, num_classes, model)

    # Test the model.
    print("Testing...")
    acc = validate(device, dataset, model, num_classes)
    print(f"Test accuracy {acc:.4f}")

================================================
FILE: examples/graphbolt/temporal_link_prediction.py
================================================
"""
This script trains and tests a Heterogeneous GraphSAGE model for link
prediction with temporal information using graphbolt dataloader.

While node classification predicts labels for nodes based on their
local neighborhoods, link prediction assesses the likelihood of an edge
existing between two nodes, necessitating different sampling strategies
that account for pairs of nodes and their joint neighborhoods.

An additional temporal attribute is provided in both graph and TVT sets,
ensuring that during sampling, only neighbors whose timestamps are earlier
than the seed timestamp will be sampled.

This flowchart describes the main functional sequence of the provided example.
main │ ├───> OnDiskDataset pre-processing │ ├───> Instantiate HeteroSAGE model │ ├───> train │ │ │ ├───> Get graphbolt dataloader (HIGHLIGHT) │ │ │ └───> Training loop │ │ │ ├───> HeteroSAGE.forward │ │ │ └───> Validation set evaluation │ └───> Test set evaluation """ import argparse import os import time import dgl.graphbolt as gb import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F import tqdm from dgl.data.utils import download, extract_archive TIMESTAMP_FEATURE_NAME = "__timestamp__" NODE_FEATURE_KEYS = { "Product": ["categoryId"], "Query": ["categoryId"], } TARGET_TYPE = ("Query", "Click", "Product") ALL_TYPES = [ TARGET_TYPE, ("Product", "reverse_Click", "Query"), ("Product", "reverse_QueryResult", "Query"), ("Query", "QueryResult", "Product"), ] class CategoricalEncoder(nn.Module): def __init__( self, num_categories, out_size, ): super().__init__() self.embed = nn.Embedding(num_categories, out_size) self.reset_parameters() def reset_parameters(self): nn.init.xavier_uniform_(self.embed.weight) def forward(self, input_feat: torch.Tensor): return self.embed(input_feat.view(-1)) class HeteroSAGE(nn.Module): def __init__(self, in_size, hidden_size): super().__init__() self.layers = nn.ModuleList() sizes = [in_size, hidden_size] for size in sizes: self.layers.append( dglnn.HeteroGraphConv( { etype: dglnn.SAGEConv( size, hidden_size, "mean", ) for etype in ALL_TYPES }, aggregate="sum", ) ) self.predictor = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1), ) def forward(self, blocks, X_node_dict): H_node_dict = X_node_dict for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): H_node_dict = layer(block, H_node_dict) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: H_node_dict = { ntype: F.relu(H) for ntype, H in H_node_dict.items() } return H_node_dict def create_dataloader(args, graph, features, itemset, 
is_train=True): datapipe = gb.ItemSampler( itemset, batch_size=args.train_batch_size if is_train else args.eval_batch_size, shuffle=is_train, ) if args.storage_device != "cpu": datapipe = datapipe.copy_to(device=args.device) ############################################################################ # [Input]: # 'datapipe' is either 'ItemSampler' or 'UniformNegativeSampler' depending # on whether training is needed ('is_train'), # 'graph': The network topology for sampling. # 'args.fanout': Number of neighbors to sample per node. # [Output]: # A NeighborSampler object to sample neighbors. # [Role]: # Initialize a neighbor sampler for sampling the neighborhoods of nodes with # considering of temporal information. Only neighbors that is earlier than # the seed will be sampled. ############################################################################ datapipe = getattr(datapipe, args.sample_mode)( graph, args.fanout if is_train else [-1], node_timestamp_attr_name=TIMESTAMP_FEATURE_NAME, edge_timestamp_attr_name=TIMESTAMP_FEATURE_NAME, ) datapipe = datapipe.fetch_feature( features, node_feature_keys=NODE_FEATURE_KEYS ) if args.storage_device == "cpu": datapipe = datapipe.copy_to(device=args.device) dataloader = gb.DataLoader( datapipe, num_workers=args.num_workers, ) # Return the fully-initialized DataLoader object. return dataloader def train(args, model, graph, features, train_set, encoders): optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) dataloader = create_dataloader(args, graph, features, train_set) for epoch in range(args.epochs): model.train() total_loss = 0 start_epoch_time = time.time() for step, data in tqdm.tqdm(enumerate(dataloader)): # Get node pairs with labels for loss calculation. 
compacted_seeds = data.compacted_seeds[ gb.etype_tuple_to_str(TARGET_TYPE) ].T labels = data.labels node_feature = {} for ntype, keys in NODE_FEATURE_KEYS.items(): ntype, feat = ntype, keys[0] node_feature[ntype] = data.node_features[ (ntype, feat) ].squeeze() blocks = data.blocks # Get the embeddings of the input nodes. X_node_dict = { ntype: encoders[ntype](feat) for ntype, feat in node_feature.items() } X_node_dict = model(blocks, X_node_dict) src_type, _, dst_type = TARGET_TYPE logits = model.predictor( X_node_dict[src_type][compacted_seeds[0]] * X_node_dict[dst_type][compacted_seeds[1]] ).squeeze() # Compute loss. loss = F.binary_cross_entropy_with_logits( logits, labels[gb.etype_tuple_to_str(TARGET_TYPE)].float() ) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() if step + 1 == args.early_stop: # Early stopping requires a new dataloader to reset its state. dataloader = create_dataloader(args, graph, features, train_set) break end_epoch_time = time.time() print( f"Epoch {epoch:05d} | " f"Loss {(total_loss) / (step + 1):.4f} | " f"Time {(end_epoch_time - start_epoch_time):.4f} s" ) def parse_args(): parser = argparse.ArgumentParser(description="diginetica-r2ne (GraphBolt)") parser.add_argument("--epochs", type=int, default=10) parser.add_argument("--lr", type=float, default=0.0005) parser.add_argument("--neg-ratio", type=int, default=1) parser.add_argument("--train-batch-size", type=int, default=1024) parser.add_argument("--eval-batch-size", type=int, default=1024) parser.add_argument("--num-workers", type=int, default=0) parser.add_argument( "--dataset", default="diginetica-r2ne", choices=["diginetica-r2ne"], help="Dataset.", ) parser.add_argument( "--early-stop", type=int, default=0, help="0 means no early stop, otherwise stop at the input-th step", ) parser.add_argument( "--fanout", type=str, default="20,20", help="Fan-out of neighbor sampling. 
Default: 20, 20", ) parser.add_argument( "--exclude-edges", type=int, default=1, help="Whether to exclude reverse edges during sampling. Default: 1", ) parser.add_argument( "--mode", default="cpu-cuda", choices=["cpu-cpu", "cpu-cuda", "cuda-cuda"], help="Dataset storage placement and Train device: 'cpu' for CPU and RAM," " 'pinned' for pinned memory in RAM, 'cuda' for GPU and GPU memory.", ) parser.add_argument( "--sample-mode", default="temporal_sample_neighbor", choices=["temporal_sample_neighbor", "temporal_sample_layer_neighbor"], help="The sampling function when doing layerwise sampling.", ) return parser.parse_args() def download_datasets(name, root="datasets"): url = "https://dgl-data.s3-accelerate.amazonaws.com/dataset/" dataset_dir = os.path.join(root, name) if not os.path.exists(dataset_dir): url += name + ".zip" os.makedirs(root, exist_ok=True) zip_file_path = os.path.join(root, name + ".zip") download(url, path=zip_file_path) extract_archive(zip_file_path, root, overwrite=True) os.remove(zip_file_path) return dataset_dir def main(args): if not torch.cuda.is_available(): args.mode = "cpu-cpu" print(f"Training in {args.mode} mode.") args.storage_device, args.device = args.mode.split("-") args.device = torch.device(args.device) # Load and preprocess dataset. print("Loading data") # TODO: Add the datasets to built-in. dataset_path = download_datasets(args.dataset) dataset = gb.OnDiskDataset(dataset_path).load() # Move the dataset to the selected storage. graph = dataset.graph.to(args.storage_device) features = dataset.feature.to(args.storage_device) train_set = dataset.tasks[0].train_set # TODO: Fix the dataset so that this modification is not needed. node_pairs # needs to be cast into graph.indices.dtype, which is int32. 
train_set._itemsets["Query:Click:Product"]._items = tuple( item.to(graph.indices.dtype if i == 0 else None) for i, item in enumerate( train_set._itemsets["Query:Click:Product"]._items ) ) args.fanout = list(map(int, args.fanout.split(","))) in_size = 128 hidden_channels = 256 query_size = features.metadata("node", "Query", "categoryId")[ "num_categories" ] product_size = features.metadata("node", "Product", "categoryId")[ "num_categories" ] args.device = torch.device(args.device) model = HeteroSAGE(in_size, hidden_channels).to(args.device) encoders = { "Query": CategoricalEncoder(query_size, in_size).to(args.device), "Product": CategoricalEncoder(product_size, in_size).to(args.device), } # Model training. print("Training...") train(args, model, graph, features, train_set, encoders) if __name__ == "__main__": args = parse_args() main(args) ================================================ FILE: examples/legacy/README.md ================================================ # New sampling examples via `dgl.graphbolt` Consider taking a look at our new sampling examples in the `../graphbolt` folder using `dgl.graphbolt`. # Sampling Examples Running ## Requirements ```bash pip install torchmetrics==0.11.4 ``` ## How to run ### Node classification Run with following (available mode: "cpu", "mixed"(default), "gpu") ```bash python3 node_classification.py --mode mixed ``` ================================================ FILE: examples/legacy/link_prediction.py ================================================ """ This script trains and tests a GraphSAGE model for link prediction on large graphs using efficient and tailor-made neighbor sampling. 
def to_bidirected_with_reverse_mapping(g):
    """Convert the graph to bidirectional and return the reverse mapping.

    Reverse edges are added to `g`, duplicate edges are merged with
    `dgl.to_simple`, and for every edge of the simplified graph the ID of
    the edge with swapped endpoints is computed.

    The function does not work with graphs that have self-loops.

    Parameters
    ----------
    g : DGLGraph
        Input graph.

    Returns
    -------
    DGLGraph
        Bidirectional, simplified graph (`g_simple`).
    Tensor
        Mapping to reverse edges: looking up entry `i` with `find_edges` in
        the returned graph yields the endpoints of edge `i` swapped (this is
        exactly what the sanity check at the end verifies).

    Raises
    ------
    AssertionError
        If the computed mapping does not pair every edge with its reverse.
    """
    # First, add reverse edges to the graph, effectively making it
    # bidirectional. Then, simplify the resulting graph by merging any
    # duplicate edges. The resulting simplified graph is stored in
    # `g_simple`, and `mapping` provides information on how edges in
    # `g_simple` correspond to edges in the original graph.
    g_simple, mapping = dgl.to_simple(
        dgl.add_reverse_edges(g), return_counts="count", writeback_mapping=True
    )
    # The `return_counts` option in `dgl.to_simple` returns the count of how
    # many times each edge in the simplified graph corresponds to an edge in
    # the original graph. This count is saved in the edge data of the
    # returned graph with the key "count".
    c = g_simple.edata["count"]
    num_edges = g.num_edges()
    # `mapping_offset` is an auxiliary tensor used to understand how edges in
    # the simplified bidirectional graph (g_simple) relate to the edges in
    # the original graph. It has one more entry than `g_simple` has edges so
    # that entry 0 stays zero and entries 1.. hold the running totals.
    mapping_offset = torch.zeros(
        g_simple.num_edges() + 1, dtype=g_simple.idtype
    )
    # Calculate the cumulative sum of counts to determine boundaries for each
    # unique edge.
    mapping_offset[1:] = c.cumsum(0)
    # Sort the mapping tensor to group the same edge indices.
    idx = mapping.argsort()
    # Using the previously computed `mapping_offset`, it extracts the first
    # index of each group, which represents the unique edge indices from the
    # sorted mapping.
    idx_uniq = idx[mapping_offset[:-1]]
    # If an edge index is greater than or equal to the number of edges in the
    # original graph, it indicates that this edge is a reversed edge, and the
    # original edge index for it is (idx_uniq - num_edges). Otherwise, its
    # reverse edge index is (idx_uniq + num_edges).
    reverse_idx = torch.where(
        idx_uniq >= num_edges, idx_uniq - num_edges, idx_uniq + num_edges
    )
    reverse_mapping = mapping[reverse_idx]
    # Sanity check to ensure valid mapping: the endpoints of every edge must
    # equal the swapped endpoints of the edge it is mapped to.
    src1, dst1 = g_simple.edges()
    src2, dst2 = g_simple.find_edges(reverse_mapping)
    assert torch.equal(src1, dst2)
    assert torch.equal(src2, dst1)
    return g_simple, reverse_mapping
self.layers.append(dglnn.SAGEConv(in_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, "mean")) self.hidden_size = hidden_size self.predictor = nn.Sequential( nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, hidden_size), nn.ReLU(), nn.Linear(hidden_size, 1), ) def forward(self, pair_graph, neg_pair_graph, blocks, x): hidden_x = x for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)): hidden_x = layer(block, hidden_x) is_last_layer = layer_idx == len(self.layers) - 1 if not is_last_layer: hidden_x = F.relu(hidden_x) pos_src, pos_dst = pair_graph.edges() neg_src, neg_dst = neg_pair_graph.edges() hidden_pos = self.predictor(hidden_x[pos_src] * hidden_x[pos_dst]) hidden_neg = self.predictor(hidden_x[neg_src] * hidden_x[neg_dst]) return hidden_pos, hidden_neg def inference(self, g, device, batch_size): """Layer-wise inference algorithm to compute GNN node embeddings.""" feat = g.ndata["feat"] ##################################################################### # (HIGHLIGHT) Creating a MultiLayerFullNeighborSampler instance. # This sampler is used in the Graph Neural Networks (GNN) training # process to provide neighbor sampling, which is crucial for # efficient training of GNN on large graphs. # # The first argument '1' indicates the number of layers for # the neighbor sampling. In this case, it's set to 1, meaning # only the direct neighbors of each node will be included in the # sampling. # # The 'prefetch_node_feats' parameter specifies the node features # that need to be pre-fetched during sampling. In this case, the # feature named 'feat' will be pre-fetched. # # `prefetch` in DGL initiates data fetching operations in parallel # with model computations. 
@torch.no_grad()
def compute_mrr(
    model, evaluator, node_emb, src, dst, neg_dst, device, batch_size=500
):
    """Return the mean reciprocal rank over all (src, dst) candidate pairs.

    The pairs are scored in chunks of `batch_size` so that only one chunk of
    embeddings has to live on `device` at a time. For every source node the
    positive destination is ranked against its negative destinations by
    `evaluator`, and the per-pair reciprocal ranks are averaged.
    """
    num_pairs = src.shape[0]
    reciprocal_ranks = torch.zeros(num_pairs)
    for lo in tqdm.trange(0, num_pairs, batch_size, desc="Evaluate"):
        chunk = slice(lo, min(lo + batch_size, num_pairs))
        # Column 0 holds the positive destination, the remaining columns
        # hold the negative destinations.
        candidates = torch.cat([dst[chunk, None], neg_dst[chunk]], 1)
        # Gather the embeddings of the sources and of every candidate.
        src_emb = node_emb[src[chunk]][:, None, :].to(device)
        flat_emb = node_emb[candidates.view(-1)]
        cand_emb = flat_emb.view(*candidates.shape, -1).to(device)
        # Score each (source, candidate) pair with the model's predictor.
        scores = model.predictor(src_emb * cand_emb).squeeze(-1)
        ranking_input = {
            "y_pred_pos": scores[:, 0],
            "y_pred_neg": scores[:, 1:],
        }
        reciprocal_ranks[chunk] = evaluator.eval(ranking_input)["mrr_list"]
    return reciprocal_ranks.mean()
# # (HIGHLIGHT) Modify the NeighborSampler for Edge Prediction # # This `as_edge_prediction_sampler` augments the original NeighborSampler # to specifically handle edge prediction tasks, where not only the # structure but also the relationships between nodes (edges) are of # importance. # # - `exclude="reverse_id"` ensures that the edges corresponding to the # reverse of the original edges are excluded during sampling, given that # reverse edges can introduce unnecessary redundancy in edge prediction. # # - `reverse_eids=reverse_eids` specifies the IDs of the reverse edges. # This information is vital so the sampler knows which edges to avoid. # # - The negative sampling strategy is specified using the # `negative_sampler`. Here, a uniform negative sampling method is # employed, where a negative sample (an edge that doesn't exist in the # original graph) is uniformly drawn from the set of all possible edges. # # The modified sampler is tailor-made for scenarios where the goal is # not just to learn node representations, but also to predict the # likelihood of an edge existing between two nodes (link prediction). ##################################################################### sampler = NeighborSampler( [15, 10, 5], prefetch_node_feats=["feat"], fused=fused_sampling, ) sampler = as_edge_prediction_sampler( sampler, exclude="reverse_id" if args.exclude_edges else None, reverse_eids=reverse_eids if args.exclude_edges else None, negative_sampler=negative_sampler.Uniform(1), ) dataloader = DataLoader( g, seed_edges, sampler, device=device, batch_size=args.train_batch_size, shuffle=True, drop_last=False, # If `g` is on gpu or `use_uva` is True, `num_workers` must be zero, # otherwise it will cause error. 
def parse_args():
    """Build the CLI of the link-prediction example and parse `sys.argv`.

    Returns
    -------
    argparse.Namespace
        The parsed options, one attribute per flag declared below.
    """
    # Flags are declared as (name, keyword arguments) pairs and registered in
    # this order, which is also the order shown by `--help`.
    option_table = (
        ("--epochs", {"type": int, "default": 10}),
        (
            "--lr",
            {
                "type": float,
                "default": 0.0005,
                "help": "Learning rate. Default: 0.0005",
            },
        ),
        (
            "--train-batch-size",
            {
                "type": int,
                "default": 512,
                "help": "Batch size for training. Default: 512",
            },
        ),
        (
            "--eval-batch-size",
            {
                "type": int,
                "default": 1024,
                "help": "Batch size during evaluation. Default: 1024",
            },
        ),
        (
            "--early-stop",
            {
                "type": int,
                "default": 0,
                "help": "0 means no early stop, otherwise stop at the "
                "input-th step",
            },
        ),
        (
            "--exclude-edges",
            {
                "type": int,
                "default": 1,
                "help": "Whether to exclude reverse edges during sampling. "
                "Default: 1",
            },
        ),
        (
            "--compare-graphbolt",
            {"action": "store_true", "help": "Compare with GraphBolt"},
        ),
        (
            "--mode",
            {
                "default": "mixed",
                "choices": ["cpu", "mixed", "puregpu"],
                "help": "Training mode. 'cpu' for CPU training, 'mixed' for "
                "CPU-GPU mixed training, 'puregpu' for pure-GPU training.",
            },
        ),
    )
    cli = argparse.ArgumentParser()
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)
    return cli.parse_args()
class SAGE(nn.Module):
    """Three-layer GraphSAGE (mean aggregator) for node classification."""

    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        # Three-layer GraphSAGE-mean: in -> hidden -> hidden -> out.
        widths = [in_size, hidden_size, hidden_size, out_size]
        self.layers = nn.ModuleList(
            [
                dglnn.SAGEConv(fan_in, fan_out, "mean")
                for fan_in, fan_out in zip(widths[:-1], widths[1:])
            ]
        )
        self.dropout = nn.Dropout(0.5)
        self.hidden_size = hidden_size
        self.out_size = out_size

    def forward(self, blocks, x):
        """Run the layers over `blocks`, one block per layer.

        ReLU and dropout follow every layer except the last one.
        """
        last_idx = len(self.layers) - 1
        h = x
        for idx, (conv, block) in enumerate(zip(self.layers, blocks)):
            h = conv(block, h)
            if idx == last_idx:
                continue
            h = self.dropout(F.relu(h))
        return h

    def inference(self, g, device, batch_size, fused_sampling: bool = True):
        """Conduct layer-wise inference to get all the node embeddings."""
        feat = g.ndata["feat"]
        #####################################################################
        # (HIGHLIGHT) Creating a MultiLayerFullNeighborSampler instance.
        #
        # The first argument '1' asks for a single layer of sampling, i.e.
        # only the direct neighbors of each node are included.
        #
        # 'prefetch_node_feats' names the node features ('feat' here) that
        # are pre-fetched during sampling. Prefetching starts the data
        # fetching in parallel with model computation, so the data is ready
        # when the computation needs it:
        #
        # Without Prefetch:
        # Fetch1 ──> Compute1 ──> Fetch2 ──> Compute2 ──> Fetch3 ──> Compute3
        #
        # With Prefetch:
        # Fetch1 ──> Fetch2 ──> Fetch3
        #    │          │          │
        #    └─Compute1 └─Compute2 └─Compute3
        #####################################################################
        sampler = MultiLayerFullNeighborSampler(
            1, prefetch_node_feats=["feat"], fused=fused_sampling
        )
        all_nodes = torch.arange(g.num_nodes()).to(g.device)
        dataloader = DataLoader(
            g,
            all_nodes,
            sampler,
            device=device,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=0,
        )

        # Per-layer outputs are buffered on the CPU. Enable pin_memory for
        # faster CPU to GPU data transfer if the model is running on a GPU.
        buffer_device = torch.device("cpu")
        pin_memory = buffer_device != device

        last_idx = len(self.layers) - 1
        for layer_idx, layer in enumerate(self.layers):
            is_last_layer = layer_idx == last_idx
            width = self.out_size if is_last_layer else self.hidden_size
            y = torch.empty(
                g.num_nodes(),
                width,
                device=buffer_device,
                pin_memory=pin_memory,
            )
            feat = feat.to(device)
            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                # len(blocks) == 1 because the sampler has a single layer.
                hidden_x = layer(blocks[0], feat[input_nodes])
                if not is_last_layer:
                    hidden_x = self.dropout(F.relu(hidden_x))
                # By design, our output nodes are contiguous.
                first, last = output_nodes[0], output_nodes[-1]
                y[first : last + 1] = hidden_x.to(buffer_device)
            # The output of this layer is the input of the next one.
            feat = y
        return y
def train(device, g, dataset, model, num_classes, use_uva, fused_sampling):
    """Train `model` for 10 epochs, reporting validation accuracy each epoch.

    Mini-batches are drawn with a three-layer neighbor sampler; after every
    epoch the loss averaged over the batches, the validation accuracy and the
    elapsed training time are printed.
    """
    # With UVA the seed nodes live on the compute device, otherwise they stay
    # on the same device as the graph.
    index_device = device if use_uva else g.device
    train_idx = dataset.train_idx.to(index_device)
    val_idx = dataset.val_idx.to(index_device)

    #####################################################################
    # (HIGHLIGHT) Instantiate a NeighborSampler object for efficient
    # training of Graph Neural Networks (GNNs) on large-scale graphs.
    #
    # [10, 10, 10] is the fanout per layer: 10 neighbors are randomly
    # selected for each node at each of the three layers.
    #
    # 'prefetch_node_feats' and 'prefetch_labels' name the node features
    # and labels that are pre-fetched during sampling.
    #####################################################################
    sampler = NeighborSampler(
        [10, 10, 10],  # fanout for [layer-0, layer-1, layer-2]
        prefetch_node_feats=["feat"],
        prefetch_labels=["label"],
        fused=fused_sampling,
    )

    # Options shared by both loaders. If `g` is on gpu or `use_uva` is True,
    # `num_workers` must be zero, otherwise it will cause error.
    loader_options = dict(
        device=device,
        batch_size=1024,
        drop_last=False,
        num_workers=0,
        use_uva=use_uva,
    )
    train_dataloader = DataLoader(
        g, train_idx, sampler, shuffle=True, **loader_options
    )
    # No need to shuffle for validation.
    val_dataloader = DataLoader(
        g, val_idx, sampler, shuffle=False, **loader_options
    )

    opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)

    for epoch in range(10):
        t0 = time.time()
        model.train()
        total_loss = 0
        # A block is a graph consisting of two sets of nodes: the source
        # nodes and destination nodes, with all edges going from source to
        # destination.
        # For more details: https://discuss.dgl.ai/t/what-is-the-block/2932.
        for it, batch in enumerate(train_dataloader):
            _, _, blocks = batch
            # Inputs come from the source nodes of the first block, the
            # ground truth labels from the destination nodes of the last.
            inputs = blocks[0].srcdata["feat"]
            targets = blocks[-1].dstdata["label"]
            loss = F.cross_entropy(model(blocks, inputs), targets)
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        t1 = time.time()
        acc = evaluate(model, g, val_dataloader, num_classes)
        print(
            f"Epoch {epoch:05d} | Loss {total_loss / (it + 1):.4f} | "
            f"Accuracy {acc.item():.4f} | Time {t1 - t0:.4f}"
        )
print("Training...") train(device, g, dataset, model, num_classes, use_uva, fused_sampling) # Test the model. print("Testing...") acc = layerwise_infer( device, g, dataset.test_idx, model, num_classes, batch_size=4096, fused_sampling=fused_sampling, ) print(f"Test accuracy {acc.item():.4f}") ================================================ FILE: examples/multigpu/README.md ================================================ # Multiple GPU Training ## Requirements ```bash pip install torchmetrics==0.11.4 ``` ## How to run ### Node classification Run with following (available dataset: "ogbn-products", "ogbn-arxiv") ```bash python3 node_classification_sage.py --dataset_name ogbn-products ``` #### __Results__ with default arguments ``` * Test Accuracy of "ogbn-products": ~0.7716 * Test Accuracy of "ogbn-arxiv": ~0.6994 ``` ================================================ FILE: examples/multigpu/graphbolt/README.md ================================================ # Multi-gpu training with GraphBolt data loader ## How to run ```bash python node_classification.py --gpu=0,1 ``` ================================================ FILE: examples/multigpu/graphbolt/node_classification.py ================================================ """ This script trains and tests a GraphSAGE model for node classification on multiple GPUs using distributed data-parallel training (DDP) and GraphBolt data loader. Before reading this example, please familiar yourself with graphsage node classification using GtaphBolt data loader by reading the example in the `examples/graphbolt/node_classification.py`. 
class SAGE(nn.Module):
    """Three-layer GraphSAGE (mean aggregator) used by every DDP replica."""

    def __init__(self, in_size, hidden_size, out_size):
        super().__init__()
        # Three-layer GraphSAGE-mean.
        input_conv = dglnn.SAGEConv(in_size, hidden_size, "mean")
        hidden_conv = dglnn.SAGEConv(hidden_size, hidden_size, "mean")
        output_conv = dglnn.SAGEConv(hidden_size, out_size, "mean")
        self.layers = nn.ModuleList([input_conv, hidden_conv, output_conv])
        self.dropout = nn.Dropout(0.5)
        self.hidden_size = hidden_size
        self.out_size = out_size
        # Set the dtype for the layers manually.
        self.set_layer_dtype(torch.float32)

    def set_layer_dtype(self, dtype):
        """Cast the data of every parameter held by `self.layers` to `dtype`."""
        for weight in self.layers.parameters():
            weight.data = weight.data.to(dtype)

    def forward(self, blocks, x):
        """Apply the layers to `blocks`, one block per layer.

        Every layer but the last is followed by ReLU and then dropout.
        """
        num_layers = len(self.layers)
        out = x
        for depth, (conv, block) in enumerate(zip(self.layers, blocks), 1):
            out = conv(block, out)
            if depth < num_layers:
                out = F.relu(out)
                out = self.dropout(out)
        return out
def weighted_reduce(tensor, weight, dst=0):
    """Combine a weighted metric from all processes into a weighted mean.

    (HIGHLIGHT) Used to collect accuracy and loss values from the
    sub-processes. Because the GPUs may have processed differing numbers of
    items, both the weighted values and the weights are summed and then
    divided, which yields the exact overall average.

    Parameters
    ----------
    tensor : Tensor
        This process's value, already multiplied by its weight. It is
        passed to ``dist.reduce`` directly, so it is reduced in place.
    weight : int or float
        This process's weight (e.g. the number of items it processed).
    dst : int
        Rank of the process that receives the reduced result.

    Returns
    -------
    Tensor
        ``tensor / weight`` after both reductions. ``dist.reduce`` only
        delivers the final result to ``dst``, so the value should only be
        relied upon on that rank.
    """
    # ReduceOp.SUM is the default reduction, so no op needs to be passed.
    dist.reduce(tensor=tensor, dst=dst)
    total_weight = torch.tensor(weight, device=tensor.device)
    dist.reduce(tensor=total_weight, dst=dst)
    return tensor / total_weight
def run(rank, world_size, args, devices, dataset):
    """Worker entry point: set up DDP, train, test and tear down.

    Parameters
    ----------
    rank : int
        Index of this process; also selects its entry in ``devices``.
    world_size : int
        Number of processes in the process group.
    args : argparse.Namespace
        Parsed options. ``args.fanout`` is rewritten in place from a
        comma-separated string into a list of ints.
    devices : list
        Device ids, indexed by rank.
    dataset
        Loaded GraphBolt dataset exposing ``graph``, ``feature`` and
        ``tasks``.
    """
    is_main = rank == 0

    # Bind this process to its GPU and join the process group.
    device = devices[rank]
    torch.cuda.set_device(device)
    dist.init_process_group(
        backend="nccl",  # Use NCCL backend for distributed GPU training
        init_method="tcp://127.0.0.1:12345",
        world_size=world_size,
        rank=rank,
    )

    # Either pin the graph and features in host memory (so the GPU can
    # access them), or move them to the requested storage device.
    storage = args.storage_device
    if storage == "pinned":
        graph = dataset.graph.pin_memory_()
        feature = dataset.feature.pin_memory_()
    else:
        graph = dataset.graph.to(storage)
        feature = dataset.feature.to(storage)

    task = dataset.tasks[0]
    train_set = task.train_set
    valid_set = task.validation_set
    test_set = task.test_set
    args.fanout = [int(fanout) for fanout in args.fanout.split(",")]
    num_classes = task.metadata["num_classes"]

    feat_key = ("node", None, "feat")
    in_size = feature.size(*feat_key)[0]

    # Optionally wrap the node features in a GPU cache (pointless when the
    # features already live on the GPU).
    if args.gpu_cache_size > 0 and storage != "cuda":
        feature[feat_key] = gb.gpu_cached_feature(
            feature[feat_key],
            args.gpu_cache_size,
        )

    # Create the GraphSAGE model (hidden size 256) on this process's GPU
    # and wrap the replica in DDP.
    model = DDP(SAGE(in_size, 256, num_classes).to(device))

    # Only the training loader shuffles / drops uneven batches.
    train_dataloader, valid_dataloader, test_dataloader = (
        create_dataloader(
            args, graph, feature, itemset, device, is_train=is_train
        )
        for itemset, is_train in (
            (train_set, True),
            (valid_set, False),
            (test_set, False),
        )
    )

    if is_main:
        print("Training...")
    train(
        rank,
        args,
        train_dataloader,
        valid_dataloader,
        num_classes,
        model,
        device,
    )

    if is_main:
        print("Testing...")
    test_acc, num_test_items = evaluate(
        rank,
        model,
        test_dataloader,
        num_classes,
        device,
    )
    # Weight each rank's accuracy by the number of items it evaluated.
    test_acc = weighted_reduce(test_acc * num_test_items, num_test_items)

    if is_main:
        print(f"Test Accuracy {test_acc.item():.4f}")
    dist.destroy_process_group()
if __name__ == "__main__":
    # Script entry point: parse options, load the dataset once in the parent
    # process and spawn one training process per requested GPU.
    args = parse_args()
    if not torch.cuda.is_available():
        print("Multi-gpu training needs to be in gpu mode.")
        exit(0)
    # `--mode` has the form "<storage>-<train device>"; only the storage part
    # is needed by the workers.
    args.storage_device, _ = args.mode.split("-")
    devices = list(map(int, args.gpu.split(",")))
    world_size = len(devices)

    print(f"Training with {world_size} gpus.")

    # Load and preprocess dataset.
    dataset = gb.BuiltinDataset(args.dataset).load()

    # Thread limiting to avoid resource competition. Clamp to at least one
    # thread: on machines with fewer than 2 * world_size cores the integer
    # division yields 0, which is not a valid OMP_NUM_THREADS value.
    num_threads = max(1, mp.cpu_count() // 2 // world_size)
    os.environ["OMP_NUM_THREADS"] = str(num_threads)

    mp.set_sharing_strategy("file_system")
    mp.spawn(
        run,
        args=(world_size, args, devices, dataset),
        nprocs=world_size,
        join=True,
    )
def forward(self, blocks, x):
    """Apply the GraphSAGE layers to a list of sampled blocks.

    Layers and blocks are consumed pairwise. ReLU followed by dropout is
    applied after every layer except the final one.

    Parameters
    ----------
    blocks : list
        One block per layer, ordered from input to output.
    x : Tensor
        Features of the source nodes of the first block.

    Returns
    -------
    Tensor
        Output of the last applied layer.
    """
    num_layers = len(self.layers)
    out = x
    for depth, (conv, block) in enumerate(
        zip(self.layers, blocks), start=1
    ):
        out = conv(block, out)
        if depth < num_layers:
            # Hidden layer: non-linearity, then dropout.
            out = self.dropout(F.relu(out))
    return out
def train(
    proc_id,
    nprocs,
    device,
    args,
    g,
    num_classes,
    train_idx,
    val_idx,
    model,
    use_uva,
):
    """Train ``model`` with DDP and report validation accuracy every epoch.

    Parameters
    ----------
    proc_id : int
        Rank of this process; only rank 0 prints the epoch summary.
    nprocs : int
        Number of processes, used to average the per-process accuracy.
    device
        Device the dataloaders place their output on.
    args : argparse.Namespace
        Reads ``mode``, ``num_workers`` and ``num_epochs``.
    g : DGLGraph
        Graph to sample from.
    num_classes : int
        Number of label classes.
    train_idx, val_idx : Tensor
        Node ids of the training / validation split.
    model : DistributedDataParallel
        The wrapped GraphSAGE model.
    use_uva : bool
        Forwarded to the dataloaders.
    """
    # Instantiate a neighbor sampler
    if args.mode == "benchmark":
        # A work-around to prevent CUDA running error. For more details, please
        # see https://github.com/dmlc/dgl/issues/6697.
        sampler = NeighborSampler([10, 10, 10], fused=False)
    else:
        sampler = NeighborSampler(
            [10, 10, 10],
            prefetch_node_feats=["feat"],
            prefetch_labels=["label"],
        )
    train_dataloader = DataLoader(
        g,
        train_idx,
        sampler,
        device=device,
        batch_size=1024,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
        use_ddp=True,  # To split the set for each process
        use_uva=use_uva,
    )
    val_dataloader = DataLoader(
        g,
        val_idx,
        sampler,
        device=device,
        batch_size=1024,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
        use_ddp=True,
        use_uva=use_uva,
    )
    opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
    for epoch in range(args.num_epochs):
        t0 = time.time()
        model.train()
        total_loss = 0
        num_batches = 0
        for input_nodes, output_nodes, blocks in train_dataloader:
            x = blocks[0].srcdata["feat"]
            y = blocks[-1].dstdata["label"].to(torch.int64)
            y_hat = model(blocks, x)
            loss = F.cross_entropy(y_hat, y)
            opt.zero_grad()
            loss.backward()
            opt.step()  # Gradients are synchronized in DDP
            # Detach before accumulating: adding the raw loss would make the
            # running total part of the autograd graph and keep every
            # iteration's history alive for the whole epoch.
            total_loss += loss.detach()
            num_batches += 1
        #####################################################################
        # (HIGHLIGHT) Collect accuracy values from sub-processes and obtain
        # overall accuracy.
        #
        # `torch.distributed.reduce` is used to reduce tensors from all the
        # sub-processes to a specified process, ReduceOp.SUM is used by default.
        #
        # Other multiprocess functions supported by the backend are also
        # available. Please refer to
        # https://pytorch.org/docs/stable/distributed.html
        # for more information.
        #####################################################################
        # Pre-divide by the number of processes so that the SUM reduction
        # below produces the mean of the per-process accuracies.
        acc = (
            evaluate(device, model, g, num_classes, val_dataloader).to(device)
            / nprocs
        )
        t1 = time.time()
        # Reduce `acc` tensors to process 0.
        dist.reduce(tensor=acc, dst=0)
        if proc_id == 0:
            # Counting batches explicitly (instead of reusing the loop index)
            # keeps this well-defined even if the loader yielded nothing.
            avg_loss = total_loss / max(num_batches, 1)
            print(
                f"Epoch {epoch:05d} | Loss {avg_loss:.4f} | "
                f"Accuracy {acc.item():.4f} | Time {t1 - t0:.4f}"
            )
if __name__ == "__main__":
    # Script entry point: parse options, load and preprocess the graph once
    # in the parent process, then spawn one DDP worker per GPU.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mode",
        default="mixed",
        choices=["mixed", "puregpu", "benchmark"],
        help="Training mode. 'mixed' for CPU-GPU mixed training, "
        "'puregpu' for pure-GPU training.",
    )
    parser.add_argument(
        "--gpu",
        type=str,
        default="0",
        help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training,"
        " e.g., 0,1,2,3.",
    )
    parser.add_argument(
        "--num_epochs",
        type=int,
        default=10,
        help="Number of epochs for train.",
    )
    parser.add_argument(
        "--dataset_name",
        type=str,
        default="ogbn-products",
        help="Dataset name.",
    )
    parser.add_argument(
        "--dataset_dir",
        type=str,
        default="dataset",
        help="Root directory of dataset.",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=0,
        help="Number of workers",
    )
    args = parser.parse_args()
    devices = list(map(int, args.gpu.split(",")))
    nprocs = len(devices)
    # Raise explicitly rather than assert, so the check survives `python -O`.
    if not torch.cuda.is_available():
        raise RuntimeError("Must have GPUs to enable multi-gpu training.")
    print(f"Training in {args.mode} mode using {nprocs} GPU(s)")

    # Load and preprocess the dataset.
    print("Loading data")
    dataset = AsNodePredDataset(
        DglNodePropPredDataset(args.dataset_name, root=args.dataset_dir)
    )
    g = dataset[0]
    if args.dataset_name == "ogbn-arxiv":
        g = dgl.to_bidirected(g, copy_ndata=True)
        g = dgl.add_self_loop(g)
    # Explicitly create desired graph formats before multi-processing to avoid
    # redundant creation in each sub-process and to save memory. This must run
    # after the transforms above, because they return new graph objects and
    # formats built on the old graph would be thrown away.
    g.create_formats_()
    # Thread limiting to avoid resource competition. Clamp to at least one
    # thread: with few cores the integer division yields 0, which is not a
    # valid OMP_NUM_THREADS value.
    os.environ["OMP_NUM_THREADS"] = str(
        max(1, mp.cpu_count() // 2 // nprocs)
    )
    data = (
        dataset.num_classes,
        dataset.train_idx,
        dataset.val_idx,
        dataset.test_idx,
    )
    # To use DDP with n GPUs, spawn up n processes.
    mp.spawn(
        run,
        args=(nprocs, devices, g, data, args),
        nprocs=nprocs,
    )
Check out our [tutorials](https://docs.dgl.ai/tutorials/models/index.html)

================================================
FILE: examples/mxnet/appnp/README.md
================================================
Predict then Propagate: Graph Neural Networks meet Personalized PageRank (APPNP)
============

- Paper link: [Predict then Propagate: Graph Neural Networks meet Personalized PageRank](https://arxiv.org/abs/1810.05997)
- Author's code repo: [https://github.com/klicperajo/ppnp](https://github.com/klicperajo/ppnp).

Dependencies
------------
- MXNet 1.5+
- requests

```bash
pip install mxnet requests
```

Code
-----
The folder contains an implementation of APPNP (`appnp.py`).

Results
-------

Run with the following command (available datasets: "cora", "citeseer", "pubmed"):

```bash
DGLBACKEND=mxnet python3 appnp.py --dataset cora --gpu 0
```

* cora: 0.8370 (paper: 0.850)
* citeseer: 0.713 (paper: 0.757)
* pubmed: 0.798 (paper: 0.797)

Experiments were done on dgl datasets (GCN settings) which are different from those used in the original implementation.
class APPNP(nn.Block):
    """APPNP model: an MLP prediction step followed by propagation.

    Parameters
    ----------
    g : DGLGraph
        Graph the predictions are propagated over.
    in_feats : int
        Size of the input features.
    hiddens : sequence of int
        Sizes of the hidden dense layers; must contain at least one entry.
    n_classes : int
        Size of the output layer.
    activation : callable
        Activation applied after every dense layer except the last.
    feat_drop : float
        Feature dropout rate; a falsy value disables feature dropout.
    edge_drop : float
        Passed to ``APPNPConv`` as its third argument.
    alpha : float
        Passed to ``APPNPConv`` as its second argument.
    k : int
        Passed to ``APPNPConv`` as its first argument.
    """

    def __init__(
        self,
        g,
        in_feats,
        hiddens,
        n_classes,
        activation,
        feat_drop,
        edge_drop,
        alpha,
        k,
    ):
        super().__init__()
        self.g = g

        with self.name_scope():
            self.layers = nn.Sequential()
            # Input and hidden layers: consecutive (in, out) size pairs.
            sizes = [in_feats, *hiddens]
            for n_in, n_out in zip(sizes[:-1], sizes[1:]):
                self.layers.add(nn.Dense(n_out, in_units=n_in))
            # Output layer.
            self.layers.add(nn.Dense(n_classes, in_units=hiddens[-1]))

            self.activation = activation
            # Fall back to the identity when dropout is disabled.
            self.feat_drop = (
                nn.Dropout(feat_drop) if feat_drop else (lambda x: x)
            )
            self.propagate = APPNPConv(k, alpha, edge_drop)

    def forward(self, features):
        """Predict per-node outputs from ``features``, then propagate them."""
        # Prediction step: dropout is applied to the input and again right
        # before the output layer.
        first, last = self.layers[0], self.layers[-1]
        h = self.activation(first(self.feat_drop(features)))
        for dense in self.layers[1:-1]:
            h = self.activation(dense(h))
        logits = last(self.feat_drop(h))
        # Propagation step.
        return self.propagate(self.g, logits)
g.ndata["feat"] labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx) train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = data.graph.number_of_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, train_mask.sum().asscalar(), val_mask.sum().asscalar(), test_mask.sum().asscalar(), ) ) # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) # create APPNP model model = APPNP( g, in_feats, args.hidden_sizes, n_classes, nd.relu, args.in_drop, args.edge_drop, args.alpha, args.k, ) model.initialize(ctx=ctx) n_train_samples = train_mask.sum().asscalar() loss_fcn = gluon.loss.SoftmaxCELoss() # use optimizer print(model.collect_params()) trainer = gluon.Trainer( model.collect_params(), "adam", {"learning_rate": args.lr, "wd": args.weight_decay}, ) # initialize graph dur = [] for epoch in range(args.n_epochs): if epoch >= 3: t0 = time.time() # forward with mx.autograd.record(): pred = model(features) loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1)) loss = loss.sum() / n_train_samples loss.backward() trainer.step(batch_size=1) if epoch >= 3: loss.asscalar() dur.append(time.time() - t0) acc = evaluate(model, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000, ) ) # test set accuracy acc = evaluate(model, features, labels, test_mask) print("Test accuracy {:.2%}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="APPNP") register_data_args(parser) parser.add_argument( "--in-drop", type=float, default=0.5, help="input feature dropout" ) parser.add_argument( "--edge-drop", type=float, default=0.5, help="edge propagation dropout" ) 
parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--hidden_sizes", type=int, nargs="+", default=[64], help="hidden unit sizes for appnp", ) parser.add_argument( "--k", type=int, default=10, help="Number of propagation steps" ) parser.add_argument( "--alpha", type=float, default=0.1, help="Teleport Probability" ) parser.add_argument( "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/mxnet/gat/README.md ================================================ Graph Attention Networks (GAT) ============ - Paper link: [https://arxiv.org/abs/1710.10903](https://arxiv.org/abs/1710.10903) - Author's code repo: [https://github.com/PetarV-/GAT](https://github.com/PetarV-/GAT). Note that the original code is implemented with Tensorflow for the paper. ### Dependencies * MXNet nightly build * requests ```bash pip install mxnet --pre pip install requests ``` ### Usage (make sure that DGLBACKEND is changed into mxnet) ```bash DGLBACKEND=mxnet python3 train.py --dataset cora --gpu 0 DGLBACKEND=mxnet python3 train.py --dataset citeseer --gpu 0 --early-stop DGLBACKEND=mxnet python3 train.py --dataset pubmed --gpu 0 --early-stop ``` ================================================ FILE: examples/mxnet/gat/gat.py ================================================ """ Graph Attention Networks in DGL using SPMV optimization. 
class GAT(nn.Block):
    """Multi-layer graph attention network built from ``GATConv`` layers.

    Parameters
    ----------
    g : DGLGraph
        Graph every layer is applied on.
    num_layers : int
        Number of layers applied before the output projection.
    in_dim : int
        Size of the input features.
    num_hidden : int
        Hidden size per attention head.
    num_classes : int
        Output size of the final projection.
    heads : sequence of int
        Number of heads per layer, the output layer's count last.
    activation : callable
        Applied to the flattened output of every non-output layer.
    feat_drop, attn_drop, alpha
        Passed through to each ``GATConv``.
    residual : bool
        Passed to every ``GATConv`` except the input projection, which
        always receives ``False``.
    """

    def __init__(
        self,
        g,
        num_layers,
        in_dim,
        num_hidden,
        num_classes,
        heads,
        activation,
        feat_drop,
        attn_drop,
        alpha,
        residual,
    ):
        super().__init__()
        self.g = g
        self.num_layers = num_layers
        self.activation = activation

        shared = (feat_drop, attn_drop, alpha)
        # Input projection (no residual).
        convs = [GATConv(in_dim, num_hidden, heads[0], *shared, False)]
        # Hidden layers: because of multi-head attention the input size is
        # num_hidden * (number of heads of the previous layer).
        convs.extend(
            GATConv(
                num_hidden * heads[idx - 1],
                num_hidden,
                heads[idx],
                *shared,
                residual,
            )
            for idx in range(1, num_layers)
        )
        # Output projection.
        convs.append(
            GATConv(
                num_hidden * heads[-2],
                num_classes,
                heads[-1],
                *shared,
                residual,
            )
        )
        self.gat_layers = convs

        # The layers live in a plain list, so register each one explicitly
        # as a child block.
        for idx, conv in enumerate(self.gat_layers):
            self.register_child(conv, "gat_layer_{}".format(idx))

    def forward(self, inputs):
        """Return the logits: the output layer's result averaged over axis 1."""
        h = inputs
        for _, conv in zip(range(self.num_layers), self.gat_layers):
            h = self.activation(conv(self.g, h).flatten())
        # Output projection.
        return self.gat_layers[-1](self.g, h).mean(1)
def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    Parameters
    ----------
    model : callable
        Called as ``model(features)``; the result is indexed with ``mask``.
    features
        Input passed to the model.
    labels
        Ground-truth labels, indexed with ``mask``.
    mask
        Index selecting the entries to evaluate.

    Returns
    -------
    float
        Fraction of selected entries whose highest-scoring class equals the
        label.
    """
    scores = model(features)[mask].asnumpy().squeeze()
    targets = labels[mask].asnumpy().squeeze()
    predicted = scores.argmax(axis=1)
    num_correct = (predicted == targets).astype(np.int64).sum()
    return num_correct / len(targets)
EarlyStopping(patience=100) model.initialize(ctx=ctx) # use optimizer trainer = gluon.Trainer( model.collect_params(), "adam", {"learning_rate": args.lr} ) dur = [] for epoch in range(args.epochs): if epoch >= 3: t0 = time.time() # forward with mx.autograd.record(): logits = model(features) loss = mx.nd.softmax_cross_entropy( logits[mask].squeeze(), labels[mask].squeeze() ) loss.backward() trainer.step(mask.shape[0]) if epoch >= 3: dur.append(time.time() - t0) print( "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format( epoch, loss.asnumpy()[0], np.mean(dur), n_edges / np.mean(dur) / 1000, ) ) val_accuracy = evaluate(model, features, labels, val_mask) print("Validation Accuracy {:.4f}".format(val_accuracy)) if args.early_stop: if stopper.step(val_accuracy, model): break print() if args.early_stop: model.load_parameters("model.param") test_accuracy = evaluate(model, features, labels, test_mask) print("Test Accuracy {:.4f}".format(test_accuracy)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="GAT") register_data_args(parser) parser.add_argument( "--gpu", type=int, default=-1, help="which GPU to use. 
class EarlyStopping:
    """Stop training once the score has not improved for ``patience`` steps.

    The best score seen so far is tracked; whenever a new score is not
    lower than it, the model parameters are saved to ``model.param``.
    """

    def __init__(self, patience=10):
        self.patience = patience
        self.counter = 0  # consecutive steps with a lower score
        self.best_score = None
        self.early_stop = False

    def step(self, acc, model):
        """Record a new score and return whether training should stop.

        Parameters
        ----------
        acc
            Latest score (higher is better).
        model
            Object exposing ``save_parameters(path)``; saved when the score
            is the first one seen or is not lower than the best so far.

        Returns
        -------
        bool
            ``True`` once ``patience`` consecutive lower scores were seen.
        """
        seen_before = self.best_score is not None
        if seen_before and acc < self.best_score:
            self.counter += 1
            print(
                f"EarlyStopping counter: {self.counter} out of {self.patience}"
            )
            if self.counter >= self.patience:
                self.early_stop = True
            return self.early_stop

        # First score, or a score at least as good as the best one.
        self.best_score = acc
        self.save_checkpoint(model)
        if seen_before:
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        """Save the model parameters to ``model.param``."""
        model.save_parameters("model.param")
``` # Final accuracy 57.70% MLP without GCN DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "citeseer" --n-epochs 200 --n-layers 0 # Final accuracy 65.70% with 2-layer GCN with skip connection DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "citeseer" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop # Final accuracy 64.70% with 10-layer GCN with skip connection DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "citeseer" --n-epochs 200 --n-layers 10 --normalization 'sym' --self-loop ``` ``` # Final accuracy 53.20% MLP without GCN DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-epochs 200 --n-layers 0 # Final accuracy 72.60% with 2-layer GCN with skip connection DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop # Final accuracy 78.90% with 10-layer GCN with skip connection DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "cora" --n-epochs 200 --n-layers 10 --normalization 'sym' --self-loop ``` ``` # Final accuracy 70.30% MLP without GCN DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 0 # Final accuracy 78.30% with 2-layer GCN with skip connection DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 2 --normalization 'sym' --self-loop # Final accuracy 76.30% with 10-layer GCN with skip connection DGLBACKEND=mxnet python3 examples/mxnet/gcn/gcn_concat.py --dataset "pubmed" --n-epochs 200 --n-layers 10 --normalization 'sym' --self-loop ``` ================================================ FILE: examples/mxnet/gcn/gcn.py ================================================ """GCN using DGL nn package References: - Semi-Supervised Classification with Graph Convolutional Networks - Paper: https://arxiv.org/abs/1609.02907 - Code: https://github.com/tkipf/gcn """ import
class GCN(gluon.Block):
    """Stack of ``GraphConv`` layers applied to one fixed graph.

    The stack holds one input layer, ``n_layers - 1`` hidden layers and one
    output layer without activation.  Dropout is applied to the input of
    every layer except the first.
    """

    def __init__(
        self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.g = g
        self.layers = gluon.nn.Sequential()
        # in_feats -> n_hidden
        self.layers.add(GraphConv(in_feats, n_hidden, activation=activation))
        # n_hidden -> n_hidden, repeated n_layers - 1 times
        for _ in range(n_layers - 1):
            self.layers.add(
                GraphConv(n_hidden, n_hidden, activation=activation)
            )
        # n_hidden -> n_classes, no activation
        self.layers.add(GraphConv(n_hidden, n_classes))
        self.dropout = gluon.nn.Dropout(rate=dropout)

    def forward(self, features):
        """Run ``features`` through every layer and return the last output."""
        h = features
        for depth, conv in enumerate(self.layers):
            # The raw input features are never dropped out.
            layer_input = h if depth == 0 else self.dropout(h)
            h = conv(self.g, layer_input)
        return h
def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The predicted class is the arg-max over axis 1 of ``model(features)``;
    the number of masked correct predictions is divided by ``mask.sum()``.
    """
    logits = model(features)
    pred = logits.argmax(axis=1)
    hits = (pred == labels) * mask
    n_correct = hits.sum()
    n_selected = mask.sum().asscalar()
    return (n_correct / n_selected).asscalar()
g.ndata["out_norm"] = mx.nd.expand_dims(out_norm, 1) model = GCN( g, args.n_hidden, n_classes, args.n_layers, "relu", args.dropout, ) model.initialize(ctx=ctx) n_train_samples = train_mask.sum().asscalar() loss_fcn = gluon.loss.SoftmaxCELoss() # use optimizer print(model.collect_params()) trainer = gluon.Trainer( model.collect_params(), "adam", {"learning_rate": args.lr, "wd": args.weight_decay}, ) # initialize graph dur = [] for epoch in range(args.n_epochs): if epoch >= 3: t0 = time.time() # forward with mx.autograd.record(): pred = model(features) loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1)) loss = loss.sum() / n_train_samples loss.backward() trainer.step(batch_size=1) if epoch >= 3: dur.append(time.time() - t0) acc = evaluate(model, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000, ) ) # test set accuracy acc = evaluate(model, features, labels, test_mask) print("Test accuracy {:.2%}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="GCN") register_data_args(parser) parser.add_argument( "--dropout", type=float, default=0.5, help="dropout probability" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden gcn units" ) parser.add_argument( "--n-layers", type=int, default=1, help="number of hidden gcn layers" ) parser.add_argument( "--normalization", choices=["sym", "left"], default=None, help="graph normalization types (default=None)", ) parser.add_argument( "--self-loop", action="store_true", help="graph self-loop (default=False)", ) parser.add_argument( "--weight-decay", type=float, default=5e-4, 
def gcn_msg(edge):
    """Build the message ``m``: source feature ``h`` scaled by source ``norm``."""
    src = edge.src
    return {"m": src["h"] * src["norm"]}
def evaluate(model, features, labels, mask):
    """Compute masked classification accuracy.

    Predictions are the arg-max over axis 1 of ``model(features)``.  Only
    positions where ``mask`` is non-zero contribute, and the count of
    correct ones is normalised by ``mask.sum()``.
    """
    predicted = model(features).argmax(axis=1)
    masked_hits = ((predicted == labels) * mask).sum()
    mask_total = mask.sum().asscalar()
    ratio = masked_hits / mask_total
    return ratio.asscalar()
model = GCN( g, in_feats, args.n_hidden, n_classes, args.n_layers, mx.nd.relu, args.dropout, ) model.initialize(ctx=ctx) n_train_samples = train_mask.sum().asscalar() loss_fcn = gluon.loss.SoftmaxCELoss() # use optimizer print(model.collect_params()) trainer = gluon.Trainer( model.collect_params(), "adam", {"learning_rate": args.lr, "wd": args.weight_decay}, ) # initialize graph dur = [] for epoch in range(args.n_epochs): if epoch >= 3: t0 = time.time() # forward with mx.autograd.record(): pred = model(features) loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1)) loss = loss.sum() / n_train_samples loss.backward() trainer.step(batch_size=1) if epoch >= 3: loss.asscalar() dur.append(time.time() - t0) acc = evaluate(model, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000, ) ) # test set accuracy acc = evaluate(model, features, labels, test_mask) print("Test accuracy {:.2%}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="GCN") parser.add_argument( "--dataset", type=str, default="cora", help="Dataset name ('cora', 'citeseer', 'pubmed').", ) parser.add_argument( "--dropout", type=float, default=0.5, help="dropout probability" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=3e-2, help="learning rate") parser.add_argument( "--n-epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden gcn units" ) parser.add_argument( "--n-layers", type=int, default=1, help="number of hidden gcn layers" ) parser.add_argument( "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" ) parser.add_argument( "--self-loop", action="store_true", help="graph self-loop (default=False)", ) parser.set_defaults(self_loop=False) 
class SubsetRandomSampler(Sampler):
    """Yield the given indices in a fresh random order on every iteration."""

    def __init__(self, indices):
        self.indices = indices

    def __iter__(self):
        # Draw a new permutation of positions from NumPy's global RNG.
        order = np.random.permutation(len(self.indices))
        shuffled = [self.indices[pos] for pos in order]
        return iter(shuffled)

    def __len__(self):
        return len(self.indices)
class GraphDataLoader:
    """Split a graph dataset into train/validation loaders.

    Parameters
    ----------
    dataset
        Iterable of ``(graph, label)`` pairs; it is also handed to the
        underlying ``DataLoader`` objects.
    batch_size : int
        Batch size of both loaders.
    collate_fn : callable
        Batchify function passed to both loaders.
    seed : int
        Seed used by the split.
    shuffle : bool
        Whether the 10-fold split shuffles before splitting.
    split_name : str
        ``"fold10"`` for stratified 10-fold, ``"rand"`` for a random split.
    fold_idx : int
        Which of the ten folds is the validation fold (``"fold10"`` only).
    split_ratio : float
        Fraction of samples used for training (``"rand"`` only).

    Raises
    ------
    NotImplementedError
        If ``split_name`` is neither ``"fold10"`` nor ``"rand"``.
    """

    def __init__(
        self,
        dataset,
        batch_size,
        collate_fn=collate,
        seed=0,
        shuffle=True,
        split_name="fold10",
        fold_idx=0,
        split_ratio=0.7,
    ):
        self.shuffle = shuffle
        self.seed = seed

        labels = [label for _, label in dataset]

        if split_name == "fold10":
            train_idx, valid_idx = self._split_fold10(
                labels, fold_idx, seed, shuffle
            )
        elif split_name == "rand":
            train_idx, valid_idx = self._split_rand(
                labels, split_ratio, seed, shuffle
            )
        else:
            raise NotImplementedError()

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetRandomSampler(valid_idx)

        self.train_loader = DataLoader(
            dataset,
            sampler=train_sampler,
            batch_size=batch_size,
            batchify_fn=collate_fn,
        )
        self.valid_loader = DataLoader(
            dataset,
            sampler=valid_sampler,
            batch_size=batch_size,
            batchify_fn=collate_fn,
        )

    def train_valid_loader(self):
        """Return the ``(train_loader, valid_loader)`` pair."""
        return self.train_loader, self.valid_loader

    def _split_fold10(self, labels, fold_idx=0, seed=0, shuffle=True):
        """Return the train/validation indices of one stratified fold.

        ``fold_idx`` selects which of the ten folds is held out.
        """
        # The message used to be ``print(...)``, which evaluates to None.
        assert 0 <= fold_idx < 10, "fold_idx must be from 0 to 9."

        # Only pass a seed when shuffling: recent scikit-learn releases
        # raise ValueError for random_state combined with shuffle=False.
        skf = StratifiedKFold(
            n_splits=10,
            shuffle=shuffle,
            random_state=seed if shuffle else None,
        )
        folds = list(
            skf.split(
                np.zeros(len(labels)), [label.asnumpy() for label in labels]
            )
        )  # split(x, y)
        train_idx, valid_idx = folds[fold_idx]

        print(
            "train_set : test_set = %d : %d" % (len(train_idx), len(valid_idx))
        )
        return train_idx, valid_idx

    def _split_rand(self, labels, split_ratio=0.7, seed=0, shuffle=True):
        """Return a seeded random train/validation split of the indices.

        The first ``floor(split_ratio * len(labels))`` shuffled indices are
        the training set.  ``shuffle`` is accepted for signature symmetry
        with ``_split_fold10`` but is not consulted here.  NumPy's global
        RNG is re-seeded with ``seed`` as a side effect.
        """
        num_entries = len(labels)
        indices = list(range(num_entries))
        np.random.seed(seed)
        np.random.shuffle(indices)
        split = int(math.floor(split_ratio * num_entries))
        train_idx, valid_idx = indices[:split], indices[split:]

        print(
            "train_set : test_set = %d : %d" % (len(train_idx), len(valid_idx))
        )
        return train_idx, valid_idx
class GIN(nn.Block):
    """GIN model"""

    def __init__(
        self,
        num_layers,
        num_mlp_layers,
        input_dim,
        hidden_dim,
        output_dim,
        final_dropout,
        learn_eps,
        graph_pooling_type,
        neighbor_pooling_type,
    ):
        """Create the GIN layers, per-layer prediction heads and pooling.

        Parameters
        ----------
        num_layers: int
            The number of linear layers in the neural network
        num_mlp_layers: int
            The number of linear layers in mlps
        input_dim: int
            The dimensionality of input features
        hidden_dim: int
            The dimensionality of hidden units at ALL layers
        output_dim: int
            The number of classes for prediction
        final_dropout: float
            dropout ratio on the final linear layer
        learn_eps: boolean
            If True, learn epsilon to distinguish center nodes from neighbors
            If False, aggregate neighbors and center nodes altogether.
        neighbor_pooling_type: str
            how to aggregate neighbors (sum, mean, or max)
        graph_pooling_type: str
            how to aggregate entire nodes in a graph (sum, mean or max)
        """
        super().__init__()
        self.num_layers = num_layers
        self.learn_eps = learn_eps

        with self.name_scope():
            # One GINConv + BatchNorm pair per layer after the input.
            self.ginlayers = nn.Sequential()
            self.batch_norms = nn.Sequential()
            for depth in range(self.num_layers - 1):
                # Only the first MLP consumes the raw input features.
                mlp_in = input_dim if depth == 0 else hidden_dim
                mlp = MLP(num_mlp_layers, mlp_in, hidden_dim, hidden_dim)
                conv = GINConv(
                    ApplyNodeFunc(mlp),
                    neighbor_pooling_type,
                    0,
                    self.learn_eps,
                )
                self.ginlayers.add(conv)
                self.batch_norms.add(nn.BatchNorm(in_channels=hidden_dim))

            # One linear head per hidden representation (input included).
            self.linears_prediction = nn.Sequential()
            for depth in range(num_layers):
                head_in = input_dim if depth == 0 else hidden_dim
                self.linears_prediction.add(
                    nn.Dense(output_dim, in_units=head_in)
                )

            self.drop = nn.Dropout(final_dropout)

            # Instantiate only the readout that was requested.
            for key, pool_cls in (
                ("sum", SumPooling),
                ("mean", AvgPooling),
                ("max", MaxPooling),
            ):
                if graph_pooling_type == key:
                    self.pool = pool_cls()
                    break
            else:
                raise NotImplementedError

    def forward(self, g, h):
        """Return the sum of the per-layer graph-level prediction scores."""
        hidden_rep = [h]
        for conv, norm in zip(self.ginlayers, self.batch_norms):
            h = nd.relu(norm(conv(g, h)))
            hidden_rep.append(h)

        score_over_layer = 0
        # Pool every layer's node features per graph, then score them.
        for idx, rep in enumerate(hidden_rep):
            pooled = self.pool(g, rep)
            layer_score = self.drop(self.linears_prediction[idx](pooled))
            score_over_layer = score_over_layer + layer_score

        return score_over_layer
def main(args):
    """Train GIN on one fold of a GIN dataset and report per-epoch metrics.

    ``args`` is the namespace produced by the example's argument parser.
    ``args.device`` is replaced in place by the MXNet context that is used.
    After every epoch the train/validation loss and accuracy are shown on
    progress bars and, when ``args.filename`` is set, appended to that file.
    """
    # Honour --seed for both RNGs (they used to be pinned to 0); the default
    # seed is 0, so default runs are unchanged.
    mx.random.seed(args.seed)
    np.random.seed(seed=args.seed)

    # --disable-cuda used to be parsed but ignored; it now forces the CPU,
    # as does a negative device id.
    if getattr(args, "disable_cuda", False) or args.device < 0:
        args.device = mx.cpu()
    else:
        args.device = mx.gpu(args.device)

    dataset = GINDataset(args.dataset, not args.learn_eps)

    trainloader, validloader = GraphDataLoader(
        dataset,
        batch_size=args.batch_size,
        collate_fn=collate,
        seed=args.seed,
        shuffle=True,
        split_name="fold10",
        fold_idx=args.fold_idx,
    ).train_valid_loader()
    # or split_name='rand', split_ratio=0.7

    model = GIN(
        args.num_layers,
        args.num_mlp_layers,
        dataset.dim_nfeats,
        args.hidden_dim,
        dataset.gclasses,
        args.final_dropout,
        args.learn_eps,
        args.graph_pooling_type,
        args.neighbor_pooling_type,
    )
    model.initialize(ctx=args.device)

    criterion = gluon.loss.SoftmaxCELoss()

    print(model.collect_params())
    lr_scheduler = mx.lr_scheduler.FactorScheduler(50, 0.5)
    # Pass --lr explicitly; it was parsed but never reached the optimizer.
    trainer = gluon.Trainer(
        model.collect_params(),
        "adam",
        {"learning_rate": args.lr, "lr_scheduler": lr_scheduler},
    )

    # it's not cost-effective to handle the cursor and init 0
    # https://stackoverflow.com/a/23121189
    tbar = tqdm(
        range(args.epochs), unit="epoch", position=3, ncols=0, file=sys.stdout
    )
    vbar = tqdm(
        range(args.epochs), unit="epoch", position=4, ncols=0, file=sys.stdout
    )
    lrbar = tqdm(
        range(args.epochs), unit="epoch", position=5, ncols=0, file=sys.stdout
    )

    for epoch, _, _ in zip(tbar, vbar, lrbar):
        train(args, model, trainloader, trainer, criterion, epoch)

        train_loss, train_acc = eval_net(args, model, trainloader, criterion)
        tbar.set_description(
            "train set - average loss: {:.4f}, accuracy: {:.0f}%".format(
                train_loss, 100.0 * train_acc
            )
        )

        valid_loss, valid_acc = eval_net(args, model, validloader, criterion)
        vbar.set_description(
            "valid set - average loss: {:.4f}, accuracy: {:.0f}%".format(
                valid_loss, 100.0 * valid_acc
            )
        )

        # Append this epoch's settings and metrics to the optional log file.
        if args.filename:
            with open(args.filename, "a") as f:
                f.write(
                    "%s %s %s %s"
                    % (
                        args.dataset,
                        args.learn_eps,
                        args.neighbor_pooling_type,
                        args.graph_pooling_type,
                    )
                )
                f.write("\n")
                f.write(
                    "%f %f %f %f"
                    % (train_loss, train_acc, valid_loss, valid_acc)
                )
                f.write("\n")

        lrbar.set_description(
            "Learning eps with learn_eps={}: {}".format(
                args.learn_eps,
                [
                    layer.eps.data(args.device).asscalar()
                    for layer in model.ginlayers
                ],
            )
        )

    tbar.close()
    vbar.close()
    lrbar.close()
class Parser:
    """Command-line arguments of the GIN example.

    Parameters
    ----------
    description : str
        Description shown in the ``--help`` output.
    argv : list of str, optional
        Argument strings to parse.  ``None`` (the default) parses the
        process command line, which is the previous behaviour.

    Attributes
    ----------
    parser : argparse.ArgumentParser
        The configured parser.
    args : argparse.Namespace
        The parsed arguments.
    """

    def __init__(self, description, argv=None):
        """
        arguments parser
        """
        self.parser = argparse.ArgumentParser(description=description)
        self.args = None
        self._parse(argv)

    def _parse(self, argv=None):
        """Register every option and store the parsed result in ``args``."""
        # dataset
        self.parser.add_argument(
            "--dataset",
            type=str,
            default="MUTAG",
            help="name of dataset (default: MUTAG)",
        )
        self.parser.add_argument(
            "--batch_size",
            type=int,
            default=32,
            help="batch size for training and validation (default: 32)",
        )
        self.parser.add_argument(
            "--fold_idx",
            type=int,
            default=0,
            help="the index(<10) of fold in 10-fold validation.",
        )
        self.parser.add_argument(
            "--filename", type=str, default="", help="output file"
        )

        # device
        self.parser.add_argument(
            "--disable-cuda", action="store_true", help="Disable CUDA"
        )
        self.parser.add_argument(
            "--device",
            type=int,
            default=0,
            help="which gpu device to use (default: 0)",
        )

        # net
        self.parser.add_argument(
            "--net", type=str, default="gin", help="gnn net (default: gin)"
        )
        self.parser.add_argument(
            "--num_layers",
            type=int,
            default=5,
            help="number of layers (default: 5)",
        )
        self.parser.add_argument(
            "--num_mlp_layers",
            type=int,
            default=2,
            help="number of MLP layers(default: 2). 1 means linear model.",
        )
        self.parser.add_argument(
            "--hidden_dim",
            type=int,
            default=64,
            help="number of hidden units (default: 64)",
        )

        # graph
        self.parser.add_argument(
            "--graph_pooling_type",
            type=str,
            default="sum",
            choices=["sum", "mean", "max"],
            help="type of graph pooling: sum, mean or max",
        )
        self.parser.add_argument(
            "--neighbor_pooling_type",
            type=str,
            default="sum",
            choices=["sum", "mean", "max"],
            help="type of neighboring pooling: sum, mean or max",
        )
        self.parser.add_argument(
            "--learn_eps",
            action="store_true",
            help="learn the epsilon weighting",
        )
        self.parser.add_argument(
            "--degree_as_tag",
            action="store_true",
            help="take the degree of nodes as input feature",
        )

        # learning
        self.parser.add_argument(
            "--seed", type=int, default=0, help="random seed (default: 0)"
        )
        self.parser.add_argument(
            "--epochs",
            type=int,
            default=350,
            help="number of epochs to train (default: 350)",
        )
        self.parser.add_argument(
            "--lr",
            type=float,
            default=0.01,
            help="learning rate (default: 0.01)",
        )
        self.parser.add_argument(
            "--final_dropout",
            type=float,
            default=0.5,
            help="final layer dropout (default: 0.5)",
        )

        # done: argv=None makes argparse read the process command line
        self.args = self.parser.parse_args(argv)
class GraphSAGE(nn.Block):
    """Stack of ``SAGEConv`` layers evaluated on one fixed graph.

    The stack has one input layer, ``n_layers - 1`` hidden layers and an
    output layer that has no activation.  Every layer uses ``dropout`` as
    its ``feat_drop`` and the same ``aggregator_type``.
    """

    def __init__(
        self,
        g,
        in_feats,
        n_hidden,
        n_classes,
        n_layers,
        activation,
        dropout,
        aggregator_type,
    ):
        super().__init__()
        self.g = g

        # (input size, output size, activation) of each layer, in order.
        layer_specs = [(in_feats, n_hidden, activation)]
        layer_specs += [(n_hidden, n_hidden, activation)] * (n_layers - 1)
        layer_specs.append((n_hidden, n_classes, None))

        with self.name_scope():
            self.layers = nn.Sequential()
            for fan_in, fan_out, act in layer_specs:
                self.layers.add(
                    SAGEConv(
                        fan_in,
                        fan_out,
                        aggregator_type,
                        feat_drop=dropout,
                        activation=act,
                    )
                )

    def forward(self, features):
        """Feed ``features`` through all layers and return the final output."""
        out = features
        for conv in self.layers:
            out = conv(self.g, out)
        return out
data = CiteseerGraphDataset() elif args.dataset == "pubmed": data = PubmedGraphDataset() else: raise ValueError("Unknown dataset: {}".format(args.dataset)) g = data[0] if args.gpu < 0: cuda = False ctx = mx.cpu(0) else: cuda = True ctx = mx.gpu(args.gpu) g = g.int().to(ctx) features = g.ndata["feat"] labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx) train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = data.graph.number_of_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, train_mask.sum().asscalar(), val_mask.sum().asscalar(), test_mask.sum().asscalar(), ) ) # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) n_edges = g.number_of_edges() # create GraphSAGE model model = GraphSAGE( g, in_feats, args.n_hidden, n_classes, args.n_layers, nd.relu, args.dropout, args.aggregator_type, ) model.initialize(ctx=ctx) n_train_samples = train_mask.sum().asscalar() loss_fcn = gluon.loss.SoftmaxCELoss() print(model.collect_params()) trainer = gluon.Trainer( model.collect_params(), "adam", {"learning_rate": args.lr, "wd": args.weight_decay}, ) # initialize graph dur = [] for epoch in range(args.n_epochs): if epoch >= 3: t0 = time.time() # forward with mx.autograd.record(): pred = model(features) loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1)) loss = loss.sum() / n_train_samples loss.backward() trainer.step(batch_size=1) if epoch >= 3: loss.asscalar() dur.append(time.time() - t0) acc = evaluate(model, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000, ) ) # test set accuracy acc = evaluate(model, features, labels, test_mask) print("Test accuracy {:.2%}".format(acc)) if __name__ 
== "__main__": parser = argparse.ArgumentParser(description="GraphSAGE") register_data_args(parser) parser.add_argument( "--dropout", type=float, default=0.5, help="dropout probability" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden gcn units" ) parser.add_argument( "--n-layers", type=int, default=1, help="number of hidden gcn layers" ) parser.add_argument( "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" ) parser.add_argument( "--aggregator-type", type=str, default="gcn", help="Aggregator type: mean/gcn/pool/lstm", ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/mxnet/monet/README.md ================================================ MoNet ===== - paper link: [Geometric deep learning on graphs and manifolds using mixture model CNNs](https://arxiv.org/pdf/1611.08402.pdf) Dependencies ============ - MXNet 1.5+ Results ======= ## Citation networks Run with following (available dataset: "cora", "citeseer", "pubmed") ```bash python3 citation.py --dataset cora --gpu 0 ``` - Cora: ~0.814 - Pubmed: ~0.748 ================================================ FILE: examples/mxnet/monet/citation.py ================================================ import argparse import time import dgl import mxnet as mx import networkx as nx import numpy as np from dgl.data import ( CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset, register_data_args, ) from dgl.nn.mxnet.conv import GMMConv from mxnet import gluon, nd from mxnet.gluon import nn class MoNet(nn.Block): def __init__( self, g, in_feats, n_hidden, out_feats, n_layers, dim, n_kernels, dropout, ): super(MoNet, self).__init__() self.g = g with self.name_scope(): self.layers = 
def main(args):
    """Train MoNet on a citation graph and report validation/test accuracy.

    The graph gets self-loops, every edge receives a 2-d pseudo-coordinate
    ``(1/sqrt(deg(u)), 1/sqrt(deg(v)))`` built from in-degrees, and the model
    is trained with Adam on the masked softmax cross-entropy loss.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``dataset``, ``gpu``, ``n_hidden``, ``n_layers``,
        ``pseudo_dim``, ``n_kernels``, ``dropout``, ``lr``, ``weight_decay``
        and ``n_epochs``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of "cora", "citeseer", "pubmed".
    """
    # load and preprocess dataset
    if args.dataset == "cora":
        data = CoraGraphDataset()
    elif args.dataset == "citeseer":
        data = CiteseerGraphDataset()
    elif args.dataset == "pubmed":
        data = PubmedGraphDataset()
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        ctx = mx.cpu(0)
    else:
        ctx = mx.gpu(args.gpu)
        g = g.to(ctx)

    features = g.ndata["feat"]
    labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print(
        """----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.sum().asscalar(),
            val_mask.sum().asscalar(),
            test_mask.sum().asscalar(),
        )
    )

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.number_of_edges()

    # Pseudo-coordinates: one (1/sqrt(deg(src)), 1/sqrt(deg(dst))) pair per
    # edge.  Degrees are fetched once for all nodes and gathered with numpy
    # instead of issuing two in_degrees() queries per edge.  Every node has a
    # self-loop at this point, so no in-degree is zero.
    us, vs = g.edges()
    us = us.asnumpy()
    vs = vs.asnumpy()
    inv_sqrt_deg = 1 / np.sqrt(g.in_degrees().asnumpy())
    pseudo = nd.array(
        np.stack([inv_sqrt_deg[us], inv_sqrt_deg[vs]], axis=1),
        ctx=ctx,
        dtype="float32",
    )

    # create MoNet model
    model = MoNet(
        g,
        in_feats,
        args.n_hidden,
        n_classes,
        args.n_layers,
        args.pseudo_dim,
        args.n_kernels,
        args.dropout,
    )
    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    print(model.collect_params())
    trainer = gluon.Trainer(
        model.collect_params(),
        "adam",
        {"learning_rate": args.lr, "wd": args.weight_decay},
    )

    # training loop; the first three epochs are treated as warm-up and are
    # excluded from the timing statistics
    dur = []
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features, pseudo)
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() blocks until the asynchronous computation is done,
            # so the measured time covers the whole step
            loss.asscalar()
            dur.append(time.time() - t0)
        acc = evaluate(model, features, pseudo, labels, val_mask)
        # no timing samples exist during warm-up: report NaN without
        # triggering numpy's "mean of empty slice" warning
        mean_dur = np.mean(dur) if dur else float("nan")
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
            "ETputs(KTEPS) {:.2f}".format(
                epoch,
                mean_dur,
                loss.asscalar(),
                acc,
                n_edges / mean_dur / 1000,
            )
        )

    # test set accuracy
    acc = evaluate(model, features, pseudo, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
dimensions in GMMConv, 2 for cora and 3 for pubmed", ) parser.add_argument( "--n-kernels", type=int, default=3, help="Number of kernels in GMMConv layer", ) parser.add_argument( "--weight-decay", type=float, default=5e-5, help="Weight for L2 loss" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/mxnet/rgcn/README.md ================================================ # Relational-GCN * Paper: [https://arxiv.org/abs/1703.06103](https://arxiv.org/abs/1703.06103) * Author's code for entity classification: [https://github.com/tkipf/relational-gcn](https://github.com/tkipf/relational-gcn) * Author's code for link prediction: [https://github.com/MichSchli/RelationPrediction](https://github.com/MichSchli/RelationPrediction) ### Dependencies Two extra python packages are needed for this example: - MXNet nightly build - requests - rdflib - pandas ```bash pip install mxnet --pre pip install requests rdflib pandas ``` Example code was tested with rdflib 4.2.2 and pandas 0.23.4 ### Entity Classification AIFB: accuracy 97.22% (5 runs, DGL), 95.83% (paper) ``` DGLBACKEND=mxnet python3 entity_classify.py -d aifb --testing --gpu 0 ``` MUTAG: accuracy 70.59% (5 runs, DGL), 73.23% (paper) ``` DGLBACKEND=mxnet python3 entity_classify.py -d mutag --l2norm 5e-4 --n-bases 40 --testing --gpu 0 ``` BGS: accuracy 86.21% (5 runs, DGL, n-basese=20), 83.10% (paper) ``` DGLBACKEND=mxnet python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 20 --testing --gpu 0 ``` ================================================ FILE: examples/mxnet/rgcn/entity_classify.py ================================================ """ Modeling Relational Data with Graph Convolutional Networks Paper: https://arxiv.org/abs/1703.06103 Code: https://github.com/tkipf/relational-gcn Difference compared to tkipf/relation-gcn * l2norm applied to all weights * remove nodes that won't be touched """ import argparse import time from functools import partial import 
class EntityClassify(BaseRGCN):
    """R-GCN entity classifier whose layers are basis-regularized RelGraphConv."""

    def _basis_conv(self, in_dim, out_dim, **extra):
        """Build a "basis" ``RelGraphConv`` with the settings shared by all layers."""
        return RelGraphConv(
            in_dim,
            out_dim,
            self.num_rels,
            "basis",
            self.num_bases,
            self_loop=self.use_self_loop,
            **extra,
        )

    def build_input_layer(self):
        """Return the first layer, mapping ``num_nodes`` inputs to ``h_dim``."""
        return self._basis_conv(
            self.num_nodes,
            self.h_dim,
            activation=F.relu,
            dropout=self.dropout,
        )

    def build_hidden_layer(self, idx):
        """Return a hidden ``h_dim`` -> ``h_dim`` layer (``idx`` is not used)."""
        return self._basis_conv(
            self.h_dim,
            self.h_dim,
            activation=F.relu,
            dropout=self.dropout,
        )

    def build_output_layer(self):
        """Return the last layer, ``h_dim`` -> ``out_dim``, with no activation."""
        return self._basis_conv(self.h_dim, self.out_dim, activation=None)
= count[inverse_index] norm = np.ones(eid.shape[0]) / degrees hg.edges[canonical_etype].data["norm"] = mx.nd.expand_dims( mx.nd.array(norm), axis=1 ) # get target category id category_id = len(hg.ntypes) for i, ntype in enumerate(hg.ntypes): if ntype == category: category_id = i g = dgl.to_homogeneous(hg, edata=["norm"]) num_nodes = g.number_of_nodes() node_ids = mx.nd.arange(num_nodes) edge_norm = g.edata["norm"] edge_type = g.edata[dgl.ETYPE] # find out the target node ids in g node_tids = g.ndata[dgl.NTYPE] loc = node_tids == category_id loc = mx.nd.array(np.nonzero(loc.asnumpy())[0], dtype="int64") target_idx = node_ids[loc] # since the nodes are featureless, the input feature is then the node id. feats = mx.nd.arange(num_nodes, dtype="int32") # check cuda use_cuda = args.gpu >= 0 if use_cuda: ctx = mx.gpu(args.gpu) feats = feats.as_in_context(ctx) edge_type = edge_type.as_in_context(ctx) edge_norm = edge_norm.as_in_context(ctx) labels = labels.as_in_context(ctx) train_idx = train_idx.as_in_context(ctx) g = g.to(ctx) else: ctx = mx.cpu(0) # create model model = EntityClassify( num_nodes, args.n_hidden, num_classes, num_rels, num_bases=args.n_bases, num_hidden_layers=args.n_layers - 2, dropout=args.dropout, use_self_loop=args.use_self_loop, gpu_id=args.gpu, ) model.initialize(ctx=ctx) # optimizer trainer = gluon.Trainer( model.collect_params(), "adam", {"learning_rate": args.lr, "wd": args.l2norm}, ) loss_fcn = gluon.loss.SoftmaxCELoss(from_logits=False) # training loop print("start training...") forward_time = [] backward_time = [] for epoch in range(args.n_epochs): t0 = time.time() with mx.autograd.record(): pred = model(g, feats, edge_type, edge_norm) pred = pred[target_idx] loss = loss_fcn(pred[train_idx], labels[train_idx]) t1 = time.time() loss.backward() trainer.step(len(train_idx)) t2 = time.time() forward_time.append(t1 - t0) backward_time.append(t2 - t1) print( "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}".format( epoch, 
forward_time[-1], backward_time[-1] ) ) train_acc = ( F.sum( mx.nd.cast(pred[train_idx].argmax(axis=1), "int64") == labels[train_idx] ).asscalar() / train_idx.shape[0] ) val_acc = F.sum( mx.nd.cast(pred[val_idx].argmax(axis=1), "int64") == labels[val_idx] ).asscalar() / len(val_idx) print( "Train Accuracy: {:.4f} | Validation Accuracy: {:.4f}".format( train_acc, val_acc ) ) print() logits = model.forward(g, feats, edge_type, edge_norm) logits = logits[target_idx] test_acc = F.sum( mx.nd.cast(logits[test_idx].argmax(axis=1), "int64") == labels[test_idx] ).asscalar() / len(test_idx) print("Test Accuracy: {:.4f}".format(test_acc)) print() print( "Mean forward time: {:4f}".format( np.mean(forward_time[len(forward_time) // 4 :]) ) ) print( "Mean backward time: {:4f}".format( np.mean(backward_time[len(backward_time) // 4 :]) ) ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="RGCN") parser.add_argument( "--dropout", type=float, default=0, help="dropout probability" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden units" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-bases", type=int, default=-1, help="number of filter weight matrices, default: -1 [use all]", ) parser.add_argument( "--n-layers", type=int, default=2, help="number of propagation rounds" ) parser.add_argument( "-e", "--n-epochs", type=int, default=50, help="number of training epochs", ) parser.add_argument( "-d", "--dataset", type=str, required=True, help="dataset to use" ) parser.add_argument("--l2norm", type=float, default=0, help="l2 norm coef") parser.add_argument( "--use-self-loop", default=False, action="store_true", help="include self feature as a special relation", ) fp = parser.add_mutually_exclusive_group(required=False) fp.add_argument("--validation", dest="validation", action="store_true") fp.add_argument("--testing", 
class BaseRGCN(gluon.Block):
    """Skeleton of an R-GCN: an input layer, hidden layers and an output layer.

    Subclasses supply the layers by overriding ``build_input_layer``,
    ``build_hidden_layer`` and ``build_output_layer``; the constructor
    stores the hyper-parameters on ``self`` and then calls ``build_model``.

    Parameters
    ----------
    num_nodes : int
        Stored as ``self.num_nodes`` for use by the layer builders.
    h_dim : int
        Hidden size, stored as ``self.h_dim``.
    out_dim : int
        Output size, stored as ``self.out_dim``.
    num_rels : int
        Number of relation types, stored as ``self.num_rels``.
    num_bases : int, default -1
        Stored as ``self.num_bases``.
    num_hidden_layers : int, default 1
        How many times ``build_hidden_layer`` is invoked.
    dropout : float, default 0
        Stored as ``self.dropout``.
    use_self_loop : bool, default False
        Stored as ``self.use_self_loop``.
    gpu_id : int, default -1
        Stored as ``self.gpu_id``; not read by this class.
    """

    def __init__(
        self,
        num_nodes,
        h_dim,
        out_dim,
        num_rels,
        num_bases=-1,
        num_hidden_layers=1,
        dropout=0,
        use_self_loop=False,
        gpu_id=-1,
    ):
        super(BaseRGCN, self).__init__()
        self.num_nodes = num_nodes
        self.h_dim = h_dim
        self.out_dim = out_dim
        self.num_rels = num_rels
        self.num_bases = num_bases
        self.num_hidden_layers = num_hidden_layers
        self.dropout = dropout
        self.use_self_loop = use_self_loop
        self.gpu_id = gpu_id

        # create rgcn layers
        self.build_model()

    def build_model(self):
        """Assemble ``self.layers`` from the three builder hooks, in order."""
        self.layers = gluon.nn.Sequential()
        # i2h
        i2h = self.build_input_layer()
        if i2h is not None:
            self.layers.add(i2h)
        # h2h
        for idx in range(self.num_hidden_layers):
            h2h = self.build_hidden_layer(idx)
            self.layers.add(h2h)
        # h2o
        h2o = self.build_output_layer()
        if h2o is not None:
            self.layers.add(h2o)

    def build_input_layer(self):
        """Return the input layer, or ``None`` to omit it (the default)."""
        return None

    def build_hidden_layer(self, idx):
        """Return the ``idx``-th hidden layer; subclasses must override.

        The signature takes ``idx`` because ``build_model`` passes the layer
        index; without it the base hook failed with ``TypeError`` instead of
        the intended ``NotImplementedError``.
        """
        raise NotImplementedError

    def build_output_layer(self):
        """Return the output layer, or ``None`` to omit it (the default)."""
        return None

    def forward(self, g, h, r, norm):
        """Feed ``h`` through every layer as ``layer(g, h, r, norm)`` and return it."""
        for layer in self.layers:
            h = layer(g, h, r, norm)
        return h
![DEMO](https://raw.githubusercontent.com/dmlc/web-data/master/dgl/examples/mxnet/scenegraph/old-couple-pred.png)

## Results

**VisualGenome**

| Model | Backbone | mAP@50 | SGDET@20 | SGDET@50 | SGDET@100 | PHRCLS@20 | PHRCLS@50 | PHRCLS@100 | PREDCLS@20 | PREDCLS@50 | PREDCLS@100 |
| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- |
| RelDN, L0 | ResNet101 | 29.5 | 22.65 | 30.02 | 35.04 | 32.84 | 35.60 | 36.26 | 60.58 | 65.53 | 66.51 |

## Preparation

This implementation is based on GluonCV. Install GluonCV with

```
pip install gluoncv --upgrade
```

The implementation contains the following files:

```
.
|-- data
|   |-- dataloader.py
|   |-- __init__.py
|   |-- object.py
|   |-- prepare_visualgenome.py
|   `-- relation.py
|-- demo_reldn.py
|-- model
|   |-- faster_rcnn.py
|   |-- __init__.py
|   `-- reldn.py
|-- README.md
|-- train_faster_rcnn.py
|-- train_faster_rcnn.sh
|-- train_freq_prior.py
|-- train_reldn.py
|-- train_reldn.sh
|-- utils
|   |-- build_graph.py
|   |-- __init__.py
|   |-- metric.py
|   |-- sampling.py
|   `-- viz.py
|-- validate_reldn.py
`-- validate_reldn.sh
```

- The folder `data` contains the data preparation script and the dataset definitions for object detection and scene graph extraction.
- The folder `model` contains the model definitions.
- The folder `utils` contains helper functions for training, validation, and visualization.
- The script `train_faster_rcnn.py` trains a Faster R-CNN model on the VisualGenome dataset, and `train_faster_rcnn.sh` includes preset parameters.
- The script `train_freq_prior.py` computes the frequency counts used for RelDN model training.
- The script `train_reldn.py` trains a RelDN model, and `train_reldn.sh` includes preset parameters.
- The script `validate_reldn.py` validates the trained Faster R-CNN and RelDN models, and `validate_reldn.sh` includes preset parameters.
- The script `demo_reldn.py` uses trained parameters to extract a scene graph from an arbitrary input image.
The steps below describe how to train your own models. We also provide pretrained model files for validation and the demo:

1. [Faster R-CNN Model for Object Detection](http://data.dgl.ai/models/SceneGraph/faster_rcnn_resnet101_v1d_visualgenome.params)
2. [RelDN Model](http://data.dgl.ai/models/SceneGraph/reldn.params)
3. [Faster R-CNN Model for Edge Feature](http://data.dgl.ai/models/SceneGraph/detector_feature.params)

## Data preparation

We provide a script to download and prepare the VisualGenome dataset. Run it with

```
python data/prepare_visualgenome.py
```

## Object Detector

First, train the object detection model on VisualGenome:

```
bash train_faster_rcnn.sh
```

It runs for about 20 hours on a machine with 64 CPU cores and 8 V100 GPUs.

## Training RelDN

With a trained Faster R-CNN model, start training the RelDN model with

```
bash train_reldn.sh
```

It runs for about 2 days on a single GPU with 8 CPU cores.

## Validate RelDN

After training, evaluate the results with several commonly used metrics:

```
bash validate_reldn.sh
```

## Demo

We provide a demo script that runs the model on real-world pictures. The demo needs trained model parameters to produce meaningful results; if you do not pass your own parameter files, the script downloads the pretrained models automatically.
def dgl_mp_batchify_fn(data):
    """Collate a list of samples into a batch.

    * If the samples are tuples, they are transposed and every field is
      batchified recursively; a list with one entry per field is returned.
    * Otherwise the first element that is a ``DGLGraph`` or an ``NDArray``
      decides the result: graphs are returned as a plain list of the graphs
      in ``data``; arrays are padded together with ``Pad`` after dropping
      ``None`` entries.
    * ``None`` is returned when no element is a graph or an array.
    """
    if isinstance(data[0], tuple):
        return [dgl_mp_batchify_fn(field) for field in zip(*data)]

    for item in data:
        # None entries match neither type and are simply skipped
        if isinstance(item, dgl.DGLGraph):
            return [d for d in data if isinstance(d, dgl.DGLGraph)]
        if isinstance(item, nd.NDArray):
            batch_pad = Pad(axis=(1, 2), num_shards=1, ret_length=False)
            present = [d for d in data if d is not None]
            return batch_pad(present)
    return None
def parse_args():
    """Parse the command-line options of the Visual Genome preparation script.

    Returns
    -------
    argparse.Namespace
        ``download_dir`` (str, default "~/visualgenome/"), ``no_download``
        (bool) and ``overwrite`` (bool), read from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(
        description="Initialize Visual Genome dataset.",
        # the example names this script (it previously referred to a
        # non-existent visualgenome.py)
        epilog="Example: python prepare_visualgenome.py --download-dir ~/visualgenome",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--download-dir",
        type=str,
        default="~/visualgenome/",
        help="dataset directory on disk",
    )
    parser.add_argument(
        "--no-download",
        action="store_true",
        help="disable automatic download if set",
    )
    parser.add_argument(
        "--overwrite",
        action="store_true",
        help="overwrite downloaded files if set, in case they are corrupted",
    )
    return parser.parse_args()
def download_json(path, overwrite=False):
    """Download the Visual Genome annotation archive and unpack it into ``path``.

    The archive expands to a ``vg`` sub-folder; its files are moved directly
    into ``path`` and the then-empty folder is removed.

    Parameters
    ----------
    path : str
        Directory that receives the archive and the extracted files.
    overwrite : bool, default False
        Forwarded to ``download`` to re-fetch an archive that already exists.
    """
    url = "https://data.dgl.ai/dataset/vg.zip"
    # Open the file that download() reports having written (the same pattern
    # download_vg uses).  The previous code opened "vg.zip" relative to the
    # working directory, which only worked when run from inside `path`, and
    # it ignored `overwrite`.
    filename = download(url, path=path, overwrite=overwrite)
    with zipfile.ZipFile(filename) as zf:
        zf.extractall(path=path)
    json_path = os.path.join(path, "vg")
    for fl in os.listdir(json_path):
        shutil.move(os.path.join(json_path, fl), os.path.join(path, fl))
    os.rmdir(json_path)
' Or you should not disable "--no-download" to grab it'.format( path ) ) ) else: download_vg(path, overwrite=args.overwrite) download_json(path, overwrite=args.overwrite) # make symlink makedirs(os.path.expanduser("~/.mxnet/datasets")) if os.path.isdir(_TARGET_DIR): os.rmdir(_TARGET_DIR) os.symlink(path, _TARGET_DIR) ================================================ FILE: examples/mxnet/scenegraph/data/relation.py ================================================ """Pascal VOC object detection dataset.""" from __future__ import absolute_import, division import json import logging import os import pickle import warnings from collections import Counter import dgl import mxnet as mx import numpy as np from gluoncv.data.base import VisionDataset from gluoncv.data.transforms.presets.rcnn import ( FasterRCNNDefaultTrainTransform, FasterRCNNDefaultValTransform, ) class VGRelation(VisionDataset): def __init__( self, root=os.path.join("~", ".mxnet", "datasets", "visualgenome"), split="train", ): super(VGRelation, self).__init__(root) self._root = os.path.expanduser(root) self._img_path = os.path.join(self._root, "VG_100K", "{}") if split == "train": self._dict_path = os.path.join( self._root, "rel_annotations_train.json" ) elif split == "val": self._dict_path = os.path.join( self._root, "rel_annotations_val.json" ) else: raise NotImplementedError with open(self._dict_path) as f: tmp = f.read() self._dict = json.loads(tmp) self._predicates_path = os.path.join(self._root, "predicates.json") with open(self._predicates_path, "r") as f: tmp = f.read() self.rel_classes = json.loads(tmp) self.num_rel_classes = len(self.rel_classes) + 1 self._objects_path = os.path.join(self._root, "objects.json") with open(self._objects_path, "r") as f: tmp = f.read() self.obj_classes = json.loads(tmp) self.num_obj_classes = len(self.obj_classes) if split == "val": self.img_transform = FasterRCNNDefaultValTransform( short=600, max_size=1000 ) else: self.img_transform = 
FasterRCNNDefaultTrainTransform( short=600, max_size=1000 ) self.split = split def __len__(self): return len(self._dict) def _hash_bbox(self, object): num_list = [object["category"]] + object["bbox"] return "_".join([str(num) for num in num_list]) def __getitem__(self, idx): img_id = list(self._dict)[idx] img_path = self._img_path.format(img_id) img = mx.image.imread(img_path) item = self._dict[img_id] n_edges = len(item) # edge to node ids sub_node_hash = [] ob_node_hash = [] for i, it in enumerate(item): sub_node_hash.append(self._hash_bbox(it["subject"])) ob_node_hash.append(self._hash_bbox(it["object"])) node_set = sorted(list(set(sub_node_hash + ob_node_hash))) n_nodes = len(node_set) node_to_id = {} for i, node in enumerate(node_set): node_to_id[node] = i sub_id = [] ob_id = [] for i in range(n_edges): sub_id.append(node_to_id[sub_node_hash[i]]) ob_id.append(node_to_id[ob_node_hash[i]]) # node features bbox = mx.nd.zeros((n_nodes, 4)) node_class_ids = mx.nd.zeros((n_nodes, 1)) node_visited = [False for i in range(n_nodes)] for i, it in enumerate(item): if not node_visited[sub_id[i]]: ind = sub_id[i] sub = it["subject"] node_class_ids[ind] = sub["category"] # y1y2x1x2 to x1y1x2y2 bbox[ind, 0] = sub["bbox"][2] bbox[ind, 1] = sub["bbox"][0] bbox[ind, 2] = sub["bbox"][3] bbox[ind, 3] = sub["bbox"][1] node_visited[ind] = True if not node_visited[ob_id[i]]: ind = ob_id[i] ob = it["object"] node_class_ids[ind] = ob["category"] # y1y2x1x2 to x1y1x2y2 bbox[ind, 0] = ob["bbox"][2] bbox[ind, 1] = ob["bbox"][0] bbox[ind, 2] = ob["bbox"][3] bbox[ind, 3] = ob["bbox"][1] node_visited[ind] = True eta = 0.1 node_class_vec = node_class_ids[:, 0].one_hot( self.num_obj_classes, on_value=1 - eta + eta / self.num_obj_classes, off_value=eta / self.num_obj_classes, ) # augmentation if self.split == "val": img, bbox, _ = self.img_transform(img, bbox) else: img, bbox = self.img_transform(img, bbox) # build the graph g = dgl.DGLGraph() g.add_nodes(n_nodes) adjmat = np.zeros((n_nodes, 
"""Demo: extract and plot a scene graph from a single image with RelDN."""
import argparse

import gluoncv as gcv
import mxnet as mx
from data import *
from gluoncv.data.transforms import presets
from gluoncv.utils import download  # was `gluoncv.utilz`, which does not exist
from model import faster_rcnn_resnet101_v1d_custom, RelDN
from utils import *

import dgl


def parse_args():
    """Parse the demo's command-line options and return the namespace."""
    parser = argparse.ArgumentParser(
        description="Demo of Scene Graph Extraction."
    )
    parser.add_argument(
        "--image",
        type=str,
        default="",
        help="The image for scene graph extraction.",
    )
    parser.add_argument(
        "--gpu",
        type=str,
        default="",
        help="GPU id to use for inference, default is not using GPU.",
    )
    parser.add_argument(
        "--pretrained-faster-rcnn-params",
        type=str,
        default="",
        help="Path to saved Faster R-CNN model parameters.",
    )
    parser.add_argument(
        "--reldn-params",
        type=str,
        default="",
        help="Path to saved RelDN model parameters.",
    )
    parser.add_argument(
        "--faster-rcnn-params",
        type=str,
        default="",
        help="Path to saved Faster R-CNN model parameters "
        "loaded into the edge-feature extractor.",
    )
    parser.add_argument(
        "--freq-prior",
        type=str,
        default="freq_prior.pkl",
        help="Path to saved frequency prior data.",
    )
    args = parser.parse_args()
    return args


args = parse_args()
if args.gpu:
    ctx = mx.gpu(int(args.gpu))
else:
    ctx = mx.cpu()

net = RelDN(n_classes=50, prior_pkl=args.freq_prior, semantic_only=False)
if args.reldn_params == "":
    # load the file that was actually downloaded (the old code asked for the
    # misspelled "rendl.params")
    reldn_path = download("http://data.dgl.ai/models/SceneGraph/reldn.params")
    net.load_parameters(reldn_path, ctx=ctx)
else:
    net.load_parameters(args.reldn_params, ctx=ctx)

# dataset and dataloader
vg_val = VGRelation(split="val")

# object detector
detector = faster_rcnn_resnet101_v1d_custom(
    classes=vg_val.obj_classes,
    pretrained_base=False,
    pretrained=False,
    additional_output=True,
)
if args.pretrained_faster_rcnn_params == "":
    params_path = download(
        "http://data.dgl.ai/models/SceneGraph/faster_rcnn_resnet101_v1d_visualgenome.params"
    )
else:
    params_path = args.pretrained_faster_rcnn_params
detector.load_parameters(
    params_path, ctx=ctx, ignore_extra=True, allow_missing=True
)

# second detector instance, used only to compute edge features
detector_feat = faster_rcnn_resnet101_v1d_custom(
    classes=vg_val.obj_classes,
    pretrained_base=False,
    pretrained=False,
    additional_output=True,
)
detector_feat.load_parameters(
    params_path, ctx=ctx, ignore_extra=True, allow_missing=True
)
if args.faster_rcnn_params == "":
    # NOTE(review): the README lists a separate "detector_feature.params" for
    # edge features; confirm which file is intended for `features` here.
    feat_params_path = download(
        "http://data.dgl.ai/models/SceneGraph/faster_rcnn_resnet101_v1d_visualgenome.params"
    )
    detector_feat.features.load_parameters(feat_params_path, ctx=ctx)
else:
    detector_feat.features.load_parameters(args.faster_rcnn_params, ctx=ctx)

# image input
if args.image:
    image_path = args.image
else:
    gcv.utils.download(
        "https://raw.githubusercontent.com/dmlc/web-data/master/"
        + "dgl/examples/mxnet/scenegraph/old-couple.png",
        "old-couple.png",
    )
    image_path = "old-couple.png"
# use the resolved path so the bundled sample image works when --image is
# omitted (the old code passed the empty args.image)
x, img = presets.rcnn.load_test(
    image_path, short=detector.short, max_size=detector.max_size
)
x = x.as_in_context(ctx)
# detector prediction
ids, scores, bboxes, feat, feat_ind, spatial_feat = detector(x)
# build graph, extract edge features
g = build_graph_validate_pred(
    x,
    ids,
    scores,
    bboxes,
    feat_ind,
    spatial_feat,
    bbox_improvement=True,
    scores_top_k=75,
    overlap=False,
)
rel_bbox = g.edata["rel_bbox"].expand_dims(0).as_in_context(ctx)
_, _, _, spatial_feat_rel = detector_feat(x, None, None, rel_bbox)
g.edata["edge_feat"] = spatial_feat_rel[0]
# graph prediction
g = net(g)
_, preds = extract_pred(g, joint_preds=True)
# sort predictions by the value in column 1, highest first
preds = preds[preds[:, 1].argsort()[::-1]]

plot_sg(img, preds, detector.classes, vg_val.rel_classes, 10)
class FasterRCNN(RCNN):
    r"""Faster RCNN network.

    Parameters
    ----------
    features : gluon.HybridBlock
        Base feature extractor before feature pooling layer.
    top_features : gluon.HybridBlock
        Tail feature extractor after feature pooling layer.
    classes : iterable of str
        Names of categories, its length is ``num_class``.
    box_features : gluon.HybridBlock, default is None
        feature head for transforming shared ROI output (top_features) for box prediction.
        If set to None, global average pooling will be used.
    short : int, default is 600.
        Input image short side size.
    max_size : int, default is 1000.
        Maximum size of input image long side.
    min_stage : int, default is 4
        Minimum stage NO. for FPN stages.
    max_stage : int, default is 4
        Maximum stage NO. for FPN stages.
    train_patterns : str, default is None.
        Matching pattern for trainable parameters.
    nms_thresh : float, default is 0.3.
        Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS.
    nms_topk : int, default is 400
        Apply NMS to top k detection results, use -1 to disable so that every Detection
        result is used in NMS.
    post_nms : int, default is 100
        Only return top `post_nms` detection results, the rest is discarded. The number is
        based on COCO dataset which has maximum 100 objects per image. You can adjust this
        number if expecting more objects. You can use -1 to return all detections.
    roi_mode : str, default is align
        ROI pooling mode. Currently support 'pool' and 'align'.
    roi_size : tuple of int, length 2, default is (14, 14)
        (height, width) of the ROI region.
    strides : int/tuple of ints, default is 16
        Feature map stride with respect to original image.
        This is usually the ratio between original image size and feature map size.
        For FPN, use a tuple of ints.
    clip : float, default is None
        Clip bounding box target to this value.
    rpn_channel : int, default is 1024
        Channel number used in RPN convolutional layers.
    base_size : int
        The width(and height) of reference anchor box.
    scales : iterable of float, default is (8, 16, 32)
        The areas of anchor boxes.
        We use the following form to compute the shapes of anchors:

        .. math::

            width_{anchor} = size_{base} \times scale \times \sqrt{ 1 / ratio}
            height_{anchor} = size_{base} \times scale \times \sqrt{ratio}

    ratios : iterable of float, default is (0.5, 1, 2)
        The aspect ratios of anchor boxes. We expect it to be a list or tuple.
    alloc_size : tuple of int
        Allocate size for the anchor boxes as (H, W).
        Usually we generate enough anchors for large feature map, e.g. 128x128.
        Later in inference we can have variable input sizes,
        at which time we can crop corresponding anchors from this large
        anchor map so we can skip re-generating anchors for each input.
    rpn_train_pre_nms : int, default is 12000
        Filter top proposals before NMS in training of RPN.
    rpn_train_post_nms : int, default is 2000
        Return top proposal results after NMS in training of RPN.
        Will be set to rpn_train_pre_nms if it is larger than rpn_train_pre_nms.
    rpn_test_pre_nms : int, default is 6000
        Filter top proposals before NMS in testing of RPN.
    rpn_test_post_nms : int, default is 300
        Return top proposal results after NMS in testing of RPN.
        Will be set to rpn_test_pre_nms if it is larger than rpn_test_pre_nms.
    rpn_nms_thresh : float, default is 0.7
        IOU threshold for NMS. It is used to remove overlapping proposals.
    rpn_num_sample : int, default is 256
        Number of samples for RPN targets.
    rpn_pos_iou_thresh : float, default is 0.7
        Anchor with IOU larger than ``pos_iou_thresh`` is regarded as positive samples.
    rpn_neg_iou_thresh : float, default is 0.3
        Anchor with IOU smaller than ``neg_iou_thresh`` is regarded as negative samples.
        Anchors with IOU in between ``pos_iou_thresh`` and ``neg_iou_thresh`` are
        ignored.
    rpn_pos_ratio : float, default is 0.5
        ``pos_ratio`` defines how many positive samples (``pos_ratio * num_sample``) is
        to be sampled.
    rpn_box_norm : array-like of size 4, default is (1., 1., 1., 1.)
        Std value to be divided from encoded values.
    rpn_min_size : int, default is 16
        Proposals whose size is smaller than ``min_size`` will be discarded.
    per_device_batch_size : int, default is 1
        Batch size for each device during training.
    num_sample : int, default is 128
        Number of samples for RCNN targets.
    pos_iou_thresh : float, default is 0.5
        Proposal whose IOU larger than ``pos_iou_thresh`` is regarded as positive samples.
    pos_ratio : float, default is 0.25
        ``pos_ratio`` defines how many positive samples (``pos_ratio * num_sample``) is
        to be sampled.
    max_num_gt : int, default is 300
        Maximum ground-truth number in whole training dataset. This is only an upper bound, not
        necessarily very precise. However, using a very big number may impact the training speed.
    additional_output : boolean, default is False
        When True, ``hybrid_forward`` returns extra tensors: ``top_feat`` in training,
        and ``feat``, ``feat_ind`` and ``spatial_feat`` in inference.
    force_nms : bool, default is False
        Apply NMS to all categories, this is to avoid overlapping detection results from different
        categories.

    Notes
    -----
    ``rpn_num_sample``, ``rpn_pos_iou_thresh``, ``rpn_neg_iou_thresh``, ``rpn_pos_ratio``
    and ``rpn_box_norm`` are not named parameters of ``__init__``; if supplied they travel
    through ``**kwargs`` to the ``RCNN`` base class.

    Attributes
    ----------
    classes : iterable of str
        Names of categories, its length is ``num_class``.
    num_class : int
        Number of positive categories.
    short : int
        Input image short side size.
    max_size : int
        Maximum size of input image long side.
    train_patterns : str
        Matching pattern for trainable parameters.
    nms_thresh : float
        Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS.
    nms_topk : int
        Apply NMS to top k detection results, use -1 to disable so that every Detection
        result is used in NMS.
    force_nms : bool
        Apply NMS to all categories, this is to avoid overlapping detection results
        from different categories.
    post_nms : int
        Only return top `post_nms` detection results, the rest is discarded. The number is
        based on COCO dataset which has maximum 100 objects per image. You can adjust this
        number if expecting more objects. You can use -1 to return all detections.
    rpn_target_generator : gluon.Block
        Generate training targets with cls_target, box_target, and box_mask.
    target_generator : gluon.Block
        Generate training targets with boxes, samples, matches, gt_label and gt_box.

    """

    def __init__(
        self,
        features,
        top_features,
        classes,
        box_features=None,
        short=600,
        max_size=1000,
        min_stage=4,
        max_stage=4,
        train_patterns=None,
        nms_thresh=0.3,
        nms_topk=400,
        post_nms=100,
        roi_mode="align",
        roi_size=(14, 14),
        strides=16,
        clip=None,
        rpn_channel=1024,
        base_size=16,
        scales=(8, 16, 32),
        ratios=(0.5, 1, 2),
        alloc_size=(128, 128),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000,
        rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000,
        rpn_test_post_nms=300,
        rpn_min_size=16,
        per_device_batch_size=1,
        num_sample=128,
        pos_iou_thresh=0.5,
        pos_ratio=0.25,
        max_num_gt=300,
        additional_output=False,
        force_nms=False,
        **kwargs
    ):
        super(FasterRCNN, self).__init__(
            features=features,
            top_features=top_features,
            classes=classes,
            box_features=box_features,
            short=short,
            max_size=max_size,
            train_patterns=train_patterns,
            nms_thresh=nms_thresh,
            nms_topk=nms_topk,
            post_nms=post_nms,
            roi_mode=roi_mode,
            roi_size=roi_size,
            strides=strides,
            clip=clip,
            force_nms=force_nms,
            **kwargs
        )
        # The post-NMS count can never exceed the pre-NMS count.
        if rpn_train_post_nms > rpn_train_pre_nms:
            rpn_train_post_nms = rpn_train_pre_nms
        if rpn_test_post_nms > rpn_test_pre_nms:
            rpn_test_post_nms = rpn_test_pre_nms

        self.ashape = alloc_size[0]
        self._min_stage = min_stage
        self._max_stage = max_stage
        self.num_stages = max_stage - min_stage + 1
        if self.num_stages > 1:
            assert len(scales) == len(strides) == self.num_stages, (
                "The num_stages (%d) must match number of scales (%d) and strides (%d)"
                % (self.num_stages, len(scales), len(strides))
            )
        self._batch_size = per_device_batch_size
        self._num_sample = num_sample
        self._rpn_test_post_nms = rpn_test_post_nms
        self._target_generator = RCNNTargetGenerator(
            self.num_class, int(num_sample * pos_ratio), self._batch_size
        )
        self._additional_output = additional_output
        with self.name_scope():
            self.rpn = RPN(
                channels=rpn_channel,
                strides=strides,
                base_size=base_size,
                scales=scales,
                ratios=ratios,
                alloc_size=alloc_size,
                clip=clip,
                nms_thresh=rpn_nms_thresh,
                train_pre_nms=rpn_train_pre_nms,
                train_post_nms=rpn_train_post_nms,
                test_pre_nms=rpn_test_pre_nms,
                test_post_nms=rpn_test_post_nms,
                min_size=rpn_min_size,
                multi_level=self.num_stages > 1,
                per_level_nms=False,
            )
            self.sampler = RCNNTargetSampler(
                num_image=self._batch_size,
                num_proposal=rpn_train_post_nms,
                num_sample=num_sample,
                pos_iou_thresh=pos_iou_thresh,
                pos_ratio=pos_ratio,
                max_num_gt=max_num_gt,
            )

    @property
    def target_generator(self):
        """Returns stored target generator

        Returns
        -------
        mxnet.gluon.HybridBlock
            The RCNN target generator

        """
        return self._target_generator

    def reset_class(self, classes, reuse_weights=None):
        """Reset class categories and class predictors.

        Parameters
        ----------
        classes : iterable of str
            The new categories. ['apple', 'orange'] for example.
        reuse_weights : dict
            A {new_integer : old_integer} or mapping dict or {new_name : old_name} mapping dict,
            or a list of [name0, name1,...] if class names don't change.
            This allows the new predictor to reuse the
            previously trained weights specified.

        Example
        -------
        >>> net = gluoncv.model_zoo.get_model('faster_rcnn_resnet50_v1b_coco', pretrained=True)
        >>> # use direct name to name mapping to reuse weights
        >>> net.reset_class(classes=['person'], reuse_weights={'person':'person'})
        >>> # or use integer mapping, person is the 14th category in VOC
        >>> net.reset_class(classes=['person'], reuse_weights={0:14})
        >>> # you can even mix them
        >>> net.reset_class(classes=['person'], reuse_weights={'person':14})
        >>> # or use a list of string if class name don't change
        >>> net.reset_class(classes=['person'], reuse_weights=['person'])

        """
        super(FasterRCNN, self).reset_class(classes, reuse_weights)
        # The target generator depends on num_class, so rebuild it as well.
        self._target_generator = RCNNTargetGenerator(
            self.num_class, self.sampler._max_pos, self._batch_size
        )

    def _pyramid_roi_feats(
        self,
        F,
        features,
        rpn_rois,
        roi_size,
        strides,
        roi_mode="align",
        roi_canonical_scale=224.0,
        eps=1e-6,
    ):
        """Assign rpn_rois to specific FPN layers according to its area
           and then perform `ROIPooling` or `ROIAlign` to generate final
           region proposals aggregated features.

        Parameters
        ----------
        features : list of mx.ndarray or mx.symbol
            Features extracted from FPN base network
        rpn_rois : mx.ndarray or mx.symbol
            (N, 5) with [[batch_index, x1, y1, x2, y2], ...] like
        roi_size : tuple
            The size of each roi with regard to ROI-Wise operation
            each region proposal will be roi_size spatial shape.
        strides : tuple
            e.g. [4, 8, 16, 32]
            Define the gap that ori image and feature map have
        roi_mode : str, default is align
            ROI pooling mode. Currently support 'pool' and 'align'.
        roi_canonical_scale : float, default is 224.0
            Hyperparameters for the RoI-to-FPN level mapping heuristic.
        eps : float, default is 1e-6
            Added inside the ``log2`` so a zero-area ROI does not produce ``-inf``.

        Returns
        -------
        Pooled roi features aggregated according to its roi_level

        Raises
        ------
        ValueError
            If ``roi_mode`` is neither 'pool' nor 'align'.

        """
        max_stage = self._max_stage
        if self._max_stage > 5:  # do not use p6 for RCNN
            max_stage = self._max_stage - 1
        _, x1, y1, x2, y2 = F.split(rpn_rois, axis=-1, num_outputs=5)
        h = y2 - y1 + 1
        w = x2 - x1 + 1
        roi_level = F.floor(
            4 + F.log2(F.sqrt(w * h) / roi_canonical_scale + eps)
        )
        roi_level = F.squeeze(F.clip(roi_level, self._min_stage, max_stage))
        # [2,2,..,3,3,...,4,4,...,5,5,...] ``Prohibit swap order here``
        # roi_level_sorted_args = F.argsort(roi_level, is_ascend=True)
        # roi_level = F.sort(roi_level, is_ascend=True)
        # rpn_rois = F.take(rpn_rois, roi_level_sorted_args, axis=0)
        pooled_roi_feats = []
        for i, l in enumerate(range(self._min_stage, max_stage + 1)):
            if roi_mode == "pool":
                # Pool features with all rois first, and then set invalid pooled features to zero,
                # at last ele-wise add together to aggregate all features.
                pooled_feature = F.ROIPooling(
                    features[i], rpn_rois, roi_size, 1.0 / strides[i]
                )
                pooled_feature = F.where(
                    roi_level == l, pooled_feature, F.zeros_like(pooled_feature)
                )
            elif roi_mode == "align":
                if (
                    "box_encode" in F.contrib.__dict__
                    and "box_decode" in F.contrib.__dict__
                ):
                    # TODO(jerryzcn): clean this up for once mx 1.6 is released.
                    masked_rpn_rois = F.where(
                        roi_level == l, rpn_rois, F.ones_like(rpn_rois) * -1.0
                    )
                    pooled_feature = F.contrib.ROIAlign(
                        features[i],
                        masked_rpn_rois,
                        roi_size,
                        1.0 / strides[i],
                        sample_ratio=2,
                    )
                else:
                    pooled_feature = F.contrib.ROIAlign(
                        features[i],
                        rpn_rois,
                        roi_size,
                        1.0 / strides[i],
                        sample_ratio=2,
                    )
                    pooled_feature = F.where(
                        roi_level == l,
                        pooled_feature,
                        F.zeros_like(pooled_feature),
                    )
            else:
                raise ValueError("Invalid roi mode: {}".format(roi_mode))
            pooled_roi_feats.append(pooled_feature)
        # Ele-wise add to aggregate all pooled features
        pooled_roi_feats = F.ElementWiseSum(*pooled_roi_feats)
        # Sort all pooled features by ascending order
        # [2,2,..,3,3,...,4,4,...,5,5,...]
        # pooled_roi_feats = F.take(pooled_roi_feats, roi_level_sorted_args)
        # pooled roi feats (B*N, C, 7, 7), N = N2 + N3 + N4 + N5 = num_roi, C=256 in ori paper
        return pooled_roi_feats

    # pylint: disable=arguments-differ
    def hybrid_forward(self, F, x, gt_box=None, gt_label=None, m_rpn_box=None):
        """Forward Faster-RCNN network.

        The behavior during training and inference is different.

        Parameters
        ----------
        x : mxnet.nd.NDArray or mxnet.symbol
            The network input tensor.
        gt_box : type, only required during training
            The ground-truth bbox tensor with shape (B, N, 4).
        gt_label : type, only required during training
            The ground-truth label tensor with shape (B, 1, 4).
        m_rpn_box : mxnet.nd.NDArray, optional
            Manually supplied proposal boxes. When given, the RPN is skipped and
            these boxes are pooled directly; ``shape[0]`` is used as the batch
            size and ``shape[1]`` as the number of ROIs, and the last axis is
            reshaped to 4 coordinates. As a side effect ``self.nms_thresh`` is
            set to 1.

        Returns
        -------
        tuple
            * With ``m_rpn_box``: ``(cls_ids, scores, rpn_box, spatial_feat)``.
            * Training: ``(cls_pred, box_pred, rpn_box, samples, matches,
              raw_rpn_score, raw_rpn_box, anchors, cls_targets, box_targets,
              box_masks, indices)``, with ``top_feat`` inserted before
              ``indices`` when ``additional_output`` is set.
            * Inference: ``(ids, scores, bboxes)``, or ``(ids, scores, bboxes,
              feat, feat_ind, spatial_feat)`` when ``additional_output`` is set.

        """

        def _split(x, axis, num_outputs, squeeze_axis):
            # F.split returns a bare array when num_outputs == 1; normalise to a list.
            x = F.split(
                x, axis=axis, num_outputs=num_outputs, squeeze_axis=squeeze_axis
            )
            if isinstance(x, list):
                return x
            else:
                return [x]

        manual_rpn_box = m_rpn_box is not None

        feat = self.features(x)
        if not isinstance(feat, (list, tuple)):
            feat = [feat]

        # RPN proposals
        if autograd.is_training():
            if manual_rpn_box:
                rpn_box = m_rpn_box
                # NOTE: mutates the block's NMS threshold persistently.
                self.nms_thresh = 1
            else:
                (
                    rpn_score,
                    rpn_box,
                    raw_rpn_score,
                    raw_rpn_box,
                    anchors,
                ) = self.rpn(F.zeros_like(x), *feat)
                rpn_box, samples, matches = self.sampler(
                    rpn_box, rpn_score, gt_box
                )
        else:
            if manual_rpn_box:
                rpn_box = m_rpn_box
                # NOTE: mutates the block's NMS threshold persistently.
                self.nms_thresh = 1
            else:
                _, rpn_box = self.rpn(F.zeros_like(x), *feat)

        # create batchid for roi
        if not manual_rpn_box:
            num_roi = (
                self._num_sample
                if autograd.is_training()
                else self._rpn_test_post_nms
            )
            batch_size = self._batch_size if autograd.is_training() else 1
        else:
            num_roi = m_rpn_box.shape[1]
            batch_size = rpn_box.shape[0]
        with autograd.pause():
            roi_batchid = F.arange(0, batch_size)
            roi_batchid = F.repeat(roi_batchid, num_roi)
            # remove batch dim because ROIPooling require 2d input
            rpn_roi = F.concat(
                *[roi_batchid.reshape((-1, 1)), rpn_box.reshape((-1, 4))],
                dim=-1
            )
            rpn_roi = F.stop_gradient(rpn_roi)

        if self.num_stages > 1:
            # using FPN
            pooled_feat = self._pyramid_roi_feats(
                F,
                feat,
                rpn_roi,
                self._roi_size,
                self._strides,
                roi_mode=self._roi_mode,
            )
        else:
            # ROI features
            if self._roi_mode == "pool":
                pooled_feat = F.ROIPooling(
                    feat[0], rpn_roi, self._roi_size, 1.0 / self._strides
                )
            elif self._roi_mode == "align":
                pooled_feat = F.contrib.ROIAlign(
                    feat[0],
                    rpn_roi,
                    self._roi_size,
                    1.0 / self._strides,
                    sample_ratio=2,
                )
            else:
                raise ValueError("Invalid roi mode: {}".format(self._roi_mode))

        # RCNN prediction
        if self.top_features is not None:
            top_feat = self.top_features(pooled_feat)
        else:
            top_feat = pooled_feat
        if self.box_features is None:
            box_feat = F.contrib.AdaptiveAvgPooling2D(top_feat, output_size=1)
        else:
            box_feat = self.box_features(top_feat)
        cls_pred = self.class_predictor(box_feat)
        # cls_pred (B * N, C) -> (B, N, C)
        cls_pred = cls_pred.reshape((batch_size, num_roi, self.num_class + 1))

        if manual_rpn_box:
            # Manual proposals: return class scores plus the channel-averaged
            # ROI feature, skipping box regression and NMS entirely.
            spatial_feat = top_feat.mean(axis=1).reshape(
                (-4, rpn_box.shape[0], rpn_box.shape[1], -3)
            )
            cls_ids, scores = self.cls_decoder(F.softmax(cls_pred, axis=-1))
            cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
            scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
            cls_ids = _split(
                cls_ids, axis=0, num_outputs=batch_size, squeeze_axis=True
            )
            scores = _split(
                scores, axis=0, num_outputs=batch_size, squeeze_axis=True
            )
            return cls_ids, scores, rpn_box, spatial_feat

        # no need to convert bounding boxes in training, just return
        if autograd.is_training():
            (
                cls_targets,
                box_targets,
                box_masks,
                indices,
            ) = self._target_generator(
                rpn_box, samples, matches, gt_label, gt_box
            )
            box_feat = F.reshape(box_feat.expand_dims(0), (batch_size, -1, 0))
            box_pred = self.box_predictor(
                F.concat(
                    *[
                        F.take(
                            F.slice_axis(
                                box_feat, axis=0, begin=i, end=i + 1
                            ).squeeze(),
                            F.slice_axis(
                                indices, axis=0, begin=i, end=i + 1
                            ).squeeze(),
                        )
                        for i in range(batch_size)
                    ],
                    dim=0
                )
            )
            # box_pred (B * N, C * 4) -> (B, N, C, 4)
            box_pred = box_pred.reshape((batch_size, -1, self.num_class, 4))
            if self._additional_output:
                return (
                    cls_pred,
                    box_pred,
                    rpn_box,
                    samples,
                    matches,
                    raw_rpn_score,
                    raw_rpn_box,
                    anchors,
                    cls_targets,
                    box_targets,
                    box_masks,
                    top_feat,
                    indices,
                )
            return (
                cls_pred,
                box_pred,
                rpn_box,
                samples,
                matches,
                raw_rpn_score,
                raw_rpn_box,
                anchors,
                cls_targets,
                box_targets,
                box_masks,
                indices,
            )

        box_pred = self.box_predictor(box_feat)
        # box_pred (B * N, C * 4) -> (B, N, C, 4)
        box_pred = box_pred.reshape((batch_size, num_roi, self.num_class, 4))
        # cls_ids (B, N, C), scores (B, N, C)
        cls_ids, scores = self.cls_decoder(F.softmax(cls_pred, axis=-1))
        # cls_ids, scores (B, N, C) -> (B, C, N) -> (B, C, N, 1)
        cls_ids = cls_ids.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        scores = scores.transpose((0, 2, 1)).reshape((0, 0, 0, 1))
        # box_pred (B, N, C, 4) -> (B, C, N, 4)
        box_pred = box_pred.transpose((0, 2, 1, 3))

        # rpn_boxes (B, N, 4) -> B * (1, N, 4)
        rpn_boxes = _split(
            rpn_box, axis=0, num_outputs=batch_size, squeeze_axis=False
        )
        # cls_ids, scores (B, C, N, 1) -> B * (C, N, 1)
        cls_ids = _split(
            cls_ids, axis=0, num_outputs=batch_size, squeeze_axis=True
        )
        scores = _split(
            scores, axis=0, num_outputs=batch_size, squeeze_axis=True
        )
        # box_preds (B, C, N, 4) -> B * (C, N, 4)
        box_preds = _split(
            box_pred, axis=0, num_outputs=batch_size, squeeze_axis=True
        )

        # per batch predict, nms, each class has topk outputs
        results = []

        # add feat index
        if self._additional_output:
            # This path reads `.shape`, so it needs concrete NDArrays
            # (it cannot run on symbols).
            sizes = scores[0].shape[0:2]
            # FIX: the index used to be a hard-coded `linspace(0, 999, 1000)`
            # on the default context. The reshape below then only succeeded
            # for exactly 1000 proposals, and the later concat only on CPU.
            # Derive the length from the score tensor and allocate the index
            # on the same device instead; for N == 1000 on CPU the values are
            # identical to the original.
            ind = mx.nd.arange(sizes[1], ctx=scores[0].context)
            ind = mx.nd.repeat(ind, repeats=sizes[0])
            # (N * C,) -> (N, C) -> (C, N) -> (C, N, 1); ind[c, n, 0] == n
            ind = (
                ind.reshape(sizes[1], sizes[0])
                .transpose((1, 0))
                .expand_dims(axis=2)
            )
        for rpn_box, cls_id, score, box_pred in zip(
            rpn_boxes, cls_ids, scores, box_preds
        ):
            # box_pred (C, N, 4) rpn_box (1, N, 4) -> bbox (C, N, 4)
            bbox = self.box_decoder(box_pred, rpn_box)
            if self._additional_output:
                # res (C, N, 7)
                res = F.concat(*[cls_id, score, bbox, ind], dim=-1)
            else:
                # res (C, N, 6)
                res = F.concat(*[cls_id, score, bbox], dim=-1)
            if self.force_nms:
                # res (1, C*N, 6), to allow cross-category suppression
                res = res.reshape((1, -1, 0))
            # res (C, self.nms_topk, 6)
            res = F.contrib.box_nms(
                res,
                overlap_thresh=self.nms_thresh,
                topk=self.nms_topk,
                valid_thresh=0.001,
                id_index=0,
                score_index=1,
                coord_start=2,
                force_suppress=self.force_nms,
            )
            # res (C * self.nms_topk, 6)
            res = res.reshape((-3, 0))
            results.append(res)

        # result B * (C * topk, 6) -> (B, C * topk, 6)
        result = F.stack(*results, axis=0)
        ids = F.slice_axis(result, axis=-1, begin=0, end=1)
        scores = F.slice_axis(result, axis=-1, begin=1, end=2)
        bboxes = F.slice_axis(result, axis=-1, begin=2, end=6)
        if self._additional_output:
            # Column 6 carries the proposal index appended above.
            feat_ind = F.slice_axis(result, axis=-1, begin=6, end=7)
            spatial_feat = (
                top_feat.mean(axis=1).expand_dims(0).reshape(batch_size, 0, -1)
            )
            return ids, scores, bboxes, feat, feat_ind, spatial_feat
        return ids, scores, bboxes
""" net = FasterRCNN(**kwargs) if pretrained: from gluoncv.model_zoo.model_store import get_model_file full_name = "_".join(("faster_rcnn", name, dataset)) net.load_parameters( get_model_file(full_name, tag=pretrained, root=root), ctx=ctx, ignore_extra=True, allow_missing=True, ) else: for v in net.collect_params().values(): try: v.reset_ctx(ctx) except ValueError: pass return net def faster_rcnn_resnet50_v1b_coco( pretrained=False, pretrained_base=True, **kwargs ): r"""Faster RCNN model from the paper "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards real-time object detection with region proposal networks" Parameters ---------- pretrained : bool or str Boolean value controls whether to load the default pretrained weights for model. String value represents the hashtag for a certain version of pretrained weights. pretrained_base : bool or str, optional, default is True Load pretrained base network, the extra layers are randomized. Note that if pretrained is `True`, this has no effect. ctx : Context, default CPU The context in which to load the pretrained weights. root : str, default '~/.mxnet/models' Location for keeping the model parameters. 
def faster_rcnn_resnet50_v1b_custom(
    classes, transfer=None, pretrained_base=True, pretrained=False, **kwargs
):
    r"""Faster RCNN model with resnet50_v1b base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from faster RCNN networks trained
        on other datasets. The value is appended to ``"faster_rcnn_resnet50_v1b_"`` to
        form the GluonCV model-zoo name that is loaded.
    pretrained_base : bool or str
        Passed as ``pretrained`` to the ``resnet50_v1b`` base network; only used
        when ``transfer`` is `None`.
    pretrained : bool or str
        Custom models have no pretrained detector weights. A truthy value only
        triggers a warning and is otherwise ignored.
    **kwargs
        With ``transfer=None`` these are forwarded to both ``resnet50_v1b`` and
        ``get_faster_rcnn`` (e.g. ``ctx``, ``root``, ``additional_output``);
        otherwise they are forwarded to ``gluoncv.model_zoo.get_model``.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.

    """
    if pretrained:
        warnings.warn(
            "Custom models don't provide `pretrained` weights, ignored."
        )
        # FIX: actually ignore it. The value used to be forwarded to
        # `get_faster_rcnn`, which would then try to look up non-existent
        # "faster_rcnn_resnet50_v1b_custom" weights, contradicting the warning.
        pretrained = False
    if transfer is None:
        from gluoncv.model_zoo.resnetv1b import resnet50_v1b

        base_network = resnet50_v1b(
            pretrained=pretrained_base,
            dilated=False,
            use_global_stats=True,
            **kwargs
        )
        # Stages up to layer3 form the shared feature extractor; layer4 is
        # applied per ROI after pooling.
        features = nn.HybridSequential()
        top_features = nn.HybridSequential()
        for layer in [
            "conv1",
            "bn1",
            "relu",
            "maxpool",
            "layer1",
            "layer2",
            "layer3",
        ]:
            features.add(getattr(base_network, layer))
        for layer in ["layer4"]:
            top_features.add(getattr(base_network, layer))
        train_patterns = "|".join(
            [".*dense", ".*rpn", ".*down(2|3|4)_conv", ".*layers(2|3|4)_conv"]
        )
        return get_faster_rcnn(
            name="resnet50_v1b",
            dataset="custom",
            pretrained=pretrained,
            features=features,
            top_features=top_features,
            classes=classes,
            short=600,
            max_size=1000,
            train_patterns=train_patterns,
            nms_thresh=0.7,
            nms_topk=400,
            post_nms=100,
            roi_mode="align",
            roi_size=(14, 14),
            strides=16,
            clip=4.14,
            rpn_channel=1024,
            base_size=16,
            scales=(2, 4, 8, 16, 32),
            ratios=(0.5, 1, 2),
            alloc_size=(128, 128),
            rpn_nms_thresh=0.7,
            rpn_train_pre_nms=12000,
            rpn_train_post_nms=2000,
            rpn_test_pre_nms=6000,
            rpn_test_post_nms=300,
            rpn_min_size=16,
            num_sample=128,
            pos_iou_thresh=0.5,
            pos_ratio=0.25,
            max_num_gt=3000,
            **kwargs
        )
    else:
        from gluoncv.model_zoo import get_model

        net = get_model(
            "faster_rcnn_resnet50_v1b_" + str(transfer),
            pretrained=True,
            **kwargs
        )
        # Keep the trained predictor weights of every class name that the
        # source network already knows.
        reuse_classes = [x for x in classes if x in net.classes]
        net.reset_class(classes, reuse_weights=reuse_classes)
    return net
rpn_test_post_nms=1000, rpn_min_size=1, num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=3000, **kwargs ) def faster_rcnn_resnet101_v1d_custom( classes, transfer=None, pretrained_base=True, pretrained=False, **kwargs ): r"""Faster RCNN model with resnet101_v1d base network on custom dataset. Parameters ---------- classes : iterable of str Names of custom foreground classes. `len(classes)` is the number of foreground classes. transfer : str or None If not `None`, will try to reuse pre-trained weights from faster RCNN networks trained on other datasets. pretrained_base : bool or str Boolean value controls whether to load the default pretrained weights for model. String value represents the hashtag for a certain version of pretrained weights. ctx : Context, default CPU The context in which to load the pretrained weights. root : str, default '~/.mxnet/models' Location for keeping the model parameters. Returns ------- mxnet.gluon.HybridBlock Hybrid faster RCNN network. """ if pretrained: warnings.warn( "Custom models don't provide `pretrained` weights, ignored." 
) if transfer is None: from gluoncv.model_zoo.resnetv1b import resnet101_v1d base_network = resnet101_v1d( pretrained=pretrained_base, dilated=False, use_global_stats=True, **kwargs ) features = nn.HybridSequential() top_features = nn.HybridSequential() for layer in [ "conv1", "bn1", "relu", "maxpool", "layer1", "layer2", "layer3", ]: features.add(getattr(base_network, layer)) for layer in ["layer4"]: top_features.add(getattr(base_network, layer)) train_patterns = "|".join( [".*dense", ".*rpn", ".*down(2|3|4)_conv", ".*layers(2|3|4)_conv"] ) return get_faster_rcnn( name="resnet101_v1d", dataset="custom", pretrained=pretrained, features=features, top_features=top_features, classes=classes, short=600, max_size=1000, train_patterns=train_patterns, nms_thresh=0.5, nms_topk=400, post_nms=100, roi_mode="align", roi_size=(14, 14), strides=16, clip=4.14, rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000, rpn_test_pre_nms=6000, rpn_test_post_nms=300, rpn_min_size=16, num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=3000, **kwargs ) else: net = faster_rcnn_resnet101_v1d_coco(pretrained=True) reuse_classes = [x for x in classes if x in net.classes] net.reset_class(classes, reuse_weights=reuse_classes) return net ================================================ FILE: examples/mxnet/scenegraph/model/reldn.py ================================================ import pickle import dgl import gluoncv as gcv import mxnet as mx import numpy as np from dgl.nn.mxnet import GraphConv from dgl.utils import toindex from mxnet import nd from mxnet.gluon import nn __all__ = ["RelDN"] class EdgeConfMLP(nn.Block): """compute the confidence for edges""" def __init__(self): super(EdgeConfMLP, self).__init__() def forward(self, edges): score_pred = nd.log_softmax(edges.data["preds"])[:, 1:].max(axis=1) score_phr = ( score_pred + edges.src["node_class_logit"] + 
edges.dst["node_class_logit"] ) return {"score_pred": score_pred, "score_phr": score_phr} class EdgeBBoxExtend(nn.Block): """encode the bounding boxes""" def __init__(self): super(EdgeBBoxExtend, self).__init__() def bbox_delta(self, bbox_a, bbox_b): n = bbox_a.shape[0] result = nd.zeros((n, 4), ctx=bbox_a.context) result[:, 0] = bbox_a[:, 0] - bbox_b[:, 0] result[:, 1] = bbox_a[:, 1] - bbox_b[:, 1] result[:, 2] = nd.log( (bbox_a[:, 2] - bbox_a[:, 0] + 1e-8) / (bbox_b[:, 2] - bbox_b[:, 0] + 1e-8) ) result[:, 3] = nd.log( (bbox_a[:, 3] - bbox_a[:, 1] + 1e-8) / (bbox_b[:, 3] - bbox_b[:, 1] + 1e-8) ) return result def forward(self, edges): ctx = edges.src["pred_bbox"].context n = edges.src["pred_bbox"].shape[0] delta_src_obj = self.bbox_delta( edges.src["pred_bbox"], edges.dst["pred_bbox"] ) delta_src_rel = self.bbox_delta( edges.src["pred_bbox"], edges.data["rel_bbox"] ) delta_rel_obj = self.bbox_delta( edges.data["rel_bbox"], edges.dst["pred_bbox"] ) result = nd.zeros((n, 12), ctx=ctx) result[:, 0:4] = delta_src_obj result[:, 4:8] = delta_src_rel result[:, 8:12] = delta_rel_obj return {"pred_bbox_additional": result} class EdgeFreqPrior(nn.Block): """make use of the pre-trained frequency prior""" def __init__(self, prior_pkl): super(EdgeFreqPrior, self).__init__() with open(prior_pkl, "rb") as f: freq_prior = pickle.load(f) self.freq_prior = freq_prior def forward(self, edges): ctx = edges.src["node_class_pred"].context src_ind = edges.src["node_class_pred"].asnumpy().astype(int) dst_ind = edges.dst["node_class_pred"].asnumpy().astype(int) prob = self.freq_prior[src_ind, dst_ind] out = nd.array(prob, ctx=ctx) return {"freq_prior": out} class EdgeSpatial(nn.Block): """spatial feature branch""" def __init__(self, n_classes): super(EdgeSpatial, self).__init__() self.mlp = nn.Sequential() self.mlp.add(nn.Dense(64)) self.mlp.add(nn.LeakyReLU(0.1)) self.mlp.add(nn.Dense(64)) self.mlp.add(nn.LeakyReLU(0.1)) self.mlp.add(nn.Dense(n_classes)) def forward(self, edges): feat = 
class RelDN(nn.Block):
    """The RelDN Model.

    Combines up to three per-edge predicate scores -- a frequency prior,
    a spatial branch and a visual branch -- into ``g.edata["preds"]`` and
    then applies the edge confidence MLP.

    Parameters
    ----------
    n_classes : int
        Number of predicate classes; one extra "link" class is added
        internally for the spatial and visual branches.
    prior_pkl : str or None
        Path to the pickled frequency prior. When ``None`` the prior
        branch is disabled and only the spatial and visual scores are
        summed.
    semantic_only : bool
        When true, predictions are the frequency prior alone; this
        requires ``prior_pkl``.

    Raises
    ------
    ValueError
        If ``semantic_only`` is requested without a frequency prior.
    """

    def __init__(self, n_classes, prior_pkl, semantic_only=False):
        super(RelDN, self).__init__()
        if semantic_only and prior_pkl is None:
            # Fail fast: the semantic-only path has nothing else to predict from.
            raise ValueError(
                "semantic_only=True requires a frequency prior (prior_pkl)."
            )
        # output layers
        self.edge_bbox_extend = EdgeBBoxExtend()
        # semantic through mlp encoding; always define the attribute so that
        # forward() can test for it instead of raising AttributeError
        if prior_pkl is not None:
            self.freq_prior = EdgeFreqPrior(prior_pkl)
        else:
            self.freq_prior = None
        # with predicate class and a link class
        self.spatial = EdgeSpatial(n_classes + 1)
        # with visual features
        self.visual = EdgeVisual(n_classes + 1)
        self.edge_conf_mlp = EdgeConfMLP()
        self.semantic_only = semantic_only

    def forward(self, g):
        """Annotate the edges of ``g`` with predictions and return ``g``.

        ``None`` and empty graphs are returned unchanged.
        """
        if g is None or g.number_of_nodes() == 0:
            return g

        use_prior = self.freq_prior is not None
        # predictions
        if use_prior:
            g.apply_edges(self.freq_prior)
        if self.semantic_only:
            g.edata["preds"] = g.edata["freq_prior"]
        else:
            # bbox extension
            g.apply_edges(self.edge_bbox_extend)
            g.apply_edges(self.spatial)
            g.apply_edges(self.visual)
            if use_prior:
                # same summation order as before to keep results bit-identical
                g.edata["preds"] = (
                    g.edata["freq_prior"]
                    + g.edata["spatial"]
                    + g.edata["visual"]
                )
            else:
                g.edata["preds"] = g.edata["spatial"] + g.edata["visual"]
        # subgraph for gconv
        g.apply_edges(self.edge_conf_mlp)
        return g
def parse_args():
    """Parse the command line of the Faster R-CNN training script.

    Options whose default depends on the dataset (``--epochs``, ``--lr``,
    ``--lr-decay-epoch``, ``--lr-warmup``, ``--wd``) are declared as strings
    with an empty default; after parsing, empty values are replaced by the
    per-dataset defaults. The dataset name is matched case-insensitively,
    as ``get_dataset`` does. For an unknown dataset the values are left
    untouched and ``get_dataset`` reports the error.

    Returns
    -------
    argparse.Namespace
        Parsed arguments. ``epochs`` is an int, ``lr`` and ``wd`` are
        floats; ``lr_warmup`` stays a string when given on the command
        line (the caller converts it with ``float``).

    Raises
    ------
    SystemExit
        If ``--horovod`` is requested but horovod could not be imported.
    """
    parser = argparse.ArgumentParser(
        description="Train Faster-RCNN networks e2e."
    )
    parser.add_argument(
        "--network",
        type=str,
        default="resnet101_v1d",
        help="Base network name which serves as feature extraction base.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="visualgenome",
        help="Training dataset. Now support voc, coco and visualgenome.",
    )
    parser.add_argument(
        "--num-workers",
        "-j",
        dest="num_workers",
        type=int,
        default=8,
        help="Number of data workers, you can use larger "
        "number to accelerate data loading, "
        "if your CPU and GPUs are powerful.",
    )
    parser.add_argument(
        "--batch-size", type=int, default=8, help="Training mini-batch size."
    )
    parser.add_argument(
        "--gpus",
        type=str,
        default="0",
        help="Training with GPUs, you can specify 1,3 for example.",
    )
    parser.add_argument(
        "--epochs", type=str, default="", help="Training epochs."
    )
    parser.add_argument(
        "--resume",
        type=str,
        default="",
        help="Resume from previously saved parameters if not None. "
        "For example, you can resume from ./faster_rcnn_xxx_0123.params",
    )
    parser.add_argument(
        "--start-epoch",
        type=int,
        default=0,
        help="Starting epoch for resuming, default is 0 for new training. "
        "You can specify it to 100 for example to start from 100 epoch.",
    )
    parser.add_argument(
        "--lr",
        type=str,
        default="",
        help="Learning rate, default is 0.001 for voc single gpu training.",
    )
    parser.add_argument(
        "--lr-decay",
        type=float,
        default=0.1,
        help="decay rate of learning rate. default is 0.1.",
    )
    parser.add_argument(
        "--lr-decay-epoch",
        type=str,
        default="",
        help="epochs at which learning rate decays. default is 14,20 for voc.",
    )
    parser.add_argument(
        "--lr-warmup",
        type=str,
        default="",
        help="warmup iterations to adjust learning rate, default is 0 for voc.",
    )
    parser.add_argument(
        "--lr-warmup-factor",
        type=float,
        default=1.0 / 3.0,
        help="warmup factor of base lr.",
    )
    parser.add_argument(
        "--momentum",
        type=float,
        default=0.9,
        help="SGD momentum, default is 0.9",
    )
    parser.add_argument(
        "--wd",
        type=str,
        default="",
        help="Weight decay, default is 5e-4 for voc",
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        help="Logging mini-batch interval. Default is 100.",
    )
    parser.add_argument(
        "--save-prefix", type=str, default="", help="Saving parameter prefix"
    )
    parser.add_argument(
        "--save-interval",
        type=int,
        default=1,
        help="Saving parameters epoch interval, best model will always be saved.",
    )
    parser.add_argument(
        "--val-interval",
        type=int,
        default=1,
        help="Epoch interval for validation, increase the number will reduce the "
        "training time if validation is slow.",
    )
    parser.add_argument(
        "--seed", type=int, default=233, help="Random seed to be fixed."
    )
    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store_true",
        help="Print helpful debugging info once set.",
    )
    parser.add_argument(
        "--mixup", action="store_true", help="Use mixup training."
    )
    parser.add_argument(
        "--no-mixup-epochs",
        type=int,
        default=20,
        help="Disable mixup training if enabled in the last N epochs.",
    )
    # Norm layer options
    parser.add_argument(
        "--norm-layer",
        type=str,
        default=None,
        help="Type of normalization layer to use. "
        "If set to None, backbone normalization layer will be fixed,"
        " and no normalization layer will be used. "
        "Currently supports 'bn', and None, default is None. "
        "Note that if horovod is enabled, sync bn will not work correctly.",
    )
    # FPN options
    parser.add_argument(
        "--use-fpn",
        action="store_true",
        help="Whether to use feature pyramid network.",
    )
    # Performance options
    parser.add_argument(
        "--disable-hybridization",
        action="store_true",
        help="Whether to disable hybridize the model. "
        "Memory usage and speed will decrese.",
    )
    parser.add_argument(
        "--static-alloc",
        action="store_true",
        help="Whether to use static memory allocation. Memory usage will increase.",
    )
    parser.add_argument(
        "--amp",
        action="store_true",
        help="Use MXNet AMP for mixed precision training.",
    )
    parser.add_argument(
        "--horovod",
        action="store_true",
        help="Use MXNet Horovod for distributed training. Must be run with OpenMPI. "
        "--gpus is ignored when using --horovod.",
    )
    parser.add_argument(
        "--executor-threads",
        type=int,
        default=1,
        help="Number of threads for executor for scheduling ops. "
        "More threads may incur higher GPU memory footprint, "
        "but may speed up throughput. Note that when horovod is used, "
        "it is set to 1.",
    )
    parser.add_argument(
        "--kv-store",
        type=str,
        default="nccl",
        help="KV store options. local, device, nccl, dist_sync, dist_device_sync, "
        "dist_async are available.",
    )
    args = parser.parse_args()

    if args.horovod:
        if hvd is None:
            raise SystemExit(
                "Horovod not found, please check if you installed it correctly."
            )
        hvd.init()

    # Per-dataset fallbacks for the options left empty on the command line.
    short_schedule = {
        "epochs": 20,
        "lr_decay_epoch": "14,20",
        "lr": 0.001,
        "lr_warmup": -1,
        "wd": 5e-4,
    }
    dataset_defaults = {
        "voc": short_schedule,
        "visualgenome": short_schedule,
        "coco": {
            "epochs": 26,
            "lr_decay_epoch": "17,23",
            "lr": 0.01,
            "lr_warmup": 1000,
            "wd": 1e-4,
        },
    }
    # get_dataset() lowercases the name, so the lookup must do the same.
    defaults = dataset_defaults.get(args.dataset.lower())
    if defaults is not None:
        args.epochs = int(args.epochs) if args.epochs else defaults["epochs"]
        args.lr_decay_epoch = (
            args.lr_decay_epoch
            if args.lr_decay_epoch
            else defaults["lr_decay_epoch"]
        )
        args.lr = float(args.lr) if args.lr else defaults["lr"]
        # kept as given (string) when supplied; the trainer converts it
        args.lr_warmup = (
            args.lr_warmup if args.lr_warmup else defaults["lr_warmup"]
        )
        args.wd = float(args.wd) if args.wd else defaults["wd"]
    return args
def get_dataloader(
    net,
    train_dataset,
    val_dataset,
    train_transform,
    val_transform,
    batch_size,
    num_shards,
    args,
):
    """Build the training loader and, if requested, the validation loader.

    The training loader uses a bucket sampler keyed on image aspect ratio
    (all ratios are taken as 1.0 when the dataset does not provide them)
    and is split across horovod workers when ``args.horovod`` is set.

    Returns
    -------
    tuple
        ``(train_loader, val_loader)``; ``val_loader`` is ``None`` when
        ``val_dataset`` is ``None``.
    """
    train_bfn = FasterRCNNTrainBatchify(net, num_shards)

    im_aspect_ratio = (
        train_dataset.get_im_aspect_ratio()
        if hasattr(train_dataset, "get_im_aspect_ratio")
        else [1.0] * len(train_dataset)
    )

    # Each horovod worker samples only its own partition of the data.
    if args.horovod:
        num_parts, part_index = hvd.size(), hvd.rank()
    else:
        num_parts, part_index = 1, 0
    train_sampler = gcv.nn.sampler.SplitSortedBucketSampler(
        im_aspect_ratio,
        batch_size,
        num_parts=num_parts,
        part_index=part_index,
        shuffle=True,
    )

    transformed_train = train_dataset.transform(
        train_transform(
            net.short,
            net.max_size,
            net,
            ashape=net.ashape,
            multi_stage=args.use_fpn,
        )
    )
    train_loader = mx.gluon.data.DataLoader(
        transformed_train,
        batch_sampler=train_sampler,
        batchify_fn=train_bfn,
        num_workers=args.num_workers,
    )

    if val_dataset is None:
        return train_loader, None

    val_bfn = Tuple(*(Append() for _ in range(3)))
    # When several training scales are configured, validate at the last one.
    short = net.short
    if isinstance(short, (tuple, list)):
        short = short[-1]
    # validation use 1 sample per device
    val_loader = mx.gluon.data.DataLoader(
        val_dataset.transform(val_transform(short, net.max_size)),
        num_shards,
        False,
        batchify_fn=val_bfn,
        last_batch="keep",
        num_workers=args.num_workers,
    )
    return train_loader, val_loader
class ForwardBackwardTask(Parallelizable):
    """One forward/backward pass of Faster R-CNN on a single device shard.

    Parameters
    ----------
    net : network called as ``net(data, gt_box, gt_label)``
    optimizer : trainer handed to ``amp.scale_loss`` when AMP is enabled
    rpn_cls_loss, rpn_box_loss, rcnn_cls_loss, rcnn_box_loss : loss blocks
    mix_ratio : float
        Factor applied to every loss (callers set 0.5 while mixup is active).
    """

    def __init__(
        self,
        net,
        optimizer,
        rpn_cls_loss,
        rpn_box_loss,
        rcnn_cls_loss,
        rcnn_box_loss,
        mix_ratio,
    ):
        super(ForwardBackwardTask, self).__init__()
        self.net = net
        self._optimizer = optimizer
        self.rpn_cls_loss = rpn_cls_loss
        self.rpn_box_loss = rpn_box_loss
        self.rcnn_cls_loss = rcnn_cls_loss
        self.rcnn_box_loss = rcnn_box_loss
        self.mix_ratio = mix_ratio

    def forward_backward(self, x):
        """Run the network on one shard, back-propagate, return metric inputs.

        ``x`` is the 5-tuple ``(data, label, rpn_cls_targets,
        rpn_box_targets, rpn_box_masks)``. The return value is an 8-tuple:
        the four scaled mean losses followed by ``[labels, preds]`` pairs
        for the RPN/RCNN accuracy and L1 metrics.
        """
        data, label, rpn_cls_targets, rpn_box_targets, rpn_box_masks = x
        with autograd.record():
            # label columns: first four are the box, column 4 is the class id
            gt_label = label[:, :, 4:5]
            gt_box = label[:, :, :4]
            # Use the network this task was built with, not a module global.
            (
                cls_pred,
                box_pred,
                roi,
                samples,
                matches,
                rpn_score,
                rpn_box,
                anchors,
                cls_targets,
                box_targets,
                box_masks,
                _,
            ) = self.net(data, gt_box, gt_label)
            # losses of rpn
            rpn_score = rpn_score.squeeze(axis=-1)
            # targets >= 0 are the sampled anchors; negatives are ignored
            num_rpn_pos = (rpn_cls_targets >= 0).sum()
            rpn_loss1 = (
                self.rpn_cls_loss(
                    rpn_score, rpn_cls_targets, rpn_cls_targets >= 0
                )
                * rpn_cls_targets.size
                / num_rpn_pos
            )
            rpn_loss2 = (
                self.rpn_box_loss(rpn_box, rpn_box_targets, rpn_box_masks)
                * rpn_box.size
                / num_rpn_pos
            )
            # rpn overall loss, use sum rather than average
            rpn_loss = rpn_loss1 + rpn_loss2
            # losses of rcnn
            num_rcnn_pos = (cls_targets >= 0).sum()
            rcnn_loss1 = (
                self.rcnn_cls_loss(
                    cls_pred, cls_targets, cls_targets.expand_dims(-1) >= 0
                )
                * cls_targets.size
                / num_rcnn_pos
            )
            rcnn_loss2 = (
                self.rcnn_box_loss(box_pred, box_targets, box_masks)
                * box_pred.size
                / num_rcnn_pos
            )
            rcnn_loss = rcnn_loss1 + rcnn_loss2
            # overall losses
            total_loss = (
                rpn_loss.sum() * self.mix_ratio
                + rcnn_loss.sum() * self.mix_ratio
            )

            rpn_loss1_metric = rpn_loss1.mean() * self.mix_ratio
            rpn_loss2_metric = rpn_loss2.mean() * self.mix_ratio
            rcnn_loss1_metric = rcnn_loss1.mean() * self.mix_ratio
            rcnn_loss2_metric = rcnn_loss2.mean() * self.mix_ratio
            rpn_acc_metric = [
                [rpn_cls_targets, rpn_cls_targets >= 0],
                [rpn_score],
            ]
            rpn_l1_loss_metric = [[rpn_box_targets, rpn_box_masks], [rpn_box]]
            rcnn_acc_metric = [[cls_targets], [cls_pred]]
            rcnn_l1_loss_metric = [[box_targets, box_masks], [box_pred]]

            # NOTE: ``args`` is the module-level namespace set by the script
            # entry point; this class is only usable after it has been parsed.
            if args.amp:
                with amp.scale_loss(
                    total_loss, self._optimizer
                ) as scaled_losses:
                    autograd.backward(scaled_losses)
            else:
                total_loss.backward()

        return (
            rpn_loss1_metric,
            rpn_loss2_metric,
            rcnn_loss1_metric,
            rcnn_loss2_metric,
            rpn_acc_metric,
            rpn_l1_loss_metric,
            rcnn_acc_metric,
            rcnn_l1_loss_metric,
        )
rpn_cls_loss = mx.gluon.loss.SigmoidBinaryCrossEntropyLoss( from_sigmoid=False ) rpn_box_loss = mx.gluon.loss.HuberLoss(rho=1 / 9.0) # == smoothl1 rcnn_cls_loss = mx.gluon.loss.SoftmaxCrossEntropyLoss() rcnn_box_loss = mx.gluon.loss.HuberLoss() # == smoothl1 metrics = [ mx.metric.Loss("RPN_Conf"), mx.metric.Loss("RPN_SmoothL1"), mx.metric.Loss("RCNN_CrossEntropy"), mx.metric.Loss("RCNN_SmoothL1"), ] rpn_acc_metric = RPNAccMetric() rpn_bbox_metric = RPNL1LossMetric() rcnn_acc_metric = RCNNAccMetric() rcnn_bbox_metric = RCNNL1LossMetric() metrics2 = [ rpn_acc_metric, rpn_bbox_metric, rcnn_acc_metric, rcnn_bbox_metric, ] # set up logger logging.basicConfig() logger = logging.getLogger() logger.setLevel(logging.INFO) log_file_path = args.save_prefix + "_train.log" log_dir = os.path.dirname(log_file_path) if log_dir and not os.path.exists(log_dir): os.makedirs(log_dir) fh = logging.FileHandler(log_file_path) logger.addHandler(fh) logger.info(args) if args.verbose: logger.info("Trainable parameters:") logger.info(net.collect_train_params().keys()) logger.info("Start training from [Epoch {}]".format(args.start_epoch)) best_map = [0] for epoch in range(args.start_epoch, args.epochs): mix_ratio = 1.0 if not args.disable_hybridization: net.hybridize(static_alloc=args.static_alloc) rcnn_task = ForwardBackwardTask( net, trainer, rpn_cls_loss, rpn_box_loss, rcnn_cls_loss, rcnn_box_loss, mix_ratio=1.0, ) executor = ( Parallel(args.executor_threads, rcnn_task) if not args.horovod else None ) if args.mixup: # TODO(zhreshold) only support evenly mixup now, target generator needs to be modified otherwise train_data._dataset._data.set_mixup(np.random.uniform, 0.5, 0.5) mix_ratio = 0.5 if epoch >= args.epochs - args.no_mixup_epochs: train_data._dataset._data.set_mixup(None) mix_ratio = 1.0 while lr_steps and epoch >= lr_steps[0]: new_lr = trainer.learning_rate * lr_decay lr_steps.pop(0) trainer.set_learning_rate(new_lr) logger.info( "[Epoch {}] Set learning rate to {}".format(epoch, 
new_lr) ) for metric in metrics: metric.reset() tic = time.time() btic = time.time() base_lr = trainer.learning_rate rcnn_task.mix_ratio = mix_ratio logger.info("Total Num of Batches: %d" % (len(train_data))) for i, batch in enumerate(train_data): if epoch == 0 and i <= lr_warmup: # adjust based on real percentage new_lr = base_lr * get_lr_at_iter( i / lr_warmup, args.lr_warmup_factor ) if new_lr != trainer.learning_rate: if i % args.log_interval == 0: logger.info( "[Epoch 0 Iteration {}] Set learning rate to {}".format( i, new_lr ) ) trainer.set_learning_rate(new_lr) batch = split_and_load(batch, ctx_list=ctx) metric_losses = [[] for _ in metrics] add_losses = [[] for _ in metrics2] if executor is not None: for data in zip(*batch): executor.put(data) for j in range(len(ctx)): if executor is not None: result = executor.get() else: result = rcnn_task.forward_backward(list(zip(*batch))[0]) if (not args.horovod) or hvd.rank() == 0: for k in range(len(metric_losses)): metric_losses[k].append(result[k]) for k in range(len(add_losses)): add_losses[k].append(result[len(metric_losses) + k]) for metric, record in zip(metrics, metric_losses): metric.update(0, record) for metric, records in zip(metrics2, add_losses): for pred in records: metric.update(pred[0], pred[1]) trainer.step(batch_size) # update metrics if ( (not args.horovod or hvd.rank() == 0) and args.log_interval and not (i + 1) % args.log_interval ): msg = ",".join( [ "{}={:.3f}".format(*metric.get()) for metric in metrics + metrics2 ] ) logger.info( "[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}".format( epoch, i, args.log_interval * args.batch_size / (time.time() - btic), msg, ) ) btic = time.time() if (not args.horovod) or hvd.rank() == 0: msg = ",".join( ["{}={:.3f}".format(*metric.get()) for metric in metrics] ) logger.info( "[Epoch {}] Training cost: {:.3f}, {}".format( epoch, (time.time() - tic), msg ) ) if not (epoch + 1) % args.val_interval: # consider reduce the frequency of validation to save time 
if __name__ == "__main__":
    # Script entry point: parse options, build the detector, the data
    # loaders and start training.
    import sys

    sys.setrecursionlimit(1100)

    args = parse_args()
    # fix seed for mxnet, numpy and python builtin random generator.
    gutils.random.seed(args.seed)

    if args.amp:
        amp.init()

    # training contexts
    if args.horovod:
        # one process per GPU; --gpus is ignored in this mode
        ctx = [mx.gpu(hvd.local_rank())]
    else:
        ctx = [mx.gpu(int(i)) for i in args.gpus.split(",") if i.strip()]
        ctx = ctx if ctx else [mx.cpu()]

    # network
    kwargs = {}
    module_list = []
    if args.use_fpn:
        module_list.append("fpn")
    if args.norm_layer is not None:
        module_list.append(args.norm_layer)
        if args.norm_layer == "bn":
            # Count the contexts actually in use rather than the raw --gpus
            # tokens, which are wrong with a trailing comma or under horovod.
            kwargs["num_devices"] = len(ctx)
    net_name = "_".join(("faster_rcnn", *module_list, args.network, "custom"))
    args.save_prefix += net_name
    gutils.makedirs(args.save_prefix)

    train_dataset, val_dataset, eval_metric = get_dataset(args.dataset, args)
    # NOTE(review): the ResNet-101 v1d detector is always built here,
    # whatever --network says; only the save prefix reflects the option.
    net = faster_rcnn_resnet101_v1d_custom(
        classes=train_dataset.classes,
        transfer="coco",
        pretrained_base=False,
        additional_output=False,
        per_device_batch_size=args.batch_size // len(ctx),
        **kwargs
    )
    if args.resume.strip():
        net.load_parameters(args.resume.strip())
    else:
        # initialise only the parameters that did not come with loaded data
        for param in net.collect_params().values():
            if param._data is not None:
                continue
            param.initialize()
    net.collect_params().reset_ctx(ctx)

    # training data
    # under horovod each worker handles its share of the global batch
    batch_size = (
        args.batch_size // len(ctx) if args.horovod else args.batch_size
    )
    train_data, val_data = get_dataloader(
        net,
        train_dataset,
        val_dataset,
        FasterRCNNDefaultTrainTransform,
        FasterRCNNDefaultValTransform,
        batch_size,
        len(ctx),
        args,
    )

    # training
    train(net, train_data, val_data, eval_metric, batch_size, ctx, args)
def parse_args():
    """Parse the command line of the frequency-prior script.

    Returns
    -------
    argparse.Namespace
        ``overlap`` (bool): count only overlapping box pairs as background.
        ``json_path`` (str): directory holding
        ``rel_annotations_train.json``; ``~`` is expanded by the caller.
    """
    parser = argparse.ArgumentParser(
        description="Train the Frequency Prior For RelDN."
    )
    parser.add_argument(
        "--overlap", action="store_true", help="Only count overlap boxes."
    )
    parser.add_argument(
        "--json-path",
        type=str,
        default="~/.mxnet/datasets/visualgenome",
        help="Directory containing rel_annotations_train.json.",
    )
    return parser.parse_args()
def parse_args():
    """Parse the command line of the RelDN training script.

    Returns
    -------
    argparse.Namespace
        Parsed options. ``--pretrained-faster-rcnn-params`` is mandatory;
        ``--log-dir`` is the path of the log *file* (it is handed to
        ``logging.FileHandler`` by the script).
    """
    parser = argparse.ArgumentParser(description="Train RelDN Model.")
    parser.add_argument(
        "--gpus",
        type=str,
        default="0",
        help="Training with GPUs, you can specify 1,3 for example.",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=8,
        help="Total batch-size for training.",
    )
    parser.add_argument(
        "--epochs", type=int, default=9, help="Training epochs."
    )
    parser.add_argument(
        "--lr-reldn",
        type=float,
        default=0.01,
        help="Learning rate for RelDN module.",
    )
    parser.add_argument(
        "--wd-reldn",
        type=float,
        default=0.0001,
        help="Weight decay for RelDN module.",
    )
    parser.add_argument(
        "--lr-faster-rcnn",
        type=float,
        default=0.01,
        help="Learning rate for Faster R-CNN module.",
    )
    parser.add_argument(
        "--wd-faster-rcnn",
        type=float,
        default=0.0001,
        help="Weight decay for Faster R-CNN module.",
    )
    parser.add_argument(
        "--lr-decay-epochs",
        type=str,
        default="5,8",
        help="Learning rate decay points.",
    )
    parser.add_argument(
        "--lr-warmup-iters",
        type=int,
        default=4000,
        help="Learning rate warm-up iterations.",
    )
    parser.add_argument(
        "--save-dir",
        type=str,
        default="params_resnet101_v1d_reldn",
        help="Path to save model parameters.",
    )
    parser.add_argument(
        "--log-dir",
        type=str,
        default="reldn_output.log",
        help="Path of the file to write training logs to.",
    )
    parser.add_argument(
        "--pretrained-faster-rcnn-params",
        type=str,
        required=True,
        help="Path to saved Faster R-CNN model parameters.",
    )
    parser.add_argument(
        "--freq-prior",
        type=str,
        default="freq_prior.pkl",
        help="Path to saved frequency prior data.",
    )
    parser.add_argument(
        "--verbose-freq",
        type=int,
        default=100,
        help="Frequency of log printing in number of iterations.",
    )
    return parser.parse_args()
def get_data_batch(g_list, img_list, ctx_list):
    """Split a batch of graphs and images across devices.

    The batch is cut into ``len(ctx_list)`` consecutive shards of
    ``len(g_list) // len(ctx_list)`` items; the last shard also takes the
    remainder. Graph features and image shards are then moved to the
    device of their shard.

    Parameters
    ----------
    g_list : sequence of graphs or None
        Graphs carrying ``ndata`` (``bbox``, ``node_class``,
        ``node_class_vec``) and ``edata`` (``rel_class``); their features
        are moved in place.
    img_list : sliceable batch of images aligned with ``g_list``
    ctx_list : list of device contexts

    Returns
    -------
    tuple
        ``(G_list, img_list)`` with one entry per device, or
        ``(None, None)`` when ``g_list`` is ``None`` or empty.

    Raises
    ------
    Exception
        If there are fewer graphs than devices.
    """
    if g_list is None or len(g_list) == 0:
        return None, None
    n_gpu = len(ctx_list)
    size = len(g_list)
    if size < n_gpu:
        raise Exception("too small batch")
    step = size // n_gpu

    # [start, stop) of every shard; the last shard absorbs the remainder.
    bounds = [
        (i * step, (i + 1) * step if i < n_gpu - 1 else size)
        for i in range(n_gpu)
    ]
    G_list = [g_list[start:stop] for start, stop in bounds]
    img_list = [img_list[start:stop] for start, stop in bounds]

    for G_slice, ctx in zip(G_list, ctx_list):
        for G in G_slice:
            for key in ("bbox", "node_class", "node_class_vec"):
                G.ndata[key] = G.ndata[key].as_in_context(ctx)
            G.edata["rel_class"] = G.edata["rel_class"].as_in_context(ctx)

    # Each image shard goes to the device of its own graphs; previously all
    # shards were sent to the last device (the leaked loop variable).
    img_list = [
        img.as_in_context(ctx) for img, ctx in zip(img_list, ctx_list)
    ]
    return G_list, img_list
get_data_batch(G_list, img_list, ctx) if G_list is None or img_list is None: btic, loss_rel_val = batch_print( epoch, i, batch_verbose_freq, n_batches, btic, loss_rel_val, metric_list, ) continue loss = [] detector_res_list = [] G_batch = [] bbox_pad = Pad(axis=(0)) with mx.autograd.record(): for G_slice, img in zip(G_list, img_list): cur_ctx = img.context bbox_list = [G.ndata["bbox"] for G in G_slice] bbox_stack = bbox_pad(bbox_list).as_in_context(cur_ctx) with mx.autograd.pause(): ids, scores, bbox, feat, feat_ind, spatial_feat = detector( img ) g_pred_batch = build_graph_train( G_slice, bbox_stack, img, ids, scores, bbox, feat_ind, spatial_feat, scores_top_k=300, overlap=False, ) g_batch = l0_sample(g_pred_batch) if g_batch is None: continue rel_bbox = g_batch.edata["rel_bbox"] batch_id = g_batch.edata["batch_id"].asnumpy() n_sample_edges = g_batch.number_of_edges() n_graph = len(G_slice) bbox_rel_list = [] for j in range(n_graph): eids = np.where(batch_id == j)[0] if len(eids) > 0: bbox_rel_list.append(rel_bbox[eids]) bbox_rel_stack = bbox_pad(bbox_rel_list).as_in_context(cur_ctx) img_size = img.shape[2:4] bbox_rel_stack[:, :, 0] *= img_size[1] bbox_rel_stack[:, :, 1] *= img_size[0] bbox_rel_stack[:, :, 2] *= img_size[1] bbox_rel_stack[:, :, 3] *= img_size[0] _, _, _, spatial_feat_rel = detector_feat( img, None, None, bbox_rel_stack ) spatial_feat_rel_list = [] for j in range(n_graph): eids = np.where(batch_id == j)[0] if len(eids) > 0: spatial_feat_rel_list.append( spatial_feat_rel[j, 0 : len(eids)] ) g_batch.edata["edge_feat"] = nd.concat( *spatial_feat_rel_list, dim=0 ) G_batch.append(g_batch) G_batch = [net(G) for G in G_batch] for G_pred, img in zip(G_batch, img_list): if G_pred is None or G_pred.number_of_nodes() == 0: continue loss_rel = L_rel( G_pred.edata["preds"], G_pred.edata["rel_class"], G_pred.edata["sample_weights"], ) loss.append(loss_rel.sum()) loss_rel_val += loss_rel.mean().asscalar() / num_gpus if len(loss) == 0: btic, loss_rel_val = 
batch_print( epoch, i, batch_verbose_freq, n_batches, btic, loss_rel_val, metric_list, ) continue for l in loss: l.backward() if (i + 1) % per_device_batch_size == 0 or i == n_batches - 1: net_trainer.step(args.batch_size) det_trainer.step(args.batch_size) if aggregate_grad: for k, v in net_params.items(): v.zero_grad() for k, v in det_params.items(): v.zero_grad() for G_pred, img_slice in zip(G_batch, img_list): if G_pred is None or G_pred.number_of_nodes() == 0: continue link_ind = np.where(G_pred.edata["rel_class"].asnumpy() > 0)[0] if len(link_ind) == 0: continue train_metric.update( [G_pred.edata["rel_class"][link_ind]], [G_pred.edata["preds"][link_ind]], ) train_metric_top5.update( [G_pred.edata["rel_class"][link_ind]], [G_pred.edata["preds"][link_ind]], ) btic, loss_rel_val = batch_print( epoch, i, batch_verbose_freq, n_batches, btic, loss_rel_val, metric_list, ) if (i + 1) % batch_verbose_freq == 0: net.save_parameters("%s/model-%d.params" % (save_dir, epoch)) detector_feat.features.save_parameters( "%s/detector_feat.features-%d.params" % (save_dir, epoch) ) print_txt = "Epoch[%d], time: %d, loss_rel=%.4f," % ( epoch, int(time.time() - tic), loss_rel_val / (i + 1), ) for metric in metric_list: metric_name, metric_val = metric.get() print_txt += "%s=%.4f " % (metric_name, metric_val) logger.info(print_txt) net.save_parameters("%s/model-%d.params" % (save_dir, epoch)) detector_feat.features.save_parameters( "%s/detector_feat.features-%d.params" % (save_dir, epoch) ) ================================================ FILE: examples/mxnet/scenegraph/train_reldn.sh ================================================ MXNET_CUDNN_AUTOTUNE_DEFAULT=0 python train_reldn.py \ --pretrained-faster-rcnn-params faster_rcnn_resnet101_v1d_visualgenome/faster_rcnn_resnet101_v1d_custom_best.params ================================================ FILE: examples/mxnet/scenegraph/utils/__init__.py ================================================ from .build_graph import * from 
def extract_edge_bbox(g):
    """Return one box per edge that encloses both endpoint boxes.

    For every edge (in edge-id order) columns 0 and 1 of the result hold the
    element-wise minimum of the source and destination ``pred_bbox`` rows,
    and columns 2 and 3 hold the element-wise maximum.
    """
    node_boxes = g.ndata["pred_bbox"]
    edge_src, edge_dst = g.edges(order="eid")
    boxes_src = node_boxes[edge_src.asnumpy()]
    boxes_dst = node_boxes[edge_dst.asnumpy()]

    # Stack the two endpoint tables along a new leading axis and reduce over
    # it, which gives the per-edge, per-column min / max in one call each.
    endpoints = nd.stack(boxes_src, boxes_dst)
    lower = endpoints.min(axis=0)
    upper = endpoints.max(axis=0)

    enclosing = nd.zeros((g.number_of_edges(), 4), ctx=node_boxes.context)
    enclosing[:, 0:2] = lower[:, 0:2]
    enclosing[:, 2:4] = upper[:, 2:4]
    return enclosing
g_pred.add_nodes( n_nodes, { "pred_bbox": bbox[gi, inds], "node_feat": spatial_feat[gi, roi_ind], "node_class_pred": ids[gi, inds, 0], "node_class_logit": nd.log(scores[gi, inds, 0] + 1e-7), }, ) # iou matching ious = nd.contrib.box_iou( gt_bbox[gi], g_pred.ndata["pred_bbox"] ).asnumpy() H, W = ious.shape h = H w = W pred_to_gt_ind = np.array([-1 for i in range(W)]) pred_to_gt_class_match = [0 for i in range(W)] pred_to_gt_class_match_id = [0 for i in range(W)] while h > 0 and w > 0: ind = int(ious.argmax()) row_ind = ind // W col_ind = ind % W if ious[row_ind, col_ind] < iou_thresh: break pred_to_gt_ind[col_ind] = row_ind gt_node_class = g.ndata["node_class"][row_ind] pred_node_class = g_pred.ndata["node_class_pred"][col_ind] if gt_node_class == pred_node_class: pred_to_gt_class_match[col_ind] = 1 pred_to_gt_class_match_id[col_ind] = row_ind ious[row_ind, :] = -1 ious[:, col_ind] = -1 h -= 1 w -= 1 n_nodes = g_pred.number_of_nodes() triplet = [] adjmat = np.zeros((n_nodes, n_nodes)) src, dst = g.all_edges(order="eid") eid_keys = np.column_stack([src.asnumpy(), dst.asnumpy()]) eid_dict = {} for i, key in enumerate(eid_keys): k = tuple(key) if k not in eid_dict: eid_dict[k] = [i] else: eid_dict[k].append(i) ori_rel_class = g.edata["rel_class"].asnumpy() for i in range(n_nodes): for j in range(n_nodes): if i != j: if pred_to_gt_class_match[i] and pred_to_gt_class_match[j]: sub_gt_id = pred_to_gt_class_match_id[i] ob_gt_id = pred_to_gt_class_match_id[j] eids = eid_dict[(sub_gt_id, ob_gt_id)] rel_cls = ori_rel_class[eids] n_edges_between = len(rel_cls) for ii in range(n_edges_between): triplet.append((i, j, rel_cls[ii])) adjmat[i, j] = 1 else: triplet.append((i, j, 0)) src, dst, rel_class = tuple(zip(*triplet)) rel_class = nd.array(rel_class, ctx=ctx).expand_dims(1) g_pred.add_edges(src, dst, data={"rel_class": rel_class}) # other operations n_nodes = g_pred.number_of_nodes() n_edges = g_pred.number_of_edges() if bbox_improvement: g_pred.ndata["pred_bbox"] = 
def build_graph_validate_gt_obj(
    img, gt_ids, bbox, spatial_feat, bbox_improvement=True, overlap=False
):
    """Build fully connected validation graphs from ground-truth boxes and labels.

    One graph is built per image of the batch. Its nodes are the rows of
    ``bbox`` whose coordinates sum to a positive value (other rows are
    presumably padding -- confirm against the caller), and every ordered
    pair of distinct nodes is connected by an edge.

    Parameters
    ----------
    img : NDArray
        Image batch; ``img.shape[0]`` is the batch size, ``img.shape[2:4]``
        the image size used for normalisation and ``img.context`` the device.
    gt_ids : NDArray
        Ground-truth class ids, read as ``gt_ids[b, inds, 0]``.
    bbox : NDArray
        Boxes, read as ``bbox[b, k, 0:4]``. NOTE: normalised **in place** by
        the image size.
    spatial_feat : NDArray
        Per-box features, read as ``spatial_feat[b, inds]``.
    bbox_improvement : bool
        When true, ``pred_bbox`` is extended via ``bbox_improve``.
    overlap : bool
        Unused; kept for signature compatibility with the other builders.

    Returns
    -------
    DGLGraph or None
        ``None`` when no image yields a graph, the single graph when exactly
        one does, otherwise the ``dgl.batch`` of all graphs.
    """
    n_batch = img.shape[0]
    img_size = img.shape[2:4]
    # Columns 0/2 are divided by img_size[1], columns 1/3 by img_size[0].
    bbox[:, :, 0] /= img_size[1]
    bbox[:, :, 1] /= img_size[0]
    bbox[:, :, 2] /= img_size[1]
    bbox[:, :, 3] /= img_size[0]
    ctx = img.context

    g_batch = []
    for btc in range(n_batch):
        inds = np.where(bbox[btc].sum(1).asnumpy() > 0)[0].tolist()
        n_nodes = len(inds)
        # A graph needs at least two nodes to contain a node pair. With a
        # single node the edge list is empty and unpacking ``zip(*[])`` into
        # ``src, dst`` used to raise ValueError, so such images are skipped
        # exactly like images without any valid box.
        if n_nodes < 2:
            continue

        g_pred = dgl.DGLGraph()
        g_pred.add_nodes(
            n_nodes,
            {
                "pred_bbox": bbox[btc, inds],
                "node_feat": spatial_feat[btc, inds],
                "node_class_pred": gt_ids[btc, inds, 0],
                "node_class_logit": nd.zeros_like(
                    gt_ids[btc, inds, 0], ctx=ctx
                ),
            },
        )

        # All i < j pairs first, then the same pairs reversed.
        edge_list = [
            (i, j) for i in range(n_nodes - 1) for j in range(i + 1, n_nodes)
        ]
        src, dst = tuple(zip(*edge_list))
        g_pred.add_edges(src, dst)
        g_pred.add_edges(dst, src)

        n_edges = g_pred.number_of_edges()
        if bbox_improvement:
            g_pred.ndata["pred_bbox"] = bbox_improve(g_pred.ndata["pred_bbox"])
        g_pred.edata["rel_bbox"] = extract_edge_bbox(g_pred)
        # Remember which image of the batch each edge belongs to.
        g_pred.edata["batch_id"] = nd.zeros((n_edges, 1), ctx=ctx) + btc

        g_batch.append(g_pred)

    if len(g_batch) == 0:
        return None
    if len(g_batch) > 1:
        return dgl.batch(g_batch)
    return g_batch[0]
ids, scores, bbox, spatial_feat, gt_ids=None, bbox_improvement=True, overlap=False, ): """given ground truth bbox, build graph for validation""" n_batch = img.shape[0] img_size = img.shape[2:4] bbox[:, :, 0] /= img_size[1] bbox[:, :, 1] /= img_size[0] bbox[:, :, 2] /= img_size[1] bbox[:, :, 3] /= img_size[0] ctx = img.context g_batch = [] for btc in range(n_batch): id_btc = scores[btc][:, :, 0].argmax(0) score_btc = scores[btc][:, :, 0].max(0) inds = np.where(bbox[btc].sum(1).asnumpy() > 0)[0].tolist() if len(inds) == 0: continue n_nodes = len(inds) g_pred = dgl.DGLGraph() g_pred.add_nodes( n_nodes, { "pred_bbox": bbox[btc, inds], "node_feat": spatial_feat[btc, inds], "node_class_pred": id_btc, "node_class_logit": nd.log(score_btc + 1e-7), }, ) edge_list = [] for i in range(n_nodes - 1): for j in range(i + 1, n_nodes): edge_list.append((i, j)) src, dst = tuple(zip(*edge_list)) g_pred.add_edges(src, dst) g_pred.add_edges(dst, src) n_nodes = g_pred.number_of_nodes() n_edges = g_pred.number_of_edges() if bbox_improvement: g_pred.ndata["pred_bbox"] = bbox_improve(g_pred.ndata["pred_bbox"]) g_pred.edata["rel_bbox"] = extract_edge_bbox(g_pred) g_pred.edata["batch_id"] = nd.zeros((n_edges, 1), ctx=ctx) + btc g_batch.append(g_pred) if len(g_batch) == 0: return None if len(g_batch) > 1: return dgl.batch(g_batch) return g_batch[0] def build_graph_validate_pred( img, ids, scores, bbox, feat_ind, spatial_feat, bbox_improvement=True, scores_top_k=50, overlap=False, ): """given predicted bbox, build graph for validation""" n_batch = img.shape[0] img_size = img.shape[2:4] bbox[:, :, 0] /= img_size[1] bbox[:, :, 1] /= img_size[0] bbox[:, :, 2] /= img_size[1] bbox[:, :, 3] /= img_size[0] ctx = img.context g_batch = [] for btc in range(n_batch): inds = np.where(scores[btc, :, 0].asnumpy() > 0)[0].tolist() if len(inds) == 0: continue if len(inds) > scores_top_k: top_score_inds = ( scores[btc, inds, 0].asnumpy().argsort()[::-1][0:scores_top_k] ) inds = 
def iou(boxA, boxB):
    """Return the intersection-over-union of two boxes.

    Each box is indexed as ``[x_min, y_min, x_max, y_max]``. The result is
    ``0`` when the intersection area or the union area is below ``1e-7``.
    """
    # Extent of the intersection rectangle, clamped at zero when disjoint.
    overlap_w = max(0, min(boxA[2], boxB[2]) - max(boxA[0], boxB[0]))
    overlap_h = max(0, min(boxA[3], boxB[3]) - max(boxA[1], boxB[1]))
    intersection = overlap_w * overlap_h
    if intersection < 1e-7:
        return 0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - intersection
    if union < 1e-7:
        return 0
    return intersection / float(union)
@mx.metric.register
@mx.metric.alias("predcls")
class PredCls(mx.metric.EvalMetric):
    """Metric with ground truth object location and label.

    Per image, the fraction of ground-truth triplets that are matched by one
    of the ``topk`` highest-scoring predictions (ranked by column 0). A match
    requires equal values in column 2 and a passing ``triplet_iou_thresh``.
    """

    def __init__(self, topk=20, iou_thresh=0.99):
        super(PredCls, self).__init__("predcls@%d" % (topk))
        self.topk = topk
        self.iou_thresh = iou_thresh

    def update(self, labels, preds):
        """Accumulate the score of one image; a missing side counts as 0."""
        if labels is None or preds is None:
            self.num_inst += 1
            return

        # Highest score first.
        ranked = preds[preds[:, 0].argsort()[::-1]]
        n_considered = min(self.topk, ranked.shape[0])
        n_gt = labels.shape[0]
        already_hit = [False for _ in labels]

        hits = 0
        for rank in range(n_considered):
            candidate = ranked[rank]
            # No early exit: one prediction may claim several unmatched
            # ground-truth rows.
            for gt_idx in range(n_gt):
                if already_hit[gt_idx]:
                    continue
                target = labels[gt_idx]
                same_relation = int(target[2]) == int(candidate[2])
                if same_relation and triplet_iou_thresh(
                    candidate, target, self.iou_thresh
                ):
                    hits += 1
                    already_hit[gt_idx] = True

        self.sum_metric += hits / n_gt
        self.num_inst += 1
@mx.metric.register
@mx.metric.alias("sgdet+")
class SGDetPlus(mx.metric.EvalMetric):
    """Metric proposed by `Graph R-CNN for Scene Graph Generation`.

    Each :meth:`update` call scores one image as
    ``(matched objects + matched predicates + matched triplets) / N`` with
    ``N = #gt objects + 2 * #gt triplets``.

    Parameters
    ----------
    topk : int
        Number of highest-scoring predicted triplets that are considered.
    iou_thresh : float
        IoU threshold forwarded to ``object_iou_thresh`` and
        ``triplet_iou_thresh``.
    """

    def __init__(self, topk=20, iou_thresh=0.5):
        super(SGDetPlus, self).__init__("sgdet+@%d" % (topk))
        self.topk = topk
        self.iou_thresh = iou_thresh

    def update(self, labels, preds):
        """Accumulate the score of one image.

        Parameters
        ----------
        labels : tuple
            ``(label_objects, label_triplets)``; object rows are compared on
            column 0 (class) plus box IoU, triplet rows on columns 2, 3, 4
            plus box IoU.
        preds : tuple
            ``(pred_objects, pred_triplets)`` with the same row layout;
            triplets are ranked by column 1.

        If either object table is ``None`` the image is counted with a score
        of zero.
        """
        label_objects, label_triplets = labels
        pred_objects, pred_triplets = preds
        if label_objects is None or pred_objects is None:
            self.num_inst += 1
            return

        count = 0

        # --- objects: same class and sufficient box overlap ---------------
        gt_obj_num = label_objects.shape[0]
        object_matched = [False] * gt_obj_num
        for pred in pred_objects:
            for j in range(gt_obj_num):
                if object_matched[j]:
                    continue
                label = label_objects[j]
                if int(label[0]) == int(pred[0]) and object_iou_thresh(
                    pred, label, self.iou_thresh
                ):
                    count += 1
                    object_matched[j] = True

        # --- predicates and full triplets among the top-k predictions -----
        pred_triplets = pred_triplets[pred_triplets[:, 1].argsort()[::-1]]
        m = min(self.topk, len(pred_triplets))
        gt_triplet_num = label_triplets.shape[0]
        triplet_matched = [False] * gt_triplet_num
        predicate_matched = [False] * gt_triplet_num
        for i in range(m):
            pred = pred_triplets[i]
            for j in range(gt_triplet_num):
                label = label_triplets[j]
                same_predicate = int(label[2]) == int(pred[2])
                # Was ``if not predicate_matched:`` -- a test on the whole
                # (non-empty) list, so this branch never ran -- and it added
                # ``label[3]`` (a class id) instead of one match.
                if not predicate_matched[j]:
                    if same_predicate and triplet_iou_thresh(
                        pred, label, self.iou_thresh
                    ):
                        count += 1
                        predicate_matched[j] = True
                if not triplet_matched[j]:
                    if (
                        same_predicate
                        and int(label[3]) == int(pred[3])
                        and int(label[4]) == int(pred[4])
                        and triplet_iou_thresh(pred, label, self.iou_thresh)
                    ):
                        count += 1
                        triplet_matched[j] = True

        # ``labels`` is a tuple and has no ``shape``; the triplet count is
        # what the normaliser needs (one predicate + one triplet match each).
        total = gt_triplet_num
        N = gt_obj_num + 2 * total
        self.sum_metric += count / N
        self.num_inst += 1
img_size[0] gt_bbox[:, 2] /= img_size[1] gt_bbox[:, 3] /= img_size[0] gt_objects = np.vstack([gt_class, gt_bbox.transpose(1, 0)]).transpose(1, 0) gt_node_ids = g.find_edges(gt_eids) gt_node_sub = gt_node_ids[0].asnumpy() gt_node_ob = gt_node_ids[1].asnumpy() gt_rel_class = g.edata["rel_class"][gt_eids, 0].asnumpy() - 1 gt_sub_class = gt_class[gt_node_sub] gt_ob_class = gt_class[gt_node_ob] gt_sub_bbox = gt_bbox[gt_node_sub] gt_ob_bbox = gt_bbox[gt_node_ob] n = len(gt_eids) gt_triplets = np.vstack( [ np.ones(n), np.ones(n), gt_rel_class, gt_sub_class, gt_ob_class, gt_sub_bbox.transpose(1, 0), gt_ob_bbox.transpose(1, 0), ] ).transpose(1, 0) return gt_objects, gt_triplets def extract_pred(g, topk=100, joint_preds=False): """extract prediction from prediction graph for validation and visualization""" if g is None or g.number_of_nodes() == 0: return None, None pred_class = g.ndata["node_class_pred"].asnumpy() pred_class_prob = g.ndata["node_class_logit"].asnumpy() pred_bbox = g.ndata["pred_bbox"][:, 0:4].asnumpy() pred_objects = np.vstack([pred_class, pred_bbox.transpose(1, 0)]).transpose( 1, 0 ) score_pred = g.edata["score_pred"].asnumpy() score_phr = g.edata["score_phr"].asnumpy() score_pred_topk_eids = (-score_pred).argsort()[0:topk].tolist() score_phr_topk_eids = (-score_phr).argsort()[0:topk].tolist() topk_eids = sorted(list(set(score_pred_topk_eids + score_phr_topk_eids))) pred_rel_prob = g.edata["preds"][topk_eids].asnumpy() if joint_preds: pred_rel_class = pred_rel_prob[:, 1:].argmax(axis=1) else: pred_rel_class = pred_rel_prob.argmax(axis=1) pred_node_ids = g.find_edges(topk_eids) pred_node_sub = pred_node_ids[0].asnumpy() pred_node_ob = pred_node_ids[1].asnumpy() pred_sub_class = pred_class[pred_node_sub] pred_sub_class_prob = pred_class_prob[pred_node_sub] pred_sub_bbox = pred_bbox[pred_node_sub] pred_ob_class = pred_class[pred_node_ob] pred_ob_class_prob = pred_class_prob[pred_node_ob] pred_ob_bbox = pred_bbox[pred_node_ob] pred_triplets = np.vstack( [ 
def plot_sg(img, preds, obj_classes, rel_classes, topk=1):
    """Draw the first ``topk`` predicted relationships on top of an image.

    Parameters
    ----------
    img : array
        Image to display; ``img.shape[0:2]`` is used as (height, width).
    preds : array
        One relationship per row, drawn in the given order (no sorting is
        done here). Column 2 is the relation index, columns 3 and 4 the
        subject / object class indices, columns 5:9 and 9:13 the subject /
        object boxes, which are multiplied by the image size before drawing.
    obj_classes, rel_classes : sequence of str
        Class-name lookups for objects and relations.
    topk : int
        Maximum number of rows of ``preds`` to draw.

    Returns
    -------
    The axes object returned by ``gluoncv.utils.viz.plot_image``.

    Notes
    -----
    A stray module-level ``plot_sg(img, preds, 2)`` call used to follow this
    function. It referenced undefined names and therefore raised on import
    of the module; it has been removed.
    """
    size = img.shape[0:2]
    box_scale = np.array([size[1], size[0], size[1], size[0]])
    topk = min(topk, preds.shape[0])
    ax = gcv.utils.viz.plot_image(img)
    label_style = dict(bbox=dict(alpha=0.5), fontsize=12, color="white")

    for i in range(topk):
        rel = int(preds[i, 2])
        src = int(preds[i, 3])
        dst = int(preds[i, 4])
        src_name = obj_classes[src]
        dst_name = obj_classes[dst]
        rel_name = rel_classes[rel]

        src_bbox = preds[i, 5:9] * box_scale
        dst_bbox = preds[i, 9:13] * box_scale
        src_center = np.array(
            [(src_bbox[0] + src_bbox[2]) / 2, (src_bbox[1] + src_bbox[3]) / 2]
        )
        dst_center = np.array(
            [(dst_bbox[0] + dst_bbox[2]) / 2, (dst_bbox[1] + dst_bbox[3]) / 2]
        )
        rel_center = (src_center + dst_center) / 2

        # The connecting line runs between the two box centres.
        line_x = np.array([src_center[0], dst_center[0]])
        line_y = np.array([src_center[1], dst_center[1]])
        ax.plot(
            line_x, line_y, linewidth=3.0, alpha=0.7, color=plt.cm.cool(rel)
        )

        ax.text(
            src_center[0],
            src_center[1],
            "{:s}".format(src_name),
            **label_style,
        )
        ax.text(
            dst_center[0],
            dst_center[1],
            "{:s}".format(dst_name),
            **label_style,
        )
        ax.text(
            rel_center[0],
            rel_center[1],
            "{:s}".format(rel_name),
            **label_style,
        )
    return ax
) parser.add_argument( "--gpus", type=str, default="0", help="Training with GPUs, you can specify 1,3 for example.", ) parser.add_argument( "--batch-size", type=int, default=8, help="Total batch-size for training.", ) parser.add_argument( "--metric", type=str, default="sgdet", help="Evaluation metric, could be 'predcls', 'phrcls', 'sgdet' or 'sgdet+'.", ) parser.add_argument( "--pretrained-faster-rcnn-params", type=str, required=True, help="Path to saved Faster R-CNN model parameters.", ) parser.add_argument( "--reldn-params", type=str, required=True, help="Path to saved Faster R-CNN model parameters.", ) parser.add_argument( "--faster-rcnn-params", type=str, required=True, help="Path to saved Faster R-CNN model parameters.", ) parser.add_argument( "--log-dir", type=str, default="reldn_output.log", help="Path to save training logs.", ) parser.add_argument( "--freq-prior", type=str, default="freq_prior.pkl", help="Path to saved frequency prior data.", ) parser.add_argument( "--verbose-freq", type=int, default=100, help="Frequency of log printing in number of iterations.", ) args = parser.parse_args() return args args = parse_args() filehandler = logging.FileHandler(args.log_dir) streamhandler = logging.StreamHandler() logger = logging.getLogger("") logger.setLevel(logging.INFO) logger.addHandler(filehandler) logger.addHandler(streamhandler) # Hyperparams ctx = [mx.gpu(int(i)) for i in args.gpus.split(",") if i.strip()] if ctx: num_gpus = len(ctx) assert args.batch_size % num_gpus == 0 per_device_batch_size = int(args.batch_size / num_gpus) else: ctx = [mx.cpu()] per_device_batch_size = args.batch_size batch_size = args.batch_size N_relations = 50 N_objects = 150 batch_verbose_freq = args.verbose_freq mode = args.metric metric_list = [] topk_list = [20, 50, 100] if mode == "predcls": for topk in topk_list: metric_list.append(PredCls(topk=topk)) if mode == "phrcls": for topk in topk_list: metric_list.append(PhrCls(topk=topk)) if mode == "sgdet": for topk in topk_list: 
def get_data_batch(g_list, img_list, ctx_list):
    """Split a batch into one slice per device and move each slice there.

    Parameters
    ----------
    g_list : sequence or None
        Graphs of the batch. Each graph's ``ndata`` entries ``bbox``,
        ``node_class``, ``node_class_vec`` and ``edata`` entry ``rel_class``
        are moved to the device of the slice the graph falls into.
    img_list : sliceable
        Images of the batch; each slice must provide ``as_in_context``.
    ctx_list : sequence
        Target devices, one per slice.

    Returns
    -------
    tuple
        ``(None, None)`` for an empty batch, otherwise
        ``(graph_slices, image_slices)`` with one entry per device. The last
        slice also receives the remainder when the batch size is not a
        multiple of the number of devices.

    Raises
    ------
    Exception
        If the batch holds fewer graphs than there are devices.
    """
    if g_list is None or len(g_list) == 0:
        return None, None
    n_gpu = len(ctx_list)
    size = len(g_list)
    if size < n_gpu:
        raise Exception("too small batch")
    step = size // n_gpu

    # [start, stop) of every slice; the last one runs to the end of the batch.
    bounds = [
        (i * step, (i + 1) * step if i < n_gpu - 1 else size)
        for i in range(n_gpu)
    ]
    G_list = [g_list[start:stop] for start, stop in bounds]
    img_list = [img_list[start:stop] for start, stop in bounds]

    for G_slice, ctx in zip(G_list, ctx_list):
        for G in G_slice:
            G.ndata["bbox"] = G.ndata["bbox"].as_in_context(ctx)
            G.ndata["node_class"] = G.ndata["node_class"].as_in_context(ctx)
            G.ndata["node_class_vec"] = G.ndata["node_class_vec"].as_in_context(
                ctx
            )
            G.edata["rel_class"] = G.edata["rel_class"].as_in_context(ctx)

    # Pair every image slice with its own device. Previously the loop
    # variable left over from the loop above was reused, which put all
    # images on the last device and broke multi-device runs.
    img_list = [
        img.as_in_context(ctx) for img, ctx in zip(img_list, ctx_list)
    ]
    return G_list, img_list
bbox_rel_stack = bbox_pad(bbox_rel_list).as_in_context(cur_ctx) _, _, _, spatial_feat_rel = detector_feat( img, None, None, bbox_rel_stack ) spatial_feat_rel_list = [] for j in range(n_graph): eids = np.where(batch_id == j)[0] if len(eids) > 0: spatial_feat_rel_list.append( spatial_feat_rel[j, 0 : len(eids)] ) g_pred_batch.edata["edge_feat"] = nd.concat( *spatial_feat_rel_list, dim=0 ) G_batch.append(g_pred_batch) G_batch = [net(G) for G in G_batch] for G_slice, G_pred, img_slice in zip(G_list, G_batch, img_list): for G_gt, G_pred_one in zip(G_slice, [G_pred]): if G_pred_one is None or G_pred_one.number_of_nodes() == 0: continue gt_objects, gt_triplet = extract_gt(G_gt, img_slice.shape[2:4]) pred_objects, pred_triplet = extract_pred(G_pred, joint_preds=True) for metric in metric_list: if ( isinstance(metric, PredCls) or isinstance(metric, PhrCls) or isinstance(metric, SGDet) ): metric.update(gt_triplet, pred_triplet) else: metric.update( (gt_objects, gt_triplet), (pred_objects, pred_triplet) ) if (i + 1) % batch_verbose_freq == 0: print_txt = "Batch[%d/%d] " % (i, n_batches) for metric in metric_list: metric_name, metric_val = metric.get() print_txt += "%s=%.4f " % (metric_name, metric_val) logger.info(print_txt) print_txt = "Batch[%d/%d] " % (n_batches, n_batches) for metric in metric_list: metric_name, metric_val = metric.get() print_txt += "%s=%.4f " % (metric_name, metric_val) logger.info(print_txt) ================================================ FILE: examples/mxnet/scenegraph/validate_reldn.sh ================================================ MXNET_CUDNN_AUTOTUNE_DEFAULT=0 python validate_reldn.py \ --pretrained-faster-rcnn-params faster_rcnn_resnet101_v1d_visualgenome/faster_rcnn_resnet101_v1d_custom_best.params \ --reldn-params params_resnet101_v1d_reldn/model-8.params \ --faster-rcnn-params params_resnet101_v1d_reldn/detector_feat.features-8.params ================================================ FILE: examples/mxnet/sgc/README.md 
================================================ Simple Graph Convolution (SGC) ============ - Paper link: [Simplifying Graph Convolutional Networks](https://arxiv.org/abs/1902.07153) - Author's code repo: [https://github.com/Tiiiger/SGC](https://github.com/Tiiiger/SGC). Dependencies ------------ - MXNet 1.5+ - requests ```bash pip install mxnet requests ``` Codes ----- The folder contains an implementation of SGC (`sgc.py`). Results ------- Run with the following (available dataset: "cora", "citeseer", "pubmed") ```bash DGLBACKEND=mxnet python3 sgc.py --dataset cora --gpu 0 DGLBACKEND=mxnet python3 sgc.py --dataset citeseer --weight-decay 5e-5 --n-epochs 150 --bias --gpu 0 DGLBACKEND=mxnet python3 sgc.py --dataset pubmed --weight-decay 5e-5 --bias --gpu 0 ``` On NVIDIA V100 * cora: 0.818 (paper: 0.810) * citeseer: 0.725 (paper: 0.719) * pubmed: 0.788 (paper: 0.789) ================================================ FILE: examples/mxnet/sgc/sgc.py ================================================ """ This code was modified from the GCN implementation in DGL examples. Simplifying Graph Convolutional Networks Paper: https://arxiv.org/abs/1902.07153 Code: https://github.com/Tiiiger/SGC SGC implementation in DGL.
def evaluate(model, g, features, labels, mask):
    """Return the masked classification accuracy of ``model`` on graph ``g``.

    The model is applied to ``(g, features)``, the arg-max over axis 1 is
    taken as the predicted class, and the fraction of correct predictions
    among the entries selected by ``mask`` is returned as a Python scalar.
    """
    logits = model(g, features)
    predicted = logits.argmax(axis=1)
    # Multiplying by the mask zeroes out hits on nodes outside the split.
    masked_hits = (predicted == labels) * mask
    n_selected = mask.sum().asscalar()
    return (masked_hits.sum() / n_selected).asscalar()
labels, mx.nd.expand_dims(train_mask, 1)) loss = loss.sum() / n_train_samples loss.backward() trainer.step(batch_size=1) if epoch >= 3: loss.asscalar() dur.append(time.time() - t0) acc = evaluate(model, g, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000, ) ) # test set accuracy acc = evaluate(model, g, features, labels, test_mask) print("Test accuracy {:.2%}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="SGC") register_data_args(parser) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=0.2, help="learning rate") parser.add_argument( "--bias", action="store_true", default=False, help="flag to use bias" ) parser.add_argument( "--n-epochs", type=int, default=100, help="number of training epochs" ) parser.add_argument( "--weight-decay", type=float, default=5e-6, help="Weight for L2 loss" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/mxnet/tagcn/README.md ================================================ Topology Adaptive Graph Convolutional networks (TAGCN) ============ - Paper link: [https://arxiv.org/abs/1710.10370](https://arxiv.org/abs/1710.10370) Dependencies ------------ - MXNet nightly build - requests ``bash pip install mxnet --pre pip install requests `` Results ------- Run with following (available dataset: "cora", "citeseer", "pubmed") ```bash DGLBACKEND=mxnet python3 train.py --dataset cora --gpu 0 --self-loop ``` * cora: ~0.820 (paper: 0.833) * citeseer: ~0.702 (paper: 0.714) * pubmed: ~0.798 (paper: 0.811) ================================================ FILE: examples/mxnet/tagcn/tagcn.py ================================================ """TAGCN using DGL nn package References: - Topology Adaptive Graph Convolutional Networks - 
def main(args):
    """Train a TAGCN node classifier on a citation graph and report accuracy.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options. This function reads ``dataset``, ``gpu``,
        ``n_hidden``, ``n_layers``, ``dropout``, ``lr``, ``weight_decay`` and
        ``n_epochs``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of "cora", "citeseer" or "pubmed".
    """
    # load and preprocess dataset
    if args.dataset == "cora":
        data = CoraGraphDataset()
    elif args.dataset == "citeseer":
        data = CiteseerGraphDataset()
    elif args.dataset == "pubmed":
        data = PubmedGraphDataset()
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))

    g = data[0]
    # A negative GPU id selects the CPU context.
    ctx = mx.cpu(0) if args.gpu < 0 else mx.gpu(args.gpu)
    g = g.to(ctx)

    features = g.ndata["feat"]
    labels = mx.nd.array(g.ndata["label"], dtype="float32", ctx=ctx)
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    in_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = data.graph.number_of_edges()
    print(
        """----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.sum().asscalar(),
            val_mask.sum().asscalar(),
            test_mask.sum().asscalar(),
        )
    )

    # add self loop
    # NOTE: self-loops are (re)built unconditionally here; the --self-loop
    # flag defined by the argument parser is not consulted.
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)

    # create TAGCN model
    model = TAGCN(
        g,
        in_feats,
        args.n_hidden,
        n_classes,
        args.n_layers,
        mx.nd.relu,
        args.dropout,
    )

    model.initialize(ctx=ctx)
    n_train_samples = train_mask.sum().asscalar()
    loss_fcn = gluon.loss.SoftmaxCELoss()

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(
        model.collect_params(),
        "adam",
        {"learning_rate": args.lr, "wd": args.weight_decay},
    )

    # training loop; per-epoch durations are collected from epoch 3 onwards
    dur = []
    for epoch in range(args.n_epochs):
        if epoch >= 3:
            t0 = time.time()
        # forward
        with mx.autograd.record():
            pred = model(features)
            # The train mask is passed as the per-sample weight so that only
            # training nodes contribute to the loss.
            loss = loss_fcn(pred, labels, mx.nd.expand_dims(train_mask, 1))
            loss = loss.sum() / n_train_samples

        loss.backward()
        trainer.step(batch_size=1)

        if epoch >= 3:
            # asscalar() forces the asynchronous computation to finish before
            # the clock is read.
            loss.asscalar()
            dur.append(time.time() - t0)
            acc = evaluate(model, features, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(
                    epoch,
                    np.mean(dur),
                    loss.asscalar(),
                    acc,
                    n_edges / np.mean(dur) / 1000,
                )
            )

    print()
    # Final score is measured on the held-out test split (the previous code
    # re-used the validation mask while labelling the result "Test accuracy").
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
help="number of hidden tagcn layers" ) parser.add_argument( "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" ) parser.add_argument( "--self-loop", action="store_true", help="graph self-loop (default=False)", ) parser.set_defaults(self_loop=False) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/mxnet/tree_lstm/README.md ================================================ # Tree-LSTM This is a re-implementation of the following paper: > [**Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks**](http://arxiv.org/abs/1503.00075) > *Kai Sheng Tai, Richard Socher, and Christopher Manning*. The provided implementation can achieve a test accuracy of 51.72 which is comparable with the result reported in the original paper: 51.0(±0.5). ## Dependencies * MXNet nightly build * requests * nltk ```bash pip install mxnet --pre pip install requests nltk ``` ## Data The script will download the [SST dataset] (http://nlp.stanford.edu/sentiment/index.html) and the GloVe 840B.300d embedding automatically if `--use-glove` is specified (note: download may take a while). ## Usage ``` DGLBACKEND=mxnet python3 train.py --gpu 0 ``` ## Speed Test See https://docs.google.com/spreadsheets/d/1eCQrVn7g0uWriz63EbEDdes2ksMdKdlbWMyT8PSU4rc . 
def prepare_glove():
    """Make sure ``glove.840B.300d.txt`` is present in the working directory.

    If the text file is missing or its SHA-1 does not match the expected
    digest, the zip archive is downloaded and extracted into the current
    directory. A warning is emitted when the extracted file still fails the
    checksum test. Nothing is returned.
    """
    glove_txt = "glove.840B.300d.txt"
    glove_txt_sha1 = "294b9f37fa64cce31f9ebb409c266fc379527708"

    def _checksum_ok():
        # Compare the on-disk embedding file against the expected digest.
        return data.utils.check_sha1(glove_txt, sha1_hash=glove_txt_sha1)

    # Nothing to do when a verified copy already exists.
    if os.path.exists(glove_txt) and _checksum_ok():
        return

    archive_path = data.utils.download(
        "http://nlp.stanford.edu/data/glove.840B.300d.zip",
        sha1_hash="8084fbacc2dee3b1fd1ca4cc534cbfff3519ed0d",
    )
    with zipfile.ZipFile(archive_path, "r") as zf:
        zf.extractall()

    if _checksum_ok():
        return
    warnings.warn(
        "The downloaded glove embedding file checksum mismatch. File content "
        "may be corrupted."
    )
Defaulting to CPU implementation".format( args.gpu ) ) ctx = mx.cpu() else: ctx = mx.cpu() if args.use_glove: prepare_glove() trainset = data.SSTDataset() train_loader = gluon.data.DataLoader( dataset=trainset, batch_size=args.batch_size, batchify_fn=batcher(ctx), shuffle=True, num_workers=0, ) devset = data.SSTDataset(mode="dev") dev_loader = gluon.data.DataLoader( dataset=devset, batch_size=100, batchify_fn=batcher(ctx), shuffle=True, num_workers=0, ) testset = data.SSTDataset(mode="test") test_loader = gluon.data.DataLoader( dataset=testset, batch_size=100, batchify_fn=batcher(ctx), shuffle=False, num_workers=0, ) model = TreeLSTM( trainset.vocab_size, args.x_size, args.h_size, trainset.num_classes, args.dropout, cell_type="childsum" if args.child_sum else "nary", pretrained_emb=trainset.pretrained_emb, ctx=ctx, ) print(model) params_ex_emb = [ x for x in model.collect_params().values() if x.grad_req != "null" and x.shape[0] != trainset.vocab_size ] params_emb = list(model.embedding.collect_params().values()) for p in params_emb: p.lr_mult = 0.1 model.initialize(mx.init.Xavier(magnitude=1), ctx=ctx) model.hybridize() trainer = gluon.Trainer( model.collect_params("^(?!embedding).*$"), "adagrad", {"learning_rate": args.lr, "wd": args.weight_decay}, ) trainer_emb = gluon.Trainer( model.collect_params("^embedding.*$"), "adagrad", {"learning_rate": args.lr}, ) dur = [] L = gluon.loss.SoftmaxCrossEntropyLoss(axis=1) for epoch in range(args.epochs): t_epoch = time.time() for step, batch in enumerate(train_loader): g = batch.graph n = g.number_of_nodes() # TODO begin_states function? 
h = mx.nd.zeros((n, args.h_size), ctx=ctx) c = mx.nd.zeros((n, args.h_size), ctx=ctx) if step >= 3: t0 = time.time() # tik with mx.autograd.record(): pred = model(batch, h, c) loss = L(pred, batch.label) loss.backward() trainer.step(args.batch_size) trainer_emb.step(args.batch_size) if step >= 3: dur.append(time.time() - t0) # tok if step > 0 and step % args.log_every == 0: pred = pred.argmax(axis=1).astype(batch.label.dtype) acc = (batch.label == pred).sum() root_ids = [ i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degrees(i) == 0 ] root_acc = np.sum( batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids] ) print( "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}".format( epoch, step, loss.sum().asscalar(), 1.0 * acc.asscalar() / len(batch.label), 1.0 * root_acc / len(root_ids), np.mean(dur), ) ) print( "Epoch {:05d} training time {:.4f}s".format( epoch, time.time() - t_epoch ) ) # eval on dev set accs = [] root_accs = [] for step, batch in enumerate(dev_loader): g = batch.graph n = g.number_of_nodes() h = mx.nd.zeros((n, args.h_size), ctx=ctx) c = mx.nd.zeros((n, args.h_size), ctx=ctx) pred = model(batch, h, c).argmax(1).astype(batch.label.dtype) acc = (batch.label == pred).sum().asscalar() accs.append([acc, len(batch.label)]) root_ids = [ i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degrees(i) == 0 ] root_acc = np.sum( batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids] ) root_accs.append([root_acc, len(root_ids)]) dev_acc = ( 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs]) ) dev_root_acc = ( 1.0 * np.sum([x[0] for x in root_accs]) / np.sum([x[1] for x in root_accs]) ) print( "Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format( epoch, dev_acc, dev_root_acc ) ) if dev_root_acc > best_dev_acc: best_dev_acc = dev_root_acc best_epoch = epoch model.save_parameters("best_{}.params".format(args.seed)) else: if best_epoch <= epoch - 10: break # lr 
decay trainer.set_learning_rate(max(1e-5, trainer.learning_rate * 0.99)) print(trainer.learning_rate) trainer_emb.set_learning_rate( max(1e-5, trainer_emb.learning_rate * 0.99) ) print(trainer_emb.learning_rate) # test model.load_parameters("best_{}.params".format(args.seed)) accs = [] root_accs = [] for step, batch in enumerate(test_loader): g = batch.graph n = g.number_of_nodes() h = mx.nd.zeros((n, args.h_size), ctx=ctx) c = mx.nd.zeros((n, args.h_size), ctx=ctx) pred = model(batch, h, c).argmax(axis=1).astype(batch.label.dtype) acc = (batch.label == pred).sum().asscalar() accs.append([acc, len(batch.label)]) root_ids = [ i for i in range(batch.graph.number_of_nodes()) if batch.graph.out_degrees(i) == 0 ] root_acc = np.sum( batch.label.asnumpy()[root_ids] == pred.asnumpy()[root_ids] ) root_accs.append([root_acc, len(root_ids)]) test_acc = 1.0 * np.sum([x[0] for x in accs]) / np.sum([x[1] for x in accs]) test_root_acc = ( 1.0 * np.sum([x[0] for x in root_accs]) / np.sum([x[1] for x in root_accs]) ) print( "------------------------------------------------------------------------------------" ) print( "Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format( best_epoch, test_acc, test_root_acc ) ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--gpu", type=int, default=0) parser.add_argument("--seed", type=int, default=41) parser.add_argument("--batch-size", type=int, default=256) parser.add_argument("--child-sum", action="store_true") parser.add_argument("--x-size", type=int, default=300) parser.add_argument("--h-size", type=int, default=150) parser.add_argument("--epochs", type=int, default=100) parser.add_argument("--log-every", type=int, default=5) parser.add_argument("--lr", type=float, default=0.05) parser.add_argument("--weight-decay", type=float, default=1e-4) parser.add_argument("--dropout", type=float, default=0.5) parser.add_argument("--use-glove", action="store_true") args = parser.parse_args() print(args) main(args) 
class ChildSumTreeLSTMCell(_TreeLSTMCell):
    """Child-sum Tree-LSTM cell.

    Children hidden states are summed before computing the input/output/update
    gates, while a separate forget gate is computed for every child.

    Parameters
    ----------
    x_size : int
        Input feature size. Accepted for signature parity with
        ``TreeLSTMCell``; the Dense layers infer their input size lazily.
    h_size : int
        Hidden state size.
    """

    def __init__(self, x_size, h_size):
        # The base class needs h_size to create its (1, 3 * h_size) bias;
        # calling it without the argument raised TypeError on construction.
        super(ChildSumTreeLSTMCell, self).__init__(h_size)
        self.W_iou = gluon.nn.Dense(3 * h_size, use_bias=False)
        self.U_iou = gluon.nn.Dense(3 * h_size, use_bias=False)
        # flatten=False applies U_f to the last axis only, i.e. to each
        # child's hidden state separately. With the default flatten=True the
        # child axis of the mailbox would be folded into the feature axis.
        self.U_f = gluon.nn.Dense(h_size, flatten=False)

    def reduce_func(self, nodes):
        """Aggregate children messages into ``iou`` and ``c`` for each node."""
        # Axis 1 of the mailbox enumerates the children of each node.
        h_tild = nodes.mailbox["h"].sum(axis=1)
        # One forget gate per child, applied to that child's cell state.
        f = self.U_f(nodes.mailbox["h"]).sigmoid()
        c = (f * nodes.mailbox["c"]).sum(axis=1)
        return {"iou": self.U_iou(h_tild), "c": c}
""" g = batch.graph g = g.to(self.ctx) # feed embedding embeds = self.embedding(batch.wordid * batch.mask) wiou = self.cell.W_iou(self.dropout(embeds)) g.ndata["iou"] = wiou * batch.mask.expand_dims(-1).astype(wiou.dtype) g.ndata["h"] = h g.ndata["c"] = c # propagate dgl.prop_nodes_topo( g, message_func=self.cell.message_func, reduce_func=self.cell.reduce_func, apply_node_func=self.cell.apply_node_func, ) # compute logits h = self.dropout(g.ndata.pop("h")) logits = self.linear(h) return logits ================================================ FILE: examples/pytorch/GATNE-T/README.md ================================================ Representation Learning for Attributed Multiplex Heterogeneous Network (GATNE) ============ - Paper link: [https://arxiv.org/abs/1905.01669](https://arxiv.org/abs/1905.01669) - Author's code repo: [https://github.com/THUDM/GATNE](https://github.com/THUDM/GATNE). Note that only GATNE-T is implemented here. Requirements ------------ - requirements ```bash pip install -r requirements.txt ``` Also requires PyTorch 1.7.0+. Datasets -------- To prepare the datasets: 1. ```bash mkdir data cd data ``` 2. Download datasets from the following links: - example: https://s3.us-west-2.amazonaws.com/dgl-data/dataset/recsys/GATNE/example.zip - amazon: https://s3.us-west-2.amazonaws.com/dgl-data/dataset/recsys/GATNE/amazon.zip - youtube: https://s3.us-west-2.amazonaws.com/dgl-data/dataset/recsys/GATNE/youtube.zip - twitter: https://s3.us-west-2.amazonaws.com/dgl-data/dataset/recsys/GATNE/twitter.zip 3.
Unzip the datasets Training -------- Run with the following (available dataset: "example", "youtube", "amazon") ```bash python src/main.py --input data/example ``` To run on the "twitter" dataset, use ```bash python src/main.py --input data/twitter --eval-type 1 --gpu 0 ``` For a big dataset, use the sparse version to avoid CUDA out-of-memory errors in the backward pass ```bash python src/main_sparse.py --input data/example --gpu 0 ``` If you have multiple GPUs, you can also accelerate training with [`DistributedDataParallel`](https://pytorch.org/docs/stable/generated/torch.nn.parallel.DistributedDataParallel.html) ```bash python src/main_sparse_multi_gpus.py --input data/example --gpu 0,1 ``` **It is worth noting that DistributedDataParallel will cause more CUDA memory consumption and a certain loss of performance.** Results ------- All the results match the [official code](https://github.com/THUDM/GATNE/blob/master/src/main_pytorch.py) with the same hyperparameter values, including the twitter dataset (auc, pr, f1 are 76.29, 76.17, 69.34, respectively).
def get_graph(network_data, vocab):
    """Build graph, treat all nodes as the same type

    Parameters
    ----------
    network_data: a dict
        keys describing the edge types, values representing edges
    vocab: a dict
        mapping node IDs to node indices

    Output
    ------
    DGLGraph
        a heterogenous graph, with one node type and different edge types
    """
    node_type = "_N"  # '_N' can be replaced by an arbitrary name
    num_nodes_dict = {node_type: len(vocab)}
    data_dict = {}
    for edge_type, edges in network_data.items():
        src, dst = [], []
        for edge in edges:
            u, v = vocab[edge[0]], vocab[edge[1]]
            # Every input edge is inserted in both directions.
            src += [u, v]
            dst += [v, u]
        data_dict[(node_type, edge_type, node_type)] = (src, dst)
    return dgl.heterograph(data_dict, num_nodes_dict)
class NSLoss(nn.Module):
    """Negative-sampling loss with one learnable context vector per node.

    Parameters
    ----------
    num_nodes : int
        Number of nodes; one context vector is kept per node.
    num_sampled : int
        Number of negative nodes drawn for every positive pair.
    embedding_size : int
        Dimensionality of the embeddings passed to ``forward``.
    """

    def __init__(self, num_nodes, num_sampled, embedding_size):
        super(NSLoss, self).__init__()
        self.num_nodes = num_nodes
        self.num_sampled = num_sampled
        self.embedding_size = embedding_size
        self.weights = Parameter(torch.FloatTensor(num_nodes, embedding_size))
        # Sampling weight of node k is proportional to
        # (log(k + 2) - log(k + 1)) / log(num_nodes + 1); the vector is then
        # L2-normalised. It is a plain tensor attribute, not a registered
        # buffer or parameter.
        self.sample_weights = F.normalize(
            torch.Tensor(
                [
                    (math.log(k + 2) - math.log(k + 1))
                    / math.log(num_nodes + 1)
                    for k in range(num_nodes)
                ]
            ),
            dim=0,
        )

        self.reset_parameters()

    def reset_parameters(self):
        """Draw the context vectors from N(0, 1 / embedding_size)."""
        self.weights.data.normal_(std=1.0 / math.sqrt(self.embedding_size))

    def forward(self, input, embs, label):
        """Return the mean negative-sampling loss of a batch.

        Parameters
        ----------
        input : Tensor
            Only its first dimension (the batch size ``n``) is used.
        embs : Tensor
            Embeddings of the head nodes, indexed as ``(n, embedding_size)``.
        label : Tensor
            Indices of the positive (tail) nodes, one per batch entry.

        Returns
        -------
        Tensor
            Scalar loss: minus the summed log-likelihood divided by ``n``.
        """
        n = input.shape[0]
        # F.logsigmoid is the numerically stable form of log(sigmoid(x)); the
        # naive composition underflows to log(0) = -inf for very negative x.
        log_target = F.logsigmoid(
            torch.sum(torch.mul(embs, self.weights[label]), 1)
        )
        negs = torch.multinomial(
            self.sample_weights, self.num_sampled * n, replacement=True
        ).view(n, self.num_sampled)
        # Negated context vectors turn sigmoid(-x) into the "not a neighbour"
        # probability for the sampled nodes.
        noise = torch.neg(self.weights[negs])
        sum_log_sampled = torch.sum(
            F.logsigmoid(torch.bmm(noise, embs.unsqueeze(2))), 1
        ).squeeze()

        loss = log_target + sum_log_sampled
        return -loss.sum() / n
collate_fn=neighbor_sampler.sample, shuffle=True, num_workers=num_workers, pin_memory=True, ) model = DGLGATNE( num_nodes, embedding_size, embedding_u_size, edge_types, edge_type_count, dim_a, ) nsloss = NSLoss(num_nodes, num_sampled, embedding_size) model.to(device) nsloss.to(device) optimizer = torch.optim.Adam( [{"params": model.parameters()}, {"params": nsloss.parameters()}], lr=1e-3, ) best_score = 0 patience = 0 for epoch in range(epochs): model.train() random.shuffle(train_pairs) data_iter = tqdm( train_dataloader, desc="epoch %d" % (epoch), total=(len(train_pairs) + (batch_size - 1)) // batch_size, ) avg_loss = 0.0 for i, (block, head_invmap, tails, block_types) in enumerate(data_iter): optimizer.zero_grad() # embs: [batch_size, edge_type_count, embedding_size] block_types = block_types.to(device) embs = model(block[0].to(device))[head_invmap] embs = embs.gather( 1, block_types.view(-1, 1, 1).expand( embs.shape[0], 1, embs.shape[2] ), )[:, 0] loss = nsloss( block[0].dstdata[dgl.NID][head_invmap].to(device), embs, tails.to(device), ) loss.backward() optimizer.step() avg_loss += loss.item() post_fix = { "epoch": epoch, "iter": i, "avg_loss": avg_loss / (i + 1), "loss": loss.item(), } data_iter.set_postfix(post_fix) model.eval() # {'1': {}, '2': {}} final_model = dict( zip(edge_types, [dict() for _ in range(edge_type_count)]) ) for i in range(num_nodes): train_inputs = ( torch.tensor([i for _ in range(edge_type_count)]) .unsqueeze(1) .to(device) ) # [i, i] train_types = ( torch.tensor(list(range(edge_type_count))) .unsqueeze(1) .to(device) ) # [0, 1] pairs = torch.cat( (train_inputs, train_inputs, train_types), dim=1 ) # (2, 3) ( train_blocks, train_invmap, fake_tails, train_types, ) = neighbor_sampler.sample(pairs) node_emb = model(train_blocks[0].to(device))[train_invmap] node_emb = node_emb.gather( 1, train_types.to(device) .view(-1, 1, 1) .expand(node_emb.shape[0], 1, node_emb.shape[2]), )[:, 0] for j in range(edge_type_count): 
final_model[edge_types[j]][index2word[i]] = ( node_emb[j].cpu().detach().numpy() ) valid_aucs, valid_f1s, valid_prs = [], [], [] test_aucs, test_f1s, test_prs = [], [], [] for i in range(edge_type_count): if args.eval_type == "all" or edge_types[i] in args.eval_type.split( "," ): tmp_auc, tmp_f1, tmp_pr = evaluate( final_model[edge_types[i]], valid_true_data_by_edge[edge_types[i]], valid_false_data_by_edge[edge_types[i]], num_workers, ) valid_aucs.append(tmp_auc) valid_f1s.append(tmp_f1) valid_prs.append(tmp_pr) tmp_auc, tmp_f1, tmp_pr = evaluate( final_model[edge_types[i]], testing_true_data_by_edge[edge_types[i]], testing_false_data_by_edge[edge_types[i]], num_workers, ) test_aucs.append(tmp_auc) test_f1s.append(tmp_f1) test_prs.append(tmp_pr) print("valid auc:", np.mean(valid_aucs)) print("valid pr:", np.mean(valid_prs)) print("valid f1:", np.mean(valid_f1s)) average_auc = np.mean(test_aucs) average_f1 = np.mean(test_f1s) average_pr = np.mean(test_prs) cur_score = np.mean(valid_aucs) if cur_score > best_score: best_score = cur_score patience = 0 else: patience += 1 if patience > args.patience: print("Early Stopping") break return average_auc, average_f1, average_pr if __name__ == "__main__": args = parse_args() file_name = args.input print(args) training_data_by_type = load_training_data(file_name + "/train.txt") valid_true_data_by_edge, valid_false_data_by_edge = load_testing_data( file_name + "/valid.txt" ) testing_true_data_by_edge, testing_false_data_by_edge = load_testing_data( file_name + "/test.txt" ) start = time.time() average_auc, average_f1, average_pr = train_model(training_data_by_type) end = time.time() print("Overall ROC-AUC:", average_auc) print("Overall PR-AUC", average_pr) print("Overall F1:", average_f1) print("Training Time", end - start) ================================================ FILE: examples/pytorch/GATNE-T/src/main_sparse.py ================================================ import math import os import sys import time from 
def get_graph(network_data, vocab):
    """Assemble a single-node-type heterograph from typed edge lists.

    Parameters
    ----------
    network_data: a dict
        keys describing the edge types, values representing edges
    vocab: a dict
        mapping node IDs to node indices

    Output
    ------
    DGLGraph
        a heterogeneous graph, with one node type and different edge types
    """
    ntype = "_N"  # '_N' can be replaced by an arbitrary name
    edges_by_relation = {}
    for etype, edge_list in network_data.items():
        sources, targets = [], []
        for edge in edge_list:
            u, v = vocab[edge[0]], vocab[edge[1]]
            # Each input edge is stored in both directions.
            sources += [u, v]
            targets += [v, u]
        edges_by_relation[(ntype, etype, ntype)] = (sources, targets)
    return dgl.heterograph(edges_by_relation, {ntype: len(vocab)})
edge_type_count self.dim_a = dim_a self.node_embeddings = nn.Embedding( num_nodes, embedding_size, sparse=True ) self.node_type_embeddings = nn.Embedding( num_nodes * edge_type_count, embedding_u_size, sparse=True ) self.trans_weights = Parameter( torch.FloatTensor(edge_type_count, embedding_u_size, embedding_size) ) self.trans_weights_s1 = Parameter( torch.FloatTensor(edge_type_count, embedding_u_size, dim_a) ) self.trans_weights_s2 = Parameter( torch.FloatTensor(edge_type_count, dim_a, 1) ) self.reset_parameters() def reset_parameters(self): self.node_embeddings.weight.data.uniform_(-1.0, 1.0) self.node_type_embeddings.weight.data.uniform_(-1.0, 1.0) self.trans_weights.data.normal_( std=1.0 / math.sqrt(self.embedding_size) ) self.trans_weights_s1.data.normal_( std=1.0 / math.sqrt(self.embedding_size) ) self.trans_weights_s2.data.normal_( std=1.0 / math.sqrt(self.embedding_size) ) # embs: [batch_size, embedding_size] def forward(self, block): input_nodes = block.srcdata[dgl.NID] output_nodes = block.dstdata[dgl.NID] batch_size = block.number_of_dst_nodes() node_type_embed = [] with block.local_scope(): for i in range(self.edge_type_count): edge_type = self.edge_types[i] block.srcdata[edge_type] = self.node_type_embeddings( input_nodes * self.edge_type_count + i ) block.dstdata[edge_type] = self.node_type_embeddings( output_nodes * self.edge_type_count + i ) block.update_all( fn.copy_u(edge_type, "m"), fn.sum("m", edge_type), etype=edge_type, ) node_type_embed.append(block.dstdata[edge_type]) node_type_embed = torch.stack(node_type_embed, 1) tmp_node_type_embed = node_type_embed.unsqueeze(2).view( -1, 1, self.embedding_u_size ) trans_w = ( self.trans_weights.unsqueeze(0) .repeat(batch_size, 1, 1, 1) .view(-1, self.embedding_u_size, self.embedding_size) ) trans_w_s1 = ( self.trans_weights_s1.unsqueeze(0) .repeat(batch_size, 1, 1, 1) .view(-1, self.embedding_u_size, self.dim_a) ) trans_w_s2 = ( self.trans_weights_s2.unsqueeze(0) .repeat(batch_size, 1, 1, 1) .view(-1, 
class NSLoss(nn.Module):
    """Negative-sampling loss with a learnable (sparse) context embedding table.

    Parameters
    ----------
    num_nodes: int
        number of rows of the context embedding table
    num_sampled: int
        number of negative samples drawn per positive pair
    embedding_size: int
        dimensionality of the embeddings
    """

    def __init__(self, num_nodes, num_sampled, embedding_size):
        super(NSLoss, self).__init__()
        self.num_nodes = num_nodes
        self.num_sampled = num_sampled
        self.embedding_size = embedding_size
        # [ (log(i+2) - log(i+1)) / log(num_nodes + 1)]
        # Kept as a plain tensor attribute (not a parameter or buffer), so it
        # is not moved by ``.to(device)``; forward() samples from it and then
        # moves the sampled indices to the input's device.
        self.sample_weights = F.normalize(
            torch.Tensor(
                [
                    (math.log(k + 2) - math.log(k + 1))
                    / math.log(num_nodes + 1)
                    for k in range(num_nodes)
                ]
            ),
            dim=0,
        )
        self.weights = nn.Embedding(num_nodes, embedding_size, sparse=True)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the context embeddings from N(0, 1 / embedding_size)."""
        self.weights.weight.data.normal_(
            std=1.0 / math.sqrt(self.embedding_size)
        )

    def forward(self, input, embs, label):
        """Return the mean negative-sampling loss of a batch.

        Parameters
        ----------
        input: Tensor
            only its first dimension (batch size ``n``) and its device are used
        embs: Tensor
            ``[n, embedding_size]`` embeddings of the head nodes
        label: LongTensor
            ``n`` indices of the positive context nodes

        Returns
        -------
        Tensor
            scalar loss, averaged over the ``n`` pairs
        """
        n = input.shape[0]
        # logsigmoid is the numerically stable form of log(sigmoid(x)): the
        # latter underflows to log(0) = -inf for strongly negative logits.
        log_target = F.logsigmoid(
            torch.sum(torch.mul(embs, self.weights(label)), 1)
        )
        negs = (
            torch.multinomial(
                self.sample_weights, self.num_sampled * n, replacement=True
            )
            .view(n, self.num_sampled)
            .to(input.device)
        )
        noise = torch.neg(self.weights(negs))
        # bmm: [n, num_sampled, emb] x [n, emb, 1] -> [n, num_sampled, 1];
        # summing over the samples leaves [n, 1]. Only that trailing axis is
        # squeezed so a batch of one keeps its batch dimension.
        sum_log_sampled = torch.sum(
            F.logsigmoid(torch.bmm(noise, embs.unsqueeze(2))), 1
        ).squeeze(1)

        loss = log_target + sum_log_sampled
        return -loss.sum() / n
embedding_size = args.dimensions embedding_u_size = args.edge_dim u_num = edge_type_count num_sampled = args.negative_samples dim_a = args.att_dim att_head = 1 neighbor_samples = args.neighbor_samples num_workers = args.workers device = torch.device( "cuda" if args.gpu is not None and torch.cuda.is_available() else "cpu" ) g = get_graph(network_data, vocab) all_walks = [] for i in range(edge_type_count): nodes = torch.LongTensor(type_nodes[i] * args.num_walks) traces, types = dgl.sampling.random_walk( g, nodes, metapath=[edge_types[i]] * (neighbor_samples - 1) ) all_walks.append(traces) train_pairs = generate_pairs(all_walks, args.window_size, num_workers) neighbor_sampler = NeighborSampler(g, [neighbor_samples]) train_dataloader = torch.utils.data.DataLoader( train_pairs, batch_size=batch_size, collate_fn=neighbor_sampler.sample, shuffle=True, num_workers=num_workers, pin_memory=True, ) model = DGLGATNE( num_nodes, embedding_size, embedding_u_size, edge_types, edge_type_count, dim_a, ) nsloss = NSLoss(num_nodes, num_sampled, embedding_size) model.to(device) nsloss.to(device) embeddings_params = list( map(id, model.node_embeddings.parameters()) ) + list(map(id, model.node_type_embeddings.parameters())) weights_params = list(map(id, nsloss.weights.parameters())) optimizer = torch.optim.Adam( [ { "params": filter( lambda p: id(p) not in embeddings_params, model.parameters(), ) }, { "params": filter( lambda p: id(p) not in weights_params, nsloss.parameters(), ) }, ], lr=1e-3, ) sparse_optimizer = torch.optim.SparseAdam( [ {"params": model.node_embeddings.parameters()}, {"params": model.node_type_embeddings.parameters()}, {"params": nsloss.weights.parameters()}, ], lr=1e-3, ) best_score = 0 patience = 0 for epoch in range(epochs): model.train() random.shuffle(train_pairs) data_iter = tqdm.tqdm( train_dataloader, desc="epoch %d" % (epoch), total=(len(train_pairs) + (batch_size - 1)) // batch_size, ) avg_loss = 0.0 for i, (block, head_invmap, tails, block_types) in 
enumerate(data_iter): optimizer.zero_grad() sparse_optimizer.zero_grad() # embs: [batch_size, edge_type_count, embedding_size] block_types = block_types.to(device) embs = model(block[0].to(device))[head_invmap] embs = embs.gather( 1, block_types.view(-1, 1, 1).expand( embs.shape[0], 1, embs.shape[2] ), )[:, 0] loss = nsloss( block[0].dstdata[dgl.NID][head_invmap].to(device), embs, tails.to(device), ) loss.backward() optimizer.step() sparse_optimizer.step() avg_loss += loss.item() post_fix = { "epoch": epoch, "iter": i, "avg_loss": avg_loss / (i + 1), "loss": loss.item(), } data_iter.set_postfix(post_fix) model.eval() # {'1': {}, '2': {}} final_model = dict( zip(edge_types, [dict() for _ in range(edge_type_count)]) ) for i in range(num_nodes): train_inputs = ( torch.tensor([i for _ in range(edge_type_count)]) .unsqueeze(1) .to(device) ) # [i, i] train_types = ( torch.tensor(list(range(edge_type_count))) .unsqueeze(1) .to(device) ) # [0, 1] pairs = torch.cat( (train_inputs, train_inputs, train_types), dim=1 ) # (2, 3) ( train_blocks, train_invmap, fake_tails, train_types, ) = neighbor_sampler.sample(pairs.cpu()) node_emb = model(train_blocks[0].to(device))[train_invmap] node_emb = node_emb.gather( 1, train_types.to(device) .view(-1, 1, 1) .expand(node_emb.shape[0], 1, node_emb.shape[2]), )[:, 0] for j in range(edge_type_count): final_model[edge_types[j]][index2word[i]] = ( node_emb[j].cpu().detach().numpy() ) valid_aucs, valid_f1s, valid_prs = [], [], [] test_aucs, test_f1s, test_prs = [], [], [] for i in range(edge_type_count): if args.eval_type == "all" or edge_types[i] in args.eval_type.split( "," ): tmp_auc, tmp_f1, tmp_pr = evaluate( final_model[edge_types[i]], valid_true_data_by_edge[edge_types[i]], valid_false_data_by_edge[edge_types[i]], num_workers, ) valid_aucs.append(tmp_auc) valid_f1s.append(tmp_f1) valid_prs.append(tmp_pr) tmp_auc, tmp_f1, tmp_pr = evaluate( final_model[edge_types[i]], testing_true_data_by_edge[edge_types[i]], 
testing_false_data_by_edge[edge_types[i]], num_workers, ) test_aucs.append(tmp_auc) test_f1s.append(tmp_f1) test_prs.append(tmp_pr) print("valid auc:", np.mean(valid_aucs)) print("valid pr:", np.mean(valid_prs)) print("valid f1:", np.mean(valid_f1s)) average_auc = np.mean(test_aucs) average_f1 = np.mean(test_f1s) average_pr = np.mean(test_prs) cur_score = np.mean(valid_aucs) if cur_score > best_score: best_score = cur_score patience = 0 else: patience += 1 if patience > args.patience: print("Early Stopping") break return average_auc, average_f1, average_pr if __name__ == "__main__": args = parse_args() file_name = args.input print(args) training_data_by_type = load_training_data(file_name + "/train.txt") valid_true_data_by_edge, valid_false_data_by_edge = load_testing_data( file_name + "/valid.txt" ) testing_true_data_by_edge, testing_false_data_by_edge = load_testing_data( file_name + "/test.txt" ) start = time.time() average_auc, average_f1, average_pr = train_model(training_data_by_type) end = time.time() print("Overall ROC-AUC:", average_auc) print("Overall PR-AUC", average_pr) print("Overall F1:", average_f1) print("Training Time", end - start) ================================================ FILE: examples/pytorch/GATNE-T/src/main_sparse_multi_gpus.py ================================================ import datetime import math import os import sys import time from collections import defaultdict import numpy as np import torch import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F from numpy import random from torch.nn.parallel import DistributedDataParallel from torch.nn.parameter import Parameter from tqdm.auto import tqdm from utils import * import dgl import dgl.function as fn def setup_seed(seed): torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) np.random.seed(seed) random.seed(seed) torch.backends.cudnn.deterministic = True def get_graph(network_data, vocab): """Build graph, treat all nodes as the same type 
class NeighborSampler(object):
    """Collate ``(head, tail, edge_type)`` rows into sampled message-passing blocks."""

    def __init__(self, g, num_fanouts):
        # g: graph that neighbors are sampled from
        # num_fanouts: one fanout per block, listed from input side to output side
        self.g, self.num_fanouts = g, num_fanouts

    def sample(self, pairs):
        """Sample neighbor blocks for the distinct head nodes of ``pairs``.

        Returns ``(blocks, head_invmap, tails, types)``, where ``head_invmap``
        maps every row of ``pairs`` to the position of its head among the
        unique heads used as seeds.
        """
        pair_matrix = np.stack(pairs)
        head_col = pair_matrix[:, 0]
        tail_col = pair_matrix[:, 1]
        type_col = pair_matrix[:, 2]

        frontier, head_invmap = torch.unique(
            torch.LongTensor(head_col), return_inverse=True
        )

        # Blocks are produced from the output layer backwards, then flipped
        # so the returned list runs from the input layer to the output layer.
        output_first = []
        for fanout in reversed(self.num_fanouts):
            neighborhood = dgl.sampling.sample_neighbors(
                self.g, frontier, fanout
            )
            layer_block = dgl.to_block(neighborhood, frontier)
            frontier = layer_block.srcdata[dgl.NID]
            output_first.append(layer_block)
        blocks = output_first[::-1]

        return (
            blocks,
            torch.LongTensor(head_invmap),
            torch.LongTensor(tail_col),
            torch.LongTensor(type_col),
        )
= Parameter( torch.FloatTensor(edge_type_count, embedding_u_size, dim_a) ) self.trans_weights_s2 = Parameter( torch.FloatTensor(edge_type_count, dim_a, 1) ) self.reset_parameters() def reset_parameters(self): self.node_embeddings.weight.data.uniform_(-1.0, 1.0) self.node_type_embeddings.weight.data.uniform_(-1.0, 1.0) self.trans_weights.data.normal_( std=1.0 / math.sqrt(self.embedding_size) ) self.trans_weights_s1.data.normal_( std=1.0 / math.sqrt(self.embedding_size) ) self.trans_weights_s2.data.normal_( std=1.0 / math.sqrt(self.embedding_size) ) # embs: [batch_size, embedding_size] def forward(self, block): input_nodes = block.srcdata[dgl.NID] output_nodes = block.dstdata[dgl.NID] batch_size = block.number_of_dst_nodes() node_type_embed = [] with block.local_scope(): for i in range(self.edge_type_count): edge_type = self.edge_types[i] block.srcdata[edge_type] = self.node_type_embeddings( input_nodes * self.edge_type_count + i ) block.dstdata[edge_type] = self.node_type_embeddings( output_nodes * self.edge_type_count + i ) block.update_all( fn.copy_u(edge_type, "m"), fn.sum("m", edge_type), etype=edge_type, ) node_type_embed.append(block.dstdata[edge_type]) node_type_embed = torch.stack(node_type_embed, 1) tmp_node_type_embed = node_type_embed.unsqueeze(2).view( -1, 1, self.embedding_u_size ) trans_w = ( self.trans_weights.unsqueeze(0) .repeat(batch_size, 1, 1, 1) .view(-1, self.embedding_u_size, self.embedding_size) ) trans_w_s1 = ( self.trans_weights_s1.unsqueeze(0) .repeat(batch_size, 1, 1, 1) .view(-1, self.embedding_u_size, self.dim_a) ) trans_w_s2 = ( self.trans_weights_s2.unsqueeze(0) .repeat(batch_size, 1, 1, 1) .view(-1, self.dim_a, 1) ) attention = ( F.softmax( torch.matmul( torch.tanh( torch.matmul(tmp_node_type_embed, trans_w_s1) ), trans_w_s2, ) .squeeze(2) .view(-1, self.edge_type_count), dim=1, ) .unsqueeze(1) .repeat(1, self.edge_type_count, 1) ) node_type_embed = torch.matmul(attention, node_type_embed).view( -1, 1, self.embedding_u_size ) 
class NSLoss(nn.Module):
    """Negative-sampling loss with a learnable (sparse) context embedding table.

    Parameters
    ----------
    num_nodes: int
        number of rows of the context embedding table
    num_sampled: int
        number of negative samples drawn per positive pair
    embedding_size: int
        dimensionality of the embeddings
    """

    def __init__(self, num_nodes, num_sampled, embedding_size):
        super(NSLoss, self).__init__()
        self.num_nodes = num_nodes
        self.num_sampled = num_sampled
        self.embedding_size = embedding_size
        # [ (log(i+2) - log(i+1)) / log(num_nodes + 1)]
        # Plain tensor attribute (neither parameter nor buffer): it is not
        # moved by ``.to(device)``, so forward() samples from it and then
        # transfers the sampled indices to the input's device.
        self.sample_weights = F.normalize(
            torch.Tensor(
                [
                    (math.log(k + 2) - math.log(k + 1))
                    / math.log(num_nodes + 1)
                    for k in range(num_nodes)
                ]
            ),
            dim=0,
        )
        self.weights = nn.Embedding(num_nodes, embedding_size, sparse=True)
        self.reset_parameters()

    def reset_parameters(self):
        """Initialise the context embeddings from N(0, 1 / embedding_size)."""
        self.weights.weight.data.normal_(
            std=1.0 / math.sqrt(self.embedding_size)
        )

    def forward(self, input, embs, label):
        """Return the mean negative-sampling loss of a batch.

        Parameters
        ----------
        input: Tensor
            only its first dimension (batch size ``n``) and its device are used
        embs: Tensor
            ``[n, embedding_size]`` embeddings of the head nodes
        label: LongTensor
            ``n`` indices of the positive context nodes

        Returns
        -------
        Tensor
            scalar loss, averaged over the ``n`` pairs
        """
        n = input.shape[0]
        # Use logsigmoid instead of log(sigmoid(x)): sigmoid underflows to 0
        # for strongly negative logits, which would make the loss infinite.
        log_target = F.logsigmoid(
            torch.sum(torch.mul(embs, self.weights(label)), 1)
        )
        negs = (
            torch.multinomial(
                self.sample_weights, self.num_sampled * n, replacement=True
            )
            .view(n, self.num_sampled)
            .to(input.device)
        )
        noise = torch.neg(self.weights(negs))
        # [n, num_sampled, emb] x [n, emb, 1] -> [n, num_sampled, 1]; after
        # the sum over samples only the trailing singleton axis is removed,
        # so a batch of size one keeps its batch dimension.
        sum_log_sampled = torch.sum(
            F.logsigmoid(torch.bmm(noise, embs.unsqueeze(2))), 1
        ).squeeze(1)

        loss = log_target + sum_log_sampled
        return -loss.sum() / n
args.edge_dim u_num = edge_type_count num_sampled = args.negative_samples dim_a = args.att_dim att_head = 1 neighbor_samples = args.neighbor_samples num_workers = args.workers neighbor_sampler = NeighborSampler(g, [neighbor_samples]) if n_gpus > 1: train_sampler = torch.utils.data.distributed.DistributedSampler( train_pairs, num_replicas=world_size, rank=proc_id, shuffle=True, drop_last=False, ) train_dataloader = torch.utils.data.DataLoader( train_pairs, batch_size=batch_size, collate_fn=neighbor_sampler.sample, num_workers=num_workers, sampler=train_sampler, pin_memory=True, ) else: train_dataloader = torch.utils.data.DataLoader( train_pairs, batch_size=batch_size, collate_fn=neighbor_sampler.sample, num_workers=num_workers, shuffle=True, drop_last=False, pin_memory=True, ) model = DGLGATNE( num_nodes, embedding_size, embedding_u_size, edge_types, edge_type_count, dim_a, ) nsloss = NSLoss(num_nodes, num_sampled, embedding_size) model.to(dev_id) if n_gpus > 1: model = DistributedDataParallel( model, device_ids=[dev_id], output_device=dev_id ) nsloss.to(dev_id) if n_gpus > 1: mmodel = model.module else: mmodel = model embeddings_params = list( map(id, mmodel.node_embeddings.parameters()) ) + list(map(id, mmodel.node_type_embeddings.parameters())) weights_params = list(map(id, nsloss.weights.parameters())) optimizer = torch.optim.Adam( [ { "params": filter( lambda p: id(p) not in embeddings_params, model.parameters(), ) }, { "params": filter( lambda p: id(p) not in weights_params, nsloss.parameters(), ) }, ], lr=2e-3, ) sparse_optimizer = torch.optim.SparseAdam( [ {"params": mmodel.node_embeddings.parameters()}, {"params": mmodel.node_type_embeddings.parameters()}, {"params": nsloss.weights.parameters()}, ], lr=2e-3, ) if n_gpus > 1: torch.distributed.barrier() if proc_id == 0: start = time.time() for epoch in range(epochs): if n_gpus > 1: train_sampler.set_epoch(epoch) model.train() data_iter = train_dataloader if proc_id == 0: data_iter = tqdm( train_dataloader, 
desc="epoch %d" % (epoch), total=(len(train_pairs) + (batch_size - 1)) // batch_size, ) avg_loss = 0.0 for i, (block, head_invmap, tails, block_types) in enumerate(data_iter): optimizer.zero_grad() sparse_optimizer.zero_grad() # embs: [batch_size, edge_type_count, embedding_size] block_types = block_types.to(dev_id) embs = model(block[0].to(dev_id))[head_invmap] embs = embs.gather( 1, block_types.view(-1, 1, 1).expand( embs.shape[0], 1, embs.shape[2] ), )[:, 0] loss = nsloss( block[0].dstdata[dgl.NID][head_invmap].to(dev_id), embs, tails.to(dev_id), ) loss.backward() optimizer.step() sparse_optimizer.step() if proc_id == 0: avg_loss += loss.item() post_fix = { "avg_loss": avg_loss / (i + 1), "loss": loss.item(), } data_iter.set_postfix(post_fix) if n_gpus > 1: torch.distributed.barrier() if proc_id == 0: model.eval() # {'1': {}, '2': {}} final_model = dict( zip(edge_types, [dict() for _ in range(edge_type_count)]) ) for i in range(num_nodes): train_inputs = ( torch.tensor([i for _ in range(edge_type_count)]) .unsqueeze(1) .to(dev_id) ) # [i, i] train_types = ( torch.tensor(list(range(edge_type_count))) .unsqueeze(1) .to(dev_id) ) # [0, 1] pairs = torch.cat( (train_inputs, train_inputs, train_types), dim=1 ) # (2, 3) ( train_blocks, train_invmap, fake_tails, train_types, ) = neighbor_sampler.sample(pairs.cpu()) node_emb = model(train_blocks[0].to(dev_id))[train_invmap] node_emb = node_emb.gather( 1, train_types.to(dev_id) .view(-1, 1, 1) .expand(node_emb.shape[0], 1, node_emb.shape[2]), )[:, 0] for j in range(edge_type_count): final_model[edge_types[j]][index2word[i]] = ( node_emb[j].cpu().detach().numpy() ) valid_aucs, valid_f1s, valid_prs = [], [], [] test_aucs, test_f1s, test_prs = [], [], [] for i in range(edge_type_count): if args.eval_type == "all" or edge_types[ i ] in args.eval_type.split(","): tmp_auc, tmp_f1, tmp_pr = evaluate( final_model[edge_types[i]], valid_true_data_by_edge[edge_types[i]], valid_false_data_by_edge[edge_types[i]], num_workers, ) 
def train_model(network_data):
    """Prepare the training data once and launch one worker per GPU.

    The graph, the random walks and the training pairs are built in the
    calling process. With a single device ``run`` is called in-process;
    otherwise ``mp.spawn`` starts one ``run`` process per listed device.
    Settings are read from the module-level ``args`` namespace.

    Parameters
    ----------
    network_data: a dict
        keys describing the edge types, values representing edges

    Raises
    ------
    ValueError
        if ``--gpu`` was not given (its default is ``None``) or contains a
        non-integer device ID.
    """
    # Fail early and explicitly: without this check a missing --gpu surfaces
    # as "'NoneType' object has no attribute 'split'" after the vocabulary
    # has already been built.
    if args.gpu is None:
        raise ValueError(
            "--gpu is required: pass a comma separated list of GPU device IDs"
        )
    devices = [int(dev_id) for dev_id in args.gpu.split(",")]
    n_gpus = len(devices)

    index2word, vocab, type_nodes = generate_vocab(network_data)
    edge_types = list(network_data.keys())
    num_nodes = len(index2word)
    edge_type_count = len(edge_types)
    neighbor_samples = args.neighbor_samples
    num_workers = args.workers

    g = get_graph(network_data, vocab)

    # One batch of random walks per edge type; every walk follows a single
    # edge type for (neighbor_samples - 1) hops.
    all_walks = []
    for i in range(edge_type_count):
        nodes = torch.LongTensor(type_nodes[i] * args.num_walks)
        traces, _ = dgl.sampling.random_walk(
            g, nodes, metapath=[edge_types[i]] * (neighbor_samples - 1)
        )
        all_walks.append(traces)

    train_pairs = generate_pairs(all_walks, args.window_size, num_workers)

    data = g, train_pairs, index2word, edge_types, num_nodes, edge_type_count
    if n_gpus == 1:
        run(0, n_gpus, args, devices, data)
    else:
        mp.spawn(run, args=(n_gpus, args, devices, data), nprocs=n_gpus)
================================================ FILE: examples/pytorch/GATNE-T/src/utils.py ================================================ import argparse import multiprocessing import time from collections import defaultdict from functools import partial, reduce, wraps import networkx as nx import numpy as np import torch from gensim.models.keyedvectors import Vocab from six import iteritems from sklearn.metrics import auc, f1_score, precision_recall_curve, roc_auc_score def parse_args(): parser = argparse.ArgumentParser() parser.add_argument( "--input", type=str, default="data/amazon", help="Input dataset path" ) parser.add_argument( "--features", type=str, default=None, help="Input node features" ) parser.add_argument( "--epoch", type=int, default=100, help="Number of epoch. Default is 100.", ) parser.add_argument( "--batch-size", type=int, default=64, help="Number of batch_size. Default is 64.", ) parser.add_argument( "--eval-type", type=str, default="all", help="The edge type(s) for evaluation.", ) parser.add_argument( "--schema", type=str, default=None, help="The metapath schema (e.g., U-I-U,I-U-I).", ) parser.add_argument( "--dimensions", type=int, default=200, help="Number of dimensions. Default is 200.", ) parser.add_argument( "--edge-dim", type=int, default=10, help="Number of edge embedding dimensions. Default is 10.", ) parser.add_argument( "--att-dim", type=int, default=20, help="Number of attention dimensions. Default is 20.", ) parser.add_argument( "--walk-length", type=int, default=10, help="Length of walk per source. Default is 10.", ) parser.add_argument( "--num-walks", type=int, default=20, help="Number of walks per source. Default is 20.", ) parser.add_argument( "--window-size", type=int, default=5, help="Context size for optimization. Default is 5.", ) parser.add_argument( "--negative-samples", type=int, default=5, help="Negative samples for optimization. 
# for each line, the data is [edge_type, node, node]
def load_training_data(f_name):
    """Read training edges grouped by edge type.

    Every non-blank line is split on single spaces into
    ``edge_type node node``. Only the line terminator is removed, so a final
    line without a trailing newline keeps its last character; blank lines
    are skipped.

    Parameters
    ----------
    f_name: str
        path of the training file

    Returns
    -------
    dict
        edge type -> list of ``(node, node)`` tuples, in file order
    """
    print("We are loading data from:", f_name)
    edge_data_by_type = dict()
    all_nodes = set()
    with open(f_name, "r") as f:
        for line in f:
            record = line.rstrip("\r\n")
            if not record.strip():
                continue
            words = record.split(" ")
            x, y = words[1], words[2]
            edge_data_by_type.setdefault(words[0], list()).append((x, y))
            all_nodes.add(x)
            all_nodes.add(y)
    print("Total training nodes: " + str(len(all_nodes)))
    return edge_data_by_type


# for each line, the data is [edge_type, node, node, true_or_false]
def load_testing_data(f_name):
    """Read labelled evaluation edges grouped by edge type.

    Every non-blank line is split on single spaces into
    ``edge_type node node label``; a label equal to ``1`` marks a true edge,
    any other integer a false edge. Only the line terminator is removed, so
    a final line without a trailing newline is parsed correctly; blank lines
    are skipped.

    Parameters
    ----------
    f_name: str
        path of the validation or test file

    Returns
    -------
    tuple of dict
        ``(true_edges, false_edges)``, each mapping an edge type to a list of
        ``(node, node)`` tuples in file order
    """
    print("We are loading data from:", f_name)
    true_edge_data_by_type = dict()
    false_edge_data_by_type = dict()
    with open(f_name, "r") as f:
        for line in f:
            record = line.rstrip("\r\n")
            if not record.strip():
                continue
            words = record.split(" ")
            x, y = words[1], words[2]
            if int(words[3]) == 1:
                bucket = true_edge_data_by_type
            else:
                bucket = false_edge_data_by_type
            bucket.setdefault(words[0], list()).append((x, y))
    return true_edge_data_by_type, false_edge_data_by_type
def generate_pairs_parallel(walks, skip_window=None, layer_id=None):
    """Build ``(center, context, layer_id)`` triples for a chunk of walks.

    For every position of every walk, the nodes at distance 1 to
    ``skip_window`` on either side (when inside the walk) are paired with
    the node at that position.

    Parameters
    ----------
    walks: iterable
        walks, each an object with ``tolist()`` (e.g. a tensor or array row)
    skip_window: int
        maximum distance between the two nodes of a pair
    layer_id:
        value stored as the third element of every triple

    Returns
    -------
    list of tuple
    """
    pairs = []
    for walk in walks:
        walk = walk.tolist()
        walk_len = len(walk)
        for i, center in enumerate(walk):
            for j in range(1, skip_window + 1):
                if i - j >= 0:
                    pairs.append((center, walk[i - j], layer_id))
                if i + j < walk_len:
                    pairs.append((center, walk[i + j], layer_id))
    return pairs


def generate_pairs(all_walks, window_size, num_workers):
    """Generate the unique skip-gram training pairs of all walks in parallel.

    Parameters
    ----------
    all_walks: list
        one sliceable collection of walks per edge type; the position in the
        list is used as the layer id of the generated pairs
    window_size: int
        context size; pairs are formed within ``window_size // 2`` steps
    num_workers: int
        number of worker processes

    Returns
    -------
    numpy.ndarray
        the distinct ``[center, context, layer_id]`` rows, in no particular
        order
    """
    # for each node, choose the first neighbor and second neighbor of it to form pairs
    start_time = time.time()
    print("We are generating pairs with {} cores.".format(num_workers))

    pairs = []
    skip_window = window_size // 2
    # The context manager releases the worker processes even if a worker
    # raises.
    with multiprocessing.Pool(processes=num_workers) as pool:
        for layer_id, walks in enumerate(all_walks):
            num_walks = len(walks)
            # Ceiling division so the chunks cover every walk; flooring the
            # chunk size would silently drop the last
            # ``num_walks % num_workers`` walks.
            chunk_size = max(1, (num_walks + num_workers - 1) // num_workers)
            walks_list = [
                walks[start : start + chunk_size]
                for start in range(0, num_walks, chunk_size)
            ]
            worker = partial(
                generate_pairs_parallel,
                skip_window=skip_window,
                layer_id=layer_id,
            )
            for chunk_pairs in pool.map(worker, walks_list):
                pairs.extend(chunk_pairs)

    end_time = time.time()
    print("Generate pairs end, use {}s.".format(end_time - start_time))
    return np.array([list(pair) for pair in set(pairs)])


def generate_vocab(network_data):
    """Index every node that appears in ``network_data``.

    Node IDs are sorted before being numbered, so the same input always
    yields the same indices (set iteration order of strings varies between
    interpreter runs).

    Parameters
    ----------
    network_data: a dict
        keys describing the edge types, values representing edges

    Returns
    -------
    index2word: list
        node IDs, position = node index
    vocab: dict
        node ID -> node index
    nodes: list of list
        for every edge type (in ``network_data`` order) the sorted indices of
        the nodes that have an edge of that type
    """
    per_type_ids = []
    all_ids = set()
    for edges in network_data.values():
        type_ids = set()
        for edge in edges:
            type_ids.add(edge[0])
            type_ids.add(edge[1])
        per_type_ids.append(type_ids)
        all_ids |= type_ids

    index2word = sorted(all_ids)
    vocab = {word: index for index, word in enumerate(index2word)}
    nodes = [sorted(vocab[word] for word in type_ids) for type_ids in per_type_ids]
    return index2word, vocab, nodes
def evaluate(model, true_edges, false_edges, num_workers):
    """Score labelled edges with ``model`` and compute link-prediction metrics.

    Each edge is scored with ``get_score``; edges for which it returns
    ``None`` are ignored. Binary predictions for the F1 score label as
    positive every edge whose score is at least the ``k``-th highest score,
    where ``k`` is the number of scored true edges.

    Parameters
    ----------
    model: dict
        node ID -> embedding vector, passed to ``get_score``
    true_edges, false_edges: iterable
        edges that do / do not exist
    num_workers: int
        number of worker processes used for scoring

    Returns
    -------
    tuple
        ``(roc_auc, f1, pr_auc)``
    """
    score_edge = partial(get_score, model)
    # The context manager releases the worker processes even if scoring
    # raises.
    with multiprocessing.Pool(processes=num_workers) as pool:
        tmp_true_score_list = pool.map(score_edge, true_edges)
        tmp_false_score_list = pool.map(score_edge, false_edges)

    true_scores = [
        tmp_score for tmp_score in tmp_true_score_list if tmp_score is not None
    ]
    false_scores = [
        tmp_score for tmp_score in tmp_false_score_list if tmp_score is not None
    ]
    true_num = len(true_scores)
    prediction_list = true_scores + false_scores
    true_list = [1] * true_num + [0] * len(false_scores)

    y_true = np.array(true_list)
    y_scores = np.array(prediction_list)

    if true_num > 0:
        threshold = sorted(prediction_list)[-true_num]
        y_pred = (y_scores >= threshold).astype(np.int32)
    else:
        # Index ``-0`` would select the smallest score (labelling every edge
        # positive) or fail on an empty list; with no scored true edge
        # nothing is predicted positive.
        y_pred = np.zeros(len(prediction_list), dtype=np.int32)

    ps, rs, _ = precision_recall_curve(y_true, y_scores)
    return (
        roc_auc_score(y_true, y_scores),
        f1_score(y_true, y_pred),
        auc(rs, ps),
    )
Dependencies ---------------------- - numpy 1.19.4 - scikit-learn 0.22.1 - pytorch 1.4.0 - dgl 0.5.3 The graph dataset used in this example --------------------------------------- DGL's built-in PPIDataset. This is a Protein-Protein Interaction dataset for inductive node classification. The PPIDataset is a toy Protein-Protein Interaction network dataset. The dataset contains 24 graphs. The average number of nodes per graph is 2372. Each node has 50 features and 121 labels. There are 20 graphs for training, 2 for validation, and 2 for testing. NOTE: Following the paper, in addition to the dataset-provided untyped edges, a fresh "self-loop" edge type is added. Statistics: - Train examples: 20 - Valid examples: 2 - Test examples: 2 - AvgNodesPerGraph: 2372 - NumFeats: 50 - NumLabels: 121 How to run example files -------------------------------- In the GNN-FiLM folder, run ```bash python main.py ``` If you want to use a GPU, run ```bash python main.py --gpu ${your_device_id_here} ``` Performance ------------------------- NOTE: We do not perform grid search or fine-tuning here, so there is a gap between the performance reported in the original paper and this example. The results below, reported as mean (standard deviation), were computed over ten runs. **GNN-FiLM results on PPI task** | Model | Paper (tensorflow) | ours (dgl) | | ------------- | -------------------------------- | --------------------------- | | Avg.
def _add_self_loop_relation(dataset):
    """Rebuild every graph of ``dataset`` in place with an extra "self" edge type."""
    # The graphs are reconstructed because the schema of a heterograph
    # cannot be changed once it has been constructed.
    for i in range(len(dataset)):
        graph = dataset[i]
        nodes = graph.nodes()
        g = dgl.heterograph(
            {
                ("_N", "_E", "_N"): graph.edges(),
                ("_N", "self", "_N"): (nodes, nodes),
            }
        )
        # carry over node/edge data from the original graph
        g.ndata["label"] = graph.ndata["label"]
        g.ndata["feat"] = graph.ndata["feat"]
        g.ndata["_ID"] = graph.ndata["_ID"]
        g.edges["_E"].data["_ID"] = graph.edata["_ID"]
        dataset.graphs[i] = g


# add a fresh "self-loop" edge type to the untyped PPI dataset and prepare train, val, test loaders
def load_PPI(batch_size=1, device="cpu"):
    """Load the PPI splits, add a "self" relation and wrap them in data loaders.

    Parameters
    ----------
    batch_size : int
        Number of graphs per batch.
    device : str
        Device the batched labels are moved to (passed to ``batcher``).

    Returns
    -------
    tuple
        ``(train_loader, valid_loader, test_loader, etypes, in_size, out_size)``
        where ``etypes`` are the edge types of the first training graph and
        ``in_size`` / ``out_size`` are the second dimensions of its ``feat``
        and ``label`` node data.
    """
    train_set = PPIDataset(mode="train")
    valid_set = PPIDataset(mode="valid")
    test_set = PPIDataset(mode="test")

    # for each graph, add self-loops as a new relation type
    for dataset in (train_set, valid_set, test_set):
        _add_self_loop_relation(dataset)

    etypes = train_set[0].etypes
    in_size = train_set[0].ndata["feat"].shape[1]
    out_size = train_set[0].ndata["label"].shape[1]

    # prepare train, valid, and test dataloaders
    train_loader, valid_loader, test_loader = (
        DataLoader(
            dataset,
            batch_size=batch_size,
            collate_fn=batcher(device),
            shuffle=True,
        )
        for dataset in (train_set, valid_set, test_set)
    )

    return train_loader, valid_loader, test_loader, etypes, in_size, out_size
def _evaluate_loader(model, loader, criterion):
    """Return the mean loss and mean F1 of ``model`` over ``loader`` without tracking gradients."""
    losses, f1_scores = [], []
    model.eval()
    with torch.no_grad():
        for batch in loader:
            g = batch.graph.to(next(model.parameters()).device)
            logits = model(g, "_N")
            labels = batch.label
            losses.append(criterion(logits, labels).item())
            f1_scores.append(
                evaluate_f1_score(
                    logits.detach().cpu().numpy(),
                    labels.detach().cpu().numpy(),
                )
            )
    return np.mean(losses), np.mean(f1_scores)


def main(args):
    """Train GNN-FiLM, checkpoint the best validation F1 and report test metrics.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``gpu``, ``dataset``, ``batch_size``, ``hidden_size``,
        ``num_layers``, ``lr``, ``wd``, ``step_size``, ``gamma``,
        ``max_epoch``, ``early_stopping``, ``save_dir``, ``name`` and,
        when present, ``dropout``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not ``"PPI"``.
    """
    # Step 1: Prepare graph data and retrieve train/validation/test dataloader ============================= #
    if args.gpu >= 0 and torch.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    if args.dataset != "PPI":
        # Previously an unknown dataset surfaced later as a NameError.
        raise ValueError("Unsupported dataset: {}".format(args.dataset))
    train_set, valid_set, test_set, etypes, in_size, out_size = load_PPI(
        args.batch_size, device
    )

    # Step 2: Create model and training components=========================================================== #
    # Pass the dropout rate through: the --dropout option used to be parsed
    # but ignored, so the model always ran with its built-in default.
    model = GNNFiLM(
        etypes,
        in_size,
        args.hidden_size,
        out_size,
        args.num_layers,
        getattr(args, "dropout", 0.1),
    ).to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.wd
    )
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, args.step_size, gamma=args.gamma
    )
    checkpoint_path = os.path.join(args.save_dir, args.name)

    # Step 3: training epoches ============================================================================== #
    lastf1 = 0
    cnt = 0
    best_val_f1 = 0

    for epoch in range(args.max_epoch):
        train_loss = []
        train_f1 = []

        model.train()
        for batch in train_set:
            g = batch.graph.to(device)
            logits = model(g, "_N")
            labels = batch.label
            loss = criterion(logits, labels)
            f1 = evaluate_f1_score(
                logits.detach().cpu().numpy(), labels.detach().cpu().numpy()
            )

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item())
            train_f1.append(f1)

        train_loss = np.mean(train_loss)
        train_f1 = np.mean(train_f1)
        scheduler.step()

        val_loss, val_f1 = _evaluate_loader(model, valid_set, criterion)
        print(
            "Epoch {:d} | Train Loss {:.4f} | Train F1 {:.4f} | Val Loss {:.4f} | Val F1 {:.4f} |".format(
                epoch + 1, train_loss, train_f1, val_loss, val_f1
            )
        )

        # keep the weights of the best validation epoch
        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            torch.save(model.state_dict(), checkpoint_path)

        # stop after `early_stopping` consecutive epochs below the reference F1
        # NOTE(review): the reference is only advanced when the F1 did not
        # drop - confirm this matches the intended early-stopping rule.
        if val_f1 < lastf1:
            cnt += 1
            if cnt == args.early_stopping:
                print("Early stop.")
                break
        else:
            cnt = 0
            lastf1 = val_f1

    # Step 4: evaluate the best checkpoint on the test split ================================================ #
    model.load_state_dict(torch.load(checkpoint_path, weights_only=False))
    test_loss, test_f1 = _evaluate_loader(model, test_set, criterion)
    print("Test F1: {:.4f} | Test loss: {:.4f}".format(test_f1, test_loss))
def main(args):
    """Train NGCF with the BPR loss and evaluate it every 10 epochs.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``gpu``, ``embed_size``, ``layer_size``, ``mess_dropout``,
        ``regs``, ``lr``, ``batch_size``, ``epoch``, ``verbose``,
        ``save_flag``, ``weights_path`` and ``model_name``.
    """
    # Step 1: Prepare graph data and device ================================================================= #
    if args.gpu >= 0 and torch.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    g = data_generator.g
    g = g.to(device)

    # Step 2: Create model and training components=========================================================== #
    model = NGCF(
        g, args.embed_size, args.layer_size, args.mess_dropout, args.regs[0]
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 3: training epoches ============================================================================== #
    n_batch = data_generator.n_train // args.batch_size + 1
    t0 = time()
    cur_best_pre_0, stopping_step = 0, 0
    loss_loger, pre_loger, rec_loger, ndcg_loger, hit_loger = [], [], [], [], []
    for epoch in range(args.epoch):
        t1 = time()
        loss, mf_loss, emb_loss = 0.0, 0.0, 0.0
        # evaluation below switches to eval mode, so re-enable dropout here
        model.train()
        for _ in range(n_batch):
            users, pos_items, neg_items = data_generator.sample()
            u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings = model(
                g, "user", "item", users, pos_items, neg_items
            )

            batch_loss, batch_mf_loss, batch_emb_loss = model.create_bpr_loss(
                u_g_embeddings, pos_i_g_embeddings, neg_i_g_embeddings
            )
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            # .item() yields plain floats; summing the tensors themselves
            # kept every batch's autograd history alive for the whole epoch.
            loss += batch_loss.item()
            mf_loss += batch_mf_loss.item()
            emb_loss += batch_emb_loss.item()

        if (epoch + 1) % 10 != 0:
            if args.verbose > 0 and epoch % args.verbose == 0:
                perf_str = "Epoch %d [%.1fs]: train==[%.5f=%.5f + %.5f]" % (
                    epoch,
                    time() - t1,
                    loss,
                    mf_loss,
                    emb_loss,
                )
                print(perf_str)
            continue  # end the current epoch and move to the next epoch, let the following evaluation run every 10 epoches

        # evaluate the model every 10 epoches
        t2 = time()
        users_to_test = list(data_generator.test_set.keys())
        # Evaluate with dropout disabled and without building a graph.
        model.eval()
        with torch.no_grad():
            ret = test(model, g, users_to_test)
        t3 = time()

        loss_loger.append(loss)
        rec_loger.append(ret["recall"])
        pre_loger.append(ret["precision"])
        ndcg_loger.append(ret["ndcg"])
        hit_loger.append(ret["hit_ratio"])

        if args.verbose > 0:
            perf_str = (
                "Epoch %d [%.1fs + %.1fs]: train==[%.5f=%.5f + %.5f], recall=[%.5f, %.5f], "
                "precision=[%.5f, %.5f], hit=[%.5f, %.5f], ndcg=[%.5f, %.5f]"
                % (
                    epoch,
                    t2 - t1,
                    t3 - t2,
                    loss,
                    mf_loss,
                    emb_loss,
                    ret["recall"][0],
                    ret["recall"][-1],
                    ret["precision"][0],
                    ret["precision"][-1],
                    ret["hit_ratio"][0],
                    ret["hit_ratio"][-1],
                    ret["ndcg"][0],
                    ret["ndcg"][-1],
                )
            )
            print(perf_str)

        cur_best_pre_0, stopping_step, should_stop = early_stopping(
            ret["recall"][0],
            cur_best_pre_0,
            stopping_step,
            expected_order="acc",
            flag_step=5,
        )

        # early stop
        if should_stop:
            break

        # the current evaluation is the best one so far
        if ret["recall"][0] == cur_best_pre_0 and args.save_flag == 1:
            torch.save(model.state_dict(), args.weights_path + args.model_name)
            print(
                "save the weights in path: ",
                args.weights_path + args.model_name,
            )

    if not rec_loger:
        # Fewer than 10 epochs were run, so there is nothing to summarise;
        # indexing the empty log used to raise an IndexError here.
        print("No evaluation was performed; skipping the final report.")
        return

    recs = np.array(rec_loger)
    pres = np.array(pre_loger)
    ndcgs = np.array(ndcg_loger)
    hit = np.array(hit_loger)

    # report the evaluation with the best recall at the first K
    best_rec_0 = max(recs[:, 0])
    idx = list(recs[:, 0]).index(best_rec_0)

    final_perf = (
        "Best Iter=[%d]@[%.1f]\trecall=[%s], precision=[%s], hit=[%s], ndcg=[%s]"
        % (
            idx,
            time() - t0,
            "\t".join(["%.5f" % r for r in recs[idx]]),
            "\t".join(["%.5f" % r for r in pres[idx]]),
            "\t".join(["%.5f" % r for r in hit[idx]]),
            "\t".join(["%.5f" % r for r in ndcgs[idx]]),
        )
    )
    print(final_perf)
class NGCF(nn.Module):
    """NGCF model: learnable node embeddings refined by a stack of ``NGCFLayer``.

    Parameters
    ----------
    g : graph
        Graph providing ``canonical_etypes``, ``ntypes``, ``edges``,
        ``in_degrees``, ``out_degrees`` and ``num_nodes``.
    in_size : int
        Size of the initial node embeddings.
    layer_size : sequence of int
        Output size of each propagation layer.
    dropout : sequence of float
        Dropout value handed to each propagation layer.
    lmbd : float
        Weight of the embedding regulariser in the BPR loss.
    """

    def __init__(self, g, in_size, layer_size, dropout, lmbd=1e-5):
        super().__init__()
        self.lmbd = lmbd

        # per-edge coefficient (deg_src * deg_dst) ** -0.5 for every relation
        self.norm_dict = {}
        for canonical_etype in g.canonical_etypes:
            src, dst = g.edges(etype=canonical_etype)
            dst_degree = g.in_degrees(dst, etype=canonical_etype).float()
            src_degree = g.out_degrees(src, etype=canonical_etype).float()
            coefficient = torch.pow(src_degree * dst_degree, -0.5)
            self.norm_dict[canonical_etype] = coefficient.unsqueeze(1)

        # propagation layers: in_size -> layer_size[0] -> layer_size[1] -> ...
        stack = [NGCFLayer(in_size, layer_size[0], self.norm_dict, dropout[0])]
        depth_total = len(layer_size)
        for depth in range(1, depth_total):
            stack.append(
                NGCFLayer(
                    layer_size[depth - 1],
                    layer_size[depth],
                    self.norm_dict,
                    dropout[depth],
                )
            )
        self.layers = nn.ModuleList(stack)
        self.num_layers = depth_total

        self.initializer = nn.init.xavier_uniform_

        # one learnable embedding table per node type
        tables = {}
        for ntype in g.ntypes:
            weights = torch.empty(g.num_nodes(ntype), in_size)
            tables[ntype] = nn.Parameter(self.initializer(weights))
        self.feature_dict = nn.ParameterDict(tables)

    def create_bpr_loss(self, users, pos_items, neg_items):
        """Return ``(total_loss, mf_loss, emb_loss)`` of the BPR objective."""
        margin = (users * pos_items).sum(1) - (users * neg_items).sum(1)
        mf_loss = -1 * nn.LogSigmoid()(margin).mean()

        # squared L2 norm of the batch embeddings, scaled by the batch size
        squared_norms = (
            torch.norm(users) ** 2
            + torch.norm(pos_items) ** 2
            + torch.norm(neg_items) ** 2
        )
        emb_loss = self.lmbd * (squared_norms / 2) / users.shape[0]

        return mf_loss + emb_loss, mf_loss, emb_loss

    def rating(self, u_g_embeddings, pos_i_g_embeddings):
        """Return the matrix of inner products between user and item embeddings."""
        return torch.matmul(u_g_embeddings, pos_i_g_embeddings.t())

    def forward(self, g, user_key, item_key, users, pos_items, neg_items):
        """Propagate embeddings and return those of the requested users and items.

        The embeddings of every layer (including the initial ones) are
        concatenated along the feature dimension before indexing.
        """
        h_dict = {ntype: self.feature_dict[ntype] for ntype in g.ntypes}

        # per-layer outputs, starting with the raw embeddings
        per_layer_users = [h_dict[user_key]]
        per_layer_items = [h_dict[item_key]]
        for layer in self.layers:
            h_dict = layer(g, h_dict)
            per_layer_users.append(h_dict[user_key])
            per_layer_items.append(h_dict[item_key])

        all_users = torch.cat(per_layer_users, 1)
        all_items = torch.cat(per_layer_items, 1)

        return (
            all_users[users, :],
            all_items[pos_items, :],
            all_items[neg_items, :],
        )
def ranklist_by_heapq(user_pos_test, test_items, rating, Ks):
    """Rank the candidate items of one user and flag the hits.

    Parameters
    ----------
    user_pos_test : container
        Items that count as hits for the user.
    test_items : iterable
        Candidate item ids; each is used as an index into ``rating``.
    rating : indexable
        Score of every item for the user.
    Ks : iterable of int
        Cut-offs; only the ``max(Ks)`` best candidates are ranked.

    Returns
    -------
    tuple
        ``(r, auc)`` where ``r`` lists 1 (hit) or 0 (miss) for the ranked
        candidates, best first, and ``auc`` is always ``0.0`` here.
    """
    scores = {item: rating[item] for item in test_items}
    top_items = heapq.nlargest(max(Ks), scores, key=scores.get)
    hits = [1 if item in user_pos_test else 0 for item in top_items]
    return hits, 0.0
def test(model, g, users_to_test, batch_test_flag=False):
    """Evaluate ``model`` on ``users_to_test`` and average the per-user metrics.

    Users are scored in batches of 5000; the per-user metrics are computed
    by ``test_one_user`` in a pool of ``cores`` worker processes.

    Parameters
    ----------
    model
        Model called as ``model(g, "user", "item", users, items, [])`` and
        providing ``rating``.
    g
        Graph passed through to ``model``.
    users_to_test : list
        User ids to evaluate.
    batch_test_flag : bool
        If true, items are scored in chunks of ``BATCH_SIZE`` instead of all
        at once.

    Returns
    -------
    dict
        Averages under the keys ``"precision"``, ``"recall"``, ``"ndcg"``,
        ``"hit_ratio"`` (arrays with one entry per value in ``Ks``) and
        ``"auc"``.
    """
    result = {
        "precision": np.zeros(len(Ks)),
        "recall": np.zeros(len(Ks)),
        "ndcg": np.zeros(len(Ks)),
        "hit_ratio": np.zeros(len(Ks)),
        "auc": 0.0,
    }

    u_batch_size = 5000
    i_batch_size = BATCH_SIZE

    test_users = users_to_test
    n_test_users = len(test_users)
    # Ceiling division: "// + 1" produced an extra, empty batch (and a
    # wasted forward pass) whenever the user count was an exact multiple.
    n_user_batchs = (n_test_users + u_batch_size - 1) // u_batch_size

    count = 0

    # The context manager releases the workers even if evaluation raises.
    with multiprocessing.Pool(cores) as pool:
        for u_batch_id in range(n_user_batchs):
            start = u_batch_id * u_batch_size
            end = (u_batch_id + 1) * u_batch_size

            user_batch = test_users[start:end]

            if batch_test_flag:
                # batch-item test
                n_item_batchs = (ITEM_NUM + i_batch_size - 1) // i_batch_size
                rate_batch = np.zeros(shape=(len(user_batch), ITEM_NUM))

                i_count = 0
                for i_batch_id in range(n_item_batchs):
                    i_start = i_batch_id * i_batch_size
                    i_end = min((i_batch_id + 1) * i_batch_size, ITEM_NUM)

                    item_batch = range(i_start, i_end)

                    u_g_embeddings, pos_i_g_embeddings, _ = model(
                        g, "user", "item", user_batch, item_batch, []
                    )
                    i_rate_batch = (
                        model.rating(u_g_embeddings, pos_i_g_embeddings)
                        .detach()
                        .cpu()
                        .numpy()
                    )

                    rate_batch[:, i_start:i_end] = i_rate_batch
                    i_count += i_rate_batch.shape[1]

                assert i_count == ITEM_NUM

            else:
                # all-item test
                item_batch = range(ITEM_NUM)
                u_g_embeddings, pos_i_g_embeddings, _ = model(
                    g, "user", "item", user_batch, item_batch, []
                )
                rate_batch = (
                    model.rating(u_g_embeddings, pos_i_g_embeddings)
                    .detach()
                    .cpu()
                    .numpy()
                )

            # rate_batch is a NumPy array in both branches; the former
            # unconditional ``rate_batch.numpy()`` failed in the batch-item
            # branch, where it already was one.
            user_batch_rating_uid = zip(rate_batch, user_batch)
            batch_result = pool.map(test_one_user, user_batch_rating_uid)
            count += len(batch_result)

            # accumulate the running average over all test users
            for re in batch_result:
                result["precision"] += re["precision"] / n_test_users
                result["recall"] += re["recall"] / n_test_users
                result["ndcg"] += re["ndcg"] / n_test_users
                result["hit_ratio"] += re["hit_ratio"] / n_test_users
                result["auc"] += re["auc"] / n_test_users

    assert count == n_test_users
    return result
class Data(object):
    """Reader and sampler for a user-item interaction dataset.

    ``path`` must contain ``train.txt`` and ``test.txt``. Every line has the
    form ``"<user id> <item id> <item id> ..."`` with single spaces as
    separators. The constructor gathers the dataset statistics, the
    per-user item lists and builds the heterograph ``self.g`` from the
    training interactions plus self-loops.

    Parameters
    ----------
    path : str
        Directory holding the two files.
    batch_size : int
        Number of users drawn by ``sample``.
    """

    def __init__(self, path, batch_size):
        self.path = path
        self.batch_size = batch_size

        train_file = path + "/train.txt"
        test_file = path + "/test.txt"

        # get number of users and items
        self.n_users, self.n_items = 0, 0
        self.n_train, self.n_test = 0, 0
        # users found in the training file, in file order
        self.exist_users = []
        # training positive items corresponding to each user; testing positive items corresponding to each user
        self.train_items, self.test_set = {}, {}

        user_item_src = []
        user_item_dst = []

        # Each file is read once; statistics and per-user lists are filled
        # in the same pass.
        with open(train_file) as f:
            for line in f:
                if not line.strip():
                    # blank lines used to abort loading with a ValueError
                    continue
                ids = [int(tok) for tok in line.strip("\n").split(" ")]
                uid, items = ids[0], ids[1:]
                if not items:
                    # a user without interactions cannot be sampled from
                    continue
                self.exist_users.append(uid)
                self.train_items[uid] = items
                self.n_items = max(self.n_items, max(items))
                self.n_users = max(self.n_users, uid)
                self.n_train += len(items)
                user_item_src.extend([uid] * len(items))
                user_item_dst.extend(items)

        with open(test_file) as f:
            for line in f:
                try:
                    ids = [int(tok) for tok in line.strip("\n").split(" ")]
                except ValueError:
                    # lines with empty or non-integer tokens are skipped
                    continue
                uid, items = ids[0], ids[1:]
                if not items:
                    # max() of an empty item list used to raise here
                    continue
                self.test_set[uid] = items
                self.n_items = max(self.n_items, max(items))
                self.n_test += len(items)

        # ids are zero-based, so the counts are the largest id plus one
        self.n_items += 1
        self.n_users += 1

        self.print_statistics()

        # construct graph from the train data and add self-loops
        user_selfs = list(range(self.n_users))
        item_selfs = list(range(self.n_items))

        data_dict = {
            ("user", "user_self", "user"): (user_selfs, user_selfs),
            ("item", "item_self", "item"): (item_selfs, item_selfs),
            ("user", "ui", "item"): (user_item_src, user_item_dst),
            ("item", "iu", "user"): (user_item_dst, user_item_src),
        }
        num_dict = {"user": self.n_users, "item": self.n_items}

        self.g = dgl.heterograph(data_dict, num_nodes_dict=num_dict)

    def _sample_pos_items_for_u(self, u, num):
        """Draw ``num`` distinct training items of user ``u``."""
        # NOTE: does not terminate if ``num`` exceeds the number of distinct
        # training items of ``u``; ``sample`` always asks for one.
        pos_items = self.train_items[u]
        n_pos_items = len(pos_items)
        pos_batch = []
        while len(pos_batch) != num:
            pos_id = np.random.randint(low=0, high=n_pos_items, size=1)[0]
            pos_i_id = pos_items[pos_id]
            if pos_i_id not in pos_batch:
                pos_batch.append(pos_i_id)
        return pos_batch

    def _sample_neg_items_for_u(self, u, num):
        """Draw ``num`` distinct items that are not training items of user ``u``."""
        neg_items = []
        while len(neg_items) != num:
            neg_id = np.random.randint(low=0, high=self.n_items, size=1)[0]
            if neg_id not in self.train_items[u] and neg_id not in neg_items:
                neg_items.append(neg_id)
        return neg_items

    def sample(self):
        """Draw a training batch.

        Returns
        -------
        tuple
            ``(users, pos_items, neg_items)``: three lists of length
            ``batch_size`` holding, per position, a user, one of the user's
            training items and one item outside the user's training items.
        """
        # Compare against the population that is actually sampled from:
        # ``random.sample`` raises when asked for more elements than
        # ``exist_users`` holds, which can be fewer than ``n_users``.
        if self.batch_size <= len(self.exist_users):
            users = rd.sample(self.exist_users, self.batch_size)
        else:
            # more users requested than available: sample with replacement
            users = [
                rd.choice(self.exist_users) for _ in range(self.batch_size)
            ]

        pos_items, neg_items = [], []
        for u in users:
            pos_items += self._sample_pos_items_for_u(u, 1)
            neg_items += self._sample_neg_items_for_u(u, 1)

        return users, pos_items, neg_items

    def get_num_users_items(self):
        """Return ``(n_users, n_items)``."""
        return self.n_users, self.n_items

    def print_statistics(self):
        """Print user, item and interaction counts and the interaction density."""
        print("n_users=%d, n_items=%d" % (self.n_users, self.n_items))
        print("n_interactions=%d" % (self.n_train + self.n_test))
        print(
            "n_train=%d, n_test=%d, sparsity=%.5f"
            % (
                self.n_train,
                self.n_test,
                (self.n_train + self.n_test) / (self.n_users * self.n_items),
            )
        )
def dcg_at_k(r, k, method=1):
    """Score is discounted cumulative gain (dcg)

    Relevance is positive real values. Can use binary
    as the previous methods.

    Parameters
    ----------
    r : sequence
        Relevance scores in rank order (first element is the first item).
    k : int
        Number of results to consider.
    method : int
        If 0, the first two positions carry weight 1.0 and position ``i``
        (1-based, ``i >= 2``) carries ``1 / log2(i)``. If 1, position ``i``
        carries ``1 / log2(i + 1)``.

    Returns:
        Discounted cumulative gain

    Raises:
        ValueError: ``method`` is neither 0 nor 1 (only checked when the
        truncated ``r`` is not empty).
    """
    # np.asfarray was removed in NumPy 2.0; asarray with a float dtype is
    # the equivalent conversion.
    r = np.asarray(r, dtype=float)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError("method must be 0 or 1.")
    return 0.0
def recall_at_k(r, k, all_pos_num):
    """Return recall @ k for a binary relevance list.

    Args:
        r: relevance flags in rank order (nonzero is relevant).
        k: number of leading results to take into account.
        all_pos_num: total number of relevant items for the query.

    Returns:
        Sum of the first ``k`` relevance values divided by ``all_pos_num``.
    """
    # np.asfarray was removed in NumPy 2.0; asarray(dtype=float) is the
    # documented replacement and yields the same float64 array.
    r = np.asarray(r, dtype=float)[:k]
    return np.sum(r) / all_pos_num
) parser.add_argument( "--gpu", type=int, default=0, help="0 for NAIS_prod, 1 for NAIS_concat" ) parser.add_argument( "--mess_dropout", nargs="?", default="[0.1,0.1,0.1]", help="Keep probability w.r.t. message dropout (i.e., 1-dropout_ratio) for each deep layer. 1: no dropout.", ) parser.add_argument( "--Ks", nargs="?", default="[20, 40]", help="Output sizes of every layer", ) parser.add_argument( "--save_flag", type=int, default=1, help="0: Disable model saver, 1: Activate model saver", ) parser.add_argument( "--test_flag", nargs="?", default="part", help="Specify the test type from {part, full}, indicating whether the reference is done in mini-batch", ) parser.add_argument( "--report", type=int, default=0, help="0: Disable performance report w.r.t. sparsity levels, 1: Show performance report w.r.t. sparsity levels", ) return parser.parse_args() ================================================ FILE: examples/pytorch/NGCF/README.md ================================================ # DGL Implementation of the NGCF Model This DGL example implements the GNN model proposed in the paper [Neural Graph Collaborative Filtering](https://arxiv.org/abs/1905.08108). The author's codes of implementation is in [here](https://github.com/xiangwang1223/neural_graph_collaborative_filtering). A pytorch re-implementation can be found [here](https://github.com/huangtinglin/NGCF-PyTorch). Example implementor ---------------------- This example was implemented by [Kounianhua Du](https://github.com/KounianhuaDu) during her Software Dev Engineer Intern work at the AWS Shanghai AI Lab. The graph dataset used in this example --------------------------------------- Gowalla: This is the check-in dataset obtained from Gowalla, where users share their locations by checking-in. To ensure the quality of the dataset, we use the 10-core setting, i.e., retaining users and items with at least ten interactions. 
The dataset used can be found [here](https://github.com/xiangwang1223/neural_graph_collaborative_filtering/tree/master/Data). Statistics: - Users: 29858 - Items: 40981 - Interactions: 1027370 - Density: 0.00084 How to run example files -------------------------------- First, to get the data, in the Data folder, run ```bash sh load_gowalla.sh ``` Then, in the NGCF folder, run ```bash python main.py --dataset gowalla --regs [1e-5] --embed_size 64 --layer_size [64,64,64] --lr 0.0001 --save_flag 1 --batch_size 1024 --epoch 400 --verbose 1 --mess_dropout [0.1,0.1,0.1] --gpu 0 ``` NOTE: Following the paper's setting, the node dropout is disabled. Performance ------------------------- The following results were obtained after 400 epochs. **NGCF results** | Model | Paper (TensorFlow) | Ours (DGL) | | ------------- | -------------------------------- | --------------------------- | | recall@20 | 0.1569 | 0.1552 | | ndcg@20 | 0.1327 | 0.2707 | ================================================ FILE: examples/pytorch/P-GNN/README.md ================================================ # DGL Implementations of P-GNN This DGL example implements the GNN model proposed in the paper [Position-aware Graph Neural Networks](http://proceedings.mlr.press/v97/you19b/you19b.pdf). For the original implementation, see [here](https://github.com/JiaxuanYou/P-GNN). Contributor: [RecLusIve-F](https://github.com/RecLusIve-F) ## Requirements The codebase is implemented in Python 3.8. For the package version requirements, see below. 
def get_loss(p, data, out, loss_func, device, get_auc=True):
    """Compute the link-prediction loss (and optionally ROC AUC) for a split.

    Each candidate edge is scored with the dot product of the two endpoint
    embeddings taken from ``out``.

    Args:
        p: split name; selects ``data["positive_edges_<p>"]`` and
            ``data["negative_edges_<p>"]``.
        data: dict holding, for the split, two integer NumPy arrays with one
            column per edge (row 0 and row 1 are the endpoint ids).
        out: node embedding tensor indexed by node id along dim 0.
        loss_func: callable ``loss_func(pred, label)`` returning the loss.
        device: kept for backward compatibility.  Labels are now created on
            the device of the predictions, which is where ``loss_func``
            needs them; previously a ``device`` different from
            ``out.device`` made the loss computation fail.
        get_auc: when true, also return the ROC AUC of ``sigmoid(pred)``.

    Returns:
        ``loss`` if ``get_auc`` is false, otherwise ``(loss, auc)``.
    """
    positive_edges = data["positive_edges_{}".format(p)]
    negative_edges = data["negative_edges_{}".format(p)]
    # Positives first, negatives second; the labels below use this order.
    edge_mask = np.concatenate((positive_edges, negative_edges), axis=-1)

    nodes_first = torch.index_select(
        out, 0, torch.from_numpy(edge_mask[0, :]).long().to(out.device)
    )
    nodes_second = torch.index_select(
        out, 0, torch.from_numpy(edge_mask[1, :]).long().to(out.device)
    )
    pred = torch.sum(nodes_first * nodes_second, dim=-1)

    # Allocate the labels directly next to the predictions instead of
    # building them on the CPU and copying them over afterwards.
    label = torch.cat(
        (
            torch.ones(
                positive_edges.shape[1], dtype=pred.dtype, device=pred.device
            ),
            torch.zeros(
                negative_edges.shape[1], dtype=pred.dtype, device=pred.device
            ),
        )
    )
    loss = loss_func(pred, label)

    if get_auc:
        auc = roc_auc_score(
            label.flatten().cpu().numpy(),
            # detach() replaces the deprecated `.data` access.
            torch.sigmoid(pred).detach().flatten().cpu().numpy(),
        )
        return loss, auc
    return loss
class PGNN_layer(nn.Module):
    """One position-aware message-passing layer.

    For every edge the message is ``v_feat(dst) + u_feat(src) * sp_dist``.
    The messages of the edges listed in ``anchor_eid`` are gathered,
    reshaped to one row per (node, anchor set) pair and turned into a
    position embedding and a structure embedding.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.input_dim = input_dim
        # Sub-modules are created in the original order so that seeded
        # parameter initialisation stays the same.
        self.linear_hidden_u = nn.Linear(input_dim, output_dim)
        self.linear_hidden_v = nn.Linear(input_dim, output_dim)
        self.linear_out_position = nn.Linear(output_dim, 1)
        self.act = nn.ReLU()

    def forward(self, graph, feature, anchor_eid, dists_max):
        """Return ``(out_position, out_structure)`` for ``feature``.

        ``dists_max`` is only consulted for its first two dimensions, which
        give the target layout of the gathered messages.
        """
        n_rows, n_cols = dists_max.shape[0], dists_max.shape[1]
        eid_index = torch.LongTensor(anchor_eid).to(feature.device)

        with graph.local_scope():
            graph.srcdata["u_feat"] = self.linear_hidden_u(feature)
            graph.dstdata["v_feat"] = self.linear_hidden_v(feature)
            # source feature scaled by the edge's "sp_dist" value ...
            graph.apply_edges(fn.u_mul_e("u_feat", "sp_dist", "u_message"))
            # ... plus the destination feature
            graph.apply_edges(fn.v_add_e("v_feat", "u_message", "message"))

            gathered = torch.index_select(graph.edata["message"], 0, eid_index)
            gathered = gathered.reshape(n_rows, n_cols, gathered.shape[-1])
            gathered = self.act(gathered)  # n*m*d

            position = self.linear_out_position(gathered)
            out_position = position.squeeze(-1)  # n*m_out
            out_structure = gathered.mean(dim=1)  # n*d

            return out_position, out_structure
def to_single_directed(edges):
    """Keep one direction (src < dst) of every edge.

    Args:
        edges: integer array with one column per directed edge; row 0 holds
            the source ids and row 1 the destination ids.

    Returns:
        Integer array with the columns of ``edges`` whose source id is
        smaller than the destination id, in their original order.
    """
    edges = np.asarray(edges)
    # The previous implementation pre-allocated exactly half of the columns.
    # Input that is not a perfect mirror (self-loops, duplicates, already
    # single-directed edges) then either left bogus (0, 0) edges in the
    # result or overflowed the buffer.  A boolean mask returns exactly the
    # matching columns in every case.
    forward = edges[0] < edges[1]
    return edges[:, forward].astype(int)
def all_pairs_shortest_path(graph, cutoff=None, num_workers=4):
    """Compute shortest-path lengths from every node using a process pool.

    The (shuffled) node list is cut into ``num_workers`` contiguous chunks
    and each chunk is handed to ``shortest_path`` in a worker process.

    Args:
        graph: graph whose ``nodes`` are the path sources.
        cutoff: forwarded to ``shortest_path``.
        num_workers: number of worker processes and of node chunks.

    Returns:
        dict: the per-chunk result dictionaries merged into one.
    """
    nodes = list(graph.nodes)
    random.shuffle(nodes)

    # Integer chunk boundaries: the last boundary is always len(nodes), so
    # floating-point rounding can never drop a trailing node.
    bounds = [len(nodes) * i // num_workers for i in range(num_workers + 1)]

    # The context manager shuts the pool down even when a worker raises;
    # previously the pool was leaked in that case.
    with mp.Pool(processes=num_workers) as pool:
        results = [
            pool.apply_async(
                shortest_path,
                args=(graph, nodes[bounds[i] : bounds[i + 1]], cutoff),
            )
            for i in range(num_workers)
        ]
        output = [p.get() for p in results]

    return merge_dicts(output)
def get_anchors(n):
    """Get a list of NumPy arrays, each of them is an anchor node set.

    With ``m = int(log2(n))`` there are ``m`` size levels; level ``i``
    contributes ``m`` sets of ``int(n / 2**(i + 1))`` distinct node ids drawn
    from ``range(n)``.
    """
    num_levels = int(np.log2(n))
    return [
        np.random.choice(n, size=int(n / np.exp2(level + 1)), replace=False)
        for level in range(num_levels)
        for _ in range(num_levels)
    ]
def get_a_graph(dists_max, dists_argmax):
    """Build the anchor -> node edge list for one set of anchor assignments.

    Row ``i`` of ``dists_argmax`` lists the anchor chosen for node ``i`` in
    every anchor set and the same cell of ``dists_max`` holds its weight.
    Duplicate anchors of a node collapse into a single edge.

    Returns:
        tuple: ``((anchors, nodes), anchor_eid, edge_weight)`` where the
        first element is the deduplicated edge list, ``anchor_eid`` maps
        every (node, anchor set) cell in row-major order to its edge index
        and ``edge_weight`` holds one weight per deduplicated edge.
    """
    weights = dists_max.numpy()

    edge_nodes, edge_anchors, edge_weight = [], [], []
    cell_nodes, cell_anchors = [], []
    for node in range(weights.shape[0]):
        row = dists_argmax[node, :]
        # Distinct anchors of this node and where each first occurs.
        uniq_anchors, first_pos = np.unique(row, return_index=True)

        edge_nodes += [node] * uniq_anchors.shape[0]
        edge_anchors += list(uniq_anchors)
        edge_weight += weights[node, first_pos].tolist()

        cell_nodes += [node] * row.shape[0]
        cell_anchors += list(row.numpy())

    edge_index = {}
    for eid, pair in enumerate(zip(edge_anchors, edge_nodes)):
        edge_index[pair] = eid
    anchor_eid = [
        edge_index.get(pair) for pair in zip(cell_anchors, cell_nodes)
    ]

    return (edge_anchors, edge_nodes), anchor_eid, edge_weight
class SemanticAttention(nn.Module):
    """Attention over metapath-specific embeddings (as used in HAN)."""

    def __init__(self, in_size, hidden_size=128):
        super().__init__()
        layers = [
            nn.Linear(in_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False),
        ]
        self.project = nn.Sequential(*layers)

    def forward(self, z):
        """
        Shape of z: (N, M , D*K)
        N: number of nodes
        M: number of metapath patterns
        D: hidden_size
        K: number of heads

        Returns the metapath-weighted sum of z with shape (N, D*K).
        """
        scores = self.project(z)  # (N, M, 1)
        mean_scores = scores.mean(dim=0)  # (M, 1)
        beta = mean_scores.softmax(dim=0)  # (M, 1)
        # Share the same metapath weights across all nodes.
        beta = beta.unsqueeze(0).expand(z.shape[0], *beta.shape)  # (N, M, 1)
        return torch.sum(beta * z, dim=1)  # (N, D * K)
class RelationalAGG(nn.Module):
    """Relational neighbor aggregation with per-edge-type attention.

    For every edge type a linear map ``W_T`` transforms the source features
    and a linear map ``W_A`` turns the element-wise product of transformed
    source and raw destination features into one attention logit per edge.
    The exponentiated logits are normalised per destination node and used
    to aggregate the raw source features.
    """

    def __init__(self, g, in_size, out_size, dropout=0.1):
        """Create one transform and one attention layer per edge type of ``g``.

        ``g`` is only read for its ``etypes`` here.
        """
        super(RelationalAGG, self).__init__()
        self.in_size = in_size
        self.out_size = out_size

        # Transform weights for different types of edges
        self.W_T = nn.ModuleDict(
            {
                name: nn.Linear(in_size, out_size, bias=False)
                for name in g.etypes
            }
        )

        # Attention weights for different types of edges
        self.W_A = nn.ModuleDict(
            {name: nn.Linear(out_size, 1, bias=False) for name in g.etypes}
        )

        # layernorm
        self.layernorm = nn.LayerNorm(out_size)

        # dropout layer
        self.dropout = nn.Dropout(dropout)

    def forward(self, g, feat_dict):
        """Aggregate neighbor features and return a new per-node-type dict.

        Args:
            g: heterogeneous graph; its node/edge data fields "h", "t_h",
                "x" and "att" are written directly (no ``local_scope`` is
                used), so they are still present on ``g`` after the call.
            feat_dict: mapping from node type to that type's input features.

        Returns:
            dict: node type -> dropout(layernorm(relu(aggregated feature))).
        """
        funcs = {}
        for srctype, etype, dsttype in g.canonical_etypes:
            g.nodes[dsttype].data["h"] = feat_dict[
                dsttype
            ]  # nodes' original feature
            g.nodes[srctype].data["h"] = feat_dict[srctype]
            g.nodes[srctype].data["t_h"] = self.W_T[etype](
                feat_dict[srctype]
            )  # src nodes' transformed feature

            # compute the attention numerator (exp)
            # NOTE(review): "t_h" has out_size features and "h" in_size
            # features, so this product presumably needs
            # in_size == out_size -- confirm.
            g.apply_edges(fn.u_mul_v("t_h", "h", "x"), etype=etype)
            # NOTE(review): plain exp with no max-subtraction; large logits
            # could overflow.
            g.edges[etype].data["x"] = torch.exp(
                self.W_A[etype](g.edges[etype].data["x"])
            )

            # first update to compute the attention denominator (\sum exp)
            funcs[etype] = (fn.copy_e("x", "m"), fn.sum("m", "att"))
        # per-edge-type sums are combined with "sum" across edge types
        g.multi_update_all(funcs, "sum")

        funcs = {}
        for srctype, etype, dsttype in g.canonical_etypes:
            g.apply_edges(
                fn.e_div_v("x", "att", "att"), etype=etype
            )  # compute attention weights (numerator/denominator)
            funcs[etype] = (
                fn.u_mul_e("h", "att", "m"),
                fn.sum("m", "h"),
            )  # \sum(h0*att) -> h1
        # second update to obtain h1
        g.multi_update_all(funcs, "sum")

        # apply activation, layernorm, and dropout
        feat_dict = {}
        for ntype in g.ntypes:
            # relu_ works in place on the "h" tensor stored on the graph
            feat_dict[ntype] = self.dropout(
                self.layernorm(F.relu_(g.nodes[ntype].data["h"]))
            )  # apply activation, layernorm, and dropout

        return feat_dict
def split_data(hg, etype_name):
    """Build a labelled link dataset for one edge type and split it 60/20/20.

    Every existing edge of ``etype_name`` becomes a positive sample
    ``(src, dst, 1)``.  The same number of negative samples ``(src, dst, 0)``
    is drawn without replacement from the zero entries of the dense
    adjacency matrix.  The combined samples are shuffled and split.

    Returns:
        tuple: ``(train_data, eval_data, test_data)`` as NumPy arrays with
        one ``(src, dst, label)`` row per sample.
    """
    src, dst = hg.edges(etype=etype_name)
    pos_src = src.numpy().tolist()
    pos_dst = dst.numpy().tolist()
    num_link = len(pos_src)
    samples = [(u, v, 1) for u, v in zip(pos_src, pos_dst)]

    # Candidate negatives are all (row, col) cells without an edge.
    dense_adj = np.array(hg.adj_external(etype=etype_name).to_dense())
    zero_rows, zero_cols = np.where(dense_adj == 0)
    picked = random.sample(range(0, len(zero_rows)), num_link)
    samples.extend(
        (u, v, 0) for u, v in zip(zero_rows[picked], zero_cols[picked])
    )

    random.shuffle(samples)

    total = len(samples)
    train_end = int(total * 0.6)
    eval_end = train_end + int(total * 0.2)
    # Whatever is left after train and eval forms the test split.
    return (
        np.array(samples[:train_end]),
        np.array(samples[train_end:eval_end]),
        np.array(samples[eval_end:]),
    )
def _read_movielens_pairs(path, min_rating=None):
    """Read ``src<TAB>dst[<TAB>rating]`` rows from ``path`` as two id lists.

    When ``min_rating`` is given, only rows whose third column is strictly
    greater than it are kept.  Blank lines are skipped.
    """
    src, dst = [], []
    with open(path) as fin:
        # Iterate lazily instead of materialising the file with readlines().
        for line in fin:
            fields = line.strip().split("\t")
            if fields == [""]:
                continue
            first, second = int(fields[0]), int(fields[1])
            if min_rating is not None and int(fields[2]) <= min_rating:
                continue
            src.append(first)
            dst.append(second)
    return src, dst


def process_movielens(root_path):
    """Build the Movielens heterograph and its train/eval/test link splits.

    Reads the four ``*.dat`` relation files below ``<root_path>/Movielens``,
    builds the full graph, splits the "um" (user-movie) links with
    ``split_data``, rebuilds the graph with only the positive training links
    and pickles graph and splits into ``root_path``.

    Args:
        root_path: directory that contains the ``Movielens`` folder and
            receives the ``movielens_*.pkl`` files.

    Returns:
        tuple: ``(hg_processed, train_data, eval_data, test_data)``.

    Raises:
        FileNotFoundError: if the dataset folder is missing.  Previously a
            hint was printed and the same exception type surfaced from the
            first ``open`` call.
    """
    # User-Movie 943 1682 100000 UMUM
    # User-Age 943 8 943 UAUM
    # User-Occupation 943 21 943 UOUM
    # Movie-Genre 1682 18 2861 UMGM

    data_path = os.path.join(root_path, "Movielens")
    if not os.path.exists(data_path):
        raise FileNotFoundError(
            "Can not find movielens in {}, please download the dataset first.".format(
                data_path
            )
        )

    # Construct graph from raw data.
    movie_genre_src, movie_genre_dst = _read_movielens_pairs(
        os.path.join(data_path, "movie_genre.dat")
    )
    # Only ratings above 3 count as user-movie links.
    user_movie_src, user_movie_dst = _read_movielens_pairs(
        os.path.join(data_path, "user_movie.dat"), min_rating=3
    )
    user_occupation_src, user_occupation_dst = _read_movielens_pairs(
        os.path.join(data_path, "user_occupation.dat")
    )
    user_age_src, user_age_dst = _read_movielens_pairs(
        os.path.join(data_path, "user_age.dat")
    )

    def build_edges(um_src, um_dst):
        # All relations in both directions; only the user-movie links differ
        # between the full graph and the training graph.
        return {
            ("movie", "mg", "genre"): (movie_genre_src, movie_genre_dst),
            ("genre", "gm", "movie"): (movie_genre_dst, movie_genre_src),
            ("user", "um", "movie"): (um_src, um_dst),
            ("movie", "mu", "user"): (um_dst, um_src),
            ("user", "uo", "occupation"): (
                user_occupation_src,
                user_occupation_dst,
            ),
            ("occupation", "ou", "user"): (
                user_occupation_dst,
                user_occupation_src,
            ),
            ("user", "ua", "age"): (user_age_src, user_age_dst),
            ("age", "au", "user"): (user_age_dst, user_age_src),
        }

    # build graph
    hg = dgl.heterograph(build_edges(user_movie_src, user_movie_dst))
    print("Graph constructed.")

    # Split data into train/eval/test
    train_data, eval_data, test_data = split_data(hg, "um")

    # delete the positive edges in eval/test data in the original graph
    train_pos_idx = np.nonzero(train_data[:, 2])[0]
    user_movie_src_processed = train_data[train_pos_idx, 0]
    user_movie_dst_processed = train_data[train_pos_idx, 1]

    # Keep the node counts of the full graph so ids stay valid.
    nodes_dict = {
        ntype: hg.num_nodes(ntype)
        for ntype in ("user", "movie", "genre", "occupation", "age")
    }
    hg_processed = dgl.heterograph(
        data_dict=build_edges(
            user_movie_src_processed, user_movie_dst_processed
        ),
        num_nodes_dict=nodes_dict,
    )
    print("Graph processed.")

    # save the processed data
    outputs = (
        ("movielens_hg.pkl", hg_processed),
        ("movielens_train.pkl", train_data),
        ("movielens_test.pkl", test_data),
        ("movielens_eval.pkl", eval_data),
    )
    for filename, obj in outputs:
        with open(os.path.join(root_path, filename), "wb") as file:
            pkl.dump(obj, file)

    return hg_processed, train_data, eval_data, test_data
def load_data(dataset, batch_size=128, num_workers=10, root_path="./data"):
    """Load (or build and cache) a dataset and wrap its splits in loaders.

    Parameters
    ----------
    dataset : str
        Either ``"movielens"`` or ``"amazon"``.
    batch_size : int
        Batch size used by all three data loaders.
    num_workers : int
        Number of worker processes used by all three data loaders.
    root_path : str
        Directory holding the raw data folders and the ``*.pkl`` cache files.

    Returns
    -------
    tuple
        ``(hg, train_loader, eval_loader, test_loader, meta_paths, user_key,
        item_key)``.

    Raises
    ------
    NotImplementedError
        If ``dataset`` is not one of the supported names.
    """
    # Validate the dataset name before touching the file system.
    if dataset == "movielens":
        meta_paths = {
            "user": [["um", "mu"]],
            "movie": [["mu", "um"], ["mg", "gm"]],
        }
        user_key = "user"
        item_key = "movie"
    elif dataset == "amazon":
        meta_paths = {
            "user": [["ui", "iu"]],
            "item": [["iu", "ui"], ["ic", "ci"], ["ib", "bi"], ["iv", "vi"]],
        }
        user_key = "user"
        item_key = "item"
    else:
        raise NotImplementedError("Available datasets: movielens, amazon.")

    parts = ("hg", "train", "eval", "test")
    cache = {
        part: os.path.join(root_path, dataset + "_" + part + ".pkl")
        for part in parts
    }
    # Only trust the cache when all four files are present; a partially
    # written cache is rebuilt instead of failing on a missing file.
    if all(os.path.exists(path) for path in cache.values()):
        loaded = {}
        for part, path in cache.items():
            # NOTE: pickle executes arbitrary code on load; these files are
            # expected to be the ones written locally by the process_* helpers.
            with open(path, "rb") as cache_file:
                loaded[part] = pkl.load(cache_file)
        hg, train_set, eval_set, test_set = (loaded[part] for part in parts)
    elif dataset == "movielens":
        hg, train_set, eval_set, test_set = process_movielens(root_path)
    else:
        hg, train_set, eval_set, test_set = process_amazon(root_path)

    def _make_loader(split):
        # Convert straight to int64: going through a float32 tensor first
        # would corrupt ids larger than 2**24.
        triples = torch.as_tensor(split, dtype=torch.long)
        return DataLoader(
            dataset=MyDataset(triples),
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
        )

    train_loader = _make_loader(train_set)
    eval_loader = _make_loader(eval_set)
    test_loader = _make_loader(test_set)

    return (
        hg,
        train_loader,
        eval_loader,
        test_loader,
        meta_paths,
        user_key,
        item_key,
    )
def main(args):
    """Train TAHIN with early stopping, then report test metrics.

    The model with the best validation accuracy is checkpointed to
    ``"TAHIN_<dataset>"`` in the working directory and reloaded for the final
    evaluation on the test split.
    """
    # step 1: Check device
    if args.gpu >= 0 and torch.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    # step 2: Load data
    (
        g,
        train_loader,
        eval_loader,
        test_loader,
        meta_paths,
        user_key,
        item_key,
    ) = load_data(args.dataset, args.batch, args.num_workers, args.path)
    g = g.to(device)
    print("Data loaded.")

    # step 3: Create model and training components
    model = TAHIN(
        g, meta_paths, args.in_size, args.out_size, args.num_heads, args.dropout
    )
    model = model.to(device)
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    print("Model created.")

    # step 4: Training
    print("Start training.")
    checkpoint_path = "TAHIN" + "_" + args.dataset
    # Start below any reachable accuracy so the first epoch always writes a
    # checkpoint and ``best_epoch`` is always bound.
    best_acc = float("-inf")
    best_epoch = -1
    kill_cnt = 0
    for epoch in range(args.epochs):
        # Training and validation using a full graph
        model.train()
        train_loss = 0.0
        for step, batch in enumerate(train_loader):
            user, item, label = [_.to(device) for _ in batch]
            logits = model(g, user_key, item_key, user, item)

            # compute loss
            tr_loss = criterion(logits, label)

            # backward
            optimizer.zero_grad()
            tr_loss.backward()
            optimizer.step()

            # Accumulate a Python float so the autograd graph of each batch
            # can be freed immediately instead of living until epoch end.
            train_loss += tr_loss.item()

        model.eval()
        with torch.no_grad():
            validate_loss = 0.0
            validate_acc = []
            for step, batch in enumerate(eval_loader):
                user, item, label = [_.to(device) for _ in batch]
                logits = model(g, user_key, item_key, user, item)

                # compute loss
                validate_loss += criterion(logits, label).item()
                validate_acc.append(
                    evaluate_acc(
                        logits.detach().cpu().numpy(),
                        label.detach().cpu().numpy(),
                    )
                )
            validate_acc = np.mean(validate_acc)

        # validate
        if validate_acc > best_acc:
            best_acc = validate_acc
            best_epoch = epoch
            torch.save(model.state_dict(), checkpoint_path)
            kill_cnt = 0
            print("saving model...")
        else:
            kill_cnt += 1
            if kill_cnt > args.early_stop:
                print("early stop.")
                print("best epoch:{}".format(best_epoch))
                break

        print(
            "In epoch {}, Train Loss: {:.4f}, Valid Loss: {:.5}\n, Valid ACC: {:.5}".format(
                epoch, train_loss, validate_loss, validate_acc
            )
        )

    # test use the best model
    model.eval()
    with torch.no_grad():
        model.load_state_dict(torch.load(checkpoint_path, weights_only=False))
        test_loss = 0.0
        test_acc = []
        test_auc = []
        test_f1 = []
        test_logloss = []
        for step, batch in enumerate(test_loader):
            user, item, label = [_.to(device) for _ in batch]
            logits = model(g, user_key, item_key, user, item)

            # compute loss
            test_loss += criterion(logits, label).item()

            # Move predictions/labels to NumPy once and reuse for all metrics.
            pred_np = logits.detach().cpu().numpy()
            label_np = label.detach().cpu().numpy()
            test_acc.append(evaluate_acc(pred_np, label_np))
            test_auc.append(evaluate_auc(pred_np, label_np))
            test_f1.append(evaluate_f1_score(pred_np, label_np))
            test_logloss.append(evaluate_logloss(pred_np, label_np))

        test_acc = np.mean(test_acc)
        test_auc = np.mean(test_auc)
        test_f1 = np.mean(test_f1)
        test_logloss = np.mean(test_logloss)
        print(
            "Test Loss: {:.5}\n, Test ACC: {:.5}\n, AUC: {:.5}\n, F1: {:.5}\n, Logloss: {:.5}\n".format(
                test_loss, test_acc, test_auc, test_f1, test_logloss
            )
        )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Parser For Arguments",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--dataset",
        default="movielens",
        help="Dataset to use, default: movielens",
    )
    parser.add_argument(
        "--path", default="./data", help="Path to save the data"
    )
    parser.add_argument("--model", default="TAHIN", help="Model Name")
    parser.add_argument("--batch", default=128, type=int, help="Batch size")
    parser.add_argument(
        "--gpu",
        type=int,
        default=0,
        help="Set GPU Ids : Eg: For CPU = -1, For Single GPU = 0",
    )
    parser.add_argument(
        "--epochs", type=int, default=500, help="Maximum number of epochs"
    )
    parser.add_argument(
        "--wd", type=float, default=0, help="L2 Regularization for Optimizer"
    )
    parser.add_argument("--lr", type=float, default=0.001, help="Learning Rate")
    parser.add_argument(
        "--num_workers",
        type=int,
        default=10,
        help="Number of processes to construct batches",
    )
    parser.add_argument(
        "--early_stop", default=15, type=int, help="Patience for early stop."
    )
    parser.add_argument(
        "--in_size",
        default=128,
        type=int,
        help="Initial dimension size for entities.",
    )
    parser.add_argument(
        "--out_size",
        default=128,
        type=int,
        help="Output dimension size for entities.",
    )
    parser.add_argument(
        "--num_heads", default=1, type=int, help="Number of attention heads"
    )
    parser.add_argument("--dropout", default=0.1, type=float, help="Dropout.")

    args = parser.parse_args()

    print(args)

    main(args)
Dependencies ---------------------- - pytorch 1.7.1 - dgl 0.6.0 - scikit-learn 0.22.1 Datasets --------------------------------------- The datasets used can be downloaded from [here](https://github.com/librahu/HIN-Datasets-for-Recommendation-and-Network-Embedding). For the experiments, all the positive edges are fetched and the same number of negative edges are randomly sampled. The edges are then shuffled and splitted into train/validate/test at a ratio of 6:2:2. The positive edges that appear in the validation and test sets are then removed from the original graph. The original graph statistics: **Movielens** (Source : https://grouplens.org/datasets/movielens/) | Entity |#Entity | | :-------------:|:-------------:| | User | 943 | | Age | 8 | | Occupation | 21 | | Movie | 1,682 | | Genre | 18 | | Relation |#Relation | | :-------------: |:-------------:| | User - Movie | 100,000 | | User - User (KNN) | 47,150 | | User - Age | 943 | | User - Occupation | 943 | | Movie - Movie (KNN) | 82,798 | | Movie - Genre | 2,861 | **Amazon** (Source : http://jmcauley.ucsd.edu/data/amazon/) | Entity |#Entity | | :-------------:|:-------------:| | User | 6,170 | | Item | 2,753 | | View | 3,857 | | Category | 22 | | Brand | 334 | | Relation |#Relation | | :-------------: |:-------------:| | User - Item | 195,791 | | Item - View | 5,694 | | Item - Category | 5,508 | | Item - Brand | 2,753 | How to run -------------------------------- ```python python main.py --dataset amazon --gpu 0 ``` ```python python main.py --dataset movielens --gpu 0 ``` Performance ------------------------- **Results** | Dataset | Movielens | Amazon | |---------| ------------------------ | ------------------------ | | Metric | HAN / TAHIN | HAN / TAHIN | | AUC | 0.9297 / 0.9392 | 0.8470 / 0.8442 | | ACC | 0.8627 / 0.8683 | 0.7672 / 0.7619 | | F1 | 0.8631 / 0.8707 | 0.7628 / 0.7499 | | Logloss | 0.3689 / 0.3266 | 0.5311 / 0.5150 | ================================================ FILE: 
def _threshold(pred):
    """Turn scores into hard 0/1 labels using a 0.5 cut-off (inclusive)."""
    return [1 if score >= 0.5 else 0 for score in pred]


def evaluate_auc(pred, label):
    """Return the ROC-AUC of the predicted scores against the true labels."""
    return roc_auc_score(y_true=label, y_score=pred)


def evaluate_acc(pred, label):
    """Return the accuracy of the scores after thresholding them at 0.5."""
    return accuracy_score(y_true=label, y_pred=_threshold(pred))


def evaluate_f1_score(pred, label):
    """Return the F1 score of the scores after thresholding them at 0.5."""
    return f1_score(y_true=label, y_pred=_threshold(pred))


def evaluate_logloss(pred, label):
    """Return the mean (normalized) log loss of the predicted scores."""
    return log_loss(y_true=label, y_pred=pred, normalize=True)
class APPNP(nn.Module):
    """MLP predictor followed by an ``APPNPConv`` propagation step.

    The constructor stacks ``in_feats -> hiddens[0] -> ... -> hiddens[-1] ->
    n_classes`` linear layers; ``forward`` runs them and then propagates the
    result over the stored graph ``g``.
    """

    def __init__(
        self,
        g,
        in_feats,
        hiddens,
        n_classes,
        activation,
        feat_drop,
        edge_drop,
        alpha,
        k,
    ):
        super().__init__()
        self.g = g

        # Consecutive pairs of this width chain give the input layer, the
        # hidden layers and the output layer, in that order.
        widths = [in_feats, hiddens[0], *hiddens[1:], n_classes]
        self.layers = nn.ModuleList(
            [
                nn.Linear(fan_in, fan_out)
                for fan_in, fan_out in zip(widths[:-1], widths[1:])
            ]
        )

        self.activation = activation
        # A falsy dropout rate disables feature dropout altogether.
        self.feat_drop = nn.Dropout(feat_drop) if feat_drop else (lambda x: x)
        self.propagate = APPNPConv(k, alpha, edge_drop)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize every linear layer."""
        for linear in self.layers:
            linear.reset_parameters()

    def forward(self, features):
        """Predict from ``features`` and propagate over the stored graph."""
        # prediction step: dropout is applied to the raw input and again
        # right before the output layer.
        hidden = self.activation(self.layers[0](self.feat_drop(features)))
        for linear in self.layers[1:-1]:
            hidden = self.activation(linear(hidden))
        out = self.layers[-1](self.feat_drop(hidden))

        # propagation step
        return self.propagate(self.g, out)
args.dataset == "pubmed": data = PubmedGraphDataset() else: raise ValueError("Unknown dataset: {}".format(args.dataset)) g = data[0] if args.gpu < 0: cuda = False else: cuda = True g = g.to(args.gpu) features = g.ndata["feat"] labels = g.ndata["label"] train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = g.num_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, train_mask.int().sum().item(), val_mask.int().sum().item(), test_mask.int().sum().item(), ) ) n_edges = g.num_edges() # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) # create APPNP model model = APPNP( g, in_feats, args.hidden_sizes, n_classes, F.relu, args.in_drop, args.edge_drop, args.alpha, args.k, ) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer optimizer = torch.optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.weight_decay ) # initialize graph mean = 0 for epoch in range(args.n_epochs): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: mean = (mean * (epoch - 3) + (time.time() - t0)) / (epoch - 2) acc = evaluate(model, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, mean, loss.item(), acc, n_edges / mean / 1000, ) ) print() acc = evaluate(model, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="APPNP") register_data_args(parser) parser.add_argument( "--in-drop", type=float, default=0.5, help="input feature dropout" ) parser.add_argument( "--edge-drop", type=float, default=0.5, help="edge propagation 
dropout" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--hidden_sizes", type=int, nargs="+", default=[64], help="hidden unit sizes for appnp", ) parser.add_argument( "--k", type=int, default=10, help="Number of propagation steps" ) parser.add_argument( "--alpha", type=float, default=0.1, help="Teleport Probability" ) parser.add_argument( "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/argo/README.md ================================================ # ARGO: An Auto-Tuning Runtime System for Scalable GNN Training on Multi-Core Processor ## Overview Graph Neural Network (GNN) training suffers from low scalability on multi-core processors. ARGO is a runtime system that offers scalable performance. The figure below shows an example of GNN training on a Xeon 8380H platform with 112 cores. Without ARGO, there is no performance improvement after applying more than 16 cores; we observe a similar scalability limit on a Xeon 6430L platform with 64 cores as well. However, with ARGO enabled, we are able to scale over 64 cores, allowing ARGO to speedup GNN training (in terms of epoch time) by up to 4.30x and 3.32x on a Xeon 8380H and a Xeon 6430L, respectively. ![ARGO](https://github.com/dmlc/dgl/blob/master/examples/pytorch/argo/argo_scale.png) This README includes how to: 1. [Installation](#1-installation) 2. [Run the example code](#2-running-the-example-GNN-program) 3. [Modify your own GNN program to enable ARGO.](#3-enabling-ARGO-on-your-own-GNN-program) ## 1. Installation 1. ARGO utilizes the scikit-optimize library for auto-tuning. 
Please install scikit-optimize to run ARGO: ```shell conda install -c conda-forge "scikit-optimize>=0.9.0" ``` or ```shell pip install scikit-optimize>=0.9 ``` ## 2. Running the example GNN program ### Usage ```shell python main.py --dataset ogbn-products --sampler shadow --model sage ``` Important Arguments: - `--dataset`: the training datasets. Available choices [ogbn-products, ogbn-papers100M, reddit, flickr, yelp] - `--sampler`: the mini-batch sampling algorithm. Available choices [shadow, neighbor] - `--model`: GNN model. Available choices [gcn, sage] - `--layer`: number of GNN layers. - `--fan_out`: number of fanout neighbors for each layer. - `--hidden`: hidden feature dimension. - `--batch_size`: the size of the mini-batch. ## 3. Enabling ARGO on your own GNN program In this section, we provide a step-by-step tutorial on how to enable ARGO on a DGL program. We use the ```ogb_example.py``` file in this repo as an example. > Note: we also provide the complete example file ```ogb_example_ARGO.py``` which followed the steps below to enable ARGO on ```ogb_example.py```. 1. First, include all necessary packages on top of the file. Please place your file and ```argo.py``` in the same directory. ```python import os import torch.distributed as dist from torch.nn.parallel import DistributedDataParallel import torch.multiprocessing as mp from argo import ARGO ``` 2. Setup PyTorch Distributed Data Parallel (DDP). 1. Add the initialization function on top of the training program, and wrap the ```model``` with the DDP wrapper ```python def train(...): dist.init_process_group('gloo', rank=rank, world_size=world_size) # newly added model = SAGE(...) # original code model = DistributedDataParallel(model) # newly added ... ``` 2. 
In the main program, add the following before launching the training function ```python os.environ['MASTER_ADDR'] = '127.0.0.1' os.environ['MASTER_PORT'] = '29501' mp.set_start_method('fork', force=True) train(args, device, data) # original code for launching the training function ``` 3. Enable ARGO by initializing the runtime system, and wrapping the training function ```python runtime = ARGO(n_search = 15, epoch = args.num_epochs, batch_size = args.batch_size) #initialization runtime.run(train, args=(args, device, data)) # wrap the training function ``` > ARGO takes three input paramters: number of searches ```n_search```, number of epochs, and the mini-batch size. Increasing ```n_search``` potentially leads to a better configuration with less epoch time; however, searching itself also causes extra overhead. We recommend setting ```n_search``` from 15 to 45 for an optimal overall performance. Details of ```n_search``` can be found in the paper. 4. Modify the input of the training function, by directly adding ARGO parameters after the original inputs. This is the original function: ```python def train(args, device, data): ``` Add ```rank, world_size, comp_core, load_core, counter, b_size, ep``` like this: ```python def train(args, device, data, rank, world_size, comp_core, load_core, counter, b_size, ep): ``` 5. Modify the ```dataloader``` function in the training function ```python dataloader = dgl.dataloading.DataLoader( g, train_nid, sampler, batch_size=b_size, # modified shuffle=True, drop_last=False, num_workers=len(load_core), # modified use_ddp = True) # newly added ``` 6. Enable core-binding by adding ```enable_cpu_affinity()``` before the training for-loop, and also change the number of epochs into the variable ```ep```: ```python with dataloader.enable_cpu_affinity(loader_cores=load_core, compute_cores=comp_core): for epoch in range(ep): # change num_epochs to ep ``` 7. Last step! Load the model before training and save it afterward. 
Original Program: ```python with dataloader.enable_cpu_affinity(loader_cores=load_core, compute_cores=comp_core): for epoch in range(ep): ... # training operations ``` Modified: ```python PATH = "model.pt" if counter[0] != 0: checkpoint = th.load(PATH) model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) epoch = checkpoint['epoch'] loss = checkpoint['loss'] with dataloader.enable_cpu_affinity(loader_cores=load_core, compute_cores=comp_core): for epoch in range(ep): ... # training operations dist.barrier() if rank == 0: th.save({'epoch': counter[0], 'model_state_dict': model.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'loss': loss, }, PATH) ``` 8. Done! You can now run your GNN program with ARGO enabled. ```shell python .py ``` ## Citation & Acknowledgement This work has been supported by the U.S. National Science Foundation (NSF) under grants CCF-1919289/SPX-2333009, CNS-2009057 and OAC-2209563, and the Semiconductor Research Corporation (SRC). ``` @INPROCEEDINGS{argo-ipdps24, author={Yi-Chien Lin and Yuyang Chen and Sameh Gobriel and Nilesh Jain and Gopi Krishna Jhaand and Viktor Prasanna}, booktitle={IEEE International Parallel and Distributed Processing Symposium (IPDPS)}, title={ARGO: An Auto-Tuning Runtime System for Scalable GNN Training on Multi-Core Processor}, year={2024}} ``` ================================================ FILE: examples/pytorch/argo/argo.py ================================================ """ ARGO: An Auto-Tuning Runtime System for Scalable GNN Training on Multi-Core Processor -------------------------------------------- Graph Neural Network (GNN) training suffers from low scalability on multi-core CPUs. Specificially, the performance often caps at 16 cores, and no improvement is observed when applying more than 16 cores. ARGO is a runtime system that offers scalable performance by overlapping the computation and communication during GNN training. 
With ARGO enabled, we are able to scale over 64 cores, allowing ARGO to speedup GNN training (in terms of epoch time) by up to 4.30x and 3.32x on a Xeon 8380H and a Xeon 6430L, respectively. -------------------------------------------- Paper Link: https://arxiv.org/abs/2402.03671 """ import time from typing import Callable, List, Tuple import dgl.multiprocessing as dmp import numpy as np import psutil from skopt import gp_minimize from skopt.space import Normalize def transform(self, X): X = np.asarray(X) if self.is_int: if np.any(np.round(X) > self.high): raise ValueError( "All integer values should" "be less than %f" % self.high ) if np.any(np.round(X) < self.low): raise ValueError( "All integer values should" "be greater than %f" % self.low ) else: if np.any(X > self.high + self._eps): raise ValueError("All values should" "be less than %f" % self.high) if np.any(X < self.low - self._eps): raise ValueError( "All values should" "be greater than %f" % self.low ) if (self.high - self.low) == 0.0: return X * 0.0 if self.is_int: return (np.round(X).astype(int) - self.low) / (self.high - self.low) else: return (X - self.low) / (self.high - self.low) def inverse_transform(self, X): X = np.asarray(X) if np.any(X > 1.0 + self._eps): raise ValueError("All values should be less than 1.0") if np.any(X < 0.0 - self._eps): raise ValueError("All values should be greater than 0.0") X_orig = X * (self.high - self.low) + self.low if self.is_int: return np.round(X_orig).astype(int) return X_orig # This is a workaround for scikit-optimize's incompatibility with NumPy, which results in an error:: # AttributeError: module 'numpy' has no attribute 'int' Normalize.transform = transform Normalize.inverse_transform = inverse_transform class ARGO: def __init__( self, n_search=10, epoch=200, batch_size=4096, space=[(2, 8), (1, 4), (1, 32)], random_state=1, ): """ Initialization Parameters ---------- n_search: int Number of configuration searches the auto-tuner will conduct epoch: int Number 
of epochs of GNN training batch_size: int Size of the mini-batch space: list[Tuple(int,int)] Range of the search space; [range of processes, range of samplers for each process, range of trainers for each process] random_state: int Number of random initializations before searching """ self.n_search = n_search self.epoch = epoch self.batch_size = batch_size self.space = space self.random_state = random_state self.acq_func = "EI" self.counter = [0] def core_binder( self, num_cpu_proc: int, n_samp: int, n_train: int, rank: int ) -> Tuple[List[int], List[int]]: """ Core Binder The Core Binder binds CPU cores to perform sampling (i.e., sampling cores) and model propagation (i.e., training cores). The actual binding is done using the CPU affinity function in the data_loader. The core_binder function here is used to produce the list of CPU IDs for the CPU affinity function. Parameters ---------- num_cpu_proc: int Number of processes instantiated n_samp: int Number of sampling cores for each process n_train: int Number of training cores for each process rank: int The rank of the current process Returns: Tuple[list[int], list[int]] ------- load_core: list[int] For a given process rank, the load_core specifies a list of CPU core IDs to be used for sampling, the length of load_core = n_samp. comp_core: list[int] For a given process rank, the comp_core specifies a list of CPU core IDs to be used for training, the length of comp_core = n_comp. .. note:: Each process is assigned with a unique list of sampling cores and training cores, and no CPU core will appear in two lists or more. 
""" load_core, comp_core = [], [] n = psutil.cpu_count(logical=False) size = num_cpu_proc num_of_samplers = n_samp load_core = list( range(n // size * rank, n // size * rank + num_of_samplers) ) comp_core = list( range( n // size * rank + num_of_samplers, n // size * rank + num_of_samplers + n_train, ) ) return load_core, comp_core def auto_tuning(self, train: Callable, args) -> List[int]: """ Auto-tuner The auto-tuner runs Bayesian Optimization (BO) to search for the optimal configuration (number of processes, samplers, trainers). During the search, the auto-tuner explores the design space by collecting the epoch time of various configurations. Specifically, the exploration is done by feeding the Multi-Process Engine with various configurations, and record the epoch time. After the searching is done, the optimal configuration will be used repeatedly until the end of model training. Parameters ---------- train: Callable The GNN training function. args: The inputs of the GNN training function. Returns ------- result: list[int] The optimal configurations (which leads to the shortest epoch time) found by running BO. - result[0]: number of processes to instantiate - result[1]: number of sampling cores for each process - result[2]: number of training cores for each process """ ep = 1 result = gp_minimize( lambda x: self.mp_engine(x, train, args, ep), dimensions=self.space, n_calls=self.n_search, random_state=self.random_state, acq_func=self.acq_func, ) return result def mp_engine(self, x: List[int], train: Callable, args, ep: int) -> float: """ Multi-Process Engine (MP Engine) The MP Engine launches multiple GNN training processes in parallel to overlap computation with communication. Such an approach effectively improves the utilization of the memory bandwidth and the CPU cores. The MP Engine also adjust the batch size according to the number of processes instantiated, so that the effective batch size remains the same as the original program without ARGO. 
def run(self, train, args):
    """
    Launch ARGO to train a GNN model.

    Step 1: run the auto-tuner to search for the optimal configuration
    Step 2: record the optimal configuration
    Step 3: use the optimal configuration repeatedly until the end of the
            model training

    Parameters
    ----------
    train: Callable
        The GNN training function.
    args:
        The inputs of the GNN training function.
    """
    result = self.auto_tuning(train, args)  # Step 1
    x = result.x  # Step 2

    # Every search step already trains for one epoch, so only the epochs not
    # consumed by the search are left. When none are left, skip the launch
    # instead of spawning a process group that trains for zero epochs.
    remaining = self.epoch - self.n_search
    if remaining > 0:
        self.mp_engine(x, train, args, ep=remaining)  # Step 3
def _train(**kwargs):
    """
    Train the model for one pass over the data loader and return the summed loss.

    Expected keyword arguments
    --------------------------
    loader:
        Data loader yielding ``(input_nodes, output_nodes, blocks)`` and
        providing ``enable_cpu_affinity``.
    model:
        Callable invoked as ``model(blocks, x)``.
    opt:
        Optimizer stepping the model parameters.
    load_core, comp_core: list[int]
        CPU core IDs handed to ``enable_cpu_affinity`` for sampling and
        computation respectively.

    Returns
    -------
    total_loss: float
        Sum of the per-batch loss values (``0`` if the loader is empty).
    """
    total_loss = 0
    loader = kwargs["loader"]
    model = kwargs["model"]
    opt = kwargs["opt"]

    with loader.enable_cpu_affinity(
        loader_cores=kwargs["load_core"], compute_cores=kwargs["comp_core"]
    ):
        for input_nodes, output_nodes, blocks in loader:
            # `blocks` is either a sequence of blocks or a single graph.
            if hasattr(blocks, "__len__"):
                x = blocks[0].srcdata["feat"].to(torch.float32)
                y = blocks[-1].dstdata["label"]
            else:
                x = blocks.srcdata["feat"].to(torch.float32)
                y = blocks.dstdata["label"]

            # Training runs on CPU only; torch.LongTensor is a CPU type, so
            # no further device transfer is needed.
            y = y.type(torch.LongTensor)
            y_hat = model(blocks, x)

            n_out = output_nodes.shape[0]
            try:
                loss = F.cross_entropy(y_hat[:n_out], y[:n_out])
            except (RuntimeError, ValueError):
                # cross_entropy rejects integer multi-label targets; fall
                # back to a per-class binary loss. Only shape/dtype errors
                # are caught so that interrupts are not swallowed.
                loss = F.binary_cross_entropy_with_logits(
                    y_hat[:n_out].float(),
                    y[:n_out].float(),
                    reduction="sum",
                )

            opt.zero_grad()
            loss.backward()
            opt.step()

            # Release the sampled batch before the next one is fetched.
            del input_nodes, output_nodes, blocks
            total_loss += loss.item()
    return total_loss
if __name__ == "__main__":
    # Command-line options of the ARGO example.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="ogbn-products",
        choices=[
            "ogbn-papers100M",
            "ogbn-products",
            "reddit",
            "yelp",
            "flickr",
        ],
    )
    parser.add_argument("--batch_size", type=int, default=1024 * 4)
    parser.add_argument("--layer", type=int, default=3)
    parser.add_argument("--fan_out", type=str, default="15,10,5")
    parser.add_argument(
        "--sampler",
        type=str,
        default="neighbor",
        choices=["neighbor", "shadow"],
    )
    parser.add_argument(
        "--model", type=str, default="sage", choices=["sage", "gcn"]
    )
    parser.add_argument("--hidden", type=int, default=128)
    arguments = parser.parse_args()

    os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

    if arguments.dataset in ["reddit", "flickr", "yelp"]:
        if arguments.dataset == "reddit":
            dataset = RedditDataset()
        elif arguments.dataset == "flickr":
            dataset = FlickrDataset()
        else:
            dataset = YelpDataset()
        g = dataset[0]
        # These datasets only provide a boolean train mask; convert it to
        # node IDs with one vectorized call instead of a Python-level loop
        # over every node.
        train_mask = g.ndata["train_mask"]
        dataset.train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze(1)
    else:
        dataset = AsNodePredDataset(DglNodePropPredDataset(arguments.dataset))
        g = dataset[0]

    data = (dataset.num_classes, dataset.train_idx)

    # Rendezvous address used by the gloo process group of the workers.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29501"
    mp.set_start_method("fork", force=True)
    runtime = ARGO(n_search=10, epoch=20, batch_size=arguments.batch_size)
    runtime.run(train, args=(arguments, g, data))
def compute_acc(pred, labels):
    """
    Return the fraction of rows of ``pred`` whose arg-max class equals the label.
    """
    predicted = th.argmax(pred, dim=1)
    n_correct = (predicted == labels).float().sum()
    return n_correct / len(pred)
def load_subtensor(nfeat, labels, seeds, input_nodes):
    """
    Gather the mini-batch inputs and targets.

    Returns ``nfeat`` indexed by ``input_nodes`` and ``labels`` indexed by
    ``seeds``, in that order.
    """
    return nfeat[input_nodes], labels[seeds]
for step, (input_nodes, seeds, blocks) in enumerate(dataloader): tic_step = time.time() # copy block to gpu blocks = [blk.int().to(device) for blk in blocks] # Load the input features as well as output labels batch_inputs, batch_labels = load_subtensor( nfeat, labels, seeds, input_nodes ) # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() iter_tput.append(len(seeds) / (time.time() - tic_step)) if step % args.log_every == 0 and step != 0: acc = compute_acc(batch_pred, batch_labels) print( "Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f}".format( step, loss.item(), acc.item(), np.mean(iter_tput[3:]), ) ) toc = time.time() print("Epoch Time(s): {:.4f}".format(toc - tic)) avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: eval_acc, test_acc, pred = evaluate( model, g, nfeat, labels, val_nid, test_nid, device ) if args.save_pred: np.savetxt( args.save_pred + "%02d" % epoch, pred.argmax(1).cpu().numpy(), "%d", ) print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc print( "Best Eval Acc {:.4f} Test Acc {:.4f}".format( best_eval_acc, best_test_acc ) ) print("Avg epoch time: {}".format(avg / args.num_epochs)) return best_test_acc if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") argparser.add_argument( "--gpu", type=int, default=0, help="GPU device ID. 
Use -1 for CPU training", ) argparser.add_argument("--num-epochs", type=int, default=20) argparser.add_argument("--num-hidden", type=int, default=256) argparser.add_argument("--num-layers", type=int, default=3) argparser.add_argument("--fan-out", type=str, default="5,10,15") argparser.add_argument("--batch-size", type=int, default=1000) argparser.add_argument("--val-batch-size", type=int, default=10000) argparser.add_argument("--log-every", type=int, default=20) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--lr", type=float, default=0.003) argparser.add_argument("--dropout", type=float, default=0.5) argparser.add_argument( "--dataset", type=str, default="ogbn-products", choices=["ogbn-papers100M", "ogbn-products"], ) argparser.add_argument( "--num-workers", type=int, default=4, help="Number of sampling processes. Use 0 for no extra process.", ) argparser.add_argument("--save-pred", type=str, default="") argparser.add_argument("--wd", type=float, default=0) args = argparser.parse_args() device = th.device("cpu") # load ogbn-products data data = DglNodePropPredDataset(args.dataset) splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] nfeat = graph.ndata.pop("feat").to(device) labels = labels[:, 0].to(device) in_feats = nfeat.shape[1] n_classes = (labels.max() + 1).item() # Create csr/coo/csc formats before launching sampling processes # This avoids creating certain formats in each data loader process, which saves momory and CPU. 
graph.create_formats_() # Pack data data = ( train_idx, val_idx, test_idx, in_feats, labels, n_classes, nfeat, graph, ) test_acc = train(args, device, data).cpu().numpy() print("Test accuracy:", test_acc) ================================================ FILE: examples/pytorch/argo/ogb_example_ARGO.py ================================================ """ This is a modified version of: https://github.com/dmlc/dgl/blob/master/examples/pytorch/ogb/ogbn-products/graphsage/main.py This example shows how to enable ARGO to automatically instantiate multi-processing and adjust CPU core assignment to achieve better performance. """ import argparse import ctypes import os import time from multiprocessing import RawValue import dgl import dgl.nn.pytorch as dglnn import numpy as np import torch as th import torch.distributed as dist import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import tqdm from argo import ARGO from ogb.nodeproppred import DglNodePropPredDataset from torch.nn.parallel import DistributedDataParallel avg_total = RawValue(ctypes.c_float, 0.0) class SAGE(nn.Module): def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): # We need to first copy the representation of nodes on the RHS from the # appropriate nodes on the LHS. 
def evaluate(model, g, nfeat, labels, val_nid, test_nid, device):
    """
    Evaluate the model on the validation and test node sets.

    model : The wrapped model; full-graph inference is run through
        ``model.module.inference``.
    g : The entire graph.
    nfeat : The features of all the nodes.
    labels : The labels of all the nodes.
    val_nid : IDs of the validation nodes.
    test_nid : IDs of the test nodes.
    device : The device passed on to the inference routine.

    Returns a tuple ``(val_acc, test_acc, pred)`` where ``pred`` holds the
    predictions for all nodes.
    """
    model.eval()
    try:
        with th.no_grad():
            pred = model.module.inference(g, nfeat, device)
    finally:
        # Restore training mode even if inference fails, so a caller that
        # handles the error does not keep training in eval mode.
        model.train()
    return (
        compute_acc(pred[val_nid], labels[val_nid]),
        compute_acc(pred[test_nid], labels[test_nid]),
        pred,
    )
for step, (input_nodes, seeds, blocks) in enumerate(dataloader): tic_step = time.time() # copy block to gpu blocks = [blk.int().to(device) for blk in blocks] # Load the input features as well as output labels batch_inputs, batch_labels = load_subtensor( nfeat, labels, seeds, input_nodes ) # Compute loss and prediction batch_pred = model(blocks, batch_inputs) loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() loss.backward() optimizer.step() iter_tput.append(len(seeds) / (time.time() - tic_step)) if step % args.log_every == 0 and step != 0: acc = compute_acc(batch_pred, batch_labels) print( "Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f}".format( step, loss.item(), acc.item(), np.mean(iter_tput[3:]), ) ) toc = time.time() print("Epoch Time(s): {:.4f}".format(toc - tic)) if rank == 0: global avg_total avg_total.value += toc - tic avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: eval_acc, test_acc, pred = evaluate( model, g, nfeat, labels, val_nid, test_nid, device ) if args.save_pred: np.savetxt( args.save_pred + "%02d" % epoch, pred.argmax(1).cpu().numpy(), "%d", ) print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc print( "Best Eval Acc {:.4f} Test Acc {:.4f}".format( best_eval_acc, best_test_acc ) ) dist.barrier() if rank == 0: th.save( { "epoch": counter[0], "model_state_dict": model.state_dict(), "optimizer_state_dict": optimizer.state_dict(), "loss": loss, }, PATH, ) if args.num_epochs == counter[0] + epoch + 1: print( "Avg epoch time: {}".format(avg_total.value / args.num_epochs) ) print( "Avg epoch time after auto-tuning: {}".format(avg / (epoch + 1)) ) return best_test_acc if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") argparser.add_argument( "--gpu", type=int, default=0, help="GPU device ID. 
Use -1 for CPU training", ) argparser.add_argument("--num-epochs", type=int, default=20) argparser.add_argument("--num-hidden", type=int, default=256) argparser.add_argument("--num-layers", type=int, default=3) argparser.add_argument("--fan-out", type=str, default="5,10,15") argparser.add_argument("--batch-size", type=int, default=1000) argparser.add_argument("--val-batch-size", type=int, default=10000) argparser.add_argument("--log-every", type=int, default=20) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--lr", type=float, default=0.003) argparser.add_argument("--dropout", type=float, default=0.5) argparser.add_argument( "--dataset", type=str, default="ogbn-products", choices=["ogbn-papers100M", "ogbn-products"], ) argparser.add_argument( "--num-workers", type=int, default=4, help="Number of sampling processes. Use 0 for no extra process.", ) argparser.add_argument("--save-pred", type=str, default="") argparser.add_argument("--wd", type=float, default=0) args = argparser.parse_args() device = th.device("cpu") # load ogbn-products data data = DglNodePropPredDataset(args.dataset) splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] nfeat = graph.ndata.pop("feat").to(device) labels = labels[:, 0].to(device) in_feats = nfeat.shape[1] n_classes = (labels.max() + 1).item() # Create csr/coo/csc formats before launching sampling processes # This avoids creating certain formats in each data loader process, which saves momory and CPU. 
graph.create_formats_() # Pack data data = ( train_idx, val_idx, test_idx, in_feats, labels, n_classes, nfeat, graph, ) os.environ["MASTER_ADDR"] = "127.0.0.1" os.environ["MASTER_PORT"] = "29501" mp.set_start_method("fork", force=True) runtime = ARGO( n_search=15, epoch=args.num_epochs, batch_size=args.batch_size ) # initialization runtime.run(train, args=(args, device, data)) # wrap the training function ================================================ FILE: examples/pytorch/arma/README.md ================================================ # DGL Implementation of ARMA This DGL example implements the GNN model proposed in the paper [Graph Neural Networks with convolutional ARMA filters](https://arxiv.org/abs/1901.01343). Contributor: [xnuohz](https://github.com/xnuohz) ### Requirements The codebase is implemented in Python 3.6. For version requirement of packages, see below. ``` dgl numpy 1.19.5 networkx 2.5 scikit-learn 0.24.1 tqdm 4.56.0 torch 1.7.0 ``` ### The graph datasets used in this example ###### Node Classification The DGL's built-in Cora, Pubmed, Citeseer datasets. Dataset summary: | Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes | | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | Cora | 2,708 | 10,556 | 1,433 | 7(single label) | 140 | 500 | 1000 | | Citeseer | 3,327 | 9,228 | 3,703 | 6(single label) | 120 | 500 | 1000 | | Pubmed | 19,717 | 88,651 | 500 | 3(single label) | 60 | 500 | 1000 | ### Usage ###### Dataset options ``` --dataset str The graph dataset name. Default is 'Cora'. ``` ###### GPU options ``` --gpu int GPU index. Default is -1, using CPU. ``` ###### Model options ``` --epochs int Number of training epochs. Default is 2000. --early-stopping int Early stopping rounds. Default is 100. --lr float Adam optimizer learning rate. Default is 0.01. --lamb float L2 regularization coefficient. Default is 0.0005. --hid-dim int Hidden layer dimensionalities. Default is 16. --num-stacks int Number of K. 
def main(args):
    """
    Train an ARMA model on a citation graph and return its test accuracy.

    The model is trained on the full graph with early stopping on the
    validation accuracy; the weights with the best validation accuracy are
    used for the final test evaluation.

    Parameters
    ----------
    args:
        Parsed command-line arguments (dataset, gpu, epochs, early_stopping,
        lr, lamb, hid_dim, num_stacks, num_layers, dropout).

    Returns
    -------
    test_acc: float
        Accuracy of the best model on the test nodes.
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load from DGL dataset
    if args.dataset == "Cora":
        dataset = CoraGraphDataset()
    elif args.dataset == "Citeseer":
        dataset = CiteseerGraphDataset()
    elif args.dataset == "Pubmed":
        dataset = PubmedGraphDataset()
    else:
        raise ValueError("Dataset {} is invalid.".format(args.dataset))

    graph = dataset[0]

    # check cuda
    device = (
        f"cuda:{args.gpu}"
        if args.gpu >= 0 and torch.cuda.is_available()
        else "cpu"
    )

    # retrieve the number of classes
    n_classes = dataset.num_classes

    # retrieve labels of ground truth
    labels = graph.ndata.pop("label").to(device).long()

    # Extract node features
    feats = graph.ndata.pop("feat").to(device)
    n_features = feats.shape[-1]

    # retrieve masks for train/validation/test
    train_mask = graph.ndata.pop("train_mask")
    val_mask = graph.ndata.pop("val_mask")
    test_mask = graph.ndata.pop("test_mask")

    # squeeze(1) keeps the index 1-D even when a split has a single node.
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = ARMA4NC(
        in_dim=n_features,
        hid_dim=args.hid_dim,
        out_dim=n_classes,
        num_stacks=args.num_stacks,
        num_layers=args.num_layers,
        activation=nn.ReLU(),
        dropout=args.dropout,
    ).to(device)

    best_model = copy.deepcopy(model)

    # Step 3: Create training components ===================================================== #
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lamb)

    # Step 4: training epochs ================================================================ #
    acc = 0
    no_improvement = 0
    epochs = trange(args.epochs, desc="Accuracy & Loss")

    for _ in epochs:
        # Training using a full graph
        model.train()

        logits = model(graph, feats)

        # compute loss
        train_loss = loss_fn(logits[train_idx], labels[train_idx])
        train_acc = torch.sum(
            logits[train_idx].argmax(dim=1) == labels[train_idx]
        ).item() / len(train_idx)

        # backward
        opt.zero_grad()
        train_loss.backward()
        opt.step()

        # Validation using a full graph. The logits are recomputed in eval
        # mode: reusing the training-pass logits would measure the model with
        # dropout active and with the weights from before this step.
        model.eval()

        with torch.no_grad():
            val_logits = model(graph, feats)
            valid_loss = loss_fn(val_logits[val_idx], labels[val_idx])
            valid_acc = torch.sum(
                val_logits[val_idx].argmax(dim=1) == labels[val_idx]
            ).item() / len(val_idx)

        # Print out performance
        epochs.set_description(
            "Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format(
                train_acc, train_loss.item(), valid_acc, valid_loss.item()
            )
        )

        if valid_acc < acc:
            no_improvement += 1
            if no_improvement == args.early_stopping:
                print("Early stop.")
                break
        else:
            no_improvement = 0
            acc = valid_acc
            best_model = copy.deepcopy(model)

    # Final test with the best model; no gradients are needed here.
    best_model.eval()
    with torch.no_grad():
        logits = best_model(graph, feats)
    test_acc = torch.sum(
        logits[test_idx].argmax(dim=1) == labels[test_idx]
    ).item() / len(test_idx)

    print("Test Acc {:.4f}".format(test_acc))
    return test_acc
def glorot(tensor):
    """Fill ``tensor`` in place with Glorot-style uniform values.

    Values are drawn uniformly from ``[-b, b]`` where
    ``b = sqrt(6 / (size(-2) + size(-1)))``, i.e. the bound is derived from
    the sizes of the last two dimensions.

    Parameters
    ----------
    tensor : torch.Tensor or None
        Tensor to initialise. ``None`` is accepted and ignored.
    """
    if tensor is None:
        return

    # Sum of the two trailing dimension sizes.
    fan_sum = tensor.size(-2) + tensor.size(-1)
    bound = math.sqrt(6.0 / fan_sum)
    tensor.data.uniform_(-bound, bound)
class ARMA4NC(nn.Module):
    """Two stacked ``ARMAConv`` layers with ReLU and dropout in between.

    Parameters
    ----------
    in_dim : int
        Size of the input node features (``in_dim`` of the first layer).
    hid_dim : int
        Output size of the first layer and input size of the second.
    out_dim : int
        Output size of the second layer.
    num_stacks : int
        Forwarded to both ``ARMAConv`` layers as ``num_stacks``.
    num_layers : int
        Forwarded to both ``ARMAConv`` layers as ``num_layers``.
    activation : callable, optional
        Forwarded unchanged to both ``ARMAConv`` layers.
    dropout : float, optional
        Dropout probability, used by both layers and by the dropout applied
        between them.
    """

    def __init__(
        self,
        in_dim,
        hid_dim,
        out_dim,
        num_stacks,
        num_layers,
        activation=None,
        dropout=0.0,
    ):
        super().__init__()

        # Settings that are identical for both convolution layers.
        shared = dict(
            num_stacks=num_stacks,
            num_layers=num_layers,
            activation=activation,
            dropout=dropout,
        )
        self.conv1 = ARMAConv(in_dim=in_dim, out_dim=hid_dim, **shared)
        self.conv2 = ARMAConv(in_dim=hid_dim, out_dim=out_dim, **shared)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, g, feats):
        """Apply conv1 -> ReLU -> dropout -> conv2 and return the result."""
        hidden = self.dropout(F.relu(self.conv1(g, feats)))
        return self.conv2(g, hidden)
def __init__(
    self,
    gnn_model,
    task="regression",
    loss_fn=None,
    trees_per_epoch=10,
    backprop_per_epoch=10,
    lr=0.01,
    append_gbdt_pred=True,
    train_input_features=False,
    gbdt_depth=6,
    gbdt_lr=0.1,
    gbdt_alpha=1,
    random_seed=0,
):
    """Store the hyperparameters, place the GNN on a device and seed RNGs.

    The device is ``cuda:0`` when CUDA is available and ``cpu`` otherwise.
    Both the torch and the NumPy global random generators are seeded with
    ``random_seed``.
    """
    use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if use_cuda else "cpu")
    self.model = gnn_model.to(self.device)

    # Plain hyperparameters are stored under their argument names.
    settings = {
        "task": task,
        "loss_fn": loss_fn,
        "trees_per_epoch": trees_per_epoch,
        "backprop_per_epoch": backprop_per_epoch,
        "lr": lr,
        "append_gbdt_pred": append_gbdt_pred,
        "train_input_features": train_input_features,
        "gbdt_depth": gbdt_depth,
        "gbdt_lr": gbdt_lr,
        "gbdt_alpha": gbdt_alpha,
        "random_seed": random_seed,
    }
    for attr_name, attr_value in settings.items():
        setattr(self, attr_name, attr_value)

    # Seed the global generators (process-wide side effect).
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)
def append_gbdt_model(self, new_gbdt_model, weights):
    """Combine ``new_gbdt_model`` with the model stored on the instance.

    When ``self.gbdt_model`` is ``None`` the new model is returned as is.
    Otherwise the result of ``sum_models`` over the stored and the new
    model, with the given ``weights``, is returned.
    """
    current = self.gbdt_model
    if current is not None:
        return sum_models([current, new_gbdt_model], weights=weights)
    return new_gbdt_model
def train_model(self, model_in, target_labels, train_mask, optimizer):
    """Run a single optimisation step on the training nodes.

    :param model_in: tuple of positional arguments forwarded to ``self.model``.
    :param target_labels: targets for all nodes; indexed with ``train_mask``.
    :param train_mask: index of the training rows.
    :param optimizer: optimizer whose ``zero_grad``/``step`` are called.
    :return: the loss computed on the training rows for this step.
    :raises NotImplementedError: if no ``loss_fn`` was supplied and
        ``self.task`` is neither ``"regression"`` nor ``"classification"``.
    """
    y = target_labels[train_mask]

    self.model.train()
    logits = self.model(*model_in).squeeze()
    pred = logits[train_mask]

    if self.loss_fn is not None:
        # A user-supplied loss takes precedence over the task defaults.
        loss = self.loss_fn(pred, y)
    elif self.task == "regression":
        loss = torch.sqrt(F.mse_loss(pred, y))
    elif self.task == "classification":
        loss = F.cross_entropy(pred, y.long())
    else:
        # `NotImplemented` is a non-callable singleton; the exception class
        # is `NotImplementedError`.
        raise NotImplementedError(
            "Unknown task. Supported tasks: classification, regression."
        )

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss
def fit(
    self,
    graph,
    X,
    y,
    train_mask,
    val_mask,
    test_mask,
    original_X=None,
    cat_features=None,
    num_epochs=100,
    patience=10,
    logging_epochs=1,
    metric_name="loss",
):
    """
    Alternate GBDT fitting and GNN training for up to ``num_epochs`` epochs.

    :param graph : dgl.DGLGraph
        Input graph
    :param X : pd.DataFrame
        Input node features. Each column represents one input feature. Each row is a node.
        Values in dataframe are numerical, after preprocessing.
    :param y : pd.DataFrame
        Input node targets. Each column represents one target. Each row is a node
        (order of nodes should be the same as in X).
    :param train_mask : list[int]
        Node indexes (rows) that belong to train set.
    :param val_mask : list[int]
        Node indexes (rows) that belong to validation set.
    :param test_mask : list[int]
        Node indexes (rows) that belong to test set.
    :param original_X : pd.DataFrame, optional
        Input node features before preprocessing. Each column represents one input feature.
        Each row is a node. Values in dataframe can be of any type, including categorical
        (e.g. string, bool) or missing values (None). This is useful if you want to
        preprocess X with GBDT model. When omitted, a copy of X is used and
        ``cat_features`` is ignored.
    :param cat_features: list[int]
        Feature indexes (columns) which are categorical features.
    :param num_epochs : int
        Number of epochs to run.
    :param patience : int
        Number of epochs to wait until early stopping. A falsy value disables
        early stopping.
    :param logging_epochs : int
        Log every n epoch.
    :param metric_name : str
        Metric to use for early stopping. "r2" and "accuracy" are maximised,
        every other metric is minimised.
    :return: metrics evaluated during training
    """
    higher_is_better = metric_name in ["r2", "accuracy"]

    # initialize for early stopping and metrics
    # Plain float infinities are used as "worse than anything" sentinels:
    # they compare correctly with float metrics and do not rely on the
    # complex alias np.cfloat, which NumPy 2.0 removed.
    worst = float("-inf") if higher_is_better else float("inf")
    best_metric = [worst] * 3  # for train/val/test

    best_val_epoch = 0
    epochs_since_last_best_metric = 0
    metrics = ddict(list)
    if cat_features is None:
        cat_features = []

    if self.task == "regression":
        self.out_dim = y.shape[1]
    elif self.task == "classification":
        # NOTE(review): the class count is taken from the test rows only;
        # confirm every class is guaranteed to appear there.
        self.out_dim = len(set(y.iloc[test_mask, 0]))
    self.in_dim = (
        self.out_dim + X.shape[1] if self.append_gbdt_pred else self.out_dim
    )

    if original_X is None:
        original_X = X.copy()
        cat_features = []

    gbdt_X_train = original_X.iloc[train_mask]
    gbdt_y_train = y.iloc[train_mask]
    gbdt_alpha = self.gbdt_alpha
    self.gbdt_model = None

    node_features = self.init_node_features(X)
    optimizer = self.init_optimizer(
        node_features, optimize_node_features=True, learning_rate=self.lr
    )

    y = (
        torch.from_numpy(y.to_numpy(copy=True))
        .float()
        .squeeze()
        .to(self.device)
    )
    graph = graph.to(self.device)

    pbar = tqdm(range(num_epochs))
    for epoch in pbar:
        start2epoch = time.time()

        # gbdt part
        self.train_gbdt(
            gbdt_X_train,
            gbdt_y_train,
            cat_features,
            epoch,
            self.trees_per_epoch,
            gbdt_alpha,
        )

        self.update_node_features(node_features, X, original_X)
        # Snapshot taken before GNN training; the difference to the trained
        # features becomes the next GBDT target.
        node_features_before = node_features.clone()
        model_in = (graph, node_features)
        loss = self.train_and_evaluate(
            model_in,
            y,
            train_mask,
            val_mask,
            test_mask,
            optimizer,
            metrics,
            self.backprop_per_epoch,
        )
        gbdt_y_train = self.update_gbdt_targets(
            node_features, node_features_before, train_mask
        )

        self.log_epoch(
            pbar,
            metrics,
            epoch,
            loss,
            time.time() - start2epoch,
            logging_epochs,
            metric_name=metric_name,
        )

        # check early stopping
        (
            best_metric,
            best_val_epoch,
            epochs_since_last_best_metric,
        ) = self.update_early_stopping(
            metrics,
            epoch,
            best_metric,
            best_val_epoch,
            epochs_since_last_best_metric,
            metric_name,
            lower_better=not higher_is_better,
        )
        if patience and epochs_since_last_best_metric > patience:
            break

        if np.isclose(gbdt_y_train.sum(), 0.0):
            print("Node embeddings do not change anymore. Stopping...")
            break

    print(
        "Best {} at iteration {}: {:.3f}/{:.3f}/{:.3f}".format(
            metric_name, best_val_epoch, *best_metric
        )
    )
    return metrics
* `append_gbdt_pred` -- this decides whether to append GBDT predictions from GNN to original input features or to replace original input features with predictions of GBDT. This can be important for performance, so try both values, True and False. * `trees_per_epoch` and `backprop_per_epoch`. Values in the range 5-15 usually give good results. The more, the longer training is. * `lr` is learning rate for GNN. 0.01-0.1 are good values to try. * `gbdt_lr` is learning rate for GBDT. Shouldn't be that important. * `gbdt_depth` number of levels in GBDT tree. 4-8 are good values. The more, the longer it trains. ================================================ FILE: examples/pytorch/bgnn/run.py ================================================ import json import os import numpy as np import pandas as pd import torch import torch.nn.functional as F from BGNN import BGNNPredictor from category_encoders import CatBoostEncoder from dgl.data.utils import load_graphs from dgl.nn.pytorch import ( AGNNConv as AGNNConvDGL, APPNPConv, ChebConv as ChebConvDGL, GATConv as GATConvDGL, GraphConv, ) from sklearn import preprocessing from torch.nn import Dropout, ELU, Linear, ReLU, Sequential class GNNModelDGL(torch.nn.Module): def __init__( self, in_dim, hidden_dim, out_dim, dropout=0.0, name="gat", residual=True, use_mlp=False, join_with_mlp=False, ): super(GNNModelDGL, self).__init__() self.name = name self.use_mlp = use_mlp self.join_with_mlp = join_with_mlp self.normalize_input_columns = True if name == "gat": self.l1 = GATConvDGL( in_dim, hidden_dim // 8, 8, feat_drop=dropout, attn_drop=dropout, residual=False, activation=F.elu, ) self.l2 = GATConvDGL( hidden_dim, out_dim, 1, feat_drop=dropout, attn_drop=dropout, residual=residual, activation=None, ) elif name == "gcn": self.l1 = GraphConv(in_dim, hidden_dim, activation=F.elu) self.l2 = GraphConv(hidden_dim, out_dim, activation=F.elu) self.drop = Dropout(p=dropout) elif name == "cheb": self.l1 = ChebConvDGL(in_dim, hidden_dim, k=3) 
def forward(self, graph, features):
    """Compute the model output for ``features`` on ``graph``.

    The layers applied depend on ``self.name``: one of ``"gat"``,
    ``"appnp"``, ``"agnn"``, ``"cheb"`` or ``"gcn"``.

    :param graph: graph passed to every graph layer.
    :param features: input node features.
    :return: output of the last layer of the selected architecture.
    :raises ValueError: if ``self.name`` is not one of the names above.
    """
    h = features
    if self.use_mlp:
        # NOTE(review): `self.mlp` is not created by the constructor shown
        # in this file, so this path needs the attribute set externally.
        if self.join_with_mlp:
            h = torch.cat((h, self.mlp(features)), 1)
        else:
            h = self.mlp(features)

    if self.name == "gat":
        h = self.l1(graph, h).flatten(1)
        logits = self.l2(graph, h).mean(1)
    elif self.name == "appnp":
        h = self.lin1(h)
        logits = self.l1(graph, h)
    elif self.name == "agnn":
        h = self.lin1(h)
        h = self.l1(graph, h)
        h = self.l2(graph, h)
        logits = self.lin2(h)
    elif self.name == "cheb":
        # The constructor registers this architecture as "cheb"; the branch
        # previously tested for "che3b" and was therefore unreachable.
        # Imported here because the file only imports dgl submodules.
        import dgl

        lambda_max = dgl.laplacian_lambda_max(graph)
        h = self.drop(h)
        h = self.l1(graph, h, lambda_max)
        logits = self.l2(graph, h, lambda_max)
    elif self.name == "gcn":
        h = self.drop(h)
        h = self.l1(graph, h)
        logits = self.l2(graph, h)
    else:
        # Fail with a clear message instead of an UnboundLocalError on
        # `logits` below.
        raise ValueError(
            "Unknown GNN model name: {!r}. Supported names: "
            "gat, appnp, agnn, cheb, gcn.".format(self.name)
        )

    return logits
def replace_na(X, train_mask):
    """Fill missing values with one less than the per-column training minimum.

    When ``X`` holds no missing values it is returned unchanged (the same
    object). Otherwise a filled frame is returned, where each missing cell
    gets ``min(column over the train_mask rows) - 1``.
    """
    has_missing = X.isna().any().any()
    if not has_missing:
        return X

    # Per-column fill value derived from the training rows only.
    fill_values = X.iloc[train_mask].min() - 1
    return X.fillna(fill_values)
= GNNModelDGL(in_dim, hidden_dim, out_dim) # initialize BGNN model bgnn = BGNNPredictor( gnn_model, task=task, loss_fn=None, trees_per_epoch=trees_per_epoch, backprop_per_epoch=backprop_per_epoch, lr=lr, append_gbdt_pred=append_gbdt_pred, train_input_features=train_input_features, gbdt_depth=gbdt_depth, gbdt_lr=gbdt_lr, ) # train metrics = bgnn.fit( graph, encoded_X, y, train_mask, val_mask, test_mask, original_X=X, cat_features=cat_features, num_epochs=100, patience=10, metric_name="loss", ) bgnn.plot_interactive( metrics, legend=["train", "valid", "test"], title="Avazu", metric_name="loss", ) ================================================ FILE: examples/pytorch/bgrl/README.md ================================================ # DGL Implementation of BGRL This DGL example implements the GNN experiment proposed in the paper [Large-Scale Representation Learning on Graphs via Bootstrapping](https://arxiv.org/abs/2102.06514). For the original implementation, see [here](https://github.com/nerdslab/bgrl). Contributor: [RecLusIve-F](https://github.com/RecLusIve-F) ### Requirements The codebase is implemented in Python 3.8. For version requirement of packages, see below. ``` dgl 0.8.3 numpy 1.21.2 torch 1.10.2 scikit-learn 1.0.2 ``` ### Dataset Dataset summary: | Dataset | Task | Nodes | Edges | Features | Classes | |:----------------:|:------------:|:------:|:-------:|:--------:|:---------------:| | WikiCS | Transductive | 11,701 | 216,123 | 300 | 10 | | Amazon Computers | Transductive | 13,752 | 245,861 | 767 | 10 | | Amazon Photos | Transductive | 7,650 | 119,081 | 745 | 8 | | Coauthor CS | Transductive | 18,333 | 81,894 | 6,805 | 15 | | Coauthor Physics | Transductive | 34,493 | 247,962 | 8,415 | 5 | | PPI(24 graphs) | Inductive | 56,944 | 818,716 | 50 | 121(multilabel) | ### Usage ##### Dataset options ``` --dataset str The graph dataset name. Default is 'amazon_photos'. ``` ##### Model options ``` --graph_encoder_layer list Convolutional layer hidden sizes. 
Default is [256, 128]. --predictor_hidden_size int Hidden size of predictor. Default is 512. ``` ##### Training options ``` --epochs int The number of training epochs. Default is 10000. --lr float The learning rate. Default is 0.00001. --weight_decay float The weight decay. Default is 0.00001. --mm float The momentum for moving average. Default is 0.99. --lr_warmup_epochs int Warmup period for learning rate scheduling. Default is 1000. --weights_dir str Where to save the weights. Default is '../weights'. ``` ##### Augmentation options ``` --drop_edge_p float Probability of edge dropout. Default is [0., 0.]. --feat_mask_p float Probability of node feature masking. Default is [0., 0.]. ``` ##### Evaluation options ``` --eval_epochs int Evaluate every eval_epochs. Default is 250. --num_eval_splits int Number of evaluation splits. Default is 20. --data_seed int Data split seed for evaluation. Default is 1. ``` ### Instructions for experiments ##### Transductive task ``` # Coauthor CS python main.py --dataset coauthor_cs --graph_encoder_layer 512 256 --drop_edge_p 0.3 0.2 --feat_mask_p 0.3 0.4 # Coauthor Physics python main.py --dataset coauthor_physics --graph_encoder_layer 256 128 --drop_edge_p 0.4 0.1 --feat_mask_p 0.1 0.4 # WikiCS python main.py --dataset wiki_cs --graph_encoder_layer 512 256 --drop_edge_p 0.2 0.3 --feat_mask_p 0.2 0.1 --lr 5e-4 # Amazon Photos python main.py --dataset amazon_photos --graph_encoder_layer 256 128 --drop_edge_p 0.4 0.1 --feat_mask_p 0.1 0.2 --lr 1e-4 # Amazon Computers python main.py --dataset amazon_computers --graph_encoder_layer 256 128 --drop_edge_p 0.5 0.4 --feat_mask_p 0.2 0.1 --lr 5e-4 ``` ##### Inductive task ``` # PPI python main.py --dataset ppi --graph_encoder_layer 512 512 --drop_edge_p 0.3 0.25 --feat_mask_p 0.25 0. --lr 5e-3 ``` ### Performance ##### Transductive Task | Dataset | WikiCS | Am. Comp. | Am. Photos | Co. CS | Co. 
Phy | |:----------------------:|:------------:|:------------:|:------------:|:------------:|:------------:| | Accuracy Reported | 79.98 ± 0.10 | 90.34 ± 0.19 | 93.17 ± 0.30 | 93.31 ± 0.13 | 95.73 ± 0.05 | | Accuracy Official Code | 79.94 | 90.62 | 93.45 | 93.42 | 95.74 | | Accuracy DGL | 80.00 | 90.64 | 93.34 | 93.76 | 95.79 | ##### Inductive Task | Dataset | PPI | |:----------------------:|:------------:| | Micro-F1 Reported | 69.41 ± 0.15 | | Micro-F1 Official Code | 68.83 | | Micro-F1 DGL | 68.65 | ##### Accuracy reported is over 20 random dataset splits and model initializations. Micro-F1 reported is over 20 random model initializations. ##### Accuracy official code and Accuracy DGL are only over 1 random dataset split and model initialization. Micro-F1 official code and Micro-F1 DGL are only over 1 random model initialization. ================================================ FILE: examples/pytorch/bgrl/eval_function.py ================================================ import numpy as np import torch from sklearn import metrics from sklearn.linear_model import LogisticRegression from sklearn.model_selection import GridSearchCV, ShuffleSplit, train_test_split from sklearn.multiclass import OneVsRestClassifier from sklearn.preprocessing import normalize, OneHotEncoder def fit_logistic_regression(X, y, data_random_seed=1, repeat=1): # transform targets to one-hot vector one_hot_encoder = OneHotEncoder(categories="auto", sparse=False) y = one_hot_encoder.fit_transform(y.reshape(-1, 1)).astype(np.bool_) # normalize x X = normalize(X, norm="l2") # set random state, this will ensure the dataset will be split exactly the same throughout training rng = np.random.RandomState(data_random_seed) accuracies = [] for _ in range(repeat): # different random split after each repeat X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.8, random_state=rng ) # grid search with one-vs-rest classifiers logreg = LogisticRegression(solver="liblinear") c = 2.0 ** 
def fit_logistic_regression_preset_splits(
    X, y, train_mask, val_mask, test_mask
):
    """Evaluate representations with logistic regression on preset splits.

    For every column of ``train_mask``/``val_mask`` a one-vs-rest logistic
    regression is fitted for each ``C`` in ``2 ** -10 ... 2 ** 10``. The test
    accuracy of the classifier with the strictly best validation accuracy is
    recorded for that split.

    Returns a list with one test accuracy per split.
    """
    # transform targets to one-hot vector
    encoder = OneHotEncoder(categories="auto", sparse=False)
    y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.bool_)

    # normalize x
    X = normalize(X, norm="l2")

    def _accuracy(classifier, feats, targets):
        # Arg-max class per row, re-encoded one-hot so that it can be
        # compared with the one-hot targets.
        winners = np.argmax(classifier.predict_proba(feats), axis=1)
        encoded = encoder.transform(winners.reshape(-1, 1)).astype(np.bool_)
        return metrics.accuracy_score(targets, encoded)

    candidate_cs = 2.0 ** np.arange(-10, 11)
    accuracies = []
    for split_id in range(train_mask.shape[1]):
        # masks of the current split
        split_train = train_mask[:, split_id]
        split_val = val_mask[:, split_id]

        X_train, y_train = X[split_train], y[split_train]
        X_val, y_val = X[split_val], y[split_val]
        X_test, y_test = X[test_mask], y[test_mask]

        # manual grid search over C, selected on the validation split
        best_val_acc = 0
        test_acc_at_best = 0
        for c in candidate_cs:
            clf = OneVsRestClassifier(
                LogisticRegression(solver="liblinear", C=c)
            )
            clf.fit(X_train, y_train)

            val_acc = _accuracy(clf, X_val, y_val)
            if not val_acc > best_val_acc:
                continue
            best_val_acc = val_acc
            test_acc_at_best = _accuracy(clf, X_test, y_test)

        accuracies.append(test_acc_at_best)
    return accuracies
This function is specific to the PPI dataset, which has multiple labels. """ def train(classifier, train_data, optimizer): classifier.train() x, label = train_data x, label = x.to(device), label.to(device) for step in range(100): # forward optimizer.zero_grad() pred_logits = classifier(x) # loss and backprop loss = criterion(pred_logits, label) loss.backward() optimizer.step() def test(classifier, data): classifier.eval() x, label = data label = label.cpu().numpy().squeeze() # feed to network and classifier with torch.no_grad(): pred_logits = classifier(x.to(device)) pred_class = (pred_logits > 0).float().cpu().numpy() return ( metrics.f1_score(label, pred_class, average="micro") if pred_class.sum() > 0 else 0 ) num_feats = train_data[0].size(1) criterion = torch.nn.BCEWithLogitsLoss() # normalization mean, std = train_data[0].mean(0, keepdim=True), train_data[0].std( 0, unbiased=False, keepdim=True ) train_data[0] = (train_data[0] - mean) / std val_data[0] = (val_data[0] - mean) / std test_data[0] = (test_data[0] - mean) / std best_val_f1 = [] test_f1 = [] for _ in range(repeat): tmp_best_val_f1 = 0 tmp_test_f1 = 0 for weight_decay in 2.0 ** np.arange(-10, 11, 2): classifier = torch.nn.Linear(num_feats, num_classes).to(device) optimizer = torch.optim.AdamW( params=classifier.parameters(), lr=0.01, weight_decay=weight_decay, ) train(classifier, train_data, optimizer) val_f1 = test(classifier, val_data) if val_f1 > tmp_best_val_f1: tmp_best_val_f1 = val_f1 tmp_test_f1 = test(classifier, test_data) best_val_f1.append(tmp_best_val_f1) test_f1.append(tmp_test_f1) return [best_val_f1], [test_f1] ================================================ FILE: examples/pytorch/bgrl/main.py ================================================ import copy import os import warnings import dgl import numpy as np import torch from eval_function import ( fit_logistic_regression, fit_logistic_regression_preset_splits, fit_ppi_linear, ) from model import ( BGRL, compute_representations, GCN, 
def train(
    step,
    model,
    optimizer,
    lr_scheduler,
    mm_scheduler,
    transform_1,
    transform_2,
    data,
    args,
):
    """Run a single BGRL optimisation step and return the scalar loss.

    The learning rate and the target-network momentum for ``step`` are read
    from the two schedulers, two augmented views of ``data`` are produced,
    and the symmetric cosine-similarity loss between online predictions and
    (detached) target embeddings is minimised. After the optimizer step the
    target network is moved towards the online network.

    Args:
        step: Zero-based training step passed to both schedulers.
        model: Module called as ``model(view_a, view_b)`` returning
            ``(online_prediction, target_embedding)`` and exposing
            ``update_target_network(mm)``.
        optimizer: Optimizer over the online parameters.
        lr_scheduler: Object whose ``get(step)`` gives the learning rate.
        mm_scheduler: Object whose ``get(step)`` gives ``1 - momentum``.
        transform_1: Callable producing the first augmented view.
        transform_2: Callable producing the second augmented view.
        data: Input graph handed to both transforms.
        args: Namespace; only ``args.dataset`` is read here.

    Returns:
        float: The loss value of this step.
    """
    model.train()

    # Push the scheduled learning rate into every parameter group.
    current_lr = lr_scheduler.get(step)
    for group in optimizer.param_groups:
        group["lr"] = current_lr

    # The scheduler tracks (1 - momentum), so invert it here.
    momentum = 1 - mm_scheduler.get(step)

    optimizer.zero_grad()

    # Build the two views (transform_1 first, then transform_2).
    view_a = transform_1(data)
    view_b = transform_2(data)
    if args.dataset != "ppi":
        view_a = dgl.add_self_loop(view_a)
        view_b = dgl.add_self_loop(view_b)

    # Symmetric forward passes: each view is once the online input and once
    # the target input.
    pred_a, target_b = model(view_a, view_b)
    pred_b, target_a = model(view_b, view_a)

    sim_ab = cosine_similarity(pred_a, target_b.detach(), dim=-1).mean()
    sim_ba = cosine_similarity(pred_b, target_a.detach(), dim=-1).mean()
    loss = 2 - sim_ab - sim_ba

    loss.backward()
    optimizer.step()  # online network
    model.update_target_network(momentum)  # target network (moving average)
    return loss.item()
def main(args):
    """Train a BGRL model on ``args.dataset`` and save the online encoder.

    Reads from ``args``: ``dataset``, ``graph_encoder_layer``,
    ``drop_edge_p``, ``feat_mask_p``, ``lr``, ``lr_warmup_epochs``,
    ``epochs``, ``mm``, ``predictor_hidden_size``, ``weight_decay``,
    ``eval_epochs`` and ``weights_dir``.

    Every ``args.eval_epochs`` epochs the encoder is evaluated and the mean
    score is printed. At the end the online encoder's ``state_dict`` is
    written to ``<weights_dir>/bgrl-<dataset>.pt``.
    """
    # use CUDA_VISIBLE_DEVICES to select gpu
    device = (
        torch.device("cuda")
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    print("Using device:", device)

    dataset, train_data, val_data, test_data = get_dataset(args.dataset)

    g = dataset[0]
    g = g.to(device)

    input_size, representation_size = (
        g.ndata["feat"].size(1),
        args.graph_encoder_layer[-1],
    )

    # prepare transforms (one per augmented view)
    transform_1 = get_graph_drop_transform(
        drop_edge_p=args.drop_edge_p[0], feat_mask_p=args.feat_mask_p[0]
    )
    transform_2 = get_graph_drop_transform(
        drop_edge_p=args.drop_edge_p[1], feat_mask_p=args.feat_mask_p[1]
    )

    # schedulers: learning rate with warm-up, and (1 - momentum) without
    lr_scheduler = CosineDecayScheduler(
        args.lr, args.lr_warmup_epochs, args.epochs
    )
    mm_scheduler = CosineDecayScheduler(1 - args.mm, 0, args.epochs)

    # build networks
    if args.dataset == "ppi":
        encoder = GraphSAGE_GCN([input_size] + args.graph_encoder_layer)
    else:
        encoder = GCN([input_size] + args.graph_encoder_layer)
    predictor = MLP_Predictor(
        representation_size,
        representation_size,
        hidden_size=args.predictor_hidden_size,
    )
    model = BGRL(encoder, predictor).to(device)

    # optimizer (online encoder + predictor only)
    optimizer = AdamW(
        model.trainable_parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # train
    for epoch in tqdm(range(1, args.epochs + 1), desc=" - (Training) "):
        train(
            epoch - 1,
            model,
            optimizer,
            lr_scheduler,
            mm_scheduler,
            transform_1,
            transform_2,
            g,
            args,
        )
        if epoch % args.eval_epochs == 0:
            # NOTE: `eval` is this module's evaluation function, which
            # shadows the builtin of the same name.
            val_scores, test_scores = eval(
                model, dataset, device, args, train_data, val_data, test_data
            )
            if args.dataset == "ppi":
                print(
                    "Epoch: {:04d} | Best Val F1: {:.4f} | Test F1: {:.4f}".format(
                        epoch, np.mean(val_scores), np.mean(test_scores)
                    )
                )
            else:
                print(
                    "Epoch: {:04d} | Test Accuracy: {:.4f}".format(
                        epoch, np.mean(test_scores)
                    )
                )

    # save encoder weights
    # makedirs(exist_ok=True) also creates missing parents and does not race
    # with another process creating the same directory, unlike the
    # isdir()/mkdir() pair it replaces.
    os.makedirs(args.weights_dir, exist_ok=True)
    torch.save(
        {"model": model.online_encoder.state_dict()},
        os.path.join(args.weights_dir, "bgrl-{}.pt".format(args.dataset)),
    )
class LayerNorm(nn.Module):
    """Layer normalisation over all nodes and features of each graph.

    Without ``batch`` the whole input is normalised with a single mean and
    standard deviation. With ``batch`` (one graph id per row of ``x``) each
    graph is normalised with its own statistics, computed over all of its
    nodes and all feature channels.

    Args:
        in_channels (int): Size of the feature dimension.
        eps (float, optional): Value added for numerical stability.
        affine (bool, optional): If ``True``, learn a per-channel scale and
            shift; if ``False``, ``weight`` and ``bias`` are ``None``.
    """

    def __init__(self, in_channels, eps=1e-5, affine=True):
        super().__init__()
        self.in_channels = in_channels
        self.eps = eps
        if affine:
            self.weight = Parameter(torch.empty(in_channels))
            self.bias = Parameter(torch.empty(in_channels))
        else:
            self.register_parameter("weight", None)
            self.register_parameter("bias", None)
        self.reset_parameters()

    def reset_parameters(self):
        """Reset the affine parameters to the identity transform."""
        # With affine=False both parameters are None; initialising them
        # unconditionally made construction fail.
        if self.weight is not None:
            ones_(self.weight)
        if self.bias is not None:
            zeros_(self.bias)

    @staticmethod
    def _per_graph_sum(values, batch, num_graphs):
        """Sum ``values`` over the nodes and features of every graph.

        Returns a ``(num_graphs, 1)`` tensor on the device of ``values``.
        """
        per_node = values.sum(dim=-1, keepdim=True)
        totals = values.new_zeros(num_graphs, 1)
        return totals.index_add(0, batch, per_node)

    def forward(self, x, batch=None):
        """Normalise ``x``.

        Args:
            x (torch.Tensor): Node features; the batched path indexes it as
                ``(num_nodes, in_channels)``.
            batch (torch.Tensor, optional): Graph id of every node. When
                ``None`` all rows are treated as one graph.

        Returns:
            torch.Tensor: Normalised features with the shape of ``x``.
        """
        if batch is None:
            x = x - x.mean()
            out = x / (x.std(unbiased=False) + self.eps)
        else:
            batch = batch.long()
            num_graphs = int(batch.max()) + 1

            # Number of scalar entries per graph: nodes * features.
            # clamp avoids dividing by zero for graph ids that do not occur.
            nodes_per_graph = torch.bincount(batch, minlength=num_graphs)
            norm = nodes_per_graph.clamp_(min=1).to(x.dtype)
            norm = (norm * x.size(-1)).view(-1, 1)

            mean = self._per_graph_sum(x, batch, num_graphs) / norm
            x = x - mean.index_select(0, batch)

            # The variance must be taken from the *centred* features. The
            # earlier code reused slices taken before centring, which
            # yielded E[x^2] instead of Var[x].
            var = self._per_graph_sum(x * x, batch, num_graphs) / norm
            out = x / (var + self.eps).sqrt().index_select(0, batch)

        if self.weight is not None and self.bias is not None:
            out = out * self.weight + self.bias
        return out

    def __repr__(self):
        return f"{self.__class__.__name__}({self.in_channels})"
class GCN(nn.Module):
    """Stack of ``GraphConv -> BatchNorm1d -> PReLU`` groups.

    Args:
        layer_sizes (list[int]): Feature sizes; consecutive pairs define the
            input/output size of each graph convolution.
        batch_norm_mm (float, optional): Value passed as ``momentum`` to
            every ``BatchNorm1d``.
    """

    def __init__(self, layer_sizes, batch_norm_mm=0.99):
        super(GCN, self).__init__()

        self.layers = nn.ModuleList()
        size_pairs = zip(layer_sizes[:-1], layer_sizes[1:])
        for fan_in, fan_out in size_pairs:
            # Same registration order as before, so state_dict keys
            # ("layers.0", "layers.1", ...) are unchanged.
            self.layers.extend(
                [
                    GraphConv(fan_in, fan_out),
                    BatchNorm1d(fan_out, momentum=batch_norm_mm),
                    nn.PReLU(),
                ]
            )

    def forward(self, g):
        """Encode the ``"feat"`` node data of ``g``."""
        h = g.ndata["feat"]
        for module in self.layers:
            # Only the graph convolutions need the graph structure.
            h = module(g, h) if isinstance(module, GraphConv) else module(h)
        return h

    def reset_parameters(self):
        """Re-initialise every sub-layer that supports it."""
        for module in self.layers:
            reset = getattr(module, "reset_parameters", None)
            if reset is not None:
                reset()
self.layer_norms[1](h2, batch) h2 = self.activations[1](h2) x_skip_2 = self.skip_lins[1](x) ret = self.convs[2](g, h1 + h2 + x_skip_2) ret = self.layer_norms[2](ret, batch) ret = self.activations[2](ret) return ret def reset_parameters(self): for m in self.convs: m.reset_parameters() for m in self.skip_lins: m.reset_parameters() for m in self.activations: m.weight.data.fill_(0.25) for m in self.layer_norms: m.reset_parameters() class BGRL(nn.Module): r"""BGRL architecture for Graph representation learning. Args: encoder (torch.nn.Module): Encoder network to be duplicated and used in both online and target networks. predictor (torch.nn.Module): Predictor network used to predict the target projection from the online projection. .. note:: `encoder` must have a `reset_parameters` method, as the weights of the target network will be initialized differently from the online network. """ def __init__(self, encoder, predictor): super(BGRL, self).__init__() # online network self.online_encoder = encoder self.predictor = predictor # target network self.target_encoder = copy.deepcopy(encoder) # reinitialize weights self.target_encoder.reset_parameters() # stop gradient for param in self.target_encoder.parameters(): param.requires_grad = False def trainable_parameters(self): r"""Returns the parameters that will be updated via an optimizer.""" return list(self.online_encoder.parameters()) + list( self.predictor.parameters() ) @torch.no_grad() def update_target_network(self, mm): r"""Performs a momentum update of the target network's weights. Args: mm (float): Momentum used in moving average update. 
""" for param_q, param_k in zip( self.online_encoder.parameters(), self.target_encoder.parameters() ): param_k.data.mul_(mm).add_(param_q.data, alpha=1.0 - mm) def forward(self, online_x, target_x): # forward online network online_y = self.online_encoder(online_x) # prediction online_q = self.predictor(online_y) # forward target network with torch.no_grad(): target_y = self.target_encoder(target_x).detach() return online_q, target_y def compute_representations(net, dataset, device): r"""Pre-computes the representations for the entire data. Returns: [torch.Tensor, torch.Tensor]: Representations and labels. """ net.eval() reps = [] labels = [] if len(dataset) == 1: g = dataset[0] g = dgl.add_self_loop(g) g = g.to(device) with torch.no_grad(): reps.append(net(g)) labels.append(g.ndata["label"]) else: for g in dataset: # forward g = g.to(device) with torch.no_grad(): reps.append(net(g)) labels.append(g.ndata["label"]) reps = torch.cat(reps, dim=0) labels = torch.cat(labels, dim=0) return [reps, labels] ================================================ FILE: examples/pytorch/bgrl/utils.py ================================================ import copy import numpy as np import torch from dgl.data import ( AmazonCoBuyComputerDataset, AmazonCoBuyPhotoDataset, CoauthorCSDataset, CoauthorPhysicsDataset, PPIDataset, WikiCSDataset, ) from dgl.dataloading import GraphDataLoader from dgl.transforms import Compose, DropEdge, FeatMask, RowFeatNormalizer class CosineDecayScheduler: def __init__(self, max_val, warmup_steps, total_steps): self.max_val = max_val self.warmup_steps = warmup_steps self.total_steps = total_steps def get(self, step): if step < self.warmup_steps: return self.max_val * step / self.warmup_steps elif self.warmup_steps <= step <= self.total_steps: return ( self.max_val * ( 1 + np.cos( (step - self.warmup_steps) * np.pi / (self.total_steps - self.warmup_steps) ) ) / 2 ) else: raise ValueError( "Step ({}) > total number of steps ({}).".format( step, self.total_steps 
) ) def get_graph_drop_transform(drop_edge_p, feat_mask_p): transforms = list() # make copy of graph transforms.append(copy.deepcopy) # drop edges if drop_edge_p > 0.0: transforms.append(DropEdge(drop_edge_p)) # drop features if feat_mask_p > 0.0: transforms.append(FeatMask(feat_mask_p, node_feat_names=["feat"])) return Compose(transforms) def get_wiki_cs(transform=RowFeatNormalizer(subtract_min=True)): dataset = WikiCSDataset(transform=transform) g = dataset[0] std, mean = torch.std_mean(g.ndata["feat"], dim=0, unbiased=False) g.ndata["feat"] = (g.ndata["feat"] - mean) / std return [g] def get_ppi(): train_dataset = PPIDataset(mode="train") val_dataset = PPIDataset(mode="valid") test_dataset = PPIDataset(mode="test") train_val_dataset = [i for i in train_dataset] + [i for i in val_dataset] for idx, data in enumerate(train_val_dataset): data.ndata["batch"] = torch.zeros(data.num_nodes()) + idx data.ndata["batch"] = data.ndata["batch"].long() g = list(GraphDataLoader(train_val_dataset, batch_size=22, shuffle=True)) return g, PPIDataset(mode="train"), PPIDataset(mode="valid"), test_dataset def get_dataset(name, transform=RowFeatNormalizer(subtract_min=True)): dgl_dataset_dict = { "coauthor_cs": CoauthorCSDataset, "coauthor_physics": CoauthorPhysicsDataset, "amazon_computers": AmazonCoBuyComputerDataset, "amazon_photos": AmazonCoBuyPhotoDataset, "wiki_cs": get_wiki_cs, "ppi": get_ppi, } dataset_class = dgl_dataset_dict[name] train_data, val_data, test_data = None, None, None if name != "ppi": dataset = dataset_class(transform=transform) else: dataset, train_data, val_data, test_data = dataset_class() return dataset, train_data, val_data, test_data ================================================ FILE: examples/pytorch/capsule/DGLDigitCapsule.py ================================================ import dgl import dgl.function as fn import torch from DGLRoutingLayer import DGLRoutingLayer from torch import nn from torch.nn import functional as F class 
DGLDigitCapsuleLayer(nn.Module): def __init__( self, in_nodes_dim=8, in_nodes=1152, out_nodes=10, out_nodes_dim=16, device="cpu", ): super(DGLDigitCapsuleLayer, self).__init__() self.device = device self.in_nodes_dim, self.out_nodes_dim = in_nodes_dim, out_nodes_dim self.in_nodes, self.out_nodes = in_nodes, out_nodes self.weight = nn.Parameter( torch.randn(in_nodes, out_nodes, out_nodes_dim, in_nodes_dim) ) def forward(self, x): self.batch_size = x.size(0) u_hat = self.compute_uhat(x) routing = DGLRoutingLayer( self.in_nodes, self.out_nodes, self.out_nodes_dim, batch_size=self.batch_size, device=self.device, ) routing(u_hat, routing_num=3) out_nodes_feature = routing.g.nodes[routing.out_indx].data["v"] # shape transformation is for further classification return ( out_nodes_feature.transpose(0, 1) .unsqueeze(1) .unsqueeze(4) .squeeze(1) ) def compute_uhat(self, x): # x is the input vextor with shape [batch_size, in_nodes_dim, in_nodes] # Transpose x to [batch_size, in_nodes, in_nodes_dim] x = x.transpose(1, 2) # Expand x to [batch_size, in_nodes, out_nodes, in_nodes_dim, 1] x = torch.stack([x] * self.out_nodes, dim=2).unsqueeze(4) # Expand W from [in_nodes, out_nodes, in_nodes_dim, out_nodes_dim] # to [batch_size, in_nodes, out_nodes, out_nodes_dim, in_nodes_dim] W = self.weight.expand(self.batch_size, *self.weight.size()) # u_hat's shape is [in_nodes, out_nodes, batch_size, out_nodes_dim] u_hat = torch.matmul(W, x).permute(1, 2, 0, 3, 4).squeeze().contiguous() return u_hat.view(-1, self.batch_size, self.out_nodes_dim) ================================================ FILE: examples/pytorch/capsule/DGLRoutingLayer.py ================================================ import dgl import torch as th import torch.nn as nn import torch.nn.functional as F class DGLRoutingLayer(nn.Module): def __init__(self, in_nodes, out_nodes, f_size, batch_size=0, device="cpu"): super(DGLRoutingLayer, self).__init__() self.batch_size = batch_size self.g = init_graph(in_nodes, out_nodes, 
def squash(s, dim=1, eps=1e-8):
    """Apply the capsule "squash" non-linearity along ``dim``.

    Each vector keeps its direction and its length ``n`` is mapped to
    ``n^2 / (1 + n^2)``.

    Args:
        s (torch.Tensor): Input capsule vectors.
        dim (int, optional): Dimension holding the vector components.
        eps (float, optional): Added under the square root so that an
            all-zero vector gives a zero output (and finite gradients)
            instead of NaN from 0 / 0.

    Returns:
        torch.Tensor: Squashed vectors, same shape as ``s``.
    """
    sq = th.sum(s**2, dim=dim, keepdim=True)
    s_norm = th.sqrt(sq + eps)
    return (sq / (1.0 + sq)) * (s / s_norm)
def test(args, model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Args:
        args: Parsed command-line arguments (not read by this function).
        model: Network exposing ``margin_loss(output, target)``.
        device: Device the batches are moved to.
        test_loader: Iterable of ``(data, target)`` batches with a
            ``dataset`` attribute supporting ``len``.
    """
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            num_samples = data.size(0)

            # margin_loss returns the *mean* over the batch, so weight it
            # by the batch size; dividing by the dataset size below then
            # yields a true per-sample average.
            test_loss += model.margin_loss(output, target).item() * num_samples

            # Capsule lengths per class. Reshape explicitly instead of a
            # bare squeeze(), which would also drop the batch axis for a
            # batch holding a single sample.
            lengths = output.norm(dim=2).reshape(num_samples, -1)
            pred = lengths.max(1, keepdim=True)[1]  # index of longest capsule
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss,
            correct,
            len(test_loader.dataset),
            100.0 * correct / len(test_loader.dataset),
        )
    )
class Net(nn.Module):
    """Capsule network: conv layer -> primary capsules -> digit capsules.

    Args:
        device: Device forwarded to the capsule layers.
    """

    def __init__(self, device="cpu"):
        super(Net, self).__init__()
        self.device = device
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=256, kernel_size=9, stride=1),
            nn.ReLU(inplace=True),
        )
        self.primary = PrimaryCapsuleLayer(device=device)
        self.digits = DGLDigitCapsuleLayer(device=device)

    def forward(self, x):
        """Return the digit-capsule output for the image batch ``x``."""
        out_conv1 = self.conv1(x)
        out_primary_caps = self.primary(out_conv1)
        out_digit_caps = self.digits(out_primary_caps)
        return out_digit_caps

    def margin_loss(self, input, target):
        """Margin loss over capsule lengths, averaged over the batch.

        Args:
            input (torch.Tensor): Capsule outputs; the vector components are
                on dimension 2 and flattening the capsule lengths per sample
                must give one value per class.
            target (torch.Tensor): Integer class index per sample.

        Returns:
            torch.Tensor: Scalar loss.
        """
        batch_size = input.size(0)

        # Capsule lengths, one column per class.
        v_c = torch.sqrt((input**2).sum(dim=2, keepdim=True))
        v_c = v_c.view(batch_size, -1)

        # One-hot targets, built in a single scatter on the input's device
        # and sized from the capsule output instead of a hard-coded 10.
        class_index = target.to(v_c.device).long().view(-1, 1)
        t_c = torch.zeros_like(v_c).scatter_(1, class_index, 1.0)

        m_plus = 0.9
        m_minus = 0.1
        loss_lambda = 0.5
        max_left = torch.clamp(m_plus - v_c, min=0) ** 2
        max_right = torch.clamp(v_c - m_minus, min=0) ** 2

        l_c = t_c * max_left + loss_lambda * (1.0 - t_c) * max_right
        return l_c.sum(dim=1).mean()
# Toy dynamic-routing experiment: a fully connected bipartite graph between
# `in_nodes` input capsules and `out_nodes` output capsules.
in_nodes = 20
out_nodes = 10
f_size = 4

# DGLRoutingLayer takes (in_nodes, out_nodes, f_size) and builds the routing
# graph itself, so the graph is no longer assembled by hand here.
routing_layer = DGLRoutingLayer(in_nodes, out_nodes, f_size)
g = routing_layer.g

# init states
u_hat = th.randn(in_nodes * out_nodes, f_size)
g.edata["b"] = th.randn(in_nodes * out_nodes, 1)

entropy_list = []
for i in range(15):
    # forward() requires the prediction vectors and runs one routing
    # iteration by default.
    routing_layer(u_hat)
    dist_matrix = g.edata["c"].view(in_nodes, out_nodes)
    # NOTE(review): DGLRoutingLayer.forward applies the softmax over dim=1
    # of this (in_nodes, out_nodes) view, so a per-input-capsule entropy
    # would sum over dim=1; dim=0 is kept from the original script. Confirm
    # which is intended.
    entropy = (-dist_matrix * th.log(dist_matrix)).sum(dim=0)
    entropy_list.append(entropy.data.numpy())
std = dist_matrix.std(dim=0)
Example implementor
----------------------
This example was implemented by [Kay Liu](https://github.com/kayzliu) during his SDE internship at the AWS Shanghai AI Lab.

Dependencies
----------------------
- Python 3.7.10
- PyTorch 1.8.1
- dgl 0.7.1
- scikit-learn 0.23.2

Dataset
---------------------------------------
The datasets used for node classification are DGL's built-in FraudDataset. Their statistics are summarized as follows:

**Amazon**

- Nodes: 11,944
- Edges:
  - U-P-U: 351,216
  - U-S-U: 7,132,958
  - U-V-U: 2,073,474
- Classes:
  - Positive (fraudulent): 821
  - Negative (benign): 7,818
  - Unlabeled: 3,305
- Positive-Negative ratio: 1 : 10.5
- Node feature size: 25

**YelpChi**

- Nodes: 45,954
- Edges:
  - R-U-R: 98,630
  - R-T-R: 1,147,232
  - R-S-R: 6,805,486
- Classes:
  - Positive (spam): 6,677
  - Negative (legitimate): 39,277
- Positive-Negative ratio: 1 : 5.9
- Node feature size: 32

How to run
--------------------------------
To run the full-graph version with early stopping, run the following in the care-gnn folder:
```
python main.py --early-stop
```

To use a GPU, run
```
python main.py --gpu 0
```

To train on the Yelp dataset instead of Amazon, run
```
python main.py --dataset yelp
```

To run the sampling version, run
```
python main_sampling.py
```

Performance
-------------------------
The results reported in the paper are the best validation results within 30 epochs. The table below reports both the validation and test results (same settings as in the paper except for the random seed, here `seed=717`).

| Dataset | | Amazon | Yelp |
|---|---|---|---|
| Metric (val / test) | Max Epoch | 30 | 30 |
| AUC (val / test) | paper reported | 0.8973 / - | 0.7570 / - |
| | DGL full graph | 0.8849 / 0.8922 | 0.6856 / 0.6867 |
| | DGL sampling | 0.9350 / 0.9331 | 0.7857 / 0.7890 |
| Recall (val / test) | paper reported | 0.8848 / - | 0.7192 / - |
| | DGL full graph | 0.8615 / 0.8544 | 0.6667 / 0.6619 |
| | DGL sampling | 0.9130 / 0.9045 | 0.7537 / 0.7540 |

def main(args):
    """Train and test the full-graph CARE-GNN model.

    Reads from ``args``: ``dataset``, ``gpu``, ``hid_dim``, ``num_layers``,
    ``step_size``, ``lr``, ``weight_decay``, ``early_stop``, ``max_epoch``
    and ``sim_weight``. Per-epoch train/validation metrics and the final
    test metrics are printed.
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = dgl.data.FraudDataset(args.dataset, train_size=0.4)
    graph = dataset[0]
    num_classes = dataset.num_classes

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    # retrieve labels of ground truth
    labels = graph.ndata["label"].to(device)

    # Extract node features
    feat = graph.ndata["feature"].to(device)

    # retrieve masks for train/validation/test
    train_mask = graph.ndata["train_mask"]
    val_mask = graph.ndata["val_mask"]
    test_mask = graph.ndata["test_mask"]

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    # Reinforcement learning module only for positive training nodes
    rl_idx = th.nonzero(
        train_mask.to(device) & labels.bool(), as_tuple=False
    ).squeeze(1)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = CAREGNN(
        in_dim=feat.shape[-1],
        num_classes=num_classes,
        hid_dim=args.hid_dim,
        num_layers=args.num_layers,
        activation=th.tanh,
        step_size=args.step_size,
        edges=graph.canonical_etypes,
    )

    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    # Weight each class by the inverse of its frequency.
    _, cnt = th.unique(labels, return_counts=True)
    loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt)
    optimizer = optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    if args.early_stop:
        stopper = EarlyStopping(patience=100)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # Training and validation using a full graph: one forward pass
        # yields the logits for both the train and the validation nodes.
        model.train()

        logits_gnn, logits_sim = model(graph, feat)

        # compute loss
        tr_loss = loss_fn(
            logits_gnn[train_idx], labels[train_idx]
        ) + args.sim_weight * loss_fn(logits_sim[train_idx], labels[train_idx])

        tr_recall = recall_score(
            labels[train_idx].cpu(),
            logits_gnn.data[train_idx].argmax(dim=1).cpu(),
        )
        tr_auc = roc_auc_score(
            labels[train_idx].cpu(),
            softmax(logits_gnn, dim=1).data[train_idx][:, 1].cpu(),
        )

        # validation
        val_loss = loss_fn(
            logits_gnn[val_idx], labels[val_idx]
        ) + args.sim_weight * loss_fn(logits_sim[val_idx], labels[val_idx])
        val_recall = recall_score(
            labels[val_idx].cpu(), logits_gnn.data[val_idx].argmax(dim=1).cpu()
        )
        val_auc = roc_auc_score(
            labels[val_idx].cpu(),
            softmax(logits_gnn, dim=1).data[val_idx][:, 1].cpu(),
        )

        # backward
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()

        # Print out performance
        print(
            "Epoch {}, Train: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f} | Val: Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
                epoch,
                tr_recall,
                tr_auc,
                tr_loss.item(),
                val_recall,
                val_auc,
                val_loss.item(),
            )
        )

        # Adjust p value with reinforcement learning module
        model.RLModule(graph, epoch, rl_idx)

        if args.early_stop:
            if stopper.step(val_auc, model):
                break

    # Test after all epoch
    model.eval()
    if args.early_stop:
        model.load_state_dict(th.load("es_checkpoint.pt"))

    # Nothing is back-propagated from here on, so skip autograd tracking.
    with th.no_grad():
        # forward
        logits_gnn, logits_sim = model(graph, feat)

        # compute loss
        test_loss = loss_fn(
            logits_gnn[test_idx], labels[test_idx]
        ) + args.sim_weight * loss_fn(logits_sim[test_idx], labels[test_idx])

    test_recall = recall_score(
        labels[test_idx].cpu(), logits_gnn[test_idx].argmax(dim=1).cpu()
    )
    test_auc = roc_auc_score(
        labels[test_idx].cpu(),
        softmax(logits_gnn, dim=1).data[test_idx][:, 1].cpu(),
    )

    print(
        "Test Recall: {:.4f} AUC: {:.4f} Loss: {:.4f}".format(
            test_recall, test_auc, test_loss.item()
        )
    )
def evaluate(model, loss_fn, dataloader, device="cpu", sim_weight=None):
    """Average recall, AUC and loss of ``model`` over the batches of ``dataloader``.

    Parameters
    ----------
    model : callable
        Called as ``model(blocks, feature)`` and expected to return
        ``(logits_gnn, logits_sim)``.
    loss_fn : callable
        Loss applied to each of the two logits against the batch labels.
    dataloader : iterable
        Yields ``(input_nodes, output_nodes, blocks)`` triples.
    device : str
        Device every block is moved to before the forward pass.
    sim_weight : float, optional
        Weight of the similarity loss. When omitted, the value is read from
        the module-level ``args`` namespace created by the script entry
        point, which is what this function always did before the parameter
        existed.

    Returns
    -------
    tuple
        ``(recall, auc, loss)``, each averaged over the number of batches.

    Raises
    ------
    ZeroDivisionError
        If ``dataloader`` yields no batch.
    """
    if sim_weight is None:
        # Backward-compatible fallback for callers that rely on the global
        # CLI namespace; pass ``sim_weight`` explicitly when importing this
        # function from another module.
        sim_weight = args.sim_weight

    loss = 0
    auc = 0
    recall = 0
    num_blocks = 0
    # Evaluation never back-propagates, so do not record an autograd graph.
    with th.no_grad():
        for _, _, blocks in dataloader:
            blocks = [b.to(device) for b in blocks]
            feature = blocks[0].srcdata["feature"]
            label = blocks[-1].dstdata["label"]
            logits_gnn, logits_sim = model(blocks, feature)

            # compute loss
            loss += (
                loss_fn(logits_gnn, label).item()
                + sim_weight * loss_fn(logits_sim, label).item()
            )
            label_cpu = label.cpu()
            recall += recall_score(
                label_cpu, logits_gnn.argmax(dim=1).detach().cpu()
            )
            auc += roc_auc_score(
                label_cpu, softmax(logits_gnn, dim=1)[:, 1].detach().cpu()
            )
            num_blocks += 1

    return recall / num_blocks, auc / num_blocks, loss / num_blocks
graph.ndata["test_mask"] train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device) val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device) test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device) # Reinforcement learning module only for positive training nodes rl_idx = th.nonzero( train_mask.to(device) & labels.bool(), as_tuple=False ).squeeze(1) graph = graph.to(device) # Step 2: Create model =================================================================== # model = CAREGNN( in_dim=feat.shape[-1], num_classes=num_classes, hid_dim=args.hid_dim, num_layers=args.num_layers, activation=th.tanh, step_size=args.step_size, edges=graph.canonical_etypes, ) model = model.to(device) # Step 3: Create training components ===================================================== # _, cnt = th.unique(labels, return_counts=True) loss_fn = th.nn.CrossEntropyLoss(weight=1 / cnt) optimizer = optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.weight_decay ) if args.early_stop: stopper = EarlyStopping(patience=100) # Step 4: training epochs =============================================================== # for epoch in range(args.max_epoch): # calculate the distance of each edges and sample based on the distance dists = [] p = [] for i in range(args.num_layers): dist = {} graph.ndata["nd"] = th.tanh(model.layers[i].MLP(layers_feat[i])) for etype in graph.canonical_etypes: graph.apply_edges(_l1_dist, etype=etype) dist[etype] = graph.edges[etype].data.pop("ed").detach().cpu() dists.append(dist) p.append(model.layers[i].p) graph.ndata.pop("nd") sampler = CARESampler(p, dists, args.num_layers) # train model.train() tr_loss = 0 tr_recall = 0 tr_auc = 0 tr_blk = 0 train_dataloader = dgl.dataloading.DataLoader( graph, train_idx, sampler, batch_size=args.batch_size, shuffle=True, drop_last=False, num_workers=args.num_workers, ) for input_nodes, output_nodes, blocks in train_dataloader: blocks = [b.to(device) for b in blocks] train_feature = 
if __name__ == "__main__":
    # Command-line entry point for mini-batch (sampling based) CARE-GNN training.
    parser = argparse.ArgumentParser(description="GCN-based Anti-Spam Model")
    parser.add_argument(
        "--dataset",
        type=str,
        default="amazon",
        help="DGL dataset for this model (yelp, or amazon)",
    )
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
    )
    parser.add_argument(
        "--hid_dim", type=int, default=64, help="Hidden layer dimension"
    )
    parser.add_argument(
        "--num_layers", type=int, default=1, help="Number of layers"
    )
    parser.add_argument(
        "--batch_size", type=int, default=256, help="Size of mini-batch"
    )
    parser.add_argument(
        "--max_epoch",
        type=int,
        default=30,
        help="The max number of epochs. Default: 30",
    )
    parser.add_argument(
        "--lr", type=float, default=0.01, help="Learning rate. Default: 0.01"
    )
    parser.add_argument(
        "--weight_decay",
        type=float,
        default=0.001,
        help="Weight decay. Default: 0.001",
    )
    parser.add_argument(
        "--step_size",
        type=float,
        default=0.02,
        help="RL action step size (lambda 2). Default: 0.02",
    )
    parser.add_argument(
        "--sim_weight",
        type=float,
        default=2,
        # The help text used to advertise 0.001, which is not the default.
        help="Similarity loss weight (lambda 1). Default: 2",
    )
    parser.add_argument(
        "--num_workers", type=int, default=4, help="Number of node dataloader"
    )
    parser.add_argument(
        "--early-stop",
        action="store_true",
        default=False,
        help="indicates whether to use early stop",
    )

    args = parser.parse_args()
    # Fixed seed so that repeated runs are comparable.
    th.manual_seed(717)
    print(args)
    main(args)
formula 2 d = th.norm( th.tanh(self.MLP(edges.src["h"])) - th.tanh(self.MLP(edges.dst["h"])), 1, 1, ) return {"d": d} def _top_p_sampling(self, g, p): # this implementation is low efficient # optimization requires dgl.sampling.select_top_p requested in issue #3100 dist = g.edata["d"] neigh_list = [] for node in g.nodes(): edges = g.in_edges(node, form="eid") num_neigh = th.ceil(g.in_degrees(node) * p).int().item() neigh_dist = dist[edges] if neigh_dist.shape[0] > num_neigh: neigh_index = np.argpartition( neigh_dist.cpu().detach(), num_neigh )[:num_neigh] else: neigh_index = np.arange(num_neigh) neigh_list.append(edges[neigh_index]) return th.cat(neigh_list) def forward(self, g, feat): with g.local_scope(): g.ndata["h"] = feat hr = {} for i, etype in enumerate(g.canonical_etypes): g.apply_edges(self._calc_distance, etype=etype) self.dist[etype] = g.edges[etype].data["d"] sampled_edges = self._top_p_sampling(g[etype], self.p[etype]) # formula 8 g.send_and_recv( sampled_edges, fn.copy_u("h", "m"), fn.mean("m", "h_%s" % etype[1]), etype=etype, ) hr[etype] = g.ndata["h_%s" % etype[1]] if self.activation is not None: hr[etype] = self.activation(hr[etype]) # formula 9 using mean as inter-relation aggregator p_tensor = ( th.Tensor(list(self.p.values())).view(-1, 1, 1).to(g.device) ) h_homo = th.sum(th.stack(list(hr.values())) * p_tensor, dim=0) h_homo += feat if self.activation is not None: h_homo = self.activation(h_homo) return self.linear(h_homo) class CAREGNN(nn.Module): def __init__( self, in_dim, num_classes, hid_dim=64, edges=None, num_layers=2, activation=None, step_size=0.02, ): super(CAREGNN, self).__init__() self.in_dim = in_dim self.hid_dim = hid_dim self.num_classes = num_classes self.edges = edges self.activation = activation self.step_size = step_size self.num_layers = num_layers self.layers = nn.ModuleList() if self.num_layers == 1: # Single layer self.layers.append( CAREConv( self.in_dim, self.num_classes, self.num_classes, self.edges, 
activation=self.activation, step_size=self.step_size, ) ) else: # Input layer self.layers.append( CAREConv( self.in_dim, self.hid_dim, self.num_classes, self.edges, activation=self.activation, step_size=self.step_size, ) ) # Hidden layers with n - 2 layers for i in range(self.num_layers - 2): self.layers.append( CAREConv( self.hid_dim, self.hid_dim, self.num_classes, self.edges, activation=self.activation, step_size=self.step_size, ) ) # Output layer self.layers.append( CAREConv( self.hid_dim, self.num_classes, self.num_classes, self.edges, activation=self.activation, step_size=self.step_size, ) ) def forward(self, graph, feat): # For full graph training, directly use the graph # formula 4 sim = th.tanh(self.layers[0].MLP(feat)) # Forward of n layers of CARE-GNN for layer in self.layers: feat = layer(graph, feat) return feat, sim def RLModule(self, graph, epoch, idx): for layer in self.layers: for etype in self.edges: if not layer.cvg[etype]: # formula 5 eid = graph.in_edges(idx, form="eid", etype=etype) avg_dist = th.mean(layer.dist[etype][eid]) # formula 6 if layer.last_avg_dist[etype] < avg_dist: if layer.p[etype] - self.step_size > 0: layer.p[etype] -= self.step_size layer.f[etype].append(-1) else: if layer.p[etype] + self.step_size <= 1: layer.p[etype] += self.step_size layer.f[etype].append(+1) layer.last_avg_dist[etype] = avg_dist # formula 7 if epoch >= 9 and abs(sum(layer.f[etype][-10:])) <= 2: layer.cvg[etype] = True ================================================ FILE: examples/pytorch/caregnn/model_sampling.py ================================================ import dgl import dgl.function as fn import numpy as np import torch as th import torch.nn as nn def _l1_dist(edges): # formula 2 ed = th.norm(edges.src["nd"] - edges.dst["nd"], 1, 1) return {"ed": ed} class CARESampler(dgl.dataloading.BlockSampler): def __init__(self, p, dists, num_layers): super().__init__() self.p = p self.dists = dists self.num_layers = num_layers def sample_frontier(self, 
class CAREConv(nn.Module):
    """One layer of CARE-GNN operating on a sampled block.

    Parameters
    ----------
    in_dim : int
        Size of the input node features.
    out_dim : int
        Size of the features produced by ``forward``.
    num_classes : int
        Output size of the label-aware ``MLP`` used for similarity scores.
    edges : iterable
        Edge types handled by the layer; one filtering threshold is kept
        per entry.
    activation : callable, optional
        Applied to every per-relation aggregate and to the combined
        representation. Skipped when ``None``.
    step_size : float
        Step used by the reinforcement-learning update of the thresholds.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        num_classes,
        edges,
        activation=None,
        step_size=0.02,
    ):
        super().__init__()
        self.activation = activation
        self.step_size = step_size
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.num_classes = num_classes
        self.edges = edges

        # Keep this creation order: it fixes the order in which random
        # initial weights are drawn for a given seed.
        self.linear = nn.Linear(self.in_dim, self.out_dim)
        self.MLP = nn.Linear(self.in_dim, self.num_classes)

        # Per-relation reinforcement-learning state.
        self.p = {}  # filtering threshold
        self.last_avg_dist = {}  # average distance seen at the last update
        self.f = {}  # history of +1 / -1 actions
        self.cvg = {}  # indicate whether the RL converges
        for etype in edges:
            self.p[etype] = 0.5
            self.last_avg_dist[etype] = 0
            self.f[etype] = []
            self.cvg[etype] = False

    def forward(self, g, feat):
        """Aggregate neighbours per relation and combine them (formulas 8-9).

        ``feat`` holds the features of the source nodes of ``g``; its first
        ``g.number_of_dst_nodes()`` rows are used as the destination nodes'
        own features. Returns the output of the final linear layer, one row
        per destination node.
        """
        g.srcdata["h"] = feat

        # formula 8: mean of the neighbours' features for every relation
        hr = {}
        for etype in g.canonical_etypes:
            g.update_all(fn.copy_u("h", "m"), fn.mean("m", "hr"), etype=etype)
            hr[etype] = g.dstdata["hr"]
            if self.activation is not None:
                hr[etype] = self.activation(hr[etype])

        # formula 9 using mean as inter-relation aggregator.
        # Look each threshold up by edge type so that a weight is always
        # paired with its own relation, even if the graph lists its edge
        # types in a different order than the one given at construction.
        relations = list(hr)
        p_tensor = th.tensor(
            [self.p[etype] for etype in relations],
            dtype=th.float32,
            device=feat.device,
        ).view(-1, 1, 1)
        stacked = th.stack([hr[etype] for etype in relations])
        h_homo = th.sum(stacked * p_tensor, dim=0)
        # residual connection with the destination nodes' own features
        h_homo += feat[: g.number_of_dst_nodes()]
        if self.activation is not None:
            h_homo = self.activation(h_homo)

        return self.linear(h_homo)
class EarlyStopping:
    """Stop training once a validation score has stopped improving.

    Parameters
    ----------
    patience : int
        Number of consecutive non-improving calls to ``step`` after which
        stopping is signalled.
    checkpoint_path : str
        File the best model's ``state_dict`` is written to. Defaults to
        ``"es_checkpoint.pt"``, the path the training scripts load from.
    """

    def __init__(self, patience=10, checkpoint_path="es_checkpoint.pt"):
        self.patience = patience
        self.checkpoint_path = checkpoint_path
        self.counter = 0  # consecutive steps without improvement
        self.best_score = None  # highest score seen so far
        self.early_stop = False

    def step(self, acc, model):
        """Record a new score and return whether training should stop.

        A score lower than the best one increments the patience counter; any
        other score (including a tie) becomes the new best, resets the
        counter and checkpoints ``model``. Once the counter reaches
        ``patience`` the returned flag stays ``True``.
        """
        score = acc
        if self.best_score is None:
            # first call: whatever we got is the best so far
            self.best_score = score
            self.save_checkpoint(model)
        elif score < self.best_score:
            self.counter += 1
            print(
                f"EarlyStopping counter: {self.counter} out of {self.patience}"
            )
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        """Save the model's ``state_dict`` to ``self.checkpoint_path``."""
        torch.save(model.state_dict(), self.checkpoint_path)
class SAGE(nn.Module):
    """Three-layer GraphSAGE network with mean aggregation.

    The layer widths are ``in_feats -> n_hidden -> n_hidden -> n_classes``.
    ReLU followed by dropout (p=0.5) is applied after every layer except the
    last one.
    """

    def __init__(self, in_feats, n_hidden, n_classes):
        super().__init__()
        widths = [in_feats, n_hidden, n_hidden, n_classes]
        # Layers are created front to back, one per consecutive width pair.
        self.layers = nn.ModuleList(
            [
                dglnn.SAGEConv(fan_in, fan_out, "mean")
                for fan_in, fan_out in zip(widths[:-1], widths[1:])
            ]
        )
        self.dropout = nn.Dropout(0.5)

    def forward(self, sg, x):
        """Run every layer on subgraph ``sg`` starting from features ``x``."""
        *hidden_layers, output_layer = self.layers
        h = x
        for conv in hidden_layers:
            h = self.dropout(F.relu(conv(sg, h)))
        # the output layer is left without activation or dropout
        return output_layer(sg, h)
dataloader = dgl.dataloading.DataLoader( graph, torch.arange(num_partitions).to("cuda"), sampler, device="cuda", batch_size=100, shuffle=True, drop_last=False, num_workers=0, use_uva=True, ) durations = [] for epoch in range(10): t0 = time.time() model.train() for it, sg in enumerate(dataloader): x = sg.ndata["feat"] y = sg.ndata["label"] m = sg.ndata["train_mask"].bool() y_hat = model(sg, x) loss = F.cross_entropy(y_hat[m], y[m]) opt.zero_grad() loss.backward() opt.step() if it % 20 == 0: acc = MF.accuracy( y_hat[m], y[m], task="multiclass", num_classes=dataset.num_classes, ) mem = torch.cuda.max_memory_allocated() / 1000000 print("Loss", loss.item(), "Acc", acc.item(), "GPU Mem", mem, "MB") tt = time.time() - t0 print("Run time for epoch# %d: %.2fs" % (epoch, tt)) durations.append(tt) model.eval() with torch.no_grad(): val_preds, test_preds = [], [] val_labels, test_labels = [], [] for it, sg in enumerate(dataloader): x = sg.ndata["feat"] y = sg.ndata["label"] m_val = sg.ndata["val_mask"].bool() m_test = sg.ndata["test_mask"].bool() y_hat = model(sg, x) val_preds.append(y_hat[m_val]) val_labels.append(y[m_val]) test_preds.append(y_hat[m_test]) test_labels.append(y[m_test]) val_preds = torch.cat(val_preds, 0) val_labels = torch.cat(val_labels, 0) test_preds = torch.cat(test_preds, 0) test_labels = torch.cat(test_labels, 0) val_acc = MF.accuracy( val_preds, val_labels, task="multiclass", num_classes=dataset.num_classes, ) test_acc = MF.accuracy( test_preds, test_labels, task="multiclass", num_classes=dataset.num_classes, ) print("Validation acc:", val_acc.item(), "Test acc:", test_acc.item()) print( "Average run time for last %d epochs: %.2fs standard deviation: %.3f" % ((epoch - 3), np.mean(durations[4:]), np.std(durations[4:])) ) ================================================ FILE: examples/pytorch/compGCN/README.md ================================================ # DGL Implementation of the CompGCN Paper This DGL example implements the GNN model proposed in 
class TrainDataset(Dataset):
    """
    Training Dataset class.

    Parameters
    ----------
    triples: The triples used for training the model; each item is a mapping
        with a "triple" entry and a "label" entry (indices of the entities
        that are valid answers)
    num_ent: Number of entities in the knowledge graph
    lbl_smooth: Label smoothing

    Returns
    -------
    A training Dataset class instance used by DataLoader
    """

    def __init__(self, triples, num_ent, lbl_smooth):
        self.triples = triples
        self.num_ent = num_ent
        self.lbl_smooth = lbl_smooth
        self.entities = np.arange(self.num_ent, dtype=np.int32)

    def __len__(self):
        return len(self.triples)

    def __getitem__(self, idx):
        record = self.triples[idx]
        triple = torch.LongTensor(record["triple"])
        target = self.get_label(np.int32(record["label"]))
        if self.lbl_smooth == 0.0:
            return triple, target
        # label smoothing: shrink the multi-hot vector and add 1 / num_ent
        smoothed = (1.0 - self.lbl_smooth) * target + (1.0 / self.num_ent)
        return triple, smoothed

    @staticmethod
    def collate_fn(data):
        """Stack a list of ``(triple, label)`` samples into two batch tensors."""
        triple = torch.stack([sample[0] for sample in data], dim=0)
        trp_label = torch.stack([sample[1] for sample in data], dim=0)
        return triple, trp_label

    def get_label(self, label):
        """Return a float32 multi-hot vector of length ``num_ent``.

        Entries listed in ``label`` (edges that exist in the graph) are 1.0,
        every other entry is 0.0.
        """
        multi_hot = np.zeros([self.num_ent], dtype=np.float32)
        for tail in label:
            multi_hot[tail] = 1.0
        return torch.from_numpy(multi_hot)
class Data(object):
    """Read raw triples and convert them into a standard format.

    Attributes
    ----------
    ent2id: Entity to unique identifier mapping
    rel2id: Relation to unique identifier mapping (reversed relations are
        stored as ``<relation>_reverse``)
    id2ent: Inverse mapping of ent2id
    id2rel: Inverse mapping of rel2id
    num_ent: Number of entities in the knowledge graph
    num_rel: Number of relations in the knowledge graph (without reverses)
    g: The dgl graph constructed from the edges in the training set and all
        the entities in the knowledge graph
    data['train'], data['valid'], data['test']: The id triples of each split
    data_iter: The dataloader for different data splits
    """

    def __init__(self, dataset, lbl_smooth, num_workers, batch_size):
        """
        Parameters
        ----------
        dataset: The name of the dataset; files are read from
            ``./<dataset>/<split>.txt``
        lbl_smooth: Label smoothing
        num_workers: Number of workers of dataloaders
        batch_size: Batch size of dataloaders
        """
        self.dataset = dataset
        self.lbl_smooth = lbl_smooth
        self.num_workers = num_workers
        self.batch_size = batch_size

        # read in raw data and get mappings; ids follow first appearance
        ent_set, rel_set = OrderedSet(), OrderedSet()
        for split in ["train", "test", "valid"]:
            for sub, rel, obj in self._read_split(split):
                ent_set.add(sub)
                rel_set.add(rel)
                ent_set.add(obj)

        self.ent2id = {ent: idx for idx, ent in enumerate(ent_set)}
        self.rel2id = {rel: idx for idx, rel in enumerate(rel_set)}
        # reversed relations take the ids right after the original ones
        self.rel2id.update(
            {
                rel + "_reverse": idx + len(self.rel2id)
                for idx, rel in enumerate(rel_set)
            }
        )

        self.id2ent = {idx: ent for ent, idx in self.ent2id.items()}
        self.id2rel = {idx: rel for rel, idx in self.rel2id.items()}

        self.num_ent = len(self.ent2id)
        self.num_rel = len(self.rel2id) // 2

        # read in ids of subjects, relations, and objects for train/test/valid
        self.data = ddict(list)  # stores the triples
        # The key of sr2o is (subject, relation), and the items are all the
        # successors following (subject, relation)
        sr2o = ddict(set)

        src = []
        dst = []
        rels = []
        inver_src = []
        inver_dst = []
        inver_rels = []

        for split in ["train", "test", "valid"]:
            for sub, rel, obj in self._read_split(split):
                sub_id, rel_id, obj_id = (
                    self.ent2id[sub],
                    self.rel2id[rel],
                    self.ent2id[obj],
                )
                self.data[split].append((sub_id, rel_id, obj_id))

                # only training triples contribute edges to the graph
                if split == "train":
                    sr2o[(sub_id, rel_id)].add(obj_id)
                    # append the reversed edges
                    sr2o[(obj_id, rel_id + self.num_rel)].add(sub_id)
                    src.append(sub_id)
                    dst.append(obj_id)
                    rels.append(rel_id)
                    inver_src.append(obj_id)
                    inver_dst.append(sub_id)
                    inver_rels.append(rel_id + self.num_rel)

        # construct dgl graph: original edges first, reversed edges second
        src = src + inver_src
        dst = dst + inver_dst
        rels = rels + inver_rels
        self.g = dgl.graph((src, dst), num_nodes=self.num_ent)
        # build the id tensor as integers directly instead of going through
        # float32, which cannot represent every large integer exactly
        self.g.edata["etype"] = torch.tensor(rels, dtype=torch.long)

        # identify in and out edges (first half / second half of the edges)
        half = self.g.num_edges() // 2
        in_edges_mask = [True] * half + [False] * half
        out_edges_mask = [False] * half + [True] * half
        self.g.edata["in_edges_mask"] = torch.Tensor(in_edges_mask)
        self.g.edata["out_edges_mask"] = torch.Tensor(out_edges_mask)

        # Prepare train/valid/test data
        self.data = dict(self.data)
        # store only the train data
        self.sr2o = {k: list(v) for k, v in sr2o.items()}
        for split in ["test", "valid"]:
            for sub, rel, obj in self.data[split]:
                sr2o[(sub, rel)].add(obj)
                sr2o[(obj, rel + self.num_rel)].add(sub)

        # store all the data
        self.sr2o_all = {k: list(v) for k, v in sr2o.items()}
        self.triples = ddict(list)

        # training samples: one per (subject, relation) with all known tails
        for (sub, rel), tails in self.sr2o.items():
            self.triples["train"].append(
                {"triple": (sub, rel, -1), "label": tails}
            )

        # evaluation samples: tail and head prediction for every triple,
        # labelled with every answer known from any split
        for split in ["test", "valid"]:
            for sub, rel, obj in self.data[split]:
                rel_inv = rel + self.num_rel
                self.triples["{}_{}".format(split, "tail")].append(
                    {
                        "triple": (sub, rel, obj),
                        "label": self.sr2o_all[(sub, rel)],
                    }
                )
                self.triples["{}_{}".format(split, "head")].append(
                    {
                        "triple": (obj, rel_inv, sub),
                        "label": self.sr2o_all[(obj, rel_inv)],
                    }
                )

        self.triples = dict(self.triples)

        def get_train_data_loader(split, batch_size, shuffle=True):
            return DataLoader(
                TrainDataset(
                    self.triples[split], self.num_ent, self.lbl_smooth
                ),
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=max(0, self.num_workers),
                collate_fn=TrainDataset.collate_fn,
            )

        def get_test_data_loader(split, batch_size, shuffle=True):
            return DataLoader(
                TestDataset(self.triples[split], self.num_ent),
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=max(0, self.num_workers),
                collate_fn=TestDataset.collate_fn,
            )

        # train/valid/test dataloaders
        self.data_iter = {
            "train": get_train_data_loader("train", self.batch_size),
            "valid_head": get_test_data_loader("valid_head", self.batch_size),
            "valid_tail": get_test_data_loader("valid_tail", self.batch_size),
            "test_head": get_test_data_loader("test_head", self.batch_size),
            "test_tail": get_test_data_loader("test_tail", self.batch_size),
        }

    def _read_split(self, split):
        """Yield lower-cased ``(subject, relation, object)`` strings of a split.

        The file is opened in a ``with`` block so its handle is closed as
        soon as the last line has been consumed.
        """
        with open("./{}/{}.txt".format(self.dataset, split)) as triples_file:
            for line in triples_file:
                sub, rel, obj = map(str.lower, line.strip().split("\t"))
                yield sub, rel, obj
label = batch[0].to(device), batch[1].to(device) sub, rel, obj, label = ( triple[:, 0], triple[:, 1], triple[:, 2], label, ) pred = model(graph, sub, rel) b_range = th.arange(pred.size()[0], device=device) target_pred = pred[b_range, obj] pred = th.where(label.bool(), -th.ones_like(pred) * 10000000, pred) pred[b_range, obj] = target_pred # compute metrics ranks = ( 1 + th.argsort( th.argsort(pred, dim=1, descending=True), dim=1, descending=False, )[b_range, obj] ) ranks = ranks.float() results["count"] = th.numel(ranks) + results.get("count", 0.0) results["mr"] = th.sum(ranks).item() + results.get("mr", 0.0) results["mrr"] = th.sum(1.0 / ranks).item() + results.get( "mrr", 0.0 ) for k in [1, 3, 10]: results["hits@{}".format(k)] = th.numel( ranks[ranks <= (k)] ) + results.get("hits@{}".format(k), 0.0) return results # evaluation function, evaluate the head and tail prediction and then combine the results def evaluate(model, graph, device, data_iter, split="valid"): # predict for head and tail left_results = predict(model, graph, device, data_iter, split, mode="tail") right_results = predict(model, graph, device, data_iter, split, mode="head") results = {} count = float(left_results["count"]) # combine the head and tail prediction results # Metrics: MRR, MR, and Hit@k results["left_mr"] = round(left_results["mr"] / count, 5) results["left_mrr"] = round(left_results["mrr"] / count, 5) results["right_mr"] = round(right_results["mr"] / count, 5) results["right_mrr"] = round(right_results["mrr"] / count, 5) results["mr"] = round( (left_results["mr"] + right_results["mr"]) / (2 * count), 5 ) results["mrr"] = round( (left_results["mrr"] + right_results["mrr"]) / (2 * count), 5 ) for k in [1, 3, 10]: results["left_hits@{}".format(k)] = round( left_results["hits@{}".format(k)] / count, 5 ) results["right_hits@{}".format(k)] = round( right_results["hits@{}".format(k)] / count, 5 ) results["hits@{}".format(k)] = round( ( left_results["hits@{}".format(k)] + 
right_results["hits@{}".format(k)] ) / (2 * count), 5, ) return results def main(args): # Step 1: Prepare graph data and retrieve train/validation/test index ============================= # # check cuda if args.gpu >= 0 and th.cuda.is_available(): device = "cuda:{}".format(args.gpu) else: device = "cpu" # construct graph, split in/out edges and prepare train/validation/test data_loader data = Data( args.dataset, args.lbl_smooth, args.num_workers, args.batch_size ) data_iter = data.data_iter # train/validation/test data_loader graph = data.g.to(device) num_rel = th.max(graph.edata["etype"]).item() + 1 # Compute in/out edge norms and store in edata graph = in_out_norm(graph) # Step 2: Create model =================================================================== # compgcn_model = CompGCN_ConvE( num_bases=args.num_bases, num_rel=num_rel, num_ent=graph.num_nodes(), in_dim=args.init_dim, layer_size=args.layer_size, comp_fn=args.opn, batchnorm=True, dropout=args.dropout, layer_dropout=args.layer_dropout, num_filt=args.num_filt, hid_drop=args.hid_drop, feat_drop=args.feat_drop, ker_sz=args.ker_sz, k_w=args.k_w, k_h=args.k_h, ) compgcn_model = compgcn_model.to(device) # Step 3: Create training components ===================================================== # loss_fn = th.nn.BCELoss() optimizer = optim.Adam( compgcn_model.parameters(), lr=args.lr, weight_decay=args.l2 ) # Step 4: training epoches =============================================================== # best_mrr = 0.0 kill_cnt = 0 for epoch in range(args.max_epochs): # Training and validation using a full graph compgcn_model.train() train_loss = [] t0 = time() for step, batch in enumerate(data_iter["train"]): triple, label = batch[0].to(device), batch[1].to(device) sub, rel, obj, label = ( triple[:, 0], triple[:, 1], triple[:, 2], label, ) logits = compgcn_model(graph, sub, rel) # compute loss tr_loss = loss_fn(logits, label) train_loss.append(tr_loss.item()) # backward optimizer.zero_grad() tr_loss.backward() 
if __name__ == "__main__":
    # Command-line entry point: parse hyperparameters, seed the RNGs and
    # hand the parsed namespace to main().
    import ast

    parser = argparse.ArgumentParser(
        description="Parser For Arguments",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        "--data",
        dest="dataset",
        default="FB15k-237",
        help="Dataset to use, default: FB15k-237",
    )
    parser.add_argument(
        "--model", dest="model", default="compgcn", help="Model Name"
    )
    parser.add_argument(
        "--score_func",
        dest="score_func",
        default="conve",
        help="Score Function for Link prediction",
    )
    parser.add_argument(
        "--opn",
        dest="opn",
        default="ccorr",
        help="Composition Operation to be used in CompGCN",
    )

    parser.add_argument(
        "--batch", dest="batch_size", default=1024, type=int, help="Batch size"
    )
    parser.add_argument(
        "--gpu",
        type=int,
        default=0,
        help="Set GPU Ids : Eg: For CPU = -1, For Single GPU = 0",
    )
    parser.add_argument(
        "--epoch",
        dest="max_epochs",
        type=int,
        default=500,
        help="Number of epochs",
    )
    parser.add_argument(
        "--l2", type=float, default=0.0, help="L2 Regularization for Optimizer"
    )
    parser.add_argument(
        "--lr", type=float, default=0.001, help="Starting Learning Rate"
    )
    parser.add_argument(
        "--lbl_smooth",
        dest="lbl_smooth",
        type=float,
        default=0.1,
        help="Label Smoothing",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=10,
        help="Number of processes to construct batches",
    )
    parser.add_argument(
        "--seed",
        dest="seed",
        default=41504,
        type=int,
        help="Seed for randomization",
    )

    parser.add_argument(
        "--num_bases",
        dest="num_bases",
        default=-1,
        type=int,
        help="Number of basis relation vectors to use",
    )
    parser.add_argument(
        "--init_dim",
        dest="init_dim",
        default=100,
        type=int,
        help="Initial dimension size for entities and relations",
    )
    parser.add_argument(
        "--layer_size",
        nargs="?",
        default="[200]",
        help="List of output size for each compGCN layer",
    )
    parser.add_argument(
        "--gcn_drop",
        dest="dropout",
        default=0.1,
        type=float,
        help="Dropout to use in GCN Layer",
    )
    parser.add_argument(
        "--layer_dropout",
        nargs="?",
        default="[0.3]",
        help="List of dropout value after each compGCN layer",
    )

    # ConvE specific hyperparameters
    parser.add_argument(
        "--hid_drop",
        dest="hid_drop",
        default=0.3,
        type=float,
        help="ConvE: Hidden dropout",
    )
    parser.add_argument(
        "--feat_drop",
        dest="feat_drop",
        default=0.3,
        type=float,
        help="ConvE: Feature Dropout",
    )
    parser.add_argument(
        "--k_w", dest="k_w", default=10, type=int, help="ConvE: k_w"
    )
    parser.add_argument(
        "--k_h", dest="k_h", default=20, type=int, help="ConvE: k_h"
    )
    parser.add_argument(
        "--num_filt",
        dest="num_filt",
        default=200,
        type=int,
        help="ConvE: Number of filters in convolution",
    )
    parser.add_argument(
        "--ker_sz",
        dest="ker_sz",
        default=7,
        type=int,
        help="ConvE: Kernel size to use",
    )

    args = parser.parse_args()

    np.random.seed(args.seed)
    th.manual_seed(args.seed)

    print(args)

    # The list-valued options arrive as strings such as "[200, 200]".
    # literal_eval parses Python literals only, so a crafted command-line
    # value cannot execute arbitrary code the way eval() would.
    args.layer_size = ast.literal_eval(args.layer_size)
    args.layer_dropout = ast.literal_eval(args.layer_dropout)

    main(args)
class CompGraphConv(nn.Module):
    """One layer of CompGCN.

    Parameters
    ----------
    in_dim : int
        Size of the incoming node and relation features.
    out_dim : int
        Size of the outgoing node and relation features.
    comp_fn : str
        Composition operator, one of ``"sub"``, ``"mul"`` or ``"ccorr"``.
    batchnorm : bool
        Whether to apply ``BatchNorm1d`` to the node output.
    dropout : float
        Dropout probability applied to the aggregated edge messages.

    Raises
    ------
    ValueError
        If ``comp_fn`` is not a supported operator (checked at construction
        so a typo fails before any training step runs).
    """

    _SUPPORTED_COMP_FN = ("sub", "mul", "ccorr")

    def __init__(
        self, in_dim, out_dim, comp_fn="sub", batchnorm=True, dropout=0.1
    ):
        super(CompGraphConv, self).__init__()
        if comp_fn not in self._SUPPORTED_COMP_FN:
            raise ValueError("Only supports sub, mul, and ccorr")
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.comp_fn = comp_fn
        # NOTE: attribute name (including its spelling) is kept for
        # backward compatibility with existing users of the layer.
        self.actvation = th.tanh
        self.batchnorm = batchnorm

        # define dropout layer
        self.dropout = nn.Dropout(dropout)

        # define batch norm layer
        if self.batchnorm:
            self.bn = nn.BatchNorm1d(out_dim)

        # define in/out/loop transform layer
        self.W_O = nn.Linear(self.in_dim, self.out_dim)
        self.W_I = nn.Linear(self.in_dim, self.out_dim)
        self.W_S = nn.Linear(self.in_dim, self.out_dim)

        # define relation transform layer
        self.W_R = nn.Linear(self.in_dim, self.out_dim)

        # self loop embedding; th.empty replaces the legacy th.Tensor(...)
        # constructor, the values are overwritten by the initialiser below.
        self.loop_rel = nn.Parameter(th.empty(1, self.in_dim))
        nn.init.xavier_normal_(self.loop_rel)

    def forward(self, g, n_in_feats, r_feats):
        """Run one CompGCN propagation step.

        Parameters
        ----------
        g : graph
            Must provide ``edata["etype"]``, ``edata["norm"]``,
            ``edata["in_edges_mask"]`` and ``edata["out_edges_mask"]``.
        n_in_feats : Tensor
            Node features, assigned to ``g.srcdata["h"]``.
        r_feats : Tensor
            Relation features, one row per relation; the self-loop relation
            is appended internally and stripped from the returned value.

        Returns
        -------
        (Tensor, Tensor)
            Updated node features and updated relation features.
        """
        with g.local_scope():
            # Assign values to source nodes. In a homogeneous graph, this is
            # equal to assigning them to all nodes.
            g.srcdata["h"] = n_in_feats
            # append loop_rel embedding to r_feats
            r_feats = th.cat((r_feats, self.loop_rel), 0)
            # Assign features to all edges with the corresponding relation
            # embeddings
            g.edata["h"] = r_feats[g.edata["etype"]] * g.edata["norm"]

            # Step 1: compute composition by edge in the edge direction, and
            # store results in edges.
            if self.comp_fn == "sub":
                g.apply_edges(fn.u_sub_e("h", "h", out="comp_h"))
            elif self.comp_fn == "mul":
                g.apply_edges(fn.u_mul_e("h", "h", out="comp_h"))
            elif self.comp_fn == "ccorr":
                g.apply_edges(
                    lambda edges: {
                        "comp_h": ccorr(edges.src["h"], edges.data["h"])
                    }
                )
            else:
                raise Exception("Only supports sub, mul, and ccorr")

            # Step 2: use extracted edge direction to compute in and out edges
            comp_h = g.edata["comp_h"]

            # flatten() keeps the index 1-D even when a single edge matches.
            in_edges_idx = th.nonzero(
                g.edata["in_edges_mask"], as_tuple=False
            ).flatten()
            out_edges_idx = th.nonzero(
                g.edata["out_edges_mask"], as_tuple=False
            ).flatten()

            comp_h_O = self.W_O(comp_h[out_edges_idx])
            comp_h_I = self.W_I(comp_h[in_edges_idx])

            # Allocate with the dtype/device of the projected messages so the
            # indexed assignment below also works for non-float32 models.
            new_comp_h = comp_h_O.new_zeros(comp_h.shape[0], self.out_dim)
            new_comp_h[out_edges_idx] = comp_h_O
            new_comp_h[in_edges_idx] = comp_h_I

            g.edata["new_comp_h"] = new_comp_h

            # Step 3: sum comp results to both src and dst nodes
            g.update_all(fn.copy_e("new_comp_h", "m"), fn.sum("m", "comp_edge"))

            # Step 4: add results of self-loop (r_feats[-1] is loop_rel)
            if self.comp_fn == "sub":
                comp_h_s = n_in_feats - r_feats[-1]
            elif self.comp_fn == "mul":
                comp_h_s = n_in_feats * r_feats[-1]
            elif self.comp_fn == "ccorr":
                comp_h_s = ccorr(n_in_feats, r_feats[-1])
            else:
                raise Exception("Only supports sub, mul, and ccorr")

            # Sum all of the comp results as output of nodes and dropout
            n_out_feats = (
                self.W_S(comp_h_s) + self.dropout(g.ndata["comp_edge"])
            ) * (1 / 3)

            # Compute relation output
            r_out_feats = self.W_R(r_feats)

            # Batch norm
            if self.batchnorm:
                n_out_feats = self.bn(n_out_feats)

            # Activation function
            if self.actvation is not None:
                n_out_feats = self.actvation(n_out_feats)

        # Drop the row that belongs to the appended self-loop relation.
        return n_out_feats, r_out_feats[:-1]
# Use convE as the score function
class CompGCN_ConvE(nn.Module):
    """CompGCN encoder followed by a ConvE scoring head.

    Parameters
    ----------
    num_bases, num_rel, num_ent, in_dim, layer_size, comp_fn, batchnorm,
    dropout, layer_dropout
        Forwarded unchanged to ``CompGCN``. ``layer_size[-1]`` is the
        embedding size fed to the ConvE head.
    num_filt : int
        Number of convolution filters.
    hid_drop : float
        Dropout applied after the fully connected layer.
    feat_drop : float
        Dropout applied to the convolution feature maps.
    ker_sz : int
        Side length of the square convolution kernel.
    k_w, k_h : int
        The stacked (subject, relation) embedding is reshaped into a
        ``2 * k_w`` by ``k_h`` image, so ``k_w * k_h`` must equal
        ``layer_size[-1]``.

    Raises
    ------
    ValueError
        If ``k_w * k_h`` differs from the embedding size, or if the kernel
        does not fit into the reshaped image.
    """

    def __init__(
        self,
        num_bases,
        num_rel,
        num_ent,
        in_dim,
        layer_size,
        comp_fn="sub",
        batchnorm=True,
        dropout=0.1,
        layer_dropout=[0.3],
        num_filt=200,
        hid_drop=0.3,
        feat_drop=0.3,
        ker_sz=5,
        k_w=5,
        k_h=5,
    ):
        super(CompGCN_ConvE, self).__init__()

        self.embed_dim = layer_size[-1]
        self.hid_drop = hid_drop
        self.feat_drop = feat_drop
        self.ker_sz = ker_sz
        self.k_w = k_w
        self.k_h = k_h
        self.num_filt = num_filt

        # Without this check a mismatch is not reported: the reshape in
        # concat() has a free batch axis and would silently fold embedding
        # entries into a different number of rows.
        if self.k_w * self.k_h != self.embed_dim:
            raise ValueError(
                "k_w * k_h ({} * {}) must equal the last layer size "
                "({})".format(self.k_w, self.k_h, self.embed_dim)
            )
        if self.ker_sz > min(2 * self.k_w, self.k_h):
            raise ValueError(
                "ker_sz ({}) does not fit into the {} x {} input".format(
                    self.ker_sz, 2 * self.k_w, self.k_h
                )
            )

        # compGCN model to get sub/rel embs
        self.compGCN_Model = CompGCN(
            num_bases,
            num_rel,
            num_ent,
            in_dim,
            layer_size,
            comp_fn,
            batchnorm,
            dropout,
            layer_dropout,
        )

        # batchnorms to the combined (sub+rel) emb
        self.bn0 = th.nn.BatchNorm2d(1)
        self.bn1 = th.nn.BatchNorm2d(self.num_filt)
        self.bn2 = th.nn.BatchNorm1d(self.embed_dim)

        # dropouts and conv module to the combined (sub+rel) emb
        self.hidden_drop = th.nn.Dropout(self.hid_drop)
        self.feature_drop = th.nn.Dropout(self.feat_drop)
        self.m_conv1 = th.nn.Conv2d(
            1,
            out_channels=self.num_filt,
            kernel_size=(self.ker_sz, self.ker_sz),
            stride=1,
            padding=0,
            bias=False,
        )

        # Spatial size of the feature map after the unpadded convolution.
        flat_sz_h = int(2 * self.k_w) - self.ker_sz + 1
        flat_sz_w = self.k_h - self.ker_sz + 1
        self.flat_sz = flat_sz_h * flat_sz_w * self.num_filt
        self.fc = th.nn.Linear(self.flat_sz, self.embed_dim)

        # bias to the score
        self.bias = nn.Parameter(th.zeros(num_ent))

    # combine entity embeddings and relation embeddings
    def concat(self, e1_embed, rel_embed):
        """Interleave entity and relation embeddings into a 2-D image.

        Both inputs are viewed as ``(-1, 1, embed_dim)``, stacked, transposed
        so entity and relation entries alternate, and reshaped to
        ``(-1, 1, 2 * k_w, k_h)``.
        """
        e1_embed = e1_embed.view(-1, 1, self.embed_dim)
        rel_embed = rel_embed.view(-1, 1, self.embed_dim)
        stack_inp = th.cat([e1_embed, rel_embed], 1)
        stack_inp = th.transpose(stack_inp, 2, 1).reshape(
            (-1, 1, 2 * self.k_w, self.k_h)
        )
        return stack_inp

    def forward(self, graph, sub, rel):
        """Score every entity as the object of each (sub, rel) query.

        Returns
        -------
        Tensor
            Sigmoid scores with one row per query and one column per row of
            the node embedding matrix produced by the encoder.
        """
        # get sub_emb and rel_emb via compGCN
        n_feats, r_feats = self.compGCN_Model(graph)
        sub_emb = n_feats[sub, :]
        rel_emb = r_feats[rel, :]

        # combine the sub_emb and rel_emb
        stk_inp = self.concat(sub_emb, rel_emb)
        # use convE to score the combined emb
        x = self.bn0(stk_inp)
        x = self.m_conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.feature_drop(x)
        x = x.view(-1, self.flat_sz)
        x = self.fc(x)
        x = self.hidden_drop(x)
        x = self.bn2(x)
        x = F.relu(x)
        # compute score
        x = th.mm(x, n_feats.transpose(1, 0))
        # add in bias
        x += self.bias.expand_as(x)
        score = th.sigmoid(x)
        return score
def com_mult(a, b):
    """Multiply complex numbers stored as (real, imag) pairs in the last axis.

    Only indices 0 and 1 of the last axis are read; the result has a last
    axis of size 2.
    """
    r1, i1 = a[..., 0], a[..., 1]
    r2, i2 = b[..., 0], b[..., 1]
    return th.stack([r1 * r2 - i1 * i2, r1 * i2 + i1 * r2], dim=-1)


def conj(a):
    """Return the complex conjugate of a (real, imag)-pair tensor.

    Index 1 of the last axis is negated. The result is a new tensor; the
    argument is left untouched (the previous version negated it in place).
    """
    out = a.clone()
    out[..., 1] = -out[..., 1]
    return out


def ccorr(a, b):
    """
    Compute circular correlation of two tensors.

    Parameters
    ----------
    a: Tensor, 1D or 2D
    b: Tensor, 1D or 2D

    Notes
    -----
    Input a and b should have the same dimensions. And this operation
    supports broadcasting.

    The correlation is evaluated along the last axis as
    ``irfft(conj(rfft(a)) * rfft(b))``. The output length is passed to the
    inverse transform explicitly: without it the inverse defaults to an even
    length and an odd-sized last axis would come back one element short.

    Returns
    -------
    Tensor, having the same dimension as the input a.
    """
    spectrum = th.conj(th.fft.rfft(a, dim=-1)) * th.fft.rfft(b, dim=-1)
    return th.fft.irfft(spectrum, n=a.shape[-1], dim=-1)


# identify in/out edges, compute edge norm for each and store in edata
def in_out_norm(graph):
    """Compute per-edge normalisation factors and store them on the graph.

    The edges selected by ``edata["in_edges_mask"]`` and by
    ``edata["out_edges_mask"]`` are handled as two separate sets. Within a
    set, a node's degree is the number of edges of that set pointing to it,
    and an edge ``u -> v`` receives ``deg(u)^-0.5 * deg(v)^-0.5`` (zero when
    either degree is zero). Edges in neither set keep a factor of 1.

    The result is written to ``graph.edata["norm"]`` with shape
    ``(num_edges, 1)`` and the same graph object is returned.
    """
    src, dst, EID = graph.edges(form="all")
    # Build the result in a local tensor and assign it once, instead of
    # relying on in-place writes through ``graph.edata[...]``.
    edge_norm = th.ones(EID.shape[0]).to(graph.device)

    # flatten() keeps the index 1-D even when a single edge matches.
    in_edges_idx = th.nonzero(
        graph.edata["in_edges_mask"], as_tuple=False
    ).flatten()
    out_edges_idx = th.nonzero(
        graph.edata["out_edges_mask"], as_tuple=False
    ).flatten()

    for idx in [in_edges_idx, out_edges_idx]:
        u, v = src[idx], dst[idx]
        deg = th.zeros(graph.num_nodes()).to(graph.device)
        n_idx, count = th.unique(v, return_counts=True)
        deg[n_idx] = count.float()
        deg_inv = deg.pow(-0.5)  # D^{-0.5}
        deg_inv[deg_inv == float("inf")] = 0
        edge_norm[idx] = deg_inv[u] * deg_inv[v]

    graph.edata["norm"] = edge_norm.unsqueeze(1)
    return graph
For the original implementation, see [here](https://github.com/CUAI/CorrectAndSmooth). Contributor: [xnuohz](https://github.com/xnuohz) ### Requirements The codebase is implemented in Python 3.7. For version requirement of packages, see below. ``` dgl 0.6.0.post1 torch 1.7.0 ogb 1.3.0 ``` ### Limitations Spectral and Diffusion Embeddings used by the authors for feature augmentation are not currently implemented. Without these feature augmentations only the "Plain" (without feature augmentations) results from the authors can be replicated. ### The graph datasets used in this example Open Graph Benchmark(OGB). Dataset summary: | Dataset | #Nodes | #Edges | #Node Feats | Metric | | :-----------: | :-------: | :--------: | :---------: | :------: | | ogbn-arxiv | 169,343 | 1,166,243 | 128 | Accuracy | | ogbn-products | 2,449,029 | 61,859,140 | 100 | Accuracy | ### Usage Training a **Base predictor** and using **Correct&Smooth** which follows the original hyperparameters on different datasets. ##### ogbn-arxiv * **Plain MLP + C&S** ```bash python main.py --dropout 0.5 python main.py --pretrain --correction-adj DA --smoothing-adj AD --autoscale ``` * **Plain Linear + C&S** ```bash python main.py --model linear --dropout 0.5 --epochs 1000 python main.py --model linear --pretrain --correction-alpha 0.87 --smoothing-alpha 0.81 --correction-adj AD --autoscale ``` ##### ogbn-products * **Plain Linear + C&S** ```bash python main.py --dataset ogbn-products --model linear --dropout 0.5 --epochs 1000 --lr 0.1 python main.py --dataset ogbn-products --model linear --pretrain --correction-alpha 1. 
def evaluate(y_pred, y_true, idx, evaluator):
    """Return the ``"acc"`` entry the evaluator reports for the rows in idx."""
    return evaluator.eval({"y_true": y_true[idx], "y_pred": y_pred[idx]})["acc"]


def main():
    """Train the base predictor, or apply Correct & Smooth to a saved one.

    Reads its configuration from the module-level ``args`` namespace.
    Without ``--pretrain`` a base model is trained, the best model by
    validation accuracy is tested and its weights are written to
    ``base/{dataset}-{model}.pt``. With ``--pretrain`` those weights are
    loaded and the predictions are post-processed with Correct & Smooth.
    """
    # check cuda
    device = (
        f"cuda:{args.gpu}"
        if torch.cuda.is_available() and args.gpu >= 0
        else "cpu"
    )
    # load data
    dataset = DglNodePropPredDataset(name=args.dataset)
    evaluator = Evaluator(name=args.dataset)

    split_idx = dataset.get_idx_split()
    # graph: DGLGraph object, label: torch tensor of shape
    # (num_nodes, num_tasks)
    g, labels = dataset[0]

    if args.dataset == "ogbn-arxiv":
        g = dgl.to_bidirected(g, copy_ndata=True)

        # standardise every feature column
        feat = g.ndata["feat"]
        feat = (feat - feat.mean(0)) / feat.std(0)
        g.ndata["feat"] = feat

    g = g.to(device)
    feats = g.ndata["feat"]
    labels = labels.to(device)

    # load masks for train / validation / test
    train_idx = split_idx["train"].to(device)
    valid_idx = split_idx["valid"].to(device)
    test_idx = split_idx["test"].to(device)

    n_features = feats.size()[-1]
    n_classes = dataset.num_classes

    # load model
    if args.model == "mlp":
        model = MLP(
            n_features, args.hid_dim, n_classes, args.num_layers, args.dropout
        )
    elif args.model == "linear":
        model = MLPLinear(n_features, n_classes)
    else:
        raise NotImplementedError(f"Model {args.model} is not supported.")

    model = model.to(device)
    print(f"Model parameters: {sum(p.numel() for p in model.parameters())}")

    if args.pretrain:
        print("---------- Before ----------")
        model.load_state_dict(
            torch.load(
                f"base/{args.dataset}-{args.model}.pt", weights_only=False
            )
        )
        model.eval()

        # Pure inference: no autograd graph is needed, and the C&S steps
        # below should operate on plain tensors.
        with torch.no_grad():
            # the models emit log-probabilities, exp() recovers probabilities
            y_soft = model(feats).exp()

        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f"Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}")

        print("---------- Correct & Smoothing ----------")
        cs = CorrectAndSmooth(
            num_correction_layers=args.num_correction_layers,
            correction_alpha=args.correction_alpha,
            correction_adj=args.correction_adj,
            num_smoothing_layers=args.num_smoothing_layers,
            smoothing_alpha=args.smoothing_alpha,
            smoothing_adj=args.smoothing_adj,
            autoscale=args.autoscale,
            scale=args.scale,
        )

        y_soft = cs.correct(g, y_soft, labels[train_idx], train_idx)
        y_soft = cs.smooth(g, y_soft, labels[train_idx], train_idx)
        y_pred = y_soft.argmax(dim=-1, keepdim=True)
        valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f"Valid acc: {valid_acc:.4f} | Test acc: {test_acc:.4f}")
    else:
        opt = optim.Adam(model.parameters(), lr=args.lr)

        best_acc = 0
        best_model = copy.deepcopy(model)

        # training
        print("---------- Training ----------")
        for i in range(args.epochs):
            model.train()
            opt.zero_grad()

            logits = model(feats)

            train_loss = F.nll_loss(
                logits[train_idx], labels.squeeze(1)[train_idx]
            )
            train_loss.backward()

            opt.step()

            model.eval()
            with torch.no_grad():
                logits = model(feats)

                y_pred = logits.argmax(dim=-1, keepdim=True)

                train_acc = evaluate(y_pred, labels, train_idx, evaluator)
                valid_acc = evaluate(y_pred, labels, valid_idx, evaluator)

                print(
                    f"Epoch {i} | Train loss: {train_loss.item():.4f} | Train acc: {train_acc:.4f} | Valid acc {valid_acc:.4f}"
                )

                if valid_acc > best_acc:
                    best_acc = valid_acc
                    best_model = copy.deepcopy(model)

        # testing & saving model
        print("---------- Testing ----------")
        best_model.eval()

        with torch.no_grad():
            logits = best_model(feats)

        y_pred = logits.argmax(dim=-1, keepdim=True)
        test_acc = evaluate(y_pred, labels, test_idx, evaluator)
        print(f"Test acc: {test_acc:.4f}")

        # exist_ok avoids the check-then-create race of os.path.exists
        os.makedirs("base", exist_ok=True)

        torch.save(
            best_model.state_dict(), f"base/{args.dataset}-{args.model}.pt"
        )
class MLPLinear(nn.Module):
    """Single linear layer followed by ``log_softmax`` over the last axis."""

    def __init__(self, in_dim, out_dim):
        super(MLPLinear, self).__init__()
        self.linear = nn.Linear(in_dim, out_dim)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the linear layer."""
        self.linear.reset_parameters()

    def forward(self, x):
        """Return log-probabilities for ``x``."""
        return F.log_softmax(self.linear(x), dim=-1)


class MLP(nn.Module):
    """Multi-layer perceptron returning log-probabilities.

    Every layer except the last is followed by ReLU, batch normalisation and
    dropout, in that order.

    Parameters
    ----------
    in_dim, hid_dim, out_dim : int
        Input, hidden and output sizes.
    num_layers : int
        Number of linear layers; must be at least 2.
    dropout : float
        Dropout probability used after every hidden layer.
    """

    def __init__(self, in_dim, hid_dim, out_dim, num_layers, dropout=0.0):
        super(MLP, self).__init__()
        assert num_layers >= 2

        self.linears = nn.ModuleList()
        self.bns = nn.ModuleList()
        self.linears.append(nn.Linear(in_dim, hid_dim))
        self.bns.append(nn.BatchNorm1d(hid_dim))

        for _ in range(num_layers - 2):
            self.linears.append(nn.Linear(hid_dim, hid_dim))
            self.bns.append(nn.BatchNorm1d(hid_dim))

        self.linears.append(nn.Linear(hid_dim, out_dim))
        self.dropout = dropout
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise all linear and batch-norm layers."""
        for layer in self.linears:
            layer.reset_parameters()
        for layer in self.bns:
            layer.reset_parameters()

    def forward(self, x):
        """Return log-probabilities for ``x``."""
        for linear, bn in zip(self.linears[:-1], self.bns):
            x = linear(x)
            x = F.relu(x, inplace=True)
            x = bn(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.linears[-1](x)
        return F.log_softmax(x, dim=-1)


class LabelPropagation(nn.Module):
    r"""

    Description
    -----------
    Introduced in "Learning from Labeled and Unlabeled Data with Label
    Propagation".

    .. math::
        \mathbf{Y}^{\prime} = \alpha \cdot \mathbf{D}^{-1/2} \mathbf{A}
        \mathbf{D}^{-1/2} \mathbf{Y} + (1 - \alpha) \mathbf{Y},

    where unlabeled data is inferred by labeled data via propagation.

    Parameters
    ----------
        num_layers: int
            The number of propagations.
        alpha: float
            The :math:`\alpha` coefficient.
        adj: str
            'DAD': D^-0.5 * A * D^-0.5
            'DA': D^-1 * A
            'AD': A * D^-1
    """

    def __init__(self, num_layers, alpha, adj="DAD"):
        super(LabelPropagation, self).__init__()

        self.num_layers = num_layers
        self.alpha = alpha
        self.adj = adj

    @torch.no_grad()
    def forward(
        self, g, labels, mask=None, post_step=lambda y: y.clamp_(0.0, 1.0)
    ):
        """Propagate ``labels`` over ``g`` for ``num_layers`` steps.

        Parameters
        ----------
        g : graph
            Graph to propagate on; assumed to be undirected.
        labels : Tensor
            Values to propagate. Integer (``torch.long``) labels are one-hot
            encoded first.
        mask : Tensor, optional
            If given, only the rows selected by ``mask`` are used as the
            starting signal; all other rows start at zero.
        post_step : callable
            Applied to the result of every propagation step. The default
            clamps the values to ``[0, 1]`` in place.

        Returns
        -------
        Tensor
            The propagated values.
        """
        with g.local_scope():
            if labels.dtype == torch.long:
                labels = F.one_hot(labels.view(-1)).to(torch.float32)

            y = labels
            if mask is not None:
                y = torch.zeros_like(labels)
                y[mask] = labels[mask]

            # Residual term added back after every step.
            last = (1 - self.alpha) * y
            # clamp avoids a division by zero for isolated nodes
            degs = g.in_degrees().float().clamp(min=1)
            norm = (
                torch.pow(degs, -0.5 if self.adj == "DAD" else -1)
                .to(labels.device)
                .unsqueeze(1)
            )

            for _ in range(self.num_layers):
                # Assume the graphs to be undirected
                if self.adj in ["DAD", "AD"]:
                    y = norm * y

                g.ndata["h"] = y
                g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
                y = self.alpha * g.ndata.pop("h")

                if self.adj in ["DAD", "DA"]:
                    y = y * norm

                y = post_step(last + y)

            return y


class CorrectAndSmooth(nn.Module):
    r"""

    Description
    -----------
    Introduced in "Combining Label Propagation and Simple Models
    Out-performs Graph Neural Networks"
    (https://arxiv.org/abs/2010.13993).

    Parameters
    ----------
        num_correction_layers: int
            The number of correct propagations.
        correction_alpha: float
            The coefficient of correction.
        correction_adj: str
            'DAD': D^-0.5 * A * D^-0.5
            'DA': D^-1 * A
            'AD': A * D^-1
        num_smoothing_layers: int
            The number of smooth propagations.
        smoothing_alpha: float
            The coefficient of smoothing.
        smoothing_adj: str
            'DAD': D^-0.5 * A * D^-0.5
            'DA': D^-1 * A
            'AD': A * D^-1
        autoscale: bool, optional
            If set to True, will automatically determine the scaling factor
            :math:`\sigma`. Default is True.
        scale: float, optional
            The scaling factor :math:`\sigma`, in case
            :obj:`autoscale = False`. Default is 1.
    """

    def __init__(
        self,
        num_correction_layers,
        correction_alpha,
        correction_adj,
        num_smoothing_layers,
        smoothing_alpha,
        smoothing_adj,
        autoscale=True,
        scale=1.0,
    ):
        super(CorrectAndSmooth, self).__init__()

        self.autoscale = autoscale
        self.scale = scale

        self.prop1 = LabelPropagation(
            num_correction_layers, correction_alpha, correction_adj
        )
        self.prop2 = LabelPropagation(
            num_smoothing_layers, smoothing_alpha, smoothing_adj
        )

    def correct(self, g, y_soft, y_true, mask):
        """Correct soft predictions by propagating the training residual.

        Parameters
        ----------
        g : graph
            Graph to propagate on.
        y_soft : Tensor
            Soft predictions; rows are expected to sum to (about) one.
        y_true : Tensor
            Ground truth for the rows selected by ``mask``. Integer labels
            are one-hot encoded to the width of ``y_soft``.
        mask : Tensor
            Boolean mask or index tensor selecting the labelled rows.

        Returns
        -------
        Tensor
            Corrected predictions; entries that became NaN fall back to the
            corresponding entries of ``y_soft``.
        """
        with g.local_scope():
            assert abs(float(y_soft.sum()) / y_soft.size(0) - 1.0) < 1e-2
            numel = (
                int(mask.sum()) if mask.dtype == torch.bool else mask.size(0)
            )
            assert y_true.size(0) == numel

            if y_true.dtype == torch.long:
                y_true = F.one_hot(y_true.view(-1), y_soft.size(-1)).to(
                    y_soft.dtype
                )

            # The residual is known only on the labelled rows.
            error = torch.zeros_like(y_soft)
            error[mask] = y_true - y_soft[mask]

            if self.autoscale:
                smoothed_error = self.prop1(
                    g, error, post_step=lambda x: x.clamp_(-1.0, 1.0)
                )
                sigma = error[mask].abs().sum() / numel
                scale = sigma / smoothed_error.abs().sum(dim=1, keepdim=True)
                # Rows with (almost) no propagated error would get a huge
                # factor; fall back to 1 for those.
                scale[scale.isinf() | (scale > 1000)] = 1.0

                result = y_soft + scale * smoothed_error
            else:

                def fix_input(x):
                    # Keep the known residuals fixed during propagation.
                    x[mask] = error[mask]
                    return x

                smoothed_error = self.prop1(g, error, post_step=fix_input)

                result = y_soft + self.scale * smoothed_error

            invalid = result.isnan()
            result[invalid] = y_soft[invalid]
            return result

    def smooth(self, g, y_soft, y_true, mask):
        """Smooth predictions after clamping labelled rows to the truth.

        The labelled rows are written into a copy of ``y_soft``, so the
        caller's tensor is not modified.

        Parameters
        ----------
        g : graph
            Graph to propagate on.
        y_soft : Tensor
            Soft predictions (typically the output of :meth:`correct`).
        y_true : Tensor
            Ground truth for the rows selected by ``mask``. Integer labels
            are one-hot encoded to the width of ``y_soft``.
        mask : Tensor
            Boolean mask or index tensor selecting the labelled rows.

        Returns
        -------
        Tensor
            The smoothed predictions.
        """
        with g.local_scope():
            numel = (
                int(mask.sum()) if mask.dtype == torch.bool else mask.size(0)
            )
            assert y_true.size(0) == numel

            if y_true.dtype == torch.long:
                y_true = F.one_hot(y_true.view(-1), y_soft.size(-1)).to(
                    y_soft.dtype
                )

            y_soft = y_soft.clone()
            y_soft[mask] = y_true
            return self.prop2(g, y_soft)
Networks](https://arxiv.org/abs/2007.09296).

Paper link: https://arxiv.org/abs/2007.09296

Author's code: https://github.com/divelab/DeeperGNN

Contributor: Liu Tang ([@lt610](https://github.com/lt610))

## Dependencies

- Python 3.6.10
- PyTorch 1.4.0
- numpy 1.18.1
- dgl 0.5.3
- tqdm 4.44.1

## Dataset

DGL's built-in Cora, Pubmed and Citeseer datasets. Dataset summary:

| Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes |
| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
| Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 |
| Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 |
| Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 |

## Arguments

###### Dataset options

```
--dataset          str     The graph dataset name.             Default is 'Cora'.
```

###### GPU options

```
--gpu              int     GPU index.                          Default is -1, using CPU.
```

###### Model options

```
--runs             int     Number of training runs.            Default is 1.
--epochs           int     Number of training epochs.          Default is 1500.
--early-stopping   int     Early stopping patience rounds.     Default is 100.
--lr               float   Adam optimizer learning rate.       Default is 0.01.
--lamb             float   L2 regularization coefficient.      Default is 5e-3.
--k                int     Number of propagation layers.       Default is 10.
--hid-dim          int     Hidden layer dimensionalities.      Default is 64.
--dropout          float   Dropout rate.                       Default is 0.8.
```

## Examples

Train a model which follows the original hyperparameters on different datasets.
class DAGNNConv(nn.Module):
    """Adaptive multi-hop propagation layer of DAGNN.

    Node features are propagated ``k`` times with symmetric degree
    normalisation. The ``k + 1`` resulting representations (hop 0 is the
    input itself) are then combined per node using sigmoid retention
    scores obtained by projecting each representation onto the learnable
    vector ``s``.

    Parameters
    ----------
    in_dim : int
        Size of the node features fed to ``forward``.
    k : int
        Number of propagation steps.
    """

    def __init__(self, in_dim, k):
        super(DAGNNConv, self).__init__()
        # Projection vector that yields one retention score per hop.
        self.s = Parameter(torch.empty(in_dim, 1))
        self.k = k
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise ``s`` with Xavier-uniform, using the sigmoid gain."""
        gain = nn.init.calculate_gain("sigmoid")
        nn.init.xavier_uniform_(self.s, gain=gain)

    def forward(self, graph, feats):
        """Propagate ``feats`` over ``graph`` and fuse the hop outputs.

        Parameters
        ----------
        graph
            Graph providing ``local_scope``, ``in_degrees``, ``ndata`` and
            ``update_all``.
        feats : torch.Tensor
            Node features; the comments below assume shape
            ``(num_nodes, in_dim)``.

        Returns
        -------
        torch.Tensor
            Fused representation with the same shape as ``feats``.
        """
        with graph.local_scope():
            results = [feats]

            # Clamp so that nodes without incoming edges get a finite
            # normaliser instead of 0 ** -0.5 == inf.
            degs = graph.in_degrees().float().clamp(min=1)
            norm = torch.pow(degs, -0.5)
            norm = norm.to(feats.device).unsqueeze(1)

            for _ in range(self.k):
                # D^-1/2 A D^-1/2 applied as scale -> sum over edges -> scale.
                feats = feats * norm
                graph.ndata["h"] = feats
                graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
                feats = graph.ndata["h"]
                feats = feats * norm
                results.append(feats)

            H = torch.stack(results, dim=1)  # (N, k + 1, d)
            S = torch.sigmoid(torch.matmul(H, self.s))  # (N, k + 1, 1)
            S = S.permute(0, 2, 1)  # (N, 1, k + 1)
            # Drop only the hop axis. A bare squeeze() would also remove the
            # node axis for a single-node graph or the feature axis when
            # in_dim == 1.
            H = torch.matmul(S, H).squeeze(1)  # (N, d)
            return H
class DAGNN(nn.Module):
    """Two-layer MLP followed by a ``DAGNNConv`` propagation layer.

    The first ``MLPLayer`` maps ``in_dim -> hid_dim`` with ``activation``;
    the second maps ``hid_dim -> out_dim`` without activation. Both use the
    same ``bias`` and ``dropout`` settings. ``DAGNNConv`` then propagates
    the ``out_dim``-sized output ``k`` times.
    """

    def __init__(
        self,
        k,
        in_dim,
        hid_dim,
        out_dim,
        bias=True,
        activation=F.relu,
        dropout=0,
    ):
        super().__init__()
        # Layers are created in the same order as they are applied.
        hidden_layer = MLPLayer(
            in_dim=in_dim,
            out_dim=hid_dim,
            bias=bias,
            activation=activation,
            dropout=dropout,
        )
        output_layer = MLPLayer(
            in_dim=hid_dim,
            out_dim=out_dim,
            bias=bias,
            activation=None,
            dropout=dropout,
        )
        self.mlp = nn.ModuleList([hidden_layer, output_layer])
        self.dagnn = DAGNNConv(in_dim=out_dim, k=k)

    def forward(self, graph, feats):
        """Apply the MLP layers to ``feats`` and propagate over ``graph``."""
        hidden = feats
        for mlp_layer in self.mlp:
            hidden = mlp_layer(hidden)
        return self.dagnn(graph, hidden)
as_tuple=False).squeeze().to(device) val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze().to(device) test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze().to(device) graph = graph.to(device) # Step 2: Create model =================================================================== # model = DAGNN( k=args.k, in_dim=n_features, hid_dim=args.hid_dim, out_dim=n_classes, dropout=args.dropout, ) model = model.to(device) # Step 3: Create training components ===================================================== # loss_fn = F.cross_entropy opt = torch.optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.lamb ) # Step 4: training epochs =============================================================== # loss = float("inf") best_acc = 0 no_improvement = 0 epochs = trange(args.epochs, desc="Accuracy & Loss") for _ in epochs: model.train() logits = model(graph, feats) # compute loss train_loss = loss_fn(logits[train_idx], labels[train_idx]) # backward opt.zero_grad() train_loss.backward() opt.step() ( train_loss, train_acc, valid_loss, valid_acc, test_loss, test_acc, ) = evaluate( model, graph, feats, labels, (train_idx, val_idx, test_idx) ) # Print out performance epochs.set_description( "Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format( train_acc, train_loss.item(), valid_acc, valid_loss.item() ) ) if valid_loss > loss: no_improvement += 1 if no_improvement == args.early_stopping: print("Early stop.") break else: no_improvement = 0 loss = valid_loss best_acc = test_acc print("Test Acc {:.4f}".format(best_acc)) return best_acc if __name__ == "__main__": """ DAGNN Model Hyperparameters """ parser = argparse.ArgumentParser(description="DAGNN") # data source params parser.add_argument( "--dataset", type=str, default="Cora", choices=["Cora", "Citeseer", "Pubmed"], help="Name of dataset.", ) # cuda params parser.add_argument( "--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU." 
def generate_random_seeds(seed, nums):
    """Return ``nums`` reproducible integer seeds derived from ``seed``.

    A private ``random.Random`` generator is used, so the state of the
    global ``random`` module is left untouched. The sequence is the same
    one that seeding the global module with ``seed`` would produce.

    Parameters
    ----------
    seed
        Value used to seed the private generator.
    nums : int
        Number of seeds to draw.

    Returns
    -------
    list of int
        ``nums`` integers, each in ``[1, 999999999]``.
    """
    rng = random.Random(seed)
    return [rng.randint(1, 999999999) for _ in range(nums)]
================================================ FILE: examples/pytorch/deepergcn/README.md ================================================ # DGL Implementation of DeeperGCN This DGL example implements the GNN model proposed in the paper [DeeperGCN: All You Need to Train Deeper GCNs](https://arxiv.org/abs/2006.07739). For the original implementation, see [here](https://github.com/lightaime/deep_gcns_torch). Contributor: [xnuohz](https://github.com/xnuohz) ### Requirements The codebase is implemented in Python 3.7. For version requirement of packages, see below. ``` dgl 0.6.0.post1 torch 1.7.0 ogb 1.3.0 ``` ### The graph datasets used in this example Open Graph Benchmark(OGB). Dataset summary: ###### Graph Property Prediction | Dataset | #Graphs | #Node Feats | #Edge Feats | Metric | | :---------: | :-----: | :---------: | :---------: | :-----: | | ogbg-molhiv | 41,127 | 9 | 3 | ROC-AUC | ### Usage Train a model which follows the original hyperparameters on different datasets. ```bash # ogbg-molhiv python main.py --gpu 0 --learn-beta ``` ### Performance * Table 6: Numbers associated with "Table 6" are the ones from table 6 in the paper. * Author: Numbers associated with "Author" are the ones we got by running the original code. * DGL: Numbers associated with "DGL" are the ones we got by running the DGL example. 
class GENConv(nn.Module):
    r"""

    Description
    -----------
    Generalized Message Aggregator was introduced in "DeeperGCN: All You Need
    to Train Deeper GCNs".

    Each edge message is the source node feature plus the encoded edge
    feature. Messages are aggregated per destination node with either a
    softmax-weighted sum or a power mean, optionally rescaled by message
    normalization, added to the node's own feature and passed through an MLP.

    Parameters
    ----------
    in_dim: int
        Input size.
    out_dim: int
        Output size.
    aggregator: str
        Type of aggregation, ``'softmax'`` or ``'power'``. Default is
        'softmax'.
    beta: float
        A continuous variable called an inverse temperature. Default is 1.0.
    learn_beta: bool
        Whether beta is a learnable variable or not. Only takes effect with
        the softmax aggregator. Default is False.
    p: float
        Initial power for power mean aggregation. Default is 1.0.
    learn_p: bool
        Whether p is a learnable variable or not. Default is False.
    msg_norm: bool
        Whether message normalization is used. Default is False.
    learn_msg_scale: bool
        Whether s is a learnable scaling factor or not in message
        normalization. Default is False.
    mlp_layers: int
        The number of MLP layers. Default is 1.
    eps: float
        A small positive constant in message construction function.
        Default is 1e-7.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        aggregator="softmax",
        beta=1.0,
        learn_beta=False,
        p=1.0,
        learn_p=False,
        msg_norm=False,
        learn_msg_scale=False,
        mlp_layers=1,
        eps=1e-7,
    ):
        super(GENConv, self).__init__()

        self.aggr = aggregator
        self.eps = eps

        # MLP widths: in_dim -> (2 * in_dim) * (mlp_layers - 1) -> out_dim.
        channels = [in_dim]
        for _ in range(mlp_layers - 1):
            channels.append(in_dim * 2)
        channels.append(out_dim)

        self.mlp = MLP(channels)
        self.msg_norm = MessageNorm(learn_msg_scale) if msg_norm else None

        # beta is only learnable for the softmax aggregator; otherwise it is
        # kept as the plain number that was passed in.
        self.beta = (
            nn.Parameter(torch.Tensor([beta]), requires_grad=True)
            if learn_beta and self.aggr == "softmax"
            else beta
        )
        self.p = (
            nn.Parameter(torch.Tensor([p]), requires_grad=True)
            if learn_p
            else p
        )

        self.edge_encoder = BondEncoder(in_dim)

    def forward(self, g, node_feats, edge_feats):
        """Aggregate neighbour messages and update the node features.

        Parameters
        ----------
        g
            Graph to run message passing on.
        node_feats : torch.Tensor
            Node features; must have the same feature size as the encoded
            edge features.
        edge_feats : torch.Tensor
            Raw edge features passed to the bond encoder.

        Returns
        -------
        torch.Tensor
            Output of the MLP applied to ``node_feats`` plus the aggregated
            messages.
        """
        with g.local_scope():
            # Node and edge feature size need to match.
            g.ndata["h"] = node_feats
            g.edata["h"] = self.edge_encoder(edge_feats)
            g.apply_edges(fn.u_add_e("h", "h", "m"))

            if self.aggr == "softmax":
                g.edata["m"] = F.relu(g.edata["m"]) + self.eps
                g.edata["a"] = edge_softmax(g, g.edata["m"] * self.beta)
                g.update_all(
                    lambda edge: {"x": edge.data["m"] * edge.data["a"]},
                    fn.sum("x", "m"),
                )

            elif self.aggr == "power":
                minv, maxv = 1e-7, 1e1
                torch.clamp_(g.edata["m"], minv, maxv)
                g.update_all(
                    lambda edge: {"x": torch.pow(edge.data["m"], self.p)},
                    fn.mean("x", "m"),
                )
                torch.clamp_(g.ndata["m"], minv, maxv)
                # Power mean is (mean(x ** p)) ** (1 / p): undo the power
                # with the reciprocal exponent rather than applying p again.
                g.ndata["m"] = torch.pow(g.ndata["m"], 1 / self.p)

            else:
                raise NotImplementedError(
                    f"Aggregator {self.aggr} is not supported."
                )

            if self.msg_norm is not None:
                g.ndata["m"] = self.msg_norm(node_feats, g.ndata["m"])

            feats = node_feats + g.ndata["m"]

            return self.mlp(feats)
test_loader = DataLoader( dataset[split_idx["test"]], batch_size=args.batch_size, shuffle=False, collate_fn=collate_dgl, ) # load model model = DeeperGCN( node_feat_dim=node_feat_dim, edge_feat_dim=edge_feat_dim, hid_dim=args.hid_dim, out_dim=n_classes, num_layers=args.num_layers, dropout=args.dropout, learn_beta=args.learn_beta, ).to(device) print(model) opt = optim.Adam(model.parameters(), lr=args.lr) loss_fn = nn.BCEWithLogitsLoss() # training & validation & testing best_auc = 0 best_model = copy.deepcopy(model) times = [] print("---------- Training ----------") for i in range(args.epochs): t1 = time.time() train_loss = train(model, device, train_loader, opt, loss_fn) t2 = time.time() if i >= 5: times.append(t2 - t1) train_auc = test(model, device, train_loader, evaluator) valid_auc = test(model, device, valid_loader, evaluator) print( f"Epoch {i} | Train Loss: {train_loss:.4f} | Train Auc: {train_auc:.4f} | Valid Auc: {valid_auc:.4f}" ) if valid_auc > best_auc: best_auc = valid_auc best_model = copy.deepcopy(model) print("---------- Testing ----------") test_auc = test(best_model, device, test_loader, evaluator) print(f"Test Auc: {test_auc}") if len(times) > 0: print("Times/epoch: ", sum(times) / len(times)) if __name__ == "__main__": """ DeeperGCN Hyperparameters """ parser = argparse.ArgumentParser(description="DeeperGCN") # training parser.add_argument( "--gpu", type=int, default=-1, help="GPU index, -1 for CPU." ) parser.add_argument( "--epochs", type=int, default=300, help="Number of epochs to train." ) parser.add_argument("--lr", type=float, default=0.01, help="Learning rate.") parser.add_argument( "--dropout", type=float, default=0.2, help="Dropout rate." ) parser.add_argument( "--batch-size", type=int, default=2048, help="Batch size." ) # model parser.add_argument( "--num-layers", type=int, default=7, help="Number of GNN layers." ) parser.add_argument( "--hid-dim", type=int, default=256, help="Hidden channel size." 
) # learnable parameters in aggr parser.add_argument("--learn-beta", action="store_true") args = parser.parse_args() print(args) main() ================================================ FILE: examples/pytorch/deepergcn/models.py ================================================ import dgl.function as fn import torch.nn as nn import torch.nn.functional as F from dgl.nn.pytorch.glob import AvgPooling from layers import GENConv from ogb.graphproppred.mol_encoder import AtomEncoder class DeeperGCN(nn.Module): r""" Description ----------- Introduced in "DeeperGCN: All You Need to Train Deeper GCNs " Parameters ---------- node_feat_dim: int Size of node feature. edge_feat_dim: int Size of edge feature. hid_dim: int Size of hidden representations. out_dim: int Size of output. num_layers: int Number of graph convolutional layers. dropout: float Dropout rate. Default is 0. beta: float A continuous variable called an inverse temperature. Default is 1.0. learn_beta: bool Whether beta is a learnable weight. Default is False. aggr: str Type of aggregation. Default is 'softmax'. mlp_layers: int Number of MLP layers in message normalization. Default is 1. 
class MessageNorm(nn.Module):
    r"""

    Description
    -----------
    Message normalization was introduced in "DeeperGCN: All You Need to Train
    Deeper GCNs".

    The aggregated message is normalised to unit p-norm along the last
    dimension, rescaled by the p-norm of the node features and multiplied by
    the scaling factor ``s``.

    Parameters
    ----------
    learn_scale: bool
        Whether s is a learnable scaling factor or not. Default is False.
    """

    def __init__(self, learn_scale=False):
        super(MessageNorm, self).__init__()
        # s starts at 1.0 and is only trained when learn_scale is True.
        self.scale = nn.Parameter(
            torch.tensor([1.0]), requires_grad=learn_scale
        )

    def forward(self, feats, msg, p=2):
        """Return ``s * ||feats||_p * msg / ||msg||_p``.

        Parameters
        ----------
        feats : torch.Tensor
            Node features whose norm sets the output magnitude.
        msg : torch.Tensor
            Aggregated messages to normalise.
        p : int or float
            Order of the norm used for both ``msg`` and ``feats``.
            Default is 2.
        """
        # Use the same norm order for both terms; the message was previously
        # always L2-normalised regardless of ``p``.
        msg = F.normalize(msg, p=p, dim=-1)
        feats_norm = feats.norm(p=p, dim=-1, keepdim=True)
        return msg * feats_norm * self.scale
class Discriminator(nn.Module):
    """Bilinear scorer computing ``features @ (W @ summary)``.

    ``W`` is a square ``n_hidden x n_hidden`` parameter initialised
    uniformly in ``[-1/sqrt(n_hidden), 1/sqrt(n_hidden)]``.
    """

    def __init__(self, n_hidden):
        super().__init__()
        self.weight = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        self.reset_parameters()

    def uniform(self, size, tensor):
        """Fill ``tensor`` in place from U(-1/sqrt(size), 1/sqrt(size)).

        Nothing is written when ``tensor`` is ``None``.
        """
        limit = 1.0 / math.sqrt(size)
        if tensor is None:
            return
        tensor.data.uniform_(-limit, limit)

    def reset_parameters(self):
        """Re-draw the weight matrix, bounded by its first dimension."""
        self.uniform(self.weight.size(0), self.weight)

    def forward(self, features, summary):
        """Score ``features`` against ``summary``."""
        projected_summary = torch.matmul(self.weight, summary)
        return torch.matmul(features, projected_summary)
def evaluate(model, features, labels, mask):
    """Return the classification accuracy of ``model`` on the masked rows.

    The model is switched to evaluation mode and run on ``features``
    without gradient tracking. The predicted class is the arg-max over
    dimension 1 of the masked logits. The result is the fraction of masked
    samples whose prediction equals the corresponding entry of ``labels``.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
        predicted = torch.max(selected_logits, dim=1).indices
        num_correct = torch.sum(predicted == selected_labels).item()
    return num_correct * 1.0 / len(selected_labels)
True torch.cuda.set_device(args.gpu) features = features.cuda() labels = labels.cuda() train_mask = train_mask.cuda() val_mask = val_mask.cuda() test_mask = test_mask.cuda() # add self loop if args.self_loop: g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) n_edges = g.num_edges() if args.gpu >= 0: g = g.to(args.gpu) # create DGI model dgi = DGI( g, in_feats, args.n_hidden, args.n_layers, nn.PReLU(args.n_hidden), args.dropout, ) if cuda: dgi.cuda() dgi_optimizer = torch.optim.Adam( dgi.parameters(), lr=args.dgi_lr, weight_decay=args.weight_decay ) # train deep graph infomax cnt_wait = 0 best = 1e9 best_t = 0 mean = 0 for epoch in range(args.n_dgi_epochs): dgi.train() if epoch >= 3: t0 = time.time() dgi_optimizer.zero_grad() loss = dgi(features) loss.backward() dgi_optimizer.step() if loss < best: best = loss best_t = epoch cnt_wait = 0 torch.save(dgi.state_dict(), "best_dgi.pkl") else: cnt_wait += 1 if cnt_wait == args.patience: print("Early stopping!") break if epoch >= 3: mean = (mean * (epoch - 3) + (time.time() - t0)) / (epoch - 2) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, mean, loss.item(), n_edges / mean / 1000 ) ) # create classifier model classifier = Classifier(args.n_hidden, n_classes) if cuda: classifier.cuda() classifier_optimizer = torch.optim.Adam( classifier.parameters(), lr=args.classifier_lr, weight_decay=args.weight_decay, ) # train classifier print("Loading {}th epoch".format(best_t)) dgi.load_state_dict(torch.load("best_dgi.pkl", weights_only=False)) embeds = dgi.encoder(features, corrupt=False) embeds = embeds.detach() mean = 0 for epoch in range(args.n_classifier_epochs): classifier.train() if epoch >= 3: t0 = time.time() classifier_optimizer.zero_grad() preds = classifier(embeds) loss = F.nll_loss(preds[train_mask], labels[train_mask]) loss.backward() classifier_optimizer.step() if epoch >= 3: mean = (mean * (epoch - 3) + (time.time() - t0)) / (epoch - 2) acc = evaluate(classifier, 
embeds, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, mean, loss.item(), acc, n_edges / mean / 1000, ) ) print() acc = evaluate(classifier, embeds, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="DGI") register_data_args(parser) parser.add_argument( "--dropout", type=float, default=0.0, help="dropout probability" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument( "--dgi-lr", type=float, default=1e-3, help="dgi learning rate" ) parser.add_argument( "--classifier-lr", type=float, default=1e-2, help="classifier learning rate", ) parser.add_argument( "--n-dgi-epochs", type=int, default=300, help="number of training epochs", ) parser.add_argument( "--n-classifier-epochs", type=int, default=300, help="number of training epochs", ) parser.add_argument( "--n-hidden", type=int, default=512, help="number of hidden gcn units" ) parser.add_argument( "--n-layers", type=int, default=1, help="number of hidden gcn layers" ) parser.add_argument( "--weight-decay", type=float, default=0.0, help="Weight for L2 loss" ) parser.add_argument( "--patience", type=int, default=20, help="early stop patience condition" ) parser.add_argument( "--self-loop", action="store_true", help="graph self-loop (default=False)", ) parser.set_defaults(self_loop=False) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/dgmg/README.md ================================================ # Learning Deep Generative Models of Graphs This is an implementation of [Learning Deep Generative Models of Graphs](https://arxiv.org/pdf/1803.03324.pdf) by Yujia Li, Oriol Vinyals, Chris Dyer, Razvan Pascanu, Peter Battaglia. For molecule generation, see [DGL-LifeSci](https://github.com/awslabs/dgl-lifesci/tree/master/examples/generative_models/dgmg). 
def is_cycle(g):
    """Return True if ``g`` is a ring over nodes ``0 .. n-1`` in index order.

    A graph qualifies when it has at least three nodes and every node has
    exactly two successors: its ring predecessor and its ring successor,
    with wrap-around at both ends.
    """
    num_nodes = g.num_nodes()
    if num_nodes < 3:
        return False

    last = num_nodes - 1
    for v in range(num_nodes):
        succ = g.successors(v)
        has_ring_neighbors = (
            len(succ) == 2
            and get_previous(v, last) in succ
            and get_next(v, last) in succ
        )
        if not has_ring_neighbors:
            return False

    return True
for generating valid cycles with DGMG for teacher forcing optimization. """ decision_sequence = [] for i in range(size): decision_sequence.append(0) # Add node if i != 0: decision_sequence.append(0) # Add edge decision_sequence.append( i - 1 ) # Set destination to be previous node. if i == size - 1: decision_sequence.append(0) # Add edge decision_sequence.append(0) # Set destination to be the root. decision_sequence.append(1) # Stop adding edge decision_sequence.append(1) # Stop adding node return decision_sequence def generate_dataset(v_min, v_max, n_samples, fname): samples = [] for _ in range(n_samples): size = random.randint(v_min, v_max) samples.append(get_decision_sequence(size)) with open(fname, "wb") as f: pickle.dump(samples, f) class CycleDataset(Dataset): def __init__(self, fname): super(CycleDataset, self).__init__() with open(fname, "rb") as f: self.dataset = pickle.load(f) def __len__(self): return len(self.dataset) def __getitem__(self, index): return self.dataset[index] def collate_single(self, batch): assert len(batch) == 1, "Currently we do not support batched training" return batch[0] def collate_batch(self, batch): return batch def dglGraph_to_adj_list(g): adj_list = {} for node in range(g.num_nodes()): # For undirected graph. successors and # predecessors are equivalent. adj_list[node] = g.successors(node).tolist() return adj_list class CycleModelEvaluation(object): def __init__(self, v_min, v_max, dir): super(CycleModelEvaluation, self).__init__() self.v_min = v_min self.v_max = v_max self.dir = dir def rollout_and_examine(self, model, num_samples): assert not model.training, "You need to call model.eval()." num_total_size = 0 num_valid_size = 0 num_cycle = 0 num_valid = 0 plot_times = 0 adj_lists_to_plot = [] for i in range(num_samples): sampled_graph = model() if isinstance(sampled_graph, list): # When the model is a batched implementation, a list of # DGLGraph objects is returned. 
class CyclePrinting(object):
    """Console progress reporter for training on the cycles dataset.

    Parameters
    ----------
    num_epochs : int
        Total number of epochs, shown as the denominator of the epoch counter.
    num_batches : int
        Number of batches per epoch, shown as the denominator of the batch
        counter and used to wrap the running batch index.
    """

    def __init__(self, num_epochs, num_batches):
        super(CyclePrinting, self).__init__()

        self.num_epochs = num_epochs
        self.num_batches = num_batches
        self.batch_count = 0

    def update(self, epoch, metrics):
        """Advance the batch counter and print one progress line.

        Parameters
        ----------
        epoch : int
            Current (1-based) epoch number.
        metrics : dict
            Mapping from metric name to a numeric value; each value is
            printed with four decimal places.
        """
        # Cycle through 1..num_batches so the counter restarts every epoch.
        self.batch_count = self.batch_count % self.num_batches + 1

        msg = "epoch {:d}/{:d}, batch {:d}/{:d}".format(
            epoch, self.num_epochs, self.batch_count, self.num_batches
        )
        for key, value in metrics.items():
            # "{:.4f}" (precision 4), not "{:4f}" (minimum width 4 with the
            # default six decimals) -- matches the other formatters here.
            msg += ", {}: {:.4f}".format(key, value)
        print(msg)
log_prob.detach().exp() loss = -log_prob / opts["batch_size"] prob_averaged = prob / opts["batch_size"] loss.backward() batch_loss += loss.item() batch_prob += prob_averaged.item() batch_count += 1 if batch_count % opts["batch_size"] == 0: printer.update( epoch + 1, {"averaged_loss": batch_loss, "averaged_prob": batch_prob}, ) if opts["clip_grad"]: clip_grad_norm_(model.parameters(), opts["clip_bound"]) optimizer.step() batch_loss = 0 batch_prob = 0 optimizer.zero_grad() t3 = time.time() model.eval() evaluator.rollout_and_examine(model, opts["num_generated_samples"]) evaluator.write_summary() t4 = time.time() print("It took {} to setup.".format(datetime.timedelta(seconds=t2 - t1))) print( "It took {} to finish training.".format( datetime.timedelta(seconds=t3 - t2) ) ) print( "It took {} to finish evaluation.".format( datetime.timedelta(seconds=t4 - t3) ) ) print( "--------------------------------------------------------------------------" ) print( "On average, an epoch takes {}.".format( datetime.timedelta(seconds=(t3 - t2) / opts["nepochs"]) ) ) del model.g torch.save(model, "./model.pth") if __name__ == "__main__": parser = argparse.ArgumentParser(description="DGMG") # configure parser.add_argument("--seed", type=int, default=9284, help="random seed") # dataset parser.add_argument( "--dataset", choices=["cycles"], default="cycles", help="dataset to use" ) parser.add_argument( "--path-to-dataset", type=str, default="cycles.p", help="load the dataset if it exists, " "generate it and save to the path otherwise", ) # log parser.add_argument( "--log-dir", default="./results", help="folder to save info like experiment configuration " "or model evaluation results", ) # optimization parser.add_argument( "--batch-size", type=int, default=10, help="batch size to use for training", ) parser.add_argument( "--clip-grad", action="store_true", default=True, help="gradient clipping is required to prevent gradient explosion", ) parser.add_argument( "--clip-bound", type=float, 
class GraphEmbed(nn.Module):
    """Gated-sum readout that turns node states into one graph embedding.

    Each node state ``hv`` is projected to ``2 * node_hidden_size`` features
    and weighted by a learned sigmoid gate; the weighted projections are
    summed over all nodes.

    Parameters
    ----------
    node_hidden_size : int
        Size of the per-node hidden state stored in ``g.ndata["hv"]``.
    """

    def __init__(self, node_hidden_size):
        super(GraphEmbed, self).__init__()

        # Setting from the paper
        self.graph_hidden_size = 2 * node_hidden_size

        # Embed graphs
        self.node_gating = nn.Sequential(
            nn.Linear(node_hidden_size, 1), nn.Sigmoid()
        )
        self.node_to_graph = nn.Linear(node_hidden_size, self.graph_hidden_size)

    def forward(self, g):
        """Return a ``(1, graph_hidden_size)`` embedding of ``g``.

        An empty graph (``g.num_nodes() == 0``) maps to an all-zero
        embedding.
        """
        if g.num_nodes() == 0:
            # Allocate from the module's own weights so the zero embedding
            # follows the module's device and dtype instead of always being
            # a CPU float32 tensor.
            return self.node_to_graph.weight.new_zeros(
                1, self.graph_hidden_size
            )

        # Node features are stored as hv in ndata.
        hvs = g.ndata["hv"]
        gated = self.node_gating(hvs) * self.node_to_graph(hvs)
        return gated.sum(0, keepdim=True)
def bernoulli_action_log_prob(logit, action):
    """Calculate the log p of an action with respect to a Bernoulli
    distribution. Use logit rather than prob for numerical stability.

    ``action == 0`` yields ``logsigmoid(-logit)``; any other action yields
    ``logsigmoid(logit)``.
    """
    # Flip the sign for action 0, then a single logsigmoid covers both cases.
    signed_logit = -logit if action == 0 else logit
    return F.logsigmoid(signed_logit)
class ChooseDestAndUpdate(nn.Module):
    """Pick a destination for a new edge from the latest node, then update.

    The latest node (id ``g.num_nodes() - 1``) is scored against every
    earlier node with a linear layer over the concatenated pair of hidden
    states. After a destination is chosen, edges are added in both directions
    (unless already present) and the graph propagation function is run.

    Parameters
    ----------
    graph_prop_func : callable
        Called as ``graph_prop_func(g)`` after new edges are inserted; kept
        in the plain dict ``self.graph_op`` under the key ``"prop"``.
    node_hidden_size : int
        Size of the node hidden state ``hv``.
    """

    def __init__(self, graph_prop_func, node_hidden_size):
        super(ChooseDestAndUpdate, self).__init__()

        self.graph_op = {"prop": graph_prop_func}
        self.choose_dest = nn.Linear(2 * node_hidden_size, 1)

    def _initialize_edge_repr(self, g, src_list, dest_list):
        """Set the ``he`` feature of the given edges to a single 1."""
        # For untyped edges, we only add 1 to indicate its existence.
        # For multiple edge types, we can use a one hot representation
        # or an embedding module.
        g.edges[src_list, dest_list].data["he"] = torch.ones(len(src_list), 1)

    def prepare_training(self):
        """Reset the list that collects per-decision log probabilities."""
        self.log_prob = []

    def forward(self, g, dest):
        """Connect the latest node of ``g`` to ``dest``.

        In training mode ``dest`` is the teacher-forced destination and the
        log probability of that choice is appended to ``self.log_prob``
        (skipped when there is only one candidate). In evaluation mode
        ``dest`` is ignored and sampled from the predicted distribution.
        """
        src = g.num_nodes() - 1
        candidates = range(src)

        # Score every (candidate, src) pair from their hidden states.
        src_embed = g.nodes[src].data["hv"].expand(src, -1)
        candidate_embed = g.nodes[candidates].data["hv"]
        pair_features = torch.cat([candidate_embed, src_embed], dim=1)
        dests_scores = self.choose_dest(pair_features).view(1, -1)
        dests_probs = F.softmax(dests_scores, dim=1)

        if not self.training:
            dest = Categorical(dests_probs).sample().item()

        if not g.has_edges_between(src, dest):
            # For undirected graphs, we add edges for both directions
            # so that we can perform graph propagation.
            both_ways = ([src, dest], [dest, src])
            g.add_edges(*both_ways)
            self._initialize_edge_repr(g, *both_ways)

            self.graph_op["prop"](g)

        if self.training and dests_probs.nelement() > 1:
            chosen_log_prob = F.log_softmax(dests_scores, dim=1)[
                :, dest : dest + 1
            ]
            self.log_prob.append(chosen_log_prob)
Add edges for both directions and update the graph.""" self.choose_dest_agent(self.g, a) def get_log_prob(self): return ( torch.cat(self.add_node_agent.log_prob).sum() + torch.cat(self.add_edge_agent.log_prob).sum() + torch.cat(self.choose_dest_agent.log_prob).sum() ) def forward_train(self, actions): self.prepare_for_train() stop = self.add_node_and_update(a=actions[self.action_step]) while not stop: to_add_edge = self.add_edge_or_not(a=actions[self.action_step]) while to_add_edge: self.choose_dest_and_update(a=actions[self.action_step]) to_add_edge = self.add_edge_or_not(a=actions[self.action_step]) stop = self.add_node_and_update(a=actions[self.action_step]) return self.get_log_prob() def forward_inference(self): stop = self.add_node_and_update() while (not stop) and (self.g.num_nodes() < self.v_max + 1): num_trials = 0 to_add_edge = self.add_edge_or_not() while to_add_edge and (num_trials < self.g.num_nodes() - 1): self.choose_dest_and_update() num_trials += 1 to_add_edge = self.add_edge_or_not() stop = self.add_node_and_update() return self.g def forward(self, actions=None): # The graph we will work on self.g = dgl.DGLGraph() # If there are some features for nodes and edges, # zero tensors will be set for those of new nodes and edges. 
def setup(args):
    """Turn parsed command-line arguments into the experiment options dict.

    Steps, in order: copy ``args.__dict__``; make cuDNN deterministic; seed
    ``random`` and ``torch`` (drawing a seed when none is given); merge in the
    dataset-specific configuration; generate the dataset file if it does not
    exist; validate the gradient-clipping settings; create a time-stamped log
    directory with a ``samples`` sub-folder; switch matplotlib to the ``Agg``
    backend; save and pretty-print the options.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed arguments; must provide at least ``seed``, ``dataset``,
        ``path_to_dataset``, ``log_dir`` and ``clip_grad``.

    Returns
    -------
    dict
        The completed options, with ``log_dir`` replaced by the directory
        created for this run.

    Raises
    ------
    ValueError
        If the dataset is unsupported.
    AssertionError
        If ``path_to_dataset`` is ``None``, or gradient clipping is enabled
        without a ``clip_bound``.
    """
    opts = args.__dict__.copy()

    cudnn.benchmark = False
    cudnn.deterministic = True

    # Seed
    if opts["seed"] is None:
        opts["seed"] = random.randint(1, 10000)
    random.seed(opts["seed"])
    torch.manual_seed(opts["seed"])

    # Dataset
    from configure import dataset_based_configure

    opts = dataset_based_configure(opts)

    assert (
        opts["path_to_dataset"] is not None
    ), "Expect path to dataset to be set."
    if not os.path.exists(opts["path_to_dataset"]):
        if opts["dataset"] == "cycles":
            from cycles import generate_dataset

            generate_dataset(
                opts["min_size"],
                opts["max_size"],
                opts["ds_size"],
                opts["path_to_dataset"],
            )
        else:
            raise ValueError("Unsupported dataset: {}".format(opts["dataset"]))

    # Optimization
    if opts["clip_grad"]:
        # Clipping needs a norm bound. The previous check re-tested
        # "clip_grad" itself, which is always truthy inside this branch.
        assert (
            opts.get("clip_bound") is not None
        ), "Expect the gradient norm constraint to be set."

    # Log
    print("Prepare logging directory...")
    log_dir = setup_log_dir(opts)
    opts["log_dir"] = log_dir
    mkdir_p(log_dir + "/samples")

    plt.switch_backend("Agg")

    save_arg_dict(opts)
    pprint(opts)

    return opts
""" def _weight_init(m): if isinstance(m, nn.Linear): init.normal_(m.weight.data, std=1.0 / 10) init.normal_(m.bias.data, std=1.0 / 10) else: raise ValueError("Expected the input to be of type nn.Linear!") if isinstance(m, nn.ModuleList): for layer in m: layer.apply(_weight_init) else: m.apply(_weight_init) ================================================ FILE: examples/pytorch/diffpool/README.md ================================================ Hierarchical Graph Representation Learning with Differentiable Pooling ============ Paper link: [https://arxiv.org/abs/1806.08804](https://arxiv.org/abs/1806.08804) Author's code repo: [https://github.com/RexYing/diffpool](https://github.com/RexYing/diffpool) This folder contains a DGL implementation of the DiffPool model. The first pooling layer is computed with DGL, and following pooling layers are computed with tensorized operation since the pooled graphs are dense. Dependencies ------------ * PyTorch 1.0+ How to run ---------- ```bash python train.py --dataset ENZYMES --pool_ratio 0.10 --num_pool 1 --epochs 1000 python train.py --dataset DD --pool_ratio 0.15 --num_pool 1 --batch-size 10 ``` Performance ----------- ENZYMES 63.33% (with early stopping) DD 79.31% (with early stopping) ## Update (2021-03-09) **Changes:** * Fix bug in Diffpool: the wrong `assign_dim` parameter * Improve efficiency of DiffPool, make the model independent of batch size. Remove redundant computation. 
**Efficiency:** On V100-SXM2 16GB | | Train time/epoch (original) (s) | Train time/epoch (improved) (s) | | ------------------ | ------------------------------: | ------------------------------: | | DD (batch_size=10) | 21.302 | **17.282** | | DD (batch_size=20) | OOM | **44.682** | | ENZYMES | 1.749 | **1.685** | | | Memory usage (original) (MB) | Memory usage (improved) (MB) | | ------------------ | ---------------------------: | ---------------------------: | | DD (batch_size=10) | 5274.620 | **2928.568** | | DD (batch_size=20) | OOM | **10088.889** | | ENZYMES | 25.685 | **21.909** | **Accuracy** Each experiment with the improved model was run only once, so the results may be noisy. | | Original | Improved | | ------- | ---------: | ---------: | | DD | **79.31%** | 78.33% | | ENZYMES | 63.33% | **68.33%** | ================================================ FILE: examples/pytorch/diffpool/data_utils.py ================================================ import numpy as np import torch def one_hotify(labels, pad=-1): """ cast label to one hot vector """ num_instances = len(labels) if pad <= 0: dim_embedding = np.max(labels) + 1 # zero-indexed assumed else: assert pad > 0, "result_dim for padding one hot embedding not set!"
def pre_process(dataset, prog_args):
    """
    diffpool specific data partition, pre-process and shuffling

    Overwrites ``g.ndata["feat"]`` of every graph in ``dataset`` according to
    ``prog_args.data_mode``:

    * ``"default"`` -- leave the dataset untouched.
    * ``"id"`` -- one-hot encoding of the node id, padded to
      ``dataset.max_num_node``.
    * ``"deg-num"`` -- the in-degree as a single numeric column.
    * ``"deg"`` -- one-hot encoding of the in-degree, padded to
      ``dataset.max_degrees``.

    Any other mode only prints the notice. ``dataset`` must iterate as
    ``(graph, label)`` pairs.
    """
    mode = prog_args.data_mode
    if mode == "default":
        return

    print("overwrite node attributes with DiffPool's preprocess setting")
    # NOTE(review): every branch stores a NumPy array in ``g.ndata`` --
    # confirm the graph frame accepts arrays (it may expect framework tensors).
    if mode == "id":
        for g, _ in dataset:
            id_list = np.arange(g.num_nodes())
            g.ndata["feat"] = one_hotify(id_list, pad=dataset.max_num_node)

    elif mode == "deg-num":
        for g, _ in dataset:
            g.ndata["feat"] = np.expand_dims(g.in_degrees(), axis=1)

    elif mode == "deg":
        # Unpack (graph, label) like the other branches; iterating with a
        # bare ``g`` bound the whole pair and broke ``g.in_degrees()``.
        for g, _ in dataset:
            degs = list(g.in_degrees())
            degs_one_hot = one_hotify(degs, pad=dataset.max_degrees)
            g.ndata["feat"] = degs_one_hot
class LSTMAggregator(Aggregator):
    """
    LSTM aggregator for graphsage

    Runs a single-layer, batch-first LSTM over a random permutation of each
    node's incoming messages and uses the output at the last step as the
    aggregated feature.

    Parameters
    ----------
    in_feats : int
        Feature size of each incoming message.
    hidden_feats : int
        Hidden size of the LSTM, i.e. the size of the aggregated feature.
    """

    def __init__(self, in_feats, hidden_feats):
        super(LSTMAggregator, self).__init__()
        self.lstm = nn.LSTM(in_feats, hidden_feats, batch_first=True)
        self.hidden_dim = hidden_feats
        self.hidden = self.init_hidden()

        # nn.LSTM has no ``.weight`` attribute; its matrices are registered
        # as ``weight_ih_l0`` / ``weight_hh_l0``. Initialise each of them
        # instead of touching a non-existent attribute (AttributeError).
        gain = nn.init.calculate_gain("relu")
        for name, param in self.lstm.named_parameters():
            if "weight" in name:
                nn.init.xavier_uniform_(param, gain=gain)

    def init_hidden(self):
        """
        Defaulted to initialite all zero
        """
        return (
            torch.zeros(1, 1, self.hidden_dim),
            torch.zeros(1, 1, self.hidden_dim),
        )

    def aggre(self, neighbours):
        """
        aggregation function

        ``neighbours`` is indexed as (nodes, messages, features); the message
        axis is shuffled before being fed to the LSTM.
        """
        # N X F
        rand_order = torch.randperm(neighbours.size()[1])
        neighbours = neighbours[:, rand_order, :]

        (lstm_out, self.hidden) = self.lstm(
            neighbours.view(neighbours.size()[0], neighbours.size()[1], -1)
        )
        # Keep only the output of the final step for every node.
        return lstm_out[:, -1, :]

    def forward(self, node):
        """Reduce the mailbox field ``"m"`` into the node field ``"c"``."""
        neighbour = node.mailbox["m"]
        c = self.aggre(neighbour)
        return {"c": c}
class GraphSageLayer(nn.Module):
    """
    GraphSage layer in Inductive learning paper by hamilton
    Here, graphsage layer is a reduced function in DGL framework

    Parameters
    ----------
    in_feats : int
        Input feature size.
    out_feats : int
        Output feature size.
    activation : callable or None
        Activation forwarded to the bundler (and the max-pool aggregator).
    dropout : float
        Dropout probability applied to the input features.
    aggregator_type : str
        ``"maxpool"``, ``"lstm"``, or anything else for the mean aggregator.
    bn : bool
        Apply batch normalisation to the layer output.
    bias : bool
        Whether the linear layers use a bias term.
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        activation,
        dropout,
        aggregator_type,
        bn=False,
        bias=True,
    ):
        super(GraphSageLayer, self).__init__()
        self.use_bn = bn
        self.bundler = Bundler(
            in_feats, out_feats, activation, dropout, bias=bias
        )
        self.dropout = nn.Dropout(p=dropout)

        if aggregator_type == "maxpool":
            self.aggregator = MaxPoolAggregator(
                in_feats, in_feats, activation, bias
            )
        elif aggregator_type == "lstm":
            self.aggregator = LSTMAggregator(in_feats, in_feats)
        else:
            self.aggregator = MeanAggregator()

        if self.use_bn:
            # Built eagerly and sized for the layer *output*, so its
            # parameters are registered before any optimizer is created and
            # move with the module on ``.to(device)``.
            self.bn = nn.BatchNorm1d(out_feats)

    def forward(self, g, h):
        """Run one round of message passing on ``g`` with node features ``h``.

        Returns the updated node features, batch-normalised when the layer
        was constructed with ``bn=True``.
        """
        h = self.dropout(h)
        g.ndata["h"] = h
        g.update_all(fn.copy_u(u="h", out="m"), self.aggregator, self.bundler)
        # Fetch the updated features first, then normalise them. Previously
        # batch norm ran on the stale input and its result was immediately
        # overwritten by this pop, so normalisation never took effect.
        h = g.ndata.pop("h")
        if self.use_bn:
            h = self.bn(h)
        return h
assign_tensor = F.softmax(assign_tensor, dim=1) assign_tensor = torch.split(assign_tensor, g.batch_num_nodes().tolist()) assign_tensor = torch.block_diag( *assign_tensor ) # size = (sum_N, batch_size * N_a) h = torch.matmul(torch.t(assign_tensor), feat) adj = g.adj_external(transpose=True, ctx=device) adj_new = torch.sparse.mm(adj, assign_tensor) adj_new = torch.mm(torch.t(assign_tensor), adj_new) if self.link_pred: current_lp_loss = torch.norm( adj.to_dense() - torch.mm(assign_tensor, torch.t(assign_tensor)) ) / np.power(g.num_nodes(), 2) self.loss_log["LinkPredLoss"] = current_lp_loss for loss_layer in self.reg_loss: loss_name = str(type(loss_layer).__name__) self.loss_log[loss_name] = loss_layer(adj, adj_new, assign_tensor) return adj_new, h ================================================ FILE: examples/pytorch/diffpool/model/encoder.py ================================================ import time import dgl import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from scipy.linalg import block_diag from torch.nn import init from .dgl_layers import DiffPoolBatchedGraphLayer, GraphSage, GraphSageLayer from .model_utils import batch2tensor from .tensorized_layers import * class DiffPool(nn.Module): """ DiffPool Fuse """ def __init__( self, input_dim, hidden_dim, embedding_dim, label_dim, activation, n_layers, dropout, n_pooling, linkpred, batch_size, aggregator_type, assign_dim, pool_ratio, cat=False, ): super(DiffPool, self).__init__() self.link_pred = linkpred self.concat = cat self.n_pooling = n_pooling self.batch_size = batch_size self.link_pred_loss = [] self.entropy_loss = [] # list of GNN modules before the first diffpool operation self.gc_before_pool = nn.ModuleList() self.diffpool_layers = nn.ModuleList() # list of list of GNN modules, each list after one diffpool operation self.gc_after_pool = nn.ModuleList() self.assign_dim = assign_dim self.bn = True self.num_aggs = 1 # constructing layers # layers before diffpool assert 
n_layers >= 3, "n_layers too few" self.gc_before_pool.append( GraphSageLayer( input_dim, hidden_dim, activation, dropout, aggregator_type, self.bn, ) ) for _ in range(n_layers - 2): self.gc_before_pool.append( GraphSageLayer( hidden_dim, hidden_dim, activation, dropout, aggregator_type, self.bn, ) ) self.gc_before_pool.append( GraphSageLayer( hidden_dim, embedding_dim, None, dropout, aggregator_type ) ) assign_dims = [] assign_dims.append(self.assign_dim) if self.concat: # diffpool layer receive pool_emedding_dim node feature tensor # and return pool_embedding_dim node embedding pool_embedding_dim = hidden_dim * (n_layers - 1) + embedding_dim else: pool_embedding_dim = embedding_dim self.first_diffpool_layer = DiffPoolBatchedGraphLayer( pool_embedding_dim, self.assign_dim, hidden_dim, activation, dropout, aggregator_type, self.link_pred, ) gc_after_per_pool = nn.ModuleList() for _ in range(n_layers - 1): gc_after_per_pool.append(BatchedGraphSAGE(hidden_dim, hidden_dim)) gc_after_per_pool.append(BatchedGraphSAGE(hidden_dim, embedding_dim)) self.gc_after_pool.append(gc_after_per_pool) self.assign_dim = int(self.assign_dim * pool_ratio) # each pooling module for _ in range(n_pooling - 1): self.diffpool_layers.append( BatchedDiffPool( pool_embedding_dim, self.assign_dim, hidden_dim, self.link_pred, ) ) gc_after_per_pool = nn.ModuleList() for _ in range(n_layers - 1): gc_after_per_pool.append( BatchedGraphSAGE(hidden_dim, hidden_dim) ) gc_after_per_pool.append( BatchedGraphSAGE(hidden_dim, embedding_dim) ) self.gc_after_pool.append(gc_after_per_pool) assign_dims.append(self.assign_dim) self.assign_dim = int(self.assign_dim * pool_ratio) # predicting layer if self.concat: self.pred_input_dim = ( pool_embedding_dim * self.num_aggs * (n_pooling + 1) ) else: self.pred_input_dim = embedding_dim * self.num_aggs self.pred_layer = nn.Linear(self.pred_input_dim, label_dim) # weight initialization for m in self.modules(): if isinstance(m, nn.Linear): m.weight.data = 
init.xavier_uniform_( m.weight.data, gain=nn.init.calculate_gain("relu") ) if m.bias is not None: m.bias.data = init.constant_(m.bias.data, 0.0) def gcn_forward(self, g, h, gc_layers, cat=False): """ Return gc_layer embedding cat. """ block_readout = [] for gc_layer in gc_layers[:-1]: h = gc_layer(g, h) block_readout.append(h) h = gc_layers[-1](g, h) block_readout.append(h) if cat: block = torch.cat(block_readout, dim=1) # N x F, F = F1 + F2 + ... else: block = h return block def gcn_forward_tensorized(self, h, adj, gc_layers, cat=False): block_readout = [] for gc_layer in gc_layers: h = gc_layer(h, adj) block_readout.append(h) if cat: block = torch.cat(block_readout, dim=2) # N x F, F = F1 + F2 + ... else: block = h return block def forward(self, g): self.link_pred_loss = [] self.entropy_loss = [] h = g.ndata["feat"] # node feature for assignment matrix computation is the same as the # original node feature h_a = h out_all = [] # we use GCN blocks to get an embedding first g_embedding = self.gcn_forward(g, h, self.gc_before_pool, self.concat) g.ndata["h"] = g_embedding readout = dgl.sum_nodes(g, "h") out_all.append(readout) if self.num_aggs == 2: readout = dgl.max_nodes(g, "h") out_all.append(readout) adj, h = self.first_diffpool_layer(g, g_embedding) node_per_pool_graph = int(adj.size()[0] / len(g.batch_num_nodes())) h, adj = batch2tensor(adj, h, node_per_pool_graph) h = self.gcn_forward_tensorized( h, adj, self.gc_after_pool[0], self.concat ) readout = torch.sum(h, dim=1) out_all.append(readout) if self.num_aggs == 2: readout, _ = torch.max(h, dim=1) out_all.append(readout) for i, diffpool_layer in enumerate(self.diffpool_layers): h, adj = diffpool_layer(h, adj) h = self.gcn_forward_tensorized( h, adj, self.gc_after_pool[i + 1], self.concat ) readout = torch.sum(h, dim=1) out_all.append(readout) if self.num_aggs == 2: readout, _ = torch.max(h, dim=1) out_all.append(readout) if self.concat or self.num_aggs > 1: final_readout = torch.cat(out_all, dim=1) else: 
def masked_softmax(
    matrix, mask, dim=-1, memory_efficient=True, mask_fill_value=-1e32
):
    """
    masked_softmax for dgl batch graph
    code snippet contributed by AllenNLP (https://github.com/allenai/allennlp)

    Parameters
    ----------
    matrix : Tensor
        Scores to normalise.
    mask : Tensor or None
        Entries equal to 0 mark positions to exclude. It is cast to float and
        unsqueezed at dim 1 until it has as many dims as ``matrix``. ``None``
        means a plain softmax.
    dim : int
        Dimension along which the softmax is taken.
    memory_efficient : bool
        If true, masked positions are overwritten with ``mask_fill_value``
        before a single softmax. If false, the softmax is taken over
        ``matrix * mask``, masked again and renormalised.
    mask_fill_value : float
        Value written into masked positions in the memory-efficient path.

    Returns
    -------
    Tensor
        Softmax of ``matrix`` along ``dim`` with the mask applied.
    """
    if mask is None:
        return th.nn.functional.softmax(matrix, dim=dim)

    mask = mask.float()
    while mask.dim() < matrix.dim():
        mask = mask.unsqueeze(1)

    if not memory_efficient:
        result = th.nn.functional.softmax(matrix * mask, dim=dim)
        result = result * mask
        # The epsilon avoids 0 / 0 when every position of a row is masked.
        return result / (result.sum(dim=dim, keepdim=True) + 1e-13)

    # masked_fill needs a boolean mask: uint8 masks (the previous `.byte()`)
    # are deprecated and rejected by current PyTorch releases.
    masked_matrix = matrix.masked_fill(mask == 0, mask_fill_value)
    return th.nn.functional.softmax(masked_matrix, dim=dim)
class BatchedGraphSAGE(nn.Module):
    """
    GraphSAGE-style layer operating on dense, batched tensors.

    ``forward`` computes ``relu(l2_normalize(W(adj @ x)))`` and optionally
    applies batch normalisation whose feature count is ``adj.size(1)``.
    """

    def __init__(
        self, infeat, outfeat, use_bn=True, mean=False, add_self=False
    ):
        """
        Parameters
        ----------
        infeat : input feature size of the linear map.
        outfeat : output feature size of the linear map.
        use_bn : apply BatchNorm1d to the output.
        mean : divide ``adj`` by its sum over the last dim before aggregating.
        add_self : add an identity matrix to ``adj`` before aggregating.
        """
        super().__init__()
        self.add_self = add_self
        self.use_bn = use_bn
        self.mean = mean
        self.W = nn.Linear(infeat, outfeat, bias=True)

        relu_gain = nn.init.calculate_gain("relu")
        nn.init.xavier_uniform_(self.W.weight, gain=relu_gain)

    def forward(self, x, adj):
        """Aggregate ``x`` with ``adj``, project, normalise and activate."""
        nodes_per_graph = adj.size(1)

        # The batch-norm module is built on the first call, once the size of
        # dim 1 of ``adj`` is known.
        # NOTE(review): a module created here is not seen by an optimizer or
        # state_dict built before the first forward pass - confirm intent.
        if self.use_bn and not hasattr(self, "bn"):
            self.bn = nn.BatchNorm1d(nodes_per_graph).to(adj.device)

        if self.add_self:
            identity = torch.eye(nodes_per_graph).to(adj.device)
            adj = adj + identity

        if self.mean:
            row_sum = adj.sum(-1, keepdim=True)
            adj = adj / row_sum

        aggregated = torch.matmul(adj, x)
        out = F.relu(F.normalize(self.W(aggregated), dim=2, p=2))
        return self.bn(out) if self.use_bn else out

    def __repr__(self):
        """Prefix the default module repr with "BN" when batch norm is on."""
        base = super().__repr__()
        return "BN" + base if self.use_bn else base
argparse import os import random import time import dgl import dgl.function as fn import networkx as nx import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torch.utils.data from data_utils import pre_process from dgl import DGLGraph from dgl.data import tu from model.encoder import DiffPool global_train_time_per_epoch = [] def arg_parse(): """ argument parser """ parser = argparse.ArgumentParser(description="DiffPool arguments") parser.add_argument("--dataset", dest="dataset", help="Input Dataset") parser.add_argument( "--pool_ratio", dest="pool_ratio", type=float, help="pooling ratio" ) parser.add_argument( "--num_pool", dest="num_pool", type=int, help="num_pooling layer" ) parser.add_argument( "--no_link_pred", dest="linkpred", action="store_false", help="switch of link prediction object", ) parser.add_argument("--cuda", dest="cuda", type=int, help="switch cuda") parser.add_argument("--lr", dest="lr", type=float, help="learning rate") parser.add_argument( "--clip", dest="clip", type=float, help="gradient clipping" ) parser.add_argument( "--batch-size", dest="batch_size", type=int, help="batch size" ) parser.add_argument("--epochs", dest="epoch", type=int, help="num-of-epoch") parser.add_argument( "--train-ratio", dest="train_ratio", type=float, help="ratio of trainning dataset split", ) parser.add_argument( "--test-ratio", dest="test_ratio", type=float, help="ratio of testing dataset split", ) parser.add_argument( "--num_workers", dest="n_worker", type=int, help="number of workers when dataloading", ) parser.add_argument( "--gc-per-block", dest="gc_per_block", type=int, help="number of graph conv layer per block", ) parser.add_argument( "--bn", dest="bn", action="store_const", const=True, default=True, help="switch for bn", ) parser.add_argument( "--dropout", dest="dropout", type=float, help="dropout rate" ) parser.add_argument( "--bias", dest="bias", action="store_const", const=True, default=True, help="switch for bias", 
def prepare_data(dataset, prog_args, train=False, pre_process=None):
    """
    Optionally preprocess ``dataset`` and wrap it in a GraphDataLoader.

    ``pre_process``, when truthy, is called as ``pre_process(dataset,
    prog_args)`` before the loader is built. The loader shuffles only when
    ``train`` is truthy and takes its batch size and worker count from
    ``prog_args.batch_size`` and ``prog_args.n_worker``.
    """
    if pre_process:
        pre_process(dataset, prog_args)

    # dataset.set_fold(fold)
    loader_options = {
        "batch_size": prog_args.batch_size,
        "shuffle": bool(train),
        "num_workers": prog_args.n_worker,
    }
    return dgl.dataloading.GraphDataLoader(dataset, **loader_options)
def train(dataset, model, prog_args, same_feat=True, val_dataset=None):
    """
    Train ``model`` and keep track of the best validation epoch.

    Parameters
    ----------
    dataset : iterable
        Training dataloader yielding ``(batch_graph, graph_labels)`` pairs.
    model : nn.Module
        Model to optimise; it must provide ``loss(pred, label)``.
    prog_args : argparse.Namespace
        Reads ``save_dir``, ``dataset``, ``lr``, ``cuda``, ``epoch`` and
        ``clip``.
    same_feat : bool
        Unused; kept so existing callers keep working.
    val_dataset : iterable, optional
        Validation dataloader. When given, the model is evaluated after each
        epoch and a checkpoint is written whenever validation accuracy is at
        least the best so far and does not exceed the training accuracy.

    Returns
    -------
    dict
        ``{"best_epoch": int, "val_acc": float}``; both are ``-1`` when no
        epoch qualified.
    """
    # Only touch the filesystem when checkpointing is enabled; previously a
    # ``save_dir`` of None crashed here although later code guards for it.
    if prog_args.save_dir is not None:
        checkpoint_dir = prog_args.save_dir + "/" + prog_args.dataset
        os.makedirs(checkpoint_dir, exist_ok=True)

    dataloader = dataset
    # Honour the configured learning rate instead of a hard-coded 0.001.
    # NOTE(review): sub-modules that a layer creates lazily inside forward()
    # (e.g. a batch norm built on first call) are not registered here.
    optimizer = torch.optim.Adam(
        (p for p in model.parameters() if p.requires_grad), lr=prog_args.lr
    )
    early_stopping_logger = {"best_epoch": -1, "val_acc": -1}

    if prog_args.cuda > 0:
        torch.cuda.set_device(0)
    for epoch in range(prog_args.epoch):
        begin_time = time.time()
        model.train()
        accum_correct = 0
        total = 0
        print("\nEPOCH ###### {} ######".format(epoch))
        computation_time = 0.0
        for batch_graph, graph_labels in dataloader:
            for key, value in batch_graph.ndata.items():
                batch_graph.ndata[key] = value.float()
            graph_labels = graph_labels.long()
            if torch.cuda.is_available():
                batch_graph = batch_graph.to(torch.cuda.current_device())
                graph_labels = graph_labels.cuda()

            model.zero_grad()
            compute_start = time.time()
            ypred = model(batch_graph)
            indi = torch.argmax(ypred, dim=1)
            accum_correct += torch.sum(indi == graph_labels).item()
            total += graph_labels.size()[0]
            loss = model.loss(ypred, graph_labels)
            loss.backward()
            # Only forward + backward count as computation time.
            computation_time += time.time() - compute_start
            nn.utils.clip_grad_norm_(model.parameters(), prog_args.clip)
            optimizer.step()

        train_accu = accum_correct / total
        print(
            "train accuracy for this epoch {} is {:.2f}%".format(
                epoch, train_accu * 100
            )
        )
        elapsed_time = time.time() - begin_time
        # ``loss`` is the loss of the last batch of the epoch.
        print(
            "loss {:.4f} with epoch time {:.4f} s & computation time {:.4f} s ".format(
                loss.item(), elapsed_time, computation_time
            )
        )
        global_train_time_per_epoch.append(elapsed_time)
        if val_dataset is not None:
            result = evaluate(val_dataset, model, prog_args)
            print("validation accuracy {:.2f}%".format(result * 100))
            if (
                result >= early_stopping_logger["val_acc"]
                and result <= train_accu
            ):
                early_stopping_logger.update(best_epoch=epoch, val_acc=result)
                if prog_args.save_dir is not None:
                    torch.save(
                        model.state_dict(),
                        prog_args.save_dir
                        + "/"
                        + prog_args.dataset
                        + "/model.iter-"
                        + str(early_stopping_logger["best_epoch"]),
                    )
            print(
                "best epoch is EPOCH {}, val_acc is {:.2f}%".format(
                    early_stopping_logger["best_epoch"],
                    early_stopping_logger["val_acc"] * 100,
                )
            )
        torch.cuda.empty_cache()
    return early_stopping_logger
def main():
    """
    Script entry point.

    Parses the command-line arguments, runs the graph classification task and
    prints the mean recorded epoch time and the peak CUDA memory of device 0
    (bytes divided by 1024 * 1024).
    """
    prog_args = arg_parse()
    print(prog_args)
    graph_classify_task(prog_args)

    epoch_times = global_train_time_per_epoch
    mean_epoch_time = sum(epoch_times) / len(epoch_times)
    print("Train time per epoch: {:.4f}".format(mean_epoch_time))

    bytes_per_mib = 1024 * 1024
    peak_memory = torch.cuda.max_memory_allocated(0) / bytes_per_mib
    print("Max memory usage: {:.4f}".format(peak_memory))
* The advantages of DimeNet++ over DimeNet - Fast interactions: replacing bilinear layer with a simple Hadamard product - Embedding hierarchy: using a higher number of embeddings by reducing the embedding size in blocks via down- and up-projection layers - Other improvements: using fewer interaction blocks ### Requirements The codebase is implemented in Python 3.6. For the version requirements of packages, see below. ``` click 7.1.2 dgl 0.6.0 logzero 1.6.3 numpy 1.19.5 ruamel.yaml 0.16.12 scikit-learn 0.24.1 scipy 1.5.4 sympy 1.7.1 torch 1.7.0 tqdm 4.56.0 ``` ### The graph datasets used in this example The DGL's built-in QM9 dataset. Dataset summary: * Number of Molecular Graphs: 130,831 * Number of Tasks: 12 ### Usage **Note: DimeNet++ is recommended over DimeNet.** ##### Examples The following commands learn a neural network and predict on the test set. Training a DimeNet model on QM9 dataset. ```bash python main.py --model-cnf config/dimenet.yaml ``` Training a DimeNet++ model on QM9 dataset. ```bash python main.py --model-cnf config/dimenet_pp.yaml ``` For faster experimentation, you should first put the author's [pretrained](https://github.com/klicperajo/dimenet/tree/master/pretrained) folder here, which contains pre-trained TensorFlow models. You can convert a TensorFlow model to a PyTorch model by using the following commands. ``` python convert_tf_ckpt_to_pytorch.py --model-cnf config/dimenet_pp.yaml --convert-cnf config/convert.yaml ``` Then you can set `flag: True` in `dimenet_pp.yaml` and run the above script, DimeNet++ will use the pretrained weights to predict on the test set. ##### Configuration For more details, please see `config/dimenet.yaml` and `config/dimenet_pp.yaml` ###### Model options ``` // The following parameters are only used in DimeNet++ out_emb_size int Output embedding size. Default is 256 int_emb_size int Input embedding size. Default is 64 basis_emb_size int Basis embedding size.
Default is 8 extensive bool Readout operator for generating a graph-level representation. Default is True // The following parameter is only used in DimeNet num_bilinear int Third dimension of the bilinear layer tensor in DimeNet. Default is 8 // The following parameters are used in both DimeNet and DimeNet++ emb_size int Embedding size used throughout the model. Default is 128 num_blocks int Number of building blocks to be stacked. Default is 6 in DimeNet and 4 in DimeNet++ num_spherical int Number of spherical harmonics. Default is 7 num_radial int Number of radial basis functions. Default is 6 envelope_exponent int Shape of the smooth cutoff. Default is 5 cutoff float Cutoff distance for interatomic interactions. Default is 5.0 num_before_skip int Number of residual layers in interaction block before skip connection. Default is 1 num_after_skip int Number of residual layers in interaction block after skip connection. Default is 2 num_dense_output int Number of dense layers for the output blocks. Default is 3 targets list List of targets to predict. Default is ['mu'] output_init string Initial function name for output layer. Default is 'GlorotOrthogonal' ``` ###### Training options ``` num_train int Number of training samples. Default is 110000 num_valid int Number of validation samples. Default is 10000 data_seed int Random seed. Default is 42 lr float Learning rate. Default is 0.001 weight_decay float Weight decay. Default is 0.0001 ema_decay float EMA decay. Default is 0. batch_size int Batch size. Default is 100 epochs int Training epochs. Default is 300 early_stopping int Number of epochs to wait for an improvement before early stopping. Default is 20 num_workers int Number of subprocesses to use for data loading. Default is 18 gpu int GPU index. Default is 0, using CUDA:0 interval int Time intervals for model evaluation. Default is 50 step_size int Period of learning rate decay. Default is 100 gamma float Factor of learning rate decay.
Default is 0.3 ``` ### Performance - Batch size is different - Linear learning rate warm-up is not used - Exponential learning rate decay is not used - Exponential moving average (EMA) is not used - The values for tasks except mu, alpha, r2, Cv should be x 10^-3 - The author's code didn't provide the pretrained model for gap task - MAE(DimeNet in Table 1) is from [here](https://arxiv.org/abs/2003.03123) - MAE(DimeNet++ in Table 2) is from [here](https://arxiv.org/abs/2011.14115) | Target | mu | alpha | homo | lumo | gap | r2 | zpve | U0 | U | H | G | Cv | | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | MAE(DimeNet in Table 1) | 0.0286 | 0.0469 | 27.8 | 19.7 | 34.8 | 0.331 | 1.29 | 8.02 | 7.89 | 8.11 | 8.98 | 0.0249 | | MAE(DimeNet++ in Table 2) | 0.0297 | 0.0435 | 24.6 | 19.5 | 32.6 | 0.331 | 1.21 | 6.32 | 6.28 | 6.53 | 7.56 | 0.0230 | | MAE(DimeNet++, TF, pretrain) | 0.0297 | 0.0435 | 0.0246 | 0.0195 | - | 0.3312 | 0.00121 | 0.0063 | 0.00628 | 0.00653 | 0.00756 | 0.0230 | | MAE(DimeNet++, TF, scratch) | 0.0330 | 0.0447 | 0.0251 | 0.0227 | 0.0486 | 0.3574 | 0.00123 | 0.0065 | 0.00635 | 0.00658 | 0.00747 | 0.0224 | | MAE(DimeNet++, DGL) | 0.0326 | 0.0537 | 0.0311 | 0.0255 | 0.0490 | 0.4801 | 0.0043 | 0.0141 | 0.0109 | 0.0117 | 0.0150 | 0.0254 | ### Speed | Model | Original Implementation | DGL Implementation | Improvement | | :-: | :-: | :-: | :-: | | DimeNet | 2839 | 1345 | 2.1x | | DimeNet++ | 624 | 238 | 2.6x | ================================================ FILE: examples/pytorch/dimenet/config/convert.yaml ================================================ tf: ckpt_path: 'pretrained/dimenet_pp/mu' torch: dump_path: 'pretrained/converted' ================================================ FILE: examples/pytorch/dimenet/config/dimenet.yaml ================================================ name: "dimenet" model: emb_size: 128 num_blocks: 6 num_bilinear: 8 num_spherical: 7 num_radial: 6 envelope_exponent: 5 cutoff: 5.0 num_before_skip: 
@click.command()
@click.option(
    "-m",
    "--model-cnf",
    type=click.Path(exists=True),
    help="Path of model config yaml.",
)
@click.option(
    "-c",
    "--convert-cnf",
    type=click.Path(exists=True),
    help="Path of convert config yaml.",
)
def main(model_cnf, convert_cnf):
    """
    Convert a TensorFlow DimeNet++ checkpoint into a PyTorch state dict.

    Parameters
    ----------
    model_cnf : str
        Path of the model config yaml; its ``name``, ``model`` and ``train``
        sections are read.
    convert_cnf : str
        Path of the convert config yaml; ``tf.ckpt_path`` is the checkpoint
        to read and ``torch.dump_path`` the directory to write into.

    The converted weights are saved as ``<dump_path>/<first target>.pt``.
    """
    yaml = YAML(typ="safe")
    model_cnf = yaml.load(Path(model_cnf))
    convert_cnf = yaml.load(Path(convert_cnf))
    model_name, model_params, _ = (
        model_cnf["name"],
        model_cnf["model"],
        model_cnf["train"],
    )
    logger.info(f"Model name: {model_name}")
    logger.info(f"Model params: {model_params}")

    # ``targets`` is a list in the config, so testing the list itself for
    # membership in a list of names was always False and zero initialisation
    # was never chosen. Compare the individual target names instead.
    targets = model_params["targets"]
    if isinstance(targets, str):
        targets = [targets]
    zero_init_targets = {"mu", "homo", "lumo", "gap", "zpve"}
    if targets and all(t in zero_init_targets for t in targets):
        model_params["output_init"] = nn.init.zeros_
    else:
        # 'GlorotOrthogonal' for alpha, R2, U0, U, H, G, and Cv
        model_params["output_init"] = GlorotOrthogonal

    # model initialization
    logger.info("Loading Model")
    model = DimeNetPP(
        emb_size=model_params["emb_size"],
        out_emb_size=model_params["out_emb_size"],
        int_emb_size=model_params["int_emb_size"],
        basis_emb_size=model_params["basis_emb_size"],
        num_blocks=model_params["num_blocks"],
        num_spherical=model_params["num_spherical"],
        num_radial=model_params["num_radial"],
        cutoff=model_params["cutoff"],
        envelope_exponent=model_params["envelope_exponent"],
        num_before_skip=model_params["num_before_skip"],
        num_after_skip=model_params["num_after_skip"],
        num_dense_output=model_params["num_dense_output"],
        num_targets=len(model_params["targets"]),
        extensive=model_params["extensive"],
        output_init=model_params["output_init"],
    )
    logger.info(model.state_dict())

    tf_path, torch_path = (
        convert_cnf["tf"]["ckpt_path"],
        convert_cnf["torch"]["dump_path"],
    )
    init_vars = tf.train.list_variables(tf_path)
    tf_vars_dict = {}

    # 147 keys
    for name, shape in init_vars:
        if name == "_CHECKPOINTABLE_OBJECT_GRAPH":
            continue
        array = tf.train.load_variable(tf_path, name)
        logger.info(f"Loading TF weight {name} with shape {shape}")
        tf_vars_dict[name] = array

    for name, array in tf_vars_dict.items():
        # The last two path components of a variable name are dropped; the
        # rest is walked as an attribute path on the PyTorch model.
        name = name.split("/")[:-2]
        pointer = model

        for m_name in name:
            if m_name == "kernel":
                pointer = getattr(pointer, "weight")
            elif m_name == "int_blocks":
                pointer = getattr(pointer, "interaction_blocks")
            elif m_name == "embeddings":
                pointer = getattr(pointer, "embedding")
                pointer = getattr(pointer, "weight")
            else:
                pointer = getattr(pointer, m_name)

        # Kernels are transposed before being copied into ``weight``.
        if name[-1] == "kernel":
            array = np.transpose(array)
        assert array.shape == pointer.shape
        logger.info(f"Initialize PyTorch weight {name}")
        pointer.data = torch.from_numpy(array)

    logger.info(f"Save PyTorch model to {torch_path}")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(torch_path, exist_ok=True)
    target = model_params["targets"][0]
    torch.save(model.state_dict(), f"{torch_path}/{target}.pt")
    logger.info(model.state_dict())
def edge_init(edges):
    """
    Compute per-edge geometric features from the endpoint feature ``"R"``.

    Returns a dict with
    ``"d"``: square root of the clamped (non-negative) sum of squared
    differences over the last dim, i.e. the bond length, and
    ``"o"``: the difference ``src["R"] - dst["R"]``, i.e. the bond
    orientation.
    """
    offset = edges.src["R"] - edges.dst["R"]
    squared_length = torch.sum(offset**2, -1)
    # relu clamps the value fed to sqrt at zero.
    length = torch.sqrt(F.relu(squared_length))
    # d: bond length, o: bond orientation
    return {"d": length, "o": offset}
model_params, train_params, pretrain_params = ( model_cnf["name"], model_cnf["model"], model_cnf["train"], model_cnf["pretrain"], ) logger.info(f"Model name: {model_name}") logger.info(f"Model params: {model_params}") logger.info(f"Train params: {train_params}") if model_params["targets"] in ["mu", "homo", "lumo", "gap", "zpve"]: model_params["output_init"] = nn.init.zeros_ else: # 'GlorotOrthogonal' for alpha, R2, U0, U, H, G, and Cv model_params["output_init"] = GlorotOrthogonal logger.info("Loading Data Set") dataset = QM9(label_keys=model_params["targets"], edge_funcs=[edge_init]) # data split train_data, valid_data, test_data = split_dataset( dataset, num_train=train_params["num_train"], num_valid=train_params["num_valid"], shuffle=True, random_state=train_params["data_seed"], ) logger.info(f"Size of Training Set: {len(train_data)}") logger.info(f"Size of Validation Set: {len(valid_data)}") logger.info(f"Size of Test Set: {len(test_data)}") # data loader train_loader = DataLoader( train_data, batch_size=train_params["batch_size"], shuffle=True, collate_fn=_collate_fn, num_workers=train_params["num_workers"], ) valid_loader = DataLoader( valid_data, batch_size=train_params["batch_size"], shuffle=False, collate_fn=_collate_fn, num_workers=train_params["num_workers"], ) test_loader = DataLoader( test_data, batch_size=train_params["batch_size"], shuffle=False, collate_fn=_collate_fn, num_workers=train_params["num_workers"], ) # check cuda gpu = train_params["gpu"] device = f"cuda:{gpu}" if gpu >= 0 and torch.cuda.is_available() else "cpu" # model initialization logger.info("Loading Model") if model_name == "dimenet": model = DimeNet( emb_size=model_params["emb_size"], num_blocks=model_params["num_blocks"], num_bilinear=model_params["num_bilinear"], num_spherical=model_params["num_spherical"], num_radial=model_params["num_radial"], cutoff=model_params["cutoff"], envelope_exponent=model_params["envelope_exponent"], num_before_skip=model_params["num_before_skip"], 
num_after_skip=model_params["num_after_skip"], num_dense_output=model_params["num_dense_output"], num_targets=len(model_params["targets"]), output_init=model_params["output_init"], ).to(device) elif model_name == "dimenet++": model = DimeNetPP( emb_size=model_params["emb_size"], out_emb_size=model_params["out_emb_size"], int_emb_size=model_params["int_emb_size"], basis_emb_size=model_params["basis_emb_size"], num_blocks=model_params["num_blocks"], num_spherical=model_params["num_spherical"], num_radial=model_params["num_radial"], cutoff=model_params["cutoff"], envelope_exponent=model_params["envelope_exponent"], num_before_skip=model_params["num_before_skip"], num_after_skip=model_params["num_after_skip"], num_dense_output=model_params["num_dense_output"], num_targets=len(model_params["targets"]), extensive=model_params["extensive"], output_init=model_params["output_init"], ).to(device) else: raise ValueError(f"Invalid Model Name {model_name}") if pretrain_params["flag"]: torch_path = pretrain_params["path"] target = model_params["targets"][0] model.load_state_dict( torch.load(f"{torch_path}/{target}.pt", weights_only=False) ) logger.info("Testing with Pretrained model") predictions, labels = evaluate(device, model, test_loader) test_mae = mean_absolute_error(labels, predictions) logger.info(f"Test MAE {test_mae:.4f}") return # define loss function and optimization loss_fn = nn.L1Loss() opt = optim.Adam( model.parameters(), lr=train_params["lr"], weight_decay=train_params["weight_decay"], amsgrad=True, ) scheduler = optim.lr_scheduler.StepLR( opt, train_params["step_size"], gamma=train_params["gamma"] ) # model training best_mae = 1e9 no_improvement = 0 # EMA for valid and test logger.info("EMA Init") ema_model = copy.deepcopy(model) for p in ema_model.parameters(): p.requires_grad_(False) best_model = copy.deepcopy(ema_model) logger.info("Training") for i in range(train_params["epochs"]): train_loss = train(device, model, opt, loss_fn, train_loader) ema(ema_model, 
model, train_params["ema_decay"]) if i % train_params["interval"] == 0: predictions, labels = evaluate(device, ema_model, valid_loader) valid_mae = mean_absolute_error(labels, predictions) logger.info( f"Epoch {i} | Train Loss {train_loss:.4f} | Val MAE {valid_mae:.4f}" ) if valid_mae > best_mae: no_improvement += 1 if no_improvement == train_params["early_stopping"]: logger.info("Early stop.") break else: no_improvement = 0 best_mae = valid_mae best_model = copy.deepcopy(ema_model) else: logger.info(f"Epoch {i} | Train Loss {train_loss:.4f}") scheduler.step() logger.info("Testing") predictions, labels = evaluate(device, best_model, test_loader) test_mae = mean_absolute_error(labels, predictions) logger.info("Test MAE {:.4f}".format(test_mae)) if __name__ == "__main__": main() ================================================ FILE: examples/pytorch/dimenet/modules/activations.py ================================================ import torch def swish(x): """ Swish activation function, from Ramachandran, Zopf, Le 2017. 
"Searching for Activation Functions" """ return x * torch.sigmoid(x) ================================================ FILE: examples/pytorch/dimenet/modules/basis_utils.py ================================================ import numpy as np import sympy as sym from scipy import special as sp from scipy.optimize import brentq def Jn(r, n): """ r: int or list n: int or list len(r) == len(n) return value should be the same shape as the input data === example: r = n = np.array([1, 2, 3, 4]) res = [0.3, 0.1, 0.1, 0.1] === numerical spherical bessel functions of order n """ return np.sqrt(np.pi / (2 * r)) * sp.jv(n + 0.5, r) # the same shape as n def Jn_zeros(n, k): """ n: int k: int res: array of shape [n, k] Compute the first k zeros of the spherical bessel functions up to order n (excluded) """ zerosj = np.zeros((n, k), dtype="float32") zerosj[0] = np.arange(1, k + 1) * np.pi points = np.arange(1, k + n) * np.pi racines = np.zeros(k + n - 1, dtype="float32") for i in range(1, n): for j in range(k + n - 1 - i): foo = brentq(Jn, points[j], points[j + 1], (i,)) racines[j] = foo points = racines zerosj[i][:k] = racines[:k] return zerosj def spherical_bessel_formulas(n): """ n: int res: array of shape [n,] n sympy functions Computes the sympy formulas for the spherical bessel functions up to order n (excluded) """ x = sym.symbols("x") f = [sym.sin(x) / x] a = sym.sin(x) / x for i in range(1, n): b = sym.diff(a, x) / x f += [sym.simplify(b * (-x) ** i)] a = sym.simplify(b) return f def bessel_basis(n, k): """ n: int k: int res: [n, k] n * k sympy functions Computes the sympy formulas for the normalized and rescaled spherical bessel functions up to order n (excluded) and maximum frequency k (excluded). 
""" zeros = Jn_zeros(n, k) normalizer = [] for order in range(n): normalizer_tmp = [] for i in range(k): normalizer_tmp += [0.5 * Jn(zeros[order, i], order + 1) ** 2] normalizer_tmp = 1 / np.array(normalizer_tmp) ** 0.5 normalizer += [normalizer_tmp] f = spherical_bessel_formulas(n) x = sym.symbols("x") bess_basis = [] for order in range(n): bess_basis_tmp = [] for i in range(k): bess_basis_tmp += [ sym.simplify( normalizer[order][i] * f[order].subs(x, zeros[order, i] * x) ) ] bess_basis += [bess_basis_tmp] return bess_basis def sph_harm_prefactor(l, m): """ l: int m: int res: float Computes the constant pre-factor for the spherical harmonic of degree l and order m input: l: int, l>=0 m: int, -l<=m<=l """ return ( (2 * l + 1) * np.math.factorial(l - abs(m)) / (4 * np.pi * np.math.factorial(l + abs(m))) ) ** 0.5 def associated_legendre_polynomials(l, zero_m_only=True): """ l: int return: l sympy functions Computes sympy formulas of the associated legendre polynomials up to order l (excluded). """ z = sym.symbols("z") P_l_m = [[0] * (j + 1) for j in range(l)] P_l_m[0][0] = 1 if l > 0: P_l_m[1][0] = z for j in range(2, l): P_l_m[j][0] = sym.simplify( ((2 * j - 1) * z * P_l_m[j - 1][0] - (j - 1) * P_l_m[j - 2][0]) / j ) if not zero_m_only: for i in range(1, l): P_l_m[i][i] = sym.simplify((1 - 2 * i) * P_l_m[i - 1][i - 1]) if i + 1 < l: P_l_m[i + 1][i] = sym.simplify( (2 * i + 1) * z * P_l_m[i][i] ) for j in range(i + 2, l): P_l_m[j][i] = sym.simplify( ( (2 * j - 1) * z * P_l_m[j - 1][i] - (i + j - 1) * P_l_m[j - 2][i] ) / (j - i) ) return P_l_m def real_sph_harm(l, zero_m_only=True, spherical_coordinates=True): """ return: a sympy function list of length l, for i-th index of the list, it is also a list of length (2 * i + 1) Computes formula strings of the real part of the spherical harmonics up to order l (excluded). Variables are either cartesian coordinates x,y,z on the unit sphere or spherical coordinates phi and theta. 
""" if not zero_m_only: S_m = [0] C_m = [1] for i in range(1, l): x = sym.symbols("x") y = sym.symbols("y") S_m += [x * S_m[i - 1] + y * C_m[i - 1]] C_m += [x * C_m[i - 1] - y * S_m[i - 1]] P_l_m = associated_legendre_polynomials(l, zero_m_only) if spherical_coordinates: theta = sym.symbols("theta") z = sym.symbols("z") for i in range(len(P_l_m)): for j in range(len(P_l_m[i])): if type(P_l_m[i][j]) != int: P_l_m[i][j] = P_l_m[i][j].subs(z, sym.cos(theta)) if not zero_m_only: phi = sym.symbols("phi") for i in range(len(S_m)): S_m[i] = ( S_m[i] .subs(x, sym.sin(theta) * sym.cos(phi)) .subs(y, sym.sin(theta) * sym.sin(phi)) ) for i in range(len(C_m)): C_m[i] = ( C_m[i] .subs(x, sym.sin(theta) * sym.cos(phi)) .subs(y, sym.sin(theta) * sym.sin(phi)) ) Y_func_l_m = [["0"] * (2 * j + 1) for j in range(l)] for i in range(l): Y_func_l_m[i][0] = sym.simplify(sph_harm_prefactor(i, 0) * P_l_m[i][0]) if not zero_m_only: for i in range(1, l): for j in range(1, i + 1): Y_func_l_m[i][j] = sym.simplify( 2**0.5 * sph_harm_prefactor(i, j) * C_m[j] * P_l_m[i][j] ) for i in range(1, l): for j in range(1, i + 1): Y_func_l_m[i][-j] = sym.simplify( 2**0.5 * sph_harm_prefactor(i, -j) * S_m[j] * P_l_m[i][j] ) return Y_func_l_m ================================================ FILE: examples/pytorch/dimenet/modules/bessel_basis_layer.py ================================================ import numpy as np import torch import torch.nn as nn from modules.envelope import Envelope class BesselBasisLayer(nn.Module): def __init__(self, num_radial, cutoff, envelope_exponent=5): super(BesselBasisLayer, self).__init__() self.cutoff = cutoff self.envelope = Envelope(envelope_exponent) self.frequencies = nn.Parameter(torch.Tensor(num_radial)) self.reset_params() def reset_params(self): with torch.no_grad(): torch.arange( 1, self.frequencies.numel() + 1, out=self.frequencies ).mul_(np.pi) self.frequencies.requires_grad_() def forward(self, g): d_scaled = g.edata["d"] / self.cutoff # Necessary for proper 
class DimeNet(nn.Module):
    """
    DimeNet model.

    Parameters
    ----------
    emb_size
        Embedding size used throughout the model
    num_blocks
        Number of building blocks to be stacked
    num_bilinear
        Third dimension of the bilinear layer tensor
    num_spherical
        Number of spherical harmonics
    num_radial
        Number of radial basis functions
    cutoff
        Cutoff distance for interatomic interactions
    envelope_exponent
        Shape of the smooth cutoff
    num_before_skip
        Number of residual layers in interaction block before skip connection
    num_after_skip
        Number of residual layers in interaction block after skip connection
    num_dense_output
        Number of dense layers for the output blocks
    num_targets
        Number of targets to predict
    activation
        Activation function
    output_init
        Initial function in output block
    """

    def __init__(
        self,
        emb_size,
        num_blocks,
        num_bilinear,
        num_spherical,
        num_radial,
        cutoff=5.0,
        envelope_exponent=5,
        num_before_skip=1,
        num_after_skip=2,
        num_dense_output=3,
        num_targets=12,
        activation=swish,
        output_init=nn.init.zeros_,
    ):
        super(DimeNet, self).__init__()

        self.num_blocks = num_blocks
        self.num_radial = num_radial

        # radial (Bessel) and spherical basis expansion layers
        self.rbf_layer = BesselBasisLayer(
            num_radial=num_radial,
            cutoff=cutoff,
            envelope_exponent=envelope_exponent,
        )

        self.sbf_layer = SphericalBasisLayer(
            num_spherical=num_spherical,
            num_radial=num_radial,
            cutoff=cutoff,
            envelope_exponent=envelope_exponent,
        )

        # embedding block
        self.emb_block = EmbeddingBlock(
            emb_size=emb_size,
            num_radial=num_radial,
            bessel_funcs=self.sbf_layer.get_bessel_funcs(),
            cutoff=cutoff,
            envelope_exponent=envelope_exponent,
            activation=activation,
        )

        # output block
        # Built from a list (not a set): a set of modules is iterated in
        # hash order, which would make the index of each freshly
        # initialised block vary between runs.
        self.output_blocks = nn.ModuleList(
            [
                OutputBlock(
                    emb_size=emb_size,
                    num_radial=num_radial,
                    num_dense=num_dense_output,
                    num_targets=num_targets,
                    activation=activation,
                    output_init=output_init,
                )
                for _ in range(num_blocks + 1)
            ]
        )

        # interaction block
        self.interaction_blocks = nn.ModuleList(
            [
                InteractionBlock(
                    emb_size=emb_size,
                    num_radial=num_radial,
                    num_spherical=num_spherical,
                    num_bilinear=num_bilinear,
                    num_before_skip=num_before_skip,
                    num_after_skip=num_after_skip,
                    activation=activation,
                )
                for _ in range(num_blocks)
            ]
        )

    def edge_init(self, edges):
        """Compute the spherical basis feature ``sbf`` for line-graph edges."""
        # Calculate angles k -> j -> i
        R1, R2 = edges.src["o"], edges.dst["o"]
        x = torch.sum(R1 * R2, dim=-1)
        # dim must be explicit: without it torch.cross uses the first
        # dimension of size 3, which is the edge dimension whenever the
        # batch happens to contain exactly three line-graph edges.
        y = torch.cross(R1, R2, dim=-1)
        y = torch.norm(y, dim=-1)
        angle = torch.atan2(y, x)
        # Transform via angles
        cbf = [f(angle) for f in self.sbf_layer.get_sph_funcs()]
        cbf = torch.stack(cbf, dim=1)  # [num_edges, num_spherical]
        # [num_edges, num_spherical * num_radial]
        cbf = cbf.repeat_interleave(self.num_radial, dim=1)
        sbf = edges.src["rbf_env"] * cbf
        return {"sbf": sbf}

    def forward(self, g, l_g):
        """Run the model on graph ``g`` and its line graph ``l_g``.

        Returns the sum of the predictions of all output blocks.
        """
        # add rbf features for each edge in one batch graph, [num_radial,]
        g = self.rbf_layer(g)
        # Embedding block
        g = self.emb_block(g)
        # Output block
        P = self.output_blocks[0](g)  # [batch_size, num_targets]
        # Prepare sbf feature before the following blocks:
        # edge features of g become node features of the line graph.
        for k, v in g.edata.items():
            l_g.ndata[k] = v

        l_g.apply_edges(self.edge_init)
        # Interaction blocks
        for i in range(self.num_blocks):
            g = self.interaction_blocks[i](g, l_g)
            P += self.output_blocks[i + 1](g)

        return P
class DimeNetPP(nn.Module):
    """
    DimeNet++ model.

    Parameters
    ----------
    emb_size
        Embedding size used for the messages
    out_emb_size
        Embedding size used for atoms in the output block
    int_emb_size
        Embedding size used for interaction triplets
    basis_emb_size
        Embedding size used inside the basis transformation
    num_blocks
        Number of building blocks to be stacked
    num_spherical
        Number of spherical harmonics
    num_radial
        Number of radial basis functions
    cutoff
        Cutoff distance for interatomic interactions
    envelope_exponent
        Shape of the smooth cutoff
    num_before_skip
        Number of residual layers in interaction block before skip connection
    num_after_skip
        Number of residual layers in interaction block after skip connection
    num_dense_output
        Number of dense layers for the output blocks
    num_targets
        Number of targets to predict
    activation
        Activation function
    extensive
        Whether the output should be extensive (proportional to the number of atoms)
    output_init
        Initial function in output block
    """

    def __init__(
        self,
        emb_size,
        out_emb_size,
        int_emb_size,
        basis_emb_size,
        num_blocks,
        num_spherical,
        num_radial,
        cutoff=5.0,
        envelope_exponent=5,
        num_before_skip=1,
        num_after_skip=2,
        num_dense_output=3,
        num_targets=12,
        activation=swish,
        extensive=True,
        output_init=nn.init.zeros_,
    ):
        super(DimeNetPP, self).__init__()

        self.num_blocks = num_blocks
        self.num_radial = num_radial

        # radial (Bessel) and spherical basis expansion layers
        self.rbf_layer = BesselBasisLayer(
            num_radial=num_radial,
            cutoff=cutoff,
            envelope_exponent=envelope_exponent,
        )

        self.sbf_layer = SphericalBasisLayer(
            num_spherical=num_spherical,
            num_radial=num_radial,
            cutoff=cutoff,
            envelope_exponent=envelope_exponent,
        )

        # embedding block
        self.emb_block = EmbeddingBlock(
            emb_size=emb_size,
            num_radial=num_radial,
            bessel_funcs=self.sbf_layer.get_bessel_funcs(),
            cutoff=cutoff,
            envelope_exponent=envelope_exponent,
            activation=activation,
        )

        # output block
        # Built from a list (not a set): a set of modules is iterated in
        # hash order, which would make the index of each freshly
        # initialised block vary between runs.
        self.output_blocks = nn.ModuleList(
            [
                OutputPPBlock(
                    emb_size=emb_size,
                    out_emb_size=out_emb_size,
                    num_radial=num_radial,
                    num_dense=num_dense_output,
                    num_targets=num_targets,
                    activation=activation,
                    extensive=extensive,
                    output_init=output_init,
                )
                for _ in range(num_blocks + 1)
            ]
        )

        # interaction block
        self.interaction_blocks = nn.ModuleList(
            [
                InteractionPPBlock(
                    emb_size=emb_size,
                    int_emb_size=int_emb_size,
                    basis_emb_size=basis_emb_size,
                    num_radial=num_radial,
                    num_spherical=num_spherical,
                    num_before_skip=num_before_skip,
                    num_after_skip=num_after_skip,
                    activation=activation,
                )
                for _ in range(num_blocks)
            ]
        )

    def edge_init(self, edges):
        """Compute the spherical basis feature ``sbf`` for line-graph edges."""
        # Calculate angles k -> j -> i
        R1, R2 = edges.src["o"], edges.dst["o"]
        x = torch.sum(R1 * R2, dim=-1)
        # dim must be explicit: without it torch.cross uses the first
        # dimension of size 3, which is the edge dimension whenever the
        # batch happens to contain exactly three line-graph edges.
        y = torch.cross(R1, R2, dim=-1)
        y = torch.norm(y, dim=-1)
        angle = torch.atan2(y, x)
        # Transform via angles
        cbf = [f(angle) for f in self.sbf_layer.get_sph_funcs()]
        cbf = torch.stack(cbf, dim=1)  # [num_edges, num_spherical]
        # [num_edges, num_spherical * num_radial]
        cbf = cbf.repeat_interleave(self.num_radial, dim=1)
        # Notice: it's dst, not src
        sbf = edges.dst["rbf_env"] * cbf
        return {"sbf": sbf}

    def forward(self, g, l_g):
        """Run the model on graph ``g`` and its line graph ``l_g``.

        Returns the sum of the predictions of all output blocks.
        """
        # add rbf features for each edge in one batch graph, [num_radial,]
        g = self.rbf_layer(g)
        # Embedding block
        g = self.emb_block(g)
        # Output block
        P = self.output_blocks[0](g)  # [batch_size, num_targets]
        # Prepare sbf feature before the following blocks:
        # edge features of g become node features of the line graph.
        for k, v in g.edata.items():
            l_g.ndata[k] = v

        l_g.apply_edges(self.edge_init)
        # Interaction blocks
        for i in range(self.num_blocks):
            g = self.interaction_blocks[i](g, l_g)
            P += self.output_blocks[i + 1](g)

        return P
class EmbeddingBlock(nn.Module):
    """Builds the initial node embedding ``h`` and the edge features
    ``m`` (message embedding) and ``rbf_env`` (enveloped Bessel basis)."""

    def __init__(
        self,
        emb_size,
        num_radial,
        bessel_funcs,
        cutoff,
        envelope_exponent,
        num_atom_types=95,
        activation=None,
    ):
        super().__init__()

        self.bessel_funcs = bessel_funcs
        self.cutoff = cutoff
        self.activation = activation
        self.envelope = Envelope(envelope_exponent)
        # Learnable layers: atom-type lookup, rbf projection, and the
        # dense layer applied to [h_src, h_dst, rbf].
        self.embedding = nn.Embedding(num_atom_types, emb_size)
        self.dense_rbf = nn.Linear(num_radial, emb_size)
        self.dense = nn.Linear(emb_size * 3, emb_size)
        self.reset_params()

    def reset_params(self):
        """Re-initialise the embedding table and both dense weights."""
        bound = np.sqrt(3)
        nn.init.uniform_(self.embedding.weight, a=-bound, b=bound)
        for linear in (self.dense_rbf, self.dense):
            GlorotOrthogonal(linear.weight)

    def edge_init(self, edges):
        """msg emb init"""
        act = self.activation

        # m init
        rbf_emb = self.dense_rbf(edges.data["rbf"])
        if act is not None:
            rbf_emb = act(rbf_emb)

        msg = self.dense(
            torch.cat((edges.src["h"], edges.dst["h"], rbf_emb), dim=-1)
        )
        if act is not None:
            msg = act(msg)

        # rbf_env init
        scaled_dist = edges.data["d"] / self.cutoff
        basis = torch.stack(
            [bessel(scaled_dist) for bessel in self.bessel_funcs], dim=1
        )
        # Add a trailing axis to the envelope so it scales every basis column.
        rbf_env = self.envelope(scaled_dist).unsqueeze(1) * basis

        return {"m": msg, "rbf_env": rbf_env}

    def forward(self, g):
        """Write ``h`` to the nodes and ``m`` / ``rbf_env`` to the edges of g."""
        atom_types = g.ndata["Z"]
        g.ndata["h"] = self.embedding(atom_types)
        g.apply_edges(self.edge_init)
        return g
def GlorotOrthogonal(tensor, scale=2.0):
    """Orthogonal initialisation rescaled in place so that the variance of
    ``tensor`` equals ``scale / (tensor.size(-2) + tensor.size(-1))``.

    Does nothing when ``tensor`` is None.
    """
    if tensor is None:
        return

    nn.init.orthogonal_(tensor.data)
    fan_sum = tensor.size(-2) + tensor.size(-1)
    # Factor that maps the current variance onto scale / fan_sum.
    variance_ratio = scale / (fan_sum * tensor.var())
    tensor.data *= variance_ratio.sqrt()
class InteractionPPBlock(nn.Module):
    """DimeNet++ interaction block.

    Updates the edge messages ``m`` of ``g`` using triplet information
    aggregated over the line graph ``l_g``.

    Parameters
    ----------
    emb_size
        Embedding size used for the messages
    int_emb_size
        Embedding size used for interaction triplets
    basis_emb_size
        Embedding size used inside the basis transformation
    num_radial
        Number of radial basis functions
    num_spherical
        Number of spherical harmonics
    num_before_skip
        Number of residual layers before the skip connection
    num_after_skip
        Number of residual layers after the skip connection
    activation
        Activation function (None disables it)
    """

    def __init__(
        self,
        emb_size,
        int_emb_size,
        basis_emb_size,
        num_radial,
        num_spherical,
        num_before_skip,
        num_after_skip,
        activation=None,
    ):
        super(InteractionPPBlock, self).__init__()

        self.activation = activation
        # Transformations of Bessel and spherical basis representations
        self.dense_rbf1 = nn.Linear(num_radial, basis_emb_size, bias=False)
        self.dense_rbf2 = nn.Linear(basis_emb_size, emb_size, bias=False)
        self.dense_sbf1 = nn.Linear(
            num_radial * num_spherical, basis_emb_size, bias=False
        )
        self.dense_sbf2 = nn.Linear(basis_emb_size, int_emb_size, bias=False)
        # Dense transformations of input messages
        self.dense_ji = nn.Linear(emb_size, emb_size)
        self.dense_kj = nn.Linear(emb_size, emb_size)
        # Embedding projections for interaction triplets
        self.down_projection = nn.Linear(emb_size, int_emb_size, bias=False)
        self.up_projection = nn.Linear(int_emb_size, emb_size, bias=False)
        # Residual layers before skip connection
        self.layers_before_skip = nn.ModuleList(
            [
                ResidualLayer(emb_size, activation=activation)
                for _ in range(num_before_skip)
            ]
        )
        self.final_before_skip = nn.Linear(emb_size, emb_size)
        # Residual layers after skip connection
        self.layers_after_skip = nn.ModuleList(
            [
                ResidualLayer(emb_size, activation=activation)
                for _ in range(num_after_skip)
            ]
        )

        self.reset_params()

    def reset_params(self):
        """Initialise every dense layer owned directly by this block."""
        GlorotOrthogonal(self.dense_rbf1.weight)
        GlorotOrthogonal(self.dense_rbf2.weight)
        GlorotOrthogonal(self.dense_sbf1.weight)
        GlorotOrthogonal(self.dense_sbf2.weight)
        GlorotOrthogonal(self.dense_ji.weight)
        nn.init.zeros_(self.dense_ji.bias)
        GlorotOrthogonal(self.dense_kj.weight)
        nn.init.zeros_(self.dense_kj.bias)
        GlorotOrthogonal(self.down_projection.weight)
        GlorotOrthogonal(self.up_projection.weight)
        # final_before_skip was previously left at nn.Linear's default
        # initialisation; initialise it like the other biased dense layers
        # of this block (and like InteractionBlock does for its weight).
        GlorotOrthogonal(self.final_before_skip.weight)
        nn.init.zeros_(self.final_before_skip.bias)

    def edge_transfer(self, edges):
        """Compute the per-edge features ``x_kj`` and ``x_ji`` from ``m`` and ``rbf``."""
        # Transform from Bessel basis to dense vector
        rbf = self.dense_rbf1(edges.data["rbf"])
        rbf = self.dense_rbf2(rbf)
        # Initial transformation
        x_ji = self.dense_ji(edges.data["m"])
        x_kj = self.dense_kj(edges.data["m"])
        if self.activation is not None:
            x_ji = self.activation(x_ji)
            x_kj = self.activation(x_kj)

        # Modulate by the radial basis, then project down to int_emb_size
        x_kj = self.down_projection(x_kj * rbf)
        if self.activation is not None:
            x_kj = self.activation(x_kj)
        return {"x_kj": x_kj, "x_ji": x_ji}

    def msg_func(self, edges):
        """Message on the (reversed) line graph: source ``x_kj`` scaled by the transformed ``sbf``."""
        sbf = self.dense_sbf1(edges.data["sbf"])
        sbf = self.dense_sbf2(sbf)
        x_kj = edges.src["x_kj"] * sbf
        return {"x_kj": x_kj}

    def forward(self, g, l_g):
        """Update ``g.edata["m"]`` in place and return ``g``."""
        g.apply_edges(self.edge_transfer)

        # nodes correspond to edges and edges correspond to nodes in the original graphs
        # node: d, rbf, o, rbf_env, x_kj, x_ji
        for k, v in g.edata.items():
            l_g.ndata[k] = v

        # Messages are aggregated along the reversed line-graph edges.
        l_g_reverse = dgl.reverse(l_g, copy_edata=True)
        l_g_reverse.update_all(self.msg_func, fn.sum("x_kj", "m_update"))

        g.edata["m_update"] = self.up_projection(l_g_reverse.ndata["m_update"])
        if self.activation is not None:
            g.edata["m_update"] = self.activation(g.edata["m_update"])

        # Transformations before skip connection
        g.edata["m_update"] = g.edata["m_update"] + g.edata["x_ji"]
        for layer in self.layers_before_skip:
            g.edata["m_update"] = layer(g.edata["m_update"])
        g.edata["m_update"] = self.final_before_skip(g.edata["m_update"])
        if self.activation is not None:
            g.edata["m_update"] = self.activation(g.edata["m_update"])

        # Skip connection
        g.edata["m"] = g.edata["m"] + g.edata["m_update"]

        # Transformations after skip connection
        for layer in self.layers_after_skip:
            g.edata["m"] = layer(g.edata["m"])

        return g
class OutputPPBlock(nn.Module):
    """DimeNet++ output block: aggregates edge messages onto nodes, runs a
    stack of dense layers and reads out one prediction row per graph."""

    def __init__(
        self,
        emb_size,
        out_emb_size,
        num_radial,
        num_dense,
        num_targets,
        activation=None,
        output_init=nn.init.zeros_,
        extensive=True,
    ):
        super().__init__()

        self.activation = activation
        self.output_init = output_init
        self.extensive = extensive
        self.dense_rbf = nn.Linear(num_radial, emb_size, bias=False)
        self.up_projection = nn.Linear(emb_size, out_emb_size, bias=False)
        hidden_layers = [
            nn.Linear(out_emb_size, out_emb_size) for _ in range(num_dense)
        ]
        self.dense_layers = nn.ModuleList(hidden_layers)
        self.dense_final = nn.Linear(out_emb_size, num_targets, bias=False)
        self.reset_params()

    def reset_params(self):
        """Re-initialise all weights; the final layer uses ``output_init``."""
        GlorotOrthogonal(self.dense_rbf.weight)
        GlorotOrthogonal(self.up_projection.weight)
        for dense in self.dense_layers:
            GlorotOrthogonal(dense.weight)
        self.output_init(self.dense_final.weight)

    def forward(self, g):
        """Return the graph-level readout of the per-node predictions."""
        with g.local_scope():
            # Gate each edge message with its transformed radial basis.
            g.edata["tmp"] = g.edata["m"] * self.dense_rbf(g.edata["rbf"])
            # Sum the gated messages per node over the reversed graph.
            reversed_g = dgl.reverse(g, copy_edata=True)
            reversed_g.update_all(fn.copy_e("tmp", "x"), fn.sum("x", "t"))

            hidden = self.up_projection(reversed_g.ndata["t"])
            for dense in self.dense_layers:
                hidden = dense(hidden)
                if self.activation is not None:
                    hidden = self.activation(hidden)
            g.ndata["t"] = self.dense_final(hidden)

            readout_op = "sum" if self.extensive else "mean"
            return dgl.readout_nodes(g, "t", op=readout_op)
functions self.sph_funcs = [] self.bessel_funcs = [] # convert to torch functions x = sym.symbols("x") theta = sym.symbols("theta") modules = {"sin": torch.sin, "cos": torch.cos} for i in range(num_spherical): if i == 0: first_sph = sym.lambdify( [theta], self.sph_harm_formulas[i][0], modules )(0) self.sph_funcs.append( lambda tensor: torch.zeros_like(tensor) + first_sph ) else: self.sph_funcs.append( sym.lambdify([theta], self.sph_harm_formulas[i][0], modules) ) for j in range(num_radial): self.bessel_funcs.append( sym.lambdify([x], self.bessel_formulas[i][j], modules) ) def get_bessel_funcs(self): return self.bessel_funcs def get_sph_funcs(self): return self.sph_funcs ================================================ FILE: examples/pytorch/dimenet/qm9.py ================================================ """QM9 dataset for graph property prediction (regression).""" import os import dgl import numpy as np import scipy.sparse as sp import torch from dgl.convert import graph as dgl_graph from dgl.data import QM9Dataset from dgl.data.utils import load_graphs, save_graphs from tqdm import trange class QM9(QM9Dataset): r"""QM9 dataset for graph property prediction (regression) This dataset consists of 130,831 molecules with 12 regression targets. Nodes correspond to atoms and edges correspond to bonds. 
Reference: - `"Quantum-Machine.org" `_ - `"Directional Message Passing for Molecular Graphs" `_ Statistics: - Number of graphs: 130,831 - Number of regression targets: 12 +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | Keys | Property | Description | Unit | +========+==================================+===================================================================================+=============================================+ | mu | :math:`\mu` | Dipole moment | :math:`\textrm{D}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | alpha | :math:`\alpha` | Isotropic polarizability | :math:`{a_0}^3` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | homo | :math:`\epsilon_{\textrm{HOMO}}` | Highest occupied molecular orbital energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | lumo | :math:`\epsilon_{\textrm{LUMO}}` | Lowest unoccupied molecular orbital energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | gap | :math:`\Delta \epsilon` | Gap between :math:`\epsilon_{\textrm{HOMO}}` and :math:`\epsilon_{\textrm{LUMO}}` | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | r2 | :math:`\langle R^2 \rangle` | Electronic spatial extent | 
:math:`{a_0}^2` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | zpve | :math:`\textrm{ZPVE}` | Zero point vibrational energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U0 | :math:`U_0` | Internal energy at 0K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U | :math:`U` | Internal energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | H | :math:`H` | Enthalpy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | G | :math:`G` | Free energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | Cv | :math:`c_{\textrm{v}}` | Heat capavity at 298.15K | :math:`\frac{\textrm{cal}}{\textrm{mol K}}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ Parameters ---------- label_keys: list Names of the regression property, which should be a subset of the keys in the table above. edge_funcs: list A list of edge-wise user-defined functions for chemical bonds. Default: None cutoff: float Cutoff distance for interatomic interactions, i.e. 
two atoms are connected in the corresponding graph if the distance between them is no larger than this. Default: 5.0 Angstrom raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose: bool Whether to print out progress information. Default: True Attributes ---------- num_labels : int Number of labels for each graph, i.e. number of prediction tasks Raises ------ UserWarning If the raw data is changed in the remote server by the author. Examples -------- >>> data = QM9Dataset(label_keys=['mu', 'gap'], cutoff=5.0) >>> data.num_classes 2 >>> >>> # iterate over the dataset >>> for g, label in data: ... R = g.ndata['R'] # get coordinates of each atom ... Z = g.ndata['Z'] # get atomic numbers of each atom ... # your code here... >>> """ def __init__( self, label_keys, edge_funcs=None, cutoff=5.0, raw_dir=None, force_reload=False, verbose=False, ): self.edge_funcs = edge_funcs self._keys = [ "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "U0", "U", "H", "G", "Cv", ] super(QM9, self).__init__( label_keys=label_keys, cutoff=cutoff, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, ) @property def graph_path(self): return f"{self.save_path}/dgl_graph.bin" @property def line_graph_path(self): return f"{self.save_path}/dgl_line_graph.bin" def has_cache(self): """step 1, if True, goto step 5; else goto download(step 2), then step 3""" return os.path.exists(self.graph_path) and os.path.exists( self.line_graph_path ) def process(self): """step 3""" npz_path = f"{self.raw_dir}/qm9_eV.npz" data_dict = np.load(npz_path, allow_pickle=True) # data_dict['N'] contains the number of atoms in each molecule, # data_dict['R'] consists of the atomic coordinates, # data_dict['Z'] consists of the atomic numbers. # Atomic properties (Z and R) of all molecules are concatenated as single tensors, # so you need this value to select the correct atoms for each molecule. 
self.N = data_dict["N"] self.R = data_dict["R"] self.Z = data_dict["Z"] self.N_cumsum = np.concatenate([[0], np.cumsum(self.N)]) # graph labels self.label_dict = {} for k in self._keys: self.label_dict[k] = torch.tensor(data_dict[k], dtype=torch.float32) self.label = torch.stack( [self.label_dict[key] for key in self.label_keys], dim=1 ) # graphs & features self.graphs, self.line_graphs = self._load_graph() def _load_graph(self): num_graphs = self.label.shape[0] graphs = [] line_graphs = [] for idx in trange(num_graphs): n_atoms = self.N[idx] # get all the atomic coordinates of the idx-th molecular graph R = self.R[self.N_cumsum[idx] : self.N_cumsum[idx + 1]] # calculate the distance between all atoms dist = np.linalg.norm(R[:, None, :] - R[None, :, :], axis=-1) # keep all edges that don't exceed the cutoff and delete self-loops adj = sp.csr_matrix(dist <= self.cutoff) - sp.eye( n_atoms, dtype=np.bool_ ) adj = adj.tocoo() u, v = torch.tensor(adj.row), torch.tensor(adj.col) g = dgl_graph((u, v)) g.ndata["R"] = torch.tensor(R, dtype=torch.float32) g.ndata["Z"] = torch.tensor( self.Z[self.N_cumsum[idx] : self.N_cumsum[idx + 1]], dtype=torch.long, ) # add user-defined features if self.edge_funcs is not None: for func in self.edge_funcs: g.apply_edges(func) graphs.append(g) l_g = dgl.line_graph(g, backtracking=False) line_graphs.append(l_g) return graphs, line_graphs def save(self): """step 4""" save_graphs(str(self.graph_path), self.graphs, self.label_dict) save_graphs(str(self.line_graph_path), self.line_graphs) def load(self): """step 5""" self.graphs, label_dict = load_graphs(self.graph_path) self.line_graphs, _ = load_graphs(self.line_graph_path) self.label = torch.stack( [label_dict[key] for key in self.label_keys], dim=1 ) def __getitem__(self, idx): r"""Get graph and label by index Parameters ---------- idx : int Item index Returns ------- dgl.DGLGraph The graph contains: - ``ndata['R']``: the coordinates of each atom - ``ndata['Z']``: the atomic number Tensor 
This DGL example implements the GNN models proposed in the papers [Diffusion Convolutional Recurrent Neural Network: Data-Driven Traffic Forecasting](https://arxiv.org/abs/1707.01926) and [GaAN: Gated Attention Networks for Learning on Large and Spatiotemporal Graphs](https://arxiv.org/pdf/1803.07294).
def download_file(dataset, path="./data"):
    """Download ``dataset`` from the DGL S3 bucket into ``path``.

    Parameters
    ----------
    dataset : str
        Name of the remote object; it is also used as the local file name.
    path : str
        Directory the file is written to; created when missing.
        Default: ``./data``
    """
    print("Start Downloading data: {}".format(dataset))
    url = "https://s3.us-west-2.amazonaws.com/dgl-data/dataset/{}".format(
        dataset
    )
    print("Start Downloading File....")
    # NOTE(review): certificate verification is switched off here, so the
    # download is not protected against tampering. Kept as-is to preserve
    # behavior; consider ssl.create_default_context() instead.
    context = ssl._create_unverified_context()
    os.makedirs(path, exist_ok=True)
    # Context managers close the HTTP response and the output file even when
    # the transfer fails half-way.
    with urllib.request.urlopen(url, context=context) as response:
        with open(os.path.join(path, dataset), "wb") as handle:
            handle.write(response.read())


class SnapShotDataset(Dataset):
    """Dataset of ``(x, y)`` snapshot pairs stored in an ``.npz`` archive.

    Parameters
    ----------
    path : str
        Directory that holds (or will receive) the archive.
    npz_file : str
        File name of the archive; it must contain the arrays ``x`` and ``y``.
    """

    def __init__(self, path, npz_file):
        file_path = os.path.join(path, npz_file)
        if not os.path.exists(file_path):
            # Download into the directory that is read below; previously the
            # file always landed in ./data regardless of ``path``.
            download_file(npz_file, path)
        # NpzFile keeps the archive open until closed; the arrays are fully
        # materialised on access, so the handle can be released right away.
        with np.load(file_path) as archive:
            self.x = archive["x"]
            self.y = archive["y"]

    def __len__(self):
        """Return the number of samples (length of the first axis of ``x``)."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` pair(s) selected by ``idx``.

        ``idx`` may be an int, a slice, a list, or a torch tensor of indices.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()
        return self.x[idx, ...], self.y[idx, ...]


def _load_cached_graph(bin_file):
    """Load the first graph stored in ``data/<bin_file>``, downloading it first if absent."""
    local_path = "data/{}".format(bin_file)
    if not os.path.exists(local_path):
        os.makedirs("data", exist_ok=True)
        download_file(bin_file)
    g, _ = dgl.load_graphs(local_path)
    return g[0]


def METR_LAGraphDataset():
    """Return the METR-LA sensor graph (downloaded to ``data/graph_la.bin``)."""
    return _load_cached_graph("graph_la.bin")


class METR_LATrainDataset(SnapShotDataset):
    """METR-LA training split; also exposes ``mean``/``std`` of feature 0 of ``x``."""

    def __init__(self):
        super(METR_LATrainDataset, self).__init__("data", "metr_la_train.npz")
        self.mean = self.x[..., 0].mean()
        self.std = self.x[..., 0].std()


class METR_LATestDataset(SnapShotDataset):
    """METR-LA test split."""

    def __init__(self):
        super(METR_LATestDataset, self).__init__("data", "metr_la_test.npz")


class METR_LAValidDataset(SnapShotDataset):
    """METR-LA validation split."""

    def __init__(self):
        super(METR_LAValidDataset, self).__init__("data", "metr_la_valid.npz")


def PEMS_BAYGraphDataset():
    """Return the PEMS-BAY sensor graph (downloaded to ``data/graph_bay.bin``)."""
    return _load_cached_graph("graph_bay.bin")


class PEMS_BAYTrainDataset(SnapShotDataset):
    """PEMS-BAY training split; also exposes ``mean``/``std`` of feature 0 of ``x``."""

    def __init__(self):
        super(PEMS_BAYTrainDataset, self).__init__("data", "pems_bay_train.npz")
        self.mean = self.x[..., 0].mean()
        self.std = self.x[..., 0].std()


class PEMS_BAYTestDataset(SnapShotDataset):
    """PEMS-BAY test split."""

    def __init__(self):
        super(PEMS_BAYTestDataset, self).__init__("data", "pems_bay_test.npz")


class PEMS_BAYValidDataset(SnapShotDataset):
    """PEMS-BAY validation split."""

    def __init__(self):
        super(PEMS_BAYValidDataset, self).__init__("data", "pems_bay_valid.npz")
Parameter ========== in_feats : int number of input feature out_feats : int number of output feature k : int number of diffusion steps dir : str [both/in/out] direction of diffusion convolution From paper default both direction """ def __init__( self, in_feats, out_feats, k, in_graph_list, out_graph_list, dir="both" ): super(DiffConv, self).__init__() self.in_feats = in_feats self.out_feats = out_feats self.k = k self.dir = dir self.num_graphs = self.k - 1 if self.dir == "both" else 2 * self.k - 2 self.project_fcs = nn.ModuleList() for i in range(self.num_graphs): self.project_fcs.append( nn.Linear(self.in_feats, self.out_feats, bias=False) ) self.merger = nn.Parameter(torch.randn(self.num_graphs + 1)) self.in_graph_list = in_graph_list self.out_graph_list = out_graph_list @staticmethod def attach_graph(g, k): device = g.device out_graph_list = [] in_graph_list = [] wadj, ind, outd = DiffConv.get_weight_matrix(g) adj = sparse.coo_matrix(wadj / outd.cpu().numpy()) outg = dgl.from_scipy(adj, eweight_name="weight").to(device) outg.edata["weight"] = outg.edata["weight"].float().to(device) out_graph_list.append(outg) for i in range(k - 1): out_graph_list.append( DiffConv.diffuse(out_graph_list[-1], wadj, outd) ) adj = sparse.coo_matrix(wadj.T / ind.cpu().numpy()) ing = dgl.from_scipy(adj, eweight_name="weight").to(device) ing.edata["weight"] = ing.edata["weight"].float().to(device) in_graph_list.append(ing) for i in range(k - 1): in_graph_list.append( DiffConv.diffuse(in_graph_list[-1], wadj.T, ind) ) return out_graph_list, in_graph_list @staticmethod def get_weight_matrix(g): adj = g.adj_external(scipy_fmt="coo") ind = g.in_degrees() outd = g.out_degrees() weight = g.edata["weight"] adj.data = weight.cpu().numpy() return adj, ind, outd @staticmethod def diffuse(progress_g, weighted_adj, degree): device = progress_g.device progress_adj = progress_g.adj_external(scipy_fmt="coo") progress_adj.data = progress_g.edata["weight"].cpu().numpy() ret_adj = sparse.coo_matrix( 
class WeightedGATConv(dglnn.GATConv):
    """GATConv variant that scales attention by a per-edge weight.

    The layer inherits everything from ``dglnn.GATConv`` and only overrides
    ``forward``: after the edge softmax, every attention coefficient is
    multiplied by the value stored in ``graph.edata["weight"]`` before the
    neighbour features are aggregated. The input graph therefore has to
    carry an edge feature named ``"weight"``.
    """

    def forward(self, graph, feat, get_attention=False):
        """Compute edge-weighted graph attention.

        Parameters
        ----------
        graph : DGLGraph
            Input graph; must provide ``edata["weight"]``.
        feat : torch.Tensor or tuple of two torch.Tensor
            Node features, or a ``(source, destination)`` feature pair.
        get_attention : bool
            When True, the weighted attention values are returned as well.

        Returns
        -------
        torch.Tensor, or (torch.Tensor, torch.Tensor)
            The output features and, if ``get_attention`` is True, the
            weighted attention stored in ``edata["a"]``.

        Raises
        ------
        DGLError
            If the graph has 0-in-degree nodes and the module was not built
            with ``allow_zero_in_degree`` enabled.
        """
        # local_scope: features written to the graph below are discarded on exit
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    raise DGLError(
                        "There are 0-in-degree nodes in the graph, "
                        "output for those nodes will be invalid. "
                        "This is harmful for some applications, "
                        "causing silent performance regression. "
                        "Adding self-loop on the input graph by "
                        "calling `g = dgl.add_self_loop(g)` will resolve "
                        "the issue. Setting ``allow_zero_in_degree`` "
                        "to be `True` when constructing this module will "
                        "suppress the check and let the code run."
                    )

            # Project the (dropout-regularised) inputs into per-head features.
            if isinstance(feat, tuple):
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                # Use the shared projection unless separate fc_src/fc_dst exist.
                if not hasattr(self, "fc_src"):
                    feat_src = self.fc(h_src).view(
                        -1, self._num_heads, self._out_feats
                    )
                    feat_dst = self.fc(h_dst).view(
                        -1, self._num_heads, self._out_feats
                    )
                else:
                    feat_src = self.fc_src(h_src).view(
                        -1, self._num_heads, self._out_feats
                    )
                    feat_dst = self.fc_dst(h_dst).view(
                        -1, self._num_heads, self._out_feats
                    )
            else:
                h_src = h_dst = self.feat_drop(feat)
                feat_src = feat_dst = self.fc(h_src).view(
                    -1, self._num_heads, self._out_feats
                )
                # For a block, only the leading rows are kept as destination features.
                if graph.is_block:
                    feat_dst = feat_src[: graph.number_of_dst_nodes()]
            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is much more efficient because we do not need
            # to save [Wh_i || Wh_j] on edges, which is not memory-efficient.
            # Plus, addition could be optimized with DGL's built-in function
            # u_add_v, which further speeds up computation and saves memory
            # footprint.
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({"ft": feat_src, "el": el})
            graph.dstdata.update({"er": er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v("el", "er", "e"))
            e = self.leaky_relu(graph.edata.pop("e"))
            # compute softmax
            graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))
            # compute weighted attention: the edge axis is moved last so the
            # multiplication broadcasts the edge weight, then moved back.
            # NOTE(review): presumably "a" is (E, num_heads, 1) and "weight"
            # is (E,) -- confirm against the graphs passed in.
            graph.edata["a"] = (
                graph.edata["a"].permute(1, 2, 0) * graph.edata["weight"]
            ).permute(2, 0, 1)
            # message passing
            graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
            rst = graph.dstdata["ft"]
            # residual
            if self.res_fc is not None:
                resval = self.res_fc(h_dst).view(
                    h_dst.shape[0], -1, self._out_feats
                )
                rst = rst + resval
            # activation
            if self.activation:
                rst = self.activation(rst)

            if get_attention:
                return rst, graph.edata["a"]
            else:
                return rst
class GraphGRUCell(nn.Module):
    """Graph GRU unit which can use any message passing
    net to replace the linear layer in the original GRU

    Parameter
    ==========
    in_feats : int
        number of input features

    out_feats : int
        number of output features (size of the hidden state)

    net : callable
        factory called as ``net(in_size, out_size)`` that returns a message
        passing module whose forward takes ``(g, features)``
    """

    def __init__(self, in_feats, out_feats, net):
        super(GraphGRUCell, self).__init__()
        self.in_feats = in_feats
        self.out_feats = out_feats
        # NOTE: the former ``self.dir = dir`` was dropped. There is no ``dir``
        # argument, so it only stored Python's builtin ``dir`` function.
        # net can be any GNN model; each gate gets its own instance
        self.r_net = net(in_feats + out_feats, out_feats)
        self.u_net = net(in_feats + out_feats, out_feats)
        self.c_net = net(in_feats + out_feats, out_feats)
        # Manually add the gate biases
        self.r_bias = nn.Parameter(torch.rand(out_feats))
        self.u_bias = nn.Parameter(torch.rand(out_feats))
        self.c_bias = nn.Parameter(torch.rand(out_feats))

    def forward(self, g, x, h):
        """Run one GRU step.

        Parameters
        ----------
        g : graph object forwarded unchanged to the gate networks
        x : torch.Tensor
            input features, concatenated with ``h`` along dim 1
        h : torch.Tensor
            previous hidden state

        Returns
        -------
        torch.Tensor
            the new hidden state ``u * h + (1 - u) * c``
        """
        # The reset and update gates read the same input; build it once.
        xh = torch.cat([x, h], dim=1)
        r = torch.sigmoid(self.r_net(g, xh) + self.r_bias)
        u = torch.sigmoid(self.u_net(g, xh) + self.u_bias)
        # NOTE(review): the candidate state uses sigmoid here, whereas a
        # textbook GRU uses tanh. Kept unchanged to preserve model behavior.
        c = torch.sigmoid(
            self.c_net(g, torch.cat([x, r * h], dim=1)) + self.c_bias
        )
        return u * h + (1 - u) * c
class StackedDecoder(nn.Module):
    """Single-step decoder: a vertical stack of graph GRU cells followed by a
    linear read-out layer.

    Parameter
    ==========
    in_feats : int
        number of input features of the first cell

    hid_feats : int
        hidden size of every cell, i.e. the width fed to the output layer

    out_feats : int
        number of features produced by the output layer

    num_layers : int
        number of stacked cells; must be positive

    net : torch.nn.Module
        message passing network handed to every cell
    """

    def __init__(self, in_feats, hid_feats, out_feats, num_layers, net):
        super(StackedDecoder, self).__init__()
        self.in_feats, self.hid_feats = in_feats, hid_feats
        self.out_feats, self.num_layers = out_feats, num_layers
        self.net = net
        # The read-out layer is created before the cells so that parameter
        # initialisation consumes random numbers in the same order as before.
        self.out_layer = nn.Linear(self.hid_feats, self.out_feats)
        if self.num_layers <= 0:
            raise DGLError("Layer Number must be greater than 0!")
        # First cell maps in_feats -> hid_feats, the rest hid_feats -> hid_feats.
        input_widths = [self.in_feats] + [self.hid_feats] * (
            self.num_layers - 1
        )
        self.layers = nn.ModuleList(
            [GraphGRUCell(width, self.hid_feats, net) for width in input_widths]
        )

    def forward(self, g, x, hidden_states):
        """Advance every cell by one step.

        ``hidden_states[k]`` is the previous state of the k-th cell. Returns
        the read-out of the top cell and the list of new per-cell states.
        """
        new_states = []
        current = x
        for depth, cell in enumerate(self.layers):
            current = cell(g, current, hidden_states[depth])
            new_states.append(current)
        return self.out_layer(current), new_states
Parameter ========== in_feats : int number of input features out_feats : int number of prediction output features seq_len : int input and predicted sequence length num_layers : int vertical number of layers in encoder and decoder unit net : torch.nn.Module Message passing GNN as backbone decay_steps : int number of steps for the teacher forcing probability to decay """ def __init__( self, in_feats, out_feats, seq_len, num_layers, net, decay_steps ): super(GraphRNN, self).__init__() self.in_feats = in_feats self.out_feats = out_feats self.seq_len = seq_len self.num_layers = num_layers self.net = net self.decay_steps = decay_steps self.encoder = StackedEncoder( self.in_feats, self.out_feats, self.num_layers, self.net ) self.decoder = StackedDecoder( self.in_feats, self.out_feats, self.in_feats, self.num_layers, self.net, ) # Threshold For Teacher Forcing def compute_thresh(self, batch_cnt): return self.decay_steps / ( self.decay_steps + np.exp(batch_cnt / self.decay_steps) ) def encode(self, g, inputs, device): hidden_states = [ torch.zeros(g.num_nodes(), self.out_feats).to(device) for _ in range(self.num_layers) ] for i in range(self.seq_len): _, hidden_states = self.encoder(g, inputs[i], hidden_states) return hidden_states def decode(self, g, teacher_states, hidden_states, batch_cnt, device): outputs = [] inputs = torch.zeros(g.num_nodes(), self.in_feats).to(device) for i in range(self.seq_len): if ( np.random.random() < self.compute_thresh(batch_cnt) and self.training ): inputs, hidden_states = self.decoder( g, teacher_states[i], hidden_states ) else: inputs, hidden_states = self.decoder(g, inputs, hidden_states) outputs.append(inputs) outputs = torch.stack(outputs) return outputs def forward(self, g, inputs, teacher_states, batch_cnt, device): hidden = self.encode(g, inputs, device) outputs = self.decode(g, teacher_states, hidden, batch_cnt, device) return outputs ================================================ FILE: examples/pytorch/dtgrnn/train.py 
def _pad_to_batch_size(x, y, batch_size):
    """Pad an incomplete mini-batch to ``batch_size`` by repeating its last sample.

    The diffusion graphs are precomputed for a fixed batch size, so every
    batch handed to the model must contain exactly ``batch_size`` samples.
    Batches that already have that size are returned untouched.
    """
    num_samples = x.shape[0]
    if num_samples == batch_size:
        return x, y
    x_buff = torch.zeros(batch_size, x.shape[1], x.shape[2], x.shape[3])
    y_buff = torch.zeros(batch_size, x.shape[1], x.shape[2], x.shape[3])
    x_buff[:num_samples, :, :, :] = x
    x_buff[num_samples:, :, :, :] = x[-1].repeat(
        batch_size - num_samples, 1, 1, 1
    )
    y_buff[:num_samples, :, :, :] = y
    y_buff[num_samples:, :, :, :] = y[-1].repeat(
        batch_size - num_samples, 1, 1, 1
    )
    return x_buff, y_buff


def train(
    model,
    graph,
    dataloader,
    optimizer,
    scheduler,
    normalizer,
    loss_fn,
    device,
    args,
):
    """Train ``model`` for one epoch and return the mean batch loss.

    Parameters
    ----------
    model : torch.nn.Module
        called as ``model(batch_graph, x_norm, y_norm, batch_cnt, device)``
    graph : DGLGraph
        single-sample graph; it is replicated ``args.batch_size`` times
    dataloader : iterable yielding ``(x, y)`` tensor pairs
    optimizer, scheduler : torch optimizer and learning-rate scheduler
    normalizer : object providing ``normalize`` and ``denormalize``
    loss_fn : callable ``(prediction, target) -> scalar tensor``
    device : torch.device
    args : namespace with ``batch_size``, ``max_grad_norm`` and ``minimum_lr``

    The module-level ``batch_cnt`` counter is incremented once per batch.
    """
    total_loss = []
    graph = graph.to(device)
    model.train()
    batch_size = args.batch_size
    # The batched graph is identical for every mini-batch: build it once
    # instead of once per iteration.
    batch_graph = dgl.batch([graph] * batch_size)
    for i, (x, y) in enumerate(dataloader):
        optimizer.zero_grad()
        # Padding: Since the diffusion graph is precomputed we need to pad the
        # batch so that each batch has the same batch size
        x, y = _pad_to_batch_size(x, y, batch_size)
        # Permute the dimension for shaping
        x = x.permute(1, 0, 2, 3)
        y = y.permute(1, 0, 2, 3)

        x_norm = (
            normalizer.normalize(x)
            .reshape(x.shape[0], -1, x.shape[3])
            .float()
            .to(device)
        )
        y_norm = (
            normalizer.normalize(y)
            .reshape(x.shape[0], -1, x.shape[3])
            .float()
            .to(device)
        )
        y = y.reshape(y.shape[0], -1, y.shape[3]).float().to(device)

        output = model(batch_graph, x_norm, y_norm, batch_cnt[0], device)
        # Denormalization for loss compute
        y_pred = normalizer.denormalize(output)
        loss = loss_fn(y_pred, y)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
        optimizer.step()
        # Stop decaying once the learning rate reaches its lower bound
        if get_learning_rate(optimizer) > args.minimum_lr:
            scheduler.step()
        total_loss.append(float(loss))
        batch_cnt[0] += 1
        print("\rBatch: ", i, end="")
    return np.mean(total_loss)


def eval(model, graph, dataloader, normalizer, loss_fn, device, args):
    """Evaluate ``model`` on ``dataloader`` and return the mean batch loss.

    Arguments have the same meaning as in ``train``; no parameters are
    updated. The batch index is passed to the model in place of the global
    batch counter.
    """
    total_loss = []
    graph = graph.to(device)
    model.eval()
    batch_size = args.batch_size
    # Loop-invariant: one batched graph serves every mini-batch.
    batch_graph = dgl.batch([graph] * batch_size)
    # No gradients are needed for evaluation; skipping autograd saves memory
    # without changing the computed losses.
    with torch.no_grad():
        for i, (x, y) in enumerate(dataloader):
            # Padding: Since the diffusion graph is precomputed we need to pad
            # the batch so that each batch has the same batch size
            x, y = _pad_to_batch_size(x, y, batch_size)
            # Permute the order of dimension
            x = x.permute(1, 0, 2, 3)
            y = y.permute(1, 0, 2, 3)

            x_norm = (
                normalizer.normalize(x)
                .reshape(x.shape[0], -1, x.shape[3])
                .float()
                .to(device)
            )
            y_norm = (
                normalizer.normalize(y)
                .reshape(x.shape[0], -1, x.shape[3])
                .float()
                .to(device)
            )
            y = y.reshape(x.shape[0], -1, x.shape[3]).to(device)

            output = model(batch_graph, x_norm, y_norm, i, device)
            y_pred = normalizer.denormalize(output)
            loss = loss_fn(y_pred, y)
            total_loss.append(float(loss))
    return np.mean(total_loss)
default=2, help="Step of constructing the diffusiob matrix", ) parser.add_argument( "--num_heads", type=int, default=2, help="Number of multiattention head" ) parser.add_argument( "--decay_steps", type=int, default=2000, help="Teacher forcing probability decay ratio", ) parser.add_argument( "--lr", type=float, default=0.01, help="Initial learning rate" ) parser.add_argument( "--minimum_lr", type=float, default=2e-6, help="Lower bound of learning rate", ) parser.add_argument( "--dataset", type=str, default="LA", help="dataset LA for METR_LA; BAY for PEMS_BAY", ) parser.add_argument( "--epochs", type=int, default=100, help="Number of epoches for training" ) parser.add_argument( "--max_grad_norm", type=float, default=5.0, help="Maximum gradient norm for update parameters", ) args = parser.parse_args() # Load the datasets if args.dataset == "LA": g = METR_LAGraphDataset() train_data = METR_LATrainDataset() test_data = METR_LATestDataset() valid_data = METR_LAValidDataset() elif args.dataset == "BAY": g = PEMS_BAYGraphDataset() train_data = PEMS_BAYTrainDataset() test_data = PEMS_BAYTestDataset() valid_data = PEMS_BAYValidDataset() if args.gpu == -1: device = torch.device("cpu") else: device = torch.device("cuda:{}".format(args.gpu)) train_loader = DataLoader( train_data, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, ) valid_loader = DataLoader( valid_data, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, ) test_loader = DataLoader( test_data, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True, ) normalizer = NormalizationLayer(train_data.mean, train_data.std) if args.model == "dcrnn": batch_g = dgl.batch([g] * args.batch_size).to(device) out_gs, in_gs = DiffConv.attach_graph(batch_g, args.diffsteps) net = partial( DiffConv, k=args.diffsteps, in_graph_list=in_gs, out_graph_list=out_gs, ) elif args.model == "gaan": net = partial(GatedGAT, map_feats=64, num_heads=args.num_heads) dcrnn = GraphRNN( 
class NormalizationLayer(nn.Module):
    """Standardise values with a fixed mean and standard deviation.

    Parameters
    ----------
    mean, std :
        statistics used for scaling; expected to be scalars
    """

    def __init__(self, mean, std):
        # Without this call the nn.Module bookkeeping does not exist and
        # .parameters(), .to() or .state_dict() raise AttributeError.
        super(NormalizationLayer, self).__init__()
        self.mean = mean
        self.std = std

    # Here we shall expect mean and std be scalar
    def normalize(self, x):
        """Return ``(x - mean) / std``."""
        return (x - self.mean) / self.std

    def denormalize(self, x):
        """Invert ``normalize``: return ``x * std + mean``."""
        return x * self.std + self.mean


def masked_mae_loss(y_pred, y_true):
    """Mean absolute error that ignores targets equal to zero.

    Entries whose target is 0 get weight 0; the remaining weights are
    rescaled so that the mask has mean 1. NaNs, which appear when every
    target is zero, are replaced by 0 before averaging.
    """
    mask = (y_true != 0).float()
    mask /= mask.mean()
    loss = torch.abs(y_pred - y_true)
    loss = loss * mask
    # trick for nans: https://discuss.pytorch.org/t/how-to-set-nan-in-tensor-to-0/3918/3
    loss[torch.isnan(loss)] = 0
    return loss.mean()


def get_learning_rate(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns None when the optimizer has no parameter groups.
    """
    for param in optimizer.param_groups:
        return param["lr"]
# sensor distances don't depend on window ID
def get_sensor_distances(self):
    """Return one geodesic distance per column of ``self.edge_index``.

    Electrode coordinates are read from ``standard_1010.tsv.txt`` in the
    current working directory; each (source, target) pair is handed to
    ``get_geodesic_distance`` together with that table.
    """
    electrode_table = pd.read_csv("standard_1010.tsv.txt", sep="\t")
    edge_count = self.edge_index.shape[1]
    source_row, target_row = self.edge_index[0], self.edge_index[1]

    result = [
        self.get_geodesic_distance(
            source_row[k], target_row[k], electrode_table
        )
        for k in range(edge_count)
    ]

    assert len(result) == edge_count
    return result
# retrieve one sample from the dataset after applying all transforms
def __getitem__(self, idx):
    """Build the graph for the ``idx``-th entry of ``self.indices``.

    Returns
    -------
    tuple
        ``(g, dataset_idx, label)`` where ``g`` is the complete directed
        graph over ``self.num_nodes`` nodes (self loops included) carrying
        ``ndata["x"]`` and ``edata["edge_weights"]``, ``dataset_idx`` is the
        row index into the whole dataset as a tensor, and ``label`` is
        ``self.labels[dataset_idx]`` as a tensor.
    """
    if torch.is_tensor(idx):
        idx = idx.tolist()

    # map input idx (ranging from 0 to __len__() inside self.indices)
    # to an idx in the whole dataset (inside self.x)
    idx = self.indices[idx]

    # One row of self.x is split evenly across the nodes. Deriving the
    # shape from num_nodes (instead of a hard-coded 8 x 6) yields the same
    # result for the default 8-node / 48-value setup.
    node_features = torch.from_numpy(
        self.x[idx].reshape(self.num_nodes, -1)
    )

    # spectral coherence between 2 montage channels!
    spec_coh_values = self.spec_coh_values[idx, :]
    # combine edge distances and spectral coherence into one weight per
    # edge; the tensor keeps the floating-point values (nothing is truncated)
    edge_weights = torch.tensor(self.distances + spec_coh_values)

    # complete graph, edges ordered (0,0), (0,1), ..., (n-1,n-1) - the same
    # order used for self.edge_index, so weights line up with edges
    node_ids = torch.arange(self.num_nodes)
    u = node_ids.repeat_interleave(self.num_nodes)
    v = node_ids.repeat(self.num_nodes)
    g = dgl.graph((u, v))

    # add node features and edge features
    g.ndata["x"] = node_features
    g.edata["edge_weights"] = edge_weights

    return g, torch.tensor(idx), torch.tensor(self.labels[idx])
- The slides used during the presentation can be found [here](https://drive.google.com/file/d/1dXT4QAUXKauf7CAkhrVyhR2PFUsNh4b8/view?usp=sharing). - The raw data can be found at these two links: [MPI LEMON](http://fcon_1000.projects.nitrc.org/indi/retro/MPI_LEMON.html) (no registration needed), [TUH EEG Abnormal Corpus](https://www.isip.piconepress.com/projects/tuh_eeg/downloads/tuh_eeg_abnormal/) ([needs registration](https://www.isip.piconepress.com/projects/tuh_eeg/html/request_access.php)) ## Dependencies - Python 3.8.1 - PyTorch 1.7.0 - DGL 0.6.1 - numpy 1.20.2 - Sklearn 0.24.2 - pandas 1.2.4 ## Dataset - Final Models, Pre-computed Features, Training Metadata can be downloaded through [FigShare](https://figshare.com/articles/software/EEG-GCNN_Supporting_Resources_for_Reproducibility/13251452). - In ```EEGGraphDataset.py```, we specify the channels and electrodes and use precomputed spectral coherence values to compute the edge weights. To adapt this example to your own data, please specify your channels and electrodes in the ```__init__``` function of ```EEGGraphDataset.py```. - To generate spectral coherence values, please refer to the [spectral_connectivity](https://mne.tools/stable/generated/mne.connectivity.spectral_connectivity.html) function in the mne library. An example usage may take the following form: ```python # ....loop over all windows in dataset.... # window data is 10-second preprocessed multi-channel timeseries (shape: n_channels x n_timepoints) containing all channels in ch_names window_data = np.expand_dims(window_data, axis=0) # ch_names are listed in EEGGraphDataset.py for ch_idx, ch in enumerate(ch_names): # number of channels is len(ch_names), which is 8 in our case.
spec_coh_values, _, _, _, _ = mne.connectivity.spectral_connectivity(data=window_data, method='coh', indices=([ch_idx]*8, range(8)), sfreq=SAMPLING_FREQ, fmin=1.0, fmax=40.0, faverage=True, verbose=False) ``` ## How to Run - First, download ```figshare_upload/master_metadata_index.csv```, ```figshare_upload/psd_features_data_X```, ```figshare_upload/labels_y```, ```figshare_upload/psd_shallow_eeg-gcnn/spec_coh_values.npy```, and ```figshare_upload/psd_shallow_eeg-gcnn/standard_1010.tsv.txt```. Put them in the repo.
- You may download these files by running: ```python wget https://ndownloader.figshare.com/files/25518170 ``` - You will need to unzip the downloaded file. - Then run: ```python python main.py ``` - The default model used is ```shallow_EEGGraphConvNet.py```. To use ```deep_EEGGraphConvNet.py```, run: ```python python main.py --model deep ``` - After the code executes, you will be able to see similar stats in performance section printed. The code will save the trained model from every epoch. ## Performance | DGL | AUC | Bal. Accuracy | |-------------------|-------------|---------------| | Shallow EEG-GCNN | 0.832 | 0.750 | | Deep EEG-GCNN | 0.830 | 0.736 | Shallow_EEGGraphConvNet | AUC | Bal.Accuracy | :-------------------------:|:-------------------------:|:---------------------:| ![shallow_loss](https://user-images.githubusercontent.com/53772888/128595442-d185bd74-5c5d-4118-a6b7-b89dd307d3aa.png) |![shallow_auc](https://user-images.githubusercontent.com/53772888/128595453-2f3b181a-bcb7-4da4-becd-7a7aa62083bc.png)|![shallow_bacc](https://user-images.githubusercontent.com/53772888/128595456-b293c888-bf8c-4f37-bd58-d01885da3832.png) Deep_EEGGraphConvNet | AUC | Bal.Accuracy | :-------------------------:|:-------------------------:|:---------------:| ![deep_loss](https://user-images.githubusercontent.com/53772888/128595458-e4a76591-11cf-405f-9c20-2d161e49c358.png)|![deep_auc](https://user-images.githubusercontent.com/53772888/128595462-7a7bfb67-4601-4e83-8764-d7c44bf979b5.png)|![deep_bacc](https://user-images.githubusercontent.com/53772888/128595467-1a0cd37d-0152-431b-a29b-a40bafb71be5.png) ### Contact - Email to John(_wei33@illinois.edu_) - You may also contact the authors: - Neeraj: nwagh2@illinois.edu / [Website](http://neerajwagh.com/) / [Twitter](https://twitter.com/neeraj_wagh) / [Google Scholar](https://scholar.google.com/citations?hl=en&user=lCy5VsUAAAAJ) - Yoga: varatha2@illinois.edu / [Website](https://sites.google.com/view/yoga-personal/home) / [Google 
class EEGGraphConvNet(nn.Module):
    """Deep EEG graph convolution network.

    Four weighted graph convolutions (the last one batch-normalised), sum
    pooling over the nodes of each graph, then a three-layer classifier
    head producing two logits.

    Parameters
    ----------
    num_feats: the number of features per node. In our case, it is 6.
    """

    def __init__(self, num_feats):
        super().__init__()

        # graph convolution stack: num_feats -> 16 -> 32 -> 64 -> 50
        self.conv1 = GraphConv(num_feats, 16)
        self.conv2 = GraphConv(16, 32)
        self.conv3 = GraphConv(32, 64)
        self.conv4 = GraphConv(64, 50)
        self.conv4_bn = BatchNorm1d(
            50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True
        )

        # classifier head: 50 -> 30 -> 10 -> 2
        self.fc_block1 = nn.Linear(50, 30)
        self.fc_block2 = nn.Linear(30, 10)
        self.fc_block3 = nn.Linear(10, 2)

        # Xavier initializations
        for head_layer in (self.fc_block1, self.fc_block2, self.fc_block3):
            nn.init.xavier_normal_(head_layer.weight, gain=1)

        self.sumpool = SumPooling()

    def forward(self, g, return_graph_embedding=False):
        """Return class logits, or the pooled graph embedding if requested."""
        weights = g.edata["edge_weights"]
        hidden = g.ndata["x"]

        for conv in (self.conv1, self.conv2, self.conv3):
            hidden = conv(g, hidden, edge_weight=weights)
            hidden = function.leaky_relu(hidden, negative_slope=0.01)
            hidden = function.dropout(hidden, p=0.2, training=self.training)

        hidden = self.conv4_bn(self.conv4(g, hidden, edge_weight=weights))
        hidden = function.leaky_relu(hidden, negative_slope=0.01)
        hidden = function.dropout(hidden, p=0.2, training=self.training)

        # NOTE: this takes node-level features/"embeddings"
        # and aggregates to graph-level - use for graph-level classification
        pooled = self.sumpool(g, hidden)
        if return_graph_embedding:
            return pooled

        for head_layer in (self.fc_block1, self.fc_block2):
            pooled = function.leaky_relu(head_layer(pooled), negative_slope=0.1)
            pooled = function.dropout(pooled, p=0.2, training=self.training)

        return self.fc_block3(pooled)
def _evaluate(model, loader, loss_function, device):
    """Run ``model`` over ``loader`` without gradients and collect results.

    Returns ``(y_true, y_pred, y_probs, mean_loss)``: true labels as an
    array, arg-max predictions as a list, softmax probabilities as an
    ``(N, 2)`` array and the mean of the per-minibatch losses.
    """
    y_true, y_pred, logits, losses = [], [], [], []
    model.eval()
    with torch.no_grad():
        for g, _dataset_idx, labels in loader:
            g_batch = g.to(device=device, non_blocking=True)
            y_batch = labels.to(device=device, non_blocking=True)

            outputs = model(g_batch)
            losses.append(loss_function(outputs, y_batch).item())

            _, predicted = torch.max(outputs, 1)
            y_pred += predicted.cpu().numpy().tolist()
            y_true += y_batch.cpu().numpy().tolist()
            logits.append(outputs)

    # probability distribution over target classes: softmax over dim 1
    y_probs = torch.softmax(torch.cat(logits, 0), dim=1).cpu().numpy()
    return np.array(y_true), y_pred, y_probs, np.mean(losses)


if __name__ == "__main__":
    # argparse commandline args
    parser = argparse.ArgumentParser(
        description="Execute training pipeline on a given train/val subjects"
    )
    parser.add_argument(
        "--num_feats",
        type=int,
        default=6,
        help="Number of features per node for the graph",
    )
    parser.add_argument(
        "--num_nodes", type=int, default=8, help="Number of nodes in the graph"
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=4,
        help="Number of worker processes used by the data loaders",
    )
    parser.add_argument(
        "--gpu_idx",
        type=int,
        default=0,
        help="index of GPU device that should be used for this run, defaults to 0.",
    )
    parser.add_argument(
        "--num_epochs",
        type=int,
        default=40,
        help="Number of epochs used to train",
    )
    parser.add_argument(
        "--exp_name", type=str, default="default", help="Name for the test."
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=512,
        help="Batch Size. Default is 512.",
    )
    parser.add_argument(
        "--model",
        type=str,
        default="shallow",
        # reject unknown values here instead of failing later with a
        # NameError when the model class is first used
        choices=["shallow", "deep"],
        help="type shallow to use shallow_EEGGraphConvNet; "
        "type deep to use deep_EEGGraphConvNet. Default is shallow",
    )
    args = parser.parse_args()

    # choose model
    if args.model == "shallow":
        from shallow_EEGGraphConvNet import EEGGraphConvNet
    else:
        from deep_EEGGraphConvNet import EEGGraphConvNet

    # set the random seed so that we can reproduce the results
    np.random.seed(42)
    torch.manual_seed(42)

    # use GPU when available; the CUDA-only calls must not run on CPU hosts
    _GPU_IDX = args.gpu_idx
    if torch.cuda.is_available():
        _DEVICE = torch.device(f"cuda:{_GPU_IDX}")
        torch.cuda.set_device(_DEVICE)
        print(
            f" Using device: {_DEVICE} {torch.cuda.get_device_name(_DEVICE)}"
        )
    else:
        _DEVICE = torch.device("cpu")
        print(f" Using device: {_DEVICE}")
    _PIN_MEMORY = _DEVICE.type == "cuda"

    # load patient level indices
    _DATASET_INDEX = pd.read_csv("master_metadata_index.csv", low_memory=False)
    all_subjects = _DATASET_INDEX["patient_ID"].astype("str").unique()
    print(f"Subject list fetched! Total subjects are {len(all_subjects)}.")

    # retrieve inputs
    num_nodes = args.num_nodes
    _NUM_EPOCHS = args.num_epochs
    _EXPERIMENT_NAME = args.exp_name
    _BATCH_SIZE = args.batch_size
    num_feats = args.num_feats
    num_workers = args.num_workers

    # set up input and targets from files
    x = _load_memory_mapped_array("psd_features_data_X")
    y = _load_memory_mapped_array("labels_y")

    # normalize psd features data: each 48-value row is scaled to unit L2
    # norm (preprocessing.normalize works row-wise, so one call replaces
    # the per-row loop)
    x = preprocessing.normalize(np.asarray(x[: len(y)]).reshape(len(y), 48))

    # map 0/1 to diseased/healthy
    label_mapping, y = np.unique(y, return_inverse=True)
    print(f"Unique labels 0/1 mapping: {label_mapping}")

    # split the dataset to train and test. The ratio of test is 0.3.
    train_and_val_subjects, heldout_subjects = train_test_split(
        all_subjects, test_size=0.3, random_state=42
    )

    # split the dataset using patient indices
    patient_ids = _DATASET_INDEX["patient_ID"].astype("str")
    train_window_indices = _DATASET_INDEX.index[
        patient_ids.isin(train_and_val_subjects)
    ].tolist()
    heldout_test_window_indices = _DATASET_INDEX.index[
        patient_ids.isin(heldout_subjects)
    ].tolist()

    # define model, optimizer, scheduler
    model = EEGGraphConvNet(num_feats)
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[i * 10 for i in range(1, 26)], gamma=0.1
    )

    model = model.to(_DEVICE).double()
    num_trainable_params = sum(
        p.numel() for p in model.parameters() if p.requires_grad
    )
    print(f"Trainable parameters: {num_trainable_params}")

    # Dataloader =============================================================
    # use WeightedRandomSampler to balance the training dataset
    labels_unique, counts = np.unique(y, return_counts=True)
    class_weights = 1.0 / counts

    # provide weights for samples in the training set only
    sample_weights = class_weights[y[train_window_indices]]

    # sampler needs to come up with training set size number of samples
    weighted_sampler = WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(train_window_indices),
        replacement=True,
    )

    # train data loader
    train_dataset = EEGGraphDataset(
        x=x, y=y, num_nodes=num_nodes, indices=train_window_indices
    )
    train_loader = GraphDataLoader(
        dataset=train_dataset,
        batch_size=_BATCH_SIZE,
        sampler=weighted_sampler,
        num_workers=num_workers,
        pin_memory=_PIN_MEMORY,
    )

    # this loader is used without weighted sampling, to evaluate metrics on
    # the full training set after each epoch
    train_metrics_loader = GraphDataLoader(
        dataset=train_dataset,
        batch_size=_BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=_PIN_MEMORY,
    )

    # test data loader
    test_dataset = EEGGraphDataset(
        x=x, y=y, num_nodes=num_nodes, indices=heldout_test_window_indices
    )
    test_loader = GraphDataLoader(
        dataset=test_dataset,
        batch_size=_BATCH_SIZE,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=_PIN_MEMORY,
    )

    auroc_train_history = []
    auroc_test_history = []
    balACC_train_history = []
    balACC_test_history = []
    loss_train_history = []
    loss_test_history = []

    # training ===============================================================
    for epoch in range(_NUM_EPOCHS):
        model.train()
        train_loss = []

        # batch labels get their own name so the dataset-wide ``y`` is not
        # overwritten inside the loop
        for g, _dataset_idx, batch_labels in train_loader:
            # send batch to GPU
            g_batch = g.to(device=_DEVICE, non_blocking=True)
            y_batch = batch_labels.to(device=_DEVICE, non_blocking=True)
            optimizer.zero_grad()

            # forward pass
            outputs = model(g_batch)
            loss = loss_function(outputs, y_batch)
            train_loss.append(loss.item())

            # backward pass
            loss.backward()
            optimizer.step()

        # update learning rate
        scheduler.step()

        # evaluate model after each epoch on the full training set and on
        # the held-out set
        y_true_train, y_pred_train, y_probs_train, _ = _evaluate(
            model, train_metrics_loader, loss_function, _DEVICE
        )
        y_true_test, y_pred_test, y_probs_test, test_loss = _evaluate(
            model, test_loader, loss_function, _DEVICE
        )

        # record training auroc and testing auroc
        auroc_train_history.append(
            roc_auc_score(y_true_train, y_probs_train[:, 1])
        )
        auroc_test_history.append(
            roc_auc_score(y_true_test, y_probs_test[:, 1])
        )

        # record training balanced accuracy and testing balanced accuracy
        balACC_train_history.append(
            balanced_accuracy_score(y_true_train, y_pred_train)
        )
        balACC_test_history.append(
            balanced_accuracy_score(y_true_test, y_pred_test)
        )

        # LOSS - epoch loss is defined as mean of minibatch losses within epoch
        loss_train_history.append(np.mean(train_loss))
        loss_test_history.append(test_loss)

        # print the metrics
        print(
            "Train loss: {}, test loss: {}".format(
                loss_train_history[-1], loss_test_history[-1]
            )
        )
        print(
            "Train AUC: {}, test AUC: {}".format(
                auroc_train_history[-1], auroc_test_history[-1]
            )
        )
        print(
            "Train Bal.ACC: {}, test Bal.ACC: {}".format(
                balACC_train_history[-1], balACC_test_history[-1]
            )
        )

        # save model from each epoch
        state = {
            "epochs": _NUM_EPOCHS,
            "experiment_name": _EXPERIMENT_NAME,
            "model_description": str(model),
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        torch.save(state, f"{_EXPERIMENT_NAME}_Epoch_{epoch}.ckpt")
def forward(self, g, return_graph_embedding=False):
    """Return class logits for ``g``, or its pooled embedding if requested.

    Node features are read from ``g.ndata["x"]`` and edge weights from
    ``g.edata["edge_weights"]``.
    """
    weights = g.edata["edge_weights"]

    hidden = self.conv1(g, g.ndata["x"], edge_weight=weights)
    hidden = function.leaky_relu(hidden)

    hidden = self.conv2(g, hidden, edge_weight=weights)
    hidden = function.leaky_relu(self.conv2_bn(hidden))

    # NOTE: this takes node-level features/"embeddings"
    # and aggregates to graph-level - use for graph-level classification
    graph_embedding = SumPooling()(g, hidden)
    if return_graph_embedding:
        return graph_embedding

    dropped = function.dropout(graph_embedding, p=0.2, training=self.training)
    activated = function.leaky_relu(self.fc_block1(dropped))
    return self.fc_block2(activated)
`mkdir data` - Download csv data `wget https://raw.githubusercontent.com/Wang-Yu-Qing/dgl_data/master/eges_data/action_head.csv -P data/` `wget https://raw.githubusercontent.com/Wang-Yu-Qing/dgl_data/master/eges_data/jdata_product.csv -P data/` - Run with the following command (with default configuration) `python main.py` ## Result ``` Evaluate link prediction AUC: 0.7084 ``` ================================================ FILE: examples/pytorch/eges/main.py ================================================ import dgl import torch as th import torch.optim as optim import utils from model import EGES from sampler import Sampler from sklearn import metrics from torch.utils.data import DataLoader def train(args, train_g, sku_info, num_skus, num_brands, num_shops, num_cates): sampler = Sampler( train_g, args.walk_length, args.num_walks, args.window_size, args.num_negative, ) # for each node in the graph, we sample pos and neg # pairs for it, and feed these sampled pairs into the model. # (nodes in the graph are of course batched before sampling) dataloader = DataLoader( th.arange(train_g.num_nodes()), # this is the batch_size of input nodes batch_size=args.batch_size, shuffle=True, collate_fn=lambda x: sampler.sample(x, sku_info), ) model = EGES(args.dim, num_skus, num_brands, num_shops, num_cates) optimizer = optim.Adam(model.parameters(), lr=args.lr) for epoch in range(args.epochs): epoch_total_loss = 0 for step, (srcs, dsts, labels) in enumerate(dataloader): # the batch size of output pairs is unfixed # TODO: shuffle the triples? 
class EGES(th.nn.Module):
    """Weighted average of an item embedding and its side-information embeddings.

    Every node is described by a row ``[sku_id, brand_id, shop_id, cate_id]``.
    Each column has its own embedding table, and every sku has one learnable
    weight per column; the node embedding is the exp-weighted average of the
    column embeddings.

    Parameters
    ----------
    dim : int
        Embedding size.
    num_nodes, num_brands, num_shops, num_cates : int
        Number of distinct ids in each of the four columns.
    """

    def __init__(self, dim, num_nodes, num_brands, num_shops, num_cates):
        super(EGES, self).__init__()
        self.dim = dim
        # The tables must live in a ModuleList: a plain Python list is not
        # registered with the module, so its weights would be missing from
        # ``parameters()`` and never be updated by the optimizer.
        self.embeds = th.nn.ModuleList(
            [
                th.nn.Embedding(num_nodes, dim),
                th.nn.Embedding(num_brands, dim),
                th.nn.Embedding(num_shops, dim),
                th.nn.Embedding(num_cates, dim),
            ]
        )
        # weights for each node's side information
        self.side_info_weights = th.nn.Embedding(num_nodes, len(self.embeds))

    def forward(self, srcs, dsts):
        """Embed both ends of each pair; returns ``(src_embeds, dst_embeds)``."""
        # srcs: sku_id, brand_id, shop_id, cate_id
        srcs = self.query_node_embed(srcs)
        dsts = self.query_node_embed(dsts)

        return srcs, dsts

    def query_node_embed(self, nodes):
        """
        @nodes: tensor of shape (batch_size, num_side_info)

        Returns a (batch_size, dim) tensor.
        """
        # positive weight per (node, column), looked up by sku id:
        # (batch_size, num_side_info)
        weights = th.exp(self.side_info_weights(nodes[:, 0]))

        # one embedding per column: (batch_size, num_side_info, dim)
        column_embeds = th.stack(
            [table(nodes[:, i]) for i, table in enumerate(self.embeds)],
            dim=1,
        )

        # weighted sum over the columns: (batch_size, dim)
        weighted_sum = (weights.unsqueeze(-1) * column_embeds).sum(dim=1)
        # normaliser kept as (batch_size, 1) so the division broadcasts
        weight_total = weights.sum(dim=1, keepdim=True)

        return weighted_sum / weight_total

    def loss(self, srcs, dsts, labels):
        """Mean binary cross-entropy of ``sigmoid(<src, dst>)`` against ``labels``."""
        dots = th.sigmoid(th.sum(srcs * dsts, dim=1))
        # keep the logarithms finite
        dots = th.clamp(dots, min=1e-7, max=1 - 1e-7)

        return th.mean(
            -(labels * th.log(dots) + (1 - labels) * th.log(1 - dots))
        )
as np import torch as th class Sampler: def __init__( self, graph, walk_length, num_walks, window_size, num_negative ): self.graph = graph self.walk_length = walk_length self.num_walks = num_walks self.window_size = window_size self.num_negative = num_negative self.node_weights = self.compute_node_sample_weight() def sample(self, batch, sku_info): """ Given a batch of target nodes, sample postive pairs and negative pairs from the graph """ batch = np.repeat(batch, self.num_walks) pos_pairs = self.generate_pos_pairs(batch) neg_pairs = self.generate_neg_pairs(pos_pairs) # get sku info with id srcs, dsts, labels = [], [], [] for pair in pos_pairs + neg_pairs: src, dst, label = pair src_info = sku_info[src] dst_info = sku_info[dst] srcs.append(src_info) dsts.append(dst_info) labels.append(label) return th.tensor(srcs), th.tensor(dsts), th.tensor(labels) def filter_padding(self, traces): for i in range(len(traces)): traces[i] = [x for x in traces[i] if x != -1] def generate_pos_pairs(self, nodes): """ For seq [1, 2, 3, 4] and node NO.2, the window_size=1 will generate: (1, 2) and (2, 3) """ # random walk traces, types = dgl.sampling.random_walk( g=self.graph, nodes=nodes, length=self.walk_length, prob="weight" ) traces = traces.tolist() self.filter_padding(traces) # skip-gram pairs = [] for trace in traces: for i in range(len(trace)): center = trace[i] left = max(0, i - self.window_size) right = min(len(trace), i + self.window_size + 1) pairs.extend([[center, x, 1] for x in trace[left:i]]) pairs.extend([[center, x, 1] for x in trace[i + 1 : right]]) return pairs def compute_node_sample_weight(self): """ Using node degree as sample weight """ return self.graph.in_degrees().float() def generate_neg_pairs(self, pos_pairs): """ Sample based on node freq in traces, frequently shown nodes will have larger chance to be sampled as negative node. """ # sample `self.num_negative` neg dst node # for each pos node pair's src node. 
def construct_graph(datapath, session_interval_gap_sec, valid_sku_raw_ids):
    """Build the weighted item graph from the click log at ``datapath``.

    Each user's clicks are cut into sessions (see ``split_sessions``) and
    every pair of consecutive items in a session adds 1 to the weight of
    the corresponding directed edge.

    Returns ``(g, sku_encoder, sku_decoder)``.
    """
    user_clicks, sku_encoder, sku_decoder = parse_actions(
        datapath, valid_sku_raw_ids
    )

    # {"src,dst": weight}
    graph = {}
    for action_list in user_clicks.values():
        for session in split_sessions(action_list, session_interval_gap_sec):
            add_session(session, graph)

    g = convert_to_dgl_graph(graph)

    return g, sku_encoder, sku_decoder


def split_sessions(action_list, session_interval_gap_sec):
    """Cut one user's ``(sku_id, "YYYY-mm-dd HH:MM:SS")`` clicks into sessions.

    Clicks are ordered by time; a new session starts whenever the gap to
    the previous click is at least ``session_interval_gap_sec`` seconds.
    Returns a list of lists of sku ids.
    """
    if not action_list:
        return []

    time_format = "%Y-%m-%d %H:%M:%S"
    ordered = sorted(action_list, key=lambda x: x[1])

    sessions = [[ordered[0][0]]]
    last_action_time = datetime.strptime(ordered[0][1], time_format)
    for sku_id, raw_time in ordered[1:]:
        action_time = datetime.strptime(raw_time, time_format)
        # total_seconds() counts whole days too; ``.seconds`` would treat a
        # gap of one day and five minutes as five minutes
        gap = (action_time - last_action_time).total_seconds()
        if gap < session_interval_gap_sec:
            sessions[-1].append(sku_id)
        else:
            sessions.append([sku_id])
        # the gap is measured between consecutive clicks
        last_action_time = action_time

    return sessions


def convert_to_dgl_graph(graph):
    """Turn the ``{"src,dst": weight}`` dict into a DGL graph with a ``weight`` edge feature."""
    # directed graph
    g = nx.DiGraph()
    for edge, weight in graph.items():
        nodes = edge.split(",")
        src, dst = int(nodes[0]), int(nodes[1])
        g.add_edge(src, dst, weight=float(weight))

    return dgl.from_networkx(g, edge_attrs=["weight"])


def add_session(session, graph):
    """
    For session like:
        [sku1, sku2, sku3]
    add 1 weight to each of the following edges:
        sku1 -> sku2
        sku2 -> sku3
    If session length < 2, no nodes/edges will be added
    """
    for src, dst in zip(session, session[1:]):
        edge = str(src) + "," + str(dst)
        graph[edge] = graph.get(edge, 0) + 1


def parse_actions(datapath, valid_sku_raw_ids):
    """Read the click log and group the clicks of valid skus by user.

    The first line of the file is skipped as a header. Only rows whose last
    field is ``"1"`` and whose second field is in ``valid_sku_raw_ids`` are
    kept.

    Returns ``(user_clicks, sku_encoder, sku_decoder)`` where ``user_clicks``
    maps user id -> list of ``(encoded_sku_id, action_time)``.
    """
    user_clicks = {}
    # raw_id -> new_id and new_id -> raw_id
    sku_encoder, sku_decoder = {}, []
    sku_id = -1
    with open(datapath, "r") as f:
        f.readline()
        for line in f:
            fields = line.replace("\n", "").split(",")
            action_type = fields[-1]
            # actually, all types in the dataset is "1"
            if action_type != "1":
                continue
            user_id, sku_raw_id = fields[0], fields[1]
            if sku_raw_id not in valid_sku_raw_ids:
                continue
            action_time = fields[2]
            # encode sku_id
            sku_id = encode_id(sku_encoder, sku_decoder, sku_raw_id, sku_id)
            # add to user clicks
            user_clicks.setdefault(user_id, []).append((sku_id, action_time))

    return user_clicks, sku_encoder, sku_decoder


def encode_id(encoder, decoder, raw_id, encoded_id):
    """Return the integer id of ``raw_id``, registering it if it is new.

    ``encoder`` maps raw id -> integer id and ``decoder`` is the inverse
    list; new ids are assigned densely in order of first appearance.
    ``encoded_id`` is accepted for backward compatibility only: the next
    free id is derived from ``encoder`` itself, so an already-known
    ``raw_id`` yields its own id instead of the caller's running counter.
    """
    if raw_id in encoder:
        return encoder[raw_id]

    new_id = len(encoder)
    encoder[raw_id] = new_id
    decoder.append(raw_id)

    return new_id
sku_raw_id = line.split(",")[0] sku_ids.add(sku_raw_id) return sku_ids def encode_sku_fields(datapath, sku_encoder, sku_decoder): # sku_id,brand,shop_id,cate,market_time sku_info_encoder = {"brand": {}, "shop": {}, "cate": {}} sku_info_decoder = {"brand": [], "shop": [], "cate": []} sku_info = {} brand_id, shop_id, cate_id = -1, -1, -1 with open(datapath, "r") as f: f.readline() for line in f: line = line.replace("\n", "") fields = line.split(",") sku_raw_id = fields[0] brand_raw_id = fields[1] shop_raw_id = fields[2] cate_raw_id = fields[3] if sku_raw_id in sku_encoder: sku_id = sku_encoder[sku_raw_id] brand_id = encode_id( sku_info_encoder["brand"], sku_info_decoder["brand"], brand_raw_id, brand_id, ) shop_id = encode_id( sku_info_encoder["shop"], sku_info_decoder["shop"], shop_raw_id, shop_id, ) cate_id = encode_id( sku_info_encoder["cate"], sku_info_decoder["cate"], cate_raw_id, cate_id, ) sku_info[sku_id] = [sku_id, brand_id, shop_id, cate_id] return sku_info_encoder, sku_info_decoder, sku_info class TestEdge: def __init__(self, src, dst, label): self.src = src self.dst = dst self.label = label def split_train_test_graph(graph): """ For test true edges, 1/3 of the edges are randomly chosen and removed as ground truth in the test set, the remaining graph is taken as the training set. 
## Run
* Download the Elliptic dataset from [Kaggle](https://kaggle.com/ellipticco/elliptic-data-set).
* Unzip the dataset into a raw directory, such as /home/Elliptic/elliptic_bitcoin_dataset/.
* Create a new directory to store the processed data, such as /home/Elliptic/processed/.
* Run train.py:
def process_raw_data(raw_dir, processed_dir):
    r"""

    Description
    -----------
    Preprocess Elliptic dataset like the EvolveGCN official instruction:
    github.com/IBM/EvolveGCN/blob/master/elliptic_construction.md
    The main purpose is to convert original idx to contiguous idx start at 0.

    Parameters
    ----------
    raw_dir : str
        Directory holding ``elliptic_txs_classes.csv``,
        ``elliptic_txs_edgelist.csv`` and ``elliptic_txs_features.csv``.
    processed_dir : str
        Directory that receives ``oid_nid.npy``, ``id_label.npy``,
        ``id_time_features.npy`` and ``src_dst_time.npy``. It is created when
        missing. If all four files already exist nothing is recomputed.
    """
    oid_nid_path = os.path.join(processed_dir, "oid_nid.npy")
    id_label_path = os.path.join(processed_dir, "id_label.npy")
    id_time_features_path = os.path.join(processed_dir, "id_time_features.npy")
    src_dst_time_path = os.path.join(processed_dir, "src_dst_time.npy")
    if (
        os.path.exists(oid_nid_path)
        and os.path.exists(id_label_path)
        and os.path.exists(id_time_features_path)
        and os.path.exists(src_dst_time_path)
    ):
        print(
            "The preprocessed data already exists, skip the preprocess stage!"
        )
        return
    print("starting process raw data in {}".format(raw_dir))
    id_label = pandas.read_csv(
        os.path.join(raw_dir, "elliptic_txs_classes.csv")
    )
    src_dst = pandas.read_csv(
        os.path.join(raw_dir, "elliptic_txs_edgelist.csv")
    )
    # elliptic_txs_features.csv has no header, and its rows are in the same
    # order as elliptic_txs_classes.csv.
    id_time_features = pandas.read_csv(
        os.path.join(raw_dir, "elliptic_txs_features.csv"), header=None
    )

    # get oldId_newId
    oid_nid = id_label.loc[:, ["txId"]]
    oid_nid = oid_nid.rename(columns={"txId": "originalId"})
    oid_nid.insert(1, "newId", range(len(oid_nid)))

    # map classes unknown,1,2 to -1,1,0 and construct id_label.
    # type 1 means illicit.
    id_label = pandas.concat(
        [
            oid_nid["newId"],
            id_label["class"].map({"unknown": -1.0, "1": 1.0, "2": 0.0}),
        ],
        axis=1,
    )

    # replace originalId with newId.
    # Attention: the timestamp in features starts at 1.
    id_time_features[0] = oid_nid["newId"]

    # construct originalId2newId dict
    oid_nid_dict = oid_nid.set_index(["originalId"])["newId"].to_dict()
    # construct newId2timestamp dict
    nid_time_dict = id_time_features.set_index([0])[1].to_dict()

    # Map ids in the edge list to newId, and add a timestamp to each edge.
    # Attention: in the EvolveGCN official instruction the edge-list
    # timestamp starts at 0 rather than 1, see
    # github.com/IBM/EvolveGCN/blob/master/elliptic_construction.md
    # Here we do not follow the official instruction, which means the
    # edge-list timestamp also starts at 1.
    # In the EvolveGCN example, the edge timestamp will not be used.
    #
    # Note: in the dataset, src and dst node have the same timestamp, so the
    # edge's timestamp is taken from its source node.
    new_src = src_dst["txId1"].map(oid_nid_dict).rename("newSrc")
    new_dst = src_dst["txId2"].map(oid_nid_dict).rename("newDst")
    edge_time = new_src.map(nid_time_dict).rename("timestamp")
    src_dst_time = pandas.concat([new_src, new_dst, edge_time], axis=1)

    # Convert to numpy before saving.
    # oid_nid: int. id_label: int. id_time_features: float. src_dst_time: int.
    oid_nid = oid_nid.to_numpy(dtype=int)
    id_label = id_label.to_numpy(dtype=int)
    id_time_features = id_time_features.to_numpy(dtype=float)
    src_dst_time = src_dst_time.to_numpy(dtype=int)

    # numpy.save does not create missing parent directories.
    os.makedirs(processed_dir, exist_ok=True)
    numpy.save(oid_nid_path, oid_nid)
    numpy.save(id_label_path, id_label)
    numpy.save(id_time_features_path, id_time_features)
    numpy.save(src_dst_time_path, src_dst_time)
    print(
        "Process Elliptic raw data done, data has saved into {}".format(
            processed_dir
        )
    )
class MatGRUCell(torch.nn.Module):
    """
    GRU cell for matrix, similar to the official code.
    Please refer to section 3.4 of the paper for the formula.

    The cell owns three :class:`MatGRUGate` modules: an update gate and a
    reset gate (sigmoid) and a candidate gate (tanh).
    """

    def __init__(self, in_feats, out_feats):
        super().__init__()
        self.update = MatGRUGate(in_feats, out_feats, torch.nn.Sigmoid())

        self.reset = MatGRUGate(in_feats, out_feats, torch.nn.Sigmoid())

        self.htilda = MatGRUGate(in_feats, out_feats, torch.nn.Tanh())

    def forward(self, prev_Q, z_topk=None):
        """Return the next weight matrix.

        Parameters
        ----------
        prev_Q : torch.Tensor
            Previous matrix; must be addable to the gates' ``(in_feats,
            out_feats)`` bias.
        z_topk : torch.Tensor, optional
            Input of the gates. When omitted, ``prev_Q`` is used as the input.
        """
        if z_topk is None:
            z_topk = prev_Q

        update = self.update(z_topk, prev_Q)
        reset = self.reset(z_topk, prev_Q)

        h_cap = reset * prev_Q
        h_cap = self.htilda(z_topk, h_cap)

        new_Q = (1 - update) * prev_Q + update * h_cap

        return new_Q


class MatGRUGate(torch.nn.Module):
    """
    GRU gate for matrix, similar to the official code.
    Please refer to section 3.4 of the paper for the formula.

    Computes ``activation(W @ x + U @ hidden + bias)`` with ``W`` and ``U``
    of shape ``(rows, rows)`` and ``bias`` of shape ``(rows, cols)``.
    """

    def __init__(self, rows, cols, activation):
        super().__init__()
        self.activation = activation
        self.W = Parameter(torch.Tensor(rows, rows))
        self.U = Parameter(torch.Tensor(rows, rows))
        self.bias = Parameter(torch.Tensor(rows, cols))
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise ``W`` and ``U`` and zero the bias."""
        init.xavier_uniform_(self.W)
        init.xavier_uniform_(self.U)
        init.zeros_(self.bias)

    def forward(self, x, hidden):
        out = self.activation(
            self.W.matmul(x) + self.U.matmul(hidden) + self.bias
        )

        return out


class TopK(torch.nn.Module):
    """
    Similar to the official `egcn_h.py`. We only consider the node in a
    timestamp based subgraph. When a subgraph has fewer than `K` nodes, the
    last selected node is repeated so that the output always has `K` columns.
    Please refer to section 3.4 of the paper for the formula.
    """

    def __init__(self, feats, k):
        super().__init__()
        self.scorer = Parameter(torch.Tensor(feats, 1))
        self.reset_parameters()

        self.k = k

    def reset_parameters(self):
        """Xavier-initialise the scoring vector."""
        init.xavier_uniform_(self.scorer)

    def forward(self, node_embs):
        """Summarise node embeddings by their ``k`` highest-scoring rows.

        Parameters
        ----------
        node_embs : torch.Tensor
            Matrix with ``feats`` columns, one row per node. At least one row
            is required.

        Returns
        -------
        torch.Tensor
            Tensor of shape ``(feats, k)``: the selected rows, each scaled by
            ``tanh`` of its score, transposed.
        """
        scores = node_embs.matmul(self.scorer) / self.scorer.norm().clamp(
            min=1e-6
        )
        flat_scores = scores.view(-1)
        # topk() raises when asked for more entries than exist.
        num_selected = min(self.k, flat_scores.size(0))
        _, topk_indices = flat_scores.topk(num_selected)
        if num_selected < self.k:
            # Pad with the last (lowest-scoring) selected node.
            padding = topk_indices[-1:].expand(self.k - num_selected)
            topk_indices = torch.cat((topk_indices, padding))
        out = node_embs[topk_indices] * torch.tanh(
            scores[topk_indices].view(-1, 1)
        )
        # we need to transpose the output
        return out.t()
) ) for _ in range(num_layers - 1): self.pooling_layers.append(TopK(n_hidden, n_hidden)) self.recurrent_layers.append( MatGRUCell(in_feats=n_hidden, out_feats=n_hidden) ) self.gcn_weights_list.append( Parameter(torch.Tensor(n_hidden, n_hidden)) ) self.gnn_convs.append( GraphConv( in_feats=n_hidden, out_feats=n_hidden, bias=False, activation=nn.RReLU(), weight=False, ) ) self.mlp = nn.Sequential( nn.Linear(n_hidden, classifier_hidden), nn.ReLU(), nn.Linear(classifier_hidden, n_classes), ) self.reset_parameters() def reset_parameters(self): for gcn_weight in self.gcn_weights_list: init.xavier_uniform_(gcn_weight) def forward(self, g_list): feature_list = [] for g in g_list: feature_list.append(g.ndata["feat"]) for i in range(self.num_layers): W = self.gcn_weights_list[i] for j, g in enumerate(g_list): X_tilde = self.pooling_layers[i](feature_list[j]) W = self.recurrent_layers[i](W, X_tilde) feature_list[j] = self.gnn_convs[i]( g, feature_list[j], weight=W ) return self.mlp(feature_list[-1]) class EvolveGCNO(nn.Module): def __init__( self, in_feats=166, n_hidden=256, num_layers=2, n_classes=2, classifier_hidden=307, ): # default parameters follow the official config super(EvolveGCNO, self).__init__() self.num_layers = num_layers self.recurrent_layers = nn.ModuleList() self.gnn_convs = nn.ModuleList() self.gcn_weights_list = nn.ParameterList() # In the paper, EvolveGCN-O use LSTM as RNN layer. According to the official code, # EvolveGCN-O use GRU as RNN layer. Here we follow the official code. # See: https://github.com/IBM/EvolveGCN/blob/90869062bbc98d56935e3d92e1d9b1b4c25be593/egcn_o.py#L53 # PS: I try to use torch.nn.LSTM directly, # like [pyg_temporal](github.com/benedekrozemberczki/pytorch_geometric_temporal/blob/master/torch_geometric_temporal/nn/recurrent/evolvegcno.py) # but the performance is worse than use torch.nn.GRU. # PPS: I think torch.nn.GRU can't match the manually implemented GRU cell in the official code, # we follow the official code here. 
self.recurrent_layers.append( MatGRUCell(in_feats=in_feats, out_feats=n_hidden) ) self.gcn_weights_list.append( Parameter(torch.Tensor(in_feats, n_hidden)) ) self.gnn_convs.append( GraphConv( in_feats=in_feats, out_feats=n_hidden, bias=False, activation=nn.RReLU(), weight=False, ) ) for _ in range(num_layers - 1): self.recurrent_layers.append( MatGRUCell(in_feats=n_hidden, out_feats=n_hidden) ) self.gcn_weights_list.append( Parameter(torch.Tensor(n_hidden, n_hidden)) ) self.gnn_convs.append( GraphConv( in_feats=n_hidden, out_feats=n_hidden, bias=False, activation=nn.RReLU(), weight=False, ) ) self.mlp = nn.Sequential( nn.Linear(n_hidden, classifier_hidden), nn.ReLU(), nn.Linear(classifier_hidden, n_classes), ) self.reset_parameters() def reset_parameters(self): for gcn_weight in self.gcn_weights_list: init.xavier_uniform_(gcn_weight) def forward(self, g_list): feature_list = [] for g in g_list: feature_list.append(g.ndata["feat"]) for i in range(self.num_layers): W = self.gcn_weights_list[i] for j, g in enumerate(g_list): # Attention: I try to use the below code to set gcn.weight(similar to pyG_temporal), # but it doesn't work. It seems that the gradient function lost in this situation, # more discussion see here: https://github.com/benedekrozemberczki/pytorch_geometric_temporal/issues/80 # ==================================================== # W = self.gnn_convs[i].weight[None, :, :] # W, _ = self.recurrent_layers[i](W) # self.gnn_convs[i].weight = nn.Parameter(W.squeeze()) # ==================================================== # Remove the following line of code, it will become `GCN`. 
def _labeled_predictions(model, subgraphs, labeled_masks, index, window):
    """Predict for snapshot ``index`` and keep only its labeled nodes.

    The model is fed the snapshots ``index - window .. index`` (inclusive).
    Returns ``(predictions, labels)`` restricted to ``labeled_masks[index]``.
    """
    g_list = subgraphs[index - window : index + 1]
    predictions = model(g_list)
    mask = labeled_masks[index]
    labels = subgraphs[index].ndata["label"][mask].long()
    return predictions[mask], labels


def train(args, device):
    """Train EvolveGCN on the Elliptic dataset with early stopping.

    Each epoch trains on snapshots up to index 30, validates on 31-35 and,
    whenever the validation F1 of ``args.eval_class_id`` reaches a new best,
    evaluates on snapshots 36-48. Progress is reported with ``print``.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``raw_dir``, ``processed_dir``, ``model``, ``n_hidden``,
        ``n_layers``, ``lr``, ``n_hist_steps``, ``loss_class_weight``,
        ``eval_class_id``, ``num_epochs`` and ``patience``.
    device : torch.device
        Device that holds the model and the cached subgraphs.

    Raises
    ------
    NotImplementedError
        If ``args.model`` is neither ``"EvolveGCN-O"`` nor ``"EvolveGCN-H"``.
    """
    elliptic_dataset = EllipticDataset(
        raw_dir=args.raw_dir,
        processed_dir=args.processed_dir,
        self_loop=True,
        reverse_edge=True,
    )

    g, node_mask_by_time = elliptic_dataset.process()
    num_classes = elliptic_dataset.num_classes

    cached_subgraph = []
    cached_labeled_node_mask = []
    for node_mask in node_mask_by_time:
        # we add self loop edge when we construct full graph, not here
        node_subgraph = dgl.node_subgraph(graph=g, nodes=node_mask)
        cached_subgraph.append(node_subgraph.to(device))
        # Negative labels mark nodes without a known class.
        valid_node_mask = node_subgraph.ndata["label"] >= 0
        cached_labeled_node_mask.append(valid_node_mask)

    if args.model == "EvolveGCN-O":
        model = EvolveGCNO(
            in_feats=int(g.ndata["feat"].shape[1]),
            n_hidden=args.n_hidden,
            num_layers=args.n_layers,
        )
    elif args.model == "EvolveGCN-H":
        model = EvolveGCNH(
            in_feats=int(g.ndata["feat"].shape[1]), num_layers=args.n_layers
        )
    else:
        # Must be raised: returning the exception object would make the
        # script finish silently as if training had succeeded.
        raise NotImplementedError("Unsupported model {}".format(args.model))
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # split train, valid, test(0-30,31-35,36-48)
    # train/valid/test split follow the paper.
    train_max_index = 30
    valid_max_index = 35
    test_max_index = 48
    time_window_size = args.n_hist_steps
    loss_class_weight = [float(w) for w in args.loss_class_weight.split(",")]
    loss_class_weight = torch.Tensor(loss_class_weight).to(device)

    train_measure = Measure(
        num_classes=num_classes, target_class=args.eval_class_id
    )
    valid_measure = Measure(
        num_classes=num_classes, target_class=args.eval_class_id
    )
    test_measure = Measure(
        num_classes=num_classes, target_class=args.eval_class_id
    )

    test_res_f1 = 0
    for epoch in range(args.num_epochs):
        model.train()
        for i in range(time_window_size, train_max_index + 1):
            predictions, labels = _labeled_predictions(
                model,
                cached_subgraph,
                cached_labeled_node_mask,
                i,
                time_window_size,
            )
            loss = F.cross_entropy(
                predictions, labels, weight=loss_class_weight
            )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_measure.append_measures(predictions, labels)

        # get each epoch measures during training.
        cl_precision, cl_recall, cl_f1 = train_measure.get_total_measure()
        train_measure.update_best_f1(cl_f1, epoch)
        # reset measures for next epoch
        train_measure.reset_info()

        print(
            "Train Epoch {} | class {} | precision:{:.4f} | recall: {:.4f} | f1: {:.4f}".format(
                epoch, args.eval_class_id, cl_precision, cl_recall, cl_f1
            )
        )

        # eval
        model.eval()
        # No gradients are needed outside the training pass.
        with torch.no_grad():
            for i in range(train_max_index + 1, valid_max_index + 1):
                predictions, labels = _labeled_predictions(
                    model,
                    cached_subgraph,
                    cached_labeled_node_mask,
                    i,
                    time_window_size,
                )
                valid_measure.append_measures(predictions, labels)

        # get each epoch measure during eval.
        cl_precision, cl_recall, cl_f1 = valid_measure.get_total_measure()
        valid_measure.update_best_f1(cl_f1, epoch)
        # reset measures for next epoch
        valid_measure.reset_info()

        print(
            "Eval Epoch {} | class {} | precision:{:.4f} | recall: {:.4f} | f1: {:.4f}".format(
                epoch, args.eval_class_id, cl_precision, cl_recall, cl_f1
            )
        )

        # early stop
        if epoch - valid_measure.target_best_f1_epoch >= args.patience:
            print(
                "Best eval Epoch {}, Cur Epoch {}".format(
                    valid_measure.target_best_f1_epoch, epoch
                )
            )
            break
        # if cur valid f1 score is best, do test
        if epoch == valid_measure.target_best_f1_epoch:
            print(
                "###################Epoch {} Test###################".format(
                    epoch
                )
            )
            with torch.no_grad():
                for i in range(valid_max_index + 1, test_max_index + 1):
                    predictions, labels = _labeled_predictions(
                        model,
                        cached_subgraph,
                        cached_labeled_node_mask,
                        i,
                        time_window_size,
                    )
                    test_measure.append_measures(predictions, labels)

            # we get each subgraph measure when testing to match fig 4 in EvolveGCN paper.
            (
                cl_precisions,
                cl_recalls,
                cl_f1s,
            ) = test_measure.get_each_timestamp_measure()
            for index, (sub_p, sub_r, sub_f1) in enumerate(
                zip(cl_precisions, cl_recalls, cl_f1s)
            ):
                print(
                    " Test | Time {} | precision:{:.4f} | recall: {:.4f} | f1: {:.4f}".format(
                        valid_max_index + index + 2, sub_p, sub_r, sub_f1
                    )
                )

            # get each epoch measure during test.
            cl_precision, cl_recall, cl_f1 = test_measure.get_total_measure()
            test_measure.update_best_f1(cl_f1, epoch)
            # reset measures for next test
            test_measure.reset_info()

            test_res_f1 = cl_f1

            print(
                " Test | Epoch {} | class {} | precision:{:.4f} | recall: {:.4f} | f1: {:.4f}".format(
                    epoch, args.eval_class_id, cl_precision, cl_recall, cl_f1
                )
            )

    print(
        "Best test f1 is {}, in Epoch {}".format(
            test_measure.target_best_f1, test_measure.target_best_f1_epoch
        )
    )
    if test_measure.target_best_f1_epoch != valid_measure.target_best_f1_epoch:
        print(
            "The Epoch get best Valid measure not get the best Test measure, "
            "please checkout the test result in Epoch {}, which f1 is {}".format(
                valid_measure.target_best_f1_epoch, test_res_f1
            )
        )
def calculate_measure(tp, fn, fp):
    """Return ``(precision, recall, f1)`` from raw confusion counts.

    With no true positives all three values are 0, which also avoids a
    division by zero.
    """
    # avoid nan
    if tp == 0:
        return 0, 0, 0

    precision = tp * 1.0 / (tp + fp)
    recall = tp * 1.0 / (tp + fn)
    denominator = precision + recall
    f1 = 2.0 * (precision * recall) / denominator if denominator > 0 else 0
    return precision, recall, f1


class Measure(object):
    """Accumulates per-class confusion counts, one entry per evaluated batch."""

    def __init__(self, num_classes, target_class):
        """
        Args:
            num_classes: number of classes.
            target_class: target class we focus on, used to print info and do early stopping.
        """
        self.num_classes = num_classes
        self.target_class = target_class
        self.target_best_f1 = 0.0
        self.target_best_f1_epoch = 0
        # Creates the three per-class count tables.
        self.reset_info()

    def reset_info(self):
        """
        reset info after each epoch.
        """

        def empty_table():
            return {label: [] for label in range(self.num_classes)}

        self.true_positives = empty_table()
        self.false_positives = empty_table()
        self.false_negatives = empty_table()

    def append_measures(self, predictions, labels):
        """Append the counts of one batch for every class.

        ``predictions`` holds one row of class scores per sample and
        ``labels`` the matching class ids; the predicted class is the argmax
        over dim 1.
        """
        guessed = predictions.argmax(dim=1)
        for label in range(self.num_classes):
            is_label = labels == label
            correct = guessed[is_label] == labels[is_label]
            true_pos = correct.sum()
            false_neg = correct.size(0) - true_pos
            false_pos = (guessed == label).sum() - true_pos

            self.true_positives[label].append(true_pos.cpu())
            self.false_negatives[label].append(false_neg.cpu())
            self.false_positives[label].append(false_pos.cpu())

    def get_each_timestamp_measure(self):
        """Return three lists (precisions, recalls, f1s) of the target class,
        one element per appended batch."""
        target = self.target_class
        precisions, recalls, f1s = [], [], []
        per_batch = zip(
            self.true_positives[target],
            self.false_negatives[target],
            self.false_positives[target],
        )
        for tp, fn, fp in per_batch:
            p, r, f1 = calculate_measure(tp, fn, fp)
            precisions.append(p)
            recalls.append(r)
            f1s.append(f1)
        return precisions, recalls, f1s

    def get_total_measure(self):
        """Return (precision, recall, f1) of the target class over all
        appended batches."""
        target = self.target_class
        total_tp = sum(self.true_positives[target])
        total_fn = sum(self.false_negatives[target])
        total_fp = sum(self.false_positives[target])
        return calculate_measure(total_tp, total_fn, total_fp)

    def update_best_f1(self, cur_f1, cur_epoch):
        """Remember ``cur_f1`` and its epoch when it beats the best so far."""
        if not cur_f1 > self.target_best_f1:
            return
        self.target_best_f1 = cur_f1
        self.target_best_f1_epoch = cur_epoch
class GASDataset(DGLBuiltinDataset):
    """Bipartite user/news variant of the fake news datasets for GAS.

    The dataset holds a single heterogeneous graph with node types ``"v"``
    and ``"u"`` and edge types ``"forward"`` and ``"backward"``.

    Parameters
    ----------
    name : str
        ``"gos"`` or ``"pol"``.
    raw_dir : str, optional
        Passed through to :class:`DGLBuiltinDataset`.
    random_seed : int
        Seed of the train/validation/test edge split.
    train_size : float
        Fraction of forward edges used for training.
    val_size : float
        Fraction of forward edges used for validation.
    """

    file_urls = {"pol": "dataset/GASPOL.zip", "gos": "dataset/GASGOS.zip"}

    def __init__(
        self, name, raw_dir=None, random_seed=717, train_size=0.7, val_size=0.1
    ):
        assert name in ["gos", "pol"], "Only supports 'gos' or 'pol'."
        self.seed = random_seed
        self.train_size = train_size
        self.val_size = val_size
        url = _get_dgl_url(self.file_urls[name])
        super(GASDataset, self).__init__(name=name, url=url, raw_dir=raw_dir)

    def process(self):
        """process raw data to graph, labels and masks"""
        data = sio.loadmat(
            os.path.join(self.raw_path, f"{self.name}_retweet_graph.mat")
        )

        adj = data["graph"].tocoo()
        num_edges = len(adj.row)
        half = int(num_edges / 2)
        # Only the first half of the COO entries is kept and then mirrored
        # (presumably the stored matrix is symmetric -- confirm).
        row, col = adj.row[:half], adj.col[:half]

        graph = dgl.graph(
            (np.concatenate((row, col)), np.concatenate((col, row)))
        )
        news_labels = data["label"].squeeze()
        num_news = len(news_labels)

        node_feature = np.load(
            os.path.join(self.raw_path, f"{self.name}_node_feature.npy")
        )
        edge_feature = np.load(
            os.path.join(self.raw_path, f"{self.name}_edge_feature.npy")
        )[:half]

        graph.ndata["feat"] = th.tensor(node_feature)
        # Each kept edge and its mirrored copy share the same feature row.
        graph.edata["feat"] = th.tensor(np.tile(edge_feature, (2, 1)))
        pos_news = news_labels.nonzero()[0]

        # An edge is labeled 1 when it touches a news node with non-zero label.
        edge_labels = th.zeros(num_edges)
        edge_labels[graph.in_edges(pos_news, form="eid")] = 1
        edge_labels[graph.out_edges(pos_news, form="eid")] = 1
        graph.edata["label"] = edge_labels

        # Node ids below num_news get type 0 ("v"); the rest type 1 ("u").
        # The first half of the edges gets type 0 ("forward").
        ntypes = th.ones(graph.num_nodes(), dtype=int)
        etypes = th.ones(graph.num_edges(), dtype=int)

        ntypes[graph.nodes() < num_news] = 0
        etypes[:half] = 0

        graph.ndata["_TYPE"] = ntypes
        graph.edata["_TYPE"] = etypes

        hg = dgl.to_heterogeneous(graph, ["v", "u"], ["forward", "backward"])
        self._random_split(hg, self.seed, self.train_size, self.val_size)

        self.graph = hg

    @property
    def graph_path(self):
        """Path of the cached processed graph."""
        return os.path.join(self.save_path, self.name + "_dgl_graph.bin")

    def save(self):
        """save the graph list and the labels"""
        save_graphs(str(self.graph_path), self.graph)

    def has_cache(self):
        """check whether there are processed data in `self.save_path`"""
        return os.path.exists(self.graph_path)

    def load(self):
        """load processed data from directory `self.save_path`"""
        graph, _ = load_graphs(str(self.graph_path))
        self.graph = graph[0]

    @property
    def num_classes(self):
        """Number of classes for each graph, i.e. number of prediction tasks."""
        return 2

    def __getitem__(self, idx):
        r"""Get graph object

        Parameters
        ----------
        idx : int
            Item index

        Returns
        -------
        :class:`dgl.DGLGraph`
        """
        assert idx == 0, "This dataset has only one graph"
        return self.graph

    def __len__(self):
        r"""Number of data examples

        Return
        -------
        int
            Always 1: the dataset holds a single graph and ``__getitem__``
            only accepts index 0.
        """
        return 1

    def _random_split(self, graph, seed=717, train_size=0.7, val_size=0.1):
        """split the dataset into training set, validation set and testing set

        The forward edges are shuffled with ``seed``; the first
        ``train_size`` share becomes the training set, the last ``val_size``
        share the validation set and the remainder the test set. The same
        boolean masks are stored on both the ``"forward"`` and the
        ``"backward"`` edge type.
        """

        assert 0 <= train_size + val_size <= 1, (
            "The sum of valid training set size and validation set size "
            "must between 0 and 1 (inclusive)."
        )

        num_edges = graph.num_edges(etype="forward")
        index = np.arange(num_edges)

        index = np.random.RandomState(seed).permutation(index)
        train_end = int(train_size * num_edges)
        val_start = num_edges - int(val_size * num_edges)
        splits = {
            "train_mask": index[:train_end],
            "val_mask": index[val_start:],
            "test_mask": index[train_end:val_start],
        }

        for mask_name, members in splits.items():
            mask = np.zeros(num_edges, dtype=np.bool_)
            mask[members] = True
            graph.edges["forward"].data[mask_name] = th.tensor(mask)
            graph.edges["backward"].data[mask_name] = th.tensor(mask)
def _edge_metrics(logits, labels, idx, loss_fn, precision):
    """Return ``(loss, recall_at_p, f1, auc)`` for the edges selected by ``idx``.

    Parameters
    ----------
    logits : torch.Tensor
        Per-edge class scores; column 1 is used as the positive-class score.
    labels : torch.Tensor
        Per-edge integer labels.
    idx : torch.Tensor
        Indices of the edges to evaluate.
    loss_fn : callable
        Loss applied to the selected logits and labels.
    precision : float
        Threshold ``p``; the reported recall is the best recall among the
        operating points whose precision is strictly greater than ``p``.

    Returns
    -------
    tuple
        ``loss`` is returned as a tensor so the caller can back-propagate it.
    """
    sel_logits = logits[idx]
    sel_labels = labels[idx]
    loss = loss_fn(sel_logits, sel_labels)

    y_true = sel_labels.cpu()
    y_score = sel_logits[:, 1].detach().cpu()
    f1 = f1_score(y_true, sel_logits.argmax(dim=1).cpu())
    auc = roc_auc_score(y_true, y_score)
    pre, re, _ = precision_recall_curve(y_true, y_score)
    rap = re[pre > precision].max()
    return loss, rap, f1, auc


def main(args):
    """Train GAS on the full graph and print train/validation/test metrics."""
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = GASDataset(args.dataset)
    graph = dataset[0]

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    # binary classification
    num_classes = dataset.num_classes

    # retrieve labels of ground truth
    labels = graph.edges["forward"].data["label"].to(device).long()

    # Extract edge and node features
    e_feat = graph.edges["forward"].data["feat"].to(device)
    u_feat = graph.nodes["u"].data["feat"].to(device)
    v_feat = graph.nodes["v"].data["feat"].to(device)

    # retrieve masks for train/validation/test and turn them into indices
    train_mask = graph.edges["forward"].data["train_mask"]
    val_mask = graph.edges["forward"].data["val_mask"]
    test_mask = graph.edges["forward"].data["test_mask"]

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = GAS(
        e_in_dim=e_feat.shape[-1],
        u_in_dim=u_feat.shape[-1],
        v_in_dim=v_feat.shape[-1],
        e_hid_dim=args.e_hid_dim,
        u_hid_dim=args.u_hid_dim,
        v_hid_dim=args.v_hid_dim,
        out_dim=num_classes,
        num_layers=args.num_layers,
        dropout=args.dropout,
        activation=F.relu,
    )
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    loss_fn = th.nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        # Training and validation share one full-graph forward pass, so the
        # validation numbers are computed from training-mode logits.
        model.train()
        logits = model(graph, e_feat, u_feat, v_feat)

        tr_loss, tr_rap, tr_f1, tr_auc = _edge_metrics(
            logits, labels, train_idx, loss_fn, args.precision
        )
        valid_loss, valid_rap, valid_f1, valid_auc = _edge_metrics(
            logits, labels, val_idx, loss_fn, args.precision
        )

        # backward
        optimizer.zero_grad()
        tr_loss.backward()
        optimizer.step()

        # Print out performance
        print(
            "In epoch {}, Train R@P: {:.4f} | Train F1: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
            "Valid R@P: {:.4f} | Valid F1: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
                epoch,
                tr_rap,
                tr_f1,
                tr_auc,
                tr_loss.item(),
                valid_rap,
                valid_f1,
                valid_auc,
                valid_loss.item(),
            )
        )

    # Test after all epochs. No gradients are needed here, so skip building
    # the autograd graph for the full-graph forward pass.
    model.eval()
    with th.no_grad():
        logits = model(graph, e_feat, u_feat, v_feat)
        test_loss, test_rap, test_f1, test_auc = _edge_metrics(
            logits, labels, test_idx, loss_fn, args.precision
        )

    print(
        "Test R@P: {:.4f} | Test F1: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
            test_rap, test_f1, test_auc, test_loss.item()
        )
    )
type=str, default="pol", help="'pol', or 'gos'" ) parser.add_argument( "--gpu", type=int, default=-1, help="GPU Index. Default: -1, using CPU." ) parser.add_argument( "--e_hid_dim", type=int, default=128, help="Hidden layer dimension for edges", ) parser.add_argument( "--u_hid_dim", type=int, default=128, help="Hidden layer dimension for source nodes", ) parser.add_argument( "--v_hid_dim", type=int, default=128, help="Hidden layer dimension for destination nodes", ) parser.add_argument( "--num_layers", type=int, default=2, help="Number of GCN layers" ) parser.add_argument( "--max_epoch", type=int, default=100, help="The max number of epochs. Default: 100", ) parser.add_argument( "--lr", type=float, default=0.001, help="Learning rate. Default: 1e-3" ) parser.add_argument( "--dropout", type=float, default=0.0, help="Dropout rate. Default: 0.0" ) parser.add_argument( "--weight_decay", type=float, default=5e-4, help="Weight Decay. Default: 0.0005", ) parser.add_argument( "--precision", type=float, default=0.9, help="The value p in recall@p precision. 
def _run_batch(model, edge_subgraph, blocks, device):
    """Move one sampled mini-batch to ``device`` and return ``(logits, labels)``."""
    blocks = [b.to(device) for b in blocks]
    edge_subgraph = edge_subgraph.to(device)
    # Input features come from the outermost (first) block.
    u_feat = blocks[0].srcdata["feat"]["u"]
    v_feat = blocks[0].srcdata["feat"]["v"]
    f_feat = blocks[0].edges["forward"].data["feat"]
    b_feat = blocks[0].edges["backward"].data["feat"]
    labels = edge_subgraph.edges["forward"].data["label"].long()
    logits = model(edge_subgraph, blocks, f_feat, b_feat, u_feat, v_feat)
    return logits, labels


def _batch_metrics(logits, labels, precision):
    """Return ``(recall_at_p, f1, auc)`` for one mini-batch."""
    y_true = labels.cpu()
    y_score = logits[:, 1].detach().cpu()
    f1 = f1_score(y_true, logits.argmax(dim=1).cpu())
    auc = roc_auc_score(y_true, y_score)
    pre, re, _ = precision_recall_curve(y_true, y_score)
    return re[pre > precision].max(), f1, auc


def evaluate(model, loss_fn, dataloader, device="cpu", precision=0.9):
    """Average per-batch metrics of ``model`` over ``dataloader``.

    Parameters
    ----------
    model : callable
        Called as ``model(edge_subgraph, blocks, f_feat, b_feat, u_feat, v_feat)``.
    loss_fn : callable
        Loss applied to the batch logits and labels.
    dataloader : iterable
        Yields ``(input_nodes, edge_subgraph, blocks)`` triples.
    device : str
        Device the batches are moved to.
    precision : float
        The value ``p`` in recall@p. It is passed explicitly rather than
        read from a module-level ``args`` object, so the function also
        works when this file is imported instead of run as a script.

    Returns
    -------
    tuple
        ``(recall_at_p, f1, auc, loss)``, each averaged over the batches.
    """
    loss = 0
    f1 = 0
    auc = 0
    rap = 0
    num_blocks = 0
    # Evaluation never back-propagates, so do not record the autograd graph.
    with th.no_grad():
        for _, edge_subgraph, blocks in dataloader:
            logits, labels = _run_batch(model, edge_subgraph, blocks, device)
            loss += loss_fn(logits, labels).item()
            batch_rap, batch_f1, batch_auc = _batch_metrics(
                logits, labels, precision
            )
            rap += batch_rap
            f1 += batch_f1
            auc += batch_auc
            num_blocks += 1

    return (
        rap / num_blocks,
        f1 / num_blocks,
        auc / num_blocks,
        loss / num_blocks,
    )


def main(args):
    """Train GAS with neighbor sampling and print train/validation/test metrics."""
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    dataset = GASDataset(args.dataset)
    graph = dataset[0]

    # generate mini-batch only for forward edges
    sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 10])
    sampler = dgl.dataloading.as_edge_prediction_sampler(sampler)

    def make_loader(mask_name):
        # squeeze(1) keeps a 1-D id tensor even when a split has one edge.
        eid_dict = {
            "forward": graph.edges["forward"]
            .data[mask_name]
            .nonzero()
            .squeeze(1)
        }
        return dgl.dataloading.DataLoader(
            graph,
            eid_dict,
            sampler,
            batch_size=args.batch_size,
            shuffle=True,
            drop_last=False,
            num_workers=args.num_workers,
        )

    tr_loader = make_loader("train_mask")
    val_loader = make_loader("val_mask")
    test_loader = make_loader("test_mask")

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    # binary classification
    num_classes = dataset.num_classes

    # Input feature sizes
    e_feats = graph.edges["forward"].data["feat"].shape[-1]
    u_feats = graph.nodes["u"].data["feat"].shape[-1]
    v_feats = graph.nodes["v"].data["feat"].shape[-1]

    # Step 2: Create model =================================================================== #
    model = GAS(
        e_in_dim=e_feats,
        u_in_dim=u_feats,
        v_in_dim=v_feats,
        e_hid_dim=args.e_hid_dim,
        u_hid_dim=args.u_hid_dim,
        v_hid_dim=args.v_hid_dim,
        out_dim=num_classes,
        num_layers=args.num_layers,
        dropout=args.dropout,
        activation=F.relu,
    )
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    loss_fn = th.nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        model.train()
        tr_loss = 0
        tr_f1 = 0
        tr_auc = 0
        tr_rap = 0
        tr_blocks = 0
        for _, edge_subgraph, blocks in tr_loader:
            logits, labels = _run_batch(model, edge_subgraph, blocks, device)

            # compute loss and running metrics
            batch_loss = loss_fn(logits, labels)
            tr_loss += batch_loss.item()
            batch_rap, batch_f1, batch_auc = _batch_metrics(
                logits, labels, args.precision
            )
            tr_rap += batch_rap
            tr_f1 += batch_f1
            tr_auc += batch_auc
            tr_blocks += 1

            # backward
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # validation
        model.eval()
        val_rap, val_f1, val_auc, val_loss = evaluate(
            model, loss_fn, val_loader, device, precision=args.precision
        )

        # Print out performance
        print(
            "In epoch {}, Train R@P: {:.4f} | Train F1: {:.4f} | Train AUC: {:.4f} | Train Loss: {:.4f}; "
            "Valid R@P: {:.4f} | Valid F1: {:.4f} | Valid AUC: {:.4f} | Valid loss: {:.4f}".format(
                epoch,
                tr_rap / tr_blocks,
                tr_f1 / tr_blocks,
                tr_auc / tr_blocks,
                tr_loss / tr_blocks,
                val_rap,
                val_f1,
                val_auc,
                val_loss,
            )
        )

    # Test with mini batch after all epoch
    model.eval()
    test_rap, test_f1, test_auc, test_loss = evaluate(
        model, loss_fn, test_loader, device, precision=args.precision
    )
    print(
        "Test R@P: {:.4f} | Test F1: {:.4f} | Test AUC: {:.4f} | Test loss: {:.4f}".format(
            test_rap, test_f1, test_auc, test_loss
        )
    )
) parser.add_argument( "--e_hid_dim", type=int, default=128, help="Hidden layer dimension for edges", ) parser.add_argument( "--u_hid_dim", type=int, default=128, help="Hidden layer dimension for source nodes", ) parser.add_argument( "--v_hid_dim", type=int, default=128, help="Hidden layer dimension for destination nodes", ) parser.add_argument( "--num_layers", type=int, default=2, help="Number of GCN layers" ) parser.add_argument( "--max_epoch", type=int, default=100, help="The max number of epochs. Default: 100", ) parser.add_argument( "--lr", type=float, default=0.001, help="Learning rate. Default: 1e-3" ) parser.add_argument( "--dropout", type=float, default=0.0, help="Dropout rate. Default: 0.0" ) parser.add_argument( "--batch_size", type=int, default=64, help="Size of mini-batches. Default: 64", ) parser.add_argument( "--num_workers", type=int, default=4, help="Number of node dataloader" ) parser.add_argument( "--weight_decay", type=float, default=5e-4, help="Weight Decay. Default: 0.0005", ) parser.add_argument( "--precision", type=float, default=0.9, help="The value p in recall@p precision. 
class GASConv(nn.Module):
    """One layer of GAS.

    Produces updated representations for "forward" edges, "u" nodes and
    "v" nodes from the current edge/node features of a heterograph that
    has "forward" and "backward" edge types.
    """

    def __init__(
        self,
        e_in_dim,
        u_in_dim,
        v_in_dim,
        e_out_dim,
        u_out_dim,
        v_out_dim,
        activation=None,
        dropout=0,
    ):
        super(GASConv, self).__init__()

        self.activation = activation
        self.dropout = nn.Dropout(dropout)

        # Projections summed to form the new edge representation.
        self.e_linear = nn.Linear(e_in_dim, e_out_dim)
        self.u_linear = nn.Linear(u_in_dim, e_out_dim)
        self.v_linear = nn.Linear(v_in_dim, e_out_dim)

        # Attention projections: each node type is mapped to the size of the
        # concatenated (opposite node, edge) feature it is dotted with.
        self.W_ATTN_u = nn.Linear(u_in_dim, v_in_dim + e_in_dim)
        self.W_ATTN_v = nn.Linear(v_in_dim, u_in_dim + e_in_dim)

        # the proportion of h_u and h_Nu are specified as 1/2 in formula 8
        nu_dim = int(u_out_dim / 2)
        nv_dim = int(v_out_dim / 2)

        self.W_u = nn.Linear(v_in_dim + e_in_dim, nu_dim)
        self.W_v = nn.Linear(u_in_dim + e_in_dim, nv_dim)

        self.Vu = nn.Linear(u_in_dim, u_out_dim - nu_dim)
        self.Vv = nn.Linear(v_in_dim, v_out_dim - nv_dim)

    def forward(self, g, e_feat, u_feat, v_feat):
        """Run one GAS layer and return ``(he, hu, hv)``."""

        def sum_projections(edges):
            return {
                "he": edges.data["he_e"]
                + edges.src["he_u"]
                + edges.dst["he_v"]
            }

        def concat_src_and_edge(out_name):
            def _concat(edges):
                return {
                    out_name: th.cat([edges.src["h"], edges.data["h"]], -1)
                }

            return _concat

        def scale_by_softmax(in_name):
            def _scale(edges):
                return {"attn": edges.data[in_name] * edges.data["sfm"]}

            return _scale

        with g.local_scope():
            g.nodes["u"].data["h"] = u_feat
            g.nodes["v"].data["h"] = v_feat
            g.edges["forward"].data["h"] = e_feat
            g.edges["backward"].data["h"] = e_feat

            # formula 3 and 4 (optimized implementation to save memory)
            g.nodes["u"].data["he_u"] = self.u_linear(u_feat)
            g.nodes["v"].data["he_v"] = self.v_linear(v_feat)
            g.edges["forward"].data["he_e"] = self.e_linear(e_feat)
            g.apply_edges(sum_projections, etype="forward")
            he = g.edges["forward"].data["he"]
            if self.activation is not None:
                he = self.activation(he)

            # formula 6
            g.apply_edges(concat_src_and_edge("h_ve"), etype="backward")
            g.apply_edges(concat_src_and_edge("h_ue"), etype="forward")

            # formula 7, self-attention
            g.nodes["u"].data["h_att_u"] = self.W_ATTN_u(u_feat)
            g.nodes["v"].data["h_att_v"] = self.W_ATTN_v(v_feat)

            # Step 1: dot product
            g.apply_edges(
                fn.e_dot_v("h_ve", "h_att_u", "edotv"), etype="backward"
            )
            g.apply_edges(
                fn.e_dot_v("h_ue", "h_att_v", "edotv"), etype="forward"
            )

            # Step 2. softmax
            for etype in ("backward", "forward"):
                g.edges[etype].data["sfm"] = edge_softmax(
                    g[etype], g.edges[etype].data["edotv"]
                )

            # Step 3. Broadcast softmax value to each edge, and then attention is done
            g.apply_edges(scale_by_softmax("h_ve"), etype="backward")
            g.apply_edges(scale_by_softmax("h_ue"), etype="forward")

            # Step 4. Aggregate attention to dst,user nodes, so formula 7 is done
            g.update_all(
                fn.copy_e("attn", "m"), fn.sum("m", "agg_u"), etype="backward"
            )
            g.update_all(
                fn.copy_e("attn", "m"), fn.sum("m", "agg_v"), etype="forward"
            )

            # formula 5
            h_nu = self.W_u(g.nodes["u"].data["agg_u"])
            h_nv = self.W_v(g.nodes["v"].data["agg_v"])
            if self.activation is not None:
                h_nu = self.activation(h_nu)
                h_nv = self.activation(h_nv)

            # Dropout (order kept: edge, then u-neighbourhood, then v-neighbourhood)
            he = self.dropout(he)
            h_nu = self.dropout(h_nu)
            h_nv = self.dropout(h_nv)

            # formula 8
            hu = th.cat([self.Vu(u_feat), h_nu], -1)
            hv = th.cat([self.Vv(v_feat), h_nv], -1)

            return he, hu, hv
class MLP(nn.Module):
    """Linear prediction head that scores every "forward" edge.

    The score of an edge is a linear map of the concatenation of the edge
    feature, its source-node feature and its destination-node feature.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.W = nn.Linear(in_dim, out_dim)

    def apply_edges(self, edges):
        """Edge function: concatenate (edge, src, dst) features and project them."""
        stacked = th.cat(
            [edges.data["h"], edges.src["h"], edges.dst["h"]], -1
        )
        return {"score": self.W(stacked)}

    def forward(self, g, e_feat, u_feat, v_feat):
        """Return the per-edge scores of the "forward" edges of ``g``."""
        with g.local_scope():
            # Features are written inside a local scope so ``g`` itself is
            # left unchanged once the scores have been read out.
            g.edges["forward"].data["h"] = e_feat
            g.nodes["u"].data["h"] = u_feat
            g.nodes["v"].data["h"] = v_feat
            g.apply_edges(self.apply_edges, etype="forward")
            scores = g.edges["forward"].data["score"]
            return scores
g.srcnodes["u"].data.update( {"he_u": self.u_linear(g.srcnodes["u"].data["h"])} ) g.srcnodes["v"].data.update( {"he_v": self.v_linear(g.srcnodes["v"].data["h"])} ) g.dstnodes["u"].data.update( {"he_u": self.u_linear(g.dstnodes["u"].data["h"])} ) g.dstnodes["v"].data.update( {"he_v": self.v_linear(g.dstnodes["v"].data["h"])} ) g.edges["forward"].data.update({"he_e": self.e_linear(f_feat)}) g.edges["backward"].data.update({"he_e": self.e_linear(b_feat)}) g.apply_edges( lambda edges: { "he": edges.data["he_e"] + edges.dst["he_u"] + edges.src["he_v"] }, etype="backward", ) g.apply_edges( lambda edges: { "he": edges.data["he_e"] + edges.src["he_u"] + edges.dst["he_v"] }, etype="forward", ) hf = g.edges["forward"].data["he"] hb = g.edges["backward"].data["he"] if self.activation is not None: hf = self.activation(hf) hb = self.activation(hb) # formula 6 g.apply_edges( lambda edges: { "h_ve": th.cat([edges.src["h"], edges.data["h"]], -1) }, etype="backward", ) g.apply_edges( lambda edges: { "h_ue": th.cat([edges.src["h"], edges.data["h"]], -1) }, etype="forward", ) # formula 7, self-attention g.srcnodes["u"].data["h_att_u"] = self.W_ATTN_u( g.srcnodes["u"].data["h"] ) g.srcnodes["v"].data["h_att_v"] = self.W_ATTN_v( g.srcnodes["v"].data["h"] ) g.dstnodes["u"].data["h_att_u"] = self.W_ATTN_u( g.dstnodes["u"].data["h"] ) g.dstnodes["v"].data["h_att_v"] = self.W_ATTN_v( g.dstnodes["v"].data["h"] ) # Step 1: dot product g.apply_edges(fn.e_dot_v("h_ve", "h_att_u", "edotv"), etype="backward") g.apply_edges(fn.e_dot_v("h_ue", "h_att_v", "edotv"), etype="forward") # Step 2. softmax g.edges["backward"].data["sfm"] = edge_softmax( g["backward"], g.edges["backward"].data["edotv"] ) g.edges["forward"].data["sfm"] = edge_softmax( g["forward"], g.edges["forward"].data["edotv"] ) # Step 3. 
class GAS(nn.Module):
    """Stack of ``GASConv`` layers followed by an edge-level ``MLP`` predictor.

    This variant consumes sampled blocks: layer ``i`` runs on ``blocks[i]``
    and the final prediction is made on the edge subgraph.
    """

    def __init__(
        self,
        e_in_dim,
        u_in_dim,
        v_in_dim,
        e_hid_dim,
        u_hid_dim,
        v_hid_dim,
        out_dim,
        num_layers=2,
        dropout=0.0,
        activation=None,
    ):
        super(GAS, self).__init__()
        self.e_in_dim = e_in_dim
        self.u_in_dim = u_in_dim
        self.v_in_dim = v_in_dim
        self.e_hid_dim = e_hid_dim
        self.u_hid_dim = u_hid_dim
        self.v_hid_dim = v_hid_dim
        self.out_dim = out_dim
        self.num_layer = num_layers
        self.dropout = dropout
        self.activation = activation

        # The predictor is created before the conv layers, matching the
        # original parameter-initialisation order.
        self.predictor = MLP(e_hid_dim + u_hid_dim + v_hid_dim, out_dim)
        self.layers = nn.ModuleList()

        # First entry is the input layer; the remaining n - 1 entries are
        # hidden layers that map hidden sizes to hidden sizes.
        hidden_dims = (self.e_hid_dim, self.u_hid_dim, self.v_hid_dim)
        input_dims = [(self.e_in_dim, self.u_in_dim, self.v_in_dim)]
        input_dims += [hidden_dims] * (self.num_layer - 1)
        for e_dim, u_dim, v_dim in input_dims:
            self.layers.append(
                GASConv(
                    e_dim,
                    u_dim,
                    v_dim,
                    self.e_hid_dim,
                    self.u_hid_dim,
                    self.v_hid_dim,
                    activation=self.activation,
                    dropout=self.dropout,
                )
            )

    def forward(self, subgraph, blocks, f_feat, b_feat, u_feat, v_feat):
        """Apply each layer to its block, then score the subgraph's forward edges."""
        # Forward of n layers of GAS
        for conv, block in zip(self.layers, blocks):
            num_fwd = block.num_edges(etype="forward")
            num_bwd = block.num_edges(etype="backward")
            f_feat, b_feat, u_feat, v_feat = conv(
                block, f_feat[:num_fwd], b_feat[:num_bwd], u_feat, v_feat
            )

        # return the result of final prediction layer
        num_target = subgraph.num_edges(etype="forward")
        return self.predictor(subgraph, f_feat[:num_target], u_feat, v_feat)
def evaluate(g, features, labels, mask, model):
    """Return the classification accuracy of ``model`` on the masked nodes.

    Parameters
    ----------
    g : graph
        Passed through to ``model`` unchanged.
    features : torch.Tensor
        Node features passed to ``model``.
    labels : torch.Tensor
        Integer class label per node.
    mask : torch.Tensor
        Selects the nodes that are scored.
    model : torch.nn.Module
        Called as ``model(g, features)``; switched to eval mode here.

    Returns
    -------
    float
        Fraction of selected nodes whose arg-max prediction equals the
        label, or ``0.0`` when the mask selects no node (instead of
        raising ``ZeroDivisionError``).
    """
    model.eval()
    with torch.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        if len(labels) == 0:
            return 0.0
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)
default="cora", help="Dataset name ('cora', 'citeseer', 'pubmed').", ) parser.add_argument( "--dt", type=str, default="float", help="data type(float, bfloat16)", ) args = parser.parse_args() print(f"Training with DGL built-in GATConv module.") # load and preprocess dataset transform = ( AddSelfLoop() ) # by default, it will first remove self-loops to prevent duplication if args.dataset == "cora": data = CoraGraphDataset(transform=transform) elif args.dataset == "citeseer": data = CiteseerGraphDataset(transform=transform) elif args.dataset == "pubmed": data = PubmedGraphDataset(transform=transform) else: raise ValueError("Unknown dataset: {}".format(args.dataset)) g = data[0] device = torch.device("cuda" if torch.cuda.is_available() else "cpu") g = g.int().to(device) features = g.ndata["feat"] labels = g.ndata["label"] masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"] # create GAT model in_size = features.shape[1] out_size = data.num_classes model = GAT(in_size, 8, out_size, heads=[8, 1]).to(device) # convert model and graph to bfloat16 if needed if args.dt == "bfloat16": g = dgl.to_bfloat16(g) features = features.to(dtype=torch.bfloat16) model = model.to(dtype=torch.bfloat16) # model training print("Training...") train(g, features, labels, masks, model) # test the model print("Testing...") acc = evaluate(g, features, labels, masks[2], model) print("Test accuracy {:.4f}".format(acc)) ================================================ FILE: examples/pytorch/gat/train_ppi.py ================================================ import dgl.nn as dglnn import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from dgl.data.ppi import PPIDataset from dgl.dataloading import GraphDataLoader from sklearn.metrics import f1_score class GAT(nn.Module): def __init__(self, in_size, hid_size, out_size, heads): super().__init__() self.gat_layers = nn.ModuleList() # three-layer GAT self.gat_layers.append( dglnn.GATConv(in_size, hid_size, 
def evaluate_in_batches(dataloader, device, model):
    """Return the mean per-batch score of ``model`` over ``dataloader``.

    Parameters
    ----------
    dataloader : iterable
        Yields batched graphs that carry ``ndata["feat"]`` and
        ``ndata["label"]``.
    device : torch.device or str
        Device each batch is moved to before scoring.
    model : torch.nn.Module
        Model handed to ``evaluate`` together with each batch.

    Returns
    -------
    float
        Average of the scores returned by ``evaluate``; ``0.0`` when the
        dataloader yields no batch (the loop variable was previously left
        unbound in that case).
    """
    total_score = 0
    num_batches = 0
    for batched_graph in dataloader:
        batched_graph = batched_graph.to(device)
        # Cast to float the same way the training loop does.
        features = batched_graph.ndata["feat"].float()
        labels = batched_graph.ndata["label"]
        total_score += evaluate(batched_graph, features, labels, model)
        num_batches += 1
    if num_batches == 0:
        return 0.0
    return total_score / num_batches  # return average score
(F1-score) {:.4f} ".format( avg_score ) ) if __name__ == "__main__": print(f"Training PPI Dataset with DGL built-in GATConv module.") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load and preprocess datasets train_dataset = PPIDataset(mode="train") val_dataset = PPIDataset(mode="valid") test_dataset = PPIDataset(mode="test") features = train_dataset[0].ndata["feat"] # create GAT model in_size = features.shape[1] out_size = train_dataset.num_classes model = GAT(in_size, 256, out_size, heads=[4, 4, 6]).to(device) # model training print("Training...") train_dataloader = GraphDataLoader(train_dataset, batch_size=2) val_dataloader = GraphDataLoader(val_dataset, batch_size=2) train(train_dataloader, val_dataloader, device, model) # test the model print("Testing...") test_dataloader = GraphDataLoader(test_dataset, batch_size=2) avg_score = evaluate_in_batches(test_dataloader, device, model) print("Test Accuracy (F1-score) {:.4f}".format(avg_score)) ================================================ FILE: examples/pytorch/gatv2/README.md ================================================ Graph Attention Networks v2 (GATv2) ============ - Paper link: [How Attentive are Graph Attention Networks?](https://arxiv.org/pdf/2105.14491.pdf) - Author's code repo: [https://github.com/tech-srl/how_attentive_are_gats](https://github.com/tech-srl/how_attentive_are_gats). - Annotated implementation: [https://nn.labml.ai/graphs/gatv2/index.html](https://nn.labml.ai/graphs/gatv2/index.html). Dependencies ------------ - torch - requests - scikit-learn How to run ---------- Run with the following: ```bash python3 train.py --dataset=cora ``` ```bash python3 train.py --dataset=citeseer ``` ```bash python3 train.py --dataset=pubmed ``` Results ------- | Dataset | Test Accuracy | | -------- | ------------- | | Cora | 82.10 | | Citeseer | 70.00 | | Pubmed | 77.2 | * All the accuracy numbers are obtained after 200 epochs.
class GATv2(nn.Module):
    """Stack of ``GATv2Conv`` layers with a mean-over-heads output layer.

    The module list holds an input layer (never residual), ``num_layers - 1``
    hidden layers and one output layer. Every layer is built with
    ``bias=False`` and ``share_weights=True``.

    Parameters
    ----------
    num_layers : int
        Number of layers applied before the output projection.
    in_dim : int
        Input feature size.
    num_hidden : int
        Per-head output size of the non-output layers.
    num_classes : int
        Per-head output size of the output layer.
    heads : sequence of int
        Number of heads per layer; ``heads[-1]`` belongs to the output layer.
    activation : callable or None
        Activation passed to every layer except the output layer.
    feat_drop, attn_drop, negative_slope
        Forwarded positionally to each ``GATv2Conv``.
    residual : bool
        Residual flag for the hidden and output layers.
    """

    def __init__(
        self,
        num_layers,
        in_dim,
        num_hidden,
        num_classes,
        heads,
        activation,
        feat_drop,
        attn_drop,
        negative_slope,
        residual,
    ):
        super().__init__()
        self.num_layers = num_layers
        self.gatv2_layers = nn.ModuleList()
        self.activation = activation

        def build_layer(in_feats, out_feats, n_heads, use_residual, act):
            # All layers share dropout / slope settings and weight sharing.
            return GATv2Conv(
                in_feats,
                out_feats,
                n_heads,
                feat_drop,
                attn_drop,
                negative_slope,
                use_residual,
                act,
                bias=False,
                share_weights=True,
            )

        # input projection (no residual)
        self.gatv2_layers.append(
            build_layer(in_dim, num_hidden, heads[0], False, self.activation)
        )
        # hidden layers: multi-head output is flattened in ``forward``, so
        # the input size is num_hidden * (heads of the previous layer)
        for idx in range(1, num_layers):
            hidden_layer = build_layer(
                num_hidden * heads[idx - 1],
                num_hidden,
                heads[idx],
                residual,
                self.activation,
            )
            self.gatv2_layers.append(hidden_layer)
        # output projection (no activation)
        self.gatv2_layers.append(
            build_layer(
                num_hidden * heads[-2],
                num_classes,
                heads[-1],
                residual,
                None,
            )
        )

    def forward(self, g, inputs):
        """Return logits: heads are flattened between layers, averaged last."""
        features = inputs
        for idx in range(self.num_layers):
            layer = self.gatv2_layers[idx]
            features = layer(g, features).flatten(1)
        # output projection
        output_layer = self.gatv2_layers[-1]
        return output_layer(g, features).mean(1)
class EarlyStopping:
    """Stop training once the validation score stops improving.

    The score is treated as "higher is better" (e.g. accuracy). Whenever the
    score matches or beats the best value seen so far, the model's
    ``state_dict`` is written to ``checkpoint_path`` and the patience counter
    is reset; otherwise the counter is incremented.

    Parameters
    ----------
    patience : int, optional
        Number of consecutive non-improving steps tolerated before
        ``early_stop`` becomes True. (Default: 10)
    checkpoint_path : str, optional
        File the best ``state_dict`` is saved to.
        (Default: "es_checkpoint.pt")
    """

    def __init__(self, patience=10, checkpoint_path="es_checkpoint.pt"):
        self.patience = patience
        self.checkpoint_path = checkpoint_path
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, acc, model):
        """Record one validation score and return whether to stop.

        Parameters
        ----------
        acc : float
            Validation score of the current epoch (higher is better).
        model : torch.nn.Module
            Model to checkpoint when the score does not get worse.

        Returns
        -------
        bool
            True once ``patience`` consecutive worse scores were seen.
        """
        score = acc
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(model)
        elif score < self.best_score:
            self.counter += 1
            print(
                f"EarlyStopping counter: {self.counter} out of {self.patience}"
            )
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            # Ties count as "not worse": refresh the checkpoint and reset.
            self.best_score = score
            self.save_checkpoint(model)
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        """Save the model's ``state_dict`` to ``self.checkpoint_path``."""
        torch.save(model.state_dict(), self.checkpoint_path)
if __name__ == "__main__":
    # Command-line entry point for the GATv2 node-classification example.
    # The description previously read "GAT", a leftover from the GAT example.
    parser = argparse.ArgumentParser(description="GATv2")
    # Adds the shared dataset-selection arguments (``args.dataset`` is read
    # by ``main``).
    register_data_args(parser)
    parser.add_argument(
        "--gpu",
        type=int,
        default=-1,
        help="which GPU to use. Set -1 to use CPU.",
    )
    parser.add_argument(
        "--epochs", type=int, default=200, help="number of training epochs"
    )
    parser.add_argument(
        "--num-heads",
        type=int,
        default=8,
        help="number of hidden attention heads",
    )
    parser.add_argument(
        "--num-out-heads",
        type=int,
        default=1,
        help="number of output attention heads",
    )
    parser.add_argument(
        "--num-layers", type=int, default=1, help="number of hidden layers"
    )
    parser.add_argument(
        "--num-hidden", type=int, default=8, help="number of hidden units"
    )
    parser.add_argument(
        "--residual",
        action="store_true",
        default=False,
        help="use residual connection",
    )
    parser.add_argument(
        "--in-drop", type=float, default=0.7, help="input feature dropout"
    )
    parser.add_argument(
        "--attn-drop", type=float, default=0.7, help="attention dropout"
    )
    parser.add_argument("--lr", type=float, default=0.005, help="learning rate")
    parser.add_argument(
        "--weight-decay", type=float, default=5e-4, help="weight decay"
    )
    parser.add_argument(
        "--negative-slope",
        type=float,
        default=0.2,
        help="the negative slope of leaky relu",
    )
    parser.add_argument(
        "--early-stop",
        action="store_true",
        default=False,
        help="indicates whether to use early stop or not",
    )
    parser.add_argument(
        "--fastmode",
        action="store_true",
        default=False,
        help="skip re-evaluate the validation set",
    )
    args = parser.parse_args()
    print(args)

    main(args)
Credit: Jiani Zhang ([@jennyzhang0215](https://github.com/jennyzhang0215)) ## Dependencies * PyTorch 1.2+ * pandas * torchtext 0.9+ (if using user and item contents as node features) * spacy (if using user and item contents as node features) - You will also need to run `python -m spacy download en_core_web_sm` ## Data Supported datasets: ml-100k, ml-1m, ml-10m ## How to run ### Train with full-graph ml-100k, no feature ```bash python3 train.py --data_name=ml-100k --use_one_hot_fea --gcn_agg_accum=stack ``` Results: RMSE=0.9088 (0.910 reported) ml-100k, with feature ```bash python3 train.py --data_name=ml-100k --gcn_agg_accum=stack ``` Results: RMSE=0.9448 (0.905 reported) ml-1m, no feature ```bash python3 train.py --data_name=ml-1m --gcn_agg_accum=sum --use_one_hot_fea ``` Results: RMSE=0.8377 (0.832 reported) ml-10m, no feature ```bash python3 train.py --data_name=ml-10m --gcn_agg_accum=stack --gcn_dropout=0.3 \ --train_lr=0.001 --train_min_lr=0.0001 --train_max_iter=15000 \ --use_one_hot_fea --gen_r_num_basis_func=4 ``` Results: RMSE=0.7800 (0.777 reported) Testbed: EC2 p3.2xlarge instance (Amazon Linux 2) ### Train with minibatch on a single GPU ml-100k, no feature ```bash python3 train_sampling.py --data_name=ml-100k \ --use_one_hot_fea \ --gcn_agg_accum=stack \ --gpu 0 ``` ml-100k, no feature with mix_cpu_gpu run. For a mix_cpu_gpu run with no feature, W_r is stored on the CPU by default rather than on the GPU.
```bash python3 train_sampling.py --data_name=ml-100k \ --use_one_hot_fea \ --gcn_agg_accum=stack \ --mix_cpu_gpu \ --gpu 0 ``` Results: RMSE=0.9380 ml-100k, with feature ```bash python3 train_sampling.py --data_name=ml-100k \ --gcn_agg_accum=stack \ --train_max_epoch 90 \ --gpu 0 ``` Results: RMSE=0.9574 ml-1m, no feature ```bash python3 train_sampling.py --data_name=ml-1m \ --gcn_agg_accum=sum \ --use_one_hot_fea \ --train_max_epoch 160 \ --gpu 0 ``` ml-1m, no feature with mix_cpu_gpu run ```bash python3 train_sampling.py --data_name=ml-1m \ --gcn_agg_accum=sum \ --use_one_hot_fea \ --train_max_epoch 60 \ --mix_cpu_gpu \ --gpu 0 ``` Results: RMSE=0.8632 ml-10m, no feature ```bash python3 train_sampling.py --data_name=ml-10m \ --gcn_agg_accum=stack \ --gcn_dropout=0.3 \ --train_lr=0.001 \ --train_min_lr=0.0001 \ --train_max_epoch=60 \ --use_one_hot_fea \ --gen_r_num_basis_func=4 \ --gpu 0 ``` ml-10m, no feature with mix_cpu_gpu run ```bash python3 train_sampling.py --data_name=ml-10m \ --gcn_agg_accum=stack \ --gcn_dropout=0.3 \ --train_lr=0.001 \ --train_min_lr=0.0001 \ --train_max_epoch=60 \ --use_one_hot_fea \ --gen_r_num_basis_func=4 \ --mix_cpu_gpu \ --gpu 0 ``` Results: RMSE=0.8050 Testbed: EC2 p3.2xlarge instance ### Train with minibatch on multi-GPU ml-100k, no feature ```bash python train_sampling.py --data_name=ml-100k \ --gcn_agg_accum=stack \ --train_max_epoch 30 \ --train_lr 0.02 \ --use_one_hot_fea \ --gpu 0,1,2,3,4,5,6,7 ``` ml-100k, no feature with mix_cpu_gpu run ```bash python train_sampling.py --data_name=ml-100k \ --gcn_agg_accum=stack \ --train_max_epoch 30 \ --train_lr 0.02 \ --use_one_hot_fea \ --mix_cpu_gpu \ --gpu 0,1,2,3,4,5,6,7 ``` Result: RMSE=0.9397 ml-100k, with feature ```bash python train_sampling.py --data_name=ml-100k \ --gcn_agg_accum=stack \ --train_max_epoch 30 \ --gpu 0,1,2,3,4,5,6,7 ``` Result: RMSE=0.9655 ml-1m, no feature ```bash python train_sampling.py --data_name=ml-1m \ --gcn_agg_accum=sum \ --train_max_epoch 40 \ 
--use_one_hot_fea \ --gpu 0,1,2,3,4,5,6,7 ``` ml-1m, no feature with mix_cpu_gpu run ```bash python train_sampling.py --data_name=ml-1m \ --gcn_agg_accum=sum \ --train_max_epoch 40 \ --use_one_hot_fea \ --mix_cpu_gpu \ --gpu 0,1,2,3,4,5,6,7 ``` Results: RMSE=0.8621 ml-10m, no feature ```bash python train_sampling.py --data_name=ml-10m \ --gcn_agg_accum=stack \ --gcn_dropout=0.3 \ --train_lr=0.001 \ --train_min_lr=0.0001 \ --train_max_epoch=30 \ --use_one_hot_fea \ --gen_r_num_basis_func=4 \ --gpu 0,1,2,3,4,5,6,7 ``` ml-10m, no feature with mix_cpu_gpu run ```bash python train_sampling.py --data_name=ml-10m \ --gcn_agg_accum=stack \ --gcn_dropout=0.3 \ --train_lr=0.001 \ --train_min_lr=0.0001 \ --train_max_epoch=30 \ --use_one_hot_fea \ --gen_r_num_basis_func=4 \ --mix_cpu_gpu \ --gpu 0,1,2,3,4,5,6,7 ``` Results: RMSE=0.8084 Testbed: EC2 p3.16xlarge instance ### Train with minibatch on CPU ml-100k, no feature ```bash python3 train_sampling.py --data_name=ml-100k \ --use_one_hot_fea \ --gcn_agg_accum=stack \ --gpu -1 ``` Testbed: EC2 r5.xlarge instance ================================================ FILE: examples/pytorch/gcmc/data.py ================================================ """MovieLens dataset""" import os import re import dgl import numpy as np import pandas as pd import scipy.sparse as sp import torch as th from dgl.data.utils import download, extract_archive, get_download_dir from utils import to_etype_name _urls = { "ml-100k": "http://files.grouplens.org/datasets/movielens/ml-100k.zip", "ml-1m": "http://files.grouplens.org/datasets/movielens/ml-1m.zip", "ml-10m": "http://files.grouplens.org/datasets/movielens/ml-10m.zip", } READ_DATASET_PATH = get_download_dir() GENRES_ML_100K = [ "unknown", "Action", "Adventure", "Animation", "Children", "Comedy", "Crime", "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western", ] GENRES_ML_1M = GENRES_ML_100K[1:] GENRES_ML_10M = GENRES_ML_100K 
+ ["IMAX"] class MovieLens(object): """MovieLens dataset used by GCMC model TODO(minjie): make this dataset more general The dataset stores MovieLens ratings in two types of graphs. The encoder graph contains rating value information in the form of edge types. The decoder graph stores plain user-movie pairs in the form of a bipartite graph with no rating information. All graphs have two types of nodes: "user" and "movie". The training, validation and test set can be summarized as follows: training_enc_graph : training user-movie pairs + rating info training_dec_graph : training user-movie pairs valid_enc_graph : training user-movie pairs + rating info valid_dec_graph : validation user-movie pairs test_enc_graph : training user-movie pairs + validation user-movie pairs + rating info test_dec_graph : test user-movie pairs Attributes ---------- train_enc_graph : dgl.DGLGraph Encoder graph for training. train_dec_graph : dgl.DGLGraph Decoder graph for training. train_labels : torch.Tensor The categorical label of each user-movie pair train_truths : torch.Tensor The actual rating values of each user-movie pair valid_enc_graph : dgl.DGLGraph Encoder graph for validation. valid_dec_graph : dgl.DGLGraph Decoder graph for validation. valid_labels : torch.Tensor The categorical label of each user-movie pair valid_truths : torch.Tensor The actual rating values of each user-movie pair test_enc_graph : dgl.DGLGraph Encoder graph for test. test_dec_graph : dgl.DGLGraph Decoder graph for test. test_labels : torch.Tensor The categorical label of each user-movie pair test_truths : torch.Tensor The actual rating values of each user-movie pair user_feature : torch.Tensor User feature tensor. If None, representing an identity matrix. movie_feature : torch.Tensor Movie feature tensor. If None, representing an identity matrix. possible_rating_values : np.ndarray Available rating values in the dataset Parameters ---------- name : str Dataset name. 
Could be "ml-100k", "ml-1m", "ml-10m" device : torch.device Device context mix_cpu_gpu : boo, optional If true, the ``user_feature`` attribute is stored in CPU use_one_hot_fea : bool, optional If true, the ``user_feature`` attribute is None, representing an one-hot identity matrix. (Default: False) symm : bool, optional If true, the use symmetric normalize constant. Otherwise, use left normalize constant. (Default: True) test_ratio : float, optional Ratio of test data valid_ratio : float, optional Ratio of validation data """ def __init__( self, name, device, mix_cpu_gpu=False, use_one_hot_fea=False, symm=True, test_ratio=0.1, valid_ratio=0.1, ): self._name = name self._device = device self._symm = symm self._test_ratio = test_ratio self._valid_ratio = valid_ratio # download and extract download_dir = get_download_dir() zip_file_path = "{}/{}.zip".format(download_dir, name) download(_urls[name], path=zip_file_path) extract_archive(zip_file_path, "{}/{}".format(download_dir, name)) if name == "ml-10m": root_folder = "ml-10M100K" else: root_folder = name self._dir = os.path.join(download_dir, name, root_folder) print("Starting processing {} ...".format(self._name)) self._load_raw_user_info() self._load_raw_movie_info() print("......") if self._name == "ml-100k": self.all_train_rating_info = self._load_raw_rates( os.path.join(self._dir, "u1.base"), "\t" ) self.test_rating_info = self._load_raw_rates( os.path.join(self._dir, "u1.test"), "\t" ) self.all_rating_info = pd.concat( [self.all_train_rating_info, self.test_rating_info] ) elif self._name == "ml-1m" or self._name == "ml-10m": self.all_rating_info = self._load_raw_rates( os.path.join(self._dir, "ratings.dat"), "::" ) num_test = int( np.ceil(self.all_rating_info.shape[0] * self._test_ratio) ) shuffled_idx = np.random.permutation(self.all_rating_info.shape[0]) self.test_rating_info = self.all_rating_info.iloc[ shuffled_idx[:num_test] ] self.all_train_rating_info = self.all_rating_info.iloc[ shuffled_idx[num_test:] 
] else: raise NotImplementedError print("......") num_valid = int( np.ceil(self.all_train_rating_info.shape[0] * self._valid_ratio) ) shuffled_idx = np.random.permutation( self.all_train_rating_info.shape[0] ) self.valid_rating_info = self.all_train_rating_info.iloc[ shuffled_idx[:num_valid] ] self.train_rating_info = self.all_train_rating_info.iloc[ shuffled_idx[num_valid:] ] self.possible_rating_values = np.unique( self.train_rating_info["rating"].values ) print("All rating pairs : {}".format(self.all_rating_info.shape[0])) print( "\tAll train rating pairs : {}".format( self.all_train_rating_info.shape[0] ) ) print( "\t\tTrain rating pairs : {}".format( self.train_rating_info.shape[0] ) ) print( "\t\tValid rating pairs : {}".format( self.valid_rating_info.shape[0] ) ) print( "\tTest rating pairs : {}".format(self.test_rating_info.shape[0]) ) self.user_info = self._drop_unseen_nodes( orign_info=self.user_info, cmp_col_name="id", reserved_ids_set=set(self.all_rating_info["user_id"].values), label="user", ) self.movie_info = self._drop_unseen_nodes( orign_info=self.movie_info, cmp_col_name="id", reserved_ids_set=set(self.all_rating_info["movie_id"].values), label="movie", ) # Map user/movie to the global id self.global_user_id_map = { ele: i for i, ele in enumerate(self.user_info["id"]) } self.global_movie_id_map = { ele: i for i, ele in enumerate(self.movie_info["id"]) } print( "Total user number = {}, movie number = {}".format( len(self.global_user_id_map), len(self.global_movie_id_map) ) ) self._num_user = len(self.global_user_id_map) self._num_movie = len(self.global_movie_id_map) ### Generate features if use_one_hot_fea: self.user_feature = None self.movie_feature = None else: # if mix_cpu_gpu, we put features in CPU if mix_cpu_gpu: self.user_feature = th.FloatTensor(self._process_user_fea()) self.movie_feature = th.FloatTensor(self._process_movie_fea()) else: self.user_feature = th.FloatTensor(self._process_user_fea()).to( self._device ) self.movie_feature = 
th.FloatTensor( self._process_movie_fea() ).to(self._device) if self.user_feature is None: self.user_feature_shape = (self.num_user, self.num_user) self.movie_feature_shape = (self.num_movie, self.num_movie) else: self.user_feature_shape = self.user_feature.shape self.movie_feature_shape = self.movie_feature.shape info_line = "Feature dim: " info_line += "\nuser: {}".format(self.user_feature_shape) info_line += "\nmovie: {}".format(self.movie_feature_shape) print(info_line) ( all_train_rating_pairs, all_train_rating_values, ) = self._generate_pair_value(self.all_train_rating_info) train_rating_pairs, train_rating_values = self._generate_pair_value( self.train_rating_info ) valid_rating_pairs, valid_rating_values = self._generate_pair_value( self.valid_rating_info ) test_rating_pairs, test_rating_values = self._generate_pair_value( self.test_rating_info ) def _make_labels(ratings): labels = th.LongTensor( np.searchsorted(self.possible_rating_values, ratings) ).to(device) return labels self.train_enc_graph = self._generate_enc_graph( train_rating_pairs, train_rating_values, add_support=True ) self.train_dec_graph = self._generate_dec_graph(train_rating_pairs) self.train_labels = _make_labels(train_rating_values) self.train_truths = th.FloatTensor(train_rating_values).to(device) self.valid_enc_graph = self.train_enc_graph self.valid_dec_graph = self._generate_dec_graph(valid_rating_pairs) self.valid_labels = _make_labels(valid_rating_values) self.valid_truths = th.FloatTensor(valid_rating_values).to(device) self.test_enc_graph = self._generate_enc_graph( all_train_rating_pairs, all_train_rating_values, add_support=True ) self.test_dec_graph = self._generate_dec_graph(test_rating_pairs) self.test_labels = _make_labels(test_rating_values) self.test_truths = th.FloatTensor(test_rating_values).to(device) def _npairs(graph): rst = 0 for r in self.possible_rating_values: r = to_etype_name(r) rst += graph.num_edges(str(r)) return rst print( "Train enc graph: 
\t#user:{}\t#movie:{}\t#pairs:{}".format( self.train_enc_graph.num_nodes("user"), self.train_enc_graph.num_nodes("movie"), _npairs(self.train_enc_graph), ) ) print( "Train dec graph: \t#user:{}\t#movie:{}\t#pairs:{}".format( self.train_dec_graph.num_nodes("user"), self.train_dec_graph.num_nodes("movie"), self.train_dec_graph.num_edges(), ) ) print( "Valid enc graph: \t#user:{}\t#movie:{}\t#pairs:{}".format( self.valid_enc_graph.num_nodes("user"), self.valid_enc_graph.num_nodes("movie"), _npairs(self.valid_enc_graph), ) ) print( "Valid dec graph: \t#user:{}\t#movie:{}\t#pairs:{}".format( self.valid_dec_graph.num_nodes("user"), self.valid_dec_graph.num_nodes("movie"), self.valid_dec_graph.num_edges(), ) ) print( "Test enc graph: \t#user:{}\t#movie:{}\t#pairs:{}".format( self.test_enc_graph.num_nodes("user"), self.test_enc_graph.num_nodes("movie"), _npairs(self.test_enc_graph), ) ) print( "Test dec graph: \t#user:{}\t#movie:{}\t#pairs:{}".format( self.test_dec_graph.num_nodes("user"), self.test_dec_graph.num_nodes("movie"), self.test_dec_graph.num_edges(), ) ) def _generate_pair_value(self, rating_info): rating_pairs = ( np.array( [ self.global_user_id_map[ele] for ele in rating_info["user_id"] ], dtype=np.int64, ), np.array( [ self.global_movie_id_map[ele] for ele in rating_info["movie_id"] ], dtype=np.int64, ), ) rating_values = rating_info["rating"].values.astype(np.float32) return rating_pairs, rating_values def _generate_enc_graph( self, rating_pairs, rating_values, add_support=False ): user_movie_R = np.zeros( (self._num_user, self._num_movie), dtype=np.float32 ) user_movie_R[rating_pairs] = rating_values data_dict = dict() num_nodes_dict = {"user": self._num_user, "movie": self._num_movie} rating_row, rating_col = rating_pairs for rating in self.possible_rating_values: ridx = np.where(rating_values == rating) rrow = rating_row[ridx] rcol = rating_col[ridx] rating = to_etype_name(rating) data_dict.update( { ("user", str(rating), "movie"): (rrow, rcol), ("movie", 
"rev-%s" % str(rating), "user"): (rcol, rrow), } ) graph = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict) # sanity check assert ( len(rating_pairs[0]) == sum([graph.num_edges(et) for et in graph.etypes]) // 2 ) if add_support: def _calc_norm(x): x = x.numpy().astype("float32") x[x == 0.0] = np.inf x = th.FloatTensor(1.0 / np.sqrt(x)) return x.unsqueeze(1) user_ci = [] user_cj = [] movie_ci = [] movie_cj = [] for r in self.possible_rating_values: r = to_etype_name(r) user_ci.append(graph["rev-%s" % r].in_degrees()) movie_ci.append(graph[r].in_degrees()) if self._symm: user_cj.append(graph[r].out_degrees()) movie_cj.append(graph["rev-%s" % r].out_degrees()) else: user_cj.append(th.zeros((self.num_user,))) movie_cj.append(th.zeros((self.num_movie,))) user_ci = _calc_norm(sum(user_ci)) movie_ci = _calc_norm(sum(movie_ci)) if self._symm: user_cj = _calc_norm(sum(user_cj)) movie_cj = _calc_norm(sum(movie_cj)) else: user_cj = th.ones( self.num_user, ) movie_cj = th.ones( self.num_movie, ) graph.nodes["user"].data.update({"ci": user_ci, "cj": user_cj}) graph.nodes["movie"].data.update({"ci": movie_ci, "cj": movie_cj}) return graph def _generate_dec_graph(self, rating_pairs): ones = np.ones_like(rating_pairs[0]) user_movie_ratings_coo = sp.coo_matrix( (ones, rating_pairs), shape=(self.num_user, self.num_movie), dtype=np.float32, ) g = dgl.bipartite_from_scipy( user_movie_ratings_coo, utype="_U", etype="_E", vtype="_V" ) return dgl.heterograph( {("user", "rate", "movie"): g.edges()}, num_nodes_dict={"user": self.num_user, "movie": self.num_movie}, ) @property def num_links(self): return self.possible_rating_values.size @property def num_user(self): return self._num_user @property def num_movie(self): return self._num_movie def _drop_unseen_nodes( self, orign_info, cmp_col_name, reserved_ids_set, label ): # print(" -----------------") # print("{}: {}(reserved) v.s. 
{}(from info)".format(label, len(reserved_ids_set), # len(set(orign_info[cmp_col_name].values)))) if reserved_ids_set != set(orign_info[cmp_col_name].values): pd_rating_ids = pd.DataFrame( list(reserved_ids_set), columns=["id_graph"] ) # print("\torign_info: ({}, {})".format(orign_info.shape[0], orign_info.shape[1])) data_info = orign_info.merge( pd_rating_ids, left_on=cmp_col_name, right_on="id_graph", how="outer", ) data_info = data_info.dropna(subset=[cmp_col_name, "id_graph"]) data_info = data_info.drop(columns=["id_graph"]) data_info = data_info.reset_index(drop=True) # print("\tAfter dropping, data shape: ({}, {})".format(data_info.shape[0], data_info.shape[1])) return data_info else: orign_info = orign_info.reset_index(drop=True) return orign_info def _load_raw_rates(self, file_path, sep): """In MovieLens, the rates have the following format ml-100k user id \t movie id \t rating \t timestamp ml-1m/10m UserID::MovieID::Rating::Timestamp timestamp is unix timestamp and can be converted by pd.to_datetime(X, unit='s') Parameters ---------- file_path : str Returns ------- rating_info : pd.DataFrame """ rating_info = pd.read_csv( file_path, sep=sep, header=None, names=["user_id", "movie_id", "rating", "timestamp"], dtype={ "user_id": np.int32, "movie_id": np.int32, "ratings": np.float32, "timestamp": np.int64, }, engine="python", ) return rating_info def _load_raw_user_info(self): """In MovieLens, the user attributes file have the following formats: ml-100k: user id | age | gender | occupation | zip code ml-1m: UserID::Gender::Age::Occupation::Zip-code For ml-10m, there is no user information. We read the user id from the rating file. 
Parameters ---------- name : str Returns ------- user_info : pd.DataFrame """ if self._name == "ml-100k": self.user_info = pd.read_csv( os.path.join(self._dir, "u.user"), sep="|", header=None, names=["id", "age", "gender", "occupation", "zip_code"], engine="python", ) elif self._name == "ml-1m": self.user_info = pd.read_csv( os.path.join(self._dir, "users.dat"), sep="::", header=None, names=["id", "gender", "age", "occupation", "zip_code"], engine="python", ) elif self._name == "ml-10m": rating_info = pd.read_csv( os.path.join(self._dir, "ratings.dat"), sep="::", header=None, names=["user_id", "movie_id", "rating", "timestamp"], dtype={ "user_id": np.int32, "movie_id": np.int32, "ratings": np.float32, "timestamp": np.int64, }, engine="python", ) self.user_info = pd.DataFrame( np.unique(rating_info["user_id"].values.astype(np.int32)), columns=["id"], ) else: raise NotImplementedError def _process_user_fea(self): """ Parameters ---------- user_info : pd.DataFrame name : str For ml-100k and ml-1m, the column name is ['id', 'gender', 'age', 'occupation', 'zip_code']. We take the age, gender, and the one-hot encoding of the occupation as the user features. For ml-10m, there is no user feature and we set the feature to be a single zero. 
Returns ------- user_features : np.ndarray """ if self._name == "ml-100k" or self._name == "ml-1m": ages = self.user_info["age"].values.astype(np.float32) gender = (self.user_info["gender"] == "F").values.astype(np.float32) all_occupations = set(self.user_info["occupation"]) occupation_map = {ele: i for i, ele in enumerate(all_occupations)} occupation_one_hot = np.zeros( shape=(self.user_info.shape[0], len(all_occupations)), dtype=np.float32, ) occupation_one_hot[ np.arange(self.user_info.shape[0]), np.array( [ occupation_map[ele] for ele in self.user_info["occupation"] ] ), ] = 1 user_features = np.concatenate( [ ages.reshape((self.user_info.shape[0], 1)) / 50.0, gender.reshape((self.user_info.shape[0], 1)), occupation_one_hot, ], axis=1, ) elif self._name == "ml-10m": user_features = np.zeros( shape=(self.user_info.shape[0], 1), dtype=np.float32 ) else: raise NotImplementedError return user_features def _load_raw_movie_info(self): """In MovieLens, the movie attributes may have the following formats: In ml_100k: movie id | movie title | release date | video release date | IMDb URL | [genres] In ml_1m, ml_10m: MovieID::Title (Release Year)::Genres Also, Genres are separated by |, e.g., Adventure|Animation|Children|Comedy|Fantasy Parameters ---------- name : str Returns ------- movie_info : pd.DataFrame For ml-100k, the column name is ['id', 'title', 'release_date', 'video_release_date', 'url'] + [GENRES (19)]] For ml-1m and ml-10m, the column name is ['id', 'title'] + [GENRES (18/20)]] """ if self._name == "ml-100k": GENRES = GENRES_ML_100K elif self._name == "ml-1m": GENRES = GENRES_ML_1M elif self._name == "ml-10m": GENRES = GENRES_ML_10M else: raise NotImplementedError if self._name == "ml-100k": file_path = os.path.join(self._dir, "u.item") self.movie_info = pd.read_csv( file_path, sep="|", header=None, names=[ "id", "title", "release_date", "video_release_date", "url", ] + GENRES, encoding="iso-8859-1", ) elif self._name == "ml-1m" or self._name == "ml-10m": 
file_path = os.path.join(self._dir, "movies.dat") movie_info = pd.read_csv( file_path, sep="::", header=None, names=["id", "title", "genres"], encoding="iso-8859-1", engine="python", ) genre_map = {ele: i for i, ele in enumerate(GENRES)} genre_map["Children's"] = genre_map["Children"] genre_map["Childrens"] = genre_map["Children"] movie_genres = np.zeros( shape=(movie_info.shape[0], len(GENRES)), dtype=np.float32 ) for i, genres in enumerate(movie_info["genres"]): for ele in genres.split("|"): if ele in genre_map: movie_genres[i, genre_map[ele]] = 1.0 else: print( "genres not found, filled with unknown: {}".format( genres ) ) movie_genres[i, genre_map["unknown"]] = 1.0 for idx, genre_name in enumerate(GENRES): assert idx == genre_map[genre_name] movie_info[genre_name] = movie_genres[:, idx] self.movie_info = movie_info.drop(columns=["genres"]) else: raise NotImplementedError def _process_movie_fea(self): """ Parameters ---------- movie_info : pd.DataFrame name : str Returns ------- movie_features : np.ndarray Generate movie features by concatenating embedding and the year """ import torchtext from torchtext.data.utils import get_tokenizer if self._name == "ml-100k": GENRES = GENRES_ML_100K elif self._name == "ml-1m": GENRES = GENRES_ML_1M elif self._name == "ml-10m": GENRES = GENRES_ML_10M else: raise NotImplementedError # Old torchtext-legacy API commented below # TEXT = torchtext.legacy.data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm') tokenizer = get_tokenizer( "spacy", language="en_core_web_sm" ) # new API (torchtext 0.9+) embedding = torchtext.vocab.GloVe(name="840B", dim=300) title_embedding = np.zeros( shape=(self.movie_info.shape[0], 300), dtype=np.float32 ) release_years = np.zeros( shape=(self.movie_info.shape[0], 1), dtype=np.float32 ) p = re.compile(r"(.+)\s*\((\d+)\)") for i, title in enumerate(self.movie_info["title"]): match_res = p.match(title) if match_res is None: print( "{} cannot be matched, index={}, name={}".format( title, i, 
class GCMCGraphConv(nn.Module):
    """Graph convolution module used in the GCMC model.

    Parameters
    ----------
    in_feats : int
        Input feature size.
    out_feats : int
        Output feature size.
    weight : bool, optional
        If True, apply a linear layer. Otherwise, aggregating the messages
        without a weight matrix or with an shared weight provided by caller.
    device: str, optional
        Which device to put data in. Useful in mix_cpu_gpu training and
        multi-gpu training
    dropout_rate : float, optional
        Dropout probability applied to the source normalizer ``cj``.
        (Default: 0.0)
    """

    def __init__(
        self, in_feats, out_feats, weight=True, device=None, dropout_rate=0.0
    ):
        super(GCMCGraphConv, self).__init__()
        self._in_feats = in_feats
        self._out_feats = out_feats
        self.device = device
        self.dropout = nn.Dropout(dropout_rate)

        if weight:
            self.weight = nn.Parameter(th.Tensor(in_feats, out_feats))
        else:
            # Register the name so ``self.weight`` exists and is None.
            self.register_parameter("weight", None)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        if self.weight is not None:
            init.xavier_uniform_(self.weight)

    def forward(self, graph, feat, weight=None):
        """Compute graph convolution.

        Normalizer constant :math:`c_{ij}` is stored as two node data "ci"
        and "cj".

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or tuple
            The input feature. If a tuple is given, only its first element
            (the source feature) is used.
        weight : torch.Tensor, optional
            Optional external weight tensor.

        Returns
        -------
        torch.Tensor
            The output feature

        Raises
        ------
        DGLError
            If an external ``weight`` is passed while the module owns its own
            weight parameter.
        """
        with graph.local_scope():
            if isinstance(feat, tuple):
                feat, _ = feat  # dst feature not used
            cj = graph.srcdata["cj"]
            ci = graph.dstdata["ci"]
            if self.device is not None:
                cj = cj.to(self.device)
                ci = ci.to(self.device)
            if weight is not None:
                if self.weight is not None:
                    # Imported here because the module never imported
                    # DGLError, so this branch used to fail with NameError.
                    from dgl.base import DGLError

                    raise DGLError(
                        "External weight is provided while at the same time the"
                        " module has defined its own weight parameter. Please"
                        " create the module with flag weight=False."
                    )
            else:
                weight = self.weight

            if weight is not None:
                feat = dot_or_identity(feat, weight, self.device)

            # Scale source features by the (dropped-out) source normalizer,
            # sum over incoming edges, then scale by the destination one.
            feat = feat * self.dropout(cj)
            graph.srcdata["h"] = feat
            graph.update_all(
                fn.copy_u(u="h", out="m"), fn.sum(msg="m", out="h")
            )
            rst = graph.dstdata["h"]
            rst = rst * ci

        return rst
Could be any of the supported cross type reducers: "sum", "max", "min", "mean", "stack". (Default: "stack") agg_act : callable, str, optional Activation function :math:`sigma_{agg}`. (Default: None) out_act : callable, str, optional Activation function :math:`sigma_{agg}`. (Default: None) share_user_item_param : bool, optional If true, user node and movie node share the same set of parameters. Require ``user_in_units`` and ``move_in_units`` to be the same. (Default: False) device: str, optional Which device to put data in. Useful in mix_cpu_gpu training and multi-gpu training """ def __init__( self, rating_vals, user_in_units, movie_in_units, msg_units, out_units, dropout_rate=0.0, agg="stack", # or 'sum' agg_act=None, out_act=None, share_user_item_param=False, device=None, ): super(GCMCLayer, self).__init__() self.rating_vals = rating_vals self.agg = agg self.share_user_item_param = share_user_item_param self.ufc = nn.Linear(msg_units, out_units) if share_user_item_param: self.ifc = self.ufc else: self.ifc = nn.Linear(msg_units, out_units) if agg == "stack": # divide the original msg unit size by number of ratings to keep # the dimensionality assert msg_units % len(rating_vals) == 0 msg_units = msg_units // len(rating_vals) self.dropout = nn.Dropout(dropout_rate) self.W_r = nn.ParameterDict() subConv = {} for rating in rating_vals: # PyTorch parameter name can't contain "." 
rating = to_etype_name(rating) rev_rating = "rev-%s" % rating if share_user_item_param and user_in_units == movie_in_units: self.W_r[rating] = nn.Parameter( th.randn(user_in_units, msg_units) ) self.W_r["rev-%s" % rating] = self.W_r[rating] subConv[rating] = GCMCGraphConv( user_in_units, msg_units, weight=False, device=device, dropout_rate=dropout_rate, ) subConv[rev_rating] = GCMCGraphConv( user_in_units, msg_units, weight=False, device=device, dropout_rate=dropout_rate, ) else: self.W_r = None subConv[rating] = GCMCGraphConv( user_in_units, msg_units, weight=True, device=device, dropout_rate=dropout_rate, ) subConv[rev_rating] = GCMCGraphConv( movie_in_units, msg_units, weight=True, device=device, dropout_rate=dropout_rate, ) self.conv = dglnn.HeteroGraphConv(subConv, aggregate=agg) self.agg_act = get_activation(agg_act) self.out_act = get_activation(out_act) self.device = device self.reset_parameters() def partial_to(self, device): """Put parameters into device except W_r Parameters ---------- device : torch device Which device the parameters are put in. """ assert device == self.device if device is not None: self.ufc.cuda(device) if self.share_user_item_param is False: self.ifc.cuda(device) self.dropout.cuda(device) def reset_parameters(self): for p in self.parameters(): if p.dim() > 1: nn.init.xavier_uniform_(p) def forward(self, graph, ufeat=None, ifeat=None): """Forward function Parameters ---------- graph : DGLGraph User-movie rating graph. It should contain two node types: "user" and "movie" and many edge types each for one rating value. ufeat : torch.Tensor, optional User features. If None, using an identity matrix. ifeat : torch.Tensor, optional Movie features. If None, using an identity matrix. 
class BiDecoder(nn.Module):
    r"""Bi-linear decoder over the edges of a user-movie graph.

    For every edge (i, j) of a bipartite graph G the likelihood of rating
    class r is modelled as

    .. math::
        p(M_{ij}=r) = \text{softmax}(u_i^TQ_rv_j)

    where each :math:`Q_r` is a learned linear combination of shared basis
    matrices :math:`P_s`:

    .. math::
        Q_r = \sum_{s=1}^{b} a_{rs}P_s

    Parameters
    ----------
    in_units : int
        Size of input user and movie features
    num_classes : int
        Number of classes.
    num_basis : int, optional
        Number of basis. (Default: 2)
    dropout_rate : float, optional
        Dropout rate (Default: 0.0)
    """

    def __init__(self, in_units, num_classes, num_basis=2, dropout_rate=0.0):
        super(BiDecoder, self).__init__()
        self._num_basis = num_basis
        self.dropout = nn.Dropout(dropout_rate)
        # One square basis matrix P_s per basis function.
        basis_matrices = [
            nn.Parameter(th.randn(in_units, in_units))
            for _ in range(num_basis)
        ]
        self.Ps = nn.ParameterList(basis_matrices)
        # Holds the mixing coefficients a_{rs}; no bias term.
        self.combine_basis = nn.Linear(self._num_basis, num_classes, bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-initialise every parameter with more than one dimension."""
        for param in self.parameters():
            if param.dim() <= 1:
                continue
            nn.init.xavier_uniform_(param)

    def forward(self, graph, ufeat, ifeat):
        """Score every user-movie edge.

        Parameters
        ----------
        graph : DGLGraph
            "Flattened" user-movie graph with only one edge type.
        ufeat : th.Tensor
            User embeddings. Shape: (|V_u|, D)
        ifeat : th.Tensor
            Movie embeddings. Shape: (|V_m|, D)

        Returns
        -------
        th.Tensor
            Predicting scores for each user-movie edge.
        """
        with graph.local_scope():
            ufeat = self.dropout(ufeat)
            ifeat = self.dropout(ifeat)
            graph.nodes["movie"].data["h"] = ifeat
            per_basis_scores = []
            for basis_idx in range(self._num_basis):
                # Project users through P_s, then dot with the movie side.
                projected = ufeat @ self.Ps[basis_idx]
                graph.nodes["user"].data["h"] = projected
                graph.apply_edges(fn.u_dot_v("h", "h", "sr"))
                per_basis_scores.append(graph.edata["sr"])
            stacked = th.cat(per_basis_scores, dim=1)
            out = self.combine_basis(stacked)
        return out
def dot_or_identity(A, B, device=None):
    """Multiply ``A`` by ``B``, treating special forms of ``A`` as shortcuts.

    * ``A is None`` stands for an identity matrix, so ``B`` is returned as is.
    * A one-dimensional ``A`` is used as row indices into ``B``; the selected
      rows are moved to ``device`` when one is given.
    * Anything else is a regular matrix product ``A @ B``.
    """
    if A is None:
        return B
    if len(A.shape) != 1:
        return A @ B
    selected_rows = B[A]
    return selected_rows if device is None else selected_rows.to(device)
def train(args):
    """Train the GCMC model on the full graph and report the best RMSE.

    Loads the MovieLens dataset, builds ``Net``, and runs iterations
    ``1 .. args.train_max_iter - 1``. Training loss and error are written to
    CSV files under ``args.save_dir``. The model is validated every
    ``args.train_valid_interval`` iterations, and the test set is evaluated
    whenever validation RMSE improves. The learning rate is decayed after
    ``args.train_decay_patience`` non-improving validations, and training
    stops early once the patience is exhausted at the minimum learning rate.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options (see ``config``). ``src_in_units``,
        ``dst_in_units`` and ``rating_vals`` are filled in here from the
        dataset.
    """
    print(args)
    dataset = MovieLens(
        args.data_name,
        args.device,
        use_one_hot_fea=args.use_one_hot_fea,
        symm=args.gcn_agg_norm_symm,
        test_ratio=args.data_test_ratio,
        valid_ratio=args.data_valid_ratio,
    )
    print("Loading data finished ...\n")

    args.src_in_units = dataset.user_feature_shape[1]
    args.dst_in_units = dataset.movie_feature_shape[1]
    args.rating_vals = dataset.possible_rating_values

    ### build the net
    net = Net(args=args)
    net = net.to(args.device)
    nd_possible_rating_values = th.FloatTensor(
        dataset.possible_rating_values
    ).to(args.device)
    rating_loss_net = nn.CrossEntropyLoss()
    learning_rate = args.train_lr
    optimizer = get_optimizer(args.train_optimizer)(
        net.parameters(), lr=learning_rate
    )
    print("Loading network finished ...\n")

    ### prepare training data
    train_gt_labels = dataset.train_labels
    train_gt_ratings = dataset.train_truths

    ### prepare the logger
    train_loss_logger = MetricLogger(
        ["iter", "loss", "rmse"],
        ["%d", "%.4f", "%.4f"],
        os.path.join(args.save_dir, "train_loss%d.csv" % args.save_id),
    )
    valid_loss_logger = MetricLogger(
        ["iter", "rmse"],
        ["%d", "%.4f"],
        os.path.join(args.save_dir, "valid_loss%d.csv" % args.save_id),
    )
    test_loss_logger = MetricLogger(
        ["iter", "rmse"],
        ["%d", "%.4f"],
        os.path.join(args.save_dir, "test_loss%d.csv" % args.save_id),
    )

    # The loggers hold open files; make sure they are closed even when
    # training is interrupted by an exception.
    try:
        ### declare the loss information
        best_valid_rmse = np.inf
        # Pre-initialised so the final summary cannot hit an unbound name
        # when no validation step ever runs.
        best_test_rmse = np.inf
        no_better_valid = 0
        best_iter = -1
        count_rmse = 0
        count_num = 0
        count_loss = 0

        dataset.train_enc_graph = dataset.train_enc_graph.int().to(args.device)
        dataset.train_dec_graph = dataset.train_dec_graph.int().to(args.device)
        # Validation encodes with the training graph.
        dataset.valid_enc_graph = dataset.train_enc_graph
        dataset.valid_dec_graph = dataset.valid_dec_graph.int().to(args.device)
        dataset.test_enc_graph = dataset.test_enc_graph.int().to(args.device)
        dataset.test_dec_graph = dataset.test_dec_graph.int().to(args.device)

        print("Start training ...")
        dur = []
        for iter_idx in range(1, args.train_max_iter):
            should_log = iter_idx % args.train_log_interval == 0
            # Reset every iteration: the validation branch appends to it even
            # when the log interval did not fire on this iteration.
            logging_str = ""

            # The first three iterations are treated as warm-up for timing.
            if iter_idx > 3:
                t0 = time.time()
            net.train()
            pred_ratings = net(
                dataset.train_enc_graph,
                dataset.train_dec_graph,
                dataset.user_feature,
                dataset.movie_feature,
            )
            loss = rating_loss_net(pred_ratings, train_gt_labels).mean()
            count_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), args.train_grad_clip)
            optimizer.step()

            if iter_idx > 3:
                dur.append(time.time() - t0)

            if iter_idx == 1:
                print("Total #Param of net: %d" % (torch_total_param_num(net)))
                print(
                    torch_net_info(
                        net,
                        save_path=os.path.join(
                            args.save_dir, "net%d.txt" % args.save_id
                        ),
                    )
                )

            # Expected rating under the predicted class distribution.
            real_pred_ratings = (
                th.softmax(pred_ratings, dim=1)
                * nd_possible_rating_values.view(1, -1)
            ).sum(dim=1)
            # NOTE: accumulated as a sum of squared errors; the value logged
            # as "rmse" below is the mean squared error since the last log.
            rmse = ((real_pred_ratings - train_gt_ratings) ** 2).sum()
            count_rmse += rmse.item()
            count_num += pred_ratings.shape[0]

            if should_log:
                # count_loss holds exactly iter_idx losses, so the running
                # mean divides by iter_idx (the CSV used iter_idx + 1 and
                # disagreed with the console output).
                mean_loss = count_loss / iter_idx
                train_loss_logger.log(
                    iter=iter_idx,
                    loss=mean_loss,
                    rmse=count_rmse / count_num,
                )
                logging_str = "Iter={:4d}, loss={:.4f}, rmse={:.4f}".format(
                    iter_idx,
                    mean_loss,
                    count_rmse / count_num,
                )
                if iter_idx > 3:
                    logging_str += ", time={:.4f}".format(np.average(dur))
                count_rmse = 0
                count_num = 0

            if iter_idx % args.train_valid_interval == 0:
                valid_rmse = evaluate(
                    args=args, net=net, dataset=dataset, segment="valid"
                )
                valid_loss_logger.log(iter=iter_idx, rmse=valid_rmse)
                logging_str += ",\tVal RMSE={:.4f}".format(valid_rmse)

                if valid_rmse < best_valid_rmse:
                    best_valid_rmse = valid_rmse
                    no_better_valid = 0
                    best_iter = iter_idx
                    test_rmse = evaluate(
                        args=args, net=net, dataset=dataset, segment="test"
                    )
                    best_test_rmse = test_rmse
                    test_loss_logger.log(iter=iter_idx, rmse=test_rmse)
                    logging_str += ", Test RMSE={:.4f}".format(test_rmse)
                else:
                    no_better_valid += 1
                    if (
                        no_better_valid > args.train_early_stopping_patience
                        and learning_rate <= args.train_min_lr
                    ):
                        logging.info(
                            "Early stopping threshold reached. Stop training."
                        )
                        break
                    if no_better_valid > args.train_decay_patience:
                        new_lr = max(
                            learning_rate * args.train_lr_decay_factor,
                            args.train_min_lr,
                        )
                        if new_lr < learning_rate:
                            learning_rate = new_lr
                            logging.info("\tChange the LR to %g" % new_lr)
                            for p in optimizer.param_groups:
                                p["lr"] = learning_rate
                            no_better_valid = 0
            if should_log:
                print(logging_str)
        print(
            "Best Iter Idx={}, Best Valid RMSE={:.4f}, Best Test RMSE={:.4f}".format(
                best_iter, best_valid_rmse, best_test_rmse
            )
        )
    finally:
        train_loss_logger.close()
        valid_loss_logger.close()
        test_loss_logger.close()
E.g `--device 0`, if using cpu, set `--device -1`", ) parser.add_argument("--save_dir", type=str, help="The saving directory") parser.add_argument("--save_id", type=int, help="The saving log id") parser.add_argument("--silent", action="store_true") parser.add_argument( "--data_name", default="ml-1m", type=str, help="The dataset name: ml-100k, ml-1m, ml-10m", ) parser.add_argument( "--data_test_ratio", type=float, default=0.1 ) ## for ml-100k the test ration is 0.2 parser.add_argument("--data_valid_ratio", type=float, default=0.1) parser.add_argument("--use_one_hot_fea", action="store_true", default=False) parser.add_argument("--model_activation", type=str, default="leaky") parser.add_argument("--gcn_dropout", type=float, default=0.7) parser.add_argument("--gcn_agg_norm_symm", type=bool, default=True) parser.add_argument("--gcn_agg_units", type=int, default=500) parser.add_argument("--gcn_agg_accum", type=str, default="sum") parser.add_argument("--gcn_out_units", type=int, default=75) parser.add_argument("--gen_r_num_basis_func", type=int, default=2) parser.add_argument("--train_max_iter", type=int, default=2000) parser.add_argument("--train_log_interval", type=int, default=1) parser.add_argument("--train_valid_interval", type=int, default=1) parser.add_argument("--train_optimizer", type=str, default="adam") parser.add_argument("--train_grad_clip", type=float, default=1.0) parser.add_argument("--train_lr", type=float, default=0.01) parser.add_argument("--train_min_lr", type=float, default=0.001) parser.add_argument("--train_lr_decay_factor", type=float, default=0.5) parser.add_argument("--train_decay_patience", type=int, default=50) parser.add_argument( "--train_early_stopping_patience", type=int, default=100 ) parser.add_argument("--share_param", default=False, action="store_true") args = parser.parse_args() args.device = ( th.device(args.device) if args.device >= 0 else th.device("cpu") ) ### configure save_fir to save all the info if args.save_dir is None: 
args.save_dir = ( args.data_name + "_" + "".join( random.choices(string.ascii_uppercase + string.digits, k=2) ) ) if args.save_id is None: args.save_id = np.random.randint(20) args.save_dir = os.path.join("log", args.save_dir) if not os.path.isdir(args.save_dir): os.makedirs(args.save_dir) return args if __name__ == "__main__": args = config() np.random.seed(args.seed) th.manual_seed(args.seed) if th.cuda.is_available(): th.cuda.manual_seed_all(args.seed) train(args) ================================================ FILE: examples/pytorch/gcmc/train_sampling.py ================================================ """Training GCMC model on the MovieLens data set by mini-batch sampling. The script loads the full graph in CPU and samples subgraphs for computing gradients on the training device. The script also supports multi-GPU for further acceleration. """ import argparse import logging import os, time import random import string import traceback import dgl import numpy as np import torch as th import torch.multiprocessing as mp import torch.nn as nn import tqdm from data import MovieLens from model import BiDecoder, DenseBiDecoder, GCMCLayer from torch.nn.parallel import DistributedDataParallel from torch.utils.data import DataLoader from utils import ( get_activation, get_optimizer, MetricLogger, to_etype_name, torch_net_info, torch_total_param_num, ) class Net(nn.Module): def __init__(self, args, dev_id): super(Net, self).__init__() self._act = get_activation(args.model_activation) self.encoder = GCMCLayer( args.rating_vals, args.src_in_units, args.dst_in_units, args.gcn_agg_units, args.gcn_out_units, args.gcn_dropout, args.gcn_agg_accum, agg_act=self._act, share_user_item_param=args.share_param, device=dev_id, ) if args.mix_cpu_gpu and args.use_one_hot_fea: # if use_one_hot_fea, user and movie feature is None # W can be extremely large, with mix_cpu_gpu W should be stored in CPU self.encoder.partial_to(dev_id) else: self.encoder.to(dev_id) self.decoder = BiDecoder( 
def load_subtensor(input_nodes, pair_graph, blocks, dataset, parent_graph):
    """Gather input features and normalizers for one sampled mini-batch.

    Parameters
    ----------
    input_nodes : dict
        Maps "user" / "movie" to the input node IDs of the batch.
    pair_graph : DGLGraph
        The sampled edge sub-graph. Unused here; kept so existing callers
        keep working.
    blocks : list
        Sampled blocks. Each one receives "ci" on its destination nodes and
        "cj" on its source nodes, copied from ``parent_graph`` by original
        node ID.
    dataset : MovieLens
        Supplies ``user_feature`` / ``movie_feature``. When a feature is
        None the node IDs themselves are returned as the feature.
    parent_graph : DGLGraph
        Graph holding the full "ci" / "cj" node data for both node types.

    Returns
    -------
    tuple
        ``(head_feat, tail_feat, blocks)``; ``blocks`` is the input list,
        modified in place.
    """

    def _select(features, node_ids):
        # Without explicit features the IDs act as one-hot indices.
        return node_ids if features is None else features[node_ids]

    head_feat = _select(dataset.user_feature, input_nodes["user"])
    tail_feat = _select(dataset.movie_feature, input_nodes["movie"])

    for block in blocks:
        for ntype in ("user", "movie"):
            parent_data = parent_graph.nodes[ntype].data
            block.dstnodes[ntype].data["ci"] = parent_data["ci"][
                block.dstnodes[ntype].data[dgl.NID]
            ]
            block.srcnodes[ntype].data["cj"] = parent_data["cj"][
                block.srcnodes[ntype].data[dgl.NID]
            ]

    return head_feat, tail_feat, blocks
def _str2bool(value):
    """Convert a command-line token into a bool for ``argparse``."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string was the only falsy spelling under ``type=bool``;
    # keep accepting it.
    if token in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got %r" % (value,)
    )


def config():
    """Parse command-line options for mini-batch GCMC training.

    Besides parsing, this fills in a random ``save_dir`` / ``save_id`` when
    they are not given, prefixes ``save_dir`` with ``log/`` and creates that
    directory.

    Returns
    -------
    argparse.Namespace
        The parsed (and completed) options.
    """
    parser = argparse.ArgumentParser(description="GCMC")
    parser.add_argument("--seed", default=123, type=int)
    parser.add_argument("--gpu", type=str, default="0")
    parser.add_argument("--save_dir", type=str, help="The saving directory")
    parser.add_argument("--save_id", type=int, help="The saving log id")
    parser.add_argument("--silent", action="store_true")
    parser.add_argument(
        "--data_name",
        default="ml-1m",
        type=str,
        help="The dataset name: ml-100k, ml-1m, ml-10m",
    )
    parser.add_argument(
        "--data_test_ratio", type=float, default=0.1
    )  ## for ml-100k the test ratio is 0.2
    parser.add_argument("--data_valid_ratio", type=float, default=0.1)
    parser.add_argument("--use_one_hot_fea", action="store_true", default=False)
    parser.add_argument("--model_activation", type=str, default="leaky")
    parser.add_argument("--gcn_dropout", type=float, default=0.7)
    # ``type=bool`` turned every non-empty value (including "False") into
    # True; parse the text explicitly instead.
    parser.add_argument("--gcn_agg_norm_symm", type=_str2bool, default=True)
    parser.add_argument("--gcn_agg_units", type=int, default=500)
    parser.add_argument("--gcn_agg_accum", type=str, default="sum")
    parser.add_argument("--gcn_out_units", type=int, default=75)
    parser.add_argument("--gen_r_num_basis_func", type=int, default=2)
    parser.add_argument("--train_max_epoch", type=int, default=1000)
    parser.add_argument("--train_log_interval", type=int, default=1)
    parser.add_argument("--train_valid_interval", type=int, default=1)
    parser.add_argument("--train_optimizer", type=str, default="adam")
    parser.add_argument("--train_grad_clip", type=float, default=1.0)
    parser.add_argument("--train_lr", type=float, default=0.01)
    parser.add_argument("--train_min_lr", type=float, default=0.0001)
    parser.add_argument("--train_lr_decay_factor", type=float, default=0.5)
    parser.add_argument("--train_decay_patience", type=int, default=25)
    parser.add_argument("--train_early_stopping_patience", type=int, default=50)
    parser.add_argument("--share_param", default=False, action="store_true")
    parser.add_argument("--mix_cpu_gpu", default=False, action="store_true")
    parser.add_argument("--minibatch_size", type=int, default=20000)
    parser.add_argument("--num_workers_per_gpu", type=int, default=8)

    args = parser.parse_args()

    ### configure save_dir to save all the info
    if args.save_dir is None:
        args.save_dir = (
            args.data_name
            + "_"
            + "".join(
                random.choices(string.ascii_uppercase + string.digits, k=2)
            )
        )
    if args.save_id is None:
        args.save_id = np.random.randint(20)
    args.save_dir = os.path.join("log", args.save_dir)
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(args.save_dir, exist_ok=True)

    return args
finished ...\n") ### declare the loss information best_valid_rmse = np.inf no_better_valid = 0 best_epoch = -1 count_rmse = 0 count_num = 0 count_loss = 0 print("Start training ...") dur = [] iter_idx = 1 for epoch in range(1, args.train_max_epoch): if n_gpus > 1: dataloader.set_epoch(epoch) if epoch > 1: t0 = time.time() net.train() with tqdm.tqdm(dataloader) as tq: for step, (input_nodes, pair_graph, blocks) in enumerate(tq): head_feat, tail_feat, blocks = load_subtensor( input_nodes, pair_graph, blocks, dataset, dataset.train_enc_graph, ) frontier = blocks[0] compact_g = flatten_etypes(pair_graph, dataset, "train").to( dev_id ) true_relation_labels = compact_g.edata["label"] true_relation_ratings = compact_g.edata["rating"] head_feat = head_feat.to(dev_id) tail_feat = tail_feat.to(dev_id) frontier = frontier.to(dev_id) pred_ratings = net( compact_g, frontier, head_feat, tail_feat, dataset.possible_rating_values, ) loss = rating_loss_net( pred_ratings, true_relation_labels.to(dev_id) ).mean() count_loss += loss.item() optimizer.zero_grad() loss.backward() nn.utils.clip_grad_norm_(net.parameters(), args.train_grad_clip) optimizer.step() if proc_id == 0 and iter_idx == 1: print( "Total #Param of net: %d" % (torch_total_param_num(net)) ) real_pred_ratings = ( th.softmax(pred_ratings, dim=1) * nd_possible_rating_values.view(1, -1) ).sum(dim=1) rmse = ( (real_pred_ratings - true_relation_ratings.to(dev_id)) ** 2 ).sum() count_rmse += rmse.item() count_num += pred_ratings.shape[0] tq.set_postfix( { "loss": "{:.4f}".format(count_loss / iter_idx), "rmse": "{:.4f}".format(count_rmse / count_num), }, refresh=False, ) iter_idx += 1 if epoch > 1: epoch_time = time.time() - t0 print("Epoch {} time {}".format(epoch, epoch_time)) if epoch % args.train_valid_interval == 0: if n_gpus > 1: th.distributed.barrier() if proc_id == 0: valid_rmse = evaluate( args=args, dev_id=dev_id, net=net, dataset=dataset, dataloader=valid_dataloader, segment="valid", ) logging_str = "Val 
RMSE={:.4f}".format(valid_rmse) if valid_rmse < best_valid_rmse: best_valid_rmse = valid_rmse no_better_valid = 0 best_epoch = epoch test_rmse = evaluate( args=args, dev_id=dev_id, net=net, dataset=dataset, dataloader=test_dataloader, segment="test", ) best_test_rmse = test_rmse logging_str += ", Test RMSE={:.4f}".format(test_rmse) else: no_better_valid += 1 if ( no_better_valid > args.train_early_stopping_patience and learning_rate <= args.train_min_lr ): logging.info( "Early stopping threshold reached. Stop training." ) break if no_better_valid > args.train_decay_patience: new_lr = max( learning_rate * args.train_lr_decay_factor, args.train_min_lr, ) if new_lr < learning_rate: logging.info("\tChange the LR to %g" % new_lr) learning_rate = new_lr for p in optimizer.param_groups: p["lr"] = learning_rate no_better_valid = 0 print("Change the LR to %g" % new_lr) # sync on evalution if n_gpus > 1: th.distributed.barrier() if proc_id == 0: print(logging_str) if proc_id == 0: print( "Best epoch Idx={}, Best Valid RMSE={:.4f}, Best Test RMSE={:.4f}".format( best_epoch, best_valid_rmse, best_test_rmse ) ) if __name__ == "__main__": args = config() devices = list(map(int, args.gpu.split(","))) n_gpus = len(devices) # For GCMC based on sampling, we require node has its own features. # Otherwise (node_id is the feature), the model can not scale dataset = MovieLens( args.data_name, "cpu", mix_cpu_gpu=args.mix_cpu_gpu, use_one_hot_fea=args.use_one_hot_fea, symm=args.gcn_agg_norm_symm, test_ratio=args.data_test_ratio, valid_ratio=args.data_valid_ratio, ) print("Loading data finished ...\n") args.src_in_units = dataset.user_feature_shape[1] args.dst_in_units = dataset.movie_feature_shape[1] args.rating_vals = dataset.possible_rating_values # cpu if devices[0] == -1: run(0, 0, args, ["cpu"], dataset) # gpu elif n_gpus == 1: run(0, n_gpus, args, devices, dataset) # multi gpu else: # Create csr/coo/csc formats before launching training processes with multi-gpu. 
class MetricLogger(object):
    """Append formatted metric rows to a CSV file.

    The header row is written on construction and every ``log`` call is
    flushed immediately. The logger can be closed explicitly with ``close``
    or used as a context manager.

    Parameters
    ----------
    attr_names : list of str
        Column names, in output order.
    parse_formats : list of str
        One ``%``-style format per column (e.g. ``"%d"``, ``"%.4f"``).
    save_path : str
        Path of the CSV file to create (an existing file is overwritten).
    """

    def __init__(self, attr_names, parse_formats, save_path):
        self._attr_format_dict = OrderedDict(zip(attr_names, parse_formats))
        # The csv module requires newline="" so its own "\r\n" terminator is
        # not translated again (which doubles the "\r" on Windows).
        self._file = open(save_path, "w", newline="")
        try:
            self._csv = csv.writer(self._file)
            self._csv.writerow(attr_names)
            self._file.flush()
        except Exception:
            # Do not leak the handle if the header cannot be written.
            self._file.close()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def log(self, **kwargs):
        """Write one row; ``kwargs`` must contain a value for every column.

        Extra keyword arguments are ignored. A missing column raises
        ``KeyError``.
        """
        self._csv.writerow(
            [
                parse_format % kwargs[attr_name]
                for attr_name, parse_format in self._attr_format_dict.items()
            ]
        )
        self._file.flush()

    def close(self):
        """Close the underlying file. Safe to call more than once."""
        self._file.close()
class GCN(nn.Module):
    """Two stacked ``dglnn.GraphConv`` layers with dropout in between.

    Parameters
    ----------
    in_size : int
        Input feature size of the first layer.
    hid_size : int
        Output size of the first layer / input size of the second.
    out_size : int
        Output size of the second layer.
    """

    def __init__(self, in_size, hid_size, out_size):
        super().__init__()
        self.layers = nn.ModuleList()
        # two-layer GCN
        self.layers.append(
            dglnn.GraphConv(in_size, hid_size, activation=F.relu)
        )
        self.layers.append(dglnn.GraphConv(hid_size, out_size))
        self.dropout = nn.Dropout(0.5)

    def forward(self, g, features):
        """Apply the layers in order; dropout precedes every layer but the first."""
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(g, h)
        return h


def evaluate(g, features, labels, mask, model):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to eval mode and the forward pass runs without
    gradient tracking. ``mask`` is used to index both the logits and the
    labels.
    """
    model.eval()
    with torch.no_grad():
        logits = model(g, features)[mask]
        targets = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == targets)
        return correct.item() * 1.0 / len(targets)


def train(
    g, features, labels, masks, model, epochs=200, lr=1e-2, weight_decay=5e-4
):
    """Full-batch training loop that prints loss and validation accuracy.

    Parameters
    ----------
    g, features, labels
        Passed through to ``model`` / the loss unchanged.
    masks : sequence
        ``masks[0]`` selects training entries, ``masks[1]`` validation ones.
    model : torch.nn.Module
    epochs : int, optional
        Number of optimisation steps (one per epoch). Default: 200.
    lr : float, optional
        Adam learning rate. Default: 1e-2.
    weight_decay : float, optional
        Adam weight decay. Default: 5e-4.
    """
    # define train/val samples, loss function and optimizer
    train_mask = masks[0]
    val_mask = masks[1]
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=lr, weight_decay=weight_decay
    )

    # training loop
    for epoch in range(epochs):
        model.train()
        logits = model(g, features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(g, features, labels, val_mask, model)
        print(
            "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
                epoch, loss.item(), acc
            )
        )


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="cora",
        help="Dataset name ('cora', 'citeseer', 'pubmed').",
    )
    parser.add_argument(
        "--dt",
        type=str,
        default="float",
        help="data type(float, bfloat16)",
    )
    args = parser.parse_args()
    print("Training with DGL built-in GraphConv module.")

    # load and preprocess dataset
    transform = (
        AddSelfLoop()
    )  # by default, it will first remove self-loops to prevent duplication
    dataset_classes = {
        "cora": CoraGraphDataset,
        "citeseer": CiteseerGraphDataset,
        "pubmed": PubmedGraphDataset,
    }
    if args.dataset not in dataset_classes:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    data = dataset_classes[args.dataset](transform=transform)
    g = data[0]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    g = g.int().to(device)
    features = g.ndata["feat"]
    labels = g.ndata["label"]
    masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"]

    # create GCN model
    in_size = features.shape[1]
    out_size = data.num_classes
    model = GCN(in_size, 16, out_size).to(device)

    # convert model and graph to bfloat16 if needed
    if args.dt == "bfloat16":
        g = dgl.to_bfloat16(g)
        features = features.to(dtype=torch.bfloat16)
        model = model.to(dtype=torch.bfloat16)

    # model training
    print("Training...")
    train(g, features, labels, masks, model)

    # test the model
    print("Testing...")
    acc = evaluate(g, features, labels, masks[2], model)
    print("Test accuracy {:.4f}".format(acc))
class GeniePathConv(nn.Module):
    """One GeniePath layer: a ``GATConv`` breadth step, then an ``LSTM`` depth step.

    Parameters
    ----------
    in_dim : int
        Input size of the ``GATConv``.
    hid_dim : int
        Output size of the ``GATConv`` and input size of the ``LSTM``.
    out_dim : int
        Hidden size of the ``LSTM``.
    num_heads : int, optional
        Forwarded to ``GATConv``.
    residual : bool, optional
        Forwarded to ``GATConv``.
    """

    def __init__(self, in_dim, hid_dim, out_dim, num_heads=1, residual=False):
        super(GeniePathConv, self).__init__()
        self.breadth_func = GATConv(
            in_dim, hid_dim, num_heads=num_heads, residual=residual
        )
        self.depth_func = LSTM(hid_dim, out_dim)

    def forward(self, graph, x, h, c):
        """Return ``(features, (h, c))`` after one breadth and depth step."""
        attended = th.tanh(self.breadth_func(graph, x))
        # Reduce axis 1 of the GATConv output (presumably the attention-head
        # axis -- confirm against the GATConv documentation).
        pooled = attended.mean(dim=1)
        # The LSTM is fed a sequence of length one.
        out, (h, c) = self.depth_func(pooled.unsqueeze(0), (h, c))
        return out[0], (h, c)


class GeniePath(nn.Module):
    """Stack of ``GeniePathConv`` layers between two linear projections.

    Parameters
    ----------
    in_dim : int
        Input feature size.
    out_dim : int
        Output size of the final linear layer.
    hid_dim : int, optional
        Width used by every intermediate layer.
    num_layers : int, optional
        Number of ``GeniePathConv`` layers.
    num_heads : int, optional
        Forwarded to each ``GeniePathConv``.
    residual : bool, optional
        Forwarded to each ``GeniePathConv``.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        hid_dim=16,
        num_layers=2,
        num_heads=1,
        residual=False,
    ):
        super(GeniePath, self).__init__()
        self.hid_dim = hid_dim
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
        self.layers = nn.ModuleList(
            GeniePathConv(
                hid_dim,
                hid_dim,
                hid_dim,
                num_heads=num_heads,
                residual=residual,
            )
            for _ in range(num_layers)
        )

    def forward(self, graph, x):
        """Run all layers, threading the LSTM state from one to the next."""
        state_shape = (1, x.shape[0], self.hid_dim)
        # Zero initial hidden and cell state on the same device as the input.
        h = th.zeros(state_shape, device=x.device)
        c = th.zeros(state_shape, device=x.device)
        x = self.linear1(x)
        for conv in self.layers:
            x, (h, c) = conv(graph, x, h, c)
        return self.linear2(x)


class GeniePathLazy(nn.Module):
    """GeniePath variant that runs every breadth step before any depth step.

    All ``GATConv`` layers are applied to the same projected input. Their
    outputs are then consumed one by one by the ``LSTM`` layers, each
    concatenated with the running features.

    Parameters
    ----------
    in_dim : int
        Input feature size.
    out_dim : int
        Output size of the final linear layer.
    hid_dim : int, optional
        Width used by every intermediate layer.
    num_layers : int, optional
        Number of (``GATConv``, ``LSTM``) pairs.
    num_heads : int, optional
        Forwarded to each ``GATConv``.
    residual : bool, optional
        Forwarded to each ``GATConv``.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        hid_dim=16,
        num_layers=2,
        num_heads=1,
        residual=False,
    ):
        super(GeniePathLazy, self).__init__()
        self.hid_dim = hid_dim
        self.linear1 = nn.Linear(in_dim, hid_dim)
        self.linear2 = nn.Linear(hid_dim, out_dim)
        self.breaths = nn.ModuleList()
        self.depths = nn.ModuleList()
        # Build the pairs interleaved so parameters are initialised in the
        # same order for a given random seed.
        for _ in range(num_layers):
            gat = GATConv(
                hid_dim, hid_dim, num_heads=num_heads, residual=residual
            )
            self.breaths.append(gat)
            # Each LSTM sees the breadth output concatenated with the
            # running features, hence the doubled input width.
            self.depths.append(LSTM(hid_dim * 2, hid_dim))

    def forward(self, graph, x):
        """Compute all breadth outputs first, then fold them through the LSTMs."""
        state_shape = (1, x.shape[0], self.hid_dim)
        h = th.zeros(state_shape, device=x.device)
        c = th.zeros(state_shape, device=x.device)
        x = self.linear1(x)
        breadth_outputs = [
            th.mean(th.tanh(gat(graph, x)), dim=1) for gat in self.breaths
        ]
        x = x.unsqueeze(0)
        for breadth_out, lstm in zip(breadth_outputs, self.depths):
            lstm_in = th.cat((breadth_out.unsqueeze(0), x), -1)
            x, (h, c) = lstm(lstm_in, (h, c))
        return self.linear2(x[0])
def str2bool(value):
    """Parse a command-line boolean such as ``True``/``False``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value -- including ``"False"`` -- becomes ``True``. This
    converter keeps ``--flag True`` working and makes ``--flag False`` mean
    what it says.

    Raises
    ------
    argparse.ArgumentTypeError
        If the text is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


def evaluate(model, loss_fn, dataloader, device="cpu"):
    """Return ``(mean micro-F1, mean loss)`` over the batches of ``dataloader``.

    Both values are averaged per batch, not per node. The caller is
    responsible for putting ``model`` into eval mode. Forward passes run
    under ``th.no_grad()`` so no autograd graph is kept during evaluation.
    """
    loss = 0
    f1 = 0
    num_blocks = 0
    with th.no_grad():
        for subgraph in dataloader:
            subgraph = subgraph.to(device)
            label = subgraph.ndata["label"].to(device)
            feat = subgraph.ndata["feat"]
            logits = model(subgraph, feat)

            # compute loss
            loss += loss_fn(logits, label).item()
            # A logit >= 0 is counted as a positive prediction.
            predict = np.where(logits.data.cpu().numpy() >= 0.0, 1, 0)
            f1 += f1_score(label.cpu(), predict, average="micro")
            num_blocks += 1

    return f1 / num_blocks, loss / num_blocks


def main(args):
    """Train GeniePath (or GeniePath-Lazy) on PPI, then report test metrics.

    Reads ``batch_size``, ``gpu``, ``lazy``, ``hid_dim``, ``num_layers``,
    ``num_heads``, ``residual``, ``lr`` and ``max_epoch`` from ``args``.
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load dataset
    train_dataset = PPIDataset(mode="train")
    valid_dataset = PPIDataset(mode="valid")
    test_dataset = PPIDataset(mode="test")
    train_dataloader = GraphDataLoader(
        train_dataset, batch_size=args.batch_size
    )
    valid_dataloader = GraphDataLoader(
        valid_dataset, batch_size=args.batch_size
    )
    test_dataloader = GraphDataLoader(test_dataset, batch_size=args.batch_size)

    # check cuda
    if args.gpu >= 0 and th.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    num_classes = train_dataset.num_classes

    # Extract node features
    graph = train_dataset[0]
    feat = graph.ndata["feat"]

    # Step 2: Create model =================================================================== #
    model_cls = GeniePathLazy if args.lazy else GeniePath
    model = model_cls(
        in_dim=feat.shape[-1],
        out_dim=num_classes,
        hid_dim=args.hid_dim,
        num_layers=args.num_layers,
        num_heads=args.num_heads,
        residual=args.residual,
    )
    model = model.to(device)

    # Step 3: Create training components ===================================================== #
    loss_fn = th.nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.max_epoch):
        model.train()
        tr_loss = 0
        tr_f1 = 0
        num_blocks = 0
        for subgraph in train_dataloader:
            subgraph = subgraph.to(device)
            label = subgraph.ndata["label"]
            feat = subgraph.ndata["feat"]
            logits = model(subgraph, feat)

            # compute loss
            batch_loss = loss_fn(logits, label)
            tr_loss += batch_loss.item()
            tr_predict = np.where(logits.data.cpu().numpy() >= 0.0, 1, 0)
            tr_f1 += f1_score(label.cpu(), tr_predict, average="micro")
            num_blocks += 1

            # backward
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

        # validation
        model.eval()
        val_f1, val_loss = evaluate(model, loss_fn, valid_dataloader, device)

        print(
            "In epoch {}, Train F1: {:.4f} | Train Loss: {:.4f}; Valid F1: {:.4f} | Valid loss: {:.4f}".format(
                epoch,
                tr_f1 / num_blocks,
                tr_loss / num_blocks,
                val_f1,
                val_loss,
            )
        )

    # Test after all epoch
    model.eval()
    test_f1, test_loss = evaluate(model, loss_fn, test_dataloader, device)
    print("Test F1: {:.4f} | Test loss: {:.4f}".format(test_f1, test_loss))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GeniePath")
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU Index. Default: -1, using CPU."
    )
    parser.add_argument(
        "--hid_dim", type=int, default=256, help="Hidden layer dimension"
    )
    parser.add_argument(
        "--num_layers", type=int, default=3, help="Number of GeniePath layers"
    )
    parser.add_argument(
        "--max_epoch",
        type=int,
        default=1000,
        help="The max number of epochs. Default: 1000",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.0004,
        help="Learning rate. Default: 0.0004",
    )
    parser.add_argument(
        "--num_heads",
        type=int,
        default=1,
        help="Number of head in breadth function. Default: 1",
    )
    # str2bool rather than bool: bool("False") is True.
    parser.add_argument(
        "--residual",
        type=str2bool,
        default=False,
        help="Residual in GAT or not",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=2,
        help="Batch size of graph dataloader",
    )
    parser.add_argument(
        "--lazy", type=str2bool, default=False, help="Variant GeniePath-Lazy"
    )
    args = parser.parse_args()
    print(args)

    th.manual_seed(16)
    main(args)
def str2bool(value):
    """Parse a command-line boolean such as ``True``/``False``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value -- including ``"False"`` -- becomes ``True``. This
    converter keeps ``--lazy True`` working and makes ``--lazy False`` mean
    what it says.

    Raises
    ------
    argparse.ArgumentTypeError
        If the text is not a recognised boolean spelling.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if text in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GeniePath")
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU Index. Default: -1, using CPU."
    )
    parser.add_argument(
        "--hid_dim", type=int, default=16, help="Hidden layer dimension"
    )
    parser.add_argument(
        "--num_layers", type=int, default=2, help="Number of GeniePath layers"
    )
    parser.add_argument(
        "--max_epoch",
        type=int,
        default=300,
        help="The max number of epochs. Default: 300",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.0004,
        help="Learning rate. Default: 0.0004",
    )
    parser.add_argument(
        "--num_heads",
        type=int,
        default=1,
        help="Number of head in breadth function. Default: 1",
    )
    # str2bool rather than bool: bool("False") is True.
    parser.add_argument(
        "--residual",
        type=str2bool,
        default=False,
        help="Residual in GAT or not",
    )
    parser.add_argument(
        "--lazy", type=str2bool, default=False, help="Variant GeniePath-Lazy"
    )
    args = parser.parse_args()

    th.manual_seed(16)
    print(args)
    main(args)
The module for solving node selection tasks is implemented in `ggnn_ns.py`. For four question types `n, s, w, e`, we assign a question id for them ranging from 0 to 3. For each question id, run the following commands for training and testing: ```bash python train_ns.py --task_id=4 --question_id=0 --train_num=50 --epochs=10 python train_ns.py --task_id=4 --question_id=1 --train_num=50 --epochs=10 python train_ns.py --task_id=4 --question_id=2 --train_num=50 --epochs=10 python train_ns.py --task_id=4 --question_id=3 --train_num=50 --epochs=10 ``` The training file name `train_ns` means training node selection. `train_num` means the number of training examples used. #### Task 15: Basic deduction Task 15 is similar to task 4, it's also a Node Selection task. An example is shown below: ``` 1 I has_fear C 2 H is C 3 G is I 4 A is B 5 E has_fear C 6 C has_fear I 7 B has_fear C 8 F is E 9 eval H has_fear I ``` There are two types of edges in this task: `is, has_fear`. There is only one question type in this task: `has_fear`, we assign question id `1` for it. Run the following command for training and testing: ```bash python train_ns.py --task_id=15 --question_id=1 --train_num=50 --epochs=15 --lr=1e-2 ``` #### Task 16: Basic induction Task 16 is similar to task 15. An example of task 16 is shown below ``` 1 J has_color F 2 K has_color I 3 A has_color I 4 G is D 5 J is C 6 H has_color I 7 H is D 8 A is D 9 K is D 10 eval G has_color I ``` There are two types of edges in this task: `is, has_color`. There is only one question type in this task: `has_color`, we assign question id `1` for it. 
Run the following command for training and testing: ```bash python train_ns.py --task_id=16 --question_id=1 --train_num=50 --epochs=20 --lr=1e-2 ``` #### Task 18: Reasoning about size Task 18 is a `Graph Classification` task; an example is shown below: ``` 1 G > B 2 G > D 3 E > F 4 E > A 5 B > A 6 E > B 7 eval G < A false ``` Lines 1 to 6 give conditions comparing the sizes of entities; line 7 is the question, asking whether `G < A` is `true` or `false`. So the input is a graph, the output is a binary classification result. We view it as a `Graph Classification` task. Following the paper, we use GGNN to encode the graph, followed by a `GlobalAttentionPooling` layer to pool the graph into a hidden vector, which is used to classify the graph. The module for solving graph classification tasks is implemented in `ggnn_gc.py`. There are two types of edges in this task: `>, <`, and so are the question types. We assign question ids `0, 1` to them. Run the following commands for training and testing: ```bash python train_gc.py --task_id=18 --question_id=0 --train_num=50 --batch_size=10 --lr=1e-3 --epochs=20 python train_gc.py --task_id=18 --question_id=1 --train_num=50 --batch_size=10 --lr=1e-3 --epochs=20 ``` #### Task 19: Path finding An example of task 19 is as follows: ``` 1 D n A 2 D s E 3 G w D 4 E s B 5 eval path G A w,n ``` Similar to task 4, there are four types of edges: `n, s, w, e`, which can be viewed as north, south, west, east. The conditions are the same as task 4; the question in line 5 means `Question: find a path from G to A. Answer: first go west, then go north`. The output is a sequence of edges. So there is no question type in this task. The paper uses *Gated Graph Sequence Neural Networks (GGS-NNs)* to solve this kind of problem.
In this example, we implemented GGS-NNs in `ggsnn.py`, run the following command for training and testing: ```bash python train_path_finding.py --train_num=250 --epochs=200 ``` #### Results Following the paper, we use 10 different test sets for evaluation. The result is the mean and standard deviation of the evaluation performance across the 10 datasets. Numbers in the parentheses are the number of training data used. | Task ID | Reported
Accuracy | DGL
def get_babi_dataloaders(batch_size, train_size=50, task_id=4, q_type=0):
    """Build the train/dev/test dataloaders for one bAbI task.

    Parameters
    ----------
    batch_size : int
        Batch size forwarded to the task-specific loader builder.
    train_size : int, optional
        Number of training examples forwarded to the builder.
    task_id : int, optional
        One of 4, 15, 16 (node selection), 18 (graph classification) or
        19 (path finding).
    q_type : int, optional
        Question type id. Not used for task 19.

    Returns
    -------
    Whatever the task-specific builder returns (``_ns_dataloader``,
    ``_gc_dataloader`` or ``_path_finding_dataloader``).

    Raises
    ------
    ValueError
        If ``task_id`` is not a supported task. The check runs before the
        data is downloaded.
    """
    # Node-selection tasks differ only in their edge vocabulary, the
    # reverse-edge mapping and the data sub-directory.
    node_selection_tasks = {
        4: ({"n": 0, "s": 1, "w": 2, "e": 3}, {}, "04"),
        15: ({"is": 0, "has_fear": 1}, {}, "15"),
        16: ({"is": 0, "has_color": 1}, {0: 0}, "16"),
    }
    supported = sorted(list(node_selection_tasks) + [18, 19])
    if task_id not in supported:
        # Falling through here used to return None, which only surfaced
        # later as an unpacking error in the caller.
        raise ValueError(
            "Unsupported bAbI task_id {!r}; expected one of {}".format(
                task_id, supported
            )
        )

    _download_babi_data()
    node_dict = dict(
        zip(list(string.ascii_uppercase), range(len(string.ascii_uppercase)))
    )

    if task_id in node_selection_tasks:
        edge_dict, reverse_edge, path = node_selection_tasks[task_id]
        return _ns_dataloader(
            train_size,
            q_type,
            batch_size,
            node_dict,
            edge_dict,
            reverse_edge,
            path,
        )

    if task_id == 18:
        edge_dict = {">": 0, "<": 1}
        label_dict = {"false": 0, "true": 1}
        reverse_edge = {0: 1, 1: 0}
        return _gc_dataloader(
            train_size,
            q_type,
            batch_size,
            node_dict,
            edge_dict,
            label_dict,
            reverse_edge,
            "18",
        )

    # task_id == 19
    edge_dict = {"n": 0, "s": 1, "w": 2, "e": 3, "": 4}
    reverse_edge = {0: 1, 1: 0, 2: 3, 3: 2}
    max_seq_length = 2
    return _path_finding_dataloader(
        train_size,
        batch_size,
        node_dict,
        edge_dict,
        reverse_edge,
        "19",
        max_seq_length,
    )
def _convert_ns_dataset(train_size, node_dict, edge_dict, path, q_type):
    """Parse a node-selection ``data.txt`` and split it into train/dev/test.

    Only stories whose question edge type equals ``q_type`` are kept.
    Exactly 11000 such stories are expected. The first ``train_size`` form
    the training set, stories 950..999 the dev set, and each following run
    of 1000 stories one of ten test sets.
    """
    total_num = 11000

    def convert(file):
        stories = []
        story = {}
        with open(file, "r") as f:
            for raw in f:
                tokens = raw.strip().split()
                # A line numbered "1" opens a new story.
                if tokens[0] == "1" and story:
                    story = {}
                if tokens[1] != "eval":
                    story.setdefault("edges", []).append(
                        (
                            node_dict[tokens[1]],
                            edge_dict[tokens[2]],
                            node_dict[tokens[3]],
                        )
                    )
                    continue
                # (src, edge, label)
                story["eval"] = (
                    node_dict[tokens[2]],
                    edge_dict[tokens[3]],
                    node_dict[tokens[4]],
                )
                if story["eval"][1] == q_type:
                    stories.append(story)
                    if len(stories) >= total_num:
                        break
        return stories

    filename = os.path.join(
        get_download_dir(), "babi_data", path, "data.txt"
    )
    data = convert(filename)
    assert len(data) == total_num

    train_set = data[:train_size]
    dev_set = data[950:1000]
    test_sets = [
        data[1000 * (k + 1) : 1000 * (k + 2)] for k in range(10)
    ]
    return train_set, dev_set, test_sets


def _gc_dataloader(
    train_size,
    q_type,
    batch_size,
    node_dict,
    edge_dict,
    label_dict,
    reverse_edge,
    path,
):
    """Build train/dev/test dataloaders for the graph-classification task.

    Returns ``(train_dataloader, dev_dataloader, test_dataloaders)``, where
    the last item is a list with one loader per test set. Only the training
    loader shuffles.
    """

    def _collate_fn(batch):
        graphs = []
        labels = []
        for story in batch:
            edges = story["edges"]
            # Node ids in order of first appearance (source before target).
            node_ids = list(
                dict.fromkeys(n for s, _, t in edges for n in (s, t))
            )
            nid2idx = {nid: idx for idx, nid in enumerate(node_ids)}

            g = dgl.graph([])
            g.add_nodes(len(node_ids))
            g.ndata["node_id"] = torch.tensor(node_ids, dtype=torch.long)

            # The last field of the question tuple is the class label.
            labels.append(story["eval"][-1])

            edge_types = []
            for s, e, t in edges:
                g.add_edges(nid2idx[s], nid2idx[t])
                edge_types.append(e)
                if e in reverse_edge:
                    # Also add the inverse relation in the other direction.
                    g.add_edges(nid2idx[t], nid2idx[s])
                    edge_types.append(reverse_edge[e])
            g.edata["type"] = torch.tensor(edge_types, dtype=torch.long)

            # Column 0 marks the first node of the question, column 1 the
            # node stored at index 2 of the question tuple.
            annotation = torch.zeros([len(node_ids), 2], dtype=torch.long)
            annotation[nid2idx[story["eval"][0]], 0] = 1
            annotation[nid2idx[story["eval"][2]], 1] = 1
            g.ndata["annotation"] = annotation
            graphs.append(g)

        batch_graph = dgl.batch(graphs)
        return batch_graph, torch.tensor(labels, dtype=torch.long)

    def _get_dataloader(data, shuffle):
        return DataLoader(
            dataset=data,
            batch_size=batch_size,
            shuffle=shuffle,
            collate_fn=_collate_fn,
        )

    train_set, dev_set, test_sets = _convert_gc_dataset(
        train_size, node_dict, edge_dict, label_dict, path, q_type
    )
    train_dataloader = _get_dataloader(train_set, True)
    dev_dataloader = _get_dataloader(dev_set, False)
    test_dataloaders = [_get_dataloader(ts, False) for ts in test_sets]

    return train_dataloader, dev_dataloader, test_dataloaders
def _convert_gc_dataset(
    train_size, node_dict, edge_dict, label_dict, path, q_type
):
    """Parse a bAbI graph-classification data file into train/dev/test sets.

    Parameters
    ----------
    train_size : int
        Number of leading samples used as the training set.
    node_dict, edge_dict, label_dict : dict
        Map node / edge / label tokens of the data file to integer ids.
    path : str
        Sub-directory of ``<download_dir>/babi_data`` holding ``data.txt``.
    q_type : int
        Only stories whose question edge id equals this value are kept.

    Returns
    -------
    tuple
        ``(train_set, dev_set, test_sets)``; ``test_sets`` is a list of ten
        lists of 1000 samples each.

    Raises
    ------
    ValueError
        If the file does not yield exactly 11000 matching samples.
    """
    total_num = 11000

    def convert(file):
        dataset = []
        d = dict()
        with open(file, "r") as f:
            for raw_line in f:
                line = raw_line.strip().split()
                if not line:
                    # Tolerate blank lines instead of failing on line[0].
                    continue
                # A line numbered "1" starts a new story.
                if line[0] == "1" and len(d) > 0:
                    d = dict()
                if line[1] == "eval":
                    # Only the first question of a story is used.
                    if "eval" in d:
                        continue
                    # (src, edge, dst, label)
                    d["eval"] = (
                        node_dict[line[2]],
                        edge_dict[line[3]],
                        node_dict[line[4]],
                        label_dict[line[5]],
                    )
                    if d["eval"][1] == q_type:
                        dataset.append(d)
                        if len(dataset) >= total_num:
                            break
                else:
                    d.setdefault("edges", []).append(
                        (
                            node_dict[line[1]],
                            edge_dict[line[2]],
                            node_dict[line[3]],
                        )
                    )
        return dataset

    download_dir = get_download_dir()
    filename = os.path.join(download_dir, "babi_data", path, "data.txt")
    data = convert(filename)

    # Explicit check: an ``assert`` would be stripped under ``python -O`` and
    # the fixed slices below would then silently return short splits.
    if len(data) != total_num:
        raise ValueError(
            f"expected {total_num} samples in {filename}, found {len(data)}"
        )

    # NOTE(review): dev is always data[950:1000], so train_size > 950 makes
    # the training set overlap the dev set — confirm this is intended.
    train_set = data[:train_size]
    dev_set = data[950:1000]
    test_sets = [data[1000 * (i + 1) : 1000 * (i + 2)] for i in range(10)]

    return train_set, dev_set, test_sets
def _convert_path_finding(train_size, node_dict, edge_dict, path):
    """Parse a bAbI path-finding data file into train/dev/test sets.

    Parameters
    ----------
    train_size : int
        Number of leading samples used as the training set.
    node_dict, edge_dict : dict
        Map node / edge tokens of the data file to integer ids.
    path : str
        Sub-directory of ``<download_dir>/babi_data`` holding ``data.txt``.

    Returns
    -------
    tuple
        ``(train_set, dev_set, test_sets)``; ``test_sets`` is a list of ten
        lists of 1000 samples each. Every sample is a dict with keys
        ``"edges"``, ``"eval"`` and ``"seq_out"``.

    Raises
    ------
    ValueError
        If the file does not yield exactly 11000 samples.
    """
    total_num = 11000

    def convert(file):
        dataset = []
        d = dict()
        with open(file, "r") as f:
            for raw_line in f:
                line = raw_line.strip().split()
                if not line:
                    # Tolerate blank lines instead of failing on line[0].
                    continue
                # A line numbered "1" starts a new story.
                if line[0] == "1" and len(d) > 0:
                    d = dict()
                if line[1] == "eval":
                    # (src, dst) of the queried path
                    d["eval"] = (node_dict[line[3]], node_dict[line[4]])
                    # Comma-separated edge tokens forming the answer path.
                    d["seq_out"] = [edge_dict[e] for e in line[5].split(",")]
                    # NOTE(review): the same dict object is appended for
                    # every eval line of a story; verify the data has one
                    # question per story.
                    dataset.append(d)
                    if len(dataset) >= total_num:
                        break
                else:
                    d.setdefault("edges", []).append(
                        (
                            node_dict[line[1]],
                            edge_dict[line[2]],
                            node_dict[line[3]],
                        )
                    )
        return dataset

    download_dir = get_download_dir()
    filename = os.path.join(download_dir, "babi_data", path, "data.txt")
    data = convert(filename)

    # Explicit check: an ``assert`` would be stripped under ``python -O`` and
    # the fixed slices below would then silently return short splits.
    if len(data) != total_num:
        raise ValueError(
            f"expected {total_num} samples in {filename}, found {len(data)}"
        )

    train_set = data[:train_size]
    dev_set = data[950:1000]
    test_sets = [data[1000 * (i + 1) : 1000 * (i + 2)] for i in range(10)]

    return train_set, dev_set, test_sets
class NodeSelectionGGNN(nn.Module):
    """Gated graph network that scores every node and selects one per graph.

    Parameters
    ----------
    annotation_size : int
        Width of the per-node annotation vector.
    out_feats : int
        Hidden size of the gated graph convolution.
    n_steps : int
        Number of propagation steps of the gated graph convolution.
    n_etypes : int
        Number of edge types.
    """

    def __init__(self, annotation_size, out_feats, n_steps, n_etypes):
        super().__init__()

        self.annotation_size = annotation_size
        self.out_feats = out_feats

        self.ggnn = GatedGraphConv(
            in_feats=out_feats,
            out_feats=out_feats,
            n_steps=n_steps,
            n_etypes=n_etypes,
        )

        self.output_layer = nn.Linear(annotation_size + out_feats, 1)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, graph, labels=None):
        """Score the nodes of a batched graph.

        Parameters
        ----------
        graph : batched graph
            Must carry ``edata["type"]`` and ``ndata["annotation"]``; both
            are removed from the graph by this call, and the per-node scores
            are stored in ``ndata["logits"]``.
        labels : Tensor, optional
            Index (local to each graph) of the correct node, one per graph.

        Returns
        -------
        list or tuple
            ``preds`` (one 0-d index tensor per graph) when ``labels`` is
            ``None``, otherwise ``(loss, preds)`` with the loss averaged
            over the graphs of the batch.
        """
        etypes = graph.edata.pop("type")
        annotation = graph.ndata.pop("annotation").float()

        assert annotation.size()[-1] == self.annotation_size

        # Pad the annotation with zeros up to the hidden size.
        node_num = graph.num_nodes()
        zero_pad = torch.zeros(
            [node_num, self.out_feats - self.annotation_size],
            dtype=torch.float,
            device=annotation.device,
        )
        h1 = torch.cat([annotation, zero_pad], -1)
        out = self.ggnn(graph, h1, etypes)

        all_logits = self.output_layer(
            torch.cat([out, annotation], -1)
        ).squeeze(-1)
        # Kept so callers can still read the scores back from the graph.
        graph.ndata["logits"] = all_logits

        # Split by per-graph node counts instead of unbatching the whole
        # graph, which would rebuild one graph per sample just to slice a
        # 1-D tensor.
        sizes = graph.batch_num_nodes().tolist()
        per_graph_logits = torch.split(all_logits, sizes)

        preds = [torch.argmax(logits) for logits in per_graph_logits]
        if labels is None:
            return preds

        loss = 0.0
        for i, logits in enumerate(per_graph_logits):
            # Each graph is a one-sample classification over its own nodes.
            loss += self.loss_fn(logits.unsqueeze(0), labels[i].unsqueeze(0))
        loss /= float(len(per_graph_logits))
        return loss, preds
def sequence_loss(logits, ground_truth, seq_length=None):
    """Cross-entropy loss over (optionally length-masked) sequences.

    Parameters
    ----------
    logits : Tensor
        Scores of shape ``(batch, max_length, num_classes)``.
    ground_truth : Tensor
        Target class ids of shape ``(batch, max_length)``.
    seq_length : Tensor, optional
        Valid length of every sequence, shape ``(batch,)``. Positions at or
        beyond the length are ignored. When omitted, all positions count.

    Returns
    -------
    Tensor
        Scalar loss. Without ``seq_length`` it is the mean over all tokens;
        otherwise each sequence is averaged over its valid tokens first and
        the result is averaged over the batch. A zero-length sequence
        contributes 0 instead of NaN.
    """
    # CrossEntropyLoss expects the class dimension at position 1.
    token_loss = nn.CrossEntropyLoss(reduction="none")(
        logits.permute((0, 2, 1)), ground_truth
    )

    if seq_length is None:
        return token_loss.mean()

    # Build the mask on the loss device so a CPU length tensor also works
    # with logits on an accelerator.
    lengths = seq_length.to(token_loss.device)
    positions = torch.arange(token_loss.size(1), device=token_loss.device)
    # Broadcasting (1, T) against (B, 1) replaces stacking B copies.
    mask = (positions.unsqueeze(0) < lengths.unsqueeze(-1)).float()

    # Clamp the divisor so an empty sequence yields 0 rather than 0 / 0.
    per_sequence = (token_loss * mask).sum(-1) / lengths.clamp(min=1).float()
    return per_sequence.mean()
def main(args):
    """Train a GraphClsGGNN on a bAbI task and report dev/test accuracy.

    ``args`` is the parsed command line; the fields read here are
    ``task_id``, ``question_id``, ``train_num``, ``batch_size``, ``lr`` and
    ``epochs``. Progress and results are printed to stdout.
    """
    # Per-task hyper-parameters; only task 18 is configured.
    out_feats = {18: 3}
    n_etypes = {18: 2}

    train_dataloader, dev_dataloader, test_dataloaders = get_babi_dataloaders(
        batch_size=args.batch_size,
        train_size=args.train_num,
        task_id=args.task_id,
        q_type=args.question_id,
    )

    model = GraphClsGGNN(
        annotation_size=2,
        out_feats=out_feats[args.task_id],
        n_steps=5,
        n_etypes=n_etypes[args.task_id],
        num_cls=2,
    )
    opt = Adam(model.parameters(), lr=args.lr)

    def _accuracy(dataloader):
        """Return the fraction of correctly classified graphs."""
        model.eval()
        all_preds = []
        all_labels = []
        for g, labels in dataloader:
            with torch.no_grad():
                preds = model(g)
            all_preds += preds.data.numpy().tolist()
            all_labels += labels.data.numpy().tolist()
        hits = np.equal(all_labels, all_preds).astype(float).tolist()
        return sum(hits) / len(hits)

    print(f"Task {args.task_id}, question_id {args.question_id}")
    print(f"Training set size: {len(train_dataloader.dataset)}")
    print(f"Dev set size: {len(dev_dataloader.dataset)}")

    # training and dev stage
    for epoch in range(args.epochs):
        model.train()
        for i, batch in enumerate(train_dataloader):
            g, labels = batch
            loss, _ = model(g, labels)
            opt.zero_grad()
            loss.backward()
            opt.step()
            if epoch % 20 == 0:
                print(f"Epoch {epoch}, batch {i} loss: {loss.data}")

        # Dev accuracy is only reported every 20th epoch.
        if epoch % 20 != 0:
            continue
        acc = _accuracy(dev_dataloader)
        print(f"Epoch {epoch}, Dev acc {acc}")

    # test stage
    for i, dataloader in enumerate(test_dataloaders):
        print(f"Test set {i} size: {len(dataloader.dataset)}")

    test_acc_list = [_accuracy(dataloader) for dataloader in test_dataloaders]

    test_acc_mean = np.mean(test_acc_list)
    test_acc_std = np.std(test_acc_list)

    print(
        f"Mean of accuracy in 10 test datasets: {test_acc_mean}, std: {test_acc_std}"
    )
enumerate(train_dataloader): g, labels = batch loss, _ = model(g, labels) opt.zero_grad() loss.backward() opt.step() print(f"Epoch {epoch}, batch {i} loss: {loss.data}") dev_preds = [] dev_labels = [] model.eval() for g, labels in dev_dataloader: with torch.no_grad(): preds = model(g) preds = ( torch.tensor(preds, dtype=torch.long).data.numpy().tolist() ) labels = labels.data.numpy().tolist() dev_preds += preds dev_labels += labels acc = np.equal(dev_labels, dev_preds).astype(float).tolist() acc = sum(acc) / len(acc) print(f"Epoch {epoch}, Dev acc {acc}") # test stage for i, dataloader in enumerate(test_dataloaders): print(f"Test set {i} size: {len(dataloader.dataset)}") test_acc_list = [] for dataloader in test_dataloaders: test_preds = [] test_labels = [] model.eval() for g, labels in dataloader: with torch.no_grad(): preds = model(g) preds = ( torch.tensor(preds, dtype=torch.long).data.numpy().tolist() ) labels = labels.data.numpy().tolist() test_preds += preds test_labels += labels acc = np.equal(test_labels, test_preds).astype(float).tolist() acc = sum(acc) / len(acc) test_acc_list.append(acc) test_acc_mean = np.mean(test_acc_list) test_acc_std = np.std(test_acc_list) print( f"Mean of accuracy in 10 test datasets: {test_acc_mean}, std: {test_acc_std}" ) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Gated Graph Neural Networks for node selection tasks in bAbI" ) parser.add_argument( "--task_id", type=int, default=16, help="task id from 1 to 20" ) parser.add_argument( "--question_id", type=int, default=1, help="question id for each task" ) parser.add_argument( "--train_num", type=int, default=50, help="Number of training examples" ) parser.add_argument("--batch_size", type=int, default=10, help="batch size") parser.add_argument("--lr", type=float, default=1e-3, help="learning rate") parser.add_argument( "--epochs", type=int, default=100, help="number of training epochs" ) args = parser.parse_args() main(args) 
def main(args):
    """Train a GGSNN on bAbI path finding and report dev/test accuracy.

    ``args`` is the parsed command line; the fields read here are
    ``task_id``, ``train_num``, ``batch_size``, ``lr`` and ``epochs``.
    A prediction counts as correct only when the whole output sequence
    equals the ground truth. Progress and results are printed to stdout.
    """
    # Per-task hyper-parameters; only task 19 is configured.
    out_feats = {19: 6}
    n_etypes = {19: 4}

    train_dataloader, dev_dataloader, test_dataloaders = get_babi_dataloaders(
        batch_size=args.batch_size,
        train_size=args.train_num,
        task_id=args.task_id,
        q_type=-1,
    )

    model = GGSNN(
        annotation_size=2,
        out_feats=out_feats[args.task_id],
        n_steps=5,
        n_etypes=n_etypes[args.task_id],
        max_seq_length=2,
        num_cls=5,
    )
    opt = Adam(model.parameters(), lr=args.lr)

    def _exact_match_accuracy(dataloader):
        """Return the fraction of samples whose full sequence is correct."""
        model.eval()
        results = []
        for g, ground_truths, seq_lengths in dataloader:
            with torch.no_grad():
                preds = model(g, seq_lengths)
            preds = preds.data.numpy().tolist()
            ground_truths = ground_truths.data.numpy().tolist()
            for pred, truth in zip(preds, ground_truths):
                results.append(1.0 if pred == truth else 0.0)
        return sum(results) / len(results)

    print(f"Task {args.task_id}")
    print(f"Training set size: {len(train_dataloader.dataset)}")
    print(f"Dev set size: {len(dev_dataloader.dataset)}")

    # training and dev stage
    for epoch in range(args.epochs):
        model.train()
        for i, batch in enumerate(train_dataloader):
            g, ground_truths, seq_lengths = batch
            loss, _ = model(g, seq_lengths, ground_truths)
            opt.zero_grad()
            loss.backward()
            opt.step()
            if epoch % 20 == 0:
                print(f"Epoch {epoch}, batch {i} loss: {loss.data}")

        # Dev accuracy is only reported every 20th epoch.
        if epoch % 20 != 0:
            continue
        acc = _exact_match_accuracy(dev_dataloader)
        print(f"Epoch {epoch}, Dev acc {acc}")

    # test stage
    for i, dataloader in enumerate(test_dataloaders):
        print(f"Test set {i} size: {len(dataloader.dataset)}")

    test_acc_list = [
        _exact_match_accuracy(dataloader) for dataloader in test_dataloaders
    ]

    test_acc_mean = np.mean(test_acc_list)
    test_acc_std = np.std(test_acc_list)

    print(
        f"Mean of accuracy in 10 test datasets: {test_acc_mean}, std: {test_acc_std}"
    )
class MLP(nn.Module):
    """Two-layer MLP used as the aggregator of a GIN layer.

    Both linear layers are bias-free; batch normalisation and ReLU are
    applied between them.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        first = nn.Linear(input_dim, hidden_dim, bias=False)
        second = nn.Linear(hidden_dim, output_dim, bias=False)
        self.linears = nn.ModuleList([first, second])
        self.batch_norm = nn.BatchNorm1d(hidden_dim)

    def forward(self, x):
        """Map ``x`` of width ``input_dim`` to width ``output_dim``."""
        hidden_layer, output_layer = self.linears
        hidden = hidden_layer(x)
        hidden = self.batch_norm(hidden)
        hidden = F.relu(hidden)
        return output_layer(hidden)
def evaluate(dataloader, device, model):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Parameters
    ----------
    dataloader : iterable
        Yields ``(batched_graph, labels)`` pairs; the node features are
        taken (and removed) from ``batched_graph.ndata["attr"]``.
    device : torch.device
        Device the graphs and labels are moved to.
    model : nn.Module
        Called as ``model(batched_graph, feat)``; it is switched to eval
        mode and left in that mode.

    Returns
    -------
    float
        Fraction of correctly predicted labels.
    """
    model.eval()
    total = 0
    total_correct = 0
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for batched_graph, labels in dataloader:
            batched_graph = batched_graph.to(device)
            labels = labels.to(device)
            feat = batched_graph.ndata.pop("attr")
            total += len(labels)
            logits = model(batched_graph, feat)
            _, predicted = torch.max(logits, 1)
            total_correct += (predicted == labels).sum().item()
    acc = 1.0 * total_correct / total
    return acc
optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5) # training loop for epoch in range(350): model.train() total_loss = 0 for batch, (batched_graph, labels) in enumerate(train_loader): batched_graph = batched_graph.to(device) labels = labels.to(device) feat = batched_graph.ndata.pop("attr") logits = model(batched_graph, feat) loss = loss_fcn(logits, labels) optimizer.zero_grad() loss.backward() optimizer.step() total_loss += loss.item() scheduler.step() train_acc = evaluate(train_loader, device, model) valid_acc = evaluate(val_loader, device, model) print( "Epoch {:05d} | Loss {:.4f} | Train Acc. {:.4f} | Validation Acc. {:.4f} ".format( epoch, total_loss / (batch + 1), train_acc, valid_acc ) ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--dataset", type=str, default="MUTAG", choices=["MUTAG", "PTC", "NCI1", "PROTEINS"], help="name of dataset (default: MUTAG)", ) args = parser.parse_args() print(f"Training with DGL built-in GINConv module with a fixed epsilon = 0") device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # load and split dataset dataset = GINDataset( args.dataset, self_loop=True, degree_as_nlabel=False ) # add self_loop and disable one-hot encoding for input features labels = [l for _, l in dataset] train_idx, val_idx = split_fold10(labels) # create dataloader train_loader = GraphDataLoader( dataset, sampler=SubsetRandomSampler(train_idx), batch_size=128, pin_memory=torch.cuda.is_available(), ) val_loader = GraphDataLoader( dataset, sampler=SubsetRandomSampler(val_idx), batch_size=128, pin_memory=torch.cuda.is_available(), ) # create GIN model in_size = dataset.dim_nfeats out_size = dataset.gclasses model = GIN(in_size, 16, out_size).to(device) # model training/validating print("Training...") train(train_loader, val_loader, device, model) ================================================ FILE: examples/pytorch/gnn_explainer/README.md ================================================ # DGL 
Implementation of GNNExplainer This is a DGL example for [GNNExplainer: Generating Explanations for Graph Neural Networks](https://arxiv.org/abs/1903.03894). For the authors' original implementation, see [here](https://github.com/RexYing/gnn-model-explainer). Contributors: - [Jian Zhang](https://github.com/zhjwy9343) - [Kounianhua Du](https://github.com/KounianhuaDu) - [Yanjun Zhao](https://github.com/zyj-111) Datasets ---------------------- Four built-in synthetic datasets are used in this example. - [BA-SHAPES](https://docs.dgl.ai/generated/dgl.data.BAShapeDataset.html#dgl.data.BAShapeDataset) - [BA-COMMUNITY](https://docs.dgl.ai/generated/dgl.data.BACommunityDataset.html#dgl.data.BACommunityDataset) - [TREE-CYCLE](https://docs.dgl.ai/generated/dgl.data.TreeCycleDataset.html#dgl.data.TreeCycleDataset) - [TREE-GRID](https://docs.dgl.ai/generated/dgl.data.TreeGridDataset.html#dgl.data.TreeGridDataset) Usage ---------------------- **First**, train a GNN model on a dataset. ```bash python train_main.py --dataset $DATASET ``` Valid options for `$DATASET`: `BAShape`, `BACommunity`, `TreeCycle`, `TreeGrid` The trained model weights will be saved to `model_{dataset}.pth` **Second**, install [GNNLens2](https://github.com/dmlc/GNNLens2) with ```bash pip install -U flask-cors pip install Flask==2.0.3 pip install gnnlens ``` **Third**, explain the trained model with the same dataset ```bash python explain_main.py --dataset $DATASET ``` **Finally**, launch `GNNLens2` to visualize the explanations ```bash gnnlens --logdir gnn_subgraph ``` By entering `localhost:7777` in your web browser address bar, you can see the GNNLens2 interface. `7777` is the default port GNNLens2 uses. You can specify an alternative one by adding `--port xxxx` after the command line and change the address in the web browser accordingly. A sample visualization is available below. For more details of using `GNNLens2`, check its [tutorials](https://github.com/dmlc/GNNLens2#tutorials).


Figure: Explanation for node 41 of BAShape

def main(args):
    """Explain one node prediction with GNNExplainer and dump it for GNNLens2.

    ``args.dataset`` selects the synthetic dataset; the model weights are
    read from ``./model_{dataset}.pth``. The explanation of the first node
    labelled with class 1 is written to the ``gnn_subgraph`` directory.

    Raises
    ------
    ValueError
        If ``args.dataset`` is unknown or no node has the target class.
    """
    dataset_classes = {
        "BAShape": BAShapeDataset,
        "BACommunity": BACommunityDataset,
        "TreeCycle": TreeCycleDataset,
        "TreeGrid": TreeGridDataset,
    }
    if args.dataset not in dataset_classes:
        raise ValueError(f"Unknown dataset: {args.dataset}")
    dataset = dataset_classes[args.dataset](seed=0)

    graph = dataset[0]
    labels = graph.ndata["label"]
    feats = graph.ndata["feat"]
    num_classes = dataset.num_classes

    # load an existing model
    model_path = os.path.join("./", f"model_{args.dataset}.pth")
    # The model below is built on the CPU, so map the weights there even if
    # they were saved from another device.
    model_stat_dict = th.load(model_path, map_location="cpu")
    model = Model(feats.shape[-1], num_classes)
    model.load_state_dict(model_stat_dict)

    # Choose the first node of the class 1 for explaining prediction
    target_class = 1
    candidates = th.nonzero(labels == target_class).flatten()
    if candidates.numel() == 0:
        # Previously the last node was explained silently in this case.
        raise ValueError(f"No node with label {target_class} in {args.dataset}")
    n_idx = int(candidates[0])

    explainer = GNNExplainer(model, num_hops=3)
    new_center, sub_graph, feat_mask, edge_mask = explainer.explain_node(
        n_idx, graph, feats
    )

    # gnnlens2
    # Specify the path to create a new directory for dumping data files.
    writer = Writer("gnn_subgraph")
    writer.add_graph(
        name=args.dataset,
        graph=graph,
        nlabels=labels,
        num_nlabel_types=num_classes,
    )
    writer.add_subgraph(
        graph_name=args.dataset,
        subgraph_name="GNNExplainer",
        node_id=n_idx,
        subgraph_nids=sub_graph.ndata[dgl.NID],
        subgraph_eids=sub_graph.edata[dgl.EID],
        subgraph_eweights=edge_mask,
    )

    # Finish dumping.
    writer.close()
119, 120, 123, 122, 125, 126, 129, 128, 473, 307, 130, 133, 132, 483, 135, 136, 139, 138, 142, 141, 144, 145, 148, 147, 151, 150, 154, 153, 512, 155, 156, 513, 158, 159, 161, 162, 165, 164, 534, 326, 166, 168, 169, 172, 171, 331, 544, 258, 174, 175, 358, 223, 178, 179, 182, 181, 184, 185, 186, 189, 188, 191, 192, 194, 341, 195, 558, 344, 562, 196, 198, 199, 337, 201, 204, 203, 566, 569, 517, 390, 370, 205, 207, 208, 209, 437, 519, 573, 491, 566, 379, 438, 553, 575, 425, 212, 211, 556, 463, 385, 577, 458, 215, 214, 567, 580, 478, 516, 508, 218, 217, 220, 221, 222, 223, 601, 361, 224, 569, 478, 602, 555, 535, 576, 553, 378, 605, 545, 227, 226, 230, 229, 563, 521, 609, 571, 605, 613, 591, 389, 603, 516, 491, 517, 613, 602, 540, 232, 301, 574, 445, 486, 614, 554, 234, 235, 238, 237, 241, 240, 244, 243, 503, 619, 555, 606, 493, 554, 555, 500, 614, 619, 464, 619, 591, 578, 595, 559, 364, 436, 620, 478, 397, 557, 545, 432, 619, 474, 620, 591, 605, 538, 582, 620, 582, 620, 525, 567, 516, 621, 410, 576, 508, 619, 246, 247, 620, 602, 567, 626, 570, 613, 504, 626, 570, 459, 417, 248, 572, 553, 626, 591, 250, 251, 627, 628, 521, 570, 567, 619, 621, 591, 554, 629, 629, 362, 603, 536, 535, 569, 619, 592, 629, 491, 457, 551, 539, 629, 563, 253, 254, 630, 567, 255, 581, 577, 545, 444, 423, 605, 570, 630, 429, 521, 605, 370, 630, 603, 436, 626, 256, 504, 631, 468, 257, 353, 631, 537, 491, 487, 631, 603, 448, 591, 560, 258, 593, 631, 573, 592, 535, 370, 570, 569, 631, 631, 627, 567, 577, 481, 493, 632, 620, 578, 573, 259, 518, 633, 410, 519, 598, 632, 588, 566, 633, 537, 461, 486, 621, 634, 517, 524, 262, 261, 581, 635, 630, 619, 478, 265, 264, 587, 583, 266, 636, 604, 522, 475, 619, 636, 606, 499, 410, 553, 637, 619, 267, 463, 517, 270, 269, 581, 632, 572, 613, 639, 639, 575, 503, 635, 465, 272, 323, 378, 630, 580, 635, 619, 517, 397, 615, 640, 520, 275, 274, 277, 278, 281, 280, 284, 283, 580, 389, 619, 516, 285, 626, 645, 591, 503, 619, 574, 632, 432, 579, 591, 619, 519, 647, 404, 
423, 286, 474, 289, 288, 492, 519, 649, 570, 632, 290, 291, 573, 575, 592, 397, 384, 398, 629, 602, 619, 620, 613, 423, 436, 579, 575, 644, 603, 569, 566, 563, 654, 435, 292, 592, 655, 630, 566, 570, 549, 554, 436, 605, 655, 617, 508, 655, 626, 463, 535, 388, 446, 655, 630, 613, 388, 620, 503, 655, 384, 593, 655, 539, 635, 632, 655, 621, 616, 555, 519, 655, 629, 508, 567, 656, 591, 553, 631, 566, 317, 656, 571, 621, 619, 570, 517, 656, 627, 378, 655, 630, 637, 436, 498, 656, 293, 518, 554, 486, 491, 656, 555, 573, 294, 553, 656, 630, 537, 596, 410, 656, 386, 631, 468, 656, 493, 460, 581, 656, 559, 563, 390, 471, 556, 362, 656, 456, 620, 466, 635, 493, 656, 296, 297, 657, 630, 566, 620, 613, 298, 299, 619, 545, 464, 657, 384, 520, 656, 574, 619, 657, 580, 657, 620, 655, 636, 300, 456, 657, 602, 375, 437, 519, 301, 614, 538, 555, 657, 573, 657, 641, 498, 499, 656, 658, 478, 619, 592, 554, 302, 656, 630, 619, 555, 605, 303, 619, 545, 658, 631, 457, 516, 658, 638, 569, 468, 570, 593, 486, 658, 659, 619, 498, 634, 554, 631, 304, 659, 468, 629, 535, 579, 659, 629, 573, 591, 605, 583, 660, 628, 478, 606, 496, 582, 660, 629, 307, 306, 655, 570, 621, 631, 619, 594, 580, 631, 661, 622, 308, 468, 516, 599, 661, 634, 594, 591, 655, 661, 626, 657, 631, 620, 659, 575, 399, 398, 486, 662, 457, 660, 662, 567, 650, 446, 496, 640, 584, 478, 631, 640, 596, 547, 602, 557, 485, 665, 309, 657, 666, 488, 310, 478, 569, 655, 436, 666, 567, 632, 311, 535, 555, 657, 666, 445, 570, 619, 629, 545, 666, 632, 444, 666, 555, 572, 603, 492, 487, 667, 545, 667, 655, 545, 554, 621, 667, 569, 666, 633, 570, 603, 491, 667, 556, 604, 579, 627, 570, 667, 643, 668, 630, 655, 520, 389, 668, 468, 637, 312, 667, 584, 656, 566, 655, 592, 313, 591, 353, 619, 314, 634, 669, 629, 581, 315, 474, 553, 669, 659, 669, 615, 367, 488, 620, 447, 669, 655, 492, 458, 538, 536, 634, 631, 591, 569, 464, 658, 619, 553, 657, 597, 492, 367, 670, 445, 607, 670, 655, 397, 474, 671, 492, 593, 316, 486, 658, 629, 566, 605, 631, 
479, 603, 437, 672, 661, 545, 672, 630, 646, 556, 468, 669, 499, 673, 457, 564, 318, 674, 515, 319, 569, 630, 445, 631, 675, 604, 675, 655, 666, 619, 491, 320, 675, 655, 576, 620, 666, 571, 602, 572, 675, 656, 321, 655, 569, 675, 567, 659, 571, 478, 675, 655, 602, 535, 569, 571, 675, 556, 675, 627, 322, 671, 574, 559, 538, 619, 423, 675, 553, 446, 657, 592, 660, 675, 546, 630, 481, 571, 675, 323, 667, 645, 630, 675, 593, 537, 582, 669, 626, 675, 593, 671, 675, 670, 423, 666, 566, 367, 605, 675, 570, 380, 477, 578, 675, 406, 620, 511, 675, 472, 572, 655, 554, 546, 675, 326, 325, 545, 676, 569, 675, 655, 676, 619, 555, 668, 675, 629, 621, 676, 554, 675, 327, 676, 620, 516, 444, 666, 626, 570, 536, 629, 676, 328, 675, 676, 554, 503, 655, 676, 675, 597, 463, 592, 492, 676, 627, 594, 620, 610, 676, 488, 370, 646, 330, 331, 677, 675, 332, 333, 456, 676, 555, 677, 411, 602, 628, 594, 354, 535, 522, 570, 334, 677, 676, 677, 566, 572, 655, 536, 630, 677, 520, 375, 566, 335, 457, 667, 657, 677, 634, 518, 430, 486, 677, 639, 337, 336, 569, 676, 656, 602, 621, 338, 633, 620, 619, 592, 631, 655, 629, 675, 632, 339, 602, 487, 678, 604, 577, 655, 340, 634, 638, 655, 678, 536, 678, 571, 659, 602, 629, 463, 602, 668, 586, 655, 668, 486, 620, 555, 518, 571, 629, 626, 668, 679, 344, 343, 537, 538, 629, 657, 518, 345, 680, 655, 603, 663, 595, 572, 634, 478, 346, 633, 655, 619, 633, 632, 655, 656, 517, 633, 657, 655, 675, 629, 347, 475, 458, 444, 535, 655, 545, 632, 675, 656, 683, 569, 621, 630, 571, 656, 569, 633, 619, 683, 349, 555, 683, 638, 569, 627, 592, 619, 656, 683, 570, 516, 683, 626, 620, 570, 535, 676, 553, 630, 683, 604, 566, 675, 352, 683, 569, 683, 621, 567, 389, 571, 668, 354, 569, 683, 517, 577, 621, 478, 538, 683, 539, 536, 683, 629, 572, 539, 632, 655, 683, 644, 636, 604, 628, 683, 655, 678, 675, 683, 631, 571, 630, 356, 629, 683, 621, 631, 553, 621, 683, 569, 573, 629, 459, 683, 498, 655, 614, 504, 683, 423, 676, 666, 385, 683, 456, 382, 572, 675, 444, 683, 591, 658, 
408, 675, 487, 683, 668, 619, 629, 545, 655, 621, 675, 619, 655, 656, 676, 684, 602, 357, 605, 656, 545, 463, 656, 684, 629, 491, 684, 566, 516, 456, 436, 657, 658, 358, 684, 670, 630, 629, 626, 684, 642, 604, 572, 684, 675, 508, 629, 582, 684, 642, 569, 637, 626, 604, 684, 619, 576, 360, 361, 629, 675, 545, 656, 676, 676, 571, 620, 602, 656, 555, 630, 633, 629, 655, 675, 685, 630, 444, 629, 384, 444, 685, 572, 478, 685, 602, 508, 568, 619, 364, 655, 685, 657, 626, 574, 365, 685, 657, 555, 615, 594, 629, 379, 685, 425, 537, 397, 685, 643, 535, 592, 521, 619, 637, 431, 685, 570, 655, 492, 675, 369, 436, 371, 436, 686, 554, 445, 662, 616, 604, 686, 630, 372, 498, 636, 545, 373, 574, 676, 686, 655, 631, 619, 374, 686, 487, 518, 686, 554, 568, 683, 655, 667, 629, 675, 569, 631, 657, 632, 553, 633, 535, 619, 545, 592, 569, 383, 459, 571, 575, 545, 675, 659, 655, 684, 687, 683, 554, 546, 655, 687, 667, 632, 553, 595, 687, 458, 536, 619, 602, 535, 629, 387, 554, 591, 585, 571, 632, 630, 619, 655, 554, 572, 393, 619, 655, 675, 535, 689, 689, 569, 667, 675, 602, 655, 675, 619, 676, 569, 676, 407, 566, 655, 689, 633, 518, 487, 577, 409, 689, 628, 689, 655, 602, 571, 537, 595, 656, 689, 486, 602, 676, 655, 413, 634, 545, 689, 574, 657, 444, 689, 604, 416, 689, 683, 498, 419, 667, 417, 420, 487, 689, 553, 603, 436, 689, 655, 629, 656, 685, 421, 422, 583, 689, 602, 516, 658, 684, 689, 566, 633, 591, 689, 423, 592, 619, 656, 486, 425, 657, 554, 689, 630, 567, 536, 655, 689, 632, 627, 689, 427, 566, 655, 689, 675, 508, 632, 469, 655, 553, 619, 689, 570, 629, 545, 689, 429, 578, 430, 431, 621, 689, 553, 553, 432, 689, 579, 629, 435, 434, 545, 632, 656, 667, 629, 455, 571, 675, 566, 629, 619, 676, 553, 690, 478, 464, 573, 619, 633, 553, 683, 602, 473, 676, 555, 690, 620, 474, 603, 571, 569, 690, 574, 666, 569, 690, 478, 565, 620, 690, 666, 667, 482, 497, 620, 481, 690, 484, 485, 675, 545, 629, 619, 632, 655, 604, 571, 576, 676, 498, 578, 675, 691, 603, 503, 504, 691, 655, 689, 689, 
677, 691, 603, 557, 691, 602, 641, 508, 538, 512, 574, 553, 632, 591, 691, 514, 515, 655, 629, 545, 534, 656, 667, 544, 629, 655, 569, 619, 675, 585, 545, 678, 592, 613, 676, 633, 555, 553, 554, 692, 562, 579, 559, 675, 560, 692, 567, 666, 570, 563, 675, 619, 602, 565, 655, 692, 567, 619, 568, 566, 656, 569, 655, 675, 676, 591, 655, 601, 656, 668, 592, 690, 602, 603, 604, 619, 689, 619, 675, 613, 685, 619, 620, 621, 693, 622, 685, 693, 626, 627, 628, 629, 633, 654, 656, 632, 655, 655, 676, 665, 659, 657, 656, 684, 683, 666, 667, 668, 674, 675, 677, 678, 681, 676, 684, 689, 686, 683, 685, 691, 689, 692, 693, 697, 690, 696, 698, 699, 695, 697, 699], "dsts": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 41, 42, 44, 45, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 59, 60, 62, 63, 64, 64, 65, 67, 67, 68, 70, 70, 71, 73, 74, 74, 75, 77, 78, 79, 80, 80, 81, 83, 83, 84, 86, 87, 88, 89, 90, 91, 92, 92, 93, 95, 95, 96, 98, 99, 99, 100, 102, 102, 103, 105, 105, 106, 108, 108, 109, 111, 112, 112, 113, 115, 115, 116, 118, 118, 119, 121, 121, 122, 124, 124, 125, 127, 127, 128, 130, 131, 131, 132, 134, 134, 135, 137, 137, 138, 140, 140, 140, 141, 143, 143, 144, 146, 146, 147, 149, 149, 150, 152, 152, 153, 156, 156, 157, 157, 158, 160, 160, 161, 163, 163, 164, 166, 167, 167, 168, 170, 170, 171, 173, 173, 174, 176, 177, 177, 178, 180, 180, 181, 183, 183, 184, 186, 187, 187, 188, 190, 190, 191, 193, 193, 195, 196, 197, 197, 198, 200, 200, 202, 202, 203, 205, 205, 206, 206, 207, 209, 209, 210, 210, 211, 213, 213, 215, 216, 216, 217, 219, 219, 220, 222, 224, 225, 225, 226, 228, 228, 229, 231, 231, 232, 233, 233, 234, 236, 236, 237, 239, 239, 240, 242, 242, 243, 245, 245, 246, 248, 248, 249, 249, 250, 252, 252, 253, 255, 255, 256, 256, 257, 257, 258, 258, 259, 259, 260, 260, 261, 262, 263, 263, 264, 264, 266, 266, 267, 267, 268, 268, 269, 271, 271, 272, 273, 273, 
273, 274, 276, 276, 277, 279, 279, 280, 282, 282, 283, 285, 285, 285, 286, 286, 287, 287, 288, 290, 290, 291, 291, 292, 293, 293, 294, 294, 295, 295, 296, 298, 298, 299, 299, 300, 300, 302, 302, 303, 303, 304, 304, 305, 305, 306, 308, 308, 309, 310, 310, 311, 311, 312, 312, 313, 313, 314, 314, 315, 315, 316, 316, 317, 317, 318, 319, 320, 320, 321, 321, 322, 322, 324, 324, 325, 327, 327, 328, 328, 329, 329, 330, 332, 332, 333, 333, 334, 334, 335, 335, 336, 336, 337, 338, 338, 339, 339, 340, 340, 341, 341, 342, 342, 343, 345, 345, 346, 346, 347, 347, 348, 348, 348, 348, 348, 348, 349, 349, 350, 350, 350, 350, 350, 350, 351, 351, 351, 351, 351, 352, 352, 353, 353, 353, 353, 353, 354, 354, 355, 355, 355, 355, 355, 356, 356, 357, 357, 358, 358, 359, 359, 360, 362, 362, 362, 362, 362, 363, 363, 363, 363, 363, 364, 364, 365, 365, 366, 366, 366, 366, 366, 367, 367, 367, 367, 367, 368, 368, 368, 368, 368, 369, 369, 370, 370, 370, 370, 370, 371, 371, 372, 372, 373, 373, 374, 374, 375, 375, 375, 375, 375, 375, 376, 376, 376, 376, 376, 377, 377, 377, 377, 377, 378, 378, 378, 378, 378, 378, 379, 379, 379, 379, 379, 380, 380, 380, 380, 380, 381, 381, 381, 381, 381, 382, 382, 382, 382, 382, 383, 383, 384, 384, 384, 384, 384, 385, 385, 385, 385, 385, 386, 386, 386, 386, 386, 386, 387, 387, 388, 388, 388, 388, 388, 389, 389, 389, 389, 389, 390, 390, 390, 390, 390, 391, 391, 391, 391, 391, 392, 392, 392, 392, 392, 393, 393, 394, 394, 394, 394, 394, 394, 395, 395, 395, 395, 395, 396, 396, 396, 396, 396, 397, 397, 397, 397, 397, 397, 398, 398, 398, 398, 398, 398, 399, 399, 399, 399, 399, 400, 400, 400, 400, 400, 400, 401, 401, 401, 401, 401, 402, 402, 402, 402, 402, 403, 403, 403, 403, 403, 404, 404, 404, 404, 404, 404, 404, 405, 405, 405, 405, 405, 406, 406, 406, 406, 406, 407, 407, 408, 408, 408, 408, 408, 409, 409, 410, 410, 410, 410, 410, 410, 411, 411, 411, 411, 411, 412, 412, 412, 412, 412, 412, 413, 413, 413, 414, 414, 414, 414, 414, 415, 415, 415, 415, 415, 416, 416, 417, 417, 
417, 417, 417, 418, 418, 418, 418, 418, 419, 419, 420, 420, 421, 421, 422, 422, 423, 423, 423, 423, 423, 423, 424, 424, 424, 424, 424, 425, 425, 425, 425, 425, 426, 426, 426, 426, 426, 426, 427, 427, 428, 428, 428, 428, 428, 428, 429, 429, 429, 429, 429, 429, 430, 430, 430, 430, 430, 431, 431, 431, 431, 431, 432, 432, 432, 432, 432, 433, 433, 434, 436, 436, 436, 436, 436, 437, 437, 437, 437, 437, 438, 438, 438, 438, 438, 439, 439, 439, 439, 439, 440, 440, 440, 440, 440, 441, 441, 441, 441, 441, 442, 442, 442, 442, 442, 443, 443, 443, 443, 443, 444, 444, 444, 444, 444, 445, 445, 445, 445, 445, 445, 446, 446, 446, 446, 446, 447, 447, 447, 447, 447, 447, 448, 448, 448, 448, 448, 449, 449, 449, 449, 449, 449, 450, 450, 450, 450, 450, 451, 451, 451, 451, 451, 452, 452, 452, 452, 452, 453, 453, 453, 453, 453, 454, 454, 454, 454, 454, 455, 455, 456, 456, 456, 456, 456, 456, 457, 457, 457, 457, 457, 457, 458, 458, 458, 458, 458, 459, 459, 459, 459, 459, 460, 460, 460, 460, 460, 460, 461, 461, 461, 461, 461, 461, 462, 462, 462, 462, 462, 463, 463, 463, 463, 463, 464, 464, 464, 464, 464, 464, 465, 465, 465, 465, 465, 465, 466, 466, 466, 466, 466, 467, 467, 467, 467, 467, 468, 468, 468, 468, 468, 469, 469, 469, 469, 469, 469, 470, 470, 470, 470, 470, 471, 471, 471, 471, 471, 472, 472, 472, 472, 472, 473, 473, 474, 474, 474, 474, 474, 475, 475, 475, 475, 475, 476, 476, 476, 476, 476, 476, 477, 477, 477, 477, 477, 478, 478, 478, 478, 478, 479, 479, 479, 479, 479, 480, 480, 480, 480, 480, 481, 481, 481, 481, 481, 482, 482, 482, 482, 482, 483, 483, 484, 486, 486, 486, 486, 486, 486, 487, 487, 487, 487, 487, 487, 488, 488, 488, 488, 488, 489, 489, 489, 489, 489, 490, 490, 490, 490, 490, 491, 491, 491, 491, 491, 492, 492, 492, 492, 492, 493, 493, 493, 493, 493, 494, 494, 494, 494, 494, 495, 495, 495, 495, 495, 496, 496, 496, 496, 496, 497, 497, 497, 497, 497, 497, 498, 498, 498, 498, 498, 498, 499, 499, 499, 499, 499, 499, 500, 500, 500, 500, 500, 500, 501, 501, 501, 501, 501, 502, 
502, 502, 502, 502, 503, 503, 503, 503, 503, 504, 504, 504, 504, 504, 505, 505, 505, 505, 505, 506, 506, 506, 506, 506, 507, 507, 507, 507, 507, 507, 508, 508, 508, 508, 508, 509, 509, 509, 509, 509, 510, 510, 510, 510, 510, 511, 511, 511, 511, 511, 511, 512, 513, 513, 514, 516, 516, 516, 516, 516, 517, 517, 517, 517, 517, 518, 518, 518, 518, 518, 518, 519, 519, 519, 519, 519, 520, 520, 520, 520, 520, 520, 521, 521, 521, 521, 521, 522, 522, 522, 522, 522, 523, 523, 523, 523, 523, 523, 523, 524, 524, 524, 524, 524, 525, 525, 525, 525, 525, 526, 526, 526, 526, 526, 526, 527, 527, 527, 527, 527, 528, 528, 528, 528, 528, 529, 529, 529, 529, 529, 530, 530, 530, 530, 530, 531, 531, 531, 531, 531, 532, 532, 532, 532, 532, 533, 533, 533, 533, 533, 534, 534, 535, 535, 535, 535, 535, 536, 536, 536, 536, 536, 537, 537, 537, 537, 537, 537, 538, 538, 538, 538, 538, 539, 539, 539, 539, 539, 539, 540, 540, 540, 540, 540, 541, 541, 541, 541, 541, 542, 542, 542, 542, 542, 543, 543, 543, 543, 543, 544, 544, 546, 546, 546, 546, 546, 546, 546, 547, 547, 547, 547, 547, 547, 548, 548, 548, 548, 548, 548, 549, 549, 549, 549, 549, 550, 550, 550, 550, 550, 550, 551, 551, 551, 551, 551, 552, 552, 552, 552, 552, 553, 553, 553, 553, 553, 553, 553, 554, 554, 554, 554, 554, 554, 555, 555, 555, 555, 555, 555, 556, 556, 556, 556, 556, 557, 557, 557, 557, 557, 557, 558, 558, 558, 558, 558, 559, 559, 559, 559, 559, 560, 560, 560, 560, 560, 561, 561, 561, 561, 561, 562, 562, 563, 563, 563, 563, 563, 563, 564, 564, 564, 564, 564, 565, 565, 565, 565, 565, 565, 566, 566, 566, 566, 566, 567, 567, 567, 567, 567, 568, 568, 568, 568, 568, 568, 569, 569, 569, 569, 569, 570, 570, 570, 570, 570, 571, 571, 571, 571, 571, 571, 572, 572, 572, 572, 572, 573, 573, 573, 573, 573, 574, 574, 574, 574, 574, 575, 575, 575, 575, 575, 576, 576, 576, 576, 576, 576, 577, 577, 577, 577, 577, 578, 578, 578, 578, 578, 578, 579, 579, 579, 579, 579, 580, 580, 580, 580, 580, 581, 581, 581, 581, 581, 582, 582, 582, 582, 582, 583, 
583, 583, 583, 583, 584, 584, 584, 584, 584, 584, 585, 585, 585, 585, 585, 586, 586, 586, 586, 586, 587, 587, 587, 587, 587, 588, 588, 588, 588, 588, 589, 589, 589, 589, 589, 590, 590, 590, 590, 590, 591, 591, 591, 591, 591, 592, 592, 592, 592, 592, 593, 593, 593, 593, 593, 593, 594, 594, 594, 594, 594, 595, 595, 595, 595, 595, 596, 596, 596, 596, 596, 596, 597, 597, 597, 597, 597, 598, 598, 598, 598, 598, 599, 599, 599, 599, 599, 600, 600, 600, 600, 600, 601, 601, 602, 602, 602, 602, 602, 603, 603, 603, 603, 603, 604, 604, 604, 604, 604, 605, 605, 605, 605, 605, 606, 606, 606, 606, 606, 607, 607, 607, 607, 607, 607, 608, 608, 608, 608, 608, 608, 609, 609, 609, 609, 609, 610, 610, 610, 610, 610, 611, 611, 611, 611, 611, 612, 612, 612, 612, 612, 613, 613, 613, 613, 613, 613, 614, 614, 614, 614, 614, 614, 615, 615, 615, 615, 615, 615, 616, 616, 616, 616, 616, 616, 617, 617, 617, 617, 617, 617, 618, 618, 618, 618, 618, 619, 619, 619, 619, 619, 620, 620, 620, 620, 620, 621, 621, 621, 621, 621, 621, 622, 622, 622, 622, 622, 623, 623, 623, 623, 623, 624, 624, 624, 624, 624, 625, 625, 625, 625, 625, 626, 626, 626, 626, 626, 627, 627, 627, 627, 627, 627, 628, 628, 628, 628, 628, 629, 630, 630, 630, 630, 630, 631, 631, 631, 631, 631, 633, 633, 633, 633, 633, 634, 634, 634, 634, 634, 634, 635, 635, 635, 635, 635, 635, 636, 636, 636, 636, 636, 637, 637, 637, 637, 637, 638, 638, 638, 638, 638, 638, 639, 639, 639, 639, 639, 639, 640, 640, 640, 640, 640, 640, 641, 641, 641, 641, 641, 641, 642, 642, 642, 642, 642, 642, 643, 643, 643, 643, 643, 643, 644, 644, 644, 644, 644, 645, 645, 645, 645, 645, 646, 646, 646, 646, 646, 647, 647, 647, 647, 647, 648, 648, 648, 648, 648, 648, 649, 649, 649, 649, 649, 650, 650, 650, 650, 650, 651, 651, 651, 651, 651, 652, 652, 652, 652, 652, 653, 653, 653, 653, 653, 654, 654, 655, 655, 655, 655, 655, 656, 657, 657, 657, 657, 657, 658, 658, 658, 658, 658, 659, 659, 659, 659, 659, 660, 660, 660, 660, 660, 660, 661, 661, 661, 661, 661, 662, 662, 662, 
662, 662, 663, 663, 663, 663, 663, 664, 664, 664, 664, 664, 665, 665, 666, 666, 666, 666, 666, 668, 668, 668, 668, 668, 669, 669, 669, 669, 669, 670, 670, 670, 670, 670, 671, 671, 671, 671, 671, 672, 672, 672, 672, 672, 673, 673, 673, 673, 673, 673, 674, 674, 675, 675, 675, 675, 675, 675, 676, 676, 676, 676, 676, 676, 677, 677, 677, 677, 677, 678, 678, 678, 678, 678, 679, 679, 679, 679, 679, 679, 680, 680, 680, 680, 680, 681, 681, 681, 681, 681, 682, 682, 682, 682, 682, 683, 683, 683, 683, 683, 684, 684, 684, 684, 684, 684, 685, 685, 685, 685, 685, 686, 686, 686, 686, 686, 687, 687, 687, 687, 687, 688, 688, 688, 688, 688, 689, 689, 689, 689, 689, 689, 690, 690, 690, 690, 690, 690, 691, 691, 691, 691, 691, 691, 692, 692, 692, 692, 692, 693, 693, 693, 693, 693, 694, 694, 694, 694, 694, 694, 695, 696, 697, 697, 698, 698], "num_nodes": 700, "nlabels": [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 1, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 3, 2, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 2, 2, 2, 1, 2, 3, 1, 2, 3, 2, 2, 2, 2, 2, 2, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 3, 1, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 3, 2, 2, 1, 2, 3, 1, 3, 1, 2, 3, 1, 1, 2, 3, 1, 1, 2, 3, 1, 3, 2, 1, 2, 3, 1, 2, 3, 2, 3, 2, 1, 2, 3, 1, 2, 3, 1, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 2, 3, 1, 2, 3, 1, 1, 1, 2, 1, 1, 3, 2, 1, 2, 3, 1, 1, 1, 2, 3, 1, 2, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 1, 1, 2, 3, 1, 1, 2, 1, 1, 1, 2, 3, 1, 1, 1, 3, 1, 1, 1, 1, 2, 3, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 3, 1, 2, 3, 1, 1, 1, 2, 3, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 3, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 2, 3, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 
0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 1, 3], "num_nlabel_types": 4, "eweights": {}}, "success": true} ================================================ FILE: examples/pytorch/gnn_explainer/gnn_subgraph/1/model_list.json ================================================ {"models": [], "success": true} ================================================ FILE: examples/pytorch/gnn_explainer/gnn_subgraph/1/subgraph_1.json ================================================ {"name": "GNNExplainer", "success": true, "node_subgraphs": {"0": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "1": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "2": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "3": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "4": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "5": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "6": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "7": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "8": {"nodes": [], "nweight": [], "eids": [], "eweight": 
[]}, "9": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "10": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "11": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "12": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "13": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "14": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "15": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "16": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "17": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "18": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "19": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "20": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "21": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "22": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "23": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "24": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "25": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "26": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "27": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "28": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "29": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "30": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "31": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "32": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "33": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "34": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "35": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "36": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "37": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "38": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "39": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "40": {"nodes": [], "nweight": [], "eids": 
[], "eweight": []}, "41": {"nodes": [41, 42, 43, 205], "nweight": [1.0, 1.0, 1.0, 1.0], "eids": [41, 42, 206, 207], "eweight": [0.7634373307228088, 0.5627130270004272, 0.5729275941848755, 0.3973456621170044]}, "42": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "43": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "44": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "45": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "46": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "47": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "48": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "49": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "50": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "51": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "52": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "53": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "54": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "55": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "56": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "57": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "58": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "59": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "60": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "61": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "62": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "63": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "64": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "65": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "66": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "67": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "68": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "69": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "70": {"nodes": [], 
"nweight": [], "eids": [], "eweight": []}, "71": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "72": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "73": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "74": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "75": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "76": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "77": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "78": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "79": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "80": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "81": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "82": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "83": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "84": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "85": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "86": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "87": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "88": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "89": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "90": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "91": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "92": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "93": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "94": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "95": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "96": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "97": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "98": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "99": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "100": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "101": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, 
"102": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "103": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "104": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "105": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "106": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "107": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "108": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "109": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "110": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "111": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "112": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "113": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "114": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "115": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "116": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "117": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "118": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "119": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "120": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "121": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "122": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "123": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "124": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "125": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "126": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "127": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "128": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "129": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "130": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "131": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "132": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "133": 
{"nodes": [], "nweight": [], "eids": [], "eweight": []}, "134": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "135": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "136": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "137": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "138": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "139": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "140": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "141": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "142": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "143": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "144": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "145": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "146": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "147": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "148": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "149": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "150": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "151": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "152": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "153": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "154": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "155": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "156": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "157": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "158": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "159": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "160": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "161": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "162": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "163": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "164": {"nodes": [], 
"nweight": [], "eids": [], "eweight": []}, "165": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "166": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "167": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "168": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "169": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "170": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "171": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "172": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "173": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "174": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "175": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "176": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "177": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "178": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "179": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "180": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "181": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "182": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "183": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "184": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "185": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "186": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "187": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "188": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "189": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "190": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "191": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "192": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "193": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "194": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "195": {"nodes": [], "nweight": [], 
"eids": [], "eweight": []}, "196": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "197": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "198": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "199": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "200": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "201": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "202": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "203": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "204": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "205": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "206": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "207": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "208": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "209": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "210": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "211": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "212": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "213": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "214": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "215": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "216": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "217": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "218": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "219": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "220": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "221": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "222": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "223": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "224": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "225": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "226": {"nodes": [], "nweight": [], "eids": [], 
"eweight": []}, "227": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "228": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "229": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "230": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "231": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "232": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "233": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "234": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "235": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "236": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "237": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "238": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "239": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "240": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "241": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "242": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "243": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "244": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "245": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "246": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "247": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "248": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "249": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "250": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "251": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "252": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "253": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "254": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "255": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "256": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "257": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, 
"258": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "259": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "260": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "261": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "262": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "263": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "264": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "265": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "266": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "267": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "268": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "269": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "270": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "271": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "272": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "273": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "274": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "275": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "276": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "277": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "278": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "279": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "280": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "281": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "282": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "283": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "284": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "285": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "286": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "287": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "288": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "289": 
{"nodes": [], "nweight": [], "eids": [], "eweight": []}, "290": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "291": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "292": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "293": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "294": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "295": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "296": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "297": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "298": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "299": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "300": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "301": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "302": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "303": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "304": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "305": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "306": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "307": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "308": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "309": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "310": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "311": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "312": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "313": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "314": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "315": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "316": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "317": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "318": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "319": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "320": {"nodes": [], 
"nweight": [], "eids": [], "eweight": []}, "321": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "322": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "323": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "324": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "325": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "326": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "327": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "328": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "329": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "330": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "331": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "332": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "333": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "334": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "335": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "336": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "337": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "338": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "339": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "340": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "341": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "342": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "343": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "344": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "345": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "346": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "347": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "348": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "349": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "350": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "351": {"nodes": [], "nweight": [], 
"eids": [], "eweight": []}, "352": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "353": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "354": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "355": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "356": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "357": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "358": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "359": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "360": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "361": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "362": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "363": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "364": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "365": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "366": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "367": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "368": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "369": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "370": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "371": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "372": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "373": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "374": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "375": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "376": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "377": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "378": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "379": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "380": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "381": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "382": {"nodes": [], "nweight": [], "eids": [], 
"eweight": []}, "383": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "384": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "385": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "386": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "387": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "388": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "389": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "390": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "391": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "392": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "393": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "394": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "395": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "396": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "397": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "398": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "399": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "400": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "401": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "402": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "403": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "404": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "405": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "406": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "407": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "408": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "409": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "410": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "411": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "412": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "413": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, 
"414": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "415": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "416": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "417": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "418": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "419": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "420": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "421": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "422": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "423": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "424": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "425": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "426": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "427": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "428": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "429": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "430": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "431": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "432": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "433": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "434": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "435": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "436": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "437": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "438": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "439": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "440": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "441": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "442": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "443": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "444": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "445": 
{"nodes": [], "nweight": [], "eids": [], "eweight": []}, "446": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "447": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "448": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "449": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "450": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "451": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "452": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "453": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "454": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "455": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "456": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "457": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "458": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "459": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "460": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "461": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "462": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "463": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "464": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "465": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "466": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "467": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "468": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "469": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "470": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "471": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "472": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "473": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "474": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "475": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "476": {"nodes": [], 
"nweight": [], "eids": [], "eweight": []}, "477": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "478": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "479": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "480": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "481": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "482": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "483": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "484": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "485": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "486": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "487": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "488": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "489": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "490": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "491": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "492": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "493": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "494": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "495": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "496": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "497": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "498": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "499": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "500": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "501": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "502": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "503": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "504": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "505": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "506": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "507": {"nodes": [], "nweight": [], 
"eids": [], "eweight": []}, "508": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "509": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "510": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "511": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "512": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "513": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "514": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "515": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "516": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "517": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "518": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "519": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "520": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "521": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "522": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "523": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "524": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "525": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "526": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "527": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "528": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "529": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "530": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "531": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "532": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "533": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "534": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "535": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "536": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "537": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "538": {"nodes": [], "nweight": [], "eids": [], 
"eweight": []}, "539": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "540": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "541": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "542": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "543": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "544": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "545": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "546": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "547": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "548": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "549": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "550": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "551": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "552": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "553": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "554": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "555": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "556": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "557": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "558": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "559": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "560": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "561": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "562": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "563": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "564": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "565": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "566": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "567": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "568": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "569": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, 
"570": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "571": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "572": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "573": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "574": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "575": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "576": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "577": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "578": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "579": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "580": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "581": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "582": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "583": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "584": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "585": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "586": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "587": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "588": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "589": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "590": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "591": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "592": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "593": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "594": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "595": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "596": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "597": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "598": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "599": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "600": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "601": 
{"nodes": [], "nweight": [], "eids": [], "eweight": []}, "602": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "603": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "604": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "605": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "606": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "607": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "608": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "609": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "610": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "611": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "612": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "613": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "614": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "615": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "616": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "617": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "618": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "619": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "620": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "621": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "622": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "623": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "624": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "625": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "626": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "627": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "628": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "629": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "630": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "631": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "632": {"nodes": [], 
"nweight": [], "eids": [], "eweight": []}, "633": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "634": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "635": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "636": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "637": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "638": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "639": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "640": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "641": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "642": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "643": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "644": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "645": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "646": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "647": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "648": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "649": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "650": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "651": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "652": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "653": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "654": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "655": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "656": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "657": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "658": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "659": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "660": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "661": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "662": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "663": {"nodes": [], "nweight": [], 
"eids": [], "eweight": []}, "664": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "665": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "666": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "667": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "668": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "669": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "670": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "671": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "672": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "673": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "674": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "675": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "676": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "677": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "678": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "679": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "680": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "681": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "682": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "683": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "684": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "685": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "686": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "687": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "688": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "689": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "690": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "691": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "692": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "693": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "694": {"nodes": [], "nweight": [], "eids": [], 
"eweight": []}, "695": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "696": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "697": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "698": {"nodes": [], "nweight": [], "eids": [], "eweight": []}, "699": {"nodes": [], "nweight": [], "eids": [], "eweight": []}}} ================================================ FILE: examples/pytorch/gnn_explainer/gnn_subgraph/1/subgraph_list.json ================================================ {"subgraphs": [{"id": 1, "name": "GNNExplainer"}], "success": true} ================================================ FILE: examples/pytorch/gnn_explainer/gnn_subgraph/dataset_list.json ================================================ {"datasets": [{"id": 1, "name": "BAShape"}], "success": true} ================================================ FILE: examples/pytorch/gnn_explainer/models.py ================================================ import dgl.function as fn import torch as th import torch.nn as nn import torch.nn.functional as F class Layer(nn.Module): def __init__(self, in_dim, out_dim): super().__init__() self.layer = nn.Linear(in_dim * 2, out_dim, bias=True) def forward(self, graph, feat, eweight=None): with graph.local_scope(): graph.ndata["h"] = feat if eweight is None: graph.update_all(fn.copy_u("h", "m"), fn.mean("m", "h")) else: graph.edata["ew"] = eweight graph.update_all(fn.u_mul_e("h", "ew", "m"), fn.mean("m", "h")) h = self.layer(th.cat([graph.ndata["h"], feat], dim=-1)) return h class Model(nn.Module): def __init__(self, in_dim, out_dim, hid_dim=40): super().__init__() self.in_layer = Layer(in_dim, hid_dim) self.hid_layer = Layer(hid_dim, hid_dim) self.out_layer = Layer(hid_dim, out_dim) def forward(self, graph, feat, eweight=None): h = self.in_layer(graph, feat.float(), eweight) h = F.relu(h) h = self.hid_layer(graph, h, eweight) h = F.relu(h) h = self.out_layer(graph, h, eweight) return h ================================================ FILE: 
def main(args):
    """Train the demo model on one synthetic benchmark graph and save it.

    The model is trained for 500 epochs on every node of the graph (there
    is no train/validation split; this is a demo), and its ``state_dict``
    is written to ``./model_<dataset>.pth``.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``dataset``, one of ``"BAShape"``, ``"BACommunity"``,
        ``"TreeCycle"`` or ``"TreeGrid"``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of the supported names.
    """
    dataset_classes = {
        "BAShape": BAShapeDataset,
        "BACommunity": BACommunityDataset,
        "TreeCycle": TreeCycleDataset,
        "TreeGrid": TreeGridDataset,
    }
    try:
        dataset_cls = dataset_classes[args.dataset]
    except KeyError:
        # Fail with a clear message instead of leaving `dataset` unbound
        # when main() is called programmatically with an unsupported name.
        raise ValueError(
            f"Unknown dataset {args.dataset!r}; "
            f"expected one of {sorted(dataset_classes)}"
        ) from None
    dataset = dataset_cls(seed=0)

    graph = dataset[0]
    labels = graph.ndata["label"]
    n_feats = graph.ndata["feat"]
    num_classes = dataset.num_classes

    model = Model(n_feats.shape[-1], num_classes)
    loss_fn = nn.CrossEntropyLoss()
    optim = th.optim.Adam(model.parameters(), lr=0.001)

    # The model never leaves training mode, so set it once up front.
    model.train()
    for epoch in range(500):
        # For demo purpose, we train the model on all datapoints
        # In practice, you should train only on the training datapoints
        logits = model(graph, n_feats)
        loss = loss_fn(logits, labels)
        acc = th.sum(logits.argmax(dim=1) == labels).item() / len(labels)

        optim.zero_grad()
        loss.backward()
        optim.step()

        print(f"In Epoch: {epoch}; Acc: {acc}; Loss: {loss.item()}")

    model_stat_dict = model.state_dict()
    model_path = os.path.join("./", f"model_{args.dataset}.pth")
    th.save(model_stat_dict, model_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Dummy model training")
    parser.add_argument(
        "--dataset",
        type=str,
        default="BAShape",
        choices=["BAShape", "BACommunity", "TreeCycle", "TreeGrid"],
    )
    args = parser.parse_args()
    print(args)

    main(args)
Author's code: https://github.com/CRIPAC-DIG/GRACE ## Example Implementor This example was implemented by [Hengrui Zhang](https://github.com/hengruizhang98) when he was an applied scientist intern at AWS Shanghai AI Lab. ## Dependencies - Python 3.7 - PyTorch 1.7.1 - dgl 0.6.0 - scikit-learn 0.22.1 ## Datasets ##### Unsupervised Node Classification Datasets: 'Cora', 'Citeseer' and 'Pubmed' | Dataset | # Nodes | # Edges | # Classes | | -------- | ------- | ------- | --------- | | Cora | 2,708 | 10,556 | 7 | | Citeseer | 3,327 | 9,228 | 6 | | Pubmed | 19,717 | 88,651 | 3 | ## Arguments ``` --dataname str The graph dataset name. Default is 'cora'. --gpu int GPU index. Default is 0. --split str Dataset splitting method. Default is 'random'. --epochs int Number of training periods. Default is 500. --lr float Learning rate. Default is 0.001. --wd float Weight decay. Default is 1e-5. --temp float Temperature. Default is 1.0. --act_fn str Activation function. Default is relu. --hid_dim int Hidden dimension. Default is 256. --out_dim int Output dimension. Default is 256. --num_layers int Number of GNN layers. Default is 2. --der1 float Drop edge ratio 1. Default is 0.2. --der2 float Drop edge ratio 2. Default is 0.2. --dfr1 float Drop feature ratio 1. Default is 0.2. --dfr2 float Drop feature ratio 2. Default is 0.2. ``` ## How to run examples In the paper (as well as the authors' repo), the training set and testing set are split randomly with a 1:9 ratio. In order to compare it fairly with other methods that use the public split (20 training nodes per class), in this repo we also provide its results using the public split (with fine-tuned hyper-parameters). To run the examples, follow the instructions below.
```python # Cora with random split python main.py --dataname cora --epochs 200 --lr 5e-4 --wd 1e-5 --hid_dim 128 --out_dim 128 --act_fn relu --der1 0.2 --der2 0.4 --dfr1 0.3 --dfr2 0.4 --temp 0.4 # Cora with public split python main.py --dataname cora --split public --epochs 400 --lr 5e-4 --wd 1e-5 --hid_dim 256 --out_dim 256 --act_fn relu --der1 0.3 --der2 0.4 --dfr1 0.3 --dfr2 0.4 --temp 0.4 # Citeseer with random split python main.py --dataname citeseer --epochs 200 --lr 1e-3 --wd 1e-5 --hid_dim 256 --out_dim 256 --act_fn prelu --der1 0.2 --der2 0.0 --dfr1 0.3 --dfr2 0.2 --temp 0.9 # Citeseer with public split python main.py --dataname citeseer --split public --epochs 100 --lr 1e-3 --wd 1e-5 --hid_dim 512 --out_dim 512 --act_fn prelu --der1 0.3 --der2 0.3 --dfr1 0.3 --dfr2 0.3 --temp 0.4 # Pubmed with random split python main.py --dataname pubmed --epochs 1500 --lr 1e-3 --wd 1e-5 --hid_dim 256 --out_dim 256 --act_fn relu --der1 0.4 --der2 0.1 --dfr1 0.0 --dfr2 0.2 --temp 0.7 # Pubmed with public split python main.py --dataname pubmed --split public --epochs 1500 --lr 1e-3 --wd 1e-5 --hid_dim 256 --out_dim 256 --act_fn relu --der1 0.4 --der2 0.1 --dfr1 0.0 --dfr2 0.2 --temp 0.7 ``` ## Performance For random split, we use the hyper-parameters as stated in the paper. For public split, we find the given hyper-parameters lead to poor performance, so we select the hyperparameters via a small grid search. 
def aug(graph, x, feat_drop_rate, edge_mask_rate):
    """Create one augmented view of a graph and its node features.

    Parameters
    ----------
    graph : DGLGraph
        Source graph.
    x : Tensor
        Node feature matrix; feature columns are masked, not rows.
    feat_drop_rate : float
        Probability of zeroing each feature column.
    edge_mask_rate : float
        Probability of removing each edge.

    Returns
    -------
    (DGLGraph, Tensor)
        A new graph on the same node set with the surviving edges plus self
        loops, and a masked copy of ``x``.
    """
    # Edges are sampled before features; keeping this order keeps the way the
    # global random generator is consumed unchanged.
    kept_edges = mask_edge(graph, edge_mask_rate)
    masked_feat = drop_feature(x, feat_drop_rate)

    all_src, all_dst = graph.edges()[0], graph.edges()[1]
    view = dgl.graph(
        (all_src[kept_edges], all_dst[kept_edges]),
        num_nodes=graph.num_nodes(),
    )
    return view.add_self_loop(), masked_feat


def drop_feature(x, drop_prob):
    """Return a copy of ``x`` with randomly chosen feature columns zeroed.

    One uniform draw is made per column (``x.size(1)`` draws in total); a
    column is zeroed for every row when its draw is below ``drop_prob``.
    The input tensor itself is left untouched.
    """
    column_draws = th.empty(
        (x.size(1),), dtype=th.float32, device=x.device
    ).uniform_(0, 1)
    dropped_columns = column_draws < drop_prob

    masked = x.clone()
    masked[:, dropped_columns] = 0
    return masked


def mask_edge(graph, mask_prob):
    """Sample the ids of the edges to keep.

    Each of the ``graph.num_edges()`` edges is kept independently with
    probability ``1 - mask_prob``.

    Returns
    -------
    Tensor
        1-D tensor holding the indices of the kept edges.
    """
    num_edges = graph.num_edges()
    drop_rates = th.full((num_edges,), mask_prob, dtype=th.float32)
    keep_flags = th.bernoulli(1 - drop_rates)
    return keep_flags.nonzero().squeeze(1)
def repeat(n_times):
    """Decorator factory: run a metric function several times and aggregate.

    The decorated function must return a dict of numeric metrics. The wrapper
    calls it ``n_times`` with the same arguments, prints the summary via
    :func:`print_statistics` and returns
    ``{metric: {"mean": ..., "std": ...}}``.
    """

    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            results = [f(*args, **kwargs) for _ in range(n_times)]
            statistics = {}
            for key in results[0]:
                values = [r[key] for r in results]
                statistics[key] = {
                    "mean": np.mean(values),
                    "std": np.std(values),
                }
            print_statistics(statistics, f.__name__)
            return statistics

        return wrapper

    return decorator


def prob_to_one_hot(y_pred):
    """Turn a (n_samples, n_classes) score matrix into boolean one-hot rows.

    The ``True`` entry of each row marks the arg-max column of that row
    (first maximum on ties, as ``np.argmax`` does).
    """
    ret = np.zeros(y_pred.shape, np.bool_)
    # Vectorised replacement for a per-row Python loop.
    ret[np.arange(y_pred.shape[0]), np.argmax(y_pred, axis=1)] = True
    return ret


def print_statistics(statistics, function_name):
    """Print ``key=mean+-std`` pairs on one line, prefixed by the function name."""
    print(f"(E) | {function_name}:", end=" ")
    parts = [
        f"{key}={stat['mean']:.4f}+-{stat['std']:.4f}"
        for key, stat in statistics.items()
    ]
    # Nothing (not even a newline) is printed after the prefix when there are
    # no metrics, matching the previous output.
    if parts:
        print(", ".join(parts))


@repeat(3)
def label_classification(
    embeddings, y, train_mask, test_mask, split="random", ratio=0.1
):
    """Linear evaluation of node embeddings with logistic regression.

    Parameters
    ----------
    embeddings : Tensor
        Node embeddings; L2-normalised row-wise before fitting.
    y : Tensor
        Integer class labels, one per node.
    train_mask, test_mask :
        Masks selecting train / test nodes; only used when
        ``split == "public"``.
    split : {"random", "public"}
        ``"random"`` draws a fresh train/test split with ``ratio`` of the
        nodes for training; ``"public"`` uses the given masks.
    ratio : float
        Training fraction for the random split.

    Returns
    -------
    dict
        ``{"F1Mi": micro_f1, "F1Ma": macro_f1}`` for a single run (the
        ``repeat`` decorator aggregates three runs).

    Raises
    ------
    ValueError
        If ``split`` is neither ``"random"`` nor ``"public"``.
    """
    # Fail early: previously an unknown split surfaced much later as an
    # UnboundLocalError on ``X_train``.
    if split not in ("random", "public"):
        raise ValueError(
            f"Unknown split {split!r}; expected 'random' or 'public'."
        )

    X = embeddings.detach().cpu().numpy()
    Y = y.detach().cpu().numpy()
    Y = Y.reshape(-1, 1)
    onehot_encoder = OneHotEncoder(categories="auto").fit(Y)
    Y = onehot_encoder.transform(Y).toarray().astype(np.bool_)

    X = normalize(X, norm="l2")

    if split == "random":
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=1 - ratio
        )
    else:  # split == "public"
        X_train = X[train_mask]
        X_test = X[test_mask]
        y_train = Y[train_mask]
        y_test = Y[test_mask]

    logreg = LogisticRegression(solver="liblinear")
    # Grid over the inverse regularisation strength: 2^-10 ... 2^9.
    c = 2.0 ** np.arange(-10, 10)

    clf = GridSearchCV(
        estimator=OneVsRestClassifier(logreg),
        param_grid=dict(estimator__C=c),
        n_jobs=8,
        cv=5,
        verbose=0,
    )
    clf.fit(X_train, y_train)

    y_pred = clf.predict_proba(X_test)
    y_pred = prob_to_one_hot(y_pred)

    micro = f1_score(y_test, y_pred, average="micro")
    macro = f1_score(y_test, y_pred, average="macro")

    return {"F1Mi": micro, "F1Ma": macro}
) parser.add_argument( "--der1", type=float, default=0.2, help="Drop edge ratio of the 1st augmentation.", ) parser.add_argument( "--der2", type=float, default=0.2, help="Drop edge ratio of the 2nd augmentation.", ) parser.add_argument( "--dfr1", type=float, default=0.2, help="Drop feature ratio of the 1st augmentation.", ) parser.add_argument( "--dfr2", type=float, default=0.2, help="Drop feature ratio of the 2nd augmentation.", ) args = parser.parse_args() if args.gpu != -1 and th.cuda.is_available(): args.device = "cuda:{}".format(args.gpu) else: args.device = "cpu" if __name__ == "__main__": # Step 1: Load hyperparameters =================================================================== # lr = args.lr hid_dim = args.hid_dim out_dim = args.out_dim num_layers = args.num_layers act_fn = ({"relu": nn.ReLU(), "prelu": nn.PReLU()})[args.act_fn] drop_edge_rate_1 = args.der1 drop_edge_rate_2 = args.der2 drop_feature_rate_1 = args.dfr1 drop_feature_rate_2 = args.dfr2 temp = args.temp epochs = args.epochs wd = args.wd # Step 2: Prepare data =================================================================== # graph, feat, labels, train_mask, test_mask = load(args.dataname) in_dim = feat.shape[1] # Step 3: Create model =================================================================== # model = Grace(in_dim, hid_dim, out_dim, num_layers, act_fn, temp) model = model.to(args.device) print(f"# params: {count_parameters(model)}") optimizer = th.optim.Adam(model.parameters(), lr=lr, weight_decay=wd) # Step 4: Training ======================================================================= for epoch in range(epochs): model.train() optimizer.zero_grad() graph1, feat1 = aug(graph, feat, drop_feature_rate_1, drop_edge_rate_1) graph2, feat2 = aug(graph, feat, drop_feature_rate_2, drop_edge_rate_2) graph1 = graph1.to(args.device) graph2 = graph2.to(args.device) feat1 = feat1.to(args.device) feat2 = feat2.to(args.device) loss = model(graph1, graph2, feat1, feat2) loss.backward() 
# Multi-layer Graph Convolutional Networks
class GCN(nn.Module):
    """Stack of ``GraphConv`` layers, each followed by the activation.

    Hidden layers are ``2 * out_dim`` wide; only the last layer maps down to
    ``out_dim``. At least two layers are required.
    """

    def __init__(self, in_dim, out_dim, act_fn, num_layers=2):
        super().__init__()

        assert num_layers >= 2
        self.num_layers = num_layers

        hidden = out_dim * 2
        widths = [in_dim] + [hidden] * (num_layers - 1) + [out_dim]
        self.convs = nn.ModuleList(
            [
                GraphConv(fan_in, fan_out)
                for fan_in, fan_out in zip(widths[:-1], widths[1:])
            ]
        )
        self.act_fn = act_fn

    def forward(self, graph, feat):
        """Apply every convolution in order; the activation also follows the last one."""
        for conv in self.convs:
            feat = self.act_fn(conv(graph, feat))
        return feat


# Multi-layer(2-layer) Perceptron
class MLP(nn.Module):
    """Two linear layers with ELU in between: ``in_dim -> out_dim -> in_dim``."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, out_dim)
        self.fc2 = nn.Linear(out_dim, in_dim)

    def forward(self, x):
        hidden = F.elu(self.fc1(x))
        return self.fc2(hidden)


class Grace(nn.Module):
    r"""
        GRACE model
    Parameters
    -----------
    in_dim: int
        Input feature size.
    hid_dim: int
        Hidden feature size.
    out_dim: int
        Output feature size.
    num_layers: int
        Number of the GNN encoder layers.
    act_fn: nn.Module
        Activation function.
    temp: float
        Temperature constant.
    """

    def __init__(self, in_dim, hid_dim, out_dim, num_layers, act_fn, temp):
        super().__init__()
        self.encoder = GCN(in_dim, hid_dim, act_fn, num_layers)
        self.temp = temp
        self.proj = MLP(hid_dim, out_dim)

    def sim(self, z1, z2):
        """Pairwise cosine similarity between the rows of ``z1`` and ``z2``."""
        # Rows are normalised first, so the matrix product yields cosines.
        unit1 = F.normalize(z1)
        unit2 = F.normalize(z2)
        return th.mm(unit1, unit2.t())

    def get_loss(self, z1, z2):
        """Per-node contrastive loss of view ``z1`` against view ``z2``.

        Row ``i`` of ``z2`` is the positive for row ``i`` of ``z1``; all other
        rows of both views act as negatives.
        """
        intra_view = th.exp(self.sim(z1, z1) / self.temp)
        inter_view = th.exp(self.sim(z1, z2) / self.temp)

        # The diagonal of the inter-view matrix holds the positive pairs.
        # The intra-view diagonal (self similarity) is removed from the
        # denominator.
        denominator = intra_view.sum(1) + inter_view.sum(1) - intra_view.diag()
        return -th.log(inter_view.diag() / denominator)

    def get_embedding(self, graph, feat):
        """Encoder output for evaluation, detached from the autograd graph."""
        return self.encoder(graph, feat).detach()

    def forward(self, graph1, graph2, feat1, feat2):
        """Return the scalar training loss for two augmented views."""
        # Encode both views with the shared encoder.
        h1 = self.encoder(graph1, feat1)
        h2 = self.encoder(graph2, feat2)

        # Project into the space where the contrastive loss is computed.
        z1 = self.proj(h1)
        z2 = self.proj(h2)

        # Symmetrise: each view serves once as the anchor.
        symmetric = (self.get_loss(z1, z2) + self.get_loss(z2, z1)) * 0.5
        return symmetric.mean()
Dataset summary: | Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes | | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 | | Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 | | Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 | ## Arguments ###### Dataset options ``` --dataname str The graph dataset name. Default is 'cora'. ``` ###### GPU options ``` --gpu int GPU index. Default is -1, using CPU. ``` ###### Model options ``` --epochs int Number of training epochs. Default is 200. --early_stopping int Early stopping patience rounds. Default is 200. --lr float Adam optimizer learning rate. Default is 0.01. --weight_decay float L2 regularization coefficient. Default is 5e-4. --dropnode_rate float Dropnode rate (1 - keep probability). Default is 0.5. --input_droprate float Dropout rate of input layer. Default is 0.0. --hidden_droprate float Dropout rate of hidden layer. Default is 0.0. --hid_dim int Hidden layer dimensionality. Default is 32. --order int Propagation step. Default is 8. --sample int Sampling times of dropnode. Default is 4. --tem float Sharpening temperature. Default is 0.5. --lam float Coefficient of consistency regularization. Default is 1.0. --use_bn bool Using batch normalization. Default is False. ``` ## Examples Train a model which follows the original hyperparameters on different datasets.
def argument():
    """Parse the command line of the GRAND example.

    Returns
    -------
    argparse.Namespace
        Parsed options plus a derived ``device`` attribute:
        ``"cuda:<gpu>"`` when ``--gpu`` is not -1 and CUDA is available,
        ``"cpu"`` otherwise.
    """
    parser = argparse.ArgumentParser(description="GRAND")

    # data source params
    # Only these three datasets are handled by the script, so anything else
    # is rejected here instead of failing later with a NameError.
    parser.add_argument(
        "--dataname",
        type=str,
        default="cora",
        choices=["cora", "citeseer", "pubmed"],
        help="Name of dataset.",
    )
    # cuda params
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
    )
    # training params
    parser.add_argument(
        "--epochs", type=int, default=200, help="Training epochs."
    )
    parser.add_argument(
        "--early_stopping",
        type=int,
        default=200,
        help="Patient epochs to wait before early stopping.",
    )
    parser.add_argument("--lr", type=float, default=0.01, help="Learning rate.")
    parser.add_argument(
        "--weight_decay", type=float, default=5e-4, help="L2 reg."
    )
    # model params
    parser.add_argument(
        "--hid_dim", type=int, default=32, help="Hidden layer dimensionalities."
    )
    parser.add_argument(
        "--dropnode_rate",
        type=float,
        default=0.5,
        help="Dropnode rate (1 - keep probability).",
    )
    parser.add_argument(
        "--input_droprate",
        type=float,
        default=0.0,
        help="dropout rate of input layer",
    )
    parser.add_argument(
        "--hidden_droprate",
        type=float,
        default=0.0,
        help="dropout rate of hidden layer",
    )
    parser.add_argument("--order", type=int, default=8, help="Propagation step")
    parser.add_argument(
        "--sample", type=int, default=4, help="Sampling times of dropnode"
    )
    parser.add_argument(
        "--tem", type=float, default=0.5, help="Sharpening temperature"
    )
    parser.add_argument(
        "--lam",
        type=float,
        default=1.0,
        help="Coefficient of consistency regularization",
    )
    parser.add_argument(
        "--use_bn",
        action="store_true",
        default=False,
        help="Using Batch Normalization",
    )

    args = parser.parse_args()

    # check cuda
    if args.gpu != -1 and th.cuda.is_available():
        args.device = "cuda:{}".format(args.gpu)
    else:
        args.device = "cpu"

    return args


def consis_loss(logps, temp, lam):
    """Consistency regularisation between several stochastic predictions.

    Parameters
    ----------
    logps : list of Tensor
        Log-probabilities, one tensor per augmentation; class scores are
        expected along dim 1 (the tensors are stacked on a new dim 2).
    temp : float
        Sharpening temperature; the averaged distribution is raised to
        ``1 / temp`` and renormalised over dim 1.
    lam : float
        Weight applied to the resulting loss.

    Returns
    -------
    Tensor
        Scalar: ``lam`` times the mean squared distance between every
        prediction and the sharpened average.
    """
    ps = th.stack([th.exp(p) for p in logps], dim=2)

    avg_p = th.mean(ps, dim=2)
    powered = th.pow(avg_p, 1.0 / temp)
    # The sharpened target is treated as a constant: no gradient flows
    # through it.
    sharp_p = (powered / th.sum(powered, dim=1, keepdim=True)).detach()

    sharp_p = sharp_p.unsqueeze(2)
    loss = th.mean(th.sum(th.pow(ps - sharp_p, 2), dim=1, keepdim=True))

    return lam * loss
CiteseerGraphDataset() elif args.dataname == "pubmed": dataset = PubmedGraphDataset() graph = dataset[0] graph = dgl.add_self_loop(graph) device = args.device # retrieve the number of classes n_classes = dataset.num_classes # retrieve labels of ground truth labels = graph.ndata.pop("label").to(device).long() # Extract node features feats = graph.ndata.pop("feat").to(device) n_features = feats.shape[-1] # retrieve masks for train/validation/test train_mask = graph.ndata.pop("train_mask") val_mask = graph.ndata.pop("val_mask") test_mask = graph.ndata.pop("test_mask") train_idx = th.nonzero(train_mask, as_tuple=False).squeeze().to(device) val_idx = th.nonzero(val_mask, as_tuple=False).squeeze().to(device) test_idx = th.nonzero(test_mask, as_tuple=False).squeeze().to(device) # Step 2: Create model =================================================================== # model = GRAND( n_features, args.hid_dim, n_classes, args.sample, args.order, args.dropnode_rate, args.input_droprate, args.hidden_droprate, args.use_bn, ) model = model.to(args.device) graph = graph.to(args.device) # Step 3: Create training components ===================================================== # loss_fn = nn.NLLLoss() opt = optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.weight_decay ) loss_best = np.inf acc_best = 0 # Step 4: training epoches =============================================================== # for epoch in range(args.epochs): """Training""" model.train() loss_sup = 0 logits = model(graph, feats, True) # calculate supervised loss for k in range(args.sample): loss_sup += F.nll_loss(logits[k][train_idx], labels[train_idx]) loss_sup = loss_sup / args.sample # calculate consistency loss loss_consis = consis_loss(logits, args.tem, args.lam) loss_train = loss_sup + loss_consis acc_train = th.sum( logits[0][train_idx].argmax(dim=1) == labels[train_idx] ).item() / len(train_idx) # backward opt.zero_grad() loss_train.backward() opt.step() """ Validating """ model.eval() with 
def drop_node(feats, drop_rate, training):
    """Randomly zero whole node rows (training) or rescale (inference).

    Parameters
    ----------
    feats : Tensor
        Node features, one row per node.
    drop_rate : float
        Probability of dropping a node's entire feature row.
    training : bool
        If True, each row is multiplied by an independent Bernoulli(1 -
        ``drop_rate``) sample. If False, all features are scaled by
        ``1 - drop_rate`` instead.

    Returns
    -------
    Tensor
        Tensor of the same shape as ``feats``.
    """
    if not training:
        return feats * (1.0 - drop_rate)

    n = feats.shape[0]
    # The mask is sampled on the CPU and then moved, as before; it is only
    # built when it is actually needed.
    drop_rates = th.full((n,), drop_rate, dtype=th.float32)
    masks = th.bernoulli(1.0 - drop_rates).unsqueeze(1)
    return masks.to(feats.device) * feats


class MLP(nn.Module):
    """Two-layer perceptron with dropout and optional batch normalisation.

    Parameters
    ----------
    nfeat : int
        Input feature size.
    nhid : int
        Hidden layer size.
    nclass : int
        Output size.
    input_droprate : float
        Dropout rate applied to the input.
    hidden_droprate : float
        Dropout rate applied to the hidden activation.
    use_bn : bool, optional
        If True, batch-normalise the input and the hidden activation.
    """

    def __init__(
        self, nfeat, nhid, nclass, input_droprate, hidden_droprate, use_bn=False
    ):
        super(MLP, self).__init__()

        self.layer1 = nn.Linear(nfeat, nhid, bias=True)
        self.layer2 = nn.Linear(nhid, nclass, bias=True)

        self.input_dropout = nn.Dropout(input_droprate)
        self.hidden_dropout = nn.Dropout(hidden_droprate)
        # The norm layers are always created (so state_dict keys do not depend
        # on ``use_bn``) but only applied when ``use_bn`` is set.
        self.bn1 = nn.BatchNorm1d(nfeat)
        self.bn2 = nn.BatchNorm1d(nhid)
        self.use_bn = use_bn

    def reset_parameters(self):
        """Re-initialise the two linear layers."""
        self.layer1.reset_parameters()
        self.layer2.reset_parameters()

    def forward(self, x):
        if self.use_bn:
            x = self.bn1(x)
        x = self.input_dropout(x)
        x = F.relu(self.layer1(x))

        if self.use_bn:
            x = self.bn2(x)
        x = self.hidden_dropout(x)
        x = self.layer2(x)

        return x


def GRANDConv(graph, feats, order):
    """
    Parameters
    -----------
    graph: dgl.Graph
        The input graph
    feats: Tensor (n_nodes * feat_dim)
        Node features
    order: int
        Propagation Steps

    Returns
    -------
    Tensor
        Average of ``feats`` and its ``order`` successive propagations.
    """
    with graph.local_scope():
        # Edge weights of the symmetrically normalised adjacency matrix:
        # deg(u)^-1/2 * deg(v)^-1/2, with degrees clamped to at least 1.
        # (This used to be a bare, non-raw string containing a backslash
        # sequence that is not a valid escape, which makes newer Python
        # versions emit a warning.)
        degs = graph.in_degrees().float().clamp(min=1)
        norm = th.pow(degs, -0.5).to(feats.device).unsqueeze(1)

        graph.ndata["norm"] = norm
        graph.apply_edges(fn.u_mul_v("norm", "norm", "weight"))

        # Graph convolution: accumulate the 0th..order-th propagation.
        x = feats
        # Fresh tensor so the in-place accumulation below never touches the
        # caller's ``feats``.
        y = 0 + feats

        for i in range(order):
            graph.ndata["h"] = x
            graph.update_all(fn.u_mul_e("h", "weight", "m"), fn.sum("m", "h"))
            x = graph.ndata.pop("h")
            y.add_(x)

    return y / (order + 1)


class GRAND(nn.Module):
    r"""

    Parameters
    -----------
    in_dim: int
        Input feature size.
    hid_dim: int
        Hidden feature size.
    n_class: int
        Number of classes.
    S: int
        Number of augmentation samples.
    K: int
        Number of propagation steps.
    node_dropout: float
        Dropout rate on node features.
    input_droprate: float
        Dropout rate of the input layer of the MLP.
    hidden_droprate: float
        Dropout rate of the hidden layer of the MLP.
    batchnorm: bool, optional
        If True, use batch normalization.

    """

    def __init__(
        self,
        in_dim,
        hid_dim,
        n_class,
        S=1,
        K=3,
        node_dropout=0.0,
        input_droprate=0.0,
        hidden_droprate=0.0,
        batchnorm=False,
    ):
        super(GRAND, self).__init__()
        self.in_dim = in_dim
        self.hid_dim = hid_dim
        self.S = S
        self.K = K
        self.n_class = n_class

        self.mlp = MLP(
            in_dim, hid_dim, n_class, input_droprate, hidden_droprate, batchnorm
        )

        self.dropout = node_dropout
        # Not used by ``forward`` (which calls ``drop_node``); kept so the
        # attribute stays available to existing code.
        self.node_dropout = nn.Dropout(node_dropout)

    def forward(self, graph, feats, training=True):
        """Predict class log-probabilities.

        Returns a list of ``S`` tensors (one per random augmentation) when
        ``training`` is True, and a single tensor otherwise.
        """
        X = feats
        S = self.S

        if training:  # Training Mode
            output_list = []
            for s in range(S):
                drop_feat = drop_node(X, self.dropout, True)  # Drop node
                feat = GRANDConv(graph, drop_feat, self.K)  # Graph Convolution
                output_list.append(
                    th.log_softmax(self.mlp(feat), dim=-1)
                )  # Prediction

            return output_list
        else:  # Inference Mode
            drop_feat = drop_node(X, self.dropout, False)
            X = GRANDConv(graph, drop_feat, self.K)

            return th.log_softmax(self.mlp(X), dim=-1)
Another option is to use the Hungarian algorithm provided by scipy (scipy.optimize.linear_sum_assignment). ### Usage Examples of usage are provided in examples.py. The function signature and an example are also given below: ```python graph_edit_distance(G1, G2, node_substitution_cost=None, edge_substitution_cost=None, G1_node_deletion_cost=None, G2_node_insertion_cost=None, G1_edge_deletion_cost=None, G2_edge_insertion_cost=None, algorithm='bipartite', max_beam_size=100) """ Parameters ---------- G1, G2: DGLGraphs node_substitution_cost, edge_substitution_cost : 2D numpy arrays node_substitution_cost[i,j] is the cost of substituting node i of G1 with node j of G2, similar definition for edge_substitution_cost. If None, default cost of 0 is used. G1_node_deletion_cost, G1_edge_deletion_cost : 1D numpy arrays G1_node_deletion_cost[i] is the cost of deletion of node i of G1, similar definition for G1_edge_deletion_cost. If None, default cost of 1 is used. G2_node_insertion_cost, G2_edge_insertion_cost : 1D numpy arrays G2_node_insertion_cost[i] is the cost of insertion of node i of G2, similar definition for G2_edge_insertion_cost. If None, default cost of 1 is used. algorithm : string Algorithm to use to calculate the edit distance. Can be either 'astar', 'beam', 'bipartite' or 'hausdorff'. max_beam_size : int Maximum number of nodes in the open list, in case the algorithm is 'beam'. Returns ------- A tuple of three objects: (edit_distance, node_mapping, edge_mapping) edit_distance is the calculated edit distance (float). node_mapping is a tuple of size two, containing the node assignments of the two graphs respectively. e.g., node_mapping[0][i] is the node mapping of node i of graph G1 (None means that the node is deleted). Similar definition for the edge_mapping. For 'hausdorff', node_mapping and edge_mapping are returned as None, as this approximation does not return a unique edit path.
Examples -------- >>> src1 = [0, 1, 2, 3, 4, 5]; >>> dst1 = [1, 2, 3, 4, 5, 6]; >>> src2 = [0, 1, 3, 4, 5]; >>> dst2 = [1, 2, 4, 5, 6]; >>> G1 = dgl.DGLGraph((src1, dst1)) >>> G2 = dgl.DGLGraph((src2, dst2)) >>> distance, node_mapping, edge_mapping = graph_edit_distance(G1, G1, algorithm='astar') >>> print(distance) 0.0 >>> distance, node_mapping, edge_mapping = graph_edit_distance(G1, G2, algorithm='astar') >>> print(distance) 1.0 ``` ### References [1] Riesen, Kaspar, Stefan Fankhauser, and Horst Bunke. "Speeding Up Graph Edit Distance Computation with a Bipartite Heuristic." MLG. 2007. [2] Neuhaus, Michel, Kaspar Riesen, and Horst Bunke. "Fast suboptimal algorithms for the computation of graph edit distance." Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR). 2006. [3] Fankhauser, Stefan, Kaspar Riesen, and Horst Bunke. "Speeding up graph edit distance computation through fast bipartite matching." International Workshop on Graph-Based Representations in Pattern Recognition. 2011. [4] Fischer, Andreas, et al. "A hausdorff heuristic for efficient computation of graph edit distance." Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR). 2014. 
# Usage examples for graph_edit_distance on two small path-like graphs.
# G2 is G1 without the edge 2 -> 3.
src1 = [0, 1, 2, 3, 4, 5]
dst1 = [1, 2, 3, 4, 5, 6]

src2 = [0, 1, 3, 4, 5]
dst2 = [1, 2, 4, 5, 6]


G1 = dgl.DGLGraph((src1, dst1))
G2 = dgl.DGLGraph((src2, dst2))


# Exact edit distance with astar search
distance, node_mapping, edge_mapping = graph_edit_distance(
    G1, G1, algorithm="astar"
)
print(distance)  # 0.0
distance, node_mapping, edge_mapping = graph_edit_distance(
    G1, G2, algorithm="astar"
)
print(distance)  # 1.0


# With user-input cost matrices
node_substitution_cost = np.empty((G1.num_nodes(), G2.num_nodes()))
G1_node_deletion_cost = np.empty(G1.num_nodes())
G2_node_insertion_cost = np.empty(G2.num_nodes())

edge_substitution_cost = np.empty((G1.num_edges(), G2.num_edges()))
G1_edge_deletion_cost = np.empty(G1.num_edges())
G2_edge_insertion_cost = np.empty(G2.num_edges())

# Node substitution cost of 0 when node-ids are same, else 1.
# Only the diagonal is zeroed; the earlier double loop overwrote every entry
# with 0, which contradicted this description.
node_substitution_cost.fill(1.0)
np.fill_diagonal(node_substitution_cost, 0.0)

# Node insertion/deletion cost of 1
G1_node_deletion_cost.fill(1.0)
G2_node_insertion_cost.fill(1.0)

# Edge substitution cost of 0
edge_substitution_cost.fill(0.0)

# Edge insertion/deletion cost of 0.5
G1_edge_deletion_cost.fill(0.5)
G2_edge_insertion_cost.fill(0.5)

distance, node_mapping, edge_mapping = graph_edit_distance(
    G1,
    G2,
    node_substitution_cost,
    edge_substitution_cost,
    G1_node_deletion_cost,
    G2_node_insertion_cost,
    G1_edge_deletion_cost,
    G2_edge_insertion_cost,
    algorithm="astar",
)

# The identity node mapping is free and one edge (2 -> 3) must be deleted.
print(distance)  # 0.5


# Approximate edit distance with beam search, it is more than or equal to the exact edit distance
distance, node_mapping, edge_mapping = graph_edit_distance(
    G1, G2, algorithm="beam", max_beam_size=2
)
print(distance)  # 3.0

# Approximate edit distance with bipartite heuristic, it is more than or equal to the exact edit distance
distance, node_mapping, edge_mapping = graph_edit_distance(
    G1, G2, algorithm="bipartite"
)
print(
    distance
)  # 9.0, can be different as multiple solutions possible for the intermediate LAP used in this approximation


# Approximate edit distance with hausdorff heuristic, it is less than or equal to the exact edit distance
distance, node_mapping, edge_mapping = graph_edit_distance(
    G1, G2, algorithm="hausdorff"
)
print(distance)  # 0.0
Parameters : see graph_edit_distance """ num_G1_nodes = G1.num_nodes() num_G2_nodes = G2.num_nodes() num_G1_edges = G1.num_edges() num_G2_edges = G2.num_edges() # if any cost matrix is None, initialize it with default costs if node_substitution_cost is None: node_substitution_cost = np.zeros( (num_G1_nodes, num_G2_nodes), dtype=float ) else: assert node_substitution_cost.shape == (num_G1_nodes, num_G2_nodes) if edge_substitution_cost is None: edge_substitution_cost = np.zeros( (num_G1_edges, num_G2_edges), dtype=float ) else: assert edge_substitution_cost.shape == (num_G1_edges, num_G2_edges) if G1_node_deletion_cost is None: G1_node_deletion_cost = np.ones(num_G1_nodes, dtype=float) else: assert G1_node_deletion_cost.shape[0] == num_G1_nodes if G1_edge_deletion_cost is None: G1_edge_deletion_cost = np.ones(num_G1_edges, dtype=float) else: assert G1_edge_deletion_cost.shape[0] == num_G1_edges if G2_node_insertion_cost is None: G2_node_insertion_cost = np.ones(num_G2_nodes, dtype=float) else: assert G2_node_insertion_cost.shape[0] == num_G2_nodes if G2_edge_insertion_cost is None: G2_edge_insertion_cost = np.ones(num_G2_edges, dtype=float) else: assert G2_edge_insertion_cost.shape[0] == num_G2_edges return ( node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ) def construct_cost_functions( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ): """Constructs cost matrices for LAP solution Parameters : see graph_edit_distance """ num_G1_nodes = G1.num_nodes() num_G2_nodes = G2.num_nodes() num_G1_edges = G1.num_edges() num_G2_edges = G2.num_edges() # cost matrix of node mappings cost_upper_bound = ( node_substitution_cost.sum() + G1_node_deletion_cost.sum() + G2_node_insertion_cost.sum() + 1 ) C_node = np.zeros( (num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float 
) C_node[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost C_node[ 0:num_G1_nodes, num_G2_nodes : num_G2_nodes + num_G1_nodes ] = np.array( [ G1_node_deletion_cost[i] if i == j else cost_upper_bound for i in range(num_G1_nodes) for j in range(num_G1_nodes) ] ).reshape( num_G1_nodes, num_G1_nodes ) C_node[ num_G1_nodes : num_G1_nodes + num_G2_nodes, 0:num_G2_nodes ] = np.array( [ G2_node_insertion_cost[i] if i == j else cost_upper_bound for i in range(num_G2_nodes) for j in range(num_G2_nodes) ] ).reshape( num_G2_nodes, num_G2_nodes ) # cost matrix of edge mappings cost_upper_bound = ( edge_substitution_cost.sum() + G1_edge_deletion_cost.sum() + G2_edge_insertion_cost.sum() + 1 ) C_edge = np.zeros( (num_G1_edges + num_G2_edges, num_G1_edges + num_G2_edges), dtype=float ) C_edge[0:num_G1_edges, 0:num_G2_edges] = edge_substitution_cost C_edge[ 0:num_G1_edges, num_G2_edges : num_G2_edges + num_G1_edges ] = np.array( [ G1_edge_deletion_cost[i] if i == j else cost_upper_bound for i in range(num_G1_edges) for j in range(num_G1_edges) ] ).reshape( num_G1_edges, num_G1_edges ) C_edge[ num_G1_edges : num_G1_edges + num_G2_edges, 0:num_G2_edges ] = np.array( [ G2_edge_insertion_cost[i] if i == j else cost_upper_bound for i in range(num_G2_edges) for j in range(num_G2_edges) ] ).reshape( num_G2_edges, num_G2_edges ) return C_node, C_edge def get_edges_to_match(G, node_id, matched_nodes): # Find the edges in G with one end-point as node_id and other in matched_nodes or node_id incident_edges = np.array([], dtype=int) index = np.array([], dtype=int) direction = np.array([], dtype=int) if G.has_edge_between(node_id, node_id): self_edge_ids = G.edge_ids(node_id, node_id, return_array=True).numpy() incident_edges = np.concatenate((incident_edges, self_edge_ids)) index = np.concatenate((index, [-1] * len(self_edge_ids))) direction = np.concatenate((direction, [0] * len(self_edge_ids))) # Find predecessors src, _, eid = G.in_edges([node_id], "all") eid = eid.numpy() src = 
src.numpy() filtered_indices = [ (i, matched_nodes.index(src[i])) for i in range(len(src)) if src[i] in matched_nodes ] matched_index = np.array([_[1] for _ in filtered_indices], dtype=int) eid_index = np.array([_[0] for _ in filtered_indices], dtype=int) index = np.concatenate((index, matched_index)) incident_edges = np.concatenate((incident_edges, eid[eid_index])) direction = np.concatenate( (direction, np.array([-1] * len(filtered_indices), dtype=int)) ) # Find successors _, dst, eid = G.out_edges([node_id], "all") eid = eid.numpy() dst = dst.numpy() filtered_indices = [ (i, matched_nodes.index(dst[i])) for i in range(len(dst)) if dst[i] in matched_nodes ] matched_index = np.array([_[1] for _ in filtered_indices], dtype=int) eid_index = np.array([_[0] for _ in filtered_indices], dtype=int) index = np.concatenate((index, matched_index)) incident_edges = np.concatenate((incident_edges, eid[eid_index])) direction = np.concatenate( (direction, np.array([1] * len(filtered_indices), dtype=int)) ) return incident_edges, index, direction def subset_cost_matrix(cost_matrix, row_ids, col_ids, num_rows, num_cols): # Extract thr subset of cost matrix corresponding to rows/cols in arrays row_ids/col_ids # Note that the shape of cost_matrix is (num_rows+num_cols) * (num_rows+num_cols) extended_row_ids = np.concatenate( (row_ids, np.array([k + num_rows for k in col_ids])) ) extended_col_ids = np.concatenate( (col_ids, np.array([k + num_cols for k in row_ids])) ) return cost_matrix[extended_row_ids, :][:, extended_col_ids] class search_tree_node: def __init__( self, G1, G2, parent_matched_cost, parent_matched_nodes, parent_matched_edges, node_G1, node_G2, parent_unprocessed_nodes_G1, parent_unprocessed_nodes_G2, parent_unprocessed_edges_G1, parent_unprocessed_edges_G2, cost_matrix_nodes, cost_matrix_edges, ): self.matched_cost = parent_matched_cost self.future_approximate_cost = 0.0 self.matched_nodes = deepcopy(parent_matched_nodes) self.matched_nodes[0].append(node_G1) 
self.matched_nodes[1].append(node_G2) self.matched_edges = deepcopy(parent_matched_edges) self.unprocessed_nodes_G1 = [ _ for _ in parent_unprocessed_nodes_G1 if _ != node_G1 ] self.unprocessed_nodes_G2 = [ _ for _ in parent_unprocessed_nodes_G2 if _ != node_G2 ] # Add the cost of matching nodes at this tree-node to the matched cost if ( node_G1 is not None and node_G2 is not None ): # Substitute node_G1 with node_G2 self.matched_cost += cost_matrix_nodes[node_G1, node_G2] elif node_G1 is not None: # Delete node_G1 self.matched_cost += cost_matrix_nodes[ node_G1, node_G1 + G2.num_nodes() ] elif node_G2 is not None: # Insert node_G2 self.matched_cost += cost_matrix_nodes[ node_G2 + G1.num_nodes(), node_G2 ] # Add the cost of matching edges at this tree-node to the matched cost incident_edges_G1 = [] if ( node_G1 is not None ): # Find the edges with one end-point as node_G1 and other in matched nodes or node_G1 incident_edges_G1, index_G1, direction_G1 = get_edges_to_match( G1, node_G1, parent_matched_nodes[0] ) incident_edges_G2 = np.array([]) if ( node_G2 is not None ): # Find the edges with one end-point as node_G2 and other in matched nodes or node_G2 incident_edges_G2, index_G2, direction_G2 = get_edges_to_match( G2, node_G2, parent_matched_nodes[1] ) if ( len(incident_edges_G1) > 0 and len(incident_edges_G2) > 0 ): # Consider substituting matched_edges_cost_matrix = subset_cost_matrix( cost_matrix_edges, incident_edges_G1, incident_edges_G2, G1.num_edges(), G2.num_edges(), ) max_sum = matched_edges_cost_matrix.sum() # take care of impossible assignments by assigning maximum cost for i in range(len(incident_edges_G1)): for j in range(len(incident_edges_G2)): # both edges need to have same direction and the other end nodes are matched if ( direction_G1[i] == direction_G2[j] and index_G1[i] == index_G2[j] ): continue else: matched_edges_cost_matrix[i, j] = max_sum # Match the edges as per the LAP solution row_ind, col_ind, _ = lapjv(matched_edges_cost_matrix) 
lap_cost = 0.00 for i in range(len(row_ind)): lap_cost += matched_edges_cost_matrix[i, row_ind[i]] # Update matched edges for i in range(len(row_ind)): if i < len(incident_edges_G1): self.matched_edges[0].append(incident_edges_G1[i]) if row_ind[i] < len(incident_edges_G2): self.matched_edges[1].append( incident_edges_G2[row_ind[i]] ) else: self.matched_edges[1].append(None) elif row_ind[i] < len(incident_edges_G2): self.matched_edges[0].append(None) self.matched_edges[1].append(incident_edges_G2[row_ind[i]]) self.matched_cost += lap_cost elif len(incident_edges_G1) > 0: # only deletion possible edge_deletion_cost = 0.0 for edge in incident_edges_G1: edge_deletion_cost += cost_matrix_edges[ edge, G2.num_edges() + edge ] # Update matched edges for edge in incident_edges_G1: self.matched_edges[0].append(edge) self.matched_edges[1].append(None) # Update matched edges self.matched_cost += edge_deletion_cost elif len(incident_edges_G2) > 0: # only insertion possible edge_insertion_cost = 0.0 for edge in incident_edges_G2: edge_insertion_cost += cost_matrix_edges[ G1.num_edges() + edge, edge ] # Update matched edges for edge in incident_edges_G2: self.matched_edges[0].append(None) self.matched_edges[1].append(edge) self.matched_cost += edge_insertion_cost # Add the cost of matching of unprocessed nodes to the future approximate cost if ( len(self.unprocessed_nodes_G1) > 0 and len(self.unprocessed_nodes_G2) > 0 ): # Consider substituting unmatched_nodes_cost_matrix = subset_cost_matrix( cost_matrix_nodes, self.unprocessed_nodes_G1, self.unprocessed_nodes_G2, G1.num_nodes(), G2.num_nodes(), ) # Match the edges as per the LAP solution row_ind, col_ind, _ = lapjv(unmatched_nodes_cost_matrix) lap_cost = 0.00 for i in range(len(row_ind)): lap_cost += unmatched_nodes_cost_matrix[i, row_ind[i]] self.future_approximate_cost += lap_cost elif len(self.unprocessed_nodes_G1) > 0: # only deletion possible node_deletion_cost = 0.0 for node in self.unprocessed_nodes_G1: 
node_deletion_cost += cost_matrix_nodes[ node, G2.num_nodes() + node ] self.future_approximate_cost += node_deletion_cost elif len(self.unprocessed_nodes_G2) > 0: # only insertion possible node_insertion_cost = 0.0 for node in self.unprocessed_nodes_G2: node_insertion_cost += cost_matrix_nodes[ G1.num_nodes() + node, node ] self.future_approximate_cost += node_insertion_cost # Add the cost of LAP matching of unprocessed edges to the future approximate cost self.unprocessed_edges_G1 = [ _ for _ in parent_unprocessed_edges_G1 if _ not in incident_edges_G1 ] self.unprocessed_edges_G2 = [ _ for _ in parent_unprocessed_edges_G2 if _ not in incident_edges_G2 ] if ( len(self.unprocessed_edges_G1) > 0 and len(self.unprocessed_edges_G2) > 0 ): # Consider substituting unmatched_edges_cost_matrix = subset_cost_matrix( cost_matrix_edges, self.unprocessed_edges_G1, self.unprocessed_edges_G2, G1.num_edges(), G2.num_edges(), ) # Match the edges as per the LAP solution row_ind, col_ind, _ = lapjv(unmatched_edges_cost_matrix) lap_cost = 0.00 for i in range(len(row_ind)): lap_cost += unmatched_edges_cost_matrix[i, row_ind[i]] self.future_approximate_cost += lap_cost elif len(self.unprocessed_edges_G1) > 0: # only deletion possible edge_deletion_cost = 0.0 for edge in self.unprocessed_edges_G1: edge_deletion_cost += cost_matrix_edges[ edge, G2.num_edges() + edge ] self.future_approximate_cost += edge_deletion_cost elif len(self.unprocessed_edges_G2) > 0: # only insertion possible edge_insertion_cost = 0.0 for edge in self.unprocessed_edges_G2: edge_insertion_cost += cost_matrix_edges[ G1.num_edges() + edge, edge ] self.future_approximate_cost += edge_insertion_cost # For heap insertion order def __lt__(self, other): if ( abs( (self.matched_cost + self.future_approximate_cost) - (other.matched_cost + other.future_approximate_cost) ) > EPSILON ): return (self.matched_cost + self.future_approximate_cost) < ( other.matched_cost + other.future_approximate_cost ) elif abs(self.matched_cost 
- other.matched_cost) > EPSILON: return other.matched_cost < self.matched_cost # matched cost is closer to reality else: return ( len(self.unprocessed_nodes_G1) + len(self.unprocessed_nodes_G2) + len(self.unprocessed_edges_G1) + len(self.unprocessed_edges_G2) ) < ( len(other.unprocessed_nodes_G1) + len(other.unprocessed_nodes_G2) + len(other.unprocessed_edges_G1) + len(other.unprocessed_edges_G2) ) def edit_cost_from_node_matching( G1, G2, cost_matrix_nodes, cost_matrix_edges, node_matching ): matched_cost = 0.0 matched_nodes = ([], []) matched_edges = ([], []) # Add the cost of matching nodes for i in range(G1.num_nodes()): matched_cost += cost_matrix_nodes[i, node_matching[i]] matched_nodes[0].append(i) if node_matching[i] < G2.num_nodes(): matched_nodes[1].append(node_matching[i]) else: matched_nodes[1].append(None) for i in range(G1.num_nodes(), len(node_matching)): matched_cost += cost_matrix_nodes[i, node_matching[i]] if node_matching[i] < G2.num_nodes(): matched_nodes[0].append(None) matched_nodes[1].append(node_matching[i]) for i in range(len(matched_nodes[0])): # Add the cost of matching edges incident_edges_G1 = [] if ( matched_nodes[0][i] is not None ): # Find the edges with one end-point as node_G1 and other in matched nodes or node_G1 incident_edges_G1, index_G1, direction_G1 = get_edges_to_match( G1, matched_nodes[0][i], matched_nodes[0][:i] ) incident_edges_G2 = np.array([]) if ( matched_nodes[1][i] is not None ): # Find the edges with one end-point as node_G2 and other in matched nodes or node_G2 incident_edges_G2, index_G2, direction_G2 = get_edges_to_match( G2, matched_nodes[1][i], matched_nodes[1][:i] ) if ( len(incident_edges_G1) > 0 and len(incident_edges_G2) > 0 ): # Consider substituting matched_edges_cost_matrix = subset_cost_matrix( cost_matrix_edges, incident_edges_G1, incident_edges_G2, G1.num_edges(), G2.num_edges(), ) max_sum = matched_edges_cost_matrix.sum() # take care of impossible assignments by assigning maximum cost for i in 
range(len(incident_edges_G1)): for j in range(len(incident_edges_G2)): # both edges need to have same direction and the other end nodes are matched if ( direction_G1[i] == direction_G2[j] and index_G1[i] == index_G2[j] ): continue else: matched_edges_cost_matrix[i, j] = max_sum # Match the edges as per the LAP solution row_ind, col_ind, _ = lapjv(matched_edges_cost_matrix) lap_cost = 0.00 for i in range(len(row_ind)): lap_cost += matched_edges_cost_matrix[i, row_ind[i]] # Update matched edges for i in range(len(row_ind)): if i < len(incident_edges_G1): matched_edges[0].append(incident_edges_G1[i]) if row_ind[i] < len(incident_edges_G2): matched_edges[1].append(incident_edges_G2[row_ind[i]]) else: matched_edges[1].append(None) elif row_ind[i] < len(incident_edges_G2): matched_edges[0].append(None) matched_edges[1].append(incident_edges_G2[row_ind[i]]) matched_cost += lap_cost elif len(incident_edges_G1) > 0: # only deletion possible edge_deletion_cost = 0.0 for edge in incident_edges_G1: edge_deletion_cost += cost_matrix_edges[ edge, G2.num_edges() + edge ] # Update matched edges for edge in incident_edges_G1: matched_edges[0].append(edge) matched_edges[1].append(None) # Update matched edges matched_cost += edge_deletion_cost elif len(incident_edges_G2) > 0: # only insertion possible edge_insertion_cost = 0.0 for edge in incident_edges_G2: edge_insertion_cost += cost_matrix_edges[ G1.num_edges() + edge, edge ] # Update matched edges for edge in incident_edges_G2: matched_edges[0].append(None) matched_edges[1].append(edge) matched_cost += edge_insertion_cost return (matched_cost, matched_nodes, matched_edges) def contextual_cost_matrix_construction( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ): # Calculates approximate GED using linear assignment on the nodes with bipartite algorithm # cost matrix of node mappings num_G1_nodes = G1.num_nodes() num_G2_nodes = 
G2.num_nodes() num_G1_edges = G1.num_edges() num_G2_edges = G2.num_edges() cost_upper_bound = 2 * ( node_substitution_cost.sum() + G1_node_deletion_cost.sum() + G2_node_insertion_cost.sum() + 1 ) cost_matrix = np.zeros( (num_G1_nodes + num_G2_nodes, num_G1_nodes + num_G2_nodes), dtype=float ) cost_matrix[0:num_G1_nodes, 0:num_G2_nodes] = node_substitution_cost cost_matrix[ 0:num_G1_nodes, num_G2_nodes : num_G2_nodes + num_G1_nodes ] = np.array( [ G1_node_deletion_cost[i] if i == j else cost_upper_bound for i in range(num_G1_nodes) for j in range(num_G1_nodes) ] ).reshape( num_G1_nodes, num_G1_nodes ) cost_matrix[ num_G1_nodes : num_G1_nodes + num_G2_nodes, 0:num_G2_nodes ] = np.array( [ G2_node_insertion_cost[i] if i == j else cost_upper_bound for i in range(num_G2_nodes) for j in range(num_G2_nodes) ] ).reshape( num_G2_nodes, num_G2_nodes ) self_edge_list_G1 = [np.array([], dtype=int)] * num_G1_nodes self_edge_list_G2 = [np.array([], dtype=int)] * num_G2_nodes incoming_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes incoming_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes outgoing_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes outgoing_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes for i in range(num_G1_nodes): if G1.has_edge_between(i, i): self_edge_list_G1[i] = sorted( G1.edge_ids(i, i, return_array=True).numpy() ) incoming_edges_G1[i] = G1.in_edges([i], "eid").numpy() incoming_edges_G1[i] = np.setdiff1d( incoming_edges_G1[i], self_edge_list_G1[i] ) outgoing_edges_G1[i] = G1.out_edges([i], "eid").numpy() outgoing_edges_G1[i] = np.setdiff1d( outgoing_edges_G1[i], self_edge_list_G1[i] ) for i in range(num_G2_nodes): if G2.has_edge_between(i, i): self_edge_list_G2[i] = sorted( G2.edge_ids(i, i, return_array=True).numpy() ) incoming_edges_G2[i] = G2.in_edges([i], "eid").numpy() incoming_edges_G2[i] = np.setdiff1d( incoming_edges_G2[i], self_edge_list_G2[i] ) outgoing_edges_G2[i] = G2.out_edges([i], "eid").numpy() outgoing_edges_G2[i] = 
np.setdiff1d( outgoing_edges_G2[i], self_edge_list_G2[i] ) selected_deletion_G1 = [ G1_edge_deletion_cost[ np.concatenate( ( self_edge_list_G1[i], incoming_edges_G1[i], outgoing_edges_G1[i], ) ) ] for i in range(G1.num_nodes()) ] selected_insertion_G2 = [ G2_edge_insertion_cost[ np.concatenate( ( self_edge_list_G2[i], incoming_edges_G2[i], outgoing_edges_G2[i], ) ) ] for i in range(G2.num_nodes()) ] # Add the cost of edge edition which are dependent of a node (see this as the cost associated with a substructure) for i in range(num_G1_nodes): for j in range(num_G2_nodes): m = ( len(self_edge_list_G1[i]) + len(incoming_edges_G1[i]) + len(outgoing_edges_G1[i]) ) n = ( len(self_edge_list_G2[j]) + len(incoming_edges_G2[j]) + len(outgoing_edges_G2[j]) ) matrix_dim = m + n if matrix_dim == 0: continue temp_edge_cost_matrix = np.empty((matrix_dim, matrix_dim)) temp_edge_cost_matrix.fill(cost_upper_bound) temp_edge_cost_matrix[ : len(self_edge_list_G1[i]), : len(self_edge_list_G2[j]) ] = edge_substitution_cost[self_edge_list_G1[i], :][ :, self_edge_list_G2[j] ] temp_edge_cost_matrix[ len(self_edge_list_G1[i]) : len(self_edge_list_G1[i]) + len(incoming_edges_G1[i]), len(self_edge_list_G2[j]) : len(self_edge_list_G2[j]) + len(incoming_edges_G2[j]), ] = edge_substitution_cost[incoming_edges_G1[i], :][ :, incoming_edges_G2[j] ] temp_edge_cost_matrix[ len(self_edge_list_G1[i]) + len(incoming_edges_G1[i]) : m, len(self_edge_list_G2[j]) + len(incoming_edges_G2[j]) : n, ] = edge_substitution_cost[outgoing_edges_G1[i], :][ :, outgoing_edges_G2[j] ] np.fill_diagonal( temp_edge_cost_matrix[:m, n:], selected_deletion_G1[i] ) np.fill_diagonal( temp_edge_cost_matrix[m:, :n], selected_insertion_G2[j] ) temp_edge_cost_matrix[m:, n:].fill(0) row_ind, col_ind, _ = lapjv(temp_edge_cost_matrix) lap_cost = 0.00 for k in range(len(row_ind)): lap_cost += temp_edge_cost_matrix[k, row_ind[k]] cost_matrix[i, j] += lap_cost for i in range(num_G1_nodes): cost_matrix[i, num_G2_nodes + i] += 
selected_deletion_G1[i].sum() for i in range(num_G2_nodes): cost_matrix[num_G1_nodes + i, i] += selected_insertion_G2[i].sum() return cost_matrix def hausdorff_matching( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ): # Calculates approximate GED using hausdorff_matching # cost matrix of node mappings num_G1_nodes = G1.num_nodes() num_G2_nodes = G2.num_nodes() num_G1_edges = G1.num_edges() num_G2_edges = G2.num_edges() self_edge_list_G1 = [np.array([], dtype=int)] * num_G1_nodes self_edge_list_G2 = [np.array([], dtype=int)] * num_G2_nodes incoming_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes incoming_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes outgoing_edges_G1 = [np.array([], dtype=int)] * num_G1_nodes outgoing_edges_G2 = [np.array([], dtype=int)] * num_G2_nodes for i in range(num_G1_nodes): if G1.has_edge_between(i, i): self_edge_list_G1[i] = sorted( G1.edge_ids(i, i, return_array=True).numpy() ) incoming_edges_G1[i] = G1.in_edges([i], "eid").numpy() incoming_edges_G1[i] = np.setdiff1d( incoming_edges_G1[i], self_edge_list_G1[i] ) outgoing_edges_G1[i] = G1.out_edges([i], "eid").numpy() outgoing_edges_G1[i] = np.setdiff1d( outgoing_edges_G1[i], self_edge_list_G1[i] ) for i in range(num_G2_nodes): if G2.has_edge_between(i, i): self_edge_list_G2[i] = sorted( G2.edge_ids(i, i, return_array=True).numpy() ) incoming_edges_G2[i] = G2.in_edges([i], "eid").numpy() incoming_edges_G2[i] = np.setdiff1d( incoming_edges_G2[i], self_edge_list_G2[i] ) outgoing_edges_G2[i] = G2.out_edges([i], "eid").numpy() outgoing_edges_G2[i] = np.setdiff1d( outgoing_edges_G2[i], self_edge_list_G2[i] ) selected_deletion_self_G1 = [ G1_edge_deletion_cost[self_edge_list_G1[i]] for i in range(G1.num_nodes()) ] selected_insertion_self_G2 = [ G2_edge_insertion_cost[self_edge_list_G2[i]] for i in range(G2.num_nodes()) ] selected_deletion_incoming_G1 = [ 
G1_edge_deletion_cost[incoming_edges_G1[i]] for i in range(G1.num_nodes()) ] selected_insertion_incoming_G2 = [ G2_edge_insertion_cost[incoming_edges_G2[i]] for i in range(G2.num_nodes()) ] selected_deletion_outgoing_G1 = [ G1_edge_deletion_cost[outgoing_edges_G1[i]] for i in range(G1.num_nodes()) ] selected_insertion_outgoing_G2 = [ G2_edge_insertion_cost[outgoing_edges_G2[i]] for i in range(G2.num_nodes()) ] selected_deletion_G1 = [ G1_edge_deletion_cost[ np.concatenate( ( self_edge_list_G1[i], incoming_edges_G1[i], outgoing_edges_G1[i], ) ) ] for i in range(G1.num_nodes()) ] selected_insertion_G2 = [ G2_edge_insertion_cost[ np.concatenate( ( self_edge_list_G2[i], incoming_edges_G2[i], outgoing_edges_G2[i], ) ) ] for i in range(G2.num_nodes()) ] cost_G1 = np.array( [ (G1_node_deletion_cost[i] + selected_deletion_G1[i].sum() / 2) for i in range(num_G1_nodes) ] ) cost_G2 = np.array( [ (G2_node_insertion_cost[i] + selected_insertion_G2[i].sum() / 2) for i in range(num_G2_nodes) ] ) for i in range(num_G1_nodes): for j in range(num_G2_nodes): c1_self = deepcopy(selected_deletion_self_G1[i]) c2_self = deepcopy(selected_insertion_self_G2[j]) c1_incoming = deepcopy(selected_deletion_incoming_G1[i]) c2_incoming = deepcopy(selected_insertion_incoming_G2[j]) c1_outgoing = deepcopy(selected_deletion_outgoing_G1[i]) c2_outgoing = deepcopy(selected_insertion_outgoing_G2[j]) for k, a in enumerate(self_edge_list_G1[i]): for l, b in enumerate(self_edge_list_G2[j]): c1_self[k] = min( c1_self[k], edge_substitution_cost[a, b] / 2 ) c2_self[l] = min( c2_self[l], edge_substitution_cost[a, b] / 2 ) for k, a in enumerate(incoming_edges_G1[i]): for l, b in enumerate(incoming_edges_G2[j]): c1_incoming[k] = min( c1_incoming[k], edge_substitution_cost[a, b] / 2 ) c2_incoming[l] = min( c2_incoming[l], edge_substitution_cost[a, b] / 2 ) for k, a in enumerate(outgoing_edges_G1[i]): for l, b in enumerate(outgoing_edges_G2[j]): c1_outgoing[k] = min( c1_outgoing[k], edge_substitution_cost[a, b] / 
2 ) c2_outgoing[l] = min( c2_outgoing[l], edge_substitution_cost[a, b] / 2 ) edge_hausdorff_lower_bound = 0.0 if len(selected_deletion_G1[i]) > len(selected_insertion_G2[j]): idx = np.argpartition( selected_deletion_G1[i], ( len(selected_deletion_G1[i]) - len(selected_insertion_G2[j]) ), ) edge_hausdorff_lower_bound = selected_deletion_G1[i][ idx[ : ( len(selected_deletion_G1[i]) - len(selected_insertion_G2[j]) ) ] ].sum() elif len(selected_deletion_G1[i]) < len(selected_insertion_G2[j]): idx = np.argpartition( selected_insertion_G2[j], ( len(selected_insertion_G2[j]) - len(selected_deletion_G1[i]) ), ) edge_hausdorff_lower_bound = selected_insertion_G2[j][ idx[ : ( len(selected_insertion_G2[j]) - len(selected_deletion_G1[i]) ) ] ].sum() sc_cost = 0.5 * ( node_substitution_cost[i, j] + 0.5 * max( c1_self.sum() + c2_self.sum() + c1_incoming.sum() + c2_incoming.sum() + c1_outgoing.sum() + c2_outgoing.sum(), edge_hausdorff_lower_bound, ) ) if cost_G1[i] > sc_cost: cost_G1[i] = sc_cost if cost_G2[j] > sc_cost: cost_G2[j] = sc_cost graph_hausdorff_lower_bound = 0.0 if num_G1_nodes > num_G2_nodes: idx = np.argpartition( G1_node_deletion_cost, (num_G1_nodes - num_G2_nodes) ) graph_hausdorff_lower_bound = G1_node_deletion_cost[ idx[: (num_G1_nodes - num_G2_nodes)] ].sum() elif num_G1_nodes < num_G2_nodes: idx = np.argpartition( G2_node_insertion_cost, (num_G2_nodes - num_G1_nodes) ) graph_hausdorff_lower_bound = G2_node_insertion_cost[ idx[: (num_G2_nodes - num_G1_nodes)] ].sum() graph_hausdorff_cost = max( graph_hausdorff_lower_bound, cost_G1.sum() + cost_G2.sum() ) return graph_hausdorff_cost def a_star_search(G1, G2, cost_matrix_nodes, cost_matrix_edges, max_beam_size): # A-star traversal open_list = [] # Create first nodes in the A-star search tree, matching node 0 of G1 with all possibilities (each node of G2, and deletion) matched_cost = 0.0 matched_nodes = ([], []) # No nodes matched in the beginning matched_edges = ([], []) # No edges matched in the beginning 
unprocessed_nodes_G1 = [ i for i in range(G1.num_nodes()) ] # No nodes matched in the beginning unprocessed_nodes_G2 = [ i for i in range(G2.num_nodes()) ] # No nodes matched in the beginning unprocessed_edges_G1 = [ i for i in range(G1.num_edges()) ] # No edges matched in the beginning unprocessed_edges_G2 = [ i for i in range(G2.num_edges()) ] # No edges matched in the beginning for i in range(len(unprocessed_nodes_G2)): tree_node = search_tree_node( G1, G2, matched_cost, matched_nodes, matched_edges, unprocessed_nodes_G1[0], unprocessed_nodes_G2[i], unprocessed_nodes_G1, unprocessed_nodes_G2, unprocessed_edges_G1, unprocessed_edges_G2, cost_matrix_nodes, cost_matrix_edges, ) # Insert into open-list, implemented as a heap heappush(open_list, tree_node) # Consider node deletion tree_node = search_tree_node( G1, G2, matched_cost, matched_nodes, matched_edges, unprocessed_nodes_G1[0], None, unprocessed_nodes_G1, unprocessed_nodes_G2, unprocessed_edges_G1, unprocessed_edges_G2, cost_matrix_nodes, cost_matrix_edges, ) # Insert into open-list, implemented as a heap heappush(open_list, tree_node) while len(open_list) > 0: # TODO: Create a node that processes multi node insertion deletion in one search node, # as opposed in multiple search nodes here parent_tree_node = heappop(open_list) matched_cost = parent_tree_node.matched_cost matched_nodes = parent_tree_node.matched_nodes matched_edges = parent_tree_node.matched_edges unprocessed_nodes_G1 = parent_tree_node.unprocessed_nodes_G1 unprocessed_nodes_G2 = parent_tree_node.unprocessed_nodes_G2 unprocessed_edges_G1 = parent_tree_node.unprocessed_edges_G1 unprocessed_edges_G2 = parent_tree_node.unprocessed_edges_G2 if len(unprocessed_nodes_G1) == 0 and len(unprocessed_nodes_G2) == 0: return (matched_cost, matched_nodes, matched_edges) elif len(unprocessed_nodes_G1) > 0: for i in range(len(unprocessed_nodes_G2)): tree_node = search_tree_node( G1, G2, matched_cost, matched_nodes, matched_edges, unprocessed_nodes_G1[0], 
unprocessed_nodes_G2[i], unprocessed_nodes_G1, unprocessed_nodes_G2, unprocessed_edges_G1, unprocessed_edges_G2, cost_matrix_nodes, cost_matrix_edges, ) # Insert into open-list, implemented as a heap heappush(open_list, tree_node) # Consider node deletion tree_node = search_tree_node( G1, G2, matched_cost, matched_nodes, matched_edges, unprocessed_nodes_G1[0], None, unprocessed_nodes_G1, unprocessed_nodes_G2, unprocessed_edges_G1, unprocessed_edges_G2, cost_matrix_nodes, cost_matrix_edges, ) # Insert into open-list, implemented as a heap heappush(open_list, tree_node) elif len(unprocessed_nodes_G2) > 0: for i in range(len(unprocessed_nodes_G2)): tree_node = search_tree_node( G1, G2, matched_cost, matched_nodes, matched_edges, None, unprocessed_nodes_G2[i], unprocessed_nodes_G1, unprocessed_nodes_G2, unprocessed_edges_G1, unprocessed_edges_G2, cost_matrix_nodes, cost_matrix_edges, ) # Insert into open-list, implemented as a heap heappush(open_list, tree_node) # Retain the top-k elements in open-list iff algorithm is beam if max_beam_size > 0 and len(open_list) > max_beam_size: open_list = nsmallest(max_beam_size, open_list) heapify(open_list) return None def get_sorted_mapping(mapping_tuple, len1, len2): # Get sorted mapping of nodes/edges result_0 = [None] * len1 result_1 = [None] * len2 for i in range(len(mapping_tuple[0])): if mapping_tuple[0][i] is not None and mapping_tuple[1][i] is not None: result_0[mapping_tuple[0][i]] = mapping_tuple[1][i] result_1[mapping_tuple[1][i]] = mapping_tuple[0][i] return (result_0, result_1) def graph_edit_distance( G1, G2, node_substitution_cost=None, edge_substitution_cost=None, G1_node_deletion_cost=None, G2_node_insertion_cost=None, G1_edge_deletion_cost=None, G2_edge_insertion_cost=None, algorithm="bipartite", max_beam_size=100, ): """Returns GED (graph edit distance) between DGLGraphs G1 and G2. 
Parameters ---------- G1, G2: DGLGraphs node_substitution_cost, edge_substitution_cost : 2D numpy arrays node_substitution_cost[i,j] is the cost of substitution node i of G1 with node j of G2, similar definition for edge_substitution_cost. If None, default cost of 0 is used. G1_node_deletion_cost, G1_edge_deletion_cost : 1D numpy arrays G1_node_deletion_cost[i] is the cost of deletion of node i of G1, similar definition for G1_edge_deletion_cost. If None, default cost of 1 is used. G2_node_insertion_cost, G2_edge_insertion_cost : 1D numpy arrays G2_node_insertion_cost[i] is the cost of insertion of node i of G2, similar definition for G2_edge_insertion_cost. If None, default cost of 1 is used. algorithm : string Algorithm to use to calculate the edit distance. For now, 4 algorithms are supported i) astar: Calculates exact GED using A* graph traversal algorithm, the heuristic used is the one proposed in (Riesen and Bunke, 2009) [1]. ii) beam: Calculates approximate GED using A* graph traversal algorithm, with a maximum number of nodes in the open list. [2] iii) bipartite (default): Calculates approximate GED using linear assignment on the nodes, with jv (Jonker-Volgerand) algorithm. [3] iv) hausdorff: Approximation of graph edit distance based on Hausdorff matching [4]. max_beam_size : int Maximum number of nodes in the open list, in case the algorithm is 'beam'. 
Returns ------- A tuple of three objects: (edit_distance, node_mapping, edge_mapping) edit distance is the calculated edit distance (float) node_mapping is a tuple of size two, containing the node assignments of the two graphs respectively eg., node_mapping[0][i] is the node mapping of node i of graph G1 (None means that the node is deleted) Similar definition for the edge_mapping For 'hausdorff', node_mapping and edge_mapping are returned as None, as this approximation does not return a unique edit path Examples -------- >>> src1 = [0, 1, 2, 3, 4, 5]; >>> dst1 = [1, 2, 3, 4, 5, 6]; >>> src2 = [0, 1, 3, 4, 5]; >>> dst2 = [1, 2, 4, 5, 6]; >>> G1 = dgl.DGLGraph((src1, dst1)) >>> G2 = dgl.DGLGraph((src2, dst2)) >>> distance, node_mapping, edge_mapping = graph_edit_distance(G1, G1, algorithm='astar') >>> print(distance) 0.0 >>> distance, node_mapping, edge_mapping = graph_edit_distance(G1, G2, algorithm='astar') >>> print(distance) 1.0 References ---------- [1] Riesen, Kaspar, Stefan Fankhauser, and Horst Bunke. "Speeding Up Graph Edit Distance Computation with a Bipartite Heuristic." MLG. 2007. [2] Neuhaus, Michel, Kaspar Riesen, and Horst Bunke. "Fast suboptimal algorithms for the computation of graph edit distance." Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR). 2006. [3] Fankhauser, Stefan, Kaspar Riesen, and Horst Bunke. "Speeding up graph edit distance computation through fast bipartite matching." International Workshop on Graph-Based Representations in Pattern Recognition. 2011. [4] Fischer, Andreas, et al. "A hausdorff heuristic for efficient computation of graph edit distance." Joint IAPR International Workshops on Statistical Techniques in Pattern Recognition (SPR) and Structural and Syntactic Pattern Recognition (SSPR). 2014. 
""" # Handle corner cases if G1 is None and G2 is None: return (0.0, ([], []), ([], [])) elif G1 is None: edit_cost = 0.0 # Validate if algorithm != "beam": max_beam_size = -1 ( node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ) = validate_cost_functions( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ) # cost matrices for LAP solution cost_matrix_nodes, cost_matrix_edges = construct_cost_functions( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ) if algorithm == "astar" or algorithm == "beam": (matched_cost, matched_nodes, matched_edges) = a_star_search( G1, G2, cost_matrix_nodes, cost_matrix_edges, max_beam_size ) return ( matched_cost, get_sorted_mapping(matched_nodes, G1.num_nodes(), G2.num_nodes()), get_sorted_mapping(matched_edges, G1.num_edges(), G2.num_edges()), ) elif algorithm == "hausdorff": hausdorff_cost = hausdorff_matching( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ) return (hausdorff_cost, None, None) else: cost_matrix = contextual_cost_matrix_construction( G1, G2, node_substitution_cost, edge_substitution_cost, G1_node_deletion_cost, G1_edge_deletion_cost, G2_node_insertion_cost, G2_edge_insertion_cost, ) # Match the nodes as per the LAP solution row_ind, col_ind, _ = lapjv(cost_matrix) ( matched_cost, matched_nodes, matched_edges, ) = edit_cost_from_node_matching( G1, G2, cost_matrix_nodes, cost_matrix_edges, row_ind ) return ( matched_cost, get_sorted_mapping(matched_nodes, G1.num_nodes(), G2.num_nodes()), get_sorted_mapping(matched_edges, G1.num_edges(), G2.num_edges()), ) ================================================ FILE: 
examples/pytorch/graphsage/README.md ================================================ Inductive Representation Learning on Large Graphs (GraphSAGE) ============ - Paper link: [http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf](http://papers.nips.cc/paper/6703-inductive-representation-learning-on-large-graphs.pdf) - Author's code repo: [https://github.com/williamleif/graphsage-simple](https://github.com/williamleif/graphsage-simple) For advanced usage, including multi-GPU/multi-node training and PyTorch Lightning, more examples can be found in the [advanced](https://github.com/dmlc/dgl/tree/master/examples/pytorch/graphsage/advanced) and [dist](https://github.com/dmlc/dgl/tree/master/examples/pytorch/graphsage/dist) directories. Requirements ------------ ```bash pip install requests torchmetrics==0.11.4 ogb ``` How to run ------- ### Full graph training Run with the following command (available datasets: "cora", "citeseer", "pubmed") ```bash python3 train_full.py --dataset cora --gpu 0 # full graph ``` Results: ``` * cora: ~0.8330 * citeseer: ~0.7110 * pubmed: ~0.7830 ``` ### Minibatch training for node classification Train w/ mini-batch sampling in mixed mode (CPU+GPU) for node classification on "ogbn-products" ```bash python3 node_classification.py ``` Results: ``` Test Accuracy: 0.7632 ``` ### PyTorch Lightning for node classification Train w/ mini-batch sampling for node classification with PyTorch Lightning on OGB-products. It requires PyTorch Lightning 2.0.1. 
class SAGE(nn.Module):
    """GraphSAGE encoder: a stack of mean-aggregator ``SAGEConv`` layers.

    The activation and dropout are applied after every layer except the
    last one.
    """

    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.init(in_feats, n_hidden, n_classes, n_layers, activation, dropout)

    def init(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        """Create the layers and store the hyper-parameters on ``self``.

        With ``n_layers > 1`` the widths are ``in_feats -> n_hidden -> ... ->
        n_classes``; otherwise a single ``in_feats -> n_classes`` layer is
        built.
        """
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes

        if n_layers > 1:
            widths = [in_feats] + [n_hidden] * (n_layers - 1) + [n_classes]
        else:
            widths = [in_feats, n_classes]

        self.layers = nn.ModuleList()
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            self.layers.append(dglnn.SAGEConv(fan_in, fan_out, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    def forward(self, blocks, x):
        """Apply layer ``i`` to ``blocks[i]``, starting from features ``x``."""
        last = len(self.layers) - 1
        h = x
        for depth, (conv, block) in enumerate(zip(self.layers, blocks)):
            h = conv(block, h)
            if depth != last:
                h = self.dropout(self.activation(h))
        return h

    def inference(self, g, x, device, batch_size, num_workers):
        """
        Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling).
        g : the entire graph.
        x : the input of entire node set.

        The inference code is written in a fashion that it could handle any number of nodes and
        layers.
        """
        # Sampling multi-layer blocks at inference time repeats a lot of the
        # computation of the first layers, so the representations of all
        # nodes are computed one layer at a time instead, in batches.
        # TODO: can we standardize this?
        last = len(self.layers) - 1
        for depth, conv in enumerate(self.layers):
            is_last = depth == last
            out_width = self.n_classes if is_last else self.n_hidden
            y = th.zeros(g.num_nodes(), out_width)

            sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
            dataloader = dgl.dataloading.DataLoader(
                g,
                th.arange(g.num_nodes()).to(g.device),
                sampler,
                device=device if num_workers == 0 else None,
                batch_size=batch_size,
                shuffle=False,
                drop_last=False,
                num_workers=num_workers,
            )

            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                block = blocks[0].int().to(device)
                h = conv(block, x[input_nodes].to(device))
                if not is_last:
                    h = self.dropout(self.activation(h))
                # Results are gathered in the buffer created by th.zeros above.
                y[output_nodes] = h.cpu()

            # The output of this layer is the input of the next one.
            x = y
        return y
class NegativeSampler(object):
    """Draw negative destination nodes with probability ~ in-degree ** 0.75.

    Parameters
    ----------
    g : graph object
        Must provide ``in_degrees()`` and ``device``.
    k : int
        Number of negative destinations produced per positive edge.
    neg_share : bool
        When true and the batch size is a multiple of ``k``, only one draw
        per positive edge is made and each group of ``k`` draws is shared by
        ``k`` consecutive edges.
    device : optional
        Where the sampling weights live; defaults to ``g.device``.
    """

    def __init__(self, g, k, neg_share=False, device=None):
        target = g.device if device is None else device
        degrees = g.in_degrees().float().to(target)
        self.weights = degrees ** 0.75
        self.k = k
        self.neg_share = neg_share

    def __call__(self, g, eids):
        """Return ``(src, dst)`` with ``k`` negative pairs per edge in ``eids``."""
        heads, _ = g.find_edges(eids)
        count = len(heads)
        share = self.neg_share and count % self.k == 0
        if not share:
            tails = self.weights.multinomial(count * self.k, replacement=True)
        else:
            drawn = self.weights.multinomial(count, replacement=True)
            # Repeat every group of k draws k times so that the k edges of a
            # group all see the same k negatives.
            grouped = drawn.view(-1, 1, self.k)
            tails = grouped.expand(-1, self.k, -1).flatten()
        heads = heads.repeat_interleave(self.k)
        return heads, tails
class SAGELightning(LightningModule):
    """Lightning wrapper that trains a ``SAGE`` encoder on an edge-prediction loss.

    Parameters
    ----------
    in_feats, n_hidden, n_classes, n_layers, activation, dropout
        Forwarded unchanged to ``SAGE``.
    lr : float
        Learning rate of the Adam optimizer.
    """

    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout, lr
    ):
        super().__init__()
        self.save_hyperparameters()
        self.module = SAGE(
            in_feats, n_hidden, n_classes, n_layers, activation, dropout
        )
        self.lr = lr
        self.loss_fcn = CrossEntropyLoss()

    def training_step(self, batch, batch_idx):
        """Compute the loss on one ``(input_nodes, pos_graph, neg_graph, mfgs)`` batch."""
        input_nodes, pos_graph, neg_graph, mfgs = batch
        # Use the device this module lives on rather than a module-level
        # ``device`` global, which only exists when the file runs as a script.
        target = self.device
        mfgs = [mfg.int().to(target) for mfg in mfgs]
        pos_graph = pos_graph.to(target)
        neg_graph = neg_graph.to(target)
        batch_inputs = mfgs[0].srcdata["features"]
        batch_pred = self.module(mfgs, batch_inputs)
        loss = self.loss_fcn(batch_pred, pos_graph, neg_graph)
        self.log(
            "train_loss", loss, prog_bar=True, on_step=False, on_epoch=True
        )
        return loss

    def validation_step(self, batch, batch_idx):
        """Return the embeddings of one ``(input_nodes, output_nodes, mfgs)`` batch."""
        input_nodes, output_nodes, mfgs = batch
        mfgs = [mfg.int().to(self.device) for mfg in mfgs]
        batch_inputs = mfgs[0].srcdata["features"]
        batch_pred = self.module(mfgs, batch_inputs)
        return batch_pred

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate ``self.lr``."""
        optimizer = th.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer
class DataModule(LightningDataModule):
    """Load a dataset and build the train/validation loaders for unsupervised GraphSAGE.

    Parameters
    ----------
    dataset_name : str
        ``"reddit"`` or ``"ogbn-products"``; anything else raises ``ValueError``.
    data_cpu : bool
        When false, the graph and the node-ID tensors are moved to ``device``
        and the loaders run on ``device``; when true everything stays where
        it was loaded and the loaders use the CPU.
    fan_out : sequence of int
        Neighbor fan-out per layer, passed to ``MultiLayerNeighborSampler``.
    device : torch.device
        Target device, used only when ``data_cpu`` is false.
    batch_size, num_workers : int
        Forwarded to the DGL ``DataLoader``.
    num_negs : int, optional
        Negatives per positive edge. When ``None`` the module-level ``args``
        namespace is consulted, which is how the script originally behaved.
    neg_share : bool, optional
        Whether negatives are shared inside a batch. When ``None`` the
        module-level ``args`` namespace is consulted.
    """

    def __init__(
        self,
        dataset_name,
        data_cpu=False,
        fan_out=(10, 25),
        device=th.device("cpu"),
        batch_size=1000,
        num_workers=4,
        num_negs=None,
        neg_share=None,
    ):
        super().__init__()
        if dataset_name == "reddit":
            g, n_classes = load_reddit()
            n_edges = g.num_edges()
            # Edge i is paired with edge i + n_edges // 2 (and vice versa).
            reverse_eids = th.cat(
                [th.arange(n_edges // 2, n_edges), th.arange(0, n_edges // 2)]
            )
        elif dataset_name == "ogbn-products":
            g, n_classes = load_ogb("ogbn-products")
            n_edges = g.num_edges()
            # The reverse edge of edge 0 in OGB products dataset is 1.
            # The reverse edge of edge 2 is 3. So on so forth.
            reverse_eids = th.arange(n_edges) ^ 1
        else:
            raise ValueError("unknown dataset")

        train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0]
        val_nid = th.nonzero(g.ndata["val_mask"], as_tuple=True)[0]
        # Every node that is neither a training nor a validation node.
        test_nid = th.nonzero(
            ~(g.ndata["train_mask"] | g.ndata["val_mask"]), as_tuple=True
        )[0]

        sampler = dgl.dataloading.MultiLayerNeighborSampler(
            [int(width) for width in fan_out]
        )

        dataloader_device = th.device("cpu")
        if not data_cpu:
            train_nid = train_nid.to(device)
            val_nid = val_nid.to(device)
            test_nid = test_nid.to(device)
            g = g.formats(["csc"])
            g = g.to(device)
            dataloader_device = device

        self.g = g
        self.train_nid, self.val_nid, self.test_nid = (
            train_nid,
            val_nid,
            test_nid,
        )
        self.sampler = sampler
        self.device = dataloader_device
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.in_feats = g.ndata["features"].shape[1]
        self.n_classes = n_classes
        self.reverse_eids = reverse_eids
        self.num_negs = num_negs
        self.neg_share = neg_share

    def train_dataloader(self):
        """Return a shuffled edge loader with reverse-edge exclusion and negative sampling."""
        # Prefer the values given to the constructor; fall back to the
        # command-line namespace so the script keeps working unchanged.
        num_negs = args.num_negs if self.num_negs is None else self.num_negs
        neg_share = args.neg_share if self.neg_share is None else self.neg_share
        sampler = dgl.dataloading.as_edge_prediction_sampler(
            self.sampler,
            exclude="reverse_id",
            reverse_eids=self.reverse_eids,
            negative_sampler=NegativeSampler(self.g, num_negs, neg_share),
        )
        return dgl.dataloading.DataLoader(
            self.g,
            np.arange(self.g.num_edges()),
            sampler,
            device=self.device,
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=False,
            num_workers=self.num_workers,
        )

    def val_dataloader(self):
        """Return an unshuffled node loader covering every node of the graph."""
        # Note that the validation data loader is a DataLoader
        # as we want to evaluate all the node embeddings.
        return dgl.dataloading.DataLoader(
            self.g,
            np.arange(self.g.num_nodes()),
            self.sampler,
            device=self.device,
            batch_size=self.batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=self.num_workers,
        )
class SAGE(LightningModule):
    """Three-layer mean-aggregator GraphSAGE classifier as a LightningModule."""

    def __init__(self, in_feats, n_hidden, n_classes):
        super().__init__()
        self.save_hyperparameters()
        self.layers = nn.ModuleList()
        layer_shapes = (
            (in_feats, n_hidden),
            (n_hidden, n_hidden),
            (n_hidden, n_classes),
        )
        for fan_in, fan_out in layer_shapes:
            self.layers.append(dglnn.SAGEConv(fan_in, fan_out, "mean"))
        self.dropout = nn.Dropout(0.5)
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.train_acc = Accuracy(task="multiclass", num_classes=n_classes)
        self.val_acc = Accuracy(task="multiclass", num_classes=n_classes)

    def forward(self, blocks, x):
        """Apply layer ``i`` to ``blocks[i]``; ReLU and dropout follow all but the last layer."""
        last = len(self.layers) - 1
        h = x
        for depth, (conv, block) in enumerate(zip(self.layers, blocks)):
            h = conv(block, h)
            if depth != last:
                h = self.dropout(F.relu(h))
        return h

    def inference(self, g, device, batch_size, num_workers, buffer_device=None):
        """Compute the outputs for every node of ``g``, one layer at a time.

        The intermediate result of each layer is written to ``g.ndata["h"]``
        so the data loader can prefetch it for the next layer. The result of
        the last layer is returned on ``buffer_device`` (``device`` when not
        given).
        """
        # The difference between this inference function and the one in the official
        # example is that the intermediate results can also benefit from prefetching.
        g.ndata["h"] = g.ndata["feat"]
        sampler = dgl.dataloading.MultiLayerFullNeighborSampler(
            1, prefetch_node_feats=["h"]
        )
        dataloader = dgl.dataloading.DataLoader(
            g,
            torch.arange(g.num_nodes()).to(g.device),
            sampler,
            device=device,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=num_workers,
            persistent_workers=(num_workers > 0),
        )
        if buffer_device is None:
            buffer_device = device

        last = len(self.layers) - 1
        for depth, conv in enumerate(self.layers):
            is_last = depth == last
            width = self.n_classes if is_last else self.n_hidden
            y = torch.zeros(g.num_nodes(), width, device=buffer_device)
            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                block = blocks[0]
                h = conv(block, block.srcdata["h"])
                if not is_last:
                    h = self.dropout(F.relu(h))
                y[output_nodes] = h.to(buffer_device)
            # Expose this layer's output as the input of the next layer.
            g.ndata["h"] = y
        return y

    def training_step(self, batch, batch_idx):
        """Return the cross-entropy loss of one batch and log ``train_acc``."""
        _input_nodes, _output_nodes, blocks = batch
        features = blocks[0].srcdata["feat"]
        target = blocks[-1].dstdata["label"]
        logits = self(blocks, features)
        loss = F.cross_entropy(logits, target)
        self.train_acc(torch.argmax(logits, 1), target)
        self.log(
            "train_acc",
            self.train_acc,
            prog_bar=True,
            on_step=True,
            on_epoch=False,
        )
        return loss

    def validation_step(self, batch, batch_idx):
        """Update and log ``val_acc`` for one batch."""
        _input_nodes, _output_nodes, blocks = batch
        features = blocks[0].srcdata["feat"]
        target = blocks[-1].dstdata["label"]
        logits = self(blocks, features)
        self.val_acc(torch.argmax(logits, 1), target)
        self.log(
            "val_acc",
            self.val_acc,
            prog_bar=True,
            on_step=True,
            on_epoch=True,
            sync_dist=True,
        )

    def configure_optimizers(self):
        """Return Adam with ``lr=0.001`` and ``weight_decay=5e-4``."""
        return torch.optim.Adam(
            self.parameters(), lr=0.001, weight_decay=5e-4
        )
class DataModule(LightningDataModule):
    """Hold the graph, the seed-node splits and the neighbor sampler.

    Both loaders place the seed nodes and the produced batches on ``"cuda"``
    and use UVA sampling.
    """

    def __init__(
        self, graph, train_idx, val_idx, fanouts, batch_size, n_classes
    ):
        super().__init__()
        self.g = graph
        self.train_idx = train_idx
        self.val_idx = val_idx
        self.sampler = dgl.dataloading.NeighborSampler(
            fanouts, prefetch_node_feats=["feat"], prefetch_labels=["label"]
        )
        self.batch_size = batch_size
        self.in_feats = graph.ndata["feat"].shape[1]
        self.n_classes = n_classes

    def _build_loader(self, seeds, **extra):
        """Create a shuffled CUDA/UVA ``DataLoader`` over ``seeds``."""
        return dgl.dataloading.DataLoader(
            self.g,
            seeds.to("cuda"),
            self.sampler,
            device="cuda",
            batch_size=self.batch_size,
            shuffle=True,
            drop_last=False,
            # For CPU sampling, set num_workers to nonzero and use_uva=False
            num_workers=0,
            use_uva=True,
            **extra,
        )

    def train_dataloader(self):
        """Return the training loader (DDP-aware)."""
        # Set use_ddp to False for single GPU.
        return self._build_loader(self.train_idx, use_ddp=True)

    def val_dataloader(self):
        """Return the validation loader."""
        return self._build_loader(self.val_idx)
def to_bidirected_with_reverse_mapping(g):
    """Make a graph bidirectional and locate the reverse of every edge.

    Reverse edges are added to ``g`` and duplicate edges are then merged
    with ``dgl.to_simple``.

    Does not work with graphs that have self-loops.

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    g_simple : DGLGraph
        The bidirected graph after duplicate removal; it carries an edge
        feature ``"count"`` written by ``dgl.to_simple``.
    reverse_mapping : Tensor
        ``reverse_mapping[i]`` is the ID, in ``g_simple``, of the reverse
        edge of edge ``i`` of ``g_simple``.

    Raises
    ------
    AssertionError
        If the computed mapping does not swap source and destination of
        every edge (see the sanity check at the end).
    """
    # ``mapping`` has one entry per edge of the graph with reverse edges
    # added and gives that edge's ID in ``g_simple``.
    # NOTE(review): relies on dgl.to_simple's ``writeback_mapping`` semantics
    # — confirm against the DGL documentation.
    g_simple, mapping = dgl.to_simple(
        dgl.add_reverse_edges(g), return_counts="count", writeback_mapping=True
    )
    # Number of merged edges behind every edge of g_simple.
    c = g_simple.edata["count"]
    num_edges = g.num_edges()
    # Exclusive prefix sum of the counts: position of the first member of
    # every duplicate group once the edges are ordered by their g_simple ID.
    mapping_offset = torch.zeros(
        g_simple.num_edges() + 1, dtype=g_simple.idtype
    )
    mapping_offset[1:] = c.cumsum(0)
    # Pre-merge edge IDs ordered by the g_simple edge they were merged into.
    idx = mapping.argsort()
    # One representative pre-merge edge per g_simple edge.
    idx_uniq = idx[mapping_offset[:-1]]
    # Pre-merge edges i and i + num_edges are treated as a reverse pair.
    # Presumably add_reverse_edges appends the reversed copies after the
    # original edges — TODO confirm.
    reverse_idx = torch.where(
        idx_uniq >= num_edges, idx_uniq - num_edges, idx_uniq + num_edges
    )
    # Translate the reverse pre-merge edge back into a g_simple edge ID.
    reverse_mapping = mapping[reverse_idx]
    # sanity check: following the mapping must swap the two endpoints
    src1, dst1 = g_simple.edges()
    src2, dst2 = g_simple.find_edges(reverse_mapping)
    assert torch.equal(src1, dst2)
    assert torch.equal(src2, dst1)
    return g_simple, reverse_mapping
def compute_mrr(
    model, evaluator, node_emb, src, dst, neg_dst, device, batch_size=500
):
    """Compute Mean Reciprocal Rank (MRR) in batches.

    For every source node, the true destination and its negative
    destinations are scored with ``model.predictor`` on the element-wise
    product of the two node embeddings. ``evaluator.eval`` turns the scores
    into reciprocal ranks (its ``"mrr_list"`` entry), whose mean is returned.
    """
    total = src.shape[0]
    reciprocal_ranks = torch.zeros(total)
    for lo in tqdm.trange(0, total, batch_size, desc="Evaluate"):
        hi = min(lo + batch_size, total)
        window = slice(lo, hi)
        # Column 0 holds the true destination, the remaining columns the negatives.
        candidates = torch.cat([dst[window, None], neg_dst[window]], 1)
        h_src = node_emb[src[window]][:, None, :].to(device)
        h_dst = node_emb[candidates.view(-1)]
        h_dst = h_dst.view(*candidates.shape, -1).to(device)
        scores = model.predictor(h_src * h_dst).squeeze(-1)
        outcome = evaluator.eval(
            {"y_pred_pos": scores[:, 0], "y_pred_neg": scores[:, 1:]}
        )
        reciprocal_ranks[window] = outcome["mrr_list"]
    return reciprocal_ranks.mean()
if __name__ == "__main__":
    # Script entry point: train GraphSAGE for link prediction on
    # ogbl-citation2, then print the validation and test MRR.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--mode",
        default="mixed",
        choices=["cpu", "mixed", "puregpu"],
        help="Training mode. 'cpu' for CPU training, 'mixed' for CPU-GPU mixed training, "
        "'puregpu' for pure-GPU training.",
    )
    args = parser.parse_args()
    # Without CUDA every mode degrades to CPU training.
    if not torch.cuda.is_available():
        args.mode = "cpu"
    print(f"Training in {args.mode} mode.")

    # load and preprocess dataset
    print("Loading data")
    dataset = DglLinkPropPredDataset("ogbl-citation2")
    g = dataset[0]
    # Only 'puregpu' keeps the graph itself on the GPU ...
    g = g.to("cuda" if args.mode == "puregpu" else "cpu")
    # ... while the model runs on the GPU in both 'mixed' and 'puregpu'.
    device = torch.device("cpu" if args.mode == "cpu" else "cuda")
    g, reverse_eids = to_bidirected_with_reverse_mapping(g)
    reverse_eids = reverse_eids.to(device)
    # Every edge of the bidirected graph is used as a training seed.
    seed_edges = torch.arange(g.num_edges()).to(device)
    edge_split = dataset.get_edge_split()

    # create GraphSAGE model (hidden size 256)
    in_size = g.ndata["feat"].shape[1]
    model = SAGE(in_size, 256).to(device)

    # model training
    print("Training...")
    train(args, device, g, reverse_eids, seed_edges, model)

    # validate/test the model
    print("Validation/Testing...")
    valid_mrr, test_mrr = evaluate(
        device, g, edge_split, model, batch_size=1000
    )
    print(
        "Validation MRR {:.4f}, Test MRR {:.4f}".format(
            valid_mrr.item(), test_mrr.item()
        )
    )
def load_ogb(name, root="dataset"):
    """Load an OGB node-property dataset and attach features, labels and split masks.

    Parameters
    ----------
    name : str
        Dataset name passed to ``DglNodePropPredDataset``.
    root : str
        Directory passed to ``DglNodePropPredDataset`` as ``root``.

    Returns
    -------
    graph : DGLGraph
        Graph whose ``ndata`` holds ``"features"`` (renamed from ``"feat"``),
        ``"labels"`` (first label column) and the boolean masks
        ``"train_mask"``, ``"val_mask"`` and ``"test_mask"``.
    num_labels : int
        Number of distinct non-NaN label values.
    """
    from ogb.nodeproppred import DglNodePropPredDataset

    print("load", name)
    data = DglNodePropPredDataset(name=name, root=root)
    print("finish loading", name)
    splitted_idx = data.get_idx_split()
    graph, labels = data[0]
    labels = labels[:, 0]

    graph.ndata["features"] = graph.ndata.pop("feat")
    graph.ndata["labels"] = labels
    # NaN entries are not counted as a class.
    num_labels = len(th.unique(labels[th.logical_not(th.isnan(labels))]))

    def _mask_for(node_ids):
        # Boolean vector over all nodes that is True exactly at ``node_ids``.
        mask = th.zeros((graph.num_nodes(),), dtype=th.bool)
        mask[node_ids] = True
        return mask

    # Find the node IDs in the training, validation, and test set and store
    # one mask per split.
    for split_key, mask_name in (
        ("train", "train_mask"),
        ("valid", "val_mask"),
        ("test", "test_mask"),
    ):
        graph.ndata[mask_name] = _mask_for(splitted_idx[split_key])
    print("finish constructing", name)
    return graph, num_labels
Suitable for inductive models.""" train_g = g.subgraph(g.ndata["train_mask"]) val_g = g.subgraph(g.ndata["train_mask"] | g.ndata["val_mask"]) test_g = g return train_g, val_g, test_g ================================================ FILE: examples/pytorch/graphsage/node_classification.py ================================================ import argparse import dgl import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F import torchmetrics.functional as MF import tqdm from dgl.data import AsNodePredDataset from dgl.dataloading import ( DataLoader, MultiLayerFullNeighborSampler, NeighborSampler, ) from ogb.nodeproppred import DglNodePropPredDataset class SAGE(nn.Module): def __init__(self, in_size, hid_size, out_size): super().__init__() self.layers = nn.ModuleList() # three-layer GraphSAGE-mean self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) self.layers.append(dglnn.SAGEConv(hid_size, hid_size, "mean")) self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) self.dropout = nn.Dropout(0.5) self.hid_size = hid_size self.out_size = out_size def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = F.relu(h) h = self.dropout(h) return h def inference(self, g, device, batch_size): """Conduct layer-wise inference to get all the node embeddings.""" feat = g.ndata["feat"] sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["feat"]) dataloader = DataLoader( g, torch.arange(g.num_nodes()).to(g.device), sampler, device=device, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=0, ) buffer_device = torch.device("cpu") pin_memory = buffer_device != device for l, layer in enumerate(self.layers): y = torch.empty( g.num_nodes(), self.hid_size if l != len(self.layers) - 1 else self.out_size, dtype=feat.dtype, device=buffer_device, pin_memory=pin_memory, ) feat = feat.to(device) for input_nodes, output_nodes, 
def evaluate(model, graph, dataloader, num_classes):
    """Compute multiclass accuracy of ``model`` over every batch of ``dataloader``.

    Each batch is an ``(input_nodes, output_nodes, blocks)`` triple.  Input
    features are read from ``blocks[0].srcdata["feat"]`` and ground-truth
    labels from ``blocks[-1].dstdata["label"]``.  The model is switched to
    eval mode and every forward pass runs without gradient tracking.

    ``graph`` is accepted for signature compatibility but is not used.
    """
    model.eval()
    targets, predictions = [], []
    for _input_nodes, _output_nodes, blocks in dataloader:
        with torch.no_grad():
            inputs = blocks[0].srcdata["feat"]
            targets.append(blocks[-1].dstdata["label"])
            predictions.append(model(blocks, inputs))
    all_predictions = torch.cat(predictions)
    all_targets = torch.cat(targets)
    return MF.accuracy(
        all_predictions,
        all_targets,
        task="multiclass",
        num_classes=num_classes,
    )
if __name__ == "__main__":
    # Command-line options: where training runs and which float type is used.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--mode",
        default="mixed",
        choices=["cpu", "mixed", "puregpu"],
        help="Training mode. 'cpu' for CPU training, 'mixed' for CPU-GPU mixed training, "
        "'puregpu' for pure-GPU training.",
    )
    cli.add_argument(
        "--dt",
        type=str,
        default="float",
        help="data type(float, bfloat16)",
    )
    args = cli.parse_args()
    # Without CUDA every mode falls back to plain CPU training.
    if not torch.cuda.is_available():
        args.mode = "cpu"
    print(f"Training in {args.mode} mode.")

    # Load and preprocess the dataset.
    print("Loading data")
    dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
    # Only "puregpu" places the graph itself on the GPU.
    graph_device = "cuda" if args.mode == "puregpu" else "cpu"
    g = dataset[0].to(graph_device)
    num_classes = dataset.num_classes
    # The model runs on the GPU in both "mixed" and "puregpu" modes.
    device = torch.device("cpu" if args.mode == "cpu" else "cuda")

    # Create the GraphSAGE model (hidden size 256).
    model = SAGE(g.ndata["feat"].shape[1], 256, dataset.num_classes).to(device)

    # Convert model and graph to bfloat16 if requested.
    if args.dt == "bfloat16":
        g = dgl.to_bfloat16(g)
        model = model.to(dtype=torch.bfloat16)

    # Model training.
    print("Training...")
    train(args, device, g, dataset, model, num_classes)

    # Test the model with layer-wise full-neighbor inference.
    print("Testing...")
    test_acc = layerwise_infer(
        device, g, dataset.test_idx, model, num_classes, batch_size=4096
    )
    print("Test Accuracy {:.4f}".format(test_acc.item()))
def evaluate(g, features, labels, mask, model):
    """Return the accuracy of ``model`` on the nodes selected by ``mask``.

    The model is put in eval mode and run once on ``(g, features)`` without
    gradient tracking.  The predicted class of a node is the index of its
    largest logit along dim 1.  The result is a Python float: the number of
    correct predictions divided by the number of selected labels.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(g, features)[mask]
        selected_labels = labels[mask]
        predicted = torch.max(selected_logits, dim=1).indices
        num_correct = torch.sum(predicted == selected_labels).item()
        return num_correct * 1.0 / len(selected_labels)
{}".format(args.dataset)) g = data[0] device = torch.device("cuda" if torch.cuda.is_available() else "cpu") g = g.int().to(device) features = g.ndata["feat"] labels = g.ndata["label"] masks = g.ndata["train_mask"], g.ndata["val_mask"] # create GraphSAGE model in_size = features.shape[1] out_size = data.num_classes model = SAGE(in_size, 16, out_size).to(device) # convert model and graph to bfloat16 if needed if args.dt == "bfloat16": g = dgl.to_bfloat16(g) features = features.to(dtype=torch.bfloat16) model = model.to(dtype=torch.bfloat16) # model training print("Training...") train(g, features, labels, masks, model) # test the model print("Testing...") acc = evaluate(g, features, labels, g.ndata["test_mask"], model) print("Test accuracy {:.4f}".format(acc)) ================================================ FILE: examples/pytorch/graphsaint/README.md ================================================ # GraphSAINT This DGL example implements the paper: GraphSAINT: Graph Sampling Based Inductive Learning Method. Paper link: https://arxiv.org/abs/1907.04931 Author's code: https://github.com/GraphSAINT/GraphSAINT Contributor: Jiahang Li ([@ljh1064126026](https://github.com/ljh1064126026)) Tang Liu ([@lt610](https://github.com/lt610)) For built-in GraphSAINT subgraph samplers with online sampling, use `dgl.dataloading.SAINTSampler`. ## Dependencies - Python 3.7.10 - PyTorch 1.8.1 - NumPy 1.19.2 - Scikit-learn 0.23.2 - DGL 0.7.1 ## Dataset All datasets used are provided by Author's [code](https://github.com/GraphSAINT/GraphSAINT). They are available in [Google Drive](https://drive.google.com/drive/folders/1zycmmDES39zVlbVCYs88JTJ1Wm5FbfLz) (alternatively, [Baidu Wangpan (code: f1ao)](https://pan.baidu.com/s/1SOb0SiSAXavwAcNqkttwcg#list/path=%2F)). 
Dataset summary ("m" stands for multi-label binary classification, and "s" for single-label): | Dataset | Nodes | Edges | Degree | Feature | Classes | | :-: | :-: | :-: | :-: | :-: | :-: | | PPI | 14,755 | 225,270 | 15 | 50 | 121(m) | | Flickr | 89,250 | 899,756 | 10 | 500 | 7(s) | | Reddit | 232,965 | 11,606,919 | 50 | 602 | 41(s) | | Yelp | 716,847 | 6,977,410 | 10 | 300 | 100 (m) | | Amazon | 1,598,960 | 132,169,734 | 83 | 200 | 107 (m) | Note that the PPI dataset here is different from DGL's built-in variant. ## Config - The config file is `config.py`, which contains the best configs for the experiments below. - Please refer to `sampler.py` for explanations of some key parameters. ### Parameters | **aggr** | **arch** | **dataset** | **dropout** | | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | | defines how to aggregate the embedding of each node with its neighbors' embeddings, which can be 'concat' or 'mean'. The neighbors' embeddings are generated based on GCN | e.g. '1-1-0' means there are three layers: the first and the second layer employ message passing on the graph, then aggregate the embeddings of each node and its neighbors. The last layer only updates each node's embedding. The message passing mechanism comes from GCN | the name of the dataset, which can be 'ppi', 'flickr', 'reddit', 'yelp', 'amazon' | the dropout of the model used in train_sampling.py | | **edge_budget** | **gpu** | **length** | **log_dir** | | the expected number of edges in each subgraph, which is specified in the paper | -1 means cpu, otherwise 'cuda:gpu', e.g.
if gpu=0, use 'cuda:0' | the length of each random walk | the directory storing logs | | **lr** | **n_epochs** | **n_hidden** | **no_batch_norm** | | learning rate | training epochs | hidden dimension | True if do NOT employ batch normalization in each layer | | **node_budget** | **num_subg** | **num_roots** | **sampler** | | the expected number of nodes in each subgraph, which is specified in the paper | the expected number of pre_sampled subgraphs | the number of roots to generate random walks | specify which sampler to use, which can be 'node', 'edge', 'rw', corresponding to node, edge, random walk sampler | | **use_val** | **val_every** | **num_workers_sampler** | **num_subg_sampler** | | True if use best model to test, which is stored by earlystop mechanism | validate per 'val_every' epochs | number of workers (processes) specified for internal dataloader in SAINTSampler, which is to pre-sample subgraphs | the maximal number of pre-sampled subgraphs | | **batch_size_sampler** | **num_workers** | | | | batch size of internal dataloader in SAINTSampler | number of workers (processes) specified for external dataloader in train_sampling.py, which is to sample subgraphs in training phase | | | ## Minibatch training Run with following: ```bash python train_sampling.py --task $task $online # online sampling: e.g. python train_sampling.py --task ppi_n --online # offline sampling: e.g. python train_sampling.py --task flickr_e ``` - `$task` includes `ppi_n, ppi_e, ppi_rw, flickr_n, flickr_e, flickr_rw, reddit_n, reddit_e, reddit_rw, yelp_n, yelp_e, yelp_rw, amazon_n, amazon_e, amazon_rw`. For example, `ppi_n` represents running experiments on dataset `ppi` with `node sampler` - If `$online` is `--online`, we sample subgraphs on-the-fly in the training phase, while discarding pre-sampled subgraphs. If `$online` is empty, we utilize pre-sampled subgraphs in the training phase. 
## Experiments * Paper: results from the paper * Running: results from experiments with the authors' code * DGL: results from experiments with the DGL example. The experiment config comes from `config.py`. You can modify the parameters in `config.py` to see how different setups perform. > Note that we implement both offline sampling and online sampling for the training phase. Offline sampling means all subgraphs used in the training phase come from the pre-sampled subgraphs. Online sampling means we discard all pre-sampled subgraphs and sample new subgraphs during the training phase. > Note that the sampling method in the pre-sampling phase must be offline sampling. ### F1-micro #### Random node sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | --- | --- | --- | --- | --- | --- | | Paper | 0.960±0.001 | 0.507±0.001 | 0.962±0.001 | 0.641±0.000 | 0.782±0.004 | | Running | 0.9628 | 0.5077 | 0.9622 | 0.6393 | 0.7695 | | DGL_offline | 0.9715 | 0.5024 | 0.9645 | 0.6457 | 0.8051 | | DGL_online | 0.9730 | 0.5071 | 0.9645 | 0.6444 | 0.8014 | #### Random edge sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | --- | --- | --- | --- | --- | --- | | Paper | 0.981±0.007 | 0.510±0.002 | 0.966±0.001 | 0.653±0.003 | 0.807±0.001 | | Running | 0.9810 | 0.5066 | 0.9656 | 0.6531 | 0.8071 | | DGL_offline | 0.9817 | 0.5077 | 0.9655 | 0.6530 | 0.8034 | | DGL_online | 0.9815 | 0.5041 | 0.9653 | 0.6516 | 0.7756 | #### Random walk sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | --- | --- | --- | --- | --- | --- | | Paper | 0.981±0.004 | 0.511±0.001 | 0.966±0.001 | 0.653±0.003 | 0.815±0.001 | | Running | 0.9812 | 0.5104 | 0.9648 | 0.6527 | 0.8131 | | DGL_offline | 0.9833 | 0.5027 | 0.9582 | 0.6514 | 0.8178 | | DGL_online | 0.9820 | 0.5110 | 0.9572 | 0.6508 | 0.8157 | ### Sampling time - Here, sampling time includes the time spent pre-sampling subgraphs and calculating normalization coefficients at the beginning.
#### Random node sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | --- | --- | --- | --- | --- | --- | | Running | 1.46 | 3.49 | 19 | 59.01 | 978.62 | | DGL | 2.51 | 1.12 | 27.32 | 60.15 | 929.24 | #### Random edge sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | --- | --- | --- | --- | --- | --- | | Running | 1.4 | 3.18 | 13.88 | 39.02 | | | DGL | 3.04 | 1.87 | 52.01 | 48.38 | | #### Random walk sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | --- | --- | --- | --- | --- | --- | | Running | 1.7 | 3.82 | 16.97 | 43.25 | 355.68 | | DGL | 3.05 | 2.13 | 11.01 | 22.23 | 151.84 | ## Test std of sampling and normalization time - We ran each experiment 10 times to measure the average and standard deviation of the sampling and normalization time. Here we only measure time and do not train the model to the end. Moreover, for efficient testing, the hardware and config used here are not the same as in the experiments above, so the sampling time might differ slightly from that above. However, we keep the environment consistent across all experiments below. > The only config options that differ from the section above are `num_workers_sampler`, `batch_size_sampler` and `num_workers`, which only affect the sampling speed. Other parameters are kept consistent across the two sections, so the model's performance is not affected. > Each value is (average, std).
### Random node sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | ------------------------- | --------------- | ------------ | ------------- | ------------- | --------------- | | DGL_Sampling(std) | 2.618, 0.004 | 3.017, 0.507 | 35.356, 2.363 | 69.913, 6.3 | 888.025, 16.004 | | DGL_Normalization(std) | Small to ignore | 0.008, 0.004 | 0.26, 0.047 | 0.189, 0.0288 | 2.443, 0.124 | | | | | | | | | author_Sampling(std) | 0.788, 0.661 | 0.728, 0.367 | 8.931, 3.155 | 27.818, 1.384 | 295.597, 4.928 | | author_Normalization(std) | 0.665, 0.565 | 4.981, 2.952 | 17.231, 7.116 | 47.449, 2.794 | 279.241, 17.615 | ### Random edge sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | ------------------------- | --------------- | ------------ | ------------- | ------------- | ------ | | DGL_Sampling(std) | 3.554, 0.292 | 4.722, 0.245 | 47.09, 2.76 | 75.219, 6.442 | | | DGL_Normalization(std) | Small to ignore | 0.005, 0.007 | 0.235, 0.026 | 0.193, 0.021 | | | | | | | | | | author_Sampling(std) | 0.802, 0.667 | 0.761, 0.387 | 6.058, 2.166 | 13.914, 1.864 | | | author_Normalization(std) | 0.667, 0.570 | 5.180, 3.006 | 15.803, 5.867 | 44.278, 5.853 | | ### Random walk sampler | Method | PPI | Flickr | Reddit | Yelp | Amazon | | ------------------------- | --------------- | ------------ | ------------- | ------------- | --------------- | | DGL_Sampling(std) | 3.304, 0.08 | 5.487, 1.294 | 37.041, 2.083 | 39.951, 3.094 | 179.613, 18.881 | | DGL_Normalization(std) | Small to ignore | 0.001, 0.003 | 0.235, 0.026 | 0.185, 0.018 | 3.769, 0.326 | | | | | | | | | author_Sampling(std) | 0.924, 0.773 | 1.405, 0.718 | 8.608, 3.093 | 19.113, 1.700 | 217.184, 1.546 | | author_Normalization(std) | 0.701, 0.596 | 5.025, 2.954 | 18.198, 7.223 | 45.874, 8.020 | 128.272, 3.170 | ================================================ FILE: examples/pytorch/graphsaint/config.py ================================================ CONFIG = { "ppi_n": { "aggr": "concat", "arch": "1-0-1-0", 
"dataset": "ppi", "dropout": 0, "edge_budget": 4000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 50, "n_hidden": 512, "no_batch_norm": False, "node_budget": 6000, "num_subg": 50, "num_roots": 3000, "sampler": "node", "use_val": True, "val_every": 1, "num_workers_sampler": 0, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "ppi_e": { "aggr": "concat", "arch": "1-0-1-0", "dataset": "ppi", "dropout": 0.1, "edge_budget": 4000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 50, "n_hidden": 512, "no_batch_norm": False, "node_budget": 6000, "num_subg": 50, "num_roots": 3000, "sampler": "edge", "use_val": True, "val_every": 1, "num_workers_sampler": 0, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "ppi_rw": { "aggr": "concat", "arch": "1-0-1-0", "dataset": "ppi", "dropout": 0.1, "edge_budget": 4000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 50, "n_hidden": 512, "no_batch_norm": False, "node_budget": 6000, "num_subg": 50, "num_roots": 3000, "sampler": "rw", "use_val": True, "val_every": 1, "num_workers_sampler": 0, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "flickr_n": { "aggr": "concat", "arch": "1-1-0", "dataset": "flickr", "dropout": 0.2, "edge_budget": 6000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 50, "n_hidden": 256, "no_batch_norm": False, "node_budget": 8000, "num_subg": 25, "num_roots": 6000, "sampler": "node", "use_val": True, "val_every": 1, "num_workers_sampler": 0, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": False, }, "flickr_e": { "aggr": "concat", "arch": "1-1-0", "dataset": "flickr", "dropout": 0.2, "edge_budget": 6000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 50, "n_hidden": 256, "no_batch_norm": False, "node_budget": 8000, "num_subg": 25, "num_roots": 6000, "sampler": "edge", "use_val": True, "val_every": 1, 
"num_workers_sampler": 0, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": False, }, "flickr_rw": { "aggr": "concat", "arch": "1-1-0", "dataset": "flickr", "dropout": 0.2, "edge_budget": 6000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 50, "n_hidden": 256, "no_batch_norm": False, "node_budget": 8000, "num_subg": 25, "num_roots": 6000, "sampler": "rw", "use_val": True, "val_every": 1, "num_workers_sampler": 0, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": False, }, "reddit_n": { "aggr": "concat", "arch": "1-0-1-0", "dataset": "reddit", "dropout": 0.1, "edge_budget": 4000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 20, "n_hidden": 128, "no_batch_norm": False, "node_budget": 8000, "num_subg": 50, "num_roots": 3000, "sampler": "node", "use_val": True, "val_every": 1, "num_workers_sampler": 8, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "reddit_e": { "aggr": "concat", "arch": "1-0-1-0", "dataset": "reddit", "dropout": 0.1, "edge_budget": 6000, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 20, "n_hidden": 128, "no_batch_norm": False, "node_budget": 8000, "num_subg": 50, "num_roots": 3000, "sampler": "edge", "use_val": True, "val_every": 1, "num_workers_sampler": 8, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "reddit_rw": { "aggr": "concat", "arch": "1-0-1-0", "dataset": "reddit", "dropout": 0.1, "edge_budget": 6000, "length": 4, "log_dir": "none", "lr": 0.01, "n_epochs": 10, "n_hidden": 128, "no_batch_norm": False, "node_budget": 8000, "num_subg": 50, "num_roots": 200, "sampler": "rw", "use_val": True, "val_every": 1, "num_workers_sampler": 8, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "yelp_n": { "aggr": "concat", "arch": "1-1-0", "dataset": "yelp", "dropout": 0.1, "edge_budget": 6000, "length": 4, "log_dir": "none", "lr": 0.01, 
"n_epochs": 10, "n_hidden": 512, "no_batch_norm": False, "node_budget": 5000, "num_subg": 50, "num_roots": 200, "sampler": "node", "use_val": True, "val_every": 1, "num_workers_sampler": 8, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "yelp_e": { "aggr": "concat", "arch": "1-1-0", "dataset": "yelp", "dropout": 0.1, "edge_budget": 2500, "length": 4, "log_dir": "none", "lr": 0.01, "n_epochs": 10, "n_hidden": 512, "no_batch_norm": False, "node_budget": 5000, "num_subg": 50, "num_roots": 200, "sampler": "edge", "use_val": True, "val_every": 1, "num_workers_sampler": 8, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "yelp_rw": { "aggr": "concat", "arch": "1-1-0", "dataset": "yelp", "dropout": 0.1, "edge_budget": 2500, "length": 2, "log_dir": "none", "lr": 0.01, "n_epochs": 10, "n_hidden": 512, "no_batch_norm": False, "node_budget": 5000, "num_subg": 50, "num_roots": 1250, "sampler": "rw", "use_val": True, "val_every": 1, "num_workers_sampler": 8, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "amazon_n": { "aggr": "concat", "arch": "1-1-0", "dataset": "amazon", "dropout": 0.1, "edge_budget": 2500, "length": 4, "log_dir": "none", "lr": 0.01, "n_epochs": 5, "n_hidden": 512, "no_batch_norm": False, "node_budget": 4500, "num_subg": 50, "num_roots": 200, "sampler": "node", "use_val": True, "val_every": 1, "num_workers_sampler": 4, "num_subg_sampler": 10000, "batch_size_sampler": 200, "num_workers": 8, "full": True, }, "amazon_e": { "aggr": "concat", "arch": "1-1-0", "dataset": "amazon", "dropout": 0.1, "edge_budget": 2000, "gpu": 0, "length": 4, "log_dir": "none", "lr": 0.01, "n_epochs": 10, "n_hidden": 512, "no_batch_norm": False, "node_budget": 5000, "num_subg": 50, "num_roots": 200, "sampler": "edge", "use_val": True, "val_every": 1, "num_workers_sampler": 20, "num_subg_sampler": 5000, "batch_size_sampler": 50, "num_workers": 26, "full": True, }, 
class GCNLayer(nn.Module):
    """Multi-hop graph convolution layer used by the GraphSAINT example.

    For ``order = k`` the layer keeps ``k + 1`` feature sets (the input and
    the result of each of the ``k`` propagation hops).  Each set has its own
    bias-free linear map plus a separate bias parameter, and the transformed
    sets are finally summed (``aggr="mean"``) or concatenated along dim 1
    (``aggr="concat"``).

    Parameters
    ----------
    in_dim : int
        Size of the input features.
    out_dim : int
        Output size of every per-hop linear map.
    order : int
        Number of propagation hops.
    act : callable or None
        Activation applied after the linear map; skipped when ``None``.
    dropout : float
        Dropout probability applied to the input features.
    batch_norm : bool
        Whether to apply the learnable normalization in ``feat_trans``.
    aggr : str
        ``"mean"`` or ``"concat"``; any other value makes ``forward`` raise
        ``NotImplementedError``.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        order=1,
        act=None,
        dropout=0,
        batch_norm=False,
        aggr="concat",
    ):
        super().__init__()
        num_hops = order + 1

        # One (linear, bias) pair per hop, created pairwise.
        self.lins = nn.ModuleList()
        self.bias = nn.ParameterList()
        for _hop in range(num_hops):
            self.lins.append(nn.Linear(in_dim, out_dim, bias=False))
            self.bias.append(nn.Parameter(th.zeros(out_dim)))

        self.order = order
        self.act = act
        self.dropout = nn.Dropout(dropout)

        self.batch_norm = batch_norm
        if batch_norm:
            # Learnable shift / scale used by the normalization step.
            self.offset = nn.ParameterList()
            self.scale = nn.ParameterList()
            for _hop in range(num_hops):
                self.offset.append(nn.Parameter(th.zeros(out_dim)))
                self.scale.append(nn.Parameter(th.ones(out_dim)))

        self.aggr = aggr
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize every linear weight with Xavier-normal values."""
        for linear in self.lins:
            nn.init.xavier_normal_(linear.weight)

    def feat_trans(self, features, idx):
        """Apply hop ``idx``'s linear map, bias, activation and normalization.

        When ``batch_norm`` is enabled each row is normalized across its
        feature dimension (dim 1) using the biased variance plus ``1e-9``,
        then scaled and shifted by the hop's learnable parameters.
        """
        out = self.lins[idx](features) + self.bias[idx]
        if self.act is not None:
            out = self.act(out)
        if not self.batch_norm:
            return out
        num_rows = out.shape[0]
        row_mean = out.mean(dim=1).view(num_rows, 1)
        row_var = out.var(dim=1, unbiased=False).view(num_rows, 1) + 1e-9
        return (out - row_mean) * self.scale[idx] * th.rsqrt(row_var) + self.offset[
            idx
        ]

    def forward(self, graph, features):
        """Propagate ``features`` for ``order`` hops and combine all hops.

        The node-wise normalizer is read from ``ndata["train_D_norm"]`` when
        that key exists, otherwise from ``ndata["full_D_norm"]``.  Edge
        weights come from ``edata["w"]``; if absent they default to ones.
        All writes go to a ``local_var()`` copy of ``graph``.
        """
        g = graph.local_var()
        hop_feats = [self.dropout(features)]

        if "train_D_norm" in g.ndata:
            D_norm = g.ndata["train_D_norm"]
        else:
            D_norm = g.ndata["full_D_norm"]

        for _hop in range(self.order):
            # Weighted-sum aggregation of the previous hop's features.
            g.ndata["h"] = hop_feats[-1]
            if "w" not in g.edata:
                g.edata["w"] = th.ones((g.num_edges(),)).to(features.device)
            g.update_all(fn.u_mul_e("h", "w", "m"), fn.sum("m", "h"))
            propagated = g.ndata.pop("h")
            hop_feats.append(propagated * D_norm)

        transformed = [
            self.feat_trans(feat, hop) for hop, feat in enumerate(hop_feats)
        ]

        if self.aggr == "mean":
            # Plain running sum of the per-hop outputs (no division).
            combined = transformed[0]
            for part in transformed[1:]:
                combined = combined + part
            return combined
        if self.aggr == "concat":
            return th.cat(transformed, 1)
        raise NotImplementedError
numpy as np import scipy import torch as th from dgl.sampling import pack_traces, random_walk from torch.utils.data import DataLoader # The base class of sampler class SAINTSampler: """ Description ----------- SAINTSampler implements the sampler described in GraphSAINT. This sampler implements offline sampling in pre-sampling phase as well as fully offline sampling, fully online sampling in training phase. Users can conveniently set param 'online' of the sampler to choose different modes. Parameters ---------- node_budget : int the expected number of nodes in each subgraph, which is specifically explained in the paper. Actually this param specifies the times of sampling nodes from the original graph with replacement. The meaning of edge_budget is similar to the node_budget. dn : str name of dataset. g : DGLGraph the full graph. train_nid : list ids of training nodes. num_workers_sampler : int number of processes to sample subgraphs in pre-sampling procedure using torch.dataloader. num_subg_sampler : int, optional the max number of subgraphs sampled in pre-sampling phase for computing normalization coefficients in the beginning. Actually this param is used as ``__len__`` of sampler in pre-sampling phase. Please make sure that num_subg_sampler is greater than batch_size_sampler so that we can sample enough subgraphs. Defaults: 10000 batch_size_sampler : int, optional the number of subgraphs sampled by each process concurrently in pre-sampling phase. Defaults: 200 online : bool, optional If `True`, we employ online sampling in training phase. Otherwise employing offline sampling. Defaults: True num_subg : int, optional the expected number of sampled subgraphs in pre-sampling phase. It is actually the 'N' in the original paper. Note that this param is different from the num_subg_sampler. This param is just used to control the number of pre-sampled subgraphs. 
Defaults: 50 full : bool, optional True if the number of subgraphs used in the training phase equals to that of pre-sampled subgraphs, or ``math.ceil(self.train_g.num_nodes() / self.node_budget)``. This formula takes the result of A divided by B as the number of subgraphs used in the training phase, where A is the number of training nodes in the original graph, B is the expected number of nodes in each pre-sampled subgraph. Please refer to the paper to check the details. Defaults: True Notes ----- For parallelism of pre-sampling, we utilize `torch.DataLoader` to concurrently speed up sampling. The `num_subg_sampler` is the return value of `__len__` in pre-sampling phase. Moreover, the param `batch_size_sampler` determines the batch_size of `torch.DataLoader` in internal pre-sampling part. But note that if we wanna pass the SAINTSampler to `torch.DataLoader` for concurrently sampling subgraphs in training phase, we need to specify `batch_size` of `DataLoader`, that is, `batch_size_sampler` is not related to how sampler works in training procedure. 
def __init__(
    self,
    node_budget,
    dn,
    g,
    train_nid,
    num_workers_sampler,
    num_subg_sampler=10000,
    batch_size_sampler=200,
    online=True,
    num_subg=50,
    full=True,
):
    """Pre-sample subgraphs (or load cached ones) and set up normalization.

    If the cache file returned by ``__generate_fn__`` exists, the sampled
    node-id arrays and the two normalization arrays are loaded from disk.
    Otherwise subgraphs are sampled through a ``torch`` ``DataLoader`` over
    this object until more than ``train_g.num_nodes() * num_subg`` nodes
    have been drawn, node/edge visit counters are accumulated, the
    normalization coefficients are computed, and both are saved under
    ``./subgraphs/``.

    Afterwards the loss and aggregation norms are attached to ``train_g``,
    the degree norms are computed, the subgraph list is shuffled, temporary
    state is cleared and the sampler is switched to training mode.
    """

    def _as_object_array(items):
        # ``np.save`` converts its argument with ``np.asanyarray``; a ragged
        # list of arrays raises ValueError on NumPy >= 1.24 unless it is
        # already an object array.  Filling element by element also keeps
        # the result 1-D when all items happen to have the same length.
        packed = np.empty(len(items), dtype=object)
        for pos, item in enumerate(items):
            packed[pos] = item
        return packed

    self.g = g.cpu()
    self.node_budget = node_budget
    self.train_g: dgl.graph = g.subgraph(train_nid)
    self.dn, self.num_subg = dn, num_subg
    self.node_counter = th.zeros((self.train_g.num_nodes(),))
    self.edge_counter = th.zeros((self.train_g.num_edges(),))
    self.prob = None
    self.num_subg_sampler = num_subg_sampler
    self.batch_size_sampler = batch_size_sampler
    self.num_workers_sampler = num_workers_sampler
    # ``train`` is False during pre-sampling; it changes what ``__len__``,
    # ``__getitem__`` and ``__collate_fn__`` do.
    self.train = False
    self.online = online
    self.full = full

    assert (
        self.num_subg_sampler >= self.batch_size_sampler
    ), "num_subg_sampler should be greater than batch_size_sampler"
    graph_fn, norm_fn = self.__generate_fn__()

    if os.path.exists(graph_fn):
        # NOTE: allow_pickle=True unpickles the file contents; only load
        # cache files produced by this sampler.
        # Convert to a list so the cached and freshly-sampled paths expose
        # the same container type to ``random.shuffle`` and indexing.
        self.subgraphs = list(np.load(graph_fn, allow_pickle=True))
        aggr_norm, loss_norm = np.load(norm_fn, allow_pickle=True)
    else:
        os.makedirs("./subgraphs/", exist_ok=True)

        self.subgraphs = []
        self.N, sampled_nodes = 0, 0
        # N: the number of pre-sampled subgraphs

        # Employ parallelism to speed up the sampling procedure
        loader = DataLoader(
            self,
            batch_size=self.batch_size_sampler,
            shuffle=True,
            num_workers=self.num_workers_sampler,
            collate_fn=self.__collate_fn__,
            drop_last=False,
        )

        t = time.perf_counter()
        for num_nodes, subgraphs_nids, subgraphs_eids in loader:
            self.subgraphs.extend(subgraphs_nids)
            sampled_nodes += num_nodes

            # Count how often each node appeared in this batch.
            _subgraphs, _node_counts = np.unique(
                np.concatenate(subgraphs_nids), return_counts=True
            )
            sampled_nodes_idx = th.from_numpy(_subgraphs)
            _node_counts = th.from_numpy(_node_counts)
            self.node_counter[sampled_nodes_idx] += _node_counts

            # Count how often each edge appeared in this batch.
            _subgraphs_eids, _edge_counts = np.unique(
                np.concatenate(subgraphs_eids), return_counts=True
            )
            sampled_edges_idx = th.from_numpy(_subgraphs_eids)
            _edge_counts = th.from_numpy(_edge_counts)
            self.edge_counter[sampled_edges_idx] += _edge_counts

            self.N += len(subgraphs_nids)  # number of subgraphs
            if sampled_nodes > self.train_g.num_nodes() * num_subg:
                break

        print(f"Sampling time: [{time.perf_counter() - t:.2f}s]")
        np.save(graph_fn, _as_object_array(self.subgraphs))

        t = time.perf_counter()
        aggr_norm, loss_norm = self.__compute_norm__()
        print(f"Normalization time: [{time.perf_counter() - t:.2f}s]")
        # The two norms have different lengths (per edge vs. per node), so
        # they must be stored as an object array as well.
        np.save(norm_fn, _as_object_array((aggr_norm, loss_norm)))

    self.train_g.ndata["l_n"] = th.Tensor(loss_norm)
    self.train_g.edata["w"] = th.Tensor(aggr_norm)
    self.__compute_degree_norm()  # basically normalizing adjacent matrix

    random.shuffle(self.subgraphs)
    self.__clear__()
    print("The number of subgraphs is: ", len(self.subgraphs))

    self.train = True
class SAINTEdgeSampler(SAINTSampler):
    """
    Description
    -----------
    GraphSAINT with edge sampler.

    Parameters
    ----------
    edge_budget : int
        the expected number of edges in each subgraph, which is specifically explained in the paper.
    """

    def __init__(self, edge_budget, **kwargs):
        self.edge_budget = edge_budget
        self.rng = np.random.default_rng()

        # Every sampled edge contributes at most two nodes.
        super(SAINTEdgeSampler, self).__init__(
            node_budget=edge_budget * 2, **kwargs
        )

    def __generate_fn__(self):
        """Return the cache file names for subgraphs and normalization."""
        graph_fn = os.path.join(
            "./subgraphs/{}_Edge_{}_{}.npy".format(
                self.dn, self.edge_budget, self.num_subg
            )
        )
        norm_fn = os.path.join(
            "./subgraphs/{}_Edge_{}_{}_norm.npy".format(
                self.dn, self.edge_budget, self.num_subg
            )
        )
        return graph_fn, norm_fn

    # TODO: only sample half edges, then add another half edges
    # TODO: use numpy to implement cython sampling method
    def __sample__(self):
        """Sample ``edge_budget`` edges and return their endpoint node IDs.

        Returns
        -------
        numpy.ndarray
            Sorted, de-duplicated IDs (in ``train_g``) of the end points of
            the sampled edges.
        """
        if self.prob is None:
            src, dst = self.train_g.edges()
            src_degrees = self.train_g.in_degrees(src).float().clamp(min=1)
            dst_degrees = self.train_g.in_degrees(dst).float().clamp(min=1)
            prob_mat = 1.0 / src_degrees + 1.0 / dst_degrees
            prob_mat = scipy.sparse.csr_matrix(
                (prob_mat.numpy(), (src.numpy(), dst.numpy()))
            )
            # The edge probability here only contains that of edges in upper triangle adjacency matrix
            # Because we assume the graph is undirected, that is, the adjacency matrix is symmetric. We only need
            # to consider half of edges in the graph.
            upper = scipy.sparse.triu(prob_mat).tocoo()
            self.prob = th.tensor(upper.data)
            self.prob /= self.prob.sum()
            # The lookup table must enumerate exactly the edges that
            # ``self.prob`` describes, in the same order. Building it from
            # the full matrix (as before) made index i of ``prob`` point at
            # an unrelated edge and restricted sampling to the first rows.
            self.adj_nodes = np.stack((upper.row, upper.col), axis=1)

        sampled_edges = np.unique(
            dgl.random.choice(
                len(self.prob),
                size=self.edge_budget,
                prob=self.prob,
                replace=False,
            )
        )
        sampled_nodes = np.unique(
            self.adj_nodes[sampled_edges].flatten()
        ).astype("long")
        return sampled_nodes
""" def __init__(self, num_roots, length, **kwargs): self.num_roots, self.length = num_roots, length super(SAINTRandomWalkSampler, self).__init__( node_budget=num_roots * length, **kwargs ) def __generate_fn__(self): graph_fn = os.path.join( "./subgraphs/{}_RW_{}_{}_{}.npy".format( self.dn, self.num_roots, self.length, self.num_subg ) ) norm_fn = os.path.join( "./subgraphs/{}_RW_{}_{}_{}_norm.npy".format( self.dn, self.num_roots, self.length, self.num_subg ) ) return graph_fn, norm_fn def __sample__(self): sampled_roots = th.randint( 0, self.train_g.num_nodes(), (self.num_roots,) ) traces, types = random_walk( self.train_g, nodes=sampled_roots, length=self.length ) sampled_nodes, _, _, _ = pack_traces(traces, types) sampled_nodes = sampled_nodes.unique() return sampled_nodes.numpy() ================================================ FILE: examples/pytorch/graphsaint/train_sampling.py ================================================ import argparse import os import time import warnings import torch import torch.nn.functional as F from config import CONFIG from modules import GCNNet from sampler import SAINTEdgeSampler, SAINTNodeSampler, SAINTRandomWalkSampler from torch.utils.data import DataLoader from utils import calc_f1, evaluate, load_data, Logger, save_log_dir def main(args, task): warnings.filterwarnings("ignore") multilabel_data = {"ppi", "yelp", "amazon"} multilabel = args.dataset in multilabel_data # This flag is excluded for too large dataset, like amazon, the graph of which is too large to be directly # shifted to one gpu. So we need to # 1. put the whole graph on cpu, and put the subgraphs on gpu in training phase # 2. 
put the model on gpu in training phase, and put the model on cpu in validation/testing phase # We need to judge cpu_flag and cuda (below) simultaneously when shift model between cpu and gpu if args.dataset in ["amazon"]: cpu_flag = True else: cpu_flag = False # load and preprocess dataset data = load_data(args, multilabel) g = data.g train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] labels = g.ndata["label"] train_nid = data.train_nid in_feats = g.ndata["feat"].shape[1] n_classes = data.num_classes n_nodes = g.num_nodes() n_edges = g.num_edges() n_train_samples = train_mask.int().sum().item() n_val_samples = val_mask.int().sum().item() n_test_samples = test_mask.int().sum().item() print( """----Data statistics------' #Nodes %d #Edges %d #Classes/Labels (multi binary labels) %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_nodes, n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples, ) ) # load sampler kwargs = { "dn": args.dataset, "g": g, "train_nid": train_nid, "num_workers_sampler": args.num_workers_sampler, "num_subg_sampler": args.num_subg_sampler, "batch_size_sampler": args.batch_size_sampler, "online": args.online, "num_subg": args.num_subg, "full": args.full, } if args.sampler == "node": saint_sampler = SAINTNodeSampler(args.node_budget, **kwargs) elif args.sampler == "edge": saint_sampler = SAINTEdgeSampler(args.edge_budget, **kwargs) elif args.sampler == "rw": saint_sampler = SAINTRandomWalkSampler( args.num_roots, args.length, **kwargs ) else: raise NotImplementedError loader = DataLoader( saint_sampler, collate_fn=saint_sampler.__collate_fn__, batch_size=1, shuffle=True, num_workers=args.num_workers, drop_last=False, ) # set device for dataset tensors if args.gpu < 0: cuda = False else: cuda = True torch.cuda.set_device(args.gpu) val_mask = val_mask.cuda() test_mask = test_mask.cuda() if not cpu_flag: g = g.to("cuda:{}".format(args.gpu)) print("labels shape:", 
g.ndata["label"].shape) print("features shape:", g.ndata["feat"].shape) model = GCNNet( in_dim=in_feats, hid_dim=args.n_hidden, out_dim=n_classes, arch=args.arch, dropout=args.dropout, batch_norm=not args.no_batch_norm, aggr=args.aggr, ) if cuda: model.cuda() # logger and so on log_dir = save_log_dir(args) logger = Logger(os.path.join(log_dir, "loggings")) logger.write(args) # use optimizer optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) # set train_nids to cuda tensor if cuda: train_nid = torch.from_numpy(train_nid).cuda() print( "GPU memory allocated before training(MB)", torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024, ) start_time = time.time() best_f1 = -1 for epoch in range(args.n_epochs): for j, subg in enumerate(loader): if cuda: subg = subg.to(torch.cuda.current_device()) model.train() # forward pred = model(subg) batch_labels = subg.ndata["label"] if multilabel: loss = F.binary_cross_entropy_with_logits( pred, batch_labels, reduction="sum", weight=subg.ndata["l_n"].unsqueeze(1), ) else: loss = F.cross_entropy(pred, batch_labels, reduction="none") loss = (subg.ndata["l_n"] * loss).sum() optimizer.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm(model.parameters(), 5) optimizer.step() if j == len(loader) - 1: model.eval() with torch.no_grad(): train_f1_mic, train_f1_mac = calc_f1( batch_labels.cpu().numpy(), pred.cpu().numpy(), multilabel, ) print( f"epoch:{epoch + 1}/{args.n_epochs}, Iteration {j + 1}/" f"{len(loader)}:training loss", loss.item(), ) print( "Train F1-mic {:.4f}, Train F1-mac {:.4f}".format( train_f1_mic, train_f1_mac ) ) # evaluate model.eval() if epoch % args.val_every == 0: if ( cpu_flag and cuda ): # Only when we have shifted model to gpu and we need to shift it back on cpu model = model.to("cpu") val_f1_mic, val_f1_mac = evaluate( model, g, labels, val_mask, multilabel ) print( "Val F1-mic {:.4f}, Val F1-mac {:.4f}".format( val_f1_mic, val_f1_mac ) ) if val_f1_mic > best_f1: best_f1 = val_f1_mic 
print("new best val f1:", best_f1) torch.save( model.state_dict(), os.path.join(log_dir, "best_model_{}.pkl".format(task)), ) if cpu_flag and cuda: model.cuda() end_time = time.time() print(f"training using time {end_time - start_time}") # test if args.use_val: model.load_state_dict( torch.load( os.path.join(log_dir, "best_model_{}.pkl".format(task)), weights_only=False, ) ) if cpu_flag and cuda: model = model.to("cpu") test_f1_mic, test_f1_mac = evaluate(model, g, labels, test_mask, multilabel) print( "Test F1-mic {:.4f}, Test F1-mac {:.4f}".format( test_f1_mic, test_f1_mac ) ) if __name__ == "__main__": warnings.filterwarnings("ignore") parser = argparse.ArgumentParser(description="GraphSAINT") parser.add_argument( "--task", type=str, default="ppi_n", help="type of tasks" ) parser.add_argument( "--online", dest="online", action="store_true", help="sampling method in training phase", ) parser.add_argument("--gpu", type=int, default=0, help="the gpu index") task = parser.parse_args().task args = argparse.Namespace(**CONFIG[task]) args.online = parser.parse_args().online args.gpu = parser.parse_args().gpu print(args) main(args, task=task) ================================================ FILE: examples/pytorch/graphsaint/utils.py ================================================ import json import os from functools import namedtuple import dgl import numpy as np import scipy.sparse import torch from sklearn.metrics import f1_score from sklearn.preprocessing import StandardScaler class Logger(object): """A custom logger to log stdout to a logging file.""" def __init__(self, path): """Initialize the logger. Parameters --------- path : str The file path to be stored in. 
""" self.path = path def write(self, s): with open(self.path, "a") as f: f.write(str(s)) print(s) return def save_log_dir(args): log_dir = "./log/{}/{}".format(args.dataset, args.log_dir) os.makedirs(log_dir, exist_ok=True) return log_dir def calc_f1(y_true, y_pred, multilabel): if multilabel: y_pred[y_pred > 0] = 1 y_pred[y_pred <= 0] = 0 else: y_pred = np.argmax(y_pred, axis=1) return f1_score(y_true, y_pred, average="micro"), f1_score( y_true, y_pred, average="macro" ) def evaluate(model, g, labels, mask, multilabel=False): model.eval() with torch.no_grad(): logits = model(g) logits = logits[mask] labels = labels[mask] f1_mic, f1_mac = calc_f1( labels.cpu().numpy(), logits.cpu().numpy(), multilabel ) return f1_mic, f1_mac # load data of GraphSAINT and convert them to the format of dgl def load_data(args, multilabel): if not os.path.exists("graphsaintdata") and not os.path.exists("data"): raise ValueError("The directory graphsaintdata does not exist!") elif os.path.exists("graphsaintdata") and not os.path.exists("data"): os.rename("graphsaintdata", "data") prefix = "data/{}".format(args.dataset) DataType = namedtuple("Dataset", ["num_classes", "train_nid", "g"]) adj_full = scipy.sparse.load_npz("./{}/adj_full.npz".format(prefix)).astype( np.bool_ ) g = dgl.from_scipy(adj_full) num_nodes = g.num_nodes() adj_train = scipy.sparse.load_npz( "./{}/adj_train.npz".format(prefix) ).astype(np.bool_) train_nid = np.array(list(set(adj_train.nonzero()[0]))) role = json.load(open("./{}/role.json".format(prefix))) mask = np.zeros((num_nodes,), dtype=bool) train_mask = mask.copy() train_mask[role["tr"]] = True val_mask = mask.copy() val_mask[role["va"]] = True test_mask = mask.copy() test_mask[role["te"]] = True feats = np.load("./{}/feats.npy".format(prefix)) scaler = StandardScaler() scaler.fit(feats[train_nid]) feats = scaler.transform(feats) class_map = json.load(open("./{}/class_map.json".format(prefix))) class_map = {int(k): v for k, v in class_map.items()} if multilabel: 
# Multi-label binary classification num_classes = len(list(class_map.values())[0]) class_arr = np.zeros((num_nodes, num_classes)) for k, v in class_map.items(): class_arr[k] = v else: num_classes = max(class_map.values()) - min(class_map.values()) + 1 class_arr = np.zeros((num_nodes,)) for k, v in class_map.items(): class_arr[k] = v g.ndata["feat"] = torch.tensor(feats, dtype=torch.float) g.ndata["label"] = torch.tensor( class_arr, dtype=torch.float if multilabel else torch.long ) g.ndata["train_mask"] = torch.tensor(train_mask, dtype=torch.bool) g.ndata["val_mask"] = torch.tensor(val_mask, dtype=torch.bool) g.ndata["test_mask"] = torch.tensor(test_mask, dtype=torch.bool) data = DataType(g=g, num_classes=num_classes, train_nid=train_nid) return data ================================================ FILE: examples/pytorch/graphsim/README.md ================================================ # GraphParticleSim ## DGL Implementation of Interaction-Network paper. This DGL example implements the GNN model proposed in the paper [Interaction Network](https://arxiv.org/abs/1612.00222.pdf). GraphParticleSim implementor ---------------------- This example was implemented by [Ericcsr](https://github.com/Ericcsr) during his Internship work at the AWS Shanghai AI Lab. The graph dataset used in this example --------------------------------------- This Example uses Datasets Generate By Physics N-Body Simulator adapted from [This Repo](https://github.com/jsikyoon/Interaction-networks_tensorflow) n_body: - n Particles/Nodes - Complete Bidirectional Graph - 10 trajectories should be generated - 1000 steps of simulation per trajectory Dependency -------------------------------- - ffmpeg 4.3.8 - opencv-python 4.2.0 How to run example files -------------------------------- In the graphsim folder, run **Please first run `n_body_sim.py` to generate some data** Using Ground Truth Velocity From Simulator Directly. 
```python python n_body_sim.py ``` Generate longer trajectories or more trajectories. ```python python n_body_sim.py --num_traj <num_traj> --steps <steps> ``` **Please use `train.py`** ```python python train.py --num_workers 15 ``` Training with GPU ```python python train.py --gpu 0 --num_workers 15 ``` Training with visualization: for a valid visualization, it might take the full 40000 epochs of training ```python python train.py --gpu 0 --num_workers 15 --visualize ``` One-step loss performance: loss on the test data after 40000 training epochs. ------------------------- | Models/Dataset | 6 Body | | :-------------- | -----: | | Interaction Network in DGL | 80(10) | | Interaction Network in Tensorflow | 60 | ------------------------- Note that the datasets are generated directly from the simulator to avoid using TensorFlow to handle the original dataset. The training is very unstable: even if the minimum loss is achieved from time to time, there are chances that the loss will suddenly increase, in both the author's model and our model. Since the original model hasn't been released, the implementation of this model follows the TensorFlow version implemented in https://github.com/jsikyoon/Interaction-networks_tensorflow, whose author consulted the first author of the paper for some implementation details.
class MultiBodyDataset(Dataset):
    """Map-style dataset over the arrays stored in one ``.npz`` archive.

    The archive must provide the keys ``data``, ``label`` and
    ``n_particles``. The opened archive is kept in ``self.zipfile`` so that
    subclasses can read further keys from it.
    """

    def __init__(self, path):
        archive = np.load(path)
        self.path = path
        self.zipfile = archive
        self.node_state = archive["data"]
        self.node_label = archive["label"]
        self.n_particles = archive["n_particles"]

    def __len__(self):
        # One sample per entry along the leading axis.
        return self.node_state.shape[0]

    def __getitem__(self, idx):
        """Return the ``(state, label)`` pair stored at ``idx``."""
        # Tensor indices are converted to plain Python ints / lists first.
        key = idx.tolist() if torch.is_tensor(idx) else idx
        return (self.node_state[key, :, :], self.node_label[key, :, :])
class MLP(nn.Module):
    """Plain multi-layer perceptron with ReLU between the layers.

    Parameters
    ==========
    in_feats : int
        Size of the input features
    out_feats : int
        Size of the output features
    num_layers : int
        Total number of linear layers; values below 2 still yield 2 layers
    hidden : int
        Width of every hidden layer
    """

    def __init__(self, in_feats, out_feats, num_layers=2, hidden=128):
        super(MLP, self).__init__()
        self.layers = nn.ModuleList()
        # The input layer keeps PyTorch's default initialization. An unused
        # output layer used to be built before this one; it was dropped
        # because it only cost time and consumed random numbers.
        self.layers.append(nn.Linear(in_feats, hidden))
        for _ in range(1, num_layers - 1):
            self.layers.append(self._make_linear(hidden, hidden))
        self.layers.append(self._make_linear(hidden, out_feats))

    @staticmethod
    def _make_linear(n_in, n_out):
        """Build a linear layer with N(0, 0.1) weights and zero bias."""
        layer = nn.Linear(n_in, n_out)
        nn.init.normal_(layer.weight, std=0.1)
        nn.init.zeros_(layer.bias)
        return layer

    def forward(self, x):
        # ReLU follows every layer except the last one.
        for layer in self.layers[:-1]:
            x = F.relu(layer(x))
        return self.layers[-1](x)
class InteractionNet(nn.Module):
    """
    Simple Interaction Network
    One Layer interaction network for stellar multi-body problem simulation,
    it has the ability to simulate number of body motion no more than 12

    Parameters
    ==========
    node_feats : int
        Number of node features

    stat : dict
        Statistics for denormalization (keys ``median``, ``max``, ``min``)
    """

    def __init__(self, node_feats, stat):
        super(InteractionNet, self).__init__()
        self.node_feats = node_feats
        self.stat = stat

        # Factories for the edge (message) and node (update) networks.
        make_edge_mlp = partial(MLP, num_layers=5, hidden=150)
        make_node_mlp = partial(MLP, num_layers=2, hidden=100)

        velocity_feats = node_feats - 3  # Use velocity only
        self.in_layer = InteractionLayer(
            velocity_feats,
            node_feats,
            out_node_feats=2,
            out_edge_feats=50,
            edge_fn=make_edge_mlp,
            node_fn=make_node_mlp,
            mode="n_n",
        )

    def denormalize_output(self, out):
        """Undo the normalization, using the statistics at indices 3:5."""
        vel = slice(3, 5)
        spread = self.stat["max"][vel] - self.stat["min"][vel]
        centre = self.stat["median"][vel]
        return out * spread / 2 + centre

    def forward(self, g, n_feat, e_feat, global_feats, relation_feats):
        """Run the interaction layer and denormalize its node output."""
        with g.local_scope():
            node_out, edge_out = self.in_layer(
                g, n_feat, e_feat, global_feats, relation_feats
            )
            return self.denormalize_output(node_out), edge_out
def forward(self, g, node_feats, edge_feats, global_feats, relation_feats):
    """Run one round of message passing on ``g``.

    Node inputs are ``node_feats`` concatenated with ``global_feats``; edge
    inputs are ``edge_feats`` concatenated with ``relation_feats`` (both
    along dim 1). Returns the tuple ``(g.ndata["n"], g.edata["e"])``.

    Note that the features are written into ``g`` directly; this method
    does not open a local scope itself.
    """
    node_input = torch.cat([node_feats, global_feats], dim=1)
    edge_input = torch.cat([edge_feats, relation_feats], dim=1)
    g.ndata["feat"] = node_input
    g.edata["feat"] = edge_input

    # Edge update: "nne" goes through the UDF that also reads the end-point
    # features, every other mode feeds the edge input to edge_fn directly.
    if self.mode != "nne":
        g.edata["e"] = self.edge_fn(g.edata["feat"])
    else:
        g.apply_edges(self.update_edge_fn)

    # Sum the incoming edge results per node, then apply the node update.
    message_fn = fn.copy_e("e", "msg")
    reduce_fn = fn.sum("msg", "agg")
    g.update_all(message_fn, reduce_fn, self.update_node_fn)

    return g.ndata["n"], g.edata["e"]
def calc(cur_state, n_body):
    """Advance the system by one time step of length ``diff_t``.

    Parameters
    ----------
    cur_state : numpy.ndarray
        Current state, one row ``[mass, x, y, x_vel, y_vel]`` per body.
    n_body : int
        Number of bodies.

    Returns
    -------
    numpy.ndarray
        State after one step, shape ``(n_body, fea_num)``.
    """
    next_state = np.zeros((n_body, fea_num), dtype=float)
    # f_mat[i, j] is the force acting on body i caused by body j.
    f_mat = np.zeros((n_body, n_body, 2), dtype=float)
    f_sum = np.zeros((n_body, 2), dtype=float)
    acc = np.zeros((n_body, 2), dtype=float)
    for i in range(n_body):
        # Visit every unordered pair once (j > i always holds here, so no
        # extra ``j != i`` test is needed) and store the opposite force for
        # the partner. Rows with index < i were completed by earlier
        # iterations, so row i is complete after this inner loop.
        for j in range(i + 1, n_body):
            f = get_f(cur_state[i][:3], cur_state[j][:3])
            f_mat[i, j] += f
            f_mat[j, i] -= f
        f_sum[i] = np.sum(f_mat[i], axis=0)
        acc[i] = f_sum[i] / cur_state[i][0]
        next_state[i][0] = cur_state[i][0]
        # Velocity is updated first; the position then uses the new velocity.
        next_state[i][3:5] = cur_state[i][3:5] + acc[i] * diff_t
        next_state[i][1:3] = cur_state[i][1:3] + next_state[i][3:5] * diff_t
    return next_state
def train(
    optimizer, loss_fn, reg_fn, model, prep, dataloader, lambda_reg, device
):
    """Run one training epoch and return the mean prediction loss.

    The optimized objective is the prediction loss plus ``lambda_reg`` times
    ``reg_fn`` applied to the edge output and to every model parameter
    (each against zeros). Only the prediction loss is averaged and returned.
    """
    running_loss = 0
    model.train()
    for step, batch in enumerate(dataloader, start=1):
        graph_batch, data_batch, label_batch = (
            item.to(device) for item in batch
        )
        optimizer.zero_grad()

        node_feat, edge_feat = prep(graph_batch, data_batch)
        # The model expects relation/global inputs; feed all-zero columns.
        dummy_relation = torch.zeros(edge_feat.shape[0], 1).float().to(device)
        dummy_global = torch.zeros(node_feat.shape[0], 1).float().to(device)
        v_pred, out_e = model(
            graph_batch,
            node_feat[:, 3:5].float(),
            edge_feat.float(),
            dummy_global,
            dummy_relation,
        )

        loss = loss_fn(v_pred, label_batch)
        running_loss += float(loss)

        # Penalty on the edge output.
        loss = loss + lambda_reg * reg_fn(out_e, torch.zeros_like(out_e))
        # Penalty on every parameter, accumulated in parameter order.
        reg_loss = sum(
            lambda_reg
            * reg_fn(param, torch.zeros_like(param).float().to(device))
            for param in model.parameters()
        )
        loss = loss + reg_loss

        loss.backward()
        optimizer.step()
    return running_loss / step
# Rollout Evaluation based in initial state
# Need to integrate
def eval_rollout(
    model, prep, initial_frame, n_object, device, n_steps=100, dt=0.001
):
    """Roll the model forward from ``initial_frame`` and render a video.

    Parameters
    ----------
    model : torch.nn.Module
        Network predicting the next velocities.
    prep : callable
        Maps ``(graph, frame)`` to ``(node_feats, edge_feats)``.
    initial_frame : torch.Tensor
        Start state; columns 1-2 are updated as positions and columns 3-4
        as velocities. The tensor passed in is left untouched.
    n_object : int
        Number of bodies; a complete graph over them is built.
    device : torch.device
        Device the rollout runs on.
    n_steps : int
        Number of rollout steps (frames of the video).
    dt : float
        Integration step used to advance the positions.
    """
    # Work on a copy: on CPU ``.to(device)`` alone would return the caller's
    # tensor and the in-place updates below would overwrite it.
    current_frame = initial_frame.clone().to(device)
    base_graph = nx.complete_graph(n_object)
    graph = dgl.from_networkx(base_graph).to(device)
    pos_buffer = []
    model.eval()
    # Inference only. Without no_grad the in-place updates would make
    # ``current_frame`` part of an ever-growing autograd graph, and
    # ``.numpy()`` refuses tensors that require grad.
    with torch.no_grad():
        for _ in range(n_steps):
            node_feats, edge_feats = prep(graph, current_frame)
            dummy_relation = (
                torch.zeros(edge_feats.shape[0], 1).float().to(device)
            )
            dummy_global = (
                torch.zeros(node_feats.shape[0], 1).float().to(device)
            )
            v_pred, _ = model(
                graph,
                node_feats[:, 3:5].float(),
                edge_feats.float(),
                dummy_global,
                dummy_relation,
            )
            # Explicit Euler step: move with the predicted velocity, then
            # store that velocity as the new state.
            current_frame[:, [1, 2]] += v_pred * dt
            current_frame[:, 3:5] = v_pred
            pos_buffer.append(current_frame[:, [1, 2]].cpu().numpy())
    pos_buffer = np.vstack(pos_buffer).reshape(n_steps, n_object, -1)
    make_video(pos_buffer, "video_model.mp4")
MultiBodyTrainDataset() valid_data = MultiBodyValidDataset() test_data = MultiBodyTestDataset() collator = MultiBodyGraphCollator(train_data.n_particles) train_dataloader = DataLoader( train_data, args.batch_size, True, collate_fn=collator, num_workers=args.num_workers, ) valid_dataloader = DataLoader( valid_data, args.batch_size, True, collate_fn=collator, num_workers=args.num_workers, ) test_full_dataloader = DataLoader( test_data, args.batch_size, True, collate_fn=collator, num_workers=args.num_workers, ) node_feats = 5 stat = { "median": torch.from_numpy(train_data.stat_median).to(device), "max": torch.from_numpy(train_data.stat_max).to(device), "min": torch.from_numpy(train_data.stat_min).to(device), } print( "Weight: ", train_data.stat_median[0], train_data.stat_max[0], train_data.stat_min[0], ) print( "Position: ", train_data.stat_median[[1, 2]], train_data.stat_max[[1, 2]], train_data.stat_min[[1, 2]], ) print( "Velocity: ", train_data.stat_median[[3, 4]], train_data.stat_max[[3, 4]], train_data.stat_min[[3, 4]], ) prepare_layer = PrepareLayer(node_feats, stat).to(device) interaction_net = InteractionNet(node_feats, stat).to(device) print(interaction_net) optimizer = torch.optim.Adam(interaction_net.parameters(), lr=args.lr) state_dict = interaction_net.state_dict() loss_fn = torch.nn.MSELoss() reg_fn = torch.nn.MSELoss(reduction="sum") try: for e in range(args.epochs): last_t = time.time() loss = train( optimizer, loss_fn, reg_fn, interaction_net, prepare_layer, train_dataloader, args.lambda_reg, device, ) print("Epoch time: ", time.time() - last_t) if e % 1 == 0: valid_loss = eval( loss_fn, interaction_net, prepare_layer, valid_dataloader, device, ) test_full_loss = eval( loss_fn, interaction_net, prepare_layer, test_full_dataloader, device, ) print( "Epoch: {}.Loss: Valid: {} Full: {}".format( e, valid_loss, test_full_loss ) ) except: traceback.print_exc() finally: if args.visualize: eval_rollout( interaction_net, prepare_layer, test_data.first_frame, 
# Make video can be used to visualize test data
def make_video(xy, filename):
    """Render a trajectory as a scatter animation with ffmpeg.

    Parameters
    ----------
    xy : array-like indexed as ``xy[frame, object, coord]``
        Coordinate 1 is plotted on the x axis and coordinate 0 on the
        y axis.
    filename : str
        Path of the video file to write.

    Notes
    -----
    Markers are never cleared between frames, so every frame also shows
    the positions of all earlier frames.
    """
    # NOTE(review): nothing in this function writes to pics/; presumably a
    # leftover from an earlier frame-dumping version - confirm before
    # removing this destructive shell call.
    os.system("rm -rf pics/*")
    FFMpegWriter = manimation.writers["ffmpeg"]
    metadata = dict(
        title="Movie Test", artist="Matplotlib", comment="Movie support!"
    )
    writer = FFMpegWriter(fps=15, metadata=metadata)
    fig = plt.figure()
    try:
        plt.xlim(-200, 200)
        plt.ylim(-200, 200)
        color = ["ro", "bo", "go", "ko", "yo", "mo", "co"]
        # The third argument of ``saving`` is the output DPI; the frame
        # count used to be passed here, tying resolution to video length.
        with writer.saving(fig, filename, dpi=fig.dpi):
            for frame in xy:
                for j, point in enumerate(frame):
                    # One marker style per object, cycling through `color`.
                    plt.plot(point[1], point[0], color[j % len(color)])
                writer.grab_frame()
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
class GraphWriter(nn.Module):
    """Graph-to-text model: graph/entity/title encoders plus an LSTM decoder.

    The decoder attends over the graph-encoded entities (and the title when
    ``args.title`` is set) and mixes a vocabulary distribution with a
    copy-from-entity distribution through a learned gate.
    """

    def __init__(self, args):
        """Create embeddings, encoders, attention modules and the decoder.

        ``args`` must provide the vocabularies (``title_vocab``,
        ``ent_text_vocab``, ``text_vocab``, ``rel_vocab``), ``nhid``,
        ``dec_ninp`` and the ``title`` switch read below.
        """
        super(GraphWriter, self).__init__()
        self.args = args
        # Title branch is only built when titles are used as input.
        if args.title:
            self.title_emb = nn.Embedding(
                len(args.title_vocab), args.nhid, padding_idx=0
            )
            self.title_enc = BiLSTM(args, enc_type="title")
            self.title_attn = MSA(args)
        self.ent_emb = nn.Embedding(
            len(args.ent_text_vocab), args.nhid, padding_idx=0
        )
        self.tar_emb = nn.Embedding(
            len(args.text_vocab), args.nhid, padding_idx=0
        )
        if args.title:
            nn.init.xavier_normal_(self.title_emb.weight)
        nn.init.xavier_normal_(self.ent_emb.weight)
        self.rel_emb = nn.Embedding(
            len(args.rel_vocab), args.nhid, padding_idx=0
        )
        nn.init.xavier_normal_(self.rel_emb.weight)
        self.decode_lstm = nn.LSTMCell(args.dec_ninp, args.nhid)
        self.ent_enc = BiLSTM(args, enc_type="entity")
        self.graph_enc = GraphTrans(args)
        self.ent_attn = MSA(args)
        self.copy_attn = MSA(args, mode="copy")
        # Scalar gate that weights the vocabulary vs. copy distributions.
        self.copy_fc = nn.Linear(args.dec_ninp, 1)
        self.pred_v_fc = nn.Linear(args.dec_ninp, len(args.text_vocab))

    def enc_forward(
        self, batch, ent_mask, ent_text_mask, ent_len, rel_mask, title_mask
    ):
        """Encode title, entity text and graph.

        Returns ``(g_ent, g_root, title_enc, ent_enc)``: the two outputs of
        the graph encoder, the title encoding (``None`` when titles are
        disabled) and the entity-text encoding.
        """
        title_enc = None
        if self.args.title:
            title_enc = self.title_enc(
                self.title_emb(batch["title"]), title_mask
            )
        ent_enc = self.ent_enc(
            self.ent_emb(batch["ent_text"]),
            ent_text_mask,
            ent_len=batch["ent_len"],
        )
        rel_emb = self.rel_emb(batch["rel"])
        g_ent, g_root = self.graph_enc(
            ent_enc, ent_mask, ent_len, rel_emb, rel_mask, batch["graph"]
        )
        return g_ent, g_root, title_enc, ent_enc

    def forward(self, batch, beam_size=-1):
        """Train or decode, depending on ``beam_size``.

        * ``beam_size < 1``: teacher-forced pass over ``batch["text"]``;
          returns log-scores whose last axis is the vocabulary part
          followed by the copy (entity) part.
        * ``beam_size == 1``: greedy decoding for ``args.beam_max_len``
          steps; returns the token-id tensor including the start token.
        * otherwise: beam search; returns a list with one entry per batch
          element holding the best finished hypothesis found.
        """
        ent_mask = len2mask(batch["ent_len"], self.args.device)
        # Index 0 is the padding id (the embeddings use padding_idx=0), so
        # these masks are True at padded positions.
        ent_text_mask = batch["ent_text"] == 0
        rel_mask = batch["rel"] == 0
        title_mask = batch["title"] == 0
        g_ent, g_root, title_enc, ent_enc = self.enc_forward(
            batch,
            ent_mask,
            ent_text_mask,
            batch["ent_len"],
            rel_mask,
            title_mask,
        )

        # The graph root initializes both LSTM states; the cell state is a
        # detached copy.
        _h, _c = g_root, g_root.clone().detach()
        # Initial context: hidden state plus attention over the entities
        # (and, optionally, over the title).
        ctx = _h + self.ent_attn(_h, g_ent, mask=ent_mask)
        if self.args.title:
            attn = _h + self.title_attn(_h, title_enc, mask=title_mask)
            ctx = torch.cat([ctx, attn], 1)
        if beam_size < 1:
            # training
            outs = []
            # Iterate over time steps: (batch, time) -> (time, batch).
            tar_inp = self.tar_emb(batch["text"].transpose(0, 1))
            for t, xt in enumerate(tar_inp):
                _xt = torch.cat([ctx, xt], 1)
                _h, _c = self.decode_lstm(_xt, (_h, _c))
                ctx = _h + self.ent_attn(_h, g_ent, mask=ent_mask)
                if self.args.title:
                    attn = _h + self.title_attn(_h, title_enc, mask=title_mask)
                    ctx = torch.cat([ctx, attn], 1)
                outs.append(torch.cat([_h, ctx], 1))
            outs = torch.stack(outs, 1)
            copy_gate = torch.sigmoid(self.copy_fc(outs))
            # Small constant keeping the logs finite when the gate saturates.
            EPSI = 1e-6
            # copy
            # The gate weights the vocabulary distribution and (1 - gate)
            # the copy distribution; both are combined in log space.
            pred_v = torch.log(copy_gate + EPSI) + torch.log_softmax(
                self.pred_v_fc(outs), -1
            )
            pred_c = torch.log((1.0 - copy_gate) + EPSI) + torch.log_softmax(
                self.copy_attn(outs, ent_enc, mask=ent_mask), -1
            )
            pred = torch.cat([pred_v, pred_c], -1)
            return pred
        else:
            if beam_size == 1:
                # greedy
                device = g_ent.device
                B = g_ent.shape[0]
                ent_type = batch["ent_type"].view(B, -1)
                # NOTE(review): the vocab key below is an empty string in
                # this file; it looks like a start-of-sequence token name
                # was lost - confirm against the Vocab special tokens.
                seq = (
                    torch.ones(
                        B,
                    )
                    .long()
                    .to(device)
                    * self.args.text_vocab("")
                ).unsqueeze(1)
                for t in range(self.args.beam_max_len):
                    # Ids >= vocab size are copy indices; map them to the
                    # corresponding entity-type token before embedding.
                    _inp = replace_ent(
                        seq[:, -1], ent_type, len(self.args.text_vocab)
                    )
                    xt = self.tar_emb(_inp)
                    _xt = torch.cat([ctx, xt], 1)
                    _h, _c = self.decode_lstm(_xt, (_h, _c))
                    ctx = _h + self.ent_attn(_h, g_ent, mask=ent_mask)
                    if self.args.title:
                        attn = _h + self.title_attn(
                            _h, title_enc, mask=title_mask
                        )
                        ctx = torch.cat([ctx, attn], 1)
                    _y = torch.cat([_h, ctx], 1)
                    copy_gate = torch.sigmoid(self.copy_fc(_y))
                    # NOTE(review): unlike the training branch no epsilon is
                    # added inside the logs here.
                    pred_v = torch.log(copy_gate) + torch.log_softmax(
                        self.pred_v_fc(_y), -1
                    )
                    pred_c = torch.log((1.0 - copy_gate)) + torch.log_softmax(
                        self.copy_attn(
                            _y.unsqueeze(1), ent_enc, mask=ent_mask
                        ).squeeze(1),
                        -1,
                    )
                    pred = torch.cat([pred_v, pred_c], -1).view(B, -1)
                    # Forbid the listed special tokens from being generated.
                    # NOTE(review): the token names appear as empty strings
                    # here - presumably stripped; confirm.
                    for ban_item in ["", "", ""]:
                        pred[:, self.args.text_vocab(ban_item)] = -1e8
                    _, word = pred.max(-1)
                    seq = torch.cat([seq, word.unsqueeze(1)], 1)
                return seq
            else:
                # beam search
                device = g_ent.device
                B = g_ent.shape[0]
                BSZ = B * beam_size
                # Replicate every per-example tensor beam_size times so
                # that all hypotheses are decoded as one batch of size BSZ.
                _h = _h.view(B, 1, -1).repeat(1, beam_size, 1).view(BSZ, -1)
                _c = _c.view(B, 1, -1).repeat(1, beam_size, 1).view(BSZ, -1)
                ent_mask = (
                    ent_mask.view(B, 1, -1)
                    .repeat(1, beam_size, 1)
                    .view(BSZ, -1)
                )
                if self.args.title:
                    title_mask = (
                        title_mask.view(B, 1, -1)
                        .repeat(1, beam_size, 1)
                        .view(BSZ, -1)
                    )
                    title_enc = (
                        title_enc.view(B, 1, title_enc.size(1), -1)
                        .repeat(1, beam_size, 1, 1)
                        .view(BSZ, title_enc.size(1), -1)
                    )
                ctx = ctx.view(B, 1, -1).repeat(1, beam_size, 1).view(BSZ, -1)
                ent_type = (
                    batch["ent_type"]
                    .view(B, 1, -1)
                    .repeat(1, beam_size, 1)
                    .view(BSZ, -1)
                )
                g_ent = (
                    g_ent.view(B, 1, g_ent.size(1), -1)
                    .repeat(1, beam_size, 1, 1)
                    .view(BSZ, g_ent.size(1), -1)
                )
                ent_enc = (
                    ent_enc.view(B, 1, ent_enc.size(1), -1)
                    .repeat(1, beam_size, 1, 1)
                    .view(BSZ, ent_enc.size(1), -1)
                )

                # Best length-normalized score / sequence of a finished
                # hypothesis, per batch element.
                beam_best = torch.zeros(B).to(device) - 1e9
                beam_best_seq = [None] * B
                beam_seq = (
                    torch.ones(B, beam_size).long().to(device)
                    * self.args.text_vocab("")
                ).unsqueeze(-1)
                beam_score = torch.zeros(B, beam_size).to(device)
                # NOTE(review): done_flag is created without .to(device),
                # unlike the other beam tensors.
                done_flag = torch.zeros(B, beam_size)
                for t in range(self.args.beam_max_len):
                    _inp = replace_ent(
                        beam_seq[:, :, -1].view(-1),
                        ent_type,
                        len(self.args.text_vocab),
                    )
                    xt = self.tar_emb(_inp)
                    _xt = torch.cat([ctx, xt], 1)
                    _h, _c = self.decode_lstm(_xt, (_h, _c))
                    ctx = _h + self.ent_attn(_h, g_ent, mask=ent_mask)
                    if self.args.title:
                        attn = _h + self.title_attn(
                            _h, title_enc, mask=title_mask
                        )
                        ctx = torch.cat([ctx, attn], 1)
                    _y = torch.cat([_h, ctx], 1)
                    copy_gate = torch.sigmoid(self.copy_fc(_y))
                    pred_v = torch.log(copy_gate) + torch.log_softmax(
                        self.pred_v_fc(_y), -1
                    )
                    pred_c = torch.log((1.0 - copy_gate)) + torch.log_softmax(
                        self.copy_attn(
                            _y.unsqueeze(1), ent_enc, mask=ent_mask
                        ).squeeze(1),
                        -1,
                    )
                    pred = torch.cat([pred_v, pred_c], -1).view(
                        B, beam_size, -1
                    )
                    for ban_item in ["", "", ""]:
                        pred[:, :, self.args.text_vocab(ban_item)] = -1e8
                    if t == self.args.beam_max_len - 1:
                        # force ending
                        # On the last step every score except the one kept
                        # in `tt` is pushed to -1e8.
                        tt = pred[:, :, self.args.text_vocab("")]
                        pred = pred * 0 - 1e8
                        pred[:, :, self.args.text_vocab("")] = tt
                    cum_score = beam_score.view(B, beam_size, 1) + pred
                    score, word = cum_score.topk(
                        dim=-1, k=beam_size
                    )  # B, beam_size, beam_size
                    # Flatten to beam_size * beam_size candidates per example.
                    score, word = score.view(B, -1), word.view(B, -1)
                    eos_idx = self.args.text_vocab("")
                    if beam_seq.size(2) == 1:
                        # First step: all beams are identical, so take the
                        # top-k continuations of beam 0 only.
                        new_idx = torch.arange(beam_size).to(word)
                        new_idx = new_idx[None, :].repeat(B, 1)
                    else:
                        _, new_idx = score.topk(dim=-1, k=beam_size)
                    new_src, new_score, new_word, new_done = [], [], [], []
                    # Length-penalty divisor: current length ** args.lp.
                    LP = beam_seq.size(2) ** self.args.lp
                    for i in range(B):
                        for j in range(beam_size):
                            tmp_score = score[i][new_idx[i][j]]
                            tmp_word = word[i][new_idx[i][j]]
                            # Candidate index -> index of the beam it extends.
                            src_idx = new_idx[i][j] // beam_size
                            new_src.append(src_idx)
                            if tmp_word == eos_idx:
                                # A beam that just ended gets -1e8 as its
                                # running score.
                                new_score.append(-1e8)
                            else:
                                new_score.append(tmp_score)
                            new_word.append(tmp_word)

                            # Record a newly finished hypothesis if it beats
                            # the best one so far for this example.
                            if (
                                tmp_word == eos_idx
                                and done_flag[i][src_idx] == 0
                                and tmp_score / LP > beam_best[i]
                            ):
                                beam_best[i] = tmp_score / LP
                                beam_best_seq[i] = beam_seq[i][src_idx]
                            if tmp_word == eos_idx:
                                new_done.append(1)
                            else:
                                new_done.append(done_flag[i][src_idx])
                    new_score = (
                        torch.Tensor(new_score)
                        .view(B, beam_size)
                        .to(beam_score)
                    )
                    new_word = (
                        torch.Tensor(new_word).view(B, beam_size).to(beam_seq)
                    )
                    new_src = (
                        torch.LongTensor(new_src).view(B, beam_size).to(device)
                    )
                    new_done = (
                        torch.Tensor(new_done).view(B, beam_size).to(done_flag)
                    )
                    beam_score = new_score
                    done_flag = new_done
                    # Reorder sequences and decoder state so slot j holds
                    # the beam that candidate j extends.
                    beam_seq = beam_seq.view(B, beam_size, -1)[
                        torch.arange(B)[:, None].to(device), new_src
                    ]
                    beam_seq = torch.cat([beam_seq, new_word.unsqueeze(2)], 2)
                    _h = _h.view(B, beam_size, -1)[
                        torch.arange(B)[:, None].to(device), new_src
                    ].view(BSZ, -1)
                    _c = _c.view(B, beam_size, -1)[
                        torch.arange(B)[:, None].to(device), new_src
                    ].view(BSZ, -1)
                    ctx = ctx.view(B, beam_size, -1)[
                        torch.arange(B)[:, None].to(device), new_src
                    ].view(BSZ, -1)

                return beam_best_seq
class BiLSTM(nn.Module):
    """Bidirectional LSTM encoder used for titles and for entity text.

    With ``enc_type="title"`` the padded per-token outputs are returned;
    with ``enc_type="entity"`` the final hidden states of the top layer
    (both directions) are regrouped per example with ``pad``.
    """

    def __init__(self, args, enc_type="title"):
        super().__init__()
        self.enc_type = enc_type
        self.drop = nn.Dropout(args.emb_drop)
        # Each direction gets half the width so the concatenated output
        # has args.nhid features.
        self.bilstm = nn.LSTM(
            input_size=args.nhid,
            hidden_size=args.nhid // 2,
            num_layers=args.enc_lstm_layers,
            batch_first=True,
            bidirectional=True,
        )

    def forward(self, inp, mask, ent_len=None):
        """Encode ``inp``; entries of ``mask`` equal to 0 mark real tokens.

        ``ent_len`` is only used in entity mode, where it is forwarded to
        ``Tensor.split`` to regroup the per-entity vectors.
        """
        embedded = self.drop(inp)
        seq_lens = (mask == 0).sum(-1).long().tolist()
        packed = pack_padded_sequence(
            embedded, seq_lens, batch_first=True, enforce_sorted=False
        )
        packed_out, (h_n, _) = self.bilstm(packed)

        if self.enc_type == "title":
            token_states, _ = pad_packed_sequence(packed_out, batch_first=True)
            return token_states

        if self.enc_type == "entity":
            # Put the batch axis first, then keep the last two states:
            # the two directions of the top layer.
            per_item = h_n.transpose(0, 1).contiguous()
            top_layer = per_item[:, -2:].view(per_item.size(0), -1)
            return pad(top_layer.split(ent_len), out_type="tensor")

        # Any other enc_type yields no output.
        return None
def get_args():
    """Parse the GraphWriter command line and return the completed namespace.

    The options are declared in a table and registered in order; the parsed
    namespace is passed through ``fill_config`` before being returned.
    """
    parser = argparse.ArgumentParser(description="Graph Writer in DGL")

    # (flag, add_argument keyword arguments), in registration order.
    option_table = [
        ("--nhid", dict(default=500, type=int, help="hidden size")),
        ("--nhead", dict(default=4, type=int, help="number of heads")),
        ("--head_dim", dict(default=125, type=int, help="head dim")),
        (
            "--weight_decay",
            dict(default=0.0, type=float, help="weight decay"),
        ),
        (
            "--prop",
            dict(default=6, type=int, help="number of layers of gnn"),
        ),
        ("--title", dict(action="store_true", help="use title input")),
        ("--test", dict(action="store_true", help="inference mode")),
        ("--batch_size", dict(default=32, type=int, help="batch_size")),
        (
            "--beam_size",
            dict(default=4, type=int, help="beam size, 1 for greedy"),
        ),
        ("--epoch", dict(default=20, type=int, help="training epoch")),
        (
            "--beam_max_len",
            dict(
                default=200,
                type=int,
                help="max length of the generated text",
            ),
        ),
        (
            "--enc_lstm_layers",
            dict(default=2, type=int, help="number of layers of lstm"),
        ),
        ("--lr", dict(default=1e-1, type=float, help="learning rate")),
        ("--clip", dict(default=1, type=float, help="gradient clip")),
        (
            "--emb_drop",
            dict(default=0.0, type=float, help="embedding dropout"),
        ),
        (
            "--attn_drop",
            dict(default=0.1, type=float, help="attention dropout"),
        ),
        ("--drop", dict(default=0.1, type=float, help="dropout")),
        ("--lp", dict(default=1.0, type=float, help="length penalty")),
        (
            "--graph_enc",
            dict(
                default="gtrans",
                type=str,
                help="gnn mode, we only support the graph transformer now",
            ),
        ),
        (
            "--train_file",
            dict(
                default="data/unprocessed.train.json",
                type=str,
                help="training file",
            ),
        ),
        (
            "--valid_file",
            dict(
                default="data/unprocessed.val.json",
                type=str,
                help="validation file",
            ),
        ),
        (
            "--test_file",
            dict(
                default="data/unprocessed.test.json",
                type=str,
                help="test file",
            ),
        ),
        (
            "--save_dataset",
            dict(
                default="data.pickle",
                type=str,
                help="save path of dataset",
            ),
        ),
        (
            "--save_model",
            dict(
                default="saved_model.pt",
                type=str,
                help="save path of model",
            ),
        ),
        ("--gpu", dict(default=0, type=int, help="gpu mode")),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)

    parsed = parser.parse_args()
    return fill_config(parsed)
def train_one_epoch(model, dataloader, optimizer, args, epoch):
    """Train ``model`` for one epoch, print a summary and save a checkpoint.

    Parameters
    ----------
    model : callable module returning log-scores for a batch
    dataloader : iterable of batches containing a ``"tgt_text"`` tensor
    optimizer : optimizer over ``model.parameters()``
    args : namespace providing ``clip`` (gradient-norm limit) and
        ``save_model`` (checkpoint path prefix)
    epoch : int
        Epoch number, used in the log line and the checkpoint suffix.

    Raises
    ------
    ValueError
        If the loss of a batch is NaN.
    """
    model.train()
    tloss = 0.0
    tcnt = 0.0
    st_time = time.time()
    with tqdm(dataloader, desc="Train Ep " + str(epoch), mininterval=60) as tq:
        for batch in tq:
            pred = model(batch)
            # Target id 0 is excluded from the loss.
            loss = F.nll_loss(
                pred.view(-1, pred.shape[-1]),
                batch["tgt_text"].view(-1),
                ignore_index=0,
            )
            # Abort before backward/step so a NaN loss can never be
            # written into the weights.
            if torch.isnan(loss):
                raise ValueError("NaN appear")
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()
            # Weight the batch mean by the batch size for the running mean.
            n_samples = len(batch["tgt_text"])
            tloss += loss.item() * n_samples
            tcnt += n_samples
            tq.set_postfix({"loss": tloss / tcnt}, refresh=False)
    # An empty dataloader would otherwise divide by zero here.
    avg_loss = tloss / tcnt if tcnt else float("nan")
    print(
        "Train Ep ",
        str(epoch),
        "AVG Loss ",
        avg_loss,
        "Steps ",
        tcnt,
        "Time ",
        time.time() - st_time,
        "GPU",
        # max_memory_cached() is deprecated in favor of
        # max_memory_reserved(); the value is reported in GiB.
        torch.cuda.max_memory_reserved() / 1024.0 / 1024.0 / 1024.0,
    )
    # The suffix wraps every 100 epochs, so at most 100 files are kept.
    torch.save(model, args.save_model + str(epoch % 100))
def test(model, dataloader, args):
    """Decode the test set, dump gold/predicted text and print metrics.

    Generated and reference texts are written to ``tmp_pred.txt`` and
    ``tmp_gold.txt`` in the working directory, then BLEU, METEOR and
    ROUGE-L are computed and printed.

    Parameters
    ----------
    model : module called as ``model(batch, beam_size=...)``
    dataloader : iterable of batches containing ``"tgt_text"``
    args : namespace providing ``beam_size`` (also forwarded to
        ``write_txt``)
    """
    scorer = Bleu(4)
    m_scorer = Meteor()
    r_scorer = Rouge()
    hyp = []
    ref = []
    model.eval()
    # Context managers guarantee both files are flushed and closed even if
    # decoding or scoring raises.
    with open("tmp_gold.txt", "w") as gold_file, open(
        "tmp_pred.txt", "w"
    ) as pred_file:
        with tqdm(dataloader, desc="Test ", mininterval=1) as tq:
            for batch in tq:
                with torch.no_grad():
                    seq = model(batch, beam_size=args.beam_size)
                r = write_txt(batch, batch["tgt_text"], gold_file, args)
                h = write_txt(batch, seq, pred_file, args)
                hyp.extend(h)
                ref.extend(r)
    # The scorers are called with dicts keyed by sample index.
    hyp = dict(enumerate(hyp))
    ref = dict(enumerate(ref))
    print(hyp[0], ref[0])
    print("BLEU INP", len(hyp), len(ref))
    print("BLEU", scorer.compute_score(ref, hyp)[0])
    print("METEOR", m_scorer.compute_score(ref, hyp)[0])
    print("ROUGE_L", r_scorer.compute_score(ref, hyp)[0])
valid_dataloader = torch.utils.data.DataLoader( valid_dataset, batch_size=args.batch_size, shuffle=False, collate_fn=train_dataset.batch_fn, ) test_dataloader = torch.utils.data.DataLoader( test_dataset, batch_size=args.batch_size, shuffle=False, collate_fn=train_dataset.batch_fn, ) model = GraphWriter(args) model.to(args.device) if args.test: model = torch.load(args.save_model, weights_only=False) model.args = args print(model) test(model, test_dataloader, args) else: optimizer = torch.optim.SGD( model.parameters(), lr=args.lr, weight_decay=args.weight_decay, momentum=0.9, ) print(model) for epoch in range(args.epoch): train_one_epoch(model, train_dataloader, optimizer, args, epoch) eval_it(model, valid_dataloader, args, epoch) if __name__ == "__main__": args = get_args() main(args) ================================================ FILE: examples/pytorch/graphwriter/utlis.py ================================================ import json import pickle import random import dgl import numpy as np import torch NODE_TYPE = {"entity": 0, "root": 1, "relation": 2} def write_txt(batch, seqs, w_file, args): # converting the prediction to real text. 
def replace_ent(x, ent, V):
    """Replace copy indices in ``x`` by the matching entry of ``ent``.

    Parameters
    ----------
    x : 1-D LongTensor of length N
        Token ids; a value ``>= V`` means "copy entity number ``value - V``".
    ent : LongTensor with N rows
        ``ent[i, k]`` is the replacement id for entity ``k`` of row ``i``.
    V : int
        Size of the regular vocabulary.

    Returns
    -------
    LongTensor
        ``x`` itself when nothing has to be replaced, otherwise a new
        tensor in which every copy index at position ``i`` is replaced by
        ``ent[i, x[i] - V]``.
    """
    mask = x >= V
    if not mask.any():
        return x
    # nonzero() returns shape (k, 1).  Indexing with that shape broadcasts
    # against the (k,) column index into a (k, k) result, so every
    # replacement was read from the first masked row; flatten to (k,) to
    # pair each row with its own column.
    rows = mask.nonzero().squeeze(1)
    fill_ent = ent[rows, x[mask] - V]
    return x.masked_scatter(mask, fill_ent)
def at_least(x):
    """Return ``x`` unchanged, or a one-element placeholder list if empty.

    Guards against illegal (empty) entries in the data so that callers
    always get a sequence with at least one token.
    """
    return [""] if len(x) == 0 else x
graph.add_nodes( rel_len * 2, {"type": torch.ones(rel_len * 2) * NODE_TYPE["relation"]}, ) graph.add_edges(ent_len, torch.arange(ent_len)) graph.add_edges(torch.arange(ent_len), ent_len) graph.add_edges( torch.arange(ent_len + 1 + rel_len * 2), torch.arange(ent_len + 1 + rel_len * 2), ) adj_edges = [] for i, r in enumerate(self.raw_rel): assert len(r) == 3, str(r) st, rt, ed = r st_ent, ed_ent = self.raw_ent_text.index( st ), self.raw_ent_text.index(ed) # according to the edge_softmax operator, we need to reverse the graph adj_edges.append([ent_len + 1 + 2 * i, st_ent]) adj_edges.append([ed_ent, ent_len + 1 + 2 * i]) adj_edges.append([ent_len + 1 + 2 * i + 1, ed_ent]) adj_edges.append([st_ent, ent_len + 1 + 2 * i + 1]) if len(adj_edges) > 0: graph.add_edges(*list(map(list, zip(*adj_edges)))) return graph def get_tensor( self, ent_vocab, rel_vocab, text_vocab, ent_text_vocab, title_vocab ): if hasattr(self, "_cached_tensor"): return self._cached_tensor else: title_data = [""] + self.raw_title + [""] title = [title_vocab(x) for x in title_data] ent_text = [ [ent_text_vocab(y) for y in x] for x in self.raw_ent_text ] ent_type = [ text_vocab(x) for x in self.raw_ent_type ] # for inference rel_data = ["--root--"] + sum( [[x[1], x[1] + "_INV"] for x in self.raw_rel], [] ) rel = [rel_vocab(x) for x in rel_data] text_data = [""] + self.raw_text + [""] text = [text_vocab(x) for x in text_data] tgt_text = [] # the input text and decoding target are different since the consideration of the copy mechanism. 
class BucketSampler(torch.utils.data.Sampler):
    """Batch sampler that groups samples of similar length.

    Each iteration draws a random permutation of the indices, splits it into
    three length groups (``len < 100``, ``100 < len < 220`` and everything
    else, including lengths of exactly 100), shuffles the order of the
    groups and walks through them emitting batches. The thresholds and the
    reduced batch sizes for longer samples come from the author's code.

    Parameters
    ----------
    data_source : sequence
        Items supporting ``len()``; only their lengths are used here.
    batch_size : int, optional
        Maximum number of indices per batch. Default: 32
    bucket : int, optional
        Stored on the instance but not used; the number of length groups is
        fixed at three.

    Notes
    -----
    ``__len__`` returns ``ceil(len(data_source) / batch_size)``. The number
    of batches actually yielded can be larger, because batches holding long
    samples are closed early.
    """

    def __init__(self, data_source, batch_size=32, bucket=3):
        self.data_source = data_source
        self.bucket = bucket
        self.batch_size = batch_size

    def __iter__(self):
        # the magic numbers (100, 220, 24, 8) come from the author's code
        lengths = [len(x) for x in self.data_source]
        perm = torch.randperm(len(lengths))

        short, medium, rest = [], [], []
        for idx in perm:
            length = lengths[int(idx)]
            if length < 100:
                short.append(idx)
            elif 100 < length < 220:
                medium.append(idx)
            else:
                rest.append(idx)
        groups = [short, medium, rest]
        random.shuffle(groups)

        batch, max_len = [], 0
        for group in groups:
            for idx in group:
                batch.append(idx)
                # running maximum instead of rescanning the whole batch
                max_len = max(max_len, lengths[int(idx)])
                size = len(batch)
                if (
                    size >= self.batch_size
                    or (100 < max_len < 220 and size >= 24)
                    or (max_len > 220 and size >= 8)
                ):
                    yield batch
                    batch, max_len = [], 0
        if batch:
            yield batch

    def __len__(self):
        return (len(self.data_source) + self.batch_size - 1) // self.batch_size
GWdataset(torch.utils.data.Dataset): def __init__( self, exs, ent_vocab=None, rel_vocab=None, text_vocab=None, ent_text_vocab=None, title_vocab=None, device=None, ): super(GWdataset, self).__init__() self.exs = exs ( self.ent_vocab, self.rel_vocab, self.text_vocab, self.ent_text_vocab, self.title_vocab, self.device, ) = ( ent_vocab, rel_vocab, text_vocab, ent_text_vocab, title_vocab, device, ) def __iter__(self): return iter(self.exs) def __getitem__(self, index): return self.exs[index] def __len__(self): return len(self.exs) def batch_fn(self, batch_ex): ( batch_title, batch_ent_text, batch_ent_type, batch_rel, batch_text, batch_tgt_text, batch_graph, ) = ([], [], [], [], [], [], []) batch_raw_ent_text = [] for ex in batch_ex: ex_data = ex.get_tensor( self.ent_vocab, self.rel_vocab, self.text_vocab, self.ent_text_vocab, self.title_vocab, ) batch_title.append(ex_data["title"]) batch_ent_text.append(ex_data["ent_text"]) batch_ent_type.append(ex_data["ent_type"]) batch_rel.append(ex_data["rel"]) batch_text.append(ex_data["text"]) batch_tgt_text.append(ex_data["tgt_text"]) batch_graph.append(ex_data["graph"]) batch_raw_ent_text.append(ex_data["raw_ent_text"]) batch_title = pad(batch_title, out_type="tensor") batch_ent_text, ent_len = pad( batch_ent_text, out_type="tensor", flatten=True ) batch_ent_type = pad(batch_ent_type, out_type="tensor") batch_rel = pad(batch_rel, out_type="tensor") batch_text = pad(batch_text, out_type="tensor") batch_tgt_text = pad(batch_tgt_text, out_type="tensor") batch_graph = dgl.batch(batch_graph) batch_graph.to(self.device) return { "title": batch_title.to(self.device), "ent_text": batch_ent_text.to(self.device), "ent_len": ent_len, "ent_type": batch_ent_type.to(self.device), "rel": batch_rel.to(self.device), "text": batch_text.to(self.device), "tgt_text": batch_tgt_text.to(self.device), "graph": batch_graph, "raw_ent_text": batch_raw_ent_text, } def get_datasets( fnames, min_freq=-1, sep=";", joint_vocab=True, device=None, 
save="tmp.pickle", ): # min_freq : not support now since it's very sensitive to the final results, but you can set it via passing min_freq to the Vocab class. # sep : not support now # joint_vocab : not support now ent_vocab = Vocab(sp=["", ""]) title_vocab = Vocab(min_freq=5) rel_vocab = Vocab(sp=["", ""]) text_vocab = Vocab(min_freq=5) ent_text_vocab = Vocab(sp=["", ""]) datasets = [] for fname in fnames: exs = [] json_datas = json.loads(open(fname).read()) for json_data in json_datas: # construct one data example ex = Example.from_json(json_data) if fname == fnames[0]: # only training set ex.update_vocab( ent_vocab, rel_vocab, text_vocab, ent_text_vocab, title_vocab, ) exs.append(ex) datasets.append(exs) ent_vocab.build() rel_vocab.build() text_vocab.build() ent_text_vocab.build() title_vocab.build() datasets = [ GWdataset( exs, ent_vocab, rel_vocab, text_vocab, ent_text_vocab, title_vocab, device, ) for exs in datasets ] with open(save, "wb") as f: pickle.dump(datasets, f) return datasets if __name__ == "__main__": ds = get_datasets( [ "data/unprocessed.val.json", "data/unprocessed.val.json", "data/unprocessed.test.json", ] ) print(ds[0].exs[0]) print( ds[0] .exs[0] .get_tensor( ds[0].ent_vocab, ds[0].rel_vocab, ds[0].text_vocab, ds[0].ent_text_vocab, ds[0].title_vocab, ) ) ================================================ FILE: examples/pytorch/gxn/README.md ================================================ # DGL Implementation of Graph Cross Networks with Vertex Infomax Pooling (NeurIPS 2020) This DGL example implements the GNN model proposed in the paper [Graph Cross Networks with Vertex Infomax Pooling](https://arxiv.org/pdf/2010.01804.pdf). The author's codes of implementation is in [here](https://github.com/limaosen0/GXN) The graph dataset used in this example --------------------------------------- The DGL's built-in LegacyTUDataset. This is a serial of graph kernel datasets for graph classification. 
We use 'DD', 'PROTEINS', 'ENZYMES', 'IMDB-BINARY', 'IMDB-MULTI' and 'COLLAB' in this GXN implementation. All these datasets are randomly split into a train set and a test set with ratios 0.9 and 0.1 (which is similar to the setting in the author's implementation).

NOTE: Following the setting of the author's implementation, for 'DD' and 'PROTEINS' we use the one-hot node label as the input node feature. For 'ENZYMES', 'IMDB-BINARY', 'IMDB-MULTI' and 'COLLAB', we use the concatenation of the one-hot node label (if available) and the one-hot node degree as the input node feature.

|                  | DD     | PROTEINS | ENZYMES | IMDB-BINARY  | IMDB-MULTI | COLLAB   |
| ---------------- | ------ | -------- | ------- | ------------ | ---------- | -------- |
| NumGraphs        | 1178   | 1113     | 600     | 1000         | 1500       | 5000     |
| AvgNodesPerGraph | 284.32 | 39.06    | 32.63   | 19.77        | 13.00      | 74.49    |
| AvgEdgesPerGraph | 715.66 | 72.82    | 62.14   | 96.53        | 65.94      | 2457.78  |
| NumFeats         | 89     | 1        | 18      | -            | -          | -        |
| NumClasses       | 2      | 2        | 6       | 2            | 3          | 2        |

How to run example files
--------------------------------
If you want to reproduce the author's result, at the root directory of this example (gxn), run

```bash
bash scripts/run_gxn.sh ${dataset_name} ${device_id} ${num_trials} ${print_trainlog_every}
```

If you want to perform an early-stop version of the experiment, at the root directory of this example, run

```bash
bash scripts/run_gxn_early_stop.sh ${dataset_name} ${device_id} ${num_trials} ${print_trainlog_every}
```

where
- dataset_name: Dataset name used in this experiment. Could be 'DD', 'PROTEINS', 'ENZYMES', 'IMDB-BINARY', 'IMDB-MULTI' and 'COLLAB'.
- device_id: ID of the computation device. -1 for pure CPU computation. For example, if you only have a single GPU, set this value to 0.
- num_trials: How many times the experiment is conducted.
- print_trainlog_every: Print the training log every ? epochs. -1 for silent training.
NOTE: If you have problems when using 'IMDB-BINARY', 'IMDB-MULTI' and 'COLLAB', they could be caused by a bug in `LegacyTUDataset`/`TUDataset` in DGL (see [here](https://github.com/dmlc/dgl/pull/2543)). If your DGL version is less than or equal to 0.5.3 and you encounter problems like "undefined variable" (`LegacyTUDataset`) or "the argument `force_reload=False` does not work" (`TUDataset`), try:

- use `TUDataset` with `force_reload=True`
- delete dataset files
- change `degree_as_feature(dataset)` and `node_label_as_feature(dataset, mode=mode)` to `degree_as_feature(dataset, save=False)` and `node_label_as_feature(dataset, mode=mode, save=False)` in `main.py`.

Performance
-------------------------

**Accuracy**

**NOTE**: Different from our implementation, the author uses a fixed dataset split. Thus there may be differences between our results and the author's results. **To compare our implementation with the author's, we follow the setting in the author's implementation that performs model selection on the test set**. We also try early stopping with patience equal to 1/5 of the total number of epochs for some datasets. The results of `Author's Code` in the table below are obtained using the first fold of the data as the test dataset.
| | DD | PROTEINS | ENZYMES | IMDB-BINARY | IMDB-MULTI | COLLAB | | ------------------| ------------ | ----------- | ----------- | ----------- | ---------- | ---------- | | Reported in Paper | 82.68(4.1 ) | 79.91(4.1) | 57.50(6.1) | 78.60(2.3) | 55.20(2.5) | 78.82(1.4) | | Author's Code | 82.05 | 72.07 | 58.33 | 77.00 | 56.00 | 80.40 | | DGL | 82.97(3.0) | 78.21(2.0) | 57.50(5.5) | 78.70(4.0) | 52.26(2.0) | 80.58(2.4) | | DGL(early-stop) | 78.66(4.3) | 73.12(3.1) | 39.83(7.4) | 68.60(6.7) | 45.40(9.4) | 76.18(1.9) | **Speed** Device: - CPU: Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz - GPU: Tesla V100-SXM2 16GB In seconds | | DD | PROTEINS | ENZYMES | IMDB-BINARY | IMDB-MULTI | COLLAB(batch_size=64) | COLLAB(batch_size=20) | | ------------- | ----- | -------- | ------- | ----------- | ---------- | --------------------- | --------------------- | | Author's Code | 25.32 | 2.93 | 1.53 | 2.42 | 3.58 | 96.69 | 19.78 | | DGL | 2.64 | 1.86 | 1.03 | 1.79 | 2.45 | 23.52 | 32.29 | ================================================ FILE: examples/pytorch/gxn/data_preprocess.py ================================================ import json import logging import os import sys import numpy as np import torch from dgl.data import LegacyTUDataset def _load_check_mark(path: str): if os.path.exists(path): with open(path, "r") as f: return json.load(f) else: return {} def _save_check_mark(path: str, marks: dict): with open(path, "w") as f: json.dump(marks, f) def node_label_as_feature(dataset: LegacyTUDataset, mode="concat", save=True): """ Description ----------- Add node labels to graph node features dict Parameters ---------- dataset : LegacyTUDataset The dataset object concat : str, optional How to add node label to the graph. Valid options are "add", "replace" and "concat". - "add": Directly add node_label to graph node feature dict. 
- "concat": Concatenate "feat" and "node_label" - "replace": Use "node_label" as "feat" Default: :obj:`"concat"` save : bool, optional Save the result dataset. Default: :obj:`True` """ # check if node label is not available if ( not os.path.exists(dataset._file_path("node_labels")) or len(dataset) == 0 ): logging.warning("No Node Label Data") return dataset # check if has cached value check_mark_name = "node_label_as_feature" check_mark_path = os.path.join( dataset.save_path, "info_{}_{}.json".format(dataset.name, dataset.hash) ) check_mark = _load_check_mark(check_mark_path) if ( check_mark_name in check_mark and check_mark[check_mark_name] and not dataset._force_reload ): logging.warning("Using cached value in node_label_as_feature") return dataset logging.warning( "Adding node labels into node features..., mode={}".format(mode) ) # check if graph has "feat" if "feat" not in dataset[0][0].ndata: logging.warning("Dataset has no node feature 'feat'") if mode.lower() == "concat": mode = "replace" # first read node labels DS_node_labels = dataset._idx_from_zero( np.loadtxt(dataset._file_path("node_labels"), dtype=int) ) one_hot_node_labels = dataset._to_onehot(DS_node_labels) # read graph idx DS_indicator = dataset._idx_from_zero( np.genfromtxt(dataset._file_path("graph_indicator"), dtype=int) ) node_idx_list = [] for idx in range(np.max(DS_indicator) + 1): node_idx = np.where(DS_indicator == idx) node_idx_list.append(node_idx[0]) # add to node feature dict for idx, g in zip(node_idx_list, dataset.graph_lists): node_labels_tensor = torch.tensor(one_hot_node_labels[idx, :]) if mode.lower() == "concat": g.ndata["feat"] = torch.cat( (g.ndata["feat"], node_labels_tensor), dim=1 ) elif mode.lower() == "add": g.ndata["node_label"] = node_labels_tensor else: # replace g.ndata["feat"] = node_labels_tensor if save: check_mark[check_mark_name] = True _save_check_mark(check_mark_path, check_mark) dataset.save() return dataset def degree_as_feature(dataset: LegacyTUDataset, 
save=True): """ Description ----------- Use node degree (in one-hot format) as node feature Parameters ---------- dataset : LegacyTUDataset The dataset object save : bool, optional Save the result dataset. Default: :obj:`True` """ # first check if already have such feature check_mark_name = "degree_as_feat" feat_name = "feat" check_mark_path = os.path.join( dataset.save_path, "info_{}_{}.json".format(dataset.name, dataset.hash) ) check_mark = _load_check_mark(check_mark_path) if ( check_mark_name in check_mark and check_mark[check_mark_name] and not dataset._force_reload ): logging.warning("Using cached value in 'degree_as_feature'") return dataset logging.warning("Adding node degree into node features...") min_degree = sys.maxsize max_degree = 0 for i in range(len(dataset)): degrees = dataset.graph_lists[i].in_degrees() min_degree = min(min_degree, degrees.min().item()) max_degree = max(max_degree, degrees.max().item()) vec_len = max_degree - min_degree + 1 for i in range(len(dataset)): num_nodes = dataset.graph_lists[i].num_nodes() node_feat = torch.zeros((num_nodes, vec_len)) degrees = dataset.graph_lists[i].in_degrees() node_feat[torch.arange(num_nodes), degrees - min_degree] = 1.0 dataset.graph_lists[i].ndata[feat_name] = node_feat if save: check_mark[check_mark_name] = True dataset.save() _save_check_mark(check_mark_path, check_mark) return dataset ================================================ FILE: examples/pytorch/gxn/layers.py ================================================ from typing import Optional import dgl import torch import torch.nn from dgl import DGLGraph from dgl.nn import GraphConv from torch import Tensor class GraphConvWithDropout(GraphConv): """ A GraphConv followed by a Dropout. 
""" def __init__( self, in_feats, out_feats, dropout=0.3, norm="both", weight=True, bias=True, activation=None, allow_zero_in_degree=False, ): super(GraphConvWithDropout, self).__init__( in_feats, out_feats, norm, weight, bias, activation, allow_zero_in_degree, ) self.dropout = torch.nn.Dropout(p=dropout) def call(self, graph, feat, weight=None): feat = self.dropout(feat) return super(GraphConvWithDropout, self).call(graph, feat, weight) class Discriminator(torch.nn.Module): """ Description ----------- A discriminator used to let the network to discrimate between positive (neighborhood of center node) and negative (any neighborhood in graph) samplings. Parameters ---------- feat_dim : int The number of channels of node features. """ def __init__(self, feat_dim: int): super(Discriminator, self).__init__() self.affine = torch.nn.Bilinear(feat_dim, feat_dim, 1) self.reset_parameters() def reset_parameters(self): torch.nn.init.xavier_uniform_(self.affine.weight) torch.nn.init.zeros_(self.affine.bias) def forward( self, h_x: Tensor, h_pos: Tensor, h_neg: Tensor, bias_pos: Optional[Tensor] = None, bias_neg: Optional[Tensor] = None, ): """ Parameters ---------- h_x : torch.Tensor Node features, shape: :obj:`(num_nodes, feat_dim)` h_pos : torch.Tensor The node features of positive samples It has the same shape as :obj:`h_x` h_neg : torch.Tensor The node features of negative samples It has the same shape as :obj:`h_x` bias_pos : torch.Tensor Bias parameter vector for positive scores shape: :obj:`(num_nodes)` bias_neg : torch.Tensor Bias parameter vector for negative scores shape: :obj:`(num_nodes)` Returns ------- (torch.Tensor, torch.Tensor) The output scores with shape (2 * num_nodes,), (num_nodes,) """ score_pos = self.affine(h_pos, h_x).squeeze() score_neg = self.affine(h_neg, h_x).squeeze() if bias_pos is not None: score_pos = score_pos + bias_pos if bias_neg is not None: score_neg = score_neg + bias_neg logits = torch.cat((score_pos, score_neg), 0) return logits, 
class DenseLayer(torch.nn.Module):
    """
    Description
    -----------
    Dense layer with a linear layer and an activation function

    Parameters
    ----------
    in_dim : int
        Input feature size of the linear layer.
    out_dim : int
        Output feature size of the linear layer.
    act : str, optional
        ``"prelu"`` (case-insensitive) selects :class:`torch.nn.PReLU`; any
        other value selects :func:`torch.relu`.
        Default: :obj:`'prelu'`
    bias : bool, optional
        Whether the linear layer has a bias term.
        Default: :obj:`True`
    """

    def __init__(
        self, in_dim: int, out_dim: int, act: str = "prelu", bias=True
    ):
        super(DenseLayer, self).__init__()
        self.lin = torch.nn.Linear(in_dim, out_dim, bias=bias)
        self.act_type = act.lower()
        # Create the activation exactly once. It used to be rebuilt inside
        # reset_parameters, so a later reset replaced the PReLU module and
        # left any optimizer holding a parameter the layer no longer used.
        if self.act_type == "prelu":
            self.act = torch.nn.PReLU()
        else:
            self.act = torch.relu
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize the linear layer and the PReLU slope in place."""
        torch.nn.init.xavier_uniform_(self.lin.weight)
        if self.lin.bias is not None:
            torch.nn.init.zeros_(self.lin.bias)
        if isinstance(self.act, torch.nn.PReLU):
            # 0.25 is the default initial slope of torch.nn.PReLU
            torch.nn.init.constant_(self.act.weight, 0.25)

    def forward(self, x):
        x = self.lin(x)
        return self.act(x)
class GraphPool(torch.nn.Module):
    """
    Description
    -----------
    Pooling module: keeps the features (and optionally the topology) of the
    selected nodes only.

    Parameters
    ----------
    hidden_dim : int
        The number of channels of node features.
    use_gcn : bool, optional
        Whether to run a graph convolution on the features before selecting
        nodes.
        default: :obj:`False`
    """

    def __init__(self, hidden_dim: int, use_gcn=False):
        super(GraphPool, self).__init__()
        self.use_gcn = use_gcn
        if use_gcn:
            self.down_sample_gcn = GraphConvWithDropout(hidden_dim, hidden_dim)
        else:
            self.down_sample_gcn = None

    def forward(
        self,
        graph: DGLGraph,
        feat: Tensor,
        select_idx: Tensor,
        non_select_idx: Optional[Tensor] = None,
        scores: Optional[Tensor] = None,
        pool_graph=False,
    ):
        """
        Description
        -----------
        Perform graph pooling.

        Parameters
        ----------
        graph : dgl.DGLGraph
            The input graph
        feat : torch.Tensor
            The input node feature
        select_idx : torch.Tensor
            Indices of the nodes to keep; used to index ``feat`` and, when
            ``pool_graph`` is set, to build the node subgraph.
        non_select_idx : torch.Tensor, optional
            Accepted for interface symmetry; not read by this method.
            default: :obj:`None`
        scores : torch.Tensor, optional
            Per-selected-node scores; when given, the kept features are
            multiplied by them (broadcast over the feature dimension).
            default: :obj:`None`
        pool_graph : bool, optional
            Whether to also pool the graph topology.
            default: :obj:`False`

        Returns
        -------
        torch.Tensor or (torch.Tensor, dgl.DGLGraph)
            The pooled features, plus the node subgraph when ``pool_graph``
            is true.
        """
        if self.use_gcn:
            feat = self.down_sample_gcn(graph, feat)

        pooled = feat[select_idx]
        if scores is not None:
            pooled = pooled * scores.unsqueeze(-1)

        if not pool_graph:
            return pooled

        # NOTE(review): the batch node counts are read from the graph before
        # pooling and written to the subgraph unchanged - confirm this is the
        # intended bookkeeping.
        batch_sizes = graph.batch_num_nodes()
        sub_graph = dgl.node_subgraph(graph, select_idx)
        sub_graph.set_batch_num_nodes(batch_sizes)
        return pooled, sub_graph
""" fine_feat = torch.zeros( (graph.num_nodes(), feat.size(-1)), device=feat.device ) fine_feat[select_idx] = feat fine_feat = self.up_sample_gcn(graph, fine_feat) return fine_feat ================================================ FILE: examples/pytorch/gxn/main.py ================================================ import json import os from datetime import datetime from time import time import dgl import torch import torch.nn.functional as F from data_preprocess import degree_as_feature, node_label_as_feature from dgl.data import LegacyTUDataset from dgl.dataloading import GraphDataLoader from networks import GraphClassifier from torch import Tensor from torch.utils.data import random_split from utils import get_stats, parse_args def compute_loss( cls_logits: Tensor, labels: Tensor, logits_s1: Tensor, logits_s2: Tensor, epoch: int, total_epochs: int, device: torch.device, ): # classification loss classify_loss = F.nll_loss(cls_logits, labels.to(device)) # loss for vertex infomax pooling scale1, scale2 = logits_s1.size(0) // 2, logits_s2.size(0) // 2 s1_label_t, s1_label_f = torch.ones(scale1), torch.zeros(scale1) s2_label_t, s2_label_f = torch.ones(scale2), torch.zeros(scale2) s1_label = torch.cat((s1_label_t, s1_label_f), dim=0).to(device) s2_label = torch.cat((s2_label_t, s2_label_f), dim=0).to(device) pool_loss_s1 = F.binary_cross_entropy_with_logits(logits_s1, s1_label) pool_loss_s2 = F.binary_cross_entropy_with_logits(logits_s2, s2_label) pool_loss = (pool_loss_s1 + pool_loss_s2) / 2 loss = classify_loss + (2 - epoch / total_epochs) * pool_loss return loss def train( model: torch.nn.Module, optimizer, trainloader, device, curr_epoch, total_epochs, ): model.train() total_loss = 0.0 num_batches = len(trainloader) for batch in trainloader: optimizer.zero_grad() batch_graphs, batch_labels = batch batch_graphs = batch_graphs.to(device) batch_labels = batch_labels.long().to(device) out, l1, l2 = model(batch_graphs, batch_graphs.ndata["feat"]) loss = compute_loss( out, 
@torch.no_grad()
def test(model: torch.nn.Module, loader, device):
    """Return the classification accuracy of ``model`` over ``loader``.

    The model is switched to eval mode. Every batch is a
    ``(batch_graphs, batch_labels)`` pair; the model is called with the
    batched graph and its ``"feat"`` node data, and the first element of its
    output is arg-maxed over dimension 1 to get the predictions.
    """
    model.eval()
    num_correct = 0.0
    total = 0
    for graphs, targets in loader:
        total += targets.size(0)
        graphs = graphs.to(device)
        targets = targets.long().to(device)
        scores, _, _ = model(graphs, graphs.ndata["feat"])
        num_correct += scores.argmax(dim=1).eq(targets).sum().item()
    return num_correct / total
if __name__ == "__main__":
    # Run the experiment ``num_trials`` times and dump a JSON summary.
    args = parse_args()
    accuracies = []
    epoch_times = []
    for trial in range(1, args.num_trials + 1):
        print("Trial {}/{}".format(trial, args.num_trials))
        trial_acc, trial_time = main(args)
        accuracies.append(trial_acc)
        epoch_times.append(trial_time)

    mean, err_bd = get_stats(accuracies, conf_interval=False)
    print("mean acc: {:.4f}, error bound: {:.4f}".format(mean, err_bd))

    avg_time = sum(epoch_times) / len(epoch_times)
    summary = {
        "hyper-parameters": vars(args),
        "result_date": str(datetime.now()),
        "result": "{:.4f}(+-{:.4f})".format(mean, err_bd),
        "train_time": "{:.4f}".format(avg_time),
        "details": accuracies,
    }

    log_path = os.path.join(args.output_path, "{}.log".format(args.dataset))
    with open(log_path, "w") as log_file:
        json.dump(summary, log_file, sort_keys=True, indent=4)
def train(
    model: torch.nn.Module,
    optimizer,
    trainloader,
    device,
    curr_epoch,
    total_epochs,
):
    """Run one training epoch and return the mean batch loss.

    Every batch is a ``(batch_graphs, batch_labels)`` pair. The model is
    called with the batched graph and its ``"feat"`` node data and must
    return the classifier output plus the two pooling logit vectors, which
    are combined by ``compute_loss``.
    """
    model.train()
    num_batches = len(trainloader)
    running_loss = 0.0
    for graphs, targets in trainloader:
        optimizer.zero_grad()
        graphs = graphs.to(device)
        targets = targets.long().to(device)
        out, logits_s1, logits_s2 = model(graphs, graphs.ndata["feat"])
        batch_loss = compute_loss(
            out, targets, logits_s1, logits_s2, curr_epoch, total_epochs, device
        )
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()
    return running_loss / num_batches
def main(args):
    """Run one full training trial with early stopping on validation loss.

    Loads the TU dataset named by ``args.dataset``, splits it 80/10/10 into
    train/validation/test, trains a ``GraphClassifier`` and stops once the
    validation loss has failed to improve for more than ``args.patience``
    epochs.

    Returns
    -------
    tuple
        ``(best_test_acc, mean_epoch_train_time)`` where ``best_test_acc`` is
        the test accuracy recorded at the epoch with the lowest validation
        loss.
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # Normalise self loops graph by graph, since "add_self_loop" does not
    # support batched graphs: drop existing loops first, then add exactly one.
    for idx in range(len(dataset)):
        without_loops = dgl.remove_self_loop(dataset.graph_lists[idx])
        dataset.graph_lists[idx] = without_loops
        dataset.graph_lists[idx] = dgl.add_self_loop(without_loops)

    # use degree as node feature
    if args.degree_as_feature:
        dataset = degree_as_feature(dataset)
        mode = "concat"
    else:
        mode = "replace"
    dataset = node_label_as_feature(dataset, mode=mode)

    total = len(dataset)
    num_training = int(total * 0.8)
    num_val = int(total * 0.1)
    num_test = total - num_training - num_val
    train_set, val_set, test_set = random_split(
        dataset, [num_training, num_val, num_test]
    )

    loader_kwargs = {"batch_size": args.batch_size, "num_workers": 1}
    train_loader = GraphDataLoader(train_set, shuffle=True, **loader_kwargs)
    val_loader = GraphDataLoader(val_set, **loader_kwargs)
    test_loader = GraphDataLoader(test_set, **loader_kwargs)

    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    args.in_dim = int(num_feature)
    args.out_dim = int(num_classes)
    args.edge_feat_dim = 0  # No edge feature in datasets that we use.

    model = GraphClassifier(args).to(device)

    # Step 3: Create training components ===================================================== #
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=args.lr,
        amsgrad=True,
        weight_decay=args.weight_decay,
    )

    # Step 4: training epochs ================================================================ #
    best_val_loss = float("inf")
    best_test_acc = 0.0
    best_epoch = -1
    bad_count = 0
    train_times = []
    log_format = "Epoch {}: loss={:.4f}, test_acc={:.4f}, best_test_acc={:.4f}"

    for epoch in range(args.epochs):
        tic = time()
        train_loss = train(
            model, optimizer, train_loader, device, epoch, args.epochs
        )
        train_times.append(time() - tic)

        _, val_loss = validate(model, val_loader, device, epoch, args.epochs)
        test_acc = test(model, test_loader, device)

        if best_val_loss > val_loss:
            # New best validation loss: remember it and reset the patience
            # counter.
            best_val_loss, best_epoch, best_test_acc = val_loss, epoch, test_acc
            bad_count = 0
        else:
            bad_count += 1

        if bad_count > args.patience:
            break

        if (epoch + 1) % args.print_every == 0:
            print(
                log_format.format(
                    epoch + 1, train_loss, test_acc, best_test_acc
                )
            )

    print(
        "Best Epoch {}, final test acc {:.4f}".format(best_epoch, best_test_acc)
    )
    return best_test_acc, sum(train_times) / len(train_times)
examples/pytorch/gxn/networks.py ================================================ from typing import List, Tuple, Union from layers import * import dgl.function as fn import torch import torch.nn import torch.nn.functional as F from dgl.nn.pytorch.glob import SortPooling class GraphCrossModule(torch.nn.Module): """ Description ----------- The Graph Cross Module used by Graph Cross Networks. This module only contains graph cross layers. Parameters ---------- pool_ratios : Union[float, List[float]] The pooling ratios (for keeping nodes) for each layer. For example, if `pool_ratio=0.8`, 80\% nodes will be preserved. If a single float number is given, all pooling layers will have the same pooling ratio. in_dim : int The number of input node feature channels. out_dim : int The number of output node feature channels. hidden_dim : int The number of hidden node feature channels. cross_weight : float, optional The weight parameter used in graph cross layers Default: :obj:`1.0` fuse_weight : float, optional The weight parameter used at the end of GXN for channel fusion. 
Default: :obj:`1.0` """ def __init__( self, pool_ratios: Union[float, List[float]], in_dim: int, out_dim: int, hidden_dim: int, cross_weight: float = 1.0, fuse_weight: float = 1.0, dist: int = 1, num_cross_layers: int = 2, ): super(GraphCrossModule, self).__init__() if isinstance(pool_ratios, float): pool_ratios = (pool_ratios, pool_ratios) self.cross_weight = cross_weight self.fuse_weight = fuse_weight self.num_cross_layers = num_cross_layers # build network self.start_gcn_scale1 = GraphConvWithDropout(in_dim, hidden_dim) self.start_gcn_scale2 = GraphConvWithDropout(hidden_dim, hidden_dim) self.end_gcn = GraphConvWithDropout(2 * hidden_dim, out_dim) self.index_select_scale1 = IndexSelect( pool_ratios[0], hidden_dim, act="prelu", dist=dist ) self.index_select_scale2 = IndexSelect( pool_ratios[1], hidden_dim, act="prelu", dist=dist ) self.start_pool_s12 = GraphPool(hidden_dim) self.start_pool_s23 = GraphPool(hidden_dim) self.end_unpool_s21 = GraphUnpool(hidden_dim) self.end_unpool_s32 = GraphUnpool(hidden_dim) self.s1_l1_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s1_l2_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s1_l3_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s2_l1_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s2_l2_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s2_l3_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s3_l1_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s3_l2_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) self.s3_l3_gcn = GraphConvWithDropout(hidden_dim, hidden_dim) if num_cross_layers >= 1: self.pool_s12_1 = GraphPool(hidden_dim, use_gcn=True) self.unpool_s21_1 = GraphUnpool(hidden_dim) self.pool_s23_1 = GraphPool(hidden_dim, use_gcn=True) self.unpool_s32_1 = GraphUnpool(hidden_dim) if num_cross_layers >= 2: self.pool_s12_2 = GraphPool(hidden_dim, use_gcn=True) self.unpool_s21_2 = GraphUnpool(hidden_dim) self.pool_s23_2 = GraphPool(hidden_dim, use_gcn=True) 
self.unpool_s32_2 = GraphUnpool(hidden_dim) def forward(self, graph, feat): # start of scale-1 graph_scale1 = graph feat_scale1 = self.start_gcn_scale1(graph_scale1, feat) feat_origin = feat_scale1 feat_scale1_neg = feat_scale1[ torch.randperm(feat_scale1.size(0)) ] # negative samples ( logit_s1, scores_s1, select_idx_s1, non_select_idx_s1, feat_down_s1, ) = self.index_select_scale1(graph_scale1, feat_scale1, feat_scale1_neg) feat_scale2, graph_scale2 = self.start_pool_s12( graph_scale1, feat_scale1, select_idx_s1, non_select_idx_s1, scores_s1, pool_graph=True, ) # start of scale-2 feat_scale2 = self.start_gcn_scale2(graph_scale2, feat_scale2) feat_scale2_neg = feat_scale2[ torch.randperm(feat_scale2.size(0)) ] # negative samples ( logit_s2, scores_s2, select_idx_s2, non_select_idx_s2, feat_down_s2, ) = self.index_select_scale2(graph_scale2, feat_scale2, feat_scale2_neg) feat_scale3, graph_scale3 = self.start_pool_s23( graph_scale2, feat_scale2, select_idx_s2, non_select_idx_s2, scores_s2, pool_graph=True, ) # layer-1 res_s1_0, res_s2_0, res_s3_0 = feat_scale1, feat_scale2, feat_scale3 feat_scale1 = F.relu(self.s1_l1_gcn(graph_scale1, feat_scale1)) feat_scale2 = F.relu(self.s2_l1_gcn(graph_scale2, feat_scale2)) feat_scale3 = F.relu(self.s3_l1_gcn(graph_scale3, feat_scale3)) if self.num_cross_layers >= 1: feat_s12_fu = self.pool_s12_1( graph_scale1, feat_scale1, select_idx_s1, non_select_idx_s1, scores_s1, ) feat_s21_fu = self.unpool_s21_1( graph_scale1, feat_scale2, select_idx_s1 ) feat_s23_fu = self.pool_s23_1( graph_scale2, feat_scale2, select_idx_s2, non_select_idx_s2, scores_s2, ) feat_s32_fu = self.unpool_s32_1( graph_scale2, feat_scale3, select_idx_s2 ) feat_scale1 = ( feat_scale1 + self.cross_weight * feat_s21_fu + res_s1_0 ) feat_scale2 = ( feat_scale2 + self.cross_weight * (feat_s12_fu + feat_s32_fu) / 2 + res_s2_0 ) feat_scale3 = ( feat_scale3 + self.cross_weight * feat_s23_fu + res_s3_0 ) # layer-2 feat_scale1 = F.relu(self.s1_l2_gcn(graph_scale1, 
class GraphCrossNet(torch.nn.Module):
    """
    Description
    -----------
    The Graph Cross Network.

    Parameters
    ----------
    in_dim : int
        The number of input node feature channels.
    out_dim : int
        The number of output node feature channels. If it is not positive,
        no output linear layer is created and the flattened convolution
        result is returned instead.
    edge_feat_dim : int, optional
        The number of input edge feature channels. Edge feature
        will be passed to a Linear layer, summed over the incoming edges of
        each node and concatenated to input node features.
        Default: :obj:`0`
    hidden_dim : int, optional
        The number of hidden node feature channels.
        Default: :obj:`96`
    pool_ratios : Union[float, List[float]], optional
        The pooling ratios (for keeping nodes) for each layer.
        For example, if `pool_ratio=0.8`, 80% nodes will be preserved.
        If a single float number is given, all pooling layers will have the
        same pooling ratio.
        Default: :obj:`[0.9, 0.7]`
    readout_nodes : int, optional
        Number of nodes preserved in the final sort pool operation.
        Default: :obj:`30`
    conv1d_dims : List[int], optional
        The number of kernels of Conv1d operations.
        Default: :obj:`[16, 32]`
    conv1d_kws : List[int], optional
        The kernel size of Conv1d.
        Default: :obj:`[5]`
    cross_weight : float, optional
        The weight parameter used in graph cross layers
        Default: :obj:`1.0`
    fuse_weight : float, optional
        The weight parameter used at the end of GXN for channel fusion.
        Default: :obj:`1.0`
    dist : int, optional
        Forwarded unchanged to the graph cross module.
        Default: :obj:`1`
    num_cross_layers : int, optional
        The number of graph cross layers, forwarded to the graph cross
        module.
        Default: :obj:`2`
    """

    def __init__(
        self,
        in_dim: int,
        out_dim: int,
        edge_feat_dim: int = 0,
        hidden_dim: int = 96,
        # NOTE: the list defaults below are only read, never mutated.
        pool_ratios: Union[List[float], float] = [0.9, 0.7],
        readout_nodes: int = 30,
        conv1d_dims: List[int] = [16, 32],
        conv1d_kws: List[int] = [5],
        cross_weight: float = 1.0,
        fuse_weight: float = 1.0,
        dist: int = 1,
        num_cross_layers: int = 2,
    ):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hidden_dim = hidden_dim
        self.edge_feat_dim = edge_feat_dim
        self.readout_nodes = readout_nodes
        # The first convolution uses hidden_dim as kernel size and stride;
        # building a new list keeps the caller's (or default) list untouched.
        conv1d_kws = [hidden_dim, *conv1d_kws]

        if edge_feat_dim > 0:
            # Projected edge features are concatenated to the node features,
            # widening the input of the graph cross module.
            self.in_dim += hidden_dim
            self.e2l_lin = torch.nn.Linear(edge_feat_dim, hidden_dim)
        else:
            self.e2l_lin = None

        self.gxn = GraphCrossModule(
            pool_ratios,
            in_dim=self.in_dim,
            out_dim=hidden_dim,
            hidden_dim=hidden_dim // 2,
            cross_weight=cross_weight,
            fuse_weight=fuse_weight,
            dist=dist,
            num_cross_layers=num_cross_layers,
        )
        self.sortpool = SortPooling(readout_nodes)

        # final updates
        self.final_conv1 = torch.nn.Conv1d(
            1, conv1d_dims[0], kernel_size=conv1d_kws[0], stride=conv1d_kws[0]
        )
        self.final_maxpool = torch.nn.MaxPool1d(2, 2)
        self.final_conv2 = torch.nn.Conv1d(
            conv1d_dims[0], conv1d_dims[1], kernel_size=conv1d_kws[1], stride=1
        )
        # Width of the flattened feature fed to the output layer: length after
        # the max pool, shrunk by the second convolution, times its channels.
        self.final_dense_dim = int((readout_nodes - 2) / 2 + 1)
        self.final_dense_dim = (
            self.final_dense_dim - conv1d_kws[1] + 1
        ) * conv1d_dims[1]

        if self.out_dim > 0:
            self.out_lin = torch.nn.Linear(self.final_dense_dim, out_dim)

        self.init_weights()

    def init_weights(self):
        """Apply Xavier-normal initialisation to the weights created here."""
        if self.e2l_lin is not None:
            torch.nn.init.xavier_normal_(self.e2l_lin.weight)
        torch.nn.init.xavier_normal_(self.final_conv1.weight)
        torch.nn.init.xavier_normal_(self.final_conv2.weight)
        if self.out_dim > 0:
            torch.nn.init.xavier_normal_(self.out_lin.weight)

    def forward(
        self,
        graph: DGLGraph,
        node_feat: Tensor,
        edge_feat: Optional[Tensor] = None,
    ):
        """Compute graph embeddings and the two pooling logits.

        Parameters
        ----------
        graph : DGLGraph
            The (batched) input graph; ``graph.batch_size`` is used to
            reshape the output.
        node_feat : Tensor
            Input node features.
        edge_feat : Tensor, optional
            Input edge features. Only accepted when the network was built
            with ``edge_feat_dim > 0``.

        Returns
        -------
        tuple
            ``(out, logits1, logits2)``: the per-graph embedding and the
            logits returned by the graph cross module for its two pooling
            stages.

        Raises
        ------
        ValueError
            If ``edge_feat`` is given but the network has no edge projection
            layer.
        """
        num_batch = graph.batch_size
        if edge_feat is not None:
            if self.e2l_lin is None:
                # Previously this surfaced as an opaque
                # "'NoneType' object is not callable".
                raise ValueError(
                    "edge_feat was provided but the network was created "
                    "with edge_feat_dim=0"
                )
            edge_feat = self.e2l_lin(edge_feat)
            with graph.local_scope():
                # Sum the projected features of each node's incoming edges.
                graph.edata["he"] = edge_feat
                graph.update_all(fn.copy_e("he", "m"), fn.sum("m", "hn"))
                edge2node_feat = graph.ndata.pop("hn")
            node_feat = torch.cat((node_feat, edge2node_feat), dim=1)

        node_feat, logits1, logits2 = self.gxn(graph, node_feat)
        batch_sortpool_feats = self.sortpool(graph, node_feat)

        # final updates
        to_conv1d = batch_sortpool_feats.unsqueeze(1)
        conv1d_result = F.relu(self.final_conv1(to_conv1d))
        conv1d_result = self.final_maxpool(conv1d_result)
        conv1d_result = F.relu(self.final_conv2(conv1d_result))

        to_dense = conv1d_result.view(num_batch, -1)
        if self.out_dim > 0:
            out = F.relu(self.out_lin(to_dense))
        else:
            out = to_dense

        return out, logits1, logits2
#!/bin/bash
# Launch GXN training (main.py) with per-dataset hyper-parameters.
#
# Usage: run_gxn.sh [DATA] [device] [num_trials] [print_every]

# input arguments (each falls back to its default when not supplied)
DATA="${1-DD}"  # ENZYMES, DD, PROTEINS, COLLAB, IMDB-BINARY, IMDB-MULTI
device="${2-0}"
num_trials="${3-10}"
print_every="${4-10}"

# general settings (used unless overridden per dataset below)
hidden_gxn=96
k1=0.8
k2=0.7
sortpooling_k=30
hidden_final=128
batch_size=64
dropout=0.5
cross_weight=1.0
fuse_weight=0.9
weight_decay=1e-3

# dataset-specific settings
case "${DATA}" in
IMDB-BINARY)
  num_epochs=200
  learning_rate=0.001
  sortpooling_k=31
  k1=0.8
  k2=0.5
  ;;
IMDB-MULTI)
  num_epochs=200
  learning_rate=0.001
  sortpooling_k=22
  k1=0.8
  k2=0.7
  ;;
COLLAB)
  num_epochs=100
  learning_rate=0.001
  sortpooling_k=130
  k1=0.9
  k2=0.5
  ;;
DD)
  num_epochs=100
  learning_rate=0.0005
  sortpooling_k=291
  k1=0.8
  k2=0.6
  ;;
PROTEINS)
  num_epochs=100
  learning_rate=0.001
  sortpooling_k=32
  k1=0.8
  k2=0.7
  ;;
ENZYMES)
  num_epochs=500
  learning_rate=0.0001
  sortpooling_k=42
  k1=0.7
  k2=0.5
  ;;
*)
  # unknown dataset: keep the general settings above
  num_epochs=500
  learning_rate=0.00001
  ;;
esac

# Every expansion is quoted so a value can never be word-split or globbed,
# each continuation backslash is separated from the preceding value, and the
# last argument carries no dangling continuation.
python main.py \
  --dataset "$DATA" \
  --lr "$learning_rate" \
  --epochs "$num_epochs" \
  --hidden_dim "$hidden_gxn" \
  --final_dense_hidden_dim "$hidden_final" \
  --readout_nodes "$sortpooling_k" \
  --pool_ratios "$k1" "$k2" \
  --batch_size "$batch_size" \
  --device "$device" \
  --dropout "$dropout" \
  --cross_weight "$cross_weight" \
  --fuse_weight "$fuse_weight" \
  --weight_decay "$weight_decay" \
  --num_trials "$num_trials" \
  --print_every "$print_every"
def get_stats(
    array, conf_interval=False, name=None, stdout=False, logout=False
):
    """Compute the mean and an error bound of a numerical array.

    Args:
        array (array like obj): The numerical array, this array can be convert
            to :obj:`torch.Tensor`.
        conf_interval (bool, optional): If True, compute the confidence interval bound (95%)
            instead of the std value. (default: :obj:`False`)
        name (str, optional): The name of this numerical array, for log usage.
            (default: :obj:`None`)
        stdout (bool, optional): Whether to output result to the terminal.
            (default: :obj:`False`)
        logout (bool, optional): Whether to output result via logging module.
            (default: :obj:`False`)

    Returns:
        tuple: ``(mean, err_bound)`` as Python floats. ``err_bound`` is the
        sample standard deviation, or the half-width of the 95% Student-t
        confidence interval when ``conf_interval`` is True. With a single
        value no spread can be estimated and ``err_bound`` is ``0.0``; with
        no values both entries are NaN.
    """
    eps = 1e-9
    values = torch.as_tensor(array, dtype=torch.float32).flatten()
    n = values.numel()

    center = values.mean().item() if n > 0 else float("nan")
    if n > 1:
        std = values.std().item()
    elif n == 1:
        # The unbiased std of one sample is NaN (division by n - 1 == 0);
        # a single trial, which is the default setup, should report zero
        # spread rather than NaN.
        std = 0.0
    else:
        std = float("nan")

    if conf_interval and n > 1:
        se = std / (math.sqrt(n) + eps)
        t_value = t.ppf(0.975, df=n - 1)
        err_bound = t_value * se
    else:
        # Also covers conf_interval with n < 2, where the t quantile is
        # undefined (zero degrees of freedom).
        err_bound = std

    # log and print
    if name is None:
        name = "array {}".format(id(values))
    log = "{}: {:.4f}(+-{:.4f})".format(name, center, err_bound)
    if stdout:
        print(log)
    if logout:
        logging.info(log)

    return center, err_bound
parser.add_argument( "--readout_nodes", type=int, default=30, help="Number of nodes for each graph after final graph pooling", ) parser.add_argument( "--conv1d_dims", nargs="+", type=int, help="Number of channels in conv operations in the end of graph cross net", ) parser.add_argument( "--conv1d_kws", nargs="+", type=int, help="Kernel sizes of conv1d operations", ) parser.add_argument( "--dropout", type=float, default=0.0, help="Dropout rate" ) parser.add_argument( "--embed_dim", type=int, default=1024, help="Number of channels of graph embedding", ) parser.add_argument( "--final_dense_hidden_dim", type=int, default=128, help="The number of hidden channels in final dense layers", ) parser.add_argument("--batch_size", type=int, default=64, help="Batch size") parser.add_argument("--lr", type=float, default=1e-4, help="Learning rate") parser.add_argument( "--weight_decay", type=float, default=0.0, help="Weight decay rate" ) parser.add_argument( "--epochs", type=int, default=1000, help="Number of training epochs" ) parser.add_argument( "--patience", type=int, default=20, help="Patience for early stopping" ) parser.add_argument( "--num_trials", type=int, default=1, help="Number of trials" ) parser.add_argument( "--device", type=int, default=0, help="Computation device id, -1 for cpu", ) parser.add_argument( "--dataset", type=str, default="DD", help="Dataset used for training" ) parser.add_argument( "--seed", type=int, default=-1, help="Random seed, -1 for unset" ) parser.add_argument( "--print_every", type=int, default=10, help="Print train log every ? 
epochs, -1 for silence training", ) parser.add_argument( "--dataset_path", type=str, default="./datasets", help="Path holding your dataset", ) parser.add_argument( "--output_path", type=str, default="./output", help="Path holding your result files", ) args = parser.parse_args() # default value for list hyper-parameters if not args.pool_ratios or len(args.pool_ratios) < 2: args.pool_ratios = [0.8, 0.7] logging.warning( "No valid pool_ratios is given, " "using default value '{}'".format(args.pool_ratios) ) if not args.conv1d_dims or len(args.conv1d_dims) < 2: args.conv1d_dims = [16, 32] logging.warning( "No valid conv1d_dims is give, " "using default value {}".format(args.conv1d_dims) ) if not args.conv1d_kws or len(args.conv1d_kws) < 1: args.conv1d_kws = [5] logging.warning( "No valid conv1d_kws is given, " "using default value '{}'".format(args.conv1d_kws) ) # device args.device = "cpu" if args.device < 0 else "cuda:{}".format(args.device) if not torch.cuda.is_available(): logging.warning("GPU is not available, using CPU for training") args.device = "cpu" else: logging.warning("Device: {}".format(args.device)) # random seed if args.seed >= 0: torch.manual_seed(args.seed) random.seed(args.seed) np.random.seed(args.seed) if args.device != "cpu": torch.cuda.manual_seed(args.seed) torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False # print every if args.print_every < 0: args.print_every = args.epochs + 1 # path paths = [args.output_path, args.dataset_path] for p in paths: if not os.path.exists(p): os.makedirs(p) # datasets ad-hoc if args.dataset in ["COLLAB", "IMDB-BINARY", "IMDB-MULTI", "ENZYMES"]: args.degree_as_feature = True else: args.degree_as_feature = False return args ================================================ FILE: examples/pytorch/han/README.md ================================================ # Heterogeneous Graph Attention Network (HAN) with DGL This is an attempt to implement HAN with DGL's latest APIs for heterogeneous 
def score(logits, labels):
    """Return accuracy, micro-F1 and macro-F1 of the predictions.

    The predicted class of each row is the index of its largest logit along
    dimension 1. Both predictions and labels are moved to the CPU and
    converted to NumPy arrays before the metrics are computed.
    """
    predicted = torch.max(logits, dim=1)[1].long().cpu().numpy()
    truth = labels.cpu().numpy()

    hits = (predicted == truth).sum()
    accuracy = hits / len(predicted)

    micro_f1, macro_f1 = (
        f1_score(truth, predicted, average=kind) for kind in ("micro", "macro")
    )
    return accuracy, micro_f1, macro_f1
( g, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, val_mask, test_mask, ) = load_data(args["dataset"]) if hasattr(torch, "BoolTensor"): train_mask = train_mask.bool() val_mask = val_mask.bool() test_mask = test_mask.bool() features = features.to(args["device"]) labels = labels.to(args["device"]) train_mask = train_mask.to(args["device"]) val_mask = val_mask.to(args["device"]) test_mask = test_mask.to(args["device"]) if args["hetero"]: from model_hetero import HAN model = HAN( meta_paths=[["pa", "ap"], ["pf", "fp"]], in_size=features.shape[1], hidden_size=args["hidden_units"], out_size=num_classes, num_heads=args["num_heads"], dropout=args["dropout"], ).to(args["device"]) g = g.to(args["device"]) else: from model import HAN model = HAN( num_meta_paths=len(g), in_size=features.shape[1], hidden_size=args["hidden_units"], out_size=num_classes, num_heads=args["num_heads"], dropout=args["dropout"], ).to(args["device"]) g = [graph.to(args["device"]) for graph in g] stopper = EarlyStopping(patience=args["patience"]) loss_fcn = torch.nn.CrossEntropyLoss() optimizer = torch.optim.Adam( model.parameters(), lr=args["lr"], weight_decay=args["weight_decay"] ) for epoch in range(args["num_epochs"]): model.train() logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() train_acc, train_micro_f1, train_macro_f1 = score( logits[train_mask], labels[train_mask] ) val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate( model, g, features, labels, val_mask, loss_fcn ) early_stop = stopper.step(val_loss.data.item(), val_acc, model) print( "Epoch {:d} | Train Loss {:.4f} | Train Micro f1 {:.4f} | Train Macro f1 {:.4f} | " "Val Loss {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}".format( epoch + 1, loss.item(), train_micro_f1, train_macro_f1, val_loss.item(), val_micro_f1, val_macro_f1, ) ) if early_stop: break stopper.load_checkpoint(model) test_loss, test_acc, 
class SemanticAttention(nn.Module):
    """Weight and sum the per-metapath embeddings of every node.

    A two-layer projection (Linear -> Tanh -> Linear without bias) scores
    each embedding; the scores are averaged over the nodes, turned into
    weights with a softmax over the metapaths, and used for a weighted sum.
    """

    def __init__(self, in_size, hidden_size=128):
        super().__init__()
        scorer = [
            nn.Linear(in_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False),
        ]
        self.project = nn.Sequential(*scorer)

    def forward(self, z):
        # z: (N, M, D * K) -> one averaged score per metapath: (M, 1)
        mean_scores = self.project(z).mean(dim=0)
        # normalise across the M metapaths: (M, 1)
        weights = torch.softmax(mean_scores, dim=0)
        # broadcast the weights over the N nodes and sum out M: (N, D * K)
        weighted = weights.unsqueeze(0) * z
        return weighted.sum(dim=1)
class HAN(nn.Module):
    """Stack of HAN layers followed by a linear prediction head.

    One ``HANLayer`` is created per entry of ``num_heads``. The first layer
    consumes ``in_size`` features; every later layer consumes
    ``hidden_size * num_heads[previous]`` features. The head maps
    ``hidden_size * num_heads[-1]`` features to ``out_size`` outputs.
    """

    def __init__(
        self, num_meta_paths, in_size, hidden_size, out_size, num_heads, dropout
    ):
        super().__init__()
        # Input width of each layer: the raw features first, then the
        # concatenated heads of the layer before it.
        input_widths = [in_size] + [
            hidden_size * num_heads[idx] for idx in range(len(num_heads) - 1)
        ]
        self.layers = nn.ModuleList(
            HANLayer(num_meta_paths, width, hidden_size, heads, dropout)
            for width, heads in zip(input_widths, num_heads)
        )
        self.predict = nn.Linear(hidden_size * num_heads[-1], out_size)

    def forward(self, g, h):
        """Pass ``h`` through every layer with ``g``, then apply the head."""
        hidden = h
        for layer in self.layers:
            hidden = layer(g, hidden)
        return self.predict(hidden)
Because the original HAN implementation only gives the preprocessed homogeneous graph, this model could not reproduce the result in HAN as they did not provide the preprocessing code, and we constructed another dataset from ACM with a different set of papers, connections, features and labels. """ import dgl import torch import torch.nn as nn import torch.nn.functional as F from dgl.nn.pytorch import GATConv class SemanticAttention(nn.Module): def __init__(self, in_size, hidden_size=128): super(SemanticAttention, self).__init__() self.project = nn.Sequential( nn.Linear(in_size, hidden_size), nn.Tanh(), nn.Linear(hidden_size, 1, bias=False), ) def forward(self, z): w = self.project(z).mean(0) # (M, 1) beta = torch.softmax(w, dim=0) # (M, 1) beta = beta.expand((z.shape[0],) + beta.shape) # (N, M, 1) return (beta * z).sum(1) # (N, D * K) class HANLayer(nn.Module): """ HAN layer. Arguments --------- meta_paths : list of metapaths, each as a list of edge types in_size : input feature dimension out_size : output feature dimension layer_num_heads : number of attention heads dropout : Dropout probability Inputs ------ g : DGLGraph The heterogeneous graph h : tensor Input features Outputs ------- tensor The output feature """ def __init__(self, meta_paths, in_size, out_size, layer_num_heads, dropout): super(HANLayer, self).__init__() # One GAT layer for each meta path based adjacency matrix self.gat_layers = nn.ModuleList() for i in range(len(meta_paths)): self.gat_layers.append( GATConv( in_size, out_size, layer_num_heads, dropout, dropout, activation=F.elu, allow_zero_in_degree=True, ) ) self.semantic_attention = SemanticAttention( in_size=out_size * layer_num_heads ) self.meta_paths = list(tuple(meta_path) for meta_path in meta_paths) self._cached_graph = None self._cached_coalesced_graph = {} def forward(self, g, h): semantic_embeddings = [] if self._cached_graph is None or self._cached_graph is not g: self._cached_graph = g self._cached_coalesced_graph.clear() for 
class HAN(nn.Module):
    """Stack of ``HANLayer`` modules followed by a linear prediction head.

    Arguments
    ---------
    meta_paths : list of metapaths, forwarded unchanged to every layer
    in_size : input feature dimension
    hidden_size : per-head output dimension of every layer
    out_size : output dimension of the prediction head
    num_heads : sequence with the number of attention heads of each layer
    dropout : Dropout probability passed to every layer
    """

    def __init__(
        self, meta_paths, in_size, hidden_size, out_size, num_heads, dropout
    ):
        super().__init__()
        # The first layer consumes the raw features; every later layer
        # consumes the concatenated heads of the layer before it.
        layer_inputs = [in_size] + [
            hidden_size * heads for heads in num_heads[:-1]
        ]
        self.layers = nn.ModuleList(
            [
                HANLayer(meta_paths, layer_in, hidden_size, heads, dropout)
                for layer_in, heads in zip(layer_inputs, num_heads)
            ]
        )
        self.predict = nn.Linear(hidden_size * num_heads[-1], out_size)

    def forward(self, g, h):
        """Run ``h`` through every layer with ``g`` and apply the head."""
        hidden = h
        for layer in self.layers:
            hidden = layer(g, hidden)
        return self.predict(hidden)
class HAN(nn.Module):
    """Stack of ``HANLayer`` modules followed by a linear prediction head.

    Arguments
    ---------
    num_metapath : number of metapath based sub-graphs handled per layer
    in_size : input feature dimension
    hidden_size : per-head output dimension of every layer
    out_size : output dimension of the prediction head
    num_heads : sequence with the number of attention heads of each layer
    dropout : Dropout probability passed to every layer
    """

    def __init__(
        self, num_metapath, in_size, hidden_size, out_size, num_heads, dropout
    ):
        super().__init__()
        # The first layer consumes the raw features; every later layer
        # consumes the concatenated heads of the layer before it.
        layer_inputs = [in_size] + [
            hidden_size * heads for heads in num_heads[:-1]
        ]
        self.layers = nn.ModuleList(
            [
                HANLayer(num_metapath, layer_in, hidden_size, heads, dropout)
                for layer_in, heads in zip(layer_inputs, num_heads)
            ]
        )
        self.predict = nn.Linear(hidden_size * num_heads[-1], out_size)

    def forward(self, g, h):
        """Feed ``(g, h)`` to the first layer, chain the outputs through the
        remaining layers and apply the prediction head."""
        hidden = h
        for layer in self.layers:
            hidden = layer(g, hidden)
        return self.predict(hidden)
def evaluate(
    model,
    g,
    metapath_list,
    num_neighbors,
    features,
    labels,
    val_nid,
    loss_fcn,
    batch_size,
):
    """Evaluate ``model`` on the nodes ``val_nid`` with mini-batch sampling.

    Twice as many neighbors as ``num_neighbors`` are sampled per metapath.

    Parameters
    ----------
    model : the HAN model to evaluate; it is switched to eval mode
    g : graph handed to ``HANSampler``
    metapath_list : metapaths handed to ``HANSampler``
    num_neighbors : training-time neighbor count (doubled here)
    features : node features, indexed by the input node ids of each block
    labels : node labels, indexed by the seed node ids
    val_nid : ids of the nodes to evaluate
    loss_fcn : loss callable taking ``(logits, labels)``
    batch_size : number of seed nodes per mini-batch

    Returns
    -------
    tuple
        ``(loss, accuracy, micro_f1, macro_f1)``. ``loss`` is a tensor holding
        the batch-size weighted mean of the per-batch losses, so it is the
        dataset mean when ``loss_fcn`` uses mean reduction.

    Raises
    ------
    ValueError
        If ``val_nid`` yields no mini-batch.
    """
    model.eval()
    # Use the device the model lives on instead of the module-level ``args``
    # global, which only exists when this file is executed as a script.
    device = next(model.parameters()).device

    han_valid_sampler = HANSampler(
        g, metapath_list, num_neighbors=num_neighbors * 2
    )
    dataloader = DataLoader(
        dataset=val_nid,
        batch_size=batch_size,
        collate_fn=han_valid_sampler.sample_blocks,
        shuffle=False,
        drop_last=False,
        num_workers=4,
    )
    correct = total = 0
    loss_sum = 0.0
    prediction_list = []
    labels_list = []
    with torch.no_grad():
        for seeds, blocks in dataloader:
            h_list = load_subtensors(blocks, features)
            blocks = [block.to(device) for block in blocks]
            hs = [h.to(device) for h in h_list]

            logits = model(blocks, hs)
            seed_labels = labels[numpy.asarray(seeds)]
            batch_loss = loss_fcn(logits, seed_labels.to(device))

            # get each predict label
            _, indices = torch.max(logits, dim=1)
            prediction = indices.long().cpu().numpy()
            labels_batch = seed_labels.cpu().numpy()

            prediction_list.append(prediction)
            labels_list.append(labels_batch)

            batch_count = prediction.shape[0]
            correct += (prediction == labels_batch).sum()
            total += batch_count
            # Weight by batch size so a short final batch does not dominate
            # (previously only the last batch's loss was reported).
            loss_sum = loss_sum + batch_loss * batch_count

    if not prediction_list:
        raise ValueError("evaluate() received no nodes to evaluate")

    total_prediction = numpy.concatenate(prediction_list)
    total_labels = numpy.concatenate(labels_list)
    micro_f1 = f1_score(total_labels, total_prediction, average="micro")
    macro_f1 = f1_score(total_labels, total_prediction, average="macro")
    accuracy = correct / total
    loss = loss_sum / total

    return loss, accuracy, micro_f1, macro_f1
[block.to(args["device"]) for block in blocks] hs = [h.to(args["device"]) for h in h_list] logits = model(blocks, hs) loss = loss_fn( logits, labels[numpy.asarray(seeds)].to(args["device"]) ) optimizer.zero_grad() loss.backward() optimizer.step() # print info in each batch train_acc, train_micro_f1, train_macro_f1 = score( logits, labels[numpy.asarray(seeds)] ) print( "Epoch {:d} | loss: {:.4f} | train_acc: {:.4f} | train_micro_f1: {:.4f} | train_macro_f1: {:.4f}".format( epoch + 1, loss, train_acc, train_micro_f1, train_macro_f1 ) ) val_loss, val_acc, val_micro_f1, val_macro_f1 = evaluate( model, g, metapath_list, num_neighbors, features, labels, val_nid, loss_fn, args["batch_size"], ) early_stop = stopper.step(val_loss.data.item(), val_acc, model) print( "Epoch {:d} | Val loss {:.4f} | Val Accuracy {:.4f} | Val Micro f1 {:.4f} | Val Macro f1 {:.4f}".format( epoch + 1, val_loss.item(), val_acc, val_micro_f1, val_macro_f1 ) ) if early_stop: break stopper.load_checkpoint(model) test_loss, test_acc, test_micro_f1, test_macro_f1 = evaluate( model, g, metapath_list, num_neighbors, features, labels, test_nid, loss_fn, args["batch_size"], ) print( "Test loss {:.4f} | Test Accuracy {:.4f} | Test Micro f1 {:.4f} | Test Macro f1 {:.4f}".format( test_loss.item(), test_acc, test_micro_f1, test_macro_f1 ) ) if __name__ == "__main__": parser = argparse.ArgumentParser("mini-batch HAN") parser.add_argument("-s", "--seed", type=int, default=1, help="Random seed") parser.add_argument("--batch_size", type=int, default=32) parser.add_argument("--num_neighbors", type=int, default=20) parser.add_argument("--lr", type=float, default=0.001) parser.add_argument("--num_heads", type=list, default=[8]) parser.add_argument("--hidden_units", type=int, default=8) parser.add_argument("--dropout", type=float, default=0.6) parser.add_argument("--weight_decay", type=float, default=0.001) parser.add_argument("--num_epochs", type=int, default=100) parser.add_argument("--patience", type=int, 
def mkdir_p(path, log=True):
    """Create a directory for the specified path.

    An already existing directory is not an error, whatever the value of
    ``log`` (previously ``log=False`` re-raised the ``OSError``).

    Parameters
    ----------
    path : str
        Path name
    log : bool
        Whether to print result for directory creation

    Raises
    ------
    OSError
        If the directory cannot be created for any reason other than it
        already existing as a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exc:
        # Only "already exists and is a directory" is benign.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
        if log:
            print("Directory {} already exists.".format(path))
    else:
        if log:
            print("Created directory {}".format(path))
def get_binary_mask(total_size, indices):
    """Return a uint8 vector of length ``total_size`` that is 1 at ``indices``.

    Parameters
    ----------
    total_size : int
        Length of the mask
    indices : index tensor or array
        Positions to set to 1

    Returns
    -------
    torch.Tensor
        The 0/1 mask with dtype ``torch.uint8``
    """
    flags = torch.zeros(total_size, dtype=torch.uint8)
    flags[indices] = 1
    return flags
val_idx) test_mask = get_binary_mask(num_nodes, test_idx) print("dataset loaded") pprint( { "dataset": "ACM", "train": train_mask.sum().item() / num_nodes, "val": val_mask.sum().item() / num_nodes, "test": test_mask.sum().item() / num_nodes, } ) return ( gs, features, labels, num_classes, train_idx, val_idx, test_idx, train_mask, val_mask, test_mask, ) def load_acm_raw(remove_self_loop): assert not remove_self_loop url = "dataset/ACM.mat" data_path = get_download_dir() + "/ACM.mat" download(_get_dgl_url(url), path=data_path) data = sio.loadmat(data_path) p_vs_l = data["PvsL"] # paper-field? p_vs_a = data["PvsA"] # paper-author p_vs_t = data["PvsT"] # paper-term, bag of words p_vs_c = data["PvsC"] # paper-conference, labels come from that # We assign # (1) KDD papers as class 0 (data mining), # (2) SIGMOD and VLDB papers as class 1 (database), # (3) SIGCOMM and MOBICOMM papers as class 2 (communication) conf_ids = [0, 1, 9, 10, 13] label_ids = [0, 1, 2, 2, 1] p_vs_c_filter = p_vs_c[:, conf_ids] p_selected = (p_vs_c_filter.sum(1) != 0).A1.nonzero()[0] p_vs_l = p_vs_l[p_selected] p_vs_a = p_vs_a[p_selected] p_vs_t = p_vs_t[p_selected] p_vs_c = p_vs_c[p_selected] hg = dgl.heterograph( { ("paper", "pa", "author"): p_vs_a.nonzero(), ("author", "ap", "paper"): p_vs_a.transpose().nonzero(), ("paper", "pf", "field"): p_vs_l.nonzero(), ("field", "fp", "paper"): p_vs_l.transpose().nonzero(), } ) features = torch.FloatTensor(p_vs_t.toarray()) pc_p, pc_c = p_vs_c.nonzero() labels = np.zeros(len(p_selected), dtype=np.int64) for conf_id, label_id in zip(conf_ids, label_ids): labels[pc_p[pc_c == conf_id]] = label_id labels = torch.LongTensor(labels) num_classes = 3 float_mask = np.zeros(len(pc_p)) for conf_id in conf_ids: pc_c_mask = pc_c == conf_id float_mask[pc_c_mask] = np.random.permutation( np.linspace(0, 1, pc_c_mask.sum()) ) train_idx = np.where(float_mask <= 0.2)[0] val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0] test_idx = np.where(float_mask > 0.3)[0] 
def load_data(dataset, remove_self_loop=False):
    """Load one of the supported datasets by name.

    Parameters
    ----------
    dataset : str
        ``"ACM"`` or ``"ACMRaw"``
    remove_self_loop : bool
        Forwarded to the dataset specific loader

    Returns
    -------
    tuple
        Whatever ``load_acm`` / ``load_acm_raw`` returns for the dataset

    Raises
    ------
    NotImplementedError
        If ``dataset`` is not one of the supported names.
    """
    if dataset == "ACM":
        return load_acm(remove_self_loop)
    if dataset == "ACMRaw":
        return load_acm_raw(remove_self_loop)
    # Raise instead of returning the exception object, so callers fail here
    # rather than later when unpacking the result.
    raise NotImplementedError("Unsupported dataset {}".format(dataset))
HardGANet implementor ---------------------- This example was implemented by [Ericcsr](https://github.com/Ericcsr) during his internship work at the AWS Shanghai AI Lab. The graph dataset used in this example --------------------------------------- The DGL's built-in CoraGraphDataset. Dataset summary: - NumNodes: 2708 - NumEdges: 10556 - NumFeats: 1433 - NumClasses: 7 - NumTrainingSamples: 140 - NumValidationSamples: 500 - NumTestSamples: 1000 The DGL's built-in CiteseerGraphDataset. Dataset Summary: - NumNodes: 3327 - NumEdges: 9228 - NumFeats: 3703 - NumClasses: 6 - NumTrainingSamples: 120 - NumValidationSamples: 500 - NumTestSamples: 1000 The DGL's built-in PubmedGraphDataset. Dataset Summary: - NumNodes: 19717 - NumEdges: 88651 - NumFeats: 500 - NumClasses: 3 - NumTrainingSamples: 60 - NumValidationSamples: 500 - NumTestSamples: 1000 How to run example files -------------------------------- In the hardgat folder, run **Please use `train.py`** ```python python train.py --dataset=cora ``` If you want to use a GPU, run ```python python train.py --gpu 0 --dataset=citeseer ``` If you want to use more Graph Hard Attention Modules ```python python train.py --num-layers ${num_layers} --dataset=pubmed ``` If you want to change the hard attention threshold k ```python python train.py --k ${k} --dataset=cora ``` If you want to test with vanilla GAT ```python python train.py --model --dataset=cora ``` Performance ------------------------- | Models/Datasets | Cora | Citeseer | Pubmed | | :-------------- | :--: | :------: | -----: | | GAT in DGL | 81.5% | 70.1% | 77.7% | | HardGAT | 81.8% | 70.2% |78.0%| Note that HardGAT simply replaces GATConv with the hGAO layer described in the paper. ================================================ FILE: examples/pytorch/hardgat/hgao.py ================================================ """ Graph Representation Learning via Hard Attention Networks in DGL using Adam optimization.
class HardGAO(nn.Module):
    """Hard graph attention operator (hGAO).

    Each node keeps only its top-``k`` incoming edges, ranked by a learned
    projection score of the source node, and then applies multi-head additive
    attention on the resulting subgraph.

    Parameters
    ----------
    in_feats : int
        Input feature size
    out_feats : int
        Output feature size per head
    num_heads : int
        Number of attention heads
    feat_drop : float
        Dropout rate applied to the (gated) input features
    attn_drop : float
        Dropout rate applied to the attention weights
    negative_slope : float
        Negative slope of the LeakyReLU applied to the attention logits
    residual : bool
        Whether to add a residual connection
    activation : callable or None
        Activation applied to the aggregated features before the residual
    k : int
        Number of incoming edges kept per node
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        num_heads=8,
        feat_drop=0.0,
        attn_drop=0.0,
        negative_slope=0.2,
        residual=True,
        activation=F.elu,
        k=8,
    ):
        super(HardGAO, self).__init__()
        self.num_heads = num_heads
        self.in_feats = in_feats
        self.out_feats = out_feats
        self.k = k
        self.residual = residual
        # Initialize Parameters for Additive Attention
        self.fc = nn.Linear(
            self.in_feats, self.out_feats * self.num_heads, bias=False
        )
        self.attn_l = nn.Parameter(
            torch.FloatTensor(size=(1, self.num_heads, self.out_feats))
        )
        self.attn_r = nn.Parameter(
            torch.FloatTensor(size=(1, self.num_heads, self.out_feats))
        )
        # Initialize Parameters for Hard Projection
        self.p = nn.Parameter(torch.FloatTensor(size=(1, in_feats)))
        # Initialize Dropouts
        self.feat_drop = nn.Dropout(feat_drop)
        self.attn_drop = nn.Dropout(attn_drop)
        self.leaky_relu = nn.LeakyReLU(negative_slope)
        if self.residual:
            if self.in_feats == self.out_feats:
                self.residual_module = Identity()
            else:
                self.residual_module = nn.Linear(
                    self.in_feats, self.out_feats * num_heads, bias=False
                )
        self.reset_parameters()
        self.activation = activation

    def reset_parameters(self):
        """Re-initialize all learnable parameters with Xavier normal init."""
        gain = nn.init.calculate_gain("relu")
        nn.init.xavier_normal_(self.fc.weight, gain=gain)
        nn.init.xavier_normal_(self.p, gain=gain)
        nn.init.xavier_normal_(self.attn_l, gain=gain)
        nn.init.xavier_normal_(self.attn_r, gain=gain)
        # The residual module is an identity (no weight) when
        # in_feats == out_feats; only a linear projection has a weight.
        if self.residual and isinstance(self.residual_module, nn.Linear):
            nn.init.xavier_normal_(self.residual_module.weight, gain=gain)

    def forward(self, graph, feat, get_attention=False):
        """Compute the hard-attention layer output.

        Parameters
        ----------
        graph : DGLGraph
            Input graph; every node must have at least one incoming edge.
            NOTE: the projection score is written to ``graph.ndata["y"]`` and
            ``graph.edata["y"]`` of the passed graph.
        feat : torch.Tensor
            Node features; presumably of shape (N, in_feats) given the
            projection with ``self.p`` -- confirm against the caller.
        get_attention : bool
            Whether to also return the attention weights

        Returns
        -------
        torch.Tensor
            The aggregated node features
        torch.Tensor, optional
            Attention weights on the edges of the top-k subgraph, returned
            only when ``get_attention`` is True

        Raises
        ------
        DGLError
            If the graph contains nodes without incoming edges.
        """
        # Check in degree and generate error
        if (graph.in_degrees() == 0).any():
            raise DGLError(
                "There are 0-in-degree nodes in the graph, "
                "output for those nodes will be invalid. "
                "This is harmful for some applications, "
                "causing silent performance regression. "
                "Adding self-loop on the input graph by "
                "calling `g = dgl.add_self_loop(g)` will resolve "
                "the issue."
            )
        # projection process to get importance vector y:
        # y = |p . feat^T| / ||p||_2
        graph.ndata["y"] = torch.abs(
            torch.matmul(self.p, feat.T).view(-1)
        ) / torch.norm(self.p, p=2)
        # Use edge message passing function to get the weight from src node
        graph.apply_edges(fn.copy_u("y", "y"))
        # Select Top k neighbors (the selection runs on a CPU copy and the
        # result is moved back to the graph's device)
        subgraph = select_topk(graph.cpu(), self.k, "y").to(graph.device)
        # Sigmoid as information threshold
        subgraph.ndata["y"] = torch.sigmoid(subgraph.ndata["y"])
        # Using vector matrix elementwise mul for acceleration
        feat = subgraph.ndata["y"].view(-1, 1) * feat
        feat = self.feat_drop(feat)
        h = self.fc(feat).view(-1, self.num_heads, self.out_feats)
        el = (h * self.attn_l).sum(dim=-1).unsqueeze(-1)
        er = (h * self.attn_r).sum(dim=-1).unsqueeze(-1)
        # Assign the value on the subgraph
        subgraph.srcdata.update({"ft": h, "el": el})
        subgraph.dstdata.update({"er": er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
        subgraph.apply_edges(fn.u_add_v("el", "er", "e"))
        e = self.leaky_relu(subgraph.edata.pop("e"))
        # compute softmax
        subgraph.edata["a"] = self.attn_drop(edge_softmax(subgraph, e))
        # message passing
        subgraph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
        rst = subgraph.dstdata["ft"]
        # activation
        if self.activation:
            rst = self.activation(rst)
        # Residual (uses the gated and dropped-out features)
        if self.residual:
            rst = rst + self.residual_module(feat).view(
                feat.shape[0], -1, self.out_feats
            )

        if get_attention:
            return rst, subgraph.edata["a"]
        else:
            return rst
def main(args):
    """Train and test a HardGAT model on the selected citation dataset.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line options; the fields read here are ``dataset``,
        ``num_layers``, ``gpu``, ``num_heads``, ``num_out_heads``,
        ``num_hidden``, ``in_drop``, ``attn_drop``, ``negative_slope``,
        ``residual``, ``k``, ``early_stop``, ``lr``, ``weight_decay``,
        ``epochs`` and ``fastmode``.

    Raises
    ------
    ValueError
        If the dataset name is unknown or ``num_layers`` is not positive.
    """
    # load and preprocess dataset
    if args.dataset == "cora":
        data = CoraGraphDataset()
    elif args.dataset == "citeseer":
        data = CiteseerGraphDataset()
    elif args.dataset == "pubmed":
        data = PubmedGraphDataset()
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    if args.num_layers <= 0:
        raise ValueError("num layer must be positive int")
    g = data[0]
    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        g = g.to(args.gpu)

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    num_feats = features.shape[1]
    n_classes = data.num_classes
    n_edges = g.num_edges()
    print(
        """----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d"""
        % (
            n_edges,
            n_classes,
            train_mask.int().sum().item(),
            val_mask.int().sum().item(),
            test_mask.int().sum().item(),
        )
    )

    # add self loop
    g = dgl.remove_self_loop(g)
    g = dgl.add_self_loop(g)
    n_edges = g.num_edges()
    # create model
    heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads]
    model = HardGAT(
        g,
        args.num_layers,
        num_feats,
        args.num_hidden,
        n_classes,
        heads,
        F.elu,
        args.in_drop,
        args.attn_drop,
        args.negative_slope,
        args.residual,
        args.k,
    )
    print(model)
    if args.early_stop:
        stopper = EarlyStopping(patience=100)
    if cuda:
        # Put the model on the same GPU the graph was moved to.
        model.cuda(args.gpu)
    loss_fcn = torch.nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Running mean of the per-epoch training time; the first three epochs
    # are treated as warm-up and are not timed.
    mean = 0
    for epoch in range(args.epochs):
        model.train()
        if epoch >= 3:
            t0 = time.time()
        # forward
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch >= 3:
            mean = (mean * (epoch - 3) + (time.time() - t0)) / (epoch - 2)

        train_acc = accuracy(logits[train_mask], labels[train_mask])

        if args.fastmode:
            val_acc = accuracy(logits[val_mask], labels[val_mask])
        else:
            val_acc = evaluate(model, features, labels, val_mask)
            if args.early_stop:
                if stopper.step(val_acc, model):
                    break

        # ``mean`` is still 0 during the untimed warm-up epochs; report NaN
        # instead of dividing by zero.
        throughput = n_edges / mean / 1000 if mean > 0 else float("nan")
        print(
            "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |"
            " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format(
                epoch,
                mean,
                loss.item(),
                train_acc,
                val_acc,
                throughput,
            )
        )

    print()
    if args.early_stop:
        model.load_state_dict(
            torch.load("es_checkpoint.pt", weights_only=False)
        )
    acc = evaluate(model, features, labels, test_mask)
    print("Test Accuracy {:.4f}".format(acc))
Set -1 to use CPU.", ) parser.add_argument( "--epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--num-heads", type=int, default=8, help="number of hidden attention heads", ) parser.add_argument( "--num-out-heads", type=int, default=1, help="number of output attention heads", ) parser.add_argument( "--num-layers", type=int, default=1, help="number of hidden layers" ) parser.add_argument( "--num-hidden", type=int, default=8, help="number of hidden units" ) parser.add_argument( "--residual", action="store_true", default=False, help="use residual connection", ) parser.add_argument( "--in-drop", type=float, default=0.6, help="input feature dropout" ) parser.add_argument( "--attn-drop", type=float, default=0.6, help="attention dropout" ) parser.add_argument("--lr", type=float, default=0.01, help="learning rate") parser.add_argument( "--weight-decay", type=float, default=5e-4, help="weight decay" ) parser.add_argument( "--negative-slope", type=float, default=0.2, help="the negative slope of leaky relu", ) parser.add_argument( "--early-stop", action="store_true", default=False, help="indicates whether to use early stop or not", ) parser.add_argument( "--fastmode", action="store_true", default=False, help="skip re-evaluate the validation set", ) parser.add_argument( "--k", type=int, default=8, help="top k neighor for attention calculation", ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/hardgat/utils.py ================================================ """ Graph Representation Learning via Hard Attention Networks in DGL using Adam optimization. 
class EarlyStopping:
    """Patience based early stopping on a score where higher is better.

    The model weights are written to ``es_checkpoint.pt`` whenever the score
    does not drop below the best score seen so far.
    """

    def __init__(self, patience=10):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, acc, model):
        """Record ``acc`` and return whether training should stop.

        A score below the best one increments the patience counter; any other
        score becomes the new best, is checkpointed and (except for the very
        first call) resets the counter.
        """
        if self.best_score is not None and acc < self.best_score:
            self.counter += 1
            print(
                f"EarlyStopping counter: {self.counter} out of {self.patience}"
            )
            if self.counter >= self.patience:
                self.early_stop = True
            return self.early_stop

        is_first_score = self.best_score is None
        self.best_score = acc
        self.save_checkpoint(model)
        if not is_first_score:
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        """Write the model's state dict to ``es_checkpoint.pt``."""
        weights = model.state_dict()
        torch.save(weights, "es_checkpoint.pt")
NOTE: Since there are no node attributes in some of these datasets, we use node_id (as a one-hot vector whose length is the max number of nodes across all graphs) as the node feature. Also note that the node_id in some datasets is not unique (e.g. a graph may have two nodes with the same id). | | DD | PROTEINS | NCI1 | NCI109 | Mutagenicity | ENZYMES | | ---------------- | ------ | -------- | ----- | ------ | ------------ | ------- | | NumGraphs | 1178 | 1113 | 4110 | 4127 | 4337 | 600 | | AvgNodesPerGraph | 284.32 | 39.06 | 29.87 | 29.68 | 30.32 | 32.63 | | AvgEdgesPerGraph | 715.66 | 72.82 | 32.30 | 32.13 | 30.77 | 62.14 | | NumFeats | 89 | 1 | 37 | 38 | 14 | 18 | | NumClasses | 2 | 2 | 2 | 2 | 2 | 6 | How to run example files -------------------------------- In the HGP-SL-DGL folder, run ```bash python main.py --dataset ${your_dataset_name_here} [hyper-parameters] ``` If you want to use a GPU, run ```bash python main.py --device ${your_device_id_here} --dataset ${your_dataset_name_here} [hyper-parameters] ``` For example, to perform experiments on the DD dataset on GPU, run: ```bash python main.py --device 0 --dataset DD --lr 0.0001 --batch_size 64 --pool_ratio 0.3 --dropout 0.5 --conv_layers 2 ``` NOTE: Be careful when modifying `batch_size` and `pool_ratio` for large datasets like DD. Too large a batch size or pooling ratio may cause out-of-memory and other severe errors. You can find the detailed hyper-parameter settings below (in the Performance section).
Performance ------------------------- **Hyper-parameters** This part is taken directly from the [author's implementation](https://github.com/cszhangzhen/HGP-SL) | Datasets | lr | weight_decay | batch_size | pool_ratio | dropout | net_layers | | ------------- | --------- | -------------- | --------------- | -------------- | -------- | ---------- | | PROTEINS | 0.001 | 0.001 | 512 | 0.5 | 0.0 | 3 | | Mutagenicity | 0.001 | 0.001 | 512 | 0.8 | 0.0 | 3 | | NCI109 | 0.001 | 0.001 | 512 | 0.8 | 0.0 | 3 | | NCI1 | 0.001 | 0.001 | 512 | 0.8 | 0.0 | 3 | | DD | 0.0001 | 0.001 | 64 | 0.3 | 0.5 | 2 | | ENZYMES | 0.001 | 0.001 | 128 | 0.8 | 0.0 | 2 | **Accuracy** **NOTE**: We find that there is a gap between the accuracy obtained with the author's code and the one reported in the [paper](https://arxiv.org/pdf/1911.05954.pdf). An issue has been opened in the author's repo (see [here](https://github.com/cszhangzhen/HGP-SL/issues/8)). | | Mutagenicity | NCI109 | NCI1 | DD | | -------------------------- | ------------ | ----------- | ----------- | ----------- | | Reported in Paper | 82.15(0.58) | 80.67(1.16) | 78.45(0.77) | 80.96(1.26) | | Author's Code (full graph) | 78.44(2.10) | 74.44(2.05) | 77.37(2.09) | OOM | | Author's Code (sample) | 79.68(1.68) | 73.86(1.72) | 76.29(2.14) | 75.46(3.86) | | DGL (full graph) | 79.52(2.21) | 74.86(1.99) | 74.62(2.22) | OOM | | DGL (sample) | 79.15(1.62) | 75.39(1.86) | 73.77(2.04) | 76.47(2.14) | **Speed** Device: Tesla V100-SXM2 16GB In seconds | | DD(batchsize=64), large graph | Mutagenicity(batchsize=512), small graph | | ----------------------------- | ----------------------------- | ---------------------------------------- | | Author's code (sample) | 9.96 | 12.91 | | Author's code (full graph) | OOM | 13.03 | | DGL (sample) | 9.50 | 3.59 | | DGL (full graph) | OOM | 3.56 | ================================================ FILE: examples/pytorch/hgp_sl/functions.py ================================================ """ An original implementation of 
sparsemax (Martins & Astudillo, 2016) is available at https://github.com/OpenNMT/OpenNMT-py/blob/master/onmt/modules/sparse_activations.py. See `From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label Classification, ICML 2016` for detailed description. Here we implement a graph-edge version of sparsemax where we perform sparsemax for all edges with the same node as end-node in graphs. """ import dgl import torch from dgl._sparse_ops import _gsddmm, _gspmm from dgl.backend import astype from dgl.base import ALL, is_all from dgl.heterograph_index import HeteroGraphIndex from torch import Tensor from torch.autograd import Function def _neighbor_sort( scores: Tensor, end_n_ids: Tensor, in_degrees: Tensor, cum_in_degrees: Tensor, ): """Sort edge scores for each node""" num_nodes, max_in_degree = in_degrees.size(0), int(in_degrees.max().item()) # Compute the index for dense score matrix with size (N x D_{max}) # Note that the end_n_ids here is the end_node tensor in dgl graph, # which is not grouped by its node id (i.e. in this form: 0,0,1,1,1,...,N,N). # Thus here we first sort the end_node tensor to make it easier to compute # indexs in dense edge score matrix. Since we will need the original order # for following gspmm and gsddmm operations, we also keep the reverse mapping # (the reverse_perm) here. 
end_n_ids, perm = torch.sort(end_n_ids) scores = scores[perm] _, reverse_perm = torch.sort(perm) index = torch.arange( end_n_ids.size(0), dtype=torch.long, device=scores.device ) index = (index - cum_in_degrees[end_n_ids]) + (end_n_ids * max_in_degree) index = index.long() dense_scores = scores.new_full( (num_nodes * max_in_degree,), torch.finfo(scores.dtype).min ) dense_scores[index] = scores dense_scores = dense_scores.view(num_nodes, max_in_degree) sorted_dense_scores, dense_reverse_perm = dense_scores.sort( dim=-1, descending=True ) _, dense_reverse_perm = torch.sort(dense_reverse_perm, dim=-1) dense_reverse_perm = dense_reverse_perm + cum_in_degrees.view(-1, 1) dense_reverse_perm = dense_reverse_perm.view(-1) cumsum_sorted_dense_scores = sorted_dense_scores.cumsum(dim=-1).view(-1) sorted_dense_scores = sorted_dense_scores.view(-1) arange_vec = torch.arange( 1, max_in_degree + 1, dtype=torch.long, device=end_n_ids.device ) arange_vec = torch.repeat_interleave( arange_vec.view(1, -1), num_nodes, dim=0 ).view(-1) valid_mask = sorted_dense_scores != torch.finfo(scores.dtype).min sorted_scores = sorted_dense_scores[valid_mask] cumsum_sorted_scores = cumsum_sorted_dense_scores[valid_mask] arange_vec = arange_vec[valid_mask] dense_reverse_perm = dense_reverse_perm[valid_mask].long() return ( sorted_scores, cumsum_sorted_scores, arange_vec, reverse_perm, dense_reverse_perm, ) def _threshold_and_support_graph( gidx: HeteroGraphIndex, scores: Tensor, end_n_ids: Tensor ): """Find the threshold for each node and its edges""" in_degrees = _gspmm(gidx, "copy_rhs", "sum", None, torch.ones_like(scores))[ 0 ] cum_in_degrees = torch.cat( [in_degrees.new_zeros(1), in_degrees.cumsum(dim=0)[:-1]], dim=0 ) # perform sort on edges for each node ( sorted_scores, cumsum_scores, rhos, reverse_perm, dense_reverse_perm, ) = _neighbor_sort(scores, end_n_ids, in_degrees, cum_in_degrees) cumsum_scores = cumsum_scores - 1.0 support = rhos * sorted_scores > cumsum_scores support = 
class EdgeSparsemaxFunction(Function):
    r"""

    Description
    -----------
    PyTorch autograd Function for edge sparsemax.

    A custom Function is used because sparsemax relies on sorting and
    selection, which are not differentiable; the gradient is therefore
    written by hand in :meth:`backward`.
    """

    @staticmethod
    def forward(
        ctx,
        gidx: HeteroGraphIndex,
        scores: Tensor,
        eids: Tensor,
        end_n_ids: Tensor,
        norm_by: str,
    ):
        """Compute edge sparsemax of ``scores``.

        ``gidx`` is restricted to ``eids`` unless ``eids`` means "all edges",
        and is reversed when ``norm_by == "src"``.  ``end_n_ids`` is passed
        through to ``_threshold_and_support_graph``.  Returns the clamped,
        thresholded scores; ``scores`` is cast to float first.
        """
        if not is_all(eids):
            gidx = gidx.edge_subgraph([eids], True).graph
        if norm_by == "src":
            # NOTE(review): presumably reversing lets the same
            # destination-wise kernels normalise over source nodes — confirm.
            gidx = gidx.reverse()

        # use feat - max(feat) for numerical stability.
        scores = scores.float()
        scores_max = _gspmm(gidx, "copy_rhs", "max", None, scores)[0]
        scores = _gsddmm(gidx, "sub", scores, scores_max, "e", "v")

        # find threshold for each node and perform ReLU(u-t(u)) operation.
        tau, supp_size = _threshold_and_support_graph(gidx, scores, end_n_ids)
        out = torch.clamp(_gsddmm(gidx, "sub", scores, tau, "e", "v"), min=0)
        # The graph index is not a tensor, so it is kept as a plain attribute
        # on ctx; the tensors go through save_for_backward.
        ctx.backward_cache = gidx
        ctx.save_for_backward(supp_size, out)
        torch.cuda.empty_cache()
        return out

    @staticmethod
    def backward(ctx, grad_out):
        """Return gradients for the five inputs of :meth:`forward`.

        Only ``scores`` receives a gradient; the other four positions are
        ``None``.
        """
        gidx = ctx.backward_cache
        supp_size, out = ctx.saved_tensors
        grad_in = grad_out.clone()

        # grad for ReLU
        grad_in[out == 0] = 0

        # dL/dv_i = dL/do_i - 1/k \sum_{j=1}^k dL/do_j
        v_hat = _gspmm(gidx, "copy_rhs", "sum", None, grad_in)[
            0
        ] / supp_size.to(out.dtype)
        grad_in_modify = _gsddmm(gidx, "sub", grad_in, v_hat, "e", "v")
        # Edges with a zero output keep the (already zeroed) gradient.
        grad_in = torch.where(out != 0, grad_in_modify, grad_in)
        del gidx
        torch.cuda.empty_cache()

        # One entry per forward() input: gidx, scores, eids, end_n_ids, norm_by.
        return None, grad_in, None, None, None
class WeightedGraphConv(GraphConv):
    r"""

    Description
    -----------
    GraphConv variant that multiplies every message by a scalar edge weight
    (homogeneous graphs). Without edge weights the call is forwarded to the
    plain :class:`GraphConv` implementation.

    Parameters
    ----------
    graph : DGLGraph
        The graph to perform this operation.
    n_feat : torch.Tensor
        The node features
    e_feat : torch.Tensor, optional
        The edge features. Default: :obj:`None`
    """

    def forward(self, graph: DGLGraph, n_feat, e_feat=None):
        # No edge weights: the parent class already does the right thing.
        if e_feat is None:
            return super().forward(graph, n_feat)

        with graph.local_scope():
            hidden = (
                n_feat
                if self.weight is None
                else torch.matmul(n_feat, self.weight)
            )

            # Symmetric degree normalisation; degrees are clamped to 1 so
            # isolated nodes do not produce infinities.
            out_deg = graph.out_degrees().float().clamp(min=1)
            in_deg = graph.in_degrees().float().clamp(min=1)
            src_norm = torch.pow(out_deg, -0.5).view(-1, 1)
            dst_norm = torch.pow(in_deg, -0.5).view(-1, 1)

            # Weighted message passing: sum over neighbours of h_u * e_uv.
            graph.ndata["h"] = hidden * src_norm
            graph.edata["e"] = e_feat
            graph.update_all(fn.u_mul_e("h", "e", "m"), fn.sum("m", "h"))
            hidden = graph.ndata.pop("h") * dst_norm

            if self.bias is not None:
                hidden = hidden + self.bias
            if self._activation is not None:
                hidden = self._activation(hidden)
            return hidden
""" def __init__(self, sym_norm: bool = True): super(NodeInfoScoreLayer, self).__init__() self.sym_norm = sym_norm def forward(self, graph: dgl.DGLGraph, feat: Tensor, e_feat: Tensor): with graph.local_scope(): if self.sym_norm: src_norm = torch.pow( graph.out_degrees().float().clamp(min=1), -0.5 ) src_norm = src_norm.view(-1, 1).to(feat.device) dst_norm = torch.pow( graph.in_degrees().float().clamp(min=1), -0.5 ) dst_norm = dst_norm.view(-1, 1).to(feat.device) src_feat = feat * src_norm graph.ndata["h"] = src_feat graph.edata["e"] = e_feat graph = dgl.remove_self_loop(graph) graph.update_all(fn.u_mul_e("h", "e", "m"), fn.sum("m", "h")) dst_feat = graph.ndata.pop("h") * dst_norm feat = feat - dst_feat else: dst_norm = 1.0 / graph.in_degrees().float().clamp(min=1) dst_norm = dst_norm.view(-1, 1) graph.ndata["h"] = feat graph.edata["e"] = e_feat graph = dgl.remove_self_loop(graph) graph.update_all(fn.u_mul_e("h", "e", "m"), fn.sum("m", "h")) feat = feat - dst_norm * graph.ndata.pop("h") score = torch.sum(torch.abs(feat), dim=1) return score class HGPSLPool(nn.Module): r""" Description ----------- The HGP-SL pooling layer from `Hierarchical Graph Pooling with Structure Learning ` Parameters ---------- in_feat : int The number of input node feature's channels ratio : float, optional Pooling ratio. Default: 0.8 sample : bool, optional Whether use k-hop union graph to increase efficiency. Currently we only support full graph. Default: :obj:`False` sym_score_norm : bool, optional Use symmetric norm for adjacency or not. Default: :obj:`True` sparse : bool, optional Use edge sparsemax instead of edge softmax. Default: :obj:`True` sl : bool, optional Use structure learining module or not. Default: :obj:`True` lamb : float, optional The lambda parameter as weight of raw adjacency as described in the HGP-SL paper. Default: 1.0 negative_slop : float, optional Negative slop for leaky_relu. Default: 0.2 Returns ------- DGLGraph The pooled graph. 
torch.Tensor Node features torch.Tensor Edge features torch.Tensor Permutation index """ def __init__( self, in_feat: int, ratio=0.8, sample=True, sym_score_norm=True, sparse=True, sl=True, lamb=1.0, negative_slop=0.2, k_hop=3, ): super(HGPSLPool, self).__init__() self.in_feat = in_feat self.ratio = ratio self.sample = sample self.sparse = sparse self.sl = sl self.lamb = lamb self.negative_slop = negative_slop self.k_hop = k_hop self.att = Parameter(torch.Tensor(1, self.in_feat * 2)) self.calc_info_score = NodeInfoScoreLayer(sym_norm=sym_score_norm) self.reset_parameters() def reset_parameters(self): nn.init.xavier_normal_(self.att.data) def forward(self, graph: DGLGraph, feat: Tensor, e_feat=None): # top-k pool first if e_feat is None: e_feat = torch.ones( (graph.num_edges(),), dtype=feat.dtype, device=feat.device ) batch_num_nodes = graph.batch_num_nodes() x_score = self.calc_info_score(graph, feat, e_feat) perm, next_batch_num_nodes = topk( x_score, self.ratio, get_batch_id(batch_num_nodes), batch_num_nodes ) feat = feat[perm] pool_graph = None if not self.sample or not self.sl: # pool graph graph.edata["e"] = e_feat pool_graph = dgl.node_subgraph(graph, perm) e_feat = pool_graph.edata.pop("e") pool_graph.set_batch_num_nodes(next_batch_num_nodes) # no structure learning layer, directly return. if not self.sl: return pool_graph, feat, e_feat, perm # Structure Learning if self.sample: # A fast mode for large graphs. # In large graphs, learning the possible edge weights between each # pair of nodes is time consuming. To accelerate this process, # we sample it's K-Hop neighbors for each node and then learn the # edge weights between them. 
# first build multi-hop graph row, col = graph.all_edges() num_nodes = graph.num_nodes() scipy_adj = scipy.sparse.coo_matrix( ( e_feat.detach().cpu(), (row.detach().cpu(), col.detach().cpu()), ), shape=(num_nodes, num_nodes), ) for _ in range(self.k_hop): two_hop = scipy_adj**2 two_hop = two_hop * (1e-5 / two_hop.max()) scipy_adj = two_hop + scipy_adj row, col = scipy_adj.nonzero() row = torch.tensor(row, dtype=torch.long, device=graph.device) col = torch.tensor(col, dtype=torch.long, device=graph.device) e_feat = torch.tensor( scipy_adj.data, dtype=torch.float, device=feat.device ) # perform pooling on multi-hop graph mask = perm.new_full((num_nodes,), -1) i = torch.arange(perm.size(0), dtype=torch.long, device=perm.device) mask[perm] = i row, col = mask[row], mask[col] mask = (row >= 0) & (col >= 0) row, col = row[mask], col[mask] e_feat = e_feat[mask] # add remaining self loops mask = row != col num_nodes = perm.size(0) # num nodes after pool loop_index = torch.arange( 0, num_nodes, dtype=row.dtype, device=row.device ) inv_mask = ~mask loop_weight = torch.full( (num_nodes,), 0, dtype=e_feat.dtype, device=e_feat.device ) remaining_e_feat = e_feat[inv_mask] if remaining_e_feat.numel() > 0: loop_weight[row[inv_mask]] = remaining_e_feat e_feat = torch.cat([e_feat[mask], loop_weight], dim=0) row, col = row[mask], col[mask] row = torch.cat([row, loop_index], dim=0) col = torch.cat([col, loop_index], dim=0) # attention scores weights = (torch.cat([feat[row], feat[col]], dim=1) * self.att).sum( dim=-1 ) weights = ( F.leaky_relu(weights, self.negative_slop) + e_feat * self.lamb ) # sl and normalization sl_graph = dgl.graph((row, col)) if self.sparse: weights = edge_sparsemax(sl_graph, weights) else: weights = edge_softmax(sl_graph, weights) # get final graph mask = torch.abs(weights) > 0 row, col, weights = row[mask], col[mask], weights[mask] pool_graph = dgl.graph((row, col)) pool_graph.set_batch_num_nodes(next_batch_num_nodes) e_feat = weights else: # Learning the 
possible edge weights between each pair of # nodes in the pooled subgraph, relative slower. # construct complete graphs for all graph in the batch # use dense to build, then transform to sparse. # maybe there's more efficient way? batch_num_nodes = next_batch_num_nodes block_begin_idx = torch.cat( [ batch_num_nodes.new_zeros(1), batch_num_nodes.cumsum(dim=0)[:-1], ], dim=0, ) block_end_idx = batch_num_nodes.cumsum(dim=0) dense_adj = torch.zeros( (pool_graph.num_nodes(), pool_graph.num_nodes()), dtype=torch.float, device=feat.device, ) for idx_b, idx_e in zip(block_begin_idx, block_end_idx): dense_adj[idx_b:idx_e, idx_b:idx_e] = 1.0 row, col = torch.nonzero(dense_adj).t().contiguous() # compute weights for node-pairs weights = (torch.cat([feat[row], feat[col]], dim=1) * self.att).sum( dim=-1 ) weights = F.leaky_relu(weights, self.negative_slop) dense_adj[row, col] = weights # add pooled graph structure to weight matrix pool_row, pool_col = pool_graph.all_edges() dense_adj[pool_row, pool_col] += self.lamb * e_feat weights = dense_adj[row, col] del dense_adj torch.cuda.empty_cache() # edge softmax/sparsemax complete_graph = dgl.graph((row, col)) if self.sparse: weights = edge_sparsemax(complete_graph, weights) else: weights = edge_softmax(complete_graph, weights) # get new e_feat and graph structure, clean up. mask = torch.abs(weights) > 1e-9 row, col, weights = row[mask], col[mask], weights[mask] e_feat = weights pool_graph = dgl.graph((row, col)) pool_graph.set_batch_num_nodes(next_batch_num_nodes) return pool_graph, feat, e_feat, perm class ConvPoolReadout(torch.nn.Module): """A helper class. 
def parse_args(argv=None):
    """Parse and post-process the command-line options of the HGP-SL example.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse. ``None`` (the default) reads ``sys.argv[1:]``.

    Returns
    -------
    argparse.Namespace
        The parsed options, with these fields rewritten:

        * ``device``: ``"cpu"`` or ``"cuda:<id>"``; falls back to ``"cpu"``
          with a warning when a GPU was requested but CUDA is unavailable.
        * ``print_every``: ``-1`` becomes ``epochs + 1`` (never prints).
        * ``sample``: converted to ``bool`` (true only for "true",
          case-insensitive).
        * ``output_path``: path of the result file inside the output folder.

    The dataset and output directories are created if they do not exist.
    """
    parser = argparse.ArgumentParser(description="HGP-SL-DGL")
    parser.add_argument(
        "--dataset",
        type=str,
        default="DD",
        choices=["DD", "PROTEINS", "NCI1", "NCI109", "Mutagenicity", "ENZYMES"],
        help="DD/PROTEINS/NCI1/NCI109/Mutagenicity/ENZYMES",
    )
    parser.add_argument(
        "--batch_size", type=int, default=512, help="batch size"
    )
    parser.add_argument(
        "--sample", type=str, default="true", help="use sample method"
    )
    parser.add_argument("--lr", type=float, default=1e-3, help="learning rate")
    parser.add_argument(
        "--weight_decay", type=float, default=1e-3, help="weight decay"
    )
    parser.add_argument(
        "--pool_ratio", type=float, default=0.5, help="pooling ratio"
    )
    parser.add_argument("--hid_dim", type=int, default=128, help="hidden size")
    parser.add_argument(
        "--conv_layers", type=int, default=3, help="number of conv layers"
    )
    parser.add_argument(
        "--dropout", type=float, default=0.0, help="dropout ratio"
    )
    parser.add_argument(
        "--lamb", type=float, default=1.0, help="trade-off parameter"
    )
    parser.add_argument(
        "--epochs", type=int, default=1000, help="max number of training epochs"
    )
    parser.add_argument(
        "--patience", type=int, default=100, help="patience for early stopping"
    )
    parser.add_argument(
        "--device", type=int, default=-1, help="device id, -1 for cpu"
    )
    parser.add_argument(
        "--dataset_path", type=str, default="./dataset", help="path to dataset"
    )
    parser.add_argument(
        "--print_every",
        type=int,
        default=10,
        help="print trainlog every k epochs, -1 for silent training",
    )
    parser.add_argument(
        "--num_trials", type=int, default=1, help="number of trials"
    )
    parser.add_argument("--output_path", type=str, default="./output")

    args = parser.parse_args(argv)

    # device: only complain about missing CUDA when a GPU was actually
    # requested; an explicit CPU run must stay silent.
    if args.device == -1:
        args.device = "cpu"
    elif torch.cuda.is_available():
        args.device = "cuda:{}".format(args.device)
    else:
        logging.warning("CUDA is not available, use CPU for training.")
        args.device = "cpu"

    # print every
    if args.print_every == -1:
        args.print_every = args.epochs + 1

    # bool args
    args.sample = args.sample.lower() == "true"

    # paths (exist_ok avoids the check-then-create race)
    os.makedirs(args.dataset_path, exist_ok=True)
    os.makedirs(args.output_path, exist_ok=True)

    name = (
        "Data={}_Hidden={}_Pool={}_WeightDecay={}_Lr={}_Sample={}.log".format(
            args.dataset,
            args.hid_dim,
            args.pool_ratio,
            args.weight_decay,
            args.lr,
            args.sample,
        )
    )
    args.output_path = os.path.join(args.output_path, name)

    return args
def main(args):
    """Run one training trial and return its result.

    The dataset is split 80/10/10 into train/validation/test sets, a model is
    trained with Adam, and the test accuracy of the epoch with the lowest
    validation loss is reported. Training stops early once the validation
    loss has not improved for ``args.patience`` consecutive epochs.

    Returns
    -------
    tuple
        ``(final_test_acc, mean_training_seconds_per_epoch)``
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # Self loops are added graph by graph because "add_self_loop" does not
    # support batched graphs.
    for idx in range(len(dataset)):
        dataset.graph_lists[idx] = dgl.add_self_loop(dataset.graph_lists[idx])

    total = len(dataset)
    num_training = int(total * 0.8)
    num_val = int(total * 0.1)
    num_test = total - num_val - num_training
    train_set, val_set, test_set = random_split(
        dataset, [num_training, num_val, num_test]
    )

    train_loader = GraphDataLoader(
        train_set, batch_size=args.batch_size, shuffle=True, num_workers=6
    )
    val_loader = GraphDataLoader(
        val_set, batch_size=args.batch_size, num_workers=2
    )
    test_loader = GraphDataLoader(
        test_set, batch_size=args.batch_size, num_workers=2
    )

    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    model = HGPSLModel(
        in_feat=num_feature,
        out_feat=num_classes,
        hid_feat=args.hid_dim,
        conv_layers=args.conv_layers,
        dropout=args.dropout,
        pool_ratio=args.pool_ratio,
        lamb=args.lamb,
        sample=args.sample,
    ).to(device)
    # Stored on args so they end up in the dumped hyper-parameters.
    args.num_feature = int(num_feature)
    args.num_classes = int(num_classes)

    # Step 3: Create training components ===================================================== #
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Step 4: training epochs =============================================================== #
    log_format = "Epoch {}: loss={:.4f}, val_acc={:.4f}, final_test_acc={:.4f}"
    stale_epochs = 0
    best_val_loss = float("inf")
    final_test_acc = 0.0
    best_epoch = 0
    train_times = []
    for epoch in range(1, args.epochs + 1):
        started = time()
        train_loss = train(model, optimizer, train_loader, device)
        train_times.append(time() - started)

        val_acc, val_loss = test(model, val_loader, device)
        test_acc, _ = test(model, test_loader, device)

        if best_val_loss > val_loss:
            # New best validation loss: remember the matching test accuracy.
            best_val_loss, final_test_acc = val_loss, test_acc
            stale_epochs = 0
            best_epoch = epoch
        else:
            stale_epochs += 1

        if stale_epochs >= args.patience:
            break

        if epoch % args.print_every == 0:
            print(
                log_format.format(epoch, train_loss, val_acc, final_test_acc)
            )

    print(
        "Best Epoch {}, final test acc {:.4f}".format(
            best_epoch, final_test_acc
        )
    )
    return final_test_acc, sum(train_times) / len(train_times)
Default: :obj:`True` lamb : float, optional The lambda parameter as weight of raw adjacency as described in the HGP-SL paper. Default: 1.0 """ def __init__( self, in_feat: int, out_feat: int, hid_feat: int, dropout: float = 0.0, pool_ratio: float = 0.5, conv_layers: int = 3, sample: bool = False, sparse: bool = True, sl: bool = True, lamb: float = 1.0, ): super(HGPSLModel, self).__init__() self.in_feat = in_feat self.out_feat = out_feat self.hid_feat = hid_feat self.dropout = dropout self.num_layers = conv_layers self.pool_ratio = pool_ratio convpools = [] for i in range(conv_layers): c_in = in_feat if i == 0 else hid_feat c_out = hid_feat use_pool = i != conv_layers - 1 convpools.append( ConvPoolReadout( c_in, c_out, pool_ratio=pool_ratio, sample=sample, sparse=sparse, sl=sl, lamb=lamb, pool=use_pool, ) ) self.convpool_layers = torch.nn.ModuleList(convpools) self.lin1 = torch.nn.Linear(hid_feat * 2, hid_feat) self.lin2 = torch.nn.Linear(hid_feat, hid_feat // 2) self.lin3 = torch.nn.Linear(hid_feat // 2, self.out_feat) def forward(self, graph, n_feat): final_readout = None e_feat = None for i in range(self.num_layers): graph, n_feat, e_feat, readout = self.convpool_layers[i]( graph, n_feat, e_feat ) if final_readout is None: final_readout = readout else: final_readout = final_readout + readout n_feat = F.relu(self.lin1(final_readout)) n_feat = F.dropout(n_feat, p=self.dropout, training=self.training) n_feat = F.relu(self.lin2(n_feat)) n_feat = F.dropout(n_feat, p=self.dropout, training=self.training) n_feat = self.lin3(n_feat) return F.log_softmax(n_feat, dim=-1) ================================================ FILE: examples/pytorch/hgp_sl/utils.py ================================================ import logging import math import torch from scipy.stats import t def get_stats( array, conf_interval=False, name=None, stdout=False, logout=False ): """Compute mean and standard deviation from an numerical array Args: array (array like obj): The numerical array, this 
array can be convert to :obj:`torch.Tensor`. conf_interval (bool, optional): If True, compute the confidence interval bound (95%) instead of the std value. (default: :obj:`False`) name (str, optional): The name of this numerical array, for log usage. (default: :obj:`None`) stdout (bool, optional): Whether to output result to the terminal. (default: :obj:`False`) logout (bool, optional): Whether to output result via logging module. (default: :obj:`False`) """ eps = 1e-9 array = torch.Tensor(array) std, mean = torch.std_mean(array) std = std.item() mean = mean.item() center = mean if conf_interval: n = array.size(0) se = std / (math.sqrt(n) + eps) t_value = t.ppf(0.975, df=n - 1) err_bound = t_value * se else: err_bound = std # log and print if name is None: name = "array {}".format(id(array)) log = "{}: {:.4f}(+-{:.4f})".format(name, center, err_bound) if stdout: print(log) if logout: logging.info(log) return center, err_bound def get_batch_id(num_nodes: torch.Tensor): """Convert the num_nodes array obtained from batch graph to batch_id array for each node. Args: num_nodes (torch.Tensor): The tensor whose element is the number of nodes in each graph in the batch graph. """ batch_size = num_nodes.size(0) batch_ids = [] for i in range(batch_size): item = torch.full( (num_nodes[i],), i, dtype=torch.long, device=num_nodes.device ) batch_ids.append(item) return torch.cat(batch_ids) def topk( x: torch.Tensor, ratio: float, batch_id: torch.Tensor, num_nodes: torch.Tensor, ): """The top-k pooling method. Given a graph batch, this method will pool out some nodes from input node feature tensor for each graph according to the given ratio. Args: x (torch.Tensor): The input node feature batch-tensor to be pooled. ratio (float): the pool ratio. For example if :obj:`ratio=0.5` then half of the input tensor will be pooled out. batch_id (torch.Tensor): The batch_id of each element in the input tensor. num_nodes (torch.Tensor): The number of nodes of each graph in batch. 
Returns: perm (torch.Tensor): The index in batch to be kept. k (torch.Tensor): The remaining number of nodes for each graph. """ batch_size, max_num_nodes = num_nodes.size(0), num_nodes.max().item() cum_num_nodes = torch.cat( [num_nodes.new_zeros(1), num_nodes.cumsum(dim=0)[:-1]], dim=0 ) index = torch.arange(batch_id.size(0), dtype=torch.long, device=x.device) index = (index - cum_num_nodes[batch_id]) + (batch_id * max_num_nodes) dense_x = x.new_full( (batch_size * max_num_nodes,), torch.finfo(x.dtype).min ) dense_x[index] = x dense_x = dense_x.view(batch_size, max_num_nodes) _, perm = dense_x.sort(dim=-1, descending=True) perm = perm + cum_num_nodes.view(-1, 1) perm = perm.view(-1) k = (ratio * num_nodes.to(torch.float)).ceil().to(torch.long) mask = [ torch.arange(k[i], dtype=torch.long, device=x.device) + i * max_num_nodes for i in range(batch_size) ] mask = torch.cat(mask, dim=0) perm = perm[mask] return perm, k ================================================ FILE: examples/pytorch/hgt/README.md ================================================ # Heterogeneous Graph Transformer (HGT) [Alternative PyTorch-Geometric implementation](https://github.com/acbull/pyHGT) [“**Heterogeneous Graph Transformer**”](https://arxiv.org/abs/2003.01332) is a graph neural network architecture that can deal with large-scale heterogeneous and dynamic graphs. This toy experiment is based on DGL's official [tutorial](https://docs.dgl.ai/en/0.4.x/generated/dgl.heterograph.html). As the ACM dataset doesn't provide input features, we simply assign random features to each node. This step can easily be replaced with any prepared features.
class HGTLayer(nn.Module):
    """A single Heterogeneous Graph Transformer layer for a DGL heterograph.

    The layer keeps one key/query/value/output linear map per node type and
    one attention matrix, message matrix and per-head prior per relation
    (edge type). ``forward`` computes per-edge attention for every canonical
    edge type, aggregates the attended messages, and mixes the result with
    the input features through a learned, sigmoid-gated skip connection.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        node_dict,
        edge_dict,
        n_heads,
        dropout=0.2,
        use_norm=False,
    ):
        """
        Args:
            in_dim: size of the input node features.
            out_dim: size of the output node features; split across heads as
                ``d_k = out_dim // n_heads``.
            node_dict: mapping from node type name to an integer id, used to
                index the per-node-type modules and the skip parameter.
            edge_dict: mapping from edge type name to an integer id, used to
                index the per-relation parameters.
            n_heads: number of attention heads.
            dropout: dropout probability applied to the output projection.
            use_norm: if True, a ``LayerNorm`` per node type is applied to
                the layer output.
        """
        super(HGTLayer, self).__init__()

        self.in_dim = in_dim
        self.out_dim = out_dim
        self.node_dict = node_dict
        self.edge_dict = edge_dict
        self.num_types = len(node_dict)
        self.num_relations = len(edge_dict)
        # Stored but not read anywhere else in this class.
        self.total_rel = self.num_types * self.num_relations * self.num_types
        self.n_heads = n_heads
        self.d_k = out_dim // n_heads
        self.sqrt_dk = math.sqrt(self.d_k)
        self.att = None

        # One module per node type, indexed by node_dict[ntype].
        self.k_linears = nn.ModuleList()
        self.q_linears = nn.ModuleList()
        self.v_linears = nn.ModuleList()
        self.a_linears = nn.ModuleList()
        self.norms = nn.ModuleList()
        self.use_norm = use_norm

        for t in range(self.num_types):
            self.k_linears.append(nn.Linear(in_dim, out_dim))
            self.q_linears.append(nn.Linear(in_dim, out_dim))
            self.v_linears.append(nn.Linear(in_dim, out_dim))
            self.a_linears.append(nn.Linear(out_dim, out_dim))
            if use_norm:
                self.norms.append(nn.LayerNorm(out_dim))

        # Per-relation, per-head scalar that scales the attention score.
        self.relation_pri = nn.Parameter(
            torch.ones(self.num_relations, self.n_heads)
        )
        # Per-relation, per-head (d_k x d_k) matrices applied to keys and to
        # values respectively; allocated uninitialised here and filled by the
        # xavier_uniform_ calls below.
        self.relation_att = nn.Parameter(
            torch.Tensor(self.num_relations, n_heads, self.d_k, self.d_k)
        )
        self.relation_msg = nn.Parameter(
            torch.Tensor(self.num_relations, n_heads, self.d_k, self.d_k)
        )
        # Per-node-type skip weight; passed through a sigmoid in forward().
        self.skip = nn.Parameter(torch.ones(self.num_types))
        self.drop = nn.Dropout(dropout)

        nn.init.xavier_uniform_(self.relation_att)
        nn.init.xavier_uniform_(self.relation_msg)

    def forward(self, G, h):
        """Run the layer on heterograph ``G``.

        Args:
            G: DGL heterograph; all features written here live inside
                ``G.local_scope()`` and do not persist on ``G``.
            h: dict mapping node type name to that type's feature tensor.

        Returns:
            dict mapping every node type in ``G.ntypes`` to its updated
            feature tensor.
        """
        with G.local_scope():
            node_dict, edge_dict = self.node_dict, self.edge_dict
            for srctype, etype, dsttype in G.canonical_etypes:
                sub_graph = G[srctype, etype, dsttype]

                # Keys/values come from the source type, queries from the
                # destination type.
                k_linear = self.k_linears[node_dict[srctype]]
                v_linear = self.v_linears[node_dict[srctype]]
                q_linear = self.q_linears[node_dict[dsttype]]

                # Split the projected features into heads:
                # (num_nodes, n_heads, d_k).
                k = k_linear(h[srctype]).view(-1, self.n_heads, self.d_k)
                v = v_linear(h[srctype]).view(-1, self.n_heads, self.d_k)
                q = q_linear(h[dsttype]).view(-1, self.n_heads, self.d_k)

                e_id = self.edge_dict[etype]

                relation_att = self.relation_att[e_id]
                relation_pri = self.relation_pri[e_id]
                relation_msg = self.relation_msg[e_id]

                # Apply the relation-specific (d_k x d_k) matrix per head.
                k = torch.einsum("bij,ijk->bik", k, relation_att)
                v = torch.einsum("bij,ijk->bik", v, relation_msg)

                sub_graph.srcdata["k"] = k
                sub_graph.dstdata["q"] = q
                # Values are stored under a per-relation name so that they
                # are not overwritten by another relation with the same
                # source type before multi_update_all runs.
                sub_graph.srcdata["v_%d" % e_id] = v

                # Per-edge, per-head dot product of query and key, scaled by
                # the relation prior and 1 / sqrt(d_k).
                sub_graph.apply_edges(fn.v_dot_u("q", "k", "t"))
                attn_score = (
                    sub_graph.edata.pop("t").sum(-1)
                    * relation_pri
                    / self.sqrt_dk
                )
                # Normalise the scores over the incoming edges of each
                # destination node.
                attn_score = edge_softmax(sub_graph, attn_score, norm_by="dst")

                sub_graph.edata["t"] = attn_score.unsqueeze(-1)

            # Message = value * attention, summed per relation into "t";
            # results of different relations reaching the same node type are
            # combined with the "mean" cross reducer.
            G.multi_update_all(
                {
                    etype: (
                        fn.u_mul_e("v_%d" % e_id, "t", "m"),
                        fn.sum("m", "t"),
                    )
                    for etype, e_id in edge_dict.items()
                },
                cross_reducer="mean",
            )

            new_h = {}
            for ntype in G.ntypes:
                # NOTE: the formula in the string below mentions gelu, but no
                # gelu is applied anywhere in this method.
                """ Step 3: Target-specific Aggregation x = norm( W[node_type] * gelu( Agg(x) ) + x ) """
                n_id = node_dict[ntype]
                alpha = torch.sigmoid(self.skip[n_id])
                # Merge the heads back: (num_nodes, out_dim).
                t = G.nodes[ntype].data["t"].view(-1, self.out_dim)
                trans_out = self.drop(self.a_linears[n_id](t))
                # Gated skip connection with the layer input. The element-wise
                # sum presumably requires in_dim == out_dim — TODO confirm.
                trans_out = trans_out * alpha + h[ntype] * (1 - alpha)
                if self.use_norm:
                    new_h[ntype] = self.norms[n_id](trans_out)
                else:
                    new_h[ntype] = trans_out
            return new_h
self.adapt_ws = nn.ModuleList() for t in range(len(node_dict)): self.adapt_ws.append(nn.Linear(n_inp, n_hid)) for _ in range(n_layers): self.gcs.append( HGTLayer( n_hid, n_hid, node_dict, edge_dict, n_heads, use_norm=use_norm, ) ) self.out = nn.Linear(n_hid, n_out) def forward(self, G, out_key): h = {} for ntype in G.ntypes: n_id = self.node_dict[ntype] h[ntype] = F.gelu(self.adapt_ws[n_id](G.nodes[ntype].data["inp"])) for i in range(self.n_layers): h = self.gcs[i](G, h) return self.out(h[out_key]) class HeteroRGCNLayer(nn.Module): def __init__(self, in_size, out_size, etypes): super(HeteroRGCNLayer, self).__init__() # W_r for each relation self.weight = nn.ModuleDict( {name: nn.Linear(in_size, out_size) for name in etypes} ) def forward(self, G, feat_dict): # The input is a dictionary of node features for each type funcs = {} for srctype, etype, dsttype in G.canonical_etypes: # Compute W_r * h Wh = self.weight[etype](feat_dict[srctype]) # Save it in graph for message passing G.nodes[srctype].data["Wh_%s" % etype] = Wh # Specify per-relation message passing functions: (message_func, reduce_func). # Note that the results are saved to the same destination feature 'h', which # hints the type wise reducer for aggregation. funcs[etype] = (fn.copy_u("Wh_%s" % etype, "m"), fn.mean("m", "h")) # Trigger message passing of multiple types. # The first argument is the message passing functions for each relation. 
# Fix the torch RNG seed so parameter and feature initialisation is repeatable.
torch.manual_seed(0)

data_url = "https://data.dgl.ai/dataset/ACM.mat"
data_file_path = "/tmp/ACM.mat"

# Download the ACM dataset and load the MATLAB file into a dict of matrices.
urllib.request.urlretrieve(data_url, data_file_path)
data = scipy.io.loadmat(data_file_path)

# The description used to name the ogbn-products benchmark, but this script
# trains HGT, RGCN and an MLP baseline on the ACM dataset loaded above.
parser = argparse.ArgumentParser(
    description="Training HGT, RGCN and MLP baselines on the ACM dataset"
)
parser.add_argument("--n_epoch", type=int, default=200)
parser.add_argument("--n_hid", type=int, default=256)
parser.add_argument("--n_inp", type=int, default=256)
# Gradient-norm clipping threshold. It was declared ``type=int`` with a float
# default, which made argparse reject fractional values such as ``--clip 0.5``.
parser.add_argument("--clip", type=float, default=1.0)
parser.add_argument("--max_lr", type=float, default=1e-3)

args = parser.parse_args()
# Use the first GPU when one is available; fall back to the CPU otherwise so
# the example also runs on machines without CUDA.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Build the heterograph from the ACM adjacency matrices; every relation is
# added together with its reverse (transposed) relation.
G = dgl.heterograph(
    {
        ("paper", "written-by", "author"): data["PvsA"].nonzero(),
        ("author", "writing", "paper"): data["PvsA"].transpose().nonzero(),
        ("paper", "citing", "paper"): data["PvsP"].nonzero(),
        ("paper", "cited", "paper"): data["PvsP"].transpose().nonzero(),
        ("paper", "is-about", "subject"): data["PvsL"].nonzero(),
        ("subject", "has", "paper"): data["PvsL"].transpose().nonzero(),
    }
)
print(G)

pvc = data["PvsC"].tocsr()
p_selected = pvc.tocoo()
# generate labels
# (column index of each stored entry of PvsC; presumably one conference per
# paper — verify against the dataset)
labels = pvc.indices
labels = torch.tensor(labels).long()

# generate train/val/test split
pid = p_selected.row
shuffle = np.random.permutation(pid)
train_idx = torch.tensor(shuffle[0:800]).long()
val_idx = torch.tensor(shuffle[800:900]).long()
test_idx = torch.tensor(shuffle[900:]).long()

# Assign consecutive integer ids to node types and edge types.
node_dict = {}
edge_dict = {}
for ntype in G.ntypes:
    node_dict[ntype] = len(node_dict)
for etype in G.etypes:
    edge_dict[etype] = len(edge_dict)
    G.edges[etype].data["id"] = (
        torch.ones(G.num_edges(etype), dtype=torch.long) * edge_dict[etype]
    )

# Random initialize input feature
# The feature width follows ``--n_inp``: the models below are built with
# ``n_inp=args.n_inp``, so a hard-coded 256 broke every other setting.
for ntype in G.ntypes:
    emb = nn.Parameter(
        torch.Tensor(G.num_nodes(ntype), args.n_inp), requires_grad=False
    )
    nn.init.xavier_uniform_(emb)
    G.nodes[ntype].data["inp"] = emb

G = G.to(device)

# ``train`` reads ``optimizer`` and ``scheduler`` from module scope, so they
# are rebound before each run.
model = HGT(
    G,
    node_dict,
    edge_dict,
    n_inp=args.n_inp,
    n_hid=args.n_hid,
    n_out=labels.max().item() + 1,
    n_layers=2,
    n_heads=4,
    use_norm=True,
).to(device)
optimizer = torch.optim.AdamW(model.parameters())
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, total_steps=args.n_epoch, max_lr=args.max_lr
)
print("Training HGT with #param: %d" % (get_n_params(model)))
train(model, G)

model = HeteroRGCN(
    G,
    in_size=args.n_inp,
    hidden_size=args.n_hid,
    out_size=labels.max().item() + 1,
).to(device)
optimizer = torch.optim.AdamW(model.parameters())
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, total_steps=args.n_epoch, max_lr=args.max_lr
)
print("Training RGCN with #param: %d" % (get_n_params(model)))
train(model, G)

# An HGT with zero graph layers only applies the per-type input projection
# and the output linear layer, which serves as the MLP baseline.
model = HGT(
    G,
    node_dict,
    edge_dict,
    n_inp=args.n_inp,
    n_hid=args.n_hid,
    n_out=labels.max().item() + 1,
    n_layers=0,
    n_heads=4,
).to(device)
optimizer = torch.optim.AdamW(model.parameters())
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, total_steps=args.n_epoch, max_lr=args.max_lr
)
print("Training MLP with #param: %d" % (get_n_params(model)))
train(model, G)
To install dependencies using conda: ```commandline conda create -n pss python=3.7 # create env conda activate pss # activate env conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch # install pytorch 1.7 version conda install -y cudatoolkit=10.2 faiss-gpu=1.6.5 -c pytorch # install faiss gpu version matching cuda 10.2 pip install dgl-cu102 # install dgl for cuda 10.2 pip install tqdm # install tqdm pip install matplotlib # install matplotlib pip install pandas # install pandas pip install pretrainedmodels # install pretrainedmodels pip install tensorboardX # install tensorboardX pip install seaborn # install seaborn pip install scikit-learn cd .. git clone https://github.com/yjxiong/clustering-benchmark.git # install clustering-benchmark for evaluation cd clustering-benchmark python setup.py install cd ../PSS ``` ## Data We use the iNaturalist 2018 dataset. - download link: https://www.kaggle.com/c/inaturalist-2018/data; - annotations are in `Smooth_AP/data/Inaturalist`; - annotation txt files for different data splits are in [S3 link]|[[Google Drive](https://drive.google.com/drive/folders/1xrWogJGef4Ex5OGjiImgA06bAnk2MDrK?usp=sharing)]|[[Baidu Netdisk](https://pan.baidu.com/s/14S0Fns29a4o7kFDlNyyPjA?pwd=uwsg)] (password:uwsg). Download `train_val2018.tar.gz` and the data split txt files to `data/Inaturalist/` folder. Extract the `tar.gz` files. The data folder has the following structure: ```bash PSS |- data | |- Inaturalist | |- train2018.json.tar.gz | |- train_val2018.tar.gz | |- val2018.json.tar.gz | |- train_val2018 | | |- Actinopterygii | | |- ... | |- lin_train_set1.txt | |- train_set1.txt | |- uin_train_set1.txt | |- uout_train_set1.txt | |- in_train_set1.txt | |- Inaturalist_test_set1.txt |-... ``` ## Training Run `bash train.sh` to train the model. ## Test Run `bash test.sh` to evaluate on the test set. 
def args2exp_name(args):
    """
    Assemble the experiment name from the run's hyper-parameters.

    Args:
        args: argparse.Namespace (or any object) exposing dataset, loss, lr,
              bs, samples_per_class, embed_dim, arch, decay, fc_lr_mul and
              sigmoid_temperature.
    Returns:
        string, the underscore-separated experiment name.
    """
    name_parts = (
        f"{args.dataset}",
        f"{args.loss}",
        f"{args.lr}",
        f"bs{args.bs}",
        f"spc{args.samples_per_class}",
        f"embed{args.embed_dim}",
        f"arch{args.arch}",
        f"decay{args.decay}",
        f"fclr{args.fc_lr_mul}",
        f"anneal{args.sigmoid_temperature}",
    )
    return "_".join(name_parts)
def f1_score(
    model_generated_cluster_labels,
    target_labels,
    feature_coll,
    computed_centroids,
):
    """
    Pairwise clustering F1-score of predicted clusters against ground truth.

    NOTE: MOSTLY ADAPTED FROM https://github.com/wzzheng/HDML on Hardness-Aware Deep Metric Learning.

    A pair of samples counts as a true positive when both samples share the
    same ground-truth class and the same predicted cluster. The score is the
    harmonic mean of (true-positive pairs / same-class pairs) and
    (true-positive pairs / same-cluster pairs).

    Args:
        model_generated_cluster_labels: np.ndarray [n_samples x 1] or [n_samples], Cluster labels computed on top of data embeddings.
        target_labels:       np.ndarray [n_samples x 1] or [n_samples], ground truth labels for each data sample.
        feature_coll:        np.ndarray [n_samples x embed_dim], total data embedding made by network.
                             Accepted for backward compatibility, not used (see below).
        computed_centroids:  np.ndarray [num_cluster=num_classes x embed_dim], cluster coordinates.
                             Accepted for backward compatibility, not used (see below).
    Returns:
        float, F1-score. 0.0 when no pair of samples shares both class and
        cluster (including empty input), instead of dividing by zero.
    Raises:
        ValueError: if the two label arrays have different lengths.
    """
    # The previous implementation relabelled every cluster with the index of
    # its member closest to the centroid. That only renames clusters (each
    # cluster gets a distinct member index), so the pair counts depend on the
    # partition alone and the embeddings/centroids never affect the result.
    cluster_of = np.asarray(model_generated_cluster_labels).reshape(-1)
    class_of = np.asarray(target_labels).reshape(-1)
    if cluster_of.shape[0] != class_of.shape[0]:
        raise ValueError(
            "model_generated_cluster_labels and target_labels must have the "
            "same number of samples"
        )

    def count_pairs(group_sizes):
        # Number of unordered pairs inside groups of the given sizes.
        sizes = group_sizes.astype(np.float64)
        return float(np.sum(sizes * (sizes - 1.0) / 2.0))

    _, class_ids, class_sizes = np.unique(
        class_of, return_inverse=True, return_counts=True
    )
    _, cluster_ids, cluster_sizes = np.unique(
        cluster_of, return_inverse=True, return_counts=True
    )
    # Encode each (class, cluster) combination as one integer and count only
    # the combinations that occur, which keeps memory linear in n_samples.
    cell_ids = class_ids.astype(np.int64) * cluster_sizes.shape[0] + cluster_ids
    _, cell_sizes = np.unique(cell_ids, return_counts=True)

    tp = count_pairs(cell_sizes)
    same_class_pairs = count_pairs(class_sizes)
    same_cluster_pairs = count_pairs(cluster_sizes)

    # tp is bounded by both denominators, so tp == 0 covers every case that
    # previously ended in 0/0.
    if tp == 0:
        return 0.0

    # compute F measure (symmetric in P and R for beta = 1)
    beta = 1
    P = tp / same_class_pairs
    R = tp / same_cluster_pairs
    F1 = (beta * beta + 1) * P * R / (beta * beta * P + R)
    return F1
) image_paths = [x[0] for x in test_dataloader.dataset.image_list] for idx, inp in enumerate(final_iter): input_img, target = inp[-1], inp[0] target_labels.extend(target.numpy().tolist()) out = model(input_img.to(device), feature=True) feature_coll.extend(out.cpu().detach().numpy().tolist()) # pdb.set_trace() target_labels = np.hstack(target_labels).reshape(-1, 1) feature_coll = np.vstack(feature_coll).astype("float32") torch.cuda.empty_cache() ### Set Faiss CPU Cluster index cpu_cluster_index = faiss.IndexFlatL2(feature_coll.shape[-1]) kmeans = faiss.Clustering(feature_coll.shape[-1], n_classes) kmeans.niter = 20 kmeans.min_points_per_centroid = 1 kmeans.max_points_per_centroid = 1000000000 ### Train Kmeans kmeans.train(feature_coll, cpu_cluster_index) computed_centroids = faiss.vector_float_to_array( kmeans.centroids ).reshape(n_classes, feature_coll.shape[-1]) ### Assign feature points to clusters faiss_search_index = faiss.IndexFlatL2(computed_centroids.shape[-1]) faiss_search_index.add(computed_centroids) _, model_generated_cluster_labels = faiss_search_index.search( feature_coll, 1 ) ### Compute NMI NMI = metrics.cluster.normalized_mutual_info_score( model_generated_cluster_labels.reshape(-1), target_labels.reshape(-1), ) ### Recover max(k_vals) nehbours to use for recall computation faiss_search_index = faiss.IndexFlatL2(feature_coll.shape[-1]) faiss_search_index.add(feature_coll) _, k_closest_points = faiss_search_index.search( feature_coll, int(np.max(k_vals) + 1) ) k_closest_classes = target_labels.reshape(-1)[k_closest_points[:, 1:]] print("computing recalls") ### Compute Recall recall_all_k = [] for k in k_vals: recall_at_k = np.sum( [ 1 for target, recalled_predictions in zip( target_labels, k_closest_classes ) if target in recalled_predictions[:k] ] ) / len(target_labels) recall_all_k.append(recall_at_k) print("finished recalls") print("computing F1") ### Compute F1 Score F1 = 0 # F1 = f1_score(model_generated_cluster_labels, target_labels, 
feature_coll, computed_centroids) print("finished computing f1") return F1, NMI, recall_all_k, feature_coll def eval_metrics_query_and_gallery_dataset( model, query_dataloader, gallery_dataloader, device, k_vals, opt ): """ Compute evaluation metrics on test-dataset, e.g. NMI, F1 and Recall @ k. Args: model: PyTorch network, network to compute evaluation metrics for. query_dataloader: PyTorch Dataloader, dataloader for query dataset, for which nearest neighbours in the gallery dataset are retrieved. gallery_dataloader: PyTorch Dataloader, dataloader for gallery dataset, provides target samples which are to be retrieved in correspondance to the query dataset. device: torch.device, Device to run inference on. k_vals: list of int, Recall values to compute opt: argparse.Namespace, contains all training-specific parameters. Returns: F1 score (float), NMI score (float), recall_at_ks (list of float), query data embedding (np.ndarray), gallery data embedding (np.ndarray) """ torch.cuda.empty_cache() _ = model.eval() n_classes = len(query_dataloader.dataset.avail_classes) with torch.no_grad(): ### For all query test images, extract features query_target_labels, query_feature_coll = [], [] query_image_paths = [x[0] for x in query_dataloader.dataset.image_list] query_iter = tqdm(query_dataloader, desc="Extraction Query Features") for idx, inp in enumerate(query_iter): input_img, target = inp[-1], inp[0] query_target_labels.extend(target.numpy().tolist()) out = model(input_img.to(device), feature=True) query_feature_coll.extend(out.cpu().detach().numpy().tolist()) ### For all gallery test images, extract features gallery_target_labels, gallery_feature_coll = [], [] gallery_image_paths = [ x[0] for x in gallery_dataloader.dataset.image_list ] gallery_iter = tqdm( gallery_dataloader, desc="Extraction Gallery Features" ) for idx, inp in enumerate(gallery_iter): input_img, target = inp[-1], inp[0] gallery_target_labels.extend(target.numpy().tolist()) out = 
model(input_img.to(device), feature=True) gallery_feature_coll.extend(out.cpu().detach().numpy().tolist()) query_target_labels, query_feature_coll = np.hstack( query_target_labels ).reshape(-1, 1), np.vstack(query_feature_coll).astype("float32") gallery_target_labels, gallery_feature_coll = np.hstack( gallery_target_labels ).reshape(-1, 1), np.vstack(gallery_feature_coll).astype("float32") torch.cuda.empty_cache() ### Set CPU Cluster index stackset = np.concatenate( [query_feature_coll, gallery_feature_coll], axis=0 ) stacklabels = np.concatenate( [query_target_labels, gallery_target_labels], axis=0 ) cpu_cluster_index = faiss.IndexFlatL2(stackset.shape[-1]) kmeans = faiss.Clustering(stackset.shape[-1], n_classes) kmeans.niter = 20 kmeans.min_points_per_centroid = 1 kmeans.max_points_per_centroid = 1000000000 ### Train Kmeans kmeans.train(stackset, cpu_cluster_index) computed_centroids = faiss.vector_float_to_array( kmeans.centroids ).reshape(n_classes, stackset.shape[-1]) ### Assign feature points to clusters faiss_search_index = faiss.IndexFlatL2(computed_centroids.shape[-1]) faiss_search_index.add(computed_centroids) _, model_generated_cluster_labels = faiss_search_index.search( stackset, 1 ) ### Compute NMI NMI = metrics.cluster.normalized_mutual_info_score( model_generated_cluster_labels.reshape(-1), stacklabels.reshape(-1) ) ### Recover max(k_vals) nearest neighbours to use for recall computation faiss_search_index = faiss.IndexFlatL2(gallery_feature_coll.shape[-1]) faiss_search_index.add(gallery_feature_coll) _, k_closest_points = faiss_search_index.search( query_feature_coll, int(np.max(k_vals)) ) k_closest_classes = gallery_target_labels.reshape(-1)[k_closest_points] ### Compute Recall recall_all_k = [] for k in k_vals: recall_at_k = np.sum( [ 1 for target, recalled_predictions in zip( query_target_labels, k_closest_classes ) if target in recalled_predictions[:k] ] ) / len(query_target_labels) recall_all_k.append(recall_at_k) recall_str = ", ".join( "@{0}: 
def recover_closest_inshop(
    query_feature_matrix_all,
    gallery_feature_matrix_all,
    query_image_paths,
    gallery_image_paths,
    save_path,
    n_image_samples=10,
    n_closest=3,
):
    """
    Provide sample recoveries: plot randomly chosen query images next to
    their nearest gallery images and save the figure.

    Args:
        query_feature_matrix_all:   np.ndarray [n_query_samples x embed_dim], full data embedding of query samples.
        gallery_feature_matrix_all: np.ndarray [n_gallery_samples x embed_dim], full data embedding of gallery samples.
        query_image_paths:          list [n_query_samples], image paths of the query samples.
        gallery_image_paths:        list [n_gallery_samples], image paths of the gallery samples.
        save_path:          str, where to store sample image.
        n_image_samples:    Number of sample recoveries (rows of the figure).
        n_closest:          Number of closest recoveries to show per query.
    Returns:
        Nothing!
    """
    # NOTE(review): both path arguments are assumed to be flat sequences of
    # path strings (one per embedding row) — confirm against the caller.
    query_image_paths = np.array(query_image_paths)
    gallery_image_paths = np.array(gallery_image_paths)
    sample_idxs = np.random.choice(
        np.arange(len(query_feature_matrix_all)), n_image_samples
    )

    # Exact L2 nearest-neighbour search of every query in the gallery.
    faiss_search_index = faiss.IndexFlatL2(gallery_feature_matrix_all.shape[-1])
    faiss_search_index.add(gallery_feature_matrix_all)
    _, closest_feature_idxs = faiss_search_index.search(
        query_feature_matrix_all, n_closest
    )

    # Row i: [query path i, path of 1st closest gallery image, ...].
    image_paths = gallery_image_paths[closest_feature_idxs]
    image_paths = np.concatenate(
        [query_image_paths.reshape(-1, 1), image_paths], axis=-1
    )

    # Rows of this table are indexed by query, so select the sampled queries
    # directly. Indexing it with the gallery indices first (as before) picked
    # unrelated rows or raised an IndexError.
    sample_paths = image_paths[sample_idxs]

    f, axes = plt.subplots(n_image_samples, n_closest + 1)
    for i, (ax, plot_path) in enumerate(
        zip(axes.reshape(-1), sample_paths.reshape(-1))
    ):
        # Close the file handle as soon as the pixels are copied out.
        with Image.open(plot_path) as img:
            ax.imshow(np.array(img))
        ax.set_xticks([])
        ax.set_yticks([])
        # First column (the query) gets a red bar, retrieved images a green one.
        if i % (n_closest + 1):
            ax.axvline(x=0, color="g", linewidth=13)
        else:
            ax.axvline(x=0, color="r", linewidth=13)
    f.set_size_inches(10, 20)
    f.tight_layout()
    f.savefig(save_path)
    plt.close(f)
class CSV_Writer:
    """
    Class to append newly compute training metrics to a csv file
    for data logging.
    Is used together with the LOGGER class.
    """

    def __init__(self, save_path, columns):
        """
        Args:
            save_path: str, where to store the csv file
            columns:   list of str, name of csv columns under which the resp. metrics are stored.
        Returns:
            Nothing!
        """
        self.save_path = save_path
        self.columns = columns

        # The file is opened in append mode, so the header row is added to
        # whatever the file already contains.
        # newline="" lets the csv module control line endings itself; without
        # it, platforms that translate newlines produce blank rows.
        with open(self.save_path, "a", newline="") as csv_file:
            writer = csv.writer(csv_file, delimiter=",")
            writer.writerow(self.columns)

    def log(self, inputs):
        """
        log one set of entries to the csv.

        Args:
            inputs: [list of int/str/float], values to append to the csv. Has to be of the same length as self.columns.
        Returns:
            Nothing!
        """
        with open(self.save_path, "a", newline="") as csv_file:
            writer = csv.writer(csv_file, delimiter=",")
            writer.writerow(inputs)
""" checkfolder = opt.save_path + "/" + str(opt.iter) # Create start-time-based name if opt.savename is not give. if opt.savename == "": date = datetime.datetime.now() checkfolder = opt.save_path + "/" + str(opt.iter) # If folder already exists, iterate over it until is doesn't. # counter = 1 # while os.path.exists(checkfolder): # checkfolder = opt.save_path+'/'+opt.savename+'_'+str(counter) # counter += 1 # Create Folder if not os.path.exists(checkfolder): os.makedirs(checkfolder) opt.save_path = checkfolder # Store training parameters as text and pickle in said folder. with open(opt.save_path + "/Parameter_Info.txt", "w") as f: f.write(gimme_save_string(opt)) pkl.dump(opt, open(opt.save_path + "/hypa.pkl", "wb")) import pdb class LOGGER: """ This class provides a collection of logging properties that are useful for training. These include setting the save folder, in which progression of training/testing metrics is visualized, csv log-files are stored, sample recoveries are plotted and an internal data saver. """ def __init__(self, opt, metrics_to_log, name="Basic", start_new=True): """ Args: opt: argparse.Namespace, contains all training-specific parameters. metrics_to_log: dict, dictionary which shows in what structure the data should be saved. is given as the output of aux.metrics_to_examine. Example: {'train': ['Epochs', 'Time', 'Train Loss', 'Time'], 'val': ['Epochs','Time','NMI','F1', 'Recall @ 1','Recall @ 2','Recall @ 4','Recall @ 8']} name: Name of this logger. Will be used to distinguish logged files from other LOGGER instances. start_new: If set to true, a new save folder will be created initially. Returns: Nothing! 
""" self.prop = opt self.metrics_to_log = metrics_to_log ### Make Logging Directories if start_new: set_logging(opt) ### Set Progress Saver Dict self.progress_saver = self.provide_progress_saver(metrics_to_log) ### Set CSV Writters self.csv_loggers = { mode: CSV_Writer( opt.save_path + "/log_" + mode + "_" + name + ".csv", lognames ) for mode, lognames in metrics_to_log.items() } def provide_progress_saver(self, metrics_to_log): """ Provide Progress Saver dictionary. Args: metrics_to_log: see __init__(). Describes the structure of Progress_Saver. """ Progress_Saver = { key: {sub_key: [] for sub_key in metrics_to_log[key]} for key in metrics_to_log.keys() } return Progress_Saver def log(self, main_keys, metric_keys, values): """ Actually log new values in csv and Progress Saver dict internally. Args: main_keys: Main key in which data will be stored. Normally is either 'train' for training metrics or 'val' for validation metrics. metric_keys: Needs to follow the list length of self.progress_saver[main_key(s)]. List of metric keys that are extended with new values. values: Needs to be a list of the same structure as metric_keys. Actual values that are appended. """ if not isinstance(main_keys, list): main_keys = [main_keys] if not isinstance(metric_keys, list): metric_keys = [metric_keys] if not isinstance(values, list): values = [values] # Log data to progress saver dict. for main_key in main_keys: for value, metric_key in zip(values, metric_keys): self.progress_saver[main_key][metric_key].append(value) # Append data to csv. self.csv_loggers[main_key].log(values) def update_info_plot(self): """ Create a new updated version of training/metric progression plot. Args: None Returns: Nothing! """ t_epochs = self.progress_saver["val"]["Epochs"] t_loss_list = [self.progress_saver["train"]["Train Loss"]] t_legend_handles = ["Train Loss"] v_epochs = self.progress_saver["val"]["Epochs"] # Because Vehicle-ID normally uses three different test sets, a distinction has to be made. 
def metrics_to_examine(dataset, k_vals):
    """
    Build the column layout used for LOGGER.progress_saver.

    Please only use either of the following keys:
    -> Epochs, Time, Train Loss for training
    -> Epochs, Time, NMI, F1 & Recall @ k for validation

    Args:
        dataset: str, dataset for which the storing structure is to be made. "vehicle_id" gets
                 one group of validation metrics per test set (three sets, prefixed "Set <i>").
        k_vals:  list of int, Recall @ k - values.
    Returns:
        metric_dict: dict with the keys "train" and "val", each mapping to a list of metric names.
    """
    val_metrics = ["Epochs", "Time"]

    if dataset != "vehicle_id":
        # Single test set: plain metric names.
        val_metrics.extend(["NMI", "F1"])
        val_metrics.extend("Recall @ {}".format(k) for k in k_vals)
    else:
        # Vehicle_ID uses three test sets; every metric name carries its set index.
        for set_idx in range(3):
            val_metrics.append("Set {} NMI".format(set_idx))
            val_metrics.append("Set {} F1".format(set_idx))
            val_metrics.extend(
                "Set {} Recall @ {}".format(set_idx, k) for k in k_vals
            )

    return {"train": ["Epochs", "Time", "Train Loss"], "val": val_metrics}
) image_paths = [x[0] for x in test_dataloader.dataset.image_list] for i in range(len(image_paths)): path2ids[image_paths[i]] = i for idx, inp in enumerate(final_iter): input_img, target = inp[-1], inp[0] target_labels.extend(target.numpy().tolist()) out = model(input_img.to(device), feature=True) feature_coll.extend(out.cpu().detach().numpy().tolist()) # pdb.set_trace() target_labels = np.hstack(target_labels).reshape(-1) feature_coll = np.vstack(feature_coll).astype("float32") if (opt.dataset == "Inaturalist") and "all_train" in split: if opt.iter > 0: predicted_features = np.zeros_like(feature_coll) path2ids_new = {} target_labels_new = np.zeros_like(target_labels) for i in range(len(idx2path.keys())): path = idx2path[i] idxx = path2ids[path] path2ids_new[path] = i predicted_features[i] = feature_coll[idxx] target_labels_new[i] = target_labels[idxx] path2ids = path2ids_new feature_coll = predicted_features target_labels = target_labels_new gtlabels = target_labels lastuselected = np.where(masks == 1) masks[lastuselected] = 0 print(len(np.where(masks == 0)[0])) else: predicted_features = np.zeros_like(feature_coll) path2ids_new = {} target_labels_new = np.zeros_like(target_labels) for i in range(len(paths)): path = paths[i] idxx = path2ids[opt.source_path + "/" + path] path2ids_new[opt.source_path + "/" + path] = i predicted_features[i] = feature_coll[idxx] target_labels_new[i] = target_labels[idxx] path2ids = path2ids_new feature_coll = predicted_features target_labels = target_labels_new gtlabels = target_labels if "all_train" not in split: print("all_train not in split.") gtlabels = target_labels output_feature_path = os.path.join( opt.source_path, split + "_inat_features.pkl" ) print("Dump features into {}.".format(output_feature_path)) with open(output_feature_path, "wb") as f: pkl.dump([path2ids, feature_coll, target_labels, gtlabels, masks], f) print(target_labels.max()) print("target_labels:", target_labels.shape) print("feature_coll:", 
feature_coll.shape) ================================================ FILE: examples/pytorch/hilander/PSS/Smooth_AP/src/datasets.py ================================================ # repo originally forked from https://github.com/Confusezius/Deep-Metric-Learning-Baselines ################# LIBRARIES ############################### import pickle import warnings from numpy.core.arrayprint import IntegerFormat warnings.filterwarnings("ignore") import copy import os import random import numpy as np import pandas as pd import torch from PIL import Image from torch.utils.data import Dataset from torchvision import transforms """============================================================================""" ################ FUNCTION TO RETURN ALL DATALOADERS NECESSARY #################### def give_dataloaders(dataset, trainset, testset, opt, cluster_path=""): """ Args: dataset: string, name of dataset for which the dataloaders should be returned. opt: argparse.Namespace, contains all training-specific parameters. Returns: dataloaders: dict of dataloaders for training, testing and evaluation on training. """ # Dataset selection if opt.dataset == "Inaturalist": if opt.finetune: datasets = give_inat_datasets_finetune_1head( testset, cluster_path, opt ) else: if opt.get_features: datasets = give_inaturalist_datasets_for_features(opt) else: datasets = give_inaturalist_datasets(opt) else: raise Exception("No Dataset >{}< available!".format(dataset)) # Move datasets to dataloaders. 
def _read_class_image_dict(source_path, list_file):
    """
    Group the image paths listed in a text file by class.

    Every non-blank line of the list file is treated as an image path relative to source_path.
    The class key is built from the two path components preceding the file name.

    Args:
        source_path: str, directory that contains the list file and against which entries are joined.
        list_file:   str, name of the list file inside source_path.
    Returns:
        dict of shape {class_key: [list of joined image paths]}, keys in order of first appearance.
    """
    image_dict = {}
    with open(os.path.join(source_path, list_file)) as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # A blank line has no path components and would otherwise raise IndexError below.
                continue
            info = entry.split("/")
            class_key = "/".join([info[-3], info[-2]])
            image_dict.setdefault(class_key, []).append(
                os.path.join(source_path, entry)
            )
    return image_dict


def give_inaturalist_datasets(opt):
    """
    Generate the training, testing and evaluation datasets for Metric Learning on the
    Inaturalist 2018 dataset.

    The train and test splits are given by the text list files opt.trainset and opt.testset
    (both located in opt.source_path), so there is no random shuffling of classes.
    The testing and evaluation datasets are both built from opt.testset.

    Args:
        opt: argparse.Namespace, contains all training-specific parameters.
    Returns:
        dict of PyTorch datasets with the keys "training", "testing" and "evaluation".
    """
    # Generate image_dicts of shape {class_key: [list of paths to images belonging to this class] ...}
    train_image_dict = _read_class_image_dict(opt.source_path, opt.trainset)
    val_image_dict = _read_class_image_dict(opt.source_path, opt.testset)
    # Independent copy (including the path lists) so the two evaluation datasets do not share state.
    test_image_dict = {key: list(paths) for key, paths in val_image_dict.items()}

    # Re-key the training classes to consecutive "te/<index>" names, in order of first appearance.
    train_image_dict = {
        "te/%d" % class_idx: paths
        for class_idx, paths in enumerate(train_image_dict.values())
    }

    train_dataset = TrainDatasetsmoothap(train_image_dict, opt)
    val_dataset = BaseTripletDataset(val_image_dict, opt, is_validation=True)
    eval_dataset = BaseTripletDataset(test_image_dict, opt, is_validation=True)

    return {
        "training": train_dataset,
        "testing": val_dataset,
        "evaluation": eval_dataset,
    }
testing and evaluation dataloader for Metric Learning on the Inaturalist 2018 dataset. For Metric Learning, training and test sets are provided by given json files. Will define a train and test split So no random shuffling of classes. Args: opt: argparse.Namespace, contains all traininig-specific parameters. Returns: dict of PyTorch datasets for training, testing and evaluation. """ # Load text-files containing classes and imagepaths. # Generate image_dicts of shape {class_idx:[list of paths to images belong to this class] ...} train_image_dict, test_image_dict, eval_image_dict = {}, {}, {} if opt.iter > 0: with open(os.path.join(opt.cluster_path), "rb") as clusterf: ( path2idx, global_features, global_pred_labels, gt_labels, masks, ) = pickle.load(clusterf) gt_labels = gt_labels + len(np.unique(global_pred_labels)) for path, idx in path2idx.items(): if global_pred_labels[idx] == -1: if "te/%d" % gt_labels[idx] not in test_image_dict: test_image_dict["te/%d" % gt_labels[idx]] = [] test_image_dict["te/%d" % gt_labels[idx]].append(path) else: if ( "te/%d" % global_pred_labels[idx] not in train_image_dict ): train_image_dict["te/%d" % global_pred_labels[idx]] = [] train_image_dict["te/%d" % global_pred_labels[idx]].append( path ) if "te/%d" % global_pred_labels[idx] not in test_image_dict: test_image_dict["te/%d" % global_pred_labels[idx]] = [] test_image_dict["te/%d" % global_pred_labels[idx]].append( path ) else: with open(os.path.join(opt.source_path, opt.trainset)) as f: FileLines = f.readlines() FileLines = [x.strip() for x in FileLines] for entry in FileLines: info = entry.split("/") if "/".join([info[-3], info[-2]]) not in train_image_dict: train_image_dict["/".join([info[-3], info[-2]])] = [] train_image_dict["/".join([info[-3], info[-2]])].append( os.path.join(opt.source_path, entry) ) with open(os.path.join(opt.source_path, opt.all_trainset)) as f: FileLines = f.readlines() FileLines = [x.strip() for x in FileLines] for entry in FileLines: info = 
def give_inat_datasets_finetune_1head(testset, cluster_label_path, opt):
    """
    Generate the datasets for fine-tuning on clustering results for the Inaturalist 2018 dataset.

    Training classes are taken from the predicted cluster labels stored in a pickle file;
    samples whose predicted label is -1 are left out. Validation classes are read from a
    text list file, so there is no random shuffling of classes.

    Args:
        testset:            str, name of the list file (inside opt.source_path) with the validation image paths.
        cluster_label_path: str, pickle file holding the tuple
                            (path2idx, global_features, global_pred_labels, gt_labels, masks).
        opt:                argparse.Namespace, contains all training-specific parameters.
    Returns:
        dict of PyTorch datasets with the keys "training", "testing" and "evaluation";
        "testing" and "evaluation" refer to the same validation dataset.
    """
    import pickle

    # Load cluster labels from hilander results.
    # NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
    with open(cluster_label_path, "rb") as cluster_file:
        path2idx, _features, pred_labels, _gt_labels, _masks = pickle.load(
            cluster_file
        )

    # Group training paths by predicted cluster, skipping samples without a cluster (-1).
    train_image_dict = {}
    for path, idx in path2idx.items():
        label = pred_labels[idx]
        if label != -1:
            train_image_dict.setdefault("te/%d" % label, []).append(path)

    # Group validation paths by the two directory components preceding the file name.
    with open(os.path.join(opt.source_path, testset)) as list_file:
        entries = [line.strip() for line in list_file.readlines()]

    val_image_dict = {}
    for entry in entries:
        parts = entry.split("/")
        class_key = "/".join([parts[-3], parts[-2]])
        val_image_dict.setdefault(class_key, []).append(
            os.path.join(opt.source_path, entry)
        )

    train_dataset = TrainDatasetsmoothap(train_image_dict, opt)
    val_dataset = BaseTripletDataset(val_image_dict, opt, is_validation=True)

    datasets = {}
    datasets["training"] = train_dataset
    datasets["testing"] = val_dataset
    datasets["evaluation"] = val_dataset
    return datasets
Args: image_dict: dict, Dictionary of shape {class_idx:[list of paths to images belong to this class] ...} providing all the training paths and classes. opt: argparse.Namespace, contains all training-specific parameters. samples_per_class: Number of samples to draw from one class before moving to the next when filling the batch. is_validation: If is true, dataset properties for validation/testing are used instead of ones for training. Returns: Nothing! """ # Define length of dataset self.n_files = np.sum( [len(image_dict[key]) for key in image_dict.keys()] ) self.is_validation = is_validation self.pars = opt self.image_dict = image_dict self.avail_classes = sorted(list(self.image_dict.keys())) # Convert image dictionary from classname:content to class_idx:content, because the initial indices are not necessarily from 0 - . self.image_dict = { i: self.image_dict[key] for i, key in enumerate(self.avail_classes) } self.avail_classes = sorted(list(self.image_dict.keys())) # Init. properties that are used when filling up batches. if not self.is_validation: self.samples_per_class = samples_per_class # Select current class to sample images from up to self.current_class = np.random.randint(len(self.avail_classes)) self.classes_visited = [self.current_class, self.current_class] self.n_samples_drawn = 0 # Data augmentation/processing methods. normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) transf_list = [] if not self.is_validation: transf_list.extend( [ transforms.RandomResizedCrop(size=224) if opt.arch == "resnet50" else transforms.RandomResizedCrop(size=227), transforms.RandomHorizontalFlip(0.5), ] ) else: transf_list.extend( [ transforms.Resize(256), transforms.CenterCrop(224) if opt.arch == "resnet50" else transforms.CenterCrop(227), ] ) transf_list.extend([transforms.ToTensor(), normalize]) self.transform = transforms.Compose(transf_list) # Convert Image-Dict to list of (image_path, image_class). 
Allows for easier direct sampling. self.image_list = [ [(x, key) for x in self.image_dict[key]] for key in self.image_dict.keys() ] self.image_list = [x for y in self.image_list for x in y] # Flag that denotes if dataset is called for the first time. self.is_init = True def ensure_3dim(self, img): """ Function that ensures that the input img is three-dimensional. Args: img: PIL.Image, image which is to be checked for three-dimensionality (i.e. if some images are black-and-white in an otherwise coloured dataset). Returns: Checked PIL.Image img. """ if len(img.size) == 2: img = img.convert("RGB") return img def __getitem__(self, idx): """ Args: idx: Sample idx for training sample Returns: tuple of form (sample_class, torch.Tensor() of input image) """ if self.pars.loss == "smoothap" or self.pars.loss == "smoothap_element": if self.is_init: # self.current_class = self.avail_classes[idx%len(self.avail_classes)] self.is_init = False if not self.is_validation: if self.samples_per_class == 1: return self.image_list[idx][-1], self.transform( self.ensure_3dim(Image.open(self.image_list[idx][0])) ) if self.n_samples_drawn == self.samples_per_class: # Once enough samples per class have been drawn, we choose another class to draw samples from. # Note that we ensure with self.classes_visited that no class is chosen if it had been chosen # previously or one before that. 
counter = copy.deepcopy(self.avail_classes) for prev_class in self.classes_visited: if prev_class in counter: counter.remove(prev_class) self.current_class = counter[idx % len(counter)] # self.classes_visited = self.classes_visited[1:]+[self.current_class] # EDIT -> there can be no class repeats self.classes_visited = self.classes_visited + [ self.current_class ] self.n_samples_drawn = 0 class_sample_idx = idx % len( self.image_dict[self.current_class] ) self.n_samples_drawn += 1 out_img = self.transform( self.ensure_3dim( Image.open( self.image_dict[self.current_class][ class_sample_idx ] ) ) ) return self.current_class, out_img else: return self.image_list[idx][-1], self.transform( self.ensure_3dim(Image.open(self.image_list[idx][0])) ) else: if self.is_init: self.current_class = self.avail_classes[ idx % len(self.avail_classes) ] self.is_init = False if not self.is_validation: if self.samples_per_class == 1: return self.image_list[idx][-1], self.transform( self.ensure_3dim(Image.open(self.image_list[idx][0])) ) if self.n_samples_drawn == self.samples_per_class: # Once enough samples per class have been drawn, we choose another class to draw samples from. # Note that we ensure with self.classes_visited that no class is chosen if it had been chosen # previously or one before that. 
class TrainDatasetsmoothap(Dataset):
    """
    This dataset class allows mini-batch formation pre-epoch, for greater speed.

    reshuffle() lays out self.dataset as a flat list of (class_key, image_path) pairs in which
    every run of samples_per_class consecutive items belongs to one class, and every run of
    batch_size consecutive items forms one pre-built batch. It is meant to be read in order.
    """

    def __init__(self, image_dict, opt):
        """
        Args:
            image_dict: dict of shape {class_key: [list of image paths of this class]}.
                        The passed dict is not modified.
            opt:        argparse.Namespace; reads opt.dataset, opt.bs, opt.samples_per_class and opt.arch.
        """
        self.dataset_name = opt.dataset
        self.batch_size = opt.bs
        self.samples_per_class = opt.samples_per_class

        # Tag every path with its class key. A new dict is built instead of rewriting the
        # caller's dict in place, so the same image_dict can safely be reused elsewhere.
        self.image_dict = {
            sub: [(sub, instance) for instance in instances]
            for sub, instances in image_dict.items()
        }

        # provide avail_classes
        self.avail_classes = [*self.image_dict]

        # Data augmentation/processing methods.
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
        crop_size = 224 if opt.arch in ["resnet50", "resnet50_mcn"] else 227
        self.transform = transforms.Compose(
            [
                transforms.RandomResizedCrop(size=crop_size),
                transforms.RandomHorizontalFlip(0.5),
                transforms.ToTensor(),
                normalize,
            ]
        )

        self.reshuffle()

    def ensure_3dim(self, img):
        """
        Convert img to RGB when its `size` attribute has two entries.

        NOTE(review): for PIL images `size` is (width, height), so this presumably holds
        for every image and acts as an unconditional RGB conversion - confirm.
        """
        if len(img.size) == 2:
            img = img.convert("RGB")
        return img

    def reshuffle(self):
        """
        Re-draw the batches and store them, flattened, in self.dataset.

        Classes and the samples inside each class are shuffled. Batches are then filled with
        groups of samples_per_class samples from one class each, until no complete batch can be
        formed any more; left-over samples are dropped.
        """
        image_dict = copy.deepcopy(self.image_dict)
        print("shuffling data")
        for sub in image_dict:
            random.shuffle(image_dict[sub])

        classes = [*image_dict]
        random.shuffle(classes)

        # Number of class groups per batch. Kept as a true division: if batch_size is not a
        # multiple of samples_per_class, no batch ever compares equal and the dataset stays empty.
        groups_per_batch = self.batch_size / self.samples_per_class

        total_batches = []
        batch = []
        finished = 0
        while finished == 0:
            for sub_class in classes:
                if (len(image_dict[sub_class]) >= self.samples_per_class) and (
                    len(batch) < groups_per_batch
                ):
                    batch.append(
                        image_dict[sub_class][: self.samples_per_class]
                    )
                    image_dict[sub_class] = image_dict[sub_class][
                        self.samples_per_class :
                    ]

            if len(batch) == groups_per_batch:
                total_batches.append(batch)
                batch = []
            else:
                # One pass over all classes could not fill a batch: stop.
                finished = 1

        random.shuffle(total_batches)
        # batches -> class groups -> (class_key, path) items
        self.dataset = [
            item for batch in total_batches for group in batch for item in group
        ]

    def __getitem__(self, idx):
        """
        Args:
            idx: position in the pre-built, flattened batch list.
        Returns:
            tuple of form (sample_class, transformed image)
        """
        batch_item = self.dataset[idx]

        if self.dataset_name == "Inaturalist":
            # class keys look like "<prefix>/<int>"; the integer part is the label
            cls = int(batch_item[0].split("/")[1])
        else:
            cls = batch_item[0]

        img = Image.open(batch_item[1])
        return cls, self.transform(self.ensure_3dim(img))

    def __len__(self):
        return len(self.dataset)
image_dict_L self.image_dict_U = image_dict_U self.dataset_name = opt.dataset self.batch_size = opt.bs self.samples_per_class = opt.samples_per_class for sub_L in self.image_dict_L: newsub_L = [] for instance in self.image_dict_L[sub_L]: newsub_L.append((sub_L, instance)) self.image_dict_L[sub_L] = newsub_L for sub_U in self.image_dict_U: newsub_U = [] for instance in self.image_dict_U[sub_U]: newsub_U.append((sub_U, instance)) self.image_dict_U[sub_U] = newsub_U # checks # provide avail_classes self.avail_classes = [*self.image_dict_L] + [*self.image_dict_U] # Data augmentation/processing methods. normalize = transforms.Normalize( mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] ) transf_list = [] transf_list.extend( [ transforms.RandomResizedCrop(size=224) if opt.arch in ["resnet50", "resnet50_mcn"] else transforms.RandomResizedCrop(size=227), transforms.RandomHorizontalFlip(0.5), ] ) transf_list.extend([transforms.ToTensor(), normalize]) self.transform = transforms.Compose(transf_list) self.reshuffle() def sample_same_size(self): image_dict = copy.deepcopy(self.image_dict_L) L_size = 0 for sub_L in self.image_dict_L: L_size += len(self.image_dict_L[sub_L]) U_size = 0 classes_U = [*self.image_dict_U] # while U_size < len(list(self.image_dict_U)) and U_size < L_size: while len(classes_U) != 0: sub_U = random.choice(classes_U) classes_U.remove(sub_U) sub_U_size = len(self.image_dict_U[sub_U]) if sub_U in [*image_dict]: image_dict[sub_U].extend(self.image_dict_U[sub_U]) else: image_dict[sub_U] = self.image_dict_U[sub_U] U_size += sub_U_size return image_dict def ensure_3dim(self, img): if len(img.size) == 2: img = img.convert("RGB") return img def reshuffle(self): image_dict = self.sample_same_size() print("shuffling data") for sub in image_dict: random.shuffle(image_dict[sub]) classes = [*image_dict] random.shuffle(classes) total_batches = [] batch = [] finished = 0 while finished == 0: for sub_class in classes: if (len(image_dict[sub_class]) >= 
def evaluate(dataset, LOG, **kwargs):
    """
    Given a dataset name, applies the correct evaluation function.

    Args:
        dataset: str, name of dataset.
        LOG:     aux.LOGGER instance, main logging class.
        **kwargs: Input Argument Dict, depends on dataset. Passed on unchanged.
    Returns:
        (optional) Computed metrics. Are normally written directly to LOG and printed.
    Raises:
        Exception: if no evaluation function is registered for `dataset`.
    """
    if dataset in ["Inaturalist", "semi_fungi"]:
        ret = evaluate_one_dataset(LOG, **kwargs)
    elif dataset in ["vehicle_id"]:
        ret = evaluate_multiple_datasets(LOG, **kwargs)
    else:
        # Fill in the placeholder so the message names the offending dataset.
        raise Exception(
            "No implementation for dataset {} available!".format(dataset)
        )

    return ret
) for idx, data in enumerate(data_iter): input_img, target = data[1], data[0] features = model(input_img.to(self.pars.device)) feature_coll.extend(features.cpu().detach().numpy().tolist()) target_coll.extend(target.numpy().tolist()) feature_coll = np.vstack(feature_coll).astype("float32") target_coll = np.hstack(target_coll).reshape(-1) avail_labels = np.unique(target_coll) # Compute indixes of embeddings for each class. class_positions = [] for lab in avail_labels: class_positions.append(np.where(target_coll == lab)[0]) # Compute average intra-class distance and center of mass. com_class, dists_class = [], [] for class_pos in class_positions: dists = distance.cdist( feature_coll[class_pos], feature_coll[class_pos], "cosine" ) dists = np.sum(dists) / (len(dists) ** 2 - len(dists)) # dists = np.linalg.norm(np.std(feature_coll_aux[class_pos],axis=0).reshape(1,-1)).reshape(-1) com = normalize( np.mean(feature_coll[class_pos], axis=0).reshape(1, -1) ).reshape(-1) dists_class.append(dists) com_class.append(com) # Compute mean inter-class distances by the class-coms. mean_inter_dist = distance.cdist( np.array(com_class), np.array(com_class), "cosine" ) mean_inter_dist = np.sum(mean_inter_dist) / ( len(mean_inter_dist) ** 2 - len(mean_inter_dist) ) # Compute distance ratio mean_class_dist = np.mean(np.array(dists_class) / mean_inter_dist) self.mean_class_dists.append(mean_class_dist) self.update(mean_class_dist) def update(self, mean_class_dist): """ Update Loggers. Args: mean_class_dist: float, Distance Ratio Returns: Nothing! """ self.update_csv(mean_class_dist) self.update_plot() def update_csv(self, mean_class_dist): """ Update CSV. Args: mean_class_dist: float, Distance Ratio Returns: Nothing! """ with open(self.csv_file, "a") as csv_file: writer = csv.writer(csv_file, delimiter=",") writer.writerow([mean_class_dist]) def update_plot(self): """ Update progression plot. Args: None. Returns: Nothing! 
class GradientMeasure:
    """
    Collects per-iteration statistics of a layer's weight gradient and appends
    them to a pickle file on request.

    Four statistics are tracked, each reduced over axis 0 of the gradient:
    mean and std of the raw gradient, and mean and std of its absolute value.
    """

    # Names (and insertion order) of the lists kept in `self.saver`.
    _STAT_KEYS = (
        "grad_normal_mean",
        "grad_normal_std",
        "grad_abs_mean",
        "grad_abs_std",
    )

    def __init__(self, opt, name="class-it"):
        """
        Args:
            opt:  argparse.Namespace, contains all training-specific parameters.
                  Only `opt.save_path` is read by this class (in `dump`).
            name: Name of class instance. Becomes part of the pickle file name.
        Returns:
            Nothing!
        """
        self.pars = opt
        self.name = name
        self.saver = self._empty_saver()

    @classmethod
    def _empty_saver(cls):
        """Return a fresh dict with one empty list per tracked statistic."""
        return {key: [] for key in cls._STAT_KEYS}

    def include(self, params):
        """
        Record gradient statistics of one layer.

        Args:
            params: layer object exposing `weight.grad`; expected to be called
                    after `.backward()` so that the gradient is populated.
        Returns:
            Nothing!
        """
        grad = params.weight.grad.detach().cpu().numpy()
        magnitude = np.abs(grad)

        store = self.saver
        store["grad_normal_mean"].append(np.mean(grad, axis=0))
        store["grad_normal_std"].append(np.std(grad, axis=0))
        store["grad_abs_mean"].append(np.mean(magnitude, axis=0))
        store["grad_abs_std"].append(np.std(magnitude, axis=0))

    def dump(self, epoch):
        """
        Append everything collected so far to the pickle file and start over.

        Args:
            epoch: Current epoch (accepted for interface symmetry, not used).
        Returns:
            Nothing!
        """
        target = self.pars.save_path + "/grad_dict_{}.pkl".format(self.name)
        # "ab": every call adds one more pickled record to the same file.
        with open(target, "ab") as handle:
            pkl.dump([self.saver], handle)
        self.saver = self._empty_saver()
dataloader: PyTorch Dataloader, Testdata to be evaluated. model: PyTorch Network, Network to evaluate. opt: argparse.Namespace, contains all training-specific parameters. save: bool, if True, Checkpoints are saved when testing metrics (specifically Recall @ 1) improve. give_return: bool, if True, return computed metrics. epoch: int, current epoch, required for logger. Returns: (optional) Computed metrics. Are normally written directly to LOG and printed. """ start = time.time() image_paths = np.array(dataloader.dataset.image_list) with torch.no_grad(): # Compute Metrics ( F1, NMI, recall_at_ks, feature_matrix_all, ) = aux.eval_metrics_one_dataset( model, dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt ) # Make printable summary string. result_str = ", ".join( "@{0}: {1:.4f}".format(k, rec) for k, rec in zip(opt.k_vals, recall_at_ks) ) result_str = "Epoch (Test) {0}: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]".format( epoch, NMI, F1, result_str ) if LOG is not None: if save: if not len( LOG.progress_saver["val"]["Recall @ 1"] ) or recall_at_ks[0] > np.max( LOG.progress_saver["val"]["Recall @ 1"] ): # Save Checkpoint print( "Set checkpoint at {}.".format( LOG.prop.save_path + "/checkpoint_{}.pth.tar".format(opt.iter) ) ) aux.set_checkpoint( model, opt, LOG.progress_saver, LOG.prop.save_path + "/checkpoint_{}.pth.tar".format(opt.iter), ) # aux.recover_closest_one_dataset(feature_matrix_all, image_paths, LOG.prop.save_path+'/sample_recoveries.png') # Update logs. LOG.log( "val", LOG.metrics_to_log["val"], [epoch, np.round(time.time() - start), NMI, F1] + recall_at_ks, ) print(result_str) if give_return: return recall_at_ks, NMI, F1 else: None """=========================================================""" def evaluate_query_and_gallery_dataset( LOG, query_dataloader, gallery_dataloader, model, opt, save=True, give_return=True, epoch=0, ): """ Compute evaluation metrics, update LOGGER and print results, specifically for In-Shop Clothes. 
Args: LOG: aux.LOGGER-instance. Main Logging Functionality. query_dataloader: PyTorch Dataloader, Query-testdata to be evaluated. gallery_dataloader: PyTorch Dataloader, Gallery-testdata to be evaluated. model: PyTorch Network, Network to evaluate. opt: argparse.Namespace, contains all training-specific parameters. save: bool, if True, Checkpoints are saved when testing metrics (specifically Recall @ 1) improve. give_return: bool, if True, return computed metrics. epoch: int, current epoch, required for logger. Returns: (optional) Computed metrics. Are normally written directly to LOG and printed. """ start = time.time() query_image_paths = np.array( [x[0] for x in query_dataloader.dataset.image_list] ) gallery_image_paths = np.array( [x[0] for x in gallery_dataloader.dataset.image_list] ) with torch.no_grad(): # Compute Metri cs. ( F1, NMI, recall_at_ks, query_feature_matrix_all, gallery_feature_matrix_all, ) = aux.eval_metrics_query_and_gallery_dataset( model, query_dataloader, gallery_dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt, ) # Generate printable summary string. result_str = ", ".join( "@{0}: {1:.4f}".format(k, rec) for k, rec in zip(opt.k_vals, recall_at_ks) ) result_str = "Epoch (Test) {0}: NMI [{1:.4f}] | F1 [{2:.4f}] | Recall [{3}]".format( epoch, NMI, F1, result_str ) if LOG is not None: if save: if not len( LOG.progress_saver["val"]["Recall @ 1"] ) or recall_at_ks[0] > np.max( LOG.progress_saver["val"]["Recall @ 1"] ): # Save Checkpoint aux.set_checkpoint( model, opt, LOG.progress_saver, LOG.prop.save_path + "/checkpoint.pth.tar", ) aux.recover_closest_inshop( query_feature_matrix_all, gallery_feature_matrix_all, query_image_paths, gallery_image_paths, LOG.prop.save_path + "/sample_recoveries.png", ) # Update logs. 
def evaluate_multiple_datasets(
    LOG, dataloaders, model, opt, save=True, give_return=True, epoch=0
):
    """
    Compute evaluation metrics, update LOGGER and print results, specifically
    for multi-test datasets such as PKU Vehicle ID.

    Args:
        LOG:         aux.LOGGER-instance or None. Main Logging Functionality.
                     With None nothing is checkpointed or logged; metrics are
                     still printed and returned.
        dataloaders: List of PyTorch Dataloaders, test-dataloaders to evaluate.
        model:       PyTorch Network, Network to evaluate.
        opt:         argparse.Namespace, contains all training-specific
                     parameters (`opt.device` and `opt.k_vals` are read here).
        save:        bool, if True, a checkpoint and a sample-recovery image
                     are written for a test set whenever its Recall @ 1
                     improves on the best value logged so far.
        give_return: bool, kept for interface compatibility. The metrics are
                     always returned.
        epoch:       int, current epoch, required for logger.
    Returns:
        list: `[NMI, F1] + recall_at_ks` of every test set, concatenated in
        dataloader order.
    """
    start = time.time()
    metrics = []

    with torch.no_grad():
        for i, dataloader in enumerate(dataloaders):
            print("Working on Set {}/{}".format(i + 1, len(dataloaders)))
            image_paths = np.array(dataloader.dataset.image_list)

            # Compute Metrics for specific testset.
            (
                F1,
                NMI,
                recall_at_ks,
                feature_matrix_all,
            ) = aux.eval_metrics_one_dataset(
                model, dataloader, device=opt.device, k_vals=opt.k_vals, opt=opt
            )

            # Generate printable summary string.
            result_str = ", ".join(
                "@{0}: {1:.4f}".format(k, rec)
                for k, rec in zip(opt.k_vals, recall_at_ks)
            )
            result_str = "SET {0}: Epoch (Test) {1}: NMI [{2:.4f}] | F1 {3:.4f}| Recall [{4}]".format(
                i + 1, epoch, NMI, F1, result_str
            )

            if LOG is not None and save:
                # Logger keys use the zero-based set index, the file names
                # written below use the one-based index.
                history = LOG.progress_saver["val"][
                    "Set {} Recall @ 1".format(i)
                ]
                if not len(history) or recall_at_ks[0] > np.max(history):
                    # Save Checkpoint for specific test set.
                    aux.set_checkpoint(
                        model,
                        opt,
                        LOG.progress_saver,
                        LOG.prop.save_path
                        + "/checkpoint_set{}.pth.tar".format(i + 1),
                    )
                    aux.recover_closest_one_dataset(
                        feature_matrix_all,
                        image_paths,
                        LOG.prop.save_path
                        + "/sample_recoveries_set{}.png".format(i + 1),
                    )

            metrics += [NMI, F1] + recall_at_ks
            print(result_str)

    if LOG is not None:
        # Update logs. The row starts with the epoch followed by the elapsed
        # time, i.e. the same column order the single-dataset evaluation logs;
        # putting the time first would swap the two columns in the CSV.
        elapsed = np.round(time.time() - start)
        LOG.log("val", LOG.metrics_to_log["val"], [epoch, elapsed] + metrics)

    return metrics
Set to >1 for losses other than ProxyNCA.", ) parser.add_argument("--loss", default="smoothap", type=str) ##### Evaluation Settings parser.add_argument( "--k_vals", nargs="+", default=[1, 2, 4, 8], type=int, help="Recall @ Values.", ) ##### Network parameters parser.add_argument( "--embed_dim", default=512, type=int, help="Embedding dimensionality of the network. Note: in literature, dim=128 is used for ResNet50 and dim=512 for GoogLeNet.", ) parser.add_argument( "--arch", default="resnet50", type=str, help="Network backend choice: resnet50, googlenet, BNinception", ) parser.add_argument( "--gpu", default=0, type=int, help="GPU-id for GPU to use." ) parser.add_argument( "--resume", default="", type=str, help="path to where weights to be evaluated are saved.", ) parser.add_argument( "--not_pretrained", action="store_true", help="If added, the network will be trained WITHOUT ImageNet-pretrained weights.", ) parser.add_argument("--trainset", default="lin_train_set1.txt", type=str) parser.add_argument( "--testset", default="Inaturalist_test_set1.txt", type=str ) parser.add_argument("--cluster_path", default="", type=str) parser.add_argument("--finetune", default="false", type=str) parser.add_argument("--class_num", default=948, type=int) parser.add_argument("--get_features", default="false", type=str) parser.add_argument( "--patch_size", default=16, type=int, help="vit patch size" ) parser.add_argument( "--pretrained_weights", default="", type=str, help="pretrained weight path", ) parser.add_argument( "--use_bn_in_head", default=False, type=aux.bool_flag, help="Whether to use batch normalizations in projection head (Default: False)", ) parser.add_argument( "--checkpoint_key", default="teacher", type=str, help='Key to use in the checkpoint (example: "teacher")', ) parser.add_argument( "--drop_path_rate", default=0.1, type=float, help="stochastic depth rate", ) parser.add_argument( "--norm_last_layer", default=True, type=aux.bool_flag, help="""Whether or not to weight 
normalize the last layer of the DINO head. Not normalizing leads to better performance but can make the training unstable. In our experiments, we typically set this paramater to False with vit_small and True with vit_base.""", ) parser.add_argument( "--linsize", default=29011, type=int, help="Lin data size." ) parser.add_argument( "--uinsize", default=18403, type=int, help="Uin data size." ) opt = parser.parse_args() """============================================================================""" opt.source_path += "/" + opt.dataset if opt.dataset == "Inaturalist": opt.n_epochs = 90 opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.dataset == "vehicle_id": opt.k_vals = [1, 5] if opt.finetune == "true": opt.finetune = True elif opt.finetune == "false": opt.finetune = False if opt.get_features == "true": opt.get_features = True elif opt.get_features == "false": opt.get_features = False metrics_to_log = aux.metrics_to_examine(opt.dataset, opt.k_vals) LOG = aux.LOGGER(opt, metrics_to_log, name="Base", start_new=True) """============================================================================""" ##################### NETWORK SETUP ################## opt.device = torch.device("cuda") model = netlib.networkselect(opt) # Push to Device _ = model.to(opt.device) """============================================================================""" #################### DATALOADER SETUPS ################## # Returns a dictionary containing 'training', 'testing', and 'evaluation' dataloaders. # The 'testing'-dataloader corresponds to the validation set, and the 'evaluation'-dataloader # Is simply using the training set, however running under the same rules as 'testing' dataloader, # i.e. no shuffling and no random cropping. 
dataloaders = data.give_dataloaders( opt.dataset, opt.trainset, opt.testset, opt ) # Because the number of supervised classes is dataset dependent, we store them after # initializing the dataloader opt.num_classes = len(dataloaders["training"].dataset.avail_classes) if opt.dataset == "Inaturalist": eval_params = { "dataloader": dataloaders["testing"], "model": model, "opt": opt, "epoch": 0, } elif opt.dataset == "vehicle_id": eval_params = { "dataloaders": [ dataloaders["testing_set1"], dataloaders["testing_set2"], dataloaders["testing_set3"], ], "model": model, "opt": opt, "epoch": 0, } """============================================================================""" ####################evaluation ################## results = eval.evaluate(opt.dataset, LOG, save=True, **eval_params) ================================================ FILE: examples/pytorch/hilander/PSS/Smooth_AP/src/finetune_1head.py ================================================ # repo originally forked from https://github.com/Confusezius/Deep-Metric-Learning-Baselines """to do: clean all of the files - particularly the main.py and also the losses and dataset files and the file for doing the dataloading -- fast loading etc need to change all of the copyrights at the top of all of the files """ #################### LIBRARIES ######################## import warnings warnings.filterwarnings("ignore") import argparse import datetime import os import random import matplotlib import numpy as np os.chdir(os.path.dirname(os.path.realpath(__file__))) from pathlib import Path matplotlib.use("agg") import auxiliaries as aux import datasets as data import evaluate as eval import losses as losses import netlib as netlib import torch.multiprocessing from tensorboardX import SummaryWriter from tqdm import tqdm torch.multiprocessing.set_sharing_strategy("file_system") import time start = time.time() ################### INPUT ARGUMENTS ################### parser = argparse.ArgumentParser() ####### Main Parameter: 
Dataset to use for Training parser.add_argument( "--dataset", default="Inaturalist", type=str, help="Dataset to use.", choices=["Inaturalist", "semi_fungi"], ) ### General Training Parameters parser.add_argument( "--lr", default=0.00001, type=float, help="Learning Rate for network parameters.", ) parser.add_argument( "--fc_lr_mul", default=5, type=float, help="OPTIONAL: Multiply the embedding layer learning rate by this value. If set to 0, the embedding layer shares the same learning rate.", ) parser.add_argument( "--n_epochs", default=400, type=int, help="Number of training epochs." ) parser.add_argument( "--kernels", default=8, type=int, help="Number of workers for pytorch dataloader.", ) parser.add_argument( "--bs", default=112, type=int, help="Mini-Batchsize to use." ) parser.add_argument( "--samples_per_class", default=4, type=int, help="Number of samples in one class drawn before choosing the next class", ) parser.add_argument( "--seed", default=1, type=int, help="Random seed for reproducibility." ) parser.add_argument( "--scheduler", default="step", type=str, help="Type of learning rate scheduling. Currently: step & exp.", ) parser.add_argument( "--gamma", default=0.3, type=float, help="Learning rate reduction after tau epochs.", ) parser.add_argument( "--decay", default=0.001, type=float, help="Weight decay for optimizer." 
) parser.add_argument( "--tau", default=[200, 300], nargs="+", type=int, help="Stepsize(s) before reducing learning rate.", ) parser.add_argument( "--infrequent_eval", default=0, type=int, help="only compute evaluation metrics every 10 epochs", ) parser.add_argument("--opt", default="adam", help="adam or sgd") ##### Loss-specific Settings parser.add_argument("--loss", default="smoothap", type=str) parser.add_argument( "--sigmoid_temperature", default=0.01, type=float, help="SmoothAP: the temperature of the sigmoid used in SmoothAP loss", ) ##### Evaluation Settings parser.add_argument( "--k_vals", nargs="+", default=[1, 2, 4, 8], type=int, help="Recall @ Values.", ) parser.add_argument( "--resume", default="", type=str, help="path to checkpoint to load weights from (if empty then ImageNet pre-trained weights are loaded", ) ##### Network parameters parser.add_argument( "--embed_dim", default=512, type=int, help="Embedding dimensionality of the network", ) parser.add_argument( "--arch", default="resnet50", type=str, help="Network backend choice: resnet50, googlenet, BNinception", ) parser.add_argument( "--grad_measure", action="store_true", help="If added, gradients passed from embedding layer to the last conv-layer are stored in each iteration.", ) parser.add_argument( "--dist_measure", action="store_true", help="If added, the ratio between intra- and interclass distances is stored after each epoch.", ) parser.add_argument( "--not_pretrained", action="store_true", help="If added, the network will be trained WITHOUT ImageNet-pretrained weights.", ) ##### Setup Parameters parser.add_argument("--gpu", default=0, type=int, help="GPU-id for GPU to use.") parser.add_argument( "--savename", default="", type=str, help="Save folder name if any special information is to be included.", ) ### Paths to datasets and storage folder parser.add_argument( "--source_path", default="/scratch/shared/beegfs/abrown/datasets", type=str, help="Path to data", ) parser.add_argument( 
"--save_path", default=os.getcwd() + "/Training_Results", type=str, help="Where to save the checkpoints", ) ### additional parameters parser.add_argument("--trainset", default="lin_train_set1.txt", type=str) parser.add_argument("--testset", default="Inaturalist_test_set1.txt", type=str) parser.add_argument("--cluster_path", default="", type=str) parser.add_argument("--finetune", default="true", type=str) parser.add_argument("--class_num", default=948, type=int) parser.add_argument( "--pretrained_weights", default="", type=str, help="pretrained weight path" ) parser.add_argument( "--use_bn_in_head", default=False, type=aux.bool_flag, help="Whether to use batch normalizations in projection head (Default: False)", ) parser.add_argument( "--checkpoint_key", default="teacher", type=str, help='Key to use in the checkpoint (example: "teacher")', ) parser.add_argument( "--drop_path_rate", default=0.1, type=float, help="stochastic depth rate" ) parser.add_argument("--iter", default=1, type=int) opt = parser.parse_args() """============================================================================""" opt.source_path += "/" + opt.dataset opt.save_path += "/" + opt.dataset + "_" + str(opt.embed_dim) if opt.dataset == "Inaturalist": # opt.n_epochs = 90 opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.dataset == "semi_fungi": opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.finetune == "true": opt.finetune = True elif opt.finetune == "false": opt.finetune = False """===========================================================================""" ################### TensorBoard Settings ################## timestamp = datetime.datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S") exp_name = aux.args2exp_name(opt) opt.save_name = f"weights_{exp_name}" + "/" + timestamp random.seed(opt.seed) np.random.seed(opt.seed) torch.manual_seed(opt.seed) torch.cuda.manual_seed(opt.seed) torch.cuda.manual_seed_all(opt.seed) tensorboard_path = Path(f"logs/logs_{exp_name}") / timestamp 
tensorboard_path.parent.mkdir(exist_ok=True, parents=True) global writer writer = SummaryWriter(tensorboard_path) """============================================================================""" ################### GPU SETTINGS ########################### os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # os.environ["CUDA_VISIBLE_DEVICES"]= str(opt.gpu) print("using #GPUs:", torch.cuda.device_count()) """============================================================================""" #################### DATALOADER SETUPS ################## # Returns a dictionary containing 'training', 'testing', and 'evaluation' dataloaders. # The 'testing'-dataloader corresponds to the validation set, and the 'evaluation'-dataloader # Is simply using the training set, however running under the same rules as 'testing' dataloader, # i.e. no shuffling and no random cropping. dataloaders = data.give_dataloaders( opt.dataset, opt.trainset, opt.testset, opt, cluster_path=opt.cluster_path ) # Because the number of supervised classes is dataset dependent, we store them after # initializing the dataloader opt.num_classes = len(dataloaders["training"].dataset.avail_classes) print("num_classes:", opt.num_classes) print("train dataset size:", len(dataloaders["training"])) """============================================================================""" ##################### NETWORK SETUP ################## opt.device = torch.device("cuda") model = netlib.networkselect(opt) # Push to Device if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) _ = model.to(opt.device) # Place trainable parameter in list of parameters to train: if "fc_lr_mul" in vars(opt).keys() and opt.fc_lr_mul != 0: all_but_fc_params = list( filter(lambda x: "last_linear" not in x[0], model.named_parameters()) ) for ind, param in enumerate(all_but_fc_params): all_but_fc_params[ind] = param[1] if torch.cuda.device_count() > 1: fc_params = model.module.model.last_linear.parameters() else: fc_params = 
model.model.last_linear.parameters() to_optim = [ {"params": all_but_fc_params, "lr": opt.lr, "weight_decay": opt.decay}, { "params": fc_params, "lr": opt.lr * opt.fc_lr_mul, "weight_decay": opt.decay, }, ] else: to_optim = [ {"params": model.parameters(), "lr": opt.lr, "weight_decay": opt.decay} ] """============================================================================""" #################### CREATE LOGGING FILES ############### # Each dataset usually has a set of standard metrics to log. aux.metrics_to_examine() # returns a dict which lists metrics to log for training ('train') and validation/testing ('val') metrics_to_log = aux.metrics_to_examine(opt.dataset, opt.k_vals) # example output: {'train': ['Epochs', 'Time', 'Train Loss', 'Time'], # 'val': ['Epochs','Time','NMI','F1', 'Recall @ 1','Recall @ 2','Recall @ 4','Recall @ 8']} # Using the provided metrics of interest, we generate a LOGGER instance. # Note that 'start_new' denotes that a new folder should be made in which everything will be stored. # This includes network weights as well. LOG = aux.LOGGER(opt, metrics_to_log, name="Base", start_new=True) # If graphviz is installed on the system, a computational graph of the underlying # network will be made as well. """============================================================================""" #################### LOSS SETUP #################### # Depending on opt.loss and opt.sampling, the respective criterion is returned, # and if the loss has trainable parameters, to_optim is appended. criterion, to_optim = losses.loss_select(opt.loss, opt, to_optim) _ = criterion.to(opt.device) """============================================================================""" ##################### OPTIONAL EVALUATIONS ##################### # Store the averaged gradients returned from the embedding to the last conv. layer. 
def same_model(model1, model2):
    """
    Check whether two networks hold identical parameter values.

    Parameters are compared pairwise in the order `.parameters()` yields
    them; the comparison stops at the shorter of the two parameter lists.

    Args:
        model1: PyTorch Network.
        model2: PyTorch Network.
    Returns:
        bool, False as soon as one parameter pair differs in any element,
        True otherwise.
    """
    for left, right in zip(model1.parameters(), model2.parameters()):
        if left.data.ne(right.data).any():
            return False
    return True


"""============================================================================"""
#################### TRAINER FUNCTION ############################
def train_one_epoch_finetune(
    train_dataloader, model, optimizer, criterion, opt, epoch
):
    """
    This function is called every epoch to perform training of the network
    over one full (randomized) iteration of the dataset.

    Besides its arguments it relies on the module-level objects `LOG`,
    `writer` and (only with --grad_measure) `grad_measure`.

    Args:
        train_dataloader: torch.utils.data.DataLoader, returns (augmented)
                          training data as (class_labels, images) batches.
        model:            Network to train, optionally wrapped in
                          torch.nn.DataParallel.
        optimizer:        Optimizer to use for training.
        criterion:        criterion to use during training.
        opt:              argparse.Namespace, Contains all relevant parameters.
        epoch:            int, Current epoch.
    Returns:
        Nothing!
    """
    loss_collect = []
    start = time.time()

    # With several GPUs the network is wrapped in DataParallel and the real
    # module lives under `.module`; the gradient probe below needs the
    # unwrapped network to reach `.model.last_linear`.
    if isinstance(model, torch.nn.DataParallel):
        bare_model = model.module
    else:
        bare_model = model

    data_iterator = tqdm(
        train_dataloader, desc="Epoch {} Training gt labels...".format(epoch)
    )
    last_batch = len(train_dataloader) - 1

    for i, (class_labels, images) in enumerate(data_iterator):
        # Compute embeddings for input batch
        features = model(images.to(opt.device))

        # Compute loss. SmoothAP is called without labels here.
        if opt.loss != "smoothap":
            loss = criterion(features, class_labels)
        else:
            loss = criterion(features)

        # Ensure gradients are set to zero at beginning
        optimizer.zero_grad()
        # Compute gradient
        loss.backward()

        train_dataloader.dataset.classes_visited = []

        if opt.grad_measure:
            # If desired, save computed gradients.
            grad_measure.include(bare_model.model.last_linear)

        # Update weights using comp. gradients.
        optimizer.step()

        # Store loss per iteration.
        loss_collect.append(loss.item())
        if i == last_batch:
            data_iterator.set_description(
                "Epoch (Train) {0}: Mean Loss [{1:.4f}]".format(
                    epoch, np.mean(loss_collect)
                )
            )

    # Save metrics
    LOG.log(
        "train",
        LOG.metrics_to_log["train"],
        [epoch, np.round(time.time() - start, 4), np.mean(loss_collect)],
    )
    writer.add_scalar("global/training_loss", np.mean(loss_collect), epoch)

    if opt.grad_measure:
        # Dump stored gradients to Pickle-File.
        grad_measure.dump(epoch)
# Arguments handed to eval.evaluate() after every epoch. argparse restricts
# --dataset to "Inaturalist" and "semi_fungi", and both are evaluated on the
# single "testing" loader, so one dict serves every run; only "epoch" is
# refreshed inside the loop. (A test written as `x == "a" or "b"` is always
# true, so it cannot be used to select a dataset.)
eval_params = {
    "dataloader": dataloaders["testing"],
    "model": model,
    "opt": opt,
    "epoch": 0,
}

print("epochs -> " + str(opt.n_epochs))

# --infrequent_eval 1 evaluates only every 10th epoch, otherwise every epoch.
epoch_freq = 10 if opt.infrequent_eval == 1 else 1

for epoch in range(opt.n_epochs):
    ### Print current learning rates for all parameters
    if opt.scheduler != "none":
        # Read the rates straight from the optimizer: that is what the next
        # update will use. scheduler.get_lr() is not meant to be called
        # outside of scheduler.step() and can report an extra decay step.
        print(
            "Running with learning rates {}...".format(
                " | ".join(
                    "{}".format(group["lr"])
                    for group in optimizer.param_groups
                )
            )
        )

    ### Train one epoch
    _ = model.train()
    train_one_epoch_finetune(
        dataloaders["training"], model, optimizer, criterion, opt, epoch
    )
    dataloaders["training"].dataset.reshuffle()

    ### Evaluate
    _ = model.eval()
    eval_params["epoch"] = epoch

    # Compute Evaluation metrics, print them and store in LOG.
    if epoch % epoch_freq == 0:
        results = eval.evaluate(opt.dataset, LOG, save=True, **eval_params)
        # results = (recall_at_ks, NMI, F1). The tags number the first four
        # recall entries by position (1..4), not by their k value.
        for rank, recall in enumerate(results[0][:4], start=1):
            writer.add_scalar(
                "global/recall{}".format(rank), recall, epoch + 1
            )
        writer.add_scalar("global/NMI", results[1], epoch + 1)
        writer.add_scalar("global/F1", results[2], epoch + 1)

        # Update the Metric Plot and save it.
        # LOG.update_info_plot()

    # (optional) compute ratio of intra- to interdistances.
    if opt.dist_measure:
        distance_measure.measure(model, epoch)
        # distance_measure_test.measure(model, epoch)

    ### Learning Rate Scheduling Step
    if opt.scheduler != "none":
        scheduler.step()

    print("\n-----\n")

# `start` is the timestamp taken when the script was launched.
print("Time:", time.time() - start)
) parser.add_argument( "--kernels", default=8, type=int, help="Number of workers for pytorch dataloader.", ) parser.add_argument( "--bs", default=112, type=int, help="Mini-Batchsize to use." ) parser.add_argument( "--samples_per_class", default=4, type=int, help="Number of samples in one class drawn before choosing the next class", ) parser.add_argument( "--seed", default=1, type=int, help="Random seed for reproducibility." ) parser.add_argument( "--scheduler", default="step", type=str, help="Type of learning rate scheduling. Currently: step & exp.", ) parser.add_argument( "--gamma", default=0.3, type=float, help="Learning rate reduction after tau epochs.", ) parser.add_argument( "--decay", default=0.0004, type=float, help="Weight decay for optimizer." ) parser.add_argument( "--tau", default=[200, 300], nargs="+", type=int, help="Stepsize(s) before reducing learning rate.", ) parser.add_argument( "--infrequent_eval", default=0, type=int, help="only compute evaluation metrics every 10 epochs", ) parser.add_argument("--opt", default="adam", help="adam or sgd") ##### Loss-specific Settings parser.add_argument("--loss", default="smoothap", type=str) parser.add_argument( "--sigmoid_temperature", default=0.01, type=float, help="SmoothAP: the temperature of the sigmoid used in SmoothAP loss", ) ##### Evaluation Settings parser.add_argument( "--k_vals", nargs="+", default=[1, 2, 4, 8], type=int, help="Recall @ Values.", ) parser.add_argument( "--resume", default="", type=str, help="path to checkpoint to load weights from (if empty then ImageNet pre-trained weights are loaded", ) ##### Network parameters parser.add_argument( "--embed_dim", default=512, type=int, help="Embedding dimensionality of the network", ) parser.add_argument( "--arch", default="resnet50", type=str, help="Network backend choice: resnet50, googlenet, BNinception", ) parser.add_argument( "--grad_measure", action="store_true", help="If added, gradients passed from embedding layer to the last conv-layer 
are stored in each iteration.", ) parser.add_argument( "--dist_measure", action="store_true", help="If added, the ratio between intra- and interclass distances is stored after each epoch.", ) parser.add_argument( "--not_pretrained", action="store_true", help="If added, the network will be trained WITHOUT ImageNet-pretrained weights.", ) ##### Setup Parameters parser.add_argument("--gpu", default=0, type=int, help="GPU-id for GPU to use.") parser.add_argument( "--savename", default="", type=str, help="Save folder name if any special information is to be included.", ) ### Paths to datasets and storage folder parser.add_argument( "--source_path", default="/scratch/shared/beegfs/abrown/datasets", type=str, help="Path to data", ) parser.add_argument( "--save_path", default=os.getcwd() + "/Training_Results", type=str, help="Where to save the checkpoints", ) ### adational parser.add_argument("--trainset", default="lin_train_set1.txt", type=str) parser.add_argument("--all_trainset", default="train_set1.txt", type=str) parser.add_argument("--testset", default="test_set1.txt", type=str) parser.add_argument("--finetune", default="true", type=str) parser.add_argument("--cluster_path", default="", type=str) parser.add_argument("--get_features", default="false", type=str) parser.add_argument("--class_num", default=948, type=int) parser.add_argument("--iter", default=0, type=int) parser.add_argument( "--pretrained_weights", default="", type=str, help="pretrained weight path" ) parser.add_argument( "--use_bn_in_head", default=False, type=aux.bool_flag, help="Whether to use batch normalizations in projection head (Default: False)", ) parser.add_argument( "--checkpoint_key", default="teacher", type=str, help='Key to use in the checkpoint (example: "teacher")', ) parser.add_argument( "--drop_path_rate", default=0.1, type=float, help="stochastic depth rate" ) parser.add_argument("--linsize", default=29011, type=int, help="Lin data size.") parser.add_argument("--uinsize", 
default=18403, type=int, help="Uin data size.") opt = parser.parse_args() """============================================================================""" opt.source_path += "/" + opt.dataset opt.save_path += "/" + opt.dataset + "_" + str(opt.embed_dim) if opt.dataset == "Inaturalist": opt.n_epochs = 90 opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.dataset == "semi_fungi": opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.get_features == "true": opt.get_features = True if opt.get_features == "false": opt.get_features = False if opt.finetune == "true": opt.finetune = True elif opt.finetune == "false": opt.finetune = False """===========================================================================""" ################### TensorBoard Settings ################## timestamp = datetime.datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S") exp_name = aux.args2exp_name(opt) opt.save_name = f"weights_{exp_name}" + "/" + timestamp random.seed(opt.seed) np.random.seed(opt.seed) torch.manual_seed(opt.seed) torch.cuda.manual_seed(opt.seed) torch.cuda.manual_seed_all(opt.seed) """============================================================================""" ################### GPU SETTINGS ########################### os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # os.environ["CUDA_VISIBLE_DEVICES"]= str(opt.gpu) print("using #GPUs:", torch.cuda.device_count()) """============================================================================""" ##################### NETWORK SETUP ################## opt.device = torch.device("cuda") model = netlib.networkselect(opt) # Push to Device if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) _ = model.to(opt.device) # Place trainable parameter in list of parameters to train: if "fc_lr_mul" in vars(opt).keys() and opt.fc_lr_mul != 0: all_but_fc_params = list( filter(lambda x: "last_linear" not in x[0], model.named_parameters()) ) for ind, param in enumerate(all_but_fc_params): all_but_fc_params[ind] = param[1] if 
torch.cuda.device_count() > 1: fc_params = model.module.model.last_linear.parameters() else: fc_params = model.model.last_linear.parameters() to_optim = [ {"params": all_but_fc_params, "lr": opt.lr, "weight_decay": opt.decay}, { "params": fc_params, "lr": opt.lr * opt.fc_lr_mul, "weight_decay": opt.decay, }, ] else: to_optim = [ {"params": model.parameters(), "lr": opt.lr, "weight_decay": opt.decay} ] """============================================================================""" #################### DATALOADER SETUPS ################## # Returns a dictionary containing 'training', 'testing', and 'evaluation' dataloaders. # The 'testing'-dataloader corresponds to the validation set, and the 'evaluation'-dataloader # Is simply using the training set, however running under the same rules as 'testing' dataloader, # i.e. no shuffling and no random cropping. dataloaders = data.give_dataloaders(opt.dataset, opt.trainset, opt.testset, opt) # Because the number of supervised classes is dataset dependent, we store them after # initializing the dataloader opt.num_classes = len(dataloaders["training"].dataset.avail_classes) """============================================================================""" #################### CREATE LOGGING FILES ############### # Each dataset usually has a set of standard metrics to log. aux.metrics_to_examine() # returns a dict which lists metrics to log for training ('train') and validation/testing ('val') metrics_to_log = aux.metrics_to_examine(opt.dataset, opt.k_vals) # example output: {'train': ['Epochs', 'Time', 'Train Loss', 'Time'], # 'val': ['Epochs','Time','NMI','F1', 'Recall @ 1','Recall @ 2','Recall @ 4','Recall @ 8']} # Using the provided metrics of interest, we generate a LOGGER instance. # Note that 'start_new' denotes that a new folder should be made in which everything will be stored. # This includes network weights as well. 
# If graphviz is installed on the system, a computational graph of the underlying # network will be made as well. """============================================================================""" #################### LOSS SETUP #################### # Depending on opt.loss and opt.sampling, the respective criterion is returned, # and if the loss has trainable parameters, to_optim is appended. LOG = aux.LOGGER(opt, metrics_to_log, name="Base", start_new=True) criterion, to_optim = losses.loss_select(opt.loss, opt, to_optim) _ = criterion.to(opt.device) """============================================================================""" ##################### OPTIONAL EVALUATIONS ##################### # Store the averaged gradients returned from the embedding to the last conv. layer. if opt.grad_measure: grad_measure = eval.GradientMeasure(opt, name="baseline") # Store the relative distances between average intra- and inter-class distance. if opt.dist_measure: # Add a distance measure for training distance ratios distance_measure = eval.DistanceMeasure( dataloaders["evaluation"], opt, name="Train", update_epochs=1 ) # #If uncommented: Do the same for the test set # distance_measure_test = eval.DistanceMeasure(dataloaders['testing'], opt, name='Train', update_epochs=1) """============================================================================""" #################### OPTIM SETUP #################### # As optimizer, Adam with standard parameters is used. 
if opt.opt == "adam": optimizer = torch.optim.Adam(to_optim) elif opt.opt == "sgd": optimizer = torch.optim.SGD(to_optim) else: raise Exception("unknown optimiser") # for the SOA measures in the paper - need to use SGD and 0.05 learning rate # optimizer = torch.optim.Adam(to_optim) # optimizer = torch.optim.SGD(to_optim) if opt.scheduler == "exp": scheduler = torch.optim.lr_scheduler.ExponentialLR( optimizer, gamma=opt.gamma ) elif opt.scheduler == "step": scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=opt.tau, gamma=opt.gamma ) elif opt.scheduler == "none": print("No scheduling used!") else: raise Exception("No scheduling option for input: {}".format(opt.scheduler)) def same_model(model1, model2): for p1, p2 in zip(model1.parameters(), model2.parameters()): if p1.data.ne(p2.data).sum() > 0: return False return True """============================================================================""" """================================ TESTING ===================================""" """============================================================================""" ################### SCRIPT MAIN ########################## print("\n-----\n") # Compute Evaluation metrics, print them and store in LOG. _ = model.eval() aux.vis( model, dataloaders["training"], opt.device, split="T_train_iter" + str(opt.iter) + "_" + str(opt.loss), opt=opt, ) aux.vis( model, dataloaders["testing"], opt.device, split="all_train_iter" + str(opt.iter) + "_" + str(opt.loss), opt=opt, ) aux.vis( model, dataloaders["eval"], opt.device, split="test_iter" + str(opt.iter) + "_" + str(opt.loss), opt=opt, ) # Update the Metric Plot and save it. 
print("\n-----\n") ================================================ FILE: examples/pytorch/hilander/PSS/Smooth_AP/src/losses.py ================================================ # repo originally forked from https://github.com/Confusezius/Deep-Metric-Learning-Baselines ###################### LIBRARIES ################################################# import warnings warnings.filterwarnings("ignore") import faiss import numpy as np import torch from scipy import sparse """=================================================================================================""" ############ LOSS SELECTION FUNCTION ##################### def loss_select(loss, opt, to_optim): """ Selection function which returns the respective criterion while appending to list of trainable parameters if required. Args: loss: str, name of loss function to return. opt: argparse.Namespace, contains all training-specific parameters. to_optim: list of trainable parameters. Is extend if loss function contains those as well. 
Returns: criterion (torch.nn.Module inherited), to_optim (optionally appended) """ if loss == "smoothap": loss_params = { "anneal": opt.sigmoid_temperature, "batch_size": opt.bs, "num_id": int(opt.bs / opt.samples_per_class), "feat_dims": opt.embed_dim, } criterion = SmoothAP(**loss_params) else: raise Exception("Loss {} not available!".format(loss)) return criterion, to_optim """==============================================Smooth-AP========================================""" def sigmoid(tensor, temp=1.0): """temperature controlled sigmoid takes as input a torch tensor (tensor) and passes it through a sigmoid, controlled by temperature: temp """ exponent = -tensor / temp # clamp the input tensor for stability exponent = torch.clamp(exponent, min=-50, max=50) y = 1.0 / (1.0 + torch.exp(exponent)) return y def compute_aff(x): """computes the affinity matrix between an input vector and itself""" return torch.mm(x, x.t()) class BinarizedF(torch.autograd.Function): def forward(self, inp): self.save_for_backward(inp) a = torch.ones_like(inp) b = torch.zeros_like(inp) output = torch.where(inp > 0, a, b) return output def backward(self, output_grad): (inp,) = self.saved_tensors input_abs = torch.abs(inp) ones = torch.ones_like(inp) zeros = torch.zeros_like(inp) input_grad = torch.where(input_abs > 0, ones, zeros) return input_grad class BinarizedModule(torch.nn.Module): def __init__(self): super(BinarizedModule, self).__init__() self.BF = BinarizedF() def forward(self, inp): output = self.BF(inp) return output class SmoothAP(torch.nn.Module): """PyTorch implementation of the Smooth-AP loss. implementation of the Smooth-AP loss. Takes as input the mini-batch of CNN-produced feature embeddings and returns the value of the Smooth-AP loss. The mini-batch must be formed of a defined number of classes. Each class must have the same number of instances represented in the mini-batch and must be ordered sequentially by class. e.g. 
the labels for a mini-batch with batch size 9, and 3 represented classes (A,B,C) must look like: labels = ( A, A, A, B, B, B, C, C, C) (the order of the classes however does not matter) For each instance in the mini-batch, the loss computes the Smooth-AP when it is used as the query and the rest of the mini-batch is used as the retrieval set. The positive set is formed of the other instances in the batch from the same class. The loss returns the average Smooth-AP across all instances in the mini-batch. Args: anneal : float the temperature of the sigmoid that is used to smooth the ranking function. A low value of the temperature results in a steep sigmoid, that tightly approximates the heaviside step function in the ranking function. batch_size : int the batch size being used during training. num_id : int the number of different classes that are represented in the batch. feat_dims : int the dimension of the input feature embeddings Shape: - Input (preds): (batch_size, feat_dims) (must be a cuda torch float tensor) - Output: scalar Examples:: >>> loss = SmoothAP(0.01, 60, 6, 256) >>> input = torch.randn(60, 256, requires_grad=True).cuda() >>> output = loss(input) >>> output.backward() """ def __init__(self, anneal, batch_size, num_id, feat_dims): """ Parameters ---------- anneal : float the temperature of the sigmoid that is used to smooth the ranking function batch_size : int the batch size being used num_id : int the number of different classes that are represented in the batch feat_dims : int the dimension of the input feature embeddings """ super(SmoothAP, self).__init__() assert batch_size % num_id == 0 self.anneal = anneal self.batch_size = batch_size self.num_id = num_id self.feat_dims = feat_dims def forward(self, preds): """Forward pass for all input predictions: preds - (batch_size x feat_dims)""" # ------ differentiable ranking of all retrieval set ------ # compute the mask which ignores the relevance score of the query to itself mask = 1.0 - 
torch.eye(self.batch_size) mask = mask.unsqueeze(dim=0).repeat(self.batch_size, 1, 1) # compute the relevance scores via cosine similarity of the CNN-produced embedding vectors sim_all = compute_aff(preds) sim_all_repeat = sim_all.unsqueeze(dim=1).repeat(1, self.batch_size, 1) # compute the difference matrix sim_diff = sim_all_repeat - sim_all_repeat.permute(0, 2, 1) # pass through the sigmoid sim_sg = sigmoid(sim_diff, temp=self.anneal) * mask.cuda() # compute the rankings sim_all_rk = torch.sum(sim_sg, dim=-1) + 1 # ------ differentiable ranking of only positive set in retrieval set ------ # compute the mask which only gives non-zero weights to the positive set xs = preds.view( self.num_id, int(self.batch_size / self.num_id), self.feat_dims ) pos_mask = 1.0 - torch.eye(int(self.batch_size / self.num_id)) pos_mask = ( pos_mask.unsqueeze(dim=0) .unsqueeze(dim=0) .repeat(self.num_id, int(self.batch_size / self.num_id), 1, 1) ) # compute the relevance scores sim_pos = torch.bmm(xs, xs.permute(0, 2, 1)) sim_pos_repeat = sim_pos.unsqueeze(dim=2).repeat( 1, 1, int(self.batch_size / self.num_id), 1 ) # compute the difference matrix sim_pos_diff = sim_pos_repeat - sim_pos_repeat.permute(0, 1, 3, 2) # pass through the sigmoid sim_pos_sg = sigmoid(sim_pos_diff, temp=self.anneal) * pos_mask.cuda() # compute the rankings of the positive set sim_pos_rk = torch.sum(sim_pos_sg, dim=-1) + 1 # sum the values of the Smooth-AP for all instances in the mini-batch ap = torch.zeros(1).cuda() group = int(self.batch_size / self.num_id) for ind in range(self.num_id): pos_divide = torch.sum( sim_pos_rk[ind] / ( sim_all_rk[ (ind * group) : ((ind + 1) * group), (ind * group) : ((ind + 1) * group), ] ) ) ap = ap + ((pos_divide / group) / self.batch_size) return 1 - ap ================================================ FILE: examples/pytorch/hilander/PSS/Smooth_AP/src/main.py ================================================ # repo originally forked from 
https://github.com/Confusezius/Deep-Metric-Learning-Baselines """to do: clean all of the files - particularly the main.py and also the losses and dataset files and the file for doing the dataloading -- fast loading etc need to change all of the copyrights at the top of all of the files """ #################### LIBRARIES ######################## import warnings warnings.filterwarnings("ignore") import argparse import datetime import os import random import matplotlib import numpy as np os.chdir(os.path.dirname(os.path.realpath(__file__))) from pathlib import Path matplotlib.use("agg") import auxiliaries as aux import datasets as data import evaluate as eval import losses as losses import netlib as netlib import torch.multiprocessing from tensorboardX import SummaryWriter from tqdm import tqdm torch.multiprocessing.set_sharing_strategy("file_system") ################### INPUT ARGUMENTS ################### parser = argparse.ArgumentParser() ####### Main Parameter: Dataset to use for Training parser.add_argument( "--dataset", default="vehicle_id", type=str, help="Dataset to use.", choices=["SoftInaturalist", "Inaturalist", "vehicle_id", "semi_fungi"], ) ### General Training Parameters parser.add_argument( "--lr", default=0.00001, type=float, help="Learning Rate for network parameters.", ) parser.add_argument( "--fc_lr_mul", default=5, type=float, help="OPTIONAL: Multiply the embedding layer learning rate by this value. If set to 0, the embedding layer shares the same learning rate.", ) parser.add_argument( "--n_epochs", default=400, type=int, help="Number of training epochs." ) parser.add_argument( "--kernels", default=8, type=int, help="Number of workers for pytorch dataloader.", ) parser.add_argument( "--bs", default=112, type=int, help="Mini-Batchsize to use." 
) parser.add_argument( "--samples_per_class", default=4, type=int, help="Number of samples in one class drawn before choosing the next class", ) parser.add_argument( "--seed", default=1, type=int, help="Random seed for reproducibility." ) parser.add_argument( "--scheduler", default="step", type=str, help="Type of learning rate scheduling. Currently: step & exp.", ) parser.add_argument( "--gamma", default=0.3, type=float, help="Learning rate reduction after tau epochs.", ) parser.add_argument( "--decay", default=0.0004, type=float, help="Weight decay for optimizer." ) parser.add_argument( "--tau", default=[200, 300], nargs="+", type=int, help="Stepsize(s) before reducing learning rate.", ) parser.add_argument( "--infrequent_eval", default=0, type=int, help="only compute evaluation metrics every 10 epochs", ) parser.add_argument("--opt", default="adam", help="adam or sgd") ##### Loss-specific Settings parser.add_argument("--loss", default="smoothap", type=str) parser.add_argument( "--sigmoid_temperature", default=0.01, type=float, help="SmoothAP: the temperature of the sigmoid used in SmoothAP loss", ) ##### Evaluation Settings parser.add_argument( "--k_vals", nargs="+", default=[1, 2, 4, 8], type=int, help="Recall @ Values.", ) parser.add_argument( "--resume", default="", type=str, help="path to checkpoint to load weights from (if empty then ImageNet pre-trained weights are loaded", ) ##### Network parameters parser.add_argument( "--embed_dim", default=512, type=int, help="Embedding dimensionality of the network", ) parser.add_argument( "--arch", default="resnet50", type=str, help="Network backend choice: resnet50", ) parser.add_argument( "--pretrained_weights", default="", type=str, help="pretrained weight path" ) parser.add_argument( "--use_bn_in_head", default=False, type=aux.bool_flag, help="Whether to use batch normalizations in projection head (Default: False)", ) parser.add_argument( "--checkpoint_key", default="teacher", type=str, help='Key to use in the 
checkpoint (example: "teacher")', ) parser.add_argument( "--drop_path_rate", default=0.1, type=float, help="stochastic depth rate" ) parser.add_argument( "--grad_measure", action="store_true", help="If added, gradients passed from embedding layer to the last conv-layer are stored in each iteration.", ) parser.add_argument( "--dist_measure", action="store_true", help="If added, the ratio between intra- and interclass distances is stored after each epoch.", ) parser.add_argument( "--not_pretrained", action="store_true", help="If added, the network will be trained WITHOUT ImageNet-pretrained weights.", ) ##### Setup Parameters parser.add_argument("--gpu", default=0, type=int, help="GPU-id for GPU to use.") parser.add_argument( "--savename", default="", type=str, help="Save folder name if any special information is to be included.", ) ### Paths to datasets and storage folder parser.add_argument( "--source_path", default="/scratch/shared/beegfs/abrown/datasets", type=str, help="Path to data", ) parser.add_argument( "--save_path", default=os.getcwd() + "/Training_Results", type=str, help="Where to save the checkpoints", ) ### additional parameters parser.add_argument("--trainset", default="lin_train_set1.txt", type=str) parser.add_argument("--testset", default="Inaturalist_test_set1.txt", type=str) parser.add_argument("--cluster_path", default="", type=str) parser.add_argument("--finetune", default="false", type=str) parser.add_argument("--class_num", default=948, type=int) parser.add_argument("--get_features", default="false", type=str) parser.add_argument("--linsize", default=29011, type=int, help="Lin data size.") parser.add_argument("--uinsize", default=18403, type=int, help="Uin data size.") parser.add_argument("--iter", default=0, type=int) opt = parser.parse_args() """============================================================================""" if opt.dataset == "SoftInaturalist": opt.source_path += "/Inaturalist" opt.save_path += "/Inaturalist" + "_" + 
str(opt.embed_dim) else: opt.source_path += "/" + opt.dataset opt.save_path += "/" + opt.dataset + "_" + str(opt.embed_dim) if opt.dataset == "Inaturalist": # opt.n_epochs = 90 opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.dataset == "SoftInaturalist": # opt.n_epochs = 90 opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.dataset == "vehicle_id": opt.k_vals = [1, 5] if opt.dataset == "semi_fungi": opt.tau = [40, 70] opt.k_vals = [1, 4, 16, 32] if opt.finetune == "true": opt.finetune = True elif opt.finetune == "false": opt.finetune = False if opt.get_features == "true": opt.get_features = True elif opt.get_features == "false": opt.get_features = False """===========================================================================""" ################### TensorBoard Settings ################## timestamp = datetime.datetime.now().strftime(r"%Y-%m-%d_%H-%M-%S") exp_name = aux.args2exp_name(opt) opt.save_name = f"weights_{exp_name}" + "/" + timestamp random.seed(opt.seed) np.random.seed(opt.seed) torch.manual_seed(opt.seed) torch.cuda.manual_seed(opt.seed) torch.cuda.manual_seed_all(opt.seed) tensorboard_path = Path(f"logs/logs_{exp_name}") / timestamp tensorboard_path.parent.mkdir(exist_ok=True, parents=True) global writer writer = SummaryWriter(tensorboard_path) """============================================================================""" ################### GPU SETTINGS ########################### os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # os.environ["CUDA_VISIBLE_DEVICES"]= str(opt.gpu) print("using #GPUs:", torch.cuda.device_count()) """============================================================================""" ##################### NETWORK SETUP ################## opt.device = torch.device("cuda") model = netlib.networkselect(opt) # Push to Device if torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) _ = model.to(opt.device) # Place trainable parameter in list of parameters to train: if "fc_lr_mul" in vars(opt).keys() and 
opt.fc_lr_mul != 0: all_but_fc_params = list( filter(lambda x: "last_linear" not in x[0], model.named_parameters()) ) for ind, param in enumerate(all_but_fc_params): all_but_fc_params[ind] = param[1] if torch.cuda.device_count() > 1: fc_params = model.module.model.last_linear.parameters() else: fc_params = model.model.last_linear.parameters() to_optim = [ {"params": all_but_fc_params, "lr": opt.lr, "weight_decay": opt.decay}, { "params": fc_params, "lr": opt.lr * opt.fc_lr_mul, "weight_decay": opt.decay, }, ] else: to_optim = [ {"params": model.parameters(), "lr": opt.lr, "weight_decay": opt.decay} ] """============================================================================""" #################### DATALOADER SETUPS ################## # Returns a dictionary containing 'training', 'testing', and 'evaluation' dataloaders. # The 'testing'-dataloader corresponds to the validation set, and the 'evaluation'-dataloader # Is simply using the training set, however running under the same rules as 'testing' dataloader, # i.e. no shuffling and no random cropping. dataloaders = data.give_dataloaders(opt.dataset, opt.trainset, opt.testset, opt) # Because the number of supervised classes is dataset dependent, we store them after # initializing the dataloader opt.num_classes = len(dataloaders["training"].dataset.avail_classes) """============================================================================""" #################### CREATE LOGGING FILES ############### # Each dataset usually has a set of standard metrics to log. aux.metrics_to_examine() # returns a dict which lists metrics to log for training ('train') and validation/testing ('val') metrics_to_log = aux.metrics_to_examine(opt.dataset, opt.k_vals) # example output: {'train': ['Epochs', 'Time', 'Train Loss', 'Time'], # 'val': ['Epochs','Time','NMI','F1', 'Recall @ 1','Recall @ 2','Recall @ 4','Recall @ 8']} # Using the provided metrics of interest, we generate a LOGGER instance. 
# Note that 'start_new' denotes that a new folder should be made in which everything will be stored. # This includes network weights as well. LOG = aux.LOGGER(opt, metrics_to_log, name="Base", start_new=True) # If graphviz is installed on the system, a computational graph of the underlying # network will be made as well. """============================================================================""" #################### LOSS SETUP #################### # Depending on opt.loss and opt.sampling, the respective criterion is returned, # and if the loss has trainable parameters, to_optim is appended. criterion, to_optim = losses.loss_select(opt.loss, opt, to_optim) _ = criterion.to(opt.device) """============================================================================""" ##################### OPTIONAL EVALUATIONS ##################### # Store the averaged gradients returned from the embedding to the last conv. layer. if opt.grad_measure: grad_measure = eval.GradientMeasure(opt, name="baseline") # Store the relative distances between average intra- and inter-class distance. if opt.dist_measure: # Add a distance measure for training distance ratios distance_measure = eval.DistanceMeasure( dataloaders["evaluation"], opt, name="Train", update_epochs=1 ) # #If uncommented: Do the same for the test set # distance_measure_test = eval.DistanceMeasure(dataloaders['testing'], opt, name='Train', update_epochs=1) """============================================================================""" #################### OPTIM SETUP #################### # As optimizer, Adam with standard parameters is used. 
if opt.opt == "adam": optimizer = torch.optim.Adam(to_optim) elif opt.opt == "sgd": optimizer = torch.optim.SGD(to_optim) else: raise Exception("unknown optimiser") # for the SOA measures in the paper - need to use SGD and 0.05 learning rate # optimizer = torch.optim.Adam(to_optim) # optimizer = torch.optim.SGD(to_optim) if opt.scheduler == "exp": scheduler = torch.optim.lr_scheduler.ExponentialLR( optimizer, gamma=opt.gamma ) elif opt.scheduler == "step": scheduler = torch.optim.lr_scheduler.MultiStepLR( optimizer, milestones=opt.tau, gamma=opt.gamma ) elif opt.scheduler == "none": print("No scheduling used!") else: raise Exception("No scheduling option for input: {}".format(opt.scheduler)) def same_model(model1, model2): for p1, p2 in zip(model1.parameters(), model2.parameters()): if p1.data.ne(p2.data).sum() > 0: return False return True """============================================================================""" #################### TRAINER FUNCTION ############################ def train_one_epoch(train_dataloader, model, optimizer, criterion, opt, epoch): """ This function is called every epoch to perform training of the network over one full (randomized) iteration of the dataset. Args: train_dataloader: torch.utils.data.DataLoader, returns (augmented) training data. model: Network to train. optimizer: Optimizer to use for training. criterion: criterion to use during training. opt: argparse.Namespace, Contains all relevant parameters. epoch: int, Current epoch. Returns: Nothing! """ loss_collect = [] start = time.time() data_iterator = tqdm( train_dataloader, desc="Epoch {} Training...".format(epoch) ) for i, (class_labels, input) in enumerate(data_iterator): # Compute embeddings for input batch features = model(input.to(opt.device)) # Compute loss. 
"""============================================================================"""
"""========================== MAIN TRAINING PART =============================="""
"""============================================================================"""
################### SCRIPT MAIN ##########################
print("\n-----\n")

# Each dataset requires slightly different dataloaders.
# vehicle_id is evaluated on three test splits; every other dataset starts
# with the single "testing" dataloader. The previous condition
# (`opt.dataset == "SoftInaturalist" or "Inaturalist" or "semi_fungi"`) was
# always true because a non-empty string is truthy, so the vehicle_id branch
# could never be reached.
if opt.dataset == "vehicle_id":
    eval_params = {
        "dataloaders": [
            dataloaders["testing_set1"],
            dataloaders["testing_set2"],
            dataloaders["testing_set3"],
        ],
        "model": model,
        "opt": opt,
        "epoch": 0,
    }
else:
    eval_params = {
        "dataloader": dataloaders["testing"],
        "model": model,
        "opt": opt,
        "epoch": 0,
    }

print("epochs -> " + str(opt.n_epochs))
import time

# Evaluate only every 5th epoch when infrequent evaluation is requested.
# The value does not change between epochs, so compute it once.
epoch_freq = 5 if opt.infrequent_eval == 1 else 1

for epoch in range(opt.n_epochs):
    ### Print current learning rates for all parameters
    if opt.scheduler != "none":
        # get_last_lr() reports the rates currently in use; get_lr() is only
        # meant to be called from inside scheduler.step().
        print(
            "Running with learning rates {}...".format(
                " | ".join("{}".format(x) for x in scheduler.get_last_lr())
            )
        )

    ### Train one epoch
    _ = model.train()
    train_one_epoch(
        dataloaders["training"], model, optimizer, criterion, opt, epoch
    )

    dataloaders["training"].dataset.reshuffle()

    ### Evaluate
    _ = model.eval()
    # Each dataset requires slightly different dataloaders.
    if opt.dataset == "Inaturalist":
        eval_params = {
            "dataloader": dataloaders["evaluation"],
            "model": model,
            "opt": opt,
            "epoch": epoch,
        }
    elif opt.dataset == "vehicle_id":
        eval_params = {
            "dataloaders": [
                dataloaders["testing_set1"],
                dataloaders["testing_set2"],
                dataloaders["testing_set3"],
            ],
            "model": model,
            "opt": opt,
            "epoch": epoch,
        }
    elif opt.dataset == "semi_fungi":
        eval_params = {
            "dataloader": dataloaders["testing"],
            "model": model,
            "opt": opt,
            "epoch": epoch,
        }
    else:
        # Datasets without a dedicated branch (e.g. "SoftInaturalist") keep
        # the default dataloader; only refresh the epoch so the evaluation
        # does not keep receiving the stale value 0.
        eval_params["epoch"] = epoch

    # Compute Evaluation metrics, print them and store in LOG.
    if not opt.dataset == "vehicle_id":
        if epoch % epoch_freq == 0:
            results = eval.evaluate(opt.dataset, LOG, save=True, **eval_params)
            # results[0] holds the recall values, results[1] NMI, results[2] F1.
            for rank in range(4):
                writer.add_scalar(
                    "global/recall{}".format(rank + 1),
                    results[0][rank],
                    epoch + 1,
                )
            writer.add_scalar("global/NMI", results[1], epoch + 1)
            writer.add_scalar("global/F1", results[2], epoch + 1)
    else:
        results = eval.evaluate(opt.dataset, LOG, save=True, **eval_params)
        # Positions of the recall values inside the flat vehicle_id result.
        for rank, pos in enumerate((2, 3, 6, 7, 10, 11), start=1):
            writer.add_scalar(
                "global/recall{}".format(rank), results[pos], epoch + 1
            )

    # Update the Metric Plot and save it.
    # LOG.update_info_plot()

    # (optional) compute ratio of intra- to interdistances.
    if opt.dist_measure:
        distance_measure.measure(model, epoch)
        # distance_measure_test.measure(model, epoch)

    ### Learning Rate Scheduling Step
    if opt.scheduler != "none":
        scheduler.step()

    print("\n-----\n")
def rename_attr(model, attr, name):
    """
    Move an attribute of an object to a new name.

    The value stored under `attr` is bound to `name` first and `attr` is
    deleted afterwards.

    Args:
        model: Object whose attribute should be renamed.
        attr:  str, Name of the existing attribute.
        name:  str, Name under which the value is stored afterwards.
    Returns:
        Nothing!
    """
    value = getattr(model, attr)
    setattr(model, name, value)
    delattr(model, attr)
class ResNet50(nn.Module):
    """
    Container for ResNet50 s.t. it can be used for metric learning.
    The Network has been broken down to allow for higher modularity, if one wishes
    to target specific layers/blocks directly.

    Args:
        opt:        argparse.Namespace; reads `not_pretrained`, `embed_dim` and `loss`.
        list_style: Unused in this implementation, kept for interface compatibility.
        no_norm:    Unused in this implementation, kept for interface compatibility.
    """

    def __init__(self, opt, list_style=False, no_norm=False):
        super().__init__()

        self.pars = opt

        if opt.not_pretrained:
            print("Not utilizing pretrained weights!")
            pretrained = None
        else:
            print("Getting pretrained weights...")
            pretrained = "imagenet"
        self.model = ptm.__dict__["resnet50"](
            num_classes=1000, pretrained=pretrained
        )
        if pretrained is not None:
            print("Done.")

        # Freeze all BatchNorm layers: switch them to eval mode and replace
        # `train` with a no-op so a later `model.train()` cannot re-enable
        # the running-statistics updates. The default argument keeps a bare
        # `bn.train()` call working, as it does on a regular module.
        for module in self.model.modules():
            if isinstance(module, nn.BatchNorm2d):
                module.eval()
                module.train = lambda mode=True: None

        # Replace the ImageNet classifier by an embedding layer unless the
        # raw 2048-d pooled features are used directly.
        if opt.embed_dim != 2048:
            self.model.last_linear = torch.nn.Linear(
                self.model.last_linear.in_features, opt.embed_dim
            )

        self.layer_blocks = nn.ModuleList(
            [
                self.model.layer1,
                self.model.layer2,
                self.model.layer3,
                self.model.layer4,
            ]
        )
        self.loss = opt.loss
        self.feature = True

    def forward(self, x, feature=False, is_init_cluster_generation=False):
        """
        Compute the L2-normalized embedding (or class prediction) of a batch.

        Args:
            x:       Input image batch.
            feature: bool, if True always return the normalized embedding.
            is_init_cluster_generation: Unused in this implementation.
        Returns:
            The normalized embedding if `feature` is set or the loss is
            "smoothap"; otherwise the output of `self.linear` applied to it.
        Raises:
            AttributeError: if a prediction is requested but no `linear`
                head has been attached to the network.
        """
        x = self.model.maxpool(
            self.model.relu(self.model.bn1(self.model.conv1(x)))
        )
        for layerblock in self.layer_blocks:
            x = layerblock(x)
        x = self.model.avgpool(x)
        x = x.view(x.size(0), -1)

        # With embed_dim == 2048 the pooled features are the embedding.
        if self.pars.embed_dim != 2048:
            mod_x = self.model.last_linear(x)
        else:
            mod_x = x
        feat = torch.nn.functional.normalize(mod_x, dim=-1)

        if feature or self.loss == "smoothap":
            return feat

        # NOTE(review): this class never creates `self.linear`; it has to be
        # attached by the caller before predictions can be requested.
        if not hasattr(self, "linear"):
            raise AttributeError(
                "ResNet50 has no 'linear' classification head; attach one or "
                "call forward(..., feature=True)"
            )
        return self.linear(feat)
import argparse
import os
import pickle
import random
import sys
import time

sys.path.append("..")
import shutil

import dgl
import numpy as np
import seaborn
import torch
import torch.optim as optim
from dataset import LanderDataset
from matplotlib import pyplot as plt
from models import LANDER
from utils import build_next_level, decode, evaluation, stop_iterating
from utils.deduce import get_edge_dist

STATISTIC = False

###########
# ArgParser
parser = argparse.ArgumentParser()

# Dataset
parser.add_argument("--data_path", type=str, required=True)
parser.add_argument("--model_filename", type=str, default="lander.pth")
parser.add_argument("--faiss_gpu", action="store_true")
parser.add_argument("--num_workers", type=int, default=0)
parser.add_argument("--output_filename", type=str, default="data/features.pkl")

# HyperParam
parser.add_argument("--knn_k", type=int, default=10)
parser.add_argument("--levels", type=int, default=1)
parser.add_argument("--tau", type=float, default=0.5)
parser.add_argument("--threshold", type=str, default="prob")
parser.add_argument("--metrics", type=str, default="pairwise,bcubed,nmi")
parser.add_argument("--early_stop", action="store_true")

# Model
parser.add_argument("--hidden", type=int, default=512)
parser.add_argument("--num_conv", type=int, default=4)
parser.add_argument("--dropout", type=float, default=0.0)
parser.add_argument("--gat", action="store_true")
parser.add_argument("--gat_k", type=int, default=1)
parser.add_argument("--balance", action="store_true")
parser.add_argument("--use_cluster_feat", action="store_true")
parser.add_argument("--use_focal_loss", action="store_true")
parser.add_argument("--use_gt", action="store_true")

# Subgraph
parser.add_argument("--batch_size", type=int, default=4096)
parser.add_argument("--mode", type=str, default="1head")
parser.add_argument("--midpoint", type=str, default="false")
parser.add_argument("--linsize", type=int, default=29011)
parser.add_argument("--uinsize", type=int, default=18403)
parser.add_argument("--inclasses", type=int, default=948)
parser.add_argument("--thresh", type=float, default=1.0)
parser.add_argument("--draw", type=str, default="false")
parser.add_argument(
    "--density_distance_pkl", type=str, default="density_distance.pkl"
)
parser.add_argument(
    "--density_lindistance_jpg", type=str, default="density_lindistance.jpg"
)

args = parser.parse_args()
print(args)
MODE = args.mode
linsize = args.linsize
uinsize = args.uinsize
inclasses = args.inclasses

# --draw is passed as a string; map the two known spellings to booleans.
if args.draw == "false":
    args.draw = False
elif args.draw == "true":
    args.draw = True

###########################
# Environment Configuration
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

##################
# Data Preparation
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# feature files produced by a trusted pipeline.
with open(args.data_path, "rb") as f:
    loaded_data = pickle.load(f)
    path2idx, features, pred_labels, labels, masks = loaded_data

idx2path = {v: k for k, v in path2idx.items()}
gtlabels = labels
orifeatures = features
orilabels = gtlabels

if MODE == "selectbydensity":
    # Samples selected in the previous round (mask 1) are re-marked as 2, so
    # every sample with a non-zero mask is clustered again.
    lastusim = np.where(masks == 1)
    masks[lastusim] = 2
    selectedidx = np.where(masks != 0)
    features = features[selectedidx]
    labels = gtlabels[selectedidx]
    selectmasks = masks[selectedidx]
    print("filtered features:", len(features))
    print("mask0:", len(np.where(masks == 0)[0]))
    print("mask1:", len(np.where(masks == 1)[0]))
    print("mask2:", len(np.where(masks == 2)[0]))
elif MODE == "recluster":
    selectedidx = np.where(masks == 1)
    features = features[selectedidx]
    labels = gtlabels[selectedidx]
    labelspred = pred_labels[selectedidx]
    selectmasks = masks[selectedidx]
    gtlabels = gtlabels[selectedidx]
    print("filtered features:", len(features))
else:
    selectedidx = np.where(masks != 0)
    features = features[selectedidx]
    labels = gtlabels[selectedidx]
    labelspred = pred_labels[selectedidx]
    selectmasks = masks[selectedidx]
    gtlabels = gtlabels[selectedidx]
    print("filtered features:", len(features))

global_features = features.copy()  # global features
dataset = LanderDataset(
    features=features, labels=labels, k=args.knn_k, levels=1, faiss_gpu=False
)
g = dataset.gs[0]
g.ndata["pred_den"] = torch.zeros((g.num_nodes()))
g.edata["prob_conn"] = torch.zeros((g.num_edges(), 2))
global_labels = labels.copy()
ids = np.arange(g.num_nodes())
global_edges = ([], [])
# np.long was removed in NumPy 1.24; use the explicit 64-bit integer type.
global_peaks = np.array([], dtype=np.int64)
global_edges_len = len(global_edges[0])
global_num_nodes = g.num_nodes()

global_densities = g.ndata["density"][:linsize]
global_densities = np.sort(global_densities)
xs = np.arange(len(global_densities))

fanouts = [args.knn_k - 1] * (args.num_conv + 1)
sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
# fix the number of edges
test_loader = dgl.dataloading.DataLoader(
    g,
    torch.arange(g.num_nodes()),
    sampler,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=args.num_workers,
)

##################
# Model Definition
if not args.use_gt:
    feature_dim = g.ndata["features"].shape[1]
    model = LANDER(
        feature_dim=feature_dim,
        nhid=args.hidden,
        num_conv=args.num_conv,
        dropout=args.dropout,
        use_GAT=args.gat,
        K=args.gat_k,
        balance=args.balance,
        use_cluster_feat=args.use_cluster_feat,
        use_focal_loss=args.use_focal_loss,
    )
    # NOTE(review): weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    model.load_state_dict(torch.load(args.model_filename, weights_only=False))
    model = model.to(device)
    model.eval()

# number of edges added is the indicator for early stopping
# (np.Inf was removed in NumPy 2.0; np.inf is the supported spelling)
num_edges_add_last_level = np.inf
##################################
# Predict connectivity and density
for level in range(args.levels):
    print("level:", level)
    if not args.use_gt:
        total_batches = len(test_loader)
        for batch, minibatch in enumerate(test_loader):
            input_nodes, sub_g, bipartites = minibatch
            sub_g = sub_g.to(device)
            bipartites = [b.to(device) for b in bipartites]
            with torch.no_grad():
                output_bipartite = model(bipartites)
            # Write the mini-batch predictions back into the full graph.
            global_nid = output_bipartite.dstdata[dgl.NID]
            global_eid = output_bipartite.edata["global_eid"]
            g.ndata["pred_den"][global_nid] = output_bipartite.dstdata[
                "pred_den"
            ].to("cpu")
            g.edata["prob_conn"][global_eid] = output_bipartite.edata[
                "prob_conn"
            ].to("cpu")
            torch.cuda.empty_cache()
            if (batch + 1) % 10 == 0:
                print("Batch %d / %d for inference" % (batch, total_batches))

    (
        new_pred_labels,
        peaks,
        global_edges,
        global_pred_labels,
        global_peaks,
    ) = decode(
        g,
        args.tau,
        args.threshold,
        args.use_gt,
        ids,
        global_edges,
        global_num_nodes,
        global_peaks,
    )
    if level == 0:
        # Densities of the first level cover every selected sample; they are
        # used for the density-based selection after the loop.
        global_pred_densities = g.ndata["pred_den"]
        global_densities = g.ndata["density"]
        g.edata["prob_conn"] = torch.zeros((g.num_edges(), 2))
    ids = ids[peaks]
    new_global_edges_len = len(global_edges[0])
    num_edges_add_this_level = new_global_edges_len - global_edges_len
    if stop_iterating(
        level,
        args.levels,
        args.early_stop,
        num_edges_add_this_level,
        num_edges_add_last_level,
        args.knn_k,
    ):
        break
    global_edges_len = new_global_edges_len
    num_edges_add_last_level = num_edges_add_this_level

    # build new dataset
    features, labels, cluster_features = build_next_level(
        features,
        labels,
        peaks,
        global_features,
        global_pred_labels,
        global_peaks,
    )
    # After the first level, the number of nodes reduce a lot. Using cpu faiss is faster.
    dataset = LanderDataset(
        features=features,
        labels=labels,
        k=args.knn_k,
        levels=1,
        faiss_gpu=False,
        cluster_features=cluster_features,
    )
    g = dataset.gs[0]
    g.ndata["pred_den"] = torch.zeros((g.num_nodes()))
    g.edata["prob_conn"] = torch.zeros((g.num_edges(), 2))
    test_loader = dgl.dataloading.DataLoader(
        g,
        torch.arange(g.num_nodes()),
        sampler,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.num_workers,
    )

if MODE == "selectbydensity":
    thresh = args.thresh

    global_pred_densities = np.array(global_pred_densities).astype(float)
    global_densities = np.array(global_densities).astype(float)
    distance = np.abs(global_pred_densities - global_densities)
    print("densities shape", global_pred_densities.shape)
    print(global_pred_densities.max(), global_pred_densities.min())

    # Keep only samples whose predicted density exceeds the threshold.
    selectidx = np.where(global_pred_densities > thresh)[0]
    selected_pred_densities = global_pred_densities[selectidx]
    selected_densities = global_densities[selectidx]
    selected_distance = np.abs(selected_pred_densities - selected_densities)
    print(np.mean(selected_distance))
    print("number of selected samples:", len(selectidx))

    notselectidx = np.where(global_pred_densities <= thresh)
    print("not selected:", len(notselectidx[0]))
    global_pred_labels[notselectidx] = -1
    global_pred_labels_new = np.zeros_like(orilabels)
    global_pred_labels_new[:] = -1
    Tidx = np.where(masks != 2)
    print("T:", len(Tidx[0]))
    l_in_gt = orilabels[Tidx]
    l_in_features = orifeatures[Tidx]
    # Re-index the ground-truth labels to 0..K-1. np.unique returns a sorted
    # array, so the position of each label in it is its new index.
    l_in_unique = np.unique(l_in_gt)
    l_in_gt_new = np.searchsorted(l_in_unique, l_in_gt).astype(l_in_gt.dtype)
    print("len(l_in_unique)", len(l_in_unique))

    if args.draw:
        # Class prototypes: mean feature of every labelled class.
        prototypes = np.zeros((len(l_in_unique), features.shape[1]))
        for i in range(len(l_in_unique)):
            idx = np.where(l_in_gt_new == i)
            prototypes[i] = np.mean(l_in_features[idx], axis=0)
        similarity_matrix = torch.mm(
            torch.from_numpy(global_features.astype(np.float32)),
            torch.from_numpy(prototypes.astype(np.float32)).t(),
        )
        # Map the dot-product similarity s to the distance (1 - s) / 2.
        similarity_matrix = (1 - similarity_matrix) / 2
        minvalues, selected_pred_labels = torch.min(similarity_matrix, 1)
        # far-close ratio
        closeidx = np.where(minvalues < 0.15)
        faridx = np.where(minvalues >= 0.15)
        print("far:", len(faridx[0]))
        print("close:", len(closeidx[0]))

        cutidx = np.where(global_pred_densities >= 0.5)
        draw_minvalues = minvalues[cutidx]
        draw_densities = global_pred_densities[cutidx]
        with open(args.density_distance_pkl, "wb") as f:
            pickle.dump((global_pred_densities, minvalues), f)
        print("dumped.")
        plt.clf()
        fig, ax = plt.subplots()
        # Plot at most 10000 randomly drawn points. The previous code indexed
        # the arrays with the `random` module object instead of the sample.
        if len(draw_densities) > 10000:
            samples_idx = random.sample(range(len(draw_minvalues)), 10000)
            plot_densities = draw_densities[samples_idx]
            plot_minvalues = draw_minvalues[samples_idx]
        else:
            plot_densities = draw_densities
            plot_minvalues = draw_minvalues
        ax.plot(
            plot_densities,
            plot_minvalues,
            color="tab:blue",
            marker="*",
            linestyle="None",
            markersize=1,
        )
        plt.savefig(args.density_lindistance_jpg)

    global_pred_labels_new[Tidx] = l_in_gt_new
    # Shift pseudo labels past the ground-truth label range to avoid clashes.
    global_pred_labels[selectidx] = global_pred_labels[selectidx] + len(
        l_in_unique
    )
    global_pred_labels_new[selectedidx] = global_pred_labels
    global_pred_labels = global_pred_labels_new

    linunique = np.unique(global_pred_labels[Tidx])
    uunique = np.unique(global_pred_labels[selectedidx])
    allnique = np.unique(global_pred_labels)
    print("labels")
    print(len(linunique), len(uunique), len(allnique))

    # Output masks: 0 = labelled set, 1 = newly selected, 2 = not selected.
    global_masks = np.zeros_like(masks)
    global_masks[:] = 1
    global_masks[np.array(selectedidx[0])[notselectidx]] = 2
    Tidx = np.where(masks != 2)
    global_masks[Tidx] = 0
    print("mask0", len(np.where(global_masks == 0)[0]))
    print("mask1", len(np.where(global_masks == 1)[0]))
    print("mask2", len(np.where(global_masks == 2)[0]))
    print("all", len(masks), len(orilabels), len(orifeatures))
    global_gt_labels = orilabels

if MODE == "recluster":
    global_pred_labels_new = np.zeros_like(orilabels)
    global_pred_labels_new[:] = -1
    Tidx = np.where(masks == 0)
    print("T:", len(Tidx[0]))
    l_in_gt = orilabels[Tidx]
    l_in_features = orifeatures[Tidx]
    # Re-index the ground-truth labels to 0..K-1 (see selectbydensity above).
    l_in_unique = np.unique(l_in_gt)
    l_in_gt_new = np.searchsorted(l_in_unique, l_in_gt).astype(l_in_gt.dtype)
    print("len(l_in_unique)", len(l_in_unique))
    global_pred_labels_new[Tidx] = l_in_gt_new
    print(len(global_pred_labels))
    print(len(selectedidx[0]))
    global_pred_labels_new[selectedidx[0]] = global_pred_labels + len(
        l_in_unique
    )
    global_pred_labels = global_pred_labels_new
    global_masks = masks
    print("mask0", len(np.where(global_masks == 0)[0]))
    print("mask1", len(np.where(global_masks == 1)[0]))
    print("mask2", len(np.where(global_masks == 2)[0]))
    print("all", len(masks), len(orilabels), len(orifeatures))
    global_gt_labels = orilabels

if MODE == "donothing":
    global_masks = masks

# NOTE(review): only the "selectbydensity" and "recluster" modes define
# `global_gt_labels`, and the default mode "1head" does not define
# `global_masks` either, so the evaluation below raises NameError for any
# other mode. Confirm the intended labels/masks before enabling those modes.

print("##################### L_in ########################")
print(linsize)
if len(global_pred_labels) >= linsize:
    evaluation(
        global_pred_labels[:linsize], global_gt_labels[:linsize], args.metrics
    )
else:
    print("No samples in L_in!")

print("##################### U_in ########################")
uinidx = np.where(global_pred_labels[linsize : linsize + uinsize] != -1)[0]
uinidx = uinidx + linsize
print(len(uinidx))
if len(uinidx):
    evaluation(
        global_pred_labels[uinidx], global_gt_labels[uinidx], args.metrics
    )
else:
    print("No samples in U_in!")

print("##################### U_out ########################")
uoutidx = np.where(global_pred_labels[linsize + uinsize :] != -1)[0]
uoutidx = uoutidx + linsize + uinsize
print(len(uoutidx))
if len(uoutidx):
    evaluation(
        global_pred_labels[uoutidx], global_gt_labels[uoutidx], args.metrics
    )
else:
    print("No samples in U_out!")

print("##################### U ########################")
uidx = np.where(global_pred_labels[linsize:] != -1)[0]
uidx = uidx + linsize
print(len(uidx))
if len(uidx):
    evaluation(global_pred_labels[uidx], global_gt_labels[uidx], args.metrics)
else:
    print("No samples in U!")

print("##################### L+U ########################")
luidx = np.where(global_pred_labels != -1)[0]
print(len(luidx))
evaluation(global_pred_labels[luidx], global_gt_labels[luidx], args.metrics)

print("##################### new selected samples ########################")
sidx = np.where(global_masks == 1)[0]
print(len(sidx))
if len(sidx) != 0:
    evaluation(global_pred_labels[sidx], global_gt_labels[sidx], args.metrics)

print("##################### not selected samples ########################")
nsidx = np.where(global_masks == 2)[0]
print(len(nsidx))
if len(nsidx) != 0:
    evaluation(global_pred_labels[nsidx], global_gt_labels[nsidx], args.metrics)

# Store the pseudo labels in the same five-field layout as the input file.
with open(args.output_filename, "wb") as f:
    print(orifeatures.shape)
    print(global_pred_labels.shape)
    print(global_gt_labels.shape)
    print(global_masks.shape)
    pickle.dump(
        [
            path2idx,
            orifeatures,
            global_pred_labels,
            global_gt_labels,
            global_masks,
        ],
        f,
    )
python train_subg_inat.py \ --data_path "/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/data/Inaturalist/T_train_iter0_smoothap_inat_features.pkl" \ --model_filename '/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/hilander_checkpoint/inat_l_smoothap_iter0.pth' \ --knn_k 10,5,3 --levels 2,3,4 \ --hidden 512 --epochs 1000 --lr 0.01 \ --batch_size 4096 --num_conv 1 --gat --balance # iter 0 (supervised baseline) - get pseudo labels python test_subg_inat.py \ --data_path '/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/data/Inaturalist/all_train_iter0_smoothap_inat_features.pkl' \ --model_filename '/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/hilander_checkpoint/inat_l_smoothap_iter0.pth' --knn_k 10 \ --tau 0.9 --level 10 --threshold prob \ --hidden 512 --num_conv 1 --gat --batch_size 4096 --early_stop \ --mode selectbydensity --thresh 0.8 \ --linsize 29011 --uinsize 18403 --inclasses 948 \ --output_filename 'data/inat_hilander_l_smoothap_train_selectbydensity_iter0.pkl' for i in {1..4} ; do last_iter=`expr $i - 1` echo ${last_iter} # iter i - train Smooth-AP python Smooth_AP/src/finetune_1head.py \ --dataset Inaturalist --lr 1e-5 --fc_lr_mul 1 \ --n_epochs 400 --bs 384 --class_num 1024 \ --source_path "../../data/" --embed_dim 128 \ --trainset lin_train_set1.txt --testset Inaturalist_test_set1.txt \ --cluster_path "../../data/inat_hilander_l_smoothap_train_selectbydensity_iter${last_iter}.pkl" \ --finetune true --loss smoothap --infrequent_eval 1 --iter ${i} # iter i - get feature python Smooth_AP/src/get_features.py \ --dataset Inaturalist --lr 1e-5 --fc_lr_mul 1 \ --n_epochs 400 --bs 384 \ --source_path "../../data/" --embed_dim 128 \ --resume "${i}/checkpoint_${i}.pth.tar" \ --finetune false --get_features true --iter ${i} \ --class_num 948 --loss smoothap \ --trainset lin_train_set1.txt \ --all_trainset train_set1.txt \ --testset Inaturalist_test_set1.txt \ --linsize 29011 --uinsize 18403 \ --cluster_path 
"../../data/inat_hilander_l_smoothap_train_selectbydensity_iter${last_iter}.pkl" # iter i - train hi-lander python train_subg_inat.py \ --data_path "/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/data/Inaturalist/T_train_iter${i}_smoothap_inat_features.pkl" \ --model_filename "/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/hilander_checkpoint/inat_l_smoothap_iter${i}.pth" \ --knn_k 10,5,3 --levels 2,3,4 \ --hidden 512 --epochs 1000 --lr 0.01 \ --batch_size 4096 --num_conv 1 --gat --balance # iter i - get pseudo labels python test_subg_inat.py \ --data_path "/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/data/Inaturalist/all_train_iter${i}_smoothap_inat_features.pkl" \ --model_filename "/home/ubuntu/code/dgl/examples/pytorch/hilander/PSS/hilander_checkpoint/inat_l_smoothap_iter${i}.pth" --knn_k 10 \ --tau 0.9 --level 10 --threshold prob \ --hidden 512 --num_conv 1 --gat --batch_size 4096 --early_stop \ --mode selectbydensity --thresh 0.8 \ --linsize 29011 --uinsize 18403 --inclasses 948 \ --output_filename "data/inat_hilander_l_smoothap_train_selectbydensity_iter${i}.pkl" done ================================================ FILE: examples/pytorch/hilander/PSS/train_subg_inat.py ================================================ import argparse, os, pickle, time import random import sys import dgl import numpy as np import torch import torch.optim as optim sys.path.append("..") from dataset import LanderDataset from models import LANDER ########### # ArgParser parser = argparse.ArgumentParser() # Dataset parser.add_argument("--data_path", type=str, required=True) parser.add_argument("--levels", type=str, default="1") parser.add_argument("--faiss_gpu", action="store_true") parser.add_argument("--model_filename", type=str, default="lander.pth") # KNN parser.add_argument("--knn_k", type=str, default="10") parser.add_argument("--num_workers", type=int, default=0) # Model parser.add_argument("--hidden", type=int, default=512) 
def setup_seed(seed):
    """Seed the torch (CPU and CUDA), NumPy and Python RNGs with `seed`
    and switch cuDNN to deterministic mode."""
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for seed_rng in seeders:
        seed_rng(seed)
    torch.backends.cudnn.deterministic = True
# Cache the prepared datasets next to the script so they can be inspected
# or reloaded later.
with open("./dataset.pkl", "wb") as f:
    pickle.dump(datasets, f)

print("Dataset Prepared.")


def set_train_sampler_loader(g, k):
    """Build a shuffled mini-batch dataloader over all nodes of graph `g`.

    Args:
        g: Graph to sample from.
        k: int, the k used for this graph's kNN construction; every one of
            the `args.num_conv + 1` sampling layers uses a fanout of `k - 1`.
    Returns:
        A `dgl.dataloading.DataLoader` with batch size `args.batch_size`.
    """
    fanouts = [k - 1] * (args.num_conv + 1)
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
    # fix the number of edges
    return dgl.dataloading.DataLoader(
        g,
        torch.arange(g.num_nodes()),
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )


# One dataloader per graph, paired with the k that graph was built with.
train_loaders = [set_train_sampler_loader(g, k) for g, k in zip(gs, ks)]

##################
# Model Definition
feature_dim = gs[0].ndata["features"].shape[1]
print("feature dimension:", feature_dim)
model = LANDER(
    feature_dim=feature_dim,
    nhid=args.hidden,
    num_conv=args.num_conv,
    dropout=args.dropout,
    use_GAT=args.gat,
    K=args.gat_k,
    balance=args.balance,
    use_cluster_feat=args.use_cluster_feat,
    use_focal_loss=args.use_focal_loss,
)
model = model.to(device)
model.train()

#################
# Hyperparameters
opt = optim.SGD(
    model.parameters(),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)

# keep num_batch_per_loader the same for every sub_dataloader
num_batch_per_loader = len(train_loaders[0])
train_loaders = [iter(train_loader) for train_loader in train_loaders]
num_loaders = len(train_loaders)
# The scheduler is stepped once per mini-batch, hence T_max counts batches.
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    opt, T_max=args.epochs * num_batch_per_loader * num_loaders, eta_min=1e-5
)

print("Start Training.")

###############
# Training Loop
for epoch in range(args.epochs):
    loss_den_val_total = []
    loss_conn_val_total = []
    loss_val_total = []
    for batch in range(num_batch_per_loader):
        for loader_id in range(num_loaders):
            try:
                minibatch = next(train_loaders[loader_id])
            except StopIteration:
                # This loader is exhausted (graphs differ in size): restart
                # it. Only StopIteration is handled so that real errors and
                # Ctrl-C are no longer silently swallowed.
                train_loaders[loader_id] = iter(
                    set_train_sampler_loader(gs[loader_id], ks[loader_id])
                )
                minibatch = next(train_loaders[loader_id])
            input_nodes, sub_g, bipartites = minibatch
            sub_g = sub_g.to(device)
            bipartites = [b.to(device) for b in bipartites]
            # get the feature for the input_nodes
            opt.zero_grad()
            output_bipartite = model(bipartites)
            loss, loss_den_val, loss_conn_val = model.compute_loss(
                output_bipartite
            )
            loss_den_val_total.append(loss_den_val)
            loss_conn_val_total.append(loss_conn_val)
            loss_val_total.append(loss.item())
            loss.backward()
            opt.step()
            if (batch + 1) % 10 == 0:
                print(
                    "epoch: %d, batch: %d / %d, loader_id : %d / %d, loss: %.6f, loss_den: %.6f, loss_conn: %.6f"
                    % (
                        epoch,
                        batch,
                        num_batch_per_loader,
                        loader_id,
                        num_loaders,
                        loss.item(),
                        loss_den_val,
                        loss_conn_val,
                    )
                )
            scheduler.step()
    print(
        "epoch: %d, loss: %.6f, loss_den: %.6f, loss_conn: %.6f"
        % (
            epoch,
            np.array(loss_val_total).mean(),
            np.array(loss_den_val_total).mean(),
            np.array(loss_conn_val_total).mean(),
        )
    )
    # Checkpoint after every epoch so an interrupted run keeps its progress.
    torch.save(model.state_dict(), args.model_filename)

torch.save(model.state_dict(), args.model_filename)
To install dependencies using conda:

```bash
conda create -n Hilander # create env
conda activate Hilander # activate env
conda install pytorch==1.7.0 torchvision==0.8.0 cudatoolkit=10.2 -c pytorch # install pytorch 1.7 version
conda install -y cudatoolkit=10.2 faiss-gpu=1.6.5 -c pytorch # install faiss gpu version matching cuda 10.2
pip install dgl-cu102 dglgo -f https://data.dgl.ai/wheels/repo.html # install the latest dgl for cuda 10.2
pip install tqdm # install tqdm
git clone https://github.com/yjxiong/clustering-benchmark.git # install clustering-benchmark for evaluation
cd clustering-benchmark
python setup.py install
cd ../
```

## Data

The datasets used for training and testing are hosted by several services.

[AWS S3](https://dgl-data.s3.us-west-2.amazonaws.com/dataset/hilander/data.tar.gz) | [Google Drive](https://drive.google.com/file/d/1KLa3uu9ndaCc7YjnSVRLHpcJVMSz868v/view?usp=sharing) | [BaiduPan](https://pan.baidu.com/s/11iRcp84esfkkvdcw3kmPAw) (pwd: wbmh)

After downloading, unpack the pickled files into `data/`.

## Training

We provide training scripts for different datasets.

For training on DeepGlint, one can run

```bash
bash scripts/train_deepglint.sh
```

DeepGlint is a large-scale dataset, so we randomly select 10% of the classes to construct a training subset.

For training on the full iNaturalist dataset, one can run

```bash
bash scripts/train_inat.sh
```

For training on the re-sampled iNaturalist dataset, one can run

```bash
bash scripts/train_inat_resampled_1_in_6_per_class.sh
```

We sample a subset of the full iNat2018-Train so that its train-time cluster size distribution differs drastically from that of iNat2018-Test; this subset is named `inat_resampled_1_in_6_per_class`.

## Inference

In the paper, we have two experiment settings: Clustering with Seen Test Data Distribution and Clustering with Unseen Test Data Distribution.
For Clustering with Seen Test Data Distribution, one can run ```bash bash scripts/test_deepglint_imdb_sampled_as_deepglint.sh bash scripts/test_inat.sh ``` **Clustering with Seen Test Data Distribution Performance** | | IMDB-Test-SameDist | iNat2018-Test | | ------------------ | ------------------------------: | ------------------------------: | | Fp | 0.779 | 0.330 | | Fb | 0.819 | 0.350 | | NMI | 0.949 | 0.774 | * The results might fluctuate a little due to the randomness introduced by GPU kNN building using faiss-gpu. For Clustering with Unseen Test Data Distribution, one can run ```bash bash scripts/test_deepglint_hannah.sh bash scripts/test_deepglint_imdb.sh bash scripts/test_inat_train_on_resampled_1_in_6_per_class.sh ``` **Clustering with Unseen Test Data Distribution Performance** | | Hannah | IMDB | iNat2018-Test | | ------------------ | ------------------------------: | ------------------------------: | ------------------------------: | | Fp | 0.741 | 0.717 | 0.294 | | Fb | 0.706 | 0.810 | 0.352 | | NMI | 0.810 | 0.953 | 0.764 | * The results might fluctuate a little due to the randomness introduced by GPU kNN building using faiss-gpu.
class FocalLoss(nn.Module):
    r"""
    This criterion is an implementation of Focal Loss, which is proposed in
    Focal Loss for Dense Object Detection.

        Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

    The losses are averaged across observations for each minibatch.

    Args:
        class_num(int): number of classes; only used to build the default
            ``alpha`` of ones.
        alpha(Tensor): per-class weighting factor holding ``class_num``
            values. Both a ``(class_num,)`` and a ``(class_num, 1)`` tensor
            are accepted. Defaults to all ones.
        gamma(float, double) : gamma > 0; reduces the relative loss for
            well-classified examples (p > .5), putting more focus on hard,
            misclassified examples
        size_average(bool): By default, the losses are averaged over
            observations for each minibatch. However, if the field
            size_average is set to False, the losses are instead summed for
            each minibatch.
    """

    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super().__init__()
        if alpha is None:
            alpha = torch.ones(class_num, 1)
        # Kept as a plain attribute (not a registered buffer) so that the
        # module's state_dict keys stay exactly as they were.
        self.alpha = alpha
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        """Return the focal loss of ``inputs`` against ``targets``.

        Args:
            inputs: ``(N, C)`` tensor of unnormalized class scores.
            targets: integer tensor with ``N`` class indices.

        Returns:
            A scalar tensor: the mean of the per-sample losses when
            ``size_average`` is true, otherwise their sum.
        """
        ids = targets.reshape(-1, 1)

        # log_softmax + gather picks log p(target) directly. Unlike
        # log(softmax(x)) it stays finite when the target probability
        # underflows to zero.
        log_p = F.log_softmax(inputs, dim=1).gather(1, ids)
        probs = log_p.exp()

        # Follow the inputs to whatever device they live on (CPU or any GPU).
        if self.alpha.device != inputs.device:
            self.alpha = self.alpha.to(inputs.device)
        # Flatten first so a 1-D alpha yields an (N, 1) column as well,
        # instead of broadcasting the loss to (N, N).
        alpha = self.alpha.reshape(-1)[ids.reshape(-1)].reshape(-1, 1)

        batch_loss = -alpha * torch.pow(1 - probs, self.gamma) * log_p

        if self.size_average:
            return batch_loss.mean()
        return batch_loss.sum()
class LANDER(nn.Module):
    """Graph network that predicts edge connectivity and node density.

    The model stacks ``num_conv`` ``GraphConv`` layers, scores every edge of
    the output graph with a two-class classifier (``pred_conn`` /
    ``prob_conn``) and aggregates the edge scores into a per-destination-node
    value (``pred_den``).

    Args:
        feature_dim: width of the ``"features"`` node data.
        nhid: hidden width; the second half of the stack uses ``nhid / 2``.
        num_conv: number of graph convolution layers. Layer widths are read
            from fixed 4-entry tuples, so values above 4 raise ``IndexError``.
        dropout: dropout passed to every ``GraphConv``.
        use_GAT: forwarded to ``GraphConv``.
        K: forwarded to ``GraphConv``.
        balance: when true, ``compute_loss`` randomly drops edges of the
            majority class so both classes contribute equally.
        use_cluster_feat: when true, ``"cluster_features"`` is concatenated
            to ``"features"``, doubling the input width.
        use_focal_loss: use ``FocalLoss`` instead of cross entropy for the
            connectivity loss.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self,
        feature_dim,
        nhid,
        num_conv=4,
        dropout=0,
        use_GAT=True,
        K=1,
        balance=False,
        use_cluster_feat=True,
        use_focal_loss=True,
        **kwargs
    ):
        super().__init__()
        nhid_half = int(nhid / 2)
        self.use_cluster_feat = use_cluster_feat
        self.use_focal_loss = use_focal_loss

        if self.use_cluster_feat:
            self.feature_dim = feature_dim * 2
        else:
            self.feature_dim = feature_dim

        # Entry 0 of input_dim is unused: the first layer is built from
        # self.feature_dim, which accounts for the optional cluster features.
        input_dim = (feature_dim, nhid, nhid, nhid_half)
        output_dim = (nhid, nhid, nhid_half, nhid_half)

        self.conv = nn.ModuleList()
        self.conv.append(GraphConv(self.feature_dim, nhid, dropout, use_GAT, K))
        for i in range(1, num_conv):
            self.conv.append(
                GraphConv(input_dim[i], output_dim[i], dropout, use_GAT, K)
            )

        self.src_mlp = nn.Linear(output_dim[num_conv - 1], nhid_half)
        self.dst_mlp = nn.Linear(output_dim[num_conv - 1], nhid_half)

        self.classifier_conn = nn.Sequential(
            nn.PReLU(nhid_half),
            nn.Linear(nhid_half, nhid_half),
            nn.PReLU(nhid_half),
            nn.Linear(nhid_half, 2),
        )

        if self.use_focal_loss:
            self.loss_conn = FocalLoss(2)
        else:
            self.loss_conn = nn.CrossEntropyLoss()
        self.loss_den = nn.MSELoss()

        self.balance = balance

    def pred_conn(self, edges):
        """Edge UDF: score each edge from its end points' ``conv_features``."""
        src_feat = self.src_mlp(edges.src["conv_features"])
        dst_feat = self.dst_mlp(edges.dst["conv_features"])
        pred_conn = self.classifier_conn(src_feat + dst_feat)
        return {"pred_conn": pred_conn}

    def pred_den_msg(self, edges):
        """Message UDF: ``raw_affine`` weighted by ``P(class 1) - P(class 0)``."""
        prob = edges.data["prob_conn"]
        res = edges.data["raw_affine"] * (prob[:, 1] - prob[:, 0])
        return {"pred_den_msg": res}

    def forward(self, bipartites):
        """Run the network and store its predictions on the output graph.

        Args:
            bipartites: either a single ``dgl.DGLGraph`` (reused by every
                layer) or a sequence of graphs. In the sequence case graph
                ``i`` and graph ``i + 1`` are both read for layer ``i``, so
                the sequence needs ``len(self.conv) + 1`` entries.

        Returns:
            The last graph, with ``conv_features`` written to its node data,
            ``pred_conn`` / ``prob_conn`` to its edge data and ``pred_den``
            produced by the final ``update_all``.
        """
        if isinstance(bipartites, dgl.DGLGraph):
            bipartites = [bipartites] * len(self.conv)
            if self.use_cluster_feat:
                neighbor_x = torch.cat(
                    [
                        bipartites[0].ndata["features"],
                        bipartites[0].ndata["cluster_features"],
                    ],
                    dim=1,
                )
            else:
                neighbor_x = bipartites[0].ndata["features"]

            for i in range(len(self.conv)):
                neighbor_x = self.conv[i](bipartites[i], neighbor_x)

            output_bipartite = bipartites[-1]
            output_bipartite.ndata["conv_features"] = neighbor_x
        else:
            if self.use_cluster_feat:
                neighbor_x_src = torch.cat(
                    [
                        bipartites[0].srcdata["features"],
                        bipartites[0].srcdata["cluster_features"],
                    ],
                    dim=1,
                )
                center_x_src = torch.cat(
                    [
                        bipartites[1].srcdata["features"],
                        bipartites[1].srcdata["cluster_features"],
                    ],
                    dim=1,
                )
            else:
                neighbor_x_src = bipartites[0].srcdata["features"]
                center_x_src = bipartites[1].srcdata["features"]

            # Two feature streams are convolved with the same layers: one
            # over graph i and one over graph i + 1. They become the source
            # and destination features of the output graph respectively.
            for i in range(len(self.conv)):
                neighbor_x_dst = neighbor_x_src[: bipartites[i].num_dst_nodes()]
                neighbor_x_src = self.conv[i](
                    bipartites[i], (neighbor_x_src, neighbor_x_dst)
                )
                center_x_dst = center_x_src[: bipartites[i + 1].num_dst_nodes()]
                center_x_src = self.conv[i](
                    bipartites[i + 1], (center_x_src, center_x_dst)
                )

            output_bipartite = bipartites[-1]
            output_bipartite.srcdata["conv_features"] = neighbor_x_src
            output_bipartite.dstdata["conv_features"] = center_x_src

        output_bipartite.apply_edges(self.pred_conn)
        output_bipartite.edata["prob_conn"] = F.softmax(
            output_bipartite.edata["pred_conn"], dim=1
        )
        output_bipartite.update_all(
            self.pred_den_msg, fn.mean("pred_den_msg", "pred_den")
        )
        return output_bipartite

    def compute_loss(self, bipartite):
        """Return the training loss for a graph produced by ``forward``.

        Args:
            bipartite: graph carrying ``pred_den`` and ``density`` in its
                destination-node data and ``pred_conn``, ``labels_conn`` and
                ``mask_conn`` in its edge data.

        Returns:
            ``(loss, loss_den_val, loss_conn_val)``: the differentiable total
            loss followed by the density and connectivity parts as Python
            numbers. The connectivity part is ``0`` when no edge is selected
            by the mask.
        """
        pred_den = bipartite.dstdata["pred_den"]
        loss_den = self.loss_den(pred_den, bipartite.dstdata["density"])

        labels_conn = bipartite.edata["labels_conn"]
        mask_conn = bipartite.edata["mask_conn"]

        if self.balance:
            # Balance on a private copy. Writing into the graph's own mask
            # would permanently shrink it, so a graph that is reused across
            # epochs would keep its first random subsample forever.
            mask_conn = mask_conn.clone()

            neg_check = torch.logical_and(labels_conn == 0, mask_conn)
            num_neg = torch.sum(neg_check).item()
            neg_indices = torch.where(neg_check)[0]
            pos_check = torch.logical_and(labels_conn == 1, mask_conn)
            num_pos = torch.sum(pos_check).item()
            pos_indices = torch.where(pos_check)[0]

            # Unmask a random surplus of the majority class.
            if num_pos > num_neg:
                mask_conn[
                    pos_indices[
                        np.random.choice(
                            num_pos, num_pos - num_neg, replace=False
                        )
                    ]
                ] = 0
            elif num_pos < num_neg:
                mask_conn[
                    neg_indices[
                        np.random.choice(
                            num_neg, num_neg - num_pos, replace=False
                        )
                    ]
                ] = 0

        # In subgraph training, it may happen that all edges are masked in a batch
        if mask_conn.sum() > 0:
            loss_conn = self.loss_conn(
                bipartite.edata["pred_conn"][mask_conn], labels_conn[mask_conn]
            )
            loss = loss_den + loss_conn
            loss_den_val = loss_den.item()
            loss_conn_val = loss_conn.item()
        else:
            loss = loss_den
            loss_den_val = loss_den.item()
            loss_conn_val = 0

        return loss, loss_den_val, loss_conn_val
checkpoint/deepglint_sampler.pth --knn_k 10 --tau 0.8 --level 10 --threshold prob --faiss_gpu --hidden 512 --num_conv 1 --batch_size 4096 --early_stop --use_cluster_feat ================================================ FILE: examples/pytorch/hilander/scripts/test_deepglint_imdb_sampled_as_deepglint.sh ================================================ python test_subg.py --data_path data/subcenter_arcface_deepglint_imdb_features_sampled_as_deepglint_1_in_10.pkl --model_filename checkpoint/deepglint_sampler.pth --knn_k 10 --tau 0.8 --level 10 --threshold prob --faiss_gpu --hidden 512 --num_conv 1 --batch_size 4096 --early_stop --use_cluster_feat ================================================ FILE: examples/pytorch/hilander/scripts/test_inat.sh ================================================ python test_subg.py --data_path data/inat2018_test.pkl --model_filename checkpoint/inat.ckpt --knn_k 10 --tau 0.1 --level 10 --threshold prob --faiss_gpu --hidden 512 --num_conv 1 --gat --batch_size 4096 --early_stop ================================================ FILE: examples/pytorch/hilander/scripts/test_inat_train_on_resampled_1_in_6_per_class.sh ================================================ python test_subg.py --data_path data/inat2018_test.pkl --model_filename checkpoint/inat_resampled_1_in_6_per_class.ckpt --knn_k 10 --tau 0.1 --level 10 --threshold prob --faiss_gpu --hidden 512 --num_conv 1 --gat --batch_size 4096 --early_stop ================================================ FILE: examples/pytorch/hilander/scripts/train_deepglint.sh ================================================ python train_subg.py --data_path data/subcenter_arcface_deepglint_train_1_in_10_recreated.pkl --model_filename checkpoint/deepglint_sampler.pth --knn_k 10,5,3 --levels 2,3,4 --faiss_gpu --hidden 512 --epochs 250 --lr 0.01 --batch_size 4096 --num_conv 1 --balance --use_cluster_feat ================================================ FILE: examples/pytorch/hilander/scripts/train_inat.sh 
================================================ python train_subg.py --data_path data/inat2018_train_dedup_inter_intra.pkl --model_filename checkpoint/inat.ckpt --knn_k 10,5,3 --levels 2,3,4 --faiss_gpu --hidden 512 --epochs 250 --lr 0.01 --batch_size 4096 --num_conv 1 --gat --balance ================================================ FILE: examples/pytorch/hilander/scripts/train_inat_resampled_1_in_6_per_class.sh ================================================ python train_subg.py --data_path data/inat2018_train_dedup_inter_intra_1_in_6_per_class.pkl --model_filename checkpoint/inat_resampled_1_in_6_per_class.ckpt --knn_k 10,5,3 --levels 2,3,4 --faiss_gpu --hidden 512 --epochs 250 --lr 0.01 --batch_size 4096 --num_conv 1 --gat --balance ================================================ FILE: examples/pytorch/hilander/test.py ================================================ import argparse import os import pickle import time import dgl import numpy as np import torch import torch.optim as optim from dataset import LanderDataset from models import LANDER from utils import build_next_level, decode, evaluation, stop_iterating ########### # ArgParser parser = argparse.ArgumentParser() # Dataset parser.add_argument("--data_path", type=str, required=True) parser.add_argument("--model_filename", type=str, default="lander.pth") parser.add_argument("--faiss_gpu", action="store_true") parser.add_argument("--early_stop", action="store_true") # HyperParam parser.add_argument("--knn_k", type=int, default=10) parser.add_argument("--levels", type=int, default=1) parser.add_argument("--tau", type=float, default=0.5) parser.add_argument("--threshold", type=str, default="prob") parser.add_argument("--metrics", type=str, default="pairwise,bcubed,nmi") # Model parser.add_argument("--hidden", type=int, default=512) parser.add_argument("--num_conv", type=int, default=4) parser.add_argument("--dropout", type=float, default=0.0) parser.add_argument("--gat", action="store_true") 
parser.add_argument("--gat_k", type=int, default=1)
parser.add_argument("--balance", action="store_true")
parser.add_argument("--use_cluster_feat", action="store_true")
parser.add_argument("--use_focal_loss", action="store_true")
parser.add_argument("--use_gt", action="store_true")

args = parser.parse_args()

###########################
# Environment Configuration
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

##################
# Data Preparation
# NOTE: pickle.load can execute arbitrary code; only pass trusted files.
with open(args.data_path, "rb") as f:
    features, labels = pickle.load(f)
global_features = features.copy()
dataset = LanderDataset(
    features=features,
    labels=labels,
    k=args.knn_k,
    levels=1,
    faiss_gpu=args.faiss_gpu,
)
g = dataset.gs[0].to(device)
global_labels = labels.copy()
ids = np.arange(g.num_nodes())
global_edges = ([], [])
global_edges_len = len(global_edges[0])
global_num_nodes = g.num_nodes()

##################
# Model Definition
if not args.use_gt:
    feature_dim = g.ndata["features"].shape[1]
    model = LANDER(
        feature_dim=feature_dim,
        nhid=args.hidden,
        num_conv=args.num_conv,
        dropout=args.dropout,
        use_GAT=args.gat,
        K=args.gat_k,
        balance=args.balance,
        use_cluster_feat=args.use_cluster_feat,
        use_focal_loss=args.use_focal_loss,
    )
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    model.load_state_dict(
        torch.load(
            args.model_filename, map_location=device, weights_only=False
        )
    )
    model = model.to(device)
    model.eval()

# number of edges added is the indicator for early stopping
# (np.Inf was removed in NumPy 2.0; np.inf works on every version)
num_edges_add_last_level = np.inf
import argparse
import os
import pickle
import time

import dgl
import numpy as np
import torch
import torch.optim as optim
from dataset import LanderDataset
from models import LANDER
from utils import build_next_level, decode, evaluation, stop_iterating

###########
# ArgParser
parser = argparse.ArgumentParser()

# Dataset
parser.add_argument("--data_path", type=str, required=True)
parser.add_argument("--model_filename", type=str, default="lander.pth")
parser.add_argument("--faiss_gpu", action="store_true")
parser.add_argument("--num_workers", type=int, default=0)

# HyperParam
parser.add_argument("--knn_k", type=int, default=10)
parser.add_argument("--levels", type=int, default=1)
parser.add_argument("--tau", type=float, default=0.5)
parser.add_argument("--threshold", type=str, default="prob")
parser.add_argument("--metrics", type=str, default="pairwise,bcubed,nmi")
parser.add_argument("--early_stop", action="store_true")

# Model
parser.add_argument("--hidden", type=int, default=512)
parser.add_argument("--num_conv", type=int, default=4)
parser.add_argument("--dropout", type=float, default=0.0)
parser.add_argument("--gat", action="store_true")
parser.add_argument("--gat_k", type=int, default=1)
parser.add_argument("--balance", action="store_true")
parser.add_argument("--use_cluster_feat", action="store_true")
parser.add_argument("--use_focal_loss", action="store_true")
parser.add_argument("--use_gt", action="store_true")

# Subgraph
parser.add_argument("--batch_size", type=int, default=4096)

args = parser.parse_args()
print(args)

###########################
# Environment Configuration
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

##################
# Data Preparation
# NOTE: pickle.load can execute arbitrary code; only pass trusted files.
with open(args.data_path, "rb") as f:
    features, labels = pickle.load(f)
global_features = features.copy()
dataset = LanderDataset(
    features=features,
    labels=labels,
    k=args.knn_k,
    levels=1,
    faiss_gpu=args.faiss_gpu,
)
g = dataset.gs[0]
# Full-graph result buffers; each minibatch writes its slice into them.
g.ndata["pred_den"] = torch.zeros((g.num_nodes()))
g.edata["prob_conn"] = torch.zeros((g.num_edges(), 2))
global_labels = labels.copy()
ids = np.arange(g.num_nodes())
global_edges = ([], [])
# np.long does not exist on NumPy 1.24-1.26; np.int64 is always available.
global_peaks = np.array([], dtype=np.int64)
global_edges_len = len(global_edges[0])
global_num_nodes = g.num_nodes()

# One fanout per sampled graph: the model reads num_conv + 1 of them.
fanouts = [args.knn_k - 1] * (args.num_conv + 1)
sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
# fix the number of edges
test_loader = dgl.dataloading.DataLoader(
    g,
    torch.arange(g.num_nodes()),
    sampler,
    batch_size=args.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=args.num_workers,
)

##################
# Model Definition
if not args.use_gt:
    feature_dim = g.ndata["features"].shape[1]
    model = LANDER(
        feature_dim=feature_dim,
        nhid=args.hidden,
        num_conv=args.num_conv,
        dropout=args.dropout,
        use_GAT=args.gat,
        K=args.gat_k,
        balance=args.balance,
        use_cluster_feat=args.use_cluster_feat,
        use_focal_loss=args.use_focal_loss,
    )
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    model.load_state_dict(
        torch.load(
            args.model_filename, map_location=device, weights_only=False
        )
    )
    model = model.to(device)
    model.eval()

# number of edges added is the indicator for early stopping
# (np.Inf was removed in NumPy 2.0; np.inf works on every version)
num_edges_add_last_level = np.inf
##################################
# Predict connectivity and density
for level in range(args.levels):
    if not args.use_gt:
        total_batches = len(test_loader)
        for batch, minibatch in enumerate(test_loader):
            input_nodes, sub_g, bipartites = minibatch
            sub_g = sub_g.to(device)
            bipartites = [b.to(device) for b in bipartites]
            with torch.no_grad():
                output_bipartite = model(bipartites)
            # Scatter this batch's predictions back into the full graph.
            global_nid = output_bipartite.dstdata[dgl.NID]
            global_eid = output_bipartite.edata["global_eid"]
            g.ndata["pred_den"][global_nid] = output_bipartite.dstdata[
                "pred_den"
            ].to("cpu")
            g.edata["prob_conn"][global_eid] = output_bipartite.edata[
                "prob_conn"
            ].to("cpu")
            torch.cuda.empty_cache()
            if (batch + 1) % 10 == 0:
                print("Batch %d / %d for inference" % (batch, total_batches))

    (
        new_pred_labels,
        peaks,
        global_edges,
        global_pred_labels,
        global_peaks,
    ) = decode(
        g,
        args.tau,
        args.threshold,
        args.use_gt,
        ids,
        global_edges,
        global_num_nodes,
        global_peaks,
    )
    ids = ids[peaks]
    new_global_edges_len = len(global_edges[0])
    num_edges_add_this_level = new_global_edges_len - global_edges_len
    if stop_iterating(
        level,
        args.levels,
        args.early_stop,
        num_edges_add_this_level,
        num_edges_add_last_level,
        args.knn_k,
    ):
        break
    global_edges_len = new_global_edges_len
    num_edges_add_last_level = num_edges_add_this_level

    # build new dataset
    features, labels, cluster_features = build_next_level(
        features,
        labels,
        peaks,
        global_features,
        global_pred_labels,
        global_peaks,
    )
    # After the first level, the number of nodes reduce a lot. Using cpu faiss is faster.
    dataset = LanderDataset(
        features=features,
        labels=labels,
        k=args.knn_k,
        levels=1,
        faiss_gpu=False,
        cluster_features=cluster_features,
    )
    g = dataset.gs[0]
    g.ndata["pred_den"] = torch.zeros((g.num_nodes()))
    g.edata["prob_conn"] = torch.zeros((g.num_edges(), 2))
    test_loader = dgl.dataloading.DataLoader(
        g,
        torch.arange(g.num_nodes()),
        sampler,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.num_workers,
    )

evaluation(global_pred_labels, global_labels, args.metrics)
def prepare_dataset_graphs(data_path, k_list, lvl_list):
    """Load pickled ``(features, labels)`` and build the graphs to train on.

    One ``LanderDataset`` is created per ``(k, levels)`` pair taken from
    ``k_list`` and ``lvl_list``; every graph it exposes through ``.gs`` is
    moved to the module-level ``device``. The graphs of all datasets are
    returned as one flat list, in pair order.
    """
    with open(data_path, "rb") as handle:
        features, labels = pickle.load(handle)

    graphs = []
    for knn_k, num_levels in zip(k_list, lvl_list):
        level_dataset = LanderDataset(
            features=features,
            labels=labels,
            k=knn_k,
            levels=num_levels,
            faiss_gpu=args.faiss_gpu,
        )
        graphs.extend(graph.to(device) for graph in level_dataset.gs)
    return graphs
import argparse
import os
import pickle
import time

import dgl
import numpy as np
import torch
import torch.optim as optim
from dataset import LanderDataset
from models import LANDER

###########
# ArgParser
parser = argparse.ArgumentParser()

# Dataset
parser.add_argument("--data_path", type=str, required=True)
parser.add_argument("--levels", type=str, default="1")
parser.add_argument("--faiss_gpu", action="store_true")
parser.add_argument("--model_filename", type=str, default="lander.pth")

# KNN
parser.add_argument("--knn_k", type=str, default="10")
parser.add_argument("--num_workers", type=int, default=0)

# Model
parser.add_argument("--hidden", type=int, default=512)
parser.add_argument("--num_conv", type=int, default=1)
parser.add_argument("--dropout", type=float, default=0.0)
parser.add_argument("--gat", action="store_true")
parser.add_argument("--gat_k", type=int, default=1)
parser.add_argument("--balance", action="store_true")
parser.add_argument("--use_cluster_feat", action="store_true")
parser.add_argument("--use_focal_loss", action="store_true")

# Training
parser.add_argument("--epochs", type=int, default=100)
parser.add_argument("--batch_size", type=int, default=1024)
parser.add_argument("--lr", type=float, default=0.1)
parser.add_argument("--momentum", type=float, default=0.9)
parser.add_argument("--weight_decay", type=float, default=1e-5)

args = parser.parse_args()
print(args)

###########################
# Environment Configuration
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

##################
# Data Preparation
# NOTE: pickle.load can execute arbitrary code; only pass trusted files.
with open(args.data_path, "rb") as f:
    features, labels = pickle.load(f)

k_list = [int(k) for k in args.knn_k.split(",")]
lvl_list = [int(l) for l in args.levels.split(",")]
gs = []
nbrs = []
ks = []
for k, l in zip(k_list, lvl_list):
    dataset = LanderDataset(
        features=features,
        labels=labels,
        k=k,
        levels=l,
        faiss_gpu=args.faiss_gpu,
    )
    gs.extend(dataset.gs)
    # remember which k built each graph; the sampler fanout depends on it
    ks.extend([k] * len(dataset.gs))
    nbrs.extend(dataset.nbrs)

print("Dataset Prepared.")


def set_train_sampler_loader(g, k):
    """Return a shuffling ``DataLoader`` over every node of ``g``.

    Each of the ``args.num_conv + 1`` sampled layers uses a fanout of
    ``k - 1``.
    """
    fanouts = [k - 1] * (args.num_conv + 1)
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
    # fix the number of edges
    train_dataloader = dgl.dataloading.DataLoader(
        g,
        torch.arange(g.num_nodes()),
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )
    return train_dataloader


train_loaders = [set_train_sampler_loader(g, k) for g, k in zip(gs, ks)]

##################
# Model Definition
feature_dim = gs[0].ndata["features"].shape[1]
model = LANDER(
    feature_dim=feature_dim,
    nhid=args.hidden,
    num_conv=args.num_conv,
    dropout=args.dropout,
    use_GAT=args.gat,
    K=args.gat_k,
    balance=args.balance,
    use_cluster_feat=args.use_cluster_feat,
    use_focal_loss=args.use_focal_loss,
)
model = model.to(device)
model.train()

#################
# Hyperparameters
opt = optim.SGD(
    model.parameters(),
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay,
)

# keep num_batch_per_loader the same for every sub_dataloader
num_batch_per_loader = len(train_loaders[0])
train_loaders = [iter(train_loader) for train_loader in train_loaders]
num_loaders = len(train_loaders)
# The scheduler is stepped once per minibatch, hence the T_max below.
scheduler = optim.lr_scheduler.CosineAnnealingLR(
    opt, T_max=args.epochs * num_batch_per_loader * num_loaders, eta_min=1e-5
)

print("Start Training.")

###############
# Training Loop
for epoch in range(args.epochs):
    loss_den_val_total = []
    loss_conn_val_total = []
    loss_val_total = []
    for batch in range(num_batch_per_loader):
        for loader_id in range(num_loaders):
            try:
                minibatch = next(train_loaders[loader_id])
            except StopIteration:
                # This loader is exhausted: rebuild it and keep going.
                # Only StopIteration is caught so that Ctrl-C and real
                # loader errors are no longer silently swallowed.
                train_loaders[loader_id] = iter(
                    set_train_sampler_loader(gs[loader_id], ks[loader_id])
                )
                minibatch = next(train_loaders[loader_id])
            input_nodes, sub_g, bipartites = minibatch
            sub_g = sub_g.to(device)
            bipartites = [b.to(device) for b in bipartites]
            # get the feature for the input_nodes
            opt.zero_grad()
            output_bipartite = model(bipartites)
            loss, loss_den_val, loss_conn_val = model.compute_loss(
                output_bipartite
            )
            loss_den_val_total.append(loss_den_val)
            loss_conn_val_total.append(loss_conn_val)
            loss_val_total.append(loss.item())
            loss.backward()
            opt.step()
            if (batch + 1) % 10 == 0:
                print(
                    "epoch: %d, batch: %d / %d, loader_id : %d / %d, loss: %.6f, loss_den: %.6f, loss_conn: %.6f"
                    % (
                        epoch,
                        batch,
                        num_batch_per_loader,
                        loader_id,
                        num_loaders,
                        loss.item(),
                        loss_den_val,
                        loss_conn_val,
                    )
                )
            scheduler.step()
    print(
        "epoch: %d, loss: %.6f, loss_den: %.6f, loss_conn: %.6f"
        % (
            epoch,
            np.array(loss_val_total).mean(),
            np.array(loss_den_val_total).mean(),
            np.array(loss_conn_val_total).mean(),
        )
    )
    # checkpoint after every epoch
    torch.save(model.state_dict(), args.model_filename)

torch.save(model.state_dict(), args.model_filename)
================================================ FILE: examples/pytorch/hilander/utils/adjacency.py ================================================ #!/usr/bin/env python # -*- coding: utf-8 -*- """ This file re-uses implementation from https://github.com/yl-1993/learn-to-cluster """ import numpy as np import scipy.sparse as sp from scipy.sparse import coo_matrix def row_normalize(mx): """Row-normalize sparse matrix""" rowsum = np.array(mx.sum(1)) # if rowsum <= 0, keep its previous value rowsum[rowsum <= 0] = 1 r_inv = np.power(rowsum, -1).flatten() r_inv[np.isinf(r_inv)] = 0.0 r_mat_inv = sp.diags(r_inv) mx = r_mat_inv.dot(mx) return mx, r_inv def sparse_mx_to_indices_values(sparse_mx): sparse_mx = sparse_mx.tocoo().astype(np.float32) indices = np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) values = sparse_mx.data shape = np.array(sparse_mx.shape) return indices, values, shape ================================================ FILE: examples/pytorch/hilander/utils/deduce.py ================================================ """ This file re-uses implementation from https://github.com/yl-1993/learn-to-cluster """ import dgl import numpy as np import torch from sklearn import mixture from .density import density_to_peaks, density_to_peaks_vectorize __all__ = [ "peaks_to_labels", "edge_to_connected_graph", "decode", "build_next_level", ] def _find_parent(parent, u): idx = [] # parent is a fixed point while u != parent[u]: idx.append(u) u = parent[u] for i in idx: parent[i] = u return u def edge_to_connected_graph(edges, num): parent = list(range(num)) for u, v in edges: p_u = _find_parent(parent, u) p_v = _find_parent(parent, v) parent[p_u] = p_v for i in range(num): parent[i] = _find_parent(parent, i) remap = {} uf = np.unique(np.array(parent)) for i, f in enumerate(uf): remap[f] = i cluster_id = np.array([remap[f] for f in parent]) return cluster_id def peaks_to_edges(peaks, dist2peak, tau): edges = [] for src in peaks: dsts = peaks[src] dists = 
def get_edge_dist(g, threshold):
    """Return the per-edge distance used for edge filtering.

    Parameters
    ----------
    g : graph-like
        Object exposing an ``edata`` mapping with ``"prob_conn"`` and/or
        ``"raw_affine"`` entries.
    threshold : str
        ``"prob"`` selects the first column of ``edata["prob_conn"]``; any
        other value selects ``1 - edata["raw_affine"]``.

    Returns
    -------
    array-like
        One distance value per edge.
    """
    edge_data = g.edata
    use_prob = threshold == "prob"
    return (
        edge_data["prob_conn"][:, 0]
        if use_prob
        else 1 - edge_data["raw_affine"]
    )
def decode(
    g,
    tau,
    threshold,
    use_gt,
    ids=None,
    global_edges=None,
    global_num_nodes=None,
    global_peaks=None,
):
    """Turn edge/density estimates on ``g`` into cluster labels.

    Edges are kept only when the source node is denser than the destination
    and the edge distance is below ``1 - tau``. The surviving edges are
    reduced by ``tree_generation`` and labels are assigned by
    ``peak_propogation``.

    Parameters
    ----------
    g : DGLGraph
        Graph carrying the node density and edge data read below.
    tau : float
        Edges with distance ``>= 1 - tau`` are dropped.
    threshold : str
        Forwarded to ``get_edge_dist`` to choose the distance source.
    use_gt : bool
        If True the node field ``"density"`` is used, else ``"pred_den"``.
    ids : array-like, optional
        Maps local node indices to global ones. When ``None`` only the local
        result is returned.
    global_edges : tuple of two lists, optional
        (src, dst) edge lists accumulated so far; required when ``ids`` is
        given.
    global_num_nodes : int, optional
        Node count of the global graph.
    global_peaks : optional
        Not read; it is overwritten by the recomputed global peaks.

    Returns
    -------
    tuple
        ``(pred_labels, peaks)`` when ``ids`` is None, otherwise
        ``(pred_labels, peaks, new_global_edges, global_pred_labels,
        global_peaks)``.
    """
    # Edge filtering with tau and density
    den_key = "density" if use_gt else "pred_den"
    # Work on a local view so the added edge fields do not leak to the caller.
    g = g.local_var()
    g.edata["edge_dist"] = get_edge_dist(g, threshold)
    g.apply_edges(
        lambda edges: {
            "keep": (edges.src[den_key] > edges.dst[den_key]).long()
            * (edges.data["edge_dist"] < 1 - tau).long()
        }
    )
    eids = torch.where(g.edata["keep"] == 0)[0]
    ng = dgl.remove_edges(g, eids)

    # Tree generation
    # Re-number edge IDs so they index the filtered graph itself.
    ng.edata[dgl.EID] = torch.arange(ng.num_edges())
    treeg = tree_generation(ng)
    # Label propagation
    peaks, pred_labels = peak_propogation(treeg)

    if ids is None:
        return pred_labels, peaks

    # Merge with previous layers
    src, dst = treeg.edges()
    # NOTE(review): `.numpy()` presumably requires CPU tensors here - confirm
    # against callers that may pass a GPU graph.
    new_global_edges = (
        global_edges[0] + ids[src.numpy()].tolist(),
        global_edges[1] + ids[dst.numpy()].tolist(),
    )
    global_treeg = dgl.graph(new_global_edges, num_nodes=global_num_nodes)
    global_peaks, global_pred_labels = peak_propogation(global_treeg)
    return (
        pred_labels,
        peaks,
        new_global_edges,
        global_pred_labels,
        global_peaks,
    )
def density_estimation(dists, nbrs, labels, **kwargs):
    """Compute a supervised density score for every sample.

    For each row, the similarity ``1 - dist`` to every neighbor except the
    one in column 0 is added when the neighbor shares the sample's label and
    subtracted otherwise; the total is divided by ``k - 1``.

    Parameters
    ----------
    dists : numpy.ndarray, shape (num, k)
        Distances to the k neighbors.
    nbrs : numpy.ndarray, shape (num, k)
        Neighbor indices.
    labels : numpy.ndarray, shape (num,)
        Ground-truth label of each sample.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    numpy.ndarray, shape (num,)
        Density score per sample.
    """
    num, k_knn = dists.shape
    own_labels = np.repeat(np.expand_dims(labels, 1), k_knn, axis=1)
    # Column 0 is skipped in both the similarity and the label mask.
    same_label = (labels[nbrs] == own_labels)[:, 1:]
    affinity = 1 - dists[:, 1:]
    pos = (affinity * same_label).sum(1)
    neg = (affinity * (1 - same_label)).sum(1)
    unit = np.ones((num,), dtype=np.float32)
    conf = (pos - neg) * unit
    return conf / (k_knn - 1)
def density_to_peaks(dists, nbrs, density, max_conn=1, sort="dist"):
    """Link every sample to its first denser neighbors.

    Neighbors are scanned in the order given by ``nbrs``; a neighbor is
    recorded when it is not the sample itself and its density is not
    ``<=`` the sample's density. Scanning of a row stops once ``max_conn``
    links have been recorded.

    Parameters
    ----------
    dists, nbrs : numpy.ndarray, shape (num, k)
        Distances and indices of the neighbors.
    density : numpy.ndarray, shape (num,)
        Density score per sample.
    max_conn : int
        Number of links after which a row stops being scanned.
    sort : str
        Unused.

    Returns
    -------
    dist2peak, peaks : dict
        Per-sample lists of link distances and linked neighbor indices.
    """
    # Note that dists has been sorted in ascending order
    assert dists.shape[0] == density.shape[0]
    assert dists.shape == nbrs.shape
    num, _ = dists.shape
    dist2peak = {}
    peaks = {}
    for node in range(num):
        dist2peak[node] = []
        peaks[node] = []
    for node, node_nbrs in tqdm(enumerate(nbrs)):
        link_dists = dist2peak[node]
        link_peaks = peaks[node]
        candidates = zip(node_nbrs, density[node_nbrs])
        for col, (nbr_idx, nbr_density) in enumerate(candidates):
            is_self = node == nbr_idx
            if not is_self and not (nbr_density <= density[node]):
                link_dists.append(dists[node, col])
                link_peaks.append(nbr_idx)
                if len(link_dists) >= max_conn:
                    break
    return dist2peak, peaks
def evaluation(pred_labels, labels, metrics):
    """Print the requested clustering metrics and the V-measure scores.

    Parameters
    ----------
    pred_labels : array-like
        Predicted cluster label per sample.
    labels : array-like
        Ground-truth label per sample.
    metrics : str
        Comma-separated metric names, each passed to ``evaluate``.
    """
    print("==> evaluation")
    # Result is unused; the call is kept so behaviour on degenerate input
    # (e.g. empty predictions) stays the same.
    max_cluster = np.max(pred_labels)
    for metric_name in metrics.split(","):
        evaluate(labels, pred_labels, metric_name)
    # H and C-scores
    sample_count = len(labels)
    gt_dict = {}
    pred_dict = {}
    for idx in range(sample_count):
        key = str(idx)
        gt_dict[key] = labels[idx]
        pred_dict[key] = pred_labels[idx]
    benchmark = ClusteringBenchmark(gt_dict)
    scores = benchmark.evaluate_vmeasure(pred_dict)
    # Computed but not reported, as before.
    fmi_scores = benchmark.evaluate_fowlkes_mallows_score(pred_dict)
    print(scores)
def batch_search(index, query, k, bs, verbose=False):
    """Run ``index.search`` over ``query`` in chunks of ``bs`` rows.

    Parameters
    ----------
    index : object
        Anything exposing ``search(queries, k) -> (dists, nbrs)``.
    query : sequence
        Query vectors; sliced along the first axis.
    k : int
        Number of neighbors requested per query.
    bs : int
        Chunk size.
    verbose : bool
        Show a progress bar when True.

    Returns
    -------
    dists : numpy.ndarray, float32, shape (n, k)
    nbrs : numpy.ndarray, int64, shape (n, k)
    """
    total = len(query)
    all_dists = np.zeros((total, k), dtype=np.float32)
    all_nbrs = np.zeros((total, k), dtype=np.int64)
    chunk_starts = range(0, total, bs)
    for lo in tqdm(
        chunk_starts, desc="faiss searching...", disable=not verbose
    ):
        hi = min(total, lo + bs)
        chunk_dists, chunk_nbrs = index.search(query[lo:hi], k)
        all_dists[lo:hi] = chunk_dists
        all_nbrs[lo:hi] = chunk_nbrs
    return all_dists, all_nbrs
def precise_dist_share_mem(
    feat,
    nbrs,
    dist,
    num_process=16,
    sort=True,
    process_unit=4000,
    verbose=False,
):
    """Fill ``dist`` (and re-order ``nbrs``) chunk by chunk via ``bmm``.

    The rows of ``feat`` are split into at most ``num_process`` contiguous
    chunks. Despite the parameter name, the chunks are processed one after
    another in the calling process.

    Parameters
    ----------
    feat : torch.Tensor, shape (num, dim)
        Feature matrix.
    nbrs : torch.Tensor
        Neighbor indices; updated in place by ``bmm``.
    dist : torch.Tensor
        Output distances; written in place by ``bmm``.
    num_process : int
        Number of chunks the rows are divided into.
    sort : bool
        Forwarded to ``bmm``.
    process_unit : int
        Forwarded to ``bmm``.
    verbose : bool
        Forwarded to ``bmm``.
    """
    num, _ = feat.shape
    num_per_proc = int(num / num_process) + 1
    for pi in range(num_process):
        sid = pi * num_per_proc
        # Rounding the chunk size up can push trailing chunks past the end
        # (e.g. num=5, num_process=4 gives sid=6). Such a chunk would make
        # `bmm` allocate a tensor with a negative dimension, so stop here.
        if sid >= num:
            break
        eid = min(sid + num_per_proc, num)
        bmm(
            feat=feat,
            nbrs=nbrs,
            dist=dist,
            sid=sid,
            eid=eid,
            sort=sort,
            process_unit=process_unit,
            verbose=verbose,
        )
def knns2ordered_nbrs(knns, sort=True):
    """Split a k-NN array into distance and neighbor matrices.

    Parameters
    ----------
    knns : list or numpy.ndarray, shape (n, 2, k)
        ``knns[i, 0]`` holds neighbor indices and ``knns[i, 1]`` the
        matching distances.
    sort : bool
        When True, each row is re-ordered by ascending distance.

    Returns
    -------
    dists : numpy.ndarray, shape (n, k)
    nbrs : numpy.ndarray of int32, shape (n, k)
    """
    knn_array = np.array(knns) if isinstance(knns, list) else knns
    nbrs = knn_array[:, 0, :].astype(np.int32)
    dists = knn_array[:, 1, :]
    if not sort:
        return dists, nbrs
    # sort dists from low to high
    order = np.argsort(dists, axis=1)
    return (
        np.take_along_axis(dists, order, axis=1),
        np.take_along_axis(nbrs, order, axis=1),
    )
class knn:
    """Base class for k-NN indexes.

    Subclasses are expected to populate ``self.knns`` (a list of
    ``(nbrs, dists)`` pairs, one per sample) and ``self.verbose``.
    """

    def __init__(self, feats, k, index_path="", verbose=True):
        pass

    def filter_by_th(self, i):
        """Return the neighbors of sample ``i`` with similarity >= ``self.th``.

        Similarity is ``1 - dist``. The result is a ``(nbrs, dists)`` pair of
        numpy arrays.
        """
        nbrs, dists = self.knns[i]
        # `not (... < th)` rather than `>=` so NaN distances are treated
        # exactly as before.
        kept = [
            (nbr, dist)
            for nbr, dist in zip(nbrs, dists)
            if not (1 - dist < self.th)
        ]
        th_nbrs = np.array([nbr for nbr, _ in kept])
        th_dists = np.array([dist for _, dist in kept])
        return (th_nbrs, th_dists)

    def get_knns(self, th=None):
        """Return the stored k-NN lists, optionally filtered by similarity.

        With ``th`` unset or non-positive the raw ``self.knns`` is returned;
        otherwise every entry is filtered through ``filter_by_th``.
        """
        no_filtering = th is None or th <= 0.0
        if no_filtering:
            return self.knns
        # TODO: optimize the filtering process by numpy
        # nproc = mp.cpu_count()
        nproc = 1
        timer_name = "filter edges by th {} (CPU={})".format(th, nproc)
        with Timer(timer_name, self.verbose):
            self.th = th
            self.th_knns = []
            tot = len(self.knns)
            if nproc > 1:
                pool = mp.Pool(nproc)
                th_knns = list(
                    tqdm(pool.imap(self.filter_by_th, range(tot)), total=tot)
                )
                pool.close()
            else:
                th_knns = list(map(self.filter_by_th, range(tot)))
            return th_knns
def _compute_fscore(pre, rec):
    """Return the F-score (harmonic mean) of precision and recall.

    Parameters
    ----------
    pre : float
        Precision.
    rec : float
        Recall.

    Returns
    -------
    float
        ``2 * pre * rec / (pre + rec)``, or ``0.0`` when ``pre + rec`` is
        zero. That case previously raised ZeroDivisionError for Python
        floats and produced NaN for numpy scalars.
    """
    denom = pre + rec
    if denom == 0:
        return 0.0
    return 2.0 * pre * rec / denom
def bcubed(gt_labels, pred_labels):
    """Compute B-cubed precision, recall and F-score.

    Parameters
    ----------
    gt_labels : numpy.ndarray, 1-D
        Ground-truth label per sample.
    pred_labels : numpy.ndarray, 1-D
        Predicted label per sample.

    Returns
    -------
    avg_pre, avg_rec, fscore : float
        Precision and recall averaged over all samples, and their F-score.
    """
    _check(gt_labels, pred_labels)

    gt_clusters = _get_lb2idxs(gt_labels)
    pred_clusters = _get_lb2idxs(pred_labels)

    num_lbs = len(gt_clusters)
    pre = np.zeros(num_lbs)
    rec = np.zeros(num_lbs)
    gt_sizes = np.zeros(num_lbs)

    for row, members in enumerate(gt_clusters.values()):
        gt_sizes[row] = len(members)
        # Only predicted clusters that overlap this ground-truth cluster
        # contribute to its precision/recall.
        for pred_lb in np.unique(pred_labels[members]):
            pred_members = pred_clusters[pred_lb]
            overlap = 1.0 * np.intersect1d(members, pred_members).size
            overlap_sq = overlap**2
            pre[row] += overlap_sq / len(pred_members)
            rec[row] += overlap_sq / gt_sizes[row]

    total = gt_sizes.sum()
    avg_pre = pre.sum() / total
    avg_rec = rec.sum() / total
    return avg_pre, avg_rec, _compute_fscore(avg_pre, avg_rec)
def mkdir_if_no_exists(path, subdirs=("",), is_folder=False):
    """Create the directory that will contain ``path`` if it is missing.

    Parameters
    ----------
    path : str
        A file path (its parent directory is created) or, with
        ``is_folder=True``, a directory path (the directory itself is
        created). An empty string is a no-op.
    subdirs : iterable of str
        For every non-empty entry ``sd`` the parent directory of
        ``os.path.join(path, sd)`` is created.
    is_folder : bool
        Treat ``path`` as a directory rather than a file.
    """
    if path == "":
        return
    for sd in subdirs:
        if sd != "" or is_folder:
            d = os.path.dirname(os.path.join(path, sd))
        else:
            d = os.path.dirname(path)
        # A bare filename has no directory component, so there is nothing to
        # create; `os.makedirs("")` would raise FileNotFoundError.
        if d == "":
            continue
        # exist_ok avoids the race between an existence check and creation.
        os.makedirs(d, exist_ok=True)
| Keys  | Description                                |
| ----- | :----------------------------------------- |
| mu    | Dipole moment                              |
| alpha | Isotropic polarizability                   |
| homo  | Highest occupied molecular orbital energy  |
| lumo  | Lowest unoccupied molecular orbital energy |
| gap   | Gap between 'homo' and 'lumo'              |
| r2    | Electronic spatial extent                  |
| zpve  | Zero point vibrational energy              |
| U0    | Internal energy at 0K                      |
| U     | Internal energy at 298.15K                 |
| H     | Enthalpy at 298.15K                        |
| G     | Free energy at 298.15K                     |
| Cv    | Heat capacity at 298.15K                   |
--log_interval     int     Interval between two evaluations.      Default is 1.
* The source of the QM9 dataset has changed, so there is a gap between the MAE reported in the paper and the MAE we reproduced.
class LogReg(nn.Module):
    """Single linear layer used as a logistic-regression classifier.

    Parameters
    -----------
    ft_in: int
        Input feature size.
    nb_classes: int
        Number of output classes (size of the returned logits).
    """

    def __init__(self, ft_in, nb_classes):
        super().__init__()
        self.fc = nn.Linear(ft_in, nb_classes)

    def weights_init(self, m):
        """Xavier-initialize the weight of a linear module and zero its bias.

        Modules that are not ``nn.Linear`` are left untouched. This method is
        not invoked by ``__init__``; it is only applied when called
        explicitly.
        """
        if not isinstance(m, nn.Linear):
            return
        torch.nn.init.xavier_uniform_(m.weight.data)
        if m.bias is not None:
            m.bias.data.fill_(0.0)

    def forward(self, seq):
        """Return the class logits for ``seq``."""
        return self.fc(seq)
scoring="accuracy", verbose=0 ) else: classifier = SVC(C=10) classifier.fit(x_train, y_train) accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) return np.mean(accuracies) def evaluate_embedding(embeddings, labels, search=True, device="cpu"): labels = preprocessing.LabelEncoder().fit_transform(labels) x, y = np.array(embeddings), np.array(labels) logreg_accuracy = logistic_classify(x, y, device) print("LogReg", logreg_accuracy) svc_accuracy = svc_classify(x, y, search) print("svc", svc_accuracy) return logreg_accuracy, svc_accuracy ================================================ FILE: examples/pytorch/infograph/model.py ================================================ import torch as th import torch.nn as nn import torch.nn.functional as F from dgl.nn import GINConv, NNConv, Set2Set from dgl.nn.pytorch.glob import SumPooling from torch.nn import BatchNorm1d, GRU, Linear, ModuleList, ReLU, Sequential from utils import global_global_loss_, local_global_loss_ """ Feedforward neural network""" class FeedforwardNetwork(nn.Module): """ 3-layer feed-forward neural networks with jumping connections Parameters ----------- in_dim: int Input feature size. hid_dim: int Hidden feature size. Functions ----------- forward(feat): feat: Tensor [N * D], input features """ def __init__(self, in_dim, hid_dim): super(FeedforwardNetwork, self).__init__() self.block = Sequential( Linear(in_dim, hid_dim), ReLU(), Linear(hid_dim, hid_dim), ReLU(), Linear(hid_dim, hid_dim), ReLU(), ) self.jump_con = Linear(in_dim, hid_dim) def forward(self, feat): block_out = self.block(feat) jump_out = self.jump_con(feat) out = block_out + jump_out return out """ Unsupervised Setting """ class GINEncoder(nn.Module): """ Encoder based on dgl.nn.GINConv & dgl.nn.SumPooling Parameters ----------- in_dim: int Input feature size. hid_dim: int Hidden feature size. n_layer: Number of GIN layers. 
class FeedforwardNetwork(nn.Module):
    """
    Three Linear+ReLU stages plus a linear skip ("jumping") branch from the input.

    Parameters
    -----------
    in_dim: int
        Input feature size.
    hid_dim: int
        Hidden (and output) feature size.

    Functions
    -----------
    forward(feat):
        feat: Tensor, input features; returns block(feat) + jump_con(feat)
    """

    def __init__(self, in_dim, hid_dim):
        super(FeedforwardNetwork, self).__init__()
        stages = [Linear(in_dim, hid_dim), ReLU()]
        for _ in range(2):
            stages.extend([Linear(hid_dim, hid_dim), ReLU()])
        self.block = Sequential(*stages)
        self.jump_con = Linear(in_dim, hid_dim)

    def forward(self, feat):
        return self.block(feat) + self.jump_con(feat)


class GINEncoder(nn.Module):
    """
    Stack of GINConv layers (each followed by ReLU and BatchNorm1d) with sum pooling.

    Parameters
    -----------
    in_dim: int
        Input feature size.
    hid_dim: int
        Hidden feature size.
    n_layer: int
        Number of GIN layers.

    Functions
    -----------
    forward(graph, feat):
        graph: DGLGraph
        feat: Tensor, node features
        returns (global_emb, local_emb); both concatenate the outputs of all
        layers along dim 1
    """

    def __init__(self, in_dim, hid_dim, n_layer):
        super(GINEncoder, self).__init__()
        self.n_layer = n_layer
        self.convs = ModuleList()
        self.bns = ModuleList()

        for layer_idx in range(n_layer):
            # Only the first layer sees the raw input width.
            n_in = in_dim if layer_idx == 0 else hid_dim
            mlp = Sequential(
                Linear(n_in, hid_dim), ReLU(), Linear(hid_dim, hid_dim)
            )
            self.convs.append(GINConv(apply_func=mlp, aggregator_type="sum"))
            self.bns.append(BatchNorm1d(hid_dim))

        # sum pooling
        self.pool = SumPooling()

    def forward(self, graph, feat):
        per_layer = []
        h = feat
        for layer_idx in range(self.n_layer):
            conv, bn = self.convs[layer_idx], self.bns[layer_idx]
            h = bn(F.relu(conv(graph, h)))
            per_layer.append(h)

        local_emb = th.cat(per_layer, 1)  # patch-level embedding
        global_emb = self.pool(graph, local_emb)  # graph-level embedding
        return global_emb, local_emb


class InfoGraph(nn.Module):
    r"""
    InfoGraph model for the unsupervised setting.

    Parameters
    -----------
    in_dim: int
        Input feature size.
    hid_dim: int
        Hidden feature size.
    n_layer: int
        Number of the GNN encoder layers.

    Functions
    -----------
    forward(graph, feat, graph_id):
        graph: DGLGraph
        feat: Tensor, node features
        graph_id: Tensor, index of the owning graph for every node
    """

    def __init__(self, in_dim, hid_dim, n_layer):
        super(InfoGraph, self).__init__()
        self.in_dim = in_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer

        # The encoder concatenates every layer's output.
        embedding_dim = hid_dim * n_layer
        self.encoder = GINEncoder(in_dim, hid_dim, n_layer)
        # local discriminator (node-level)
        self.local_d = FeedforwardNetwork(embedding_dim, embedding_dim)
        # global discriminator (graph-level)
        self.global_d = FeedforwardNetwork(embedding_dim, embedding_dim)

    def get_embedding(self, graph, feat):
        """Return graph-level embeddings without tracking gradients (for evaluation)."""
        with th.no_grad():
            global_emb = self.encoder(graph, feat)[0]
        return global_emb

    def forward(self, graph, feat, graph_id):
        global_emb, local_emb = self.encoder(graph, feat)
        global_h = self.global_d(global_emb)  # global hidden representation
        local_h = self.local_d(local_emb)  # local hidden representation
        return local_global_loss_(local_h, global_h, graph_id)
class NNConvEncoder(nn.Module):
    """
    Encoder based on dgl.nn.NNConv & GRU & dgl.nn.Set2Set pooling

    Parameters
    -----------
    in_dim: int
        Input (node) feature size.
    hid_dim: int
        Hidden feature size.
    edge_dim: int
        Edge feature size fed to the NNConv edge network. Default: 5.
    n_conv: int
        Number of NNConv + GRU rounds (the conv weights are shared). Default: 3.

    Functions
    -----------
    forward(graph, nfeat, efeat):
        graph: DGLGraph
        nfeat: Tensor [N * D1], node features
        efeat: Tensor [E * D2], edge features
        returns (global embedding from Set2Set, node-level embedding)
    """

    def __init__(self, in_dim, hid_dim, edge_dim=5, n_conv=3):
        super().__init__()
        self.n_conv = n_conv

        self.lin0 = Linear(in_dim, hid_dim)

        # mlp for edge convolution in NNConv: maps each edge feature vector to
        # a hid_dim x hid_dim weight matrix
        block = Sequential(
            Linear(edge_dim, 128), ReLU(), Linear(128, hid_dim * hid_dim)
        )

        self.conv = NNConv(
            hid_dim,
            hid_dim,
            edge_func=block,
            aggregator_type="mean",
            residual=False,
        )
        self.gru = GRU(hid_dim, hid_dim)

        # set2set pooling
        self.set2set = Set2Set(hid_dim, n_iters=3, n_layers=1)

    def forward(self, graph, nfeat, efeat):
        local_emb = F.relu(self.lin0(nfeat))
        h = local_emb.unsqueeze(0)

        for _ in range(self.n_conv):
            m = F.relu(self.conv(graph, local_emb, efeat))
            local_emb, h = self.gru(m.unsqueeze(0), h)
            local_emb = local_emb.squeeze(0)

        global_emb = self.set2set(graph, local_emb)

        # global_emb: graph-level embedding, local_emb: node-level embedding
        return global_emb, local_emb


class InfoGraphS(nn.Module):
    """
    InfoGraph* model for semi-supervised setting

    Parameters
    -----------
    in_dim: int
        Input feature size.
    hid_dim: int
        Hidden feature size.
    edge_dim: int
        Edge feature size, forwarded to both encoders. Default: 5.

    Functions
    -----------
    forward(graph, nfeat, efeat):
        supervised prediction, one scalar per graph
    unsup_forward(graph, nfeat, efeat, graph_id):
        returns (local-global infomax loss, supervised/unsupervised
        global-global consistency loss)
    """

    def __init__(self, in_dim, hid_dim, edge_dim=5):
        super().__init__()

        self.sup_encoder = NNConvEncoder(in_dim, hid_dim, edge_dim=edge_dim)
        self.unsup_encoder = NNConvEncoder(in_dim, hid_dim, edge_dim=edge_dim)

        # the encoders' global embedding has width 2 * hid_dim
        self.fc1 = Linear(2 * hid_dim, hid_dim)
        self.fc2 = Linear(hid_dim, 1)

        # unsupervised local discriminator and global discriminator for local-global infomax
        self.unsup_local_d = FeedforwardNetwork(hid_dim, hid_dim)
        self.unsup_global_d = FeedforwardNetwork(2 * hid_dim, hid_dim)

        # supervised global discriminator and unsupervised global discriminator for global-global infomax
        self.sup_d = FeedforwardNetwork(2 * hid_dim, hid_dim)
        self.unsup_d = FeedforwardNetwork(2 * hid_dim, hid_dim)

    def forward(self, graph, nfeat, efeat):
        sup_global_emb, _ = self.sup_encoder(graph, nfeat, efeat)

        sup_global_pred = self.fc2(F.relu(self.fc1(sup_global_emb)))
        return sup_global_pred.view(-1)

    def unsup_forward(self, graph, nfeat, efeat, graph_id):
        sup_global_emb, _ = self.sup_encoder(graph, nfeat, efeat)
        unsup_global_emb, unsup_local_emb = self.unsup_encoder(
            graph, nfeat, efeat
        )

        g_enc = self.unsup_global_d(unsup_global_emb)
        l_enc = self.unsup_local_d(unsup_local_emb)

        sup_g_enc = self.sup_d(sup_global_emb)
        unsup_g_enc = self.unsup_d(unsup_global_emb)

        # Calculate loss
        unsup_loss = local_global_loss_(l_enc, g_enc, graph_id)
        con_loss = global_global_loss_(sup_g_enc, unsup_g_enc)

        return unsup_loss, con_loss
def argument():
    """Parse the command line for semi-supervised InfoGraph* training.

    Returns
    -------
    argparse.Namespace
        Parsed options plus a ``device`` attribute: ``"cuda:<gpu>"`` when a
        non-negative GPU index is given and CUDA is available, else ``"cpu"``.
    """
    parser = argparse.ArgumentParser(description="InfoGraphS")

    # data source params
    parser.add_argument(
        "--target", type=str, default="mu", help="Choose regression task"
    )
    parser.add_argument(
        "--train_num", type=int, default=5000, help="Size of training set"
    )

    # training params
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU index, default:-1, using CPU."
    )
    parser.add_argument(
        "--epochs", type=int, default=200, help="Training epochs."
    )
    parser.add_argument(
        "--batch_size", type=int, default=20, help="Training batch size."
    )
    parser.add_argument(
        "--val_batch_size", type=int, default=100, help="Validation batch size."
    )

    parser.add_argument(
        "--lr", type=float, default=0.001, help="Learning rate."
    )
    parser.add_argument("--wd", type=float, default=0, help="Weight decay.")

    # model params
    parser.add_argument(
        "--hid_dim", type=int, default=64, help="Hidden layer dimensionality"
    )
    parser.add_argument(
        "--reg", type=float, default=0.001, help="Regularization coefficient"
    )

    args = parser.parse_args()

    # check cuda: any negative index means CPU (avoids e.g. "cuda:-2")
    if args.gpu >= 0 and th.cuda.is_available():
        args.device = "cuda:{}".format(args.gpu)
    else:
        args.device = "cpu"

    return args


class DenseQM9EdgeDataset(QM9EdgeDataset):
    """QM9EdgeDataset variant that returns every molecule as a fully connected graph."""

    def __getitem__(self, idx):
        r"""Get graph and label by index

        Parameters
        ----------
        idx : int
            Item index

        Returns
        -------
        dgl.DGLGraph
            Graph with an edge between every ordered pair of distinct atoms:

            - ``ndata['attr']``: the features of each atom
            - ``edata['edge_attr']``: the bond features (zeros where the
              molecule has no bond) with the inter-atomic Euclidean distance
              appended as the last column
        Tensor
            Property values of molecular graphs
        """
        n_lo, n_hi = self.n_cumsum[idx], self.n_cumsum[idx + 1]
        e_lo, e_hi = self.ne_cumsum[idx], self.ne_cumsum[idx + 1]

        # Sparse molecular graph as stored in the dataset.
        g = dgl.graph((self.src[e_lo:e_hi], self.dst[e_lo:e_hi]))
        g.ndata["pos"] = th.tensor(self.node_pos[n_lo:n_hi]).float()
        g.ndata["attr"] = th.tensor(self.node_attr[n_lo:n_hi]).float()
        g.edata["edge_attr"] = th.tensor(self.edge_attr[e_lo:e_hi]).float()

        label = th.tensor(self.targets[idx][self.label_keys]).float()

        # Enumerate all n_nodes * n_nodes ordered pairs in row-major order.
        n_nodes = g.num_nodes()
        node_ids = th.arange(n_nodes)
        row = node_ids.view(-1, 1).repeat(1, n_nodes).view(-1)
        col = node_ids.repeat(n_nodes)

        # Scatter bond features into the dense layout; the flat index of pair
        # (u, v) is u * n_nodes + v. Kept in its own name so the ``idx``
        # parameter is not overwritten.
        bond_src, bond_dst = g.edges()
        flat_idx = bond_src * n_nodes + bond_dst
        size = list(g.edata["edge_attr"].size())
        size[0] = n_nodes * n_nodes
        edge_attr = g.edata["edge_attr"].new_zeros(size)
        edge_attr[flat_idx] = g.edata["edge_attr"]

        pos = g.ndata["pos"]
        dist = th.norm(pos[col] - pos[row], p=2, dim=-1).view(-1, 1)
        new_edge_attr = th.cat([edge_attr, dist.type_as(edge_attr)], dim=-1)

        graph = dgl.graph((row, col))
        graph.ndata["attr"] = g.ndata["attr"]
        graph.edata["edge_attr"] = new_edge_attr
        graph = graph.remove_self_loop()

        return graph, label
def collate(samples):
    """collate function for building graph dataloader

    Parameters
    ----------
    samples : list of (DGLGraph, Tensor)
        Items produced by the dataset.

    Returns
    -------
    (DGLGraph, Tensor)
        The batched graph, with ``ndata['graph_id']`` holding for every node
        the index of its graph within the batch, and the batched targets.
    """
    # generate batched graphs and labels
    graphs, targets = map(list, zip(*samples))
    batched_graph = dgl.batch(graphs)
    batched_targets = th.Tensor(targets)

    n_graphs = len(graphs)
    graph_id = th.arange(n_graphs)
    graph_id = dgl.broadcast_nodes(batched_graph, graph_id)

    batched_graph.ndata["graph_id"] = graph_id

    return batched_graph, batched_targets


def evaluate(model, loader, num, device):
    """Mean absolute error of ``model`` over ``loader``.

    Parameters
    ----------
    model : callable
        Called as ``model(graphs, nfeat, efeat)``; returns one prediction per graph.
    loader : iterable of (graph batch, targets)
    num : int
        Number of samples the summed error is divided by.
    device : str or torch.device

    Returns
    -------
    float
    """
    error = 0
    # Pure inference: do not record an autograd graph for every batch.
    with th.no_grad():
        for graphs, targets in loader:
            graphs = graphs.to(device)
            nfeat, efeat = graphs.ndata["attr"], graphs.edata["edge_attr"]
            targets = targets.to(device)
            error += (model(graphs, nfeat, efeat) - targets).abs().sum().item()

    return error / num
if __name__ == "__main__":
    # Step 1: Prepare graph data ===================================== #
    args = argument()
    label_keys = [args.target]
    print(args)
    dataset = DenseQM9EdgeDataset(label_keys=label_keys)

    # Train/Val/Test Splitting
    N = dataset.targets.shape[0]
    all_idx = np.arange(N)
    np.random.shuffle(all_idx)

    val_num = 10000
    test_num = 10000

    val_idx = all_idx[:val_num]
    test_idx = all_idx[val_num : val_num + test_num]
    train_idx = all_idx[
        val_num + test_num : val_num + test_num + args.train_num
    ]

    train_data = Subset(dataset, train_idx)
    val_data = Subset(dataset, val_idx)
    test_data = Subset(dataset, test_idx)

    # every graph outside val/test (the labelled training graphs included)
    # feeds the unsupervised objective
    unsup_idx = all_idx[val_num + test_num :]
    unsup_data = Subset(dataset, unsup_idx)

    # generate supervised training dataloader and unsupervised training dataloader
    train_loader = GraphDataLoader(
        train_data,
        batch_size=args.batch_size,
        collate_fn=collate,
        drop_last=False,
        shuffle=True,
    )

    unsup_loader = GraphDataLoader(
        unsup_data,
        batch_size=args.batch_size,
        collate_fn=collate,
        drop_last=False,
        shuffle=True,
    )

    # generate validation & testing dataloader
    val_loader = GraphDataLoader(
        val_data,
        batch_size=args.val_batch_size,
        collate_fn=collate,
        drop_last=False,
        shuffle=True,
    )

    test_loader = GraphDataLoader(
        test_data,
        batch_size=args.val_batch_size,
        collate_fn=collate,
        drop_last=False,
        shuffle=True,
    )

    print("======== target = {} ========".format(args.target))

    in_dim = dataset[0][0].ndata["attr"].shape[1]

    # Step 2: Create model =================================================================== #
    model = InfoGraphS(in_dim, args.hid_dim)
    model = model.to(args.device)

    # Step 3: Create training components ===================================================== #
    optimizer = th.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.wd
    )
    scheduler = th.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.7, patience=5, min_lr=0.000001
    )

    # Step 4: training epochs =============================================================== #
    best_val_error = float("inf")
    test_error = float("inf")

    for epoch in range(args.epochs):
        # Training
        model.train()
        lr = scheduler.optimizer.param_groups[0]["lr"]

        sup_loss_all = 0
        unsup_loss_all = 0
        consis_loss_all = 0

        # zip() stops at the shorter loader, i.e. one pass over the labelled set.
        # Batch names are distinct from the ``unsup_data`` Subset above.
        for sup_batch, unsup_batch in zip(train_loader, unsup_loader):
            sup_graph, sup_target = sup_batch
            unsup_graph, _ = unsup_batch

            sup_graph = sup_graph.to(args.device)
            unsup_graph = unsup_graph.to(args.device)

            sup_nfeat, sup_efeat = (
                sup_graph.ndata["attr"],
                sup_graph.edata["edge_attr"],
            )
            unsup_nfeat, unsup_efeat, unsup_graph_id = (
                unsup_graph.ndata["attr"],
                unsup_graph.edata["edge_attr"],
                unsup_graph.ndata["graph_id"],
            )

            sup_target = sup_target.to(args.device)

            optimizer.zero_grad()

            sup_loss = F.mse_loss(
                model(sup_graph, sup_nfeat, sup_efeat), sup_target
            )
            unsup_loss, consis_loss = model.unsup_forward(
                unsup_graph, unsup_nfeat, unsup_efeat, unsup_graph_id
            )

            loss = sup_loss + unsup_loss + args.reg * consis_loss

            loss.backward()

            sup_loss_all += sup_loss.item()
            unsup_loss_all += unsup_loss.item()
            consis_loss_all += consis_loss.item()

            optimizer.step()

        print(
            "Epoch: {}, Sup_Loss: {:.4f}, Unsup_loss: {:.4f}, Consis_loss: {:.4f}".format(
                epoch, sup_loss_all, unsup_loss_all, consis_loss_all
            )
        )

        model.eval()

        val_error = evaluate(model, val_loader, val_num, args.device)
        scheduler.step(val_error)

        # the test error is only refreshed when validation improves
        if val_error < best_val_error:
            best_val_error = val_error
            test_error = evaluate(model, test_loader, test_num, args.device)

        print(
            "Epoch: {}, LR: {}, val_error: {:.4f}, best_test_error: {:.4f}".format(
                epoch, lr, val_error, test_error
            )
        )
def argument():
    """Parse the command line for unsupervised InfoGraph training.

    Returns
    -------
    argparse.Namespace
        Parsed options plus a ``device`` attribute: ``"cuda:<gpu>"`` when a
        non-negative GPU index is given and CUDA is available, else ``"cpu"``.
    """
    parser = argparse.ArgumentParser(description="InfoGraph")
    # data source params
    parser.add_argument(
        "--dataname", type=str, default="MUTAG", help="Name of dataset."
    )

    # training params
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU index, default:-1, using CPU."
    )
    parser.add_argument(
        "--epochs", type=int, default=20, help="Training epochs."
    )
    parser.add_argument(
        "--batch_size", type=int, default=128, help="Training batch size."
    )
    parser.add_argument("--lr", type=float, default=0.01, help="Learning rate.")
    parser.add_argument(
        "--log_interval",
        type=int,
        default=1,
        help="Interval between two evaluations.",
    )

    # model params
    parser.add_argument(
        "--n_layers",
        type=int,
        default=3,
        help="Number of graph convolution layers before each pooling.",
    )
    parser.add_argument(
        "--hid_dim", type=int, default=32, help="Hidden layer dimensionalities."
    )

    args = parser.parse_args()

    # check cuda: any negative index means CPU (avoids e.g. "cuda:-2")
    if args.gpu >= 0 and th.cuda.is_available():
        args.device = "cuda:{}".format(args.gpu)
    else:
        args.device = "cpu"

    return args


def collate(samples):
    """collate function for building graph dataloader

    Batches the graphs and labels of ``samples`` and stores, for every node,
    the index of its graph within the batch in ``ndata['graph_id']``.
    """
    graphs, labels = map(list, zip(*samples))

    # generate batched graphs and labels
    batched_graph = dgl.batch(graphs)
    batched_labels = th.tensor(labels)

    n_graphs = len(graphs)
    graph_id = th.arange(n_graphs)
    graph_id = dgl.broadcast_nodes(batched_graph, graph_id)

    batched_graph.ndata["graph_id"] = graph_id

    return batched_graph, batched_labels
if __name__ == "__main__":
    # Step 1: Prepare graph data ===================================== #
    args = argument()
    print(args)

    # load dataset from dgl.data.GINDataset
    dataset = GINDataset(args.dataname, self_loop=False)

    # get graphs and labels
    graphs, labels = map(list, zip(*dataset))

    # generate a full-graph with all examples for evaluation
    wholegraph = dgl.batch(graphs)
    wholegraph.ndata["attr"] = wholegraph.ndata["attr"].to(th.float32)

    # create dataloader for batch training
    dataloader = GraphDataLoader(
        dataset,
        batch_size=args.batch_size,
        collate_fn=collate,
        drop_last=False,
        shuffle=True,
    )

    in_dim = wholegraph.ndata["attr"].shape[1]

    # Step 2: Create model =================================================================== #
    model = InfoGraph(in_dim, args.hid_dim, args.n_layers)
    model = model.to(args.device)

    # Step 3: Create training components ===================================================== #
    optimizer = th.optim.Adam(model.parameters(), lr=args.lr)

    print("===== Before training ======")

    wholegraph = wholegraph.to(args.device)
    wholefeat = wholegraph.ndata["attr"]

    # Evaluate the initialized embeddings using logistic regression and
    # SVM (non-linear). Eval mode keeps BatchNorm from updating its running
    # statistics during this baseline pass.
    model.eval()
    emb = model.get_embedding(wholegraph, wholefeat).cpu()
    # ``device`` must be passed by keyword: the third positional parameter of
    # evaluate_embedding is ``search``, not ``device``.
    res = evaluate_embedding(emb, labels, device=args.device)

    print("logreg {:.4f}, svc {:.4f}".format(res[0], res[1]))

    best_logreg = 0
    best_logreg_epoch = 0
    best_svc = 0
    best_svc_epoch = 0

    # Step 4: training epochs =============================================================== #
    for epoch in range(args.epochs):
        loss_all = 0
        model.train()

        for graph, label in dataloader:
            graph = graph.to(args.device)
            feat = graph.ndata["attr"]
            graph_id = graph.ndata["graph_id"]

            optimizer.zero_grad()
            loss = model(graph, feat, graph_id)
            loss.backward()
            optimizer.step()
            loss_all += loss.item()

        print("Epoch {}, Loss {:.4f}".format(epoch, loss_all))

        if epoch % args.log_interval == 0:
            # evaluate embeddings
            model.eval()
            emb = model.get_embedding(wholegraph, wholefeat).cpu()
            res = evaluate_embedding(emb, labels, device=args.device)

            if res[0] > best_logreg:
                best_logreg = res[0]
                best_logreg_epoch = epoch
            if res[1] > best_svc:
                best_svc = res[1]
                best_svc_epoch = epoch

            print(
                "best logreg {:.4f}, epoch {} | best svc: {:.4f}, epoch {}".format(
                    best_logreg, best_logreg_epoch, best_svc, best_svc_epoch
                )
            )

    print("Training End")
    print("best logreg {:.4f} ,best svc {:.4f}".format(best_logreg, best_svc))
def get_positive_expectation(p_samples, average=True):
    """Computes the positive part of a JS Divergence.

    Args:
        p_samples: Positive samples.
        average: Average the result over samples.
    Returns:
        th.Tensor
    """
    log_2 = math.log(2.0)
    Ep = log_2 - F.softplus(-p_samples)

    return Ep.mean() if average else Ep


def get_negative_expectation(q_samples, average=True):
    """Computes the negative part of a JS Divergence.

    Args:
        q_samples: Negative samples.
        average: Average the result over samples.
    Returns:
        th.Tensor
    """
    log_2 = math.log(2.0)
    Eq = F.softplus(-q_samples) + q_samples - log_2

    return Eq.mean() if average else Eq


def local_global_loss_(l_enc, g_enc, graph_id):
    """Local-global (node vs. graph) JSD mutual-information loss.

    Args:
        l_enc: Node encodings, one row per node.
        g_enc: Graph encodings, one row per graph.
        graph_id: For every node, the row index in ``g_enc`` of its own graph.
    Returns:
        th.Tensor: scalar ``E_neg - E_pos``.
    """
    num_graphs = g_enc.shape[0]
    num_nodes = l_enc.shape[0]

    device = g_enc.device

    # One-hot membership mask built with a single indexed assignment instead
    # of a Python loop over nodes.
    graph_id = th.as_tensor(graph_id, device=device).long().view(-1)
    pos_mask = th.zeros((num_nodes, num_graphs), device=device)
    pos_mask[th.arange(num_nodes, device=device), graph_id] = 1.0
    neg_mask = 1.0 - pos_mask

    res = th.mm(l_enc, g_enc.t())

    # Masked-out scores are 0, and both expectations are exactly 0 at score 0,
    # so summing over the full matrix only counts the selected pairs.
    E_pos = get_positive_expectation(res * pos_mask, average=False).sum()
    E_pos = E_pos / num_nodes
    E_neg = get_negative_expectation(res * neg_mask, average=False).sum()
    # A batch with a single graph has no negative pairs; dividing by
    # num_graphs - 1 == 0 would turn the whole loss into NaN.
    E_neg = E_neg / (num_nodes * max(num_graphs - 1, 1))

    return E_neg - E_pos


def global_global_loss_(sup_enc, unsup_enc):
    """Global-global JSD loss between supervised and unsupervised graph encodings.

    Args:
        sup_enc: Supervised graph encodings, one row per graph.
        unsup_enc: Unsupervised graph encodings of the same graphs, same order.
    Returns:
        th.Tensor: scalar ``E_neg - E_pos``.
    """
    num_graphs = sup_enc.shape[0]
    device = sup_enc.device

    # Matching graphs (the diagonal) are positives, all other pairs negatives.
    pos_mask = th.eye(num_graphs).to(device)
    neg_mask = 1 - pos_mask

    res = th.mm(sup_enc, unsup_enc.t())

    E_pos = get_positive_expectation(res * pos_mask, average=False)
    E_pos = (E_pos * pos_mask).sum() / pos_mask.sum()
    E_neg = get_negative_expectation(res * neg_mask, average=False)
    # With a single graph there are no negatives; clamp to avoid 0 / 0.
    E_neg = (E_neg * neg_mask).sum() / neg_mask.sum().clamp(min=1)

    return E_neg - E_pos
``` dgl 0.6.0 scikit-learn 0.24.1 tqdm 4.56.0 torch 1.7.1 ``` ### The graph datasets used in this example ###### Node Classification DGL's built-in Cora and Citeseer datasets. Dataset summary: | Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes | | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | Cora | 2,708 | 10,556 | 1,433 | 7(single label) | 60% | 20% | 20% | | Citeseer | 3,327 | 9,228 | 3,703 | 6(single label) | 60% | 20% | 20% | ### Usage ###### Dataset options ``` --dataset str The graph dataset name. Default is 'Cora'. ``` ###### GPU options ``` --gpu int GPU index. Default is -1, using CPU. ``` ###### Model options ``` --run int Number of runs. Default is 10. --epochs int Number of training epochs. Default is 500. --lr float Adam optimizer learning rate. Default is 0.005. --lamb float L2 regularization coefficient. Default is 0.0005. --hid-dim int Hidden layer dimensionalities. Default is 32. --num-layers int Number of GCN layers. Default is 5. --mode str Type of aggregation ['cat', 'max', 'lstm']. Default is 'cat'. --dropout float Dropout applied at all layers. Default is 0.5. ``` ###### Examples The following commands learn a neural network and predict on the test set. Train a JKNet which follows the original hyperparameters on different datasets. 
def main(args):
    """Train a JKNet on the full graph and return the test accuracy.

    The model with the best validation accuracy is kept and scored on the
    test split. ``args`` is the namespace produced by the argument parser in
    the ``__main__`` section (dataset, gpu, epochs, lr, lamb, hid_dim,
    num_layers, mode, dropout).
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    # Load from DGL dataset
    if args.dataset == "Cora":
        dataset = CoraGraphDataset()
    elif args.dataset == "Citeseer":
        dataset = CiteseerGraphDataset()
    else:
        raise ValueError("Dataset {} is invalid.".format(args.dataset))

    graph = dataset[0]

    # check cuda
    device = (
        f"cuda:{args.gpu}"
        if args.gpu >= 0 and torch.cuda.is_available()
        else "cpu"
    )

    # retrieve the number of classes
    n_classes = dataset.num_classes

    # retrieve labels of ground truth
    labels = graph.ndata.pop("label").to(device).long()

    # Extract node features
    feats = graph.ndata.pop("feat").to(device)
    n_features = feats.shape[-1]

    # create masks for train / validation / test
    # train : val : test = 6 : 2 : 2
    n_nodes = graph.num_nodes()
    idx = torch.arange(n_nodes).to(device)
    train_idx, test_idx = train_test_split(idx, test_size=0.2)
    train_idx, val_idx = train_test_split(train_idx, test_size=0.25)

    graph = graph.to(device)

    # Step 2: Create model =================================================================== #
    model = JKNet(
        in_dim=n_features,
        hid_dim=args.hid_dim,
        out_dim=n_classes,
        num_layers=args.num_layers,
        mode=args.mode,
        dropout=args.dropout,
    ).to(device)

    best_model = copy.deepcopy(model)

    # Step 3: Create training components ===================================================== #
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.lamb)

    # Step 4: training epochs =============================================================== #
    acc = 0
    epochs = trange(args.epochs, desc="Accuracy & Loss")

    for _ in epochs:
        # Training using a full graph
        model.train()

        logits = model(graph, feats)

        # compute loss
        train_loss = loss_fn(logits[train_idx], labels[train_idx])
        train_acc = torch.sum(
            logits[train_idx].argmax(dim=1) == labels[train_idx]
        ).item() / len(train_idx)

        # backward
        opt.zero_grad()
        train_loss.backward()
        opt.step()

        # Validation using a full graph. Recompute the logits in eval mode:
        # the training logits above were produced with dropout active and
        # before the optimizer step.
        model.eval()

        with torch.no_grad():
            val_logits = model(graph, feats)
            valid_loss = loss_fn(val_logits[val_idx], labels[val_idx])
            valid_acc = torch.sum(
                val_logits[val_idx].argmax(dim=1) == labels[val_idx]
            ).item() / len(val_idx)

        # Print out performance
        epochs.set_description(
            "Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format(
                train_acc, train_loss.item(), valid_acc, valid_loss.item()
            )
        )

        if valid_acc > acc:
            acc = valid_acc
            best_model = copy.deepcopy(model)

    best_model.eval()
    with torch.no_grad():
        logits = best_model(graph, feats)
    test_acc = torch.sum(
        logits[test_idx].argmax(dim=1) == labels[test_idx]
    ).item() / len(test_idx)

    print("Test Acc {:.4f}".format(test_acc))
    return test_acc


if __name__ == "__main__":
    # JKNet Hyperparameters
    parser = argparse.ArgumentParser(description="JKNet")

    # data source params
    parser.add_argument(
        "--dataset", type=str, default="Cora", help="Name of dataset."
    )
    # cuda params
    parser.add_argument(
        "--gpu", type=int, default=-1, help="GPU index. Default: -1, using CPU."
    )
    # training params
    parser.add_argument("--run", type=int, default=10, help="Running times.")
    parser.add_argument(
        "--epochs", type=int, default=500, help="Training epochs."
    )
    parser.add_argument(
        "--lr", type=float, default=0.005, help="Learning rate."
    )
    parser.add_argument("--lamb", type=float, default=0.0005, help="L2 reg.")
    # model params
    parser.add_argument(
        "--hid-dim", type=int, default=32, help="Hidden layer dimensionalities."
    )
    parser.add_argument(
        "--num-layers", type=int, default=5, help="Number of GCN layers."
    )
    parser.add_argument(
        "--mode",
        type=str,
        default="cat",
        help="Type of aggregation.",
        choices=["cat", "max", "lstm"],
    )
    parser.add_argument(
        "--dropout",
        type=float,
        default=0.5,
        help="Dropout applied at all layers.",
    )

    args = parser.parse_args()
    print(args)

    # Repeat the whole experiment (fresh random split each time) and report
    # mean / std of the test accuracy.
    acc_lists = [main(args) for _ in range(args.run)]

    mean = np.around(np.mean(acc_lists, axis=0), decimals=3)
    std = np.around(np.std(acc_lists, axis=0), decimals=3)
    print("total acc: ", acc_lists)
    print("mean", mean)
    print("std", std)
class JKNet(nn.Module):
    """Jumping Knowledge Network built from GraphConv layers.

    Parameters
    ----------
    in_dim : int
        Input feature size.
    hid_dim : int
        Hidden feature size.
    out_dim : int
        Number of output classes.
    num_layers : int
        Number of hidden-to-hidden GraphConv layers stacked after the input
        layer, so ``num_layers + 1`` representations are aggregated.
    mode : str
        Aggregation passed to ``JumpingKnowledge``: 'cat', 'max' or 'lstm'.
    dropout : float
        Dropout probability applied to every layer output.
    """

    def __init__(
        self, in_dim, hid_dim, out_dim, num_layers=1, mode="cat", dropout=0.0
    ):
        super().__init__()

        self.mode = mode
        self.dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList()
        self.layers.append(GraphConv(in_dim, hid_dim, activation=F.relu))
        for _ in range(num_layers):
            self.layers.append(GraphConv(hid_dim, hid_dim, activation=F.relu))

        if self.mode == "lstm":
            self.jump = JumpingKnowledge(mode, hid_dim, num_layers)
        else:
            self.jump = JumpingKnowledge(mode)

        # 'cat' concatenates all num_layers + 1 layer outputs
        if self.mode == "cat":
            hid_dim = hid_dim * (num_layers + 1)

        self.output = nn.Linear(hid_dim, out_dim)
        self.reset_params()

    def reset_params(self):
        """Re-initialise the output layer, every GraphConv and the aggregator."""
        self.output.reset_parameters()
        for layer in self.layers:
            layer.reset_parameters()
        self.jump.reset_parameters()

    def forward(self, g, feats):
        feat_lst = []
        for layer in self.layers:
            feats = self.dropout(layer(g, feats))
            feat_lst.append(feats)

        if self.mode == "lstm":
            self.jump.lstm.flatten_parameters()

        # local_scope() keeps the temporary "h" feature from leaking onto the
        # caller's graph once the forward pass returns.
        with g.local_scope():
            g.ndata["h"] = self.jump(feat_lst)
            g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
            return self.output(g.ndata["h"])
MST_MAX_WEIGHT = 100
MAX_NCAND = 2000


def set_atommap(mol, num=0):
    """Set the atom-map number of every atom in ``mol`` to ``num``."""
    for each_atom in mol.GetAtoms():
        each_atom.SetAtomMapNum(num)


def get_mol(smiles):
    """Parse ``smiles`` into a kekulized molecule; return None if parsing fails."""
    parsed = Chem.MolFromSmiles(smiles)
    if parsed is not None:
        Chem.Kekulize(parsed)
    return parsed


def get_smiles(mol):
    """Return the kekulized SMILES string of ``mol``."""
    return Chem.MolToSmiles(mol, kekuleSmiles=True)


def decode_stereo(smiles2D):
    """Return the isomeric SMILES of every enumerated stereoisomer of ``smiles2D``.

    When the first isomer has chiral nitrogen atoms, a second set of SMILES
    with the chiral tag on those atoms cleared is appended as well.
    """
    base_mol = Chem.MolFromSmiles(smiles2D)
    isomers = []
    for iso in list(EnumerateStereoisomers(base_mol)):
        # round-trip through SMILES, as the original does
        iso_smiles = Chem.MolToSmiles(iso, isomericSmiles=True)
        isomers.append(Chem.MolFromSmiles(iso_smiles))

    smiles3D = [Chem.MolToSmiles(iso, isomericSmiles=True) for iso in isomers]

    chiral_nitrogens = [
        atom.GetIdx()
        for atom in isomers[0].GetAtoms()
        if int(atom.GetChiralTag()) > 0 and atom.GetSymbol() == "N"
    ]
    if len(chiral_nitrogens) > 0:
        for iso in isomers:
            for atom_idx in chiral_nitrogens:
                iso.GetAtomWithIdx(atom_idx).SetChiralTag(
                    Chem.rdchem.ChiralType.CHI_UNSPECIFIED
                )
            smiles3D.append(Chem.MolToSmiles(iso, isomericSmiles=True))

    return smiles3D


def sanitize(mol):
    """Round-trip ``mol`` through kekulized SMILES; return None on any failure."""
    try:
        return get_mol(get_smiles(mol))
    except Exception:
        return None


def copy_atom(atom):
    """Create a new atom with the symbol, formal charge and atom-map number of ``atom``."""
    duplicate = Chem.Atom(atom.GetSymbol())
    duplicate.SetFormalCharge(atom.GetFormalCharge())
    duplicate.SetAtomMapNum(atom.GetAtomMapNum())
    return duplicate


def copy_edit_mol(mol):
    """Return an editable RWMol with copies of the atoms and bonds of ``mol``."""
    editable = Chem.RWMol(Chem.MolFromSmiles(""))
    for src_atom in mol.GetAtoms():
        editable.AddAtom(copy_atom(src_atom))
    for src_bond in mol.GetBonds():
        begin_idx = src_bond.GetBeginAtom().GetIdx()
        end_idx = src_bond.GetEndAtom().GetIdx()
        editable.AddBond(begin_idx, end_idx, src_bond.GetBondType())
    return editable
def get_clique_mol(mol, atoms):
    """Build a standalone, sanitized molecule from the atoms ``atoms`` of ``mol``.

    The fragment is written as kekulized SMILES, re-parsed without
    sanitization, copied atom by atom and finally passed through ``sanitize``.
    """
    smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    new_mol = Chem.MolFromSmiles(smiles, sanitize=False)
    new_mol = copy_edit_mol(new_mol).GetMol()
    new_mol = sanitize(new_mol)  # We assume this is not None
    return new_mol


def tree_decomp(mol):
    """Decompose ``mol`` into cliques and the edges of a tree connecting them.

    Cliques are the non-ring bonds and the rings reported by
    ``Chem.GetSymmSSSR`` (rings sharing more than two atoms are merged), plus
    singleton cliques added at some atoms shared by several cliques.

    Returns
    -------
    (list of list of int, list of (int, int))
        Atom indices of every clique, and clique-index pairs forming the
        spanning tree. For a single-atom molecule: ``([[0]], [])``.
    """
    n_atoms = mol.GetNumAtoms()
    if n_atoms == 1:
        return [[0]], []

    # Every bond outside a ring is a 2-atom clique.
    cliques = []
    for bond in mol.GetBonds():
        a1 = bond.GetBeginAtom().GetIdx()
        a2 = bond.GetEndAtom().GetIdx()
        if not bond.IsInRing():
            cliques.append([a1, a2])

    ssr = [list(x) for x in Chem.GetSymmSSSR(mol)]
    cliques.extend(ssr)

    # nei_list[atom] = indices of the cliques containing that atom
    nei_list = [[] for i in range(n_atoms)]
    for i in range(len(cliques)):
        for atom in cliques[i]:
            nei_list[atom].append(i)

    # Merge Rings with intersection > 2 atoms
    for i in range(len(cliques)):
        if len(cliques[i]) <= 2:
            continue
        for atom in cliques[i]:
            for j in nei_list[atom]:
                if i >= j or len(cliques[j]) <= 2:
                    continue
                inter = set(cliques[i]) & set(cliques[j])
                if len(inter) > 2:
                    cliques[i].extend(cliques[j])
                    cliques[i] = list(set(cliques[i]))
                    # emptied here, filtered out right after the loop
                    cliques[j] = []

    # Drop merged-away cliques and rebuild the atom -> clique index.
    cliques = [c for c in cliques if len(c) > 0]
    nei_list = [[] for i in range(n_atoms)]
    for i in range(len(cliques)):
        for atom in cliques[i]:
            nei_list[atom].append(i)

    # Build edges and add singleton cliques
    edges = defaultdict(int)
    for atom in range(n_atoms):
        if len(nei_list[atom]) <= 1:
            continue
        cnei = nei_list[atom]
        bonds = [c for c in cnei if len(cliques[c]) == 2]
        rings = [c for c in cnei if len(cliques[c]) > 4]
        # In general, if len(cnei) >= 3, a singleton should be added, but 1 bond + 2 ring is currently not dealt with.
        if len(bonds) > 2 or (len(bonds) == 2 and len(cnei) > 2):
            cliques.append([atom])
            c2 = len(cliques) - 1
            for c1 in cnei:
                edges[(c1, c2)] = 1
        elif len(rings) > 2:  # Multiple (n>2) complex rings
            cliques.append([atom])
            c2 = len(cliques) - 1
            for c1 in cnei:
                edges[(c1, c2)] = MST_MAX_WEIGHT - 1
        else:
            # Weight each pair of cliques by the size of their intersection.
            for i in range(len(cnei)):
                for j in range(i + 1, len(cnei)):
                    c1, c2 = cnei[i], cnei[j]
                    inter = set(cliques[c1]) & set(cliques[c2])
                    if edges[(c1, c2)] < len(inter):
                        edges[(c1, c2)] = len(
                            inter
                        )  # cnei[i] < cnei[j] by construction

    # Flip weights (MST_MAX_WEIGHT - w) so that the minimum spanning tree
    # computed below is a maximum spanning tree of the original weights.
    edges = [u + (MST_MAX_WEIGHT - v,) for u, v in edges.items()]
    if len(edges) == 0:
        return cliques, edges

    # Compute Maximum Spanning Tree
    row, col, data = list(zip(*edges))
    n_clique = len(cliques)
    clique_graph = csr_matrix((data, (row, col)), shape=(n_clique, n_clique))
    junc_tree = minimum_spanning_tree(clique_graph)
    row, col = junc_tree.nonzero()
    edges = [(row[i], col[i]) for i in range(len(row))]
    return (cliques, edges)
def atom_equal(a1, a2):
    """Return whether two atoms have the same symbol and formal charge.

    The formal charges are only queried when the symbols already match.
    """
    if a1.GetSymbol() != a2.GetSymbol():
        return False
    return a1.GetFormalCharge() == a2.GetFormalCharge()
def local_attach_nx(ctr_mol, neighbors, prev_nodes, amap_list):
    """Attach neighbor cliques to a copy of the center molecule.

    Parameters
    ----------
    ctr_mol : rdkit molecule
        Center clique; it is copied with ``copy_edit_mol`` before editing.
    neighbors, prev_nodes : list of dict
        Node dicts; each is indexed here by its ``"nid"`` entry.
    amap_list : iterable of (nei_id, ctr_atom, nei_atom)
        Atom correspondences between a neighbor and the center molecule.

    Returns
    -------
    The result of ``GetMol()`` on the edited copy.
    """
    editable = copy_edit_mol(ctr_mol)

    # One (initially empty) neighbor-atom -> center-atom map per node id.
    per_node_map = {}
    for nei in prev_nodes + neighbors:
        per_node_map[nei["nid"]] = {}

    for nei_id, ctr_atom, nei_atom in amap_list:
        per_node_map[nei_id][nei_atom] = ctr_atom

    attached = attach_mols_nx(editable, neighbors, prev_nodes, per_node_map)
    return attached.GetMol()
# Try rings first: Speed-Up
def enum_assemble_nx(node, neighbors, prev_nodes=None, prev_amap=None):
    """Enumerate the distinct ways of attaching ``neighbors`` to ``node``.

    Neighbors are attached one at a time, in list order, by a depth-first
    search.  At each depth the candidate atom maps from ``enum_attach_nx``
    are kept only if the partially assembled molecule passes ``sanitize``
    and its SMILES has not been seen at that depth.  The search stops adding
    configurations once more than ``MAX_NCAND`` have been collected.

    Parameters
    ----------
    node : dict
        Center node; its ``"mol"`` entry is the molecule being extended.
    neighbors : list of dict
        Node dicts (with ``"nid"`` and ``"mol"``) to attach.
    prev_nodes : list of dict, optional
        Already-attached nodes.  Defaults to an empty list.
    prev_amap : list of tuple, optional
        Atom map to start from.  Defaults to an empty list.

    Returns
    -------
    list of (smiles, mol, amap)
        One entry per distinct assembled SMILES; ``mol`` is kekulized.
    """
    # Create the defaults per call.  With ``[]`` as a default, the shared list
    # object itself was returned as ``amap`` whenever ``neighbors`` was empty,
    # so a caller mutating it would corrupt every later call.
    if prev_nodes is None:
        prev_nodes = []
    if prev_amap is None:
        prev_amap = []

    all_attach_confs = []
    singletons = [
        nei_node["nid"]
        for nei_node in neighbors + prev_nodes
        if nei_node["mol"].GetNumAtoms() == 1
    ]

    def search(cur_amap, depth):
        """Extend ``cur_amap`` with neighbor ``depth`` and recurse."""
        if len(all_attach_confs) > MAX_NCAND:
            return
        if depth == len(neighbors):
            all_attach_confs.append(cur_amap)
            return

        nei_node = neighbors[depth]
        cand_amap = enum_attach_nx(node["mol"], nei_node, cur_amap, singletons)
        cand_smiles = set()
        candidates = []
        for amap in cand_amap:
            cand_mol = local_attach_nx(
                node["mol"], neighbors[: depth + 1], prev_nodes, amap
            )
            cand_mol = sanitize(cand_mol)
            if cand_mol is None:
                continue
            smiles = get_smiles(cand_mol)
            if smiles in cand_smiles:
                continue
            cand_smiles.add(smiles)
            candidates.append(amap)

        # The return value of search() is never used; results are collected
        # in all_attach_confs, so every exit path returns None.
        if not candidates:
            return

        for new_amap in candidates:
            search(new_amap, depth + 1)

    search(prev_amap, 0)

    # De-duplicate the complete assemblies by their canonical SMILES.
    cand_smiles = set()
    candidates = []
    for amap in all_attach_confs:
        cand_mol = local_attach_nx(node["mol"], neighbors, prev_nodes, amap)
        cand_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cand_mol))
        smiles = Chem.MolToSmiles(cand_mol)
        if smiles in cand_smiles:
            continue
        cand_smiles.add(smiles)
        Chem.Kekulize(cand_mol)
        candidates.append((smiles, cand_mol, amap))

    return candidates
def __init__(self, data, vocab, training=True):
    """Download the JTNN archive and load the SMILES list and vocabulary.

    Parameters
    ----------
    data : str
        Stem of the data file; ``<download_dir>/jtnn/<data>.txt`` is read
        and the first whitespace-separated token of each line is kept.
    vocab : str
        Stem of the vocabulary file, ``<download_dir>/jtnn/<vocab>.txt``;
        one entry per line.
    training : bool, optional
        Stored on the instance as ``self.training``.
    """
    self.dir = get_download_dir()
    self.zip_file_path = "{}/jtnn.zip".format(self.dir)

    download(_get_dgl_url("dgllife/jtnn.zip"), path=self.zip_file_path)
    extract_archive(self.zip_file_path, "{}/jtnn".format(self.dir))
    print("Loading data...")
    data_file = "{}/jtnn/{}.txt".format(self.dir, data)
    with open(data_file) as f:
        self.data = [line.strip("\r\n ").split()[0] for line in f]
    self.vocab_file = "{}/jtnn/{}.txt".format(self.dir, vocab)
    print("Loading finished.")
    print("\tNum samples:", len(self.data))
    print("\tVocab file:", self.vocab_file)
    self.training = training
    # Read the vocabulary inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open(self.vocab_file) as f:
        vocab_entries = [x.strip("\r\n ") for x in f]
    self.vocab = Vocab(vocab_entries)
@staticmethod
def _batch_and_set(graphs, atom_x, bond_x, flatten):
    """Batch graphs and attach atom/bond feature tensors to the batch.

    Parameters
    ----------
    graphs : list
        Graphs to batch, or a list of lists of graphs when ``flatten`` is
        true.
    atom_x : tensor
        Stored as node feature ``"x"`` of the batched graph.
    bond_x : tensor
        Stored as edge feature ``"x"`` of the batched graph.
    flatten : bool
        Whether ``graphs`` is nested one level and must be flattened first.

    Returns
    -------
    The batched graph.  Its edge feature ``"src_x"`` is a zero tensor with
    ``bond_x.shape[0]`` rows and ``atom_x.shape[1]`` columns, created from
    ``atom_x``.
    """
    if flatten:
        flat_graphs = []
        for group in graphs:
            flat_graphs.extend(group)
        graphs = flat_graphs

    graph_batch = dgl.batch(graphs)
    graph_batch.ndata["x"] = atom_x

    # Zero placeholder for the per-edge source-atom features.
    src_placeholder = atom_x.new(bond_x.shape[0], atom_x.shape[1]).zero_()
    graph_batch.edata.update({"x": bond_x, "src_x": src_placeholder})
    return graph_batch
molecule graphs mol_graphs = _unpack_field(examples, "mol_graph") atom_x = torch.cat(_unpack_field(examples, "atom_x_enc")) bond_x = torch.cat(_unpack_field(examples, "bond_x_enc")) mol_graph_batch = self._batch_and_set(mol_graphs, atom_x, bond_x, False) result = { "mol_trees": mol_trees, "mol_graph_batch": mol_graph_batch, } if not self.training: return result # batch candidate graphs cand_graphs = _unpack_field(examples, "cand_graphs") cand_batch_idx = [] atom_x = torch.cat(_unpack_field(examples, "atom_x_dec")) bond_x = torch.cat(_unpack_field(examples, "bond_x_dec")) tree_mess_src_e = _unpack_field(examples, "tree_mess_src_e") tree_mess_tgt_e = _unpack_field(examples, "tree_mess_tgt_e") tree_mess_tgt_n = _unpack_field(examples, "tree_mess_tgt_n") n_graph_nodes = 0 n_tree_nodes = 0 for i in range(len(cand_graphs)): tree_mess_tgt_e[i] += n_graph_nodes tree_mess_src_e[i] += n_tree_nodes tree_mess_tgt_n[i] += n_graph_nodes n_graph_nodes += sum(g.num_nodes() for g in cand_graphs[i]) n_tree_nodes += mol_trees[i].graph.num_nodes() cand_batch_idx.extend([i] * len(cand_graphs[i])) tree_mess_tgt_e = torch.cat(tree_mess_tgt_e) tree_mess_src_e = torch.cat(tree_mess_src_e) tree_mess_tgt_n = torch.cat(tree_mess_tgt_n) cand_graph_batch = self._batch_and_set( cand_graphs, atom_x, bond_x, True ) # batch stereoisomers stereo_cand_graphs = _unpack_field(examples, "stereo_cand_graphs") atom_x = torch.cat(_unpack_field(examples, "stereo_atom_x_enc")) bond_x = torch.cat(_unpack_field(examples, "stereo_bond_x_enc")) stereo_cand_batch_idx = [] for i in range(len(stereo_cand_graphs)): stereo_cand_batch_idx.extend([i] * len(stereo_cand_graphs[i])) if len(stereo_cand_batch_idx) > 0: stereo_cand_labels = [ (label, length) for ex in _unpack_field(examples, "stereo_cand_label") for label, length in ex ] stereo_cand_labels, stereo_cand_lengths = zip(*stereo_cand_labels) stereo_cand_graph_batch = self._batch_and_set( stereo_cand_graphs, atom_x, bond_x, True ) else: stereo_cand_labels = [] 
def onek_encoding_unk(x, allowable_set):
    """One-hot encode ``x`` against ``allowable_set``.

    A value that is not in ``allowable_set`` is encoded as the last entry of
    the set.

    Returns
    -------
    list of bool
        One flag per entry of ``allowable_set``.
    """
    key = x if x in allowable_set else allowable_set[-1]
    encoding = []
    for option in allowable_set:
        encoding.append(key == option)
    return encoding
def atom_features(atom):
    """Build the feature vector of one atom.

    The vector concatenates, in this order: a one-hot of the element symbol
    over ``ELEM_LIST``, a one-hot of the degree over 0..5, a one-hot of the
    formal charge over ``[-1, -2, 1, 2, 0]``, and the aromaticity flag.
    Values outside a list are encoded as that list's last entry.

    Returns
    -------
    torch.Tensor
        1-D tensor built from those flags.
    """
    symbol_bits = onek_encoding_unk(atom.GetSymbol(), ELEM_LIST)
    degree_bits = onek_encoding_unk(atom.GetDegree(), [0, 1, 2, 3, 4, 5])
    charge_bits = onek_encoding_unk(atom.GetFormalCharge(), [-1, -2, 1, 2, 0])
    aromatic_bit = [atom.GetIsAromatic()]
    return torch.Tensor(symbol_bits + degree_bits + charge_bits + aromatic_bit)
class LoopyBPUpdate(nn.Module):
    """Node-update module computing ``relu(msg_input + W_h(accum_msg + alpha))``."""

    def __init__(self, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        # Bias-free linear map applied to the accumulated messages.
        self.W_h = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, node):
        """Return ``{"msg": ...}`` computed from the fields in ``node.data``.

        Reads ``"msg_input"``, ``"accum_msg"`` and ``"alpha"`` from
        ``node.data``.
        """
        base = node.data["msg_input"]
        accumulated = node.data["accum_msg"] + node.data["alpha"]
        delta = self.W_h(accumulated)
        return {"msg": torch.relu(base + delta)}
def run(
    self,
    cand_graphs,
    cand_line_graph,
    tree_mess_src_edges,
    tree_mess_tgt_edges,
    tree_mess_tgt_nodes,
    mol_tree_batch,
):
    """Run message passing over the candidate graphs and update node states.

    Parameters
    ----------
    cand_graphs
        Batched candidate graph with node feature ``"x"`` and edge feature
        ``"x"`` already set.
    cand_line_graph
        Line graph of ``cand_graphs``; its node data is refreshed from the
        edge data of ``cand_graphs`` in this method.
    tree_mess_src_edges
        Tensor whose columns are unbound as (source, destination) node ids
        of edges in ``mol_tree_batch``.
    tree_mess_tgt_edges
        Tensor whose columns are unbound as (source, destination) node ids
        of edges in ``cand_graphs``; only read when ``PAPER`` is set.
    tree_mess_tgt_nodes
        Node ids in ``cand_graphs`` that receive tree messages; only read
        when ``PAPER`` is not set.
    mol_tree_batch
        Batched tree graph whose edge feature ``"m"`` supplies the messages.

    Returns
    -------
    ``cand_graphs`` after ``self.gather_updater`` has been applied to its
    nodes.
    """
    n_nodes = cand_graphs.num_nodes()

    # Copy each edge's source-atom features onto the edge, then expose the
    # edge data on the line graph's nodes.
    cand_graphs.apply_edges(
        func=lambda edges: {"src_x": edges.src["x"]},
    )
    cand_line_graph.ndata.update(cand_graphs.edata)

    bond_features = cand_line_graph.ndata["x"]
    source_features = cand_line_graph.ndata["src_x"]
    features = torch.cat([source_features, bond_features], 1)
    msg_input = self.W_i(features)
    cand_line_graph.ndata.update(
        {
            "msg_input": msg_input,
            "msg": torch.relu(msg_input),
            "accum_msg": torch.zeros_like(msg_input),
        }
    )
    # Zero tensor with one row per candidate-graph node, created from
    # bond_features.
    zero_node_state = bond_features.new(n_nodes, self.hidden_size).zero_()
    cand_graphs.ndata.update(
        {
            "m": zero_node_state.clone(),
            "h": zero_node_state.clone(),
        }
    )

    # "alpha" starts as zeros on both edges and nodes and is only overwritten
    # below when there are tree messages to inject.
    cand_graphs.edata["alpha"] = cuda(
        torch.zeros(cand_graphs.num_edges(), self.hidden_size)
    )
    cand_graphs.ndata["alpha"] = zero_node_state
    if tree_mess_src_edges.shape[0] > 0:
        if PAPER:
            # Write each tree-edge message directly onto its target edge.
            src_u, src_v = tree_mess_src_edges.unbind(1)
            tgt_u, tgt_v = tree_mess_tgt_edges.unbind(1)
            src_u = src_u.to(mol_tree_batch.device)
            src_v = src_v.to(mol_tree_batch.device)
            eid = mol_tree_batch.edge_ids(src_u, src_v)
            alpha = mol_tree_batch.edata["m"][eid]
            cand_graphs.edges[tgt_u, tgt_v].data["alpha"] = alpha
        else:
            # Sum the tree-edge messages per target node, then copy each
            # node's sum onto its outgoing edges.
            src_u, src_v = tree_mess_src_edges.unbind(1)
            src_u = src_u.to(mol_tree_batch.device)
            src_v = src_v.to(mol_tree_batch.device)
            eid = mol_tree_batch.edge_ids(src_u, src_v)
            alpha = mol_tree_batch.edata["m"][eid]
            node_idx = tree_mess_tgt_nodes.to(
                device=zero_node_state.device
            )[:, None].expand_as(alpha)
            node_alpha = zero_node_state.clone().scatter_add(
                0, node_idx, alpha
            )
            cand_graphs.ndata["alpha"] = node_alpha
            cand_graphs.apply_edges(
                func=lambda edges: {"alpha": edges.src["alpha"]},
            )

    # Refresh the line graph's node data so it sees the "alpha" field.
    cand_line_graph.ndata.update(cand_graphs.edata)
    # depth - 1 rounds of message passing on the line graph.
    for i in range(self.depth - 1):
        cand_line_graph.update_all(
            DGLF.copy_u("msg", "msg"), DGLF.sum("msg", "accum_msg")
        )
        cand_line_graph.apply_nodes(self.loopy_bp_updater)

    # Bring the final messages back onto the candidate graph's edges and sum
    # them into node field "m".
    cand_graphs.edata.update(cand_line_graph.ndata)

    cand_graphs.update_all(DGLF.copy_e("msg", "msg"), DGLF.sum("msg", "m"))
    if PAPER:
        cand_graphs.update_all(
            DGLF.copy_e("alpha", "alpha"), DGLF.sum("alpha", "accum_alpha")
        )
    cand_graphs.apply_nodes(self.gather_updater)

    return cand_graphs
def have_slots(fa_slots, ch_slots):
    """Check whether a parent and a child clique have a compatible slot pair.

    Each slot is a 3-tuple ``(a, c, h)``.  Two slots match when their first
    two fields are equal and, if the first field is ``"C"``, the third
    fields sum to at least 4.
    NOTE(review): the fields look like (atom symbol, formal charge, hydrogen
    count) -- confirm against ``Vocab.get_slots``.

    Side effect: when every match involves the same slot of a two-slot
    list, that slot is popped from the list in place.

    Returns
    -------
    bool
        ``True`` immediately if both lists hold more than two slots;
        otherwise whether at least one matching pair exists.
    """
    if len(fa_slots) > 2 and len(ch_slots) > 2:
        return True

    matches = [
        (fa_idx, ch_idx)
        for fa_idx, (a1, c1, h1) in enumerate(fa_slots)
        for ch_idx, (a2, c2, h2) in enumerate(ch_slots)
        if a1 == a2 and c1 == c2 and (a1 != "C" or h1 + h2 >= 4)
    ]
    if not matches:
        return False

    fa_hits = {fa_idx for fa_idx, _ in matches}
    ch_hits = {ch_idx for _, ch_idx in matches}
    first_fa, first_ch = matches[0]

    # Only two-slot lists are consumed: never remove an atom from a ring.
    if len(fa_hits) == 1 and len(fa_slots) == 2:
        fa_slots.pop(first_fa)
    if len(ch_hits) == 1 and len(ch_slots) == 2:
        ch_slots.pop(first_ch)

    return True
def run(self, mol_tree_batch, mol_tree_batch_lg, n_trees, tree_vec):
    """Compute stop/label prediction losses against the ground-truth trees.

    Parameters
    ----------
    mol_tree_batch
        Batched tree graph; node feature ``"wid"`` must be set.  Node and
        edge data of this graph are overwritten here.
    mol_tree_batch_lg
        Line graph of ``mol_tree_batch``.
    n_trees : int
        Number of trees in the batch; both summed losses are divided by it.
    tree_vec : tensor
        One row per tree; it is concatenated with the per-node hidden
        states to form the prediction inputs.

    Returns
    -------
    tuple
        ``(q_loss, p_loss, q_acc, p_acc)`` where ``q`` is the label
        prediction and ``p`` the stop prediction.  The inputs, targets and
        raw scores are also stored on ``self`` (``q_inputs``, ``q_targets``,
        ``q``, ``p_inputs``, ``p_targets``, ``p``).
    """
    # The root of each tree is its first node in the batched graph.
    node_offset = np.cumsum(
        np.insert(mol_tree_batch.batch_num_nodes().cpu().numpy(), 0, 0)
    )
    root_ids = node_offset[:-1]
    n_nodes = mol_tree_batch.num_nodes()
    n_edges = mol_tree_batch.num_edges()

    mol_tree_batch.ndata.update(
        {
            "x": self.embedding(mol_tree_batch.ndata["wid"]),
            "h": cuda(torch.zeros(n_nodes, self.hidden_size)),
            # whether it's newly generated node
            "new": cuda(torch.ones(n_nodes).bool()),
        }
    )

    # All per-edge state starts as zeros.
    edge_fields = ("s", "m", "r", "z", "src_x", "dst_x", "rm", "accum_rm")
    mol_tree_batch.edata.update(
        {
            field: cuda(torch.zeros(n_edges, self.hidden_size))
            for field in edge_fields
        }
    )

    mol_tree_batch.apply_edges(
        func=lambda edges: {
            "src_x": edges.src["x"],
            "dst_x": edges.dst["x"],
        },
    )

    # input tensors for stop prediction (p) and label prediction (q)
    p_inputs = []
    p_targets = []
    q_inputs = []
    q_targets = []

    # Predict root
    mol_tree_batch.pull(root_ids, DGLF.copy_e("m", "m"), DGLF.sum("m", "h"))
    mol_tree_batch.apply_nodes(dec_tree_node_update, v=root_ids)
    # Extract hidden states and store them for stop/label prediction
    h = mol_tree_batch.nodes[root_ids].data["h"]
    x = mol_tree_batch.nodes[root_ids].data["x"]
    p_inputs.append(torch.cat([x, h, tree_vec], 1))
    # If the out degree is 0 we don't generate any edges at all
    root_out_degrees = mol_tree_batch.out_degrees(root_ids)
    q_inputs.append(torch.cat([h, tree_vec], 1))
    q_targets.append(mol_tree_batch.nodes[root_ids].data["wid"])

    # Traverse the tree and predict on children
    for eid, p in dfs_order(mol_tree_batch, root_ids):
        eid = eid.to(mol_tree_batch.device)
        p = p.to(mol_tree_batch.device)
        u, v = mol_tree_batch.find_edges(eid)

        p_target_list = torch.zeros_like(root_out_degrees)
        p_target_list[root_out_degrees > 0] = 1 - p
        p_target_list = p_target_list[root_out_degrees >= 0]
        # clone().detach() is the supported way to copy an existing tensor;
        # torch.tensor(tensor) emits a UserWarning on every iteration.
        p_targets.append(p_target_list.clone().detach())

        root_out_degrees -= (root_out_degrees == 0).long()
        root_out_degrees -= torch.tensor(
            np.isin(root_ids, v.cpu().numpy())
        ).to(root_out_degrees)

        mol_tree_batch_lg.ndata.update(mol_tree_batch.edata)
        mol_tree_batch_lg.pull(
            eid, DGLF.copy_u("m", "m"), DGLF.sum("m", "s")
        )
        mol_tree_batch_lg.pull(
            eid, DGLF.copy_u("rm", "rm"), DGLF.sum("rm", "accum_rm")
        )
        mol_tree_batch_lg.apply_nodes(self.dec_tree_edge_update, v=eid)
        mol_tree_batch.edata.update(mol_tree_batch_lg.ndata)

        # Read the "new" flags before dec_tree_node_update clears them.
        is_new = mol_tree_batch.nodes[v].data["new"]
        mol_tree_batch.pull(v, DGLF.copy_e("m", "m"), DGLF.sum("m", "h"))
        mol_tree_batch.apply_nodes(dec_tree_node_update, v=v)

        # Extract
        n_repr = mol_tree_batch.nodes[v].data
        h = n_repr["h"]
        x = n_repr["x"]
        tree_vec_set = tree_vec[root_out_degrees >= 0]
        wid = n_repr["wid"]
        p_inputs.append(torch.cat([x, h, tree_vec_set], 1))
        # Only newly generated nodes are needed for label prediction
        # NOTE: The following works since the uncomputed messages are zeros.
        q_input = torch.cat([h, tree_vec_set], 1)[is_new]
        q_target = wid[is_new]
        if q_input.shape[0] > 0:
            q_inputs.append(q_input)
            q_targets.append(q_target)

    # One trailing zero target per tree whose counter is at zero after the
    # traversal.
    p_targets.append(
        torch.zeros(
            (root_out_degrees == 0).sum(),
            device=root_out_degrees.device,
            dtype=torch.int64,
        )
    )

    # Batch compute the stop/label prediction losses
    p_inputs = torch.cat(p_inputs, 0)
    p_targets = cuda(torch.cat(p_targets, 0))
    q_inputs = torch.cat(q_inputs, 0)
    q_targets = torch.cat(q_targets, 0)

    q = self.W_o(torch.relu(self.W(q_inputs)))
    p = self.U_s(torch.relu(self.U(p_inputs)))[:, 0]

    # reduction="sum" is the non-deprecated spelling of size_average=False.
    p_loss = (
        F.binary_cross_entropy_with_logits(
            p, p_targets.float(), reduction="sum"
        )
        / n_trees
    )
    q_loss = F.cross_entropy(q, q_targets, reduction="sum") / n_trees
    p_acc = ((p > 0).long() == p_targets).sum().float() / p_targets.shape[0]
    q_acc = (q.max(1)[1] == q_targets).float().sum() / q_targets.shape[0]

    self.q_inputs = q_inputs
    self.q_targets = q_targets
    self.q = q
    self.p_inputs = p_inputs
    self.p_targets = p_targets
    self.p = p

    return q_loss, p_loss, q_acc, p_acc
mol_tree_graph.ndata["x"][u : u + 1] h = mol_tree_graph.ndata["h"][u : u + 1] # Predict stop p_input = torch.cat([x, h, mol_vec], 1) p_score = torch.sigmoid(self.U_s(torch.relu(self.U(p_input)))) backtrack = p_score.item() < 0.5 if not backtrack: # Predict next clique. Note that the prediction may fail due # to lack of assemblable components mol_tree_graph.add_nodes(1) new_node_id += 1 v = new_node_id mol_tree_graph.add_edges(u, v) uv = new_edge_id new_edge_id += 1 if first: mol_tree_graph.edata.update( { "s": cuda(torch.zeros(1, self.hidden_size)), "m": cuda(torch.zeros(1, self.hidden_size)), "r": cuda(torch.zeros(1, self.hidden_size)), "z": cuda(torch.zeros(1, self.hidden_size)), "src_x": cuda(torch.zeros(1, self.hidden_size)), "dst_x": cuda(torch.zeros(1, self.hidden_size)), "rm": cuda(torch.zeros(1, self.hidden_size)), "accum_rm": cuda(torch.zeros(1, self.hidden_size)), } ) first = False mol_tree_graph.edata["src_x"][uv] = mol_tree_graph.ndata["x"][u] # keeping dst_x 0 is fine as h on new edge doesn't depend on that. # DGL doesn't dynamically maintain a line graph. 
mol_tree_graph_lg = line_graph( mol_tree_graph, backtracking=False, shared=True ) mol_tree_graph_lg.pull( uv, DGLF.copy_u("m", "m"), DGLF.sum("m", "s") ) mol_tree_graph_lg.pull( uv, DGLF.copy_u("rm", "rm"), DGLF.sum("rm", "accum_rm") ) mol_tree_graph_lg.apply_nodes( self.dec_tree_edge_update.update_zm, v=uv ) mol_tree_graph.edata.update(mol_tree_graph_lg.ndata) mol_tree_graph.pull( v, DGLF.copy_e("m", "m"), DGLF.sum("m", "h") ) h_v = mol_tree_graph.ndata["h"][v : v + 1] q_input = torch.cat([h_v, mol_vec], 1) q_score = torch.softmax( self.W_o(torch.relu(self.W(q_input))), -1 ) _, sort_wid = torch.sort(q_score, 1, descending=True) sort_wid = sort_wid.squeeze() next_wid = None for wid in sort_wid.tolist()[:5]: slots = self.vocab.get_slots(wid) cand_node_dict = create_node_dict( self.vocab.get_smiles(wid) ) if have_slots(u_slots, slots) and can_assemble( mol_tree, u, cand_node_dict ): next_wid = wid next_slots = slots next_node_dict = cand_node_dict break if next_wid is None: # Failed adding an actual children; v is a spurious node # and we mark it. mol_tree_graph.ndata["fail"][v] = cuda(torch.tensor([1])) backtrack = True else: next_wid = cuda(torch.tensor([next_wid])) mol_tree_graph.ndata["wid"][v] = next_wid mol_tree_graph.ndata["x"][v] = self.embedding(next_wid) mol_tree.nodes_dict[v] = next_node_dict all_nodes[v] = next_node_dict stack.append((v, next_slots)) mol_tree_graph.add_edges(v, u) vu = new_edge_id new_edge_id += 1 mol_tree_graph.edata["dst_x"][uv] = mol_tree_graph.ndata[ "x" ][v] mol_tree_graph.edata["src_x"][vu] = mol_tree_graph.ndata[ "x" ][v] mol_tree_graph.edata["dst_x"][vu] = mol_tree_graph.ndata[ "x" ][u] # DGL doesn't dynamically maintain a line graph. 
def level_order(forest, roots):
    """Yield edge frontiers for a bottom-up then top-down sweep of ``forest``.

    The forest is first rebuilt on CPU via ``tocpu``.  The reverse BFS
    frontiers are yielded in reversed order, followed by the forward BFS
    frontiers.  Nothing is yielded when the forward traversal finds no edges.

    ``roots`` is passed straight to ``bfs_edges_generator`` as the traversal
    sources.  Each yielded item is whatever ``bfs_edges_generator`` produces
    per level (presumably a tensor of edge IDs; callers move it to the target
    device before use).
    """
    forest = tocpu(forest)
    edges = bfs_edges_generator(forest, roots)
    if len(edges) == 0:
        # no edges in the tree; do not perform loopy BP
        return

    # NOTE: the original also called ``forest.find_edges(edges[-1])`` here and
    # discarded the result; that dead lookup has been removed.
    edges_back = bfs_edges_generator(forest, roots, reverse=True)
    yield from reversed(edges_back)
    yield from edges
def run(self, mol_tree_batch, mol_tree_batch_lg):
    """Run tree message passing on a batched junction tree and read out roots.

    ``mol_tree_batch`` is the batched tree graph; its ``ndata["wid"]`` is fed
    through ``self.embedding``.  ``mol_tree_batch_lg`` is the matching line
    graph (``forward`` builds it with ``backtracking=False, shared=True``), so
    tree-edge features and line-graph node features are exchanged with plain
    ``update`` calls.

    Returns ``(mol_tree_batch, root_vecs)`` where ``root_vecs`` is the ``"h"``
    feature of the first node of every tree in the batch.
    """
    # Tree roots are designated as node 0 of each tree, so in the batched
    # graph the root IDs are the cumulative node offsets (last one dropped).
    node_offset = np.cumsum(
        np.insert(mol_tree_batch.batch_num_nodes().cpu().numpy(), 0, 0)
    )
    root_ids = node_offset[:-1]
    n_nodes = mol_tree_batch.num_nodes()
    n_edges = mol_tree_batch.num_edges()

    # Assign structure embeddings to tree nodes; "m" and "h" start at zero.
    mol_tree_batch.ndata.update(
        {
            "x": self.embedding(mol_tree_batch.ndata["wid"]),
            "m": cuda(torch.zeros(n_nodes, self.hidden_size)),
            "h": cuda(torch.zeros(n_nodes, self.hidden_size)),
        }
    )

    # Initialize the intermediate variables according to Eq (4)-(8).
    # Also initialize the src_x and dst_x fields.
    # TODO: context?
    mol_tree_batch.edata.update(
        {
            "s": cuda(torch.zeros(n_edges, self.hidden_size)),
            "m": cuda(torch.zeros(n_edges, self.hidden_size)),
            "r": cuda(torch.zeros(n_edges, self.hidden_size)),
            "z": cuda(torch.zeros(n_edges, self.hidden_size)),
            "src_x": cuda(torch.zeros(n_edges, self.hidden_size)),
            "dst_x": cuda(torch.zeros(n_edges, self.hidden_size)),
            "rm": cuda(torch.zeros(n_edges, self.hidden_size)),
            "accum_rm": cuda(torch.zeros(n_edges, self.hidden_size)),
        }
    )

    # Send the source/destination node features to edges
    mol_tree_batch.apply_edges(
        func=lambda edges: {
            "src_x": edges.src["x"],
            "dst_x": edges.dst["x"],
        },
    )

    # Message passing
    # This exploits the fact that the reduce function is a sum of incoming
    # messages, and the uncomputed messages are zero vectors.  Essentially,
    # we can always compute s_ij as the sum of incoming m_ij, no matter
    # if m_ij is actually computed or not.
    mol_tree_batch_lg.ndata.update(mol_tree_batch.edata)
    for eid in level_order(mol_tree_batch, root_ids):
        # level_order works on a CPU copy of the graph, so move the IDs back.
        eid = eid.to(mol_tree_batch_lg.device)
        # Sum incoming "m" into "s" and incoming "rm" into "accum_rm" ...
        mol_tree_batch_lg.pull(
            eid, DGLF.copy_u("m", "m"), DGLF.sum("m", "s")
        )
        mol_tree_batch_lg.pull(
            eid, DGLF.copy_u("rm", "rm"), DGLF.sum("rm", "accum_rm")
        )
        # ... then run the GRU-style update on exactly this frontier.
        mol_tree_batch_lg.apply_nodes(self.enc_tree_update, v=eid)

    # Readout: copy results back to the tree edges, sum edge messages into
    # each destination node's "m", and compute "h" with the gather update.
    mol_tree_batch.edata.update(mol_tree_batch_lg.ndata)
    mol_tree_batch.update_all(DGLF.copy_e("m", "m"), DGLF.sum("m", "m"))
    mol_tree_batch.apply_nodes(self.enc_tree_gather_update)

    root_vecs = mol_tree_batch.nodes[root_ids].data["h"]

    return mol_tree_batch, root_vecs
def sample(self, tree_vec, mol_vec, e1=None, e2=None):
    """Draw reparameterised latent samples for the tree and graph encodings.

    The means come from ``T_mean`` / ``G_mean``.  The log-variances are the
    negated absolute outputs of ``T_var`` / ``G_var``, so they are never
    positive.  ``e1`` and ``e2`` optionally supply the noise for the tree and
    molecule parts; when omitted, standard normal noise is drawn (tree first,
    then molecule) and passed through ``cuda``.

    Returns ``(tree_sample, mol_sample, z_mean, z_log_var)``, the last two
    being the tree and molecule statistics concatenated along dim 1.
    """
    tree_mean = self.T_mean(tree_vec)
    tree_log_var = -torch.abs(self.T_var(tree_vec))
    mol_mean = self.G_mean(mol_vec)
    mol_log_var = -torch.abs(self.G_var(mol_vec))

    if e1 is None:
        tree_noise = cuda(torch.randn(*tree_mean.shape))
    else:
        tree_noise = e1
    tree_sample = tree_mean + torch.exp(tree_log_var / 2) * tree_noise

    if e2 is None:
        mol_noise = cuda(torch.randn(*mol_mean.shape))
    else:
        mol_noise = e2
    mol_sample = mol_mean + torch.exp(mol_log_var / 2) * mol_noise

    return (
        tree_sample,
        mol_sample,
        torch.cat([tree_mean, mol_mean], 1),
        torch.cat([tree_log_var, mol_log_var], 1),
    )
def assm(self, mol_batch, mol_tree_batch, mol_vec):
    """Compute the assembly loss and accuracy for a batch.

    Candidate graphs from ``mol_batch`` are encoded with ``self.jtmpn`` and
    projected through ``self.G_mean``.  Each candidate is scored by a dot
    product with the molecule vector selected through ``cand_batch_idx``.
    Scores are consumed in order, one contiguous segment per non-leaf tree
    node that has more than one candidate.

    Returns ``(loss, accuracy)``.  The loss is the summed per-node cross
    entropy divided by the number of trees.  The accuracy is the fraction of
    scored nodes whose labelled candidate attains the maximum score.  When no
    node needs scoring, a zero loss and an accuracy of ``1.0`` are returned
    (the same convention ``stereo`` uses for its empty case) instead of
    dividing by zero.
    """
    cands = [
        mol_batch["cand_graph_batch"],
        cuda(mol_batch["tree_mess_src_e"]),
        cuda(mol_batch["tree_mess_tgt_e"]),
        cuda(mol_batch["tree_mess_tgt_n"]),
    ]
    cand_vec = self.jtmpn(cands, mol_tree_batch)
    cand_vec = self.G_mean(cand_vec)

    batch_idx = cuda(torch.LongTensor(mol_batch["cand_batch_idx"]))
    mol_vec = mol_vec[batch_idx]

    # Batched (1 x d) @ (d x 1) products: one scalar score per candidate.
    mol_vec = mol_vec.view(-1, 1, self.latent_size // 2)
    cand_vec = cand_vec.view(-1, self.latent_size // 2, 1)
    scores = (mol_vec @ cand_vec)[:, 0, 0]

    cnt, tot, acc = 0, 0, 0
    all_loss = []
    for mol_tree in mol_batch["mol_trees"]:
        comp_nodes = [
            node_id
            for node_id, node in mol_tree.nodes_dict.items()
            if len(node["cands"]) > 1 and not node["is_leaf"]
        ]
        cnt += len(comp_nodes)
        # segmented accuracy and cross entropy
        for node_id in comp_nodes:
            node = mol_tree.nodes_dict[node_id]
            label = node["cands"].index(node["label"])
            ncand = len(node["cands"])
            cur_score = scores[tot : tot + ncand]
            tot += ncand

            if cur_score[label].item() >= cur_score.max().item():
                acc += 1

            label = cuda(torch.LongTensor([label]))
            # reduction="sum" is the supported spelling of the deprecated
            # size_average=False.
            all_loss.append(
                F.cross_entropy(
                    cur_score.view(1, -1), label, reduction="sum"
                )
            )

    if cnt == 0:
        # Nothing to score: avoid ZeroDivisionError in ``acc / cnt``.
        return scores.new_zeros(()), 1.0

    all_loss = sum(all_loss) / len(mol_batch["mol_trees"])
    return all_loss, acc / cnt
def decode(self, tree_vec, mol_vec):
    """Decode a latent pair into a SMILES string.

    The tree decoder proposes a junction tree from ``tree_vec``, the tree is
    re-encoded with ``self.jtnn``, ``dfs_assemble`` attaches the cliques into
    a molecule guided by ``mol_vec``, and finally a stereoisomer is chosen.

    Returns the chosen SMILES string, or ``None`` when assembly fails or the
    assembled molecule cannot be round-tripped through RDKit.
    """
    mol_tree, nodes_dict, effective_nodes = self.decoder.decode(tree_vec)
    effective_nodes_list = effective_nodes.tolist()
    # Keep only the effective nodes, as a list indexed by their new position.
    nodes_dict = [nodes_dict[v] for v in effective_nodes_list]

    for i, (node_id, node) in enumerate(
        zip(effective_nodes_list, nodes_dict)
    ):
        node["idx"] = i
        node["nid"] = i + 1  # nid is 1-based
        node["is_leaf"] = True
        # In-degree is taken on the full decoded graph, before subgraphing.
        if mol_tree.graph.in_degrees(node_id) > 1:
            node["is_leaf"] = False
            set_atommap(node["mol"], node["nid"])

    # Re-encode the tree restricted to the effective nodes.
    mol_tree_sg = mol_tree.graph.subgraph(
        effective_nodes.to(tree_vec.device)
    )
    mol_tree_msg, _ = self.jtnn([mol_tree_sg])
    mol_tree_msg = unbatch(mol_tree_msg)[0]
    mol_tree_msg.nodes_dict = nodes_dict

    # Start from the root clique; global_amap is indexed by nid, so slot 0
    # is a placeholder and slot 1 maps the root's atoms to themselves.
    cur_mol = copy_edit_mol(nodes_dict[0]["mol"])
    global_amap = [{}] + [{} for node in nodes_dict]
    global_amap[1] = {
        atom.GetIdx(): atom.GetIdx() for atom in cur_mol.GetAtoms()
    }

    cur_mol = self.dfs_assemble(
        mol_tree_msg, mol_vec, cur_mol, global_amap, [], 0, None
    )
    if cur_mol is None:
        return None

    cur_mol = cur_mol.GetMol()
    set_atommap(cur_mol)
    # Round-trip through SMILES; MolFromSmiles returns None on failure.
    cur_mol = Chem.MolFromSmiles(Chem.MolToSmiles(cur_mol))
    if cur_mol is None:
        return None

    smiles2D = Chem.MolToSmiles(cur_mol)
    stereo_cands = decode_stereo(smiles2D)
    if len(stereo_cands) == 1:
        # Only one stereoisomer: nothing to rank.
        return stereo_cands[0]

    # Encode every stereo candidate and pick the one most similar to mol_vec.
    stereo_graphs = [mol2dgl_enc(c) for c in stereo_cands]
    stereo_cand_graphs, atom_x, bond_x = zip(*stereo_graphs)
    stereo_cand_graphs = cuda(batch(stereo_cand_graphs))
    atom_x = cuda(torch.cat(atom_x))
    bond_x = cuda(torch.cat(bond_x))
    stereo_cand_graphs.ndata["x"] = atom_x
    stereo_cand_graphs.edata["x"] = bond_x
    # Zero placeholder with one row per edge and the atom feature width.
    stereo_cand_graphs.edata["src_x"] = atom_x.new(
        bond_x.shape[0], atom_x.shape[1]
    ).zero_()
    stereo_vecs = self.mpn(stereo_cand_graphs)
    stereo_vecs = self.G_mean(stereo_vecs)
    scores = F.cosine_similarity(stereo_vecs, mol_vec)
    _, max_id = scores.max(0)
    return stereo_cands[max_id.item()]
"""
line_profiler integration

Exposes a ``profile`` decorator.  When the ``PROFILE`` environment variable is
set to a non-empty value, ``profile`` is a ``line_profiler.LineProfiler`` whose
statistics are emitted at interpreter exit: dumped to the path given by
``PROFILE_OUTPUT`` if that is set, printed otherwise.  Without ``PROFILE`` the
decorator returns the function unchanged.
"""
import os

if not os.getenv("PROFILE", 0):

    def profile(f):
        # Profiling disabled: identity decorator.
        return f

else:
    import atexit

    import line_profiler

    profile = line_profiler.LineProfiler()
    profile_output = os.getenv("PROFILE_OUTPUT", None)
    if profile_output:
        # atexit forwards extra positional arguments to the callback.
        atexit.register(profile.dump_stats, profile_output)
    else:
        atexit.register(profile.print_stats)
{x: i for i, x in enumerate(self.vocab)} self.slots = [get_slots(smiles) for smiles in self.vocab] def get_index(self, smiles): return self.vmap[smiles] def get_smiles(self, idx): return self.vocab[idx] def get_slots(self, idx): return copy.deepcopy(self.slots[idx]) def size(self): return len(self.vocab) ================================================ FILE: examples/pytorch/jtnn/jtnn/mol_tree_nx.py ================================================ import dgl import numpy as np import rdkit.Chem as Chem from .chemutils import ( decode_stereo, enum_assemble_nx, get_clique_mol, get_mol, get_smiles, set_atommap, tree_decomp, ) class DGLMolTree(object): def __init__(self, smiles): self.nodes_dict = {} if smiles is None: self.graph = dgl.graph(([], [])) return self.smiles = smiles self.mol = get_mol(smiles) # Stereo Generation mol = Chem.MolFromSmiles(smiles) self.smiles3D = Chem.MolToSmiles(mol, isomericSmiles=True) self.smiles2D = Chem.MolToSmiles(mol) self.stereo_cands = decode_stereo(self.smiles2D) # cliques: a list of list of atom indices cliques, edges = tree_decomp(self.mol) root = 0 for i, c in enumerate(cliques): cmol = get_clique_mol(self.mol, c) csmiles = get_smiles(cmol) self.nodes_dict[i] = dict( smiles=csmiles, mol=get_mol(csmiles), clique=c, ) if min(c) == 0: root = i # The clique with atom ID 0 becomes root if root > 0: for attr in self.nodes_dict[0]: self.nodes_dict[0][attr], self.nodes_dict[root][attr] = ( self.nodes_dict[root][attr], self.nodes_dict[0][attr], ) src = np.zeros((len(edges) * 2,), dtype="int") dst = np.zeros((len(edges) * 2,), dtype="int") for i, (_x, _y) in enumerate(edges): x = 0 if _x == root else root if _x == 0 else _x y = 0 if _y == root else root if _y == 0 else _y src[2 * i] = x dst[2 * i] = y src[2 * i + 1] = y dst[2 * i + 1] = x self.graph = dgl.graph((src, dst), num_nodes=len(cliques)) for i in self.nodes_dict: self.nodes_dict[i]["nid"] = i + 1 if self.graph.out_degrees(i) > 1: # Leaf node mol is not marked set_atommap( 
def _assemble_node(self, i):
    """Enumerate assembly candidates for tree node ``i`` and store them.

    Successors of ``i`` whose clique molecule has exactly one atom are placed
    first.  Those with more than one atom follow, ordered by decreasing atom
    count.  The ordered neighbours are passed to ``enum_assemble_nx`` and the
    resulting candidates are written to the node's ``"cands"`` and
    ``"cand_mols"`` entries as lists (both empty when nothing is returned).
    """
    node = self.nodes_dict[i]

    single_atom = []
    multi_atom = []
    for j in self.graph.successors(i).numpy():
        neighbor = self.nodes_dict[j]
        n_atoms = neighbor["mol"].GetNumAtoms()
        if n_atoms > 1:
            multi_atom.append(neighbor)
        elif n_atoms == 1:
            single_atom.append(neighbor)
    multi_atom.sort(key=lambda nei: nei["mol"].GetNumAtoms(), reverse=True)

    cands = enum_assemble_nx(node, single_atom + multi_atom)
    if len(cands) == 0:
        node["cands"] = []
        node["cand_mols"] = []
        return

    # Each candidate is a 3-tuple; the third component is not stored.
    cand_smiles, cand_mols, _ = zip(*cands)
    node["cands"] = list(cand_smiles)
    node["cand_mols"] = list(cand_mols)
def onek_encoding_unk(x, allowable_set):
    """Return a one-hot list for ``x`` over ``allowable_set``.

    A value not contained in ``allowable_set`` is encoded as the set's last
    element, which therefore acts as the "unknown" bucket.  The result has one
    boolean per element of ``allowable_set``.
    """
    target = x if x in allowable_set else allowable_set[-1]
    return [target == candidate for candidate in allowable_set]
class LoopyBPUpdate(nn.Module):
    """Node update for one loopy belief-propagation step.

    Computes ``relu(msg_input + W_h(accum_msg))`` from the node features
    ``"msg_input"`` and ``"accum_msg"`` and returns it under ``"msg"``.
    ``W_h`` is a bias-free square linear layer of width ``hidden_size``.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.W_h = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, nodes):
        feats = nodes.data
        updated = F.relu(feats["msg_input"] + self.W_h(feats["accum_msg"]))
        return {"msg": updated}
class GRUUpdate(nn.Module):
    """GRU-style update applied to line-graph nodes (i.e. tree edges).

    ``update_zm`` computes the update gate ``z`` and the new message ``m``
    from ``src_x``, ``s`` and ``accum_rm``.  ``update_r`` computes the reset
    gate ``r`` and the gated message ``rm`` from ``dst_x`` and a message.
    ``forward`` chains the two and returns all four entries.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.W_z = nn.Linear(2 * hidden_size, hidden_size)
        self.W_r = nn.Linear(hidden_size, hidden_size, bias=False)
        self.U_r = nn.Linear(hidden_size, hidden_size)
        self.W_h = nn.Linear(2 * hidden_size, hidden_size)

    def update_zm(self, node):
        """Return ``{"m": ..., "z": ...}`` for the given node batch."""
        feats = node.data
        src_x = feats["src_x"]
        s = feats["s"]
        accum_rm = feats["accum_rm"]

        gate = torch.sigmoid(self.W_z(torch.cat([src_x, s], 1)))
        candidate = torch.tanh(self.W_h(torch.cat([src_x, accum_rm], 1)))
        return {"m": (1 - gate) * s + gate * candidate, "z": gate}

    def update_r(self, node, zm=None):
        """Return ``{"r": ..., "rm": ...}``.

        The message is taken from ``zm["m"]`` when ``zm`` is given, otherwise
        from the node's stored ``"m"`` feature.
        """
        dst_x = node.data["dst_x"]
        if zm is None:
            message = node.data["m"]
        else:
            message = zm["m"]

        reset = torch.sigmoid(self.W_r(dst_x) + self.U_r(message))
        return {"r": reset, "rm": reset * message}

    def forward(self, node):
        """Run ``update_zm`` then ``update_r`` on the freshly computed message."""
        zm = self.update_zm(node)
        return {**zm, **self.update_r(node, zm=zm)}
def worker_init_fn(id_):
    """Raise the RDKit logger threshold to ``CRITICAL``.

    Passed to ``DataLoader`` as ``worker_init_fn`` and also called once at
    import time.  ``id_`` (the worker index) is ignored.
    """
    rdkit.RDLogger.logger().setLevel(rdkit.RDLogger.CRITICAL)
def train():
    """Train the module-level ``model`` on ``dataset`` for ``MAX_EPOCH`` epochs.

    Uses the module-level ``optimizer``, ``scheduler``, ``beta``,
    ``batch_size`` and ``opts``.  Running accuracies are printed as
    percentages every ``PRINT_ITER`` iterations and then reset.  Every 1500
    iterations, and again at the end of every epoch, the learning rate is
    decayed and the model state dict is saved under ``opts.save_path``.
    """
    dataset.training = True
    dataloader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        collate_fn=JTNNCollator(vocab, True),
        drop_last=True,
        worker_init_fn=worker_init_fn,
    )

    for epoch in range(MAX_EPOCH):
        word_acc, topo_acc, assm_acc, steo_acc = 0, 0, 0, 0

        for it, batch in enumerate(tqdm.tqdm(dataloader)):
            model.zero_grad()
            try:
                loss, kl_div, wacc, tacc, sacc, dacc = model(batch, beta)
            except Exception:
                # Show which molecules triggered the failure, then propagate.
                # (Was a bare ``except:``, which also intercepted
                # KeyboardInterrupt/SystemExit.)
                print([t.smiles for t in batch["mol_trees"]])
                raise
            loss.backward()
            optimizer.step()

            word_acc += wacc
            topo_acc += tacc
            assm_acc += sacc
            steo_acc += dacc

            if (it + 1) % PRINT_ITER == 0:
                # Convert the running sums into mean percentages.
                word_acc = word_acc / PRINT_ITER * 100
                topo_acc = topo_acc / PRINT_ITER * 100
                assm_acc = assm_acc / PRINT_ITER * 100
                steo_acc = steo_acc / PRINT_ITER * 100

                print(
                    "KL: %.1f, Word: %.2f, Topo: %.2f, Assm: %.2f, Steo: %.2f, Loss: %.6f"
                    % (
                        kl_div,
                        word_acc,
                        topo_acc,
                        assm_acc,
                        steo_acc,
                        loss.item(),
                    )
                )
                word_acc, topo_acc, assm_acc, steo_acc = 0, 0, 0, 0
                sys.stdout.flush()

            if (it + 1) % 1500 == 0:  # Fast annealing
                scheduler.step()
                # get_last_lr() reports the rate actually in effect;
                # get_lr() is only meaningful inside scheduler.step().
                print("learning rate: %.6f" % scheduler.get_last_lr()[0])
                torch.save(
                    model.state_dict(),
                    opts.save_path + "/model.iter-%d-%d" % (epoch, it + 1),
                )

        scheduler.step()
        print("learning rate: %.6f" % scheduler.get_last_lr()[0])
        torch.save(
            model.state_dict(), opts.save_path + "/model.iter-" + str(epoch)
        )
for it, batch in enumerate(dataloader): gt_smiles = batch["mol_trees"][0].smiles print(gt_smiles) model.move_to_cuda(batch) _, tree_vec, mol_vec = model.encode(batch) tree_vec, mol_vec, _, _ = model.sample(tree_vec, mol_vec) smiles = model.decode(tree_vec, mol_vec) print(smiles) if __name__ == "__main__": if opts.test: test() else: train() print("# passes:", model.n_passes) print("Total # nodes processed:", model.n_nodes_total) print("Total # edges processed:", model.n_edges_total) print("Total # tree nodes processed:", model.n_tree_nodes_total) print("Graph decoder: # passes:", model.jtmpn.n_passes) print( "Graph decoder: Total # candidates processed:", model.jtmpn.n_samples_total, ) print("Graph decoder: Total # nodes processed:", model.jtmpn.n_nodes_total) print("Graph decoder: Total # edges processed:", model.jtmpn.n_edges_total) print("Graph encoder: # passes:", model.mpn.n_passes) print( "Graph encoder: Total # candidates processed:", model.mpn.n_samples_total, ) print("Graph encoder: Total # nodes processed:", model.mpn.n_nodes_total) print("Graph encoder: Total # edges processed:", model.mpn.n_edges_total) ================================================ FILE: examples/pytorch/label_propagation/README.md ================================================ # DGL Implementation of Label Propagation This DGL example implements the method proposed in the paper [Learning from Labeled and Unlabeled Data with Label Propagation](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.14.3864&rep=rep1&type=pdf). Contributor: [xnuohz](https://github.com/xnuohz) ### Requirements The codebase is implemented in Python 3.7. For version requirement of packages, see below. ``` dgl 0.6.0.post1 torch 1.7.0 ``` ### The graph datasets used in this example The DGL's built-in Cora, Pubmed and Citeseer datasets. 
def main():
    """Run label propagation on the selected citation dataset and print accuracy.

    Reads the module-level ``args`` namespace (``gpu``, ``dataset``,
    ``num_layers``, ``alpha``) populated by the ``__main__`` guard.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of ``Cora``, ``Citeseer``, ``Pubmed``.
    """
    # check cuda
    device = (
        f"cuda:{args.gpu}"
        if torch.cuda.is_available() and args.gpu >= 0
        else "cpu"
    )

    # load data
    if args.dataset == "Cora":
        dataset = CoraGraphDataset()
    elif args.dataset == "Citeseer":
        dataset = CiteseerGraphDataset()
    elif args.dataset == "Pubmed":
        dataset = PubmedGraphDataset()
    else:
        raise ValueError("Dataset {} is invalid.".format(args.dataset))

    g = dataset[0]
    g = dgl.add_self_loop(g)

    labels = g.ndata.pop("label").to(device).long()

    # load masks for train / test, valid is not used.
    train_mask = g.ndata.pop("train_mask")
    test_mask = g.ndata.pop("test_mask")

    # squeeze(1) drops only the trailing index dimension of nonzero()'s
    # output, so a mask with a single True entry still yields a 1-D tensor
    # (a bare squeeze() would collapse it to 0-d and break len() below).
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    g = g.to(device)

    # label propagation
    lp = LabelPropagation(args.num_layers, args.alpha)
    logits = lp(g, labels, mask=train_idx)

    test_acc = torch.sum(
        logits[test_idx].argmax(dim=1) == labels[test_idx]
    ).item() / len(test_idx)
    print("Test Acc {:.4f}".format(test_acc))
def find_indices_in(a, b):
    """Return, for every element of ``a``, the position of that value in ``b``.

    ``b`` is sorted once and ``a`` is located in it with a binary search.
    A value of ``a`` that is larger than every element of ``b`` would fall
    past the end of the sorted array; such lookups are redirected to sorted
    slot 0, i.e. the position of the smallest element of ``b``.  Values that
    are absent from ``b`` therefore still yield *some* valid index, so
    callers must verify the match themselves if membership matters.
    """
    b_ascending, original_pos = torch.sort(b)
    slot = torch.searchsorted(b_ascending, a)
    # redirect past-the-end slots to slot 0 so the gather below stays in range
    slot = torch.where(
        slot >= original_pos.shape[0], torch.zeros_like(slot), slot
    )
    return original_pos[slot]
out_weight="edge_weights", replace=False, ): super().__init__() self.nodes_per_layer = nodes_per_layer self.importance_sampling = importance_sampling self.edge_weight = weight self.output_weight = out_weight self.replace = replace def compute_prob(self, g, seed_nodes, weight, num): """ g : the whole graph seed_nodes : the output nodes for the current layer weight : the weight of the edges return : the unnormalized probability of the candidate nodes, as well as the subgraph containing all the edges from the candidate nodes to the output nodes. """ insg = dgl.in_subgraph(g, seed_nodes) insg = dgl.compact_graphs(insg, seed_nodes) if self.importance_sampling: out_frontier = dgl.reverse(insg, copy_edata=True) weight = weight[out_frontier.edata[dgl.EID].long()] prob = dgl.ops.copy_e_sum(out_frontier, weight**2) # prob = torch.sqrt(prob) else: prob = torch.ones(insg.num_nodes()) prob[insg.out_degrees() == 0] = 0 return prob, insg def select_neighbors(self, prob, num): """ seed_nodes : output nodes cand_nodes : candidate nodes. Must contain all output nodes in @seed_nodes prob : unnormalized probability of each candidate node num : number of neighbors to sample return : the set of input nodes in terms of their indices in @cand_nodes, and also the indices of seed nodes in the selected nodes. """ # The returned nodes should be a union of seed_nodes plus @num nodes from cand_nodes. # Because compute_prob returns a compacted subgraph and a list of probabilities, # we need to find the corresponding local IDs of the resulting union in the subgraph # so that we can compute the edge weights of the block. # This is why we need a find_indices_in() function. 
    def generate_block(self, insg, neighbor_nodes_idx, seed_nodes, P_sg, W_sg):
        """
        Build the message-flow block for one layer from the sampled nodes.

        insg : the subgraph yielded by compute_prob()
        neighbor_nodes_idx : the sampled nodes from the subgraph @insg, yielded by select_neighbors()
        seed_nodes : the output nodes of this layer; they are looked up in
                     insg.ndata[dgl.NID] to obtain their indices in @insg
        P_sg : unnormalized probability of each node being sampled, yielded by compute_prob()
        W_sg : edge weights of @insg
        return : the block, with the rescaled edge weights stored in
                 block.edata[self.output_weight] and dgl.NID / dgl.EID
                 rewritten through the NID / EID arrays of @insg.
        """
        # Indices of the seed nodes inside insg, then the de-duplicated union
        # of sampled nodes and seed nodes.
        seed_nodes_idx = find_indices_in(seed_nodes, insg.ndata[dgl.NID])
        u_nodes = union(neighbor_nodes_idx, seed_nodes_idx)
        sg = insg.subgraph(u_nodes.type(insg.idtype))
        u, v = sg.edges()
        # lu: for each edge of sg, the ID of its source node as recorded in
        # sg.ndata[dgl.NID] (presumably the node's index in insg -- confirm
        # against dgl subgraph semantics).
        lu = sg.ndata[dgl.NID][u.long()]
        # Membership test: find_indices_in always returns some index, so the
        # equality below is True only for edges whose source was sampled.
        s = find_indices_in(lu, neighbor_nodes_idx)
        eg = dgl.edge_subgraph(
            sg, lu == neighbor_nodes_idx[s], relabel_nodes=False
        )
        # Nodes were not relabelled, so carry the node IDs over from sg and
        # compose the edge IDs so that they refer to edges of insg again.
        eg.ndata[dgl.NID] = sg.ndata[dgl.NID][: eg.num_nodes()]
        eg.edata[dgl.EID] = sg.edata[dgl.EID][eg.edata[dgl.EID].long()]
        sg = eg
        nids = insg.ndata[dgl.NID][sg.ndata[dgl.NID].long()]
        P = P_sg[u_nodes.long()]
        W = W_sg[sg.edata[dgl.EID].long()]
        # Divide each edge weight by the sampling probability of its source
        # node, then rescale per destination node by in_degree / (sum of the
        # divided weights arriving at that node).
        W_tilde = dgl.ops.e_div_u(sg, W, P)
        W_tilde_sum = dgl.ops.copy_e_sum(sg, W_tilde)
        d = sg.in_degrees()
        W_tilde = dgl.ops.e_mul_v(sg, W_tilde, d / W_tilde_sum)

        block = dgl.to_block(sg, seed_nodes_idx.type(sg.idtype))
        block.edata[self.output_weight] = W_tilde
        # correct node ID mapping: translate the block's node IDs through
        # `nids`, i.e. through insg.ndata[dgl.NID]
        block.srcdata[dgl.NID] = nids[block.srcdata[dgl.NID].long()]
        block.dstdata[dgl.NID] = nids[block.dstdata[dgl.NID].long()]

        # same translation for edges, through insg.edata[dgl.EID]
        sg_eids = insg.edata[dgl.EID][sg.edata[dgl.EID].long()]
        block.edata[dgl.EID] = sg_eids[block.edata[dgl.EID].long()]
        return block
W, num_nodes_to_sample ) neighbor_nodes_idx = self.select_neighbors( prob, num_nodes_to_sample ) block = self.generate_block( insg, neighbor_nodes_idx.type(g.idtype), seed_nodes.type(g.idtype), prob, W[insg.edata[dgl.EID].long()], ) seed_nodes = block.srcdata[dgl.NID] blocks.insert(0, block) return seed_nodes, output_nodes, blocks class PoissonLadiesSampler(LadiesSampler): def __init__( self, nodes_per_layer, importance_sampling=True, weight="w", out_weight="edge_weights", skip=False, ): super().__init__( nodes_per_layer, importance_sampling, weight, out_weight ) self.eps = 0.9999 self.skip = skip def compute_prob(self, g, seed_nodes, weight, num): """ g : the whole graph seed_nodes : the output nodes for the current layer weight : the weight of the edges return : the unnormalized probability of the candidate nodes, as well as the subgraph containing all the edges from the candidate nodes to the output nodes. """ prob, insg = super().compute_prob(g, seed_nodes, weight, num) one = torch.ones_like(prob) if prob.shape[0] <= num: return one, insg c = 1.0 for i in range(50): S = torch.sum(torch.minimum(prob * c, one).to(torch.float64)).item() if min(S, num) / max(S, num) >= self.eps: break else: c *= num / S if self.skip: skip_nodes = find_indices_in(seed_nodes, insg.ndata[dgl.NID]) prob[skip_nodes] = float("inf") return torch.minimum(prob * c, one), insg def select_neighbors(self, prob, num): """ seed_nodes : output nodes cand_nodes : candidate nodes. Must contain all output nodes in @seed_nodes prob : unnormalized probability of each candidate node num : number of neighbors to sample return : the set of input nodes in terms of their indices in @cand_nodes, and also the indices of seed nodes in the selected nodes. """ # The returned nodes should be a union of seed_nodes plus @num nodes from cand_nodes. 
def load_data(data):
    """Return ``(graph, num_classes)`` for a dataset object.

    The graph is ``data[0]``.  Its node fields ``"feat"`` and ``"label"``
    are renamed in place to ``"features"`` and ``"labels"``, the names the
    rest of this example reads.
    """
    graph = data[0]
    for old_key, new_key in (("feat", "features"), ("label", "labels")):
        graph.ndata[new_key] = graph.ndata.pop(old_key)
    return graph, data.num_classes
th.LongTensor(dataset.get_idx_split("test-dev")) + paper_offset train_mask = th.zeros((g.number_of_nodes(),), dtype=th.bool) train_mask[train_nid] = True val_mask = th.zeros((g.number_of_nodes(),), dtype=th.bool) val_mask[val_nid] = True test_mask = th.zeros((g.number_of_nodes(),), dtype=th.bool) test_mask[test_nid] = True g.ndata["train_mask"] = train_mask g.ndata["val_mask"] = val_mask g.ndata["test_mask"] = test_mask labels = th.tensor(dataset.paper_label) num_labels = len(th.unique(labels[th.logical_not(th.isnan(labels))])) g.ndata["labels"] = -th.ones(g.number_of_nodes(), dtype=th.int64) g.ndata["labels"][train_nid] = labels[train_nid - paper_offset].long() g.ndata["labels"][val_nid] = labels[val_nid - paper_offset].long() return g, num_labels def load_ogb(name, root="dataset"): if name == "ogbn-mag240M": return load_mag240m(root) from ogb.nodeproppred import DglNodePropPredDataset print("load", name) data = DglNodePropPredDataset(name=name, root=root) print("finish loading", name) splitted_idx = data.get_idx_split() graph, labels = data[0] labels = labels[:, 0] graph.ndata["features"] = graph.ndata.pop("feat") num_labels = len(th.unique(labels[th.logical_not(th.isnan(labels))])) graph.ndata["labels"] = labels.type(th.LongTensor) in_feats = graph.ndata["features"].shape[1] # Find the node IDs in the training, validation, and test set. 
def load_dataset(dataset_name):
    """Load a dataset by name and return ``(graph, n_classes, multilabel)``.

    Names of DGL built-in datasets are routed to ``load_dgl`` and OGB names
    to ``load_ogb``.  ``multilabel`` is True only for ``"yelp"``, whose
    labels are additionally converted to ``float32``.

    Raises:
        ValueError: if ``dataset_name`` is not one of the known names.
    """
    dgl_builtin = ("reddit", "cora", "citeseer", "pubmed", "yelp", "flickr")
    ogb_hosted = (
        "ogbn-products",
        "ogbn-arxiv",
        "ogbn-papers100M",
        "ogbn-mag240M",
    )

    if dataset_name in dgl_builtin:
        graph, num_classes = load_dgl(dataset_name)
        is_multilabel = dataset_name == "yelp"
        if is_multilabel:
            graph.ndata["labels"] = graph.ndata["labels"].to(dtype=th.float32)
        return graph, num_classes, is_multilabel

    if dataset_name in ogb_hosted:
        graph, num_classes = load_ogb(dataset_name)
        return graph, num_classes, False

    raise ValueError("unknown dataset")
self.layers.append(dglnn.SAGEConv(in_feats, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer( block, h, edge_weight=block.edata["edge_weights"] if "edge_weights" in block.edata else None, ) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h def inference(self, g, device, batch_size, use_uva, num_workers): # The difference between this inference function and the one in the official # example is that the intermediate results can also benefit from prefetching. g.ndata["h"] = g.ndata["features"] sampler = dgl.dataloading.MultiLayerFullNeighborSampler( 1, prefetch_node_feats=["h"] ) pin_memory = g.device != device and use_uva dataloader = dgl.dataloading.DataLoader( g, th.arange(g.num_nodes(), dtype=g.idtype, device=g.device), sampler, device=device, batch_size=batch_size, shuffle=False, drop_last=False, use_uva=use_uva, num_workers=num_workers, persistent_workers=(num_workers > 0), ) self.eval() for l, layer in enumerate(self.layers): y = th.empty( g.num_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes, dtype=g.ndata["h"].dtype, device=g.device, pin_memory=pin_memory, ) for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader): x = blocks[0].srcdata["h"] h = layer(blocks[0], x) if l < len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) # by design, our output nodes are contiguous y[output_nodes[0].item() : output_nodes[-1].item() + 1] = h.to( y.device ) g.ndata["h"] = y return y ================================================ FILE: examples/pytorch/labor/train_lightning.py ================================================ # /*! # * Copyright (c) 2022, NVIDIA Corporation # * Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) # * All rights reserved. 
class SAGELightning(LightningModule):
    """Lightning wrapper around the ``SAGE`` model.

    Besides the usual train / validation steps it keeps exponentially
    weighted running totals of how many source nodes and edges each sampled
    message-flow graph (MFG) contains, and logs their averages together with
    timing information.
    """

    def __init__(
        self,
        in_feats,
        n_hidden,
        n_classes,
        n_layers,
        activation,
        dropout,
        lr,
        multilabel,
    ):
        super().__init__()
        self.save_hyperparameters()
        self.module = SAGE(
            in_feats, n_hidden, n_classes, n_layers, activation, dropout
        )
        self.lr = lr
        # Factory for a fresh micro-averaged F1 metric; also called from the
        # evaluation code outside this class.
        self.f1score_class = lambda: (
            MulticlassF1Score if not multilabel else MultilabelF1Score
        )(n_classes, average="micro")
        self.train_acc = self.f1score_class()
        self.val_acc = self.f1score_class()
        self.num_steps = 0
        # one slot per MFG plus one extra slot for the batch (output) size
        self.cum_sampled_nodes = [0 for _ in range(n_layers + 1)]
        self.cum_sampled_edges = [0 for _ in range(n_layers)]
        # decay factor of the running totals
        self.w = 0.99
        self.loss_fn = (
            nn.CrossEntropyLoss() if not multilabel else nn.BCEWithLogitsLoss()
        )
        # timestamp of the previous training step (0 before the first one)
        self.pt = 0

    def _debiased_average(self, cumulative):
        """Turn a decayed running total into an average over ``num_steps``.

        The totals are updated as ``total = total * w + x``, so after n
        steps the weights sum to ``(1 - w**n) / (1 - w)``; dividing by that
        sum gives the weighted mean.  With ``w >= 1`` a plain mean is used.
        """
        if self.w >= 1:
            return cumulative / self.num_steps
        return cumulative * (1 - self.w) / (1 - self.w**self.num_steps)

    def num_sampled_nodes(self, i):
        """Average number of nodes recorded in slot ``i``."""
        return self._debiased_average(self.cum_sampled_nodes[i])

    def num_sampled_edges(self, i):
        """Average number of edges recorded in slot ``i``."""
        return self._debiased_average(self.cum_sampled_edges[i])

    def training_step(self, batch, batch_idx):
        """Run one optimisation step on a sampled mini-batch; return the loss."""
        input_nodes, output_nodes, mfgs = batch
        # Use the module's own device rather than a global defined by the
        # launching script, so the class also works when imported elsewhere.
        mfgs = [mfg.int().to(self.device) for mfg in mfgs]
        self.num_steps += 1
        for i, mfg in enumerate(mfgs):
            self.cum_sampled_nodes[i] = (
                self.cum_sampled_nodes[i] * self.w + mfg.num_src_nodes()
            )
            self.cum_sampled_edges[i] = (
                self.cum_sampled_edges[i] * self.w + mfg.num_edges()
            )
            self.log(
                "num_nodes/{}".format(i),
                self.num_sampled_nodes(i),
                prog_bar=True,
                on_step=True,
                on_epoch=False,
            )
            self.log(
                "num_edges/{}".format(i),
                self.num_sampled_edges(i),
                prog_bar=True,
                on_step=True,
                on_epoch=False,
            )
        # for batch size monitoring
        i = len(mfgs)
        self.cum_sampled_nodes[i] = (
            self.cum_sampled_nodes[i] * self.w + mfgs[-1].num_dst_nodes()
        )
        self.log(
            "num_nodes/{}".format(i),
            self.num_sampled_nodes(i),
            prog_bar=True,
            on_step=True,
            on_epoch=False,
        )

        batch_inputs = mfgs[0].srcdata["features"]
        batch_labels = mfgs[-1].dstdata["labels"]
        # start of the forward/backward timing, read in on_train_batch_end
        self.st = time.time()
        batch_pred = self.module(mfgs, batch_inputs)
        loss = self.loss_fn(batch_pred, batch_labels)
        self.train_acc(batch_pred, batch_labels.int())
        self.log(
            "train_acc",
            self.train_acc,
            prog_bar=True,
            on_step=True,
            on_epoch=True,
            batch_size=batch_labels.shape[0],
        )
        self.log(
            "train_loss",
            loss,
            on_step=True,
            on_epoch=True,
            batch_size=batch_labels.shape[0],
        )
        # wall-clock time since the previous call reached this point
        t = time.time()
        self.log(
            "iter_time",
            t - self.pt,
            prog_bar=True,
            on_step=True,
            on_epoch=False,
        )
        self.pt = t
        return loss

    def on_train_batch_end(self, outputs, batch, batch_idx):
        """Log the time elapsed since the forward pass of this batch began."""
        self.log(
            "forward_backward_time",
            time.time() - self.st,
            prog_bar=True,
            on_step=True,
            on_epoch=False,
        )

    def validation_step(self, batch, batch_idx, dataloader_idx=0):
        """Evaluate one validation mini-batch and log accuracy and loss."""
        input_nodes, output_nodes, mfgs = batch
        mfgs = [mfg.int().to(self.device) for mfg in mfgs]
        batch_inputs = mfgs[0].srcdata["features"]
        batch_labels = mfgs[-1].dstdata["labels"]
        batch_pred = self.module(mfgs, batch_inputs)
        loss = self.loss_fn(batch_pred, batch_labels)
        self.val_acc(batch_pred, batch_labels.int())
        self.log(
            "val_acc",
            self.val_acc,
            prog_bar=True,
            on_step=False,
            on_epoch=True,
            sync_dist=True,
            batch_size=batch_labels.shape[0],
        )
        self.log(
            "val_loss",
            loss,
            on_step=False,
            on_epoch=True,
            sync_dist=True,
            batch_size=batch_labels.shape[0],
        )

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with rate ``self.lr``."""
        optimizer = th.optim.Adam(self.parameters(), lr=self.lr)
        return optimizer
g.edata else [], prefetch_labels=["labels"], ) dataloader_device = th.device("cpu") g = g.formats(["csc"]) if use_uva or not data_cpu: train_nid = train_nid.to(device) val_nid = val_nid.to(device) test_nid = test_nid.to(device) if not data_cpu and not use_uva: g = g.to(device) dataloader_device = device self.g = g self.train_nid = train_nid.to(g.idtype) self.val_nid = val_nid.to(g.idtype) self.test_nid = test_nid.to(g.idtype) self.sampler = sampler self.device = dataloader_device self.use_uva = use_uva self.batch_size = batch_size self.num_workers = num_workers self.in_feats = g.ndata["features"].shape[1] self.n_classes = n_classes self.multilabel = multilabel self.gpu_cache_arg = {"node": {"features": cache_size}} def train_dataloader(self): return dgl.dataloading.DataLoader( self.g, self.train_nid, self.sampler, device=self.device, use_uva=self.use_uva, batch_size=self.batch_size, shuffle=True, drop_last=True, num_workers=self.num_workers, gpu_cache=self.gpu_cache_arg, ) def val_dataloader(self): return dgl.dataloading.DataLoader( self.g, self.val_nid, self.sampler, device=self.device, use_uva=self.use_uva, batch_size=self.batch_size, shuffle=False, drop_last=False, num_workers=self.num_workers, gpu_cache=self.gpu_cache_arg, ) class BatchSizeCallback(Callback): def __init__(self, limit, factor=3): super().__init__() self.limit = limit self.factor = factor self.clear() def clear(self): self.n = 0 self.m = 0 self.s = 0 def push(self, x): self.n += 1 m = self.m self.m += (x - m) / self.n self.s += (x - m) * (x - self.m) @property def var(self): return self.s / (self.n - 1) @property def std(self): return math.sqrt(self.var) def on_train_batch_start(self, trainer, datamodule, batch, batch_idx): input_nodes, output_nodes, mfgs = batch features = mfgs[0].srcdata["features"] if hasattr(features, "__cache_miss__"): trainer.strategy.model.log( "cache_miss", features.__cache_miss__, prog_bar=True, on_step=True, on_epoch=False, ) def on_train_batch_end( self, trainer, 
datamodule, outputs, batch, batch_idx ): input_nodes, output_nodes, mfgs = batch self.push(mfgs[0].num_src_nodes()) def on_train_epoch_end(self, trainer, datamodule): if ( self.limit > 0 and self.n >= 2 and abs(self.limit - self.m) * self.n >= self.std * self.factor ): trainer.datamodule.batch_size = int( trainer.datamodule.batch_size * self.limit / self.m ) loop = trainer._active_loop assert loop is not None loop._combined_loader = None loop.setup_data() self.clear() if __name__ == "__main__": argparser = argparse.ArgumentParser() argparser.add_argument( "--gpu", type=int, default=0 if th.cuda.is_available() else -1, help="GPU device ID. Use -1 for CPU training", ) argparser.add_argument("--dataset", type=str, default="reddit") argparser.add_argument("--num-epochs", type=int, default=-1) argparser.add_argument("--num-steps", type=int, default=-1) argparser.add_argument("--min-steps", type=int, default=0) argparser.add_argument("--num-hidden", type=int, default=256) argparser.add_argument("--num-layers", type=int, default=3) argparser.add_argument("--fan-out", type=str, default="10,10,10") argparser.add_argument("--lad-out", type=str, default="16000,11000,5000") argparser.add_argument("--batch-size", type=int, default=1024) argparser.add_argument("--lr", type=float, default=0.001) argparser.add_argument("--dropout", type=float, default=0.5) argparser.add_argument( "--num-workers", type=int, default=0, help="Number of sampling processes. Use 0 for no extra process.", ) argparser.add_argument( "--data-cpu", action="store_true", help="By default the script puts the node features and labels " "on GPU when using it to save time for data copy. This may " "be undesired if they cannot fit in GPU memory at once. 
" "This flag disables that.", ) argparser.add_argument( "--sampler", type=str, default="labor", choices=["neighbor", "labor", "ladies", "poisson-ladies"], ) argparser.add_argument("--importance-sampling", type=int, default=0) argparser.add_argument("--layer-dependency", action="store_true") argparser.add_argument("--batch-dependency", type=int, default=1) argparser.add_argument("--logdir", type=str, default="tb_logs") argparser.add_argument("--vertex-limit", type=int, default=-1) argparser.add_argument("--use-uva", action="store_true") argparser.add_argument("--cache-size", type=int, default=0) argparser.add_argument("--undirected", action="store_true") argparser.add_argument("--val-acc-target", type=float, default=1) argparser.add_argument("--early-stopping-patience", type=int, default=10) argparser.add_argument("--disable-checkpoint", action="store_true") argparser.add_argument("--precision", type=str, default="highest") args = argparser.parse_args() if args.precision != "highest": th.set_float32_matmul_precision(args.precision) if args.gpu >= 0: device = th.device("cuda:%d" % args.gpu) else: device = th.device("cpu") datamodule = DataModule( args.dataset, args.undirected, args.data_cpu, args.use_uva, [int(_) for _ in args.fan_out.split(",")], [int(_) for _ in args.lad_out.split(",")], device, args.batch_size, args.num_workers, args.sampler, args.importance_sampling, args.layer_dependency, args.batch_dependency, args.cache_size, ) model = SAGELightning( datamodule.in_feats, args.num_hidden, datamodule.n_classes, args.num_layers, F.relu, args.dropout, args.lr, datamodule.multilabel, ) # Train callbacks = [] if not args.disable_checkpoint: callbacks.append( ModelCheckpoint(monitor="val_acc", save_top_k=1, mode="max") ) callbacks.append(BatchSizeCallback(args.vertex_limit)) callbacks.append( EarlyStopping( monitor="val_acc", stopping_threshold=args.val_acc_target, mode="max", patience=args.early_stopping_patience, ) ) subdir = "{}_{}_{}_{}_{}".format( args.dataset, 
args.sampler, args.importance_sampling, args.layer_dependency, args.batch_dependency, ) logger = TensorBoardLogger(args.logdir, name=subdir) trainer = Trainer( accelerator="gpu" if args.gpu != -1 else "cpu", devices=[args.gpu] if args.gpu != -1 else "auto", max_epochs=args.num_epochs, max_steps=args.num_steps, min_steps=args.min_steps, callbacks=callbacks, logger=logger, ) trainer.fit(model, datamodule=datamodule) # Test if not args.disable_checkpoint: logdir = os.path.join(args.logdir, subdir) dirs = glob.glob("./{}/*".format(logdir)) version = max([int(os.path.split(x)[-1].split("_")[-1]) for x in dirs]) logdir = "./{}/version_{}".format(logdir, version) print("Evaluating model in", logdir) ckpt = glob.glob(os.path.join(logdir, "checkpoints", "*"))[0] model = SAGELightning.load_from_checkpoint( checkpoint_path=ckpt, hparams_file=os.path.join(logdir, "hparams.yaml"), ).to(device) with th.no_grad(): graph = datamodule.g pred = model.module.inference( graph, f"cuda:{args.gpu}" if args.gpu != -1 else "cpu", 4096, args.use_uva, args.num_workers, ) for nid, split_name in zip( [datamodule.train_nid, datamodule.val_nid, datamodule.test_nid], ["Train", "Validation", "Test"], ): nid = nid.to(pred.device).long() pred_nid = pred[nid] label = graph.ndata["labels"][nid] f1score = model.f1score_class().to(pred.device) acc = f1score(pred_nid, label) print(f"{split_name} accuracy: {acc.item()}") ================================================ FILE: examples/pytorch/lda/README.md ================================================ Latent Dirichlet Allocation === LDA is a classical algorithm for probabilistic graphical models. It assumes hierarchical Bayes models with discrete variables on sparse doc/word graphs. This example shows how it can be done on DGL, where the corpus is represented as a bipartite multi-graph G. There is no back-propagation, because gradient descent is typically considered inefficient on probability simplex. 
On the provided small-scale example on the 20 Newsgroups dataset, our DGL-LDA model runs
50% faster on GPU than the sklearn model without joblib parallelism.
For larger graphs, thanks to subgraph sampling and a low-memory implementation,
we may fit 100 million unique words with 256 topic dimensions on a large multi-GPU machine.
(The runtime memory is often less than 2x of the parameter storage.)

Key equations
---

Let k be the topic index variable with one-hot encoded vector representation z. The rest of the variables are:

|             | z_d\~p(θ_d) | w_k\~p(β_k) | z_dw\~q(ϕ_dw) |
|-------------|-------------|-------------|---------------|
| Prior       | Dir(α)      | Dir(η)      | (n/a)         |
| Posterior   | Dir(γ_d)    | Dir(λ_k)    | (n/a)         |

We overload w with bold-symbol-w, which represents the entire observed document-word multi-graph.
The difference is better shown in the original paper.
Below, $n_{dw}$ denotes the number of times word w occurs in document d.

**Multinomial PCA**

Multinomial PCA is a "latent allocation" model without the "Dirichlet".
Its data likelihood sums over the latent topic-index variable k,

$$p(w_{dw} \mid \theta_d, \beta) = \sum_k \theta_{dk}\,\beta_{kw},$$

where θ_d and β_k are shared within the same document and topic, respectively.

If we perform gradient descent, we may need additional steps to project the parameters to the probability simplices:
$\sum_k \theta_{dk} = 1$ and $\sum_w \beta_{kw} = 1$.
Instead, a more efficient solution is to borrow ideas from evidence lower-bound (ELBO) decomposition:

$$
\log p(w) \ge \mathcal{L}(w, \phi)
\overset{\text{def}}{=} \mathbb{E}_q\left[\log p(w, z; \theta, \beta) - \log q(z; \phi)\right]
= \sum_{d,w,k} n_{dw}\,\phi_{dwk}\left[\log \beta_{kw} + \log \theta_{dk} - \log \phi_{dwk}\right].
$$

The solutions for
$\theta_{dk} \propto \sum_w n_{dw}\,\phi_{dwk}$
and
$\beta_{kw} \propto \sum_d n_{dw}\,\phi_{dwk}$
follow from the maximization of cross-entropy loss.
The solution for
$\phi_{dwk} \propto \theta_{dk}\,\beta_{kw}$
follows from Kullback-Leibler divergence.
After normalizing to
$\sum_k \phi_{dwk} = 1$,
the difference
$\ell_{dw} = \log \beta_{kw} + \log \theta_{dk} - \log \phi_{dwk}$
becomes constant in k,
which is connected to the likelihood for the observed document-word pairs.

Note that after learning, the document vector θ_d considers the correlation between all words in d and similarly the topic distribution vector β_k considers the correlations in all observed documents.
**Variational Bayes** A Bayesian model adds Dirichlet priors to θ_d and β_z, which leads to a similar ELBO if we assume independence , i.e.: **Solutions** The solutions to VB subsumes the solutions to multinomial PCA when n goes to infinity. The solution for ϕ is , where the additional expectation can be expressed via digamma functions and is the log-partition function. The solutions for and come from direct gradient calculation. After substituting the optimal solutions, we compute the marginal likelihood by adding the three terms, which are all connected to (the negative of) Kullback-Leibler divergence. DGL usage --- The corpus is represented as a bipartite multi-graph G. We use DGL to propagate information through the edges and aggregate the distributions at doc/word nodes. For scalability, the phi variables are transient and updated during message passing. The gamma / lambda variables are updated after the nodes receive all edge messages. Following the conventions in [1], the gamma update is called E-step and the lambda update is called M-step. The lambda variable is further recorded by the trainer. A separate function is used to produce perplexity, which is based on the ELBO objective function divided by the total numbers of word/doc occurrences. Example --- `%run example_20newsgroups.py` * Approximately matches scikit-learn training perplexity after 10 rounds of training. * Exactly matches scikit-learn training perplexity if word_z is set to lda.components_.T * There is a difference in how we compute testing perplexity. We weigh the beta contributions by the training word counts, whereas sklearn weighs them by test word counts. * The DGL-LDA model runs 50% faster on GPU devices compared with sklearn without joblib parallel. Advanced configurations --- * Set `0 # * Lars Buitinck # * Chyi-Kwei Yau # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
def plot_top_words(model, feature_names, n_top_words, title):
    """Draw one horizontal bar chart of the heaviest words per topic.

    Parameters
    ----------
    model : object
        Anything exposing ``components_``, an iterable of per-topic weight
        vectors (one weight per entry of ``feature_names``).
    feature_names : sequence
        Word for each column index of ``components_``.
    n_top_words : int
        Number of highest-weight words shown per topic.
    title : str
        Figure title.

    The subplot grid is derived from the number of topics (5 columns wide),
    so 10 topics still produce the original 2x5 layout with a 30x15 figure,
    while any other topic count no longer overflows a fixed grid.
    """
    components = model.components_
    n_topics = len(components)
    n_cols = 5
    n_rows = max(1, -(-n_topics // n_cols))  # ceil division, at least one row

    # squeeze=False keeps `axes` a 2-D array even for a single row.
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        figsize=(30, 7.5 * n_rows),
        sharex=True,
        squeeze=False,
    )
    axes = axes.flatten()

    for topic_idx, topic in enumerate(components):
        # Indices of the n_top_words largest weights, heaviest first.
        top_features_ind = topic.argsort()[: -n_top_words - 1 : -1]
        top_features = [feature_names[i] for i in top_features_ind]
        weights = topic[top_features_ind]

        ax = axes[topic_idx]
        ax.barh(top_features, weights, height=0.7)
        ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30})
        ax.invert_yaxis()
        ax.tick_params(axis="both", which="major", labelsize=20)
        for side in ("top", "right", "left"):
            ax.spines[side].set_visible(False)

    # Hide grid cells that have no topic assigned to them.
    for ax in axes[n_topics:]:
        ax.set_visible(False)

    fig.suptitle(title, fontsize=40)
    plt.subplots_adjust(top=0.90, bottom=0.05, wspace=0.90, hspace=0.3)
    plt.show()
print("Loading dataset...")
t0 = time()
data, _ = fetch_20newsgroups(
    shuffle=True,
    random_state=1,
    remove=("headers", "footers", "quotes"),
    return_X_y=True,
)
data_samples = data[:n_samples]
data_test = data[n_samples : 2 * n_samples]
print("done in %0.3fs." % (time() - t0))

# Use tf (raw term count) features for LDA.
print("Extracting tf features for LDA...")
tf_vectorizer = CountVectorizer(
    max_df=0.95, min_df=2, max_features=n_features, stop_words="english"
)
t0 = time()
tf_vectorizer.fit(data)
tf = tf_vectorizer.transform(data_samples)
tt = tf_vectorizer.transform(data_test)
# `get_feature_names` was removed in scikit-learn 1.2; prefer the
# replacement and fall back only on old installations.
if hasattr(tf_vectorizer, "get_feature_names_out"):
    tf_feature_names = tf_vectorizer.get_feature_names_out()
else:
    tf_feature_names = tf_vectorizer.get_feature_names()

# Expand every (doc, word, count) entry into `count` parallel edges.
# Convert to COO once per matrix instead of once per zipped column.
tf_coo = tf.tocoo()
tf_uv = [
    (u, v)
    for u, v, e in zip(tf_coo.row, tf_coo.col, tf_coo.data)
    for _ in range(e)
]
tt_coo = tt.tocoo()
tt_uv = [
    (u, v)
    for u, v, e in zip(tt_coo.row, tt_coo.col, tt_coo.data)
    for _ in range(e)
]
print("done in %0.3fs." % (time() - t0))
print()

print("Preparing dgl graphs...")
t0 = time()
# Pin the node counts to the matrix shapes so that the train and test graphs
# share the same word vocabulary even if the last words never occur in one
# of the splits (otherwise the node count is inferred from the largest id).
G = dgl.heterograph(
    {("doc", "topic", "word"): tf_uv},
    {"doc": tf.shape[0], "word": tf.shape[1]},
    device=device,
)
Gt = dgl.heterograph(
    {("doc", "topic", "word"): tt_uv},
    {"doc": tt.shape[0], "word": tt.shape[1]},
    device=device,
)
print("done in %0.3fs." % (time() - t0))
print()

print("Training dgl-lda model...")
t0 = time()
model = LDAModel(G.num_nodes("word"), n_components)
model.fit(G)
print("done in %0.3fs." % (time() - t0))
print()

print(f"dgl-lda training perplexity {model.perplexity(G):.3f}")
print(f"dgl-lda testing perplexity {model.perplexity(Gt):.3f}")

# Stack the per-device shards into one (n_topics, n_words) array and wrap it
# in a throwaway type that mimics sklearn's `components_` attribute.
word_nphi = np.vstack([nphi.tolist() for nphi in model.word_data.nphi])
plot_top_words(
    type("dummy", (object,), {"components_": word_nphi}),
    tf_feature_names,
    n_top_words,
    "Topics in LDA model",
)

print("Training scikit-learn model...")

print(
    "\n" * 2,
    "Fitting LDA models with tf features, "
    "n_samples=%d and n_features=%d..." % (n_samples, n_features),
)
lda = LatentDirichletAllocation(
    n_components=n_components,
    max_iter=5,
    learning_method="online",
    learning_offset=50.0,
    random_state=0,
    verbose=1,
)
t0 = time()
lda.fit(tf)
print("done in %0.3fs." % (time() - t0))
print()

print(f"scikit-learn training perplexity {lda.perplexity(tf):.3f}")
print(f"scikit-learn testing perplexity {lda.perplexity(tt):.3f}")
import collections import functools import io import os import warnings import dgl import numpy as np import scipy as sp import torch try: from functools import cached_property except ImportError: try: from backports.cached_property import cached_property except ImportError: warnings.warn("cached_property not found - using property instead") cached_property = property class EdgeData: def __init__(self, src_data, dst_data): self.src_data = src_data self.dst_data = dst_data @property def loglike(self): return (self.src_data["Elog"] + self.dst_data["Elog"]).logsumexp(1) @property def phi(self): return ( self.src_data["Elog"] + self.dst_data["Elog"] - self.loglike.unsqueeze(1) ).exp() @property def expectation(self): return ( self.src_data["expectation"] * self.dst_data["expectation"] ).sum(1) class _Dirichlet: def __init__(self, prior, nphi, _chunksize=int(1e6)): self.prior = prior self.nphi = nphi self.device = nphi.device self._sum_by_parts = lambda map_fn: functools.reduce( torch.add, [ map_fn(slice(i, min(i + _chunksize, nphi.shape[1]))).sum(1) for i in list(range(0, nphi.shape[1], _chunksize)) ], ) def _posterior(self, _ID=slice(None)): return self.prior + self.nphi[:, _ID] @cached_property def posterior_sum(self): return self.nphi.sum(1) + self.prior * self.nphi.shape[1] def _Elog(self, _ID=slice(None)): return torch.digamma(self._posterior(_ID)) - torch.digamma( self.posterior_sum.unsqueeze(1) ) @cached_property def loglike(self): neg_evid = -self._sum_by_parts( lambda s: (self.nphi[:, s] * self._Elog(s)) ) prior = torch.as_tensor(self.prior).to(self.nphi) K = self.nphi.shape[1] log_B_prior = torch.lgamma(prior) * K - torch.lgamma(prior * K) log_B_posterior = self._sum_by_parts( lambda s: torch.lgamma(self._posterior(s)) ) - torch.lgamma(self.posterior_sum) return neg_evid - log_B_prior + log_B_posterior @cached_property def n(self): return self.nphi.sum(1) @cached_property def cdf(self): cdf = self._posterior() torch.cumsum(cdf, 1, out=cdf) cdf /= cdf[:, 
-1:].clone() return cdf def _expectation(self, _ID=slice(None)): expectation = self._posterior(_ID) expectation /= self.posterior_sum.unsqueeze(1) return expectation @cached_property def Bayesian_gap(self): return 1.0 - self._sum_by_parts(lambda s: self._Elog(s).exp()) _cached_properties = [ "posterior_sum", "loglike", "n", "cdf", "Bayesian_gap", ] def clear_cache(self): for name in self._cached_properties: try: delattr(self, name) except AttributeError: pass def update(self, new, _ID=slice(None), rho=1): """inplace: old * (1-rho) + new * rho""" self.clear_cache() mean_change = (self.nphi[:, _ID] - new).abs().mean().tolist() self.nphi *= 1 - rho self.nphi[:, _ID] += new * rho return mean_change class DocData(_Dirichlet): """nphi (n_docs by n_topics)""" def prepare_graph(self, G, key="Elog"): G.nodes["doc"].data[key] = getattr(self, "_" + key)().to(G.device) def update_from(self, G, mult): new = G.nodes["doc"].data["nphi"] * mult return self.update(new.to(self.device)) class _Distributed(collections.UserList): """split on dim=0 and store on multiple devices""" def __init__(self, prior, nphi): self.prior = prior self.nphi = nphi super().__init__([_Dirichlet(self.prior, nphi) for nphi in self.nphi]) def split_device(self, other, dim=0): split_sections = [x.shape[0] for x in self.nphi] out = torch.split(other, split_sections, dim) return [y.to(x.device) for x, y in zip(self.nphi, out)] class WordData(_Distributed): """distributed nphi (n_topics by n_words), transpose to/from graph nodes data""" def prepare_graph(self, G, key="Elog"): if "_ID" in G.nodes["word"].data: _ID = G.nodes["word"].data["_ID"] else: _ID = slice(None) out = [getattr(part, "_" + key)(_ID).to(G.device) for part in self] G.nodes["word"].data[key] = torch.cat(out).T def update_from(self, G, mult, rho): nphi = G.nodes["word"].data["nphi"].T * mult if "_ID" in G.nodes["word"].data: _ID = G.nodes["word"].data["_ID"] else: _ID = slice(None) mean_change = [ x.update(y, _ID, rho) for x, y in zip(self, 
class Gamma(collections.namedtuple("Gamma", "concentration, rate")):
    """articulate the difference between torch gamma and numpy gamma"""

    @property
    def shape(self):
        # alias for `concentration`
        concentration, _ = self
        return concentration

    @property
    def scale(self):
        # reciprocal of `rate`
        _, rate = self
        return 1 / rate

    def sample(self, shape, device):
        """Draw a tensor of the given shape on `device` from torch's Gamma."""
        concentration = torch.as_tensor(self.concentration, device=device)
        rate = torch.as_tensor(self.rate, device=device)
        distribution = torch.distributions.gamma.Gamma(concentration, rate)
        return distribution.sample(shape)
[2] Reactive LDA Library blogpost by Yingjie Miao for a similar Gibbs model """ def __init__( self, n_words, n_components, prior=None, rho=1, mult={"doc": 1, "word": 1}, init={"doc": (100.0, 100.0), "word": (100.0, 100.0)}, device_list=["cpu"], verbose=True, ): self.n_words = n_words self.n_components = n_components if prior is None: prior = {"doc": 1.0 / n_components, "word": 1.0 / n_components} self.prior = prior self.rho = rho self.mult = mult self.init = init assert not isinstance(device_list, str), "plz wrap devices in a list" self.device_list = device_list[:n_components] # avoid edge cases self.verbose = verbose self._init_word_data() def _init_word_data(self): split_sections = np.diff( np.linspace(0, self.n_components, len(self.device_list) + 1).astype( int ) ) word_nphi = [ Gamma(*self.init["word"]).sample((s, self.n_words), device) for s, device in zip(split_sections, self.device_list) ] self.word_data = WordData(self.prior["word"], word_nphi) def _init_doc_data(self, n_docs, device): doc_nphi = Gamma(*self.init["doc"]).sample( (n_docs, self.n_components), device ) return DocData(self.prior["doc"], doc_nphi) def save(self, f): for w in self.word_data: w.clear_cache() torch.save( { "prior": self.prior, "rho": self.rho, "mult": self.mult, "init": self.init, "word_data": [part.nphi for part in self.word_data], }, f, ) def _prepare_graph(self, G, doc_data, key="Elog"): doc_data.prepare_graph(G, key) self.word_data.prepare_graph(G, key) def _e_step(self, G, doc_data=None, mean_change_tol=1e-3, max_iters=100): """_e_step implements doc data sampling until convergence or max_iters""" if doc_data is None: doc_data = self._init_doc_data(G.num_nodes("doc"), G.device) G_rev = G.reverse() # word -> doc self.word_data.prepare_graph(G_rev) for i in range(max_iters): doc_data.prepare_graph(G_rev) G_rev.update_all( lambda edges: {"phi": EdgeData(edges.src, edges.dst).phi}, dgl.function.sum("phi", "nphi"), ) mean_change = doc_data.update_from(G_rev, self.mult["doc"]) if 
    def predict(self, doc_data):
        """Score words for each document from doc and word expectations.

        ``doc_data._expectation()`` is split along dim 1 into one piece per
        word-data shard (each piece moved to that shard's device).  Every
        shard contributes ``d_exp @ w._expectation()``, evaluated without
        materializing the word expectation, and the contributions are summed
        on ``doc_data.device``.

        Returns the accumulated score tensor on ``doc_data.device``.
        # NOTE(review): presumably shaped (n_docs, n_words) - confirm.
        """
        pred_scores = [
            # d_exp @ w._expectation()
            # expanded as (d_exp / posterior_sum) @ (nphi + prior): the matmul
            # uses nphi directly and the prior enters through the row sum.
            (lambda x: x @ w.nphi + x.sum(1, keepdims=True) * w.prior)(
                d_exp / w.posterior_sum.unsqueeze(0)
            )
            for (d_exp, w) in zip(
                self.word_data.split_device(doc_data._expectation(), dim=1),
                self.word_data,
            )
        ]
        # Accumulate the per-shard results on the document device.
        x = torch.zeros_like(pred_scores[0], device=doc_data.device)
        for p in pred_scores:
            x += p.to(x.device)
        return x
""" G = G.clone() self._prepare_graph(G, doc_data) G.update_all( lambda edges: {"phi": EdgeData(edges.src, edges.dst).phi}, dgl.function.sum("phi", "nphi"), ) self._last_mean_change = self.word_data.update_from( G, self.mult["word"], self.rho ) if self.verbose: print(f"m-step mean_change={self._last_mean_change:.4f}, ", end="") Bayesian_gap = np.mean( [part.Bayesian_gap.mean().tolist() for part in self.word_data] ) print(f"Bayesian_gap={Bayesian_gap:.4f}") def partial_fit(self, G): doc_data = self._e_step(G) self._m_step(G, doc_data) return self def fit(self, G, mean_change_tol=1e-3, max_epochs=10): for i in range(max_epochs): if self.verbose: print(f"epoch {i+1}, ", end="") self.partial_fit(G) if self._last_mean_change < mean_change_tol: break return self def perplexity(self, G, doc_data=None): """ppl = exp{-sum[log(p(w1,...,wn|d))] / n} Follows Eq (15) in Hoffman et al., 2010. """ if doc_data is None: doc_data = self._e_step(G) # compute E[log p(docs | theta, beta)] G = G.clone() self._prepare_graph(G, doc_data) G.apply_edges( lambda edges: {"loglike": EdgeData(edges.src, edges.dst).loglike} ) edge_elbo = (G.edata["loglike"].sum() / G.num_edges()).tolist() if self.verbose: print(f"neg_elbo phi: {-edge_elbo:.3f}", end=" ") # compute E[log p(theta | alpha) - log q(theta | gamma)] doc_elbo = (doc_data.loglike.sum() / doc_data.n.sum()).tolist() if self.verbose: print(f"theta: {-doc_elbo:.3f}", end=" ") # compute E[log p(beta | eta) - log q(beta | lambda)] # The denominator n for extrapolation perplexity is undefined. # We use the train set, whereas sklearn uses the test set. 
def doc_subgraph(G, doc_ids):
    """Build a doc->word graph restricted to the given documents.

    A one-layer full-neighbor sampler is run on the reversed graph
    (word -> doc) seeded at ``doc_ids``; the resulting block is rebuilt as a
    ``DGLGraph`` and reversed again so edges point from docs to words.  The
    word nodes keep the ``"_ID"`` data copied from the block, which
    ``WordData.prepare_graph`` / ``update_from`` read to select columns.

    Parameters
    ----------
    G : graph with "doc" and "word" node types
    doc_ids : anything accepted by ``torch.as_tensor``

    Returns the reversed graph ``B``.
    """
    sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
    # sample() yields three values; only the single block of the last one
    # is used here.
    _, _, (block,) = sampler.sample(
        G.reverse(), {"doc": torch.as_tensor(doc_ids)}
    )
    # NOTE(review): relies on the private `block._graph` handle and on the
    # node-type name list ["_", "word", "doc", "_"] - verify against the DGL
    # version in use.
    B = dgl.DGLGraph(
        block._graph, ["_", "word", "doc", "_"], block.etypes
    ).reverse()
    B.nodes["word"].data["_ID"] = block.nodes["word"].data["_ID"]
    return B
class GNNModule(nn.Module):
    """One layer that jointly updates graph features ``x`` and line-graph
    features ``y``.

    Every linear map goes from ``in_feats`` to ``out_feats``; ``radius``
    controls how many aggregated feature tensors are combined per update.
    """

    def __init__(self, in_feats, out_feats, radius):
        super().__init__()
        self.out_feats = out_feats
        self.radius = radius

        def make_linear():
            return nn.Linear(in_feats, out_feats)

        def make_linear_list():
            return nn.ModuleList([make_linear() for _ in range(radius)])

        # Creation order is kept fixed so seeded initialization is unchanged.
        self.theta_x = make_linear()
        self.theta_deg = make_linear()
        self.theta_y = make_linear()
        self.theta_list = make_linear_list()

        self.gamma_y = make_linear()
        self.gamma_deg = make_linear()
        self.gamma_x = make_linear()
        self.gamma_list = make_linear_list()

        self.bn_x = nn.BatchNorm1d(out_feats)
        self.bn_y = nn.BatchNorm1d(out_feats)

    def aggregate(self, g, z):
        """Return ``radius`` tensors obtained by repeated sum-propagation.

        The first entry is one propagation of ``z``; entry ``i + 1`` is
        reached after ``2**i`` further propagations.
        """

        def propagate():
            g.update_all(fn.copy_u(u="z", out="m"), fn.sum(msg="m", out="z"))

        g.ndata["z"] = z
        propagate()
        collected = [g.ndata["z"]]
        for level in range(self.radius - 1):
            for _ in range(2**level):
                propagate()
            collected.append(g.ndata["z"])
        return collected

    def forward(self, g, lg, x, y, deg_g, deg_lg, pm_pd):
        """Compute the updated ``(x, y)`` pair."""
        # rows of x gathered by pm_pd, consumed by the y update below
        pmpd_x = F.embedding(pm_pd, x)

        sum_x = sum(
            theta(z) for theta, z in zip(self.theta_list, self.aggregate(g, x))
        )

        # sum the edge features y onto destination nodes
        g.edata["y"] = y
        g.update_all(fn.copy_e(e="y", out="m"), fn.sum("m", "pmpd_y"))
        pmpd_y = g.ndata.pop("pmpd_y")

        x = (
            self.theta_x(x)
            + self.theta_deg(deg_g * x)
            + sum_x
            + self.theta_y(pmpd_y)
        )
        half = self.out_feats // 2
        # ReLU only on the second half of the channels
        x = th.cat([x[:, :half], F.relu(x[:, half:])], 1)
        x = self.bn_x(x)

        sum_y = sum(
            gamma(z) for gamma, z in zip(self.gamma_list, self.aggregate(lg, y))
        )

        y = (
            self.gamma_y(y)
            + self.gamma_deg(deg_lg * y)
            + sum_y
            + self.gamma_x(pmpd_x)
        )
        y = th.cat([y[:, :half], F.relu(y[:, half:])], 1)
        y = self.bn_y(y)

        return x, y
def from_np(f, *args):
    """Decorate ``f`` so that numpy-array arguments arrive as torch tensors.

    Every positional and keyword argument that is a ``np.ndarray`` is
    converted with ``th.from_numpy``; all other arguments are passed through
    untouched.  The extra ``*args`` of the decorator itself are accepted for
    backward compatibility and ignored.

    Returns the wrapping function, which keeps the name and docstring of
    ``f`` and returns whatever ``f`` returns.
    """
    import functools

    def to_tensor(value):
        return th.from_numpy(value) if isinstance(value, np.ndarray) else value

    @functools.wraps(f)
    def wrap(*args, **kwargs):
        converted = [to_tensor(a) for a in args]
        converted_kw = {key: to_tensor(v) for key, v in kwargs.items()}
        return f(*converted, **converted_kw)

    return wrap
# Main training loop: one pass over the training loader per epoch, followed
# by an evaluation sweep with test().
n_iterations = args.n_graphs // args.batch_size
if n_iterations == 0:
    # drop_last=True would leave the loader empty and the per-epoch averages
    # undefined; fail early with an explicit message.
    raise ValueError(
        "--batch-size (%d) must not exceed --n-graphs (%d)"
        % (args.batch_size, args.n_graphs)
    )

for i in range(args.n_epochs):
    total_loss, total_overlap, s_forward, s_backward = 0, 0, 0, 0
    n_batches = 0
    for j, [g, lg, deg_g, deg_lg, pm_pd] in enumerate(training_loader):
        loss, overlap, t_forward, t_backward = step(
            i, j, g, lg, deg_g, deg_lg, pm_pd
        )
        # Keep only the Python float: accumulating the tensor would keep a
        # reference to every batch's autograd history for the whole epoch.
        loss = loss.item()

        total_loss += loss
        total_overlap += overlap
        s_forward += t_forward
        s_backward += t_backward
        n_batches += 1

        # zero-padding prefixes so log lines align
        epoch = "0" * (len(str(args.n_epochs)) - len(str(i)))
        iteration = "0" * (len(str(n_iterations)) - len(str(j)))
        if args.verbose:
            print(
                "[epoch %s%d iteration %s%d]loss %.3f | overlap %.3f"
                % (epoch, i, iteration, j, loss, overlap)
            )

    epoch = "0" * (len(str(args.n_epochs)) - len(str(i)))
    loss = total_loss / n_batches
    overlap = total_overlap / n_batches
    t_forward = s_forward / n_batches
    t_backward = s_backward / n_batches
    print(
        "[epoch %s%d]loss %.3f | overlap %.3f | forward time %.3fs | backward time %.3fs"
        % (epoch, i, loss, overlap, t_forward, t_backward)
    )

    overlap_list = test()
    overlap_str = " - ".join(["%.3f" % overlap for overlap in overlap_list])
    print("[epoch %s%d]overlap: %s" % (epoch, i, overlap_str))
examples/pytorch/metapath2vec/README.md ================================================ Metapath2vec ============ - Paper link: [metapath2vec: Scalable Representation Learning for Heterogeneous Networks](https://ericdongyx.github.io/papers/KDD17-dong-chawla-swami-metapath2vec.pdf) - Author's code repo: [https://ericdongyx.github.io/metapath2vec/m2v.html](https://ericdongyx.github.io/metapath2vec/m2v.html). Dependencies ------------ - PyTorch 1.0.1+ How to run the code ----- Run with either of the following procedures: * Running with default AMiner dataset: 1. Directly run the following command: ```bash python metapath2vec.py --aminer --path "where/you/want/to/download" --output_file "your_model_output_path" ``` * Running with another AMiner-like dataset 1. Prepare the data in the same format as the ones of AMiner and DBIS in Section B of [Author's code repo](https://ericdongyx.github.io/metapath2vec/m2v.html). 2. Run `sampler.py` on your graph dataset with, for instance, ```bash python sampler.py net_dbis ``` 3. Run the following command: ```bash python metapath2vec.py --path net_dbis/output_path.txt --output_file "your_model_output_path" ``` Tips: Change num_workers based on your GPU instances; Running 3 or 4 epochs is actually enough. 
Tricks included in the implementation:
-------
1. Sub-sampling;
2. Negative sampling without repeatedly calling numpy random choices.

Performance and Explanations:
-------
Venue Classification Results for Metapath2vec:

| Metric | 5% | 10% | 20% | 30% | 40% | 50% | 60% | 70% | 80% | 90% |
| ------ | -- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Macro-F1 | 0.3033 | 0.5247 | 0.8033 | 0.8971 | 0.9406 | 0.9532 | 0.9529 | 0.9701 | 0.9683 | 0.9670 |
| Micro-F1 | 0.4173 | 0.5975 | 0.8327 | 0.9011 | 0.9400 | 0.9522 | 0.9537 | 0.9725 | 0.9815 | 0.9857 |

Author Classification Results for Metapath2vec:

| Metric | 5% | 10% | 20% | 30% | 40% | 50% | 60% | 70% | 80% | 90% |
| ------ | -- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| Macro-F1 | 0.9216 | 0.9262 | 0.9292 | 0.9303 | 0.9309 | 0.9314 | 0.9315 | 0.9316 | 0.9319 | 0.9320 |
| Micro-F1 | 0.9279 | 0.9319 | 0.9346 | 0.9356 | 0.9361 | 0.9365 | 0.9365 | 0.9365 | 0.9367 | 0.9369 |

Note that:

Testing files are available in the "label 2" file.

The above are the results listed in the paper; in real experiments, the exact numbers might be slightly different:

1. For venue node classification, when the training set is small (e.g. 5%), the variance of the performance is large because only a few labeled venues are available.

2. For author node classification, the performance is stable because the number of labeled authors is huge, so even 5% of the training data is sufficient.

3. In test.py you can change the number of experiment repetitions. Testing author classification is very slow, so you may want to run it only 1 or 2 times.
class PBar(object):
    """Context manager whose instances act as a ``urlretrieve`` report hook.

    The tqdm bar is created lazily on the first hook call and closed when the
    ``with`` block exits.
    """

    def __enter__(self):
        # No bar yet: the total size is only known once the hook is called.
        self.t = None
        return self

    def __call__(self, blockno, readsize, totalsize):
        """Report hook: ``(blocks transferred so far, block size, total size)``."""
        if self.t is None:
            # A non-positive total means the size is unknown.
            self.t = tqdm.tqdm(total=totalsize if totalsize > 0 else None)
        # urlretrieve calls the hook once with blockno == 0 before any data
        # has been read; counting that call would overshoot by one block.
        if blockno > 0:
            self.t.update(readsize)

    def __exit__(self, exc_type, exc_value, traceback):
        # The hook may never have run (e.g. the download failed immediately);
        # closing a missing bar would raise and hide the original exception.
        if self.t is not None:
            self.t.close()
class Metapath2VecTrainer:
    """Train a skip-gram model on metapath walks and write the embeddings."""

    def __init__(self, args):
        """Build the dataset, data loader and model from parsed CLI ``args``.

        Reads ``aminer``, ``path``, ``min_count``, ``care_type``,
        ``window_size``, ``batch_size``, ``num_workers``, ``output_file``,
        ``dim``, ``iterations`` and ``initial_lr`` from ``args``.
        """
        if args.aminer:
            dataset = AminerDataset(args.path)
        else:
            dataset = CustomDataset(args.path)
        self.data = DataReader(dataset, args.min_count, args.care_type)
        dataset = Metapath2vecDataset(self.data, args.window_size)
        self.dataloader = DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            collate_fn=dataset.collate,
        )

        self.output_file_name = args.output_file
        self.emb_size = len(self.data.word2id)
        self.emb_dimension = args.dim
        self.batch_size = args.batch_size
        self.iterations = args.iterations
        self.initial_lr = args.initial_lr
        self.skip_gram_model = SkipGramModel(self.emb_size, self.emb_dimension)

        self.use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if self.use_cuda else "cpu")
        if self.use_cuda:
            self.skip_gram_model.cuda()

    def train(self):
        """Run ``self.iterations`` passes over the data and save embeddings.

        Uses SparseAdam with a cosine-annealed learning rate and prints an
        exponentially smoothed loss every 500 batches.
        """
        optimizer = optim.SparseAdam(
            list(self.skip_gram_model.parameters()), lr=self.initial_lr
        )
        # NOTE(review): T_max is one pass over the loader while the scheduler
        # lives across all iterations, so the rate follows the cosine back up
        # on later passes - confirm this is intended.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, len(self.dataloader)
        )

        for iteration in range(self.iterations):
            print("\n\n\nIteration: " + str(iteration + 1))
            running_loss = 0.0
            for i, sample_batched in enumerate(tqdm(self.dataloader)):
                # Batches with fewer than two positive pairs are skipped.
                if len(sample_batched[0]) > 1:
                    pos_u = sample_batched[0].to(self.device)
                    pos_v = sample_batched[1].to(self.device)
                    neg_v = sample_batched[2].to(self.device)

                    optimizer.zero_grad()
                    loss = self.skip_gram_model(pos_u, pos_v, neg_v)
                    loss.backward()
                    optimizer.step()
                    # Step the scheduler only after the optimizer: the
                    # reverse order skips the first value of the schedule
                    # (PyTorch >= 1.1 warns about it).
                    scheduler.step()

                    running_loss = running_loss * 0.9 + loss.item() * 0.1
                    if i > 0 and i % 500 == 0:
                        print(" Loss: " + str(running_loss))

            # Write the current embeddings after every iteration.
            self.skip_gram_model.save_embedding(
                self.data.id2word, self.output_file_name
            )
class SkipGramModel(nn.Module):
    """Skip-gram model with negative sampling over two sparse embedding tables.

    ``u_embeddings`` holds the center-word vectors and ``v_embeddings`` the
    context-word vectors. Both have ``emb_size`` rows of ``emb_dimension``
    columns.
    """

    def __init__(self, emb_size, emb_dimension):
        super(SkipGramModel, self).__init__()
        self.emb_size = emb_size
        self.emb_dimension = emb_dimension
        self.u_embeddings = nn.Embedding(emb_size, emb_dimension, sparse=True)
        self.v_embeddings = nn.Embedding(emb_size, emb_dimension, sparse=True)

        # Center vectors start small and uniform; context vectors start at zero.
        initrange = 1.0 / self.emb_dimension
        init.uniform_(self.u_embeddings.weight.data, -initrange, initrange)
        init.constant_(self.v_embeddings.weight.data, 0)

    def forward(self, pos_u, pos_v, neg_v):
        """Return the mean negative-sampling loss of a batch.

        ``pos_u`` and ``pos_v`` are 1-D index tensors of equal length (center
        and context ids); ``neg_v`` is a 2-D index tensor with one row of
        negative ids per pair.
        """
        emb_u = self.u_embeddings(pos_u)
        emb_v = self.v_embeddings(pos_v)
        emb_neg_v = self.v_embeddings(neg_v)

        score = torch.sum(torch.mul(emb_u, emb_v), dim=1)
        score = torch.clamp(score, max=10, min=-10)
        score = -F.logsigmoid(score)

        # Squeeze only the trailing singleton produced by bmm. A bare
        # squeeze() would also drop a batch axis of size one (or a single
        # negative) and break the sum over dim=1 below.
        neg_score = torch.bmm(emb_neg_v, emb_u.unsqueeze(2)).squeeze(2)
        neg_score = torch.clamp(neg_score, max=10, min=-10)
        neg_score = -torch.sum(F.logsigmoid(-neg_score), dim=1)

        return torch.mean(score + neg_score)

    def save_embedding(self, id2word, file_name):
        """Write the center-word embeddings to ``file_name`` as text.

        The first line is ``<number of words> <dimension>``; every following
        line is a word and its vector components, separated by spaces.
        """
        embedding = self.u_embeddings.weight.detach().cpu().numpy()
        with open(file_name, "w") as f:
            f.write("%d %d\n" % (len(id2word), self.emb_dimension))
            for wid, w in id2word.items():
                e = " ".join(map(str, embedding[wid]))
                f.write("%s %s\n" % (w, e))
class Metapath2vecDataset(Dataset):
    """Stream skip-gram training pairs from a file of metapath walks.

    ``data`` is the reader object that supplies ``inputFileName``,
    ``sentences_count``, ``word2id``, ``discards`` and ``getNegatives``;
    ``window_size`` bounds the context window around each center token.
    """

    def __init__(self, data, window_size):
        # read in data, window_size and input filename
        self.data = data
        self.window_size = window_size
        self.input_file = open(data.inputFileName, encoding="ISO-8859-1")

    def __len__(self):
        # return the number of walks
        return self.data.sentences_count

    def __getitem__(self, idx):
        """Return the (center, context, 5 negatives) triples of the next usable walk.

        ``idx`` is ignored: walks are read sequentially from the open file,
        wrapping to the start at end of file.
        """
        word2id = self.data.word2id
        discards = self.data.discards
        while True:
            line = self.input_file.readline()
            if not line:
                self.input_file.seek(0, 0)
                line = self.input_file.readline()

            if len(line) <= 1:
                continue
            words = line.split()
            if len(words) <= 1:
                continue

            # Keep known words only, sub-sampled by their discard threshold.
            word_ids = [
                word2id[w]
                for w in words
                if w in word2id and np.random.rand() < discards[word2id[w]]
            ]

            pair_catch = []
            for i, u in enumerate(word_ids):
                start = max(i - self.window_size, 0)
                # NOTE(review): the slice stops at i + window_size (exclusive),
                # so the window reaches one token fewer to the right than to
                # the left -- confirm whether that is intended.
                window = word_ids[start : i + self.window_size]
                for offset, v in enumerate(window):
                    # `offset` is relative to `start`. Comparing it with `i`
                    # directly would emit (u, u) self-pairs and drop a real
                    # context token once i exceeds the window size.
                    if start + offset == i:
                        continue
                    pair_catch.append((u, v, self.data.getNegatives(v, 5)))
            return pair_catch

    @staticmethod
    def collate(batches):
        """Flatten per-walk triple lists into three ``LongTensor`` columns."""
        all_u = []
        all_v = []
        all_neg_v = []
        for batch in batches:
            for u, v, neg_v in batch:
                all_u.append(u)
                all_v.append(v)
                all_neg_v.append(neg_v)

        return (
            torch.LongTensor(all_u),
            torch.LongTensor(all_v),
            torch.LongTensor(all_neg_v),
        )
# "conference - paper - Author - paper - conference" metapath sampling
def generate_metapath():
    """Sample metapath random walks and write them to ``output_path.txt``.

    For every conference node, ``num_walks_per_node`` walks are sampled along
    the metapath ``cp -> pa -> ap -> pc`` repeated ``walk_length`` times. Each
    walk becomes one line of space-separated conference and author names;
    paper nodes are skipped.
    """
    hg, author_names, conf_names, _ = construct_graph()
    metapath = ["cp", "pa", "ap", "pc"] * walk_length

    # The context manager closes the output even if sampling fails midway.
    with open(os.path.join(path, "output_path.txt"), "w") as output_file:
        for conf_idx in tqdm.trange(hg.num_nodes("conf")):
            traces, _ = dgl.sampling.random_walk(
                hg,
                [conf_idx] * num_walks_per_node,
                metapath=metapath,
            )
            for trace in traces.tolist():
                names = []
                # Even positions alternate conference, author, conference, ...
                for i in range(0, len(trace), 2):  # skip paper
                    node = trace[i]
                    # random_walk pads walks that stop early with -1; indexing
                    # a name list with it would silently emit the last name.
                    if node < 0:
                        break
                    names.append(
                        (conf_names if i % 4 == 0 else author_names)[node]
                    )
                print(" ".join(names), file=output_file)
if __name__ == "__main__":
    # Evaluate saved embeddings by classifying venues and authors with
    # logistic regression and reporting macro/micro F1.
    venue_count = 133
    author_count = 246678
    experiment_times = 1
    percent = 0.05

    # Node name -> class label, one "<name> <label>" pair per line.
    check_venue = {}
    check_author = {}
    with open(".../label 2/googlescholar.8area.venue.label.txt") as label_file:
        for line in label_file:
            venue_label = line.strip().split(" ")
            check_venue[venue_label[0]] = int(venue_label[1])
    with open(".../label 2/googlescholar.8area.author.label.txt") as label_file:
        for line in label_file:
            author_label = line.strip().split(" ")
            check_author[author_label[0]] = int(author_label[1])

    # collect embeddings separately in dictionary form
    venue_embed_dict = {}
    author_embed_dict = {}
    with open(".../output_file_path/...") as embedding_file:
        embedding_file.readline()  # skip the first line of the embedding file
        print("read line by line")
        for line in embedding_file:
            embed = line.strip().split(" ")
            name = embed[0]
            if name in check_venue:
                venue_embed_dict[name] = [float(x) for x in embed[1:]]
            if name in check_author:
                author_embed_dict[name] = [float(x) for x in embed[1:]]
    print("reading finished")

    venues = list(venue_embed_dict.keys())
    authors = list(author_embed_dict.keys())

    macro_average_venue = 0
    micro_average_venue = 0
    macro_average_author = 0
    micro_average_author = 0

    for _ in range(experiment_times):
        print("one more time")
        np.random.shuffle(venues)
        np.random.shuffle(authors)

        # Build each matrix in one shot; stacking row by row re-copies the
        # whole array on every step, which is quadratic in the node count.
        print("collecting venue embeddings")
        venue_embedding = np.array([venue_embed_dict[v] for v in venues])
        print("collecting author embeddings")
        author_embedding = np.array([author_embed_dict[a] for a in authors])

        # split data into training and testing
        print("splitting")
        venue_split = int(venue_count * percent)
        venue_training = venue_embedding[:venue_split, :]
        venue_testing = venue_embedding[venue_split:, :]
        author_split = int(author_count * percent)
        author_training = author_embedding[:author_split, :]
        author_testing = author_embedding[author_split:, :]

        # split label into training and testing
        venue_all = np.array([check_venue[v] for v in venues])
        venue_label = venue_all[:venue_split]
        venue_true = venue_all[venue_split:]
        author_all = np.array([check_author[a] for a in authors])
        author_label = author_all[:author_split]
        author_true = author_all[author_split:]

        print("beging predicting")
        clf_venue = LogisticRegression(
            random_state=0, solver="lbfgs", multi_class="multinomial"
        ).fit(venue_training, venue_label)
        y_pred_venue = clf_venue.predict(venue_testing)
        clf_author = LogisticRegression(
            random_state=0, solver="lbfgs", multi_class="multinomial"
        ).fit(author_training, author_label)
        y_pred_author = clf_author.predict(author_testing)

        macro_average_venue += f1_score(
            venue_true, y_pred_venue, average="macro"
        )
        micro_average_venue += f1_score(
            venue_true, y_pred_venue, average="micro"
        )
        macro_average_author += f1_score(
            author_true, y_pred_author, average="macro"
        )
        micro_average_author += f1_score(
            author_true, y_pred_author, average="micro"
        )

    print(macro_average_venue / float(experiment_times))
    print(micro_average_venue / float(experiment_times))
    print(macro_average_author / float(experiment_times))
    print(micro_average_author / float(experiment_times))
================================================ FILE: examples/pytorch/mixhop/README.md ================================================ # DGL Implementations of MixHop This DGL example implements the GNN model proposed in the paper [MixHop: Higher-Order Graph Convolution Architectures via Sparsified Neighborhood Mixing](https://arxiv.org/abs/1905.00067). For the original implementation, see [here](https://github.com/samihaija/mixhop). Contributor: [xnuohz](https://github.com/xnuohz) ### Requirements The codebase is implemented in Python 3.6. For the package version requirements, see below. ``` dgl 0.5.2 numpy 1.19.4 pandas 1.1.4 tqdm 4.53.0 torch 1.7.0 ``` ### The graph datasets used in this example DGL's built-in Cora, Pubmed and Citeseer datasets. Dataset summary: | Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes | | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | | Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 | | Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 | | Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 | ### Usage ###### Dataset options ``` --dataset str The graph dataset name. Default is 'Cora'. ``` ###### GPU options ``` --gpu int GPU index. Default is -1, using CPU. ``` ###### Model options ``` --epochs int Number of training epochs. Default is 2000. --early-stopping int Early stopping rounds. Default is 200. --lr float SGD optimizer learning rate. Default is 0.5. --lamb float L2 regularization coefficient. Default is 0.0005. --step-size int Period of learning rate decay. Default is 40. --gamma float Factor of learning rate decay. Default is 0.01. --hid-dim int Hidden layer dimensionalities. Default is 60. --num-layers int Number of GNN layers. Default is 4. --input-dropout float Dropout applied at input layer. Default is 0.7. --layer-dropout float Dropout applied at hidden layers. Default is 0.9. --p list List of powers of adjacency matrix. Default is [0, 1, 2]. 
class MixHopConv(nn.Module):
    r"""

    Description
    -----------
    MixHop Graph Convolutional layer from paper `MixHop: Higher-Order Graph
    Convolutional Architectures via Sparsified Neighborhood Mixing
    <https://arxiv.org/abs/1905.00067>`__.

    .. math::
        H^{(i+1)} =\underset{j \in P}{\Bigg\Vert} \sigma\left(\widehat{A}^j H^{(i)} W_j^{(i)}\right),

    where :math:`\widehat{A}` denotes the symmetrically normalized adjacency
    matrix with self-connections, :math:`D_{ii} = \sum_{j=0} \widehat{A}_{ij}`
    its diagonal degree matrix, :math:`W_j^{(i)}` denotes the trainable weight
    matrix of different MixHop layers.

    Parameters
    ----------
    in_dim : int
        Input feature size. i.e, the number of dimensions of :math:`H^{(i)}`.
    out_dim : int
        Output feature size for each power.
    p: list
        List of powers of adjacency matrix. Defaults: ``[0, 1, 2]``.
    dropout: float, optional
        Dropout rate on node features. Defaults: ``0``.
    activation: callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node features.
        Default: ``None``.
    batchnorm: bool, optional
        If True, use batch normalization. Defaults: ``False``.
    """

    def __init__(
        self,
        in_dim,
        out_dim,
        p=[0, 1, 2],
        dropout=0,
        activation=None,
        batchnorm=False,
    ):
        super(MixHopConv, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        # Copy so the shared default list (or a caller's list) is never aliased.
        self.p = list(p)
        self.activation = activation
        self.batchnorm = batchnorm

        # define dropout layer
        self.dropout = nn.Dropout(dropout)

        # define batch norm layer
        if self.batchnorm:
            self.bn = nn.BatchNorm1d(out_dim * len(self.p))

        # define weight dict for each power j
        self.weights = nn.ModuleDict(
            {str(j): nn.Linear(in_dim, out_dim, bias=False) for j in self.p}
        )

    def forward(self, graph, feats):
        """Return the concatenated per-power outputs for ``feats`` on ``graph``."""
        with graph.local_scope():
            # assume that the graphs are undirected and graph.in_degrees()
            # is the same as graph.out_degrees()
            degs = graph.in_degrees().float().clamp(min=1)
            norm = torch.pow(degs, -0.5).to(feats.device).unsqueeze(1)
            max_j = max(self.p) + 1
            outputs = []
            for j in range(max_j):
                if j in self.p:
                    outputs.append(self.weights[str(j)](feats))
                # Nothing reads the features after the highest power, so the
                # final propagation would be wasted work.
                if j == max_j - 1:
                    break
                # One step of symmetrically normalized propagation.
                feats = feats * norm
                graph.ndata["h"] = feats
                graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
                feats = graph.ndata.pop("h")
                feats = feats * norm

            final = torch.cat(outputs, dim=1)

            if self.batchnorm:
                final = self.bn(final)

            if self.activation is not None:
                final = self.activation(final)

            final = self.dropout(final)

            return final


class MixHop(nn.Module):
    """Stack of ``num_layers - 1`` MixHopConv layers followed by a linear classifier.

    Each MixHopConv emits ``hid_dim * len(p)`` features, which is the input
    width of the next layer and of the final linear layer.
    """

    def __init__(
        self,
        in_dim,
        hid_dim,
        out_dim,
        num_layers=2,
        p=[0, 1, 2],
        input_dropout=0.0,
        layer_dropout=0.0,
        activation=None,
        batchnorm=False,
    ):
        super(MixHop, self).__init__()
        self.in_dim = in_dim
        self.hid_dim = hid_dim
        self.out_dim = out_dim
        self.num_layers = num_layers
        self.p = list(p)
        self.input_dropout = input_dropout
        self.layer_dropout = layer_dropout
        self.activation = activation
        self.batchnorm = batchnorm

        self.layers = nn.ModuleList()
        self.dropout = nn.Dropout(self.input_dropout)

        # Width comes from this model's own `p`, not from the script-level
        # `args`, so the class also works when imported from other code.
        concat_dim = self.hid_dim * len(self.p)

        # Input layer
        self.layers.append(
            MixHopConv(
                self.in_dim,
                self.hid_dim,
                p=self.p,
                dropout=self.input_dropout,
                activation=self.activation,
                batchnorm=self.batchnorm,
            )
        )

        # Hidden layers: num_layers - 2 further MixHopConv layers
        for _ in range(self.num_layers - 2):
            self.layers.append(
                MixHopConv(
                    concat_dim,
                    self.hid_dim,
                    p=self.p,
                    dropout=self.layer_dropout,
                    activation=self.activation,
                    batchnorm=self.batchnorm,
                )
            )

        self.fc_layers = nn.Linear(concat_dim, self.out_dim, bias=False)

    def forward(self, graph, feats):
        feats = self.dropout(feats)
        for layer in self.layers:
            feats = layer(graph, feats)

        feats = self.fc_layers(feats)

        return feats


def main(args):
    """Train MixHop on the chosen citation dataset and return the test accuracy."""
    # Step 1: Prepare graph data and retrieve train/validation/test index
    # Load from DGL dataset
    if args.dataset == "Cora":
        dataset = CoraGraphDataset()
    elif args.dataset == "Citeseer":
        dataset = CiteseerGraphDataset()
    elif args.dataset == "Pubmed":
        dataset = PubmedGraphDataset()
    else:
        raise ValueError("Dataset {} is invalid.".format(args.dataset))

    graph = dataset[0]
    graph = dgl.add_self_loop(graph)

    # check cuda
    if args.gpu >= 0 and torch.cuda.is_available():
        device = "cuda:{}".format(args.gpu)
    else:
        device = "cpu"

    # retrieve the number of classes
    n_classes = dataset.num_classes

    # retrieve labels of ground truth
    labels = graph.ndata.pop("label").to(device).long()

    # Extract node features
    feats = graph.ndata.pop("feat").to(device)
    n_features = feats.shape[-1]

    # retrieve masks for train/validation/test
    train_mask = graph.ndata.pop("train_mask")
    val_mask = graph.ndata.pop("val_mask")
    test_mask = graph.ndata.pop("test_mask")

    # squeeze(1) keeps the index 1-D even when a mask selects one node; a
    # bare squeeze() would give a 0-d tensor and break len() below.
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze(1).to(device)
    val_idx = torch.nonzero(val_mask, as_tuple=False).squeeze(1).to(device)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1).to(device)

    graph = graph.to(device)

    # Step 2: Create model
    model = MixHop(
        in_dim=n_features,
        hid_dim=args.hid_dim,
        out_dim=n_classes,
        num_layers=args.num_layers,
        p=args.p,
        input_dropout=args.input_dropout,
        layer_dropout=args.layer_dropout,
        activation=torch.tanh,
        batchnorm=True,
    )

    model = model.to(device)
    best_model = copy.deepcopy(model)

    # Step 3: Create training components
    loss_fn = nn.CrossEntropyLoss()
    opt = optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.lamb)
    scheduler = optim.lr_scheduler.StepLR(opt, args.step_size, gamma=args.gamma)

    # Step 4: training epochs
    acc = 0
    no_improvement = 0
    epochs = trange(args.epochs, desc="Accuracy & Loss")

    for _ in epochs:
        # Training using a full graph
        model.train()

        logits = model(graph, feats)

        # compute loss
        train_loss = loss_fn(logits[train_idx], labels[train_idx])
        train_acc = torch.sum(
            logits[train_idx].argmax(dim=1) == labels[train_idx]
        ).item() / len(train_idx)

        # backward
        opt.zero_grad()
        train_loss.backward()
        opt.step()

        # Validation using a full graph
        model.eval()

        with torch.no_grad():
            # Re-run the model in eval mode. The training logits were made
            # with dropout active and before this epoch's weight update, so
            # they are not a valid validation signal.
            eval_logits = model(graph, feats)
            valid_loss = loss_fn(eval_logits[val_idx], labels[val_idx])
            valid_acc = torch.sum(
                eval_logits[val_idx].argmax(dim=1) == labels[val_idx]
            ).item() / len(val_idx)

        # Print out performance
        epochs.set_description(
            "Train Acc {:.4f} | Train Loss {:.4f} | Val Acc {:.4f} | Val loss {:.4f}".format(
                train_acc, train_loss.item(), valid_acc, valid_loss.item()
            )
        )

        if valid_acc < acc:
            no_improvement += 1
            if no_improvement == args.early_stopping:
                print("Early stop.")
                break
        else:
            no_improvement = 0
            acc = valid_acc
            best_model = copy.deepcopy(model)

        scheduler.step()

    best_model.eval()
    with torch.no_grad():
        logits = best_model(graph, feats)
    test_acc = torch.sum(
        logits[test_idx].argmax(dim=1) == labels[test_idx]
    ).item() / len(test_idx)

    print("Test Acc {:.4f}".format(test_acc))
    return test_acc
) parser.set_defaults(p=[0, 1, 2]) args = parser.parse_args() print(args) acc_lists = [] for _ in range(100): acc_lists.append(main(args)) acc_lists.sort() acc_lists_top = np.array(acc_lists[50:]) mean = np.around(np.mean(acc_lists_top, axis=0), decimals=3) std = np.around(np.std(acc_lists_top, axis=0), decimals=3) print("Total acc: ", acc_lists) print("Top 50 acc:", acc_lists_top) print("mean", mean) print("std", std) ================================================ FILE: examples/pytorch/model_zoo/README.md ================================================ Model Zoo ========== Here are examples of using the model zoo. ================================================ FILE: examples/pytorch/model_zoo/citation_network/README.md ================================================ # Node Classification on Citation Networks This example shows how to use modules defined in `dgl.nn.pytorch.conv` to do node classification on citation network datasets. ## Datasets - Cora - Citeseer - Pubmed ## Models - GCN: [Semi-Supervised Classification with Graph Convolutional Networks](https://arxiv.org/pdf/1609.02907) - GAT: [Graph Attention Networks](https://arxiv.org/abs/1710.10903) - GraphSAGE [Inductive Representation Learning on Large Graphs](https://cs.stanford.edu/people/jure/pubs/graphsage-nips17.pdf) - APPNP: [Predict then Propagate: Graph Neural Networks meet Personalized PageRank](https://arxiv.org/pdf/1810.05997) - GIN: [How Powerful are Graph Neural Networks?](https://arxiv.org/abs/1810.00826) - TAGCN: [Topology Adaptive Graph Convolutional Networks](https://arxiv.org/abs/1710.10370) - SGC: [Simplifying Graph Convolutional Networks](https://arxiv.org/abs/1902.07153) - AGNN: [Attention-based Graph Neural Network for Semi-supervised Learning](https://arxiv.org/pdf/1803.03735.pdf) - ChebNet: [Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering](https://arxiv.org/abs/1606.09375) ## Usage ``` python run.py [--gpu GPU] --model MODEL_NAME --dataset 
# Per-model training configuration.
#
# "extra_args" is unpacked positionally after (g, in_feats, n_classes) when the
# matching model class is constructed, so the order of its entries must follow
# that class's constructor signature. "lr" and "weight_decay" are passed to the
# optimizer.

# extra_args: n_hidden, n_layers, activation, dropout
GCN_CONFIG = {
    "extra_args": [16, 1, F.relu, 0.5],
    "lr": 1e-2,
    "weight_decay": 5e-4,
}

# extra_args: num_hidden, num_layers, heads, activation, feat_drop, attn_drop,
# negative_slope, residual
GAT_CONFIG = {
    "extra_args": [8, 1, [8] * 1 + [1], F.elu, 0.6, 0.6, 0.2, False],
    "lr": 0.005,
    "weight_decay": 5e-4,
}

# extra_args: n_hidden, n_layers, activation, dropout, aggregator_type
GRAPHSAGE_CONFIG = {
    "extra_args": [16, 1, F.relu, 0.5, "gcn"],
    "lr": 1e-2,
    "weight_decay": 5e-4,
}

# extra_args: n_hidden, n_layers, activation, feat_drop, edge_drop, alpha, k
APPNP_CONFIG = {
    "extra_args": [64, 1, F.relu, 0.5, 0.5, 0.1, 10],
    "lr": 1e-2,
    "weight_decay": 5e-4,
}

# extra_args: n_hidden, n_layers, activation, dropout
TAGCN_CONFIG = {
    "extra_args": [16, 1, F.relu, 0.5],
    "lr": 1e-2,
    "weight_decay": 5e-4,
}

# extra_args: n_hidden, n_layers, init_beta, learn_beta, dropout
AGNN_CONFIG = {
    "extra_args": [32, 2, 1.0, True, 0.5],
    "lr": 1e-2,
    "weight_decay": 5e-4,
}

# extra_args: n_hidden, k, bias
SGC_CONFIG = {
    "extra_args": [None, 2, False],
    "lr": 0.2,
    "weight_decay": 5e-6,
}

# extra_args: n_hidden, n_layers, init_eps, learn_eps
GIN_CONFIG = {
    "extra_args": [16, 1, 0, True],
    "lr": 1e-2,
    "weight_decay": 5e-6,
}

# extra_args: n_hidden, n_layers, k, bias
CHEBNET_CONFIG = {
    "extra_args": [32, 1, 2, True],
    "lr": 1e-2,
    "weight_decay": 5e-4,
}
class GAT(nn.Module):
    """Graph attention network bound to a fixed graph ``g``.

    Holds ``num_layers`` hidden GATConv layers, whose heads are flattened,
    plus one output GATConv layer, whose heads are averaged. ``heads[i]`` is
    the head count of layer ``i``.
    """

    def __init__(
        self,
        g,
        in_dim,
        num_classes,
        num_hidden,
        num_layers,
        heads,
        activation,
        feat_drop,
        attn_drop,
        negative_slope,
        residual,
    ):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = num_layers
        self.activation = activation

        # Arguments shared by every GATConv, in constructor order.
        shared = (feat_drop, attn_drop, negative_slope)

        # input projection (no residual)
        convs = [
            GATConv(
                in_dim, num_hidden, heads[0], *shared, False, self.activation
            )
        ]
        # hidden layers: multi-head output makes in_dim = num_hidden * num_heads
        for idx in range(1, num_layers):
            convs.append(
                GATConv(
                    num_hidden * heads[idx - 1],
                    num_hidden,
                    heads[idx],
                    *shared,
                    residual,
                    self.activation,
                )
            )
        # output projection
        convs.append(
            GATConv(
                num_hidden * heads[-2],
                num_classes,
                heads[-1],
                *shared,
                residual,
                None,
            )
        )
        self.gat_layers = nn.ModuleList(convs)

    def forward(self, inputs):
        hidden = inputs
        for conv in self.gat_layers[: self.num_layers]:
            hidden = conv(self.g, hidden).flatten(1)
        # output projection
        return self.gat_layers[-1](self.g, hidden).mean(1)


class GraphSAGE(nn.Module):
    """Stack of SAGEConv layers bound to a fixed graph ``g``.

    Every layer but the last applies ``activation``; the last maps to
    ``n_classes`` with no activation.
    """

    def __init__(
        self,
        g,
        in_feats,
        n_classes,
        n_hidden,
        n_layers,
        activation,
        dropout,
        aggregator_type,
    ):
        super(GraphSAGE, self).__init__()
        self.layers = nn.ModuleList()
        self.g = g

        # Layer widths: input, at least one hidden width, then the classes.
        widths = [in_feats] + [n_hidden] * max(n_layers, 1) + [n_classes]
        last = len(widths) - 2
        for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            self.layers.append(
                SAGEConv(
                    fan_in,
                    fan_out,
                    aggregator_type,
                    feat_drop=dropout,
                    activation=activation if idx < last else None,
                )
            )

    def forward(self, features):
        out = features
        for conv in self.layers:
            out = conv(self.g, out)
        return out
class TAGCN(nn.Module):
    """Stack of TAGConv layers bound to a fixed graph ``g``.

    Dropout is applied to the input of every layer except the first.
    """

    def __init__(
        self, g, in_feats, n_classes, n_hidden, n_layers, activation, dropout
    ):
        super(TAGCN, self).__init__()
        self.g = g

        # input layer
        stack = [TAGConv(in_feats, n_hidden, activation=activation)]
        # hidden layers
        stack += [
            TAGConv(n_hidden, n_hidden, activation=activation)
            for _ in range(n_layers - 1)
        ]
        # output layer, built with TAGConv's default activation argument
        stack.append(TAGConv(n_hidden, n_classes))

        self.layers = nn.ModuleList(stack)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        first, *rest = self.layers
        out = first(self.g, features)
        for conv in rest:
            out = conv(self.g, self.dropout(out))
        return out
class GIN(nn.Module):
    """Stack of ``GINConv`` layers evaluated on a fixed graph.

    Each layer wraps a small ``Dropout(0.6) -> Linear`` network and uses the
    ``"mean"`` aggregator. All layers but the last append a ReLU.
    """

    def __init__(
        self, g, in_feats, n_classes, n_hidden, n_layers, init_eps, learn_eps
    ):
        super().__init__()
        self.g = g

        def make_conv(n_in, n_out, with_relu):
            # One GINConv whose update function is Dropout -> Linear [-> ReLU].
            stages = [nn.Dropout(0.6), nn.Linear(n_in, n_out)]
            if with_relu:
                stages.append(nn.ReLU())
            return GINConv(nn.Sequential(*stages), "mean", init_eps, learn_eps)

        # Input layer, n_layers - 1 hidden layers, output layer (no ReLU).
        stack = [make_conv(in_feats, n_hidden, True)]
        stack.extend(
            make_conv(n_hidden, n_hidden, True) for _ in range(n_layers - 1)
        )
        stack.append(make_conv(n_hidden, n_classes, False))
        self.layers = nn.ModuleList(stack)

    def forward(self, features):
        """Apply every layer in order to ``features`` on the stored graph."""
        out = features
        for conv in self.layers:
            out = conv(self.g, out)
        return out
def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the entries selected by ``mask``.

    The model is switched to evaluation mode (and left there), run once on
    ``features`` without gradient tracking, and the index of the largest
    logit in each selected row is compared with the matching label.

    Returns the fraction of selected entries predicted correctly as a float.
    """
    model.eval()
    with torch.no_grad():
        selected_logits = model(features)[mask]
        selected_labels = labels[mask]
        predicted = torch.max(selected_logits, dim=1).indices
        n_correct = torch.sum(predicted == selected_labels).item()
    return n_correct * 1.0 / len(selected_labels)
epoch >= 3: t0 = time.time() # forward logits = model(features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: mean = (mean * (epoch - 3) + (time.time() - t0)) / (epoch - 2) acc = evaluate(model, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, mean, loss.item(), acc, n_edges / mean / 1000, ) ) print() acc = evaluate(model, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser( description="Node classification on citation networks." ) register_data_args(parser) parser.add_argument( "--model", type=str, default="gcn", help="model to use, available models are gcn, gat, graphsage, gin," "appnp, tagcn, sgc, agnn", ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument( "--self-loop", action="store_true", help="graph self-loop (default=False)", ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/model_zoo/geometric/.gitignore ================================================ MNIST/ ================================================ FILE: examples/pytorch/model_zoo/geometric/README.md ================================================ Geometric Deep Learning models ========= This example shows how to use geometric deep learning models defined in `dgl.nn.pytorch.conv` for graph classification. Currently we support the following models: - [ChebNet](https://arxiv.org/pdf/1606.09375.pdf) - [MoNet](https://arxiv.org/pdf/1611.08402.pdf) ## Image Classification on MNIST By transforming images to graphs, graph classification algorithms can be applied to image classification problems. 
def rescale_L(L, lmax=2):
    """Rescale Laplacian eigenvalues to [-1,1]

    Returns ``2 * L / lmax - I``, which maps an eigenvalue ``lam`` of ``L``
    in ``[0, lmax]`` to ``2 * lam / lmax - 1`` in ``[-1, 1]``.

    L: square sparse Laplacian matrix.
    lmax: upper bound on the eigenvalues of ``L`` (2 bounds the spectrum of
        a normalized Laplacian).

    The previous implementation divided by ``lmax * 2``, which squeezed the
    spectrum into ``[-1, -0.5]``. It also divided the caller's matrix in
    place while the identity shift only reached the returned copy. The
    input is now left untouched and a new matrix is returned.
    """
    M, _ = L.shape
    I = scipy.sparse.identity(M, format="csr", dtype=L.dtype)
    return L / (lmax / 2) - I
None def HEM(W, levels, rid=None): """ Coarsen a graph multiple times using the Heavy Edge Matching (HEM). Input W: symmetric sparse weight (adjacency) matrix levels: the number of coarsened graphs Output graph[0]: original graph of size N_1 graph[2]: coarser graph of size N_2 < N_1 graph[levels]: coarsest graph of Size N_levels < ... < N_2 < N_1 parents[i] is a vector of size N_i with entries ranging from 1 to N_{i+1} which indicate the parents in the coarser graph[i+1] nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i} Note if "graph" is a list of length k, then "parents" will be a list of length k-1 """ N, N = W.shape if rid is None: rid = np.random.permutation(range(N)) ss = np.array(W.sum(axis=0)).squeeze() rid = np.argsort(ss) parents = [] degree = W.sum(axis=0) - W.diagonal() graphs = [] graphs.append(W) print("Heavy Edge Matching coarsening with Xavier version") for _ in range(levels): # CHOOSE THE WEIGHTS FOR THE PAIRING # weights = ones(N,1) # metis weights weights = degree # graclus weights # weights = supernode_size # other possibility weights = np.array(weights).squeeze() # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR idx_row, idx_col, val = scipy.sparse.find(W) cc = idx_row rr = idx_col vv = val # TO BE SPEEDUP if not (list(cc) == list(np.sort(cc))): tmp = cc cc = rr rr = tmp cluster_id = HEM_one_level(cc, rr, vv, rid, weights) # cc is ordered parents.append(cluster_id) # COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH nrr = cluster_id[rr] ncc = cluster_id[cc] nvv = vv Nnew = cluster_id.max() + 1 # CSR is more appropriate: row,val pairs appear multiple times W = scipy.sparse.csr_matrix((nvv, (nrr, ncc)), shape=(Nnew, Nnew)) W.eliminate_zeros() # Add new graph to the list of all coarsened graphs graphs.append(W) N, N = W.shape # COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS) degree = W.sum(axis=0) # degree = W.sum(axis=0) - W.diagonal() # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS # [~, 
def compute_perm(parents):
    """
    Return a list of indices to reorder the adjacency and data matrices so
    that the union of two neighbors from layer to layer forms a binary tree.

    parents: sequence of integer NumPy arrays, finest level first, where
        ``parents[i][v]`` is the id of the cluster that vertex ``v`` of
        level ``i`` belongs to at level ``i + 1``. Each cluster may have at
        most two members.

    Returns one index list per level, finest level first. Ids greater than
    or equal to the real vertex count of a level denote fake vertices added
    so that every cluster has exactly two children. An empty ``parents``
    yields an empty list.
    """
    if len(parents) == 0:
        return []

    # Order of last layer is random (chosen by the clustering algorithm).
    M_last = int(np.max(parents[-1])) + 1
    indices = [list(range(M_last))]

    for parent in parents[::-1]:
        # Fake nodes go after real ones.
        pool_singeltons = len(parent)

        indices_layer = []
        for i in indices[-1]:
            indices_node = list(np.where(parent == i)[0])
            assert 0 <= len(indices_node) <= 2

            # Add a node to go with a singelton.
            if len(indices_node) == 1:
                indices_node.append(pool_singeltons)
                pool_singeltons += 1

            # Add two nodes as children of a singelton in the parent.
            elif len(indices_node) == 0:
                indices_node.append(pool_singeltons + 0)
                indices_node.append(pool_singeltons + 1)
                pool_singeltons += 2

            indices_layer.extend(indices_node)
        indices.append(indices_layer)

    # Sanity checks.
    for i, indices_layer in enumerate(indices):
        M = M_last * 2**i
        # Reduction by 2 at each layer (binary tree). The previous check,
        # `len(indices[0] == M)`, measured the length of a comparison result
        # and therefore never looked at the layer size.
        assert len(indices_layer) == M
        # The new ordering does not omit an indice.
        assert sorted(indices_layer) == list(range(M))

    return indices[::-1]
def grid(m, dtype=np.float32):
    """Return the coordinates of an ``m`` x ``m`` grid on the unit square.

    The result has shape ``(m**2, 2)`` and the requested ``dtype``. Column 0
    cycles through the ``m`` evenly spaced values in ``[0, 1]`` fastest,
    column 1 changes once every ``m`` rows.
    """
    n_points = m**2
    ticks = np.linspace(0, 1, m, dtype=dtype)
    first, second = np.meshgrid(ticks, np.linspace(0, 1, m, dtype=dtype))

    coords = np.empty((n_points, 2), dtype)
    for column, values in enumerate((first, second)):
        coords[:, column] = values.reshape(n_points)
    return coords
def adjacency(dist, idx):
    """Return adjacency matrix of a kNN graph

    dist: ``(M, k)`` array of distances from each of the ``M`` points to its
        ``k`` neighbours; values must lie in ``[0, 1]``.
    idx: ``(M, k)`` integer array with the indices of those neighbours.

    Distances are turned into Gaussian weights ``exp(-d**2 / sigma2)`` where
    ``sigma2`` is the squared mean of the last column of ``dist``. Self
    connections are removed and the matrix is symmetrised by keeping, for
    every pair, the larger of the two directed weights.

    Returns a symmetric ``scipy.sparse.csr_matrix`` of shape ``(M, M)``.
    """
    M, k = dist.shape
    # Parenthesised on purpose: `assert M, k == idx.shape` only asserted
    # that M is truthy and used the comparison as the failure message.
    assert (M, k) == idx.shape
    assert dist.min() >= 0
    assert dist.max() <= 1

    # Pairwise distances
    sigma2 = np.mean(dist[:, -1]) ** 2
    dist = np.exp(-(dist**2) / sigma2)

    # Weight matrix
    I = np.arange(0, M).repeat(k)
    J = idx.reshape(M * k)
    V = dist.reshape(M * k)
    W = scipy.sparse.coo_matrix((V, (I, J)), shape=(M, M))

    # No self-connections
    W.setdiag(0)

    # Undirected graph
    bigger = W.T > W
    W = W - W.multiply(bigger) + W.T.multiply(bigger)

    assert W.nnz % 2 == 0
    assert np.abs(W - W.T).mean() < 1e-10
    # Use the public class rather than the deprecated `scipy.sparse.csr`
    # private namespace.
    assert isinstance(W, scipy.sparse.csr_matrix)
    return W
def batcher(batch):
    """Collate ``(image, label)`` samples into batched graphs and tensors.

    Each image is flattened, extended with ``len(perm) - 28**2`` zeros and
    reordered with the module-level ``perm`` (presumably the zeros fill the
    fake vertices introduced by coarsening -- confirm against ``coarsen``).

    Returns ``(g_batch, x_batch, y_batch)`` where ``g_batch`` holds, for each
    of the ``coarsening_levels + 1`` levels, the level's graph from ``g_arr``
    batched once per sample; ``x_batch`` is the concatenation of all signals
    with a trailing unit dimension; ``y_batch`` is a ``LongTensor`` of labels.
    """
    n_levels = coarsening_levels + 1
    n_padding = len(perm) - 28**2

    signals, targets = [], []
    for image, target in batch:
        padded = torch.cat([image.view(-1), image.new_zeros(n_padding)], 0)
        signals.append(padded[perm])
        targets.append(target)

    x_batch = torch.cat(signals).unsqueeze(-1)
    y_batch = torch.LongTensor(targets)
    # Every sample shares the same graph at a given coarsening level.
    g_batch = [
        dgl.batch([g_arr[level]] * len(signals)) for level in range(n_levels)
    ]
    return g_batch, x_batch, y_batch
# Compute device: CPU when --gpu is -1, otherwise the device built from the
# given index.
if args.gpu == -1:
    device = torch.device("cpu")
else:
    device = torch.device(args.gpu)

# The README invokes this script with "--model cheb" while the argparse
# default and help text say "chebnet". Accept both spellings so that the
# documented command does not silently fall through to MoNet.
if args.model in ("cheb", "chebnet"):
    model = ChebNet(2, 1, [32, 64, 128, 256], 10)
else:
    model = MoNet(10, 1, [32, 64, 128, 256], 10)

model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
log_interval = 50

for epoch in range(10):
    print("epoch {} starts".format(epoch))

    # ---- training pass ----
    model.train()
    hit, tot = 0, 0
    loss_accum = 0
    for i, (g, x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        g = [g_i.to(device) for g_i in g]
        out = model(g, x)
        hit += (out.max(-1)[1] == y).sum().item()
        tot += len(y)
        loss = F.nll_loss(out, y)
        loss_accum += loss.item()

        # Report the running loss/accuracy of the last `log_interval`
        # batches, then reset the counters.
        if (i + 1) % log_interval == 0:
            print(
                "loss: {}, acc: {}".format(loss_accum / log_interval, hit / tot)
            )
            hit, tot = 0, 0
            loss_accum = 0

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # ---- evaluation pass ----
    model.eval()
    hit, tot = 0, 0
    # No gradients are needed for evaluation; skipping autograd bookkeeping
    # keeps the test pass from holding activation graphs in memory.
    with torch.no_grad():
        for g, x, y in test_loader:
            x = x.to(device)
            y = y.to(device)
            g = [g_i.to(device) for g_i in g]
            out = model(g, x)
            hit += (out.max(-1)[1] == y).sum().item()
            tot += len(y)

    print("test acc: ", hit / tot)
class MoNet(nn.Module):
    """Stack of ``GMMConv`` layers evaluated on a fixed graph.

    Every layer owns a ``Linear(2, dim) -> Tanh`` projection that maps the
    2-column pseudo-coordinates to the ``dim`` columns handed to its
    ``GMMConv``. Dropout is applied to the input of every layer but the first.
    """

    def __init__(
        self,
        g,
        in_feats,
        n_hidden,
        out_feats,
        n_layers,
        dim,
        n_kernels,
        dropout,
    ):
        super().__init__()
        self.g = g
        self.layers = nn.ModuleList()
        self.pseudo_proj = nn.ModuleList()

        # (input width, output width) of the input layer, the n_layers - 1
        # hidden layers and the output layer, in that order.
        widths = (
            [(in_feats, n_hidden)]
            + [(n_hidden, n_hidden)] * (n_layers - 1)
            + [(n_hidden, out_feats)]
        )
        for n_in, n_out in widths:
            self.layers.append(GMMConv(n_in, n_out, dim, n_kernels))
            self.pseudo_proj.append(
                nn.Sequential(nn.Linear(2, dim), nn.Tanh())
            )

        self.dropout = nn.Dropout(dropout)

    def forward(self, feat, pseudo):
        """Return the output of the last layer for ``feat`` and ``pseudo``."""
        out = feat
        for position in range(len(self.layers)):
            if position > 0:
                out = self.dropout(out)
            projected = self.pseudo_proj[position](pseudo)
            out = self.layers[position](self.g, out, projected)
        return out
1.0 / len(labels) def main(args): # load and preprocess dataset data = load_data(args) g = data[0] if args.gpu < 0: cuda = False else: cuda = True g = g.to(args.gpu) features = g.ndata["feat"] labels = g.ndata["label"] train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = g.num_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, train_mask.sum().item(), val_mask.sum().item(), test_mask.sum().item(), ) ) # graph preprocess and calculate normalization factor g = g.remove_self_loop().add_self_loop() n_edges = g.num_edges() us, vs = g.edges(order="eid") udeg, vdeg = 1 / torch.sqrt(g.in_degrees(us).float()), 1 / torch.sqrt( g.in_degrees(vs).float() ) pseudo = torch.cat([udeg.unsqueeze(1), vdeg.unsqueeze(1)], dim=1) # create GraphSAGE model model = MoNet( g, in_feats, args.n_hidden, n_classes, args.n_layers, args.pseudo_dim, args.n_kernels, args.dropout, ) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer optimizer = torch.optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.weight_decay ) # initialize graph mean = 0 for epoch in range(args.n_epochs): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(features, pseudo) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: mean = (mean * (epoch - 3) + (time.time() - t0)) / (epoch - 2) acc = evaluate(model, features, pseudo, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, mean, loss.item(), acc, n_edges / mean / 1000, ) ) print() acc = evaluate(model, features, pseudo, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="MoNet on 
citation network") register_data_args(parser) parser.add_argument( "--dropout", type=float, default=0.5, help="dropout probability" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-epochs", type=int, default=200, help="number of training epochs" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden gcn units" ) parser.add_argument( "--n-layers", type=int, default=1, help="number of hidden gcn layers" ) parser.add_argument( "--pseudo-dim", type=int, default=2, help="Pseudo coordinate dimensions in GMMConv, 2 for cora and 3 for pubmed", ) parser.add_argument( "--n-kernels", type=int, default=3, help="Number of kernels in GMMConv layer", ) parser.add_argument( "--weight-decay", type=float, default=5e-4, help="Weight for L2 loss" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/multigpu/README.md ================================================ Multiple GPU Training ============ Requirements ------------ ```bash pip install torchmetrics==0.11.4 ``` How to run ------- ### Graph property prediction Run with the following command (available datasets: "ogbg-molhiv", "ogbg-molpcba") ```bash python3 multi_gpu_graph_prediction.py --dataset ogbg-molhiv ``` #### __Results__ ``` * ogbg-molhiv: ~0.7965 * ogbg-molpcba: ~0.2239 ``` #### __Scalability__ We test the scalability of the code with the dataset "ogbg-molhiv" on an Amazon EC2 g4dn.metal machine, which has **8 Nvidia T4 Tensor Core GPUs**. 
class MLP(nn.Module):
    """Two-layer perceptron that widens to ``2 * in_feats`` and back.

    The pipeline is ``Linear -> BatchNorm1d -> ReLU -> Linear -> BatchNorm1d``
    and the output has the same width as the input.
    """

    def __init__(self, in_feats):
        super().__init__()
        hidden = 2 * in_feats
        stages = [
            nn.Linear(in_feats, hidden),
            nn.BatchNorm1d(hidden),
            nn.ReLU(),
            nn.Linear(hidden, in_feats),
            nn.BatchNorm1d(in_feats),
        ]
        self.mlp = nn.Sequential(*stages)

    def forward(self, h):
        """Apply the perceptron to ``h``."""
        out = self.mlp(h)
        return out
@torch.no_grad()
def evaluate(dataloader, device, model, evaluator):
    """Score ``model`` on every batch of ``dataloader`` with ``evaluator``.

    The model is put in evaluation mode. Each batch is moved to ``device``,
    the model is called with the batched graph and its ``"feat"`` node and
    edge data, and the labels (reshaped to the prediction's shape) and
    predictions are gathered on the CPU.

    Returns whatever ``evaluator.eval`` returns for the dictionary holding
    the concatenated ``"y_true"`` and ``"y_pred"`` NumPy arrays.
    """
    model.eval()
    truths, predictions = [], []
    for graph, target in tqdm(dataloader):
        graph = graph.to(device)
        target = target.to(device)
        scores = model(graph, graph.ndata["feat"], graph.edata["feat"])
        truths.append(target.view(scores.shape).detach().cpu())
        predictions.append(scores.detach().cpu())

    return evaluator.eval(
        {
            "y_true": torch.cat(truths, dim=0).numpy(),
            "y_pred": torch.cat(predictions, dim=0).numpy(),
        }
    )
if __name__ == "__main__":
    # Entry point: parse the dataset name, then launch one training process
    # per visible CUDA device.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="ogbg-molhiv",
        choices=["ogbg-molhiv", "ogbg-molpcba"],
        help="name of dataset (default: ogbg-molhiv)",
    )
    dataset_name = parser.parse_args().dataset
    root = "./data/OGB"

    world_size = torch.cuda.device_count()
    if world_size == 0:
        # With zero processes mp.spawn returns immediately, so the script
        # used to finish "successfully" without training anything.
        raise SystemExit(
            "No CUDA device found: this example trains with the NCCL "
            "backend and needs at least one GPU."
        )

    # Instantiate the dataset once in the parent process before spawning;
    # presumably this downloads/preprocesses it so the workers do not race
    # on the same files -- confirm against the OGB loader.
    DglGraphPropPredDataset(dataset_name, root)

    print("Let's use", world_size, "GPUs!")
    args = (world_size, dataset_name, root)
    import torch.multiprocessing as mp

    # mp.spawn passes the process index as the first argument of `train`.
    mp.spawn(train, args=args, nprocs=world_size, join=True)
================================================ import argparse import os import time import dgl.function as fn import dgl.nn as dglnn import numpy as np import sklearn.linear_model as lm import sklearn.metrics as skm import torch import torch.distributed as dist import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import tqdm from dgl.data import AsNodePredDataset, RedditDataset from dgl.dataloading import ( as_edge_prediction_sampler, DataLoader, MultiLayerFullNeighborSampler, NeighborSampler, ) from dgl.multiprocessing import shared_tensor from ogb.nodeproppred import DglNodePropPredDataset from torch.nn.parallel import DistributedDataParallel class SAGE(nn.Module): def __init__(self, in_size, hid_size, out_size): super().__init__() self.layers = nn.ModuleList() # two-layer GraphSAGE-mean self.layers.append(dglnn.SAGEConv(in_size, hid_size, "mean")) self.layers.append(dglnn.SAGEConv(hid_size, out_size, "mean")) self.dropout = nn.Dropout(0.5) self.hid_size = hid_size self.out_size = out_size def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) if l != len(self.layers) - 1: h = F.relu(h) h = self.dropout(h) return h def inference(self, g, device, batch_size, use_uva): g.ndata["h"] = g.ndata["feat"] sampler = MultiLayerFullNeighborSampler(1, prefetch_node_feats=["h"]) for l, layer in enumerate(self.layers): dataloader = DataLoader( g, torch.arange(g.num_nodes(), device=device), sampler, device=device, batch_size=batch_size, shuffle=False, drop_last=False, num_workers=0, use_ddp=True, use_uva=use_uva, ) # in order to prevent running out of GPU memory, allocate a # shared output tensor 'y' in host memory y = shared_tensor( ( g.num_nodes(), self.hid_size if l != len(self.layers) - 1 else self.out_size, ) ) for input_nodes, output_nodes, blocks in ( tqdm.tqdm(dataloader) if dist.get_rank() == 0 else dataloader ): x = blocks[0].srcdata["h"] h = layer(blocks[0], x) # 
class NegativeSampler(object):
    """Draw negative destination nodes with probability ~ in-degree ** 0.75.

    Parameters
    ----------
    g
        Graph providing ``in_degrees()`` (and ``device`` when ``device`` is
        not given).
    k : int
        Number of negative destinations produced per positive edge.
    neg_share : bool
        When true, and the number of positive edges is a multiple of ``k``,
        each consecutive group of ``k`` positive edges shares the same ``k``
        sampled destinations.
    device
        Device holding the sampling weights; defaults to ``g.device``.
    """

    def __init__(self, g, k, neg_share=False, device=None):
        target_device = g.device if device is None else device
        in_degree = g.in_degrees().float().to(target_device)
        self.weights = in_degree**0.75
        self.k = k
        self.neg_share = neg_share

    def __call__(self, g, eids):
        """Return ``(src, dst)`` negative pairs for the positive edges ``eids``.

        ``src`` repeats every positive source node ``k`` times; ``dst`` holds
        the sampled destination nodes, one per entry of ``src``.
        """
        heads, _ = g.find_edges(eids)
        num_pos = len(heads)
        share_negatives = self.neg_share and num_pos % self.k == 0

        if share_negatives:
            # One draw per positive edge, then tile each group of k draws so
            # that all k edges of the group see the same k destinations.
            draws = self.weights.multinomial(num_pos, replacement=True)
            tails = draws.view(-1, 1, self.k).expand(-1, self.k, -1).flatten()
        else:
            tails = self.weights.multinomial(
                num_pos * self.k, replacement=True
            )

        return heads.repeat_interleave(self.k), tails
""" emb = emb.cpu().numpy() labels = labels.cpu().numpy() train_nids = train_nids.cpu().numpy() train_labels = labels[train_nids] val_nids = val_nids.cpu().numpy() val_labels = labels[val_nids] test_nids = test_nids.cpu().numpy() test_labels = labels[test_nids] emb = (emb - emb.mean(0, keepdims=True)) / emb.std(0, keepdims=True) lr = lm.LogisticRegression(multi_class="multinomial", max_iter=10000) lr.fit(emb[train_nids], train_labels) pred = lr.predict(emb) f1_micro_eval = skm.f1_score(val_labels, pred[val_nids], average="micro") f1_micro_test = skm.f1_score(test_labels, pred[test_nids], average="micro") return f1_micro_eval, f1_micro_test def evaluate(proc_id, model, g, device, use_uva): model.eval() batch_size = 10000 with torch.no_grad(): pred = model.module.inference(g, device, batch_size, use_uva) return pred def train( proc_id, nprocs, device, g, train_idx, val_idx, test_idx, model, use_uva ): # Create PyTorch DataLoader for constructing blocks n_edges = g.num_edges() train_seeds = torch.arange(n_edges).to(device) labels = g.ndata["label"].to("cpu") sampler = NeighborSampler([10, 25], prefetch_node_feats=["feat"]) sampler = as_edge_prediction_sampler( sampler, exclude="reverse_id", # For each edge with ID e in Reddit dataset, the reverse edge is e ± |E|/2. 
reverse_eids=torch.cat( [torch.arange(n_edges // 2, n_edges), torch.arange(0, n_edges // 2)] ).to(train_seeds), # num_negs = 1, neg_share = False negative_sampler=NegativeSampler( g, 1, False, device if use_uva else None ), ) train_dataloader = DataLoader( g, train_seeds, sampler, device=device, batch_size=10000, shuffle=True, drop_last=False, num_workers=0, use_ddp=True, use_uva=use_uva, ) opt = torch.optim.Adam(model.parameters(), lr=0.003) loss_fcn = CrossEntropyLoss() iter_pos = [] iter_neg = [] for epoch in range(10): tic = time.time() model.train() for step, (input_nodes, pos_graph, neg_graph, blocks) in enumerate( train_dataloader ): x = blocks[0].srcdata["feat"] y_hat = model(blocks, x) loss = loss_fcn(y_hat, pos_graph, neg_graph) opt.zero_grad() loss.backward() opt.step() if step % 20 == 0 and proc_id == 0: # log every 20 steps # gpu memory reserved by PyTorch gpu_mem_alloc = ( torch.cuda.max_memory_allocated() / 1000000 if torch.cuda.is_available() else 0 ) print( f"Epoch {epoch:05d} | Step {step:05d} | Loss {loss.item():.4f} | GPU {gpu_mem_alloc:.1f} MB" ) t = time.time() - tic if proc_id == 0: print(f"Epoch Time(s): {t:.4f}") if (epoch + 1) % 5 == 0: # eval every 5 epochs pred = evaluate(proc_id, model, g, device, use_uva) # in parallel if proc_id == 0: # only master proc does the accuracy computation eval_acc, test_acc = compute_acc_unsupervised( pred, labels, train_idx, val_idx, test_idx ) print( f"Epoch {epoch:05d} | Eval F1-score {eval_acc:.4f} | Test F1-Score {test_acc:.4f}" ) def run(proc_id, nprocs, devices, g, data, mode): # find corresponding device for my rank device = devices[proc_id] torch.cuda.set_device(device) # initialize process group and unpack data for sub-processes dist.init_process_group( backend="nccl", init_method="tcp://127.0.0.1:12345", world_size=nprocs, rank=proc_id, ) out_size, train_idx, val_idx, test_idx = data g = g.to(device if mode == "puregpu" else "cpu") # create GraphSAGE model (distributed) in_size = 
if __name__ == "__main__":
    # Script entry point: parse options, load the graph once in the parent
    # process, then spawn one training process per requested GPU.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dataset",
        type=str,
        default="ogbn-products",
        choices=["ogbn-products", "reddit"],
        help="name of dataset (default: ogbn-products)",
    )
    parser.add_argument(
        "--mode",
        default="mixed",
        choices=["mixed", "puregpu"],
        help="Training mode. 'mixed' for CPU-GPU mixed training, "
        "'puregpu' for pure-GPU training.",
    )
    parser.add_argument(
        "--gpu",
        type=str,
        default="0",
        help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training,"
        " e.g., 0,1,2,3.",
    )
    args = parser.parse_args()
    devices = list(map(int, args.gpu.split(",")))
    nprocs = len(devices)
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if not torch.cuda.is_available():
        raise RuntimeError("Must have GPUs to enable multi-gpu training.")
    print(f"Training in {args.mode} mode using {nprocs} GPU(s)")

    # load and preprocess dataset
    print("Loading data")
    if args.dataset == "ogbn-products":
        # can it be AsLinkPredDataset?
        dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
    elif args.dataset == "reddit":
        dataset = AsNodePredDataset(RedditDataset(self_loop=False))
    g = dataset[0]
    # avoid creating certain graph formats in each sub-process to save memory
    g.create_formats_()
    # Thread limiting to avoid resource competition. The integer division can
    # reach 0 on machines with few cores or many GPUs, which is not a valid
    # OMP_NUM_THREADS value, so clamp to at least one thread.
    os.environ["OMP_NUM_THREADS"] = str(max(1, mp.cpu_count() // 2 // nprocs))
    data = (
        dataset.num_classes,
        dataset.train_idx,
        dataset.val_idx,
        dataset.test_idx,
    )

    mp.spawn(run, args=(nprocs, devices, g, data, args.mode), nprocs=nprocs)
def train(
    proc_id, nprocs, device, g, num_classes, train_idx, val_idx, model, use_uva
):
    """Train ``model`` for 10 epochs with neighbor sampling and report metrics.

    Parameters
    ----------
    proc_id : int
        Rank of this process; only rank 0 prints.
    nprocs : int
        Number of participating processes, used to average the accuracy.
    device
        Device on which sampled mini-batches are produced.
    g
        Graph whose node data provides ``"feat"`` and ``"label"``.
    num_classes : int
        Number of classes, forwarded to ``evaluate``.
    train_idx, val_idx
        Seed node IDs for the training and validation dataloaders.
    model
        Model called as ``model(blocks, x)``.
    use_uva : bool
        Forwarded to both dataloaders.
    """
    sampler = NeighborSampler(
        [10, 10, 10], prefetch_node_feats=["feat"], prefetch_labels=["label"]
    )
    train_dataloader = DataLoader(
        g,
        train_idx,
        sampler,
        device=device,
        batch_size=1024,
        shuffle=True,
        drop_last=False,
        num_workers=0,
        use_ddp=True,
        use_uva=use_uva,
    )
    val_dataloader = DataLoader(
        g,
        val_idx,
        sampler,
        device=device,
        batch_size=1024,
        shuffle=True,
        drop_last=False,
        num_workers=0,
        use_ddp=True,
        use_uva=use_uva,
    )
    opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)
    for epoch in range(10):
        model.train()
        total_loss = 0
        num_batches = 0
        for input_nodes, output_nodes, blocks in train_dataloader:
            x = blocks[0].srcdata["feat"]
            y = blocks[-1].dstdata["label"]
            y_hat = model(blocks, x)
            loss = F.cross_entropy(y_hat, y)
            opt.zero_grad()
            loss.backward()
            opt.step()
            # Accumulate a detached value: summing the live loss tensor would
            # chain every iteration's autograd history into ``total_loss``.
            total_loss += loss.detach()
            num_batches += 1
        # Each rank contributes accuracy / nprocs; the reduce onto rank 0 then
        # yields the mean of the per-rank accuracies.
        acc = (
            evaluate(model, g, num_classes, val_dataloader).to(device) / nprocs
        )
        dist.reduce(acc, 0)
        if proc_id == 0:
            # ``max`` keeps the report well-defined if a rank saw no batch.
            mean_loss = float(total_loss) / max(num_batches, 1)
            print(
                "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
                    epoch, mean_loss, acc.item()
                )
            )
'mixed' for CPU-GPU mixed training, " "'puregpu' for pure-GPU training.", ) parser.add_argument( "--gpu", type=str, default="0", help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training," " e.g., 0,1,2,3.", ) args = parser.parse_args() devices = list(map(int, args.gpu.split(","))) nprocs = len(devices) assert ( torch.cuda.is_available() ), f"Must have GPUs to enable multi-gpu training." print(f"Training in {args.mode} mode using {nprocs} GPU(s)") # load and preprocess dataset print("Loading data") dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) g = dataset[0] # avoid creating certain graph formats in each sub-process to save momory g.create_formats_() # thread limiting to avoid resource competition os.environ["OMP_NUM_THREADS"] = str(mp.cpu_count() // 2 // nprocs) data = ( dataset.num_classes, dataset.train_idx, dataset.val_idx, dataset.test_idx, ) mp.spawn(run, args=(nprocs, devices, g, data, args.mode), nprocs=nprocs) ================================================ FILE: examples/pytorch/mvgrl/README.md ================================================ # DGL Implementation of MVGRL This DGL example implements the model proposed in the paper [Contrastive Multi-View Representation Learning on Graphs](https://arxiv.org/abs/2006.05582). Author's code: https://github.com/kavehhassani/mvgrl ## Example Implementor This example was implemented by [Hengrui Zhang](https://github.com/hengruizhang98) when he was an applied scientist intern at AWS Shanghai AI Lab. ## Dependencies - Python 3.7 - PyTorch 1.7.1 - dgl 0.6.0 - networkx - scipy ## Datasets ##### Unsupervised Graph Classification Datasets: 'MUTAG', 'PTC_MR', 'REDDIT-BINARY', 'IMDB-BINARY', 'IMDB-MULTI'. | Dataset | MUTAG | PTC_MR | RDT-B | IMDB-B | IMDB-M | | --------------- | ----- | ------ | ------ | ------ | ------ | | # Graphs | 188 | 344 | 2000 | 1000 | 1500 | | # Classes | 2 | 2 | 2 | 2 | 3 | | Avg. 
Graph Size | 17.93 | 14.29 | 429.63 | 19.77 | 13.00 | * RDT-B, IMDB-B, IMDB-M are short for REDDIT-BINARY, IMDB-BINARY and IMDB-MULTI respectively. ##### Unsupervised Node Classification Datasets: 'Cora', 'Citeseer' and 'Pubmed' | Dataset | # Nodes | # Edges | # Classes | | -------- | ------- | ------- | --------- | | Cora | 2,708 | 10,556 | 7 | | Citeseer | 3,327 | 9,228 | 6 | | Pubmed | 19,717 | 88,651 | 3 | ## Arguments ##### Graph Classification: ``` --dataname str The graph dataset name. Default is 'MUTAG'. --gpu int GPU index. Default is -1, using cpu. --epochs int Number of training epochs. Default is 200. --patience int Early stopping steps. Default is 20. --lr float Learning rate. Default is 0.001. --wd float Weight decay. Default is 0.0. --batch_size int Size of a training batch. Default is 64. --n_layers int Number of GNN layers. Default is 4. --hid_dim int Embedding dimension. Default is 32. ``` ##### Node Classification: ``` --dataname str The graph dataset name. Default is 'cora'. --gpu int GPU index. Default is -1, using cpu. --epochs int Number of training epochs. Default is 500. --patience int Early stopping steps. Default is 20. --lr1 float Learning rate of main model. Default is 0.001. --lr2 float Learning rate of linear classifier. Default is 0.01. --wd1 float Weight decay of main model. Default is 0.0. --wd2 float Weight decay of linear classifier. Default is 0.0. --epsilon float Edge mask threshold. Default is 0.01. --hid_dim int Embedding dimension. Default is 512. --sample_size int Subgraph size. Default is 2000. 
``` ## How to run examples ###### Graph Classification ```bash # Enter the 'graph' directory cd graph # MUTAG: python main.py --dataname MUTAG --epochs 20 # PTC_MR: python main.py --dataname PTC_MR --epochs 32 --hid_dim 128 # REDDIT-BINARY python main.py --dataname REDDIT-BINARY --epochs 20 --hid_dim 128 # IMDB-BINARY python main.py --dataname IMDB-BINARY --epochs 20 --hid_dim 512 --n_layers 2 # IMDB-MULTI python main.py --dataname IMDB-MULTI --epochs 20 --hid_dim 512 --n_layers 2 ``` ###### Node Classification For semi-supervised node classification on 'Cora', 'Citeseer' and 'Pubmed', we provide two implementations: 1. full-graph training, see 'main.py', where we contrast the local and global representations of the whole graph. 2. subgraph training, see 'main_sample.py', where we contrast the local and global representations of a sampled subgraph with a fixed number of nodes. For larger graphs (e.g., Pubmed), it is hard to calculate the graph diffusion matrix (i.e., the PPR matrix), so we approximate it with [APPNP](https://arxiv.org/abs/1810.05997); see the function 'process_dataset_appnp' in 'node/dataset.py' for details. ```bash # Enter the 'node' directory cd node # Cora with full graph python main.py --dataname cora --gpu 0 # Cora with sampled subgraphs python main_sample.py --dataname cora --gpu 0 # Citeseer with full graph python main.py --dataname citeseer --wd1 0.001 --wd2 0.01 --epochs 200 --gpu 0 # Citeseer with sampled subgraphs python main_sample.py --dataname citeseer --wd2 0.01 --gpu 0 # Pubmed with sampled subgraphs python main_sample.py --dataname pubmed --sample_size 4000 --epochs 400 --patience 999 --gpu 0 ``` ## Performance We use the same hyper-parameter settings as stated in the original paper. 
def download(dataset, datadir):
    """Download a graph-kernel benchmark archive and unpack it into ``datadir``.

    The archive ``<dataset>.zip`` is fetched, extracted, and the contents of
    its top-level ``<dataset>/`` folder are moved into ``datadir``.

    Everything happens inside a temporary directory and ``datadir`` is only
    created once the archive has been fetched and extracted, so a failed
    download does not leave an empty ``datadir`` behind. Using the standard
    library instead of ``wget``/``unzip``/``mv``/``rm`` shell strings also
    surfaces failures as exceptions and avoids passing ``dataset`` through a
    shell.

    Parameters
    ----------
    dataset : str
        Dataset name; it selects the archive and the folder inside it.
    datadir : str
        Destination directory. It must not exist yet.

    Raises
    ------
    FileExistsError
        If ``datadir`` already exists.
    OSError
        If the download or the extraction fails (``urllib.error.URLError``
        is a subclass).
    """
    # Local imports keep this block self-contained.
    import shutil
    import tempfile
    import urllib.request
    import zipfile

    url = "https://ls11-www.cs.tu-dortmund.de/people/morris/graphkerneldatasets/{0}.zip".format(
        dataset
    )
    with tempfile.TemporaryDirectory() as workdir:
        archive = os.path.join(workdir, os.path.basename(url))
        with urllib.request.urlopen(url) as response, open(
            archive, "wb"
        ) as out:
            shutil.copyfileobj(response, out)
        with zipfile.ZipFile(archive) as zf:
            zf.extractall(workdir)

        extracted = os.path.join(workdir, dataset)
        os.makedirs(datadir)
        for entry in os.listdir(extracted):
            shutil.move(
                os.path.join(extracted, entry), os.path.join(datadir, entry)
            )
def _as_object_array(items):
    """Pack per-graph arrays of possibly different shapes into a 1-D object array."""
    packed = np.empty(len(items), dtype=object)
    for i, item in enumerate(items):
        packed[i] = item
    return packed


def load(dataset):
    """Build the two-view dataset (original graph + diffusion graph) for ``dataset``.

    On the first call the raw files are downloaded and processed, and the
    adjacency matrices, diffusion matrices, node features and labels are
    cached as ``.npy`` files under ``data/<dataset>`` next to this module.
    Later calls read the cache.

    Parameters
    ----------
    dataset : str
        Name of the dataset to load.

    Returns
    -------
    TUDataset
        Dataset whose items are ``(graph, diff_graph, label)``.
    """
    basedir = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(basedir, "data", dataset)

    if not os.path.exists(datadir):
        download(dataset, datadir)
        graphs, diff = process(dataset)
        feat, adj, labels = [], [], []

        for graph in graphs:
            adj.append(nx.to_numpy_array(graph))
            labels.append(graph.graph["label"])
            feat.append(
                np.array(list(nx.get_node_attributes(graph, "feat").values()))
            )

        # Graphs differ in size, so the per-graph matrices are ragged.
        # ``np.array`` on such a list raises ValueError on NumPy >= 1.24,
        # hence the explicit object arrays (saved via pickle, which the
        # ``allow_pickle=True`` loads below already expect).
        adj = _as_object_array(adj)
        diff = _as_object_array(diff)
        feat = _as_object_array(feat)
        labels = np.array(labels)

        np.save(f"{datadir}/adj.npy", adj)
        np.save(f"{datadir}/diff.npy", diff)
        np.save(f"{datadir}/feat.npy", feat)
        np.save(f"{datadir}/labels.npy", labels)
    else:
        adj = np.load(f"{datadir}/adj.npy", allow_pickle=True)
        diff = np.load(f"{datadir}/diff.npy", allow_pickle=True)
        feat = np.load(f"{datadir}/feat.npy", allow_pickle=True)
        labels = np.load(f"{datadir}/labels.npy", allow_pickle=True)

    n_graphs = adj.shape[0]

    graphs = []
    diff_graphs = []
    lbls = []

    for i in range(n_graphs):
        # Structural view: edges are the non-zero adjacency entries, plus
        # self-loops.
        a = adj[i]
        edge_indexes = a.nonzero()

        graph = dgl.graph(edge_indexes)
        graph = graph.add_self_loop()
        graph.ndata["feat"] = th.tensor(feat[i]).float()

        # Diffusion view: edges are the non-zero diffusion entries, weighted
        # by their values.
        diff_adj = diff[i]
        diff_indexes = diff_adj.nonzero()
        diff_weight = th.tensor(diff_adj[diff_indexes]).float()

        diff_graph = dgl.graph(diff_indexes)
        diff_graph.edata["edge_weight"] = diff_weight
        label = labels[i]
        graphs.append(graph)
        diff_graphs.append(diff_graph)
        lbls.append(label)

    labels = th.tensor(lbls)

    dataset = TUDataset(graphs, diff_graphs, labels)
    return dataset
def collate(samples):
    """Merge ``(graph, diff_graph, label)`` samples into one training batch.

    Returns ``(batched_graph, batched_diff_graph, batched_labels)``. The
    batched original graph additionally gets a ``"graph_id"`` node feature,
    produced by broadcasting ``0 .. len(samples) - 1`` from each graph to its
    nodes.
    """
    graph_list, diff_list, label_list = [
        list(column) for column in zip(*samples)
    ]

    merged_graph = dgl.batch(graph_list)
    merged_labels = th.tensor(label_list)
    merged_diff = dgl.batch(diff_list)

    # One id per graph in the batch, expanded to one id per node.
    per_graph_ids = th.arange(len(graph_list))
    merged_graph.ndata["graph_id"] = dgl.broadcast_nodes(
        merged_graph, per_graph_ids
    )

    return merged_graph, merged_diff, merged_labels
class MLP(nn.Module):
    """Three Linear+PReLU layers with a parallel linear shortcut.

    ``forward`` returns the sum of the stacked branch (``fcs``) and a single
    linear projection of the input (``linear_shortcut``).

    Parameters
    ----------
    in_dim : int
        Size of the input features.
    out_dim : int
        Size of every hidden layer and of the output.
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()
        widths = [in_dim, out_dim, out_dim, out_dim]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.PReLU())
        self.fcs = nn.Sequential(*stack)
        self.linear_shortcut = nn.Linear(in_dim, out_dim)

    def forward(self, x):
        main_branch = self.fcs(x)
        skip_branch = self.linear_shortcut(x)
        return main_branch + skip_branch
self.layers.append( GraphConv( in_dim, out_dim, bias=False, norm=norm, activation=nn.PReLU() ) ) self.pooling = SumPooling() for _ in range(num_layers - 1): self.layers.append( GraphConv( out_dim, out_dim, bias=False, norm=norm, activation=nn.PReLU(), ) ) def forward(self, graph, feat, edge_weight=None): h = self.layers[0](graph, feat, edge_weight=edge_weight) hg = self.pooling(graph, h) for idx in range(self.num_layers - 1): h = self.layers[idx + 1](graph, h, edge_weight=edge_weight) hg = th.cat((hg, self.pooling(graph, h)), -1) return h, hg class MVGRL(nn.Module): r""" mvgrl model Parameters ----------- in_dim: int Input feature size. out_dim: int Output feature size. num_layers: int Number of the GNN encoder layers. Functions ----------- forward(graph1, graph2, feat, edge_weight): graph1: DGLGraph The original graph graph2: DGLGraph The diffusion graph feat: tensor Node features edge_weight: tensor Edge weight of the diffusion graph """ def __init__(self, in_dim, out_dim, num_layers): super(MVGRL, self).__init__() self.local_mlp = MLP(out_dim, out_dim) self.global_mlp = MLP(num_layers * out_dim, out_dim) self.encoder1 = GCN(in_dim, out_dim, num_layers, norm="both") self.encoder2 = GCN(in_dim, out_dim, num_layers, norm="none") def get_embedding(self, graph1, graph2, feat, edge_weight): local_v1, global_v1 = self.encoder1(graph1, feat) local_v2, global_v2 = self.encoder2( graph2, feat, edge_weight=edge_weight ) global_v1 = self.global_mlp(global_v1) global_v2 = self.global_mlp(global_v2) return (global_v1 + global_v2).detach() def forward(self, graph1, graph2, feat, edge_weight, graph_id): # calculate node embeddings and graph embeddings local_v1, global_v1 = self.encoder1(graph1, feat) local_v2, global_v2 = self.encoder2( graph2, feat, edge_weight=edge_weight ) local_v1 = self.local_mlp(local_v1) local_v2 = self.local_mlp(local_v2) global_v1 = self.global_mlp(global_v1) global_v2 = self.global_mlp(global_v2) # calculate loss loss1 = local_global_loss_(local_v1, 
def get_positive_expectation(p_samples, average=True):
    """Computes the positive part of a JS Divergence.

    Args:
        p_samples: Positive samples.
        average: Average the result over samples.

    Returns:
        th.Tensor
    """
    log_2 = math.log(2.0)
    Ep = log_2 - F.softplus(-p_samples)
    return Ep.mean() if average else Ep


def get_negative_expectation(q_samples, average=True):
    """Computes the negative part of a JS Divergence.

    Args:
        q_samples: Negative samples.
        average: Average the result over samples.

    Returns:
        th.Tensor
    """
    log_2 = math.log(2.0)
    Eq = F.softplus(-q_samples) + q_samples - log_2
    return Eq.mean() if average else Eq


def local_global_loss_(l_enc, g_enc, graph_id):
    """Jensen-Shannon local/global contrastive loss.

    Every node embedding is scored against every graph embedding. The pair
    (node, graph that owns the node) is a positive sample, every other pair
    is a negative sample.

    Args:
        l_enc: Node embeddings; row ``i`` belongs to node ``i``.
        g_enc: Graph embeddings; row ``j`` belongs to graph ``j``.
        graph_id: For each node, the row index in ``g_enc`` of its graph.

    Returns:
        th.Tensor: scalar ``E_neg - E_pos``.
    """
    num_graphs = g_enc.shape[0]
    num_nodes = l_enc.shape[0]
    device = g_enc.device

    # Build the masks with one indexed assignment instead of a Python loop
    # that writes a single tensor element per node.
    owner = th.as_tensor(graph_id, device=device).long().reshape(-1)
    rows = th.arange(owner.shape[0], device=device)
    pos_mask = th.zeros((num_nodes, num_graphs), device=device)
    pos_mask[rows, owner] = 1.0
    neg_mask = 1.0 - pos_mask

    res = th.mm(l_enc, g_enc.t())

    # Masked-out entries are 0 and contribute (numerically) nothing to either
    # sum, so plain sums over the full matrices are used.
    E_pos = get_positive_expectation(res * pos_mask, average=False).sum()
    E_pos = E_pos / num_nodes
    E_neg = get_negative_expectation(res * neg_mask, average=False).sum()
    # A batch with a single graph has no negative pairs; dividing by
    # (num_graphs - 1) == 0 would turn the loss into NaN.
    E_neg = E_neg / (num_nodes * max(num_graphs - 1, 1))

    return E_neg - E_pos
def process_dataset(name, epsilon):
    """Load a citation dataset and build its PPR diffusion graph.

    Parameters
    ----------
    name : str
        Either ``"cora"`` or ``"citeseer"``.
    epsilon : float
        Diffusion entries below this value are zeroed before the diffusion
        matrix is rescaled (applied in the citeseer branch only).

    Returns
    -------
    tuple
        ``(graph, diff_graph, feat, label, train_idx, val_idx, test_idx,
        diff_weight)`` where ``graph`` has self-loops added, ``diff_graph`` is
        built from the non-zero entries of the diffusion matrix and
        ``diff_weight`` holds those entries.

    Raises
    ------
    ValueError
        If ``name`` is not a supported dataset.
    """
    if name == "cora":
        dataset = CoraGraphDataset()
    elif name == "citeseer":
        dataset = CiteseerGraphDataset()
    else:
        # Fail here with a clear message instead of an UnboundLocalError on
        # ``dataset`` a few lines below.
        raise ValueError(
            f"Unsupported dataset name {name!r}; expected 'cora' or 'citeseer'."
        )

    graph = dataset[0]
    feat = graph.ndata.pop("feat")
    label = graph.ndata.pop("label")

    train_mask = graph.ndata.pop("train_mask")
    val_mask = graph.ndata.pop("val_mask")
    test_mask = graph.ndata.pop("test_mask")

    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    val_idx = th.nonzero(val_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()

    nx_g = dgl.to_networkx(graph)

    print("computing ppr")
    diff_adj = compute_ppr(nx_g, 0.2)
    print("computing end")

    if name == "citeseer":
        print("additional processing")
        feat = th.tensor(preprocess_features(feat.numpy())).float()
        diff_adj[diff_adj < epsilon] = 0
        scaler = MinMaxScaler()
        scaler.fit(diff_adj)
        diff_adj = scaler.transform(diff_adj)

    # Every remaining non-zero entry of the diffusion matrix becomes an edge.
    diff_edges = np.nonzero(diff_adj)
    diff_weight = diff_adj[diff_edges]
    diff_graph = dgl.graph(diff_edges)

    graph = graph.add_self_loop()

    return (
        graph,
        diff_graph,
        feat,
        label,
        train_idx,
        val_idx,
        test_idx,
        diff_weight,
    )
import argparse
import warnings

import numpy as np
import torch as th
import torch.nn as nn

warnings.filterwarnings("ignore")

from dataset import process_dataset
from model import LogReg, MVGRL

parser = argparse.ArgumentParser(description="mvgrl")

parser.add_argument(
    "--dataname", type=str, default="cora", help="Name of dataset."
)
# The help text used to claim a default of -1 although the default is 0.
parser.add_argument(
    "--gpu", type=int, default=0, help="GPU index. Default: 0. Use -1 for CPU."
)
parser.add_argument("--epochs", type=int, default=500, help="Training epochs.")
parser.add_argument(
    "--patience",
    type=int,
    default=20,
    help="Patient epochs to wait before early stopping.",
)
parser.add_argument(
    "--lr1", type=float, default=0.001, help="Learning rate of mvgrl."
)
parser.add_argument(
    "--lr2", type=float, default=0.01, help="Learning rate of linear evaluator."
)
parser.add_argument(
    "--wd1", type=float, default=0.0, help="Weight decay of mvgrl."
)
parser.add_argument(
    "--wd2", type=float, default=0.0, help="Weight decay of linear evaluator."
)
parser.add_argument(
    "--epsilon",
    type=float,
    default=0.01,
    help="Edge mask threshold of diffusion graph.",
)
parser.add_argument(
    "--hid_dim", type=int, default=512, help="Hidden layer dim."
)

args = parser.parse_args()

# check cuda: fall back to CPU when no GPU is requested or none is available
if args.gpu != -1 and th.cuda.is_available():
    args.device = "cuda:{}".format(args.gpu)
else:
    args.device = "cpu"

if __name__ == "__main__":
    print(args)

    # Step 1: Prepare data =================================================================== #
    (
        graph,
        diff_graph,
        feat,
        label,
        train_idx,
        val_idx,
        test_idx,
        edge_weight,
    ) = process_dataset(args.dataname, args.epsilon)
    n_feat = feat.shape[1]
    n_classes = np.unique(label).shape[0]

    graph = graph.to(args.device)
    diff_graph = diff_graph.to(args.device)
    feat = feat.to(args.device)
    edge_weight = th.tensor(edge_weight).float().to(args.device)

    train_idx = train_idx.to(args.device)
    val_idx = val_idx.to(args.device)
    test_idx = test_idx.to(args.device)

    n_node = graph.num_nodes()
    # Discriminator targets: 2 * n_node positive scores, then 2 * n_node negative ones.
    lbl1 = th.ones(n_node * 2)
    lbl2 = th.zeros(n_node * 2)
    lbl = th.cat((lbl1, lbl2))

    # Step 2: Create model =================================================================== #
    model = MVGRL(n_feat, args.hid_dim)
    model = model.to(args.device)

    lbl = lbl.to(args.device)

    # Step 3: Create training components ===================================================== #
    optimizer = th.optim.Adam(
        model.parameters(), lr=args.lr1, weight_decay=args.wd1
    )
    loss_fn = nn.BCEWithLogitsLoss()

    # Step 4: Training epochs ================================================================ #
    best = float("inf")
    cnt_wait = 0
    for epoch in range(args.epochs):
        model.train()
        optimizer.zero_grad()

        # Corrupted view: the same graphs with row-shuffled node features.
        shuf_idx = np.random.permutation(n_node)
        shuf_feat = feat[shuf_idx, :]
        shuf_feat = shuf_feat.to(args.device)

        out = model(graph, diff_graph, feat, shuf_feat, edge_weight)
        loss = loss_fn(out, lbl)

        loss.backward()
        optimizer.step()

        # Track the best loss as a plain float rather than keeping the loss
        # tensor alive across epochs.
        loss_val = loss.item()
        print("Epoch: {0}, Loss: {1:0.4f}".format(epoch, loss_val))

        if loss_val < best:
            best = loss_val
            cnt_wait = 0
            th.save(model.state_dict(), "model.pkl")
        else:
            cnt_wait += 1

        if cnt_wait == args.patience:
            print("Early stopping")
            break

    model.load_state_dict(th.load("model.pkl"))
    embeds = model.get_embedding(graph, diff_graph, feat, edge_weight)

    train_embs = embeds[train_idx]
    test_embs = embeds[test_idx]

    label = label.to(args.device)
    train_labels = label[train_idx]
    test_labels = label[test_idx]
    accs = []

    # Step 5: Linear evaluation ========================================================== #
    for _ in range(5):
        model = LogReg(args.hid_dim, n_classes)
        opt = th.optim.Adam(
            model.parameters(), lr=args.lr2, weight_decay=args.wd2
        )

        model = model.to(args.device)
        loss_fn = nn.CrossEntropyLoss()
        for epoch in range(300):
            model.train()
            opt.zero_grad()
            logits = model(train_embs)
            loss = loss_fn(logits, train_labels)
            loss.backward()
            opt.step()

        model.eval()
        logits = model(test_embs)
        preds = th.argmax(logits, dim=1)
        acc = th.sum(preds == test_labels).float() / test_labels.shape[0]
        accs.append(acc * 100)

    accs = th.stack(accs)
    print(accs.mean().item(), accs.std().item())
) parser.add_argument( "--wd2", type=float, default=0.0, help="Weight decay of linear evaluator." ) parser.add_argument( "--epsilon", type=float, default=0.01, help="Edge mask threshold of diffusion graph.", ) parser.add_argument( "--hid_dim", type=int, default=512, help="Hidden layer dim." ) parser.add_argument( "--sample_size", type=int, default=2000, help="Subgraph size." ) args = parser.parse_args() # check cuda if args.gpu != -1 and th.cuda.is_available(): args.device = "cuda:{}".format(args.gpu) else: args.device = "cpu" if __name__ == "__main__": print(args) # Step 1: Prepare data =================================================================== # if args.dataname == "pubmed": ( graph, diff_graph, feat, label, train_idx, val_idx, test_idx, edge_weight, ) = process_dataset_appnp(args.epsilon) else: ( graph, diff_graph, feat, label, train_idx, val_idx, test_idx, edge_weight, ) = process_dataset(args.dataname, args.epsilon) edge_weight = th.tensor(edge_weight).float() graph.ndata["feat"] = feat diff_graph.edata["edge_weight"] = edge_weight n_feat = feat.shape[1] n_classes = np.unique(label).shape[0] edge_weight = th.tensor(edge_weight).float() train_idx = train_idx.to(args.device) val_idx = val_idx.to(args.device) test_idx = test_idx.to(args.device) n_node = graph.num_nodes() sample_size = args.sample_size lbl1 = th.ones(sample_size * 2) lbl2 = th.zeros(sample_size * 2) lbl = th.cat((lbl1, lbl2)) lbl = lbl.to(args.device) # Step 2: Create model =================================================================== # model = MVGRL(n_feat, args.hid_dim) model = model.to(args.device) # Step 3: Create training components ===================================================== # optimizer = th.optim.Adam( model.parameters(), lr=args.lr1, weight_decay=args.wd1 ) loss_fn = nn.BCEWithLogitsLoss() node_list = list(range(n_node)) # Step 4: Training epochs ================================================================ # best = float("inf") cnt_wait = 0 for epoch in 
range(args.epochs): model.train() optimizer.zero_grad() sample_idx = random.sample(node_list, sample_size) g = dgl.node_subgraph(graph, sample_idx) dg = dgl.node_subgraph(diff_graph, sample_idx) f = g.ndata.pop("feat") ew = dg.edata.pop("edge_weight") shuf_idx = np.random.permutation(sample_size) sf = f[shuf_idx, :] g = g.to(args.device) dg = dg.to(args.device) f = f.to(args.device) ew = ew.to(args.device) sf = sf.to(args.device) out = model(g, dg, f, sf, ew) loss = loss_fn(out, lbl) loss.backward() optimizer.step() print("Epoch: {0}, Loss: {1:0.4f}".format(epoch, loss.item())) if loss < best: best = loss cnt_wait = 0 th.save(model.state_dict(), "model.pkl") else: cnt_wait += 1 if cnt_wait == args.patience: print("Early stopping") break model.load_state_dict(th.load("model.pkl")) graph = graph.to(args.device) diff_graph = diff_graph.to(args.device) feat = feat.to(args.device) edge_weight = edge_weight.to(args.device) embeds = model.get_embedding(graph, diff_graph, feat, edge_weight) train_embs = embeds[train_idx] test_embs = embeds[test_idx] label = label.to(args.device) train_labels = label[train_idx] test_labels = label[test_idx] accs = [] # Step 5: Linear evaluation ========================================================== # for _ in range(5): model = LogReg(args.hid_dim, n_classes) opt = th.optim.Adam( model.parameters(), lr=args.lr2, weight_decay=args.wd2 ) model = model.to(args.device) loss_fn = nn.CrossEntropyLoss() for epoch in range(300): model.train() opt.zero_grad() logits = model(train_embs) loss = loss_fn(logits, train_labels) loss.backward() opt.step() model.eval() logits = model(test_embs) preds = th.argmax(logits, dim=1) acc = th.sum(preds == test_labels).float() / test_labels.shape[0] accs.append(acc * 100) accs = th.stack(accs) print(accs.mean().item(), accs.std().item()) ================================================ FILE: examples/pytorch/mvgrl/node/model.py ================================================ import torch as th import torch.nn 
class LogReg(nn.Module):
    """Single linear layer used as the logistic-regression evaluator."""

    def __init__(self, hid_dim, n_classes):
        super().__init__()
        self.fc = nn.Linear(hid_dim, n_classes)

    def forward(self, x):
        """Return the class logits for ``x``."""
        return self.fc(x)


class Discriminator(nn.Module):
    """Bilinear scorer between node embeddings and a summary vector."""

    def __init__(self, dim):
        super().__init__()
        self.fn = nn.Bilinear(dim, dim, 1)

    def forward(self, h1, h2, h3, h4, c1, c2):
        """Score four (node embedding, summary) pairings.

        The returned 1-D tensor concatenates, in this order, the scores of
        ``(h2, c1)``, ``(h1, c2)`` (positive pairs) and ``(h4, c1)``,
        ``(h3, c2)`` (negative pairs).
        """
        summary_1 = c1.expand_as(h1).contiguous()
        summary_2 = c2.expand_as(h2).contiguous()

        pairings = (
            (h2, summary_1),  # positive
            (h1, summary_2),  # positive
            (h4, summary_1),  # negative
            (h3, summary_2),  # negative
        )
        scores = [self.fn(nodes, ctx).squeeze(1) for nodes, ctx in pairings]
        return th.cat(scores)


class MVGRL(nn.Module):
    """Two-view node encoder with a shared bilinear discriminator.

    ``encoder1`` runs on the original graph (``norm="both"``) and ``encoder2``
    on the diffusion graph (``norm="none"``, with edge weights).
    """

    def __init__(self, in_dim, out_dim):
        super().__init__()

        self.encoder1 = GraphConv(
            in_dim, out_dim, norm="both", bias=True, activation=nn.PReLU()
        )
        self.encoder2 = GraphConv(
            in_dim, out_dim, norm="none", bias=True, activation=nn.PReLU()
        )
        self.pooling = AvgPooling()

        self.disc = Discriminator(out_dim)
        self.act_fn = nn.Sigmoid()

    def get_embedding(self, graph, diff_graph, feat, edge_weight):
        """Return the detached sum of the embeddings of both views."""
        view_a = self.encoder1(graph, feat)
        view_b = self.encoder2(diff_graph, feat, edge_weight=edge_weight)
        combined = view_a + view_b
        return combined.detach()

    def forward(self, graph, diff_graph, feat, shuf_feat, edge_weight):
        """Return the discriminator logits for real and shuffled features."""
        # Real features through both encoders.
        h1 = self.encoder1(graph, feat)
        h2 = self.encoder2(diff_graph, feat, edge_weight=edge_weight)

        # Shuffled (corrupted) features through both encoders.
        h3 = self.encoder1(graph, shuf_feat)
        h4 = self.encoder2(diff_graph, shuf_feat, edge_weight=edge_weight)

        # Both summaries are pooled over ``graph``, as in the original code.
        c1 = self.act_fn(self.pooling(graph, h1))
        c2 = self.act_fn(self.pooling(graph, h2))

        return self.disc(h1, h2, h3, h4, c1, c2)
Networks](https://arxiv.org/abs/1607.00653). The authors' reference implementation is available at [Node2vec](https://github.com/aditya-grover/node2vec). Example implementor ---------------------- This example was implemented by [Smile](https://github.com/Smilexuhc) during his internship at the AWS Shanghai AI Lab. The graph datasets used in this example --------------------------------------- cora - NumNodes: 2708 - NumEdges: 10556 ogbn-products - NumNodes: 2449029 - NumEdges: 61859140 Dependencies -------------------------------- - python 3.6+ - PyTorch 1.5.0+ - ogb How to run example files -------------------------------- To train a node2vec model: ```shell script python main.py --task="train" ``` To time node2vec random walks: ```shell script python main.py --task="time" --runs=10 ``` Performance ------------------------- **Setting:** `walk_length=50, p=0.25, q=4.0` | Dataset | DGL | PyG | | -------- | :---------: | :---------: | | cora | 0.0092s | 0.0179s | | products | 66.22s | 77.65s | Note that the numbers in the table are averages over multiple trials: 50 trials for cora and 10 trials for ogbn-products. 
def time_randomwalk(graph, args):
    """Measure the mean run time of node2vec random walks over ``graph``.

    Parameters
    ----------
    graph : DGLGraph
        Graph to walk on; walks start from every node.
    args : argparse.Namespace
        Must provide ``runs`` (number of trials) and ``dataset`` (name used
        in the printed report).

    Raises
    ------
    ValueError
        If ``args.runs`` is not positive, since no mean can be computed.
    """
    if args.runs <= 0:
        raise ValueError(
            "--runs must be a positive integer, got {}".format(args.runs)
        )

    # default setting for testing
    params = {"p": 0.25, "q": 4, "walk_length": 50}

    # perf_counter is monotonic and high-resolution, unlike the wall clock.
    start_time = time.perf_counter()
    for _ in range(args.runs):
        node2vec_random_walk(graph, graph.nodes(), **params)
    cost_time_avg = (time.perf_counter() - start_time) / args.runs

    print(
        "Run dataset {} {} trials, mean run time: {:.3f}s".format(
            args.dataset, args.runs, cost_time_avg
        )
    )


def train_node2vec(graph, eval_set, args):
    """Train a node2vec model on ``graph``, evaluating on ``eval_set`` every epoch.

    Model hyper-parameters, epoch count and batch size are read from
    ``args``; the learning rate is fixed at 0.01.
    """
    trainer = Node2vecModel(
        graph,
        embedding_dim=args.embedding_dim,
        walk_length=args.walk_length,
        p=args.p,
        q=args.q,
        num_walks=args.num_walks,
        eval_set=eval_set,
        eval_steps=1,
        device=args.device,
    )

    trainer.train(
        epochs=args.epochs, batch_size=args.batch_size, learning_rate=0.01
    )
p: float Likelihood of immediately revisiting a node in the walk. Same notation as in the paper. q: float Control parameter to interpolate between breadth-first strategy and depth-first strategy. Same notation as in the paper. num_walks: int Number of random walks for each node. Default: 10. window_size: int Maximum distance between the center node and predicted node. Default: 5. num_negatives: int The number of negative samples for each positive sample. Default: 5. use_sparse: bool If set to True, use PyTorch's sparse embedding and optimizer. Default: ``True``. weight_name : str, optional The name of the edge feature tensor on the graph storing the (unnormalized) probabilities associated with each edge for choosing the next node. The feature tensor must be non-negative and the sum of the probabilities must be positive for the outbound edges of all nodes (although they don't have to sum up to one). The result will be undefined otherwise. If omitted, DGL assumes that the neighbors are picked uniformly. """ def __init__( self, g, embedding_dim, walk_length, p, q, num_walks=10, window_size=5, num_negatives=5, use_sparse=True, weight_name=None, ): super(Node2vec, self).__init__() assert walk_length >= window_size self.g = g self.embedding_dim = embedding_dim self.walk_length = walk_length self.p = p self.q = q self.num_walks = num_walks self.window_size = window_size self.num_negatives = num_negatives self.N = self.g.num_nodes() if weight_name is not None: self.prob = weight_name else: self.prob = None self.embedding = nn.Embedding(self.N, embedding_dim, sparse=use_sparse) def reset_parameters(self): self.embedding.reset_parameters() def sample(self, batch): """ Generate positive and negative samples. 
Positive samples are generated from random walk Negative samples are generated from random sampling """ if not isinstance(batch, torch.Tensor): batch = torch.tensor(batch) batch = batch.repeat(self.num_walks) # positive pos_traces = node2vec_random_walk( self.g, batch, self.p, self.q, self.walk_length, self.prob ) pos_traces = pos_traces.unfold(1, self.window_size, 1) # rolling window pos_traces = pos_traces.contiguous().view(-1, self.window_size) # negative neg_batch = batch.repeat(self.num_negatives) neg_traces = torch.randint( self.N, (neg_batch.size(0), self.walk_length) ) neg_traces = torch.cat([neg_batch.view(-1, 1), neg_traces], dim=-1) neg_traces = neg_traces.unfold(1, self.window_size, 1) # rolling window neg_traces = neg_traces.contiguous().view(-1, self.window_size) return pos_traces, neg_traces def forward(self, nodes=None): """ Returns the embeddings of the input nodes Parameters ---------- nodes: Tensor, optional Input nodes, if set `None`, will return all the node embedding. Returns ------- Tensor Node embedding """ emb = self.embedding.weight if nodes is None: return emb else: return emb[nodes] def loss(self, pos_trace, neg_trace): """ Computes the loss given positive and negative random walks. 
class Node2vecModel(object):
    """
    Wrapper of the ``Node2Vec`` class with a ``train`` method.

    Attributes
    ----------
    g: DGLGraph
        The graph.
    embedding_dim: int
        Dimension of node embedding.
    walk_length: int
        Length of each trace.
    p: float
        Likelihood of immediately revisiting a node in the walk.
        Default: 1.0.
    q: float
        Control parameter to interpolate between breadth-first strategy and
        depth-first strategy. Default: 1.0.
    num_walks: int
        Number of random walks for each node. Default: 1.
    window_size: int
        Maximum distance between the center node and predicted node.
        Default: 5.
    num_negatives: int
        The number of negative samples for each positive sample. Default: 5.
    use_sparse: bool
        If set to True, uses PyTorch's sparse embedding and optimizer.
        Default: ``True``.
    weight_name : str, optional
        The name of the edge feature tensor on the graph storing the
        (unnormalized) probabilities associated with each edge for choosing
        the next node.

        The feature tensor must be non-negative and the sum of the
        probabilities must be positive for the outbound edges of all nodes
        (although they don't have to sum up to one). The result will be
        undefined otherwise.

        If omitted, DGL assumes that the neighbors are picked uniformly.
        Default: ``None``.
    eval_set: list of tuples (Tensor, Tensor)
        [(nodes_train,y_train),(nodes_val,y_val)]
        If omitted, model will not be evaluated. Default: ``None``.
    eval_steps: int
        Interval, in epochs, between evaluations. If set <= 0, model will
        not be evaluated. Default: ``-1``.
    device: str
        device, default 'cpu'.
    """

    def __init__(
        self,
        g,
        embedding_dim,
        walk_length,
        p=1.0,
        q=1.0,
        num_walks=1,
        window_size=5,
        num_negatives=5,
        use_sparse=True,
        weight_name=None,
        eval_set=None,
        eval_steps=-1,
        device="cpu",
    ):
        self.model = Node2vec(
            g,
            embedding_dim,
            walk_length,
            p,
            q,
            num_walks,
            window_size,
            num_negatives,
            use_sparse,
            weight_name,
        )
        self.g = g
        self.use_sparse = use_sparse
        self.eval_steps = eval_steps
        self.eval_set = eval_set

        # NOTE: any value other than "cpu" selects the default CUDA device
        # when one is available; an explicit index such as "cuda:1" is not
        # honored.
        if device == "cpu":
            self.device = device
        else:
            self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def _train_step(self, model, loader, optimizer, device):
        """Run one epoch over ``loader`` and return the mean batch loss."""
        model.train()
        total_loss = 0
        for pos_traces, neg_traces in loader:
            pos_traces, neg_traces = pos_traces.to(device), neg_traces.to(
                device
            )
            optimizer.zero_grad()
            loss = model.loss(pos_traces, neg_traces)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    @torch.no_grad()
    def _evaluate_step(self):
        """Return the validation accuracy of the current embeddings."""
        nodes_train, y_train = self.eval_set[0]
        nodes_val, y_val = self.eval_set[1]

        acc = self.model.evaluate(nodes_train, y_train, nodes_val, y_val)
        return acc

    def train(self, epochs, batch_size, learning_rate=0.01):
        """
        Train the wrapped model, evaluating every ``eval_steps`` epochs.

        Parameters
        ----------
        epochs: int
            num of train epoch
        batch_size: int
            batch size
        learning_rate: float
            learning rate. Default 0.01.
        """

        self.model = self.model.to(self.device)
        loader = self.model.loader(batch_size)
        if self.use_sparse:
            # Sparse embedding gradients require the sparse optimizer.
            optimizer = torch.optim.SparseAdam(
                list(self.model.parameters()), lr=learning_rate
            )
        else:
            optimizer = torch.optim.Adam(
                self.model.parameters(), lr=learning_rate
            )

        # Evaluation needs both a positive interval and an evaluation set.
        can_evaluate = self.eval_steps > 0 and self.eval_set is not None

        for i in range(epochs):
            loss = self._train_step(self.model, loader, optimizer, self.device)
            # Test the current epoch index, not the total epoch count;
            # otherwise evaluation runs either every epoch or never.
            if can_evaluate and i % self.eval_steps == 0:
                acc = self._evaluate_step()
                print(
                    "Epoch: {}, Train Loss: {:.4f}, Val Acc: {:.4f}".format(
                        i, loss, acc
                    )
                )

    def embedding(self, nodes=None):
        """
        Returns the embeddings of the input nodes

        Parameters
        ----------
        nodes: Tensor, optional
            Input nodes, if set `None`, will return all the node embedding.

        Returns
        -------
        Tensor
            Node embedding.
        """

        return self.model(nodes)
class GAT(nn.Module):
    """Multi-layer graph attention network ending in a log-softmax.

    Hidden layers concatenate their attention heads; the output layer
    averages them.
    """

    def __init__(
        self,
        in_feats,
        num_heads,
        n_hidden,
        n_classes,
        n_layers,
        activation,
        dropout=0.0,
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.num_heads = num_heads

        def make_conv(fan_in, fan_out, act):
            # All layers share heads, dropout rates and negative slope.
            return dglnn.GATConv(
                fan_in,
                fan_out,
                num_heads=num_heads,
                feat_drop=dropout,
                attn_drop=dropout,
                activation=act,
                negative_slope=0.2,
            )

        # Width of a hidden layer output after its heads are flattened.
        concat_width = n_hidden * num_heads
        convs = [make_conv(in_feats, n_hidden, activation)]
        for _ in range(1, n_layers - 1):
            convs.append(make_conv(concat_width, n_hidden, activation))
        convs.append(make_conv(concat_width, n_classes, None))
        self.layers = nn.ModuleList(convs)

    def forward(self, g, x):
        """Return per-node log-probabilities computed on graph ``g``."""
        h = x
        last = len(self.layers) - 1
        for depth, conv in enumerate(self.layers):
            h = conv(g, h)
            if depth != last:
                # Concatenate the heads of every non-final layer.
                h = h.flatten(1)
        # Average the heads of the final layer.
        h = h.mean(1)
        return h.log_softmax(dim=-1)

    def inference(self, g, x, batch_size, device):
        """
        Layer-wise inference with the GAT model on full neighbors (i.e.
        without neighbor sampling).

        g : the entire graph.
        x : the input of entire node set.
        batch_size : number of output nodes processed per mini-batch.
        device : device the layer computation runs on.

        Each layer is evaluated for all nodes before the next one starts,
        with the intermediate result kept in a CPU buffer. The number of
        dataloader workers is read from the module-level ``args``.
        """
        heads = self.num_heads
        last = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            concat_heads = idx < self.n_layers - 1

            # Output buffer for this layer, sized to what the layer emits.
            if idx == last:
                width = self.n_classes
            elif concat_heads:
                width = self.n_hidden * heads
            else:
                width = self.n_hidden
            y = th.zeros(g.num_nodes(), width)

            sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
            dataloader = dgl.dataloading.DataLoader(
                g,
                th.arange(g.num_nodes()),
                sampler,
                batch_size=batch_size,
                shuffle=False,
                drop_last=False,
                num_workers=args.num_workers,
            )

            with dataloader.enable_cpu_affinity():
                for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                    block = blocks[0].int().to(device)
                    h = layer(block, x[input_nodes].to(device))
                    if concat_heads:
                        h = h.flatten(1)
                    else:
                        h = h.mean(1).log_softmax(dim=-1)

                    y[output_nodes] = h.cpu()

            # This layer's output is the next layer's input.
            x = y
        return y


def compute_acc(pred, labels):
    """
    Compute the accuracy of prediction given the labels.

    The predicted class of each row of ``pred`` is its arg-max along
    dimension 1; the result is a 0-dim tensor.
    """
    hits = pred.argmax(dim=1).eq(labels)
    return hits.float().sum() / len(pred)
val_mask : A 0-1 mask indicating which nodes do we actually compute the accuracy for. batch_size : Number of nodes to compute at the same time. device : The GPU device to evaluate on. """ model.eval() with th.no_grad(): pred = model.inference(g, nfeat, batch_size, device) model.train() labels_cpu = labels.to(th.device("cpu")) return ( compute_acc(pred[val_nid], labels_cpu[val_nid]), compute_acc(pred[test_nid], labels_cpu[test_nid]), pred, ) def model_param_summary(model): """Count the model parameters""" cnt = sum(p.numel() for p in model.parameters() if p.requires_grad) print("Total Params {}".format(cnt)) #### Entry point def run(args, device, data, nfeat): # Unpack data ( train_nid, val_nid, test_nid, in_feats, labels, n_classes, g, cluster_iterator, ) = data labels = labels.to(device) # Define model and optimizer model = GAT( in_feats, args.num_heads, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout, ) model_param_summary(model) model = model.to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) # Training loop avg = 0 best_eval_acc = 0 best_test_acc = 0 for epoch in range(args.num_epochs): iter_load = 0 iter_far = 0 iter_back = 0 tic = time.time() # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. 
tic_start = time.time() for step, cluster in enumerate(cluster_iterator): mask = cluster.ndata.pop("train_mask") if mask.sum() == 0: continue cluster.edata.pop(dgl.EID) cluster = cluster.int().to(device) input_nodes = cluster.ndata[dgl.NID] batch_inputs = nfeat[input_nodes] batch_labels = labels[input_nodes] tic_step = time.time() # Compute loss and prediction batch_pred = model(cluster, batch_inputs) batch_pred = batch_pred[mask] batch_labels = batch_labels[mask] loss = nn.functional.nll_loss(batch_pred, batch_labels) optimizer.zero_grad() tic_far = time.time() loss.backward() optimizer.step() tic_back = time.time() iter_load += tic_step - tic_start iter_far += tic_far - tic_step iter_back += tic_back - tic_far if step % args.log_every == 0: acc = compute_acc(batch_pred, batch_labels) gpu_mem_alloc = ( th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 ) print( "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | GPU {:.1f} MB".format( epoch, step, loss.item(), acc.item(), gpu_mem_alloc ) ) tic_start = time.time() toc = time.time() print( "Epoch Time(s): {:.4f} Load {:.4f} Forward {:.4f} Backward {:.4f}".format( toc - tic, iter_load, iter_far, iter_back ) ) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: eval_acc, test_acc, pred = evaluate( model, g, nfeat, labels, val_nid, test_nid, args.val_batch_size, device, ) model = model.to(device) if args.save_pred: np.savetxt( args.save_pred + "%02d" % epoch, pred.argmax(1).cpu().numpy(), "%d", ) print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc print( "Best Eval Acc {:.4f} Test Acc {:.4f}".format( best_eval_acc, best_test_acc ) ) if epoch >= 5: print("Avg epoch time: {}".format(avg / (epoch - 4))) return best_test_acc.to(th.device("cpu")) if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") argparser.add_argument( "--gpu", type=int, default=0, help="GPU 
device ID. Use -1 for CPU training", ) argparser.add_argument("--num_epochs", type=int, default=20) argparser.add_argument("--num_hidden", type=int, default=128) argparser.add_argument("--num_layers", type=int, default=3) argparser.add_argument("--num_heads", type=int, default=8) argparser.add_argument("--batch_size", type=int, default=32) argparser.add_argument("--val_batch_size", type=int, default=2000) argparser.add_argument("--log_every", type=int, default=20) argparser.add_argument("--eval_every", type=int, default=1) argparser.add_argument("--lr", type=float, default=0.001) argparser.add_argument("--dropout", type=float, default=0.5) argparser.add_argument("--save_pred", type=str, default="") argparser.add_argument("--wd", type=float, default=0) argparser.add_argument("--num_partitions", type=int, default=15000) argparser.add_argument("--num_workers", type=int, default=4) argparser.add_argument( "--data_cpu", action="store_true", help="By default the script puts all node features and labels " "on GPU when using it to save time for data copy. This may " "be undesired if they cannot fit in GPU memory at once. 
" "This flag disables that.", ) args = argparser.parse_args() if args.gpu >= 0: device = th.device("cuda:%d" % args.gpu) else: device = th.device("cpu") # load ogbn-products data data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] labels = labels[:, 0] print("Total edges before adding self-loop {}".format(graph.num_edges())) graph = dgl.remove_self_loop(graph) graph = dgl.add_self_loop(graph) print("Total edges after adding self-loop {}".format(graph.num_edges())) num_nodes = train_idx.shape[0] + val_idx.shape[0] + test_idx.shape[0] assert num_nodes == graph.num_nodes() mask = th.zeros(num_nodes, dtype=th.bool) mask[train_idx] = True graph.ndata["train_mask"] = mask graph.in_degrees(0) graph.out_degrees(0) graph.find_edges(0) cluster_iter_data = ClusterIter( "ogbn-products", graph, args.num_partitions, args.batch_size ) cluster_iterator = DataLoader( cluster_iter_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.num_workers, collate_fn=partial(subgraph_collate_fn, graph), ) in_feats = graph.ndata["feat"].shape[1] n_classes = (labels.max() + 1).item() # Pack data data = ( train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator, ) # Run 10 times test_accs = [] nfeat = graph.ndata.pop("feat").to(device) for i in range(10): test_accs.append(run(args, device, data, nfeat)) print("Average test accuracy:", np.mean(test_accs), "±", np.std(test_accs)) ================================================ FILE: examples/pytorch/ogb/cluster-gat/partition_utils.py ================================================ from time import time import dgl import numpy as np from dgl import backend as F from dgl.transforms import metis_partition def get_partition_list(g, psize): p_gs = metis_partition(g, psize) graphs = [] for k, val in p_gs.items(): nids = val.ndata[dgl.NID] 
nids = F.asnumpy(nids) graphs.append(nids) return graphs ================================================ FILE: examples/pytorch/ogb/cluster-gat/sampler.py ================================================ import os import torch from partition_utils import * class ClusterIter(object): """The partition sampler given a DGLGraph and partition number. The metis is used as the graph partition backend. """ def __init__(self, dn, g, psize, batch_size): """Initialize the sampler. Paramters --------- dn : str The dataset name. g : DGLGraph The full graph of dataset psize: int The partition number batch_size: int The number of partitions in one batch """ self.psize = psize self.batch_size = batch_size # cache the partitions of known datasets&partition number if dn: fn = os.path.join("./datasets/", dn + "_{}.npy".format(psize)) if os.path.exists(fn): self.par_li = np.load(fn, allow_pickle=True) else: os.makedirs("./datasets/", exist_ok=True) self.par_li = get_partition_list(g, psize) self.par_li = np.array(self.par_li, dtype=object) np.save(fn, self.par_li) else: self.par_li = get_partition_list(g, psize) par_list = [] for p in self.par_li: par = torch.Tensor(p) par_list.append(par) self.par_list = par_list def __len__(self): return self.psize def __getitem__(self, idx): return self.par_li[idx] def subgraph_collate_fn(g, batch): nids = np.concatenate(batch).reshape(-1).astype(np.int64) g1 = g.subgraph(nids) g1 = dgl.remove_self_loop(g1) g1 = dgl.add_self_loop(g1) return g1 ================================================ FILE: examples/pytorch/ogb/cluster-sage/README.md ================================================ # Cluster-SAGE on OGB Dataset Requires DGL 0.4.3post2 or later versions. We use builtin metis to do the graph partition. ## OGB-Product Run `main.py` and you should directly see the result. 
Accuracy over 10 runs: 0.7830701 ± 0.0035093208 ================================================ FILE: examples/pytorch/ogb/cluster-sage/main.py ================================================ import argparse import time import traceback from functools import partial import dgl import dgl.function as fn import dgl.nn.pytorch as dglnn import numpy as np import torch as th import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import tqdm from dgl.data import RedditDataset from ogb.nodeproppred import DglNodePropPredDataset from sampler import ClusterIter, subgraph_collate_fn from torch.utils.data import DataLoader #### Neighbor sampler class SAGE(nn.Module): def __init__( self, in_feats, n_hidden, n_classes, n_layers, activation, dropout ): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean")) for i in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean")) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean")) self.dropout = nn.Dropout(dropout) self.activation = activation def forward(self, g, x): h = x for l, conv in enumerate(self.layers): h = conv(g, h) if l != len(self.layers) - 1: h = self.activation(h) h = self.dropout(h) return h def inference(self, g, x, batch_size, device): """ Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). g : the entire graph. x : the input of entire node set. The inference code is written in a fashion that it could handle any number of nodes and layers. """ # During inference with sampling, multi-layer blocks are very inefficient because # lots of computations in the first few layers are repeated. # Therefore, we compute the representation of all nodes layer by layer. The nodes # on each layer are of course splitted in batches. # TODO: can we standardize this? 
h = x for l, conv in enumerate(self.layers): h = conv(g, h) if l != len(self.layers) - 1: h = self.activation(h) return h def compute_acc(pred, labels): """ Compute the accuracy of prediction given the labels. """ return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) def evaluate(model, g, labels, val_nid, test_nid, batch_size, device): """ Evaluate the model on the validation set specified by ``val_mask``. g : The entire graph. inputs : The features of all the nodes. labels : The labels of all the nodes. val_mask : A 0-1 mask indicating which nodes do we actually compute the accuracy for. batch_size : Number of nodes to compute at the same time. device : The GPU device to evaluate on. """ model.eval() with th.no_grad(): inputs = g.ndata["feat"] model = model.cpu() pred = model.inference(g, inputs, batch_size, device) model.train() return ( compute_acc(pred[val_nid], labels[val_nid]), compute_acc(pred[test_nid], labels[test_nid]), pred, ) def load_subtensor(g, labels, seeds, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. """ batch_inputs = g.ndata["feat"][input_nodes].to(device) batch_labels = labels[seeds].to(device) return batch_inputs, batch_labels #### Entry point def run(args, device, data): # Unpack data ( train_nid, val_nid, test_nid, in_feats, labels, n_classes, g, cluster_iterator, ) = data # Define model and optimizer model = SAGE( in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout, ) model = model.to(device) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) # Training loop avg = 0 iter_tput = [] best_eval_acc = 0 best_test_acc = 0 for epoch in range(args.num_epochs): iter_load = 0 iter_far = 0 iter_back = 0 iter_tl = 0 tic = time.time() # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. 
tic_start = time.time() for step, cluster in enumerate(cluster_iterator): cluster = cluster.int().to(device) mask = cluster.ndata["train_mask"].to(device) if mask.sum() == 0: continue feat = cluster.ndata["feat"].to(device) batch_labels = cluster.ndata["labels"].to(device) tic_step = time.time() batch_pred = model(cluster, feat) batch_pred = batch_pred[mask] batch_labels = batch_labels[mask] loss = loss_fcn(batch_pred, batch_labels) optimizer.zero_grad() tic_far = time.time() loss.backward() optimizer.step() tic_back = time.time() iter_load += tic_step - tic_start iter_far += tic_far - tic_step iter_back += tic_back - tic_far tic_start = time.time() if step % args.log_every == 0: acc = compute_acc(batch_pred, batch_labels) gpu_mem_alloc = ( th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 ) print( "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | GPU {:.1f} MB".format( epoch, step, loss.item(), acc.item(), gpu_mem_alloc ) ) toc = time.time() print( "Epoch Time(s): {:.4f} Load {:.4f} Forward {:.4f} Backward {:.4f}".format( toc - tic, iter_load, iter_far, iter_back ) ) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: eval_acc, test_acc, pred = evaluate( model, g, labels, val_nid, test_nid, args.val_batch_size, device ) model = model.to(device) if args.save_pred: np.savetxt( args.save_pred + "%02d" % epoch, pred.argmax(1).cpu().numpy(), "%d", ) print("Eval Acc {:.4f}".format(eval_acc)) if eval_acc > best_eval_acc: best_eval_acc = eval_acc best_test_acc = test_acc print( "Best Eval Acc {:.4f} Test Acc {:.4f}".format( best_eval_acc, best_test_acc ) ) print("Avg epoch time: {}".format(avg / (epoch - 4))) return best_test_acc if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") argparser.add_argument( "--gpu", type=int, default=0, help="GPU device ID. 
Use -1 for CPU training", ) argparser.add_argument("--num-epochs", type=int, default=30) argparser.add_argument("--num-hidden", type=int, default=256) argparser.add_argument("--num-layers", type=int, default=3) argparser.add_argument("--batch-size", type=int, default=32) argparser.add_argument("--val-batch-size", type=int, default=10000) argparser.add_argument("--log-every", type=int, default=20) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--lr", type=float, default=0.001) argparser.add_argument("--dropout", type=float, default=0.5) argparser.add_argument("--save-pred", type=str, default="") argparser.add_argument("--wd", type=float, default=0) argparser.add_argument("--num_partitions", type=int, default=15000) args = argparser.parse_args() if args.gpu >= 0: device = th.device("cuda:%d" % args.gpu) else: device = th.device("cpu") # load ogbn-products data data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] labels = labels[:, 0] num_nodes = train_idx.shape[0] + val_idx.shape[0] + test_idx.shape[0] assert num_nodes == graph.num_nodes() graph.ndata["labels"] = labels mask = th.zeros(num_nodes, dtype=th.bool) mask[train_idx] = True graph.ndata["train_mask"] = mask mask = th.zeros(num_nodes, dtype=th.bool) mask[val_idx] = True graph.ndata["valid_mask"] = mask mask = th.zeros(num_nodes, dtype=th.bool) mask[test_idx] = True graph.ndata["test_mask"] = mask graph.in_degrees(0) graph.out_degrees(0) graph.find_edges(0) cluster_iter_data = ClusterIter( "ogbn-products", graph, args.num_partitions, args.batch_size, th.cat([train_idx, val_idx, test_idx]), ) idx = th.arange(args.num_partitions // args.batch_size) cluster_iterator = DataLoader( cluster_iter_data, batch_size=32, shuffle=True, pin_memory=True, num_workers=4, collate_fn=partial(subgraph_collate_fn, graph), ) in_feats = 
graph.ndata["feat"].shape[1] print(in_feats) n_classes = (labels.max() + 1).item() # Pack data data = ( train_idx, val_idx, test_idx, in_feats, labels, n_classes, graph, cluster_iterator, ) # Run 10 times test_accs = [] for i in range(10): test_accs.append(run(args, device, data)) print( "Average test accuracy:", np.mean(test_accs), "±", np.std(test_accs) ) ================================================ FILE: examples/pytorch/ogb/cluster-sage/partition_utils.py ================================================ from time import time import dgl import numpy as np from dgl import backend as F from dgl.transforms import metis_partition def get_partition_list(g, psize): p_gs = metis_partition(g, psize) graphs = [] for k, val in p_gs.items(): nids = val.ndata[dgl.NID] nids = F.asnumpy(nids) graphs.append(nids) return graphs ================================================ FILE: examples/pytorch/ogb/cluster-sage/sampler.py ================================================ import os import random import time import torch from partition_utils import * import dgl.function as fn class ClusterIter(object): """The partition sampler given a DGLGraph and partition number. The metis is used as the graph partition backend. """ def __init__(self, dn, g, psize, batch_size, seed_nid): """Initialize the sampler. Paramters --------- dn : str The dataset name. 
g : DGLGraph The full graph of dataset psize: int The partition number batch_size: int The number of partitions in one batch seed_nid: np.ndarray The training nodes ids, used to extract the training graph """ self.psize = psize self.batch_size = batch_size # cache the partitions of known datasets&partition number if dn: fn = os.path.join("./datasets/", dn + "_{}.npy".format(psize)) if os.path.exists(fn): self.par_li = np.load(fn, allow_pickle=True) else: os.makedirs("./datasets/", exist_ok=True) self.par_li = get_partition_list(g, psize) self.par_li = np.array(self.par_li, dtype=object) np.save(fn, self.par_li) else: self.par_li = get_partition_list(g, psize) par_list = [] for p in self.par_li: par = torch.Tensor(p) par_list.append(par) self.par_list = par_list # use one side normalization def get_norm(self, g): norm = 1.0 / g.in_degrees().float().unsqueeze(1) norm[torch.isinf(norm)] = 0 norm = norm.to(self.g.ndata["feat"].device) return norm def __len__(self): return self.psize def __getitem__(self, idx): return self.par_li[idx] def subgraph_collate_fn(g, batch): nids = np.concatenate(batch).reshape(-1).astype(np.int64) g1 = g.subgraph(nids) return g1 ================================================ FILE: examples/pytorch/ogb/deepwalk/README.md ================================================ # DeepWalk Example - Paper link: [here](https://arxiv.org/pdf/1403.6652.pdf) - Other implementation: [gensim](https://github.com/phanein/deepwalk), [deepwalk-c](https://github.com/xgfs/deepwalk-c) The implementation includes multi-processing training with CPU and mixed training with CPU and multi-GPU. ## Dependencies - PyTorch 1.5.0+ ## Tested version - PyTorch 1.5.0 - DGL 0.5.0 ## Input data Currently, we support two builtin dataset: youtube and blog. Use --data\_file youtube to select youtube dataset and --data\_file blog to select blog dataset. 
The data is available at https://data.dgl.ai/dataset/DeepWalk/youtube.zip and https://data.dgl.ai/dataset/DeepWalk/blog.zip The youtube.zip includes youtube-net.txt, youtube-vocab.txt and youtube-label.txt; The blog.zip includes blog-net.txt, blog-vocab.txt and blog-label.txt. For other datasets please pass the full path to the trainer through --data\_file and the format of a network file should follow: ``` 1(node id) 2(node id) 1 3 1 4 2 4 ... ``` ### How to run the code To run the code: ``` python3 deepwalk.py --data_file youtube --output_emb_file emb.txt --mix --lr 0.2 --gpus 0 1 2 3 --batch_size 100 --negative 5 ``` ### How to save the embedding By default the trained embedding is saved under --output\_emb\_file FILE\_NAME as a numpy object. To save the trained embedding in raw format (txt format), please use --save\_in\_txt argument. ### Evaluation To evaluate embedding on multi-label classification, please refer to [here](https://github.com/ShawXh/Evaluate-Embedding) YouTube (1M nodes). | Implementation | Macro-F1 (%)
1%    3%    5%    7%    9% | Micro-F1 (%)
1%    3%    5%    7%    9% | |----|----|----| | gensim.word2vec(hs) | 28.73   32.51   33.67   34.28   34.79 | 35.73   38.34   39.37   40.08   40.77 | | gensim.word2vec(ns) | 28.18   32.25   33.56   34.60   35.22 | 35.35   37.69   38.08   40.24   41.09 | | ours | 24.58   31.23   33.97   35.41   36.48 | 38.93   43.17   44.73   45.42   45.92 | The comparison between running time is shown as below, where the numbers in the brackets denote time used on random-walk. | Implementation | gensim.word2vec(hs) | gensim.word2vec(ns) | Ours | |----|----|----|----| | Time (s) | 27119.6(1759.8) | 10580.3(1704.3) | 428.89 | Parameters. - walk_length = 80, number_walks = 10, window_size = 5 - Ours: 4GPU (Tesla V100), lr = 0.2, batchs_size = 128, neg_weight = 5, negative = 1, num_thread = 4 - Others: workers = 8, negative = 5 Speeding-up with mixed CPU & multi-GPU. The used parameters are the same as above. | #GPUs | 1 | 2 | 4 | |----------|-------|-------|-------| | Time (s) |1419.64| 952.04|428.89 | ## OGB Dataset ### How to load ogb data You can run the code directly with: ``` python3 deepwalk --ogbl_name xxx --load_from_ogbl ``` However, ogb.linkproppred might not be compatible with mixed training with multi-gpu. If you want to do mixed training, please use no more than 1 gpu by the command above. ### Evaluation For evaluatation we follow the code mlp.py provided by ogb [here](https://github.com/snap-stanford/ogb/blob/master/examples/linkproppred/collab/mlp.py). 
### Used config ogbl-collab ``` python3 deepwalk.py --ogbl_name ogbl-collab --load_from_ogbl --save_in_pt --output_emb_file collab-embedding.pt --num_walks 50 --window_size 2 --walk_length 40 --lr 0.1 --negative 1 --neg_weight 1 --lap_norm 0.01 --mix --gpus 0 --num_threads 4 --print_interval 2000 --print_loss --batch_size 128 --use_context_weight cd ./ogb/blob/master/examples/linkproppred/collab/ cp embedding_pt_file_path ./ python3 mlp.py --device 0 --runs 10 --use_node_embedding ``` ogbl-ddi ``` python3 deepwalk.py --ogbl_name ogbl-ddi --load_from_ogbl --save_in_pt --output_emb_file ddi-embedding.pt --num_walks 50 --window_size 2 --walk_length 80 --lr 0.1 --negative 1 --neg_weight 1 --lap_norm 0.05 --only_gpu --gpus 0 --num_threads 4 --print_interval 2000 --print_loss --batch_size 16 --use_context_weight cd ./ogb/blob/master/examples/linkproppred/ddi/ cp embedding_pt_file_path ./ python3 mlp.py --device 0 --runs 10 --epochs 100 ``` ogbl-ppa ``` python3 deepwalk.py --ogbl_name ogbl-ppa --load_from_ogbl --save_in_pt --output_emb_file ppa-embedding.pt --negative 1 --neg_weight 1 --batch_size 64 --print_interval 2000 --print_loss --window_size 1 --num_walks 30 --walk_length 80 --lr 0.1 --lap_norm 0.02 --mix --gpus 0 --num_threads 4 cp embedding_pt_file_path ./ python3 mlp.py --device 2 --runs 10 ``` ogbl-citation ``` python3 deepwalk.py --ogbl_name ogbl-citation --load_from_ogbl --save_in_pt --output_emb_file embedding.pt --window_size 2 --num_walks 10 --negative 1 --neg_weight 1 --walk_length 80 --batch_size 128 --print_loss --print_interval 1000 --mix --gpus 0 --use_context_weight --num_threads 4 --lap_norm 0.01 --lr 0.1 cp embedding_pt_file_path ./ python3 mlp.py --device 2 --runs 10 --use_node_embedding ``` ### OGBL Results ogbl-collab
#params: 61258346(model) + 131841(mlp) = 61390187
Hits@10
 Highest Train: 74.83 ± 4.79
 Highest Valid: 40.03 ± 2.98
  Final Train: 74.51 ± 4.92
  Final Test: 31.13 ± 2.47
Hits@50
 Highest Train: 98.83 ± 0.15
 Highest Valid: 60.61 ± 0.32
  Final Train: 98.74 ± 0.17
  Final Test: 50.37 ± 0.34
Hits@100
 Highest Train: 99.86 ± 0.04
 Highest Valid: 66.64 ± 0.32
  Final Train: 99.84 ± 0.06
  Final Test: 56.88 ± 0.37
ogbl-ddi
#params: 1444840(model) + 99073(mlp) = 1543913
Hits@10
 Highest Train: 33.91 ± 2.01
 Highest Valid: 30.96 ± 1.89
  Final Train: 33.90 ± 2.00
  Final Test: 15.16 ± 4.28
Hits@20
 Highest Train: 44.64 ± 1.71
 Highest Valid: 41.32 ± 1.69
  Final Train: 44.62 ± 1.69
  Final Test: 26.42 ± 6.10
Hits@30
 Highest Train: 51.01 ± 1.72
 Highest Valid: 47.64 ± 1.71
  Final Train: 50.99 ± 1.72
  Final Test: 33.56 ± 3.95
ogbl-ppa
#params: 150024820(model) + 113921(mlp) = 150138741
Hits@10
 Highest Train: 4.78 ± 0.73
 Highest Valid: 4.30 ± 0.68
  Final Train: 4.77 ± 0.73
  Final Test: 2.67 ± 0.42
Hits@50
 Highest Train: 18.82 ± 1.07
 Highest Valid: 17.26 ± 1.01
  Final Train: 18.82 ± 1.07
  Final Test: 17.34 ± 2.09
Hits@100
 Highest Train: 31.29 ± 2.11
 Highest Valid: 28.97 ± 1.92
  Final Train: 31.28 ± 2.12
  Final Test: 28.88 ± 1.53
ogbl-citation
#params: 757811178(model) + 131841(mlp) = 757943019
MRR
 Highest Train: 0.9381 ± 0.0003
 Highest Valid: 0.8469 ± 0.0003
  Final Train: 0.9377 ± 0.0004
  Final Test: 0.8479 ± 0.0003 ### Notes #### Multi-GPU issues For efficiency, the results of ogbl-collab, ogbl-ppa, ogbl-ddi are run with multi-GPU. Since ogb is somehow incompatible with our multi-GPU implementation, we need to do some preprocessing. The command is: ``` python3 load_dataset.py --name dataset_name ``` It will output a data file to the local. For example, if `dataset_name` is `ogbl-collab`, then a file `ogbl-collab-net.txt` will be generated. Then we run ``` python3 deepwalk.py --data_file data_file_path ``` where the other parameters are the same with used configs without using `--load_from_ogbl` and `--ogbl_name`. #### Others The performance on ogbl-ddi and ogbl-ppa can be not that stable. ================================================ FILE: examples/pytorch/ogb/deepwalk/deepwalk.py ================================================ import argparse import os import random import time import dgl import numpy as np import torch import torch.multiprocessing as mp from model import SkipGramModel from reading_data import DeepwalkDataset from torch.utils.data import DataLoader from utils import shuffle_walks, sum_up_params class DeepwalkTrainer: def __init__(self, args): """Initializing the trainer with the input arguments""" self.args = args self.dataset = DeepwalkDataset( net_file=args.data_file, map_file=args.map_file, walk_length=args.walk_length, window_size=args.window_size, num_walks=args.num_walks, batch_size=args.batch_size, negative=args.negative, gpus=args.gpus, fast_neg=args.fast_neg, ogbl_name=args.ogbl_name, load_from_ogbl=args.load_from_ogbl, ) self.emb_size = self.dataset.G.num_nodes() self.emb_model = None def init_device_emb(self): """set the device before training will be called once in fast_train_mp / fast_train """ choices = sum([self.args.only_gpu, self.args.only_cpu, self.args.mix]) assert ( choices == 1 ), "Must choose only *one* training mode in [only_cpu, only_gpu, mix]" # initializing embedding on CPU self.emb_model = 
SkipGramModel( emb_size=self.emb_size, emb_dimension=self.args.dim, walk_length=self.args.walk_length, window_size=self.args.window_size, batch_size=self.args.batch_size, only_cpu=self.args.only_cpu, only_gpu=self.args.only_gpu, mix=self.args.mix, neg_weight=self.args.neg_weight, negative=self.args.negative, lr=self.args.lr, lap_norm=self.args.lap_norm, fast_neg=self.args.fast_neg, record_loss=self.args.print_loss, norm=self.args.norm, use_context_weight=self.args.use_context_weight, async_update=self.args.async_update, num_threads=self.args.num_threads, ) torch.set_num_threads(self.args.num_threads) if self.args.only_gpu: print("Run in 1 GPU") assert self.args.gpus[0] >= 0 self.emb_model.all_to_device(self.args.gpus[0]) elif self.args.mix: print("Mix CPU with %d GPU" % len(self.args.gpus)) if len(self.args.gpus) == 1: assert ( self.args.gpus[0] >= 0 ), "mix CPU with GPU should have available GPU" self.emb_model.set_device(self.args.gpus[0]) else: print("Run in CPU process") self.args.gpus = [torch.device("cpu")] def train(self): """train the embedding""" if len(self.args.gpus) > 1: self.fast_train_mp() else: self.fast_train() def fast_train_mp(self): """multi-cpu-core or mix cpu & multi-gpu""" self.init_device_emb() self.emb_model.share_memory() if self.args.count_params: sum_up_params(self.emb_model) start_all = time.time() ps = [] for i in range(len(self.args.gpus)): p = mp.Process( target=self.fast_train_sp, args=(i, self.args.gpus[i]) ) ps.append(p) p.start() for p in ps: p.join() print("Used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_txt: self.emb_model.save_embedding_txt( self.dataset, self.args.output_emb_file ) elif self.args.save_in_pt: self.emb_model.save_embedding_pt( self.dataset, self.args.output_emb_file ) else: self.emb_model.save_embedding( self.dataset, self.args.output_emb_file ) def fast_train_sp(self, rank, gpu_id): """a subprocess for fast_train_mp""" if self.args.mix: self.emb_model.set_device(gpu_id) 
def fast_train(self):
    """Train in a single process and save the embedding.

    Walk batches come from a ``DataLoader`` over the seeds of sampler 0
    and are fed to ``self.emb_model.fast_learn``. Unless ``fast_neg`` is
    set, negative nodes are drawn from ``self.dataset.neg_table`` for
    every batch. Progress is printed every ``print_interval`` batches.

    When ``async_update`` is enabled the update subprocess is always asked
    to quit, even if training raises or the parameter count exits early,
    so that no orphaned worker is left behind.
    """
    self.init_device_emb()
    args = self.args
    model = self.emb_model

    # the number of positive node pairs of a node sequence
    num_pos = int(
        2 * args.walk_length * args.window_size
        - args.window_size * (args.window_size + 1)
    )

    if args.async_update:
        model.share_memory()
        model.create_async_update()

    try:
        if args.count_params:
            sum_up_params(model)

        sampler = self.dataset.create_sampler(0)
        dataloader = DataLoader(
            dataset=sampler.seeds,
            batch_size=args.batch_size,
            collate_fn=sampler.sample,
            shuffle=False,
            drop_last=False,
            num_workers=args.num_sampler_threads,
        )

        num_batches = len(dataloader)
        print("num batchs: %d\n" % num_batches)

        start_all = time.time()
        start = time.time()
        with torch.no_grad():
            for i, walks in enumerate(dataloader):
                if args.fast_neg:
                    model.fast_learn(walks)
                else:
                    # do negative sampling
                    bs = len(walks)
                    neg_nodes = torch.LongTensor(
                        np.random.choice(
                            self.dataset.neg_table,
                            bs * num_pos * args.negative,
                            replace=True,
                        )
                    )
                    model.fast_learn(walks, neg_nodes=neg_nodes)

                if i > 0 and i % args.print_interval == 0:
                    if args.print_loss:
                        print(
                            "Batch %d training time: %.2fs loss: %.4f"
                            % (
                                i,
                                time.time() - start,
                                -sum(model.loss) / args.print_interval,
                            )
                        )
                        # restart the running loss for the next interval
                        model.loss = []
                    else:
                        print(
                            "Batch %d, training time: %.2fs"
                            % (i, time.time() - start)
                        )
                    start = time.time()
    finally:
        # Always send the quit signal; otherwise the update subprocess
        # would keep waiting on its queue after a failure.
        if args.async_update:
            model.finish_async_update()

    print("Training used time: %.2fs" % (time.time() - start_all))
    if args.save_in_txt:
        model.save_embedding_txt(self.dataset, args.output_emb_file)
    elif args.save_in_pt:
        model.save_embedding_pt(self.dataset, args.output_emb_file)
    else:
        model.save_embedding(self.dataset, args.output_emb_file)
ogbl-ddi" ) parser.add_argument( "--load_from_ogbl", default=False, action="store_true", help="whether load dataset from ogbl", ) # output files parser.add_argument( "--save_in_txt", default=False, action="store_true", help="Whether save dat in txt format or npy", ) parser.add_argument( "--save_in_pt", default=False, action="store_true", help="Whether save dat in pt format or npy", ) parser.add_argument( "--output_emb_file", type=str, default="emb.npy", help="path of the output npy embedding file", ) parser.add_argument( "--map_file", type=str, default="nodeid_to_index.pickle", help="path of the mapping dict that maps node ids to embedding index", ) parser.add_argument( "--norm", default=False, action="store_true", help="whether to do normalization over node embedding after training", ) # model parameters parser.add_argument( "--dim", default=128, type=int, help="embedding dimensions" ) parser.add_argument( "--window_size", default=5, type=int, help="context window size" ) parser.add_argument( "--use_context_weight", default=False, action="store_true", help="whether to add weights over nodes in the context window", ) parser.add_argument( "--num_walks", default=10, type=int, help="number of walks for each node", ) parser.add_argument( "--negative", default=1, type=int, help="negative samples for each positve node pair", ) parser.add_argument( "--batch_size", default=128, type=int, help="number of node sequences in each batch", ) parser.add_argument( "--walk_length", default=80, type=int, help="number of nodes in a sequence", ) parser.add_argument( "--neg_weight", default=1.0, type=float, help="negative weight" ) parser.add_argument( "--lap_norm", default=0.01, type=float, help="weight of laplacian normalization, recommend to set as 0.1 / windoe_size", ) # training parameters parser.add_argument( "--print_interval", default=100, type=int, help="number of batches between printing", ) parser.add_argument( "--print_loss", default=False, action="store_true", 
help="whether print loss during training", ) parser.add_argument("--lr", default=0.2, type=float, help="learning rate") # optimization settings parser.add_argument( "--mix", default=False, action="store_true", help="mixed training with CPU and GPU", ) parser.add_argument( "--gpus", type=int, default=[-1], nargs="+", help="a list of active gpu ids, e.g. 0, used with --mix", ) parser.add_argument( "--only_cpu", default=False, action="store_true", help="training with CPU", ) parser.add_argument( "--only_gpu", default=False, action="store_true", help="training with GPU", ) parser.add_argument( "--async_update", default=False, action="store_true", help="mixed training asynchronously, not recommended", ) parser.add_argument( "--true_neg", default=False, action="store_true", help="If not specified, this program will use " "a faster negative sampling method, " "but the samples might be false negative " "with a small probability. If specified, " "this program will generate a true negative sample table," "and select from it when doing negative samling", ) parser.add_argument( "--num_threads", default=8, type=int, help="number of threads used for each CPU-core/GPU", ) parser.add_argument( "--num_sampler_threads", default=2, type=int, help="number of threads used for sampling", ) parser.add_argument( "--count_params", default=False, action="store_true", help="count the params, exit once counting over", ) args = parser.parse_args() args.fast_neg = not args.true_neg if args.async_update: assert args.mix, "--async_update only with --mix" start_time = time.time() trainer = DeepwalkTrainer(args) trainer.train() print("Total used time: %.2f" % (time.time() - start_time)) ================================================ FILE: examples/pytorch/ogb/deepwalk/load_dataset.py ================================================ """ load dataset from ogb """ import argparse import time from ogb.linkproppred import DglLinkPropPredDataset def load_from_ogbl_with_name(name): choices = 
if __name__ == "__main__":
    # Dump an OGB link-prediction graph as a "<src> <dst> <weight>" text
    # file named "<dataset>-net.txt" in the current directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--name",
        type=str,
        choices=["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"],
        default="ogbl-collab",
        help="name of datasets by ogb",
    )
    args = parser.parse_args()

    print("loading graph... it might take some time")

    name = args.name
    g = load_from_ogbl_with_name(name=name)

    src, tgt = g.edges()
    edge_num = src.shape[0]
    try:
        # One weight per edge; reshape raises if that does not hold.
        weights = g.edata["edge_weight"].reshape(edge_num).tolist()
    except KeyError:
        # The graph carries no edge weights: every edge is written as 1.
        weights = None

    # Convert once to Python numbers instead of calling .item() per edge.
    src = src.tolist()
    tgt = tgt.tolist()

    print("writing...")
    start_time = time.time()
    with open(name + "-net.txt", "w") as f:
        if weights is None:
            f.writelines("%s %s 1\n" % pair for pair in zip(src, tgt))
        else:
            f.writelines(
                "%s %s %s\n" % row for row in zip(src, tgt, weights)
            )
    print("writing used time: %d s" % int(time.time() - start_time))
def init_weight(walk_length, window_size, batch_size):
    """Build the context weight of every positive pair in a batch.

    A pair of walk positions at distance ``d`` (1 <= d <= window_size)
    gets the weight ``1 - (d - 1) / window_size``. Pairs are enumerated per
    walk position: first the context positions to its left, then those to
    its right, both in ascending order.

    Parameters
    ----------
    walk_length int : number of nodes in a sequence
    window_size int : context window size
    batch_size int : number of node sequences in the batch

    Return
    ------
    torch.Tensor : weights of shape [num_pos * batch_size, 1]
    """
    span = float(window_size)

    # The weights only depend on the position inside a walk, so compute
    # them for one walk and repeat them for the whole batch.
    per_walk = []
    for i in range(walk_length):
        for j in range(max(i - window_size, 0), i):
            per_walk.append(1.0 - float(i - j - 1) / span)
        for j in range(i + 1, min(i + 1 + window_size, walk_length)):
            per_walk.append(1.0 - float(j - i - 1) / span)

    weight = [w for _ in range(batch_size) for w in per_walk]

    # [num_pos * batch_size, 1]
    return torch.Tensor(weight).unsqueeze(1)
def adam(grad, state_sum, nodes, lr, device, only_gpu):
    """Scale gradients by the running root of their squared magnitude.

    Despite its name this is an Adagrad-style rule: the mean squared
    gradient of every row is accumulated into ``state_sum`` (in place, at
    the positions given by ``nodes``) and each gradient row is divided by
    the square root of the accumulated value.

    Parameters
    ----------
    grad torch.Tensor : one gradient row per entry of ``nodes``
    state_sum torch.Tensor : per-node accumulator, updated in place
    nodes torch.LongTensor : node id of every gradient row
    lr float : learning rate
    device torch.device : device the scaled gradient is computed on
    only_gpu bool : if False the accumulator is updated on the CPU

    Return
    ------
    torch.Tensor : ``lr * grad / (sqrt(state_sum[nodes]) + 1e-10)``
    """
    sq_mean = grad.mul(grad).mean(dim=1)
    if not only_gpu:
        # the accumulator lives on the CPU unless everything is on the GPU
        sq_mean = sq_mean.cpu()
    state_sum.index_add_(0, nodes, sq_mean)

    # Indexing yields a copy, so the in-place ops leave state_sum untouched.
    denom = state_sum[nodes].to(device)
    denom.sqrt_().add_(1e-10)
    return lr * grad / denom.unsqueeze(1)
to the nodes in a context window async_update : asynchronous training """ super(SkipGramModel, self).__init__() self.emb_size = emb_size self.emb_dimension = emb_dimension self.walk_length = walk_length self.window_size = window_size self.batch_size = batch_size self.only_cpu = only_cpu self.only_gpu = only_gpu self.mixed_train = mix self.neg_weight = neg_weight self.negative = negative self.lr = lr self.lap_norm = lap_norm self.fast_neg = fast_neg self.record_loss = record_loss self.norm = norm self.use_context_weight = use_context_weight self.async_update = async_update self.num_threads = num_threads # initialize the device as cpu self.device = torch.device("cpu") # content embedding self.u_embeddings = nn.Embedding( self.emb_size, self.emb_dimension, sparse=True ) # context embedding self.v_embeddings = nn.Embedding( self.emb_size, self.emb_dimension, sparse=True ) # initialze embedding initrange = 1.0 / self.emb_dimension init.uniform_(self.u_embeddings.weight.data, -initrange, initrange) init.constant_(self.v_embeddings.weight.data, 0) # lookup_table is used for fast sigmoid computing self.lookup_table = torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)) self.lookup_table[0] = 0.0 self.lookup_table[-1] = 1.0 if self.record_loss: self.logsigmoid_table = torch.log( torch.sigmoid(torch.arange(-6.01, 6.01, 0.01)) ) self.loss = [] # indexes to select positive/negative node pairs from batch_walks self.index_emb_posu, self.index_emb_posv = init_emb2pos_index( self.walk_length, self.window_size, self.batch_size ) self.index_emb_negu, self.index_emb_negv = init_emb2neg_index( self.walk_length, self.window_size, self.negative, self.batch_size ) if self.use_context_weight: self.context_weight = init_weight( self.walk_length, self.window_size, self.batch_size ) # adam self.state_sum_u = torch.zeros(self.emb_size) self.state_sum_v = torch.zeros(self.emb_size) # gradients of nodes in batch_walks self.grad_u, self.grad_v = init_empty_grad( self.emb_dimension, self.walk_length, 
def fast_learn(self, batch_walks, neg_nodes=None):
    """Update the embeddings from one batch of random walks.

    Gradients are computed in closed form (there is no autograd forward /
    backward pass) and the sigmoid is read from ``self.lookup_table``.
    For each positive/negative node pair (i, j) the raw update is:

        score = self.fast_sigmoid(u_embedding[i].dot(v_embedding[j]))
        # label = 1 for positive samples; label = 0 for negative samples.
        u_embedding[i] += (label - score) * v_embedding[j]
        v_embedding[j] += (label - score) * u_embedding[i]

    Positive pairs additionally get the ``lap_norm`` term. The accumulated
    gradients are rescaled by ``adam()`` with ``self.lr`` and then added
    to the embedding tables, either directly or through the async queue.

    Parameters
    ----------
    batch_walks list or torch.LongTensor : the walks of the batch, either
        a list of equally long node sequences or one
        [batch_size, walk_length] tensor
    neg_nodes torch.LongTensor : sampled negative nodes. If neg_nodes is
        None, negatives are taken from the nodes in batch_walks through
        the shuffled ``index_emb_negv`` index.

    Raises
    ------
    TypeError : if batch_walks is neither a list nor a torch.LongTensor

    Usage example
    -------------
    batch_walks = [torch.LongTensor([1,2,3,4]),
                   torch.LongTensor([2,3,4,2])]
    neg_nodes = None
    """
    lr = self.lr

    # [batch_size, walk_length]
    if isinstance(batch_walks, list):
        nodes = torch.stack(batch_walks)
    elif isinstance(batch_walks, torch.LongTensor):
        nodes = batch_walks
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``nodes``.
        raise TypeError(
            "batch_walks must be a list of tensors or a torch.LongTensor, "
            "got %s" % type(batch_walks).__name__
        )
    if self.only_gpu:
        nodes = nodes.to(self.device)
        if neg_nodes is not None:
            neg_nodes = neg_nodes.to(self.device)
    emb_u = (
        self.u_embeddings(nodes)
        .view(-1, self.emb_dimension)
        .to(self.device)
    )
    emb_v = (
        self.v_embeddings(nodes)
        .view(-1, self.emb_dimension)
        .to(self.device)
    )

    ## Positive
    bs = len(batch_walks)
    # The cached indices fit a full batch only; rebuild them for a
    # smaller (typically the last) batch.
    if bs < self.batch_size:
        index_emb_posu, index_emb_posv = init_emb2pos_index(
            self.walk_length, self.window_size, bs
        )
        index_emb_posu = index_emb_posu.to(self.device)
        index_emb_posv = index_emb_posv.to(self.device)
    else:
        index_emb_posu = self.index_emb_posu
        index_emb_posv = self.index_emb_posv

    # num_pos: the number of positive node pairs generated by a single
    # walk sequence
    # [batch_size * num_pos, dim]
    emb_pos_u = torch.index_select(emb_u, 0, index_emb_posu)
    emb_pos_v = torch.index_select(emb_v, 0, index_emb_posv)

    pos_score = torch.sum(torch.mul(emb_pos_u, emb_pos_v), dim=1)
    # keep the score inside the range covered by the lookup tables
    pos_score = torch.clamp(pos_score, max=6, min=-6)
    # [batch_size * num_pos, 1]
    score = (1 - self.fast_sigmoid(pos_score)).unsqueeze(1)
    if self.record_loss:
        self.loss.append(torch.mean(self.fast_logsigmoid(pos_score)).item())

    # [batch_size * num_pos, dim]
    if self.lap_norm > 0:
        grad_u_pos = score * emb_pos_v + self.lap_norm * (
            emb_pos_v - emb_pos_u
        )
        grad_v_pos = score * emb_pos_u + self.lap_norm * (
            emb_pos_u - emb_pos_v
        )
    else:
        grad_u_pos = score * emb_pos_v
        grad_v_pos = score * emb_pos_u

    if self.use_context_weight:
        if bs < self.batch_size:
            context_weight = init_weight(
                self.walk_length, self.window_size, bs
            ).to(self.device)
        else:
            context_weight = self.context_weight
        grad_u_pos *= context_weight
        grad_v_pos *= context_weight

    # [batch_size * walk_length, dim]
    if bs < self.batch_size:
        grad_u, grad_v = init_empty_grad(
            self.emb_dimension, self.walk_length, bs
        )
        grad_u = grad_u.to(self.device)
        grad_v = grad_v.to(self.device)
    else:
        # reuse the pre-allocated gradient buffers
        self.grad_u = self.grad_u.to(self.device)
        self.grad_u.zero_()
        self.grad_v = self.grad_v.to(self.device)
        self.grad_v.zero_()
        grad_u = self.grad_u
        grad_v = self.grad_v
    grad_u.index_add_(0, index_emb_posu, grad_u_pos)
    grad_v.index_add_(0, index_emb_posv, grad_v_pos)

    ## Negative
    if bs < self.batch_size:
        index_emb_negu, index_emb_negv = init_emb2neg_index(
            self.walk_length, self.window_size, self.negative, bs
        )
        index_emb_negu = index_emb_negu.to(self.device)
        index_emb_negv = index_emb_negv.to(self.device)
    else:
        index_emb_negu = self.index_emb_negu
        index_emb_negv = self.index_emb_negv
    emb_neg_u = torch.index_select(emb_u, 0, index_emb_negu)

    if neg_nodes is None:
        emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv)
    else:
        emb_neg_v = self.v_embeddings.weight[neg_nodes].to(self.device)

    # one row per negative pair
    neg_score = torch.sum(torch.mul(emb_neg_u, emb_neg_v), dim=1)
    neg_score = torch.clamp(neg_score, max=6, min=-6)
    score = -self.fast_sigmoid(neg_score).unsqueeze(1)
    if self.record_loss:
        self.loss.append(
            self.negative
            * self.neg_weight
            * torch.mean(self.fast_logsigmoid(-neg_score)).item()
        )

    grad_u_neg = self.neg_weight * score * emb_neg_v
    grad_v_neg = self.neg_weight * score * emb_neg_u

    grad_u.index_add_(0, index_emb_negu, grad_u_neg)
    if neg_nodes is None:
        # in-batch negatives share the batch gradient buffer; sampled
        # negatives are applied separately below
        grad_v.index_add_(0, index_emb_negv, grad_v_neg)

    ## Update
    nodes = nodes.view(-1)

    # rescale the gradients with the accumulated squared gradients
    grad_u = adam(
        grad_u, self.state_sum_u, nodes, lr, self.device, self.only_gpu
    )
    grad_v = adam(
        grad_v, self.state_sum_v, nodes, lr, self.device, self.only_gpu
    )
    if neg_nodes is not None:
        grad_v_neg = adam(
            grad_v_neg,
            self.state_sum_v,
            neg_nodes,
            lr,
            self.device,
            self.only_gpu,
        )

    if self.mixed_train:
        # in mixed training the embedding tables stay on the CPU
        grad_u = grad_u.cpu()
        grad_v = grad_v.cpu()
        if neg_nodes is not None:
            grad_v_neg = grad_v_neg.cpu()
        else:
            grad_v_neg = None

        if self.async_update:
            # hand the update over to the async update subprocess
            grad_u.share_memory_()
            grad_v.share_memory_()
            nodes.share_memory_()
            if neg_nodes is not None:
                neg_nodes.share_memory_()
                grad_v_neg.share_memory_()
            self.async_q.put((grad_u, grad_v, grad_v_neg, nodes, neg_nodes))

    if not self.async_update:
        self.u_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_u)
        self.v_embeddings.weight.data.index_add_(0, nodes.view(-1), grad_v)
        if neg_nodes is not None:
            self.v_embeddings.weight.data.index_add_(
                0, neg_nodes.view(-1), grad_v_neg
            )
def save_embedding_pt(self, dataset, file_name):
    """Save the embedding with ``torch.save``, indexed by original node id.

    For ogb leaderboard. Row ``k`` of the trained table is written to row
    ``dataset.id2node[k]`` of the saved tensor; rows of node ids that do
    not occur stay zero. If ``self.norm`` is set every row is divided by
    ``sqrt(sum(row ** 2) + 1e-6)``.

    If this fails (for instance because the dataset has no
    ``node2id`` / ``id2node`` mapping) the embedding is saved with
    ``save_embedding_pt_dgl_graph`` instead.

    Parameter
    ---------
    dataset DeepwalkDataset : the dataset
    file_name str : the file name
    """
    try:
        max_node_id = max(dataset.node2id.keys())
        if max_node_id + 1 != self.emb_size:
            print("WARNING: The node ids are not serial.")

        embedding = torch.zeros(max_node_id + 1, self.emb_dimension)
        index = torch.LongTensor(
            [dataset.id2node[idx] for idx in range(self.emb_size)]
        )
        embedding.index_add_(0, index, self.u_embeddings.weight.cpu().data)

        if self.norm:
            embedding /= torch.sqrt(
                torch.sum(embedding.mul(embedding), 1) + 1e-6
            ).unsqueeze(1)

        torch.save(embedding, file_name)
    except Exception:
        # Best-effort fallback kept from the original, but no longer
        # trapping KeyboardInterrupt / SystemExit.
        self.save_embedding_pt_dgl_graph(dataset, file_name)
def ReadTxtNet(file_path="", undirected=True):
    """Read the txt network file.

    Every non-empty line is ``<src> <dst>`` or ``<src> <dst> <weight>``
    with whitespace-separated fields; a missing weight counts as 1. Node
    ids are integers and are mapped to consecutive embedding indices in
    order of first appearance. Repeated edges are ignored (the first
    weight wins). The names "youtube" and "blog" download the
    corresponding builtin dataset first.

    Parameters
    ----------
    file_path str : path of network file
    undirected bool : whether the edges are undirected

    Return
    ------
    net dict : a dict recording the connections in the graph
    node2id dict : a dict mapping the nodes to their embedding indices
    id2node dict : a dict mapping nodes embedding indices to the nodes
    sm scipy.sparse.coo_matrix : float32 weighted adjacency matrix
    """
    if file_path == "youtube" or file_path == "blog":
        name = file_path
        download_dir = get_download_dir()
        zip_file_path = "{}/{}.zip".format(download_dir, name)
        download(
            _get_dgl_url(
                os.path.join("dataset/DeepWalk/", "{}.zip".format(file_path))
            ),
            path=zip_file_path,
        )
        extract_archive(zip_file_path, "{}/{}".format(download_dir, name))
        file_path = "{}/{}/{}-net.txt".format(download_dir, name, name)

    node2id = {}
    id2node = {}
    src = []
    dst = []
    weight = []
    net = {}

    def node_index(node):
        # Embedding index of ``node``, allocating the next one if new.
        idx = node2id.get(node)
        if idx is None:
            idx = len(node2id)
            node2id[node] = idx
            id2node[idx] = node
        return idx

    def parse_weight(text):
        # Integer weights stay ints; anything else must parse as float.
        try:
            return int(text)
        except ValueError:
            return float(text)

    def add_edge(u, v, w):
        # Record the edge u -> v unless it is already known.
        neighbors = net.setdefault(u, {})
        if v not in neighbors:
            neighbors[v] = w
            src.append(u)
            dst.append(v)
            weight.append(w)

    with open(file_path, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # tolerate blank lines, e.g. at the end of the file
                continue
            assert len(fields) in [
                2,
                3,
            ], "The format of network file is unrecognizable."
            n1 = node_index(int(fields[0]))
            n2 = node_index(int(fields[1]))
            w = parse_weight(fields[2]) if len(fields) == 3 else 1

            add_edge(n1, n2, w)
            if undirected:
                add_edge(n2, n1, w)

    print("node num: %d" % len(net))
    print("edge num: %d" % len(src))
    # every node must own at least one outgoing edge
    assert max(net.keys()) == len(net) - 1, "error reading net, quit"

    sm = sp.coo_matrix((np.array(weight), (src, dst)), dtype=np.float32)

    return net, node2id, id2node, sm
def find_connected_nodes(G):
    """Return the ids of the nodes of ``G`` with a non-zero out-degree."""
    out_degrees = G.out_degrees()
    # nonzero() gives an [n, 1] index tensor; drop the trailing axis
    return out_degrees.nonzero().squeeze(-1)
def sum_up_params(model):
    """Print the number of values held by the model, then exit.

    The embedding table, the positive index and the gradient buffer are
    counted twice (once for the ``u`` and once for the ``v`` side), as are
    the negative index and the optimizer state when the model has them.
    ``grad_avg`` and ``context_weight`` are counted once if present.

    Raises
    ------
    SystemExit : always, after the count has been printed
    """
    total = model.u_embeddings.weight.numel() * 2
    total += model.lookup_table.numel()
    total += model.index_emb_posu.numel() * 2
    total += model.grad_u.numel() * 2

    # Tensors that only exist for some model configurations.
    optional = (
        ("index_emb_negu", 2),
        ("state_sum_u", 2),
        ("grad_avg", 1),
        ("context_weight", 1),
    )
    for attr, copies in optional:
        tensor = getattr(model, attr, None)
        if tensor is not None:
            total += tensor.numel() * copies

    print("#params " + str(total))
    # Same effect as the interactive ``exit()`` helper, without relying on
    # the ``site`` module having installed it.
    raise SystemExit
def aggregate_dir_dx(h, vector_field, h_in, eig_idx=1):
    """Aggregate messages along one channel of the vector field.

    Channel ``eig_idx`` of ``vector_field`` is L1-normalised over dim 1
    and used to weight the messages ``h``. The result is the absolute
    difference between the weighted message sum and ``h_in`` scaled by
    the sum of the weights.
    """
    field = vector_field[:, :, eig_idx]
    # the small constant guards against an all-zero field
    l1_norm = field.abs().sum(dim=1, keepdim=True) + 1e-8
    eig_w = (field / l1_norm).unsqueeze(-1)

    weighted_sum = (h * eig_w).sum(dim=1)
    return (weighted_sum - eig_w.sum(dim=1) * h_in).abs()
class MLPReadout(nn.Module):
    """Readout MLP whose hidden width halves at every layer.

    ``L`` hidden linear layers map ``input_dim // 2**i`` to
    ``input_dim // 2**(i + 1)``, each followed by ReLU; a final linear layer
    maps ``input_dim // 2**L`` to ``output_dim`` without an activation.
    """

    def __init__(self, input_dim, output_dim, L=2):  # L=nb_hidden_layers
        super().__init__()
        # widths[i] is the input width of layer i; widths[L] feeds the head.
        widths = [input_dim // 2**depth for depth in range(L + 1)]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out, bias=True))
        stack.append(nn.Linear(widths[L], output_dim, bias=True))
        self.FC_layers = nn.ModuleList(stack)
        self.L = L

    def forward(self, x):
        """Apply the ``L`` ReLU-activated hidden layers, then the linear head."""
        out = x
        for depth in range(self.L):
            out = F.relu(self.FC_layers[depth](out))
        return self.FC_layers[self.L](out)
def train(dataset, params):
    """Train a ``DGNNet`` and print the best train/validation/test AP.

    Parameters
    ----------
    dataset
        Object exposing ``train``, ``val`` and ``test`` splits plus a
        ``collate`` callable used as the loaders' ``collate_fn``.
    params
        Namespace with ``device`` and ``batch_size``. ``epochs``,
        ``hidden_dim``, ``out_dim`` and ``dropout`` are optional; when an
        attribute is absent the previously hard-coded value is used
        (450 / 420 / 420 / 0.2), so existing callers keep working.

    Training stops early once the learning rate of the first parameter group
    drops below ``1e-5``.
    """
    trainset, valset, testset = dataset.train, dataset.val, dataset.test
    device = params.device

    # These used to be fixed constants, which made the hyper-parameters
    # advertised in the README impossible to change from the command line.
    n_epochs = getattr(params, "epochs", 450)
    hidden_dim = getattr(params, "hidden_dim", 420)
    out_dim = getattr(params, "out_dim", 420)
    dropout = getattr(params, "dropout", 0.2)

    print("Training Graphs: ", len(trainset))
    print("Validation Graphs: ", len(valset))
    print("Test Graphs: ", len(testset))

    model = DGNNet(hidden_dim=hidden_dim, out_dim=out_dim, dropout=dropout)
    model = model.to(device)

    # view model parameters
    total_param = 0
    print("MODEL DETAILS:\n")
    for param in model.parameters():
        total_param += np.prod(list(param.data.size()))
    print("DGN Total parameters:", total_param)

    optimizer = optim.Adam(model.parameters(), lr=0.0008, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.8, patience=8
    )

    epoch_train_APs, epoch_val_APs, epoch_test_APs = [], [], []

    def make_loader(split, shuffle):
        # All three loaders differ only in the split and in shuffling.
        return GraphDataLoader(
            split,
            batch_size=params.batch_size,
            shuffle=shuffle,
            collate_fn=dataset.collate,
            pin_memory=True,
        )

    train_loader = make_loader(trainset, True)
    val_loader = make_loader(valset, False)
    test_loader = make_loader(testset, False)

    with tqdm(range(n_epochs), unit="epoch") as t:
        for epoch in t:
            t.set_description("Epoch %d" % epoch)

            epoch_train_loss, epoch_train_ap = train_epoch(
                model, optimizer, device, train_loader
            )
            _, epoch_val_ap = evaluate_network(model, device, val_loader)
            _, epoch_test_ap = evaluate_network(model, device, test_loader)

            epoch_train_APs.append(epoch_train_ap.item())
            epoch_val_APs.append(epoch_val_ap.item())
            epoch_test_APs.append(epoch_test_ap.item())

            t.set_postfix(
                train_loss=epoch_train_loss,
                train_AP=epoch_train_ap.item(),
                val_AP=epoch_val_ap.item(),
                refresh=False,
            )

            # The scheduler runs in "min" mode, so feed it the negated AP.
            scheduler.step(-epoch_val_ap.item())

            if optimizer.param_groups[0]["lr"] < 1e-5:
                print("\n!! LR EQUAL TO MIN LR SET.")
                break

            print("")

    best_val_epoch = np.argmax(np.array(epoch_val_APs))
    best_train_epoch = np.argmax(np.array(epoch_train_APs))
    best_val_ap = epoch_val_APs[best_val_epoch]
    best_val_test_ap = epoch_test_APs[best_val_epoch]
    best_val_train_ap = epoch_train_APs[best_val_epoch]
    best_train_ap = epoch_train_APs[best_train_epoch]

    print("Best Train AP: {:.4f}".format(best_train_ap))
    print("Best Val AP: {:.4f}".format(best_val_ap))
    print("Test AP of Best Val: {:.4f}".format(best_val_test_ap))
    print("Train AP of Best Val: {:.4f}".format(best_val_train_ap))


class Subset(object):
    """Graphs and labels selected by ``indices``, keeping graphs with > 5 nodes."""

    def __init__(self, dataset, labels, indices):
        dataset = [dataset[idx] for idx in indices]
        labels = [labels[idx] for idx in indices]
        self.dataset, self.labels = [], []
        for i, g in enumerate(dataset):
            if g.num_nodes() > 5:
                self.dataset.append(g)
                self.labels.append(labels[i])
        self.len = len(self.dataset)

    def __getitem__(self, item):
        return self.dataset[item], self.labels[item]

    def __len__(self):
        return self.len


class PCBADataset(Dataset):
    """Loads the preprocessed graphs and exposes train/val/test ``Subset`` splits."""

    def __init__(self, name):
        print("[I] Loading dataset %s..." % (name))
        self.name = name
        self.dataset, self.split_idx = prepare_dataset(name)
        print("One hot encoding substructure counts... ", end="")
        self.d_id = [1] * self.dataset[0].edata["subgraph_counts"].shape[1]

        # The per-edge substructure counts double as the "eig" vector field
        # that the DGN layers read from ``edges.data["eig"]``.
        for g in self.dataset:
            g.edata["eig"] = g.edata["subgraph_counts"].float()

        self.train = Subset(
            self.dataset, self.split_idx["label"], self.split_idx["train"]
        )
        self.val = Subset(
            self.dataset, self.split_idx["label"], self.split_idx["valid"]
        )
        self.test = Subset(
            self.dataset, self.split_idx["label"], self.split_idx["test"]
        )

        print(
            "train, test, val sizes :",
            len(self.train),
            len(self.test),
            len(self.val),
        )
        print("[I] Finished loading.")

    # form a mini batch from a given list of samples = [(graph, label) pairs]
    def collate(self, samples):
        # The input samples is a list of pairs (graph, label).
        graphs, labels = map(list, zip(*samples))
        labels = torch.stack(labels)

        # One row per node holding sqrt(1 / num_nodes_of_its_graph).
        tab_sizes_n = [g.num_nodes() for g in graphs]
        tab_snorm_n = [
            torch.FloatTensor(size, 1).fill_(1.0 / size) for size in tab_sizes_n
        ]
        snorm_n = torch.cat(tab_snorm_n).sqrt()
        batched_graph = dgl.batch(graphs)

        return batched_graph, labels, snorm_n


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--gpu_id", default=0, type=int, help="Please give a value for gpu id"
    )
    parser.add_argument(
        "--seed", default=41, type=int, help="Please give a value for seed"
    )
    parser.add_argument(
        "--batch_size",
        default=2048,
        type=int,
        help="Please give a value for batch_size",
    )
    # The README invokes the script with the four options below; without them
    # argparse aborts with "unrecognized arguments". Defaults equal the values
    # that were previously hard-coded.
    parser.add_argument(
        "--epochs",
        default=450,
        type=int,
        help="maximum number of training epochs",
    )
    parser.add_argument(
        "--hidden_dim",
        default=420,
        type=int,
        help="hidden feature size of the DGN layers",
    )
    parser.add_argument(
        "--out_dim",
        default=420,
        type=int,
        help="output feature size of the last DGN layer",
    )
    parser.add_argument(
        "--dropout", default=0.2, type=float, help="dropout probability"
    )
    args = parser.parse_args()

    # device
    args.device = torch.device(
        "cuda:{}".format(args.gpu_id) if torch.cuda.is_available() else "cpu"
    )

    # setting seeds
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)

    dataset = PCBADataset("ogbg-molpcba")
    train(dataset, args)
def to_undirected(edge_index):
    """Return every edge followed by every reversed edge, as a list of pairs.

    ``edge_index`` is transposed and unpacked into two rows, so its second
    dimension must have size 2 (one ``[u, v]`` row per edge). The result lists
    all ``[u, v]`` pairs in input order and then all ``[v, u]`` pairs in the
    same order.
    """
    src, dst = edge_index.transpose(1, 0)
    forward = torch.stack([src, dst], dim=1)
    backward = torch.stack([dst, src], dim=1)
    return torch.cat([forward, backward], dim=0).tolist()
def subgraph_isomorphism_edge_counts(edge_index, subgraph_dict):
    """Count, per edge and per edge orbit, the substructure matches it takes part in.

    Parameters
    ----------
    edge_index
        Tensor that is transposed to one ``(src, dst)`` row per edge before
        use, i.e. it is passed in with the two endpoints along dim 0.
    subgraph_dict
        Mapping with the keys read here: ``"subgraph"`` (graph-tool graph of
        the substructure), ``"orbit_partition"`` (its length gives the number
        of count columns), ``"orbit_membership"`` (orbit index for each entry
        of the undirected substructure edge list) and ``"aut_count"``
        (divisor applied to the raw counts).

    Returns
    -------
    torch.Tensor
        One row per input edge and one column per edge orbit, built from a
        NumPy float array after division by ``aut_count``.
    """
    ##### edge structural identifiers #####

    edge_index = edge_index.transpose(1, 0).cpu().numpy()
    # Map each (src, dst) pair to its row index so matches can be written
    # back to the right edge. If a pair occurs twice the later row wins.
    edge_dict = {}
    for i, edge in enumerate(edge_index):
        edge_dict[tuple(edge)] = i

    # Both directions of every substructure edge, in the order produced by
    # to_undirected.
    subgraph_edges = to_undirected(
        torch.tensor(subgraph_dict["subgraph"].get_edges().tolist())
    )

    # Undirected graph-tool copy of the input graph with self loops and
    # parallel edges removed.
    G_gt = gt.Graph(directed=False)
    G_gt.add_edge_list(list(edge_index))
    gt.stats.remove_self_loops(G_gt)
    gt.stats.remove_parallel_edges(G_gt)

    # compute all subgraph isomorphisms
    sub_iso = gt_topology.subgraph_isomorphism(
        subgraph_dict["subgraph"],
        G_gt,
        induced=True,
        subgraph=True,
        generator=True,
    )

    counts = np.zeros(
        (edge_index.shape[0], len(subgraph_dict["orbit_partition"]))
    )

    for sub_iso_curr in sub_iso:
        mapping = sub_iso_curr.get_array()
        for i, edge in enumerate(subgraph_edges):
            # for every edge in the graph H, find the edge in the subgraph G_S
            # to which it is mapped (by finding where its endpoints are
            # matched). Then, increase the count of the matched edge w.r.t.
            # the corresponding orbit. Repeat for the reverse edge (the one
            # with the opposite direction).
            # NOTE(review): the lookup below raises KeyError if the mapped
            # (src, dst) pair is not a row of edge_index -- presumably the
            # input always stores both directions; confirm against callers.
            edge_orbit = subgraph_dict["orbit_membership"][i]
            mapped_edge = tuple([mapping[edge[0]], mapping[edge[1]]])
            counts[edge_dict[mapped_edge], edge_orbit] += 1

    # Normalise the raw match counts by the substructure's automorphism count.
    counts = counts / subgraph_dict["aut_count"]

    counts = torch.tensor(counts)
    return counts
def _prepare(g, subgraph_dicts):
    """Attach per-edge substructure counts to ``g`` as ``edata["subgraph_counts"]``.

    For every entry of ``subgraph_dicts`` the edge counts are computed with
    ``subgraph_isomorphism_edge_counts``; the results are concatenated along
    dim 1 in list order, cast to ``long`` and stored on the graph, which is
    then returned.
    """
    edge_index = torch.stack(g.edges())
    per_substructure = [
        subgraph_isomorphism_edge_counts(edge_index, spec)
        for spec in subgraph_dicts
    ]
    # With no substructures there is nothing to concatenate; ``identifiers``
    # stays ``None`` exactly as in the incremental formulation.
    identifiers = torch.cat(per_substructure, 1) if per_substructure else None
    g.edata["subgraph_counts"] = identifiers.long()
    return g
## Used config ogbn-arxiv ``` python3 line.py --save_in_pt --dim 128 --lap_norm 0.1 --mix --gpus 0 --batch_size 1024 --output_emb_file arxiv-embedding.pt --num_samples 1000 --print_interval 1000 --negative 5 --fast_neg --load_from_ogbn --ogbn_name ogbn-arxiv cd ./ogb/blob/master/examples/nodeproppred/arxiv cp embedding_pt_file_path ./ python3 mlp.py --device 0 --use_node_embedding ``` ogbn-proteins ``` python3 line.py --save_in_pt --dim 128 --lap_norm 0.01 --mix --gpus 1 --batch_size 1024 --output_emb_file protein-embedding.pt --num_samples 600 --print_interval 1000 --negative 1 --fast_neg --load_from_ogbn --ogbn_name ogbn-proteins --print_loss cd ./ogb/blob/master/examples/nodeproppred/proteins cp embedding_pt_file_path ./ python3 mlp.py --device 0 --use_node_embedding ``` ogbn-products ``` python3 line.py --save_in_pt --dim 128 --lap_norm 0.01 --mix --gpus 0 --batch_size 4096 --output_emb_file products-embedding.pt --num_samples 3000 --print_interval 1000 --negative 1 --fast_neg --load_from_ogbn --ogbn_name ogbn-products --print_loss cd ./ogb/blob/master/examples/nodeproppred/products cp embedding_pt_file_path ./ python3 mlp.py --device 0 --use_node_embedding ``` ## Results ogbn-arxiv
#params: 33023343(model) + 142888(mlp) = 33166231
Highest Train: 82.94 ± 0.11
Highest Valid: 71.76 ± 0.08
Final Train: 80.74 ± 1.30
Final Test: 70.47 ± 0.19
ogbn-proteins
#params: 25853524(model) + 129648(mlp) = 25983172
Highest Train: 93.11 ± 0.04
Highest Valid: 70.50 ± 1.29
Final Train: 77.66 ± 10.27
Final Test: 62.07 ± 1.25
ogbn-products
#params: 477570049(model) + 136495(mlp) = 477706544
Highest Train: 98.01 ± 0.32
Highest Valid: 89.57 ± 0.09
Final Train: 94.96 ± 0.43
Final Test: 72.52 ± 0.29 ## Notes To utlize multi-GPU training, we need to load datasets as a local file before training by the following command: ``` python3 load_dataset.py --name dataset_name ``` where `dataset_name` can be `ogbn-arxiv`, `ogbn-proteins`, and `ogbn-products`. After that, a local file `$dataset_name$-graph.bin` will be generated. Then run: ``` python3 line.py --data_file $dataset_name$-graph.bin ``` where the other parameters are the same with used configs without using `--load_from_ogbn` and `--ogbn_name`. ================================================ FILE: examples/pytorch/ogb/line/line.py ================================================ import argparse import os import random import time import dgl import numpy as np import torch import torch.multiprocessing as mp from model import SkipGramModel from reading_data import LineDataset from torch.utils.data import DataLoader from utils import check_args, sum_up_params class LineTrainer: def __init__(self, args): """Initializing the trainer with the input arguments""" self.args = args self.dataset = LineDataset( net_file=args.data_file, batch_size=args.batch_size, negative=args.negative, gpus=args.gpus, fast_neg=args.fast_neg, ogbl_name=args.ogbl_name, load_from_ogbl=args.load_from_ogbl, ogbn_name=args.ogbn_name, load_from_ogbn=args.load_from_ogbn, num_samples=args.num_samples * 1000000, ) self.emb_size = self.dataset.G.num_nodes() self.emb_model = None def init_device_emb(self): """set the device before training will be called once in fast_train_mp / fast_train """ choices = sum([self.args.only_gpu, self.args.only_cpu, self.args.mix]) assert ( choices == 1 ), "Must choose only *one* training mode in [only_cpu, only_gpu, mix]" # initializing embedding on CPU self.emb_model = SkipGramModel( emb_size=self.emb_size, emb_dimension=self.args.dim, batch_size=self.args.batch_size, only_cpu=self.args.only_cpu, only_gpu=self.args.only_gpu, only_fst=self.args.only_fst, only_snd=self.args.only_snd, 
mix=self.args.mix, neg_weight=self.args.neg_weight, negative=self.args.negative, lr=self.args.lr, lap_norm=self.args.lap_norm, fast_neg=self.args.fast_neg, record_loss=self.args.print_loss, async_update=self.args.async_update, num_threads=self.args.num_threads, ) torch.set_num_threads(self.args.num_threads) if self.args.only_gpu: print("Run in 1 GPU") assert self.args.gpus[0] >= 0 self.emb_model.all_to_device(self.args.gpus[0]) elif self.args.mix: print("Mix CPU with %d GPU" % len(self.args.gpus)) if len(self.args.gpus) == 1: assert ( self.args.gpus[0] >= 0 ), "mix CPU with GPU should have avaliable GPU" self.emb_model.set_device(self.args.gpus[0]) else: print("Run in CPU process") def train(self): """train the embedding""" if len(self.args.gpus) > 1: self.fast_train_mp() else: self.fast_train() def fast_train_mp(self): """multi-cpu-core or mix cpu & multi-gpu""" self.init_device_emb() self.emb_model.share_memory() sum_up_params(self.emb_model) start_all = time.time() ps = [] for i in range(len(self.args.gpus)): p = mp.Process( target=self.fast_train_sp, args=(i, self.args.gpus[i]) ) ps.append(p) p.start() for p in ps: p.join() print("Used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_pt: self.emb_model.save_embedding_pt( self.dataset, self.args.output_emb_file ) else: self.emb_model.save_embedding( self.dataset, self.args.output_emb_file ) def fast_train_sp(self, rank, gpu_id): """a subprocess for fast_train_mp""" if self.args.mix: self.emb_model.set_device(gpu_id) torch.set_num_threads(self.args.num_threads) if self.args.async_update: self.emb_model.create_async_update() sampler = self.dataset.create_sampler(rank) dataloader = DataLoader( dataset=sampler.seeds, batch_size=self.args.batch_size, collate_fn=sampler.sample, shuffle=False, drop_last=False, num_workers=self.args.num_sampler_threads, ) num_batches = len(dataloader) print( "num batchs: %d in process [%d] GPU [%d]" % (num_batches, rank, gpu_id) ) start = time.time() with torch.no_grad(): 
for i, edges in enumerate(dataloader): if self.args.fast_neg: self.emb_model.fast_learn(edges) else: # do negative sampling bs = edges.size()[0] neg_nodes = torch.LongTensor( np.random.choice( self.dataset.neg_table, bs * self.args.negative, replace=True, ) ) self.emb_model.fast_learn(edges, neg_nodes=neg_nodes) if i > 0 and i % self.args.print_interval == 0: if self.args.print_loss: if self.args.only_fst: print( "GPU-[%d] batch %d time: %.2fs fst-loss: %.4f" % ( gpu_id, i, time.time() - start, -sum(self.emb_model.loss_fst) / self.args.print_interval, ) ) elif self.args.only_snd: print( "GPU-[%d] batch %d time: %.2fs snd-loss: %.4f" % ( gpu_id, i, time.time() - start, -sum(self.emb_model.loss_snd) / self.args.print_interval, ) ) else: print( "GPU-[%d] batch %d time: %.2fs fst-loss: %.4f snd-loss: %.4f" % ( gpu_id, i, time.time() - start, -sum(self.emb_model.loss_fst) / self.args.print_interval, -sum(self.emb_model.loss_snd) / self.args.print_interval, ) ) self.emb_model.loss_fst = [] self.emb_model.loss_snd = [] else: print( "GPU-[%d] batch %d time: %.2fs" % (gpu_id, i, time.time() - start) ) start = time.time() if self.args.async_update: self.emb_model.finish_async_update() def fast_train(self): """fast train with dataloader with only gpu / only cpu""" self.init_device_emb() if self.args.async_update: self.emb_model.share_memory() self.emb_model.create_async_update() sum_up_params(self.emb_model) sampler = self.dataset.create_sampler(0) dataloader = DataLoader( dataset=sampler.seeds, batch_size=self.args.batch_size, collate_fn=sampler.sample, shuffle=False, drop_last=False, num_workers=self.args.num_sampler_threads, ) num_batches = len(dataloader) print("num batchs: %d\n" % num_batches) start_all = time.time() start = time.time() with torch.no_grad(): for i, edges in enumerate(dataloader): if self.args.fast_neg: self.emb_model.fast_learn(edges) else: # do negative sampling bs = edges.size()[0] neg_nodes = torch.LongTensor( np.random.choice( self.dataset.neg_table, 
bs * self.args.negative, replace=True, ) ) self.emb_model.fast_learn(edges, neg_nodes=neg_nodes) if i > 0 and i % self.args.print_interval == 0: if self.args.print_loss: if self.args.only_fst: print( "Batch %d time: %.2fs fst-loss: %.4f" % ( i, time.time() - start, -sum(self.emb_model.loss_fst) / self.args.print_interval, ) ) elif self.args.only_snd: print( "Batch %d time: %.2fs snd-loss: %.4f" % ( i, time.time() - start, -sum(self.emb_model.loss_snd) / self.args.print_interval, ) ) else: print( "Batch %d time: %.2fs fst-loss: %.4f snd-loss: %.4f" % ( i, time.time() - start, -sum(self.emb_model.loss_fst) / self.args.print_interval, -sum(self.emb_model.loss_snd) / self.args.print_interval, ) ) self.emb_model.loss_fst = [] self.emb_model.loss_snd = [] else: print( "Batch %d, training time: %.2fs" % (i, time.time() - start) ) start = time.time() if self.args.async_update: self.emb_model.finish_async_update() print("Training used time: %.2fs" % (time.time() - start_all)) if self.args.save_in_pt: self.emb_model.save_embedding_pt( self.dataset, self.args.output_emb_file ) else: self.emb_model.save_embedding( self.dataset, self.args.output_emb_file ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Implementation of LINE.") # input files ## personal datasets parser.add_argument("--data_file", type=str, help="path of dgl graphs") ## ogbl datasets parser.add_argument( "--ogbl_name", type=str, help="name of ogbl dataset, e.g. ogbl-ddi" ) parser.add_argument( "--load_from_ogbl", default=False, action="store_true", help="whether load dataset from ogbl", ) parser.add_argument( "--ogbn_name", type=str, help="name of ogbn dataset, e.g. 
ogbn-proteins" ) parser.add_argument( "--load_from_ogbn", default=False, action="store_true", help="whether load dataset from ogbn", ) # output files parser.add_argument( "--save_in_pt", default=False, action="store_true", help="Whether save dat in pt format or npy", ) parser.add_argument( "--output_emb_file", type=str, default="emb.npy", help="path of the output npy embedding file", ) # model parameters parser.add_argument( "--dim", default=128, type=int, help="embedding dimensions" ) parser.add_argument( "--num_samples", default=1, type=int, help="number of samples during training (million)", ) parser.add_argument( "--negative", default=1, type=int, help="negative samples for each positve node pair", ) parser.add_argument( "--batch_size", default=128, type=int, help="number of edges in each batch", ) parser.add_argument( "--neg_weight", default=1.0, type=float, help="negative weight" ) parser.add_argument( "--lap_norm", default=0.01, type=float, help="weight of laplacian normalization", ) # training parameters parser.add_argument( "--only_fst", default=False, action="store_true", help="only do first-order proximity embedding", ) parser.add_argument( "--only_snd", default=False, action="store_true", help="only do second-order proximity embedding", ) parser.add_argument( "--print_interval", default=100, type=int, help="number of batches between printing", ) parser.add_argument( "--print_loss", default=False, action="store_true", help="whether print loss during training", ) parser.add_argument("--lr", default=0.2, type=float, help="learning rate") # optimization settings parser.add_argument( "--mix", default=False, action="store_true", help="mixed training with CPU and GPU", ) parser.add_argument( "--gpus", type=int, default=[-1], nargs="+", help="a list of active gpu ids, e.g. 
0, used with --mix", ) parser.add_argument( "--only_cpu", default=False, action="store_true", help="training with CPU", ) parser.add_argument( "--only_gpu", default=False, action="store_true", help="training with a single GPU (all of the parameters are moved on the GPU)", ) parser.add_argument( "--async_update", default=False, action="store_true", help="mixed training asynchronously, recommend not to use this", ) parser.add_argument( "--fast_neg", default=False, action="store_true", help="do negative sampling inside a batch", ) parser.add_argument( "--num_threads", default=2, type=int, help="number of threads used for each CPU-core/GPU", ) parser.add_argument( "--num_sampler_threads", default=2, type=int, help="number of threads used for sampling", ) args = parser.parse_args() if args.async_update: assert args.mix, "--async_update only with --mix" start_time = time.time() trainer = LineTrainer(args) trainer.train() print("Total used time: %.2f" % (time.time() - start_time)) ================================================ FILE: examples/pytorch/ogb/line/load_dataset.py ================================================ """ load dataset from ogb """ import argparse import dgl from ogb.linkproppred import DglLinkPropPredDataset from ogb.nodeproppred import DglNodePropPredDataset def load_from_ogbl_with_name(name): choices = ["ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation"] assert name in choices, "name must be selected from " + str(choices) dataset = DglLinkPropPredDataset(name) return dataset[0] def load_from_ogbn_with_name(name): choices = [ "ogbn-products", "ogbn-proteins", "ogbn-arxiv", "ogbn-papers100M", ] assert name in choices, "name must be selected from " + str(choices) dataset, label = DglNodePropPredDataset(name)[0] return dataset if __name__ == "__main__": """load datasets as net.txt format""" parser = argparse.ArgumentParser() parser.add_argument( "--name", type=str, choices=[ "ogbl-collab", "ogbl-ddi", "ogbl-ppa", "ogbl-citation", "ogbn-products", 
"ogbn-proteins", "ogbn-arxiv", "ogbn-papers100M", ], default="ogbl-collab", help="name of datasets by ogb", ) args = parser.parse_args() name = args.name if name.startswith("ogbl"): g = load_from_ogbl_with_name(name=name) else: g = load_from_ogbn_with_name(name=name) dgl.save_graphs(name + "-graph.bin", g) ================================================ FILE: examples/pytorch/ogb/line/model.py ================================================ import random import numpy as np import torch import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F from torch.multiprocessing import Queue from torch.nn import init def init_emb2neg_index(negative, batch_size): """select embedding of negative nodes from a batch of node embeddings for fast negative sampling Return ------ index_emb_negu torch.LongTensor : the indices of u_embeddings index_emb_negv torch.LongTensor : the indices of v_embeddings Usage ----- # emb_u.shape: [batch_size, dim] batch_emb2negu = torch.index_select(emb_u, 0, index_emb_negu) """ idx_list_u = list(range(batch_size)) * negative idx_list_v = list(range(batch_size)) * negative random.shuffle(idx_list_v) index_emb_negu = torch.LongTensor(idx_list_u) index_emb_negv = torch.LongTensor(idx_list_v) return index_emb_negu, index_emb_negv def adam(grad, state_sum, nodes, lr, device, only_gpu): """calculate gradients according to adam""" grad_sum = (grad * grad).mean(1) if not only_gpu: grad_sum = grad_sum.cpu() state_sum.index_add_(0, nodes, grad_sum) # cpu std = state_sum[nodes].to(device) # gpu std_values = std.sqrt_().add_(1e-10).unsqueeze(1) grad = lr * grad / std_values # gpu return grad def async_update(num_threads, model, queue): """Asynchronous embedding update for entity embeddings.""" torch.set_num_threads(num_threads) print("async start") while True: (grad_u, grad_v, grad_v_neg, nodes, neg_nodes, first_flag) = queue.get() if grad_u is None: return with torch.no_grad(): if first_flag: 
model.fst_u_embeddings.weight.data.index_add_( 0, nodes[:, 0], grad_u ) model.fst_u_embeddings.weight.data.index_add_( 0, nodes[:, 1], grad_v ) if neg_nodes is not None: model.fst_u_embeddings.weight.data.index_add_( 0, neg_nodes, grad_v_neg ) else: model.snd_u_embeddings.weight.data.index_add_( 0, nodes[:, 0], grad_u ) model.snd_v_embeddings.weight.data.index_add_( 0, nodes[:, 1], grad_v ) if neg_nodes is not None: model.snd_v_embeddings.weight.data.index_add_( 0, neg_nodes, grad_v_neg ) class SkipGramModel(nn.Module): """Negative sampling based skip-gram""" def __init__( self, emb_size, emb_dimension, batch_size, only_cpu, only_gpu, only_fst, only_snd, mix, neg_weight, negative, lr, lap_norm, fast_neg, record_loss, async_update, num_threads, ): """initialize embedding on CPU Paremeters ---------- emb_size int : number of nodes emb_dimension int : embedding dimension batch_size int : number of node sequences in each batch only_cpu bool : training with CPU only_gpu bool : training with GPU only_fst bool : only embedding for first-order proximity only_snd bool : only embedding for second-order proximity mix bool : mixed training with CPU and GPU negative int : negative samples for each positve node pair neg_weight float : negative weight lr float : initial learning rate lap_norm float : weight of laplacian normalization fast_neg bool : do negative sampling inside a batch record_loss bool : print the loss during training use_context_weight : give different weights to the nodes in a context window async_update : asynchronous training """ super(SkipGramModel, self).__init__() self.emb_size = emb_size self.batch_size = batch_size self.only_cpu = only_cpu self.only_gpu = only_gpu if only_fst: self.fst = True self.snd = False self.emb_dimension = emb_dimension elif only_snd: self.fst = False self.snd = True self.emb_dimension = emb_dimension else: self.fst = True self.snd = True self.emb_dimension = int(emb_dimension / 2) self.mixed_train = mix self.neg_weight = 
def finish_async_update(self):
    """Notify the async update subprocess to quit and wait for it.

    The stop message has six fields because the worker unpacks six values
    from every queue item; a shorter tuple would make that unpacking fail
    with ValueError instead of shutting the worker down cleanly.
    """
    self.async_q.put((None, None, None, None, None, None))
    self.async_p.join()
def fast_sigmoid(self, score):
    """Approximate the sigmoid of ``score`` through the lookup table.

    Each score is mapped to the table slot ``floor((score + 6.01) / 0.01)``
    and the precomputed value stored there is returned.
    """
    shifted = score + 6.01
    bucket = torch.floor(shifted / 0.01)
    return self.lookup_table[bucket.long()]
def fast_neg_bp(self, emb_neg_u, emb_neg_v, first_flag):
    """Compute the gradients contributed by negative node pairs.

    The row-wise dot product of the two embedding batches is clamped to
    [-6, 6], passed through the table sigmoid and negated; the result,
    scaled by ``neg_weight``, multiplies the opposite embedding of each pair.
    When ``record_loss`` is set, the weighted mean log-sigmoid of the negated
    scores is appended to ``loss_fst`` (``first_flag`` true) or ``loss_snd``.

    Returns
    -------
    (grad_u_neg, grad_v_neg) : gradients for the u side and the v side
    """
    dot = (emb_neg_u * emb_neg_v).sum(dim=1)
    neg_score = dot.clamp(min=-6, max=6)
    # [batch_size * negative, 1]
    score = -self.fast_sigmoid(neg_score).unsqueeze(1)

    if self.record_loss:
        history = self.loss_fst if first_flag else self.loss_snd
        history.append(
            self.negative
            * self.neg_weight
            * torch.mean(self.fast_logsigmoid(-neg_score)).item()
        )

    weighted = self.neg_weight * score
    return weighted * emb_neg_v, weighted * emb_neg_u
Usage example ------------- batch_walks = torch.LongTensor([[1,2], [3,4], [5,6]]) neg_nodes = None """ lr = self.lr # [batch_size, 2] nodes = batch_edges if self.only_gpu: nodes = nodes.to(self.device) if neg_nodes is not None: neg_nodes = neg_nodes.to(self.device) bs = len(nodes) if self.fst: emb_u = ( self.fst_u_embeddings(nodes[:, 0]) .view(-1, self.emb_dimension) .to(self.device) ) emb_v = ( self.fst_u_embeddings(nodes[:, 1]) .view(-1, self.emb_dimension) .to(self.device) ) ## Postive emb_pos_u, emb_pos_v = emb_u, emb_v grad_u_pos, grad_v_pos = self.fast_pos_bp( emb_pos_u, emb_pos_v, True ) ## Negative emb_neg_u = emb_pos_u.repeat((self.negative, 1)) if bs < self.batch_size: index_emb_negu, index_emb_negv = init_emb2neg_index( self.negative, bs ) index_emb_negu = index_emb_negu.to(self.device) index_emb_negv = index_emb_negv.to(self.device) else: index_emb_negu = self.index_emb_negu index_emb_negv = self.index_emb_negv if neg_nodes is None: emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv) else: emb_neg_v = self.fst_u_embeddings.weight[neg_nodes].to( self.device ) grad_u_neg, grad_v_neg = self.fast_neg_bp( emb_neg_u, emb_neg_v, True ) ## Update grad_u_pos.index_add_(0, index_emb_negu, grad_u_neg) grad_u = grad_u_pos if neg_nodes is None: grad_v_pos.index_add_(0, index_emb_negv, grad_v_neg) grad_v = grad_v_pos else: grad_v = grad_v_pos # use adam optimizer grad_u = adam( grad_u, self.fst_state_sum_u, nodes[:, 0], lr, self.device, self.only_gpu, ) grad_v = adam( grad_v, self.fst_state_sum_u, nodes[:, 1], lr, self.device, self.only_gpu, ) if neg_nodes is not None: grad_v_neg = adam( grad_v_neg, self.fst_state_sum_u, neg_nodes, lr, self.device, self.only_gpu, ) if self.mixed_train: grad_u = grad_u.cpu() grad_v = grad_v.cpu() if neg_nodes is not None: grad_v_neg = grad_v_neg.cpu() else: grad_v_neg = None if self.async_update: grad_u.share_memory_() grad_v.share_memory_() nodes.share_memory_() if neg_nodes is not None: neg_nodes.share_memory_() 
grad_v_neg.share_memory_() self.async_q.put( (grad_u, grad_v, grad_v_neg, nodes, neg_nodes, True) ) if not self.async_update: self.fst_u_embeddings.weight.data.index_add_( 0, nodes[:, 0], grad_u ) self.fst_u_embeddings.weight.data.index_add_( 0, nodes[:, 1], grad_v ) if neg_nodes is not None: self.fst_u_embeddings.weight.data.index_add_( 0, neg_nodes, grad_v_neg ) if self.snd: emb_u = ( self.snd_u_embeddings(nodes[:, 0]) .view(-1, self.emb_dimension) .to(self.device) ) emb_v = ( self.snd_v_embeddings(nodes[:, 1]) .view(-1, self.emb_dimension) .to(self.device) ) ## Postive emb_pos_u, emb_pos_v = emb_u, emb_v grad_u_pos, grad_v_pos = self.fast_pos_bp( emb_pos_u, emb_pos_v, False ) ## Negative emb_neg_u = emb_pos_u.repeat((self.negative, 1)) if bs < self.batch_size: index_emb_negu, index_emb_negv = init_emb2neg_index( self.negative, bs ) index_emb_negu = index_emb_negu.to(self.device) index_emb_negv = index_emb_negv.to(self.device) else: index_emb_negu = self.index_emb_negu index_emb_negv = self.index_emb_negv if neg_nodes is None: emb_neg_v = torch.index_select(emb_v, 0, index_emb_negv) else: emb_neg_v = self.snd_v_embeddings.weight[neg_nodes].to( self.device ) grad_u_neg, grad_v_neg = self.fast_neg_bp( emb_neg_u, emb_neg_v, False ) ## Update grad_u_pos.index_add_(0, index_emb_negu, grad_u_neg) grad_u = grad_u_pos if neg_nodes is None: grad_v_pos.index_add_(0, index_emb_negv, grad_v_neg) grad_v = grad_v_pos else: grad_v = grad_v_pos # use adam optimizer grad_u = adam( grad_u, self.snd_state_sum_u, nodes[:, 0], lr, self.device, self.only_gpu, ) grad_v = adam( grad_v, self.snd_state_sum_v, nodes[:, 1], lr, self.device, self.only_gpu, ) if neg_nodes is not None: grad_v_neg = adam( grad_v_neg, self.snd_state_sum_v, neg_nodes, lr, self.device, self.only_gpu, ) if self.mixed_train: grad_u = grad_u.cpu() grad_v = grad_v.cpu() if neg_nodes is not None: grad_v_neg = grad_v_neg.cpu() else: grad_v_neg = None if self.async_update: grad_u.share_memory_() grad_v.share_memory_() 
def get_embedding(self):
    """Return the row-normalised node embeddings as a NumPy array.

    With only first-order or only second-order training enabled, the
    corresponding ``*_u_embeddings`` table is returned with every row scaled
    to unit L2 norm. With both enabled, the two normalised tables are
    concatenated along the feature axis and the rows are normalised again.

    The model parameters are never modified: normalisation is done on a copy.

    Raises
    ------
    RuntimeError : if neither ``self.fst`` nor ``self.snd`` is set
    """

    def _unit_rows(weight):
        # Copy before dividing: for a CPU tensor ``.numpy()`` shares memory
        # with the parameter, so an in-place ``/=`` on the view would
        # overwrite the trained weights.
        rows = weight.cpu().data.numpy().copy()
        rows /= np.sqrt(np.sum(rows * rows, 1)).reshape(-1, 1)
        return rows

    if self.fst and self.snd:
        embedding = np.concatenate(
            (
                _unit_rows(self.fst_u_embeddings.weight),
                _unit_rows(self.snd_u_embeddings.weight),
            ),
            1,
        )
        embedding /= np.sqrt(np.sum(embedding * embedding, 1)).reshape(
            -1, 1
        )
        return embedding
    if self.fst:
        return _unit_rows(self.fst_u_embeddings.weight)
    if self.snd:
        return _unit_rows(self.snd_u_embeddings.weight)
    raise RuntimeError("no embedding table is enabled (fst and snd are both False)")
def ReadTxtNet(file_path="", undirected=True):
    """Read the txt network file.

    Each non-empty line holds ``src dst`` or ``src dst weight`` as
    whitespace-separated integers; a missing weight defaults to 1. Node ids
    are re-indexed to consecutive integers in order of first appearance.
    Repeated edges keep the weight of their first occurrence.

    Parameters
    ----------
    file_path str : path of network file, or "youtube" / "blog" to download
        and extract the corresponding archive first
    undirected bool : whether the edges are undirected (the reverse of every
        edge is then added as well)

    Return
    ------
    net dict : a dict recording the connections in the graph
    node2id dict : a dict mapping the nodes to their embedding indices
    id2node dict : a dict mapping nodes embedding indices to the nodes
    sm scipy.sparse.coo_matrix : float32 weighted adjacency matrix built from
        the recorded edges
    """
    if file_path == "youtube" or file_path == "blog":
        name = file_path
        download_dir = get_download_dir()
        zip_file_path = "{}/{}.zip".format(download_dir, name)
        download(
            _get_dgl_url(
                os.path.join("dataset/DeepWalk/", "{}.zip".format(file_path))
            ),
            path=zip_file_path,
        )
        extract_archive(zip_file_path, "{}/{}".format(download_dir, name))
        file_path = "{}/{}/{}-net.txt".format(download_dir, name, name)

    node2id = {}
    id2node = {}
    cid = 0

    src = []
    dst = []
    weight = []
    net = {}

    def _add_edge(u, v, w):
        # Record the edge once; later duplicates are ignored.
        neighbours = net.setdefault(u, {})
        if v not in neighbours:
            neighbours[v] = w
            src.append(u)
            dst.append(v)
            weight.append(w)

    with open(file_path, "r") as f:
        for line in f:
            # split() without arguments tolerates tabs and repeated spaces.
            fields = line.split()
            if not fields:
                continue  # skip blank lines (e.g. a trailing newline)
            tup = list(map(int, fields))
            assert len(tup) in [
                2,
                3,
            ], "The format of network file is unrecognizable."
            if len(tup) == 3:
                n1, n2, w = tup
            else:
                n1, n2 = tup
                w = 1

            for node in (n1, n2):
                if node not in node2id:
                    node2id[node] = cid
                    id2node[cid] = node
                    cid += 1

            n1 = node2id[n1]
            n2 = node2id[n2]
            _add_edge(n1, n2, w)
            if undirected:
                _add_edge(n2, n1, w)

    print("node num: %d" % len(net))
    print("edge num: %d" % len(src))
    assert max(net.keys()) == len(net) - 1, "error reading net, quit"

    sm = sp.coo_matrix((np.array(weight), (src, dst)), dtype=np.float32)

    return net, node2id, id2node, sm
Provide the negative table if the user hopes to sample negative nodes according to nodes' degrees; Parameter --------- net_file str : path of the dgl network file walk_length int : number of nodes in a sequence window_size int : context window size num_walks int : number of walks for each node batch_size int : number of node sequences in each batch negative int : negative samples for each positve node pair fast_neg bool : whether do negative sampling inside a batch """ self.batch_size = batch_size self.negative = negative self.num_samples = num_samples self.num_procs = len(gpus) self.fast_neg = fast_neg if load_from_ogbl: assert ( len(gpus) == 1 ), "ogb.linkproppred is not compatible with multi-gpu training." from load_dataset import load_from_ogbl_with_name self.G = load_from_ogbl_with_name(ogbl_name) elif load_from_ogbn: assert ( len(gpus) == 1 ), "ogb.linkproppred is not compatible with multi-gpu training." from load_dataset import load_from_ogbn_with_name self.G = load_from_ogbn_with_name(ogbn_name) else: self.G = dgl.load_graphs(net_file)[0][0] self.G = make_undirected(self.G) print("Finish reading graph") self.num_nodes = self.G.num_nodes() start = time.time() seeds = np.random.choice( np.arange(self.G.num_edges()), self.num_samples, replace=True ) # edge index self.seeds = torch.split( torch.LongTensor(seeds), int(np.ceil(self.num_samples / self.num_procs)), 0, ) end = time.time() t = end - start print("generate %d samples in %.2fs" % (len(seeds), t)) # negative table for true negative sampling self.valid_nodes = find_connected_nodes(self.G) if not fast_neg: node_degree = self.G.out_degrees(self.valid_nodes).numpy() node_degree = np.power(node_degree, 0.75) node_degree /= np.sum(node_degree) node_degree = np.array(node_degree * 1e8, dtype=int) self.neg_table = [] for idx, node in enumerate(self.valid_nodes): self.neg_table += [node] * node_degree[idx] self.neg_table_size = len(self.neg_table) self.neg_table = np.array(self.neg_table, dtype=int) del 
def sum_up_params(model):
    """Count the model parameters and print the total as ``#params N``.

    Counted entries: the first-order embedding table and its optimizer state
    (when ``model.fst``), the second-order table and state counted twice
    (when ``model.snd``), the sigmoid lookup table, and twice the size of
    ``index_emb_negu`` when the model has that attribute.
    """
    n = []
    # numel() only reads tensor metadata, so no device-to-host copy is needed.
    if model.fst:
        n.append(model.fst_u_embeddings.weight.data.numel())
        n.append(model.fst_state_sum_u.data.numel())
    if model.snd:
        n.append(model.snd_u_embeddings.weight.data.numel() * 2)
        n.append(model.snd_state_sum_u.data.numel() * 2)
    n.append(model.lookup_table.numel())
    # The negative-index table is optional; skip it only when it is absent
    # rather than swallowing every possible error.
    index_emb_negu = getattr(model, "index_emb_negu", None)
    if index_emb_negu is not None:
        n.append(index_emb_negu.numel() * 2)
    print("#params " + str(sum(n)))
The script in this folder runs full-batch GCN/GraphSage experiments (with/without NGNN) on the following datasets: ogbl-ddi, ogbl-collab and ogbl-ppa. ## Installation requirements ``` ogb>=1.3.3 torch>=1.11.0 dgl>=0.8 ``` ## Experiments We do not fix random seeds, and report results averaged over 10 runs for all models (50 runs where a table row says so). All models are trained on a single V100 GPU with 16GB memory. ### ogbl-ddi #### performance
test set validation set #parameters
Hits@20 Hits@50 Hits@100 Hits@20 Hits@50 Hits@100
GCN+NGNN(paper) 48.22% ± 7.00% 82.56% ± 4.03% 89.48% ± 1.68% 65.95% ± 1.16% 70.24% ± 0.50% 72.54% ± 0.62% 1,487,361
GCN+NGNN(ours; 50runs) 54.83% ± 15.81% 93.15% ± 2.59% 97.05% ± 0.56% 71.21% ± 0.38% 73.55% ± 0.25% 76.24% ± 1.33%
GraphSage+NGNN(paper) 60.75% ± 4.94% 84.58% ± 1.89% 92.58% ± 0.88% 68.05% ± 0.68% 71.14% ± 0.33% 72.77% ± 0.09% 1,618,433
GraphSage+NGNN(ours; 50runs) 57.70% ± 15.23% 96.18% ± 0.94% 98.58% ± 0.17% 73.23% ± 0.40% 87.20% ± 5.29% 98.71% ± 0.22%
A 3-layer MLP is used as LinkPredictor here, while a 2-layer one is used by the NGNN paper. This is the main reason for the better performance. #### Reproduction of performance - GCN + NGNN ```{.bash} python main.py --dataset ogbl-ddi --device 0 --ngnn_type input --epochs 800 --dropout 0.5 --num_layers 2 --lr 0.0025 --batch_size 16384 --runs 50 ``` - GraphSage + NGNN ```{.bash} python main.py --dataset ogbl-ddi --device 1 --ngnn_type input --use_sage --epochs 600 --dropout 0.25 --num_layers 2 --lr 0.0012 --batch_size 32768 --runs 50 ``` ### ogbl-collab #### Performance
test set validation set #parameters
Hits@10 Hits@50 Hits@100 Hits@10 Hits@50 Hits@100
GCN+NGNN(paper) 36.69% ± 0.82% 51.83% ± 0.50% 57.41% ± 0.22% 44.97% ± 0.97% 60.84% ± 0.63% 66.09% ± 0.30% 428,033
GCN+NGNN(ours) 39.29% ± 1.21% 53.48% ± 0.40% 58.34% ± 0.45% 48.28% ± 1.39% 62.73% ± 0.40% 67.13% ± 0.39%
GraphSage+NGNN(paper) 36.83% ± 2.56% 52.62% ± 1.04% 57.96% ± 0.56% 45.62% ± 2.56% 61.34% ± 1.05% 66.26% ± 0.44% 591,873
GraphSage+NGNN(ours) 40.30% ± 1.03% 53.59% ± 0.56% 58.75% ± 0.57% 49.85% ± 1.07% 62.81% ± 0.46% 67.33% ± 0.38%
#### Reproduction of performance - GCN + NGNN ```{.bash} python main.py --dataset ogbl-collab --device 2 --ngnn_type hidden --epochs 600 --dropout 0.2 --num_layers 3 --lr 0.001 --batch_size 32768 --runs 10 ``` - GraphSage + NGNN ```{.bash} python main.py --dataset ogbl-collab --device 3 --ngnn_type input --use_sage --epochs 800 --dropout 0.2 --num_layers 3 --lr 0.0005 --batch_size 32768 --runs 10 ``` ### ogbl-ppa #### Performance
test set validation set #parameters
Hits@10 Hits@50 Hits@100 Hits@10 Hits@50 Hits@100
GCN+NGNN(paper) 5.64% ± 0.93% 18.44% ± 1.88% 26.78% ± 0.9% 8.14% ± 0.71% 19.69% ± 0.94% 27.86% ± 0.81% 673,281
GCN+NGNN(ours) 13.07% ± 3.24% 28.55% ± 1.62% 36.83% ± 0.99% 16.36% ± 1.89% 30.56% ± 0.72% 38.34% ± 0.82% 410,113
GraphSage+NGNN(paper) 3.52% ± 1.24% 15.55% ± 1.92% 24.45% ± 2.34% 5.59% ± 0.93% 17.21% ± 0.69% 25.42% ± 0.50% 819,201
GraphSage+NGNN(ours) 11.73% ± 2.42% 29.88% ± 1.84% 40.05% ± 1.38% 14.73% ± 2.36% 31.59% ± 1.72% 40.58% ± 1.23% 556,033
The main difference between this implementation and NGNN paper is the position of NGNN (all -> input). #### Reproduction of performance - GCN + NGNN ```{.bash} python main.py --dataset ogbl-ppa --device 4 --ngnn_type input --epochs 80 --dropout 0.2 --num_layers 3 --lr 0.001 --batch_size 49152 --runs 10 ``` - GraphSage + NGNN ```{.bash} python main.py --dataset ogbl-ppa --device 5 --ngnn_type input --use_sage --epochs 80 --dropout 0.2 --num_layers 3 --lr 0.001 --batch_size 49152 --runs 10 ``` ## References ```{.tex} @article{DBLP:journals/corr/abs-2111-11638, author = {Xiang Song and Runjie Ma and Jiahang Li and Muhan Zhang and David Paul Wipf}, title = {Network In Graph Neural Network}, journal = {CoRR}, volume = {abs/2111.11638}, year = {2021}, url = {https://arxiv.org/abs/2111.11638}, eprinttype = {arXiv}, eprint = {2111.11638}, timestamp = {Fri, 26 Nov 2021 13:48:43 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2111-11638.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } ``` ================================================ FILE: examples/pytorch/ogb/ngnn/main.py ================================================ import argparse import math import dgl import torch import torch.nn.functional as F from dgl.dataloading.negative_sampler import GlobalUniform from dgl.nn.pytorch import GraphConv, SAGEConv from ogb.linkproppred import DglLinkPropPredDataset, Evaluator from torch.nn import Linear from torch.utils.data import DataLoader class Logger(object): def __init__(self, runs, info=None): self.info = info self.results = [[] for _ in range(runs)] def add_result(self, run, result): assert len(result) == 3 assert run >= 0 and run < len(self.results) self.results[run].append(result) def print_statistics(self, run=None): if run is not None: result = 100 * torch.tensor(self.results[run]) argmax = result[:, 1].argmax().item() print(f"Run {run + 1:02d}:") print(f"Highest Train: {result[:, 0].max():.2f}") print(f"Highest Valid: {result[:, 
class NGNN_GCNConv(torch.nn.Module):
    """A ``GraphConv`` layer followed by one or two ReLU + linear stages.

    With ``num_nonl_layers == 2`` the convolution output goes through
    ReLU -> ``fc`` -> ReLU -> ``fc2``; otherwise only ReLU -> ``fc2``.
    """

    def __init__(
        self, in_channels, hidden_channels, out_channels, num_nonl_layers
    ):
        super().__init__()
        # number of nonlinear layers in each conv layer
        self.num_nonl_layers = num_nonl_layers
        self.conv = GraphConv(in_channels, hidden_channels)
        self.fc = Linear(hidden_channels, hidden_channels)
        self.fc2 = Linear(hidden_channels, out_channels)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the convolution and both linear layers."""
        self.conv.reset_parameters()
        relu_gain = torch.nn.init.calculate_gain("relu")
        linears = (self.fc, self.fc2)
        # Weights first, then biases, in the same order for both layers.
        for layer in linears:
            torch.nn.init.xavier_uniform_(layer.weight, gain=relu_gain)
        for layer in linears:
            bound = 1.0 / math.sqrt(layer.bias.size(0))
            layer.bias.data.uniform_(-bound, bound)

    def forward(self, g, x):
        """Apply the convolution on graph ``g`` and the nonlinear stages."""
        h = F.relu(self.conv(g, x))
        if self.num_nonl_layers == 2:
            h = F.relu(self.fc(h))
        return self.fc2(h)
class NGNN_SAGEConv(torch.nn.Module):
    """A ``SAGEConv`` layer followed by one or two ReLU + linear stages.

    With ``num_nonl_layers == 2`` the convolution output goes through
    ReLU -> ``fc`` -> ReLU -> ``fc2``; otherwise only ReLU -> ``fc2``.
    ``reduce`` is forwarded to ``SAGEConv`` as its third positional argument.
    """

    def __init__(
        self,
        in_channels,
        hidden_channels,
        out_channels,
        num_nonl_layers,
        *,
        reduce,
    ):
        super().__init__()
        # number of nonlinear layers in each conv layer
        self.num_nonl_layers = num_nonl_layers
        self.conv = SAGEConv(in_channels, hidden_channels, reduce)
        self.fc = Linear(hidden_channels, hidden_channels)
        self.fc2 = Linear(hidden_channels, out_channels)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the convolution and both linear layers."""
        self.conv.reset_parameters()
        relu_gain = torch.nn.init.calculate_gain("relu")
        linears = (self.fc, self.fc2)
        # Weights first, then biases, in the same order for both layers.
        for layer in linears:
            torch.nn.init.xavier_uniform_(layer.weight, gain=relu_gain)
        for layer in linears:
            bound = 1.0 / math.sqrt(layer.bias.size(0))
            layer.bias.data.uniform_(-bound, bound)

    def forward(self, g, x):
        """Apply the convolution on graph ``g`` and the nonlinear stages."""
        h = F.relu(self.conv(g, x))
        if self.num_nonl_layers == 2:
            h = F.relu(self.fc(h))
        return self.fc2(h)
class LinkPredictor(torch.nn.Module):
    """MLP that scores a node pair from the element-wise product of its
    two embeddings and returns a sigmoid probability.
    """

    def __init__(
        self, in_channels, hidden_channels, out_channels, num_layers, dropout
    ):
        super().__init__()

        # Layer widths: input -> hidden, (num_layers - 2) hidden -> hidden
        # layers, then hidden -> output.
        widths = [in_channels, hidden_channels]
        widths += [hidden_channels] * max(num_layers - 2, 0)
        widths.append(out_channels)

        self.lins = torch.nn.ModuleList()
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            self.lins.append(Linear(fan_in, fan_out))

        self.dropout = dropout
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every linear layer."""
        for layer in self.lins:
            layer.reset_parameters()

    def forward(self, x_i, x_j):
        """Return the link probability for each pair of rows."""
        *hidden_layers, output_layer = self.lins
        h = x_i * x_j
        for layer in hidden_layers:
            h = F.relu(layer(h))
            h = F.dropout(h, p=self.dropout, training=self.training)
        return torch.sigmoid(output_layer(h))
@torch.no_grad()
def test(model, predictor, g, x, split_edge, evaluator, batch_size):
    """Evaluate Hits@K on the train-subset, validation and test edges.

    Node representations are computed once with ``model(g, x)``; every edge
    set is then scored by ``predictor`` in batches of ``batch_size``.

    Parameters
    ----------
    model : GNN producing node representations from ``(g, x)``
    predictor : module scoring a pair of node representations
    g : input graph
    x : input node features
    split_edge dict : must provide ``["eval_train"]["edge"]`` and, for both
        ``"valid"`` and ``"test"``, the ``"edge"`` and ``"edge_neg"`` tensors
    evaluator : object whose ``K`` attribute is set and whose ``eval`` returns
        a dict with key ``hits@K``
    batch_size int : number of edges scored per predictor call

    Return
    ------
    results dict : maps ``"Hits@20"``, ``"Hits@50"`` and ``"Hits@100"`` to a
        ``(train_hits, valid_hits, test_hits)`` tuple. Training edges are
        ranked against the validation negatives.
    """
    model.eval()
    predictor.eval()

    h = model(g, x)

    pos_train_edge = split_edge["eval_train"]["edge"].to(h.device)
    pos_valid_edge = split_edge["valid"]["edge"].to(h.device)
    neg_valid_edge = split_edge["valid"]["edge_neg"].to(h.device)
    pos_test_edge = split_edge["test"]["edge"].to(h.device)
    neg_test_edge = split_edge["test"]["edge_neg"].to(h.device)

    def get_pred(test_edges, h):
        preds = []
        for perm in DataLoader(range(test_edges.size(0)), batch_size):
            edge = test_edges[perm].t()
            # squeeze(-1) drops only the score dimension. A bare squeeze()
            # would also drop the batch dimension of a final batch holding a
            # single edge, and torch.cat rejects 0-d tensors.
            preds += [predictor(h[edge[0]], h[edge[1]]).squeeze(-1).cpu()]
        pred = torch.cat(preds, dim=0)
        return pred

    pos_train_pred = get_pred(pos_train_edge, h)
    pos_valid_pred = get_pred(pos_valid_edge, h)
    neg_valid_pred = get_pred(neg_valid_edge, h)
    pos_test_pred = get_pred(pos_test_edge, h)
    neg_test_pred = get_pred(neg_test_edge, h)

    results = {}
    for K in [20, 50, 100]:
        evaluator.K = K
        train_hits = evaluator.eval(
            {
                "y_pred_pos": pos_train_pred,
                "y_pred_neg": neg_valid_pred,
            }
        )[f"hits@{K}"]
        valid_hits = evaluator.eval(
            {
                "y_pred_pos": pos_valid_pred,
                "y_pred_neg": neg_valid_pred,
            }
        )[f"hits@{K}"]
        test_hits = evaluator.eval(
            {
                "y_pred_pos": pos_test_pred,
                "y_pred_neg": neg_test_pred,
            }
        )[f"hits@{K}"]

        results[f"Hits@{K}"] = (train_hits, valid_hits, test_hits)

    return results
GCN/GraphSage + NGNN)" ) # dataset setting parser.add_argument( "--dataset", type=str, default="ogbl-ddi", choices=["ogbl-ddi", "ogbl-collab", "ogbl-ppa"], ) # device setting parser.add_argument( "--device", type=int, default=0, help="GPU device ID. Use -1 for CPU training.", ) # model structure settings parser.add_argument( "--use_sage", action="store_true", help="If not set, use GCN by default.", ) parser.add_argument( "--ngnn_type", type=str, default="input", choices=["input", "hidden"], help="You can set this value from 'input' or 'hidden' to apply NGNN to different GNN layers.", ) parser.add_argument( "--num_layers", type=int, default=3, help="number of GNN layers" ) parser.add_argument("--hidden_channels", type=int, default=256) parser.add_argument("--dropout", type=float, default=0.0) parser.add_argument("--batch_size", type=int, default=64 * 1024) parser.add_argument("--lr", type=float, default=0.001) parser.add_argument("--epochs", type=int, default=400) # training settings parser.add_argument("--eval_steps", type=int, default=1) parser.add_argument("--runs", type=int, default=10) args = parser.parse_args() print(args) device = ( f"cuda:{args.device}" if args.device != -1 and torch.cuda.is_available() else "cpu" ) device = torch.device(device) dataset = DglLinkPropPredDataset(name=args.dataset) g = dataset[0] split_edge = dataset.get_edge_split() # We randomly pick some training samples that we want to evaluate on: idx = torch.randperm(split_edge["train"]["edge"].size(0)) idx = idx[: split_edge["valid"]["edge"].size(0)] split_edge["eval_train"] = {"edge": split_edge["train"]["edge"][idx]} if dataset.name == "ogbl-ppa": g.ndata["feat"] = g.ndata["feat"].to(torch.float) if dataset.name == "ogbl-ddi": emb = torch.nn.Embedding(g.num_nodes(), args.hidden_channels).to(device) in_channels = args.hidden_channels else: # ogbl-collab, ogbl-ppa in_channels = g.ndata["feat"].size(-1) # select model if args.use_sage: model = SAGE( in_channels, args.hidden_channels, 
args.hidden_channels, args.num_layers, args.dropout, args.ngnn_type, dataset.name, ) else: # GCN g = dgl.add_self_loop(g) model = GCN( in_channels, args.hidden_channels, args.hidden_channels, args.num_layers, args.dropout, args.ngnn_type, dataset.name, ) predictor = LinkPredictor( args.hidden_channels, args.hidden_channels, 1, 3, args.dropout ) g, model, predictor = map(lambda x: x.to(device), (g, model, predictor)) evaluator = Evaluator(name=dataset.name) loggers = { "Hits@20": Logger(args.runs, args), "Hits@50": Logger(args.runs, args), "Hits@100": Logger(args.runs, args), } for run in range(args.runs): model.reset_parameters() predictor.reset_parameters() if dataset.name == "ogbl-ddi": torch.nn.init.xavier_uniform_(emb.weight) g.ndata["feat"] = emb.weight optimizer = torch.optim.Adam( list(model.parameters()) + list(predictor.parameters()) + (list(emb.parameters()) if dataset.name == "ogbl-ddi" else []), lr=args.lr, ) for epoch in range(1, 1 + args.epochs): loss = train( model, predictor, g, g.ndata["feat"], split_edge, optimizer, args.batch_size, ) if epoch % args.eval_steps == 0: results = test( model, predictor, g, g.ndata["feat"], split_edge, evaluator, args.batch_size, ) for key, result in results.items(): loggers[key].add_result(run, result) train_hits, valid_hits, test_hits = result print(key) print( f"Run: {run + 1:02d}, " f"Epoch: {epoch:02d}, " f"Loss: {loss:.4f}, " f"Train: {100 * train_hits:.2f}%, " f"Valid: {100 * valid_hits:.2f}%, " f"Test: {100 * test_hits:.2f}%" ) print("---") for key in loggers.keys(): print(key) loggers[key].print_statistics(run) for key in loggers.keys(): print(key) loggers[key].print_statistics() if __name__ == "__main__": main() ================================================ FILE: examples/pytorch/ogb/ngnn_seal/README.md ================================================ # NGNN + SEAL ## Introduction This is a submission of implementing [NGNN](https://arxiv.org/abs/2111.11638) + [SEAL](https://arxiv.org/pdf/2010.16103.pdf) 
to OGB link prediction leaderboards. Some code is migrated from [https://github.com/facebookresearch/SEAL_OGB](https://github.com/facebookresearch/SEAL_OGB).

## Installation Requirements

```
ogb>=1.3.4
torch>=1.12.0
dgl>=0.8
scipy, numpy, tqdm...
```

## Experiments

We do not fix random seeds, and we report results over 10 runs for all models. All models are trained on a machine with a single T4 GPU (16GB memory) and 96 vCPUs.

### ogbl-ppa

#### performance

| | Test Hits@100 | Validation Hits@100 | #Parameters |
|:------------:|:-------------------:|:-----------------:|:------------:|
| SEAL | 48.80% ± 3.16% | 51.25% ± 2.52% | 709,122 |
| SEAL + NGNN | 59.71% ± 2.45% | 59.95% ± 2.05% | 735,426 |

#### Reproduction of performance

```{.bash}
python main.py --dataset ogbl-ppa --ngnn_type input --hidden_channels 48 --epochs 50 --lr 0.00015 --batch_size 128 --num_workers 48 --train_percent 5 --val_percent 8 --eval_hits_K 10 --use_feature --dynamic_train --dynamic_val --dynamic_test --runs 10
```

As training is very costly, we select the best model by evaluating on a subset of the validation edges with a lower K for Hits@K. We then evaluate the selected model on the full validation and test sets to obtain the reported metrics.
### ogbl-citation2

#### performance

| | Test MRR | Validation MRR | #Parameters |
|:------------:|:-------------------:|:-----------------:|:------------:|
| SEAL | 0.8767 ± 0.0032 | 0.8757 ± 0.0031 | 260,802 |
| SEAL + NGNN | 0.8891 ± 0.0022 | 0.8879 ± 0.0022 | 1,134,402 |

#### Reproduction of performance

```{.bash}
python main.py --dataset ogbl-citation2 --ngnn_type all --hidden_channels 256 --epochs 15 --lr 2e-05 --batch_size 64 --num_workers 24 --train_percent 8 --val_percent 4 --num_ngnn_layers 2 --use_feature --use_edge_weight --dynamic_train --dynamic_val --dynamic_test --runs 10
```

For all datasets, if you specify `--dynamic_train`, the enclosing subgraphs of the training links are extracted on the fly instead of being preprocessed and saved to disk. The same applies to `--dynamic_val` and `--dynamic_test`. You can increase `--num_workers` to accelerate dynamic subgraph extraction. You can also set the `--val_percent` and `--eval_hits_K` arguments in the above command to adjust the proportion of the validation set that is used and the K used for Hits@K.
## Reference @article{DBLP:journals/corr/abs-2111-11638, author = {Xiang Song and Runjie Ma and Jiahang Li and Muhan Zhang and David Paul Wipf}, title = {Network In Graph Neural Network}, journal = {CoRR}, volume = {abs/2111.11638}, year = {2021}, url = {https://arxiv.org/abs/2111.11638}, eprinttype = {arXiv}, eprint = {2111.11638}, timestamp = {Fri, 26 Nov 2021 13:48:43 +0100}, biburl = {https://dblp.org/rec/journals/corr/abs-2111-11638.bib}, bibsource = {dblp computer science bibliography, https://dblp.org} } @article{zhang2021labeling, title={Labeling Trick: A Theory of Using Graph Neural Networks for Multi-Node Representation Learning}, author={Zhang, Muhan and Li, Pan and Xia, Yinglong and Wang, Kai and Jin, Long}, journal={Advances in Neural Information Processing Systems}, volume={34}, year={2021} } @inproceedings{zhang2018link, title={Link prediction based on graph neural networks}, author={Zhang, Muhan and Chen, Yixin}, booktitle={Advances in Neural Information Processing Systems}, pages={5165--5175}, year={2018} } ================================================ FILE: examples/pytorch/ogb/ngnn_seal/main.py ================================================ import argparse import datetime import os import sys import time import dgl import torch from dgl.data.utils import load_graphs, save_graphs from dgl.dataloading import GraphDataLoader from ogb.linkproppred import DglLinkPropPredDataset, Evaluator from torch.nn import BCEWithLogitsLoss from torch.utils.data import Dataset from tqdm import tqdm from models import * from utils import * class SEALOGBLDataset(Dataset): def __init__( self, root, graph, split_edge, percent=100, split="train", ratio_per_hop=1.0, directed=False, dynamic=True, ) -> None: super().__init__() self.root = root self.graph = graph self.split = split self.split_edge = split_edge self.percent = percent self.ratio_per_hop = ratio_per_hop self.directed = directed self.dynamic = dynamic if "weights" in self.graph.edata: self.edge_weights = 
self.graph.edata["weights"] else: self.edge_weights = None if "feat" in self.graph.ndata: self.node_features = self.graph.ndata["feat"] else: self.node_features = None pos_edge, neg_edge = get_pos_neg_edges( self.split, self.split_edge, self.graph, self.percent ) self.links = torch.cat([pos_edge, neg_edge], 0) # [Np + Nn, 2] self.labels = np.array([1] * len(pos_edge) + [0] * len(neg_edge)) if not self.dynamic: self.g_list, tensor_dict = self.load_cached() self.labels = tensor_dict["y"] def __len__(self): return len(self.labels) def __getitem__(self, idx): if not self.dynamic: g, y = self.g_list[idx], self.labels[idx] x = None if "x" not in g.ndata else g.ndata["x"] w = None if "w" not in g.edata else g.eata["w"] return g, g.ndata["z"], x, w, y src, dst = self.links[idx][0].item(), self.links[idx][1].item() y = self.labels[idx] subg = k_hop_subgraph( src, dst, 1, self.graph, self.ratio_per_hop, self.directed ) # Remove the link between src and dst. direct_links = [[], []] for s, t in [(0, 1), (1, 0)]: if subg.has_edges_between(s, t): direct_links[0].append(s) direct_links[1].append(t) if len(direct_links[0]): subg.remove_edges(subg.edge_ids(*direct_links)) NIDs, EIDs = subg.ndata[dgl.NID], subg.edata[dgl.EID] z = drnl_node_labeling(subg.adj_external(scipy_fmt="csr"), 0, 1) edge_weights = ( self.edge_weights[EIDs] if self.edge_weights is not None else None ) x = self.node_features[NIDs] if self.node_features is not None else None subg_aug = subg.add_self_loop() if edge_weights is not None: edge_weights = torch.cat( [ edge_weights, torch.ones(subg_aug.num_edges() - subg.num_edges()), ] ) return subg_aug, z, x, edge_weights, y @property def cached_name(self): return f"SEAL_{self.split}_{self.percent}%.pt" def process(self): g_list, labels = [], [] self.dynamic = True for i in tqdm(range(len(self))): g, z, x, weights, y = self[i] g.ndata["z"] = z if x is not None: g.ndata["x"] = x if weights is not None: g.edata["w"] = weights g_list.append(g) labels.append(y) 
self.dynamic = False return g_list, {"y": torch.tensor(labels)} def load_cached(self): path = os.path.join(self.root, self.cached_name) if os.path.exists(path): return load_graphs(path) if not os.path.exists(self.root): os.makedirs(self.root) g_list, labels = self.process() save_graphs(path, g_list, labels) return g_list, labels def ogbl_collate_fn(batch): gs, zs, xs, ws, ys = zip(*batch) batched_g = dgl.batch(gs) z = torch.cat(zs, dim=0) if xs[0] is not None: x = torch.cat(xs, dim=0) else: x = None if ws[0] is not None: edge_weights = torch.cat(ws, dim=0) else: edge_weights = None y = torch.tensor(ys) return batched_g, z, x, edge_weights, y def train(): model.train() loss_fnt = BCEWithLogitsLoss() total_loss = 0 pbar = tqdm(train_loader, ncols=70) for batch in pbar: g, z, x, edge_weights, y = [ item.to(device) if item is not None else None for item in batch ] optimizer.zero_grad() logits = model(g, z, x, edge_weight=edge_weights) loss = loss_fnt(logits.view(-1), y.to(torch.float)) loss.backward() optimizer.step() total_loss += loss.item() * g.batch_size return total_loss / len(train_dataset) @torch.no_grad() def test(dataloader, hits_K=["hits@100"]): model.eval() if isinstance(hits_K, (int, str)): hits_K = [hits_K] y_pred, y_true = [], [] for batch in tqdm(dataloader, ncols=70): g, z, x, edge_weights, y = [ item.to(device) if item is not None else None for item in batch ] logits = model(g, z, x, edge_weight=edge_weights) y_pred.append(logits.view(-1).cpu()) y_true.append(y.view(-1).cpu().to(torch.float)) y_pred, y_true = torch.cat(y_pred), torch.cat(y_true) pos_y_pred = y_pred[y_true == 1] neg_y_pred = y_pred[y_true == 0] if dataset.eval_metric.startswith("hits@"): results = evaluate_hits(pos_y_pred, neg_y_pred, hits_K) elif dataset.eval_metric == "mrr": results = evaluate_mrr(pos_y_pred, neg_y_pred) elif dataset.eval_metric == "rocauc": results = evaluate_rocauc(pos_y_pred, neg_y_pred) return results def evaluate_hits(y_pred_pos, y_pred_neg, hits_K): results = {} 
hits_K = map( lambda x: (int(x.split("@")[1]) if isinstance(x, str) else x), hits_K ) for K in hits_K: evaluator.K = K hits = evaluator.eval( { "y_pred_pos": y_pred_pos, "y_pred_neg": y_pred_neg, } )[f"hits@{K}"] results[f"hits@{K}"] = hits return results def evaluate_mrr(y_pred_pos, y_pred_neg): y_pred_neg = y_pred_neg.view(y_pred_pos.shape[0], -1) results = {} mrr = ( evaluator.eval( { "y_pred_pos": y_pred_pos, "y_pred_neg": y_pred_neg, } )["mrr_list"] .mean() .item() ) results["mrr"] = mrr return results def evaluate_rocauc(y_pred_pos, y_pred_neg): results = {} rocauc = evaluator.eval( { "y_pred_pos": y_pred_pos, "y_pred_neg": y_pred_neg, } )["rocauc"] results["rocauc"] = rocauc return results def print_log(*x, sep="\n", end="\n", mode="a"): print(*x, sep=sep, end=end) with open(log_file, mode=mode) as f: print(*x, sep=sep, end=end, file=f) if __name__ == "__main__": # Data settings parser = argparse.ArgumentParser(description="OGBL (SEAL)") parser.add_argument("--dataset", type=str, default="ogbl-vessel") # GNN settings parser.add_argument( "--max_z", type=int, default=1000, help="max number of labels as embeddings to look up", ) parser.add_argument("--sortpool_k", type=float, default=0.6) parser.add_argument("--num_layers", type=int, default=3) parser.add_argument("--hidden_channels", type=int, default=32) parser.add_argument("--batch_size", type=int, default=32) parser.add_argument( "--ngnn_type", type=str, default="none", choices=["none", "input", "hidden", "output", "all"], help="You can set this value from 'none', 'input', 'hidden' or 'all' " "to apply NGNN to different GNN layers.", ) parser.add_argument( "--num_ngnn_layers", type=int, default=1, choices=[1, 2] ) # Subgraph extraction settings parser.add_argument("--ratio_per_hop", type=float, default=1.0) parser.add_argument( "--use_feature", action="store_true", help="whether to use raw node features as GNN input", ) parser.add_argument( "--use_edge_weight", action="store_true", help="whether to 
consider edge weight in GNN", ) # Training settings parser.add_argument( "--device", type=int, default=0, help="GPU device ID. Use -1 for CPU training.", ) parser.add_argument("--lr", type=float, default=0.001) parser.add_argument("--epochs", type=int, default=5) parser.add_argument("--dropout", type=float, default=0.0) parser.add_argument("--runs", type=int, default=10) parser.add_argument("--train_percent", type=float, default=1) parser.add_argument("--val_percent", type=float, default=1) parser.add_argument("--final_val_percent", type=float, default=100) parser.add_argument("--test_percent", type=float, default=100) parser.add_argument("--no_test", action="store_true") parser.add_argument( "--dynamic_train", action="store_true", help="dynamically extract enclosing subgraphs on the fly", ) parser.add_argument("--dynamic_val", action="store_true") parser.add_argument("--dynamic_test", action="store_true") parser.add_argument( "--num_workers", type=int, default=24, help="number of workers for dynamic dataloaders; " "using a larger value for dynamic dataloading is recommended", ) # Testing settings parser.add_argument( "--use_valedges_as_input", action="store_true", help="available for ogbl-collab", ) parser.add_argument("--eval_steps", type=int, default=1) parser.add_argument( "--eval_hits_K", type=int, nargs="*", default=[10], help="hits@K for each eval step; " "only available for datasets with hits@xx as the eval metric", ) parser.add_argument( "--test_topk", type=int, default=1, help="select best k models for full validation/test each run.", ) args = parser.parse_args() data_appendix = "_rph{}".format("".join(str(args.ratio_per_hop).split("."))) if args.use_valedges_as_input: data_appendix += "_uvai" args.res_dir = os.path.join( f'results{"_NoTest" if args.no_test else ""}', f'{args.dataset.split("-")[1]}-{args.ngnn_type}+{time.strftime("%m%d%H%M%S")}', ) print(f"Results will be saved in {args.res_dir}") if not os.path.exists(args.res_dir): 
os.makedirs(args.res_dir) log_file = os.path.join(args.res_dir, "log.txt") # Save command line input. cmd_input = "python " + " ".join(sys.argv) + "\n" with open(os.path.join(args.res_dir, "cmd_input.txt"), "a") as f: f.write(cmd_input) print(f"Command line input is saved.") print_log(f"{cmd_input}") dataset = DglLinkPropPredDataset(name=args.dataset) split_edge = dataset.get_edge_split() graph = dataset[0] # Re-format the data of ogbl-citation2. if args.dataset == "ogbl-citation2": for k in ["train", "valid", "test"]: src = split_edge[k]["source_node"] tgt = split_edge[k]["target_node"] split_edge[k]["edge"] = torch.stack([src, tgt], dim=1) if k != "train": tgt_neg = split_edge[k]["target_node_neg"] split_edge[k]["edge_neg"] = torch.stack( [src[:, None].repeat(1, tgt_neg.size(1)), tgt_neg], dim=-1 ) # [Ns, Nt, 2] # Reconstruct the graph for ogbl-collab data # for validation edge augmentation and coalesce. if args.dataset == "ogbl-collab": # Float edata for to_simple transformation. graph.edata.pop("year") graph.edata["weight"] = graph.edata["weight"].to(torch.float) if args.use_valedges_as_input: val_edges = split_edge["valid"]["edge"] row, col = val_edges.t() val_weights = torch.ones(size=(val_edges.size(0), 1)) graph.add_edges( torch.cat([row, col]), torch.cat([col, row]), {"weight": val_weights}, ) graph = graph.to_simple(copy_edata=True, aggregator="sum") if args.dataset == "ogbl-vessel": graph.ndata["feat"][:, 0] = torch.nn.functional.normalize( graph.ndata["feat"][:, 0], dim=0 ) graph.ndata["feat"][:, 1] = torch.nn.functional.normalize( graph.ndata["feat"][:, 1], dim=0 ) graph.ndata["feat"][:, 2] = torch.nn.functional.normalize( graph.ndata["feat"][:, 2], dim=0 ) graph.ndata["feat"] = graph.ndata["feat"].to(torch.float) if not args.use_edge_weight and "weight" in graph.edata: del graph.edata["weight"] if not args.use_feature and "feat" in graph.ndata: del graph.ndata["feat"] directed = args.dataset.startswith("ogbl-citation") evaluator = 
Evaluator(name=args.dataset) if dataset.eval_metric.startswith("hits@"): loggers = { f"hits@{k}": Logger(args.runs, args) for k in args.eval_hits_K } elif dataset.eval_metric == "mrr": loggers = { "mrr": Logger(args.runs, args), } elif dataset.eval_metric == "rocauc": loggers = { "rocauc": Logger(args.runs, args), } device = ( f"cuda:{args.device}" if args.device != -1 and torch.cuda.is_available() else "cpu" ) device = torch.device(device) path = f"{dataset.root}_seal{data_appendix}" if not (args.dynamic_train or args.dynamic_val or args.dynamic_test): args.num_workers = 0 train_dataset, val_dataset, final_val_dataset, test_dataset = [ SEALOGBLDataset( path, graph, split_edge, percent=percent, split=split, ratio_per_hop=args.ratio_per_hop, directed=directed, dynamic=dynamic, ) for percent, split, dynamic in zip( [ args.train_percent, args.val_percent, args.final_val_percent, args.test_percent, ], ["train", "valid", "valid", "test"], [ args.dynamic_train, args.dynamic_val, args.dynamic_test, args.dynamic_test, ], ) ] train_loader = GraphDataLoader( train_dataset, batch_size=args.batch_size, shuffle=True, collate_fn=ogbl_collate_fn, num_workers=args.num_workers, ) val_loader = GraphDataLoader( val_dataset, batch_size=args.batch_size, shuffle=False, collate_fn=ogbl_collate_fn, num_workers=args.num_workers, ) final_val_loader = GraphDataLoader( final_val_dataset, batch_size=args.batch_size, shuffle=False, collate_fn=ogbl_collate_fn, num_workers=args.num_workers, ) test_loader = GraphDataLoader( test_dataset, batch_size=args.batch_size, shuffle=False, collate_fn=ogbl_collate_fn, num_workers=args.num_workers, ) if 0 < args.sortpool_k <= 1: # Transform percentile to number. if args.dataset.startswith("ogbl-citation"): # For this dataset, subgraphs extracted around positive edges are # rather larger than negative edges. Thus we sample from 1000 # positive and 1000 negative edges to estimate the k (number of # nodes to hold for each graph) used in SortPooling. 
# You can certainly set k manually, instead of estimating from # a percentage of sampled subgraphs. _sampled_indices = list(range(1000)) + list( range(len(train_dataset) - 1000, len(train_dataset)) ) else: _sampled_indices = list(range(1000)) _num_nodes = sorted( [train_dataset[i][0].num_nodes() for i in _sampled_indices] ) _k = _num_nodes[int(math.ceil(args.sortpool_k * len(_num_nodes))) - 1] model_k = max(10, _k) else: raise argparse.ArgumentTypeError("sortpool_k must be in range (0, 1].") print_log(f"training starts: {datetime.datetime.now()}") for run in range(args.runs): stime = datetime.datetime.now() print_log(f"\n++++++\n\nstart run [{run+1}], {stime}") model = DGCNN( args.hidden_channels, args.num_layers, args.max_z, model_k, feature_dim=graph.ndata["feat"].size(1) if (args.use_feature and "feat" in graph.ndata) else 0, dropout=args.dropout, ngnn_type=args.ngnn_type, num_ngnn_layers=args.num_ngnn_layers, ).to(device) parameters = list(model.parameters()) optimizer = torch.optim.Adam(params=parameters, lr=args.lr) total_params = sum(p.numel() for param in parameters for p in param) print_log( f"Total number of parameters is {total_params}", f"SortPooling k is set to {model.k}", ) start_epoch = 1 # Training starts. 
for epoch in range(start_epoch, start_epoch + args.epochs): epo_stime = datetime.datetime.now() loss = train() epo_train_etime = datetime.datetime.now() print_log( f"[epoch: {epoch}]", f" starts: {epo_stime}, " f"ends: {epo_train_etime}, " f"spent time:{epo_train_etime - epo_stime}", ) if epoch % args.eval_steps == 0: epo_eval_stime = datetime.datetime.now() results = test(val_loader, loggers.keys()) epo_eval_etime = datetime.datetime.now() print_log( f" starts: {epo_eval_stime}, " f"ends: {epo_eval_etime}, " f"spent time:{epo_eval_etime - epo_eval_stime}" ) for key, valid_res in results.items(): loggers[key].add_result(run, valid_res) to_print = ( f"Run: {run + 1:02d}, " f"Epoch: {epoch:02d}, " f"Loss: {loss:.4f}, " f"Valid ({args.val_percent}%) [{key}]: {valid_res:.4f}" ) print_log(key, to_print) model_name = os.path.join( args.res_dir, f"run{run+1}_model_checkpoint{epoch}.pth" ) optimizer_name = os.path.join( args.res_dir, f"run{run+1}_optimizer_checkpoint{epoch}.pth" ) torch.save(model.state_dict(), model_name) torch.save(optimizer.state_dict(), optimizer_name) print_log() tested = dict() for eval_metric in loggers.keys(): # Select models according to the eval_metric of the dataset. 
res = torch.tensor(loggers[eval_metric].results["valid"][run]) if args.no_test: epoch = torch.argmax(res).item() + 1 val_res = loggers[eval_metric].results["valid"][run][epoch - 1] loggers[eval_metric].add_result(run, (epoch, val_res), "test") print_log( f"No Test; Best Valid:", f" Run: {run + 1:02d}, " f"Epoch: {epoch:02d}, " f"Valid ({args.val_percent}%) [{eval_metric}]: {val_res:.4f}", ) continue idx_to_test = ( torch.topk(res, args.test_topk, largest=True).indices + 1 ).tolist() # indices of top k valid results print_log( f"Eval Metric: {eval_metric}", f"Run: {run + 1:02d}, " f"Top {args.test_topk} Eval Points: {idx_to_test}", ) for _idx, epoch in enumerate(idx_to_test): print_log( f"Test Point[{_idx+1}]: " f"Epoch {epoch:02d}, " f"Test Metric: {dataset.eval_metric}" ) if epoch not in tested: model_name = os.path.join( args.res_dir, f"run{run+1}_model_checkpoint{epoch}.pth" ) optimizer_name = os.path.join( args.res_dir, f"run{run+1}_optimizer_checkpoint{epoch}.pth", ) model.load_state_dict( torch.load(model_name, weights_only=False) ) optimizer.load_state_dict( torch.load(optimizer_name, weights_only=False) ) tested[epoch] = ( test(final_val_loader, dataset.eval_metric)[ dataset.eval_metric ], test(test_loader, dataset.eval_metric)[ dataset.eval_metric ], ) val_res, test_res = tested[epoch] loggers[eval_metric].add_result( run, (epoch, val_res, test_res), "test" ) print_log( f" Run: {run + 1:02d}, " f"Epoch: {epoch:02d}, " f"Valid ({args.val_percent}%) [{eval_metric}]: " f"{loggers[eval_metric].results['valid'][run][epoch-1]:.4f}, " f"Valid (final) [{dataset.eval_metric}]: {val_res:.4f}, " f"Test [{dataset.eval_metric}]: {test_res:.4f}" ) etime = datetime.datetime.now() print_log( f"end run [{run}], {etime}", f"spent time:{etime-stime}", ) for key in loggers.keys(): print(f"\n{key}") loggers[key].print_statistics() with open(log_file, "a") as f: print(f"\n{key}", file=f) loggers[key].print_statistics(f=f) print(f"Total number of parameters is {total_params}") 
print(f"Results are saved in {args.res_dir}") ================================================ FILE: examples/pytorch/ogb/ngnn_seal/models.py ================================================ import math import torch import torch.nn.functional as F from dgl.nn import GraphConv, SortPooling from torch.nn import Conv1d, Embedding, Linear, MaxPool1d, ModuleList class NGNN_GCNConv(torch.nn.Module): def __init__( self, input_channels, hidden_channels, output_channels, num_layers ): super(NGNN_GCNConv, self).__init__() self.conv = GraphConv(input_channels, hidden_channels) self.fc = Linear(hidden_channels, hidden_channels) self.fc2 = Linear(hidden_channels, output_channels) self.num_layers = num_layers def reset_parameters(self): self.conv.reset_parameters() gain = torch.nn.init.calculate_gain("relu") torch.nn.init.xavier_uniform_(self.fc.weight, gain=gain) torch.nn.init.xavier_uniform_(self.fc2.weight, gain=gain) for bias in [self.fc.bias, self.fc2.bias]: stdv = 1.0 / math.sqrt(bias.size(0)) bias.data.uniform_(-stdv, stdv) def forward(self, g, x, edge_weight=None): x = self.conv(g, x, edge_weight) if self.num_layers == 2: x = F.relu(x) x = self.fc(x) x = F.relu(x) x = self.fc2(x) return x # An end-to-end deep learning architecture for graph classification, AAAI-18. 
class DGCNN(torch.nn.Module): def __init__( self, hidden_channels, num_layers, max_z, k, feature_dim=0, GNN=GraphConv, NGNN=NGNN_GCNConv, dropout=0.0, ngnn_type="all", num_ngnn_layers=1, ): super(DGCNN, self).__init__() self.feature_dim = feature_dim self.dropout = dropout self.k = k self.sort_pool = SortPooling(k=self.k) self.max_z = max_z self.z_embedding = Embedding(self.max_z, hidden_channels) self.convs = ModuleList() initial_channels = hidden_channels + self.feature_dim self.num_ngnn_layers = num_ngnn_layers if ngnn_type in ["input", "all"]: self.convs.append( NGNN( initial_channels, hidden_channels, hidden_channels, self.num_ngnn_layers, ) ) else: self.convs.append(GNN(initial_channels, hidden_channels)) if ngnn_type in ["hidden", "all"]: for _ in range(0, num_layers - 1): self.convs.append( NGNN( hidden_channels, hidden_channels, hidden_channels, self.num_ngnn_layers, ) ) else: for _ in range(0, num_layers - 1): self.convs.append(GNN(hidden_channels, hidden_channels)) if ngnn_type in ["output", "all"]: self.convs.append( NGNN(hidden_channels, hidden_channels, 1, self.num_ngnn_layers) ) else: self.convs.append(GNN(hidden_channels, 1)) conv1d_channels = [16, 32] total_latent_dim = hidden_channels * num_layers + 1 conv1d_kws = [total_latent_dim, 5] self.conv1 = Conv1d(1, conv1d_channels[0], conv1d_kws[0], conv1d_kws[0]) self.maxpool1d = MaxPool1d(2, 2) self.conv2 = Conv1d( conv1d_channels[0], conv1d_channels[1], conv1d_kws[1], 1 ) dense_dim = int((self.k - 2) / 2 + 1) dense_dim = (dense_dim - conv1d_kws[1] + 1) * conv1d_channels[1] self.lin1 = Linear(dense_dim, 128) self.lin2 = Linear(128, 1) def forward(self, g, z, x=None, edge_weight=None): z_emb = self.z_embedding(z) if z_emb.ndim == 3: # in case z has multiple integer labels z_emb = z_emb.sum(dim=1) if x is not None: x = torch.cat([z_emb, x.to(torch.float)], 1) else: x = z_emb xs = [x] for conv in self.convs: xs += [ F.dropout( torch.tanh(conv(g, xs[-1], edge_weight=edge_weight)), p=self.dropout, 
training=self.training, ) ] x = torch.cat(xs[1:], dim=-1) # global pooling x = self.sort_pool(g, x) x = x.unsqueeze(1) # [num_graphs, 1, k * hidden] x = F.relu(self.conv1(x)) x = self.maxpool1d(x) x = F.relu(self.conv2(x)) x = x.view(x.size(0), -1) # [num_graphs, dense_dim] # MLP. x = F.relu(self.lin1(x)) x = F.dropout(x, p=0.5, training=self.training) x = self.lin2(x) return x ================================================ FILE: examples/pytorch/ogb/ngnn_seal/utils.py ================================================ import random import sys import numpy as np import torch from dgl.sampling import global_uniform_negative_sampling from scipy.sparse.csgraph import shortest_path def k_hop_subgraph(src, dst, num_hops, g, sample_ratio=1.0, directed=False): # Extract the k-hop enclosing subgraph around link (src, dst) from g nodes = [src, dst] visited = set([src, dst]) fringe = set([src, dst]) for _ in range(num_hops): if not directed: _, fringe = g.out_edges(list(fringe)) fringe = fringe.tolist() else: _, out_neighbors = g.out_edges(list(fringe)) in_neighbors, _ = g.in_edges(list(fringe)) fringe = in_neighbors.tolist() + out_neighbors.tolist() fringe = set(fringe) - visited visited = visited.union(fringe) if sample_ratio < 1.0: fringe = random.sample(fringe, int(sample_ratio * len(fringe))) if len(fringe) == 0: break nodes = nodes + list(fringe) subg = g.subgraph(nodes, store_ids=True) return subg def drnl_node_labeling(adj, src, dst): # Double Radius Node Labeling (DRNL). 
src, dst = (dst, src) if src > dst else (src, dst) idx = list(range(src)) + list(range(src + 1, adj.shape[0])) adj_wo_src = adj[idx, :][:, idx] idx = list(range(dst)) + list(range(dst + 1, adj.shape[0])) adj_wo_dst = adj[idx, :][:, idx] dist2src = shortest_path( adj_wo_dst, directed=False, unweighted=True, indices=src ) dist2src = np.insert(dist2src, dst, 0, axis=0) dist2src = torch.from_numpy(dist2src) dist2dst = shortest_path( adj_wo_src, directed=False, unweighted=True, indices=dst - 1 ) dist2dst = np.insert(dist2dst, src, 0, axis=0) dist2dst = torch.from_numpy(dist2dst) dist = dist2src + dist2dst dist_over_2, dist_mod_2 = ( torch.div(dist, 2, rounding_mode="floor"), dist % 2, ) z = 1 + torch.min(dist2src, dist2dst) z += dist_over_2 * (dist_over_2 + dist_mod_2 - 1) z[src] = 1.0 z[dst] = 1.0 # shortest path may include inf values z[torch.isnan(z)] = 0.0 return z.to(torch.long) def get_pos_neg_edges(split, split_edge, g, percent=100): pos_edge = split_edge[split]["edge"] if split == "train": neg_edge = torch.stack( global_uniform_negative_sampling( g, num_samples=pos_edge.size(0), exclude_self_loops=True ), dim=1, ) else: neg_edge = split_edge[split]["edge_neg"] # sampling according to the percent param np.random.seed(123) # pos sampling num_pos = pos_edge.size(0) perm = np.random.permutation(num_pos) perm = perm[: int(percent / 100 * num_pos)] pos_edge = pos_edge[perm] # neg sampling if neg_edge.dim() > 2: # [Np, Nn, 2] neg_edge = neg_edge[perm].view(-1, 2) else: np.random.seed(123) num_neg = neg_edge.size(0) perm = np.random.permutation(num_neg) perm = perm[: int(percent / 100 * num_neg)] neg_edge = neg_edge[perm] return pos_edge, neg_edge # ([2, Np], [2, Nn]) -> ([Np, 2], [Nn, 2]) class Logger(object): def __init__(self, runs, info=None): self.info = info self.results = { "valid": [[] for _ in range(runs)], "test": [[] for _ in range(runs)], } def add_result(self, run, result, split="valid"): assert run >= 0 and run < len(self.results["valid"]) assert split in 
["valid", "test"] self.results[split][run].append(result) def print_statistics(self, run=None, f=sys.stdout): if run is not None: result = torch.tensor(self.results["valid"][run]) print(f"Run {run + 1:02d}:", file=f) print(f"Highest Valid: {result.max():.4f}", file=f) print(f"Highest Eval Point: {result.argmax().item()+1}", file=f) if not self.info.no_test: print( f' Final Test Point[1]: {self.results["test"][run][0][0]}', f' Final Valid: {self.results["test"][run][0][1]}', f' Final Test: {self.results["test"][run][0][2]}', sep="\n", file=f, ) else: best_result = torch.tensor( [test_res[0] for test_res in self.results["test"]] ) print(f"All runs:", file=f) r = best_result[:, 1] print(f"Highest Valid: {r.mean():.4f} ± {r.std():.4f}", file=f) if not self.info.no_test: r = best_result[:, 2] print(f" Final Test: {r.mean():.4f} ± {r.std():.4f}", file=f) ================================================ FILE: examples/pytorch/ogb/ogbn-arxiv/README.md ================================================ # DGL examples for ogbn-arxiv DGL implementation of GCN and GAT for [ogbn-arxiv](https://ogb.stanford.edu/docs/nodeprop/). Using some of the techniques from *Bag of Tricks for Node Classification with Graph Neural Networks* ([https://arxiv.org/abs/2103.13355](https://arxiv.org/abs/2103.13355)). Requires DGL 0.5 or later versions. ### GCN For the best score, run `gcn.py` with `--use-linear` and `--use-labels` enabled and you should directly see the result. ```bash python3 gcn.py --use-linear --use-labels ``` ### GAT For the score of `GAT(norm. adj.)+labels`, run the following command and you should directly see the result. ```bash python3 gat.py --use-norm --use-labels --no-attn-dst --edge-drop=0.1 --input-drop=0.1 ``` For the score of `GAT(norm. adj.)+label reuse`, run the following command and you should directly see the result. ```bash python3 gat.py --use-norm --use-labels --n-label-iters=1 --no-attn-dst --edge-drop=0.3 --input-drop=0.25 ``` For the score of `GAT(norm. 
adj.)+label reuse+C&S`, run the following command and you should directly see the result. ```bash python3 gat.py --use-norm --use-labels --n-label-iters=1 --no-attn-dst --edge-drop=0.3 --input-drop=0.25 --save-pred python3 correct_and_smooth.py --use-norm ``` ## Usage & Options ### GCN ``` usage: GCN on OGBN-Arxiv [-h] [--cpu] [--gpu GPU] [--n-runs N_RUNS] [--n-epochs N_EPOCHS] [--use-labels] [--use-linear] [--lr LR] [--n-layers N_LAYERS] [--n-hidden N_HIDDEN] [--dropout DROPOUT] [--wd WD] [--log-every LOG_EVERY] [--plot-curves] optional arguments: -h, --help show this help message and exit --cpu CPU mode. This option overrides --gpu. (default: False) --gpu GPU GPU device ID. (default: 0) --n-runs N_RUNS running times (default: 10) --n-epochs N_EPOCHS number of epochs (default: 1000) --use-labels Use labels in the training set as input features. (default: False) --use-linear Use linear layer. (default: False) --lr LR learning rate (default: 0.005) --n-layers N_LAYERS number of layers (default: 3) --n-hidden N_HIDDEN number of hidden units (default: 256) --dropout DROPOUT dropout rate (default: 0.75) --wd WD weight decay (default: 0) --log-every LOG_EVERY log every LOG_EVERY epochs (default: 20) --plot-curves plot learning curves (default: False) ``` ### GAT ``` usage: GAT on OGBN-Arxiv [-h] [--cpu] [--gpu GPU] [--n-runs N_RUNS] [--n-epochs N_EPOCHS] [--use-labels] [--n-label-iters N_LABEL_ITERS] [--no-attn-dst] [--use-norm] [--lr LR] [--n-layers N_LAYERS] [--n-heads N_HEADS] [--n-hidden N_HIDDEN] [--dropout DROPOUT] [--input-drop INPUT_DROP] [--attn-drop ATTN_DROP] [--edge-drop EDGE_DROP] [--wd WD] [--log-every LOG_EVERY] [--plot-curves] optional arguments: -h, --help show this help message and exit --cpu CPU mode. This option overrides --gpu. (default: False) --gpu GPU GPU device ID. (default: 0) --n-runs N_RUNS running times (default: 10) --n-epochs N_EPOCHS number of epochs (default: 2000) --use-labels Use labels in the training set as input features. 
(default: False) --n-label-iters N_LABEL_ITERS number of label iterations (default: 0) --no-attn-dst Don't use attn_dst. (default: False) --use-norm Use symmetrically normalized adjacency matrix. (default: False) --lr LR learning rate (default: 0.002) --n-layers N_LAYERS number of layers (default: 3) --n-heads N_HEADS number of heads (default: 3) --n-hidden N_HIDDEN number of hidden units (default: 250) --dropout DROPOUT dropout rate (default: 0.75) --input-drop INPUT_DROP input drop rate (default: 0.1) --attn-drop ATTN_DROP attention dropout rate (default: 0.0) --edge-drop EDGE_DROP edge drop rate (default: 0.0) --wd WD weight decay (default: 0) --log-every LOG_EVERY log every LOG_EVERY epochs (default: 20) --plot-curves plot learning curves (default: False) ``` ## Results Here are the results over at least 10 runs. | Method | Validation Accuracy | Test Accuracy | #Parameters | |:-------------------------------:|:-------------------:|:---------------:|:-----------:| | GCN | 0.7361 ± 0.0009 | 0.7246 ± 0.0021 | 109,608 | | GCN+linear | 0.7397 ± 0.0010 | 0.7270 ± 0.0016 | 218,152 | | GCN+labels | 0.7399 ± 0.0008 | 0.7259 ± 0.0006 | 119,848 | | GCN+linear+labels | 0.7442 ± 0.0012 | 0.7306 ± 0.0024 | 238,632 | | GAT(norm. adj.)+labels | 0.7508 ± 0.0009 | 0.7366 ± 0.0011 | 1,441,580 | | GAT(norm. adj.)+label reuse | 0.7516 ± 0.0008 | 0.7391 ± 0.0012 | 1,441,580 | | GAT(norm. 
def general_outcome_correlation(
    graph, y0, n_prop=50, alpha=0.8, use_norm=False, post_step=None
):
    """Iteratively propagate ``y0`` over ``graph`` and blend it with ``y0``.

    Each of the ``n_prop`` iterations replaces ``y`` with the mean of the
    values sent along incoming edges (``fn.copy_u`` + ``fn.mean``), then
    mixes the result with the initial signal as
    ``alpha * y + (1 - alpha) * y0`` and finally applies ``post_step`` if
    one is given.

    Parameters
    ----------
    graph
        Graph to propagate on; it is only touched inside ``local_scope()``.
    y0
        Initial node signal; its first dimension is indexed per node.
    n_prop : int
        Number of propagation iterations.
    alpha : float
        Weight of the propagated signal in the blend with ``y0``.
    use_norm : bool
        If true, ``y`` is scaled by ``deg^-0.5`` before and by ``deg^0.5``
        after each aggregation, with ``deg`` the in-degree clamped to at
        least 1.
    post_step : callable or None
        Applied to ``y`` at the end of every iteration (e.g. a clamp).

    Returns
    -------
    The signal after the last iteration (``y0`` itself if ``n_prop`` is 0).
    """
    with graph.local_scope():
        if use_norm:
            # The graph is not modified inside the loop, so the degree-based
            # scaling vectors are computed once instead of 2 * n_prop times.
            degs = graph.in_degrees().float().clamp(min=1)
            pre_norm = torch.pow(degs, -0.5)
            post_norm = torch.pow(degs, 0.5)

        y = y0
        for _ in range(n_prop):
            if use_norm:
                # Append singleton dims so the per-node factor broadcasts
                # over the trailing dimensions of y.
                shp = pre_norm.shape + (1,) * (y.dim() - 1)
                y = y * torch.reshape(pre_norm, shp)

            graph.srcdata.update({"y": y})
            graph.update_all(fn.copy_u("y", "m"), fn.mean("m", "y"))
            y = graph.dstdata["y"]

            if use_norm:
                shp = post_norm.shape + (1,) * (y.dim() - 1)
                y = y * torch.reshape(post_norm, shp)

            y = alpha * y + (1 - alpha) * y0

            if post_step is not None:
                y = post_step(y)

        return y
def main():
    """Smooth every saved prediction file and report the accuracies.

    Parses the command line, selects the device (stored in the module-level
    ``device``), loads and preprocesses the dataset, then calls ``run`` once
    per file matched by ``--pred-files`` and prints per-file and averaged
    validation/test accuracy.
    """
    global device

    argparser = argparse.ArgumentParser(description="implementation of C&S)")
    argparser.add_argument(
        "--cpu",
        action="store_true",
        help="CPU mode. This option overrides --gpu.",
    )
    argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.")
    argparser.add_argument(
        "--use-norm",
        action="store_true",
        help="Use symmetrically normalized adjacency matrix.",
    )
    argparser.add_argument("--alpha", type=float, default=0.6, help="alpha")
    argparser.add_argument(
        "--pred-files",
        type=str,
        default="./output/*.pt",
        help="address of prediction files",
    )
    args = argparser.parse_args()

    if args.cpu:
        device = torch.device("cpu")
    else:
        device = torch.device(f"cuda:{args.gpu}")

    # Resolve the inputs before the (expensive) dataset load. Sorting makes
    # the order of the reported accuracies reproducible; glob order is
    # otherwise unspecified. Fail early instead of printing NaN averages.
    pred_files = sorted(glob.glob(args.pred_files))
    if not pred_files:
        argparser.error(f"no prediction files match {args.pred_files!r}")

    # load data & preprocess
    graph, labels, train_idx, val_idx, test_idx, evaluator = load_data(dataset)
    graph = preprocess(graph)

    graph, labels, train_idx, val_idx, test_idx = map(
        lambda x: x.to(device), (graph, labels, train_idx, val_idx, test_idx)
    )

    # run
    val_accs, test_accs = [], []

    for pred_file in pred_files:
        print("load:", pred_file)
        # map_location puts the predictions on the selected device even if
        # they were saved from a different one (e.g. saved on GPU, --cpu now).
        # NOTE(security): weights_only=False unpickles arbitrary objects;
        # only point --pred-files at files you produced yourself.
        pred = torch.load(pred_file, map_location=device, weights_only=False)
        val_acc, test_acc = run(
            args, graph, labels, pred, train_idx, val_idx, test_idx, evaluator
        )
        val_accs.append(val_acc)
        test_accs.append(test_acc)

    print(args)
    print(f"Runned {len(val_accs)} times")
    print("Val Accs:", val_accs)
    print("Test Accs:", test_accs)
    print(f"Average val accuracy: {np.mean(val_accs)} ± {np.std(val_accs)}")
    print(f"Average test accuracy: {np.mean(test_accs)} ± {np.std(test_accs)}")
def seed(seed=0):
    """Seed the Python, NumPy, PyTorch (CPU and CUDA) and DGL generators.

    Also switches cuDNN to deterministic mode and turns off its benchmark
    mode.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

    dgl.random.seed(seed)
def adjust_learning_rate(optimizer, lr, epoch, warmup_epochs=50):
    """Linearly warm up the learning rate over the first epochs.

    For ``epoch <= warmup_epochs`` every parameter group of ``optimizer``
    gets ``lr * epoch / warmup_epochs``; later epochs leave the optimizer
    untouched.

    Parameters
    ----------
    optimizer
        Object exposing ``param_groups``, a list of dicts with an ``"lr"``
        entry.
    lr : float
        Learning rate reached at the end of the warm-up.
    epoch : int
        Current epoch (the training loops in this file count from 1).
    warmup_epochs : int
        Length of the warm-up; the default of 50 is the value that used to
        be hard-coded. A non-positive value disables the warm-up.
    """
    # Guard against a zero-length warm-up, which would divide by zero.
    if warmup_epochs <= 0 or epoch > warmup_epochs:
        return

    warmup_lr = lr * epoch / warmup_epochs
    for param_group in optimizer.param_groups:
        param_group["lr"] = warmup_lr
def _plot_curves(
    curves, labels, n_epochs, y_major, y_minor, filename, y_ticks=None
):
    """Plot one line per entry of ``curves`` over the epochs and save it.

    ``y_major`` is the spacing of the major y grid, ``y_minor`` the number
    of minor intervals per major one; ``y_ticks`` optionally presets the y
    ticks before the locators are installed.
    """
    fig = plt.figure(figsize=(24, 24))
    ax = fig.gca()
    ax.set_xticks(np.arange(0, n_epochs, 100))
    if y_ticks is not None:
        ax.set_yticks(y_ticks)
    ax.tick_params(labeltop=True, labelright=True)
    for y, label in zip(curves, labels):
        plt.plot(range(n_epochs), y, label=label, linewidth=1)
    ax.xaxis.set_major_locator(MultipleLocator(100))
    ax.xaxis.set_minor_locator(AutoMinorLocator(1))
    ax.yaxis.set_major_locator(MultipleLocator(y_major))
    ax.yaxis.set_minor_locator(AutoMinorLocator(y_minor))
    plt.grid(which="major", color="red", linestyle="dotted")
    plt.grid(which="minor", color="orange", linestyle="dotted")
    plt.legend()
    plt.tight_layout()
    plt.savefig(filename)
    # Release the figure; otherwise two large figures stay open per run.
    plt.close(fig)


def run(
    args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running
):
    """Train and evaluate one GAT model and return its selected accuracies.

    The model is trained for ``args.n_epochs`` epochs with RMSprop. After
    every epoch it is evaluated, and the validation/test accuracy and the
    predictions of the epoch with the lowest validation loss are kept.
    Optionally plots the learning curves (``args.plot_curves``) and saves
    the softmax of the kept predictions to ``./output/{n_running}.pt``
    (``args.save_pred``).

    Returns
    -------
    tuple
        ``(best_val_acc, final_test_acc)`` of the selected epoch.

    Raises
    ------
    RuntimeError
        If ``args.save_pred`` is set but no epoch was ever selected.
    """

    def evaluator_wrapper(pred, target):
        # Accuracy of the arg-max class according to the OGB evaluator.
        return evaluator.eval(
            {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": target}
        )["acc"]

    # define model and optimizer
    model = gen_model(args).to(device)
    optimizer = optim.RMSprop(
        model.parameters(), lr=args.lr, weight_decay=args.wd
    )

    # training loop
    total_time = 0
    best_val_acc, final_test_acc, best_val_loss = 0, 0, float("inf")
    final_pred = None

    accs, train_accs, val_accs, test_accs = [], [], [], []
    losses, train_losses, val_losses, test_losses = [], [], [], []

    for epoch in range(1, args.n_epochs + 1):
        tic = time.time()

        adjust_learning_rate(optimizer, args.lr, epoch)

        acc, loss = train(
            args,
            model,
            graph,
            labels,
            train_idx,
            val_idx,
            test_idx,
            optimizer,
            evaluator_wrapper,
        )

        (
            train_acc,
            val_acc,
            test_acc,
            train_loss,
            val_loss,
            test_loss,
            pred,
        ) = evaluate(
            args,
            model,
            graph,
            labels,
            train_idx,
            val_idx,
            test_idx,
            evaluator_wrapper,
        )

        # Keep plain Python floats: the history lists are handed to
        # matplotlib later, and storing tensors would also keep one device
        # tensor alive per epoch.
        train_loss = float(train_loss)
        val_loss = float(val_loss)
        test_loss = float(test_loss)

        toc = time.time()
        total_time += toc - tic

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_acc = val_acc
            final_test_acc = test_acc
            final_pred = pred

        if epoch == args.n_epochs or epoch % args.log_every == 0:
            print(
                f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch:.2f}\n"
                f"Loss: {loss:.4f}, Acc: {acc:.4f}\n"
                f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n"
                f"Train/Val/Test/Best val/Final test acc: {train_acc:.4f}/{val_acc:.4f}/{test_acc:.4f}/{best_val_acc:.4f}/{final_test_acc:.4f}"
            )

        for history, value in zip(
            [
                accs,
                train_accs,
                val_accs,
                test_accs,
                losses,
                train_losses,
                val_losses,
                test_losses,
            ],
            [
                acc,
                train_acc,
                val_acc,
                test_acc,
                loss,
                train_loss,
                val_loss,
                test_loss,
            ],
        ):
            history.append(value)

    print("*" * 50)
    print(f"Best val acc: {best_val_acc}, Final test acc: {final_test_acc}")
    print("*" * 50)

    # plot learning curves
    if args.plot_curves:
        _plot_curves(
            [accs, train_accs, val_accs, test_accs],
            ["acc", "train acc", "val acc", "test acc"],
            args.n_epochs,
            0.01,
            2,
            f"gat_acc_{n_running}.png",
            y_ticks=np.linspace(0, 1.0, 101),
        )
        _plot_curves(
            [losses, train_losses, val_losses, test_losses],
            ["loss", "train loss", "val loss", "test loss"],
            args.n_epochs,
            0.1,
            5,
            f"gat_loss_{n_running}.png",
        )

    if args.save_pred:
        if final_pred is None:
            # Happens when no epoch ran or the validation loss never
            # compared below infinity (e.g. it was NaN throughout).
            raise RuntimeError(
                "--save-pred was requested but no epoch produced a "
                "prediction to save"
            )
        os.makedirs("./output", exist_ok=True)
        torch.save(F.softmax(final_pred, dim=1), f"./output/{n_running}.pt")

    return best_val_acc, final_test_acc
) argparser.add_argument( "--use-norm", action="store_true", help="Use symmetrically normalized adjacency matrix.", ) argparser.add_argument( "--lr", type=float, default=0.002, help="learning rate" ) argparser.add_argument( "--n-layers", type=int, default=3, help="number of layers" ) argparser.add_argument( "--n-heads", type=int, default=3, help="number of heads" ) argparser.add_argument( "--n-hidden", type=int, default=250, help="number of hidden units" ) argparser.add_argument( "--dropout", type=float, default=0.75, help="dropout rate" ) argparser.add_argument( "--input-drop", type=float, default=0.1, help="input drop rate" ) argparser.add_argument( "--attn-drop", type=float, default=0.0, help="attention drop rate" ) argparser.add_argument( "--edge-drop", type=float, default=0.0, help="edge drop rate" ) argparser.add_argument("--wd", type=float, default=0, help="weight decay") argparser.add_argument( "--log-every", type=int, default=20, help="log every LOG_EVERY epochs" ) argparser.add_argument( "--plot-curves", action="store_true", help="plot learning curves" ) argparser.add_argument( "--save-pred", action="store_true", help="save final predictions" ) args = argparser.parse_args() if not args.use_labels and args.n_label_iters > 0: raise ValueError( "'--use-labels' must be enabled when n_label_iters > 0" ) if args.cpu: device = torch.device("cpu") else: device = torch.device(f"cuda:{args.gpu}") # load data & preprocess graph, labels, train_idx, val_idx, test_idx, evaluator = load_data(dataset) graph = preprocess(graph) graph, labels, train_idx, val_idx, test_idx = map( lambda x: x.to(device), (graph, labels, train_idx, val_idx, test_idx) ) # run val_accs, test_accs = [], [] for i in range(args.n_runs): seed(args.seed + i) val_acc, test_acc = run( args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + 1 ) val_accs.append(val_acc) test_accs.append(test_acc) print(args) print(f"Runned {args.n_runs} times") print("Val Accs:", val_accs) print("Test Accs:", 
test_accs) print(f"Average val accuracy: {np.mean(val_accs)} ± {np.std(val_accs)}") print(f"Average test accuracy: {np.mean(test_accs)} ± {np.std(test_accs)}") print(f"Number of params: {count_parameters(args)}") if __name__ == "__main__": main() # Namespace(attn_drop=0.0, cpu=False, dropout=0.75, edge_drop=0.1, gpu=0, input_drop=0.1, log_every=20, lr=0.002, n_epochs=2000, n_heads=3, n_hidden=250, n_label_iters=0, n_layers=3, n_runs=10, no_attn_dst=True, plot_curves=True, use_labels=True, use_norm=True, wd=0) # Runned 10 times # Val Accs: [0.7492868888217725, 0.7524413570925199, 0.7505620993993087, 0.7500251686298198, 0.7501929594952851, 0.7513003792073559, 0.7516695191113796, 0.7505285412262156, 0.7504949830531226, 0.7515017282459143] # Test Accs: [0.7366829208073575, 0.7384112091846182, 0.7368886694236981, 0.7345019854741477, 0.7373001666563792, 0.7362508487130424, 0.7352221056313396, 0.736477172191017, 0.7380614365368393, 0.7362919984363105] # Average val accuracy: 0.7508003624282694 ± 0.0008760483047616948 # Average test accuracy: 0.736608851305475 ± 0.0011192876013651112 # Number of params: 1441580 # Namespace(attn_drop=0.0, cpu=False, dropout=0.75, edge_drop=0.3, gpu=0, input_drop=0.25, log_every=20, lr=0.002, n_epochs=2000, n_heads=3, n_hidden=250, n_label_iters=1, n_layers=3, n_runs=10, no_attn_dst=True, plot_curves=True, use_labels=True, use_norm=True, wd=0) # Runned 20 times # Val Accs: [0.7529782878620088, 0.7521393335346823, 0.7521728917077755, 0.7504949830531226, 0.7518037518037518, 0.7518373099768448, 0.7516359609382866, 0.7511325883418907, 0.7509312393033323, 0.7515017282459143, 0.7511325883418907, 0.7514346118997282, 0.7509312393033323, 0.7521393335346823, 0.7528776133427296, 0.7522735662270545, 0.7504949830531226, 0.7522735662270545, 0.7511661465149837, 0.7501258431490989] # Test Accs: [0.7390901796185421, 0.7398720243606361, 0.7394605271279551, 0.7384523589078863, 0.7388638561405675, 0.7397280003291978, 0.7414151389831903, 0.7376499393041582, 
def gen_model(args):
    """Build the GCN described by ``args``.

    The input width is ``in_feats``, widened by ``n_classes`` when
    ``args.use_labels`` is set (the label one-hot is concatenated to the
    features by ``add_labels``).
    """
    input_width = in_feats + n_classes if args.use_labels else in_feats
    return GCN(
        input_width,
        args.n_hidden,
        n_classes,
        args.n_layers,
        F.relu,
        args.dropout,
        args.use_linear,
    )
def _plot_curves(
    curves, labels, n_epochs, y_major, y_minor, filename, y_ticks=None
):
    """Plot one line per entry of ``curves`` over the epochs and save it.

    ``y_major`` is the spacing of the major y grid, ``y_minor`` the number
    of minor intervals per major one; ``y_ticks`` optionally presets the y
    ticks before the locators are installed.
    """
    fig = plt.figure(figsize=(24, 24))
    ax = fig.gca()
    ax.set_xticks(np.arange(0, n_epochs, 100))
    if y_ticks is not None:
        ax.set_yticks(y_ticks)
    ax.tick_params(labeltop=True, labelright=True)
    for y, label in zip(curves, labels):
        plt.plot(range(n_epochs), y, label=label)
    ax.xaxis.set_major_locator(MultipleLocator(100))
    ax.xaxis.set_minor_locator(AutoMinorLocator(1))
    ax.yaxis.set_major_locator(MultipleLocator(y_major))
    ax.yaxis.set_minor_locator(AutoMinorLocator(y_minor))
    plt.grid(which="major", color="red", linestyle="dotted")
    plt.grid(which="minor", color="orange", linestyle="dotted")
    plt.legend()
    plt.tight_layout()
    plt.savefig(filename)
    # Release the figure; otherwise two large figures stay open per run.
    plt.close(fig)


def run(
    args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running
):
    """Train and evaluate one GCN model and return its selected accuracies.

    The model is trained for ``args.n_epochs`` epochs with AdamW, a linear
    warm-up (``adjust_learning_rate``) and a ``ReduceLROnPlateau`` scheduler
    stepped on the training loss. After every epoch the model is evaluated
    and the validation/test accuracy of the epoch with the lowest validation
    loss is kept. Learning curves are plotted when ``args.plot_curves`` is
    set.

    Returns
    -------
    tuple
        ``(best_val_acc, final_test_acc)`` of the selected epoch.
    """
    # define model and optimizer
    model = gen_model(args)
    model = model.to(device)

    optimizer = optim.AdamW(
        model.parameters(), lr=args.lr, weight_decay=args.wd
    )
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",
        factor=0.5,
        patience=100,
        min_lr=1e-3,
    )

    # training loop
    total_time = 0
    best_val_acc, final_test_acc, best_val_loss = 0, 0, float("inf")

    accs, train_accs, val_accs, test_accs = [], [], [], []
    losses, train_losses, val_losses, test_losses = [], [], [], []

    for epoch in range(1, args.n_epochs + 1):
        tic = time.time()

        adjust_learning_rate(optimizer, args.lr, epoch)

        loss, pred = train(
            model, graph, labels, train_idx, optimizer, args.use_labels
        )
        # train() returns the loss tensor that took part in backward();
        # reduce it to a float so it can be logged, stored and plotted
        # without keeping a grad-requiring device tensor per epoch.
        loss = loss.item()
        acc = compute_acc(pred[train_idx], labels[train_idx], evaluator)

        (
            train_acc,
            val_acc,
            test_acc,
            train_loss,
            val_loss,
            test_loss,
        ) = evaluate(
            model,
            graph,
            labels,
            train_idx,
            val_idx,
            test_idx,
            args.use_labels,
            evaluator,
        )
        # Same reasoning for the evaluation losses.
        train_loss = float(train_loss)
        val_loss = float(val_loss)
        test_loss = float(test_loss)

        lr_scheduler.step(loss)

        toc = time.time()
        total_time += toc - tic

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_acc = val_acc
            final_test_acc = test_acc

        if epoch % args.log_every == 0:
            print(
                f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch:.2f}\n"
                f"Loss: {loss:.4f}, Acc: {acc:.4f}\n"
                f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n"
                f"Train/Val/Test/Best val/Final test acc: {train_acc:.4f}/{val_acc:.4f}/{test_acc:.4f}/{best_val_acc:.4f}/{final_test_acc:.4f}"
            )

        for history, value in zip(
            [
                accs,
                train_accs,
                val_accs,
                test_accs,
                losses,
                train_losses,
                val_losses,
                test_losses,
            ],
            [
                acc,
                train_acc,
                val_acc,
                test_acc,
                loss,
                train_loss,
                val_loss,
                test_loss,
            ],
        ):
            history.append(value)

    print("*" * 50)
    print(f"Best val acc: {best_val_acc}, Final test acc: {final_test_acc}")
    print("*" * 50)

    if args.plot_curves:
        _plot_curves(
            [accs, train_accs, val_accs, test_accs],
            ["acc", "train acc", "val acc", "test acc"],
            args.n_epochs,
            0.01,
            2,
            f"gcn_acc_{n_running}.png",
            y_ticks=np.linspace(0, 1.0, 101),
        )
        _plot_curves(
            [losses, train_losses, val_losses, test_losses],
            ["loss", "train loss", "val loss", "test loss"],
            args.n_epochs,
            0.1,
            5,
            f"gcn_loss_{n_running}.png",
        )

    return best_val_acc, final_test_acc
) argparser.add_argument( "--lr", type=float, default=0.005, help="learning rate" ) argparser.add_argument( "--n-layers", type=int, default=3, help="number of layers" ) argparser.add_argument( "--n-hidden", type=int, default=256, help="number of hidden units" ) argparser.add_argument( "--dropout", type=float, default=0.5, help="dropout rate" ) argparser.add_argument("--wd", type=float, default=0, help="weight decay") argparser.add_argument( "--log-every", type=int, default=20, help="log every LOG_EVERY epochs" ) argparser.add_argument( "--plot-curves", action="store_true", help="plot learning curves" ) args = argparser.parse_args() if args.cpu: device = th.device("cpu") else: device = th.device("cuda:%d" % args.gpu) # load data data = DglNodePropPredDataset(name="ogbn-arxiv") evaluator = Evaluator(name="ogbn-arxiv") splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] # add reverse edges srcs, dsts = graph.all_edges() graph.add_edges(dsts, srcs) # add self-loop print(f"Total edges before adding self-loop {graph.num_edges()}") graph = graph.remove_self_loop().add_self_loop() print(f"Total edges after adding self-loop {graph.num_edges()}") in_feats = graph.ndata["feat"].shape[1] n_classes = (labels.max() + 1).item() graph.create_formats_() train_idx = train_idx.to(device) val_idx = val_idx.to(device) test_idx = test_idx.to(device) labels = labels.to(device) graph = graph.to(device) # run val_accs = [] test_accs = [] for i in range(args.n_runs): val_acc, test_acc = run( args, graph, labels, train_idx, val_idx, test_idx, evaluator, i ) val_accs.append(val_acc) test_accs.append(test_acc) print(f"Runned {args.n_runs} times") print("Val Accs:", val_accs) print("Test Accs:", test_accs) print(f"Average val accuracy: {np.mean(val_accs)} ± {np.std(val_accs)}") print(f"Average test accuracy: {np.mean(test_accs)} ± {np.std(test_accs)}") print(f"Number of params: 
class ElementWiseLinear(nn.Module):
    """Element-wise affine map ``x * weight + bias`` with learnable terms.

    Parameters
    ----------
    size : int or sequence of int
        Shape of the weight and bias parameters.
    weight : bool
        Whether to create the multiplicative parameter (initialised to 1).
    bias : bool
        Whether to create the additive parameter (initialised to 0).
    inplace : bool
        If true, ``forward`` modifies its input tensor in place.
    """

    def __init__(self, size, weight=True, bias=True, inplace=False):
        super().__init__()
        # torch.empty always reads ``size`` as a shape. The legacy
        # torch.Tensor(size) constructor treats a tuple or list as *data*,
        # which silently produced a parameter of the wrong shape.
        self.weight = nn.Parameter(torch.empty(size)) if weight else None
        self.bias = nn.Parameter(torch.empty(size)) if bias else None
        self.inplace = inplace

        self.reset_parameters()

    def reset_parameters(self):
        """Reset to the identity map: weight to ones, bias to zeros."""
        if self.weight is not None:
            nn.init.ones_(self.weight)
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, x):
        """Return ``x`` scaled by the weight and shifted by the bias.

        With ``inplace`` set, ``x`` itself is updated and returned.
        """
        if self.inplace:
            if self.weight is not None:
                x.mul_(self.weight)
            if self.bias is not None:
                x.add_(self.bias)
            return x

        if self.weight is not None:
            x = x * self.weight
        if self.bias is not None:
            x = x + self.bias
        return x
class GATConv(nn.Module):
    """Graph attention layer with optional edge dropout and symmetric norm.

    Parameters
    ----------
    in_feats : int or tuple
        Input feature size; expanded to a (source, destination) pair with
        ``expand_as_pair``. A tuple creates separate ``fc_src`` / ``fc_dst``
        projections, otherwise a single shared ``fc`` is created.
    out_feats : int
        Output feature size per attention head.
    num_heads : int
        Number of attention heads.
    feat_drop, attn_drop : float
        Dropout probabilities for the input features and the attention
        weights respectively.
    edge_drop : float
        Fraction of edges whose attention is forced to zero in training mode.
    negative_slope : float
        Negative slope of the LeakyReLU applied to raw attention scores.
    use_attn_dst : bool
        When false, no destination-side attention vector is learned and the
        score depends on the source node only.
    residual : bool
        When true, a bias-free linear residual of the destination features
        is added to the output.
    activation : callable or None
        Applied to the final output when not ``None``.
    allow_zero_in_degree : bool
        When false, ``forward`` refuses graphs containing nodes without
        incoming edges.
    use_symmetric_norm : bool
        When true, source features are scaled by ``out_degree ** -0.5``
        before aggregation and the result by ``in_degree ** 0.5`` after.
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        num_heads=1,
        feat_drop=0.0,
        attn_drop=0.0,
        edge_drop=0.0,
        negative_slope=0.2,
        use_attn_dst=True,
        residual=False,
        activation=None,
        allow_zero_in_degree=False,
        use_symmetric_norm=False,
    ):
        super(GATConv, self).__init__()
        self._num_heads = num_heads
        self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
        self._out_feats = out_feats
        self._allow_zero_in_degree = allow_zero_in_degree
        self._use_symmetric_norm = use_symmetric_norm
        if isinstance(in_feats, tuple):
            self.fc_src = nn.Linear(
                self._in_src_feats, out_feats * num_heads, bias=False
            )
            self.fc_dst = nn.Linear(
                self._in_dst_feats, out_feats * num_heads, bias=False
            )
        else:
            self.fc = nn.Linear(
                self._in_src_feats, out_feats * num_heads, bias=False
            )
        self.attn_l = nn.Parameter(
            torch.FloatTensor(size=(1, num_heads, out_feats))
        )
        if use_attn_dst:
            self.attn_r = nn.Parameter(
                torch.FloatTensor(size=(1, num_heads, out_feats))
            )
        else:
            # Registered as an empty buffer so the attribute always exists.
            self.register_buffer("attn_r", None)
        self.feat_drop = nn.Dropout(feat_drop)
        self.attn_drop = nn.Dropout(attn_drop)
        self.edge_drop = edge_drop
        self.leaky_relu = nn.LeakyReLU(negative_slope)
        if residual:
            self.res_fc = nn.Linear(
                self._in_dst_feats, num_heads * out_feats, bias=False
            )
        else:
            self.register_buffer("res_fc", None)
        self.reset_parameters()
        self._activation = activation

    def reset_parameters(self):
        """Re-initialise every learnable tensor with Xavier-normal (ReLU gain)."""
        gain = nn.init.calculate_gain("relu")
        if hasattr(self, "fc"):
            nn.init.xavier_normal_(self.fc.weight, gain=gain)
        else:
            nn.init.xavier_normal_(self.fc_src.weight, gain=gain)
            nn.init.xavier_normal_(self.fc_dst.weight, gain=gain)
        nn.init.xavier_normal_(self.attn_l, gain=gain)
        if isinstance(self.attn_r, nn.Parameter):
            nn.init.xavier_normal_(self.attn_r, gain=gain)
        if isinstance(self.res_fc, nn.Linear):
            nn.init.xavier_normal_(self.res_fc.weight, gain=gain)

    def set_allow_zero_in_degree(self, set_value):
        """Enable or disable the zero-in-degree check performed in ``forward``."""
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat):
        """Compute attention-weighted neighbour aggregation.

        Parameters
        ----------
        graph
            The graph (or block) to operate on; it is only modified inside a
            ``local_scope``.
        feat : torch.Tensor or tuple of torch.Tensor
            Node features, or a ``(source, destination)`` feature pair.

        Returns
        -------
        torch.Tensor
            Destination-node output viewed as ``(N_dst, num_heads, out_feats)``.

        Raises
        ------
        AssertionError
            If zero-in-degree nodes are present and not explicitly allowed.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    # Raised explicitly (instead of ``assert False``) so the
                    # check survives ``python -O`` and explains itself.
                    raise AssertionError(
                        "There are 0-in-degree nodes in the graph. Add "
                        "self-loops to the graph, or construct the layer "
                        "with allow_zero_in_degree=True (or call "
                        "set_allow_zero_in_degree(True)) to skip this check."
                    )

            if isinstance(feat, tuple):
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                # Pick the projections locally. Assigning ``self.fc`` to
                # ``self.fc_src`` / ``self.fc_dst`` here would register new
                # submodules and add duplicate keys to ``state_dict``.
                if hasattr(self, "fc_src"):
                    fc_src, fc_dst = self.fc_src, self.fc_dst
                else:
                    fc_src = fc_dst = self.fc
                feat_src = fc_src(h_src).view(
                    -1, self._num_heads, self._out_feats
                )
                feat_dst = fc_dst(h_dst).view(
                    -1, self._num_heads, self._out_feats
                )
            else:
                h_src = self.feat_drop(feat)
                feat_src = self.fc(h_src).view(
                    -1, self._num_heads, self._out_feats
                )
                if graph.is_block:
                    # In a block the destination nodes are taken as the
                    # leading rows of the source features.
                    h_dst = h_src[: graph.number_of_dst_nodes()]
                    feat_dst = feat_src[: graph.number_of_dst_nodes()]
                else:
                    h_dst = h_src
                    feat_dst = feat_src

            if self._use_symmetric_norm:
                degs = graph.out_degrees().float().clamp(min=1)
                norm = torch.pow(degs, -0.5)
                shp = norm.shape + (1,) * (feat_src.dim() - 1)
                norm = torch.reshape(norm, shp)
                feat_src = feat_src * norm

            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition"; the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is more efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # addition can be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and saves memory footprint.
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({"ft": feat_src, "el": el})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            if self.attn_r is not None:
                er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
                graph.dstdata.update({"er": er})
                graph.apply_edges(fn.u_add_v("el", "er", "e"))
            else:
                graph.apply_edges(fn.copy_u("el", "e"))
            e = self.leaky_relu(graph.edata.pop("e"))

            if self.training and self.edge_drop > 0:
                # Keep a random subset of edges; dropped edges retain a zero
                # attention weight and therefore contribute nothing.
                perm = torch.randperm(graph.num_edges(), device=e.device)
                bound = int(graph.num_edges() * self.edge_drop)
                eids = perm[bound:]
                graph.edata["a"] = torch.zeros_like(e)
                graph.edata["a"][eids] = self.attn_drop(
                    edge_softmax(graph, e[eids], eids=eids)
                )
            else:
                graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))

            # message passing
            graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
            rst = graph.dstdata["ft"]

            if self._use_symmetric_norm:
                degs = graph.in_degrees().float().clamp(min=1)
                norm = torch.pow(degs, 0.5)
                shp = norm.shape + (1,) * (feat_dst.dim() - 1)
                norm = torch.reshape(norm, shp)
                rst = rst * norm

            # residual
            if self.res_fc is not None:
                resval = self.res_fc(h_dst).view(
                    h_dst.shape[0], -1, self._out_feats
                )
                rst = rst + resval

            # activation
            if self._activation is not None:
                rst = self._activation(rst)

            return rst
def forward(self, graph, feat):
    """Run the stacked convolutions on ``graph`` and return the final scores.

    Every layer except the last has its output flattened from dimension 1
    onward, then passed through that layer's norm, the activation (called
    with ``inplace=True``) and dropout. The last layer's output is averaged
    over dimension 1 and passed through ``bias_last``.
    """
    last = self.n_layers - 1
    hidden = self.input_drop(feat)
    for layer_idx in range(self.n_layers):
        hidden = self.convs[layer_idx](graph, hidden)
        if layer_idx == last:
            # The output layer skips norm / activation / dropout.
            continue
        hidden = self.norms[layer_idx](hidden.flatten(1))
        hidden = self.activation(hidden, inplace=True)
        hidden = self.dropout(hidden)
    return self.bias_last(hidden.mean(1))
This is why "Highest Train" (typically occurring at the end of the 3rd epoch) is significantly higher than "Final Train" (corresponding to the epoch of maximal Validation performance). ## Background The purpose of this example is to faithfully recreate the ogbn-mag NeighborSampling (R-GCN aggr) [PyG implementation](https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/mag/sampler.py) using DGL's HeteroGraph API. This effort is a result of a deep-dive in [#3511](https://github.com/dmlc/dgl/issues/3511), which uncovered a number of differences between a simple R-GCN minibatch DGL implementation (e.g. like [this one](https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn-hetero/entity_classify_mb.py)) and one specific to the OGB MAG dataset. Some examples of such differences: - Instead of reversing `(paper, cites, paper)` into a new relation like `(paper, rev-cites, paper)`, the PyG implementation just made these into undirected edges ([code](https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/mag/sampler.py#L54)) - In the PyG implementation there's a separate "self" linear projection matrix for each _node-type_ ([code](https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/mag/sampler.py#L106)). This is different from the R-GCN [paper](https://arxiv.org/abs/1703.06103), which has a single "self" linear projection matrix for each R-GCN layer, not a different one for each node-type. ### Neighborhood sampling differences Although the model architectures, hyperparameter values and initialization methods are identical between the implementation here and the PyG one as of this writing, there is still a significant difference in the way neighbors are sampled, which results in the DGL implementation achieving significantly faster overfitting to the training dataset and slightly improved performance on the Test dataset.
In DGL, sampling on heterogeneous graphs with a `fanout = N` parameter means there are N samples _per incoming relation type_. In the PyG implementation, the heterogeneous graph is represented as a homogeneous graph and there are N samples total, regardless of relation type. This effectively means that given the same `fanout` value, there are R times as many neighbors sampled in DGL as in PyG, where R is the number of edge-types that are directed inward to a node. Since there are significantly more nodes involved in the computation, there are likewise more nodes receiving gradient updates and therefore more significant overfitting given the same number of epochs. An effort was made to mitigate this increase by reducing the fanout from `[25, 20]` to `[6, 5]`, which gives roughly the same number of neighbors between PyG and DGL and similar final training performance. However, the DGL implementation has significantly worse Test performance in this case. This is likely due to the fact that sampling, e.g., 5 nodes from each of 4 different edge types is not the same as sampling 20 nodes by ignoring edge type unless the edge types are uniformly distributed. ### Input features The `paper` nodes have 128-dimensional features that are derived from word embeddings of the words found in the title and abstract of the papers. Following the PyG implementation, all node types except `paper` receive 128-dimensional learnable embeddings as node features. This results in 154,029,312 learnable parameters for just the node features. ``` ParameterDict( (author): Parameter containing: [torch.FloatTensor of size 1134649x128] (field_of_study): Parameter containing: [torch.FloatTensor of size 59965x128] (institution): Parameter containing: [torch.FloatTensor of size 8740x128] ) ``` ### Model architecture The input features are passed to a modified version of the R-GCN architecture. As in the R-GCN paper, each _edge-type_ has its own linear projection matrix (the "weight" ModuleDict below).
Different from the original paper, however, each _node-type_ has its own "self" linear projection matrix (the "loop_weights" ModuleDict below). There are 7 edge-types: 4 natural edge-types ("cites", "affiliated_with", "has_topic" and "writes") and 3 manufactured reverse edge-types ("rev-affiliated_with", "rev-has_topic", "rev-writes"). As mentioned above, note that there is _not_ a reverse edge type like "rev-cites", and instead the reverse edges are given the same type of "cites". This exception was presumably made because the source and destination nodes are of type "paper". Whereas the 7 "relation" linear layers do not have a bias, the 4 "self" linear layers do. With two of these layers, a hidden dimension size of 64 and 349 output classes, we end up with 337,460 R-GCN model parameters. ================================================ FILE: examples/pytorch/ogb/ogbn-mag/hetero_rgcn.py ================================================ import argparse import itertools import sys import dgl import dgl.nn as dglnn import psutil import torch as th import torch.nn as nn import torch.nn.functional as F from dgl import AddReverse, Compose, ToSimple from dgl.nn import HeteroEmbedding from ogb.nodeproppred import DglNodePropPredDataset, Evaluator from tqdm import tqdm v_t = dgl.__version__ def prepare_data(args, device): dataset = DglNodePropPredDataset(name="ogbn-mag") split_idx = dataset.get_idx_split() # graph: dgl graph object, label: torch tensor of shape (num_nodes, num_tasks) g, labels = dataset[0] labels = labels["paper"].flatten() transform = Compose([ToSimple(), AddReverse()]) g = transform(g) print("Loaded graph: {}".format(g)) logger = Logger(args.runs) # train sampler sampler = dgl.dataloading.MultiLayerNeighborSampler([25, 20]) num_workers = args.num_workers train_loader = dgl.dataloading.DataLoader( g, split_idx["train"], sampler, batch_size=1024, shuffle=True, num_workers=num_workers, device=device, ) return g, labels, dataset.num_classes, split_idx, logger,
def extract_embed(node_embed, input_nodes):
    """Look up embeddings for every node type in the batch except "paper".

    ``input_nodes`` is iterated by key; each non-"paper" entry is forwarded
    unchanged to ``node_embed`` in a single dict, and whatever ``node_embed``
    returns is handed back to the caller.
    """
    non_paper_ids = {}
    for ntype in input_nodes:
        if ntype == "paper":
            # "paper" nodes are excluded from the embedding lookup.
            continue
        non_paper_ids[ntype] = input_nodes[ntype]
    return node_embed(non_paper_ids)
class Logger:
    r"""
    Collects (train, valid, test) results per run and prints summaries.

    Adapted from the PyG implementation so that performance is measured in
    precisely the same way:
    https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppred/mag/logger.py
    """

    def __init__(self, runs):
        # One independent list of results per run.
        self.results = [[] for _ in range(runs)]

    def add_result(self, run, result):
        """Append a 3-element ``result`` to the history of run ``run``."""
        assert len(result) == 3
        assert 0 <= run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None):
        """Print statistics for one run, or aggregated over all runs.

        All values are multiplied by 100 before printing. "Final" values are
        those recorded at the entry with the highest validation value.
        """
        if run is not None:
            scaled = 100 * th.tensor(self.results[run])
            best_epoch = scaled[:, 1].argmax().item()
            print(f"Run {run + 1:02d}:")
            print(f"Highest Train: {scaled[:, 0].max():.2f}")
            print(f"Highest Valid: {scaled[:, 1].max():.2f}")
            print(f" Final Train: {scaled[best_epoch, 0]:.2f}")
            print(f" Final Test: {scaled[best_epoch, 2]:.2f}")
            return

        all_runs = 100 * th.tensor(self.results)
        summary_rows = []
        for per_run in all_runs:
            best_epoch = per_run[:, 1].argmax()
            summary_rows.append(
                (
                    per_run[:, 0].max().item(),
                    per_run[:, 1].max().item(),
                    per_run[best_epoch, 0].item(),
                    per_run[best_epoch, 2].item(),
                )
            )
        summary = th.tensor(summary_rows)
        print("All runs:")
        labels = ("Highest Train", "Highest Valid", " Final Train", " Final Test")
        for column, label in enumerate(labels):
            values = summary[:, column]
            print(f"{label}: {values.mean():.2f} ± {values.std():.2f}")
def is_support_affinity(v_t):
    """Return True when DGL version ``v_t`` is at least 0.9.1.

    dgl supports enable_cpu_affinity since 0.9.1.

    The version is compared as a numeric ``(major, minor, patch)`` tuple.
    A plain string comparison orders versions lexicographically, so e.g.
    "0.10.0" would wrongly sort below "0.9.1".

    Parameters
    ----------
    v_t : str
        A version string such as "0.9.1", "1.1.3" or "0.9.1post1". Only the
        leading digits of the first three dot-separated components are used;
        missing components count as 0.
    """
    import re  # local import: keeps this helper self-contained

    release = []
    for piece in str(v_t).split(".")[:3]:
        leading_digits = re.match(r"\d+", piece)
        if leading_digits is None:
            # Stop at the first non-numeric component (e.g. "dev").
            break
        release.append(int(leading_digits.group()))
    # Pad so that "0.9" compares as (0, 9, 0).
    release.extend([0] * (3 - len(release)))
    return tuple(release) >= (0, 9, 1)
num_classes).to(device) print( f"Number of embedding parameters: {sum(p.numel() for p in embed_layer.parameters())}" ) print( f"Number of model parameters: {sum(p.numel() for p in model.parameters())}" ) for run in range(args.runs): try: embed_layer.reset_parameters() model.reset_parameters() except: # old pytorch version doesn't support reset_parameters() API pass # optimizer all_params = itertools.chain( model.parameters(), embed_layer.parameters() ) optimizer = th.optim.Adam(all_params, lr=0.01) if ( args.num_workers != 0 and device == "cpu" and is_support_affinity(v_t) ): expected_max = int(psutil.cpu_count(logical=False)) if args.num_workers >= expected_max: print( f"[ERROR] You specified num_workers are larger than physical cores, please set any number less than {expected_max}", file=sys.stderr, ) with train_loader.enable_cpu_affinity(): logger = train( g, model, embed_layer, optimizer, train_loader, split_idx, labels, logger, device, run, ) else: logger = train( g, model, embed_layer, optimizer, train_loader, split_idx, labels, logger, device, run, ) logger.print_statistics(run) print("Final performance: ") logger.print_statistics() if __name__ == "__main__": parser = argparse.ArgumentParser(description="RGCN") parser.add_argument("--runs", type=int, default=10) parser.add_argument("--num_workers", type=int, default=0) args = parser.parse_args() main(args) ================================================ FILE: examples/pytorch/ogb/ogbn-products/gat/README.md ================================================ # DGL examples for ogbn-products ## Sample-based GAT Requires DGL 0.4.3post2 or later versions. Run `main.py` and you should directly see the result. Accuracy over 5 runs: 0.7863197 ± 0.00072568655 ## GAT (another implementation) Requires DGL 0.5 or later versions. For the score of `GAT`, run the following command and you should directly see the result. 
def load_data(dataset):
    """Load the named OGB node-property dataset together with its evaluator.

    The labels are also stored on the graph as ``graph.ndata["labels"]``.

    Returns
    -------
    tuple
        ``(graph, labels, train_idx, val_idx, test_idx, evaluator)`` where
        the three index objects come from the dataset's "train", "valid"
        and "test" split entries.
    """
    data = DglNodePropPredDataset(name=dataset)
    evaluator = Evaluator(name=dataset)
    split = data.get_idx_split()
    graph, labels = data[0]
    graph.ndata["labels"] = labels
    return (
        graph,
        labels,
        split["train"],
        split["valid"],
        split["test"],
        evaluator,
    )
def update_hard_labels(graph, idx=None):
    """Overwrite the trailing label columns of ``feat`` with one-hot labels.

    For the selected source nodes, the last ``n_classes`` columns of
    ``graph.srcdata["feat"]`` are replaced in place with the matching rows
    of ``graph.srcdata["train_labels_onehot"]``.

    Parameters
    ----------
    graph
        Object exposing ``srcdata`` with "feat", "train_labels_onehot" and,
        when ``idx`` is None, a boolean "is_train" entry.
    idx : torch.Tensor, optional
        Indices of the source nodes to update. Defaults to every node whose
        "is_train" flag is set.
    """
    if idx is None:
        # Derive the indices from the mask itself so they live on the same
        # device as the features. Indexing a CPU ``torch.arange`` with a
        # mask stored on another device fails.
        idx = torch.nonzero(graph.srcdata["is_train"], as_tuple=True)[0]
    graph.srcdata["feat"][idx, -n_classes:] = graph.srcdata[
        "train_labels_onehot"
    ][idx]
< args.mask_rate train_labels_idx = torch.cat( [ new_train_idx[~mask], torch.arange(len(output_nodes), len(input_nodes)), ] ) train_pred_idx = new_train_idx[mask] add_soft_labels( subgraphs[0], F.softmax(preds_old[input_nodes].to(device), dim=-1), ) update_hard_labels(subgraphs[0], train_labels_idx) else: train_pred_idx = new_train_idx pred = model(subgraphs) preds[output_nodes] = pred.cpu().detach() # NOTE: This is not a complete implementation of label reuse, since it is too expensive # to predict the nodes in validation and test set during training time. if it == args.n_label_iters: loss = criterion( pred[train_pred_idx], subgraphs[-1].dstdata["labels"][train_pred_idx], ) optimizer.zero_grad() loss.backward() optimizer.step() count = len(train_pred_idx) loss_sum += loss.item() * count total += count torch.cuda.empty_cache() return ( evaluator(preds[train_idx], labels[train_idx]), loss_sum / total, ) @torch.no_grad() def evaluate( args, model, dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator, ): model.eval() # Due to the limitation of memory capacity, we calculate the average of logits 'eval_times' times. 
eval_times = 1 preds_avg = torch.zeros(labels.shape[0], n_classes) for _ in range(eval_times): preds = torch.zeros(labels.shape[0], n_classes) for _it in range(args.n_label_iters + 1): preds_old = preds.clone() for input_nodes, output_nodes, subgraphs in dataloader: subgraphs = [b.to(device) for b in subgraphs] if args.use_labels: add_soft_labels( subgraphs[0], F.softmax(preds_old[input_nodes].to(device), dim=-1), ) update_hard_labels(subgraphs[0]) pred = model(subgraphs, inference=True) preds[output_nodes] = pred.cpu() torch.cuda.empty_cache() preds_avg += preds preds_avg = preds_avg.to(device) preds_avg /= eval_times train_loss = criterion(preds_avg[train_idx], labels[train_idx]).item() val_loss = criterion(preds_avg[val_idx], labels[val_idx]).item() test_loss = criterion(preds_avg[test_idx], labels[test_idx]).item() return ( evaluator(preds_avg[train_idx], labels[train_idx]), evaluator(preds_avg[val_idx], labels[val_idx]), evaluator(preds_avg[test_idx], labels[test_idx]), train_loss, val_loss, test_loss, ) def run( args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running ): evaluator_wrapper = lambda pred, labels: evaluator.eval( {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels} )["acc"] criterion = custom_loss_function n_train_samples = train_idx.shape[0] train_batch_size = (n_train_samples + 29) // 30 train_sampler = MultiLayerNeighborSampler( [10 for _ in range(args.n_layers)] ) train_dataloader = DataLoader( graph.cpu(), train_idx.cpu(), train_sampler, batch_size=train_batch_size, shuffle=True, num_workers=4, ) eval_batch_size = 32768 eval_sampler = MultiLayerNeighborSampler([15 for _ in range(args.n_layers)]) if args.estimation_mode: test_idx_during_training = test_idx[ torch.arange(start=0, end=len(test_idx), step=45) ] else: test_idx_during_training = test_idx eval_idx = torch.cat( [train_idx.cpu(), val_idx.cpu(), test_idx_during_training.cpu()] ) eval_dataloader = DataLoader( graph.cpu(), eval_idx, eval_sampler, 
batch_size=eval_batch_size, shuffle=False, num_workers=4, ) model = gen_model(args).to(device) optimizer = optim.AdamW( model.parameters(), lr=args.lr, weight_decay=args.wd ) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau( optimizer, mode="max", factor=0.7, patience=20, min_lr=1e-4, ) best_model_state_dict = None total_time = 0 val_score, best_val_score, final_test_score = 0, 0, 0 scores, train_scores, val_scores, test_scores = [], [], [], [] losses, train_losses, val_losses, test_losses = [], [], [], [] for epoch in range(1, args.n_epochs + 1): tic = time.time() score, loss = train( args, model, train_dataloader, labels, train_idx, criterion, optimizer, evaluator_wrapper, ) toc = time.time() total_time += toc - tic if ( epoch == args.n_epochs or epoch % args.eval_every == 0 or epoch % args.log_every == 0 ): ( train_score, val_score, test_score, train_loss, val_loss, test_loss, ) = evaluate( args, model, eval_dataloader, labels, train_idx, val_idx, test_idx_during_training, criterion, evaluator_wrapper, ) if val_score > best_val_score: best_val_score = val_score final_test_score = test_score if args.estimation_mode: best_model_state_dict = { k: v.to("cpu") for k, v in model.state_dict().items() } if epoch == args.n_epochs or epoch % args.log_every == 0: print( f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch:.2f}\n" f"Loss: {loss:.4f}, Score: {score:.4f}\n" f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n" f"Train/Val/Test/Best val/Final test score: {train_score:.4f}/{val_score:.4f}/{test_score:.4f}/{best_val_score:.4f}/{final_test_score:.4f}" ) for l, e in zip( [ scores, train_scores, val_scores, test_scores, losses, train_losses, val_losses, test_losses, ], [ score, train_score, val_score, test_score, loss, train_loss, val_loss, test_loss, ], ): l.append(e) lr_scheduler.step(val_score) if args.estimation_mode: model.load_state_dict(best_model_state_dict) eval_dataloader = 
DataLoader( graph.cpu(), test_idx.cpu(), eval_sampler, batch_size=eval_batch_size, shuffle=False, num_workers=4, ) final_test_score = evaluate( args, model, eval_dataloader, labels, train_idx, val_idx, test_idx, criterion, evaluator_wrapper, )[2] print("*" * 50) print( f"Best val score: {best_val_score}, Final test score: {final_test_score}" ) print("*" * 50) if args.plot_curves: fig = plt.figure(figsize=(24, 24)) ax = fig.gca() ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.set_yticks(np.linspace(0, 1.0, 101)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( [train_scores, val_scores, test_scores], ["train score", "val score", "test score"], ): plt.plot( range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1, ) ax.xaxis.set_major_locator(MultipleLocator(10)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.01)) ax.yaxis.set_minor_locator(AutoMinorLocator(2)) plt.grid(which="major", color="red", linestyle="dotted") plt.grid(which="minor", color="orange", linestyle="dotted") plt.legend() plt.tight_layout() plt.savefig(f"gat_score_{n_running}.png") fig = plt.figure(figsize=(24, 24)) ax = fig.gca() ax.set_xticks(np.arange(0, args.n_epochs, 100)) ax.tick_params(labeltop=True, labelright=True) for y, label in zip( [losses, train_losses, val_losses, test_losses], ["loss", "train loss", "val loss", "test loss"], ): plt.plot( range(1, args.n_epochs + 1, args.log_every), y, label=label, linewidth=1, ) ax.xaxis.set_major_locator(MultipleLocator(10)) ax.xaxis.set_minor_locator(AutoMinorLocator(1)) ax.yaxis.set_major_locator(MultipleLocator(0.1)) ax.yaxis.set_minor_locator(AutoMinorLocator(5)) plt.grid(which="major", color="red", linestyle="dotted") plt.grid(which="minor", color="orange", linestyle="dotted") plt.legend() plt.tight_layout() plt.savefig(f"gat_loss_{n_running}.png") return best_val_score, final_test_score def count_parameters(args): model = gen_model(args) return sum( 
def main():
    """Command-line entry point: train and evaluate the GAT on ogbn-products.

    Parses the options declared below, selects the compute device (stored in
    the module-level ``device``), loads and preprocesses the dataset, then
    performs ``args.n_runs`` trainings. Run ``i`` (1-based) is seeded with
    ``args.seed + i``. Per-run and aggregated validation/test scores and the
    number of trainable parameters are printed at the end.
    """
    global device

    argparser = argparse.ArgumentParser(
        "GAT implementation on ogbn-products",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    argparser.add_argument(
        "--cpu",
        action="store_true",
        help="CPU mode. This option overrides '--gpu'.",
    )
    argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID")
    argparser.add_argument("--seed", type=int, default=0, help="seed")
    argparser.add_argument(
        "--n-runs", type=int, default=10, help="running times"
    )
    argparser.add_argument(
        "--n-epochs", type=int, default=250, help="number of epochs"
    )
    argparser.add_argument(
        "--use-labels",
        action="store_true",
        help="Use labels in the training set as input features.",
    )
    argparser.add_argument(
        "--n-label-iters",
        type=int,
        default=0,
        help="number of label iterations",
    )
    argparser.add_argument(
        "--no-attn-dst", action="store_true", help="Don't use attn_dst."
    )
    argparser.add_argument(
        "--mask-rate", type=float, default=0.5, help="mask rate"
    )
    argparser.add_argument(
        "--n-heads", type=int, default=4, help="number of heads"
    )
    argparser.add_argument(
        "--lr", type=float, default=0.01, help="learning rate"
    )
    argparser.add_argument(
        "--n-layers", type=int, default=3, help="number of layers"
    )
    argparser.add_argument(
        "--n-hidden", type=int, default=120, help="number of hidden units"
    )
    argparser.add_argument(
        "--dropout", type=float, default=0.5, help="dropout rate"
    )
    argparser.add_argument(
        "--input-drop", type=float, default=0.1, help="input drop rate"
    )
    argparser.add_argument(
        "--attn-dropout", type=float, default=0.0, help="attention drop rate"
    )
    argparser.add_argument(
        "--edge-drop", type=float, default=0.1, help="edge drop rate"
    )
    argparser.add_argument("--wd", type=float, default=0, help="weight decay")
    # Fixed help text: this option controls how often evaluation runs; the
    # previous text was a copy of the --log-every description.
    argparser.add_argument(
        "--eval-every",
        type=int,
        default=2,
        help="evaluate every EVAL_EVERY epochs",
    )
    argparser.add_argument(
        "--estimation-mode",
        action="store_true",
        help="Estimate the score of test set for speed during training.",
    )
    argparser.add_argument(
        "--log-every", type=int, default=2, help="log every LOG_EVERY epochs"
    )
    argparser.add_argument(
        "--plot-curves", action="store_true", help="plot learning curves"
    )
    args = argparser.parse_args()

    # --cpu takes precedence over --gpu.
    if args.cpu:
        device = torch.device("cpu")
    else:
        device = torch.device("cuda:%d" % args.gpu)

    # load data & preprocess
    graph, labels, train_idx, val_idx, test_idx, evaluator = load_data(dataset)
    graph, labels = preprocess(graph, labels, train_idx)

    # Labels and split indices live on the compute device; the graph itself is
    # handed to run() as returned by preprocess().
    labels, train_idx, val_idx, test_idx = (
        t.to(device) for t in (labels, train_idx, val_idx, test_idx)
    )

    # run
    val_scores, test_scores = [], []

    for i in range(1, args.n_runs + 1):
        seed(args.seed + i)
        val_score, test_score = run(
            args, graph, labels, train_idx, val_idx, test_idx, evaluator, i
        )
        val_scores.append(val_score)
        test_scores.append(test_score)

    print(args)
    print(f"Runned {args.n_runs} times")
    print("Val scores:", val_scores)
    print("Test scores:", test_scores)
    print(f"Average val score: {np.mean(val_scores)} ± {np.std(val_scores)}")
    print(f"Average test score: {np.mean(test_scores)} ± {np.std(test_scores)}")
    print(f"Number of params: {count_parameters(args)}")

    if args.estimation_mode:
        print(
            "WARNING: Estimation mode is enabled. The final test score is accurate, but not accurate during training time."
        )
class GAT(nn.Module):
    """Stack of ``dglnn.GATConv`` layers trained on sampled blocks.

    Hidden layers concatenate their attention heads (``flatten(1)``); the
    last layer averages its heads and the result is passed through
    ``log_softmax``.

    Args:
        in_feats: size of the input node features.
        n_hidden: per-head output size of every hidden layer.
        n_classes: per-head output size of the last layer.
        n_layers: number of layers the forward/inference logic assumes.
        num_heads: attention heads per layer.
        activation: activation handed to every hidden ``GATConv``.
    """

    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, num_heads, activation
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        # Input layer.
        self.layers.append(
            dglnn.GATConv(
                (in_feats, in_feats),
                n_hidden,
                num_heads=num_heads,
                activation=activation,
            )
        )
        # Hidden layers consume the concatenated heads of the previous layer.
        for _ in range(1, n_layers - 1):
            self.layers.append(
                dglnn.GATConv(
                    (n_hidden * num_heads, n_hidden * num_heads),
                    n_hidden,
                    num_heads=num_heads,
                    activation=activation,
                )
            )
        # Output layer: no activation, heads are averaged by the caller.
        self.layers.append(
            dglnn.GATConv(
                (n_hidden * num_heads, n_hidden * num_heads),
                n_classes,
                num_heads=num_heads,
                activation=None,
            )
        )

    def forward(self, blocks, x):
        """Apply one layer per block and return per-node log-probabilities.

        Args:
            blocks: sequence of sampled blocks, one per layer.
            x: input features of the source nodes of ``blocks[0]``.
        """
        h = x
        for l, (layer, block) in enumerate(zip(self.layers, blocks)):
            # The destination nodes of a block are the leading rows of its
            # source nodes, so their current representation is a prefix of h.
            h_dst = h[: block.num_dst_nodes()]
            if l < self.n_layers - 1:
                # Hidden layer: concatenate heads.
                h = layer(block, (h, h_dst)).flatten(1)
            else:
                # Output layer: average heads.
                h = layer(block, (h, h_dst))
                h = h.mean(1)
        return h.log_softmax(dim=-1)

    def inference(
        self, g, x, num_heads, device, batch_size=None, num_workers=None
    ):
        """Layer-wise inference over the whole graph with full neighborhoods.

        Representations of all nodes are computed one layer at a time, in
        batches, so that the work of early layers is not repeated for every
        output node as it would be with multi-layer blocks.

        Args:
            g: the entire graph.
            x: input features of all nodes.
            num_heads: attention heads per layer (sizes the hidden buffers).
            device: device on which each batch is computed.
            batch_size: nodes per batch. ``None`` keeps the historical
                behaviour of reading ``batch_size`` from the module-level
                ``args`` namespace created by the script entry point.
            num_workers: sampler worker processes; ``None`` falls back to the
                module-level ``args`` in the same way.

        Returns:
            Tensor on ``device`` holding the last layer's output for every
            node.
        """
        if batch_size is None:
            batch_size = args.batch_size
        if num_workers is None:
            num_workers = args.num_workers

        sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
        last = len(self.layers) - 1
        for l, layer in enumerate(self.layers):
            # Width of this layer's output buffer (kept on CPU).
            if l == last:
                out_dim = self.n_classes
            elif l < self.n_layers - 1:
                out_dim = self.n_hidden * num_heads
            else:
                out_dim = self.n_hidden
            y = th.zeros(g.num_nodes(), out_dim)

            dataloader = dgl.dataloading.DataLoader(
                g,
                th.arange(g.num_nodes()),
                sampler,
                batch_size=batch_size,
                shuffle=True,
                drop_last=False,
                num_workers=num_workers,
            )

            for input_nodes, output_nodes, blocks in tqdm.tqdm(dataloader):
                block = blocks[0].int().to(device)

                h = x[input_nodes].to(device)
                h_dst = h[: block.num_dst_nodes()]
                if l < self.n_layers - 1:
                    h = layer(block, (h, h_dst)).flatten(1)
                else:
                    h = layer(block, (h, h_dst))
                    h = h.mean(1)
                    h = h.log_softmax(dim=-1)

                # Results are scattered by node id, so batch order is
                # irrelevant to the final buffer.
                y[output_nodes] = h.cpu()

            # The output of this layer is the input of the next one.
            x = y
        return y.to(device)
def run(args, device, data):
    """Train one GAT with neighbor sampling and return its selected test accuracy.

    Args:
        args: parsed CLI namespace. Reads ``fan_out``, ``batch_size``,
            ``num_workers``, ``num_hidden``, ``num_layers``, ``lr``, ``wd``,
            ``num_epochs``, ``log_every``, ``eval_every`` and ``save_pred``.
        device: torch device the model and the sampled blocks are moved to.
        data: tuple ``(train_nid, val_nid, test_nid, in_feats, labels,
            n_classes, nfeat, g, num_heads)``.

    Returns:
        0-dim tensor with the test accuracy measured at the evaluation that
        achieved the best validation accuracy (zero when no evaluation ran
        or none improved on zero).
    """
    # Unpack data
    (
        train_nid,
        val_nid,
        test_nid,
        in_feats,
        labels,
        n_classes,
        nfeat,
        g,
        num_heads,
    ) = data

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(",")]
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_nid,
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )

    # Define model and optimizer
    model = GAT(
        in_feats, args.num_hidden, n_classes, args.num_layers, num_heads, F.relu
    )
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)

    # Training loop
    timed_total = 0.0  # wall time of the epochs that count toward the average
    timed_epochs = 0
    iter_tput = []
    # Kept as tensors so the return value always supports ``.cpu()``, even
    # when no evaluation ever improves on the initial value.
    best_eval_acc = th.zeros((), device=device)
    best_test_acc = th.zeros((), device=device)
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            tic_step = time.time()

            # copy block to gpu
            blocks = [blk.to(device) for blk in blocks]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                nfeat, labels, seeds, input_nodes
            )

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            loss = F.nll_loss(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                acc = compute_acc(batch_pred, batch_labels)
                gpu_mem_alloc = (
                    th.cuda.max_memory_allocated() / 1000000
                    if th.cuda.is_available()
                    else 0
                )
                # The first three iterations are excluded as warm-up; report
                # nan (without a NumPy warning) until more samples exist.
                warm_tput = iter_tput[3:]
                speed = np.mean(warm_tput) if warm_tput else float("nan")
                print(
                    "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB".format(
                        epoch,
                        step,
                        loss.item(),
                        acc.item(),
                        speed,
                        gpu_mem_alloc,
                    )
                )

        toc = time.time()
        print("Epoch Time(s): {:.4f}".format(toc - tic))
        # The first five epochs are treated as warm-up for timing purposes.
        if epoch >= 5:
            timed_total += toc - tic
            timed_epochs += 1

        if epoch % args.eval_every == 0 and epoch != 0:
            eval_acc, test_acc, pred = evaluate(
                model, g, nfeat, labels, val_nid, test_nid, num_heads, device
            )
            if args.save_pred:
                np.savetxt(
                    args.save_pred + "%02d" % epoch,
                    pred.argmax(1).cpu().numpy(),
                    "%d",
                )
            print("Eval Acc {:.4f}".format(eval_acc))
            if eval_acc > best_eval_acc:
                best_eval_acc = eval_acc
                best_test_acc = test_acc
            print(
                "Best Eval Acc {:.4f} Test Acc {:.4f}".format(
                    best_eval_acc, best_test_acc
                )
            )

    # Guard against runs of five epochs or fewer, where no epoch was timed.
    avg_epoch_time = (
        timed_total / timed_epochs if timed_epochs else float("nan")
    )
    print("Avg epoch time: {}".format(avg_epoch_time))
    return best_test_acc
Use -1 for CPU training", ) argparser.add_argument("--num-epochs", type=int, default=100) argparser.add_argument("--num-hidden", type=int, default=128) argparser.add_argument("--num-layers", type=int, default=3) argparser.add_argument("--fan-out", type=str, default="10,10,10") argparser.add_argument("--batch-size", type=int, default=512) argparser.add_argument("--val-batch-size", type=int, default=512) argparser.add_argument("--log-every", type=int, default=20) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--lr", type=float, default=0.001) argparser.add_argument( "--num-workers", type=int, default=8, help="Number of sampling processes. Use 0 for no extra process.", ) argparser.add_argument("--save-pred", type=str, default="") argparser.add_argument("--head", type=int, default=4) argparser.add_argument("--wd", type=float, default=0) args = argparser.parse_args() if args.gpu >= 0: device = th.device("cuda:%d" % args.gpu) else: device = th.device("cpu") # load data data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] nfeat = graph.ndata.pop("feat").to(device) labels = labels[:, 0].to(device) print("Total edges before adding self-loop {}".format(graph.num_edges())) graph = graph.remove_self_loop().add_self_loop() print("Total edges after adding self-loop {}".format(graph.num_edges())) in_feats = nfeat.shape[1] n_classes = (labels.max() + 1).item() # Create csr/coo/csc formats before launching sampling processes # This avoids creating certain formats in each data loader process, which saves momory and CPU. 
class GATConv(nn.Module):
    """Graph attention layer with optional edge features and edge dropout.

    Attention logits are the sum of a source-node term, an optional
    destination-node term and an optional edge-feature term, passed through
    LeakyReLU and normalised with ``edge_softmax``.

    Args:
        node_feats: input node feature size (int, or a pair expanded by
            ``expand_as_pair``).
        edge_feats: input edge feature size; ``0`` disables the edge
            attention term.
        out_feats: output size per head.
        n_heads: number of attention heads.
        attn_drop: dropout probability applied to attention weights.
        edge_drop: fraction of edges removed from the softmax in training.
        negative_slope: slope of the LeakyReLU applied to the logits.
        residual: if True add a linear projection of the destination
            features; otherwise add a learnable bias.
        activation: optional callable invoked as
            ``activation(rst, inplace=True)``.
        use_attn_dst: include the destination-node attention term.
        allow_zero_in_degree: when False, ``forward`` raises
            ``AssertionError`` if any node has no incoming edge.
        use_symmetric_norm: scale source features by ``out_degree ** -0.5``
            and the aggregated output by ``in_degree ** 0.5``.
    """

    def __init__(
        self,
        node_feats,
        edge_feats,
        out_feats,
        n_heads=1,
        attn_drop=0.0,
        edge_drop=0.0,
        negative_slope=0.2,
        residual=True,
        activation=None,
        use_attn_dst=True,
        allow_zero_in_degree=True,
        use_symmetric_norm=False,
    ):
        super().__init__()
        self._n_heads = n_heads
        self._in_src_feats, self._in_dst_feats = expand_as_pair(node_feats)
        self._out_feats = out_feats
        self._allow_zero_in_degree = allow_zero_in_degree
        self._use_symmetric_norm = use_symmetric_norm

        # feat fc
        self.src_fc = nn.Linear(
            self._in_src_feats, out_feats * n_heads, bias=False
        )
        if residual:
            # forward() slices the destination features out of feat_src, so
            # they share the source feature size.
            self.dst_fc = nn.Linear(self._in_src_feats, out_feats * n_heads)
            self.bias = None
        else:
            self.dst_fc = None
            # nn.Parameter requires a tensor; passing the integer size (as the
            # code did before) raises TypeError.
            self.bias = nn.Parameter(torch.zeros(out_feats * n_heads))

        # attn fc
        self.attn_src_fc = nn.Linear(self._in_src_feats, n_heads, bias=False)
        if use_attn_dst:
            self.attn_dst_fc = nn.Linear(
                self._in_src_feats, n_heads, bias=False
            )
        else:
            self.attn_dst_fc = None
        if edge_feats > 0:
            self.attn_edge_fc = nn.Linear(edge_feats, n_heads, bias=False)
        else:
            self.attn_edge_fc = None

        self.attn_drop = nn.Dropout(attn_drop)
        self.edge_drop = edge_drop
        self.leaky_relu = nn.LeakyReLU(negative_slope, inplace=True)
        self.activation = activation

        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-normal init (ReLU gain) for all linear weights; zero the bias."""
        gain = nn.init.calculate_gain("relu")
        nn.init.xavier_normal_(self.src_fc.weight, gain=gain)
        if self.dst_fc is not None:
            nn.init.xavier_normal_(self.dst_fc.weight, gain=gain)

        nn.init.xavier_normal_(self.attn_src_fc.weight, gain=gain)
        if self.attn_dst_fc is not None:
            nn.init.xavier_normal_(self.attn_dst_fc.weight, gain=gain)
        if self.attn_edge_fc is not None:
            nn.init.xavier_normal_(self.attn_edge_fc.weight, gain=gain)

        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def set_allow_zero_in_degree(self, set_value):
        """Enable or disable the zero-in-degree check performed in ``forward``."""
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat_src, feat_edge=None):
        """Compute the attention-weighted aggregation.

        Args:
            graph: graph or block to operate on.
            feat_src: source node features; for a block the destination
                features are taken from its leading rows.
            feat_edge: optional edge features for the edge attention term.

        Returns:
            Tensor of shape ``(num_dst_nodes, n_heads, out_feats)``.

        Raises:
            AssertionError: zero-in-degree nodes exist and are not allowed.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if (graph.in_degrees() == 0).any():
                    # Raised explicitly (rather than ``assert False``) so the
                    # check is not removed when Python runs with -O.
                    raise AssertionError(
                        "graph contains nodes with zero in-degree"
                    )

            if graph.is_block:
                feat_dst = feat_src[: graph.number_of_dst_nodes()]
            else:
                feat_dst = feat_src

            if self._use_symmetric_norm:
                degs = graph.out_degrees().float().clamp(min=1)
                norm = torch.pow(degs, -0.5)
                shp = norm.shape + (1,) * (feat_src.dim() - 1)
                norm = torch.reshape(norm, shp)
                feat_src = feat_src * norm

            feat_src_fc = self.src_fc(feat_src).view(
                -1, self._n_heads, self._out_feats
            )
            # The residual projection only exists when residual=True.
            if self.dst_fc is not None:
                feat_dst_fc = self.dst_fc(feat_dst).view(
                    -1, self._n_heads, self._out_feats
                )
            else:
                feat_dst_fc = None
            attn_src = self.attn_src_fc(feat_src).view(-1, self._n_heads, 1)

            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # This avoids materialising [Wh_i || Wh_j] on every edge, and the
            # addition maps onto DGL's built-in u_add_v.
            graph.srcdata.update(
                {"feat_src_fc": feat_src_fc, "attn_src": attn_src}
            )

            if self.attn_dst_fc is not None:
                attn_dst = self.attn_dst_fc(feat_dst).view(-1, self._n_heads, 1)
                graph.dstdata.update({"attn_dst": attn_dst})
                graph.apply_edges(
                    fn.u_add_v("attn_src", "attn_dst", "attn_node")
                )
            else:
                graph.apply_edges(fn.copy_u("attn_src", "attn_node"))

            e = graph.edata["attn_node"]
            if feat_edge is not None:
                attn_edge = self.attn_edge_fc(feat_edge).view(
                    -1, self._n_heads, 1
                )
                graph.edata.update({"attn_edge": attn_edge})
                e += graph.edata["attn_edge"]
            e = self.leaky_relu(e)

            if self.training and self.edge_drop > 0:
                # Keep a random subset of edges; dropped edges keep weight 0.
                perm = torch.randperm(graph.num_edges(), device=e.device)
                bound = int(graph.num_edges() * self.edge_drop)
                eids = perm[bound:]
                graph.edata["a"] = torch.zeros_like(e)
                graph.edata["a"][eids] = self.attn_drop(
                    edge_softmax(graph, e[eids], eids=eids)
                )
            else:
                graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))

            # message passing
            graph.update_all(
                fn.u_mul_e("feat_src_fc", "a", "m"), fn.sum("m", "feat_src_fc")
            )

            rst = graph.dstdata["feat_src_fc"]

            if self._use_symmetric_norm:
                degs = graph.in_degrees().float().clamp(min=1)
                norm = torch.pow(degs, 0.5)
                shp = norm.shape + (1,) * (feat_dst.dim())
                norm = torch.reshape(norm, shp)
                rst = rst * norm

            # residual
            if feat_dst_fc is not None:
                rst += feat_dst_fc
            else:
                # The bias is stored flat; reshape it so it broadcasts over
                # the (nodes, heads, out_feats) result.
                rst = rst + self.bias.view(1, self._n_heads, self._out_feats)

            # activation
            if self.activation is not None:
                rst = self.activation(rst, inplace=True)

            return rst
class MLP(nn.Module):
    """Feed-forward network: Linear -> BatchNorm -> activation -> dropout per
    hidden layer, followed by a final Linear producing ``n_classes`` outputs.

    Args:
        in_feats: input feature size.
        n_classes: output size of the last layer.
        n_layers: total number of linear layers.
        n_hidden: width of every hidden layer.
        activation: callable invoked as ``activation(h, inplace=True)``.
        dropout: dropout probability after each hidden layer.
        input_drop: dropout probability applied to the input.
        residual: add the previous pre-normalisation output to hidden layers
            other than the first.
    """

    def __init__(
        self,
        in_feats,
        n_classes,
        n_layers,
        n_hidden,
        activation,
        dropout=0.0,
        input_drop=0.0,
        residual=False,
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes

        self.linears = nn.ModuleList()
        self.norms = nn.ModuleList()

        final = n_layers - 1
        for idx in range(n_layers):
            fan_in = in_feats if idx == 0 else n_hidden
            fan_out = n_classes if idx >= final else n_hidden

            self.linears.append(nn.Linear(fan_in, fan_out))
            # Only hidden layers are followed by batch normalisation.
            if idx < final:
                self.norms.append(nn.BatchNorm1d(fan_out))

        self.activation = activation
        self.input_drop = nn.Dropout(input_drop)
        self.dropout = nn.Dropout(dropout)
        self.residual = residual

    def forward(self, h):
        """Return the output of the last linear layer for input ``h``."""
        h = self.input_drop(h)
        h_last = None
        final = self.n_layers - 1

        for i in range(self.n_layers):
            h = self.linears[i](h)
            is_hidden = i < final

            # Skip connection between consecutive hidden layers.
            if self.residual and i > 0 and is_hidden:
                h += h_last
            h_last = h

            if not is_hidden:
                continue
            h = self.norms[i](h)
            h = self.activation(h, inplace=True)
            h = self.dropout(h)

        return h
def compute_acc(pred, labels):
    """
    Return the fraction of rows of ``pred`` whose arg-max class matches ``labels``.
    """
    predicted = pred.argmax(dim=1)
    n_correct = (predicted == labels).float().sum()
    return n_correct / len(pred)
def run(args, device, data):
    """Train one GraphSAGE model with neighbor sampling and return its selected test accuracy.

    Args:
        args: parsed CLI namespace. Reads ``fan_out``, ``batch_size``,
            ``num_workers``, ``num_hidden``, ``num_layers``, ``dropout``,
            ``lr``, ``wd``, ``num_epochs``, ``log_every``, ``eval_every`` and
            ``save_pred``.
        device: torch device the model and the sampled blocks are moved to.
        data: tuple ``(train_nid, val_nid, test_nid, in_feats, labels,
            n_classes, nfeat, g)``.

    Returns:
        0-dim tensor with the test accuracy measured at the evaluation that
        achieved the best validation accuracy (zero when no evaluation ran
        or none improved on zero).
    """
    # Unpack data
    train_nid, val_nid, test_nid, in_feats, labels, n_classes, nfeat, g = data

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(",")]
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_nid,
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers,
    )

    # Define model and optimizer
    model = SAGE(
        in_feats,
        args.num_hidden,
        n_classes,
        args.num_layers,
        F.relu,
        args.dropout,
    )
    model = model.to(device)
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)

    # Training loop
    timed_total = 0.0  # wall time of the epochs that count toward the average
    timed_epochs = 0
    iter_tput = []
    # Kept as tensors so the return value always supports ``.cpu()``, even
    # when no evaluation ever improves on the initial value.
    best_eval_acc = th.zeros((), device=device)
    best_test_acc = th.zeros((), device=device)
    for epoch in range(args.num_epochs):
        tic = time.time()

        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
            tic_step = time.time()

            # copy block to gpu
            blocks = [blk.int().to(device) for blk in blocks]

            # Load the input features as well as output labels
            batch_inputs, batch_labels = load_subtensor(
                nfeat, labels, seeds, input_nodes
            )

            # Compute loss and prediction
            batch_pred = model(blocks, batch_inputs)
            loss = loss_fcn(batch_pred, batch_labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                acc = compute_acc(batch_pred, batch_labels)
                gpu_mem_alloc = (
                    th.cuda.max_memory_allocated() / 1000000
                    if th.cuda.is_available()
                    else 0
                )
                # The first three iterations are excluded as warm-up; report
                # nan (without a NumPy warning) until more samples exist.
                warm_tput = iter_tput[3:]
                speed = np.mean(warm_tput) if warm_tput else float("nan")
                print(
                    "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MB".format(
                        epoch,
                        step,
                        loss.item(),
                        acc.item(),
                        speed,
                        gpu_mem_alloc,
                    )
                )

        toc = time.time()
        print("Epoch Time(s): {:.4f}".format(toc - tic))
        # The first five epochs are treated as warm-up for timing purposes.
        if epoch >= 5:
            timed_total += toc - tic
            timed_epochs += 1

        if epoch % args.eval_every == 0 and epoch != 0:
            eval_acc, test_acc, pred = evaluate(
                model, g, nfeat, labels, val_nid, test_nid, device
            )
            if args.save_pred:
                np.savetxt(
                    args.save_pred + "%02d" % epoch,
                    pred.argmax(1).cpu().numpy(),
                    "%d",
                )
            print("Eval Acc {:.4f}".format(eval_acc))
            if eval_acc > best_eval_acc:
                best_eval_acc = eval_acc
                best_test_acc = test_acc
            print(
                "Best Eval Acc {:.4f} Test Acc {:.4f}".format(
                    best_eval_acc, best_test_acc
                )
            )

    # Guard against runs of five epochs or fewer, where no epoch was timed.
    avg_epoch_time = (
        timed_total / timed_epochs if timed_epochs else float("nan")
    )
    print("Avg epoch time: {}".format(avg_epoch_time))
    return best_test_acc
Use -1 for CPU training", ) argparser.add_argument("--num-epochs", type=int, default=20) argparser.add_argument("--num-hidden", type=int, default=256) argparser.add_argument("--num-layers", type=int, default=3) argparser.add_argument("--fan-out", type=str, default="5,10,15") argparser.add_argument("--batch-size", type=int, default=1000) argparser.add_argument("--val-batch-size", type=int, default=10000) argparser.add_argument("--log-every", type=int, default=20) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--lr", type=float, default=0.003) argparser.add_argument("--dropout", type=float, default=0.5) argparser.add_argument( "--num-workers", type=int, default=4, help="Number of sampling processes. Use 0 for no extra process.", ) argparser.add_argument("--save-pred", type=str, default="") argparser.add_argument("--wd", type=float, default=0) args = argparser.parse_args() if args.gpu >= 0: device = th.device("cuda:%d" % args.gpu) else: device = th.device("cpu") # load ogbn-products data data = DglNodePropPredDataset(name="ogbn-products") splitted_idx = data.get_idx_split() train_idx, val_idx, test_idx = ( splitted_idx["train"], splitted_idx["valid"], splitted_idx["test"], ) graph, labels = data[0] nfeat = graph.ndata.pop("feat").to(device) labels = labels[:, 0].to(device) in_feats = nfeat.shape[1] n_classes = (labels.max() + 1).item() # Create csr/coo/csc formats before launching sampling processes # This avoids creating certain formats in each data loader process, which saves momory and CPU. 
def seed(seed=0):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) generators with
    ``seed`` and switch cuDNN to deterministic, non-benchmark mode."""
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
def preprocess(graph, labels):
    """Record dataset dimensions in module globals and return the inputs unchanged.

    Sets ``n_node_feats`` to the size of the last axis of
    ``graph.ndata["feat"]`` and ``n_classes`` to ``labels.max() + 1``.

    Args:
        graph: object exposing node features as ``graph.ndata["feat"]``.
        labels: tensor of integer class labels.

    Returns:
        The ``(graph, labels)`` pair, unmodified.
    """
    global n_node_feats, n_classes

    # The value used to be assigned twice (shape[1], then shape[-1]); only the
    # second assignment ever took effect, so it is the one that is kept.
    n_node_feats = graph.ndata["feat"].shape[-1]
    n_classes = (labels.max() + 1).item()

    return graph, labels
def _save_curves(
    path, epochs, curves, n_epochs, y_major, y_minor, y_ticks=None
):
    """Plot every ``(label, values)`` pair in ``curves`` and save to ``path``.

    ``epochs`` holds the epoch number of each recorded point, so the x axis
    always has the same length as the plotted series.
    """
    fig = plt.figure(figsize=(24, 24))
    ax = fig.gca()
    ax.set_xticks(np.arange(0, n_epochs, 100))
    if y_ticks is not None:
        ax.set_yticks(y_ticks)
    ax.tick_params(labeltop=True, labelright=True)
    for label, values in curves:
        plt.plot(epochs, values, label=label, linewidth=1)
    ax.xaxis.set_major_locator(MultipleLocator(20))
    ax.xaxis.set_minor_locator(AutoMinorLocator(1))
    ax.yaxis.set_major_locator(MultipleLocator(y_major))
    ax.yaxis.set_minor_locator(AutoMinorLocator(y_minor))
    plt.grid(which="major", color="red", linestyle="dotted")
    plt.grid(which="minor", color="orange", linestyle="dotted")
    plt.legend()
    plt.tight_layout()
    plt.savefig(path)
    # Release the figure; otherwise one pair of figures per run stays open.
    plt.close(fig)


def run(
    args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running
):
    """Train and evaluate one model instance.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options (``n_layers``, ``n_epochs``, ``lr``,
        ``wd``, ``eval_every``, ``log_every``, ``eval_last``,
        ``plot_curves``, ``n_runs``).
    graph
        Graph handed to the data loaders (moved to CPU for sampling).
    labels, train_idx, val_idx, test_idx : torch.Tensor
        Node labels and the node ids of each split.
    evaluator
        Object whose ``eval`` method returns a dict containing ``"acc"``.
    n_running : int
        Index of this run, used in log lines and plot file names.

    Returns
    -------
    tuple
        ``(best_val_score, final_test_score)``.
    """

    def evaluator_wrapper(pred, labels):
        return evaluator.eval(
            {"y_pred": pred.argmax(dim=-1, keepdim=True), "y_true": labels}
        )["acc"]

    criterion = custom_loss_function

    train_batch_size = 4096
    # Fan-out 0 in every layer: do not sample neighbors.
    train_sampler = MultiLayerNeighborSampler(
        [0 for _ in range(args.n_layers)]
    )
    train_dataloader = DataLoader(
        graph.cpu(),
        train_idx.cpu(),
        train_sampler,
        batch_size=train_batch_size,
        shuffle=True,
        num_workers=4,
    )

    eval_batch_size = 4096
    # Fan-out 0 in every layer: do not sample neighbors.
    eval_sampler = MultiLayerNeighborSampler(
        [0 for _ in range(args.n_layers)]
    )

    # With --eval-last the test nodes are skipped during training and only
    # scored once at the very end.
    if args.eval_last:
        eval_idx = torch.cat([train_idx.cpu(), val_idx.cpu()])
    else:
        eval_idx = torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx.cpu()])
    eval_dataloader = DataLoader(
        graph.cpu(),
        eval_idx,
        eval_sampler,
        batch_size=eval_batch_size,
        shuffle=False,
        num_workers=4,
    )

    model = gen_model(args).to(device)

    optimizer = optim.AdamW(
        model.parameters(), lr=args.lr, weight_decay=args.wd
    )
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="max",
        factor=0.7,
        patience=20,
        min_lr=1e-4,
    )

    best_model_state_dict = None

    total_time = 0
    val_score, best_val_score, final_test_score = 0, 0, 0

    # Epoch number of every recorded point; used as the x axis when plotting.
    eval_epochs = []
    scores, train_scores, val_scores, test_scores = [], [], [], []
    losses, train_losses, val_losses, test_losses = [], [], [], []

    for epoch in range(1, args.n_epochs + 1):
        tic = time.time()
        loss, score = train(
            args,
            model,
            train_dataloader,
            labels,
            train_idx,
            criterion,
            optimizer,
            evaluator_wrapper,
        )
        toc = time.time()
        total_time += toc - tic

        if epoch % args.eval_every == 0 or epoch % args.log_every == 0:
            (
                train_score,
                val_score,
                test_score,
                train_loss,
                val_loss,
                test_loss,
            ) = evaluate(
                args,
                model,
                eval_dataloader,
                labels,
                train_idx,
                val_idx,
                test_idx,
                criterion,
                evaluator_wrapper,
            )

            if val_score > best_val_score:
                best_val_score = val_score
                final_test_score = test_score
                if args.eval_last:
                    # Force a copy: Tensor.to("cpu") hands back the very same
                    # tensor when the model already lives on the CPU, and the
                    # snapshot would then be overwritten by later updates.
                    best_model_state_dict = OrderedDict(
                        (k, v.to("cpu", copy=True))
                        for k, v in model.state_dict().items()
                    )

            if epoch % args.log_every == 0:
                print(
                    f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch}"
                )
                print(
                    f"Loss: {loss:.4f}, Score: {score:.4f}\n"
                    f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n"
                    f"Train/Val/Test/Best val/Final test score: {train_score:.4f}/{val_score:.4f}/{test_score:.4f}/{best_val_score:.4f}/{final_test_score:.4f}"
                )

            eval_epochs.append(epoch)
            for history, value in zip(
                [
                    scores,
                    train_scores,
                    val_scores,
                    test_scores,
                    losses,
                    train_losses,
                    val_losses,
                    test_losses,
                ],
                [
                    score,
                    train_score,
                    val_score,
                    test_score,
                    loss,
                    train_loss,
                    val_loss,
                    test_loss,
                ],
            ):
                history.append(value)

        lr_scheduler.step(val_score)

    if args.eval_last:
        # No snapshot exists when the validation score never improved; in
        # that case the final weights are evaluated as they are.
        if best_model_state_dict is not None:
            model.load_state_dict(best_model_state_dict)
        eval_dataloader = DataLoader(
            graph.cpu(),
            test_idx.cpu(),
            eval_sampler,
            batch_size=eval_batch_size,
            shuffle=False,
            num_workers=4,
        )
        final_test_score = evaluate(
            args,
            model,
            eval_dataloader,
            labels,
            train_idx,
            val_idx,
            test_idx,
            criterion,
            evaluator_wrapper,
        )[2]

    print("*" * 50)
    print(
        f"Average epoch time: {total_time / args.n_epochs}, Test score: {final_test_score}"
    )

    if args.plot_curves:
        _save_curves(
            f"gat_score_{n_running}.png",
            eval_epochs,
            [
                ("train score", train_scores),
                ("val score", val_scores),
                ("test score", test_scores),
            ],
            args.n_epochs,
            y_major=0.01,
            y_minor=2,
            y_ticks=np.linspace(0, 1.0, 101),
        )
        _save_curves(
            f"gat_loss_{n_running}.png",
            eval_epochs,
            [
                ("loss", losses),
                ("train loss", train_losses),
                ("val loss", val_losses),
                ("test loss", test_losses),
            ],
            args.n_epochs,
            y_major=0.1,
            y_minor=5,
        )

    return best_val_score, final_test_score
This option overrides '--gpu'.", ) argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID.") argparser.add_argument("--seed", type=int, help="seed", default=0) argparser.add_argument("--n-runs", type=int, default=10) argparser.add_argument("--n-epochs", type=int, default=500) argparser.add_argument("--lr", type=float, default=0.01) argparser.add_argument("--n-layers", type=int, default=4) argparser.add_argument("--n-hidden", type=int, default=480) argparser.add_argument("--dropout", type=float, default=0.2) argparser.add_argument("--input-drop", type=float, default=0) argparser.add_argument("--wd", type=float, default=0) argparser.add_argument( "--estimation-mode", action="store_true", help="Estimate the score of test set for speed.", ) argparser.add_argument( "--eval-last", action="store_true", help="Evaluate the score of test set at last.", ) argparser.add_argument("--eval-every", type=int, default=1) argparser.add_argument("--log-every", type=int, default=1) argparser.add_argument("--plot-curves", action="store_true") args = argparser.parse_args() if args.cpu: device = torch.device("cpu") else: device = torch.device("cuda:%d" % args.gpu) if args.estimation_mode: print( "WARNING: Estimation mode is enabled. The test score is not accurate." 
) seed(args.seed) graph, labels, train_idx, val_idx, test_idx, evaluator = load_data(dataset) graph, labels = preprocess(graph, labels) graph.create_formats_() # graph = graph.to(device) labels = labels.to(device) train_idx = train_idx.to(device) val_idx = val_idx.to(device) test_idx = test_idx.to(device) if args.estimation_mode: test_idx = test_idx[torch.arange(start=0, end=len(test_idx), step=50)] val_scores, test_scores = [], [] for i in range(1, args.n_runs + 1): val_score, test_score = run( args, graph, labels, train_idx, val_idx, test_idx, evaluator, i ) val_scores.append(val_score) test_scores.append(test_score) print(args) print(f"Runned {args.n_runs} times") print("Val scores:", val_scores) print("Test scores:", test_scores) print(f"Average val score: {np.mean(val_scores)} ± {np.std(val_scores)}") print(f"Average test score: {np.mean(test_scores)} ± {np.std(test_scores)}") print(f"Number of params: {count_parameters(args)}") if args.estimation_mode: print( "WARNING: Estimation mode is enabled. The test score is not accurate." 
class MLP(nn.Module):
    """Stack of linear layers with batch norm, activation and dropout.

    Every layer except the last is followed by ``BatchNorm1d``, the
    activation and dropout; the last layer emits ``n_classes`` raw outputs.

    Parameters
    ----------
    in_feats : int
        Input feature size.
    n_classes : int
        Output size of the final linear layer.
    n_layers : int
        Number of linear layers.
    n_hidden : int
        Width of every hidden layer.
    activation : callable
        Called as ``activation(h, inplace=True)``.
    dropout : float
        Dropout probability applied after each hidden activation.
    input_drop : float
        Dropout probability applied to the input features.
    residual : bool
        If true, the linear output of the previous layer is added to the
        linear output of each intermediate layer.
    """

    def __init__(
        self,
        in_feats,
        n_classes,
        n_layers,
        n_hidden,
        activation,
        dropout=0.0,
        input_drop=0.0,
        residual=False,
    ):
        super().__init__()
        self.n_layers, self.n_hidden, self.n_classes = (
            n_layers,
            n_hidden,
            n_classes,
        )

        self.linears = nn.ModuleList()
        self.norms = nn.ModuleList()

        last = n_layers - 1
        for idx in range(n_layers):
            is_output = idx == last
            fan_in = in_feats if idx == 0 else n_hidden
            fan_out = n_classes if is_output else n_hidden
            self.linears.append(nn.Linear(fan_in, fan_out))
            # The output layer has no normalization.
            if not is_output:
                self.norms.append(nn.BatchNorm1d(fan_out))

        self.activation = activation
        self.input_drop = nn.Dropout(input_drop)
        self.dropout = nn.Dropout(dropout)
        self.residual = residual

    def forward(self, h):
        """Apply the network to the feature matrix ``h`` and return its output."""
        last = self.n_layers - 1
        h = self.input_drop(h)
        previous = None

        for idx in range(self.n_layers):
            h = self.linears[idx](h)

            # Skip connection between the pre-normalization outputs of
            # consecutive hidden layers (never on the first or last layer).
            if self.residual and 0 < idx < last:
                h += previous
            previous = h

            if idx < last:
                normed = self.norms[idx](h)
                h = self.dropout(self.activation(normed, inplace=True))

        return h
# Hyperparameter preset used when the selected model is "MWE-GCN".
MWE_GCN_proteins = dict(
    num_ew_channels=8,
    num_epochs=2000,
    in_feats=1,
    hidden_feats=10,
    out_feats=112,
    n_layers=3,
    lr=2e-2,
    weight_decay=0,
    patience=1000,
    dropout=0.2,
    aggr_mode="sum",  # 'sum' or 'concat' for the aggregation across channels
    ewnorm="both",
)

# Hyperparameter preset used when the selected model is "MWE-DGCN".
MWE_DGCN_proteins = dict(
    num_ew_channels=8,
    num_epochs=2000,
    in_feats=1,
    hidden_feats=10,
    out_feats=112,
    n_layers=2,
    lr=1e-2,
    weight_decay=0,
    patience=300,
    dropout=0.5,
    aggr_mode="sum",
    residual=True,
    ewnorm="none",
)


def get_exp_configure(args):
    """Look up the hyperparameter preset for ``args["model"]``.

    Returns the preset dict itself (not a copy) for "MWE-GCN" or
    "MWE-DGCN", and ``None`` for any other model name.
    """
    presets = (
        ("MWE-GCN", MWE_GCN_proteins),
        ("MWE-DGCN", MWE_DGCN_proteins),
    )
    for model_name, preset in presets:
        if args["model"] == model_name:
            return preset
    return None
def load_data(dataset):
    """Load an OGB node-property-prediction dataset and its evaluator.

    Parameters
    ----------
    dataset : str
        Dataset name passed to ``DglNodePropPredDataset`` and ``Evaluator``.

    Returns
    -------
    tuple
        ``(graph, labels, train_idx, val_idx, test_idx, evaluator)``. The
        labels are also stored on the graph as ``graph.ndata["labels"]``.
    """
    data = DglNodePropPredDataset(name=dataset)
    evaluator = Evaluator(name=dataset)

    split = data.get_idx_split()
    train_idx, val_idx, test_idx = (
        split[part] for part in ("train", "valid", "test")
    )

    graph, labels = data[0]
    # Keep the labels on the graph so samplers can carry them into blocks.
    graph.ndata["labels"] = labels

    return graph, labels, train_idx, val_idx, test_idx, evaluator
def add_labels(graph, idx):
    """Append label features to the source-node features of ``graph``.

    A zero matrix with ``n_classes`` columns is created on ``device``; the
    rows selected by ``idx`` are filled from
    ``graph.srcdata["train_labels_onehot"]`` and the result is concatenated
    to ``graph.srcdata["feat"]`` along the last dimension (in place on the
    graph, nothing is returned).
    """
    src = graph.srcdata
    features = src["feat"]

    label_block = torch.zeros([features.shape[0], n_classes], device=device)
    # Rows outside ``idx`` stay all-zero.
    label_block[idx] = src["train_labels_onehot"][idx]

    src["feat"] = torch.cat([features, label_block], dim=-1)
def _save_curves(
    path, epochs, curves, n_epochs, y_major, y_minor, y_ticks=None
):
    """Plot every ``(label, values)`` pair in ``curves`` and save to ``path``.

    ``epochs`` holds the epoch number of each recorded point, so the x axis
    always has the same length as the plotted series.
    """
    fig = plt.figure(figsize=(24, 24))
    ax = fig.gca()
    ax.set_xticks(np.arange(0, n_epochs, 100))
    if y_ticks is not None:
        ax.set_yticks(y_ticks)
    ax.tick_params(labeltop=True, labelright=True)
    for label, values in curves:
        plt.plot(epochs, values, label=label, linewidth=1)
    ax.xaxis.set_major_locator(MultipleLocator(100))
    ax.xaxis.set_minor_locator(AutoMinorLocator(1))
    ax.yaxis.set_major_locator(MultipleLocator(y_major))
    ax.yaxis.set_minor_locator(AutoMinorLocator(y_minor))
    plt.grid(which="major", color="red", linestyle="dotted")
    plt.grid(which="minor", color="orange", linestyle="dotted")
    plt.legend()
    plt.tight_layout()
    plt.savefig(path)
    # Release the figure; otherwise one pair of figures per run stays open.
    plt.close(fig)


def run(
    args, graph, labels, train_idx, val_idx, test_idx, evaluator, n_running
):
    """Train and evaluate one GAT instance.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options (``n_layers``, ``n_epochs``, ``lr``,
        ``wd``, ``eval_every``, ``log_every``, ``plot``, ``save_pred``,
        ``n_runs``).
    graph
        Graph handed to the data loaders (moved to CPU for sampling).
    labels, train_idx, val_idx, test_idx : torch.Tensor
        Node labels and the node ids of each split.
    evaluator
        Object whose ``eval`` method returns a dict containing ``"rocauc"``.
    n_running : int
        Index of this run, used in log lines and output file names.

    Returns
    -------
    tuple
        ``(best_val_score, final_test_score)``.
    """

    def evaluator_wrapper(pred, labels):
        return evaluator.eval({"y_pred": pred, "y_true": labels})["rocauc"]

    # ceil(len(train_idx) / 10): the training set is split into at most ten
    # batches per epoch.
    train_batch_size = (len(train_idx) + 9) // 10
    train_sampler = MultiLayerNeighborSampler(
        [32 for _ in range(args.n_layers)]
    )
    train_dataloader = DataLoader(
        graph.cpu(),
        train_idx.cpu(),
        train_sampler,
        batch_size=train_batch_size,
        num_workers=10,
    )

    eval_sampler = MultiLayerNeighborSampler(
        [100 for _ in range(args.n_layers)]
    )
    eval_dataloader = DataLoader(
        graph.cpu(),
        torch.cat([train_idx.cpu(), val_idx.cpu(), test_idx.cpu()]),
        eval_sampler,
        batch_size=65536,
        num_workers=10,
    )

    criterion = nn.BCEWithLogitsLoss()

    model = gen_model(args).to(device)

    optimizer = optim.AdamW(
        model.parameters(), lr=args.lr, weight_decay=args.wd
    )
    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", factor=0.75, patience=50
    )

    total_time = 0
    val_score, best_val_score, final_test_score = 0, 0, 0

    # Epoch number of every recorded point; used as the x axis when plotting.
    eval_epochs = []
    train_scores, val_scores, test_scores = [], [], []
    losses, train_losses, val_losses, test_losses = [], [], [], []
    final_pred = None

    for epoch in range(1, args.n_epochs + 1):
        tic = time.time()
        loss = train(
            args,
            model,
            train_dataloader,
            labels,
            train_idx,
            criterion,
            optimizer,
            evaluator_wrapper,
        )
        toc = time.time()
        total_time += toc - tic

        # The last epoch is always evaluated, even off the regular schedule.
        if (
            epoch == args.n_epochs
            or epoch % args.eval_every == 0
            or epoch % args.log_every == 0
        ):
            (
                train_score,
                val_score,
                test_score,
                train_loss,
                val_loss,
                test_loss,
                pred,
            ) = evaluate(
                args,
                model,
                eval_dataloader,
                labels,
                train_idx,
                val_idx,
                test_idx,
                criterion,
                evaluator_wrapper,
            )

            if val_score > best_val_score:
                best_val_score = val_score
                final_test_score = test_score
                final_pred = pred

            if epoch % args.log_every == 0:
                print(
                    f"Run: {n_running}/{args.n_runs}, Epoch: {epoch}/{args.n_epochs}, Average epoch time: {total_time / epoch:.2f}s"
                )
                print(
                    f"Loss: {loss:.4f}\n"
                    f"Train/Val/Test loss: {train_loss:.4f}/{val_loss:.4f}/{test_loss:.4f}\n"
                    f"Train/Val/Test/Best val/Final test score: {train_score:.4f}/{val_score:.4f}/{test_score:.4f}/{best_val_score:.4f}/{final_test_score:.4f}"
                )

            eval_epochs.append(epoch)
            for history, value in zip(
                [
                    train_scores,
                    val_scores,
                    test_scores,
                    losses,
                    train_losses,
                    val_losses,
                    test_losses,
                ],
                [
                    train_score,
                    val_score,
                    test_score,
                    loss,
                    train_loss,
                    val_loss,
                    test_loss,
                ],
            ):
                history.append(value)

        lr_scheduler.step(val_score)

    print("*" * 50)
    print(
        f"Best val score: {best_val_score}, Final test score: {final_test_score}"
    )
    print("*" * 50)

    if args.plot:
        _save_curves(
            f"gat_score_{n_running}.png",
            eval_epochs,
            [
                ("train score", train_scores),
                ("val score", val_scores),
                ("test score", test_scores),
            ],
            args.n_epochs,
            y_major=0.01,
            y_minor=2,
            y_ticks=np.linspace(0, 1.0, 101),
        )
        _save_curves(
            f"gat_loss_{n_running}.png",
            eval_epochs,
            [
                ("loss", losses),
                ("train loss", train_losses),
                ("val loss", val_losses),
                ("test loss", test_losses),
            ],
            args.n_epochs,
            y_major=0.1,
            y_minor=5,
        )

    if args.save_pred:
        if final_pred is None:
            # No evaluation ever beat the initial best validation score.
            print("No predictions to save for this run.")
        else:
            os.makedirs("./output", exist_ok=True)
            # The model is trained with BCEWithLogitsLoss, i.e. every output
            # column is an independent binary logit, so the matching
            # probability is an element-wise sigmoid rather than a softmax
            # across columns.
            torch.save(torch.sigmoid(final_pred), f"./output/{n_running}.pt")

    return best_val_score, final_test_score
This option overrides '--gpu'.", ) argparser.add_argument("--gpu", type=int, default=0, help="GPU device ID") argparser.add_argument("--seed", type=int, default=0, help="random seed") argparser.add_argument( "--n-runs", type=int, default=10, help="running times" ) argparser.add_argument( "--n-epochs", type=int, default=1200, help="number of epochs" ) argparser.add_argument( "--use-labels", action="store_true", help="Use labels in the training set as input features.", ) argparser.add_argument( "--no-attn-dst", action="store_true", help="Don't use attn_dst." ) argparser.add_argument( "--n-heads", type=int, default=6, help="number of heads" ) argparser.add_argument( "--lr", type=float, default=0.01, help="learning rate" ) argparser.add_argument( "--n-layers", type=int, default=6, help="number of layers" ) argparser.add_argument( "--n-hidden", type=int, default=80, help="number of hidden units" ) argparser.add_argument( "--dropout", type=float, default=0.25, help="dropout rate" ) argparser.add_argument( "--input-drop", type=float, default=0.1, help="input drop rate" ) argparser.add_argument( "--attn-drop", type=float, default=0.0, help="attention dropout rate" ) argparser.add_argument( "--edge-drop", type=float, default=0.1, help="edge drop rate" ) argparser.add_argument("--wd", type=float, default=0, help="weight decay") argparser.add_argument( "--eval-every", type=int, default=5, help="evaluate every EVAL_EVERY epochs", ) argparser.add_argument( "--log-every", type=int, default=5, help="log every LOG_EVERY epochs" ) argparser.add_argument( "--plot", action="store_true", help="plot learning curves" ) argparser.add_argument( "--save-pred", action="store_true", help="save final predictions" ) args = argparser.parse_args() if args.cpu: device = torch.device("cpu") else: device = torch.device(f"cuda:{args.gpu}") # load data & preprocess print("Loading data") graph, labels, train_idx, val_idx, test_idx, evaluator = load_data(dataset) print("Preprocessing") graph, labels = 
preprocess(graph, labels, train_idx) labels, train_idx, val_idx, test_idx = map( lambda x: x.to(device), (labels, train_idx, val_idx, test_idx) ) # run val_scores, test_scores = [], [] for i in range(args.n_runs): print("Running", i) seed(args.seed + i) val_score, test_score = run( args, graph, labels, train_idx, val_idx, test_idx, evaluator, i + 1 ) val_scores.append(val_score) test_scores.append(test_score) print(" ".join(sys.argv)) print(args) print(f"Runned {args.n_runs} times") print("Val scores:", val_scores) print("Test scores:", test_scores) print(f"Average val score: {np.mean(val_scores)} ± {np.std(val_scores)}") print(f"Average test score: {np.mean(test_scores)} ± {np.std(test_scores)}") print(f"Number of params: {count_parameters(args)}") if __name__ == "__main__": main() # Namespace(attn_drop=0.0, cpu=False, dropout=0.25, edge_drop=0.1, eval_every=5, gpu=6, input_drop=0.1, log_every=5, lr=0.01, n_epochs=1200, n_heads=6, n_hidden=80, n_layers=6, n_runs=10, no_attn_dst=False, plot=True, save_pred=False, seed=0, use_labels=False, wd=0) # Runned 10 times # Val scores: [0.927741031859485, 0.9272113161947824, 0.9271363901359605, 0.9275579074100136, 0.9264291968462317, 0.9275278541203443, 0.9286381790529751, 0.9288245051991526, 0.9269289529175155, 0.9278177920224489] # Test scores: [0.8754403567694566, 0.8749781870941457, 0.8735933245353141, 0.8759835445000637, 0.8745950242855286, 0.8742530369108132, 0.8784892022402326, 0.873345314887444, 0.8724393129004984, 0.874077975765639] # Average val score: 0.927581312575891 ± 0.0006953509986591492 # Average test score: 0.8747195279889135 ± 0.001593598488797452 # Number of params: 2475232 # Namespace(attn_drop=0.0, cpu=False, dropout=0.25, edge_drop=0.1, eval_every=5, gpu=7, input_drop=0.1, log_every=5, lr=0.01, n_epochs=1200, n_heads=6, n_hidden=80, n_layers=6, n_runs=10, no_attn_dst=False, plot=True, save_pred=False, seed=0, use_labels=True, wd=0) # Runned 10 times # Val scores: [0.9293776332568928, 
def normalize_edge_weights(graph, device, num_ew_channels):
    """Normalize the edge features by node degree and split them by channel.

    Parameters
    ----------
    graph
        Graph whose ``edata["feat"]`` holds the edge weights; modified in
        place.
    device
        Device the per-node normalizer is moved to before it is stored on
        the graph.
    num_ew_channels : int
        Number of leading columns of ``edata["feat"]`` to expose as
        ``edata["feat_<channel>"]``.

    The normalizer is ``sqrt(max(in_degree, 1))``. It is applied twice, via
    DGL's ``e_div_u`` and ``e_div_v`` built-ins, and stored as
    ``ndata["norm"]``.
    """
    degs = graph.in_degrees().float().clamp(min=1)
    # Use the ``device`` argument rather than the script-level ``args``
    # dict, which only exists when this file is executed as ``__main__``.
    norm = degs.pow(0.5).to(device)
    graph.ndata["norm"] = norm.unsqueeze(1)

    graph.apply_edges(fn.e_div_u("feat", "norm", "feat"))
    graph.apply_edges(fn.e_div_v("feat", "norm", "feat"))

    # One single-column edge feature per channel.
    for channel in range(num_ew_channels):
        graph.edata["feat_" + str(channel)] = graph.edata["feat"][
            :, channel : channel + 1
        ]
def main(args):
    """Train an MWE model on the full graph and report its best scores.

    Parameters
    ----------
    args : dict
        Merged command-line options and hyperparameters. Keys read here:
        ``rand_seed``, ``dataset``, ``device``, ``ewnorm``,
        ``num_ew_channels``, ``lr``, ``weight_decay``, ``model``,
        ``n_layers``, ``hidden_feats``, ``exp_name``, ``postfix``,
        ``num_epochs`` and ``patience``.

    The weights with the best validation score are saved under
    ``./saved_models/``, reloaded after training, and the resulting scores
    are printed and written to ``results.txt``.
    """
    print(args)

    if args["rand_seed"] > -1:
        set_random_seed(args["rand_seed"])

    dataset = DglNodePropPredDataset(name=args["dataset"])
    print(dataset.meta_info)
    splitted_idx = dataset.get_idx_split()
    graph = dataset.graph[0]
    graph.ndata["labels"] = dataset.labels.float().to(args["device"])
    graph.edata["feat"] = graph.edata["feat"].float().to(args["device"])

    if args["ewnorm"] == "both":
        print("Symmetric normalization of edge weights by degree")
        normalize_edge_weights(graph, args["device"], args["num_ew_channels"])
    elif args["ewnorm"] == "none":
        print("Not normalizing edge weights")
        # One single-column edge feature per channel.
        for channel in range(args["num_ew_channels"]):
            graph.edata["feat_" + str(channel)] = graph.edata["feat"][
                :, channel : channel + 1
            ]

    model = load_model(args).to(args["device"])
    optimizer = Adam(
        model.parameters(), lr=args["lr"], weight_decay=args["weight_decay"]
    )
    min_lr = 1e-3
    scheduler = ReduceLROnPlateau(
        optimizer, "max", factor=0.7, patience=100, verbose=True, min_lr=min_lr
    )
    print("scheduler min_lr", min_lr)

    criterion = nn.BCEWithLogitsLoss()
    evaluator = Evaluator(args["dataset"])

    print("model", args["model"])
    print("n_layers", args["n_layers"])
    print("hidden dim", args["hidden_feats"])
    print("lr", args["lr"])

    dur = []
    best_val_score = 0.0
    num_patient_epochs = 0
    model_folder = "./saved_models/"
    model_path = (
        model_folder + str(args["exp_name"]) + "_" + str(args["postfix"])
    )

    os.makedirs(model_folder, exist_ok=True)

    for epoch in range(1, args["num_epochs"] + 1):
        # The first two epochs are excluded from the timing average.
        if epoch >= 3:
            t0 = time.time()

        loss, train_score = run_a_train_epoch(
            graph, splitted_idx["train"], model, criterion, optimizer, evaluator
        )

        if epoch >= 3:
            dur.append(time.time() - t0)
            avg_time = np.mean(dur)
        else:
            avg_time = None

        train_score, val_score, test_score = run_an_eval_epoch(
            graph, splitted_idx, model, evaluator
        )

        scheduler.step(val_score)

        # Early stop: checkpoint on improvement, otherwise count the epoch
        # towards the patience budget.
        if val_score > best_val_score:
            torch.save(model.state_dict(), model_path)
            best_val_score = val_score
            num_patient_epochs = 0
        else:
            num_patient_epochs += 1

        print(
            "Epoch {:d}, loss {:.4f}, train score {:.4f}, "
            "val score {:.4f}, avg time {}, num patient epochs {:d}".format(
                epoch,
                loss,
                train_score,
                val_score,
                avg_time,
                num_patient_epochs,
            )
        )

        if num_patient_epochs == args["patience"]:
            break

    # Score the best checkpoint rather than the last weights.
    model.load_state_dict(torch.load(model_path, weights_only=False))
    train_score, val_score, test_score = run_an_eval_epoch(
        graph, splitted_idx, model, evaluator
    )
    print("Train score {:.4f}".format(train_score))
    print("Valid score {:.4f}".format(val_score))
    print("Test score {:.4f}".format(test_score))

    with open("results.txt", "w") as f:
        f.write("loss {:.4f}\n".format(loss))
        f.write("Best validation rocauc {:.4f}\n".format(best_val_score))
        f.write("Test rocauc {:.4f}\n".format(test_score))

    print(args)


if __name__ == "__main__":
    import argparse

    from configure import get_exp_configure

    parser = argparse.ArgumentParser(
        description="OGB node property prediction with DGL using full graph training"
    )
    parser.add_argument(
        "-m",
        "--model",
        type=str,
        choices=["MWE-GCN", "MWE-DGCN"],
        default="MWE-DGCN",
        help="Model to use",
    )
    parser.add_argument("-c", "--cuda", type=str, default="none")
    parser.add_argument(
        "--postfix",
        type=str,
        default="",
        help="a string appended to the file name of the saved model",
    )
    parser.add_argument(
        "--rand_seed",
        type=int,
        default=-1,
        help="random seed for torch and numpy",
    )
    parser.add_argument("--residual", action="store_true")
    parser.add_argument(
        "--ewnorm", type=str, default="none", choices=["none", "both"]
    )
    args = parser.parse_args().__dict__

    # Get experiment configuration
    args["dataset"] = "ogbn-proteins"
    args["exp_name"] = "_".join([args["model"], args["dataset"]])
    # NOTE(review): every key in the preset overwrites the parsed value, so
    # --ewnorm (and --residual for MWE-DGCN) are replaced by the preset --
    # confirm this precedence is intended.
    args.update(get_exp_configure(args))

    if args["cuda"] == "none":
        args["device"] = torch.device("cpu")
    else:
        # A device string must not contain a space after the colon
        # ("cuda:0", not "cuda: 0").
        args["device"] = torch.device("cuda:" + str(args["cuda"]))

    main(args)
class MWE_DGCN(nn.Module):
    """Deep stack of ``MWEConv`` layers with layer normalisation.

    Each hidden layer applies LayerNorm -> activation -> dropout before the
    next ``MWEConv``; an optional residual connection adds the layer input
    to the layer output.

    Parameters
    ----------
    n_input : int
        Input feature size of the first ``MWEConv``. ``forward`` always feeds
        a ``(num_nodes, 1)`` tensor of ones, so this is presumably 1 --
        TODO confirm against the experiment configuration.
    n_hidden : int
        Hidden feature size of every layer.
    n_output : int
        Output size of the final linear prediction layer.
    n_layers : int
        Number of ``MWEConv`` layers (and of LayerNorm modules).
    activation : callable
        Activation applied after each LayerNorm (also passed to ``MWEConv``).
    dropout : float
        Dropout probability used between layers.
    residual : bool or str, optional
        Enables residual connections. Booleans are honoured as-is; for
        backward compatibility the string ``"true"`` also enables them and
        any other string disables them.
    aggr_mode : str, optional
        Aggregation mode forwarded to ``MWEConv``.
    device : str or torch.device, optional
        Device on which the constant input features are created.
    """

    def __init__(
        self,
        n_input,
        n_hidden,
        n_output,
        n_layers,
        activation,
        dropout,
        residual=False,
        aggr_mode="sum",
        device="cpu",
    ):
        super(MWE_DGCN, self).__init__()
        self.n_layers = n_layers
        self.activation = activation
        self.dropout = dropout
        self.residual = residual

        self.layers = nn.ModuleList()
        self.layer_norms = nn.ModuleList()

        # First layer maps the input size to the hidden size.
        self.layers.append(
            MWEConv(
                n_input, n_hidden, activation=activation, aggr_mode=aggr_mode
            )
        )

        for _ in range(n_layers - 1):
            self.layers.append(
                MWEConv(
                    n_hidden,
                    n_hidden,
                    activation=activation,
                    aggr_mode=aggr_mode,
                )
            )

        for _ in range(n_layers):
            self.layer_norms.append(
                nn.LayerNorm(n_hidden, elementwise_affine=True)
            )

        self.pred_out = nn.Linear(n_hidden, n_output)
        self.device = device

    def _use_residual(self):
        """Return True when residual connections are enabled.

        The original check compared against the string ``"true"`` only, so a
        boolean ``True`` (what ``argparse``'s ``store_true`` produces) never
        enabled the residual path.
        """
        if isinstance(self.residual, str):
            return self.residual == "true"
        return bool(self.residual)

    def forward(self, g, node_state=None):
        """Compute per-node predictions for graph ``g``.

        ``node_state`` is accepted for interface compatibility but ignored:
        the input is always a ``(g.num_nodes(), 1)`` tensor of ones.
        """
        node_state = torch.ones(g.num_nodes(), 1).float().to(self.device)
        use_residual = self._use_residual()

        node_state = self.layers[0](g, node_state)

        for layer in range(1, self.n_layers):
            # Pre-activation block: norm -> activation -> dropout -> conv.
            node_state_new = self.layer_norms[layer - 1](node_state)
            node_state_new = self.activation(node_state_new)
            node_state_new = F.dropout(
                node_state_new, p=self.dropout, training=self.training
            )

            if use_residual:
                node_state = node_state + self.layers[layer](g, node_state_new)
            else:
                node_state = self.layers[layer](g, node_state_new)

        node_state = self.layer_norms[self.n_layers - 1](node_state)
        node_state = self.activation(node_state)
        node_state = F.dropout(
            node_state, p=self.dropout, training=self.training
        )

        out = self.pred_out(node_state)

        return out
self._in_src_feats, out_feats * n_heads, bias=False ) if residual: self.dst_fc = nn.Linear(self._in_src_feats, out_feats * n_heads) self.bias = None else: self.dst_fc = None self.bias = nn.Parameter(out_feats * n_heads) # attn fc self.attn_src_fc = nn.Linear(self._in_src_feats, n_heads, bias=False) if use_attn_dst: self.attn_dst_fc = nn.Linear( self._in_src_feats, n_heads, bias=False ) else: self.attn_dst_fc = None if edge_feats > 0: self.attn_edge_fc = nn.Linear(edge_feats, n_heads, bias=False) else: self.attn_edge_fc = None self.attn_drop = nn.Dropout(attn_drop) self.edge_drop = edge_drop self.leaky_relu = nn.LeakyReLU(negative_slope, inplace=True) self.activation = activation self.reset_parameters() def reset_parameters(self): gain = nn.init.calculate_gain("relu") nn.init.xavier_normal_(self.src_fc.weight, gain=gain) if self.dst_fc is not None: nn.init.xavier_normal_(self.dst_fc.weight, gain=gain) nn.init.xavier_normal_(self.attn_src_fc.weight, gain=gain) if self.attn_dst_fc is not None: nn.init.xavier_normal_(self.attn_dst_fc.weight, gain=gain) if self.attn_edge_fc is not None: nn.init.xavier_normal_(self.attn_edge_fc.weight, gain=gain) if self.bias is not None: nn.init.zeros_(self.bias) def set_allow_zero_in_degree(self, set_value): self._allow_zero_in_degree = set_value def forward(self, graph, feat_src, feat_edge=None): with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): assert False if graph.is_block: feat_dst = feat_src[: graph.number_of_dst_nodes()] else: feat_dst = feat_src if self._use_symmetric_norm: degs = graph.srcdata["deg"] # degs = graph.out_degrees().float().clamp(min=1) norm = torch.pow(degs, -0.5) shp = norm.shape + (1,) * (feat_src.dim() - 1) norm = torch.reshape(norm, shp) feat_src = feat_src * norm feat_src_fc = self.src_fc(feat_src).view( -1, self._n_heads, self._out_feats ) feat_dst_fc = self.dst_fc(feat_dst).view( -1, self._n_heads, self._out_feats ) attn_src = 
self.attn_src_fc(feat_src).view(-1, self._n_heads, 1) # NOTE: GAT paper uses "first concatenation then linear projection" # to compute attention scores, while ours is "first projection then # addition", the two approaches are mathematically equivalent: # We decompose the weight vector a mentioned in the paper into # [a_l || a_r], then # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j # Our implementation is much efficient because we do not need to # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus, # addition could be optimized with DGL's built-in function u_add_v, # which further speeds up computation and saves memory footprint. graph.srcdata.update( {"feat_src_fc": feat_src_fc, "attn_src": attn_src} ) if self.attn_dst_fc is not None: attn_dst = self.attn_dst_fc(feat_dst).view(-1, self._n_heads, 1) graph.dstdata.update({"attn_dst": attn_dst}) graph.apply_edges( fn.u_add_v("attn_src", "attn_dst", "attn_node") ) else: graph.apply_edges(fn.copy_u("attn_src", "attn_node")) e = graph.edata["attn_node"] if feat_edge is not None: attn_edge = self.attn_edge_fc(feat_edge).view( -1, self._n_heads, 1 ) graph.edata.update({"attn_edge": attn_edge}) e += graph.edata["attn_edge"] e = self.leaky_relu(e) if self.training and self.edge_drop > 0: perm = torch.randperm(graph.num_edges(), device=e.device) bound = int(graph.num_edges() * self.edge_drop) eids = perm[bound:] graph.edata["a"] = torch.zeros_like(e) graph.edata["a"][eids] = self.attn_drop( edge_softmax(graph, e[eids], eids=eids) ) else: graph.edata["a"] = self.attn_drop(edge_softmax(graph, e)) # message passing graph.update_all( fn.u_mul_e("feat_src_fc", "a", "m"), fn.sum("m", "feat_src_fc") ) rst = graph.dstdata["feat_src_fc"] if self._use_symmetric_norm: degs = graph.dstdata["deg"] # degs = graph.in_degrees().float().clamp(min=1) norm = torch.pow(degs, 0.5) shp = norm.shape + (1,) * (feat_dst.dim()) norm = torch.reshape(norm, shp) rst = rst * norm # residual if self.dst_fc is not None: rst += feat_dst_fc else: 
class GAT(nn.Module):
    """Multi-layer GAT built from ``GATConv`` with optional edge encoders.

    Parameters
    ----------
    node_feats : int
        Size of the raw node features read from ``srcdata["feat"]``.
    edge_feats : int
        Size of the raw edge features read from ``edata["feat"]``.
    n_classes : int
        Output size of the final linear layer.
    n_layers : int
        Number of ``GATConv`` layers.
    n_heads : int
        Number of attention heads per layer.
    n_hidden : int
        Hidden size per head.
    edge_emb : int
        Size of the per-layer edge embedding. When ``0`` no edge encoder is
        created and the convolutions receive no edge features.
    activation : callable
        Called as ``activation(h, inplace=True)`` after each normalisation.
    dropout, input_drop, attn_drop, edge_drop : float
        Dropout rates for hidden states, encoded inputs, attention weights
        and edges respectively.
    use_attn_dst : bool, optional
        Forwarded to ``GATConv``.
    allow_zero_in_degree : bool, optional
        Forwarded to ``GATConv``.
    """

    def __init__(
        self,
        node_feats,
        edge_feats,
        n_classes,
        n_layers,
        n_heads,
        n_hidden,
        edge_emb,
        activation,
        dropout,
        input_drop,
        attn_drop,
        edge_drop,
        use_attn_dst=True,
        allow_zero_in_degree=False,
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_heads = n_heads
        self.n_hidden = n_hidden
        self.n_classes = n_classes

        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()

        self.node_encoder = nn.Linear(node_feats, n_hidden)
        # Always define the attribute: forward() tests it against None, which
        # previously raised AttributeError when edge_emb == 0.
        self.edge_encoder = nn.ModuleList() if edge_emb > 0 else None

        for i in range(n_layers):
            # Layers after the first consume the concatenated head outputs.
            in_hidden = n_heads * n_hidden if i > 0 else n_hidden
            out_hidden = n_hidden

            if self.edge_encoder is not None:
                self.edge_encoder.append(nn.Linear(edge_feats, edge_emb))
            self.convs.append(
                GATConv(
                    in_hidden,
                    edge_emb,
                    out_hidden,
                    n_heads=n_heads,
                    attn_drop=attn_drop,
                    edge_drop=edge_drop,
                    use_attn_dst=use_attn_dst,
                    allow_zero_in_degree=allow_zero_in_degree,
                    use_symmetric_norm=False,
                )
            )
            self.norms.append(nn.BatchNorm1d(n_heads * out_hidden))

        self.pred_linear = nn.Linear(n_heads * n_hidden, n_classes)

        self.input_drop = nn.Dropout(input_drop)
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

    def forward(self, g):
        """Run the network on a graph or on a list of per-layer subgraphs.

        A single graph is reused for every layer; a list is indexed by layer.
        Returns the output of the final linear layer.
        """
        if not isinstance(g, list):
            subgraphs = [g] * self.n_layers
        else:
            subgraphs = g

        h = subgraphs[0].srcdata["feat"]
        h = self.node_encoder(h)
        h = F.relu(h, inplace=True)
        h = self.input_drop(h)

        h_last = None

        for i in range(self.n_layers):
            if self.edge_encoder is not None:
                efeat = subgraphs[i].edata["feat"]
                efeat_emb = self.edge_encoder[i](efeat)
                efeat_emb = F.relu(efeat_emb, inplace=True)
            else:
                efeat_emb = None

            # Merge the head dimension into the feature dimension.
            h = self.convs[i](subgraphs[i], h, efeat_emb).flatten(1, -1)

            # Skip connection from the previous layer's (pre-norm) output,
            # truncated to the number of rows produced by this layer.
            if h_last is not None:
                h += h_last[: h.shape[0], :]

            h_last = h

            h = self.norms[i](h)
            h = self.activation(h, inplace=True)
            h = self.dropout(h)

        h = self.pred_linear(h)

        return h
def set_random_seed(seed):
    """Seed Python's, NumPy's and PyTorch's random generators with ``seed``.

    The CUDA generator is seeded as well when CUDA is available. A
    confirmation message is printed to stdout.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
    print(f"random seed set to be {seed}")
print(f"All runs:") r = best_result[:, 0] print(f"Highest Train: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 1] print(f"Highest Valid: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 2] print(f" Final Train: {r.mean():.2f} ± {r.std():.2f}") r = best_result[:, 3] print(f" Final Test: {r.mean():.2f} ± {r.std():.2f}") ================================================ FILE: examples/pytorch/ogb/seal_ogbl/README.md ================================================ # SEAL Implementation for OGBL in DGL Introduction ------------ This is an example of implementing [SEAL](https://arxiv.org/pdf/2010.16103.pdf) for link prediction in DGL. Some parts are migrated from [https://github.com/facebookresearch/SEAL_OGB](https://github.com/facebookresearch/SEAL_OGB). Requirements ------------ [PyTorch](https://pytorch.org/), [DGL](https://www.dgl.ai/), [OGB](https://ogb.stanford.edu/docs/home/), and other python libraries: numpy, scipy, tqdm, scikit-learn, etc. Usages ------ Run the following command for results on each benchmark ```bash # ogbl-ppa python main.py \ --dataset ogbl-ppa \ --use_feature \ --use_edge_weight \ --eval_steps 5 \ --epochs 20 \ --train_percent 5 # ogbl-collab python main.py \ --dataset ogbl-collab \ --train_percent 15 \ --hidden_channels 256 \ --use_valedges_as_input # ogbl-ddi python main.py \ --dataset ogbl-ddi \ --ratio_per_hop 0.2 \ --use_edge_weight \ --eval_steps 1 \ --epochs 10 \ --train_percent 5 # ogbl-citation2 python main.py \ --dataset ogbl-citation2 \ --use_feature \ --use_edge_weight \ --eval_steps 1 \ --epochs 10 \ --train_percent 2 \ --val_percent 1 \ --test_percent 1 ``` Results ------- | | ogbl-ppa (Hits@100) | ogbl-collab (Hits@50) | ogbl-ddi (Hits@20) | ogbl-citation2 (MRRd) | |--------------|---------------------|-----------------------|--------------------|---------------------| | Paper Test Results | 48.80%±3.16% | 64.74%±0.43% | 30.56%±3.86%* | 87.67%±0.32r% | | Our Test Results | 49.48%±2.52% | 64.23%±0.57% | 27.93%±4.19% | 
class Logger(object):
    """Accumulate (validation, test) score pairs per run and print summaries."""

    def __init__(self, runs, info=None):
        self.info = info
        # One list of recorded (val_score, test_score) pairs for every run.
        self.results = [list() for _ in range(runs)]

    def add_result(self, run, result):
        """Record one ``(val_score, test_score)`` pair for run index ``run``."""
        assert len(result) == 2
        assert 0 <= run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None, f=sys.stdout):
        """Print a summary for one run, or across all runs when ``run`` is None.

        Scores are scaled by 100 before printing. The reported test score is
        the one recorded at the evaluation point with the best validation
        score. Output goes to the file-like object ``f``.
        """
        if run is None:
            per_run = 100 * torch.tensor(self.results)
            picked = []
            for scores in per_run:
                valid_scores = scores[:, 0]
                best_valid = valid_scores.max().item()
                test_at_best = scores[valid_scores.argmax(), 1].item()
                picked.append((best_valid, test_at_best))
            summary = torch.tensor(picked)

            print(f"All runs:", file=f)
            column = summary[:, 0]
            print(
                f"Highest Valid: {column.mean():.2f} ± {column.std():.2f}",
                file=f,
            )
            column = summary[:, 1]
            print(
                f" Final Test: {column.mean():.2f} ± {column.std():.2f}",
                file=f,
            )
            return

        scores = 100 * torch.tensor(self.results[run])
        best_point = scores[:, 0].argmax().item()
        print(f"Run {run + 1:02d}:", file=f)
        print(f"Highest Valid: {scores[:, 0].max():.2f}", file=f)
        print(f"Highest Eval Point: {best_point + 1}", file=f)
        print(f" Final Test: {scores[best_point, 1]:.2f}", file=f)
g.out_edges(fringe) else: _, out_neighbors = g.out_edges(fringe) in_neighbors, _ = g.in_edges(fringe) fringe = np.union1d(in_neighbors, out_neighbors) fringe = np.setdiff1d(fringe, visited) visited = np.union1d(visited, fringe) if self.sample_ratio < 1.0: fringe = np.random.choice( fringe, int(self.sample_ratio * len(fringe)), replace=False, ) if len(fringe) == 0: break nodes = np.union1d(nodes, fringe) subg = g.subgraph(nodes, store_ids=True) # remove edges to predict edges_to_remove = [ subg.edge_ids(s, t) for s, t in [(0, 1), (1, 0)] if subg.has_edges_between(s, t) ] subg.remove_edges(edges_to_remove) # add double radius node labeling subg.ndata["z"] = self._double_radius_node_labeling( subg.adj_external(scipy_fmt="csr") ) subg_aug = subg.add_self_loop() if "weight" in subg.edata: subg_aug.edata["weight"][subg.num_edges() :] = torch.ones( subg_aug.num_edges() - subg.num_edges() ) subgraphs.append(subg_aug) subgraphs = dgl.batch(subgraphs) dgl.set_src_lazy_features(subg_aug, self.prefetch_node_feats) dgl.set_edge_lazy_features(subg_aug, self.prefetch_edge_feats) return subgraphs, aug_g.edata["y"][seed_edges] # An end-to-end deep learning architecture for graph classification, AAAI-18. 
class DGCNN(torch.nn.Module): def __init__( self, hidden_channels, num_layers, k, GNN=GraphConv, feature_dim=0 ): super(DGCNN, self).__init__() self.feature_dim = feature_dim self.k = k self.sort_pool = SortPooling(k=k) self.max_z = 1000 self.z_embedding = Embedding(self.max_z, hidden_channels) self.convs = ModuleList() initial_channels = hidden_channels + self.feature_dim self.convs.append(GNN(initial_channels, hidden_channels)) for _ in range(0, num_layers - 1): self.convs.append(GNN(hidden_channels, hidden_channels)) self.convs.append(GNN(hidden_channels, 1)) conv1d_channels = [16, 32] total_latent_dim = hidden_channels * num_layers + 1 conv1d_kws = [total_latent_dim, 5] self.conv1 = Conv1d(1, conv1d_channels[0], conv1d_kws[0], conv1d_kws[0]) self.maxpool1d = MaxPool1d(2, 2) self.conv2 = Conv1d( conv1d_channels[0], conv1d_channels[1], conv1d_kws[1], 1 ) dense_dim = int((self.k - 2) / 2 + 1) dense_dim = (dense_dim - conv1d_kws[1] + 1) * conv1d_channels[1] self.lin1 = Linear(dense_dim, 128) self.lin2 = Linear(128, 1) def forward(self, g, z, x=None, edge_weight=None): z_emb = self.z_embedding(z) if z_emb.ndim == 3: # in case z has multiple integer labels z_emb = z_emb.sum(dim=1) if x is not None: x = torch.cat([z_emb, x.to(torch.float)], 1) else: x = z_emb xs = [x] for conv in self.convs: xs += [torch.tanh(conv(g, xs[-1], edge_weight=edge_weight))] x = torch.cat(xs[1:], dim=-1) # global pooling x = self.sort_pool(g, x) x = x.unsqueeze(1) # [num_graphs, 1, k * hidden] x = F.relu(self.conv1(x)) x = self.maxpool1d(x) x = F.relu(self.conv2(x)) x = x.view(x.size(0), -1) # [num_graphs, dense_dim] # MLP. 
def get_pos_neg_edges(split, split_edge, g, percent=100):
    """Return subsampled positive and negative edges for ``split``.

    Positive edges come from ``split_edge[split]["edge"]``. Negative edges
    are drawn with ``global_uniform_negative_sampling`` for the ``"train"``
    split and read from ``split_edge[split]["edge_neg"]`` otherwise. Both
    sets are reduced to ``percent`` percent using a NumPy permutation seeded
    with 123 (this reseeds NumPy's global generator).

    When the negatives have more than two dimensions they are indexed with
    the same permutation as the positives and flattened to rows of size 2.
    """

    def _seeded_subset(count):
        # Fixed seed so that every call selects the same rows.
        np.random.seed(123)
        order = np.random.permutation(count)
        return order[: int(percent / 100 * count)]

    edges = split_edge[split]
    positives = edges["edge"]
    if split != "train":
        negatives = edges["edge_neg"]
    else:
        sampled = global_uniform_negative_sampling(
            g, num_samples=positives.size(0), exclude_self_loops=True
        )
        negatives = torch.stack(sampled, dim=1)

    keep_pos = _seeded_subset(positives.size(0))
    positives = positives[keep_pos]

    if negatives.dim() > 2:
        # Negatives grouped per positive edge: keep the groups that belong
        # to the retained positives.
        negatives = negatives[keep_pos].view(-1, 2)
    else:
        negatives = negatives[_seeded_subset(negatives.size(0))]

    return positives, negatives
def evaluate_hits(pos_val_pred, neg_val_pred, pos_test_pred, neg_test_pred):
    """Compute Hits@20/50/100 on the validation and test predictions.

    Uses the module-level ``evaluator``, whose ``K`` attribute is updated
    for every cut-off. Returns a dict mapping ``"Hits@K"`` to a
    ``(valid_hits, test_hits)`` tuple.
    """

    def _hits(pos_pred, neg_pred, cutoff):
        scores = evaluator.eval(
            {
                "y_pred_pos": pos_pred,
                "y_pred_neg": neg_pred,
            }
        )
        return scores[f"hits@{cutoff}"]

    results = {}
    for K in (20, 50, 100):
        evaluator.K = K
        # Validation is evaluated before test, matching the call order of
        # the evaluator in the original loop.
        valid_hits = _hits(pos_val_pred, neg_val_pred, K)
        test_hits = _hits(pos_test_pred, neg_test_pred, K)
        results[f"Hits@{K}"] = (valid_hits, test_hits)
    return results
parser.add_argument( "--use_feature", action="store_true", help="whether to use raw node features as GNN input", ) parser.add_argument( "--use_edge_weight", action="store_true", help="whether to consider edge weight in GNN", ) # Training settings parser.add_argument("--lr", type=float, default=0.0001) parser.add_argument("--epochs", type=int, default=50) parser.add_argument("--runs", type=int, default=10) parser.add_argument("--train_percent", type=float, default=100) parser.add_argument("--val_percent", type=float, default=100) parser.add_argument("--test_percent", type=float, default=100) parser.add_argument( "--num_workers", type=int, default=8, help="number of workers for dynamic dataloaders", ) # Testing settings parser.add_argument("--use_valedges_as_input", action="store_true") parser.add_argument("--eval_steps", type=int, default=1) args = parser.parse_args() data_appendix = "_rph{}".format("".join(str(args.ratio_per_hop).split("."))) if args.use_valedges_as_input: data_appendix += "_uvai" args.res_dir = os.path.join( "results/{}_{}".format(args.dataset, time.strftime("%Y%m%d%H%M%S")) ) print("Results will be saved in " + args.res_dir) if not os.path.exists(args.res_dir): os.makedirs(args.res_dir) log_file = os.path.join(args.res_dir, "log.txt") # Save command line input. 
cmd_input = "python " + " ".join(sys.argv) + "\n" with open(os.path.join(args.res_dir, "cmd_input.txt"), "a") as f: f.write(cmd_input) print("Command line input: " + cmd_input + " is saved.") with open(log_file, "a") as f: f.write("\n" + cmd_input) dataset = DglLinkPropPredDataset(name=args.dataset) split_edge = dataset.get_edge_split() graph = dataset[0] # re-format the data of citation2 if args.dataset == "ogbl-citation2": for k in ["train", "valid", "test"]: src = split_edge[k]["source_node"] tgt = split_edge[k]["target_node"] split_edge[k]["edge"] = torch.stack([src, tgt], dim=1) if k != "train": tgt_neg = split_edge[k]["target_node_neg"] split_edge[k]["edge_neg"] = torch.stack( [src[:, None].repeat(1, tgt_neg.size(1)), tgt_neg], dim=-1 ) # [Ns, Nt, 2] # reconstruct the graph for ogbl-collab data for validation edge augmentation and coalesce if args.dataset == "ogbl-collab": graph.edata.pop("year") # float edata for to_simple transform graph.edata["weight"] = graph.edata["weight"].to(torch.float) if args.use_valedges_as_input: val_edges = split_edge["valid"]["edge"] row, col = val_edges.t() val_weights = torch.ones(size=(val_edges.size(0), 1)) graph.add_edges( torch.cat([row, col]), torch.cat([col, row]), {"weight": val_weights}, ) graph = graph.to_simple(copy_edata=True, aggregator="sum") if not args.use_edge_weight and "weight" in graph.edata: graph.edata.pop("weight") if not args.use_feature and "feat" in graph.ndata: graph.ndata.pop("feat") if args.dataset.startswith("ogbl-citation"): args.eval_metric = "mrr" directed = True else: args.eval_metric = "hits" directed = False evaluator = Evaluator(name=args.dataset) if args.eval_metric == "hits": loggers = { "Hits@20": Logger(args.runs, args), "Hits@50": Logger(args.runs, args), "Hits@100": Logger(args.runs, args), } elif args.eval_metric == "mrr": loggers = { "MRR": Logger(args.runs, args), } device = torch.device("cuda" if torch.cuda.is_available() else "cpu") path = dataset.root + 
"_seal{}".format(data_appendix) loaders = [] prefetch_node_feats = ["feat"] if "feat" in graph.ndata else None prefetch_edge_feats = ["weight"] if "weight" in graph.edata else None train_edge, train_edge_neg = get_pos_neg_edges( "train", split_edge, graph, args.train_percent ) val_edge, val_edge_neg = get_pos_neg_edges( "valid", split_edge, graph, args.val_percent ) test_edge, test_edge_neg = get_pos_neg_edges( "test", split_edge, graph, args.test_percent ) # create an augmented graph for sampling aug_g = dgl.graph(graph.edges()) aug_g.edata["y"] = torch.ones(aug_g.num_edges()) aug_edges = torch.cat( [val_edge, test_edge, train_edge_neg, val_edge_neg, test_edge_neg] ) aug_labels = torch.cat( [ torch.ones(len(val_edge) + len(test_edge)), torch.zeros( len(train_edge_neg) + len(val_edge_neg) + len(test_edge_neg) ), ] ) aug_g.add_edges(aug_edges[:, 0], aug_edges[:, 1], {"y": aug_labels}) # eids for sampling split_len = [graph.num_edges()] + list( map( len, [val_edge, test_edge, train_edge_neg, val_edge_neg, test_edge_neg], ) ) train_eids = torch.cat( [ graph.edge_ids(train_edge[:, 0], train_edge[:, 1]), torch.arange(sum(split_len[:3]), sum(split_len[:4])), ] ) val_eids = torch.cat( [ torch.arange(sum(split_len[:1]), sum(split_len[:2])), torch.arange(sum(split_len[:4]), sum(split_len[:5])), ] ) test_eids = torch.cat( [ torch.arange(sum(split_len[:2]), sum(split_len[:3])), torch.arange(sum(split_len[:5]), sum(split_len[:6])), ] ) sampler = SealSampler( graph, 1, args.ratio_per_hop, directed, prefetch_node_feats, prefetch_edge_feats, ) # force to be dynamic for consistent dataloading for split, shuffle, eids in zip( ["train", "valid", "test"], [True, False, False], [train_eids, val_eids, test_eids], ): data_loader = DataLoader( aug_g, eids, sampler, shuffle=shuffle, device=device, batch_size=args.batch_size, num_workers=args.num_workers, ) loaders.append(data_loader) train_loader, val_loader, test_loader = loaders # convert sortpool_k from percentile to number. 
num_nodes = [] for subgs, _ in train_loader: subgs = dgl.unbatch(subgs) if len(num_nodes) > 1000: break for subg in subgs: num_nodes.append(subg.num_nodes()) num_nodes = sorted(num_nodes) k = num_nodes[int(math.ceil(args.sortpool_k * len(num_nodes))) - 1] k = max(k, 10) for run in range(args.runs): model = DGCNN( args.hidden_channels, args.num_layers, k, feature_dim=graph.ndata["feat"].size(1) if args.use_feature else 0, ).to(device) parameters = list(model.parameters()) optimizer = torch.optim.Adam(params=parameters, lr=args.lr) total_params = sum(p.numel() for param in parameters for p in param) print(f"Total number of parameters is {total_params}") print(f"SortPooling k is set to {k}") with open(log_file, "a") as f: print(f"Total number of parameters is {total_params}", file=f) print(f"SortPooling k is set to {k}", file=f) start_epoch = 1 # Training starts for epoch in range(start_epoch, start_epoch + args.epochs): loss = train() if epoch % args.eval_steps == 0: results = test() for key, result in results.items(): loggers[key].add_result(run, result) model_name = os.path.join( args.res_dir, "run{}_model_checkpoint{}.pth".format(run + 1, epoch), ) optimizer_name = os.path.join( args.res_dir, "run{}_optimizer_checkpoint{}.pth".format(run + 1, epoch), ) torch.save(model.state_dict(), model_name) torch.save(optimizer.state_dict(), optimizer_name) for key, result in results.items(): valid_res, test_res = result to_print = ( f"Run: {run + 1:02d}, Epoch: {epoch:02d}, " + f"Loss: {loss:.4f}, Valid: {100 * valid_res:.2f}%, " + f"Test: {100 * test_res:.2f}%" ) print(key) print(to_print) with open(log_file, "a") as f: print(key, file=f) print(to_print, file=f) for key in loggers.keys(): print(key) loggers[key].print_statistics(run) with open(log_file, "a") as f: print(key, file=f) loggers[key].print_statistics(run, f=f) for key in loggers.keys(): print(key) loggers[key].print_statistics() with open(log_file, "a") as f: print(key, file=f) loggers[key].print_statistics(f=f) 
print(f"Total number of parameters is {total_params}") print(f"Results are saved in {args.res_dir}") ================================================ FILE: examples/pytorch/ogb/sign/.gitignore ================================================ dataset ================================================ FILE: examples/pytorch/ogb/sign/README.md ================================================ SIGN: Scalable Inception Graph Neural Network ========================== Paper: [https://arxiv.org/abs/2004.11198](https://arxiv.org/abs/2004.11198) Dependencies ------------ - pytorch 1.5 - dgl 0.5 nightly build - `pip install --pre dgl` - ogb 1.2.3 How to run ------------- ### ogbn-products ```python python3 sign.py --dataset ogbn-products --eval-ev 10 --R 5 --input-d 0.3 --num-h 512 \ --dr 0.4 --lr 0.001 --batch-size 50000 --num-runs 10 ``` ### ogbn-arxiv ```python python3 sign.py --dataset ogbn-arxiv --eval-ev 10 --R 5 --input-d 0.1 --num-h 512 \ --dr 0.5 --lr 0.001 --eval-b 100000 --num-runs 10 ``` ### ogbn-mag ogbn-mag is a heterogeneous graph and the task is to predict publishing venue of papers. Since SIGN model is designed for homogeneous graph, we simply ignore heterogeneous information (i.e. node and edge types) and treat the graph as a homogeneous one. For node types that don't have input feature, we featurize them with the average of their neighbors' features. ```python python3 sign.py --dataset ogbn-mag --eval-ev 10 --R 5 --input-d 0 --num-h 512 \ --dr 0.5 --lr 0.001 --batch-size 50000 --num-runs 10 ``` Results ---------- Table below shows the average and standard deviation (over 10 times) of accuracy. Experiments were performed on Tesla T4 (15GB) GPU on Oct 29. 
def convert_mag_to_homograph(g, device):
    """
    Featurize node types that don't have input features (i.e. author,
    institution, field_of_study) by averaging their neighbor features.
    Then convert the graph to an undirected homogeneous graph.

    Parameters
    ----------
    g : heterogeneous graph with "writes", "has_topic" and "affiliated_with"
        edge types and a "feat" feature on "paper" nodes.
    device : device the auxiliary propagation graph is moved to.

    Returns
    -------
    The homogeneous graph with reverse edges added, carrying "feat" and a
    boolean "target_mask" node feature that is True for paper nodes.
    """
    writes_src, writes_dst = g.all_edges(etype="writes")
    topic_src, topic_dst = g.all_edges(etype="has_topic")
    aff_src, aff_dst = g.all_edges(etype="affiliated_with")

    # Auxiliary graph used only for feature propagation. The "writes" edges
    # are flipped so that messages flow from papers to authors.
    relations = {
        ("paper", "written", "author"): (writes_dst, writes_src),
        ("paper", "has_topic", "field"): (topic_src, topic_dst),
        ("author", "aff", "inst"): (aff_src, aff_dst),
    }
    aux_g = dgl.heterograph(relations).to(device)
    aux_g.nodes["paper"].data["feat"] = g.nodes["paper"].data["feat"]

    # Order matters: "aff" averages the author features produced by "written".
    for etype in ("written", "has_topic", "aff"):
        aux_g[etype].update_all(fn.copy_u("feat", "m"), fn.mean("m", "feat"))

    # Copy the propagated features back under the original node type names.
    for ntype, aux_ntype in (
        ("author", "author"),
        ("institution", "inst"),
        ("field_of_study", "field"),
    ):
        g.nodes[ntype].data["feat"] = aux_g.nodes[aux_ntype].data["feat"]

    # Look up the paper type id before the node types are flattened away.
    paper_type_id = g.get_ntype_id("paper")
    homo_g = dgl.to_homogeneous(g, ndata=["feat"])
    homo_g = dgl.add_reverse_edges(homo_g, copy_ndata=True)
    # Mask for paper nodes
    homo_g.ndata["target_mask"] = homo_g.ndata[dgl.NTYPE] == paper_type_id
    return homo_g
class FeedForwardNet(nn.Module):
    """Stack of linear layers with PReLU + dropout between consecutive layers.

    With ``n_layers == 1`` the network is a single ``in_feats -> out_feats``
    linear map and no activation/dropout modules are created. Otherwise the
    first layer maps to ``hidden``, the last maps to ``out_feats`` and any
    layers in between are ``hidden -> hidden``.
    """

    def __init__(self, in_feats, hidden, out_feats, n_layers, dropout):
        super().__init__()
        self.n_layers = n_layers
        if n_layers == 1:
            widths = [in_feats, out_feats]
        else:
            # At least one hidden width, so there are always an input and an
            # output layer on this branch.
            widths = [in_feats] + [hidden] * max(n_layers - 1, 1) + [out_feats]
        self.layers = nn.ModuleList(
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(widths, widths[1:])
        )
        if n_layers > 1:
            self.prelu = nn.PReLU()
            self.dropout = nn.Dropout(dropout)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform weights (ReLU gain) and zero biases for every layer."""
        relu_gain = nn.init.calculate_gain("relu")
        for linear in self.layers:
            nn.init.xavier_uniform_(linear.weight, gain=relu_gain)
            nn.init.zeros_(linear.bias)

    def forward(self, x):
        """Apply the layers in order; the last layer's output is returned raw."""
        last_index = self.n_layers - 1
        for index, linear in enumerate(self.layers):
            x = linear(x)
            if index >= last_index:
                continue
            x = self.dropout(self.prelu(x))
        return x
def get_n_params(model):
    """Return the total number of scalar parameters in ``model``.

    Every tensor yielded by ``model.parameters()`` contributes its element
    count; a model without parameters yields 0.
    """
    # ``numel()`` is the product of the tensor's dimensions, which is what the
    # previous hand-written loop computed (using a local called ``nn`` that
    # shadowed the ``torch.nn`` alias).
    return sum(param.numel() for param in model.parameters())
def train(model, feats, labels, loss_fcn, optimizer, train_loader):
    """Run one optimisation pass over ``train_loader``.

    Each element of ``train_loader`` is a batch of node indices. The indices
    select rows from every tensor in ``feats`` (one tensor per hop) and from
    ``labels``; the selected features are moved to the device of ``labels``
    before the forward pass. Returns nothing; ``model`` and ``optimizer`` are
    updated in place.
    """
    model.train()
    target_device = labels.device
    for node_ids in train_loader:
        hop_inputs = [hop_feat[node_ids].to(target_device) for hop_feat in feats]
        logits = model(hop_inputs)
        batch_loss = loss_fcn(logits, labels[node_ids])
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
def main(args):
    """Prepare the data once, train ``args.num_runs`` times, report statistics.

    A negative ``args.gpu`` selects the CPU; otherwise the CUDA device with
    that index is used. The best validation/test accuracies returned by each
    run are averaged and printed with their standard deviation.
    """
    device = "cpu" if args.gpu < 0 else "cuda:{}".format(args.gpu)

    # Data preparation runs with gradient tracking disabled.
    with torch.no_grad():
        data = prepare_data(device, args)

    val_accs, test_accs = [], []
    for run_idx in range(args.num_runs):
        print(f"Run {run_idx} start training")
        run_val, run_test = run(args, data, device)
        val_accs.append(run_val)
        test_accs.append(run_test)

    val_mean, val_std = np.mean(val_accs), np.std(val_accs)
    print(
        f"Average val accuracy: {val_mean:.4f}, "
        f"std: {val_std:.4f}"
    )
    test_mean, test_std = np.mean(test_accs), np.std(test_accs)
    print(
        f"Average test accuracy: {test_mean:.4f}, "
        f"std: {test_std:.4f}"
    )
default=0.001) parser.add_argument("--dataset", type=str, default="ogbn-mag") parser.add_argument( "--dropout", type=float, default=0.5, help="dropout on activation" ) parser.add_argument("--gpu", type=int, default=0) parser.add_argument("--weight-decay", type=float, default=0) parser.add_argument("--eval-every", type=int, default=10) parser.add_argument("--batch-size", type=int, default=50000) parser.add_argument( "--eval-batch-size", type=int, default=100000, help="evaluation batch size", ) parser.add_argument( "--ff-layer", type=int, default=2, help="number of feed-forward layers" ) parser.add_argument( "--input-dropout", type=float, default=0, help="dropout on input features", ) parser.add_argument( "--num-runs", type=int, default=10, help="number of times to repeat the experiment", ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/ogb_lsc/MAG240M/README.md ================================================ # Baseline Code for MAG240M The code is ported from the R-GAT examples [here](https://github.com/snap-stanford/ogb/tree/master/examples/lsc/mag240m). Please refer to the [OGB-LSC paper](https://arxiv.org/abs/2103.09430) for the detailed setting. ## Installation Requirements ``` ogb>=1.3.0 torch>=1.7.0 ``` ## Running Preprocessing Script ``` python preprocess.py \ --rootdir . \ --author-output-path ./author.npy \ --inst-output-path ./inst.npy \ --graph-output-path ./graph.dgl \ --graph-as-homogeneous \ --full-output-path ./full.npy ``` This will give you the following files: * `author.npy`: The author features, preprocessed by averaging the neighboring paper features. * `inst.npy`: The institution features, preprocessed by averaging the neighboring author features. * `graph.dgl`: The *homogenized* DGL graph stored in CSC format, which is friendly for neighbor sampling. Edge types are stored on the edges as an `int8` feature. Nodes are in the order of author, institution, and paper. 
* `full.npy`: The concatenated author, institution, and paper features.

Since preprocessing usually takes a long time, we also offer the above files for download:

* [`author.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/author.npy)
* [`inst.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/inst.npy)
* [`graph.dgl`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/graph.dgl)
* [`full.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/full.npy)

In addition, we offer

* [`full_feat.npy`](https://dgl-data.s3-accelerate.amazonaws.com/dataset/OGB-LSC/full_feat.npy): The preprocessed full feature matrix for running OGB's own baseline. Note that the features are concatenated in the order of paper, author, and institution, unlike the one in our baseline code. It is also preprocessed in float32 arithmetic instead of float16 arithmetic.

## Running Training Script

```
python train.py \
    --rootdir . \
    --graph-path ./graph.dgl \
    --full-feature-path ./full.npy
```

The validation accuracy is 0.701. We do not have ground truth test labels so we do not report test accuracy.

## Hardware configurations

We successfully ran 8 experiments in parallel on an AWS p4d.24xlarge instance with the preprocessed feature matrices stored on an NVMe SSD to enable fast disk read. Each experiment requires less than 128GB CPU memory and less than 12GB GPU memory to run. Every epoch takes around 6 minutes 30 seconds to train and 1 minute 40 seconds to validate.

If your hard drive is slow, it is best to load all the features into memory for a reasonable training speed. The CPU memory consumption will go up to as large as 512GB though.
================================================ FILE: examples/pytorch/ogb_lsc/MAG240M/preprocess.py ================================================ import argparse import os import dgl import dgl.function as fn import numpy as np import ogb import torch import tqdm from ogb.lsc import MAG240MDataset parser = argparse.ArgumentParser() parser.add_argument( "--rootdir", type=str, default=".", help="Directory to download the OGB dataset.", ) parser.add_argument( "--author-output-path", type=str, help="Path to store the author features." ) parser.add_argument( "--inst-output-path", type=str, help="Path to store the institution features.", ) parser.add_argument( "--graph-output-path", type=str, help="Path to store the graph." ) parser.add_argument( "--graph-format", type=str, default="csc", help="Graph format (coo, csr or csc).", ) parser.add_argument( "--graph-as-homogeneous", action="store_true", help="Store the graph as DGL homogeneous graph.", ) parser.add_argument( "--full-output-path", type=str, help="Path to store features of all nodes. Effective only when graph is homogeneous.", ) args = parser.parse_args() print("Building graph") dataset = MAG240MDataset(root=args.rootdir) ei_writes = dataset.edge_index("author", "writes", "paper") ei_cites = dataset.edge_index("paper", "paper") ei_affiliated = dataset.edge_index("author", "institution") # We sort the nodes starting with the authors, then the institutions, then the papers.
# Row offsets of each node type in the concatenated (homogeneous) ordering:
# authors first, then institutions, then papers.
author_offset = 0
inst_offset = author_offset + dataset.num_authors
paper_offset = inst_offset + dataset.num_institutions

# Every relation is stored together with its reverse. Citation edges are made
# symmetric by concatenating both directions into a single "cite" relation.
g = dgl.heterograph(
    {
        ("author", "write", "paper"): (ei_writes[0], ei_writes[1]),
        ("paper", "write-by", "author"): (ei_writes[1], ei_writes[0]),
        ("author", "affiliate-with", "institution"): (
            ei_affiliated[0],
            ei_affiliated[1],
        ),
        ("institution", "affiliate", "author"): (
            ei_affiliated[1],
            ei_affiliated[0],
        ),
        ("paper", "cite", "paper"): (
            np.concatenate([ei_cites[0], ei_cites[1]]),
            np.concatenate([ei_cites[1], ei_cites[0]]),
        ),
    }
)

paper_feat = dataset.paper_feat
# Output arrays are file-backed float16 memmaps opened with mode="w+", one row
# per author / institution and one column per paper feature.
author_feat = np.memmap(
    args.author_output_path,
    mode="w+",
    dtype="float16",
    shape=(dataset.num_authors, dataset.num_paper_features),
)
inst_feat = np.memmap(
    args.inst_output_path,
    mode="w+",
    dtype="float16",
    shape=(dataset.num_institutions, dataset.num_paper_features),
)

# Iteratively process author features along the feature dimension.
# Only BLOCK_COLS feature columns are loaded and propagated per iteration.
BLOCK_COLS = 16
with tqdm.trange(0, dataset.num_paper_features, BLOCK_COLS) as tq:
    for start in tq:
        tq.set_postfix_str("Reading paper features...")
        # The column block is converted to float32 before message passing.
        g.nodes["paper"].data["x"] = torch.FloatTensor(
            paper_feat[:, start : start + BLOCK_COLS].astype("float32")
        )
        # Compute author features...
        # (mean of the "x" of the papers connected through "write-by")
        tq.set_postfix_str("Computing author features...")
        g.update_all(fn.copy_u("x", "m"), fn.mean("m", "x"), etype="write-by")
        # Then institution features...
        # (mean of the author "x" values computed just above)
        tq.set_postfix_str("Computing institution features...")
        g.update_all(
            fn.copy_u("x", "m"), fn.mean("m", "x"), etype="affiliate-with"
        )
        tq.set_postfix_str("Writing author features...")
        author_feat[:, start : start + BLOCK_COLS] = (
            g.nodes["author"].data["x"].numpy().astype("float16")
        )
        tq.set_postfix_str("Writing institution features...")
        inst_feat[:, start : start + BLOCK_COLS] = (
            g.nodes["institution"].data["x"].numpy().astype("float16")
        )
        # Drop the per-block tensors before the next column block is loaded.
        del g.nodes["paper"].data["x"]
        del g.nodes["author"].data["x"]
        del g.nodes["institution"].data["x"]
author_feat.flush()
inst_feat.flush()

# Convert to homogeneous if needed.
(The RGAT baseline needs homogeneous graph) if args.graph_as_homogeneous: # Process graph g = dgl.to_homogeneous(g) # DGL ensures that nodes with the same type are put together with the order preserved. # DGL also ensures that the node types are sorted in ascending order. assert torch.equal( g.ndata[dgl.NTYPE], torch.cat( [ torch.full((dataset.num_authors,), 0), torch.full((dataset.num_institutions,), 1), torch.full((dataset.num_papers,), 2), ] ), ) assert torch.equal( g.ndata[dgl.NID], torch.cat( [ torch.arange(dataset.num_authors), torch.arange(dataset.num_institutions), torch.arange(dataset.num_papers), ] ), ) g.edata["etype"] = g.edata[dgl.ETYPE].byte() del g.edata[dgl.ETYPE] del g.ndata[dgl.NTYPE] del g.ndata[dgl.NID] # Process feature full_feat = np.memmap( args.full_output_path, mode="w+", dtype="float16", shape=( dataset.num_authors + dataset.num_institutions + dataset.num_papers, dataset.num_paper_features, ), ) BLOCK_ROWS = 100000 for start in tqdm.trange(0, dataset.num_authors, BLOCK_ROWS): end = min(dataset.num_authors, start + BLOCK_ROWS) full_feat[author_offset + start : author_offset + end] = author_feat[ start:end ] for start in tqdm.trange(0, dataset.num_institutions, BLOCK_ROWS): end = min(dataset.num_institutions, start + BLOCK_ROWS) full_feat[inst_offset + start : inst_offset + end] = inst_feat[ start:end ] for start in tqdm.trange(0, dataset.num_papers, BLOCK_ROWS): end = min(dataset.num_papers, start + BLOCK_ROWS) full_feat[paper_offset + start : paper_offset + end] = paper_feat[ start:end ] # Convert the graph to the given format and save. 
class RGAT(nn.Module):
    """Relational GAT over homogenized graphs with an integer edge type.

    Every layer owns one ``GATConv`` per edge type. The outputs of all edge
    types are added onto a linear skip projection of the destination-node
    features, followed by batch norm, ELU and dropout. A two-layer MLP head
    produces the final ``out_channels`` outputs.

    Parameters
    ----------
    in_channels : input feature size.
    out_channels : output size of the MLP head.
    hidden_channels : hidden size. Each attention head produces
        ``hidden_channels // num_heads`` features and the heads are reshaped
        back to ``hidden_channels`` columns, so it should be divisible by
        ``num_heads``.
    num_etypes : number of edge types (values of the ``"etype"`` edge feature).
    num_layers : number of message-passing layers (at least one is created).
    num_heads : number of attention heads per ``GATConv``.
    dropout : dropout probability used after each layer and inside the head.
    pred_ntype : stored on the module; not used by ``forward``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        hidden_channels,
        num_etypes,
        num_layers,
        num_heads,
        dropout,
        pred_ntype,
    ):
        super().__init__()
        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()
        self.skips = nn.ModuleList()

        # The first layer consumes the raw features; every further layer
        # consumes the hidden representation. Modules are created in the same
        # order as before so parameter initialisation is unchanged.
        layer_inputs = [in_channels] + [hidden_channels] * (num_layers - 1)
        for layer_in in layer_inputs:
            self.convs.append(
                nn.ModuleList(
                    [
                        dglnn.GATConv(
                            layer_in,
                            hidden_channels // num_heads,
                            num_heads,
                            allow_zero_in_degree=True,
                        )
                        for _ in range(num_etypes)
                    ]
                )
            )
            self.norms.append(nn.BatchNorm1d(hidden_channels))
            self.skips.append(nn.Linear(layer_in, hidden_channels))

        self.mlp = nn.Sequential(
            nn.Linear(hidden_channels, hidden_channels),
            nn.BatchNorm1d(hidden_channels),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_channels, out_channels),
        )
        self.dropout = nn.Dropout(dropout)

        self.hidden_channels = hidden_channels
        self.pred_ntype = pred_ntype
        self.num_etypes = num_etypes

    def forward(self, mfgs, x):
        """Apply one layer per message-flow graph in ``mfgs`` and the MLP head.

        ``x`` holds the features of the source nodes of ``mfgs[0]``; its first
        ``num_dst_nodes()`` rows are used as the destination-node features of
        each block.
        """
        for layer, block in enumerate(mfgs):
            x_dst = x[: block.num_dst_nodes()]
            graph = dgl.block_to_graph(block)
            x_skip = self.skips[layer](x_dst)
            for etype in range(self.num_etypes):
                # Keep all nodes (relabel_nodes=False) so the feature rows
                # still line up with the nodes of the per-type subgraph.
                subg = graph.edge_subgraph(
                    graph.edata["etype"] == etype, relabel_nodes=False
                )
                x_skip += self.convs[layer][etype](subg, (x, x_dst)).view(
                    -1, self.hidden_channels
                )
            x = self.norms[layer](x_skip)
            x = F.elu(x)
            x = self.dropout(x)
        return self.mlp(x)
def test(args, dataset, g, feats, paper_offset):
    """Evaluate the saved model and write the test submission.

    Loads the weights stored at ``args.model_path``, prints the accuracy on
    the validation split, then predicts the test split and saves the
    predictions through ``MAG240MEvaluator.save_test_submission`` into
    ``args.submission_path``. Paper indices from the dataset splits are
    shifted by ``paper_offset`` to address nodes of ``g``.
    """
    print("Loading masks and labels...")
    split_ids = {
        split: torch.LongTensor(dataset.get_idx_split(split)) + paper_offset
        for split in ("valid", "test")
    }
    label = dataset.paper_label

    print("Initializing data loader...")
    sampler = dgl.dataloading.MultiLayerNeighborSampler([160, 160])

    def build_loader(node_ids, workers):
        # Both loaders differ only in their node set and worker count.
        collator = ExternalNodeCollator(
            g, node_ids, sampler, paper_offset, feats, label
        )
        return torch.utils.data.DataLoader(
            collator.dataset,
            batch_size=16,
            shuffle=False,
            drop_last=False,
            collate_fn=collator.collate,
            num_workers=workers,
        )

    valid_dataloader = build_loader(split_ids["valid"], 2)
    test_dataloader = build_loader(split_ids["test"], 4)

    print("Loading model...")
    model = RGAT(
        dataset.num_paper_features,
        dataset.num_classes,
        1024,
        5,
        2,
        4,
        0.5,
        "paper",
    ).cuda()
    model.load_state_dict(torch.load(args.model_path, weights_only=False))
    model.eval()

    # Validation accuracy.
    correct = 0
    total = 0
    with torch.no_grad():
        for _, _, blocks in tqdm.tqdm(valid_dataloader):
            blocks = [blk.to("cuda") for blk in blocks]
            inputs = blocks[0].srcdata["x"]
            targets = blocks[-1].dstdata["y"]
            logits = model(blocks, inputs)
            correct += (logits.argmax(1) == targets).sum().item()
            total += logits.shape[0]
    acc = correct / total
    print("Validation accuracy:", acc)

    # Test predictions, collected on the CPU in loader order.
    evaluator = MAG240MEvaluator()
    y_preds = []
    with torch.no_grad():
        for _, _, blocks in tqdm.tqdm(test_dataloader):
            blocks = [blk.to("cuda") for blk in blocks]
            logits = model(blocks, blocks[0].srcdata["x"])
            y_preds.append(logits.argmax(1).cpu())
    evaluator.save_test_submission(
        {"y_pred": torch.cat(y_preds)}, args.submission_path
    )
) parser.add_argument( "--model-path", type=str, default="./model.pt", help="Path to store the best model.", ) parser.add_argument( "--submission-path", type=str, default="./results", help="Submission directory.", ) args = parser.parse_args() dataset = MAG240MDataset(root=args.rootdir) print("Loading graph") (g,), _ = dgl.load_graphs(args.graph_path) g = g.formats(["csc"]) print("Loading features") paper_offset = dataset.num_authors + dataset.num_institutions num_nodes = paper_offset + dataset.num_papers num_features = dataset.num_paper_features feats = np.memmap( args.full_feature_path, mode="r", dtype="float16", shape=(num_nodes, num_features), ) if args.epochs != 0: train(args, dataset, g, feats, paper_offset) test(args, dataset, g, feats, paper_offset) ================================================ FILE: examples/pytorch/ogb_lsc/MAG240M/train_multi_gpus.py ================================================ #!/usr/bin/env python # coding: utf-8 import argparse import math import sys from collections import OrderedDict import dgl import dgl.nn as dglnn import numpy as np import torch import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F import tqdm from ogb.lsc import MAG240MDataset, MAG240MEvaluator from torch.nn.parallel import DistributedDataParallel class RGAT(nn.Module): def __init__( self, in_channels, out_channels, hidden_channels, num_etypes, num_layers, num_heads, dropout, pred_ntype, ): super().__init__() self.convs = nn.ModuleList() self.norms = nn.ModuleList() self.skips = nn.ModuleList() self.convs.append( nn.ModuleList( [ dglnn.GATConv( in_channels, hidden_channels // num_heads, num_heads, allow_zero_in_degree=True, ) for _ in range(num_etypes) ] ) ) self.norms.append(nn.BatchNorm1d(hidden_channels)) self.skips.append(nn.Linear(in_channels, hidden_channels)) for _ in range(num_layers - 1): self.convs.append( nn.ModuleList( [ dglnn.GATConv( hidden_channels, hidden_channels // num_heads, num_heads, 
class ExternalNodeCollator(dgl.dataloading.NodeCollator):
    """Node collator that attaches externally stored features and labels.

    ``feats`` is indexed with the input node IDs of each mini-batch, and
    ``label`` is indexed with the output node IDs shifted down by ``offset``.
    """

    def __init__(self, g, idx, sampler, offset, feats, label):
        super().__init__(g, idx, sampler)
        self.offset = offset
        self.feats = feats
        self.label = label

    def collate(self, items):
        """Sample the blocks for ``items`` and fill in ``"x"`` and ``"y"``.

        Returns the ``(input_nodes, output_nodes, mfgs)`` triple from the
        parent collator, with input features stored on the source side of the
        first block and labels on the destination side of the last block.
        """
        input_nodes, output_nodes, mfgs = super().collate(items)
        first_block = mfgs[0]
        last_block = mfgs[-1]
        # Copy input features
        first_block.srcdata["x"] = torch.FloatTensor(self.feats[input_nodes])
        # Labels are addressed relative to ``offset``.
        label_rows = output_nodes - self.offset
        last_block.dstdata["y"] = torch.LongTensor(self.label[label_rows])
        return input_nodes, output_nodes, mfgs
torch.LongTensor(dataset.get_idx_split("valid")) + paper_offset label = dataset.paper_label print("Initializing dataloader...") sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 25]) train_collator = ExternalNodeCollator( g, train_idx, sampler, paper_offset, feats, label ) valid_collator = ExternalNodeCollator( g, valid_idx, sampler, paper_offset, feats, label ) # Necessary according to https://yangkky.github.io/2019/07/08/distributed-pytorch-tutorial.html train_sampler = torch.utils.data.distributed.DistributedSampler( train_collator.dataset, num_replicas=world_size, rank=proc_id, shuffle=True, drop_last=False, ) valid_sampler = torch.utils.data.distributed.DistributedSampler( valid_collator.dataset, num_replicas=world_size, rank=proc_id, shuffle=True, drop_last=False, ) train_dataloader = torch.utils.data.DataLoader( train_collator.dataset, batch_size=1024, collate_fn=train_collator.collate, num_workers=4, sampler=train_sampler, ) valid_dataloader = torch.utils.data.DataLoader( valid_collator.dataset, batch_size=1024, collate_fn=valid_collator.collate, num_workers=2, sampler=valid_sampler, ) print("Initializing model...") model = RGAT( dataset.num_paper_features, dataset.num_classes, 1024, 5, 2, 4, 0.5, "paper", ).to(dev_id) # convert BN to SyncBatchNorm. see https://pytorch.org/docs/stable/generated/torch.nn.SyncBatchNorm.html model = nn.SyncBatchNorm.convert_sync_batchnorm(model) model = DistributedDataParallel( model, device_ids=[dev_id], output_device=dev_id ) opt = torch.optim.Adam(model.parameters(), lr=0.001) sched = torch.optim.lr_scheduler.StepLR(opt, step_size=25, gamma=0.25) best_acc = 0 for i in range(args.epochs): # make shuffling work properly across multiple epochs. 
# see https://pytorch.org/docs/stable/data.html#torch.utils.data.distributed.DistributedSampler train_sampler.set_epoch(i) model.train() with tqdm.tqdm(train_dataloader) as tq: for i, (input_nodes, output_nodes, mfgs) in enumerate(tq): mfgs = [g.to(dev_id) for g in mfgs] x = mfgs[0].srcdata["x"] y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) loss = F.cross_entropy(y_hat, y) opt.zero_grad() loss.backward() opt.step() acc = (y_hat.argmax(1) == y).float().mean() tq.set_postfix( {"loss": "%.4f" % loss.item(), "acc": "%.4f" % acc.item()}, refresh=False, ) # eval in each process model.eval() correct = torch.LongTensor([0]).to(dev_id) total = torch.LongTensor([0]).to(dev_id) for i, (input_nodes, output_nodes, mfgs) in enumerate( tqdm.tqdm(valid_dataloader) ): with torch.no_grad(): mfgs = [g.to(dev_id) for g in mfgs] x = mfgs[0].srcdata["x"] y = mfgs[-1].dstdata["y"] y_hat = model(mfgs, x) correct += (y_hat.argmax(1) == y).sum().item() total += y_hat.shape[0] # `reduce` data into process 0 torch.distributed.reduce( correct, dst=0, op=torch.distributed.ReduceOp.SUM ) torch.distributed.reduce( total, dst=0, op=torch.distributed.ReduceOp.SUM ) acc = (correct / total).item() sched.step() # process 0 print accuracy and save model if proc_id == 0: print("Validation accuracy:", acc) if best_acc < acc: best_acc = acc print("Updating best model...") torch.save(model.state_dict(), args.model_path) def test(args, dataset, g, feats, paper_offset): print("Loading masks and labels...") valid_idx = torch.LongTensor(dataset.get_idx_split("valid")) + paper_offset test_idx = torch.LongTensor(dataset.get_idx_split("test")) + paper_offset label = dataset.paper_label print("Initializing data loader...") sampler = dgl.dataloading.MultiLayerNeighborSampler([160, 160]) valid_collator = ExternalNodeCollator( g, valid_idx, sampler, paper_offset, feats, label ) valid_dataloader = torch.utils.data.DataLoader( valid_collator.dataset, batch_size=16, shuffle=False, drop_last=False, 
# Script entry point: parse the command line, load the dataset, graph and
# node features, launch one training process per GPU, then run `test`.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--rootdir",
        type=str,
        default=".",
        help="Directory to download the OGB dataset.",
    )
    parser.add_argument(
        "--graph-path",
        type=str,
        default="./graph.dgl",
        help="Path to the graph.",
    )
    parser.add_argument(
        "--full-feature-path",
        type=str,
        default="./full.npy",
        help="Path to the features of all nodes.",
    )
    parser.add_argument(
        "--epochs", type=int, default=100, help="Number of epochs."
    )
    parser.add_argument(
        "--model-path",
        type=str,
        default="./model_ddp.pt",
        help="Path to store the best model.",
    )
    parser.add_argument(
        "--submission-path",
        type=str,
        default="./results_ddp",
        help="Submission directory.",
    )
    # Comma-separated list of GPU ids; one training process per entry.
    parser.add_argument("--gpus", type=str, default="0,1,2")
    args = parser.parse_args()

    devices = list(map(int, args.gpus.split(",")))
    n_gpus = len(devices)

    # This script only supports the multi-GPU path.
    # NOTE(review): sys.exit() with no argument exits with status 0 even
    # though this is an error condition - confirm whether that is intended.
    if n_gpus <= 1:
        print("make sure the number of gpus greater than 1!")
        sys.exit()

    dataset = MAG240MDataset(root=args.rootdir)

    print("Loading graph")
    (g,), _ = dgl.load_graphs(args.graph_path)
    # Keep only the CSC sparse format of the graph.
    g = g.formats(["csc"])

    print("Loading features")
    # presumably node IDs are laid out as authors, institutions, papers, so
    # paper i has node ID paper_offset + i - verify against the script that
    # built the graph and feature file.
    paper_offset = dataset.num_authors + dataset.num_institutions
    num_nodes = paper_offset + dataset.num_papers
    num_features = dataset.num_paper_features
    # Read-only float16 memory map of shape (num_nodes, num_features).
    feats = np.memmap(
        args.full_feature_path,
        mode="r",
        dtype="float16",
        shape=(num_nodes, num_features),
    )

    # Start n_gpus processes running `train`; mp.spawn passes the process
    # index as the first positional argument.
    mp.spawn(
        train,
        args=(n_gpus, args, dataset, g, feats, paper_offset),
        nprocs=n_gpus,
    )

    test(args, dataset, g, feats, paper_offset)
## Baseline Models ### GIN [1] ``` python main.py --gnn gin --log_dir $LOG_DIR --checkpoint_dir $CHECKPOINT_DIR --save_test_dir $TEST_DIR ``` ### GIN-virtual [1,3] ``` python main.py --gnn gin-virtual --log_dir $LOG_DIR --checkpoint_dir $CHECKPOINT_DIR --save_test_dir $TEST_DIR ``` ### GCN [2] ``` python main.py --gnn gcn --log_dir $LOG_DIR --checkpoint_dir $CHECKPOINT_DIR --save_test_dir $TEST_DIR ``` ### GCN-virtual [2,3] ``` python main.py --gnn gcn-virtual --log_dir $LOG_DIR --checkpoint_dir $CHECKPOINT_DIR --save_test_dir $TEST_DIR ``` ## Measuring the Test Inference Time The code below takes **the raw SMILES strings as input**, uses the saved checkpoint, and performs inference over all the 377,423 test molecules. ``` python test_inference.py --gnn $GNN --checkpoint_dir $CHECKPOINT_DIR --save_test_dir $TEST_DIR ``` For your model, **the total inference time needs to be less than 12 hours on a single GPU and a CPU**. Ideally, you should use the CPU/GPU spec of the organizers, which consists of a single GeForce RTX 2080 GPU and an Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz. However, the organizers also allow the use of other GPU/CPU specs, as long as the specs are clearly reported in the final submission. ## Performance | Model | Original Valid MAE | DGL Valid MAE | #Parameters | | ----------- | ------------------ | ------------- | ----------- | | GIN | 0.1536 | 0.1536 | 3.8M | | GIN-virtual | 0.1396 | 0.1407 | 6.7M | | GCN | 0.1684 | 0.1683 | 2.0M | | GCN-virtual | 0.1510 | 0.1557 | 4.9M | ## References [1] Xu, K., Hu, W., Leskovec, J., & Jegelka, S. (2019). How powerful are graph neural networks? ICLR 2019. [2] Kipf, T. N., & Welling, M. (2017). Semi-supervised classification with graph convolutional networks. ICLR 2017. [3] Gilmer, J., Schoenholz, S. S., Riley, P. F., Vinyals, O., & Dahl, G. E. Neural message passing for quantum chemistry. ICML 2017.
### GIN convolution along the graph structure
class GINConv(nn.Module):
    """GIN-style layer that mixes bond embeddings into neighbor messages."""

    def __init__(self, emb_dim):
        """
        emb_dim (int): node embedding dimensionality
        """
        super().__init__()

        # Sub-modules are created in the same order as before so parameter
        # registration (and random initialization order) is unchanged.
        layers = [
            nn.Linear(emb_dim, emb_dim),
            nn.BatchNorm1d(emb_dim),
            nn.ReLU(),
            nn.Linear(emb_dim, emb_dim),
        ]
        self.mlp = nn.Sequential(*layers)
        # Learnable weight on the node's own embedding, initialized to 0.
        self.eps = nn.Parameter(torch.Tensor([0]))
        self.bond_encoder = BondEncoder(emb_dim=emb_dim)

    def forward(self, g, x, edge_attr):
        """Return updated node embeddings for graph ``g``.

        ``x`` holds the current node embeddings and ``edge_attr`` the raw
        bond features consumed by the bond encoder.
        """
        # local_scope keeps the temporary "x" / "m" / "new_x" fields from
        # leaking into the caller's graph.
        with g.local_scope():
            bond_emb = self.bond_encoder(edge_attr)

            # Copy each source node's embedding onto its outgoing edges.
            g.ndata["x"] = x
            g.apply_edges(fn.copy_u("x", "m"))

            # Message = relu(source embedding + bond embedding).
            messages = g.edata["m"] + bond_emb
            g.edata["m"] = F.relu(messages)

            # Sum incoming messages per destination node.
            g.update_all(fn.copy_e("m", "m"), fn.sum("m", "new_x"))
            aggregated = g.ndata["new_x"]

            return self.mlp((1 + self.eps) * x + aggregated)
### GNN to generate node embedding
class GNN_node(nn.Module):
    """
    Output:
        node representations
    """

    def __init__(
        self,
        num_layers,
        emb_dim,
        drop_ratio=0.5,
        JK="last",
        residual=False,
        gnn_type="gin",
    ):
        """
        num_layers (int): number of GNN message passing layers
        emb_dim (int): node embedding dimensionality
        drop_ratio (float): dropout probability applied after every layer
        JK (str): how layer outputs are combined, "last" or "sum"
        residual (bool): add each layer's input to its output
        gnn_type (str): "gin" or "gcn"

        Raises ValueError if ``num_layers < 2`` or ``gnn_type`` is unknown.
        """
        super(GNN_node, self).__init__()
        self.num_layers = num_layers
        self.drop_ratio = drop_ratio
        self.JK = JK
        ### add residual connection or not
        self.residual = residual

        if self.num_layers < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")
        # Fail fast on an unknown layer type. The original built the
        # exception without raising it, which silently produced a model with
        # no convolution layers.
        if gnn_type not in ("gin", "gcn"):
            raise ValueError("Undefined GNN type called {}".format(gnn_type))

        self.atom_encoder = AtomEncoder(emb_dim)

        ###List of GNNs
        self.convs = nn.ModuleList()
        self.batch_norms = nn.ModuleList()

        for _ in range(num_layers):
            if gnn_type == "gin":
                self.convs.append(GINConv(emb_dim))
            else:
                self.convs.append(GCNConv(emb_dim))
            self.batch_norms.append(nn.BatchNorm1d(emb_dim))

    def forward(self, g, x, edge_attr):
        """Return node representations for graph ``g``.

        ``x`` is fed to the atom encoder and ``edge_attr`` is forwarded to
        every convolution layer. Raises ValueError for an unsupported ``JK``.
        """
        ### computing input node embedding
        h_list = [self.atom_encoder(x)]
        for layer in range(self.num_layers):
            h = self.convs[layer](g, h_list[layer], edge_attr)
            h = self.batch_norms[layer](h)

            if layer == self.num_layers - 1:
                # remove relu for the last layer
                h = F.dropout(h, self.drop_ratio, training=self.training)
            else:
                h = F.dropout(
                    F.relu(h), self.drop_ratio, training=self.training
                )

            if self.residual:
                # Out-of-place add: when dropout is a no-op, ``h`` can be the
                # ReLU output itself, and modifying it in place would
                # invalidate the tensor saved for the backward pass.
                h = h + h_list[layer]

            h_list.append(h)

        ### Different implementations of Jk-concat
        if self.JK == "last":
            node_representation = h_list[-1]
        elif self.JK == "sum":
            # NOTE(review): this sums h_list[0 .. num_layers - 1], so the
            # final layer's output is excluded - confirm this is intended.
            node_representation = 0
            for layer in range(self.num_layers):
                node_representation += h_list[layer]
        else:
            raise ValueError("Undefined JK type called {}".format(self.JK))

        return node_representation
passing layers emb_dim (int): node embedding dimensionality """ super(GNN_node_Virtualnode, self).__init__() self.num_layers = num_layers self.drop_ratio = drop_ratio self.JK = JK ### add residual connection or not self.residual = residual if self.num_layers < 2: raise ValueError("Number of GNN layers must be greater than 1.") self.atom_encoder = AtomEncoder(emb_dim) ### set the initial virtual node embedding to 0. self.virtualnode_embedding = nn.Embedding(1, emb_dim) nn.init.constant_(self.virtualnode_embedding.weight.data, 0) ### List of GNNs self.convs = nn.ModuleList() ### batch norms applied to node embeddings self.batch_norms = nn.ModuleList() ### List of MLPs to transform virtual node at every layer self.mlp_virtualnode_list = nn.ModuleList() for layer in range(num_layers): if gnn_type == "gin": self.convs.append(GINConv(emb_dim)) elif gnn_type == "gcn": self.convs.append(GCNConv(emb_dim)) else: ValueError("Undefined GNN type called {}".format(gnn_type)) self.batch_norms.append(nn.BatchNorm1d(emb_dim)) for layer in range(num_layers - 1): self.mlp_virtualnode_list.append( nn.Sequential( nn.Linear(emb_dim, emb_dim), nn.BatchNorm1d(emb_dim), nn.ReLU(), nn.Linear(emb_dim, emb_dim), nn.BatchNorm1d(emb_dim), nn.ReLU(), ) ) self.pool = SumPooling() def forward(self, g, x, edge_attr): ### virtual node embeddings for graphs virtualnode_embedding = self.virtualnode_embedding( torch.zeros(g.batch_size).to(x.dtype).to(x.device) ) h_list = [self.atom_encoder(x)] batch_id = dgl.broadcast_nodes( g, torch.arange(g.batch_size).to(x.device) ) for layer in range(self.num_layers): ### add message from virtual nodes to graph nodes h_list[layer] = h_list[layer] + virtualnode_embedding[batch_id] ### Message passing among graph nodes h = self.convs[layer](g, h_list[layer], edge_attr) h = self.batch_norms[layer](h) if layer == self.num_layers - 1: # remove relu for the last layer h = F.dropout(h, self.drop_ratio, training=self.training) else: h = F.dropout( F.relu(h), 
class GNN(nn.Module):
    """Graph-level regressor: node encoder, graph pooling, linear head."""

    def __init__(
        self,
        num_tasks=1,
        num_layers=5,
        emb_dim=300,
        gnn_type="gin",
        virtual_node=True,
        residual=False,
        drop_ratio=0,
        JK="last",
        graph_pooling="sum",
    ):
        """
        num_tasks (int): number of labels to be predicted
        num_layers (int): number of message passing layers (at least 2)
        emb_dim (int): node embedding dimensionality
        gnn_type (str): layer type forwarded to the node encoder
        virtual_node (bool): whether to add virtual node or not
        residual (bool): forwarded to the node encoder
        drop_ratio (float): forwarded to the node encoder
        JK (str): forwarded to the node encoder
        graph_pooling (str): one of "sum", "mean", "max", "attention",
            "set2set"

        Raises ValueError if ``num_layers < 2`` or ``graph_pooling`` is
        not one of the supported names.
        """
        super(GNN, self).__init__()

        self.num_layers = num_layers
        self.drop_ratio = drop_ratio
        self.JK = JK
        self.emb_dim = emb_dim
        self.num_tasks = num_tasks
        self.graph_pooling = graph_pooling

        if self.num_layers < 2:
            raise ValueError("Number of GNN layers must be greater than 1.")

        ### GNN to generate node embeddings
        if virtual_node:
            self.gnn_node = GNN_node_Virtualnode(
                num_layers,
                emb_dim,
                JK=JK,
                drop_ratio=drop_ratio,
                residual=residual,
                gnn_type=gnn_type,
            )
        else:
            self.gnn_node = GNN_node(
                num_layers,
                emb_dim,
                JK=JK,
                drop_ratio=drop_ratio,
                residual=residual,
                gnn_type=gnn_type,
            )

        ### Pooling function to generate whole-graph embeddings
        if self.graph_pooling == "sum":
            self.pool = SumPooling()
        elif self.graph_pooling == "mean":
            self.pool = AvgPooling()
        elif self.graph_pooling == "max":
            # Must be an instance: storing the class itself would make
            # ``self.pool(g, h_node)`` in forward() call the constructor
            # with two unexpected arguments.
            self.pool = MaxPooling()
        elif self.graph_pooling == "attention":
            self.pool = GlobalAttentionPooling(
                gate_nn=nn.Sequential(
                    nn.Linear(emb_dim, 2 * emb_dim),
                    nn.BatchNorm1d(2 * emb_dim),
                    nn.ReLU(),
                    nn.Linear(2 * emb_dim, 1),
                )
            )
        elif self.graph_pooling == "set2set":
            self.pool = Set2Set(emb_dim, n_iters=2, n_layers=2)
        else:
            raise ValueError("Invalid graph pooling type.")

        # The prediction head takes 2 * emb_dim inputs for set2set pooling
        # and emb_dim inputs otherwise.
        if graph_pooling == "set2set":
            self.graph_pred_linear = nn.Linear(2 * self.emb_dim, self.num_tasks)
        else:
            self.graph_pred_linear = nn.Linear(self.emb_dim, self.num_tasks)

    def forward(self, g, x, edge_attr):
        """Return per-graph predictions.

        In training mode the raw output is returned; in eval mode the output
        is clamped to the range [0, 50].
        """
        h_node = self.gnn_node(g, x, edge_attr)

        h_graph = self.pool(g, h_node)
        output = self.graph_pred_linear(h_graph)

        if self.training:
            return output
        return torch.clamp(output, min=0, max=50)
def main():
    """Train a GNN baseline on PCQM4M and optionally save predictions.

    Parses the command line, builds the data loaders and the model, then
    trains for ``args.epochs`` epochs. Whenever the validation MAE improves,
    a checkpoint is written (if ``--checkpoint_dir`` is set) and a test
    submission is saved (if ``--save_test_dir`` is set).

    Raises ValueError if ``--gnn`` is not one of the supported names.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description="GNN baselines on pcqm4m with DGL"
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="random seed to use (default: 42)"
    )
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        help="which gpu to use if any (default: 0)",
    )
    parser.add_argument(
        "--gnn",
        type=str,
        default="gin-virtual",
        help="GNN to use, which can be from "
        "[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)",
    )
    parser.add_argument(
        "--graph_pooling",
        type=str,
        default="sum",
        help="graph pooling strategy mean or sum (default: sum)",
    )
    parser.add_argument(
        "--drop_ratio", type=float, default=0, help="dropout ratio (default: 0)"
    )
    parser.add_argument(
        "--num_layers",
        type=int,
        default=5,
        help="number of GNN message passing layers (default: 5)",
    )
    parser.add_argument(
        "--emb_dim",
        type=int,
        default=600,
        help="dimensionality of hidden units in GNNs (default: 600)",
    )
    parser.add_argument(
        "--train_subset",
        action="store_true",
        # argparse %-formats help strings, so a literal percent sign must be
        # written as "%%"; a bare "10% of" makes --help fail.
        help="use 10%% of the training set for training",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=100,
        help="number of epochs to train (default: 100)",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=0,
        help="number of workers (default: 0)",
    )
    parser.add_argument(
        "--log_dir",
        type=str,
        default="",
        help="tensorboard log directory. If not specified, "
        "tensorboard will not be used.",
    )
    parser.add_argument(
        "--checkpoint_dir",
        type=str,
        default="",
        help="directory to save checkpoint",
    )
    parser.add_argument(
        "--save_test_dir",
        type=str,
        default="",
        help="directory to save test submission file",
    )
    args = parser.parse_args()

    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = DglPCQM4MDataset(root="dataset/")

    # split_idx['train'], split_idx['valid'], split_idx['test']
    # separately gives a 1D int64 tensor
    split_idx = dataset.get_idx_split()

    ### automatic evaluator.
    evaluator = PCQM4MEvaluator()

    train_idx = split_idx["train"]
    if args.train_subset:
        # Random 10% of the training indices.
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(train_idx))[
            : int(subset_ratio * len(train_idx))
        ]
        train_idx = train_idx[subset_idx]

    train_loader = DataLoader(
        dataset[train_idx],
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        collate_fn=collate_dgl,
    )
    valid_loader = DataLoader(
        dataset[split_idx["valid"]],
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        collate_fn=collate_dgl,
    )

    # The test loader is only needed when a submission will be written.
    if args.save_test_dir != "":
        test_loader = DataLoader(
            dataset[split_idx["test"]],
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
            collate_fn=collate_dgl,
        )

    if args.checkpoint_dir != "":
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        "num_layers": args.num_layers,
        "emb_dim": args.emb_dim,
        "drop_ratio": args.drop_ratio,
        "graph_pooling": args.graph_pooling,
    }

    # --gnn name -> (gnn_type, virtual_node)
    gnn_variants = {
        "gin": ("gin", False),
        "gin-virtual": ("gin", True),
        "gcn": ("gcn", False),
        "gcn-virtual": ("gcn", True),
    }
    if args.gnn not in gnn_variants:
        raise ValueError("Invalid GNN type")
    gnn_type, virtual_node = gnn_variants[args.gnn]
    model = GNN(
        gnn_type=gnn_type, virtual_node=virtual_node, **shared_params
    ).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f"#Params: {num_params}")

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != "":
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        # Subset training overrides --epochs and uses a longer LR schedule.
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print("Training...")
        train_mae = train(model, device, train_loader, optimizer)

        print("Evaluating...")
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({"Train": train_mae, "Validation": valid_mae})

        if args.log_dir != "":
            writer.add_scalar("valid/mae", valid_mae, epoch)
            writer.add_scalar("train/mae", train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != "":
                print("Saving checkpoint...")
                checkpoint = {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                    "best_val_mae": best_valid_mae,
                    "num_params": num_params,
                }
                torch.save(
                    checkpoint,
                    os.path.join(args.checkpoint_dir, "checkpoint.pt"),
                )

            if args.save_test_dir != "":
                print("Predicting on test data...")
                y_pred = test(model, device, test_loader)
                print("Saving test submission file...")
                evaluator.save_test_submission(
                    {"y_pred": y_pred}, args.save_test_dir
                )

        scheduler.step()

        print(f"Best validation MAE so far: {best_valid_mae}")

    if args.log_dir != "":
        writer.close()
def main():
    """Run test-set inference from raw SMILES with a saved checkpoint.

    Parses the command line, builds an on-the-fly SMILES-to-graph test
    loader, restores the model from ``<checkpoint_dir>/checkpoint.pt`` and
    writes the predictions with the PCQM4M evaluator.

    Raises ValueError for an unknown ``--gnn`` and RuntimeError when the
    checkpoint file does not exist.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description="GNN baselines on pcqm4m with DGL"
    )
    parser.add_argument(
        "--seed", type=int, default=42, help="random seed to use (default: 42)"
    )
    parser.add_argument(
        "--device",
        type=int,
        default=0,
        help="which gpu to use if any (default: 0)",
    )
    parser.add_argument(
        "--gnn",
        type=str,
        default="gin-virtual",
        help="GNN to use, which can be from "
        "[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)",
    )
    parser.add_argument(
        "--graph_pooling",
        type=str,
        default="sum",
        help="graph pooling strategy mean or sum (default: sum)",
    )
    parser.add_argument(
        "--drop_ratio", type=float, default=0, help="dropout ratio (default: 0)"
    )
    parser.add_argument(
        "--num_layers",
        type=int,
        default=5,
        help="number of GNN message passing layers (default: 5)",
    )
    parser.add_argument(
        "--emb_dim",
        type=int,
        default=600,
        help="dimensionality of hidden units in GNNs (default: 600)",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=0,
        help="number of workers (default: 0)",
    )
    parser.add_argument(
        "--checkpoint_dir",
        type=str,
        default="",
        help="directory to save checkpoint",
    )
    parser.add_argument(
        "--save_test_dir",
        type=str,
        default="",
        help="directory to save test submission file",
    )
    args = parser.parse_args()

    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic data loading and splitting
    ### Read in the raw SMILES strings
    smiles_dataset = PCQM4MDataset(root="dataset/", only_smiles=True)
    split_idx = smiles_dataset.get_idx_split()

    test_smiles_dataset = [smiles_dataset[i] for i in split_idx["test"]]
    onthefly_dataset = OnTheFlyPCQMDataset(test_smiles_dataset)
    test_loader = DataLoader(
        onthefly_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        collate_fn=collate_dgl,
    )

    ### automatic evaluator.
    evaluator = PCQM4MEvaluator()

    shared_params = {
        "num_layers": args.num_layers,
        "emb_dim": args.emb_dim,
        "drop_ratio": args.drop_ratio,
        "graph_pooling": args.graph_pooling,
    }

    # --gnn name -> (gnn_type, virtual_node)
    gnn_variants = {
        "gin": ("gin", False),
        "gin-virtual": ("gin", True),
        "gcn": ("gcn", False),
        "gcn-virtual": ("gcn", True),
    }
    if args.gnn not in gnn_variants:
        raise ValueError("Invalid GNN type")
    gnn_type, virtual_node = gnn_variants[args.gnn]
    model = GNN(
        gnn_type=gnn_type, virtual_node=virtual_node, **shared_params
    ).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f"#Params: {num_params}")

    checkpoint_path = os.path.join(args.checkpoint_dir, "checkpoint.pt")
    if not os.path.exists(checkpoint_path):
        raise RuntimeError(f"Checkpoint file not found at {checkpoint_path}")

    ## reading in checkpoint
    # map_location remaps the saved tensors onto the device selected above,
    # so a checkpoint written on a GPU also loads on a CPU-only host or on a
    # different GPU index.
    # NOTE: weights_only=False unpickles arbitrary objects; only load
    # checkpoints from a trusted source.
    checkpoint = torch.load(
        checkpoint_path, map_location=device, weights_only=False
    )
    model.load_state_dict(checkpoint["model_state_dict"])

    print("Predicting on test data...")
    y_pred = test(model, device, test_loader)
    print("Saving test submission file...")
    evaluator.save_test_submission({"y_pred": y_pred}, args.save_test_dir)
class OGC(nn.Module):
    """Linear classifier plus the label/mask tensors used to refine embeddings."""

    def __init__(self, graph):
        """Build the classifier and cache label and mask data from ``graph``."""
        super().__init__()
        node_data = graph.ndata
        labels = node_data["label"]
        train_mask = node_data["train_mask"]

        self.linear_clf = LinearNeuralNetwork(
            nfeat=node_data["feat"].shape[1],
            nclass=labels.max().item() + 1,
            bias=False,
        )

        self.label = labels
        self.label_one_hot = F.one_hot(labels).float()
        # LIM trick, else use both train and val set to construct this matrix.
        self.label_idx_mat = dglsp.diag(train_mask).float()

        self.test_mask = node_data["test_mask"]
        # Sum of the train and validation masks.
        self.tv_mask = train_mask + node_data["val_mask"]

    def forward(self, x):
        """Apply the linear classifier to ``x``."""
        return self.linear_clf(x)

    def update_embeds(self, embeds, lazy_adj, args):
        """Return embeddings after one smoothing step and one supervised step.

        The embeddings are multiplied by ``lazy_adj`` and then moved against
        the supervised derivative scaled by ``args.lr_sup``. As a side
        effect, ``args.lr_sup`` is multiplied by ``args.decline``.
        """
        # Detached predictions and classifier weights (no autograd here).
        predictions = self(embeds).data
        weight = self.linear_clf.W.weight.data

        # Update the smoothness loss via LGC.
        smoothed = dglsp.spmm(lazy_adj, embeds)

        # Update the supervised loss via SEB.
        residual = -self.label_one_hot + predictions
        masked_residual = dglsp.spmm(self.label_idx_mat, residual)
        deriv_sup = 2 * dglsp.matmul(masked_residual, weight)

        updated = smoothed - args.lr_sup * deriv_sup

        # Decay the supervised learning rate for the next call.
        args.lr_sup = args.lr_sup * args.decline
        return updated
embeds = model.update_embeds(embeds, lazy_adj, args) loss_tv, acc_tv, acc_test, pred = model_test(model, embeds) print( "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( i + 1, loss_tv, acc_tv, acc_test ) ) sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) if sim_rate > args.max_sim_rate: patience += 1 if patience > args.max_patience: break last_acc = acc_test last_output = pred return last_acc if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( "--dataset", type=str, default="citeseer", choices=["cora", "citeseer", "pubmed"], help="dataset to use", ) parser.add_argument( "--decline", type=float, default=0.9, help="decline rate" ) parser.add_argument( "--lr_sup", type=float, default=0.001, help="learning rate for supervised loss", ) parser.add_argument( "--lr_clf", type=float, default=0.5, help="learning rate for the used linear classifier", ) parser.add_argument( "--beta", type=float, default=0.1, help="moving probability that a node moves to its neighbors", ) parser.add_argument( "--max_sim_rate", type=float, default=0.995, help="max label prediction similarity between iterations", ) parser.add_argument( "--max_patience", type=int, default=2, help="tolerance for consecutively similar test predictions", ) parser.add_argument( "--device", type=str, default="cpu", choices=["cpu", "cuda"], help="device to use", ) args, _ = parser.parse_known_args() # Load and preprocess dataset. 
def model_test(model, embeds):
    """Evaluate ``model`` on ``embeds`` without tracking gradients.

    Returns a tuple ``(loss_tv, acc_tv, acc_test, pred)``: the MSE loss on the
    rows selected by ``model.tv_mask``, the accuracy on those rows, the
    accuracy on the rows selected by ``model.test_mask``, and the arg-max
    prediction for every row.
    """
    model.eval()
    tv_mask = model.tv_mask
    test_mask = model.test_mask

    with torch.no_grad():
        logits = model(embeds)
        pred = logits.argmax(dim=-1)
        loss_tv = F.mse_loss(logits[tv_mask], model.label_one_hot[tv_mask])

    def _accuracy(mask):
        # Fraction of masked rows whose prediction equals the stored label.
        hits = (pred[mask] == model.label[mask]).sum()
        return float(hits / mask.sum())

    acc_tv = _accuracy(tv_mask)
    acc_test = _accuracy(test_mask)
    return loss_tv.item(), acc_tv, acc_test, pred
def compute_pagerank(g):
    """Run ``K`` PageRank iterations on ``g`` with message passing.

    The result is stored in ``g.ndata["pv"]`` and also returned. The node
    count is taken from ``g`` itself rather than from the module-level ``N``,
    so the function works for any graph passed in, and the initial vector is
    created on the graph's device.
    """
    num_nodes = g.num_nodes()
    g.ndata["pv"] = torch.ones(num_nodes, device=g.device) / num_nodes
    # Nodes without out-edges never send a message, so clamping their degree
    # to 1 does not change the result; it only avoids a transient inf.
    degrees = g.out_degrees(g.nodes()).type(torch.float32).clamp(min=1)

    for _ in range(K):
        # Each node splits its current value evenly over its out-edges ...
        g.ndata["pv"] = g.ndata["pv"] / degrees
        # ... and every node sums what it receives.
        g.update_all(
            message_func=fn.copy_u(u="pv", out="m"),
            reduce_func=fn.sum(msg="m", out="pv"),
        )
        g.ndata["pv"] = (1 - DAMP) / num_nodes + DAMP * g.ndata["pv"]

    return g.ndata["pv"]
If your hidden state size is so large that the learnable vectors cannot fit into GPU memory, use this script for sparse embedding updates (written with `torch.optim.SparseAdam`) instead:

```
python model_sparse.py data_processed --num-epochs 300 --num-workers 2 --device cuda:0 --hidden-dims 1024
```

Note that since the embedding update is done on CPU, it will be significantly slower than doing everything on GPU.

The HITS@10 is 0.01241, compared to 0.01220 with SLIM with the same dimensionality.

## Difference from the paper

The implementation here differs from what is described in the paper:

1. The paper describes a supervised setting where the authors have a ground truth set of which items are relevant. However, in traditional recommender system datasets we don't have such labels other than which items each user has interacted with (as well as the user/item's own features). Therefore, I adapted PinSAGE to an unsupervised setting where I predict whether two items are co-interacted with by the same user.
2. The PinSAGE paper explicitly states that items do not have learnable node embeddings; instead, the embeddings are expressed directly as a function of node features. While this is reasonable for rich datasets like Pinterest's, where images and texts are rich enough to distinguish the items from each other, it is unfortunately not the case for traditional recommender system datasets like MovieLens or Nowplaying-RS, where we only have a bunch of categorical or numeric variables. I found that adding a learnable embedding for each item is still helpful for those datasets.
3. The PinSAGE paper directly passes the GNN output to an MLP and makes the result the final item representation. Here, I add the GNN output to the node's own learnable embedding as the final item representation instead.
def _series_to_tensor(series):
    """Convert a pandas Series into a 1-D torch tensor.

    A categorical series becomes an int64 tensor of its category codes; any
    other series is treated as numeric and becomes a float32 tensor.
    """
    # Imported here because this module does not import torch or pandas at
    # file level; the previous version referenced ``torch`` without importing
    # it and relied on ``is_categorical``, which pandas 2.0 removed.
    import pandas as pd
    import torch

    if isinstance(series.dtype, pd.CategoricalDtype):
        codes = series.cat.codes.to_numpy().astype("int64")
        return torch.tensor(codes, dtype=torch.int64)
    # numeric
    return torch.tensor(series.to_numpy(), dtype=torch.float32)


class PandasGraphBuilder(object):
    """Creates a heterogeneous graph from multiple pandas dataframes.

    Examples
    --------
    Let's say we have the following three pandas dataframes:

    User table ``users``:

    ===========  ===========  =======
    ``user_id``  ``country``  ``age``
    ===========  ===========  =======
    XYZZY        U.S.         25
    FOO          China        24
    BAR          China        23
    ===========  ===========  =======

    Game table ``games``:

    ===========  =========  ==============  ==================
    ``game_id``  ``title``  ``is_sandbox``  ``is_multiplayer``
    ===========  =========  ==============  ==================
    1            Minecraft  True            True
    2            Tetris 99  False           True
    ===========  =========  ==============  ==================

    Play relationship table ``plays``:

    ===========  ===========  =========
    ``user_id``  ``game_id``  ``hours``
    ===========  ===========  =========
    XYZZY        1            24
    FOO          1            20
    FOO          2            16
    BAR          2            28
    ===========  ===========  =========

    One could then create a bidirectional bipartite graph as follows:
    >>> builder = PandasGraphBuilder()
    >>> builder.add_entities(users, 'user_id', 'user')
    >>> builder.add_entities(games, 'game_id', 'game')
    >>> builder.add_binary_relations(plays, 'user_id', 'game_id', 'plays')
    >>> builder.add_binary_relations(plays, 'game_id', 'user_id', 'played-by')
    >>> g = builder.build()
    >>> g.num_nodes('user')
    3
    >>> g.num_edges('plays')
    4
    """

    def __init__(self):
        self.entity_tables = {}
        self.relation_tables = {}

        # mapping from primary key name to entity name
        self.entity_pk_to_name = {}
        # mapping from entity name to primary key
        self.entity_pk = {}
        # mapping from entity names to primary key values
        self.entity_key_map = {}
        self.num_nodes_per_type = {}
        self.edges_per_relation = {}
        self.relation_name_to_etype = {}
        # mapping from relation name to source key
        self.relation_src_key = {}
        # mapping from relation name to destination key
        self.relation_dst_key = {}

    def add_entities(self, entity_table, primary_key, name):
        """Register ``entity_table`` as the node type ``name``.

        Parameters
        ----------
        entity_table : pandas.DataFrame
            One row per entity.
        primary_key : str
            Column of ``entity_table`` that identifies each entity.
        name : str
            Node type name to register the table under.

        Raises
        ------
        ValueError
            If a primary key value appears more than once.
        """
        entities = entity_table[primary_key].astype("category")
        if not (entities.value_counts() == 1).all():
            raise ValueError(
                "Different entity with the same primary key detected."
            )
        # preserve the category order in the original entity table
        entities = entities.cat.reorder_categories(
            entity_table[primary_key].values
        )

        self.entity_pk_to_name[primary_key] = name
        self.entity_pk[name] = primary_key
        self.num_nodes_per_type[name] = entity_table.shape[0]
        self.entity_key_map[name] = entities
        self.entity_tables[name] = entity_table

    def add_binary_relations(
        self, relation_table, source_key, destination_key, name
    ):
        """Register ``relation_table`` as the edge type ``name``.

        ``source_key`` and ``destination_key`` must be primary key names that
        were previously passed to :meth:`add_entities`; the values in those
        columns are translated into category codes of the matching entity.

        Raises
        ------
        ValueError
            If a source or destination value is not among the registered
            entities' primary key values.
        """
        src = relation_table[source_key].astype("category")
        src = src.cat.set_categories(
            self.entity_key_map[
                self.entity_pk_to_name[source_key]
            ].cat.categories
        )
        dst = relation_table[destination_key].astype("category")
        dst = dst.cat.set_categories(
            self.entity_key_map[
                self.entity_pk_to_name[destination_key]
            ].cat.categories
        )
        # Values outside the entity's categories become NaN after
        # ``set_categories``, which is how unknown keys are detected.
        if src.isnull().any():
            raise ValueError(
                "Some source entities in relation %s do not exist in entity %s."
                % (name, source_key)
            )
        if dst.isnull().any():
            raise ValueError(
                "Some destination entities in relation %s do not exist in entity %s."
                % (name, destination_key)
            )

        srctype = self.entity_pk_to_name[source_key]
        dsttype = self.entity_pk_to_name[destination_key]
        etype = (srctype, name, dsttype)
        self.relation_name_to_etype[name] = etype
        self.edges_per_relation[etype] = (
            src.cat.codes.values.astype("int64"),
            dst.cat.codes.values.astype("int64"),
        )
        self.relation_tables[name] = relation_table
        self.relation_src_key[name] = source_key
        self.relation_dst_key[name] = destination_key

    def build(self):
        """Return a ``dgl.heterograph`` built from the registered tables."""
        # Create heterograph
        graph = dgl.heterograph(
            self.edges_per_relation, self.num_nodes_per_type
        )
        return graph
def train_test_split_by_time(df, timestamp, user):
    """Split interactions into train/val/test sets by per-user recency.

    Three boolean columns (``train_mask``, ``val_mask``, ``test_mask``) are
    added to ``df`` in place. Within each group of rows sharing the same
    ``user`` value, sorted by ``timestamp``:

    * if the group has more than one row, the last row moves from train to
      test;
    * if the group has more than two rows, the second-to-last row moves from
      train to validation.

    Parameters
    ----------
    df : pandas.DataFrame
        Interaction table; it is modified in place by the added mask columns.
    timestamp : str
        Name of the column to sort each user's rows by.
    user : str
        Name of the column to group by.

    Returns
    -------
    tuple of numpy.ndarray
        Positional indices of the train, validation and test rows, taken
        after the result has been sorted by index.
    """
    df["train_mask"] = np.ones((len(df),), dtype=np.bool_)
    df["val_mask"] = np.zeros((len(df),), dtype=np.bool_)
    df["test_mask"] = np.zeros((len(df),), dtype=np.bool_)

    # Describe the output schema to dask with an empty frame that carries the
    # real column names and dtypes, instead of a hard-coded MovieLens schema
    # that does not match other datasets.
    meta_df = df.iloc[:0]
    ddf = dd.from_pandas(df, npartitions=10)

    def train_test_split(group):
        group = group.sort_values([timestamp])
        # Address the mask columns by name so the split does not depend on
        # them being the last three columns of the frame.
        train_col = group.columns.get_loc("train_mask")
        val_col = group.columns.get_loc("val_mask")
        test_col = group.columns.get_loc("test_mask")
        if group.shape[0] > 1:
            group.iloc[-1, train_col] = False
            group.iloc[-1, test_col] = True
        if group.shape[0] > 2:
            group.iloc[-2, train_col] = False
            group.iloc[-2, val_col] = True
        return group

    df = (
        ddf.groupby(user, group_keys=False)
        .apply(train_test_split, meta=meta_df)
        .compute(scheduler="processes")
        .sort_index()
    )
    # Show the split of the first user as a quick sanity check.
    print(df[df[user] == df[user].unique()[0]].sort_values(timestamp))
    return (
        df["train_mask"].to_numpy().nonzero()[0],
        df["val_mask"].to_numpy().nonzero()[0],
        df["test_mask"].to_numpy().nonzero()[0],
    )
def linear_normalize(values):
    """Min-max scale each column of ``values`` along axis 0.

    Columns whose minimum equals their maximum are mapped to 0 instead of
    producing NaN from a division by zero.

    Parameters
    ----------
    values : numpy.ndarray
        Array to normalise; the minimum and maximum are taken along axis 0.

    Returns
    -------
    numpy.ndarray
        ``(values - min) / (max - min)`` computed per column.
    """
    lowest = values.min(0, keepdims=True)
    span = values.max(0, keepdims=True) - lowest
    # A zero span means the column is constant; dividing by 1 keeps the
    # numerator (which is all zeros for that column) unchanged.
    span = np.where(span == 0, 1, span)
    return (values - lowest) / span
def disable_grad(module):
    """Turn off gradient tracking for every parameter of ``module``."""
    for weight in module.parameters():
        weight.requires_grad_(False)
class BagOfWords(nn.Module):
    """Length-normalised sum of token embeddings.

    Parameters
    ----------
    vocab
        Object exposing ``get_itos()`` (index -> token list) and
        ``get_stoi()`` (token -> index mapping); the index of the ``""``
        token is used as the padding index.
    hidden_dims : int
        Size of each token embedding.
    """

    def __init__(self, vocab, hidden_dims):
        super().__init__()

        self.emb = nn.Embedding(
            len(vocab.get_itos()),
            hidden_dims,
            padding_idx=vocab.get_stoi()[""],
        )
        nn.init.xavier_uniform_(self.emb.weight)
        # The initialiser above also overwrites the padding row, which
        # ``nn.Embedding`` had set to zero. Restore it so that padded
        # positions contribute nothing to the bag sum.
        with torch.no_grad():
            self.emb.weight[self.emb.padding_idx].zero_()

    def forward(self, x, length):
        """Return the embedding sum over dim 1 of ``x`` divided by ``length``.

        ``length`` is clamped to at least 1 so that a zero length yields a
        finite result instead of a division by zero.
        """
        denom = length.unsqueeze(1).float().clamp(min=1)
        return self.emb(x).sum(1) / denom
class WeightedSAGEConv(nn.Module):
    """GraphSAGE-style convolution that aggregates with scalar edge weights."""

    def __init__(self, input_dims, hidden_dims, output_dims, act=F.relu):
        super().__init__()

        self.act = act
        # Q transforms source features before aggregation; W combines the
        # aggregated neighbourhood with the destination's own features.
        self.Q = nn.Linear(input_dims, hidden_dims)
        self.W = nn.Linear(input_dims + hidden_dims, output_dims)
        self.reset_parameters()
        self.dropout = nn.Dropout(0.5)

    def reset_parameters(self):
        """Xavier-initialise both weight matrices and zero both biases."""
        relu_gain = nn.init.calculate_gain("relu")
        for linear in (self.Q, self.W):
            nn.init.xavier_uniform_(linear.weight, gain=relu_gain)
            nn.init.constant_(linear.bias, 0)

    def forward(self, g, h, weights):
        """
        g : graph
        h : pair ``(h_src, h_dst)`` of source and destination node features
        weights : scalar edge weights
        """
        src_feat, dst_feat = h
        with g.local_scope():
            g.srcdata["n"] = self.act(self.Q(self.dropout(src_feat)))
            g.edata["w"] = weights.float()
            # Weighted sum of neighbour features, then the sum of the weights.
            g.update_all(fn.u_mul_e("n", "w", "m"), fn.sum("m", "n"))
            g.update_all(fn.copy_e("w", "m"), fn.sum("m", "ws"))

            neigh = g.dstdata["n"]
            weight_sum = g.dstdata["ws"].unsqueeze(1).clamp(min=1)
            combined = torch.cat([neigh / weight_sum, dst_feat], 1)
            out = self.act(self.W(self.dropout(combined)))

            # L2-normalise each row; rows with zero norm are divided by 1.
            norm = out.norm(2, 1, keepdim=True)
            safe_norm = torch.where(norm == 0, torch.ones_like(norm), norm)
            return out / safe_norm
class ItemToItemScorer(nn.Module):
    """Scores node pairs by a dot product plus one learnable bias per node."""

    def __init__(self, full_graph, ntype):
        super().__init__()

        # One scalar bias for each node of type ``ntype`` in the full graph.
        self.bias = nn.Parameter(torch.zeros(full_graph.num_nodes(ntype), 1))

    def _add_bias(self, edges):
        """Edge UDF: add the biases of both endpoints to the score ``s``."""
        src_bias = self.bias[edges.src[dgl.NID]]
        dst_bias = self.bias[edges.dst[dgl.NID]]
        biased = edges.data["s"] + src_bias + dst_bias
        return {"s": biased}

    def forward(self, item_item_graph, h):
        """
        item_item_graph : graph consists of edges connecting the pairs
        h : hidden state of every node
        """
        graph = item_item_graph
        with graph.local_scope():
            graph.ndata["h"] = h
            # Dot product of the two endpoint states, then the bias terms.
            graph.apply_edges(fn.u_dot_v("h", "h", "s"))
            graph.apply_edges(self._add_bias)
            return graph.edata["s"]
h_item) return (neg_score - pos_score + 1).clamp(min=0) def get_repr(self, blocks): h_item = self.proj(blocks[0].srcdata) h_item_dst = self.proj(blocks[-1].dstdata) return h_item_dst + self.sage(blocks, h_item) def train(dataset, args): g = dataset["train-graph"] val_matrix = dataset["val-matrix"].tocsr() test_matrix = dataset["test-matrix"].tocsr() item_texts = dataset["item-texts"] user_ntype = dataset["user-type"] item_ntype = dataset["item-type"] user_to_item_etype = dataset["user-to-item-type"] timestamp = dataset["timestamp-edge-column"] device = torch.device(args.device) # Assign user and movie IDs and use them as features (to learn an individual trainable # embedding for each entity) g.nodes[user_ntype].data["id"] = torch.arange(g.num_nodes(user_ntype)) g.nodes[item_ntype].data["id"] = torch.arange(g.num_nodes(item_ntype)) # Prepare torchtext dataset and Vocabulary textset = {} tokenizer = get_tokenizer(None) textlist = [] batch_first = True for i in range(g.num_nodes(item_ntype)): for key in item_texts.keys(): l = tokenizer(item_texts[key][i].lower()) textlist.append(l) for key, field in item_texts.items(): vocab2 = build_vocab_from_iterator( textlist, specials=["", ""] ) textset[key] = ( textlist, vocab2, vocab2.get_stoi()[""], batch_first, ) # Sampler batch_sampler = sampler_module.ItemToItemBatchSampler( g, user_ntype, item_ntype, args.batch_size ) neighbor_sampler = sampler_module.NeighborSampler( g, user_ntype, item_ntype, args.random_walk_length, args.random_walk_restart_prob, args.num_random_walks, args.num_neighbors, args.num_layers, ) collator = sampler_module.PinSAGECollator( neighbor_sampler, g, item_ntype, textset ) dataloader = DataLoader( batch_sampler, collate_fn=collator.collate_train, num_workers=args.num_workers, ) dataloader_test = DataLoader( torch.arange(g.num_nodes(item_ntype)), batch_size=args.batch_size, collate_fn=collator.collate_test, num_workers=args.num_workers, ) dataloader_it = iter(dataloader) # Model model = PinSAGEModel( 
g, item_ntype, textset, args.hidden_dims, args.num_layers ).to(device) # Optimizer opt = torch.optim.Adam(model.parameters(), lr=args.lr) # For each batch of head-tail-negative triplets... for epoch_id in range(args.num_epochs): model.train() for batch_id in tqdm.trange(args.batches_per_epoch): pos_graph, neg_graph, blocks = next(dataloader_it) # Copy to GPU for i in range(len(blocks)): blocks[i] = blocks[i].to(device) pos_graph = pos_graph.to(device) neg_graph = neg_graph.to(device) loss = model(pos_graph, neg_graph, blocks).mean() opt.zero_grad() loss.backward() opt.step() # Evaluate model.eval() with torch.no_grad(): item_batches = torch.arange(g.num_nodes(item_ntype)).split( args.batch_size ) h_item_batches = [] for blocks in dataloader_test: for i in range(len(blocks)): blocks[i] = blocks[i].to(device) h_item_batches.append(model.get_repr(blocks)) h_item = torch.cat(h_item_batches, 0) print( evaluation.evaluate_nn(dataset, h_item, args.k, args.batch_size) ) if __name__ == "__main__": # Arguments parser = argparse.ArgumentParser() parser.add_argument("dataset_path", type=str) parser.add_argument("--random-walk-length", type=int, default=2) parser.add_argument("--random-walk-restart-prob", type=float, default=0.5) parser.add_argument("--num-random-walks", type=int, default=10) parser.add_argument("--num-neighbors", type=int, default=3) parser.add_argument("--num-layers", type=int, default=2) parser.add_argument("--hidden-dims", type=int, default=16) parser.add_argument("--batch-size", type=int, default=32) parser.add_argument( "--device", type=str, default="cpu" ) # can also be "cuda:0" parser.add_argument("--num-epochs", type=int, default=1) parser.add_argument("--batches-per-epoch", type=int, default=20000) parser.add_argument("--num-workers", type=int, default=0) parser.add_argument("--lr", type=float, default=3e-5) parser.add_argument("-k", type=int, default=10) args = parser.parse_args() # Load dataset data_info_path = os.path.join(args.dataset_path, 
class PinSAGEModel(nn.Module):
    """PinSAGE variant whose per-item embedding table is supplied by the caller.

    The embedding module ``item_emb`` is not owned by this model; it is
    passed to :meth:`forward` and :meth:`get_repr` on every call.
    """

    def __init__(self, full_graph, ntype, textsets, hidden_dims, n_layers):
        super().__init__()

        self.proj = layers.LinearProjector(
            full_graph, ntype, textsets, hidden_dims
        )
        self.sage = layers.SAGENet(hidden_dims, n_layers)
        self.scorer = layers.ItemToItemScorer(full_graph, ntype)

    def forward(self, pos_graph, neg_graph, blocks, item_emb):
        """Return the hinge loss ``max(0, neg - pos + 1)`` for each pair."""
        item_repr = self.get_repr(blocks, item_emb)
        positive = self.scorer(pos_graph, item_repr)
        negative = self.scorer(neg_graph, item_repr)
        margin = negative - positive + 1
        return torch.clamp(margin, min=0)

    def get_repr(self, blocks, item_emb):
        """Return representations for the destination nodes of the last block."""
        src_data = blocks[0].srcdata
        dst_data = blocks[-1].dstdata

        # project features
        src_repr = self.proj(src_data)
        dst_repr = self.proj(dst_data)

        # add to the item embedding itself; the IDs are moved to CPU for the
        # lookup and the result is converted back to match the projection.
        src_emb = item_emb(src_data[dgl.NID].cpu()).to(src_repr)
        src_repr = src_repr + src_emb
        dst_emb = item_emb(dst_data[dgl.NID].cpu()).to(dst_repr)
        dst_repr = dst_repr + dst_emb

        return dst_repr + self.sage(blocks, src_repr)
torch.device(args.device) # Prepare torchtext dataset and vocabulary textset = {} tokenizer = get_tokenizer(None) textlist = [] batch_first = True for i in range(g.num_nodes(item_ntype)): for key in item_texts.keys(): l = tokenizer(item_texts[key][i].lower()) textlist.append(l) for key, field in item_texts.items(): vocab2 = build_vocab_from_iterator( textlist, specials=["", ""] ) textset[key] = ( textlist, vocab2, vocab2.get_stoi()[""], batch_first, ) # Sampler batch_sampler = sampler_module.ItemToItemBatchSampler( g, user_ntype, item_ntype, args.batch_size ) neighbor_sampler = sampler_module.NeighborSampler( g, user_ntype, item_ntype, args.random_walk_length, args.random_walk_restart_prob, args.num_random_walks, args.num_neighbors, args.num_layers, ) collator = sampler_module.PinSAGECollator( neighbor_sampler, g, item_ntype, textset ) dataloader = DataLoader( batch_sampler, collate_fn=collator.collate_train, num_workers=args.num_workers, ) dataloader_test = DataLoader( torch.arange(g.num_nodes(item_ntype)), batch_size=args.batch_size, collate_fn=collator.collate_test, num_workers=args.num_workers, ) dataloader_it = iter(dataloader) # Model model = PinSAGEModel( g, item_ntype, textset, args.hidden_dims, args.num_layers ).to(device) item_emb = nn.Embedding( g.num_nodes(item_ntype), args.hidden_dims, sparse=True ) # Optimizer opt = torch.optim.Adam(model.parameters(), lr=args.lr) opt_emb = torch.optim.SparseAdam(item_emb.parameters(), lr=args.lr) # For each batch of head-tail-negative triplets... 
for epoch_id in range(args.num_epochs): model.train() for batch_id in tqdm.trange(args.batches_per_epoch): pos_graph, neg_graph, blocks = next(dataloader_it) # Copy to GPU for i in range(len(blocks)): blocks[i] = blocks[i].to(device) pos_graph = pos_graph.to(device) neg_graph = neg_graph.to(device) loss = model(pos_graph, neg_graph, blocks, item_emb).mean() opt.zero_grad() opt_emb.zero_grad() loss.backward() opt.step() opt_emb.step() # Evaluate model.eval() with torch.no_grad(): item_batches = torch.arange(g.num_nodes(item_ntype)).split( args.batch_size ) h_item_batches = [] for blocks in tqdm.tqdm(dataloader_test): for i in range(len(blocks)): blocks[i] = blocks[i].to(device) h_item_batches.append(model.get_repr(blocks, item_emb)) h_item = torch.cat(h_item_batches, 0) print( evaluation.evaluate_nn(dataset, h_item, args.k, args.batch_size) ) if __name__ == "__main__": # Arguments parser = argparse.ArgumentParser() parser.add_argument("dataset_path", type=str) parser.add_argument("--random-walk-length", type=int, default=2) parser.add_argument("--random-walk-restart-prob", type=float, default=0.5) parser.add_argument("--num-random-walks", type=int, default=10) parser.add_argument("--num-neighbors", type=int, default=3) parser.add_argument("--num-layers", type=int, default=2) parser.add_argument("--hidden-dims", type=int, default=16) parser.add_argument("--batch-size", type=int, default=32) parser.add_argument( "--device", type=str, default="cpu" ) # can also be "cuda:0" parser.add_argument("--num-epochs", type=int, default=1) parser.add_argument("--batches-per-epoch", type=int, default=20000) parser.add_argument("--num-workers", type=int, default=0) parser.add_argument("--lr", type=float, default=3e-5) parser.add_argument("-k", type=int, default=10) args = parser.parse_args() # Load dataset data_info_path = os.path.join(args.dataset_path, "data.pkl") with open(data_info_path, "rb") as f: dataset = pickle.load(f) train_g_path = os.path.join(args.dataset_path, 
if __name__ == "__main__":
    # Command line: <directory with the raw .dat files> <output directory>.
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", type=str)
    parser.add_argument("out_directory", type=str)
    args = parser.parse_args()
    directory = args.directory
    out_directory = args.out_directory
    os.makedirs(out_directory, exist_ok=True)

    ## Build heterogeneous graph

    # Load data
    users = []
    with open(os.path.join(directory, "users.dat"), encoding="latin1") as f:
        for line in f:
            id_, gender, age, occupation, zip_ = line.strip().split("::")
            users.append(
                {
                    "user_id": int(id_),
                    "gender": gender,
                    "age": age,
                    "occupation": occupation,
                    "zip": zip_,
                }
            )
    users = pd.DataFrame(users).astype("category")

    movies = []
    with open(os.path.join(directory, "movies.dat"), encoding="latin1") as f:
        for line in f:
            id_, title, genres = line.strip().split("::")
            genres_set = set(genres.split("|"))

            # extract year
            # Raise explicitly instead of asserting so the check survives
            # ``python -O``.
            if not re.match(r".*\([0-9]{4}\)$", title):
                raise ValueError(
                    "movie title does not end with a 4-digit year: %r" % title
                )
            year = title[-5:-1]
            title = title[:-6].strip()

            data = {"movie_id": int(id_), "title": title, "year": year}
            # One boolean column per genre; missing genres are filled with
            # False further down.
            for genre in genres_set:
                data[genre] = True
            movies.append(data)
    movies = pd.DataFrame(movies).astype({"year": "category"})

    ratings = []
    with open(os.path.join(directory, "ratings.dat"), encoding="latin1") as f:
        for line in f:
            user_id, movie_id, rating, timestamp = [
                int(field) for field in line.split("::")
            ]
            ratings.append(
                {
                    "user_id": user_id,
                    "movie_id": movie_id,
                    "rating": rating,
                    "timestamp": timestamp,
                }
            )
    ratings = pd.DataFrame(ratings)

    # Filter the users and items that never appear in the rating table.
    distinct_users_in_ratings = ratings["user_id"].unique()
    distinct_movies_in_ratings = ratings["movie_id"].unique()
    users = users[users["user_id"].isin(distinct_users_in_ratings)]
    # ``movies`` is written to below, so take an explicit copy of the filtered
    # frame rather than assigning into a slice of the original.
    movies = movies[movies["movie_id"].isin(distinct_movies_in_ratings)].copy()

    # Group the movie features into genres (a vector), year (a category), title (a string)
    genre_columns = movies.columns.drop(["movie_id", "title", "year"])
    movies[genre_columns] = movies[genre_columns].fillna(False).astype("bool")
    movies_categorical = movies.drop("title", axis=1)

    # Build graph
    graph_builder = PandasGraphBuilder()
    graph_builder.add_entities(users, "user_id", "user")
    graph_builder.add_entities(movies_categorical, "movie_id", "movie")
    graph_builder.add_binary_relations(
        ratings, "user_id", "movie_id", "watched"
    )
    graph_builder.add_binary_relations(
        ratings, "movie_id", "user_id", "watched-by"
    )

    g = graph_builder.build()

    # Assign features.
    # Note that variable-sized features such as texts or images are handled elsewhere.
    for data_type in ["gender", "age", "occupation", "zip"]:
        g.nodes["user"].data[data_type] = torch.LongTensor(
            np.array(users[data_type].cat.codes.values)
        )
    g.nodes["movie"].data["year"] = torch.LongTensor(
        np.array(movies["year"].cat.codes.values)
    )
    g.nodes["movie"].data["genre"] = torch.FloatTensor(
        np.array(movies[genre_columns].values)
    )

    for edge_type in ["watched", "watched-by"]:
        for data_type in ["rating", "timestamp"]:
            g.edges[edge_type].data[data_type] = torch.LongTensor(
                np.array(ratings[data_type].values)
            )

    # Train-validation-test split
    # This is a little bit tricky as we want to select the last interaction for test, and the
    # second-to-last interaction for validation.
    train_indices, val_indices, test_indices = train_test_split_by_time(
        ratings, "timestamp", "user_id"
    )

    # Build the graph with training interactions only.
    train_g = build_train_graph(
        g, train_indices, "user", "movie", "watched", "watched-by"
    )
    assert train_g.out_degrees(etype="watched").min() > 0

    # Build the user-item sparse matrix for validation and test set.
    val_matrix, test_matrix = build_val_test_matrix(
        g, val_indices, test_indices, "user", "movie", "watched"
    )

    ## Build title set

    movie_textual_dataset = {"title": movies["title"].values}

    # The model should build their own vocabulary and process the texts.  Here is one example
    # of using torchtext to pad and numericalize a batch of strings.
    #     field = torchtext.data.Field(include_lengths=True, lower=True, batch_first=True)
    #     examples = [torchtext.data.Example.fromlist([t], [('title', title_field)]) for t in texts]
    #     titleset = torchtext.data.Dataset(examples, [('title', title_field)])
    #     field.build_vocab(titleset.title, vectors='fasttext.simple.300d')
    #     token_ids, lengths = field.process([examples[0].title, examples[1].title])

    ## Dump the graph and the datasets

    dgl.save_graphs(os.path.join(out_directory, "train_g.bin"), train_g)

    dataset = {
        "val-matrix": val_matrix,
        "test-matrix": test_matrix,
        "item-texts": movie_textual_dataset,
        "item-images": None,
        "user-type": "user",
        "item-type": "movie",
        "user-to-item-type": "watched",
        "item-to-user-type": "watched-by",
        "timestamp-edge-column": "timestamp",
    }

    with open(os.path.join(out_directory, "data.pkl"), "wb") as f:
        pickle.dump(dataset, f)
""" import argparse import os import pickle import pandas as pd import scipy.sparse as ssp from builder import PandasGraphBuilder from data_utils import * import dgl if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("directory", type=str) parser.add_argument("out_directory", type=str) args = parser.parse_args() directory = args.directory out_directory = args.out_directory os.makedirs(out_directory, exist_ok=True) data = pd.read_csv(os.path.join(directory, "context_content_features.csv")) track_feature_cols = list(data.columns[1:13]) data = data[ ["user_id", "track_id", "created_at"] + track_feature_cols ].dropna() users = data[["user_id"]].drop_duplicates() tracks = data[["track_id"] + track_feature_cols].drop_duplicates() assert tracks["track_id"].value_counts().max() == 1 tracks = tracks.astype( {"mode": "int64", "key": "int64", "artist_id": "category"} ) events = data[["user_id", "track_id", "created_at"]] events["created_at"] = ( events["created_at"].values.astype("datetime64[s]").astype("int64") ) graph_builder = PandasGraphBuilder() graph_builder.add_entities(users, "user_id", "user") graph_builder.add_entities(tracks, "track_id", "track") graph_builder.add_binary_relations( events, "user_id", "track_id", "listened" ) graph_builder.add_binary_relations( events, "track_id", "user_id", "listened-by" ) g = graph_builder.build() float_cols = [] for col in tracks.columns: if col == "track_id": continue elif col == "artist_id": g.nodes["track"].data[col] = torch.LongTensor( tracks[col].cat.codes.values ) elif tracks.dtypes[col] == "float64": float_cols.append(col) else: g.nodes["track"].data[col] = torch.LongTensor(tracks[col].values) g.nodes["track"].data["song_features"] = torch.FloatTensor( linear_normalize(tracks[float_cols].values) ) g.edges["listened"].data["created_at"] = torch.LongTensor( events["created_at"].values ) g.edges["listened-by"].data["created_at"] = torch.LongTensor( events["created_at"].values ) n_edges = 
def padding(array, yy, val):
    """
    Right-pad a tensor with a constant value.

    :param array: torch tensor array; its size along dimension 0 is taken as
        the current width
    :param yy: desired width
    :param val: padded value
    :return: padded array
    """
    left = 0
    right = yy - left - array.shape[0]
    pad_widths = (left, right)
    return torch.nn.functional.pad(
        array, pad=pad_widths, mode="constant", value=val
    )
self.g.num_nodes(self.item_type), (self.batch_size,) ) tails = dgl.sampling.random_walk( self.g, heads, metapath=[self.item_to_user_etype, self.user_to_item_etype], )[0][:, 2] neg_tails = torch.randint( 0, self.g.num_nodes(self.item_type), (self.batch_size,) ) mask = tails != -1 yield heads[mask], tails[mask], neg_tails[mask] class NeighborSampler(object): def __init__( self, g, user_type, item_type, random_walk_length, random_walk_restart_prob, num_random_walks, num_neighbors, num_layers, ): self.g = g self.user_type = user_type self.item_type = item_type self.user_to_item_etype = list(g.metagraph()[user_type][item_type])[0] self.item_to_user_etype = list(g.metagraph()[item_type][user_type])[0] self.samplers = [ dgl.sampling.PinSAGESampler( g, item_type, user_type, random_walk_length, random_walk_restart_prob, num_random_walks, num_neighbors, ) for _ in range(num_layers) ] def sample_blocks(self, seeds, heads=None, tails=None, neg_tails=None): blocks = [] for sampler in self.samplers: frontier = sampler(seeds) if heads is not None: eids = frontier.edge_ids( torch.cat([heads, heads]), torch.cat([tails, neg_tails]), return_uv=True, )[2] if len(eids) > 0: old_frontier = frontier frontier = dgl.remove_edges(old_frontier, eids) # print(old_frontier) # print(frontier) # print(frontier.edata['weights']) # frontier.edata['weights'] = old_frontier.edata['weights'][frontier.edata[dgl.EID]] block = compact_and_copy(frontier, seeds) seeds = block.srcdata[dgl.NID] blocks.insert(0, block) return blocks def sample_from_item_pairs(self, heads, tails, neg_tails): # Create a graph with positive connections only and another graph with negative # connections only. 
def assign_textual_node_features(ndata, textset, ntype):
    """
    Assigns numericalized tokens from a text set to the given node data.

    For every field in ``textset`` the padded token ids are stored in
    ``ndata`` under the field name, and the unpadded lengths under
    ``field_name + '__len'``.

    ndata : dict-like node data
        Node data of a block (e.g. ``block.srcdata``).  ``ndata[dgl.NID]``
        holds the node IDs in the original graph, which are used as indices
        into the text lists.
    textset : dict
        Maps a field name to a ``(textlist, vocab, pad_var, batch_first)``
        tuple: the tokenized text of every node, the vocabulary used to
        numericalize it, the id used for padding, and whether the stored
        token tensor is laid out as (nodes, tokens).
    ntype : str
        Unused; kept for interface compatibility.
    """
    node_ids = ndata[dgl.NID].numpy()
    num_nodes = len(node_ids)

    for field_name, field in textset.items():
        textlist, vocab, pad_var, batch_first = field

        examples = [textlist[i] for i in node_ids]
        # ``default=0`` keeps an empty batch from raising in ``max``.
        maxsize = max((len(example) for example in examples), default=0)

        # Preallocate the padded matrix once and fill it row by row.  This
        # always yields a 2-D (num_nodes, maxsize) tensor, including for a
        # single node, and avoids re-copying the matrix on every row.
        tokens = torch.full((num_nodes, maxsize), pad_var, dtype=torch.long)
        lengths = torch.zeros(num_nodes, dtype=torch.long)
        ids_iter = numericalize_tokens_from_iterator(vocab, examples)
        for row, ids in enumerate(ids_iter):
            x = torch.tensor(list(ids), dtype=torch.long)
            tokens[row, : len(x)] = x
            lengths[row] = len(x)

        if not batch_first:
            tokens = tokens.t()

        ndata[field_name] = tokens
        ndata[field_name + "__len"] = lengths
def pc_normalize(pc):
    """
    Center a point cloud at its centroid and scale it into the unit sphere.

    pc : numpy array of shape [N, D]
        Point coordinates; the input array is not modified.

    Returns the centered cloud divided by the largest point norm.  When every
    point coincides with the centroid (largest norm is 0) the centered cloud
    is returned unscaled instead of dividing by zero.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
    # Guard the degenerate case: 0 / 0 would fill the result with NaN.
    if m > 0:
        pc = pc / m
    return pc
class ModelNetDataLoader(Dataset):
    """ModelNet40 point-cloud dataset read from per-shape text files."""

    def __init__(
        self,
        root,
        npoint=1024,
        split="train",
        fps=False,
        normal_channel=True,
        cache_size=15000,
    ):
        """
        Input:
            root: the root path to the local data files
            npoint: number of points from each cloud
            split: which split of the data, 'train' or 'test'
            fps: whether to sample points with farthest point sampler
            normal_channel: whether to use additional channel
            cache_size: the cache size of in-memory point clouds
        """
        self.root = root
        self.npoints = npoint
        self.fps = fps
        self.catfile = os.path.join(self.root, "modelnet40_shape_names.txt")

        self.cat = self._read_lines(self.catfile)
        # Class name -> integer label, in the order listed in the names file.
        self.classes = dict(zip(self.cat, range(len(self.cat))))
        self.normal_channel = normal_channel

        shape_ids = {}
        shape_ids["train"] = self._read_lines(
            os.path.join(self.root, "modelnet40_train.txt")
        )
        shape_ids["test"] = self._read_lines(
            os.path.join(self.root, "modelnet40_test.txt")
        )

        assert split == "train" or split == "test"
        # A shape id is "<shape_name>_<index>"; the shape name itself may
        # contain underscores, so only the last component is dropped.
        shape_names = ["_".join(x.split("_")[0:-1]) for x in shape_ids[split]]
        # list of (shape_name, shape_txt_file_path) tuple
        self.datapath = [
            (
                shape_names[i],
                os.path.join(self.root, shape_names[i], shape_ids[split][i])
                + ".txt",
            )
            for i in range(len(shape_ids[split]))
        ]
        print("The size of %s data is %d" % (split, len(self.datapath)))

        self.cache_size = cache_size
        self.cache = {}

    @staticmethod
    def _read_lines(path):
        """Return the lines of ``path`` with trailing whitespace removed."""
        # Context manager so the file handle is closed deterministically.
        with open(path) as f:
            return [line.rstrip() for line in f]

    def __len__(self):
        return len(self.datapath)

    def _get_item(self, index):
        """Load (or fetch from cache) the point set and label at ``index``."""
        if index in self.cache:
            point_set, cls = self.cache[index]
        else:
            fn = self.datapath[index]
            cls = self.classes[self.datapath[index][0]]
            cls = np.array([cls]).astype(np.int32)
            point_set = np.loadtxt(fn[1], delimiter=",").astype(np.float32)
            if self.fps:
                point_set = farthest_point_sample(point_set, self.npoints)
            else:
                point_set = point_set[0 : self.npoints, :]

            # Only the xyz coordinates are normalized; any extra channels are
            # left untouched.
            point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])

            if not self.normal_channel:
                point_set = point_set[:, 0:3]

            # The cache never evicts; it simply stops growing once full.
            if len(self.cache) < self.cache_size:
                self.cache[index] = (point_set, cls)

        return point_set, cls

    def __getitem__(self, index):
        """Return ``(point_set, cls)`` so the dataset can be indexed."""
        # Without this the base ``Dataset.__getitem__`` raises
        # NotImplementedError and the dataset cannot be iterated.
        return self._get_item(index)
With theoretical justifications and in-depth analysis, our BiPointNet introduces Entropy-Maximizing Aggregation (EMA) to modulate the distribution before aggregation for the maximum information entropy, and Layer-wise Scale Recovery (LSR) to efficiently restore feature representation capacity. Extensive experiments show that BiPointNet outperforms existing binarization methods by convincing margins, at a level even comparable with the full-precision counterpart. We highlight that our techniques are generic, guaranteeing significant improvements on various fundamental tasks and mainstream backbones, e.g., BiPointNet gives an impressive 14.7x speedup and 18.9x storage saving on real-world resource-constrained devices. Our inference framework is dabnn.

### How to Run

```shell script
python train_cls.py --model ${MODEL}
```

Here, `MODEL` has two choices: `bipointnet` and `bipointnet2_ssg`.

### Performance

#### Classification

| Model           | Dataset    | Metric   | Score |
| --------------- | ---------- | -------- | ----- |
| BiPointNet      | ModelNet40 | Accuracy | 88.4  |
| BiPointNet2_SSG | ModelNet40 | Accuracy | 83.1  |

Because of implementation differences introduced by using DGL, this version scores even higher than the results reported in the original paper.
class BinaryQuantize(Function):
    """
    Sign binarization with a clipped straight-through gradient.

    The forward pass returns ``torch.sign(input)``.  The backward pass lets
    the incoming gradient through unchanged where ``-1 <= input <= 1`` and
    zeroes it elsewhere.
    """

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        out = torch.sign(input)
        return out

    @staticmethod
    def backward(ctx, grad_output):
        (input,) = ctx.saved_tensors
        # Build the result out of place: ``grad_output`` may be shared with
        # other consumers (or be an expanded view), so it must not be
        # modified in place.
        grad_input = grad_output.masked_fill(input.gt(1), 0)
        grad_input = grad_input.masked_fill(input.lt(-1), 0)
        return grad_input
class BiLinear(torch.nn.Linear):
    """
    Linear layer whose weight (and optionally its input) is binarized with
    ``BinaryQuantize`` before the matrix product.

    in_features, out_features : int
        Sizes forwarded to ``torch.nn.Linear``.
    bias : bool
        Whether the layer has an additive bias.  The bias is used as is; it
        is not binarized.
    binary_act : bool
        Whether the input activations are binarized as well.
    """

    def __init__(self, in_features, out_features, bias=True, binary_act=True):
        # Forward the caller's ``bias`` choice instead of forcing a bias term.
        super(BiLinear, self).__init__(in_features, out_features, bias=bias)
        self.binary_act = binary_act
        # Output of the most recent forward pass.
        self.output_ = None

    def forward(self, input):
        bw = self.weight
        ba = input
        # ``apply`` is called on the Function class itself; no instance is
        # needed.
        bw = BinaryQuantize.apply(bw)
        if self.binary_act:
            ba = BinaryQuantize.apply(ba)
        output = F.linear(ba, bw, self.bias)
        self.output_ = output
        return output
class FixedRadiusNearNeighbors(nn.Module):
    """
    Ball Query - Find the neighbors with-in a fixed radius
    """

    def __init__(self, radius, n_neighbor):
        super(FixedRadiusNearNeighbors, self).__init__()
        self.radius = radius
        self.n_neighbor = n_neighbor

    def forward(self, pos, centroids):
        """
        Adapted from https://github.com/yanx27/Pointnet_Pointnet2_pytorch

        For every centroid, return the indices of up to ``n_neighbor`` points
        whose squared distance to it does not exceed ``radius ** 2``.  Slots
        without a point in range are filled with the first index of that
        centroid's row.
        """
        num_batches, num_points, _ = pos.shape
        centers = index_points(pos, centroids)
        _, num_centers, _ = centers.shape

        # Start with every point index as a candidate for every centroid.
        candidates = torch.arange(num_points, dtype=torch.long)
        candidates = candidates.to(pos.device).view(1, 1, num_points)
        candidates = candidates.repeat([num_batches, num_centers, 1])

        # Points outside the ball get the sentinel ``num_points``, which
        # sorts after every real index.
        too_far = square_distance(centers, pos) > self.radius**2
        candidates[too_far] = num_points
        ordered, _ = candidates.sort(dim=-1)
        neighbors = ordered[:, :, : self.n_neighbor]

        # Overwrite remaining sentinels with the first entry of the row.
        first = neighbors[:, :, 0].view(num_batches, num_centers, 1)
        first = first.repeat([1, 1, self.n_neighbor])
        is_sentinel = neighbors == num_points
        neighbors[is_sentinel] = first[is_sentinel]
        return neighbors
class BiPointNetConv(nn.Module):
    """
    Feature aggregation

    Applies a stack of (BiConv2d, BatchNorm2d, ReLU) layers to the gathered
    neighbor features and max-pools over the neighbors.

    sizes : list of int
        Channel sizes; layer ``i`` maps ``sizes[i - 1]`` to ``sizes[i]``.
    batch_size : int
        Number of point clouds per batch, used to reshape the mailbox.
    """

    def __init__(self, sizes, batch_size):
        super(BiPointNetConv, self).__init__()
        self.batch_size = batch_size
        self.conv = nn.ModuleList()
        self.bn = nn.ModuleList()
        for i in range(1, len(sizes)):
            self.conv.append(BiConv2d(sizes[i - 1], sizes[i], 1))
            self.bn.append(nn.BatchNorm2d(sizes[i]))

    def forward(self, nodes):
        """Reduce function: pool ``agg_feat`` messages into ``new_feat``."""
        shape = nodes.mailbox["agg_feat"].shape
        # Mailbox (nodes, neighbors, feat) -> (batch, feat, neighbors, nodes
        # per batch) so the 1x1 convolutions act on the feature channel.
        h = (
            nodes.mailbox["agg_feat"]
            .view(self.batch_size, -1, shape[1], shape[2])
            .permute(0, 3, 2, 1)
        )
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)
        # Max over the neighbor dimension.
        h = torch.max(h, 2)[0]
        feat_dim = h.shape[1]
        h = h.permute(0, 2, 1).reshape(-1, feat_dim)
        return {"new_feat": h}

    def group_all(self, pos, feat):
        """
        Feature aggregation and pooling for the non-sampling layer

        Returns ``(new_pos, h)``: an all-zero [B, 1, C] position tensor and
        the features max-pooled over all points of each cloud.
        """
        if feat is not None:
            h = torch.cat([pos, feat], 2)
        else:
            h = pos
        B, N, D = h.shape
        _, _, C = pos.shape
        # Allocate on the same device as the input instead of the default
        # (CPU) device.
        new_pos = torch.zeros(B, 1, C, device=pos.device)
        h = h.permute(0, 2, 1).view(B, -1, N, 1)
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)
        h = torch.max(h[:, :, :, 0], 2)[0]  # [B,D]
        return new_pos, h
class BiPointNet2SSGCls(nn.Module):
    """
    Classifier built from three set-abstraction modules followed by a
    three-layer BiLinearLSR head.
    """

    def __init__(
        self, output_classes, batch_size, input_dims=3, dropout_prob=0.4
    ):
        super().__init__()
        self.input_dims = input_dims

        # Set abstraction: two sampling stages, then one group-all stage.
        self.sa_module1 = BiSAModule(
            512, batch_size, 0.2, [input_dims, 64, 64, 128]
        )
        self.sa_module2 = BiSAModule(
            128, batch_size, 0.4, [128 + 3, 128, 128, 256]
        )
        self.sa_module3 = BiSAModule(
            None, batch_size, None, [256 + 3, 256, 512, 1024], group_all=True
        )

        # Classification head.
        self.mlp1 = BiLinearLSR(1024, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.drop1 = nn.Dropout(dropout_prob)

        self.mlp2 = BiLinearLSR(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.drop2 = nn.Dropout(dropout_prob)

        self.mlp_out = BiLinearLSR(256, output_classes)

    def forward(self, x):
        # The first three channels are positions; anything beyond that is
        # passed along as per-point features.
        has_features = x.shape[-1] > 3
        pos = x[:, :, :3] if has_features else x
        feat = x[:, :, 3:] if has_features else None

        for sa_module in (self.sa_module1, self.sa_module2):
            pos, feat = sa_module(pos, feat)
        _, h = self.sa_module3(pos, feat)

        hidden_layers = (
            (self.mlp1, self.bn1, self.drop1),
            (self.mlp2, self.bn2, self.drop2),
        )
        for mlp, bn, drop in hidden_layers:
            h = drop(F.relu(bn(mlp(h))))

        return self.mlp_out(h)
class Conv1d(nn.Module):
    """
    Point-wise layer over a [B, C, N] tensor.

    The layer built by ``Linear(inplane, outplane)`` is applied to every
    point independently; the output is laid out as [B, outplane, N].
    """

    def __init__(self, inplane, outplane, Linear):
        super().__init__()
        self.lin = Linear(inplane, outplane)

    def forward(self, x):
        batch, channels, num_points = x.shape
        # [B, C, N] -> [B * N, C] so that each row is one point.
        per_point = x.transpose(1, 2).contiguous().view(-1, channels)
        projected = self.lin(per_point)
        # [B * N, C'] -> [B, C', N]
        restored = projected.view(batch, num_points, -1).transpose(1, 2)
        return restored.contiguous()
class TransformNet(nn.Module):
    """Predict one ``input_dims x input_dims`` transform matrix per sample.

    The input goes through three pointwise convolutions, a global max-pool
    and two fully connected layers; the final layer emits
    ``input_dims * input_dims`` values to which a flattened identity matrix
    is added before reshaping.

    Parameters
    ----------
    input_dims : int
        Channel count of the input and side length of the predicted matrix.
    conv1_dim : int
        Base width; the wider layers are multiples of it.
    Linear : callable
        Factory ``Linear(in_features, out_features)`` used for every
        learnable projection.
    """

    def __init__(self, input_dims=3, conv1_dim=64, Linear=BiLinear):
        super(TransformNet, self).__init__()
        self.conv = nn.ModuleList()
        self.conv.append(Conv1d(input_dims, conv1_dim, Linear=Linear))
        self.conv.append(Conv1d(conv1_dim, conv1_dim * 2, Linear=Linear))
        self.conv.append(Conv1d(conv1_dim * 2, conv1_dim * 16, Linear=Linear))

        self.bn = nn.ModuleList()
        self.bn.append(nn.BatchNorm1d(conv1_dim))
        self.bn.append(nn.BatchNorm1d(conv1_dim * 2))
        self.bn.append(nn.BatchNorm1d(conv1_dim * 16))

        self.maxpool = EmaMaxPool(conv1_dim * 16, Linear=Linear, use_bn=True)
        self.pool_feat_len = conv1_dim * 16

        self.mlp2 = nn.ModuleList()
        self.mlp2.append(Linear(conv1_dim * 16, conv1_dim * 8))
        self.mlp2.append(Linear(conv1_dim * 8, conv1_dim * 4))

        self.bn2 = nn.ModuleList()
        self.bn2.append(nn.BatchNorm1d(conv1_dim * 8))
        self.bn2.append(nn.BatchNorm1d(conv1_dim * 4))

        self.input_dims = input_dims
        self.mlp_out = Linear(conv1_dim * 4, input_dims * input_dims)

    def forward(self, h):
        """Return a ``(B, input_dims, input_dims)`` tensor for input ``h``."""
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)

        h = self.maxpool(h).view(-1, self.pool_feat_len)
        for mlp, bn in zip(self.mlp2, self.bn2):
            h = mlp(h)
            h = bn(h)
            h = F.relu(h)

        out = self.mlp_out(h)

        # Build the identity directly on the device (and in the dtype) of
        # ``out``. A bare ``.cuda()`` would target the *default* CUDA device,
        # which is wrong when the model lives on another GPU. Broadcasting
        # over the batch axis replaces the explicit ``repeat``.
        iden = torch.eye(
            self.input_dims, dtype=out.dtype, device=out.device
        ).flatten()
        out = out + iden
        return out.view(-1, self.input_dims, self.input_dims)
def train(net, opt, scheduler, train_loader, dev):
    """Train ``net`` for one pass over ``train_loader``.

    Each batch is converted to numpy, passed through
    ``provider.random_point_dropout`` and then, on its first three channels,
    through ``provider.random_scale_point_cloud``,
    ``provider.jitter_point_cloud`` and ``provider.shift_point_cloud`` (in
    that order) before being fed to the network. Running average loss and
    accuracy are shown on the progress bar. ``scheduler.step()`` is called
    once, after all batches.
    """
    net.train()
    criterion = nn.CrossEntropyLoss()
    # Applied in this order to channels 0:3 of every batch.
    augmentations = (
        provider.random_scale_point_cloud,
        provider.jitter_point_cloud,
        provider.shift_point_cloud,
    )

    loss_sum = 0
    hits = 0
    seen = 0

    with tqdm.tqdm(train_loader, ascii=True) as tq:
        for step, (data, label) in enumerate(tq, start=1):
            points = data.data.numpy()
            points = provider.random_point_dropout(points)
            for augment in augmentations:
                points[:, :, 0:3] = augment(points[:, :, 0:3])
            data = torch.tensor(points)

            label = label[:, 0]
            batch_examples = label.shape[0]
            data = data.to(dev)
            label = label.to(dev).squeeze().long()

            opt.zero_grad()
            logits = net(data)
            batch_loss = criterion(logits, label)
            batch_loss.backward()
            opt.step()

            preds = logits.max(1)[1]
            seen += batch_examples
            loss_sum += batch_loss.item()
            hits += (preds == label).sum().item()

            tq.set_postfix(
                {
                    "AvgLoss": "%.5f" % (loss_sum / step),
                    "AvgAcc": "%.5f" % (hits / seen),
                }
            )
    scheduler.step()
label).sum().item() total_correct += correct count += num_examples tq.set_postfix({"AvgAcc": "%.5f" % (total_correct / count)}) return total_correct / count dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") if args.model == "bipointnet": net = BiPointNetCls(40, input_dims=6) elif args.model == "bipointnet2_ssg": net = BiPointNet2SSGCls(40, batch_size, input_dims=6) net = net.to(dev) if args.load_model_path: net.load_state_dict( torch.load(args.load_model_path, weights_only=False, map_location=dev) ) opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4) scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.7) train_dataset = ModelNetDataLoader(local_path, 1024, split="train") test_dataset = ModelNetDataLoader(local_path, 1024, split="test") train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True, ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, drop_last=True, ) best_test_acc = 0 for epoch in range(args.num_epochs): train(net, opt, scheduler, train_loader, dev) if (epoch + 1) % 1 == 0: print("Epoch #%d Testing" % epoch) test_acc = evaluate(net, test_loader, dev) if test_acc > best_test_acc: best_test_acc = test_acc if args.save_model_path: torch.save(net.state_dict(), args.save_model_path) print("Current test acc: %.5f (best: %.5f)" % (test_acc, best_test_acc)) ================================================ FILE: examples/pytorch/pointcloud/edgeconv/README.md ================================================ Dynamic EdgeConv ==== This is a reproduction of the paper [Dynamic Graph CNN for Learning on Point Clouds](https://arxiv.org/pdf/1801.07829.pdf). The reproduced experiment is the 40-class classification on the ModelNet40 dataset. The sampled point clouds are identical to that of [PointNet](https://github.com/charlesq34/pointnet). 
def train(model, opt, scheduler, train_loader, dev):
    """Train ``model`` for one epoch and then advance the LR scheduler.

    Parameters
    ----------
    model : torch.nn.Module
        Network being optimised; switched to training mode here.
    opt : torch.optim.Optimizer
        Optimizer stepped once per batch.
    scheduler
        Learning-rate scheduler, stepped once per call.
    train_loader : iterable
        Yields ``(data, label)`` batches.
    dev : torch.device
        Device the batches are moved to.
    """
    model.train()

    total_loss = 0
    num_batches = 0
    total_correct = 0
    count = 0
    with tqdm.tqdm(train_loader, ascii=True) as tq:
        for data, label in tq:
            num_examples = label.shape[0]
            data, label = data.to(dev), label.to(dev).squeeze().long()
            opt.zero_grad()
            logits = model(data)
            loss = compute_loss(logits, label)
            loss.backward()
            opt.step()

            _, preds = logits.max(1)

            num_batches += 1
            count += num_examples
            loss = loss.item()
            correct = (preds == label).sum().item()
            total_loss += loss
            total_correct += correct

            tq.set_postfix(
                {
                    "Loss": "%.5f" % loss,
                    "AvgLoss": "%.5f" % (total_loss / num_batches),
                    "Acc": "%.5f" % (correct / num_examples),
                    "AvgAcc": "%.5f" % (total_correct / count),
                }
            )

    # Step the scheduler only after this epoch's optimizer updates. Stepping
    # it before any ``opt.step()`` skips the first value of the learning-rate
    # schedule and triggers a PyTorch warning.
    scheduler.step()
class Model(nn.Module):
    """Stack of ``EdgeConv`` layers followed by an MLP classification head.

    Parameters
    ----------
    k : int
        Passed to ``KNNGraph`` to build the neighbour graph for each layer.
    feature_dims : sequence of int
        Output width of each ``EdgeConv`` layer.
    emb_dims : sequence of int
        ``emb_dims[0]`` is the width of the projection applied to the
        concatenated layer outputs; the remaining entries are the widths of
        the head's linear layers.
    output_classes : int
        Number of logits produced.
    input_dims : int
        Feature width of the input points.
    dropout_prob : float
        Dropout probability used after every head layer.
    """

    def __init__(
        self,
        k,
        feature_dims,
        emb_dims,
        output_classes,
        input_dims=3,
        dropout_prob=0.5,
    ):
        super().__init__()

        self.nng = KNNGraph(k)
        self.conv = nn.ModuleList()

        self.num_layers = len(feature_dims)
        # Each layer consumes the previous layer's width; the first one
        # consumes the raw input width.
        layer_inputs = [input_dims] + list(feature_dims[:-1])
        for in_dim, out_dim in zip(layer_inputs, feature_dims):
            self.conv.append(EdgeConv(in_dim, out_dim, batch_norm=True))

        self.proj = nn.Linear(sum(feature_dims), emb_dims[0])

        self.embs = nn.ModuleList()
        self.bn_embs = nn.ModuleList()
        self.dropouts = nn.ModuleList()

        self.num_embs = len(emb_dims) - 1
        for idx, (prev_dim, next_dim) in enumerate(
            zip(emb_dims[:-1], emb_dims[1:])
        ):
            # * 2 on the first head layer because max- and mean-pooled
            # features are concatenated before it.
            in_dim = prev_dim * 2 if idx == 0 else prev_dim
            self.embs.append(nn.Linear(in_dim, next_dim))
            self.bn_embs.append(nn.BatchNorm1d(next_dim))
            self.dropouts.append(nn.Dropout(dropout_prob))

        self.proj_output = nn.Linear(emb_dims[-1], output_classes)

    def forward(self, x):
        """Return logits for a ``(batch, points, dims)`` input tensor."""
        batch_size, n_points, _ = x.shape

        layer_outputs = []
        h = x
        for conv in self.conv:
            # The graph is rebuilt from the current features at every layer.
            graph = self.nng(h).to(h.device)
            flat = h.view(batch_size * n_points, -1)
            flat = F.leaky_relu(conv(graph, flat), 0.2)
            h = flat.view(batch_size, n_points, -1)
            layer_outputs.append(h)

        h = self.proj(torch.cat(layer_outputs, 2))
        pooled_max, _ = torch.max(h, 1)
        pooled_avg = torch.mean(h, 1)
        h = torch.cat([pooled_max, pooled_avg], 1)

        for emb, bn, dropout in zip(self.embs, self.bn_embs, self.dropouts):
            h = dropout(F.leaky_relu(bn(emb(h)), 0.2))

        return self.proj_output(h)
class ModelNetDataset(Dataset):
    """One split ("train", "valid" or "test") of a ``ModelNet`` container.

    "train" takes the first ``modelnet.n_train`` samples of the training
    arrays, "valid" takes the remainder, and "test" takes the whole test
    arrays. Training samples are randomly scaled, shifted and shuffled on
    access; the other splits are returned unchanged.

    Parameters
    ----------
    modelnet
        Object exposing ``f`` (mapping of dataset names to array-likes),
        ``num_points`` and ``n_train``.
    mode : str
        One of ``"train"``, ``"valid"`` or ``"test"``.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported splits.
    """

    def __init__(self, modelnet, mode):
        super(ModelNetDataset, self).__init__()
        self.num_points = modelnet.num_points
        self.mode = mode

        if mode == "train":
            self.data = modelnet.f["train/data"][: modelnet.n_train]
            self.label = modelnet.f["train/label"][: modelnet.n_train]
        elif mode == "valid":
            self.data = modelnet.f["train/data"][modelnet.n_train :]
            self.label = modelnet.f["train/label"][modelnet.n_train :]
        elif mode == "test":
            # ``Dataset.value`` was removed in h5py 3.0; ``[()]`` is the
            # documented way to read a whole dataset.
            self.data = modelnet.f["test/data"][()]
            self.label = modelnet.f["test/label"][()]
        else:
            # Fail here rather than with an AttributeError on first access.
            raise ValueError(
                "Unsupported mode %r; expected 'train', 'valid' or 'test'"
                % (mode,)
            )

    def translate(self, x, scale=(2 / 3, 3 / 2), shift=(-0.2, 0.2)):
        """Return ``x * s + t`` as float32 with random per-axis ``s`` and ``t``.

        ``s`` and ``t`` are length-3 vectors drawn uniformly from ``scale``
        and ``shift`` respectively.
        """
        xyz1 = np.random.uniform(low=scale[0], high=scale[1], size=[3])
        xyz2 = np.random.uniform(low=shift[0], high=shift[1], size=[3])
        x = np.add(np.multiply(x, xyz1), xyz2).astype("float32")
        return x

    def __len__(self):
        return self.data.shape[0]

    def __getitem__(self, i):
        """Return ``(points, label)`` for sample ``i``.

        Only the first ``num_points`` points of the sample are kept.
        """
        x = self.data[i][: self.num_points]
        y = self.label[i]
        if self.mode == "train":
            # ``translate`` returns a new array, so the in-place shuffle
            # does not touch the stored data.
            x = self.translate(x)
            np.random.shuffle(x)
        return x, y
class ModelNetDataLoader(Dataset):
    """Dataset over per-shape text files listed in the ModelNet40 split files.

    ``root`` must contain ``modelnet40_shape_names.txt``,
    ``modelnet40_train.txt`` and ``modelnet40_test.txt``. Each shape is read
    from ``<root>/<shape_name>/<shape_id>.txt`` on first access and kept in
    an in-memory cache of at most ``cache_size`` entries.
    """

    def __init__(
        self,
        root,
        npoint=1024,
        split="train",
        fps=False,
        normal_channel=True,
        cache_size=15000,
    ):
        """
        Input:
            root: the root path to the local data files
            npoint: number of points from each cloud
            split: which split of the data, 'train' or 'test'
            fps: whether to sample points with farthest point sampler
            normal_channel: whether to use additional channel
            cache_size: the cache size of in-memory point clouds
        """
        self.root = root
        self.npoints = npoint
        self.fps = fps
        self.catfile = os.path.join(self.root, "modelnet40_shape_names.txt")

        self.cat = self._read_lines(self.catfile)
        self.classes = dict(zip(self.cat, range(len(self.cat))))
        self.normal_channel = normal_channel

        shape_ids = {
            "train": self._read_lines(
                os.path.join(self.root, "modelnet40_train.txt")
            ),
            "test": self._read_lines(
                os.path.join(self.root, "modelnet40_test.txt")
            ),
        }

        assert split == "train" or split == "test"
        # list of (shape_name, shape_txt_file_path) tuple
        self.datapath = []
        for shape_id in shape_ids[split]:
            # The shape name is the id without its trailing "_<suffix>".
            shape_name = "_".join(shape_id.split("_")[0:-1])
            self.datapath.append(
                (
                    shape_name,
                    os.path.join(self.root, shape_name, shape_id) + ".txt",
                )
            )
        print("The size of %s data is %d" % (split, len(self.datapath)))

        self.cache_size = cache_size
        self.cache = {}

    @staticmethod
    def _read_lines(path):
        """Return the right-stripped lines of ``path``, closing the file."""
        with open(path) as f:
            return [line.rstrip() for line in f]

    def __len__(self):
        return len(self.datapath)

    def _get_item(self, index):
        """Return ``(point_set, cls)`` for ``index``, loading it if uncached."""
        if index in self.cache:
            point_set, cls = self.cache[index]
        else:
            shape_name, path = self.datapath[index]
            cls = np.array([self.classes[shape_name]]).astype(np.int32)
            point_set = np.loadtxt(path, delimiter=",").astype(np.float32)
            if self.fps:
                point_set = farthest_point_sample(point_set, self.npoints)
            else:
                point_set = point_set[0 : self.npoints, :]

            point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])

            if not self.normal_channel:
                point_set = point_set[:, 0:3]

            # Stop caching once the cache is full; later items are reloaded
            # from disk on every access.
            if len(self.cache) < self.cache_size:
                self.cache[index] = (point_set, cls)

        return point_set, cls

    def __getitem__(self, index):
        return self._get_item(index)
class ShapeNet(object):
    """File lists and dataset factories for the ShapeNet part benchmark.

    On construction the benchmark archive is downloaded and extracted into
    the DGL download directory if the extracted folder is missing, then the
    synset table and the train/val/test file lists are read.

    Parameters
    ----------
    num_points : int
        Number of points sampled per shape by the produced datasets.
    normal_channel : bool
        Forwarded to ``ShapeNetDataset``; selects 6 channels instead of 3.
    """

    def __init__(self, num_points=2048, normal_channel=True):
        self.num_points = num_points
        self.normal_channel = normal_channel

        SHAPENET_DOWNLOAD_URL = "https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip"
        download_path = get_download_dir()
        data_filename = (
            "shapenetcore_partanno_segmentation_benchmark_v0_normal.zip"
        )
        data_path = os.path.join(
            download_path,
            "shapenetcore_partanno_segmentation_benchmark_v0_normal",
        )
        if not os.path.exists(data_path):
            local_path = os.path.join(download_path, data_filename)
            if not os.path.exists(local_path):
                # NOTE(review): certificate verification is disabled for
                # this download; confirm that is still required.
                download(SHAPENET_DOWNLOAD_URL, local_path, verify_ssl=False)
            # Extract whenever the data folder is missing, including when
            # the archive was already downloaded by an earlier run.
            with ZipFile(local_path) as z:
                z.extractall(path=download_path)

        synset_file = "synsetoffset2category.txt"
        with open(os.path.join(data_path, synset_file)) as f:
            synset = [t.split("\n")[0].split("\t") for t in f.readlines()]
        # Maps the second tab-separated column to the first.
        self.synset_dict = {syn[1]: syn[0] for syn in synset}
        self.seg_classes = {
            "Airplane": [0, 1, 2, 3],
            "Bag": [4, 5],
            "Cap": [6, 7],
            "Car": [8, 9, 10, 11],
            "Chair": [12, 13, 14, 15],
            "Earphone": [16, 17, 18],
            "Guitar": [19, 20, 21],
            "Knife": [22, 23],
            "Lamp": [24, 25, 26, 27],
            "Laptop": [28, 29],
            "Motorbike": [30, 31, 32, 33, 34, 35],
            "Mug": [36, 37],
            "Pistol": [38, 39, 40],
            "Rocket": [41, 42, 43],
            "Skateboard": [44, 45, 46],
            "Table": [47, 48, 49],
        }

        split_path = os.path.join(data_path, "train_test_split")
        self.train_file_list = self._read_split(
            data_path, split_path, "shuffled_train_file_list.json"
        )
        self.val_file_list = self._read_split(
            data_path, split_path, "shuffled_val_file_list.json"
        )
        self.test_file_list = self._read_split(
            data_path, split_path, "shuffled_test_file_list.json"
        )

    @staticmethod
    def _read_split(data_path, split_path, split_json):
        """Return the ``.txt`` paths under ``data_path`` listed in a split file."""
        with open(os.path.join(split_path, split_json)) as f:
            entries = json.load(f)
        return [
            os.path.join(data_path, t.replace("shape_data/", "") + ".txt")
            for t in entries
        ]

    def train(self):
        return ShapeNetDataset(
            self, "train", self.num_points, self.normal_channel
        )

    def valid(self):
        return ShapeNetDataset(
            self, "valid", self.num_points, self.normal_channel
        )

    def trainval(self):
        return ShapeNetDataset(
            self, "trainval", self.num_points, self.normal_channel
        )

    def test(self):
        return ShapeNetDataset(
            self, "test", self.num_points, self.normal_channel
        )


class ShapeNetDataset(Dataset):
    """In-memory split of ShapeNet shapes with per-point labels.

    Parameters
    ----------
    shapenet
        Object exposing ``train_file_list``, ``val_file_list``,
        ``test_file_list`` and ``synset_dict``.
    mode : str
        One of ``"train"``, ``"valid"``, ``"test"`` or ``"trainval"``.
    num_points : int
        Number of points drawn (with replacement) per shape on access.
    normal_channel : bool
        When true the first 6 columns are used as features, otherwise 3.

    Raises
    ------
    ValueError
        If ``mode`` is not supported.
    """

    def __init__(self, shapenet, mode, num_points, normal_channel=True):
        super(ShapeNetDataset, self).__init__()
        self.mode = mode
        self.num_points = num_points
        if not normal_channel:
            self.dim = 3
        else:
            self.dim = 6

        if mode == "train":
            self.file_list = shapenet.train_file_list
        elif mode == "valid":
            self.file_list = shapenet.val_file_list
        elif mode == "test":
            self.file_list = shapenet.test_file_list
        elif mode == "trainval":
            self.file_list = shapenet.train_file_list + shapenet.val_file_list
        else:
            # Raising a bare string is itself a TypeError in Python 3.
            raise ValueError("Not supported `mode`")

        data_list = []
        label_list = []
        category_list = []
        print("Loading data from split " + self.mode)
        for fn in tqdm.tqdm(self.file_list, ascii=True):
            with open(fn) as f:
                # ``np.float`` was removed from NumPy; ``np.float64`` is the
                # type it used to alias.
                data = np.array(
                    [t.split("\n")[0].split(" ") for t in f.readlines()]
                ).astype(np.float64)
            data_list.append(data[:, 0 : self.dim])
            label_list.append(data[:, 6].astype(int))
            # The parent folder name is the synset id; os.path handles the
            # platform's separator, unlike splitting on "/".
            synset_id = os.path.basename(os.path.dirname(fn))
            category_list.append(shapenet.synset_dict[synset_id])
        self.data = data_list
        self.label = label_list
        self.category = category_list

    def translate(self, x, scale=(2 / 3, 3 / 2), shift=(-0.2, 0.2), size=3):
        """Return ``x * s + t`` as float32 with random length-``size`` ``s``, ``t``."""
        xyz1 = np.random.uniform(low=scale[0], high=scale[1], size=[size])
        xyz2 = np.random.uniform(low=shift[0], high=shift[1], size=[size])
        x = np.add(np.multiply(x, xyz1), xyz2).astype("float32")
        return x

    def __len__(self):
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(points, labels, category)`` for shape ``i``.

        ``num_points`` rows are sampled with replacement; in "train" mode
        the sampled points are additionally randomly scaled and shifted.
        """
        inds = np.random.choice(
            self.data[i].shape[0], self.num_points, replace=True
        )
        x = self.data[i][inds, : self.dim]
        y = self.label[i][inds]
        cat = self.category[i]
        if self.mode == "train":
            x = self.translate(x, size=self.dim)
        x = x.astype(np.float64)
        y = y.astype(int)
        return x, y, cat
class KNNMessage(nn.Module):
    """
    Edge message function producing the ``agg_feat`` field.

    The message is the difference between source and destination ``feat``
    concatenated (along dim 1) with the source ``feat``.
    """

    def __init__(self, n_neighbor):
        super().__init__()
        self.n_neighbor = n_neighbor

    def forward(self, edges):
        """Return ``{"agg_feat": message}`` for a batch of edges."""
        relative = edges.src["feat"] - edges.dst["feat"]
        # Without a source "feat" entry the message is the difference alone.
        if "feat" not in edges.src:
            return {"agg_feat": relative}
        message = torch.cat([relative, edges.src["feat"]], 1)
        return {"agg_feat": message}
class TransitionDown(nn.Module):
    """
    The Transition Down Module

    Samples ``n_point`` centroids per cloud, builds a neighbour graph around
    them, aggregates neighbour features with ``KNNMessage`` / ``KNNConv``
    and returns the positions and new features of the centroid nodes.
    """

    def __init__(self, in_channels, out_channels, n_neighbor=64):
        super().__init__()
        self.frnn_graph = KNNGraphBuilder(n_neighbor)
        self.message = KNNMessage(n_neighbor)
        self.conv = KNNConv([in_channels, out_channels, out_channels])

    def forward(self, pos, feat, n_point):
        """Return ``(pos_res, feat_res)``, each reshaped to ``(batch, -1, dim)``."""
        n_batches = pos.shape[0]
        sampled = farthest_point_sampler(pos, n_point)

        graph = self.frnn_graph(pos, sampled, feat)
        graph.update_all(self.message, self.conv)

        # Keep only the nodes flagged as centroids by the graph builder.
        is_center = graph.ndata["center"] == 1
        node_pos = graph.ndata["pos"]
        node_feat = graph.ndata["new_feat"]

        pos_res = node_pos[is_center].view(n_batches, -1, node_pos.shape[-1])
        feat_res = node_feat[is_center].view(
            n_batches, -1, node_feat.shape[-1]
        )
        return pos_res, feat_res
class SALayerCLS(nn.Module):
    """Self-attention layer that adds a position embedding to its input.

    The query and key convolutions share one weight tensor. The layer
    returns ``x + act(norm(trans_conv(x - attended)))`` where ``x`` already
    includes the position embedding.
    """

    def __init__(self, channels):
        super().__init__()
        reduced = channels // 4
        self.q_conv = nn.Conv1d(channels, reduced, 1, bias=False)
        self.k_conv = nn.Conv1d(channels, reduced, 1, bias=False)
        # Tie the query weights to the key weights.
        self.q_conv.weight = self.k_conv.weight
        self.v_conv = nn.Conv1d(channels, channels, 1)
        self.trans_conv = nn.Conv1d(channels, channels, 1)
        self.after_norm = nn.BatchNorm1d(channels)
        self.act = nn.ReLU()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x, xyz):
        """Apply attention to ``x + xyz``; both are ``(b, c, n)`` tensors."""
        embedded = x + xyz

        queries = self.q_conv(embedded).permute(0, 2, 1)  # b, n, c
        keys = self.k_conv(embedded)  # b, c, n
        values = self.v_conv(embedded)

        scores = torch.bmm(queries, keys)  # b, n, n
        weights = self.softmax(scores)
        # Second normalisation, over dim 1, after the softmax over dim -1.
        weights = weights / (1e-9 + weights.sum(dim=1, keepdims=True))

        attended = torch.bmm(values, weights)  # b, c, n
        residual = self.trans_conv(embedded - attended)
        residual = self.act(self.after_norm(residual))
        return embedded + residual
def __init__(self, output_channels=40): super(PointTransformerCLS, self).__init__() self.conv1 = nn.Conv1d(3, 64, kernel_size=1, bias=False) self.conv2 = nn.Conv1d(64, 64, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm1d(64) self.bn2 = nn.BatchNorm1d(64) self.g_op0 = TransitionDown( in_channels=128, out_channels=128, n_neighbor=32 ) self.g_op1 = TransitionDown( in_channels=256, out_channels=256, n_neighbor=32 ) self.pt_last = PCTPositionEmbedding() self.relu = nn.ReLU() self.conv_fuse = nn.Sequential( nn.Conv1d(1280, 1024, kernel_size=1, bias=False), nn.BatchNorm1d(1024), nn.LeakyReLU(negative_slope=0.2), ) self.linear1 = nn.Linear(1024, 512, bias=False) self.bn6 = nn.BatchNorm1d(512) self.dp1 = nn.Dropout(p=0.5) self.linear2 = nn.Linear(512, 256) self.bn7 = nn.BatchNorm1d(256) self.dp2 = nn.Dropout(p=0.5) self.linear3 = nn.Linear(256, output_channels) def forward(self, x): xyz = x[..., :3] x = x[..., 3:].permute(0, 2, 1) batch_size, _, _ = x.size() x = self.relu(self.bn1(self.conv1(x))) # B, D, N x = self.relu(self.bn2(self.conv2(x))) # B, D, N x = x.permute(0, 2, 1) new_xyz, feature_0 = self.g_op0(xyz, x, n_point=512) new_xyz, feature_1 = self.g_op1(new_xyz, feature_0, n_point=256) # add position embedding on each layer x = self.pt_last(feature_1, new_xyz) x = torch.cat([x, feature_1], dim=1) x = self.conv_fuse(x) x, _ = torch.max(x, 2) x = x.view(batch_size, -1) x = self.relu(self.bn6(self.linear1(x))) x = self.dp1(x) x = self.relu(self.bn7(self.linear2(x))) x = self.dp2(x) x = self.linear3(x) return x class PointTransformerSeg(nn.Module): def __init__(self, part_num=50): super(PointTransformerSeg, self).__init__() self.part_num = part_num self.conv1 = nn.Conv1d(3, 128, kernel_size=1, bias=False) self.conv2 = nn.Conv1d(128, 128, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm1d(128) self.bn2 = nn.BatchNorm1d(128) self.sa1 = SALayerSeg(128) self.sa2 = SALayerSeg(128) self.sa3 = SALayerSeg(128) self.sa4 = SALayerSeg(128) self.conv_fuse = nn.Sequential( 
class PartSegLoss(nn.Module):
    """Cross-entropy loss over per-point part logits."""

    def __init__(self, eps=0.2):
        """Store ``eps`` and create the underlying ``nn.CrossEntropyLoss``."""
        super().__init__()
        # NOTE(review): ``eps`` is kept as an attribute but is not read
        # anywhere in this class.
        self.eps = eps
        self.loss = nn.CrossEntropyLoss()

    def forward(self, logits, y):
        """Return the cross-entropy between ``logits`` and the targets ``y``.

        ``logits`` is indexed as (batch, classes, points); it is rearranged
        to one row per point before being handed to the criterion, so ``y``
        must hold one class index per row.
        """
        n_classes = logits.shape[1]
        per_point = logits.permute(0, 2, 1).reshape(-1, n_classes)
        return self.loss(per_point, y)
def shuffle_data(data, labels):
    """Apply one random permutation to both ``data`` and ``labels``.

    Input:
        data: array indexed along its first axis (B, ...)
        labels: array with ``len(labels)`` entries (B, ...)
    Return:
        the permuted data, the permuted labels and the permutation itself
    """
    order = np.arange(len(labels))
    # In-place shuffle driven by NumPy's global random state.
    np.random.shuffle(order)
    shuffled_data = data[order, ...]
    shuffled_labels = labels[order]
    return shuffled_data, shuffled_labels, order
def rotate_point_cloud_z(batch_data):
    """Randomly rotate each point cloud to augment the dataset.

    One angle in [0, 2*pi) is drawn per cloud from NumPy's global random
    state. The rotation matrix mixes the first two coordinates and leaves
    the third one unchanged.

    Input:
        BxNx3 array, original batch of point clouds
    Return:
        BxNx3 float32 array, rotated batch of point clouds
    """
    result = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        angle = np.random.uniform() * 2 * np.pi
        c = np.cos(angle)
        s = np.sin(angle)
        rot = np.array(
            [
                [c, s, 0],
                [-s, c, 0],
                [0, 0, 1],
            ]
        )
        cloud = batch_data[k, ...].reshape((-1, 3))
        result[k, ...] = np.dot(cloud, rot)
    return result
def rotate_point_cloud_by_angle(batch_data, rotation_angle):
    """Rotate every point cloud in the batch by the same angle.

    The rotation matrix mixes the first and third coordinates and leaves the
    second one unchanged. Only channels ``0:3`` are rotated and written; any
    further channels of the returned array stay zero.

    Input:
        batch_data: BxNxC array (C >= 3), original batch of point clouds
        rotation_angle: scalar angle in radians
    Return:
        float32 array with the shape of ``batch_data``; the input is not
        modified
    """
    # The angle is shared by the whole batch, so the matrix is built once
    # instead of once per cloud.
    cosval = np.cos(rotation_angle)
    sinval = np.sin(rotation_angle)
    rotation_matrix = np.array(
        [[cosval, 0, sinval], [0, 1, 0], [-sinval, 0, cosval]]
    )

    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        shape_pc = batch_data[k, :, 0:3]
        rotated_data[k, :, 0:3] = np.dot(
            shape_pc.reshape((-1, 3)), rotation_matrix
        )
    return rotated_data
def rotate_perturbation_point_cloud(
    batch_data, angle_sigma=0.06, angle_clip=0.18
):
    """Randomly perturb the point clouds by small rotations.

    For every cloud three angles are drawn as ``angle_sigma * randn`` and
    clipped to ``[-angle_clip, angle_clip]``; the cloud is multiplied by the
    product of the three per-axis rotation matrices.

    Input:
        BxNx3 array, original batch of point clouds
    Return:
        BxNx3 float32 array, rotated batch of point clouds
    """
    perturbed = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        raw_angles = angle_sigma * np.random.randn(3)
        angles = np.clip(raw_angles, -angle_clip, angle_clip)

        cos_x, sin_x = np.cos(angles[0]), np.sin(angles[0])
        cos_y, sin_y = np.cos(angles[1]), np.sin(angles[1])
        cos_z, sin_z = np.cos(angles[2]), np.sin(angles[2])

        rot_x = np.array([[1, 0, 0], [0, cos_x, -sin_x], [0, sin_x, cos_x]])
        rot_y = np.array([[cos_y, 0, sin_y], [0, 1, 0], [-sin_y, 0, cos_y]])
        rot_z = np.array([[cos_z, -sin_z, 0], [sin_z, cos_z, 0], [0, 0, 1]])
        # Combined matrix is Rz * (Ry * Rx).
        combined = np.dot(rot_z, np.dot(rot_y, rot_x))

        cloud = batch_data[k, ...].reshape((-1, 3))
        perturbed[k, ...] = np.dot(cloud, combined)
    return perturbed
def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
    """Randomly replace points of each cloud with that cloud's first point.

    For every cloud a dropout ratio is drawn uniformly from
    ``[0, max_dropout_ratio)``; each point is then selected independently
    when a fresh uniform sample is ``<=`` that ratio, and selected points are
    overwritten with the cloud's first point so the array shape is kept.

    Input:
        batch_pc: BxNx3 array; it is modified IN PLACE
        max_dropout_ratio: upper bound of the per-cloud dropout ratio
    Return:
        the same ``batch_pc`` array object
    """
    num_points = batch_pc.shape[1]
    for b in range(batch_pc.shape[0]):
        dropout_ratio = np.random.random() * max_dropout_ratio
        drop_mask = np.random.random(num_points) <= dropout_ratio
        if drop_mask.any():
            # Overwrite instead of deleting so every cloud keeps N points.
            batch_pc[b, drop_mask, :] = batch_pc[b, 0, :]
    return batch_pc
import DataLoader torch.backends.cudnn.enabled = False parser = argparse.ArgumentParser() parser.add_argument("--dataset-path", type=str, default="") parser.add_argument("--load-model-path", type=str, default="") parser.add_argument("--save-model-path", type=str, default="") parser.add_argument("--num-epochs", type=int, default=250) parser.add_argument("--num-workers", type=int, default=8) parser.add_argument("--batch-size", type=int, default=32) args = parser.parse_args() num_workers = args.num_workers batch_size = args.batch_size data_filename = "modelnet40_normal_resampled.zip" download_path = os.path.join(get_download_dir(), data_filename) local_path = args.dataset_path or os.path.join( get_download_dir(), "modelnet40_normal_resampled" ) if not os.path.exists(local_path): download( "https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip", download_path, verify_ssl=False, ) from zipfile import ZipFile with ZipFile(download_path) as z: z.extractall(path=get_download_dir()) CustomDataLoader = partial( DataLoader, num_workers=num_workers, batch_size=batch_size, shuffle=True, drop_last=True, ) def train(net, opt, scheduler, train_loader, dev): net.train() total_loss = 0 num_batches = 0 total_correct = 0 count = 0 loss_f = nn.CrossEntropyLoss() start_time = time.time() with tqdm.tqdm(train_loader, ascii=True) as tq: for data, label in tq: data = data.data.numpy() data = provider.random_point_dropout(data) data[:, :, 0:3] = provider.random_scale_point_cloud(data[:, :, 0:3]) data[:, :, 0:3] = provider.jitter_point_cloud(data[:, :, 0:3]) data[:, :, 0:3] = provider.shift_point_cloud(data[:, :, 0:3]) data = torch.tensor(data) label = label[:, 0] num_examples = label.shape[0] data, label = data.to(dev), label.to(dev).squeeze().long() opt.zero_grad() logits = net(data) loss = loss_f(logits, label) loss.backward() opt.step() _, preds = logits.max(1) num_batches += 1 count += num_examples loss = loss.item() correct = (preds == label).sum().item() total_loss += 
loss total_correct += correct tq.set_postfix( { "AvgLoss": "%.5f" % (total_loss / num_batches), "AvgAcc": "%.5f" % (total_correct / count), } ) print( "[Train] AvgLoss: {:.5}, AvgAcc: {:.5}, Time: {:.5}s".format( total_loss / num_batches, total_correct / count, time.time() - start_time, ) ) scheduler.step() def evaluate(net, test_loader, dev): net.eval() total_correct = 0 count = 0 start_time = time.time() with torch.no_grad(): with tqdm.tqdm(test_loader, ascii=True) as tq: for data, label in tq: label = label[:, 0] num_examples = label.shape[0] data, label = data.to(dev), label.to(dev).squeeze().long() logits = net(data) _, preds = logits.max(1) correct = (preds == label).sum().item() total_correct += correct count += num_examples tq.set_postfix({"AvgAcc": "%.5f" % (total_correct / count)}) print( "[Test] AvgAcc: {:.5}, Time: {:.5}s".format( total_correct / count, time.time() - start_time ) ) return total_correct / count dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") net = PointTransformerCLS() net = net.to(dev) if args.load_model_path: net.load_state_dict( torch.load(args.load_model_path, weights_only=False, map_location=dev) ) opt = torch.optim.SGD( net.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9 ) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( opt, T_max=args.num_epochs ) train_dataset = ModelNetDataLoader(local_path, 1024, split="train") test_dataset = ModelNetDataLoader(local_path, 1024, split="test") train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True, ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, drop_last=True, ) best_test_acc = 0 for epoch in range(args.num_epochs): print("Epoch #{}: ".format(epoch)) train(net, opt, scheduler, train_loader, dev) if (epoch + 1) % 1 == 0: test_acc = evaluate(net, test_loader, dev) if test_acc > best_test_acc: best_test_acc = 
test_acc if args.save_model_path: torch.save(net.state_dict(), args.save_model_path) print("Current test acc: %.5f (best: %.5f)" % (test_acc, best_test_acc)) print() ================================================ FILE: examples/pytorch/pointcloud/pct/train_partseg.py ================================================ import argparse import time from functools import partial import dgl import numpy as np import provider import torch import torch.optim as optim import tqdm from pct import PartSegLoss, PointTransformerSeg from ShapeNet import ShapeNet from torch.utils.data import DataLoader parser = argparse.ArgumentParser() parser.add_argument("--dataset-path", type=str, default="") parser.add_argument("--load-model-path", type=str, default="") parser.add_argument("--save-model-path", type=str, default="") parser.add_argument("--num-epochs", type=int, default=500) parser.add_argument("--num-workers", type=int, default=8) parser.add_argument("--batch-size", type=int, default=16) parser.add_argument("--tensorboard", action="store_true") args = parser.parse_args() num_workers = args.num_workers batch_size = args.batch_size def collate(samples): graphs, cat = map(list, zip(*samples)) return dgl.batch(graphs), cat CustomDataLoader = partial( DataLoader, num_workers=num_workers, batch_size=batch_size, shuffle=True, drop_last=True, ) def train(net, opt, scheduler, train_loader, dev): category_list = sorted(list(shapenet.seg_classes.keys())) eye_mat = np.eye(16) net.train() total_loss = 0 num_batches = 0 total_correct = 0 count = 0 start = time.time() with tqdm.tqdm(train_loader, ascii=True) as tq: for data, label, cat in tq: num_examples = data.shape[0] data = data.to(dev, dtype=torch.float) label = label.to(dev, dtype=torch.long).view(-1) opt.zero_grad() cat_ind = [category_list.index(c) for c in cat] # An one-hot encoding for the object category cat_tensor = torch.tensor(eye_mat[cat_ind]).to( dev, dtype=torch.float ) cat_tensor = cat_tensor.view(num_examples, 16, 1) 
def mIoU(preds, label, cat, cat_miou, seg_classes):
    """Accumulate per-shape mean IoU into the per-category totals.

    For every shape ``i`` the IoU of each part class listed in
    ``seg_classes[cat[i]]`` is computed between ``preds[i]`` and
    ``label[i]``; a part that appears in neither counts as IoU 1. The mean
    over the parts is added to ``cat_miou[cat[i]][0]`` and the shape counter
    ``cat_miou[cat[i]][1]`` is incremented.

    Input:
        preds: (B, N) integer array of predicted part ids
        label: (B, N) integer array of ground-truth part ids
        cat: sequence of B category names
        cat_miou: dict mapping category name -> [iou_sum, shape_count];
            updated IN PLACE
        seg_classes: dict mapping category name -> list of part ids
    Return:
        the same ``cat_miou`` dict
    """
    for i in range(preds.shape[0]):
        parts = seg_classes[cat[i]]
        shape_iou = 0
        for cls in parts:
            # Boolean masks give the same counts as index sets without
            # materialising Python sets for every part.
            pred_mask = preds[i, :] == cls
            label_mask = label[i, :] == cls
            union = np.count_nonzero(pred_mask | label_mask)
            inter = np.count_nonzero(pred_mask & label_mask)
            if union == 0:
                shape_iou += 1
            else:
                shape_iou += inter / union
        shape_iou /= len(parts)
        cat_miou[cat[i]][0] += shape_iou
        cat_miou[cat[i]][1] += 1

    return cat_miou
v[0] count += v[1] per_cat_miou += v[0] / v[1] per_cat_count += 1 tq.set_postfix( { "mIoU": "%.5f" % (miou / count), "per Category mIoU": "%.5f" % (per_cat_miou / per_cat_count), } ) print( "[Test] mIoU: %.5f, per Category mIoU: %.5f" % (miou / count, per_cat_miou / per_cat_count) ) if per_cat_verbose: print("-" * 60) print("Per-Category mIoU:") for k, v in cat_miou.items(): if v[1] > 0: print("%s mIoU=%.5f" % (k, v[0] / v[1])) else: print("%s mIoU=%.5f" % (k, 1)) print("-" * 60) return miou / count, per_cat_miou / per_cat_count dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") net = PointTransformerSeg() net = net.to(dev) if args.load_model_path: net.load_state_dict( torch.load(args.load_model_path, weights_only=False, map_location=dev) ) opt = torch.optim.SGD( net.parameters(), lr=0.01, weight_decay=1e-4, momentum=0.9 ) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( opt, T_max=args.num_epochs ) L = PartSegLoss() shapenet = ShapeNet(2048, normal_channel=False) train_loader = CustomDataLoader(shapenet.trainval()) test_loader = CustomDataLoader(shapenet.test()) # Tensorboard if args.tensorboard: import torchvision from torch.utils.tensorboard import SummaryWriter from torchvision import datasets, transforms writer = SummaryWriter() # Select 50 distinct colors for different parts color_map = torch.tensor( [ [47, 79, 79], [139, 69, 19], [112, 128, 144], [85, 107, 47], [139, 0, 0], [128, 128, 0], [72, 61, 139], [0, 128, 0], [188, 143, 143], [60, 179, 113], [205, 133, 63], [0, 139, 139], [70, 130, 180], [205, 92, 92], [154, 205, 50], [0, 0, 139], [50, 205, 50], [250, 250, 250], [218, 165, 32], [139, 0, 139], [10, 10, 10], [176, 48, 96], [72, 209, 204], [153, 50, 204], [255, 69, 0], [255, 145, 0], [0, 0, 205], [255, 255, 0], [0, 255, 0], [233, 150, 122], [220, 20, 60], [0, 191, 255], [160, 32, 240], [192, 192, 192], [173, 255, 47], [218, 112, 214], [216, 191, 216], [255, 127, 80], [255, 0, 255], [100, 149, 237], [128, 128, 128], [221, 160, 221], 
def pc_normalize(pc):
    """Center a point cloud on its centroid and scale it to unit radius.

    Input:
        pc: (N, D) array of points
    Return:
        a new (N, D) array whose centroid is the origin and whose farthest
        point lies at distance 1; the input array is not modified. A cloud
        whose points all coincide is only centered (all zeros) instead of
        being divided by a zero radius.
    """
    centroid = np.mean(pc, axis=0)
    pc = pc - centroid
    m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
    # A zero radius would turn every coordinate into NaN.
    if m > 0:
        pc = pc / m
    return pc
class ModelNetDataLoader(Dataset):
    """Dataset over the text files of a ModelNet40 directory tree.

    Each item is a ``(point_set, cls)`` pair loaded lazily from a
    comma-separated text file and kept in an in-memory cache of bounded
    size.
    """

    def __init__(
        self,
        root,
        npoint=1024,
        split="train",
        fps=False,
        normal_channel=True,
        cache_size=15000,
    ):
        """
        Input:
            root: the root path to the local data files
            npoint: number of points from each cloud
            split: which split of the data, 'train' or 'test'
            fps: whether to sample points with farthest point sampler
            normal_channel: whether to use additional channel
            cache_size: the cache size of in-memory point clouds
        """
        self.root = root
        self.npoints = npoint
        self.fps = fps
        self.catfile = os.path.join(self.root, "modelnet40_shape_names.txt")
        self.cat = self._read_lines(self.catfile)
        self.classes = dict(zip(self.cat, range(len(self.cat))))
        self.normal_channel = normal_channel

        shape_ids = {
            "train": self._read_lines(
                os.path.join(self.root, "modelnet40_train.txt")
            ),
            "test": self._read_lines(
                os.path.join(self.root, "modelnet40_test.txt")
            ),
        }

        assert split == "train" or split == "test"
        # A shape id is "<shape_name>_<index>"; the name itself may contain
        # underscores, so only the last component is dropped.
        shape_names = ["_".join(x.split("_")[0:-1]) for x in shape_ids[split]]
        # list of (shape_name, shape_txt_file_path) tuple
        self.datapath = [
            (name, os.path.join(self.root, name, shape_id) + ".txt")
            for name, shape_id in zip(shape_names, shape_ids[split])
        ]
        print("The size of %s data is %d" % (split, len(self.datapath)))

        self.cache_size = cache_size
        self.cache = {}

    @staticmethod
    def _read_lines(path):
        """Return the right-stripped lines of ``path``, closing the file."""
        with open(path) as f:
            return [line.rstrip() for line in f]

    def __len__(self):
        """Return the number of shapes in the selected split."""
        return len(self.datapath)

    def _get_item(self, index):
        """Load (or fetch from the cache) the sample at ``index``.

        Returns ``(point_set, cls)`` where ``cls`` is a one-element int32
        array holding the class index.
        """
        if index in self.cache:
            point_set, cls = self.cache[index]
        else:
            fn = self.datapath[index]
            cls = self.classes[self.datapath[index][0]]
            cls = np.array([cls]).astype(np.int32)
            point_set = np.loadtxt(fn[1], delimiter=",").astype(np.float32)
            if self.fps:
                point_set = farthest_point_sample(point_set, self.npoints)
            else:
                # Without FPS simply keep the first ``npoints`` rows.
                point_set = point_set[0 : self.npoints, :]

            # Only the xyz columns are normalised; other columns are kept.
            point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])

            if not self.normal_channel:
                point_set = point_set[:, 0:3]

            if len(self.cache) < self.cache_size:
                self.cache[index] = (point_set, cls)

        return point_set, cls

    def __getitem__(self, index):
        """Return the ``(point_set, cls)`` pair for ``index``."""
        return self._get_item(index)
# How to Run For point cloud classification, run with ```python python train_cls.py --opt [sgd/adam] ``` For point cloud part-segmentation, run with ```python python train_partseg.py --opt [sgd/adam] ``` ================================================ FILE: examples/pytorch/pointcloud/point_transformer/ShapeNet.py ================================================ import json import os from zipfile import ZipFile import dgl import numpy as np import tqdm from dgl.data.utils import download, get_download_dir from scipy.sparse import csr_matrix from torch.utils.data import Dataset class ShapeNet(object): def __init__(self, num_points=2048, normal_channel=True): self.num_points = num_points self.normal_channel = normal_channel SHAPENET_DOWNLOAD_URL = "https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip" download_path = get_download_dir() data_filename = ( "shapenetcore_partanno_segmentation_benchmark_v0_normal.zip" ) data_path = os.path.join( download_path, "shapenetcore_partanno_segmentation_benchmark_v0_normal", ) if not os.path.exists(data_path): local_path = os.path.join(download_path, data_filename) if not os.path.exists(local_path): download(SHAPENET_DOWNLOAD_URL, local_path, verify_ssl=False) with ZipFile(local_path) as z: z.extractall(path=download_path) synset_file = "synsetoffset2category.txt" with open(os.path.join(data_path, synset_file)) as f: synset = [t.split("\n")[0].split("\t") for t in f.readlines()] self.synset_dict = {} for syn in synset: self.synset_dict[syn[1]] = syn[0] self.seg_classes = { "Airplane": [0, 1, 2, 3], "Bag": [4, 5], "Cap": [6, 7], "Car": [8, 9, 10, 11], "Chair": [12, 13, 14, 15], "Earphone": [16, 17, 18], "Guitar": [19, 20, 21], "Knife": [22, 23], "Lamp": [24, 25, 26, 27], "Laptop": [28, 29], "Motorbike": [30, 31, 32, 33, 34, 35], "Mug": [36, 37], "Pistol": [38, 39, 40], "Rocket": [41, 42, 43], "Skateboard": [44, 45, 46], "Table": [47, 48, 49], } train_split_json = 
class ShapeNetDataset(Dataset):
    """In-memory dataset over one split of the ShapeNet part files.

    Every file of the split is parsed eagerly in ``__init__``; each item is a
    random re-sample (with replacement) of ``num_points`` rows of one shape.
    """

    def __init__(self, shapenet, mode, num_points, normal_channel=True):
        """Load every file of the requested split.

        Input:
            shapenet: object providing ``train_file_list``,
                ``val_file_list``, ``test_file_list`` and ``synset_dict``
            mode: one of "train", "valid", "test", "trainval"
            num_points: number of points sampled per item
            normal_channel: keep 6 feature columns when true, else 3
        Raises:
            ValueError: if ``mode`` is not one of the supported splits
        """
        super().__init__()
        self.mode = mode
        self.num_points = num_points
        self.dim = 6 if normal_channel else 3

        if mode == "train":
            self.file_list = shapenet.train_file_list
        elif mode == "valid":
            self.file_list = shapenet.val_file_list
        elif mode == "test":
            self.file_list = shapenet.test_file_list
        elif mode == "trainval":
            self.file_list = shapenet.train_file_list + shapenet.val_file_list
        else:
            # Raising a bare string is itself a TypeError in Python 3.
            raise ValueError("Not supported `mode`: %r" % (mode,))

        data_list = []
        label_list = []
        category_list = []
        print("Loading data from split " + self.mode)
        for fn in tqdm.tqdm(self.file_list, ascii=True):
            with open(fn) as f:
                data = np.array(
                    [line.split("\n")[0].split(" ") for line in f]
                ).astype(float)
            data_list.append(data[:, 0 : self.dim])
            # Column 6 of every row holds the part label.
            label_list.append(data[:, 6].astype(int))
            # The parent directory name is the synset id of the shape.
            synset = os.path.basename(os.path.dirname(fn))
            category_list.append(shapenet.synset_dict[synset])
        self.data = data_list
        self.label = label_list
        self.category = category_list

    def translate(self, x, scale=(2 / 3, 3 / 2), shift=(-0.2, 0.2), size=3):
        """Scale and shift ``x`` by random per-column factors.

        One scale in ``scale`` and one offset in ``shift`` are drawn
        uniformly for each of the ``size`` columns. Returns a float32 array.
        """
        xyz1 = np.random.uniform(low=scale[0], high=scale[1], size=[size])
        xyz2 = np.random.uniform(low=shift[0], high=shift[1], size=[size])
        x = np.add(np.multiply(x, xyz1), xyz2).astype("float32")
        return x

    def __len__(self):
        """Return the number of shapes loaded for this split."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(points, labels, category)`` for shape ``i``.

        ``num_points`` row indices are drawn with replacement, so the result
        has a fixed size regardless of how many points the shape has.
        """
        inds = np.random.choice(
            self.data[i].shape[0], self.num_points, replace=True
        )
        x = self.data[i][inds, : self.dim]
        y = self.label[i][inds]
        cat = self.category[i]
        # NOTE(review): augmentation only runs for mode == "train"; a
        # "trainval" dataset is returned un-augmented -- confirm intended.
        if self.mode == "train":
            x = self.translate(x, size=self.dim)
        x = x.astype(float)
        y = y.astype(int)
        return x, y, cat
class KNNConv(nn.Module):
    """
    Feature aggregation: a stack of 1x1 Conv2d + BatchNorm2d + ReLU layers
    followed by a max over the neighbor axis.

    Input to the constructor:
        sizes: channel widths; layer i maps sizes[i - 1] -> sizes[i]
        batch_size: number of point clouds per batch, used to un-flatten
            the mailbox in forward()
    """

    def __init__(self, sizes, batch_size):
        super(KNNConv, self).__init__()
        self.batch_size = batch_size
        self.conv = nn.ModuleList()
        self.bn = nn.ModuleList()
        for i in range(1, len(sizes)):
            self.conv.append(nn.Conv2d(sizes[i - 1], sizes[i], 1))
            self.bn.append(nn.BatchNorm2d(sizes[i]))

    def forward(self, nodes):
        """
        Reduce function: pool the "agg_feat" messages of each node.

        Input:
            nodes: object whose mailbox["agg_feat"] is a 3-D tensor; its
                first axis is split into (batch_size, -1)
        Return:
            {"new_feat": 2-D tensor with one row per node}
        """
        mailbox = nodes.mailbox["agg_feat"]
        shape = mailbox.shape
        # (nodes, k, f) -> (batch, nodes/batch, k, f) -> (batch, f, k, nodes/batch)
        h = mailbox.view(self.batch_size, -1, shape[1], shape[2]).permute(
            0, 3, 2, 1
        )
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)
        # Max over the neighbor axis.
        h = torch.max(h, 2)[0]
        feat_dim = h.shape[1]
        h = h.permute(0, 2, 1).reshape(-1, feat_dim)
        return {"new_feat": h}

    def group_all(self, pos, feat):
        """
        Feature aggregation and pooling for the non-sampling layer

        Input:
            pos: [B, N, C] positions
            feat: [B, N, D] features, or None to use positions only
        Return:
            new_pos: [B, 1, C] zeros, on the same device and with the same
                dtype as `pos`
            h: [B, D'] features max-pooled over all N points
        """
        if feat is not None:
            h = torch.cat([pos, feat], 2)
        else:
            h = pos
        B, N, _ = h.shape
        C = pos.shape[2]
        # Allocate next to `pos`; a bare torch.zeros() always lands on the
        # CPU in the default dtype.
        new_pos = pos.new_zeros(B, 1, C)
        h = h.permute(0, 2, 1).view(B, -1, N, 1)
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)
        h = torch.max(h[:, :, :, 0], 2)[0]  # [B,D]
        return new_pos, h
class SwapAxes(nn.Module):
    """
    Module form of a two-axis transpose, usable inside nn.Sequential.
    """

    def __init__(self, dim1=1, dim2=2):
        super().__init__()
        self.dim1, self.dim2 = dim1, dim2

    def forward(self, x):
        """Return `x` with axes `dim1` and `dim2` exchanged."""
        return torch.transpose(x, self.dim1, self.dim2)
class PointTransformerBlock(nn.Module):
    """
    Vector self-attention over the k nearest neighbors of every point,
    with a residual connection around the block.

    Input to the constructor:
        input_dim: width of the incoming (and outgoing) point features
        n_neighbors: number of nearest neighbors attended to
        transformer_dim: inner attention width; defaults to input_dim
    """

    def __init__(self, input_dim, n_neighbors, transformer_dim=None):
        super().__init__()
        dim = input_dim if transformer_dim is None else transformer_dim

        # Projections into and out of the attention width.
        self.fc1 = nn.Linear(input_dim, dim)
        self.fc2 = nn.Linear(dim, input_dim)
        # Positional encoding computed from 3-D relative offsets.
        self.fc_delta = nn.Sequential(
            nn.Linear(3, dim), nn.ReLU(), nn.Linear(dim, dim)
        )
        # Maps (query - key + positional encoding) to attention logits.
        self.fc_gamma = nn.Sequential(
            nn.Linear(dim, dim), nn.ReLU(), nn.Linear(dim, dim)
        )
        self.w_qs = nn.Linear(dim, dim, bias=False)
        self.w_ks = nn.Linear(dim, dim, bias=False)
        self.w_vs = nn.Linear(dim, dim, bias=False)
        self.n_neighbors = n_neighbors

    def forward(self, x, pos):
        """
        Input:
            x: point features, b x n x input_dim
            pos: point positions, b x n x 3
        Return:
            res: updated features, same shape as x
            attn: attention weights, b x n x k x f
        """
        pairwise = square_distance(pos, pos)
        neighbor_idx = pairwise.argsort()[:, :, : self.n_neighbors]  # b x n x k
        neighbor_pos = index_points(pos, neighbor_idx)

        hidden = self.fc1(x)
        query = self.w_qs(hidden)
        key = index_points(self.w_ks(hidden), neighbor_idx)
        value = index_points(self.w_vs(hidden), neighbor_idx)

        # b x n x k x f
        rel_enc = self.fc_delta(pos.unsqueeze(2) - neighbor_pos)

        logits = self.fc_gamma(query.unsqueeze(2) - key + rel_enc)
        # Softmax runs over the neighbor axis (dim=-2), per feature channel.
        attn = torch.softmax(logits / np.sqrt(key.size(-1)), dim=-2)

        pooled = torch.einsum("bmnf,bmnf->bmf", attn, value + rel_enc)
        return self.fc2(pooled) + x, attn
class PointTransformerCLS(nn.Module):
    """
    Classification model: PointTransformer backbone, mean pooling over
    points, then a three-layer MLP producing `out_classes` logits.
    """

    def __init__(
        self,
        out_classes,
        batch_size,
        n_points=1024,
        feature_dim=3,
        n_blocks=4,
        downsampling_rate=4,
        hidden_dim=32,
        transformer_dim=None,
        n_neighbors=16,
    ):
        super().__init__()
        self.backbone = PointTransformer(
            n_points,
            batch_size,
            feature_dim=feature_dim,
            n_blocks=n_blocks,
            downsampling_rate=downsampling_rate,
            hidden_dim=hidden_dim,
            transformer_dim=transformer_dim,
            n_neighbors=n_neighbors,
        )
        # The backbone doubles the feature width once per block.
        backbone_dim = hidden_dim * 2**n_blocks
        head = nn.Sequential(
            nn.Linear(backbone_dim, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, out_classes),
        )
        # The head is registered under both names, in this order.
        self.out = head
        self.fc2 = head

    def forward(self, x):
        """Return class logits for a batch of point clouds `x`."""
        features, _ = self.backbone(x)
        return self.out(features.mean(dim=1))
def normalize_data(batch_data):
    """Normalize the batch data: center every cloud at the origin and scale
    it so the farthest point lies at distance 1.

    A cloud whose points all coincide (zero radius) is only centered, and a
    cloud with no points is left empty, instead of producing NaN or raising.

    Input:
        BxNxC array
    Output:
        BxNxC array (float64)
    """
    B, N, C = batch_data.shape
    normal_data = np.zeros((B, N, C))
    if N == 0:
        # Nothing to center or scale; np.max would fail on an empty cloud.
        return normal_data
    for b in range(B):
        pc = batch_data[b]
        centroid = np.mean(pc, axis=0)
        pc = pc - centroid
        m = np.max(np.sqrt(np.sum(pc**2, axis=1)))
        if m > 0:
            pc = pc / m
        normal_data[b] = pc
    return normal_data
def rotate_point_cloud_z(batch_data):
    """Randomly rotate every point cloud about the z axis.

    One angle is drawn uniformly from [0, 2*pi) per cloud; the third
    coordinate is left unchanged by the rotation.
    Input:
        BxNx3 array, original batch of point clouds
    Return:
        BxNx3 float32 array, rotated batch of point clouds
    """
    result = np.zeros(batch_data.shape, dtype=np.float32)
    num_clouds = batch_data.shape[0]
    for idx in range(num_clouds):
        theta = np.random.uniform() * 2 * np.pi
        c, s = np.cos(theta), np.sin(theta)
        rot = np.array(
            [
                [c, s, 0],
                [-s, c, 0],
                [0, 0, 1],
            ]
        )
        points = batch_data[idx, ...].reshape((-1, 3))
        result[idx, ...] = points.dot(rot)
    return result
def rotate_perturbation_point_cloud_with_normal(
    batch_data, angle_sigma=0.06, angle_clip=0.18
):
    """Randomly perturb point clouds and their normals by small rotations.

    Per cloud, three angles are drawn from N(0, angle_sigma^2), clipped to
    [-angle_clip, angle_clip], and combined as R = Rz . Ry . Rx. Channels
    0:3 and 3:6 are each multiplied by R.
    Input:
        BxNx6 array, original batch of point clouds and point normals
    Return:
        BxNx6 float32 array, rotated batch of point clouds and normals
    """
    result = np.zeros(batch_data.shape, dtype=np.float32)
    for idx in range(batch_data.shape[0]):
        ax, ay, az = np.clip(
            angle_sigma * np.random.randn(3), -angle_clip, angle_clip
        )
        rot_x = np.array(
            [
                [1, 0, 0],
                [0, np.cos(ax), -np.sin(ax)],
                [0, np.sin(ax), np.cos(ax)],
            ]
        )
        rot_y = np.array(
            [
                [np.cos(ay), 0, np.sin(ay)],
                [0, 1, 0],
                [-np.sin(ay), 0, np.cos(ay)],
            ]
        )
        rot_z = np.array(
            [
                [np.cos(az), -np.sin(az), 0],
                [np.sin(az), np.cos(az), 0],
                [0, 0, 1],
            ]
        )
        rot = np.dot(rot_z, np.dot(rot_y, rot_x))

        xyz = batch_data[idx, :, 0:3].reshape((-1, 3))
        normals = batch_data[idx, :, 3:6].reshape((-1, 3))
        result[idx, :, 0:3] = np.dot(xyz, rot)
        result[idx, :, 3:6] = np.dot(normals, rot)
    return result
def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle):
    """Rotate the point clouds and their normals about the y axis by a
    given angle.

    The same rotation is applied to every cloud, so the matrix is built
    once and applied to the whole batch.
    Input:
        BxNx6 array, original batch of point clouds with normal
        scalar, angle of rotation
    Return:
        BxNx6 float32 array, rotated batch of point clouds with normal
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    cosval = np.cos(rotation_angle)
    sinval = np.sin(rotation_angle)
    rotation_matrix = np.array(
        [[cosval, 0, sinval], [0, 1, 0], [-sinval, 0, cosval]]
    )
    # np.dot contracts the last axis of the BxNx3 slices with the matrix.
    rotated_data[:, :, 0:3] = np.dot(batch_data[:, :, 0:3], rotation_matrix)
    rotated_data[:, :, 3:6] = np.dot(batch_data[:, :, 3:6], rotation_matrix)
    return rotated_data
def shift_point_cloud(batch_data, shift_range=0.1):
    """Translate every point cloud by its own random 3-D offset.

    Offsets are drawn uniformly from [-shift_range, shift_range]. The
    input array is modified in place and also returned.
    Input:
        BxNx3 array, original batch of point clouds
    Return:
        BxNx3 array, shifted batch of point clouds
    """
    num_clouds, _, _ = batch_data.shape
    offsets = np.random.uniform(-shift_range, shift_range, (num_clouds, 3))
    # Iterating the batch yields views, so `+=` writes into batch_data.
    for cloud, offset in zip(batch_data, offsets):
        cloud += offset
    return batch_data
def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
    """Randomly replace points of each cloud with that cloud's first point.

    Per cloud, a dropout ratio is drawn uniformly from
    [0, max_dropout_ratio); each point is then dropped independently with
    that probability. Dropped points are overwritten with the cloud's first
    point, so the array shape is unchanged. The input is modified in place
    and also returned.
    Input:
        batch_pc: BxNx3 array
        max_dropout_ratio: upper bound of the per-cloud dropout ratio
    Return:
        BxNx3 array, the same object as batch_pc
    """
    for cloud in batch_pc:
        dropout_ratio = np.random.random() * max_dropout_ratio  # 0~0.875
        drop_mask = np.random.random(cloud.shape[0]) <= dropout_ratio
        if drop_mask.any():
            cloud[drop_mask, :] = cloud[0, :]  # set to the first point
    return batch_pc
"modelnet40_normal_resampled" ) if not os.path.exists(local_path): download( "https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip", download_path, verify_ssl=False, ) from zipfile import ZipFile with ZipFile(download_path) as z: z.extractall(path=get_download_dir()) CustomDataLoader = partial( DataLoader, num_workers=num_workers, batch_size=batch_size, shuffle=True, drop_last=True, ) def train(net, opt, scheduler, train_loader, dev): net.train() total_loss = 0 num_batches = 0 total_correct = 0 count = 0 loss_f = nn.CrossEntropyLoss() start_time = time.time() with tqdm.tqdm(train_loader, ascii=True) as tq: for data, label in tq: data = data.data.numpy() data = provider.random_point_dropout(data) data[:, :, 0:3] = provider.random_scale_point_cloud(data[:, :, 0:3]) data[:, :, 0:3] = provider.jitter_point_cloud(data[:, :, 0:3]) data[:, :, 0:3] = provider.shift_point_cloud(data[:, :, 0:3]) data = torch.tensor(data) label = label[:, 0] num_examples = label.shape[0] data, label = data.to(dev), label.to(dev).squeeze().long() opt.zero_grad() logits = net(data) loss = loss_f(logits, label) loss.backward() opt.step() _, preds = logits.max(1) num_batches += 1 count += num_examples loss = loss.item() correct = (preds == label).sum().item() total_loss += loss total_correct += correct tq.set_postfix( { "AvgLoss": "%.5f" % (total_loss / num_batches), "AvgAcc": "%.5f" % (total_correct / count), } ) print( "[Train] AvgLoss: {:.5}, AvgAcc: {:.5}, Time: {:.5}s".format( total_loss / num_batches, total_correct / count, time.time() - start_time, ) ) scheduler.step() def evaluate(net, test_loader, dev): net.eval() total_correct = 0 count = 0 start_time = time.time() with torch.no_grad(): with tqdm.tqdm(test_loader, ascii=True) as tq: for data, label in tq: label = label[:, 0] num_examples = label.shape[0] data, label = data.to(dev), label.to(dev).squeeze().long() logits = net(data) _, preds = logits.max(1) correct = (preds == label).sum().item() total_correct += correct 
# ---- train_cls.py: model / optimizer / data setup and the epoch loop ----
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 40 output classes; 6 input channels per point.
net = PointTransformerCLS(40, batch_size, feature_dim=6)
net = net.to(dev)
if args.load_model_path:
    # NOTE(review): weights_only=False allows arbitrary unpickling; only
    # load checkpoints from trusted sources.
    net.load_state_dict(
        torch.load(args.load_model_path, weights_only=False, map_location=dev)
    )

if args.opt == "sgd":
    # The optimizer strategy described in paper:
    opt = torch.optim.SGD(
        net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4
    )
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        opt, milestones=[120, 160], gamma=0.1
    )
elif args.opt == "adam":
    # The optimizer strategy proposed by
    # https://github.com/qq456cvb/Point-Transformers:
    opt = torch.optim.Adam(
        net.parameters(),
        lr=1e-3,
        betas=(0.9, 0.999),
        eps=1e-08,
        weight_decay=1e-4,
    )
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=50, gamma=0.3)
else:
    # Without this branch an unknown value surfaces later as a NameError
    # on `opt`.
    raise ValueError(
        "Unsupported --opt {!r}; expected 'sgd' or 'adam'".format(args.opt)
    )

train_dataset = ModelNetDataLoader(local_path, 1024, split="train")
test_dataset = ModelNetDataLoader(local_path, 1024, split="test")
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    drop_last=True,
)
# NOTE(review): drop_last=True also discards the final partial test batch;
# presumably required because the model is built for a fixed batch_size.
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    drop_last=True,
)

best_test_acc = 0

for epoch in range(args.num_epochs):
    print("Epoch #{}: ".format(epoch))
    train(net, opt, scheduler, train_loader, dev)
    # Evaluation interval is 1, i.e. evaluate after every epoch.
    if (epoch + 1) % 1 == 0:
        test_acc = evaluate(net, test_loader, dev)
        if test_acc > best_test_acc:
            best_test_acc = test_acc
            if args.save_model_path:
                torch.save(net.state_dict(), args.save_model_path)
        print("Current test acc: %.5f (best: %.5f)" % (test_acc, best_test_acc))
        print()
def collate(samples):
    """Merge ``(graph, category)`` samples into a single batch.

    Parameters
    ----------
    samples : iterable of 2-tuples
        Each element is unpacked as ``(graph, category)``.

    Returns
    -------
    tuple
        ``(batched_graph, categories)`` where ``batched_graph`` is the result
        of ``dgl.batch`` over all graphs and ``categories`` is a list holding
        the second element of every sample, in input order.

    Notes
    -----
    ``CustomDataLoader`` in this script does not pass this function as
    ``collate_fn``.
    """
    # Transpose the list of pairs into two parallel lists; unpacking raises
    # ValueError if the samples are not 2-tuples.
    graph_column, category_column = (list(column) for column in zip(*samples))
    batched_graph = dgl.batch(graph_column)
    return batched_graph, category_column
def _summarize_miou(cat_miou):
    """Reduce per-category ``[iou_sum, shape_count]`` pairs to two averages.

    Returns ``(instance_miou, category_miou)``: the mean IoU over all evaluated
    shapes, and the mean of the per-category mean IoUs. Categories with no
    evaluated shape are ignored; ``(0.0, 0.0)`` is returned if none was seen.
    """
    seen = [(iou_sum, n) for iou_sum, n in cat_miou.values() if n > 0]
    if not seen:
        return 0.0, 0.0
    instance_miou = sum(s for s, _ in seen) / sum(n for _, n in seen)
    category_miou = sum(s / n for s, n in seen) / len(seen)
    return instance_miou, category_miou


def evaluate(net, test_loader, dev, per_cat_verbose=False):
    """Evaluate part-segmentation quality of ``net`` on ``test_loader``.

    Parameters
    ----------
    net : torch.nn.Module
        Called as ``net(data, cat_tensor)``; its output is permuted with
        ``(0, 2, 1)`` and arg-maxed over dim 1 to get per-point predictions.
    test_loader : iterable
        Yields ``(data, label, cat)`` batches.
    dev : torch.device
        Device the batch tensors are moved to.
    per_cat_verbose : bool
        When true, also print the mean IoU of every category.

    Returns
    -------
    tuple of float
        ``(instance mIoU, per-category mIoU)`` over the whole loader.

    Notes
    -----
    ``cat_miou`` already accumulates sums over all batches seen so far, so the
    summary statistics are recomputed from it after every batch. Adding the
    cumulative totals to running counters on every batch (as was done before)
    counts early batches repeatedly and skews the reported metric.
    """
    category_list = sorted(shapenet.seg_classes.keys())
    eye_mat = np.eye(16)
    net.eval()

    # category name -> [sum of shape IoUs, number of shapes]
    cat_miou = {k: [0, 0] for k in shapenet.seg_classes.keys()}
    miou = 0.0
    per_cat_miou = 0.0

    with torch.no_grad():
        with tqdm.tqdm(test_loader, ascii=True) as tq:
            for data, label, cat in tq:
                num_examples = data.shape[0]
                data = data.to(dev, dtype=torch.float)
                label = label.to(dev, dtype=torch.long)
                cat_ind = [category_list.index(c) for c in cat]
                # One-hot object category, tiled 2048 times per example and
                # viewed as (num_examples, -1, 16).
                # NOTE(review): 2048 presumably has to equal the number of
                # points per cloud produced by the loader -- confirm.
                cat_tensor = (
                    torch.tensor(eye_mat[cat_ind])
                    .to(dev, dtype=torch.float)
                    .repeat(1, 2048)
                )
                cat_tensor = cat_tensor.view(num_examples, -1, 16)
                logits = net(data, cat_tensor).permute(0, 2, 1)
                _, preds = logits.max(1)

                cat_miou = mIoU(
                    preds.cpu().numpy(),
                    label.view(num_examples, -1).cpu().numpy(),
                    cat,
                    cat_miou,
                    shapenet.seg_classes,
                )
                # Recompute from the cumulative table instead of re-adding it.
                miou, per_cat_miou = _summarize_miou(cat_miou)
                tq.set_postfix(
                    {
                        "mIoU": "%.5f" % miou,
                        "per Category mIoU": "%.5f" % per_cat_miou,
                    }
                )
    print(
        "[Test] mIoU: %.5f, per Category mIoU: %.5f" % (miou, per_cat_miou)
    )
    if per_cat_verbose:
        print("-" * 60)
        print("Per-Category mIoU:")
        for k, v in cat_miou.items():
            if v[1] > 0:
                print("%s mIoU=%.5f" % (k, v[0] / v[1]))
            else:
                print("%s mIoU=%.5f" % (k, 1))
        print("-" * 60)
    return miou, per_cat_miou
def paint(batched_points, palette=None):
    """Colour every point according to its predicted part index.

    Parameters
    ----------
    batched_points : torch.Tensor
        Integer part indices of shape ``(B, N)``.
    palette : torch.Tensor, optional
        Colour table indexed by part id, one RGB row per part. Defaults to the
        module-level ``color_map``.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(B, N, 3)`` on the palette's device.

    Raises
    ------
    ValueError
        If ``batched_points`` is not 2-dimensional.
    """
    if palette is None:
        palette = color_map
    if batched_points.dim() != 2:
        raise ValueError(
            "expected a (B, N) tensor of part indices, got shape %s"
            % (tuple(batched_points.shape),)
        )
    # The colour table is created on the CPU while predictions may live on the
    # training device; a CPU tensor cannot be indexed with CUDA indices, so
    # bring the indices over to the table's device first.
    colored = palette[batched_points.to(palette.device)]
    return colored
def farthest_point_sample(point, npoint):
    """
    Sub-sample a point cloud with farthest point sampling:
        1. Start from a randomly chosen point
        2. Repeatedly pick the point whose squared distance to the already
           chosen set is largest
        3. Stop once npoint points have been picked
    Only the first three columns of each row are used as coordinates; the
    remaining columns are carried along unchanged.
    Input:
        point: pointcloud data, [N, D]
        npoint: number of samples
    Return:
        point: the sampled rows of the input, [npoint, D]
    """
    num_points, _ = point.shape
    coords = point[:, :3]
    picked = np.zeros((npoint,), dtype=np.int32)
    # Squared distance from every point to its nearest already-picked point.
    nearest_sq = np.full((num_points,), 1e10)
    current = np.random.randint(0, num_points)
    for slot in range(npoint):
        picked[slot] = current
        sq_to_current = np.sum((coords - coords[current, :]) ** 2, -1)
        nearest_sq = np.where(
            sq_to_current < nearest_sq, sq_to_current, nearest_sq
        )
        current = np.argmax(nearest_sq, -1)
    return point[picked]
os.path.join(self.root, shape_names[i], shape_ids[split][i]) + ".txt", ) for i in range(len(shape_ids[split])) ] print("The size of %s data is %d" % (split, len(self.datapath))) self.cache_size = cache_size self.cache = {} def __len__(self): return len(self.datapath) def _get_item(self, index): if index in self.cache: point_set, cls = self.cache[index] else: fn = self.datapath[index] cls = self.classes[self.datapath[index][0]] cls = np.array([cls]).astype(np.int32) point_set = np.loadtxt(fn[1], delimiter=",").astype(np.float32) if self.fps: point_set = farthest_point_sample(point_set, self.npoints) else: point_set = point_set[0 : self.npoints, :] point_set[:, 0:3] = pc_normalize(point_set[:, 0:3]) if not self.normal_channel: point_set = point_set[:, 0:3] if len(self.cache) < self.cache_size: self.cache[index] = (point_set, cls) return point_set, cls def __getitem__(self, index): return self._get_item(index) ================================================ FILE: examples/pytorch/pointcloud/pointnet/README.md ================================================ PointNet and PointNet++ for Point Cloud Classification and Segmentation ==== This is a reproduction of the papers - [PointNet: Deep Learning on Point Sets for 3D Classification and Segmentation](https://arxiv.org/abs/1612.00593). - [PointNet++: Deep Hierarchical Feature Learning on Point Sets in a Metric Space](https://arxiv.org/abs/1706.02413). 
# Performance

## Classification

| Model           | Dataset    | Metric   | Score - PyTorch  | Score - DGL | Time(s) - PyTorch | Time(s) - DGL |
|-----------------|------------|----------|------------------|-------------|-------------------|---------------|
| PointNet        | ModelNet40 | Accuracy | 89.2(Official)   | 89.3        | 181.8             | 95.0          |
| PointNet++(SSG) | ModelNet40 | Accuracy | 92.4             | 93.3        | 182.6             | 133.7         |
| PointNet++(MSG) | ModelNet40 | Accuracy | 92.8             | 93.3        | 383.6             | 240.5         |

## Part Segmentation

| Model           | Dataset    | Metric   | Score - PyTorch | Score - DGL | Time(s) - PyTorch | Time(s) - DGL |
|-----------------|------------|----------|-----------------|-------------|-------------------|---------------|
| PointNet        | ShapeNet   | mIoU     | 84.3            | 83.6        | 251.6             | 234.0         |
| PointNet++(SSG) | ShapeNet   | mIoU     | 84.9            | 84.5        | 361.7             | 240.1         |
| PointNet++(MSG) | ShapeNet   | mIoU     | 85.4            | 84.6        | 817.3             | 821.8         |

+ The "Score - PyTorch" values are taken from [this repo](https://github.com/yanx27/Pointnet_Pointnet2_pytorch).
+ The "Time(s)" values are the average training time per epoch, measured on an EC2 g4dn.4xlarge instance with a Tesla T4 GPU.
# How to Run For point cloud classification, run with ```python python train_cls.py ``` For point cloud part-segmentation, run with ```python python train_partseg.py ``` ## To Visualize Part Segmentation in Tensorboard ![Screenshot](vis.png) First ``pip install tensorboard`` then run ```python python train_partseg.py --tensorboard ``` To display in Tensorboard, run ``tensorboard --logdir=runs`` ================================================ FILE: examples/pytorch/pointcloud/pointnet/ShapeNet.py ================================================ import json import os from zipfile import ZipFile import dgl import numpy as np import tqdm from dgl.data.utils import download, get_download_dir from scipy.sparse import csr_matrix from torch.utils.data import Dataset class ShapeNet(object): def __init__(self, num_points=2048, normal_channel=True): self.num_points = num_points self.normal_channel = normal_channel SHAPENET_DOWNLOAD_URL = "https://shapenet.cs.stanford.edu/media/shapenetcore_partanno_segmentation_benchmark_v0_normal.zip" download_path = get_download_dir() data_filename = ( "shapenetcore_partanno_segmentation_benchmark_v0_normal.zip" ) data_path = os.path.join( download_path, "shapenetcore_partanno_segmentation_benchmark_v0_normal", ) if not os.path.exists(data_path): local_path = os.path.join(download_path, data_filename) if not os.path.exists(local_path): download(SHAPENET_DOWNLOAD_URL, local_path, verify_ssl=False) with ZipFile(local_path) as z: z.extractall(path=download_path) synset_file = "synsetoffset2category.txt" with open(os.path.join(data_path, synset_file)) as f: synset = [t.split("\n")[0].split("\t") for t in f.readlines()] self.synset_dict = {} for syn in synset: self.synset_dict[syn[1]] = syn[0] self.seg_classes = { "Airplane": [0, 1, 2, 3], "Bag": [4, 5], "Cap": [6, 7], "Car": [8, 9, 10, 11], "Chair": [12, 13, 14, 15], "Earphone": [16, 17, 18], "Guitar": [19, 20, 21], "Knife": [22, 23], "Lamp": [24, 25, 26, 27], "Laptop": [28, 29], "Motorbike": 
class ShapeNetDataset(Dataset):
    """In-memory ShapeNet part-segmentation split.

    Every file of the requested split is parsed eagerly in ``__init__``; each
    item is a random resample (with replacement) of ``num_points`` points from
    one shape.

    Parameters
    ----------
    shapenet : ShapeNet
        Provides ``train_file_list``, ``val_file_list``, ``test_file_list``
        and ``synset_dict`` (directory name -> category name).
    mode : str
        One of ``"train"``, ``"valid"``, ``"test"`` or ``"trainval"``.
    num_points : int
        Number of points drawn per shape in ``__getitem__``.
    normal_channel : bool
        When true the first six columns of each row are kept, otherwise the
        first three.

    Raises
    ------
    ValueError
        If ``mode`` is not one of the supported values.
    """

    def __init__(self, shapenet, mode, num_points, normal_channel=True):
        super(ShapeNetDataset, self).__init__()
        self.mode = mode
        self.num_points = num_points
        self.dim = 6 if normal_channel else 3

        if mode == "train":
            self.file_list = shapenet.train_file_list
        elif mode == "valid":
            self.file_list = shapenet.val_file_list
        elif mode == "test":
            self.file_list = shapenet.test_file_list
        elif mode == "trainval":
            self.file_list = shapenet.train_file_list + shapenet.val_file_list
        else:
            # Raising a bare string is itself a TypeError in Python 3.
            raise ValueError("Not supported `mode`: %r" % (mode,))

        data_list = []
        label_list = []
        category_list = []
        print("Loading data from split " + self.mode)
        for fn in tqdm.tqdm(self.file_list, ascii=True):
            with open(fn) as f:
                data = np.array(
                    [t.split("\n")[0].split(" ") for t in f.readlines()]
                ).astype(float)
            data_list.append(data[:, 0 : self.dim])
            # Column 6 holds the per-point part label.
            label_list.append(data[:, 6].astype(int))
            # The parent directory name is the synset id; use os.path so the
            # lookup does not depend on "/" being the path separator.
            synset_id = os.path.basename(os.path.dirname(fn))
            category_list.append(shapenet.synset_dict[synset_id])
        self.data = data_list
        self.label = label_list
        self.category = category_list

    def translate(self, x, scale=(2 / 3, 3 / 2), shift=(-0.2, 0.2), size=3):
        """Randomly scale and shift ``x`` column-wise; returns float32.

        One scale factor and one offset are drawn uniformly per column from
        ``scale`` and ``shift`` respectively (``size`` columns).
        """
        xyz1 = np.random.uniform(low=scale[0], high=scale[1], size=[size])
        xyz2 = np.random.uniform(low=shift[0], high=shift[1], size=[size])
        x = np.add(np.multiply(x, xyz1), xyz2).astype("float32")
        return x

    def __len__(self):
        """Number of shapes in the split."""
        return len(self.data)

    def __getitem__(self, i):
        """Return ``(points, labels, category)`` for shape ``i``.

        ``points`` has shape ``(num_points, dim)`` and ``labels`` has shape
        ``(num_points,)``; both are sampled with replacement using the same
        random indices.
        """
        inds = np.random.choice(
            self.data[i].shape[0], self.num_points, replace=True
        )
        x = self.data[i][inds, : self.dim]
        y = self.label[i][inds]
        cat = self.category[i]
        # NOTE(review): augmentation is applied only for mode == "train";
        # a "trainval" dataset is returned un-augmented -- confirm intended.
        if self.mode == "train":
            x = self.translate(x, size=self.dim)
        x = x.astype(float)
        y = y.astype(int)
        return x, y, cat
class FixedRadiusNearNeighbors(nn.Module):
    """
    Ball Query - for every centroid, collect up to ``n_neighbor`` point
    indices lying within ``radius`` of it
    """

    def __init__(self, radius, n_neighbor):
        super().__init__()
        self.radius = radius
        self.n_neighbor = n_neighbor

    def forward(self, pos, centroids):
        """
        Adapted from https://github.com/yanx27/Pointnet_Pointnet2_pytorch

        pos is unpacked as [B, N, _]; centroids is used to index pos through
        index_points. Returns a long tensor [B, S, n_neighbor] of point
        indices, S being the number of centroids per cloud.
        """
        B, N, _ = pos.shape
        center_pos = index_points(pos, centroids)
        S = center_pos.shape[1]

        # Start with every point as a candidate for every centroid.
        candidates = torch.arange(N, dtype=torch.long).to(pos.device)
        group_idx = candidates.view(1, 1, N).repeat([B, S, 1])

        # Points outside the ball get the out-of-range index N so that an
        # ascending sort pushes them behind all valid indices.
        sqrdists = square_distance(center_pos, pos)
        outside = sqrdists > self.radius**2
        group_idx[outside] = N
        sorted_idx, _ = group_idx.sort(dim=-1)
        group_idx = sorted_idx[:, :, : self.n_neighbor]

        # Slots still holding N are filled with the first (smallest) index of
        # the same centroid.
        first_hit = (
            group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, self.n_neighbor])
        )
        padding = group_idx == N
        group_idx[padding] = first_hit[padding]
        return group_idx
class PointNetConv(nn.Module):
    """
    Feature aggregation: a stack of 1x1 Conv2d + BatchNorm2d + ReLU layers
    followed by max pooling.

    Parameters
    ----------
    sizes : sequence of int
        Channel sizes; layer ``i`` maps ``sizes[i - 1]`` to ``sizes[i]``.
    batch_size : int
        Leading dimension used to reshape the mailbox in ``forward``.
    """

    def __init__(self, sizes, batch_size):
        super(PointNetConv, self).__init__()
        self.batch_size = batch_size
        self.conv = nn.ModuleList()
        self.bn = nn.ModuleList()
        for in_dim, out_dim in zip(sizes[:-1], sizes[1:]):
            self.conv.append(nn.Conv2d(in_dim, out_dim, 1))
            self.bn.append(nn.BatchNorm2d(out_dim))

    def forward(self, nodes):
        """Reduce function: pool ``nodes.mailbox["agg_feat"]`` per node.

        The 3-D mailbox is viewed as
        ``(batch_size, -1, shape[1], shape[2])`` and permuted so its last
        dimension becomes the channel dimension; the convolution stack is
        applied and the maximum over the mailbox's dim 1 is taken.
        (presumably the mailbox is (nodes, messages per node, feature) --
        TODO confirm against the DGL reduce-function contract)

        Returns ``{"new_feat": tensor}`` with one row per mailbox row.
        """
        shape = nodes.mailbox["agg_feat"].shape
        h = (
            nodes.mailbox["agg_feat"]
            .view(self.batch_size, -1, shape[1], shape[2])
            .permute(0, 3, 2, 1)
        )
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)
        h = torch.max(h, 2)[0]
        feat_dim = h.shape[1]
        h = h.permute(0, 2, 1).reshape(-1, feat_dim)
        return {"new_feat": h}

    def group_all(self, pos, feat):
        """
        Feature aggregation and pooling for the non-sampling layer

        ``pos`` is [B, N, C] and ``feat`` is [B, N, D'] or None. All N points
        of a cloud are pooled into one feature vector.

        Returns ``(new_pos, h)``: an all-zero [B, 1, C] position tensor
        created on the same device and with the same dtype as ``pos``, and
        the pooled features [B, D].
        """
        if feat is not None:
            h = torch.cat([pos, feat], 2)
        else:
            h = pos
        B, N, D = h.shape
        _, _, C = pos.shape
        # Allocate next to the input; a default (CPU, float32) tensor would
        # not match when the model runs on another device or dtype.
        new_pos = torch.zeros(B, 1, C, device=pos.device, dtype=pos.dtype)
        h = h.permute(0, 2, 1).view(B, -1, N, 1)
        for conv, bn in zip(self.conv, self.bn):
            h = conv(h)
            h = bn(h)
            h = F.relu(h)
        h = torch.max(h[:, :, :, 0], 2)[0]  # [B,D]
        return new_pos, h
class PointNet2FP(nn.Module):
    """
    The Feature Propagation Layer

    Upsamples features from a sparse point set onto a denser one by
    inverse-squared-distance interpolation, then applies a stack of 1x1
    Conv1d + BatchNorm1d + ReLU layers.

    Parameters
    ----------
    input_dims : int
        Channel count of the concatenated features entering the first conv.
    sizes : sequence of int
        Output channels of each successive conv layer.
    """

    def __init__(self, input_dims, sizes):
        super(PointNet2FP, self).__init__()
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        # Accept any sequence (list or tuple) of layer widths.
        sizes = [input_dims, *sizes]
        for i in range(1, len(sizes)):
            self.convs.append(nn.Conv1d(sizes[i - 1], sizes[i], 1))
            self.bns.append(nn.BatchNorm1d(sizes[i]))

    def forward(self, x1, x2, feat1, feat2):
        """
        Adapted from https://github.com/yanx27/Pointnet_Pointnet2_pytorch
            Input:
                x1: input points position data, [B, N, C]
                x2: sampled input points position data, [B, S, C]
                feat1: input points data, [B, N, D] (may be None)
                feat2: sampled points data, [B, S, D]
            Return:
                new_feat: upsampled points data, [B, D', N]
        """
        B, N, C = x1.shape
        _, S, _ = x2.shape

        if S == 1:
            # A single source point: broadcast its feature to every target.
            interpolated_feat = feat2.repeat(1, N, 1)
        else:
            # Interpolate from the 3 nearest sampled points, or from fewer
            # when the sampled set itself has fewer than 3 points.
            k = min(3, S)
            dists = square_distance(x1, x2)
            dists, idx = dists.sort(dim=-1)
            dists, idx = dists[:, :, :k], idx[:, :, :k]  # [B, N, k]

            dist_recip = 1.0 / (dists + 1e-8)
            norm = torch.sum(dist_recip, dim=2, keepdim=True)
            weight = dist_recip / norm
            interpolated_feat = torch.sum(
                index_points(feat2, idx) * weight.unsqueeze(-1), dim=2
            )

        if feat1 is not None:
            new_feat = torch.cat([feat1, interpolated_feat], dim=-1)
        else:
            new_feat = interpolated_feat

        new_feat = new_feat.permute(0, 2, 1)  # [B, D, N]
        for i, conv in enumerate(self.convs):
            bn = self.bns[i]
            new_feat = F.relu(bn(conv(new_feat)))
        return new_feat
class PointNet2MSGCls(nn.Module):
    """PointNet++ classifier built from multi-scale-grouping set abstraction.

    Two ``SAMSGModule`` stages and one group-all ``SAModule`` produce a
    1024-wide global feature, which a 1024-512-256 MLP with batch norm, ReLU
    and dropout maps to ``output_classes`` scores.
    """

    def __init__(
        self, output_classes, batch_size, input_dims=3, dropout_prob=0.4
    ):
        super().__init__()
        self.input_dims = input_dims

        stage1_mlps = [
            [input_dims, 32, 32, 64],
            [input_dims, 64, 64, 128],
            [input_dims, 64, 96, 128],
        ]
        self.sa_msg_module1 = SAMSGModule(
            512, batch_size, [0.1, 0.2, 0.4], [16, 32, 128], stage1_mlps
        )

        stage2_mlps = [
            [320 + 3, 64, 64, 128],
            [320 + 3, 128, 128, 256],
            [320 + 3, 128, 128, 256],
        ]
        self.sa_msg_module2 = SAMSGModule(
            128, batch_size, [0.2, 0.4, 0.8], [32, 64, 128], stage2_mlps
        )

        self.sa_module3 = SAModule(
            None, batch_size, None, [640 + 3, 256, 512, 1024], group_all=True
        )

        self.mlp1 = nn.Linear(1024, 512)
        self.bn1 = nn.BatchNorm1d(512)
        self.drop1 = nn.Dropout(dropout_prob)

        self.mlp2 = nn.Linear(512, 256)
        self.bn2 = nn.BatchNorm1d(256)
        self.drop2 = nn.Dropout(dropout_prob)

        self.mlp_out = nn.Linear(256, output_classes)

    def forward(self, x):
        """Classify a batch of clouds; returns the output of ``mlp_out``.

        When the last dimension of ``x`` exceeds 3, its first three columns
        are used as positions and the rest as features; otherwise ``x`` is
        the positions and there are no input features.
        """
        has_extra_channels = x.shape[-1] > 3
        pos = x[:, :, :3] if has_extra_channels else x
        feat = x[:, :, 3:] if has_extra_channels else None

        pos, feat = self.sa_msg_module1(pos, feat)
        pos, feat = self.sa_msg_module2(pos, feat)
        _, h = self.sa_module3(pos, feat)

        head = (
            (self.mlp1, self.bn1, self.drop1),
            (self.mlp2, self.bn2, self.drop2),
        )
        for fc, bn, drop in head:
            h = drop(F.relu(bn(fc(h))))

        return self.mlp_out(h)
class PointNet2MSGPartSeg(nn.Module):
    """PointNet++ (multi-scale grouping) network for part segmentation.

    Encoder: two ``SAMSGModule`` stages and a group-all ``SAModule``.
    Decoder: three ``PointNet2FP`` stages followed by a 1x1 convolution head
    that emits ``output_classes`` scores per point.
    """

    def __init__(self, output_classes, batch_size, input_dims=6):
        super().__init__()

        level1_mlps = [
            [input_dims, 32, 32, 64],
            [input_dims, 64, 64, 128],
            [input_dims, 64, 96, 128],
        ]
        self.sa_msg_module1 = SAMSGModule(
            512, batch_size, [0.1, 0.2, 0.4], [32, 64, 128], level1_mlps
        )

        level2_in = 128 + 128 + 64 + 3
        level2_mlps = [
            [level2_in, 128, 128, 256],
            [level2_in, 128, 196, 256],
        ]
        self.sa_msg_module2 = SAMSGModule(
            128, batch_size, [0.4, 0.8], [64, 128], level2_mlps
        )

        self.sa_module3 = SAModule(
            None, batch_size, None, [512 + 3, 256, 512, 1024], group_all=True
        )

        self.fp3 = PointNet2FP(1536, [256, 256])
        self.fp2 = PointNet2FP(576, [256, 128])
        # if normal_channel == true, 150 + 3
        self.fp1 = PointNet2FP(150, [128, 128])

        self.conv1 = nn.Conv1d(128, 128, 1)
        self.bn1 = nn.BatchNorm1d(128)
        self.drop1 = nn.Dropout(0.5)
        self.conv2 = nn.Conv1d(128, output_classes, 1)

    def forward(self, x, cat_vec=None):
        """Return the per-point scores produced by ``conv2``.

        ``x`` serves as the level-0 features; its first three columns (or all
        of it when it has at most three) serve as the level-0 positions.
        ``cat_vec`` is permuted with ``(0, 2, 1)`` and concatenated to the
        level-0 features, so it must be provided despite its ``None``
        default.
        """
        l0_feat = x
        l0_pos = x[:, :, :3] if x.shape[-1] > 3 else x

        # Set Abstraction layers
        l1_pos, l1_feat = self.sa_msg_module1(l0_pos, l0_feat)
        l2_pos, l2_feat = self.sa_msg_module2(l1_pos, l1_feat)
        l3_pos, l3_feat = self.sa_module3(l2_pos, l2_feat)

        # Feature Propagation layers
        global_feat = l3_feat.unsqueeze(1)
        l2_feat = self.fp3(l2_pos, l3_pos, l2_feat, global_feat)
        l1_feat = self.fp2(l1_pos, l2_pos, l1_feat, l2_feat.permute(0, 2, 1))
        skip_feat = torch.cat([cat_vec.permute(0, 2, 1), l0_pos, l0_feat], 2)
        l0_feat = self.fp1(
            l0_pos, l1_pos, skip_feat, l1_feat.permute(0, 2, 1)
        )

        # FC layers
        hidden = F.relu(self.bn1(self.conv1(l0_feat)))
        return self.conv2(self.drop1(hidden))
self.bn2.append(nn.BatchNorm1d(conv1_dim * 2)) self.bn2.append(nn.BatchNorm1d(conv1_dim * 16)) self.maxpool = nn.MaxPool1d(conv1_dim * 16) self.pool_feat_len = conv1_dim * 16 self.mlp3 = nn.ModuleList() self.mlp3.append(nn.Linear(conv1_dim * 16, conv1_dim * 8)) self.mlp3.append(nn.Linear(conv1_dim * 8, conv1_dim * 4)) self.bn3 = nn.ModuleList() self.bn3.append(nn.BatchNorm1d(conv1_dim * 8)) self.bn3.append(nn.BatchNorm1d(conv1_dim * 4)) self.dropout = nn.Dropout(0.3) self.mlp_out = nn.Linear(conv1_dim * 4, output_classes) self.use_transform = use_transform if use_transform: self.transform1 = TransformNet(input_dims) self.trans_bn1 = nn.BatchNorm1d(input_dims) self.transform2 = TransformNet(conv1_dim) self.trans_bn2 = nn.BatchNorm1d(conv1_dim) def forward(self, x): batch_size = x.shape[0] h = x.permute(0, 2, 1) if self.use_transform: trans = self.transform1(h) h = h.transpose(2, 1) h = torch.bmm(h, trans) h = h.transpose(2, 1) h = F.relu(self.trans_bn1(h)) for conv, bn in zip(self.conv1, self.bn1): h = conv(h) h = bn(h) h = F.relu(h) if self.use_transform: trans = self.transform2(h) h = h.transpose(2, 1) h = torch.bmm(h, trans) h = h.transpose(2, 1) h = F.relu(self.trans_bn2(h)) for conv, bn in zip(self.conv2, self.bn2): h = conv(h) h = bn(h) h = F.relu(h) h = self.maxpool(h).view(-1, self.pool_feat_len) for mlp, bn in zip(self.mlp3, self.bn3): h = mlp(h) h = bn(h) h = F.relu(h) h = self.dropout(h) out = self.mlp_out(h) return out class TransformNet(nn.Module): def __init__(self, input_dims=3, conv1_dim=64): super(TransformNet, self).__init__() self.conv = nn.ModuleList() self.conv.append(nn.Conv1d(input_dims, conv1_dim, 1)) self.conv.append(nn.Conv1d(conv1_dim, conv1_dim * 2, 1)) self.conv.append(nn.Conv1d(conv1_dim * 2, conv1_dim * 16, 1)) self.bn = nn.ModuleList() self.bn.append(nn.BatchNorm1d(conv1_dim)) self.bn.append(nn.BatchNorm1d(conv1_dim * 2)) self.bn.append(nn.BatchNorm1d(conv1_dim * 16)) self.maxpool = nn.MaxPool1d(conv1_dim * 16) self.pool_feat_len = 
def forward(self, h):
    """Predict one square alignment matrix per sample.

    Parameters
    ----------
    h : torch.Tensor
        Features laid out as ``[batch, input_dims, points]`` (the layout
        consumed by the ``Conv1d`` stack).

    Returns
    -------
    torch.Tensor
        ``[batch, input_dims, input_dims]``; the regressed offsets plus the
        identity matrix, so a zero regression yields the identity.
    """
    for conv, bn in zip(self.conv, self.bn):
        h = F.relu(bn(conv(h)))

    # Global max over the point axis ([B, C, N] -> [B, C]). The stored
    # MaxPool1d window is sized by the feature width, so it only matched
    # inputs with exactly that many points; a global max gives the same
    # result in that case and works for any point count.
    h = torch.max(h, dim=2)[0]

    for mlp, bn in zip(self.mlp2, self.bn2):
        h = F.relu(bn(mlp(h)))

    out = self.mlp_out(h)

    # Build the identity directly on the output's device and dtype; the
    # flattened row broadcasts across the batch.
    iden = torch.eye(
        self.input_dims, dtype=out.dtype, device=out.device
    ).flatten()
    out = out + iden
    return out.view(-1, self.input_dims, self.input_dims)
class TransformNet(nn.Module):
    """Regress a per-sample ``input_dims x input_dims`` alignment matrix.

    A shared 1x1 convolution stack (64, 128, 1024 channels) is followed by
    a global max over the points and an MLP (512, 256) whose output is
    reshaped to a square matrix and offset by the identity.

    Parameters
    ----------
    input_dims : int
        Channels of the incoming features; also the side of the matrix.
    num_points : int
        Only used to size the legacy ``maxpool`` attribute. The forward
        pass pools globally, so inputs need not have this many points.
    """

    def __init__(self, input_dims=3, num_points=2048):
        super(TransformNet, self).__init__()
        self.conv = nn.ModuleList(
            [
                nn.Conv1d(input_dims, 64, 1),
                nn.Conv1d(64, 128, 1),
                nn.Conv1d(128, 1024, 1),
            ]
        )
        self.bn = nn.ModuleList(
            [
                nn.BatchNorm1d(64),
                nn.BatchNorm1d(128),
                nn.BatchNorm1d(1024),
            ]
        )

        # Kept for attribute compatibility only; see forward().
        self.maxpool = nn.MaxPool1d(num_points)
        self.pool_feat_len = 1024

        self.mlp2 = nn.ModuleList(
            [
                nn.Linear(1024, 512),
                nn.Linear(512, 256),
            ]
        )
        self.bn2 = nn.ModuleList(
            [
                nn.BatchNorm1d(512),
                nn.BatchNorm1d(256),
            ]
        )

        self.input_dims = input_dims
        self.mlp_out = nn.Linear(256, input_dims * input_dims)

    def forward(self, h):
        """Map ``h`` ([B, input_dims, N]) to ``[B, input_dims, input_dims]``."""
        for conv, bn in zip(self.conv, self.bn):
            h = F.relu(bn(conv(h)))

        # Global max over points. The fixed MaxPool1d(num_points) window
        # failed for any input whose point count differed from num_points
        # (which callers in this file never pass); for N == num_points the
        # result is the same.
        h = torch.max(h, dim=2)[0]

        for mlp, bn in zip(self.mlp2, self.bn2):
            h = F.relu(bn(mlp(h)))

        out = self.mlp_out(h)

        # Identity created on out's device/dtype; broadcasts over the batch.
        iden = torch.eye(
            self.input_dims, dtype=out.dtype, device=out.device
        ).flatten()
        out = out + iden
        return out.view(-1, self.input_dims, self.input_dims)
def rotate_point_cloud_z(batch_data):
    """Randomly rotate each point cloud about the z axis.

    One angle is drawn uniformly from [0, 2*pi) per shape, so every cloud
    in the batch gets its own rotation; z coordinates are left unchanged.
    Input:
      BxNx3 array, original batch of point clouds
    Return:
      BxNx3 float32 array, rotated batch of point clouds
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k, shape_pc in enumerate(batch_data):
        theta = np.random.uniform() * 2 * np.pi
        c, s = np.cos(theta), np.sin(theta)
        # Points are row vectors, so the matrix is applied on the right.
        rot_z = np.array([[c, s, 0], [-s, c, 0], [0, 0, 1]])
        rotated_data[k, ...] = shape_pc.reshape((-1, 3)).dot(rot_z)
    return rotated_data
def rotate_perturbation_point_cloud_with_normal(
    batch_data, angle_sigma=0.06, angle_clip=0.18
):
    """Randomly perturb point clouds and their normals by small rotations.

    For every shape, three angles are drawn from a normal distribution
    scaled by ``angle_sigma`` and clipped to ``[-angle_clip, angle_clip]``;
    the composed rotation ``Rz @ Ry @ Rx`` is applied to both the XYZ
    channels and the normal channels.
    Input:
      BxNx6 array, original batch of point clouds and point normals
    Return:
      BxNx6 float32 array, rotated point clouds and normals
    """
    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    for k in range(batch_data.shape[0]):
        ax, ay, az = np.clip(
            angle_sigma * np.random.randn(3), -angle_clip, angle_clip
        )
        cx, sx = np.cos(ax), np.sin(ax)
        cy, sy = np.cos(ay), np.sin(ay)
        cz, sz = np.cos(az), np.sin(az)
        Rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
        Ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
        Rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
        R = Rz.dot(Ry.dot(Rx))

        xyz = batch_data[k, :, 0:3].reshape((-1, 3))
        normals = batch_data[k, :, 3:6].reshape((-1, 3))
        rotated_data[k, :, 0:3] = xyz.dot(R)
        rotated_data[k, :, 3:6] = normals.dot(R)
    return rotated_data
def rotate_point_cloud_by_angle_with_normal(batch_data, rotation_angle):
    """Rotate point clouds and normals about the y axis by a fixed angle.

    Input:
      batch_data: BxNx6 array, XYZ in channels 0-2 and normals in 3-5
      rotation_angle: scalar, angle of rotation in radians
    Return:
      BxNx6 float32 array, rotated batch of point clouds with normals.
      The result has the same shape as the input; any channels beyond the
      first six are left at zero.
    """
    cosval = np.cos(rotation_angle)
    sinval = np.sin(rotation_angle)
    # The angle is shared by the whole batch, so the matrix is built once
    # instead of once per shape.
    rotation_matrix = np.array(
        [[cosval, 0, sinval], [0, 1, 0], [-sinval, 0, cosval]]
    )

    rotated_data = np.zeros(batch_data.shape, dtype=np.float32)
    # np.dot contracts the last axis of the (B, N, 3) slice with the
    # matrix, i.e. every point is treated as a row vector.
    rotated_data[:, :, 0:3] = np.dot(batch_data[:, :, 0:3], rotation_matrix)
    rotated_data[:, :, 3:6] = np.dot(batch_data[:, :, 3:6], rotation_matrix)
    return rotated_data
def jitter_point_cloud(batch_data, sigma=0.01, clip=0.05):
    """Randomly jitter points. Jittering is per point.

    Gaussian noise with standard deviation ``sigma`` is clipped to
    ``[-clip, clip]`` and added to a copy of the input; ``batch_data``
    itself is not modified.
    Input:
      BxNx3 array, original batch of point clouds
    Return:
      BxNx3 array, jittered batch of point clouds
    Raises:
      ValueError if ``clip`` is not strictly positive.
    """
    B, N, C = batch_data.shape
    # Explicit check instead of ``assert`` so the validation survives
    # ``python -O``; ``not clip > 0`` also rejects NaN.
    if not clip > 0:
        raise ValueError("clip must be positive, got %r" % (clip,))
    jittered_data = np.clip(sigma * np.random.randn(B, N, C), -1 * clip, clip)
    jittered_data += batch_data
    return jittered_data
def random_point_dropout(batch_pc, max_dropout_ratio=0.875):
    """Randomly replace points of each cloud with that cloud's first point.

    For every shape a dropout ratio is drawn uniformly from
    ``[0, max_dropout_ratio)``; each point is then selected with that
    probability and overwritten by point 0 of the same shape, which keeps
    the array shape fixed.

    batch_pc: BxNx3 array; it is modified in place and also returned.
    """
    for b in range(batch_pc.shape[0]):
        dropout_ratio = np.random.random() * max_dropout_ratio  # 0~0.875
        drop_idx = np.where(
            np.random.random((batch_pc.shape[1])) <= dropout_ratio
        )[0]
        if len(drop_idx) > 0:
            # set to the first point
            batch_pc[b, drop_idx, :] = batch_pc[b, 0, :]
    return batch_pc
def train(net, opt, scheduler, train_loader, dev):
    """Run one training epoch of the classifier.

    Each batch is augmented on the NumPy side (point dropout, then random
    scale, jitter and shift of the first three channels), moved to ``dev``
    and used for one optimizer step with cross-entropy loss. Running loss
    and accuracy are shown on the progress bar, and the learning-rate
    scheduler is stepped once at the end.
    """
    net.train()
    criterion = nn.CrossEntropyLoss()

    running_loss = 0
    running_correct = 0
    batches_seen = 0
    samples_seen = 0

    with tqdm.tqdm(train_loader, ascii=True) as tq:
        for data, label in tq:
            # Augmentation helpers operate on NumPy arrays.
            points = data.data.numpy()
            points = provider.random_point_dropout(points)
            points[:, :, 0:3] = provider.random_scale_point_cloud(
                points[:, :, 0:3]
            )
            points[:, :, 0:3] = provider.jitter_point_cloud(points[:, :, 0:3])
            points[:, :, 0:3] = provider.shift_point_cloud(points[:, :, 0:3])
            data = torch.tensor(points)

            label = label[:, 0]
            num_examples = label.shape[0]
            data = data.to(dev)
            label = label.to(dev).squeeze().long()

            opt.zero_grad()
            logits = net(data)
            loss = criterion(logits, label)
            loss.backward()
            opt.step()

            preds = logits.max(1)[1]

            batches_seen += 1
            samples_seen += num_examples
            running_loss += loss.item()
            running_correct += (preds == label).sum().item()

            tq.set_postfix(
                {
                    "AvgLoss": "%.5f" % (running_loss / batches_seen),
                    "AvgAcc": "%.5f" % (running_correct / samples_seen),
                }
            )
    scheduler.step()
(total_correct / count)}) return total_correct / count dev = torch.device("cuda" if torch.cuda.is_available() else "cpu") if args.model == "pointnet": net = PointNetCls(40, input_dims=6) elif args.model == "pointnet2_ssg": net = PointNet2SSGCls(40, batch_size, input_dims=6) elif args.model == "pointnet2_msg": net = PointNet2MSGCls(40, batch_size, input_dims=6) net = net.to(dev) if args.load_model_path: net.load_state_dict( torch.load(args.load_model_path, weights_only=False, map_location=dev) ) opt = optim.Adam(net.parameters(), lr=1e-3, weight_decay=1e-4) scheduler = optim.lr_scheduler.StepLR(opt, step_size=20, gamma=0.7) train_dataset = ModelNetDataLoader(local_path, 1024, split="train") test_dataset = ModelNetDataLoader(local_path, 1024, split="test") train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True, ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, drop_last=True, ) best_test_acc = 0 for epoch in range(args.num_epochs): train(net, opt, scheduler, train_loader, dev) if (epoch + 1) % 1 == 0: print("Epoch #%d Testing" % epoch) test_acc = evaluate(net, test_loader, dev) if test_acc > best_test_acc: best_test_acc = test_acc if args.save_model_path: torch.save(net.state_dict(), args.save_model_path) print("Current test acc: %.5f (best: %.5f)" % (test_acc, best_test_acc)) ================================================ FILE: examples/pytorch/pointcloud/pointnet/train_partseg.py ================================================ import argparse import os import time import urllib from functools import partial import dgl import numpy as np import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import tqdm from dgl.data.utils import download, get_download_dir from pointnet2_partseg import PointNet2MSGPartSeg, PointNet2SSGPartSeg from pointnet_partseg import PartSegLoss, 
def collate(samples):
    """Merge ``(graph, category)`` samples into one batched graph.

    Returns the result of ``dgl.batch`` over the graphs together with the
    list of categories, in sample order.
    """
    # Transpose the list of pairs into two lists.
    graphs, cat = (list(column) for column in zip(*samples))
    batched_graph = dgl.batch(graphs)
    return batched_graph, cat
def mIoU(preds, label, cat, cat_miou, seg_classes):
    """Accumulate per-shape mean IoU into the per-category totals.

    For shape ``i`` the IoU is computed for every part class listed in
    ``seg_classes[cat[i]]`` and averaged; a class absent from both the
    prediction and the ground truth counts as IoU 1. ``cat_miou`` maps a
    category to ``[sum_of_shape_ious, shape_count]``; it is updated in
    place and returned.
    """
    for i in range(preds.shape[0]):
        category = cat[i]
        part_classes = seg_classes[category]
        iou_sum = 0
        for cls in part_classes:
            pred_mask = preds[i, :] == cls
            label_mask = label[i, :] == cls
            union = int(np.count_nonzero(pred_mask | label_mask))
            inter = int(np.count_nonzero(pred_mask & label_mask))
            iou_sum += 1 if union == 0 else inter / union
        totals = cat_miou[category]
        totals[0] += iou_sum / len(part_classes)
        totals[1] += 1
    return cat_miou
def paint(batched_points):
    """Look up a colour from ``color_map`` for every predicted part label.

    ``batched_points`` must be two-dimensional (unpacking its shape fails
    otherwise); indexing ``color_map`` with it appends the colour channels
    as a trailing axis.
    """
    _batch, _num_points = batched_points.shape  # enforces a 2-D input
    return color_map[batched_points].squeeze(2)
scheduler, train_loader, dev ) if (epoch + 1) % 5 == 0: print("Epoch #%d Testing" % epoch) test_miou, test_per_cat_miou = evaluate( net, test_loader, dev, (epoch + 1) % 5 == 0 ) if test_miou > best_test_miou: best_test_miou = test_miou best_test_per_cat_miou = test_per_cat_miou if args.save_model_path: torch.save(net.state_dict(), args.save_model_path) print( "Current test mIoU: %.5f (best: %.5f), per-Category mIoU: %.5f (best: %.5f)" % ( test_miou, best_test_miou, test_per_cat_miou, best_test_per_cat_miou, ) ) # Tensorboard if args.tensorboard: colored = paint(preds) writer.add_mesh( "data", vertices=data, colors=colored, global_step=epoch ) writer.add_scalar( "training time for one epoch", training_time, global_step=epoch ) writer.add_scalar("AvgLoss", AvgLoss, global_step=epoch) writer.add_scalar("AvgAcc", AvgAcc, global_step=epoch) if (epoch + 1) % 5 == 0: writer.add_scalar("test mIoU", test_miou, global_step=epoch) writer.add_scalar( "best test mIoU", best_test_miou, global_step=epoch ) ================================================ FILE: examples/pytorch/rect/README.md ================================================ # **DGL Implementation of RECT (TKDE20)** This DGL example implements the GNN model **RECT** (or more specifically its supervised part **RECT-L**) proposed in the paper [Network Embedding with Completely-imbalanced Labels](https://ieeexplore.ieee.org/document/8979355). The authors' original implementation can be found [here](https://github.com/zhengwang100/RECT). ## Example Implementor This example was implemented by [Tingzhang Zhao](https://github.com/Fizyhsp) when he was an undergraduate at USTB. ## **Dataset and experimental setting** Two DGL's build-in datasets (Cora and Citeseer) with their default train/val/test settings are used in this example. In addition, as this paper considers the zero-shot (i.e., completely-imbalanced) label setting, those "unseen" classes should be removed from the training set, as suggested in the paper. 
In this example, in each dataset, we simply remove 2-3 classes (i.e., these 2-3 classes are the unseen classes) from the labeled training set. Then, we obtain graph embedding results by different models. Finally, with the obtained embedding results and the original balanced labels, we train a logistic regression classifier to evaluate the model performance. ## **Usage** `python main.py --dataset cora --gpu 0 --model-opt RECT-L --removed-class 0 1 2` # reproducing RECT-L on the "cora" dataset in the zero-shot label setting using GPU `python main.py --dataset cora --gpu 0 --model-opt GCN --removed-class 0 1 2` # reproducing GCN on the "cora" dataset in the zero-shot label setting using GPU `python main.py --dataset cora --gpu 0 --model-opt NodeFeats --removed-class 0 1 2` # evaluating the original node features using GPU ## **Performance** The performance results are as follows: | **Datasets/Models** | **NodeFeats** | **GCN** | **RECT-L** | | :-----------------: | :-----------: | :-----: | :--------: | | **Cora** | 47.56 | 51.26 | **68.60** | | **Citeseer** | 42.04 | 37.55 | **56.32** |
Table 1: Node classification results with the first three classes as "unseen"


| **Datasets/Models** | **NodeFeats** | **GCN** | **RECT-L** | | :-----------------: | :-----------: | :-----: | :--------: | | **Cora** | 47.56 | 56.91 | **69.30** | | **Citeseer** | 42.04 | 45.69 | **61.85** |
Table 2: Node classification results with the last two classes as "unseen"

================================================ FILE: examples/pytorch/rect/classify.py ================================================ from statistics import mean import torch import torch.nn as nn import torch.nn.functional as F class LogisticRegressionClassifier(nn.Module): """Define a logistic regression classifier to evaluate the quality of embedding results""" def __init__(self, nfeat, nclass): super(LogisticRegressionClassifier, self).__init__() self.lrc = nn.Linear(nfeat, nclass) def forward(self, x): preds = self.lrc(x) return preds def _evaluate(model, features, labels, test_mask): model.eval() with torch.no_grad(): logits = model(features) logits = logits[test_mask] labels = labels[test_mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) def _train_test_with_lrc(model, features, labels, train_mask, test_mask): """Under the pre-defined balanced train/test label setting, train a lrc to evaluate the embedding results.""" optimizer = torch.optim.Adam(model.parameters(), lr=0.2, weight_decay=5e-06) for _ in range(100): model.train() optimizer.zero_grad() output = model(features) loss_train = F.cross_entropy(output[train_mask], labels[train_mask]) loss_train.backward() optimizer.step() return _evaluate( model=model, features=features, labels=labels, test_mask=test_mask ) def evaluate_embeds( features, labels, train_mask, test_mask, n_classes, cuda, test_times=10 ): print( "Training a logistic regression classifier with the pre-defined train/test split setting ..." 
def remove_unseen_classes_from_training(train_mask, labels, removed_class):
    """Drop the unseen classes from the training mask.

    This yields the zero-shot (i.e., completely imbalanced) label setting:
    every training node whose label is listed in ``removed_class`` is
    unmarked.

    Input: train_mask (1-D mask tensor), labels (1-D tensor with one label
        per node), removed_class (iterable of class ids)
    Output: train_mask_zs: a copy of ``train_mask`` that only keeps nodes
        of seen classes; ``train_mask`` itself is left untouched
    """
    train_mask_zs = train_mask.clone()
    removed = torch.as_tensor(
        list(removed_class), dtype=labels.dtype, device=labels.device
    )
    # Vectorised membership test: compare every label against every removed
    # class at once instead of looping over nodes in Python with .item().
    is_unseen = (labels.reshape(-1, 1) == removed.reshape(1, -1)).any(dim=1)
    is_unseen = is_unseen.to(train_mask_zs.device)
    train_mask_zs[(train_mask_zs == 1) & is_unseen] = 0
    return train_mask_zs
Suppose a node i is labeled as c, then attribute[c] += node_i_attribute, finally mean(attribute[c]) Input: train_mask_zs, nodeids, labellist, features Output: label_attribute{}: label -> average_labeled_node_features (class centers) """ _, feat_num = features.shape labels = get_class_set(labellist) label_attribute_nodes = defaultdict(list) for nodeid, labels in zip(nodeids, labellist): for label in labels: label_attribute_nodes[int(label)].append(int(nodeid)) label_attribute = {} for label in label_attribute_nodes.keys(): nodes = label_attribute_nodes[int(label)] selected_features = features[nodes, :] label_attribute[int(label)] = np.mean(selected_features, axis=0) return label_attribute def get_labeled_nodes_label_attribute(train_mask_zs, labels, features, cuda): """Replace the original labels by their class-centers. For each label c in the training set, the following operations will be performed: Get label_attribute{} through function get_label_attributes, then res[i, :] = label_attribute[c] Input: train_mask_zs, labels, features Output: Y_{semantic} [l, ft]: tensor """ X = torch.LongTensor(range(features.shape[0])) nodeids = [] labellist = [] for i in X[train_mask_zs].numpy().tolist(): nodeids.append(str(i)) for i in labels[train_mask_zs].cpu().numpy().tolist(): labellist.append([str(i)]) # 1. get the semantic knowledge (class centers) of all seen classes label_attribute = get_label_attributes( train_mask_zs=train_mask_zs, nodeids=nodeids, labellist=labellist, features=features.cpu().numpy(), ) # 2. 
def main(args):
    """Train the selected model in the zero-shot label setting and evaluate it.

    The classes listed in ``args.removed_class`` are removed from the training
    mask, the model chosen by ``args.model_opt`` ("RECT-L", "GCN" or
    "NodeFeats") produces node embeddings, and those embeddings are scored
    with ``evaluate_embeds`` on the original train/test split.

    Raises:
        ValueError: if more classes are removed than exist, if a removed
            class does not occur in the labels, or if ``args.model_opt`` is
            not one of the supported options.
    """
    g, features, labels, train_mask, test_mask, n_classes, cuda = load_data(
        args
    )
    # adopt any number of classes as the unseen classes (the first three classes by default)
    removed_class = args.removed_class
    if len(removed_class) > n_classes:
        raise ValueError(
            "unseen number is greater than the number of classes: {}".format(
                len(removed_class)
            )
        )
    for i in removed_class:
        if i not in labels:
            raise ValueError("class out of bounds: {}".format(i))

    # remove these unseen classes from the training set, to construct the zero-shot label setting
    train_mask_zs = remove_unseen_classes_from_training(
        train_mask=train_mask, labels=labels, removed_class=removed_class
    )
    # Tensor.sum() also works for an empty mask, unlike the builtin sum(),
    # which would return a plain int without .item().
    print(
        "after removing the unseen classes, seen class labeled node num:",
        train_mask_zs.sum().item(),
    )

    if args.model_opt == "RECT-L":
        # Reduce the features before sizing the model: svd_feature returns its
        # input unchanged when it already has at most n_hidden columns, so the
        # input width must be read from the features, not from n_hidden.
        features = svd_feature(features=features, d=args.n_hidden)
        model = RECT_L(
            g=g,
            in_feats=features.shape[1],
            n_hidden=args.n_hidden,
            activation=nn.PReLU(),
        )
        if cuda:
            # Keep the model on the same device as the features.
            model.to(features.device)
        attribute_labels = get_labeled_nodes_label_attribute(
            train_mask_zs=train_mask_zs,
            labels=labels,
            features=features,
            cuda=cuda,
        ).to(features.device)
        loss_fcn = nn.MSELoss(reduction="sum")
        optimizer = torch.optim.Adam(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
        for epoch in range(args.n_epochs):
            model.train()
            optimizer.zero_grad()
            logits = model(features)
            loss_train = loss_fcn(attribute_labels, logits[train_mask_zs])
            print(
                "Epoch {:d} | Train Loss {:.5f}".format(
                    epoch + 1, loss_train.item()
                )
            )
            loss_train.backward()
            optimizer.step()
        model.eval()
        embeds = model.embed(features)
    elif args.model_opt == "GCN":
        model = GCN(
            g=g,
            in_feats=features.shape[1],
            n_hidden=args.n_hidden,
            n_classes=n_classes - len(removed_class),
            activation=nn.PReLU(),
            dropout=args.dropout,
        )
        if cuda:
            model.to(features.device)
        loss_fcn = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
        # The re-indexed training labels do not change between epochs, so
        # compute them once. Boolean-mask indexing yields a copy, so `labels`
        # itself is left untouched.
        labels_train = process_classids(labels_temp=labels[train_mask_zs])
        for epoch in range(args.n_epochs):
            model.train()
            optimizer.zero_grad()
            logits = model(features)
            loss_train = loss_fcn(logits[train_mask_zs], labels_train)
            print(
                "Epoch {:d} | Train Loss {:.5f}".format(
                    epoch + 1, loss_train.item()
                )
            )
            loss_train.backward()
            optimizer.step()
        model.eval()
        embeds = model.embed(features)
    elif args.model_opt == "NodeFeats":
        embeds = svd_feature(features)
    else:
        # Without this branch an unknown option failed later with an
        # UnboundLocalError on `embeds`.
        raise ValueError("Unknown model option: {}".format(args.model_opt))

    # evaluate the quality of embedding results with the original balanced labels, to assess the model performance (as suggested in the paper)
    res = evaluate_embeds(
        features=embeds,
        labels=labels,
        train_mask=train_mask,
        test_mask=test_mask,
        n_classes=n_classes,
        cuda=cuda,
    )
    print("Test Accuracy of {:s}: {:.4f}".format(args.model_opt, res))
class RECT_L(nn.Module):
    """One graph-convolution layer followed by a linear decoder.

    ``forward`` maps the node inputs through the graph convolution, applies
    dropout, and projects the hidden state back to ``in_feats`` dimensions.
    ``embed`` returns the (detached) hidden state of the graph convolution.
    """

    def __init__(self, g, in_feats, n_hidden, activation, dropout=0.0):
        super().__init__()
        self.g = g
        self.dropout = dropout
        self.gcn_1 = GraphConv(in_feats, n_hidden, activation=activation)
        self.fc = nn.Linear(n_hidden, in_feats)
        nn.init.xavier_uniform_(self.fc.weight.data)

    def _encode(self, inputs):
        # Hidden representation shared by forward() and embed().
        return self.gcn_1(self.g, inputs)

    def forward(self, inputs):
        """Return the decoded predictions for ``inputs``."""
        hidden = F.dropout(
            self._encode(inputs), p=self.dropout, training=self.training
        )
        return self.fc(hidden)

    def embed(self, inputs):
        """Return the hidden node embeddings, detached from the autograd graph."""
        return self._encode(inputs).detach()
def process_classids(labels_temp):
    """Reorder the remaining classes with unseen classes removed.

    The distinct class ids present in ``labels_temp`` are ranked in ascending
    order and every entry is replaced by the rank of its class, so the
    remaining classes become the contiguous ids 0, 1, 2, ...

    Input: 1-D tensor of integer class ids (only the unseen classes removed)
    Output: the same tensor, updated in place, holding the reordered class ids
    """
    # torch.unique returns the sorted distinct ids; the inverse indices are
    # each element's position in that sorted list, i.e. its new class id.
    # One vectorized call replaces a per-element Python loop (which forces a
    # device synchronization per element when the labels live on a GPU).
    _, remapped = torch.unique(labels_temp, sorted=True, return_inverse=True)
    # Write back into the argument to keep the original in-place contract.
    labels_temp.copy_(remapped)
    return labels_temp
Dependencies ------------ - torchmetrics 0.11.4 Install as follows: ```bash pip install torchmetrics==0.11.4 ``` How to Run ------- Run with the following for node classification on ogbn-mag dataset ```bash python train.py ``` Summary ------- * ogbn-mag (test acc.): ~0.3647 ================================================ FILE: examples/pytorch/rgat/train.py ================================================ import dgl import dgl.function as fn import dgl.nn as dglnn import torch import torch.nn as nn import torch.nn.functional as F import torchmetrics.functional as MF import tqdm from dgl import apply_each from dgl.dataloading import DataLoader, NeighborSampler from ogb.nodeproppred import DglNodePropPredDataset class HeteroGAT(nn.Module): def __init__(self, etypes, in_size, hid_size, out_size, n_heads=4): super().__init__() self.layers = nn.ModuleList() self.layers.append( dglnn.HeteroGraphConv( { etype: dglnn.GATConv(in_size, hid_size // n_heads, n_heads) for etype in etypes } ) ) self.layers.append( dglnn.HeteroGraphConv( { etype: dglnn.GATConv(hid_size, hid_size // n_heads, n_heads) for etype in etypes } ) ) self.layers.append( dglnn.HeteroGraphConv( { etype: dglnn.GATConv(hid_size, hid_size // n_heads, n_heads) for etype in etypes } ) ) self.dropout = nn.Dropout(0.5) self.linear = nn.Linear(hid_size, out_size) # Should be HeteroLinear def forward(self, blocks, x): h = x for l, (layer, block) in enumerate(zip(self.layers, blocks)): h = layer(block, h) # One thing is that h might return tensors with zero rows if the number of dst nodes # of one node type is 0. x.view(x.shape[0], -1) wouldn't work in this case. 
def train(train_loader, val_loader, test_loader, num_classes, model):
    """Train ``model`` for 10 epochs, reporting validation/test accuracy per epoch.

    Args:
        train_loader: dataloader yielding ``(input_nodes, output_nodes, blocks)``
            mini-batches used for optimization.
        val_loader: dataloader evaluated after every epoch ("Val." accuracy).
        test_loader: dataloader evaluated after every epoch ("Test" accuracy).
        num_classes: number of target classes, forwarded to ``evaluate``.
        model: module called as ``model(blocks, x)``; updated in place.
    """
    # loss function and optimizer
    loss_fcn = nn.CrossEntropyLoss()
    opt = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-4)

    # training loop
    for epoch in range(10):
        model.train()
        total_loss = 0
        num_batches = 0
        # Iterate over the loaders passed in, not over module-level globals.
        for input_nodes, output_nodes, blocks in tqdm.tqdm(
            train_loader, desc="Train"
        ):
            x = blocks[0].srcdata["feat"]
            y = blocks[-1].dstdata["label"]["paper"][:, 0]
            y_hat = model(blocks, x)
            loss = loss_fcn(y_hat, y)
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
            num_batches += 1

        model.eval()
        val_acc = evaluate(num_classes, model, val_loader, "Val. ")
        test_acc = evaluate(num_classes, model, test_loader, "Test ")
        # max(..., 1) keeps the report well-defined for an empty loader.
        mean_loss = total_loss / max(num_batches, 1)
        print(
            f"Epoch {epoch:05d} | Loss {mean_loss:.4f} | Validation Acc. {val_acc.item():.4f} | Test Acc. {test_acc.item():.4f}"
        )
test_dataloader = DataLoader( graph, test_idx, val_sampler, device=device, batch_size=1000, shuffle=False, drop_last=False, num_workers=0, use_uva=torch.cuda.is_available(), ) train(train_dataloader, val_dataloader, test_dataloader, num_classes, model) ================================================ FILE: examples/pytorch/rgcn/README.md ================================================ # Relational-GCN * Paper: [Modeling Relational Data with Graph Convolutional Networks](https://arxiv.org/abs/1703.06103) * Author's code for entity classification: [https://github.com/tkipf/relational-gcn](https://github.com/tkipf/relational-gcn) * Author's code for link prediction: [https://github.com/MichSchli/RelationPrediction](https://github.com/MichSchli/RelationPrediction) ### Dependencies - rdflib - torchmetrics 0.11.4 Install as follows: ```bash pip install rdflib pip install torchmetrics==0.11.4 ``` How to run ------- ### Entity Classification Run with the following for entity classification (available datasets: aifb (default), mutag, bgs, and am) ```bash python3 entity.py --dataset aifb ``` For mini-batch training, run with the following (available datasets are the same as above) ```bash python3 entity_sample.py --dataset aifb ``` For multi-gpu training (with sampling), run with the following (same datasets and GPU IDs separated by comma) ```bash python3 entity_sample_multi_gpu.py --dataset aifb --gpu 0,1 ``` ### Link Prediction Run with the following for link prediction on dataset FB15k-237 with filtered-MRR ```bash python link.py ``` > **_NOTE:_** By default, we use uniform edge sampling instead of neighbor-based edge sampling as in [author's code](https://github.com/MichSchli/RelationPrediction). In practice, we find that it can achieve similar MRR. 
def evaluate(g, target_idx, labels, num_classes, test_mask, model):
    """Return the multiclass accuracy of ``model`` on the masked target nodes.

    Args:
        g: graph passed to ``model``.
        target_idx: indices selecting the rows of the model output that belong
            to the category being classified.
        labels: labels of the target nodes.
        num_classes: number of classes, forwarded to ``accuracy``.
        test_mask: mask over the target nodes selecting the evaluation set.
        model: module called as ``model(g)``.

    Returns:
        The accuracy as a Python float.
    """
    # squeeze(1) drops only the coordinate axis of nonzero()'s (k, 1) result.
    # A bare squeeze() would also drop the node axis when k == 1, producing a
    # 0-dim index and a 1-D logits row that breaks argmax(dim=1).
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1)
    model.eval()
    with torch.no_grad():
        logits = model(g)
    logits = logits[target_idx]
    return accuracy(
        logits[test_idx].argmax(dim=1),
        labels[test_idx],
        task="multiclass",
        num_classes=num_classes,
    ).item()
if __name__ == "__main__":
    # Script entry point: parse the dataset name, build the homogeneous graph
    # with per-edge normalization, then train and test the full-graph RGCN.
    parser = argparse.ArgumentParser(
        description="RGCN for entity classification"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="aifb",
        help="Dataset name ('aifb', 'mutag', 'bgs', 'am').",
    )
    args = parser.parse_args()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Training with DGL built-in RGCN module.")

    # load and preprocess dataset
    dataset_classes = {
        "aifb": AIFBDataset,
        "mutag": MUTAGDataset,
        "bgs": BGSDataset,
        "am": AMDataset,
    }
    if args.dataset not in dataset_classes:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    data = dataset_classes[args.dataset]()
    g = data[0].int().to(device)
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    category_data = g.nodes[category].data
    labels = category_data.pop("labels")
    train_mask = category_data.pop("train_mask")
    test_mask = category_data.pop("test_mask")

    # calculate normalization weight for each edge, and find target category and node id
    for cetype in g.canonical_etypes:
        g.edges[cetype].data["norm"] = dgl.norm_by_dst(g, cetype).unsqueeze(1)
    category_id = g.ntypes.index(category)
    g = dgl.to_homogeneous(g, edata=["norm"])
    node_ids = torch.arange(g.num_nodes()).to(device)
    target_idx = node_ids[g.ndata[dgl.NTYPE] == category_id]

    # create RGCN model
    in_size = g.num_nodes()  # featureless with one-hot encoding
    num_classes = data.num_classes
    model = RGCN(in_size, 16, num_classes, num_rels).to(device)

    train(g, target_idx, labels, num_classes, train_mask, model)
    acc = evaluate(g, target_idx, labels, num_classes, test_mask, model)
    print("Test accuracy {:.4f}".format(acc))
def train(
    device,
    g,
    target_idx,
    labels,
    train_mask,
    num_classes,
    model,
    inv_target=None,
):
    """Train ``model`` with neighbor sampling for 50 epochs.

    Args:
        device: device the sampled blocks are placed on.
        g: homogeneous graph to sample from.
        target_idx: global node ids of the nodes of the target category.
        labels: labels indexed by type-specific node id.
        train_mask: mask over the target nodes selecting the training set.
        num_classes: number of classes, forwarded to ``evaluate``.
        model: module called as ``model(blocks)``; updated in place.
        inv_target: optional mapping from global node ids to type-specific
            node ids. When omitted it is rebuilt from ``g`` and
            ``target_idx``, so the function no longer relies on a module-level
            global of the same name.
    """
    if inv_target is None:
        inv_target = torch.empty(
            (g.num_nodes(),), dtype=torch.int64, device=device
        )
        inv_target[target_idx] = torch.arange(
            0, target_idx.shape[0], dtype=inv_target.dtype, device=device
        )

    # define train idx, loss function and optimizer
    # squeeze(1) keeps the index 1-D even when a single node is selected.
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze(1)
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    # construct sampler and dataloader
    sampler = MultiLayerNeighborSampler([4, 4])
    train_loader = DataLoader(
        g,
        target_idx[train_idx],
        sampler,
        device=device,
        batch_size=100,
        shuffle=True,
    )
    # no separate validation subset, use train index instead for validation
    val_loader = DataLoader(
        g,
        target_idx[train_idx],
        sampler,
        device=device,
        batch_size=100,
        shuffle=False,
    )

    for epoch in range(50):
        model.train()
        total_loss = 0
        num_batches = 0
        for input_nodes, output_nodes, blocks in train_loader:
            output_nodes = inv_target[output_nodes]
            for block in blocks:
                block.edata["norm"] = dgl.norm_by_dst(block).unsqueeze(1)
            logits = model(blocks)
            loss = loss_fcn(logits, labels[output_nodes])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1
        acc = evaluate(model, labels, num_classes, val_loader, inv_target)
        # max(..., 1) keeps the report well-defined for an empty loader.
        print(
            "Epoch {:05d} | Loss {:.4f} | Val. Accuracy {:.4f} ".format(
                epoch, total_loss / max(num_batches, 1), acc
            )
        )
fields as they can be changed by DataLoader g.ndata["ntype"] = g.ndata.pop(dgl.NTYPE) g.ndata["type_id"] = g.ndata.pop(dgl.NID) # find the mapping (inv_target) from global node IDs to type-specific node IDs inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64).to(device) inv_target[target_idx] = torch.arange( 0, target_idx.shape[0], dtype=inv_target.dtype ).to(device) # create RGCN model in_size = g.num_nodes() # featureless with one-hot encoding num_classes = data.num_classes model = RGCN(in_size, 16, num_classes, num_rels).to(device) train(device, g, target_idx, labels, train_mask, num_classes, model) test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() test_sampler = MultiLayerNeighborSampler( [-1, -1] ) # -1 for sampling all neighbors test_loader = DataLoader( g, target_idx[test_idx], test_sampler, device=device, batch_size=32, shuffle=False, ) acc = evaluate(model, labels, num_classes, test_loader, inv_target) print("Test accuracy {:.4f}".format(acc)) ================================================ FILE: examples/pytorch/rgcn/entity_sample_multi_gpu.py ================================================ import argparse import os import dgl import torch import torch.distributed as dist import torch.multiprocessing as mp import torch.nn as nn import torch.nn.functional as F from dgl.data.rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from dgl.dataloading import DataLoader, MultiLayerNeighborSampler from dgl.nn.pytorch import RelGraphConv from torch.nn.parallel import DistributedDataParallel from torchmetrics.functional import accuracy class RGCN(nn.Module): def __init__(self, num_nodes, h_dim, out_dim, num_rels): super().__init__() self.emb = nn.Embedding(num_nodes, h_dim) # two-layer RGCN self.conv1 = RelGraphConv( h_dim, h_dim, num_rels, regularizer="basis", num_bases=num_rels, self_loop=False, ) self.conv2 = RelGraphConv( h_dim, out_dim, num_rels, regularizer="basis", num_bases=num_rels, self_loop=False, ) def forward(self, g): x = 
def evaluate(model, labels, num_classes, dataloader, inv_target):
    """Score ``model`` on this process's share of ``dataloader``.

    Returns a 2-element tensor ``[accuracy * num_seeds, num_seeds]`` so that
    the per-process results can be summed and divided to obtain the overall
    accuracy.
    """
    model.eval()
    logit_chunks, seed_chunks = [], []
    with torch.no_grad():
        for _, seeds, blocks in dataloader:
            # Translate global node ids into type-specific ids for the labels.
            local_seeds = inv_target[seeds]
            for blk in blocks:
                blk.edata["norm"] = dgl.norm_by_dst(blk).unsqueeze(1)
            batch_logits = model(blocks)
            logit_chunks.append(batch_logits.cpu().detach())
            seed_chunks.append(local_seeds.cpu().detach())

    all_logits = torch.cat(logit_chunks)
    all_seeds = torch.cat(seed_chunks)
    seed_count = float(len(all_seeds))
    local_acc = accuracy(
        all_logits.argmax(dim=1),
        labels[all_seeds].cpu(),
        task="multiclass",
        num_classes=num_classes,
    )
    weighted_correct = local_acc * seed_count
    return torch.tensor([weighted_correct.item(), seed_count])
if __name__ == "__main__":
    # Script entry point: load the dataset once in the parent process, build
    # the homogeneous graph and index tensors, then spawn one training
    # process per requested GPU.
    parser = argparse.ArgumentParser(
        description="RGCN for entity classification with sampling (multi-gpu)"
    )
    parser.add_argument(
        "--dataset",
        type=str,
        default="aifb",
        help="Dataset name ('aifb', 'mutag', 'bgs', 'am').",
    )
    parser.add_argument(
        "--gpu",
        type=str,
        default="0",
        help="GPU(s) in use. Can be a list of gpu ids for multi-gpu training,"
        " e.g., 0,1,2,3.",
    )
    args = parser.parse_args()
    devices = list(map(int, args.gpu.split(",")))
    nprocs = len(devices)
    print(
        f"Training with DGL built-in RGCN module with sampling using",
        nprocs,
        f"GPU(s)",
    )

    # load and preprocess dataset at master(parent) process
    if args.dataset == "aifb":
        data = AIFBDataset()
    elif args.dataset == "mutag":
        data = MUTAGDataset()
    elif args.dataset == "bgs":
        data = BGSDataset()
    elif args.dataset == "am":
        data = AMDataset()
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    labels = g.nodes[category].data.pop("labels")
    train_mask = g.nodes[category].data.pop("train_mask")
    test_mask = g.nodes[category].data.pop("test_mask")

    # find target category and node id
    category_id = g.ntypes.index(category)
    g = dgl.to_homogeneous(g)
    node_ids = torch.arange(g.num_nodes())
    target_idx = node_ids[g.ndata[dgl.NTYPE] == category_id]
    # rename the fields as they can be changed by DataLoader
    g.ndata["ntype"] = g.ndata.pop(dgl.NTYPE)
    g.ndata["type_id"] = g.ndata.pop(dgl.NID)

    # find the mapping (inv_target) from global node IDs to type-specific node IDs
    inv_target = torch.empty((g.num_nodes(),), dtype=torch.int64)
    inv_target[target_idx] = torch.arange(
        0, target_idx.shape[0], dtype=inv_target.dtype
    )

    # avoid creating certain graph formats and train/test indexes in each sub-process to save memory
    g.create_formats_()
    # squeeze(1) keeps the indexes 1-D even when a mask selects a single node.
    train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze(1)
    test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze(1)

    # thread limiting to avoid resource competition; never request fewer than
    # one thread (the integer division yields 0 on small machines or with
    # many GPUs, which is not a valid OMP_NUM_THREADS value).
    os.environ["OMP_NUM_THREADS"] = str(
        max(1, mp.cpu_count() // 2 // nprocs)
    )

    data = (
        num_rels,
        data.num_classes,
        labels,
        train_idx,
        test_idx,
        target_idx,
        inv_target,
    )
    mp.spawn(run, args=(nprocs, devices, g, data), nprocs=nprocs)
def load_data(data_name, get_norm=False, inv_target=False):
    """Load an RDF entity-classification dataset as a homogeneous graph.

    Args:
        data_name: one of "aifb", "mutag", "bgs" or "am".
        get_norm: when True, store the per-edge normalization weight
            (computed by ``dgl.norm_by_dst``) in ``g.edata["norm"]``.
        inv_target: when True, additionally return the mapping from global
            node ids to type-specific node ids of the target category.

    Returns:
        ``(g, num_rels, num_classes, labels, train_idx, test_idx, target_idx)``
        and, when ``inv_target`` is True, the mapping tensor as an eighth
        element.

    Raises:
        ValueError: if ``data_name`` is not a known dataset.
    """
    if data_name == "aifb":
        dataset = AIFBDataset()
    elif data_name == "mutag":
        dataset = MUTAGDataset()
    elif data_name == "bgs":
        dataset = BGSDataset()
    elif data_name == "am":
        dataset = AMDataset()
    else:
        # Fail loudly instead of silently loading AM for a mistyped name.
        raise ValueError("Unknown dataset: {}".format(data_name))

    # Load hetero-graph
    hg = dataset[0]

    num_rels = len(hg.canonical_etypes)
    category = dataset.predict_category
    num_classes = dataset.num_classes
    labels = hg.nodes[category].data.pop("labels")
    train_mask = hg.nodes[category].data.pop("train_mask")
    test_mask = hg.nodes[category].data.pop("test_mask")
    # squeeze(1) keeps the indexes 1-D even when a mask selects a single node.
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze(1)
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze(1)

    if get_norm:
        # Calculate normalization weight for each edge,
        # 1. / d, d is the degree of the destination node
        for cetype in hg.canonical_etypes:
            hg.edges[cetype].data["norm"] = dgl.norm_by_dst(
                hg, cetype
            ).unsqueeze(1)
        edata = ["norm"]
    else:
        edata = None

    # get target category id
    category_id = hg.ntypes.index(category)

    g = dgl.to_homogeneous(hg, edata=edata)
    # Rename the fields as they can be changed by for example DataLoader
    g.ndata["ntype"] = g.ndata.pop(dgl.NTYPE)
    g.ndata["type_id"] = g.ndata.pop(dgl.NID)
    node_ids = th.arange(g.num_nodes())

    # find out the target node ids in g
    loc = g.ndata["ntype"] == category_id
    target_idx = node_ids[loc]

    if not inv_target:
        return g, num_rels, num_classes, labels, train_idx, test_idx, target_idx

    # Map global node IDs to type-specific node IDs. This is required for
    # looking up type-specific labels in a minibatch
    inv_target_map = th.empty((g.num_nodes(),), dtype=th.int64)
    inv_target_map[target_idx] = th.arange(
        0, target_idx.shape[0], dtype=inv_target_map.dtype
    )
    return (
        g,
        num_rels,
        num_classes,
        labels,
        train_idx,
        test_idx,
        target_idx,
        inv_target_map,
    )
If the ip does not begin with `192.168`, then you may use ```bash # for ip range 10.0.0.0 – 10.255.255.255 /home/ubuntu/workspace 10.0.0.0/8(rw,sync,no_subtree_check) # for ip range 172.16.0.0 – 172.31.255.255 /home/ubuntu/workspace 172.16.0.0/12(rw,sync,no_subtree_check) ``` Then restart NFS; the setup on the server side is finished. ``` sudo systemctl restart nfs-kernel-server ``` For configuration details, please refer to [NFS ArchWiki](https://wiki.archlinux.org/index.php/NFS). #### Client side setup To use NFS, clients also need to install essential packages ``` sudo apt-get install nfs-common ``` You can either mount the NFS manually ``` mkdir -p /home/ubuntu/workspace sudo mount -t nfs <nfs-server-ip>:/home/ubuntu/workspace /home/ubuntu/workspace ``` or edit the fstab so the folder will be mounted automatically ``` # vim /etc/fstab ## append the following line to the file <nfs-server-ip>:/home/ubuntu/workspace /home/ubuntu/workspace nfs defaults 0 0 ``` Then run `mount -a`. Now go to `/home/ubuntu/workspace` and clone the DGL GitHub repository. ### Step 1: set IP configuration file. Users need to set their own IP configuration file `ip_config.txt` before training. For example, if we have four machines in the current cluster, the IP configuration could look like this: ```bash 172.31.0.1 172.31.0.2 172.31.0.3 172.31.0.4 ``` Users need to make sure that the master node (node-0) has the right permission to ssh to all the other nodes without password authentication. [This link](https://linuxize.com/post/how-to-setup-passwordless-ssh-login/) provides instructions for setting up passwordless SSH login. ### Step 2: partition the graph. The example provides a script to partition some builtin graphs such as the ogbn-mag graph. If we want to train RGCN on 4 machines, we need to partition the graph into 4 parts. In this example, we partition the ogbn-mag graph into 4 parts with Metis. The partitions are balanced with respect to the number of nodes, the number of edges and the number of labelled nodes.
```bash python3 partition_graph.py --dataset ogbn-mag --num_parts 4 --balance_train --balance_edges ``` ### Step 3: Launch distributed jobs DGL provides a script to launch the training job in the cluster. `part_config` and `ip_config` specify relative paths to the path of the workspace. The command below launches one training process on each machine and each training process has 4 sampling processes. ```bash python3 ~/workspace/dgl/tools/launch.py \ --workspace ~/workspace/dgl/examples/pytorch/rgcn/experimental/ \ --num_trainers 1 \ --num_servers 1 \ --num_samplers 4 \ --part_config data/ogbn-mag.json \ --ip_config ip_config.txt \ "python3 entity_classify_dist.py --graph-name ogbn-mag --dataset ogbn-mag --fanout='25,25' --batch-size 1024 --n-hidden 64 --lr 0.01 --eval-batch-size 1024 --low-mem --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --layer-norm --ip-config ip_config.txt --sparse-embedding --sparse-lr 0.06 --num_gpus 1" ``` We can get the performance score at the second epoch: ``` Val Acc 0.4323, Test Acc 0.4255, time: 128.0379 ``` The command below launches the same distributed training job using dgl distributed DistEmbedding ```bash python3 ~/workspace/dgl/tools/launch.py \ --workspace ~/workspace/dgl/examples/pytorch/rgcn/experimental/ \ --num_trainers 1 \ --num_servers 1 \ --num_samplers 4 \ --part_config data/ogbn-mag.json \ --ip_config ip_config.txt \ "python3 entity_classify_dist.py --graph-name ogbn-mag --dataset ogbn-mag --fanout='25,25' --batch-size 1024 --n-hidden 64 --lr 0.01 --eval-batch-size 1024 --low-mem --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --layer-norm --ip-config ip_config.txt --sparse-embedding --sparse-lr 0.06 --num_gpus 1 --dgl-sparse" ``` We can get the performance score at the second epoch: ``` Val Acc 0.4410, Test Acc 0.4282, time: 32.5274 ``` **Note:** if you are using conda or other virtual environments on the remote machines, you need to replace `python3` in the command string (i.e. 
the last argument) with the path to the Python interpreter in that environment. ## Partition a graph with ParMETIS It has four steps to partition a graph with ParMETIS for DGL's distributed training. More details about the four steps are explained in our [user guide](https://doc.dgl.ai/guide/distributed-preprocessing.html). ### Step 1: write the graph into files. The graph structure should be written as a node file and an edge file. The node features and edge features can be written as DGL tensors. `write_mag.py` shows an example of writing the OGB MAG graph into files. As `pm_dglpart` cannot handle self-loops and duplicate edges correctly, these edges are removed and stored into `mag_removed_edges.txt` when calling `write_mag.py`. When converting ParMETIS outputs into DGLGraph in next steps, `mag_removed_edges.txt` should be passed in. Refer to Step 3 for more details. ```bash python3 write_mag.py ``` ### Step 2: partition the graph with ParMETIS Run the program called `pm_dglpart` in ParMETIS to read the node file and the edge file output in Step 1 to partition the graph. ```bash pm_dglpart mag 2 ``` This partitions the graph into two parts with a single process. ``` mpirun -np 4 pm_dglpart mag 2 ``` This partitions the graph into eight parts with four processes. ``` mpirun --hostfile hostfile -np 4 pm_dglpart mag 2 ``` This partitions the graph into eight parts with four processes on multiple machines. `hostfile` specifies the IPs of the machines; one line for a machine. The input files should reside in the machine where the command line runs. Each process will write the partitions to files in the local machine. For simplicity, we recommend users to write the files on NFS. ### Step 3: Convert the ParMETIS partitions into DGLGraph DGL provides a tool called `convert_partition.py` to load one partition at a time and convert it into a DGLGraph and save it into a file. 
As mentioned in Step 1, please pass `mag_removed_edges.txt` if any self-loops and duplicate edges are removed. ```bash python3 ~/workspace/dgl/tools/convert_partition.py --input-dir . --graph-name mag --schema mag.json --num-parts 2 --num-node-weights 4 --output outputs --removed-edges mag_removed_edges.txt ``` ### Step 4: Read node data and edge data for each partition This shows an example of reading node data and edge data of each partition and saving them into files located in the same directory as the DGLGraph file. ```bash python3 get_mag_data.py ``` ### Step 5: Verify the partition result (Optional) ```bash python3 verify_mag_partitions.py ``` ## Distributed code runs in the standalone mode The standalone mode is mainly used for development and testing. The procedure to run the code is much simpler. ### Step 1: graph construction. When testing the standalone mode of the training script, we should construct a graph with one partition. ```bash python3 partition_graph.py --dataset ogbn-mag --num_parts 1 ``` ### Step 2: run the training script ```bash DGL_DIST_MODE=standalone python3 entity_classify_dist.py --graph-name ogbn-mag --dataset ogbn-mag --fanout='25,25' --batch-size 512 --n-hidden 64 --lr 0.01 --eval-batch-size 128 --low-mem --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --layer-norm --ip-config ip_config.txt --conf-path 'data/ogbn-mag.json' --standalone --sparse-embedding --sparse-lr 0.06 ``` ================================================ FILE: examples/pytorch/rgcn/experimental/entity_classify_dist.py ================================================ """ Modeling Relational Data with Graph Convolutional Networks Paper: https://arxiv.org/abs/1703.06103 Code: https://github.com/tkipf/relational-gcn Difference compared to tkipf/relation-gcn * l2norm applied to all weights * remove nodes that won't be touched """ import argparse import gc, os import itertools import time import numpy as np os.environ["DGLBACKEND"] = "pytorch" from functools 
class RelGraphConvLayer(nn.Module):
    r"""Relational graph convolution layer.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    rel_names : list[str]
        Relation names.
    num_bases : int or None
        Number of bases. If None, or not smaller than the number of
        relations, one full weight matrix per relation is used instead of a
        basis decomposition.
    weight : bool, optional
        True if a linear layer is applied after message passing. Default: True
    bias : bool, optional
        True if bias is added. Default: True
    activation : callable, optional
        Activation function. Default: None
    self_loop : bool, optional
        True to include self loop message. Default: False
    dropout : float, optional
        Dropout rate. Default: 0.0
    """

    def __init__(
        self,
        in_feat,
        out_feat,
        rel_names,
        num_bases,
        *,
        weight=True,
        bias=True,
        activation=None,
        self_loop=False,
        dropout=0.0
    ):
        super(RelGraphConvLayer, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.rel_names = rel_names
        self.num_bases = num_bases
        self.bias = bias
        self.activation = activation
        self.self_loop = self_loop

        # The per-relation GraphConv modules own no weights; the relation
        # weights are created below and handed over at call time.
        self.conv = dglnn.HeteroGraphConv(
            {
                rel: dglnn.GraphConv(
                    in_feat, out_feat, norm="right", weight=False, bias=False
                )
                for rel in rel_names
            }
        )

        self.use_weight = weight
        # Guard against ``num_bases=None`` ("use number of relations"):
        # comparing None with an int raises TypeError on Python 3.
        self.use_basis = bool(
            weight
            and num_bases is not None
            and num_bases < len(self.rel_names)
        )
        if self.use_weight:
            if self.use_basis:
                self.basis = dglnn.WeightBasis(
                    (in_feat, out_feat), num_bases, len(self.rel_names)
                )
            else:
                self.weight = nn.Parameter(
                    th.Tensor(len(self.rel_names), in_feat, out_feat)
                )
                nn.init.xavier_uniform_(
                    self.weight, gain=nn.init.calculate_gain("relu")
                )

        # bias
        if bias:
            self.h_bias = nn.Parameter(th.Tensor(out_feat))
            nn.init.zeros_(self.h_bias)

        # weight for self loop
        if self.self_loop:
            self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat))
            nn.init.xavier_uniform_(
                self.loop_weight, gain=nn.init.calculate_gain("relu")
            )

        self.dropout = nn.Dropout(dropout)

    def forward(self, g, inputs):
        """Forward computation

        Parameters
        ----------
        g : DGLGraph
            Input graph.
        inputs : dict[str, torch.Tensor]
            Node feature for each node type.

        Returns
        -------
        dict[str, torch.Tensor]
            New node features for each node type.
        """
        g = g.local_var()
        if self.use_weight:
            weight = self.basis() if self.use_basis else self.weight
            # One (in_feat, out_feat) matrix per relation, keyed by name.
            wdict = {
                self.rel_names[i]: {"weight": w.squeeze(0)}
                for i, w in enumerate(th.split(weight, 1, dim=0))
            }
        else:
            wdict = {}

        if g.is_block:
            # Keep only the leading rows, one per destination node, for the
            # self-loop term.
            inputs_dst = {
                k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items()
            }
        else:
            inputs_dst = inputs

        hs = self.conv(g, inputs, mod_kwargs=wdict)

        def _apply(ntype, h):
            if self.self_loop:
                h = h + th.matmul(inputs_dst[ntype], self.loop_weight)
            if self.bias:
                h = h + self.h_bias
            if self.activation:
                h = self.activation(h)
            return self.dropout(h)

        return {ntype: _apply(ntype, h) for ntype, h in hs.items()}
class DistEmbedLayer(nn.Module):
    r"""Embedding layer for featureless heterograph.

    Node types that carry ``feat_name`` get a linear projection of that
    feature; every other node type gets a learnable embedding table.

    Parameters
    ----------
    dev_id : int
        Device to run the layer.
    g : DistGraph
        training graph
    embed_size : int
        Output embed size
    sparse_emb: bool
        Whether to use sparse embedding
        Default: False
    dgl_sparse_emb: bool
        Whether to use DGL sparse embedding (only honoured together with
        ``sparse_emb``)
        Default: False
    feat_name : str, optional
        Name of the node feature to project
    embed_name : str, optional
        Embed name
    """

    def __init__(
        self,
        dev_id,
        g,
        embed_size,
        sparse_emb=False,
        dgl_sparse_emb=False,
        feat_name="feat",
        embed_name="node_emb",
    ):
        super(DistEmbedLayer, self).__init__()
        self.dev_id = dev_id
        self.embed_size = embed_size
        self.embed_name = embed_name
        self.feat_name = feat_name
        self.sparse_emb = sparse_emb
        self.g = g
        self.ntype_id_map = {g.get_ntype_id(ntype): ntype for ntype in g.ntypes}

        # Dense projections for node types that come with input features.
        self.node_projs = nn.ModuleDict()
        for ntype in g.ntypes:
            ndata = g.nodes[ntype].data
            if feat_name not in ndata:
                continue
            proj = nn.Linear(ndata[feat_name].shape[1], embed_size)
            self.node_projs[ntype] = proj
            nn.init.xavier_uniform_(proj.weight)
            print("node {} has data {}".format(ntype, feat_name))

        # We only create embeddings for nodes without node features.
        featureless = [
            ntype for ntype in g.ntypes if feat_name not in g.nodes[ntype].data
        ]

        if sparse_emb and dgl_sparse_emb:
            # A plain dict keeps these tables out of the registered torch
            # parameters.
            self.node_embeds = {}
            for ntype in featureless:
                part_policy = g.get_node_partition_policy(ntype)
                self.node_embeds[ntype] = dgl.distributed.DistEmbedding(
                    g.num_nodes(ntype),
                    self.embed_size,
                    embed_name + "_" + ntype,
                    init_emb,
                    part_policy,
                )
        else:
            self.node_embeds = nn.ModuleDict()
            for ntype in featureless:
                table = th.nn.Embedding(
                    g.num_nodes(ntype),
                    self.embed_size,
                    sparse=sparse_emb if sparse_emb else False,
                )
                self.node_embeds[ntype] = table
                nn.init.uniform_(table.weight, -1.0, 1.0)

    def forward(self, node_ids):
        """Forward computation

        Parameters
        ----------
        node_ids : dict of Tensor
            node ids to generate embedding for.

        Returns
        -------
        dict of Tensor
            embeddings as the input of the next layer, keyed by node type
        """
        embeds = {}
        for ntype in node_ids:
            ids = node_ids[ntype]
            ndata = self.g.nodes[ntype].data
            if self.feat_name in ndata:
                feats = ndata[self.feat_name][ids].to(self.dev_id)
                embeds[ntype] = self.node_projs[ntype](feats)
            else:
                embeds[ntype] = self.node_embeds[ntype](ids).to(self.dev_id)
        return embeds
def run(args, device, data):
    """Train and evaluate the distributed RGCN model on "paper" nodes.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options (fanouts, batch sizes, learning rates,
        ``standalone``, ``sparse_embedding``, ``dgl_sparse``, ...).
    device : torch.device
        Device the model is moved to.
    data : tuple
        ``(g, num_classes, train_nid, val_nid, test_nid, labels,
        all_val_nid, all_test_nid)``.
    """
    (
        g,
        num_classes,
        train_nid,
        val_nid,
        test_nid,
        labels,
        all_val_nid,
        all_test_nid,
    ) = data

    fanouts = [int(fanout) for fanout in args.fanout.split(",")]
    val_fanouts = [int(fanout) for fanout in args.validation_fanout.split(",")]

    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
    dataloader = dgl.distributed.DistNodeDataLoader(
        g,
        {"paper": train_nid},
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
    )

    # NOTE(review): validation uses args.batch_size while the test loader
    # uses args.eval_batch_size -- confirm whether that is intended.
    valid_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts)
    valid_dataloader = dgl.distributed.DistNodeDataLoader(
        g,
        {"paper": val_nid},
        valid_sampler,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
    )

    test_sampler = dgl.dataloading.MultiLayerNeighborSampler(val_fanouts)
    test_dataloader = dgl.distributed.DistNodeDataLoader(
        g,
        {"paper": test_nid},
        test_sampler,
        batch_size=args.eval_batch_size,
        shuffle=False,
        drop_last=False,
    )

    embed_layer = DistEmbedLayer(
        device,
        g,
        args.n_hidden,
        sparse_emb=args.sparse_embedding,
        dgl_sparse_emb=args.dgl_sparse,
        feat_name="feat",
    )

    model = EntityClassify(
        device,
        args.n_hidden,
        num_classes,
        g.etypes,
        num_bases=args.n_bases,
        num_hidden_layers=args.n_layers - 2,
        dropout=args.dropout,
        use_self_loop=args.use_self_loop,
        layer_norm=args.layer_norm,
    )
    model = model.to(device)

    if not args.standalone:
        if args.num_gpus == -1:
            model = DistributedDataParallel(model)
            # If there are dense parameters in the embedding layer
            # or we use Pytorch sparse embeddings.
            if len(embed_layer.node_projs) > 0 or not args.dgl_sparse:
                embed_layer = DistributedDataParallel(embed_layer)
        else:
            dev_id = g.rank() % args.num_gpus
            model = DistributedDataParallel(
                model, device_ids=[dev_id], output_device=dev_id
            )
            # If there are dense parameters in the embedding layer
            # or we use Pytorch sparse embeddings.
            if len(embed_layer.node_projs) > 0 or not args.dgl_sparse:
                embed_layer = embed_layer.to(device)
                embed_layer = DistributedDataParallel(
                    embed_layer, device_ids=[dev_id], output_device=dev_id
                )

    # The embedding layer is only wrapped in DDP under the conditions above,
    # so unwrap by type instead of assuming ``.module`` exists whenever we
    # are not in standalone mode.
    if isinstance(embed_layer, DistributedDataParallel):
        raw_embed = embed_layer.module
    else:
        raw_embed = embed_layer

    if args.sparse_embedding:
        if args.dgl_sparse:
            emb_optimizer = dgl.distributed.optim.SparseAdam(
                list(raw_embed.node_embeds.values()), lr=args.sparse_lr
            )
            print(
                "optimize DGL sparse embedding:", raw_embed.node_embeds.keys()
            )
        else:
            emb_optimizer = th.optim.SparseAdam(
                list(raw_embed.node_embeds.parameters()), lr=args.sparse_lr
            )
            print("optimize Pytorch sparse embedding:", raw_embed.node_embeds)

        # Sparse tables are handled by emb_optimizer; Adam gets the rest.
        dense_params = list(model.parameters())
        dense_params += list(raw_embed.node_projs.parameters())
        print("optimize dense projection:", raw_embed.node_projs)
        optimizer = th.optim.Adam(
            dense_params, lr=args.lr, weight_decay=args.l2norm
        )
    else:
        all_params = list(model.parameters()) + list(embed_layer.parameters())
        optimizer = th.optim.Adam(
            all_params, lr=args.lr, weight_decay=args.l2norm
        )

    # training loop
    print("start training...")
    for epoch in range(args.n_epochs):
        number_train = 0
        number_input = 0

        # Per-step wall-clock durations, summed for the log lines below.
        step_time = []
        sample_t = []
        feat_copy_t = []
        forward_t = []
        backward_t = []
        update_t = []

        start = time.time()
        # Loop over the dataloader to sample the computation dependency graph
        # as a list of blocks.
        for step, sample_data in enumerate(dataloader):
            input_nodes, seeds, blocks = sample_data
            seeds = seeds["paper"]
            number_train += seeds.shape[0]
            number_input += np.sum(
                [blocks[0].num_src_nodes(ntype) for ntype in blocks[0].ntypes]
            )
            tic_step = time.time()
            sample_t.append(tic_step - start)

            feats = embed_layer(input_nodes)
            label = labels[seeds].to(device)
            copy_end = time.time()
            feat_copy_t.append(copy_end - tic_step)

            # forward
            logits = model(blocks, feats)
            assert len(logits) == 1
            logits = logits["paper"]
            loss = F.cross_entropy(logits, label)
            forward_end = time.time()

            # backward
            optimizer.zero_grad()
            if args.sparse_embedding:
                emb_optimizer.zero_grad()
            loss.backward()
            compute_end = time.time()
            forward_t.append(forward_end - copy_end)
            backward_t.append(compute_end - forward_end)

            # Update model parameters
            optimizer.step()
            if args.sparse_embedding:
                emb_optimizer.step()
            update_t.append(time.time() - compute_end)
            step_time.append(time.time() - start)

            train_acc = th.sum(logits.argmax(dim=1) == label).item() / len(
                seeds
            )

            if step % args.log_every == 0:
                print(
                    "[{}] Epoch {:05d} | Step {:05d} | Train acc {:.4f} | Loss {:.4f} | time {:.3f} s"
                    "| sample {:.3f} | copy {:.3f} | forward {:.3f} | backward {:.3f} | update {:.3f}".format(
                        g.rank(),
                        epoch,
                        step,
                        train_acc,
                        loss.item(),
                        np.sum(step_time[-args.log_every :]),
                        np.sum(sample_t[-args.log_every :]),
                        np.sum(feat_copy_t[-args.log_every :]),
                        np.sum(forward_t[-args.log_every :]),
                        np.sum(backward_t[-args.log_every :]),
                        np.sum(update_t[-args.log_every :]),
                    )
                )
            start = time.time()

        gc.collect()
        print(
            "[{}]Epoch Time(s): {:.4f}, sample: {:.4f}, data copy: {:.4f}, forward: {:.4f}, backward: {:.4f}, update: {:.4f}, #train: {}, #input: {}".format(
                g.rank(),
                np.sum(step_time),
                np.sum(sample_t),
                np.sum(feat_copy_t),
                np.sum(forward_t),
                np.sum(backward_t),
                np.sum(update_t),
                number_train,
                number_input,
            )
        )

        start = time.time()
        g.barrier()
        val_acc, test_acc = evaluate(
            g,
            model,
            embed_layer,
            labels,
            valid_dataloader,
            test_dataloader,
            all_val_nid,
            all_test_nid,
        )
        # evaluate() returns (-1, -1) on every rank except rank 0.
        if val_acc >= 0:
            print(
                "Val Acc {:.4f}, Test Acc {:.4f}, time: {:.4f}".format(
                    val_acc, test_acc, time.time() - start
                )
            )
len(val_nid), len(np.intersect1d(val_nid.numpy(), local_nid)), len(test_nid), len(np.intersect1d(test_nid.numpy(), local_nid)), ) ) if args.num_gpus == -1: device = th.device("cpu") else: dev_id = g.rank() % args.num_gpus device = th.device("cuda:" + str(dev_id)) labels = g.nodes["paper"].data["labels"][np.arange(g.num_nodes("paper"))] all_val_nid = th.LongTensor( np.nonzero( g.nodes["paper"].data["val_mask"][np.arange(g.num_nodes("paper"))] ) ).squeeze() all_test_nid = th.LongTensor( np.nonzero( g.nodes["paper"].data["test_mask"][np.arange(g.num_nodes("paper"))] ) ).squeeze() n_classes = len(th.unique(labels[labels >= 0])) print("#classes:", n_classes) run( args, device, ( g, n_classes, train_nid, val_nid, test_nid, labels, all_val_nid, all_test_nid, ), ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="RGCN") # distributed training related parser.add_argument("--graph-name", type=str, help="graph name") parser.add_argument("--id", type=int, help="the partition id") parser.add_argument( "--ip-config", type=str, help="The file for IP configuration" ) parser.add_argument( "--conf-path", type=str, help="The path to the partition config file" ) # rgcn related parser.add_argument( "--num_gpus", type=int, default=-1, help="the number of GPU device. 
Use -1 for CPU training", ) parser.add_argument( "--dropout", type=float, default=0, help="dropout probability" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden units" ) parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--sparse-lr", type=float, default=1e-2, help="sparse lr rate" ) parser.add_argument( "--n-bases", type=int, default=-1, help="number of filter weight matrices, default: -1 [use all]", ) parser.add_argument( "--n-layers", type=int, default=2, help="number of propagation rounds" ) parser.add_argument( "-e", "--n-epochs", type=int, default=50, help="number of training epochs", ) parser.add_argument( "-d", "--dataset", type=str, required=True, help="dataset to use" ) parser.add_argument("--l2norm", type=float, default=0, help="l2 norm coef") parser.add_argument( "--relabel", default=False, action="store_true", help="remove untouched nodes and relabel", ) parser.add_argument( "--fanout", type=str, default="4, 4", help="Fan-out of neighbor sampling.", ) parser.add_argument( "--validation-fanout", type=str, default=None, help="Fan-out of neighbor sampling during validation.", ) parser.add_argument( "--use-self-loop", default=False, action="store_true", help="include self feature as a special relation", ) parser.add_argument( "--batch-size", type=int, default=100, help="Mini-batch size. " ) parser.add_argument( "--eval-batch-size", type=int, default=128, help="Mini-batch size. 
" ) parser.add_argument("--log-every", type=int, default=20) parser.add_argument( "--low-mem", default=False, action="store_true", help="Whether use low mem RelGraphCov", ) parser.add_argument( "--sparse-embedding", action="store_true", help="Use sparse embedding for node embeddings.", ) parser.add_argument( "--dgl-sparse", action="store_true", help="Whether to use DGL sparse embedding", ) parser.add_argument( "--layer-norm", default=False, action="store_true", help="Use layer norm", ) parser.add_argument( "--local_rank", type=int, help="get rank of the process" ) parser.add_argument( "--standalone", action="store_true", help="run in the standalone mode" ) args = parser.parse_args() # if validation_fanout is None, set it with args.fanout if args.validation_fanout is None: args.validation_fanout = args.fanout print(args) main(args) ================================================ FILE: examples/pytorch/rgcn/experimental/get_mag_data.py ================================================ import json import dgl import numpy as np import torch as th from ogb.nodeproppred import DglNodePropPredDataset # Load OGB-MAG. 
dataset = DglNodePropPredDataset(name="ogbn-mag") hg_orig, labels = dataset[0] subgs = {} for etype in hg_orig.canonical_etypes: u, v = hg_orig.all_edges(etype=etype) subgs[etype] = (u, v) subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u) hg = dgl.heterograph(subgs) hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"] split_idx = dataset.get_idx_split() train_idx = split_idx["train"]["paper"] val_idx = split_idx["valid"]["paper"] test_idx = split_idx["test"]["paper"] paper_labels = labels["paper"].squeeze() train_mask = th.zeros((hg.num_nodes("paper"),), dtype=th.bool) train_mask[train_idx] = True val_mask = th.zeros((hg.num_nodes("paper"),), dtype=th.bool) val_mask[val_idx] = True test_mask = th.zeros((hg.num_nodes("paper"),), dtype=th.bool) test_mask[test_idx] = True hg.nodes["paper"].data["train_mask"] = train_mask hg.nodes["paper"].data["val_mask"] = val_mask hg.nodes["paper"].data["test_mask"] = test_mask hg.nodes["paper"].data["labels"] = paper_labels with open("outputs/mag.json") as json_file: metadata = json.load(json_file) for part_id in range(metadata["num_parts"]): subg = dgl.load_graphs("outputs/part{}/graph.dgl".format(part_id))[0][0] node_data = {} for ntype in hg.ntypes: local_node_idx = th.logical_and( subg.ndata["inner_node"].bool(), subg.ndata[dgl.NTYPE] == hg.get_ntype_id(ntype), ) local_nodes = subg.ndata["orig_id"][local_node_idx].numpy() for name in hg.nodes[ntype].data: node_data[ntype + "/" + name] = hg.nodes[ntype].data[name][ local_nodes ] print("node features:", node_data.keys()) dgl.data.utils.save_tensors( "outputs/" + metadata["part-{}".format(part_id)]["node_feats"], node_data, ) edge_data = {} for etype in hg.etypes: local_edges = subg.edata["orig_id"][ subg.edata[dgl.ETYPE] == hg.get_etype_id(etype) ] for name in hg.edges[etype].data: edge_data[etype + "/" + name] = hg.edges[etype].data[name][ local_edges ] print("edge features:", edge_data.keys()) dgl.data.utils.save_tensors( "outputs/" + 
def load_ogb(dataset):
    """Load OGB-MAG as a heterograph with reverse relations and split masks.

    For every canonical edge type ``(src, rel, dst)`` of the original graph a
    reverse edge type ``(dst, "rev-" + rel, src)`` is added.  The ``"paper"``
    nodes receive the original ``"feat"`` features, boolean
    ``"train_mask"`` / ``"val_mask"`` / ``"test_mask"`` tensors built from the
    dataset split indices, and the squeezed paper labels under ``"labels"``.

    Parameters
    ----------
    dataset : str
        Name of the dataset; only ``"ogbn-mag"`` is supported.

    Returns
    -------
    The constructed DGL heterograph.

    Raises
    ------
    ValueError
        If ``dataset`` is anything other than ``"ogbn-mag"``.
    """
    # Reject unsupported names up front.  The original code raised a bare
    # string, which Python 3 turns into an unrelated TypeError.
    if dataset != "ogbn-mag":
        raise ValueError("Do not support other ogbn datasets.")

    dataset = DglNodePropPredDataset(name=dataset)
    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"]["paper"]
    val_idx = split_idx["valid"]["paper"]
    test_idx = split_idx["test"]["paper"]
    hg_orig, labels = dataset[0]

    # Keep every original relation and add its reverse counterpart.
    subgs = {}
    for etype in hg_orig.canonical_etypes:
        u, v = hg_orig.all_edges(etype=etype)
        subgs[etype] = (u, v)
        subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u)
    hg = dgl.heterograph(subgs)
    hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"]
    paper_labels = labels["paper"].squeeze()

    num_rels = len(hg.canonical_etypes)
    num_classes = dataset.num_classes
    print("Number of relations: {}".format(num_rels))
    print("Number of class: {}".format(num_classes))
    print("Number of train: {}".format(len(train_idx)))
    print("Number of valid: {}".format(len(val_idx)))
    print("Number of test: {}".format(len(test_idx)))

    # Turn the split index tensors into boolean masks over all paper nodes.
    num_papers = hg.num_nodes("paper")
    for mask_name, idx in (
        ("train_mask", train_idx),
        ("val_mask", val_idx),
        ("test_mask", test_idx),
    ):
        mask = th.zeros((num_papers,), dtype=th.bool)
        mask[idx] = True
        hg.nodes["paper"].data[mask_name] = mask
    hg.nodes["paper"].data["labels"] = paper_labels
    return hg
argparse.ArgumentParser("Partition builtin graphs") argparser.add_argument( "--dataset", type=str, default="ogbn-mag", help="datasets: ogbn-mag" ) argparser.add_argument( "--num_parts", type=int, default=4, help="number of partitions" ) argparser.add_argument( "--part_method", type=str, default="metis", help="the partition method" ) argparser.add_argument( "--balance_train", action="store_true", help="balance the training size in each partition.", ) argparser.add_argument( "--undirected", action="store_true", help="turn the graph into an undirected graph.", ) argparser.add_argument( "--balance_edges", action="store_true", help="balance the number of edges in each partition.", ) argparser.add_argument( "--num_trainers_per_machine", type=int, default=1, help="the number of trainers per machine. The trainer ids are stored\ in the node feature 'trainer_id'", ) argparser.add_argument( "--output", type=str, default="data", help="Output path of partitioned graph.", ) args = argparser.parse_args() start = time.time() g = load_ogb(args.dataset) print( "load {} takes {:.3f} seconds".format(args.dataset, time.time() - start) ) print("|V|={}, |E|={}".format(g.num_nodes(), g.num_edges())) print( "train: {}, valid: {}, test: {}".format( th.sum(g.nodes["paper"].data["train_mask"]), th.sum(g.nodes["paper"].data["val_mask"]), th.sum(g.nodes["paper"].data["test_mask"]), ) ) if args.balance_train: balance_ntypes = {"paper": g.nodes["paper"].data["train_mask"]} else: balance_ntypes = None dgl.distributed.partition_graph( g, args.dataset, args.num_parts, args.output, part_method=args.part_method, balance_ntypes=balance_ntypes, balance_edges=args.balance_edges, num_trainers_per_machine=args.num_trainers_per_machine, ) ================================================ FILE: examples/pytorch/rgcn/experimental/preprocessing_dist_training/edges/identity1/sample.csv ================================================ identity1,0,0,1 identity1,1,0,2 
import argparse
import glob
import json
import os
from collections import defaultdict

import pandas as pd


def _read_csv(file_name, column_names):
    """Read the leading ``len(column_names)`` columns of a header-less CSV.

    Malformed rows are skipped (with a warning) instead of aborting the run.
    pandas 1.3 deprecated ``error_bad_lines`` in favour of ``on_bad_lines`` and
    pandas 2.0 removed it, so the modern keyword is tried first and the legacy
    one is used only when the installed pandas rejects the new keyword.
    """
    common_kwargs = {
        "escapechar": "\\",
        "names": column_names,
        "usecols": [*range(len(column_names))],
    }
    try:
        return pd.read_csv(file_name, on_bad_lines="warn", **common_kwargs)
    except TypeError:
        # pandas < 1.3 does not know ``on_bad_lines``.
        return pd.read_csv(
            file_name, error_bad_lines=False, **common_kwargs
        )


path = os.getcwd()

parser = argparse.ArgumentParser()
parser.add_argument(
    "-n", "--name", help="name of graph to create", default="order"
)
parser.add_argument(
    "-nc",
    "--node_column",
    nargs="+",
    default=["order_id", "entity_index", "order_datetime", "cid"],
)
parser.add_argument("-nk", "--node_key", default="entity_index")
parser.add_argument(
    "-ec",
    "--edge_column",
    nargs="+",
    default=[
        "predicate_type",
        "predicate_index",
        "entity_index",
        "entity_index_y",
    ],
)
parser.add_argument("-es", "--edge_start", default="entity_index")
parser.add_argument("-en", "--edge_end", default="entity_index_y")
args = parser.parse_args()

# Store all types of node in nodes folder
nodes_list = sorted(glob.glob(os.path.join(path, "nodes/*")))

# The node file is written in append mode below, so start from a clean slate.
if os.path.exists("{}_nodes.txt".format(args.name)):
    os.remove("{}_nodes.txt".format(args.name))

# schema_dict["nid"/"eid"][type name] = [first ID, one-past-last ID]
schema_dict = defaultdict(dict)
node_type_id = 0
all_nodes_count = 0
for node_type_name in nodes_list:
    nodes_count = 0
    csv_files = sorted(glob.glob(os.path.join(node_type_name, "*.csv")))
    for file_name in csv_files:
        df = _read_csv(file_name, args.node_column)
        df_entity = pd.DataFrame(df[args.node_key], columns=[args.node_key])
        df_entity["type"] = node_type_id
        column_list = ["type"]
        # One weight column per node type: 1 for this type, 0 otherwise.
        for weight_index in range(len(nodes_list)):
            weight_num = "weight{}".format(weight_index)
            column_list.append(weight_num)
            if weight_index == node_type_id:
                df_entity[weight_num] = 1
            else:
                df_entity[weight_num] = 0
        nodes_count += len(df_entity.index)
        column_list.append(args.node_key)
        # Append the rows to the file that serves as input for the Metis
        # algorithm. More details about the Metis input can be found here:
        # https://docs.dgl.ai/en/0.6.x/guide/distributed-preprocessing.html#input-format-for-parmetis
        df_entity.to_csv(
            "{}_nodes.txt".format(args.name),
            columns=column_list,
            sep=" ",
            index=False,
            header=False,
            mode="a",
        )
    schema_dict["nid"][os.path.basename(node_type_name)] = [
        all_nodes_count,
        nodes_count + all_nodes_count,
    ]
    all_nodes_count += nodes_count
    node_type_id += 1

# The edge file is written in append mode below, so start from a clean slate.
if os.path.exists("{}_edges.txt".format(args.name)):
    os.remove("{}_edges.txt".format(args.name))

# Store all types of edge in edges folder
edges_list = sorted(glob.glob(os.path.join(path, "edges/*")))

all_edges_count = 0
edge_type_id = 0
for edge_type_name in edges_list:
    edge_count = 0
    csv_files = sorted(glob.glob(os.path.join(edge_type_name, "*.csv")))
    for file_name in csv_files:
        df = _read_csv(file_name, args.edge_column)
        df_entity = pd.DataFrame(
            df[[args.edge_start, args.edge_end]],
            columns=[args.edge_start, args.edge_end],
        )
        df_entity["type"] = edge_type_id
        df_entity = df_entity.reset_index()
        # Number the edges consecutively within this edge type across files.
        df_entity["number"] = df_entity.index + edge_count
        edge_count += len(df_entity.index)
        # Append the rows to the file that serves as input for the Metis
        # algorithm. More details about the Metis input can be found here:
        # https://docs.dgl.ai/en/0.6.x/guide/distributed-preprocessing.html#input-format-for-parmetis
        df_entity.to_csv(
            "{}_edges.txt".format(args.name),
            columns=[args.edge_start, args.edge_end, "number", "type"],
            sep=" ",
            index=False,
            header=False,
            mode="a",
        )
    schema_dict["eid"][os.path.basename(edge_type_name)] = [
        all_edges_count,
        all_edges_count + edge_count,
    ]
    edge_type_id += 1
    all_edges_count += edge_count

# Graph-level statistics: node count, edge count, number of node weights.
if os.path.exists("{}_stats.txt".format(args.name)):
    os.remove("{}_stats.txt".format(args.name))

df = pd.DataFrame(
    [[all_nodes_count, all_edges_count, len(nodes_list)]],
    columns=["nodes_count", "edges_count", "weight_count"],
)
df.to_csv(
    "{}_stats.txt".format(args.name),
    columns=["nodes_count", "edges_count", "weight_count"],
    sep=" ",
    index=False,
    header=False,
)

# Persist the per-type ID ranges collected above.
if os.path.exists("{}.json".format(args.name)):
    os.remove("{}.json".format(args.name))

with open("{}.json".format(args.name), "w", encoding="utf8") as json_file:
    json.dump(schema_dict, json_file, ensure_ascii=False)
]; then `ssh ${p} "mkdir -p ${cur_dir}" < /dev/null` fi done mpirun.out` echo "partioning ends" echo "scp starts" while read p; do if [ "$p" != "$current_host" ]; then `scp ${p}:${cur_dir}/* ./ < /dev/null` fi done remove.csv` echo "fetching removed edges ends" echo "homo graph to herto graph starts" `python3 substitute_to_hetero.py -n order -r remove.csv` echo "homo graph to herto graph ends" ================================================ FILE: examples/pytorch/rgcn/experimental/verify_mag_partitions.py ================================================ import json import os import dgl import numpy as np import torch as th from ogb.nodeproppred import DglNodePropPredDataset partitions_folder = "outputs" graph_name = "mag" with open("{}/{}.json".format(partitions_folder, graph_name)) as json_file: metadata = json.load(json_file) num_parts = metadata["num_parts"] # Load OGB-MAG. dataset = DglNodePropPredDataset(name="ogbn-mag") hg_orig, labels = dataset[0] subgs = {} for etype in hg_orig.canonical_etypes: u, v = hg_orig.all_edges(etype=etype) subgs[etype] = (u, v) subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u) hg = dgl.heterograph(subgs) hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"] # Construct node data and edge data after reshuffling. 
node_feats = {} edge_feats = {} for partid in range(num_parts): part_node_feats = dgl.data.utils.load_tensors( "{}/part{}/node_feat.dgl".format(partitions_folder, partid) ) part_edge_feats = dgl.data.utils.load_tensors( "{}/part{}/edge_feat.dgl".format(partitions_folder, partid) ) for key in part_node_feats: if key in node_feats: node_feats[key].append(part_node_feats[key]) else: node_feats[key] = [part_node_feats[key]] for key in part_edge_feats: if key in edge_feats: edge_feats[key].append(part_edge_feats[key]) else: edge_feats[key] = [part_edge_feats[key]] for key in node_feats: node_feats[key] = th.cat(node_feats[key]) for key in edge_feats: edge_feats[key] = th.cat(edge_feats[key]) ntype_map = metadata["ntypes"] ntypes = [None] * len(ntype_map) for key in ntype_map: ntype_id = ntype_map[key] ntypes[ntype_id] = key etype_map = metadata["etypes"] etypes = [None] * len(etype_map) for key in etype_map: etype_id = etype_map[key] etypes[etype_id] = key etype2canonical = { etype: (srctype, etype, dsttype) for srctype, etype, dsttype in hg.canonical_etypes } node_map = metadata["node_map"] for key in node_map: node_map[key] = th.stack([th.tensor(row) for row in node_map[key]], 0) nid_map = dgl.distributed.id_map.IdMap(node_map) edge_map = metadata["edge_map"] for key in edge_map: edge_map[key] = th.stack([th.tensor(row) for row in edge_map[key]], 0) eid_map = dgl.distributed.id_map.IdMap(edge_map) for ntype in node_map: assert hg.num_nodes(ntype) == th.sum( node_map[ntype][:, 1] - node_map[ntype][:, 0] ) for etype in edge_map: assert hg.num_edges(etype) == th.sum( edge_map[etype][:, 1] - edge_map[etype][:, 0] ) # verify part_0 with graph_partition_book eid = [] gpb = dgl.distributed.graph_partition_book.RangePartitionBook( 0, num_parts, node_map, edge_map, {ntype: i for i, ntype in enumerate(hg.ntypes)}, {etype: i for i, etype in enumerate(hg.etypes)}, ) subg0 = dgl.load_graphs("{}/part0/graph.dgl".format(partitions_folder))[0][0] for etype in hg.etypes: type_eid = 
th.zeros((1,), dtype=th.int64) eid.append(gpb.map_to_homo_eid(type_eid, etype)) eid = th.cat(eid) part_id = gpb.eid2partid(eid) assert th.all(part_id == 0) local_eid = gpb.eid2localeid(eid, 0) assert th.all(local_eid == eid) assert th.all(subg0.edata[dgl.EID][local_eid] == eid) lsrc, ldst = subg0.find_edges(local_eid) gsrc, gdst = subg0.ndata[dgl.NID][lsrc], subg0.ndata[dgl.NID][ldst] # The destination nodes are owned by the partition. assert th.all(gdst == ldst) # gdst which is not assigned into current partition is not required to equal ldst assert th.all(th.logical_or(gdst == ldst, subg0.ndata["inner_node"][ldst] == 0)) etids, _ = gpb.map_to_per_etype(eid) src_tids, _ = gpb.map_to_per_ntype(gsrc) dst_tids, _ = gpb.map_to_per_ntype(gdst) canonical_etypes = [] etype_ids = th.arange(0, len(etypes)) for src_tid, etype_id, dst_tid in zip(src_tids, etype_ids, dst_tids): canonical_etypes.append( (ntypes[src_tid], etypes[etype_id], ntypes[dst_tid]) ) for etype in canonical_etypes: assert etype in hg.canonical_etypes # Load the graph partition structure. orig_node_ids = {ntype: [] for ntype in hg.ntypes} orig_edge_ids = {etype: [] for etype in hg.etypes} for partid in range(num_parts): print("test part", partid) part_file = "{}/part{}/graph.dgl".format(partitions_folder, partid) subg = dgl.load_graphs(part_file)[0][0] subg_src_id, subg_dst_id = subg.edges() orig_src_id = subg.ndata["orig_id"][subg_src_id] orig_dst_id = subg.ndata["orig_id"][subg_dst_id] global_src_id = subg.ndata[dgl.NID][subg_src_id] global_dst_id = subg.ndata[dgl.NID][subg_dst_id] subg_ntype = subg.ndata[dgl.NTYPE] subg_etype = subg.edata[dgl.ETYPE] for ntype_id in th.unique(subg_ntype): ntype = ntypes[ntype_id] idx = subg_ntype == ntype_id # This is global IDs after reshuffle. nid = subg.ndata[dgl.NID][idx] ntype_ids1, type_nid = nid_map(nid) orig_type_nid = subg.ndata["orig_id"][idx] inner_node = subg.ndata["inner_node"][idx] # All nodes should have the same node type. 
assert np.all(ntype_ids1.numpy() == int(ntype_id)) assert np.all( nid[inner_node == 1].numpy() == np.arange(node_map[ntype][partid, 0], node_map[ntype][partid, 1]) ) orig_node_ids[ntype].append(orig_type_nid[inner_node == 1]) # Check the degree of the inner nodes. inner_nids = th.nonzero( th.logical_and(subg_ntype == ntype_id, subg.ndata["inner_node"]), as_tuple=True, )[0] subg_deg = subg.in_degrees(inner_nids) orig_nids = subg.ndata["orig_id"][inner_nids] # Calculate the in-degrees of nodes of a particular node type. glob_deg = th.zeros(len(subg_deg), dtype=th.int64) for etype in hg.canonical_etypes: dst_ntype = etype[2] if dst_ntype == ntype: glob_deg += hg.in_degrees(orig_nids, etype=etype) assert np.all(glob_deg.numpy() == subg_deg.numpy()) # Check node data. for name in hg.nodes[ntype].data: local_data = node_feats[ntype + "/" + name][type_nid] local_data1 = hg.nodes[ntype].data[name][orig_type_nid] assert np.all(local_data.numpy() == local_data1.numpy()) for etype_id in th.unique(subg_etype): etype = etypes[etype_id] srctype, _, dsttype = etype2canonical[etype] idx = subg_etype == etype_id exist = hg[etype].has_edges_between(orig_src_id[idx], orig_dst_id[idx]) assert np.all(exist.numpy()) eid = hg[etype].edge_ids(orig_src_id[idx], orig_dst_id[idx]) assert np.all(eid.numpy() == subg.edata["orig_id"][idx].numpy()) ntype_ids, type_nid = nid_map(global_src_id[idx]) assert len(th.unique(ntype_ids)) == 1 assert ntypes[ntype_ids[0]] == srctype ntype_ids, type_nid = nid_map(global_dst_id[idx]) assert len(th.unique(ntype_ids)) == 1 assert ntypes[ntype_ids[0]] == dsttype # This is global IDs after reshuffle. eid = subg.edata[dgl.EID][idx] etype_ids1, type_eid = eid_map(eid) orig_type_eid = subg.edata["orig_id"][idx] inner_edge = subg.edata["inner_edge"][idx] # All edges should have the same edge type. 
assert np.all(etype_ids1.numpy() == int(etype_id)) assert np.all( np.sort(eid[inner_edge == 1].numpy()) == np.arange(edge_map[etype][partid, 0], edge_map[etype][partid, 1]) ) orig_edge_ids[etype].append(orig_type_eid[inner_edge == 1]) # Check edge data. for name in hg.edges[etype].data: local_data = edge_feats[etype + "/" + name][type_eid] local_data1 = hg.edges[etype].data[name][orig_type_eid] assert np.all(local_data.numpy() == local_data1.numpy()) for ntype in orig_node_ids: nids = th.cat(orig_node_ids[ntype]) nids = th.sort(nids)[0] assert np.all((nids == th.arange(hg.num_nodes(ntype))).numpy()) for etype in orig_edge_ids: eids = th.cat(orig_edge_ids[etype]) eids = th.sort(eids)[0] assert np.all((eids == th.arange(hg.num_edges(etype))).numpy()) ================================================ FILE: examples/pytorch/rgcn/experimental/write_mag.py ================================================ import json import dgl import numpy as np import torch as th from ogb.nodeproppred import DglNodePropPredDataset # Load OGB-MAG. dataset = DglNodePropPredDataset(name="ogbn-mag") hg_orig, labels = dataset[0] subgs = {} for etype in hg_orig.canonical_etypes: u, v = hg_orig.all_edges(etype=etype) subgs[etype] = (u, v) subgs[(etype[2], "rev-" + etype[1], etype[0])] = (v, u) hg = dgl.heterograph(subgs) hg.nodes["paper"].data["feat"] = hg_orig.nodes["paper"].data["feat"] print(hg) # OGB-MAG is stored in heterogeneous format. We need to convert it into homogeneous format. g = dgl.to_homogeneous(hg) g.ndata["orig_id"] = g.ndata[dgl.NID] g.edata["orig_id"] = g.edata[dgl.EID] print("|V|=" + str(g.num_nodes())) print("|E|=" + str(g.num_edges())) print("|NTYPE|=" + str(len(th.unique(g.ndata[dgl.NTYPE])))) # Store the metadata of nodes. 
num_node_weights = 0 node_data = [g.ndata[dgl.NTYPE].numpy()] for ntype_id in th.unique(g.ndata[dgl.NTYPE]): node_data.append((g.ndata[dgl.NTYPE] == ntype_id).numpy()) num_node_weights += 1 node_data.append(g.ndata["orig_id"].numpy()) node_data = np.stack(node_data, 1) np.savetxt("mag_nodes.txt", node_data, fmt="%d", delimiter=" ") # Store the node features node_feats = {} for ntype in hg.ntypes: for name in hg.nodes[ntype].data: node_feats[ntype + "/" + name] = hg.nodes[ntype].data[name] dgl.data.utils.save_tensors("node_feat.dgl", node_feats) # Store the metadata of edges. # ParMETIS cannot handle duplicated edges and self-loops. We should remove them # in the preprocessing. src_id, dst_id = g.edges() # Remove self-loops self_loop_idx = src_id == dst_id not_self_loop_idx = src_id != dst_id self_loop_src_id = src_id[self_loop_idx] self_loop_dst_id = dst_id[self_loop_idx] self_loop_orig_id = g.edata["orig_id"][self_loop_idx] self_loop_etype = g.edata[dgl.ETYPE][self_loop_idx] src_id = src_id[not_self_loop_idx] dst_id = dst_id[not_self_loop_idx] orig_id = g.edata["orig_id"][not_self_loop_idx] etype = g.edata[dgl.ETYPE][not_self_loop_idx] # Remove duplicated edges. 
ids = (src_id * g.num_nodes() + dst_id).numpy() uniq_ids, idx = np.unique(ids, return_index=True) duplicate_idx = np.setdiff1d(np.arange(len(ids)), idx) duplicate_src_id = src_id[duplicate_idx] duplicate_dst_id = dst_id[duplicate_idx] duplicate_orig_id = orig_id[duplicate_idx] duplicate_etype = etype[duplicate_idx] src_id = src_id[idx] dst_id = dst_id[idx] orig_id = orig_id[idx] etype = etype[idx] edge_data = th.stack([src_id, dst_id, orig_id, etype], 1) np.savetxt("mag_edges.txt", edge_data.numpy(), fmt="%d", delimiter=" ") removed_edge_data = th.stack( [ th.cat([self_loop_src_id, duplicate_src_id]), th.cat([self_loop_dst_id, duplicate_dst_id]), th.cat([self_loop_orig_id, duplicate_orig_id]), th.cat([self_loop_etype, duplicate_etype]), ], 1, ) np.savetxt( "mag_removed_edges.txt", removed_edge_data.numpy(), fmt="%d", delimiter=" " ) print( "There are {} edges, remove {} self-loops and {} duplicated edges".format( g.num_edges(), len(self_loop_src_id), len(duplicate_src_id) ) ) # Store the edge features edge_feats = {} for etype in hg.etypes: for name in hg.edges[etype].data: edge_feats[etype + "/" + name] = hg.edges[etype].data[name] dgl.data.utils.save_tensors("edge_feat.dgl", edge_feats) # Store the basic metadata of the graph. graph_stats = [g.num_nodes(), len(src_id), num_node_weights] with open("mag_stats.txt", "w") as filehandle: filehandle.writelines( "{} {} {}".format(graph_stats[0], graph_stats[1], graph_stats[2]) ) # Store the ID ranges of nodes and edges of the entire graph. 
class GlobalUniform:
    """Draw edge IDs uniformly at random from all edges of a graph."""

    def __init__(self, g, sample_size):
        # Candidate pool: every edge ID 0 .. g.num_edges() - 1.
        edge_count = g.num_edges()
        self.eids = np.arange(edge_count)
        self.sample_size = sample_size

    def sample(self):
        """Return ``sample_size`` edge IDs as a tensor.

        ``np.random.choice`` is called with its default arguments, i.e. the
        IDs are drawn with replacement.
        """
        drawn = np.random.choice(self.eids, self.sample_size)
        return torch.from_numpy(drawn)
class SubgraphIterator:
    """Map-style dataset producing one randomly sampled training subgraph per item.

    Each item is a tuple ``(sub_g, uniq_v, samples, labels)``: the sampled
    subgraph, the original IDs of its nodes, positive plus negative triplets
    in relabeled node IDs, and their binary labels.
    """

    def __init__(self, g, num_rels, sample_size=30000, num_epochs=6000):
        self.g = g
        self.num_rels = num_rels
        self.sample_size = sample_size
        self.num_epochs = num_epochs
        self.pos_sampler = GlobalUniform(g, sample_size)
        self.neg_sampler = NegativeSampler()

    def __len__(self):
        # One item per training epoch.
        return self.num_epochs

    def __getitem__(self, i):
        # The index is ignored: every access draws a fresh random sample.
        picked_eids = self.pos_sampler.sample()
        heads, tails = self.g.find_edges(picked_eids)
        heads = heads.numpy()
        tails = tails.numpy()
        rel = self.g.edata[dgl.ETYPE][picked_eids].numpy()

        # Relabel the touched nodes with consecutive IDs; the inverse index
        # holds the relabeled sources followed by the relabeled destinations.
        uniq_v, relabeled = np.unique((heads, tails), return_inverse=True)
        num_nodes = len(uniq_v)
        src, dst = np.reshape(relabeled, (2, -1))
        positive_triplets = np.stack((src, rel, dst)).T

        samples, labels = self.neg_sampler.sample(positive_triplets, num_nodes)

        # Only a random half of the positive edges forms the graph structure.
        half = int(self.sample_size / 2)
        keep = np.random.choice(
            np.arange(self.sample_size), size=half, replace=False
        )
        src, dst, rel = src[keep], dst[keep], rel[keep]

        # Add a reverse edge per kept edge; reverse relations are offset by
        # num_rels.
        both_src = np.concatenate((src, dst))
        both_dst = np.concatenate((dst, src))
        both_rel = np.concatenate((rel, rel + self.num_rels))

        sub_g = dgl.graph((both_src, both_dst), num_nodes=num_nodes)
        sub_g.edata[dgl.ETYPE] = torch.from_numpy(both_rel)
        sub_g.edata["norm"] = dgl.norm_by_dst(sub_g).unsqueeze(-1)
        node_ids = torch.from_numpy(uniq_v).view(-1).long()
        return sub_g, node_ids, samples, labels
class LinkPredict(nn.Module):
    """RGCN encoder with a DistMult-style scorer for link prediction.

    The encoder is built with ``num_rels * 2`` relations, while
    ``w_relation`` holds one ``h_dim``-sized vector per original relation.
    """

    def __init__(self, num_nodes, num_rels, h_dim=500, reg_param=0.01):
        super().__init__()
        self.rgcn = RGCN(num_nodes, h_dim, num_rels * 2)
        self.reg_param = reg_param
        relu_gain = nn.init.calculate_gain("relu")
        self.w_relation = nn.Parameter(torch.Tensor(num_rels, h_dim))
        nn.init.xavier_uniform_(self.w_relation, gain=relu_gain)

    def calc_score(self, embedding, triplets):
        """Score each (source, relation, destination) row of ``triplets``."""
        subj_idx = triplets[:, 0]
        rel_idx = triplets[:, 1]
        obj_idx = triplets[:, 2]
        product = (
            embedding[subj_idx] * self.w_relation[rel_idx] * embedding[obj_idx]
        )
        return product.sum(dim=1)

    def forward(self, g, nids):
        """Return the node embeddings produced by the RGCN encoder."""
        return self.rgcn(g, nids)

    def regularization_loss(self, embedding):
        """Mean squared magnitude of the embeddings plus that of the relation weights."""
        emb_term = embedding.pow(2).mean()
        rel_term = self.w_relation.pow(2).mean()
        return emb_term + rel_term

    def get_loss(self, embed, triplets, labels):
        """Binary cross-entropy on triplet scores plus weighted regularization.

        Each row of ``triplets`` is a 3-tuple of (source, relation,
        destination).
        """
        logits = self.calc_score(embed, triplets)
        bce = F.binary_cross_entropy_with_logits(logits, labels)
        penalty = self.regularization_loss(embed)
        return bce + self.reg_param * penalty
def train(
    dataloader,
    test_g,
    test_nids,
    val_mask,
    triplets,
    device,
    model_state_file,
    model,
):
    """Train ``model`` on sampled subgraphs with periodic validation.

    One optimizer step is taken per item of ``dataloader``; each item is
    unpacked as ``(g, train_nids, edges, labels)``.  Every 500 steps the model
    is moved to the CPU, the MRR on the validation triplets is computed on
    ``test_g``, and the state dict is saved to ``model_state_file`` whenever
    the MRR improves.

    Parameters
    ----------
    dataloader : iterable
        Yields ``(g, train_nids, edges, labels)`` batches.
    test_g, test_nids
        Graph and node IDs passed to the model for validation.
    val_mask
        Mask forwarded to ``calc_mrr`` to select the validation triplets.
    triplets
        All triplets, forwarded to ``calc_mrr``.
    device : torch.device
        Device used for the training steps.
    model_state_file : str
        Path where the best checkpoint is written.
    model : nn.Module
        Model exposing ``get_loss`` and ``w_relation``.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    best_mrr = 0
    for epoch, batch_data in enumerate(dataloader):  # single graph batch
        model.train()
        g, train_nids, edges, labels = batch_data
        g = g.to(device)
        train_nids = train_nids.to(device)
        edges = edges.to(device)
        labels = labels.to(device)

        embed = model(g, train_nids)
        loss = model.get_loss(embed, edges, labels)
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(
            model.parameters(), max_norm=1.0
        )  # clip gradients
        optimizer.step()
        print(
            "Epoch {:04d} | Loss {:.4f} | Best MRR {:.4f}".format(
                epoch, loss.item(), best_mrr
            )
        )
        if (epoch + 1) % 500 == 0:
            # perform validation on CPU because full graph is too large
            model = model.cpu()
            model.eval()
            # No gradients are needed for evaluation; disabling autograd
            # avoids building a graph over the whole test graph.
            with torch.no_grad():
                embed = model(test_g, test_nids)
            mrr = calc_mrr(
                embed, model.w_relation, val_mask, triplets, batch_size=500
            )
            # save best model
            if best_mrr < mrr:
                best_mrr = mrr
                torch.save(
                    {"state_dict": model.state_dict(), "epoch": epoch},
                    model_state_file,
                )
            model = model.to(device)
class RGCN(nn.Module):
    """Two-layer relational GCN on top of a learnable node-embedding table.

    With ``ns_mode=True`` the forward pass expects an indexable ``g`` and
    feeds ``g[0]`` to the first layer and ``g[1]`` to the second one;
    otherwise the same graph ``g`` is used by both layers.
    """

    def __init__(
        self,
        num_nodes,
        h_dim,
        out_dim,
        num_rels,
        regularizer="basis",
        num_bases=-1,
        dropout=0.0,
        self_loop=False,
        ns_mode=False,
    ):
        super().__init__()

        # -1 means "use as many bases as there are relations".
        bases = num_rels if num_bases == -1 else num_bases
        self.emb = nn.Embedding(num_nodes, h_dim)
        self.conv1 = RelGraphConv(
            h_dim, h_dim, num_rels, regularizer, bases, self_loop=self_loop
        )
        self.conv2 = RelGraphConv(
            h_dim, out_dim, num_rels, regularizer, bases, self_loop=self_loop
        )
        self.dropout = nn.Dropout(dropout)
        self.ns_mode = ns_mode

    def forward(self, g, nids=None):
        """Run both layers and return the output of the second one.

        In full-graph mode the whole embedding table is used when ``nids`` is
        ``None``; otherwise only the rows selected by ``nids`` are looked up.
        In neighbor-sampling mode ``nids`` is ignored and the inputs are the
        embeddings of ``g[0].srcdata[dgl.NID]``.
        """
        if self.ns_mode:
            # forward for neighbor sampling
            layer_graph = g[0]
            feats = self.emb(layer_graph.srcdata[dgl.NID])
        else:
            layer_graph = g
            feats = self.emb.weight if nids is None else self.emb(nids)

        hidden = self.conv1(
            layer_graph,
            feats,
            layer_graph.edata[dgl.ETYPE],
            layer_graph.edata["norm"],
        )
        hidden = self.dropout(F.relu(hidden))

        layer_graph = g[1] if self.ns_mode else g
        return self.conv2(
            layer_graph,
            hidden,
            layer_graph.edata[dgl.ETYPE],
            layer_graph.edata["norm"],
        )
[https://github.com/MichSchli/RelationPrediction](https://github.com/MichSchli/RelationPrediction) The preprocessing is slightly different from the author's code. We directly load and preprocess raw RDF data. For AIFB, BGS and AM, all literal nodes are pruned from the graph. For AIFB, some training/testing nodes thus become orphan and are excluded from the training/testing set. The resulting graph has fewer entities and relations. As a reference (numbers include reverse edges and relations): | Dataset | #Nodes | #Edges | #Relations | #Labeled | | --- | --- | --- | --- | --- | | AIFB | 8,285 | 58,086 | 90 | 176 | | AIFB-hetero | 7,262 | 48,810 | 78 | 176 | | MUTAG | 23,644 | 148,454 | 46 | 340 | | MUTAG-hetero | 27,163 | 148,100 | 46 | 340 | | BGS | 333,845 | 1,832,398 | 206 | 146 | | BGS-hetero | 94,806 | 672,884 | 96 | 146 | | AM | 1,666,764 | 11,976,642 | 266 | 1000 | | AM-hetero | 881,680 | 5,668,682 | 96 | 1000 | ### Dependencies * PyTorch 1.0+ * requests * rdflib ``` pip install requests torch rdflib pandas ``` Example code was tested with rdflib 4.2.2 and pandas 0.23.4 ### Entity Classification All experiments use one-hot encoding as featureless input. Best accuracy reported. AIFB: accuracy 96.11% (5 runs, DGL), 95.83% (paper) ``` python3 entity_classify.py -d aifb --testing --gpu 0 ``` MUTAG: accuracy 72.06% (5 runs, DGL), 73.23% (paper) ``` python3 entity_classify.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0 ``` BGS: accuracy 91.73% (5 runs, DGL), 83.10% (paper) ``` python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0 ``` AM: accuracy 88.28% (5 runs, DGL), 89.29% (paper) ``` python3 entity_classify.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0 ``` ### Entity Classification w/ minibatch training Accuracy numbers are reported by 5 runs. 
def main(args):
    """Train a full-graph R-GCN entity classifier and report test metrics.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options built by the parser at the bottom of this
        script (dataset name, layer sizes, optimizer settings, GPU id, the
        validation/testing switch and the optional ``model_path``).

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of ``aifb``, ``mutag``, ``bgs``, ``am``.
    """
    # load graph data
    datasets = {
        "aifb": AIFBDataset,
        "mutag": MUTAGDataset,
        "bgs": BGSDataset,
        "am": AMDataset,
    }
    if args.dataset not in datasets:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    dataset = datasets[args.dataset]()

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = g.nodes[category].data.pop("train_mask")
    test_mask = g.nodes[category].data.pop("test_mask")
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    labels = g.nodes[category].data.pop("labels")

    # split dataset into train, validate, test
    if args.validation:
        # hold out the first fifth of the training nodes for validation
        n_val = len(train_idx) // 5
        val_idx = train_idx[:n_val]
        train_idx = train_idx[n_val:]
    else:
        val_idx = train_idx

    # check cuda
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        g = g.to("cuda:%d" % args.gpu)
        labels = labels.cuda()
        train_idx = train_idx.cuda()
        val_idx = val_idx.cuda()
        test_idx = test_idx.cuda()

    # create model
    model = EntityClassify(
        g,
        args.n_hidden,
        num_classes,
        num_bases=args.n_bases,
        num_hidden_layers=args.n_layers - 2,
        dropout=args.dropout,
        use_self_loop=args.use_self_loop,
    )
    if use_cuda:
        model.cuda()

    # optimizer
    optimizer = th.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.l2norm
    )

    # training loop
    print("start training...")
    dur = []
    model.train()
    for epoch in range(args.n_epochs):
        # the first epochs are treated as warm-up and excluded from timing
        timed = epoch > 5
        optimizer.zero_grad()
        if timed:
            t0 = time.time()
        logits = model()[category]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss.backward()
        optimizer.step()
        t1 = time.time()
        if timed:
            dur.append(t1 - t0)

        # Metrics are for reporting only; do not record them for autograd.
        with th.no_grad():
            train_acc = th.sum(
                logits[train_idx].argmax(dim=1) == labels[train_idx]
            ).item() / len(train_idx)
            val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
            val_acc = th.sum(
                logits[val_idx].argmax(dim=1) == labels[val_idx]
            ).item() / len(val_idx)
        # No timing sample exists during warm-up: report nan explicitly
        # instead of averaging an empty list (which also emits a warning).
        avg_time = np.average(dur) if dur else float("nan")
        print(
            "Epoch {:05d} | Train Acc: {:.4f} | Train Loss: {:.4f} | Valid Acc: {:.4f} | Valid loss: {:.4f} | Time: {:.4f}".format(
                epoch,
                train_acc,
                loss.item(),
                val_acc,
                val_loss.item(),
                avg_time,
            )
        )
    print()
    if args.model_path is not None:
        th.save(model.state_dict(), args.model_path)

    model.eval()
    with th.no_grad():
        logits = model()[category]
        test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
        test_acc = th.sum(
            logits[test_idx].argmax(dim=1) == labels[test_idx]
        ).item() / len(test_idx)
    print(
        "Test Acc: {:.4f} | Test loss: {:.4f}".format(
            test_acc, test_loss.item()
        )
    )
    print()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="RGCN")
    parser.add_argument(
        "--dropout", type=float, default=0, help="dropout probability"
    )
    parser.add_argument(
        "--n-hidden", type=int, default=16, help="number of hidden units"
    )
    parser.add_argument("--gpu", type=int, default=-1, help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
    parser.add_argument(
        "--n-bases",
        type=int,
        default=-1,
        help="number of filter weight matrices, default: -1 [use all]",
    )
    parser.add_argument(
        "--n-layers", type=int, default=2, help="number of propagation rounds"
    )
    parser.add_argument(
        "-e",
        "--n-epochs",
        type=int,
        default=50,
        help="number of training epochs",
    )
    parser.add_argument(
        "-d", "--dataset", type=str, required=True, help="dataset to use"
    )
    parser.add_argument(
        "--model_path", type=str, default=None, help="path for save the model"
    )
    parser.add_argument("--l2norm", type=float, default=0, help="l2 norm coef")
    parser.add_argument(
        "--use-self-loop",
        default=False,
        action="store_true",
        help="include self feature as a special relation",
    )
    # --validation / --testing toggle the same flag; validation is the default
    fp = parser.add_mutually_exclusive_group(required=False)
    fp.add_argument("--validation", dest="validation", action="store_true")
    fp.add_argument("--testing", dest="validation", action="store_false")
    parser.set_defaults(validation=True)

    args = parser.parse_args()
    print(args)
    main(args)
def main(args):
    """Train the hetero-API R-GCN entity classifier on the full graph.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options built by the parser at the bottom of this
        script (dataset name, layer sizes, optimizer settings, GPU id, the
        validation/testing switch and the optional ``model_path``).

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of ``aifb``, ``mutag``, ``bgs``, ``am``.
    """
    # load graph data
    datasets = {
        "aifb": AIFBDataset,
        "mutag": MUTAGDataset,
        "bgs": BGSDataset,
        "am": AMDataset,
    }
    if args.dataset not in datasets:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    dataset = datasets[args.dataset]()

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = g.nodes[category].data.pop("train_mask")
    test_mask = g.nodes[category].data.pop("test_mask")
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    labels = g.nodes[category].data.pop("labels")

    # split dataset into train, validate, test
    if args.validation:
        # hold out the first fifth of the training nodes for validation
        n_val = len(train_idx) // 5
        val_idx = train_idx[:n_val]
        train_idx = train_idx[n_val:]
    else:
        val_idx = train_idx

    # check cuda
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        g = g.to("cuda:%d" % args.gpu)
        labels = labels.cuda()
        train_idx = train_idx.cuda()
        val_idx = val_idx.cuda()
        test_idx = test_idx.cuda()

    # create model
    model = EntityClassify_HeteroAPI(
        g,
        args.n_hidden,
        num_classes,
        num_bases=args.n_bases,
        num_hidden_layers=args.n_layers - 2,
        dropout=args.dropout,
        use_self_loop=args.use_self_loop,
    )
    if use_cuda:
        model.cuda()

    # optimizer
    optimizer = th.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.l2norm
    )

    # training loop
    print("start training...")
    dur = []
    model.train()
    for epoch in range(args.n_epochs):
        optimizer.zero_grad()
        t0 = time.time()
        logits = model()[category]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss.backward()
        optimizer.step()
        t1 = time.time()
        dur.append(t1 - t0)

        # Metrics are for reporting only; do not record them for autograd.
        with th.no_grad():
            train_acc = th.sum(
                logits[train_idx].argmax(dim=1) == labels[train_idx]
            ).item() / len(train_idx)
            val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
            val_acc = th.sum(
                logits[val_idx].argmax(dim=1) == labels[val_idx]
            ).item() / len(val_idx)
        print(
            "Epoch {:05d} | Train Acc: {:.4f} | Train Loss: {:.4f} | Valid Acc: {:.4f} | Valid loss: {:.4f} | Time: {:.4f}".format(
                epoch,
                train_acc,
                loss.item(),
                val_acc,
                val_loss.item(),
                np.average(dur),
            )
        )
    print()
    if args.model_path is not None:
        th.save(model.state_dict(), args.model_path)

    model.eval()
    with th.no_grad():
        # call the module (not .forward) so registered hooks still run
        logits = model()[category]
        test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
        test_acc = th.sum(
            logits[test_idx].argmax(dim=1) == labels[test_idx]
        ).item() / len(test_idx)
    print(
        "Test Acc: {:.4f} | Test loss: {:.4f}".format(
            test_acc, test_loss.item()
        )
    )
    print()
def extract_embed(node_embed, input_nodes):
    """Gather the embedding rows of the given nodes, per node type.

    Parameters
    ----------
    node_embed : mapping[str, torch.Tensor]
        Embedding table for each node type, indexable by node IDs.
    input_nodes : dict[str, torch.Tensor]
        Node IDs to look up for each node type.

    Returns
    -------
    dict[str, torch.Tensor]
        ``node_embed[ntype][nid]`` for every ``ntype`` in ``input_nodes``.
    """
    return {
        ntype: node_embed[ntype][nid] for ntype, nid in input_nodes.items()
    }


def evaluate(model, loader, node_embed, labels, category, device):
    """Compute mean loss and accuracy of ``model`` over ``loader``.

    The model is switched to eval mode and left there; the caller is
    responsible for calling ``model.train()`` again.

    Parameters
    ----------
    model : callable
        Called as ``model(emb, blocks)``; must return a dict containing
        ``category``.
    loader : iterable
        Yields ``(input_nodes, seeds, blocks)`` and provides
        ``enable_cpu_affinity()``.
    node_embed : mapping[str, torch.Tensor]
        Embedding table for each node type.
    labels : torch.Tensor
        Labels of the ``category`` nodes, indexed by node ID.
    category : str
        Node type being classified.
    device : str or torch.device
        Device on which the forward pass runs.

    Returns
    -------
    tuple[float, float]
        Seed-weighted mean cross-entropy loss and accuracy.
    """
    model.eval()
    total_loss = 0
    total_acc = 0
    count = 0
    # Evaluation never back-propagates, so skip building autograd graphs.
    with th.no_grad(), loader.enable_cpu_affinity():
        for input_nodes, seeds, blocks in loader:
            blocks = [blk.to(device) for blk in blocks]
            seeds = seeds[category]
            emb = extract_embed(node_embed, input_nodes)
            emb = {k: e.to(device) for k, e in emb.items()}
            lbl = labels[seeds].to(device)
            logits = model(emb, blocks)[category]
            loss = F.cross_entropy(logits, lbl)
            acc = th.sum(logits.argmax(dim=1) == lbl).item()
            total_loss += loss.item() * len(seeds)
            total_acc += acc
            count += len(seeds)
    return total_loss / count, total_acc / count


def main(args):
    """Train an R-GCN entity classifier with neighbor-sampled minibatches.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options built by the parser at the bottom of this
        script.

    Raises
    ------
    ValueError
        If ``args.num_workers`` is not positive, or ``args.dataset`` is not
        one of ``aifb``, ``mutag``, ``bgs``, ``am``.
    """
    # Validate before doing any expensive work (dataset loading).
    if args.num_workers <= 0:
        raise ValueError(
            "The '--num_workers' parameter value is expected "
            "to be >0, but got {}.".format(args.num_workers)
        )

    # check cuda
    device = "cpu"
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        device = "cuda:%d" % args.gpu

    # load graph data
    datasets = {
        "aifb": AIFBDataset,
        "mutag": MUTAGDataset,
        "bgs": BGSDataset,
        "am": AMDataset,
    }
    if args.dataset not in datasets:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    dataset = datasets[args.dataset]()

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = g.nodes[category].data.pop("train_mask")
    test_mask = g.nodes[category].data.pop("test_mask")
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    labels = g.nodes[category].data.pop("labels")

    # split dataset into train, validate, test
    if args.validation:
        # hold out the first fifth of the training nodes for validation
        n_val = len(train_idx) // 5
        val_idx = train_idx[:n_val]
        train_idx = train_idx[n_val:]
    else:
        val_idx = train_idx

    # create embeddings
    embed_layer = RelGraphEmbed(g, args.n_hidden)

    if not args.data_cpu:
        labels = labels.to(device)
        embed_layer = embed_layer.to(device)

    node_embed = embed_layer()
    # create model
    model = EntityClassify(
        g,
        args.n_hidden,
        num_classes,
        num_bases=args.n_bases,
        num_hidden_layers=args.n_layers - 2,
        dropout=args.dropout,
        use_self_loop=args.use_self_loop,
    )

    if use_cuda:
        model.cuda()

    # train sampler
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [args.fanout] * args.n_layers
    )
    loader = dgl.dataloading.DataLoader(
        g,
        {category: train_idx},
        sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )

    # validation sampler
    # we do not use full neighbor to save computation resources
    val_sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [args.fanout] * args.n_layers
    )
    val_loader = dgl.dataloading.DataLoader(
        g,
        {category: val_idx},
        val_sampler,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )

    # optimizer
    all_params = itertools.chain(model.parameters(), embed_layer.parameters())
    optimizer = th.optim.Adam(all_params, lr=args.lr, weight_decay=args.l2norm)

    # training loop
    print("start training...")
    epoch_times = []  # wall-clock seconds of each timed (post warm-up) epoch
    for epoch in range(args.n_epochs):
        model.train()
        timed = epoch > 3  # the first epochs are warm-up
        if timed:
            t0 = time.time()

        with loader.enable_cpu_affinity():
            for i, (input_nodes, seeds, blocks) in enumerate(loader):
                blocks = [blk.to(device) for blk in blocks]
                # we only predict the nodes with type "category"
                seeds = seeds[category]
                batch_tic = time.time()
                emb = extract_embed(node_embed, input_nodes)
                lbl = labels[seeds]
                if use_cuda:
                    emb = {k: e.cuda() for k, e in emb.items()}
                    lbl = lbl.cuda()
                # Clear gradients for every step; clearing only once per
                # epoch would make each step use gradients accumulated from
                # all previous batches of the epoch.
                optimizer.zero_grad()
                logits = model(emb, blocks)[category]
                loss = F.cross_entropy(logits, lbl)
                loss.backward()
                optimizer.step()

                train_acc = th.sum(logits.argmax(dim=1) == lbl).item() / len(
                    seeds
                )
                # every piece needs the f prefix, otherwise the placeholders
                # are printed literally
                print(
                    f"Epoch {epoch:05d} | Batch {i:03d} | Train Acc: "
                    f"{train_acc:.4f} | Train Loss: {loss.item():.4f} | Time: "
                    f"{time.time() - batch_tic:.4f}"
                )

        if timed:
            epoch_times.append(time.time() - t0)
        # mean over timed epochs only; 0 while still warming up
        mean = sum(epoch_times) / len(epoch_times) if epoch_times else 0

        val_loss, val_acc = evaluate(
            model, val_loader, node_embed, labels, category, device
        )
        print(
            f"Epoch {epoch:05d} | Valid Acc: {val_acc:.4f} | Valid loss: "
            f"{val_loss:.4f} | Time: {mean:.4f}"
        )
    print()
    if args.model_path is not None:
        th.save(model.state_dict(), args.model_path)

    output = model.inference(
        g,
        args.batch_size,
        "cuda" if use_cuda else "cpu",
        args.num_workers,
        node_embed,
    )
    test_pred = output[category][test_idx]
    test_labels = labels[test_idx].to(test_pred.device)
    test_acc = (test_pred.argmax(1) == test_labels).float().mean()
    print("Test Acc: {:.4f}".format(test_acc))
    print()
parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-bases", type=int, default=-1, help="number of filter weight matrices, default: -1 [use all]", ) parser.add_argument( "--n-layers", type=int, default=2, help="number of propagation rounds" ) parser.add_argument( "-e", "--n-epochs", type=int, default=20, help="number of training epochs", ) parser.add_argument( "-d", "--dataset", type=str, required=True, help="dataset to use" ) parser.add_argument( "--model_path", type=str, default=None, help="path for save the model" ) parser.add_argument("--l2norm", type=float, default=0, help="l2 norm coef") parser.add_argument( "--use-self-loop", default=False, action="store_true", help="include self feature as a special relation", ) parser.add_argument( "--batch-size", type=int, default=100, help="Mini-batch size. If -1, use full graph training.", ) parser.add_argument( "--fanout", type=int, default=4, help="Fan-out of neighbor sampling." ) parser.add_argument( "--data-cpu", action="store_true", help="By default the script puts all node features and labels " "on GPU when using it to save time for data copy. This may " "be undesired if they cannot fit in GPU memory at once. 
class RelGraphConvLayer(nn.Module):
    r"""Relational graph convolution layer built on ``HeteroGraphConv``.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    rel_names : list[str]
        Relation names.
    num_bases : int
        Number of bases. Basis decomposition is used only when this is
        smaller than the number of relations and ``weight`` is True.
    weight : bool, optional
        True if a per-relation weight is applied. Default: True
    bias : bool, optional
        True if bias is added. Default: True
    activation : callable, optional
        Activation function. Default: None
    self_loop : bool, optional
        True to include self loop message. Default: False
    dropout : float, optional
        Dropout rate. Default: 0.0
    """

    def __init__(
        self,
        in_feat,
        out_feat,
        rel_names,
        num_bases,
        *,
        weight=True,
        bias=True,
        activation=None,
        self_loop=False,
        dropout=0.0
    ):
        super().__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.rel_names = rel_names
        self.num_bases = num_bases
        self.bias = bias
        self.activation = activation
        self.self_loop = self_loop

        # One weight-less GraphConv per relation; the relation weights are
        # supplied at call time through ``mod_kwargs``.
        per_relation = {}
        for rel in rel_names:
            per_relation[rel] = dglnn.GraphConv(
                in_feat, out_feat, norm="right", weight=False, bias=False
            )
        self.conv = dglnn.HeteroGraphConv(per_relation)

        num_rels = len(self.rel_names)
        self.use_weight = weight
        self.use_basis = num_bases < num_rels and weight
        if self.use_basis:
            self.basis = dglnn.WeightBasis(
                (in_feat, out_feat), num_bases, num_rels
            )
        elif self.use_weight:
            self.weight = nn.Parameter(th.Tensor(num_rels, in_feat, out_feat))
            nn.init.xavier_uniform_(
                self.weight, gain=nn.init.calculate_gain("relu")
            )

        # bias
        if bias:
            self.h_bias = nn.Parameter(th.Tensor(out_feat))
            nn.init.zeros_(self.h_bias)

        # weight for self loop
        if self.self_loop:
            self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat))
            nn.init.xavier_uniform_(
                self.loop_weight, gain=nn.init.calculate_gain("relu")
            )

        self.dropout = nn.Dropout(dropout)

    def forward(self, g, inputs):
        """Forward computation

        Parameters
        ----------
        g : DGLGraph
            Input graph.
        inputs : dict[str, torch.Tensor]
            Node feature for each node type.

        Returns
        -------
        dict[str, torch.Tensor]
            New node features for each node type.
        """
        g = g.local_var()

        # Per-relation keyword arguments handed to the GraphConv modules.
        wdict = {}
        if self.use_weight:
            stacked = self.basis() if self.use_basis else self.weight
            slices = th.split(stacked, 1, dim=0)
            for rel_name, rel_weight in zip(self.rel_names, slices):
                wdict[rel_name] = {"weight": rel_weight.squeeze(0)}

        # Features of the destination nodes, needed for the self-loop term;
        # in a block they are the leading rows of each input tensor.
        if g.is_block:
            inputs_dst = {}
            for ntype, feat in inputs.items():
                inputs_dst[ntype] = feat[: g.number_of_dst_nodes(ntype)]
        else:
            inputs_dst = inputs

        hs = self.conv(g, inputs, mod_kwargs=wdict)

        outputs = {}
        for ntype, h in hs.items():
            if self.self_loop:
                h = h + th.matmul(inputs_dst[ntype], self.loop_weight)
            if self.bias:
                h = h + self.h_bias
            if self.activation:
                h = self.activation(h)
            outputs[ntype] = self.dropout(h)
        return outputs
Default: 0.0 """ def __init__( self, in_feat, out_feat, rel_names, num_bases, *, weight=True, bias=True, activation=None, self_loop=False, dropout=0.0 ): super(RelGraphConvLayerHeteroAPI, self).__init__() self.in_feat = in_feat self.out_feat = out_feat self.rel_names = rel_names self.num_bases = num_bases self.bias = bias self.activation = activation self.self_loop = self_loop self.use_weight = weight self.use_basis = num_bases < len(self.rel_names) and weight if self.use_weight: if self.use_basis: self.basis = dglnn.WeightBasis( (in_feat, out_feat), num_bases, len(self.rel_names) ) else: self.weight = nn.Parameter( th.Tensor(len(self.rel_names), in_feat, out_feat) ) nn.init.xavier_uniform_( self.weight, gain=nn.init.calculate_gain("relu") ) # bias if bias: self.h_bias = nn.Parameter(th.Tensor(out_feat)) nn.init.zeros_(self.h_bias) # weight for self loop if self.self_loop: self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat)) nn.init.xavier_uniform_( self.loop_weight, gain=nn.init.calculate_gain("relu") ) self.dropout = nn.Dropout(dropout) def forward(self, g, inputs): """Forward computation Parameters ---------- g : DGLGraph Input graph. inputs : dict[str, torch.Tensor] Node feature for each node type. Returns ------- dict[str, torch.Tensor] New node features for each node type. 
class RelGraphEmbed(nn.Module):
    r"""Embedding layer for featureless heterograph.

    Holds one learnable ``(num_nodes, embed_size)`` table per node type.

    Parameters
    ----------
    g : DGLGraph
        Graph providing ``ntypes`` and ``num_nodes(ntype)``.
    embed_size : int
        Size of each node embedding.
    embed_name : str, optional
        Stored on the module; not used by this class. Default: "embed"
    activation : callable, optional
        Stored on the module; not applied by this class. Default: None
    dropout : float, optional
        Rate of the stored dropout module; not applied by ``forward``.
        Default: 0.0
    """

    def __init__(
        self, g, embed_size, embed_name="embed", activation=None, dropout=0.0
    ):
        super().__init__()
        self.g = g

        self.embed_size = embed_size
        self.embed_name = embed_name
        self.activation = activation
        self.dropout = nn.Dropout(dropout)

        # create one embedding table per node type
        self.embeds = nn.ParameterDict()
        relu_gain = nn.init.calculate_gain("relu")
        for ntype in g.ntypes:
            embed = nn.Parameter(th.empty(g.num_nodes(ntype), self.embed_size))
            nn.init.xavier_uniform_(embed, gain=relu_gain)
            self.embeds[ntype] = embed

    def forward(self, block=None):
        """Return the embedding tables of all node types.

        Parameters
        ----------
        block : object, optional
            Accepted for interface compatibility and ignored.

        Returns
        -------
        nn.ParameterDict
            Maps each node type to its ``(num_nodes, embed_size)`` parameter.
        """
        return self.embeds


class EntityClassify(nn.Module):
    """Stack of ``RelGraphConvLayer`` modules for entity classification.

    Parameters
    ----------
    g : DGLGraph
        Graph used for full-graph training and to size the embeddings.
    h_dim : int
        Hidden (and input embedding) size.
    out_dim : int
        Output size of the last layer.
    num_bases : int
        Number of bases; values below 0 or above the number of relations
        fall back to the number of relations.
    num_hidden_layers : int, optional
        Number of hidden-to-hidden layers between the first and the last
        layer. Default: 1
    dropout : float, optional
        Dropout rate of every layer except the last. Default: 0
    use_self_loop : bool, optional
        Forwarded to every layer as ``self_loop``. Default: False
    """

    def __init__(
        self,
        g,
        h_dim,
        out_dim,
        num_bases,
        num_hidden_layers=1,
        dropout=0,
        use_self_loop=False,
    ):
        super().__init__()
        self.g = g
        self.h_dim = h_dim
        self.out_dim = out_dim
        # sorted so that the relation order is deterministic
        self.rel_names = sorted(set(g.etypes))
        if num_bases < 0 or num_bases > len(self.rel_names):
            self.num_bases = len(self.rel_names)
        else:
            self.num_bases = num_bases
        self.num_hidden_layers = num_hidden_layers
        self.dropout = dropout
        self.use_self_loop = use_self_loop

        self.embed_layer = RelGraphEmbed(g, self.h_dim)
        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(
            RelGraphConvLayer(
                self.h_dim,
                self.h_dim,
                self.rel_names,
                self.num_bases,
                activation=F.relu,
                self_loop=self.use_self_loop,
                dropout=self.dropout,
                weight=False,
            )
        )
        # h2h
        for _ in range(self.num_hidden_layers):
            self.layers.append(
                RelGraphConvLayer(
                    self.h_dim,
                    self.h_dim,
                    self.rel_names,
                    self.num_bases,
                    activation=F.relu,
                    self_loop=self.use_self_loop,
                    dropout=self.dropout,
                )
            )
        # h2o
        self.layers.append(
            RelGraphConvLayer(
                self.h_dim,
                self.out_dim,
                self.rel_names,
                self.num_bases,
                activation=None,
                self_loop=self.use_self_loop,
            )
        )

    def forward(self, h=None, blocks=None):
        """Run all layers on the stored graph or on a list of blocks.

        Parameters
        ----------
        h : dict[str, torch.Tensor], optional
            Input features per node type; the module's own embeddings are
            used when omitted.
        blocks : list, optional
            One block per layer for minibatch training; when omitted every
            layer runs on ``self.g``.

        Returns
        -------
        dict[str, torch.Tensor]
            Output of the last layer per node type.
        """
        if h is None:
            # full graph training
            h = self.embed_layer()
        if blocks is None:
            # full graph training
            for layer in self.layers:
                h = layer(self.g, h)
        else:
            # minibatch training
            for layer, block in zip(self.layers, blocks):
                h = layer(block, h)
        return h

    def inference(self, g, batch_size, device, num_workers, x=None):
        """Minibatch inference of final representation over all node types.

        ***NOTE***
        For node classification, the model is trained to predict on only one
        node type's label. Therefore, only that type's final representation
        is meaningful.

        Runs layer by layer over the whole graph with full-neighbor sampling
        and without recording autograd history.

        Parameters
        ----------
        g : DGLGraph
            Graph to run inference on.
        batch_size : int
            Number of output nodes per minibatch.
        device : str or torch.device
            Device on which each layer is evaluated.
        num_workers : int
            Number of dataloader workers.
        x : mapping[str, torch.Tensor], optional
            Input features per node type; the module's own embeddings are
            used when omitted.

        Returns
        -------
        dict[str, torch.Tensor]
            CPU tensor of last-layer outputs for each node type.
        """
        if x is None:
            x = self.embed_layer()

        last = len(self.layers) - 1
        # Inference only: without no_grad the output buffers would capture
        # the autograd graph of every minibatch.
        with th.no_grad():
            for l, layer in enumerate(self.layers):
                width = self.out_dim if l == last else self.h_dim
                y = {k: th.zeros(g.num_nodes(k), width) for k in g.ntypes}

                sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
                dataloader = dgl.dataloading.DataLoader(
                    g,
                    {k: th.arange(g.num_nodes(k)) for k in g.ntypes},
                    sampler,
                    batch_size=batch_size,
                    # results are written back by node ID, so visiting
                    # order is irrelevant
                    shuffle=False,
                    drop_last=False,
                    num_workers=num_workers,
                )

                with dataloader.enable_cpu_affinity():
                    for input_nodes, output_nodes, blocks in tqdm.tqdm(
                        dataloader
                    ):
                        block = blocks[0].to(device)

                        h = {
                            k: x[k][input_nodes[k]].to(device)
                            for k in input_nodes.keys()
                        }
                        h = layer(block, h)

                        for k, nids in output_nodes.items():
                            # a layer may return no entry for a node type;
                            # those rows keep their zero initialisation
                            if k in h:
                                y[k][nids] = h[k].cpu()

                # outputs of this layer are the inputs of the next one
                x = y
        return y
def main(args):
    """Load a saved R-GCN entity classifier and report test metrics.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line options built by the parser at the bottom of this
        script; ``model_path`` must point to a state dict saved by the
        training script with matching model hyper-parameters.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of ``aifb``, ``mutag``, ``bgs``, ``am``.
    """
    # load graph data
    datasets = {
        "aifb": AIFBDataset,
        "mutag": MUTAGDataset,
        "bgs": BGSDataset,
        "am": AMDataset,
    }
    if args.dataset not in datasets:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
    dataset = datasets[args.dataset]()

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    test_mask = g.nodes[category].data.pop("test_mask")
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    labels = g.nodes[category].data.pop("labels")

    # check cuda
    use_cuda = args.gpu >= 0 and th.cuda.is_available()
    if use_cuda:
        th.cuda.set_device(args.gpu)
        labels = labels.cuda()
        test_idx = test_idx.cuda()
        g = g.to("cuda:%d" % args.gpu)

    # create model
    model = EntityClassify(
        g,
        args.n_hidden,
        num_classes,
        num_bases=args.n_bases,
        num_hidden_layers=args.n_layers - 2,
        use_self_loop=args.use_self_loop,
    )
    # Load onto the CPU first so that a checkpoint saved from a GPU run can
    # also be tested on a machine without CUDA; the model is moved below.
    state_dict = th.load(args.model_path, map_location="cpu")
    model.load_state_dict(state_dict)
    if use_cuda:
        model.cuda()

    print("start testing...")
    model.eval()
    with th.no_grad():
        logits = model()[category]
        test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
        test_acc = th.sum(
            logits[test_idx].argmax(dim=1) == labels[test_idx]
        ).item() / len(test_idx)
    print(
        "Test Acc: {:.4f} | Test loss: {:.4f}".format(
            test_acc, test_loss.item()
        )
    )
    print()


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="RGCN")
    parser.add_argument(
        "--n-hidden", type=int, default=16, help="number of hidden units"
    )
    parser.add_argument("--gpu", type=int, default=-1, help="gpu")
    parser.add_argument("--lr", type=float, default=1e-2, help="learning rate")
    parser.add_argument(
        "--n-bases",
        type=int,
        default=-1,
        help="number of filter weight matrices, default: -1 [use all]",
    )
    parser.add_argument(
        "--n-layers", type=int, default=2, help="number of propagation rounds"
    )
    parser.add_argument(
        "-d", "--dataset", type=str, required=True, help="dataset to use"
    )
    # There is nothing to test without a checkpoint, so fail at parse time
    # instead of inside th.load(None).
    parser.add_argument(
        "--model_path",
        type=str,
        required=True,
        help="path of the model to load from",
    )
    parser.add_argument(
        "--use-self-loop",
        default=False,
        action="store_true",
        help="include self feature as a special relation",
    )

    args = parser.parse_args()
    print(args)
    main(args)
https://arxiv.org/abs/1711.08028 * Author's code repo: https://github.com/rasmusbergpalm/recurrent-relational-networks ## Dependencies * PyTorch 1.0+ * DGL 0.5+ ## Codes The folder contains a DGL implementation of Recurrent Relational Network, and its application on sudoku solving. ## Usage - To train the RRN for sudoku, run the following ``` python3 train_sudoku.py --output_dir out/ --do_train ``` - Test with specified aggregation steps: ``` python3 train_sudoku.py --output_dir out/ --do_eval --steps 64 ``` Test accuracy (puzzle-level): | | 32 steps | 64 steps | | ----- | :------: | :------: | | Paper | 94.1 | 96.6 | | DGL | 95.3 | 98.9 | - To use the trained model for solving sudoku, follow the example bellow: ```python from sudoku_solver import solve_sudoku q = [[9, 7, 0, 4, 0, 2, 0, 5, 3], [0, 4, 6, 0, 9, 0, 0, 0, 0], [0, 0, 8, 6, 0, 1, 4, 0, 7], [0, 0, 0, 0, 0, 3, 5, 0, 0], [7, 6, 0, 0, 0, 0, 0, 8, 2], [0, 0, 2, 8, 0, 0, 0, 0, 0], [6, 0, 5, 1, 0, 7, 2, 0, 0], [0, 0, 0, 0, 6, 0, 7, 4, 0], [4, 3, 0, 2, 0, 9, 0, 6, 1] ] answer = solve_sudoku(q) print(answer) ''' [[9 7 1 4 8 2 6 5 3] [3 4 6 7 9 5 1 2 8] [2 5 8 6 3 1 4 9 7] [8 1 4 9 2 3 5 7 6] [7 6 3 5 1 4 9 8 2] [5 9 2 8 7 6 3 1 4] [6 8 5 1 4 7 2 3 9] [1 2 9 3 6 8 7 4 5] [4 3 7 2 5 9 8 6 1]] ''' ``` ================================================ FILE: examples/pytorch/rrn/rrn.py ================================================ """ Recurrent Relational Network(RRN) module References: - Recurrent Relational Networks - Paper: https://arxiv.org/abs/1711.08028 - Original Code: https://github.com/rasmusbergpalm/recurrent-relational-networks """ import dgl.function as fn import torch from torch import nn class RRNLayer(nn.Module): def __init__(self, msg_layer, node_update_func, edge_drop): super(RRNLayer, self).__init__() self.msg_layer = msg_layer self.node_update_func = node_update_func self.edge_dropout = nn.Dropout(edge_drop) def forward(self, g): g.apply_edges(self.get_msg) g.edata["e"] = 
class RRN(nn.Module):
    """Run one shared ``RRNLayer`` on a graph for ``num_steps`` rounds.

    The layer is called with the graph only; after every call the node
    states are read back from ``g.ndata["h"]``.
    """

    def __init__(self, msg_layer, node_update_func, num_steps, edge_drop):
        super().__init__()
        self.num_steps = num_steps
        self.rrn_layer = RRNLayer(msg_layer, node_update_func, edge_drop)

    def forward(self, g, get_all_outputs=True):
        """Return the node states of every step (stacked) or only the last."""
        if not get_all_outputs:
            for _ in range(self.num_steps):
                self.rrn_layer(g)
            # n_nodes x h_dim
            return g.ndata["h"]

        per_step_states = []
        for _ in range(self.num_steps):
            self.rrn_layer(g)
            per_step_states.append(g.ndata["h"])
        # num_steps x n_nodes x h_dim
        return torch.stack(per_step_states, 0)
self.loss_func = nn.CrossEntropyLoss() def forward(self, g, is_training=True): labels = g.ndata.pop("a") input_digits = self.digit_embed(g.ndata.pop("q")) rows = self.row_embed(g.ndata.pop("row")) cols = self.col_embed(g.ndata.pop("col")) x = self.input_layer(torch.cat([input_digits, rows, cols], -1)) g.ndata["x"] = x g.ndata["h"] = x g.ndata["rnn_h"] = torch.zeros_like(x, dtype=torch.float) g.ndata["rnn_c"] = torch.zeros_like(x, dtype=torch.float) outputs = self.rrn(g, is_training) logits = self.output_layer(outputs) preds = torch.argmax(logits, -1) if is_training: labels = torch.stack([labels] * self.num_steps, 0) logits = logits.view([-1, 10]) labels = labels.view([-1]) loss = self.loss_func(logits, labels) return preds, loss def node_update_func(self, nodes): x, h, m, c = ( nodes.data["x"], nodes.data["rnn_h"], nodes.data["m"], nodes.data["rnn_c"], ) new_h, new_c = self.lstm(torch.cat([x, m], -1), (h, c)) return {"h": new_h, "rnn_c": new_c, "rnn_h": new_h} ================================================ FILE: examples/pytorch/rrn/sudoku_data.py ================================================ import csv import os import urllib.request import zipfile from copy import copy import dgl import numpy as np import torch from torch.utils.data import DataLoader, RandomSampler, SequentialSampler from torch.utils.data.dataset import Dataset def _basic_sudoku_graph(): grids = [ [0, 1, 2, 9, 10, 11, 18, 19, 20], [3, 4, 5, 12, 13, 14, 21, 22, 23], [6, 7, 8, 15, 16, 17, 24, 25, 26], [27, 28, 29, 36, 37, 38, 45, 46, 47], [30, 31, 32, 39, 40, 41, 48, 49, 50], [33, 34, 35, 42, 43, 44, 51, 52, 53], [54, 55, 56, 63, 64, 65, 72, 73, 74], [57, 58, 59, 66, 67, 68, 75, 76, 77], [60, 61, 62, 69, 70, 71, 78, 79, 80], ] edges = set() for i in range(81): row, col = i // 9, i % 9 # same row and col row_src = row * 9 col_src = col for _ in range(9): edges.add((row_src, i)) edges.add((col_src, i)) row_src += 1 col_src += 9 # same grid grid_row, grid_col = row // 3, col // 3 for n in 
class ListDataset(Dataset):
    """Dataset over several parallel sequences of equal length.

    Item ``i`` is the tuple made of the ``i``-th element of every sequence.
    """

    def __init__(self, *lists_of_data):
        # All sequences must share one length (vacuously true with no input).
        distinct_lengths = {len(column) for column in lists_of_data}
        assert len(distinct_lengths) <= 1
        self.lists_of_data = lists_of_data

    def __getitem__(self, index):
        row = [column[index] for column in self.lists_of_data]
        return tuple(row)

    def __len__(self):
        first_column = self.lists_of_data[0]
        return len(first_column)
the sudoku puzzle to be solved, the position is to be filled with number from 1-9 if the value in the position is 0 'a': answer, the ground truth of the sudoku puzzle 'row': row index for each position in the grid 'col': column index for each position in the grid :param batch_size: Batch size for the dataloader :param segment: The segment of the datasets, must in ['train', 'valid', 'test'] :return: A pytorch DataLoader instance """ data = _get_sudoku_dataset(segment) q, a = zip(*data) dataset = ListDataset(q, a) if segment == "train": data_sampler = RandomSampler(dataset) else: data_sampler = SequentialSampler(dataset) basic_graph = _basic_sudoku_graph() sudoku_indices = np.arange(0, 81) rows = sudoku_indices // 9 cols = sudoku_indices % 9 def collate_fn(batch): graph_list = [] for q, a in batch: q = torch.tensor(q, dtype=torch.long) a = torch.tensor(a, dtype=torch.long) graph = copy(basic_graph) graph.ndata["q"] = q # q means question graph.ndata["a"] = a # a means answer graph.ndata["row"] = torch.tensor(rows, dtype=torch.long) graph.ndata["col"] = torch.tensor(cols, dtype=torch.long) graph_list.append(graph) batch_graph = dgl.batch(graph_list) return batch_graph dataloader = DataLoader( dataset, batch_size, sampler=data_sampler, collate_fn=collate_fn ) return dataloader ================================================ FILE: examples/pytorch/rrn/sudoku_solver.py ================================================ import os import urllib.request import numpy as np import torch from sudoku import SudokuNN from sudoku_data import _basic_sudoku_graph def solve_sudoku(puzzle): """ Solve sudoku puzzle using RRN. 
def _evaluate(model, dataloader, device):
    """Run ``model`` over ``dataloader`` without gradients.

    Returns ``(losses, solved)``: one loss tensor per batch, and one 0/1 flag
    per puzzle telling whether all 81 predicted cells match the answer.
    """
    losses = []
    solved = []
    for g in dataloader:
        g = g.to(device)
        # Read the answers before the forward pass: SudokuNN.forward pops
        # "a" from g.ndata.
        target = g.ndata["a"].view([-1, 81])
        with torch.no_grad():
            preds, loss = model(g, is_training=False)
        preds = preds.view([-1, 81])
        solved.extend(
            int(torch.equal(pred_row, target_row))
            for pred_row, target_row in zip(preds, target)
        )
        losses.append(loss.cpu().detach().data)
    return losses, solved


def main(args):
    """Train and/or evaluate the sudoku RRN as requested by ``args``.

    With ``args.do_train`` the model is trained for ``args.epochs`` epochs;
    ``model_best.bin`` is written to ``args.output_dir`` whenever the dev
    accuracy does not decrease, and ``model_final.bin`` once training ends.
    With ``args.do_eval`` the best checkpoint is loaded and scored on the
    test segment.

    Raises
    ------
    FileNotFoundError
        If evaluation is requested but ``model_best.bin`` does not exist.
    """
    if args.gpu < 0 or not torch.cuda.is_available():
        device = torch.device("cpu")
    else:
        device = torch.device("cuda", args.gpu)

    model = SudokuNN(num_steps=args.steps, edge_drop=args.edge_drop)

    if args.do_train:
        # makedirs also creates missing parent directories, which os.mkdir
        # does not, and exist_ok avoids the separate existence check.
        os.makedirs(args.output_dir, exist_ok=True)
        model.to(device)
        train_dataloader = sudoku_dataloader(args.batch_size, segment="train")
        dev_dataloader = sudoku_dataloader(args.batch_size, segment="valid")

        opt = Adam(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )

        best_dev_acc = 0.0
        for epoch in range(args.epochs):
            model.train()
            for i, g in enumerate(train_dataloader):
                g = g.to(device)
                _, loss = model(g)
                opt.zero_grad()
                loss.backward()
                opt.step()
                if i % 100 == 0:
                    print(f"Epoch {epoch}, batch {i}, loss {loss.cpu().data}")

            # dev
            print("\n=========Dev step========")
            model.eval()
            dev_loss, dev_res = _evaluate(model, dev_dataloader, device)
            dev_acc = sum(dev_res) / len(dev_res)
            print(f"Dev loss {np.mean(dev_loss)}, accuracy {dev_acc}")
            if dev_acc >= best_dev_acc:
                torch.save(
                    model.state_dict(),
                    os.path.join(args.output_dir, "model_best.bin"),
                )
                best_dev_acc = dev_acc
            print(f"Best dev accuracy {best_dev_acc}\n")

        torch.save(
            model.state_dict(), os.path.join(args.output_dir, "model_final.bin")
        )

    if args.do_eval:
        model_path = os.path.join(args.output_dir, "model_best.bin")
        if not os.path.exists(model_path):
            raise FileNotFoundError("Saved model not Found!")

        # map_location lets a checkpoint trained on a GPU be evaluated on
        # whatever device was selected above (e.g. a CPU-only machine).
        model.load_state_dict(
            torch.load(model_path, weights_only=False, map_location=device)
        )
        model.to(device)
        test_dataloader = sudoku_dataloader(args.batch_size, segment="test")

        print("\n=========Test step========")
        model.eval()
        test_loss, test_res = _evaluate(model, test_dataloader, device)
        test_acc = sum(test_res) / len(test_res)
        print(f"Test loss {np.mean(test_loss)}, accuracy {test_acc}\n")
NOTE: Since there are no data attributes in some of these datasets, we use node_id (as a one-hot vector whose length is the max number of nodes across all graphs) as the node feature. Also note that the node_id in some datasets is not unique (e.g. a graph may have two nodes with the same id).

|                  | DD     | PROTEINS | NCI1  | NCI109 | Mutagenicity |
| ---------------- | ------ | -------- | ----- | ------ | ------------ |
| NumGraphs        | 1178   | 1113     | 4110  | 4127   | 4337         |
| AvgNodesPerGraph | 284.32 | 39.06    | 29.87 | 29.68  | 30.32        |
| AvgEdgesPerGraph | 715.66 | 72.82    | 32.30 | 32.13  | 30.77        |
| NumFeats         | 89     | 1        | 37    | 38     | 14           |
| NumClasses       | 2      | 2        | 2     | 2      | 2            |

How to run example files
--------------------------------
The valid dataset names (you can find a full list [here](https://chrsmrrs.github.io/datasets/docs/datasets/)):
- 'DD' for D&D
- 'PROTEINS' for PROTEINS
- 'NCI1' for NCI1
- 'NCI109' for NCI109
- 'Mutagenicity' for Mutagenicity

In the sagpool folder, run

```bash
python main.py --dataset ${your_dataset_name_here}
```

If you want to use a GPU, run

```bash
python main.py --device ${your_device_id_here} --dataset ${your_dataset_name_here}
```

If you want to perform a grid search, modify the parameter settings in `grid_search_config.json` and run

```bash
python grid_search.py --device ${your_device_id_here} --num_trials ${num_of_trials_here}
```

Performance
-------------------------

NOTE: We do not perform grid search or fine-tuning here, so there may be a gap between the results in the paper and our results. Also, we only perform 10 trials for each experiment, which is different from the 200 trials per experiment in the paper.

def load_config(path="./grid_search_config.json"):
    """Read the JSON file at ``path`` and return the decoded object."""
    with open(path, "r") as config_file:
        raw_text = config_file.read()
    return json.loads(raw_text)
class SAGPool(torch.nn.Module):
    """The Self-Attention Pooling layer in paper
    `Self Attention Graph Pooling <https://arxiv.org/pdf/1904.08082.pdf>`__

    Args:
        in_dim (int): The dimension of node feature.
        ratio (float, optional): The pool ratio which determines the amount of nodes
            remain after pooling. (default: :obj:`0.5`)
        conv_op (torch.nn.Module, optional): The graph convolution layer in dgl used to
            compute scale for each node. It is instantiated as ``conv_op(in_dim, 1)``.
            (default: :obj:`dgl.nn.GraphConv`)
        non_linearity (Callable, optional): The non-linearity function, a pytorch function.
            (default: :obj:`torch.tanh`)
    """

    def __init__(
        self,
        in_dim: int,
        ratio=0.5,
        conv_op=GraphConv,
        non_linearity=torch.tanh,
    ):
        super(SAGPool, self).__init__()
        self.in_dim = in_dim
        self.ratio = ratio
        self.score_layer = conv_op(in_dim, 1)
        self.non_linearity = non_linearity

    def forward(self, graph: dgl.DGLGraph, feature: torch.Tensor):
        """Keep the top-scoring nodes of every graph in the batch.

        Returns:
            The pooled (sub)graph, the gated features of the kept nodes, and
            the indices ``perm`` of the kept nodes in the input graph.
        """
        # Flatten to one score per node. A bare squeeze() would also drop the
        # node dimension when the batch holds a single node, producing a 0-d
        # tensor that cannot be indexed with ``perm`` below.
        score = self.score_layer(graph, feature).reshape(-1)
        batch_num_nodes = graph.batch_num_nodes()
        perm, next_batch_num_nodes = topk(
            score,
            self.ratio,
            get_batch_id(batch_num_nodes),
            batch_num_nodes,
        )
        # Gate the surviving features by their (squashed) attention score.
        feature = feature[perm] * self.non_linearity(score[perm]).view(-1, 1)
        graph = dgl.node_subgraph(graph, perm)

        # node_subgraph currently does not support batch-graph,
        # the 'batch_num_nodes' of the result subgraph is None.
        # So we manually set the 'batch_num_nodes' here.
        # Since global pooling has nothing to do with 'batch_num_edges',
        # we can leave it to be None or unchanged.
        graph.set_batch_num_nodes(next_batch_num_nodes)

        return graph, feature, perm
""" def __init__(self, in_dim: int, out_dim: int, pool_ratio=0.8): super(ConvPoolBlock, self).__init__() self.conv = GraphConv(in_dim, out_dim) self.pool = SAGPool(out_dim, ratio=pool_ratio) self.avgpool = AvgPooling() self.maxpool = MaxPooling() def forward(self, graph, feature): out = F.relu(self.conv(graph, feature)) graph, out, _ = self.pool(graph, out) g_out = torch.cat( [self.avgpool(graph, out), self.maxpool(graph, out)], dim=-1 ) return graph, out, g_out ================================================ FILE: examples/pytorch/sagpool/main.py ================================================ import argparse import json import logging import os from time import time import dgl import torch import torch.nn import torch.nn.functional as F from dgl.data import LegacyTUDataset from dgl.dataloading import GraphDataLoader from network import get_sag_network from torch.utils.data import random_split from utils import get_stats def parse_args(): parser = argparse.ArgumentParser(description="Self-Attention Graph Pooling") parser.add_argument( "--dataset", type=str, default="DD", choices=["DD", "PROTEINS", "NCI1", "NCI109", "Mutagenicity"], help="DD/PROTEINS/NCI1/NCI109/Mutagenicity", ) parser.add_argument( "--batch_size", type=int, default=128, help="batch size" ) parser.add_argument("--lr", type=float, default=5e-4, help="learning rate") parser.add_argument( "--weight_decay", type=float, default=1e-4, help="weight decay" ) parser.add_argument( "--pool_ratio", type=float, default=0.5, help="pooling ratio" ) parser.add_argument("--hid_dim", type=int, default=128, help="hidden size") parser.add_argument( "--dropout", type=float, default=0.5, help="dropout ratio" ) parser.add_argument( "--epochs", type=int, default=100000, help="max number of training epochs", ) parser.add_argument( "--patience", type=int, default=50, help="patience for early stopping" ) parser.add_argument( "--device", type=int, default=-1, help="device id, -1 for cpu" ) parser.add_argument( 
"--architecture", type=str, default="hierarchical", choices=["hierarchical", "global"], help="model architecture", ) parser.add_argument( "--dataset_path", type=str, default="./dataset", help="path to dataset" ) parser.add_argument( "--conv_layers", type=int, default=3, help="number of conv layers" ) parser.add_argument( "--print_every", type=int, default=10, help="print trainlog every k epochs, -1 for silent training", ) parser.add_argument( "--num_trials", type=int, default=1, help="number of trials" ) parser.add_argument("--output_path", type=str, default="./output") args = parser.parse_args() # device args.device = "cpu" if args.device == -1 else "cuda:{}".format(args.device) if not torch.cuda.is_available(): logging.warning("CUDA is not available, use CPU for training.") args.device = "cpu" # print every if args.print_every == -1: args.print_every = args.epochs + 1 # paths if not os.path.exists(args.dataset_path): os.makedirs(args.dataset_path) if not os.path.exists(args.output_path): os.makedirs(args.output_path) name = "Data={}_Hidden={}_Arch={}_Pool={}_WeightDecay={}_Lr={}.log".format( args.dataset, args.hid_dim, args.architecture, args.pool_ratio, args.weight_decay, args.lr, ) args.output_path = os.path.join(args.output_path, name) return args def train(model: torch.nn.Module, optimizer, trainloader, device): model.train() total_loss = 0.0 num_batches = len(trainloader) for batch in trainloader: optimizer.zero_grad() batch_graphs, batch_labels = batch batch_graphs = batch_graphs.to(device) batch_labels = batch_labels.long().to(device) out = model(batch_graphs) loss = F.nll_loss(out, batch_labels) loss.backward() optimizer.step() total_loss += loss.item() return total_loss / num_batches @torch.no_grad() def test(model: torch.nn.Module, loader, device): model.eval() correct = 0.0 loss = 0.0 num_graphs = 0 for batch in loader: batch_graphs, batch_labels = batch num_graphs += batch_labels.size(0) batch_graphs = batch_graphs.to(device) batch_labels = 
def main(args):
    """Run one SAGPool training trial with early stopping on validation loss.

    Returns:
        ``(final_test_acc, avg_epoch_time)``: the test accuracy measured at
        the epoch with the lowest validation loss, and the mean wall-clock
        time of one training epoch (``0.0`` if no epoch was run).
    """
    # Step 1: Prepare graph data and retrieve train/validation/test index ============================= #
    dataset = LegacyTUDataset(args.dataset, raw_dir=args.dataset_path)

    # add self loop. We add self loop for each graph here since the function "add_self_loop" does not
    # support batch graph.
    for i in range(len(dataset)):
        dataset.graph_lists[i] = dgl.add_self_loop(dataset.graph_lists[i])

    # 80% / 10% / remainder split
    num_training = int(len(dataset) * 0.8)
    num_val = int(len(dataset) * 0.1)
    num_test = len(dataset) - num_val - num_training
    train_set, val_set, test_set = random_split(
        dataset, [num_training, num_val, num_test]
    )

    train_loader = GraphDataLoader(
        train_set, batch_size=args.batch_size, shuffle=True, num_workers=6
    )
    val_loader = GraphDataLoader(
        val_set, batch_size=args.batch_size, num_workers=2
    )
    test_loader = GraphDataLoader(
        test_set, batch_size=args.batch_size, num_workers=2
    )

    device = torch.device(args.device)

    # Step 2: Create model =================================================================== #
    num_feature, num_classes, _ = dataset.statistics()
    model_op = get_sag_network(args.architecture)
    model = model_op(
        in_dim=num_feature,
        hid_dim=args.hid_dim,
        out_dim=num_classes,
        num_convs=args.conv_layers,
        pool_ratio=args.pool_ratio,
        dropout=args.dropout,
    ).to(device)
    # Stored on args so that they end up in the dumped hyper-parameters.
    args.num_feature = int(num_feature)
    args.num_classes = int(num_classes)

    # Step 3: Create training components ===================================================== #
    optimizer = torch.optim.Adam(
        model.parameters(), lr=args.lr, weight_decay=args.weight_decay
    )

    # Step 4: training epoches =============================================================== #
    bad_count = 0  # consecutive epochs without validation-loss improvement
    best_val_loss = float("inf")
    final_test_acc = 0.0
    best_epoch = 0
    train_times = []
    for e in range(args.epochs):
        s_time = time()
        train_loss = train(model, optimizer, train_loader, device)
        train_times.append(time() - s_time)
        val_acc, val_loss = test(model, val_loader, device)
        test_acc, _ = test(model, test_loader, device)
        if best_val_loss > val_loss:
            best_val_loss = val_loss
            final_test_acc = test_acc
            bad_count = 0
            best_epoch = e + 1
        else:
            bad_count += 1
        if bad_count >= args.patience:
            break

        if (e + 1) % args.print_every == 0:
            log_format = (
                "Epoch {}: loss={:.4f}, val_acc={:.4f}, final_test_acc={:.4f}"
            )
            print(log_format.format(e + 1, train_loss, val_acc, final_test_acc))
    print(
        "Best Epoch {}, final test acc {:.4f}".format(
            best_epoch, final_test_acc
        )
    )
    # Guard against "--epochs 0", which would otherwise divide by zero.
    avg_train_time = sum(train_times) / len(train_times) if train_times else 0.0
    return final_test_acc, avg_train_time
(default: 3) pool_ratio (float, optional): The pool ratio which determines the amount of nodes remain after pooling. (default: :obj:`0.5`) dropout (float, optional): The dropout ratio for each layer. (default: 0) """ def __init__( self, in_dim: int, hid_dim: int, out_dim: int, num_convs=3, pool_ratio: float = 0.5, dropout: float = 0.0, ): super(SAGNetworkHierarchical, self).__init__() self.dropout = dropout self.num_convpools = num_convs convpools = [] for i in range(num_convs): _i_dim = in_dim if i == 0 else hid_dim _o_dim = hid_dim convpools.append( ConvPoolBlock(_i_dim, _o_dim, pool_ratio=pool_ratio) ) self.convpools = torch.nn.ModuleList(convpools) self.lin1 = torch.nn.Linear(hid_dim * 2, hid_dim) self.lin2 = torch.nn.Linear(hid_dim, hid_dim // 2) self.lin3 = torch.nn.Linear(hid_dim // 2, out_dim) def forward(self, graph: dgl.DGLGraph): feat = graph.ndata["feat"] final_readout = None for i in range(self.num_convpools): graph, feat, readout = self.convpools[i](graph, feat) if final_readout is None: final_readout = readout else: final_readout = final_readout + readout feat = F.relu(self.lin1(final_readout)) feat = F.dropout(feat, p=self.dropout, training=self.training) feat = F.relu(self.lin2(feat)) feat = F.log_softmax(self.lin3(feat), dim=-1) return feat class SAGNetworkGlobal(torch.nn.Module): """The Self-Attention Graph Pooling Network with global readout in paper `Self Attention Graph Pooling ` Args: in_dim (int): The input node feature dimension. hid_dim (int): The hidden dimension for node feature. out_dim (int): The output dimension. num_convs (int, optional): The number of graph convolution layers. (default: 3) pool_ratio (float, optional): The pool ratio which determines the amount of nodes remain after pooling. (default: :obj:`0.5`) dropout (float, optional): The dropout ratio for each layer. 
def get_sag_network(net_type: str = "hierarchical"):
    """Return the SAGPool network class selected by ``net_type``.

    Args:
        net_type (str, optional): Either ``"hierarchical"`` or ``"global"``.
            (default: ``"hierarchical"``)

    Returns:
        The network class itself (not an instance).

    Raises:
        ValueError: If ``net_type`` is neither of the supported names.
    """
    # Reject unknown names first so the happy path stays flat.
    if net_type != "hierarchical" and net_type != "global":
        raise ValueError(
            "SAGNetwork type {} is not supported.".format(net_type)
        )
    if net_type == "global":
        return SAGNetworkGlobal
    return SAGNetworkHierarchical
def get_batch_id(num_nodes: torch.Tensor):
    """Convert the num_nodes array obtained from batch graph to batch_id
    array for each node.

    Entry ``i`` of ``num_nodes`` produces ``num_nodes[i]`` copies of ``i`` in
    the result, so a graph with zero nodes contributes nothing.

    Args:
        num_nodes (torch.Tensor): 1-D tensor whose element is the number of
            nodes in each graph in the batch graph.

    Returns:
        torch.Tensor: 1-D ``torch.long`` tensor on ``num_nodes.device`` with
        ``num_nodes.sum()`` entries. An empty ``num_nodes`` yields an empty
        tensor (the previous loop-and-``cat`` version raised on it).
    """
    graph_ids = torch.arange(
        num_nodes.size(0), dtype=torch.long, device=num_nodes.device
    )
    # repeat_interleave only accepts int/long repeat counts, so normalise the
    # dtype; this is a single vectorised call instead of one torch.full per
    # graph.
    return torch.repeat_interleave(graph_ids, num_nodes.to(torch.long))
Returns: perm (torch.Tensor): The index in batch to be kept. k (torch.Tensor): The remaining number of nodes for each graph. """ batch_size, max_num_nodes = num_nodes.size(0), num_nodes.max().item() cum_num_nodes = torch.cat( [num_nodes.new_zeros(1), num_nodes.cumsum(dim=0)[:-1]], dim=0 ) index = torch.arange(batch_id.size(0), dtype=torch.long, device=x.device) index = (index - cum_num_nodes[batch_id]) + (batch_id * max_num_nodes) dense_x = x.new_full( (batch_size * max_num_nodes,), torch.finfo(x.dtype).min ) dense_x[index] = x dense_x = dense_x.view(batch_size, max_num_nodes) _, perm = dense_x.sort(dim=-1, descending=True) perm = perm + cum_num_nodes.view(-1, 1) perm = perm.view(-1) k = (ratio * num_nodes.to(torch.float)).ceil().to(torch.long) mask = [ torch.arange(k[i], dtype=torch.long, device=x.device) + i * max_num_nodes for i in range(batch_size) ] mask = torch.cat(mask, dim=0) perm = perm[mask] return perm, k ================================================ FILE: examples/pytorch/seal/README.md ================================================ # DGL Implementation of the SEAL Paper This DGL example implements the link prediction model proposed in the paper [Link Prediction Based on Graph Neural Networks](https://arxiv.org/pdf/1802.09691.pdf) and [REVISITING GRAPH NEURAL NETWORKS FOR LINK PREDICTION](https://arxiv.org/pdf/2010.16103.pdf) The author's codes of implementation is in [SEAL](https://github.com/muhanzhang/SEAL) (pytorch) and [SEAL_ogb](https://github.com/facebookresearch/SEAL_OGB) (torch_geometric) Example implementor ---------------------- This example was implemented by [Smile](https://github.com/Smilexuhc) during his intern work at the AWS Shanghai AI Lab. 
def _transform_log_level(str_level):
    """Map a lowercase level name to the matching ``logging`` constant.

    Args:
        str_level (str): One of "info", "warning", "critical", "debug" or
            "error".

    Returns:
        int: The corresponding ``logging`` level.

    Raises:
        KeyError: If ``str_level`` is not one of the names above.
    """
    known_levels = (
        ("info", logging.INFO),
        ("warning", logging.WARNING),
        ("critical", logging.CRITICAL),
        ("debug", logging.DEBUG),
        ("error", logging.ERROR),
    )
    # Compare with == (rather than a dict lookup) so unhashable inputs are
    # still reported through the KeyError below.
    for level_name, level_value in known_levels:
        if str_level == level_name:
            return level_value
    raise KeyError("Log level error")
def train(
    model,
    dataloader,
    loss_fn,
    optimizer,
    device,
    num_graphs=32,
    total_graphs=None,
):
    """Run one training epoch and return the average loss per graph.

    Args:
        model: Module called as ``model(g, z, node_ids, edge_ids)``.
        dataloader: Iterable yielding ``(batched_graph, labels)`` pairs.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar
            tensor (mean over the batch is assumed — TODO confirm for
            non-default reductions).
        optimizer: Optimizer stepping the parameters of ``model``.
        device: Device the batch is moved to.
        num_graphs (int, optional): Nominal batch size. Kept for backward
            compatibility; the actual size of every batch is used instead so
            that a smaller final batch is not over-weighted.
        total_graphs (int, optional): Number of graphs to average over.
            When ``None`` the number of graphs actually seen is used.

    Returns:
        float: Sum of per-batch losses weighted by batch size, divided by
        ``total_graphs`` (``0.0`` if there is nothing to average over).
    """
    model.train()

    total_loss = 0.0
    seen_graphs = 0
    for g, labels in tqdm(dataloader, ncols=100):
        g = g.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        logits = model(g, g.ndata["z"], g.ndata[NID], g.edata[EID])
        loss = loss_fn(logits, labels)
        loss.backward()
        optimizer.step()

        # One label row per graph, so dim 0 is the true batch size; the last
        # batch is usually smaller than ``num_graphs``.
        batch_graphs = labels.size(0)
        total_loss += loss.item() * batch_graphs
        seen_graphs += batch_graphs

    if total_graphs is None:
        total_graphs = seen_graphs
    if not total_graphs:
        return 0.0
    return total_loss / total_graphs
def main(args, print_fn=print):
    """Train and periodically evaluate a SEAL link-prediction model.

    Args:
        args: Parsed command-line namespace (see ``parse_arguments``).
        print_fn (callable, optional): Sink for progress messages.

    Raises:
        NotImplementedError: If ``args.dataset`` is not an ``ogbl-*`` dataset.
        ValueError: If ``args.model`` is neither ``"gcn"`` nor ``"dgcnn"``.
    """
    print_fn("Experiment arguments: {}".format(args))

    # Compare against None so that an explicit seed of 0 is honoured instead
    # of silently being replaced by the fallback.
    seed = 123 if args.random_seed is None else args.random_seed
    torch.manual_seed(seed)

    # Load dataset
    if args.dataset.startswith("ogbl"):
        graph, split_edge = load_ogb_dataset(args.dataset)
    else:
        raise NotImplementedError
    num_nodes = graph.num_nodes()

    # set gpu
    if args.gpu_id >= 0 and torch.cuda.is_available():
        device = "cuda:{}".format(args.gpu_id)
    else:
        device = "cpu"

    # ogbl-collab dataset is multi-edge graph
    use_coalesce = args.dataset == "ogbl-collab"

    # Generate positive and negative edges and corresponding labels
    # Sampling subgraphs and generate node labeling features
    seal_data = SEALData(
        g=graph,
        split_edge=split_edge,
        hop=args.hop,
        neg_samples=args.neg_samples,
        subsample_ratio=args.subsample_ratio,
        use_coalesce=use_coalesce,
        prefix=args.dataset,
        save_dir=args.save_dir,
        num_workers=args.num_workers,
        print_fn=print_fn,
    )
    node_attribute = seal_data.ndata["feat"]
    edge_weight = seal_data.edata["weight"].float()

    train_data = seal_data("train")
    val_data = seal_data("valid")
    test_data = seal_data("test")

    train_graphs = len(train_data.graph_list)

    # Set data loader
    train_loader = GraphDataLoader(
        train_data, batch_size=args.batch_size, num_workers=args.num_workers
    )
    val_loader = GraphDataLoader(
        val_data, batch_size=args.batch_size, num_workers=args.num_workers
    )
    test_loader = GraphDataLoader(
        test_data, batch_size=args.batch_size, num_workers=args.num_workers
    )

    # set model
    if args.model == "gcn":
        model = GCN(
            num_layers=args.num_layers,
            hidden_units=args.hidden_units,
            gcn_type=args.gcn_type,
            pooling_type=args.pooling,
            node_attributes=node_attribute,
            edge_weights=edge_weight,
            node_embedding=None,
            use_embedding=True,
            num_nodes=num_nodes,
            dropout=args.dropout,
        )
    elif args.model == "dgcnn":
        model = DGCNN(
            num_layers=args.num_layers,
            hidden_units=args.hidden_units,
            k=args.sort_k,
            gcn_type=args.gcn_type,
            node_attributes=node_attribute,
            edge_weights=edge_weight,
            node_embedding=None,
            use_embedding=True,
            num_nodes=num_nodes,
            dropout=args.dropout,
        )
    else:
        raise ValueError("Model error")

    model = model.to(device)
    parameters = model.parameters()
    optimizer = torch.optim.Adam(parameters, lr=args.lr)
    loss_fn = BCEWithLogitsLoss()
    print_fn(
        "Total parameters: {}".format(
            sum([p.numel() for p in model.parameters()])
        )
    )

    # train and evaluate loop
    summary_val = []
    summary_test = []
    # Epoch number of every evaluation, so the final report can name the real
    # epoch rather than the position inside the summary lists.
    eval_epochs = []
    for epoch in range(args.epochs):
        start_time = time.time()
        loss = train(
            model=model,
            dataloader=train_loader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=device,
            num_graphs=args.batch_size,
            total_graphs=train_graphs,
        )
        train_time = time.time()
        if epoch % args.eval_steps == 0:
            val_pos_pred, val_neg_pred = evaluate(
                model=model, dataloader=val_loader, device=device
            )
            test_pos_pred, test_neg_pred = evaluate(
                model=model, dataloader=test_loader, device=device
            )

            val_metric = evaluate_hits(
                args.dataset, val_pos_pred, val_neg_pred, args.hits_k
            )
            test_metric = evaluate_hits(
                args.dataset, test_pos_pred, test_neg_pred, args.hits_k
            )
            evaluate_time = time.time()
            print_fn(
                "Epoch-{}, train loss: {:.4f}, hits@{}: val-{:.4f}, test-{:.4f}, "
                "cost time: train-{:.1f}s, total-{:.1f}s".format(
                    epoch,
                    loss,
                    args.hits_k,
                    val_metric,
                    test_metric,
                    train_time - start_time,
                    evaluate_time - start_time,
                )
            )
            summary_val.append(val_metric)
            summary_test.append(test_metric)
            eval_epochs.append(epoch)

    print_fn("Experiment Results:")
    if not summary_test:
        # args.epochs == 0: np.max/np.argmax would raise on an empty array.
        print_fn("No evaluation was performed.")
        return

    summary_test = np.array(summary_test)
    best_idx = int(np.argmax(summary_test))
    print_fn(
        "Best hits@{}: {:.4f}, epoch: {}".format(
            args.hits_k, summary_test[best_idx], eval_epochs[best_idx]
        )
    )
class GCN(nn.Module):
    """
    GCN Model

    Attributes:
        num_layers(int): num of gcn layers
        hidden_units(int): num of hidden units
        gcn_type(str): type of gcn layer, 'gcn' for GraphConv and 'sage' for SAGEConv
        pooling_type(str): type of graph pooling to get subgraph representation.
            Only 'sum' (sum pooling) is accepted; any other value raises
            ``ValueError``.
        node_attributes(Tensor, optional): node attribute
        edge_weights(Tensor, optional): edge weight
        node_embedding(Tensor, optional): pre-trained node embedding
        use_embedding(bool, optional): whether to use node embedding. Note that if 'use_embedding' is set True
            and 'node_embedding' is None, will automatically randomly initialize node embedding
            (this requires 'num_nodes').
        num_nodes(int, optional): num of nodes
        dropout(float, optional): dropout rate
        max_z(int, optional): default max vocab size of node labeling, default 1000.
    """

    def __init__(
        self,
        num_layers,
        hidden_units,
        gcn_type="gcn",
        pooling_type="sum",
        node_attributes=None,
        edge_weights=None,
        node_embedding=None,
        use_embedding=False,
        num_nodes=None,
        dropout=0.5,
        max_z=1000,
    ):
        super(GCN, self).__init__()
        self.num_layers = num_layers
        self.dropout = dropout
        self.pooling_type = pooling_type
        self.use_attribute = False if node_attributes is None else True
        self.use_embedding = use_embedding
        self.use_edge_weight = False if edge_weights is None else True

        self.z_embedding = nn.Embedding(max_z, hidden_units)

        # Attribute / edge-weight tables are frozen lookups indexed by the
        # original node / edge ids.
        if node_attributes is not None:
            self.node_attributes_lookup = nn.Embedding.from_pretrained(
                node_attributes
            )
            self.node_attributes_lookup.weight.requires_grad = False
        if edge_weights is not None:
            self.edge_weights_lookup = nn.Embedding.from_pretrained(
                edge_weights
            )
            self.edge_weights_lookup.weight.requires_grad = False

        if node_embedding is not None:
            self.node_embedding = nn.Embedding.from_pretrained(node_embedding)
            self.node_embedding.weight.requires_grad = False
        elif use_embedding:
            self.node_embedding = nn.Embedding(num_nodes, hidden_units)

        # Input width = label embedding (+ attributes) (+ node embedding).
        initial_dim = hidden_units
        if self.use_attribute:
            initial_dim += self.node_attributes_lookup.embedding_dim
        if self.use_embedding:
            initial_dim += self.node_embedding.embedding_dim

        self.layers = nn.ModuleList()
        if gcn_type == "gcn":
            self.layers.append(
                GraphConv(initial_dim, hidden_units, allow_zero_in_degree=True)
            )
            for _ in range(num_layers - 1):
                self.layers.append(
                    GraphConv(
                        hidden_units, hidden_units, allow_zero_in_degree=True
                    )
                )
        elif gcn_type == "sage":
            self.layers.append(
                SAGEConv(initial_dim, hidden_units, aggregator_type="gcn")
            )
            for _ in range(num_layers - 1):
                self.layers.append(
                    SAGEConv(hidden_units, hidden_units, aggregator_type="gcn")
                )
        else:
            raise ValueError("Gcn type error.")

        self.linear_1 = nn.Linear(hidden_units, hidden_units)
        self.linear_2 = nn.Linear(hidden_units, 1)
        if pooling_type != "sum":
            raise ValueError("Pooling type error.")
        self.pooling = SumPooling()

    def reset_parameters(self):
        """Re-initialise the graph convolution layers (only those)."""
        for layer in self.layers:
            layer.reset_parameters()

    def forward(self, g, z, node_id=None, edge_id=None):
        """
        Args:
            g(DGLGraph): the graph
            z(Tensor): node labeling tensor, one label index per node
            node_id(Tensor, optional): original node ids, used to look up
                node attributes / node embeddings
            edge_id(Tensor, optional): original edge ids, used to look up
                edge weights

        Returns:
            x(Tensor): output tensor, one logit per graph in the batch
        """
        z_emb = self.z_embedding(z)

        if self.use_attribute:
            x = self.node_attributes_lookup(node_id)
            x = torch.cat([z_emb, x], 1)
        else:
            x = z_emb

        if self.use_edge_weight:
            edge_weight = self.edge_weights_lookup(edge_id)
        else:
            edge_weight = None

        if self.use_embedding:
            n_emb = self.node_embedding(node_id)
            x = torch.cat([x, n_emb], 1)

        for layer in self.layers[:-1]:
            x = layer(g, x, edge_weight=edge_weight)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.layers[-1](g, x, edge_weight=edge_weight)

        x = self.pooling(g, x)
        x = F.relu(self.linear_1(x))
        # F.dropout is not in-place: its result must be assigned, otherwise
        # no dropout is applied before the output layer.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.linear_2(x)

        return x
architecture for graph classification. paper link: https://muhanzhang.github.io/papers/AAAI_2018_DGCNN.pdf Attributes: num_layers(int): num of gcn layers hidden_units(int): num of hidden units k(int, optional): The number of nodes to hold for each graph in SortPooling. gcn_type(str): type of gcn layer, 'gcn' for GraphConv and 'sage' for SAGEConv node_attributes(Tensor, optional): node attribute edge_weights(Tensor, optional): edge weight node_embedding(Tensor, optional): pre-trained node embedding use_embedding(bool, optional): whether to use node embedding. Note that if 'use_embedding' is set True and 'node_embedding' is None, will automatically randomly initialize node embedding. num_nodes(int, optional): num of nodes dropout(float, optional): dropout rate max_z(int, optional): default max vocab size of node labeling, default 1000. """ def __init__( self, num_layers, hidden_units, k=10, gcn_type="gcn", node_attributes=None, edge_weights=None, node_embedding=None, use_embedding=False, num_nodes=None, dropout=0.5, max_z=1000, ): super(DGCNN, self).__init__() self.num_layers = num_layers self.dropout = dropout self.use_attribute = False if node_attributes is None else True self.use_embedding = use_embedding self.use_edge_weight = False if edge_weights is None else True self.z_embedding = nn.Embedding(max_z, hidden_units) if node_attributes is not None: self.node_attributes_lookup = nn.Embedding.from_pretrained( node_attributes ) self.node_attributes_lookup.weight.requires_grad = False if edge_weights is not None: self.edge_weights_lookup = nn.Embedding.from_pretrained( edge_weights ) self.edge_weights_lookup.weight.requires_grad = False if node_embedding is not None: self.node_embedding = nn.Embedding.from_pretrained(node_embedding) self.node_embedding.weight.requires_grad = False elif use_embedding: self.node_embedding = nn.Embedding(num_nodes, hidden_units) initial_dim = hidden_units if self.use_attribute: initial_dim += self.node_attributes_lookup.embedding_dim 
def forward(self, g, z, node_id=None, edge_id=None):
    """
    Args:
        g(DGLGraph): the graph
        z(Tensor): node labeling tensor, one label index per node
        node_id(Tensor, optional): original node ids, used to look up
            node attributes / node embeddings
        edge_id(Tensor, optional): original edge ids, used to look up
            edge weights

    Returns:
        x(Tensor): output tensor, one logit per graph in the batch
    """
    z_emb = self.z_embedding(z)
    if self.use_attribute:
        x = self.node_attributes_lookup(node_id)
        x = torch.cat([z_emb, x], 1)
    else:
        x = z_emb

    if self.use_edge_weight:
        edge_weight = self.edge_weights_lookup(edge_id)
    else:
        edge_weight = None

    if self.use_embedding:
        n_emb = self.node_embedding(node_id)
        x = torch.cat([x, n_emb], 1)

    # Keep the output of every conv layer; they are concatenated below
    # (the raw input xs[0] is excluded).
    xs = [x]
    for layer in self.layers:
        out = torch.tanh(layer(g, xs[-1], edge_weight=edge_weight))
        xs += [out]

    x = torch.cat(xs[1:], dim=-1)

    # SortPooling
    x = self.pooling(g, x)
    x = x.unsqueeze(1)
    x = F.relu(self.conv_1(x))
    x = self.maxpool1d(x)
    x = F.relu(self.conv_2(x))
    x = x.view(x.size(0), -1)

    x = F.relu(self.linear_1(x))
    # F.dropout is not in-place: its result must be assigned, otherwise
    # no dropout is applied before the output layer.
    x = F.dropout(x, p=self.dropout, training=self.training)
    x = self.linear_2(x)

    return x
class EdgeDataSet(Dataset):
    """
    Helper dataset that applies ``transform`` to an edge lazily, on access.

    Wrapping it in a ``DataLoader`` lets the (expensive) transform run in
    worker processes, which is how SEALSampler uses it.
    """

    def __init__(self, edges, labels, transform):
        # edges[i] is handed to ``transform``; labels[i] is returned as-is.
        self.edges, self.labels = edges, labels
        self.transform = transform

    def __len__(self):
        """Number of edges in the dataset."""
        return len(self.edges)

    def __getitem__(self, index):
        """Return ``(transform(edges[index]), labels[index])``."""
        edge = self.edges[index]
        return self.transform(edge), self.labels[index]
Attributes: graph(DGLGraph): The graph hop(int): num of hop num_workers(int): num of workers """ def __init__(self, graph, hop=1, num_workers=32, print_fn=print): self.graph = graph self.hop = hop self.print_fn = print_fn self.num_workers = num_workers def sample_subgraph(self, target_nodes): """ Args: target_nodes(Tensor): Tensor of two target nodes Returns: subgraph(DGLGraph): subgraph """ sample_nodes = [target_nodes] frontiers = target_nodes for i in range(self.hop): frontiers = self.graph.out_edges(frontiers)[1] frontiers = torch.unique(frontiers) sample_nodes.append(frontiers) sample_nodes = torch.cat(sample_nodes) sample_nodes = torch.unique(sample_nodes) subgraph = dgl.node_subgraph(self.graph, sample_nodes) # Each node should have unique node id in the new subgraph u_id = int( torch.nonzero( subgraph.ndata[NID] == int(target_nodes[0]), as_tuple=False ) ) v_id = int( torch.nonzero( subgraph.ndata[NID] == int(target_nodes[1]), as_tuple=False ) ) # remove link between target nodes in positive subgraphs. 
class SEALData(object):
    """
    Build (or load from disk) the SEAL subgraph datasets of a graph.

    1. Generate positive and negative samples
    2. Subgraph sampling

    Attributes:
        g(dgl.DGLGraph): graph
        split_edge(dict): split edge
        hop(int): num of hop
        neg_samples(int): num of negative samples per positive sample
        subsample_ratio(float): ratio of subsample
        prefix(str): prefix of the cache file name
        save_dir(str): directory of the cache files (current directory when
            None or empty)
        num_workers(int): num of workers used by the subgraph sampler
        shuffle(bool): forwarded to the edge generator
        use_coalesce(bool): True for coalesce graph. Graph with multi-edge need to coalesce
        print_fn(callable): sink for progress messages
    """

    def __init__(
        self,
        g,
        split_edge,
        hop=1,
        neg_samples=1,
        subsample_ratio=1,
        prefix=None,
        save_dir=None,
        num_workers=32,
        shuffle=True,
        use_coalesce=True,
        print_fn=print,
    ):
        self.g = g
        self.hop = hop
        self.subsample_ratio = subsample_ratio
        self.prefix = prefix
        self.save_dir = save_dir
        self.print_fn = print_fn

        # The edge generator keeps a reference to the graph as passed in,
        # i.e. before any coalescing below.
        self.generator = PosNegEdgesGenerator(
            g=self.g,
            split_edge=split_edge,
            neg_samples=neg_samples,
            subsample_ratio=subsample_ratio,
            shuffle=shuffle,
        )

        if use_coalesce:
            # dgl.to_simple() requires data is float
            for key, value in g.edata.items():
                g.edata[key] = value.float()
            self.g = dgl.to_simple(
                g, copy_ndata=True, copy_edata=True, aggregator="sum"
            )

        # Move the node/edge data out of the graph and keep it on this
        # object; the graph handed to the sampler carries structure only.
        self.ndata = dict(self.g.ndata.items())
        self.edata = dict(self.g.edata.items())
        self.g.ndata.clear()
        self.g.edata.clear()

        self.print_fn("Save ndata and edata in class.")
        self.print_fn("Clear ndata and edata in graph.")

        self.sampler = SEALSampler(
            graph=self.g, hop=hop, num_workers=num_workers, print_fn=print_fn
        )

    def __call__(self, split_type):
        """Return the ``GraphDataSet`` of ``split_type``, using the on-disk
        cache when it exists and creating it otherwise."""
        # Only the training split is subsampled.
        subsample_ratio = (
            self.subsample_ratio if split_type == "train" else 1
        )
        file_name = f"{self.prefix}_{split_type}_{self.hop}-hop_{subsample_ratio}-subsample.bin"
        path = osp.join(self.save_dir or "", file_name)

        if not osp.exists(path):
            self.print_fn("Processed {} files not exist.".format(split_type))

            edges, labels = self.generator(split_type)
            self.print_fn("Generate {} edges totally.".format(edges.size(0)))

            graph_list, labels = self.sampler(edges, labels)
            dataset = GraphDataSet(graph_list, labels)
            dgl.save_graphs(path, graph_list, {"labels": labels})
            self.print_fn("Save preprocessed subgraph to {}".format(path))
            return dataset

        self.print_fn("Load existing processed {} files".format(split_type))
        graph_list, data = dgl.load_graphs(path)
        return GraphDataSet(graph_list, data["labels"])
def parse_arguments(argv=None):
    """
    Parse arguments

    Args:
        argv(list of str, optional): argument strings to parse. When None
            (the default) ``sys.argv[1:]`` is used, as before.

    Returns:
        args(argparse.Namespace): the parsed options
    """
    parser = argparse.ArgumentParser(description="SEAL")
    parser.add_argument("--dataset", type=str, default="ogbl-collab")
    parser.add_argument("--gpu_id", type=int, default=0)
    parser.add_argument("--hop", type=int, default=1)
    parser.add_argument("--model", type=str, default="dgcnn")
    parser.add_argument("--gcn_type", type=str, default="gcn")
    parser.add_argument("--num_layers", type=int, default=3)
    parser.add_argument("--hidden_units", type=int, default=32)
    parser.add_argument("--sort_k", type=int, default=30)
    parser.add_argument("--pooling", type=str, default="sum")
    # Was type=str: a value given on the command line reached F.dropout as a
    # string. The dropout rate is a probability, hence float.
    parser.add_argument("--dropout", type=float, default=0.5)
    parser.add_argument("--hits_k", type=int, default=50)
    parser.add_argument("--lr", type=float, default=0.0001)
    parser.add_argument("--neg_samples", type=int, default=1)
    parser.add_argument("--subsample_ratio", type=float, default=0.1)
    parser.add_argument("--epochs", type=int, default=60)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--eval_steps", type=int, default=5)
    parser.add_argument("--num_workers", type=int, default=32)
    parser.add_argument("--random_seed", type=int, default=2021)
    parser.add_argument("--save_dir", type=str, default="./processed")
    args = parser.parse_args(argv)
    return args
def evaluate_hits(name, pos_pred, neg_pred, K):
    """
    Score predictions with the dataset's ``Evaluator`` and return hits@K.

    Args:
        name(str): name of dataset
        pos_pred(Tensor): predict value of positive edges
        neg_pred(Tensor): predict value of negative edges
        K(int): num of hits

    Returns:
        hits(float): score of hits
    """
    evaluator = Evaluator(name)
    # Override the evaluator's K so the requested cut-off is used.
    evaluator.K = K

    predictions = {"y_pred_pos": pos_pred, "y_pred_neg": neg_pred}
    scores = evaluator.eval(predictions)
    return scores["hits@{}".format(K)]
Dependencies ------------ - PyTorch 0.4.1+ - requests ```bash pip install torch requests ``` Codes ----- The folder contains an implementation of SGC (`sgc.py`). `sgc_reddit.py` contains an example of training SGC on the reddit dataset. Results ------- Run with the following commands (available datasets: "cora", "citeseer", "pubmed") ```bash python3 sgc.py --dataset cora --gpu 0 python3 sgc.py --dataset citeseer --weight-decay 5e-5 --n-epochs 150 --bias --gpu 0 python3 sgc.py --dataset pubmed --weight-decay 5e-5 --bias --gpu 0 ``` Run the following command to train on the reddit dataset. ```bash python sgc_reddit.py --gpu 0 ``` On NVIDIA V100 * cora: 0.819 (paper: 0.810), 0.0008s/epoch * citeseer: 0.725 (paper: 0.719), 0.0008s/epoch * pubmed: 0.788 (paper: 0.789), 0.0007s/epoch * reddit: 0.947 (paper: 0.949), 0.6872s in total ================================================ FILE: examples/pytorch/sgc/sgc.py ================================================ """ This code was modified from the GCN implementation in DGL examples. Simplifying Graph Convolutional Networks Paper: https://arxiv.org/abs/1902.07153 Code: https://github.com/Tiiiger/SGC SGC implementation in DGL.
""" import argparse import math import time import dgl import dgl.function as fn import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from dgl.data import ( CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset, register_data_args, ) from dgl.nn.pytorch.conv import SGConv def evaluate(model, g, features, labels, mask): model.eval() with torch.no_grad(): logits = model(g, features)[mask] # only compute the evaluation set labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) def main(args): # load and preprocess dataset if args.dataset == "cora": data = CoraGraphDataset() elif args.dataset == "citeseer": data = CiteseerGraphDataset() elif args.dataset == "pubmed": data = PubmedGraphDataset() else: raise ValueError("Unknown dataset: {}".format(args.dataset)) g = data[0] if args.gpu < 0: cuda = False else: cuda = True g = g.int().to(args.gpu) features = g.ndata["feat"] labels = g.ndata["label"] train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = g.num_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, train_mask.int().sum().item(), val_mask.int().sum().item(), test_mask.int().sum().item(), ) ) n_edges = g.num_edges() # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) # create SGC model model = SGConv(in_feats, n_classes, k=2, cached=True, bias=args.bias) if cuda: model.cuda() loss_fcn = torch.nn.CrossEntropyLoss() # use optimizer optimizer = torch.optim.Adam( model.parameters(), lr=args.lr, weight_decay=args.weight_decay ) # initialize graph dur = [] for epoch in range(args.n_epochs): model.train() if epoch >= 3: t0 = time.time() # forward logits = model(g, features) # only compute the train set loss = 
loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() if epoch >= 3: dur.append(time.time() - t0) acc = evaluate(model, g, features, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.item(), acc, n_edges / np.mean(dur) / 1000, ) ) print() acc = evaluate(model, g, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="SGC") register_data_args(parser) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=0.2, help="learning rate") parser.add_argument( "--bias", action="store_true", default=False, help="flag to use bias" ) parser.add_argument( "--n-epochs", type=int, default=100, help="number of training epochs" ) parser.add_argument( "--weight-decay", type=float, default=5e-6, help="Weight for L2 loss" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/sgc/sgc_reddit.py ================================================ """ This code was modified from the GCN implementation in DGL examples. Simplifying Graph Convolutional Networks Paper: https://arxiv.org/abs/1902.07153 Code: https://github.com/Tiiiger/SGC SGC implementation in DGL. 
""" import argparse import math import time import dgl.function as fn import numpy as np import torch import torch.nn as nn import torch.nn.functional as F from dgl import DGLGraph from dgl.data import load_data, register_data_args from dgl.nn.pytorch.conv import SGConv def normalize(h): return (h - h.mean(0)) / h.std(0) def evaluate(model, features, graph, labels, mask): model.eval() with torch.no_grad(): logits = model(graph, features)[mask] # only compute the evaluation set labels = labels[mask] _, indices = torch.max(logits, dim=1) correct = torch.sum(indices == labels) return correct.item() * 1.0 / len(labels) def main(args): # load and preprocess dataset args.dataset = "reddit-self-loop" data = load_data(args) g = data[0] if args.gpu < 0: cuda = False else: cuda = True g = g.int().to(args.gpu) features = g.ndata["feat"] labels = g.ndata["label"] train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = g.num_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, g.ndata["train_mask"].int().sum().item(), g.ndata["val_mask"].int().sum().item(), g.ndata["test_mask"].int().sum().item(), ) ) # graph preprocess and calculate normalization factor n_edges = g.num_edges() # normalization degs = g.in_degrees().float() norm = torch.pow(degs, -0.5) norm[torch.isinf(norm)] = 0 g.ndata["norm"] = norm.unsqueeze(1) # create SGC model model = SGConv( in_feats, n_classes, k=2, cached=True, bias=True, norm=normalize ) if args.gpu >= 0: model = model.cuda() # use optimizer optimizer = torch.optim.LBFGS(model.parameters()) # define loss closure def closure(): optimizer.zero_grad() output = model(g, features)[train_mask] loss_train = F.cross_entropy(output, labels[train_mask]) loss_train.backward() return loss_train # initialize graph for epoch in range(args.n_epochs): model.train() 
def load_dataset(name):
    """
    Load a node-classification graph together with its train/val/test node ids

    Args:
        name(str): dataset name, matched case-insensitively; one of
            "amazon" (loaded as OGB "ogbn-products"), "reddit" or "cora"

    Returns:
        tuple: (g, n_classes, train_nid, val_nid, test_nid). The graph
        carries the labels in ``g.ndata["label"]`` and the features in
        ``g.ndata["feat"]``.

    Raises:
        ValueError: if ``name`` is not a supported dataset
    """
    dataset = name.lower()
    if dataset == "amazon":
        # Imported lazily so that OGB is only needed for this dataset.
        from ogb.nodeproppred.dataset_dgl import DglNodePropPredDataset

        dataset = DglNodePropPredDataset(name="ogbn-products")
        splitted_idx = dataset.get_idx_split()
        train_nid = splitted_idx["train"]
        val_nid = splitted_idx["valid"]
        test_nid = splitted_idx["test"]
        g, labels = dataset[0]
        # Number of classes is taken from the label value range.
        n_classes = int(labels.max() - labels.min() + 1)
        g.ndata["label"] = labels.squeeze()
        g.ndata["feat"] = g.ndata["feat"].float()
    elif dataset in ["reddit", "cora"]:
        if dataset == "reddit":
            from dgl.data import RedditDataset

            data = RedditDataset(self_loop=True)
            g = data[0]
        else:
            from dgl.data import CitationGraphDataset

            data = CitationGraphDataset("cora")
            g = data[0]
        n_classes = data.num_classes
        train_mask = g.ndata["train_mask"]
        val_mask = g.ndata["val_mask"]
        test_mask = g.ndata["test_mask"]
        # nonzero() yields an int64 (k, 1) tensor; squeeze only dim 1 so a
        # split holding a single node stays 1-D instead of becoming 0-d.
        train_nid = train_mask.nonzero().squeeze(1)
        val_nid = val_mask.nonzero().squeeze(1)
        test_nid = test_mask.nonzero().squeeze(1)
    else:
        # Raise instead of `assert 0`: asserts are stripped under `python -O`,
        # which would let execution fall through to an unbound-variable error.
        raise ValueError("Dataset {} is not supported".format(name))
    return g, n_classes, train_nid, val_nid, test_nid
def evaluate(epoch, args, model, feats, labels, train, val, test):
    """
    Compute train/val/test accuracy of the model without tracking gradients

    The model is run on all nodes at once when ``args.eval_batch_size`` is
    not positive, otherwise on consecutive slices of that many rows whose
    outputs are concatenated. ``epoch`` is accepted but not used.

    Returns a (train_acc, val_acc, test_acc) tuple of tensors.
    """
    chunk = args.eval_batch_size
    with torch.no_grad():
        if chunk > 0:
            total = labels.shape[0]
            # Slicing clamps at the end, so the last chunk may be shorter.
            outputs = [
                model([feat[lo : lo + chunk] for feat in feats])
                for lo in range(0, total, chunk)
            ]
            logits = torch.cat(outputs)
        else:
            logits = model(feats)

        # 1.0 where the arg-max class equals the label, 0.0 elsewhere.
        hits = (torch.argmax(logits, dim=1) == labels).float()

        def accuracy(nids):
            return hits[nids].sum() / len(nids)

        return accuracy(train), accuracy(val), accuracy(test)
type=str, default="amazon") parser.add_argument("--dropout", type=float, default=0.5) parser.add_argument("--gpu", type=int, default=0) parser.add_argument("--weight-decay", type=float, default=0) parser.add_argument("--eval-every", type=int, default=50) parser.add_argument( "--eval-batch-size", type=int, default=250000, help="evaluation batch size, -1 for full batch", ) parser.add_argument( "--ff-layer", type=int, default=2, help="number of feed-forward layers" ) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/pytorch/stgcn_wave/README.md ================================================ Spatio-Temporal Graph Convolutional Networks ============ - Paper link: [arXiv](https://arxiv.org/pdf/1709.04875v4.pdf) - Author's code repo: https://github.com/VeritasYin/STGCN_IJCAI-18. - See [this blog](https://towardsdatascience.com/build-your-first-graph-neural-network-model-to-predict-traffic-speed-in-20-minutes-b593f8f838e5) for more details about running the code. - Dependencies - PyTorch 1.1.0+ - scikit-learn - dgl - tables How to run ---------- Please get the METR_LA dataset from [this Google drive](https://drive.google.com/open?id=10FOTa6HXPqX8Pf5WRoRwcFnW9BrNZEIX) and [this GitHub repo](https://github.com/chnsh/DCRNN_PyTorch). An experiment in default settings can be run with ```bash python main.py ``` An experiment on the METR_LA dataset in customized settings can be run with ```bash python main.py --lr <lr> --disablecuda --batch_size <batch_size> --epochs <epochs> ``` If one wishes to adjust the model structure, one can change the arguments `control_str` and `channels` ```bash python main.py --control_str <control_str> --channels <channel_1> <channel_2> ... ``` `<control_str>` is a string of the following characters representing a sequence of neural network modules: * `T`: representing a dilated temporal convolution layer, working on the temporal dimension. The dilation factor is always twice as much as the previous temporal convolution layer.
* `S`: representing a graph convolution layer, working on the spatial dimension. The input channels and output channels are the same. * `N`: a Layer Normalization. The argument list following `--channels` represents the output channels on each temporal convolution layer. The list should have `N + 1` elements, where `N` is the number of `T`'s in `<control_str>`. The activation function between two layers is always ReLU. For example, the following command ```bash python main.py --control_str TNTSTNTST --channels 1 16 32 32 64 128 ``` specifies the following architecture: ``` +------------------------------------------------------------+ | Input | +------------------------------------------------------------+ | 1D Conv, in_channel = 1, out_channel = 16, dilation = 1 | +------------------------------------------------------------+ | Layer Normalization | +------------------------------------------------------------+ | 1D Conv, in_channel = 16, out_channel = 32, dilation = 2 | +------------------------------------------------------------+ | Graph Conv, in_channel = 32, out_channel = 32 | +------------------------------------------------------------+ | 1D Conv, in_channel = 32, out_channel = 32, dilation = 4 | +------------------------------------------------------------+ | Layer Normalization | +------------------------------------------------------------+ | 1D Conv, in_channel = 32, out_channel = 64, dilation = 8 | +------------------------------------------------------------+ | Graph Conv, in_channel = 64, out_channel = 64 | +------------------------------------------------------------+ | 1D Conv, in_channel = 64, out_channel = 128, dilation = 16 | +------------------------------------------------------------+ ``` Results ------- ```bash python main.py ``` METR_LA MAE: ~5.76 ================================================ FILE: examples/pytorch/stgcn_wave/load_data.py ================================================ import numpy as np import pandas as pd import torch def 
def data_transform(data, n_his, n_pred, device):
    """
    Cut a (time, route) series into sliding-window samples

    Sample ``i`` uses rows ``i .. i + n_his - 1`` as input and the row
    ``n_pred`` steps after the last input row as target. There are
    ``len(data) - n_his - n_pred`` samples.

    Returns a pair of float tensors moved to ``device``: inputs shaped
    (num, 1, n_his, n_route) and targets shaped (num, n_route).
    """
    n_route = data.shape[1]
    num = len(data) - n_his - n_pred
    x = np.zeros([num, 1, n_his, n_route])
    y = np.zeros([num, n_route])
    for start in range(num):
        stop = start + n_his
        # The history window fills the single channel of this sample.
        x[start, 0] = data[start:stop]
        y[start] = data[stop + n_pred - 1]
    return torch.Tensor(x).to(device), torch.Tensor(y).to(device)
"--tsfilepath", type=str, default="./data/metr-la.h5", help="ts file path" ) parser.add_argument( "--savemodelpath", type=str, default="stgcnwavemodel.pt", help="save model path", ) parser.add_argument( "--pred_len", type=int, default=5, help="how many steps away we want to predict", ) parser.add_argument( "--control_str", type=str, default="TNTSTNTST", help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer", ) parser.add_argument( "--channels", type=int, nargs="+", default=[1, 16, 32, 64, 32, 128], help="model strcture controller, T: Temporal Layer, S: Spatio Layer, N: Norm Layer", ) args = parser.parse_args() device = ( torch.device("cuda") if torch.cuda.is_available() and not args.disablecuda else torch.device("cpu") ) with open(args.sensorsfilepath) as f: sensor_ids = f.read().strip().split(",") distance_df = pd.read_csv(args.disfilepath, dtype={"from": "str", "to": "str"}) adj_mx = get_adjacency_matrix(distance_df, sensor_ids) sp_mx = sp.coo_matrix(adj_mx) G = dgl.from_scipy(sp_mx) df = pd.read_hdf(args.tsfilepath) num_samples, num_nodes = df.shape tsdata = df.to_numpy() n_his = args.window save_path = args.savemodelpath n_pred = args.pred_len n_route = num_nodes blocks = args.channels # blocks = [1, 16, 32, 64, 32, 128] drop_prob = 0 num_layers = args.num_layers batch_size = args.batch_size epochs = args.epochs lr = args.lr W = adj_mx len_val = round(num_samples * 0.1) len_train = round(num_samples * 0.7) train = df[:len_train] val = df[len_train : len_train + len_val] test = df[len_train + len_val :] scaler = StandardScaler() train = scaler.fit_transform(train) val = scaler.transform(val) test = scaler.transform(test) x_train, y_train = data_transform(train, n_his, n_pred, device) x_val, y_val = data_transform(val, n_his, n_pred, device) x_test, y_test = data_transform(test, n_his, n_pred, device) train_data = torch.utils.data.TensorDataset(x_train, y_train) train_iter = torch.utils.data.DataLoader(train_data, batch_size, 
shuffle=True) val_data = torch.utils.data.TensorDataset(x_val, y_val) val_iter = torch.utils.data.DataLoader(val_data, batch_size) test_data = torch.utils.data.TensorDataset(x_test, y_test) test_iter = torch.utils.data.DataLoader(test_data, batch_size) loss = nn.MSELoss() G = G.to(device) model = STGCN_WAVE( blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str ).to(device) optimizer = torch.optim.RMSprop(model.parameters(), lr=lr) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.7) min_val_loss = np.inf for epoch in range(1, epochs + 1): l_sum, n = 0.0, 0 model.train() for x, y in train_iter: y_pred = model(x).view(len(x), -1) l = loss(y_pred, y) optimizer.zero_grad() l.backward() optimizer.step() l_sum += l.item() * y.shape[0] n += y.shape[0] scheduler.step() val_loss = evaluate_model(model, loss, val_iter) if val_loss < min_val_loss: min_val_loss = val_loss torch.save(model.state_dict(), save_path) print( "epoch", epoch, ", train loss:", l_sum / n, ", validation loss:", val_loss, ) best_model = STGCN_WAVE( blocks, n_his, n_route, G, drop_prob, num_layers, device, args.control_str ).to(device) best_model.load_state_dict(torch.load(save_path, weights_only=False)) l = evaluate_model(best_model, loss, test_iter) MAE, MAPE, RMSE = evaluate_metric(best_model, test_iter, scaler) print("test loss:", l, "\nMAE:", MAE, ", MAPE:", MAPE, ", RMSE:", RMSE) ================================================ FILE: examples/pytorch/stgcn_wave/model.py ================================================ import math import torch import torch.nn as nn import torch.nn.functional as F import torch.nn.init as init from dgl.nn.pytorch import GraphConv from dgl.nn.pytorch.conv import ChebConv class TemporalConvLayer(nn.Module): """Temporal convolution layer. 
class SpatioConvLayer(nn.Module):
    """Spatial graph convolution layer.

    arguments
    ---------
    c : int
        The hidden dimension; used as both input and output size of the
        graph convolution
    Lk : graph
        The graph passed to the graph convolution on every forward call
    """

    def __init__(self, c, Lk):  # c : hidden dimension Lk: graph matrix
        super(SpatioConvLayer, self).__init__()
        self.g = Lk
        self.gc = GraphConv(c, c, activation=F.relu)
        # self.gc = ChebConv(c, c, 3)

    def init(self):
        """Re-draw the graph-convolution weight uniformly in [-stdv, stdv]."""
        # The learnable weight lives in ``self.gc``; this module has no
        # ``self.W`` attribute, so reading it would raise AttributeError.
        stdv = 1.0 / math.sqrt(self.gc.weight.size(1))
        self.gc.weight.data.uniform_(-stdv, stdv)

    def forward(self, x):
        # Swap dims so that dim 3 comes first and dim 1 comes last before
        # the graph convolution.
        # NOTE(review): presumably x is (batch, channel, time, node), which
        # would put nodes first and channels last - confirm against callers.
        x = x.transpose(0, 3)
        x = x.transpose(1, 3)
        output = self.gc(self.g, x)
        # Undo the two transposes to restore the input layout.
        output = output.transpose(1, 3)
        output = output.transpose(0, 3)
        return torch.relu(output)
def get_adjacency_matrix(distance_df, sensor_ids, normalized_k=0.1):
    """
    Build a thresholded Gaussian-kernel adjacency matrix from pairwise
    sensor distances.

    :param distance_df: data frame with three columns: [from, to, distance].
    :param sensor_ids: list of sensor ids.
    :param normalized_k: entries that become lower than normalized_k after
        normalization are set to zero for sparsity.
    :return: adjacency matrix
    """
    num_sensors = len(sensor_ids)
    # Pairs without a listed distance stay at infinity and end up as weight 0.
    dist_mx = np.full((num_sensors, num_sensors), np.inf, dtype=np.float32)

    # Sensor id -> row/column index.
    index_of = {sensor_id: pos for pos, sensor_id in enumerate(sensor_ids)}

    # Copy the known distances, ignoring rows that mention unknown sensors.
    for record in distance_df.values:
        if record[0] in index_of and record[1] in index_of:
            dist_mx[index_of[record[0]], index_of[record[1]]] = record[2]

    # The standard deviation of the known distances is the kernel width.
    known = dist_mx[~np.isinf(dist_mx)]
    std = known.std()
    adj_mx = np.exp(-np.square(dist_mx / std))
    # Make the adjacent matrix symmetric by taking the max.
    # adj_mx = np.maximum.reduce([adj_mx, adj_mx.T])

    # Drop weak connections for sparsity.
    adj_mx[adj_mx < normalized_k] = 0
    return adj_mx
def evaluate(model, features, labels, mask):
    """Return the accuracy of ``model`` on the nodes selected by ``mask``.

    The model is switched to eval mode and run on all features without
    gradient tracking; only the masked rows are scored.
    """
    model.eval()
    with torch.no_grad():
        scores = model(features)[mask]
        targets = labels[mask]
        # Predicted class is the index of the largest logit in each row.
        predicted = torch.max(scores, dim=1).indices
        n_correct = torch.sum(predicted == targets).item()
    return n_correct * 1.0 / len(targets)
if __name__ == "__main__":
    # Command-line entry point: register the dataset options and the training
    # hyper-parameters, echo the parsed namespace, then start training.
    parser = argparse.ArgumentParser(description="TAGCN")
    register_data_args(parser)

    # (flag, keyword arguments) pairs, registered in exactly this order.
    option_specs = [
        (
            "--dropout",
            dict(type=float, default=0.5, help="dropout probability"),
        ),
        ("--gpu", dict(type=int, default=-1, help="gpu")),
        ("--lr", dict(type=float, default=1e-2, help="learning rate")),
        (
            "--n-epochs",
            dict(type=int, default=200, help="number of training epochs"),
        ),
        (
            "--n-hidden",
            dict(type=int, default=16, help="number of hidden tagcn units"),
        ),
        (
            "--n-layers",
            dict(type=int, default=1, help="number of hidden tagcn layers"),
        ),
        (
            "--weight-decay",
            dict(type=float, default=5e-4, help="Weight for L2 loss"),
        ),
        (
            "--self-loop",
            dict(action="store_true", help="graph self-loop (default=False)"),
        ),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
    parser.set_defaults(self_loop=False)

    args = parser.parse_args()
    print(args)

    main(args)
temporarily removed due to the change in the `DataLoader` interface in DGL 1.0. Please refer to the v0.9 example [here](https://github.com/dmlc/dgl/tree/0.9.x/examples/pytorch/tgn). ================================================ FILE: examples/pytorch/tree_lstm/README.md ================================================ # Tree-LSTM This is a re-implementation of the following paper: > [**Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks**](http://arxiv.org/abs/1503.00075) > *Kai Sheng Tai, Richard Socher, and Christopher Manning*. The provided implementation achieves a test accuracy of 51.72, which is comparable to the result reported in the original paper: 51.0(±0.5). ## Data The script downloads the [SST dataset](http://nlp.stanford.edu/sentiment/index.html) automatically, but you need to download the GloVe word vectors yourself. From the command line, you can run: ``` wget http://nlp.stanford.edu/data/glove.840B.300d.zip unzip glove.840B.300d.zip ``` ## Dependencies * PyTorch 0.4.1+ * requests * nltk ``` pip install torch requests nltk ``` ## Usage ``` python3 train.py --gpu 0 ``` ## Speed On an AWS p3.2x instance, training takes 3.18s per epoch with a batch size of 256.
def _batch_counts(g, labels, pred):
    """Return ``(correct, total, root_correct, root_total)`` for one batch.

    Root nodes are the nodes of ``g`` without outgoing edges.
    """
    # Query all out-degrees at once instead of calling the graph per node.
    root_ids = th.nonzero(g.out_degrees() == 0, as_tuple=False).squeeze(1)
    root_ids = root_ids.to(labels.device)
    hits = th.eq(labels, pred)
    return (
        hits.sum().item(),
        len(labels),
        hits[root_ids].sum().item(),
        len(root_ids),
    )


def _evaluate(model, loader, device, h_size):
    """Return ``(node accuracy, root accuracy)`` of ``model`` over ``loader``."""
    model.eval()
    correct = total = root_correct = root_total = 0
    with th.no_grad():
        for batch in loader:
            g = batch.graph.to(device)
            n = g.num_nodes()
            h = th.zeros((n, h_size)).to(device)
            c = th.zeros((n, h_size)).to(device)
            logits = model(batch, g, h, c)
            pred = th.argmax(logits, 1)
            counts = _batch_counts(g, batch.label, pred)
            correct += counts[0]
            total += counts[1]
            root_correct += counts[2]
            root_total += counts[3]
    return 1.0 * correct / total, 1.0 * root_correct / root_total


def main(args):
    """Train a Tree-LSTM on SST, early-stop on dev root accuracy, then test.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``seed``, ``gpu``, ``batch_size``, ``x_size``, ``h_size``,
        ``dropout``, ``child_sum``, ``lr``, ``weight_decay``, ``epochs`` and
        ``log_every``.

    Side effects: prints progress, and writes / reads the checkpoint file
    ``best_<seed>.pkl`` in the working directory.
    """
    np.random.seed(args.seed)
    th.manual_seed(args.seed)
    th.cuda.manual_seed(args.seed)

    best_epoch = -1
    best_dev_acc = 0

    cuda = args.gpu >= 0
    device = th.device("cuda:{}".format(args.gpu)) if cuda else th.device("cpu")
    if cuda:
        th.cuda.set_device(args.gpu)

    trainset = SSTDataset()
    train_loader = DataLoader(
        dataset=trainset,
        batch_size=args.batch_size,
        collate_fn=batcher(device),
        shuffle=True,
        num_workers=0,
    )
    devset = SSTDataset(mode="dev")
    dev_loader = DataLoader(
        dataset=devset,
        batch_size=100,
        collate_fn=batcher(device),
        shuffle=False,
        num_workers=0,
    )

    testset = SSTDataset(mode="test")
    test_loader = DataLoader(
        dataset=testset,
        batch_size=100,
        collate_fn=batcher(device),
        shuffle=False,
        num_workers=0,
    )

    model = TreeLSTM(
        trainset.vocab_size,
        args.x_size,
        args.h_size,
        trainset.num_classes,
        args.dropout,
        cell_type="childsum" if args.child_sum else "nary",
        pretrained_emb=trainset.pretrained_emb,
    ).to(device)
    print(model)

    # Every trainable parameter whose first dimension is not the vocabulary
    # size; the embedding table gets its own (smaller) learning rate below.
    params_ex_emb = [
        x
        for x in list(model.parameters())
        if x.requires_grad and x.size(0) != trainset.vocab_size
    ]
    params_emb = list(model.embedding.parameters())

    for p in params_ex_emb:
        if p.dim() > 1:
            INIT.xavier_uniform_(p)

    optimizer = optim.Adagrad(
        [
            {
                "params": params_ex_emb,
                "lr": args.lr,
                "weight_decay": args.weight_decay,
            },
            {"params": params_emb, "lr": 0.1 * args.lr},
        ]
    )

    dur = []
    for epoch in range(args.epochs):
        t_epoch = time.time()
        model.train()
        for step, batch in enumerate(train_loader):
            g = batch.graph.to(device)
            n = g.num_nodes()
            h = th.zeros((n, args.h_size)).to(device)
            c = th.zeros((n, args.h_size)).to(device)
            # The first three steps are treated as warm-up and not timed.
            if step >= 3:
                t0 = time.time()  # tik

            logits = model(batch, g, h, c)
            logp = F.log_softmax(logits, 1)
            loss = F.nll_loss(logp, batch.label, reduction="sum")

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step >= 3:
                dur.append(time.time() - t0)  # tok

            if step > 0 and step % args.log_every == 0:
                pred = th.argmax(logits, 1)
                correct, total, root_correct, root_total = _batch_counts(
                    g, batch.label, pred
                )
                # ``dur`` is still empty when logging before step 3.
                mean_time = np.mean(dur) if dur else float("nan")
                print(
                    "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Acc {:.4f} | Root Acc {:.4f} | Time(s) {:.4f}".format(
                        epoch,
                        step,
                        loss.item(),
                        1.0 * correct / total,
                        1.0 * root_correct / root_total,
                        mean_time,
                    )
                )
        print(
            "Epoch {:05d} training time {:.4f}s".format(
                epoch, time.time() - t_epoch
            )
        )

        # eval on dev set
        dev_acc, dev_root_acc = _evaluate(
            model, dev_loader, device, args.h_size
        )
        print(
            "Epoch {:05d} | Dev Acc {:.4f} | Root Acc {:.4f}".format(
                epoch, dev_acc, dev_root_acc
            )
        )

        if dev_root_acc > best_dev_acc:
            best_dev_acc = dev_root_acc
            best_epoch = epoch
            th.save(model.state_dict(), "best_{}.pkl".format(args.seed))
        else:
            # Stop once ten epochs have passed without a new best.
            if best_epoch <= epoch - 10:
                break

        # lr decay
        for param_group in optimizer.param_groups:
            param_group["lr"] = max(1e-5, param_group["lr"] * 0.99)  # 10
        # Reports the learning rate of the last parameter group only.
        print(optimizer.param_groups[-1]["lr"])

    # test
    model.load_state_dict(
        th.load("best_{}.pkl".format(args.seed), map_location=device)
    )
    test_acc, test_root_acc = _evaluate(
        model, test_loader, device, args.h_size
    )
    print(
        "------------------------------------------------------------------------------------"
    )
    print(
        "Epoch {:05d} | Test Acc {:.4f} | Root Acc {:.4f}".format(
            best_epoch, test_acc, test_root_acc
        )
    )
class TreeLSTMCell(nn.Module):
    """Tree-LSTM cell whose child projections take ``2 * h_size`` inputs.

    ``reduce_func`` flattens all incoming messages of a node into one row and
    feeds it to linear layers with ``2 * h_size`` inputs, so each reduced node
    is expected to receive exactly two messages of width ``h_size``.
    """

    def __init__(self, x_size, h_size):
        super().__init__()
        gate_width = 3 * h_size
        children_width = 2 * h_size
        # Input projection for the stacked i/o/u gates (used by the caller).
        self.W_iou = nn.Linear(x_size, gate_width, bias=False)
        # Projection of the concatenated child states to the i/o/u gates.
        self.U_iou = nn.Linear(children_width, gate_width, bias=False)
        self.b_iou = nn.Parameter(th.zeros(1, gate_width))
        # One forget gate per child, computed from both child states.
        self.U_f = nn.Linear(children_width, children_width)

    def message_func(self, edges):
        """Send the source node's hidden and cell state along each edge."""
        source = edges.src
        return {"h": source["h"], "c": source["c"]}

    def reduce_func(self, nodes):
        """Combine child messages into the gate pre-activation and cell sum."""
        child_h = nodes.mailbox["h"]
        child_c = nodes.mailbox["c"]
        # Concatenate the children's hidden states along the feature axis.
        h_cat = child_h.view(child_h.size(0), -1)
        forget = th.sigmoid(self.U_f(h_cat)).view(*child_h.size())
        gated_cell = th.sum(forget * child_c, 1)
        return {"iou": self.U_iou(h_cat), "c": gated_cell}

    def apply_node_func(self, nodes):
        """Turn the accumulated ``iou`` and ``c`` into the new ``h`` and ``c``."""
        pre_activation = nodes.data["iou"] + self.b_iou
        raw_i, raw_o, raw_u = th.chunk(pre_activation, 3, 1)
        in_gate = th.sigmoid(raw_i)
        out_gate = th.sigmoid(raw_o)
        candidate = th.tanh(raw_u)
        cell = in_gate * candidate + nodes.data["c"]
        hidden = out_gate * th.tanh(cell)
        return {"h": hidden, "c": cell}
class TreeLSTM(nn.Module):
    """Tree-LSTM classifier: embedding, tree cell, dropout and linear head."""

    def __init__(
        self,
        num_vocabs,
        x_size,
        h_size,
        num_classes,
        dropout,
        cell_type="nary",
        pretrained_emb=None,
    ):
        super().__init__()
        self.x_size = x_size
        self.embedding = nn.Embedding(num_vocabs, x_size)
        if pretrained_emb is not None:
            print("Using glove")
            weight = self.embedding.weight
            weight.data.copy_(pretrained_emb)
            weight.requires_grad = True
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(h_size, num_classes)
        # Any value other than "nary" selects the child-sum cell.
        cell_cls = {"nary": TreeLSTMCell}.get(cell_type, ChildSumTreeLSTMCell)
        self.cell = cell_cls(x_size, h_size)

    def forward(self, batch, g, h, c):
        """Compute tree-lstm prediction given a batch.

        Parameters
        ----------
        batch : dgl.data.SSTBatch
            The data batch.
        g : dgl.DGLGraph
            Tree for computation.
        h : Tensor
            Initial hidden state.
        c : Tensor
            Initial cell state.

        Returns
        -------
        logits : Tensor
            The prediction of each node.
        """
        # feed embedding; masked-out positions look up index 0 and have their
        # projected input zeroed by the same mask afterwards
        node_mask = batch.mask
        embeds = self.embedding(batch.wordid * node_mask)
        projected = self.cell.W_iou(self.dropout(embeds))
        g.ndata["iou"] = projected * node_mask.float().unsqueeze(-1)
        g.ndata["h"] = h
        g.ndata["c"] = c
        # propagate
        cell = self.cell
        dgl.prop_nodes_topo(
            g,
            cell.message_func,
            cell.reduce_func,
            apply_node_func=cell.apply_node_func,
        )
        # compute logits
        hidden = self.dropout(g.ndata.pop("h"))
        return self.linear(hidden)
def parse_index_file(filename):
    """Read one integer per line from ``filename`` and return them as a list.

    Surrounding whitespace on each line is ignored. The file is opened in a
    ``with`` block so the handle is closed even if a line fails to parse.

    Raises
    ------
    ValueError
        If a line is not a valid integer (blank lines included).
    """
    with open(filename) as index_file:
        return [int(line.strip()) for line in index_file]
class VGAEModel(nn.Module):
    """Variational graph auto-encoder built from three ``GraphConv`` layers.

    ``layers[0]`` maps the input features to ``hidden1_dim``; ``layers[1]``
    and ``layers[2]`` both read that hidden representation and produce the
    ``hidden2_dim``-wide mean and log standard deviation of the latent code.
    The last ``mean`` and ``log_std`` are kept as attributes so the training
    loop can compute the KL term from them.
    """

    def __init__(self, in_dim, hidden1_dim, hidden2_dim):
        super(VGAEModel, self).__init__()
        self.in_dim = in_dim
        self.hidden1_dim = hidden1_dim
        self.hidden2_dim = hidden2_dim

        layers = [
            GraphConv(
                self.in_dim,
                self.hidden1_dim,
                activation=F.relu,
                allow_zero_in_degree=True,
            ),
            # Identity activation: the mean is left unbounded.
            GraphConv(
                self.hidden1_dim,
                self.hidden2_dim,
                activation=lambda x: x,
                allow_zero_in_degree=True,
            ),
            # Identity activation: the log-std is left unbounded.
            GraphConv(
                self.hidden1_dim,
                self.hidden2_dim,
                activation=lambda x: x,
                allow_zero_in_degree=True,
            ),
        ]
        self.layers = nn.ModuleList(layers)

    def encoder(self, g, features):
        """Return a latent sample ``mean + noise * exp(log_std)`` per node."""
        h = self.layers[0](g, features)
        self.mean = self.layers[1](g, h)
        self.log_std = self.layers[2](g, h)
        # Sample the noise with the shape, dtype and device of the computed
        # mean so the model follows its inputs instead of depending on a
        # global device defined by the training script.
        gaussian_noise = torch.randn_like(self.mean)
        sampled_z = self.mean + gaussian_noise * torch.exp(self.log_std)
        return sampled_z

    def decoder(self, z):
        """Return ``sigmoid(z @ z.T)``, the reconstructed edge probabilities."""
        adj_rec = torch.sigmoid(torch.matmul(z, z.t()))
        return adj_rec

    def forward(self, g, features):
        """Encode ``features`` on ``g`` and decode the sampled latent code."""
        z = self.encoder(g, features)
        adj_rec = self.decoder(z)
        return adj_rec
# TODO: Clean up. # Remove diagonal elements adj = adj - sp.dia_matrix( (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape ) adj.eliminate_zeros() # Check that diag is zero: assert np.diag(adj.todense()).sum() == 0 adj_triu = sp.triu(adj) adj_tuple = sparse_to_tuple(adj_triu) edges = adj_tuple[0] edges_all = sparse_to_tuple(adj)[0] num_test = int(np.floor(edges.shape[0] / 10.0)) num_val = int(np.floor(edges.shape[0] / 20.0)) all_edge_idx = list(range(edges.shape[0])) np.random.shuffle(all_edge_idx) val_edge_idx = all_edge_idx[:num_val] test_edge_idx = all_edge_idx[num_val : (num_val + num_test)] test_edges = edges[test_edge_idx] val_edges = edges[val_edge_idx] train_edges = np.delete( edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0 ) def ismember(a, b, tol=5): rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1) return np.any(rows_close) test_edges_false = [] while len(test_edges_false) < len(test_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue if ismember([idx_i, idx_j], edges_all): continue if test_edges_false: if ismember([idx_j, idx_i], np.array(test_edges_false)): continue if ismember([idx_i, idx_j], np.array(test_edges_false)): continue test_edges_false.append([idx_i, idx_j]) val_edges_false = [] while len(val_edges_false) < len(val_edges): idx_i = np.random.randint(0, adj.shape[0]) idx_j = np.random.randint(0, adj.shape[0]) if idx_i == idx_j: continue if ismember([idx_i, idx_j], train_edges): continue if ismember([idx_j, idx_i], train_edges): continue if ismember([idx_i, idx_j], val_edges): continue if ismember([idx_j, idx_i], val_edges): continue if val_edges_false: if ismember([idx_j, idx_i], np.array(val_edges_false)): continue if ismember([idx_i, idx_j], np.array(val_edges_false)): continue val_edges_false.append([idx_i, idx_j]) assert ~ismember(test_edges_false, edges_all) assert ~ismember(val_edges_false, edges_all) assert ~ismember(val_edges, train_edges) 
def mask_test_edges_dgl(graph, adj):
    """Split the edges of ``graph`` into train/val/test and sample negatives.

    One tenth of the edges (rounded down) becomes the test set and one
    twentieth the validation set; the remaining edge ids form the training
    set. For each held-out set, an equally sized list of node pairs that are
    not edges of ``graph`` is drawn with ``np.random``.

    Parameters
    ----------
    graph : object with ``edges()``
        ``graph.edges()`` must return the ``(src, dst)`` tensors of all edges.
    adj : array-like
        Only ``adj.shape[0]`` is read, as the number of nodes to sample from.

    Returns
    -------
    tuple
        ``(train_edge_idx, val_edges, val_edges_false, test_edges,
        test_edges_false)``: a list of edge ids, two ``(k, 2)`` arrays of
        positive edges, and two lists of ``[i, j]`` negative pairs.

    Notes
    -----
    As in rejection sampling generally, the loops do not terminate if the
    graph is too dense to contain enough distinct non-edges.
    """
    src, dst = graph.edges()
    edges_all = torch.stack([src, dst], dim=0).t().cpu().numpy()
    num_edges = edges_all.shape[0]
    num_nodes = adj.shape[0]

    num_test = int(np.floor(num_edges / 10.0))
    num_val = int(np.floor(num_edges / 20.0))

    all_edge_idx = list(range(num_edges))
    np.random.shuffle(all_edge_idx)
    val_edge_idx = all_edge_idx[:num_val]
    test_edge_idx = all_edge_idx[num_val : (num_val + num_test)]
    train_edge_idx = all_edge_idx[(num_val + num_test) :]
    test_edges = edges_all[test_edge_idx]
    val_edges = edges_all[val_edge_idx]

    def as_pairs(edge_array):
        # Hashable (src, dst) tuples give O(1) membership tests.
        return {tuple(edge) for edge in edge_array.tolist()}

    all_pairs = as_pairs(edges_all)
    train_pairs = as_pairs(edges_all[train_edge_idx])
    val_pairs = as_pairs(val_edges)
    test_pairs = as_pairs(test_edges)

    test_edges_false = []
    test_false_pairs = set()
    while len(test_edges_false) < len(test_edges):
        idx_i = np.random.randint(0, num_nodes)
        idx_j = np.random.randint(0, num_nodes)
        if idx_i == idx_j:
            continue
        if (idx_i, idx_j) in all_pairs:
            continue
        # Reject pairs already drawn, in either direction.
        if (idx_i, idx_j) in test_false_pairs:
            continue
        if (idx_j, idx_i) in test_false_pairs:
            continue
        test_false_pairs.add((idx_i, idx_j))
        test_edges_false.append([idx_i, idx_j])

    val_edges_false = []
    val_false_pairs = set()
    while len(val_edges_false) < len(val_edges):
        idx_i = np.random.randint(0, num_nodes)
        idx_j = np.random.randint(0, num_nodes)
        if idx_i == idx_j:
            continue
        # Checking against every edge (not only train/val) keeps a test edge
        # from being returned as a validation negative, which would violate
        # the sanity check below.
        if (idx_i, idx_j) in all_pairs:
            continue
        if (idx_j, idx_i) in train_pairs:
            continue
        if (idx_j, idx_i) in val_pairs:
            continue
        if (idx_i, idx_j) in val_false_pairs:
            continue
        if (idx_j, idx_i) in val_false_pairs:
            continue
        val_false_pairs.add((idx_i, idx_j))
        val_edges_false.append([idx_i, idx_j])

    # Sanity checks on the split: negatives are never real edges and the
    # three positive sets do not share an edge.
    assert all_pairs.isdisjoint(test_false_pairs)
    assert all_pairs.isdisjoint(val_false_pairs)
    assert train_pairs.isdisjoint(val_pairs)
    assert train_pairs.isdisjoint(test_pairs)
    assert test_pairs.isdisjoint(val_pairs)

    # NOTE: these edge lists only contain single direction of edge!
    return (
        train_edge_idx,
        val_edges,
        val_edges_false,
        test_edges,
        test_edges_false,
    )
def compute_loss_para(adj):
    """Return the per-entry BCE weights and loss scale for a dense adjacency.

    Parameters
    ----------
    adj : torch.Tensor
        Square dense adjacency matrix; entries equal to 1 are the positives.

    Returns
    -------
    weight_tensor : torch.Tensor
        Flat tensor with one weight per entry of ``adj``: ``pos_weight``
        (number of non-edges divided by number of edges) where ``adj == 1``
        and 1 elsewhere. It is created on the same device as ``adj``.
    norm : float
        ``N * N / (2 * number of non-edges)`` with ``N = adj.shape[0]``.
    """
    num_entries = adj.shape[0] * adj.shape[0]
    num_pos = adj.sum()
    num_neg = num_entries - num_pos
    pos_weight = num_neg / num_pos
    norm = num_entries / float(num_neg * 2)

    weight_mask = adj.reshape(-1) == 1
    # Follow the device of ``adj`` rather than a module-level global so the
    # mask and the weights can never end up on different devices.
    weight_tensor = torch.ones(weight_mask.size(0), device=adj.device)
    weight_tensor[weight_mask] = pos_weight
    return weight_tensor, norm
CoraGraphDataset(reverse_edge=False) elif args.dataset == "citeseer": dataset = CiteseerGraphDataset(reverse_edge=False) elif args.dataset == "pubmed": dataset = PubmedGraphDataset(reverse_edge=False) else: raise NotImplementedError graph = dataset[0] # Extract node features feats = graph.ndata.pop("feat").to(device) in_dim = feats.shape[-1] # generate input adj_orig = graph.adj_external().to_dense() # build test set with 10% positive links ( train_edge_idx, val_edges, val_edges_false, test_edges, test_edges_false, ) = mask_test_edges_dgl(graph, adj_orig) graph = graph.to(device) # create train graph train_edge_idx = torch.tensor(train_edge_idx).to(device) train_graph = dgl.edge_subgraph(graph, train_edge_idx, relabel_nodes=False) train_graph = train_graph.to(device) adj = train_graph.adj_external().to_dense().to(device) # compute loss parameters weight_tensor, norm = compute_loss_para(adj) # create model vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2) vgae_model = vgae_model.to(device) # create training component optimizer = torch.optim.Adam(vgae_model.parameters(), lr=args.learning_rate) print( "Total Parameters:", sum([p.nelement() for p in vgae_model.parameters()]), ) # create training epoch for epoch in range(args.epochs): t = time.time() # Training and validation using a full graph vgae_model.train() logits = vgae_model.forward(graph, feats) # compute loss loss = norm * F.binary_cross_entropy( logits.view(-1), adj.view(-1), weight=weight_tensor ) kl_divergence = ( 0.5 / logits.size(0) * ( 1 + 2 * vgae_model.log_std - vgae_model.mean**2 - torch.exp(vgae_model.log_std) ** 2 ) .sum(1) .mean() ) loss -= kl_divergence # backward optimizer.zero_grad() loss.backward() optimizer.step() train_acc = get_acc(logits, adj) val_roc, val_ap = get_scores(val_edges, val_edges_false, logits) # Print out performance print( "Epoch:", "%04d" % (epoch + 1), "train_loss=", "{:.5f}".format(loss.item()), "train_acc=", "{:.5f}".format(train_acc), "val_roc=", 
def web_main():
    """Train and evaluate the VGAE on a Planetoid dataset loaded from disk.

    Loads ``args.dataset`` with ``load_data``, holds out validation and test
    edges with ``mask_test_edges``, trains on the remaining edges for
    ``args.epochs`` epochs and prints the loss, accuracy, ROC-AUC and AP of
    every epoch, followed by the test scores of the final reconstruction.
    """
    adj, features = load_data(args.dataset)

    (
        adj_train,
        _train_edges,
        val_edges,
        val_edges_false,
        test_edges,
        test_edges_false,
    ) = mask_test_edges(adj)
    adj = adj_train
    num_nodes = adj.shape[0]

    # Some preprocessing. ``preprocess_graph`` adds the identity before
    # normalizing, so the resulting graph already has its self-loops; the
    # former ``graph.add_self_loop()`` call discarded its result and is gone.
    adj_normalization, _ = preprocess_graph(adj)
    graph = dgl.from_scipy(adj_normalization).to(device)

    # Loss re-weighting for the imbalance between edges and non-edges.
    num_pos = adj.sum()
    num_neg = num_nodes * num_nodes - num_pos
    pos_weight = float(num_neg) / num_pos
    norm = num_nodes * num_nodes / float(num_neg * 2)

    # Reconstruction target: training edges plus the diagonal, densified once
    # here instead of on every epoch.
    adj_label = torch.tensor(
        (adj_train + sp.eye(num_nodes)).toarray(),
        dtype=torch.float32,
        device=device,
    )
    features = torch.tensor(
        features.toarray(), dtype=torch.float32, device=device
    )

    weight_tensor = torch.ones(adj_label.numel(), device=device)
    weight_tensor[adj_label.view(-1) == 1] = pos_weight

    in_dim = features.shape[-1]
    # Keep the model on the same device as the graph and the tensors.
    vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2)
    vgae_model = vgae_model.to(device)

    # create training component
    optimizer = torch.optim.Adam(vgae_model.parameters(), lr=args.learning_rate)
    print(
        "Total Parameters:",
        sum([p.nelement() for p in vgae_model.parameters()]),
    )

    # create training epoch
    for epoch in range(args.epochs):
        t = time.time()

        # Training and validation using a full graph
        vgae_model.train()

        logits = vgae_model(graph, features)

        # compute loss
        loss = norm * F.binary_cross_entropy(
            logits.view(-1), adj_label.view(-1), weight=weight_tensor
        )
        kl_divergence = (
            0.5
            / logits.size(0)
            * (
                1
                + 2 * vgae_model.log_std
                - vgae_model.mean**2
                - torch.exp(vgae_model.log_std) ** 2
            )
            .sum(1)
            .mean()
        )
        loss -= kl_divergence

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Shared module-level helpers, as used by ``dgl_main``.
        train_acc = get_acc(logits, adj_label)
        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)

        # Print out performance
        print(
            "Epoch:",
            "%04d" % (epoch + 1),
            "train_loss=",
            "{:.5f}".format(loss.item()),
            "train_acc=",
            "{:.5f}".format(train_acc),
            "val_roc=",
            "{:.5f}".format(val_roc),
            "val_ap=",
            "{:.5f}".format(val_ap),
            "time=",
            "{:.5f}".format(time.time() - t),
        )

    test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
    print(
        "End of training!",
        "test_roc=",
        "{:.5f}".format(test_roc),
        "test_ap=",
        "{:.5f}".format(test_ap),
    )
block.srcdata["hdelta"] = H_src - HBar_src block.update_all( fn.copy_u("hdelta", "m"), fn.mean("m", "hdelta_new") ) h_neigh = ( block.dstdata["agg_hbar"] + block.dstdata["hdelta_new"] ) h = self.W(th.cat([H_dst, h_neigh], 1)) if self.activation is not None: h = self.activation(h) return h else: with block.local_scope(): H_src, H_dst = H block.srcdata["h"] = H_src block.update_all(fn.copy_u("h", "m"), fn.mean("m", "h_new")) h_neigh = block.dstdata["h_new"] h = self.W(th.cat([H_dst, h_neigh], 1)) if self.activation is not None: h = self.activation(h) return h class SAGE(nn.Module): def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation): super().__init__() self.n_layers = n_layers self.n_hidden = n_hidden self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(SAGEConvWithCV(in_feats, n_hidden, activation)) for i in range(1, n_layers - 1): self.layers.append(SAGEConvWithCV(n_hidden, n_hidden, activation)) self.layers.append(SAGEConvWithCV(n_hidden, n_classes, None)) def forward(self, blocks): h = blocks[0].srcdata["features"] updates = [] for layer, block in zip(self.layers, blocks): # We need to first copy the representation of nodes on the RHS from the # appropriate nodes on the LHS. # Note that the shape of h is (num_nodes_LHS, D) and the shape of h_dst # would be (num_nodes_RHS, D) h_dst = h[: block.number_of_dst_nodes()] hbar_src = block.srcdata["hist"] agg_hbar_dst = block.dstdata["agg_hist"] # Then we compute the updated representation on the RHS. # The shape of h now becomes (num_nodes_RHS, D) h = layer(block, (h, h_dst), (hbar_src, agg_hbar_dst)) block.dstdata["h_new"] = h return h def inference(self, g, x, batch_size, device): """ Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). g : the entire graph. x : the input of entire node set. The inference code is written in a fashion that it could handle any number of nodes and layers. 
def compute_acc(pred, labels):
    """
    Return the fraction of rows in ``pred`` whose arg-max column equals the
    matching entry of ``labels``.
    """
    predicted_class = pred.argmax(dim=1)
    num_correct = (predicted_class == labels).float().sum()
    return num_correct / len(pred)
def run(args, dev_id, data):
    """Train the control-variate GraphSAGE model on a single device.

    Parameters
    ----------
    args : argparse.Namespace
        Reads ``fan_out``, ``batch_size``, ``num_workers_per_gpu``,
        ``num_hidden``, ``num_layers``, ``lr``, ``num_epochs``, ``log_every``,
        ``eval_every`` and ``val_batch_size``.
    dev_id : int
        CUDA device index used for the model and the blocks.
    data : tuple
        ``(train_mask, val_mask, in_feats, labels, n_classes, g)``.
    """
    # Leading epochs that are excluded from the average epoch time.
    warmup_epochs = 5

    th.cuda.set_device(dev_id)

    # Unpack data
    train_mask, val_mask, in_feats, labels, n_classes, g = data
    # squeeze(1) rather than squeeze(): the id tensor stays 1-D even when the
    # mask selects a single node, so it still has a length for the DataLoader.
    train_nid = train_mask.nonzero().squeeze(1)
    val_nid = val_mask.nonzero().squeeze(1)

    # Create sampler
    sampler = NeighborSampler(g, [int(_) for _ in args.fan_out.split(",")])

    # Create PyTorch DataLoader for constructing blocks
    dataloader = DataLoader(
        dataset=train_nid.numpy(),
        batch_size=args.batch_size,
        collate_fn=sampler.sample_blocks,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers_per_gpu,
    )

    # Define model
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu)

    # Move the model to GPU and define optimizer
    model = model.to(dev_id)
    loss_fcn = nn.CrossEntropyLoss()
    loss_fcn = loss_fcn.to(dev_id)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    # Compute the history tensors before training starts.
    model.eval()
    init_history(g, model, dev_id)
    model.train()

    # Training loop
    avg = 0
    iter_tput = []
    for epoch in range(args.num_epochs):
        tic = time.time()
        model.train()
        tic_step = time.time()
        for step, (blocks, hist_blocks) in enumerate(dataloader):
            # The output nodes are the destination nodes of the last block.
            seeds = blocks[-1].dstdata[dgl.NID]

            blocks, hist_blocks = load_subtensor(
                g, labels, blocks, hist_blocks, dev_id, True
            )

            # forward
            batch_pred = model(blocks)
            # update history
            update_history(g, blocks)
            # compute loss
            batch_labels = blocks[-1].dstdata["label"]
            loss = loss_fcn(batch_pred, batch_labels)
            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            iter_tput.append(len(seeds) / (time.time() - tic_step))
            if step % args.log_every == 0:
                acc = compute_acc(batch_pred, batch_labels)
                # The first three measurements are skipped; until more are
                # available report nan without averaging an empty slice.
                if len(iter_tput) > 3:
                    speed = np.mean(iter_tput[3:])
                else:
                    speed = float("nan")
                print(
                    "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f}".format(
                        epoch,
                        step,
                        loss.item(),
                        acc.item(),
                        speed,
                    )
                )
            tic_step = time.time()

        toc = time.time()
        print("Epoch Time(s): {:.4f}".format(toc - tic))
        if epoch >= warmup_epochs:
            avg += toc - tic
        if epoch % args.eval_every == 0 and epoch != 0:
            model.eval()
            eval_acc = evaluate(
                model, g, labels, val_nid, args.val_batch_size, dev_id
            )
            print("Eval Acc {:.4f}".format(eval_acc))

    # Count the timed epochs explicitly instead of deriving the divisor from
    # the last loop index, which is undefined for zero epochs and is zero or
    # negative when no more than ``warmup_epochs`` epochs were run.
    timed_epochs = max(args.num_epochs - warmup_epochs, 0)
    if timed_epochs > 0:
        print("Avg epoch time: {}".format(avg / timed_epochs))
    else:
        print(
            "Avg epoch time: n/a (no epochs ran after the {} warm-up epochs)".format(
                warmup_epochs
            )
        )
class SAGE(nn.Module):
    """Stack of ``SAGEConvWithCV`` layers trained with historical activations.

    Parameters
    ----------
    in_feats : int
        Width of the input features.
    n_hidden : int
        Width of every hidden layer.
    n_classes : int
        Width of the output layer.
    n_layers : int
        Number of layers; must be at least 1.
    activation : callable or None
        Activation of all layers except the last one, which has none.

    Raises
    ------
    ValueError
        If ``n_layers`` is smaller than 1.
    """

    def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation):
        super().__init__()
        if n_layers < 1:
            raise ValueError(
                "n_layers must be at least 1, got {}".format(n_layers)
            )
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        if n_layers == 1:
            # A single layer maps the input straight to the classes, so the
            # number of layers always equals ``n_layers``.
            self.layers.append(SAGEConvWithCV(in_feats, n_classes, None))
        else:
            self.layers.append(SAGEConvWithCV(in_feats, n_hidden, activation))
            for _ in range(n_layers - 2):
                self.layers.append(
                    SAGEConvWithCV(n_hidden, n_hidden, activation)
                )
            self.layers.append(SAGEConvWithCV(n_hidden, n_classes, None))

    def forward(self, blocks):
        """Run the layers over ``blocks`` and return the last layer's output.

        Reads ``"features"`` from the source nodes of the first block and
        ``"hist"`` / ``"agg_hist"`` from every block.  As a side effect the
        output of each layer is stored in ``block.dstdata["h_new"]``.
        """
        h = blocks[0].srcdata["features"]

        for layer, block in zip(self.layers, blocks):
            # The destination representations are taken from the leading rows
            # of the source representations: h has one row per source node
            # and h_dst one row per destination node.
            h_dst = h[: block.number_of_dst_nodes()]
            hbar_src = block.srcdata["hist"]
            agg_hbar_dst = block.dstdata["agg_hist"]
            # h now has one row per destination node of this block.
            h = layer(block, (h, h_dst), (hbar_src, agg_hbar_dst))
            block.dstdata["h_new"] = h
        return h

    def inference(self, g, x, batch_size, device):
        """
        Inference with the GraphSAGE model on full neighbors (i.e. without
        neighbor sampling).

        g : the entire graph.
        x : the input of entire node set.
        batch_size : number of nodes processed per step.
        device : device the per-batch computation runs on.

        The output of layer ``l`` is written in place into
        ``g.ndata["hist_%d" % (l + 1)]``; the tensor of the last layer is
        returned.
        """
        # During inference with sampling, multi-layer blocks are very
        # inefficient because lots of computations in the first few layers
        # are repeated.  Therefore the representation of all nodes is computed
        # layer by layer, with the nodes of each layer split into batches.
        # TODO: can we standardize this?
        nodes = th.arange(g.num_nodes())
        for l, layer in enumerate(self.layers):
            y = g.ndata["hist_%d" % (l + 1)]

            for start in tqdm.trange(0, len(nodes), batch_size):
                end = start + batch_size
                batch_nodes = nodes[start:end]
                block = dgl.to_block(
                    dgl.in_subgraph(g, batch_nodes), batch_nodes
                )
                induced_nodes = block.srcdata[dgl.NID]

                h = x[induced_nodes].to(device)
                block = block.to(device)
                h_dst = h[: block.number_of_dst_nodes()]
                h = layer(block, (h, h_dst))

                y[start:end] = h.cpu()

            # The output of this layer is the input of the next one.
            x = y
        return y
def load_subtensor(
    g, labels, blocks, hist_blocks, dev_id, aggregation_on_device=False
):
    """
    Attach features, labels and history to the blocks and move them to
    ``dev_id``.

    g : graph whose ``ndata`` holds ``"features"`` and the ``"hist_%d"``
        columns.
    labels : labels of all nodes, indexed by node ID.
    blocks : sampled blocks.
    hist_blocks : blocks used to aggregate the history, one per sampled block.
    dev_id : target device.
    aggregation_on_device : when true the history block is moved to
        ``dev_id`` before the history is aggregated, otherwise after.

    Returns the moved blocks and history blocks as two lists.
    """
    blocks[0].srcdata["features"] = g.ndata["features"][
        blocks[0].srcdata[dgl.NID]
    ]
    blocks[-1].dstdata["label"] = labels[blocks[-1].dstdata[dgl.NID]]
    ret_blocks = []
    ret_hist_blocks = []
    for i, (block, hist_block) in enumerate(zip(blocks, hist_blocks)):
        # The first block reads the raw features as its history; block i > 0
        # reads the column "hist_<i>".
        hist_col = "features" if i == 0 else "hist_%d" % i
        block.srcdata["hist"] = g.ndata[hist_col][block.srcdata[dgl.NID]]

        # Aggregate history
        hist_block.srcdata["hist"] = g.ndata[hist_col][
            hist_block.srcdata[dgl.NID]
        ]
        if aggregation_on_device:
            hist_block = hist_block.to(dev_id)
        hist_block.update_all(fn.copy_u("hist", "m"), fn.mean("m", "agg_hist"))

        block = block.to(dev_id)
        if not aggregation_on_device:
            hist_block = hist_block.to(dev_id)
        block.dstdata["agg_hist"] = hist_block.dstdata["agg_hist"]
        ret_blocks.append(block)
        ret_hist_blocks.append(hist_block)
    return ret_blocks, ret_hist_blocks
if n_gpus > 1: dist_init_method = "tcp://{master_ip}:{master_port}".format( master_ip="127.0.0.1", master_port="12345" ) world_size = n_gpus th.distributed.init_process_group( backend="nccl", init_method=dist_init_method, world_size=world_size, rank=proc_id, ) th.cuda.set_device(dev_id) # Unpack data train_mask, val_mask, in_feats, labels, n_classes, g = data train_nid = train_mask.nonzero().squeeze() val_nid = val_mask.nonzero().squeeze() # Create sampler sampler = NeighborSampler(g, [int(_) for _ in args.fan_out.split(",")]) # Create PyTorch DataLoader for constructing blocks if n_gpus > 1: dist_sampler = th.utils.data.distributed.DistributedSampler( train_nid.numpy(), shuffle=True, drop_last=False ) dataloader = DataLoader( dataset=train_nid.numpy(), batch_size=args.batch_size, collate_fn=sampler.sample_blocks, sampler=dist_sampler, num_workers=args.num_workers_per_gpu, ) else: dataloader = DataLoader( dataset=train_nid.numpy(), batch_size=args.batch_size, collate_fn=sampler.sample_blocks, shuffle=True, drop_last=False, num_workers=args.num_workers_per_gpu, ) # Define model model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu) # Move the model to GPU and define optimizer model = model.to(dev_id) if n_gpus > 1: model = DistributedDataParallel( model, device_ids=[dev_id], output_device=dev_id ) loss_fcn = nn.CrossEntropyLoss() loss_fcn = loss_fcn.to(dev_id) optimizer = optim.Adam(model.parameters(), lr=args.lr) # Compute history tensor and their aggregation before training on CPU model.eval() if n_gpus > 1: if proc_id == 0: init_history(g, model.module, dev_id, args.val_batch_size) th.distributed.barrier() else: init_history(g, model, dev_id, args.val_batch_size) model.train() # Training loop avg = 0 iter_tput = [] for epoch in range(args.num_epochs): if n_gpus > 1: dist_sampler.set_epoch(epoch) tic = time.time() model.train() for step, (blocks, hist_blocks) in enumerate(dataloader): if proc_id == 0: tic_step = time.time() # The nodes for 
input lies at the LHS side of the first block. # The nodes for output lies at the RHS side of the last block. seeds = blocks[-1].dstdata[dgl.NID] blocks, hist_blocks = load_subtensor( g, labels, blocks, hist_blocks, dev_id, True ) # forward batch_pred = model(blocks) # update history update_history(g, blocks) # compute loss batch_labels = blocks[-1].dstdata["label"] loss = loss_fcn(batch_pred, batch_labels) # backward optimizer.zero_grad() loss.backward() optimizer.step() if proc_id == 0: iter_tput.append(len(seeds) * n_gpus / (time.time() - tic_step)) if step % args.log_every == 0 and proc_id == 0: acc = compute_acc(batch_pred, batch_labels) print( "Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train Acc {:.4f} | Speed (samples/sec) {:.4f}".format( epoch, step, loss.item(), acc.item(), np.mean(iter_tput[3:]), ) ) if n_gpus > 1: th.distributed.barrier() toc = time.time() if proc_id == 0: print("Epoch Time(s): {:.4f}".format(toc - tic)) if epoch >= 5: avg += toc - tic if epoch % args.eval_every == 0 and epoch != 0: model.eval() eval_acc = evaluate( model if n_gpus == 1 else model.module, g, labels, val_nid, args.val_batch_size, dev_id, ) print("Eval Acc {:.4f}".format(eval_acc)) if n_gpus > 1: th.distributed.barrier() if proc_id == 0: print("Avg epoch time: {}".format(avg / (epoch - 4))) if __name__ == "__main__": argparser = argparse.ArgumentParser("multi-gpu training") argparser.add_argument("--gpu", type=str, default="0") argparser.add_argument("--num-epochs", type=int, default=20) argparser.add_argument("--num-hidden", type=int, default=16) argparser.add_argument("--num-layers", type=int, default=2) argparser.add_argument("--fan-out", type=str, default="1,1") argparser.add_argument("--batch-size", type=int, default=1000) argparser.add_argument("--val-batch-size", type=int, default=1000) argparser.add_argument("--log-every", type=int, default=20) argparser.add_argument("--eval-every", type=int, default=5) argparser.add_argument("--lr", type=float, default=0.003) 
class APPNP(nn.Module):
    """Prediction MLP followed by ``num_hops`` propagation steps.

    Each step computes ``(1 - alpha) * A_drop @ Z + alpha * Z_0`` where
    ``Z_0`` is the MLP output and ``A_drop`` is ``A_hat`` with dropout applied
    to its values.
    """

    def __init__(
        self,
        in_size,
        out_size,
        hidden_size=64,
        dropout=0.1,
        num_hops=10,
        alpha=0.1,
    ):
        super().__init__()
        mlp_stages = [
            nn.Dropout(dropout),
            nn.Linear(in_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, out_size),
        ]
        self.f_theta = nn.Sequential(*mlp_stages)
        self.num_hops = num_hops
        self.A_dropout = nn.Dropout(dropout)
        self.alpha = alpha

    def forward(self, A_hat, X):
        """Return the propagated prediction for features ``X``."""
        initial = self.f_theta(X)
        hidden = initial
        keep = 1 - self.alpha
        for _ in range(self.num_hops):
            # Fresh dropout on the matrix values at every hop.
            dropped_values = self.A_dropout(A_hat.val)
            A_drop = dglsp.val_like(A_hat, dropped_values)
            hidden = keep * A_drop @ hidden + self.alpha * initial
        return hidden
def train(model, g, A_hat, X):
    """Train ``model`` for 50 epochs and print validation/test accuracy.

    model : module called as ``model(A_hat, X)`` that returns per-node logits.
    g : graph whose ``ndata`` provides ``"label"`` and ``"train_mask"`` (and
        whatever ``evaluate`` reads).
    A_hat : matrix passed through to the model.
    X : node features passed through to the model.
    """
    label = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    optimizer = Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    for epoch in range(50):
        # Forward.
        model.train()
        logits = model(A_hat, X)

        # Compute loss with nodes in training set.
        loss = F.cross_entropy(logits[train_mask], label[train_mask])

        # Backward.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Compute prediction.  No gradient is needed for evaluation, so the
        # forward pass runs without recording an autograd graph.
        model.eval()
        with torch.no_grad():
            logits = model(A_hat, X)
        pred = logits.argmax(dim=1)

        # Evaluate the prediction.
        val_acc, test_acc = evaluate(g, pred)
        print(
            f"In epoch {epoch}, loss: {loss:.3f}, val acc: {val_acc:.3f}, test"
            f" acc: {test_acc:.3f}"
        )
def correct(A_hat, label, soft_label, mask, num_layers=20, alpha=0.9):
    """Correct the soft predictions with the smoothed training residual.

    A_hat : propagation matrix; only ``scalar * A_hat`` and ``@`` are used.
    label : one-hot labels, one row per node.
    soft_label : predicted class probabilities, one row per node.
    mask : selects the nodes whose labels may be used.
    num_layers : number of propagation steps applied to the residual.
    alpha : weight of the propagated term in every step.

    Returns ``soft_label`` plus the scaled, smoothed residual.
    """
    # Compute error.
    error = torch.zeros_like(soft_label)
    error[mask] = label[mask] - soft_label[mask]

    # Smooth error.  The residual is signed, so it is propagated here with a
    # clamp to [-1, 1]; clamping to [0, 1] (as is appropriate for label
    # distributions) would throw away every negative residual.
    with torch.no_grad():
        smoothed_error = error
        for _ in range(num_layers):
            smoothed_error = (
                alpha * A_hat @ smoothed_error + (1 - alpha) * error
            )
            smoothed_error = smoothed_error.clamp_(-1.0, 1.0)

    # Autoscale.
    sigma = error[mask].abs()
    sigma = sigma.sum() / sigma.shape[0]
    scale = sigma / smoothed_error.abs().sum(dim=1, keepdim=True)
    # Rows without any smoothed residual give inf (or nan for 0 / 0); fall
    # back to a neutral scale for those and for implausibly large values.
    scale[~scale.isfinite() | (scale > 1000)] = 1.0

    # Correct.
    result = soft_label + scale * smoothed_error
    return result
base_model.train() logits = base_model(X) # Compute loss with nodes in training set. loss = F.cross_entropy(logits[train_mask], label[train_mask]) # Backward. optimizer.zero_grad() loss.backward() optimizer.step() # Compute prediction. base_model.eval() logits = base_model(X) pred = logits.argmax(dim=1) # Evaluate the prediction. val_acc, test_acc = evaluate(g, pred) print( f"Base model, In epoch {epoch}, loss: {loss:.3f}, " f"val acc: {val_acc:.3f}, test acc: {test_acc:.3f}" ) return logits if __name__ == "__main__": # If CUDA is available, use GPU to accelerate the training, use CPU # otherwise. dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Load graph from the existing dataset. dataset = CoraGraphDataset() g = dataset[0].to(dev) # Create the sparse adjacency matrix A. indices = torch.stack(g.edges()) N = g.num_nodes() A = dglsp.spmatrix(indices, shape=(N, N)) # Calculate the symmetrically normalized adjacency matrix. I = dglsp.identity(A.shape, device=dev) A_hat = A + I D_hat = dglsp.diag(A_hat.sum(dim=1)) ** -0.5 A_hat = D_hat @ A_hat @ D_hat # Create models. X = g.ndata["feat"] in_size = X.shape[1] out_size = dataset.num_classes base_model = nn.Linear(in_size, out_size).to(dev) # Stage1: Train the base model. logits = train(base_model, g, X) # Stage2: Correct and Smooth. 
class GATConv(nn.Module):
    """Multi-head graph attention layer written with the DGL sparse API."""

    def __init__(self, in_size, out_size, num_heads, dropout):
        super().__init__()

        self.out_size = out_size
        self.num_heads = num_heads
        self.dropout = nn.Dropout(dropout)
        self.W = nn.Linear(in_size, out_size * num_heads)
        # One attention vector per head for each side of an edge.
        attn_shape = (1, out_size, num_heads)
        self.a_l = nn.Parameter(torch.zeros(*attn_shape))
        self.a_r = nn.Parameter(torch.zeros(*attn_shape))
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-normal initialization (ReLU gain) of W, a_l and a_r."""
        relu_gain = nn.init.calculate_gain("relu")
        for tensor in (self.W.weight, self.a_l, self.a_r):
            nn.init.xavier_normal_(tensor, gain=relu_gain)

    ###########################################################################
    # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement
    # multihead attention.
    ###########################################################################
    def forward(self, A_hat, Z):
        """Return the attention-weighted aggregation of the projected ``Z``."""
        Z = self.dropout(Z)
        num_rows = Z.shape[0]
        Z = self.W(Z).view(num_rows, self.out_size, self.num_heads)

        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        score_l = (Z * self.a_l).sum(dim=1)
        score_r = (Z * self.a_r).sum(dim=1)
        edge_score = F.leaky_relu(score_l[A_hat.row] + score_r[A_hat.col])

        # Normalize the scores with the sparse softmax, then apply dropout to
        # the resulting attention values.
        attention = dglsp.val_like(A_hat, edge_score).softmax()
        attention = dglsp.val_like(attention, self.dropout(attention.val))
        return dglsp.bspmm(attention, Z)
class GCN(nn.Module):
    """Two-layer graph convolutional network."""

    def __init__(self, in_size, out_size, hidden_size=16):
        super().__init__()

        # Two-layer GCN.
        self.W1 = nn.Linear(in_size, hidden_size)
        self.W2 = nn.Linear(hidden_size, out_size)

    ############################################################################
    # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement the GCN
    # forward process.
    ############################################################################
    def forward(self, A_norm, X):
        """Return ``A_norm @ W2(relu(A_norm @ W1(X)))``."""
        hidden = F.relu(A_norm @ self.W1(X))
        return A_norm @ self.W2(hidden)
def train(model, g, A_norm, X):
    """Train ``model`` full-batch on the training nodes of ``g``.

    Runs 200 epochs of Adam (lr 1e-2, weight decay 5e-4) on the
    cross-entropy loss over the nodes selected by ``g.ndata["train_mask"]``.
    Every 20 epochs the loss and the validation/test accuracies are printed.
    """
    node_data = g.ndata
    label = node_data["label"]
    train_mask = node_data["train_mask"]
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)

    for epoch in range(200):
        model.train()

        # Forward pass and loss on the training nodes only.
        logits = model(A_norm, X)
        loss = loss_fcn(logits[train_mask], label[train_mask])

        # Parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy is measured on the logits of the forward pass above,
        # i.e. the ones computed before this epoch's parameter update.
        val_acc, test_acc = evaluate(g, logits.argmax(dim=1))
        if epoch % 20 != 0:
            continue
        print(
            f"In epoch {epoch}, loss: {loss:.3f}, val acc: {val_acc:.3f}"
            f", test acc: {test_acc:.3f}"
        )
class GCNIIConvolution(nn.Module):
    """Single GCNII layer: propagation, initial residual, identity mapping."""

    def __init__(self, in_size, out_size):
        super().__init__()
        self.out_size = out_size
        # Bias-free projection used in the identity-mapping term.
        self.weight = nn.Linear(in_size, out_size, bias=False)

    ############################################################################
    # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement the GCNII
    # forward process.
    ############################################################################
    def forward(self, A_norm, H, H0, lamda, alpha, l):
        """Apply the layer with 1-based layer index ``l``.

        ``alpha`` mixes the propagated features with the initial features
        ``H0``. ``beta = log(lamda / l + 1)`` mixes that result with its
        linear projection.
        """
        beta = math.log(lamda / l + 1)

        # Multiply a sparse matrix by a dense matrix.
        propagated = A_norm @ H
        mixed = (1 - alpha) * propagated + alpha * H0
        return (1 - beta) * mixed + beta * self.weight(mixed)
def evaluate(model, A_norm, H, label, val_mask, test_mask):
    """Return ``(val_acc, test_acc)`` of ``model`` on the masked nodes.

    The model is switched to evaluation mode (and left in it; the caller
    re-enables training mode). The forward pass runs under
    ``torch.no_grad()`` so that no autograd graph is built for what is a
    pure inference step.

    Parameters
    ----------
    model : callable ``model(A_norm, H)`` returning per-node logits.
    A_norm, H : inputs forwarded unchanged to ``model``.
    label : ground-truth class index per node.
    val_mask, test_mask : masks selecting the validation / test nodes.
    """
    model.eval()
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        logits = model(A_norm, H)
    pred = logits.argmax(dim=1)

    # Compute accuracy on validation/test set.
    val_acc = (pred[val_mask] == label[val_mask]).float().mean()
    test_acc = (pred[test_mask] == label[test_mask]).float().mean()
    return val_acc, test_acc
class SparseMHA(nn.Module):
    """Multi-head attention restricted to the non-zero entries of a sparse
    matrix.
    """

    def __init__(self, hidden_size=80, num_heads=8):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.head_dim = hidden_size // num_heads
        # Scale applied to the queries before the dot product.
        self.scaling = self.head_dim**-0.5

        self.q_proj = nn.Linear(hidden_size, hidden_size)
        self.k_proj = nn.Linear(hidden_size, hidden_size)
        self.v_proj = nn.Linear(hidden_size, hidden_size)
        self.out_proj = nn.Linear(hidden_size, hidden_size)

    def forward(self, A, h):
        """Attend over the entries of ``A`` and return updated features."""
        num_nodes = len(h)
        # Every projection is split into (node, head_dim, head).
        per_head = (num_nodes, self.head_dim, self.num_heads)

        q = self.q_proj(h).reshape(per_head)
        q *= self.scaling
        k = self.k_proj(h).reshape(per_head)
        v = self.v_proj(h).reshape(per_head)

        ######################################################################
        # (HIGHLIGHT) Compute the multi-head attention with Sparse Matrix API
        ######################################################################
        scores = dglsp.bsddmm(A, q, k.transpose(1, 0))  # [N, N, nh]
        attended = dglsp.bspmm(scores.softmax(), v)

        return self.out_proj(attended.reshape(num_nodes, -1))
def train(model, dataset, evaluator, device):
    """Train ``model`` on the train split of ``dataset`` for 50 epochs.

    Batches of 256 graphs are used for the train (shuffled), validation and
    test splits. Optimisation uses Adam (lr 1e-3) on a binary cross-entropy
    with logits loss, with a ``StepLR`` scheduler whose step size equals the
    number of epochs. After every epoch the average training loss and the
    validation/test metrics returned by ``evaluate`` are printed.
    """
    loader_kwargs = {"batch_size": 256, "collate_fn": collate_dgl}
    train_dataloader = GraphDataLoader(
        dataset[dataset.train_idx], shuffle=True, **loader_kwargs
    )
    valid_dataloader = GraphDataLoader(
        dataset[dataset.val_idx], **loader_kwargs
    )
    test_dataloader = GraphDataLoader(
        dataset[dataset.test_idx], **loader_kwargs
    )

    num_epochs = 50
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(
        optimizer, step_size=num_epochs, gamma=0.5
    )
    loss_fcn = nn.BCEWithLogitsLoss()

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for batched_g, labels in train_dataloader:
            batched_g = batched_g.to(device)
            labels = labels.to(device)
            node_feat = batched_g.ndata["feat"]
            pos_enc = batched_g.ndata["PE"]

            logits = model(batched_g, node_feat, pos_enc)
            loss = loss_fcn(logits, labels.float())
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # The learning-rate schedule advances once per epoch.
        scheduler.step()

        avg_loss = total_loss / len(train_dataloader)
        val_metric = evaluate(model, valid_dataloader, evaluator, device)
        test_metric = evaluate(model, test_dataloader, evaluator, device)
        print(
            f"Epoch: {epoch:03d}, Loss: {avg_loss:.4f}, "
            f"Val: {val_metric:.4f}, Test: {test_metric:.4f}"
        )
class GATConv(nn.Module):
    """Multi-head graph attention layer written with DGL sparse operators.

    Features are projected to ``out_size * num_heads`` values per node and
    viewed as ``(node, out_size, num_heads)``. Attention scores are computed
    per non-zero entry of the adjacency matrix passed to ``forward``.
    """

    def __init__(self, in_size, out_size, num_heads, dropout):
        super().__init__()

        self.out_size = out_size
        self.num_heads = num_heads
        self.dropout = nn.Dropout(dropout)
        self.W = nn.Linear(in_size, out_size * num_heads)
        # Attention vectors for the row (``a_l``) and column (``a_r``) side.
        attn_shape = (1, out_size, num_heads)
        self.a_l = nn.Parameter(torch.zeros(*attn_shape))
        self.a_r = nn.Parameter(torch.zeros(*attn_shape))
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-normal initialise the projection and attention vectors."""
        gain = nn.init.calculate_gain("relu")
        for tensor in (self.W.weight, self.a_l, self.a_r):
            nn.init.xavier_normal_(tensor, gain=gain)

    ###########################################################################
    # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement
    # multihead attention.
    ###########################################################################
    def forward(self, A_hat, Z):
        """Return the attention-weighted aggregation of the projected ``Z``."""
        Z = self.dropout(Z)
        num_nodes = Z.shape[0]
        Z = self.W(Z).view(num_nodes, self.out_size, self.num_heads)

        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        score_l = (Z * self.a_l).sum(dim=1)
        score_r = (Z * self.a_r).sum(dim=1)
        edge_score = F.leaky_relu(score_l[A_hat.row] + score_r[A_hat.col])

        # Softmax over the sparse entries, then dropout on the weights.
        attention = dglsp.val_like(A_hat, edge_score).softmax()
        attention = dglsp.val_like(attention, self.dropout(attention.val))
        return dglsp.bspmm(attention, Z)
if __name__ == "__main__":
    # If CUDA is available, use GPU to accelerate the training, use CPU
    # otherwise.
    dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # (TODO): Move the logic to a built-in dataset.
    # Load the data.
    url = "dataset/ACM3025.pkl"
    data_path = get_download_dir() + "/ACM3025.pkl"
    download(_get_dgl_url(url), path=data_path)

    # NOTE(review): unpickling executes arbitrary code from the file; this is
    # only acceptable because the file comes from the DGL dataset URL above.
    with open(data_path, "rb") as f:
        data = pickle.load(f)

    # Create sparse adjacency matrices corresponding to two meta paths.
    # Self-loops already added.
    # ``nonzero()`` yields the (row, col) indices of the stored entries; rows
    # are treated as destinations and columns as sources, so the index tensor
    # is stacked as [src, dst] for both meta paths.
    PAP_dst, PAP_src = data["PAP"].nonzero()
    PAP_indices = torch.stack(
        [torch.from_numpy(PAP_src).long(), torch.from_numpy(PAP_dst).long()]
    ).to(dev)
    PAP_A = dglsp.spmatrix(PAP_indices)
    PLP_dst, PLP_src = data["PLP"].nonzero()
    # The second row must be PLP_dst. Stacking PLP_src twice would turn the
    # PLP meta path into a matrix of self-loops only.
    PLP_indices = torch.stack(
        [torch.from_numpy(PLP_src).long(), torch.from_numpy(PLP_dst).long()]
    ).to(dev)
    PLP_A = dglsp.spmatrix(PLP_indices)
    A_list = [PAP_A, PLP_A]

    # Create HAN model.
    X = torch.from_numpy(data["feature"].todense()).float().to(dev)
    # Labels are stored one-hot: the number of columns is the number of
    # classes and the column index of each non-zero is the class id.
    label = torch.from_numpy(data["label"].todense())
    out_size = label.shape[1]
    label = label.nonzero()[:, 1].to(dev)
    in_size = X.shape[1]
    model = HAN(len(A_list), in_size, out_size).to(dev)

    # Kick off training.
    train(model, data, A_list, X, label)
relation_names : list[str] Relation names. """ def __init__( self, in_size, out_size, relation_names, activation=None, ): super(HeteroRelationalGraphConv, self).__init__() self.in_size = in_size self.out_size = out_size self.relation_names = relation_names self.activation = activation ######################################################################## # (HIGHLIGHT) HeteroGraphConv is a graph convolution operator over # heterogeneous graphs. A dictionary is passed where the key is the # relation name and the value is the insatnce of conv layer. ######################################################################## self.W = nn.ModuleDict( {str(rel): nn.Linear(in_size, out_size) for rel in relation_names} ) self.dropout = nn.Dropout(0.0) def forward(self, A, inputs): """Forward computation Parameters ---------- A : Hetero Sparse Matrix Input graph. inputs : dict[str, torch.Tensor] Node feature for each node type. Returns ------- dict[str, torch.Tensor] New node features for each node type. """ hs = {} for rel in A: src_type, edge_type, dst_type = rel if dst_type not in hs: hs[dst_type] = th.zeros( inputs[dst_type].shape[0], self.out_size ) #################################################################### # (HIGHLIGHT) Sparse library use hetero sparse matrix to present # heterogeneous graphs. A dictionary is passed where the key is # the tuple of (source node type, edge type, destination node type) # and the value is the sparse matrix contructed from the key on # global graph. The convolution operation is the multiplication of # sparse matrix and convolutional layer. 
def main(args):
    """Train and test the Hetero-RGCN model on one of the RDF datasets.

    Parameters
    ----------
    args : object with a ``dataset`` attribute
        Name of the dataset: ``"aifb"``, ``"mutag"``, ``"bgs"`` or ``"am"``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of the supported names.
    """
    # Load graph data.
    name = args.dataset
    if name == "aifb":
        dataset = AIFBDataset()
    elif name == "mutag":
        dataset = MUTAGDataset()
    elif name == "bgs":
        dataset = BGSDataset()
    elif name == "am":
        dataset = AMDataset()
    else:
        raise ValueError(
            f"Unknown dataset {name!r}; expected one of "
            "'aifb', 'mutag', 'bgs', 'am'."
        )

    g = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = g.nodes[category].data.pop("train_mask")
    test_mask = g.nodes[category].data.pop("test_mask")
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
    labels = g.nodes[category].data.pop("labels")

    # Split dataset into train, validate, test: the first fifth of the
    # training nodes is held out for validation.
    num_val = len(train_idx) // 5
    val_idx = train_idx[:num_val]
    train_idx = train_idx[num_val:]

    hidden_size = 16
    embed_layer = RelGraphEmbed(
        {ntype: g.num_nodes(ntype) for ntype in g.ntypes}, hidden_size
    )

    # Create model.
    model = EntityClassify(
        hidden_size,
        num_classes,
        list(set(g.etypes)),
        embed_layer,
    )

    # Optimizer.
    optimizer = th.optim.Adam(model.parameters(), lr=1e-2, weight_decay=0)

    # Construct hetero sparse matrix: one (src nodes x dst nodes) matrix per
    # canonical edge type.
    A = {}
    for stype, etype, dtype in g.canonical_etypes:
        rel = (stype, etype, dtype)
        eg = g[stype, etype, dtype]
        indices = th.stack(eg.edges("uv"))
        A[rel] = dglsp.spmatrix(
            indices, shape=(g.num_nodes(stype), g.num_nodes(dtype))
        )
        ###########################################################
        # (HIGHLIGHT) Compute the normalized adjacency matrix with
        # Sparse Matrix API
        ###########################################################
        D1_hat = dglsp.diag(A[rel].sum(1)) ** -0.5
        D2_hat = dglsp.diag(A[rel].sum(0)) ** -0.5
        A[rel] = D1_hat @ A[rel] @ D2_hat

    # Training loop.
    print("start training...")
    model.train()
    for epoch in range(10):
        optimizer.zero_grad()
        logits = model(A)[category]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        loss.backward()
        optimizer.step()

        # Train/validation metrics reuse the logits of the forward pass
        # above (computed before this epoch's parameter update).
        train_acc = th.sum(
            logits[train_idx].argmax(dim=1) == labels[train_idx]
        ).item() / len(train_idx)
        val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
        val_acc = th.sum(
            logits[val_idx].argmax(dim=1) == labels[val_idx]
        ).item() / len(val_idx)
        print(
            f"Epoch {epoch:05d} | Train Acc: {train_acc:.4f} | "
            f"Train Loss: {loss.item():.4f} | Valid Acc: {val_acc:.4f} | "
            f"Valid loss: {val_loss.item():.4f} "
        )
    print()

    model.eval()
    # Call the module itself rather than ``forward`` directly.
    logits = model(A)[category]
    test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
    test_acc = th.sum(
        logits[test_idx].argmax(dim=1) == labels[test_idx]
    ).item() / len(test_idx)
    print(
        "Test Acc: {:.4f} | Test loss: {:.4f}".format(
            test_acc, test_loss.item()
        )
    )
    print()
def train(model, optimizer, X, Y, train_mask):
    """Run one optimisation step on the training nodes.

    Puts ``model`` in training mode, computes the cross-entropy between
    ``model(X)`` and ``Y`` on the entries selected by ``train_mask``, and
    applies a single ``optimizer`` step. Returns ``None``.
    """
    model.train()
    optimizer.zero_grad()
    predictions = model(X)
    loss = F.cross_entropy(predictions[train_mask], Y[train_mask])
    loss.backward()
    optimizer.step()
class HypergraphAttention(nn.Module):
    """Hypergraph attention layer following `Hypergraph Convolution and
    Hypergraph Attention <https://arxiv.org/pdf/1901.08150.pdf>`_.
    """

    def __init__(self, in_size, out_size):
        super().__init__()
        # Shared projection for node and hyperedge features.
        self.P = nn.Linear(in_size, out_size)
        # Scores a concatenated (node, hyperedge) feature pair.
        self.a = nn.Linear(2 * out_size, 1)

    def forward(self, H, X, X_edges):
        """Re-weight the entries of ``H`` by attention and propagate ``X``."""
        Z = self.P(X)
        Z_edges = self.P(X_edges)

        # One similarity score per non-zero (row, col) entry of H.
        paired = torch.cat([Z[H.row], Z_edges[H.col]], 1)
        sim = F.leaky_relu(self.a(paired), 0.2).squeeze(1)

        # Reassign the hypergraph new weights.
        H_att = dglsp.val_like(H, sim).softmax()
        return hypergraph_laplacian(H_att) @ Z
def pagerank(A):
    """Return the PageRank vector after ``K`` update steps on ``A``.

    Uses the module-level constants ``N`` (vector length), ``DAMP`` (damping
    factor) and ``K`` (number of iterations). The vector starts uniform at
    ``1 / N`` and each step divides it element-wise by ``A.sum(0)`` before
    multiplying with the damped matrix.
    """
    degree = A.sum(0)
    teleport = (1 - DAMP) / N
    damped_A = DAMP * A
    rank = torch.ones(N) / N
    for _ in range(K):
        ########################################################################
        # (HIGHLIGHT) Take the advantage of DGL sparse APIs to calculate the
        # page rank.
        ########################################################################
        rank = teleport + damped_A @ (rank / degree)
    return rank
class SAGEConv(nn.Module):
    r"""GraphSAGE layer from "Inductive Representation Learning on Large
    Graphs", using a mean aggregator.
    """

    def __init__(
        self,
        in_size,
        out_size,
    ):
        super().__init__()
        # Source and destination nodes share the same input width.
        self._in_src_feats = in_size
        self._in_dst_feats = in_size
        self._out_size = out_size

        self.fc_neigh = nn.Linear(self._in_src_feats, out_size, bias=False)
        self.fc_self = nn.Linear(self._in_dst_feats, out_size, bias=True)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform initialise both weight matrices (ReLU gain)."""
        gain = nn.init.calculate_gain("relu")
        for linear in (self.fc_self, self.fc_neigh):
            nn.init.xavier_uniform_(linear.weight, gain=gain)

    def forward(self, A, feat):
        """Combine each destination node with the mean of its neighbors.

        The destination nodes are the first ``A.shape[1]`` rows of ``feat``.
        """
        num_dst = A.shape[1]
        feat_dst = feat[:num_dst]

        # Aggregator type: mean.
        projected_src = self.fc_neigh(feat)
        # Divided by degree.
        inv_degree = dglsp.diag(A.sum(0)) ** -1
        # Conv neighbors.
        neighbor_mean = (A @ inv_degree).T @ projected_src

        return self.fc_self(feat_dst) + neighbor_mean
def evaluate(model, A, dataloader, ndata, num_classes):
    """Return the multiclass accuracy of ``model`` over ``dataloader``.

    Every batch of seed nodes is expanded with ``multilayer_sample`` using a
    fan-out of 10 per layer; labels and model outputs are collected across
    batches and scored once with ``torchmetrics`` accuracy.

    Args:
        model: Network called as ``model(sampled_matrices, x)``.
        A: Sparse matrix handed to ``multilayer_sample``.
        dataloader: Iterable yielding batches of seed node ids.
        ndata: Node data mapping handed to ``multilayer_sample``.
        num_classes: Number of classes for the accuracy metric.
    """
    model.eval()
    fanouts = [10, 10, 10]
    labels, outputs = [], []
    # Gradients are never needed while scoring.
    with torch.no_grad():
        for seeds in dataloader:
            sampled_matrices, x, y = multilayer_sample(A, fanouts, seeds, ndata)
            labels.append(y)
            outputs.append(model(sampled_matrices, x))
    return MF.accuracy(
        torch.cat(outputs),
        torch.cat(labels),
        task="multiclass",
        num_classes=num_classes,
    )
'cpu' for CPU training, 'gpu' for GPU training.", ) args = parser.parse_args() if not torch.cuda.is_available(): args.mode = "cpu" print(f"Training in {args.mode} mode.") ##################################################################### # (HIGHLIGHT) This example implements a graphSAGE algorithm by sparse # operators, which involves sampling a subgraph from a full graph and # conducting training. # # First, the whole graph is loaded onto the CPU or GPU and transformed # to sparse matrix. To obtain the training subgraph, it samples three # submatrices by seed nodes, which contains their randomly sampled # 1-hop, 2-hop, and 3-hop neighbors. Then, the features of the # subgraph are input to the network for training. ##################################################################### # Load and preprocess dataset. print("Loading data") device = torch.device("cpu" if args.mode == "cpu" else "cuda") dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) g = dataset[0] g = g.to(device) # Create GraphSAGE model. in_size = g.ndata["feat"].shape[1] out_size = dataset.num_classes model = SAGE(in_size, 256, out_size).to(device) # Create sparse. indices = torch.stack(g.edges()) N = g.num_nodes() A = dglsp.spmatrix(indices, shape=(N, N)) # Model training. print("Training...") train(device, A, g.ndata, dataset, model) # Test the model. print("Testing...") acc = validate(device, A, g.ndata, dataset, model, batch_size=4096) print(f"Test accuracy {acc:.4f}") ================================================ FILE: examples/sparse/sampling/ladies.py ================================================ """ This script demonstrates how to use dgl sparse library to sample on graph and train model. It trains and tests a LADIES model using the sparse power and sp_broadcast_v operators to sample submatrix from the whole matrix. This flowchart describes the main functional sequence of the provided example. 
class SAGEConv(nn.Module):
    r"""Convolution layer used by the LADIES example (`Layer-Dependent
    Importance Sampling for Training Deep and Large Graph Convolutional
    Networks`).

    Combines a bias-free projection of aggregated neighbor features
    (``fc_neigh``) with a biased projection of the node's own features
    (``fc_self``).
    """

    def __init__(
        self,
        in_size,
        out_size,
    ):
        super().__init__()
        self._in_src_feats = self._in_dst_feats = in_size
        self._out_size = out_size
        # Keep the original construction order: neighbor map, then self map.
        self.fc_neigh = nn.Linear(self._in_src_feats, out_size, bias=False)
        self.fc_self = nn.Linear(self._in_dst_feats, out_size, bias=True)
        self.reset_parameters()

    def reset_parameters(self):
        """Xavier-uniform initialization of both weights with the ReLU gain."""
        gain = nn.init.calculate_gain("relu")
        self_weight, neigh_weight = self.fc_self.weight, self.fc_neigh.weight
        nn.init.xavier_uniform_(self_weight, gain=gain)
        nn.init.xavier_uniform_(neigh_weight, gain=gain)

    def forward(self, A, feat):
        """Aggregate ``feat`` through ``A`` and add the self projection.

        The first ``A.shape[1]`` rows of ``feat`` are treated as the output
        nodes' own features.
        """
        own_feat = feat[: A.shape[1]]
        neigh_proj = self.fc_neigh(feat)
        # Divide each column of A by its sum before aggregating.
        col_scale = dglsp.diag(A.sum(0)) ** -1
        normalized = A @ col_scale
        neigh_term = normalized.T @ neigh_proj
        return self.fc_self(own_feat) + neigh_term
def multilayer_sample(A, fanouts, seeds, ndata):
    """Sample one normalized sub-matrix per layer, LADIES style.

    Starting from ``seeds``, each step keeps only the columns of the current
    frontier, draws up to ``fanout`` rows with probability proportional to
    the squared entries of those columns, always keeps the frontier itself,
    and stores the re-weighted, column-normalized sub-matrix.

    Args:
        A: Sparse matrix supporting ``index_select``, ``**`` and ``sum``.
        fanouts: Number of rows to draw for each layer.
        seeds: Tensor of seed node ids.
        ndata: Mapping that provides ``"feat"`` and ``"label"`` tensors.

    Returns:
        ``(sampled_matrices, x, y)``: the per-layer matrices ordered from the
        outermost layer to the seeds, the features of the outermost
        frontier, and the labels of ``seeds``.
    """
    sampled_matrices = []
    src = seeds

    #########################################################################
    # (HIGHLIGHT) Using the sparse sample operator to perform LADIES sampling
    # algorithm from the neighboring nodes of the seeds nodes.
    # The sparse sp_power operator is applied to compute sample probability,
    # and sp_broadcast_v is then employed to normalize weight by performing
    # division operations on column.
    #########################################################################
    for fanout in fanouts:
        # Sample neighbors.
        sub_A = A.index_select(1, src)
        # Compute probability weight.
        row_probs = (sub_A**2).sum(1)
        row_probs = row_probs / row_probs.sum(0)
        # Sampling without replacement cannot return more rows than there
        # are rows with non-zero probability (e.g. a small last batch), so
        # cap the request instead of letting torch.multinomial fail.
        num_candidates = int(torch.count_nonzero(row_probs))
        num_samples = min(fanout, num_candidates)
        # Layer-wise sample nodes.
        row_ids = torch.multinomial(row_probs, num_samples, replacement=False)
        # Add self-loop.
        row_ids = torch.cat((row_ids, src), 0).unique()
        sampled_matrix = sub_A.index_select(0, row_ids)
        # Normalize edge weights.
        div_matrix = sp_broadcast_v(
            sampled_matrix, row_probs[row_ids].reshape(-1, 1), "truediv"
        )
        div_matrix = sp_broadcast_v(div_matrix, div_matrix.sum(0), "truediv")

        # Save the sampled matrix; inserting at the front keeps the list
        # ordered from the last sampled layer back to the seeds.
        sampled_matrices.insert(0, div_matrix)
        src = row_ids

    x = ndata["feat"][src]
    y = ndata["label"][seeds]
    return sampled_matrices, x, y
'cpu' for CPU training, 'gpu' for GPU training.", ) args = parser.parse_args() if not torch.cuda.is_available(): args.mode = "cpu" print(f"Training in {args.mode} mode.") ##################################################################### # (HIGHLIGHT) This example implements a LADIES algorithm by sparse # operators, which involves sampling a subgraph from a full graph and # conducting training. # # First, the whole graph is loaded onto the CPU or GPU and transformed # to sparse matrix. To obtain the training subgraph, it samples three # submatrices by seed nodes, which contains their layer-wise sampled # 1-hop, 2-hop, and 3-hop neighbors. Then, the features of the # subgraph are input to the network for training. ##################################################################### # Load and preprocess dataset. print("Loading data") device = torch.device("cpu" if args.mode == "cpu" else "cuda") dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products")) g = dataset[0] # Create LADIES model. in_size = g.ndata["feat"].shape[1] out_size = dataset.num_classes model = LADIES(in_size, 256, out_size).to(device) # Create sparse. indices = torch.stack(g.edges()) N = g.num_nodes() A = dglsp.spmatrix(indices, shape=(N, N)).coalesce() I = dglsp.identity(A.shape) # Initialize laplacian matrix. A_hat = A + I D_hat = dglsp.diag(A_hat.sum(1)) ** -0.5 A_norm = D_hat @ A_hat @ D_hat A_norm = A_norm.to(device) g = g.to(device) # Model training. print("Training...") train(device, A_norm, g.ndata, dataset, model) # Test the model. 
def pre_compute(A, X, k):
    """Left-multiply ``X`` by ``A`` ``k`` times and return the result.

    With ``k == 0`` the input ``X`` is returned unchanged.
    """
    diffused = X
    hops = range(k)
    for _hop in hops:
        diffused = A @ diffused
    return diffused
class SIGN(nn.Module):
    """Simplified SIGN classifier over a list of pre-diffused feature tensors.

    ``theta`` holds ``r + 1`` linear maps, one per entry of the diffusion
    list; ``omega`` maps the concatenated hidden representations to the
    output size. The names follow the learnable matrices of the paper.
    """

    def __init__(self, in_size, out_size, r, hidden_size=256):
        super().__init__()
        num_branches = r + 1
        branches = []
        for _ in range(num_branches):
            branches.append(nn.Linear(in_size, hidden_size))
        self.theta = nn.ModuleList(branches)
        self.omega = nn.Linear(hidden_size * num_branches, out_size)

    def forward(self, X_sign):
        """Project each tensor with its own ``theta``, concatenate along
        ``dim=1``, apply ReLU and finish with ``omega``."""
        projected = [
            self.theta[hop](feats) for hop, feats in enumerate(X_sign)
        ]
        hidden = F.relu(torch.cat(projected, dim=1))
        return self.omega(hidden)
class MLP(nn.Module):
    """Two linear layers with ReLU and dropout (p=0.8) in between.

    Both layers output ``hidden_size`` features.
    """

    def __init__(self, in_size, hidden_size):
        super().__init__()
        self.linear_1 = nn.Linear(in_size, hidden_size)
        self.linear_2 = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(0.8)

    def forward(self, X):
        """Return ``linear_2(dropout(relu(linear_1(X))))``."""
        hidden = self.dropout(F.relu(self.linear_1(X)))
        return self.linear_2(hidden)
Y = Y0 = self.mlp(X) # Compute diagonal matrix D_tild. I = dglsp.identity(A.shape, device=A.device) D_tild = self.lam * dglsp.diag(A.sum(1)) + I # Iteratively compute new Y by equation (6) in the paper. for k in range(self.num_steps): Y_hat = self.lam * A @ Y + Y0 # The inverse of a diagonal matrix inverses its diagonal values. Y = (1 - self.alpha) * Y + self.alpha * (D_tild**-1) @ Y_hat # Apply a linear layer on the final output. return self.linear_out(Y) ################################################################################ # (HIGHLIGHT) Implementation of the advanced TWIRLS model with attention # to show the usage of differentiable weighted sparse matrix. ################################################################################ class TWIRLSWithAttention(nn.Module): def __init__( self, in_size, out_size, hidden_size=128, num_steps=16, lam=1.0, alpha=0.5, ): super().__init__() self.num_steps = num_steps self.lam = lam self.alpha = alpha self.mlp = MLP(in_size, hidden_size) self.linear_out = nn.Linear(hidden_size, out_size) def forward(self, A, X): # Compute Y = Y0 = f(X; W) using a two-layer MLP. Y = Y0 = self.mlp(X) # Compute diagonal matrix D_tild. I = dglsp.identity(A.shape, device=A.device) D_tild = self.lam * dglsp.diag(A.sum(1)) + I # Conduct half of the diffusion steps. for k in range(self.num_steps // 2): Y_hat = self.lam * A @ Y + Y0 Y = (1 - self.alpha) * Y + self.alpha * (D_tild**-1) @ Y_hat # Calculate attention weight by equation (25) in the paper. Y_i = Y[A.row] Y_j = Y[A.col] norm_ij = torch.linalg.vector_norm(Y_i - Y_j, dim=1) # Bound the attention value within [0.0, 1.0). gamma_ij = torch.clamp(0.5 / (norm_ij + 1e-7), min=0.0, max=1.0) # Create a new adjacency matrix with the new weight. A = dglsp.val_like(A, gamma_ij) # Recompute D_tild. D_tild = self.lam * dglsp.diag(A.sum(1)) + I # Conduct the other half of the diffusion steps. 
def evaluate(g, pred):
    """Return ``(val_acc, test_acc)`` for predicted class ids ``pred``.

    Args:
        g: Graph whose ``ndata`` holds ``"label"``, ``"val_mask"`` and
            ``"test_mask"``.
        pred: Tensor of predicted class ids, one per node.

    The function only compares predictions with labels. It deliberately does
    not touch any model: the previous ``model.eval()`` call here referred to
    a name that is not a parameter, and it ran after the predictions had
    already been produced.
    """
    label = g.ndata["label"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]

    # Compute accuracy on validation/test set.
    val_acc = (pred[val_mask] == label[val_mask]).float().mean()
    test_acc = (pred[test_mask] == label[test_mask]).float().mean()
    return val_acc, test_acc


def train(g, model, A, X):
    """Train ``model`` for 300 epochs with Adam and print metrics per epoch.

    Args:
        g: Graph whose ``ndata`` holds ``"label"`` and the train/val/test
            masks.
        model: Module called as ``model(A, X)`` that returns per-node logits.
        A: Adjacency matrix forwarded to the model.
        X: Node feature tensor forwarded to the model.
    """
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    optimizer = Adam(model.parameters(), lr=5e-4)

    for epoch in range(300):
        # Switch the model to training mode (enables dropout).
        model.train()

        # Forward.
        logits = model(A, X)

        # Compute loss with nodes in training set.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Backward.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Switch to evaluation mode and predict again, so the reported
        # accuracies are not computed from a forward pass with dropout
        # active.
        model.eval()
        with torch.no_grad():
            pred = model(A, X).argmax(1)

        # Evaluate the prediction.
        val_acc, test_acc = evaluate(g, pred)
        print(
            f"In epoch {epoch}, loss: {loss:.3f}, val acc: {val_acc:.3f}, test"
            f" acc: {test_acc:.3f}"
        )
class Encoder(layers.Layer):
    """GCN encoder for DGI that can optionally shuffle its input rows.

    Wraps a ``GCN`` whose hidden and output sizes are both ``n_hidden``.
    """

    def __init__(self, g, in_feats, n_hidden, n_layers, activation, dropout):
        super().__init__()
        self.g = g
        self.conv = GCN(
            g, in_feats, n_hidden, n_hidden, n_layers, activation, dropout
        )

    def call(self, features, corrupt=False):
        """Encode ``features``; with ``corrupt=True`` the rows are first
        permuted at random (one index per node of ``self.g``)."""
        inputs = features
        if corrupt:
            shuffled = np.random.permutation(self.g.number_of_nodes())
            inputs = tf.gather(inputs, shuffled)
        return self.conv(inputs)
class DGI(tf.keras.Model):
    """Deep Graph Infomax model: encoder, discriminator and its loss.

    Calling the model returns the sum of the mean sigmoid cross-entropy
    losses for the uncorrupted (target 1) and corrupted (target 0)
    embeddings.
    """

    def __init__(self, g, in_feats, n_hidden, n_layers, activation, dropout):
        super().__init__()
        self.encoder = Encoder(
            g, in_feats, n_hidden, n_layers, activation, dropout
        )
        self.discriminator = Discriminator(n_hidden)
        self.loss = tf.nn.sigmoid_cross_entropy_with_logits

    def call(self, features):
        """Return the scalar DGI loss for ``features``."""
        # Encode the real input first, then the corrupted one (same order as
        # before, so random state is consumed identically).
        real_embed = self.encoder(features, corrupt=False)
        fake_embed = self.encoder(features, corrupt=True)
        # Summary vector: sigmoid of the mean real embedding over axis 0.
        summary = tf.nn.sigmoid(tf.reduce_mean(real_embed, axis=0))

        real_score = self.discriminator(real_embed, summary)
        fake_score = self.discriminator(fake_embed, summary)

        real_loss = self.loss(tf.ones(real_score.shape), real_score)
        fake_loss = self.loss(tf.zeros(fake_score.shape), fake_score)
        return tf.reduce_mean(real_loss) + tf.reduce_mean(fake_loss)
class GCN(layers.Layer):
    """Stack of ``GraphConv`` layers applied on a fixed graph ``g``.

    The stack consists of one input layer, ``n_layers - 1`` hidden layers
    (all with ``activation``) and one output layer without activation.
    Dropout is applied to the input of every layer except the first.
    """

    def __init__(
        self, g, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.g = g
        self.layers = []
        # Input layer followed by the hidden layers. The input layer is
        # always created, even when ``n_layers`` is smaller than 1.
        widths = [in_feats] + [n_hidden] * max(n_layers, 1)
        for width_in, width_out in zip(widths, widths[1:]):
            self.layers.append(
                GraphConv(width_in, width_out, activation=activation)
            )
        # Output layer.
        self.layers.append(GraphConv(n_hidden, n_classes))
        self.dropout = layers.Dropout(dropout)

    def call(self, features):
        """Propagate ``features`` through every layer and return the result."""
        h = features
        for idx, conv in enumerate(self.layers):
            layer_input = h if idx == 0 else self.dropout(h)
            h = conv(self.g, layer_input)
        return h
features.shape[1] n_classes = data.num_classes n_edges = g.number_of_edges() # add self loop if args.self_loop: g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) n_edges = g.number_of_edges() # create DGI model dgi = DGI( g, in_feats, args.n_hidden, args.n_layers, tf.keras.layers.PReLU( alpha_initializer=tf.constant_initializer(0.25) ), args.dropout, ) dgi_optimizer = tf.keras.optimizers.Adam(learning_rate=args.dgi_lr) # train deep graph infomax cnt_wait = 0 best = 1e9 best_t = 0 dur = [] for epoch in range(args.n_dgi_epochs): if epoch >= 3: t0 = time.time() with tf.GradientTape() as tape: loss = dgi(features) # Manually Weight Decay # We found Tensorflow has a different implementation on weight decay # of Adam(W) optimizer with PyTorch. And this results in worse results. # Manually adding weights to the loss to do weight decay solves this problem. for weight in dgi.trainable_weights: loss = loss + args.weight_decay * tf.nn.l2_loss(weight) grads = tape.gradient(loss, dgi.trainable_weights) dgi_optimizer.apply_gradients(zip(grads, dgi.trainable_weights)) if loss < best: best = loss best_t = epoch cnt_wait = 0 dgi.save_weights("best_dgi.pkl") else: cnt_wait += 1 if cnt_wait == args.patience: print("Early stopping!") break if epoch >= 3: dur.append(time.time() - t0) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.numpy().item(), n_edges / np.mean(dur) / 1000, ) ) # create classifier model classifier = Classifier(args.n_hidden, n_classes) classifier_optimizer = tf.keras.optimizers.Adam( learning_rate=args.classifier_lr ) # train classifier print("Loading {}th epoch".format(best_t)) dgi.load_weights("best_dgi.pkl") embeds = dgi.encoder(features, corrupt=False) embeds = tf.stop_gradient(embeds) dur = [] loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=True ) for epoch in range(args.n_classifier_epochs): if epoch >= 3: t0 = time.time() with tf.GradientTape() as tape: preds = 
classifier(embeds) loss = loss_fcn(labels[train_mask], preds[train_mask]) # Manually Weight Decay # We found Tensorflow has a different implementation on weight decay # of Adam(W) optimizer with PyTorch. And this results in worse results. # Manually adding weights to the loss to do weight decay solves this problem. # In original code, there's no weight decay applied in this part # link: https://github.com/PetarV-/DGI/blob/master/execute.py#L121 # for weight in classifier.trainable_weights: # loss = loss + \ # args.weight_decay * tf.nn.l2_loss(weight) grads = tape.gradient(loss, classifier.trainable_weights) classifier_optimizer.apply_gradients( zip(grads, classifier.trainable_weights) ) if epoch >= 3: dur.append(time.time() - t0) acc = evaluate(classifier, embeds, labels, val_mask) print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.numpy().item(), acc, n_edges / np.mean(dur) / 1000, ) ) print() acc = evaluate(classifier, embeds, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="DGI") register_data_args(parser) parser.add_argument( "--dropout", type=float, default=0.0, help="dropout probability" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument( "--dgi-lr", type=float, default=1e-3, help="dgi learning rate" ) parser.add_argument( "--classifier-lr", type=float, default=1e-2, help="classifier learning rate", ) parser.add_argument( "--n-dgi-epochs", type=int, default=300, help="number of training epochs", ) parser.add_argument( "--n-classifier-epochs", type=int, default=300, help="number of training epochs", ) parser.add_argument( "--n-hidden", type=int, default=512, help="number of hidden gcn units" ) parser.add_argument( "--n-layers", type=int, default=1, help="number of hidden gcn layers" ) parser.add_argument( "--weight-decay", type=float, default=0.0, help="Weight for 
L2 loss" ) parser.add_argument( "--patience", type=int, default=20, help="early stop patience condition" ) parser.add_argument( "--self-loop", action="store_true", help="graph self-loop (default=False)", ) parser.set_defaults(self_loop=False) args = parser.parse_args() print(args) main(args) ================================================ FILE: examples/tensorflow/gat/README.md ================================================ Graph Attention Networks (GAT) ============ - Paper link: [https://arxiv.org/abs/1710.10903](https://arxiv.org/abs/1710.10903) - Author's code repo (in Tensorflow): [https://github.com/PetarV-/GAT](https://github.com/PetarV-/GAT). - Popular pytorch implementation: [https://github.com/Diego999/pyGAT](https://github.com/Diego999/pyGAT). Dependencies ------------ - tensorflow 2.1.0+ - requests ```bash pip install tensorflow requests export DGLBACKEND=tensorflow ``` How to run ---------- Run with the following commands: ```bash python3 train.py --dataset=cora --gpu=0 ``` ```bash python3 train.py --dataset=citeseer --gpu=0 --early-stop ``` ```bash python3 train.py --dataset=pubmed --gpu=0 --num-out-heads=8 --weight-decay=0.001 --early-stop ``` Results ------- | Dataset | Test Accuracy | Baseline (paper) | | -------- | ------------- | ---------------- | | Cora | 84.2 | 83.0(+-0.7) | | Citeseer | 70.9 | 72.5(+-0.7) | | Pubmed | 78.5 | 79.0(+-0.3) | * All the accuracy numbers are obtained after 200 epochs. * All time is measured on EC2 p3.2xlarge instance w/ V100 GPU. ================================================ FILE: examples/tensorflow/gat/gat.py ================================================ """ Graph Attention Networks in DGL using SPMV optimization. 
class GAT(tf.keras.Model):
    """Stack of ``GATConv`` layers evaluated on one fixed graph.

    The model holds an input layer, ``num_layers - 1`` hidden layers and a
    final output layer, all stored in ``self.gat_layers`` in that order.

    Parameters
    ----------
    g
        Graph handed to every ``GATConv`` call as its first argument.
    num_layers : int
        Number of layers applied before the output projection.
    in_dim : int
        Size of the input features.
    num_hidden : int
        Per-head output size of the input and hidden layers.
    num_classes : int
        Per-head output size of the output layer.
    heads : sequence of int
        ``heads[i]`` is the head count of layer ``i``; the last entry belongs
        to the output layer.
    activation
        Activation forwarded to the input and hidden layers (the output
        layer receives ``None``).
    feat_drop, attn_drop, negative_slope
        Forwarded unchanged to every ``GATConv``.
    residual
        Forwarded to the hidden and output layers; the input layer is always
        built with ``False``.
    """

    def __init__(
        self,
        g,
        num_layers,
        in_dim,
        num_hidden,
        num_classes,
        heads,
        activation,
        feat_drop,
        attn_drop,
        negative_slope,
        residual,
    ):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = num_layers
        self.activation = activation
        # Arguments every layer shares, in GATConv's positional order.
        shared = (feat_drop, attn_drop, negative_slope)

        self.gat_layers = []
        # Input projection: never residual.
        first = GATConv(
            in_dim, num_hidden, heads[0], *shared, False, activation
        )
        self.gat_layers.append(first)

        # Hidden layers: heads of the previous layer are concatenated, so the
        # incoming width is num_hidden * (previous head count).
        for idx in range(1, num_layers):
            width_in = num_hidden * heads[idx - 1]
            hidden = GATConv(
                width_in, num_hidden, heads[idx], *shared, residual, activation
            )
            self.gat_layers.append(hidden)

        # Output projection: no activation.
        final = GATConv(
            num_hidden * heads[-2],
            num_classes,
            heads[-1],
            *shared,
            residual,
            None,
        )
        self.gat_layers.append(final)

    def call(self, inputs):
        """Return logits obtained by averaging the output layer over axis 1."""
        hidden = inputs
        for layer in self.gat_layers[: self.num_layers]:
            per_head = layer(self.g, hidden)
            # Collapse everything after the first axis into one feature axis.
            hidden = tf.reshape(per_head, (per_head.shape[0], -1))
        # Output projection, then mean over axis 1.
        projected = self.gat_layers[-1](self.g, hidden)
        return tf.reduce_mean(projected, axis=1)
def accuracy(logits, labels):
    """Return the fraction of rows whose arg-max over axis 1 equals the label.

    The result is converted to a plain Python float via ``.numpy().item()``.
    """
    predicted = tf.math.argmax(logits, axis=1)
    hits = tf.cast(predicted == labels, dtype=tf.float32)
    return tf.reduce_mean(hits).numpy().item()
EarlyStopping(patience=100) # loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy( # from_logits=False) loss_fcn = tf.nn.sparse_softmax_cross_entropy_with_logits # use optimizer optimizer = tf.keras.optimizers.Adam( learning_rate=args.lr, epsilon=1e-8 ) # initialize graph dur = [] for epoch in range(args.epochs): if epoch >= 3: t0 = time.time() # forward with tf.GradientTape() as tape: tape.watch(model.trainable_weights) logits = model(features, training=True) loss_value = tf.reduce_mean( loss_fcn( labels=labels[train_mask], logits=logits[train_mask] ) ) # Manually Weight Decay # We found Tensorflow has a different implementation on weight decay # of Adam(W) optimizer with PyTorch. And this results in worse results. # Manually adding weights to the loss to do weight decay solves this problem. for weight in model.trainable_weights: loss_value = loss_value + args.weight_decay * tf.nn.l2_loss( weight ) grads = tape.gradient(loss_value, model.trainable_weights) optimizer.apply_gradients(zip(grads, model.trainable_weights)) if epoch >= 3: dur.append(time.time() - t0) train_acc = accuracy(logits[train_mask], labels[train_mask]) if args.fastmode: val_acc = accuracy(logits[val_mask], labels[val_mask]) else: val_acc = evaluate(model, features, labels, val_mask) if args.early_stop: if stopper.step(val_acc, model): break print( "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainAcc {:.4f} |" " ValAcc {:.4f} | ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss_value.numpy().item(), train_acc, val_acc, n_edges / np.mean(dur) / 1000, ) ) print() if args.early_stop: model.load_weights("es_checkpoint.pb") acc = evaluate(model, features, labels, test_mask) print("Test Accuracy {:.4f}".format(acc)) if __name__ == "__main__": parser = argparse.ArgumentParser(description="GAT") register_data_args(parser) parser.add_argument( "--gpu", type=int, default=-1, help="which GPU to use. 
class EarlyStopping:
    """Stop training once the monitored score stops improving.

    A score counts as "not improved" only when it is strictly lower than the
    best score seen so far.  After ``patience`` such steps in a row (the
    counter is reset by every non-worse score) ``early_stop`` becomes
    ``True`` and stays that way.

    Parameters
    ----------
    patience : int
        Number of consecutive worse scores tolerated before stopping.
    """

    def __init__(self, patience=10):
        self.patience = patience
        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def step(self, acc, model):
        """Record one score and report whether training should stop.

        Parameters
        ----------
        acc
            Score for the current epoch; higher is better.
        model
            Object exposing ``save_weights(path)``; it is checkpointed
            whenever the score is not worse than the best one so far.

        Returns
        -------
        bool
            The current value of ``early_stop``.
        """
        got_worse = self.best_score is not None and acc < self.best_score
        if got_worse:
            self.counter += 1
            print(
                f"EarlyStopping counter: {self.counter} out of {self.patience}"
            )
            if self.counter >= self.patience:
                self.early_stop = True
            return self.early_stop

        # First score ever, or a score at least as good as the best one.
        is_first = self.best_score is None
        self.best_score = acc
        self.save_checkpoint(model)
        if not is_first:
            self.counter = 0
        return self.early_stop

    def save_checkpoint(self, model):
        """Write the model weights to ``es_checkpoint.pb``."""
        model.save_weights("es_checkpoint.pb")
class GCNLayer(layers.Layer):
    """One graph-convolution layer built on DGL's builtin message functions.

    The layer computes ``norm * sum_over_in_edges(norm_src * (h_src @ W))``,
    then adds the optional bias and applies the optional activation.  It
    reads the per-node factor from ``g.ndata["norm"]``, which the caller must
    populate beforehand (the training script in this file stores
    ``in_degree ** -0.5`` there).

    Parameters
    ----------
    g
        Graph the layer operates on; its ``ndata`` is used as scratch space
        (keys ``"h"`` and ``"norm_h"`` are overwritten on every call).
    in_feats, out_feats : int
        Shape of the weight matrix ``(in_feats, out_feats)``.
    activation
        Callable applied to the output, or a falsy value for none.
    dropout
        Dropout rate applied to the input; a falsy value disables dropout.
    bias : bool
        Whether to add a learnable bias of shape ``(out_feats,)``.
    """

    def __init__(self, g, in_feats, out_feats, activation, dropout, bias=True):
        super(GCNLayer, self).__init__()
        self.g = g
        w_init = tf.keras.initializers.VarianceScaling(
            scale=1.0, mode="fan_out", distribution="uniform"
        )
        self.weight = tf.Variable(
            initial_value=w_init(shape=(in_feats, out_feats), dtype="float32"),
            trainable=True,
        )
        if dropout:
            self.dropout = layers.Dropout(rate=dropout)
        else:
            # Falsy sentinel: ``call`` skips dropout entirely.
            self.dropout = 0.0
        if bias:
            b_init = tf.zeros_initializer()
            self.bias = tf.Variable(
                initial_value=b_init(shape=(out_feats,), dtype="float32"),
                trainable=True,
            )
        else:
            self.bias = None
        self.activation = activation

    def call(self, h):
        """Apply the layer to node features ``h`` and return the new features."""
        if self.dropout:
            h = self.dropout(h)
        self.g.ndata["h"] = tf.matmul(h, self.weight)
        # Source-side normalization before aggregation.
        self.g.ndata["norm_h"] = self.g.ndata["h"] * self.g.ndata["norm"]
        self.g.update_all(fn.copy_u("norm_h", "m"), fn.sum("m", "h"))
        # Destination-side normalization after aggregation.  Without it the
        # layer would only scale by the source factor, unlike the
        # message-passing variant (gcn_mp.py) whose reduce function multiplies
        # the summed messages by the destination node's "norm".
        h = self.g.ndata["h"] * self.g.ndata["norm"]
        if self.bias is not None:
            h = h + self.bias
        if self.activation:
            h = self.activation(h)
        return h
g.ndata["label"] train_mask = g.ndata["train_mask"] val_mask = g.ndata["val_mask"] test_mask = g.ndata["test_mask"] in_feats = features.shape[1] n_classes = data.num_classes n_edges = data.graph.number_of_edges() print( """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d""" % ( n_edges, n_classes, train_mask.numpy().sum(), val_mask.numpy().sum(), test_mask.numpy().sum(), ) ) # add self loop g = dgl.remove_self_loop(g) g = dgl.add_self_loop(g) n_edges = g.number_of_edges() # # normalization degs = tf.cast(tf.identity(g.in_degrees()), dtype=tf.float32) norm = tf.math.pow(degs, -0.5) norm = tf.where(tf.math.is_inf(norm), tf.zeros_like(norm), norm) g.ndata["norm"] = tf.expand_dims(norm, -1) # create GCN model model = GCN( g, in_feats, args.n_hidden, n_classes, args.n_layers, tf.nn.relu, args.dropout, ) optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr) loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=True ) # initialize graph dur = [] for epoch in range(args.n_epochs): if epoch >= 3: t0 = time.time() # forward with tf.GradientTape() as tape: logits = model(features) loss_value = loss_fcn(labels[train_mask], logits[train_mask]) # Manually Weight Decay # We found Tensorflow has a different implementation on weight decay # of Adam(W) optimizer with PyTorch. And this results in worse results. # Manually adding weights to the loss to do weight decay solves this problem. 
def gcn_reduce(node):
    """Sum the mailbox field ``"m"`` over axis 1 and scale by the node's norm.

    Returns a dict with the single key ``"h"`` holding the scaled sum.
    """
    summed = tf.reduce_sum(node.mailbox["m"], 1)
    return {"h": summed * node.data["norm"]}
def main(args):
    """Train the message-passing GCN and print validation/test accuracy.

    Reads ``dataset``, ``gpu``, ``self_loop``, ``n_hidden``, ``n_layers``,
    ``dropout``, ``lr``, ``n_epochs`` and ``weight_decay`` from ``args``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of "cora", "citeseer" or "pubmed".
    """
    # load and preprocess dataset
    if args.dataset == "cora":
        data = CoraGraphDataset()
    elif args.dataset == "citeseer":
        data = CiteseerGraphDataset()
    elif args.dataset == "pubmed":
        data = PubmedGraphDataset()
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)
        g = g.to(device)

    with tf.device(device):
        features = g.ndata["feat"]
        labels = g.ndata["label"]
        train_mask = g.ndata["train_mask"]
        val_mask = g.ndata["val_mask"]
        test_mask = g.ndata["test_mask"]
        in_feats = features.shape[1]
        n_classes = data.num_classes
        # Count edges on the graph actually used for training, as the
        # sibling train.py does, instead of going through ``data.graph``.
        n_edges = g.number_of_edges()
        print(
            """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d"""
            % (
                n_edges,
                n_classes,
                train_mask.numpy().sum(),
                val_mask.numpy().sum(),
                test_mask.numpy().sum(),
            )
        )

        # add self loop
        if args.self_loop:
            g = dgl.remove_self_loop(g)
            g = dgl.add_self_loop(g)
        # Recount once: the edge set may have changed above.
        n_edges = g.number_of_edges()

        # normalization: in_degree ** -0.5, with zero-degree nodes mapped to 0
        degs = tf.cast(tf.identity(g.in_degrees()), dtype=tf.float32)
        norm = tf.math.pow(degs, -0.5)
        norm = tf.where(tf.math.is_inf(norm), tf.zeros_like(norm), norm)
        g.ndata["norm"] = tf.expand_dims(norm, -1)

        # create GCN model
        model = GCN(
            g,
            in_feats,
            args.n_hidden,
            n_classes,
            args.n_layers,
            tf.nn.relu,
            args.dropout,
        )

        optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr)

        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True
        )

        # The first three epochs are treated as warm-up and are not timed.
        dur = []
        for epoch in range(args.n_epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                # training=True switches the Dropout layers on; without it
                # Keras runs them in inference mode and --dropout is ignored.
                logits = model(features, training=True)
                loss_value = loss_fcn(labels[train_mask], logits[train_mask])
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + args.weight_decay * tf.nn.l2_loss(
                        weight
                    )

            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            acc = evaluate(model, features, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(
                    epoch,
                    np.mean(dur),
                    loss_value.numpy().item(),
                    acc,
                    n_edges / np.mean(dur) / 1000,
                )
            )

        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
def main(args):
    """Train the ``GraphConv``-based GCN and print validation/test accuracy.

    Reads ``dataset``, ``gpu``, ``self_loop``, ``n_hidden``, ``n_layers``,
    ``dropout``, ``lr``, ``n_epochs`` and ``weight_decay`` from ``args``.

    Raises
    ------
    ValueError
        If ``args.dataset`` is not one of "cora", "citeseer" or "pubmed".
    """
    # load and preprocess dataset
    if args.dataset == "cora":
        data = CoraGraphDataset()
    elif args.dataset == "citeseer":
        data = CiteseerGraphDataset()
    elif args.dataset == "pubmed":
        data = PubmedGraphDataset()
    else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))

    g = data[0]
    if args.gpu < 0:
        device = "/cpu:0"
    else:
        device = "/gpu:{}".format(args.gpu)
        g = g.to(device)

    with tf.device(device):
        features = g.ndata["feat"]
        labels = g.ndata["label"]
        train_mask = g.ndata["train_mask"]
        val_mask = g.ndata["val_mask"]
        test_mask = g.ndata["test_mask"]
        in_feats = features.shape[1]
        n_classes = data.num_classes
        n_edges = g.number_of_edges()
        print(
            """----Data statistics------' #Edges %d #Classes %d #Train samples %d #Val samples %d #Test samples %d"""
            % (
                n_edges,
                n_classes,
                train_mask.numpy().sum(),
                val_mask.numpy().sum(),
                test_mask.numpy().sum(),
            )
        )

        # add self loop
        if args.self_loop:
            g = dgl.remove_self_loop(g)
            g = dgl.add_self_loop(g)
            n_edges = g.number_of_edges()

        # normalization: in_degree ** -0.5, with zero-degree nodes mapped to 0
        degs = tf.cast(tf.identity(g.in_degrees()), dtype=tf.float32)
        norm = tf.math.pow(degs, -0.5)
        norm = tf.where(tf.math.is_inf(norm), tf.zeros_like(norm), norm)
        g.ndata["norm"] = tf.expand_dims(norm, -1)

        # create GCN model
        model = GCN(
            g,
            in_feats,
            args.n_hidden,
            n_classes,
            args.n_layers,
            tf.nn.relu,
            args.dropout,
        )

        loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=True
        )
        # use optimizer
        optimizer = tf.keras.optimizers.Adam(
            learning_rate=args.lr, epsilon=1e-8
        )

        # The first three epochs are treated as warm-up and are not timed.
        dur = []
        for epoch in range(args.n_epochs):
            if epoch >= 3:
                t0 = time.time()
            # forward
            with tf.GradientTape() as tape:
                # training=True switches the Dropout layer on; without it
                # Keras runs it in inference mode and --dropout is ignored.
                logits = model(features, training=True)
                loss_value = loss_fcn(labels[train_mask], logits[train_mask])
                # Manually Weight Decay
                # We found Tensorflow has a different implementation on weight decay
                # of Adam(W) optimizer with PyTorch. And this results in worse results.
                # Manually adding weights to the loss to do weight decay solves this problem.
                for weight in model.trainable_weights:
                    loss_value = loss_value + args.weight_decay * tf.nn.l2_loss(
                        weight
                    )

            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))

            if epoch >= 3:
                dur.append(time.time() - t0)

            acc = evaluate(model, features, labels, val_mask)
            print(
                "Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | "
                "ETputs(KTEPS) {:.2f}".format(
                    epoch,
                    np.mean(dur),
                    loss_value.numpy().item(),
                    acc,
                    n_edges / np.mean(dur) / 1000,
                )
            )

        acc = evaluate(model, features, labels, test_mask)
        print("Test Accuracy {:.4f}".format(acc))
class EntityClassify(BaseRGCN):
    """R-GCN for entity classification built from basis-regularized layers.

    Every layer is a ``RelGraphConv`` using the ``"basis"`` regularizer with
    ``self.num_bases`` bases and ``self.use_self_loop``.  The input and
    hidden layers use ReLU and ``self.dropout``; the output layer applies a
    softmax over axis 1 and is built without a dropout argument.
    """

    def _basis_conv(self, in_dim, out_dim, activation, **extra):
        # Single place for the arguments all three layer kinds share.
        return RelGraphConv(
            in_dim,
            out_dim,
            self.num_rels,
            "basis",
            self.num_bases,
            activation=activation,
            self_loop=self.use_self_loop,
            **extra,
        )

    def create_features(self):
        """Return ``tf.range(self.num_nodes)`` as the input features."""
        return tf.range(self.num_nodes)

    def build_input_layer(self):
        """Layer mapping ``num_nodes`` inputs to ``h_dim`` hidden units."""
        return self._basis_conv(
            self.num_nodes, self.h_dim, tf.nn.relu, dropout=self.dropout
        )

    def build_hidden_layer(self, idx):
        """Layer mapping ``h_dim`` to ``h_dim``; ``idx`` is not used."""
        return self._basis_conv(
            self.h_dim, self.h_dim, tf.nn.relu, dropout=self.dropout
        )

    def build_output_layer(self):
        """Layer mapping ``h_dim`` to ``out_dim`` with a softmax over axis 1."""
        row_softmax = partial(tf.nn.softmax, axis=1)
        return self._basis_conv(self.h_dim, self.out_dim, row_softmax)
device = "/cpu:0" use_cuda = False else: device = "/gpu:{}".format(args.gpu) g = g.to(device) use_cuda = True num_nodes = g.number_of_nodes() node_ids = tf.range(num_nodes, dtype=tf.int64) edge_norm = g.edata["norm"] edge_type = tf.cast(g.edata[dgl.ETYPE], tf.int64) # find out the target node ids in g node_tids = g.ndata[dgl.NTYPE] loc = node_tids == category_id target_idx = tf.squeeze(tf.where(loc)) # since the nodes are featureless, the input feature is then the node id. feats = tf.range(num_nodes, dtype=tf.int64) with tf.device(device): # create model model = EntityClassify( num_nodes, args.n_hidden, num_classes, num_rels, num_bases=args.n_bases, num_hidden_layers=args.n_layers - 2, dropout=args.dropout, use_self_loop=args.use_self_loop, use_cuda=use_cuda, ) # optimizer optimizer = tf.keras.optimizers.Adam(learning_rate=args.lr) # training loop print("start training...") forward_time = [] backward_time = [] loss_fcn = tf.keras.losses.SparseCategoricalCrossentropy( from_logits=False ) for epoch in range(args.n_epochs): t0 = time.time() with tf.GradientTape() as tape: logits = model(g, feats, edge_type, edge_norm) logits = tf.gather(logits, target_idx) loss = loss_fcn( tf.gather(labels, train_idx), tf.gather(logits, train_idx) ) # Manually Weight Decay # We found Tensorflow has a different implementation on weight decay # of Adam(W) optimizer with PyTorch. And this results in worse results. # Manually adding weights to the loss to do weight decay solves this problem. 
for weight in model.trainable_weights: loss = loss + args.l2norm * tf.nn.l2_loss(weight) t1 = time.time() grads = tape.gradient(loss, model.trainable_weights) optimizer.apply_gradients(zip(grads, model.trainable_weights)) t2 = time.time() forward_time.append(t1 - t0) backward_time.append(t2 - t1) print( "Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}".format( epoch, forward_time[-1], backward_time[-1] ) ) train_acc = acc(logits, labels, train_idx) val_loss = loss_fcn( tf.gather(labels, val_idx), tf.gather(logits, val_idx) ) val_acc = acc(logits, labels, val_idx) print( "Train Accuracy: {:.4f} | Train Loss: {:.4f} | Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format( train_acc, loss.numpy().item(), val_acc, val_loss.numpy().item(), ) ) print() logits = model(g, feats, edge_type, edge_norm) logits = tf.gather(logits, target_idx) test_loss = loss_fcn( tf.gather(labels, test_idx), tf.gather(logits, test_idx) ) test_acc = acc(logits, labels, test_idx) print( "Test Accuracy: {:.4f} | Test loss: {:.4f}".format( test_acc, test_loss.numpy().item() ) ) print() print( "Mean forward time: {:4f}".format( np.mean(forward_time[len(forward_time) // 4 :]) ) ) print( "Mean backward time: {:4f}".format( np.mean(backward_time[len(backward_time) // 4 :]) ) ) if __name__ == "__main__": parser = argparse.ArgumentParser(description="RGCN") parser.add_argument( "--dropout", type=float, default=0, help="dropout probability" ) parser.add_argument( "--n-hidden", type=int, default=16, help="number of hidden units" ) parser.add_argument("--gpu", type=int, default=-1, help="gpu") parser.add_argument("--lr", type=float, default=1e-2, help="learning rate") parser.add_argument( "--n-bases", type=int, default=-1, help="number of filter weight matrices, default: -1 [use all]", ) parser.add_argument( "--n-layers", type=int, default=2, help="number of propagation rounds" ) parser.add_argument( "-e", "--n-epochs", type=int, default=50, help="number of training epochs", 
) parser.add_argument( "-d", "--dataset", type=str, required=True, help="dataset to use" ) parser.add_argument("--l2norm", type=float, default=0, help="l2 norm coef") parser.add_argument( "--use-self-loop", default=False, action="store_true", help="include self feature as a special relation", ) fp = parser.add_mutually_exclusive_group(required=False) fp.add_argument("--validation", dest="validation", action="store_true") fp.add_argument("--testing", dest="validation", action="store_false") parser.set_defaults(validation=True) args = parser.parse_args() print(args) args.bfs_level = args.n_layers + 1 # pruning used nodes for memory main(args) ================================================ FILE: examples/tensorflow/rgcn/model.py ================================================ import tensorflow as tf from tensorflow.keras import layers class BaseRGCN(layers.Layer): def __init__( self, num_nodes, h_dim, out_dim, num_rels, num_bases, num_hidden_layers=1, dropout=0, use_self_loop=False, use_cuda=False, ): super(BaseRGCN, self).__init__() self.num_nodes = num_nodes self.h_dim = h_dim self.out_dim = out_dim self.num_rels = num_rels self.num_bases = None if num_bases < 0 else num_bases self.num_hidden_layers = num_hidden_layers self.dropout = dropout self.use_self_loop = use_self_loop self.use_cuda = use_cuda # create rgcn layers self.build_model() def build_model(self): self.layers = [] # i2h i2h = self.build_input_layer() if i2h is not None: self.layers.append(i2h) # h2h for idx in range(self.num_hidden_layers): h2h = self.build_hidden_layer(idx) self.layers.append(h2h) # h2o h2o = self.build_output_layer() if h2o is not None: self.layers.append(h2o) def build_input_layer(self): return None def build_hidden_layer(self, idx): raise NotImplementedError def build_output_layer(self): return None def call(self, g, h, r, norm): for layer in self.layers: h = layer(g, h, r, norm) return h ================================================ FILE: examples/tensorflow/rgcn/utils.py 
#######################################################################
#
# Utility function for building training and testing graphs
#
#######################################################################


def get_adj_and_degrees(num_nodes, triplets):
    """Get adjacency list and degrees of the graph.

    Parameters
    ----------
    num_nodes : int
        Number of nodes; node ids in ``triplets`` must be < ``num_nodes``.
    triplets : iterable of (src, rel, dst)
        Only positions 0 and 2 of each triplet are read.

    Returns
    -------
    adj_list : list of np.ndarray
        ``adj_list[v]`` holds one ``[edge_index, other_endpoint]`` row per
        edge incident to ``v`` (both directions are recorded).
    degrees : np.ndarray
        ``degrees[v] == len(adj_list[v])``.
    """
    adj_list = [[] for _ in range(num_nodes)]
    for i, triplet in enumerate(triplets):
        adj_list[triplet[0]].append([i, triplet[2]])
        adj_list[triplet[2]].append([i, triplet[0]])

    degrees = np.array([len(a) for a in adj_list])
    adj_list = [np.array(a) for a in adj_list]
    return adj_list, degrees


def sample_edge_neighborhood(adj_list, degrees, n_triplets, sample_size):
    """Sample edges by neighborhood expansion.

    This guarantees that the sampled edges form a connected graph, which
    may help deeper GNNs that require information from more than one hop.

    Parameters
    ----------
    adj_list, degrees
        Output of :func:`get_adj_and_degrees`.
    n_triplets : int
        Total number of edges.
    sample_size : int
        Number of distinct edges to draw.

    Returns
    -------
    np.ndarray of int32, shape ``(sample_size,)``
        Indices of the sampled edges, without repetition.

    Raises
    ------
    ValueError
        If ``sample_size`` exceeds ``n_triplets``. Previously this surfaced
        only after ``n_triplets`` draws, as numpy's NaN-probability error.
    """
    if sample_size > n_triplets:
        raise ValueError(
            "Cannot sample {} distinct edges from {} edges.".format(
                sample_size, n_triplets
            )
        )

    edges = np.zeros((sample_size), dtype=np.int32)

    # initialize
    # sample_counts[v] is the number of not-yet-picked edges incident to v;
    # work on a copy so the caller's degrees array is left untouched.
    sample_counts = np.array(degrees, copy=True)
    picked = np.zeros(n_triplets, dtype=bool)
    seen = np.zeros(len(degrees), dtype=bool)

    for i in range(0, sample_size):
        # Prefer vertices already touched by a sampled edge, weighted by how
        # many unpicked edges they still have.
        weights = sample_counts * seen

        if np.sum(weights) == 0:
            # Nothing reachable yet (first draw, or the frontier is
            # exhausted): restart uniformly from any vertex with edges left.
            weights = np.ones_like(weights)
            weights[sample_counts == 0] = 0

        probabilities = (weights) / np.sum(weights)
        chosen_vertex = np.random.choice(
            np.arange(degrees.shape[0]), p=probabilities
        )
        chosen_adj_list = adj_list[chosen_vertex]
        seen[chosen_vertex] = True

        chosen_edge = np.random.choice(np.arange(chosen_adj_list.shape[0]))
        chosen_edge = chosen_adj_list[chosen_edge]
        edge_number = chosen_edge[0]

        # Rejection-sample until an unpicked edge is hit; one exists because
        # the chosen vertex had a positive sample_counts entry.
        while picked[edge_number]:
            chosen_edge = np.random.choice(np.arange(chosen_adj_list.shape[0]))
            chosen_edge = chosen_adj_list[chosen_edge]
            edge_number = chosen_edge[0]

        edges[i] = edge_number
        other_vertex = chosen_edge[1]
        picked[edge_number] = True
        sample_counts[chosen_vertex] -= 1
        sample_counts[other_vertex] -= 1
        seen[other_vertex] = True

    return edges


def sample_edge_uniform(adj_list, degrees, n_triplets, sample_size):
    """Sample edges uniformly from all the edges.

    ``adj_list`` and ``degrees`` are accepted only so the signature matches
    :func:`sample_edge_neighborhood`; they are not used.
    """
    all_edges = np.arange(n_triplets)
    return np.random.choice(all_edges, sample_size, replace=False)


def generate_sampled_graph_and_labels(
    triplets,
    sample_size,
    split_size,
    num_rels,
    adj_list,
    degrees,
    negative_rate,
    sampler="uniform",
):
    """Get training graph and signals.

    First perform edge neighborhood sampling on graph, then perform negative
    sampling to generate negative samples.

    Parameters
    ----------
    triplets : np.ndarray, shape (E, 3)
        All (src, rel, dst) edges.
    sample_size : int
        Number of edges to sample.
    split_size : float
        Fraction of the sampled edges kept as graph structure.
    num_rels : int
        Number of relation types (reverse relations get ``rel + num_rels``).
    adj_list, degrees
        Output of :func:`get_adj_and_degrees`.
    negative_rate : int
        Negative samples generated per positive sample.
    sampler : {"uniform", "neighbor"}
        Edge sampling strategy.

    Returns
    -------
    g, uniq_v, rel, norm, samples, labels

    Raises
    ------
    ValueError
        If ``sampler`` is neither "uniform" nor "neighbor".
    """
    # perform edge neighbor sampling
    if sampler == "uniform":
        edges = sample_edge_uniform(
            adj_list, degrees, len(triplets), sample_size
        )
    elif sampler == "neighbor":
        edges = sample_edge_neighborhood(
            adj_list, degrees, len(triplets), sample_size
        )
    else:
        raise ValueError("Sampler type must be either 'uniform' or 'neighbor'.")

    # relabel nodes to have consecutive node ids
    edges = triplets[edges]
    src, rel, dst = edges.transpose()
    uniq_v, edges = np.unique((src, dst), return_inverse=True)
    src, dst = np.reshape(edges, (2, -1))
    relabeled_edges = np.stack((src, rel, dst)).transpose()

    # negative sampling
    samples, labels = negative_sampling(
        relabeled_edges, len(uniq_v), negative_rate
    )

    # further split graph, only half of the edges will be used as graph
    # structure, while the rest half is used as unseen positive samples
    split_size = int(sample_size * split_size)
    graph_split_ids = np.random.choice(
        np.arange(sample_size), size=split_size, replace=False
    )
    src = src[graph_split_ids]
    dst = dst[graph_split_ids]
    rel = rel[graph_split_ids]

    # build DGL graph
    print("# sampled nodes: {}".format(len(uniq_v)))
    print("# sampled edges: {}".format(len(src) * 2))
    g, rel, norm = build_graph_from_triplets(
        len(uniq_v), num_rels, (src, rel, dst)
    )
    return g, uniq_v, rel, norm, samples, labels


def comp_deg_norm(g):
    """Return ``1 / in_degree`` per node as float32, with 0 for isolated nodes.

    The degree tensor is converted through its ``.numpy()`` method when it
    has one, instead of the PyTorch-only ``.float()`` call used before, so
    the function no longer depends on the tensor being a torch tensor.
    """
    g = g.local_var()
    in_deg = g.in_degrees(range(g.number_of_nodes()))
    if hasattr(in_deg, "numpy"):
        in_deg = in_deg.numpy()
    in_deg = np.asarray(in_deg, dtype=np.float32)
    # Zero in-degrees are expected; silence the divide warning and map the
    # resulting inf entries to 0 below.
    with np.errstate(divide="ignore"):
        norm = 1.0 / in_deg
    norm[np.isinf(norm)] = 0
    return norm


def build_graph_from_triplets(num_nodes, num_rels, triplets):
    """Create a DGL graph. The graph is bidirectional because RGCN authors
    use reversed relations.

    This function also generates edge type and normalization factor
    (reciprocal of node incoming degree).

    Returns
    -------
    g, rel, norm
        The graph, the per-edge relation ids (reverse edges use
        ``rel + num_rels``) in edge order, and the per-node norm from
        :func:`comp_deg_norm`.
    """
    g = dgl.DGLGraph()
    g.add_nodes(num_nodes)
    src, rel, dst = triplets
    # Add every edge in both directions; the reverse copy gets a shifted
    # relation id so the two directions stay distinguishable.
    src, dst = np.concatenate((src, dst)), np.concatenate((dst, src))
    rel = np.concatenate((rel, rel + num_rels))
    # Sort edges by destination (then source, then relation).
    edges = sorted(zip(dst, src, rel))
    dst, src, rel = np.array(edges).transpose()
    g.add_edges(src, dst)
    norm = comp_deg_norm(g)
    print("# nodes: {}, # edges: {}".format(num_nodes, len(src)))
    return g, rel, norm


def build_test_graph(num_nodes, num_rels, edges):
    """Build the evaluation graph from an ``(E, 3)`` array of (src, rel, dst)."""
    src, rel, dst = edges.transpose()
    print("Test graph:")
    return build_graph_from_triplets(num_nodes, num_rels, (src, rel, dst))


def negative_sampling(pos_samples, num_entity, negative_rate):
    """Corrupt positive triplets to create negative samples.

    Each positive triplet is repeated ``negative_rate`` times and, in every
    copy, either the subject (column 0) or the object (column 2) is replaced
    by a uniformly drawn entity id in ``[0, num_entity)``.

    Returns
    -------
    samples : np.ndarray
        ``pos_samples`` followed by the corrupted copies.
    labels : np.ndarray of float32
        1 for the leading positive rows, 0 for the corrupted rows.
    """
    size_of_batch = len(pos_samples)
    num_to_generate = size_of_batch * negative_rate
    neg_samples = np.tile(pos_samples, (negative_rate, 1))
    labels = np.zeros(size_of_batch * (negative_rate + 1), dtype=np.float32)
    labels[:size_of_batch] = 1
    values = np.random.randint(num_entity, size=num_to_generate)
    choices = np.random.uniform(size=num_to_generate)
    subj = choices > 0.5
    obj = choices <= 0.5
    neg_samples[subj, 0] = values[subj]
    neg_samples[obj, 2] = values[obj]

    return np.concatenate((pos_samples, neg_samples)), labels
* [Paper](https://arxiv.org/abs/1902.07153)
* [Author Implementation](https://github.com/Tiiiger/SGC)

Note: TensorFlow's AdamW implements weight decay differently from PyTorch's, which leads to differences in performance. You can see this by manually adding an L2 penalty on the weights to the loss, like [this](https://github.com/dmlc/dgl/blob/d696558b0bbcb60f1c4cf68dc93cd22c1077ce06/examples/tensorflow/gcn/train.py#L99), for comparison.

## Requirements

This example is tested with TensorFlow 2.3.0.

```bash
$ pip install dgl tensorflow tensorflow_addons
```

## Usage

```bash
$ python sgc.py --help
usage: sgc.py [-h] [--dataset DATASET] [--lr LR] [--bias] [--n-epochs N_EPOCHS] [--weight-decay WEIGHT_DECAY]

Run experiment for Simple Graph Convolution (SGC)

optional arguments:
  -h, --help            show this help message and exit
  --dataset DATASET     dataset to run
  --lr LR               learning rate
  --bias                flag to use bias
  --n-epochs N_EPOCHS   number of training epochs
  --weight-decay WEIGHT_DECAY
                        weight for L2 loss
```

## Results

```bash
# Cora citation network dataset
$ python sgc.py --dataset cora --lr 0.2 --n-epochs 100 --weight-decay 5e-6
...
Epoch 100/100
1/1 [==============================] - 0s 40ms/step - loss: 0.0313 - accuracy: 1.0000 - val_loss: 0.7870 - val_accuracy: 0.7620
Test Accuracy: 77.2%

# Citeseer citation network dataset
$ python sgc.py --dataset citeseer --lr 0.2 --n-epochs 150 --bias --weight-decay 5e-5
...
Epoch 150/150
1/1 [==============================] - 0s 65ms/step - loss: 0.0160 - accuracy: 1.0000 - val_loss: 1.1021 - val_accuracy: 0.6420
Test Accuracy: 63.9%

# Pubmed citation network dataset
$ python sgc.py --dataset pubmed --lr 0.2 --n-epochs 100 --bias --weight-decay 5e-5
...
# Dataset classes keyed by their command-line name. The classes are stored
# (not instances) so that importing this module constructs nothing and only
# the dataset actually requested is built by load_data().
_DATASETS = {
    "citeseer": CiteseerGraphDataset,
    "cora": CoraGraphDataset,
    "pubmed": PubmedGraphDataset,
}


def load_data(dataset):
    """Construct and return the dataset registered under ``dataset``.

    Raises ``KeyError`` when the name is not one of the keys of ``_DATASETS``.
    """
    return _DATASETS[dataset](verbose=False)


def _sum_boolean_tensor(x):
    """Count the ``True`` entries of a boolean tensor (as an int64 scalar)."""
    return tf.reduce_sum(tf.cast(x, dtype="int64"))


def describe_data(data):
    """Return a short multi-line text summary of the dataset's first graph."""
    g = data[0]
    n_edges = g.number_of_edges()
    num_classes = data.num_classes

    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]

    description = textwrap.dedent(
        f"""
        ----Data statistics----
        Edges {n_edges:,.0f}
        Classes {num_classes:,.0f}
        Train samples {_sum_boolean_tensor(train_mask):,.0f}
        Val samples {_sum_boolean_tensor(val_mask):,.0f}
        Test samples {_sum_boolean_tensor(test_mask):,.0f}
        """
    )
    return description


class SGC(tf.keras.Model):
    """Keras model wrapping a single ``SGConv`` layer (k=2) over a fixed graph.

    The graph is stored on the model, so ``fit``/``predict`` always process
    all nodes in one batch and the train/validation node masks are applied
    inside ``train_step``/``test_step``.
    """

    def __init__(self, g, num_classes, bias=False):
        super().__init__()
        self.num_classes = num_classes
        self.g = self.ensure_self_loop(g)
        self.conv = SGConv(
            in_feats=self.in_feats,
            out_feats=self.num_classes,
            k=2,
            cached=True,
            bias=bias,
        )

    def call(self, inputs):
        return self.conv(self.g, inputs)

    @property
    def in_feats(self):
        """Width of the node feature matrix stored under ``g.ndata["feat"]``."""
        return self.g.ndata["feat"].shape[1]

    @property
    def num_nodes(self):
        return self.g.num_nodes()

    @staticmethod
    def ensure_self_loop(g):
        """Return ``g`` with exactly one self-loop per node.

        Existing self-loops are removed first so none is duplicated.
        """
        g = g.remove_self_loop()
        g = g.add_self_loop()
        return g

    def train_step(self, data):
        """Run one optimisation step using only the ``train_mask`` nodes."""
        X, y = data
        mask = self.g.ndata["train_mask"]

        with tf.GradientTape() as tape:
            y_pred = self(X, training=True)
            loss = self.compiled_loss(y[mask], y_pred[mask])

        trainable_variables = self.trainable_variables
        gradients = tape.gradient(loss, trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
        self.compiled_metrics.update_state(y[mask], y_pred[mask])
        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Evaluate loss and metrics using only the ``val_mask`` nodes."""
        X, y = data
        mask = self.g.ndata["val_mask"]
        y_pred = self(X, training=False)
        self.compiled_loss(y[mask], y_pred[mask])
        self.compiled_metrics.update_state(y[mask], y_pred[mask])
        return {m.name: m.result() for m in self.metrics}

    def compile(self, *args, **kwargs):
        """Compile the model, always with ``run_eagerly=True``.

        The flag is written into ``kwargs`` (like the forced arguments in
        ``fit``/``predict``) so a caller-supplied ``run_eagerly`` no longer
        raises a duplicate-keyword ``TypeError``.
        """
        kwargs["run_eagerly"] = True
        super().compile(*args, **kwargs)

    def fit(self, *args, **kwargs):
        """Full-batch ``fit``: one batch holding every node, never shuffled.

        Returns whatever ``tf.keras.Model.fit`` returns (previously dropped).
        """
        kwargs["batch_size"] = self.num_nodes
        kwargs["shuffle"] = False
        return super().fit(*args, **kwargs)

    def predict(self, *args, **kwargs):
        """Full-batch ``predict``: one batch holding every node."""
        kwargs["batch_size"] = self.num_nodes
        return super().predict(*args, **kwargs)


def main(dataset, lr, bias, n_epochs, weight_decay):
    """Train SGC on ``dataset`` and print the accuracy on the test nodes."""
    data = load_data(dataset)
    print(describe_data(data))

    g = data[0]
    X = g.ndata["feat"]
    y = g.ndata["label"]

    model = SGC(g=g, num_classes=data.num_classes, bias=bias)

    loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
    optimizer = tfa.optimizers.AdamW(weight_decay, lr)
    accuracy = tf.metrics.SparseCategoricalAccuracy(name="accuracy")

    model.compile(optimizer, loss, metrics=[accuracy])
    model.fit(x=X, y=y, epochs=n_epochs, validation_data=(X, y))

    y_pred = model.predict(X, batch_size=len(X))
    test_mask = g.ndata["test_mask"]
    # Keras metrics are stateful and `accuracy` was updated during fit(), so
    # calling it again here would fold the test nodes into counts it already
    # holds. Score the test nodes with a fresh metric instead.
    test_metric = tf.metrics.SparseCategoricalAccuracy(name="test_accuracy")
    test_accuracy = test_metric(y[test_mask], y_pred[test_mask])
    print(f"Test Accuracy: {test_accuracy:.1%}")


def _parse_args():
    """Parse the command-line options of this script."""
    parser = argparse.ArgumentParser(
        description="Run experiment for Simple Graph Convolution (SGC)"
    )
    # Restricting the choices turns an unknown name into a usage error
    # instead of a KeyError raised later by load_data().
    parser.add_argument(
        "--dataset",
        default="cora",
        choices=sorted(_DATASETS),
        help="dataset to run",
    )
    parser.add_argument("--lr", type=float, default=0.2, help="learning rate")
    parser.add_argument(
        "--bias", action="store_true", default=False, help="flag to use bias"
    )
    parser.add_argument(
        "--n-epochs", type=int, default=100, help="number of training epochs"
    )
    parser.add_argument(
        "--weight-decay", type=float, default=5e-6, help="weight for L2 loss"
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = _parse_args()
    main(
        dataset=args.dataset,
        lr=args.lr,
        bias=args.bias,
        n_epochs=args.n_epochs,
        weight_decay=args.weight_decay,
    )
";" TORCH_VERSION_LIST ${TORCH_VER}) set(Torch_DIR "${TORCH_PREFIX}/Torch") message(STATUS "Setting directory to ${Torch_DIR}") find_package(Torch REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TORCH_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb") set(LIB_GRAPHBOLT_NAME "graphbolt_pytorch_${TORCH_VER}") option(BUILD_WITH_TASKFLOW "Use taskflow as parallel backend" ON) option(USE_OPENMP "Use OpenMP for graphbolt" ON) option(USE_LIBURING "Build graphbolt with liburing support" ON) set(BOLT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src") set(BOLT_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/include") file(GLOB BOLT_HEADERS ${BOLT_INCLUDE}) file(GLOB BOLT_SRC ${BOLT_DIR}/*.cc) if(USE_CUDA) file(GLOB BOLT_CUDA_SRC ${BOLT_DIR}/cuda/*.cu ${BOLT_DIR}/cuda/*.cc ) list(APPEND BOLT_SRC ${BOLT_CUDA_SRC}) if(DEFINED ENV{CUDAARCHS}) set(CMAKE_CUDA_ARCHITECTURES $ENV{CUDAARCHS}) endif() set(CMAKE_CUDA_ARCHITECTURES_FILTERED ${CMAKE_CUDA_ARCHITECTURES}) # CUDA extension supports only sm_70 and up (Volta+). list(FILTER CMAKE_CUDA_ARCHITECTURES_FILTERED EXCLUDE REGEX "[2-6][0-9]") list(LENGTH CMAKE_CUDA_ARCHITECTURES_FILTERED CMAKE_CUDA_ARCHITECTURES_FILTERED_LEN) if(CMAKE_CUDA_ARCHITECTURES_FILTERED_LEN EQUAL 0) # Build the CUDA extension at least build for Volta. 
set(CMAKE_CUDA_ARCHITECTURES_FILTERED "70") endif() set(LIB_GRAPHBOLT_CUDA_NAME "${LIB_GRAPHBOLT_NAME}_cuda") endif() add_library(${LIB_GRAPHBOLT_NAME} SHARED ${BOLT_SRC} ${BOLT_HEADERS}) include_directories(BEFORE ${BOLT_DIR} ${BOLT_HEADERS} # For CXX20 features: # `std::atomic_ref`, `std::counting_semaphore` "../third_party/cccl/libcudacxx/include" "../third_party/pcg/include" "../third_party/tsl_robin_map/include") target_link_libraries(${LIB_GRAPHBOLT_NAME} "${TORCH_LIBRARIES}") if(BUILD_WITH_TASKFLOW) target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE "../third_party/taskflow") target_compile_definitions(${LIB_GRAPHBOLT_NAME} PRIVATE BUILD_WITH_TASKFLOW=1) endif() if(USE_OPENMP) find_package(OpenMP REQUIRED) target_link_libraries(${LIB_GRAPHBOLT_NAME} OpenMP::OpenMP_CXX) message(STATUS "Build graphbolt with OpenMP.") endif(USE_OPENMP) if(CMAKE_SYSTEM_NAME MATCHES "Linux") if(USE_LIBURING) add_definitions(-DHAVE_LIBRARY_LIBURING) include(ExternalProject) set(LIBURING_INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/third_party/liburing) set(LIBURING_C_COMPILER "${CMAKE_C_COMPILER} -w") ExternalProject_Add( liburing SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/liburing CONFIGURE_COMMAND /configure --cc=${LIBURING_C_COMPILER} --cxx=${CMAKE_CXX_COMPILER} --prefix=/ # In order to avoid the error `error: redefinition of 'struct in6_pktinfo'` on ubi7 # when building examples, let's build src only. 
BUILD_COMMAND bash -c "make -j 4 -C src/" BUILD_IN_SOURCE ON INSTALL_COMMAND make install DESTDIR=${LIBURING_INSTALL_DIR} BUILD_BYPRODUCTS ${LIBURING_INSTALL_DIR}/lib/liburing.a BUILD_BYPRODUCTS ${LIBURING_INSTALL_DIR}/include DOWNLOAD_EXTRACT_TIMESTAMP true ) set(LIBURING_INCLUDE ${LIBURING_INSTALL_DIR}/include) set(LIBURING ${LIBURING_INSTALL_DIR}/lib/liburing.a) target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE ${LIBURING_INCLUDE}) add_dependencies(${LIB_GRAPHBOLT_NAME} liburing) target_link_libraries(${LIB_GRAPHBOLT_NAME} ${CMAKE_CURRENT_BINARY_DIR}/third_party/liburing/lib/liburing.a) message(STATUS "Build graphbolt with liburing.") endif(USE_LIBURING) endif() if(USE_CUDA) file(GLOB BOLT_CUDA_EXTENSION_SRC ${BOLT_DIR}/cuda/extension/*.cu ${BOLT_DIR}/cuda/extension/*.cc ../third_party/HugeCTR/gpu_cache/src/nv_gpu_cache.cu ) # Until https://github.com/NVIDIA/cccl/issues/1083 is resolved, we need to # compile the cuda/extension folder with Volta+ CUDA architectures. add_library(${LIB_GRAPHBOLT_CUDA_NAME} STATIC ${BOLT_CUDA_EXTENSION_SRC} ${BOLT_HEADERS}) target_link_libraries(${LIB_GRAPHBOLT_CUDA_NAME} "${TORCH_LIBRARIES}") set_target_properties(${LIB_GRAPHBOLT_NAME} PROPERTIES CUDA_STANDARD 17) set_target_properties(${LIB_GRAPHBOLT_CUDA_NAME} PROPERTIES CUDA_STANDARD 17) set_target_properties(${LIB_GRAPHBOLT_CUDA_NAME} PROPERTIES CUDA_ARCHITECTURES "${CMAKE_CUDA_ARCHITECTURES_FILTERED}") set_target_properties(${LIB_GRAPHBOLT_CUDA_NAME} PROPERTIES POSITION_INDEPENDENT_CODE TRUE) # Enables libcudacxx for gpu_cache. 
target_compile_definitions(${LIB_GRAPHBOLT_CUDA_NAME} PRIVATE LIBCUDACXX_VERSION) include_directories(AFTER "../third_party/HugeCTR/gpu_cache/include") message(STATUS "Build graphbolt extension with HugeCTR GPU embedding cache.") message(STATUS "Use external CCCL library for a consistent API and performance for graphbolt.") include_directories(BEFORE "../third_party/cccl/thrust" "../third_party/cccl/cub" "../third_party/cuco/include") get_property(archs TARGET ${LIB_GRAPHBOLT_NAME} PROPERTY CUDA_ARCHITECTURES) message(STATUS "CUDA_ARCHITECTURES for graphbolt: ${archs}") get_property(archs TARGET ${LIB_GRAPHBOLT_CUDA_NAME} PROPERTY CUDA_ARCHITECTURES) message(STATUS "CUDA_ARCHITECTURES for graphbolt extension: ${archs}") target_link_libraries(${LIB_GRAPHBOLT_NAME} ${LIB_GRAPHBOLT_CUDA_NAME}) endif() # The Torch CMake configuration only sets up the path for the MKL library when # using the conda distribution. The following is a workaround to address this # when using a standalone installation of MKL. if(DEFINED MKL_LIBRARIES) target_link_directories(${LIB_GRAPHBOLT_NAME} PRIVATE ${MKL_ROOT}/lib/${MKL_ARCH}) endif() ================================================ FILE: graphbolt/build.bat ================================================ REM Helper script to build Graphbolt libraries for PyTorch @ECHO OFF SETLOCAL EnableDelayedExpansion MD "%BINDIR%\graphbolt" DEL /S /Q build MD build PUSHD build IF x%1x == xx GOTO single FOR %%X IN (%*) DO ( DEL /S /Q * "%CMAKE_COMMAND%" -DGPU_CACHE_BUILD_DIR=%BINDIR% -DCMAKE_CONFIGURATION_TYPES=Release -DPYTHON_INTERP=%%X -DTORCH_CUDA_ARCH_LIST=Volta .. -G "Visual Studio 16 2019" || EXIT /B 1 msbuild graphbolt.sln /m /nr:false || EXIT /B 1 COPY /Y Release\*.dll "%BINDIR%\graphbolt" || EXIT /B 1 ) GOTO end :single DEL /S /Q * "%CMAKE_COMMAND%" -DGPU_CACHE_BUILD_DIR=%BINDIR% -DCMAKE_CONFIGURATION_TYPES=Release -DTORCH_CUDA_ARCH_LIST=Volta .. 
-G "Visual Studio 16 2019" || EXIT /B 1 msbuild graphbolt.sln /m /nr:false || EXIT /B 1 COPY /Y Release\*.dll "%BINDIR%\graphbolt" || EXIT /B 1 :end POPD ENDLOCAL ================================================ FILE: graphbolt/build.sh ================================================ #!/bin/bash # Helper script to build graphbolt libraries for PyTorch set -e mkdir -p build mkdir -p $BINDIR/graphbolt cd build if [ $(uname) = 'Darwin' ]; then CPSOURCE=*.dylib else CPSOURCE=*.so fi # We build for the same architectures as DGL, thus we hardcode # TORCH_CUDA_ARCH_LIST and we need to at least compile for Volta. Until # https://github.com/NVIDIA/cccl/issues/1083 is resolved, we need to compile the # cuda/extension folder with Volta+ CUDA architectures. TORCH_CUDA_ARCH_LIST="Volta" if ! [[ -z "${CUDAARCHS}" ]]; then # The architecture list is passed as an environment variable, we set # TORCH_CUDA_ARCH_LIST to the latest architecture. CUDAARCHSARR=(${CUDAARCHS//;/ }) LAST_ARCHITECTURE=${CUDAARCHSARR[-1]} # TORCH_CUDA_ARCH_LIST has to be at least 70 to override Volta default. if (( $LAST_ARCHITECTURE >= 70 )); then # Convert "75" to "7.5". TORCH_CUDA_ARCH_LIST=${LAST_ARCHITECTURE:0:-1}'.'${LAST_ARCHITECTURE: -1} fi fi CMAKE_FLAGS="-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_TOOLKIT_ROOT_DIR -DUSE_CUDA=$USE_CUDA -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST" echo "graphbolt cmake flags: $CMAKE_FLAGS" if [ $# -eq 0 ]; then $CMAKE_COMMAND $CMAKE_FLAGS .. make -j cp -v $CPSOURCE $BINDIR/graphbolt else for PYTHON_INTERP in $@; do TORCH_VER=$($PYTHON_INTERP -c 'import torch; print(torch.__version__.split("+")[0])') mkdir -p $TORCH_VER cd $TORCH_VER $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../.. make -j cp -v $CPSOURCE $BINDIR/graphbolt cd .. 
done fi ================================================ FILE: graphbolt/find_cmake.py ================================================ import os import torch cmake_prefix_path = getattr( torch.utils, "cmake_prefix_path", os.path.join(os.path.dirname(torch.__file__), "share", "cmake"), ) version = torch.__version__.split("+")[0] print(";".join([cmake_prefix_path, version])) ================================================ FILE: graphbolt/include/graphbolt/async.h ================================================ /** * Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file graphbolt/async.h * @brief Provides asynchronous task utilities for GraphBolt. */ #ifndef GRAPHBOLT_ASYNC_H_ #define GRAPHBOLT_ASYNC_H_ #include #include #include #include #include #include #ifdef BUILD_WITH_TASKFLOW #include #include #else #include #include #include #endif #ifdef GRAPHBOLT_USE_CUDA #include #include #include #include #endif namespace graphbolt { enum ThreadPool { intraop, interop }; #ifdef BUILD_WITH_TASKFLOW template inline tf::Executor& _get_thread_pool() { static std::unique_ptr pool; static std::once_flag flag; std::call_once(flag, [&] { const int num_threads = pool_type == ThreadPool::intraop ? 
torch::get_num_threads() : torch::get_num_interop_threads(); pool = std::make_unique(num_threads); }); return *pool.get(); } inline tf::Executor& intraop_pool() { return _get_thread_pool(); } inline tf::Executor& interop_pool() { return _get_thread_pool(); } inline tf::Executor& get_thread_pool(ThreadPool pool_type) { return pool_type == ThreadPool::intraop ? intraop_pool() : interop_pool(); } #endif // BUILD_WITH_TASKFLOW inline int get_num_threads() { #ifdef BUILD_WITH_TASKFLOW return intraop_pool().num_workers(); #else return torch::get_num_threads(); #endif } inline int get_num_interop_threads() { #ifdef BUILD_WITH_TASKFLOW return interop_pool().num_workers(); #else return torch::get_num_interop_threads(); #endif } template class Future : public torch::CustomClassHolder { #ifdef GRAPHBOLT_USE_CUDA using T_no_event = std::conditional_t, std::monostate, T>; using T_with_event = std::conditional_t< std::is_void_v, at::cuda::CUDAEvent, std::pair>; using future_type = std::future>; #else using future_type = std::future; #endif public: #ifdef GRAPHBOLT_USE_CUDA using return_type = std::variant; #else using return_type = T; #endif Future(future_type&& future) : future_(std::move(future)) {} Future() = default; T Wait() { #ifdef GRAPHBOLT_USE_CUDA auto result = future_.get(); if constexpr (std::is_void_v) { if (std::holds_alternative(result)) { auto&& event = std::get(result); event.block(c10::cuda::getCurrentCUDAStream()); } return; } else if (std::holds_alternative(result)) { auto&& [value, event] = std::get(result); event.block(c10::cuda::getCurrentCUDAStream()); return value; } else { return std::get(result); } #else return future_.get(); #endif } private: future_type future_; }; /** * @brief Utilizes at::launch to launch an async task in the interop thread * pool. We should not make use of any native CPU torch ops inside the launched * task to avoid spawning a new OpenMP threadpool on each interop thread. 
*/ template inline auto async(F&& function, bool is_cuda = false) { using T = decltype(function()); #ifdef GRAPHBOLT_USE_CUDA struct c10::StreamData3 stream_data; if (is_cuda) { stream_data = c10::cuda::getCurrentCUDAStream().pack3(); } #endif using return_type = typename Future::return_type; auto fn = [=, func = std::move(function)]() -> return_type { #ifdef GRAPHBOLT_USE_CUDA // We make sure to use the same CUDA stream as the thread launching the // async operation. if (is_cuda) { auto stream = c10::cuda::CUDAStream::unpack3( stream_data.stream_id, stream_data.device_index, stream_data.device_type); c10::cuda::CUDAStreamGuard guard(stream); at::cuda::CUDAEvent event; // Might be executed on the GPU so we record an event to be able to // synchronize with it later, in case it is executed on an alternative // CUDA stream. if constexpr (std::is_void_v) { func(); event.record(); return event; } else { auto result = func(); event.record(); return std::make_pair(std::move(result), std::move(event)); } } if constexpr (std::is_void_v) { func(); return std::monostate{}; } else { return func(); } #else return func(); #endif }; #ifdef BUILD_WITH_TASKFLOW auto future = interop_pool().async(std::move(fn)); #else auto promise = std::make_shared>(); auto future = promise->get_future(); at::launch([promise, func = std::move(fn)]() { if constexpr (std::is_void_v) { func(); promise->set_value(); } else promise->set_value(func()); }); #endif return c10::make_intrusive>(std::move(future)); } template inline void _parallel_for( const int64_t begin, const int64_t end, const int64_t grain_size, const F& f) { if (begin >= end) return; int64_t num_threads = get_num_threads(); const auto num_iter = end - begin; const bool use_parallel = (num_iter > grain_size && num_iter > 1 && num_threads > 1); if (!use_parallel) { if constexpr (for_each) { for (int64_t i = begin; i < end; i++) f(i); } else { f(begin, end); } return; } if (grain_size > 0) { num_threads = std::min(num_threads, 
at::divup(end - begin, grain_size)); } int64_t chunk_size = at::divup((end - begin), num_threads); #ifdef BUILD_WITH_TASKFLOW tf::Taskflow flow; flow.for_each_index(int64_t{0}, num_threads, int64_t{1}, [=](int64_t tid) { const int64_t begin_tid = begin + tid * chunk_size; if (begin_tid < end) { const int64_t end_tid = std::min(end, begin_tid + chunk_size); if constexpr (for_each) { for (int64_t i = begin_tid; i < end_tid; i++) f(i); } else { f(begin_tid, end_tid); } } }); _get_thread_pool().run(flow).get(); #else std::promise promise; std::future future; std::atomic_flag err_flag = ATOMIC_FLAG_INIT; std::exception_ptr eptr; int num_launched = 0; std::atomic num_finished = 0; for (int tid = num_threads - 1; tid >= 0; tid--) { const int64_t begin_tid = begin + tid * chunk_size; if (begin_tid < end) { const int64_t end_tid = std::min(end, begin_tid + chunk_size); if (tid == 0) { // Launch the thread 0's work inline. if constexpr (for_each) { for (int64_t i = begin_tid; i < end_tid; i++) f(i); } else { f(begin_tid, end_tid); } continue; } if (!future.valid()) { future = promise.get_future(); num_launched = tid; } at::launch([&f, &err_flag, &eptr, &promise, &num_finished, num_launched, begin_tid, end_tid] { try { if constexpr (for_each) { for (int64_t i = begin_tid; i < end_tid; i++) f(i); } else { f(begin_tid, end_tid); } } catch (...) { if (!err_flag.test_and_set()) { eptr = std::current_exception(); } } auto ticket = num_finished.fetch_add(1, std::memory_order_release); if (1 + ticket == num_launched) { // The last thread signals the end of execution. promise.set_value(); } }); } } // Wait for the launched work to finish. if (num_launched > 0) { future.get(); if (eptr) { std::rethrow_exception(eptr); } } #endif } /** * @brief GraphBolt's version of torch::parallel_for. Since torch::parallel_for * uses OpenMP threadpool, async tasks can not make use of it due to multiple * OpenMP threadpools being created for each async thread. 
Moreover, inside
 * graphbolt::parallel_for, we should not make use of any native CPU torch ops
 * as they will spawn an OpenMP threadpool.
 *
 * @param begin First index of the range (inclusive).
 * @param end Last index of the range (exclusive).
 * @param grain_size Granularity hint forwarded to _parallel_for.
 * @param f The function to run; forwarded unchanged to _parallel_for.
 */
template inline void parallel_for(
    const int64_t begin, const int64_t end, const int64_t grain_size,
    const F& f) {
  _parallel_for(begin, end, grain_size, f);
}

/**
 * @brief Compared to parallel_for, it expects the passed function to take a
 * single argument for each iteration.
 *
 * @param begin First index of the range (inclusive).
 * @param end Last index of the range (exclusive).
 * @param grain_size Granularity hint forwarded to _parallel_for.
 * @param f The per-iteration function; forwarded unchanged to _parallel_for.
 */
template inline void parallel_for_each(
    const int64_t begin, const int64_t end, const int64_t grain_size,
    const F& f) {
  _parallel_for(begin, end, grain_size, f);
}

/**
 * @brief Same as parallel_for but uses the interop thread pool.
 *
 * @param begin First index of the range (inclusive).
 * @param end Last index of the range (exclusive).
 * @param grain_size Granularity hint forwarded to _parallel_for.
 * @param f The function to run; forwarded unchanged to _parallel_for.
 */
template inline void parallel_for_interop(
    const int64_t begin, const int64_t end, const int64_t grain_size,
    const F& f) {
  _parallel_for(begin, end, grain_size, f);
}

/**
 * @brief Compared to parallel_for_interop, it expects the passed function to
 * take a single argument for each iteration.
 *
 * @param begin First index of the range (inclusive).
 * @param end Last index of the range (exclusive).
 * @param grain_size Granularity hint forwarded to _parallel_for.
 * @param f The per-iteration function; forwarded unchanged to _parallel_for.
 */
template inline void parallel_for_each_interop(
    const int64_t begin, const int64_t end, const int64_t grain_size,
    const F& f) {
  _parallel_for(begin, end, grain_size, f);
}

}  // namespace graphbolt

#endif  // GRAPHBOLT_ASYNC_H_

================================================
FILE: graphbolt/include/graphbolt/continuous_seed.h
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*
 * @file graphbolt/continuous_seed.h
 * @brief CPU and CUDA implementation for continuous random seeds
 */
#ifndef GRAPHBOLT_CONTINUOUS_SEED_H_
#define GRAPHBOLT_CONTINUOUS_SEED_H_

#include
#include

#ifdef __CUDACC__
#include
#else
#include
#include
#endif  // __CUDACC__

#ifndef M_SQRT1_2
#define M_SQRT1_2 0.707106781186547524401
#endif  // M_SQRT1_2

namespace graphbolt {

/**
 * @brief A pair of random seeds together with two mixing coefficients, used
 * to draw one uniform random number per id.
 *
 * When both seeds are equal, uniform() draws directly from a uniform
 * generator seeded with that seed. When they differ, it draws one normal
 * variate per seed, combines them as c[0] * n0 + c[1] * n1 and maps the
 * result through the standard normal CDF to get a value in [0, 1].
 */
class continuous_seed {
  // The two seeds; s[0] == s[1] selects the single-seed path in uniform().
  uint64_t s[2];
  // Mixing coefficients applied to the normal variates of s[0] and s[1].
  float c[2];

 public:
  /**
   * @brief Builds a degenerate instance where both seeds equal the given seed
   * and both coefficients are zero.
   */
  /* implicit */ continuous_seed(const int64_t seed) {  // NOLINT
    s[0] = s[1] = seed;
    c[0] = c[1] = 0;
  }

  /**
   * @brief Builds an instance from a seed tensor and a mixing parameter.
   *
   * @param seed_arr Tensor whose first and last elements become the two
   * seeds.
   * @param r Mixing parameter; the coefficients are set to cos(pi * r / 2)
   * and sin(pi * r / 2), so their squares sum to one.
   */
  continuous_seed(torch::Tensor seed_arr, float r) {
    auto seed = seed_arr.data_ptr();
    s[0] = seed[0];
    s[1] = seed[seed_arr.size(0) - 1];
    const auto pi = std::acos(-1.0);
    c[0] = std::cos(pi * r / 2);
    c[1] = std::sin(pi * r / 2);
  }

  /** @brief Returns the first seed if i is 0, otherwise the second seed. */
  uint64_t get_seed(int i) const { return s[i != 0]; }

#ifdef __CUDACC__
  /**
   * @brief Device version: returns the random number associated with t.
   *
   * @param t The id for which the random number is generated; passed to
   * curand_init along with the seed.
   */
  __device__ inline float uniform(const uint64_t t) const {
    const uint64_t kCurandSeed = 999961;  // Could be any random number.
    curandStatePhilox4_32_10_t rng;
    curand_init(kCurandSeed, s[0], t, &rng);
    float rnd;
    if (s[0] != s[1]) {
      // Blend one normal variate per seed, then map the sum through the
      // normal CDF to obtain a value in [0, 1].
      rnd = c[0] * curand_normal(&rng);
      curand_init(kCurandSeed, s[1], t, &rng);
      rnd += c[1] * curand_normal(&rng);
      rnd = normcdff(rnd);
    } else {
      rnd = curand_uniform(&rng);
    }
    return rnd;
  }
#else
  /**
   * @brief Host version: returns the random number associated with t.
   *
   * @param t The id for which the random number is generated; passed to the
   * pcg32 generator along with the seed.
   */
  inline float uniform(const uint64_t t) const {
    pcg32 ng0(s[0], t);
    float rnd;
    if (s[0] != s[1]) {
      std::normal_distribution norm;
      rnd = c[0] * norm(ng0);
      pcg32 ng1(s[1], t);
      // Drop any state cached by the distribution so that the second variate
      // is produced from ng1 only.
      norm.reset();
      rnd += c[1] * norm(ng1);
      // erfc(-x / sqrt(2)) / 2 is the standard normal CDF evaluated at x.
      rnd = std::erfc(-rnd * static_cast(M_SQRT1_2)) / 2.0f;
    } else {
      std::uniform_real_distribution uni;
      rnd = uni(ng0);
    }
    return rnd;
  }
#endif  // __CUDACC__
};

/**
 * @brief A single random seed used to draw one uniform random number per id.
 */
class single_seed {
  uint64_t seed_;

 public:
  /** @brief Uses the given integer as the seed. */
  /* implicit */ single_seed(const int64_t seed) : seed_(seed) {}  // NOLINT

  /** @brief Uses the first element of the given tensor as the seed. */
  single_seed(torch::Tensor seed_arr) : seed_(seed_arr.data_ptr()[0]) {}

#ifdef __CUDACC__
  /** @brief Device version: returns the uniform random number for id. */
  __device__ inline float uniform(const uint64_t id) const {
    const uint64_t kCurandSeed = 999961;  // Could be any random number.
    curandStatePhilox4_32_10_t rng;
    curand_init(kCurandSeed, seed_, id, &rng);
    return curand_uniform(&rng);
  }
#else
  /** @brief Host version: returns the uniform random number for id. */
  inline float uniform(const uint64_t id) const {
    pcg32 ng0(seed_, id);
    std::uniform_real_distribution uni;
    return uni(ng0);
  }
#endif  // __CUDACC__
};

}  // namespace graphbolt

#endif  // GRAPHBOLT_CONTINUOUS_SEED_H_

================================================
FILE: graphbolt/include/graphbolt/cuda_ops.h
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file graphbolt/cuda_ops.h
 * @brief Available CUDA operations in Graphbolt.
 */
#ifndef GRAPHBOLT_CUDA_OPS_H_
#define GRAPHBOLT_CUDA_OPS_H_

#include
#include

namespace graphbolt {
namespace ops {

/**
 * @brief Sorts the given input and optionally returns the original indexes.
 *
 * @param input A pointer to storage containing IDs.
 * @param num_items Size of the input storage.
 * @param num_bits An integer such that all elements of input tensor are
 * less than (1 << num_bits).
 *
 * @return
 * - A tuple of tensors if return_original_positions is true, where the first
 * one includes sorted input, the second contains original positions of the
 * sorted result. If return_original_positions is false, then returns only the
 * sorted input.
*/
template std::conditional_t<
    return_original_positions, std::pair, torch::Tensor>
Sort(const scalar_t* input, int64_t num_items, int num_bits);

/**
 * @brief Sorts the given input and optionally returns the original indexes.
 *
 * @param input A tensor containing IDs.
 * @param num_bits An integer such that all elements of input tensor are
 * less than (1 << num_bits).
 *
 * @return
 * - A tuple of tensors if return_original_positions is true, where the first
 * one includes sorted input, the second contains original positions of the
 * sorted result. If return_original_positions is false, then returns only the
 * sorted input.
 */
template std::conditional_t<
    return_original_positions, std::pair, torch::Tensor>
Sort(torch::Tensor input, int num_bits = 0);

/**
 * @brief Tests if each element of elements is in test_elements. Returns a
 * boolean tensor of the same shape as elements that is True for elements
 * in test_elements and False otherwise. Enhances torch.isin by implementing
 * multi-threaded searching, as detailed in the documentation at
 * https://pytorch.org/docs/stable/generated/torch.isin.html.
 *
 * @param elements Input elements
 * @param test_elements Values against which to test for each input element.
 *
 * @return
 * A boolean tensor of the same shape as elements that is True for elements
 * in test_elements and False otherwise.
 */
torch::Tensor IsIn(torch::Tensor elements, torch::Tensor test_elements);

/**
 * @brief Returns the indexes of the nonzero elements in the given boolean mask
 * if logical_not is false. Otherwise, returns the indexes of the zero elements
 * instead.
 *
 * @param mask Input boolean mask.
 * @param logical_not Whether mask should be treated as ~mask.
 *
 * @return An int64_t tensor of the same shape as mask containing the indexes
 * of the selected elements.
 */
torch::Tensor Nonzero(torch::Tensor mask, bool logical_not);

/**
 * @brief Select columns for a sparse matrix in a CSC format according to nodes
 * tensor.
 *
 * NOTE: The shape of all tensors must be 1-D.
 *
 * @param in_degree Indegree tensor containing degrees of nodes being copied.
 * @param sliced_indptr Sliced_indptr tensor containing indptr values of nodes
 * being copied.
 * @param indices Indices tensor with edge information of shape (indptr[N],).
 * @param nodes Nodes tensor with shape (M,).
 * @param nodes_max An upper bound on `nodes.max()`.
 * @param output_size The total number of edges being copied.
 * @return (torch::Tensor, torch::Tensor) Output indptr and indices tensors of
 * shapes (M + 1,) and ((indptr[nodes + 1] - indptr[nodes]).sum(),).
 */
std::tuple IndexSelectCSCImpl(
    torch::Tensor in_degree, torch::Tensor sliced_indptr,
    torch::Tensor indices, torch::Tensor nodes, int64_t nodes_max,
    torch::optional output_size = torch::nullopt);

/**
 * @brief Select columns for a sparse matrix in a CSC format according to nodes
 * tensor.
 *
 * NOTE: The shape of all tensors must be 1-D.
 *
 * @param indptr Indptr tensor containing offsets with shape (N + 1,).
 * @param indices Indices tensor with edge information of shape (indptr[N],).
 * @param nodes Nodes tensor with shape (M,).
 * @param output_size The total number of edges being copied.
 * @return (torch::Tensor, torch::Tensor) Output indptr and indices tensors of
 * shapes (M + 1,) and ((indptr[nodes + 1] - indptr[nodes]).sum(),).
 */
std::tuple IndexSelectCSCImpl(
    torch::Tensor indptr, torch::Tensor indices, torch::Tensor nodes,
    torch::optional output_size = torch::nullopt);

/**
 * @brief Select columns for a sparse matrix in a CSC format according to nodes
 * tensor for a given list of tensors.
 *
 * NOTE: The shape of all tensors must be 1-D.
 *
 * @param indptr Indptr tensor containing offsets with shape (N + 1,).
 * @param indices_list Vector of indices tensors with edge information of
 * shape (indptr[N],).
 * @param nodes Nodes tensor with shape (M,).
 * @param with_edge_ids Whether to return edge ids tensor corresponding to
 * sliced edges as the last element of the output.
 * @param output_size The total number of edges being copied.
 * @return (torch::Tensor, std::vector<torch::Tensor>) Output indptr and vector
 * of indices tensors of shapes (M + 1,) and ((indptr[nodes + 1] -
 * indptr[nodes]).sum(),).
 */
std::tuple> IndexSelectCSCBatchedImpl(
    torch::Tensor indptr, std::vector indices_list, torch::Tensor nodes,
    bool with_edge_ids, torch::optional output_size);

/**
 * @brief Slices the indptr tensor with nodes and returns the indegrees of the
 * given nodes and their indptr values.
 *
 * @param indptr The indptr tensor.
 * @param nodes The nodes to read from indptr. If not provided, assumed to be
 * equal to torch.arange(indptr.size(0) - 1).
 *
 * @return Tuple of tensors with values:
 * (indptr[nodes + 1] - indptr[nodes], indptr[nodes]), the returned indegrees
 * tensor (first one) has size nodes.size(0) + 1 so that calling ExclusiveCumSum
 * on it gives the output indptr.
 */
std::tuple SliceCSCIndptr(
    torch::Tensor indptr, torch::optional nodes);

/**
 * @brief Given the compacted sub_indptr tensor, edge type tensor and
 * sliced_indptr tensor of the original graph, returns the heterogeneous
 * versions of sub_indptr, indegrees and sliced_indptr.
 *
 * @param sub_indptr The compacted indptr tensor.
 * @param etypes The compacted type_per_edge tensor.
 * @param sliced_indptr The sliced_indptr tensor of original graph.
 * @param num_fanouts The number of fanout values.
 *
 * @return Tuple of tensors (new_sub_indptr, new_indegrees, new_sliced_indptr).
 */
std::tuple SliceCSCIndptrHetero(
    torch::Tensor sub_indptr, torch::Tensor etypes, torch::Tensor sliced_indptr,
    int64_t num_fanouts);

/**
 * @brief Computes the exclusive prefix sum of the given input.
 *
 * @param input The input tensor.
 *
 * @return The prefix sum result such that r[i] = \sum_{j=0}^{i-1} input[j]
 */
torch::Tensor ExclusiveCumSum(torch::Tensor input);

/**
 * @brief Computes the gather operation on a given input and index tensor.
 *
 * @param input The input tensor.
 * @param index The index tensor.
* @param dtype The optional output dtype. If not given, inferred from the input
 * tensor.
 *
 * @return The result of the input.gather(0, index).to(dtype) operation.
 */
torch::Tensor Gather(
    torch::Tensor input, torch::Tensor index,
    torch::optional dtype = torch::nullopt);

/**
 * @brief Select rows from input tensor according to index tensor.
 *
 * NOTE:
 * 1. The shape of input tensor can be multi-dimensional, but the index tensor
 * must be 1-D.
 * 2. Should be called if input is on pinned memory and index is on pinned
 * memory or GPU memory.
 *
 * @param input Input tensor with shape (N, ...).
 * @param index Index tensor with shape (M,).
 * @return torch::Tensor Output tensor with shape (M, ...).
 */
torch::Tensor UVAIndexSelectImpl(torch::Tensor input, torch::Tensor index);

/**
 * @brief ExpandIndptrImpl implements conversion from a given indptr offset
 * tensor to a COO format tensor. If node_ids is not given, it is assumed to be
 * equal to torch::arange(indptr.size(0) - 1, dtype=dtype).
 *
 * @param indptr The indptr offset tensor.
 * @param dtype The dtype of the returned output tensor.
 * @param node_ids Optional 1D tensor representing the node ids.
 * @param output_size Optional value of indptr[-1]. Passing it eliminates
 * CPU-GPU synchronization.
 *
 * @return The resulting tensor.
 */
torch::Tensor ExpandIndptrImpl(
    torch::Tensor indptr, torch::ScalarType dtype,
    torch::optional node_ids = torch::nullopt,
    torch::optional output_size = torch::nullopt);

/**
 * @brief IndptrEdgeIdsImpl implements conversion from a given indptr offset
 * tensor to a COO edge ids tensor. For a given indptr [0, 2, 5, 7] and offset
 * tensor [0, 100, 200], the output will be [0, 1, 100, 101, 102, 200, 201]. If
 * offset was not provided, the output would be [0, 1, 0, 1, 2, 0, 1].
 *
 * @param indptr The indptr offset tensor.
 * @param dtype The dtype of the returned output tensor.
 * @param offset The offset tensor.
 * @param output_size Optional value of indptr[-1]. Passing it eliminates
 * CPU-GPU synchronization.
 *
 * @return The resulting tensor.
 */
torch::Tensor IndptrEdgeIdsImpl(
    torch::Tensor indptr, torch::ScalarType dtype, torch::optional offset,
    torch::optional output_size);

/**
 * @brief Removes duplicate elements from the concatenated 'unique_dst_ids' and
 * 'src_ids' tensor and applies the uniqueness information to compact both
 * source and destination tensors.
 *
 * The function performs two main operations:
 * 1. Unique Operation: 'unique(concat(unique_dst_ids, src_ids))', in which
 * the unique operator will guarantee the 'unique_dst_ids' are at the head of
 * the result tensor.
 * 2. Compact Operation: Utilizes the reverse mapping derived from the unique
 * operation to transform 'src_ids' and 'dst_ids' into compacted IDs.
 *
 * When world_size is greater than 1, then the given ids are partitioned between
 * the available ranks. The ids corresponding to the given rank are guaranteed
 * to come before the ids of other ranks. To do this, the partition ids are
 * rotated backwards by the given rank so that the ids are ordered as:
 * [rank, rank + 1, ..., world_size - 1, 0, ..., rank - 1]. This is supported
 * only for Volta and later generation NVIDIA GPUs.
 *
 * @param src_ids A tensor containing source IDs.
 * @param dst_ids A tensor containing destination IDs.
 * @param unique_dst_ids A tensor containing unique destination IDs, which is
 * exactly all the unique elements in 'dst_ids'.
 * @param rank The rank of the current GPU.
 * @param world_size The total number of GPUs, i.e. the world size.
 *
 * @return (unique_ids, compacted_src_ids, compacted_dst_ids, unique_offsets)
 * - A tensor representing all unique elements in 'src_ids' and 'dst_ids' after
 * removing duplicates. The indices in this tensor precisely match the compacted
 * IDs of the corresponding elements.
 * - The tensor corresponding to the 'src_ids' tensor, where the entries are
 * mapped to compacted IDs.
 * - The tensor corresponding to the 'dst_ids' tensor, where the entries are
 * mapped to compacted IDs.
 * - The tensor corresponding to the offsets into the unique_ids tensor. Has
 * size `world_size + 1` and unique_ids[offsets[i]: offsets[i + 1]] belongs to
 * the rank `(rank + i) % world_size`.
 *
 * @example
 * torch::Tensor src_ids = src;
 * torch::Tensor dst_ids = dst;
 * torch::Tensor unique_dst_ids = torch::unique(dst);
 * auto result =
 *     UniqueAndCompact(src_ids, dst_ids, unique_dst_ids, rank, world_size);
 * torch::Tensor unique_ids = std::get<0>(result);
 * torch::Tensor compacted_src_ids = std::get<1>(result);
 * torch::Tensor compacted_dst_ids = std::get<2>(result);
 */
std::tuple UniqueAndCompact(
    const torch::Tensor src_ids, const torch::Tensor dst_ids,
    const torch::Tensor unique_dst_ids, const int64_t rank,
    const int64_t world_size);

/**
 * @brief Batched version of UniqueAndCompact. The ith element of the return
 * value is equal to the result of passing the ith elements of the input
 * arguments to UniqueAndCompact.
 */
std::vector<
    std::tuple>
UniqueAndCompactBatched(
    const std::vector& src_ids, const std::vector& dst_ids,
    const std::vector& unique_dst_ids, const int64_t rank,
    const int64_t world_size);

}  // namespace ops
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_OPS_H_

================================================
FILE: graphbolt/include/graphbolt/cuda_sampling_ops.h
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file graphbolt/cuda_sampling_ops.h
 * @brief Available CUDA sampling operations in Graphbolt.
 */
#ifndef GRAPHBOLT_CUDA_SAMPLING_OPS_H_
#define GRAPHBOLT_CUDA_SAMPLING_OPS_H_

#include
#include

namespace graphbolt {
namespace ops {

/**
 * @brief Sample neighboring edges of the given nodes and return the induced
 * subgraph.
 *
 * @param indptr Index pointer array of the CSC.
 * @param indices Indices array of the CSC.
 * @param seeds The nodes from which to sample neighbors. If not provided,
 * assumed to be equal to torch.arange(indptr.size(0) - 1).
 * @param seed_offsets The offsets of the given seeds,
 * seeds[seed_offsets[i]: seed_offsets[i + 1]] has node type i.
 * @param fanouts The number of edges to be sampled for each node with or
 * without considering edge types.
 *   - When the length is 1, it indicates that the fanout applies to all
 * neighbors of the node as a collective, regardless of the edge type.
 *   - Otherwise, the length should be equal to the number of edge types, and
 * each fanout value corresponds to a specific edge type of the node.
 * The value of each fanout should be >= 0 or = -1.
 *   - When the value is -1, all neighbors will be chosen for sampling. It is
 * equivalent to selecting all neighbors with non-zero probability when the
 * fanout is >= the number of neighbors (and replacement is set to false).
 *   - When the value is a non-negative integer, it serves as a minimum
 * threshold for selecting neighbors.
 * @param replace Boolean indicating whether the sample is performed with or
 * without replacement. If True, a value can be selected multiple times.
 * Otherwise, each value can be selected only once.
 * @param layer Boolean indicating whether neighbors should be sampled in a
 * layer sampling fashion. Uses the LABOR-0 algorithm to increase overlap of
 * sampled edges, see arXiv:2210.13339.
 * @param returning_indices_is_optional Boolean indicating whether returning
 * indices tensor is optional.
 * @param type_per_edge A tensor representing the type of each edge, if present.
 * @param probs_or_mask An optional tensor with (unnormalized) probabilities
 * corresponding to each neighboring edge of a node. It must be
 * a 1D tensor, with the number of elements equaling the total number of edges.
 * @param node_type_offset An optional node type offset tensor.
 * NOTE(review): presumably the offset array of node types, as in
 * FusedCSCSamplingGraph -- confirm against the implementation.
 * @param node_type_to_id A dictionary mapping node type names to type IDs. The
 * length of it is equal to the number of node types. The key is the node type
 * name, and the value is the corresponding type ID.
 * @param edge_type_to_id A dictionary mapping edge type names to type IDs. The
 * length of it is equal to the number of edge types. The key is the edge type
 * name, and the value is the corresponding type ID.
 * @param random_seed The random seed for the sampler for layer=True.
 * @param seed2_contribution The contribution of the second random seed, [0, 1)
 * for layer=True.
 * @param seeds_timestamp The timestamp of the seeds.
 * @param seeds_pre_time_window The time window of the seeds represents a period
 * of time before `seeds_timestamp`. If provided, only neighbors and related
 * edges whose timestamps fall within
 * `[seeds_timestamp - seeds_pre_time_window, seeds_timestamp]` will be
 * filtered.
 * @param node_timestamp An optional tensor that contains the timestamp of nodes
 * in the graph.
 * @param edge_timestamp An optional tensor that contains the timestamp of edges
 * in the graph.
 *
 * @return An intrusive pointer to a FusedSampledSubgraph object containing
 * the sampled graph's information.
 */
c10::intrusive_ptr SampleNeighbors(
    torch::Tensor indptr, torch::Tensor indices, torch::optional seeds,
    torch::optional> seed_offsets, const std::vector& fanouts, bool replace,
    bool layer, bool returning_indices_is_optional,
    torch::optional type_per_edge = torch::nullopt,
    torch::optional probs_or_mask = torch::nullopt,
    torch::optional node_type_offset = torch::nullopt,
    torch::optional> node_type_to_id = torch::nullopt,
    torch::optional> edge_type_to_id = torch::nullopt,
    torch::optional random_seed = torch::nullopt,
    float seed2_contribution = .0f,
    // Optional temporal sampling arguments begin.
    torch::optional seeds_timestamp = torch::nullopt,
    torch::optional seeds_pre_time_window = torch::nullopt,
    torch::optional node_timestamp = torch::nullopt,
    torch::optional edge_timestamp = torch::nullopt
    // Optional temporal sampling arguments end.
);

/**
 * @brief Return the subgraph induced on the inbound edges of the given nodes.
 * @param indptr Index pointer array of the CSC.
 * @param indices Indices array of the CSC.
 * @param nodes Type agnostic node IDs to form the subgraph.
 * @param type_per_edge A tensor representing the type of each edge, if present.
 *
 * @return FusedSampledSubgraph.
 */
c10::intrusive_ptr InSubgraph(
    torch::Tensor indptr, torch::Tensor indices, torch::Tensor nodes,
    torch::optional type_per_edge);

}  // namespace ops
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_SAMPLING_OPS_H_

================================================
FILE: graphbolt/include/graphbolt/fused_csc_sampling_graph.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file graphbolt/fused_csc_sampling_graph.h
 * @brief Header file of csc sampling graph.
*/
#ifndef GRAPHBOLT_CSC_SAMPLING_GRAPH_H_
#define GRAPHBOLT_CSC_SAMPLING_GRAPH_H_

#include
#include
#include
#include
#include
#include
#include

namespace graphbolt {
namespace sampling {

/** @brief The available neighbor sampling algorithms. */
enum SamplerType { NEIGHBOR, LABOR, LABOR_DEPENDENT };

/** @brief Whether sampling is subject to a temporal constraint. */
enum TemporalOption { NOT_TEMPORAL, TEMPORAL };

/** @brief Returns true for the LABOR and LABOR_DEPENDENT sampler types. */
constexpr bool is_labor(SamplerType S) {
  return S == SamplerType::LABOR || S == SamplerType::LABOR_DEPENDENT;
}

/**
 * @brief Extra arguments carried by a sampler; specialized below.
 *
 * NOTE(review): one specialization is empty, one holds a single_seed and one
 * holds a continuous_seed -- presumably for NEIGHBOR, LABOR and
 * LABOR_DEPENDENT respectively; confirm against the original header.
 */
template struct SamplerArgs;

template <>
struct SamplerArgs {};

template <>
struct SamplerArgs {
  const torch::Tensor& indices;
  single_seed random_seed;
  int64_t num_nodes;
};

template <>
struct SamplerArgs {
  const torch::Tensor& indices;
  continuous_seed random_seed;
  int64_t num_nodes;
};

/**
 * @brief A sampling oriented csc format graph.
 *
 * Example usage:
 *
 * Suppose the graph has 3 node types, 3 edge types and 6 edges
 * auto node_type_offset = {0, 2, 4, 6}
 * auto type_per_edge = {0, 1, 0, 2, 1, 2}
 * auto graph = FusedCSCSamplingGraph(..., ..., node_type_offset, type_per_edge)
 *
 * The `node_type_offset` tensor represents the offset array of node type, the
 * given array indicates that node [0, 2) has type id 0, [2, 4) has type id 1,
 * and [4, 6) has type id 2. And the `type_per_edge` tensor represents the type
 * id of each edge.
 */
class FusedCSCSamplingGraph : public torch::CustomClassHolder {
 public:
  using NodeTypeToIDMap = torch::Dict;
  using EdgeTypeToIDMap = torch::Dict;
  using NodeAttrMap = torch::Dict;
  using EdgeAttrMap = torch::Dict;

  /** @brief Default constructor. */
  FusedCSCSamplingGraph() = default;

  /**
   * @brief Constructor for CSC with data.
   * @param indptr The CSC format index pointer array.
   * @param indices The CSC format index array.
   * @param node_type_offset A tensor representing the offset of node types, if
   * present.
   * @param type_per_edge A tensor representing the type of each edge, if
   * present.
   * @param node_type_to_id A dictionary mapping node type names to type IDs, if
   * present.
   * @param edge_type_to_id A dictionary mapping edge type names to type IDs, if
   * present.
   * @param node_attributes A dictionary of node attributes, if present.
   * @param edge_attributes A dictionary of edge attributes, if present.
   *
   */
  FusedCSCSamplingGraph(
      const torch::Tensor& indptr, const torch::Tensor& indices,
      const torch::optional& node_type_offset = torch::nullopt,
      const torch::optional& type_per_edge = torch::nullopt,
      const torch::optional& node_type_to_id = torch::nullopt,
      const torch::optional& edge_type_to_id = torch::nullopt,
      const torch::optional& node_attributes = torch::nullopt,
      const torch::optional& edge_attributes = torch::nullopt);

  /**
   * @brief Create a fused CSC graph from tensors of CSC format.
   * @param indptr Index pointer array of the CSC.
   * @param indices Indices array of the CSC.
   * @param node_type_offset A tensor representing the offset of node types, if
   * present.
   * @param type_per_edge A tensor representing the type of each edge, if
   * present.
   * @param node_type_to_id A dictionary mapping node type names to type IDs, if
   * present.
   * @param edge_type_to_id A dictionary mapping edge type names to type IDs, if
   * present.
   * @param node_attributes A dictionary of node attributes, if present.
   * @param edge_attributes A dictionary of edge attributes, if present.
   *
   * @return FusedCSCSamplingGraph
   */
  static c10::intrusive_ptr Create(
      const torch::Tensor& indptr, const torch::Tensor& indices,
      const torch::optional& node_type_offset,
      const torch::optional& type_per_edge,
      const torch::optional& node_type_to_id,
      const torch::optional& edge_type_to_id,
      const torch::optional& node_attributes,
      const torch::optional& edge_attributes);

  /**
   * @brief Get the number of nodes, i.e. one less than the length of the
   * index pointer tensor.
   */
  int64_t NumNodes() const { return indptr_.size(0) - 1; }

  /** @brief Get the number of edges, i.e. the length of the index tensor. */
  int64_t NumEdges() const { return indices_.size(0); }

  /** @brief Get the csc index pointer tensor.
*/
  const torch::Tensor CSCIndptr() const { return indptr_; }

  /** @brief Get the index tensor. */
  const torch::Tensor Indices() const { return indices_; }

  /** @brief Get the node type offset tensor for a heterogeneous graph. */
  inline const torch::optional NodeTypeOffset() const {
    return node_type_offset_;
  }

  /** @brief Get the edge type tensor for a heterogeneous graph. */
  inline const torch::optional TypePerEdge() const { return type_per_edge_; }

  /**
   * @brief Get the node type to id map for a heterogeneous graph.
   * @note The map is a dictionary mapping node type names to type IDs.
   */
  inline const torch::optional NodeTypeToID() const {
    return node_type_to_id_;
  }

  /**
   * @brief Get the edge type to id map for a heterogeneous graph.
   * @note The map is a dictionary mapping edge type names to type IDs.
   */
  inline const torch::optional EdgeTypeToID() const {
    return edge_type_to_id_;
  }

  /** @brief Get the node attributes dictionary. */
  inline const torch::optional NodeAttributes() const {
    return node_attributes_;
  }

  /** @brief Get the edge attributes dictionary. */
  inline const torch::optional EdgeAttributes() const {
    return edge_attributes_;
  }

  /**
   * @brief Get the node attribute tensor by name.
   *
   * If the input name is not provided (nullopt), return nullopt. Otherwise,
   * return the node attribute tensor by name.
   *
   * @note Fails via TORCH_CHECK if a name is given but the node attributes
   * dictionary is absent or does not contain that name.
   */
  inline torch::optional NodeAttribute(torch::optional name) const {
    if (!name.has_value()) {
      return torch::nullopt;
    }
    TORCH_CHECK(
        node_attributes_.has_value() &&
            node_attributes_.value().contains(name.value()),
        "Node attribute ", name.value(), " does not exist.");
    return torch::optional(node_attributes_.value().at(name.value()));
  }

  /**
   * @brief Get the edge attribute tensor by name.
   *
   * If the input name is empty, return nullopt. Otherwise, return the edge
   * attribute tensor by name.
   */
  inline torch::optional EdgeAttribute(torch::optional name) const {
    // A missing name means "no attribute requested".
    if (!name.has_value()) {
      return torch::nullopt;
    }
    // Fails if the dictionary is absent or does not contain the name.
    TORCH_CHECK(
        edge_attributes_.has_value() &&
            edge_attributes_.value().contains(name.value()),
        "Edge attribute ", name.value(), " does not exist.");
    return torch::optional(edge_attributes_.value().at(name.value()));
  }

  /** @brief Set the csc index pointer tensor. */
  inline void SetCSCIndptr(const torch::Tensor& indptr) { indptr_ = indptr; }

  /** @brief Set the index tensor. */
  inline void SetIndices(const torch::Tensor& indices) { indices_ = indices; }

  /** @brief Set the node type offset tensor for a heterogeneous graph. */
  inline void SetNodeTypeOffset(const torch::optional& node_type_offset) {
    node_type_offset_ = node_type_offset;
  }

  /** @brief Set the edge type tensor for a heterogeneous graph. */
  inline void SetTypePerEdge(const torch::optional& type_per_edge) {
    type_per_edge_ = type_per_edge;
  }

  /**
   * @brief Set the node type to id map for a heterogeneous graph.
   * @note The map is a dictionary mapping node type names to type IDs.
   */
  inline void SetNodeTypeToID(const torch::optional& node_type_to_id) {
    node_type_to_id_ = node_type_to_id;
  }

  /**
   * @brief Set the edge type to id map for a heterogeneous graph.
   * @note The map is a dictionary mapping edge type names to type IDs.
   */
  inline void SetEdgeTypeToID(const torch::optional& edge_type_to_id) {
    edge_type_to_id_ = edge_type_to_id;
  }

  /** @brief Set the node attributes dictionary. */
  inline void SetNodeAttributes(const torch::optional& node_attributes) {
    node_attributes_ = node_attributes;
  }

  /** @brief Set the edge attributes dictionary. */
  inline void SetEdgeAttributes(const torch::optional& edge_attributes) {
    edge_attributes_ = edge_attributes;
  }

  /** @brief Add node attribute by name. */
  inline void AddNodeAttribute(
      const std::string& name, const torch::Tensor& node_attribute) {
    // Create the dictionary on first use; an existing entry with the same
    // name is overwritten.
    if (!node_attributes_.has_value()) {
      node_attributes_ = NodeAttrMap();
    }
    node_attributes_.value().insert_or_assign(name, node_attribute);
  }

  /**
   * @brief Add edge attribute by name. Creates the edge attributes dictionary
   * if it does not exist yet and overwrites an existing entry with the same
   * name.
   */
  inline void AddEdgeAttribute(
      const std::string& name, const torch::Tensor& edge_attribute) {
    if (!edge_attributes_.has_value()) {
      edge_attributes_ = EdgeAttrMap();
    }
    edge_attributes_.value().insert_or_assign(name, edge_attribute);
  }

  /**
   * @brief Magic number to indicate graph version in serialize/deserialize
   * stage.
   */
  static constexpr int64_t kCSCSamplingGraphSerializeMagic =
      0xDD2E60F0F6B4A128;

  /**
   * @brief Load graph from stream.
   * @param archive Input stream for deserializing.
   */
  void Load(torch::serialize::InputArchive& archive);

  /**
   * @brief Save graph to stream.
   * @param archive Output stream for serializing.
   */
  void Save(torch::serialize::OutputArchive& archive) const;

  /**
   * @brief Pickle method for deserializing.
   * @param state The state of serialized FusedCSCSamplingGraph.
   */
  void SetState(const torch::Dict>& state);

  /**
   * @brief Pickle method for serializing.
   * @returns The state of this FusedCSCSamplingGraph.
   */
  torch::Dict> GetState() const;

  /**
   * @brief Return the subgraph induced on the inbound edges of the given nodes.
   * @param nodes Type agnostic node IDs to form the subgraph.
   *
   * @return FusedSampledSubgraph.
   */
  c10::intrusive_ptr InSubgraph(const torch::Tensor& nodes) const;

  /**
   * @brief Sample neighboring edges of the given nodes and return the induced
   * subgraph.
   *
   * @param seeds The nodes from which to sample neighbors. If not provided,
   * assumed to be equal to torch.arange(NumNodes()).
   * @param seed_offsets The offsets of the given seeds,
   * seeds[seed_offsets[i]: seed_offsets[i + 1]] has node type id i.
   * @param fanouts The number of edges to be sampled for each node with or
   * without considering edge types.
* - When the length is 1, it indicates that the fanout applies to all * neighbors of the node as a collective, regardless of the edge type. * - Otherwise, the length should be equal to the number of edge types, and * each fanout value corresponds to a specific edge type of the node. * The value of each fanout should be >= 0 or = -1. * - When the value is -1, all neighbors will be chosen for sampling. It is * equivalent to selecting all neighbors with non-zero probability when the * fanout is >= the number of neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum * threshold for selecting neighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param layer Boolean indicating whether neighbors should be sampled in a * layer sampling fashion. Uses the LABOR-0 algorithm to increase overlap of * sampled edges, see arXiv:2210.13339. * @param returning_indices_is_optional Boolean indicating whether returning * the indices tensor is optional. * @param probs_or_mask An optional edge attribute tensor for probabilities * or masks. This attribute tensor should contain (unnormalized) * probabilities corresponding to each neighboring edge of a node. It must be * a 1D floating-point or boolean tensor, with the number of elements * equalling the total number of edges. * @param random_seed The random seed for the sampler for layer=True. * @param seed2_contribution The contribution of the second random seed, * [0, 1) for layer=True. * * @return An intrusive pointer to a FusedSampledSubgraph object containing * the sampled graph's information.
*/ c10::intrusive_ptr SampleNeighbors( torch::optional seeds, torch::optional> seed_offsets, const std::vector& fanouts, bool replace, bool layer, bool returning_indices_is_optional, torch::optional probs_or_mask, torch::optional random_seed, double seed2_contribution) const; /** @brief Variant of SampleNeighbors with the same parameter list; judging by its name it runs asynchronously (TODO confirm the semantics of the returned handle). */ c10::intrusive_ptr>> SampleNeighborsAsync( torch::optional seeds, torch::optional> seed_offsets, const std::vector& fanouts, bool replace, bool layer, bool returning_indices_is_optional, torch::optional probs_or_mask, torch::optional random_seed, double seed2_contribution) const; /** * @brief Sample neighboring edges of the given nodes with a temporal * constraint. If `node_timestamp_attr_name` or `edge_timestamp_attr_name` is * given, the sampled neighbors or edges of an input node must have a * timestamp that is smaller than that of the input node. * * @param seeds The seed nodes from which to sample neighbors. * @param seed_offsets The offsets of the given seeds, * seeds[seed_offsets[i]: seed_offsets[i + 1]] has node type id i. * @param seeds_timestamp The timestamp of the nodes. * @param fanouts The number of edges to be sampled for each node with or * without considering edge types, following the same rules as in * SampleNeighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param layer Boolean indicating whether neighbors should be sampled in a * layer sampling fashion. Uses the LABOR-0 algorithm to increase overlap of * sampled edges, see arXiv:2210.13339. * @param returning_indices_is_optional Boolean indicating whether returning * the indices tensor is optional. * @param seeds_pre_time_window The time window of the seed nodes represents * a period of time before `seeds_timestamp`.
If provided, only * neighbors and related edges whose timestamps fall within * `[seeds_timestamp - seeds_pre_time_window, seeds_timestamp]` will be * filtered. * @param probs_or_mask An optional edge attribute tensor for probabilities * or masks, following the same rules as in SampleNeighbors. * @param node_timestamp_attr_name An optional string specifying the name of * the node attribute that contains the timestamp of nodes in the graph. * @param edge_timestamp_attr_name An optional string specifying the name of * the edge attribute that contains the timestamp of edges in the graph. * @param random_seed Optional random seed; see the parameter of the same * name in SampleNeighbors. * @param seed2_contribution See the parameter of the same name in * SampleNeighbors. * * @return An intrusive pointer to a FusedSampledSubgraph object containing * the sampled graph's information. * */ c10::intrusive_ptr TemporalSampleNeighbors( const torch::optional& seeds, const torch::optional>& seed_offsets, const torch::Tensor& seeds_timestamp, const std::vector& fanouts, bool replace, bool layer, bool returning_indices_is_optional, torch::optional seeds_pre_time_window, torch::optional probs_or_mask, torch::optional node_timestamp_attr_name, torch::optional edge_timestamp_attr_name, torch::optional random_seed, double seed2_contribution) const; /** * @brief Copy the graph to shared memory. * @param shared_memory_name The name of the shared memory. * * @return A new FusedCSCSamplingGraph object on shared memory. */ c10::intrusive_ptr CopyToSharedMemory( const std::string& shared_memory_name); /** * @brief Load the graph from shared memory. * @param shared_memory_name The name of the shared memory. * * @return A new FusedCSCSamplingGraph object on shared memory. */ static c10::intrusive_ptr LoadFromSharedMemory( const std::string& shared_memory_name); /** * @brief Hold the shared memory objects of the tensor metadata and data. * @note Shared memory used to hold the tensor metadata and data of this * class. By storing its shared memory objects, the graph controls the * resources of shared memory, which will be released automatically when the * graph is destroyed.
This function is for internal use by CopyToSharedMemory * and LoadFromSharedMemory. Please contact the DGL team if you need to use * it. * @param tensor_metadata_shm The shared memory objects of tensor metadata. * @param tensor_data_shm The shared memory objects of tensor data. */ void HoldSharedMemoryObject( SharedMemoryPtr tensor_metadata_shm, SharedMemoryPtr tensor_data_shm); private: template c10::intrusive_ptr SampleNeighborsImpl( const torch::Tensor& seeds, const torch::optional>& seed_offsets, const std::vector& fanouts, NumPickFn num_pick_fn, PickFn pick_fn) const; /** @brief CSC format index pointer array. */ torch::Tensor indptr_; /** @brief CSC format index array. */ torch::Tensor indices_; /** * @brief Offset array of node type. The length of it is equal to the number * of node types + 1. The tensor is in ascending order as nodes of the same * type have continuous IDs, and larger node IDs are paired with larger node * type IDs. Its first value is 0 and last value is the number of nodes. And * nodes with ID between `node_type_offset_[i] ~ node_type_offset_[i+1]` are * of type id `i`. */ torch::optional node_type_offset_; /** * @brief Type id of each edge, where type id is the corresponding index of * edge types. The length of it is equal to the number of edges. */ torch::optional type_per_edge_; /** * @brief A dictionary mapping node type names to type IDs. The length of it * is equal to the number of node types. The key is the node type name, and * the value is the corresponding type ID. */ torch::optional node_type_to_id_; /** * @brief A dictionary mapping edge type names to type IDs. The length of it * is equal to the number of edge types. The key is the edge type name, and * the value is the corresponding type ID. */ torch::optional edge_type_to_id_; /** * @brief A dictionary of node attributes. Each key represents the attribute's * name, while the corresponding value holds the attribute's specific value. 
* The length of each value should match the total number of nodes. */ torch::optional node_attributes_; /** * @brief A dictionary of edge attributes. Each key represents the attribute's * name, while the corresponding value holds the attribute's specific value. * The length of each value should match the total number of edges. */ torch::optional edge_attributes_; /** * @brief Shared memory used to hold the tensor metadata and data of this * class. By storing its shared memory objects, the graph controls the * resources of shared memory, which will be released automatically when the * graph is destroyed. */ SharedMemoryPtr tensor_metadata_shm_, tensor_data_shm_; }; /** * @brief Calculate the number of neighbors to be picked for the given node. * * @param fanout The number of edges to be sampled for each node. It should be * >= 0 or -1. * - When the value is -1, all neighbors (with non-zero probability, if * weighted) will be chosen for sampling. It is equivalent to selecting all * neighbors with non-zero probability when the fanout is >= the number of * neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum * threshold for selecting neighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * @param offset The starting edge ID for the connected neighbors of the given * node. * @param num_neighbors The number of neighbors of this node. * @param num_picked_ptr The pointer of the tensor which stores the pick * numbers.
*/ template void NumPick( int64_t fanout, bool replace, const torch::optional& probs_or_mask, int64_t offset, int64_t num_neighbors, PickedNumType* num_picked_ptr); int64_t TemporalNumPick( torch::Tensor seed_timestamp, torch::Tensor csc_indics, int64_t fanout, bool replace, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, int64_t seed_offset, int64_t offset, int64_t num_neighbors); template void NumPickByEtype( bool with_seed_offsets, const std::vector& fanouts, bool replace, const torch::Tensor& type_per_edge, const torch::optional& probs_or_mask, int64_t offset, int64_t num_neighbors, PickedNumType* num_picked_ptr, int64_t seed_index, const std::vector& etype_id_to_num_picked_offset); int64_t TemporalNumPickByEtype( torch::Tensor seed_timestamp, torch::Tensor csc_indices, const std::vector& fanouts, bool replace, const torch::Tensor& type_per_edge, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, int64_t seed_offset, int64_t offset, int64_t num_neighbors); /** * @brief Picks a specified number of neighbors for a node, starting from the * given offset and having the specified number of neighbors. * * If 'probs_or_mask' is provided, it indicates that the sampling is * non-uniform. In such cases: * - When the number of neighbors with non-zero probability is less than or * equal to fanout, all neighbors with non-zero probability will be selected. * - When the number of neighbors with non-zero probability exceeds fanout, the * sampling process will select 'fanout' elements based on their respective * probabilities. Higher probabilities will increase the chances of being chosen * during the sampling process. * * @param offset The starting edge ID for the connected neighbors of the sampled * node. 
* @param num_neighbors The number of neighbors of the sampled node. * @param fanout The number of edges to be sampled for each node. It should be * >= 0 or -1. * - When the value is -1, all neighbors will be chosen for sampling. It is * equivalent to selecting all neighbors with non-zero probability when the * fanout is >= the number of neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum * threshold for selecting neighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param options Tensor options specifying the desired data type of the result. * @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * @param picked_data_ptr The destination address where the picked neighbors * should be put. Enough memory space should be allocated in advance.
*/ template int64_t Pick( int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace, const torch::TensorOptions& options, const torch::optional& probs_or_mask, SamplerArgs args, PickedType* picked_data_ptr); template std::enable_if_t Pick( int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace, const torch::TensorOptions& options, const torch::optional& probs_or_mask, SamplerArgs args, PickedType* picked_data_ptr); template int64_t TemporalPick( torch::Tensor seed_timestamp, torch::Tensor csc_indices, int64_t seed_offset, int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace, const torch::TensorOptions& options, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, PickedType* picked_data_ptr); /** * @brief Picks a specified number of neighbors for a node per edge type, * starting from the given offset and having the specified number of neighbors. * * @param offset The starting edge ID for the connected neighbors of the sampled * node. * @param num_neighbors The number of neighbors of the sampled node. * @param fanouts The edge sampling numbers corresponding to each edge type for * a single node. The value of each fanout should be >= 0 or = -1. * - When the value is -1, all neighbors with non-zero probability will be * chosen for sampling. It is equivalent to selecting all neighbors when the * fanout is >= the number of neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum threshold * for selecting neighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param options Tensor options specifying the desired data type of the result. * @param type_per_edge Tensor representing the type of each edge in the * original graph.
* @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * @param picked_data_ptr The pointer of the tensor where the picked neighbors * should be put. Enough memory space should be allocated in advance. * @param seed_offset The offset(index) of the seed among the group of seeds * which share the same node type. * @param subgraph_indptr_ptr The pointer of the tensor which stores the indptr * of the sampled subgraph. * @param etype_id_to_num_picked_offset A vector storing the mappings from each * etype_id to the offset of its pick numbers in the tensor. */ template int64_t PickByEtype( bool with_seed_offsets, int64_t offset, int64_t num_neighbors, const std::vector& fanouts, bool replace, const torch::TensorOptions& options, const torch::Tensor& type_per_edge, const torch::optional& probs_or_mask, SamplerArgs args, PickedType* picked_data_ptr, int64_t seed_offset, PickedType* subgraph_indptr_ptr, const std::vector& etype_id_to_num_picked_offset); template int64_t TemporalPickByEtype( torch::Tensor seed_timestamp, torch::Tensor csc_indices, int64_t seed_offset, int64_t offset, int64_t num_neighbors, const std::vector& fanouts, bool replace, const torch::TensorOptions& options, const torch::Tensor& type_per_edge, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, PickedType* picked_data_ptr); template < bool NonUniform, bool Replace, typename ProbsType, SamplerType S, typename PickedType, int StackSize = 1024> std::enable_if_t LaborPick( int64_t offset, int64_t num_neighbors, int64_t fanout, const torch::TensorOptions& options, const torch::optional& probs_or_mask, SamplerArgs args, PickedType* picked_data_ptr); } // namespace sampling } // namespace 
graphbolt #endif // GRAPHBOLT_CSC_SAMPLING_GRAPH_H_ ================================================ FILE: graphbolt/include/graphbolt/fused_sampled_subgraph.h ================================================ /** * Copyright (c) 2023 by Contributors * @file graphbolt/fused_sampled_subgraph.h * @brief Header file of sampled sub graph. */ #ifndef GRAPHBOLT_FUSED_SAMPLED_SUBGRAPH_H_ #define GRAPHBOLT_FUSED_SAMPLED_SUBGRAPH_H_ #include #include namespace graphbolt { namespace sampling { /** * @brief Struct representing a sampled subgraph. * * Example usage: * * Suppose the subgraph has 3 nodes and 4 edges. * ``` * auto indptr = torch::tensor({0, 2, 3, 4}, {torch::kInt64}); * auto indices = torch::tensor({55, 101, 3, 3}, {torch::kInt64}); * auto original_edge_ids = torch::tensor({0, 1, 2, 3}, {torch::kInt64}); * auto original_column_node_ids = torch::tensor({3, 3, 101}, {torch::kInt64}); * * FusedSampledSubgraph sampledSubgraph(indptr, indices, original_edge_ids, * original_column_node_ids); * ``` * * The `original_column_node_ids` indicates that nodes `[3, 3, 101]` in the * original graph are mapped to `[0, 1, 2]` in this subgraph, and because * `original_row_node_ids` is `Null`, `{55, 101, 3, 3}` in `indices` is just * the original node ids without compaction. * * If `original_row_node_ids = torch::tensor({55, 101, 3}, {torch::kInt64})`, * it would indicate a different mapping for the row nodes. Note this is * inconsistent with column, which is legal, as `3` is mapped to `0` and `1` in * the column while `2` in the row. */ struct FusedSampledSubgraph : torch::CustomClassHolder { public: /** * @brief Constructor for the FusedSampledSubgraph struct. * * @param indptr CSC format index pointer array. * @param indices CSC format index array. * @param original_column_node_ids Column's reverse node ids in the original * graph. * @param original_row_node_ids Row's reverse node ids in the original * graph. * @param original_edge_ids Mapping of subgraph edge IDs to original * FusedCSCSamplingGraph edge IDs. * @param type_per_edge Type id of each edge.
* @param etype_offsets Edge offsets for the sampled edges that are sorted * w.r.t. edge types. */ FusedSampledSubgraph( torch::Tensor indptr, torch::optional indices, torch::Tensor original_edge_ids, torch::optional original_column_node_ids, torch::optional original_row_node_ids = torch::nullopt, torch::optional type_per_edge = torch::nullopt, torch::optional etype_offsets = torch::nullopt) : indptr(indptr), indices(indices), original_edge_ids(original_edge_ids), original_column_node_ids(original_column_node_ids), original_row_node_ids(original_row_node_ids), type_per_edge(type_per_edge), etype_offsets(etype_offsets) {} FusedSampledSubgraph() = default; /** * @brief CSC format index pointer array, where the implicit node ids are * already compacted. And the original ids are stored in the * `original_column_node_ids` field. Its length is equal to: * 1 + \sum_{etype} #seeds with dst_node_type(etype) */ torch::Tensor indptr; /** * @brief CSC format index array, where the node ids can be compacted ids or * original ids. If compacted, the original ids are stored in the * `original_row_node_ids` field. The indices are sorted w.r.t. their edge * types for the heterogeneous case. * * @note This is optional if its fetch operation will be performed later using * the original_edge_ids tensor. */ torch::optional indices; /** * @brief Mapping of subgraph edge IDs to original FusedCSCSamplingGraph * edge IDs. * * In this subgraph, the edge at index i corresponds to the edge with ID * original_edge_ids[i] in the original FusedCSCSamplingGraph. Edges are * sorted by type for heterogeneous graphs. * * Note: To retrieve the actual original edge IDs for feature fetching, use * the `_ORIGINAL_EDGE_ID` edge attribute in FusedCSCSamplingGraph to map the * `original_edge_ids` again, as IDs may have been remapped during conversion * to FusedCSCSamplingGraph. */ torch::Tensor original_edge_ids; /** * @brief Column's reverse node ids in the original graph.
A graph structure * can be treated as a coordinated row and column pair, and this is the * mapped ids of the column. * * @note This is optional and the mapping relations can be inconsistent with * row's. It can be missing when the sampling algorithm is called via a * sliced sampled subgraph with missing seeds argument. */ torch::optional original_column_node_ids; /** * @brief Row's reverse node ids in the original graph. A graph structure * can be treated as a coordinated row and column pair, and this is the * mapped ids of the row. * * @note This is optional and the mapping relations can be inconsistent with * column's. */ torch::optional original_row_node_ids; /** * @brief Type id of each edge, where type id is the corresponding index of * edge types. The length of it is equal to the number of edges in the * subgraph. * * @note This output is not created by the CUDA implementation as the edges * are sorted w.r.t. edge types, one has to use etype_offsets to infer the edge * type information. This field is going to be deprecated. It can be generated * when needed by computing gb.expand_indptr(etype_offsets). */ torch::optional type_per_edge; /** * @brief Offsets of each etype, * type_per_edge[etype_offsets[i]: etype_offsets[i + 1]] == i * It has length equal to (1 + #etype), and the edges are guaranteed to be * sorted w.r.t. their edge types. */ torch::optional etype_offsets; }; } // namespace sampling } // namespace graphbolt #endif // GRAPHBOLT_FUSED_SAMPLED_SUBGRAPH_H_ ================================================ FILE: graphbolt/include/graphbolt/isin.h ================================================ /** * Copyright (c) 2023 by Contributors * * @file graphbolt/isin.h * @brief isin op. */ #ifndef GRAPHBOLT_ISIN_H_ #define GRAPHBOLT_ISIN_H_ #include #include namespace graphbolt { namespace sampling { /** * @brief Tests if each element of elements is in test_elements.
Returns a * boolean tensor of the same shape as elements that is True for elements * in test_elements and False otherwise. Enhances torch.isin by implementing * multi-threaded searching, as detailed in the documentation at * https://pytorch.org/docs/stable/generated/torch.isin.html. * * @param elements Input elements * @param test_elements Values against which to test for each input element. * * @return * A boolean tensor of the same shape as elements that is True for elements * in test_elements and False otherwise. */ torch::Tensor IsIn( const torch::Tensor& elements, const torch::Tensor& test_elements); /** * @brief Tests if each element of elements is not in test_elements. Returns an * int64_t tensor of the same shape as elements containing the indexes of the * elements not found in test_elements. * * @param elements Input elements * @param test_elements Values against which to test for each input element. * * @return An int64_t tensor of the same shape as elements containing indexes of * elements not found in test_elements. */ torch::Tensor IsNotInIndex( const torch::Tensor& elements, const torch::Tensor& test_elements); c10::intrusive_ptr> IsNotInIndexAsync( const torch::Tensor& elements, const torch::Tensor& test_elements); } // namespace sampling } // namespace graphbolt #endif // GRAPHBOLT_ISIN_H_ ================================================ FILE: graphbolt/include/graphbolt/serialize.h ================================================ /** * Copyright (c) 2023 by Contributors * @file graphbolt/serialize.h * @brief Utility functions for serialize and deserialize. */ #ifndef GRAPHBOLT_SERIALIZE_H_ #define GRAPHBOLT_SERIALIZE_H_ #include #include #include #include /** * @brief Overload stream operator to enable `torch::save()` and `torch::load()` * for FusedCSCSamplingGraph. */ namespace torch { /** * @brief Overload input stream operator for FusedCSCSamplingGraph * deserialization. This enables `torch::load()` for FusedCSCSamplingGraph.
* * @param archive Input stream for deserializing. * @param graph FusedCSCSamplingGraph. * * @return archive * * @code * auto&& graph = c10::make_intrusive(); * torch::load(*graph, filename); */ inline serialize::InputArchive& operator>>( serialize::InputArchive& archive, graphbolt::sampling::FusedCSCSamplingGraph& graph); /** * @brief Overload output stream operator for FusedCSCSamplingGraph * serialization. This enables `torch::save()` for FusedCSCSamplingGraph. * @param archive Output stream for serializing. * @param graph FusedCSCSamplingGraph. * * @return archive * * @code * auto&& graph = c10::make_intrusive(); * torch::save(*graph, filename); */ inline serialize::OutputArchive& operator<<( serialize::OutputArchive& archive, const graphbolt::sampling::FusedCSCSamplingGraph& graph); } // namespace torch namespace graphbolt { /** * @brief Read data from archive and format to specified type. * @param archive Input archive. * @param key Key name of data. * * @return data. */ template T read_from_archive( torch::serialize::InputArchive& archive, const std::string& key) { torch::IValue data; archive.read(key, data); return data.to(); } } // namespace graphbolt #endif // GRAPHBOLT_SERIALIZE_H_ ================================================ FILE: graphbolt/include/graphbolt/shared_memory.h ================================================ /** * Copyright (c) 2023 by Contributors * * @file graphbolt/shared_memory.h * @brief Header file of graphbolt shared memory. */ #ifndef GRAPHBOLT_SHARED_MEMORY_H_ #define GRAPHBOLT_SHARED_MEMORY_H_ #ifdef _WIN32 #include #endif // _WIN32 #include #include namespace graphbolt { namespace sampling { /** * @brief The SharedMemory is responsible for storing all the necessary * parameters of the buffer. Each SharedMemory instance is associated with a * shared memory object. The object will be removed when the associated * SharedMemory instance is destroyed. */ class SharedMemory { public: /** * @brief Constructor of the shared memory. 
* @param name The name of the shared memory. */ explicit SharedMemory(const std::string& name); SharedMemory(const SharedMemory&) = delete; SharedMemory& operator=(const SharedMemory&) = delete; /** * @brief The destructor is responsible for unmapping the shared memory and * removing the associated shared memory object. */ ~SharedMemory(); /** @brief Get the name of shared memory. */ std::string GetName() const { return name_; } /** @brief Get the pointer to the shared memory. */ void* GetMemory() const { return ptr_; } /** @brief Get the size of the shared memory. */ size_t GetSize() const { return size_; } /** * @brief Creates the shared memory object and map the shared memory. * * @param size The size of the shared memory. * @return The pointer to the shared memory. */ void* Create(size_t size); /** * @brief Open the created shared memory object and map the shared memory. * */ void* Open(); /** * @brief Check if the shared memory exists. * * @param name The name of the shared memory. * @return True if the shared memory exists, otherwise False. */ static bool Exists(const std::string& name); private: /** @brief The name of the shared memory. */ std::string name_; /** @brief The size of the shared memory. */ size_t size_; /** @brief The pointer of the shared memory. */ void* ptr_; #ifdef _WIN32 /** @brief The handle of the shared memory object. */ HANDLE handle_; #else // _WIN32 /** @brief The file descriptor of the shared memory object. */ int file_descriptor_; /** * @brief Whether the shared memory is created by the instance. * * The instance that creates the shared memory object is responsible for * unlinking the shared memory object. 
*/ bool is_creator_; #endif // _WIN32 }; using SharedMemoryPtr = std::unique_ptr; } // namespace sampling } // namespace graphbolt #endif // GRAPHBOLT_SHARED_MEMORY_H_ ================================================ FILE: graphbolt/include/graphbolt/unique_and_compact.h ================================================ /** * Copyright (c) 2023 by Contributors * * @file unique_and_compact.h * @brief Unique and compact op. */ #ifndef GRAPHBOLT_UNIQUE_AND_COMPACT_H_ #define GRAPHBOLT_UNIQUE_AND_COMPACT_H_ #include #include namespace graphbolt { namespace sampling { /** * @brief Removes duplicate elements from the concatenated 'unique_dst_ids' and * 'src_ids' tensor and applies the uniqueness information to compact both * source and destination tensors. * * The function performs two main operations: * 1. Unique Operation: 'unique(concat(unique_dst_ids, src_ids))', in which * the unique operator will guarantee the 'unique_dst_ids' are at the head of * the result tensor. * 2. Compact Operation: Utilizes the reverse mapping derived from the unique * operation to transform 'src_ids' and 'dst_ids' into compacted IDs. * * When world_size is greater than 1, then the given ids are partitioned between * the available ranks. The ids corresponding to the given rank are guaranteed * to come before the ids of other ranks. To do this, the partition ids are * rotated backwards by the given rank so that the ids are ordered as: * [rank, rank + 1, ..., world_size - 1, 0, ..., rank - 1]. This is supported * only for Volta and later generation NVIDIA GPUs. * * @param src_ids A tensor containing source IDs. * @param dst_ids A tensor containing destination IDs. * @param unique_dst_ids A tensor containing unique destination IDs, which is * exactly all the unique elements in 'dst_ids'. * @param rank The rank of the current GPU. * @param world_size The total # GPUs, world size.
* * @return (unique_ids, compacted_src_ids, compacted_dst_ids, unique_offsets) * - A tensor representing all unique elements in 'src_ids' and 'dst_ids' after * removing duplicates. The indices in this tensor precisely match the compacted * IDs of the corresponding elements. * - The tensor corresponding to the 'src_ids' tensor, where the entries are * mapped to compacted IDs. * - The tensor corresponding to the 'dst_ids' tensor, where the entries are * mapped to compacted IDs. * - The tensor corresponding to the offsets into the unique_ids tensor. Has * size `world_size + 1` and unique_ids[offsets[i]: offsets[i + 1]] belongs to * the rank `(rank + i) % world_size`. * * @example * torch::Tensor src_ids = src; * torch::Tensor dst_ids = dst; * torch::Tensor unique_dst_ids = torch::unique(dst); * // Single process: rank 0 out of a world size of 1. * auto result = UniqueAndCompact(src_ids, dst_ids, unique_dst_ids, 0, 1); * torch::Tensor unique_ids = std::get<0>(result); * torch::Tensor compacted_src_ids = std::get<1>(result); * torch::Tensor compacted_dst_ids = std::get<2>(result); * torch::Tensor unique_offsets = std::get<3>(result); */ std::tuple UniqueAndCompact( const torch::Tensor& src_ids, const torch::Tensor& dst_ids, const torch::Tensor unique_dst_ids, const int64_t rank, const int64_t world_size); std::vector< std::tuple> UniqueAndCompactBatched( const std::vector& src_ids, const std::vector& dst_ids, const std::vector unique_dst_ids, const int64_t rank, const int64_t world_size); c10::intrusive_ptr>>> UniqueAndCompactBatchedAsync( const std::vector& src_ids, const std::vector& dst_ids, const std::vector unique_dst_ids, const int64_t rank, const int64_t world_size); } // namespace sampling } // namespace graphbolt #endif // GRAPHBOLT_UNIQUE_AND_COMPACT_H_ ================================================ FILE: graphbolt/src/cache_policy.cc ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cache_policy.cc * @brief Cache policy implementation on the CPU. */ #include "./cache_policy.h" #include "./utils.h" namespace graphbolt { namespace storage { /** * @brief Looks up every key in the given policy and splits the keys into * found and missing ones. * * For each key, in order, policy.Read(key) is called. When it returns a * non-null pointer, the entry's position (getPos()), the pointer itself and * the key's index are appended at the front of the output buffers. Otherwise * the key's index and the key are written at the back of the `indices` and * `missing_keys` buffers, so missing entries end up in reverse order of * occurrence. * * @param policy The cache policy to query. * @param keys The keys to look up; the loop runs over keys.size(0) elements * (presumably a 1D tensor - TODO confirm). * * @return (positions, indices, missing_keys, found_pointers): positions and * found_pointers are sliced to the found entries only, indices is the * full-length buffer holding found indices first and missing indices after * them, and missing_keys is the tail starting at the number of found keys. */ template std::tuple BaseCachePolicy::QueryImpl(CachePolicy& policy, torch::Tensor keys) { auto positions = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto indices = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto found_ptr_tensor = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto missing_keys = torch::empty_like( keys, keys.options().pinned_memory(utils::is_pinned(keys))); int64_t found_cnt = 0; int64_t missing_cnt = keys.size(0); AT_DISPATCH_INDEX_TYPES( keys.scalar_type(), "BaseCachePolicy::Query::DispatchForKeys", ([&] { auto keys_ptr = keys.data_ptr(); auto positions_ptr = positions.data_ptr(); auto indices_ptr = indices.data_ptr(); static_assert( sizeof(CacheKey*) == sizeof(int64_t), "You need 64 bit pointers."); auto found_ptr = reinterpret_cast(found_ptr_tensor.data_ptr()); auto missing_keys_ptr = missing_keys.data_ptr(); for (int64_t i = 0; i < keys.size(0); i++) { const auto key = keys_ptr[i]; auto cache_key_ptr = policy.Read(key); if (cache_key_ptr) { positions_ptr[found_cnt] = cache_key_ptr->getPos(); found_ptr[found_cnt] = cache_key_ptr; indices_ptr[found_cnt++] = i; } else {
indices_ptr[--missing_cnt] = i; missing_keys_ptr[missing_cnt] = key; } } })); return { positions.slice(0, 0, found_cnt), indices, missing_keys.slice(0, found_cnt), found_ptr_tensor.slice(0, 0, found_cnt)}; } template std::tuple BaseCachePolicy::QueryAndReplaceImpl(CachePolicy& policy, torch::Tensor keys) { auto positions = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto indices = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto pointers = torch::empty_like(keys, keys.options().dtype(torch::kInt64)); auto missing_keys = torch::empty_like( keys, keys.options().pinned_memory(utils::is_pinned(keys))); int64_t found_cnt = 0; int64_t missing_cnt = keys.size(0); AT_DISPATCH_INDEX_TYPES( keys.scalar_type(), "BaseCachePolicy::Replace", ([&] { auto keys_ptr = keys.data_ptr(); auto positions_ptr = positions.data_ptr(); auto indices_ptr = indices.data_ptr(); static_assert( sizeof(CacheKey*) == sizeof(int64_t), "You need 64 bit pointers."); auto pointers_ptr = reinterpret_cast(pointers.data_ptr()); auto missing_keys_ptr = missing_keys.data_ptr(); set_t position_set; position_set.reserve(keys.size(0)); // Query and Replace combined. for (int64_t i = 0; i < keys.size(0); i++) { const auto key = keys_ptr[i]; const auto [it, can_read] = policy.Emplace(key); if (can_read) { auto& cache_key = *it->second; positions_ptr[found_cnt] = cache_key.getPos(); pointers_ptr[found_cnt] = &cache_key; indices_ptr[found_cnt++] = i; } else { indices_ptr[--missing_cnt] = i; missing_keys_ptr[missing_cnt] = key; // Ensure that even if an offset is added, it stays negative. auto position = std::numeric_limits::min(); CacheKey* cache_key_ptr = nullptr; if (it->second == policy.getMapSentinelValue()) { cache_key_ptr = policy.Insert(it); position = cache_key_ptr->getPos(); TORCH_CHECK( // We check for the uniqueness of the positions. 
std::get<1>(position_set.insert(position)), "Can't insert all, larger cache capacity is needed."); } positions_ptr[missing_cnt] = position; pointers_ptr[missing_cnt] = cache_key_ptr; } } })); return {positions, indices, pointers, missing_keys.slice(0, found_cnt)}; } template std::tuple BaseCachePolicy::ReplaceImpl( CachePolicy& policy, torch::Tensor keys) { auto positions = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto pointers = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); AT_DISPATCH_INDEX_TYPES( keys.scalar_type(), "BaseCachePolicy::Replace", ([&] { auto keys_ptr = keys.data_ptr(); auto positions_ptr = positions.data_ptr(); static_assert( sizeof(CacheKey*) == sizeof(int64_t), "You need 64 bit pointers."); auto pointers_ptr = reinterpret_cast(pointers.data_ptr()); set_t position_set; position_set.reserve(keys.size(0)); for (int64_t i = 0; i < keys.size(0); i++) { const auto key = keys_ptr[i]; // Ensure that even if an offset is added, it stays negative. auto position = std::numeric_limits::min(); CacheKey* cache_key_ptr = nullptr; const auto [it, _] = policy.Emplace(key); if (it->second == policy.getMapSentinelValue()) { cache_key_ptr = policy.Insert(it); position = cache_key_ptr->getPos(); TORCH_CHECK( // We check for the uniqueness of the positions. 
std::get<1>(position_set.insert(position)), "Can't insert all, larger cache capacity is needed."); } positions_ptr[i] = position; pointers_ptr[i] = cache_key_ptr; } })); return {positions, pointers}; } template void BaseCachePolicy::ReadingWritingCompletedImpl(torch::Tensor pointers) { static_assert( sizeof(CacheKey*) == sizeof(int64_t), "You need 64 bit pointers."); auto pointers_ptr = reinterpret_cast(pointers.data_ptr()); for (int64_t i = 0; i < pointers.size(0); i++) { const auto pointer = pointers_ptr[i]; if (!write || pointer) { pointer->EndUse(); } } } void BaseCachePolicy::ReadingCompleted(torch::Tensor pointers) { ReadingWritingCompletedImpl(pointers); } void BaseCachePolicy::WritingCompleted(torch::Tensor pointers) { ReadingWritingCompletedImpl(pointers); } S3FifoCachePolicy::S3FifoCachePolicy(int64_t capacity) : BaseCachePolicy(capacity), ghost_queue_(capacity - capacity / 10), small_queue_size_target_(capacity / 10), small_queue_size_(0) { TORCH_CHECK(small_queue_size_target_ > 0, "Capacity is not large enough."); ghost_set_.reserve(ghost_queue_.Capacity()); key_to_cache_key_.reserve(kCapacityFactor * (capacity + 1)); } std::tuple S3FifoCachePolicy::Query(torch::Tensor keys) { return QueryImpl(*this, keys); } std::tuple S3FifoCachePolicy::QueryAndReplace(torch::Tensor keys) { return QueryAndReplaceImpl(*this, keys); } std::tuple S3FifoCachePolicy::Replace( torch::Tensor keys) { return ReplaceImpl(*this, keys); } SieveCachePolicy::SieveCachePolicy(int64_t capacity) // Ensure that queue_ is constructed first before accessing its `.end()`. 
: BaseCachePolicy(capacity), queue_(), hand_(queue_.end()) { TORCH_CHECK(capacity > 0, "Capacity needs to be positive."); key_to_cache_key_.reserve(kCapacityFactor * (capacity + 1)); } std::tuple SieveCachePolicy::Query(torch::Tensor keys) { return QueryImpl(*this, keys); } std::tuple SieveCachePolicy::QueryAndReplace(torch::Tensor keys) { return QueryAndReplaceImpl(*this, keys); } std::tuple SieveCachePolicy::Replace( torch::Tensor keys) { return ReplaceImpl(*this, keys); } LruCachePolicy::LruCachePolicy(int64_t capacity) : BaseCachePolicy(capacity) { TORCH_CHECK(capacity > 0, "Capacity needs to be positive."); key_to_cache_key_.reserve(kCapacityFactor * (capacity + 1)); } std::tuple LruCachePolicy::Query(torch::Tensor keys) { return QueryImpl(*this, keys); } std::tuple LruCachePolicy::QueryAndReplace(torch::Tensor keys) { return QueryAndReplaceImpl(*this, keys); } std::tuple LruCachePolicy::Replace( torch::Tensor keys) { return ReplaceImpl(*this, keys); } ClockCachePolicy::ClockCachePolicy(int64_t capacity) : BaseCachePolicy(capacity) { TORCH_CHECK(capacity > 0, "Capacity needs to be positive."); key_to_cache_key_.reserve(kCapacityFactor * (capacity + 1)); } std::tuple ClockCachePolicy::Query(torch::Tensor keys) { return QueryImpl(*this, keys); } std::tuple ClockCachePolicy::QueryAndReplace(torch::Tensor keys) { return QueryAndReplaceImpl(*this, keys); } std::tuple ClockCachePolicy::Replace( torch::Tensor keys) { return ReplaceImpl(*this, keys); } } // namespace storage } // namespace graphbolt ================================================ FILE: graphbolt/src/cache_policy.h ================================================ /** * Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cache_policy.h * @brief Cache policy implementation on the CPU. */ #ifndef GRAPHBOLT_CACHE_POLICY_H_ #define GRAPHBOLT_CACHE_POLICY_H_ #include #include #include #include #include #include #include "./circular_queue.h" namespace graphbolt { namespace storage { struct CacheKey { auto getKey() const { return (static_cast(key_higher_16_bits_) << 32) + key_lower_32_bits_; } CacheKey(int64_t key) : CacheKey(key, std::numeric_limits::min()) {} CacheKey(int64_t key, int64_t position) : freq_(0), // EndUse() should be called to reset the reference count. reference_count_(-1), key_higher_16_bits_(key >> 32), key_lower_32_bits_(key), position_in_cache_(position) { TORCH_CHECK(key == getKey()); static_assert(sizeof(CacheKey) == 2 * sizeof(int64_t)); } CacheKey() = default; auto getFreq() const { return freq_; } auto getPos() const { return position_in_cache_; } CacheKey& setPos(int64_t pos) { position_in_cache_ = pos; return *this; } CacheKey& Increment() { freq_ = std::min(3, static_cast(freq_ + 1)); return *this; } CacheKey& Decrement() { freq_ = std::max(0, static_cast(freq_ - 1)); return *this; } CacheKey& SetFreq() { freq_ = 1; return *this; } CacheKey& ResetFreq() { freq_ = 0; return *this; } CacheKey& StartRead() { ::cuda::std::atomic_ref ref(reference_count_); // StartRead runs concurrently only with EndUse. EndUse does not need to see // this modification at all. So we can use the relaxed memory order. 
const auto old_val = ref.fetch_add(1, ::cuda::std::memory_order_relaxed); TORCH_CHECK( old_val < std::numeric_limits::max(), "There are too many in-flight read requests to the same cache entry!"); return *this; } template CacheKey& EndUse() { ::cuda::std::atomic_ref ref(reference_count_); // The EndUse operation needs to synchronize with the InUse operation. So we // have a release-acquire ordering between the two. // https://en.cppreference.com/w/cpp/atomic/memory_order#Release-Acquire_ordering if constexpr (write) { ref.fetch_add(1, ::cuda::std::memory_order_release); } else { ref.fetch_add(-1, ::cuda::std::memory_order_release); } return *this; } bool InUse() const { ::cuda::std::atomic_ref ref(reference_count_); // The operations after a call to this function need to happen after the // load operation. Hence the acquire order. return ref.load(::cuda::std::memory_order_acquire); } bool BeingWritten() const { ::cuda::std::atomic_ref ref(reference_count_); // The only operation coming after this op is the StartRead operation. Since // StartRead is a refcount increment operation, it is fine if we don't // synchronize with EndUse ops. return ref.load(::cuda::std::memory_order_relaxed) < 0; } friend std::ostream& operator<<(std::ostream& os, const CacheKey& key_ref) { ::cuda::std::atomic_ref ref(key_ref.reference_count_); return os << '(' << key_ref.getKey() << ", " << key_ref.freq_ << ", " << key_ref.position_in_cache_ << ", " << ref.load() << ")"; } private: int8_t freq_; // Negative values indicate writing while positive values indicate reading. // Access only through an std::atomic_ref instance atomically. int8_t reference_count_; // Keys are restricted to be 48-bit unsigned integers.
uint16_t key_higher_16_bits_; uint32_t key_lower_32_bits_; int64_t position_in_cache_; }; class BaseCachePolicy { public: BaseCachePolicy(int64_t capacity) : capacity_(capacity), cache_usage_(0) {} BaseCachePolicy() = default; /** * @brief A virtual base class destructor ensures that the derived class * destructor gets called. */ virtual ~BaseCachePolicy() = default; /** * @brief The policy query function. * @param keys The keys to query the cache. * * @return (positions, indices, missing_keys, found_ptrs), where positions has * the locations of the keys which were found in the cache, missing_keys has * the keys that were not found and indices is defined such that * keys[indices[:positions.size(0)]] gives us the keys for the found pointers * and keys[indices[positions.size(0):]] is identical to missing_keys. */ virtual std::tuple Query(torch::Tensor keys) = 0; /** * @brief The policy query and replace function. * @param keys The keys to query the cache. * * @return (positions, indices, pointers, missing_keys), where positions has * the locations of the keys which were emplaced into the cache, pointers * point to the emplaced CacheKey pointers in the cache, missing_keys has the * keys that were not found and just inserted and indices is defined such that * keys[indices[:keys.size(0) - missing_keys.size(0)]] gives us the keys for * the found keys and keys[indices[keys.size(0) - missing_keys.size(0):]] is * identical to missing_keys. */ virtual std::tuple QueryAndReplace(torch::Tensor keys) = 0; /** * @brief The policy replace function. * @param keys The keys to insert into the cache. * * @return (positions, pointers), where positions has the locations of the * replaced entries and pointers point to their CacheKey pointers in the * cache. */ virtual std::tuple Replace( torch::Tensor keys) = 0; /** * @brief A reader has finished reading these keys, so they can be evicted. * @param pointers The CacheKey pointers in the cache to unmark.
*/ static void ReadingCompleted(torch::Tensor pointers); /** * @brief A writer has finished writing these keys, so they can be evicted. * @param pointers The CacheKey pointers in the cache to unmark. */ static void WritingCompleted(torch::Tensor pointers); protected: template using map_t = tsl::robin_map; template using set_t = tsl::robin_set; template static auto& mutable_value_ref(iterator it) { return it.value(); } static constexpr int kCapacityFactor = 2; template static std::tuple QueryImpl(CachePolicy& policy, torch::Tensor keys); template static std::tuple QueryAndReplaceImpl(CachePolicy& policy, torch::Tensor keys); template static std::tuple ReplaceImpl( CachePolicy& policy, torch::Tensor keys); template static void MoveToFront( std::list& from, std::list& to, typename std::list::iterator it) { std::list temp; // Transfer the element to temp to keep references valid. auto next_it = it; std::advance(next_it, 1); temp.splice(temp.begin(), from, it, next_it); // Move the element to the beginning of the queue. to.splice(to.begin(), temp); // The iterators and references are not invalidated. TORCH_INTERNAL_ASSERT_DEBUG_ONLY(it == to.begin()); } int64_t capacity_; int64_t cache_usage_; private: template static void ReadingWritingCompletedImpl(torch::Tensor pointers); }; /** * @brief S3FIFO is a simple, scalable FIFO-based algorithm with three static * queues (S3-FIFO). https://dl.acm.org/doi/pdf/10.1145/3600006.3613147 **/ class S3FifoCachePolicy : public BaseCachePolicy { public: using map_iterator = map_t::iterator; /** * @brief Constructor for the S3FifoCachePolicy class. * * @param capacity The capacity of the cache in terms of # elements. */ S3FifoCachePolicy(int64_t capacity); S3FifoCachePolicy() = default; S3FifoCachePolicy(S3FifoCachePolicy&&) = default; virtual ~S3FifoCachePolicy() = default; /** * @brief See BaseCachePolicy::Query. */ std::tuple Query( torch::Tensor keys); /** * @brief See BaseCachePolicy::QueryAndReplace.
*/ std::tuple QueryAndReplace(torch::Tensor keys); /** * @brief See BaseCachePolicy::Replace. */ std::tuple Replace(torch::Tensor keys); CacheKey* Read(int64_t key) { auto it = key_to_cache_key_.find(key); if (it != key_to_cache_key_.end()) { auto& cache_key = it->second->Increment(); if (!cache_key.BeingWritten()) { return &cache_key.StartRead(); } } return nullptr; } auto getMapSentinelValue() const { return nullptr; } std::pair Emplace(int64_t key) { auto [it, inserted] = key_to_cache_key_.emplace(key, getMapSentinelValue()); bool readable = false; if (!inserted) { auto& cache_key = it->second->Increment(); if (!cache_key.BeingWritten()) { cache_key.StartRead(); readable = true; } } return {it, readable}; } CacheKey* Insert(map_iterator it) { const auto key = it->first; const auto in_ghost_queue = ghost_set_.erase(key); auto& queue = in_ghost_queue ? main_queue_ : small_queue_; queue.push_front(CacheKey(key)); small_queue_size_ += 1 - in_ghost_queue; auto cache_key_ptr = &queue.front(); mutable_value_ref(it) = cache_key_ptr; return &cache_key_ptr->setPos(Evict()); } private: int64_t EvictMainQueue() { while (true) { auto& evicted = main_queue_.back(); if (evicted.getFreq() > 0 || evicted.InUse()) { evicted.Decrement(); auto it = main_queue_.end(); std::advance(it, -1); MoveToFront(main_queue_, main_queue_, it); } else { key_to_cache_key_.erase(evicted.getKey()); const auto evicted_pos = evicted.getPos(); main_queue_.pop_back(); return evicted_pos; } } } int64_t EvictSmallQueue() { while (small_queue_size_ > small_queue_size_target_) { --small_queue_size_; auto& evicted = small_queue_.back(); if (evicted.getFreq() > 0 || evicted.InUse()) { evicted.ResetFreq(); auto it = small_queue_.end(); std::advance(it, -1); MoveToFront(small_queue_, main_queue_, it); } else { const auto evicted_key = evicted.getKey(); key_to_cache_key_.erase(evicted_key); const auto evicted_pos = evicted.getPos(); small_queue_.pop_back(); if (ghost_queue_.IsFull()) { 
ghost_set_.erase(ghost_queue_.Pop()); } ghost_set_.insert(evicted_key); ghost_queue_.Push(evicted_key); return evicted_pos; } } return -1; } int64_t Evict() { // If the cache has space, get an unused slot otherwise perform eviction. if (cache_usage_ < capacity_) return cache_usage_++; const auto pos = EvictSmallQueue(); return pos >= 0 ? pos : EvictMainQueue(); } std::list small_queue_, main_queue_; CircularQueue ghost_queue_; size_t small_queue_size_target_; // std::list<>::size() is O(N) before the CXX11 ABI which torch enforces. size_t small_queue_size_; set_t ghost_set_; map_t key_to_cache_key_; }; /** * @brief SIEVE is a simple, scalable FIFO-based algorithm with a single static * queue. https://www.usenix.org/system/files/nsdi24-zhang-yazhuo.pdf **/ class SieveCachePolicy : public BaseCachePolicy { public: using map_iterator = map_t::iterator; /** * @brief Constructor for the SieveCachePolicy class. * * @param capacity The capacity of the cache in terms of # elements. */ SieveCachePolicy(int64_t capacity); SieveCachePolicy() = default; virtual ~SieveCachePolicy() = default; /** * @brief See BaseCachePolicy::Query. */ std::tuple Query( torch::Tensor keys); /** * @brief See BaseCachePolicy::QueryAndReplace. */ std::tuple QueryAndReplace(torch::Tensor keys); /** * @brief See BaseCachePolicy::Replace.
*/ std::tuple Replace(torch::Tensor keys); CacheKey* Read(int64_t key) { auto it = key_to_cache_key_.find(key); if (it != key_to_cache_key_.end()) { auto& cache_key = it->second->SetFreq(); if (!cache_key.BeingWritten()) { return &cache_key.StartRead(); } } return nullptr; } auto getMapSentinelValue() const { return nullptr; } std::pair Emplace(int64_t key) { auto [it, inserted] = key_to_cache_key_.emplace(key, getMapSentinelValue()); bool readable = false; if (!inserted) { auto& cache_key = it->second->SetFreq(); if (!cache_key.BeingWritten()) { cache_key.StartRead(); readable = true; } } return {it, readable}; } CacheKey* Insert(map_iterator it) { const auto key = it->first; queue_.push_front(CacheKey(key)); auto cache_key_ptr = &queue_.front(); mutable_value_ref(it) = cache_key_ptr; return &cache_key_ptr->setPos(Evict()); } private: int64_t Evict() { // If the cache has space, get an unused slot otherwise perform eviction. if (cache_usage_ < capacity_) return cache_usage_++; --hand_; while (hand_->getFreq() || hand_->InUse()) { hand_->ResetFreq(); if (hand_ == queue_.begin()) hand_ = queue_.end(); --hand_; } key_to_cache_key_.erase(hand_->getKey()); const auto pos = hand_->getPos(); const auto temp = hand_; if (hand_ == queue_.begin()) { hand_ = queue_.end(); } else { ++hand_; } queue_.erase(temp); return pos; } std::list queue_; decltype(queue_)::iterator hand_; map_t key_to_cache_key_; }; /** * @brief LRU (least recently used) keeps its entries in a single queue: an * entry is moved to the front whenever it is accessed and eviction takes the * entry at the back, skipping entries that are still in use. **/ class LruCachePolicy : public BaseCachePolicy { public: using map_iterator = map_t::iterator>::iterator; /** * @brief Constructor for the LruCachePolicy class. * * @param capacity The capacity of the cache in terms of # elements. */ LruCachePolicy(int64_t capacity); LruCachePolicy() = default; virtual ~LruCachePolicy() = default; /** * @brief See BaseCachePolicy::Query. */ std::tuple Query( torch::Tensor keys); /** * @brief See BaseCachePolicy::QueryAndReplace.
*/ std::tuple QueryAndReplace(torch::Tensor keys); /** * @brief See BaseCachePolicy::Replace. */ std::tuple Replace(torch::Tensor keys); CacheKey* Read(int64_t key) { auto it = key_to_cache_key_.find(key); if (it != key_to_cache_key_.end()) { auto& cache_key = *it->second; MoveToFront(queue_, queue_, it->second); if (!cache_key.BeingWritten()) { return &cache_key.StartRead(); } } return nullptr; } auto getMapSentinelValue() { return queue_.end(); } std::pair Emplace(int64_t key) { auto [it, inserted] = key_to_cache_key_.emplace(key, getMapSentinelValue()); bool readable = false; if (!inserted) { auto& cache_key = *it->second; MoveToFront(queue_, queue_, it->second); if (!cache_key.BeingWritten()) { cache_key.StartRead(); readable = true; } } return {it, readable}; } CacheKey* Insert(map_iterator it) { const auto key = it->first; queue_.push_front(CacheKey(key)); mutable_value_ref(it) = queue_.begin(); auto cache_key_ptr = &queue_.front(); return &cache_key_ptr->setPos(Evict()); } private: int64_t Evict() { // If the cache has space, get an unused slot otherwise perform eviction. if (cache_usage_ < capacity_) return cache_usage_++; // Do not evict items that are still in use. while (queue_.back().InUse()) { auto it = queue_.end(); std::advance(it, -1); // Move the last element to the front without invalidating references. MoveToFront(queue_, queue_, it); } const auto& cache_key = queue_.back(); key_to_cache_key_.erase(cache_key.getKey()); const auto pos = cache_key.getPos(); queue_.pop_back(); return pos; } std::list queue_; map_t key_to_cache_key_; }; /** * @brief Clock (FIFO-Reinsertion) is a simple, scalable FIFO-based algorithm * with a single static queue. * https://people.csail.mit.edu/saltzer/Multics/MHP-Saltzer-060508/bookcases/M00s/M0104%20074-12).PDF **/ class ClockCachePolicy : public BaseCachePolicy { public: using map_iterator = map_t::iterator; /** * @brief Constructor for the ClockCachePolicy class.
* * @param capacity The capacity of the cache in terms of # elements. */ ClockCachePolicy(int64_t capacity); ClockCachePolicy() = default; ClockCachePolicy(ClockCachePolicy&&) = default; virtual ~ClockCachePolicy() = default; /** * @brief See BaseCachePolicy::Query. */ std::tuple Query( torch::Tensor keys); /** * @brief See BaseCachePolicy::QueryAndReplace. */ std::tuple QueryAndReplace(torch::Tensor keys); /** * @brief See BaseCachePolicy::Replace. */ std::tuple Replace(torch::Tensor keys); CacheKey* Read(int64_t key) { auto it = key_to_cache_key_.find(key); if (it != key_to_cache_key_.end()) { auto& cache_key = it->second->SetFreq(); if (!cache_key.BeingWritten()) { return &cache_key.StartRead(); } } return nullptr; } auto getMapSentinelValue() const { return nullptr; } std::pair Emplace(int64_t key) { auto [it, inserted] = key_to_cache_key_.emplace(key, getMapSentinelValue()); bool readable = false; if (!inserted) { auto& cache_key = it->second->SetFreq(); if (!cache_key.BeingWritten()) { cache_key.StartRead(); readable = true; } } return {it, readable}; } CacheKey* Insert(map_iterator it) { const auto key = it->first; queue_.push_front(CacheKey(key)); auto cache_key_ptr = &queue_.front(); mutable_value_ref(it) = cache_key_ptr; return &cache_key_ptr->setPos(Evict()); } private: int64_t Evict() { // If the cache has space, get an unused slot otherwise perform eviction. 
if (cache_usage_ < capacity_) return cache_usage_++; while (true) { auto& cache_key = queue_.back(); if (cache_key.getFreq() || cache_key.InUse()) { cache_key.ResetFreq(); auto it = queue_.end(); std::advance(it, -1); MoveToFront(queue_, queue_, it); } else { key_to_cache_key_.erase(cache_key.getKey()); const auto evicted_pos = cache_key.getPos(); queue_.pop_back(); return evicted_pos; } } } std::list queue_; map_t key_to_cache_key_; }; } // namespace storage } // namespace graphbolt #endif // GRAPHBOLT_CACHE_POLICY_H_ ================================================ FILE: graphbolt/src/circular_queue.h ================================================ /** * Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file circular_queue.h * @brief Circular queue implementation. */ #ifndef GRAPHBOLT_CIRCULAR_QUEUE_H_ #define GRAPHBOLT_CIRCULAR_QUEUE_H_ #include namespace graphbolt { template struct CircularQueue { CircularQueue(const int64_t capacity) : tail_(0), head_(0), // + 1 is needed to be able to differentiate empty and full states. 
capacity_(capacity + 1), data_{new T[capacity + 1]} {} CircularQueue() = default; T* Push(const T& x) { auto insert_ptr = &data_[PostIncrement(tail_)]; *insert_ptr = x; return insert_ptr; } T Pop() { return data_[PostIncrement(head_)]; } /* Advances head_ by N with a single wrap-around; presumably callers pass N no larger than Size() -- confirm at call sites. */ void PopN(int64_t N) { head_ += N; if (head_ >= capacity_) head_ -= capacity_; } auto Clear() { head_ = tail_; } T& Front() const { return data_[head_]; } bool IsFull() const { const auto diff = tail_ + 1 - head_; return diff == 0 || diff == capacity_; } auto Size() const { auto diff = tail_ - head_; if (diff < 0) diff += capacity_; return diff; } friend std::ostream& operator<<( std::ostream& os, const CircularQueue& queue) { for (auto i = queue.head_; i != queue.tail_; queue.PostIncrement(i)) { os << queue.data_[i] << ", "; } return os << "\n"; } bool IsEmpty() const { return tail_ == head_; } auto Capacity() const { return capacity_ - 1; } private: int64_t PostIncrement(int64_t& i) const { const auto ret = i++; if (i >= capacity_) i -= capacity_; return ret; } int64_t tail_; int64_t head_; int64_t capacity_; std::unique_ptr data_; }; } // namespace graphbolt #endif // GRAPHBOLT_CIRCULAR_QUEUE_H_ ================================================ FILE: graphbolt/src/cnumpy.cc ================================================ /** * Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * Copyright (c) 2023 by Contributors * @file cnumpy.cc * @brief Numpy File Fetcher class.
*/ #include "./cnumpy.h" #include "./io_uring.h" #ifdef HAVE_LIBRARY_LIBURING #include #include #include #endif #include #include #include #include #include #include #include #include #include #include "./circular_queue.h" #include "./utils.h" namespace graphbolt { namespace storage { OnDiskNpyArray::OnDiskNpyArray( std::string filename, torch::ScalarType dtype, const std::vector &shape, torch::optional num_threads) : filename_(filename), feature_dim_(shape), dtype_(dtype), feature_size_(std::accumulate( shape.begin() + 1, shape.end(), c10::elementSize(dtype), std::multiplies())) { #ifndef __linux__ throw std::runtime_error( "OnDiskNpyArray is not supported on non-Linux systems."); #endif #ifdef HAVE_LIBRARY_LIBURING ParseNumpyHeader(); file_description_ = ::open(filename.c_str(), O_RDONLY | O_DIRECT); if (file_description_ < 0) { throw std::runtime_error("npy_load: Unable to open file " + filename); } struct stat st; TORCH_CHECK(::fstat(file_description_, &st) == 0); const auto file_size = st.st_size; block_size_ = st.st_blksize; TORCH_CHECK(file_size - prefix_len_ >= feature_dim_[0] * feature_size_); // The minimum page size to contain one feature. aligned_length_ = (feature_size_ + block_size_ - 1) & ~(block_size_ - 1); std::call_once(call_once_flag_, [&] { // Get system max interop thread count. num_queues_ = io_uring::num_threads.value_or(torch::get_num_interop_threads()); TORCH_CHECK(num_queues_ > 0, "A positive # queues is required."); io_uring_queue_ = std::unique_ptr<::io_uring[], io_uring_queue_destroyer>( new ::io_uring[num_queues_], io_uring_queue_destroyer{num_queues_}); TORCH_CHECK(num_queues_ <= counting_semaphore_t::max()); semaphore_.release(num_queues_); available_queues_.reserve(num_queues_); // Init io_uring queue. 
for (int64_t t = 0; t < num_queues_; t++) { available_queues_.push_back(t); TORCH_CHECK( ::io_uring_queue_init(2 * kGroupSize, &io_uring_queue_[t], 0) == 0); // We have allocated 2 * kGroupSize submission queue entries and // 4 * kGroupSize completion queue entries after this call. } }); num_thread_ = std::min( static_cast(num_queues_), num_threads.value_or(num_queues_)); TORCH_CHECK(num_thread_ > 0, "A positive # threads is required."); // We allocate buffers for each existing queue because we might get assigned // any queue in range [0, num_queues_). read_tensor_ = torch::empty( ReadBufferSizePerThread() * num_queues_ + block_size_ - 1, torch::TensorOptions().dtype(torch::kInt8).device(torch::kCPU)); #else throw std::runtime_error("DiskBasedFeature is not available now."); #endif // HAVE_LIBRARY_LIBURING } c10::intrusive_ptr OnDiskNpyArray::Create( std::string path, torch::ScalarType dtype, const std::vector &shape, torch::optional num_threads) { return c10::make_intrusive(path, dtype, shape, num_threads); } OnDiskNpyArray::~OnDiskNpyArray() { #ifdef HAVE_LIBRARY_LIBURING TORCH_CHECK(::close(file_description_) == 0); #endif // HAVE_LIBRARY_LIBURING } void OnDiskNpyArray::ParseNumpyHeader() { // Parse numpy file header to get basic info of feature. // Get file prefix length. std::ifstream file(filename_); if (!file.is_open()) { throw std::runtime_error( "ParseNumpyHeader: Unable to open file " + filename_); } std::string header; std::getline(file, header); // Get prefix length for computing feature offset, // add one for new-line character. 
prefix_len_ = header.size() + 1; } c10::intrusive_ptr> OnDiskNpyArray::IndexSelect( torch::Tensor index) { #ifdef HAVE_LIBRARY_LIBURING return IndexSelectIOUring(index); #else TORCH_CHECK(false, "OnDiskNpyArray is not supported on non-Linux systems."); return {}; #endif // HAVE_LIBRARY_LIBURING } class ReadRequest { public: char *destination_; int64_t read_len_; int64_t offset_; int64_t block_size_; char *aligned_read_buffer_; auto AlignedOffset() const { return offset_ & ~(block_size_ - 1); } auto ReadBuffer() const { return aligned_read_buffer_ + offset_ - AlignedOffset(); } auto AlignedReadSize() const { const int64_t end_offset = offset_ + read_len_; const int64_t aligned_end_offset = (end_offset + block_size_ - 1) & ~(block_size_ - 1); return aligned_end_offset - AlignedOffset(); } auto MinimumReadSize() const { return offset_ + read_len_ - AlignedOffset(); } }; #ifdef HAVE_LIBRARY_LIBURING torch::Tensor OnDiskNpyArray::IndexSelectIOUringImpl(torch::Tensor index) { std::vector shape(index.sizes().begin(), index.sizes().end()); shape.insert(shape.end(), feature_dim_.begin() + 1, feature_dim_.end()); auto result = torch::empty( shape, index.options() .dtype(dtype_) .layout(torch::kStrided) .pinned_memory(utils::is_pinned(index)) .requires_grad(false)); auto result_buffer = reinterpret_cast(result.data_ptr()); // Indicator for index error. std::atomic error_flag{}; std::atomic work_queue{}; // Construct a QueueAndBufferAcquirer object so that the worker threads can // share the available queues and buffers. QueueAndBufferAcquirer queue_source(this); graphbolt::parallel_for_each_interop(0, num_thread_, 1, [&](int) { // The completion queue might contain 4 * kGroupSize while we may submit // 4 * kGroupSize more. No harm in overallocation here. 
CircularQueue read_queue(8 * kGroupSize); int64_t num_submitted = 0; int64_t num_completed = 0; auto [acquired_queue_handle, read_buffer_source2] = queue_source.get(); auto &io_uring_queue = acquired_queue_handle.get(); // Capturing structured binding is available only in C++20, so we rename. auto read_buffer_source = read_buffer_source2; auto submit_fn = [&](int64_t submission_minimum_batch_size) { if (read_queue.Size() < submission_minimum_batch_size) return; TORCH_CHECK( // Check for sqe overflow. read_queue.Size() <= 2 * kGroupSize); TORCH_CHECK( // Check for cqe overflow. read_queue.Size() + num_submitted - num_completed <= 4 * kGroupSize); // Submit and wait for the reads. while (!read_queue.IsEmpty()) { const auto submitted = ::io_uring_submit(&io_uring_queue); TORCH_CHECK(submitted >= 0); num_submitted += submitted; // Pop the submitted entries from the queue. read_queue.PopN(submitted); } }; for (int64_t read_buffer_slot = 0; true;) { auto request_read_buffer = [&]() { return read_buffer_source + (aligned_length_ + block_size_) * (read_buffer_slot++ % (8 * kGroupSize)); }; const auto num_requested_items = std::max( std::min( // The condition not to overflow the completion queue. 2 * kGroupSize - (read_queue.Size() + num_submitted - num_completed), // The condition not to overflow the submission queue. kGroupSize - read_queue.Size()), int64_t{}); const auto begin = work_queue.fetch_add(num_requested_items, std::memory_order_relaxed); if ((begin >= index.numel() && read_queue.IsEmpty() && num_completed >= num_submitted) || // Even when we encounter out of bounds index (error_flag == 1), we // continue. We want to ensure the reads in flight successfully // complete to avoid the instability due to incompleted reads. 
error_flag.load(std::memory_order_relaxed) > 1) break; const auto end = std::min(begin + num_requested_items, index.numel()); AT_DISPATCH_INDEX_TYPES( index.scalar_type(), "IndexSelectIOUring", ([&] { auto index_data = index.data_ptr(); for (int64_t i = begin; i < end; ++i) { int64_t feature_id = index_data[i]; if (feature_id < 0) feature_id += feature_dim_[0]; if (feature_id < 0 || feature_id >= feature_dim_[0]) { error_flag.store(1, std::memory_order_relaxed); // Simply skip the out of bounds index. continue; } // calculate offset of the feature. const int64_t offset = feature_id * feature_size_ + prefix_len_; ReadRequest req{ result_buffer + feature_size_ * i, feature_size_, offset, block_size_, request_read_buffer()}; // Put requests into io_uring queue. struct io_uring_sqe *sqe = io_uring_get_sqe(&io_uring_queue); TORCH_CHECK(sqe); io_uring_sqe_set_data(sqe, read_queue.Push(req)); io_uring_prep_read( sqe, file_description_, req.aligned_read_buffer_, req.AlignedReadSize(), req.AlignedOffset()); submit_fn(kGroupSize); } })); submit_fn(1); // Submit all sqes. // Wait for the reads; completion queue entries. struct io_uring_cqe *cqe; TORCH_CHECK(num_submitted - num_completed <= 2 * kGroupSize); TORCH_CHECK( ::io_uring_wait_cqe_nr( &io_uring_queue, &cqe, num_submitted - num_completed) == 0); // Check the reads and abort on failure. int num_cqes_seen = 0; unsigned head; io_uring_for_each_cqe(&io_uring_queue, head, cqe) { const auto &req = *reinterpret_cast(io_uring_cqe_get_data(cqe)); auto actual_read_len = cqe->res; if (actual_read_len < 0) { error_flag.store(actual_read_len, std::memory_order_relaxed); break; } const auto remaining_read_len = std::max(req.MinimumReadSize() - actual_read_len, int64_t{}); const auto remaining_useful_read_len = std::min(remaining_read_len, req.read_len_); const auto useful_read_len = req.read_len_ - remaining_useful_read_len; if (remaining_read_len) { // Remaining portion will be read as part of the next batch. 
ReadRequest rest{ req.destination_ + useful_read_len, remaining_useful_read_len, req.offset_ + useful_read_len, block_size_, request_read_buffer()}; // Put requests into io_uring queue. struct io_uring_sqe *sqe = io_uring_get_sqe(&io_uring_queue); TORCH_CHECK(sqe); io_uring_sqe_set_data(sqe, read_queue.Push(rest)); io_uring_prep_read( sqe, file_description_, rest.aligned_read_buffer_, rest.AlignedReadSize(), rest.AlignedOffset()); submit_fn(kGroupSize); } // Copy results into result_buffer. std::memcpy(req.destination_, req.ReadBuffer(), useful_read_len); num_cqes_seen++; } // Move the head pointer of completion queue. io_uring_cq_advance(&io_uring_queue, num_cqes_seen); num_completed += num_cqes_seen; } }); const auto ret_val = error_flag.load(std::memory_order_relaxed); switch (ret_val) { case 0: // Successful. return result; case 1: throw std::out_of_range("IndexError: Index out of range."); default: throw std::runtime_error( "io_uring error with errno: " + std::to_string(-ret_val)); } } c10::intrusive_ptr> OnDiskNpyArray::IndexSelectIOUring( torch::Tensor index) { return async([=, this] { return IndexSelectIOUringImpl(index); }); } #endif // HAVE_LIBRARY_LIBURING } // namespace storage } // namespace graphbolt ================================================ FILE: graphbolt/src/cnumpy.h ================================================ /** * Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * Copyright (c) 2023 by Contributors * @file cnumpy.h * @brief Numpy File Fetecher class. */ #ifdef HAVE_LIBRARY_LIBURING #include #endif // HAVE_LIBRARY_LIBURING #include #include #include #include #include #include #include #include #include #include #include #include #include namespace graphbolt { namespace storage { namespace { #ifdef HAVE_LIBRARY_LIBURING struct io_uring_queue_destroyer { int num_thread_; void operator()(::io_uring* queues) { if (!queues) return; for (int t = 0; t < num_thread_; t++) { // IO queue exit. 
::io_uring_queue_exit(&queues[t]);
    }
    delete[] queues;
  }
};
#endif  // HAVE_LIBRARY_LIBURING
}  // namespace

/**
 * @brief Disk Numpy Fetcher class.
 */
class OnDiskNpyArray : public torch::CustomClassHolder {
  // No user will need more than 1024 io_uring queues.
  using counting_semaphore_t = ::cuda::std::counting_semaphore<1024>;

 public:
  // Multiplier used when sizing the per-thread read buffer below.
  static constexpr int kGroupSize = 256;

  /** @brief Default constructor. */
  OnDiskNpyArray() = default;

  /**
   * @brief Constructor with given file path and data type.
   * @param filename Path to the on disk numpy file.
   * @param dtype Data type of numpy array.
   * @param shape Shape of the numpy array (presumably what ends up in
   * feature_dim_; the definition is not in this header -- TODO confirm).
   * @param num_threads Optional number of threads.
   */
  OnDiskNpyArray(
      std::string filename, torch::ScalarType dtype,
      const std::vector& shape, torch::optional num_threads);

  /** @brief Create a disk feature fetcher from numpy file. */
  static c10::intrusive_ptr Create(
      std::string path, torch::ScalarType dtype, const std::vector& shape,
      torch::optional num_threads);

  /** @brief Destructor. */
  ~OnDiskNpyArray();

  /**
   * @brief Parses the header of a numpy file to extract feature information.
   **/
  void ParseNumpyHeader();

  /**
   * @brief Read disk numpy file based on given index and transform to
   * tensor.
   */
  c10::intrusive_ptr> IndexSelect(torch::Tensor index);

#ifdef HAVE_LIBRARY_LIBURING
  /**
   * @brief Index-select operation on an on-disk numpy array using IO Uring for
   * asynchronous I/O.
   *
   * This function performs index-select operation on an on-disk numpy array. It
   * uses IO Uring for asynchronous I/O to efficiently read data from disk. The
   * input tensor 'index' specifies the indices of features to select. The
   * function reads features corresponding to the indices from the disk and
   * returns a new tensor containing the selected features.
   *
   * @param index A 1D tensor containing the indices of features to select.
   * @return A tensor containing the selected features.
   * @throws std::out_of_range If index is out of range (raised by
   * IndexSelectIOUringImpl).
   * @throws std::runtime_error If an io_uring read fails (raised by
   * IndexSelectIOUringImpl).
   */
  c10::intrusive_ptr> IndexSelectIOUring(
      torch::Tensor index);

  /** @brief Implementation that IndexSelectIOUring runs asynchronously. */
  torch::Tensor IndexSelectIOUringImpl(torch::Tensor index);
#endif  // HAVE_LIBRARY_LIBURING
 private:
  // Bytes of read buffer reserved per thread: 8 * kGroupSize slots of
  // (aligned_length_ + block_size_) bytes each.
  int64_t ReadBufferSizePerThread() const {
    return (aligned_length_ + block_size_) * kGroupSize * 8;
  }

  // Returns the start of the slice of read_tensor_ that belongs to
  // `thread_id`, after aligning the base pointer to block_size_.
  char* ReadBuffer(int thread_id) const {
    auto read_buffer_void_ptr = read_tensor_.data_ptr();
    size_t read_buffer_size = read_tensor_.numel();
    // std::align yields nullptr when the aligned region does not fit into
    // the remaining space; that case is reported by the check below.
    auto read_buffer = reinterpret_cast(std::align(
        block_size_, ReadBufferSizePerThread() * num_thread_,
        read_buffer_void_ptr, read_buffer_size));
    TORCH_CHECK(read_buffer, "read_buffer allocation failed!");
    return read_buffer + ReadBufferSizePerThread() * thread_id;
  }

  const std::string filename_;      // Path to numpy file.
  int file_description_;            // File descriptor.
  int64_t block_size_;              // Block size of the opened file.
  int64_t prefix_len_;              // Length of head data in numpy file.
  const std::vector feature_dim_;   // Shape of features, e.g. {N,M,K,L}.
  const torch::ScalarType dtype_;   // Feature data type.
  const int64_t feature_size_;      // Number of bytes of feature size.
  int64_t aligned_length_;          // Aligned feature_size.
  int num_thread_;                  // Default thread number.
  torch::Tensor read_tensor_;       // Provides temporary read buffer.

#ifdef HAVE_LIBRARY_LIBURING
  static inline std::once_flag
      call_once_flag_;            // Protect initialization of below.
  static inline int num_queues_;  // Number of queues.
  static inline std::unique_ptr<::io_uring[], io_uring_queue_destroyer>
      io_uring_queue_;  // io_uring queue.
  static inline counting_semaphore_t semaphore_{
      0};  // Control access to the io_uring queues.
  static inline std::mutex available_queues_mtx_;  // available_queues_ mutex.
  // Indices of the io_uring queues that are currently free to be handed out.
  static inline std::vector available_queues_;

  /**
   * @brief This class is meant to distribute the available read buffers and the
   * statically declared io_uring queues among the worker threads.
   */
  class QueueAndBufferAcquirer {
   public:
    /**
     * @brief Non-copyable handle to one io_uring queue, identified by its
     * index in io_uring_queue_.
     */
    class UniqueQueue {
     public:
      UniqueQueue(int thread_id) : thread_id_(thread_id) {}
      UniqueQueue(const UniqueQueue&) = delete;
      UniqueQueue& operator=(const UniqueQueue&) = delete;

      /**
       * @brief Returns the queue back to the pool.
       */
      ~UniqueQueue() {
        {
          // We give back the slot we used.
          std::lock_guard lock(available_queues_mtx_);
          available_queues_.push_back(thread_id_);
        }
        // Release only after the slot is back in available_queues_.
        semaphore_.release();
      }

      /**
       * @brief Returns the raw io_uring queue.
       */
      ::io_uring& get() const { return io_uring_queue_[thread_id_]; }

     private:
      int thread_id_;
    };

    // Takes one semaphore ticket up front; the first call to get() reuses it.
    QueueAndBufferAcquirer(OnDiskNpyArray* array) : array_(array) {
      semaphore_.acquire();
    }

    ~QueueAndBufferAcquirer() {
      // If none of the worker threads acquire the semaphore, we make sure to
      // release the ticket taken in the constructor.
      if (!entering_first_.test_and_set(std::memory_order_relaxed)) {
        semaphore_.release();
      }
    }

    /**
     * @brief Returns the secured io_uring queue and the read buffer as a pair.
     * The raw io_uring queue can be accessed by calling `.get()` on the
     * returned UniqueQueue object.
     *
     * @note The returned UniqueQueue object manages the lifetime of the
     * io_uring queue. Its destructor returns the queue back to the pool.
     */
    std::pair get() {
      // We consume a slot from the semaphore to use a queue.
if (entering_first_.test_and_set(std::memory_order_relaxed)) { semaphore_.acquire(); } const auto thread_id = [&] { std::lock_guard lock(available_queues_mtx_); TORCH_CHECK(!available_queues_.empty()); const auto thread_id = available_queues_.back(); available_queues_.pop_back(); return thread_id; }(); return { std::piecewise_construct, std::make_tuple(thread_id), std::make_tuple(array_->ReadBuffer(thread_id))}; } private: const OnDiskNpyArray* array_; std::atomic_flag entering_first_ = ATOMIC_FLAG_INIT; }; #endif // HAVE_LIBRARY_LIBURING }; } // namespace storage } // namespace graphbolt ================================================ FILE: graphbolt/src/concurrent_id_hash_map.cc ================================================ /** * Copyright (c) 2023 by Contributors * @file concurrent_id_hash_map.cc * @brief Class about id hash map. */ #include "concurrent_id_hash_map.h" #ifdef _MSC_VER #include #endif // _MSC_VER #include #include #include namespace { static constexpr int64_t kEmptyKey = -1; static constexpr int kGrainSize = 256; // The formula is established from experience which is used to get the hashmap // size from the input array size. inline size_t GetMapSize(size_t num) { size_t capacity = 1; return capacity << static_cast(1 + std::log2(num * 3)); } } // namespace namespace graphbolt { namespace sampling { template ConcurrentIdHashMap::ConcurrentIdHashMap( const torch::Tensor& ids, size_t num_seeds) { const IdType* ids_data = ids.data_ptr(); const size_t num_ids = static_cast(ids.size(0)); size_t capacity = GetMapSize(num_ids); mask_ = static_cast(capacity - 1); hash_map_ = torch::full({static_cast(capacity * 2)}, -1, ids.options()); // This code block is to fill the ids into hash_map_. unique_ids_ = torch::empty_like(ids); IdType* unique_ids_data = unique_ids_.data_ptr(); // Insert all ids into the hash map. 
torch::parallel_for(0, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
    for (int64_t i = s; i < e; i++) {
      // Each key keeps the smallest input position that holds it.
      InsertAndSetMin(ids_data[i], static_cast(i));
    }
  });

  // Place the first `num_seeds` ids.
  unique_ids_.slice(0, 0, num_seeds) = ids.slice(0, 0, num_seeds);

  // valid[i] is 1 when position i is the first occurrence of ids[i].
  auto valid_tensor =
      torch::empty(num_ids, ids.options().dtype(torch::kInt8));
  auto valid = valid_tensor.data_ptr();

  const int64_t num_threads = torch::get_num_threads();
  std::vector block_offset(num_threads + 1, 0);

  // Count the valid numbers in each thread.
  // NOTE(review): the count is stored (not accumulated) per thread number, so
  // this presumably relies on every thread running the lambda at most once
  // per parallel_for -- confirm against torch::parallel_for's partitioning.
  torch::parallel_for(
      num_seeds, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
        size_t count = 0;
        for (int64_t i = s; i < e; i++) {
          if (MapId(ids_data[i]) == i) {
            count++;
            valid[i] = 1;
          } else {
            valid[i] = 0;
          }
        }
        auto thread_id = torch::get_thread_num();
        block_offset[thread_id + 1] = count;
      });

  // Get ExclusiveSum of each block.
  std::partial_sum(
      block_offset.begin() + 1, block_offset.end(), block_offset.begin() + 1);
  unique_ids_ = unique_ids_.slice(0, 0, num_seeds + block_offset.back());

  // Get unique array from ids and set value for hash map.
  // NOTE(review): positions are derived from block_offset[thread_id], which
  // presumably requires the same range-to-thread assignment as the counting
  // pass above -- confirm.
  torch::parallel_for(
      num_seeds, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
        auto thread_id = torch::get_thread_num();
        auto pos = block_offset[thread_id] + num_seeds;
        for (int64_t i = s; i < e; i++) {
          if (valid[i]) {
            unique_ids_data[pos] = ids_data[i];
            Set(ids_data[i], pos);
            pos = pos + 1;
          }
        }
      });
}

// Maps every entry of `ids` through MapId in parallel and returns the results
// in a new tensor.
template
torch::Tensor ConcurrentIdHashMap::MapIds(
    const torch::Tensor& ids) const {
  const IdType* ids_data = ids.data_ptr();

  torch::Tensor new_ids = torch::empty_like(ids);
  auto num_ids = new_ids.size(0);
  IdType* values_data = new_ids.data_ptr();

  torch::parallel_for(0, num_ids, kGrainSize, [&](int64_t s, int64_t e) {
    for (int64_t i = s; i < e; i++) {
      values_data[i] = MapId(ids_data[i]);
    }
  });
  return new_ids;
}

// hash_map_ stores (key, value) pairs interleaved: slot `pos` keeps its key at
// index 2 * pos and its value at index 2 * pos + 1.
template
constexpr IdType getKeyIndex(IdType pos) {
  return 2 * pos;
}

template
constexpr IdType getValueIndex(IdType pos) {
  return 2 * pos + 1;
}

template
inline void ConcurrentIdHashMap::Next(
    IdType* pos, IdType* delta) const {
  // Use quadratic probing.
  *pos = (*pos + (*delta) * (*delta)) & mask_;
  *delta = *delta + 1;
}

// Probes for `id` and returns its stored value; reaching an empty slot means
// the id was never inserted.
template
inline IdType ConcurrentIdHashMap::MapId(IdType id) const {
  IdType pos = (id & mask_), delta = 1;
  IdType empty_key = static_cast(kEmptyKey);
  IdType* hash_map_data = hash_map_.data_ptr();
  IdType key = hash_map_data[getKeyIndex(pos)];
  while (key != empty_key && key != id) {
    Next(&pos, &delta);
    key = hash_map_data[getKeyIndex(pos)];
  }
  if (key == empty_key) {
    throw std::out_of_range("Id not found: " + std::to_string(id));
  }
  return hash_map_data[getValueIndex(pos)];
}

// Probes until `id` is either inserted or found; returns true only when this
// call inserted it.
template
bool ConcurrentIdHashMap::Insert(IdType id) {
  IdType pos = (id & mask_), delta = 1;
  InsertState state = AttemptInsertAt(pos, id);
  while (state == InsertState::OCCUPIED) {
    Next(&pos, &delta);
    state = AttemptInsertAt(pos, id);
  }

  return state == InsertState::INSERTED;
}

// Overwrites the value of an existing key; the probe loop has no exit for a
// missing key, so the key must already be present.
template
inline void ConcurrentIdHashMap::Set(IdType key, IdType value) {
  IdType pos = (key & mask_), delta = 1;
  IdType* hash_map_data = hash_map_.data_ptr();
  while (hash_map_data[getKeyIndex(pos)] != key) {
    Next(&pos, &delta);
  }

  hash_map_data[getValueIndex(pos)] = value;
}

// Inserts `id` (or finds it) and stores `value` with a plain, non-atomic
// write.
template
inline void ConcurrentIdHashMap::InsertAndSet(IdType id, IdType value) {
  IdType pos = (id & mask_), delta = 1;
  while (AttemptInsertAt(pos, id) == InsertState::OCCUPIED) {
    Next(&pos, &delta);
  }

  hash_map_.data_ptr()[getValueIndex(pos)] = value;
}

// Inserts `id` (or finds it) and lowers its stored value to `value` when the
// slot is still empty or holds a larger value.
template
void ConcurrentIdHashMap::InsertAndSetMin(IdType id, IdType value) {
  IdType pos = (id & mask_), delta = 1;
  InsertState state = AttemptInsertAt(pos, id);
  while (state == InsertState::OCCUPIED) {
    Next(&pos, &delta);
    state = AttemptInsertAt(pos, id);
  }

  IdType empty_key = static_cast(kEmptyKey);
  IdType val_pos = getValueIndex(pos);
  ::cuda::std::atomic_ref value_ref(
      reinterpret_cast(hash_map_.data_ptr())[val_pos]);
  // A failed exchange refreshes old_val with the current value, so the loop
  // ends once the stored value is neither empty nor larger than `value`.
  for (auto old_val = empty_key; old_val == empty_key || old_val > value;) {
    // It is more efficient to use weak variant in a loop.
    if (value_ref.compare_exchange_weak(old_val, value)) break;
  }
}

// Atomically claims slot `pos` for `key` if it is empty; otherwise reports
// whether the slot already holds `key` or a different key.
template
inline typename ConcurrentIdHashMap::InsertState
ConcurrentIdHashMap::AttemptInsertAt(int64_t pos, IdType key) {
  auto expected = static_cast(kEmptyKey);
  ::cuda::std::atomic_ref key_ref(
      reinterpret_cast(hash_map_.data_ptr())[getKeyIndex(pos)]);
  if (key_ref.compare_exchange_strong(expected, key)) {
    return InsertState::INSERTED;
  } else if (expected == key) {
    return InsertState::EXISTED;
  } else {
    return InsertState::OCCUPIED;
  }
}

template class ConcurrentIdHashMap;
template class ConcurrentIdHashMap;

}  // namespace sampling
}  // namespace graphbolt

================================================
FILE: graphbolt/src/concurrent_id_hash_map.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file concurrent_id_hash_map.h
 * @brief Class about concurrent id hash map.
*/

#ifndef GRAPHBOLT_CONCURRENT_ID_HASH_MAP_H_
#define GRAPHBOLT_CONCURRENT_ID_HASH_MAP_H_

#include
#include
#include
#include

namespace graphbolt {
namespace sampling {

/**
 * @brief A CPU targeted hashmap for mapping duplicate and non-consecutive ids
 * in the provided array to unique and consecutive ones. It utilizes
 * multi-threading to accelerate the insert and search speed. Currently it is
 * only designed to be used in `ToBlockCpu` for optimizing, so keys are only
 * inserted once, by the constructor, and key deletion is not supported.
 *
 * The hash map is built by its constructor from an id array which is
 * divided into 2 parts: [`seed ids`, `sampled ids`]. `Seed ids` refer to
 * a set of ids chosen as the input for the sampling process and `sampled ids`
 * are the ids newly sampled from the process (note that the `seed ids` might
 * also be sampled in the process and included in the `sampled ids`). As a
 * result `seed ids` are mapped to [0, num_seed_ids) and `sampled ids` to
 * [num_seed_ids, num_unique_ids). Notice that mapping order is stable for
 * `seed ids` while not for the `sampled ids`.
 *
 * For example, for an array `A` having 4 seed ids with following entries:
 * [99, 98, 100, 97, 97, 101, 101, 102, 101]
 * Create the hashmap `H` with:
 * `H = ConcurrentIdHashMap(A, 4)` (1)
 * And get the unique ids with:
 * `U = H.GetUniqueIds()` (2) (U is an id array that stores the unique
 * ids in A).
 * Then `U` should be (U is not exclusive as the overall mapping is not stable):
 * [99, 98, 100, 97, 102, 101]
 * And the hashmap should generate following mappings:
 *  * [
 *   {key: 99, value: 0},
 *   {key: 98, value: 1},
 *   {key: 100, value: 2},
 *   {key: 97, value: 3},
 *   {key: 102, value: 4},
 *   {key: 101, value: 5}
 * ]
 * Search the hashmap with array `I`=[98, 99, 102]:
 * R = H.MapIds(I) (3)
 * R should be:
 * [1, 0, 4]
 **/
template
class ConcurrentIdHashMap {
 private:
  /**
   * @brief The result state of an attempt to insert.
   */
  enum class InsertState {
    OCCUPIED,  // Indicates that the space where an insertion is being
               // attempted is already occupied by another element.
    EXISTED,  // Indicates that the element being inserted already exists in the
              // map, and thus no insertion is performed.
    INSERTED  // Indicates that the insertion was successful and a new element
              // was added to the map.
  };

 public:
  /**
   * @brief Initialize the hashmap with an array of ids. The first `num_seeds`
   * ids are unique and must be mapped to a contiguous array starting
   * from 0. The rest can contain duplicates and their mapping result is not
   * stable. The unique'ified ids can be accessed through calling
   * `GetUniqueIds()`.
   *
   * @param ids The array of the ids to be inserted.
   * @param num_seeds The number of seed ids.
   */
  ConcurrentIdHashMap(const torch::Tensor& ids, size_t num_seeds);

  ConcurrentIdHashMap(const ConcurrentIdHashMap& other) = delete;
  ConcurrentIdHashMap& operator=(const ConcurrentIdHashMap& other) = delete;

  /**
   * @brief Get the unique ids for the keys given in the constructor.
   */
  const torch::Tensor& GetUniqueIds() const { return unique_ids_; }

  /**
   * @brief Find mappings of given keys.
   *
   * @param ids The keys to map for.
   *
   * @return Mapping results corresponding to `ids`.
   */
  torch::Tensor MapIds(const torch::Tensor& ids) const;

 private:
  /**
   * @brief Get the next position and delta for probing.
   *
   * @param[in,out] pos Calculate the next position with quadratic probing.
   * @param[in,out] delta Calculate the next delta by adding 1.
   */
  inline void Next(IdType* pos, IdType* delta) const;

  /**
   * @brief Find the mapping of a given key.
   *
   * @param id The key to map for.
   *
   * @return Mapping result corresponding to `id`.
   */
  inline IdType MapId(const IdType id) const;

  /**
   * @brief Insert an id into the hash map.
   *
   * @param id The id to be inserted.
   *
   * @return Whether the `id` is inserted or not.
   */
  inline bool Insert(IdType id);

  /**
   * @brief Set the value for the key in the hash map.
   *
   * @param key The key to set for.
   * @param value The value to be set for the `key`.
   *
   * @warning Key must exist.
   */
  inline void Set(IdType key, IdType value);

  /**
   * @brief Insert a key into the hash map.
   *
   * @param key The key to be inserted.
   * @param value The value to be set for the `key`.
   *
   */
  inline void InsertAndSet(IdType key, IdType value);

  /**
   * @brief Insert a key into the hash map. If the key exists, set the value
   * with the smaller value.
   *
   * @param id The key to be inserted.
   * @param value The value to be set for the `id`.
   *
   */
  inline void InsertAndSetMin(IdType id, IdType value);

  /**
   * @brief Attempt to insert the key into the hash map at the given position.
   *
   * @param pos The position in the hash map to be inserted at.
   * @param key The key to be inserted.
   *
   * @return The state of the insertion.
   */
  inline InsertState AttemptInsertAt(int64_t pos, IdType key);

 private:
  /**
   * @brief Hash map which is used to store all elements.
   */
  torch::Tensor hash_map_;

  /**
   * @brief Holds the ids that are made unique in the constructor.
   */
  torch::Tensor unique_ids_;

  /**
   * @brief Mask which is used to get the position in the table
   * for a key by performing `&` operation with it.
   */
  IdType mask_;
};

}  // namespace sampling
}  // namespace graphbolt

#endif  // GRAPHBOLT_CONCURRENT_ID_HASH_MAP_H_

================================================
FILE: graphbolt/src/cuda/common.h
================================================
/**
 *  Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *  All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/common.h * @brief Common utilities for CUDA */ #ifndef GRAPHBOLT_CUDA_COMMON_H_ #define GRAPHBOLT_CUDA_COMMON_H_ #include #include #include #include #include #include #include #include #include namespace graphbolt { namespace cuda { /** * @brief This class is designed to allocate workspace storage * and to get a nonblocking thrust execution policy * that uses torch's CUDA memory pool and the current cuda stream: * * cuda::CUDAWorkspaceAllocator allocator; * const auto stream = torch::cuda::getDefaultCUDAStream(); * const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream); * * Now, one can pass exec_policy to thrust functions * * To get an integer array of size 1000 whose lifetime is managed by unique_ptr, * use: * * auto int_array = allocator.AllocateStorage(1000); * * int_array.get() gives the raw pointer. */ template struct CUDAWorkspaceAllocator { static_assert(sizeof(char) == 1, "sizeof(char) == 1 should hold."); // Required by thrust to satisfy allocator requirements. using value_type = value_t; explicit CUDAWorkspaceAllocator() { at::globalContext().lazyInitDevice(at::kCUDA); } template CUDAWorkspaceAllocator(CUDAWorkspaceAllocator const&) noexcept {} CUDAWorkspaceAllocator& operator=(const CUDAWorkspaceAllocator&) = default; void operator()(void* ptr) const { c10::cuda::CUDACachingAllocator::raw_delete(ptr); } // Required by thrust to satisfy allocator requirements. value_type* allocate(std::ptrdiff_t size) const { return reinterpret_cast( c10::cuda::CUDACachingAllocator::raw_alloc(size * sizeof(value_type))); } // Required by thrust to satisfy allocator requirements. 
void deallocate(value_type* ptr, std::size_t) const { operator()(ptr); } template std::unique_ptr AllocateStorage( std::size_t size) const { return std::unique_ptr( reinterpret_cast( c10::cuda::CUDACachingAllocator::raw_alloc(sizeof(T) * size)), *this); } }; inline auto GetAllocator() { return CUDAWorkspaceAllocator{}; } inline auto GetCurrentStream() { return c10::cuda::getCurrentCUDAStream(); } template inline bool is_zero(T size) { return size == 0; } template <> inline bool is_zero(dim3 size) { return size.x == 0 || size.y == 0 || size.z == 0; } #define CUDA_RUNTIME_CHECK(EXPR) \ do { \ cudaError_t __err = EXPR; \ if (__err != cudaSuccess) { \ auto get_error_str_err = cudaGetErrorString(__err); \ AT_ERROR("CUDA runtime error: ", get_error_str_err); \ } \ } while (0) #define CUDA_CALL(func) C10_CUDA_CHECK((func)) #define CUDA_KERNEL_CALL(kernel, nblks, nthrs, shmem, ...) \ { \ if (!graphbolt::cuda::is_zero((nblks)) && \ !graphbolt::cuda::is_zero((nthrs))) { \ auto stream = graphbolt::cuda::GetCurrentStream(); \ (kernel)<<<(nblks), (nthrs), (shmem), stream>>>(__VA_ARGS__); \ C10_CUDA_KERNEL_LAUNCH_CHECK(); \ } \ } #define CUB_CALL(fn, ...) \ { \ auto allocator = graphbolt::cuda::GetAllocator(); \ auto stream = graphbolt::cuda::GetCurrentStream(); \ size_t workspace_size = 0; \ CUDA_CALL(cub::fn(nullptr, workspace_size, __VA_ARGS__, stream)); \ auto workspace = allocator.AllocateStorage(workspace_size); \ CUDA_CALL(cub::fn(workspace.get(), workspace_size, __VA_ARGS__, stream)); \ } #define THRUST_CALL(fn, ...) \ [&] { \ auto allocator = graphbolt::cuda::GetAllocator(); \ auto stream = graphbolt::cuda::GetCurrentStream(); \ const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream); \ return thrust::fn(exec_policy, __VA_ARGS__); \ }() /** * @brief This class is designed to handle the copy operation of a single * scalar_t item from a given CUDA device pointer. Later, if the object is cast * into scalar_t, the value can be read. 
* * auto num_edges = cuda::CopyScalar(indptr.data_ptr() + * indptr.size(0) - 1); * // Perform many operations here, they will run as normal. * // We finally need to read num_edges. * auto indices = torch::empty(static_cast(num_edges)); */ template struct CopyScalar { CopyScalar() : is_ready_(true) { init_pinned_storage(); } void record(at::cuda::CUDAStream stream = GetCurrentStream()) { copy_event_.record(stream); is_ready_ = false; } scalar_t* get() { return reinterpret_cast(pinned_scalar_.data_ptr()); } CopyScalar(const scalar_t* device_ptr) { init_pinned_storage(); auto stream = GetCurrentStream(); CUDA_CALL(cudaMemcpyAsync( reinterpret_cast(pinned_scalar_.data_ptr()), device_ptr, sizeof(scalar_t), cudaMemcpyDeviceToHost, stream)); record(stream); } operator scalar_t() { if (!is_ready_) { copy_event_.synchronize(); is_ready_ = true; } return *get(); } private: void init_pinned_storage() { pinned_scalar_ = torch::empty( sizeof(scalar_t), c10::TensorOptions().dtype(torch::kBool).pinned_memory(true)); } torch::Tensor pinned_scalar_; at::cuda::CUDAEvent copy_event_; bool is_ready_; }; #define GRAPHBOLT_DISPATCH_ELEMENT_SIZES(element_size, name, ...) 
\ [&] { \ switch (element_size) { \ case 1: { \ using element_size_t = uint8_t; \ return __VA_ARGS__(); \ } \ case 2: { \ using element_size_t = uint16_t; \ return __VA_ARGS__(); \ } \ case 4: { \ using element_size_t = uint32_t; \ return __VA_ARGS__(); \ } \ case 8: { \ using element_size_t = uint64_t; \ return __VA_ARGS__(); \ } \ case 16: { \ using element_size_t = float4; \ return __VA_ARGS__(); \ } \ default: \ TORCH_CHECK(false, name, " with the element_size is not supported!"); \ using element_size_t = uint8_t; \ return __VA_ARGS__(); \ } \ }() } // namespace cuda } // namespace graphbolt #endif // GRAPHBOLT_CUDA_COMMON_H_ ================================================ FILE: graphbolt/src/cuda/cooperative_minibatching_utils.cu ================================================ /** * Copyright (c) 2024, mfbalin (Muhammed Fatih Balin) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/cooperative_minibatching_utils.cu * @brief Cooperative Minibatching (arXiv:2310.12403) utility function * implementations in CUDA. 
*/
#include
#include
#include
#include
#include
#include "../utils.h"
#include "./common.h"
#include "./cooperative_minibatching_utils.cuh"
#include "./cooperative_minibatching_utils.h"
#include "./utils.h"

namespace graphbolt {
namespace cuda {

// Computes one part id per element of `nodes` by applying
// rank_assignment(id, rank, world_size) elementwise. The result has the same
// shape as `nodes` (empty_like) and dtype kPartDType.
torch::Tensor RankAssignment(
    torch::Tensor nodes, const int64_t rank, const int64_t world_size) {
  auto part_ids = torch::empty_like(nodes, nodes.options().dtype(kPartDType));
  auto part_ids_ptr = part_ids.data_ptr();
  AT_DISPATCH_INDEX_TYPES(
      nodes.scalar_type(), "RankAssignment", ([&] {
        auto nodes_ptr = nodes.data_ptr();
        THRUST_CALL(
            transform, nodes_ptr, nodes_ptr + nodes.numel(), part_ids_ptr,
            ::cuda::proclaim_return_type(
                [rank = static_cast(rank),
                 world_size = static_cast(
                     world_size)] __device__(index_t id) -> part_t {
                  return rank_assignment(id, rank, world_size);
                }));
      }));
  return part_ids;
}

// Sorts `nodes` by `part_ids` independently inside every segment described by
// `offsets_dev` and returns
// (nodes_sorted, index, offsets, offsets_event). See the declaration in
// cooperative_minibatching_utils.h for the meaning of each element.
std::tuple RankSortImpl(
    torch::Tensor nodes, torch::Tensor part_ids, torch::Tensor offsets_dev,
    const int64_t world_size) {
  // Only the low `num_bits` bits of the keys take part in the radix sorts
  // (passed as the last argument of SortPairs below).
  const int num_bits = cuda::NumberOfBits(world_size);
  // offsets_dev has one more entry than there are segments.
  const auto num_batches = offsets_dev.numel() - 1;
  auto offsets_dev_ptr = offsets_dev.data_ptr();
  auto part_ids_sorted = torch::empty_like(part_ids);
  // A separate copy of the keys is used as the input of the second sort.
  auto part_ids2 = part_ids.clone();
  auto part_ids2_sorted = torch::empty_like(part_ids2);
  auto nodes_sorted = torch::empty_like(nodes);
  // index[j] == j initially; it is sorted alongside the keys in the second
  // sort and later overwritten by the scatter.
  auto index = torch::arange(nodes.numel(), nodes.options());
  auto index_sorted = torch::empty_like(index);
  return AT_DISPATCH_INDEX_TYPES(
      nodes.scalar_type(), "RankSortImpl", ([&] {
        // First sort: keys are the part ids, values are the node ids.
        CUB_CALL(
            DeviceSegmentedRadixSort::SortPairs, part_ids.data_ptr(),
            part_ids_sorted.data_ptr(), nodes.data_ptr(),
            nodes_sorted.data_ptr(), nodes.numel(), num_batches,
            offsets_dev_ptr, offsets_dev_ptr + 1, 0, num_bits);
        // Pinned tensor with one entry per (segment, rank) pair plus a final
        // terminating entry; it is filled by the device lambda below.
        auto offsets = torch::empty(
            num_batches * world_size + 1, c10::TensorOptions()
                                              .dtype(offsets_dev.scalar_type())
                                              .pinned_memory(true));
        CUB_CALL(
            DeviceFor::Bulk, num_batches * world_size + 1,
            [=, part_ids = part_ids_sorted.data_ptr(),
             offsets = offsets.data_ptr()] __device__(int64_t i) {
              const auto batch_id = i / world_size;
              const auto rank = i % world_size;
              const auto offset_begin = offsets_dev_ptr[batch_id];
              // The min() clamp keeps the read in bounds for the last i,
              // where batch_id == num_batches; the searched range is then
              // empty and the entry becomes offsets_dev_ptr[num_batches].
              const auto offset_end =
                  offsets_dev_ptr[::cuda::std::min(batch_id + 1, num_batches)];
              // Position of the first sorted part id that is not less than
              // `rank` within this segment, expressed as a global offset.
              offsets[i] = cub::LowerBound(
                               part_ids + offset_begin,
                               offset_end - offset_begin, rank) +
                           offset_begin;
            });
        // Recorded right after the launch that writes `offsets`; callers
        // synchronize on it before reading `offsets` on the host (RankSort
        // does so below).
        at::cuda::CUDAEvent offsets_event;
        offsets_event.record();
        // Second sort: same keys, values are the original positions.
        CUB_CALL(
            DeviceSegmentedRadixSort::SortPairs, part_ids2.data_ptr(),
            part_ids2_sorted.data_ptr(), index.data_ptr(),
            index_sorted.data_ptr(), nodes.numel(), num_batches,
            offsets_dev_ptr, offsets_dev_ptr + 1, 0, num_bits);
        // IndptrEdgeIdsImpl is given torch::nullopt as its offset argument.
        // NOTE(review): presumably `values` then counts 0, 1, 2, ... inside
        // every segment of offsets_dev -- confirm against expand_indptr.cu.
        auto values = ops::IndptrEdgeIdsImpl(
            offsets_dev, nodes.scalar_type(), torch::nullopt, nodes.numel());
        // thrust::scatter writes index[index_sorted[j]] = values[j]; `index`
        // is reused as the output buffer.
        THRUST_CALL(
            scatter, values.data_ptr(), values.data_ptr() + values.numel(),
            index_sorted.data_ptr(), index.data_ptr());
        return std::make_tuple(
            nodes_sorted, index, offsets, std::move(offsets_event));
      }));
}

// Host-side entry point: concatenates `nodes_list`, assigns part ids, runs
// RankSortImpl with one segment per input tensor and slices the results back
// into one (sorted nodes, positions, rank offsets) tuple per input tensor.
std::vector> RankSort(
    const std::vector& nodes_list, const int64_t rank,
    const int64_t world_size) {
  const auto num_batches = nodes_list.size();
  auto nodes = torch::cat(nodes_list, 0);
  // Pinned host buffer holding the running sum of the input tensor sizes.
  auto offsets = torch::empty(
      num_batches + 1,
      c10::TensorOptions().dtype(torch::kInt64).pinned_memory(true));
  auto offsets_ptr = offsets.data_ptr();
  offsets_ptr[0] = 0;
  for (int64_t i = 0; i < num_batches; i++) {
    offsets_ptr[i + 1] = offsets_ptr[i] + nodes_list[i].numel();
  }
  auto part_ids = RankAssignment(nodes, rank, world_size);
  // Device copy of the offsets, transferred asynchronously on the current
  // stream.
  auto offsets_dev =
      torch::empty_like(offsets, nodes.options().dtype(offsets.scalar_type()));
  CUDA_CALL(cudaMemcpyAsync(
      offsets_dev.data_ptr(), offsets_ptr, sizeof(int64_t) * offsets.numel(),
      cudaMemcpyHostToDevice, cuda::GetCurrentStream()));
  // The second element is the tensor RankSortImpl returns as `index`.
  auto [nodes_sorted, index_sorted, rank_offsets, rank_offsets_event] =
      RankSortImpl(nodes, part_ids, offsets_dev, world_size);
  std::vector> results;
  // Wait for the event recorded by RankSortImpl before handing out slices of
  // rank_offsets.
  rank_offsets_event.synchronize();
  for (int64_t i = 0; i < num_batches; i++) {
    // Each rank_offsets slice has world_size + 1 entries, so consecutive
    // slices share their boundary entry.
    results.emplace_back(
        nodes_sorted.slice(0, offsets_ptr[i], offsets_ptr[i + 1]),
        index_sorted.slice(0, offsets_ptr[i], offsets_ptr[i + 1]),
        rank_offsets.slice(0, i * world_size, (i + 1) * world_size + 1));
  }
  return results;
}

// Runs RankSort through async(); the second argument tells async() whether
// the first tensor of `nodes_list` is on the GPU. nodes_list.at(0) throws if
// the list is empty.
c10::intrusive_ptr>>> RankSortAsync(
    const std::vector& nodes_list, const int64_t rank,
    const int64_t world_size) {
  return async(
      [=] { return RankSort(nodes_list, rank, world_size); },
      utils::is_on_gpu(nodes_list.at(0)));
}

}  // namespace cuda
}  // namespace graphbolt
================================================
FILE: graphbolt/src/cuda/cooperative_minibatching_utils.cuh
================================================
/**
 * Copyright (c) 2024, mfbalin (Muhammed Fatih Balin)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/cooperative_minibatching_utils.cuh
 * @brief Cooperative Minibatching (arXiv:2310.12403) utility device functions
 * in CUDA.
 */
#ifndef GRAPHBOLT_CUDA_COOPERATIVE_MINIBATCHING_UTILS_CUH_
#define GRAPHBOLT_CUDA_COOPERATIVE_MINIBATCHING_UTILS_CUH_

#include

namespace graphbolt {
namespace cuda {

// Element type of the part id tensors; kPartDType is the matching torch dtype.
using part_t = uint8_t;
constexpr auto kPartDType = torch::kUInt8;

/**
 * @brief Given a vertex id, the rank of current GPU and the world size, returns
 * the rank that this id belongs in a deterministic manner.
 *
 * @param id The node id that will be mapped to a rank in [0, world_size).
 * @param rank The rank of the current GPU.
 * @param world_size The world size, the total number of cooperating GPUs.
 *
 * @return The rank of the GPU the given id is mapped to.
*/
template
__device__ inline auto rank_assignment(
    index_t id, uint32_t rank, uint32_t world_size) {
  // Consider using a faster implementation in the future.
  constexpr uint64_t kCurandSeed = 999961;  // Any random number.
  // The generator is seeded with a fixed constant and `id` is passed as the
  // third argument of curand_init (the offset parameter in cuRAND's device
  // API), so the same id always produces the same draw.
  curandStatePhilox4_32_10_t rng;
  curand_init(kCurandSeed, 0, id, &rng);
  // `rank` is subtracted from the draw before the modulo, so the value
  // returned for a given id depends on the rank passed in.
  return (curand(&rng) - rank) % world_size;
}

}  // namespace cuda
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_COOPERATIVE_MINIBATCHING_UTILS_CUH_
================================================
FILE: graphbolt/src/cuda/cooperative_minibatching_utils.h
================================================
/**
 * Copyright (c) 2024, mfbalin (Muhammed Fatih Balin)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/cooperative_minibatching_utils.h
 * @brief Cooperative Minibatching (arXiv:2310.12403) utility function headers
 * in CUDA.
 */
#ifndef GRAPHBOLT_CUDA_COOPERATIVE_MINIBATCHING_UTILS_H_
#define GRAPHBOLT_CUDA_COOPERATIVE_MINIBATCHING_UTILS_H_

#include
#include
#include

namespace graphbolt {
namespace cuda {

/**
 * @brief Given node ids, the rank of current GPU and the world size, returns
 * the ranks that the given ids belong in a deterministic manner.
 *
 * @param nodes Node id tensor to be mapped to a rank in [0, world_size).
 * @param rank Rank of the current GPU.
 * @param world_size World size, the total number of cooperating GPUs.
 *
 * @return The rank tensor of the GPU the given id tensor is mapped to.
*/
torch::Tensor RankAssignment(
    torch::Tensor nodes, int64_t rank, int64_t world_size);

/**
 * @brief Given node ids, the ranks they belong to, the offsets that separate
 * different node types and the world size, returns the node ids sorted w.r.t.
 * the ranks that the given ids belong to, along with their new positions.
 *
 * @param nodes Node id tensor to be mapped to a rank in [0, world_size).
 * @param part_ids Rank tensor the nodes belong to.
 * @param offsets_dev Offsets to separate different node types.
 * @param world_size World size, the total number of cooperating GPUs.
 *
 * @return (sorted_nodes, new_positions, rank_offsets, rank_offsets_event),
 * where the first one includes sorted nodes, the second contains new positions
 * of the given nodes, so that sorted_nodes[new_positions] == nodes, and the
 * third contains the offsets of the sorted_nodes indicating
 * sorted_nodes[rank_offsets[i]: rank_offsets[i + 1]] contains nodes that
 * belong to the `i`th rank. Before accessing rank_offsets on the CPU,
 * `rank_offsets_event.synchronize()` is required.
 */
std::tuple RankSortImpl(
    torch::Tensor nodes, torch::Tensor part_ids, torch::Tensor offsets_dev,
    int64_t world_size);

/**
 * @brief Given a vector of node id tensors, the rank of current GPU and the
 * world size, returns, for every tensor, its node ids sorted w.r.t. the ranks
 * that the given ids belong to, along with their new positions.
 *
 * @param nodes_list Vector of node id tensors whose ids are to be mapped to a
 * rank in [0, world_size).
 * @param rank Rank of the current GPU.
 * @param world_size World size, the total number of cooperating GPUs.
 *
 * @return vector of (sorted_nodes, new_positions, rank_offsets), where the
 * first one includes sorted nodes, the second contains new positions of the
 * given nodes, so that sorted_nodes[new_positions] == nodes, and the third
 * contains the offsets of the sorted_nodes indicating
 * sorted_nodes[rank_offsets[i]: rank_offsets[i + 1]] contains nodes that
 * belong to the `i`th rank.
*/
std::vector> RankSort(
    const std::vector& nodes_list, int64_t rank, int64_t world_size);

// Takes the same arguments as RankSort; the result is delivered through the
// returned c10::intrusive_ptr handle instead of by value.
c10::intrusive_ptr>>> RankSortAsync(
    const std::vector& nodes_list, const int64_t rank,
    const int64_t world_size);

}  // namespace cuda
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_COOPERATIVE_MINIBATCHING_UTILS_H_
================================================
FILE: graphbolt/src/cuda/cumsum.cu
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/cumsum.cu
 * @brief Cumsum operators implementation on CUDA.
 */
#include

#include "./common.h"

namespace graphbolt {
namespace ops {

// Returns a tensor shaped like `input` (empty_like) that holds the exclusive
// prefix sum of its first input.size(0) elements, computed with CUB's
// DeviceScan::ExclusiveSum. Dispatch covers the integral dtypes only.
torch::Tensor ExclusiveCumSum(torch::Tensor input) {
  auto result = torch::empty_like(input);

  AT_DISPATCH_INTEGRAL_TYPES(input.scalar_type(), "ExclusiveCumSum", ([&] {
                               CUB_CALL(
                                   DeviceScan::ExclusiveSum, input.data_ptr(),
                                   result.data_ptr(), input.size(0));
                             }));
  return result;
}

}  // namespace ops
}  // namespace graphbolt
================================================
FILE: graphbolt/src/cuda/expand_indptr.cu
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/expand_indptr.cu
 * @brief ExpandIndptr operator implementation on CUDA.
 */
#include
#include
#include
#include
#include

#include "./common.h"

namespace graphbolt {
namespace ops {

// Input generator for the "repeat" variant: for row i it yields an iterator
// that always dereferences to nodes[i], or to i itself when `nodes` is null.
template
struct RepeatIndex {
  const nodes_t* nodes;
  __host__ __device__ auto operator()(indices_t i) {
    return thrust::make_constant_iterator(nodes ? nodes[i] : i);
  }
};

// Input generator for the "edge ids" variant: for row i it yields a counting
// iterator that starts at nodes[i], or at 0 when `nodes` is null.
template
struct IotaIndex {
  const nodes_t* nodes;
  __host__ __device__ auto operator()(indices_t i) {
    return thrust::make_counting_iterator(nodes ? nodes[i] : 0);
  }
};

// Maps row i to the address inside `buffer` where its output starts, i.e.
// buffer + indptr[i].
template
struct OutputBufferIndexer {
  const indptr_t* indptr;
  indices_t* buffer;
  __host__ __device__ auto operator()(int64_t i) { return buffer + indptr[i]; }
};

// Maps row i to the number of output elements it owns,
// indptr[i + 1] - indptr[i].
template
struct AdjacentDifference {
  const indptr_t* indptr;
  __host__ __device__ auto operator()(int64_t i) {
    return indptr[i + 1] - indptr[i];
  }
};

// Shared implementation of ExpandIndptrImpl and IndptrEdgeIdsImpl. For every
// row i of `indptr` it fills output[indptr[i] : indptr[i + 1]] either with a
// repeated value (RepeatIndex) or, when `is_edge_ids_variant` is true, with a
// counting sequence (IotaIndex). The output has dtype `dtype` and
// `output_size` elements.
torch::Tensor ExpandIndptrImpl(
    torch::Tensor indptr, torch::ScalarType dtype,
    torch::optional nodes, torch::optional output_size,
    const bool is_edge_ids_variant) {
  if (!output_size.has_value()) {
    // No size supplied: take it from the last element of indptr.
    // NOTE(review): CopyScalar presumably copies that device value to the
    // host -- confirm in common.h.
    output_size = AT_DISPATCH_INTEGRAL_TYPES(
        indptr.scalar_type(), "ExpandIndptrIndptr[-1]", ([&]() -> int64_t {
          auto indptr_ptr = indptr.data_ptr();
          auto output_size =
              cuda::CopyScalar{indptr_ptr + indptr.size(0) - 1};
          return static_cast(output_size);
        }));
  }
  auto csc_rows =
      torch::empty(output_size.value(), indptr.options().dtype(dtype));

  // Three nested dispatches: indptr dtype, output dtype and nodes dtype.
  AT_DISPATCH_INTEGRAL_TYPES(
      indptr.scalar_type(), "ExpandIndptrIndptr", ([&] {
        using indptr_t = scalar_t;
        auto indptr_ptr = indptr.data_ptr();
        AT_DISPATCH_INTEGRAL_TYPES(
            dtype, "ExpandIndptrIndices", ([&] {
              using indices_t = scalar_t;
              auto csc_rows_ptr = csc_rows.data_ptr();

              // Without `nodes` the innermost dispatch falls back to the
              // output dtype.
              auto nodes_dtype = nodes ? nodes.value().scalar_type() : dtype;
              AT_DISPATCH_INTEGRAL_TYPES(
                  nodes_dtype, "ExpandIndptrNodes", ([&] {
                    using nodes_t = scalar_t;
                    auto nodes_ptr =
                        nodes ? nodes.value().data_ptr() : nullptr;

                    thrust::counting_iterator iota(0);
                    // Per-row destination pointers and per-row element
                    // counts, both derived lazily from indptr.
                    auto output_buffer = thrust::make_transform_iterator(
                        iota, OutputBufferIndexer{
                                  indptr_ptr, csc_rows_ptr});
                    auto buffer_sizes = thrust::make_transform_iterator(
                        iota, AdjacentDifference{indptr_ptr});

                    const auto num_rows = indptr.size(0) - 1;
                    // A single DeviceCopy::Batched call handles at most
                    // max_copy_at_once rows; the loops below issue further
                    // calls for the remainder.
                    constexpr int64_t max_copy_at_once =
                        std::numeric_limits::max();

                    if (is_edge_ids_variant) {
                      auto input_buffer = thrust::make_transform_iterator(
                          iota, IotaIndex{nodes_ptr});
                      for (int64_t i = 0; i < num_rows;
                           i += max_copy_at_once) {
                        CUB_CALL(
                            DeviceCopy::Batched, input_buffer + i,
                            output_buffer + i, buffer_sizes + i,
                            std::min(num_rows - i, max_copy_at_once));
                      }
                    } else {
                      auto input_buffer = thrust::make_transform_iterator(
                          iota, RepeatIndex{nodes_ptr});
                      for (int64_t i = 0; i < num_rows;
                           i += max_copy_at_once) {
                        CUB_CALL(
                            DeviceCopy::Batched, input_buffer + i,
                            output_buffer + i, buffer_sizes + i,
                            std::min(num_rows - i, max_copy_at_once));
                      }
                    }
                  }));
            }));
      }));
  return csc_rows;
}

// Repeat variant: forwards to the implementation above with
// is_edge_ids_variant == false.
torch::Tensor ExpandIndptrImpl(
    torch::Tensor indptr, torch::ScalarType dtype,
    torch::optional nodes, torch::optional output_size) {
  return ExpandIndptrImpl(indptr, dtype, nodes, output_size, false);
}

// Edge ids variant: forwards to the implementation above with
// is_edge_ids_variant == true; `offset` takes the place of `nodes`, so each
// row counts upwards from offset[i] (or from 0 when no offset is given).
torch::Tensor IndptrEdgeIdsImpl(
    torch::Tensor indptr, torch::ScalarType dtype,
    torch::optional offset, torch::optional output_size) {
  return ExpandIndptrImpl(indptr, dtype, offset, output_size, true);
}

}  // namespace ops
}  // namespace graphbolt
================================================
FILE: graphbolt/src/cuda/extension/gpu_cache.cu
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/gpu_cache.cu * @brief GPUCache implementation on CUDA. */ #include #include "../common.h" #include "./gpu_cache.h" namespace graphbolt { namespace cuda { GpuCache::GpuCache(const std::vector &shape, torch::ScalarType dtype) { TORCH_CHECK(shape.size() >= 2, "Shape must at least have 2 dimensions."); const auto num_items = shape[0]; TORCH_CHECK( num_items > 0, "The capacity of GpuCache needs to be a positive."); const int64_t num_feats = std::accumulate(shape.begin() + 1, shape.end(), 1ll, std::multiplies<>()); const int element_size = torch::empty(1, torch::TensorOptions().dtype(dtype)).element_size(); num_bytes_ = num_feats * element_size; num_float_feats_ = (num_bytes_ + sizeof(float) - 1) / sizeof(float); cache_ = std::make_unique( (num_items + bucket_size - 1) / bucket_size, num_float_feats_); shape_ = shape; shape_[0] = -1; dtype_ = dtype; device_id_ = cuda::GetCurrentStream().device_index(); } std::tuple GpuCache::Query( torch::Tensor keys) { TORCH_CHECK(keys.device().is_cuda(), "Keys should be on a CUDA device."); TORCH_CHECK( keys.device().index() == device_id_, "Keys should be on the correct CUDA device."); TORCH_CHECK(keys.sizes().size() == 1, "Keys should be a 1D tensor."); keys = keys.to(torch::kLong); auto values = torch::empty( {keys.size(0), num_float_feats_}, keys.options().dtype(torch::kFloat)); auto missing_index = torch::empty(keys.size(0), keys.options().dtype(torch::kLong)); auto missing_keys = 
torch::empty(keys.size(0), keys.options().dtype(torch::kLong)); auto allocator = cuda::GetAllocator(); auto missing_len_device = allocator.AllocateStorage(1); cache_->Query( reinterpret_cast(keys.data_ptr()), keys.size(0), values.data_ptr(), reinterpret_cast(missing_index.data_ptr()), reinterpret_cast(missing_keys.data_ptr()), missing_len_device.get(), cuda::GetCurrentStream()); values = values.view(torch::kByte) .slice(1, 0, num_bytes_) .view(dtype_) .view(shape_); cuda::CopyScalar missing_len(missing_len_device.get()); missing_index = missing_index.slice(0, 0, static_cast(missing_len)); missing_keys = missing_keys.slice(0, 0, static_cast(missing_len)); return std::make_tuple(values, missing_index, missing_keys); } c10::intrusive_ptr>> GpuCache::QueryAsync( torch::Tensor keys) { return async( [=] { auto [values, missing_index, missing_keys] = Query(keys); return std::vector{values, missing_index, missing_keys}; }, true); } void GpuCache::Replace(torch::Tensor keys, torch::Tensor values) { TORCH_CHECK(keys.device().is_cuda(), "Keys should be on a CUDA device."); TORCH_CHECK( keys.device().index() == device_id_, "Keys should be on the correct CUDA device."); TORCH_CHECK(values.device().is_cuda(), "Keys should be on a CUDA device."); TORCH_CHECK( values.device().index() == device_id_, "Values should be on the correct CUDA device."); TORCH_CHECK( keys.size(0) == values.size(0), "The first dimensions of keys and values must match."); TORCH_CHECK( std::equal(shape_.begin() + 1, shape_.end(), values.sizes().begin() + 1), "Values should have the correct dimensions."); TORCH_CHECK( values.scalar_type() == dtype_, "Values should have the correct dtype."); if (keys.numel() == 0) return; keys = keys.to(torch::kLong); torch::Tensor float_values; if (num_bytes_ % sizeof(float) != 0) { float_values = torch::empty( {values.size(0), num_float_feats_}, values.options().dtype(torch::kFloat)); float_values.view(torch::kByte) .slice(1, 0, num_bytes_) 
.copy_(values.view(torch::kByte).view({values.size(0), -1})); } else { float_values = values.view(torch::kByte) .view({values.size(0), -1}) .view(torch::kFloat) .contiguous(); } cache_->Replace( reinterpret_cast(keys.data_ptr()), keys.size(0), float_values.data_ptr(), cuda::GetCurrentStream()); } c10::intrusive_ptr GpuCache::Create( const std::vector &shape, torch::ScalarType dtype) { return c10::make_intrusive(shape, dtype); } } // namespace cuda } // namespace graphbolt ================================================ FILE: graphbolt/src/cuda/extension/gpu_cache.h ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/gpu_cache.h * @brief Header file of HugeCTR gpu_cache wrapper. */ #ifndef GRAPHBOLT_GPU_CACHE_H_ #define GRAPHBOLT_GPU_CACHE_H_ #include #include #include #include #include namespace graphbolt { namespace cuda { class GpuCache : public torch::CustomClassHolder { using key_t = long long; constexpr static int set_associativity = 2; constexpr static int WARP_SIZE = 32; constexpr static int bucket_size = WARP_SIZE * set_associativity; using gpu_cache_t = ::gpu_cache::gpu_cache< key_t, uint64_t, std::numeric_limits::max(), set_associativity, WARP_SIZE>; public: /** * @brief Constructor for the GpuCache struct. * * @param shape The shape of the GPU cache. * @param dtype The datatype of items to be stored. 
*/ GpuCache(const std::vector& shape, torch::ScalarType dtype); GpuCache() = default; std::tuple Query( torch::Tensor keys); c10::intrusive_ptr>> QueryAsync( torch::Tensor keys); void Replace(torch::Tensor keys, torch::Tensor values); static c10::intrusive_ptr Create( const std::vector& shape, torch::ScalarType dtype); private: std::vector shape_; torch::ScalarType dtype_; std::unique_ptr cache_; int64_t num_bytes_; int64_t num_float_feats_; torch::DeviceIndex device_id_; }; // The cu file in HugeCTR gpu cache uses unsigned int and long long. // Changing to int64_t results in a mismatch of template arguments. static_assert( sizeof(long long) == sizeof(int64_t), "long long and int64_t needs to have the same size."); // NOLINT } // namespace cuda } // namespace graphbolt #endif // GRAPHBOLT_GPU_CACHE_H_ ================================================ FILE: graphbolt/src/cuda/extension/gpu_graph_cache.cu ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/gpu_graph_cache.cu * @brief GPU graph cache implementation on CUDA. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include "../common.h" #include "../utils.h" #include "./gpu_graph_cache.h" namespace graphbolt { namespace cuda { namespace { constexpr int cg_size = 1; template using probing_t = cuco::linear_probing>; template using allocator_t = cuda::CUDAWorkspaceAllocator>; template using map_t = cuco::static_map< index_t, index_t, cuco::extent, ::cuda::thread_scope_device, thrust::equal_to, probing_t, allocator_t>; template __global__ void _Insert( const int64_t num_nodes, const index_t num_existing, const index_t* seeds, const index_t* missing_indices, const index_t* indices, map_t map) { int64_t i = blockIdx.x * blockDim.x + threadIdx.x; const int stride = gridDim.x * blockDim.x; while (i < num_nodes) { const auto key = seeds[missing_indices[indices[i]]]; auto slot = map.find(key); slot->second = num_existing + i; i += stride; } } /** * @brief For node ids not in the cache, it keeps their access count inside * a hash table as (v, -c) where v is the node id and c is the access count. * When c == -threshold, it means that v will be inserted into the cache * during the call to the replace method. Once v is inserted into the cache, * c is assigned to a nonnegative value and indicates the local id of vertex * v in the cache. * * @param num_nodes The number of node ids. * @param seeds The node ids the cache is being queried with. * @param positions Holds the values found in the hash table. * @param map The hash table holding (v, -c) or (v, local_id). 
* */ template __global__ void _QueryAndIncrement( const int64_t num_nodes, const index_t* seeds, index_t* positions, map_t map) { int64_t i = blockIdx.x * blockDim.x + threadIdx.x; const int stride = gridDim.x * blockDim.x; while (i < num_nodes) { const auto key = seeds[i]; constexpr index_t minusONE = -1; auto [slot, is_new_key] = map.insert_and_find(cuco::pair{key, minusONE}); int64_t position = -1; if (!is_new_key) { auto ref = ::cuda::atomic_ref{ slot->second}; position = ref.load(::cuda::memory_order_relaxed); if (position < 0) { position = ref.fetch_add(-1, ::cuda::memory_order_relaxed) - 1; } } positions[i] = position; i += stride; } } constexpr int kIntBlockSize = 512; } // namespace c10::intrusive_ptr GpuGraphCache::Create( const int64_t num_edges, const int64_t threshold, torch::ScalarType indptr_dtype, std::vector dtypes, bool has_original_edge_ids) { return c10::make_intrusive( num_edges, threshold, indptr_dtype, dtypes, has_original_edge_ids); } GpuGraphCache::GpuGraphCache( const int64_t num_edges, const int64_t threshold, torch::ScalarType indptr_dtype, std::vector dtypes, bool has_original_edge_ids) { const int64_t initial_node_capacity = 1024; AT_DISPATCH_INDEX_TYPES( dtypes.at(0), "GpuGraphCache::GpuGraphCache", ([&] { auto map_temp = map_t{ initial_node_capacity, kDoubleLoadFactor, cuco::empty_key{static_cast(-1)}, cuco::empty_value{std::numeric_limits::lowest()}, {}, probing_t{}, {}, {}, allocator_t{}, ::cuda::stream_ref{cuda::GetCurrentStream()}}; map_ = new map_t{std::move(map_temp)}; })); C10_CUDA_KERNEL_LAUNCH_CHECK(); // Check the map constructor's success. 
const auto options = torch::TensorOptions().device(c10::DeviceType::CUDA); TORCH_CHECK(threshold > 0, "Threshold should be a position integer."); threshold_ = threshold; device_id_ = cuda::GetCurrentStream().device_index(); map_size_ = 0; num_nodes_ = 0; num_edges_ = 0; indptr_ = torch::zeros(initial_node_capacity + 1, options.dtype(indptr_dtype)); if (!has_original_edge_ids) { offset_ = torch::empty(indptr_.size(0) - 1, indptr_.options()); } for (auto dtype : dtypes) { cached_edge_tensors_.push_back( torch::empty(num_edges, options.dtype(dtype))); } } GpuGraphCache::~GpuGraphCache() { AT_DISPATCH_INDEX_TYPES( cached_edge_tensors_.at(0).scalar_type(), "GpuGraphCache::GpuGraphCache", ([&] { delete reinterpret_cast*>(map_); })); } std::tuple GpuGraphCache::Query( torch::Tensor seeds) { TORCH_CHECK(seeds.device().is_cuda(), "Seeds should be on a CUDA device."); TORCH_CHECK( seeds.device().index() == device_id_, "Seeds should be on the correct CUDA device."); TORCH_CHECK(seeds.sizes().size() == 1, "Keys should be a 1D tensor."); std::lock_guard lock(mtx_); auto allocator = cuda::GetAllocator(); auto index_dtype = cached_edge_tensors_.at(0).scalar_type(); const dim3 block(kIntBlockSize); const dim3 grid((seeds.size(0) + kIntBlockSize - 1) / kIntBlockSize); return AT_DISPATCH_INDEX_TYPES( index_dtype, "GpuGraphCache::Query", ([&] { auto map = reinterpret_cast*>(map_); while (( map_size_ + seeds.size(0) >= map->capacity() * kDoubleLoadFactor)) { map->rehash_async( map->capacity() * kIntGrowthFactor, ::cuda::stream_ref{cuda::GetCurrentStream()}); } auto positions = torch::empty_like(seeds); CUDA_KERNEL_CALL( _QueryAndIncrement, grid, block, 0, static_cast(seeds.size(0)), seeds.data_ptr(), positions.data_ptr(), map->ref(cuco::insert_and_find)); auto num_threshold_new_hit = allocator.AllocateStorage>( 1); // Since threshold_ is a class member, we want the lambda functions // below to only capture this particular variable by reassigning it to a // local variable. 
const auto threshold = -threshold_; auto is_threshold_new_hit = thrust::make_transform_iterator( positions.data_ptr(), [=] __host__ __device__(index_t x) { int64_t is_threshold = x == threshold; int64_t is_new = x == -1; int64_t is_hit = x >= 0; return thrust::make_tuple(is_threshold, is_new, is_hit); }); CUB_CALL( DeviceReduce::Reduce, is_threshold_new_hit, num_threshold_new_hit.get(), positions.size(0), [] __host__ __device__( const thrust::tuple& a, const thrust::tuple& b) { return thrust::make_tuple( thrust::get<0>(a) + thrust::get<0>(b), thrust::get<1>(a) + thrust::get<1>(b), thrust::get<2>(a) + thrust::get<2>(b)); }, thrust::tuple{}); CopyScalar num_threshold_new_hit_cpu{num_threshold_new_hit.get()}; thrust::counting_iterator iota{0}; auto position_and_index = thrust::make_zip_iterator(positions.data_ptr(), iota); auto output_positions = torch::empty_like(seeds); auto output_indices = torch::empty_like(seeds); auto output_position_and_index = thrust::make_zip_iterator( output_positions.data_ptr(), output_indices.data_ptr()); CUB_CALL( DevicePartition::If, position_and_index, output_position_and_index, cub::DiscardOutputIterator{}, seeds.size(0), [] __device__(thrust::tuple & x) { return thrust::get<0>(x) >= 0; }); const auto [num_threshold, num_new, num_hit] = static_cast>( num_threshold_new_hit_cpu); map_size_ += num_new; return std::make_tuple( output_indices, output_positions, num_hit, num_threshold); })); } c10::intrusive_ptr< Future>> GpuGraphCache::QueryAsync(torch::Tensor seeds) { return async([=] { return Query(seeds); }, true); } std::tuple> GpuGraphCache::Replace( torch::Tensor seeds, torch::Tensor indices, torch::Tensor positions, int64_t num_hit, int64_t num_threshold, torch::Tensor indptr, std::vector edge_tensors) { const auto with_edge_ids = offset_.has_value(); // The last element of edge_tensors has the edge ids. 
const auto num_tensors = edge_tensors.size() - with_edge_ids; TORCH_CHECK( num_tensors == cached_edge_tensors_.size(), "Same number of tensors need to be passed!"); const auto num_nodes = seeds.size(0); TORCH_CHECK( indptr.size(0) == num_nodes - num_hit + 1, "(indptr.size(0) == seeds.size(0) - num_hit + 1) failed."); std::lock_guard lock(mtx_); const int64_t num_buffers = num_nodes * num_tensors; auto allocator = cuda::GetAllocator(); auto index_dtype = cached_edge_tensors_.at(0).scalar_type(); return AT_DISPATCH_INDEX_TYPES( index_dtype, "GpuGraphCache::Replace", ([&] { using indices_t = index_t; return AT_DISPATCH_INDEX_TYPES( indptr_.scalar_type(), "GpuGraphCache::Replace::copy_prep", ([&] { using indptr_t = index_t; static_assert( sizeof(int64_t) == sizeof(void*), "Pointers have to be 64-bit."); static_assert( sizeof(std::byte) == 1, "Byte needs to have a size of 1."); auto cache_missing_dtype = torch::empty( // Below, we use this storage to store a tuple of 4 elements, // since each element is 64-bit, we need 4x int64 storage. 
4 * num_tensors, c10::TensorOptions() .dtype(torch::kInt64) .pinned_memory(true)); auto cache_missing_dtype_ptr = reinterpret_cast<::cuda::std::tuple< std::byte*, std::byte*, int64_t, int64_t>*>( cache_missing_dtype.data_ptr()); int64_t total_size = 0; for (size_t i = 0; i < num_tensors; i++) { TORCH_CHECK( cached_edge_tensors_[i].scalar_type() == edge_tensors[i].scalar_type(), "The dtypes of edge tensors must match."); if (i > 0) { TORCH_CHECK( edge_tensors[i - 1].size(0) == edge_tensors[i].size(0), "The missing edge tensors should have identical size."); } const int64_t element_size = edge_tensors[i].element_size(); cache_missing_dtype_ptr[i] = { reinterpret_cast( cached_edge_tensors_[i].data_ptr()), reinterpret_cast(edge_tensors[i].data_ptr()), element_size, total_size}; total_size += element_size; } auto cache_missing_dtype_dev = allocator.AllocateStorage< ::cuda::std::tuple>( num_tensors); THRUST_CALL( copy_n, cache_missing_dtype_ptr, num_tensors, cache_missing_dtype_dev.get()); auto input = allocator.AllocateStorage(num_buffers); auto input_size = allocator.AllocateStorage(num_buffers + 1); torch::optional edge_id_offsets; if (with_edge_ids) { edge_id_offsets = torch::empty( num_nodes, seeds.options().dtype(offset_.value().scalar_type())); } const auto cache_missing_dtype_dev_ptr = cache_missing_dtype_dev.get(); const auto indices_ptr = indices.data_ptr(); const auto positions_ptr = positions.data_ptr(); const auto input_ptr = input.get(); const auto input_size_ptr = input_size.get(); const auto edge_id_offsets_ptr = edge_id_offsets ? edge_id_offsets->data_ptr() : nullptr; const auto cache_indptr = indptr_.data_ptr(); const auto missing_indptr = indptr.data_ptr(); const auto cache_offset = offset_ ? offset_->data_ptr() : nullptr; const auto missing_edge_ids = edge_id_offsets ? 
edge_tensors.back().data_ptr() : nullptr; CUB_CALL(DeviceFor::Bulk, num_buffers, [=] __device__(int64_t i) { const auto tensor_idx = i / num_nodes; const auto idx = i % num_nodes; const auto pos = positions_ptr[idx]; const auto original_idx = indices_ptr[idx]; const auto [cache_ptr, missing_ptr, size, cum_size] = cache_missing_dtype_dev_ptr[tensor_idx]; const auto is_cached = pos >= 0; const auto offset = is_cached ? cache_indptr[pos] : missing_indptr[idx - num_hit]; const auto offset_end = is_cached ? cache_indptr[pos + 1] : missing_indptr[idx - num_hit + 1]; const auto out_idx = tensor_idx * num_nodes + original_idx; input_ptr[out_idx] = (is_cached ? cache_ptr : missing_ptr) + offset * size; input_size_ptr[out_idx] = size * (offset_end - offset); if (edge_id_offsets_ptr && i < num_nodes) { const auto edge_id = is_cached ? cache_offset[pos] : missing_edge_ids[offset]; edge_id_offsets_ptr[out_idx] = edge_id; } }); auto output_indptr = torch::empty( num_nodes + 1, seeds.options().dtype(indptr_.scalar_type())); auto output_indptr_ptr = output_indptr.data_ptr(); const auto element_size = ::cuda::std::get<2>(cache_missing_dtype_ptr[0]); auto input_indegree = thrust::make_transform_iterator( input_size_ptr, [=] __host__ __device__(size_t x) { return x / element_size; }); CUB_CALL( DeviceScan::ExclusiveSum, input_indegree, output_indptr_ptr, num_nodes + 1); CopyScalar output_size{output_indptr_ptr + num_nodes}; if (num_threshold > 0) { // Insert the vertices whose access count equal threshold. 
auto missing_positions = positions.slice(0, num_hit); auto missing_indices = indices.slice(0, num_hit); thrust::counting_iterator iota{0}; auto threshold = -threshold_; auto is_threshold = thrust::make_transform_iterator( missing_positions.data_ptr(), [=] __host__ __device__(indices_t x) { return x == threshold; }); auto output_indices = torch::empty(num_threshold, seeds.options()); CUB_CALL( DeviceSelect::Flagged, iota, is_threshold, output_indices.data_ptr(), cub::DiscardOutputIterator{}, missing_positions.size(0)); auto [in_degree, sliced_indptr] = ops::SliceCSCIndptr(indptr, output_indices); while (num_nodes_ + num_threshold >= indptr_.size(0)) { auto new_indptr = torch::empty( indptr_.size(0) * kIntGrowthFactor, indptr_.options()); new_indptr.slice(0, 0, indptr_.size(0)) = indptr_; indptr_ = new_indptr; if (offset_) { auto new_offset = torch::empty(indptr_.size(0) - 1, offset_->options()); new_offset.slice(0, 0, offset_->size(0)) = *offset_; offset_ = new_offset; } } torch::Tensor sindptr; bool enough_space; torch::optional cached_output_size; for (size_t i = 0; i < num_tensors; i++) { torch::Tensor sindices; std::tie(sindptr, sindices) = ops::IndexSelectCSCImpl( in_degree, sliced_indptr, edge_tensors[i], output_indices, indptr.size(0) - 2, cached_output_size); cached_output_size = sindices.size(0); enough_space = num_edges_ + *cached_output_size <= cached_edge_tensors_[i].size(0); if (enough_space) { cached_edge_tensors_[i].slice( 0, num_edges_, num_edges_ + *cached_output_size) = sindices; } else break; } if (enough_space) { auto num_edges = num_edges_; if (offset_) { auto transform_input_it = thrust::make_zip_iterator( sindptr.data_ptr() + 1, sliced_indptr.data_ptr()); auto transform_output_it = thrust::make_zip_iterator( indptr_.data_ptr() + num_nodes_ + 1, offset_->data_ptr() + num_nodes_); THRUST_CALL( transform, transform_input_it, transform_input_it + sindptr.size(0) - 1, transform_output_it, [=] __host__ __device__( const thrust::tuple& x) { return 
thrust::make_tuple( thrust::get<0>(x) + num_edges, missing_edge_ids[thrust::get<1>(x)]); }); } else { THRUST_CALL( transform, sindptr.data_ptr() + 1, sindptr.data_ptr() + sindptr.size(0), indptr_.data_ptr() + num_nodes_ + 1, [=] __host__ __device__(const indptr_t& x) { return x + num_edges; }); } auto map = reinterpret_cast*>(map_); const dim3 block(kIntBlockSize); const dim3 grid( (num_threshold + kIntBlockSize - 1) / kIntBlockSize); CUDA_KERNEL_CALL( _Insert, grid, block, 0, output_indices.size(0), static_cast(num_nodes_), seeds.data_ptr(), missing_indices.data_ptr(), output_indices.data_ptr(), map->ref(cuco::find)); num_edges_ += *cached_output_size; num_nodes_ += num_threshold; } } constexpr int alignment = 128; const auto output_allocation_count = (static_cast(output_size) + alignment - 1) / alignment * alignment; auto output_allocation = torch::empty( output_allocation_count * total_size, seeds.options().dtype(torch::kInt8)); const auto output_allocation_ptr = output_allocation.data_ptr(); std::vector output_edge_tensors; for (size_t i = 0; i < num_tensors; i++) { const auto cum_size = ::cuda::std::get<3>(cache_missing_dtype_ptr[i]); output_edge_tensors.push_back( output_allocation .slice(0, cum_size * output_allocation_count) .view(edge_tensors[i].scalar_type()) .slice(0, 0, static_cast(output_size))); } if (edge_id_offsets) { // Append the edge ids as the last element of the output. 
output_edge_tensors.push_back(ops::IndptrEdgeIdsImpl( output_indptr, output_indptr.scalar_type(), *edge_id_offsets, static_cast(static_cast(output_size)))); } { thrust::counting_iterator iota{0}; auto output_buffer_it = thrust::make_transform_iterator( iota, [=] __host__ __device__(int64_t i) { const auto tensor_idx = i / num_nodes; const auto idx = i % num_nodes; const auto offset = output_indptr_ptr[idx]; const auto [_0, _1, size, cum_size] = cache_missing_dtype_dev_ptr[tensor_idx]; return output_allocation_ptr + cum_size * output_allocation_count + offset * size; }); constexpr int64_t max_copy_at_once = std::numeric_limits::max(); for (int64_t i = 0; i < num_buffers; i += max_copy_at_once) { CUB_CALL( DeviceMemcpy::Batched, input.get() + i, output_buffer_it + i, input_size_ptr + i, std::min(num_buffers - i, max_copy_at_once)); } } return std::make_tuple(output_indptr, output_edge_tensors); })); })); } c10::intrusive_ptr< Future>>> GpuGraphCache::ReplaceAsync( torch::Tensor seeds, torch::Tensor indices, torch::Tensor positions, int64_t num_hit, int64_t num_threshold, torch::Tensor indptr, std::vector edge_tensors) { return async( [=] { return Replace( seeds, indices, positions, num_hit, num_threshold, indptr, edge_tensors); }, true); } } // namespace cuda } // namespace graphbolt ================================================ FILE: graphbolt/src/cuda/extension/gpu_graph_cache.h ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/extension/gpu_graph_cache.h
 * @brief Header file of GPU graph cache.
 */

#ifndef GRAPHBOLT_GPU_GRAPH_CACHE_H_
#define GRAPHBOLT_GPU_GRAPH_CACHE_H_

#include #include #include #include

namespace graphbolt {
namespace cuda {

/**
 * @brief A cache of graph structure (an indptr tensor plus edge tensors)
 * keyed by node id. A vertex neighborhood is cached once its access count
 * reaches the configured threshold.
 */
class GpuGraphCache : public torch::CustomClassHolder {
  // The load factor of the constructed hash table.
  static constexpr double kDoubleLoadFactor = 0.8;
  // The growth factor of the hash table and the dynamically sized indptr
  // tensor.
  static constexpr int kIntGrowthFactor = 2;

 public:
  /**
   * @brief Constructor for the GpuGraphCache class.
   *
   * @param num_edges The edge capacity of GPU cache.
   * @param threshold The access threshold before a vertex neighborhood is
   * cached.
   * @param indptr_dtype The node id datatype.
   * @param dtypes The dtypes of the edge tensors to be cached. dtypes[0] is
   * reserved for the indices edge tensor holding node ids.
   * @param has_original_edge_ids Whether the graph to be cached has original
   * edge ids.
   */
  GpuGraphCache(
      const int64_t num_edges, const int64_t threshold,
      torch::ScalarType indptr_dtype, std::vector dtypes,
      bool has_original_edge_ids);

  GpuGraphCache() = default;

  ~GpuGraphCache();

  /**
   * @brief Queries the cache. Returns tensors indicating which elements are
   * missing.
   *
   * @param seeds The node ids to query the cache with.
   *
   * @return
   * (torch::Tensor, torch::Tensor, int64_t, int64_t) index, position,
   * number of cache hits and number of ids that will enter the cache.
   */
  std::tuple Query(
      torch::Tensor seeds);

  /**
   * @brief Presumably the asynchronous counterpart of Query(), returning a
   * Future of the same result. NOTE(review): the definition is not part of
   * this header; confirm against the implementation file.
   */
  c10::intrusive_ptr<
      Future>>
  QueryAsync(torch::Tensor seeds);

  /**
   * @brief After the graph structure for the missing node ids is fetched, it
   * inserts the node ids which pass the threshold and returns the final
   * output graph structure, combining the information in the cache with the
   * graph structure for the missing node ids.
   *
   * @param seeds The node ids that the cache was queried with.
   * @param indices seeds[indices[:num_hit]] gives us the node ids that were
   * found in the cache.
   * @param positions positions[:num_hit] gives where the node ids can be found
   * in the cache.
   * @param num_hit The number of seeds that are already in the cache.
   * @param num_threshold The number of seeds among the missing node ids that
   * will be inserted into the cache.
   * @param indptr The indptr for the missing seeds fetched from remote.
   * @param edge_tensors The edge tensors for the missing seeds. The last
   * element of edge_tensors is treated as the edge ids tensor with
   * indptr_dtype.
   *
   * @return (torch::Tensor, std::vector) The final indptr and
   * edge_tensors, directly corresponding to the seeds tensor.
   */
  std::tuple> Replace(
      torch::Tensor seeds, torch::Tensor indices, torch::Tensor positions,
      int64_t num_hit, int64_t num_threshold, torch::Tensor indptr,
      std::vector edge_tensors);

  /**
   * @brief Asynchronous counterpart of Replace(). Takes the same arguments and
   * returns a Future that wraps the result of Replace().
   */
  c10::intrusive_ptr<
      Future>>>
  ReplaceAsync(
      torch::Tensor seeds, torch::Tensor indices, torch::Tensor positions,
      int64_t num_hit, int64_t num_threshold, torch::Tensor indptr,
      std::vector edge_tensors);

  /**
   * @brief Static factory taking the same arguments as the constructor.
   * NOTE(review): presumably forwards to the constructor and wraps the object
   * in an intrusive_ptr; confirm against the implementation file.
   */
  static c10::intrusive_ptr Create(
      const int64_t num_edges, const int64_t threshold,
      torch::ScalarType indptr_dtype, std::vector dtypes,
      bool has_original_edge_ids);

 private:
  void* map_;                     // pointer to the hash table.
  int64_t threshold_;             // A positive threshold value.
  torch::DeviceIndex device_id_;  // Which GPU the cache resides in.
  int64_t map_size_;              // The number of nodes inside the hash table.
  int64_t num_nodes_;             // The number of cached nodes in the cache.
  int64_t num_edges_;             // The number of cached edges in the cache.
  torch::Tensor indptr_;          // The cached graph structure indptr tensor.
  torch::optional offset_;  // The original graph's sliced_indptr tensor.
  std::vector cached_edge_tensors_;  // The cached graph
                                                    // structure edge tensors.
  std::mutex mtx_;  // Protects the data structure and makes it threadsafe.
};

}  // namespace cuda
}  // namespace graphbolt

#endif  // GRAPHBOLT_GPU_GRAPH_CACHE_H_

================================================
FILE: graphbolt/src/cuda/extension/unique_and_compact.h
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file cuda/extension/unique_and_compact.h
 * @brief Unique and compact operator utilities on CUDA using hash table.
 */

#ifndef GRAPHBOLT_CUDA_UNIQUE_AND_COMPACT_H_
#define GRAPHBOLT_CUDA_UNIQUE_AND_COMPACT_H_

#include #include

namespace graphbolt {
namespace ops {

/**
 * @brief Batched, hash table based unique-and-compact operator.
 *
 * @param src_ids One source node id tensor per batch.
 * @param dst_ids One destination node id tensor per batch.
 * @param unique_dst_ids One tensor of unique destination node ids per batch.
 * @param rank The rank of the caller; must be smaller than world_size.
 * @param world_size The number of ranks.
 *
 * @return One tuple per batch.
 */
std::vector<
    std::tuple>
UniqueAndCompactBatchedHashMapBased(
    const std::vector& src_ids,
    const std::vector& dst_ids,
    const std::vector& unique_dst_ids, const int64_t rank,
    const int64_t world_size);

}  // namespace ops
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_UNIQUE_AND_COMPACT_H_

================================================
FILE: graphbolt/src/cuda/extension/unique_and_compact_map.cu
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file cuda/extension/unique_and_compact_map.cu
 * @brief Unique and compact operator implementation on CUDA using hash table.
 */
#include #include #include #include #include #include #include #include #include #include #include #include

#include "../common.h"
#include "../cooperative_minibatching_utils.cuh"
#include "../cooperative_minibatching_utils.h"
#include "../utils.h"
#include "./unique_and_compact.h"

namespace graphbolt {
namespace ops {

// Support graphs with up to 2^kNodeIdBits nodes.
constexpr int kNodeIdBits = 40;

/**
 * @brief Inserts every element of the first num_edges flattened input items
 * into the hash table and keeps, for each key, the smallest flat position at
 * which the key occurs.
 *
 * @param num_edges The number of flattened items to process.
 * @param indexes indexes[i] is the index of the input tensor item i belongs to.
 * @param pointers pointers[t] is the data pointer of input tensor t.
 * @param offsets offsets[t] is the flat position at which tensor t starts.
 * @param map Hash table reference supporting insert_and_find.
 */
template
__global__ void _InsertAndSetMinBatched(
    const int64_t num_edges, const int32_t* const indexes, index_t** pointers,
    const int64_t* const offsets, map_t map) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  const int stride = gridDim.x * blockDim.x;

  // Each thread handles items i, i + stride, i + 2 * stride, ...
  while (i < num_edges) {
    const auto tensor_index = indexes[i];
    const auto tensor_offset = i - offsets[tensor_index];
    const int64_t node_id = pointers[tensor_index][tensor_offset];
    // Two consecutive tensors (tensor_index / 2) share one batch.
    const int64_t batch_index = tensor_index / 2;
    // The batch index occupies the bits above the lowest kNodeIdBits bits so
    // that equal node ids in different batches get different keys.
    const int64_t key = node_id | (batch_index << kNodeIdBits);

    auto [slot, is_new_key] = map.insert_and_find(cuco::pair{key, i});

    // The key was already present: keep the minimum position as its value.
    if (!is_new_key) {
      auto ref = ::cuda::atomic_ref{
          slot->second};
      ref.fetch_min(i, ::cuda::memory_order_relaxed);
    }

    i += stride;
  }
}

/**
 * @brief Looks up the hash table value of every src and dst id and writes the
 * resulting compacted id into mapped_ids at the same flat position.
 *
 * @param num_batches The number of batches.
 * @param num_edges The number of flattened items to process.
 * @param indexes indexes[i] is the index of the input tensor item i belongs to.
 * @param pointers pointers[t] is the data pointer of input tensor t.
 * @param offsets offsets[t] is the flat position at which tensor t starts.
 * @param unique_ids_offsets Per batch value subtracted from the looked up
 * value when index is null.
 * @param index Optional indirection applied to the looked up value; may be
 * null.
 * @param map Hash table reference supporting find.
 * @param mapped_ids The output array, written only at src and dst positions.
 */
template
__global__ void _MapIdsBatched(
    const int num_batches, const int64_t num_edges,
    const int32_t* const indexes, index_t** pointers,
    const int64_t* const offsets, const int64_t* const unique_ids_offsets,
    const index_t* const index, map_t map, index_t* mapped_ids) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  const int stride = gridDim.x * blockDim.x;

  while (i < num_edges) {
    const auto tensor_index = indexes[i];
    int64_t batch_index;

    // Tensors [2 * num_batches, 3 * num_batches) are handled by the first
    // branch, odd tensors below 2 * num_batches by the second, and the
    // remaining (even) tensors are skipped.
    if (tensor_index >= 2 * num_batches) {
      batch_index = tensor_index - 2 * num_batches;
    } else if (tensor_index & 1) {
      batch_index = tensor_index / 2;
    } else {
      batch_index = -1;
    }

    // Only map src or dst ids.
    if (batch_index >= 0) {
      const auto tensor_offset = i - offsets[tensor_index];
      const int64_t node_id = pointers[tensor_index][tensor_offset];
      const int64_t key = node_id | (batch_index << kNodeIdBits);

      auto slot = map.find(key);
      auto new_id = slot->second;
      if (index) {
        new_id = index[new_id];
      } else {
        new_id -= unique_ids_offsets[batch_index];
      }
      mapped_ids[i] = new_id;
    }

    i += stride;
  }
}

/**
 * @brief Batched unique-and-compact implemented with a GPU hash table.
 *
 * The ids of unique_dst_ids and src_ids of every batch are inserted into a
 * hash table keyed by (batch, node id). An element is kept as unique when its
 * flat position equals the minimum position stored for its key. Afterwards,
 * the src and dst ids are mapped through the hash table.
 *
 * @param src_ids One source node id tensor per batch.
 * @param dst_ids One destination node id tensor per batch.
 * @param unique_dst_ids One tensor of unique destination node ids per batch.
 * @param rank The rank of the caller; must be smaller than world_size.
 * @param world_size The number of ranks.
 *
 * @return One tuple per batch made of a slice of the unique ids, the mapped
 * src ids, the mapped dst ids and a slice of the unique id offsets.
 */
std::vector<
    std::tuple>
UniqueAndCompactBatchedHashMapBased(
    const std::vector& src_ids,
    const std::vector& dst_ids,
    const std::vector& unique_dst_ids, const int64_t rank,
    const int64_t world_size) {
  TORCH_CHECK(
      rank < world_size, "rank needs to be smaller than the world_size.");
  TORCH_CHECK(world_size <= std::numeric_limits::max());
  auto allocator = cuda::GetAllocator();
  auto stream = cuda::GetCurrentStream();
  auto scalar_type = src_ids.at(0).scalar_type();
  constexpr int BLOCK_SIZE = 512;
  const auto num_batches = src_ids.size();
  static_assert(
      sizeof(std::ptrdiff_t) == sizeof(int64_t),
      "Need to be compiled on a 64-bit system.");
  // The bits left for the batch index once the node id bits and one more bit
  // are taken out of a 64-bit key.
  constexpr int batch_id_bits = sizeof(int64_t) * 8 - 1 - kNodeIdBits;
  TORCH_CHECK(
      num_batches <= (1 << batch_id_bits),
      "UniqueAndCompactBatched supports a batch size of up to ",
      1 << batch_id_bits);
  return AT_DISPATCH_INDEX_TYPES(
      scalar_type, "unique_and_compact", ([&] {
        // For 2 batches of inputs, stores the input tensor pointers in the
        // unique_dst, src, unique_dst, src, dst, dst order. Since there are
        // 3 * num_batches input tensors, we need the first 3 * num_batches to
        // store the input tensor pointers. Then, we store offsets in the rest
        // of the 3 * num_batches + 1 space as if they were stored contiguously.
        auto pointers_and_offsets = torch::empty(
            6 * num_batches + 1,
            c10::TensorOptions().dtype(torch::kInt64).pinned_memory(true));
        // Points to the input tensor pointers.
        auto pointers_ptr =
            reinterpret_cast(pointers_and_offsets.data_ptr());
        // Points to the input tensor storage logical offsets.
        auto offsets_ptr =
            pointers_and_offsets.data_ptr() + 3 * num_batches;
        for (std::size_t i = 0; i < num_batches; i++) {
          pointers_ptr[2 * i] = unique_dst_ids.at(i).data_ptr();
          offsets_ptr[2 * i] = unique_dst_ids[i].size(0);
          pointers_ptr[2 * i + 1] = src_ids.at(i).data_ptr();
          offsets_ptr[2 * i + 1] = src_ids[i].size(0);
          pointers_ptr[2 * num_batches + i] = dst_ids.at(i).data_ptr();
          offsets_ptr[2 * num_batches + i] = dst_ids[i].size(0);
        }
        // Finish computing the offsets by taking a cumulative sum.
        std::exclusive_scan(
            offsets_ptr, offsets_ptr + 3 * num_batches + 1, offsets_ptr, 0ll);
        // Device version of the tensors defined above. We store the information
        // initially on the CPU, which are later copied to the device.
        auto pointers_and_offsets_dev = torch::empty(
            pointers_and_offsets.size(0),
            src_ids[0].options().dtype(pointers_and_offsets.scalar_type()));
        auto offsets_dev = pointers_and_offsets_dev.slice(0, 3 * num_batches);
        auto pointers_dev_ptr =
            reinterpret_cast(pointers_and_offsets_dev.data_ptr());
        auto offsets_dev_ptr = offsets_dev.data_ptr();
        CUDA_CALL(cudaMemcpyAsync(
            pointers_dev_ptr, pointers_ptr,
            sizeof(int64_t) * pointers_and_offsets.size(0),
            cudaMemcpyHostToDevice, stream));
        // For each flattened item, the index of the tensor it belongs to.
        auto indexes = ExpandIndptrImpl(
            offsets_dev, torch::kInt32, torch::nullopt,
            offsets_ptr[3 * num_batches]);
        // The hash table is sized by offsets_ptr[2 * num_batches], the total
        // number of unique_dst and src items; dst items are never inserted.
        cuco::static_map map{
            offsets_ptr[2 * num_batches],
            0.5,  // load_factor
            cuco::empty_key{static_cast(-1)},
            cuco::empty_value{static_cast(-1)},
            {},
            cuco::linear_probing<1, cuco::default_hash_function>{},
            {},
            {},
            cuda::CUDAWorkspaceAllocator>{},
            ::cuda::stream_ref{stream},
        };
        C10_CUDA_KERNEL_LAUNCH_CHECK();  // Check the map constructor's success.
        const dim3 block(BLOCK_SIZE);
        const dim3 grid(
            (offsets_ptr[2 * num_batches] + BLOCK_SIZE - 1) / BLOCK_SIZE);
        CUDA_KERNEL_CALL(
            _InsertAndSetMinBatched, grid, block, 0,
            offsets_ptr[2 * num_batches], indexes.data_ptr(),
            pointers_dev_ptr, offsets_dev_ptr, map.ref(cuco::insert_and_find));
        cub::ArgIndexInputIterator index_it(indexes.data_ptr());
        // For flat position i, yields (pointer to the hash table value of its
        // key, node id, batch index, whether i is the stored minimum position).
        auto input_it = thrust::make_transform_iterator(
            index_it,
            ::cuda::proclaim_return_type<
                ::cuda::std::tuple>(
                [=, map = map.ref(cuco::find)] __device__(auto it)
                    -> ::cuda::std::tuple {
                  const auto i = it.key;
                  const auto tensor_index = it.value;
                  const auto tensor_offset = i - offsets_dev_ptr[tensor_index];
                  const int64_t node_id =
                      pointers_dev_ptr[tensor_index][tensor_offset];
                  const auto batch_index = tensor_index / 2;
                  const int64_t key =
                      node_id |
                      (static_cast(batch_index) << kNodeIdBits);
                  // NOTE(review): batch_offset is not used within this lambda.
                  const auto batch_offset = offsets_dev_ptr[batch_index * 2];
                  auto slot = map.find(key);
                  const auto valid = slot->second == i;
                  return {&slot->second, node_id, batch_index, valid};
                }));
        torch::optional part_ids;
        if (world_size > 1) {
          part_ids = torch::empty(
              offsets_ptr[2 * num_batches],
              src_ids[0].options().dtype(cuda::kPartDType));
        }
        auto unique_ids =
            torch::empty(offsets_ptr[2 * num_batches], src_ids[0].options());
        auto unique_ids_offsets_dev = torch::full(
            num_batches + 1, std::numeric_limits::max(),
            src_ids[0].options().dtype(torch::kInt64));
        auto unique_ids_offsets_dev_ptr =
            unique_ids_offsets_dev.data_ptr();
        // For the i'th selected item: overwrites its hash table value with i,
        // stores its node id (and rank assignment, if requested) at position i
        // and lowers the offset of its batch to i.
        auto output_it = thrust::make_tabulate_output_iterator(
            ::cuda::proclaim_return_type(
                [=, unique_ids_ptr = unique_ids.data_ptr(),
                 part_ids_ptr =
                     part_ids ? part_ids->data_ptr()
                              : nullptr,
                 rank = static_cast(rank),
                 world_size = static_cast(
                     world_size)] __device__(const int64_t i, const auto& t) {
                  *::cuda::std::get<0>(t) = i;
                  const auto node_id = ::cuda::std::get<1>(t);
                  unique_ids_ptr[i] = node_id;
                  if (part_ids_ptr) {
                    part_ids_ptr[i] =
                        cuda::rank_assignment(node_id, rank, world_size);
                  }
                  const auto batch_index = ::cuda::std::get<2>(t);
                  auto ref =
                      ::cuda::atomic_ref{
                          unique_ids_offsets_dev_ptr[batch_index]};
                  ref.fetch_min(i, ::cuda::memory_order_relaxed);
                }));
        // The number of selected items is written to the last element of
        // unique_ids_offsets_dev.
        CUB_CALL(
            DeviceSelect::If, input_it, output_it,
            unique_ids_offsets_dev_ptr + num_batches,
            offsets_ptr[2 * num_batches],
            ::cuda::proclaim_return_type([] __device__(const auto& t) {
              return ::cuda::std::get<3>(t);
            }));
        auto unique_ids_offsets = torch::empty(
            num_batches + 1,
            c10::TensorOptions().dtype(torch::kInt64).pinned_memory(true));
        {
          // A reversed inclusive min-scan, whose result is written both to a
          // new device tensor and to the pinned unique_ids_offsets tensor.
          auto unique_ids_offsets_dev2 =
              torch::empty_like(unique_ids_offsets_dev);
          CUB_CALL(
              DeviceScan::InclusiveScan,
              thrust::make_reverse_iterator(
                  num_batches + 1 + unique_ids_offsets_dev_ptr),
              thrust::make_reverse_iterator(
                  num_batches + 1 +
                  thrust::make_transform_output_iterator(
                      thrust::make_zip_iterator(
                          unique_ids_offsets_dev2.data_ptr(),
                          unique_ids_offsets.data_ptr()),
                      ::cuda::proclaim_return_type<
                          thrust::tuple>(
                          [=] __device__(const auto x) {
                            return thrust::make_tuple(x, x);
                          }))),
              cub::Min{}, num_batches + 1);
          unique_ids_offsets_dev = unique_ids_offsets_dev2;
          unique_ids_offsets_dev_ptr =
              unique_ids_offsets_dev.data_ptr();
        }
        at::cuda::CUDAEvent unique_ids_offsets_event;
        unique_ids_offsets_event.record();
        torch::optional index;
        if (part_ids) {
          // The host reads unique_ids_offsets below, so wait for the event
          // recorded after the scan.
          unique_ids_offsets_event.synchronize();
          const auto num_unique =
              unique_ids_offsets.data_ptr()[num_batches];
          unique_ids = unique_ids.slice(0, 0, num_unique);
          part_ids = part_ids->slice(0, 0, num_unique);
          std::tie(
              unique_ids, index, unique_ids_offsets, unique_ids_offsets_event) =
              cuda::RankSortImpl(
                  unique_ids, *part_ids, unique_ids_offsets_dev, world_size);
        }
        auto mapped_ids =
            torch::empty(offsets_ptr[3 * num_batches], unique_ids.options());
        // The grid computed above for offsets_ptr[2 * num_batches] items is
        // reused for offsets_ptr[3 * num_batches] items; the kernel loops with
        // a stride of the total thread count to cover the remaining items.
        CUDA_KERNEL_CALL(
            _MapIdsBatched, grid, block, 0, num_batches,
            offsets_ptr[3 * num_batches], indexes.data_ptr(),
            pointers_dev_ptr, offsets_dev_ptr, unique_ids_offsets_dev_ptr,
            index ? index->data_ptr() : nullptr, map.ref(cuco::find),
            mapped_ids.data_ptr());
        std::vector>
            results;
        unique_ids_offsets_event.synchronize();
        auto unique_ids_offsets_ptr = unique_ids_offsets.data_ptr();
        for (int64_t i = 0; i < num_batches; i++) {
          results.emplace_back(
              unique_ids.slice(
                  0, unique_ids_offsets_ptr[i * world_size],
                  unique_ids_offsets_ptr[(i + 1) * world_size]),
              mapped_ids.slice(
                  0, offsets_ptr[2 * i + 1], offsets_ptr[2 * i + 2]),
              mapped_ids.slice(
                  0, offsets_ptr[2 * num_batches + i],
                  offsets_ptr[2 * num_batches + i + 1]),
              unique_ids_offsets.slice(
                  0, i * world_size, (i + 1) * world_size + 1));
        }
        return results;
      }));
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/gather.cu
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file cuda/gather.cu
 * @brief Gather operators implementation on CUDA.
*/ #include #include "./common.h" namespace graphbolt { namespace ops { torch::Tensor Gather( torch::Tensor input, torch::Tensor index, torch::optional dtype) { if (!dtype.has_value()) dtype = input.scalar_type(); auto output = torch::empty(index.sizes(), index.options().dtype(*dtype)); AT_DISPATCH_INDEX_TYPES( index.scalar_type(), "GatherIndexType", ([&] { AT_DISPATCH_INTEGRAL_TYPES( input.scalar_type(), "GatherInputType", ([&] { using input_t = scalar_t; AT_DISPATCH_INTEGRAL_TYPES(*dtype, "GatherOutputType", ([&] { using output_t = scalar_t; THRUST_CALL( gather, index.data_ptr(), index.data_ptr() + index.size(0), input.data_ptr(), output.data_ptr()); })); })); })); return output; } } // namespace ops } // namespace graphbolt ================================================ FILE: graphbolt/src/cuda/index_select_csc_impl.cu ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/index_select_csc_impl.cu * @brief Index select csc operator implementation on CUDA. 
*/
#include #include #include #include #include #include #include #include

#include "./common.h"
#include "./max_uva_threads.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

constexpr int BLOCK_SIZE = CUDA_MAX_NUM_THREADS;

// Given the in_degree array and a permutation, returns in_degree of the output
// and the permuted and modified in_degree of the input. The modified in_degree
// is modified so that there is slack to be able to align as needed.
template
struct AlignmentFunc {
  static_assert(GPU_CACHE_LINE_SIZE % sizeof(indices_t) == 0);
  const indptr_t* in_degree;
  const int64_t* perm;  // Optional permutation; may be null.
  int64_t num_nodes;
  __host__ __device__ auto operator()(int64_t row) {
    constexpr int num_elements = GPU_CACHE_LINE_SIZE / sizeof(indices_t);
    return thrust::make_tuple(
        in_degree[row],
        // A single cache line has num_elements items, we add num_elements - 1
        // to ensure there is enough slack to move forward or backward by
        // num_elements - 1 items if the performed access is not aligned.
        static_cast(
            in_degree[perm ? perm[row % num_nodes] : row] + num_elements - 1));
  }
};

/**
 * @brief Copies the selected rows of indices into output_indices. Every
 * thread position is shifted by a per-row offset so that the address it reads
 * from has the same cache line alignment as the thread position; positions
 * that fall outside of the row after the shift do nothing.
 *
 * @param edge_count The number of positions to process, i.e. the total of the
 * padded row lengths.
 * @param indptr The starting position of each selected row inside indices.
 * @param output_indptr The indptr of the output.
 * @param output_indptr_aligned The indptr over the padded row lengths.
 * @param indices The input array to copy from.
 * @param coo_aligned_rows For each padded position, the (permuted) row it
 * belongs to.
 * @param output_indices The output array.
 * @param perm Optional permutation of the rows; may be null.
 */
template
__global__ void _CopyIndicesAlignedKernel(
    const indptr_t edge_count, const indptr_t* const indptr,
    const indptr_t* const output_indptr,
    const indptr_t* const output_indptr_aligned, const indices_t* const indices,
    const coo_rows_t* const coo_aligned_rows, indices_t* const output_indices,
    const int64_t* const perm) {
  indptr_t idx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x;
  const int stride_x = gridDim.x * blockDim.x;

  while (idx < edge_count) {
    const auto permuted_row_pos = coo_aligned_rows[idx];
    const auto row_pos = perm ? perm[permuted_row_pos] : permuted_row_pos;
    const auto out_row = output_indptr[row_pos];
    // The actual number of items in this row.
    const auto d = output_indptr[row_pos + 1] - out_row;
    // The number of items to skip so that the read below is aligned.
    const int offset =
        (reinterpret_cast(
             indices + indptr[row_pos] -
             output_indptr_aligned[permuted_row_pos]) %
         GPU_CACHE_LINE_SIZE) /
        sizeof(indices_t);
    // The position of this thread relative to the start of its row.
    const auto rofs = idx - output_indptr_aligned[permuted_row_pos] - offset;
    if (rofs >= 0 && rofs < d) {
      const auto in_idx = indptr[row_pos] + rofs;
      assert(
          reinterpret_cast(indices + in_idx - idx) %
              GPU_CACHE_LINE_SIZE ==
          0);
      const auto u = indices[in_idx];
      output_indices[out_row + rofs] = u;
    }
    idx += stride_x;
  }
}

// Elementwise sum of two pairs, used as the scan operator to compute two
// prefix sums at once.
struct PairSum {
  template
  __host__ __device__ auto operator()(
      const thrust::tuple a, const thrust::tuple b) {
    return thrust::make_tuple(
        thrust::get<0>(a) + thrust::get<0>(b),
        thrust::get<1>(a) + thrust::get<1>(b));
  };
};

/**
 * @brief Computes the output indptr of the selected rows and copies their
 * items out of indices with the aligned copy kernel.
 *
 * @param indices The tensor to copy from.
 * @param num_nodes The number of selected rows.
 * @param in_degree The number of items of each selected row.
 * @param sliced_indptr The starting position of each selected row.
 * @param perm Optional permutation of the rows; may be null.
 * @param options The tensor options of the outputs.
 * @param indptr_scalar_type The dtype of the output indptr.
 * @param output_size The total number of items to be copied, if known. When
 * not provided, it is read back from the computed output indptr.
 *
 * @return (output_indptr, output_indices).
 */
template
std::tuple UVAIndexSelectCSCCopyIndices(
    torch::Tensor indices, const int64_t num_nodes,
    const indptr_t* const in_degree, const indptr_t* const sliced_indptr,
    const int64_t* const perm, torch::TensorOptions options,
    torch::ScalarType indptr_scalar_type,
    torch::optional output_size) {
  auto allocator = cuda::GetAllocator();
  thrust::counting_iterator iota(0);

  // Output indptr for the slice indexed by nodes.
  auto output_indptr =
      torch::empty(num_nodes + 1, options.dtype(indptr_scalar_type));

  auto output_indptr_aligned =
      torch::empty(num_nodes + 1, options.dtype(indptr_scalar_type));
  auto output_indptr_aligned_ptr = output_indptr_aligned.data_ptr();

  {
    // Returns the actual and modified_indegree as a pair, the
    // latter overestimates the actual indegree for alignment
    // purposes.
    auto modified_in_degree = thrust::make_transform_iterator(
        iota, AlignmentFunc{in_degree, perm, num_nodes});
    auto output_indptr_pair = thrust::make_zip_iterator(
        output_indptr.data_ptr(), output_indptr_aligned_ptr);
    thrust::tuple zero_value{};
    // Compute the prefix sum over actual and modified indegrees.
    CUB_CALL(
        DeviceScan::ExclusiveScan, modified_in_degree, output_indptr_pair,
        PairSum{}, zero_value, num_nodes + 1);
  }

  // Copy the actual total number of edges.
  if (!output_size.has_value()) {
    auto edge_count =
        cuda::CopyScalar{output_indptr.data_ptr() + num_nodes};
    output_size = static_cast(edge_count);
  }
  // Copy the modified number of edges.
  auto edge_count_aligned_ =
      cuda::CopyScalar{output_indptr_aligned_ptr + num_nodes};
  const int64_t edge_count_aligned = static_cast(edge_count_aligned_);

  // Allocate output array with actual number of edges.
  torch::Tensor output_indices =
      torch::empty(output_size.value(), options.dtype(indices.scalar_type()));
  const dim3 block(BLOCK_SIZE);
  // The number of launched threads is capped by cuda::max_uva_threads, or by
  // 1 << 20 when it holds no value.
  const dim3 grid(
      (std::min(edge_count_aligned, cuda::max_uva_threads.value_or(1 << 20)) +
       BLOCK_SIZE - 1) /
      BLOCK_SIZE);

  // Find the smallest integer type to store the coo_aligned_rows tensor.
  const int num_bits = cuda::NumberOfBits(num_nodes);
  std::array type_bits = {8, 15, 31, 63};
  const auto type_index =
      std::lower_bound(type_bits.begin(), type_bits.end(), num_bits) -
      type_bits.begin();
  // types has one more entry than type_bits because lower_bound may return
  // type_bits.end().
  std::array types = {
      torch::kByte, torch::kInt16, torch::kInt32, torch::kLong, torch::kLong};
  auto coo_dtype = types[type_index];

  auto coo_aligned_rows = ExpandIndptrImpl(
      output_indptr_aligned, coo_dtype, torch::nullopt, edge_count_aligned);

  AT_DISPATCH_INTEGRAL_TYPES(
      coo_dtype, "UVAIndexSelectCSCCopyIndicesCOO", ([&] {
        using coo_rows_t = scalar_t;
        // Perform the actual copying, of the indices array into
        // output_indices in an aligned manner.
        CUDA_KERNEL_CALL(
            _CopyIndicesAlignedKernel, grid, block, 0,
            static_cast(edge_count_aligned_), sliced_indptr,
            output_indptr.data_ptr(), output_indptr_aligned_ptr,
            reinterpret_cast(indices.data_ptr()),
            coo_aligned_rows.data_ptr(),
            reinterpret_cast(output_indices.data_ptr()), perm);
      }));
  return {output_indptr, output_indices};
}

/**
 * @brief Index select csc for the case handled by the aligned copy kernel.
 * Sorts the nodes and dispatches on the indptr dtype and on the element size
 * of indices.
 *
 * @param in_degree The number of items of each selected row.
 * @param sliced_indptr The starting position of each selected row.
 * @param indices The tensor to copy from.
 * @param nodes The selected nodes.
 * @param num_bits Passed to Sort together with nodes.
 * @param output_size The total number of items to be copied, if known.
 *
 * @return (output_indptr, output_indices).
 */
std::tuple UVAIndexSelectCSCImpl(
    torch::Tensor in_degree, torch::Tensor sliced_indptr, torch::Tensor indices,
    torch::Tensor nodes, int num_bits, torch::optional output_size) {
  // Sorting nodes so that accesses over PCI-e are more regular.
  const auto sorted_idx = Sort(nodes, num_bits).second;
  const int64_t num_nodes = nodes.size(0);

  return AT_DISPATCH_INTEGRAL_TYPES(
      sliced_indptr.scalar_type(), "UVAIndexSelectCSCIndptr", ([&] {
        using indptr_t = scalar_t;
        return GRAPHBOLT_DISPATCH_ELEMENT_SIZES(
            indices.element_size(), "UVAIndexSelectCSCCopyIndices", ([&] {
              return UVAIndexSelectCSCCopyIndices(
                  indices, num_nodes, in_degree.data_ptr(),
                  sliced_indptr.data_ptr(),
                  sorted_idx.data_ptr(), nodes.options(),
                  sliced_indptr.scalar_type(), output_size);
            }));
      }));
}

// Maps a row i to the address of its first item: indices + indptr[i].
template
struct IteratorFunc {
  indptr_t* indptr;
  indices_t* indices;
  __host__ __device__ auto operator()(int64_t i) { return indices + indptr[i]; }
};

// Maps a row i to the size of its items in bytes.
template
struct ConvertToBytes {
  const indptr_t* in_degree;
  __host__ __device__ indptr_t operator()(int64_t i) {
    return in_degree[i] * sizeof(indices_t);
  }
};

/**
 * @brief Copies, for each of the num_nodes rows, in_degree[i] items starting
 * at indices + sliced_indptr[i] to output_indices + output_indptr[i] with
 * batched memcpy calls.
 */
template
void IndexSelectCSCCopyIndices(
    const int64_t num_nodes, indices_t* const indices,
    indptr_t* const sliced_indptr, const indptr_t* const in_degree,
    indptr_t* const output_indptr, indices_t* const output_indices) {
  thrust::counting_iterator iota(0);

  auto input_buffer_it = thrust::make_transform_iterator(
      iota, IteratorFunc{sliced_indptr, indices});
  auto output_buffer_it = thrust::make_transform_iterator(
      iota, IteratorFunc{output_indptr, output_indices});
  auto buffer_sizes = thrust::make_transform_iterator(
      iota, ConvertToBytes{in_degree});
  constexpr int64_t max_copy_at_once = std::numeric_limits::max();

  // Performs the copy from indices into output_indices.
  // At most max_copy_at_once buffers are handed to a single call.
  for (int64_t i = 0; i < num_nodes; i += max_copy_at_once) {
    CUB_CALL(
        DeviceMemcpy::Batched, input_buffer_it + i, output_buffer_it + i,
        buffer_sizes + i, std::min(num_nodes - i, max_copy_at_once));
  }
}

/**
 * @brief Index select csc implemented with a prefix sum over in_degree
 * followed by batched memcpy calls.
 *
 * @param in_degree The number of items of each selected row.
 * @param sliced_indptr The starting position of each selected row.
 * @param indices The tensor to copy from.
 * @param options The tensor options of the outputs.
 * @param output_size The total number of items to be copied, if known. When
 * not provided, it is read back from the computed output indptr.
 *
 * @return (output_indptr, output_indices).
 */
std::tuple DeviceIndexSelectCSCImpl(
    torch::Tensor in_degree, torch::Tensor sliced_indptr, torch::Tensor indices,
    torch::TensorOptions options, torch::optional output_size) {
  const int64_t num_nodes = sliced_indptr.size(0);
  return AT_DISPATCH_INTEGRAL_TYPES(
      sliced_indptr.scalar_type(), "IndexSelectCSCIndptr", ([&] {
        using indptr_t = scalar_t;
        auto in_degree_ptr = in_degree.data_ptr();
        auto sliced_indptr_ptr = sliced_indptr.data_ptr();
        // Output indptr for the slice indexed by nodes.
        torch::Tensor output_indptr = torch::empty(
            num_nodes + 1, options.dtype(sliced_indptr.scalar_type()));

        // Compute the output indptr, output_indptr.
        // NOTE(review): the scan reads num_nodes + 1 items of in_degree;
        // presumably in_degree holds one more item than sliced_indptr.
        CUB_CALL(
            DeviceScan::ExclusiveSum, in_degree_ptr,
            output_indptr.data_ptr(), num_nodes + 1);

        // Number of edges being copied.
        if (!output_size.has_value()) {
          auto edge_count =
              cuda::CopyScalar{output_indptr.data_ptr() + num_nodes};
          output_size = static_cast(edge_count);
        }
        // Allocate output array of size number of copied edges.
        torch::Tensor output_indices = torch::empty(
            output_size.value(), options.dtype(indices.scalar_type()));
        GRAPHBOLT_DISPATCH_ELEMENT_SIZES(
            indices.element_size(), "IndexSelectCSCCopyIndices", ([&] {
              using indices_t = element_size_t;
              IndexSelectCSCCopyIndices(
                  num_nodes, reinterpret_cast(indices.data_ptr()),
                  sliced_indptr_ptr, in_degree_ptr,
                  output_indptr.data_ptr(),
                  reinterpret_cast(output_indices.data_ptr()));
            }));
        return std::make_tuple(output_indptr, output_indices);
      }));
}

/**
 * @brief Index select csc given the precomputed in_degree and sliced_indptr.
 * Uses the UVA implementation when indices is pinned and the device
 * implementation otherwise.
 *
 * @param nodes_max nodes_max + 1 determines the number of bits used when
 * sorting the nodes in the UVA implementation.
 *
 * @return (output_indptr, output_indices).
 */
std::tuple IndexSelectCSCImpl(
    torch::Tensor in_degree, torch::Tensor sliced_indptr, torch::Tensor indices,
    torch::Tensor nodes, int64_t nodes_max,
    torch::optional output_size) {
  if (indices.is_pinned()) {
    int num_bits = cuda::NumberOfBits(nodes_max + 1);
    return UVAIndexSelectCSCImpl(
        in_degree, sliced_indptr, indices, nodes, num_bits, output_size);
  } else {
    return DeviceIndexSelectCSCImpl(
        in_degree, sliced_indptr, indices, nodes.options(), output_size);
  }
}

/**
 * @brief Index select csc given the indptr of the graph. Slices indptr with
 * nodes and forwards to the overload above with nodes_max set to
 * indptr.size(0) - 2.
 *
 * @return (output_indptr, output_indices).
 */
std::tuple IndexSelectCSCImpl(
    torch::Tensor indptr, torch::Tensor indices, torch::Tensor nodes,
    torch::optional output_size) {
  auto [in_degree, sliced_indptr] = SliceCSCIndptr(indptr, nodes);
  return IndexSelectCSCImpl(
      in_degree, sliced_indptr, indices, nodes, indptr.size(0) - 2,
      output_size);
}

/**
 * @brief Index select csc over a list of indices tensors that share the same
 * indptr. The indptr is sliced only once and each tensor of indices_list is
 * then selected separately.
 *
 * @param indptr The indptr of the graph.
 * @param indices_list The tensors to select from.
 * @param nodes The selected nodes.
 * @param with_edge_ids Whether the result of IndptrEdgeIdsImpl is appended as
 * the last element of the returned tensors.
 * @param output_size The total number of selected items, if known. It is
 * filled in after the first selection and checked for the following ones.
 *
 * @return (output_indptr, selected tensors).
 */
std::tuple> IndexSelectCSCBatchedImpl(
    torch::Tensor indptr, std::vector indices_list,
    torch::Tensor nodes, bool with_edge_ids,
    torch::optional output_size) {
  auto [in_degree, sliced_indptr] = SliceCSCIndptr(indptr, nodes);
  std::vector results;
  results.reserve(indices_list.size());
  torch::Tensor output_indptr;
  for (auto& indices : indices_list) {
    torch::Tensor output_indices;
    std::tie(output_indptr, output_indices) = IndexSelectCSCImpl(
        in_degree, sliced_indptr, indices, nodes, indptr.size(0) - 2,
        output_size);
    if (!output_size.has_value()) output_size = output_indices.size(0);
    TORCH_CHECK(*output_size == output_indices.size(0));
    results.push_back(output_indices);
  }
  if (with_edge_ids) {
    results.push_back(IndptrEdgeIdsImpl(
        output_indptr, sliced_indptr.scalar_type(), sliced_indptr,
        output_size));
  }
  return {output_indptr, results};
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/index_select_impl.cu
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file cuda/index_select_impl.cu
 * @brief Index select operator implementation on CUDA.
 */
#include #include #include

#include "./common.h"
#include "./max_uva_threads.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

/**
 * @brief Index select operator implementation for feature size 1.
 *
 * Writes input[index[i]] to output[permutation[i]], or to output[i] when
 * permutation is null, for every i smaller than output_len. Each index is
 * asserted to lie in [0, input_len).
 */
template
__global__ void IndexSelectSingleKernel(
    const DType* input, const int64_t input_len, const IdType* index,
    const int64_t output_len, DType* output,
    const int64_t* permutation = nullptr) {
  int64_t out_row_index = blockIdx.x * blockDim.x + threadIdx.x;
  int stride = gridDim.x * blockDim.x;
  while (out_row_index < output_len) {
    assert(index[out_row_index] >= 0 && index[out_row_index] < input_len);
    const auto out_row =
        permutation ? permutation[out_row_index] : out_row_index;
    output[out_row] = input[index[out_row_index]];
    out_row_index += stride;
  }
}

/**
 * @brief Index select operator implementation for feature size > 1.
*/
template
__global__ void IndexSelectMultiKernel(
    const DType* const input, const int64_t input_len,
    const int64_t feature_size, const IdType* const index,
    const int64_t output_len, DType* const output,
    const int64_t* permutation = nullptr) {
  // Rows are spread over threadIdx.y within a block and over blockIdx.x
  // across blocks; the threads along x share the columns of one row.
  int64_t out_row_index = blockIdx.x * blockDim.y + threadIdx.y;

  const int64_t stride = blockDim.y * gridDim.x;

  while (out_row_index < output_len) {
    int64_t column = threadIdx.x;
    const int64_t in_row = index[out_row_index];
    assert(in_row >= 0 && in_row < input_len);
    // With a permutation, position k of `index` is written to output row
    // permutation[k] instead of row k.
    const auto out_row =
        permutation ? permutation[out_row_index] : out_row_index;
    while (column < feature_size) {
      output[out_row * feature_size + column] =
          input[in_row * feature_size + column];
      column += blockDim.x;
    }
    out_row_index += stride;
  }
}

/**
 * @brief Index select operator implementation for feature size > 1.
 *
 * @note This is a cross-device access version of IndexSelectMultiKernel. Since
 * the memory access over PCIe is more sensitive to the data access alignment
 * (cacheline), we need a separate version here.
 */
template
__global__ void IndexSelectMultiKernelAligned(
    const DType* const input, const int64_t input_len,
    const int64_t feature_size, const IdType* const index,
    const int64_t output_len, DType* const output,
    const int64_t* permutation = nullptr) {
  int64_t out_row_index = blockIdx.x * blockDim.y + threadIdx.y;

  const int64_t stride = blockDim.y * gridDim.x;

  while (out_row_index < output_len) {
    int64_t col = threadIdx.x;
    const int64_t in_row = index[out_row_index];
    assert(in_row >= 0 && in_row < input_len);
    // Number of DType elements between the previous GPU_CACHE_LINE_SIZE
    // boundary and the first element of this input row.
    const int64_t idx_offset =
        ((uint64_t)(&input[in_row * feature_size]) % GPU_CACHE_LINE_SIZE) /
        sizeof(DType);
    // Shift the starting column back by that offset; the resulting negative
    // columns fall before the row and are skipped by the check below.
    col = col - idx_offset;
    const auto out_row =
        permutation ?
        permutation[out_row_index] : out_row_index;
    while (col < feature_size) {
      // The guard covers only the assignment; `col` always advances.
      if (col >= 0)
        output[out_row * feature_size + col] =
            input[in_row * feature_size + col];
      col += blockDim.x;
    }
    out_row_index += stride;
  }
}

/**
 * @brief Gathers the rows of `input` selected by `index` into a new CUDA
 * tensor, moving the data in units of `DType`.
 *
 * Trailing dimensions of `input` are flattened into one feature dimension for
 * the copy and restored on the returned tensor. `index` is sorted before the
 * launch and the permutation produced by the sort is passed to the kernels,
 * which use it to pick the destination row of every gathered row.
 *
 * @param input The tensor to gather rows from.
 * @param index Row ids; `index.size(0)` rows are returned.
 *
 * @return Tensor of shape {index.size(0), input.sizes()[1:]...} with the dtype
 * of `input`.
 */
template
torch::Tensor UVAIndexSelectImpl_(torch::Tensor input, torch::Tensor index) {
  const int64_t input_len = input.size(0);
  const int64_t return_len = index.size(0);
  // Product of all dimensions after the first one.
  const int64_t original_feature_size = std::accumulate(
      input.sizes().begin() + 1, input.sizes().end(), 1ll,
      std::multiplies<>());
  // Row width measured in DType units rather than in input elements.
  const auto aligned_feature_size =
      input.element_size() * original_feature_size / sizeof(DType);
  torch::Tensor ret = torch::empty(
      {return_len, original_feature_size}, torch::TensorOptions()
                                               .dtype(input.dtype())
                                               .device(c10::DeviceType::CUDA));
  DType* input_ptr = reinterpret_cast(input.data_ptr());
  DType* ret_ptr = reinterpret_cast(ret.data_ptr());

  // Sort the index to improve the memory access pattern.
  torch::Tensor sorted_index, permutation;
  std::tie(sorted_index, permutation) =
      Sort(index, cuda::NumberOfBits(input_len));
  const IdType* index_sorted_ptr = sorted_index.data_ptr();
  const int64_t* permutation_ptr = permutation.data_ptr();

  // NOTE(review): with return_len == 0 both branches compute a launch with
  // zero blocks -- confirm CUDA_KERNEL_CALL tolerates that.
  if (aligned_feature_size == 1) {
    // Use a single thread to process each output row to avoid wasting threads.
    const int num_threads = cuda::FindNumThreads(return_len);
    // The total thread count is capped by max_uva_threads (1 << 20 when it
    // has not been set).
    const int num_blocks =
        (std::min(return_len, cuda::max_uva_threads.value_or(1 << 20)) +
         num_threads - 1) /
        num_threads;
    CUDA_KERNEL_CALL(
        IndexSelectSingleKernel, num_blocks, num_threads, 0, input_ptr,
        input_len, index_sorted_ptr, return_len, ret_ptr, permutation_ptr);
  } else {
    constexpr int BLOCK_SIZE = CUDA_MAX_NUM_THREADS;
    dim3 block(BLOCK_SIZE, 1);
    // Halve block.x (doubling block.y) while block.x is at least twice the
    // row width, so narrow rows share a block instead of idling threads.
    // NOTE(review): if aligned_feature_size can be 0 the right-hand side is
    // 0 -- confirm this loop still terminates for zero-width features.
    while (static_cast(block.x) >= 2 * aligned_feature_size) {
      block.x >>= 1;
      block.y <<= 1;
    }
    const dim3 grid(std::min(
        (return_len + block.y - 1) / block.y,
        cuda::max_uva_threads.value_or(1 << 20) / BLOCK_SIZE));
    if (aligned_feature_size * sizeof(DType) <= GPU_CACHE_LINE_SIZE) {
      // When feature size is smaller than GPU cache line size, use unaligned
      // version for less SM usage, which is more resource efficient.
      CUDA_KERNEL_CALL(
          IndexSelectMultiKernel, grid, block, 0, input_ptr, input_len,
          aligned_feature_size, index_sorted_ptr, return_len, ret_ptr,
          permutation_ptr);
    } else {
      // Use aligned version to improve the memory access pattern.
      CUDA_KERNEL_CALL(
          IndexSelectMultiKernelAligned, grid, block, 0, input_ptr, input_len,
          aligned_feature_size, index_sorted_ptr, return_len, ret_ptr,
          permutation_ptr);
    }
  }

  // Restore the trailing dimensions of the input on the result.
  auto return_shape = std::vector({return_len});
  return_shape.insert(
      return_shape.end(), input.sizes().begin() + 1, input.sizes().end());
  ret = ret.reshape(return_shape);
  return ret;
}

/**
 * @brief UVA index select operator implementation on CUDA.
 *
 * All basic torch types are supported for input.
 * The supporting index types are: int, int64_t.
*/
torch::Tensor UVAIndexSelectImpl(torch::Tensor input, torch::Tensor index) {
  // Two nested dispatches: the outer one on the index scalar type, the inner
  // one on the access width computed below.
  return AT_DISPATCH_INDEX_TYPES(
      index.scalar_type(), "UVAIndexSelectImpl", ([&] {
        // Address of the input data as an integer, used only to measure its
        // alignment.
        const auto ptr = (size_t)input.data_ptr();
        // Product of all dimensions after the first one.
        const int64_t feature_size = std::accumulate(
            input.sizes().begin() + 1, input.sizes().end(), 1ll,
            std::multiplies<>());
        // We perform the copy with datatype of size powers of 2, and the
        // maximum data type we use has 16 bytes. We check the alignment of the
        // pointer and the feature dimensionality to determine the largest
        // type to use for the copy to minimize the number of CUDA threads used.
        // Alignment denotes the maximum suitable alignment and datatype size
        // for the copies.
        // The gcd with 16 yields a divisor of 16 that also divides both the
        // base address and the row size in bytes.
        const int aligned_access_size =
            std::gcd(16, std::gcd(ptr, input.element_size() * feature_size));
        return GRAPHBOLT_DISPATCH_ELEMENT_SIZES(
            aligned_access_size, "UVAIndexSelectImplElementSize", ([&] {
              return UVAIndexSelectImpl_(input, index);
            }));
      }));
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/insubgraph.cu
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/insubgraph.cu
 * @brief InSubgraph operator implementation on CUDA.
*/
#include
#include

#include "./common.h"

namespace graphbolt {
namespace ops {

/**
 * @brief Builds the in-subgraph for `nodes` from a CSC graph.
 *
 * The indptr is sliced once with SliceCSCIndptr and the result is reused to
 * select from `indices`, from `type_per_edge` (when given) and to compute the
 * edge ids.
 *
 * @param indptr The CSC indptr tensor.
 * @param indices The CSC indices tensor.
 * @param nodes The nodes whose incoming edges are extracted.
 * @param type_per_edge Optional per-edge tensor, selected the same way as
 * `indices`.
 *
 * @return Object constructed from (output_indptr, output_indices, edge_ids,
 * nodes, torch::nullopt, output_type_per_edge).
 */
c10::intrusive_ptr InSubgraph(
    torch::Tensor indptr, torch::Tensor indices, torch::Tensor nodes,
    torch::optional type_per_edge) {
  auto [in_degree, sliced_indptr] = SliceCSCIndptr(indptr, nodes);
  auto [output_indptr, output_indices] = IndexSelectCSCImpl(
      in_degree, sliced_indptr, indices, nodes, indptr.size(0) - 2);
  const int64_t num_edges = output_indices.size(0);
  torch::optional output_type_per_edge;
  if (type_per_edge) {
    // The edge count is already known from the first selection, so it is
    // passed along as the output size; only the selected values are kept.
    output_type_per_edge = std::get<1>(IndexSelectCSCImpl(
        in_degree, sliced_indptr, type_per_edge.value(), nodes,
        indptr.size(0) - 2, num_edges));
  }
  auto edge_ids = IndptrEdgeIdsImpl(
      output_indptr, sliced_indptr.scalar_type(), sliced_indptr, num_edges);

  return c10::make_intrusive(
      output_indptr, output_indices, edge_ids, nodes, torch::nullopt,
      output_type_per_edge);
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/isin.cu
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/isin.cu
 * @brief IsIn operator implementation on CUDA.
*/
#include
#include
#include

#include "./common.h"

namespace graphbolt {
namespace ops {

/**
 * @brief Tests, for each entry of `elements`, whether it occurs in
 * `test_elements`.
 *
 * `test_elements` is sorted first and every entry of `elements` is then looked
 * up with a vectorized binary search.
 *
 * @param elements Values to look up.
 * @param test_elements Values to search in.
 *
 * @return Bool tensor shaped like `elements`.
 */
torch::Tensor IsIn(torch::Tensor elements, torch::Tensor test_elements) {
  auto sorted_test_elements = Sort(test_elements);
  auto result = torch::empty_like(elements, torch::kBool);

  // NOTE(review): the dispatch is on the scalar type of `elements` only;
  // this appears to assume test_elements has the same dtype -- confirm.
  AT_DISPATCH_INTEGRAL_TYPES(
      elements.scalar_type(), "IsInOperation", ([&] {
        THRUST_CALL(
            binary_search, sorted_test_elements.data_ptr(),
            sorted_test_elements.data_ptr() + sorted_test_elements.size(0),
            elements.data_ptr(), elements.data_ptr() + elements.size(0),
            result.data_ptr());
      }));
  return result;
}

/**
 * @brief Returns the positions selected by `mask`.
 *
 * @param mask Flags, one per position.
 * @param logical_not When true, the selection predicate is
 * thrust::logical_not applied to the flags; otherwise the flags are used
 * directly.
 *
 * @return Int64 tensor holding the selected positions, cut down to the number
 * of positions actually selected.
 */
torch::Tensor Nonzero(torch::Tensor mask, bool logical_not) {
  // The candidate values are the positions 0, 1, 2, ... themselves.
  thrust::counting_iterator iota(0);
  // Allocated for the worst case (every position selected); trimmed below.
  auto result = torch::empty_like(mask, torch::kInt64);
  auto mask_ptr = mask.data_ptr();
  auto result_ptr = result.data_ptr();
  auto allocator = cuda::GetAllocator();
  // Single-element buffer that receives the number of selected positions.
  auto num_copied = allocator.AllocateStorage(1);
  if (logical_not) {
    CUB_CALL(
        DeviceSelect::FlaggedIf, iota, mask_ptr, result_ptr, num_copied.get(),
        mask.numel(), thrust::logical_not{});
  } else {
    CUB_CALL(
        DeviceSelect::Flagged, iota, mask_ptr, result_ptr, num_copied.get(),
        mask.numel());
  }
  cuda::CopyScalar num_copied_cpu(num_copied.get());
  // NOTE(review): slicing along dim 0 presumably expects a 1-D mask -- confirm.
  return result.slice(0, 0, static_cast(num_copied_cpu));
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/max_uva_threads.cc
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/max_uva_threads.cc
 * @brief Max uva threads variable setter function.
 */

#include "./max_uva_threads.h"

namespace graphbolt {
namespace cuda {

/** @brief Stores `count` in the global `max_uva_threads` limit. */
void set_max_uva_threads(int64_t count) { max_uva_threads = count; }

}  // namespace cuda
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/max_uva_threads.h
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/max_uva_threads.h
 * @brief Max uva threads variable declaration.
 */
#ifndef GRAPHBOLT_MAX_UVA_THREADS_H_
#define GRAPHBOLT_MAX_UVA_THREADS_H_

#include
#include

namespace graphbolt {
namespace cuda {

/**
 * @brief Limit on the number of CUDA threads for UVA accesses. It is an
 * optional that starts out empty; set_max_uva_threads assigns it.
 */
inline std::optional max_uva_threads;

/** @brief Set a limit on the number of CUDA threads for UVA accesses. */
void set_max_uva_threads(int64_t count);

}  // namespace cuda
}  // namespace graphbolt

#endif  // GRAPHBOLT_MAX_UVA_THREADS_H_

================================================
FILE: graphbolt/src/cuda/neighbor_sampler.cu
================================================
/**
 * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * @file cuda/neighbor_sampler.cu
 * @brief Neighbor sampling operator implementation on CUDA.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#if __CUDA_ARCH__ >= 700
#include
#endif  // __CUDA_ARCH__ >= 700

#include
#include
#include

#include "../macro.h"
#include "../random.h"
#include "../utils.h"
#include "./common.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

// Threads per block used when launching the kernels in this file.
constexpr int BLOCK_SIZE = 128;

/**
 * @brief 64-bit atomic max built on the CUDA atomicMax intrinsic.
 *
 * @param address Location to update.
 * @param val Candidate value.
 *
 * @return The value returned by atomicMax.
 */
inline __device__ int64_t AtomicMax(int64_t* const address, const int64_t val) {
  // To match the type accepted by "::atomicMax", ignore lint warning.
  // NOTE(review): the operands are presumably converted to this unsigned
  // type, in which case negative values would compare as large -- confirm
  // that callers only pass non-negative values.
  using Type = unsigned long long int;  // NOLINT

  static_assert(sizeof(Type) == sizeof(*address), "Type width must match");

  return atomicMax(reinterpret_cast(address), static_cast(val));
}

/**
 * @brief 32-bit atomic max built on the CUDA atomicMax intrinsic.
 *
 * @param address Location to update.
 * @param val Candidate value.
 *
 * @return The value returned by atomicMax.
 */
inline __device__ int32_t AtomicMax(int32_t* const address, const int32_t val) {
  // To match the type accepted by "::atomicMax", ignore lint warning.
  using Type = int;  // NOLINT

  static_assert(sizeof(Type) == sizeof(*address), "Type width must match");

  return atomicMax(reinterpret_cast(address), static_cast(val));
}

/**
 * @brief Performs neighbor sampling and fills the edge_ids array with
 * original edge ids if sliced_indptr is valid. If not, then it fills the edge
 * ids array with numbers up to the node degree.
*/
template
__global__ void _ComputeRandomsNS(
    const int64_t num_edges, const indptr_t* const sliced_indptr,
    const indptr_t* const sub_indptr, const indptr_t* const output_indptr,
    const indices_t* const csr_rows, const uint64_t random_seed,
    indptr_t* edge_ids) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  const int stride = gridDim.x * blockDim.x;
  // One generator per thread, seeded with the shared seed and using the
  // thread's starting index as the subsequence number.
  curandStatePhilox4_32_10_t rng;
  curand_init(random_seed, i, 0, &rng);

  while (i < num_edges) {
    const auto row_position = csr_rows[i];
    // Position of edge i inside its row.
    const auto row_offset = i - sub_indptr[row_position];
    const auto output_offset = output_indptr[row_position];
    // Number of output slots reserved for this row.
    const auto fanout = output_indptr[row_position + 1] - output_offset;
    // The first `fanout` edges of a row target their own slot; every later
    // edge draws a slot from [0, row_offset] and is dropped below when the
    // draw is not smaller than fanout.
    const auto rnd =
        row_offset < fanout ? row_offset : curand(&rng) % (row_offset + 1);
    if (rnd < fanout) {
      const indptr_t edge_id =
          row_offset + (sliced_indptr ? sliced_indptr[row_position] : 0);
      // Several edges may target the same slot; the largest edge id wins.
#if __CUDA_ARCH__ >= 700
      ::cuda::atomic_ref a(
          edge_ids[output_offset + rnd]);
      a.fetch_max(edge_id, ::cuda::std::memory_order_relaxed);
#else
      AtomicMax(edge_ids + output_offset + rnd, edge_id);
#endif  // __CUDA_ARCH__
    }
    i += stride;
  }
}

/**
 * @brief Fills random_arr with one random key per edge and the edge_ids array
 * with each edge's offset inside its row. When random_arr is sorted along with
 * edge_ids, the first fanout elements of each row give us the sampled edges.
*/
template <
    typename float_t, typename indptr_t, typename indices_t,
    typename weights_t, typename edge_id_t>
__global__ void _ComputeRandoms(
    const int64_t num_edges, const indptr_t* const sliced_indptr,
    const indptr_t* const sub_indptr, const indices_t* const csr_rows,
    const weights_t* const sliced_weights, const indices_t* const indices,
    const continuous_seed random_seed, float_t* random_arr,
    edge_id_t* edge_ids) {
  int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  const int stride = gridDim.x * blockDim.x;
  // When `indices` is supplied, the random draw is keyed on the neighbor id
  // instead of on the edge position.
  const auto labor = indices != nullptr;
  const float_t inf =
      static_cast(std::numeric_limits::infinity());

  while (i < num_edges) {
    const auto row_position = csr_rows[i];
    // Position of edge i inside its row.
    const auto row_offset = i - sub_indptr[row_position];
    // Position of the same edge in the unsliced arrays.
    const auto in_idx = sliced_indptr[row_position] + row_offset;
    const auto rnd = random_seed.uniform(labor ? indices[in_idx] : i);
    // A missing weight array means every edge has weight 1.
    const auto prob =
        sliced_weights ? sliced_weights[i] : static_cast(1);
    const auto exp_rnd = -__logf(rnd);
    // Key is -log(rnd) / prob; edges with non-positive weight get +inf.
    const float_t adjusted_rnd = prob > 0 ?
                                     static_cast(exp_rnd / prob) : inf;
    random_arr[i] = adjusted_rnd;
    edge_ids[i] = row_offset;

    i += stride;
  }
}

/** @brief Predicate returning `x > 0`. */
struct IsPositive {
  template
  __host__ __device__ auto operator()(probs_t x) {
    return x > 0;
  }
};

/**
 * @brief Functor returning the smaller of `in_degree[i]` and the fanout at
 * position `i % num_fanouts`.
 */
template
struct MinInDegreeFanout {
  const indptr_t* in_degree;
  const int64_t* fanouts;
  size_t num_fanouts;
  __host__ __device__ auto operator()(int64_t i) {
    return static_cast(
        min(static_cast(in_degree[i]), fanouts[i % num_fanouts]));
  }
};

/**
 * @brief Functor that maps a segment id `i` to the address
 * `indices + indptr[i]`.
 */
template
struct IteratorFunc {
  indptr_t* indptr;
  indices_t* indices;
  __host__ __device__ auto operator()(int64_t i) { return indices + indptr[i]; }
};

/** @brief Functor returning `x + offset`. */
template
struct AddOffset {
  indptr_t offset;
  template
  __host__ __device__ indptr_t operator()(edge_id_t x) {
    return x + offset;
  }
};

/**
 * @brief Functor that maps a segment id `i` to an output iterator positioned
 * at `indices + indptr[i]` which applies AddOffset with `sliced_indptr[i]` to
 * every value written through it.
 */
template
struct IteratorFuncAddOffset {
  indptr_t* indptr;
  indptr_t* sliced_indptr;
  indices_t* indices;
  __host__ __device__ auto operator()(int64_t i) {
    return thrust::transform_output_iterator{
        indices + indptr[i], AddOffset{sliced_indptr[i]}};
  }
};

/** @brief Functor returning `indptr[i] + in_degree[i]`, the end of segment i. */
template
struct SegmentEndFunc {
  indptr_t* indptr;
  in_degree_iterator_t in_degree;
  __host__ __device__ auto operator()(int64_t i) {
    return indptr[i] + in_degree[i];
  }
};

c10::intrusive_ptr SampleNeighbors(
    torch::Tensor indptr, torch::Tensor indices,
    torch::optional seeds,
    torch::optional> seed_offsets,
    const std::vector& fanouts, bool replace, bool layer,
    bool returning_indices_is_optional,
    torch::optional type_per_edge,
    torch::optional probs_or_mask,
    torch::optional node_type_offset,
    torch::optional> node_type_to_id,
    torch::optional> edge_type_to_id,
    torch::optional random_seed_tensor,
    float seed2_contribution,
    // Optional temporal sampling arguments begin.
    torch::optional seeds_timestamp,
    torch::optional seeds_pre_time_window,
    torch::optional node_timestamp,
    torch::optional edge_timestamp
    // Optional temporal sampling arguments end.
) { // When seed_offsets.has_value() in the hetero case, we compute the output of // sample_neighbors _convert_to_sampled_subgraph in a fused manner so that // _convert_to_sampled_subgraph only has to perform slices over the returned // indptr and indices tensors to form CSC outputs for each edge type. TORCH_CHECK(!replace, "Sampling with replacement is not supported yet!"); // Assume that indptr, indices, seeds, type_per_edge and probs_or_mask // are all resident on the GPU. If not, it is better to first extract them // before calling this function. auto allocator = cuda::GetAllocator(); auto num_rows = seeds.has_value() ? seeds.value().size(0) : indptr.size(0) - 1; auto fanouts_pinned = torch::empty( fanouts.size(), c10::TensorOptions().dtype(torch::kLong).pinned_memory(true)); auto fanouts_pinned_ptr = fanouts_pinned.data_ptr(); for (size_t i = 0; i < fanouts.size(); i++) { fanouts_pinned_ptr[i] = fanouts[i] >= 0 ? fanouts[i] : std::numeric_limits::max(); } // Finally, copy the adjusted fanout values to the device memory. 
auto fanouts_device = allocator.AllocateStorage(fanouts.size()); CUDA_CALL(cudaMemcpyAsync( fanouts_device.get(), fanouts_pinned_ptr, sizeof(int64_t) * fanouts.size(), cudaMemcpyHostToDevice, cuda::GetCurrentStream())); auto in_degree_and_sliced_indptr = SliceCSCIndptr(indptr, seeds); auto in_degree = std::get<0>(in_degree_and_sliced_indptr); auto sliced_indptr = std::get<1>(in_degree_and_sliced_indptr); const auto homo_in_degree = in_degree; const auto homo_sliced_indptr = sliced_indptr; auto max_in_degree = torch::empty( 1, c10::TensorOptions().dtype(in_degree.scalar_type()).pinned_memory(true)); AT_DISPATCH_INDEX_TYPES( indptr.scalar_type(), "SampleNeighborsMaxInDegree", ([&] { CUB_CALL( DeviceReduce::Max, in_degree.data_ptr(), max_in_degree.data_ptr(), num_rows); })); // Protect access to max_in_degree with a CUDAEvent at::cuda::CUDAEvent max_in_degree_event; max_in_degree_event.record(); torch::optional num_edges; torch::Tensor sub_indptr; if (!seeds.has_value()) { num_edges = indices.size(0); sub_indptr = indptr; } torch::optional sliced_probs_or_mask; if (probs_or_mask.has_value()) { if (seeds.has_value()) { torch::Tensor sliced_probs_or_mask_tensor; std::tie(sub_indptr, sliced_probs_or_mask_tensor) = IndexSelectCSCImpl( in_degree, sliced_indptr, probs_or_mask.value(), seeds.value(), indptr.size(0) - 2, num_edges); sliced_probs_or_mask = sliced_probs_or_mask_tensor; num_edges = sliced_probs_or_mask_tensor.size(0); } else { sliced_probs_or_mask = probs_or_mask; } } if (fanouts.size() > 1) { torch::Tensor sliced_type_per_edge; if (seeds.has_value()) { std::tie(sub_indptr, sliced_type_per_edge) = IndexSelectCSCImpl( in_degree, sliced_indptr, type_per_edge.value(), seeds.value(), indptr.size(0) - 2, num_edges); } else { sliced_type_per_edge = type_per_edge.value(); } std::tie(sub_indptr, in_degree, sliced_indptr) = SliceCSCIndptrHetero( sub_indptr, sliced_type_per_edge, sliced_indptr, fanouts.size()); num_rows = sliced_indptr.size(0); num_edges = 
sliced_type_per_edge.size(0); } // If sub_indptr was not computed in the two code blocks above: if (seeds.has_value() && !probs_or_mask.has_value() && fanouts.size() <= 1) { sub_indptr = ExclusiveCumSum(in_degree); } torch::optional homo_coo_rows; if (seeds_timestamp.has_value()) { // Temporal sampling is enabled. const auto homo_sub_indptr = fanouts.size() > 1 ? ExclusiveCumSum(homo_in_degree) : sub_indptr; homo_coo_rows = ExpandIndptrImpl( homo_sub_indptr, indices.scalar_type(), torch::nullopt, num_edges); num_edges = homo_coo_rows->size(0); const auto is_probs_initialized = sliced_probs_or_mask.has_value(); if (!is_probs_initialized) { sliced_probs_or_mask = torch::empty(*num_edges, sub_indptr.options().dtype(torch::kBool)); } GRAPHBOLT_DISPATCH_ALL_TYPES( sliced_probs_or_mask->scalar_type(), "SampleNeighborsTemporalProbsOrMask", ([&] { const scalar_t* input_probs_ptr = is_probs_initialized ? sliced_probs_or_mask->data_ptr() : nullptr; auto output_probs_ptr = sliced_probs_or_mask->data_ptr(); using timestamp_t = int64_t; const auto seeds_timestamp_ptr = seeds_timestamp->data_ptr(); const timestamp_t* seeds_pre_time_window_ptr = seeds_pre_time_window.has_value() ? seeds_pre_time_window->data_ptr() : nullptr; const timestamp_t* node_timestamp_ptr = node_timestamp.has_value() ? node_timestamp->data_ptr() : nullptr; const timestamp_t* edge_timestamp_ptr = edge_timestamp.has_value() ? 
edge_timestamp->data_ptr() : nullptr; AT_DISPATCH_INDEX_TYPES( homo_coo_rows->scalar_type(), "SampleNeighborsTemporalMaskIndices", ([&] { const auto coo_rows_ptr = homo_coo_rows->data_ptr(); const auto indices_ptr = indices.data_ptr(); AT_DISPATCH_INDEX_TYPES( homo_sliced_indptr.scalar_type(), "SampleNeighborsTemporalMaskIndptr", ([&] { const auto sliced_indptr_data = homo_sliced_indptr.data_ptr(); const auto sub_indptr_data = homo_sub_indptr.data_ptr(); CUB_CALL( DeviceFor::Bulk, *num_edges, [=] __device__(int64_t i) { const auto row = coo_rows_ptr[i]; const auto seed_timestamp = seeds_timestamp_ptr[row]; const auto row_offset = i - sub_indptr_data[row]; const auto in_idx = sliced_indptr_data[row] + row_offset; bool mask = true; if (node_timestamp_ptr) { const auto index = indices_ptr[in_idx]; const auto neighbor_timestamp = node_timestamp_ptr[index]; mask &= neighbor_timestamp < seed_timestamp; if (seeds_pre_time_window_ptr) { mask &= neighbor_timestamp > seed_timestamp - seeds_pre_time_window_ptr[row]; } } if (edge_timestamp_ptr) { const auto edge_timestamp = edge_timestamp_ptr[in_idx]; mask &= edge_timestamp < seed_timestamp; if (seeds_pre_time_window_ptr) { mask &= edge_timestamp > seed_timestamp - seeds_pre_time_window_ptr[row]; } } const scalar_t prob = input_probs_ptr ? input_probs_ptr[i] : scalar_t{1}; output_probs_ptr[i] = prob * static_cast(mask); }); })); })); })); } const continuous_seed random_seed = [&] { if (random_seed_tensor.has_value()) { return continuous_seed(random_seed_tensor.value(), seed2_contribution); } else { return continuous_seed{RandomEngine::ThreadLocal()->RandInt( static_cast(0), std::numeric_limits::max())}; } }(); auto output_indptr = torch::empty_like(sub_indptr); torch::Tensor picked_eids; torch::optional output_indices; AT_DISPATCH_INDEX_TYPES( indptr.scalar_type(), "SampleNeighborsIndptr", ([&] { using indptr_t = index_t; if (sliced_probs_or_mask.has_value()) { // Count nonzero probs into in_degree. 
GRAPHBOLT_DISPATCH_ALL_TYPES( sliced_probs_or_mask->scalar_type(), "SampleNeighborsPositiveProbs", ([&] { using probs_t = scalar_t; auto is_nonzero = thrust::make_transform_iterator( sliced_probs_or_mask->data_ptr(), IsPositive{}); CUB_CALL( DeviceSegmentedReduce::Sum, is_nonzero, in_degree.data_ptr(), num_rows, sub_indptr.data_ptr(), sub_indptr.data_ptr() + 1); })); } thrust::counting_iterator iota(0); auto sampled_degree = thrust::make_transform_iterator( iota, MinInDegreeFanout{ in_degree.data_ptr(), fanouts_device.get(), fanouts.size()}); // Compute output_indptr. CUB_CALL( DeviceScan::ExclusiveSum, sampled_degree, output_indptr.data_ptr(), num_rows + 1); auto num_sampled_edges = cuda::CopyScalar{output_indptr.data_ptr() + num_rows}; // This operation is placed after num_sampled_edges copy is started to // hide the latency of copy synchronization later. torch::Tensor coo_rows; if (!homo_coo_rows.has_value() || fanouts.size() > 1) { coo_rows = ExpandIndptrImpl( sub_indptr, indices.scalar_type(), torch::nullopt, num_edges); num_edges = coo_rows.size(0); } else { coo_rows = *homo_coo_rows; } // Find the smallest integer type to store the edge id offsets. We synch // the CUDAEvent so that the access is safe. 
auto compute_num_bits = [&] { max_in_degree_event.synchronize(); return cuda::NumberOfBits(max_in_degree.data_ptr()[0]); }; if (layer || sliced_probs_or_mask.has_value()) { const int num_bits = compute_num_bits(); std::array type_bits = {8, 16, 32, 64}; const auto type_index = std::lower_bound(type_bits.begin(), type_bits.end(), num_bits) - type_bits.begin(); std::array types = { torch::kByte, torch::kInt16, torch::kInt32, torch::kLong, torch::kLong}; auto edge_id_dtype = types[type_index]; AT_DISPATCH_INTEGRAL_TYPES( edge_id_dtype, "SampleNeighborsEdgeIDs", ([&] { using edge_id_t = std::make_unsigned_t; TORCH_CHECK( num_bits <= sizeof(edge_id_t) * 8, "Selected edge_id_t must be capable of storing edge_ids."); // Using bfloat16 for random numbers works just as reliably as // float32 and provides around 30% speedup. using rnd_t = nv_bfloat16; auto randoms = allocator.AllocateStorage(num_edges.value()); auto randoms_sorted = allocator.AllocateStorage(num_edges.value()); auto edge_id_segments = allocator.AllocateStorage(num_edges.value()); auto sorted_edge_id_segments = allocator.AllocateStorage(num_edges.value()); AT_DISPATCH_INDEX_TYPES( indices.scalar_type(), "SampleNeighborsIndices", ([&] { using indices_t = index_t; auto probs_or_mask_scalar_type = torch::kFloat32; if (sliced_probs_or_mask.has_value()) { probs_or_mask_scalar_type = sliced_probs_or_mask->scalar_type(); } GRAPHBOLT_DISPATCH_ALL_TYPES( probs_or_mask_scalar_type, "SampleNeighborsProbs", ([&] { using probs_t = scalar_t; probs_t* sliced_probs_ptr = nullptr; if (sliced_probs_or_mask.has_value()) { sliced_probs_ptr = sliced_probs_or_mask->data_ptr(); } const indices_t* indices_ptr = layer ? indices.data_ptr() : nullptr; const dim3 block(BLOCK_SIZE); const dim3 grid( (num_edges.value() + BLOCK_SIZE - 1) / BLOCK_SIZE); // Compute row and random number pairs. 
CUDA_KERNEL_CALL( _ComputeRandoms, grid, block, 0, num_edges.value(), sliced_indptr.data_ptr(), sub_indptr.data_ptr(), coo_rows.data_ptr(), sliced_probs_ptr, indices_ptr, random_seed, randoms.get(), edge_id_segments.get()); })); })); // Sort the random numbers along with edge ids, after // sorting the first fanout elements of each row will // give us the sampled edges. CUB_CALL( DeviceSegmentedSort::SortPairs, randoms.get(), randoms_sorted.get(), edge_id_segments.get(), sorted_edge_id_segments.get(), num_edges.value(), num_rows, sub_indptr.data_ptr(), sub_indptr.data_ptr() + 1); picked_eids = torch::empty( static_cast(num_sampled_edges), sub_indptr.options()); // Need to sort the sampled edges only when fanouts.size() == 1 // since multiple fanout sampling case is automatically going to // be sorted. if (type_per_edge && fanouts.size() == 1) { // Ensuring sort result still ends up in // sorted_edge_id_segments std::swap(edge_id_segments, sorted_edge_id_segments); auto sampled_segment_end_it = thrust::make_transform_iterator( iota, SegmentEndFunc{ sub_indptr.data_ptr(), sampled_degree}); CUB_CALL( DeviceSegmentedSort::SortKeys, edge_id_segments.get(), sorted_edge_id_segments.get(), picked_eids.size(0), num_rows, sub_indptr.data_ptr(), sampled_segment_end_it); } auto input_buffer_it = thrust::make_transform_iterator( iota, IteratorFunc{ sub_indptr.data_ptr(), sorted_edge_id_segments.get()}); auto output_buffer_it = thrust::make_transform_iterator( iota, IteratorFuncAddOffset{ output_indptr.data_ptr(), sliced_indptr.data_ptr(), picked_eids.data_ptr()}); constexpr int64_t max_copy_at_once = std::numeric_limits::max(); // Copy the sampled edge ids into picked_eids tensor. for (int64_t i = 0; i < num_rows; i += max_copy_at_once) { CUB_CALL( DeviceCopy::Batched, input_buffer_it + i, output_buffer_it + i, sampled_degree + i, std::min(num_rows - i, max_copy_at_once)); } })); } else { // Non-weighted neighbor sampling. 
picked_eids = torch::zeros(num_edges.value(), sub_indptr.options()); const auto sort_needed = type_per_edge && fanouts.size() == 1; const auto sliced_indptr_ptr = sort_needed ? nullptr : sliced_indptr.data_ptr(); const dim3 block(BLOCK_SIZE); const dim3 grid( (std::min(num_edges.value(), static_cast(1 << 20)) + BLOCK_SIZE - 1) / BLOCK_SIZE); AT_DISPATCH_INDEX_TYPES( indices.scalar_type(), "SampleNeighborsIndices", ([&] { using indices_t = index_t; // Compute row and random number pairs. CUDA_KERNEL_CALL( _ComputeRandomsNS, grid, block, 0, num_edges.value(), sliced_indptr_ptr, sub_indptr.data_ptr(), output_indptr.data_ptr(), coo_rows.data_ptr(), random_seed.get_seed(0), picked_eids.data_ptr()); })); picked_eids = picked_eids.slice(0, 0, static_cast(num_sampled_edges)); // Need to sort the sampled edges only when fanouts.size() == 1 // since multiple fanout sampling case is automatically going to // be sorted. if (sort_needed) { const int num_bits = compute_num_bits(); std::array type_bits = {8, 15, 31, 63}; const auto type_index = std::lower_bound(type_bits.begin(), type_bits.end(), num_bits) - type_bits.begin(); std::array types = { torch::kByte, torch::kInt16, torch::kInt32, torch::kLong, torch::kLong}; auto edge_id_dtype = types[type_index]; AT_DISPATCH_INTEGRAL_TYPES( edge_id_dtype, "SampleNeighborsEdgeIDs", ([&] { using edge_id_t = scalar_t; TORCH_CHECK( num_bits <= sizeof(edge_id_t) * 8, "Selected edge_id_t must be capable of storing " "edge_ids."); auto picked_offsets = picked_eids.to(edge_id_dtype); auto sorted_offsets = torch::empty_like(picked_offsets); CUB_CALL( DeviceSegmentedSort::SortKeys, picked_offsets.data_ptr(), sorted_offsets.data_ptr(), picked_eids.size(0), num_rows, output_indptr.data_ptr(), output_indptr.data_ptr() + 1); auto edge_id_offsets = ExpandIndptrImpl( output_indptr, picked_eids.scalar_type(), sliced_indptr, picked_eids.size(0)); picked_eids = sorted_offsets.to(picked_eids.scalar_type()) + edge_id_offsets; })); } } if 
(!returning_indices_is_optional || utils::is_on_gpu(indices)) { output_indices = Gather(indices, picked_eids); } })); torch::optional output_type_per_edge; torch::optional edge_offsets; if (type_per_edge && seed_offsets) { const int64_t num_etypes = edge_type_to_id.has_value() ? edge_type_to_id->size() : 1; // If we performed homogenous sampling on hetero graph, we have to look at // type_per_edge of sampled edges and determine the offsets of different // sampled etypes and convert to fused hetero indptr representation. if (fanouts.size() == 1) { output_type_per_edge = Gather(*type_per_edge, picked_eids); torch::Tensor output_in_degree, sliced_output_indptr; sliced_output_indptr = output_indptr.slice(0, 0, output_indptr.size(0) - 1); std::tie(output_indptr, output_in_degree, sliced_output_indptr) = SliceCSCIndptrHetero( output_indptr, output_type_per_edge.value(), sliced_output_indptr, num_etypes); // We use num_rows to hold num_seeds * num_etypes. So, it needs to be // updated when sampling with a single fanout value when the graph is // heterogenous. num_rows = sliced_output_indptr.size(0); } // Here, we check what are the dst node types for the given seeds so that // we can compute the output indptr space later. std::vector etype_id_to_dst_ntype_id(num_etypes); // Here, we check what are the src node types for the given seeds so that // we can subtract source node offset from indices later. 
auto etype_id_to_src_ntype_id = torch::empty( 2 * num_etypes, c10::TensorOptions().dtype(torch::kLong).pinned_memory(true)); auto etype_id_to_src_ntype_id_ptr = etype_id_to_src_ntype_id.data_ptr(); for (auto& etype_and_id : edge_type_to_id.value()) { auto etype = etype_and_id.key(); auto id = etype_and_id.value(); auto [src_type, dst_type] = utils::parse_src_dst_ntype_from_etype(etype); etype_id_to_dst_ntype_id[id] = node_type_to_id->at(dst_type); etype_id_to_src_ntype_id_ptr[2 * id] = etype_id_to_src_ntype_id_ptr[2 * id + 1] = node_type_to_id->at(src_type); } auto indices_offsets_device = torch::empty( etype_id_to_src_ntype_id.size(0), picked_eids.options().dtype(torch::kLong)); AT_DISPATCH_INDEX_TYPES( node_type_offset->scalar_type(), "SampleNeighborsNodeTypeOffset", ([&] { THRUST_CALL( gather, etype_id_to_src_ntype_id_ptr, etype_id_to_src_ntype_id_ptr + etype_id_to_src_ntype_id.size(0), node_type_offset->data_ptr(), indices_offsets_device.data_ptr()); })); // For each edge type, we compute the start and end offsets to index into // indptr to form the final output_indptr. auto indptr_offsets = torch::empty( num_etypes * 2, c10::TensorOptions().dtype(torch::kLong).pinned_memory(true)); auto indptr_offsets_ptr = indptr_offsets.data_ptr(); // We compute the indptr offsets here, right now, output_indptr is of size // # seeds * num_etypes + 1. We can simply take slices to get correct output // indptr. The final output_indptr is same as current indptr except that // some intermediate values are removed to change the node ids space from // all of the seed vertices to the node id space of the dst node type of // each edge type. 
for (int i = 0; i < num_etypes; i++) { indptr_offsets_ptr[2 * i] = num_rows / num_etypes * i + seed_offsets->at(etype_id_to_dst_ntype_id[i]); indptr_offsets_ptr[2 * i + 1] = num_rows / num_etypes * i + seed_offsets->at(etype_id_to_dst_ntype_id[i] + 1); } auto permutation = torch::arange( 0, num_rows * num_etypes, num_etypes, output_indptr.options()); permutation = permutation.remainder(num_rows) + permutation.div(num_rows, "floor"); // This permutation, when applied sorts the sampled edges with respect to // edge types. auto [output_in_degree, sliced_output_indptr] = SliceCSCIndptr(output_indptr, permutation); std::tie(output_indptr, picked_eids) = IndexSelectCSCImpl( output_in_degree, sliced_output_indptr, picked_eids, permutation, num_rows - 1, picked_eids.size(0)); edge_offsets = torch::empty( num_etypes * 2, c10::TensorOptions() .dtype(output_indptr.scalar_type()) .pinned_memory(true)); auto edge_offsets_device = torch::empty(num_etypes * 2, output_indptr.options()); at::cuda::CUDAEvent edge_offsets_event; AT_DISPATCH_INDEX_TYPES( indptr.scalar_type(), "SampleNeighborsEdgeOffsets", ([&] { auto edge_offsets_pinned_device_pair = thrust::make_transform_output_iterator( thrust::make_zip_iterator( edge_offsets->data_ptr(), edge_offsets_device.data_ptr()), [=] __device__(index_t x) { return thrust::make_tuple(x, x); }); THRUST_CALL( gather, indptr_offsets_ptr, indptr_offsets_ptr + indptr_offsets.size(0), output_indptr.data_ptr(), edge_offsets_pinned_device_pair); })); edge_offsets_event.record(); if (output_indices.has_value()) { auto indices_offset_subtract = ExpandIndptrImpl( edge_offsets_device, indices.scalar_type(), indices_offsets_device, output_indices->size(0)); // The output_indices is permuted here. 
std::tie(output_indptr, output_indices) = IndexSelectCSCImpl( output_in_degree, sliced_output_indptr, *output_indices, permutation, num_rows - 1, output_indices->size(0)); *output_indices -= indices_offset_subtract; } auto output_indptr_offsets = torch::empty( num_etypes * 2, c10::TensorOptions().dtype(torch::kLong).pinned_memory(true)); auto output_indptr_offsets_ptr = output_indptr_offsets.data_ptr(); std::vector indptr_list; for (int i = 0; i < num_etypes; i++) { indptr_list.push_back(output_indptr.slice( 0, indptr_offsets_ptr[2 * i], indptr_offsets_ptr[2 * i + 1] + 1)); output_indptr_offsets_ptr[2 * i] = i == 0 ? 0 : output_indptr_offsets_ptr[2 * i - 1]; output_indptr_offsets_ptr[2 * i + 1] = output_indptr_offsets_ptr[2 * i] + indptr_list.back().size(0); } auto output_indptr_offsets_device = torch::empty( output_indptr_offsets.size(0), output_indptr.options().dtype(torch::kLong)); THRUST_CALL( copy_n, output_indptr_offsets_ptr, output_indptr_offsets.size(0), output_indptr_offsets_device.data_ptr()); // We form the final output indptr by concatenating pieces for different // edge types. output_indptr = torch::cat(indptr_list); auto indptr_offset_subtract = ExpandIndptrImpl( output_indptr_offsets_device, indptr.scalar_type(), edge_offsets_device, output_indptr.size(0)); output_indptr -= indptr_offset_subtract; edge_offsets_event.synchronize(); // We read the edge_offsets here, they are in pairs but we don't need it to // be in pairs. So we remove the duplicate information from it and turn it // into a real offsets array. 
AT_DISPATCH_INDEX_TYPES( indptr.scalar_type(), "SampleNeighborsEdgeOffsetsCheck", ([&] { auto edge_offsets_ptr = edge_offsets->data_ptr(); TORCH_CHECK(edge_offsets_ptr[0] == 0, "edge_offsets is incorrect."); for (int i = 1; i < num_etypes; i++) { TORCH_CHECK( edge_offsets_ptr[2 * i - 1] == edge_offsets_ptr[2 * i], "edge_offsets is incorrect."); } TORCH_CHECK( edge_offsets_ptr[2 * num_etypes - 1] == picked_eids.size(0), "edge_offsets is incorrect."); for (int i = 0; i < num_etypes; i++) { edge_offsets_ptr[i + 1] = edge_offsets_ptr[2 * i + 1]; } })); edge_offsets = edge_offsets->slice(0, 0, num_etypes + 1); } else { // Convert output_indptr back to homo by discarding intermediate offsets. output_indptr = output_indptr.slice(0, 0, output_indptr.size(0), fanouts.size()); if (type_per_edge) output_type_per_edge = Gather(*type_per_edge, picked_eids); } return c10::make_intrusive( output_indptr, output_indices, picked_eids, seeds, torch::nullopt, output_type_per_edge, edge_offsets); } } // namespace ops } // namespace graphbolt ================================================ FILE: graphbolt/src/cuda/sampling_utils.cu ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file cuda/sampling_utils.cu * @brief Sampling utility function implementations on CUDA. 
 */
// NOTE(review): in this view of the file the text inside angle brackets
// (template parameter lists, template arguments, <...> include targets)
// appears to be missing; every code token below is kept exactly as found.
#include
#include
#include "./common.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

// Given rows and indptr, computes:
// inrow_indptr[i] = indptr[rows[i]];
// in_degree[i] = indptr[rows[i] + 1] - indptr[rows[i]];
//
// Functor invoked once per index tIdx. Each call reads rows[tIdx] and two
// adjacent entries of indptr, and writes exactly one entry of each output.
template
struct SliceFunc {
  // Row ids to look up; indexed by tIdx.
  const nodes_t* rows;
  // Offsets array; indexed by the row id read from rows.
  const indptr_t* indptr;
  // Output: indptr[row + 1] - indptr[row] for each tIdx.
  indptr_t* in_degree;
  // Output: indptr[row] for each tIdx.
  indptr_t* inrow_indptr;
  __host__ __device__ auto operator()(int64_t tIdx) {
    const auto out_row = rows[tIdx];
    // indptr[out_row] is read once and reused for both outputs.
    const auto indptr_val = indptr[out_row];
    const auto degree = indptr[out_row + 1] - indptr_val;
    in_degree[tIdx] = degree;
    inrow_indptr[tIdx] = indptr_val;
  }
};

// Returns (indptr[nodes + 1] - indptr[nodes], indptr[nodes])
//
// When nodes_optional holds a value, the two outputs are computed only for
// the listed nodes; otherwise they are computed for every row of indptr.
// In both branches the returned in_degree tensor has one more element than
// the number of rows, and that last element is not written by this function.
// NOTE(review): presumably the extra slot exists so that an exclusive
// cumulative sum over in_degree yields a full indptr, as the comment in
// SliceCSCIndptrHetero below describes for its own output - confirm.
std::tuple SliceCSCIndptr(
    torch::Tensor indptr, torch::optional nodes_optional) {
  if (nodes_optional.has_value()) {
    auto nodes = nodes_optional.value();
    const int64_t num_nodes = nodes.size(0);
    // Read indptr only once in case it is pinned and access is slow.
    // The outputs take their options from nodes and only the dtype from
    // indptr.
    auto sliced_indptr =
        torch::empty(num_nodes, nodes.options().dtype(indptr.scalar_type()));
    // compute in-degrees
    // Allocated with num_nodes + 1 entries; SliceFunc fills the first
    // num_nodes of them.
    auto in_degree = torch::empty(
        num_nodes + 1, nodes.options().dtype(indptr.scalar_type()));
    thrust::counting_iterator iota(0);
    // Double dispatch: outer on the dtype of indptr, inner on the dtype of
    // nodes.
    AT_DISPATCH_INTEGRAL_TYPES(
        indptr.scalar_type(), "IndexSelectCSCIndptr", ([&] {
          using indptr_t = scalar_t;
          AT_DISPATCH_INDEX_TYPES(
              nodes.scalar_type(), "IndexSelectCSCNodes", ([&] {
                using nodes_t = index_t;
                // Apply SliceFunc to every index in [0, num_nodes).
                THRUST_CALL(
                    for_each, iota, iota + num_nodes,
                    SliceFunc{
                        nodes.data_ptr(), indptr.data_ptr(),
                        in_degree.data_ptr(),
                        sliced_indptr.data_ptr()});
              }));
        }));
    return {in_degree, sliced_indptr};
  } else {
    const int64_t num_nodes = indptr.size(0) - 1;
    // A view of indptr without its last entry; no copy is made here.
    auto sliced_indptr = indptr.slice(0, 0, num_nodes);
    // num_nodes + 2 entries: the call below is given num_nodes + 1 as its
    // count, the first result is dropped afterwards, and one trailing slot
    // remains.
    auto in_degree = torch::empty(
        num_nodes + 2, indptr.options().dtype(indptr.scalar_type()));
    AT_DISPATCH_INTEGRAL_TYPES(
        indptr.scalar_type(), "IndexSelectCSCIndptr", ([&] {
          using indptr_t = scalar_t;
          // Adjacent differences of indptr give the per-row degrees.
          CUB_CALL(
              DeviceAdjacentDifference::SubtractLeftCopy,
              indptr.data_ptr(), in_degree.data_ptr(),
              num_nodes + 1, cub::Difference{});
        }));
    // Drop the first element of the adjacent-difference output.
    in_degree = in_degree.slice(0, 1);
    return {in_degree, sliced_indptr};
  }
}

// Functor that splits each row of a CSC slice into num_fanouts consecutive
// sub-rows, one per edge type id in [0, num_fanouts). Output index i maps to
// input row i / num_fanouts and edge type i % num_fanouts.
// NOTE(review): the use of cub::LowerBound assumes that etypes is sorted in
// non-decreasing order within each row - confirm against the callers.
template
struct EdgeTypeSearch {
  // Offsets of each input row into etypes.
  const indptr_t* sub_indptr;
  // Per input row starting offset; shifted by the same amount as sub_indptr.
  const indptr_t* sliced_indptr;
  // Edge type ids, searched per row.
  const etype_t* etypes;
  // Number of sub-rows produced per input row.
  int64_t num_fanouts;
  // Total number of output sub-rows.
  int64_t num_rows;
  // Output: num_rows + 1 offsets.
  indptr_t* new_sub_indptr;
  // Output: num_rows starting offsets.
  indptr_t* new_sliced_indptr;
  __host__ __device__ auto operator()(int64_t i) {
    const auto homo_i = i / num_fanouts;
    const auto indptr_i = sub_indptr[homo_i];
    const auto degree = sub_indptr[homo_i + 1] - indptr_i;
    const etype_t etype = i % num_fanouts;
    // Position of the first entry in this row whose type is not less than
    // etype.
    auto offset = cub::LowerBound(etypes + indptr_i, degree, etype);
    new_sub_indptr[i] = indptr_i + offset;
    new_sliced_indptr[i] = sliced_indptr[homo_i] + offset;
    // The invocation for the last sub-row also writes the closing offset.
    if (i == num_rows - 1) new_sub_indptr[num_rows] = indptr_i + degree;
  }
};

// Expands (sub_indptr, sliced_indptr) so that every input row becomes
// num_fanouts sub-rows delimited by edge type, using EdgeTypeSearch.
// Returns {new_sub_indptr, new_indegree, new_sliced_indptr} with sizes
// num_rows + 1, num_rows + 1 and num_rows respectively, where
// num_rows = (sub_indptr.size(0) - 1) * num_fanouts.
std::tuple SliceCSCIndptrHetero(
    torch::Tensor sub_indptr, torch::Tensor etypes, torch::Tensor sliced_indptr,
    int64_t num_fanouts) {
  auto num_rows = (sub_indptr.size(0) - 1) * num_fanouts;
  auto new_sub_indptr = torch::empty(num_rows + 1, sub_indptr.options());
  // One element larger than needed; the first element is sliced off below.
  auto new_indegree = torch::empty(num_rows + 2, sub_indptr.options());
  auto new_sliced_indptr = torch::empty(num_rows, sliced_indptr.options());
  thrust::counting_iterator iota(0);
  AT_DISPATCH_INTEGRAL_TYPES(
      sub_indptr.scalar_type(), "SliceCSCIndptrHeteroIndptr", ([&] {
        using indptr_t = scalar_t;
        AT_DISPATCH_INTEGRAL_TYPES(
            etypes.scalar_type(), "SliceCSCIndptrHeteroTypePerEdge", ([&] {
              using etype_t = scalar_t;
              // Fill new_sub_indptr and new_sliced_indptr, one sub-row per
              // index.
              THRUST_CALL(
                  for_each, iota, iota + num_rows,
                  EdgeTypeSearch{
                      sub_indptr.data_ptr(),
                      sliced_indptr.data_ptr(),
                      etypes.data_ptr(), num_fanouts, num_rows,
                      new_sub_indptr.data_ptr(),
                      new_sliced_indptr.data_ptr()});
            }));
        // Adjacent differences of the new offsets give the new degrees.
        CUB_CALL(
            DeviceAdjacentDifference::SubtractLeftCopy,
            new_sub_indptr.data_ptr(),
            new_indegree.data_ptr(), num_rows + 1, cub::Difference{});
      }));
  // Discard the first element of the SubtractLeftCopy result and ensure that
  // new_indegree tensor has size num_rows + 1 so that its ExclusiveCumSum is
  // directly equivalent to new_sub_indptr.
  // Equivalent to new_indegree = new_indegree[1:] in Python.
  new_indegree = new_indegree.slice(0, 1);
  return {new_sub_indptr, new_indegree, new_sliced_indptr};
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/sort_impl.cu
================================================
/**
 *  Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *  All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * @file cuda/sort_impl.cu
 * @brief Sort implementation on CUDA.
 */
#include
#include
#include "./common.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

// Radix-sorts num_items keys read from input_keys into a freshly allocated
// CUDA tensor.
//
// input_keys: pointer to the keys to sort.
// num_items: number of keys.
// num_bits: upper bit bound handed to the radix sort; 0 means "use every bit
//   of scalar_t".
// Returns the sorted keys, or, when return_original_positions is set, a pair
// of (sorted keys, int64 tensor holding for each sorted key its index in the
// input).
template
std::conditional_t<
    return_original_positions, std::pair,
    torch::Tensor>
Sort(const scalar_t* input_keys, int64_t num_items, int num_bits) {
  const auto options = torch::TensorOptions().device(c10::DeviceType::CUDA);
  constexpr c10::ScalarType dtype = c10::CppTypeToScalarType::value;
  auto sorted_array = torch::empty(num_items, options.dtype(dtype));
  auto sorted_keys = sorted_array.data_ptr();
  if (num_bits == 0) {
    // No hint from the caller: sort on the full width of the key type.
    num_bits = sizeof(scalar_t) * 8;
  }
  if constexpr (return_original_positions) {
    // We utilize int64_t for the values array. (torch::kLong == int64_t)
    // The values start out as 0..num_items-1, so after the pair sort they
    // record where each sorted key came from.
    auto original_idx = torch::arange(num_items, options.dtype(torch::kLong));
    auto sorted_idx = torch::empty_like(original_idx);
    const int64_t* input_values = original_idx.data_ptr();
    int64_t* sorted_values = sorted_idx.data_ptr();
    // NOTE(review): the trailing "0, num_bits" are presumably CUB's
    // begin_bit / end_bit arguments - confirm against CUB_CALL's expansion.
    CUB_CALL(
        DeviceRadixSort::SortPairs, input_keys, sorted_keys, input_values,
        sorted_values, num_items, 0, num_bits);
    return std::make_pair(sorted_array, sorted_idx);
  } else {
    CUB_CALL(
        DeviceRadixSort::SortKeys, input_keys, sorted_keys, num_items, 0,
        num_bits);
    return sorted_array;
  }
}

// Tensor overload: dispatches on the integral dtype of input and forwards its
// data pointer and first-dimension size to the pointer overload above.
template
std::conditional_t<
    return_original_positions, std::pair,
    torch::Tensor>
Sort(torch::Tensor input, int num_bits) {
  return AT_DISPATCH_INTEGRAL_TYPES(input.scalar_type(), "SortImpl", ([&] {
                                      return Sort(
                                          input.data_ptr(),
                                          input.size(0), num_bits);
                                    }));
}

// Explicit instantiations of the tensor overload for both return shapes
// (tensor only, and pair of tensors).
template torch::Tensor Sort(torch::Tensor input, int num_bits);
template std::pair Sort(
    torch::Tensor input, int num_bits);

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/unique_and_compact_impl.cu
================================================
/**
 *  Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *  All rights reserved.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * @file cuda/unique_and_compact_impl.cu
 * @brief Unique and compact operator implementation on CUDA.
 */
// NOTE(review): in this view of the file the text inside angle brackets
// (template parameter lists, template arguments, <...> include targets)
// appears to be missing; every code token below is kept exactly as found.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "./common.h"
#include "./extension/unique_and_compact.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

// Predicate functor: for index i, reports whether the element of
// sorted_order at position found_locations[i] equals searched_items[i].
// Used below to verify that a lower_bound lookup actually hit its target.
template
struct EqualityFunc {
  // Array that was searched.
  const scalar_t* sorted_order;
  // Position returned by the search for each item.
  const scalar_t* found_locations;
  // Items that were searched for.
  const scalar_t* searched_items;
  __host__ __device__ auto operator()(int64_t i) {
    return sorted_order[found_locations[i]] == searched_items[i];
  }
};

// Defines a function `name(input, size)` that runs the given CUB reduction
// over `size` elements of `input`, writes the result through a
// cuda::CopyScalar and returns that CopyScalar. The call is only enqueued
// via CUB_CALL; the code below synchronizes before reading the value.
#define DefineCubReductionFunction(cub_reduce_fn, name)           \
  template                                                        \
  auto name(const scalar_iterator_t input, int64_t size) {        \
    using scalar_t = std::remove_reference_t;                     \
    cuda::CopyScalar result;                                      \
    CUB_CALL(cub_reduce_fn, input, result.get(), size);           \
    return result;                                                \
  }

DefineCubReductionFunction(DeviceReduce::Max, Max);
DefineCubReductionFunction(DeviceReduce::Min, Min);

// Sort-based unique-and-compact over a batch of (src_ids, dst_ids,
// unique_dst_ids) triples that share one index dtype (taken from
// src_ids.at(0)).
//
// For every batch item i the result tuple holds:
//   - real_order: torch::cat({unique_dst_ids[i], unique src ids that are not
//     in unique_dst_ids[i]}),
//   - new_src_ids: for each src id, its position in real_order,
//   - new_dst_ids: for each dst id, its position in real_order.
//
// num_bits: number of key bits handed to the sorts; when 0 it is derived
//   from the maximum id found in src_ids and unique_dst_ids.
// Throws std::out_of_range("Some ids not found.") when a dst id of a
// non-empty dst_ids[i] is not located in the sorted real_order.
// src_ids.at(0) throws if src_ids is empty.
std::vector>
UniqueAndCompactBatchedSortBased(
    const std::vector& src_ids,
    const std::vector& dst_ids,
    const std::vector& unique_dst_ids, int num_bits = 0) {
  auto allocator = cuda::GetAllocator();
  auto stream = cuda::GetCurrentStream();
  auto scalar_type = src_ids.at(0).scalar_type();
  return AT_DISPATCH_INDEX_TYPES(
      scalar_type, "unique_and_compact", ([&] {
        // Cache the raw data pointers of every input tensor.
        std::vector src_ids_ptr, dst_ids_ptr, unique_dst_ids_ptr;
        for (std::size_t i = 0; i < src_ids.size(); i++) {
          src_ids_ptr.emplace_back(src_ids[i].data_ptr());
          dst_ids_ptr.emplace_back(dst_ids[i].data_ptr());
          unique_dst_ids_ptr.emplace_back(
              unique_dst_ids[i].data_ptr());
        }

        // If num_bits is not given, compute maximum vertex ids to compute
        // num_bits later to speedup the expensive sort operations.
        // The loop body is skipped entirely when num_bits != 0.
        std::vector> max_id_src;
        std::vector> max_id_dst;
        for (std::size_t i = 0; num_bits == 0 && i < src_ids.size(); i++) {
          max_id_src.emplace_back(Max(src_ids_ptr[i], src_ids[i].size(0)));
          max_id_dst.emplace_back(
              Max(unique_dst_ids_ptr[i], unique_dst_ids[i].size(0)));
        }

        // Sort the unique_dst_ids tensor.
        // num_bits may still be 0 here, in which case Sort is called with 0.
        std::vector sorted_unique_dst_ids;
        std::vector sorted_unique_dst_ids_ptr;
        for (std::size_t i = 0; i < unique_dst_ids.size(); i++) {
          sorted_unique_dst_ids.emplace_back(Sort(
              unique_dst_ids_ptr[i], unique_dst_ids[i].size(0), num_bits));
          sorted_unique_dst_ids_ptr.emplace_back(
              sorted_unique_dst_ids[i].data_ptr());
        }

        // Mark dst nodes in the src_ids tensor.
        // is_dst[i][j] receives the binary_search result for src_ids[i][j]
        // in the sorted unique_dst_ids[i].
        std::vector(0))> is_dst;
        for (std::size_t i = 0; i < src_ids.size(); i++) {
          is_dst.emplace_back(
              allocator.AllocateStorage(src_ids[i].size(0)));
          THRUST_CALL(
              binary_search, sorted_unique_dst_ids_ptr[i],
              sorted_unique_dst_ids_ptr[i] + unique_dst_ids[i].size(0),
              src_ids_ptr[i], src_ids_ptr[i] + src_ids[i].size(0),
              is_dst[i].get());
        }

        // Filter the non-dst nodes in the src_ids tensor, hence only_src.
        std::vector only_src;
        {
          std::vector> only_src_size;
          for (std::size_t i = 0; i < src_ids.size(); i++) {
            // Allocated at the upper bound (all src ids kept); trimmed after
            // the selected count becomes available.
            only_src.emplace_back(torch::empty(
                src_ids[i].size(0), sorted_unique_dst_ids[i].options()));
            // Selection flag is the logical negation of is_dst.
            auto is_src = thrust::make_transform_iterator(
                is_dst[i].get(), thrust::logical_not{});
            only_src_size.emplace_back(cuda::CopyScalar{});
            CUB_CALL(
                DeviceSelect::Flagged, src_ids_ptr[i], is_src,
                only_src[i].data_ptr(), only_src_size[i].get(),
                src_ids[i].size(0));
          }
          // Wait for the stream before the selected counts are read below.
          stream.synchronize();
          for (std::size_t i = 0; i < only_src.size(); i++) {
            only_src[i] =
                only_src[i].slice(0, 0, static_cast(only_src_size[i]));
          }
        }

        // The code block above synchronizes, ensuring safe access to
        // max_id_src and max_id_dst.
        if (num_bits == 0) {
          index_t max_id = 0;
          for (std::size_t i = 0; i < max_id_src.size(); i++) {
            max_id = std::max(max_id, static_cast(max_id_src[i]));
            max_id = std::max(max_id, static_cast(max_id_dst[i]));
          }
          // NumberOfBits takes the size of the range, i.e. max_id + 1.
          num_bits = cuda::NumberOfBits(1ll + max_id);
        }

        // Sort the only_src tensor so that we can unique it later.
        std::vector sorted_only_src;
        for (auto& only_src_i : only_src) {
          sorted_only_src.emplace_back(Sort(
              only_src_i.data_ptr(), only_src_i.size(0), num_bits));
        }

        std::vector unique_only_src;
        std::vector unique_only_src_ptr;

        std::vector> unique_only_src_size;
        for (std::size_t i = 0; i < src_ids.size(); i++) {
          // Compute the unique operation on the only_src tensor.
          unique_only_src.emplace_back(
              torch::empty(only_src[i].size(0), src_ids[i].options()));
          unique_only_src_ptr.emplace_back(
              unique_only_src[i].data_ptr());
          unique_only_src_size.emplace_back(cuda::CopyScalar{});
          CUB_CALL(
              DeviceSelect::Unique, sorted_only_src[i].data_ptr(),
              unique_only_src_ptr[i], unique_only_src_size[i].get(),
              only_src[i].size(0));
        }
        // Wait for the stream before the unique counts are read below.
        stream.synchronize();
        for (std::size_t i = 0; i < unique_only_src.size(); i++) {
          unique_only_src[i] = unique_only_src[i].slice(
              0, 0, static_cast(unique_only_src_size[i]));
        }

        // real_order[i] defines the compact id space: the position of a
        // vertex in this tensor is its new id.
        std::vector real_order;
        for (std::size_t i = 0; i < unique_dst_ids.size(); i++) {
          real_order.emplace_back(
              torch::cat({unique_dst_ids[i], unique_only_src[i]}));
        }
        // Sort here so that binary search can be used to lookup new_ids.
        // new_ids[i] is the second tensor returned by Sort, i.e. for each
        // sorted entry its original position in real_order[i].
        std::vector sorted_order, new_ids;
        std::vector sorted_order_ptr;
        std::vector new_ids_ptr;
        for (std::size_t i = 0; i < real_order.size(); i++) {
          auto [sorted_order_i, new_ids_i] = Sort(real_order[i], num_bits);
          sorted_order_ptr.emplace_back(sorted_order_i.data_ptr());
          new_ids_ptr.emplace_back(new_ids_i.data_ptr());
          sorted_order.emplace_back(std::move(sorted_order_i));
          new_ids.emplace_back(std::move(new_ids_i));
        }
        // Holds the found locations of the src and dst ids in the
        // sorted_order. Later is used to lookup the new ids of the src_ids
        // and dst_ids tensors.
        std::vector(0))>
            new_dst_ids_loc;
        for (std::size_t i = 0; i < sorted_order.size(); i++) {
          new_dst_ids_loc.emplace_back(
              allocator.AllocateStorage(dst_ids[i].size(0)));
          THRUST_CALL(
              lower_bound, sorted_order_ptr[i],
              sorted_order_ptr[i] + sorted_order[i].size(0), dst_ids_ptr[i],
              dst_ids_ptr[i] + dst_ids[i].size(0), new_dst_ids_loc[i].get());
        }

        std::vector> all_exist;
        at::cuda::CUDAEvent all_exist_event;
        // Set once at least one Min reduction has been enqueued, so the
        // event is only recorded (and later waited on) when needed.
        bool should_record = false;
        // Check if unique_dst_ids includes all dst_ids.
        // The Min over the equality flags is false if any lookup missed.
        for (std::size_t i = 0; i < dst_ids.size(); i++) {
          if (dst_ids[i].size(0) > 0) {
            thrust::counting_iterator iota(0);
            auto equal_it = thrust::make_transform_iterator(
                iota, EqualityFunc{
                          sorted_order_ptr[i], new_dst_ids_loc[i].get(),
                          dst_ids_ptr[i]});
            all_exist.emplace_back(Min(equal_it, dst_ids[i].size(0)));
            should_record = true;
          } else {
            // Placeholder keeps all_exist aligned with the batch index; it
            // is never read because the check below skips empty dst_ids.
            all_exist.emplace_back(cuda::CopyScalar{});
          }
        }
        if (should_record) all_exist_event.record();

        std::vector(0))>
            new_src_ids_loc;
        for (std::size_t i = 0; i < sorted_order.size(); i++) {
          new_src_ids_loc.emplace_back(
              allocator.AllocateStorage(src_ids[i].size(0)));
          THRUST_CALL(
              lower_bound, sorted_order_ptr[i],
              sorted_order_ptr[i] + sorted_order[i].size(0), src_ids_ptr[i],
              src_ids_ptr[i] + src_ids[i].size(0), new_src_ids_loc[i].get());
        }

        // Finally, lookup the new compact ids of the src and dst tensors
        // via gather operations.
        std::vector new_src_ids;
        for (std::size_t i = 0; i < src_ids.size(); i++) {
          new_src_ids.emplace_back(torch::empty_like(src_ids[i]));
          THRUST_CALL(
              gather, new_src_ids_loc[i].get(),
              new_src_ids_loc[i].get() + src_ids[i].size(0),
              new_ids[i].data_ptr(),
              new_src_ids[i].data_ptr());
        }
        // Perform check before we gather for the dst indices.
        for (std::size_t i = 0; i < dst_ids.size(); i++) {
          if (dst_ids[i].size(0) > 0) {
            // Wait on the event once, the first time a result is needed.
            if (should_record) {
              all_exist_event.synchronize();
              should_record = false;
            }
            if (!static_cast(all_exist[i])) {
              throw std::out_of_range("Some ids not found.");
            }
          }
        }
        std::vector new_dst_ids;
        for (std::size_t i = 0; i < dst_ids.size(); i++) {
          new_dst_ids.emplace_back(torch::empty_like(dst_ids[i]));
          THRUST_CALL(
              gather, new_dst_ids_loc[i].get(),
              new_dst_ids_loc[i].get() + dst_ids[i].size(0),
              new_ids[i].data_ptr(),
              new_dst_ids[i].data_ptr());
        }
        std::vector>
            results;
        for (std::size_t i = 0; i < src_ids.size(); i++) {
          results.emplace_back(
              std::move(real_order[i]), std::move(new_src_ids[i]),
              std::move(new_dst_ids[i]));
        }
        return results;
      }));
}

// Batched entry point. Uses the hash-map implementation when the device's
// compute capability is at least 70; otherwise requires world_size <= 1 and
// falls back to the sort-based implementation above, appending to each
// result a two-element int64 tensor [0, real_order.size(0)].
std::vector<
    std::tuple>
UniqueAndCompactBatched(
    const std::vector& src_ids,
    const std::vector& dst_ids,
    const std::vector& unique_dst_ids, const int64_t rank,
    const int64_t world_size) {
  if (cuda::compute_capability() >= 70) {
    // Utilizes a hash table based implementation, the mapped id of a vertex
    // will be monotonically increasing as the first occurrence index of it in
    // torch.cat([unique_dst_ids, src_ids]). Thus, it is deterministic.
    return UniqueAndCompactBatchedHashMapBased(
        src_ids, dst_ids, unique_dst_ids, rank, world_size);
  }
  TORCH_CHECK(
      world_size <= 1,
      "Cooperative Minibatching (arXiv:2310.12403) is not supported on "
      "pre-Volta generation GPUs.");
  // Utilizes a sort based algorithm, the mapped id of a vertex part of the
  // src_ids but not part of the unique_dst_ids will be monotonically increasing
  // as the actual vertex id increases. Thus, it is deterministic.
  auto results3 =
      UniqueAndCompactBatchedSortBased(src_ids, dst_ids, unique_dst_ids);
  std::vector<
      std::tuple>
      results4;
  // One pinned, zero-initialized buffer holding a pair of offsets per batch
  // item; each iteration hands out the leading pair and advances the view.
  auto offsets = torch::zeros(
      2 * results3.size(),
      c10::TensorOptions().dtype(torch::kInt64).pinned_memory(true));
  for (const auto& [a, b, c] : results3) {
    auto d = offsets.slice(0, 0, 2);
    // d[0] stays 0 from torch::zeros; d[1] is the number of unique ids.
    d.data_ptr()[1] = a.size(0);
    results4.emplace_back(a, b, c, d);
    offsets = offsets.slice(0, 2);
  }
  return results4;
}

// Single-item convenience wrapper: wraps each argument in a one-element
// batch and returns the first (only) result of UniqueAndCompactBatched.
std::tuple
UniqueAndCompact(
    const torch::Tensor src_ids, const torch::Tensor dst_ids,
    const torch::Tensor unique_dst_ids, const int64_t rank,
    const int64_t world_size) {
  return UniqueAndCompactBatched(
      {src_ids}, {dst_ids}, {unique_dst_ids}, rank, world_size)[0];
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/cuda/utils.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 *
 * @file utils.h
 * @brief CUDA utilities.
 */

#ifndef GRAPHBOLT_CUDA_UTILS_H_
#define GRAPHBOLT_CUDA_UTILS_H_

// The cache line size of GPU.
constexpr int GPU_CACHE_LINE_SIZE = 128;
// The max number of threads per block.
constexpr int CUDA_MAX_NUM_THREADS = 1024;

namespace graphbolt {
namespace cuda {

/**
 * @brief Returns the compute capability of the cuda device, e.g. 70 for Volta.
 *
 * The value is the SM version reported by cub::SmVersion divided by 10.
 *
 * @param device Device index to query; defaults to the device of the current
 * stream.
 */
inline int compute_capability(
    int device = cuda::GetCurrentStream().device_index()) {
  int sm_version;
  CUDA_RUNTIME_CHECK(cub::SmVersion(sm_version, device));
  return sm_version / 10;
};

/**
 * @brief Calculate the number of threads needed given the size of the dimension
 * to be processed.
 *
 * It finds the largest power of two that is less than or equal to the minimum
 * of size and CUDA_MAX_NUM_THREADS.
 *
 * The result is never smaller than 1, including when size is less than 1.
 */
inline int FindNumThreads(int size) {
  int ret = 1;
  // Keep doubling while the next power of two still fits under the bound.
  while ((ret << 1) <= std::min(size, CUDA_MAX_NUM_THREADS)) {
    ret <<= 1;
  }
  return ret;
}

/**
 * @brief Calculate the smallest number of bits needed to represent a given
 * range of integers [0, range).
*/
template
int NumberOfBits(const T& range) {
  if (range <= 1) {
    // ranges of 0 or 1 require no bits to store
    return 0;
  }

  int bits = 1;
  const auto urange = static_cast>(range);
  // Grow bits until 2^bits covers urange; bits is capped at sizeof(T) * 8.
  while (bits < static_cast(sizeof(T) * 8) && (1ull << bits) < urange) {
    ++bits;
  }

  return bits;
}

}  // namespace cuda
}  // namespace graphbolt

#endif  // GRAPHBOLT_CUDA_UTILS_H_

================================================
FILE: graphbolt/src/expand_indptr.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 *  Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * @file expand_indptr.cc
 * @brief ExpandIndptr operators.
 */
#include
#include

#include "./macro.h"
#include "./utils.h"

namespace graphbolt {
namespace ops {

/**
 * @brief Expands indptr into one output entry per edge.
 *
 * Dispatches to ExpandIndptrImpl when indptr is on the GPU and node_ids is
 * either absent or also on the GPU. Every other input takes the
 * repeat_interleave fallback below, which repeats either the implicit row
 * index (no node_ids) or node_ids itself by indptr.diff().
 */
torch::Tensor ExpandIndptr(
    torch::Tensor indptr, torch::ScalarType dtype,
    torch::optional node_ids, torch::optional output_size) {
  if (utils::is_on_gpu(indptr) &&
      (!node_ids.has_value() || utils::is_on_gpu(node_ids.value()))) {
    GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(c10::DeviceType::CUDA, "ExpandIndptr", {
      return ExpandIndptrImpl(indptr, dtype, node_ids, output_size);
    });
  }
  if (!node_ids.has_value()) {
    // The result is converted to the requested dtype after the expansion.
    return torch::repeat_interleave(indptr.diff(), output_size).to(dtype);
  }
  // node_ids is converted to the requested dtype before the expansion.
  return node_ids.value().to(dtype).repeat_interleave(
      indptr.diff(), 0, output_size);
}

/**
 * @brief Dispatches to IndptrEdgeIdsImpl when indptr is on the GPU and offset
 * is either absent or also on the GPU.
 *
 * Any input that is not dispatched above reaches the unconditional
 * TORCH_CHECK(false, ...) because no CPU implementation exists.
 */
torch::Tensor IndptrEdgeIds(
    torch::Tensor indptr, torch::ScalarType dtype, torch::optional offset,
    torch::optional output_size) {
  if (utils::is_on_gpu(indptr) &&
      (!offset.has_value() || utils::is_on_gpu(offset.value()))) {
    GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(
        c10::DeviceType::CUDA, "IndptrEdgeIds", {
          return IndptrEdgeIdsImpl(indptr, dtype, offset, output_size);
        });
  }
  TORCH_CHECK(false, "CPU implementation of IndptrEdgeIds is not available.");
}

// Register expand_indptr: the wrapper above for the CPU key and, when built
// with GRAPHBOLT_USE_CUDA, the CUDA implementation directly for the CUDA key.
TORCH_LIBRARY_IMPL(graphbolt, CPU, m) {
  m.impl("expand_indptr", &ExpandIndptr);
}

#ifdef GRAPHBOLT_USE_CUDA
TORCH_LIBRARY_IMPL(graphbolt, CUDA, m) {
  m.impl("expand_indptr", &ExpandIndptrImpl);
}
#endif
// Neither operator has a backward pass; autograd gets the stock
// "not implemented" fallback.
TORCH_LIBRARY_IMPL(graphbolt, Autograd, m) {
  m.impl("expand_indptr", torch::autograd::autogradNotImplementedFallback());
}

// Register indptr_edge_ids the same way expand_indptr is registered.
TORCH_LIBRARY_IMPL(graphbolt, CPU, m) {
  m.impl("indptr_edge_ids", &IndptrEdgeIds);
}

#ifdef GRAPHBOLT_USE_CUDA
TORCH_LIBRARY_IMPL(graphbolt, CUDA, m) {
  m.impl("indptr_edge_ids", &IndptrEdgeIdsImpl);
}
#endif

TORCH_LIBRARY_IMPL(graphbolt, Autograd, m) {
  m.impl("indptr_edge_ids", torch::autograd::autogradNotImplementedFallback());
}

}  // namespace ops
}  // namespace graphbolt

================================================
FILE: graphbolt/src/expand_indptr.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 *  Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 * @file expand_indptr.h
 * @brief ExpandIndptr operators.
 */
#ifndef GRAPHBOLT_EXPAND_INDPTR_H_
#define GRAPHBOLT_EXPAND_INDPTR_H_

#include

namespace graphbolt {
namespace ops {

/**
 * @brief ExpandIndptr implements conversion from a given indptr offset
 * tensor to a COO format tensor. If node_ids is not given, it is assumed to be
 * equal to torch::arange(indptr.size(0) - 1, dtype=dtype).
 *
 * @param indptr The indptr offset tensor.
 * @param dtype The dtype of the returned output tensor.
 * @param node_ids Optional 1D tensor holding the node ids.
 * @param output_size Optional, value of indptr[-1]. Passing it eliminates CPU
 * GPU synchronization.
 *
 * @return The resulting tensor.
 */
torch::Tensor ExpandIndptr(
    torch::Tensor indptr, torch::ScalarType dtype,
    torch::optional node_ids = torch::nullopt,
    torch::optional output_size = torch::nullopt);

/**
 * @brief IndptrEdgeIds implements conversion from a given indptr offset
 * tensor to a COO edge ids tensor. For a given indptr [0, 2, 5, 7] and offset
 * tensor [0, 100, 200], the output will be [0, 1, 100, 101, 102, 200, 201]. If
 * offset was not provided, the output would be [0, 1, 0, 1, 2, 0, 1].
 *
 * @param indptr The indptr offset tensor.
 * @param dtype The dtype of the returned output tensor.
 * @param offset The offset tensor.
 * @param output_size Optional value of indptr[-1]. Passing it eliminates CPU
 * GPU synchronization.
 *
 * @return The resulting tensor.
 */
torch::Tensor IndptrEdgeIds(
    torch::Tensor indptr, torch::ScalarType dtype, torch::optional offset,
    torch::optional output_size);

}  // namespace ops
}  // namespace graphbolt

#endif  // GRAPHBOLT_EXPAND_INDPTR_H_

================================================
FILE: graphbolt/src/feature_cache.cc
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file feature_cache.cc
 * @brief Feature cache implementation on the CPU.
*/
#include "./feature_cache.h"

#include "./index_select.h"
#include "./utils.h"

namespace graphbolt {
namespace storage {

// Grain size passed to graphbolt::parallel_for_each by Query and Replace.
constexpr int kIntGrainSize = 64;

// Allocates the uninitialized backing tensor with the requested shape, dtype
// and pinning.
FeatureCache::FeatureCache(
    const std::vector& shape, torch::ScalarType dtype, bool pin_memory)
    : tensor_(torch::empty(
          shape, c10::TensorOptions().dtype(dtype).pinned_memory(pin_memory))) {
}

// Copies cache row positions[i] into output row indices[i] for every i in
// [0, positions.size(0)). The output has `size` rows and is left uninitialized
// everywhere else. No bounds checks are performed on positions or indices.
torch::Tensor FeatureCache::Query(
    torch::Tensor positions, torch::Tensor indices, int64_t size) {
  // The output is pinned whenever either index tensor is pinned.
  const bool pin_memory =
      utils::is_pinned(positions) || utils::is_pinned(indices);
  // Same shape as the cache except for the first dimension.
  std::vector output_shape{
      tensor_.sizes().begin(), tensor_.sizes().end()};
  output_shape[0] = size;
  auto values =
      torch::empty(output_shape, tensor_.options().pinned_memory(pin_memory));
  // Size in bytes of a single row, i.e. one slice along dimension 0.
  const auto row_bytes = values.slice(0, 0, 1).numel() * values.element_size();
  auto values_ptr = reinterpret_cast(values.data_ptr());
  const auto tensor_ptr = reinterpret_cast(tensor_.data_ptr());
  const auto positions_ptr = positions.data_ptr();
  const auto indices_ptr = indices.data_ptr();
  graphbolt::parallel_for_each(
      0, positions.size(0), kIntGrainSize, [&](const int64_t i) {
        std::memcpy(
            values_ptr + indices_ptr[i] * row_bytes,
            tensor_ptr + positions_ptr[i] * row_bytes, row_bytes);
      });
  return values;
}

// Wraps Query in async(); the tensors are captured by value.
c10::intrusive_ptr> FeatureCache::QueryAsync(
    torch::Tensor positions, torch::Tensor indices, int64_t size) {
  return async([=] { return Query(positions, indices, size); });
}

// Delegates to ops::IndexSelect on the cache tensor.
torch::Tensor FeatureCache::IndexSelect(torch::Tensor positions) {
  return ops::IndexSelect(tensor_, positions);
}

// Overwrites cache row positions[i] with values row i. Entries with a negative
// position are skipped. Requires one position per value row and a per-row byte
// size that matches the cache's.
void FeatureCache::Replace(torch::Tensor positions, torch::Tensor values) {
  TORCH_CHECK(positions.size(0) == values.size(0));
  // Nothing to copy; also avoids the row size check on an empty input.
  if (values.numel() == 0) return;
  const auto row_bytes = values.slice(0, 0, 1).numel() * values.element_size();
  TORCH_CHECK(
      row_bytes == tensor_.slice(0, 0, 1).numel() * tensor_.element_size(),
      "The # bytes of a single row should match the cache's.");
  auto values_ptr = reinterpret_cast(values.data_ptr());
  const auto tensor_ptr = reinterpret_cast(tensor_.data_ptr());
  const auto positions_ptr = positions.data_ptr();
  graphbolt::parallel_for_each(
      0, positions.size(0), kIntGrainSize, [&](const int64_t i) {
        const auto position = positions_ptr[i];
        if (position >= 0) {
          std::memcpy(
              tensor_ptr + position * row_bytes, values_ptr + i * row_bytes,
              row_bytes);
        }
      });
}

// Wraps Replace in async(); the tensors are captured by value.
c10::intrusive_ptr> FeatureCache::ReplaceAsync(
    torch::Tensor positions, torch::Tensor values) {
  return async([=] { return Replace(positions, values); });
}

// Factory that forwards its arguments to c10::make_intrusive.
c10::intrusive_ptr FeatureCache::Create(
    const std::vector& shape, torch::ScalarType dtype, bool pin_memory) {
  return c10::make_intrusive(shape, dtype, pin_memory);
}

}  // namespace storage
}  // namespace graphbolt

================================================
FILE: graphbolt/src/feature_cache.h
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file feature_cache.h
 * @brief Feature cache implementation on the CPU.
 */

#ifndef GRAPHBOLT_FEATURE_CACHE_H_
#define GRAPHBOLT_FEATURE_CACHE_H_

#include
#include
#include
#include

namespace graphbolt {
namespace storage {

struct FeatureCache : public torch::CustomClassHolder {
  /**
   * @brief Constructor for the FeatureCache struct.
   *
   * @param shape The shape of the cache.
   * @param dtype The dtype of elements stored in the cache.
   * @param pin_memory Whether to pin the memory of the cache storage tensor.
*/
  FeatureCache(
      const std::vector& shape, torch::ScalarType dtype, bool pin_memory);

  /** @brief Whether the cache storage tensor is pinned. */
  bool IsPinned() const { return tensor_.is_pinned(); }

  /** @brief Size of the cache storage tensor in bytes. */
  int64_t NumBytes() const { return tensor_.numel() * tensor_.element_size(); }

  /**
   * @brief The cache query function. Allocates an empty tensor `values` with
   * size as the first dimension and runs
   * values[indices[:positions.size(0)]] = cache_tensor[positions] before
   * returning it.
   *
   * @param positions The positions of the queried items.
   * @param indices The indices of the queried items among the original keys.
   * Only the first portion corresponding to the provided positions tensor is
   * used, e.g. indices[:positions.size(0)].
   * @param size The size of the original keys, hence the first dimension of
   * the output shape.
   *
   * @return The values tensor is returned. Its memory is pinned if either
   * positions or indices is pinned.
   */
  torch::Tensor Query(
      torch::Tensor positions, torch::Tensor indices, int64_t size);

  /** @brief Asynchronous variant of Query taking the same arguments. */
  c10::intrusive_ptr> QueryAsync(
      torch::Tensor positions, torch::Tensor indices, int64_t size);

  /**
   * @brief The cache tensor index_select returns cache_tensor[positions].
   *
   * @param positions The positions of the queried items.
   *
   * @return The values tensor is returned on the same device as positions.
   */
  torch::Tensor IndexSelect(torch::Tensor positions);

  /**
   * @brief The cache replace function.
   *
   * @param positions The positions to replace in the cache.
   * @param values The values to be inserted into the cache.
*/ void Replace(torch::Tensor positions, torch::Tensor values); c10::intrusive_ptr> ReplaceAsync( torch::Tensor positions, torch::Tensor values); static c10::intrusive_ptr Create( const std::vector& shape, torch::ScalarType dtype, bool pin_memory); private: torch::Tensor tensor_; }; } // namespace storage } // namespace graphbolt #endif // GRAPHBOLT_FEATURE_CACHE_H_ ================================================ FILE: graphbolt/src/fused_csc_sampling_graph.cc ================================================ /** * Copyright (c) 2023 by Contributors * @file fused_csc_sampling_graph.cc * @brief Source file of sampling graph. */ #include #include #include #include #include #include #include #include #include #include #include #include #include "./expand_indptr.h" #include "./index_select.h" #include "./macro.h" #include "./random.h" #include "./shared_memory_helper.h" #include "./utils.h" namespace { torch::optional> TensorizeDict( const torch::optional>& dict) { if (!dict.has_value()) { return torch::nullopt; } torch::Dict result; for (const auto& pair : dict.value()) { result.insert(pair.key(), torch::tensor(pair.value(), torch::kInt64)); } return result; } torch::optional> DetensorizeDict( const torch::optional>& dict) { if (!dict.has_value()) { return torch::nullopt; } torch::Dict result; for (const auto& pair : dict.value()) { result.insert(pair.key(), pair.value().item()); } return result; } } // namespace namespace graphbolt { namespace sampling { static const int kPickleVersion = 6199; FusedCSCSamplingGraph::FusedCSCSamplingGraph( const torch::Tensor& indptr, const torch::Tensor& indices, const torch::optional& node_type_offset, const torch::optional& type_per_edge, const torch::optional& node_type_to_id, const torch::optional& edge_type_to_id, const torch::optional& node_attributes, const torch::optional& edge_attributes) : indptr_(indptr), indices_(indices), node_type_offset_(node_type_offset), type_per_edge_(type_per_edge), 
node_type_to_id_(node_type_to_id), edge_type_to_id_(edge_type_to_id), node_attributes_(node_attributes), edge_attributes_(edge_attributes) { TORCH_CHECK(indptr.dim() == 1); TORCH_CHECK(indices.dim() == 1); TORCH_CHECK(indptr.device() == indices.device()); } c10::intrusive_ptr FusedCSCSamplingGraph::Create( const torch::Tensor& indptr, const torch::Tensor& indices, const torch::optional& node_type_offset, const torch::optional& type_per_edge, const torch::optional& node_type_to_id, const torch::optional& edge_type_to_id, const torch::optional& node_attributes, const torch::optional& edge_attributes) { if (node_type_offset.has_value()) { auto& offset = node_type_offset.value(); TORCH_CHECK(offset.dim() == 1); TORCH_CHECK(node_type_to_id.has_value()); TORCH_CHECK( offset.size(0) == static_cast(node_type_to_id.value().size() + 1)); } if (type_per_edge.has_value()) { TORCH_CHECK(type_per_edge.value().dim() == 1); TORCH_CHECK(type_per_edge.value().size(0) == indices.size(0)); TORCH_CHECK(edge_type_to_id.has_value()); } if (node_attributes.has_value()) { for (const auto& pair : node_attributes.value()) { TORCH_CHECK( pair.value().size(0) == indptr.size(0) - 1, "Expected node_attribute.size(0) and num_nodes to be equal, " "but node_attribute.size(0) was ", pair.value().size(0), ", and num_nodes was ", indptr.size(0) - 1, "."); } } if (edge_attributes.has_value()) { for (const auto& pair : edge_attributes.value()) { TORCH_CHECK( pair.value().size(0) == indices.size(0), "Expected edge_attribute.size(0) and num_edges to be equal, " "but edge_attribute.size(0) was ", pair.value().size(0), ", and num_edges was ", indices.size(0), "."); } } return c10::make_intrusive( indptr, indices, node_type_offset, type_per_edge, node_type_to_id, edge_type_to_id, node_attributes, edge_attributes); } void FusedCSCSamplingGraph::Load(torch::serialize::InputArchive& archive) { const int64_t magic_num = read_from_archive(archive, "FusedCSCSamplingGraph/magic_num"); TORCH_CHECK( magic_num == 
kCSCSamplingGraphSerializeMagic, "Magic numbers mismatch when loading FusedCSCSamplingGraph."); indptr_ = read_from_archive(archive, "FusedCSCSamplingGraph/indptr"); indices_ = read_from_archive( archive, "FusedCSCSamplingGraph/indices"); if (read_from_archive( archive, "FusedCSCSamplingGraph/has_node_type_offset")) { node_type_offset_ = read_from_archive( archive, "FusedCSCSamplingGraph/node_type_offset"); } if (read_from_archive( archive, "FusedCSCSamplingGraph/has_type_per_edge")) { type_per_edge_ = read_from_archive( archive, "FusedCSCSamplingGraph/type_per_edge"); } if (read_from_archive( archive, "FusedCSCSamplingGraph/has_node_type_to_id")) { node_type_to_id_ = read_from_archive( archive, "FusedCSCSamplingGraph/node_type_to_id"); } if (read_from_archive( archive, "FusedCSCSamplingGraph/has_edge_type_to_id")) { edge_type_to_id_ = read_from_archive( archive, "FusedCSCSamplingGraph/edge_type_to_id"); } if (read_from_archive( archive, "FusedCSCSamplingGraph/has_node_attributes")) { node_attributes_ = read_from_archive( archive, "FusedCSCSamplingGraph/node_attributes"); } if (read_from_archive( archive, "FusedCSCSamplingGraph/has_edge_attributes")) { edge_attributes_ = read_from_archive( archive, "FusedCSCSamplingGraph/edge_attributes"); } } void FusedCSCSamplingGraph::Save( torch::serialize::OutputArchive& archive) const { archive.write( "FusedCSCSamplingGraph/magic_num", kCSCSamplingGraphSerializeMagic); archive.write("FusedCSCSamplingGraph/indptr", indptr_); archive.write("FusedCSCSamplingGraph/indices", indices_); archive.write( "FusedCSCSamplingGraph/has_node_type_offset", node_type_offset_.has_value()); if (node_type_offset_) { archive.write( "FusedCSCSamplingGraph/node_type_offset", node_type_offset_.value()); } archive.write( "FusedCSCSamplingGraph/has_type_per_edge", type_per_edge_.has_value()); if (type_per_edge_) { archive.write( "FusedCSCSamplingGraph/type_per_edge", type_per_edge_.value()); } archive.write( 
"FusedCSCSamplingGraph/has_node_type_to_id", node_type_to_id_.has_value()); if (node_type_to_id_) { archive.write( "FusedCSCSamplingGraph/node_type_to_id", node_type_to_id_.value()); } archive.write( "FusedCSCSamplingGraph/has_edge_type_to_id", edge_type_to_id_.has_value()); if (edge_type_to_id_) { archive.write( "FusedCSCSamplingGraph/edge_type_to_id", edge_type_to_id_.value()); } archive.write( "FusedCSCSamplingGraph/has_node_attributes", node_attributes_.has_value()); if (node_attributes_) { archive.write( "FusedCSCSamplingGraph/node_attributes", node_attributes_.value()); } archive.write( "FusedCSCSamplingGraph/has_edge_attributes", edge_attributes_.has_value()); if (edge_attributes_) { archive.write( "FusedCSCSamplingGraph/edge_attributes", edge_attributes_.value()); } } void FusedCSCSamplingGraph::SetState( const torch::Dict>& state) { // State is a dict of dicts. The tensor-type attributes are stored in the dict // with key "independent_tensors". The dict-type attributes (edge_attributes) // are stored directly with the their name as the key. 
const auto& independent_tensors = state.at("independent_tensors"); TORCH_CHECK( independent_tensors.at("version_number") .equal(torch::tensor({kPickleVersion})), "Version number mismatches when loading pickled FusedCSCSamplingGraph.") indptr_ = independent_tensors.at("indptr"); indices_ = independent_tensors.at("indices"); if (independent_tensors.find("node_type_offset") != independent_tensors.end()) { node_type_offset_ = independent_tensors.at("node_type_offset"); } if (independent_tensors.find("type_per_edge") != independent_tensors.end()) { type_per_edge_ = independent_tensors.at("type_per_edge"); } if (state.find("node_type_to_id") != state.end()) { node_type_to_id_ = DetensorizeDict(state.at("node_type_to_id")); } if (state.find("edge_type_to_id") != state.end()) { edge_type_to_id_ = DetensorizeDict(state.at("edge_type_to_id")); } if (state.find("node_attributes") != state.end()) { node_attributes_ = state.at("node_attributes"); } if (state.find("edge_attributes") != state.end()) { edge_attributes_ = state.at("edge_attributes"); } } torch::Dict> FusedCSCSamplingGraph::GetState() const { // State is a dict of dicts. The tensor-type attributes are stored in the dict // with key "independent_tensors". The dict-type attributes (edge_attributes) // are stored directly with the their name as the key. torch::Dict> state; torch::Dict independent_tensors; // Serialization version number. It indicates the serialization method of the // whole state. 
independent_tensors.insert("version_number", torch::tensor({kPickleVersion})); independent_tensors.insert("indptr", indptr_); independent_tensors.insert("indices", indices_); if (node_type_offset_.has_value()) { independent_tensors.insert("node_type_offset", node_type_offset_.value()); } if (type_per_edge_.has_value()) { independent_tensors.insert("type_per_edge", type_per_edge_.value()); } state.insert("independent_tensors", independent_tensors); if (node_type_to_id_.has_value()) { state.insert("node_type_to_id", TensorizeDict(node_type_to_id_).value()); } if (edge_type_to_id_.has_value()) { state.insert("edge_type_to_id", TensorizeDict(edge_type_to_id_).value()); } if (node_attributes_.has_value()) { state.insert("node_attributes", node_attributes_.value()); } if (edge_attributes_.has_value()) { state.insert("edge_attributes", edge_attributes_.value()); } return state; } c10::intrusive_ptr FusedCSCSamplingGraph::InSubgraph( const torch::Tensor& nodes) const { if (utils::is_on_gpu(nodes) && utils::is_accessible_from_gpu(indptr_) && utils::is_accessible_from_gpu(indices_) && (!type_per_edge_.has_value() || utils::is_accessible_from_gpu(type_per_edge_.value()))) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(c10::DeviceType::CUDA, "InSubgraph", { return ops::InSubgraph(indptr_, indices_, nodes, type_per_edge_); }); } std::vector tensors{indices_}; if (type_per_edge_.has_value()) { tensors.push_back(*type_per_edge_); } auto [output_indptr, results] = ops::IndexSelectCSCBatched(indptr_, tensors, nodes, true, torch::nullopt); torch::optional type_per_edge; if (type_per_edge_.has_value()) { type_per_edge = results.at(1); } return c10::make_intrusive( // original_row_node_ids is not computed here and is unused. output_indptr, results.at(0), results.back(), nodes, torch::nullopt, type_per_edge); } /** * @brief Get a lambda function which counts the number of the neighbors to be * sampled. 
* * @param fanouts The number of edges to be sampled for each node with or * without considering edge types. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param type_per_edge A tensor representing the type of each edge, if * present. * @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * * @return A lambda function (int64_t seed_offset, int64_t offset, int64_t * num_neighbors) -> torch::Tensor, which takes seed offset (the offset of the * seed to sample), offset (the starting edge ID of the given node) and * num_neighbors (number of neighbors) as params and returns the pick number of * the given node. */ auto GetNumPickFn( const std::vector& fanouts, bool replace, const torch::optional& type_per_edge, const torch::optional& probs_or_mask, bool with_seed_offsets) { // If fanouts.size() > 1, returns the total number of all edge types of the // given node. 
return [&fanouts, replace, &probs_or_mask, &type_per_edge, with_seed_offsets]( int64_t offset, int64_t num_neighbors, auto num_picked_ptr, int64_t seed_index, const std::vector& etype_id_to_num_picked_offset) { if (fanouts.size() > 1) { NumPickByEtype( with_seed_offsets, fanouts, replace, type_per_edge.value(), probs_or_mask, offset, num_neighbors, num_picked_ptr, seed_index, etype_id_to_num_picked_offset); } else { NumPick( fanouts[0], replace, probs_or_mask, offset, num_neighbors, num_picked_ptr + seed_index); } }; } auto GetTemporalNumPickFn( torch::Tensor seed_timestamp, torch::Tensor csc_indices, const std::vector& fanouts, bool replace, const torch::optional& type_per_edge, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp) { // If fanouts.size() > 1, returns the total number of all edge types of the // given node. return [&seed_timestamp, &csc_indices, &fanouts, replace, &seed_pre_time_window, &probs_or_mask, &type_per_edge, &node_timestamp, &edge_timestamp]( int64_t seed_offset, int64_t offset, int64_t num_neighbors) { if (fanouts.size() > 1) { return TemporalNumPickByEtype( seed_timestamp, csc_indices, fanouts, replace, type_per_edge.value(), seed_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, seed_offset, offset, num_neighbors); } else { return TemporalNumPick( seed_timestamp, csc_indices, fanouts[0], replace, seed_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, seed_offset, offset, num_neighbors); } }; } /** * @brief Get a lambda function which contains the sampling process. * * @param fanouts The number of edges to be sampled for each node with or * without considering edge types. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. 
* @param options Tensor options specifying the desired data type of the result. * @param type_per_edge A tensor representing the type of each edge, if * present. * @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * @param args Contains sampling algorithm specific arguments. * * @return A lambda function: (int64_t seed_offset, int64_t offset, int64_t * num_neighbors, PickedType* picked_data_ptr) -> torch::Tensor, which takes * seed_offset (the offset of the seed to sample), offset (the starting edge ID * of the given node) and num_neighbors (number of neighbors) as params and puts * the picked neighbors at the address specified by picked_data_ptr. */ template auto GetPickFn( const std::vector& fanouts, bool replace, const torch::TensorOptions& options, const torch::optional& type_per_edge, const torch::optional& probs_or_mask, bool with_seed_offsets, SamplerArgs args) { return [&fanouts, replace, &options, &type_per_edge, &probs_or_mask, args, with_seed_offsets]( int64_t offset, int64_t num_neighbors, auto picked_data_ptr, int64_t seed_offset, auto subgraph_indptr_ptr, const std::vector& etype_id_to_num_picked_offset) { // If fanouts.size() > 1, perform sampling for each edge type of each // node; otherwise just sample once for each node with no regard of edge // types. 
if (fanouts.size() > 1) { return PickByEtype( with_seed_offsets, offset, num_neighbors, fanouts, replace, options, type_per_edge.value(), probs_or_mask, args, picked_data_ptr, seed_offset, subgraph_indptr_ptr, etype_id_to_num_picked_offset); } else { picked_data_ptr += subgraph_indptr_ptr[seed_offset]; int64_t num_sampled = Pick( offset, num_neighbors, fanouts[0], replace, options, probs_or_mask, args, picked_data_ptr); if (type_per_edge) { std::sort(picked_data_ptr, picked_data_ptr + num_sampled); } return num_sampled; } }; } template auto GetTemporalPickFn( torch::Tensor seed_timestamp, torch::Tensor csc_indices, const std::vector& fanouts, bool replace, const torch::TensorOptions& options, const torch::optional& type_per_edge, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, SamplerArgs args) { return [&seed_timestamp, &csc_indices, &fanouts, replace, &options, &type_per_edge, &seed_pre_time_window, &probs_or_mask, &node_timestamp, &edge_timestamp, args]( int64_t seed_offset, int64_t offset, int64_t num_neighbors, auto picked_data_ptr) { // If fanouts.size() > 1, perform sampling for each edge type of each // node; otherwise just sample once for each node with no regard of edge // types. 
if (fanouts.size() > 1) { return TemporalPickByEtype( seed_timestamp, csc_indices, seed_offset, offset, num_neighbors, fanouts, replace, options, type_per_edge.value(), seed_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, args, picked_data_ptr); } else { int64_t num_sampled = TemporalPick( seed_timestamp, csc_indices, seed_offset, offset, num_neighbors, fanouts[0], replace, options, seed_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, args, picked_data_ptr); if (type_per_edge.has_value()) { std::sort(picked_data_ptr, picked_data_ptr + num_sampled); } return num_sampled; } }; } template c10::intrusive_ptr FusedCSCSamplingGraph::SampleNeighborsImpl( const torch::Tensor& seeds, const torch::optional>& seed_offsets, const std::vector& fanouts, NumPickFn num_pick_fn, PickFn pick_fn) const { const int64_t num_seeds = seeds.size(0); const auto indptr_options = indptr_.options(); // Calculate GrainSize for parallel_for. // Set the default grain size to 64. const int64_t grain_size = 64; torch::Tensor picked_eids; torch::Tensor subgraph_indptr; torch::Tensor subgraph_indices; torch::optional subgraph_type_per_edge = torch::nullopt; torch::optional edge_offsets = torch::nullopt; bool with_seed_offsets = seed_offsets.has_value(); bool hetero_with_seed_offsets = with_seed_offsets && fanouts.size() > 1 && Temporal == TemporalOption::NOT_TEMPORAL; // Get the number of edge types. If it's homo or if the size of fanouts is 1 // (hetero graph but sampled as a homo graph), set num_etypes as 1. // In temporal sampling, this will not be used for now since the logic hasn't // been adopted for temporal sampling. const int64_t num_etypes = (edge_type_to_id_.has_value() && hetero_with_seed_offsets) ? 
edge_type_to_id_->size() : 1; std::vector etype_id_to_src_ntype_id(num_etypes); std::vector etype_id_to_dst_ntype_id(num_etypes); torch::optional subgraph_indptr_substract = torch::nullopt; // The pick numbers are stored in a single tensor by the order of etype. Each // etype corresponds to a group of seeds whose ntype are the same as the // dst_type. `etype_id_to_num_picked_offset` indicates the beginning offset // where each etype's corresponding seeds' pick numbers are stored in the pick // number tensor. std::vector etype_id_to_num_picked_offset(num_etypes + 1); if (hetero_with_seed_offsets) { for (auto& etype_and_id : edge_type_to_id_.value()) { auto etype = etype_and_id.key(); auto id = etype_and_id.value(); auto [src_type, dst_type] = utils::parse_src_dst_ntype_from_etype(etype); auto dst_ntype_id = node_type_to_id_->at(dst_type); etype_id_to_src_ntype_id[id] = node_type_to_id_->at(src_type); etype_id_to_dst_ntype_id[id] = dst_ntype_id; etype_id_to_num_picked_offset[id + 1] = seed_offsets->at(dst_ntype_id + 1) - seed_offsets->at(dst_ntype_id) + 1; } std::partial_sum( etype_id_to_num_picked_offset.begin(), etype_id_to_num_picked_offset.end(), etype_id_to_num_picked_offset.begin()); } else { etype_id_to_dst_ntype_id[0] = 0; etype_id_to_num_picked_offset[1] = num_seeds + 1; } // `num_rows` indicates the length of `num_picked_neighbors_per_node`, which // is used for storing pick numbers. In non-temporal hetero sampling, it // equals to sum_{etype} #seeds with ntype=dst_type(etype). In homo sampling, // it equals to `num_seeds`. const int64_t num_rows = etype_id_to_num_picked_offset[num_etypes]; torch::Tensor num_picked_neighbors_per_node = // Need to use zeros because all nodes don't have all etypes. 
torch::zeros({num_rows}, indptr_options); AT_DISPATCH_INDEX_TYPES( indptr_.scalar_type(), "SampleNeighborsImplWrappedWithIndptr", ([&] { using indptr_t = index_t; AT_DISPATCH_INDEX_TYPES( seeds.scalar_type(), "SampleNeighborsImplWrappedWithSeeds", ([&] { using seeds_t = index_t; const auto indptr_data = indptr_.data_ptr(); const auto num_picked_neighbors_data_ptr = num_picked_neighbors_per_node.data_ptr(); num_picked_neighbors_data_ptr[0] = 0; const auto seeds_data_ptr = seeds.data_ptr(); // Step 1. Calculate pick number of each node. torch::parallel_for( 0, num_seeds, grain_size, [&](int64_t begin, int64_t end) { for (int64_t i = begin; i < end; ++i) { const auto nid = seeds_data_ptr[i]; TORCH_CHECK( nid >= 0 && nid < NumNodes(), "The seed nodes' IDs should fall within the range of " "the graph's node IDs."); const auto offset = indptr_data[nid]; const auto num_neighbors = indptr_data[nid + 1] - offset; if constexpr (Temporal == TemporalOption::TEMPORAL) { num_picked_neighbors_data_ptr[i + 1] = num_neighbors == 0 ? 0 : num_pick_fn(i, offset, num_neighbors); } else { const auto seed_type_id = (hetero_with_seed_offsets) ? std::upper_bound( seed_offsets->begin(), seed_offsets->end(), i) - seed_offsets->begin() - 1 : 0; // `seed_index` indicates the index of the current // seed within the group of seeds which have the same // node type. const auto seed_index = (hetero_with_seed_offsets) ? i - seed_offsets->at(seed_type_id) : i; num_pick_fn( offset, num_neighbors, num_picked_neighbors_data_ptr + 1, seed_index, etype_id_to_num_picked_offset); } } }); // Step 2. Calculate prefix sum to get total length and offsets of // each node. It's also the indptr of the generated subgraph. 
subgraph_indptr = num_picked_neighbors_per_node.cumsum( 0, indptr_.scalar_type()); auto subgraph_indptr_data_ptr = subgraph_indptr.data_ptr(); if (hetero_with_seed_offsets) { torch::Tensor num_picked_offset_tensor = torch::empty({num_etypes + 1}, indptr_options); const auto num_picked_offset_data_ptr = num_picked_offset_tensor.data_ptr(); std::copy( etype_id_to_num_picked_offset.begin(), etype_id_to_num_picked_offset.end(), num_picked_offset_data_ptr); torch::Tensor substract_offset = torch::empty({num_etypes}, indptr_options); const auto substract_offset_data_ptr = substract_offset.data_ptr(); for (auto i = 0; i < num_etypes; ++i) { // Collect the total pick number subtract offsets. substract_offset_data_ptr[i] = subgraph_indptr_data_ptr [etype_id_to_num_picked_offset[i]]; } subgraph_indptr_substract = ops::ExpandIndptr( num_picked_offset_tensor, indptr_.scalar_type(), substract_offset); } // When doing non-temporal hetero sampling, we generate an // edge_offsets tensor. if (hetero_with_seed_offsets) { edge_offsets = torch::empty({num_etypes + 1}, indptr_options); auto edge_offsets_data_ptr = edge_offsets.value().data_ptr(); edge_offsets_data_ptr[0] = 0; for (auto i = 0; i < num_etypes; ++i) { edge_offsets_data_ptr[i + 1] = subgraph_indptr_data_ptr [etype_id_to_num_picked_offset[i + 1] - 1]; } } // Step 3. Allocate the tensor for picked neighbors. 
const auto total_length = subgraph_indptr.data_ptr()[num_rows - 1]; picked_eids = torch::empty({total_length}, indptr_options); subgraph_indices = torch::empty({total_length}, indices_.options()); if (!hetero_with_seed_offsets && type_per_edge_.has_value()) { subgraph_type_per_edge = torch::empty( {total_length}, type_per_edge_.value().options()); } auto picked_eids_data_ptr = picked_eids.data_ptr(); torch::parallel_for( 0, num_seeds, grain_size, [&](int64_t begin, int64_t end) { for (int64_t i = begin; i < end; ++i) { const auto nid = seeds_data_ptr[i]; const auto offset = indptr_data[nid]; const auto num_neighbors = indptr_data[nid + 1] - offset; auto picked_number = 0; const auto seed_type_id = (hetero_with_seed_offsets) ? std::upper_bound( seed_offsets->begin(), seed_offsets->end(), i) - seed_offsets->begin() - 1 : 0; const auto seed_index = (hetero_with_seed_offsets) ? i - seed_offsets->at(seed_type_id) : i; // Step 4. Pick neighbors for each node. if constexpr (Temporal == TemporalOption::TEMPORAL) { picked_number = num_picked_neighbors_data_ptr[i + 1]; auto picked_offset = subgraph_indptr_data_ptr[i]; if (picked_number > 0) { auto actual_picked_count = pick_fn( i, offset, num_neighbors, picked_eids_data_ptr + picked_offset); TORCH_CHECK( actual_picked_count == picked_number, "Actual picked count doesn't match the calculated" " pick number."); } } else { picked_number = pick_fn( offset, num_neighbors, picked_eids_data_ptr, seed_index, subgraph_indptr_data_ptr, etype_id_to_num_picked_offset); if (!hetero_with_seed_offsets) { TORCH_CHECK( num_picked_neighbors_data_ptr[i + 1] == picked_number, "Actual picked count doesn't match the calculated" " pick number."); } } // Step 5. Calculate other attributes and return the // subgraph. if (picked_number > 0) { // indices dtype and seeds dtype is required to be same. 
using index_t = seeds_t; auto subgraph_indices_data_ptr = subgraph_indices.data_ptr(); auto indices_data_ptr = indices_.data_ptr(); for (auto i = 0; i < num_etypes; ++i) { if (etype_id_to_dst_ntype_id[i] != seed_type_id) continue; const auto indptr_offset = with_seed_offsets ? etype_id_to_num_picked_offset[i] + seed_index : seed_index; const auto picked_begin = subgraph_indptr_data_ptr[indptr_offset]; const auto picked_end = subgraph_indptr_data_ptr[indptr_offset + 1]; for (auto j = picked_begin; j < picked_end; ++j) { subgraph_indices_data_ptr[j] = indices_data_ptr[picked_eids_data_ptr[j]]; if (hetero_with_seed_offsets && node_type_offset_.has_value()) { // Substract the node type offset from // subgraph indices. Assuming // node_type_offset has the same dtype as // indices. auto node_type_offset_data = node_type_offset_.value().data_ptr(); subgraph_indices_data_ptr[j] -= node_type_offset_data [etype_id_to_src_ntype_id[i]]; } } } if (!hetero_with_seed_offsets && type_per_edge_.has_value()) { // When hetero graph is sampled as a homo graph, we // still generate type_per_edge tensor for this // situation. 
AT_DISPATCH_INTEGRAL_TYPES( subgraph_type_per_edge.value().scalar_type(), "IndexSelectTypePerEdge", ([&] { auto subgraph_type_per_edge_data_ptr = subgraph_type_per_edge.value() .data_ptr(); auto type_per_edge_data_ptr = type_per_edge_.value().data_ptr(); const auto picked_offset = subgraph_indptr_data_ptr[seed_index]; for (auto j = picked_offset; j < picked_offset + picked_number; ++j) subgraph_type_per_edge_data_ptr[j] = type_per_edge_data_ptr [picked_eids_data_ptr[j]]; })); } } } }); })); })); if (subgraph_indptr_substract.has_value()) { subgraph_indptr -= subgraph_indptr_substract.value(); } return c10::make_intrusive( subgraph_indptr, subgraph_indices, picked_eids, seeds, torch::nullopt, subgraph_type_per_edge, edge_offsets); } c10::intrusive_ptr FusedCSCSamplingGraph::SampleNeighbors( torch::optional seeds, torch::optional> seed_offsets, const std::vector& fanouts, bool replace, bool layer, bool returning_indices_is_optional, torch::optional probs_or_mask, torch::optional random_seed, double seed2_contribution) const { // If seeds does not have a value, then we expect all arguments to be resident // on the GPU. If seeds has a value, then we expect them to be accessible from // GPU. This is required for the dispatch to work when CUDA is not available. 
if (((!seeds.has_value() && utils::is_on_gpu(indptr_) && utils::is_on_gpu(indices_) && (!probs_or_mask.has_value() || utils::is_on_gpu(probs_or_mask.value())) && (!type_per_edge_.has_value() || utils::is_on_gpu(type_per_edge_.value()))) || (seeds.has_value() && utils::is_on_gpu(seeds.value()) && utils::is_accessible_from_gpu(indptr_) && utils::is_accessible_from_gpu(indices_) && (!probs_or_mask.has_value() || utils::is_accessible_from_gpu(probs_or_mask.value())) && (!type_per_edge_.has_value() || utils::is_accessible_from_gpu(type_per_edge_.value())))) && !replace) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "SampleNeighbors", { return ops::SampleNeighbors( indptr_, indices_, seeds, seed_offsets, fanouts, replace, layer, returning_indices_is_optional, type_per_edge_, probs_or_mask, node_type_offset_, node_type_to_id_, edge_type_to_id_, random_seed, seed2_contribution); }); } TORCH_CHECK(seeds.has_value(), "Nodes can not be None on the CPU."); if (probs_or_mask.has_value()) { // Note probs will be passed as input for 'torch.multinomial' in deeper // stack, which doesn't support 'torch.half' and 'torch.bool' data types. To // avoid crashes, convert 'probs_or_mask' to 'float32' data type. 
if (probs_or_mask.value().dtype() == torch::kBool || probs_or_mask.value().dtype() == torch::kFloat16) { probs_or_mask = probs_or_mask.value().to(torch::kFloat32); } } bool with_seed_offsets = seed_offsets.has_value(); if (layer) { if (random_seed.has_value() && random_seed->numel() >= 2) { SamplerArgs args{ indices_, {random_seed.value(), static_cast(seed2_contribution)}, NumNodes()}; return SampleNeighborsImpl( seeds.value(), seed_offsets, fanouts, GetNumPickFn( fanouts, replace, type_per_edge_, probs_or_mask, with_seed_offsets), GetPickFn( fanouts, replace, indptr_.options(), type_per_edge_, probs_or_mask, with_seed_offsets, args)); } else { auto args = [&] { if (random_seed.has_value() && random_seed->numel() == 1) { return SamplerArgs{ indices_, random_seed.value(), NumNodes()}; } else { return SamplerArgs{ indices_, RandomEngine::ThreadLocal()->RandInt( static_cast(0), std::numeric_limits::max()), NumNodes()}; } }(); return SampleNeighborsImpl( seeds.value(), seed_offsets, fanouts, GetNumPickFn( fanouts, replace, type_per_edge_, probs_or_mask, with_seed_offsets), GetPickFn( fanouts, replace, indptr_.options(), type_per_edge_, probs_or_mask, with_seed_offsets, args)); } } else { SamplerArgs args; return SampleNeighborsImpl( seeds.value(), seed_offsets, fanouts, GetNumPickFn( fanouts, replace, type_per_edge_, probs_or_mask, with_seed_offsets), GetPickFn( fanouts, replace, indptr_.options(), type_per_edge_, probs_or_mask, with_seed_offsets, args)); } } c10::intrusive_ptr>> FusedCSCSamplingGraph::SampleNeighborsAsync( torch::optional seeds, torch::optional> seed_offsets, const std::vector& fanouts, bool replace, bool layer, bool returning_indices_is_optional, torch::optional probs_or_mask, torch::optional random_seed, double seed2_contribution) const { return async( [=] { return this->SampleNeighbors( seeds, seed_offsets, fanouts, replace, layer, returning_indices_is_optional, probs_or_mask, random_seed, seed2_contribution); }, (seeds.has_value() && 
utils::is_on_gpu(*seeds)) || utils::is_on_gpu(indptr_)); } c10::intrusive_ptr FusedCSCSamplingGraph::TemporalSampleNeighbors( const torch::optional& seeds, const torch::optional>& seed_offsets, const torch::Tensor& seeds_timestamp, const std::vector& fanouts, bool replace, bool layer, bool returning_indices_is_optional, torch::optional seeds_pre_time_window, torch::optional probs_or_mask, torch::optional node_timestamp_attr_name, torch::optional edge_timestamp_attr_name, torch::optional random_seed, double seed2_contribution) const { // 1. Get the timestamp attribute for nodes of the graph const auto node_timestamp = this->NodeAttribute(node_timestamp_attr_name); // 2. Get the timestamp attribute for edges of the graph const auto edge_timestamp = this->EdgeAttribute(edge_timestamp_attr_name); // If seeds does not have a value, then we expect all arguments to be resident // on the GPU. If seeds has a value, then we expect them to be accessible from // GPU. This is required for the dispatch to work when CUDA is not available. 
if (((!seeds.has_value() && utils::is_on_gpu(indptr_) && utils::is_on_gpu(indices_) && (!probs_or_mask.has_value() || utils::is_on_gpu(probs_or_mask.value())) && (!type_per_edge_.has_value() || utils::is_on_gpu(type_per_edge_.value()))) || (seeds.has_value() && utils::is_on_gpu(seeds.value()) && utils::is_accessible_from_gpu(indptr_) && utils::is_accessible_from_gpu(indices_) && (!probs_or_mask.has_value() || utils::is_accessible_from_gpu(probs_or_mask.value())) && (!type_per_edge_.has_value() || utils::is_accessible_from_gpu(type_per_edge_.value())))) && utils::is_accessible_from_gpu(seeds_timestamp) && (!seeds_pre_time_window.has_value() || utils::is_accessible_from_gpu(*seeds_pre_time_window)) && (!node_timestamp.has_value() || utils::is_accessible_from_gpu(*node_timestamp)) && (!edge_timestamp.has_value() || utils::is_accessible_from_gpu(*edge_timestamp)) && !replace) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "SampleNeighbors", { return ops::SampleNeighbors( indptr_, indices_, seeds, seed_offsets, fanouts, replace, layer, returning_indices_is_optional, type_per_edge_, probs_or_mask, node_type_offset_, node_type_to_id_, edge_type_to_id_, random_seed, seed2_contribution, seeds_timestamp, seeds_pre_time_window, node_timestamp, edge_timestamp); }); } TORCH_CHECK(seeds.has_value(), "Nodes can not be None for CPU."); // 3. Get probs_or_mask. if (probs_or_mask.has_value()) { // Note probs will be passed as input for 'torch.multinomial' in deeper // stack, which doesn't support 'torch.half' and 'torch.bool' data types. To // avoid crashes, convert 'probs_or_mask' to 'float32' data type. if (probs_or_mask.value().dtype() == torch::kBool || probs_or_mask.value().dtype() == torch::kFloat16) { probs_or_mask = probs_or_mask.value().to(torch::kFloat32); } } // 4. 
Call SampleNeighborsImpl if (layer) { if (random_seed.has_value() && random_seed->numel() >= 2) { SamplerArgs args{ indices_, {random_seed.value(), static_cast(seed2_contribution)}, NumNodes()}; return SampleNeighborsImpl( *seeds, seed_offsets, fanouts, GetTemporalNumPickFn( seeds_timestamp, indices_, fanouts, replace, type_per_edge_, seeds_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp), GetTemporalPickFn( seeds_timestamp, indices_, fanouts, replace, indptr_.options(), type_per_edge_, seeds_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, args)); } else { auto args = [&] { if (random_seed.has_value() && random_seed->numel() == 1) { return SamplerArgs{ indices_, random_seed.value(), NumNodes()}; } else { return SamplerArgs{ indices_, RandomEngine::ThreadLocal()->RandInt( static_cast(0), std::numeric_limits::max()), NumNodes()}; } }(); return SampleNeighborsImpl( *seeds, seed_offsets, fanouts, GetTemporalNumPickFn( seeds_timestamp, indices_, fanouts, replace, type_per_edge_, seeds_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp), GetTemporalPickFn( seeds_timestamp, indices_, fanouts, replace, indptr_.options(), type_per_edge_, seeds_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, args)); } } else { SamplerArgs args; return SampleNeighborsImpl( *seeds, seed_offsets, fanouts, GetTemporalNumPickFn( seeds_timestamp, this->indices_, fanouts, replace, type_per_edge_, seeds_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp), GetTemporalPickFn( seeds_timestamp, this->indices_, fanouts, replace, indptr_.options(), type_per_edge_, seeds_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, args)); } } static c10::intrusive_ptr BuildGraphFromSharedMemoryHelper(SharedMemoryHelper&& helper) { helper.InitializeRead(); auto indptr = helper.ReadTorchTensor(); auto indices = helper.ReadTorchTensor(); auto node_type_offset = helper.ReadTorchTensor(); auto type_per_edge = helper.ReadTorchTensor(); 
auto node_type_to_id = DetensorizeDict(helper.ReadTorchTensorDict());
  auto edge_type_to_id = DetensorizeDict(helper.ReadTorchTensorDict());
  auto node_attributes = helper.ReadTorchTensorDict();
  auto edge_attributes = helper.ReadTorchTensorDict();
  // Rebuild the graph from the tensors read back through the helper.
  auto graph = c10::make_intrusive(
      indptr.value(), indices.value(), node_type_offset, type_per_edge,
      node_type_to_id, edge_type_to_id, node_attributes, edge_attributes);
  // Hand the two shared memory objects over to the graph.
  // NOTE(review): presumably this keeps the mapping alive for as long as the
  // graph's tensors reference it -- confirm against SharedMemoryHelper.
  auto shared_memory = helper.ReleaseSharedMemory();
  graph->HoldSharedMemoryObject(
      std::move(shared_memory.first), std::move(shared_memory.second));
  return graph;
}

/**
 * @brief Write this graph through a SharedMemoryHelper created with
 * `shared_memory_name` and return a graph built back from that helper.
 *
 * The write order below (indptr, indices, node_type_offset, type_per_edge,
 * node/edge type-to-id dicts, node/edge attributes) is the same order in which
 * BuildGraphFromSharedMemoryHelper reads the values back.
 *
 * @param shared_memory_name Name passed to the SharedMemoryHelper constructor.
 * @return The graph produced by BuildGraphFromSharedMemoryHelper.
 */
c10::intrusive_ptr FusedCSCSamplingGraph::CopyToSharedMemory(
    const std::string& shared_memory_name) {
  SharedMemoryHelper helper(shared_memory_name);
  helper.WriteTorchTensor(indptr_);
  helper.WriteTorchTensor(indices_);
  helper.WriteTorchTensor(node_type_offset_);
  helper.WriteTorchTensor(type_per_edge_);
  // The type-to-id dictionaries are converted with TensorizeDict before being
  // written; the reader converts them back with DetensorizeDict.
  helper.WriteTorchTensorDict(TensorizeDict(node_type_to_id_));
  helper.WriteTorchTensorDict(TensorizeDict(edge_type_to_id_));
  helper.WriteTorchTensorDict(node_attributes_);
  helper.WriteTorchTensorDict(edge_attributes_);
  helper.Flush();
  return BuildGraphFromSharedMemoryHelper(std::move(helper));
}

/**
 * @brief Build a graph from a SharedMemoryHelper created with
 * `shared_memory_name`, without writing anything first.
 *
 * @param shared_memory_name Name passed to the SharedMemoryHelper constructor.
 * @return The graph produced by BuildGraphFromSharedMemoryHelper.
 */
c10::intrusive_ptr FusedCSCSamplingGraph::LoadFromSharedMemory(
    const std::string& shared_memory_name) {
  SharedMemoryHelper helper(shared_memory_name);
  return BuildGraphFromSharedMemoryHelper(std::move(helper));
}

/**
 * @brief Take ownership of the two shared memory objects by moving them into
 * the `tensor_metadata_shm_` and `tensor_data_shm_` members.
 */
void FusedCSCSamplingGraph::HoldSharedMemoryObject(
    SharedMemoryPtr tensor_metadata_shm, SharedMemoryPtr tensor_data_shm) {
  tensor_metadata_shm_ = std::move(tensor_metadata_shm);
  tensor_data_shm_ = std::move(tensor_data_shm);
}

/**
 * @brief Compute how many neighbors will be picked for one node and store the
 * result in `*picked_num_ptr`.
 *
 * Entries of `probs_or_mask` equal to zero inside
 * [offset, offset + num_neighbors) are not counted as valid neighbors. The
 * stored value is the number of valid neighbors when that number is 0 or when
 * `fanout == -1`; otherwise it is `fanout` when `replace` is true and
 * min(fanout, number of valid neighbors) when it is false.
 */
template void NumPick(
    int64_t fanout, bool replace, const torch::optional& probs_or_mask,
    int64_t offset, int64_t num_neighbors, PickedNumType* picked_num_ptr) {
  int64_t num_valid_neighbors = num_neighbors;
  if (probs_or_mask.has_value() && num_neighbors > 0) {
    // Subtract the count of zeros in probs_or_mask.
AT_DISPATCH_ALL_TYPES(
        probs_or_mask.value().scalar_type(), "CountZero", ([&] {
          scalar_t* probs_data_ptr = probs_or_mask.value().data_ptr();
          // Only this node's edge range [offset, offset + num_neighbors) is
          // scanned for zero entries.
          num_valid_neighbors -= std::count(
              probs_data_ptr + offset, probs_data_ptr + offset + num_neighbors,
              0);
        }));
  }
  if (num_valid_neighbors == 0 || fanout == -1) {
    // Nothing valid to pick, or "pick everything": report the valid count.
    *picked_num_ptr = num_valid_neighbors;
  } else {
    // With replacement exactly `fanout` picks are made; without replacement the
    // count is capped by the number of valid neighbors.
    *picked_num_ptr = replace ? fanout : std::min(fanout, num_valid_neighbors);
  }
}

/**
 * @brief Build a boolean mask over the edges in `edge_range` that marks which
 * of them satisfy the temporal constraints of one seed.
 *
 * The mask has `r - l` entries, where `[l, r)` is `edge_range`, and starts as
 * all true. Each optional input that is present narrows it further:
 * - `node_timestamp`: the timestamp of the neighbor node (looked up through
 *   `csc_indices`) must be strictly less than `seed_timestamp` and, when
 *   `seed_pre_time_window` is given, strictly greater than
 *   `seed_timestamp - seed_pre_time_window`.
 * - `edge_timestamp`: the same two comparisons applied to the edge timestamps
 *   in `[l, r)`.
 * - `probs_or_mask`: the entry in `[l, r)` must be non-zero.
 *
 * @return The resulting boolean mask tensor.
 */
torch::Tensor TemporalMask(
    int64_t seed_timestamp, torch::Tensor csc_indices,
    const torch::optional& seed_pre_time_window,
    const torch::optional& probs_or_mask,
    const torch::optional& node_timestamp,
    const torch::optional& edge_timestamp, std::pair edge_range) {
  auto [l, r] = edge_range;
  torch::Tensor mask = torch::ones({r - l}, torch::kBool);
  if (node_timestamp.has_value()) {
    // Gather the timestamps of the source nodes of the edges in [l, r).
    auto neighbor_timestamp =
        node_timestamp.value().index_select(0, csc_indices.slice(0, l, r));
    mask &= neighbor_timestamp < seed_timestamp;
    if (seed_pre_time_window.has_value())
      mask &=
          neighbor_timestamp > seed_timestamp - seed_pre_time_window.value();
  }
  if (edge_timestamp.has_value()) {
    auto edge_ts = edge_timestamp.value().slice(0, l, r);
    mask &= edge_ts < seed_timestamp;
    if (seed_pre_time_window.has_value())
      mask &= edge_ts > seed_timestamp - seed_pre_time_window.value();
  }
  if (probs_or_mask.has_value()) {
    // Edges with a zero probability / false mask entry are never eligible.
    mask &= probs_or_mask.value().slice(0, l, r) != 0;
  }
  return mask;
}

/**
 * @brief Fast path for temporal sampling without probability. It is used when
 * the number of neighbors is large. It randomly samples neighbors and checks
 * the timestamp of the neighbors. It is successful if the number of sampled
 * neighbors in kTriedThreshold trials is equal to the fanout.
*/ std::pair> FastTemporalPick( torch::Tensor seed_timestamp, torch::Tensor csc_indices, int64_t fanout, bool replace, const torch::optional& seed_pre_time_window, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, int64_t seed_offset, int64_t offset, int64_t num_neighbors) { constexpr int64_t kTriedThreshold = 1000; auto timestamp = utils::GetValueByIndex(seed_timestamp, seed_offset); torch::optional time_window = torch::nullopt; if (seed_pre_time_window.has_value()) { time_window = utils::GetValueByIndex( seed_pre_time_window.value(), seed_offset); } std::vector sampled_edges; sampled_edges.reserve(fanout); std::set sampled_edge_set; int64_t sample_count = 0; int64_t tried = 0; while (sample_count < fanout && tried < kTriedThreshold) { int64_t edge_id = RandomEngine::ThreadLocal()->RandInt(offset, offset + num_neighbors); ++tried; if (!replace && sampled_edge_set.count(edge_id) > 0) { continue; } if (node_timestamp.has_value()) { bool flag = true; AT_DISPATCH_INDEX_TYPES( csc_indices.scalar_type(), "CheckNodeTimeStamp", ([&] { int64_t neighbor_id = utils::GetValueByIndex(csc_indices, edge_id); auto neighbor_ts = utils::GetValueByIndex( node_timestamp.value(), neighbor_id); if (neighbor_ts >= timestamp || (time_window.has_value() && neighbor_ts <= (timestamp - time_window.value()))) flag = false; })); if (!flag) continue; } if (edge_timestamp.has_value()) { auto edge_ts = utils::GetValueByIndex(edge_timestamp.value(), edge_id); if (edge_ts >= timestamp || (time_window.has_value() && edge_ts <= (timestamp - time_window.value()))) continue; continue; } if (!replace) { sampled_edge_set.insert(edge_id); } sampled_edges.push_back(edge_id); sample_count++; } if (sample_count < fanout) { return {false, {}}; } return {true, sampled_edges}; } int64_t TemporalNumPick( torch::Tensor seed_timestamp, torch::Tensor csc_indics, int64_t fanout, bool replace, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const 
torch::optional& node_timestamp, const torch::optional& edge_timestamp, int64_t seed_offset, int64_t offset, int64_t num_neighbors) { constexpr int64_t kFastPathThreshold = 1000; if (num_neighbors > kFastPathThreshold && !probs_or_mask.has_value()) { // TODO: Currently we use the fast path both in TemporalNumPick and // TemporalPick. We may only sample once in TemporalNumPick and use the // sampled edges in TemporalPick to avoid sampling twice. auto [success, sampled_edges] = FastTemporalPick( seed_timestamp, csc_indics, fanout, replace, seed_pre_time_window, node_timestamp, edge_timestamp, seed_offset, offset, num_neighbors); if (success) return sampled_edges.size(); } torch::optional time_window = torch::nullopt; if (seed_pre_time_window.has_value()) { time_window = utils::GetValueByIndex( seed_pre_time_window.value(), seed_offset); } auto mask = TemporalMask( utils::GetValueByIndex(seed_timestamp, seed_offset), csc_indics, time_window, probs_or_mask, node_timestamp, edge_timestamp, {offset, offset + num_neighbors}); int64_t num_valid_neighbors = utils::GetValueByIndex(mask.sum(), 0); if (num_valid_neighbors == 0 || fanout == -1) return num_valid_neighbors; return replace ? 
fanout : std::min(fanout, num_valid_neighbors); } template void NumPickByEtype( bool with_seed_offsets, const std::vector& fanouts, bool replace, const torch::Tensor& type_per_edge, const torch::optional& probs_or_mask, int64_t offset, int64_t num_neighbors, PickedNumType* num_picked_ptr, int64_t seed_index, const std::vector& etype_id_to_num_picked_offset) { int64_t etype_begin = offset; const int64_t end = offset + num_neighbors; PickedNumType total_count = 0; AT_DISPATCH_INTEGRAL_TYPES( type_per_edge.scalar_type(), "NumPickFnByEtype", ([&] { const scalar_t* type_per_edge_data = type_per_edge.data_ptr(); while (etype_begin < end) { scalar_t etype = type_per_edge_data[etype_begin]; TORCH_CHECK( etype >= 0 && etype < (int64_t)fanouts.size(), "Etype values exceed the number of fanouts."); auto etype_end_it = std::upper_bound( type_per_edge_data + etype_begin, type_per_edge_data + end, etype); int64_t etype_end = etype_end_it - type_per_edge_data; // Do sampling for one etype. if (with_seed_offsets) { // The pick numbers aren't stored continuously, but separately for // each different etype. 
const auto offset = etype_id_to_num_picked_offset[etype] + seed_index; NumPick( fanouts[etype], replace, probs_or_mask, etype_begin, etype_end - etype_begin, num_picked_ptr + offset); } else { PickedNumType picked_count = 0; NumPick( fanouts[etype], replace, probs_or_mask, etype_begin, etype_end - etype_begin, &picked_count); total_count += picked_count; } etype_begin = etype_end; } })); if (!with_seed_offsets) { num_picked_ptr[seed_index] = total_count; } } int64_t TemporalNumPickByEtype( torch::Tensor seed_timestamp, torch::Tensor csc_indices, const std::vector& fanouts, bool replace, const torch::Tensor& type_per_edge, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, int64_t seed_offset, int64_t offset, int64_t num_neighbors) { int64_t etype_begin = offset; const int64_t end = offset + num_neighbors; int64_t total_count = 0; AT_DISPATCH_INTEGRAL_TYPES( type_per_edge.scalar_type(), "TemporalNumPickFnByEtype", ([&] { const scalar_t* type_per_edge_data = type_per_edge.data_ptr(); while (etype_begin < end) { scalar_t etype = type_per_edge_data[etype_begin]; TORCH_CHECK( etype >= 0 && etype < (int64_t)fanouts.size(), "Etype values exceed the number of fanouts."); auto etype_end_it = std::upper_bound( type_per_edge_data + etype_begin, type_per_edge_data + end, etype); int64_t etype_end = etype_end_it - type_per_edge_data; // Do sampling for one etype. total_count += TemporalNumPick( seed_timestamp, csc_indices, fanouts[etype], replace, seed_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, seed_offset, etype_begin, etype_end - etype_begin); etype_begin = etype_end; } })); return total_count; } /** * @brief Perform uniform sampling of elements and return the sampled indices. * * @param offset The starting edge ID for the connected neighbors of the sampled * node. * @param num_neighbors The number of neighbors to pick. 
* @param fanout The number of edges to be sampled for each node. It should be * >= 0 or -1. * - When the value is -1, all neighbors will be sampled once regardless of * replacement. It is equivalent to selecting all neighbors when the fanout is * >= the number of neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum * threshold for selecting neighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param options Tensor options specifying the desired data type of the result. * @param picked_data_ptr The destination address where the picked neighbors * should be put. Enough memory space should be allocated in advance. */ template inline int64_t UniformPick( int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace, const torch::TensorOptions& options, PickedType* picked_data_ptr) { if ((fanout == -1) || (num_neighbors <= fanout && !replace)) { std::iota(picked_data_ptr, picked_data_ptr + num_neighbors, offset); return num_neighbors; } else if (replace) { std::memcpy( picked_data_ptr, torch::randint(offset, offset + num_neighbors, {fanout}, options) .data_ptr(), fanout * sizeof(PickedType)); return fanout; } else { // We use different sampling strategies for different sampling case. if (fanout >= num_neighbors / 10) { // [Algorithm] // This algorithm is conceptually related to the Fisher-Yates // shuffle. // // [Complexity Analysis] // This algorithm's memory complexity is O(num_neighbors), but // it generates fewer random numbers (O(fanout)). // // (Compare) Reservoir algorithm is one of the most classical // sampling algorithms. Both the reservoir algorithm and our // algorithm offer distinct advantages, we need to compare to // illustrate our trade-offs. 
// The reservoir algorithm is memory-efficient (O(fanout)) but // creates many random numbers (O(num_neighbors)), which is // costly. // // [Practical Consideration] // Use this algorithm when `fanout >= num_neighbors / 10` to // reduce computation. // In this scenarios above, memory complexity is not a concern due // to the small size of both `fanout` and `num_neighbors`. And it // is efficient to allocate a small amount of memory. So the // algorithm performence is great in this case. std::vector seq(num_neighbors); // Assign the seq with [offset, offset + num_neighbors]. std::iota(seq.begin(), seq.end(), offset); for (int64_t i = 0; i < fanout; ++i) { auto j = RandomEngine::ThreadLocal()->RandInt(i, num_neighbors); std::swap(seq[i], seq[j]); } // Save the randomly sampled fanout elements to the output tensor. std::copy(seq.begin(), seq.begin() + fanout, picked_data_ptr); return fanout; } else if (fanout < 64) { // [Algorithm] // Use linear search to verify uniqueness. // // [Complexity Analysis] // Since the set of numbers is small (up to 64), so it is more // cost-effective for the CPU to use this algorithm. auto begin = picked_data_ptr; auto end = picked_data_ptr + fanout; while (begin != end) { // Put the new random number in the last position. *begin = RandomEngine::ThreadLocal()->RandInt( offset, offset + num_neighbors); // Check if a new value doesn't exist in current // range(picked_data_ptr, begin). Otherwise get a new // value until we haven't unique range of elements. auto it = std::find(picked_data_ptr, begin, *begin); if (it == begin) ++begin; } return fanout; } else { // [Algorithm] // Use hash-set to verify uniqueness. In the best scenario, the // time complexity is O(fanout), assuming no conflicts occur. // // [Complexity Analysis] // Let K = (fanout / num_neighbors), the expected number of extra // sampling steps is roughly K^2 / (1-K) * num_neighbors, which // means in the worst case scenario, the time complexity is // O(num_neighbors^2). 
// // [Practical Consideration] // In practice, we set the threshold K to 1/10. This trade-off is // due to the slower performance of std::unordered_set, which // would otherwise increase the sampling cost. By doing so, we // achieve a balance between theoretical efficiency and practical // performance. std::unordered_set picked_set; while (static_cast(picked_set.size()) < fanout) { picked_set.insert(RandomEngine::ThreadLocal()->RandInt( offset, offset + num_neighbors)); } std::copy(picked_set.begin(), picked_set.end(), picked_data_ptr); return picked_set.size(); } } } /** @brief An operator to perform non-uniform sampling. */ static torch::Tensor NonUniformPickOp( torch::Tensor probs, int64_t fanout, bool replace) { auto positive_probs_indices = probs.nonzero().squeeze(1); auto num_positive_probs = positive_probs_indices.size(0); if (num_positive_probs == 0) return torch::empty({0}, torch::kLong); if ((fanout == -1) || (num_positive_probs <= fanout && !replace)) { return positive_probs_indices; } if (!replace) fanout = std::min(fanout, num_positive_probs); if (fanout == 0) return torch::empty({0}, torch::kLong); auto ret_tensor = torch::empty({fanout}, torch::kLong); auto ret_ptr = ret_tensor.data_ptr(); AT_DISPATCH_FLOATING_TYPES( probs.scalar_type(), "MultinomialSampling", ([&] { auto probs_data_ptr = probs.data_ptr(); auto positive_probs_indices_ptr = positive_probs_indices.data_ptr(); if (!replace) { // The algorithm is from gumbel softmax. // s = argmax( logp - log(-log(eps)) ) where eps ~ U(0, 1). // Here we can apply exp to the formula which will not affect result // of argmax or topk. Then we have // s = argmax( p / (-log(eps)) ) where eps ~ U(0, 1). // We can also simplify the formula above by // s = argmax( p / q ) where q ~ Exp(1). if (fanout == 1) { // Return argmax(p / q). scalar_t max_prob = 0; int64_t max_prob_index = -1; // We only care about the neighbors with non-zero probability. 
for (auto i = 0; i < num_positive_probs; ++i) { // Calculate (p / q) for the current neighbor. scalar_t current_prob = probs_data_ptr[positive_probs_indices_ptr[i]] / RandomEngine::ThreadLocal()->Exponential(1.); if (current_prob > max_prob) { max_prob = current_prob; max_prob_index = positive_probs_indices_ptr[i]; } } ret_ptr[0] = max_prob_index; } else { // Return topk(p / q). std::vector> q(num_positive_probs); for (auto i = 0; i < num_positive_probs; ++i) { q[i].first = probs_data_ptr[positive_probs_indices_ptr[i]] / RandomEngine::ThreadLocal()->Exponential(1.); q[i].second = positive_probs_indices_ptr[i]; } if (fanout < num_positive_probs / 64) { // Use partial_sort. std::partial_sort( q.begin(), q.begin() + fanout, q.end(), std::greater{}); for (auto i = 0; i < fanout; ++i) { ret_ptr[i] = q[i].second; } } else { // Use nth_element. std::nth_element( q.begin(), q.begin() + fanout - 1, q.end(), std::greater{}); for (auto i = 0; i < fanout; ++i) { ret_ptr[i] = q[i].second; } } } } else { // Calculate cumulative sum of probabilities. std::vector prefix_sum_probs(num_positive_probs); scalar_t sum_probs = 0; for (auto i = 0; i < num_positive_probs; ++i) { sum_probs += probs_data_ptr[positive_probs_indices_ptr[i]]; prefix_sum_probs[i] = sum_probs; } // Normalize. if ((sum_probs > 1.00001) || (sum_probs < 0.99999)) { for (auto i = 0; i < num_positive_probs; ++i) { prefix_sum_probs[i] /= sum_probs; } } for (auto i = 0; i < fanout; ++i) { // Sample a probability mass from a uniform distribution. double uniform_sample = RandomEngine::ThreadLocal()->Uniform(0., 1.); // Use a binary search to find the index. int sampled_index = std::lower_bound( prefix_sum_probs.begin(), prefix_sum_probs.end(), uniform_sample) - prefix_sum_probs.begin(); ret_ptr[i] = positive_probs_indices_ptr[sampled_index]; } } })); return ret_tensor; } /** * @brief Perform non-uniform sampling of elements based on probabilities and * return the sampled indices. 
* * If 'probs_or_mask' is provided, it indicates that the sampling is * non-uniform. In such cases: * - When the number of neighbors with non-zero probability is less than or * equal to fanout, all neighbors with non-zero probability will be selected. * - When the number of neighbors with non-zero probability exceeds fanout, the * sampling process will select 'fanout' elements based on their respective * probabilities. Higher probabilities will increase the chances of being chosen * during the sampling process. * * @param offset The starting edge ID for the connected neighbors of the sampled * node. * @param num_neighbors The number of neighbors to pick. * @param fanout The number of edges to be sampled for each node. It should be * >= 0 or -1. * - When the value is -1, all neighbors with non-zero probability will be * sampled once regardless of replacement. It is equivalent to selecting all * neighbors with non-zero probability when the fanout is >= the number of * neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum * threshold for selecting neighbors. * @param replace Boolean indicating whether the sample is performed with or * without replacement. If True, a value can be selected multiple times. * Otherwise, each value can be selected only once. * @param options Tensor options specifying the desired data type of the result. * @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * @param picked_data_ptr The destination address where the picked neighbors * should be put. Enough memory space should be allocated in advance. 
*/
template inline int64_t NonUniformPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options, const torch::Tensor& probs_or_mask,
    PickedType* picked_data_ptr) {
  // A tensor longer than `num_neighbors` is sliced down to this node's edge
  // range; otherwise it is used as-is.
  // NOTE(review): the as-is case presumably means the caller already passed a
  // per-node slice -- confirm against the call sites.
  auto local_probs =
      probs_or_mask.size(0) > num_neighbors
          ? probs_or_mask.slice(0, offset, offset + num_neighbors)
          : probs_or_mask;
  auto picked_indices = NonUniformPickOp(local_probs, fanout, replace);
  auto picked_indices_ptr = picked_indices.data_ptr();
  // The picked indices are relative to `local_probs`; adding `offset` turns
  // them into edge IDs.
  for (int i = 0; i < picked_indices.numel(); ++i) {
    picked_data_ptr[i] =
        static_cast(picked_indices_ptr[i]) + offset;
  }
  return picked_indices.numel();
}

/**
 * @brief Pick neighbors for one node, choosing between non-uniform and uniform
 * sampling.
 *
 * Returns 0 without writing anything when `fanout` or `num_neighbors` is 0.
 * Otherwise it forwards to NonUniformPick when `probs_or_mask` has a value and
 * to UniformPick when it does not. `args` is not read in this overload.
 *
 * @return The number of picked neighbors written to `picked_data_ptr`.
 */
template int64_t Pick(
    int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace,
    const torch::TensorOptions& options,
    const torch::optional& probs_or_mask, SamplerArgs args,
    PickedType* picked_data_ptr) {
  if (fanout == 0 || num_neighbors == 0) return 0;
  if (probs_or_mask.has_value()) {
    return NonUniformPick(
        offset, num_neighbors, fanout, replace, options,
        probs_or_mask.value(), picked_data_ptr);
  } else {
    return UniformPick(
        offset, num_neighbors, fanout, replace, options, picked_data_ptr);
  }
}

/**
 * @brief Pick neighbors of one seed subject to temporal constraints and write
 * the picked edge IDs to `picked_data_ptr`.
 *
 * For SamplerType::NEIGHBOR with more than kFastPathThreshold neighbors and no
 * `probs_or_mask`, FastTemporalPick is tried first; when it succeeds its edges
 * are copied out and their count is returned.
 */
template int64_t TemporalPick(
    torch::Tensor seed_timestamp, torch::Tensor csc_indices,
    int64_t seed_offset, int64_t offset, int64_t num_neighbors, int64_t fanout,
    bool replace, const torch::TensorOptions& options,
    const torch::optional& seed_pre_time_window,
    const torch::optional& probs_or_mask,
    const torch::optional& node_timestamp,
    const torch::optional& edge_timestamp, SamplerArgs args,
    PickedType* picked_data_ptr) {
  constexpr int64_t kFastPathThreshold = 1000;
  if (S == SamplerType::NEIGHBOR && num_neighbors > kFastPathThreshold &&
      !probs_or_mask.has_value()) {
    auto [success, sampled_edges] = FastTemporalPick(
        seed_timestamp, csc_indices, fanout, replace, seed_pre_time_window,
        node_timestamp, edge_timestamp, seed_offset, offset, num_neighbors);
    if (success) {
      // Copy the fast-path result into the caller's output buffer.
      for (size_t i = 0; i < sampled_edges.size(); ++i) {
        picked_data_ptr[i] =
static_cast(sampled_edges[i]); } return sampled_edges.size(); } } torch::optional time_window = torch::nullopt; if (seed_pre_time_window.has_value()) { time_window = utils::GetValueByIndex( seed_pre_time_window.value(), seed_offset); } auto mask = TemporalMask( utils::GetValueByIndex(seed_timestamp, seed_offset), csc_indices, time_window, probs_or_mask, node_timestamp, edge_timestamp, {offset, offset + num_neighbors}); torch::Tensor masked_prob; if (probs_or_mask.has_value()) { masked_prob = probs_or_mask.value().slice(0, offset, offset + num_neighbors) * mask; } else { masked_prob = S == SamplerType::NEIGHBOR ? mask.to(torch::kFloat32) : mask; } if constexpr (S == SamplerType::NEIGHBOR) { auto picked_indices = NonUniformPickOp(masked_prob, fanout, replace); auto picked_indices_ptr = picked_indices.data_ptr(); for (int i = 0; i < picked_indices.numel(); ++i) { picked_data_ptr[i] = static_cast(picked_indices_ptr[i]) + offset; } return picked_indices.numel(); } if constexpr (is_labor(S)) { return Pick( offset, num_neighbors, fanout, replace, options, masked_prob, args, picked_data_ptr); } } template int64_t PickByEtype( bool with_seed_offsets, int64_t offset, int64_t num_neighbors, const std::vector& fanouts, bool replace, const torch::TensorOptions& options, const torch::Tensor& type_per_edge, const torch::optional& probs_or_mask, SamplerArgs args, PickedType* picked_data_ptr, int64_t seed_index, PickedType* subgraph_indptr_ptr, const std::vector& etype_id_to_num_picked_offset) { int64_t etype_begin = offset; int64_t etype_end = offset; int64_t picked_total_count = 0; AT_DISPATCH_INTEGRAL_TYPES( type_per_edge.scalar_type(), "PickByEtype", ([&] { const scalar_t* type_per_edge_data = type_per_edge.data_ptr(); const auto end = offset + num_neighbors; while (etype_begin < end) { scalar_t etype = type_per_edge_data[etype_begin]; TORCH_CHECK( etype >= 0 && etype < (int64_t)fanouts.size(), "Etype values exceed the number of fanouts."); int64_t fanout = fanouts[etype]; auto 
etype_end_it = std::upper_bound( type_per_edge_data + etype_begin, type_per_edge_data + end, etype); etype_end = etype_end_it - type_per_edge_data; // Do sampling for one etype. The picked nodes aren't stored // continuously, but separately for each different etype. if (fanout != 0) { auto picked_count = 0; if (with_seed_offsets) { const auto indptr_offset = etype_id_to_num_picked_offset[etype] + seed_index; picked_count = Pick( etype_begin, etype_end - etype_begin, fanout, replace, options, probs_or_mask, args, picked_data_ptr + subgraph_indptr_ptr[indptr_offset]); TORCH_CHECK( subgraph_indptr_ptr[indptr_offset + 1] - subgraph_indptr_ptr[indptr_offset] == picked_count, "Actual picked count doesn't match the calculated " "pick number."); } else { picked_count = Pick( etype_begin, etype_end - etype_begin, fanout, replace, options, probs_or_mask, args, picked_data_ptr + subgraph_indptr_ptr[seed_index] + picked_total_count); } picked_total_count += picked_count; } etype_begin = etype_end; } })); return picked_total_count; } template int64_t TemporalPickByEtype( torch::Tensor seed_timestamp, torch::Tensor csc_indices, int64_t seed_offset, int64_t offset, int64_t num_neighbors, const std::vector& fanouts, bool replace, const torch::TensorOptions& options, const torch::Tensor& type_per_edge, const torch::optional& seed_pre_time_window, const torch::optional& probs_or_mask, const torch::optional& node_timestamp, const torch::optional& edge_timestamp, SamplerArgs args, PickedType* picked_data_ptr) { int64_t etype_begin = offset; int64_t etype_end = offset; int64_t pick_offset = 0; AT_DISPATCH_INTEGRAL_TYPES( type_per_edge.scalar_type(), "TemporalPickByEtype", ([&] { const scalar_t* type_per_edge_data = type_per_edge.data_ptr(); const auto end = offset + num_neighbors; while (etype_begin < end) { scalar_t etype = type_per_edge_data[etype_begin]; TORCH_CHECK( etype >= 0 && etype < (int64_t)fanouts.size(), "Etype values exceed the number of fanouts."); int64_t fanout = 
fanouts[etype]; auto etype_end_it = std::upper_bound( type_per_edge_data + etype_begin, type_per_edge_data + end, etype); etype_end = etype_end_it - type_per_edge_data; // Do sampling for one etype. if (fanout != 0) { int64_t picked_count = TemporalPick( seed_timestamp, csc_indices, seed_offset, etype_begin, etype_end - etype_begin, fanout, replace, options, seed_pre_time_window, probs_or_mask, node_timestamp, edge_timestamp, args, picked_data_ptr + pick_offset); pick_offset += picked_count; } etype_begin = etype_end; } })); return pick_offset; } template std::enable_if_t Pick( int64_t offset, int64_t num_neighbors, int64_t fanout, bool replace, const torch::TensorOptions& options, const torch::optional& probs_or_mask, SamplerArgs args, PickedType* picked_data_ptr) { if (fanout == 0 || num_neighbors == 0) return 0; if (probs_or_mask.has_value()) { if (fanout < 0) { return NonUniformPick( offset, num_neighbors, fanout, replace, options, probs_or_mask.value(), picked_data_ptr); } else { int64_t picked_count; GRAPHBOLT_DISPATCH_ALL_TYPES( probs_or_mask.value().scalar_type(), "LaborPickFloatType", ([&] { if (replace) { picked_count = LaborPick( offset, num_neighbors, fanout, options, probs_or_mask, args, picked_data_ptr); } else { picked_count = LaborPick( offset, num_neighbors, fanout, options, probs_or_mask, args, picked_data_ptr); } })); return picked_count; } } else if (fanout < 0) { return UniformPick( offset, num_neighbors, fanout, replace, options, picked_data_ptr); } else if (replace) { return LaborPick( offset, num_neighbors, fanout, options, /* probs_or_mask= */ torch::nullopt, args, picked_data_ptr); } else { // replace = false return LaborPick( offset, num_neighbors, fanout, options, /* probs_or_mask= */ torch::nullopt, args, picked_data_ptr); } } template inline void safe_divide(T& a, U b) { a = b > 0 ? 
(T)(a / b) : std::numeric_limits::infinity(); } namespace labor { template inline T invcdf(T u, int64_t n, T rem) { constexpr T one = 1; return rem * (one - std::pow(one - u, one / n)); } template inline T jth_sorted_uniform_random( seed_t seed, int64_t t, int64_t c, int64_t j, T& rem, int64_t n) { const T u = seed.uniform(t + j * c); // https://mathematica.stackexchange.com/a/256707 rem -= invcdf(u, n, rem); return 1 - rem; } }; // namespace labor /** * @brief Perform uniform-nonuniform sampling of elements depending on the * template parameter NonUniform and return the sampled indices. * * @param offset The starting edge ID for the connected neighbors of the sampled * node. * @param num_neighbors The number of neighbors to pick. * @param fanout The number of edges to be sampled for each node. It should be * >= 0 or -1. * - When the value is -1, all neighbors (with non-zero probability, if * weighted) will be sampled once regardless of replacement. It is equivalent to * selecting all neighbors with non-zero probability when the fanout is >= the * number of neighbors (and replacement is set to false). * - When the value is a non-negative integer, it serves as a minimum * threshold for selecting neighbors. * @param options Tensor options specifying the desired data type of the result. * @param probs_or_mask Optional tensor containing the (unnormalized) * probabilities associated with each neighboring edge of a node in the original * graph. It must be a 1D floating-point tensor with the number of elements * equal to the number of edges in the graph. * @param args Contains labor specific arguments. * @param picked_data_ptr The destination address where the picked neighbors * should be put. Enough memory space should be allocated in advance. 
*/
template <
    bool NonUniform, bool Replace, typename ProbsType, SamplerType S,
    typename PickedType, int StackSize>
inline std::enable_if_t LaborPick(
    int64_t offset, int64_t num_neighbors, int64_t fanout,
    const torch::TensorOptions& options,
    const torch::optional& probs_or_mask, SamplerArgs args,
    PickedType* picked_data_ptr) {
  // Without replacement, at most num_neighbors items can be picked.
  fanout = Replace ? fanout : std::min(fanout, num_neighbors);
  // Uniform sampling without replacement that covers the whole neighborhood:
  // every neighbor is picked, no random numbers are needed.
  if (!NonUniform && !Replace && fanout >= num_neighbors) {
    std::iota(picked_data_ptr, picked_data_ptr + num_neighbors, offset);
    return num_neighbors;
  }
  // Assuming max_degree of a vertex is <= 4 billion.
  std::array, StackSize> heap;
  auto heap_data = heap.data();
  torch::Tensor heap_tensor;
  // The stack array only has room for StackSize entries; for a larger fanout
  // an int32 tensor provides the storage of the heap.
  if (fanout > StackSize) {
    constexpr int factor = sizeof(heap_data[0]) / sizeof(int32_t);
    heap_tensor = torch::empty({fanout * factor}, torch::kInt32);
    heap_data = reinterpret_cast*>(
        heap_tensor.data_ptr());
  }
  const ProbsType* local_probs_data =
      NonUniform ? probs_or_mask.value().data_ptr() + offset : nullptr;
  // A tensor with at most num_neighbors entries is already local to this
  // neighborhood, so the offset added above is undone.
  if (NonUniform && probs_or_mask.value().size(0) <= num_neighbors) {
    local_probs_data -= offset;
  }
  AT_DISPATCH_INDEX_TYPES(
      args.indices.scalar_type(), "LaborPickMain", ([&] {
        const auto local_indices_data =
            reinterpret_cast(args.indices.data_ptr()) + offset;
        if constexpr (Replace) {
          // [Algorithm] @mfbalin
          // Use a max-heap to get rid of the big random numbers and filter the
          // smallest fanout of them. Implements arXiv:2210.13339 Section A.3.
          // Unlike sampling without replacement below, the same item can be
          // included fanout times in our sample. Thus, we sort and pick the
          // smallest fanout random numbers out of num_neighbors * fanout of
          // them. Each item has fanout many random numbers in the race and the
          // smallest fanout of them get picked. Instead of generating
          // fanout * num_neighbors random numbers and increase the complexity,
          // I devised an algorithm to generate the fanout numbers for an item
          // in a sorted manner on demand, meaning we continue generating random
          // numbers for an item only if it has been sampled that many times
          // already.
          // https://gist.github.com/mfbalin/096dcad5e3b1f6a59ff7ff2f9f541618
          //
          // [Complexity Analysis]
          // Will modify the heap at most linear in O(num_neighbors + fanout)
          // and each modification takes O(log(fanout)). So the total complexity
          // is O((fanout + num_neighbors) log(fanout)). It is possible to
          // decrease the logarithmic factor down to
          // O(log(min(fanout, num_neighbors))).
          std::array remaining;
          auto remaining_data = remaining.data();
          torch::Tensor remaining_tensor;
          // Same stack-or-tensor storage scheme as for the heap above.
          if (num_neighbors > StackSize) {
            remaining_tensor = torch::empty({num_neighbors}, torch::kFloat32);
            remaining_data = remaining_tensor.data_ptr();
          }
          // remaining_data[i] is the `rem` state of neighbor i that
          // jth_sorted_uniform_random updates; -1 marks a neighbor whose
          // latest random number was rejected by the heap.
          std::fill_n(remaining_data, num_neighbors, 1.f);
          auto heap_end = heap_data;
          // ceil(fanout / num_neighbors): the number of random numbers drawn
          // for every neighbor in the first pass.
          const auto init_count = (num_neighbors + fanout - 1) / num_neighbors;
          // Draws the j'th random number of neighbor i and offers it to the
          // heap. Returns true iff the number was rejected.
          auto sample_neighbor_i_with_index_t_jth_time =
              [&](index_t t, int64_t j, uint32_t i) {
                auto rnd = labor::jth_sorted_uniform_random(
                    args.random_seed, t, args.num_nodes, j, remaining_data[i],
                    fanout - j);  // r_t
                if constexpr (NonUniform) {
                  safe_divide(rnd, local_probs_data[i]);
                }  // r_t / \pi_t
                if (heap_end < heap_data + fanout) {
                  // The heap is not full yet: append, and heapify once the
                  // fanout'th entry has been added.
                  heap_end[0] = std::make_pair(rnd, i);
                  if (++heap_end >= heap_data + fanout) {
                    std::make_heap(heap_data, heap_data + fanout);
                  }
                  return false;
                } else if (rnd < heap_data[0].first) {
                  // Smaller than the current maximum: replace the maximum.
                  std::pop_heap(heap_data, heap_data + fanout);
                  heap_data[fanout - 1] = std::make_pair(rnd, i);
                  std::push_heap(heap_data, heap_data + fanout);
                  return false;
                } else {
                  remaining_data[i] = -1;
                  return true;
                }
              };
          for (uint32_t i = 0; i < num_neighbors; ++i) {
            const auto t = local_indices_data[i];
            for (int64_t j = 0; j < init_count; j++) {
              sample_neighbor_i_with_index_t_jth_time(t, j, i);
            }
          }
          // Second pass: keep drawing for a neighbor until one of its numbers
          // is rejected; neighbors already rejected are skipped.
          for (uint32_t i = 0; i < num_neighbors; ++i) {
            if (remaining_data[i] == -1) continue;
            const auto t = local_indices_data[i];
            for (int64_t j = init_count; j < fanout; ++j) {
              if (sample_neighbor_i_with_index_t_jth_time(t, j, i)) break;
            }
          }
        } else {
          // [Algorithm]
          // Use a max-heap to get rid of the big random numbers and filter the
          // smallest fanout of them. Implements arXiv:2210.13339 Section A.3.
          //
          // [Complexity Analysis]
          // the first for loop and std::make_heap runs in time O(fanouts).
          // The next for loop compares each random number to the current
          // minimum fanout numbers. For any given i, the probability that the
          // current random number will replace any number in the heap is fanout
          // / i. Summing from i=fanout to num_neighbors, we get f * (H_n -
          // H_f), where n is num_neighbors and f is fanout, H_f is \sum_j=1^f
          // 1/j. In the end H_n - H_f = O(log n/f), there are n - f iterations,
          // each heap operation takes time log f, so the total complexity is
          // O(f + (n - f)
          // + f log(n/f) log f) = O(n + f log(f) log(n/f)). If f << n (f is a
          // constant in almost all cases), then the average complexity is
          // O(num_neighbors).
          for (uint32_t i = 0; i < fanout; ++i) {
            const auto t = local_indices_data[i];
            auto rnd = args.random_seed.uniform(t);  // r_t
            if constexpr (NonUniform) {
              safe_divide(rnd, local_probs_data[i]);
            }  // r_t / \pi_t
            heap_data[i] = std::make_pair(rnd, i);
          }
          // fanout was clamped to num_neighbors above, so when this condition
          // is false the loop below is empty and no heap order is needed.
          if (!NonUniform || fanout < num_neighbors) {
            std::make_heap(heap_data, heap_data + fanout);
          }
          for (uint32_t i = fanout; i < num_neighbors; ++i) {
            const auto t = local_indices_data[i];
            auto rnd = args.random_seed.uniform(t);  // r_t
            if constexpr (NonUniform) {
              safe_divide(rnd, local_probs_data[i]);
            }  // r_t / \pi_t
            if (rnd < heap_data[0].first) {
              std::pop_heap(heap_data, heap_data + fanout);
              heap_data[fanout - 1] = std::make_pair(rnd, i);
              std::push_heap(heap_data, heap_data + fanout);
            }
          }
        }
      }));
  // Emit the edge IDs of the heap entries. In the non-uniform case, entries
  // whose key is infinity (safe_divide with a non-positive probability) are
  // dropped, so fewer than fanout neighbors may be returned.
  int64_t num_sampled = 0;
  for (int64_t i = 0; i < fanout; ++i) {
    const auto [rnd, j] = heap_data[i];
    if (!NonUniform || rnd < std::numeric_limits::infinity()) {
      picked_data_ptr[num_sampled++] = offset + j;
    }
  }
  return num_sampled;
}

}  // namespace sampling
}  // namespace graphbolt

================================================
FILE: graphbolt/src/index_select.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file index_select.cc
 * @brief Index select operators.
*/ #include "./index_select.h" #include #include #include #include #include "./macro.h" #include "./utils.h" namespace graphbolt { namespace ops { constexpr int kIntGrainSize = 64; torch::Tensor IndexSelect(torch::Tensor input, torch::Tensor index) { if (utils::is_on_gpu(index)) { if (input.is_pinned()) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "UVAIndexSelect", { return UVAIndexSelectImpl(input, index); }); } else { return torch::index_select(input, 0, index); } } auto output_shape = input.sizes().vec(); output_shape[0] = index.numel(); auto result = torch::empty( output_shape, index.options() .dtype(input.dtype()) .pinned_memory(utils::is_pinned(index))); auto result_ptr = reinterpret_cast(result.data_ptr()); const auto input_ptr = reinterpret_cast(input.data_ptr()); const auto row_bytes = input.slice(0, 0, 1).numel() * input.element_size(); const auto stride = input.stride(0) * input.element_size(); const auto num_input_rows = input.size(0); AT_DISPATCH_INDEX_TYPES( index.scalar_type(), "IndexSelect::index::scalar_type()", ([&] { const auto index_ptr = index.data_ptr(); graphbolt::parallel_for( 0, index.size(0), kIntGrainSize, [&](int64_t begin, int64_t end) { for (int64_t i = begin; i < end; i++) { auto idx = index_ptr[i]; if (idx < 0) idx += num_input_rows; if (idx < 0 || idx >= num_input_rows) { // Throw IndexError via torch. 
idx += input[num_input_rows].item(); } std::memcpy( result_ptr + i * row_bytes, input_ptr + idx * stride, row_bytes); } }); })); return result; } c10::intrusive_ptr> IndexSelectAsync( torch::Tensor input, torch::Tensor index) { TORCH_CHECK(!utils::is_on_gpu(index) && !utils::is_on_gpu(input)); return async([=] { return IndexSelect(input, index); }); } c10::intrusive_ptr> ScatterAsync( torch::Tensor input, torch::Tensor index, torch::Tensor src) { TORCH_CHECK( !utils::is_on_gpu(input) && !utils::is_on_gpu(index) && !utils::is_on_gpu(src)); TORCH_CHECK(index.sizes().size() == 1, "index tensor needs to be 1d."); for (size_t i = 1; i < input.sizes().size(); i++) { TORCH_CHECK( input.size(i) == src.size(i), "dimension mismatch between input and src at ", i, "th dimension: ", input.size(i), " != ", src.size(i), "."); } return async([=] { const auto row_bytes = src.slice(0, 0, 1).numel() * src.element_size(); const auto src_ptr = reinterpret_cast(src.data_ptr()); auto input_ptr = reinterpret_cast(input.data_ptr()); AT_DISPATCH_INDEX_TYPES( index.scalar_type(), "ScatterAsync::index::scalar_type()", ([&] { const auto index_ptr = index.data_ptr(); graphbolt::parallel_for( 0, index.size(0), kIntGrainSize, [&](int64_t begin, int64_t end) { for (int64_t i = begin; i < end; i++) { std::memcpy( input_ptr + index_ptr[i] * row_bytes, src_ptr + i * row_bytes, row_bytes); } }); })); return input; }); } std::tuple IndexSelectCSC( torch::Tensor indptr, torch::Tensor indices, torch::Tensor nodes, torch::optional output_size) { TORCH_CHECK( indices.sizes().size() == 1, "IndexSelectCSC only supports 1d tensors"); if (utils::is_on_gpu(nodes) && utils::is_accessible_from_gpu(indptr) && utils::is_accessible_from_gpu(indices)) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "IndexSelectCSCImpl", { return IndexSelectCSCImpl(indptr, indices, nodes, output_size); }); } auto [output_indptr, results] = IndexSelectCSCBatched( indptr, std::vector{indices}, nodes, false, output_size); 
return std::make_tuple(output_indptr, results.at(0)); } std::tuple> IndexSelectCSCBatched( torch::Tensor indptr, std::vector indices_list, torch::Tensor nodes, bool with_edge_ids, torch::optional output_size) { for (auto& indices : indices_list) { TORCH_CHECK( indices.sizes().size() == 1, "IndexSelectCSCBatched only supports 1d tensors"); } if (utils::is_on_gpu(nodes) && utils::is_accessible_from_gpu(indptr) && utils::are_accessible_from_gpu(indices_list)) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "IndexSelectCSCImpl", { return IndexSelectCSCBatchedImpl( indptr, indices_list, nodes, with_edge_ids, output_size); }); } constexpr int kDefaultGrainSize = 128; const auto num_nodes = nodes.size(0); torch::Tensor output_indptr = torch::empty( {num_nodes + 1}, nodes.options().dtype(indptr.scalar_type())); std::vector results; torch::optional edge_ids; AT_DISPATCH_INDEX_TYPES( indptr.scalar_type(), "IndexSelectCSCBatched::indptr", ([&] { using indptr_t = index_t; const auto indptr_data = indptr.data_ptr(); auto out_indptr_data = output_indptr.data_ptr(); out_indptr_data[0] = 0; AT_DISPATCH_INDEX_TYPES( nodes.scalar_type(), "IndexSelectCSCBatched::nodes", ([&] { const auto nodes_data = nodes.data_ptr(); torch::parallel_for( 0, num_nodes, kDefaultGrainSize, [&](int64_t begin, int64_t end) { for (int64_t i = begin; i < end; i++) { const auto node_id = nodes_data[i]; const auto degree = indptr_data[node_id + 1] - indptr_data[node_id]; out_indptr_data[i + 1] = degree; } }); output_indptr = output_indptr.cumsum(0, indptr.scalar_type()); out_indptr_data = output_indptr.data_ptr(); TORCH_CHECK( !output_size.has_value() || out_indptr_data[num_nodes] == *output_size, "An incorrect output_size argument was provided."); output_size = out_indptr_data[num_nodes]; for (const auto& indices : indices_list) { results.push_back(torch::empty( *output_size, nodes.options().dtype(indices.scalar_type()))); } if (with_edge_ids) { edge_ids = torch::empty( *output_size, 
nodes.options().dtype(indptr.scalar_type())); } torch::parallel_for( 0, num_nodes, kDefaultGrainSize, [&](int64_t begin, int64_t end) { for (int64_t i = begin; i < end; i++) { const auto output_offset = out_indptr_data[i]; const auto numel = out_indptr_data[i + 1] - output_offset; const auto input_offset = indptr_data[nodes_data[i]]; for (size_t tensor_id = 0; tensor_id < indices_list.size(); tensor_id++) { auto output = reinterpret_cast( results[tensor_id].data_ptr()); const auto input = reinterpret_cast( indices_list[tensor_id].data_ptr()); const auto element_size = indices_list[tensor_id].element_size(); std::memcpy( output + output_offset * element_size, input + input_offset * element_size, element_size * numel); } if (edge_ids.has_value()) { auto output = edge_ids->data_ptr(); std::iota( output + output_offset, output + output_offset + numel, input_offset); } } }); })); })); if (edge_ids) results.push_back(*edge_ids); return std::make_tuple(output_indptr, results); } c10::intrusive_ptr< Future>>> IndexSelectCSCBatchedAsync( torch::Tensor indptr, std::vector indices_list, torch::Tensor nodes, bool with_edge_ids, torch::optional output_size) { return async( [=] { return IndexSelectCSCBatched( indptr, indices_list, nodes, with_edge_ids, output_size); }, utils::is_on_gpu(nodes)); } } // namespace ops } // namespace graphbolt ================================================ FILE: graphbolt/src/index_select.h ================================================ /** * Copyright (c) 2023 by Contributors * @file index_select.h * @brief Index select operators. */ #ifndef GRAPHBOLT_INDEX_SELECT_H_ #define GRAPHBOLT_INDEX_SELECT_H_ #include #include namespace graphbolt { namespace ops { /** * @brief Select columns for a sparse matrix in a CSC format according to nodes * tensor. * * NOTE: * 1. The shape of all tensors must be 1-D. * 2. If indices is on pinned memory and nodes is on pinned memory or GPU * memory, then UVAIndexSelectCSCImpl will be called. 
If indices is on GPU
 * memory, then IndexSelectCSCImpl will be called. Otherwise, the CPU
 * implementation in IndexSelectCSCBatched will be used.
 *
 * @param indptr Indptr tensor containing offsets with shape (N,).
 * @param indices Indices tensor with edge information of shape (indptr[N],).
 * @param nodes Nodes tensor with shape (M,).
 * @param output_size The total number of edges being copied.
 * @return (torch::Tensor, torch::Tensor) Output indptr and indices tensors of
 * shapes (M + 1,) and ((indptr[nodes + 1] - indptr[nodes]).sum(),).
 */
std::tuple IndexSelectCSC(
    torch::Tensor indptr, torch::Tensor indices, torch::Tensor nodes,
    torch::optional output_size = torch::nullopt);

/**
 * @brief Select rows from input tensor according to index tensor.
 *
 * NOTE:
 * 1. The shape of input tensor can be multi-dimensional, but the index tensor
 * must be 1-D.
 * 2. If index is on GPU memory: UVAIndexSelectImpl will be called when input
 * is on pinned memory, otherwise torch::index_select will be called.
 * 3. If index is not on GPU memory, the rows are copied by a parallel CPU
 * implementation. Negative indices wrap around once and out-of-range indices
 * raise an error. The output is on pinned memory iff index is.
 *
 * @param input Input tensor with shape (N, ...).
 * @param index Index tensor with shape (M,).
 * @return torch::Tensor Output tensor with shape (M, ...).
 */
torch::Tensor IndexSelect(torch::Tensor input, torch::Tensor index);

/**
 * @brief The async version of IndexSelect, available for only CPU tensors.
 *
 * @param input Input tensor, must not be on the GPU.
 * @param index Index tensor, must not be on the GPU.
 *
 * @return Returns a future containing a torch::Tensor.
 */
c10::intrusive_ptr> IndexSelectAsync(
    torch::Tensor input, torch::Tensor index);

/**
 * @brief The async version of operation input[index] = src.
 *
 * NOTE: None of the tensors may be on the GPU, index must be 1-D and all
 * dimensions of input except the first have to match those of src.
 *
 * @param input The input tensor.
 * @param index The index tensor into input.
 * @param src The src tensor being assigned into input.
 *
 * @return Returns a future containing input, a torch::Tensor.
 */
c10::intrusive_ptr> ScatterAsync(
    torch::Tensor input, torch::Tensor index, torch::Tensor src);

/**
 * @brief Select columns for a sparse matrix in a CSC format according to nodes
 * tensor.
 *
 * NOTE: The shape of all tensors must be 1-D.
 *
 * @param indptr Indptr tensor containing offsets with shape (N,).
 * @param indices_list Vector of indices tensor with edge information of shape
 * (indptr[N],).
 * @param nodes Nodes tensor with shape (M,).
 * @param with_edge_ids Whether to return edge ids tensor corresponding to
 * sliced edges as the last element of the output.
 * @param output_size The total number of edges being copied.
 *
 * @return (torch::Tensor, std::vector) Output indptr and vector
 * of indices tensors of shapes (M + 1,) and ((indptr[nodes + 1] -
 * indptr[nodes]).sum(),).
 */
std::tuple> IndexSelectCSCBatched(
    torch::Tensor indptr, std::vector indices_list,
    torch::Tensor nodes, bool with_edge_ids,
    torch::optional output_size);

/**
 * @brief The async version of IndexSelectCSCBatched; takes the same arguments.
 *
 * @return Returns a future containing the result of IndexSelectCSCBatched.
 */
c10::intrusive_ptr<
    Future>>>
IndexSelectCSCBatchedAsync(
    torch::Tensor indptr, std::vector indices_list,
    torch::Tensor nodes, bool with_edge_ids,
    torch::optional output_size);

}  // namespace ops
}  // namespace graphbolt

#endif  // GRAPHBOLT_INDEX_SELECT_H_

================================================
FILE: graphbolt/src/io_uring.cc
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file io_uring.cc
 * @brief io_uring related functions.
*/
#include "./io_uring.h"

#ifdef HAVE_LIBRARY_LIBURING
#include
#include
#include
#include
#include
#include
#include
#include

// Deleter for std::unique_ptr that releases a probe obtained from
// io_uring_get_probe() with io_uring_free_probe().
struct io_uring_probe_destroyer {
  void operator()(struct io_uring_probe* p) {
    if (p) io_uring_free_probe(p);
  }
};
#endif

namespace graphbolt {
namespace io_uring {

// Returns whether io_uring can be used. Always false if the library was built
// without HAVE_LIBRARY_LIBURING. Otherwise the answer is computed once, under
// std::call_once, and cached:
//  1. the io_uring_register syscall must not fail with ENOSYS, and
//  2. a probe must be obtainable and report support for both IORING_OP_READ
//     and IORING_OP_READV.
bool IsAvailable() {
#ifdef HAVE_LIBRARY_LIBURING
  /** @brief The cached value of whether io_uring is available. */
  static bool cached_is_available;
  /** @brief Ensure cached_is_available is initialized once and thread-safe. */
  static std::once_flag initialization_flag;
  std::call_once(initialization_flag, []() {
    // https://unix.stackexchange.com/a/596284/314554
    cached_is_available =
        !(syscall(
              __NR_io_uring_register, 0, IORING_UNREGISTER_BUFFERS, NULL, 0) &&
          errno == ENOSYS);

    // The probe is freed automatically when it goes out of scope.
    std::unique_ptr probe(
        io_uring_get_probe(), io_uring_probe_destroyer());
    if (probe.get()) {
      cached_is_available =
          cached_is_available &&
          io_uring_opcode_supported(probe.get(), IORING_OP_READ);
      cached_is_available =
          cached_is_available &&
          io_uring_opcode_supported(probe.get(), IORING_OP_READV);
    } else {
      cached_is_available = false;
    }
  });

  return cached_is_available;
#else
  return false;
#endif
}

// Stores count in the num_threads variable declared in io_uring.h.
void SetNumThreads(int64_t count) { num_threads = count; }

}  // namespace io_uring
}  // namespace graphbolt

================================================
FILE: graphbolt/src/io_uring.h
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file io_uring.h
 * @brief io_uring related functions.
 */
#ifndef GRAPHBOLT_IO_URING_H_
#define GRAPHBOLT_IO_URING_H_

#include
#include

namespace graphbolt {
namespace io_uring {

/**
 * @brief Returns whether io_uring is available; false if the library was
 * built without liburing.
 */
bool IsAvailable();

/** @brief Set a limit on # background io_uring threads. */
inline std::optional num_threads;

/**
 * @brief Set the number of background io_uring threads.
 */
void SetNumThreads(int64_t count);

}  // namespace io_uring
}  // namespace graphbolt

#endif  // GRAPHBOLT_IO_URING_H_

================================================
FILE: graphbolt/src/isin.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 *
 * @file isin.cc
 * @brief Isin op.
 */

#include
#include

#include "./macro.h"
#include "./utils.h"

namespace {
// Grain size of the parallel loop over the elements in IsInCPU.
static constexpr int kSearchGrainSize = 4096;
}  // namespace

namespace graphbolt {
namespace sampling {

// CPU implementation of IsIn: sorts test_elements once and then runs a binary
// search for every entry of elements in parallel. Returns a bool tensor
// shaped like elements.
// NOTE(review): both tensors are read with the scalar type of elements, so
// they are presumably expected to share the same integral dtype - confirm.
torch::Tensor IsInCPU(
    const torch::Tensor& elements, const torch::Tensor& test_elements) {
  torch::Tensor sorted_test_elements;
  std::tie(sorted_test_elements, std::ignore) = test_elements.sort(
      /*stable=*/false, /*dim=*/0, /*descending=*/false);
  torch::Tensor result = torch::empty_like(elements, torch::kBool);
  size_t num_test_elements = test_elements.size(0);
  size_t num_elements = elements.size(0);

  AT_DISPATCH_INTEGRAL_TYPES(
      elements.scalar_type(), "IsInOperation", ([&] {
        const scalar_t* elements_ptr = elements.data_ptr();
        const scalar_t* sorted_test_elements_ptr =
            sorted_test_elements.data_ptr();
        bool* result_ptr = result.data_ptr();
        torch::parallel_for(
            0, num_elements, kSearchGrainSize, [&](size_t start, size_t end) {
              for (auto i = start; i < end; i++) {
                result_ptr[i] = std::binary_search(
                    sorted_test_elements_ptr,
                    sorted_test_elements_ptr + num_test_elements,
                    elements_ptr[i]);
              }
            });
      }));
  return result;
}

// Tests for every entry of elements whether it occurs in test_elements.
// Uses ops::IsIn if both tensors are on the GPU and IsInCPU otherwise.
torch::Tensor IsIn(
    const torch::Tensor& elements, const torch::Tensor& test_elements) {
  if (utils::is_on_gpu(elements) && utils::is_on_gpu(test_elements)) {
    GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(
        c10::DeviceType::CUDA, "IsInOperation",
        { return ops::IsIn(elements, test_elements); });
  } else {
    return IsInCPU(elements, test_elements);
  }
}

// Returns the positions of the entries of elements that do NOT occur in
// test_elements: on the CPU as the nonzero positions of the negated IsIn
// mask, on the GPU through ops::Nonzero(mask, true).
torch::Tensor IsNotInIndex(
    const torch::Tensor& elements, const torch::Tensor& test_elements) {
  auto mask = IsIn(elements, test_elements);
  if (utils::is_on_gpu(mask)) {
    GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(
        c10::DeviceType::CUDA, "NonzeroOperation",
        { return ops::Nonzero(mask, true); });
  }
  return torch::nonzero(torch::logical_not(mask)).squeeze(1);
}

// Runs IsNotInIndex asynchronously. The lambda captures by copy, so the
// tensor handles are copied into it.
c10::intrusive_ptr> IsNotInIndexAsync(
    const torch::Tensor& elements, const torch::Tensor& test_elements) {
  return async([=] { return IsNotInIndex(elements, test_elements); });
}

}  // namespace sampling
}  // namespace graphbolt

================================================
FILE: graphbolt/src/macro.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file macro.h
 * @brief Graphbolt macros.
 */

#ifndef GRAPHBOLT_MACRO_H_
#define GRAPHBOLT_MACRO_H_

#include

namespace graphbolt {

// Dispatch operator implementation function to CUDA device only.
// With GRAPHBOLT_USE_CUDA, the statements passed as the variadic argument run
// if device_type is CUDA (XPU is defined for them as the CUDA device type);
// any other device type fails a TORCH_CHECK. Without GRAPHBOLT_USE_CUDA, the
// macro always fails a TORCH_CHECK and the statements are dropped.
#ifdef GRAPHBOLT_USE_CUDA
#define GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(device_type, name, ...) \
  if (device_type == c10::DeviceType::CUDA) {                       \
    [[maybe_unused]] auto XPU = c10::DeviceType::CUDA;              \
    __VA_ARGS__                                                     \
  } else {                                                          \
    TORCH_CHECK(false, name, " is only available on CUDA device."); \
  }
#else
#define GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE(device_type, name, ...) \
  TORCH_CHECK(false, name, " is only available on CUDA device.");
#endif

// This includes all integer, float and boolean types.
// Half and BFloat16 are added on top of AT_DISPATCH_CASE_ALL_TYPES as well.
#define GRAPHBOLT_DISPATCH_CASE_ALL_TYPES(...)            \
  AT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__)                 \
  AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__)     \
  AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \
  AT_DISPATCH_CASE(at::ScalarType::Bool, __VA_ARGS__)

#define GRAPHBOLT_DISPATCH_ALL_TYPES(TYPE, NAME, ...) \
  AT_DISPATCH_SWITCH(TYPE, NAME, GRAPHBOLT_DISPATCH_CASE_ALL_TYPES(__VA_ARGS__))

}  // namespace graphbolt

#endif  // GRAPHBOLT_MACRO_H_

================================================
FILE: graphbolt/src/partitioned_cache_policy.cc
================================================
/**
 *   Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file partitioned_cache_policy.cc
 * @brief Partitioned cache policy implementation on the CPU.
*/
#include "./partitioned_cache_policy.h"

#include
#include
#include

#include "./utils.h"

namespace graphbolt {
namespace storage {

// Grain size of the element-wise parallel loops in this file.
constexpr int kIntGrainSize = 256;

// Returns keys + offset element-wise. For offset == 0 the input tensor itself
// is returned; otherwise a new tensor is allocated that is pinned iff keys
// is. For the index type selected by the if constexpr below, every result is
// checked against the limits of that type before it is cast and stored.
torch::Tensor AddOffset(torch::Tensor keys, int64_t offset) {
  if (offset == 0) return keys;
  auto output = torch::empty_like(
      keys, keys.options().pinned_memory(utils::is_pinned(keys)));
  AT_DISPATCH_INDEX_TYPES(
      keys.scalar_type(), "AddOffset", ([&] {
        auto keys_ptr = keys.data_ptr();
        auto output_ptr = output.data_ptr();
        graphbolt::parallel_for_each(
            0, keys.numel(), kIntGrainSize, [&](int64_t i) {
              const auto result = keys_ptr[i] + offset;
              if constexpr (!std::is_same_v) {
                TORCH_CHECK(
                    std::numeric_limits::min() <= result &&
                    result <= std::numeric_limits::max());
              }
              output_ptr[i] = static_cast(result);
            });
      }));
  return output;
}

// Creates num_partitions policies whose capacities add up to capacity:
// partition i gets (i + 1) * capacity / num_partitions -
// i * capacity / num_partitions entries (integer division). The first,
// unnamed parameter is only used to select the policy type.
template PartitionedCachePolicy::PartitionedCachePolicy(
    CachePolicy, int64_t capacity, int64_t num_partitions)
    : capacity_(capacity) {
  TORCH_CHECK(num_partitions >= 1, "# partitions need to be positive.");
  for (int64_t i = 0; i < num_partitions; i++) {
    const auto begin = i * capacity / num_partitions;
    const auto end = (i + 1) * capacity / num_partitions;
    policies_.emplace_back(std::make_unique(end - begin));
  }
}

// Groups keys by the partition PartAssignment() maps them to, using a
// two-pass parallel counting sort in which worker tid handles the keys in
// [tid * num_keys / num_parts, (tid + 1) * num_keys / num_parts).
// Returns {offsets_sliced, indices, permuted_keys}:
//  - permuted_keys: the keys, grouped by partition id,
//  - indices: indices[p] is the position in keys of permuted_keys[p],
//  - offsets_sliced: num_parts + 1 entries; partition p owns the range
//    [offsets_sliced[p], offsets_sliced[p + 1]) of permuted_keys.
std::tuple
PartitionedCachePolicy::Partition(torch::Tensor keys) {
  const int64_t num_parts = policies_.size();
  // offsets[tid * num_parts + part_id] counts the keys of partition part_id
  // seen by worker tid; the extra last entry is needed for the scan below.
  torch::Tensor offsets = torch::empty(
      num_parts * num_parts + 1, keys.options().dtype(torch::kInt64));
  auto offsets_ptr = offsets.data_ptr();
  std::fill_n(offsets_ptr, offsets.size(0), int64_t{});
  auto indices = torch::empty_like(keys, keys.options().dtype(torch::kInt64));
  auto part_id = torch::empty_like(keys, keys.options().dtype(torch::kInt32));
  const auto num_keys = keys.size(0);
  auto part_id_ptr = part_id.data_ptr();
  // Pass 1: compute the partition id of every key and count per worker.
  AT_DISPATCH_INDEX_TYPES(
      keys.scalar_type(), "PartitionedCachePolicy::partition", ([&] {
        auto keys_ptr = keys.data_ptr();
        namespace gb = graphbolt;
        gb::parallel_for_each(0, num_parts, 1, [&](int64_t tid) {
          const auto begin = tid * num_keys / num_parts;
          const auto end = (tid + 1) * num_keys / num_parts;
          for (int64_t i = begin; i < end; i++) {
            // Shadows the part_id tensor declared above.
            const auto part_id = PartAssignment(keys_ptr[i]);
            offsets_ptr[tid * num_parts + part_id]++;
            part_id_ptr[i] = part_id;
          }
        });
      }));

  // Transpose the offsets tensor, take cumsum and transpose back.
  auto offsets_permuted = torch::empty_like(offsets);
  auto offsets_permuted_ptr = offsets_permuted.data_ptr();
  graphbolt::parallel_for_each(
      0, num_parts * num_parts, kIntGrainSize, [&](int64_t i) {
        const auto part_id = i % num_parts;
        const auto tid = i / num_parts;
        // + 1 so that we have exclusive_scan after torch.cumsum().
        offsets_permuted_ptr[part_id * num_parts + tid + 1] = offsets_ptr[i];
      });
  offsets_permuted_ptr[0] = 0;
  // offsets = offsets_permuted.cumsum(0); @TODO implement this in parallel.
  // After the scan, offsets is laid out partition-major: entry
  // part_id * num_parts + tid is the output position at which worker tid
  // starts writing the keys of partition part_id.
  std::inclusive_scan(
      offsets_permuted_ptr, offsets_permuted_ptr + num_parts * num_parts + 1,
      offsets_ptr);
  offsets_ptr = offsets.data_ptr();
  // Transpose back so that every worker finds its write positions at
  // offsets_permuted[tid * num_parts + part_id].
  graphbolt::parallel_for_each(
      0, num_parts * num_parts, kIntGrainSize, [&](int64_t i) {
        const auto part_id = i % num_parts;
        const auto tid = i / num_parts;
        offsets_permuted_ptr[i] = offsets_ptr[part_id * num_parts + tid];
      });
  auto indices_ptr = indices.data_ptr();
  auto permuted_keys = torch::empty_like(keys);
  auto offsets_sliced = torch::empty(num_parts + 1, offsets.options());
  auto offsets_sliced_ptr = offsets_sliced.data_ptr();
  offsets_sliced_ptr[0] = 0;
  // Pass 2: scatter the keys and their original positions to the output.
  AT_DISPATCH_INDEX_TYPES(
      keys.scalar_type(), "PartitionedCachePolicy::partition", ([&] {
        auto keys_ptr = keys.data_ptr();
        auto permuted_keys_ptr = permuted_keys.data_ptr();
        namespace gb = graphbolt;
        gb::parallel_for_each(0, num_parts, 1, [&](int64_t tid) {
          const auto begin = tid * num_keys / num_parts;
          const auto end = (tid + 1) * num_keys / num_parts;
          for (int64_t i = begin; i < end; i++) {
            const auto part_id = part_id_ptr[i];
            // Write cursor of this worker for this partition.
            auto& offset = offsets_permuted_ptr[tid * num_parts + part_id];
            indices_ptr[offset] = i;
            permuted_keys_ptr[offset++] = keys_ptr[i];
          }
          // tid doubles as a partition id here: the end of partition tid is
          // the first position of partition tid + 1 in the scanned offsets.
          offsets_sliced_ptr[tid + 1] = offsets_ptr[(tid + 1) * num_parts];
        });
      }));
  return {offsets_sliced, indices, permuted_keys};
}

std::tuple<
    torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor,
    torch::Tensor>
PartitionedCachePolicy::Query(torch::Tensor keys, const int64_t offset) {
  keys = AddOffset(keys, offset);
  if (policies_.size() == 1) {
    std::lock_guard lock(mtx_);
    auto [positions, output_indices, missing_keys, found_pointers] =
        policies_[0]->Query(keys);
    auto found_and_missing_offsets = torch::empty(4, found_pointers.options());
    auto found_and_missing_offsets_ptr =
        found_and_missing_offsets.data_ptr();
    // Found offsets part.
    found_and_missing_offsets_ptr[0] = 0;
    found_and_missing_offsets_ptr[1] = found_pointers.size(0);
    // Missing offsets part.
    found_and_missing_offsets_ptr[2] = 0;
    found_and_missing_offsets_ptr[3] = missing_keys.size(0);
    auto found_offsets = found_and_missing_offsets.slice(0, 0, 2);
    auto missing_offsets = found_and_missing_offsets.slice(0, 2);
    missing_keys = AddOffset(missing_keys, -offset);
    return {positions,      output_indices, missing_keys,
            found_pointers, found_offsets,  missing_offsets};
  };

  torch::Tensor offsets, indices, permuted_keys;
  std::tie(offsets, indices, permuted_keys) = Partition(keys);
  auto offsets_ptr = offsets.data_ptr();
  auto indices_ptr = indices.data_ptr();
  std::vector<
      std::tuple>
      results(policies_.size());
  torch::Tensor result_offsets_tensor =
      torch::empty(policies_.size() * 2 + 1, offsets.options());
  auto result_offsets = result_offsets_tensor.data_ptr();
  namespace gb = graphbolt;
  {
    std::lock_guard lock(mtx_);
    gb::parallel_for_each(0, policies_.size(), 1, [&](int64_t tid) {
      const auto begin = offsets_ptr[tid];
      const auto end = offsets_ptr[tid + 1];
      results[tid] =
          policies_.at(tid)->Query(permuted_keys.slice(0, begin, end));
      result_offsets[tid] = std::get<0>(results[tid]).size(0);
      result_offsets[tid + policies_.size()] =
          std::get<2>(results[tid]).size(0);
    });
  }
  std::exclusive_scan(
      result_offsets, result_offsets +
result_offsets_tensor.size(0), result_offsets, 0); torch::Tensor positions = torch::empty( result_offsets[policies_.size()], std::get<0>(results[0]).options().pinned_memory(utils::is_pinned(keys))); torch::Tensor output_indices = torch::empty_like( indices, indices.options().pinned_memory(utils::is_pinned(keys))); torch::Tensor missing_keys = torch::empty( indices.size(0) - positions.size(0), std::get<2>(results[0]).options().pinned_memory(utils::is_pinned(keys))); torch::Tensor found_pointers = torch::empty( positions.size(0), std::get<3>(results[0]).options().pinned_memory(utils::is_pinned(keys))); auto missing_offsets = torch::empty(policies_.size() + 1, result_offsets_tensor.options()); auto output_indices_ptr = output_indices.data_ptr(); auto missing_offsets_ptr = missing_offsets.data_ptr(); missing_offsets_ptr[0] = 0; gb::parallel_for_each(0, policies_.size(), 1, [&](int64_t tid) { auto out_index_ptr = indices_ptr + offsets_ptr[tid]; auto begin = result_offsets[tid]; auto end = result_offsets[tid + 1]; const auto num_selected = end - begin; auto indices_ptr = std::get<1>(results[tid]).data_ptr(); for (int64_t i = 0; i < num_selected; i++) { output_indices_ptr[begin + i] = out_index_ptr[indices_ptr[i]]; } auto selected_positions_ptr = std::get<0>(results[tid]).data_ptr(); std::transform( selected_positions_ptr, selected_positions_ptr + num_selected, positions.data_ptr() + begin, [off = tid * capacity_ / policies_.size()](auto x) { return x + off; }); auto selected_pointers_ptr = std::get<3>(results[tid]).data_ptr(); std::copy( selected_pointers_ptr, selected_pointers_ptr + num_selected, found_pointers.data_ptr() + begin); begin = result_offsets[policies_.size() + tid]; end = result_offsets[policies_.size() + tid + 1]; missing_offsets[tid + 1] = end - result_offsets[policies_.size()]; const auto num_missing = end - begin; for (int64_t i = 0; i < num_missing; i++) { output_indices_ptr[begin + i] = out_index_ptr[indices_ptr[i + num_selected]]; } std::memcpy( 
reinterpret_cast(missing_keys.data_ptr()) + (begin - positions.size(0)) * missing_keys.element_size(), std::get<2>(results[tid]).data_ptr(), num_missing * missing_keys.element_size()); }); auto found_offsets = result_offsets_tensor.slice(0, 0, policies_.size() + 1); missing_keys = AddOffset(missing_keys, -offset); return std::make_tuple( positions, output_indices, missing_keys, found_pointers, found_offsets, missing_offsets); } c10::intrusive_ptr>> PartitionedCachePolicy::QueryAsync(torch::Tensor keys, const int64_t offset) { return async([=] { auto [positions, output_indices, missing_keys, found_pointers, found_offsets, missing_offsets] = Query(keys, offset); return std::vector{positions, output_indices, missing_keys, found_pointers, found_offsets, missing_offsets}; }); } std::tuple< torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor> PartitionedCachePolicy::QueryAndReplace( torch::Tensor keys, const int64_t offset) { keys = AddOffset(keys, offset); if (policies_.size() == 1) { std::lock_guard lock(mtx_); auto [positions, output_indices, pointers, missing_keys] = policies_[0]->QueryAndReplace(keys); auto found_and_missing_offsets = torch::empty(4, pointers.options()); auto found_and_missing_offsets_ptr = found_and_missing_offsets.data_ptr(); // Found offsets part. found_and_missing_offsets_ptr[0] = 0; found_and_missing_offsets_ptr[1] = keys.size(0) - missing_keys.size(0); // Missing offsets part. 
found_and_missing_offsets_ptr[2] = 0; found_and_missing_offsets_ptr[3] = missing_keys.size(0); auto found_offsets = found_and_missing_offsets.slice(0, 0, 2); auto missing_offsets = found_and_missing_offsets.slice(0, 2); missing_keys = AddOffset(missing_keys, -offset); return {positions, output_indices, pointers, missing_keys, found_offsets, missing_offsets}; } torch::Tensor offsets, indices, permuted_keys; std::tie(offsets, indices, permuted_keys) = Partition(keys); auto offsets_ptr = offsets.data_ptr(); auto indices_ptr = indices.data_ptr(); std::vector< std::tuple> results(policies_.size()); torch::Tensor result_offsets_tensor = torch::empty(policies_.size() * 2 + 1, offsets.options()); auto result_offsets = result_offsets_tensor.data_ptr(); namespace gb = graphbolt; { std::lock_guard lock(mtx_); gb::parallel_for_each(0, policies_.size(), 1, [&](int64_t tid) { const auto begin = offsets_ptr[tid]; const auto end = offsets_ptr[tid + 1]; results[tid] = policies_.at(tid)->QueryAndReplace( permuted_keys.slice(0, begin, end)); const auto missing_cnt = std::get<3>(results[tid]).size(0); result_offsets[tid] = end - begin - missing_cnt; result_offsets[tid + policies_.size()] = missing_cnt; }); } std::exclusive_scan( result_offsets, result_offsets + result_offsets_tensor.size(0), result_offsets, 0); torch::Tensor positions = torch::empty( keys.size(0), std::get<0>(results[0]).options().pinned_memory(utils::is_pinned(keys))); torch::Tensor output_indices = torch::empty_like( indices, indices.options().pinned_memory(utils::is_pinned(keys))); torch::Tensor pointers = torch::empty( keys.size(0), std::get<2>(results[0]).options().pinned_memory(utils::is_pinned(keys))); torch::Tensor missing_keys = torch::empty( result_offsets[2 * policies_.size()] - result_offsets[policies_.size()], std::get<3>(results[0]).options().pinned_memory(utils::is_pinned(keys))); auto missing_offsets = torch::empty(policies_.size() + 1, result_offsets_tensor.options()); auto positions_ptr = 
positions.data_ptr(); auto output_indices_ptr = output_indices.data_ptr(); auto pointers_ptr = pointers.data_ptr(); auto missing_offsets_ptr = missing_offsets.data_ptr(); missing_offsets_ptr[0] = 0; gb::parallel_for_each(0, policies_.size(), 1, [&](int64_t tid) { auto out_index_ptr = indices_ptr + offsets_ptr[tid]; auto begin = result_offsets[tid]; auto end = result_offsets[tid + 1]; const auto num_selected = end - begin; auto indices_ptr = std::get<1>(results[tid]).data_ptr(); for (int64_t i = 0; i < num_selected; i++) { output_indices_ptr[begin + i] = out_index_ptr[indices_ptr[i]]; } auto selected_positions_ptr = std::get<0>(results[tid]).data_ptr(); std::transform( selected_positions_ptr, selected_positions_ptr + num_selected, positions_ptr + begin, [off = tid * capacity_ / policies_.size()](auto x) { return x + off; }); auto selected_pointers_ptr = std::get<2>(results[tid]).data_ptr(); std::copy( selected_pointers_ptr, selected_pointers_ptr + num_selected, pointers_ptr + begin); begin = result_offsets[policies_.size() + tid]; end = result_offsets[policies_.size() + tid + 1]; missing_offsets[tid + 1] = end - result_offsets[policies_.size()]; const auto num_missing = end - begin; for (int64_t i = 0; i < num_missing; i++) { output_indices_ptr[begin + i] = out_index_ptr[indices_ptr[i + num_selected]]; } auto missing_positions_ptr = selected_positions_ptr + num_selected; std::transform( missing_positions_ptr, missing_positions_ptr + num_missing, positions_ptr + begin, [off = tid * capacity_ / policies_.size()](auto x) { return x + off; }); auto missing_pointers_ptr = selected_pointers_ptr + num_selected; std::copy( missing_pointers_ptr, missing_pointers_ptr + num_missing, pointers_ptr + begin); std::memcpy( reinterpret_cast(missing_keys.data_ptr()) + (begin - result_offsets[policies_.size()]) * missing_keys.element_size(), std::get<3>(results[tid]).data_ptr(), num_missing * missing_keys.element_size()); }); auto found_offsets = result_offsets_tensor.slice(0, 0, 
policies_.size() + 1); missing_keys = AddOffset(missing_keys, -offset); return std::make_tuple( positions, output_indices, pointers, missing_keys, found_offsets, missing_offsets); } c10::intrusive_ptr>> PartitionedCachePolicy::QueryAndReplaceAsync( torch::Tensor keys, const int64_t offset) { return async([=] { auto [positions, output_indices, pointers, missing_keys, found_offsets, missing_offsets] = QueryAndReplace(keys, offset); return std::vector{positions, output_indices, pointers, missing_keys, found_offsets, missing_offsets}; }); } std::tuple PartitionedCachePolicy::Replace( torch::Tensor keys, torch::optional offsets, const int64_t offset) { keys = AddOffset(keys, offset); if (policies_.size() == 1) { std::lock_guard lock(mtx_); auto [positions, pointers] = policies_[0]->Replace(keys); if (!offsets.has_value()) { offsets = torch::empty(2, pointers.options()); auto offsets_ptr = offsets->data_ptr(); offsets_ptr[0] = 0; offsets_ptr[1] = pointers.size(0); } return {positions, pointers, *offsets}; } const auto offsets_provided = offsets.has_value(); torch::Tensor indices, permuted_keys; if (!offsets_provided) { std::tie(offsets, indices, permuted_keys) = Partition(keys); } else { permuted_keys = keys; } auto output_positions = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto output_pointers = torch::empty_like( keys, keys.options() .dtype(torch::kInt64) .pinned_memory(utils::is_pinned(keys))); auto offsets_ptr = offsets->data_ptr(); auto indices_ptr = offsets_provided ? 
nullptr : indices.data_ptr(); auto output_positions_ptr = output_positions.data_ptr(); auto output_pointers_ptr = output_pointers.data_ptr(); namespace gb = graphbolt; std::unique_lock lock(mtx_); std::atomic semaphore = policies_.size(); gb::parallel_for_each(0, policies_.size(), 1, [&](int64_t tid) { const auto begin = offsets_ptr[tid]; const auto end = offsets_ptr[tid + 1]; auto [positions, pointers] = policies_.at(tid)->Replace(permuted_keys.slice(0, begin, end)); const auto ticket = semaphore.fetch_add(-1, std::memory_order_release) - 1; if (ticket == 0) { // This thread was the last thread in the critical region. lock.unlock(); } auto positions_ptr = positions.data_ptr(); const auto off = tid * capacity_ / policies_.size(); if (indices_ptr) { for (int64_t i = 0; i < positions.size(0); i++) { output_positions_ptr[indices_ptr[begin + i]] = positions_ptr[i] + off; } } else { std::transform( positions_ptr, positions_ptr + positions.size(0), output_positions_ptr + begin, [off](auto x) { return x + off; }); } auto pointers_ptr = pointers.data_ptr(); std::copy( pointers_ptr, pointers_ptr + pointers.size(0), output_pointers_ptr + begin); }); return {output_positions, output_pointers, *offsets}; } c10::intrusive_ptr>> PartitionedCachePolicy::ReplaceAsync( torch::Tensor keys, torch::optional offsets, const int64_t offset) { return async([=] { auto [positions, pointers, offsets_out] = Replace(keys, offsets, offset); return std::vector{positions, pointers, offsets_out}; }); } template void PartitionedCachePolicy::ReadingWritingCompletedImpl( torch::Tensor pointers, torch::Tensor offsets) { if (policies_.size() == 1) { if constexpr (write) policies_[0]->WritingCompleted(pointers); else policies_[0]->ReadingCompleted(pointers); return; } auto offsets_ptr = offsets.data_ptr(); namespace gb = graphbolt; gb::parallel_for_each(0, policies_.size(), 1, [&](int64_t tid) { const auto begin = offsets_ptr[tid]; const auto end = offsets_ptr[tid + 1]; if constexpr (write) 
policies_.at(tid)->WritingCompleted(pointers.slice(0, begin, end)); else policies_.at(tid)->ReadingCompleted(pointers.slice(0, begin, end)); }); } void PartitionedCachePolicy::ReadingCompleted( torch::Tensor pointers, torch::Tensor offsets) { ReadingWritingCompletedImpl(pointers, offsets); } void PartitionedCachePolicy::WritingCompleted( torch::Tensor pointers, torch::Tensor offsets) { ReadingWritingCompletedImpl(pointers, offsets); } c10::intrusive_ptr> PartitionedCachePolicy::ReadingCompletedAsync( torch::Tensor pointers, torch::Tensor offsets) { return async([=] { return ReadingCompleted(pointers, offsets); }); } c10::intrusive_ptr> PartitionedCachePolicy::WritingCompletedAsync( torch::Tensor pointers, torch::Tensor offsets) { return async([=] { return WritingCompleted(pointers, offsets); }); } template c10::intrusive_ptr PartitionedCachePolicy::Create( int64_t capacity, int64_t num_partitions) { static_assert(std::is_base_of_v); return c10::make_intrusive( CachePolicy(), capacity, num_partitions); } template c10::intrusive_ptr PartitionedCachePolicy::Create(int64_t, int64_t); template c10::intrusive_ptr PartitionedCachePolicy::Create(int64_t, int64_t); template c10::intrusive_ptr PartitionedCachePolicy::Create(int64_t, int64_t); template c10::intrusive_ptr PartitionedCachePolicy::Create(int64_t, int64_t); } // namespace storage } // namespace graphbolt ================================================ FILE: graphbolt/src/partitioned_cache_policy.h ================================================ /** * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file partitioned_cache_policy.h
 * @brief Partitioned cache policy implementation on the CPU.
 */
#ifndef GRAPHBOLT_PARTITIONED_CACHE_H_
#define GRAPHBOLT_PARTITIONED_CACHE_H_

#include
#include
#include

#include
#include
#include
#include
#include

#include "./cache_policy.h"

namespace graphbolt {
namespace storage {

/**
 * @brief PartitionedCachePolicy works by partitioning the key space to a set
 * number of partitions that is provided as the second argument of its
 * constructor. Since the partitioning is random but deterministic, the caching
 * policy performance is not affected as the key distribution stays the same in
 * each partition.
 **/
class PartitionedCachePolicy : public torch::CustomClassHolder {
 public:
  /**
   * @brief Constructs the partitioned policy. The first (unnamed) argument
   * only selects the cache policy type used for every partition.
   * @param capacity The capacity of the cache.
   * @param num_partitions The number of caching policies instantiated in a
   * one-to-one mapping to each partition.
   */
  template
  PartitionedCachePolicy(CachePolicy, int64_t capacity, int64_t num_partitions);

  /**
   * @brief The policy query function.
   * @param keys The keys to query the cache.
   * @param offset The offset to be added to the keys.
   *
   * @return (positions, indices, missing_keys, found_ptrs, found_offsets,
   * missing_offsets), where positions has the locations of the keys which were
   * found in the cache, missing_keys has the keys that were not found and
   * indices is defined such that keys[indices[:positions.size(0)]] gives us the
   * keys for the found pointers and keys[indices[positions.size(0):]] is
   * identical to missing_keys. The found_offsets tensor holds the partition
   * offsets for the found pointers. The missing_offsets holds the partition
   * offsets for the missing_keys.
   */
  std::tuple<
      torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor,
      torch::Tensor>
  Query(torch::Tensor keys, int64_t offset);

  /** @brief Asynchronous counterpart of Query(); returns a future. */
  c10::intrusive_ptr>> QueryAsync(
      torch::Tensor keys, int64_t offset);

  /**
   * @brief The policy query and then replace function.
   * @param keys The keys to query the cache.
   * @param offset The offset to be added to the keys.
   *
   * @return (positions, indices, pointers, missing_keys, found_offsets,
   * missing_offsets), where positions has the locations of the keys which were
   * emplaced into the cache, pointers point to the emplaced CacheKey pointers
   * in the cache, missing_keys has the keys that were not found and just
   * inserted and indices is defined such that keys[indices[:keys.size(0) -
   * missing_keys.size(0)]] gives us the keys for the found keys and
   * keys[indices[keys.size(0) - missing_keys.size(0):]] is identical to
   * missing_keys. The found_offsets tensor holds the partition offsets for the
   * found pointers. The missing_offsets holds the partition offsets for the
   * missing_keys and missing pointers.
   */
  std::tuple<
      torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor,
      torch::Tensor>
  QueryAndReplace(torch::Tensor keys, int64_t offset);

  /** @brief Asynchronous counterpart of QueryAndReplace(); returns a future. */
  c10::intrusive_ptr>> QueryAndReplaceAsync(
      torch::Tensor keys, int64_t offset);

  /**
   * @brief The policy replace function.
   * @param keys The keys to insert into the cache.
   * @param offsets The partition offsets for the keys (optional).
   * @param offset The offset to be added to the keys.
   *
   * @return (positions, pointers, offsets), where positions holds the locations
   * of the replaced entries in the cache, pointers holds the CacheKey pointers
   * for the inserted keys and offsets holds the partition offsets for pointers.
   */
  std::tuple Replace(
      torch::Tensor keys, torch::optional offsets, int64_t offset);

  /** @brief Asynchronous counterpart of Replace(); returns a future. */
  c10::intrusive_ptr>> ReplaceAsync(
      torch::Tensor keys, torch::optional offsets, int64_t offset);

  /**
   * @brief Common implementation behind ReadingCompleted() and
   * WritingCompleted().
   * @param pointers The CacheKey pointers in the cache to unmark.
   * @param offsets The partition offsets for the pointers.
   */
  template
  void ReadingWritingCompletedImpl(
      torch::Tensor pointers, torch::Tensor offsets);

  /**
   * @brief A reader has finished reading these keys, so they can be
   * evicted.
   * @param pointers The CacheKey pointers in the cache to unmark.
   * @param offsets The partition offsets for the pointers.
   */
  void ReadingCompleted(torch::Tensor pointers, torch::Tensor offsets);

  /**
   * @brief A writer has finished writing these keys, so they can be evicted.
   * @param pointers The CacheKey pointers in the cache to unmark.
   * @param offsets The partition offsets for the pointers.
   */
  void WritingCompleted(torch::Tensor pointers, torch::Tensor offsets);

  /** @brief Asynchronous counterpart of ReadingCompleted(). */
  c10::intrusive_ptr> ReadingCompletedAsync(
      torch::Tensor pointers, torch::Tensor offsets);

  /** @brief Asynchronous counterpart of WritingCompleted(). */
  c10::intrusive_ptr> WritingCompletedAsync(
      torch::Tensor pointers, torch::Tensor offsets);

  /**
   * @brief Factory function.
   * @param capacity The capacity of the cache.
   * @param num_partitions The number of partitions.
   */
  template
  static c10::intrusive_ptr Create(
      int64_t capacity, int64_t num_partitions);

 private:
  // Fixed seed, so that the key-to-partition mapping never changes.
  static constexpr uint64_t seed = 1e9 + 7;

  /**
   * @brief Deterministic assignment of keys to different parts. A pcg32
   * generator is initialized from (seed, key) and a single value is drawn
   * uniformly from [0, policies_.size() - 1].
   */
  int32_t PartAssignment(int64_t key) {
    pcg32 rng(seed, key);
    std::uniform_int_distribution dist(0, policies_.size() - 1);
    return dist(rng);
  }

  /**
   * @brief The partition function for a given keys tensor.
   * @param keys The keys to query the cache.
   *
   * @return (offsets, indices, permuted_keys), the returned tensors have the
   * following properties:
   * permuted_keys[offsets[i]: offsets[i + 1]] belong to part i and
   * keys[indices] == permuted_keys
   */
  std::tuple Partition(
      torch::Tensor keys);

  // Total capacity, as passed to the constructor.
  int64_t capacity_;
  // One policy per partition.
  std::vector> policies_;
  // Mutex guarding the policies.
  std::mutex mtx_;
};

}  // namespace storage
}  // namespace graphbolt

#endif  // GRAPHBOLT_PARTITIONED_CACHE_H_

================================================
FILE: graphbolt/src/python_binding.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 *  @file python_binding.cc
 *  @brief Graph bolt library Python binding.
 */

#include
#include
#include
#include

#ifdef GRAPHBOLT_USE_CUDA
#include "./cuda/cooperative_minibatching_utils.h"
#include "./cuda/max_uva_threads.h"
#endif
#include "./cnumpy.h"
#include "./feature_cache.h"
#include "./index_select.h"
#include "./io_uring.h"
#include "./partitioned_cache_policy.h"
#include "./random.h"
#include "./utils.h"

#ifdef GRAPHBOLT_USE_CUDA
#include "./cuda/extension/gpu_cache.h"
#include "./cuda/extension/gpu_graph_cache.h"
#endif

namespace graphbolt {
namespace sampling {

// Registers the custom classes and operators of the "graphbolt" library.
TORCH_LIBRARY(graphbolt, m) {
  // Sampled subgraph container: default constructible, every field is
  // readable and writable.
  m.class_("FusedSampledSubgraph")
      .def(torch::init<>())
      .def_readwrite("indptr", &FusedSampledSubgraph::indptr)
      .def_readwrite("indices", &FusedSampledSubgraph::indices)
      .def_readwrite(
          "original_row_node_ids", &FusedSampledSubgraph::original_row_node_ids)
      .def_readwrite(
          "original_column_node_ids",
          &FusedSampledSubgraph::original_column_node_ids)
      .def_readwrite(
          "original_edge_ids", &FusedSampledSubgraph::original_edge_ids)
      .def_readwrite("type_per_edge", &FusedSampledSubgraph::type_per_edge)
      .def_readwrite("etype_offsets", &FusedSampledSubgraph::etype_offsets);
  // Future types; each of them only exposes wait().
  m.class_>("VoidFuture").def("wait", &Future::Wait);
  m.class_>("TensorFuture")
      .def("wait", &Future::Wait);
  m.class_>>("TensorListFuture")
      .def("wait", &Future>::Wait);
  m.class_>>(
       "FusedSampledSubgraphFuture")
      .def("wait", &Future>::Wait);
  m.class_>>>(
       "UniqueAndCompactBatchedFuture")
      .def(
          "wait",
          &Future>>::
              Wait);
  m.class_>>>(
       "RankSortFuture")
      .def(
          "wait",
          &Future>>::Wait);
  m.class_>>(
       "GpuGraphCacheQueryFuture")
      .def(
          "wait",
          &Future>::
              Wait);
  m.class_>>>(
       "GpuGraphCacheReplaceFuture")
      .def(
          "wait",
          &Future>>::Wait);
  m.class_("OnDiskNpyArray")
      .def("index_select", &storage::OnDiskNpyArray::IndexSelect);
  // Sampling graph: getters, setters, sampling entry points and pickling.
  m.class_("FusedCSCSamplingGraph")
      .def("num_nodes", &FusedCSCSamplingGraph::NumNodes)
      .def("num_edges", &FusedCSCSamplingGraph::NumEdges)
      .def("csc_indptr", &FusedCSCSamplingGraph::CSCIndptr)
      .def("indices", &FusedCSCSamplingGraph::Indices)
      .def("node_type_offset", &FusedCSCSamplingGraph::NodeTypeOffset)
      .def("type_per_edge", &FusedCSCSamplingGraph::TypePerEdge)
      .def("node_type_to_id", &FusedCSCSamplingGraph::NodeTypeToID)
      .def("edge_type_to_id", &FusedCSCSamplingGraph::EdgeTypeToID)
      .def("node_attributes", &FusedCSCSamplingGraph::NodeAttributes)
      .def("edge_attributes", &FusedCSCSamplingGraph::EdgeAttributes)
      .def("node_attribute", &FusedCSCSamplingGraph::NodeAttribute)
      .def("edge_attribute", &FusedCSCSamplingGraph::EdgeAttribute)
      .def("set_csc_indptr", &FusedCSCSamplingGraph::SetCSCIndptr)
      .def("set_indices", &FusedCSCSamplingGraph::SetIndices)
      .def("set_node_type_offset", &FusedCSCSamplingGraph::SetNodeTypeOffset)
      .def("set_type_per_edge", &FusedCSCSamplingGraph::SetTypePerEdge)
      .def("set_node_type_to_id", &FusedCSCSamplingGraph::SetNodeTypeToID)
      .def("set_edge_type_to_id", &FusedCSCSamplingGraph::SetEdgeTypeToID)
      .def("set_node_attributes", &FusedCSCSamplingGraph::SetNodeAttributes)
      .def("set_edge_attributes", &FusedCSCSamplingGraph::SetEdgeAttributes)
      .def("add_node_attribute", &FusedCSCSamplingGraph::AddNodeAttribute)
      .def("add_edge_attribute", &FusedCSCSamplingGraph::AddEdgeAttribute)
      .def("in_subgraph", &FusedCSCSamplingGraph::InSubgraph)
      .def("sample_neighbors", &FusedCSCSamplingGraph::SampleNeighbors)
      .def(
          "sample_neighbors_async",
          &FusedCSCSamplingGraph::SampleNeighborsAsync)
      .def(
          "temporal_sample_neighbors",
          &FusedCSCSamplingGraph::TemporalSampleNeighbors)
      .def("copy_to_shared_memory", &FusedCSCSamplingGraph::CopyToSharedMemory)
      .def_pickle(
          // __getstate__
          [](const c10::intrusive_ptr& self)
              -> torch::Dict<
                  std::string, torch::Dict> {
            return self->GetState();
          },
          // __setstate__
          [](torch::Dict> state)
              -> c10::intrusive_ptr {
            // Rebuild an empty graph and load the pickled state into it.
            auto g = c10::make_intrusive();
            g->SetState(state);
            return g;
          });
#ifdef GRAPHBOLT_USE_CUDA
  // GPU caches are only registered in CUDA builds.
  m.class_("GpuCache")
      .def("query", &cuda::GpuCache::Query)
      .def("query_async", &cuda::GpuCache::QueryAsync)
      .def("replace", &cuda::GpuCache::Replace);
  m.def("gpu_cache", &cuda::GpuCache::Create);
  m.class_("GpuGraphCache")
      .def("query", &cuda::GpuGraphCache::Query)
      .def("query_async", &cuda::GpuGraphCache::QueryAsync)
      .def("replace", &cuda::GpuGraphCache::Replace)
      .def("replace_async", &cuda::GpuGraphCache::ReplaceAsync);
  m.def("gpu_graph_cache", &cuda::GpuGraphCache::Create);
#endif
  m.def("fused_csc_sampling_graph", &FusedCSCSamplingGraph::Create);
  // CPU cache policy together with one factory function per policy name.
  m.class_("PartitionedCachePolicy")
      .def("query", &storage::PartitionedCachePolicy::Query)
      .def("query_async", &storage::PartitionedCachePolicy::QueryAsync)
      .def(
          "query_and_replace",
          &storage::PartitionedCachePolicy::QueryAndReplace)
      .def(
          "query_and_replace_async",
          &storage::PartitionedCachePolicy::QueryAndReplaceAsync)
      .def("replace", &storage::PartitionedCachePolicy::Replace)
      .def("replace_async", &storage::PartitionedCachePolicy::ReplaceAsync)
      .def(
          "reading_completed",
          &storage::PartitionedCachePolicy::ReadingCompleted)
      .def(
          "reading_completed_async",
          &storage::PartitionedCachePolicy::ReadingCompletedAsync)
      .def(
          "writing_completed",
          &storage::PartitionedCachePolicy::WritingCompleted)
      .def(
          "writing_completed_async",
          &storage::PartitionedCachePolicy::WritingCompletedAsync);
  m.def(
      "s3_fifo_cache_policy",
      &storage::PartitionedCachePolicy::Create);
  m.def(
      "sieve_cache_policy",
      &storage::PartitionedCachePolicy::Create);
  m.def(
      "lru_cache_policy",
      &storage::PartitionedCachePolicy::Create);
  m.def(
      "clock_cache_policy",
      &storage::PartitionedCachePolicy::Create);
  m.class_("FeatureCache")
      .def("is_pinned", &storage::FeatureCache::IsPinned)
      .def_property("nbytes", &storage::FeatureCache::NumBytes)
      .def("index_select", &storage::FeatureCache::IndexSelect)
      .def("query", &storage::FeatureCache::Query)
      .def("query_async", &storage::FeatureCache::QueryAsync)
      .def("replace", &storage::FeatureCache::Replace)
      .def("replace_async", &storage::FeatureCache::ReplaceAsync);
  m.def("feature_cache", &storage::FeatureCache::Create);
  // Free functions.
  m.def(
      "load_from_shared_memory", &FusedCSCSamplingGraph::LoadFromSharedMemory);
  m.def("unique_and_compact", &UniqueAndCompact);
  m.def("unique_and_compact_batched", &UniqueAndCompactBatched);
  m.def("unique_and_compact_batched_async", &UniqueAndCompactBatchedAsync);
  m.def("isin", &IsIn);
  m.def("is_not_in_index", &IsNotInIndex);
  m.def("is_not_in_index_async", &IsNotInIndexAsync);
  m.def("index_select", &ops::IndexSelect);
  m.def("index_select_async", &ops::IndexSelectAsync);
  m.def("scatter_async", &ops::ScatterAsync);
  m.def("index_select_csc", &ops::IndexSelectCSC);
  m.def("index_select_csc_batched", &ops::IndexSelectCSCBatched);
  m.def("index_select_csc_batched_async", &ops::IndexSelectCSCBatchedAsync);
  m.def("ondisk_npy_array", &storage::OnDiskNpyArray::Create);
  m.def("detect_io_uring", &io_uring::IsAvailable);
  m.def("set_num_io_uring_threads", &io_uring::SetNumThreads);
  m.def("set_worker_id", &utils::SetWorkerId);
  m.def("set_seed", &RandomEngine::SetManualSeed);
#ifdef GRAPHBOLT_USE_CUDA
  m.def("set_max_uva_threads", &cuda::set_max_uva_threads);
  m.def("rank_sort", &cuda::RankSort);
  m.def("rank_sort_async", &cuda::RankSortAsync);
#endif
#ifdef HAS_IMPL_ABSTRACT_PYSTUB
  m.impl_abstract_pystub("dgl.graphbolt.base", "//dgl.graphbolt.base");
#endif
  // The two operators below are declared by schema string only; no
  // implementation is bound at this point.
  m.def(
      "expand_indptr(Tensor indptr, ScalarType dtype, Tensor? node_ids, "
      "SymInt? output_size) -> Tensor"
#ifdef HAS_PT2_COMPLIANT_TAG
      ,
      {at::Tag::pt2_compliant_tag}
#endif
  );
  m.def(
      "indptr_edge_ids(Tensor indptr, ScalarType dtype, Tensor? offset, "
      "SymInt? output_size) -> "
      "Tensor"
#ifdef HAS_PT2_COMPLIANT_TAG
      ,
      {at::Tag::pt2_compliant_tag}
#endif
  );
}

}  // namespace sampling
}  // namespace graphbolt

================================================
FILE: graphbolt/src/random.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file random.cc
 * @brief Random Engine.
 */

#include "./random.h"

#include

namespace graphbolt {

namespace {

// Get a unique integer ID representing this thread.
// The first call made on a thread takes the next value of a process-wide
// counter under a mutex and caches it in a thread_local; later calls on the
// same thread return the cached value.
inline uint32_t GetThreadId() {
  static int num_threads = 0;
  static std::mutex mutex;
  static thread_local int id = -1;
  if (id == -1) {
    std::lock_guard guard(mutex);
    id = num_threads;
    num_threads++;
  }
  return id;
}

};  // namespace

// Definitions of the static members declared in RandomEngine.
std::mutex RandomEngine::manual_seed_mutex;
std::optional RandomEngine::manual_seed;

/**
 * @brief Constructor with default seed. Uses manual_seed when it is set;
 * otherwise a value from std::random_device is stored in manual_seed first.
 */
RandomEngine::RandomEngine() {
  std::random_device rd;
  std::lock_guard lock(manual_seed_mutex);
  if (!manual_seed.has_value()) manual_seed = rd();
  SetSeed(manual_seed.value());
}

/** @brief Constructor with given seed; the stream is the thread ID. */
RandomEngine::RandomEngine(uint64_t seed)
    : RandomEngine(seed, GetThreadId()) {}

/** @brief Constructor with given seed and stream. */
RandomEngine::RandomEngine(uint64_t seed, uint64_t stream) {
  SetSeed(seed, stream);
}

/** @brief Get the thread-local random number generator instance. */
RandomEngine* RandomEngine::ThreadLocal() {
  static thread_local RandomEngine engine;
  return &engine;
}

/** @brief Set the seed; the stream is the thread ID. */
void RandomEngine::SetSeed(uint64_t seed) { SetSeed(seed, GetThreadId()); }

/** @brief Set the seed and the stream of the underlying generator. */
void RandomEngine::SetSeed(uint64_t seed, uint64_t stream) {
  rng_.seed(seed, stream);
}

/**
 * @brief Manually fix the seed. The value is recorded in manual_seed, which
 * the default constructor reads.
 */
void RandomEngine::SetManualSeed(int64_t seed) {
  // Intentionally set the seed for current thread also.
  RandomEngine::ThreadLocal()->SetSeed(seed);
  std::lock_guard lock(manual_seed_mutex);
  manual_seed = seed;
}

}  // namespace graphbolt

================================================
FILE: graphbolt/src/random.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 *
 * @file random.h
 * @brief Random Engine class.
 */
#ifndef GRAPHBOLT_RANDOM_H_
#define GRAPHBOLT_RANDOM_H_

#include
#include
#include
#include
#include

namespace graphbolt {

/**
 * @brief Thread-local Random Number Generator class.
 */
class RandomEngine {
 public:
  /** @brief Constructor with default seed. */
  RandomEngine();

  /** @brief Constructor with given seed. */
  explicit RandomEngine(uint64_t seed);
  /** @brief Constructor with given seed and stream. */
  explicit RandomEngine(uint64_t seed, uint64_t stream);

  /** @brief Get the thread-local random number generator instance. */
  static RandomEngine* ThreadLocal();

  /** @brief Set the seed. */
  void SetSeed(uint64_t seed);
  /** @brief Set the seed and the stream. */
  void SetSeed(uint64_t seed, uint64_t stream);

  /** @brief Protect manual seed accesses. */
  static std::mutex manual_seed_mutex;

  /** @brief The manually fixed seed, if one has been set. */
  static std::optional manual_seed;
  /** @brief Manually fix the seed. */
  static void SetManualSeed(int64_t seed);

  /**
   * @brief Generate a uniform random integer in [lower, upper).
   */
  template
  T RandInt(T lower, T upper) {
    // The distribution bounds are inclusive, hence upper - 1.
    std::uniform_int_distribution dist(lower, upper - 1);
    return dist(rng_);
  }

  /**
   * @brief Generate a uniform random real number in [lower, upper).
   */
  template
  T Uniform(T lower, T upper) {
    std::uniform_real_distribution dist(lower, upper);
    return dist(rng_);
  }

  /**
   * @brief Generate random non-negative floating-point values according to
   * exponential distribution. Probability density function: P(x|λ) = λe^(-λx).
   */
  template
  T Exponential(T lambda) {
    std::exponential_distribution dist(lambda);
    return dist(rng_);
  }

 private:
  // The underlying generator.
  pcg32 rng_;
};

}  // namespace graphbolt

#endif  // GRAPHBOLT_RANDOM_H_

================================================
FILE: graphbolt/src/serialize.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file graphbolt/src/serialize.cc
 * @brief Source file of serialize.
 */

#include
#include

namespace torch {

/** @brief Reads `graph` from `archive` via graph.Load(); returns archive. */
serialize::InputArchive& operator>>(
    serialize::InputArchive& archive,
    graphbolt::sampling::FusedCSCSamplingGraph& graph) {
  graph.Load(archive);
  return archive;
}

/** @brief Writes `graph` to `archive` via graph.Save(); returns archive. */
serialize::OutputArchive& operator<<(
    serialize::OutputArchive& archive,
    const graphbolt::sampling::FusedCSCSamplingGraph& graph) {
  graph.Save(archive);
  return archive;
}

}  // namespace torch

================================================
FILE: graphbolt/src/shared_memory.cc
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file shared_memory.cc
 * @brief Source file of graphbolt shared memory.
 */
#ifndef _WIN32
#include
#include
#include
#include
#endif  // !_WIN32

#include
#include
#include
#include

namespace graphbolt {
namespace sampling {

// Two processes opening the same path are guaranteed to access the same shared
// memory object if and only if path begins with a slash ('/') character.
constexpr char kSharedMemNamePrefix[] = "/dgl.graphbolt.";
constexpr char kSharedMemNameSuffix[] = ".lock";

// A prefix and a suffix are added to the name of the shared memory to create
// the name of the shared memory object.
inline std::string DecorateName(const std::string& name) { return kSharedMemNamePrefix + name + kSharedMemNameSuffix; } SharedMemory::SharedMemory(const std::string& name) : name_(name), size_(0), ptr_(nullptr) { #ifdef _WIN32 this->handle_ = nullptr; #else // _WIN32 this->file_descriptor_ = -1; this->is_creator_ = false; #endif // _WIN32 } #ifdef _WIN32 SharedMemory::~SharedMemory() { if (ptr_) CHECK(UnmapViewOfFile(ptr_)) << "Win32 Error: " << GetLastError(); if (handle_) CloseHandle(handle_); } void* SharedMemory::Create(size_t size) { size_ = size; std::string decorated_name = DecorateName(name_); handle_ = CreateFileMapping( INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, static_cast(size >> 32), static_cast(size & 0xFFFFFFFF), decorated_name.c_str()); TORCH_CHECK( handle_ != nullptr, "Failed to open ", decorated_name, ", Win32 error: ", GetLastError()); ptr_ = MapViewOfFile(handle_, FILE_MAP_ALL_ACCESS, 0, 0, size); TORCH_CHECK( ptr_ != nullptr, "Memory mapping failed, Win32 error: ", GetLastError()); return ptr_; } void* SharedMemory::Open() { std::string decorated_name = DecorateName(name_); handle_ = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, decorated_name.c_str()); TORCH_CHECK( handle_ != nullptr, "Failed to open ", decorated_name, ", Win32 Error: ", GetLastError()); ptr_ = MapViewOfFile(handle_, FILE_MAP_ALL_ACCESS, 0, 0, 0); TORCH_CHECK( ptr_ != nullptr, "Memory mapping failed, Win32 error: ", GetLastError()); // Obtain the size of the memory-mapped file. 
MEMORY_BASIC_INFORMATION memInfo; TORCH_CHECK( VirtualQuery(ptr_, &memInfo, sizeof(memInfo)) != 0, "Failed to get the size of shared memory: ", GetLastError()); size_ = static_cast(memInfo.RegionSize); return ptr_; } bool SharedMemory::Exists(const std::string& name) { std::string decorated_name = DecorateName(name); HANDLE handle = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, decorated_name.c_str()); bool exists = handle != nullptr; if (exists) { CloseHandle(handle); } return exists; } #else // _WIN32 SharedMemory::~SharedMemory() { if (ptr_ && size_ != 0) CHECK(munmap(ptr_, size_) != -1) << strerror(errno); if (file_descriptor_ != -1) close(file_descriptor_); std::string decorated_name = DecorateName(name_); if (is_creator_ && decorated_name != "") shm_unlink(decorated_name.c_str()); } void *SharedMemory::Create(size_t size) { size_ = size; is_creator_ = true; // TODO(zhenkun): handle the error properly if the shared memory object // already exists. std::string decorated_name = DecorateName(name_); file_descriptor_ = shm_open(decorated_name.c_str(), O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); TORCH_CHECK(file_descriptor_ != -1, "Failed to open: ", strerror(errno)); auto status = ftruncate(file_descriptor_, size); TORCH_CHECK(status != -1, "Failed to truncate the file: ", strerror(errno)); ptr_ = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, file_descriptor_, 0); TORCH_CHECK( ptr_ != MAP_FAILED, "Failed to map shared memory, mmap failed with error: ", strerror(errno)); return ptr_; } void *SharedMemory::Open() { std::string decorated_name = DecorateName(name_); file_descriptor_ = shm_open(decorated_name.c_str(), O_RDWR, S_IRUSR | S_IWUSR); TORCH_CHECK( file_descriptor_ != -1, "Failed to open ", decorated_name, ": ", strerror(errno)); struct stat shm_stat; TORCH_CHECK( fstat(file_descriptor_, &shm_stat) == 0, "Failed to get the size of shared memory: ", strerror(errno)); size_ = shm_stat.st_size; ptr_ = mmap( NULL, size_, PROT_READ | PROT_WRITE, MAP_SHARED, 
file_descriptor_, 0); TORCH_CHECK( ptr_ != MAP_FAILED, "Failed to map shared memory, mmap failed with error: ", strerror(errno)); return ptr_; } bool SharedMemory::Exists(const std::string &name) { std::string decorated_name = DecorateName(name); int file_descriptor = shm_open(decorated_name.c_str(), O_RDONLY, S_IRUSR | S_IWUSR); bool exists = file_descriptor > 0; if (exists) { close(file_descriptor); } return exists; } #endif // _WIN32 } // namespace sampling } // namespace graphbolt ================================================ FILE: graphbolt/src/shared_memory_helper.cc ================================================ /** * Copyright (c) 2023 by Contributors * * @file shared_memory_helper.cc * @brief Share memory helper implementation. */ #include "./shared_memory_helper.h" #include #include #include #include #include #include #include namespace graphbolt { namespace sampling { static std::string GetSharedMemoryMetadataName(const std::string& name) { return name + "_metadata"; } static std::string GetSharedMemoryDataName(const std::string& name) { return name + "_data"; } // To avoid unaligned memory access, we round the size of the binary buffer to // the nearest multiple of 8 bytes. inline static int64_t GetRoundedSize(int64_t size) { constexpr int64_t ALIGNED_SIZE = 8; return (size + ALIGNED_SIZE - 1) / ALIGNED_SIZE * ALIGNED_SIZE; } SharedMemoryHelper::SharedMemoryHelper(const std::string& name) : name_(name), metadata_size_(0), data_size_(0), metadata_shared_memory_(nullptr), data_shared_memory_(nullptr), metadata_offset_(0), data_offset_(0) {} void SharedMemoryHelper::InitializeRead() { metadata_offset_ = 0; data_offset_ = 0; if (metadata_shared_memory_ == nullptr) { // Reader process opens the shared memory. 
metadata_shared_memory_ = std::make_unique(GetSharedMemoryMetadataName(name_)); metadata_shared_memory_->Open(); metadata_size_ = metadata_shared_memory_->GetSize(); data_shared_memory_ = std::make_unique(GetSharedMemoryDataName(name_)); data_shared_memory_->Open(); data_size_ = data_shared_memory_->GetSize(); } } void SharedMemoryHelper::WriteTorchArchive( torch::serialize::OutputArchive&& archive) { metadata_to_write_.emplace_back(std::move(archive)); } torch::serialize::InputArchive SharedMemoryHelper::ReadTorchArchive() { auto metadata_ptr = this->GetCurrentMetadataPtr(); int64_t metadata_size = static_cast(metadata_ptr)[0]; torch::serialize::InputArchive archive; archive.load_from( static_cast(metadata_ptr) + sizeof(int64_t), metadata_size); auto rounded_size = GetRoundedSize(metadata_size); this->MoveMetadataPtr(sizeof(int64_t) + rounded_size); return archive; } void SharedMemoryHelper::WriteTorchTensor( torch::optional tensor) { torch::serialize::OutputArchive archive; archive.write("has_value", tensor.has_value()); if (tensor.has_value()) { archive.write("shape", tensor.value().sizes()); archive.write("dtype", tensor.value().scalar_type()); } this->WriteTorchArchive(std::move(archive)); tensors_to_write_.push_back(tensor); } torch::optional SharedMemoryHelper::ReadTorchTensor() { auto archive = this->ReadTorchArchive(); bool has_value = read_from_archive(archive, "has_value"); if (has_value) { auto shape = read_from_archive>(archive, "shape"); auto dtype = read_from_archive(archive, "dtype"); auto data_ptr = this->GetCurrentDataPtr(); auto tensor = torch::from_blob(data_ptr, shape, dtype); auto rounded_size = GetRoundedSize(tensor.numel() * tensor.element_size()); this->MoveDataPtr(rounded_size); return tensor; } else { return torch::nullopt; } } void SharedMemoryHelper::WriteTorchTensorDict( torch::optional> tensor_dict) { torch::serialize::OutputArchive archive; if (!tensor_dict.has_value()) { archive.write("has_value", false); 
this->WriteTorchArchive(std::move(archive)); return; } archive.write("has_value", true); auto dict_value = tensor_dict.value(); archive.write("num_tensors", static_cast(dict_value.size())); int counter = 0; for (auto it = dict_value.begin(); it != dict_value.end(); ++it) { archive.write(std::string("key_") + std::to_string(counter), it->key()); counter++; } this->WriteTorchArchive(std::move(archive)); for (auto it = dict_value.begin(); it != dict_value.end(); ++it) { this->WriteTorchTensor(it->value()); } } torch::optional> SharedMemoryHelper::ReadTorchTensorDict() { auto archive = this->ReadTorchArchive(); if (!read_from_archive(archive, "has_value")) { return torch::nullopt; } int64_t num_tensors = read_from_archive(archive, "num_tensors"); torch::Dict tensor_dict; for (int64_t i = 0; i < num_tensors; ++i) { auto key = read_from_archive( archive, std::string("key_") + std::to_string(i)); auto tensor = this->ReadTorchTensor(); tensor_dict.insert(key, tensor.value()); } return tensor_dict; } void SharedMemoryHelper::SerializeMetadata() { for (auto& archive : metadata_to_write_) { std::stringstream serialized; archive.save_to(serialized); metadata_strings_to_write_.push_back(std::move(serialized.str())); } metadata_to_write_.clear(); } void SharedMemoryHelper::WriteMetadataToSharedMemory() { metadata_offset_ = 0; for (const auto& str : metadata_strings_to_write_) { auto metadata_ptr = this->GetCurrentMetadataPtr(); static_cast(metadata_ptr)[0] = str.size(); memcpy( static_cast(metadata_ptr) + sizeof(int64_t), str.data(), str.size()); int64_t rounded_size = GetRoundedSize(str.size()); this->MoveMetadataPtr(sizeof(int64_t) + rounded_size); } metadata_strings_to_write_.clear(); } void SharedMemoryHelper::WriteTorchTensorInternal( torch::optional tensor) { if (tensor.has_value()) { size_t memory_size = tensor.value().numel() * tensor.value().element_size(); auto data_ptr = this->GetCurrentDataPtr(); auto contiguous_tensor = tensor.value().contiguous(); memcpy(data_ptr, 
contiguous_tensor.data_ptr(), memory_size); this->MoveDataPtr(GetRoundedSize(memory_size)); } } void SharedMemoryHelper::Flush() { size_t data_size = 0; for (auto tensor : tensors_to_write_) { if (tensor.has_value()) { auto tensor_size = tensor.value().numel() * tensor.value().element_size(); data_size += GetRoundedSize(tensor_size); } } // Serialize the metadata archives. SerializeMetadata(); // Create the shared memory objects. const size_t metadata_size = std::accumulate( metadata_strings_to_write_.begin(), metadata_strings_to_write_.end(), 0, [](size_t sum, const std::string& str) { return sum + sizeof(int64_t) + GetRoundedSize(str.size()); }); metadata_shared_memory_ = std::make_unique(GetSharedMemoryMetadataName(name_)); metadata_shared_memory_->Create(metadata_size); metadata_size_ = metadata_size; // Write the metadata and tensor data to the shared memory. WriteMetadataToSharedMemory(); data_shared_memory_ = std::make_unique(GetSharedMemoryDataName(name_)); data_shared_memory_->Create(data_size); data_size_ = data_size; data_offset_ = 0; for (auto tensor : tensors_to_write_) { this->WriteTorchTensorInternal(tensor); } metadata_to_write_.clear(); tensors_to_write_.clear(); } std::pair SharedMemoryHelper::ReleaseSharedMemory() { return std::make_pair( std::move(metadata_shared_memory_), std::move(data_shared_memory_)); } } // namespace sampling } // namespace graphbolt ================================================ FILE: graphbolt/src/shared_memory_helper.h ================================================ /** * Copyright (c) 2023 by Contributors * * @file shared_memory_helper.h * @brief Share memory helper. */ #ifndef GRAPHBOLT_SHARED_MEMORY_HELPER_H_ #define GRAPHBOLT_SHARED_MEMORY_HELPER_H_ #include #include #include #include #include #include #include namespace graphbolt { namespace sampling { /** * @brief SharedMemoryHelper is a helper class to write/read data structures * to/from shared memory. 
*
 * In order to write a data structure to shared memory, we need to serialize
 * the data structure to a binary buffer and then write the buffer to the
 * shared memory. However, the size of the binary buffer is not known in
 * advance. To solve this problem, we use two shared memory objects: one for
 * storing the metadata and the other for storing the binary buffer. The
 * metadata describes the data structures, e.g. their size and shape. The size
 * of the metadata shared memory is decided by the total size of the serialized
 * metadata archives. The size of the binary buffer is decided by the size of
 * the data structures.
 *
 * To avoid repeated shared memory allocation, this helper class uses lazy data
 * structure writing. The data structures are written to the shared memory only
 * when `Flush` is called. The data structures are written in the order of
 * calling `WriteTorchArchive`, `WriteTorchTensor` and `WriteTorchTensorDict`,
 * and also read in the same order.
 *
 * The usage of this class as a writer is as follows:
 * @code{.cpp}
 * SharedMemoryHelper shm_helper("shm_name");
 * shm_helper.WriteTorchArchive(std::move(archive));
 * shm_helper.WriteTorchTensor(tensor);
 * shm_helper.WriteTorchTensorDict(tensor_dict);
 * shm_helper.Flush();
 * // After `Flush`, the data structures are written to the shared memory.
 * // Then the helper class can be used as a reader.
 * shm_helper.InitializeRead();
 * auto archive = shm_helper.ReadTorchArchive();
 * auto tensor = shm_helper.ReadTorchTensor();
 * auto tensor_dict = shm_helper.ReadTorchTensorDict();
 * @endcode
 *
 * The usage of this class as a reader is as follows:
 * @code{.cpp}
 * SharedMemoryHelper shm_helper("shm_name");
 * shm_helper.InitializeRead();
 * auto archive = shm_helper.ReadTorchArchive();
 * auto tensor = shm_helper.ReadTorchTensor();
 * auto tensor_dict = shm_helper.ReadTorchTensorDict();
 * @endcode
 *
 *
 */
class SharedMemoryHelper {
 public:
  /**
   * @brief Constructor of the shared memory helper.
   * @param name The name of the shared memory.
*/ SharedMemoryHelper(const std::string& name); /** @brief Initialize this helper class before reading. */ void InitializeRead(); void WriteTorchArchive(torch::serialize::OutputArchive&& archive); torch::serialize::InputArchive ReadTorchArchive(); void WriteTorchTensor(torch::optional tensor); torch::optional ReadTorchTensor(); void WriteTorchTensorDict( torch::optional> tensor_dict); torch::optional> ReadTorchTensorDict(); /** @brief Flush the data structures to the shared memory. */ void Flush(); /** @brief Release the shared memory and return their left values. */ std::pair ReleaseSharedMemory(); private: /** * @brief Serialize metadata to string. */ void SerializeMetadata(); /** * @brief Write the metadata to the shared memory. This function is * called by `Flush`. */ void WriteMetadataToSharedMemory(); /** * @brief Write the tensor data to the shared memory. This function is * called by `Flush`. */ void WriteTorchTensorInternal(torch::optional tensor); inline void* GetCurrentMetadataPtr() const { return static_cast(metadata_shared_memory_->GetMemory()) + metadata_offset_; } inline void* GetCurrentDataPtr() const { return static_cast(data_shared_memory_->GetMemory()) + data_offset_; } inline void MoveMetadataPtr(int64_t offset) { TORCH_CHECK( metadata_offset_ + offset <= metadata_size_, "The size of metadata exceeds the maximum size of shared memory."); metadata_offset_ += offset; } inline void MoveDataPtr(int64_t offset) { TORCH_CHECK( data_offset_ + offset <= data_size_, "The size of data exceeds the maximum size of shared memory."); data_offset_ += offset; } std::string name_; bool is_creator_; size_t metadata_size_; size_t data_size_; // The shared memory objects for storing metadata and tensor data. SharedMemoryPtr metadata_shared_memory_, data_shared_memory_; // The read/write offsets of the metadata and tensor data. size_t metadata_offset_, data_offset_; // The data structures to write to the shared memory. 
They are written to the // shared memory only when `Flush` is called. std::vector metadata_to_write_; std::vector metadata_strings_to_write_; std::vector> tensors_to_write_; }; } // namespace sampling } // namespace graphbolt #endif // GRAPHBOLT_SHARED_MEMORY_HELPER_H_ ================================================ FILE: graphbolt/src/unique_and_compact.cc ================================================ /** * Copyright (c) 2023 by Contributors * * @file unique_and_compact.cc * @brief Unique and compact op. */ #include #include #include "./concurrent_id_hash_map.h" #include "./macro.h" #include "./utils.h" namespace graphbolt { namespace sampling { std::tuple UniqueAndCompact( const torch::Tensor& src_ids, const torch::Tensor& dst_ids, const torch::Tensor unique_dst_ids, const int64_t rank, const int64_t world_size) { if (utils::is_on_gpu(src_ids) && utils::is_on_gpu(dst_ids) && utils::is_on_gpu(unique_dst_ids)) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "unique_and_compact", { return ops::UniqueAndCompact( src_ids, dst_ids, unique_dst_ids, rank, world_size); }); } TORCH_CHECK( world_size <= 1, "Cooperative Minibatching (arXiv:2310.12403) is supported only on GPUs."); auto num_dst = unique_dst_ids.size(0); torch::Tensor ids = torch::cat({unique_dst_ids, src_ids}); auto [unique_ids, compacted_src, compacted_dst] = AT_DISPATCH_INDEX_TYPES( ids.scalar_type(), "unique_and_compact", ([&] { ConcurrentIdHashMap id_map(ids, num_dst); return std::make_tuple( id_map.GetUniqueIds(), id_map.MapIds(src_ids), id_map.MapIds(dst_ids)); })); auto offsets = torch::zeros(2, c10::TensorOptions().dtype(torch::kInt64)); offsets.data_ptr()[1] = unique_ids.size(0); return {unique_ids, compacted_src, compacted_dst, offsets}; } std::vector< std::tuple> UniqueAndCompactBatched( const std::vector& src_ids, const std::vector& dst_ids, const std::vector unique_dst_ids, const int64_t rank, const int64_t world_size) { TORCH_CHECK( src_ids.size() == dst_ids.size() && 
dst_ids.size() == unique_dst_ids.size(), "The batch dimension of the parameters need to be identical."); bool all_on_gpu = true; for (std::size_t i = 0; i < src_ids.size(); i++) { all_on_gpu = all_on_gpu && utils::is_on_gpu(src_ids[i]) && utils::is_on_gpu(dst_ids[i]) && utils::is_on_gpu(unique_dst_ids[i]); if (!all_on_gpu) break; } if (all_on_gpu) { GRAPHBOLT_DISPATCH_CUDA_ONLY_DEVICE( c10::DeviceType::CUDA, "unique_and_compact", { return ops::UniqueAndCompactBatched( src_ids, dst_ids, unique_dst_ids, rank, world_size); }); } std::vector< std::tuple> results; results.reserve(src_ids.size()); for (std::size_t i = 0; i < src_ids.size(); i++) { results.emplace_back(UniqueAndCompact( src_ids[i], dst_ids[i], unique_dst_ids[i], rank, world_size)); } return results; } c10::intrusive_ptr>>> UniqueAndCompactBatchedAsync( const std::vector& src_ids, const std::vector& dst_ids, const std::vector unique_dst_ids, const int64_t rank, const int64_t world_size) { return async( [=] { return UniqueAndCompactBatched( src_ids, dst_ids, unique_dst_ids, rank, world_size); }, utils::is_on_gpu(src_ids.at(0))); } } // namespace sampling } // namespace graphbolt ================================================ FILE: graphbolt/src/utils.cc ================================================ /** * Copyright (c) 2024, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file utils.cc * @brief Graphbolt utils implementations. 
*/
#include "./utils.h"

#include

namespace graphbolt {
namespace utils {

namespace {
// Process-wide worker id; stays empty until SetWorkerId() is called.
std::optional worker_id;
}

std::optional GetWorkerId() { return worker_id; }

void SetWorkerId(int64_t worker_id_value) { worker_id = worker_id_value; }

}  // namespace utils
}  // namespace graphbolt

================================================
FILE: graphbolt/src/utils.h
================================================
/**
 * Copyright (c) 2023 by Contributors
 * @file utils.h
 * @brief Graphbolt utils.
 */

#ifndef GRAPHBOLT_UTILS_H_
#define GRAPHBOLT_UTILS_H_

#include
#include

namespace graphbolt {
namespace utils {

/**
 * @brief If this process is a worker that is part of a DataLoader, returns the
 * assigned worker id, which is less than the number of workers. Returns an
 * empty optional if no worker id has been set.
 */
std::optional GetWorkerId();

/**
 * @brief If this process is a worker that is part of a DataLoader, this
 * function is called to initialize its worker id, which is less than the
 * number of workers.
 */
void SetWorkerId(int64_t worker_id_value);

/**
 * @brief Checks whether the tensor is stored on the GPU.
 */
inline bool is_on_gpu(const torch::Tensor& tensor) {
  return tensor.device().is_cuda();
}

/**
 * @brief Checks whether the tensor is stored on the GPU or the pinned memory.
 *
 * NOTE(review): this calls tensor.is_pinned() directly rather than the
 * worker-aware is_pinned() helper declared below -- confirm that is intended.
 */
inline bool is_accessible_from_gpu(const torch::Tensor& tensor) {
  return is_on_gpu(tensor) || tensor.is_pinned();
}

/**
 * @brief Checks whether the tensor is stored on the pinned memory. Always
 * returns false once a worker id has been set for this process.
 */
inline bool is_pinned(const torch::Tensor& tensor) {
  // If this process is a worker, we should avoid initializing the CUDA context.
  return !GetWorkerId() && tensor.is_pinned();
}

/**
 * @brief Checks whether the tensors are all stored on the GPU or the pinned
 * memory. Returns true for an empty container.
 */
template
inline bool are_accessible_from_gpu(const TensorContainer& tensors) {
  for (auto& tensor : tensors) {
    if (!is_accessible_from_gpu(tensor)) return false;
  }
  return true;
}

/**
 * @brief Parses the source and destination node type from a given edge type
 * triple separated with ":".
*/ inline std::pair parse_src_dst_ntype_from_etype( std::string etype) { auto first_seperator_it = std::find(etype.begin(), etype.end(), ':'); auto second_seperator_pos = std::find(first_seperator_it + 1, etype.end(), ':') - etype.begin(); return { etype.substr(0, first_seperator_it - etype.begin()), etype.substr(second_seperator_pos + 1)}; } /** * @brief Retrieves the value of the tensor at the given index. * * @note If the tensor is not contiguous, it will be copied to a contiguous * tensor. * * @tparam T The type of the tensor. * @param tensor The tensor. * @param index The index. * * @return T The value of the tensor at the given index. */ template T GetValueByIndex(const torch::Tensor& tensor, int64_t index) { TORCH_CHECK( index >= 0 && index < tensor.numel(), "The index should be within the range of the tensor, but got index ", index, " and tensor size ", tensor.numel()); auto contiguous_tensor = tensor.contiguous(); auto data_ptr = contiguous_tensor.data_ptr(); return data_ptr[index]; } } // namespace utils } // namespace graphbolt #endif // GRAPHBOLT_UTILS_H_ ================================================ FILE: include/dgl/array.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/array.h * @brief Common array operations required by DGL. * * Note that this is not meant for a full support of array library such as ATen. * Only a limited set of operators required by DGL are implemented. */ #ifndef DGL_ARRAY_H_ #define DGL_ARRAY_H_ #include "./aten/array_ops.h" #include "./aten/coo.h" #include "./aten/csr.h" #include "./aten/macro.h" #include "./aten/spmat.h" #include "./aten/types.h" #endif // DGL_ARRAY_H_ ================================================ FILE: include/dgl/array_iterator.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/array_iterator.h * @brief Various iterators. 
*/
#ifndef DGL_ARRAY_ITERATOR_H_
#define DGL_ARRAY_ITERATOR_H_

// CUB_INLINE marks a function as callable from host and device when compiling
// device code, and as a plain inline function otherwise.
#ifdef __CUDA_ARCH__
#define CUB_INLINE __host__ __device__ __forceinline__
#else
#define CUB_INLINE inline
#endif  // __CUDA_ARCH__

#include
#include
#include

namespace dgl {
namespace aten {

using std::swap;

// Make std::pair work on both host and device
template
struct Pair {
  Pair() = default;
  Pair(const Pair& other) = default;
  Pair(Pair&& other) = default;
  /** @brief Construct from the two element values. */
  CUB_INLINE Pair(DType a, DType b) : first(a), second(b) {}
  /** @brief Member-wise copy assignment. */
  CUB_INLINE Pair& operator=(const Pair& other) {
    first = other.first;
    second = other.second;
    return *this;
  }
  /** @brief Conversion to std::pair holding copies of both members. */
  CUB_INLINE operator std::pair() const {
    return std::make_pair(first, second);
  }
  /** @brief Two pairs are equal when both members compare equal. */
  CUB_INLINE bool operator==(const Pair& other) const {
    return (first == other.first) && (second == other.second);
  }
  // NOTE(review): this member is const-qualified and takes a const reference,
  // yet it calls std::swap on the members themselves (unlike PairRef, which
  // swaps through pointers). That looks like it cannot compile if this
  // function is ever instantiated -- confirm whether it is used anywhere.
  CUB_INLINE void swap(const Pair& other) const {
    std::swap(first, other.first);
    std::swap(second, other.second);
  }
  DType first, second;
};

/** @brief Free swap for Pair; forwards to Pair::swap. */
template
CUB_INLINE void swap(const Pair& r1, const Pair& r2) {
  r1.swap(r2);
}

// PairRef and PairIterator that serves as an iterator over a pair of arrays in
// a zipped fashion like zip(a, b).
template struct PairRef { PairRef() = delete; PairRef(const PairRef& other) = default; PairRef(PairRef&& other) = default; CUB_INLINE PairRef(DType* const r, DType* const c) : a(r), b(c) {} CUB_INLINE PairRef& operator=(const PairRef& other) { *a = *other.a; *b = *other.b; return *this; } CUB_INLINE PairRef& operator=(const Pair& val) { *a = val.first; *b = val.second; return *this; } CUB_INLINE operator Pair() const { return Pair(*a, *b); } CUB_INLINE operator std::pair() const { return std::make_pair(*a, *b); } CUB_INLINE bool operator==(const PairRef& other) const { return (*a == *(other.a)) && (*b == *(other.b)); } CUB_INLINE void swap(const PairRef& other) const { std::swap(*a, *other.a); std::swap(*b, *other.b); } DType *a, *b; }; template CUB_INLINE void swap(const PairRef& r1, const PairRef& r2) { r1.swap(r2); } template struct PairIterator : public std::iterator< std::random_access_iterator_tag, Pair, std::ptrdiff_t, Pair, PairRef> { PairIterator() = default; PairIterator(const PairIterator& other) = default; PairIterator(PairIterator&& other) = default; CUB_INLINE PairIterator(DType* x, DType* y) : a(x), b(y) {} PairIterator& operator=(const PairIterator& other) = default; PairIterator& operator=(PairIterator&& other) = default; ~PairIterator() = default; CUB_INLINE bool operator==(const PairIterator& other) const { return a == other.a; } CUB_INLINE bool operator!=(const PairIterator& other) const { return a != other.a; } CUB_INLINE bool operator<(const PairIterator& other) const { return a < other.a; } CUB_INLINE bool operator>(const PairIterator& other) const { return a > other.a; } CUB_INLINE bool operator<=(const PairIterator& other) const { return a <= other.a; } CUB_INLINE bool operator>=(const PairIterator& other) const { return a >= other.a; } CUB_INLINE PairIterator& operator+=(const std::ptrdiff_t& movement) { a += movement; b += movement; return *this; } CUB_INLINE PairIterator& operator-=(const std::ptrdiff_t& movement) { a -= movement; b -= 
movement; return *this; } CUB_INLINE PairIterator& operator++() { ++a; ++b; return *this; } CUB_INLINE PairIterator& operator--() { --a; --b; return *this; } CUB_INLINE PairIterator operator++(int) { PairIterator ret(*this); operator++(); return ret; } CUB_INLINE PairIterator operator--(int) { PairIterator ret(*this); operator--(); return ret; } CUB_INLINE PairIterator operator+(const std::ptrdiff_t& movement) const { return PairIterator(a + movement, b + movement); } CUB_INLINE PairIterator operator-(const std::ptrdiff_t& movement) const { return PairIterator(a - movement, b - movement); } CUB_INLINE std::ptrdiff_t operator-(const PairIterator& other) const { return a - other.a; } CUB_INLINE PairRef operator*() const { return PairRef(a, b); } CUB_INLINE PairRef operator*() { return PairRef(a, b); } CUB_INLINE PairRef operator[](size_t offset) const { return PairRef(a + offset, b + offset); } CUB_INLINE PairRef operator[](size_t offset) { return PairRef(a + offset, b + offset); } DType *a, *b; }; }; // namespace aten }; // namespace dgl #endif // DGL_ARRAY_ITERATOR_H_ ================================================ FILE: include/dgl/aten/array_ops.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/aten/array_ops.h * @brief Common array operations required by DGL. * * Note that this is not meant for a full support of array library such as ATen. * Only a limited set of operators required by DGL are implemented. */ #ifndef DGL_ATEN_ARRAY_OPS_H_ #define DGL_ATEN_ARRAY_OPS_H_ #include #include #include #include #include #include "./types.h" namespace dgl { namespace aten { ////////////////////////////////////////////////////////////////////// // ID array ////////////////////////////////////////////////////////////////////// /** @return A special array to represent null. 
*/
inline NDArray NullArray(
    const DGLDataType& dtype = DGLDataType{kDGLInt, 64, 1},
    const DGLContext& ctx = DGLContext{kDGLCPU, 0}) {
  // The null array is represented by an empty array of shape {0}.
  return NDArray::Empty({0}, dtype, ctx);
}

/**
 * @return Whether the input array is a null array.
 *
 * NOTE(review): this reads shape[0] without checking the number of
 * dimensions -- presumably callers only pass arrays with ndim >= 1; confirm.
 */
inline bool IsNullArray(NDArray array) { return array->shape[0] == 0; }

/**
 * @brief Create a new id array with given length
 * @param length The array length
 * @param ctx The array context
 * @param nbits The number of integer bits
 * @return id array
 */
IdArray NewIdArray(
    int64_t length, DGLContext ctx = DGLContext{kDGLCPU, 0},
    uint8_t nbits = 64);

/**
 * @brief Create a new float array with given length
 * @param length The array length
 * @param ctx The array context
 * @param nbits The number of bits of the floating-point type
 * @return float array
 */
FloatArray NewFloatArray(int64_t length,
                         DGLContext ctx = DGLContext{kDGLCPU, 0},
                         uint8_t nbits = 32);

/**
 * @brief Create a new id array using the given vector data
 * @param vec The vector data
 * @param nbits The integer bits of the returned array
 * @param ctx The array context
 * @return the id array
 */
template
IdArray VecToIdArray(
    const std::vector& vec, uint8_t nbits = 64,
    DGLContext ctx = DGLContext{kDGLCPU, 0});

/**
 * @brief Return an array representing a 1D range.
 * @param low Lower bound (inclusive).
 * @param high Higher bound (exclusive).
 * @param nbits result array's bits (32 or 64)
 * @param ctx Device context
 * @return range array
 */
IdArray Range(int64_t low, int64_t high, uint8_t nbits, DGLContext ctx);

/**
 * @brief Return an array full of the given value
 * @param val The value to fill.
 * @param length Number of elements.
 * @param nbits result array's bits (32 or 64)
 * @param ctx Device context
 * @return the result array
 */
IdArray Full(int64_t val, int64_t length, uint8_t nbits, DGLContext ctx);

/**
 * @brief Return an array full of the given value with the given type.
 * @param val The value to fill.
 * @param length Number of elements.
* @param ctx Device context * @return the result array */ template NDArray Full(DType val, int64_t length, DGLContext ctx); /** @brief Create a deep copy of the given array */ IdArray Clone(IdArray arr); /** @brief Convert the idarray to the given bit width */ IdArray AsNumBits(IdArray arr, uint8_t bits); /** @brief Arithmetic functions */ IdArray Add(IdArray lhs, IdArray rhs); IdArray Sub(IdArray lhs, IdArray rhs); IdArray Mul(IdArray lhs, IdArray rhs); IdArray Div(IdArray lhs, IdArray rhs); IdArray Mod(IdArray lhs, IdArray rhs); IdArray Add(IdArray lhs, int64_t rhs); IdArray Sub(IdArray lhs, int64_t rhs); IdArray Mul(IdArray lhs, int64_t rhs); IdArray Div(IdArray lhs, int64_t rhs); IdArray Mod(IdArray lhs, int64_t rhs); IdArray Add(int64_t lhs, IdArray rhs); IdArray Sub(int64_t lhs, IdArray rhs); IdArray Mul(int64_t lhs, IdArray rhs); IdArray Div(int64_t lhs, IdArray rhs); IdArray Mod(int64_t lhs, IdArray rhs); IdArray Neg(IdArray array); // XXX(minjie): currently using integer array for bool type IdArray GT(IdArray lhs, IdArray rhs); IdArray LT(IdArray lhs, IdArray rhs); IdArray GE(IdArray lhs, IdArray rhs); IdArray LE(IdArray lhs, IdArray rhs); IdArray EQ(IdArray lhs, IdArray rhs); IdArray NE(IdArray lhs, IdArray rhs); IdArray GT(IdArray lhs, int64_t rhs); IdArray LT(IdArray lhs, int64_t rhs); IdArray GE(IdArray lhs, int64_t rhs); IdArray LE(IdArray lhs, int64_t rhs); IdArray EQ(IdArray lhs, int64_t rhs); IdArray NE(IdArray lhs, int64_t rhs); IdArray GT(int64_t lhs, IdArray rhs); IdArray LT(int64_t lhs, IdArray rhs); IdArray GE(int64_t lhs, IdArray rhs); IdArray LE(int64_t lhs, IdArray rhs); IdArray EQ(int64_t lhs, IdArray rhs); IdArray NE(int64_t lhs, IdArray rhs); /** @brief Stack two arrays (of len L) into a 2*L length array */ IdArray HStack(IdArray arr1, IdArray arr2); /** @brief Return the indices of the elements that are non-zero. */ IdArray NonZero(BoolArray bool_arr); /** * @brief Return the data under the index. 
In numpy notation, A[I] * @tparam ValueType The type of return value. */ template ValueType IndexSelect(NDArray array, int64_t index); /** * @brief Return the data under the index. In numpy notation, A[I] */ NDArray IndexSelect(NDArray array, IdArray index); /** * @brief Return the data from `start` (inclusive) to `end` (exclusive). */ NDArray IndexSelect(NDArray array, int64_t start, int64_t end); /** * @brief Permute the elements of an array according to given indices. * * Only support 1D arrays. * * Equivalent to: * * * result = np.zeros_like(array) * result[indices] = array * */ NDArray Scatter(NDArray array, IdArray indices); /** * @brief Scatter data into the output array. * * Equivalent to: * * * out[index] = value * */ void Scatter_(IdArray index, NDArray value, NDArray out); /** * @brief Repeat each element a number of times. Equivalent to np.repeat(array, * repeats) * @param array A 1D vector * @param repeats A 1D integer vector for number of times to repeat for each * element in \c array. Must have the same shape as \c array. */ NDArray Repeat(NDArray array, IdArray repeats); /** * @brief Relabel the given ids to consecutive ids. * * Relabeling is done inplace. The mapping is created from the union * of the give arrays. * * Example: * * Given two IdArrays [2, 3, 10, 0, 2] and [4, 10, 5], one possible return * mapping is [2, 3, 10, 4, 0, 5], meaning the new ID 0 maps to the old ID * 2, 1 maps to 3, so on and so forth. * * @param arrays The id arrays to relabel. * @return mapping array M from new id to old id. */ IdArray Relabel_(const std::vector& arrays); /** * @brief concatenate the given id arrays to one array * * Example: * * Given two IdArrays [2, 3, 10, 0, 2] and [4, 10, 5] * Return [2, 3, 10, 0, 2, 4, 10, 5] * * @param arrays The id arrays to concatenate. * @return concatenated array. 
*/ NDArray Concat(const std::vector& arrays); /** @brief Return whether the array is a valid 1D int array*/ inline bool IsValidIdArray(const dgl::runtime::NDArray& arr) { return arr->ndim == 1 && arr->dtype.code == kDGLInt; } /** * @brief Packs a tensor containing padded sequences of variable length. * * Similar to \c pack_padded_sequence in PyTorch, except that * * 1. The length for each sequence (before padding) is inferred as the number * of elements before the first occurrence of \c pad_value. * 2. It does not sort the sequences by length. * 3. Along with the tensor containing the packed sequence, it returns both the * length, as well as the offsets to the packed tensor, of each sequence. * * @param array The tensor containing sequences padded to the same length * @param pad_value The padding value * @return A triplet of packed tensor, the length tensor, and the offset tensor * * @note Example: consider the following array with padding value -1: * * * [[1, 2, -1, -1], * [3, 4, 5, -1]] * * * The packed tensor would be [1, 2, 3, 4, 5]. * * The length tensor would be [2, 3], i.e. the length of each sequence before * padding. * * The offset tensor would be [0, 2], i.e. the offset to the packed tensor for * each sequence (before padding) */ template std::tuple Pack(NDArray array, ValueType pad_value); /** * @brief Batch-slice a 1D or 2D array, and then pack the list of sliced arrays * by concatenation. 
* * If a 2D array is given, then the function is equivalent to: * * * def ConcatSlices(array, lengths): * slices = [array[i, :l] for i, l in enumerate(lengths)] * packed = np.concatenate(slices) * offsets = np.cumsum([0] + lengths[:-1]) * return packed, offsets * * * If a 1D array is given, then the function is equivalent to * * * def ConcatSlices(array, lengths): * slices = [array[:l] for l in lengths] * packed = np.concatenate(slices) * offsets = np.cumsum([0] + lengths[:-1]) * return packed, offsets * * * @param array A 1D or 2D tensor for slicing * @param lengths A 1D tensor indicating the number of elements to slice * @return The tensor with packed slices along with the offsets. */ std::pair ConcatSlices(NDArray array, IdArray lengths); /** * @brief Return the cumulative summation (or inclusive sum) of the input array. * * The first element out[0] is equal to the first element of the input array * array[0]. The remaining elements are defined recursively, out[i] = out[i-1] + * array[i]. Hence, the result array length is the same as the input array * length. * * If prepend_zero is true, then the first element is zero and the result array * length is the input array length plus one. This is useful for creating * an indptr array over a count array. * * @param array The 1D input array. * @param prepend_zero Whether to prepend a zero to the result (false by * default). * @return Array after cumsum. */ IdArray CumSum(IdArray array, bool prepend_zero = false); /** * @brief Return the nonzero index. * * Only supports 1D arrays. The result index array is in int64. * * @param array The input array. * @return A 1D index array storing the positions of the non-zero values. */ IdArray NonZero(NDArray array); /** * @brief Sort the ID vector in ascending order. * * It performs both sort and arg_sort (returning the sorted index). The sorted * index is always in int64. * * @param array Input array. * @param num_bits The number of bits used in key comparison. 
For example, if * the data type of the input array is int32_t and `num_bits = 8`, it only uses * bits in index range [0, 8) for sorting. Setting it to a small value could * speed up the sorting if the underlying sorting algorithm is * radix sort (e.g., on GPU). Setting it to zero (default value) means using all * the bits for comparison. On CPU, it currently has no effect. * @return A pair of arrays: sorted values and sorted index to the original * position. */ std::pair Sort(IdArray array, int num_bits = 0); /** * @brief Return a string that prints out some debug information. */ std::string ToDebugString(NDArray array); // inline implementations template IdArray VecToIdArray(const std::vector& vec, uint8_t nbits, DGLContext ctx) { IdArray ret = NewIdArray(vec.size(), DGLContext{kDGLCPU, 0}, nbits); if (nbits == 32) { std::copy(vec.begin(), vec.end(), static_cast(ret->data)); } else if (nbits == 64) { std::copy(vec.begin(), vec.end(), static_cast(ret->data)); } else { LOG(FATAL) << "Only int32 or int64 is supported."; } return ret.CopyTo(ctx); } /** * @brief Get the context of the first array, and check that every other array * in the list is on the same context. * @param arrays The arrays to inspect. * @return The context of the first array, or the default CPU context * (DGLContext{kDGLCPU, 0}) if the list is empty. */ inline DGLContext GetContextOf(const std::vector& arrays) { bool first = true; /* Start from a well-defined context so that an empty list does not return an indeterminate value. */ DGLContext result{kDGLCPU, 0}; for (auto& array : arrays) { if (first) { first = false; result = array->ctx; } else { CHECK_EQ(array->ctx, result) << "Context of the input arrays are different"; } } return result; } } // namespace aten } // namespace dgl #endif // DGL_ATEN_ARRAY_OPS_H_ ================================================ FILE: include/dgl/aten/coo.h ================================================ /** * Copyright (c) 2020-2022 by Contributors * @file dgl/aten/coo.h * @brief Common COO operations required by DGL. 
*/ #ifndef DGL_ATEN_COO_H_ #define DGL_ATEN_COO_H_ #include #include #include #include #include #include #include "./array_ops.h" #include "./macro.h" #include "./spmat.h" #include "./types.h" namespace dgl { namespace aten { struct CSRMatrix; /** * @brief Plain COO structure * * The data array stores integer ids for reading edge features. * Note that we do allow duplicate non-zero entries -- multiple non-zero entries * that have the same row, col indices. It corresponds to multigraph in * graph terminology. */ constexpr uint64_t kDGLSerialize_AtenCooMatrixMagic = 0xDD61ffd305dff127; // TODO(BarclayII): Graph queries on COO formats should support the case where // data ordered by rows/columns instead of EID. struct COOMatrix { /** @brief the dense shape of the matrix */ int64_t num_rows = 0, num_cols = 0; /** @brief COO index arrays */ IdArray row, col; /** @brief data index array. When is null, assume it is from 0 to NNZ - 1. */ IdArray data; /** @brief whether the row indices are sorted */ bool row_sorted = false; /** @brief whether the column indices per row are sorted */ bool col_sorted = false; /** @brief whether the matrix is in pinned memory */ bool is_pinned = false; /** @brief default constructor */ COOMatrix() = default; /** @brief constructor */ COOMatrix( int64_t nrows, int64_t ncols, IdArray rarr, IdArray carr, IdArray darr = NullArray(), bool rsorted = false, bool csorted = false) : num_rows(nrows), num_cols(ncols), row(rarr), col(carr), data(darr), row_sorted(rsorted), col_sorted(csorted) { CheckValidity(); } /** @brief constructor from SparseMatrix object */ explicit COOMatrix(const SparseMatrix& spmat) : num_rows(spmat.num_rows), num_cols(spmat.num_cols), row(spmat.indices[0]), col(spmat.indices[1]), data(spmat.indices[2]), row_sorted(spmat.flags[0]), col_sorted(spmat.flags[1]) { CheckValidity(); } // Convert to a SparseMatrix object that can return to python. 
SparseMatrix ToSparseMatrix() const { return SparseMatrix( static_cast(SparseFormat::kCOO), num_rows, num_cols, {row, col, data}, {row_sorted, col_sorted}); } bool Load(dmlc::Stream* fs) { uint64_t magicNum; CHECK(fs->Read(&magicNum)) << "Invalid Magic Number"; CHECK_EQ(magicNum, kDGLSerialize_AtenCooMatrixMagic) << "Invalid COOMatrix Data"; CHECK(fs->Read(&num_cols)) << "Invalid num_cols"; CHECK(fs->Read(&num_rows)) << "Invalid num_rows"; CHECK(fs->Read(&row)) << "Invalid row"; CHECK(fs->Read(&col)) << "Invalid col"; CHECK(fs->Read(&data)) << "Invalid data"; CHECK(fs->Read(&row_sorted)) << "Invalid row_sorted"; CHECK(fs->Read(&col_sorted)) << "Invalid col_sorted"; CheckValidity(); return true; } void Save(dmlc::Stream* fs) const { fs->Write(kDGLSerialize_AtenCooMatrixMagic); fs->Write(num_cols); fs->Write(num_rows); fs->Write(row); fs->Write(col); fs->Write(data); fs->Write(row_sorted); fs->Write(col_sorted); } inline void CheckValidity() const { CHECK_SAME_DTYPE(row, col); CHECK_SAME_CONTEXT(row, col); if (!aten::IsNullArray(data)) { CHECK_SAME_DTYPE(row, data); CHECK_SAME_CONTEXT(row, data); } CHECK_NO_OVERFLOW(row->dtype, num_rows); CHECK_NO_OVERFLOW(row->dtype, num_cols); } inline bool IsEmpty() const { return aten::IsNullArray(row) && aten::IsNullArray(col) && aten::IsNullArray(data); } // Check and update the internal flag is_pinned. // This function will initialize a cuda context. inline bool CheckIfPinnedInCUDA() { is_pinned = (aten::IsNullArray(row) || row.IsPinned()) && (aten::IsNullArray(col) || col.IsPinned()) && (aten::IsNullArray(data) || data.IsPinned()); return is_pinned; } /** @brief Return a copy of this matrix on the give device context. */ inline COOMatrix CopyTo(const DGLContext& ctx) const { if (ctx == row->ctx) return *this; return COOMatrix( num_rows, num_cols, row.CopyTo(ctx), col.CopyTo(ctx), aten::IsNullArray(data) ? 
data : data.CopyTo(ctx), row_sorted, col_sorted); } /** @brief Return a copy of this matrix in pinned (page-locked) memory. */ inline COOMatrix PinMemory() { if (!IsEmpty()) { if (is_pinned) return *this; auto new_coo = COOMatrix( num_rows, num_cols, row.PinMemory(), col.PinMemory(), aten::IsNullArray(data) ? data : data.PinMemory(), row_sorted, col_sorted); CHECK(new_coo.CheckIfPinnedInCUDA()) << "An internal DGL error has occured while trying to pin a COO " "matrix. Please file a bug at " "'https://github.com/dmlc/dgl/issues' " "with the above stacktrace."; return new_coo; } is_pinned = true; return *this; } /** * @brief Pin the row, col and data (if not Null) of the matrix. * @note This is an in-place method. Behavior depends on the current context, * kDGLCPU: will be pinned; * IsPinned: directly return; * kDGLCUDA: invalid, will throw an error. * The context check is deferred to pinning the NDArray. */ inline void PinMemory_() { if (!IsEmpty()) { if (is_pinned) return; row.PinMemory_(); col.PinMemory_(); if (!aten::IsNullArray(data)) { data.PinMemory_(); } is_pinned = true; } is_pinned = true; return; } /** * @brief Unpin the row, col and data (if not Null) of the matrix. * @note This is an in-place method. Behavior depends on the current context, * IsPinned: will be unpinned; * others: directly return. * The context check is deferred to unpinning the NDArray. */ inline void UnpinMemory_() { if (!IsEmpty()) { if (!is_pinned) return; row.UnpinMemory_(); col.UnpinMemory_(); if (!aten::IsNullArray(data)) { data.UnpinMemory_(); } is_pinned = false; } is_pinned = false; return; } /** * @brief Record stream for the row, col and data (if not Null) of the matrix. 
* @param stream The stream that is using the graph */ inline void RecordStream(DGLStreamHandle stream) const { row.RecordStream(stream); col.RecordStream(stream); if (!aten::IsNullArray(data)) { data.RecordStream(stream); } } }; ///////////////////////// COO routines ////////////////////////// /** @brief Return true if the value (row, col) is non-zero */ bool COOIsNonZero(COOMatrix, int64_t row, int64_t col); /** * @brief Batched implementation of COOIsNonZero. * @note This operator allows broadcasting (i.e, either row or col can be of * length 1). */ runtime::NDArray COOIsNonZero( COOMatrix, runtime::NDArray row, runtime::NDArray col); /** @brief Return the nnz of the given row */ int64_t COOGetRowNNZ(COOMatrix, int64_t row); runtime::NDArray COOGetRowNNZ(COOMatrix, runtime::NDArray row); /** @brief Return the data array of the given row */ std::pair COOGetRowDataAndIndices( COOMatrix, int64_t row); /** @brief Whether the COO matrix contains data */ inline bool COOHasData(COOMatrix csr) { return !IsNullArray(csr.data); } /** * @brief Check whether the COO is sorted. * * It returns two flags: one for whether the row is sorted; * the other for whether the columns of each row is sorted * if the first flag is true. * * Complexity: O(NNZ) */ std::pair COOIsSorted(COOMatrix coo); /** * @brief Get the data and the row,col indices for each returned entries. * * The operator supports matrix with duplicate entries and all the matched * entries will be returned. The operator assumes there is NO duplicate (row, * col) pair in the given input. Otherwise, the returned result is undefined. * * @note This operator allows broadcasting (i.e, either row or col can be of * length 1). * @param mat Sparse matrix * @param rows Row index * @param cols Column index * @return Three arrays {rows, cols, data} */ std::vector COOGetDataAndIndices( COOMatrix mat, runtime::NDArray rows, runtime::NDArray cols); /** * @brief Get data. 
The return type is an ndarray due to possible duplicate * entries. */ inline runtime::NDArray COOGetAllData(COOMatrix mat, int64_t row, int64_t col) { IdArray rows = VecToIdArray({row}, mat.row->dtype.bits, mat.row->ctx); IdArray cols = VecToIdArray({col}, mat.row->dtype.bits, mat.row->ctx); const auto& rst = COOGetDataAndIndices(mat, rows, cols); return rst[2]; } /** * @brief Get the data for each (row, col) pair. * * The operator supports matrix with duplicate entries but only one matched * entry will be returned for each (row, col) pair. Support duplicate input * (row, col) pairs. * * @note This operator allows broadcasting (i.e, either row or col can be of * length 1). * * @param mat Sparse matrix. * @param rows Row index. * @param cols Column index. * @return Data array. The i^th element is the data of (rows[i], cols[i]) */ runtime::NDArray COOGetData( COOMatrix mat, runtime::NDArray rows, runtime::NDArray cols); /** @brief Return a transposed COO matrix */ COOMatrix COOTranspose(COOMatrix coo); /** * @brief Convert COO matrix to CSR matrix. * * If the input COO matrix does not have data array, the data array of * the result CSR matrix stores a shuffle index for how the entries * will be reordered in CSR. The i^th entry in the result CSR corresponds * to the CSR.data[i] th entry in the input COO. * * Conversion complexity: O(nnz) * * - The function first check whether the input COO matrix is sorted * using a linear scan. * - If the COO matrix is row sorted, the conversion can be done very * efficiently in a sequential scan. The result indices and data arrays * are directly equal to the column and data arrays from the input. * - If the COO matrix is further column sorted, the result CSR is * also column sorted. * - Otherwise, the conversion is more costly but still is O(nnz). * * @param coo Input COO matrix. * @return CSR matrix. */ CSRMatrix COOToCSR(COOMatrix coo); /** * @brief Slice rows of the given matrix and return. 
* @param coo COO matrix * @param start Start row id (inclusive) * @param end End row id (exclusive) */ COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end); COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows); /** * @brief Get the submatrix specified by the row and col ids. * * In numpy notation, given matrix M, row index array I, col index array J * This function returns the submatrix M[I, J]. * * @param coo The input coo matrix * @param rows The row index to select * @param cols The col index to select * @return submatrix */ COOMatrix COOSliceMatrix( COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols); /** @return True if the matrix has duplicate entries */ bool COOHasDuplicate(COOMatrix coo); /** * @brief Deduplicate the entries of a sorted COO matrix, replacing the data * with the number of occurrences of the row-col coordinates. */ std::pair COOCoalesce(COOMatrix coo); /** * @brief Sort the indices of a COO matrix in-place. * * The function sorts row indices in ascending order. If sort_column is true, * col indices are sorted in ascending order too. The data array of the returned * COOMatrix stores the shuffled index which could be used to fetch edge data. * * Complexity: O(N*log(N)) time and O(1) space, where N is the number of * nonzeros. * TODO(minjie): The time complexity could be improved to O(N) by using a O(N) * space. * * @param mat The coo matrix to sort. * @param sort_column True if column index should be sorted too. */ void COOSort_(COOMatrix* mat, bool sort_column = false); /** * @brief Sort the indices of a COO matrix. * * The function sorts row indices in ascending order. If sort_column is true, * col indices are sorted in ascending order too. The data array of the returned * COOMatrix stores the shuffled index which could be used to fetch edge data. * * Complexity: O(N*log(N)) time and O(1) space, where N is the number of * nonzeros. 
* TODO(minjie): The time complexity could be improved to O(N) by using a O(N) * space. * * @param mat The input coo matrix * @param sort_column True if column index should be sorted too. * @return COO matrix with index sorted. The input is returned as is when its * flags say it is already sorted as requested; otherwise a sorted deep copy * is returned. */ inline COOMatrix COOSort(COOMatrix mat, bool sort_column = false) { if ((mat.row_sorted && !sort_column) || mat.col_sorted) return mat; COOMatrix ret( mat.num_rows, mat.num_cols, mat.row.Clone(), mat.col.Clone(), COOHasData(mat) ? mat.data.Clone() : mat.data, mat.row_sorted, mat.col_sorted); COOSort_(&ret, sort_column); return ret; } /** * @brief Remove entries from COO matrix by entry indices (data indices) * @param coo The input coo matrix. * @param entries The entry indices (data indices) to remove. * @return A new COO matrix. NOTE(review): the declaration returns a single * COOMatrix; the mapping from the new COO entries to the old COO entries is * presumably carried by its data array -- confirm against the implementation. */ COOMatrix COORemove(COOMatrix coo, IdArray entries); /** * @brief Reorder the rows and columns according to the new row and column order. * @param coo The input coo matrix. * @param new_row_ids the new row Ids (the index is the old row Id) * @param new_col_ids the new column Ids (the index is the old col Id). */ COOMatrix COOReorder( COOMatrix coo, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids); /** * @brief Randomly select a fixed number of non-zero entries along each given * row using arXiv:2210.13339, Labor sampling. * * The picked indices are returned in the form of a COO matrix. * * The passed random_seed makes it so that for any seed vertex s and its * neighbor t, the rolled random variate r_t is the same for any call to this * function with the same random seed. When sampling as part of the same batch, * one would want identical seeds so that LABOR can globally sample. One example * is that for heterogeneous graphs, there is a single random seed passed for * each edge type. This will sample much fewer vertices compared to having * unique random seeds for each edge type. 
If one called this function * individually for each edge type for a heterogenous graph with different * random seeds, then it would run LABOR locally for each edge type, resulting * into a larger number of vertices being sampled. * * If this function is called without a random_seed, we get the random seed by * getting a random number from DGL. * * * Examples: * * // coo.num_rows = 4; * // coo.num_cols = 4; * // coo.rows = [0, 0, 1, 3, 3] * // coo.cols = [0, 1, 1, 2, 3] * // coo.data = [2, 3, 0, 1, 4] * COOMatrix coo = ...; * IdArray rows = ... ; // [1, 3] * COOMatrix sampled = COOLaborSampling(coo, rows, 2, NullArray(), 0 \ * , NullArray(), NullArray()); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [1, 3, 3] * // sampled.cols = [1, 2, 3] * // sampled.data = [3, 0, 4] * * @param mat Input coo matrix. * @param rows Rows to sample from. * @param num_samples Number of samples using labor sampling * @param prob Probability array for nonuniform sampling * @param importance_sampling Whether to enable importance sampling * @param random_seed The random seed for the sampler * @param seed2_contribution The contribution of the second random seed, [0, 1) * @param NIDs global nids if sampling from a subgraph * @return A pair of COOMatrix storing the picked row and col indices and edge * weights if importance_sampling != 0 or prob argument was passed. * Its data field stores the the index of the picked elements in the * value array. */ std::pair COOLaborSampling( COOMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob = NullArray(), int importance_sampling = 0, IdArray random_seed = NullArray(), float seed2_contribution = 0, IdArray NIDs = NullArray()); /** * @brief Randomly select a fixed number of non-zero entries along each given * row independently. * * The function performs random choices along each row independently. * The picked indices are returned in the form of a COO matrix. 
* * If replace is false and a row has fewer non-zero values than num_samples, * all the values are picked. * * Examples: * * // coo.num_rows = 4; * // coo.num_cols = 4; * // coo.rows = [0, 0, 1, 3, 3] * // coo.cols = [0, 1, 1, 2, 3] * // coo.data = [2, 3, 0, 1, 4] * COOMatrix coo = ...; * IdArray rows = ... ; // [1, 3] * COOMatrix sampled = COORowWiseSampling(coo, rows, 2, FloatArray(), false); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [1, 3, 3] * // sampled.cols = [1, 2, 3] * // sampled.data = [3, 0, 4] * * @param mat Input coo matrix. * @param rows Rows to sample from. * @param num_samples Number of samples * @param prob_or_mask Unnormalized probability array or mask array. * Should be of the same length as the data array. * If an empty array is provided, assume uniform. * @param replace True if sample with replacement * @return A COOMatrix storing the picked row and col indices. Its data field * stores the the index of the picked elements in the value array. */ COOMatrix COORowWiseSampling( COOMatrix mat, IdArray rows, int64_t num_samples, NDArray prob_or_mask = NDArray(), bool replace = true); /** * @brief Randomly select a fixed number of non-zero entries for each edge type * along each given row independently. * * The function performs random choices along each row independently. * In each row, num_samples samples is picked for each edge type. (The edge * type is stored in etypes) * The picked indices are returned in the form of a COO matrix. * * If replace is false and a row has fewer non-zero values than num_samples, * all the values are picked. * * Examples: * * // coo.num_rows = 4; * // coo.num_cols = 4; * // coo.rows = [0, 0, 0, 0, 3] * // coo.cols = [0, 1, 3, 2, 3] * // coo.data = [2, 3, 0, 1, 4] * // eid2etype_offset = [0, 3, 4, 5] * COOMatrix coo = ...; * IdArray rows = ... 
; // [0, 3] * std::vector num_samples = {2, 2, 2}; * COOMatrix sampled = COORowWisePerEtypeSampling(coo, rows, eid2etype_offset, * num_samples, FloatArray(), false); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [0, 0, 0, 3] * // sampled.cols = [0, 3, 2, 3] * // sampled.data = [2, 0, 1, 4] * * @param mat Input coo matrix. * @param rows Rows to sample from. * @param eid2etype_offset The offset to each edge type. * @param num_samples Number of samples * @param prob_or_mask Unnormalized probability array or mask array. * Should be of the same length as the data array. * If an empty array is provided, assume uniform. * @param replace True if sample with replacement * @return A COOMatrix storing the picked row and col indices. Its data field * stores the the index of the picked elements in the value array. * @note The edges of the entire graph must be ordered by their edge types. */ COOMatrix COORowWisePerEtypeSampling( COOMatrix mat, IdArray rows, const std::vector& eid2etype_offset, const std::vector& num_samples, const std::vector& prob_or_mask, bool replace = true); /** * @brief Select K non-zero entries with the largest weights along each given * row. * * The function performs top-k selection along each row independently. * The picked indices are returned in the form of a COO matrix. * * If replace is false and a row has fewer non-zero values than k, * all the values are picked. * * Examples: * * // coo.num_rows = 4; * // coo.num_cols = 4; * // coo.rows = [0, 0, 1, 3, 3] * // coo.cols = [0, 1, 1, 2, 3] * // coo.data = [2, 3, 0, 1, 4] * COOMatrix coo = ...; * IdArray rows = ... ; // [0, 1, 3] * FloatArray weight = ... ; // [1., 0., -1., 10., 20.] 
* COOMatrix sampled = COORowWiseTopk(coo, rows, 1, weight); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [0, 1, 3] * // sampled.cols = [1, 1, 2] * // sampled.data = [3, 0, 1] * * @param mat Input COO matrix. * @param rows Rows to sample from. * @param k The K value. * @param weight Weight associated with each entry. Should be of the same length * as the data array. If an empty array is provided, assume uniform. * @param ascending If true, elements are sorted by ascending order, equivalent * to find the K smallest values. Otherwise, find K largest values. * @return A COOMatrix storing the picked row and col indices. Its data field * stores the the index of the picked elements in the value array. */ COOMatrix COORowWiseTopk( COOMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending = false); /** * @brief Union two COOMatrix into one COOMatrix. * * Two Matrix must have the same shape. * * Example: * * A = [[0, 0, 1, 0], * [1, 0, 1, 1], * [0, 1, 0, 0]] * * B = [[0, 1, 1, 0], * [0, 0, 0, 1], * [0, 0, 1, 0]] * * COOMatrix_A.num_rows : 3 * COOMatrix_A.num_cols : 4 * COOMatrix_B.num_rows : 3 * COOMatrix_B.num_cols : 4 * * C = UnionCoo({A, B}); * * C = [[0, 1, 2, 0], * [1, 0, 1, 2], * [0, 1, 1, 0]] * * COOMatrix_C.num_rows : 3 * COOMatrix_C.num_cols : 4 */ COOMatrix UnionCoo(const std::vector& coos); /** * @brief DisjointUnion a list COOMatrix into one COOMatrix. * * Examples: * * A = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0]] * * B = [[0, 0], * [1, 0]] * * COOMatrix_A.num_rows : 3 * COOMatrix_A.num_cols : 3 * COOMatrix_B.num_rows : 2 * COOMatrix_B.num_cols : 2 * * C = DisjointUnionCoo({A, B}); * * C = [[0, 0, 1, 0, 0], * [1, 0, 1, 0, 0], * [0, 1, 0, 0, 0], * [0, 0, 0, 0, 0], * [0, 0, 0, 1, 0]] * COOMatrix_C.num_rows : 5 * COOMatrix_C.num_cols : 5 * * @param coos The input list of coo matrix. 
* @note The function takes no explicit src/dst vertex id offset lists; only * \c coos is passed. * @return The combined COOMatrix. */ COOMatrix DisjointUnionCoo(const std::vector& coos); /** * @brief Convert a COOMatrix with duplicate entries into a simple COOMatrix * in which every (row, col) pair appears at most once. * * A = [[0, 0, 0], * [3, 0, 2], * [1, 1, 0], * [0, 0, 4]] * * B, cnt, edge_map = COOToSimple(A) * * B = [[0, 0, 0], * [1, 0, 1], * [1, 1, 0], * [0, 0, 1]] * cnt = [3, 2, 1, 1, 4] * edge_map = [0, 0, 0, 1, 1, 2, 3, 4, 4, 4, 4] * * @param coo The input COOMatrix. * @return A tuple of: the simplified COOMatrix; the count recording the * number of duplicated edges from the original graph; and the edge mapping * from the edge IDs of the original graph to those of the returned graph. */ std::tuple COOToSimple(const COOMatrix& coo); /** * @brief Split a COOMatrix into multiple disjoint components. * * Examples: * * C = [[0, 0, 1, 0, 0], * [1, 0, 1, 0, 0], * [0, 1, 0, 0, 0], * [0, 0, 0, 0, 0], * [0, 0, 0, 1, 0], * [0, 0, 0, 0, 1]] * COOMatrix_C.num_rows : 6 * COOMatrix_C.num_cols : 5 * * batch_size : 2 * edge_cumsum : [0, 4, 6] * src_vertex_cumsum : [0, 3, 6] * dst_vertex_cumsum : [0, 3, 5] * * ret = DisjointPartitionCooBySizes(C, * batch_size, * edge_cumsum, * src_vertex_cumsum, * dst_vertex_cumsum) * * A = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0]] * COOMatrix_A.num_rows : 3 * COOMatrix_A.num_cols : 3 * * B = [[0, 0], * [1, 0], * [0, 1]] * COOMatrix_B.num_rows : 3 * COOMatrix_B.num_cols : 2 * * @param coo COOMatrix to split. * @param batch_size Number of disjoint components (Sub COOMatrix) * @param edge_cumsum Cumulative number of edges of the components (starting * with 0, as in the example above). * @param src_vertex_cumsum Cumulative number of src vertices of the components. * @param dst_vertex_cumsum Cumulative number of dst vertices of the components. * @return A list of COOMatrixes representing each disjoint component. 
*/ std::vector DisjointPartitionCooBySizes( const COOMatrix& coo, const uint64_t batch_size, const std::vector& edge_cumsum, const std::vector& src_vertex_cumsum, const std::vector& dst_vertex_cumsum); /** * @brief Slice a contiguous chunk from a COOMatrix * * Examples: * * C = [[0, 0, 1, 0, 0], * [1, 0, 1, 0, 0], * [0, 1, 0, 0, 0], * [0, 0, 0, 0, 0], * [0, 0, 0, 1, 0], * [0, 0, 0, 0, 1]] * COOMatrix_C.num_rows : 6 * COOMatrix_C.num_cols : 5 * * edge_range : [4, 6] * src_vertex_range : [3, 6] * dst_vertex_range : [3, 5] * * ret = COOSliceContiguousChunk(C, * edge_range, * src_vertex_range, * dst_vertex_range) * * ret = [[0, 0], * [1, 0], * [0, 1]] * COOMatrix_ret.num_rows : 3 * COOMatrix_ret.num_cols : 2 * * @param coo COOMatrix to slice. * @param edge_range ID range of the edges in the chunk * @param src_vertex_range ID range of the src vertices in the chunk. * @param dst_vertex_range ID range of the dst vertices in the chunk. * @return COOMatrix representing the chunk. */ COOMatrix COOSliceContiguousChunk( const COOMatrix& coo, const std::vector& edge_range, const std::vector& src_vertex_range, const std::vector& dst_vertex_range); /** * @brief Create a LineGraph of input coo * * A = [[0, 0, 1], * [1, 0, 1], * [1, 1, 0]] * A.row = [0, 1, 1, 2, 2] * A.col = [2, 0, 2, 0, 1] * A.eid = [0, 1, 2, 3, 4] * * B = COOLineGraph(A, backtracking=False) * * B = [[0, 0, 0, 0, 1], * [1, 0, 0, 0, 0], * [0, 0, 0, 1, 0], * [0, 0, 0, 0, 0], * [0, 1, 0, 0, 0]] * * C = COOLineGraph(A, backtracking=True) * * C = [[0, 0, 0, 1, 1], * [1, 0, 0, 0, 0], * [0, 0, 0, 1, 1], * [1, 0, 0, 0, 0], * [0, 1, 1, 0, 0]] * * @param coo COOMatrix to create the LineGraph * @param backtracking whether the pair of (v, u) (u, v) edges are treated as * linked * @return LineGraph in COO format */ COOMatrix COOLineGraph(const COOMatrix& coo, bool backtracking); /** * @brief Generalized Sparse Matrix-Matrix Multiplication on COO. 
* @param op The binary operator, could be `add`, `sub`, `mul`, `div`, * `copy_u`, `copy_e`. * @param reduce The reduce operator, could be `sum`, `min`, `max`. * @param coo The COO we apply SpMM on. * @param ufeat The source node feature. * @param efeat The edge feature. * @param out The output feature on destination nodes. * @param out_aux A list of NDArray's that contains auxiliary information such * as the argmax on source nodes and edges for reduce operators such as * `min` and `max`. */ void COOSpMM( const std::string& op, const std::string& reduce, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); /** @brief COOSpMM C interface without std::string. */ void COOSpMM( const char* op, const char* reduce, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); /** * @brief Generalized Sampled Dense-Dense Matrix Multiplication on COO. * @param op The binary operator, could be `add`, `sub`, `mul`, `div`, * `dot`, `copy_u`, `copy_e`. * @param coo The COO we apply SDDMM on. * @param ufeat The left-hand operand feature; lhs_target tells which graph * element it is defined on. * @param efeat The right-hand operand feature; presumably rhs_target tells * which graph element it is defined on -- TODO confirm. * @param out The output feature on edge. * @param lhs_target Type of `ufeat` (0: source, 1: edge, 2: destination). * @param rhs_target Type of `efeat` (0: source, 1: edge, 2: destination). */ void COOSDDMM( const std::string& op, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, int lhs_target, int rhs_target); /** @brief COOSDDMM C interface without std::string. 
*/ void COOSDDMM( const char* op, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, int lhs_target, int rhs_target); } // namespace aten } // namespace dgl namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, dgl::aten::COOMatrix, true); } // namespace dmlc #endif // DGL_ATEN_COO_H_ ================================================ FILE: include/dgl/aten/csr.h ================================================ /** * Copyright (c) 2020-2022 by Contributors * @file dgl/aten/csr.h * @brief Common CSR operations required by DGL. */ #ifndef DGL_ATEN_CSR_H_ #define DGL_ATEN_CSR_H_ #include #include #include #include #include #include #include "./array_ops.h" #include "./macro.h" #include "./spmat.h" #include "./types.h" namespace dgl { namespace aten { struct COOMatrix; /** * @brief Plain CSR matrix * * The column indices are 0-based and are not necessarily sorted. The data array * stores integer ids for reading edge features. * * Note that we do allow duplicate non-zero entries -- multiple non-zero entries * that have the same row, col indices. It corresponds to multigraph in * graph terminology. * * @note This comment describes the CSRMatrix struct that is declared right * after the serialization magic number below. */ constexpr uint64_t kDGLSerialize_AtenCsrMatrixMagic = 0xDD6cd31205dff127; struct CSRMatrix { /** @brief the dense shape of the matrix */ int64_t num_rows = 0, num_cols = 0; /** @brief CSR index arrays */ IdArray indptr, indices; /** @brief data index array. When it is null, assume it is from 0 to NNZ - 1.
*/ IdArray data; /** @brief whether the column indices per row are sorted */ bool sorted = false; /** @brief whether the matrix is in pinned memory */ bool is_pinned = false; /** @brief default constructor */ CSRMatrix() = default; /** @brief constructor from the raw index arrays; runs CheckValidity() */ CSRMatrix( int64_t nrows, int64_t ncols, IdArray parr, IdArray iarr, IdArray darr = NullArray(), bool sorted_flag = false) : num_rows(nrows), num_cols(ncols), indptr(parr), indices(iarr), data(darr), sorted(sorted_flag) { CheckValidity(); } /** @brief constructor from SparseMatrix object */ explicit CSRMatrix(const SparseMatrix& spmat) : num_rows(spmat.num_rows), num_cols(spmat.num_cols), indptr(spmat.indices[0]), indices(spmat.indices[1]), data(spmat.indices[2]), sorted(spmat.flags[0]) { CheckValidity(); } // Convert to a SparseMatrix object that can return to python. SparseMatrix ToSparseMatrix() const { return SparseMatrix( static_cast(SparseFormat::kCSR), num_rows, num_cols, {indptr, indices, data}, {sorted}); } /** @brief Deserialize from a stream. Fields are read in the order Save() writes them (magic number, num_cols, num_rows, indptr, indices, data, sorted); is_pinned is not part of the serialized form. */ bool Load(dmlc::Stream* fs) { uint64_t magicNum; CHECK(fs->Read(&magicNum)) << "Invalid Magic Number"; CHECK_EQ(magicNum, kDGLSerialize_AtenCsrMatrixMagic) << "Invalid CSRMatrix Data"; CHECK(fs->Read(&num_cols)) << "Invalid num_cols"; CHECK(fs->Read(&num_rows)) << "Invalid num_rows"; CHECK(fs->Read(&indptr)) << "Invalid indptr"; CHECK(fs->Read(&indices)) << "Invalid indices"; CHECK(fs->Read(&data)) << "Invalid data"; CHECK(fs->Read(&sorted)) << "Invalid sorted"; CheckValidity(); return true; } /** @brief Serialize to a stream. Note that num_cols is written before num_rows; Load() relies on this order. */ void Save(dmlc::Stream* fs) const { fs->Write(kDGLSerialize_AtenCsrMatrixMagic); fs->Write(num_cols); fs->Write(num_rows); fs->Write(indptr); fs->Write(indices); fs->Write(data); fs->Write(sorted); } /** @brief Check that indptr and indices (and data, when not null) share dtype and context, that num_rows and num_cols fit in the index dtype, and that indptr has num_rows + 1 entries. */ inline void CheckValidity() const { CHECK_SAME_DTYPE(indptr, indices); CHECK_SAME_CONTEXT(indptr, indices); if (!aten::IsNullArray(data)) { CHECK_SAME_DTYPE(indptr, data); CHECK_SAME_CONTEXT(indptr, data); } CHECK_NO_OVERFLOW(indptr->dtype, num_rows); CHECK_NO_OVERFLOW(indptr->dtype, num_cols);
CHECK_EQ(indptr->shape[0], num_rows + 1); } /** @brief Return true if indptr, indices and data are all null arrays. */ inline bool IsEmpty() const { return aten::IsNullArray(indptr) && aten::IsNullArray(indices) && aten::IsNullArray(data); } // Check and update the internal flag is_pinned. // This function will initialize a cuda context. inline bool CheckIfPinnedInCUDA() { is_pinned = (aten::IsNullArray(indptr) || indptr.IsPinned()) && (aten::IsNullArray(indices) || indices.IsPinned()) && (aten::IsNullArray(data) || data.IsPinned()); return is_pinned; } /** @brief Return a copy of this matrix on the given device context. Returns *this as-is when indptr already lives on ctx. */ inline CSRMatrix CopyTo(const DGLContext& ctx) const { if (ctx == indptr->ctx) return *this; return CSRMatrix( num_rows, num_cols, indptr.CopyTo(ctx), indices.CopyTo(ctx), aten::IsNullArray(data) ? data : data.CopyTo(ctx), sorted); } /** @brief Return a copy of this matrix in pinned (page-locked) memory. An empty matrix is only flagged as pinned and returned as-is; an already pinned matrix is returned as-is. */ inline CSRMatrix PinMemory() { if (!IsEmpty()) { if (is_pinned) return *this; auto new_csr = CSRMatrix( num_rows, num_cols, indptr.PinMemory(), indices.PinMemory(), aten::IsNullArray(data) ? data : data.PinMemory(), sorted); CHECK(new_csr.CheckIfPinnedInCUDA()) << "An internal DGL error has occured while trying to pin a CSR " "matrix. Please file a bug at " "'https://github.com/dmlc/dgl/issues' " "with the above stacktrace."; return new_csr; } is_pinned = true; return *this; } /** * @brief Pin the indptr, indices and data (if not Null) of the matrix. * @note This is an in-place method. Behavior depends on the current context, * kDGLCPU: will be pinned; * IsPinned: directly return; * kDGLCUDA: invalid, will throw an error. * The context check is deferred to pinning the NDArray. */ inline void PinMemory_() { if (!IsEmpty()) { if (is_pinned) return; indptr.PinMemory_(); indices.PinMemory_(); if (!aten::IsNullArray(data)) { data.PinMemory_(); } is_pinned = true; } is_pinned = true; return; } /** * @brief Unpin the indptr, indices and data (if not Null) of the matrix. * @note This is an in-place method.
Behavior depends on the current context, * IsPinned: will be unpinned; * others: directly return. * The context check is deferred to unpinning the NDArray. */ inline void UnpinMemory_() { if (!IsEmpty()) { if (!is_pinned) return; indptr.UnpinMemory_(); indices.UnpinMemory_(); if (!aten::IsNullArray(data)) { data.UnpinMemory_(); } is_pinned = false; } is_pinned = false; return; } /** * @brief Record stream for the indptr, indices and data (if not Null) of the * matrix. * @param stream The stream that is using the graph */ inline void RecordStream(DGLStreamHandle stream) const { indptr.RecordStream(stream); indices.RecordStream(stream); if (!aten::IsNullArray(data)) { data.RecordStream(stream); } } }; ///////////////////////// CSR routines ////////////////////////// /** @brief Return true if the value (row, col) is non-zero */ bool CSRIsNonZero(CSRMatrix, int64_t row, int64_t col); /** * @brief Batched implementation of CSRIsNonZero. * @note This operator allows broadcasting (i.e., either row or col can be of * length 1). */ runtime::NDArray CSRIsNonZero( CSRMatrix, runtime::NDArray row, runtime::NDArray col); /** @brief Return the nnz of the given row */ int64_t CSRGetRowNNZ(CSRMatrix, int64_t row); /** @brief Overload of CSRGetRowNNZ that takes the row ids as an array */ runtime::NDArray CSRGetRowNNZ(CSRMatrix, runtime::NDArray row); /** @brief Return the column index array of the given row */ runtime::NDArray CSRGetRowColumnIndices(CSRMatrix, int64_t row); /** @brief Return the data array of the given row */ runtime::NDArray CSRGetRowData(CSRMatrix, int64_t row); /** @brief Whether the CSR matrix contains data */ inline bool CSRHasData(CSRMatrix csr) { return !IsNullArray(csr.data); } /** @brief Whether the column indices of each row are sorted. */ bool CSRIsSorted(CSRMatrix csr); /** * @brief Get the data and the row,col indices for each returned entries. * * The operator supports matrix with duplicate entries and all the matched * entries will be returned. The operator assumes there is NO duplicate (row, * col) pair in the given input.
Otherwise, the returned result is undefined. * * If some (row, col) pairs do not contain a valid non-zero elements, * they will not be included in the return arrays. * * @note This operator allows broadcasting (i.e, either row or col can be of * length 1). * @param mat Sparse matrix * @param rows Row index * @param cols Column index * @return Three arrays {rows, cols, data} */ std::vector CSRGetDataAndIndices( CSRMatrix, runtime::NDArray rows, runtime::NDArray cols); /** * @brief Get all the data entries stored at a single (row, col) position. * * Implemented as a one-pair CSRGetDataAndIndices query whose data array is * returned. The return type is an ndarray due to possible duplicate * entries. * * @param mat Sparse matrix. * @param row Row index. * @param col Column index. * @return The data array of the entries found at (row, col). */ inline runtime::NDArray CSRGetAllData(CSRMatrix mat, int64_t row, int64_t col) { const auto& nbits = mat.indptr->dtype.bits; const auto& ctx = mat.indptr->ctx; IdArray rows = VecToIdArray({row}, nbits, ctx); IdArray cols = VecToIdArray({col}, nbits, ctx); const auto& rst = CSRGetDataAndIndices(mat, rows, cols); return rst[2]; } /** * @brief Get the data for each (row, col) pair. * * The operator supports matrix with duplicate entries but only one matched * entry will be returned for each (row, col) pair. Support duplicate input * (row, col) pairs. * * If some (row, col) pairs do not contain a valid non-zero element, * their data values are filled with -1. * * @note This operator allows broadcasting (i.e., either row or col can be of * length 1). * * @param mat Sparse matrix. * @param rows Row index. * @param cols Column index. * @return Data array. The i^th element is the data of (rows[i], cols[i]) */ runtime::NDArray CSRGetData( CSRMatrix, runtime::NDArray rows, runtime::NDArray cols); /** * @brief Get the data for each (row, col) pair, then index into the weights * array. * * The operator supports matrix with duplicate entries but only one matched * entry will be returned for each (row, col) pair. Support duplicate input * (row, col) pairs. * * If some (row, col) pairs do not contain a valid non-zero element to index * into the weights array, DGL returns the value \a filler for that pair * instead.
* * @note This operator allows broadcasting (i.e., either row or col can be of * length 1). * * @tparam DType the data type of the weights array. * @param mat Sparse matrix. * @param rows Row index. * @param cols Column index. * @param weights The weights array. * @param filler The value to return for row-column pairs not existent in the * matrix. * @return Data array. The i^th element is the weight looked up for * (rows[i], cols[i]), or \a filler if that pair does not exist. */ template runtime::NDArray CSRGetData( CSRMatrix, runtime::NDArray rows, runtime::NDArray cols, runtime::NDArray weights, DType filler); /** * @brief Get the data for each (row, col) pair, then index into the weights * array. * * The operator supports matrix with duplicate entries but only one matched * entry will be returned for each (row, col) pair. Support duplicate input * (row, col) pairs. * * If some (row, col) pairs do not contain a valid non-zero element to index * into the weights array, DGL returns the value \a filler for that pair * instead. * * @note This operator allows broadcasting (i.e., either row or col can be of * length 1). * @note This is the floating point number version of `CSRGetData`, which * removes the dtype template. * * @param mat Sparse matrix. * @param rows Row index. * @param cols Column index. * @param weights The weights array. * @param filler The value to return for row-column pairs not existent in the * matrix. * @return Data array. The i^th element is the weight looked up for * (rows[i], cols[i]), or \a filler if that pair does not exist. */ runtime::NDArray CSRGetFloatingData( CSRMatrix, runtime::NDArray rows, runtime::NDArray cols, runtime::NDArray weights, double filler); /** @brief Return a transposed CSR matrix */ CSRMatrix CSRTranspose(CSRMatrix csr); /** * @brief Convert CSR matrix to COO matrix. * * Complexity: O(nnz) * * - If data_as_order is false, the column and data arrays of the * result COO are equal to the indices and data arrays of the * input CSR. The result COO is also row sorted.
* - If the input CSR is further sorted, the result COO is also * column sorted. * * @param csr Input csr matrix * @param data_as_order If true, the data array in the input csr matrix contains * the order by which the resulting COO tuples are stored. In this case, the * data array of the resulting COO matrix will be empty * because it is essentially a consecutive range. * @return a coo matrix */ COOMatrix CSRToCOO(CSRMatrix csr, bool data_as_order); /** * @brief Slice rows of the given matrix and return. * * The sliced row IDs are relabeled to starting from zero. * * Examples: * num_rows = 4 * num_cols = 4 * indptr = [0, 2, 3, 3, 5] * indices = [1, 0, 2, 3, 1] * * After CSRSliceRows(csr, 1, 3) * * num_rows = 2 * num_cols = 4 * indptr = [0, 1, 1] * indices = [2] * * @param csr CSR matrix * @param start Start row id (inclusive) * @param end End row id (exclusive) * @return sliced rows stored in a CSR matrix */ CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end); /** @brief Overload of CSRSliceRows that takes the row ids to slice as an array. */ CSRMatrix CSRSliceRows(CSRMatrix csr, runtime::NDArray rows); /** * @brief Get the submatrix specified by the row and col ids. * * In numpy notation, given matrix M, row index array I, col index array J * This function returns the submatrix M[I, J]. It assumes that there is no * duplicate (row, col) pair in the given indices. M could have duplicate * entries. * * The sliced row and column IDs are relabeled according to the given * rows and cols (i.e., row #0 in the new matrix corresponds to rows[0] in * the original matrix). * * @param csr The input csr matrix * @param rows The row index to select * @param cols The col index to select * @return submatrix */ CSRMatrix CSRSliceMatrix( CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols); /** @brief Return true if the matrix has duplicate entries */ bool CSRHasDuplicate(CSRMatrix csr); /** * @brief Sort the column index at each row in ascending order in-place. * * Only the indices and data arrays (if available) will be mutated.
The indptr * array stays the same. * * Examples: * num_rows = 4 * num_cols = 4 * indptr = [0, 2, 3, 3, 5] * indices = [1, 0, 2, 3, 1] * * After CSRSort_(&csr) * * indptr = [0, 2, 3, 3, 5] * indices = [0, 1, 2, 1, 3] * * (each row is sorted on its own: row 0 holds [0, 1], row 1 holds [2], * row 2 is empty and row 3 holds [1, 3]) */ void CSRSort_(CSRMatrix* csr); /** * @brief Sort the column index at each row in ascending order. * * Return a new CSR matrix with sorted column indices and data arrays. * If the input is already flagged as sorted it is returned as-is. Otherwise * the result shares indptr with the input while indices and data (if any) * are cloned before sorting. */ inline CSRMatrix CSRSort(CSRMatrix csr) { if (csr.sorted) return csr; CSRMatrix ret( csr.num_rows, csr.num_cols, csr.indptr, csr.indices.Clone(), CSRHasData(csr) ? csr.data.Clone() : csr.data, csr.sorted); CSRSort_(&ret); return ret; } /** * @brief Reorder the rows and columns according to the new row and column order. * @param csr The input csr matrix. * @param new_row_ids the new row Ids (the index is the old row Id) * @param new_col_ids the new column Ids (the index is the old col Id). */ CSRMatrix CSRReorder( CSRMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids); /** * @brief Remove entries from CSR matrix by entry indices (data indices) * @param csr The input csr matrix. * @param entries The entry indices (data indices) to remove. * @return A new CSR matrix. NOTE(review): the mapping from the new CSR entries * to the old CSR entries is presumably carried by the data array of the * result, since only a single CSRMatrix is returned -- confirm against the * implementation. */ CSRMatrix CSRRemove(CSRMatrix csr, IdArray entries); /** * @brief Randomly select a fixed number of non-zero entries along each given * row using arXiv:2210.13339, Labor sampling. * * The picked indices are returned in the form of a COO matrix. * * The passed random_seed makes it so that for any seed vertex s and its * neighbor t, the rolled random variate r_t is the same for any call to this * function with the same random seed. When sampling as part of the same batch, * one would want identical seeds so that LABOR can globally sample. One example * is that for heterogeneous graphs, there is a single random seed passed for * each edge type. This will sample much fewer vertices compared to having * unique random seeds for each edge type.
If one called this function * individually for each edge type for a heterogeneous graph with different * random seeds, then it would run LABOR locally for each edge type, resulting * into a larger number of vertices being sampled. * * If this function is called without a random_seed, we get the random seed by * getting a random number from DGL. * * * Examples: * * // csr.num_rows = 4; * // csr.num_cols = 4; * // csr.indptr = [0, 2, 3, 3, 5] * // csr.indices = [0, 1, 1, 2, 3] * // csr.data = [2, 3, 0, 1, 4] * CSRMatrix csr = ...; * IdArray rows = ... ; // [1, 3] * auto sampled = CSRLaborSampling(csr, rows, 2, NullArray(), 0, \ * NullArray(), 0, NullArray()); * // possible sampled coo matrix (the first element of the returned pair): * // sampled.first.num_rows = 4 * // sampled.first.num_cols = 4 * // sampled.first.rows = [1, 3, 3] * // sampled.first.cols = [1, 2, 3] * // sampled.first.data = [3, 0, 4] * * @param mat Input CSR matrix. * @param rows Rows to sample from. * @param num_samples Number of samples using labor sampling * @param prob Probability array for nonuniform sampling * @param importance_sampling Whether to enable importance sampling * @param random_seed The random seed for the sampler * @param seed2_contribution The contribution of the second random seed, [0, 1) * @param NIDs global nids if sampling from a subgraph * @return A pair of a COOMatrix storing the picked row and col indices and the * edge weights if importance_sampling != 0 or prob argument was passed. * The data field of the COOMatrix stores the index of the picked elements * in the value array. */ std::pair CSRLaborSampling( CSRMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob = NullArray(), int importance_sampling = 0, IdArray random_seed = NullArray(), float seed2_contribution = 0, IdArray NIDs = NullArray()); /*! * @brief Randomly select a fixed number of non-zero entries along each given * row independently. * * The function performs random choices along each row independently. * The picked indices are returned in the form of a COO matrix.
* * If replace is false and a row has fewer non-zero values than num_samples, * all the values are picked. * * Examples: * * // csr.num_rows = 4; * // csr.num_cols = 4; * // csr.indptr = [0, 2, 3, 3, 5] * // csr.indices = [0, 1, 1, 2, 3] * // csr.data = [2, 3, 0, 1, 4] * CSRMatrix csr = ...; * IdArray rows = ... ; // [1, 3] * COOMatrix sampled = CSRRowWiseSampling(csr, rows, 2, FloatArray(), false); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [1, 3, 3] * // sampled.cols = [1, 2, 3] * // sampled.data = [3, 0, 4] * * @param mat Input CSR matrix. * @param rows Rows to sample from. * @param num_samples Number of samples * @param prob_or_mask Unnormalized probability array or mask array. * Should be of the same length as the data array. * If an empty array is provided, assume uniform. * @param replace True if sample with replacement * @return A COOMatrix storing the picked row, col and data indices. * @note The edges of the entire graph must be ordered by their edge types. */ COOMatrix CSRRowWiseSampling( CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray prob_or_mask = NDArray(), bool replace = true); /*! * @brief Randomly select a fixed number of non-zero entries along each given * row independently. * * The function performs random choices along each row independently. * The picked indices are returned in the form of a CSR matrix, with * additional IdArray that is an extended version of CSR's index pointers. * * When the map_seed_nodes template parameter is true, rows are also saved as * new seed nodes and mapped. * * If replace is false and a row has fewer non-zero values than num_samples, * all the values are picked. * * Examples: * * // csr.num_rows = 4; * // csr.num_cols = 4; * // csr.indptr = [0, 2, 3, 3, 5] * // csr.indices = [0, 1, 1, 2, 3] * // csr.data = [2, 3, 0, 1, 4] * CSRMatrix csr = ...; * IdArray rows = ...
; // [1, 3] * IdArray seed_mapping = [-1, -1, -1, -1]; * std::vector new_seed_nodes = {}; * * std::pair sampled = CSRRowWiseSamplingFused< * typename IdType, True>( * csr, rows, seed_mapping, * &new_seed_nodes, 2, * FloatArray(), false); * // possible sampled csr matrix: * // sampled.first.num_rows = 2 * // sampled.first.num_cols = 3 * // sampled.first.indptr = [0, 1, 3] * // sampled.first.indices = [1, 2, 3] * // sampled.first.data = [0, 1, 4] * // sampled.second = [0, 1, 1] * // seed_mapping = [-1, 0, -1, 1]; * // new_seed_nodes = {1, 3}; * * @tparam IdType Graph's index data type, can be int32_t or int64_t * @tparam map_seed_nodes If set to true we map and copy rows to new_seed_nodes * @param mat Input CSR matrix. * @param rows Rows to sample from. * @param seed_mapping Mapping array used if map_seed_nodes=true. If so each row * from rows will be set to its position e.g. mapping[rows[i]] = i. * @param new_seed_nodes Pointer to the vector used if map_seed_nodes=true. If * so it will contain rows. * @param num_samples Number of samples * @param prob_or_mask Unnormalized probability array or mask array. * Should be of the same length as the data array. * If an empty array is provided, assume uniform. * @param replace True if sample with replacement * @return A CSRMatrix storing the picked row, col and data indices, * COO version of picked rows * @note The edges of the entire graph must be ordered by their edge types, * rows must be unique */ template std::pair CSRRowWiseSamplingFused( CSRMatrix mat, IdArray rows, IdArray seed_mapping, std::vector* new_seed_nodes, int64_t num_samples, NDArray prob_or_mask = NDArray(), bool replace = true); /** * @brief Randomly select a fixed number of non-zero entries for each edge type * along each given row independently. * * The function performs random choices along each row independently. * In each row, num_samples samples is picked for each edge type.
(The edge * type is stored in etypes) * The picked indices are returned in the form of a COO matrix. * * If replace is false and a row has fewer non-zero values than num_samples, * all the values are picked. * * Examples: TODO * * // csr.num_rows = 4; * // csr.num_cols = 4; * // csr.indptr = [0, 4, 4, 4, 5] * // csr.cols = [0, 1, 3, 2, 3] * // csr.data = [2, 3, 0, 1, 4] * // eid2etype_offset = [0, 3, 4, 5] * CSRMatrix csr = ...; * IdArray rows = ... ; // [0, 3] * std::vector num_samples = {2, 2, 2}; * COOMatrix sampled = CSRRowWisePerEtypeSampling(csr, rows, eid2etype_offset, * num_samples, FloatArray(), false); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [0, 0, 0, 3] * // sampled.cols = [0, 3, 2, 3] * // sampled.data = [2, 0, 1, 4] * * @param mat Input CSR matrix. * @param rows Rows to sample from. * @param eid2etype_offset The offset to each edge type. * @param num_samples Number of samples to choose per edge type. * @param prob_or_mask Unnormalized probability array or mask array. * Should be of the same length as the data array. * If an empty array is provided, assume uniform. * @param replace True if sample with replacement * @param rowwise_etype_sorted whether the CSR column indices per row are * ordered by edge type. * @return A COOMatrix storing the picked row, col and data indices. * @note The edges must be ordered by their edge types. */ COOMatrix CSRRowWisePerEtypeSampling( CSRMatrix mat, IdArray rows, const std::vector& eid2etype_offset, const std::vector& num_samples, const std::vector& prob_or_mask, bool replace = true, bool rowwise_etype_sorted = false); /** * @brief Select K non-zero entries with the largest weights along each given * row. * * The function performs top-k selection along each row independently. * The picked indices are returned in the form of a COO matrix. * * If a row has fewer non-zero values than k, * all the values are picked.
* * Examples: * * // csr.num_rows = 4; * // csr.num_cols = 4; * // csr.indptr = [0, 2, 3, 3, 5] * // csr.indices = [0, 1, 1, 2, 3] * // csr.data = [2, 3, 0, 1, 4] * CSRMatrix csr = ...; * IdArray rows = ... ; // [0, 1, 3] * FloatArray weight = ... ; // [1., 0., -1., 10., 20.] * COOMatrix sampled = CSRRowWiseTopk(csr, rows, 1, weight); * // sampled coo matrix (row 3 holds the entries with data ids 1 and 4, whose * // weights are 0. and 20., so the entry with data id 4 is picked): * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [0, 1, 3] * // sampled.cols = [1, 1, 3] * // sampled.data = [3, 0, 4] * * @param mat Input CSR matrix. * @param rows Rows to sample from. * @param k The K value. * @param weight Weight associated with each entry. Should be of the same length * as the data array. If an empty array is provided, assume uniform. * @param ascending If true, elements are sorted by ascending order, equivalent * to find the K smallest values. Otherwise, find K largest values. * @return A COOMatrix storing the picked row and col indices. Its data field * stores the index of the picked elements in the value array. */ COOMatrix CSRRowWiseTopk( CSRMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending = false); /** * @brief Randomly select a fixed number of non-zero entries along each given * row independently, where the probability of columns to be picked can be * biased according to its tag. * * Each column is assigned an integer tag which determines its probability to be * sampled. Users can assign different probability to different tags. * * This function only works with a CSR matrix sorted according to the tag so * that entries with the same column tag are arranged in a consecutive range, * and the input `tag_offset` represents the boundaries of these ranges. * However, the function itself will not check if the input matrix has been * sorted. It's the caller's responsibility to ensure the input matrix has been * sorted by `CSRSortByTag` (it will also return a NDArray `tag_offset` which * should be used as an input of this function).
* * The picked indices are returned in the form of a COO matrix. * * If replace is false and a row has fewer non-zero values than num_samples, * all the values are picked. * * Examples: * * // csr.num_rows = 4; * // csr.num_cols = 4; * // csr.indptr = [0, 2, 4, 5, 5] * // csr.indices = [1, 2, 2, 3, 3] * // tag of each element's column: 0, 0, 0, 1, 1 * // tag_offset = [[0, 2, 2], [0, 1, 2], [0, 0, 1], [0, 0, 0]] * // csr.data = [2, 3, 0, 1, 4] * // bias = [1.0, 0.0] * CSRMatrix mat = ...; * IdArray rows = ...; //[0, 1] * NDArray tag_offset = ...; * FloatArray bias = ...; * COOMatrix sampled = CSRRowWiseSamplingBiased(mat, rows, 1, tag_offset, bias); * // possible sampled coo matrix: * // sampled.num_rows = 4 * // sampled.num_cols = 4 * // sampled.rows = [0, 1] * // sampled.cols = [1, 2] * // sampled.data = [2, 0] * // Note that in this case, for row 1, the column 3 will never be picked as it * // has tag 1 and the probability of tag 1 is 0. * * * @param mat Input CSR matrix. * @param rows Rows to sample from. * @param num_samples Number of samples. * @param tag_offset The boundaries of tags. Should be of the shape [num_row, * num_tags+1] * @param bias Unnormalized probability array. Should be of length num_tags * @param replace True if sample with replacement * @return A COOMatrix storing the picked row and col indices. Its data field * stores the index of the picked elements in the value array. * */ COOMatrix CSRRowWiseSamplingBiased( CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray tag_offset, FloatArray bias, bool replace = true); /** * @brief Uniformly sample row-column pairs whose entries do not exist in the * given sparse matrix using rejection sampling. * * @note The number of samples returned may not necessarily be the number of * samples given. * * @param csr The CSR matrix. * @param num_samples The number of samples. * @param num_trials The number of trials. * @param exclude_self_loops Do not include the examples where the row equals * the column.
* @param replace Whether to sample with replacement. * @param redundancy How much redundant negative examples to take in case of * duplicate examples. * @return A pair of row and column tensors. */ std::pair CSRGlobalUniformNegativeSampling( const CSRMatrix& csr, int64_t num_samples, int num_trials, bool exclude_self_loops, bool replace, double redundancy); /** * @brief Sort the column index according to the tag of each column. * * Example: * indptr = [0, 5, 8] * indices = [0, 1, 2, 3, 4, 0, 1, 2] * * tag = [1, 1, 0, 2, 0] * * After CSRSortByTag * * indptr = [0, 5, 8] * indices = [2, 4, 0, 1, 3, 2, 0, 1] * (tag) = [0, 0, 1, 1, 2, 0, 1, 1] * ^ ^ ^ ^ * ^ ^ ^^ * (the tag array itself is unchanged.) * * Return: * [[0, 2, 4, 5], [0, 1, 3, 3]] (marked with ^) * * @param csr The csr matrix to be sorted * @param tag_array Tag of each column. IdArray with length num_cols * @param num_tags Number of tags. It should be equal to max(tag_array)+1. * @return 1. A sorted copy of the given CSR matrix * 2. The split positions of different tags. NDArray of shape (num_rows, * num_tags + 1) */ std::pair CSRSortByTag( const CSRMatrix& csr, const IdArray tag_array, int64_t num_tags); /** * @brief Union two CSRMatrix into one CSRMatrix. * * Two Matrix must have the same shape. * * Example: * * A = [[0, 0, 1, 0], * [1, 0, 1, 1], * [0, 1, 0, 0]] * * B = [[0, 1, 1, 0], * [0, 0, 0, 1], * [0, 0, 1, 0]] * * CSRMatrix_A.num_rows : 3 * CSRMatrix_A.num_cols : 4 * CSRMatrix_B.num_rows : 3 * CSRMatrix_B.num_cols : 4 * * C = UnionCsr({A, B}); * * C = [[0, 1, 2, 0], * [1, 0, 1, 2], * [0, 1, 1, 0]] * * CSRMatrix_C.num_rows : 3 * CSRMatrix_C.num_cols : 4 */ CSRMatrix UnionCsr(const std::vector& csrs); /** * @brief Union a list CSRMatrix into one CSRMatrix. 
* * Examples: * * A = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0]] * * B = [[0, 0], * [1, 0]] * * CSRMatrix_A.num_rows : 3 * CSRMatrix_A.num_cols : 3 * CSRMatrix_B.num_rows : 2 * CSRMatrix_B.num_cols : 2 * * C = DisjointUnionCsr({A, B}); * * C = [[0, 0, 1, 0, 0], * [1, 0, 1, 0, 0], * [0, 1, 0, 0, 0], * [0, 0, 0, 0, 0], * [0, 0, 0, 1, 0]] * CSRMatrix_C.num_rows : 5 * CSRMatrix_C.num_cols : 5 * * @param csrs The input list of csr matrix. * @param src_offset A list of integers recording src vertix id offset of each * Matrix in csrs * @param src_offset A list of integers recording dst vertix id offset of each * Matrix in csrs * @return The combined CSRMatrix. */ CSRMatrix DisjointUnionCsr(const std::vector& csrs); /** * @brief CSRMatrix toSimple. * * A = [[0, 0, 0], * [3, 0, 2], * [1, 1, 0], * [0, 0, 4]] * * B, cnt, edge_map = CSRToSimple(A) * * B = [[0, 0, 0], * [1, 0, 1], * [1, 1, 0], * [0, 0, 1]] * cnt = [3, 2, 1, 1, 4] * edge_map = [0, 0, 0, 1, 1, 2, 3, 4, 4, 4, 4] * * @return The simplified CSRMatrix * The count recording the number of duplicated edges from the original * graph. The edge mapping from the edge IDs of original graph to those of the * returned graph. */ std::tuple CSRToSimple(const CSRMatrix& csr); /** * @brief Split a CSRMatrix into multiple disjoint components. * * Examples: * * C = [[0, 0, 1, 0, 0], * [1, 0, 1, 0, 0], * [0, 1, 0, 0, 0], * [0, 0, 0, 0, 0], * [0, 0, 0, 1, 0], * [0, 0, 0, 0, 1]] * CSRMatrix_C.num_rows : 6 * CSRMatrix_C.num_cols : 5 * * batch_size : 2 * edge_cumsum : [0, 4, 6] * src_vertex_cumsum : [0, 3, 6] * dst_vertex_cumsum : [0, 3, 5] * * ret = DisjointPartitionCsrBySizes(C, * batch_size, * edge_cumsum, * src_vertex_cumsum, * dst_vertex_cumsum) * * A = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0]] * CSRMatrix_A.num_rows : 3 * CSRMatrix_A.num_cols : 3 * * B = [[0, 0], * [1, 0], * [0, 1]] * CSRMatrix_B.num_rows : 3 * CSRMatrix_B.num_cols : 2 * * @param csr CSRMatrix to split. 
* @param batch_size Number of disjoin components (Sub CSRMatrix) * @param edge_cumsum Number of edges of each components * @param src_vertex_cumsum Number of src vertices of each component. * @param dst_vertex_cumsum Number of dst vertices of each component. * @return A list of CSRMatrixes representing each disjoint components. */ std::vector DisjointPartitionCsrBySizes( const CSRMatrix& csrs, const uint64_t batch_size, const std::vector& edge_cumsum, const std::vector& src_vertex_cumsum, const std::vector& dst_vertex_cumsum); /** * @brief Slice a contiguous chunk from a CSRMatrix * * Examples: * * C = [[0, 0, 1, 0, 0], * [1, 0, 1, 0, 0], * [0, 1, 0, 0, 0], * [0, 0, 0, 0, 0], * [0, 0, 0, 1, 0], * [0, 0, 0, 0, 1]] * CSRMatrix_C.num_rows : 6 * CSRMatrix_C.num_cols : 5 * * edge_range : [4, 6] * src_vertex_range : [3, 6] * dst_vertex_range : [3, 5] * * ret = CSRSliceContiguousChunk(C, * edge_range, * src_vertex_range, * dst_vertex_range) * * ret = [[0, 0], * [1, 0], * [0, 1]] * CSRMatrix_ret.num_rows : 3 * CSRMatrix_ret.num_cols : 2 * * @param csr CSRMatrix to slice. * @param edge_range ID range of the edges in the chunk * @param src_vertex_range ID range of the src vertices in the chunk. * @param dst_vertex_range ID range of the dst vertices in the chunk. * @return CSRMatrix representing the chunk. */ CSRMatrix CSRSliceContiguousChunk( const CSRMatrix& csr, const std::vector& edge_range, const std::vector& src_vertex_range, const std::vector& dst_vertex_range); /** * @brief Generalized Sparse Matrix-Matrix Multiplication on CSR. * @param op The binary operator, could be `add`, `sub', `mul`, 'div', * `copy_u`, `copy_e'. * @param op The reduce operator, could be `sum`, `min`, `max'. * @param csr The CSR we apply SpMM on. * @param ufeat The source node feature. * @param efeat The edge feature. * @param out The output feature on destination nodes. 
* @param out_aux A list of NDArray's that contains auxiliary information such
 *   as the argmax on source nodes and edges for reduce operators such as
 *   `min` and `max`.
 */
void CSRSpMM(
    const std::string& op, const std::string& reduce, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux);

/** @brief CSRSpMM C interface without std::string. */
void CSRSpMM(
    const char* op, const char* reduce, const CSRMatrix& csr, NDArray ufeat,
    NDArray efeat, NDArray out, std::vector out_aux);

/**
 * @brief Generalized Sampled Dense-Dense Matrix Multiplication on CSR.
 * @param op The binary operator, could be `add`, `sub`, `mul`, `div`,
 *   `dot`, `copy_u`, `copy_e`.
 * @param csr The CSR we apply SDDMM on.
 * @param ufeat The first operand feature; `lhs_target` tells what it is
 *   attached to.
 * @param efeat The second operand feature; presumably `rhs_target` tells what
 *   it is attached to. NOTE(review): this argument was previously documented
 *   under the name `vfeat` as "the destination node feature" -- confirm the
 *   intended naming.
 * @param out The output feature on edge.
 * @param lhs_target Type of `ufeat` (0: source, 1: edge, 2: destination).
 * @param rhs_target Type of `efeat` (0: source, 1: edge, 2: destination).
 */
void CSRSDDMM(
    const std::string& op, const CSRMatrix& csr, NDArray ufeat, NDArray efeat,
    NDArray out, int lhs_target, int rhs_target);

/** @brief CSRSDDMM C interface without std::string. */
void CSRSDDMM(
    const char* op, const CSRMatrix& csr, NDArray ufeat, NDArray efeat,
    NDArray out, int lhs_target, int rhs_target);

}  // namespace aten
}  // namespace dgl

namespace dmlc {
DMLC_DECLARE_TRAITS(has_saveload, dgl::aten::CSRMatrix, true);
}  // namespace dmlc

#endif  // DGL_ATEN_CSR_H_
================================================
FILE: include/dgl/aten/macro.h
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file dgl/aten/macro.h
 * @brief Common macros for aten package.
*/
#ifndef DGL_ATEN_MACRO_H_
#define DGL_ATEN_MACRO_H_

///////////////////////// Dispatchers //////////////////////////

/**
 * Dispatch according to device (only kDGLCPU is accepted):
 *
 * ATEN_XPU_SWITCH(array->ctx.device_type, XPU, "OpName", {
 *   // Now XPU is a placeholder for array->ctx.device_type
 *   DeviceSpecificImplementation(...);
 * });
 *
 * `val` is the device type to dispatch on, `XPU` is the name of the constexpr
 * constant made visible to the body, `op` is streamed into the error message
 * and the remaining arguments are the body. Any device type other than
 * kDGLCPU ends in LOG(FATAL).
 */
#define ATEN_XPU_SWITCH(val, XPU, op, ...)                         \
  do {                                                             \
    if ((val) == kDGLCPU) {                                        \
      constexpr auto XPU = kDGLCPU;                                \
      { __VA_ARGS__ }                                              \
    } else {                                                       \
      LOG(FATAL) << "Operator " << (op) << " does not support "    \
                 << dgl::runtime::DeviceTypeCode2Str(val) << " device."; \
    }                                                              \
  } while (0)

/**
 * Dispatch according to device:
 *
 * XXX(minjie): temporary macro that allows CUDA operator
 *
 * ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, "OpName", {
 *   // Now XPU is a placeholder for array->ctx.device_type
 *   DeviceSpecificImplementation(...);
 * });
 *
 * We treat pinned memory as normal host memory if we don't want
 * to enable CUDA UVA access for this operator.
 *
 * When DGL_USE_CUDA is defined, kDGLCPU and kDGLCUDA are accepted; otherwise
 * this macro is an alias of ATEN_XPU_SWITCH.
 */
#ifdef DGL_USE_CUDA
#define ATEN_XPU_SWITCH_CUDA(val, XPU, op, ...)                    \
  do {                                                             \
    if ((val) == kDGLCPU) {                                        \
      constexpr auto XPU = kDGLCPU;                                \
      { __VA_ARGS__ }                                              \
    } else if ((val) == kDGLCUDA) {                                \
      constexpr auto XPU = kDGLCUDA;                               \
      { __VA_ARGS__ }                                              \
    } else {                                                       \
      LOG(FATAL) << "Operator " << (op) << " does not support "    \
                 << dgl::runtime::DeviceTypeCode2Str(val) << " device."; \
    }                                                              \
  } while (0)
#else  // DGL_USE_CUDA
#define ATEN_XPU_SWITCH_CUDA ATEN_XPU_SWITCH
#endif  // DGL_USE_CUDA

/**
 * Dispatch according to integral type (either int32 or int64):
 *
 * ATEN_ID_TYPE_SWITCH(array->dtype, IdType, {
 *   // Now IdType is the type corresponding to data type in array.
 *   // For instance, one can do this for a CPU array:
 *   IdType *data = static_cast<IdType *>(array->data);
 * });
 */
#define ATEN_ID_TYPE_SWITCH(val, IdType, ...)
\
  do {                                                              \
    CHECK_EQ((val).code, kDGLInt) << "ID must be integer type";     \
    if ((val).bits == 32) {                                         \
      typedef int32_t IdType;                                       \
      { __VA_ARGS__ }                                               \
    } else if ((val).bits == 64) {                                  \
      typedef int64_t IdType;                                       \
      { __VA_ARGS__ }                                               \
    } else {                                                        \
      LOG(FATAL) << "ID can only be int32 or int64";                \
    }                                                               \
  } while (0)

/**
 * Dispatch according to bits (either 32 or 64):
 *
 * ATEN_ID_BITS_SWITCH(bits, IdType, {
 *   // Now IdType is int32_t or int64_t, matching the given bit width.
 *   // For instance, one can do this for a CPU array:
 *   IdType *data = static_cast<IdType *>(array->data);
 * });
 */
#define ATEN_ID_BITS_SWITCH(bits, IdType, ...)                      \
  do {                                                              \
    CHECK((bits) == 32 || (bits) == 64) << "bits must be 32 or 64"; \
    if ((bits) == 32) {                                             \
      typedef int32_t IdType;                                       \
      { __VA_ARGS__ }                                               \
    } else if ((bits) == 64) {                                      \
      typedef int64_t IdType;                                       \
      { __VA_ARGS__ }                                               \
    } else {                                                        \
      LOG(FATAL) << "ID can only be int32 or int64";                \
    }                                                               \
  } while (0)

/**
 * Dispatch according to float type (either float32 or float64):
 *
 * ATEN_FLOAT_TYPE_SWITCH(array->dtype, FloatType, "array", {
 *   // Now FloatType is the type corresponding to data type in array.
 *   // For instance, one can do this for a CPU array:
 *   FloatType *data = static_cast<FloatType *>(array->data);
 * });
 *
 * `val_name` is only used to build the error messages.
 */
#define ATEN_FLOAT_TYPE_SWITCH(val, FloatType, val_name, ...)               \
  do {                                                                      \
    CHECK_EQ((val).code, kDGLFloat) << (val_name) << " must be float type"; \
    if ((val).bits == 32) {                                                 \
      typedef float FloatType;                                              \
      { __VA_ARGS__ }                                                       \
    } else if ((val).bits == 64) {                                          \
      typedef double FloatType;                                             \
      { __VA_ARGS__ }                                                       \
    } else {                                                                \
      LOG(FATAL) << (val_name) << " can only be float32 or float64";        \
    }                                                                       \
  } while (0)

/**
 * Dispatch according to float type, including 16bits
 * (float16/bfloat16/float32/float64).
 */
#ifdef DGL_USE_CUDA
#if BF16_ENABLED
#define ATEN_FLOAT_TYPE_SWITCH_16BITS(val, FloatType, XPU, val_name, ...)
\ do { \ CHECK((val).code == kDGLFloat || (val.code == kDGLBfloat)) \ << (val_name) << " must be float type"; \ if ((val).bits == 32) { \ typedef float FloatType; \ { __VA_ARGS__ } \ } else if ((val).bits == 64) { \ typedef double FloatType; \ { __VA_ARGS__ } \ } else if ( \ XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLFloat) { \ typedef __half FloatType; \ { __VA_ARGS__ } \ } else if ( \ XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLBfloat) { \ typedef __nv_bfloat16 FloatType; \ { __VA_ARGS__ } \ } else if ( \ XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLFloat) { \ LOG(FATAL) << (val_name) << " can't be float16 on CPU"; \ } else if ( \ XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLBfloat) { \ typedef BFloat16 FloatType; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << (val_name) \ << " can only be float16/bfloat16/float32/float64 on GPU"; \ } \ } while (0) #else // BF16_ENABLED #define ATEN_FLOAT_TYPE_SWITCH_16BITS(val, FloatType, XPU, val_name, ...) 
\ do { \ CHECK((val).code == kDGLFloat || (val.code == kDGLBfloat)) \ << (val_name) << " must be float type"; \ if ((val).bits == 32) { \ typedef float FloatType; \ { __VA_ARGS__ } \ } else if ((val).bits == 64) { \ typedef double FloatType; \ { __VA_ARGS__ } \ } else if ( \ XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLFloat) { \ typedef __half FloatType; \ { __VA_ARGS__ } \ } else if ( \ XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLBfloat) { \ LOG(FATAL) << "bfloat16 requires CUDA >= 11.0"; \ } else if ( \ XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLFloat) { \ LOG(FATAL) << (val_name) << " can't be float16 on CPU"; \ } else if ( \ XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLBfloat) { \ typedef BFloat16 FloatType; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << (val_name) \ << " can only be float16/float32/float64 on GPU"; \ } \ } while (0) #endif // BF16_ENABLED #else // DGL_USE_CUDA #define ATEN_FLOAT_TYPE_SWITCH_16BITS(val, FloatType, XPU, val_name, ...) \ do { \ CHECK((val).code == kDGLFloat || (val.code == kDGLBfloat)) \ << (val_name) << " must be float type"; \ if ((val).bits == 32) { \ typedef float FloatType; \ { __VA_ARGS__ } \ } else if ((val).bits == 64) { \ typedef double FloatType; \ { __VA_ARGS__ } \ } else if ( \ XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLBfloat) { \ typedef BFloat16 FloatType; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << (val_name) \ << " can only be bfloat16/float32/float64 on CPU"; \ } \ } while (0) #endif // DGL_USE_CUDA /** * Dispatch according to data type (int32, int64, float32 or float64): * * ATEN_DTYPE_SWITCH(array->dtype, DType, { * // Now DType is the type corresponding to data type in array. * // For instance, one can do this for a CPU array: * DType *data = static_cast(array->data); * }); */ #define ATEN_DTYPE_SWITCH(val, DType, val_name, ...) 
\
  do {                                                                 \
    if ((val).code == kDGLInt && (val).bits == 32) {                   \
      typedef int32_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLInt && (val).bits == 64) {            \
      typedef int64_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLFloat && (val).bits == 32) {          \
      typedef float DType;                                             \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLFloat && (val).bits == 64) {          \
      typedef double DType;                                            \
      { __VA_ARGS__ }                                                  \
    } else {                                                           \
      LOG(FATAL) << (val_name)                                         \
                 << " can only be int32, int64, float32 or float64";   \
    }                                                                  \
  } while (0)

/**
 * Dispatch according to data type (int8, uint8, float32 or float64):
 *
 * ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH(array->dtype, DType, "array", {
 *   // Now DType is the type corresponding to data type in array.
 *   // For instance, one can do this for a CPU array:
 *   DType *data = static_cast<DType *>(array->data);
 * });
 *
 * `val_name` is only used to build the error message.
 */
#define ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH(val, DType, val_name, ...)  \
  do {                                                                 \
    if ((val).code == kDGLInt && (val).bits == 8) {                    \
      typedef int8_t DType;                                            \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLUInt && (val).bits == 8) {            \
      typedef uint8_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLFloat && (val).bits == 32) {          \
      typedef float DType;                                             \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLFloat && (val).bits == 64) {          \
      typedef double DType;                                            \
      { __VA_ARGS__ }                                                  \
    } else {                                                           \
      LOG(FATAL) << (val_name)                                         \
                 << " can only be int8, uint8, float32 or float64";    \
    }                                                                  \
  } while (0)

/**
 * Dispatch data type only based on bit-width (8-bit, 16-bit, 32-bit, 64-bit):
 *
 * ATEN_DTYPE_BITS_ONLY_SWITCH(array->dtype, DType, "array", {
 *   // Now DType is the type which has the same bit-width with the
 *   // data type in array.
 *   // Do not use for computation, but only for read and write.
 *   // For instance, one can do this for a CPU array:
 *   DType *data = static_cast<DType *>(array->data);
 * });
 */
#define ATEN_DTYPE_BITS_ONLY_SWITCH(val, DType, val_name, ...)
\
  do {                                                                 \
    if ((val).bits == 8) {                                             \
      typedef int8_t DType;                                            \
      { __VA_ARGS__ }                                                  \
    } else if ((val).bits == 16) {                                     \
      typedef int16_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else if ((val).bits == 32) {                                     \
      typedef int32_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else if ((val).bits == 64) {                                     \
      typedef int64_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else {                                                           \
      LOG(FATAL) << (val_name)                                         \
                 << " can only be 8-bit, 16-bit, 32-bit, or 64-bit";   \
    }                                                                  \
  } while (0)

/**
 * Dispatch according to integral type of CSR graphs.
 * Accepts the same types as ATEN_ID_TYPE_SWITCH (int32 or int64) but reports
 * a CSR-specific error message for anything else.
 */
#define ATEN_CSR_DTYPE_SWITCH(val, DType, ...)                         \
  do {                                                                 \
    if ((val).code == kDGLInt && (val).bits == 32) {                   \
      typedef int32_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else if ((val).code == kDGLInt && (val).bits == 64) {            \
      typedef int64_t DType;                                           \
      { __VA_ARGS__ }                                                  \
    } else {                                                           \
      LOG(FATAL) << "CSR matrix data can only be int32 or int64";      \
    }                                                                  \
  } while (0)

// Macro to dispatch according to device context and index type.
// The device is taken from (csr).indptr->ctx and the index type from
// (csr).indptr->dtype. Note the expansion already ends with a semicolon.
#define ATEN_CSR_SWITCH(csr, XPU, IdType, op, ...)                      \
  ATEN_XPU_SWITCH((csr).indptr->ctx.device_type, XPU, op, {             \
    ATEN_ID_TYPE_SWITCH((csr).indptr->dtype, IdType, {{__VA_ARGS__}});  \
  });

// Macro to dispatch according to device context and index type.
// The device is taken from (coo).row->ctx and the index type from
// (coo).row->dtype. Note the expansion already ends with a semicolon.
#define ATEN_COO_SWITCH(coo, XPU, IdType, op, ...)                   \
  ATEN_XPU_SWITCH((coo).row->ctx.device_type, XPU, op, {             \
    ATEN_ID_TYPE_SWITCH((coo).row->dtype, IdType, {{__VA_ARGS__}});  \
  });

// Check that VAR1 and VAR2 can be used together: passes when both have the
// same context, when VAR1 is pinned, or when VAR1 is empty.
#define CHECK_VALID_CONTEXT(VAR1, VAR2)                                 \
  CHECK(                                                                \
      ((VAR1)->ctx == (VAR2)->ctx) || (VAR1).IsPinned() ||              \
      ((VAR1).NumElements() == 0)) /* Let empty arrays pass */          \
      << "Expected " << (#VAR2) << "(" << (VAR2)->ctx << ")"            \
      << " to have the same device "                                    \
      << "context as " << (#VAR1) << "(" << (VAR1)->ctx << "). "        \
      << "Or " << (#VAR1) << "(" << (VAR1)->ctx << ")"                  \
      << " is pinned";

/**
 * Macro to dispatch according to the context of array and dtype of csr
 * to enable CUDA UVA ops.
 * Context check is covered here to avoid confusion with CHECK_SAME_CONTEXT.
* If csr has the same context with array, same behavior as ATEN_CSR_SWITCH_CUDA.
 * If csr is pinned, array's context will conduct the actual operation.
 *
 * NOTE(review): `csr` and `array` are expanded without parentheses in
 * `csr.indices` and `array->ctx` below, so they should be plain identifiers
 * or simple postfix expressions.
 */
#define ATEN_CSR_SWITCH_CUDA_UVA(csr, array, XPU, IdType, op, ...)       \
  do {                                                                   \
    CHECK_VALID_CONTEXT(csr.indices, array);                             \
    ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, op, {              \
      ATEN_ID_TYPE_SWITCH((csr).indptr->dtype, IdType, {{__VA_ARGS__}}); \
    });                                                                  \
  } while (0)

// Macro to dispatch according to device context (allowing cuda)
#ifdef DGL_USE_CUDA
#define ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, op, ...)                 \
  ATEN_XPU_SWITCH_CUDA((csr).indptr->ctx.device_type, XPU, op, {        \
    ATEN_ID_TYPE_SWITCH((csr).indptr->dtype, IdType, {{__VA_ARGS__}});  \
  });

// Macro to dispatch according to device context and index type.
#define ATEN_COO_SWITCH_CUDA(coo, XPU, IdType, op, ...)              \
  ATEN_XPU_SWITCH_CUDA((coo).row->ctx.device_type, XPU, op, {        \
    ATEN_ID_TYPE_SWITCH((coo).row->dtype, IdType, {{__VA_ARGS__}});  \
  });
#else  // DGL_USE_CUDA
// Without CUDA the *_CUDA dispatchers are aliases of the CPU-only ones.
#define ATEN_CSR_SWITCH_CUDA ATEN_CSR_SWITCH
#define ATEN_COO_SWITCH_CUDA ATEN_COO_SWITCH
#endif  // DGL_USE_CUDA

///////////////////////// Array checks //////////////////////////

// Predicates on the dtype of an array-like `a` (accessed through `->`).
#define IS_INT32(a) ((a)->dtype.code == kDGLInt && (a)->dtype.bits == 32)
#define IS_INT64(a) ((a)->dtype.code == kDGLInt && (a)->dtype.bits == 64)
#define IS_FLOAT32(a) ((a)->dtype.code == kDGLFloat && (a)->dtype.bits == 32)
#define IS_FLOAT64(a) ((a)->dtype.code == kDGLFloat && (a)->dtype.bits == 64)

// CHECK `cond` and report "Expecting <prop> of <value_name> to be
// <dtype_name>" on failure. The expansion has no trailing semicolon.
#define CHECK_IF(cond, prop, value_name, dtype_name)                           \
  CHECK(cond) << "Expecting " << (prop) << " of " << (value_name) << " to be " \
              << (dtype_name)

#define CHECK_INT32(value, value_name) \
  CHECK_IF(IS_INT32(value), "dtype", value_name, "int32")
#define CHECK_INT64(value, value_name) \
  CHECK_IF(IS_INT64(value), "dtype", value_name, "int64")
#define CHECK_INT(value, value_name)                           \
  CHECK_IF(                                                    \
      IS_INT32(value) || IS_INT64(value), "dtype", value_name, \
      "int32 or int64")
#define CHECK_FLOAT32(value, value_name) \
CHECK_IF(IS_FLOAT32(value), "dtype", value_name, "float32")
#define CHECK_FLOAT64(value, value_name) \
  CHECK_IF(IS_FLOAT64(value), "dtype", value_name, "float64")
#define CHECK_FLOAT(value, value_name)                             \
  CHECK_IF(                                                        \
      IS_FLOAT32(value) || IS_FLOAT64(value), "dtype", value_name, \
      "float32 or float64")

// Check that `value` has exactly `_ndim` dimensions.
#define CHECK_NDIM(value, _ndim, value_name) \
  CHECK_IF((value)->ndim == (_ndim), "ndim", value_name, _ndim)

// Check that VAR2 has the same dtype as VAR1.
// Note: the expansion already ends with a semicolon.
#define CHECK_SAME_DTYPE(VAR1, VAR2)                                     \
  CHECK((VAR1)->dtype == (VAR2)->dtype)                                  \
      << "Expected " << (#VAR2) << " to be the same type as " << (#VAR1) \
      << "(" << (VAR1)->dtype << ")"                                     \
      << ". But got " << (VAR2)->dtype << ".";

// Check that VAR2 has the same device context as VAR1.
// Note: the expansion already ends with a semicolon.
#define CHECK_SAME_CONTEXT(VAR1, VAR2)                                     \
  CHECK((VAR1)->ctx == (VAR2)->ctx)                                        \
      << "Expected " << (#VAR2) << " to have the same device context as "  \
      << (#VAR1) << "(" << (VAR1)->ctx << ")"                              \
      << ". But got " << (VAR2)->ctx << ".";

// Check that an 8-byte `val` fits in int32 when `dtype` is 32 bits wide; the
// check is skipped for any other combination.
// NOTE(review): the expansion ends with `while (0);` (trailing semicolon), so
// `if (c) CHECK_NO_OVERFLOW(d, v); else ...` does not compile.
#define CHECK_NO_OVERFLOW(dtype, val)                          \
  do {                                                         \
    if (sizeof(val) == 8 && (dtype).bits == 32)                \
      CHECK_LE((val), 0x7FFFFFFFL)                             \
          << "int32 overflow for argument " << (#val) << ".";  \
  } while (0);

// Check that VAR is a one-dimensional int32 or int64 array.
// Note: the expansion already ends with a semicolon.
#define CHECK_IS_ID_ARRAY(VAR)                                \
  CHECK((VAR)->ndim == 1 && (IS_INT32(VAR) || IS_INT64(VAR))) \
      << "Expected argument " << (#VAR) << " to be an 1D integer array.";

#endif  // DGL_ATEN_MACRO_H_
================================================
FILE: include/dgl/aten/spmat.h
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file dgl/aten/spmat.h
 * @brief Sparse matrix definitions
 */
#ifndef DGL_ATEN_SPMAT_H_
#define DGL_ATEN_SPMAT_H_

#include
#include

#include "../runtime/object.h"
#include "./types.h"

namespace dgl {

/**
 * @brief Sparse format.
 */
enum class SparseFormat {
  kCOO = 1,
  kCSR = 2,
  kCSC = 3,
};

/**
 * @brief Sparse format codes
 *
 * COO_CODE, CSR_CODE and CSC_CODE are single-bit flags (0x1, 0x2, 0x4);
 * ALL_CODE (0x7) has all three bits set and ANY_CODE (0x0) has none.
 */
const dgl_format_code_t ALL_CODE = 0x7;
const dgl_format_code_t ANY_CODE = 0x0;
const dgl_format_code_t COO_CODE = 0x1;
const dgl_format_code_t CSR_CODE = 0x2;
const dgl_format_code_t CSC_CODE = 0x4;

// Parse sparse format from string.
inline SparseFormat ParseSparseFormat(const std::string& name) { if (name == "coo") return SparseFormat::kCOO; else if (name == "csr") return SparseFormat::kCSR; else if (name == "csc") return SparseFormat::kCSC; else LOG(FATAL) << "Sparse format not recognized"; return SparseFormat::kCOO; } // Create string from sparse format. inline std::string ToStringSparseFormat(SparseFormat sparse_format) { if (sparse_format == SparseFormat::kCOO) return std::string("coo"); else if (sparse_format == SparseFormat::kCSR) return std::string("csr"); else return std::string("csc"); } inline std::vector CodeToSparseFormats(dgl_format_code_t code) { std::vector ret; if (code & COO_CODE) ret.push_back(SparseFormat::kCOO); if (code & CSR_CODE) ret.push_back(SparseFormat::kCSR); if (code & CSC_CODE) ret.push_back(SparseFormat::kCSC); return ret; } inline dgl_format_code_t SparseFormatsToCode( const std::vector& formats) { dgl_format_code_t ret = 0; for (auto format : formats) { switch (format) { case SparseFormat::kCOO: ret |= COO_CODE; break; case SparseFormat::kCSR: ret |= CSR_CODE; break; case SparseFormat::kCSC: ret |= CSC_CODE; break; default: LOG(FATAL) << "Only support COO/CSR/CSC formats."; } } return ret; } inline std::string CodeToStr(dgl_format_code_t code) { std::string ret = ""; if (code & COO_CODE) ret += "coo "; if (code & CSR_CODE) ret += "csr "; if (code & CSC_CODE) ret += "csc "; return ret; } inline SparseFormat DecodeFormat(dgl_format_code_t code) { if (code & COO_CODE) return SparseFormat::kCOO; if (code & CSC_CODE) return SparseFormat::kCSC; return SparseFormat::kCSR; } // Sparse matrix object that is exposed to python API. struct SparseMatrix : public runtime::Object { // Sparse format. int32_t format = 0; // Shape of this matrix. int64_t num_rows = 0, num_cols = 0; // Index arrays. For CSR, it is {indptr, indices, data}. For COO, it is {row, // col, data}. std::vector indices; // Boolean flags. 
// TODO(minjie): We might revisit this later to provide a more general // solution. Currently, we only consider aten::COOMatrix and aten::CSRMatrix. std::vector flags; SparseMatrix() {} SparseMatrix( int32_t fmt, int64_t nrows, int64_t ncols, const std::vector& idx, const std::vector& flg) : format(fmt), num_rows(nrows), num_cols(ncols), indices(idx), flags(flg) {} static constexpr const char* _type_key = "aten.SparseMatrix"; DGL_DECLARE_OBJECT_TYPE_INFO(SparseMatrix, runtime::Object); }; // Define SparseMatrixRef DGL_DEFINE_OBJECT_REF(SparseMatrixRef, SparseMatrix); } // namespace dgl #endif // DGL_ATEN_SPMAT_H_ ================================================ FILE: include/dgl/aten/types.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/aten/types.h * @brief Array and ID types */ #ifndef DGL_ATEN_TYPES_H_ #define DGL_ATEN_TYPES_H_ #include #include "../runtime/ndarray.h" namespace dgl { typedef uint64_t dgl_id_t; typedef uint64_t dgl_type_t; /** @brief Type for dgl fomrat code, whose binary representation indices * which sparse format is in use and which is not. * * Suppose the binary representation is xyz, then * - x indicates whether csc is in use (1 for true and 0 for false). * - y indicates whether csr is in use. * - z indicates whether coo is in use. */ typedef uint8_t dgl_format_code_t; using dgl::runtime::NDArray; typedef NDArray IdArray; typedef NDArray DegreeArray; typedef NDArray BoolArray; typedef NDArray IntArray; typedef NDArray FloatArray; typedef NDArray TypeArray; namespace aten { static const DGLContext CPU{kDGLCPU, 0}; } // namespace aten } // namespace dgl #endif // DGL_ATEN_TYPES_H_ ================================================ FILE: include/dgl/base_heterograph.h ================================================ /** * Copyright (c) 2019 by Contributors * @file dgl/heterograph_interface.h * @brief DGL heterogeneous graph index class. 
*/

#ifndef DGL_BASE_HETEROGRAPH_H_
#define DGL_BASE_HETEROGRAPH_H_

#include
#include
#include
#include
#include

#include "./runtime/object.h"
#include "array.h"
#include "aten/spmat.h"
#include "aten/types.h"
#include "graph_interface.h"

namespace dgl {

// Forward declaration
class BaseHeteroGraph;
class HeteroPickleStates;
typedef std::shared_ptr HeteroGraphPtr;

struct FlattenedHeteroGraph;
typedef std::shared_ptr FlattenedHeteroGraphPtr;

struct HeteroSubgraph;

/** @brief Enum class for edge direction */
enum class EdgeDir {
  kIn,  // in edge direction
  kOut  // out edge direction
};

/**
 * @brief Base heterogeneous graph.
 *
 * In heterograph, nodes represent entities and edges represent relations.
 * Nodes and edges are associated with types. The same pair of entity types
 * can have multiple relation types between them, but relation type **uniquely**
 * identifies the source and destination entity types.
 *
 * At a high level, a heterograph is a data structure composed of:
 * - A meta-graph that stores the entity-entity relation graph.
 * - A dictionary of relation type to the bipartite graph representing the
 *   actual connections among entity nodes.
 */
class BaseHeteroGraph : public runtime::Object {
 public:
  /** @brief Construct from the meta graph, which is stored as-is. */
  explicit BaseHeteroGraph(GraphPtr meta_graph) : meta_graph_(meta_graph) {}
  virtual ~BaseHeteroGraph() = default;

  ////////////////////// query/operations on meta graph ///////////////////////

  /** @return the number of vertex types (vertices of the meta graph) */
  virtual uint64_t NumVertexTypes() const { return meta_graph_->NumVertices(); }

  /** @return the number of edge types (edges of the meta graph) */
  virtual uint64_t NumEdgeTypes() const { return meta_graph_->NumEdges(); }

  /**
   * @return the endpoints of meta-graph edge `etype`, i.e. the pair of vertex
   *   types the edge type connects (presumably source type first, then
   *   destination type -- confirm against GraphInterface::FindEdge).
   */
  virtual std::pair GetEndpointTypes(
      dgl_type_t etype) const {
    return meta_graph_->FindEdge(etype);
  }

  /** @return the meta graph */
  virtual GraphPtr meta_graph() const { return meta_graph_; }

  /**
   * @brief Return the bipartite graph of the given edge type.
   * @param etype The edge type.
* @return The bipartite graph.
   */
  virtual HeteroGraphPtr GetRelationGraph(dgl_type_t etype) const = 0;

  ///////////////////// query/operations on realized graph /////////////////////

  /**
   * @brief Add vertices to the given vertex type
   * @param vtype The vertex type.
   * @param num_vertices The number of vertices to add.
   */
  virtual void AddVertices(dgl_type_t vtype, uint64_t num_vertices) = 0;

  /**
   * @brief Add one edge to the given edge type
   * @param etype The edge type.
   * @param src The source vertex.
   * @param dst The destination vertex.
   */
  virtual void AddEdge(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) = 0;

  /**
   * @brief Add edges to the given edge type
   * @param etype The edge type.
   * @param src_ids The source vertex ids.
   * @param dst_ids The destination vertex ids.
   */
  virtual void AddEdges(dgl_type_t etype, IdArray src_ids, IdArray dst_ids) = 0;

  /**
   * @brief Clear the graph. Remove all vertices/edges.
   */
  virtual void Clear() = 0;

  /**
   * @brief Get the data type of node and edge IDs of this graph.
   */
  virtual DGLDataType DataType() const = 0;

  /**
   * @brief Get the device context of this graph.
   */
  virtual DGLContext Context() const = 0;

  /**
   * @brief Pin graph.
   */
  virtual void PinMemory_() = 0;

  /**
   * @brief Check if this graph is pinned.
   */
  virtual bool IsPinned() const = 0;

  /**
   * @brief Record stream for this graph.
   * @param stream The stream that is using the graph
   */
  virtual void RecordStream(DGLStreamHandle stream) = 0;

  /**
   * @brief Get the number of integer bits used to store node/edge ids (32 or
   * 64).
*/
  // TODO(BarclayII) replace NumBits() calls to DataType() calls
  virtual uint8_t NumBits() const = 0;

  /**
   * @return whether the graph is a multigraph
   */
  virtual bool IsMultigraph() const = 0;

  /** @return whether the graph is read-only */
  virtual bool IsReadonly() const = 0;

  /** @return the number of vertices of the given vertex type in the graph.*/
  virtual uint64_t NumVertices(dgl_type_t vtype) const = 0;

  /**
   * @return the number of vertices for each type in the graph as a vector.
   * @note This base implementation is not supported: it calls LOG(FATAL).
   */
  inline virtual std::vector NumVerticesPerType() const {
    LOG(FATAL) << "[BUG] NumVerticesPerType() not supported on this object.";
    return {};
  }

  /** @return the number of edges of the given edge type in the graph.*/
  virtual uint64_t NumEdges(dgl_type_t etype) const = 0;

  /** @return true if the given vertex is in the graph.*/
  virtual bool HasVertex(dgl_type_t vtype, dgl_id_t vid) const = 0;

  /** @return a 0-1 array indicating whether the given vertices are in the
   * graph.
   */
  virtual BoolArray HasVertices(dgl_type_t vtype, IdArray vids) const = 0;

  /** @return true if the given edge is in the graph.*/
  virtual bool HasEdgeBetween(
      dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const = 0;

  /** @return a 0-1 array indicating whether the given edges are in the graph.*/
  virtual BoolArray HasEdgesBetween(
      dgl_type_t etype, IdArray src_ids, IdArray dst_ids) const = 0;

  /**
   * @brief Find the predecessors of a vertex.
   * @note The given vertex should belong to the dest vertex type
   *       of the given edge type.
   * @param etype The edge type
   * @param dst The vertex id.
   * @return the predecessor id array.
   */
  virtual IdArray Predecessors(dgl_type_t etype, dgl_id_t dst) const = 0;

  /**
   * @brief Find the successors of a vertex.
   * @note The given vertex should belong to the source vertex type
   *       of the given edge type.
   * @param etype The edge type
   * @param src The vertex id.
   * @return the successor id array.
*/
  virtual IdArray Successors(dgl_type_t etype, dgl_id_t src) const = 0;

  /**
   * @brief Get all edge ids between the two given endpoints
   * @note The given src and dst vertices should belong to the source vertex
   * type and the dest vertex type of the given edge type, respectively.
   * @param etype The edge type
   * @param src The source vertex.
   * @param dst The destination vertex.
   * @return the edge id array.
   */
  virtual IdArray EdgeId(
      dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const = 0;

  /**
   * @brief Get all edge ids between the given endpoint pairs.
   *
   * @param etype The edge type
   * @param src The src vertex ids.
   * @param dst The dst vertex ids.
   * @return EdgeArray containing all edges between all pairs.
   */
  virtual EdgeArray EdgeIdsAll(
      dgl_type_t etype, IdArray src, IdArray dst) const = 0;

  /**
   * @brief Get edge ids between the given endpoint pairs.
   *
   * Only one matched edge id is returned per pair even if there are multiple
   * matches due to parallel edges. The i^th id in the returned array is for
   * edge (src[i], dst[i]).
   *
   * @param etype The edge type
   * @param src The src vertex ids.
   * @param dst The dst vertex ids.
   * @return IdArray with one edge id per (src[i], dst[i]) pair.
   */
  virtual IdArray EdgeIdsOne(
      dgl_type_t etype, IdArray src, IdArray dst) const = 0;

  /**
   * @brief Find the edge ID and return the pair of endpoints
   * @param etype The edge type
   * @param eid The edge ID
   * @return a pair whose first element is the source and the second the
   * destination.
   */
  virtual std::pair FindEdge(
      dgl_type_t etype, dgl_id_t eid) const = 0;

  /**
   * @brief Find the edge IDs and return their source and target node IDs.
   * @param etype The edge type
   * @param eids The edge ID array.
   * @return EdgeArray containing all edges with id in eids. The order is
   * preserved.
   */
  virtual EdgeArray FindEdges(dgl_type_t etype, IdArray eids) const = 0;

  /**
   * @brief Get the in edges of the vertex.
   * @note The given vertex should belong to the dest vertex type
   *       of the given edge type.
* @param etype The edge type
   * @param vid The vertex id.
   * @return the edges
   */
  virtual EdgeArray InEdges(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Get the in edges of the vertices.
   * @note The given vertices should belong to the dest vertex type
   *       of the given edge type.
   * @param etype The edge type
   * @param vids The vertex id array.
   * @return the id arrays of the two endpoints of the edges.
   */
  virtual EdgeArray InEdges(dgl_type_t etype, IdArray vids) const = 0;

  /**
   * @brief Get the out edges of the vertex.
   * @note The given vertex should belong to the source vertex type
   *       of the given edge type.
   * @param etype The edge type
   * @param vid The vertex id.
   * @return the id arrays of the two endpoints of the edges.
   */
  virtual EdgeArray OutEdges(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Get the out edges of the vertices.
   * @note The given vertices should belong to the source vertex type
   *       of the given edge type.
   * @param etype The edge type
   * @param vids The vertex id array.
   * @return the id arrays of the two endpoints of the edges.
   */
  virtual EdgeArray OutEdges(dgl_type_t etype, IdArray vids) const = 0;

  /**
   * @brief Get all the edges in the graph.
   * @note If order is "srcdst", the returned edges list is sorted by their src
   * and dst ids. If order is "eid", they are in their edge id order. Otherwise,
   * they are in arbitrary order.
   * @param etype The edge type
   * @param order The order of the returned edge list.
   * @return the id arrays of the two endpoints of the edges.
   */
  virtual EdgeArray Edges(
      dgl_type_t etype, const std::string& order = "") const = 0;

  /**
   * @brief Get the in degree of the given vertex.
   * @note The given vertex should belong to the dest vertex type of the given
   * edge type.
   * @param etype The edge type
   * @param vid The vertex id.
   * @return the in degree
   */
  virtual uint64_t InDegree(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Get the in degrees of the given vertices.
* @note The given vertices should belong to the dest vertex type of the given
   * edge type.
   * @param etype The edge type
   * @param vids The vertex id array.
   * @return the in degree array
   */
  virtual DegreeArray InDegrees(dgl_type_t etype, IdArray vids) const = 0;

  /**
   * @brief Get the out degree of the given vertex.
   * @note The given vertex should belong to the source vertex type of the given
   * edge type.
   * @param etype The edge type
   * @param vid The vertex id.
   * @return the out degree
   */
  virtual uint64_t OutDegree(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Get the out degrees of the given vertices.
   * @note The given vertices should belong to the source vertex type of the
   * given edge type.
   * @param etype The edge type
   * @param vids The vertex id array.
   * @return the out degree array
   */
  virtual DegreeArray OutDegrees(dgl_type_t etype, IdArray vids) const = 0;

  /**
   * @brief Return the successor vector
   * @note The given vertex should belong to the source vertex type of the given
   * edge type.
   * @param etype The edge type
   * @param vid The vertex id.
   * @return the successor vector iterator pair.
   */
  virtual DGLIdIters SuccVec(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Return the out edge id vector
   * @note The given vertex should belong to the source vertex type of the given
   * edge type.
   * @param etype The edge type
   * @param vid The vertex id.
   * @return the out edge id vector iterator pair.
   */
  virtual DGLIdIters OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Return the predecessor vector
   * @note The given vertex should belong to the dest vertex type of the given
   * edge type.
   * @param etype The edge type
   * @param vid The vertex id.
   * @return the predecessor vector iterator pair.
   */
  virtual DGLIdIters PredVec(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Return the in edge id vector
   * @note The given vertex should belong to the dest vertex type of the given
   * edge type.
   * @param vid The vertex id.
   * @return the in edge id vector iterator pair.
*/
  virtual DGLIdIters InEdgeVec(dgl_type_t etype, dgl_id_t vid) const = 0;

  /**
   * @brief Get the adjacency matrix of the graph.
   *
   * TODO(minjie): deprecate this interface; replace it with GetXXXMatrix.
   *
   * By default, a row of returned adjacency matrix represents the destination
   * of an edge and the column represents the source.
   *
   * If the fmt is 'csr', the function should return three arrays, representing
   *  indptr, indices and edge ids
   *
   * If the fmt is 'coo', the function should return one array of shape (2,
   * nnz), representing a horizontal stack of row and col indices.
   *
   * @param etype The edge type.
   * @param transpose A flag to transpose the returned adjacency matrix.
   * @param fmt the format of the returned adjacency matrix.
   * @return a vector of IdArrays.
   */
  virtual std::vector GetAdj(
      dgl_type_t etype, bool transpose, const std::string& fmt) const = 0;

  /**
   * @brief Determine which format to use with a preference.
   *
   * Otherwise (presumably when none of the preferred formats can be used --
   * confirm against the implementations), it will return whatever DGL thinks
   * is the most appropriate given the arguments.
   *
   * @param etype Edge type.
   * @param preferred_formats Preferred sparse formats.
   * @return Available sparse format.
   */
  virtual SparseFormat SelectFormat(
      dgl_type_t etype, dgl_format_code_t preferred_formats) const = 0;

  /**
   * @brief Return sparse formats already created for the graph.
   *
   * @return a number of type dgl_format_code_t.
   */
  virtual dgl_format_code_t GetCreatedFormats() const = 0;

  /**
   * @brief Return allowed sparse formats for the graph.
   *
   * @return a number of type dgl_format_code_t.
   */
  virtual dgl_format_code_t GetAllowedFormats() const = 0;

  /**
   * @brief Return the graph in specified available formats.
   *
   * @param formats The format code of the requested formats.
   * @return The new graph.
   */
  virtual HeteroGraphPtr GetGraphInFormat(dgl_format_code_t formats) const = 0;

  /**
   * @brief Get adjacency matrix in COO format.
   * @param etype Edge type.
   * @return COO matrix.
   */
  virtual aten::COOMatrix GetCOOMatrix(dgl_type_t etype) const = 0;

  /**
   * @brief Get adjacency matrix in CSR format.
* * The row and column sizes are equal to the number of dsttype and srctype * nodes, respectively. * * @param etype Edge type. * @return CSR matrix. */ virtual aten::CSRMatrix GetCSRMatrix(dgl_type_t etype) const = 0; /** * @brief Get adjacency matrix in CSC format. * * A CSC matrix is equivalent to the transpose of a CSR matrix. * We reuse the CSRMatrix data structure as return value. The row and column * sizes are equal to the number of dsttype and srctype nodes, respectively. * * @param etype Edge type. * @return A CSR matrix. */ virtual aten::CSRMatrix GetCSCMatrix(dgl_type_t etype) const = 0; /** * @brief Extract the induced subgraph by the given vertices. * * The length of the given vector should be equal to the number of vertex * types. Empty arrays can be provided if no vertex is needed for the type. * The result subgraph has the same meta graph with the parent, but some types * can have no node/edge. * * @param vids the induced vertices per type. * @return the subgraph. */ virtual HeteroSubgraph VertexSubgraph( const std::vector& vids) const = 0; /** * @brief Extract the induced subgraph by the given edges. * * The length of the given vector should be equal to the number of edge types. * Empty arrays can be provided if no edge is needed for the type. The result * subgraph has the same meta graph with the parent, but some types can have * no node/edge. * * @param eids The edges in the subgraph. * @param preserve_nodes If true, the vertices will not be relabeled, so some * vertices may have no incident edges. * @return the subgraph. */ virtual HeteroSubgraph EdgeSubgraph( const std::vector& eids, bool preserve_nodes = false) const = 0; /** * @brief Convert the list of requested unitgraph graphs into a single * unitgraph graph. * * @param etypes The list of edge type IDs. * @return The flattened graph, with induced source/edge/destination * types/IDs. 
*/ virtual FlattenedHeteroGraphPtr Flatten( const std::vector& etypes) const { LOG(FATAL) << "Flatten operation unsupported"; return nullptr; } /** @brief Cast this graph to immutable graph */ virtual GraphPtr AsImmutableGraph() const { LOG(FATAL) << "AsImmutableGraph not supported."; return nullptr; } static constexpr const char* _type_key = "graph.HeteroGraph"; DGL_DECLARE_OBJECT_TYPE_INFO(BaseHeteroGraph, runtime::Object); protected: /** @brief meta graph */ GraphPtr meta_graph_; // empty constructor BaseHeteroGraph() {} }; // Define HeteroGraphRef DGL_DEFINE_OBJECT_REF(HeteroGraphRef, BaseHeteroGraph); /** * @brief Hetero-subgraph data structure. * * This class can be used as arguments and return values of a C API. * * * DGL_REGISTER_GLOBAL("some_c_api") * .set_body([] (DGLArgs args, DGLRetValue* rv) { * HeteroSubgraphRef subg = args[0]; * std::shared_ptr ret = do_something( ... ); * *rv = HeteroSubgraphRef(ret); * }); * */ struct HeteroSubgraph : public runtime::Object { /** @brief The heterograph. */ HeteroGraphPtr graph; /** * @brief The induced vertex ids of each entity type. * The vector length is equal to the number of vertex types in the parent * graph. Each array i has the same length as the number of vertices in type * i. Empty array is allowed if the mapping is identity. */ std::vector induced_vertices; /** * @brief The induced edge ids of each relation type. * The vector length is equal to the number of edge types in the parent graph. * Each array i has the same length as the number of edges in type i. * Empty array is allowed if the mapping is identity. 
*/ std::vector induced_edges; static constexpr const char* _type_key = "graph.HeteroSubgraph"; DGL_DECLARE_OBJECT_TYPE_INFO(HeteroSubgraph, runtime::Object); }; // Define HeteroSubgraphRef DGL_DEFINE_OBJECT_REF(HeteroSubgraphRef, HeteroSubgraph); /** @brief The flattened heterograph */ struct FlattenedHeteroGraph : public runtime::Object { /** @brief The graph */ HeteroGraphRef graph; /** * @brief Mapping from source node ID to node type in parent graph * @note The induced type array guarantees that the same type always appear * contiguously. */ IdArray induced_srctype; /** * @brief The set of node types in parent graph appearing in source nodes. */ IdArray induced_srctype_set; /** @brief Mapping from source node ID to local node ID in parent graph */ IdArray induced_srcid; /** * @brief Mapping from edge ID to edge type in parent graph * @note The induced type array guarantees that the same type always appear * contiguously. */ IdArray induced_etype; /** * @brief The set of edge types in parent graph appearing in edges. */ IdArray induced_etype_set; /** @brief Mapping from edge ID to local edge ID in parent graph */ IdArray induced_eid; /** * @brief Mapping from destination node ID to node type in parent graph * @note The induced type array guarantees that the same type always appear * contiguously. */ IdArray induced_dsttype; /** * @brief The set of node types in parent graph appearing in destination * nodes. 
*/ IdArray induced_dsttype_set; /** @brief Mapping from destination node ID to local node ID in parent graph */ IdArray induced_dstid; void VisitAttrs(runtime::AttrVisitor* v) final { v->Visit("graph", &graph); v->Visit("induced_srctype", &induced_srctype); v->Visit("induced_srctype_set", &induced_srctype_set); v->Visit("induced_srcid", &induced_srcid); v->Visit("induced_etype", &induced_etype); v->Visit("induced_etype_set", &induced_etype_set); v->Visit("induced_eid", &induced_eid); v->Visit("induced_dsttype", &induced_dsttype); v->Visit("induced_dsttype_set", &induced_dsttype_set); v->Visit("induced_dstid", &induced_dstid); } static constexpr const char* _type_key = "graph.FlattenedHeteroGraph"; DGL_DECLARE_OBJECT_TYPE_INFO(FlattenedHeteroGraph, runtime::Object); }; DGL_DEFINE_OBJECT_REF(FlattenedHeteroGraphRef, FlattenedHeteroGraph); // Declarations of functions and algorithms /** * @brief Create a heterograph from meta graph and a list of bipartite graph, * additionally specifying number of nodes per type. */ HeteroGraphPtr CreateHeteroGraph( GraphPtr meta_graph, const std::vector& rel_graphs, const std::vector& num_nodes_per_type = {}); /** * @brief Create a heterograph from COO input. * @param num_vtypes Number of vertex types. Must be 1 or 2. * @param num_src Number of nodes in the source type. * @param num_dst Number of nodes in the destination type. * @param row Src node ids of the edges. * @param col Dst node ids of the edges. * @param row_sorted Whether the `row` array is in sorted ascending order. * @param col_sorted When `row_sorted` is true, whether the columns within each * row are also sorted. When `row_sorted` is false, this flag must also be * false. * @param formats Sparse formats used for storing this graph. * @return A heterograph pointer. 
*/ HeteroGraphPtr CreateFromCOO( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray row, IdArray col, bool row_sorted = false, bool col_sorted = false, dgl_format_code_t formats = ALL_CODE); /** * @brief Create a heterograph from COO input. * @param num_vtypes Number of vertex types. Must be 1 or 2. * @param mat The COO matrix * @param formats Sparse formats used for storing this graph. * @return A heterograph pointer. */ HeteroGraphPtr CreateFromCOO( int64_t num_vtypes, const aten::COOMatrix& mat, dgl_format_code_t formats = ALL_CODE); /** * @brief Create a heterograph from CSR input. * @param num_vtypes Number of vertex types. Must be 1 or 2. * @param num_src Number of nodes in the source type. * @param num_dst Number of nodes in the destination type. * @param indptr Indptr array * @param indices Indices array * @param edge_ids Edge ids * @param formats Sparse formats for storing this graph. * @return A heterograph pointer. */ HeteroGraphPtr CreateFromCSR( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids, dgl_format_code_t formats = ALL_CODE); /** * @brief Create a heterograph from CSR input. * @param num_vtypes Number of vertex types. Must be 1 or 2. * @param mat The CSR matrix * @param formats Sparse formats for storing this graph. * @return A heterograph pointer. */ HeteroGraphPtr CreateFromCSR( int64_t num_vtypes, const aten::CSRMatrix& mat, dgl_format_code_t formats = ALL_CODE); /** * @brief Create a heterograph from CSC input. * @param num_vtypes Number of vertex types. Must be 1 or 2. * @param num_src Number of nodes in the source type. * @param num_dst Number of nodes in the destination type. * @param indptr Indptr array * @param indices Indices array * @param edge_ids Edge ids * @param formats Sparse formats used for storing this graph. * @return A heterograph pointer. 
*/ HeteroGraphPtr CreateFromCSC( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids, dgl_format_code_t formats = ALL_CODE); /** * @brief Create a heterograph from CSC input. * @param num_vtypes Number of vertex types. Must be 1 or 2. * @param mat The CSC matrix * @param formats Sparse formats available for storing this graph. * @return A heterograph pointer. */ HeteroGraphPtr CreateFromCSC( int64_t num_vtypes, const aten::CSRMatrix& mat, dgl_format_code_t formats = ALL_CODE); /** * @brief Extract the subgraph of the in edges of the given nodes. * @param graph Graph * @param nodes Node IDs of each type * @param relabel_nodes Whether to remove isolated nodes and relabel the rest * ones * @return Subgraph containing only the in edges. The returned graph has * the same schema as the original one. */ HeteroSubgraph InEdgeGraph( const HeteroGraphPtr graph, const std::vector& nodes, bool relabel_nodes = false); /** * @brief Extract the subgraph of the out edges of the given nodes. * @param graph Graph * @param nodes Node IDs of each type * @param relabel_nodes Whether to remove isolated nodes and relabel the rest * ones * @return Subgraph containing only the out edges. The returned graph has * the same schema as the original one. */ HeteroSubgraph OutEdgeGraph( const HeteroGraphPtr graph, const std::vector& nodes, bool relabel_nodes = false); /** * @brief Joint union multiple graphs into one graph. * * All input graphs should have the same metagraph. * * TODO(xiangsx): remove the meta_graph argument * * @param meta_graph Metagraph of the inputs and result. * @param component_graphs Input graphs * @return One graph that unions all the components */ HeteroGraphPtr JointUnionHeteroGraph( GraphPtr meta_graph, const std::vector& component_graphs); /** * @brief Union multiple graphs into one with each input graph as one disjoint * component. * * All input graphs should have the same metagraph. 
* * TODO(minjie): remove the meta_graph argument * * @tparam IdType Graph's index data type, can be int32_t or int64_t * @param meta_graph Metagraph of the inputs and result. * @param component_graphs Input graphs * @return One graph that unions all the components */ template HeteroGraphPtr DisjointUnionHeteroGraph( GraphPtr meta_graph, const std::vector& component_graphs); HeteroGraphPtr DisjointUnionHeteroGraph2( GraphPtr meta_graph, const std::vector& component_graphs); /** * @brief Slice a contiguous subgraph, e.g. retrieve a component graph from a * batched graph. * * TODO(mufei): remove the meta_graph argument * * @param meta_graph Metagraph of the input and result. * @param batched_graph Input graph. * @param num_nodes_per_type Number of vertices of each type in the result. * @param start_nid_per_type Start vertex ID of each type to slice. * @param num_edges_per_type Number of edges of each type in the result. * @param start_eid_per_type Start edge ID of each type to slice. * @return Sliced graph */ HeteroGraphPtr SliceHeteroGraph( GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray num_nodes_per_type, IdArray start_nid_per_type, IdArray num_edges_per_type, IdArray start_eid_per_type); /** * @brief Split a graph into multiple disjoint components. * * Edges across different components are ignored. All the result graphs have the * same metagraph as the input one. * * The `vertex_sizes` and `edge_sizes` arrays are the concatenation of arrays of * each node/edge type. Suppose there are N vertex types, then the array length * should be B*N, where B is the number of components to split. * * TODO(minjie): remove the meta_graph argument; use vector for * vertex_sizes and edge_sizes. * * @tparam IdType Graph's index data type, can be int32_t or int64_t * @param meta_graph Metagraph. * @param batched_graph Input graph. * @param vertex_sizes Number of vertices of each component. * @param edge_sizes Number of edges of each component. 
* @return A list of graphs representing each disjoint components. */ template std::vector DisjointPartitionHeteroBySizes( GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray vertex_sizes, IdArray edge_sizes); std::vector DisjointPartitionHeteroBySizes2( GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray vertex_sizes, IdArray edge_sizes); /** * @brief Structure for pickle/unpickle. * * The design principle is to leverage the NDArray class as much as possible so * that when they are converted to backend-specific tensors, we could leverage * the efficient pickle/unpickle solutions from the backend framework. * * NOTE(minjie): This is a temporary solution before we support shared memory * storage ourselves. * * This class can be used as arguments and return values of a C API. */ struct HeteroPickleStates : public runtime::Object { /** @brief version number */ int64_t version = 0; /** @brief Metainformation * * metagraph, number of nodes per type, format, flags */ std::string meta; /** @brief Arrays representing graph structure (coo or csr) */ std::vector arrays; /* To support backward compatibility, we have to retain fields in the old * version of HeteroPickleStates */ /** @brief Metagraph(64bits ImmutableGraph) */ GraphPtr metagraph; /** @brief Number of nodes per type */ std::vector num_nodes_per_type; /** @brief adjacency matrices of each relation graph */ std::vector > adjs; static constexpr const char* _type_key = "graph.HeteroPickleStates"; DGL_DECLARE_OBJECT_TYPE_INFO(HeteroPickleStates, runtime::Object); }; // Define HeteroPickleStatesRef DGL_DEFINE_OBJECT_REF(HeteroPickleStatesRef, HeteroPickleStates); /** * @brief Create a heterograph from pickling states. * * @param states Pickle states * @return A heterograph pointer */ HeteroGraphPtr HeteroUnpickle(const HeteroPickleStates& states); /** * @brief Get the pickling state of the relation graph structure in backend * tensors. 
* * @param graph The heterograph to pickle. * @return a HeteroPickleStates object */ HeteroPickleStates HeteroPickle(HeteroGraphPtr graph); /** * @brief Old version of HeteroUnpickle, for backward compatibility * * @param states Pickle states * @return A heterograph pointer */ HeteroGraphPtr HeteroUnpickleOld(const HeteroPickleStates& states); /** * @brief Create heterograph from pickling states pickled by ForkingPickler. * * This is different from HeteroUnpickle where * (1) Backward compatibility is not required, * (2) All graph formats are pickled instead of only one. * * @param states Pickle states * @return A heterograph pointer */ HeteroGraphPtr HeteroForkingUnpickle(const HeteroPickleStates& states); /** * @brief Get the pickling states of the relation graph structure in backend * tensors for ForkingPickler. * * This is different from HeteroPickle where * (1) Backward compatibility is not required, * (2) All graph formats are pickled instead of only one. * * @param graph The heterograph to pickle. * @return a HeteroPickleStates object */ HeteroPickleStates HeteroForkingPickle(HeteroGraphPtr graph); #define FORMAT_HAS_CSC(format) ((format)&CSC_CODE) #define FORMAT_HAS_CSR(format) ((format)&CSR_CODE) #define FORMAT_HAS_COO(format) ((format)&COO_CODE) } // namespace dgl #endif // DGL_BASE_HETEROGRAPH_H_ ================================================ FILE: include/dgl/bcast.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/bcast.h * @brief Broadcast related function C++ header. */ #ifndef DGL_BCAST_H_ #define DGL_BCAST_H_ #include #include #include "./runtime/ndarray.h" using namespace dgl::runtime; namespace dgl { /** * @brief Broadcast offsets and auxiliary information. */ struct BcastOff { /** * @brief offset vector of lhs operand and rhs operand. * @note lhs_offset[i] indicates the start position of the scalar * in lhs operand that is required to compute the i-th element * in the output, likewise for rhs_offset. 
* * For example, when lhs array has shape (1, 3) and rhs array * has shape (5, 1), the resulting array would have shape (5, 3), * then both lhs_offset and rhs_offset would contain 15 elements. * * lhs_offset: 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2 * rhs_offset: 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4 * * in order to compute the output element at flat index 6 (row 2, column 0, * counting from zero), we need element 0 of the lhs array and element 2 of * the rhs array. */ std::vector lhs_offset, rhs_offset; /** @brief Whether broadcast is required or not. */ bool use_bcast; /** * @brief Auxiliary information for kernel computation * @note lhs_len refers to the left hand side operand length. * e.g. 15 for shape (1, 3, 5) * rhs_len refers to the right hand side operand length. * e.g. 15 for shape (3, 1, 5) * out_len refers to the output length. * e.g. 45 for shape (3, 3, 5) * reduce_size refers to the reduction size (for op like dot). * e.g. 1 for add, 5 for dot and lhs_shape,rhs_shape=(3,5) */ int64_t lhs_len, rhs_len, out_len, reduce_size; }; /** * @brief Compute broadcast and auxiliary information given operator * and operands for kernel computation. * @param op a string indicating the operator, could be `add`, `sub`, * `mul`, `div`, `dot`, `copy_u`, `copy_e`. * @param lhs The left hand side operand of NDArray class. * @param rhs The right hand side operand of NDArray class. * @return the broadcast information of BcastOff class. */ BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs); } // namespace dgl #endif // DGL_BCAST_H_ ================================================ FILE: include/dgl/env_variable.h ================================================ /** * Copyright (c) 2023 by Contributors * @file dgl/env_variable.h * @brief Definitions related to environment variables. 
*/ #ifndef DGL_ENV_VARIABLE_H_ #define DGL_ENV_VARIABLE_H_ #include namespace dgl { /** @brief Value of the DGL_PARALLEL_FOR_GRAIN_SIZE environment variable as returned by std::getenv (a null pointer when the variable is not set). */ static const char* kDGLParallelForGrainSize = std::getenv("DGL_PARALLEL_FOR_GRAIN_SIZE"); } // namespace dgl #endif // DGL_ENV_VARIABLE_H_ ================================================ FILE: include/dgl/graph.h ================================================ /** * Copyright (c) 2018 by Contributors * @file dgl/graph.h * @brief DGL graph index class. */ #ifndef DGL_GRAPH_H_ #define DGL_GRAPH_H_ #include #include #include #include #include #include "graph_interface.h" namespace dgl { class Graph; class GraphOp; typedef std::shared_ptr MutableGraphPtr; /** @brief Mutable graph based on adjacency list. */ class Graph : public GraphInterface { public: /** @brief default constructor */ Graph() {} /** @brief construct a graph from the coo format. */ Graph(IdArray src_ids, IdArray dst_ids, size_t num_nodes); /** @brief default copy constructor */ Graph(const Graph& other) = default; #ifndef _MSC_VER /** @brief default move constructor */ Graph(Graph&& other) = default; #else /** @brief hand-written move constructor for MSVC: takes over the containers of other with std::move instead of deep-copying them, copies the scalar members, then clears other. */ Graph(Graph&& other) { adjlist_ = std::move(other.adjlist_); reverse_adjlist_ = std::move(other.reverse_adjlist_); all_edges_src_ = std::move(other.all_edges_src_); all_edges_dst_ = std::move(other.all_edges_dst_); read_only_ = other.read_only_; num_edges_ = other.num_edges_; /* Clear() puts the moved-from containers and scalars of other into a well-defined empty state. */ other.Clear(); } #endif // _MSC_VER /** @brief default copy assignment operator */ Graph& operator=(const Graph& other) = default; /** @brief default destructor */ ~Graph() = default; /** * @brief Add vertices to the graph. * @note Since vertices are integers enumerated from zero, only the number of * vertices to be added needs to be specified. * @param num_vertices The number of vertices to be added. */ void AddVertices(uint64_t num_vertices) override; /** * @brief Add one edge to the graph. * @param src The source vertex. * @param dst The destination vertex. */ void AddEdge(dgl_id_t src, dgl_id_t dst) override; /** * @brief Add edges to the graph. 
* @param src_ids The source vertex id array. * @param dst_ids The destination vertex id array. */ void AddEdges(IdArray src_ids, IdArray dst_ids) override; /** * @brief Clear the graph. Remove all vertices/edges. * @note Also resets the read-only flag to false and the edge count to zero. */ void Clear() override { adjlist_.clear(); reverse_adjlist_.clear(); all_edges_src_.clear(); all_edges_dst_.clear(); read_only_ = false; num_edges_ = 0; } /** @return the device context of the graph; this class always reports * DGLContext{kDGLCPU, 0}. */ DGLContext Context() const override { return DGLContext{kDGLCPU, 0}; } /** @return the number of bits used for ids; this class always reports 64. */ uint8_t NumBits() const override { return 64; } /** * @return whether the graph is a multigraph */ bool IsMultigraph() const override; /** * @return whether the graph is read-only * @note This implementation always returns false and does not consult * read_only_. */ bool IsReadonly() const override { return false; } /** @return the number of vertices in the graph.*/ uint64_t NumVertices() const override { return adjlist_.size(); } /** @return the number of edges in the graph.*/ uint64_t NumEdges() const override { return num_edges_; } /** @return a 0-1 array indicating whether the given vertices are in the * graph. */ BoolArray HasVertices(IdArray vids) const override; /** @return true if the given edge is in the graph.*/ bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override; /** @return a 0-1 array indicating whether the given edges are in the graph.*/ BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const override; /** * @brief Find the predecessors of a vertex. * @param vid The vertex id. * @param radius The radius of the neighborhood. Default is immediate neighbor * (radius=1). * @return the predecessor id array. */ IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override; /** * @brief Find the successors of a vertex. * @param vid The vertex id. * @param radius The radius of the neighborhood. Default is immediate neighbor * (radius=1). * @return the successor id array. 
*/ IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override; /** * @brief Get all edge ids between the two given endpoints * @note Edges are associated with an integer id start from zero. * The id is assigned when the edge is being added to the graph. * @param src The source vertex. * @param dst The destination vertex. * @return the edge id array. */ IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override; /** * @brief Get all edge ids between the given endpoint pairs. * @note Edges are associated with an integer id start from zero. * The id is assigned when the edge is being added to the graph. * If duplicate pairs exist, the returned edge IDs will also duplicate. * The order of returned edge IDs will follow the order of src-dst pairs * first, and ties are broken by the order of edge ID. * @return EdgeArray containing all edges between all pairs. */ EdgeArray EdgeIds(IdArray src, IdArray dst) const override; /** * @brief Find the edge ID and return the pair of endpoints * @param eid The edge ID * @return a pair whose first element is the source and the second the * destination. */ std::pair FindEdge(dgl_id_t eid) const override { return std::make_pair(all_edges_src_[eid], all_edges_dst_[eid]); } /** * @brief Find the edge IDs and return their source and target node IDs. * @param eids The edge ID array. * @return EdgeArray containing all edges with id in eid. The order is * preserved. */ EdgeArray FindEdges(IdArray eids) const override; /** * @brief Get the in edges of the vertex. * @note The returned dst id array is filled with vid. * @param vid The vertex id. * @return the edges */ EdgeArray InEdges(dgl_id_t vid) const override; /** * @brief Get the in edges of the vertices. * @param vids The vertex id array. * @return the id arrays of the two endpoints of the edges. */ EdgeArray InEdges(IdArray vids) const override; /** * @brief Get the out edges of the vertex. * @note The returned src id array is filled with vid. * @param vid The vertex id. 
* @return the id arrays of the two endpoints of the edges. */ EdgeArray OutEdges(dgl_id_t vid) const override; /** * @brief Get the out edges of the vertices. * @param vids The vertex id array. * @return the id arrays of the two endpoints of the edges. */ EdgeArray OutEdges(IdArray vids) const override; /** * @brief Get all the edges in the graph. * @note Depending on `order`, the returned edge list is either sorted by * src and dst ids or left in edge id order. * @param order String selecting the ordering of the returned edges; defaults * to the empty string. NOTE(review): the accepted values are not visible in * this header - confirm them against the implementation. * @return the id arrays of the two endpoints of the edges. */ EdgeArray Edges(const std::string& order = "") const override; /** * @brief Get the in degree of the given vertex. * @note Fails the CHECK with "invalid vertex" when HasVertex(vid) is false. * @param vid The vertex id. * @return the in degree */ uint64_t InDegree(dgl_id_t vid) const override { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; return reverse_adjlist_[vid].succ.size(); } /** * @brief Get the in degrees of the given vertices. * @param vids The vertex id array. * @return the in degree array */ DegreeArray InDegrees(IdArray vids) const override; /** * @brief Get the out degree of the given vertex. * @note Fails the CHECK with "invalid vertex" when HasVertex(vid) is false. * @param vid The vertex id. * @return the out degree */ uint64_t OutDegree(dgl_id_t vid) const override { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; return adjlist_[vid].succ.size(); } /** * @brief Get the out degrees of the given vertices. * @param vids The vertex id array. * @return the out degree array */ DegreeArray OutDegrees(IdArray vids) const override; /** * @brief Construct the induced subgraph of the given vertices. * * The induced subgraph is a subgraph formed by specifying a set of vertices * V' and then selecting all of the edges from the original graph that connect * two vertices in V'. * * Vertices and edges in the original graph will be "reindexed" to local * index. 
The local index of the vertices preserve the order of the given id * array, while the local index of the edges preserve the index order in the * original graph. Vertices not in the original graph are ignored. * * The result subgraph is read-only. * * @param vids The vertices in the subgraph. * @return the induced subgraph */ Subgraph VertexSubgraph(IdArray vids) const override; /** * @brief Construct the induced edge subgraph of the given edges. * * The induced edges subgraph is a subgraph formed by specifying a set of * edges E' and then selecting all of the nodes from the original graph that * are endpoints in E'. * * Vertices and edges in the original graph will be "reindexed" to local * index. The local index of the edges preserve the order of the given id * array, while the local index of the vertices preserve the index order in * the original graph. Edges not in the original graph are ignored. * * The result subgraph is read-only. * * @param eids The edges in the subgraph. * @return the induced edge subgraph */ Subgraph EdgeSubgraph( IdArray eids, bool preserve_nodes = false) const override; /** * @brief Return the successor vector * @param vid The vertex id. * @return the successor vector */ DGLIdIters SuccVec(dgl_id_t vid) const override { auto data = adjlist_[vid].succ.data(); auto size = adjlist_[vid].succ.size(); return DGLIdIters(data, data + size); } /** * @brief Return the out edge id vector * @param vid The vertex id. * @return the out edge id vector */ DGLIdIters OutEdgeVec(dgl_id_t vid) const override { auto data = adjlist_[vid].edge_id.data(); auto size = adjlist_[vid].edge_id.size(); return DGLIdIters(data, data + size); } /** * @brief Return the predecessor vector * @param vid The vertex id. 
* @return the predecessor vector */ DGLIdIters PredVec(dgl_id_t vid) const override { auto data = reverse_adjlist_[vid].succ.data(); auto size = reverse_adjlist_[vid].succ.size(); return DGLIdIters(data, data + size); } /** * @brief Return the in edge id vector * @param vid The vertex id. * @return the in edge id vector */ DGLIdIters InEdgeVec(dgl_id_t vid) const override { auto data = reverse_adjlist_[vid].edge_id.data(); auto size = reverse_adjlist_[vid].edge_id.size(); return DGLIdIters(data, data + size); } /** * @brief Get the adjacency matrix of the graph. * * By default, a row of returned adjacency matrix represents the destination * of an edge and the column represents the source. * @param transpose A flag to transpose the returned adjacency matrix. * @param fmt the format of the returned adjacency matrix. * @return a vector of three IdArray. */ std::vector GetAdj( bool transpose, const std::string& fmt) const override; /** @brief Create an empty graph */ static MutableGraphPtr Create() { return std::make_shared(); } /** @brief Create from coo */ static MutableGraphPtr CreateFromCOO( int64_t num_nodes, IdArray src_ids, IdArray dst_ids) { return std::make_shared(src_ids, dst_ids, num_nodes); } protected: friend class GraphOp; /** @brief Internal edge list type */ struct EdgeList { /** @brief successor vertex list */ std::vector succ; /** @brief out edge list */ std::vector edge_id; }; typedef std::vector AdjacencyList; /** @brief adjacency list using vector storage */ AdjacencyList adjlist_; /** @brief reverse adjacency list using vector storage */ AdjacencyList reverse_adjlist_; /** @brief all edges' src endpoints in their edge id order */ std::vector all_edges_src_; /** @brief all edges' dst endpoints in their edge id order */ std::vector all_edges_dst_; /** @brief read only flag */ bool read_only_ = false; /** @brief number of edges */ uint64_t num_edges_ = 0; }; } // namespace dgl #endif // DGL_GRAPH_H_ ================================================ 
FILE: include/dgl/graph_interface.h ================================================ /** * Copyright (c) 2018 by Contributors * @file dgl/graph_interface.h * @brief DGL graph index class. */ #ifndef DGL_GRAPH_INTERFACE_H_ #define DGL_GRAPH_INTERFACE_H_ #include #include #include #include #include #include "./runtime/object.h" #include "array.h" namespace dgl { const dgl_id_t DGL_INVALID_ID = static_cast(-1); /** * @brief This class references data in std::vector. * * This isn't a STL-style iterator. It provides a STL data container interface. * but it doesn't own data itself. instead, it only references data in * std::vector. */ class DGLIdIters { public: /** @brief default constructor to create an empty range */ DGLIdIters() {} /** @brief constructor with given begin and end */ DGLIdIters(const dgl_id_t *begin, const dgl_id_t *end) { this->begin_ = begin; this->end_ = end; } const dgl_id_t *begin() const { return this->begin_; } const dgl_id_t *end() const { return this->end_; } dgl_id_t operator[](int64_t i) const { return *(this->begin_ + i); } size_t size() const { return this->end_ - this->begin_; } private: const dgl_id_t *begin_{nullptr}, *end_{nullptr}; }; /** * @brief int32 version for DGLIdIters * */ class DGLIdIters32 { public: /** @brief default constructor to create an empty range */ DGLIdIters32() {} /** @brief constructor with given begin and end */ DGLIdIters32(const int32_t *begin, const int32_t *end) { this->begin_ = begin; this->end_ = end; } const int32_t *begin() const { return this->begin_; } const int32_t *end() const { return this->end_; } int32_t operator[](int32_t i) const { return *(this->begin_ + i); } size_t size() const { return this->end_ - this->begin_; } private: const int32_t *begin_{nullptr}, *end_{nullptr}; }; /* @brief structure used to represent a list of edges */ typedef struct { /* @brief the two endpoints and the id of the edge */ IdArray src, dst, id; } EdgeArray; // forward declaration struct Subgraph; class GraphRef; 
class GraphInterface; typedef std::shared_ptr GraphPtr; /** * @brief dgl graph index interface. * * DGL's graph is directed. Vertices are integers enumerated from zero. * * When calling functions supporting multiple edges (e.g. AddEdges, * HasEdgesBetween), the input edges are represented by two id arrays for source and destination * vertex ids. In the general case, the two arrays should have the same length. * If the length of src id array is one, it represents one-to-many connections. * If the length of dst id array is one, it represents many-to-one connections. */ class GraphInterface : public runtime::Object { public: virtual ~GraphInterface() = default; /** * @brief Add vertices to the graph. * @note Since vertices are integers enumerated from zero, only the number of * vertices to be added needs to be specified. * @param num_vertices The number of vertices to be added. */ virtual void AddVertices(uint64_t num_vertices) = 0; /** * @brief Add one edge to the graph. * @param src The source vertex. * @param dst The destination vertex. */ virtual void AddEdge(dgl_id_t src, dgl_id_t dst) = 0; /** * @brief Add edges to the graph. * @param src_ids The source vertex id array. * @param dst_ids The destination vertex id array. */ virtual void AddEdges(IdArray src_ids, IdArray dst_ids) = 0; /** * @brief Clear the graph. Remove all vertices/edges. */ virtual void Clear() = 0; /** * @brief Get the device context of this graph. */ virtual DGLContext Context() const = 0; /** * @brief Get the number of integer bits used to store node/edge ids * (32 or 64).
*/ virtual uint8_t NumBits() const = 0; /** * @return whether the graph is a multigraph */ virtual bool IsMultigraph() const = 0; /** * @return whether the graph is unibipartite * @note The default implementation sorts the source ids and the destination * ids of all edges and returns false as soon as an id appears in both. */ virtual bool IsUniBipartite() const { EdgeArray edges = Edges(); IdArray src = edges.src; IdArray dst = edges.dst; bool is_unibipartite = true; const size_t n = edges.src.NumElements(); ATEN_ID_TYPE_SWITCH(src->dtype, IdType, { auto src_v = src.ToVector(); std::sort(src_v.begin(), src_v.end()); auto dst_v = dst.ToVector(); std::sort(dst_v.begin(), dst_v.end()); // std::set_intersection() requires output, so this is better for (size_t i = 0, j = 0; i < n && j < n;) { if (src_v[i] < dst_v[j]) { ++i; } else if (src_v[i] == dst_v[j]) { is_unibipartite = false; break; } else { ++j; } } }); return is_unibipartite; } /** * @return whether the graph is read-only */ virtual bool IsReadonly() const = 0; /** @return the number of vertices in the graph.*/ virtual uint64_t NumVertices() const = 0; /** @return the number of edges in the graph.*/ virtual uint64_t NumEdges() const = 0; /** @return true if the given vertex is in the graph.*/ virtual bool HasVertex(dgl_id_t vid) const { return vid < NumVertices(); } /** @return a 0-1 array indicating whether the given vertices are in the * graph. */ virtual BoolArray HasVertices(IdArray vids) const = 0; /** @return true if the given edge is in the graph.*/ virtual bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const = 0; /** @return a 0-1 array indicating whether the given edges are in the graph.*/ virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const = 0; /** * @brief Find the predecessors of a vertex. * @param vid The vertex id. * @param radius The radius of the neighborhood. Default is immediate neighbor * (radius=1). * @return the predecessor id array. */ virtual IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const = 0; /** * @brief Find the successors of a vertex. * @param vid The vertex id.
* @param radius The radius of the neighborhood. Default is immediate neighbor * (radius=1). * @return the successor id array. */ virtual IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const = 0; /** * @brief Get all edge ids between the two given endpoints * @note Edges are associated with an integer id starting from zero. * The id is assigned when the edge is being added to the graph. * @param src The source vertex. * @param dst The destination vertex. * @return the edge id array. */ virtual IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const = 0; /** * @brief Get all edge ids between the given endpoint pairs. * @note Edges are associated with an integer id starting from zero. * The id is assigned when the edge is being added to the graph. * If duplicate pairs exist, the returned edge IDs will also be duplicated. * The order of returned edge IDs will follow the order of src-dst pairs * first, and ties are broken by the order of edge ID. * @param src The source vertex id array. * @param dst The destination vertex id array. * @return EdgeArray containing all edges between all pairs. */ virtual EdgeArray EdgeIds(IdArray src, IdArray dst) const = 0; /** * @brief Find the edge ID and return the pair of endpoints * @param eid The edge ID * @return a pair whose first element is the source and the second the * destination. */ virtual std::pair FindEdge(dgl_id_t eid) const = 0; /** * @brief Find the edge IDs and return their source and target node IDs. * @param eids The edge ID array. * @return EdgeArray containing all edges with id in eids. The order is * preserved. */ virtual EdgeArray FindEdges(IdArray eids) const = 0; /** * @brief Get the in edges of the vertex. * @note The returned dst id array is filled with vid. * @param vid The vertex id. * @return the edges */ virtual EdgeArray InEdges(dgl_id_t vid) const = 0; /** * @brief Get the in edges of the vertices. * @param vids The vertex id array. * @return the id arrays of the two endpoints of the edges. */ virtual EdgeArray InEdges(IdArray vids) const = 0; /** * @brief Get the out edges of the vertex.
* @note The returned src id array is filled with vid. * @param vid The vertex id. * @return the id arrays of the two endpoints of the edges. */ virtual EdgeArray OutEdges(dgl_id_t vid) const = 0; /** * @brief Get the out edges of the vertices. * @param vids The vertex id array. * @return the id arrays of the two endpoints of the edges. */ virtual EdgeArray OutEdges(IdArray vids) const = 0; /** * @brief Get all the edges in the graph. * @note If order is "srcdst", the returned edges list is sorted by their src * and dst ids. If order is "eid", they are in their edge id order. * Otherwise, they are in arbitrary order. * @param order The order of the returned edge list. * @return the id arrays of the two endpoints of the edges. */ virtual EdgeArray Edges(const std::string &order = "") const = 0; /** * @brief Get the in degree of the given vertex. * @param vid The vertex id. * @return the in degree */ virtual uint64_t InDegree(dgl_id_t vid) const = 0; /** * @brief Get the in degrees of the given vertices. * @param vids The vertex id array. * @return the in degree array */ virtual DegreeArray InDegrees(IdArray vids) const = 0; /** * @brief Get the out degree of the given vertex. * @param vid The vertex id. * @return the out degree */ virtual uint64_t OutDegree(dgl_id_t vid) const = 0; /** * @brief Get the out degrees of the given vertices. * @param vids The vertex id array. * @return the out degree array */ virtual DegreeArray OutDegrees(IdArray vids) const = 0; /** * @brief Construct the induced subgraph of the given vertices. * * The induced subgraph is a subgraph formed by specifying a set of vertices * V' and then selecting all of the edges from the original graph that connect * two vertices in V'. * * Vertices and edges in the original graph will be "reindexed" to local * index. The local indices of the vertices preserve the order of the given id * array, while the local indices of the edges preserve the index order in the * original graph.
Vertices not in the original graph are ignored. * * The result subgraph is read-only. * * @param vids The vertices in the subgraph. * @return the induced subgraph */ virtual Subgraph VertexSubgraph(IdArray vids) const = 0; /** * @brief Construct the induced edge subgraph of the given edges. * * The induced edge subgraph is a subgraph formed by specifying a set of * edges E' and then selecting all of the nodes from the original graph that * are endpoints in E'. * * Vertices and edges in the original graph will be "reindexed" to local * index. The local indices of the edges preserve the order of the given id * array, while the local indices of the vertices preserve the index order in * the original graph. Edges not in the original graph are ignored. * * The result subgraph is read-only. * * @param eids The edges in the subgraph. * @param preserve_nodes If true, the vertices will not be relabeled, so some * vertices may have no incident edges. * @return the induced edge subgraph */ virtual Subgraph EdgeSubgraph( IdArray eids, bool preserve_nodes = false) const = 0; /** * @brief Return the successor vector * @param vid The vertex id. * @return the successor vector iterator pair. */ virtual DGLIdIters SuccVec(dgl_id_t vid) const = 0; /** * @brief Return the out edge id vector * @param vid The vertex id. * @return the out edge id vector iterator pair. */ virtual DGLIdIters OutEdgeVec(dgl_id_t vid) const = 0; /** * @brief Return the predecessor vector * @param vid The vertex id. * @return the predecessor vector iterator pair. */ virtual DGLIdIters PredVec(dgl_id_t vid) const = 0; /** * @brief Return the in edge id vector * @param vid The vertex id. * @return the in edge id vector iterator pair. */ virtual DGLIdIters InEdgeVec(dgl_id_t vid) const = 0; /** * @brief Get the adjacency matrix of the graph. * * By default, a row of returned adjacency matrix represents the destination * of an edge and the column represents the source.
* * If the fmt is 'csr', the function should return three arrays, representing * indptr, indices and edge ids. * * If the fmt is 'coo', the function should return one array of shape (2, * nnz), representing a horizontal stack of row and col indices. * * @param transpose A flag to transpose the returned adjacency matrix. * @param fmt the format of the returned adjacency matrix. * @return a vector of IdArrays. */ virtual std::vector GetAdj( bool transpose, const std::string &fmt) const = 0; /** * @brief Sort the columns in CSR. * * This sorts the columns in each row based on the column Ids. * The edge ids should be sorted accordingly. * @note The default implementation does nothing. */ virtual void SortCSR() {} static constexpr const char *_type_key = "graph.Graph"; DGL_DECLARE_OBJECT_TYPE_INFO(GraphInterface, runtime::Object); }; // Define GraphRef DGL_DEFINE_OBJECT_REF(GraphRef, GraphInterface); /** @brief Subgraph data structure */ struct Subgraph : public runtime::Object { /** @brief The graph. */ GraphPtr graph; /** * @brief The induced vertex ids. * @note This is also a map from the new vertex id to the vertex id in the * parent graph. */ IdArray induced_vertices; /** * @brief The induced edge ids. * @note This is also a map from the new edge id to the edge id in the parent * graph. */ IdArray induced_edges; static constexpr const char *_type_key = "graph.Subgraph"; DGL_DECLARE_OBJECT_TYPE_INFO(Subgraph, runtime::Object); }; /** @brief Subgraph data structure for negative subgraph */ struct NegSubgraph : public Subgraph { /** @brief The existence of the negative edges in the parent graph. */ IdArray exist; /** @brief The Ids of head nodes */ IdArray head_nid; /** @brief The Ids of tail nodes */ IdArray tail_nid; }; /** @brief Subgraph data structure for halo subgraph */ struct HaloSubgraph : public Subgraph { /** @brief Indicate if a node belongs to the partition.
*/ IdArray inner_nodes; }; // Define SubgraphRef DGL_DEFINE_OBJECT_REF(SubgraphRef, Subgraph); } // namespace dgl #endif // DGL_GRAPH_INTERFACE_H_ ================================================ FILE: include/dgl/graph_op.h ================================================ /** * Copyright (c) 2018 by Contributors * @file dgl/graph_op.h * @brief Operations on graph index. */ #ifndef DGL_GRAPH_OP_H_ #define DGL_GRAPH_OP_H_ #include #include "graph.h" #include "immutable_graph.h" namespace dgl { /** @brief Collection of static operations on graph index objects. */ class GraphOp { public: /** * @brief Return a new graph with all the edges reversed. * * The returned graph preserves the vertex and edge index in the original * graph. * * @param graph The input graph. * @return the reversed graph */ static GraphPtr Reverse(GraphPtr graph); /** * @brief Return the line graph. * * If i~j and j~i are two edges in original graph G, then * (i,j)~(j,i) and (j,i)~(i,j) are the "backtracking" edges on * the line graph. * * @param graph The input graph. * @param backtracking Whether the backtracking edges are included or not * @return the line graph */ static GraphPtr LineGraph(GraphPtr graph, bool backtracking); /** * @brief Return a disjoint union of the input graphs. * * The new graph will include all the nodes/edges in the given graphs. * Nodes/Edges will be relabeled by adding the cumsum of the previous graph * sizes in the given sequence order. For example, given input [g1, g2, g3], * where they have 5, 6, 7 nodes respectively, node#2 of g2 will become * node#7 in the result graph. Edge ids are re-assigned similarly. * * The input list must be either ALL mutable graphs or ALL immutable graphs. * The returned graph type is also determined by the input graph type. * * @param graphs A list of input graphs to be unioned. * @return the disjoint union of the graphs */ static GraphPtr DisjointUnion(std::vector graphs); /** * @brief Partition the graph into several subgraphs. * * This is a reverse operation of DisjointUnion. The graph will be partitioned * into num graphs.
This requires the given number of partitions to evenly * divide the number of nodes in the graph. * * If the input graph is mutable, the result graphs are mutable. * If the input graph is immutable, the result graphs are immutable. * * @param graph The graph to be partitioned. * @param num The number of partitions. * @return a list of partitioned graphs */ static std::vector DisjointPartitionByNum( GraphPtr graph, int64_t num); /** * @brief Partition the graph into several subgraphs. * * This is a reverse operation of DisjointUnion. The graph will be partitioned * based on the given sizes. This requires the sum of the given sizes to be equal * to the number of nodes in the graph. * * If the input graph is mutable, the result graphs are mutable. * If the input graph is immutable, the result graphs are immutable. * * @param graph The graph to be partitioned. * @param sizes The number of nodes in each partition. * @return a list of partitioned graphs */ static std::vector DisjointPartitionBySizes( GraphPtr graph, IdArray sizes); /** * @brief Map vids in the parent graph to the vids in the subgraph. * * If the Id doesn't exist in the subgraph, -1 will be used. * * @param parent_vid_map An array that maps the vids in the parent graph to * the subgraph. The elements store the vertex Ids in the parent graph, and * the indices indicate the vertex Ids in the subgraph. * @param query The vertex Ids in the parent graph. * @return an Id array that contains the subgraph node Ids. */ static IdArray MapParentIdToSubgraphId(IdArray parent_vid_map, IdArray query); /** * @brief Expand an Id array based on the offset array. * * For example, * ids: [0, 1, 2, 3, 4], * offset: [0, 2, 2, 5, 6, 7], * result: [0, 0, 2, 2, 2, 3, 4]. * The offset array has one more element than the ids array. * [offset[i], offset[i+1]) shows the location of ids[i] in the result array. * * @param ids An array that contains the node or edge Ids. * @param offset An array that contains the offset after expansion.
* @return an expanded Id array. */ static IdArray ExpandIds(IdArray ids, IdArray offset); /** * @brief Convert the graph to a simple graph. * @param graph The input graph. * @return a new immutable simple graph with no multi-edge. */ static GraphPtr ToSimpleGraph(GraphPtr graph); /** * @brief Convert the graph to a mutable bidirected graph. * * If the original graph has m edges for i -> j and n edges for * j -> i, the new graph will have max(m, n) edges for both * i -> j and j -> i. * * @param graph The input graph. * @return a new mutable bidirected graph. */ static GraphPtr ToBidirectedMutableGraph(GraphPtr graph); /** * @brief Same as ToBidirectedMutableGraph except that the returned graph is * immutable. * @param graph The input graph. * @return a new immutable bidirected * graph. */ static GraphPtr ToBidirectedImmutableGraph(GraphPtr graph); /** * @brief Same as ToBidirectedMutableGraph except that the returned graph is * immutable and it calls gk_csr_MakeSymmetric in GKlib. This is more efficient * than ToBidirectedImmutableGraph. It returns a null pointer if the conversion * fails. * * @param ig The input graph. * @return a new immutable bidirected graph. */ static GraphPtr ToBidirectedSimpleImmutableGraph(ImmutableGraphPtr ig); /** * @brief Get an induced subgraph with HALO nodes. * The HALO nodes are the ones that can be reached from `nodes` within * `num_hops`. * @param graph The input graph. * @param nodes The input nodes that form the core of the induced subgraph. * @param num_hops The number of hops to reach. * @return the induced subgraph with HALO nodes. */ static HaloSubgraph GetSubgraphWithHalo( GraphPtr graph, IdArray nodes, int num_hops); /** * @brief Reorder the nodes in the immutable graph. * @param ig The input graph. * @param new_order The node Ids in the new graph. The index in `new_order` is * old node Ids.
* @return the graph with reordered node Ids */ static GraphPtr ReorderImmutableGraph( ImmutableGraphPtr ig, IdArray new_order); }; } // namespace dgl #endif // DGL_GRAPH_OP_H_ ================================================ FILE: include/dgl/graph_serializer.h ================================================ /** * Copyright (c) 2018 by Contributors * @file dgl/graph_serializer.h * @brief DGL serializer APIs */ #ifndef DGL_GRAPH_SERIALIZER_H_ #define DGL_GRAPH_SERIALIZER_H_ #include namespace dgl { // Util class to call the private/public empty constructor, which is needed for // serialization class Serializer { public: /** @brief Allocate a default-constructed T with new and return the raw pointer. */ template static T* new_object() { return new T(); } /** @brief Allocate a default-constructed T with new and wrap it in a std::shared_ptr. */ template static std::shared_ptr make_shared() { return std::shared_ptr(new T()); } }; } // namespace dgl #endif // DGL_GRAPH_SERIALIZER_H_ ================================================ FILE: include/dgl/graph_traversal.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/graph_traversal.h * @brief common graph traversal operations */ #ifndef DGL_GRAPH_TRAVERSAL_H_ #define DGL_GRAPH_TRAVERSAL_H_ #include "array.h" #include "base_heterograph.h" namespace dgl { ///////////////////////// Graph Traverse routines ////////////////////////// /** * @brief Class for representing frontiers. * * Each frontier is a list of nodes/edges (specified by their ids). * An optional tag can be specified on each node/edge (represented by an int * value). */ struct Frontiers { /** @brief a vector store for the nodes/edges in all the frontiers */ IdArray ids; /** * @brief a vector store for node/edge tags. Dtype is int64. * Empty if no tags are requested */ IdArray tags; /** @brief a section vector to indicate each frontier. Dtype is int64. */ IdArray sections; }; namespace aten { /** * @brief Traverse the graph in a breadth-first-search (BFS) order. * * @param csr The input csr matrix. * @param source Source nodes.
* @return A Frontiers object containing the search result */ Frontiers BFSNodesFrontiers(const CSRMatrix& csr, IdArray source); /** * @brief Traverse the graph in a breadth-first-search (BFS) order, returning * the edges of the BFS tree. * * @param csr The input csr matrix. * @param source Source nodes. * @return A Frontiers object containing the search result */ Frontiers BFSEdgesFrontiers(const CSRMatrix& csr, IdArray source); /** * @brief Traverse the graph in topological order. * * @param csr The input csr matrix. * @return A Frontiers object containing the search result */ Frontiers TopologicalNodesFrontiers(const CSRMatrix& csr); /** * @brief Traverse the graph in a depth-first-search (DFS) order. * * @param csr The input csr matrix. * @param source Source nodes. * @return A Frontiers object containing the search result */ Frontiers DGLDFSEdges(const CSRMatrix& csr, IdArray source); /** * @brief Traverse the graph in a depth-first-search (DFS) order and return the * recorded edge tag if return_labels is specified. * * The traversal visits edges in DFS order. Edges have three tags: * FORWARD(0), REVERSE(1), NONTREE(2) * * A FORWARD edge is one in which `u` has been visited but `v` has not. * A REVERSE edge is one in which both `u` and `v` have been visited and the * edge is in the DFS tree. * A NONTREE edge is one in which both `u` and `v` have been visited but the * edge is NOT in the DFS tree. * * @param csr The input csr matrix. * @param source Source nodes. * @param has_reverse_edge If true, REVERSE edges are included * @param has_nontree_edge If true, NONTREE edges are included * @param return_labels If true, return the recorded edge tags.
* @return A Frontiers object containing the search result */ Frontiers DGLDFSLabeledEdges( const CSRMatrix& csr, IdArray source, const bool has_reverse_edge, const bool has_nontree_edge, const bool return_labels); } // namespace aten } // namespace dgl #endif // DGL_GRAPH_TRAVERSAL_H_ ================================================ FILE: include/dgl/immutable_graph.h ================================================ /** * Copyright (c) 2018 by Contributors * @file dgl/immutable_graph.h * @brief DGL immutable graph index class. */ #ifndef DGL_IMMUTABLE_GRAPH_H_ #define DGL_IMMUTABLE_GRAPH_H_ #include #include #include #include #include #include #include #include "base_heterograph.h" #include "graph_interface.h" #include "lazy.h" #include "runtime/ndarray.h" namespace dgl { class CSR; class COO; typedef std::shared_ptr CSRPtr; typedef std::shared_ptr COOPtr; class ImmutableGraph; typedef std::shared_ptr ImmutableGraphPtr; /** * @brief Graph class stored using CSR structure. */ class CSR : public GraphInterface { public: // Create a csr graph that has the given number of verts and edges. CSR(int64_t num_vertices, int64_t num_edges); // Create a csr graph whose memory is stored in the shared memory // that has the given number of verts and edges. CSR(const std::string &shared_mem_name, int64_t num_vertices, int64_t num_edges); // Create a csr graph that shares the given indptr and indices. CSR(IdArray indptr, IdArray indices, IdArray edge_ids); // Create a csr graph from data iterators template CSR(int64_t num_vertices, int64_t num_edges, IndptrIter indptr_begin, IndicesIter indices_begin, EdgeIdIter edge_ids_begin); // Create a csr graph whose memory is stored in the shared memory // and the structure is given by the indptr and indices.
CSR(IdArray indptr, IdArray indices, IdArray edge_ids, const std::string &shared_mem_name); /** @brief CSR is read-only: AddVertices, AddEdge, AddEdges and Clear only report a fatal error through LOG(FATAL). */ void AddVertices(uint64_t num_vertices) override { LOG(FATAL) << "CSR graph does not allow mutation."; } void AddEdge(dgl_id_t src, dgl_id_t dst) override { LOG(FATAL) << "CSR graph does not allow mutation."; } void AddEdges(IdArray src_ids, IdArray dst_ids) override { LOG(FATAL) << "CSR graph does not allow mutation."; } void Clear() override { LOG(FATAL) << "CSR graph does not allow mutation."; } /** @brief The device context of the indptr array. */ DGLContext Context() const override { return adj_.indptr->ctx; } /** @brief The bit width of the dtype of the indices array. */ uint8_t NumBits() const override { return adj_.indices->dtype.bits; } bool IsMultigraph() const override; bool IsReadonly() const override { return true; } /** @brief The length of the indptr array minus one. */ uint64_t NumVertices() const override { return adj_.indptr->shape[0] - 1; } /** @brief The length of the indices array. */ uint64_t NumEdges() const override { return adj_.indices->shape[0]; } BoolArray HasVertices(IdArray vids) const override { LOG(FATAL) << "Not enabled for CSR graph"; return {}; } bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override; BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const override; /** @brief Not supported by CSR; reports a fatal error through LOG(FATAL). */ IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override { LOG(FATAL) << "CSR graph does not support efficient predecessor query." << " Please use successors on the reverse CSR graph."; return {}; } IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override; IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override; EdgeArray EdgeIds(IdArray src, IdArray dst) const override; std::pair FindEdge(dgl_id_t eid) const override { LOG(FATAL) << "CSR graph does not support efficient FindEdge." << " Please use COO graph."; return {}; } EdgeArray FindEdges(IdArray eids) const override { LOG(FATAL) << "CSR graph does not support efficient FindEdges." << " Please use COO graph."; return {}; } EdgeArray InEdges(dgl_id_t vid) const override { LOG(FATAL) << "CSR graph does not support efficient inedges query."
<< " Please use outedges on the reverse CSR graph."; return {}; } EdgeArray InEdges(IdArray vids) const override { LOG(FATAL) << "CSR graph does not support efficient inedges query." << " Please use outedges on the reverse CSR graph."; return {}; } EdgeArray OutEdges(dgl_id_t vid) const override; EdgeArray OutEdges(IdArray vids) const override; EdgeArray Edges(const std::string &order = "") const override; uint64_t InDegree(dgl_id_t vid) const override { LOG(FATAL) << "CSR graph does not support efficient indegree query." << " Please use outdegree on the reverse CSR graph."; return 0; } DegreeArray InDegrees(IdArray vids) const override { LOG(FATAL) << "CSR graph does not support efficient indegree query." << " Please use outdegree on the reverse CSR graph."; return {}; } /** @brief Out degree of vid, obtained from aten::CSRGetRowNNZ on the adjacency. */ uint64_t OutDegree(dgl_id_t vid) const override { return aten::CSRGetRowNNZ(adj_, vid); } DegreeArray OutDegrees(IdArray vids) const override; Subgraph VertexSubgraph(IdArray vids) const override; Subgraph EdgeSubgraph( IdArray eids, bool preserve_nodes = false) const override { LOG(FATAL) << "CSR graph does not support efficient EdgeSubgraph." << " Please use COO graph instead."; return {}; } DGLIdIters SuccVec(dgl_id_t vid) const override; DGLIdIters OutEdgeVec(dgl_id_t vid) const override; DGLIdIters PredVec(dgl_id_t vid) const override { LOG(FATAL) << "CSR graph does not support efficient PredVec." << " Please use SuccVec on the reverse CSR graph."; return DGLIdIters(nullptr, nullptr); } DGLIdIters InEdgeVec(dgl_id_t vid) const override { LOG(FATAL) << "CSR graph does not support efficient InEdgeVec." << " Please use OutEdgeVec on the reverse CSR graph."; return DGLIdIters(nullptr, nullptr); } /** @brief Return the indptr, indices and data arrays of the adjacency. The CHECK only passes for a non-transposed request in the csr format. */ std::vector GetAdj( bool transpose, const std::string &fmt) const override { CHECK(!transpose && fmt == "csr") << "Not valid adj format request."; return {adj_.indptr, adj_.indices, adj_.data}; } /** @brief Indicate whether this uses shared memory.
*/ bool IsSharedMem() const { return !shared_mem_name_.empty(); } /** @brief Return the reverse of this CSR graph (i.e., a CSC graph) */ CSRPtr Transpose() const; /** @brief Convert this CSR to COO */ COOPtr ToCOO() const; /** * @return the csr matrix that represents this graph. * @note The csr matrix shares the storage with this graph. * The data field of the CSR matrix stores the edge ids. */ aten::CSRMatrix ToCSRMatrix() const { return adj_; } /** * @brief Copy the data to another context. * @param ctx The target context. * @return The graph under another context. */ CSR CopyTo(const DGLContext &ctx) const; /** * @brief Copy data to shared memory. * @param name The name of the shared memory. * @return The graph in the shared memory */ CSR CopyToSharedMem(const std::string &name) const; /** * @brief Convert the graph to use the given number of bits for storage. * @param bits The new number of integer bits (32 or 64). * @return The graph with new bit size storage. */ CSR AsNumBits(uint8_t bits) const; // member getters IdArray indptr() const { return adj_.indptr; } IdArray indices() const { return adj_.indices; } IdArray edge_ids() const { return adj_.data; } /** @brief Load CSR from stream. NOTE(review): the bool result presumably reports success -- confirm against the definition. */ bool Load(dmlc::Stream *fs); /** @brief Save CSR to stream */ void Save(dmlc::Stream *fs) const; /** @brief Sort the adjacency in place with aten::CSRSort_ unless it is already flagged as sorted. */ void SortCSR() override { if (adj_.sorted) return; aten::CSRSort_(&adj_); } private: friend class Serializer; /** @brief private default constructor */ CSR() { adj_.sorted = false; } // The internal CSR adjacency matrix. // The data field stores edge ids. aten::CSRMatrix adj_; // The name of the shared memory to store data. // If it's empty, data isn't stored in shared memory.
std::string shared_mem_name_; }; /** * @brief Graph class stored using COO structure. */ class COO : public GraphInterface { public: // Create a coo graph that shares the given src and dst COO(int64_t num_vertices, IdArray src, IdArray dst, bool row_sorted = false, bool col_sorted = false); // TODO(da): add constructor for creating COO from shared memory void AddVertices(uint64_t num_vertices) override { LOG(FATAL) << "COO graph does not allow mutation."; } void AddEdge(dgl_id_t src, dgl_id_t dst) override { LOG(FATAL) << "COO graph does not allow mutation."; } void AddEdges(IdArray src_ids, IdArray dst_ids) override { LOG(FATAL) << "COO graph does not allow mutation."; } void Clear() override { LOG(FATAL) << "COO graph does not allow mutation."; } DGLContext Context() const override { return adj_.row->ctx; } uint8_t NumBits() const override { return adj_.row->dtype.bits; } bool IsMultigraph() const override; bool IsReadonly() const override { return true; } uint64_t NumVertices() const override { return adj_.num_rows; } uint64_t NumEdges() const override { return adj_.row->shape[0]; } bool HasVertex(dgl_id_t vid) const override { return vid < NumVertices(); } BoolArray HasVertices(IdArray vids) const override { LOG(FATAL) << "Not enabled for COO graph"; return {}; } bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override { LOG(FATAL) << "COO graph does not support efficient HasEdgeBetween." << " Please use CSR graph or AdjList graph instead."; return false; } /** NOTE(review): the fatal message below names HasEdgeBetween although this method is HasEdgesBetween. */ BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const override { LOG(FATAL) << "COO graph does not support efficient HasEdgeBetween." << " Please use CSR graph or AdjList graph instead."; return {}; } IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override { LOG(FATAL) << "COO graph does not support efficient Predecessors." << " Please use CSR graph or AdjList graph instead."; return {}; } IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override { LOG(FATAL) << "COO graph does not support efficient Successors."
<< " Please use CSR graph or AdjList graph instead."; return {}; } IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override { LOG(FATAL) << "COO graph does not support efficient EdgeId." << " Please use CSR graph or AdjList graph instead."; return {}; } /** NOTE(review): the fatal message below names EdgeId although this method is EdgeIds. */ EdgeArray EdgeIds(IdArray src, IdArray dst) const override { LOG(FATAL) << "COO graph does not support efficient EdgeId." << " Please use CSR graph or AdjList graph instead."; return {}; } std::pair FindEdge(dgl_id_t eid) const override; EdgeArray FindEdges(IdArray eids) const override; EdgeArray InEdges(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient InEdges." << " Please use CSR graph or AdjList graph instead."; return {}; } EdgeArray InEdges(IdArray vids) const override { LOG(FATAL) << "COO graph does not support efficient InEdges." << " Please use CSR graph or AdjList graph instead."; return {}; } EdgeArray OutEdges(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient OutEdges." << " Please use CSR graph or AdjList graph instead."; return {}; } EdgeArray OutEdges(IdArray vids) const override { LOG(FATAL) << "COO graph does not support efficient OutEdges." << " Please use CSR graph or AdjList graph instead."; return {}; } EdgeArray Edges(const std::string &order = "") const override; uint64_t InDegree(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient InDegree." << " Please use CSR graph or AdjList graph instead."; return 0; } DegreeArray InDegrees(IdArray vids) const override { LOG(FATAL) << "COO graph does not support efficient InDegrees." << " Please use CSR graph or AdjList graph instead."; return {}; } uint64_t OutDegree(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient OutDegree." << " Please use CSR graph or AdjList graph instead."; return 0; } DegreeArray OutDegrees(IdArray vids) const override { LOG(FATAL) << "COO graph does not support efficient OutDegrees."
<< " Please use CSR graph or AdjList graph instead."; return {}; } Subgraph VertexSubgraph(IdArray vids) const override { LOG(FATAL) << "COO graph does not support efficient VertexSubgraph." << " Please use CSR graph or AdjList graph instead."; return {}; } Subgraph EdgeSubgraph( IdArray eids, bool preserve_nodes = false) const override; DGLIdIters SuccVec(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient SuccVec." << " Please use CSR graph or AdjList graph instead."; return DGLIdIters(nullptr, nullptr); } DGLIdIters OutEdgeVec(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient OutEdgeVec." << " Please use CSR graph or AdjList graph instead."; return DGLIdIters(nullptr, nullptr); } DGLIdIters PredVec(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient PredVec." << " Please use CSR graph or AdjList graph instead."; return DGLIdIters(nullptr, nullptr); } DGLIdIters InEdgeVec(dgl_id_t vid) const override { LOG(FATAL) << "COO graph does not support efficient InEdgeVec." << " Please use CSR graph or AdjList graph instead."; return DGLIdIters(nullptr, nullptr); } /** @brief Return a single array built by aten::HStack from (row, col), or from (col, row) when transpose is set. The CHECK only passes for the coo format. */ std::vector GetAdj( bool transpose, const std::string &fmt) const override { CHECK(fmt == "coo") << "Not valid adj format request."; if (transpose) { return {aten::HStack(adj_.col, adj_.row)}; } else { return {aten::HStack(adj_.row, adj_.col)}; } } /** @brief Return the transpose of this COO */ COOPtr Transpose() const { return COOPtr(new COO(adj_.num_rows, adj_.col, adj_.row)); } /** @brief Convert this COO to CSR */ CSRPtr ToCSR() const; /** * @brief Get the coo matrix that represents this graph. * @note The coo matrix shares the storage with this graph. * The data field of the coo matrix is none. */ aten::COOMatrix ToCOOMatrix() const { return adj_; } /** * @brief Copy the data to another context. * @param ctx The target context. * @return The graph under another context.
*/
  COO CopyTo(const DGLContext &ctx) const;

  /**
   * @brief Copy data to shared memory.
   * @param name The name of the shared memory.
   * @return The graph in the shared memory
   */
  COO CopyToSharedMem(const std::string &name) const;

  /**
   * @brief Convert the graph to use the given number of bits for storage.
   * @param bits The new number of integer bits (32 or 64).
   * @return The graph with new bit size storage.
   */
  COO AsNumBits(uint8_t bits) const;

  /**
   * @brief Indicate whether this uses shared memory.
   * @return always false in this inline implementation.
   */
  bool IsSharedMem() const { return false; }

  // member getters
  /** @return the row array of the adjacency matrix (edge sources). */
  IdArray src() const { return adj_.row; }
  /** @return the column array of the adjacency matrix (edge destinations). */
  IdArray dst() const { return adj_.col; }

 private:
  /** @brief private default constructor */
  COO() {}

  // The internal COO adjacency matrix.
  // The data field is empty
  aten::COOMatrix adj_;
};

/**
 * @brief DGL immutable graph index class.
 *
 * DGL's graph is directed. Vertices are integers enumerated from zero.
 */
class ImmutableGraph : public GraphInterface {
 public:
  /** @brief Construct an immutable graph from the COO format. */
  explicit ImmutableGraph(COOPtr coo) : coo_(coo) {}

  /**
   * @brief Construct an immutable graph from the CSR format.
   *
   * For a single graph, we need two CSRs, one stores the in-edges of vertices
   * and the other stores the out-edges of vertices. These two CSRs store the
   * same edges. The reason we need both is that some operators are faster on
   * in-edge CSR and the other operators are faster on out-edge CSR.
   *
   * However, not both CSRs are required. Technically, one CSR contains all
   * information. Thus, when we construct a temporary graph (e.g., the sampled
   * subgraphs), we only construct one of the CSRs that runs fast for some
   * operations we expect and construct the other CSR on demand.
   *
   * @param in_csr The in-edge CSR; may be null if out_csr is given.
   * @param out_csr The out-edge CSR; may be null if in_csr is given.
   */
  ImmutableGraph(CSRPtr in_csr, CSRPtr out_csr)
      : in_csr_(in_csr), out_csr_(out_csr) {
    // At least one of the two representations must be supplied.
    CHECK(in_csr_ || out_csr_) << "Both CSR are missing.";
  }

  /** @brief Construct an immutable graph from one CSR.
*/
  // The single CSR is stored as the out-edge CSR.
  explicit ImmutableGraph(CSRPtr csr) : out_csr_(csr) {}

  /** @brief default copy constructor */
  ImmutableGraph(const ImmutableGraph &other) = default;

#ifndef _MSC_VER
  /** @brief default move constructor */
  ImmutableGraph(ImmutableGraph &&other) = default;
#else
  /**
   * @brief hand-written move constructor used when building with MSVC
   *
   * Takes over the three graph-structure pointers and nulls them in the
   * source object.
   */
  // NOTE(review): unlike the defaulted version above, this only transfers
  // in_csr_, out_csr_ and coo_; the remaining data members of the class are
  // left default-initialized. Confirm that this difference is intended.
  ImmutableGraph(ImmutableGraph &&other) {
    this->in_csr_ = other.in_csr_;
    this->out_csr_ = other.out_csr_;
    this->coo_ = other.coo_;
    other.in_csr_ = nullptr;
    other.out_csr_ = nullptr;
    other.coo_ = nullptr;
  }
#endif  // _MSC_VER

  /** @brief default copy assignment operator */
  ImmutableGraph &operator=(const ImmutableGraph &other) = default;

  /** @brief default destructor */
  ~ImmutableGraph() = default;

  // --- Mutation: always rejected through LOG(FATAL). ---
  void AddVertices(uint64_t num_vertices) override {
    LOG(FATAL) << "AddVertices isn't supported in ImmutableGraph";
  }

  void AddEdge(dgl_id_t src, dgl_id_t dst) override {
    LOG(FATAL) << "AddEdge isn't supported in ImmutableGraph";
  }

  void AddEdges(IdArray src_ids, IdArray dst_ids) override {
    LOG(FATAL) << "AddEdges isn't supported in ImmutableGraph";
  }

  void Clear() override {
    LOG(FATAL) << "Clear isn't supported in ImmutableGraph";
  }

  // The property getters below forward to whichever underlying structure
  // AnyGraph() returns.
  DGLContext Context() const override { return AnyGraph()->Context(); }

  uint8_t NumBits() const override { return AnyGraph()->NumBits(); }

  /**
   * @note the answer comes from the structure returned by AnyGraph()
   * @return whether the graph is a multigraph
   */
  bool IsMultigraph() const override { return AnyGraph()->IsMultigraph(); }

  /**
   * @return whether the graph is read-only
   */
  bool IsReadonly() const override { return true; }

  /**
   * @brief Check if the graph is unibipartite.
   *
   * @return True if the graph is unibipartite.
*/
  bool IsUniBipartite() const override {
    // Computed once through the base-class implementation and then cached in
    // the two mutable members.
    if (!is_unibipartite_set_) {
      is_unibipartite_ = GraphInterface::IsUniBipartite();
      is_unibipartite_set_ = true;
    }
    return is_unibipartite_;
  }

  /** @return the number of vertices in the graph.*/
  uint64_t NumVertices() const override { return AnyGraph()->NumVertices(); }

  /** @return the number of edges in the graph.*/
  uint64_t NumEdges() const override { return AnyGraph()->NumEdges(); }

  /** @return true if the given vertex is in the graph.*/
  bool HasVertex(dgl_id_t vid) const override { return vid < NumVertices(); }

  BoolArray HasVertices(IdArray vids) const override;

  /** @return true if the given edge is in the graph.*/
  bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const override {
    // An existing in-CSR is queried with the endpoints swapped; otherwise the
    // out-CSR is obtained through GetOutCSR().
    if (in_csr_) {
      return in_csr_->HasEdgeBetween(dst, src);
    } else {
      return GetOutCSR()->HasEdgeBetween(src, dst);
    }
  }

  /**
   * @brief Array version of HasEdgeBetween.
   * @param src The source vertex ids.
   * @param dst The destination vertex ids.
   */
  BoolArray HasEdgesBetween(IdArray src, IdArray dst) const override {
    if (in_csr_) {
      return in_csr_->HasEdgesBetween(dst, src);
    } else {
      return GetOutCSR()->HasEdgesBetween(src, dst);
    }
  }

  /**
   * @brief Find the predecessors of a vertex.
   * @param vid The vertex id.
   * @param radius The radius of the neighborhood. Default is immediate neighbor
   *        (radius=1).
   * @return the predecessor id array.
   */
  IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const override {
    // Predecessors are the successors in the in-edge (reverse) CSR.
    return GetInCSR()->Successors(vid, radius);
  }

  /**
   * @brief Find the successors of a vertex.
   * @param vid The vertex id.
   * @param radius The radius of the neighborhood. Default is immediate neighbor
   *        (radius=1).
   * @return the successor id array.
   */
  IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const override {
    return GetOutCSR()->Successors(vid, radius);
  }

  /**
   * @brief Get all edge ids between the two given endpoints
   * @note Edges are associated with an integer id start from zero.
   *       The id is assigned when the edge is being added to the graph.
   * @param src The source vertex.
   * @param dst The destination vertex.
   * @return the edge id array.
*/
  IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const override {
    // An existing in-CSR is queried with the endpoints swapped.
    if (in_csr_) {
      return in_csr_->EdgeId(dst, src);
    } else {
      return GetOutCSR()->EdgeId(src, dst);
    }
  }

  /**
   * @brief Get all edge ids between the given endpoint pairs.
   * @note Edges are associated with an integer id start from zero.
   *       The id is assigned when the edge is being added to the graph.
   *       If duplicate pairs exist, the returned edge IDs will also duplicate.
   *       The order of returned edge IDs will follow the order of src-dst pairs
   *       first, and ties are broken by the order of edge ID.
   * @param src The source vertex ids.
   * @param dst The destination vertex ids.
   * @return EdgeArray containing all edges between all pairs.
   */
  EdgeArray EdgeIds(IdArray src, IdArray dst) const override {
    if (in_csr_) {
      // The in-CSR stores reversed edges, so swap the endpoint arrays back
      // before returning.
      EdgeArray edges = in_csr_->EdgeIds(dst, src);
      return EdgeArray{edges.dst, edges.src, edges.id};
    } else {
      return GetOutCSR()->EdgeIds(src, dst);
    }
  }

  /**
   * @brief Find the edge ID and return the pair of endpoints
   * @param eid The edge ID
   * @return a pair whose first element is the source and the second the
   *         destination.
   */
  // NOTE(review): the template arguments of std::pair appear to have been
  // lost from this copy of the header; restore them from the upstream file.
  std::pair FindEdge(dgl_id_t eid) const override {
    return GetCOO()->FindEdge(eid);
  }

  /**
   * @brief Find the edge IDs and return their source and target node IDs.
   * @param eids The edge ID array.
   * @return EdgeArray containing all edges with id in eid. The order is
   *         preserved.
   */
  EdgeArray FindEdges(IdArray eids) const override {
    return GetCOO()->FindEdges(eids);
  }

  /**
   * @brief Get the in edges of the vertex.
   * @note The returned dst id array is filled with vid.
   * @param vid The vertex id.
   * @return the edges
   */
  EdgeArray InEdges(dgl_id_t vid) const override {
    // Out-edges of the in-CSR are the in-edges of this graph with src and dst
    // exchanged, so swap them back.
    const EdgeArray &ret = GetInCSR()->OutEdges(vid);
    return {ret.dst, ret.src, ret.id};
  }

  /**
   * @brief Get the in edges of the vertices.
   * @param vids The vertex id array.
   * @return the id arrays of the two endpoints of the edges.
   */
  EdgeArray InEdges(IdArray vids) const override {
    const EdgeArray &ret = GetInCSR()->OutEdges(vids);
    return {ret.dst, ret.src, ret.id};
  }

  /**
   * @brief Get the out edges of the vertex.
* @note The returned src id array is filled with vid.
   * @param vid The vertex id.
   * @return the id arrays of the two endpoints of the edges.
   */
  EdgeArray OutEdges(dgl_id_t vid) const override {
    return GetOutCSR()->OutEdges(vid);
  }

  /**
   * @brief Get the out edges of the vertices.
   * @param vids The vertex id array.
   * @return the id arrays of the two endpoints of the edges.
   */
  EdgeArray OutEdges(IdArray vids) const override {
    return GetOutCSR()->OutEdges(vids);
  }

  /**
   * @brief Get all the edges in the graph.
   * @note Depending on the requested order, the returned edge list is either
   *       sorted by src and dst ids or left in edge id order.
   * @param order The requested ordering, given as a string; defaults to the
   *        empty string. NOTE(review): the accepted values are defined by the
   *        out-of-line implementation -- confirm and list them here.
   * @return the id arrays of the two endpoints of the edges.
   */
  EdgeArray Edges(const std::string &order = "") const override;

  /**
   * @brief Get the in degree of the given vertex.
   * @param vid The vertex id.
   * @return the in degree
   */
  uint64_t InDegree(dgl_id_t vid) const override {
    // In-degree equals the out-degree in the in-edge (reverse) CSR.
    return GetInCSR()->OutDegree(vid);
  }

  /**
   * @brief Get the in degrees of the given vertices.
   * @param vids The vertex id array.
   * @return the in degree array
   */
  DegreeArray InDegrees(IdArray vids) const override {
    return GetInCSR()->OutDegrees(vids);
  }

  /**
   * @brief Get the out degree of the given vertex.
   * @param vid The vertex id.
   * @return the out degree
   */
  uint64_t OutDegree(dgl_id_t vid) const override {
    return GetOutCSR()->OutDegree(vid);
  }

  /**
   * @brief Get the out degrees of the given vertices.
   * @param vids The vertex id array.
   * @return the out degree array
   */
  DegreeArray OutDegrees(IdArray vids) const override {
    return GetOutCSR()->OutDegrees(vids);
  }

  /**
   * @brief Construct the induced subgraph of the given vertices.
   *
   * The induced subgraph is a subgraph formed by specifying a set of vertices
   * V' and then selecting all of the edges from the original graph that connect
   * two vertices in V'.
*
   * Vertices and edges in the original graph will be "reindexed" to local
   * index. The local index of the vertices preserve the order of the given id
   * array, while the local index of the edges preserve the index order in the
   * original graph. Vertices not in the original graph are ignored.
   *
   * The result subgraph is read-only.
   *
   * @param vids The vertices in the subgraph.
   * @return the induced subgraph
   */
  Subgraph VertexSubgraph(IdArray vids) const override;

  /**
   * @brief Construct the induced edge subgraph of the given edges.
   *
   * The induced edges subgraph is a subgraph formed by specifying a set of
   * edges E' and then selecting all of the nodes from the original graph that
   * are endpoints in E'.
   *
   * Vertices and edges in the original graph will be "reindexed" to local
   * index. The local index of the edges preserve the order of the given id
   * array, while the local index of the vertices preserve the index order in
   * the original graph. Edges not in the original graph are ignored.
   *
   * The result subgraph is read-only.
   *
   * @param eids The edges in the subgraph.
   * @param preserve_nodes Defaults to false. NOTE(review): presumably keeps
   *        every node of the original graph in the result instead of only the
   *        endpoints of the selected edges -- confirm against the
   *        implementation.
   * @return the induced edge subgraph
   */
  Subgraph EdgeSubgraph(
      IdArray eids, bool preserve_nodes = false) const override;

  /**
   * @brief Return the successor vector
   * @param vid The vertex id.
   * @return the successor vector
   */
  DGLIdIters SuccVec(dgl_id_t vid) const override {
    return GetOutCSR()->SuccVec(vid);
  }

  /**
   * @brief Return the out edge id vector
   * @param vid The vertex id.
   * @return the out edge id vector
   */
  DGLIdIters OutEdgeVec(dgl_id_t vid) const override {
    return GetOutCSR()->OutEdgeVec(vid);
  }

  /**
   * @brief Return the predecessor vector
   * @param vid The vertex id.
   * @return the predecessor vector
   */
  DGLIdIters PredVec(dgl_id_t vid) const override {
    // Predecessors are the successors in the in-edge (reverse) CSR.
    return GetInCSR()->SuccVec(vid);
  }

  /**
   * @brief Return the in edge id vector
   * @param vid The vertex id.
* @return the in edge id vector */ DGLIdIters InEdgeVec(dgl_id_t vid) const override { return GetInCSR()->OutEdgeVec(vid); } /** * @brief Get the adjacency matrix of the graph. * * By default, a row of returned adjacency matrix represents the destination * of an edge and the column represents the source. * @param transpose A flag to transpose the returned adjacency matrix. * @param fmt the format of the returned adjacency matrix. * @return a vector of three IdArray. */ std::vector GetAdj( bool transpose, const std::string &fmt) const override; /** @brief Return in csr. If not exist, transpose the other one.*/ CSRPtr GetInCSR() const; /** @brief Return out csr. If not exist, transpose the other one.*/ CSRPtr GetOutCSR() const; /** @brief Return coo. If not exist, create from csr.*/ COOPtr GetCOO() const; /** @brief Create an immutable graph from CSR. */ static ImmutableGraphPtr CreateFromCSR( IdArray indptr, IdArray indices, IdArray edge_ids, const std::string &edge_dir); static ImmutableGraphPtr CreateFromCSR(const std::string &shared_mem_name); /** @brief Create an immutable graph from COO. */ static ImmutableGraphPtr CreateFromCOO( int64_t num_vertices, IdArray src, IdArray dst, bool row_osrted = false, bool col_sorted = false); /** * @brief Convert the given graph to an immutable graph. * * If the graph is already an immutable graph. The result graph will share * the storage with the given one. * * @param graph The input graph. * @return an immutable graph object. */ static ImmutableGraphPtr ToImmutable(GraphPtr graph); /** * @brief Copy the data to another context. * @param ctx The target context. * @return The graph under another context. */ static ImmutableGraphPtr CopyTo(ImmutableGraphPtr g, const DGLContext &ctx); /** * @brief Copy data to shared memory. * @param name The name of the shared memory. 
* @return The graph in the shared memory
   */
  static ImmutableGraphPtr CopyToSharedMem(
      ImmutableGraphPtr g, const std::string &name);

  /**
   * @brief Convert the graph to use the given number of bits for storage.
   * @param g The graph to convert.
   * @param bits The new number of integer bits (32 or 64).
   * @return The graph with new bit size storage.
   */
  static ImmutableGraphPtr AsNumBits(ImmutableGraphPtr g, uint8_t bits);

  /**
   * @brief Return a new graph with all the edges reversed.
   *
   * The returned graph preserves the vertex and edge index in the original
   * graph.
   *
   * @return the reversed graph
   */
  ImmutableGraphPtr Reverse() const;

  /**
   * @brief Load ImmutableGraph from stream, using out csr
   * @param fs The stream to read from.
   * @return a bool status. NOTE(review): presumably true on success --
   *         confirm against the implementation.
   */
  bool Load(dmlc::Stream *fs);

  /**
   * @brief Save ImmutableGraph to stream, using out csr
   * @param fs The stream to write to.
   */
  void Save(dmlc::Stream *fs) const;

  /** @brief Sort both CSRs, obtaining each through its getter first. */
  void SortCSR() override {
    GetInCSR()->SortCSR();
    GetOutCSR()->SortCSR();
  }

  /** @return whether the in-edge CSR pointer is currently set. */
  bool HasInCSR() const { return in_csr_ != NULL; }

  /** @return whether the out-edge CSR pointer is currently set. */
  bool HasOutCSR() const { return out_csr_ != NULL; }

  /** @brief Cast this graph to a heterograph */
  HeteroGraphPtr AsHeteroGraph() const;

 protected:
  friend class Serializer;
  friend class UnitGraph;

  /** @brief internal default constructor */
  ImmutableGraph() {}

  /** @brief internal constructor for all the members */
  ImmutableGraph(CSRPtr in_csr, CSRPtr out_csr, COOPtr coo)
      : in_csr_(in_csr), out_csr_(out_csr), coo_(coo) {
    CHECK(AnyGraph()) << "At least one graph structure should exist.";
  }

  /** @brief internal constructor that also records the shared memory name */
  ImmutableGraph(
      CSRPtr in_csr, CSRPtr out_csr, const std::string shared_mem_name)
      : in_csr_(in_csr), out_csr_(out_csr) {
    CHECK(in_csr_ || out_csr_) << "Both CSR are missing.";
    this->shared_mem_name_ = shared_mem_name;
  }

  /**
   * @brief return pointer to any available graph structure
   *
   * Preference order: in-CSR, then out-CSR, then COO.
   */
  GraphPtr AnyGraph() const {
    if (in_csr_) {
      return in_csr_;
    } else if (out_csr_) {
      return out_csr_;
    } else {
      return coo_;
    }
  }

  // Store the in csr (i.e, the reverse csr)
  CSRPtr in_csr_;
  // Store the out csr (i.e, the normal csr)
  CSRPtr out_csr_;
  // Store the edge list indexed by edge id (COO)
  COOPtr coo_;

  // The name of shared memory for this graph.
  // If it's empty, the graph isn't stored in shared memory.
  std::string shared_mem_name_;
  // We serialize the metadata of the graph index here for shared memory.
  NDArray serialized_shared_meta_;

  // Whether or not the `is_unibipartite_` property has been set.
  mutable bool is_unibipartite_set_ = false;
  // Whether this graph is unibipartite. If `is_unibipartite_set_` is false,
  // then this flag should be considered in an uninitialized state.
  mutable bool is_unibipartite_ = false;
};

// inline implementations

// Builds a square CSR (num_rows == num_cols == num_vertices) by copying
// num_vertices + 1 indptr entries and num_edges indices/edge-id entries from
// the given iterators into newly allocated id arrays.
// NOTE(review): the template parameter list and the static_cast target types
// appear to have been lost from this copy of the header; restore them from
// the upstream file.
template CSR::CSR(
    int64_t num_vertices, int64_t num_edges, IndptrIter indptr_begin,
    IndicesIter indices_begin, EdgeIdIter edge_ids_begin) {
  // TODO(minjie): this should be changed to a device-agnostic implementation
  //   in the future.
  adj_.num_rows = num_vertices;
  adj_.num_cols = num_vertices;
  adj_.indptr = aten::NewIdArray(num_vertices + 1);
  adj_.indices = aten::NewIdArray(num_edges);
  adj_.data = aten::NewIdArray(num_edges);
  dgl_id_t *indptr_data = static_cast(adj_.indptr->data);
  dgl_id_t *indices_data = static_cast(adj_.indices->data);
  dgl_id_t *edge_ids_data = static_cast(adj_.data->data);
  for (int64_t i = 0; i < num_vertices + 1; ++i)
    *(indptr_data++) = *(indptr_begin++);
  for (int64_t i = 0; i < num_edges; ++i) {
    *(indices_data++) = *(indices_begin++);
    *(edge_ids_data++) = *(edge_ids_begin++);
  }
}

}  // namespace dgl

namespace dmlc {
DMLC_DECLARE_TRAITS(has_saveload, dgl::CSR, true);
DMLC_DECLARE_TRAITS(has_saveload, dgl::ImmutableGraph, true);
}  // namespace dmlc

#endif  // DGL_IMMUTABLE_GRAPH_H_
================================================
FILE: include/dgl/kernel.h
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file dgl/kernel.h
 * @brief Sparse matrix operators.
*/
#ifndef DGL_KERNEL_H_
#define DGL_KERNEL_H_

// NOTE(review): the targets of the three bare #include directives below appear
// to have been lost from this copy of the header; restore them from the
// upstream file.
#include
#include
#include

#include "./base_heterograph.h"
#include "./bcast.h"
#include "array.h"

namespace dgl {
namespace aten {

/**
 * @brief Generalized Sparse Matrix-Matrix Multiplication.
 * @param op The binary operator, could be `add`, `sub`, `mul`, `div`,
 *        `copy_u`, `copy_e`.
 * @param reduce The reduce operator, could be `sum`, `min`, `max`.
 * @param graph The graph we apply SpMM on.
 * @param ufeat The source node feature.
 * @param efeat The edge feature.
 * @param out The output feature on destination nodes.
 * @param out_aux A list of NDArray's that contains auxiliary information such
 *        as the argmax on source nodes and edges for reduce operators such as
 *        `min` and `max`.
 */
// NOTE(review): the element type of std::vector appears to have been lost
// from this copy of the header.
void SpMM(
    const std::string& op, const std::string& reduce, HeteroGraphPtr graph,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux);

/**
 * @brief Generalized Sampled Dense-Dense Matrix Multiplication.
 * @param op The binary operator, could be `add`, `sub`, `mul`, `div`,
 *        `dot`, `copy_u`, `copy_e`.
 * @param graph The graph we apply SDDMM on.
 * @param ufeat The source node feature.
 * @param efeat The second feature operand. NOTE(review): this was previously
 *        documented as `vfeat`, "the destination node feature", although the
 *        parameter is named `efeat` -- confirm which meaning is intended.
 * @param out The output feature on edge.
 */
void SDDMM(
    const std::string& op, HeteroGraphPtr graph, NDArray ufeat, NDArray efeat,
    NDArray out);

/**
 * @brief Sparse-sparse matrix multiplication.
 *
 * The sparse matrices must have scalar weights (i.e. \a A_weights and \a
 * B_weights are 1D vectors.)
 */
// NOTE(review): the template arguments of std::pair appear to have been lost
// from this copy of the header.
std::pair CSRMM(
    CSRMatrix A, NDArray A_weights, CSRMatrix B, NDArray B_weights);

/**
 * @brief Summing up a list of sparse matrices.
 *
 * The sparse matrices must have scalar weights (i.e. the arrays in \a A_weights
 * are 1D vectors.)
*/
// NOTE(review): the template arguments of std::pair and std::vector appear to
// have been lost from this copy of the header.
std::pair CSRSum(
    const std::vector& A, const std::vector& A_weights);

}  // namespace aten
}  // namespace dgl

#endif  // DGL_KERNEL_H_
================================================
FILE: include/dgl/lazy.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file dgl/lazy.h
 * @brief Lazy object that will be materialized only when being queried.
 */
#ifndef DGL_LAZY_H_
#define DGL_LAZY_H_

// NOTE(review): the target of the bare #include below appears to have been
// lost from this copy of the header.
#include

namespace dgl {

/**
 * @brief Lazy object that will be materialized only when being queried.
 *
 * The object should be immutable -- no mutation once materialized.
 * The object is currently not thread safe.
 */
// NOTE(review): the template parameter lists of the class and of Get(), and
// the template argument of std::shared_ptr, appear to have been lost from
// this copy of the header; the code uses `T` as the stored value type and
// `Fn` as the creator type.
template class Lazy {
 public:
  /** @brief default constructor to construct a lazy object */
  Lazy() {}

  /**
   * @brief constructor to construct an object with given value (non-lazy case)
   */
  explicit Lazy(const T& val) : ptr_(new T(val)) {}

  /** @brief destructor */
  ~Lazy() = default;

  /**
   * @brief Get the value of this object. If the object has not been
   *        instantiated, using the provided function to create it.
   * @param fn The creator function; only called while no value is stored yet.
   * @return the object value.
   */
  template const T& Get(Fn fn) {
    // Materialize on first access; later calls reuse the stored value and do
    // not call fn.
    if (!ptr_) {
      ptr_.reset(new T(fn()));
    }
    return *ptr_;
  }

 private:
  /** @brief the internal data pointer */
  std::shared_ptr ptr_{nullptr};
};

}  // namespace dgl

#endif  // DGL_LAZY_H_
================================================
FILE: include/dgl/nodeflow.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file dgl/nodeflow.h
 * @brief DGL NodeFlow class.
 */
#ifndef DGL_NODEFLOW_H_
#define DGL_NODEFLOW_H_

// NOTE(review): the targets of the three bare #include directives below appear
// to have been lost from this copy of the header.
#include
#include
#include

#include "./runtime/object.h"
#include "graph_interface.h"

namespace dgl {

class ImmutableGraph;

/**
 * @brief A NodeFlow graph stores the sampling results for a sampler that
 * samples nodes/edges in layers.
 *
 * We store multiple layers of the sampling results in a single graph, which
 * results in a more compact format.
We store extra information, such as the
 * node and edge mapping from the NodeFlow graph to the parent graph.
 */
struct NodeFlowObject : public runtime::Object {
  /** @brief The graph. */
  GraphPtr graph;
  /**
   * @brief the offsets of each layer.
   */
  IdArray layer_offsets;
  /**
   * @brief the offsets of each flow.
   */
  IdArray flow_offsets;
  /**
   * @brief The node mapping from the NodeFlow graph to the parent graph.
   */
  IdArray node_mapping;
  /**
   * @brief The edge mapping from the NodeFlow graph to the parent graph.
   */
  IdArray edge_mapping;

  /** @brief The type key string of this object type. */
  static constexpr const char *_type_key = "graph.NodeFlow";
  DGL_DECLARE_OBJECT_TYPE_INFO(NodeFlowObject, runtime::Object);
};

// Define NodeFlow as the reference class of NodeFlowObject
class NodeFlow : public runtime::ObjectRef {
 public:
  DGL_DEFINE_OBJECT_REF_METHODS(NodeFlow, runtime::ObjectRef, NodeFlowObject);

  /** @brief create a new nodeflow reference */
  // NOTE(review): the template argument of std::make_shared appears to have
  // been lost from this copy of the header.
  static NodeFlow Create() { return NodeFlow(std::make_shared()); }
};

/**
 * @brief Get a slice on a graph that represents a NodeFlow.
 *
 * The entire block has to be taken as a slice. Users have to specify the
 * correct starting and ending location of a layer.
 *
 * If remap is false, the returned arrays can be viewed as a sub-matrix slice
 * of the adjmat of the input graph. Let the adjmat of the input graph be A,
 * then the slice is equal to (in numpy syntax):
 *   A[layer1_start:layer1_end, layer0_start:layer0_end]
 *
 * If remap is true, the returned arrays represent an adjacency matrix
 * of shape NxM, where N is the number of nodes in layer1 and M is
 * the number of nodes in layer0. Nodes in layer0 will be remapped to
 * [0, M) and nodes in layer1 will be remapped to [0, N).
 *
 * A row of the returned adjacency matrix represents the destination
 * of an edge and the column represents the source.
 *
 * If fmt == "csr", the function returns three arrays: indptr, indices, eid.
 * If fmt == "coo", the function returns two arrays: idx, eid.
Here, the idx
 * array is the concatenation of src and dst node id arrays.
 *
 * @param graph An immutable graph.
 * @param fmt the format of the returned adjacency matrix.
 * @param layer0_size the size of the first layer in the block.
 * @param layer1_start the location where the second layer starts.
 * @param layer1_end the location where the second layer ends.
 * @param remap Indicates to remap all vertex ids and edge Ids to local Id
 *        space.
 * @return a vector of IdArrays.
 */
// NOTE(review): the element type of std::vector appears to have been lost
// from this copy of the header.
std::vector GetNodeFlowSlice(
    const ImmutableGraph &graph, const std::string &fmt, size_t layer0_size,
    size_t layer1_start, size_t layer1_end, bool remap);

}  // namespace dgl

#endif  // DGL_NODEFLOW_H_
================================================
FILE: include/dgl/packed_func_ext.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file packed_func_ext.h
 * @brief Extension package to PackedFunc
 *   This enables passing ObjectRef types into/from PackedFunc.
 */
#ifndef DGL_PACKED_FUNC_EXT_H_
#define DGL_PACKED_FUNC_EXT_H_

// NOTE(review): throughout this header every `<...>` span (system #include
// targets, template parameter lists and template arguments) appears to have
// been lost from this copy. The code below is kept exactly as found and must
// be restored from the upstream file before it can compile.
#include
#include
#include
#include

#include "./runtime/container.h"
#include "./runtime/object.h"
#include "./runtime/packed_func.h"

namespace dgl {
namespace runtime {
/**
 * @brief Runtime type checker for node type.
 * @tparam T the type to be checked.
 */
template struct ObjectTypeChecker {
  static inline bool Check(Object* sptr) {
    // This is the only place in the project where RTTI is used
    // It can be turned off, but will make non strict checking.
    // TODO(tqchen) possibly find alternative to turn off RTTI
    using ContainerType = typename T::ContainerType;
    return sptr->derived_from();
  }
  static inline void PrintName(std::ostringstream& os) {  // NOLINT(*)
    using ContainerType = typename T::ContainerType;
    os << ContainerType::_type_key;
  }
};

// Specialization for list containers: the object must be a ListObject and
// every element must pass the element checker.
template struct ObjectTypeChecker > {
  static inline bool Check(Object* sptr) {
    if (sptr == nullptr) return false;
    if (!sptr->is_type()) return false;
    ListObject* n = static_cast(sptr);
    for (const auto& p : n->data) {
      if (!ObjectTypeChecker::Check(p.get())) return false;
    }
    return true;
  }
  static inline void PrintName(std::ostringstream& os) {  // NOLINT(*)
    os << "list<";
    ObjectTypeChecker::PrintName(os);
    os << ">";
  }
};

// Specialization for string-keyed maps: the object must be a StrMapObject and
// every mapped value must pass the value checker.
template struct ObjectTypeChecker > {
  static inline bool Check(Object* sptr) {
    if (sptr == nullptr) return false;
    if (!sptr->is_type()) return false;
    StrMapObject* n = static_cast(sptr);
    for (const auto& kv : n->data) {
      if (!ObjectTypeChecker::Check(kv.second.get())) return false;
    }
    return true;
  }
  static inline void PrintName(std::ostringstream& os) {  // NOLINT(*)
    // NOTE(review): the string literal on the next line is unterminated in
    // this copy; part of the original statement sequence is missing.
    os << "map::PrintName(os); os << '>';
  }
};

// Specialization for general maps: the object must be a MapObject and every
// key and every value must pass its checker.
template struct ObjectTypeChecker > {
  static inline bool Check(Object* sptr) {
    if (sptr == nullptr) return false;
    if (!sptr->is_type()) return false;
    MapObject* n = static_cast(sptr);
    for (const auto& kv : n->data) {
      if (!ObjectTypeChecker::Check(kv.first.get())) return false;
      if (!ObjectTypeChecker::Check(kv.second.get())) return false;
    }
    return true;
  }
  static inline void PrintName(std::ostringstream& os) {  // NOLINT(*)
    os << "map<";
    ObjectTypeChecker::PrintName(os);
    os << ',';
    ObjectTypeChecker::PrintName(os);
    os << '>';
  }
};

/** @brief Return the printable type name produced by the type checker. */
template inline std::string NodeTypeName() {
  std::ostringstream os;
  ObjectTypeChecker::PrintName(os);
  return os.str();
}

// extensions for DGLArgValue

/**
 * @brief Convert the argument to an object reference.
 *
 * A null argument yields a default-constructed reference; otherwise the type
 * code must be kObjectHandle and the stored object must pass the type check.
 */
template inline TObjectRef DGLArgValue::AsObjectRef() const {
  static_assert(
      std::is_base_of::value,
      "Conversion only works for ObjectRef derived class");
  if (type_code_ == kNull) return TObjectRef();
  DGL_CHECK_TYPE_CODE(type_code_, kObjectHandle);
  std::shared_ptr& sptr = *ptr >();
  CHECK(ObjectTypeChecker::Check(sptr.get()))
      << "Expected type " << NodeTypeName() << " but get "
      << sptr->type_key();
  return TObjectRef(sptr);
}

/** @brief Access the stored object pointer; requires kObjectHandle. */
inline std::shared_ptr& DGLArgValue::obj_sptr() {
  DGL_CHECK_TYPE_CODE(type_code_, kObjectHandle);
  return *ptr >();
}

/** @brief Whether the stored object passes the type check. */
template inline bool DGLArgValue::IsObjectType() const {
  DGL_CHECK_TYPE_CODE(type_code_, kObjectHandle);
  std::shared_ptr& sptr = *ptr >();
  return ObjectTypeChecker::Check(sptr.get());
}

// extensions for DGLRetValue

/** @brief Assign an object pointer; a null pointer is stored as kNull. */
inline DGLRetValue& DGLRetValue::operator=(
    const std::shared_ptr& other) {
  if (other.get() == nullptr) {
    SwitchToPOD(kNull);
  } else {
    SwitchToClass >(kObjectHandle, other);
  }
  return *this;
}

/** @brief Assign an object reference; an undefined one is stored as kNull. */
inline DGLRetValue& DGLRetValue::operator=(const ObjectRef& other) {
  if (!other.defined()) {
    SwitchToPOD(kNull);
  } else {
    SwitchToClass >(kObjectHandle, other.obj_);
  }
  return *this;
}

/**
 * @brief Convert the return value to an object reference.
 *
 * A null value yields a default-constructed reference; otherwise the type
 * code must be kObjectHandle. No per-type check is made here.
 */
template inline TObjectRef DGLRetValue::AsObjectRef() const {
  static_assert(
      std::is_base_of::value, "Conversion only works for ObjectRef");
  if (type_code_ == kNull) return TObjectRef();
  DGL_CHECK_TYPE_CODE(type_code_, kObjectHandle);
  return TObjectRef(*ptr >());
}

/**
 * @brief Store an object reference as the i-th argument.
 *
 * A defined reference is stored as a handle to its object pointer with code
 * kObjectHandle; an undefined one only sets the type code to kNull.
 */
inline void DGLArgsSetter::operator()(
    size_t i, const ObjectRef& other) const {  // NOLINT(*)
  if (other.defined()) {
    values_[i].v_handle = const_cast*>(&(other.obj_));
    type_codes_[i] = kObjectHandle;
  } else {
    type_codes_[i] = kNull;
  }
}

}  // namespace runtime
}  // namespace dgl
#endif  // DGL_PACKED_FUNC_EXT_H_
================================================
FILE: include/dgl/random.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file dgl/random.h
 * @brief Random number generators
 */

#ifndef DGL_RANDOM_H_
#define DGL_RANDOM_H_

// NOTE(review): the targets of the bare #include directives below appear to
// have been lost from this copy of the header.
#include
#include
#include
#include
#include
#include
#include

namespace dgl {

namespace {

// Get a unique integer ID representing this thread.
inline uint32_t GetThreadId() { static int num_threads = 0; static std::mutex mutex; static thread_local int id = -1; if (id == -1) { std::lock_guard guard(mutex); id = num_threads; num_threads++; } return id; } }; // namespace /** * @brief Thread-local Random Number Generator class */ class RandomEngine { public: /** @brief Constructor with default seed */ RandomEngine() { std::random_device rd; SetSeed(rd()); } /** @brief Constructor with given seed */ explicit RandomEngine(uint64_t seed, uint64_t stream = GetThreadId()) { SetSeed(seed, stream); } /** @brief Get the thread-local random number generator instance */ static RandomEngine* ThreadLocal() { return dmlc::ThreadLocalStore::Get(); } /** * @brief Set the seed of this random number generator */ void SetSeed(uint64_t seed, uint64_t stream = GetThreadId()) { rng_.seed(seed, stream); } /** * @brief Generate an arbitrary random 32-bit integer. */ int32_t RandInt32() { return static_cast(rng_()); } /** * @brief Generate a uniform random integer in [0, upper) */ template T RandInt(T upper) { return RandInt(0, upper); } /** * @brief Generate a uniform random integer in [lower, upper) */ template T RandInt(T lower, T upper) { CHECK_LT(lower, upper); std::uniform_int_distribution dist(lower, upper - 1); return dist(rng_); } /** * @brief Generate a uniform random float in [0, 1) */ template T Uniform() { return Uniform(0., 1.); } /** * @brief Generate a uniform random float in [lower, upper) */ template T Uniform(T lower, T upper) { // Although the result is in [lower, upper), we allow lower == upper as in // www.cplusplus.com/reference/random/uniform_real_distribution/uniform_real_distribution/ CHECK_LE(lower, upper); std::uniform_real_distribution dist(lower, upper); return dist(rng_); } /** * @brief Pick a random integer between 0 to N-1 according to given * probabilities. * @tparam IdxType Return integer type. * @param prob Array of N unnormalized probability of each element. Must be * non-negative. 
* @return An integer randomly picked from 0 to N-1. */ template IdxType Choice(FloatArray prob); /** * @brief Pick random integers between 0 to N-1 according to given * probabilities * * If replace is false, the number of picked integers must not larger than N. * * @tparam IdxType Id type * @tparam FloatType Probability value type * @param num Number of integers to choose * @param prob Array of N unnormalized probability of each element. Must be * non-negative. * @param out The output buffer to write selected indices. * @param replace If true, choose with replacement. */ template void Choice(IdxType num, FloatArray prob, IdxType* out, bool replace = true); /** * @brief Pick random integers between 0 to N-1 according to given * probabilities * * If replace is false, the number of picked integers must not larger than N. * * @tparam IdxType Id type * @tparam FloatType Probability value type * @param num Number of integers to choose * @param prob Array of N unnormalized probability of each element. Must be * non-negative. * @param replace If true, choose with replacement. * @return Picked indices */ template IdArray Choice(IdxType num, FloatArray prob, bool replace = true) { const DGLDataType dtype{kDGLInt, sizeof(IdxType) * 8, 1}; IdArray ret = IdArray::Empty({num}, dtype, prob->ctx); Choice( num, prob, static_cast(ret->data), replace); return ret; } /** * @brief Pick random integers from population by uniform distribution. * * If replace is false, num must not be larger than population. * * @tparam IdxType Return integer type * @param num Number of integers to choose * @param population Total number of elements to choose from. * @param out The output buffer to write selected indices. * @param replace If true, choose with replacement. */ template void UniformChoice( IdxType num, IdxType population, IdxType* out, bool replace = true); /** * @brief Pick random integers from population by uniform distribution. 
*
   * If replace is false, num must not be larger than population.
   *
   * The result array holds `num` elements and is allocated on the CPU.
   *
   * @tparam IdxType Return integer type
   * @param num Number of integers to choose
   * @param population Total number of elements to choose from.
   * @param replace If true, choose with replacement.
   * @return Picked indices
   */
  template
  IdArray UniformChoice(IdxType num, IdxType population, bool replace = true) {
    const DGLDataType dtype{kDGLInt, sizeof(IdxType) * 8, 1};
    // TODO(minjie): only CPU implementation right now
    IdArray ret = IdArray::Empty({num}, dtype, DGLContext{kDGLCPU, 0});
    UniformChoice(
        num, population, static_cast(ret->data), replace);
    return ret;
  }

  /**
   * @brief Pick random integers with different probability for different
   * segments.
   *
   * For example, if split=[0, 4, 10] and bias=[1.5, 1], it means to pick some
   * integers from 0 to 9, which is divided into two segments. 0-3 are in the
   * first segment and the rest belong to the second. The weight (bias) of each
   * candidate in the first segment is upweighted to 1.5.
   *
   * candidate |  0 1 2 3  |  4 5 6 7 8 9  |
   * split     ^           ^               ^
   * bias      |    1.5    |       1       |
   *
   *
   * The complexity of this operator is O(k * log(T)) where k is the number of
   * integers we want to pick, and T is the number of segments. It is much
   * faster compared with assigning probability for each candidate, of which the
   * complexity is O(k * log(N)) where N is the number of all candidates.
   *
   * If replace is false, num must not be larger than population.
   *
   * @tparam IdxType Return integer type
   * @param num Number of integers to choose
   * @param split Array of T+1 split positions of the segments (including
   * start and end)
   * @param bias Array of T weights, one per segment.
   * @param out The output buffer to write selected indices.
   * @param replace If true, choose with replacement.
   */
  template
  void BiasedChoice(
      IdxType num, const IdxType* split, FloatArray bias, IdxType* out,
      bool replace = true);

  /**
   * @brief Pick random integers with different probability for different
   * segments.
*
   * If replace is false, num must not be larger than population.
   *
   * The result array holds `num` elements and is allocated on the CPU.
   *
   * @tparam IdxType Return integer type
   * @param num Number of integers to choose
   * @param split Split positions of different segments
   * @param bias Weights of different segments
   * @param replace If true, choose with replacement.
   * @return Picked indices
   */
  template
  IdArray BiasedChoice(
      IdxType num, const IdxType* split, FloatArray bias,
      bool replace = true) {
    const DGLDataType dtype{kDGLInt, sizeof(IdxType) * 8, 1};
    IdArray ret = IdArray::Empty({num}, dtype, DGLContext{kDGLCPU, 0});
    BiasedChoice(
        num, split, bias, static_cast(ret->data), replace);
    return ret;
  }

 private:
  /** @brief The underlying generator that every sampling method draws from. */
  pcg32 rng_;
};

};  // namespace dgl

#endif  // DGL_RANDOM_H_

================================================
FILE: include/dgl/runtime/bfloat16.h
================================================
/**
 *  Copyright (c) 2023 by Contributors
 * @file dgl/runtime/bfloat16.h
 * @brief BFloat16 CPU header
 */
#ifndef DGL_RUNTIME_BFLOAT16_H_
#define DGL_RUNTIME_BFLOAT16_H_

#include

/** @brief 16-bit value type convertible to and from float. */
class BFloat16 {
  /** @brief Raw 16-bit storage. */
  uint16_t val;

 public:
  /** @brief Default constructor; the raw bits are all zero. */
  constexpr BFloat16() : val(0) {}
  // Disable lint "explicit" warning, since implicit usage on constructor is
  // expected.
BFloat16(float f) {  // NOLINT
    if (std::isnan(f)) {
      // Every NaN input is stored as the single bit pattern 0x7FC0.
      val = 0x7FC0;
    } else {
      // Anonymous union used to view the bits of the float as integers.
      union {
        uint16_t iraw16[2];
        uint32_t iraw32;
        float f32;
      };
      f32 = f;
      // The bias is 0x7FFF, plus one when bit 0 of iraw16[1] is set.
      // NOTE(review): iraw16[1] is the upper half of iraw32 only on a
      // little-endian host; with that layout this presumably implements
      // round-to-nearest-even on the 16 bits that are kept - confirm.
      const uint32_t rounding_bias = 0x00007FFF + (iraw16[1] & 0x1);
      // Keep the upper 16 bits of the biased 32-bit pattern.
      val = static_cast((iraw32 + rounding_bias) >> 16);
    }
  }

  /**
   * @brief Value whose raw bits are 0xFF80 (sign bit and the next eight bits
   * set, remaining bits zero).
   */
  static constexpr BFloat16 Min() {
    BFloat16 min;
    min.val = 0xFF80;
    return min;
  }

  /**
   * @brief Value whose raw bits are 0x7F80 (same as Min() with the sign bit
   * cleared).
   */
  static constexpr BFloat16 Max() {
    BFloat16 max;
    max.val = 0x7F80;
    return max;
  }

  /** @brief Subtract in float, then convert the result back to BFloat16. */
  BFloat16& operator-=(const float& rhs) {
    float lhs = (*this);
    (*this) = lhs - rhs;
    return *this;
  }

  /** @brief Add in float, then convert the result back to BFloat16. */
  BFloat16& operator+=(const float& rhs) {
    float lhs = (*this);
    (*this) = lhs + rhs;
    return *this;
  }

  /**
   * @brief Convert to float by placing the stored bits in raw[1] and zeroing
   * raw[0].
   */
  operator float() const {
    // NOTE(review): raw[1] is the upper half of the float only on a
    // little-endian host - confirm this is the only supported layout.
    union {
      float f;
      uint16_t raw[2];
    };
    raw[0] = 0;
    raw[1] = val;
    return f;
  }
};

#endif  // DGL_RUNTIME_BFLOAT16_H_

================================================
FILE: include/dgl/runtime/c_backend_api.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file dgl/runtime/c_backend_api.h
 * @brief DGL runtime backend API.
 *
 *  The functions defined in this header are intended to be
 *  used by compiled dgl operators, usually user do not need to use these
 *  function directly.
 */
#ifndef DGL_RUNTIME_C_BACKEND_API_H_
#define DGL_RUNTIME_C_BACKEND_API_H_

#include "c_runtime_api.h"

#ifdef __cplusplus
extern "C" {
#endif

// Backend related functions.
/**
 * @brief Backend function for modules to get function
 *  from its environment mod_node (its imports and global function).
 *  The user should not call DGLFuncFree on func.
 *
 * @param mod_node The module handle.
 * @param func_name The name of the function.
 * @param out The result function.
 * @return 0 when no error is thrown, -1 when failure happens
 */
DGL_DLL int DGLBackendGetFuncFromEnv(
    void* mod_node, const char* func_name, DGLFunctionHandle* out);
/**
 * @brief Backend function to register system-wide library symbol.
 *
 * @param name The name of the symbol
 * @param ptr The symbol address.
* @return 0 when no error is thrown, -1 when failure happens
 */
DGL_DLL int DGLBackendRegisterSystemLibSymbol(const char* name, void* ptr);

/**
 * @brief Backend function to allocate temporary workspace.
 *
 * @note The allocated space is guaranteed to be aligned to
 *       kTempAllocaAlignment.
 *
 * @param device_type The device type on which the space will be allocated.
 * @param device_id The device id on which the space will be allocated.
 * @param nbytes The size of the space requested.
 * @param dtype_code_hint The type code of the array elements. Only used in
 *        certain backends such as OpenGL.
 * @param dtype_bits_hint The type bits of the array elements. Only used in
 *        certain backends such as OpenGL.
 * @return nullptr when error is thrown, a valid ptr if success
 */
DGL_DLL void* DGLBackendAllocWorkspace(
    int device_type, int device_id, uint64_t nbytes, int dtype_code_hint,
    int dtype_bits_hint);

/**
 * @brief Backend function to free temporary workspace.
 *
 * @param device_type The device type of the workspace.
 * @param device_id The device id of the workspace.
 * @param ptr The pointer to the allocated space.
 * @return 0 when no error is thrown, -1 when failure happens
 *
 * @sa DGLBackendAllocWorkspace
 */
DGL_DLL int DGLBackendFreeWorkspace(int device_type, int device_id, void* ptr);

/**
 * @brief Environment for DGL parallel task.
 */
typedef struct {
  /**
   * @brief Auxiliary used for synchronization
   */
  void* sync_handle;
  /** @brief total number of tasks */
  int32_t num_task;
} DGLParallelGroupEnv;

/**
 * @brief The callback function to execute a parallel lambda
 * @param task_id the task id of the function.
 * @param penv The parallel environment that backs the execution.
 * @param cdata The supporting closure data.
 */
typedef int (*FDGLParallelLambda)(
    int task_id, DGLParallelGroupEnv* penv, void* cdata);

/**
 * @brief Backend function for running parallel jobs.
 *
 * @param flambda The parallel function to be launched.
 * @param cdata The closure data.
* @param num_task Number of tasks to launch, can be 0, means launch
 *           with all available threads.
 *
 * @return 0 when no error is thrown, -1 when failure happens
 */
DGL_DLL int DGLBackendParallelLaunch(
    FDGLParallelLambda flambda, void* cdata, int num_task);

/**
 * @brief BSP barrier between parallel threads
 * @param task_id the task id of the function.
 * @param penv The parallel environment that backs the execution.
 * @return 0 when no error is thrown, -1 when failure happens
 */
DGL_DLL int DGLBackendParallelBarrier(int task_id, DGLParallelGroupEnv* penv);

/**
 * @brief Simple static initialization function.
 *  Run f once and set handle to be not null.
 *  This function is mainly used for testing purposes.
 *
 * @param handle A global address to indicate f
 * @param f The function to be run
 * @param cdata The closure data to pass to the function.
 * @param nbytes Number of bytes in the closure data.
 * @return 0 when no error is thrown, -1 when failure happens
 */
DGL_DLL int DGLBackendRunOnce(
    void** handle, int (*f)(void*), void* cdata, int nbytes);

#ifdef __cplusplus
}  // DGL_EXTERN_C
#endif
#endif  // DGL_RUNTIME_C_BACKEND_API_H_

================================================
FILE: include/dgl/runtime/c_object_api.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file dgl/runtime/c_object_api.h
 *
 * @brief DGL Object C API, used to extend and prototype new CAPIs.
 *
 * @note Most API functions are registered as PackedFunc and
 *       can be grabbed via DGLFuncGetGlobal
 */
#ifndef DGL_RUNTIME_C_OBJECT_API_H_
#define DGL_RUNTIME_C_OBJECT_API_H_

#include "./c_runtime_api.h"

#ifdef __cplusplus
extern "C" {
#endif

/** @brief handle to object */
typedef void* ObjectHandle;

/**
 * @brief free the object handle
 * @param handle The object handle to be freed.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLObjectFree(ObjectHandle handle);

/**
 * @brief Convert type key to type index.
 * @param type_key The key of the type.
* @param out_index the corresponding type index.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLObjectTypeKey2Index(const char* type_key, int* out_index);

/**
 * @brief Get runtime type index of the object.
 * @param handle the object handle.
 * @param out_index the corresponding type index.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLObjectGetTypeIndex(ObjectHandle handle, int* out_index);

/**
 * @brief get attributes given key
 * @param handle The object handle
 * @param key The attribute name
 * @param out_value The attribute value
 * @param out_type_code The type code of the attribute.
 * @param out_success Whether get is successful.
 * @return 0 when success, -1 when failure happens
 * @note API calls always exchange with type bits=64, lanes=1
 */
DGL_DLL int DGLObjectGetAttr(
    ObjectHandle handle, const char* key, DGLValue* out_value,
    int* out_type_code, int* out_success);

/**
 * @brief get attribute names in the object.
 * @param handle The object handle
 * @param out_size The number of attribute names
 * @param out_array The array of attribute names.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLObjectListAttrNames(
    ObjectHandle handle, int* out_size, const char*** out_array);
#ifdef __cplusplus
}  // DGL_EXTERN_C
#endif
#endif  // DGL_RUNTIME_C_OBJECT_API_H_

================================================
FILE: include/dgl/runtime/c_runtime_api.h
================================================
/**
 *  Copyright (c) 2016-2022 by Contributors
 * @file dgl/runtime/c_runtime_api.h
 * @brief DGL runtime library.
*
 * This runtime is adapted from the TVM project (commit: 2ce5277)
 */
#ifndef DGL_RUNTIME_C_RUNTIME_API_H_
#define DGL_RUNTIME_C_RUNTIME_API_H_

// Macros to do weak linking
#ifdef _MSC_VER
#define DGL_WEAK __declspec(selectany)
#else
#define DGL_WEAK __attribute__((weak))
#endif

// Under Emscripten, DGL_DLL marks symbols to be kept alive.
#ifdef __EMSCRIPTEN__
#include
#define DGL_DLL EMSCRIPTEN_KEEPALIVE
#endif

// Otherwise DGL_DLL is dllexport/dllimport on Windows (selected by
// DGL_EXPORTS) and empty elsewhere.
#ifndef DGL_DLL
#ifdef _WIN32
#ifdef DGL_EXPORTS
#define DGL_DLL __declspec(dllexport)
#else
#define DGL_DLL __declspec(dllimport)
#endif
#else
#define DGL_DLL
#endif
#endif

// DGL version
#define DGL_VERSION "2.5"

#ifdef __cplusplus
extern "C" {
#endif
#include
#include

/** @brief type of array index. */
typedef int64_t dgl_index_t;

/**
 * @brief The device type in DGLContext.
 */
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
  /** @brief CPU device */
  kDGLCPU = 1,
  /** @brief CUDA GPU device */
  kDGLCUDA = 2,
  // add more devices once supported
} DGLDeviceType;

/**
 * @brief The object type code is used in DGL FFI to indicate the types of
 *        objects passed between C and Python.
 */
typedef enum {
  kObjectInt = 0U,
  kObjectUInt = 1U,
  kObjectFloat = 2U,
  kHandle = 3U,
  kNull = 4U,
  kDGLDataType = 5U,
  kDGLContext = 6U,
  kArrayHandle = 7U,
  kObjectHandle = 8U,
  kModuleHandle = 9U,
  kFuncHandle = 10U,
  kStr = 11U,
  kBytes = 12U,
  kNDArrayContainer = 13U,
  // Extension codes for other frameworks to integrate DGL PackedFunc.
  // To make sure the ids of different frameworks do not conflict, use first
  // and last sections to mark ranges.
  // Open an issue at the repo if you need a section of code.
  kExtBegin = 15U,
  kNNVMFirst = 16U,
  kNNVMLast = 20U,
  // The following section of code is used for non-reserved types.
  kExtReserveEnd = 64U,
  kExtEnd = 128U
} DGLObjectTypeCode;

/**
 * @brief The type code options DGLDataType.
*/
typedef enum {
  /** @brief signed integer */
  kDGLInt = 0U,
  /** @brief unsigned integer */
  kDGLUInt = 1U,
  /** @brief IEEE floating point */
  kDGLFloat = 2U,
  /** @brief bfloat16 */
  kDGLBfloat = 4U,
  // add more data types if we are going to support them
} DGLDataTypeCode;

/**
 * @brief The data type the tensor can hold. The data type is assumed to follow
 * the native endianness. An explicit error message should be raised when
 * attempting to export an array with non-native endianness.
 *
 *  Examples
 *   - float: type_code = 2, bits = 32, lanes=1
 *   - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
 *   - int8: type_code = 0, bits = 8, lanes=1
 */
typedef struct {
  /**
   * @brief Type code of base types.
   * We keep it uint8_t instead of DGLDataTypeCode for minimal memory
   * footprint, but the value should be one of DGLDataTypeCode enum values.
   * */
  uint8_t code;
  /**
   * @brief Number of bits, common choices are 8, 16, 32.
   */
  uint8_t bits;
  /** @brief Number of lanes in the type, used for vector types. */
  uint16_t lanes;
} DGLDataType;

/**
 * @brief The Device information, abstract away common device types.
 */
typedef struct {
  /** @brief The device type used in the device. */
  DGLDeviceType device_type;
  /**
   * @brief The device index.
   * For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
   */
  int32_t device_id;
} DGLContext;

/**
 * @brief The tensor array structure to DGL API.
 * The structure is heavily inspired by DLTensor from DLPack.
 */
typedef struct {
  /**
   * @brief The data pointer points to the allocated data.
   *
   * Depending on the device context, it can be a CPU pointer, or a CUDA
   * device pointer or acl_mem handle in OpenCL.
   * This pointer is always aligned to 256 bytes as in CUDA. Use the
   * `byte_offset` field to mark the beginning of the actual data (if the
   * address is not 256 byte aligned).
*
   * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow,
   * TVM, perhaps others) do not adhere to this 256 byte alignment requirement
   * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This is likely to be
   * fixed in the future; at the moment it is recommended
   * to not rely on the data pointer being correctly aligned.
   *
   * For a DGLArray, the size of memory required to store the contents of
   * data can be calculated as follows:
   *
   * @code{.c}
   * static inline size_t GetDataSize(const DGLArray* t) {
   *   size_t size = 1;
   *   for (int32_t i = 0; i < t->ndim; ++i) {
   *     size *= t->shape[i];
   *   }
   *   size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
   *   return size;
   * }
   * @endcode
   */
  void* data;
  /** @brief The device of the tensor */
  DGLContext ctx;
  /** @brief Number of dimensions */
  int32_t ndim;
  /** @brief The data type of the pointer */
  DGLDataType dtype;
  /** @brief The shape of the tensor */
  int64_t* shape;
  /**
   * @brief strides of the tensor (in number of elements, not bytes)
   *  can be NULL, indicating tensor is compact and row-major.
   */
  int64_t* strides;
  /** @brief The offset in bytes to the beginning pointer to data */
  uint64_t byte_offset;
} DGLArray;

/** @brief the array handle */
typedef DGLArray* DGLArrayHandle;

/**
 * @brief Union type of values
 *  being passed through API and function calls.
 */
typedef union {
  int64_t v_int64;
  double v_float64;
  void* v_handle;
  const char* v_str;
  DGLDataType v_type;
  DGLContext v_ctx;
} DGLValue;

/**
 * @brief Byte array type used to pass in byte array
 *  When kBytes is used as data type.
 */
typedef struct {
  const char* data;
  size_t size;
} DGLByteArray;

/** @brief Handle to DGL runtime modules. */
typedef void* DGLModuleHandle;
/** @brief Handle to packed function handle. */
typedef void* DGLFunctionHandle;
/** @brief Handle to hold return value. */
typedef void* DGLRetValueHandle;
/**
 * @brief The stream that is specific to device
 * can be NULL, which indicates the default one.
*/
typedef void* DGLStreamHandle;

/**
 * @brief Used for implementing C API function.
 *  Set last error message before return.
 * @param msg The error message to be set.
 */
DGL_DLL void DGLAPISetLastError(const char* msg);

/**
 * @brief return str message of the last error
 *  all functions in this file will return 0 when success
 *  and -1 when an error occurred,
 *  DGLGetLastError can be called to retrieve the error
 *
 *  this function is threadsafe and can be called by different threads
 *
 * @return error info
 */
DGL_DLL const char* DGLGetLastError(void);

/**
 * @brief Load module from file.
 * @param file_name The file name to load the module from.
 * @param format The format of the module.
 * @param out The result module
 *
 * @return 0 when success, -1 when failure happens
 * @note The resulting module does not contain import relation.
 *  It can be reconstructed by DGLModImport.
 */
DGL_DLL int DGLModLoadFromFile(
    const char* file_name, const char* format, DGLModuleHandle* out);

/**
 * @brief Add dep to mod's dependency.
 *  This allows functions in this module to use modules.
 *
 * @param mod The module handle.
 * @param dep The dependent module to be imported.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLModImport(DGLModuleHandle mod, DGLModuleHandle dep);

/**
 * @brief Get function from the module.
 * @param mod The module handle.
 * @param func_name The name of the function.
 * @param query_imports Whether to query imported modules
 * @param out The result function, can be NULL if it is not available.
 * @return 0 when no error is thrown, -1 when failure happens
 */
DGL_DLL int DGLModGetFunction(
    DGLModuleHandle mod, const char* func_name, int query_imports,
    DGLFunctionHandle* out);

/**
 * @brief Free front-end extension type resource.
 * @param handle The extension handle.
 * @param type_code The type code of the extension type.
* @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLExtTypeFree(void* handle, int type_code);

/**
 * @brief Free the Module
 * @param mod The module to be freed.
 *
 * @note This may not free up the module's resources
 *  if an active DGLFunctionHandle uses the module
 *  or if this module is imported by another active module.
 *
 *  All functions remain valid until DGLFuncFree is called.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLModFree(DGLModuleHandle mod);

/**
 * @brief Free the function when it is no longer needed.
 * @param func The function handle
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLFuncFree(DGLFunctionHandle func);

/**
 * @brief Call a Packed DGL Function.
 *
 * @param func node handle of the function.
 * @param arg_values The arguments
 * @param type_codes The type codes of the arguments
 * @param num_args Number of arguments.
 *
 * @param ret_val The return value.
 * @param ret_type_code the type code of return value.
 *
 * @return 0 when success, -1 when failure happens
 * @note DGL calls always exchange with type bits=64, lanes=1
 *
 * @note API calls always exchange with type bits=64, lanes=1
 *   If an API call returns container handles (e.g. FunctionHandle)
 *   these handles should be managed by the front-end.
 *   The front-end needs to call the free function (e.g. DGLFuncFree)
 *   to free these handles.
 */
DGL_DLL int DGLFuncCall(
    DGLFunctionHandle func, DGLValue* arg_values, int* type_codes, int num_args,
    DGLValue* ret_val, int* ret_type_code);

/**
 * @brief Set the return value of DGLPackedCFunc.
 *
 *  This function is called by DGLPackedCFunc to set the return value.
 *  When this function is not called, the function returns null by default.
 *
 * @param ret The return value handle, passed as ret in DGLPackedCFunc
 * @param value The value to be returned.
 * @param type_code The type of the value to be returned.
 * @param num_ret Number of return values, for now only 1 is supported.
*/
DGL_DLL int DGLCFuncSetReturn(
    DGLRetValueHandle ret, DGLValue* value, int* type_code, int num_ret);

/**
 * @brief Inplace translate callback argument value to return value.
 *  This is only needed for non-POD arguments.
 *
 * @param value The value to be translated.
 * @param code The type code to be translated.
 * @note This function will do a shallow copy when necessary.
 *
 * @return 0 when success, -1 when failure happens.
 */
DGL_DLL int DGLCbArgToReturn(DGLValue* value, int code);

/**
 * @brief C type of packed function.
 *
 * @param args The arguments
 * @param type_codes The type codes of the arguments
 * @param num_args Number of arguments.
 * @param ret The return value handle.
 * @param resource_handle The additional resource handle from the front-end.
 * @return 0 if success, -1 if failure happens, set error via
 *         DGLAPISetLastError.
 * @sa DGLCFuncSetReturn
 */
typedef int (*DGLPackedCFunc)(
    DGLValue* args, int* type_codes, int num_args, DGLRetValueHandle ret,
    void* resource_handle);

/**
 * @brief C callback to free the resource handle in C packed function.
 * @param resource_handle The additional resource handle from the front-end.
 */
typedef void (*DGLPackedCFuncFinalizer)(void* resource_handle);

/**
 * @brief Signature for extension function declarer.
 *
 *  DGL calls this function to get the extension functions.
 *  The declarer will call register_func to register functions and their names.
 *
 * @param register_func_handle The register function
 * @return 0 if success, -1 if failure happens
 */
typedef int (*DGLExtensionFuncDeclarer)(DGLFunctionHandle register_func_handle);

/**
 * @brief Wrap a DGLPackedCFunc to become a FunctionHandle.
 *
 * The resource_handle will be managed by DGL API, until the function is no
 * longer used.
 *
 * @param func The packed C function.
 * @param resource_handle The resource handle from front-end, can be NULL.
 * @param fin The finalizer on resource handle when the FunctionHandle gets
 *        freed, can be NULL.
 * @param out the result function handle.
* @return 0 when success, -1 when failure happens.
 */
DGL_DLL int DGLFuncCreateFromCFunc(
    DGLPackedCFunc func, void* resource_handle, DGLPackedCFuncFinalizer fin,
    DGLFunctionHandle* out);

/**
 * @brief Register the function to runtime's global table.
 *
 * The registered function then can be pulled by the backend by the name.
 *
 * @param name The name of the function.
 * @param f The function to be registered.
 * @param override Whether to allow overriding an already registered function.
 */
DGL_DLL int DGLFuncRegisterGlobal(
    const char* name, DGLFunctionHandle f, int override);

/**
 * @brief Get a global function.
 *
 * @param name The name of the function.
 * @param out the result function pointer, NULL if it does not exist.
 *
 * @note The function handle of a global function is managed by the DGL
 *  runtime, so DGLFuncFree should not be called on it.
 */
DGL_DLL int DGLFuncGetGlobal(const char* name, DGLFunctionHandle* out);

/**
 * @brief List all the globally registered function names
 * @param out_size The number of functions
 * @param out_array The array of function names.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLFuncListGlobalNames(int* out_size, const char*** out_array);

// Array related apis for quick prototyping
/**
 * @brief Allocate a nd-array's memory,
 *  including space of shape, of given spec.
 *
 * @param shape The shape of the array, the data content will be copied to out
 * @param ndim The number of dimension of the array.
 * @param dtype_code The type code of the dtype
 * @param dtype_bits The number of bits of dtype
 * @param dtype_lanes The number of lanes in the dtype.
 * @param device_type The device type of context
 * @param device_id The device id of context.
 * @param out The output handle.
* @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLArrayAlloc(
    const dgl_index_t* shape, int ndim, int dtype_code, int dtype_bits,
    int dtype_lanes, int device_type, int device_id, DGLArrayHandle* out);

/**
 * @brief Allocate a nd-array with shared memory,
 *  including space of shape, of given spec.
 *
 * @param mem_name The name of the shared memory
 * @param shape The shape of the array, the data content will be copied to out
 * @param ndim The number of dimension of the array.
 * @param dtype_code The type code of the dtype
 * @param dtype_bits The number of bits of dtype
 * @param dtype_lanes The number of lanes in the dtype.
 * @param is_create whether the shared memory is created
 * @param out The output handle.
 * @return 0 when success, -1 when failure happens
 */
int DGLArrayAllocSharedMem(
    const char* mem_name, const dgl_index_t* shape, int ndim, int dtype_code,
    int dtype_bits, int dtype_lanes, bool is_create, DGLArrayHandle* out);

/**
 * @brief Free the DGL Array.
 * @param handle The array handle to be freed.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLArrayFree(DGLArrayHandle handle);

/**
 * @brief Copy array data from CPU byte array.
 * @param handle The array handle.
 * @param data the data pointer
 * @param nbytes The number of bytes to copy.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLArrayCopyFromBytes(
    DGLArrayHandle handle, void* data, size_t nbytes);

/**
 * @brief Copy array data to CPU byte array.
 * @param handle The array handle.
 * @param data the data pointer
 * @param nbytes The number of bytes to copy.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLArrayCopyToBytes(
    DGLArrayHandle handle, void* data, size_t nbytes);

/**
 * @brief Copy the array, both from and to must be valid during the copy.
 * @param from The array to be copied from.
 * @param to The target space.
* @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLArrayCopyFromTo(DGLArrayHandle from, DGLArrayHandle to);

/**
 * @brief Create a new runtime stream.
 *
 * @param device_type The device type of context
 * @param device_id The device id of context
 * @param out The new stream handle
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLStreamCreate(
    int device_type, int device_id, DGLStreamHandle* out);

/**
 * @brief Free a created stream handle.
 *
 * @param device_type The device type of context
 * @param device_id The device id of context
 * @param stream The stream to be freed
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLStreamFree(
    int device_type, int device_id, DGLStreamHandle stream);

/**
 * @brief Set the runtime stream of current thread to be stream.
 *  The subsequent calls to the same device_type
 *  will use the stream handle that was set.
 *  The specific type of stream is runtime device dependent.
 *
 * @param device_type The device type of context
 * @param device_id The device id of context.
 * @param handle The stream handle.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLSetStream(
    int device_type, int device_id, DGLStreamHandle handle);

/**
 * @brief Get the runtime stream of current thread.
 *
 * @param device_type The device type of context
 * @param device_id The device id of context.
 * @param handle The stream handle.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLGetStream(
    int device_type, int device_id, DGLStreamHandle* handle);

/**
 * @brief Wait until all computations on stream complete.
 *
 * @param device_type The device type of context
 * @param device_id The device id of context.
 * @param stream The stream to be synchronized.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLSynchronize(
    int device_type, int device_id, DGLStreamHandle stream);

/**
 * @brief Synchronize two streams of execution.
*
 * @param device_type The device type of context
 * @param device_id The device id of context
 * @param src The source stream to synchronize.
 * @param dst The destination stream to synchronize.
 * @return 0 when success, -1 when failure happens
 */
DGL_DLL int DGLStreamStreamSynchronize(
    int device_type, int device_id, DGLStreamHandle src, DGLStreamHandle dst);

/**
 * @brief Load tensor adapter.
 * @return 0 when success, -1 when failure happens.
 */
DGL_DLL int DGLLoadTensorAdapter(const char* path);

/**
 * @brief Pin host memory.
 */
int DGLArrayPinData(DGLArrayHandle handle, DGLContext ctx);

/**
 * @brief Unpin host memory.
 */
int DGLArrayUnpinData(DGLArrayHandle handle, DGLContext ctx);

/**
 * @brief Record the stream that's using this tensor.
 */
int DGLArrayRecordStream(DGLArrayHandle handle, DGLStreamHandle stream);

/**
 * @brief Bug report macro.
 *
 * This serves as a sanity check on system side to make sure the code is correct
 * by checking whether a condition always holds for complex reasons.  Failing
 * the condition signifies a system bug instead of users giving invalid inputs
 * or using the functionality incorrectly.
 *
 * Hints the user to file a bug report if the condition fails.
 */
#define BUG_IF_FAIL(cond) \
  CHECK(cond) \
      << "A bug has been occurred. " \
         "Please file a bug report at https://github.com/dmlc/dgl/issues. " \
         "Message: "

#ifdef __cplusplus
}  // DGL_EXTERN_C
#endif
#endif  // DGL_RUNTIME_C_RUNTIME_API_H_

================================================
FILE: include/dgl/runtime/config.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file runtime/config.h
 * @brief DGL runtime config
 */

#ifndef DGL_RUNTIME_CONFIG_H_
#define DGL_RUNTIME_CONFIG_H_

namespace dgl {
namespace runtime {

/** @brief Runtime configuration, accessed through Global(). */
class Config {
 public:
  /** @brief Return the address of the function-local static instance. */
  static Config* Global() {
    static Config config;
    return &config;
  }

  // Enable or disable the use of libxsmm for SpMM
  void EnableLibxsmm(bool);
  bool IsLibxsmmAvailable() const;

 private:
  // Private constructor: instances are obtained through Global().
  Config();
  bool libxsmm_;
};

}  // namespace runtime
}  // namespace dgl

#endif  // DGL_RUNTIME_CONFIG_H_

================================================
FILE: include/dgl/runtime/container.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file runtime/container.h
 * @brief Defines the container object data structures.
 */
#ifndef DGL_RUNTIME_CONTAINER_H_
#define DGL_RUNTIME_CONTAINER_H_

#include
#include
#include
#include

#include "object.h"
#include "packed_func.h"

namespace dgl {
namespace runtime {

/**
 * @brief value object.
 *
 * It is typically used to wrap a non-Object type to Object type.
 * Any type that is supported by DGLRetValue is supported by this.
 */
class ValueObject : public Object {
 public:
  /** @brief the value data */
  DGLRetValue data;

  static constexpr const char* _type_key = "Value";
  DGL_DECLARE_OBJECT_TYPE_INFO(ValueObject, Object);
};

/** @brief Construct a value object.
*/
template
inline std::shared_ptr MakeValue(T&& val) {
  auto obj = std::make_shared();
  // val is assigned as an lvalue here (no forwarding).
  obj->data = val;
  return obj;
}

/** @brief Value reference type */
class Value : public ObjectRef {
 public:
  Value() {}
  explicit Value(std::shared_ptr o) : ObjectRef(o) {}

  /** @brief Access the held object as a const ValueObject pointer. */
  const ValueObject* operator->() const {
    return static_cast(obj_.get());
  }

  using ContainerType = ValueObject;
};

/** @brief list obj content in list */
class ListObject : public Object {
 public:
  /** @brief the data content */
  std::vector > data;

  void VisitAttrs(AttrVisitor* visitor) final {
    // Visiting a list has no effect.
  }

  static constexpr const char* _type_key = "List";
  DGL_DECLARE_OBJECT_TYPE_INFO(ListObject, Object);
};

/** @brief map obj content */
class MapObject : public Object {
 public:
  void VisitAttrs(AttrVisitor* visitor) final {
    // Visiting a map has no effect.
  }
  // hash function: hashes the raw pointer held by the shared_ptr
  struct Hash {
    size_t operator()(const std::shared_ptr& n) const {
      return std::hash()(n.get());
    }
  };
  // comparator: two keys are equal when they hold the same raw pointer
  struct Equal {
    bool operator()(
        const std::shared_ptr& a, const std::shared_ptr& b) const {
      return a.get() == b.get();
    }
  };

  /** @brief The corresponding container type */
  using ContainerType = std::unordered_map<
      std::shared_ptr, std::shared_ptr, Hash, Equal>;

  /** @brief the data content */
  ContainerType data;

  static constexpr const char* _type_key = "Map";
  DGL_DECLARE_OBJECT_TYPE_INFO(MapObject, Object);
};

/** @brief specialized map obj with string as key */
class StrMapObject : public Object {
 public:
  void VisitAttrs(AttrVisitor* visitor) final {
    // Visiting a map has no effect.
  }
  /** @brief The corresponding container type */
  using ContainerType = std::unordered_map >;

  /** @brief the data content */
  ContainerType data;

  static constexpr const char* _type_key = "StrMap";
  DGL_DECLARE_OBJECT_TYPE_INFO(StrMapObject, Object);
};

/**
 * @brief iterator adapter that adapts TIter to return another type.
 * @tparam Converter a struct that contains converting function
 * @tparam TIter the content iterator type.
*/ template class IterAdapter { public: explicit IterAdapter(TIter iter) : iter_(iter) {} inline IterAdapter& operator++() { // NOLINT(*) ++iter_; return *this; } inline IterAdapter& operator++(int) { // NOLINT(*) ++iter_; return *this; } inline IterAdapter operator+(int offset) const { // NOLINT(*) return IterAdapter(iter_ + offset); } inline bool operator==(IterAdapter other) const { return iter_ == other.iter_; } inline bool operator!=(IterAdapter other) const { return !(*this == other); } inline const typename Converter::ResultType operator*() const { return Converter::convert(*iter_); } private: TIter iter_; }; /** * @brief List container of ObjectRef. * * List implements copy on write semantics, which means list is mutable * but copy will happen when list is referenced in more than two places. * * That is said when using this container for runtime arguments or return * values, try use the constructor to create the list at once (for example * from an existing vector). * * operator[] only provide const access, use Set to mutate the content. * * @tparam T The content ObjectRef type. * * @note The element type must subclass \c ObjectRef. 
Otherwise, the * compiler would throw an error: * * * error: no type named 'type' in 'struct std::enable_if' * * * Example: * * * // List list; // fails * // List list2; // fails * List list; // works * list.push_back(Value(MakeValue(1))); // works * list.push_back(Value(MakeValue(NDArray::Empty(shape, dtype, ctx)))); // * works * */ template < typename T, typename = typename std::enable_if::value>::type> class List : public ObjectRef { public: /** * @brief default constructor */ List() { obj_ = std::make_shared(); } /** * @brief move constructor * @param other source */ List(List&& other) { // NOLINT(*) obj_ = std::move(other.obj_); } /** * @brief copy constructor * @param other source */ List(const List& other) : ObjectRef(other.obj_) { // NOLINT(*) } /** * @brief constructor from pointer * @param n the container pointer */ explicit List(std::shared_ptr n) : ObjectRef(n) {} /** * @brief constructor from iterator * @param begin begin of iterator * @param end end of iterator * @tparam IterType The type of iterator */ template List(IterType begin, IterType end) { assign(begin, end); } /** * @brief constructor from initializer list * @param init The initalizer list */ List(std::initializer_list init) { // NOLINT(*) assign(init.begin(), init.end()); } /** * @brief constructor from vector * @param init The vector */ List(const std::vector& init) { // NOLINT(*) assign(init.begin(), init.end()); } /** * @brief Constructs a container with n elements. Each element is a copy of * val * @param n The size of the container * @param val The init value */ explicit List(size_t n, const T& val) { auto tmp_obj = std::make_shared(); for (size_t i = 0; i < n; ++i) { tmp_obj->data.push_back(val.obj_); } obj_ = std::move(tmp_obj); } /** * @brief move assign operator * @param other The source of assignment * @return reference to self. 
*/ List& operator=(List&& other) { obj_ = std::move(other.obj_); return *this; } /** * @brief copy assign operator * @param other The source of assignment * @return reference to self. */ List& operator=(const List& other) { obj_ = other.obj_; return *this; } /** * @brief reset the list to content from iterator. * @param begin begin of iterator * @param end end of iterator * @tparam IterType The type of iterator */ template void assign(IterType begin, IterType end) { auto n = std::make_shared(); for (IterType it = begin; it != end; ++it) { n->data.push_back((*it).obj_); } obj_ = std::move(n); } /** * @brief Read i-th element from list. * @param i The index * @return the i-th element. */ inline const T operator[](size_t i) const { return T(static_cast(obj_.get())->data[i]); } /** @return The size of the list */ inline size_t size() const { if (obj_.get() == nullptr) return 0; return static_cast(obj_.get())->data.size(); } /** * @brief copy on write semantics * Do nothing if current handle is the unique copy of the list. * Otherwise make a new copy of the list to ensure the current handle * hold a unique copy. * * @return Handle to the internal obj container(which ganrantees to be unique) */ inline ListObject* CopyOnWrite() { if (obj_.get() == nullptr || !obj_.unique()) { obj_ = std::make_shared( *static_cast(obj_.get())); } return static_cast(obj_.get()); } /** * @brief push a new item to the back of the list * @param item The item to be pushed. */ inline void push_back(const T& item) { ListObject* n = this->CopyOnWrite(); n->data.push_back(item.obj_); } /** * @brief set i-th element of the list. * @param i The index * @param value The value to be setted. 
*/ inline void Set(size_t i, const T& value) { ListObject* n = this->CopyOnWrite(); n->data[i] = value.obj_; } /** @return whether list is empty */ inline bool empty() const { return size() == 0; } /** @brief Copy the content to a vector */ inline std::vector ToVector() const { return std::vector(begin(), end()); } /** @brief specify container obj */ using ContainerType = ListObject; struct Ptr2ObjectRef { using ResultType = T; static inline T convert(const std::shared_ptr& n) { return T(n); } }; using iterator = IterAdapter< Ptr2ObjectRef, std::vector >::const_iterator>; using reverse_iterator = IterAdapter< Ptr2ObjectRef, std::vector >::const_reverse_iterator>; /** @return begin iterator */ inline iterator begin() const { return iterator(static_cast(obj_.get())->data.begin()); } /** @return end iterator */ inline iterator end() const { return iterator(static_cast(obj_.get())->data.end()); } /** @return rbegin iterator */ inline reverse_iterator rbegin() const { return reverse_iterator( static_cast(obj_.get())->data.rbegin()); } /** @return rend iterator */ inline reverse_iterator rend() const { return reverse_iterator( static_cast(obj_.get())->data.rend()); } }; /** * @brief Map container of ObjectRef->ObjectRef. * * Map implements copy on write semantics, which means map is mutable * but copy will happen when list is referenced in more than two places. * * That is said when using this container for runtime arguments or return * values, try use the constructor to create it at once (for example * from an existing std::map). * * operator[] only provide const acces, use Set to mutate the content. * * @tparam K The key ObjectRef type. * @tparam V The value ObjectRef type. * * @note The element type must subclass \c ObjectRef. 
Otherwise, the * compiler would throw an error: * * * error: no type named 'type' in 'struct std::enable_if' * * * Example: * * * // Map map; // fails * // Map map2; // fails * Map map; // works * map.Set("key1", Value(MakeValue(1))); // works * map.Set("key2", Value(MakeValue(NDArray::Empty(shape, dtype, ctx)))); // * works * */ template < typename K, typename V, typename = typename std::enable_if< std::is_base_of::value || std::is_base_of::value>::type, typename = typename std::enable_if::value>::type> class Map : public ObjectRef { public: /** * @brief default constructor */ Map() { obj_ = std::make_shared(); } /** * @brief move constructor * @param other source */ Map(Map&& other) { // NOLINT(*) obj_ = std::move(other.obj_); } /** * @brief copy constructor * @param other source */ Map(const Map& other) : ObjectRef(other.obj_) { // NOLINT(*) } /** * @brief constructor from pointer * @param n the container pointer */ explicit Map(std::shared_ptr n) : ObjectRef(n) {} /** * @brief constructor from iterator * @param begin begin of iterator * @param end end of iterator * @tparam IterType The type of iterator */ template Map(IterType begin, IterType end) { assign(begin, end); } /** * @brief constructor from initializer list * @param init The initalizer list */ Map(std::initializer_list > init) { // NOLINT(*) assign(init.begin(), init.end()); } /** * @brief constructor from vector * @param init The vector */ template Map(const std::unordered_map& init) { // NOLINT(*) assign(init.begin(), init.end()); } /** * @brief move assign operator * @param other The source of assignment * @return reference to self. */ Map& operator=(Map&& other) { obj_ = std::move(other.obj_); return *this; } /** * @brief copy assign operator * @param other The source of assignment * @return reference to self. */ Map& operator=(const Map& other) { obj_ = other.obj_; return *this; } /** * @brief reset the list to content from iterator. 
* @param begin begin of iterator * @param end end of iterator * @tparam IterType The type of iterator */ template void assign(IterType begin, IterType end) { auto n = std::shared_ptr(); for (IterType i = begin; i != end; ++i) { n->data.emplace(std::make_pair(i->first.obj_, i->second.obj_)); } obj_ = std::move(n); } /** * @brief Read element from map. * @param key The key * @return the corresonding element. */ inline const V operator[](const K& key) const { return V(static_cast(obj_.get())->data.at(key.obj_)); } /** * @brief Read element from map. * @param key The key * @return the corresonding element. */ inline const V at(const K& key) const { return V(static_cast(obj_.get())->data.at(key.obj_)); } /** @return The size of the list */ inline size_t size() const { if (obj_.get() == nullptr) return 0; return static_cast(obj_.get())->data.size(); } /** @return The size of the list */ inline size_t count(const K& key) const { if (obj_.get() == nullptr) return 0; return static_cast(obj_.get())->data.count(key.obj_); } /** * @brief copy on write semantics * Do nothing if current handle is the unique copy of the list. * Otherwise make a new copy of the list to ensure the current handle * hold a unique copy. * * @return Handle to the internal obj container(which ganrantees to be unique) */ inline MapObject* CopyOnWrite() { if (obj_.get() == nullptr || !obj_.unique()) { obj_ = std::make_shared( *static_cast(obj_.get())); } return static_cast(obj_.get()); } /** * @brief set the Map. * @param key The index key. * @param value The value to be setted. 
*/ inline void Set(const K& key, const V& value) { MapObject* n = this->CopyOnWrite(); n->data[key.obj_] = value.obj_; } /** @return whether list is empty */ inline bool empty() const { return size() == 0; } /** @brief specify container obj */ using ContainerType = MapObject; struct Ptr2ObjectRef { using ResultType = std::pair; static inline ResultType convert( const std::pair, std::shared_ptr >& n) { return std::make_pair(K(n.first), V(n.second)); } }; using iterator = IterAdapter; /** @return begin iterator */ inline iterator begin() const { return iterator(static_cast(obj_.get())->data.begin()); } /** @return end iterator */ inline iterator end() const { return iterator(static_cast(obj_.get())->data.end()); } /** @return begin iterator */ inline iterator find(const K& key) const { return iterator( static_cast(obj_.get())->data.find(key.obj_)); } }; // specialize of string map template class Map : public ObjectRef { public: // for code reuse Map() { obj_ = std::make_shared(); } Map(Map&& other) { // NOLINT(*) obj_ = std::move(other.obj_); } Map(const Map& other) : ObjectRef(other.obj_) { // NOLINT(*) } explicit Map(std::shared_ptr n) : ObjectRef(n) {} template Map(IterType begin, IterType end) { assign(begin, end); } Map(std::initializer_list > init) { // NOLINT(*) assign(init.begin(), init.end()); } template Map(const std::unordered_map& init) { // NOLINT(*) assign(init.begin(), init.end()); } Map& operator=(Map&& other) { obj_ = std::move(other.obj_); return *this; } Map& operator=(const Map& other) { obj_ = other.obj_; return *this; } template void assign(IterType begin, IterType end) { auto n = std::make_shared(); for (IterType i = begin; i != end; ++i) { n->data.emplace(std::make_pair(i->first, i->second.obj_)); } obj_ = std::move(n); } inline const V operator[](const std::string& key) const { return V(static_cast(obj_.get())->data.at(key)); } inline const V at(const std::string& key) const { return V(static_cast(obj_.get())->data.at(key)); } inline size_t 
size() const { if (obj_.get() == nullptr) return 0; return static_cast(obj_.get())->data.size(); } inline size_t count(const std::string& key) const { if (obj_.get() == nullptr) return 0; return static_cast(obj_.get())->data.count(key); } inline StrMapObject* CopyOnWrite() { if (obj_.get() == nullptr || !obj_.unique()) { obj_ = std::make_shared( *static_cast(obj_.get())); } return static_cast(obj_.get()); } inline void Set(const std::string& key, const V& value) { StrMapObject* n = this->CopyOnWrite(); n->data[key] = value.obj_; } inline bool empty() const { return size() == 0; } using ContainerType = StrMapObject; struct Ptr2ObjectRef { using ResultType = std::pair; static inline ResultType convert( const std::pair >& n) { return std::make_pair(n.first, V(n.second)); } }; using iterator = IterAdapter; /** @return begin iterator */ inline iterator begin() const { return iterator(static_cast(obj_.get())->data.begin()); } /** @return end iterator */ inline iterator end() const { return iterator(static_cast(obj_.get())->data.end()); } /** @return begin iterator */ inline iterator find(const std::string& key) const { return iterator( static_cast(obj_.get())->data.find(key)); } }; /** * @brief Helper function to convert a List object to a vector. * @tparam T element type * @param list Input list object. * @return std vector */ template inline std::vector ListValueToVector(const List& list) { std::vector ret; ret.reserve(list.size()); for (Value val : list) // (BarclayII) apparently MSVC 2017 CL 19.10 had trouble parsing // ret.push_back(val->data) // So I kindly tell it how to properly parse it. 
ret.push_back(val->data.operator T()); return ret; } } // namespace runtime } // namespace dgl #endif // DGL_RUNTIME_CONTAINER_H_ ================================================ FILE: include/dgl/runtime/device_api.h ================================================ /** * Copyright (c) 2016 by Contributors * @file dgl/runtime/device_api.h * @brief Abstract device memory management API */ #ifndef DGL_RUNTIME_DEVICE_API_H_ #define DGL_RUNTIME_DEVICE_API_H_ #include #include "c_runtime_api.h" #include "packed_func.h" namespace dgl { namespace runtime { /** * @brief the query type into GetAttr */ enum DeviceAttrKind : int { kExist = 0, kMaxThreadsPerBlock = 1, kWarpSize = 2, kMaxSharedMemoryPerBlock = 3, kComputeVersion = 4, kDeviceName = 5, kMaxClockRate = 6, kMultiProcessorCount = 7, kMaxThreadDimensions = 8 }; /** @brief Number of bytes each allocation must align to */ constexpr int kAllocAlignment = 64; /** @brief Number of bytes each allocation must align to in temporary allocation */ constexpr int kTempAllocaAlignment = 64; /** @brief Maximum size that can be allocated on stack */ constexpr int kMaxStackAlloca = 1024; /** * @brief DGL Runtime Device API, abstracts the device * specific interface for memory management. */ class DeviceAPI { public: /** @brief virtual destructor */ virtual ~DeviceAPI() {} /** * @brief Check whether the device is available. */ virtual bool IsAvailable() { return true; } /** * @brief Set the environment device id to ctx * @param ctx The context to be set. */ virtual void SetDevice(DGLContext ctx) = 0; /** * @brief Get attribute of specified device. * @param ctx The device context * @param kind The result kind * @param rv The return value. * @sa DeviceAttrKind */ virtual void GetAttr( DGLContext ctx, DeviceAttrKind kind, DGLRetValue* rv) = 0; /** * @brief Allocate a data space on device. * @param ctx The device context to perform operation. * @param nbytes The number of bytes in memory. * @param alignment The alignment of the memory. 
* @param type_hint The type of elements. Only needed by certain backends such * as OpenGL, as nbytes & alignment are sufficient for most backends. * @return The allocated device pointer. */ virtual void* AllocDataSpace( DGLContext ctx, size_t nbytes, size_t alignment, DGLDataType type_hint) = 0; /** * @brief Free a data space on device. * @param ctx The device context to perform operation. * @param ptr The data space. */ virtual void FreeDataSpace(DGLContext ctx, void* ptr) = 0; /** * @brief copy data from one place to another * @param from The source array. * @param from_offset The byte offeset in the from. * @param to The target array. * @param to_offset The byte offset in the to. * @param num_bytes The size of the memory in bytes. * @param ctx_from The source context. * @param ctx_to The target context. * @param type_hint The type of elements, only needed by certain backends, * can be useful for cross device endian converison. */ virtual void CopyDataFromTo( const void* from, size_t from_offset, void* to, size_t to_offset, size_t num_bytes, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint) = 0; /** * @brief copy data between device and CPU while recording the event. * @param from The source array. * @param from_offset The byte offeset in the from. * @param to The target array. * @param to_offset The byte offset in the to. * @param num_bytes The size of the memory in bytes. * @param ctx_from The source context. * @param ctx_to The target context. * @param type_hint The type of elements, only needed by certain backends, * can be useful for cross device endian converison. * @param pytorch_ctx The context pointer from PyTorch's CachingHostAllocator. * @note This function only works when PyTorch CachingHostAllocator is * available. 
*/ virtual void RecordedCopyDataFromTo( void* from, size_t from_offset, void* to, size_t to_offset, size_t num_bytes, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint, void* pytorch_ctx) = 0; /** * @brief Create a new stream of execution. * * @param ctx The context of allocation. */ DGL_DLL virtual DGLStreamHandle CreateStream(DGLContext ctx); /** * @brief Free a stream of execution * * @param ctx The context of the stream * @param stream The pointer to be freed. */ DGL_DLL virtual void FreeStream(DGLContext ctx, DGLStreamHandle stream); /** * @brief Synchronize the stream * @param ctx The context to perform operation. * @param stream The stream to be sync. */ virtual void StreamSync(DGLContext ctx, DGLStreamHandle stream) = 0; /** * @brief Set the stream * @param ctx The context to set stream. * @param stream The stream to be set. */ virtual void SetStream(DGLContext ctx, DGLStreamHandle stream) {} /** * @brief Get the stream */ virtual DGLStreamHandle GetStream() const { return nullptr; } /** * @brief Synchronize 2 streams of execution. * * An event is created in event_src stream that the second then * stream waits on. Neither event_src or event_dst need to be of * the same device ID as the context, but they must be of the same * device type. * * @param ctx The context of the streams. * @param event_src The source stream to synchronize. * @param event_dst The destination stream to synchronize. */ DGL_DLL virtual void SyncStreamFromTo( DGLContext ctx, DGLStreamHandle event_src, DGLStreamHandle event_dst); /** * @brief Pin host memory using cudaHostRegister(). * * @param ptr The host memory pointer to be pinned. * @param nbytes The size to be pinned. * @return false when pinning an empty tensor. true otherwise. */ DGL_DLL virtual bool PinData(void* ptr, size_t nbytes); /** * @brief Unpin host memory using cudaHostUnregister(). * * @param ptr The host memory pointer to be unpinned. 
*/ DGL_DLL virtual void UnpinData(void* ptr); /** * @brief Allocate the pinned memory using PyTorch CachingHostAllocator. * * @param nbytes The size to be pinned. * @param ctx Pointer to the context pointer from PyTorch's * CachingHostAllocator. * @param deleter Pointer to the deleter function from PyTorch's * CachingHostAllocator. */ DGL_DLL virtual void* AllocPinnedDataSpace( size_t nbytes, void** ctx, void** deleter); /** * @brief 'Deallocate' the pinned memory from PyTorch CachingHostAllocator. * @note It avoids unnecessary cudaFreeHost calls and puts the memory * block into CachingHostAllocator's free list. * @param deleter Pointer to the deleter function from PyTorch's * CachingHostAllocator. */ DGL_DLL virtual void FreePinnedDataSpace(void** deleter); /** * @brief Check whether the memory is in pinned memory. */ DGL_DLL virtual bool IsPinned(const void* ptr) { return false; } /** * @brief Allocate temporal workspace for backend execution. * * \note We have the following assumption about backend temporal * workspace allocation, and backend will optimize for such assumption: * * - Only a few allocation will happen, and space will be released after use. * - The release order is usually in reverse order of allocate (stack style). * - Repeative pattern of same allocations over different runs. * - Workspace should not overlap between different threads(i.e. be * threadlocal) * * @param ctx The context of allocation. * @param nbytes The size to be allocated. * @param type_hint The type of elements. Only needed by certain backends such * as OpenGL, as nbytes is sufficient for most backends. */ DGL_DLL virtual void* AllocWorkspace( DGLContext ctx, size_t nbytes, DGLDataType type_hint = {}); /** * @brief Free temporal workspace in backend execution. * * @param ctx The context of allocation. * @param ptr The pointer to be freed. */ DGL_DLL virtual void FreeWorkspace(DGLContext ctx, void* ptr); /** * @brief Get device API based on context. 
* @param ctx The context * @param allow_missing Whether allow missing * @return The corresponding device API. */ DGL_DLL static DeviceAPI* Get(DGLContext ctx, bool allow_missing = false); /** * @brief Get device API based on device type. * @param dev_type The device type * @param allow_missing Whether allow missing * @return The corresponding device API. */ DGL_DLL static DeviceAPI* Get( DGLDeviceType dev_type, bool allow_missing = false); }; /** @brief The device type bigger than this is RPC device */ constexpr int kRPCSessMask = 128; } // namespace runtime } // namespace dgl #endif // DGL_RUNTIME_DEVICE_API_H_ ================================================ FILE: include/dgl/runtime/dlpack_convert.h ================================================ /** * Copyright (c) 2022 by Contributors * @file include/dgl/runtime/dlpack_convert.h * @brief Conversion between NDArray and DLPack. */ #ifndef DGL_RUNTIME_DLPACK_CONVERT_H_ #define DGL_RUNTIME_DLPACK_CONVERT_H_ #include "c_runtime_api.h" #include "ndarray.h" struct DLManagedTensor; namespace dgl { namespace runtime { struct DLPackConvert { /** * @brief Create a DGL NDArray from a DLPack tensor. * * This allows us to create a NDArray using the memory * allocated by an external deep learning framework * that is DLPack compatible. * * The memory is retained until the NDArray went out of scope. * @param tensor The DLPack tensor to copy from. * @return The created NDArray view. */ static NDArray FromDLPack(DLManagedTensor* tensor); /** * @brief Deleter for NDArray converted from DLPack. * * This is used from data which is passed from external * DLPack(DLManagedTensor) that are not allocated inside of DGL. This enables * us to create NDArray from memory allocated by other frameworks that are * DLPack compatible */ static void DLPackDeleter(NDArray::Container* ptr); /** @brief Convert a DGL NDArray to a DLPack tensor. * * @param from The DGL NDArray. * @return A DLPack tensor. 
*/ static DLManagedTensor* ToDLPack(const NDArray& from); }; } // namespace runtime } // namespace dgl #ifdef __cplusplus extern "C" { #endif /** * @brief Delete (free) a DLManagedTensor's data. * @param dltensor Pointer to the DLManagedTensor. */ DGL_DLL void DGLDLManagedTensorCallDeleter(DLManagedTensor* dltensor); /** * @brief Produce an array from the DLManagedTensor that shares data memory * with the DLManagedTensor. * @param from The source DLManagedTensor. * @param out The output array handle. * @return 0 when success, -1 when failure happens */ DGL_DLL int DGLArrayFromDLPack(DLManagedTensor* from, DGLArrayHandle* out); /** * @brief Produce a DLMangedTensor from the array that shares data memory with * the array. * @param from The source array. * @param out The DLManagedTensor handle. * @return 0 when success, -1 when failure happens */ DGL_DLL int DGLArrayToDLPack( DGLArrayHandle from, DLManagedTensor** out, int alignment = 0); #ifdef __cplusplus } // DGL_EXTERN_C #endif #endif // DGL_RUNTIME_DLPACK_CONVERT_H_ ================================================ FILE: include/dgl/runtime/module.h ================================================ /** * Copyright (c) 2017 by Contributors * @file dgl/runtime/module.h * @brief Runtime container of the functions generated by DGL, * This is used to support dynamically link, load and save * functions from different convention under unified API. */ #ifndef DGL_RUNTIME_MODULE_H_ #define DGL_RUNTIME_MODULE_H_ #include #include #include #include #include #include "c_runtime_api.h" namespace dgl { namespace runtime { // The internal container of module. class ModuleNode; class PackedFunc; /** * @brief Module container of DGL. */ class Module { public: Module() {} // constructor from container. explicit Module(std::shared_ptr n) : node_(n) {} /** * @brief Get packed function from current module by name. * * @param name The name of the function. * @param query_imports Whether also query dependency modules. 
* @return The result function. * This function will return PackedFunc(nullptr) if function do not exist. * @note Implemented in packed_func.cc */ inline PackedFunc GetFunction( const std::string& name, bool query_imports = false); /** @return internal container */ inline ModuleNode* operator->(); /** @return internal container */ inline const ModuleNode* operator->() const; // The following functions requires link with runtime. /** * @brief Import another module into this module. * @param other The module to be imported. * * @note Cyclic dependency is not allowed among modules, * An error will be thrown when cyclic dependency is detected. */ DGL_DLL void Import(Module other); /** * @brief Load a module from file. * @param file_name The name of the host function module. * @param format The format of the file. * @note This function won't load the import relationship. * Re-create import relationship by calling Import. */ DGL_DLL static Module LoadFromFile( const std::string& file_name, const std::string& format = ""); private: std::shared_ptr node_; }; /** * @brief Base node container of module. * Do not create this directly, instead use Module. */ class ModuleNode { public: /** @brief virtual destructor */ virtual ~ModuleNode() {} /** @return The module type key */ virtual const char* type_key() const = 0; /** * @brief Get a PackedFunc from module. * * The PackedFunc may not be fully initialized, * there might still be first time running overhead when * executing the function on certain devices. * For benchmarking, use prepare to eliminate * * @param name the name of the function. * @param sptr_to_self The shared_ptr that points to this module node. * * @return PackedFunc(nullptr) when it is not available. * * @note The function will always remain valid. * If the function need resource from the module(e.g. late linking), * it should capture sptr_to_self. 
*/ virtual PackedFunc GetFunction( const std::string& name, const std::shared_ptr& sptr_to_self) = 0; /** * @brief Save the module to file. * @param file_name The file to be saved to. * @param format The format of the file. */ virtual void SaveToFile( const std::string& file_name, const std::string& format); /** * @brief Save the module to binary stream. * @param stream The binary stream to save to. * @note It is recommended to implement this for device modules, * but not necessarily host modules. * We can use this to do AOT loading of bundled device functions. */ DGL_DLL virtual void SaveToBinary(dmlc::Stream* stream); /** * @brief Get the source code of module, when available. * @param format Format of the source code, can be empty by default. * @return Possible source code when available. */ DGL_DLL virtual std::string GetSource(const std::string& format = ""); /** * @brief Get a function from current environment * The environment includes all the imports as well as Global functions. * * @param name name of the function. * @return The corresponding function. */ DGL_DLL const PackedFunc* GetFuncFromEnv(const std::string& name); /** @return The module it imports from */ const std::vector& imports() const { return imports_; } protected: friend class Module; /** @brief The modules this module depend on */ std::vector imports_; private: /** @brief Cache used by GetImport */ std::unordered_map > import_cache_; }; /** @brief namespace for constant symbols */ namespace symbol { /** @brief Global variable to store module context. */ constexpr const char* dgl_module_ctx = "__dgl_module_ctx"; /** @brief Global variable to store device module blob */ constexpr const char* dgl_dev_mblob = "__dgl_dev_mblob"; /** @brief Number of bytes of device module blob. 
*/ constexpr const char* dgl_dev_mblob_nbytes = "__dgl_dev_mblob_nbytes"; /** @brief global function to set device */ constexpr const char* dgl_set_device = "__dgl_set_device"; /** @brief Auxiliary counter to global barrier. */ constexpr const char* dgl_global_barrier_state = "__dgl_global_barrier_state"; /** * @brief Prepare the global barrier before kernels that uses global barrier. */ constexpr const char* dgl_prepare_global_barrier = "__dgl_prepare_global_barrier"; /** @brief Placeholder for the module's entry function. */ constexpr const char* dgl_module_main = "__dgl_main__"; } // namespace symbol // implementations of inline functions. inline ModuleNode* Module::operator->() { return node_.get(); } inline const ModuleNode* Module::operator->() const { return node_.get(); } } // namespace runtime } // namespace dgl #include "packed_func.h" #endif // DGL_RUNTIME_MODULE_H_ ================================================ FILE: include/dgl/runtime/ndarray.h ================================================ /** * Copyright (c) 2017-2022 by Contributors * @file dgl/runtime/ndarray.h * @brief Abstract device memory management API */ #ifndef DGL_RUNTIME_NDARRAY_H_ #define DGL_RUNTIME_NDARRAY_H_ #include #include #include #include #include #include "bfloat16.h" #include "c_runtime_api.h" #include "serializer.h" #include "shared_mem.h" #ifdef DGL_USE_CUDA #include #define BF16_ENABLED (defined(CUDART_VERSION) && CUDART_VERSION >= 11000) #include #if BF16_ENABLED #include #endif // BF16_ENABLED #endif // DGL_USE_CUDA // forward declaration inline std::ostream& operator<<(std::ostream& os, DGLDataType t); namespace dgl { /** * @brief Type traits that converts a C type to a DGLDataType. 
* * Usage: * DGLDataTypeTraits::dtype == dtype */ template struct DGLDataTypeTraits { static constexpr DGLDataType dtype{0, 0, 0}; // dummy }; #define GEN_DGLDATATYPETRAITS_FOR(T, code, bits) \ template <> \ struct DGLDataTypeTraits { \ static constexpr DGLDataType dtype{code, bits, 1}; \ } GEN_DGLDATATYPETRAITS_FOR(int8_t, kDGLInt, 8); GEN_DGLDATATYPETRAITS_FOR(uint8_t, kDGLUInt, 8); GEN_DGLDATATYPETRAITS_FOR(int16_t, kDGLInt, 16); GEN_DGLDATATYPETRAITS_FOR(int32_t, kDGLInt, 32); GEN_DGLDATATYPETRAITS_FOR(int64_t, kDGLInt, 64); // XXX(BarclayII) most DL frameworks do not support unsigned int and long // arrays, so I'm just converting uints to signed DTypes. GEN_DGLDATATYPETRAITS_FOR(uint32_t, kDGLInt, 32); GEN_DGLDATATYPETRAITS_FOR(uint64_t, kDGLInt, 64); #ifdef DGL_USE_CUDA GEN_DGLDATATYPETRAITS_FOR(__half, kDGLFloat, 16); #if BF16_ENABLED GEN_DGLDATATYPETRAITS_FOR(__nv_bfloat16, kDGLBfloat, 16); #endif // BF16_ENABLED #endif // DGL_USE_CUDA GEN_DGLDATATYPETRAITS_FOR(float, kDGLFloat, 32); GEN_DGLDATATYPETRAITS_FOR(double, kDGLFloat, 64); #undef GEN_DGLDATATYPETRAITS_FOR namespace runtime { /** * @brief DLPack converter. */ struct DLPackConvert; /** * @brief Managed NDArray. * The array is backed by reference counted blocks. 
*/ class NDArray { public: // internal container type struct Container; /** @brief default constructor */ NDArray() {} /** * @brief cosntruct a NDArray that refers to data * @param data The data this NDArray refers to */ explicit inline NDArray(Container* data); /** * @brief copy constructor * @param other The value to be copied */ inline NDArray(const NDArray& other); // NOLINT(*) /** * @brief move constructor * @param other The value to be moved */ NDArray(NDArray&& other) // NOLINT(*) : data_(other.data_) { other.data_ = nullptr; } /** @brief destructor */ ~NDArray() { this->reset(); } /** * @brief Swap this array with another NDArray * @param other The other NDArray */ void swap(NDArray& other) { // NOLINT(*) std::swap(data_, other.data_); } /** * @brief copy assignmemt * @param other The value to be assigned. * @return reference to self. */ NDArray& operator=(const NDArray& other) { // NOLINT(*) // copy-and-swap idiom NDArray(other).swap(*this); // NOLINT(*) return *this; } /** * @brief move assignmemt * @param other The value to be assigned. * @return reference to self. */ NDArray& operator=(NDArray&& other) { // NOLINT(*) // copy-and-swap idiom NDArray(std::move(other)).swap(*this); // NOLINT(*) return *this; } /** @return If NDArray is defined */ bool defined() const { return data_ != nullptr; } /** @return If both NDArray reference the same container */ bool same_as(const NDArray& other) const { return data_ == other.data_; } /** @brief reset the content of NDArray to be nullptr */ inline void reset(); /** * @return the reference counter * @note this number is approximate in multi-threaded setting. */ inline int use_count() const; /** @return Pointer to content of DGLArray */ inline const DGLArray* operator->() const; /** @return True if the ndarray is contiguous. */ bool IsContiguous() const; /** @return the data pointer with type. 
*/ template inline T* Ptr() const { if (!defined()) return nullptr; else return static_cast(operator->()->data); } /** * @brief Copy data content from/into another array. * @param other The source array to be copied from. * @note The copy runs on the dgl internal stream if it involves a GPU * context. */ inline void CopyFrom(DGLArray* other); inline void CopyFrom(const NDArray& other); inline void CopyTo(DGLArray* other) const; inline void CopyTo(const NDArray& other) const; /** * @brief Copy the data to another context. * @param ctx The target context. * @return The array under another context. */ inline NDArray CopyTo(const DGLContext& ctx) const; /** * @brief Return a new array with a copy of the content. */ inline NDArray Clone() const; /** * @brief Return a copy of the current instance of NDArray in pinned * (page-locked) memory. * @note This is an out-of-place method, which utilizes PyTorch's * CachingHostAllocator for allocating pinned memory and copying data * from the current NDAarray. As a result, PyTorch is responsible for * managing the lifecycle of the returned NDArray, including deciding * when to flush the data for reuse or call cudaFreeHost. The current * context must be kDGLCPU, otherwise, an error will be thrown. */ inline NDArray PinMemory(); /** * @brief In-place method to pin the current array by calling PinContainer * on the underlying NDArray:Container. * @note This is an in-place method that flags the memory as page-locked by * utilizing cudaHostRegister at the underlying level to pin the current * instance of NDArray. The current context must be kDGLCPU, otherwise, * an error will be thrown. */ inline void PinMemory_(); /** * @brief In-place method to unpin the current array by calling UnpinContainer * on the underlying NDArray:Container. * @note This is an in-place method. Behavior depends on the current context, * IsPinned: will be unpinned; * others: directly return. 
*/ inline void UnpinMemory_(); /** * @brief Check if the array is pinned. */ inline bool IsPinned() const; /** * @brief Record streams that are using the underlying tensor. * @param stream The stream that is using the underlying tensor. */ inline void RecordStream(DGLStreamHandle stream) const; /** * @brief Load NDArray from stream * @param stream The input data stream * @return Whether load is successful */ bool Load(dmlc::Stream* stream); /** * @brief Save NDArray to stream * @param stream The output data stream */ void Save(dmlc::Stream* stream) const; /** * @brief Create a NDArray that shares the data memory with the current one. * @param shape The shape of the new array. * @param dtype The data type of the new array. * @param offset The offset (in bytes) of the starting pointer. * @note The memory size of new array must be smaller than the current one. */ DGL_DLL NDArray CreateView(std::vector shape, DGLDataType dtype, int64_t offset = 0); /** * @brief Create an empty NDArray. * @param shape The shape of the new array. * @param dtype The data type of the new array. * @param ctx The context of the array. * @return The created Array */ DGL_DLL static NDArray Empty( std::vector shape, DGLDataType dtype, DGLContext ctx); /** * @brief Create an empty NDArray in pinned memory. * @param shape The shape of the new array. * @param dtype The data type of the new array. * @param ctx The context of the array. * @return The created array. */ DGL_DLL static NDArray PinnedEmpty( std::vector shape, DGLDataType dtype, DGLContext ctx); /** * @brief Create an empty NDArray with shared memory. * @param name The name of shared memory. * @param shape The shape of the new array. * @param dtype The data type of the new array. * @param ctx The context of the array. * @param is_create whether to create shared memory. 
* @return The created Array */ DGL_DLL static NDArray EmptyShared( const std::string& name, std::vector shape, DGLDataType dtype, DGLContext ctx, bool is_create); /** * @brief Get the size of the array in the number of bytes. */ size_t GetSize() const; /** * @brief Get the number of elements in this array. */ int64_t NumElements() const; /** * @brief Create a NDArray by copying from std::vector. * @tparam T Type of vector data. Determines the dtype of returned array. */ template DGL_DLL static NDArray FromVector( const std::vector& vec, DGLContext ctx = DGLContext{kDGLCPU, 0}); /** * @brief Create a NDArray from a raw pointer. */ DGL_DLL static NDArray CreateFromRaw( const std::vector& shape, DGLDataType dtype, DGLContext ctx, void* raw, bool auto_free); /** * @brief Create a std::vector from a 1D NDArray. * @tparam T Type of vector data. * @note Type casting is NOT performed. The caller has to make sure that the * vector type matches the dtype of NDArray. */ template std::vector ToVector() const; std::shared_ptr GetSharedMem() const; /** * @brief Function to copy data from one array to another. * @param from The source array. * @param to The target array. * @param (optional) stream The stream used in copy. */ DGL_DLL static void CopyFromTo(DGLArray* from, DGLArray* to); DGL_DLL static void CopyFromTo( DGLArray* from, DGLArray* to, DGLStreamHandle stream); /** * @brief Function to copy data between device and CPU while recording the * event. * @param from The source array. * @param to The target array. * @param pytorch_ctx The context pointer from PyTorch's CachingHostAllocator. * @note This function fuses data-copy and event recording to ensure * CachingHostAllocator works properly. */ DGL_DLL static void RecordedCopyFromTo( DGLArray* from, DGLArray* to, void* pytorch_ctx); /** * @brief Function to pin the DGLArray of a Container. * @param ptr The container to be pinned. * @note Data of the given array will be pinned inplace. 
* Behavior depends on the current context, * kDGLCPU: will be pinned; * IsPinned: directly return; * kDGLCUDA: invalid, will throw an error. */ DGL_DLL static void PinContainer(Container* ptr); /** * @brief Function to unpin the DGLArray of a Container. * @param ptr The container to be unpinned. * @note Data of the given array will be unpinned inplace. * Behavior depends on the current context, * IsPinned: will be unpinned; * others: directly return. */ DGL_DLL static void UnpinContainer(Container* ptr); /** * @brief Function check if the DGLArray of a Container is pinned. * @param ptr The container to be checked. * @return true if pinned. */ DGL_DLL static bool IsContainerPinned(Container* ptr); /** * @brief Record streams that are using this tensor. * @param ptr Pointer of the tensor to be recorded. * @param stream The stream that is using this tensor. */ DGL_DLL static void RecordStream(DGLArray* tensor, DGLStreamHandle stream); // internal namespace struct Internal { // Default deleter for the container static void DefaultDeleter(NDArray::Container* ptr); // Local create function which allocates tensor metadata // but does not allocate space for the data. static NDArray Create( std::vector shape, DGLDataType dtype, DGLContext ctx); // Implementation of API function static DGLArray* MoveAsDGLArray(NDArray arr); }; private: /** @brief Internal Data content */ Container* data_{nullptr}; // enable internal functions friend struct Internal; friend struct DLPackConvert; friend class DGLRetValue; friend class DGLArgsSetter; }; /** * @brief Save a DGLArray to stream * @param strm The outpu stream * @param tensor The tensor to be saved. */ inline bool SaveDGLArray(dmlc::Stream* strm, const DGLArray* tensor); /** * @brief Reference counted Container object used to back NDArray. * * This object is DGLArray compatible: * the pointer to the NDArrayContainer can be directly * interpreted as a DGLArray* * * @note: do not use this function directly, use NDArray. 
*/ struct NDArray::Container { public: /** NOTE: the first part of this structure is the same as * DLManagedTensor, note that, however, the deleter * is only called when the reference counter goes to 0 */ /** * @brief Tensor structure. * @note it is important that the first field is DGLArray * So that this data structure is DGLArray compatible. * The head ptr of this struct can be viewed as DGLArray*. */ DGLArray dl_tensor; /** * @brief addtional context, reserved for recycling * @note We can attach additional content here * which the current container depend on * (e.g. reference to original memory when creating views). */ void* manager_ctx{nullptr}; /** * @brief Customized deleter * * @note The customized deleter is helpful to enable * different ways of memory allocator that are not * currently defined by the system. */ void (*deleter)(Container* self) = nullptr; /** @brief default constructor */ Container() { dl_tensor.data = nullptr; dl_tensor.ndim = 0; dl_tensor.shape = nullptr; dl_tensor.strides = nullptr; dl_tensor.byte_offset = 0; } /** @brief pointer to shared memory */ std::shared_ptr mem; /** @brief developer function, increases reference counter */ void IncRef() { ref_counter_.fetch_add(1, std::memory_order_relaxed); } /** @brief developer function, decrease reference counter */ void DecRef() { if (ref_counter_.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); if (this->deleter != nullptr) { (*this->deleter)(this); } } } private: friend struct DLPackConvert; friend class NDArray; friend class RPCWrappedFunc; /** * @brief The shape container, * can be used for shape data. */ std::vector shape_; /** * @brief The stride container, * can be used for stride data. */ std::vector stride_; /** @brief The internal array object */ std::atomic ref_counter_{0}; /** @brief Whether underlying dl_tensor is pinned by DGL. 
*/ bool pinned_by_dgl_{false}; /** @brief Whether underlying dl_tensor is pinned by PyTorch * (CachingHostAllocator). */ bool pinned_by_pytorch_{false}; /** @brief The PyTorch storage ctx ptr if pinned_by_pytorch_ = True. */ void* pytorch_ctx_{nullptr}; /** @brief Pointer to the corresp. PyTorch deleter if pinned_by_pytorch_ = * True. */ void* pytorch_raw_deleter_{nullptr}; }; // implementations of inline functions // the usages of functions are documented in place. inline NDArray::NDArray(Container* data) : data_(data) { if (data_) data_->IncRef(); } inline NDArray::NDArray(const NDArray& other) : data_(other.data_) { if (data_) data_->IncRef(); } inline void NDArray::reset() { if (data_) { data_->DecRef(); data_ = nullptr; } } inline void NDArray::CopyFrom(DGLArray* other) { CHECK(data_ != nullptr); CopyFromTo(other, &(data_->dl_tensor)); } inline void NDArray::CopyFrom(const NDArray& other) { CHECK(other.data_ != nullptr); // Copy between two devices if (data_->dl_tensor.ctx.device_type != other.data_->dl_tensor.ctx.device_type) { CHECK(data_ != nullptr); auto to_ctx_type = data_->dl_tensor.ctx.device_type; auto cpu_data = (to_ctx_type == kDGLCPU ? data_ : other.data_); // Pinned by PyTorch if (cpu_data->pinned_by_pytorch_) { // To ensure correct behavior, the event must be recorded after // cudaMemcpyAsync as long as the memory is pinned by PyTorch. void* pytorch_ctx = cpu_data->pytorch_ctx_; RecordedCopyFromTo( &(other.data_->dl_tensor), &(data_->dl_tensor), pytorch_ctx); return; } } CopyFrom(&(other.data_->dl_tensor)); } inline void NDArray::CopyTo(DGLArray* other) const { CHECK(data_ != nullptr); CopyFromTo(&(data_->dl_tensor), other); } inline void NDArray::CopyTo(const NDArray& other) const { CHECK(other.data_ != nullptr); // copy between two devices if (data_->dl_tensor.ctx.device_type != other.data_->dl_tensor.ctx.device_type) { CHECK(data_ != nullptr); auto from_ctx_type = data_->dl_tensor.ctx.device_type; auto cpu_data = (from_ctx_type == kDGLCPU ? 
data_ : other.data_); // pinned by PyTorch if (cpu_data->pinned_by_pytorch_) { // To ensure correct behavior, the event must be recorded after // cudaMemcpyAsync as long as the memory is pinned by PyTorch. void* pytorch_ctx = cpu_data->pytorch_ctx_; RecordedCopyFromTo( &(data_->dl_tensor), &(other.data_->dl_tensor), pytorch_ctx); return; } } CopyTo(&(other.data_->dl_tensor)); } inline NDArray NDArray::CopyTo(const DGLContext& ctx) const { CHECK(data_ != nullptr); const DGLArray* array = operator->(); NDArray ret = Empty( std::vector(array->shape, array->shape + array->ndim), array->dtype, ctx); this->CopyTo(ret); return ret; } inline NDArray NDArray::Clone() const { CHECK(data_ != nullptr); const DGLArray* array = operator->(); return this->CopyTo(array->ctx); } inline NDArray NDArray::PinMemory() { CHECK(data_ != nullptr); const DGLArray* array = operator->(); auto ctx = array->ctx; NDArray ret = PinnedEmpty( std::vector(array->shape, array->shape + array->ndim), array->dtype, ctx); this->CopyTo(ret); return ret; } inline void NDArray::PinMemory_() { CHECK(data_ != nullptr); PinContainer(data_); } inline void NDArray::UnpinMemory_() { CHECK(data_ != nullptr); UnpinContainer(data_); } inline bool NDArray::IsPinned() const { CHECK(data_ != nullptr); return IsContainerPinned(data_); } inline void NDArray::RecordStream(DGLStreamHandle stream) const { CHECK(data_ != nullptr); RecordStream(&(data_->dl_tensor), stream); } inline int NDArray::use_count() const { if (data_ == nullptr) return 0; return data_->ref_counter_.load(std::memory_order_relaxed); } inline const DGLArray* NDArray::operator->() const { return &(data_->dl_tensor); } /** @brief Magic number for NDArray file */ constexpr uint64_t kDGLNDArrayMagic = 0xDD5E40F096B4A13F; inline bool SaveDGLArray(dmlc::Stream* strm, DGLArray* tensor) { uint64_t header = kDGLNDArrayMagic, reserved = 0; strm->Write(header); strm->Write(reserved); // Always save data as CPU context // // Parameters that get serialized should be 
in CPU by default. // So even the array's context is GPU, it will be stored as CPU array. // This is used to prevent case when another user loads the parameters // back on machine that do not have GPU or related context. // // We can always do array.CopyTo(target_ctx) to get a corresponding // array in the target context. DGLContext cpu_ctx; cpu_ctx.device_type = kDGLCPU; cpu_ctx.device_id = 0; strm->Write(cpu_ctx); strm->Write(tensor->ndim); strm->Write(tensor->dtype); int ndim = tensor->ndim; strm->WriteArray(tensor->shape, ndim); int type_bytes = tensor->dtype.bits / 8; int64_t num_elems = 1; for (int i = 0; i < ndim; ++i) { num_elems *= tensor->shape[i]; } int64_t data_byte_size = type_bytes * num_elems; strm->Write(data_byte_size); if (DMLC_IO_NO_ENDIAN_SWAP && tensor->ctx.device_type == kDGLCPU && tensor->strides == nullptr && tensor->byte_offset == 0) { // quick path strm->Write(tensor->data, data_byte_size); } else { std::vector bytes(data_byte_size); CHECK_EQ( DGLArrayCopyToBytes(tensor, dmlc::BeginPtr(bytes), data_byte_size), 0) << DGLGetLastError(); if (!DMLC_IO_NO_ENDIAN_SWAP) { dmlc::ByteSwap(dmlc::BeginPtr(bytes), type_bytes, num_elems); } strm->Write(dmlc::BeginPtr(bytes), data_byte_size); } return true; } /** * @brief Convert type code to its name * @param type_code The type code . * @return The name of type code. 
*/ inline const char* TypeCode2Str(int type_code) { switch (type_code) { case kDGLInt: return "int"; case kDGLUInt: return "uint"; case kDGLFloat: return "float"; case kStr: return "str"; case kBytes: return "bytes"; case kHandle: return "handle"; case kNull: return "NULL"; case kObjectHandle: return "ObjectHandle"; case kArrayHandle: return "ArrayHandle"; case kDGLDataType: return "DGLDataType"; case kDGLContext: return "DGLContext"; case kFuncHandle: return "FunctionHandle"; case kModuleHandle: return "ModuleHandle"; case kNDArrayContainer: return "NDArrayContainer"; default: LOG(FATAL) << "unknown type_code=" << static_cast(type_code); return ""; } } /** * @brief Convert device type code to its name * @param device_type The device type code. * @return The name of the device. */ inline const char* DeviceTypeCode2Str(DGLDeviceType device_type) { switch (device_type) { case kDGLCPU: return "cpu"; case kDGLCUDA: return "cuda"; default: LOG(FATAL) << "Unsupported device type code=" << static_cast(device_type); return ""; } } /** * @brief convert a string to DGL type. * @param s The string to be converted. * @return The corresponding dgl type. */ inline DGLDataType String2DGLDataType(std::string s) { DGLDataType t; t.bits = 32; t.lanes = 1; const char* scan; if (s.substr(0, 3) == "int") { t.code = kDGLInt; scan = s.c_str() + 3; } else if (s.substr(0, 4) == "uint") { t.code = kDGLUInt; scan = s.c_str() + 4; } else if (s.substr(0, 5) == "float") { t.code = kDGLFloat; scan = s.c_str() + 5; } else if (s.substr(0, 6) == "handle") { t.code = kHandle; t.bits = 64; // handle uses 64 bit by default. scan = s.c_str() + 6; } else { scan = s.c_str(); LOG(FATAL) << "unknown type " << s; } char* xdelim; // emulate sscanf("%ux%u", bits, lanes) uint8_t bits = static_cast(strtoul(scan, &xdelim, 10)); if (bits != 0) t.bits = bits; if (*xdelim == 'x') { t.lanes = static_cast(strtoul(xdelim + 1, nullptr, 10)); } return t; } /** * @brief convert a DGL type to string. 
* @param t The type to be converted. * @return The corresponding dgl type in string. */ inline std::string DGLDataType2String(DGLDataType t) { #ifndef _LIBCPP_SGX_NO_IOSTREAMS std::ostringstream os; os << t; return os.str(); #else std::string repr = ""; repr += TypeCode2Str(t.code); if (t.code == kHandle) return repr; repr += std::to_string(static_cast(t.bits)); if (t.lanes != 1) { repr += "x" + std::to_string(static_cast(t.lanes)); } return repr; #endif } // macro to check type code. #define DGL_CHECK_TYPE_CODE(CODE, T) \ CHECK_EQ(CODE, T) << " expected " << TypeCode2Str(T) << " but get " \ << TypeCode2Str(CODE) } // namespace runtime } // namespace dgl namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, dgl::runtime::NDArray, true); } // namespace dmlc ///////////////// Operator overloading for NDArray ///////////////// dgl::runtime::NDArray operator+( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator-( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator*( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator/( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator%( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator+(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator-(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator*(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator/(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator%(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator+(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator-(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator*(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray 
operator/(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator%(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator-(const dgl::runtime::NDArray& array); dgl::runtime::NDArray operator>( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator<( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator>=( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator<=( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator==( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator!=( const dgl::runtime::NDArray& a1, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator>(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator<(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator>=(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator<=(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator==(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator!=(const dgl::runtime::NDArray& a1, int64_t rhs); dgl::runtime::NDArray operator>(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator<(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator>=(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator<=(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator==(int64_t lhs, const dgl::runtime::NDArray& a2); dgl::runtime::NDArray operator!=(int64_t lhs, const dgl::runtime::NDArray& a2); std::ostream& operator<<(std::ostream& os, dgl::runtime::NDArray array); ///////////////// Operator overloading for DGLDataType ///////////////// /** @brief Check whether two data types are the same.*/ inline bool operator==(const 
DGLDataType& ty1, const DGLDataType& ty2) { return ty1.code == ty2.code && ty1.bits == ty2.bits && ty1.lanes == ty2.lanes; } /** @brief Check whether two data types are different.*/ inline bool operator!=(const DGLDataType& ty1, const DGLDataType& ty2) { return !(ty1 == ty2); } #ifndef _LIBCPP_SGX_NO_IOSTREAMS inline std::ostream& operator<<(std::ostream& os, DGLDataType t) { os << dgl::runtime::TypeCode2Str(t.code); if (t.code == kHandle) return os; os << static_cast(t.bits); if (t.lanes != 1) { os << 'x' << static_cast(t.lanes); } return os; } #endif ///////////////// Operator overloading for DGLContext ///////////////// /** @brief Check whether two device contexts are the same.*/ inline bool operator==(const DGLContext& ctx1, const DGLContext& ctx2) { return ctx1.device_type == ctx2.device_type && ctx1.device_id == ctx2.device_id; } /** @brief Check whether two device contexts are different.*/ inline bool operator!=(const DGLContext& ctx1, const DGLContext& ctx2) { return !(ctx1 == ctx2); } #ifndef _LIBCPP_SGX_NO_IOSTREAMS inline std::ostream& operator<<(std::ostream& os, const DGLContext& ctx) { return os << dgl::runtime::DeviceTypeCode2Str(ctx.device_type) << ":" << ctx.device_id; } #endif #endif // DGL_RUNTIME_NDARRAY_H_ ================================================ FILE: include/dgl/runtime/object.h ================================================ /** * Copyright (c) 2019 by Contributors * @file runtime/object.h * @brief Defines the Object data structures. */ #ifndef DGL_RUNTIME_OBJECT_H_ #define DGL_RUNTIME_OBJECT_H_ #include #include #include #include #include namespace dgl { namespace runtime { // forward declaration class Object; class ObjectRef; class NDArray; /** * @brief Visitor class to each object attribute. * The content is going to be called for each field. */ class AttrVisitor { public: //! 
\cond Doxygen_Suppress virtual void Visit(const char* key, double* value) = 0; virtual void Visit(const char* key, int64_t* value) = 0; virtual void Visit(const char* key, uint64_t* value) = 0; virtual void Visit(const char* key, int* value) = 0; virtual void Visit(const char* key, bool* value) = 0; virtual void Visit(const char* key, std::string* value) = 0; virtual void Visit(const char* key, ObjectRef* value) = 0; virtual void Visit(const char* key, NDArray* value) = 0; template < typename ENum, typename = typename std::enable_if::value>::type> void Visit(const char* key, ENum* ptr) { static_assert( std::is_same::type>::value, "declare enum to be enum int to use visitor"); this->Visit(key, reinterpret_cast(ptr)); } //! \endcond }; /** * @brief base class of object container. * All object's internal is stored as std::shared_ptr */ class Object { public: /** @brief virtual destructor */ virtual ~Object() {} /** @return The unique type key of the object */ virtual const char* type_key() const = 0; /** * @brief Apply visitor to each field of the Object * Visitor could mutate the content of the object. * override if Object contains attribute fields. * @param visitor The visitor */ virtual void VisitAttrs(AttrVisitor* visitor) {} /** @return the type index of the object */ virtual uint32_t type_index() const = 0; /** * @brief Whether this object derives from object with type_index=tid. * Implemented by DGL_DECLARE_OBJECT_TYPE_INFO * * @param tid The type index. * @return the check result. */ virtual bool _DerivedFrom(uint32_t tid) const; /** * @brief get a runtime unique type index given a type key * @param type_key Type key of a type. * @return the corresponding type index. */ static uint32_t TypeKey2Index(const char* type_key); /** * @brief get type key from type index. * @param index The type index * @return the corresponding type key. 
*/ static const char* TypeIndex2Key(uint32_t index); /** * @return whether the type is derived from */ template inline bool derived_from() const; /** * @return whether the object is of type T * @tparam The type to be checked. */ template inline bool is_type() const; // object ref can see this friend class ObjectRef; static constexpr const char* _type_key = "Object"; }; /** @brief base class of all reference object */ class ObjectRef { public: /** @brief type indicate the container type */ using ContainerType = Object; /** * @brief Comparator * * Compare with the two are referencing to the same object (compare by * address). * * @param other Another object ref. * @return the compare result. * @sa same_as */ inline bool operator==(const ObjectRef& other) const; /** * @brief Comparator * * Compare with the two are referencing to the same object (compare by * address). * * @param other Another object ref. * @return the compare result. */ inline bool same_as(const ObjectRef& other) const; /** * @brief Comparator * * The operator overload allows ObjectRef be used in std::map. * * @param other Another object ref. * @return the compare result. */ inline bool operator<(const ObjectRef& other) const; /** * @brief Comparator * @param other Another object ref. * @return the compare result. * @sa same_as */ inline bool operator!=(const ObjectRef& other) const; /** @return the hash function for ObjectRef */ inline size_t hash() const; /** @return whether the expression is null */ inline bool defined() const; /** @return the internal type index of Object */ inline uint32_t type_index() const; /** @return the internal object pointer */ inline const Object* get() const; /** @return the internal object pointer */ inline const Object* operator->() const; /** * @brief Downcast this object to its actual type. * This returns nullptr if the object is not of the requested type. * Example usage: * * if (const Banana *banana = obj->as()) { * // This is a Banana! 
* } * @tparam T the target type, must be subtype of Object */ template inline const T* as() const; /** @brief default constructor */ ObjectRef() = default; explicit ObjectRef(std::shared_ptr obj) : obj_(obj) {} /** @brief the internal object, do not touch */ std::shared_ptr obj_; }; /** * @brief helper macro to declare type information in a base object. * * This is macro should be used in abstract base class definition * because it does not define type_key and type_index. */ #define DGL_DECLARE_BASE_OBJECT_INFO(TypeName, Parent) \ const bool _DerivedFrom(uint32_t tid) const override { \ static uint32_t tidx = TypeKey2Index(TypeName::_type_key); \ if (tidx == tid) return true; \ return Parent::_DerivedFrom(tid); \ } /** * @brief helper macro to declare type information in a terminal class * * This is macro should be used in terminal class definition. * * For example: * * // This class is an abstract class and cannot create instances * class SomeBaseClass : public Object { * public: * static constexpr const char* _type_key = "some_base"; * DGL_DECLARE_BASE_OBJECT_INFO(SomeBaseClass, Object); * }; * * // Child class that allows instantiation * class SomeChildClass : public SomeBaseClass { * public: * static constexpr const char* _type_key = "some_child"; * DGL_DECLARE_OBJECT_TYPE_INFO(SomeChildClass, SomeBaseClass); * }; */ #define DGL_DECLARE_OBJECT_TYPE_INFO(TypeName, Parent) \ const char* type_key() const final { return TypeName::_type_key; } \ uint32_t type_index() const final { \ static uint32_t tidx = TypeKey2Index(TypeName::_type_key); \ return tidx; \ } \ bool _DerivedFrom(uint32_t tid) const final { \ static uint32_t tidx = TypeKey2Index(TypeName::_type_key); \ if (tidx == tid) return true; \ return Parent::_DerivedFrom(tid); \ } /** @brief Macro to generate common object reference class method definition */ #define DGL_DEFINE_OBJECT_REF_METHODS(TypeName, BaseTypeName, ObjectName) \ TypeName() {} \ explicit TypeName(std::shared_ptr obj) \ : BaseTypeName(obj) 
{} \ const ObjectName* operator->() const { \ return static_cast(obj_.get()); \ } \ ObjectName* operator->() { return static_cast(obj_.get()); } \ std::shared_ptr sptr() const { \ return CHECK_NOTNULL(std::dynamic_pointer_cast(obj_)); \ } \ operator bool() const { return this->defined(); } \ using ContainerType = ObjectName /** @brief Macro to generate object reference class definition */ #define DGL_DEFINE_OBJECT_REF(TypeName, ObjectName) \ class TypeName : public ::dgl::runtime::ObjectRef { \ public: \ DGL_DEFINE_OBJECT_REF_METHODS( \ TypeName, ::dgl::runtime::ObjectRef, ObjectName); \ } // implementations of inline functions after this template inline bool Object::is_type() const { // use static field so query only happens once. static uint32_t type_id = Object::TypeKey2Index(T::_type_key); return type_id == this->type_index(); } template inline bool Object::derived_from() const { // use static field so query only happens once. static uint32_t type_id = Object::TypeKey2Index(T::_type_key); return this->_DerivedFrom(type_id); } inline const Object* ObjectRef::get() const { return obj_.get(); } inline const Object* ObjectRef::operator->() const { return obj_.get(); } inline bool ObjectRef::defined() const { return obj_.get() != nullptr; } inline bool ObjectRef::operator==(const ObjectRef& other) const { return obj_.get() == other.obj_.get(); } inline bool ObjectRef::same_as(const ObjectRef& other) const { return obj_.get() == other.obj_.get(); } inline bool ObjectRef::operator<(const ObjectRef& other) const { return obj_.get() < other.obj_.get(); } inline bool ObjectRef::operator!=(const ObjectRef& other) const { return obj_.get() != other.obj_.get(); } inline size_t ObjectRef::hash() const { return std::hash()(obj_.get()); } inline uint32_t ObjectRef::type_index() const { CHECK(obj_.get() != nullptr) << "null type"; return get()->type_index(); } template inline const T* ObjectRef::as() const { const Object* ptr = get(); if (ptr && ptr->is_type()) { return 
static_cast(ptr); } return nullptr; } /** @brief The hash function for nodes */ struct ObjectHash { size_t operator()(const ObjectRef& a) const { return a.hash(); } }; /** @brief The equal comparator for nodes */ struct ObjectEqual { bool operator()(const ObjectRef& a, const ObjectRef& b) const { return a.get() == b.get(); } }; } // namespace runtime } // namespace dgl namespace std { template <> struct hash<::dgl::runtime::ObjectRef> { std::size_t operator()(const ::dgl::runtime::ObjectRef& k) const { return k.hash(); } }; } // namespace std #endif // DGL_RUNTIME_OBJECT_H_ ================================================ FILE: include/dgl/runtime/packed_func.h ================================================ /** * Copyright (c) 2017 by Contributors * @file dgl/runtime/packed_func.h * @brief Type-erased function used across DGL API. */ #ifndef DGL_RUNTIME_PACKED_FUNC_H_ #define DGL_RUNTIME_PACKED_FUNC_H_ #include #include #include #include #include #include #include #include #include #include "c_runtime_api.h" #include "module.h" #include "ndarray.h" // Whether use DGL runtime in header only mode. #ifndef DGL_RUNTIME_HEADER_ONLY #define DGL_RUNTIME_HEADER_ONLY 0 #endif namespace dgl { namespace runtime { // Forward declare ObjectRef and Object for extensions. // This header works fine without depend on ObjectRef // as long as it is not used. class Object; class ObjectRef; // forward declarations class DGLArgs; class DGLArgValue; class DGLRetValue; class DGLArgsSetter; /** * @brief Packed function is a type-erased function. * The arguments are passed by packed format. * * This is an useful unified interface to call generated functions, * It is the unified function function type of DGL. * It corresponds to DGLFunctionHandle in C runtime API. */ class PackedFunc { public: /** * @brief The internal std::function * @param args The arguments to the function. * @param rv The return value. 
* * @code * // Example code on how to implemented FType * void MyPackedFunc(DGLArgs args, DGLRetValue* rv) { * // automatically convert arguments to desired type. * int a0 = args[0]; * float a1 = args[1]; * ... * // automatically assign values to rv * std::string my_return_value = "x"; * *rv = my_return_value; * } * @endcode */ using FType = std::function; /** @brief default constructor */ PackedFunc() {} /** * @brief constructing a packed function from a std::function. * @param body the internal container of packed function. */ explicit PackedFunc(FType body) : body_(body) {} /** * @brief Call packed function by directly passing in unpacked format. * @param args Arguments to be passed. * @tparam Args arguments to be passed. * * @code * // Example code on how to call packed function * void CallPacked(PackedFunc f) { * // call like normal functions by pass in arguments * // return value is automatically converted back * int rvalue = f(1, 2.0); * } * @endcode */ template inline DGLRetValue operator()(Args&&... args) const; /** * @brief Call the function in packed format. * @param args The arguments * @param rv The return value. */ inline void CallPacked(DGLArgs args, DGLRetValue* rv) const; /** @return the internal body function */ inline FType body() const; /** @return Whether the packed function is nullptr */ bool operator==(std::nullptr_t null) const { return body_ == nullptr; } /** @return Whether the packed function is not nullptr */ bool operator!=(std::nullptr_t null) const { return body_ != nullptr; } private: /** @brief internal container of packed function */ FType body_; }; /** * @brief Please refer to \ref TypedPackedFuncAnchor * "TypedPackedFunc" */ template class TypedPackedFunc; /** * @anchor TypedPackedFuncAnchor * @brief A PackedFunc wrapper to provide typed function signature. * It is backed by a PackedFunc internally. * * TypedPackedFunc enables compile time type checking. 
* TypedPackedFunc works with the runtime system: * - It can be passed as an argument of PackedFunc. * - It can be assigned to DGLRetValue. * - It can be directly converted to a type-erased PackedFunc. * * Developers should prefer TypedPackedFunc over PackedFunc in C++ code * as it enables compile time checking. * We can construct a TypedPackedFunc from a lambda function * with the same signature. * * @code * // user defined lambda function. * auto addone = [](int x)->int { * return x + 1; * }; * // We can directly convert * // lambda function to TypedPackedFunc * TypedPackedFunc<int(int)> ftyped(addone); * // invoke the function. * int y = ftyped(1); * // Can be directly converted to PackedFunc * PackedFunc packed = ftyped; * @endcode * @tparam R The return value of the function. * @tparam Args The argument signature of the function. */ template class TypedPackedFunc { public: /** @brief short hand for this function type */ using TSelf = TypedPackedFunc; /** @brief default constructor */ TypedPackedFunc() {} /** * @brief Construct by wrapping a PackedFunc. * * Example usage: * @code * PackedFunc packed([](DGLArgs args, DGLRetValue *rv) { * int x = args[0]; * *rv = x + 1; * }); * // construct from packed function * TypedPackedFunc<int(int)> ftyped(packed); * // call the typed version. * CHECK_EQ(ftyped(1), 2); * @endcode * * @param packed The packed function */ inline explicit TypedPackedFunc(PackedFunc packed); /** * @brief Construct from a lambda function with the same signature. * * Example usage: * @code * auto typed_lambda = [](int x)->int { return x + 1; }; * // construct from the typed lambda * TypedPackedFunc<int(int)> ftyped(typed_lambda); * // call the typed version. * CHECK_EQ(ftyped(1), 2); * @endcode * * @param typed_lambda typed lambda function. * @tparam FLambda the type of the lambda function. 
*/ template < typename FLambda, typename = typename std::enable_if >::value>::type> explicit TypedPackedFunc(const FLambda& typed_lambda) { this->AssignTypedLambda(typed_lambda); } /** * @brief copy assignment operator from typed lambda * * Example usage: * @code * // construct from packed function * TypedPackedFunc ftyped; * ftyped = [](int x) { return x + 1; } * // call the typed version. * CHECK_EQ(ftyped(1), 2); * @endcode * * @param typed_lambda typed lambda function. * @tparam FLambda the type of the lambda function. * @returns reference to self. */ template < typename FLambda, typename = typename std::enable_if >::value>::type> TSelf& operator=(FLambda typed_lambda) { // NOLINT(*) this->AssignTypedLambda(typed_lambda); return *this; } /** * @brief copy assignment operator from PackedFunc. * @param packed The packed function. * @returns reference to self. */ TSelf& operator=(PackedFunc packed) { packed_ = packed; return *this; } /** * @brief Invoke the operator. * @param args The arguments * @returns The return value. */ inline R operator()(Args... args) const; /** * @brief convert to PackedFunc * @return the internal PackedFunc */ operator PackedFunc() const { return packed(); } /** * @return reference the internal PackedFunc */ const PackedFunc& packed() const { return packed_; } private: friend class DGLRetValue; /** @brief The internal packed function */ PackedFunc packed_; /** * @brief Assign the packed field using a typed lambda function. * * @param flambda The lambda function. * @tparam FLambda The lambda function type. * @note We capture the lambda when possible for maximum efficiency. */ template inline void AssignTypedLambda(FLambda flambda); }; /** @brief Arguments into DGL functions. */ class DGLArgs { public: const DGLValue* values; const int* type_codes; int num_args; /** * @brief constructor * @param values The argument values * @param type_codes The argument type codes * @param num_args number of arguments. 
*/ DGLArgs(const DGLValue* values, const int* type_codes, int num_args) : values(values), type_codes(type_codes), num_args(num_args) {} /** @return size of the arguments */ inline int size() const; /** * @brief Get i-th argument * @param i the index. * @return the ith argument. */ inline DGLArgValue operator[](int i) const; }; /** * @brief Type trait that marks whether a class is a DGL extension type. * * The primary template has code 0, which means "not an extension type". * To enable an extension type in C++, specialize this trait for the type * with a non-zero code and then register the type via the macro * DGL_REGISTER_EXT_TYPE(TypeName). * * An extension class can be passed and returned via PackedFunc in the DGL * runtime. Internally an extension class is stored as T*. * * @tparam T the typename */ template struct extension_class_info { static const int code = 0; }; /** * @brief Runtime function table about extension type. */ class ExtTypeVTable { public: /** @brief function to be called to delete a handle */ void (*destroy)(void* handle); /** @brief function to be called to clone a handle */ void* (*clone)(void* handle); /** * @brief Register type * @tparam T The type to be registered. * @return The registered vtable. */ template static inline ExtTypeVTable* Register_(); /** * @brief Get a vtable based on type code. * @param type_code The type code * @return The registered vtable. */ DGL_DLL static ExtTypeVTable* Get(int type_code); private: // Internal registration function. DGL_DLL static ExtTypeVTable* RegisterInternal( int type_code, const ExtTypeVTable& vt); }; /** * @brief Internal base class to * handle conversion to POD values. */ class DGLPODValue_ { public: operator double() const { // Allow automatic conversion from int to float // This avoids errors when user pass in int from // the frontend while the API expects a float. 
if (type_code_ == kDGLInt) { return static_cast(value_.v_int64); } DGL_CHECK_TYPE_CODE(type_code_, kDGLFloat); return value_.v_float64; } operator int64_t() const { DGL_CHECK_TYPE_CODE(type_code_, kDGLInt); return value_.v_int64; } operator uint64_t() const { DGL_CHECK_TYPE_CODE(type_code_, kDGLInt); return value_.v_int64; } operator int() const { DGL_CHECK_TYPE_CODE(type_code_, kDGLInt); CHECK_LE(value_.v_int64, std::numeric_limits::max()); return static_cast(value_.v_int64); } operator bool() const { DGL_CHECK_TYPE_CODE(type_code_, kDGLInt); return value_.v_int64 != 0; } operator void*() const { if (type_code_ == kNull) return nullptr; if (type_code_ == kArrayHandle) return value_.v_handle; DGL_CHECK_TYPE_CODE(type_code_, kHandle); return value_.v_handle; } operator DGLArray*() const { if (type_code_ == kArrayHandle || type_code_ == kNDArrayContainer) { return static_cast(value_.v_handle); } else { if (type_code_ == kNull) return nullptr; LOG(FATAL) << "Expected " << "DGLArray* or NDArray but get " << TypeCode2Str(type_code_); return nullptr; } } operator NDArray() const { if (type_code_ == kNull) return NDArray(); DGL_CHECK_TYPE_CODE(type_code_, kNDArrayContainer); return NDArray(static_cast(value_.v_handle)); } operator DGLContext() const { DGL_CHECK_TYPE_CODE(type_code_, kDGLContext); return value_.v_ctx; } template const TExtension& AsExtension() const { CHECK_LT(type_code_, kExtEnd); return static_cast(value_.v_handle)[0]; } int type_code() const { return type_code_; } /** * @brief return handle as specific pointer type. * @tparam T the data type. * @return The pointer type. 
*/ template T* ptr() const { return static_cast(value_.v_handle); } protected: friend class DGLArgsSetter; friend class DGLRetValue; DGLPODValue_() : type_code_(kNull) {} DGLPODValue_(DGLValue value, int type_code) : value_(value), type_code_(type_code) {} /** @brief The value */ DGLValue value_; /** @brief the type code */ int type_code_; }; /** * @brief A single argument value to PackedFunc. * Containing both type_code and DGLValue * * Provides utilities to do type cast into other types. */ class DGLArgValue : public DGLPODValue_ { public: /** @brief default constructor */ DGLArgValue() {} /** * @brief constructor * @param value of the function * @param type_code The type code. */ DGLArgValue(DGLValue value, int type_code) : DGLPODValue_(value, type_code) {} // reuse converter from parent using DGLPODValue_::operator double; using DGLPODValue_::operator int64_t; using DGLPODValue_::operator uint64_t; using DGLPODValue_::operator int; using DGLPODValue_::operator bool; using DGLPODValue_::operator void*; using DGLPODValue_::operator DGLArray*; using DGLPODValue_::operator NDArray; using DGLPODValue_::operator DGLContext; // conversion operator. 
operator std::string() const { if (type_code_ == kDGLDataType) { return DGLDataType2String(operator DGLDataType()); } else if (type_code_ == kBytes) { DGLByteArray* arr = static_cast(value_.v_handle); return std::string(arr->data, arr->size); } else { DGL_CHECK_TYPE_CODE(type_code_, kStr); return std::string(value_.v_str); } } operator DGLDataType() const { if (type_code_ == kStr) { return String2DGLDataType(operator std::string()); } DGL_CHECK_TYPE_CODE(type_code_, kDGLDataType); return value_.v_type; } operator PackedFunc() const { if (type_code_ == kNull) return PackedFunc(); DGL_CHECK_TYPE_CODE(type_code_, kFuncHandle); return *ptr(); } template operator TypedPackedFunc() const { return TypedPackedFunc(operator PackedFunc()); } operator Module() const { DGL_CHECK_TYPE_CODE(type_code_, kModuleHandle); return *ptr(); } const DGLValue& value() const { return value_; } // Deferred extension handler. template inline TObjectRef AsObjectRef() const; // Convert this value to arbitrary class type template < typename T, typename = typename std::enable_if::value>::type> inline operator T() const; // Return true if the value is of TObjectRef type template < typename TObjectRef, typename = typename std::enable_if< std::is_class::value>::type> inline bool IsObjectType() const; // get internal node ptr, if it is node inline std::shared_ptr& obj_sptr(); }; /** * @brief Return Value container, * Unlike DGLArgValue, which only holds reference and do not delete * the underlying container during destruction. * * DGLRetValue holds value and will manage the underlying containers * when it stores a complicated data type. */ class DGLRetValue : public DGLPODValue_ { public: /** @brief default constructor */ DGLRetValue() {} /** * @brief move constructor from anoter return value. * @param other The other return value. 
*/ DGLRetValue(DGLRetValue&& other) : DGLPODValue_(other.value_, other.type_code_) { other.value_.v_handle = nullptr; other.type_code_ = kNull; } /** @brief destructor */ ~DGLRetValue() { this->Clear(); } // reuse converter from parent using DGLPODValue_::operator double; using DGLPODValue_::operator int64_t; using DGLPODValue_::operator uint64_t; using DGLPODValue_::operator int; using DGLPODValue_::operator bool; using DGLPODValue_::operator void*; using DGLPODValue_::operator DGLArray*; using DGLPODValue_::operator DGLContext; using DGLPODValue_::operator NDArray; // Copy construction is allowed: the other value is copied via Assign(). DGLRetValue(const DGLRetValue& other) { this->Assign(other); } // conversion operators operator std::string() const { if (type_code_ == kDGLDataType) { return DGLDataType2String(operator DGLDataType()); } else if (type_code_ == kBytes) { return *ptr(); } DGL_CHECK_TYPE_CODE(type_code_, kStr); return *ptr(); } operator DGLDataType() const { if (type_code_ == kStr) { return String2DGLDataType(operator std::string()); } DGL_CHECK_TYPE_CODE(type_code_, kDGLDataType); return value_.v_type; } operator PackedFunc() const { if (type_code_ == kNull) return PackedFunc(); DGL_CHECK_TYPE_CODE(type_code_, kFuncHandle); return *ptr(); } template operator TypedPackedFunc() const { return TypedPackedFunc(operator PackedFunc()); } operator Module() const { DGL_CHECK_TYPE_CODE(type_code_, kModuleHandle); return *ptr(); } // Assign operators /** * @brief Move assignment: release the current payload, take over the value * and type code of \a other, and mark \a other as kNull so that it does not * release the payload again. */ DGLRetValue& operator=(DGLRetValue&& other) { this->Clear(); value_ = other.value_; type_code_ = other.type_code_; other.type_code_ = kNull; return *this; } DGLRetValue& operator=(double value) { this->SwitchToPOD(kDGLFloat); value_.v_float64 = value; return *this; } DGLRetValue& operator=(std::nullptr_t value) { this->SwitchToPOD(kNull); value_.v_handle = value; return *this; } DGLRetValue& operator=(void* value) { this->SwitchToPOD(kHandle); value_.v_handle = value; return *this; } DGLRetValue& operator=(int64_t value) 
{ this->SwitchToPOD(kDGLInt); value_.v_int64 = value; return *this; } DGLRetValue& operator=(int value) { this->SwitchToPOD(kDGLInt); value_.v_int64 = value; return *this; } DGLRetValue& operator=(DGLDataType t) { this->SwitchToPOD(kDGLDataType); value_.v_type = t; return *this; } DGLRetValue& operator=(DGLContext ctx) { this->SwitchToPOD(kDGLContext); value_.v_ctx = ctx; return *this; } DGLRetValue& operator=(bool value) { this->SwitchToPOD(kDGLInt); value_.v_int64 = value; return *this; } DGLRetValue& operator=(std::string value) { this->SwitchToClass(kStr, value); return *this; } DGLRetValue& operator=(DGLByteArray value) { this->SwitchToClass(kBytes, std::string(value.data, value.size)); return *this; } DGLRetValue& operator=(NDArray other) { this->Clear(); type_code_ = kNDArrayContainer; value_.v_handle = other.data_; other.data_ = nullptr; return *this; } DGLRetValue& operator=(PackedFunc f) { this->SwitchToClass(kFuncHandle, f); return *this; } template DGLRetValue& operator=(const TypedPackedFunc& f) { return operator=(f.packed()); } DGLRetValue& operator=(Module m) { this->SwitchToClass(kModuleHandle, m); return *this; } DGLRetValue& operator=(const DGLRetValue& other) { // NOLINT(*0 this->Assign(other); return *this; } DGLRetValue& operator=(const DGLArgValue& other) { this->Assign(other); return *this; } template < typename T, typename = typename std::enable_if< extension_class_info::code != 0>::type> DGLRetValue& operator=(const T& other) { this->SwitchToClass(extension_class_info::code, other); return *this; } /** * @brief Move the value back to front-end via C API. * This marks the current container as null. * The managed resources is moved to front-end and * the front end should take charge in managing them. * * @param ret_value The return value. * @param ret_type_code The return type code. */ void MoveToCHost(DGLValue* ret_value, int* ret_type_code) { // cannot move str; need specially handle. 
CHECK(type_code_ != kStr && type_code_ != kBytes); *ret_value = value_; *ret_type_code = type_code_; type_code_ = kNull; } /** @return The value field, if the data is POD */ const DGLValue& value() const { CHECK( type_code_ != kObjectHandle && type_code_ != kFuncHandle && type_code_ != kModuleHandle && type_code_ != kStr) << "DGLRetValue.value can only be used for POD data"; return value_; } // ObjectRef related extenstions: in dgl/packed_func_ext.h template < typename T, typename = typename std::enable_if::value>::type> inline operator T() const; template inline TObjectRef AsObjectRef() const; inline DGLRetValue& operator=(const ObjectRef& other); inline DGLRetValue& operator=(const std::shared_ptr& other); private: template void Assign(const T& other) { switch (other.type_code()) { case kStr: { SwitchToClass(kStr, other); break; } case kBytes: { SwitchToClass(kBytes, other); break; } case kFuncHandle: { SwitchToClass(kFuncHandle, other); break; } case kModuleHandle: { SwitchToClass(kModuleHandle, other); break; } case kNDArrayContainer: { *this = other.operator NDArray(); break; } case kObjectHandle: { SwitchToClass >( kObjectHandle, *other.template ptr >()); break; } default: { if (other.type_code() < kExtBegin) { SwitchToPOD(other.type_code()); value_ = other.value_; } else { #if DGL_RUNTIME_HEADER_ONLY LOG(FATAL) << "Header only mode do not support ext type"; #else this->Clear(); type_code_ = other.type_code(); value_.v_handle = (*(ExtTypeVTable::Get(other.type_code())->clone))( other.value().v_handle); #endif } break; } } } // get the internal container. 
/** * @brief Switch the container to the given POD type code. * The current payload is released via Clear() only when the type code * actually changes. */ void SwitchToPOD(int type_code) { if (type_code_ != type_code) { this->Clear(); type_code_ = type_code; } } /** * @brief Store a copy of \a v as a heap-allocated T under the given type code. * When the container already holds this type code, the existing object is * assigned in place; otherwise the old payload is cleared and a new T is * allocated. */ template void SwitchToClass(int type_code, T v) { if (type_code_ != type_code) { this->Clear(); type_code_ = type_code; value_.v_handle = new T(v); } else { *static_cast(value_.v_handle) = v; } } /** * @brief Release the payload currently held and reset the type code to kNull. * Heap-allocated class payloads are deleted, an NDArray container has its * reference dropped via DecRef(), and extension types are destroyed through * their registered vtable. * NOTE(review): Assign() sends every type code that is not < kExtBegin down * the extension path, while the test below is a strict > kExtBegin; confirm * that kExtBegin itself is never used as a live type code. */ void Clear() { if (type_code_ == kNull) return; switch (type_code_) { case kStr: case kBytes: delete ptr(); break; case kFuncHandle: delete ptr(); break; case kModuleHandle: delete ptr(); break; case kObjectHandle: delete ptr >(); break; case kNDArrayContainer: { static_cast(value_.v_handle)->DecRef(); break; } } if (type_code_ > kExtBegin) { #if DGL_RUNTIME_HEADER_ONLY LOG(FATAL) << "Header only mode do not support ext type"; #else (*(ExtTypeVTable::Get(type_code_)->destroy))(value_.v_handle); #endif } type_code_ = kNull; } }; // implementation details /** * @brief Return the i-th argument paired with its type code. * The CHECK_LT fails when i >= num_args. * NOTE(review): a negative i is not rejected by this check. */ inline DGLArgValue DGLArgs::operator[](int i) const { CHECK_LT(i, num_args) << "not enough argument passed, " << num_args << " passed" << " but request arg[" << i << "]."; return DGLArgValue(values[i], type_codes[i]); } inline int DGLArgs::size() const { return num_args; } inline void PackedFunc::CallPacked(DGLArgs args, DGLRetValue* rv) const { body_(args, rv); } inline PackedFunc::FType PackedFunc::body() const { return body_; } // internal namespace namespace detail { template struct for_each_dispatcher { template static void run(const F& f, T&& value, Args&&... args) { // NOLINT(*) f(I, std::forward(value)); for_each_dispatcher::run( f, std::forward(args)...); } }; template struct for_each_dispatcher { static void run(const F& f) {} // NOLINT(*) }; template inline void for_each(const F& f, Args&&... 
args) { // NOLINT(*) for_each_dispatcher::run( f, std::forward(args)...); } } // namespace detail /* @brief argument settter to PackedFunc */ class DGLArgsSetter { public: DGLArgsSetter(DGLValue* values, int* type_codes) : values_(values), type_codes_(type_codes) {} // setters for POD types template < typename T, typename = typename std::enable_if::value>::type> void operator()(size_t i, T value) const { values_[i].v_int64 = static_cast(value); type_codes_[i] = kDGLInt; } void operator()(size_t i, uint64_t value) const { values_[i].v_int64 = static_cast(value); CHECK_LE(value, static_cast(std::numeric_limits::max())); type_codes_[i] = kDGLInt; } void operator()(size_t i, double value) const { values_[i].v_float64 = value; type_codes_[i] = kDGLFloat; } void operator()(size_t i, std::nullptr_t value) const { values_[i].v_handle = value; type_codes_[i] = kNull; } void operator()(size_t i, const DGLArgValue& value) const { values_[i] = value.value_; type_codes_[i] = value.type_code_; } void operator()(size_t i, void* value) const { values_[i].v_handle = value; type_codes_[i] = kHandle; } void operator()(size_t i, DGLArray* value) const { values_[i].v_handle = value; type_codes_[i] = kArrayHandle; } void operator()(size_t i, DGLContext value) const { values_[i].v_ctx = value; type_codes_[i] = kDGLContext; } void operator()(size_t i, DGLDataType value) const { values_[i].v_type = value; type_codes_[i] = kDGLDataType; } void operator()(size_t i, const char* value) const { values_[i].v_str = value; type_codes_[i] = kStr; } // setters for container type // They must be reference(instead of const ref) // to make sure they are alive in the tuple(instead of getting converted) void operator()(size_t i, const std::string& value) const { // NOLINT(*) values_[i].v_str = value.c_str(); type_codes_[i] = kStr; } void operator()(size_t i, const DGLByteArray& value) const { // NOLINT(*) values_[i].v_handle = const_cast(&value); type_codes_[i] = kBytes; } void operator()(size_t i, const 
PackedFunc& value) const { // NOLINT(*) values_[i].v_handle = const_cast(&value); type_codes_[i] = kFuncHandle; } template void operator()( size_t i, const TypedPackedFunc& value) const { // NOLINT(*) operator()(i, value.packed()); } void operator()(size_t i, const Module& value) const { // NOLINT(*) values_[i].v_handle = const_cast(&value); type_codes_[i] = kModuleHandle; } void operator()(size_t i, const NDArray& value) const { // NOLINT(*) values_[i].v_handle = value.data_; type_codes_[i] = kNDArrayContainer; } void operator()(size_t i, const DGLRetValue& value) const { // NOLINT(*) if (value.type_code() == kStr) { values_[i].v_str = value.ptr()->c_str(); type_codes_[i] = kStr; } else { CHECK_NE(value.type_code(), kBytes) << "not handled."; values_[i] = value.value_; type_codes_[i] = value.type_code(); } } // extension template < typename T, typename = typename std::enable_if< extension_class_info::code != 0>::type> inline void operator()(size_t i, const T& value) const; // ObjectRef related extenstions: in dgl/packed_func_ext.h inline void operator()(size_t i, const ObjectRef& other) const; // NOLINT(*) private: /** @brief The values fields */ DGLValue* values_; /** @brief The type code fields */ int* type_codes_; }; template inline DGLRetValue PackedFunc::operator()(Args&&... args) const { const int kNumArgs = sizeof...(Args); const int kArraySize = kNumArgs > 0 ? kNumArgs : 1; DGLValue values[kArraySize]; int type_codes[kArraySize]; detail::for_each( DGLArgsSetter(values, type_codes), std::forward(args)...); DGLRetValue rv; body_(DGLArgs(values, type_codes, kNumArgs), &rv); return rv; } namespace detail { template struct unpack_call_dispatcher { template static void run( const F& f, const DGLArgs& args_pack, DGLRetValue* rv, Args&&... 
unpacked_args) { unpack_call_dispatcher::run( f, args_pack, rv, std::forward(unpacked_args)..., args_pack[index]); } }; template struct unpack_call_dispatcher { template static void run( const F& f, const DGLArgs& args_pack, DGLRetValue* rv, Args&&... unpacked_args) { *rv = R(f(std::forward(unpacked_args)...)); } }; template struct unpack_call_dispatcher { template static void run( const F& f, const DGLArgs& args_pack, DGLRetValue* rv, Args&&... unpacked_args) { f(std::forward(unpacked_args)...); } }; template inline void unpack_call(const F& f, const DGLArgs& args, DGLRetValue* rv) { unpack_call_dispatcher::run(f, args, rv); } template inline R call_packed(const PackedFunc& pf, Args&&... args) { return R(pf(std::forward(args)...)); } template struct typed_packed_call_dispatcher { template static inline R run(const PackedFunc& pf, Args&&... args) { return pf(std::forward(args)...); } }; template <> struct typed_packed_call_dispatcher { template static inline void run(const PackedFunc& pf, Args&&... args) { pf(std::forward(args)...); } }; } // namespace detail template TypedPackedFunc::TypedPackedFunc(PackedFunc packed) : packed_(packed) {} template template inline void TypedPackedFunc::AssignTypedLambda(FType flambda) { packed_ = PackedFunc([flambda](const DGLArgs& args, DGLRetValue* rv) { detail::unpack_call(flambda, args, rv); }); } template inline R TypedPackedFunc::operator()(Args... 
args) const { return detail::typed_packed_call_dispatcher::run( packed_, std::forward(args)...); } // extension and node type handling namespace detail { template struct DGLValueCast { static T Apply(const TSrc* self) { return self->template AsObjectRef(); } }; template struct DGLValueCast { static T Apply(const TSrc* self) { return self->template AsExtension(); } }; } // namespace detail template inline DGLArgValue::operator T() const { return detail::DGLValueCast< T, DGLArgValue, extension_class_info::code != 0>::Apply(this); } template inline DGLRetValue::operator T() const { return detail::DGLValueCast< T, DGLRetValue, extension_class_info::code != 0>::Apply(this); } template inline void DGLArgsSetter::operator()(size_t i, const T& value) const { static_assert( extension_class_info::code != 0, "Need to have extesion code"); type_codes_[i] = extension_class_info::code; values_[i].v_handle = const_cast(&value); } // extension type handling template struct ExtTypeInfo { static void destroy(void* handle) { delete static_cast(handle); } static void* clone(void* handle) { return new T(*static_cast(handle)); } }; template inline ExtTypeVTable* ExtTypeVTable::Register_() { const int code = extension_class_info::code; static_assert( code != 0, "require extension_class_info traits to be declared with non-zero code"); ExtTypeVTable vt; vt.clone = ExtTypeInfo::clone; vt.destroy = ExtTypeInfo::destroy; return ExtTypeVTable::RegisterInternal(code, vt); } // Implement Module::GetFunction // Put implementation in this file so we have seen the PackedFunc inline PackedFunc Module::GetFunction( const std::string& name, bool query_imports) { PackedFunc pf = node_->GetFunction(name, node_); if (pf != nullptr) return pf; if (query_imports) { for (const Module& m : node_->imports_) { pf = m.node_->GetFunction(name, m.node_); if (pf != nullptr) return pf; } } return pf; } } // namespace runtime } // namespace dgl #endif // DGL_RUNTIME_PACKED_FUNC_H_ 
================================================ FILE: include/dgl/runtime/parallel_for.h ================================================ /** * Copyright (c) 2021 by Contributors * @file dgl/runtime/parallel_for.h * @brief OpenMP-based parallel for-loop and reduction helpers. */ #ifndef DGL_RUNTIME_PARALLEL_FOR_H_ #define DGL_RUNTIME_PARALLEL_FOR_H_ #include #include #include #include #include #include #include #include #include namespace { /** @brief Divide x by y, rounding up (intended for non-negative x and positive y). */ int64_t divup(int64_t x, int64_t y) { return (x + y - 1) / y; } } // namespace namespace dgl { namespace runtime { namespace { /** * @brief Holder of the default grain size: the value parsed from * dgl::kDGLParallelForGrainSize when that is set, otherwise the value given * to the constructor. */ struct DefaultGrainSizeT { size_t grain_size; DefaultGrainSizeT() : DefaultGrainSizeT(1) {} explicit DefaultGrainSizeT(size_t default_grain_size) { auto var = dgl::kDGLParallelForGrainSize; if (var) { grain_size = std::stoul(var); } else { grain_size = default_grain_size; } } size_t operator()() { return grain_size; } }; } // namespace /** * @brief Compute the number of threads to use for the range [begin, end). * * Returns 1 when OpenMP is not enabled, when called from inside a parallel * region, when the range is empty or inverted, or when the range holds no more * than \a grain_size elements. Otherwise returns the smaller of the OpenMP * maximum thread count and the number of grain-sized chunks in the range. * A \a grain_size of 0 is treated as 1 so that the chunk count never divides * by zero. */ inline size_t compute_num_threads(size_t begin, size_t end, size_t grain_size) { #ifdef _OPENMP /* An empty or inverted range never needs more than one thread. */ if (begin >= end) return 1; /* A zero grain size would make divup() divide by zero; treat it as 1. */ if (grain_size == 0) grain_size = 1; const size_t size = end - begin; if (omp_in_parallel() || size <= grain_size) return 1; return std::min( static_cast<int64_t>(omp_get_max_threads()), divup(size, grain_size)); #else return 1; #endif } static DefaultGrainSizeT default_grain_size; /** * @brief OpenMP-based parallel for loop. * * It requires each thread's workload to have at least \a grain_size elements. * The loop body will be a function that takes in two arguments \a begin and \a * end, which stands for the starting (inclusive) and ending index (exclusive) * of the workload. */ template void parallel_for( const size_t begin, const size_t end, const size_t grain_size, F&& f) { if (begin >= end) { return; } #ifdef _OPENMP auto num_threads = compute_num_threads(begin, end, grain_size); // (BarclayII) the exception code is borrowed from PyTorch. 
std::atomic_flag err_flag = ATOMIC_FLAG_INIT; std::exception_ptr eptr; #pragma omp parallel num_threads(num_threads) { auto tid = omp_get_thread_num(); auto chunk_size = divup((end - begin), num_threads); auto begin_tid = begin + tid * chunk_size; if (begin_tid < end) { auto end_tid = std::min(end, static_cast(chunk_size + begin_tid)); try { f(begin_tid, end_tid); } catch (...) { if (!err_flag.test_and_set()) eptr = std::current_exception(); } } } if (eptr) std::rethrow_exception(eptr); #else f(begin, end); #endif } /** * @brief OpenMP-based parallel for loop with default grain size. * * parallel_for with grain size to default value, either 1 or controlled through * environment variable DGL_PARALLEL_FOR_GRAIN_SIZE. * If grain size is set to 1, the function behaves the same way as OpenMP * parallel for pragma with static scheduling. */ template void parallel_for(const size_t begin, const size_t end, F&& f) { parallel_for(begin, end, default_grain_size(), std::forward(f)); } /** * @brief OpenMP-based two-stage parallel reduction. * * The first-stage reduction function \a f works in parallel. Each thread's * workload has at least \a grain_size elements. The loop body will be a * function that takes in the starting index (inclusive), the ending index * (exclusive), and the reduction identity. * * The second-stage reduction function \a sf is a binary function working in the * main thread. It aggregates the partially reduced result computed from each * thread. 
* * Example to compute a parallelized max reduction of an array \c a: * * parallel_reduce( * 0, // starting index * 100, // ending index * 1, // grain size * -std::numeric_limits<float>::infinity(), // identity * [&a] (int begin, int end, float ident) { * // first-stage partial reducer * float result = ident; * for (int i = begin; i < end; ++i) * result = std::max(result, a[i]); * return result; * }, * [] (float result, float partial_result) { * return std::max(result, partial_result); * }); */ template DType parallel_reduce( const size_t begin, const size_t end, const size_t grain_size, const DType ident, const F& f, const SF& sf) { if (begin >= end) { return ident; } int num_threads = compute_num_threads(begin, end, grain_size); if (num_threads == 1) { return f(begin, end, ident); } std::vector results(num_threads, ident); std::atomic_flag err_flag = ATOMIC_FLAG_INIT; std::exception_ptr eptr; #pragma omp parallel num_threads(num_threads) { auto tid = omp_get_thread_num(); auto chunk_size = divup((end - begin), num_threads); auto begin_tid = begin + tid * chunk_size; if (begin_tid < end) { auto end_tid = std::min(end, static_cast(chunk_size + begin_tid)); try { results[tid] = f(begin_tid, end_tid, ident); } catch (...) { if (!err_flag.test_and_set()) eptr = std::current_exception(); } } } if (eptr) std::rethrow_exception(eptr); DType out = ident; for (int64_t i = 0; i < num_threads; ++i) out = sf(out, results[i]); return out; } } // namespace runtime } // namespace dgl #endif // DGL_RUNTIME_PARALLEL_FOR_H_ ================================================ FILE: include/dgl/runtime/registry.h ================================================ /** * Copyright (c) 2017 by Contributors * @file dgl/runtime/registry.h * @brief This file defines the DGL global function registry. * * The registered functions will be made available to front-end * as well as backend users. * * The registry stores type-erased functions. 
* Each registered function is automatically exposed * to front-end language(e.g. python). * * Front-end can also pass callbacks as PackedFunc, or register * them into the same global registry in C++. * The goal is to mix the front-end language and the DGL back-end. * * @code * // register the function as MyAPIFuncName * DGL_REGISTER_GLOBAL("MyAPIFuncName") * .set_body([](DGLArgs args, DGLRetValue* rv) { * // my code. * }); * @endcode */ #ifndef DGL_RUNTIME_REGISTRY_H_ #define DGL_RUNTIME_REGISTRY_H_ #include #include #include "packed_func.h" namespace dgl { namespace runtime { /** @brief Registry for global function */ class Registry { public: /** * @brief set the body of the function to be f * @param f The body of the function. */ DGL_DLL Registry& set_body(PackedFunc f); // NOLINT(*) /** * @brief set the body of the function to be f * @param f The body of the function. */ Registry& set_body(PackedFunc::FType f) { // NOLINT(*) return set_body(PackedFunc(f)); } /** * @brief set the body of the function to be TypedPackedFunc. * * @code * * DGL_REGISTER_GLOBAL("addone") * .set_body_typed<int(int)>([](int x) { return x + 1; }); * * @endcode * * @param f The body of the function. * @tparam FType the signature of the function. * @tparam FLambda The type of f. */ template Registry& set_body_typed(FLambda f) { return set_body(TypedPackedFunc(f).packed()); } /** * @brief Register a function with given name * @param name The name of the function. * @param override Whether to allow overriding an existing function. * @return Reference to the registry. */ DGL_DLL static Registry& Register( const std::string& name, bool override = false); // NOLINT(*) /** * @brief Erase global function from registry, if it exists. * @param name The name of the function. * @return Whether the function existed. */ DGL_DLL static bool Remove(const std::string& name); /** * @brief Get the global function by name. * @param name The name of the function. * @return pointer to the registered function, * nullptr if it does not exist. 
*/ DGL_DLL static const PackedFunc* Get(const std::string& name); // NOLINT(*) /** * @brief Get the names of currently registered global function. * @return The names */ DGL_DLL static std::vector ListNames(); // Internal class. struct Manager; protected: /** @brief name of the function */ std::string name_; /** @brief internal packed function */ PackedFunc func_; friend struct Manager; }; /** @brief helper macro to supress unused warning */ #if defined(__GNUC__) #define DGL_ATTRIBUTE_UNUSED __attribute__((unused)) #else #define DGL_ATTRIBUTE_UNUSED #endif #define DGL_STR_CONCAT_(__x, __y) __x##__y #define DGL_STR_CONCAT(__x, __y) DGL_STR_CONCAT_(__x, __y) #define DGL_FUNC_REG_VAR_DEF \ static DGL_ATTRIBUTE_UNUSED ::dgl::runtime::Registry& __mk_##DGL #define DGL_TYPE_REG_VAR_DEF \ static DGL_ATTRIBUTE_UNUSED ::dgl::runtime::ExtTypeVTable* __mk_##DGLT /** * @brief Register a function globally. * @code * DGL_REGISTER_GLOBAL("MyPrint") * .set_body([](DGLArgs args, DGLRetValue* rv) { * }); * @endcode */ #define DGL_REGISTER_GLOBAL(OpName) \ DGL_STR_CONCAT(DGL_FUNC_REG_VAR_DEF, __COUNTER__) = \ ::dgl::runtime::Registry::Register(OpName) /** * @brief Macro to register extension type. * This must be registered in a cc file * after the trait extension_class_info is defined. 
*/
#define DGL_REGISTER_EXT_TYPE(T)                      \
  DGL_STR_CONCAT(DGL_TYPE_REG_VAR_DEF, __COUNTER__) = \
      ::dgl::runtime::ExtTypeVTable::Register_()
// NOTE(review): the macro parameter T does not appear in the expansion above;
// presumably it was the template argument of Register_ and was lost during
// extraction -- confirm against the upstream header.

} // namespace runtime
} // namespace dgl
#endif // DGL_RUNTIME_REGISTRY_H_

================================================
FILE: include/dgl/runtime/serializer.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file dgl/runtime/serializer.h
 * @brief Serializer extension to support DGL data types
 *  Include this file to enable serialization of DGLDataType, DGLContext
 */
#ifndef DGL_RUNTIME_SERIALIZER_H_
#define DGL_RUNTIME_SERIALIZER_H_

// NOTE(review): the two bare #include directives below have no header name in
// this copy of the file (presumably lost during extraction).
#include
#include

#include "c_runtime_api.h"
#include "smart_ptr_serializer.h"

namespace dmlc {
namespace serializer {

// NOTE(review): the type arguments of the Handler specializations, of the
// nested Handler::Write/Handler::Read calls and of the static_casts below are
// absent in this copy (presumably stripped during extraction); the comments
// describe only what the visible statements do.

/**
 * @brief Serialization handler whose Write/Read operate on a DGLDataType.
 *
 * The fields are streamed in the fixed order code, bits, lanes.
 */
template <>
struct Handler {
  /** @brief Write the code, bits and lanes fields of \c dtype to \c strm. */
  inline static void Write(Stream *strm, const DGLDataType &dtype) {
    Handler::Write(strm, dtype.code);
    Handler::Write(strm, dtype.bits);
    Handler::Write(strm, dtype.lanes);
  }
  /**
   * @brief Read code, bits and lanes from \c strm into \c dtype.
   * @return false as soon as one field cannot be read (fields read before the
   *         failure have already been stored), true otherwise.
   */
  inline static bool Read(Stream *strm, DGLDataType *dtype) {
    if (!Handler::Read(strm, &(dtype->code))) return false;
    if (!Handler::Read(strm, &(dtype->bits))) return false;
    if (!Handler::Read(strm, &(dtype->lanes))) return false;
    return true;
  }
};

/**
 * @brief Serialization handler whose Write/Read operate on a DGLContext.
 *
 * The device type is streamed through an int32_t temporary, followed by the
 * device id.
 */
template <>
struct Handler {
  /** @brief Write the device type (as int32_t) and the device id of \c ctx. */
  inline static void Write(Stream *strm, const DGLContext &ctx) {
    int32_t device_type = static_cast(ctx.device_type);
    Handler::Write(strm, device_type);
    Handler::Write(strm, ctx.device_id);
  }
  /**
   * @brief Read the device type and the device id from \c strm into \c ctx.
   * @return false if either value cannot be read. The device type is stored
   *         into \c ctx before the device id is attempted.
   */
  inline static bool Read(Stream *strm, DGLContext *ctx) {
    int32_t device_type = 0;
    if (!Handler::Read(strm, &(device_type))) return false;
    ctx->device_type = static_cast(device_type);
    if (!Handler::Read(strm, &(ctx->device_id))) return false;
    return true;
  }
};

} // namespace serializer
} // namespace dmlc

#endif // DGL_RUNTIME_SERIALIZER_H_

================================================
FILE: include/dgl/runtime/shared_mem.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file dgl/runtime/shared_mem.h
 * @brief shared memory management.
*/
#ifndef DGL_RUNTIME_SHARED_MEM_H_
#define DGL_RUNTIME_SHARED_MEM_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction).
#ifdef _WIN32
#include
#endif // _WIN32
#include

namespace dgl {
namespace runtime {

/**
 * @brief This class owns shared memory.
 *
 * When the object is gone, the shared memory will also be destroyed.
 * When the shared memory is destroyed, the file corresponding to
 * the shared memory is removed.
 */
class SharedMemory {
  /**
   * @brief whether the shared memory is owned by the object.
   *
   * If shared memory is created in the object, it'll be owned by the object
   * and will be responsible for deleting it when the object is destroyed.
   */
  bool own_;

  /** @brief the file descriptor of the shared memory (a HANDLE on Windows). */
#ifndef _WIN32
  int fd_;
#else // !_WIN32
  HANDLE handle_;
#endif // _WIN32
  /** @brief the address of the shared memory. */
  void *ptr_;
  /** @brief the size of the shared memory. */
  size_t size_;
  /**
   * @brief the name of the object.
   *
   * In Unix, shared memory is identified by a file. Thus, `name` is actually
   * the file name that identifies the shared memory.
   */
  std::string name;

 public:
  /** @brief Get the filename of shared memory file (returned by value). */
  std::string GetName() const { return name; }

  /**
   * @brief constructor of the shared memory.
   * @param name The file corresponding to the shared memory.
   */
  explicit SharedMemory(const std::string &name);
  /**
   * @brief destructor of the shared memory.
   * It deallocates the shared memory and removes the corresponding file.
   */
  ~SharedMemory();
  /**
   * @brief create shared memory.
   * It creates the file and shared memory.
   * @param sz the size of the shared memory.
   * @return the address of the shared memory
   */
  void *CreateNew(size_t sz);
  /**
   * @brief open shared memory that has already been created.
   * @param sz the size of the shared memory.
   * @return the address of the shared memory
   */
  void *Open(size_t sz);

  /**
   * @brief check if the shared memory exists.
   * @param name the name of the shared memory.
   * @return a boolean value to indicate if the shared memory exists.
   */
  static bool Exist(const std::string &name);
};

} // namespace runtime
} // namespace dgl

#endif // DGL_RUNTIME_SHARED_MEM_H_

================================================
FILE: include/dgl/runtime/smart_ptr_serializer.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file dgl/runtime/smart_ptr_serializer.h
 * @brief Serializer extension for smart pointers.
 *  Include this file to enable serialization of objects held through
 *  std::shared_ptr and std::unique_ptr.
 */
#ifndef DGL_RUNTIME_SMART_PTR_SERIALIZER_H_
#define DGL_RUNTIME_SMART_PTR_SERIALIZER_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction).
#include
#include
#include
#include

namespace dmlc {
namespace serializer {

// NOTE(review): template parameter lists and template arguments are missing in
// the two handlers below (presumably stripped during extraction); the comments
// describe only what the visible statements do.

//! \cond Doxygen_Suppress
/**
 * Handler for objects held by std::shared_ptr.
 *
 * Write dereferences the pointer unconditionally, so the pointer passed to it
 * must not be null.
 */
template
struct Handler> {
  inline static void Write(Stream *strm, const std::shared_ptr &data) {
    Handler::Write(strm, *data.get());
  }
  inline static bool Read(Stream *strm, std::shared_ptr *data) {
    // When read, the default initialization behavior of shared_ptr is
    // shared_ptr(), which is holding a nullptr. Here we need to manually
    // reset to a real object for further loading
    if (!(*data)) {
      data->reset(dgl::Serializer::new_object());
    }
    // An already non-null pointer is reused and loaded into in place.
    return Handler::Read(strm, data->get());
  }
};

/**
 * Handler for objects held by std::unique_ptr.
 *
 * Write dereferences the pointer unconditionally, so the pointer passed to it
 * must not be null.
 */
template
struct Handler> {
  inline static void Write(Stream *strm, const std::unique_ptr &data) {
    Handler::Write(strm, *data.get());
  }
  inline static bool Read(Stream *strm, std::unique_ptr *data) {
    // When read, the default initialization behavior of unique_ptr is
    // unique_ptr(), which is holding a nullptr. Here we need to manually
    // reset to a real object for further loading
    if (!(*data)) {
      data->reset(dgl::Serializer::new_object());
    }
    // An already non-null pointer is reused and loaded into in place.
    return Handler::Read(strm, data->get());
  }
};

} // namespace serializer
} // namespace dmlc

#endif // DGL_RUNTIME_SMART_PTR_SERIALIZER_H_

================================================
FILE: include/dgl/runtime/tensordispatch.h
================================================
/**
 *  Copyright (c) 2020-2022 by Contributors
 * @file dgl/runtime/tensordispatch.h
 * @brief This file defines the dispatcher of tensor operators to
 * framework-specific implementations.
 *
 *  The dispatcher consists of a TensorDispatcher singleton in DGL C library and
 *  one separately-built shared library per supported backend.
 *
 *  Those shared libraries contain wrappers of the framework-specific operators.
 *  The wrappers are defined with extern "C", meaning that the C++ compiler will
 *  not do name mangling for those functions so that DGL can conveniently locate
 *  them using dlsym(3) (or GetProcAddress in Windows).
 *
 *  The TensorDispatcher singleton maintains a mapping from an array operator to
 *  the address of the corresponding symbol in the shared library. During
 *  initialization, the TensorDispatcher checks which backend DGL is using.
 *  It then locates and opens the corresponding shared library using dlopen(3)
 *  (or LoadLibrary in Windows), and populates the said mapping above with
 *  dlsym(3) (or GetProcAddress in Windows).
 *
 *  A tensor operator in TensorDispatcher first checks whether the corresponding
 *  symbol address is found in the mapping. If so, it calls the function located
 *  at the symbol address instead, to allocate/free pieces of memory on CPU/GPU.
 *  If not, it falls back to DeviceAPI::AllocWorkspace/FreeWorkspace.
*/

#ifndef DGL_RUNTIME_TENSORDISPATCH_H_
#define DGL_RUNTIME_TENSORDISPATCH_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction).
#include
#include
#if defined(WIN32) || defined(_WIN32)
#include
#endif // WIN32
#ifdef DGL_USE_CUDA
#include
#endif // DGL_USE_CUDA
#include "ndarray.h"

/**
 * @brief Casts a pointer \c entry to a function pointer with signature of \c
 * func.
 *
 * @note NOTE(review): the reinterpret_cast below has no target type in this
 *       copy of the file; presumably a template argument derived from \c func
 *       was stripped during extraction -- confirm against the upstream header.
 *       The macro is #undef'ed at the end of this header.
 */
#define FUNCCAST(func, entry) (*reinterpret_cast(entry))

namespace dgl {
namespace runtime {

/**
 * @brief Dispatcher that delegates the function calls to framework-specific C++
 * APIs.
 *
 * This class is not thread-safe.
 *
 * The inline wrappers below fetch their entry from entrypoints_ and call
 * through it without checking it for nullptr. Presumably callers are expected
 * to consult IsAvailable() first -- confirm against the call sites.
 */
class TensorDispatcher {
 public:
  /** @brief Get the singleton instance (a function-local static). */
  static TensorDispatcher* Global() {
    static TensorDispatcher inst;
    return &inst;
  }

  /** @brief Whether an adapter library is available. */
  inline bool IsAvailable() { return available_; }

  /** @brief Load symbols from the given tensor adapter library path. */
  bool Load(const char* path_cstr);

  /**
   * @brief Allocate a piece of CPU memory via PyTorch's CPUAllocator.
   * Used in CPUDeviceAPI::AllocWorkspace().
   *
   * @param nbytes The size to be allocated.
   * @return Pointer to the allocated memory.
   */
  inline void* CPUAllocWorkspace(size_t nbytes) {
    auto entry = entrypoints_[Op::kCPURawAlloc];
    return FUNCCAST(tensoradapter::CPURawAlloc, entry)(nbytes);
  }

  /**
   * @brief Free the CPU memory.
   * Used in CPUDeviceAPI::FreeWorkspace().
   *
   * @param ptr Pointer to the memory to be freed.
   */
  inline void CPUFreeWorkspace(void* ptr) {
    auto entry = entrypoints_[Op::kCPURawDelete];
    FUNCCAST(tensoradapter::CPURawDelete, entry)(ptr);
  }

#ifdef DGL_USE_CUDA
  /**
   * @brief Allocate a piece of GPU memory via
   * PyTorch's THCCachingAllocator.
   * Used in CUDADeviceAPI::AllocWorkspace().
   *
   * @note THCCachingAllocator specifies the device to allocate on
   * via cudaGetDevice(). Make sure to call cudaSetDevice()
   * before invoking this function.
   *
   * @param nbytes The size to be allocated.
   * @param stream The stream to be allocated on.
   * @return Pointer to the allocated memory.
   */
  inline void* CUDAAllocWorkspace(size_t nbytes, cudaStream_t stream) {
    auto entry = entrypoints_[Op::kCUDARawAlloc];
    return FUNCCAST(tensoradapter::CUDARawAlloc, entry)(nbytes, stream);
  }

  /**
   * @brief Free the GPU memory.
   * Used in CUDADeviceAPI::FreeWorkspace().
   *
   * @param ptr Pointer to the memory to be freed.
   */
  inline void CUDAFreeWorkspace(void* ptr) {
    auto entry = entrypoints_[Op::kCUDARawDelete];
    FUNCCAST(tensoradapter::CUDARawDelete, entry)(ptr);
  }

  /**
   * @brief Find the current PyTorch CUDA stream
   * Used in runtime::getCurrentCUDAStream().
   *
   * @note PyTorch pre-allocates/sets the current CUDA stream
   * on current device via cudaGetDevice(). Make sure to call cudaSetDevice()
   * before invoking this function.
   *
   * @return cudaStream_t stream handle
   */
  inline cudaStream_t CUDAGetCurrentStream() {
    auto entry = entrypoints_[Op::kCUDACurrentStream];
    return FUNCCAST(tensoradapter::CUDACurrentStream, entry)();
  }

  /**
   * @brief Allocate a piece of pinned CPU memory via PyTorch
   *     CachingHostAllocator.
   * @note Used in CUDADeviceAPI::AllocPinnedDataSpace().
   * @param nbytes The size to be allocated.
   * @param ctx Pointer to the PyTorch storage ctx ptr returned from the
   *     allocator.
   * @param deleter Pointer to the delete function ptr returned from the
   *     allocator.
   * @return Raw pointer to the allocated memory.
   */
  inline void* CUDAAllocHostWorkspace(
      size_t nbytes, void** ctx, void** deleter) {
    auto entry = entrypoints_[Op::kCUDARawHostAlloc];
    auto alloc_func = FUNCCAST(tensoradapter::CUDARawHostAlloc, entry);
    return alloc_func(nbytes, ctx, deleter);
  }

  /**
   * @brief Insert the pinned memory block (allocated via PyTorch
   *     CachingHostAllocator) back to the free list for future usage. (ref:
   *     pytorch/pytorch/blob/master/aten/src/ATen/cuda/CachingHostAllocator.cpp).
   * @note Used in CUDADeviceAPI::FreePinnedDataSpace().
   * @param deleter Pointer to the delete function ptr returned from the
   *     allocator.
   */
  inline void CUDAFreeHostWorkspace(void** deleter) {
    auto entry = entrypoints_[Op::kCUDARawHostDelete];
    FUNCCAST(tensoradapter::CUDARawHostDelete, entry)(deleter);
  }

  /**
   * @brief Invoke the record_event function call from PyTorch
   *     CachingHostAllocator.
   * @note This function associates a CUDA stream (used by a copy kernel) to the
   *     pinned data. In the free path of this data, which is achieved by
   *     calling CUDAFreeHostWorkspace, the set of associated streams is then
   *     consumed to ensure proper functionality. (ref:
   *     pytorch/pytorch/blob/master/aten/src/ATen/cuda/CachingHostAllocator.cpp).
   *     Used in CUDADeviceAPI::RecordedCopyDataFromTo().
   *
   * @param data Pointer of the tensor to be recorded.
   * @param ctx PyTorch storage ctx ptr returned from the allocator.
   * @param stream The stream that currently consumes this tensor.
   * @param device_id Device of the tensor.
   */
  inline void CUDARecordHostAlloc(
      void* data, void* ctx, cudaStream_t stream, int device_id) {
    auto entry = entrypoints_[Op::kCUDARecordHostAlloc];
    auto recorded_alloc = FUNCCAST(tensoradapter::CUDARecordHostAlloc, entry);
    recorded_alloc(data, ctx, stream, device_id);
  }

  /**
   * @brief Release cached pinned memory allocations via cudaHostFree.
   * @note Used in CUDADeviceAPI::PinData() before pinning any host memory by
   *     DGL.
   */
  inline void CUDAHostAllocatorEmptyCache() {
    auto entry = entrypoints_[Op::kCUDAHostAllocatorEmptyCache];
    FUNCCAST(tensoradapter::CUDAHostAllocatorEmptyCache, entry)();
  }
#endif // DGL_USE_CUDA

  /**
   * @brief Record streams that are using this tensor.
   * Used in NDArray::RecordStream().
   *
   * @param ptr Pointer of the tensor to be recorded.
   * @param stream The stream that is using this tensor.
   * @param device_id Device of the tensor.
   *
   * @note The body is compiled only when DGL_USE_CUDA is defined; otherwise
   *       this function does nothing.
   * @note NOTE(review): the static_cast applied to \c stream has no target
   *       type in this copy of the file (presumably stripped during
   *       extraction).
   */
  inline void RecordStream(void* ptr, DGLStreamHandle stream, int device_id) {
#ifdef DGL_USE_CUDA
    auto entry = entrypoints_[Op::kRecordStream];
    FUNCCAST(tensoradapter::RecordStream, entry)
    (ptr, static_cast(stream), device_id);
#endif
  }

 private:
  /** @brief ctor (private: instances are only created through Global()). */
  TensorDispatcher() = default;
  /** @brief dtor */
  ~TensorDispatcher();

  /**
   * @brief List of symbols in the adapter library.
   *
   * Must match the functions in tensoradapter/include/tensoradapter.h.
   * The position of each name must also agree with the constants in Op below.
   */
  static constexpr const char* names_[] = {
      "CPURawAlloc",
      "CPURawDelete",
#ifdef DGL_USE_CUDA
      "CUDARawAlloc",
      "CUDARawDelete",
      "CUDACurrentStream",
      "RecordStream",
      "CUDARawHostAlloc",
      "CUDARawHostDelete",
      "CUDARecordHostAlloc",
      "CUDAHostAllocatorEmptyCache",
#endif // DGL_USE_CUDA
  };

  /**
   * @brief Index of each function to the symbol list
   *
   * The same constants are used by the wrappers above to index entrypoints_.
   */
  class Op {
   public:
    static constexpr int kCPURawAlloc = 0;
    static constexpr int kCPURawDelete = 1;
#ifdef DGL_USE_CUDA
    static constexpr int kCUDARawAlloc = 2;
    static constexpr int kCUDARawDelete = 3;
    static constexpr int kCUDACurrentStream = 4;
    static constexpr int kRecordStream = 5;
    static constexpr int kCUDARawHostAlloc = 6;
    static constexpr int kCUDARawHostDelete = 7;
    static constexpr int kCUDARecordHostAlloc = 8;
    static constexpr int kCUDAHostAllocatorEmptyCache = 9;
#endif // DGL_USE_CUDA
  };

  /** @brief Number of functions */
  static constexpr int num_entries_ = sizeof(names_) / sizeof(names_[0]);

  /** @brief Entrypoints of each function (all nullptr until populated). */
  void* entrypoints_[num_entries_] = {
      nullptr, nullptr,
#ifdef DGL_USE_CUDA
      nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
#endif // DGL_USE_CUDA
  };

  /** @brief Value reported by IsAvailable(); false by default. */
  bool available_ = false;
  // Handle of the adapter library. Unlike available_ and entrypoints_ it has no
  // default member initializer here; presumably it is set by Load() -- confirm.
#if defined(WIN32) || defined(_WIN32)
  HINSTANCE handle_;
#else // !WIN32
  void* handle_;
#endif // WIN32
};

}; // namespace runtime
}; // namespace dgl

#undef FUNCCAST

#endif // DGL_RUNTIME_TENSORDISPATCH_H_

================================================
FILE: include/dgl/runtime/threading_backend.h
================================================
/**
 *  Copyright (c) 2018 by Contributors
 * @file dgl/runtime/threading_backend.h
 * @brief Utilities for manipulating thread pool threads.
 */
#ifndef DGL_RUNTIME_THREADING_BACKEND_H_
#define DGL_RUNTIME_THREADING_BACKEND_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction).
#include
#include
#include

namespace dgl {
namespace runtime {
namespace threading {

/**
 * @brief A platform-agnostic abstraction for managing a collection of
 *        thread pool threads.
 */
class ThreadGroup {
 public:
  // Implementation class; only forward-declared in this header.
  class Impl;

  /**
   * @brief Creates a collection of threads which run a provided function.
   *
   * @param num_workers The total number of worker threads in this group.
   *        Includes main thread if `exclude_worker0 = true`
   * @param worker_callback A callback which is run in its own thread.
   *        Receives the worker_id as an argument.
   * @param exclude_worker0 Whether to use the main thread as a worker.
   *        If `true`, worker0 will not be launched in a new thread and
   *        `worker_callback` will only be called for values >= 1. This
   *        allows use of the main thread as a worker.
   *
   * @note NOTE(review): the signature argument of std::function is missing in
   *       this copy of the file (presumably stripped during extraction).
   */
  ThreadGroup(
      int num_workers, std::function worker_callback,
      bool exclude_worker0 = false);
  ~ThreadGroup();

  /**
   * @brief Blocks until all non-main threads in the pool finish.
   */
  void Join();

  /** @brief Preferred CPU type passed to Configure(): big (1) or little (-1). */
  enum AffinityMode : int {
    kBig = 1,
    kLittle = -1,
  };

  /**
   * @brief configure the CPU id affinity
   *
   * @param mode The preferred CPU type (1 = big, -1 = little).
   * @param nthreads The number of threads to use (0 = use all).
   * @param exclude_worker0 Whether to use the main thread as a worker.
   *        If `true`, worker0 will not be launched in a new thread and
   *        `worker_callback` will only be called for values >= 1. This
   *        allows use of the main thread as a worker.
   *
   * @return The number of workers to use.
   */
  int Configure(AffinityMode mode, int nthreads, bool exclude_worker0);

 private:
  // Pointer to the implementation object (Impl is defined outside this header).
  Impl* impl_;
};

/**
 * @brief Platform-agnostic no-op.
 */
// This used to be Yield(), renaming to YieldThread() because windows.h defined
// it as a macro in later SDKs.
void YieldThread(); /** * @return the maximum number of effective workers for this system. */ int MaxConcurrency(); } // namespace threading } // namespace runtime } // namespace dgl #endif // DGL_RUNTIME_THREADING_BACKEND_H_ ================================================ FILE: include/dgl/runtime/util.h ================================================ /** * Copyright (c) 2017 by Contributors * @file dgl/runtime/util.h * @brief Useful runtime util. */ #ifndef DGL_RUNTIME_UTIL_H_ #define DGL_RUNTIME_UTIL_H_ #include "c_runtime_api.h" namespace dgl { namespace runtime { /** * @brief Check whether type matches the given spec. * @param t The type * @param code The type code. * @param bits The number of bits to be matched. * @param lanes The number of lanes sin the type. */ inline bool TypeMatch(DGLDataType t, int code, int bits, int lanes = 1) { return t.code == code && t.bits == bits && t.lanes == lanes; } } // namespace runtime } // namespace dgl // Forward declare the intrinsic id we need // in structure fetch to enable stackvm in runtime namespace dgl { namespace ir { namespace intrinsic { /** @brief The kind of structure field info used in intrinsic */ enum DGLStructFieldKind : int { // array head address kArrAddr, kArrData, kArrShape, kArrStrides, kArrNDim, kArrTypeCode, kArrTypeBits, kArrTypeLanes, kArrByteOffset, kArrDeviceId, kArrDeviceType, kArrKindBound_, // DGLValue field kDGLValueContent, kDGLValueKindBound_ }; } // namespace intrinsic } // namespace ir } // namespace dgl #endif // DGL_RUNTIME_UTIL_H_ ================================================ FILE: include/dgl/sampler.h ================================================ /** * Copyright (c) 2018 by Contributors * @file dgl/sampler.h * @brief DGL sampler header. 
*/
#ifndef DGL_SAMPLER_H_
#define DGL_SAMPLER_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction).
#include
#include
#include
#include

#include "graph_interface.h"
#include "nodeflow.h"

namespace dgl {

class ImmutableGraph;

/** @brief Static entry points of the NodeFlow samplers declared below. */
class SamplerOp {
 public:
  /**
   * @brief Sample a graph from the seed vertices with neighbor sampling.
   *        The neighbors are sampled with a uniform distribution.
   *
   * @tparam ValueType Element type of \c probability (float/double).
   *         NOTE(review): the template parameter list itself is missing in this
   *         copy of the file (presumably stripped during extraction), as is the
   *         element type of the \c seeds vector.
   * @param graph A graph for sampling.
   * @param seeds the nodes where we should start to sample.
   * @param edge_type the type of edges we should sample neighbors.
   * @param num_hops the number of hops to sample neighbors.
   * @param expand_factor the max number of neighbors to sample.
   * @param add_self_loop whether to add self loop to the sampled subgraph
   * @param probability the transition probability (float/double).
   * @return a NodeFlow graph.
   */
  template
  static NodeFlow NeighborSample(
      const ImmutableGraph *graph, const std::vector &seeds,
      const std::string &edge_type, int num_hops, int expand_factor,
      const bool add_self_loop, const ValueType *probability);

  /**
   * @brief Sample a graph from the seed vertices with layer sampling.
   *        The layers are sampled with a uniform distribution.
   *
   * @param graph A graph for sampling.
   * @param seeds the nodes where we should start to sample.
   * @param neigh_type the type of edges we should sample neighbors.
   * @param layer_sizes The size of layers.
   * @return a NodeFlow graph.
   */
  static NodeFlow LayerUniformSample(
      const ImmutableGraph *graph, const std::vector &seeds,
      const std::string &neigh_type, IdArray layer_sizes);
};

} // namespace dgl

#endif // DGL_SAMPLER_H_

================================================
FILE: include/dgl/sampling/negative.h
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file dgl/sampling/negative.h
 * @brief Negative sampling.
*/
#ifndef DGL_SAMPLING_NEGATIVE_H_
#define DGL_SAMPLING_NEGATIVE_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction).
#include
#include

#include

namespace dgl {
namespace sampling {

/**
 * @brief Given an edge type, uniformly sample source-destination pairs that do
 * not have an edge in between using rejection sampling.
 *
 * @note This function may not return the same number of elements as the given
 * number of samples.
 * @note This function requires sorting the CSR or CSC matrix of the graph
 * in-place.  It prefers CSC over CSR.
 *
 * @param hg The graph.
 * @param etype The edge type.
 * @param num_samples The number of negative examples to sample.
 * @param num_trials The number of rejection sampling trials.
 * @param exclude_self_loops Do not include the examples where the source equals
 * the destination.
 * @param replace Whether to sample with replacement.
 * @param redundancy How many redundant negative examples to take in case of
 * duplicate examples.
 * @return The pair of source and destination tensors.
 */
std::pair GlobalUniformNegativeSampling(
    HeteroGraphPtr hg, dgl_type_t etype, int64_t num_samples, int num_trials,
    bool exclude_self_loops, bool replace, double redundancy);

}; // namespace sampling
}; // namespace dgl

#endif // DGL_SAMPLING_NEGATIVE_H_

================================================
FILE: include/dgl/sampling/neighbor.h
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file dgl/sampling/neighbor.h
 * @brief Neighborhood-based sampling.
 */
#ifndef DGL_SAMPLING_NEIGHBOR_H_
#define DGL_SAMPLING_NEIGHBOR_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction). The element types of
// the std::vector / std::tuple parameters and return types in the declarations
// below are missing for the same reason.
#include
#include

#include
#include

namespace dgl {
namespace sampling {

/**
 * @brief Sample from the neighbors of the given nodes and return the sampled
 * edges as a graph.
 *
 * When sampling with replacement, the sampled subgraph could have parallel
 * edges.
 *
 * For sampling without replacement, if fanout > the number of neighbors, all
 * the neighbors will be sampled.
 *
 * @param hg The input graph.
 * @param nodes Node IDs of each type. The vector length must be equal to the
 * number of node types. Empty array is allowed.
 * @param fanouts Number of sampled neighbors for each edge type. The vector
 * length should be equal to the number of edge types, or one if they all have
 * the same fanout.
 * @param dir Edge direction.
 * @param probability A vector of 1D float arrays, indicating the transition
 * probability of each edge by edge type.  An empty float array assumes uniform
 * transition.
 * @param exclude_edges Edge IDs of each type which will be excluded during
 * sampling. The vector length must be equal to the number of edge types. Empty
 * array is allowed.
 * @param replace If true, sample with replacement.
 * @return Sampled neighborhoods as a graph. The return graph has the same
 * schema as the original one.
 */
HeteroSubgraph SampleNeighbors(
    const HeteroGraphPtr hg, const std::vector& nodes,
    const std::vector& fanouts, EdgeDir dir,
    const std::vector& probability,
    const std::vector& exclude_edges, bool replace = true);

/**
 * @brief Sample from the neighbors of the given nodes and convert a graph into
 * a bipartite-structured graph for message passing.
 *
 * Specifically, we create one node type \c ntype_l on the "left" side and
 * another node type \c ntype_r on the "right" side for each node type \c ntype.
 * The nodes of type \c ntype_r would contain the nodes designated by the
 * caller, and node type \c ntype_l would contain the nodes that have an edge
 * connecting to one of the designated nodes.
 *
 * The nodes of \c ntype_l would also contain the nodes in node type \c ntype_r.
 * When sampling with replacement, the sampled subgraph could have parallel
 * edges.
 *
 * For sampling without replacement, if fanout > the number of neighbors, all
 * the neighbors will be sampled.
 *
 * Non-deterministic algorithm, requires nodes parameter to store unique Node
 * IDs.
 *
 * @tparam IdType Graph's index data type, can be int32_t or int64_t
 * @param hg The input graph.
 * @param nodes Node IDs of each type. The vector length must be equal to the
 * number of node types. Empty array is allowed.
 * @param mapping External parameter that should be set to a vector of IdArrays
 *                filled with -1, required for mapping of nodes in returned
 *                graph
 * @param fanouts Number of sampled neighbors for each edge type. The vector
 * length should be equal to the number of edge types, or one if they all have
 * the same fanout.
 * @param dir Edge direction.
 * @param prob_or_mask A vector of 1D float arrays, indicating the transition
 * probability of each edge by edge type.  An empty float array assumes uniform
 * transition.
 * @param exclude_edges Edge IDs of each type which will be excluded during
 * sampling. The vector length must be equal to the number of edge types. Empty
 * array is allowed.
 * @param replace If true, sample with replacement.
 * @return Sampled neighborhoods as a graph. The return graph has the same
 * schema as the original one.
 *
 * @note NOTE(review): the declared return type is a std::tuple, so the @return
 *       text above looks copied from SampleNeighbors -- confirm what the tuple
 *       elements are. The name \c prob_or_mask suggests that masks are
 *       accepted too -- confirm against the implementation.
 */
template
std::tuple, std::vector>
SampleNeighborsFused(
    const HeteroGraphPtr hg, const std::vector& nodes,
    const std::vector& mapping, const std::vector& fanouts,
    EdgeDir dir, const std::vector& prob_or_mask,
    const std::vector& exclude_edges, bool replace = true);

/**
 * @brief Select the neighbors with k-largest weights on the connecting edges
 * for each given node.
 *
 * If k > the number of neighbors, all the neighbors are sampled.
 *
 * @param hg The input graph.
 * @param nodes Node IDs of each type. The vector length must be equal to the
 * number of node types. Empty array is allowed.
 * @param k The k value for each edge type. The vector length should be equal to
 * the number of edge types, or one if they all have the same value.
 * @param dir Edge direction.
 * @param weight A vector of 1D float arrays, indicating the weights associated
 * with each edge.
 * @param ascending If true, elements are sorted by ascending order, equivalent
 * to find the K smallest values. Otherwise, find K largest values.
 * @return Sampled neighborhoods as a graph. The return graph has the same
 * schema as the original one.
 */
HeteroSubgraph SampleNeighborsTopk(
    const HeteroGraphPtr hg, const std::vector& nodes,
    const std::vector& k, EdgeDir dir,
    const std::vector& weight, bool ascending = false);

// NOTE(review): undocumented upstream. Unlike the functions above it takes a
// single node array and a single fanout; presumably it samples neighbors
// according to `bias`, with `tag_offset` delimiting tagged neighbor groups --
// confirm against the implementation before relying on this description.
HeteroSubgraph SampleNeighborsBiased(
    const HeteroGraphPtr hg, const IdArray& nodes, const int64_t fanouts,
    const NDArray& bias, const NDArray& tag_offset, const EdgeDir dir,
    const bool replace);

} // namespace sampling
} // namespace dgl

#endif // DGL_SAMPLING_NEIGHBOR_H_

================================================
FILE: include/dgl/sampling/randomwalks.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file dgl/sampling/randomwalks.h
 * @brief Random walk functions.
 */
#ifndef DGL_SAMPLING_RANDOMWALKS_H_
#define DGL_SAMPLING_RANDOMWALKS_H_

// NOTE(review): the bare #include directives below have no header name in this
// copy of the file (presumably lost during extraction), and the element types
// of std::tuple / std::vector in the declarations are missing as well.
#include
#include

#include
#include
#include

namespace dgl {

namespace sampling {

/**
 * @brief Metapath-based random walk.
 * @param hg The heterograph.
 * @param seeds A 1D array of seed nodes, with the type the source type of the
 * first edge type in the metapath.
 * @param metapath A 1D array of edge types representing the metapath.
 * @param prob A vector of 1D float arrays, indicating the transition
 * probability of each edge by edge type.  An empty float array assumes uniform
 * transition.
 * @return A tuple of
 *         1. One 2D array of shape (len(seeds), len(metapath) + 1) with node
 *            IDs. The paths that terminated early are padded with -1.
 *         2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs.
 *            The paths that terminated early are padded with -1.
 *         3. One 1D array of shape (len(metapath) + 1) with node type IDs.
 */
std::tuple RandomWalk(
    const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
    const std::vector &prob);

/**
 * @brief Metapath-based random walk with restart probability.
 * @param hg The heterograph.
* @param seeds A 1D array of seed nodes, with the type the source type of the
 * first edge type in the metapath.
 * @param metapath A 1D array of edge types representing the metapath.
 * @param prob A vector of 1D float arrays, indicating the transition
 * probability of each edge by edge type.  An empty float array assumes uniform
 * transition.
 * @param restart_prob Restart probability.
 * @return A tuple of
 *         1. One 2D array of shape (len(seeds), len(metapath) + 1) with node
 *            IDs. The paths that terminated early are padded with -1.
 *         2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs.
 *            The paths that terminated early are padded with -1.
 *         3. One 1D array of shape (len(metapath) + 1) with node type IDs.
 */
std::tuple RandomWalkWithRestart(
    const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
    const std::vector &prob, double restart_prob);

/**
 * @brief Metapath-based random walk with stepwise restart probability.  Useful
 *        for PinSAGE-like models.
 * @param hg The heterograph.
 * @param seeds A 1D array of seed nodes, with the type the source type of the
 * first edge type in the metapath.
 * @param metapath A 1D array of edge types representing the metapath.
 * @param prob A vector of 1D float arrays, indicating the transition
 * probability of each edge by edge type.  An empty float array assumes uniform
 * transition.
 * @param restart_prob Restart probability array which has the same number of
 * elements as \c metapath, indicating the probability to terminate after
 * transition.
 * @return A tuple of
 *         1. One 2D array of shape (len(seeds), len(metapath) + 1) with node
 *            IDs. The paths that terminated early are padded with -1.
 *         2. One 2D array of shape (len(seeds), len(metapath)) with edge IDs.
 *            The paths that terminated early are padded with -1.
 *         3. One 1D array of shape (len(metapath) + 1) with node type IDs.
*/
std::tuple RandomWalkWithStepwiseRestart(
    const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
    const std::vector &prob, FloatArray restart_prob);

}; // namespace sampling

}; // namespace dgl

#endif // DGL_SAMPLING_RANDOMWALKS_H_

================================================
FILE: include/dgl/scheduler.h
================================================
/**
 *  Copyright (c) 2018 by Contributors
 * @file dgl/scheduler.h
 * @brief Operations on graph index.
 */
#ifndef DGL_SCHEDULER_H_
#define DGL_SCHEDULER_H_

// NOTE(review): the bare #include directive below has no header name in this
// copy of the file (presumably lost during extraction).
#include

#include "runtime/ndarray.h"

namespace dgl {

// Id arrays are plain runtime NDArrays.
typedef dgl::runtime::NDArray IdArray;

namespace sched {

/**
 * @brief Generate degree bucketing schedule
 * @tparam IdType Graph's index data type, can be int32_t or int64_t
 *         NOTE(review): the template parameter list and the element type of the
 *         returned std::vector are missing in this copy of the file.
 * @param msg_ids The edge id for each message
 * @param vids The destination vertex for each message
 * @param recv_ids The recv nodes (for checking zero degree nodes)
 * @note If there are multiple messages going into the same destination vertex,
 *       then there will be multiple copies of the destination vertex in vids.
 * @return a vector of 5 IdArrays for degree bucketing. The 5 arrays are:
 *         degrees: degrees for each bucket
 *         nids: destination node ids
 *         nid_section: number of nodes in each bucket (used to split nids)
 *         mids: message ids
 *         mid_section: number of messages in each bucket (used to split mids)
 */
template
std::vector DegreeBucketing(
    const IdArray& msg_ids, const IdArray& vids, const IdArray& recv_ids);

/**
 * @brief Generate degree bucketing schedule for group_apply edge
 * @tparam IdType Graph's index data type, can be int32_t or int64_t
 * @param uids One end vertex of edge by which edges are grouped
 * @param vids The other end vertex of edge
 * @param eids Edge ids
 * @note This function always generates the group_apply schedule based on
 *       degrees of nodes in uids. Therefore, if group_apply by source nodes,
 *       then uids should be source. If group_apply by destination nodes, then
 *       uids should be destination.
* @return a vector of 5 IdArrays for degree bucketing. The 5 arrays are: * degrees: degrees for each bucket * new_uids: uids reordered by degree bucket * new_vids: vids reordered by degree bucket * new_eids: eids reordered by degree bucket * sections: number of edges in each degree bucket (used to partition * new_uids, new_vids, and new_eids) */ template std::vector GroupEdgeByNodeDegree( const IdArray& uids, const IdArray& vids, const IdArray& eids); } // namespace sched } // namespace dgl #endif // DGL_SCHEDULER_H_ ================================================ FILE: include/dgl/transform.h ================================================ /** * Copyright (c) 2019 by Contributors * @file dgl/transform.h * @brief DGL graph transformations */ #ifndef DGL_TRANSFORM_H_ #define DGL_TRANSFORM_H_ #include #include #include #include "array.h" #include "base_heterograph.h" namespace dgl { namespace transform { /** * @brief Given a list of graphs, remove the common nodes that have neither * inbound nor outbound edges. * * The graphs should have identical node ID space (i.e. should have the same set * of nodes, including types and IDs). * * @param graphs The list of graphs. * @param always_preserve The list of nodes to preserve regardless of whether * the inbound or outbound edges exist. * * @return A pair. The first element is the list of compacted graphs, and the * second element is the mapping from the compacted graphs to the original * graph. */ std::pair, std::vector> CompactGraphs( const std::vector &graphs, const std::vector &always_preserve); /** * @brief Convert a graph into a bipartite-structured graph for message passing. * * Specifically, we create one node type \c ntype_l on the "left" side and * another node type \c ntype_r on the "right" side for each node type \c ntype. 
* The nodes of type \c ntype_r would contain the nodes designated by the * caller, and node type \c ntype_l would contain the nodes that have an edge * connecting to one of the designated nodes. * * The nodes of \c ntype_l would also contain the nodes in node type \c ntype_r. * * This function is often used for constructing a series of dependency graphs * for multi-layer message passing, where we first construct a series of * frontier graphs on the original node space, and run the following to get the * bipartite graph needed for message passing with each GNN layer: * * * bipartites = [None] * len(layers) * for l in reversed(range(len(layers))): * bipartites[l], seeds = to_bipartite(frontier[l], seeds) * x = graph.ndata["h"][seeds] * for g, layer in zip(bipartites, layers): * x_src = x * x_dst = x[:len(g.dsttype)] * x = sageconv(g, (x_src, x_dst)) * output = x * * * @param graph The graph. * @param rhs_nodes Designated nodes that would appear on the right side. * @param include_rhs_in_lhs If false, do not include the nodes of node type \c * ntype_r in \c ntype_l. * * @return A triplet containing * * The bipartite-structured graph, * * The induced nodes from the left side for each graph, * * The induced edges. * * @note If include_rhs_in_lhs is true, then for each node type \c ntype, the * nodes in rhs_nodes[ntype] would always appear first in the nodes of type \c * ntype_l in the new graph. */ std::tuple, std::vector> ToBlock( HeteroGraphPtr graph, const std::vector &rhs_nodes, bool include_rhs_in_lhs); /** * @brief Convert a multigraph to a simple graph. * * @return A triplet of * * @c hg : The said simple graph. * * @c count : The array of edge occurrences per edge type. * * @c edge_map : The mapping from original edge IDs to new edge IDs per edge * type. 
* * @note Example: consider a graph with the following edges * * [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)] * * Then ToSimpleGraph(g) would yield the following elements: * * * The first element would be the simple graph itself with the following edges * * [(0, 1), (1, 3), (1, 4), (2, 2)] * * * The second element is an array \c count. \c count[i] stands for the number * of edges connecting simple_g.src[i] and simple_g.dst[i] in the original * graph. * * count[0] = [1, 2, 2, 1] * * * One can find the mapping between edges from the original graph to the new * simple graph. * * edge_map[0] = [0, 1, 3, 1, 2, 2] */ std::tuple, std::vector> ToSimpleGraph(const HeteroGraphPtr graph); /** * @brief Remove edges from a graph. * * @param graph The graph. * @param eids The edge IDs to remove per edge type. * * @return A pair of the graph with edges removed, as well as the edge ID * mapping from the original graph to the new graph per edge type. */ std::pair> RemoveEdges( const HeteroGraphPtr graph, const std::vector &eids); }; // namespace transform }; // namespace dgl #endif // DGL_TRANSFORM_H_ ================================================ FILE: include/dgl/zerocopy_serializer.h ================================================ /** * Copyright (c) 2020 by Contributors * @file dgl/zerocopy_serializer.h * @brief headers for serializer. */ #ifndef DGL_ZEROCOPY_SERIALIZER_H_ #define DGL_ZEROCOPY_SERIALIZER_H_ #include #include #include #include #include #include #include #include #include #include #include #include "dmlc/logging.h" namespace dgl { /** * * StreamWithBuffer is backed up by dmlc::MemoryFixedSizeStream or * dmlc::MemoryStringStream. This class supports serializing and deserializing * NDArrays stored in shared memory. If the stream is created for * sending/receiving data through network, the data pointer of the NDArray will be * transmitted directly without any copy. 
Otherwise, the stream is for * sending/recving data to another process on the same machine, so if an NDArray * is stored in shared memory, it will just record the shared memory name * instead of the actual data buffer. * * For example: * * std::string blob; * // Send to local * StreamWithBuffer strm(&blob, false); * // Send to remote * StreamWithBuffer strm(&blob, true); * // Receive from local * StreamWithBuffer strm(&blob, false); * // Receive from remote * std::vector ptr_list * StreamWithBuffer strm(&blob, ptr_list); */ class StreamWithBuffer : public dmlc::SeekStream { public: // Buffer type. Stores the NDArray to maintain the reference count and ensure // the liveness of the data pointer struct Buffer { dgl::runtime::NDArray tensor = dgl::runtime::NDArray(); void* data = nullptr; int64_t size = 0; Buffer(const dgl::runtime::NDArray& tensor, void* data, int64_t data_size) : tensor(tensor), data(data), size(data_size) {} explicit Buffer(void* data) : data(data) {} }; /** * @brief This constructor is for the writing scenario or reading from the local * machine * @param strm The backing stream to write/load from * @param send_to_remote Whether this stream will be deserialized at remote * machine or the local machine. If true, will record the data pointer into * buffer list. 
*/ StreamWithBuffer(std::unique_ptr strm, bool send_to_remote) : strm_(std::move(strm)), buffer_list_(), send_to_remote_(send_to_remote) {} /** * @brief This constructor is for reading from remote * @param strm The stream to write/load from zerocopy write/load * @param data_ptr_list list of pointers to reconstruct NDArrays from * * For example: * std::string blob; * std::vector data_ptr_list; * // Read from the pointer list sent by the remote * StreamWithBuffer buf_strm(&blob, data_ptr_list); */ StreamWithBuffer( std::unique_ptr strm, const std::vector& data_ptr_list) : strm_(std::move(strm)), send_to_remote_(true) { for (void* data : data_ptr_list) { buffer_list_.emplace_back(data); } } /** * @brief Construct stream backed up by string * @param blob The string to write/load from zerocopy write/load * @param send_to_remote Whether this stream will be deserialized at remote * machine or the local machine. If true, will record the data pointer into * buffer list. */ StreamWithBuffer(std::string* blob, bool send_to_remote) : strm_(new dmlc::MemoryStringStream(blob)), send_to_remote_(send_to_remote) {} /** * @brief Construct stream backed up by a fixed-size buffer * @param p_buffer buffer pointer * @param size buffer size * @param send_to_remote Whether this stream will be deserialized at remote * machine or the local machine. If true, will record the data pointer into * buffer list. 
*/ StreamWithBuffer(char* p_buffer, size_t size, bool send_to_remote) : strm_(new dmlc::MemoryFixedSizeStream(p_buffer, size)), send_to_remote_(send_to_remote) {} /** * @brief Construct stream backed up by string, and reconstruct NDArray * from data_ptr_list * @param blob The string to write/load from zerocopy write/load * @param data_ptr_list pointer list for NDArrays to reconstruct from */ StreamWithBuffer(std::string* blob, const std::vector& data_ptr_list) : strm_(new dmlc::MemoryStringStream(blob)), send_to_remote_(true) { for (void* data : data_ptr_list) { buffer_list_.emplace_back(data); } } /** * @brief Construct stream backed up by a fixed-size buffer, and reconstruct NDArray * from data_ptr_list * @param p_buffer buffer pointer * @param size buffer size * @param data_ptr_list pointer list for NDArrays to reconstruct from */ StreamWithBuffer( char* p_buffer, size_t size, const std::vector& data_ptr_list) : strm_(new dmlc::MemoryFixedSizeStream(p_buffer, size)), send_to_remote_(true) { for (void* data : data_ptr_list) { buffer_list_.emplace_back(data); } } // delegate methods to strm_ virtual size_t Read(void* ptr, size_t size) { return strm_->Read(ptr, size); } virtual void Write(const void* ptr, size_t size) { strm_->Write(ptr, size); } virtual void Seek(size_t pos) { strm_->Seek(pos); } virtual size_t Tell(void) { return strm_->Tell(); } using dmlc::Stream::Read; using dmlc::Stream::Write; /** * @brief push NDArray into stream * If send_to_remote=true, the NDArray will be saved to the buffer list * If send_to_remote=false, the NDArray will be saved to the backing string */ void PushNDArray(const runtime::NDArray& tensor); /** * @brief pop NDArray from stream * If send_to_remote=true, the NDArray will be reconstructed from buffer list * If send_to_remote=false, the NDArray will be reconstructed from shared * memory */ dgl::runtime::NDArray PopNDArray(); /** * @brief Get whether this stream is for remote usage */ bool send_to_remote() { return send_to_remote_; } /** * 
@brief Get underlying buffer list */ const std::deque& buffer_list() const { return buffer_list_; } private: std::unique_ptr strm_; std::deque buffer_list_; bool send_to_remote_; }; // class StreamWithBuffer } // namespace dgl #endif // DGL_ZEROCOPY_SERIALIZER_H_ ================================================ FILE: notebooks/graphbolt/walkthrough.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "e1qfiZMOJYYv" }, "source": [ "# Graphbolt Quick Walkthrough\n", "\n", "The tutorial provides a quick walkthrough of operators provided by the `dgl.graphbolt` package, and illustrates how to create a GNN datapipe with the package. To learn more details about Stochastic Training of GNNs, please read the [materials](https://docs.dgl.ai/tutorials/large/index.html) provided by DGL.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/graphbolt/walkthrough.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/graphbolt/walkthrough.ipynb)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "fWiaC1WaDE-W" }, "outputs": [], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Install the CPU version.\n", "device = torch.device(\"cpu\")\n", "!pip install --pre dgl -f https://data.dgl.ai/wheels-test/repo.html\n", "\n", "try:\n", " import dgl.graphbolt as gb\n", " installed = True\n", "except ImportError as error:\n", " installed = False\n", " print(error)\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "8O7PfsY4sPoN" }, "source": [ "## Dataset\n", "\n", "The dataset has three primary components. *1*. 
An itemset, which can be iterated over as the training target. *2*. A sampling graph, which is used by the subgraph sampling algorithm to generate a subgraph. *3*. A feature store, which stores node, edge, and graph features.\n", "\n", "* The **Itemset** is created from iterable data or tuple of iterable data." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "g73ZAbMQsSgV" }, "outputs": [], "source": [ "seeds = torch.tensor(\n", " [[7, 0], [6, 0], [1, 3], [3, 3], [2, 4], [8, 4], [1, 4], [2, 4], [1, 5],\n", " [9, 6], [0, 6], [8, 6], [7, 7], [7, 7], [4, 7], [6, 8], [5, 8], [9, 9],\n", " [4, 9], [4, 9], [5, 9], [9, 9], [5, 9], [9, 9], [7, 9]]\n", ")\n", "item_set = gb.ItemSet(seeds, names=\"seeds\")\n", "print(list(item_set))" ] }, { "cell_type": "markdown", "metadata": { "id": "Lqty9p4cs0OR" }, "source": [ "* The **SamplingGraph** is used by the subgraph sampling algorithm to generate a subgraph. In graphbolt, we provide a canonical solution, the FusedCSCSamplingGraph, which achieves state-of-the-art time and space efficiency on CPU sampling. However, this requires enough CPU memory to host all FusedCSCSamplingGraph objects in memory." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "jDjY149xs3PI" }, "outputs": [], "source": [ "indptr = torch.tensor([0, 2, 2, 2, 4, 8, 9, 12, 15, 17, 25])\n", "indices = torch.tensor(\n", " [7, 6, 1, 3, 2, 8, 1, 2, 1, 9, 0, 8, 7, 7, 4, 6, 5, 9, 4, 4, 5, 9, 5, 9, 7]\n", ")\n", "num_edges = 25\n", "eid = torch.arange(num_edges)\n", "edge_attributes = {gb.ORIGINAL_EDGE_ID: eid}\n", "graph = gb.fused_csc_sampling_graph(indptr, indices, edge_attributes=edge_attributes)\n", "print(graph)" ] }, { "cell_type": "markdown", "metadata": { "id": "mNp2S2_Vs8af" }, "source": [ "* The **FeatureStore** is used to store node, edge, and graph features. In graphbolt, we provide the TorchBasedFeature and related optimizations, such as the GPUCachedFeature, for different use cases." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "zIU6KWe1Sm2g" }, "outputs": [], "source": [ "num_nodes = 10\n", "num_edges = 25\n", "node_feature_data = torch.rand((num_nodes, 2))\n", "edge_feature_data = torch.rand((num_edges, 3))\n", "node_feature = gb.TorchBasedFeature(node_feature_data)\n", "edge_feature = gb.TorchBasedFeature(edge_feature_data)\n", "features = {\n", " (\"node\", None, \"feat\") : node_feature,\n", " (\"edge\", None, \"feat\") : edge_feature,\n", "}\n", "feature_store = gb.BasicFeatureStore(features)\n", "print(feature_store)" ] }, { "cell_type": "markdown", "metadata": { "id": "Oh2ockWWoXQ0" }, "source": [ "## DataPipe\n", "\n", "The DataPipe in Graphbolt is an extension of the PyTorch DataPipe, but it is specifically designed to address the challenges of training graph neural networks (GNNs). Each stage of the data pipeline loads data from different sources and can be combined with other stages to create more complex data pipelines. The intermediate data will be stored in **MiniBatch** data packs.\n", "\n", "* **ItemSampler** iterates over the input **Itemset** and creates subsets." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "XtqPDprrogR7" }, "outputs": [], "source": [ "datapipe = gb.ItemSampler(item_set, batch_size=3, shuffle=False)\n", "print(next(iter(datapipe)))" ] }, { "cell_type": "markdown", "metadata": { "id": "BjkAK37xopp1" }, "source": [ "* **NegativeSampler** generates negative samples and returns a mix of positive and negative samples." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "PrFpGoOGopJy" }, "outputs": [], "source": [ "datapipe = datapipe.sample_uniform_negative(graph, 1)\n", "print(next(iter(datapipe)))" ] }, { "cell_type": "markdown", "metadata": { "id": "fYO_oIwkpmb3" }, "source": [ "* **SubgraphSampler** samples a subgraph from a given set of nodes from a larger graph." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "4UsY3PL3ppYV" }, "outputs": [], "source": [ "fanouts = torch.tensor([1])\n", "datapipe = datapipe.sample_neighbor(graph, [fanouts])\n", "print(next(iter(datapipe)))" ] }, { "cell_type": "markdown", "metadata": { "id": "0uIydsjUqMA0" }, "source": [ "* **FeatureFetcher** fetches node/edge features in graphbolt." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YAj8G7YBqO6G" }, "outputs": [], "source": [ "datapipe = datapipe.fetch_feature(feature_store, node_feature_keys=[\"feat\"], edge_feature_keys=[\"feat\"])\n", "print(next(iter(datapipe)))" ] }, { "cell_type": "markdown", "metadata": { "id": "hjBSLPRPrsD2" }, "source": [ "* Copy the data to the target device (e.g., the GPU) for training." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RofiZOUMqt_u" }, "outputs": [], "source": [ "datapipe = datapipe.copy_to(device=device)\n", "print(next(iter(datapipe)))" ] }, { "cell_type": "markdown", "metadata": { "id": "xm9HnyHRvxXj" }, "source": [ "## Exercise: Node classification\n", "\n", "Similarly, the following Dataset is created for node classification. Can you implement the data pipeline for the dataset?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YV-mk-xAv78v" }, "outputs": [], "source": [ "# Dataset for node classification.\n", "num_nodes = 10\n", "nodes = torch.arange(num_nodes)\n", "labels = torch.tensor([1, 2, 0, 2, 2, 0, 2, 2, 2, 2])\n", "item_set = gb.ItemSet((nodes, labels), names=(\"seeds\", \"labels\"))\n", "\n", "indptr = torch.tensor([0, 2, 2, 2, 4, 8, 9, 12, 15, 17, 25])\n", "indices = torch.tensor(\n", " [7, 6, 1, 3, 2, 8, 1, 2, 1, 9, 0, 8, 7, 7, 4, 6, 5, 9, 4, 4, 5, 9, 5, 9, 7]\n", ")\n", "eid = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,\n", " 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24])\n", "edge_attributes = {gb.ORIGINAL_EDGE_ID: eid}\n", "# Pass the edge attributes by keyword so they do not depend on positional order.\n", "graph = gb.fused_csc_sampling_graph(indptr, indices, edge_attributes=edge_attributes)\n", "\n", "num_nodes = 10\n", "num_edges = 25\n", "node_feature_data = torch.rand((num_nodes, 2))\n", "edge_feature_data = torch.rand((num_edges, 3))\n", "node_feature = gb.TorchBasedFeature(node_feature_data)\n", "edge_feature = gb.TorchBasedFeature(edge_feature_data)\n", "features = {\n", " (\"node\", None, \"feat\") : node_feature,\n", " (\"edge\", None, \"feat\") : edge_feature,\n", "}\n", "feature_store = gb.BasicFeatureStore(features)\n", "\n", "# Datapipe.\n", "...\n", "print(next(iter(datapipe)))" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [ "BjkAK37xopp1" ], "gpuType": "T4", "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: notebooks/sparse/gcn.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "gpuClass": "standard" }, "cells": [ { "cell_type": 
"markdown", "source": [ "# Building a Graph Convolutional Network Using Sparse Matrices\n", "\n", "This tutorial illustrates step-by-step how to write and train a Graph Convolutional Network ([Kipf et al. (2017)](https://arxiv.org/abs/1609.02907)) using DGL's sparse matrix APIs.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/gcn.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/sparse/gcn.ipynb)" ], "metadata": { "id": "_iqWrPwxtZr6" } }, { "cell_type": "code", "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Uncomment below to install required packages. If the CUDA version is not 11.8,\n", "# check the https://www.dgl.ai/pages/start.html to find the supported CUDA\n", "# version and corresponding command to install DGL.\n", "#!pip install dgl -f https://data.dgl.ai/wheels/cu118/repo.html > /dev/null\n", "\n", "try:\n", " import dgl\n", " installed = True\n", "except ImportError:\n", " installed = False\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ], "metadata": { "id": "FTqB360eRvya" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Graph Convolutional Layer\n", "\n", "Mathematically, the graph convolutional layer is defined as:\n", "\n", "$$f(X^{(l)}, A) = \\sigma(\\bar{D}^{-\\frac{1}{2}}\\bar{A}\\bar{D}^{-\\frac{1}{2}}X^{(l)}W^{(l)})$$\n", "\n", "with $\\bar{A} = A + I$, where $A$ denotes the adjacency matrix and $I$ denotes the identity matrix, $\\bar{D}$ refers to the diagonal node degree matrix of $\\bar{A}$ and $W^{(l)}$ denotes a trainable weight matrix. $\\sigma$ refers to a non-linear activation (e.g. 
relu).\n", "\n", "The code below shows how to implement it using the `dgl.sparse` package. The core operations are:\n", "\n", "* `dgl.sparse.identity` creates the identity matrix $I$.\n", "* The augmented adjacency matrix $\\bar{A}$ is then computed by adding the identity matrix to the adjacency matrix $A$.\n", "* `A_hat.sum(0)` aggregates the augmented adjacency matrix $\\bar{A}$ along the first dimension which gives the degree vector of the augmented graph. The diagonal degree matrix $\\bar{D}$ is then created by `dgl.sparse.diag`.\n", "* Compute $\\bar{D}^{-\\frac{1}{2}}$.\n", "* `D_hat_invsqrt @ A_hat @ D_hat_invsqrt` computes the convolution matrix which is then multiplied by the linearly transformed node features." ], "metadata": { "id": "r3qB1atg_ld0" } }, { "cell_type": "code", "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "import dgl.sparse as dglsp\n", "\n", "class GCNLayer(nn.Module):\n", " def __init__(self, in_size, out_size):\n", " super(GCNLayer, self).__init__()\n", " self.W = nn.Linear(in_size, out_size)\n", "\n", " def forward(self, A, X):\n", " ########################################################################\n", " # (HIGHLIGHT) Compute the symmetrically normalized adjacency matrix with\n", " # Sparse Matrix API\n", " ########################################################################\n", " I = dglsp.identity(A.shape)\n", " A_hat = A + I\n", " D_hat = dglsp.diag(A_hat.sum(0))\n", " D_hat_invsqrt = D_hat ** -0.5\n", " return D_hat_invsqrt @ A_hat @ D_hat_invsqrt @ self.W(X)" ], "metadata": { "id": "Y4I4EhHQ_kKb" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "A Graph Convolutional Network is then defined by stacking this layer." 
], "metadata": { "id": "bvP7O2IwV_c7" } }, { "cell_type": "code", "source": [ "# Create a GCN with the GCN layer.\n", "class GCN(nn.Module):\n", " def __init__(self, in_size, out_size, hidden_size):\n", " super(GCN, self).__init__()\n", " self.conv1 = GCNLayer(in_size, hidden_size)\n", " self.conv2 = GCNLayer(hidden_size, out_size)\n", "\n", " def forward(self, A, X):\n", " X = self.conv1(A, X)\n", " X = F.relu(X)\n", " return self.conv2(A, X)" ], "metadata": { "id": "BHX3vRjDWJTO" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training the GCN\n", "\n", "We then train the GCN model on the Cora dataset for node classification. Note that since the model expects an adjacency matrix as the first argument, we first construct the adjacency matrix from the graph using the `dgl.sparse.spmatrix` API which returns a DGL `SparseMatrix` object." ], "metadata": { "id": "2Qw7fTdGNnEp" } }, { "cell_type": "code", "source": [ "def evaluate(g, pred):\n", " label = g.ndata[\"label\"]\n", " val_mask = g.ndata[\"val_mask\"]\n", " test_mask = g.ndata[\"test_mask\"]\n", "\n", " # Compute accuracy on validation/test set.\n", " val_acc = (pred[val_mask] == label[val_mask]).float().mean()\n", " test_acc = (pred[test_mask] == label[test_mask]).float().mean()\n", " return val_acc, test_acc\n", "\n", "def train(model, g):\n", " features = g.ndata[\"feat\"]\n", " label = g.ndata[\"label\"]\n", " train_mask = g.ndata[\"train_mask\"]\n", " optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)\n", " loss_fcn = nn.CrossEntropyLoss()\n", "\n", " # Preprocess to get the adjacency matrix of the graph.\n", " indices = torch.stack(g.edges())\n", " N = g.num_nodes()\n", " A = dglsp.spmatrix(indices, shape=(N, N))\n", "\n", " for epoch in range(100):\n", " model.train()\n", "\n", " # Forward.\n", " logits = model(A, features)\n", "\n", " # Compute loss with nodes in the training set.\n", " loss = loss_fcn(logits[train_mask], 
label[train_mask])\n", "\n", " # Backward.\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Compute prediction.\n", " pred = logits.argmax(dim=1)\n", "\n", " # Evaluate the prediction.\n", " val_acc, test_acc = evaluate(g, pred)\n", " if epoch % 5 == 0:\n", " print(\n", " f\"In epoch {epoch}, loss: {loss:.3f}, val acc: {val_acc:.3f}\"\n", " f\", test acc: {test_acc:.3f}\"\n", " )\n", "\n", "\n", "# Load graph from the existing dataset.\n", "dataset = dgl.data.CoraGraphDataset()\n", "g = dataset[0]\n", "\n", "# Create model.\n", "feature = g.ndata['feat']\n", "in_size = feature.shape[1]\n", "out_size = dataset.num_classes\n", "gcn_model = GCN(in_size, out_size, 16)\n", "\n", "# Kick off training.\n", "train(gcn_model, g)" ], "metadata": { "id": "5Sp1B1_QHgC2", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "552e2c22-44f4-4495-c7f9-a57f13484270" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading /root/.dgl/cora_v2.zip from https://data.dgl.ai/dataset/cora_v2.zip...\n", "Extracting file to /root/.dgl/cora_v2\n", "Finished data loading and preprocessing.\n", " NumNodes: 2708\n", " NumEdges: 10556\n", " NumFeats: 1433\n", " NumClasses: 7\n", " NumTrainingSamples: 140\n", " NumValidationSamples: 500\n", " NumTestSamples: 1000\n", "Done saving data into cached files.\n", "In epoch 0, loss: 1.954, val acc: 0.114, test acc: 0.103\n", "In epoch 5, loss: 1.921, val acc: 0.158, test acc: 0.147\n", "In epoch 10, loss: 1.878, val acc: 0.288, test acc: 0.283\n", "In epoch 15, loss: 1.822, val acc: 0.344, test acc: 0.353\n", "In epoch 20, loss: 1.751, val acc: 0.388, test acc: 0.389\n", "In epoch 25, loss: 1.663, val acc: 0.406, test acc: 0.410\n", "In epoch 30, loss: 1.562, val acc: 0.472, test acc: 0.481\n", "In epoch 35, loss: 1.450, val acc: 0.558, test acc: 0.573\n", "In epoch 40, loss: 1.333, val acc: 0.636, test acc: 0.641\n", "In epoch 45, loss: 1.216, val acc: 
0.684, test acc: 0.683\n", "In epoch 50, loss: 1.102, val acc: 0.726, test acc: 0.713\n", "In epoch 55, loss: 0.996, val acc: 0.740, test acc: 0.740\n", "In epoch 60, loss: 0.899, val acc: 0.754, test acc: 0.760\n", "In epoch 65, loss: 0.813, val acc: 0.762, test acc: 0.771\n", "In epoch 70, loss: 0.737, val acc: 0.768, test acc: 0.781\n", "In epoch 75, loss: 0.671, val acc: 0.776, test acc: 0.786\n", "In epoch 80, loss: 0.614, val acc: 0.784, test acc: 0.790\n", "In epoch 85, loss: 0.566, val acc: 0.780, test acc: 0.788\n", "In epoch 90, loss: 0.524, val acc: 0.780, test acc: 0.791\n", "In epoch 95, loss: 0.489, val acc: 0.772, test acc: 0.795\n" ] } ] }, { "cell_type": "markdown", "source": [ "*Check out the full example script* [here](https://github.com/dmlc/dgl/blob/master/examples/sparse/gcn.py)." ], "metadata": { "id": "yQnJZvE9ZduM" } } ] } ================================================ FILE: notebooks/sparse/graph_diffusion.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "toc_visible": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard", "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Graph Diffusion in Graph Neural Networks\n", "\n", "This tutorial first briefly introduces the diffusion process on graphs. 
It then illustrates how Graph Neural Networks can utilize this concept to enhance predictive power.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/graph_diffusion.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/sparse/graph_diffusion.ipynb)" ], "metadata": { "id": "SfdsDpOK7yOT" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "F6eQWmWn7lqh" }, "outputs": [], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Uncomment below to install required packages. If the CUDA version is not 11.8,\n", "# check the https://www.dgl.ai/pages/start.html to find the supported CUDA\n", "# version and corresponding command to install DGL.\n", "#!pip install dgl -f https://data.dgl.ai/wheels/cu118/repo.html > /dev/null\n", "#!pip install --upgrade scipy networkx > /dev/null\n", "\n", "try:\n", " import dgl\n", " installed = True\n", "except ImportError:\n", " installed = False\n", "print(\"DGL installed!\" if installed else \"Failed to install DGL!\")" ] }, { "cell_type": "markdown", "source": [ "## Graph Diffusion\n", "\n", "Diffusion describes the process of substances moving from one region to another. In the context of graphs, the diffusing substances (e.g., real-valued signals) travel along edges from node to node.\n", "\n", "Mathematically, let $\\vec x$ be the vector of node signals, then a graph diffusion operation can be defined as:\n", "\n", "$$\n", "\\vec{y} = \\tilde{A} \\vec{x}\n", "$$\n", "\n", ", where $\\tilde{A}$ is the **diffusion matrix** that is typically derived from the adjacency matrix of the graph. 
Although the selection of diffusion matrices may vary, the diffusion matrix is typically sparse and $\\tilde{A} \\vec{x}$ is thus a sparse-dense matrix multiplication.\n", "\n", "Let us understand it more with a simple example. First, we obtain the adjacency matrix of the famous [Karate Club Network](https://en.wikipedia.org/wiki/Zachary%27s_karate_club)." ], "metadata": { "id": "iH6os3oFcyze" } }, { "cell_type": "code", "source": [ "import dgl\n", "import dgl.sparse as dglsp\n", "from dgl.data import KarateClubDataset\n", "\n", "# Get the graph from DGL's builtin dataset.\n", "dataset = KarateClubDataset()\n", "dgl_g = dataset[0]\n", "\n", "# Get its adjacency matrix.\n", "indices = torch.stack(dgl_g.edges())\n", "N = dgl_g.num_nodes()\n", "A = dglsp.spmatrix(indices, shape=(N, N))\n", "print(A.to_dense())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_TnCECJmBKJE", "outputId": "d8b78f0b-3a1c-4a9e-bcc9-ed4df7b7b5b7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "tensor([[0., 1., 1., ..., 1., 0., 0.],\n", " [1., 0., 1., ..., 0., 0., 0.],\n", " [1., 1., 0., ..., 0., 1., 0.],\n", " ...,\n", " [1., 0., 0., ..., 0., 1., 1.],\n", " [0., 0., 1., ..., 1., 0., 1.],\n", " [0., 0., 0., ..., 1., 1., 0.]])\n" ] } ] }, { "cell_type": "markdown", "source": [ "We use the graph convolution matrix from Graph Convolution Networks as the diffusion matrix in this example. The graph convolution matrix is defined as:\n", "\n", "$$\\tilde{A} = \\bar{D}^{-\\frac{1}{2}}\\bar{A}\\bar{D}^{-\\frac{1}{2}}$$\n", "\n", "with $\\bar{A} = A + I$, where $A$ denotes the adjacency matrix and $I$ denotes the identity matrix, $\\bar{D}$ refers to the diagonal node degree matrix of $\\bar{A}$." 
], "metadata": { "id": "wJMT4oHOCCqJ" } }, { "cell_type": "code", "source": [ "# Compute graph convolution matrix.\n", "I = dglsp.identity(A.shape)\n", "A_hat = A + I\n", "D_hat = dglsp.diag(A_hat.sum(dim=1))\n", "D_hat_invsqrt = D_hat ** -0.5\n", "A_tilde = D_hat_invsqrt @ A_hat @ D_hat_invsqrt\n", "print(A_tilde.to_dense())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JyzctBGaC_O5", "outputId": "b03ef3dc-dcf5-494e-9191-30591d09f138" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "tensor([[0.0588, 0.0767, 0.0731, ..., 0.0917, 0.0000, 0.0000],\n", " [0.0767, 0.1000, 0.0953, ..., 0.0000, 0.0000, 0.0000],\n", " [0.0731, 0.0953, 0.0909, ..., 0.0000, 0.0836, 0.0000],\n", " ...,\n", " [0.0917, 0.0000, 0.0000, ..., 0.1429, 0.1048, 0.0891],\n", " [0.0000, 0.0000, 0.0836, ..., 0.1048, 0.0769, 0.0654],\n", " [0.0000, 0.0000, 0.0000, ..., 0.0891, 0.0654, 0.0556]])\n" ] } ] }, { "cell_type": "markdown", "source": [ "For node signals, we set all nodes but one to be zero." ], "metadata": { "id": "geYvWuUkDbiL" } }, { "cell_type": "code", "source": [ "# Initial node signals. All nodes except one are set to zero.\n", "X = torch.zeros(N)\n", "X[0] = 5.\n", "\n", "# Number of diffusion steps.\n", "r = 8\n", "\n", "# Record the signals after each diffusion step.\n", "results = [X]\n", "for _ in range(r):\n", " X = A_tilde @ X\n", " results.append(X)" ], "metadata": { "id": "DXb0uKqXDZKb" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "The program below visualizes the diffusion process with animation. To play the animation, click the \"play\" icon. You will see how node features converge over time." 
], "metadata": { "id": "TpqMz4muF2aO" } }, { "cell_type": "code", "source": [ "import matplotlib.pyplot as plt\n", "import networkx as nx\n", "from IPython.display import HTML\n", "from matplotlib import animation\n", "\n", "nx_g = dgl_g.to_networkx().to_undirected()\n", "pos = nx.spring_layout(nx_g)\n", "\n", "fig, ax = plt.subplots()\n", "plt.close()\n", "\n", "def animate(i):\n", " ax.cla()\n", " # Color nodes based on their features.\n", " nodes = nx.draw_networkx_nodes(nx_g, pos, ax=ax, node_size=200, node_color=results[i].tolist(), cmap=plt.cm.Blues)\n", " # Set boundary color of the nodes.\n", " nodes.set_edgecolor(\"#000000\")\n", " nx.draw_networkx_edges(nx_g, pos, ax=ax)\n", "\n", "ani = animation.FuncAnimation(fig, animate, frames=len(results), interval=1000)\n", "HTML(ani.to_jshtml())" ], "metadata": { "id": "eN3kmJ8nl7_z", "colab": { "base_uri": "https://localhost:8080/", "height": 386 }, "outputId": "be93263e-2283-4db7-caff-2e15e75ceb02" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ], "text/html": [ "\n", "\n", "\n", "\n", "\n", "\n", "
\n", " \n", "
\n", " \n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", "
\n", "
\n", "
\n", "\n", "\n", "\n" ] }, "metadata": {}, "execution_count": 5 } ] }, { "cell_type": "markdown", "source": [ "## Graph Diffusion in GNNs\n", "\n", "[Scalable Inception Graph Neural Networks (SIGN)](https://arxiv.org/abs/2004.11198) leverages multiple diffusion operators simultaneously. Formally, it is defined as:\n", "\n", "$$\n", "Z=\\sigma([X\\Theta_{0},A_1X\\Theta_{1},\\cdots,A_rX\\Theta_{r}])\\\\\n", "Y=\\xi(Z\\Omega)\n", "$$\n", "\n", "where:\n", "* $\\sigma$ and $\\xi$ are nonlinear activation functions.\n", "* $[\\cdot,\\cdots,\\cdot]$ is the concatenation operation.\n", "* $X\\in\\mathbb{R}^{n\\times d}$ is the input node feature matrix with $n$ nodes and $d$-dimensional feature vector per node.\n", "* $\\Theta_0,\\cdots,\\Theta_r\\in\\mathbb{R}^{d\\times d'}$ are learnable weight matrices.\n", "* $A_1,\\cdots, A_r\\in\\mathbb{R}^{n\\times n}$ are linear diffusion operators. In the example below, we consider $A^i$ for $A_i$, where $A$ is the convolution matrix of the graph.\n", "- $\\Omega\\in\\mathbb{R}^{d'(r+1)\\times c}$ is a learnable weight matrix and $c$ is the number of classes.\n", "\n", "The code below implements the diffusion function to compute $A_1X, A_2X, \\cdots, A_rX$ and the module that combines all the diffused node features." 
], "metadata": { "id": "unL_mAj-TqC6" } }, { "cell_type": "code", "source": [ "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "\n", "################################################################################\n", "# (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement the feature\n", "# diffusion in SIGN laconically.\n", "################################################################################\n", "def sign_diffusion(A, X, r):\n", " # Perform the r-hop diffusion operation.\n", " X_sign = [X]\n", " for i in range(r):\n", " # A^i X\n", " X = A @ X\n", " X_sign.append(X)\n", " return X_sign\n", "\n", "class SIGN(nn.Module):\n", " def __init__(self, in_size, out_size, r, hidden_size=256):\n", " super().__init__()\n", " self.theta = nn.ModuleList(\n", " [nn.Linear(in_size, hidden_size) for _ in range(r + 1)]\n", " )\n", " self.omega = nn.Linear(hidden_size * (r + 1), out_size)\n", "\n", " def forward(self, X_sign):\n", " results = []\n", " for i in range(len(X_sign)):\n", " results.append(self.theta[i](X_sign[i]))\n", " Z = F.relu(torch.cat(results, dim=1))\n", " return self.omega(Z)" ], "metadata": { "id": "__U3Hsp_S0SR" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training\n", "\n", "We train the SIGN model on [Cora dataset](https://docs.dgl.ai/en/latest/generated/dgl.data.CoraGraphDataset.html). The node features are diffused in the pre-processing stage." 
], "metadata": { "id": "ngyh4-YZTkNY" } }, { "cell_type": "code", "source": [ "from dgl.data import CoraGraphDataset\n", "from torch.optim import Adam\n", "\n", "\n", "def evaluate(g, pred):\n", " label = g.ndata[\"label\"]\n", " val_mask = g.ndata[\"val_mask\"]\n", " test_mask = g.ndata[\"test_mask\"]\n", "\n", " # Compute accuracy on validation/test set.\n", " val_acc = (pred[val_mask] == label[val_mask]).float().mean()\n", " test_acc = (pred[test_mask] == label[test_mask]).float().mean()\n", " return val_acc, test_acc\n", "\n", "\n", "def train(model, g, X_sign):\n", " label = g.ndata[\"label\"]\n", " train_mask = g.ndata[\"train_mask\"]\n", " optimizer = Adam(model.parameters(), lr=3e-3)\n", "\n", " for epoch in range(10):\n", " # Switch the model to training mode.\n", " model.train()\n", "\n", " # Forward.\n", " logits = model(X_sign)\n", "\n", " # Compute loss with nodes in training set.\n", " loss = F.cross_entropy(logits[train_mask], label[train_mask])\n", "\n", " # Backward.\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " # Switch the model to evaluating mode.\n", " model.eval()\n", "\n", " # Compute prediction.\n", " logits = model(X_sign)\n", " pred = logits.argmax(1)\n", "\n", " # Evaluate the prediction.\n", " val_acc, test_acc = evaluate(g, pred)\n", " print(\n", " f\"In epoch {epoch}, loss: {loss:.3f}, val acc: {val_acc:.3f}, test\"\n", " f\" acc: {test_acc:.3f}\"\n", " )\n", "\n", "\n", "# If CUDA is available, use GPU to accelerate the training, use CPU\n", "# otherwise.\n", "dev = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", "\n", "# Load graph from the existing dataset.\n", "dataset = CoraGraphDataset()\n", "g = dataset[0].to(dev)\n", "\n", "# Create the sparse adjacency matrix A (note that W was used as the notation\n", "# for adjacency matrix in the original paper).\n", "indices = torch.stack(g.edges())\n", "N = g.num_nodes()\n", "A = dglsp.spmatrix(indices, shape=(N, N))\n", "\n", 
"# Calculate the graph convolution matrix.\n", "I = dglsp.identity(A.shape, device=dev)\n", "A_hat = A + I\n", "D_hat_invsqrt = dglsp.diag(A_hat.sum(dim=1)) ** -0.5\n", "A_hat = D_hat_invsqrt @ A_hat @ D_hat_invsqrt\n", "\n", "# 2-hop diffusion.\n", "r = 2\n", "X = g.ndata[\"feat\"]\n", "X_sign = sign_diffusion(A_hat, X, r)\n", "\n", "# Create SIGN model.\n", "in_size = X.shape[1]\n", "out_size = dataset.num_classes\n", "model = SIGN(in_size, out_size, r).to(dev)\n", "\n", "# Kick off training.\n", "train(model, g, X_sign)" ], "metadata": { "id": "58WnPtPvT2mx", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "19e86f6a-c7f1-4b40-8cfc-58a181fc30d7" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading /root/.dgl/cora_v2.zip from https://data.dgl.ai/dataset/cora_v2.zip...\n", "Extracting file to /root/.dgl/cora_v2\n", "Finished data loading and preprocessing.\n", " NumNodes: 2708\n", " NumEdges: 10556\n", " NumFeats: 1433\n", " NumClasses: 7\n", " NumTrainingSamples: 140\n", " NumValidationSamples: 500\n", " NumTestSamples: 1000\n", "Done saving data into cached files.\n", "In epoch 0, loss: 1.946, val acc: 0.164, test acc: 0.200\n", "In epoch 1, loss: 1.937, val acc: 0.712, test acc: 0.690\n", "In epoch 2, loss: 1.926, val acc: 0.610, test acc: 0.595\n", "In epoch 3, loss: 1.914, val acc: 0.656, test acc: 0.640\n", "In epoch 4, loss: 1.898, val acc: 0.724, test acc: 0.726\n", "In epoch 5, loss: 1.880, val acc: 0.734, test acc: 0.753\n", "In epoch 6, loss: 1.859, val acc: 0.730, test acc: 0.746\n", "In epoch 7, loss: 1.834, val acc: 0.732, test acc: 0.743\n", "In epoch 8, loss: 1.807, val acc: 0.734, test acc: 0.746\n", "In epoch 9, loss: 1.776, val acc: 0.734, test acc: 0.745\n" ] } ] }, { "cell_type": "markdown", "source": [ "Check out the full example script [here](https://github.com/dmlc/dgl/blob/master/examples/sparse/sign.py). 
Learn more about how graph diffusion is used in other GNN models:\n", "\n", "* *Predict then Propagate: Graph Neural Networks meet Personalized PageRank* [paper](https://arxiv.org/abs/1810.05997) [code](https://github.com/dmlc/dgl/blob/master/examples/sparse/appnp.py)\n", "* *Combining Label Propagation and Simple Models Out-performs Graph Neural Networks* [paper](https://arxiv.org/abs/2010.13993) [code](https://github.com/dmlc/dgl/blob/master/examples/sparse/c_and_s.py)\n", "* *Simplifying Graph Convolutional Networks* [paper](https://arxiv.org/abs/1902.07153) [code](https://github.com/dmlc/dgl/blob/master/examples/sparse/sgc.py)\n", "* *Graph Neural Networks Inspired by Classical Iterative Algorithms* [paper](https://arxiv.org/pdf/2103.06064.pdf) [code](https://github.com/dmlc/dgl/blob/master/examples/sparse/twirls.py)" ], "metadata": { "id": "lI2Nms8PXq-y" } } ] } ================================================ FILE: notebooks/sparse/graph_transformer.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "Jv-tHPvR-JKa" }, "source": [ "# Graph Transformer in a Nutshell\n", "\n", "The **Transformer** [(Vaswani et al. 2017)](https://proceedings.neurips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html) has been proven an effective learning architecture in natural language processing and computer vision.\n", "Recently, researchers have turned to exploring the application of transformers in graph learning. They have achieved initial success on many practical tasks, e.g., graph property prediction.\n", "[Dwivedi et al. (2020)](https://arxiv.org/abs/2012.09699) first generalized the transformer neural architecture to graph-structured data. 
Here, we present how to build such a graph transformer with DGL's sparse matrix APIs.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/graph_transformer.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/sparse/graph_transformer.ipynb)" ] }, { "cell_type": "code", "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Uncomment below to install required packages. If the CUDA version is not 11.8,\n", "# check the https://www.dgl.ai/pages/start.html to find the supported CUDA\n", "# version and corresponding command to install DGL.\n", "#!pip install dgl -f https://data.dgl.ai/wheels/cu118/repo.html > /dev/null\n", "#!pip install ogb >/dev/null\n", "\n", "try:\n", " import dgl\n", " installed = True\n", "except ImportError:\n", " installed = False\n", "print(\"DGL installed!\" if installed else \"Failed to install DGL!\")" ], "metadata": { "id": "8wIJZQqODy-7" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "nOpFdtLI-JKb" }, "source": [ "## Sparse Multi-head Attention\n", "\n", "Recall the all-pairs scaled-dot-product attention mechanism in vanilla Transformer:\n", "\n", "$$\\text{Attn}=\\text{softmax}(\\dfrac{QK^T} {\\sqrt{d}})V,$$\n", "\n", "The graph transformer (GT) model employs a Sparse Multi-head Attention block:\n", "\n", "$$\\text{SparseAttn}(Q, K, V, A) = \\text{softmax}(\\frac{(QK^T) \\circ A}{\\sqrt{d}})V,$$\n", "\n", "where $Q, K, V ∈\\mathbb{R}^{N\\times d}$ are query feature, key feature, and value feature, respectively. $A\\in[0,1]^{N\\times N}$ is the adjacency matrix of the input graph. 
$(QK^T)\\circ A$ means that the multiplication of the query matrix and the key matrix is followed by a Hadamard product (or element-wise multiplication) with the sparse adjacency matrix as illustrated in the figure below:\n", "\n", "\n", "\n", "Essentially, only the attention scores between connected nodes are computed according to the sparsity of $A$. This operation is also called *Sampled Dense Dense Matrix Multiplication (SDDMM)*.\n", "\n", "Using the [batched SDDMM API](https://docs.dgl.ai/en/latest/generated/dgl.sparse.bsddmm.html) in DGL, we can parallelize the computation on multiple attention heads (different representation subspaces).\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dh7zc5v0-JKb" }, "outputs": [], "source": [ "import dgl\n", "import dgl.nn as dglnn\n", "import dgl.sparse as dglsp\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import torch.optim as optim\n", "\n", "from dgl.data import AsGraphPredDataset\n", "from dgl.dataloading import GraphDataLoader\n", "from ogb.graphproppred import collate_dgl, DglGraphPropPredDataset, Evaluator\n", "from ogb.graphproppred.mol_encoder import AtomEncoder\n", "from tqdm import tqdm\n", "\n", "\n", "class SparseMHA(nn.Module):\n", " \"\"\"Sparse Multi-head Attention Module\"\"\"\n", "\n", " def __init__(self, hidden_size=80, num_heads=8):\n", " super().__init__()\n", " self.hidden_size = hidden_size\n", " self.num_heads = num_heads\n", " self.head_dim = hidden_size // num_heads\n", " self.scaling = self.head_dim**-0.5\n", "\n", " self.q_proj = nn.Linear(hidden_size, hidden_size)\n", " self.k_proj = nn.Linear(hidden_size, hidden_size)\n", " self.v_proj = nn.Linear(hidden_size, hidden_size)\n", " self.out_proj = nn.Linear(hidden_size, hidden_size)\n", "\n", " def forward(self, A, h):\n", " N = len(h)\n", " # [N, dh, nh]\n", " q = self.q_proj(h).reshape(N, self.head_dim, self.num_heads)\n", " q *= self.scaling\n", " # [N, dh, nh]\n", " k = 
self.k_proj(h).reshape(N, self.head_dim, self.num_heads)\n", " # [N, dh, nh]\n", " v = self.v_proj(h).reshape(N, self.head_dim, self.num_heads)\n", "\n", " ######################################################################\n", " # (HIGHLIGHT) Compute the multi-head attention with Sparse Matrix API\n", " ######################################################################\n", " attn = dglsp.bsddmm(A, q, k.transpose(1, 0)) # (sparse) [N, N, nh]\n", " # Sparse softmax by default applies on the last sparse dimension.\n", " attn = attn.softmax() # (sparse) [N, N, nh]\n", " out = dglsp.bspmm(attn, v) # [N, dh, nh]\n", "\n", " return self.out_proj(out.reshape(N, -1))" ] }, { "cell_type": "markdown", "metadata": { "id": "3_Fm6Lrx-JKc" }, "source": [ "## Graph Transformer Layer\n", "\n", "The GT layer is composed of Multi-head Attention, Batch Norm, and Feed-forward Network, connected by residual links as in vanilla transformer.\n", "\n", "" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "M6h7JVWT-JKd" }, "outputs": [], "source": [ "class GTLayer(nn.Module):\n", " \"\"\"Graph Transformer Layer\"\"\"\n", "\n", " def __init__(self, hidden_size=80, num_heads=8):\n", " super().__init__()\n", " self.MHA = SparseMHA(hidden_size=hidden_size, num_heads=num_heads)\n", " self.batchnorm1 = nn.BatchNorm1d(hidden_size)\n", " self.batchnorm2 = nn.BatchNorm1d(hidden_size)\n", " self.FFN1 = nn.Linear(hidden_size, hidden_size * 2)\n", " self.FFN2 = nn.Linear(hidden_size * 2, hidden_size)\n", "\n", " def forward(self, A, h):\n", " h1 = h\n", " h = self.MHA(A, h)\n", " h = self.batchnorm1(h + h1)\n", "\n", " h2 = h\n", " h = self.FFN2(F.relu(self.FFN1(h)))\n", " h = h2 + h\n", "\n", " return self.batchnorm2(h)" ] }, { "cell_type": "markdown", "metadata": { "id": "t40DhVjI-JKd" }, "source": [ "## Graph Transformer Model\n", "\n", "The GT model is constructed by stacking GT layers. 
The input positional encoding of the vanilla transformer is replaced with Laplacian positional encoding [(Dwivedi et al. 2020)](https://arxiv.org/abs/2003.00982). For the graph-level prediction task, an extra pooler is stacked on top of the GT layers to aggregate the node features of the same graph." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UrjvEBrF-JKe" }, "outputs": [], "source": [ "class GTModel(nn.Module):\n", " def __init__(\n", " self,\n", " out_size,\n", " hidden_size=80,\n", " pos_enc_size=2,\n", " num_layers=8,\n", " num_heads=8,\n", " ):\n", " super().__init__()\n", " self.atom_encoder = AtomEncoder(hidden_size)\n", " self.pos_linear = nn.Linear(pos_enc_size, hidden_size)\n", " self.layers = nn.ModuleList(\n", " [GTLayer(hidden_size, num_heads) for _ in range(num_layers)]\n", " )\n", " self.pooler = dglnn.SumPooling()\n", " self.predictor = nn.Sequential(\n", " nn.Linear(hidden_size, hidden_size // 2),\n", " nn.ReLU(),\n", " nn.Linear(hidden_size // 2, hidden_size // 4),\n", " nn.ReLU(),\n", " nn.Linear(hidden_size // 4, out_size),\n", " )\n", "\n", " def forward(self, g, X, pos_enc):\n", " indices = torch.stack(g.edges())\n", " N = g.num_nodes()\n", " A = dglsp.spmatrix(indices, shape=(N, N))\n", " h = self.atom_encoder(X) + self.pos_linear(pos_enc)\n", " for layer in self.layers:\n", " h = layer(A, h)\n", " h = self.pooler(g, h)\n", "\n", " return self.predictor(h)" ] }, { "cell_type": "markdown", "metadata": { "id": "RdrPU18I-JKe" }, "source": [ "## Training\n", "\n", "We train the GT model on the [ogbg-molhiv](https://ogb.stanford.edu/docs/graphprop/#ogbg-mol) benchmark. The Laplacian positional encoding of each graph is pre-computed (with the API [here](https://docs.dgl.ai/en/latest/generated/dgl.laplacian_pe.html)) as part of the input to the model.\n", "\n", "*Note that we down-sample the dataset to make this demo run faster. 
See the* [*example script*](https://github.com/dmlc/dgl/blob/master/examples/sparse/graph_transformer.py) *for the performance on the full dataset.*" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "V41i0w-9-JKe", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "15343d1a-a32d-4677-d053-d9da96910f43" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "Computing Laplacian PE: 1%| | 25/4000 [00:00<00:16, 244.77it/s]/usr/local/lib/python3.8/dist-packages/dgl/backend/pytorch/tensor.py:52: UserWarning: Casting complex values to real discards the imaginary part (Triggered internally at ../aten/src/ATen/native/Copy.cpp:250.)\n", " return th.as_tensor(data, dtype=dtype)\n", "Computing Laplacian PE: 100%|██████████| 4000/4000 [00:13<00:00, 296.04it/s]\n" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Epoch: 000, Loss: 0.2486, Val: 0.3082, Test: 0.3068\n", "Epoch: 001, Loss: 0.1695, Val: 0.4684, Test: 0.4572\n", "Epoch: 002, Loss: 0.1428, Val: 0.5887, Test: 0.4721\n", "Epoch: 003, Loss: 0.1237, Val: 0.6375, Test: 0.5010\n", "Epoch: 004, Loss: 0.1127, Val: 0.6628, Test: 0.4854\n", "Epoch: 005, Loss: 0.1047, Val: 0.6811, Test: 0.4983\n", "Epoch: 006, Loss: 0.0949, Val: 0.6751, Test: 0.5409\n", "Epoch: 007, Loss: 0.0901, Val: 0.6340, Test: 0.5357\n", "Epoch: 008, Loss: 0.0811, Val: 0.6717, Test: 0.5543\n", "Epoch: 009, Loss: 0.0643, Val: 0.7861, Test: 0.5628\n", "Epoch: 010, Loss: 0.0489, Val: 0.7319, Test: 0.5341\n", "Epoch: 011, Loss: 0.0340, Val: 0.7884, Test: 0.5299\n", "Epoch: 012, Loss: 0.0285, Val: 0.5887, Test: 0.4293\n", "Epoch: 013, Loss: 0.0361, Val: 0.5514, Test: 0.3419\n", "Epoch: 014, Loss: 0.0451, Val: 0.6795, Test: 0.4964\n", "Epoch: 015, Loss: 0.0429, Val: 0.7405, Test: 0.5527\n", "Epoch: 016, Loss: 0.0331, Val: 0.7859, Test: 0.4994\n", "Epoch: 017, Loss: 0.0177, Val: 0.6544, Test: 0.4457\n", "Epoch: 018, Loss: 0.0201, Val: 0.8250, Test: 0.6073\n", "Epoch: 019, Loss: 0.0093, Val: 0.7356, 
Test: 0.5561\n" ] } ], "source": [ "@torch.no_grad()\n", "def evaluate(model, dataloader, evaluator, device):\n", " model.eval()\n", " y_true = []\n", " y_pred = []\n", " for batched_g, labels in dataloader:\n", " batched_g, labels = batched_g.to(device), labels.to(device)\n", " y_hat = model(batched_g, batched_g.ndata[\"feat\"], batched_g.ndata[\"PE\"])\n", " y_true.append(labels.view(y_hat.shape).detach().cpu())\n", " y_pred.append(y_hat.detach().cpu())\n", " y_true = torch.cat(y_true, dim=0).numpy()\n", " y_pred = torch.cat(y_pred, dim=0).numpy()\n", " input_dict = {\"y_true\": y_true, \"y_pred\": y_pred}\n", " return evaluator.eval(input_dict)[\"rocauc\"]\n", "\n", "\n", "def train(model, dataset, evaluator, device):\n", " train_dataloader = GraphDataLoader(\n", " dataset[dataset.train_idx],\n", " batch_size=256,\n", " shuffle=True,\n", " collate_fn=collate_dgl,\n", " )\n", " valid_dataloader = GraphDataLoader(\n", " dataset[dataset.val_idx], batch_size=256, collate_fn=collate_dgl\n", " )\n", " test_dataloader = GraphDataLoader(\n", " dataset[dataset.test_idx], batch_size=256, collate_fn=collate_dgl\n", " )\n", " optimizer = optim.Adam(model.parameters(), lr=0.001)\n", " num_epochs = 20\n", " scheduler = optim.lr_scheduler.StepLR(\n", " optimizer, step_size=num_epochs, gamma=0.5\n", " )\n", " loss_fcn = nn.BCEWithLogitsLoss()\n", "\n", " for epoch in range(num_epochs):\n", " model.train()\n", " total_loss = 0.0\n", " for batched_g, labels in train_dataloader:\n", " batched_g, labels = batched_g.to(device), labels.to(device)\n", " logits = model(\n", " batched_g, batched_g.ndata[\"feat\"], batched_g.ndata[\"PE\"]\n", " )\n", " loss = loss_fcn(logits, labels.float())\n", " total_loss += loss.item()\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", " scheduler.step()\n", " avg_loss = total_loss / len(train_dataloader)\n", " val_metric = evaluate(model, valid_dataloader, evaluator, device)\n", " test_metric = evaluate(model, 
test_dataloader, evaluator, device)\n", " print(\n", " f\"Epoch: {epoch:03d}, Loss: {avg_loss:.4f}, \"\n", " f\"Val: {val_metric:.4f}, Test: {test_metric:.4f}\"\n", " )\n", "\n", "\n", "# Training device.\n", "dev = torch.device(\"cpu\")\n", "# Uncomment the code below to train on GPU. Be sure to install DGL with CUDA support.\n", "#dev = torch.device(\"cuda:0\")\n", "\n", "# Load dataset.\n", "pos_enc_size = 8\n", "dataset = AsGraphPredDataset(\n", " DglGraphPropPredDataset(\"ogbg-molhiv\", \"./data/OGB\")\n", ")\n", "evaluator = Evaluator(\"ogbg-molhiv\")\n", "\n", "# Down sample the dataset to make the tutorial run faster.\n", "import random\n", "random.seed(42)\n", "train_size = len(dataset.train_idx)\n", "val_size = len(dataset.val_idx)\n", "test_size = len(dataset.test_idx)\n", "dataset.train_idx = dataset.train_idx[\n", " torch.LongTensor(random.sample(range(train_size), 2000))\n", "]\n", "dataset.val_idx = dataset.val_idx[\n", " torch.LongTensor(random.sample(range(val_size), 1000))\n", "]\n", "dataset.test_idx = dataset.test_idx[\n", " torch.LongTensor(random.sample(range(test_size), 1000))\n", "]\n", "\n", "# Laplacian positional encoding.\n", "indices = torch.cat([dataset.train_idx, dataset.val_idx, dataset.test_idx])\n", "for idx in tqdm(indices, desc=\"Computing Laplacian PE\"):\n", " g, _ = dataset[idx]\n", " g.ndata[\"PE\"] = dgl.laplacian_pe(g, k=pos_enc_size, padding=True)\n", "\n", "# Create model.\n", "out_size = dataset.num_tasks\n", "model = GTModel(out_size=out_size, pos_enc_size=pos_enc_size).to(dev)\n", "\n", "# Kick off training.\n", "train(model, dataset, evaluator, dev)" ] } ], "metadata": { "language_info": { "name": "python" }, "orig_nbformat": 4, "colab": { "provenance": [] }, "gpuClass": "standard", "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: notebooks/sparse/hgnn.ipynb 
================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "toc_visible": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "gpuClass": "standard", "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "# Hypergraph Neural Networks\n", "\n", "This tutorial illustrates what a hypergraph is and how to build a Hypergraph Neural Network using DGL's sparse matrix APIs.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/hgnn.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/sparse/hgnn.ipynb)" ], "metadata": { "id": "eiDu3XgReCt4" } }, { "cell_type": "code", "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Uncomment below to install required packages. If the CUDA version is not 11.8,\n", "# check the https://www.dgl.ai/pages/start.html to find the supported CUDA\n", "# version and corresponding command to install DGL.\n", "#!pip install dgl -f https://data.dgl.ai/wheels/cu118/repo.html > /dev/null\n", "#!pip install torchmetrics > /dev/null\n", "\n", "try:\n", " import dgl\n", " installed = True\n", "except ImportError:\n", " installed = False\n", "print(\"DGL installed!\" if installed else \"Failed to install DGL!\")" ], "metadata": { "id": "__2tKqL0eaB0" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Hypergraphs\n", "\n", "A [hypergraph](https://en.wikipedia.org/wiki/Hypergraph) consists of *nodes* and *hyperedges*. Contrary to edges in graphs, a *hyperedge* can connect an arbitrary number of nodes. 
For instance, the following figure shows a hypergraph with 11 nodes and 5 hyperedges drawn in different colors.\n", "![](https://data.dgl.ai/tutorial/img/hgnn/hypergraph4.PNG)\n", "\n", "Hypergraphs are particularly useful when the relationships between data points within the dataset are not binary. For instance, more than two products can be co-purchased together in an e-commerce system, so the relationship of co-purchase is $n$-ary rather than binary, and therefore it is better described as a hypergraph rather than a normal graph.\n", "\n", "A hypergraph is usually characterized by its *incidence matrix* $H$, whose rows represent nodes and columns represent hyperedges. An entry $H_{ij}$ is 1 if hyperedge $j$ includes node $i$, or 0 otherwise. For example, the hypergraph in the figure above can be characterized by an $11 \\times 5$ matrix as follows:\n", "\n", "$$\n", "H = \\begin{bmatrix}\n", "1 & 0 & 0 & 0 & 0 \\\\\n", "1 & 0 & 0 & 0 & 0 \\\\\n", "1 & 1 & 0 & 1 & 1 \\\\\n", "0 & 0 & 1 & 0 & 0 \\\\\n", "0 & 1 & 0 & 0 & 0 \\\\\n", "1 & 0 & 1 & 1 & 1 \\\\\n", "0 & 0 & 1 & 0 & 0 \\\\\n", "0 & 1 & 0 & 1 & 0 \\\\\n", "0 & 1 & 0 & 1 & 0 \\\\\n", "0 & 0 & 1 & 0 & 1 \\\\\n", "0 & 0 & 0 & 0 & 1 \\\\\n", "\\end{bmatrix}\n", "$$\n", "\n", "One can construct the hypergraph incidence matrix by specifying two tensors `nodes` and `hyperedges`, where the node ID `nodes[i]` belongs to the hyperedge ID `hyperedges[i]` for all `i`. 
In the case above, the incidence matrix can be constructed below.\n" ], "metadata": { "id": "unL_mAj-TqC6" } }, { "cell_type": "code", "source": [ "import dgl.sparse as dglsp\n", "import torch\n", "\n", "H = dglsp.spmatrix(\n", " torch.LongTensor([[0, 1, 2, 2, 2, 2, 3, 4, 5, 5, 5, 5, 6, 7, 7, 8, 8, 9, 9, 10],\n", " [0, 0, 0, 1, 3, 4, 2, 1, 0, 2, 3, 4, 2, 1, 3, 1, 3, 2, 4, 4]])\n", ")\n", "\n", "print(H.to_dense())" ], "metadata": { "id": "I_cExvtIJD1F", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "a1a576f6-1559-479c-9f3e-93e41a56833d" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "tensor([[1., 0., 0., 0., 0.],\n", " [1., 0., 0., 0., 0.],\n", " [1., 1., 0., 1., 1.],\n", " [0., 0., 1., 0., 0.],\n", " [0., 1., 0., 0., 0.],\n", " [1., 0., 1., 1., 1.],\n", " [0., 0., 1., 0., 0.],\n", " [0., 1., 0., 1., 0.],\n", " [0., 1., 0., 1., 0.],\n", " [0., 0., 1., 0., 1.],\n", " [0., 0., 0., 0., 1.]])\n" ] } ] }, { "cell_type": "markdown", "source": [ "The degree of a node in a hypergraph is defined as the number of hyperedges including the node. Similarly, the degree of a hyperedge in a hypergraph is defined as the number of nodes included by the hyperedge. In the example above, the hyperedge degrees can be computed by the sum of row vectors (i.e. all 4), while the node degree can be computed by the sum of column vectors." 
], "metadata": { "id": "p-shCPQPHvBB" } }, { "cell_type": "code", "source": [ "node_degrees = H.sum(1)\n", "print(\"Node degrees\", node_degrees)\n", "\n", "hyperedge_degrees = H.sum(0)\n", "print(\"Hyperedge degrees\", hyperedge_degrees)" ], "metadata": { "id": "wjKm9gkTOnU9", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ffe2c441-8c2c-48a7-cef2-4ef6e96548ec" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Node degrees tensor([1., 1., 4., 1., 1., 4., 1., 2., 2., 2., 1.])\n", "Hyperedge degrees tensor([4., 4., 4., 4., 4.])\n" ] } ] }, { "cell_type": "markdown", "source": [ "\n", "## Hypergraph Neural Network (HGNN) Layer\n", "\n", "The [HGNN layer](https://arxiv.org/pdf/1809.09401.pdf) is defined as:\n", "\n", "$$f(X^{(l)}, H; W^{(l)}) = \\sigma(L X^{(l)} W^{(l)})$$$$L = D_v^{-1/2} H B D_e^{-1} H^\\top D_v^{-1/2}$$\n", "\n", "where\n", "\n", "* $H \\in \\mathbb{R}^{N \\times M}$ is the incidence matrix of hypergraph with $N$ nodes and $M$ hyperedges.\n", "* $D_v \\in \\mathbb{R}^{N \\times N}$ is a diagonal matrix representing node degrees, whose $i$-th diagonal element is $\\sum_{j=1}^M H_{ij}$.\n", "* $D_e \\in \\mathbb{R}^{M \\times M}$ is a diagonal matrix representing hyperedge degrees, whose $j$-th diagonal element is $\\sum_{i=1}^N H_{ij}$.\n", "* $B \\in \\mathbb{R}^{M \\times M}$ is a diagonal matrix representing the hyperedge weights, whose $j$-th diagonal element is the weight of $j$-th hyperedge. In our example, $B$ is an identity matrix.\n", "\n", "The following code builds a two-layer HGNN." 
], "metadata": { "id": "7kxrINkVHrAi" } }, { "cell_type": "code", "source": [ "import dgl.sparse as dglsp\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "import tqdm\n", "from dgl.data import CoraGraphDataset\n", "from torchmetrics.functional import accuracy\n", "\n", "\n", "class HGNN(nn.Module):\n", " def __init__(self, H, in_size, out_size, hidden_dims=16):\n", " super().__init__()\n", "\n", " self.W1 = nn.Linear(in_size, hidden_dims)\n", " self.W2 = nn.Linear(hidden_dims, out_size)\n", " self.dropout = nn.Dropout(0.5)\n", "\n", " ###########################################################\n", " # (HIGHLIGHT) Compute the Laplacian with Sparse Matrix API\n", " ###########################################################\n", " # Compute node degree.\n", " d_V = H.sum(1)\n", " # Compute edge degree.\n", " d_E = H.sum(0)\n", " # Compute the inverse of the square root of the diagonal D_v.\n", " D_v_invsqrt = dglsp.diag(d_V**-0.5)\n", " # Compute the inverse of the diagonal D_e.\n", " D_e_inv = dglsp.diag(d_E**-1)\n", " # In our example, B is an identity matrix.\n", " n_edges = d_E.shape[0]\n", " B = dglsp.identity((n_edges, n_edges))\n", " # Compute Laplacian from the equation above.\n", " self.L = D_v_invsqrt @ H @ B @ D_e_inv @ H.T @ D_v_invsqrt\n", "\n", " def forward(self, X):\n", " X = self.L @ self.W1(self.dropout(X))\n", " X = F.relu(X)\n", " X = self.L @ self.W2(self.dropout(X))\n", " return X" ], "metadata": { "id": "58WnPtPvT2mx" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Loading Data\n", "\n", "We use Cora citation network in our example. But instead of using the original \"cite\" relationship between papers, we consider the \"co-cite\" relationship between papers. 
We build a hypergraph from the original citation network where for each paper we construct a hyperedge that includes all the other papers it cited, as well as the paper itself.\n", "\n", "![](https://data.dgl.ai/tutorial/img/hgnn/equiv.PNG)\n", "\n", "Note that a hypergraph constructed this way has an incidence matrix exactly identical to the adjacency matrix of the original graph (plus an identity matrix for self-loops). This is because each hyperedge has a one-to-one correspondence to each paper. So we can directly take the graph's adjacency matrix and add an identity matrix to it, and we use it as the hypergraph's incidence matrix." ], "metadata": { "id": "bPrOHVaGwUD0" } }, { "cell_type": "code", "source": [ "def load_data():\n", " dataset = CoraGraphDataset()\n", "\n", " graph = dataset[0]\n", " indices = torch.stack(graph.edges())\n", " H = dglsp.spmatrix(indices)\n", " H = H + dglsp.identity(H.shape)\n", "\n", " X = graph.ndata[\"feat\"]\n", " Y = graph.ndata[\"label\"]\n", " train_mask = graph.ndata[\"train_mask\"]\n", " val_mask = graph.ndata[\"val_mask\"]\n", " test_mask = graph.ndata[\"test_mask\"]\n", " return H, X, Y, dataset.num_classes, train_mask, val_mask, test_mask" ], "metadata": { "id": "qI0j1J9pwTFg" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training and Evaluation\n", "\n", "Now we can write the training and evaluation functions as follows." 
], "metadata": { "id": "--rq1-r7wMST" } }, { "cell_type": "code", "source": [ "def train(model, optimizer, X, Y, train_mask):\n", " model.train()\n", " Y_hat = model(X)\n", " loss = F.cross_entropy(Y_hat[train_mask], Y[train_mask])\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", "\n", "def evaluate(model, X, Y, val_mask, test_mask, num_classes):\n", " model.eval()\n", " Y_hat = model(X)\n", " val_acc = accuracy(\n", " Y_hat[val_mask], Y[val_mask], task=\"multiclass\", num_classes=num_classes\n", " )\n", " test_acc = accuracy(\n", " Y_hat[test_mask],\n", " Y[test_mask],\n", " task=\"multiclass\",\n", " num_classes=num_classes,\n", " )\n", " return val_acc, test_acc\n", "\n", "\n", "H, X, Y, num_classes, train_mask, val_mask, test_mask = load_data()\n", "model = HGNN(H, X.shape[1], num_classes)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", "\n", "with tqdm.trange(500) as tq:\n", " for epoch in tq:\n", " train(model, optimizer, X, Y, train_mask)\n", " val_acc, test_acc = evaluate(\n", " model, X, Y, val_mask, test_mask, num_classes\n", " )\n", " tq.set_postfix(\n", " {\n", " \"Val acc\": f\"{val_acc:.5f}\",\n", " \"Test acc\": f\"{test_acc:.5f}\",\n", " },\n", " refresh=False,\n", " )\n", "\n", "print(f\"Test acc: {test_acc:.3f}\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IfEc6JRXwHPt", "outputId": "0172578a-6a1b-49eb-adcb-77ee1a949186" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Downloading /root/.dgl/cora_v2.zip from https://data.dgl.ai/dataset/cora_v2.zip...\n", "Extracting file to /root/.dgl/cora_v2\n", "Finished data loading and preprocessing.\n", " NumNodes: 2708\n", " NumEdges: 10556\n", " NumFeats: 1433\n", " NumClasses: 7\n", " NumTrainingSamples: 140\n", " NumValidationSamples: 500\n", " NumTestSamples: 1000\n", "Done saving data into cached files.\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ 
"100%|██████████| 500/500 [00:57<00:00, 8.70it/s, Val acc=0.77800, Test acc=0.78100]" ] }, { "output_type": "stream", "name": "stdout", "text": [ "Test acc: 0.781\n" ] }, { "output_type": "stream", "name": "stderr", "text": [ "\n" ] } ] }, { "cell_type": "markdown", "source": [ "For the complete example of HGNN, please refer to [here](https://github.com/dmlc/dgl/blob/master/examples/sparse/hgnn.py)." ], "metadata": { "id": "59pCzjpBOyEW" } } ] } ================================================ FILE: notebooks/sparse/quickstart.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "private_outputs": true, "toc_visible": true, "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Quickstart\n", "\n", "The tutorial provides a quick walkthrough of the classes and operators provided by the `dgl.sparse` package.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/quickstart.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/sparse/quickstart.ipynb)" ], "metadata": { "id": "E0DAKDMuWz7I" } }, { "cell_type": "code", "source": [ "# Install the required packages.\n", "\n", "import os\n", "# Uncomment following commands to download Pytorch and DGL\n", "# !pip install torch==2.0.0+cpu torchvision==0.15.1+cpu torchaudio==2.0.1 --index-url https://download.pytorch.org/whl/cpu > /dev/null\n", "# !pip install dgl==1.1.0 -f https://data.dgl.ai/wheels/repo.html > /dev/null\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "\n", "try:\n", " import dgl.sparse as dglsp\n", " installed = True\n", "except 
ImportError:\n", " installed = False\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ], "metadata": { "id": "19UZd7wyWzpT" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Sparse Matrix\n", "\n", "The core abstraction of DGL's sparse package is the `SparseMatrix` class. Compared with other sparse matrix libraries (such as `scipy.sparse` and `torch.sparse`), DGL's `SparseMatrix` is specialized for deep learning workloads on structured data (e.g., Graph Neural Networks), with the following features:\n", "\n", "* **Auto sparse format.** Don't bother choosing between different sparse formats. There is only one `SparseMatrix` and it will select the best format for the operation to be performed.\n", "* **Non-zero elements can be scalar or vector.** Easy for modeling relations (e.g., edges) by vector representation.\n", "* **Fully PyTorch compatible.** The package is built upon PyTorch and is natively compatible with other tools in the PyTorch ecosystem.\n" ], "metadata": { "id": "GsWoAGC4RpHw" } }, { "cell_type": "markdown", "source": [ "### Creating a DGL Sparse Matrix\n", "\n", "The simplest way to create a sparse matrix is using the `spmatrix` API by providing the indices of the non-zero elements. The indices are stored in a tensor of shape `(2, nnz)`, where the `i`-th non-zero element is stored at position `(indices[0][i], indices[1][i])`. 
The code below creates a 3x3 sparse matrix.\n" ], "metadata": { "id": "_q4HYodcWenB" } }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "h-ryVEs1PuIP" }, "outputs": [], "source": [ "import torch\n", "import dgl.sparse as dglsp\n", "\n", "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "A = dglsp.spmatrix(i) # 1.0 is default value for nnz elements.\n", "\n", "print(A)\n", "print(\"\")\n", "print(\"In dense format:\")\n", "print(A.to_dense())" ] }, { "cell_type": "markdown", "source": [ "If not specified, the shape is inferred automatically from the indices but you can specify it explicitly too." ], "metadata": { "id": "W1JJg-eZ7K3t" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 0, 1],\n", " [0, 2, 0]])\n", "\n", "A1 = dglsp.spmatrix(i)\n", "print(f\"Implicit Shape: {A1.shape}\")\n", "print(A1.to_dense())\n", "print(\"\")\n", "\n", "A2 = dglsp.spmatrix(i, shape=(3, 3))\n", "print(f\"Explicit Shape: {A2.shape}\")\n", "print(A2.to_dense())" ], "metadata": { "id": "80NNSQfd7L5V" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Both scalar values and vector values can be set for nnz elements in Sparse Matrix." 
], "metadata": { "id": "zdNgUf0ShfCe" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "# The length of the value should match the nnz elements represented by the\n", "# sparse matrix format.\n", "scalar_val = torch.tensor([1., 2., 3.])\n", "vector_val = torch.tensor([[1., 1.], [2., 2.], [3., 3.]])\n", "\n", "print(\"-----Scalar Values-----\")\n", "A = dglsp.spmatrix(i, scalar_val)\n", "print(A)\n", "print(\"\")\n", "print(\"In dense format:\")\n", "print(A.to_dense())\n", "print(\"\")\n", "\n", "print(\"-----Vector Values-----\")\n", "A = dglsp.spmatrix(i, vector_val)\n", "print(A)\n", "print(\"\")\n", "print(\"In dense format:\")\n", "print(A.to_dense())" ], "metadata": { "id": "buE9ZkKvhp1f" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "*Duplicated indices*" ], "metadata": { "id": "7ufTCDAVsrmP" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 0, 0, 1],\n", " [0, 2, 2, 0]])\n", "val = torch.tensor([1., 2., 3., 4])\n", "A = dglsp.spmatrix(i, val)\n", "print(A)\n", "print(f\"Whether A contains duplicate indices: {A.has_duplicate()}\")\n", "print(\"\")\n", "\n", "B = A.coalesce()\n", "print(B)\n", "print(f\"Whether B contains duplicate indices: {B.has_duplicate()}\")" ], "metadata": { "id": "ilSAlFLOs0o8" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**val_like**\n", "\n", "You can create a new sparse matrix by retaining the non-zero indices of a given sparse matrix but with different non-zero values." 
], "metadata": { "id": "ZJ09qM5NaxuI" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A = dglsp.spmatrix(i, val)\n", "\n", "new_val = torch.tensor([4., 5., 6.])\n", "B = dglsp.val_like(A, new_val)\n", "print(B)" ], "metadata": { "id": "UB3lKJVBbsUD" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**Create a sparse matrix from various sparse formats**\n", "\n", "* `from_coo()`: Create a sparse matrix from [COO](https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_(COO)) format.\n", "* `from_csr()`: Create a sparse matrix from [CSR](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_row_(CSR,_CRS_or_Yale_format)) format.\n", "* `from_csc()`: Create a sparse matrix from [CSC](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_(CSC_or_CCS)) format." ], "metadata": { "id": "nWjBSFDBXDPJ" } }, { "cell_type": "code", "source": [ "row = torch.tensor([0, 1, 2, 2, 2])\n", "col = torch.tensor([1, 2, 0, 1, 2])\n", "\n", "print(\"-----Create from COO format-----\")\n", "A = dglsp.from_coo(row, col)\n", "print(A)\n", "print(\"\")\n", "print(\"In dense format:\")\n", "print(A.to_dense())\n", "print(\"\")\n", "\n", "indptr = torch.tensor([0, 1, 2, 5])\n", "indices = torch.tensor([1, 2, 0, 1, 2])\n", "\n", "print(\"-----Create from CSR format-----\")\n", "A = dglsp.from_csr(indptr, indices)\n", "print(A)\n", "print(\"\")\n", "print(\"In dense format:\")\n", "print(A.to_dense())\n", "print(\"\")\n", "\n", "print(\"-----Create from CSC format-----\")\n", "B = dglsp.from_csc(indptr, indices)\n", "print(B)\n", "print(\"\")\n", "print(\"In dense format:\")\n", "print(B.to_dense())" ], "metadata": { "id": "3puXyMFsvdlj" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### Attributes and methods of a DGL Sparse Matrix" ], "metadata": { "id": "nd4hJ9ysd4St" } }, { "cell_type": "code", "source": [ "i = 
torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.tensor([1., 2., 3., 4.])\n", "A = dglsp.spmatrix(i, val)\n", "\n", "print(f\"Shape of sparse matrix: {A.shape}\")\n", "print(f\"The number of nonzero elements of sparse matrix: {A.nnz}\")\n", "print(f\"Datatype of sparse matrix: {A.dtype}\")\n", "print(f\"Device sparse matrix is stored on: {A.device}\")\n", "print(f\"Get the values of the nonzero elements: {A.val}\")\n", "print(f\"Get the row indices of the nonzero elements: {A.row}\")\n", "print(f\"Get the column indices of the nonzero elements: {A.col}\")\n", "print(f\"Get the coordinate (COO) representation: {A.coo()}\")\n", "print(f\"Get the compressed sparse row (CSR) representation: {A.csr()}\")\n", "print(f\"Get the compressed sparse column (CSC) representation: {A.csc()}\")" ], "metadata": { "id": "OKbFiWKIzZVe" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**dtype and/or device conversion**" ], "metadata": { "id": "VzosM7i3yQPK" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.tensor([1., 2., 3., 4.])\n", "A = dglsp.spmatrix(i, val)\n", "\n", "B = A.to(device='cpu', dtype=torch.int32)\n", "print(f\"Device sparse matrix is stored on: {B.device}\")\n", "print(f\"Datatype of sparse matrix: {B.dtype}\")" ], "metadata": { "id": "y_RJihw-ypXp" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Similar to pytorch, we also provide various fine-grained APIs ([Doc](https://docs.dgl.ai/en/latest/api/python/dgl.sparse_v0.html)) for dtype and/or device conversion." 
], "metadata": { "id": "U26arLlJzfkN" } }, { "cell_type": "markdown", "source": [ "## Diagonal Matrix\n", "\n", "Diagonal Matrix is a special type of Sparse Matrix, in which the entries outside the main diagonal are all zero.\n", "\n", "\n" ], "metadata": { "id": "EFe9ABRuWHqf" } }, { "cell_type": "markdown", "source": [ "### Initializing a DGL Diagonal Sparse Matrix\n", "A DGL Diagonal Sparse Matrix can be initialized with `dglsp.diag()`.\n", "\n", "Identity Matrix is a special type of Diagonal Sparse Matrix, in which all the values on the diagonal are 1.0. Use `dglsp.identity()` to create an identity matrix." ], "metadata": { "id": "1CeCoE2Fgl_x" } }, { "cell_type": "code", "source": [ "val = torch.tensor([1., 2., 3., 4.])\n", "D = dglsp.diag(val)\n", "print(D)\n", "\n", "I = dglsp.identity(shape=(3, 3))\n", "print(I)" ], "metadata": { "id": "9wzJNApahXAR" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Operations on Sparse Matrix\n", "* Elementwise operations\n", " * `A + B`\n", " * `A - B`\n", " * `A * B`\n", " * `A / B`\n", " * `A ** scalar`\n", "* Broadcast operations\n", " * `sp_<op>_v()`\n", "* Reduce operations\n", " * `reduce()`\n", " * `sum()`\n", " * `smax()`\n", " * `smin()`\n", " * `smean()`\n", "* Matrix transformations\n", " * `SparseMatrix.transpose()` or `SparseMatrix.T`\n", " * `SparseMatrix.neg()`\n", " * `SparseMatrix.inv()`\n", "* Matrix multiplication\n", " * `matmul()`\n", " * `sddmm()`\n", "\n", "\n", "*We are using dense format to print sparse matrices in this tutorial since it is more intuitive to read.*" ], "metadata": { "id": "Tjsapqp6zSFR" } }, { "cell_type": "markdown", "source": [ "### *Elementwise operations*" ], "metadata": { "id": "psvGwcIqYvC2" } }, { "cell_type": "markdown", "source": [ "**add(A, B), equivalent to A + B**\n", "\n", "Element-wise addition on two sparse matrices, returning a sparse matrix." 
], "metadata": { "id": "39YJitpW-K9v" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A1 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A1:\")\n", "print(A1.to_dense())\n", "\n", "i = torch.tensor([[0, 1, 2],\n", " [0, 2, 1]])\n", "val = torch.tensor([4., 5., 6.])\n", "A2 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A2:\")\n", "print(A2.to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D1 = dglsp.diag(val)\n", "print(\"D1:\")\n", "print(D1.to_dense())\n", "\n", "val = torch.tensor([-4., -5., -6.])\n", "D2 = dglsp.diag(val)\n", "print(\"D2:\")\n", "print(D2.to_dense())\n", "\n", "print(\"A1 + A2:\")\n", "print((A1 + A2).to_dense())\n", "\n", "print(\"A1 + D1:\")\n", "print((A1 + D1).to_dense())\n", "\n", "print(\"D1 + D2:\")\n", "print((D1 + D2).to_dense())" ], "metadata": { "id": "pj3Ckx41-BSu" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**sub(A, B), equivalent to A - B**\n", "\n", "Element-wise subtraction on two sparse matrices, returning a sparse matrix." 
], "metadata": { "id": "i25N0JHUTUX9" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A1 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A1:\")\n", "print(A1.to_dense())\n", "\n", "i = torch.tensor([[0, 1, 2],\n", " [0, 2, 1]])\n", "val = torch.tensor([4., 5., 6.])\n", "A2 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A2:\")\n", "print(A2.to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D1 = dglsp.diag(val)\n", "print(\"D1:\")\n", "print(D1.to_dense())\n", "\n", "val = torch.tensor([-4., -5., -6.])\n", "D2 = dglsp.diag(val)\n", "print(\"D2:\")\n", "print(D2.to_dense())\n", "\n", "print(\"A1 - A2:\")\n", "print((A1 - A2).to_dense())\n", "\n", "print(\"A1 - D1:\")\n", "print((A1 - D1).to_dense())\n", "\n", "print(\"D1 - A1:\")\n", "print((D1 - A1).to_dense())\n", "\n", "print(\"D1 - D2:\")\n", "print((D1 - D2).to_dense())" ], "metadata": { "id": "GMxfz-cyT129" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**mul(A, B), equivalent to A * B**\n", "\n", "Element-wise multiplication on two sparse matrices or on a sparse matrix and a scalar, returning a sparse matrix." 
], "metadata": { "id": "bg45jnq8T9EJ" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A1 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A1:\")\n", "print(A1.to_dense())\n", "\n", "i = torch.tensor([[0, 1, 2, 2],\n", " [0, 2, 0, 1]])\n", "val = torch.tensor([1., 2., 3., 4.])\n", "A2 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "\n", "print(\"A2:\")\n", "print(A2.to_dense())\n", "\n", "print(\"A1 * 3:\")\n", "print((A1 * 3).to_dense())\n", "print(\"3 * A1:\")\n", "print((3 * A1).to_dense())\n", "\n", "print(\"A1 * A2\")\n", "print((A1 * A2).to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D1 = dglsp.diag(val)\n", "print(\"D1:\")\n", "print(D1.to_dense())\n", "\n", "print(\"D1 * A2\")\n", "print((D1 * A2).to_dense())\n", "\n", "val = torch.tensor([-4., -5., -6.])\n", "D2 = dglsp.diag(val)\n", "print(\"D2:\")\n", "print(D2.to_dense())\n", "\n", "print(\"D1 * -2:\")\n", "print((D1 * -2).to_dense())\n", "print(\"-2 * D1:\")\n", "print((-2 * D1).to_dense())\n", "\n", "print(\"D1 * D2:\")\n", "print((D1 * D2).to_dense())" ], "metadata": { "id": "4PAITJqHUB8J" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**div(A, B), equivalent to A / B**\n", "\n", "Element-wise division on two sparse matrices or on a sparse matrix and a scalar, returning a sparse matrix. If both `A` and `B` are sparse matrices, they must have the same sparsity, and the returned matrix has the same order of non-zero entries as `A`." 
], "metadata": { "id": "Xb2RU6H4UBCs" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A1 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A1:\")\n", "print(A1.to_dense())\n", "\n", "i = torch.tensor([[1, 2, 1],\n", " [0, 0, 2]])\n", "val = torch.tensor([1., 3., 2.])\n", "A2 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "\n", "print(\"A1 / 2:\")\n", "print((A1 / 2).to_dense())\n", "\n", "print(\"A1 / A2\")\n", "print((A1 / A2).to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D1 = dglsp.diag(val)\n", "print(\"D1:\")\n", "print(D1.to_dense())\n", "\n", "val = torch.tensor([-4., -5., -6.])\n", "D2 = dglsp.diag(val)\n", "print(\"D2:\")\n", "print(D2.to_dense())\n", "\n", "print(\"D1 / D2:\")\n", "print((D1 / D2).to_dense())\n", "\n", "print(\"D1 / 2:\")\n", "print((D1 / 2).to_dense())" ], "metadata": { "id": "TFB_UcmEUdr3" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**power(A, B), equivalent to A \\*\\* B**\n", "\n", "Element-wise power of a sparse matrix and a scalar, returning a sparse matrix." ], "metadata": { "id": "2lZbyTYUUgSi" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A:\")\n", "print(A.to_dense())\n", "\n", "print(\"A ** 3:\")\n", "print((A ** 3).to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D = dglsp.diag(val)\n", "print(\"D:\")\n", "print(D.to_dense())\n", "\n", "print(\"D1 ** 2:\")\n", "print((D1 ** 2).to_dense())" ], "metadata": { "id": "ox-XxCnuUqAy" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### *Broadcast operations*" ], "metadata": { "id": "VXBz4j5x_wQ4" } }, { "cell_type": "markdown", "source": [ "**sp_\\<op\\>_v(A, v)**\n", "\n", "Broadcast operations on a sparse matrix and a vector, returning a sparse matrix. 
`v` is broadcasted to the shape of `A` and then the operator is applied on the non-zero values of `A`. `<op>` can be add, sub, mul, and div. \n", "\n", "There are two cases regarding the shape of `v`:\n", "\n", "1. `v` is a vector of shape `(1, A.shape[1])` or `(A.shape[1])`. In this case, `v` is broadcasted on the row dimension of `A`.\n", "\n", "2. `v` is a vector of shape `(A.shape[0], 1)`. In this case, `v` is broadcasted on the column dimension of `A`." ], "metadata": { "id": "PtnyZdXHAZ6Z" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 0, 2], [0, 3, 2]])\n", "val = torch.tensor([10, 20, 30])\n", "A = dglsp.spmatrix(i, val, shape=(3, 4))\n", "\n", "v1 = torch.tensor([1, 2, 3, 4])\n", "print(\"A:\")\n", "print(A.to_dense())\n", "\n", "print(\"v1:\")\n", "print(v1)\n", "\n", "print(\"sp_add_v(A, v1)\")\n", "print(dglsp.sp_add_v(A, v1).to_dense())\n", "\n", "v2 = v1.reshape(1, -1)\n", "print(\"v2:\")\n", "print(v2)\n", "\n", "print(\"sp_add_v(A, v2)\")\n", "print(dglsp.sp_add_v(A, v2).to_dense())\n", "\n", "v3 = torch.tensor([1, 2, 3]).reshape(-1, 1)\n", "print(\"v3:\")\n", "print(v3)\n", "\n", "print(\"sp_add_v(A, v3)\")\n", "print(dglsp.sp_add_v(A, v3).to_dense())" ], "metadata": { "id": "xxf3s-uWBRR7" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### *Reduce operations*\n", "\n", "All DGL sparse reduce operations only consider non-zero elements. To distinguish them from dense PyTorch reduce operations that consider zero elements, we use the names `smax`, `smin` and `smean` (`s` stands for sparse)." 
], "metadata": { "id": "TQJJlctZjYPv" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.tensor([1., 2., 3., 4.])\n", "A = dglsp.spmatrix(i, val)\n", "print(A.T.to_dense())\n", "print(\"\")\n", "\n", "# O1, O2 will have the same value.\n", "O1 = A.reduce(0, 'sum')\n", "O2 = A.sum(0)\n", "print(\"Reduce with reducer:sum along dim = 0:\")\n", "print(O1)\n", "print(\"\")\n", "\n", "# O3, O4 will have the same value.\n", "O3 = A.reduce(0, 'smax')\n", "O4 = A.smax(0)\n", "print(\"Reduce with reducer:max along dim = 0:\")\n", "print(O3)\n", "print(\"\")\n", "\n", "# O5, O6 will have the same value.\n", "O5 = A.reduce(0, 'smin')\n", "O6 = A.smin(0)\n", "print(\"Reduce with reducer:min along dim = 0:\")\n", "print(O5)\n", "print(\"\")\n", "\n", "# O7, O8 will have the same value.\n", "O7 = A.reduce(0, 'smean')\n", "O8 = A.smean(0)\n", "print(\"Reduce with reducer:smean along dim = 0:\")\n", "print(O7)\n", "print(\"\")" ], "metadata": { "id": "GhS49Js1jW4b" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### *Matrix transformations*" ], "metadata": { "id": "kanwnB7LOQui" } }, { "cell_type": "markdown", "source": [ "*Sparse Matrix*" ], "metadata": { "id": "NiiXso9elM2p" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.tensor([1., 2., 3., 4.])\n", "A = dglsp.spmatrix(i, val)\n", "print(A.to_dense())\n", "print(\"\")\n", "\n", "print(\"Get transpose of sparse matrix.\")\n", "print(A.T.to_dense())\n", "# Alias\n", "# A.transpose()\n", "# A.t()\n", "print(\"\")\n", "\n", "print(\"Get a sparse matrix with the negation of the original nonzero values.\")\n", "print(A.neg().to_dense())\n", "print(\"\")" ], "metadata": { "id": "qJcmZHmf-oTY" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### *Matrix multiplication*" ], "metadata": { "id": "4uQlDFb0Uzto" } }, { "cell_type": "markdown", 
"source": [ "**matmul(A, B), equivalent to A @ B**\n", "\n", "Matrix multiplication on sparse matrices and/or dense matrix. There are two cases as follows." ], "metadata": { "id": "THWE30v6WpAk" } }, { "cell_type": "markdown", "source": [ "**SparseMatrix @ SparseMatrix -> SparseMatrix:**\n", "\n", "For a $L \\times M$ sparse matrix A and a $M \\times N$ sparse matrix B, the shape of `A @ B` will be $L \\times N$ sparse matrix." ], "metadata": { "id": "VxyykR-vX7lF" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A1 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A1:\")\n", "print(A1.to_dense())\n", "\n", "i = torch.tensor([[0, 1, 2],\n", " [0, 2, 1]])\n", "val = torch.tensor([4., 5., 6.])\n", "A2 = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A2:\")\n", "print(A2.to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D1 = dglsp.diag(val)\n", "print(\"D1:\")\n", "print(D1.to_dense())\n", "\n", "val = torch.tensor([-4., -5., -6.])\n", "D2 = dglsp.diag(val)\n", "print(\"D2:\")\n", "print(D2.to_dense())\n", "\n", "print(\"A1 @ A2:\")\n", "print((A1 @ A2).to_dense())\n", "\n", "print(\"A1 @ D1:\")\n", "print((A1 @ D1).to_dense())\n", "\n", "print(\"D1 @ A1:\")\n", "print((D1 @ A1).to_dense())\n", "\n", "print(\"D1 @ D2:\")\n", "print((D1 @ D2).to_dense())" ], "metadata": { "id": "XRDFC2rOYQM4" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**SparseMatrix @ Tensor -> Tensor:**\n", "\n", "For a $L \\times M$ sparse matrix A and a $M \\times N$ dense matrix B, the shape of `A @ B` will be $L \\times N$ dense matrix." 
], "metadata": { "id": "g13fG8nvaVOt" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([1., 2., 3.])\n", "A = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A:\")\n", "print(A.to_dense())\n", "\n", "val = torch.tensor([-1., -2., -3.])\n", "D = dglsp.diag(val)\n", "print(\"D:\")\n", "print(D.to_dense())\n", "\n", "X = torch.tensor([[11., 22.], [33., 44.], [55., 66.]])\n", "print(\"X:\")\n", "print(X)\n", "\n", "print(\"A @ X:\")\n", "print(A @ X)\n", "\n", "print(\"D @ X:\")\n", "print(D @ X)" ], "metadata": { "id": "FcQ-CnqdlgWF" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "This operator also supports batched sparse-dense matrix multiplication. The sparse matrix A should have shape $L \\times M$, where the non-zero values are vectors of length $K$. The dense matrix B should have shape $M \\times N \\times K$. The output is a dense matrix of shape $L \\times N \\times K$." ], "metadata": { "id": "_KZiULLbmEZE" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [0, 2, 0]])\n", "val = torch.tensor([[1., 1.], [2., 2.], [3., 3.]])\n", "A = dglsp.spmatrix(i, val, shape=(3, 3))\n", "print(\"A:\")\n", "print(A.to_dense())\n", "\n", "X = torch.tensor([[[1., 1.], [1., 2.]],\n", " [[1., 3.], [1., 4.]],\n", " [[1., 5.], [1., 6.]]])\n", "print(\"X:\")\n", "print(X)\n", "\n", "print(\"A @ X:\")\n", "print(A @ X)" ], "metadata": { "id": "ZUzXQk7Ab2wG" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**Sampled-Dense-Dense Matrix Multiplication (SDDMM)**\n", "\n", "``sddmm`` matrix-multiplies two dense matrices X1 and X2, then elementwise-multiplies the result with sparse matrix A at the nonzero locations. 
This is designed for sparse matrices with scalar values.\n", "\n", "$$out = (X_1 @ X_2) * A$$\n", "\n", "For a $L \\times N$ sparse matrix A, a $L \\times M$ dense matrix X1 and a $M \\times N$ dense matrix X2, `sddmm(A, X1, X2)` will be a $L \\times N$ sparse matrix." ], "metadata": { "id": "qO_8f_vhPKtf" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [2, 3, 3]])\n", "val = torch.tensor([1., 2., 3.])\n", "A = dglsp.spmatrix(i, val, (3, 4))\n", "print(\"A:\")\n", "print(A.to_dense())\n", "\n", "X1 = torch.randn(3, 5)\n", "X2 = torch.randn(5, 4)\n", "print(\"X1:\")\n", "print(X1)\n", "print(\"X2:\")\n", "print(X2)\n", "\n", "O = dglsp.sddmm(A, X1, X2)\n", "print(\"dglsp.sddmm(A, X1, X2):\")\n", "print(O.to_dense())" ], "metadata": { "id": "3ZIFV0TgPhwH" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "This operator also supports batched sampled-dense-dense matrix multiplication. For a $L \\times N$ sparse matrix A with non-zero vector values of length $K$, a $L \\times M \\times K$ dense matrix X1 and a $M \\times N \\times K$ dense matrix X2, `sddmm(A, X1, X2)` will be a $L \\times N \\times K$ sparse matrix." 
], "metadata": { "id": "RmNmXU_ZqyF7" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[1, 1, 2],\n", " [2, 3, 3]])\n", "val = torch.tensor([[1., 1.], [2., 2.], [3., 3.]])\n", "A = dglsp.spmatrix(i, val, (3, 4))\n", "print(\"A:\")\n", "print(A.to_dense())\n", "\n", "X1 = torch.randn(3, 5, 2)\n", "X2 = torch.randn(5, 4, 2)\n", "print(\"X1:\")\n", "print(X1)\n", "print(\"X2:\")\n", "print(X2)\n", "\n", "O = dglsp.sddmm(A, X1, X2)\n", "print(\"dglsp.sddmm(A, X1, X2):\")\n", "print(O.to_dense())" ], "metadata": { "id": "DuSAjamyrIO_" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Non-linear activation functions" ], "metadata": { "id": "fVkbTT28ZzPr" } }, { "cell_type": "markdown", "source": [ "### Element-wise functions\n", "\n", "Most activation functions are element-wise and can be further grouped into two categories:\n", "\n", "**Sparse-preserving functions** such as `sin()`, `tanh()`, `sigmoid()`, `relu()`, etc. You can directly apply them on the `val` tensor of the sparse matrix and then recreate a new matrix of the same sparsity using `val_like`." ], "metadata": { "id": "XuaNdFO7XG2r" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.randn(4)\n", "A = dglsp.spmatrix(i, val)\n", "print(A.to_dense())\n", "\n", "print(\"Apply tanh.\")\n", "A_new = dglsp.val_like(A, torch.tanh(A.val))\n", "print(A_new.to_dense())" ], "metadata": { "id": "GZkCJJ0TX0cI" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "**Non-sparse-preserving functions** such as `exp()`, `cos()`, etc. You can first convert the sparse matrix to dense before applying the functions." 
], "metadata": { "id": "i92lhMEnYas3" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.randn(4)\n", "A = dglsp.spmatrix(i, val)\n", "print(A.to_dense())\n", "\n", "print(\"Apply exp.\")\n", "A_new = A.to_dense().exp()\n", "print(A_new)" ], "metadata": { "id": "sroJpzRNYZq5" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### Softmax\n", "\n", "Apply row-wise softmax to the nonzero entries of the sparse matrix." ], "metadata": { "id": "y8OQZReVXpo3" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 1, 1, 2],\n", " [1, 0, 2, 0]])\n", "val = torch.tensor([1., 2., 3., 4.])\n", "A = dglsp.spmatrix(i, val)\n", "\n", "print(A.softmax())\n", "print(\"In dense format:\")\n", "print(A.softmax().to_dense())\n", "print(\"\\n\")" ], "metadata": { "id": "CQaKgzCJULjt" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Exercise \\#1\n", "\n", "*Let's test what you've learned. Feel free to [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/sparse/quickstart.ipynb).*\n", "\n", "Given a sparse symmetrical adjacency matrix $A$, calculate its symmetrically normalized adjacency matrix: $$norm = \\bar{D}^{-\\frac{1}{2}}\\bar{A}\\bar{D}^{-\\frac{1}{2}}$$\n", "\n", "Where $\\bar{A} = A + I$, $I$ is the identity matrix, and $\\bar{D}$ is the diagonal node degree matrix of $\\bar{A}$." 
], "metadata": { "id": "1iBNlJVYz3zi" } }, { "cell_type": "code", "source": [ "i = torch.tensor([[0, 0, 1, 1, 2, 2, 3],\n", " [1, 3, 2, 5, 3, 5, 4]])\n", "asym_A = dglsp.spmatrix(i, shape=(6, 6))\n", "# Step 1: create symmetrical adjacency matrix A from asym_A.\n", "# A =\n", "\n", "# Step 2: calculate A_hat from A.\n", "# A_hat =\n", "\n", "# Step 3: diagonal node degree matrix of A_hat\n", "# D_hat =\n", "\n", "# Step 4: calculate the norm from D_hat and A_hat.\n", "# norm = " ], "metadata": { "id": "0dDhfbJo0ByV" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Exercise \\#2\n", "\n", "Let's implement a simplified version of the Graph Attention Network (GAT) layer.\n", "\n", "A GAT layer has two inputs: the adjacency matrix $A$ and the node input features $X$. The idea of the GAT layer is to update each node's representation with a weighted average of the node's own representation and its neighbors' representations. In particular, when computing the output for node $i$, the GAT layer does the following:\n", "1. Compute the scores $S_{ij}$ representing the attention logit from neighbor $j$ to node $i$. $S_{ij}$ is a function of $i$ and $j$'s input features $X_i$ and $X_j$: $$S_{ij} = LeakyReLU(X_i^\\top v_1 + X_j^\\top v_2)$$, where $v_1$ and $v_2$ are trainable vectors.\n", "2. Compute a softmax attention $R_{ij} = \\exp S_{ij} / \\left( \\sum_{j' \\in \\mathcal{N}_i} \\exp S_{ij'} \\right)$, where $\\mathcal{N}_i$ means the neighbors of $i$. This means that $R$ is a row-wise softmax attention of $S$.\n", "3. Compute the weighted average $H_i = \\sum_{j' : j' \\in \\mathcal{N}_i} R_{ij'} X_{j'} W$, where $W$ is a trainable matrix.\n", "\n", "The following code defines all the parameters you need but only completes step 1. Could you implement step 2 and step 3?" 
], "metadata": { "id": "yfEVQBUuI-cE" } }, { "cell_type": "code", "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "class SimplifiedGAT(nn.Module):\n", " def __init__(self, in_size, out_size):\n", " super().__init__()\n", "\n", " self.W = nn.Parameter(torch.randn(in_size, out_size))\n", " self.v1 = nn.Parameter(torch.randn(in_size))\n", " self.v2 = nn.Parameter(torch.randn(in_size))\n", "\n", " def forward(self, A, X):\n", " # A: A sparse matrix with size (N, N). A[i, j] represent the edge from j to i.\n", " # X: A dense matrix with size (N, D)\n", " # Step 1: compute S[i, j]\n", " Xv1 = X @ self.v1\n", " Xv2 = X @ self.v2\n", " s = F.leaky_relu(Xv1[A.col] + Xv2[A.row])\n", " S = dglsp.val_like(A, s)\n", "\n", " # Step 2: compute R[i, j] which is the row-wise attention of $S$.\n", " # EXERCISE: replace the statement below.\n", " R = S\n", "\n", " # Step 3: compute H.\n", " # EXERCISE: replace the statement below.\n", " H = X\n", "\n", " return H" ], "metadata": { "id": "pYrgSxq6La5c" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Test:\n", "# Let's use the symmetric A created above.\n", "X = torch.randn(6, 20)\n", "module = SimplifiedGAT(20, 10)\n", "Y = module(A, X)" ], "metadata": { "id": "qjcXiidYCqGK" }, "execution_count": null, "outputs": [] } ] } ================================================ FILE: notebooks/stochastic_training/link_prediction.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "Ow8CQmZIV8Yn" }, "source": [ "# Link Prediction\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/stochastic_training/link_prediction.ipynb) 
[![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/stochastic_training/link_prediction.ipynb)\n", "\n", "This tutorial will show how to train a multi-layer GraphSAGE for link\n", "prediction on [CoraGraphDataset](https://data.dgl.ai/dataset/cora_v2.zip).\n", "The dataset contains 2708 nodes and 10556 edges.\n", "\n", "By the end of this tutorial, you will be able to\n", "\n", "- Train a GNN model for link prediction on target device with DGL's\n", " neighbor sampling components.\n" ] }, { "cell_type": "markdown", "metadata": { "id": "onVijYWpWlMj" }, "source": [ "## Install DGL package" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QcpjTazg6hEo" }, "outputs": [], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Install the CPU version in default. If you want to install CUDA version,\n", "# please refer to https://www.dgl.ai/pages/start.html and change runtime type\n", "# accordingly.\n", "device = torch.device(\"cpu\")\n", "!pip install --pre dgl -f https://data.dgl.ai/wheels-test/repo.html\n", "\n", "try:\n", " import dgl\n", " import dgl.graphbolt as gb\n", " installed = True\n", "except ImportError as error:\n", " installed = False\n", " print(error)\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "OOKZxxT7W1Rz" }, "source": [ "## Loading Dataset\n", "`cora` is already prepared as `BuiltinDataset` in **GraphBolt**.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RnJkkSKhWiUG" }, "outputs": [], "source": [ "dataset = gb.BuiltinDataset(\"cora-seeds\").load()" ] }, { "cell_type": "markdown", "metadata": { "id": "WxnTMEQXXKsM" }, "source": [ "Dataset consists of graph, feature and tasks. 
You can get the training-validation-test set from the tasks. Seed nodes and corresponding labels are already stored in each training-validation-test set. This dataset contains 2 tasks, one for node classification and the other for link prediction. We will use the link prediction task." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "YCm8CGkOX9lK" }, "outputs": [], "source": [ "graph = dataset.graph.to(device)\n", "feature = dataset.feature.to(device)\n", "train_set = dataset.tasks[1].train_set\n", "test_set = dataset.tasks[1].test_set\n", "task_name = dataset.tasks[1].metadata[\"name\"]\n", "print(f\"Task: {task_name}.\")" ] }, { "cell_type": "markdown", "metadata": { "id": "2y-P5omQYP00" }, "source": [ "## Defining Neighbor Sampler and Data Loader in DGL\n", "Different from the link prediction tutorial for full graph, a common practice to train GNN on large graphs is to iterate over the edges in minibatches, since computing the probability of all edges is usually impossible. For each minibatch of edges, you compute the output representation of their incident nodes using neighbor sampling and GNN, in a similar fashion introduced in the node classification tutorial.\n", "\n", "To perform link prediction, you need to specify a negative sampler. DGL provides builtin negative samplers such as `dgl.graphbolt.UniformNegativeSampler`. 
Here this tutorial uniformly draws 5 negative examples per positive example.\n", "\n", "Except for the negative sampler, the rest of the code is identical to the node classification tutorial.\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "LZgXGfBvYijJ" }, "outputs": [], "source": [ "from functools import partial\n", "datapipe = gb.ItemSampler(train_set, batch_size=256, shuffle=True)\n", "datapipe = datapipe.copy_to(device)\n", "datapipe = datapipe.sample_uniform_negative(graph, 5)\n", "datapipe = datapipe.sample_neighbor(graph, [5, 5])\n", "datapipe = datapipe.transform(partial(gb.exclude_seed_edges, include_reverse_edges=True))\n", "datapipe = datapipe.fetch_feature(feature, node_feature_keys=[\"feat\"])\n", "train_dataloader = gb.DataLoader(datapipe)" ] }, { "cell_type": "markdown", "metadata": { "id": "5sU_aulqYkwK" }, "source": [ "You can peek one minibatch from train_dataloader and see what it will give you.\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "euEdzmerYmZi" }, "outputs": [], "source": [ "data = next(iter(train_dataloader))\n", "print(f\"MiniBatch: {data}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "WYQqfrDWYtU0" }, "source": [ "## Defining Model for Node Representation\n", "Let’s consider training a 2-layer GraphSAGE with neighbor sampling. 
The model can be written as follows:\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "0qQbBwO7Y3-Q" }, "outputs": [], "source": [ "import dgl.nn as dglnn\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "\n", "class SAGE(nn.Module):\n", " def __init__(self, in_size, hidden_size):\n", " super().__init__()\n", " self.layers = nn.ModuleList()\n", " self.layers.append(dglnn.SAGEConv(in_size, hidden_size, \"mean\"))\n", " self.layers.append(dglnn.SAGEConv(hidden_size, hidden_size, \"mean\"))\n", " self.hidden_size = hidden_size\n", " self.predictor = nn.Sequential(\n", " nn.Linear(hidden_size, hidden_size),\n", " nn.ReLU(),\n", " nn.Linear(hidden_size, 1),\n", " )\n", "\n", " def forward(self, blocks, x):\n", " hidden_x = x\n", " for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)):\n", " hidden_x = layer(block, hidden_x)\n", " is_last_layer = layer_idx == len(self.layers) - 1\n", " if not is_last_layer:\n", " hidden_x = F.relu(hidden_x)\n", " return hidden_x" ] }, { "cell_type": "markdown", "metadata": { "id": "y23JppwHY5MC" }, "source": [ "## Defining Training Loop\n", "The following initializes the model and defines the optimizer.\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "omSIB_ePZACg" }, "outputs": [], "source": [ "in_size = feature.size(\"node\", None, \"feat\")[0]\n", "model = SAGE(in_size, 128).to(device)\n", "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)" ] }, { "cell_type": "markdown", "metadata": { "id": "QyWtzNZcZRgp" }, "source": [ "The following is the training loop for link prediction and evaluation.\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "SccLVrjSZSkd" }, "outputs": [], "source": [ "from tqdm.auto import tqdm\n", "for epoch in range(3):\n", " model.train()\n", " total_loss = 0\n", " for step, data in tqdm(enumerate(train_dataloader)):\n", " # Get node pairs with labels for loss calculation.\n", 
" compacted_seeds = data.compacted_seeds.T\n", " labels = data.labels\n", " node_feature = data.node_features[\"feat\"]\n", " # Convert sampled subgraphs to DGL blocks.\n", " blocks = data.blocks\n", "\n", " # Get the embeddings of the input nodes.\n", " y = model(blocks, node_feature)\n", " logits = model.predictor(\n", " y[compacted_seeds[0]] * y[compacted_seeds[1]]\n", " ).squeeze()\n", "\n", " # Compute loss.\n", " loss = F.binary_cross_entropy_with_logits(logits, labels)\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " total_loss += loss.item()\n", "\n", " print(f\"Epoch {epoch:03d} | Loss {total_loss / (step + 1):.3f}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "pxow2XSkZXoO" }, "source": [ "## Evaluating Performance with Link Prediction\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IMulfsnIZZVh" }, "outputs": [], "source": [ "model.eval()\n", "\n", "datapipe = gb.ItemSampler(test_set, batch_size=256, shuffle=False)\n", "datapipe = datapipe.copy_to(device)\n", "# Since we need to use all neghborhoods for evaluation, we set the fanout\n", "# to -1.\n", "datapipe = datapipe.sample_neighbor(graph, [-1, -1])\n", "datapipe = datapipe.fetch_feature(feature, node_feature_keys=[\"feat\"])\n", "eval_dataloader = gb.DataLoader(datapipe, num_workers=0)\n", "\n", "logits = []\n", "labels = []\n", "for step, data in tqdm(enumerate(eval_dataloader)):\n", " # Get node pairs with labels for loss calculation.\n", " compacted_seeds = data.compacted_seeds.T\n", " label = data.labels\n", "\n", " # The features of sampled nodes.\n", " x = data.node_features[\"feat\"]\n", "\n", " # Forward.\n", " y = model(data.blocks, x)\n", " logit = (\n", " model.predictor(y[compacted_seeds[0]] * y[compacted_seeds[1]])\n", " .squeeze()\n", " .detach()\n", " )\n", "\n", " logits.append(logit)\n", " labels.append(label)\n", "\n", "logits = torch.cat(logits, dim=0)\n", "labels = torch.cat(labels, dim=0)\n", "\n", "\n", 
"# Compute the AUROC score.\n", "from sklearn.metrics import roc_auc_score\n", "\n", "auc = roc_auc_score(labels.cpu(), logits.cpu())\n", "print(\"Link Prediction AUC:\", auc)" ] }, { "cell_type": "markdown", "metadata": { "id": "KoCoIvqAZeCS" }, "source": [ "## Conclusion\n", "In this tutorial, you have learned how to train a multi-layer GraphSAGE for link prediction with neighbor sampling." ] } ], "metadata": { "colab": { "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: notebooks/stochastic_training/multigpu_node_classification.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "2ppSJal9At7-" }, "source": [ "# Multi-GPU Node Classification\n", "\n", "This tutorial shows how to train a multi-layer GraphSAGE for node classification on the `ogbn-products` dataset provided by [Open Graph\n", "Benchmark (OGB)](https://ogb.stanford.edu/). The dataset contains around 2.4 million nodes and 62 million edges.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/stochastic_training/multigpu_node_classification.ipynb)\n", "[![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/stochastic_training/multigpu_node_classification.ipynb)\n", "\n", "By the end of this tutorial, you will be able to\n", "\n", "- Train a GNN model for node classification on multiple GPUs with DGL's neighbor sampling components. 
After learning how to use multiple GPUs, you will\n", "be able to extend it to other scenarios such as link prediction." ] }, { "cell_type": "markdown", "metadata": { "id": "mzZKrVVk6Y_8" }, "source": [ "## Install DGL package and other dependencies" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "QTCc1RrD_5Id" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Looking in links: https://data.dgl.ai/wheels-test/cu121/repo.html\n", "Requirement already satisfied: dgl in /localscratch/dgl-3/python (2.1)\n", "Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from dgl) (1.24.4)\n", "Requirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from dgl) (1.11.4)\n", "Requirement already satisfied: networkx>=2.1 in /usr/local/lib/python3.10/dist-packages (from dgl) (2.6.3)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from dgl) (2.31.0)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from dgl) (4.66.1)\n", "Requirement already satisfied: psutil>=5.8.0 in /usr/local/lib/python3.10/dist-packages (from dgl) (5.9.4)\n", "Requirement already satisfied: torchdata>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from dgl) (0.7.0a0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->dgl) (3.3.2)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->dgl) (3.6)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->dgl) (1.26.18)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->dgl) (2023.11.17)\n", "Requirement already satisfied: 
torch>=2 in /usr/local/lib/python3.10/dist-packages (from torchdata>=0.5.0->dgl) (2.2.0a0+81ea7a4)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=2->torchdata>=0.5.0->dgl) (3.13.1)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=2->torchdata>=0.5.0->dgl) (4.8.0)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=2->torchdata>=0.5.0->dgl) (1.12)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=2->torchdata>=0.5.0->dgl) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=2->torchdata>=0.5.0->dgl) (2023.12.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=2->torchdata>=0.5.0->dgl) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=2->torchdata>=0.5.0->dgl) (1.3.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. 
It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", "Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com\n", "Requirement already satisfied: torchmetrics in /usr/local/lib/python3.10/dist-packages (1.3.0.post0)\n", "Requirement already satisfied: multiprocess in /usr/local/lib/python3.10/dist-packages (0.70.16)\n", "Requirement already satisfied: numpy>1.20.0 in /usr/local/lib/python3.10/dist-packages (from torchmetrics) (1.24.4)\n", "Requirement already satisfied: packaging>17.1 in /usr/local/lib/python3.10/dist-packages (from torchmetrics) (23.2)\n", "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from torchmetrics) (2.2.0a0+81ea7a4)\n", "Requirement already satisfied: lightning-utilities>=0.8.0 in /usr/local/lib/python3.10/dist-packages (from torchmetrics) (0.10.1)\n", "Requirement already satisfied: dill>=0.3.8 in /usr/local/lib/python3.10/dist-packages (from multiprocess) (0.3.8)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.10/dist-packages (from lightning-utilities>=0.8.0->torchmetrics) (68.2.2)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from lightning-utilities>=0.8.0->torchmetrics) (4.8.0)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->torchmetrics) (3.13.1)\n", "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->torchmetrics) (1.12)\n", 
"Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->torchmetrics) (2.6.3)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->torchmetrics) (3.1.2)\n", "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->torchmetrics) (2023.12.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->torchmetrics) (2.1.3)\n", "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.10.0->torchmetrics) (1.3.0)\n", "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n", "\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", "DGL installed!\n" ] } ], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Install the CUDA version. 
If you want to install CPU version, please\n", "# refer to https://www.dgl.ai/pages/start.html.\n", "!pip install --pre dgl -f https://data.dgl.ai/wheels-test/cu121/repo.html\n", "!pip install torchmetrics multiprocess\n", "\n", "try:\n", " import dgl\n", " import dgl.graphbolt as gb\n", " installed = True\n", "except ImportError as error:\n", " installed = False\n", " print(error)\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "q7GrcJTnZQjt" }, "source": [ "## Defining Neighbor Sampler and Data Loader in DGL\n", "\n", "The major difference from the previous tutorial is that we will use `DistributedItemSampler` instead of `ItemSampler` to sample mini-batches of nodes. `DistributedItemSampler` is a distributed version of `ItemSampler` that works with `DistributedDataParallel`. It is implemented as a wrapper around `ItemSampler` and will sample the same minibatch on all replicas. It also supports dropping the last non-full minibatch to avoid the need for padding.\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "eel0Wn_aEYAd" }, "outputs": [], "source": [ "def create_dataloader(graph, features, itemset, device, is_train):\n", " datapipe = gb.DistributedItemSampler(\n", " item_set=itemset,\n", " batch_size=1024,\n", " drop_last=is_train,\n", " shuffle=is_train,\n", " drop_uneven_inputs=is_train,\n", " )\n", " datapipe = datapipe.copy_to(device)\n", " # Now that we have moved to device, sample_neighbor and fetch_feature steps\n", " # will be executed on GPUs.\n", " datapipe = datapipe.sample_neighbor(graph, [10, 10, 10])\n", " datapipe = datapipe.fetch_feature(features, node_feature_keys=[\"feat\"])\n", " return gb.DataLoader(datapipe)" ] }, { "cell_type": "markdown", "metadata": { "id": "uswPlvOLF1IX" }, "source": [ "## Weighted reduction across GPUs\n", "\n", "As the different GPUs might process differing numbers of data points, we define a function to compute the exact 
average of values such as loss or accuracy in a\n", "weighted manner.\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "VXP0hmzVGKnp" }, "outputs": [], "source": [ "import torch.distributed as dist\n", "\n", "def weighted_reduce(tensor, weight, dst=0):\n", " ########################################################################\n", " # (HIGHLIGHT) Collect accuracy and loss values from sub-processes and\n", " # obtain overall average values.\n", " #\n", " # `torch.distributed.reduce` is used to reduce tensors from all the\n", " # sub-processes to a specified process, ReduceOp.SUM is used by default.\n", " #\n", " # Because the GPUs may have differing numbers of processed items, we\n", " # perform a weighted mean to calculate the exact loss and accuracy.\n", " ########################################################################\n", " dist.reduce(tensor=tensor, dst=dst)\n", " weight = torch.tensor(weight, device=tensor.device)\n", " dist.reduce(tensor=weight, dst=dst)\n", " return tensor / weight" ] }, { "cell_type": "markdown", "metadata": { "id": "fV6epnRxbZl4" }, "source": [ "## Defining Model\n", "Let’s consider training a 3-layer GraphSAGE with neighbor sampling. 
The model can be written as follows:\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "id": "ft9Ldg-yEsa5" }, "outputs": [], "source": [ "from torch import nn\n", "import torch.nn.functional as F\n", "from dgl.nn import SAGEConv\n", "\n", "class SAGE(nn.Module):\n", " def __init__(self, in_size, hidden_size, out_size):\n", " super().__init__()\n", " self.layers = nn.ModuleList()\n", " # Three-layer GraphSAGE-mean.\n", " self.layers.append(SAGEConv(in_size, hidden_size, \"mean\"))\n", " self.layers.append(SAGEConv(hidden_size, hidden_size, \"mean\"))\n", " self.layers.append(SAGEConv(hidden_size, out_size, \"mean\"))\n", " self.dropout = nn.Dropout(0.5)\n", " self.hidden_size = hidden_size\n", " self.out_size = out_size\n", " # Set the dtype for the layers manually.\n", " self.float()\n", "\n", " def forward(self, blocks, x):\n", " hidden_x = x\n", " for layer_idx, (layer, block) in enumerate(zip(self.layers, blocks)):\n", " hidden_x = layer(block, hidden_x)\n", " is_last_layer = layer_idx == len(self.layers) - 1\n", " if not is_last_layer:\n", " hidden_x = F.relu(hidden_x)\n", " hidden_x = self.dropout(hidden_x)\n", " return hidden_x" ] }, { "cell_type": "markdown", "metadata": { "id": "CjuvDKDVGbPW" }, "source": [ "## Evaluation function\n", "\n", "The evaluation function can be used to calculate the validation accuracy during training or the testing accuracy at the end of the training. The difference from\n", "the previous tutorial is that we need to return the number of items processed\n", "by each GPU to take a weighted average." 
] }, { "cell_type": "code", "execution_count": 5, "metadata": { "id": "j4djoX9tG7Ib" }, "outputs": [], "source": [ "import torchmetrics.functional as MF\n", "import tqdm\n", "\n", "@torch.no_grad()\n", "def evaluate(rank, model, graph, features, itemset, num_classes, device):\n", " model.eval()\n", " y = []\n", " y_hats = []\n", " dataloader = create_dataloader(\n", " graph,\n", " features,\n", " itemset,\n", " device,\n", " is_train=False,\n", " )\n", "\n", " for data in tqdm.tqdm(dataloader) if rank == 0 else dataloader:\n", " blocks = data.blocks\n", " x = data.node_features[\"feat\"]\n", " y.append(data.labels)\n", " y_hats.append(model.module(blocks, x))\n", "\n", " res = MF.accuracy(\n", " torch.cat(y_hats),\n", " torch.cat(y),\n", " task=\"multiclass\",\n", " num_classes=num_classes,\n", " )\n", "\n", " return res.to(device), sum(y_i.size(0) for y_i in y)" ] }, { "cell_type": "markdown", "metadata": { "id": "kN5BbnR4HSU2" }, "source": [ "## Training Loop\n", "\n", "The training loop is almost identical to the previous tutorial. In this tutorial, we explicitly disable uneven inputs coming from the dataloader; however, the Join Context Manager could be used to train with possibly incomplete batches at the end of epochs. Please refer to [this tutorial](https://pytorch.org/tutorials/advanced/generic_join.html) for more information." 
] }, { "cell_type": "code", "execution_count": 6, "metadata": { "id": "bdOceP3yH-eI" }, "outputs": [], "source": [ "import time\n", "\n", "def train(\n", " rank,\n", " graph,\n", " features,\n", " train_set,\n", " valid_set,\n", " num_classes,\n", " model,\n", " device,\n", "):\n", " optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", " # Create training data loader.\n", " dataloader = create_dataloader(\n", " graph,\n", " features,\n", " train_set,\n", " device,\n", " is_train=True,\n", " )\n", "\n", " for epoch in range(5):\n", " epoch_start = time.time()\n", "\n", " model.train()\n", " total_loss = torch.tensor(0, dtype=torch.float, device=device)\n", " num_train_items = 0\n", " for data in tqdm.tqdm(dataloader) if rank == 0 else dataloader:\n", " # The input features are from the source nodes in the first\n", " # layer's computation graph.\n", " x = data.node_features[\"feat\"]\n", "\n", " # The ground truth labels are from the destination nodes\n", " # in the last layer's computation graph.\n", " y = data.labels\n", "\n", " blocks = data.blocks\n", "\n", " y_hat = model(blocks, x)\n", "\n", " # Compute loss.\n", " loss = F.cross_entropy(y_hat, y)\n", "\n", " optimizer.zero_grad()\n", " loss.backward()\n", " optimizer.step()\n", "\n", " total_loss += loss.detach() * y.size(0)\n", " num_train_items += y.size(0)\n", "\n", " # Evaluate the model.\n", " if rank == 0:\n", " print(\"Validating...\")\n", " acc, num_val_items = evaluate(\n", " rank,\n", " model,\n", " graph,\n", " features,\n", " valid_set,\n", " num_classes,\n", " device,\n", " )\n", " total_loss = weighted_reduce(total_loss, num_train_items)\n", " acc = weighted_reduce(acc * num_val_items, num_val_items)\n", "\n", " # We synchronize before measuring the epoch time.\n", " torch.cuda.synchronize()\n", " epoch_end = time.time()\n", " if rank == 0:\n", " print(\n", " f\"Epoch {epoch:05d} | \"\n", " f\"Average Loss {total_loss.item():.4f} | \"\n", " f\"Accuracy {acc.item():.4f} | \"\n", " 
f\"Time {epoch_end - epoch_start:.4f}\"\n", " )" ] }, { "cell_type": "markdown", "metadata": { "id": "mA-Xu37uIHc4" }, "source": [ "## Defining Training and Evaluation Procedures\n", "\n", "The following code defines the main function for each process. It is similar to the previous tutorial except that we need to initialize a distributed training context with `torch.distributed` and wrap the model with `torch.nn.parallel.DistributedDataParallel`." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "id": "sW__HeslIMTT" }, "outputs": [], "source": [ "def run(rank, world_size, devices, dataset):\n", " # Set up multiprocessing environment.\n", " device = devices[rank]\n", " torch.cuda.set_device(device)\n", " dist.init_process_group(\n", " backend=\"nccl\", # Use NCCL backend for distributed GPU training\n", " init_method=\"tcp://127.0.0.1:12345\",\n", " world_size=world_size,\n", " rank=rank,\n", " )\n", "\n", " # Pin the graph and features in-place to enable GPU access.\n", " graph = dataset.graph.pin_memory_()\n", " features = dataset.feature.pin_memory_()\n", " train_set = dataset.tasks[0].train_set\n", " valid_set = dataset.tasks[0].validation_set\n", " num_classes = dataset.tasks[0].metadata[\"num_classes\"]\n", "\n", " in_size = features.size(\"node\", None, \"feat\")[0]\n", " hidden_size = 256\n", " out_size = num_classes\n", "\n", " # Create GraphSAGE model. 
It should be copied onto a GPU as a replica.\n", " model = SAGE(in_size, hidden_size, out_size).to(device)\n", " model = nn.parallel.DistributedDataParallel(model)\n", "\n", " # Model training.\n", " if rank == 0:\n", " print(\"Training...\")\n", " train(\n", " rank,\n", " graph,\n", " features,\n", " train_set,\n", " valid_set,\n", " num_classes,\n", " model,\n", " device,\n", " )\n", "\n", " # Test the model.\n", " if rank == 0:\n", " print(\"Testing...\")\n", " test_set = dataset.tasks[0].test_set\n", " test_acc, num_test_items = evaluate(\n", " rank,\n", " model,\n", " graph,\n", " features,\n", " itemset=test_set,\n", " num_classes=num_classes,\n", " device=device,\n", " )\n", " test_acc = weighted_reduce(test_acc * num_test_items, num_test_items)\n", "\n", " if rank == 0:\n", " print(f\"Test Accuracy {test_acc.item():.4f}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "qMzt0aBFIfbS" }, "source": [ "## Spawning Trainer Processes\n", "\n", "The following code spawns a process for each GPU and calls the run function defined above." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": { "id": "5Dt95eSVIiyM" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Training with 1 gpus.\n", "The dataset is already preprocessed.\n", "Training...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "192it [00:09, 21.32it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validating...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "39it [00:00, 78.32it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 00000 | Average Loss 1.2953 | Accuracy 0.8556 | Time 9.5520\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "192it [00:03, 61.08it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validating...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "39it [00:00, 79.10it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 00001 | Average Loss 0.5859 | Accuracy 0.8788 | Time 3.6609\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "192it [00:03, 62.82it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validating...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "39it [00:00, 80.55it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 00002 | Average Loss 0.4858 | Accuracy 0.8852 | Time 3.5646\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "192it [00:03, 60.34it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validating...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "39it [00:00, 44.41it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 00003 | Average Loss 0.4407 | Accuracy 0.8920 | Time 4.0852\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "192it [00:03, 58.87it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validating...\n" ] }, { "name": "stderr", "output_type": "stream", 
"text": [ "39it [00:00, 78.52it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Epoch 00004 | Average Loss 0.4122 | Accuracy 0.8943 | Time 3.7938\n", "Testing...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2162it [00:24, 89.75it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Test Accuracy 0.7514\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "import torch.multiprocessing as mp\n", "\n", "def main():\n", " if not torch.cuda.is_available():\n", " print(\"No GPU found!\")\n", " return\n", "\n", " devices = [\n", " torch.device(f\"cuda:{i}\") for i in range(torch.cuda.device_count())\n", " ][:1]\n", " world_size = len(devices)\n", "\n", " print(f\"Training with {world_size} gpus.\")\n", "\n", " # Load and preprocess dataset.\n", " dataset = gb.BuiltinDataset(\"ogbn-products\").load()\n", "\n", " # Thread limiting to avoid resource competition.\n", " os.environ[\"OMP_NUM_THREADS\"] = str(mp.cpu_count() // 2 // world_size)\n", "\n", " if world_size > 1:\n", " # The following launch method is not supported in a notebook.\n", " mp.set_sharing_strategy(\"file_system\")\n", " mp.spawn(\n", " run,\n", " args=(world_size, devices, dataset),\n", " nprocs=world_size,\n", " join=True,\n", " )\n", " else:\n", " run(0, 1, devices, dataset)\n", "\n", "\n", "if __name__ == \"__main__\":\n", " main()" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: notebooks/stochastic_training/neighbor_sampling_overview.ipynb ================================================ { "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "private_outputs": true, "provenance": [], "authorship_tag": "ABX9TyMxpiQDo/pG6bIgkfWOPqXY" }, "kernelspec": { "name": "python3", 
"display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "# Neighbor Sampling Overview\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/stochastic_training/neighbor_sampling_overview.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/stochastic_training/neighbor_sampling_overview.ipynb)\n", "\n", "In previous tutorials you have learned how to train GNNs by computing the representations of all nodes on a graph. However, sometimes your graph is too large to fit the computation of all nodes in a single GPU.\n", "\n", "By the end of this tutorial, you will be able to\n", "\n", "- Understand the pipeline of stochastic GNN training.\n", "\n", "- Understand what is neighbor sampling and why it yields a bipartite graph for each GNN layer." ], "metadata": { "id": "p7tTmsjh3dEy" } }, { "cell_type": "markdown", "source": [ "## Message Passing Review\n", "Recall that in [Gilmer et al.](https://arxiv.org/abs/1704.01212), the message passing formulation is as follows:\n", "\n", "$$m_{u \\to v}^{(l)} = M^{(l)}\\left(h_v^{(l-1)}, h_u^{(l-1)}, e_{u \\to v}^{(l-1)}\\right)$$\n", "\n", "$$m_{v}^{(l)} = \\sum_{u \\in \\mathcal{N}(v)} m_{u \\to v}^{(l)}$$\n", "\n", "$$h_v^{(l)} = U^{(l)}\\left(h_v^{(l-1)}, m_v^{(l)}\\right)$$\n", "\n", "\n", "where DGL calls\n", "- message function: $M^{(l)}$\n", "- reduce function: $\\sum$\n", "- update function: $U^{(l)}$\n", "\n", "Note that $\\sum$ here can represent any function and is not necessarily a summation.\n", "\n", "Essentially, the $l$-th layer representation of a single node depends on the $(l-1)$-th layer representation of the same node, as well as the $(l-1)$-th layer representation of the neighboring nodes. 
Those $(l-1)$-th layer representations then depend on the $(l-2)$-th layer representation of those nodes, as well as their neighbors.\n", "\n", "The following animation shows how a 2-layer GNN is supposed to compute the output of node 5:\n", "\n", "![image1](https://data.dgl.ai/tutorial/img/sampling.gif)\n", "\n", "You can see that to compute node 5 from the second layer, you will need its direct neighbors’ first layer representations (colored in yellow), which in turn need their direct neighbors’ (i.e. node 5’s second-hop neighbors’) representations (colored in green)." ], "metadata": { "id": "eJs-O2Vz88Kd" } }, { "cell_type": "markdown", "source": [ "## Neighbor Sampling Overview\n", "You can also see from the previous example that computing representations for a small number of nodes often requires input features of a significantly larger number of nodes. Taking all neighbors for message aggregation is often too costly since the nodes needed for input features would easily cover a large portion of the graph, especially for real-world graphs which are often [scale-free](https://en.wikipedia.org/wiki/Scale-free_network).\n", "\n", "Neighbor sampling addresses this issue by selecting a subset of the neighbors to perform aggregation. For instance, to compute ${h}_5^{(2)}$, you can choose two of the neighbors instead of all of them to aggregate, as in the following animation:\n", "\n", "![image2](https://data.dgl.ai/tutorial/img/bipartite.gif)\n", "\n", "You can see that this method needs far fewer nodes for message passing in a single minibatch.\n", "\n", "You can also notice that the computation dependencies in the animation above can be described as a series of bipartite graphs. The output nodes (called destination nodes) are on one side and all the nodes necessary for inputs (called source nodes) are on the other side. The arrows indicate how the sampled neighbors propagate messages to the nodes.
DGL calls such graphs **message flow graphs (MFG)**.\n", "\n", "Note that some GNN modules, such as `SAGEConv`, need to use the destination nodes’ features on the previous layer to compute the outputs. Without loss of generality, DGL always includes the destination nodes themselves in the source nodes." ], "metadata": { "id": "0yYSBM8s9M_P" } } ] } ================================================ FILE: notebooks/stochastic_training/node_classification.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "OxbY2KlG4ZfJ" }, "source": [ "# Node Classification\n", "This tutorial shows how to train a multi-layer GraphSAGE for node\n", "classification on ``ogbn-arxiv`` provided by [Open Graph\n", "Benchmark (OGB)](https://ogb.stanford.edu/). The dataset contains around\n", "170 thousand nodes and 1 million edges.\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/stochastic_training/node_classification.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/stochastic_training/node_classification.ipynb)\n", "\n", "By the end of this tutorial, you will be able to\n", "\n", "- Train a GNN model for node classification on a single GPU with DGL's\n", " neighbor sampling components." ] }, { "cell_type": "markdown", "metadata": { "id": "mzZKrVVk6Y_8" }, "source": [ "## Install DGL package" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "QcpjTazg6hEo" }, "outputs": [], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "import numpy as np\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Install the CPU version in default. 
If you want to install CUDA version,\n", "# please refer to https://www.dgl.ai/pages/start.html and change runtime type\n", "# accordingly.\n", "device = torch.device(\"cpu\")\n", "!pip install --pre dgl -f https://data.dgl.ai/wheels-test/repo.html\n", "\n", "try:\n", " import dgl\n", " import dgl.graphbolt as gb\n", " installed = True\n", "except ImportError as error:\n", " installed = False\n", " print(error)\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "XWdRZAM-51Cb" }, "source": [ "## Loading Dataset\n", "`ogbn-arxiv` is already prepared as ``BuiltinDataset`` in **GraphBolt**." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "RnJkkSKhWiUG" }, "outputs": [], "source": [ "dataset = gb.BuiltinDataset(\"ogbn-arxiv-seeds\").load()" ] }, { "cell_type": "markdown", "metadata": { "id": "S8avoKBiXA9j" }, "source": [ "Dataset consists of graph, feature and tasks. You can get the training-validation-test set from the tasks. Seed nodes and corresponding labels are already stored in each training-validation-test set. Other metadata such as number of classes are also stored in the tasks. In this dataset, there is only one task: `node classification`." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IXGZmgIaXJWQ" }, "outputs": [], "source": [ "graph = dataset.graph.to(device)\n", "feature = dataset.feature.to(device)\n", "train_set = dataset.tasks[0].train_set\n", "valid_set = dataset.tasks[0].validation_set\n", "test_set = dataset.tasks[0].test_set\n", "task_name = dataset.tasks[0].metadata[\"name\"]\n", "num_classes = dataset.tasks[0].metadata[\"num_classes\"]\n", "print(f\"Task: {task_name}. 
Number of classes: {num_classes}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "y8yn77Kg6HkW" }, "source": [ "## How DGL Handles Computation Dependency\n", "The computation dependency for message passing of a single node can be described as a series of message flow graphs (MFG).\n", "\n", "![DGL Computation](https://data.dgl.ai/tutorial/img/bipartite.gif)" ] }, { "cell_type": "markdown", "metadata": { "id": "q7GrcJTnZQjt" }, "source": [ "## Defining Neighbor Sampler and Data Loader in DGL\n", "\n", "DGL provides tools to iterate over the dataset in minibatches while generating the computation dependencies to compute their outputs with the MFGs above. For node classification, you can use `dgl.graphbolt.DataLoader` for iterating over the dataset. It accepts a data pipe that generates minibatches of nodes and their labels, samples neighbors for each node, and generates the computation dependencies in the form of MFGs. Feature fetching, block creation and copying to the target device are also supported. All these operations are split into separate stages in the data pipe, so that you can customize the data pipeline by inserting your own operations.\n", "\n", "Let’s say that each node will gather messages from 4 neighbors on each layer.
The code defining the data loader and neighbor sampler will look like the following.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "yQVYDO0ZbBvi" }, "outputs": [], "source": [ "def create_dataloader(itemset, shuffle):\n", " datapipe = gb.ItemSampler(itemset, batch_size=1024, shuffle=shuffle)\n", " datapipe = datapipe.copy_to(device)\n", " datapipe = datapipe.sample_neighbor(graph, [4, 4])\n", " datapipe = datapipe.fetch_feature(feature, node_feature_keys=[\"feat\"])\n", " return gb.DataLoader(datapipe)" ] }, { "cell_type": "markdown", "metadata": { "id": "7Rp12SUhbEV1" }, "source": [ "You can iterate over the data loader and a `MiniBatch` object is yielded.\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "V7vQiKj2bL_o" }, "outputs": [], "source": [ "data = next(iter(create_dataloader(train_set, shuffle=True)))\n", "print(data)" ] }, { "cell_type": "markdown", "metadata": { "id": "-eBuPnT-bS-o" }, "source": [ "You can get the input node IDs from MFGs." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "bN4sgZqFbUvd" }, "outputs": [], "source": [ "mfgs = data.blocks\n", "input_nodes = mfgs[0].srcdata[dgl.NID]\n", "print(f\"Input nodes: {input_nodes}.\")" ] }, { "cell_type": "markdown", "metadata": { "id": "fV6epnRxbZl4" }, "source": [ "## Defining Model\n", "Let’s consider training a 2-layer GraphSAGE with neighbor sampling. 
The model can be written as follows:\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "iKhEIL0Ccmwx" }, "outputs": [], "source": [ "import torch.nn as nn\n", "import torch.nn.functional as F\n", "from dgl.nn import SAGEConv\n", "\n", "\n", "class Model(nn.Module):\n", " def __init__(self, in_feats, h_feats, num_classes):\n", " super(Model, self).__init__()\n", " self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type=\"mean\")\n", " self.conv2 = SAGEConv(h_feats, num_classes, aggregator_type=\"mean\")\n", " self.h_feats = h_feats\n", "\n", " def forward(self, mfgs, x):\n", " h = self.conv1(mfgs[0], x)\n", " h = F.relu(h)\n", " h = self.conv2(mfgs[1], h)\n", " return h\n", "\n", "\n", "in_size = feature.size(\"node\", None, \"feat\")[0]\n", "model = Model(in_size, 64, num_classes).to(device)" ] }, { "cell_type": "markdown", "metadata": { "id": "OGLN3kCcwCA8" }, "source": [ "## Defining Training Loop\n", "\n", "The following defines the optimizer for the model initialized above.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dET8i_hewLUi" }, "outputs": [], "source": [ "opt = torch.optim.Adam(model.parameters())" ] }, { "cell_type": "markdown", "metadata": { "id": "leZvFP4GwMcq" }, "source": [ "When computing the validation score for model selection, usually you can also do neighbor sampling. We can just reuse our `create_dataloader` function to create two separate dataloaders for training and validation." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Gvd7vFWZwQI5" }, "outputs": [], "source": [ "train_dataloader = create_dataloader(train_set, shuffle=True)\n", "valid_dataloader = create_dataloader(valid_set, shuffle=False)\n", "\n", "import sklearn.metrics" ] }, { "cell_type": "markdown", "metadata": { "id": "nTIIfVMDwXqX" }, "source": [ "The following is a training loop that performs validation every epoch. It also reports the validation accuracy after every epoch."
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wsfqhKUvwZEj" }, "outputs": [], "source": [ "from tqdm.auto import tqdm\n", "\n", "for epoch in range(10):\n", " model.train()\n", "\n", " with tqdm(train_dataloader) as tq:\n", " for step, data in enumerate(tq):\n", " x = data.node_features[\"feat\"]\n", " labels = data.labels\n", "\n", " predictions = model(data.blocks, x)\n", "\n", " loss = F.cross_entropy(predictions, labels)\n", " opt.zero_grad()\n", " loss.backward()\n", " opt.step()\n", "\n", " accuracy = sklearn.metrics.accuracy_score(\n", " labels.cpu().numpy(),\n", " predictions.argmax(1).detach().cpu().numpy(),\n", " )\n", "\n", " tq.set_postfix(\n", " {\"loss\": \"%.03f\" % loss.item(), \"acc\": \"%.03f\" % accuracy},\n", " refresh=False,\n", " )\n", "\n", " model.eval()\n", "\n", " predictions = []\n", " labels = []\n", " with tqdm(valid_dataloader) as tq, torch.no_grad():\n", " for data in tq:\n", " x = data.node_features[\"feat\"]\n", " labels.append(data.labels.cpu().numpy())\n", " predictions.append(model(data.blocks, x).argmax(1).cpu().numpy())\n", " predictions = np.concatenate(predictions)\n", " labels = np.concatenate(labels)\n", " accuracy = sklearn.metrics.accuracy_score(labels, predictions)\n", " print(\"Epoch {} Validation Accuracy {}\".format(epoch, accuracy))" ] }, { "cell_type": "markdown", "metadata": { "id": "kmHnUI0QwfJ4" }, "source": [ "## Conclusion\n", "\n", "In this tutorial, you have learned how to train a multi-layer GraphSAGE with neighbor sampling.\n" ] } ], "metadata": { "colab": { "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 0 } 
================================================ FILE: notebooks/stochastic_training/ondisk_dataset_heterograph.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "FnFhPMaAfLtJ" }, "source": [ "# OnDiskDataset for Heterogeneous Graph\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/stochastic_training/ondisk_dataset_heterograph.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/stochastic_training/ondisk_dataset_heterograph.ipynb)\n", "\n", "This tutorial shows how to create an `OnDiskDataset` for a heterogeneous graph that can be used in the **GraphBolt** framework. The major difference from creating a dataset for a homogeneous graph is that we need to specify node/edge types for edges, feature data, and training/validation/test sets.\n", "\n", "By the end of this tutorial, you will be able to\n", "\n", "- organize graph structure data.\n", "- organize feature data.\n", "- organize training/validation/test sets for specific tasks.\n", "\n", "To create an ``OnDiskDataset`` object, you need to organize all the data including graph structure, feature data and tasks into a directory. The directory should contain a ``metadata.yaml`` file that describes the metadata of the dataset.\n", "\n", "Now let's generate various data step by step and finally organize them together to instantiate an `OnDiskDataset`."
] }, { "cell_type": "markdown", "metadata": { "id": "Wlb19DtWgtzq" }, "source": [ "## Install DGL package" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UojlT9ZGgyr9" }, "outputs": [], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "import numpy as np\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Install the CPU version.\n", "device = torch.device(\"cpu\")\n", "!pip install --pre dgl -f https://data.dgl.ai/wheels-test/repo.html\n", "\n", "try:\n", " import dgl\n", " import dgl.graphbolt as gb\n", " installed = True\n", "except ImportError as error:\n", " installed = False\n", " print(error)\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "2R7WnSbjsfbr" }, "source": [ "## Data preparation\n", "In order to demonstrate how to organize various data, let's create a base directory first." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "SZipbzyltLfO" }, "outputs": [], "source": [ "base_dir = './ondisk_dataset_heterograph'\n", "os.makedirs(base_dir, exist_ok=True)\n", "print(f\"Created base directory: {base_dir}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "qhNtIn_xhlnl" }, "source": [ "### Generate graph structure data\n", "For a heterogeneous graph, we need to save the edges of different edge types (namely seeds) into separate **Numpy** or **CSV** files.\n", "\n", "Note:\n", "- when saving to **Numpy**, the array is required to have the shape `(2, N)`.
This format is recommended as constructing graph from it is much faster than **CSV** file.\n", "- when saving to **CSV** file, do not save index and header.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "HcBt4G5BmSjr" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "\n", "# For simplicity, we create a heterogeneous graph with\n", "# 2 node types: `user`, `item`\n", "# 2 edge types: `user:like:item`, `user:follow:user`\n", "# And each node/edge type has the same number of nodes/edges.\n", "num_nodes = 1000\n", "num_edges = 10 * num_nodes\n", "\n", "# Edge type: \"user:like:item\"\n", "like_edges_path = os.path.join(base_dir, \"like-edges.csv\")\n", "like_edges = np.random.randint(0, num_nodes, size=(num_edges, 2))\n", "print(f\"Part of [user:like:item] edges: {like_edges[:5, :]}\\n\")\n", "\n", "df = pd.DataFrame(like_edges)\n", "df.to_csv(like_edges_path, index=False, header=False)\n", "print(f\"[user:like:item] edges are saved into {like_edges_path}\\n\")\n", "\n", "# Edge type: \"user:follow:user\"\n", "follow_edges_path = os.path.join(base_dir, \"follow-edges.csv\")\n", "follow_edges = np.random.randint(0, num_nodes, size=(num_edges, 2))\n", "print(f\"Part of [user:follow:user] edges: {follow_edges[:5, :]}\\n\")\n", "\n", "df = pd.DataFrame(follow_edges)\n", "df.to_csv(follow_edges_path, index=False, header=False)\n", "print(f\"[user:follow:user] edges are saved into {follow_edges_path}\\n\")" ] }, { "cell_type": "markdown", "metadata": { "id": "kh-4cPtzpcaH" }, "source": [ "### Generate feature data for graph\n", "For feature data, numpy arrays and torch tensors are supported for now. Let's generate feature data for each node/edge type." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_PVu1u5brBhF" }, "outputs": [], "source": [ "# Generate node[user] feature in numpy array.\n", "node_user_feat_0_path = os.path.join(base_dir, \"node-user-feat-0.npy\")\n", "node_user_feat_0 = np.random.rand(num_nodes, 5)\n", "print(f\"Part of node[user] feature [feat_0]: {node_user_feat_0[:3, :]}\")\n", "np.save(node_user_feat_0_path, node_user_feat_0)\n", "print(f\"Node[user] feature [feat_0] is saved to {node_user_feat_0_path}\\n\")\n", "\n", "# Generate another node[user] feature in torch tensor\n", "node_user_feat_1_path = os.path.join(base_dir, \"node-user-feat-1.pt\")\n", "node_user_feat_1 = torch.rand(num_nodes, 5)\n", "print(f\"Part of node[user] feature [feat_1]: {node_user_feat_1[:3, :]}\")\n", "torch.save(node_user_feat_1, node_user_feat_1_path)\n", "print(f\"Node[user] feature [feat_1] is saved to {node_user_feat_1_path}\\n\")\n", "\n", "# Generate node[item] feature in numpy array.\n", "node_item_feat_0_path = os.path.join(base_dir, \"node-item-feat-0.npy\")\n", "node_item_feat_0 = np.random.rand(num_nodes, 5)\n", "print(f\"Part of node[item] feature [feat_0]: {node_item_feat_0[:3, :]}\")\n", "np.save(node_item_feat_0_path, node_item_feat_0)\n", "print(f\"Node[item] feature [feat_0] is saved to {node_item_feat_0_path}\\n\")\n", "\n", "# Generate another node[item] feature in torch tensor\n", "node_item_feat_1_path = os.path.join(base_dir, \"node-item-feat-1.pt\")\n", "node_item_feat_1 = torch.rand(num_nodes, 5)\n", "print(f\"Part of node[item] feature [feat_1]: {node_item_feat_1[:3, :]}\")\n", "torch.save(node_item_feat_1, node_item_feat_1_path)\n", "print(f\"Node[item] feature [feat_1] is saved to {node_item_feat_1_path}\\n\")\n", "\n", "# Generate edge[user:like:item] feature in numpy array.\n", "edge_like_feat_0_path = os.path.join(base_dir, \"edge-like-feat-0.npy\")\n", "edge_like_feat_0 = np.random.rand(num_edges, 5)\n", "print(f\"Part of edge[user:like:item] feature 
[feat_0]: {edge_like_feat_0[:3, :]}\")\n", "np.save(edge_like_feat_0_path, edge_like_feat_0)\n", "print(f\"Edge[user:like:item] feature [feat_0] is saved to {edge_like_feat_0_path}\\n\")\n", "\n", "# Generate another edge[user:like:item] feature in torch tensor\n", "edge_like_feat_1_path = os.path.join(base_dir, \"edge-like-feat-1.pt\")\n", "edge_like_feat_1 = torch.rand(num_edges, 5)\n", "print(f\"Part of edge[user:like:item] feature [feat_1]: {edge_like_feat_1[:3, :]}\")\n", "torch.save(edge_like_feat_1, edge_like_feat_1_path)\n", "print(f\"Edge[user:like:item] feature [feat_1] is saved to {edge_like_feat_1_path}\\n\")\n", "\n", "# Generate edge[user:follow:user] feature in numpy array.\n", "edge_follow_feat_0_path = os.path.join(base_dir, \"edge-follow-feat-0.npy\")\n", "edge_follow_feat_0 = np.random.rand(num_edges, 5)\n", "print(f\"Part of edge[user:follow:user] feature [feat_0]: {edge_follow_feat_0[:3, :]}\")\n", "np.save(edge_follow_feat_0_path, edge_follow_feat_0)\n", "print(f\"Edge[user:follow:user] feature [feat_0] is saved to {edge_follow_feat_0_path}\\n\")\n", "\n", "# Generate another edge[user:follow:user] feature in torch tensor\n", "edge_follow_feat_1_path = os.path.join(base_dir, \"edge-follow-feat-1.pt\")\n", "edge_follow_feat_1 = torch.rand(num_edges, 5)\n", "print(f\"Part of edge[user:follow:user] feature [feat_1]: {edge_follow_feat_1[:3, :]}\")\n", "torch.save(edge_follow_feat_1, edge_follow_feat_1_path)\n", "print(f\"Edge[user:follow:user] feature [feat_1] is saved to {edge_follow_feat_1_path}\\n\")" ] }, { "cell_type": "markdown", "metadata": { "id": "ZyqgOtsIwzh_" }, "source": [ "### Generate tasks\n", "`OnDiskDataset` supports multiple tasks. For each task, we need to prepare training/validation/test sets respectively. Such sets usually vary among different tasks. In this tutorial, let's create a **Node Classification** task and **Link Prediction** task." 
] }, { "cell_type": "markdown", "metadata": { "id": "hVxHaDIfzCkr" }, "source": [ "#### Node Classification Task\n", "For node classification task, we need **node IDs** and corresponding **labels** for each training/validation/test set. Like feature data, numpy arrays and torch tensors are supported for these sets." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "S5-fyBbHzTCO" }, "outputs": [], "source": [ "# For illustration, let's generate item sets for each node type.\n", "num_trains = int(num_nodes * 0.6)\n", "num_vals = int(num_nodes * 0.2)\n", "num_tests = num_nodes - num_trains - num_vals\n", "\n", "user_ids = np.arange(num_nodes)\n", "np.random.shuffle(user_ids)\n", "\n", "item_ids = np.arange(num_nodes)\n", "np.random.shuffle(item_ids)\n", "\n", "# Train IDs for user.\n", "nc_train_user_ids_path = os.path.join(base_dir, \"nc-train-user-ids.npy\")\n", "nc_train_user_ids = user_ids[:num_trains]\n", "print(f\"Part of train ids[user] for node classification: {nc_train_user_ids[:3]}\")\n", "np.save(nc_train_user_ids_path, nc_train_user_ids)\n", "print(f\"NC train ids[user] are saved to {nc_train_user_ids_path}\\n\")\n", "\n", "# Train labels for user.\n", "nc_train_user_labels_path = os.path.join(base_dir, \"nc-train-user-labels.pt\")\n", "nc_train_user_labels = torch.randint(0, 10, (num_trains,))\n", "print(f\"Part of train labels[user] for node classification: {nc_train_user_labels[:3]}\")\n", "torch.save(nc_train_user_labels, nc_train_user_labels_path)\n", "print(f\"NC train labels[user] are saved to {nc_train_user_labels_path}\\n\")\n", "\n", "# Train IDs for item.\n", "nc_train_item_ids_path = os.path.join(base_dir, \"nc-train-item-ids.npy\")\n", "nc_train_item_ids = item_ids[:num_trains]\n", "print(f\"Part of train ids[item] for node classification: {nc_train_item_ids[:3]}\")\n", "np.save(nc_train_item_ids_path, nc_train_item_ids)\n", "print(f\"NC train ids[item] are saved to {nc_train_item_ids_path}\\n\")\n", "\n", "# Train 
labels for item.\n", "nc_train_item_labels_path = os.path.join(base_dir, \"nc-train-item-labels.pt\")\n", "nc_train_item_labels = torch.randint(0, 10, (num_trains,))\n", "print(f\"Part of train labels[item] for node classification: {nc_train_item_labels[:3]}\")\n", "torch.save(nc_train_item_labels, nc_train_item_labels_path)\n", "print(f\"NC train labels[item] are saved to {nc_train_item_labels_path}\\n\")\n", "\n", "# Val IDs for user.\n", "nc_val_user_ids_path = os.path.join(base_dir, \"nc-val-user-ids.npy\")\n", "nc_val_user_ids = user_ids[num_trains:num_trains+num_vals]\n", "print(f\"Part of val ids[user] for node classification: {nc_val_user_ids[:3]}\")\n", "np.save(nc_val_user_ids_path, nc_val_user_ids)\n", "print(f\"NC val ids[user] are saved to {nc_val_user_ids_path}\\n\")\n", "\n", "# Val labels for user.\n", "nc_val_user_labels_path = os.path.join(base_dir, \"nc-val-user-labels.pt\")\n", "nc_val_user_labels = torch.randint(0, 10, (num_vals,))\n", "print(f\"Part of val labels[user] for node classification: {nc_val_user_labels[:3]}\")\n", "torch.save(nc_val_user_labels, nc_val_user_labels_path)\n", "print(f\"NC val labels[user] are saved to {nc_val_user_labels_path}\\n\")\n", "\n", "# Val IDs for item.\n", "nc_val_item_ids_path = os.path.join(base_dir, \"nc-val-item-ids.npy\")\n", "nc_val_item_ids = item_ids[num_trains:num_trains+num_vals]\n", "print(f\"Part of val ids[item] for node classification: {nc_val_item_ids[:3]}\")\n", "np.save(nc_val_item_ids_path, nc_val_item_ids)\n", "print(f\"NC val ids[item] are saved to {nc_val_item_ids_path}\\n\")\n", "\n", "# Val labels for item.\n", "nc_val_item_labels_path = os.path.join(base_dir, \"nc-val-item-labels.pt\")\n", "nc_val_item_labels = torch.randint(0, 10, (num_vals,))\n", "print(f\"Part of val labels[item] for node classification: {nc_val_item_labels[:3]}\")\n", "torch.save(nc_val_item_labels, nc_val_item_labels_path)\n", "print(f\"NC val labels[item] are saved to {nc_val_item_labels_path}\\n\")\n", "\n", 
"# Test IDs for user.\n", "nc_test_user_ids_path = os.path.join(base_dir, \"nc-test-user-ids.npy\")\n", "nc_test_user_ids = user_ids[-num_tests:]\n", "print(f\"Part of test ids[user] for node classification: {nc_test_user_ids[:3]}\")\n", "np.save(nc_test_user_ids_path, nc_test_user_ids)\n", "print(f\"NC test ids[user] are saved to {nc_test_user_ids_path}\\n\")\n", "\n", "# Test labels for user.\n", "nc_test_user_labels_path = os.path.join(base_dir, \"nc-test-user-labels.pt\")\n", "nc_test_user_labels = torch.randint(0, 10, (num_tests,))\n", "print(f\"Part of test labels[user] for node classification: {nc_test_user_labels[:3]}\")\n", "torch.save(nc_test_user_labels, nc_test_user_labels_path)\n", "print(f\"NC test labels[user] are saved to {nc_test_user_labels_path}\\n\")\n", "\n", "# Test IDs for item.\n", "nc_test_item_ids_path = os.path.join(base_dir, \"nc-test-item-ids.npy\")\n", "nc_test_item_ids = item_ids[-num_tests:]\n", "print(f\"Part of test ids[item] for node classification: {nc_test_item_ids[:3]}\")\n", "np.save(nc_test_item_ids_path, nc_test_item_ids)\n", "print(f\"NC test ids[item] are saved to {nc_test_item_ids_path}\\n\")\n", "\n", "# Test labels for item.\n", "nc_test_item_labels_path = os.path.join(base_dir, \"nc-test-item-labels.pt\")\n", "nc_test_item_labels = torch.randint(0, 10, (num_tests,))\n", "print(f\"Part of test labels[item] for node classification: {nc_test_item_labels[:3]}\")\n", "torch.save(nc_test_item_labels, nc_test_item_labels_path)\n", "print(f\"NC test labels[item] are saved to {nc_test_item_labels_path}\\n\")" ] }, { "cell_type": "markdown", "metadata": { "id": "LhAcDCHQ_KJ0" }, "source": [ "#### Link Prediction Task\n", "For link prediction task, we need **seeds** or **corresponding labels and indexes** which representing the pos/neg property and group of the seeds for each training/validation/test set. Like feature data, numpy arrays and torch tensors are supported for these sets." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "u0jCnXIcAQy4" }, "outputs": [], "source": [ "# For illustration, let's generate item sets for each edge type.\n", "num_trains = int(num_edges * 0.6)\n", "num_vals = int(num_edges * 0.2)\n", "num_tests = num_edges - num_trains - num_vals\n", "\n", "# Train seeds for user:like:item.\n", "lp_train_like_seeds_path = os.path.join(base_dir, \"lp-train-like-seeds.npy\")\n", "lp_train_like_seeds = like_edges[:num_trains, :]\n", "print(f\"Part of train seeds[user:like:item] for link prediction: {lp_train_like_seeds[:3]}\")\n", "np.save(lp_train_like_seeds_path, lp_train_like_seeds)\n", "print(f\"LP train seeds[user:like:item] are saved to {lp_train_like_seeds_path}\\n\")\n", "\n", "# Train seeds for user:follow:user.\n", "lp_train_follow_seeds_path = os.path.join(base_dir, \"lp-train-follow-seeds.npy\")\n", "lp_train_follow_seeds = follow_edges[:num_trains, :]\n", "print(f\"Part of train seeds[user:follow:user] for link prediction: {lp_train_follow_seeds[:3]}\")\n", "np.save(lp_train_follow_seeds_path, lp_train_follow_seeds)\n", "print(f\"LP train seeds[user:follow:user] are saved to {lp_train_follow_seeds_path}\\n\")\n", "\n", "# Val seeds for user:like:item.\n", "lp_val_like_seeds_path = os.path.join(base_dir, \"lp-val-like-seeds.npy\")\n", "lp_val_like_seeds = like_edges[num_trains:num_trains+num_vals, :]\n", "lp_val_like_neg_dsts = np.random.randint(0, num_nodes, (num_vals, 10)).reshape(-1)\n", "lp_val_like_neg_srcs = np.repeat(lp_val_like_seeds[:,0], 10)\n", "lp_val_like_neg_seeds = np.concatenate((lp_val_like_neg_srcs, lp_val_like_neg_dsts)).reshape(2,-1).T\n", "lp_val_like_seeds = np.concatenate((lp_val_like_seeds, lp_val_like_neg_seeds))\n", "print(f\"Part of val seeds[user:like:item] for link prediction: {lp_val_like_seeds[:3]}\")\n", "np.save(lp_val_like_seeds_path, lp_val_like_seeds)\n", "print(f\"LP val seeds[user:like:item] are saved to {lp_val_like_seeds_path}\\n\")\n", "\n", "# Val 
labels for user:like:item.\n", "lp_val_like_labels_path = os.path.join(base_dir, \"lp-val-like-labels.npy\")\n", "lp_val_like_labels = np.empty(num_vals * (10 + 1))\n", "lp_val_like_labels[:num_vals] = 1\n", "lp_val_like_labels[num_vals:] = 0\n", "print(f\"Part of val labels[user:like:item] for link prediction: {lp_val_like_labels[:3]}\")\n", "np.save(lp_val_like_labels_path, lp_val_like_labels)\n", "print(f\"LP val labels[user:like:item] are saved to {lp_val_like_labels_path}\\n\")\n", "\n", "# Val indexes for user:like:item.\n", "lp_val_like_indexes_path = os.path.join(base_dir, \"lp-val-like-indexes.npy\")\n", "lp_val_like_indexes = np.arange(0, num_vals)\n", "lp_val_like_neg_indexes = np.repeat(lp_val_like_indexes, 10)\n", "lp_val_like_indexes = np.concatenate([lp_val_like_indexes, lp_val_like_neg_indexes])\n", "print(f\"Part of val indexes[user:like:item] for link prediction: {lp_val_like_indexes[:3]}\")\n", "np.save(lp_val_like_indexes_path, lp_val_like_indexes)\n", "print(f\"LP val indexes[user:like:item] are saved to {lp_val_like_indexes_path}\\n\")\n", "\n", "# Val seeds for user:follow:user.\n", "lp_val_follow_seeds_path = os.path.join(base_dir, \"lp-val-follow-seeds.npy\")\n", "lp_val_follow_seeds = follow_edges[num_trains:num_trains+num_vals, :]\n", "lp_val_follow_neg_dsts = np.random.randint(0, num_nodes, (num_vals, 10)).reshape(-1)\n", "lp_val_follow_neg_srcs = np.repeat(lp_val_follow_seeds[:,0], 10)\n", "lp_val_follow_neg_seeds = np.concatenate((lp_val_follow_neg_srcs, lp_val_follow_neg_dsts)).reshape(2,-1).T\n", "lp_val_follow_seeds = np.concatenate((lp_val_follow_seeds, lp_val_follow_neg_seeds))\n", "print(f\"Part of val seeds[user:follow:user] for link prediction: {lp_val_follow_seeds[:3]}\")\n", "np.save(lp_val_follow_seeds_path, lp_val_follow_seeds)\n", "print(f\"LP val seeds[user:follow:user] are saved to {lp_val_follow_seeds_path}\\n\")\n", "\n", "# Val labels for user:follow:user.\n", "lp_val_follow_labels_path = os.path.join(base_dir, 
\"lp-val-follow-labels.npy\")\n", "lp_val_follow_labels = np.empty(num_vals * (10 + 1))\n", "lp_val_follow_labels[:num_vals] = 1\n", "lp_val_follow_labels[num_vals:] = 0\n", "print(f\"Part of val labels[user:follow:item] for link prediction: {lp_val_follow_labels[:3]}\")\n", "np.save(lp_val_follow_labels_path, lp_val_follow_labels)\n", "print(f\"LP val labels[user:follow:item] are saved to {lp_val_follow_labels_path}\\n\")\n", "\n", "# Val indexes for user:follow:item.\n", "lp_val_follow_indexes_path = os.path.join(base_dir, \"lp-val-follow-indexes.npy\")\n", "lp_val_follow_indexes = np.arange(0, num_vals)\n", "lp_val_follow_neg_indexes = np.repeat(lp_val_follow_indexes, 10)\n", "lp_val_follow_indexes = np.concatenate([lp_val_follow_indexes, lp_val_follow_neg_indexes])\n", "print(f\"Part of val indexes[user:follow:item] for link prediction: {lp_val_follow_indexes[:3]}\")\n", "np.save(lp_val_follow_indexes_path, lp_val_follow_indexes)\n", "print(f\"LP val indexes[user:follow:item] are saved to {lp_val_follow_indexes_path}\\n\")\n", "\n", "# Test seeds for user:like:item.\n", "lp_test_like_seeds_path = os.path.join(base_dir, \"lp-test-like-seeds.npy\")\n", "lp_test_like_seeds = like_edges[-num_tests:, :]\n", "lp_test_like_neg_dsts = np.random.randint(0, num_nodes, (num_tests, 10)).reshape(-1)\n", "lp_test_like_neg_srcs = np.repeat(lp_test_like_seeds[:,0], 10)\n", "lp_test_like_neg_seeds = np.concatenate((lp_test_like_neg_srcs, lp_test_like_neg_dsts)).reshape(2,-1).T\n", "lp_test_like_seeds = np.concatenate((lp_test_like_seeds, lp_test_like_neg_seeds))\n", "print(f\"Part of test seeds[user:like:item] for link prediction: {lp_test_like_seeds[:3]}\")\n", "np.save(lp_test_like_seeds_path, lp_test_like_seeds)\n", "print(f\"LP test seeds[user:like:item] are saved to {lp_test_like_seeds_path}\\n\")\n", "\n", "# Test labels for user:like:item.\n", "lp_test_like_labels_path = os.path.join(base_dir, \"lp-test-like-labels.npy\")\n", "lp_test_like_labels = np.empty(num_tests * 
(10 + 1))\n", "lp_test_like_labels[:num_tests] = 1\n", "lp_test_like_labels[num_tests:] = 0\n", "print(f\"Part of test labels[user:like:item] for link prediction: {lp_test_like_labels[:3]}\")\n", "np.save(lp_test_like_labels_path, lp_test_like_labels)\n", "print(f\"LP test labels[user:like:item] are saved to {lp_test_like_labels_path}\\n\")\n", "\n", "# Test indexes for user:like:item.\n", "lp_test_like_indexes_path = os.path.join(base_dir, \"lp-test-like-indexes.npy\")\n", "lp_test_like_indexes = np.arange(0, num_tests)\n", "lp_test_like_neg_indexes = np.repeat(lp_test_like_indexes, 10)\n", "lp_test_like_indexes = np.concatenate([lp_test_like_indexes, lp_test_like_neg_indexes])\n", "print(f\"Part of test indexes[user:like:item] for link prediction: {lp_test_like_indexes[:3]}\")\n", "np.save(lp_test_like_indexes_path, lp_test_like_indexes)\n", "print(f\"LP test indexes[user:like:item] are saved to {lp_test_like_indexes_path}\\n\")\n", "\n", "# Test seeds for user:follow:user.\n", "lp_test_follow_seeds_path = os.path.join(base_dir, \"lp-test-follow-seeds.npy\")\n", "lp_test_follow_seeds = follow_edges[-num_tests:, :]\n", "lp_test_follow_neg_dsts = np.random.randint(0, num_nodes, (num_tests, 10)).reshape(-1)\n", "lp_test_follow_neg_srcs = np.repeat(lp_test_follow_seeds[:,0], 10)\n", "lp_test_follow_neg_seeds = np.concatenate((lp_test_follow_neg_srcs, lp_test_follow_neg_dsts)).reshape(2,-1).T\n", "lp_test_follow_seeds = np.concatenate((lp_test_follow_seeds, lp_test_follow_neg_seeds))\n", "print(f\"Part of test seeds[user:follow:user] for link prediction: {lp_test_follow_seeds[:3]}\")\n", "np.save(lp_test_follow_seeds_path, lp_test_follow_seeds)\n", "print(f\"LP test seeds[user:follow:user] are saved to {lp_test_follow_seeds_path}\\n\")\n", "\n", "# Test labels for user:follow:user.\n", "lp_test_follow_labels_path = os.path.join(base_dir, \"lp-test-follow-labels.npy\")\n", "lp_test_follow_labels = np.empty(num_tests * (10 + 1))\n", "lp_test_follow_labels[:num_tests] = 
1\n", "lp_test_follow_labels[num_tests:] = 0\n", "print(f\"Part of test labels[user:follow:user] for link prediction: {lp_test_follow_labels[:3]}\")\n", "np.save(lp_test_follow_labels_path, lp_test_follow_labels)\n", "print(f\"LP test labels[user:follow:user] are saved to {lp_test_follow_labels_path}\\n\")\n", "\n", "# Test indexes for user:follow:user.\n", "lp_test_follow_indexes_path = os.path.join(base_dir, \"lp-test-follow-indexes.npy\")\n", "lp_test_follow_indexes = np.arange(0, num_tests)\n", "lp_test_follow_neg_indexes = np.repeat(lp_test_follow_indexes, 10)\n", "lp_test_follow_indexes = np.concatenate([lp_test_follow_indexes, lp_test_follow_neg_indexes])\n", "print(f\"Part of test indexes[user:follow:user] for link prediction: {lp_test_follow_indexes[:3]}\")\n", "np.save(lp_test_follow_indexes_path, lp_test_follow_indexes)\n", "print(f\"LP test indexes[user:follow:user] are saved to {lp_test_follow_indexes_path}\\n\")" ] }, { "cell_type": "markdown", "metadata": { "id": "wbk6-wxRK-6S" }, "source": [ "## Organize Data into YAML File\n", "Now we need to create a `metadata.yaml` file which contains the paths and data types of graph structure, feature data, training/validation/test sets. Please note that all paths should be relative to `metadata.yaml`.\n", "\n", "For heterogeneous graph, we need to specify the node/edge type in **type** fields. For edge type, canonical etype is required which is a string that's concatenated by source node type, etype, and destination node type together with `:`.\n", "\n", "Notes:\n", "- all paths should be relative to `metadata.yaml`.\n", "- The fields below are optional and not specified in the example below.\n", " - `in_memory`: indicates whether to load data into memory or `mmap`. Default is `True`.\n", "\n", "Please refer to [YAML specification](https://github.com/dmlc/dgl/blob/master/docs/source/stochastic_training/ondisk-dataset-specification.rst) for more details." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ddGTWW61Lpwp" }, "outputs": [], "source": [ "yaml_content = f\"\"\"\n", " dataset_name: heterogeneous_graph_nc_lp\n", " graph:\n", " nodes:\n", " - type: user\n", " num: {num_nodes}\n", " - type: item\n", " num: {num_nodes}\n", " edges:\n", " - type: \"user:like:item\"\n", " format: csv\n", " path: {os.path.basename(like_edges_path)}\n", " - type: \"user:follow:user\"\n", " format: csv\n", " path: {os.path.basename(follow_edges_path)}\n", " feature_data:\n", " - domain: node\n", " type: user\n", " name: feat_0\n", " format: numpy\n", " path: {os.path.basename(node_user_feat_0_path)}\n", " - domain: node\n", " type: user\n", " name: feat_1\n", " format: torch\n", " path: {os.path.basename(node_user_feat_1_path)}\n", " - domain: node\n", " type: item\n", " name: feat_0\n", " format: numpy\n", " path: {os.path.basename(node_item_feat_0_path)}\n", " - domain: node\n", " type: item\n", " name: feat_1\n", " format: torch\n", " path: {os.path.basename(node_item_feat_1_path)}\n", " - domain: edge\n", " type: \"user:like:item\"\n", " name: feat_0\n", " format: numpy\n", " path: {os.path.basename(edge_like_feat_0_path)}\n", " - domain: edge\n", " type: \"user:like:item\"\n", " name: feat_1\n", " format: torch\n", " path: {os.path.basename(edge_like_feat_1_path)}\n", " - domain: edge\n", " type: \"user:follow:user\"\n", " name: feat_0\n", " format: numpy\n", " path: {os.path.basename(edge_follow_feat_0_path)}\n", " - domain: edge\n", " type: \"user:follow:user\"\n", " name: feat_1\n", " format: torch\n", " path: {os.path.basename(edge_follow_feat_1_path)}\n", " tasks:\n", " - name: node_classification\n", " num_classes: 10\n", " train_set:\n", " - type: user\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_train_user_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_train_user_labels_path)}\n", " - type: item\n", " data:\n", " - 
name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_train_item_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_train_item_labels_path)}\n", " validation_set:\n", " - type: user\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_val_user_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_val_user_labels_path)}\n", " - type: item\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_val_item_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_val_item_labels_path)}\n", " test_set:\n", " - type: user\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_test_user_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_test_user_labels_path)}\n", " - type: item\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_test_item_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_test_item_labels_path)}\n", " - name: link_prediction\n", " num_classes: 10\n", " train_set:\n", " - type: \"user:like:item\"\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_train_like_seeds_path)}\n", " - type: \"user:follow:user\"\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_train_follow_seeds_path)}\n", " validation_set:\n", " - type: \"user:like:item\"\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_val_like_seeds_path)}\n", " - name: labels\n", " format: numpy\n", " path: {os.path.basename(lp_val_like_labels_path)}\n", " - name: indexes\n", " format: numpy\n", " path: {os.path.basename(lp_val_like_indexes_path)}\n", " - type: \"user:follow:user\"\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_val_follow_seeds_path)}\n", " - name: labels\n", " 
format: numpy\n", " path: {os.path.basename(lp_val_follow_labels_path)}\n", " - name: indexes\n", " format: numpy\n", " path: {os.path.basename(lp_val_follow_indexes_path)}\n", " test_set:\n", " - type: \"user:like:item\"\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_test_like_seeds_path)}\n", " - name: labels\n", " format: numpy\n", " path: {os.path.basename(lp_test_like_labels_path)}\n", " - name: indexes\n", " format: numpy\n", " path: {os.path.basename(lp_test_like_indexes_path)}\n", " - type: \"user:follow:user\"\n", " data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_test_follow_seeds_path)}\n", " - name: labels\n", " format: numpy\n", " path: {os.path.basename(lp_test_follow_labels_path)}\n", " - name: indexes\n", " format: numpy\n", " path: {os.path.basename(lp_test_follow_indexes_path)}\n", "\"\"\"\n", "metadata_path = os.path.join(base_dir, \"metadata.yaml\")\n", "with open(metadata_path, \"w\") as f:\n", " f.write(yaml_content)" ] }, { "cell_type": "markdown", "metadata": { "id": "kEfybHGhOW7O" }, "source": [ "## Instantiate `OnDiskDataset`\n", "Now we're ready to load dataset via `dgl.graphbolt.OnDiskDataset`. When instantiating, we just pass in the base directory where `metadata.yaml` file lies.\n", "\n", "During first instantiation, GraphBolt preprocesses the raw data such as constructing `FusedCSCSamplingGraph` from edges. All data including graph, feature data, training/validation/test sets are put into `preprocessed` directory after preprocessing. Any following dataset loading will skip the preprocess stage.\n", "\n", "After preprocessing, `load()` is required to be called explicitly in order to load graph, feature data and tasks." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "W58CZoSzOiyo" }, "outputs": [], "source": [ "dataset = gb.OnDiskDataset(base_dir).load()\n", "graph = dataset.graph\n", "print(f\"Loaded graph: {graph}\\n\")\n", "\n", "feature = dataset.feature\n", "print(f\"Loaded feature store: {feature}\\n\")\n", "\n", "tasks = dataset.tasks\n", "nc_task = tasks[0]\n", "print(f\"Loaded node classification task: {nc_task}\\n\")\n", "lp_task = tasks[1]\n", "print(f\"Loaded link prediction task: {lp_task}\\n\")" ] } ], "metadata": { "colab": { "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: notebooks/stochastic_training/ondisk_dataset_homograph.ipynb ================================================ { "cells": [ { "cell_type": "markdown", "metadata": { "id": "FnFhPMaAfLtJ" }, "source": [ "# OnDiskDataset for Homogeneous Graph\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dmlc/dgl/blob/master/notebooks/stochastic_training/ondisk_dataset_homograph.ipynb) [![GitHub](https://img.shields.io/badge/-View%20on%20GitHub-181717?logo=github&logoColor=ffffff)](https://github.com/dmlc/dgl/blob/master/notebooks/stochastic_training/ondisk_dataset_homograph.ipynb)\n", "\n", "This tutorial shows how to create `OnDiskDataset` for homogeneous graph that could be used in **GraphBolt** framework.\n", "\n", "By the end of this tutorial, you will be able to\n", "\n", "- organize graph structure data.\n", "- organize feature data.\n", "- organize training/validation/test set for specific tasks.\n", "\n", "To create an 
``OnDiskDataset`` object, you need to organize all the data including graph structure, feature data and tasks into a directory. The directory should contain a ``metadata.yaml`` file that describes the metadata of the dataset.\n", "\n", "Now let's generate various data step by step and organize them together to instantiate `OnDiskDataset` finally." ] }, { "cell_type": "markdown", "metadata": { "id": "Wlb19DtWgtzq" }, "source": [ "## Install DGL package" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "UojlT9ZGgyr9" }, "outputs": [], "source": [ "# Install required packages.\n", "import os\n", "import torch\n", "import numpy as np\n", "os.environ['TORCH'] = torch.__version__\n", "os.environ['DGLBACKEND'] = \"pytorch\"\n", "\n", "# Install the CPU version.\n", "device = torch.device(\"cpu\")\n", "!pip install --pre dgl -f https://data.dgl.ai/wheels-test/repo.html\n", "\n", "try:\n", " import dgl\n", " import dgl.graphbolt as gb\n", " installed = True\n", "except ImportError as error:\n", " installed = False\n", " print(error)\n", "print(\"DGL installed!\" if installed else \"DGL not found!\")" ] }, { "cell_type": "markdown", "metadata": { "id": "2R7WnSbjsfbr" }, "source": [ "## Data preparation\n", "In order to demonstrate how to organize various data, let's create a base directory first." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "SZipbzyltLfO" }, "outputs": [], "source": [ "base_dir = './ondisk_dataset_homograph'\n", "os.makedirs(base_dir, exist_ok=True)\n", "print(f\"Created base directory: {base_dir}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "qhNtIn_xhlnl" }, "source": [ "### Generate graph structure data\n", "For homogeneous graph, we just need to save edges(namely seeds) into **Numpy** or **CSV** file.\n", "\n", "Note:\n", "- when saving to **Numpy**, the array requires to be in shape of `(2, N)`. 
This format is recommended as constructing graph from it is much faster than **CSV** file.\n", "- when saving to **CSV** file, do not save index and header.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "HcBt4G5BmSjr" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "num_nodes = 1000\n", "num_edges = 10 * num_nodes\n", "edges_path = os.path.join(base_dir, \"edges.csv\")\n", "edges = np.random.randint(0, num_nodes, size=(num_edges, 2))\n", "\n", "print(f\"Part of edges: {edges[:5, :]}\")\n", "\n", "df = pd.DataFrame(edges)\n", "df.to_csv(edges_path, index=False, header=False)\n", "\n", "print(f\"Edges are saved into {edges_path}\")" ] }, { "cell_type": "markdown", "metadata": { "id": "kh-4cPtzpcaH" }, "source": [ "### Generate feature data for graph\n", "For feature data, numpy arrays and torch tensors are supported for now." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_PVu1u5brBhF" }, "outputs": [], "source": [ "# Generate node feature in numpy array.\n", "node_feat_0_path = os.path.join(base_dir, \"node-feat-0.npy\")\n", "node_feat_0 = np.random.rand(num_nodes, 5)\n", "print(f\"Part of node feature [feat_0]: {node_feat_0[:3, :]}\")\n", "np.save(node_feat_0_path, node_feat_0)\n", "print(f\"Node feature [feat_0] is saved to {node_feat_0_path}\\n\")\n", "\n", "# Generate another node feature in torch tensor\n", "node_feat_1_path = os.path.join(base_dir, \"node-feat-1.pt\")\n", "node_feat_1 = torch.rand(num_nodes, 5)\n", "print(f\"Part of node feature [feat_1]: {node_feat_1[:3, :]}\")\n", "torch.save(node_feat_1, node_feat_1_path)\n", "print(f\"Node feature [feat_1] is saved to {node_feat_1_path}\\n\")\n", "\n", "# Generate edge feature in numpy array.\n", "edge_feat_0_path = os.path.join(base_dir, \"edge-feat-0.npy\")\n", "edge_feat_0 = np.random.rand(num_edges, 5)\n", "print(f\"Part of edge feature [feat_0]: {edge_feat_0[:3, :]}\")\n", "np.save(edge_feat_0_path, 
edge_feat_0)\n", "print(f\"Edge feature [feat_0] is saved to {edge_feat_0_path}\\n\")\n", "\n", "# Generate another edge feature in torch tensor\n", "edge_feat_1_path = os.path.join(base_dir, \"edge-feat-1.pt\")\n", "edge_feat_1 = torch.rand(num_edges, 5)\n", "print(f\"Part of edge feature [feat_1]: {edge_feat_1[:3, :]}\")\n", "torch.save(edge_feat_1, edge_feat_1_path)\n", "print(f\"Edge feature [feat_1] is saved to {edge_feat_1_path}\\n\")\n" ] }, { "cell_type": "markdown", "metadata": { "id": "ZyqgOtsIwzh_" }, "source": [ "### Generate tasks\n", "`OnDiskDataset` supports multiple tasks. For each task, we need to prepare training/validation/test sets respectively. Such sets usually vary among different tasks. In this tutorial, let's create a **Node Classification** task and **Link Prediction** task." ] }, { "cell_type": "markdown", "metadata": { "id": "hVxHaDIfzCkr" }, "source": [ "#### Node Classification Task\n", "For node classification task, we need **node IDs** and corresponding **labels** for each training/validation/test set. Like feature data, numpy arrays and torch tensors are supported for these sets." 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "S5-fyBbHzTCO" }, "outputs": [], "source": [ "num_trains = int(num_nodes * 0.6)\n", "num_vals = int(num_nodes * 0.2)\n", "num_tests = num_nodes - num_trains - num_vals\n", "\n", "ids = np.arange(num_nodes)\n", "np.random.shuffle(ids)\n", "\n", "nc_train_ids_path = os.path.join(base_dir, \"nc-train-ids.npy\")\n", "nc_train_ids = ids[:num_trains]\n", "print(f\"Part of train ids for node classification: {nc_train_ids[:3]}\")\n", "np.save(nc_train_ids_path, nc_train_ids)\n", "print(f\"NC train ids are saved to {nc_train_ids_path}\\n\")\n", "\n", "nc_train_labels_path = os.path.join(base_dir, \"nc-train-labels.pt\")\n", "nc_train_labels = torch.randint(0, 10, (num_trains,))\n", "print(f\"Part of train labels for node classification: {nc_train_labels[:3]}\")\n", "torch.save(nc_train_labels, nc_train_labels_path)\n", "print(f\"NC train labels are saved to {nc_train_labels_path}\\n\")\n", "\n", "nc_val_ids_path = os.path.join(base_dir, \"nc-val-ids.npy\")\n", "nc_val_ids = ids[num_trains:num_trains+num_vals]\n", "print(f\"Part of val ids for node classification: {nc_val_ids[:3]}\")\n", "np.save(nc_val_ids_path, nc_val_ids)\n", "print(f\"NC val ids are saved to {nc_val_ids_path}\\n\")\n", "\n", "nc_val_labels_path = os.path.join(base_dir, \"nc-val-labels.pt\")\n", "nc_val_labels = torch.randint(0, 10, (num_vals,))\n", "print(f\"Part of val labels for node classification: {nc_val_labels[:3]}\")\n", "torch.save(nc_val_labels, nc_val_labels_path)\n", "print(f\"NC val labels are saved to {nc_val_labels_path}\\n\")\n", "\n", "nc_test_ids_path = os.path.join(base_dir, \"nc-test-ids.npy\")\n", "nc_test_ids = ids[-num_tests:]\n", "print(f\"Part of test ids for node classification: {nc_test_ids[:3]}\")\n", "np.save(nc_test_ids_path, nc_test_ids)\n", "print(f\"NC test ids are saved to {nc_test_ids_path}\\n\")\n", "\n", "nc_test_labels_path = os.path.join(base_dir, \"nc-test-labels.pt\")\n", "nc_test_labels = 
torch.randint(0, 10, (num_tests,))\n", "print(f\"Part of test labels for node classification: {nc_test_labels[:3]}\")\n", "torch.save(nc_test_labels, nc_test_labels_path)\n", "print(f\"NC test labels are saved to {nc_test_labels_path}\\n\")" ] }, { "cell_type": "markdown", "metadata": { "id": "LhAcDCHQ_KJ0" }, "source": [ "#### Link Prediction Task\n", "For the link prediction task, we need **seeds**, optionally with **corresponding labels and indexes** which represent the pos/neg property and the group of the seeds, for each training/validation/test set. Like feature data, numpy arrays and torch tensors are supported for these sets." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "u0jCnXIcAQy4" }, "outputs": [], "source": [ "num_trains = int(num_edges * 0.6)\n", "num_vals = int(num_edges * 0.2)\n", "num_tests = num_edges - num_trains - num_vals\n", "\n", "lp_train_seeds_path = os.path.join(base_dir, \"lp-train-seeds.npy\")\n", "lp_train_seeds = edges[:num_trains, :]\n", "print(f\"Part of train seeds for link prediction: {lp_train_seeds[:3]}\")\n", "np.save(lp_train_seeds_path, lp_train_seeds)\n", "print(f\"LP train seeds are saved to {lp_train_seeds_path}\\n\")\n", "\n", "lp_val_seeds_path = os.path.join(base_dir, \"lp-val-seeds.npy\")\n", "lp_val_seeds = edges[num_trains:num_trains+num_vals, :]\n", "lp_val_neg_dsts = np.random.randint(0, num_nodes, (num_vals, 10)).reshape(-1)\n", "lp_val_neg_srcs = np.repeat(lp_val_seeds[:,0], 10)\n", "lp_val_neg_seeds = np.concatenate((lp_val_neg_srcs, lp_val_neg_dsts)).reshape(2,-1).T\n", "lp_val_seeds = np.concatenate((lp_val_seeds, lp_val_neg_seeds))\n", "print(f\"Part of val seeds for link prediction: {lp_val_seeds[:3]}\")\n", "np.save(lp_val_seeds_path, lp_val_seeds)\n", "print(f\"LP val seeds are saved to {lp_val_seeds_path}\\n\")\n", "\n", "lp_val_labels_path = os.path.join(base_dir, \"lp-val-labels.npy\")\n", "lp_val_labels = np.empty(num_vals * (10 + 1))\n", "lp_val_labels[:num_vals] = 1\n", 
"lp_val_labels[num_vals:] = 0\n", "print(f\"Part of val labels for link prediction: {lp_val_labels[:3]}\")\n", "np.save(lp_val_labels_path, lp_val_labels)\n", "print(f\"LP val labels are saved to {lp_val_labels_path}\\n\")\n", "\n", "lp_val_indexes_path = os.path.join(base_dir, \"lp-val-indexes.npy\")\n", "lp_val_indexes = np.arange(0, num_vals)\n", "lp_val_neg_indexes = np.repeat(lp_val_indexes, 10)\n", "lp_val_indexes = np.concatenate([lp_val_indexes, lp_val_neg_indexes])\n", "print(f\"Part of val indexes for link prediction: {lp_val_indexes[:3]}\")\n", "np.save(lp_val_indexes_path, lp_val_indexes)\n", "print(f\"LP val indexes are saved to {lp_val_indexes_path}\\n\")\n", "\n", "lp_test_seeds_path = os.path.join(base_dir, \"lp-test-seeds.npy\")\n", "lp_test_seeds = edges[-num_tests:, :]\n", "lp_test_neg_dsts = np.random.randint(0, num_nodes, (num_tests, 10)).reshape(-1)\n", "lp_test_neg_srcs = np.repeat(lp_test_seeds[:,0], 10)\n", "lp_test_neg_seeds = np.concatenate((lp_test_neg_srcs, lp_test_neg_dsts)).reshape(2,-1).T\n", "lp_test_seeds = np.concatenate((lp_test_seeds, lp_test_neg_seeds))\n", "print(f\"Part of test seeds for link prediction: {lp_test_seeds[:3]}\")\n", "np.save(lp_test_seeds_path, lp_test_seeds)\n", "print(f\"LP test seeds are saved to {lp_test_seeds_path}\\n\")\n", "\n", "lp_test_labels_path = os.path.join(base_dir, \"lp-test-labels.npy\")\n", "lp_test_labels = np.empty(num_tests * (10 + 1))\n", "lp_test_labels[:num_tests] = 1\n", "lp_test_labels[num_tests:] = 0\n", "print(f\"Part of val labels for link prediction: {lp_test_labels[:3]}\")\n", "np.save(lp_test_labels_path, lp_test_labels)\n", "print(f\"LP test labels are saved to {lp_test_labels_path}\\n\")\n", "\n", "lp_test_indexes_path = os.path.join(base_dir, \"lp-test-indexes.npy\")\n", "lp_test_indexes = np.arange(0, num_tests)\n", "lp_test_neg_indexes = np.repeat(lp_test_indexes, 10)\n", "lp_test_indexes = np.concatenate([lp_test_indexes, lp_test_neg_indexes])\n", "print(f\"Part of test 
indexes for link prediction: {lp_test_indexes[:3]}\")\n", "np.save(lp_test_indexes_path, lp_test_indexes)\n", "print(f\"LP test indexes are saved to {lp_test_indexes_path}\\n\")" ] }, { "cell_type": "markdown", "metadata": { "id": "wbk6-wxRK-6S" }, "source": [ "## Organize Data into YAML File\n", "Now we need to create a `metadata.yaml` file which contains the paths and data types of graph structure, feature data, training/validation/test sets.\n", "\n", "Notes:\n", "- all paths should be relative to `metadata.yaml`.\n", "- The fields below are optional and not specified in the example below.\n", " - `in_memory`: indicates whether to load data into memory or `mmap`. Default is `True`.\n", "\n", "Please refer to [YAML specification](https://github.com/dmlc/dgl/blob/master/docs/source/stochastic_training/ondisk-dataset-specification.rst) for more details." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ddGTWW61Lpwp" }, "outputs": [], "source": [ "yaml_content = f\"\"\"\n", " dataset_name: homogeneous_graph_nc_lp\n", " graph:\n", " nodes:\n", " - num: {num_nodes}\n", " edges:\n", " - format: csv\n", " path: {os.path.basename(edges_path)}\n", " feature_data:\n", " - domain: node\n", " name: feat_0\n", " format: numpy\n", " path: {os.path.basename(node_feat_0_path)}\n", " - domain: node\n", " name: feat_1\n", " format: torch\n", " path: {os.path.basename(node_feat_1_path)}\n", " - domain: edge\n", " name: feat_0\n", " format: numpy\n", " path: {os.path.basename(edge_feat_0_path)}\n", " - domain: edge\n", " name: feat_1\n", " format: torch\n", " path: {os.path.basename(edge_feat_1_path)}\n", " tasks:\n", " - name: node_classification\n", " num_classes: 10\n", " train_set:\n", " - data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_train_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_train_labels_path)}\n", " validation_set:\n", " - data:\n", " - name: seeds\n", " format: numpy\n", " path: 
{os.path.basename(nc_val_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_val_labels_path)}\n", " test_set:\n", " - data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(nc_test_ids_path)}\n", " - name: labels\n", " format: torch\n", " path: {os.path.basename(nc_test_labels_path)}\n", " - name: link_prediction\n", " num_classes: 10\n", " train_set:\n", " - data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_train_seeds_path)}\n", " validation_set:\n", " - data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_val_seeds_path)}\n", " - name: labels\n", " format: numpy\n", " path: {os.path.basename(lp_val_labels_path)}\n", " - name: indexes\n", " format: numpy\n", " path: {os.path.basename(lp_val_indexes_path)}\n", " test_set:\n", " - data:\n", " - name: seeds\n", " format: numpy\n", " path: {os.path.basename(lp_test_seeds_path)}\n", " - name: labels\n", " format: numpy\n", " path: {os.path.basename(lp_test_labels_path)}\n", " - name: indexes\n", " format: numpy\n", " path: {os.path.basename(lp_test_indexes_path)}\n", "\"\"\"\n", "metadata_path = os.path.join(base_dir, \"metadata.yaml\")\n", "with open(metadata_path, \"w\") as f:\n", " f.write(yaml_content)" ] }, { "cell_type": "markdown", "metadata": { "id": "kEfybHGhOW7O" }, "source": [ "## Instantiate `OnDiskDataset`\n", "Now we're ready to load dataset via `dgl.graphbolt.OnDiskDataset`. When instantiating, we just pass in the base directory where `metadata.yaml` file lies.\n", "\n", "During first instantiation, GraphBolt preprocesses the raw data such as constructing `FusedCSCSamplingGraph` from edges. All data including graph, feature data, training/validation/test sets are put into `preprocessed` directory after preprocessing. 
Any following dataset loading will skip the preprocess stage.\n", "\n", "After preprocessing, `load()` is required to be called explicitly in order to load graph, feature data and tasks." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "W58CZoSzOiyo" }, "outputs": [], "source": [ "dataset = gb.OnDiskDataset(base_dir).load()\n", "graph = dataset.graph\n", "print(f\"Loaded graph: {graph}\\n\")\n", "\n", "feature = dataset.feature\n", "print(f\"Loaded feature store: {feature}\\n\")\n", "\n", "tasks = dataset.tasks\n", "nc_task = tasks[0]\n", "print(f\"Loaded node classification task: {nc_task}\\n\")\n", "lp_task = tasks[1]\n", "print(f\"Loaded link prediction task: {lp_task}\\n\")" ] } ], "metadata": { "colab": { "private_outputs": true, "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 0 } ================================================ FILE: pyproject.toml ================================================ [tool.black] line-length = 80 ================================================ FILE: python/dgl/__init__.py ================================================ """ The ``dgl`` package contains data structure for storing structural and feature data (i.e., the :class:`DGLGraph` class) and also utilities for generating, manipulating and transforming graphs. """ # Windows compatibility # This initializes Winsock and performs cleanup at termination as required import socket # Backend and logging should be imported before other modules. from .logging import enable_verbose_logging # usort: skip from .backend import backend_name, load_backend # usort: skip from . 
import ( container, cuda, dataloading, function, ops, random, sampling, storages, ) from ._ffi.base import __version__, DGLError from ._ffi.function import ( extract_ext_funcs, get_global_func, list_global_func_names, register_func, ) from ._ffi.runtime_ctypes import TypeCode from .base import ALL, EID, ETYPE, NID, NTYPE from .readout import * from .batch import * from .convert import * from .generators import * from .dataloading import ( set_dst_lazy_features, set_edge_lazy_features, set_node_lazy_features, set_src_lazy_features, ) from .heterograph import ( # pylint: disable=reimported DGLGraph, DGLGraph as DGLHeteroGraph, ) from .merge import * from .subgraph import * from .traversal import * from .transforms import * from .propagate import * from .random import * from . import optim from .data.utils import load_graphs, save_graphs from .frame import LazyFeature from .global_config import is_libxsmm_enabled, use_libxsmm from .utils import apply_each from .mpops import * from .homophily import * from .label_informativeness import * ================================================ FILE: python/dgl/_api_internal.py ================================================ """Namespace for internal apis.""" ================================================ FILE: python/dgl/_ffi/README.md ================================================ # C API and runtime Borrowed and adapted from TVM project. (commit: 2ce5277) ================================================ FILE: python/dgl/_ffi/__init__.py ================================================ """C interfacing code. This namespace contains everything that interacts with C code. Most C related object are ctypes compatible, which means they contains a handle field that is ctypes.c_void_p and can be used via ctypes function calls. Some performance critical functions are implemented by cython and have a ctypes fallback implementation. 
def convert_to_dgl_func(pyfunc):
    """Convert a python function to DGL function

    The callable is wrapped in a ctypes callback of type ``DGLPackedCFunc``
    and registered with the C runtime through ``DGLFuncCreateFromCFunc``.

    Parameters
    ----------
    pyfunc : python function
        The python function to be converted. It may return ``None`` or a
        single value; returning a tuple is reported as an error to the
        C caller.

    Returns
    -------
    dglfunc: dgl.nd.Function
        The converted dgl function.
    """
    local_pyfunc = pyfunc

    def cfun(args, type_codes, num_args, ret, _):
        """ctypes function

        Returns 0 on success and -1 on failure. On failure the formatted
        traceback is stored with ``DGLAPISetLastError`` so the C side can
        surface it.
        """
        num_args = (
            num_args.value if isinstance(num_args, ctypes.c_int) else num_args
        )
        pyargs = (
            C_TO_PY_ARG_SWITCH[type_codes[i]](args[i]) for i in range(num_args)
        )
        # An exception escaping a ctypes callback cannot be propagated to the
        # C caller, so every failure (in the user function, in the
        # return-value checks, or while packing the return value) has to be
        # turned into an error code here.
        # pylint: disable=broad-except
        try:
            rv = local_pyfunc(*pyargs)
            if rv is not None:
                if isinstance(rv, tuple):
                    raise ValueError(
                        "PackedFunction can only support one return value"
                    )
                temp_args = []
                values, tcodes, _ = _make_dgl_args((rv,), temp_args)
                if not isinstance(ret, DGLRetValueHandle):
                    ret = DGLRetValueHandle(ret)
                check_call(
                    _LIB.DGLCFuncSetReturn(
                        ret, values, tcodes, ctypes.c_int(1)
                    )
                )
                # Reference the temporaries so they stay alive until the
                # return value has been handed over.
                _ = temp_args
                _ = rv
        except Exception:
            msg = traceback.format_exc()
            _LIB.DGLAPISetLastError(c_str(msg))
            return -1
        return 0

    handle = FunctionHandle()
    f = DGLPackedCFunc(cfun)
    # NOTE: We will need to use python-api to increase ref count of the f
    # DGL_FREE_PYOBJ will be called after it is no longer needed.
    pyobj = ctypes.py_object(f)
    ctypes.pythonapi.Py_IncRef(pyobj)
    check_call(
        _LIB.DGLFuncCreateFromCFunc(
            f, pyobj, DGL_FREE_PYOBJ, ctypes.byref(handle)
        )
    )
    return _CLASS_FUNCTION(handle, False)
""" num_args = len(args) values = (DGLValue * num_args)() type_codes = (ctypes.c_int * num_args)() for i, arg in enumerate(args): if arg is None: values[i].v_handle = None type_codes[i] = TypeCode.NULL elif isinstance(arg, ObjectBase): values[i].v_handle = arg.handle type_codes[i] = TypeCode.OBJECT_HANDLE elif isinstance(arg, (list, tuple, dict, ObjectGeneric)): arg = convert_to_object(arg) values[i].v_handle = arg.handle type_codes[i] = TypeCode.OBJECT_HANDLE temp_args.append(arg) elif isinstance(arg, NDArrayBase): values[i].v_handle = ctypes.cast(arg.handle, ctypes.c_void_p) type_codes[i] = ( TypeCode.NDARRAY_CONTAINER if not arg.is_view else TypeCode.ARRAY_HANDLE ) elif isinstance(arg, _nd._DGL_COMPATS): values[i].v_handle = ctypes.c_void_p(arg._dgl_handle) type_codes[i] = arg.__class__._dgl_tcode elif isinstance(arg, Integral): values[i].v_int64 = arg type_codes[i] = TypeCode.INT elif isinstance(arg, Number): values[i].v_float64 = arg type_codes[i] = TypeCode.FLOAT elif isinstance(arg, DGLDataType): values[i].v_str = c_str(str(arg)) type_codes[i] = TypeCode.STR elif isinstance(arg, DGLContext): values[i].v_ctx = arg type_codes[i] = TypeCode.DGL_CONTEXT elif isinstance(arg, bytearray): arr = DGLByteArray() arr.data = ctypes.cast( (ctypes.c_byte * len(arg)).from_buffer(arg), ctypes.POINTER(ctypes.c_byte), ) arr.size = len(arg) values[i].v_handle = ctypes.c_void_p(ctypes.addressof(arr)) temp_args.append(arr) type_codes[i] = TypeCode.BYTES elif isinstance(arg, string_types): values[i].v_str = c_str(arg) type_codes[i] = TypeCode.STR # NOTE(minjie): module is not used in DGL # elif isinstance(arg, _CLASS_MODULE): # values[i].v_handle = arg.handle # type_codes[i] = TypeCode.MODULE_HANDLE elif isinstance(arg, FunctionBase): values[i].v_handle = arg.handle type_codes[i] = TypeCode.FUNC_HANDLE elif isinstance(arg, ctypes.c_void_p): values[i].v_handle = arg type_codes[i] = TypeCode.HANDLE elif callable(arg): arg = convert_to_dgl_func(arg) values[i].v_handle = arg.handle 
class FunctionBase(object):
    """Base class wrapping a DGL packed-function handle (ctypes backend)."""

    __slots__ = ["handle", "is_global"]

    # pylint: disable=no-member
    def __init__(self, handle, is_global):
        """Store the function handle and its ownership flag

        Parameters
        ----------
        handle : FunctionHandle
            the handle to the underlying function.

        is_global : bool
            Whether this is a global function in python
        """
        self.handle, self.is_global = handle, is_global

    def __del__(self):
        # Only non-global handles are released, and only while the shared
        # library object is still available.
        if self.is_global or _LIB is None:
            return
        check_call(_LIB.DGLFuncFree(self.handle))

    def __call__(self, *args):
        """Invoke the wrapped function

        Parameters
        ----------
        args : list
            The positional arguments to the function call.

        Returns
        -------
        The value produced by the ``RETURN_SWITCH`` converter registered for
        the returned type code.
        """
        # ``keepalive`` collects temporaries created while packing; it stays
        # referenced by this frame until the call has returned.
        keepalive = []
        packed_values, packed_tcodes, argc = _make_dgl_args(args, keepalive)
        result = DGLValue()
        result_tcode = ctypes.c_int()
        status = _LIB.DGLFuncCall(
            self.handle,
            packed_values,
            packed_tcodes,
            ctypes.c_int(argc),
            ctypes.byref(result),
            ctypes.byref(result_tcode),
        )
        check_call(status)
        converter = RETURN_SWITCH[result_tcode.value]
        return converter(result)
def _set_class_function(func_class):
    """Register the class used to wrap function handles.

    The class is stored in the module-level ``_CLASS_FUNCTION`` and is
    instantiated as ``_CLASS_FUNCTION(handle, False)`` wherever this module
    has to return a function object.

    Parameters
    ----------
    func_class : type
        Class called with ``(handle, is_global)``.
    """
    global _CLASS_FUNCTION
    _CLASS_FUNCTION = func_class
class NDArrayBase(object):
    """A simple Device/CPU Array object in runtime."""

    __slots__ = ["handle", "is_view"]

    # pylint: disable=no-member
    def __init__(self, handle, is_view=False):
        """Initialize the array with handle

        Parameters
        ----------
        handle : DGLArrayHandle
            the handle to the underlying C++ DGLArray

        is_view : bool, optional
            When true, the handle is not freed when this object is deleted.
        """
        self.handle = handle
        self.is_view = is_view

    def __del__(self):
        # Views do not free the handle; also skip when the library object is
        # no longer available.
        if not self.is_view and _LIB:
            check_call(_LIB.DGLArrayFree(self.handle))

    @property
    def _dgl_handle(self):
        """Integer address of the underlying handle."""
        return ctypes.cast(self.handle, ctypes.c_void_p).value

    def to_dlpack(self, alignment=0):
        """Produce a DLPack tensor from this array without copying memory

        Parameters
        ----------
        alignment: int, default to be 0
            Indicates the alignment requirement when converting to dlpack.
            Will copy to a new tensor if the alignment requirement is not
            satisfied. 0 means no alignment requirement.

        Returns
        -------
        dlpack : PyCapsule
            A capsule named "dltensor" holding the DLPack tensor view of the
            array data.
        """
        ptr = ctypes.c_void_p()
        check_call(
            _LIB.DGLArrayToDLPack(self.handle, ctypes.byref(ptr), alignment)
        )
        # The capsule destructor releases the tensor if the capsule is
        # destroyed while it is still named "dltensor".
        return ctypes.pythonapi.PyCapsule_New(
            ptr, _c_str_dltensor, _c_dlpack_deleter
        )
class ObjectBase(object):
    """Object base class

    Wraps an object handle of the C runtime. Attributes other than
    ``handle`` are looked up on the C side through ``DGLObjectGetAttr``.
    """

    __slots__ = ["handle"]

    # pylint: disable=no-member
    def __del__(self):
        # ``hasattr`` is false when the ``handle`` slot was never assigned
        # (``__getattr__`` below raises AttributeError for it), so objects
        # that were only partially constructed are skipped.
        if _LIB is not None and hasattr(self, "handle"):
            check_call(_LIB.DGLObjectFree(self.handle))

    def __getattr__(self, name):
        """Fetch attribute ``name`` from the underlying C object.

        Python calls this only after normal attribute lookup has failed.

        Raises
        ------
        AttributeError
            If ``name`` is ``"handle"`` (i.e. the slot is unset) or the C
            side reports that the attribute does not exist.
        """
        if name == "handle":
            raise AttributeError(
                "'handle' is a reserved attribute name that should not be used"
            )
        ret_val = DGLValue()
        ret_type_code = ctypes.c_int()
        ret_success = ctypes.c_int()
        check_call(
            _LIB.DGLObjectGetAttr(
                self.handle,
                c_str(name),
                ctypes.byref(ret_val),
                ctypes.byref(ret_type_code),
                ctypes.byref(ret_success),
            )
        )
        if not ret_success.value:
            raise AttributeError(
                "'%s' object has no attribute '%s'" % (str(type(self)), name)
            )
        # Convert the raw value with the converter registered for its
        # type code.
        return RETURN_SWITCH[ret_type_code.value](ret_val)

    def __init_handle_by_constructor__(self, fconstructor, *args):
        """Initialize the handle by calling constructor function.

        Parameters
        ----------
        fconstructor : Function
            Constructor function.

        args: list of objects
            The arguments to the constructor

        Note
        ----
        We have a special calling convention to call constructor functions.
        So the return handle is directly set into the Object object
        instead of creating a new Object.
        """
        # assign handle first to avoid error raising
        self.handle = None
        # ``__init_by_constructor__`` is a module-level hook that is ``None``
        # until another module assigns it.
        handle = __init_by_constructor__(
            fconstructor, args
        )  # pylint: disable=not-callable
        if not isinstance(handle, ObjectHandle):
            handle = ObjectHandle(handle)
        self.handle = handle
# Converters from a ``DGLValue`` received as a callback argument to a Python
# object, keyed by type code. The entries defined here are the same as the
# initial entries of ``RETURN_SWITCH``.
C_TO_PY_ARG_SWITCH = {
    TypeCode.INT: lambda x: x.v_int64,
    TypeCode.FLOAT: lambda x: x.v_float64,
    TypeCode.HANDLE: _return_handle,
    TypeCode.NULL: lambda x: None,
    TypeCode.STR: lambda x: py_str(x.v_str),
    TypeCode.BYTES: _return_bytes,
    # Build a fresh DGLContext from the two fields of the union member.
    TypeCode.DGL_CONTEXT: lambda x: DGLContext(
        x.v_ctx.device_type, x.v_ctx.device_id
    ),
}
(*deleter)(DLManagedTensor* self) ctypedef struct DGLValue: int64_t v_int64 double v_float64 void* v_handle const char* v_str DGLDataType v_type DGLContext v_ctx ctypedef int64_t dgl_index_t ctypedef DGLArray* DGLArrayHandle ctypedef void* DGLStreamHandle ctypedef void* DGLRetValueHandle ctypedef void* DGLFunctionHandle ctypedef void* ObjectHandle ctypedef int (*DGLPackedCFunc)( DGLValue* args, int* type_codes, int num_args, DGLRetValueHandle ret, void* resource_handle) ctypedef void (*DGLPackedCFuncFinalizer)(void* resource_handle) cdef extern from "dgl/runtime/c_runtime_api.h": void DGLAPISetLastError(const char* msg) const char *DGLGetLastError() int DGLFuncCall(DGLFunctionHandle func, DGLValue* arg_values, int* type_codes, int num_args, DGLValue* ret_val, int* ret_type_code) nogil int DGLFuncFree(DGLFunctionHandle func) int DGLCFuncSetReturn(DGLRetValueHandle ret, DGLValue* value, int* type_code, int num_ret) int DGLFuncCreateFromCFunc(DGLPackedCFunc func, void* resource_handle, DGLPackedCFuncFinalizer fin, DGLFunctionHandle *out) int DGLCbArgToReturn(DGLValue* value, int code) int DGLArrayAlloc(dgl_index_t* shape, dgl_index_t ndim, DGLDataType dtype, DGLContext ctx, DGLArrayHandle* out) int DGLArrayAllocSharedMem(const char *mem_name, const dgl_index_t *shape, int ndim, int dtype_code, int dtype_bits, int dtype_lanes, bool is_create, DGLArrayHandle* out) int DGLArrayFree(DGLArrayHandle handle) int DGLArrayCopyFromTo(DGLArrayHandle src, DGLArrayHandle to) cdef extern from "dgl/runtime/c_object_api.h": int DGLObjectFree(ObjectHandle handle) int DGLObjectTypeKey2Index(const char* type_key, int* out_index) int DGLObjectGetTypeIndex(ObjectHandle handle, int* out_index) int DGLObjectGetAttr(ObjectHandle handle, const char* key, DGLValue* out_value, int* out_type_code, int* out_success) cdef extern from "dgl/runtime/dlpack_convert.h": int DGLArrayFromDLPack(DLManagedTensor* arr_from, DGLArrayHandle* out) int DGLArrayToDLPack(DGLArrayHandle arr_from, DLManagedTensor** 
cdef inline c_str(pystr):
    """Encode a python string as UTF-8 bytes for the C API

    Parameters
    ----------
    pystr : string type
        python string

    Returns
    -------
    bytes
        The UTF-8 encoding of ``pystr``. Callers that store it in a
        ``const char*`` field must keep the returned object alive for as
        long as the pointer is used.
    """
    return pystr.encode("utf-8")
CALL(DGLCbArgToReturn(&value, tcode)) if tcode != kArrayHandle: pyargs.append(make_ret(value, tcode)) else: pyargs.append(c_make_array(value.v_handle, True)) try: rv = local_pyfunc(*pyargs) except Exception: msg = traceback.format_exc() DGLAPISetLastError(c_str(msg)) return -1 if rv is not None: if isinstance(rv, tuple): raise ValueError("PackedFunction can only support one return value") temp_args = [] make_arg(rv, &value, &tcode, temp_args) CALL(DGLCFuncSetReturn(ret, &value, &tcode, 1)) return 0 def convert_to_dgl_func(object pyfunc): """Convert a python function to DGL function Parameters ---------- pyfunc : python function The python function to be converted. Returns ------- dglfunc: dgl.Function The converted dgl function. """ cdef DGLFunctionHandle chandle Py_INCREF(pyfunc) CALL(DGLFuncCreateFromCFunc(dgl_callback, (pyfunc), dgl_callback_finalize, &chandle)) ret = _CLASS_FUNCTION(None, False) (ret).chandle = chandle return ret cdef inline int make_arg(object arg, DGLValue* value, int* tcode, list temp_args) except -1: """Pack arguments into c args dgl call accept""" cdef unsigned long long ptr if isinstance(arg, ObjectBase): value[0].v_handle = (arg).chandle tcode[0] = kObjectHandle elif isinstance(arg, NDArrayBase): value[0].v_handle = (arg).chandle tcode[0] = (kNDArrayContainer if not (arg).c_is_view else kArrayHandle) elif isinstance(arg, _DGL_COMPATS): ptr = arg._dgl_handle value[0].v_handle = (ptr) tcode[0] = arg.__class__._dgl_tcode elif isinstance(arg, (int, long)): value[0].v_int64 = arg tcode[0] = kObjectInt elif isinstance(arg, float): value[0].v_float64 = arg tcode[0] = kObjectFloat elif isinstance(arg, str): tstr = c_str(arg) value[0].v_str = tstr tcode[0] = kStr temp_args.append(tstr) elif arg is None: value[0].v_handle = NULL tcode[0] = kNull elif isinstance(arg, Number): value[0].v_float64 = arg tcode[0] = kObjectFloat elif isinstance(arg, CTypesDGLDataType): tstr = c_str(str(arg)) value[0].v_str = tstr tcode[0] = kStr temp_args.append(tstr) 
cdef inline bytearray make_ret_bytes(void* chandle):
    """Copy the DGLByteArray that chandle points to into a new bytearray."""
    handle = ctypes_handle(chandle)
    # Reinterpret the handle as a pointer to DGLByteArray and dereference it.
    arr = ctypes.cast(handle, ctypes.POINTER(DGLByteArray))[0]
    size = arr.size
    res = bytearray(size)
    # ctypes view over the bytearray's own buffer, used as the copy target.
    rptr = (ctypes.c_byte * size).from_buffer(res)
    if not ctypes.memmove(rptr, arr.data, size):
        raise RuntimeError('memmove failed')
    return res
cdef inline int FuncCall(void* chandle,
                         tuple args,
                         DGLValue* ret_val,
                         int* ret_tcode) except -1:
    """Call the packed function chandle with args.

    The raw return value and its type code are written to ret_val and
    ret_tcode. Returns 0 on success; raises DGLError with the runtime's
    last error message when DGLFuncCall reports a non-zero status.
    """
    cdef int nargs
    nargs = len(args)
    # Calls with at most three arguments go through FuncCall3.
    if nargs <= 3:
        FuncCall3(chandle, args, nargs, ret_val, ret_tcode)
        return 0

    cdef vector[DGLValue] values
    cdef vector[int] tcodes
    values.resize(max(nargs, 1))
    tcodes.resize(max(nargs, 1))
    # temp_args holds objects created while packing so that they outlive
    # the call below.
    temp_args = []
    for i in range(nargs):
        make_arg(args[i], &values[i], &tcodes[i], temp_args)
    # The GIL is released for the duration of the C call.
    with nogil:
        ret = DGLFuncCall(chandle, &values[0], &tcodes[0],
                          nargs, ret_val, ret_tcode)
    if ret != 0:
        raise DGLError(py_str(DGLGetLastError()))
    return 0
def _set_class_module(module_class):
    """Register the class used for module handles.

    The class is stored in the module-level ``_CLASS_MODULE``.

    Parameters
    ----------
    module_class : type
        The module class to record.
    """
    global _CLASS_MODULE
    _CLASS_MODULE = module_class
def _reg_extension(cls, fcreate):
    """Register an extension class with the FFI layer.

    Parameters
    ----------
    cls : type
        The extension class. It is appended to ``_DGL_COMPATS``; its
        ``_dgl_tcode`` attribute is read when ``fcreate`` is given.

    fcreate : callable or None
        When truthy, stored in ``_DGL_EXT_RET`` under ``cls._dgl_tcode`` so
        that values returned with that type code are built by calling it
        with the handle.
    """
    global _DGL_COMPATS
    # Tuples are immutable, so a new tuple is bound to the global.
    _DGL_COMPATS += (cls,)
    if fcreate:
        _DGL_EXT_RET[cls._dgl_tcode] = fcreate
def _register_object(int index, object cls):
    """Store ``cls`` in the ``OBJECT_TYPE`` table at position ``index``.

    The table is first padded with ``None`` entries so that ``index`` is a
    valid slot.
    """
    missing_slots = index + 1 - len(OBJECT_TYPE)
    if missing_slots > 0:
        OBJECT_TYPE.extend([None] * missing_slots)
    OBJECT_TYPE[index] = cls
def _load_lib():
    """Load the DGL shared library from the first path that libinfo reports.

    Returns
    -------
    lib : ctypes.CDLL
        The loaded library.
    basename : str
        File name of the loaded library.
    dirname : str
        Directory that contains the loaded library.
    """
    primary_path = libinfo.find_lib_path()[0]
    library = ctypes.CDLL(primary_path)
    # The error message comes back as a C string; without this ctypes would
    # treat the return value as an int.
    library.DGLGetLastError.restype = ctypes.c_char_p
    folder, file_name = os.path.split(primary_path)
    return library, file_name, folder
def load_tensor_adapter(backend, version):
    """Ask the DGL library to load the tensoradapter built for a backend.

    Parameters
    ----------
    backend : str
        The backend (currently ``pytorch``, ``mxnet`` or ``tensorflow``).
    version : str
        The version number of the backend. Anything from the first ``+``
        onwards is dropped before the file name is built.

    Raises
    ------
    NotImplementedError
        If ``sys.platform`` is not a linux, darwin or win platform.
    """
    global tensor_adapter_loaded
    release = version.split("+")[0]
    # Platform prefix -> file name template of the adapter library.
    templates = (
        ("linux", "libtensoradapter_%s_%s.so"),
        ("darwin", "libtensoradapter_%s_%s.dylib"),
        ("win", "tensoradapter_%s_%s.dll"),
    )
    for platform_prefix, template in templates:
        if sys.platform.startswith(platform_prefix):
            library_file = template % (backend, release)
            break
    else:
        raise NotImplementedError("Unsupported system: %s" % sys.platform)

    adapter_path = os.path.join(
        _DIR_NAME, "tensoradapter", backend, library_file
    )
    status = _LIB.DGLLoadTensorAdapter(adapter_path.encode("utf-8"))
    # A zero status code means the library accepted the adapter.
    tensor_adapter_loaded = status == 0
    if tensor_adapter_loaded:
        return
    logging.getLogger("dgl-core").debug(
        "Memory optimization with PyTorch is not enabled."
    )
class ModuleBase(object):
    """Python wrapper around a native DGL module handle.

    The handle is released through ``DGLModFree`` when the wrapper is
    garbage collected.
    """

    __slots__ = ["handle", "_entry", "entry_name"]

    def __init__(self, handle):
        self.handle = handle
        # Filled in lazily by ``entry_func``.
        self._entry = None
        self.entry_name = "__dgl_main__"

    def __del__(self):
        status = _LIB.DGLModFree(self.handle)
        check_call(status)

    @property
    def entry_func(self):
        """Get the entry function

        The function named ``entry_name`` is looked up on first access and
        cached afterwards.

        Returns
        -------
        f : Function
            The entry function if exist
        """
        if not self._entry:
            self._entry = self.get_function(self.entry_name)
        return self._entry

    def get_function(self, name, query_imports=False):
        """Look up a function exported by this module.

        Parameters
        ----------
        name : str
            The name of the function

        query_imports : bool
            Whether also query modules imported by this module.

        Returns
        -------
        f : Function
            The result function.

        Raises
        ------
        AttributeError
            If the backend returns an empty handle for ``name``.
        """
        func_handle = FunctionHandle()
        status = _LIB.DGLModGetFunction(
            self.handle,
            c_str(name),
            ctypes.c_int(query_imports),
            ctypes.byref(func_handle),
        )
        check_call(status)
        if func_handle.value:
            return Function(func_handle, False)
        raise AttributeError("Module has no function '%s'" % name)

    def import_module(self, module):
        """Add module to the import list of current one.

        Parameters
        ----------
        module : Module
            The other module.
        """
        status = _LIB.DGLModImport(self.handle, module.handle)
        check_call(status)

    def __getitem__(self, name):
        if isinstance(name, string_types):
            return self.get_function(name)
        raise ValueError("Can only take string as function name")

    def __call__(self, *args):
        # ``entry_func`` resolves and caches the entry point on first use.
        return self.entry_func(*args)
def get_global_func(name, allow_missing=False):
    """Look up a function in the global function table.

    Parameters
    ----------
    name : str
        The name of the global function

    allow_missing : bool
        Whether allow missing function or raise an error.

    Returns
    -------
    func : dgl.Function
        The function to be returned, None if function is missing.

    Raises
    ------
    ValueError
        If the function is missing and ``allow_missing`` is false.
    """
    func_handle = FunctionHandle()
    status = _LIB.DGLFuncGetGlobal(c_str(name), ctypes.byref(func_handle))
    check_call(status)
    if func_handle.value:
        return Function(func_handle, False)
    if allow_missing:
        return None
    raise ValueError("Cannot find global function %s" % name)
def _init_api_prefix(module_name, prefix):
    """Attach every public global function under ``prefix`` to a module.

    Parameters
    ----------
    module_name : str
        Key into ``sys.modules`` of the module that receives the functions.
    prefix : str
        Registry prefix; a global ``<prefix>.<fname>`` is exposed as
        ``<fname>``.

    Returns
    -------
    names : list of str
        The short names that were attached.
    """
    destination = sys.modules[module_name]
    exported = []
    for full_name in list_global_func_names():
        is_internal = full_name.startswith(
            "_"
        ) and not full_name.startswith("_deprecate")
        if is_internal:
            # internal APIs are ignored
            continue
        parts = full_name.rsplit(".", 1)
        if parts[0] != prefix:
            continue
        if len(parts) == 1:
            print('Warning: invalid API name "%s".' % full_name)
            continue
        short_name = parts[1]
        api = _get_api(get_global_func(full_name))
        api.__name__ = short_name
        api.__doc__ = "DGL PackedFunc %s. " % short_name
        setattr(destination, api.__name__, api)
        exported.append(short_name)
    return exported
def find_lib_path(name=None, search_path=None, optional=False):
    """Find dynamic library files.

    Parameters
    ----------
    name : str or list of str, optional
        File name(s) to look for. When omitted, the platform default for the
        DGL library is used (``libdgl.dll``/``dgl.dll``, ``libdgl.dylib`` or
        ``libdgl.so``).
    search_path : str, path-like, or list/tuple/set of those, optional
        Extra directories appended after the built-in candidates.
    optional : bool
        If true, return ``None`` instead of raising when nothing is found.

    Returns
    -------
    lib_path : list(string) or None
        List of all found path to the libraries, or ``None`` when nothing
        was found and ``optional`` is true.

    Raises
    ------
    ValueError
        If ``search_path`` is of an unsupported type.
    RuntimeError
        If nothing was found and ``optional`` is false.
    """
    # See https://github.com/dmlc/tvm/issues/281 for some background.

    # NB: This will either be the source directory (if DGL is run
    # inplace) or the install directory (if DGL is installed).
    # An installed DGL's curr_path will look something like:
    #   $PREFIX/lib/python3.6/site-packages/dgl/_ffi
    ffi_dir = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
    source_dir = os.path.join(ffi_dir, "..", "..", "..")
    install_lib_dir = os.path.join(ffi_dir, "..", "..", "..", "..")

    dll_path = []

    if os.environ.get("DGL_LIBRARY_PATH", None):
        dll_path.append(os.environ["DGL_LIBRARY_PATH"])

    if sys.platform.startswith("linux") and os.environ.get(
        "LD_LIBRARY_PATH", None
    ):
        dll_path.extend(
            [p.strip() for p in os.environ["LD_LIBRARY_PATH"].split(":")]
        )
    elif sys.platform.startswith("darwin") and os.environ.get(
        "DYLD_LIBRARY_PATH", None
    ):
        dll_path.extend(
            [p.strip() for p in os.environ["DYLD_LIBRARY_PATH"].split(":")]
        )

    # Pip lib directory
    dll_path.append(os.path.join(ffi_dir, ".."))
    # Default cmake build directory
    dll_path.append(os.path.join(source_dir, "build"))
    dll_path.append(os.path.join(source_dir, "build", "Release"))
    # Default make build directory
    dll_path.append(os.path.join(source_dir, "lib"))

    dll_path.append(install_lib_dir)

    if search_path is not None:
        if isinstance(search_path, (list, tuple, set)):
            dll_path = dll_path + list(search_path)
        elif isinstance(search_path, (str, os.PathLike)):
            # A single path-like is accepted too: the normalization below
            # already copes with pathlib.Path entries.
            dll_path.append(search_path)
        else:
            raise ValueError(
                "type(search_path)={} is invalid".format(type(search_path))
            )
    dll_path = [
        str(x.absolute()) if isinstance(x, pathlib.Path) else os.path.abspath(x)
        for x in dll_path
    ]

    if name is None:
        if sys.platform.startswith("win32"):
            name = ["libdgl.dll", "dgl.dll"]
        elif sys.platform.startswith("darwin"):
            name = "libdgl.dylib"
        else:
            name = "libdgl.so"

    if isinstance(name, str):
        name = [name]

    # Candidates are grouped by file name first, then by directory order.
    lib_dll_path = [os.path.join(p, n) for n in name for p in dll_path]
    lib_found = [p for p in lib_dll_path if os.path.isfile(p)]

    if not lib_found:
        message = (
            "Cannot find the files.\n"
            + "List of candidates:\n"
            + str("\n".join(lib_dll_path))
        )
        if not optional:
            raise RuntimeError(message)
        return None

    return lib_found
def context(dev_type, dev_id=0):
    """Build a DGLContext from a device type and a device id.

    Parameters
    ----------
    dev_type: int or str
        The device type mask or name of the device. For a string, only the
        first whitespace-separated token is looked up.

    dev_id : int, optional
        The integer device id

    Returns
    -------
    ctx: DGLContext
        The corresponding context.

    Raises
    ------
    ValueError
        If a device name is not a key of ``DGLContext.STR2MASK``.

    Examples
    --------
    .. code-block:: python

      assert dgl.context("cpu", 1) == dgl.cpu(1)
      assert dgl.context("gpu", 0) == dgl.gpu(0)
      assert dgl.context("cuda", 0) == dgl.gpu(0)
    """
    if not isinstance(dev_type, string_types):
        return DGLContext(dev_type, dev_id)
    known_masks = DGLContext.STR2MASK
    device_name = dev_type.split()[0]
    if device_name not in known_masks:
        raise ValueError("Unknown device type %s" % device_name)
    return DGLContext(known_masks[device_name], dev_id)
def empty_shared_mem(name, is_create, shape, dtype="float32"):
    """Allocate an uninitialized array backed by named shared memory.

    Parameters
    ----------
    name : string
        The name of the shared memory. It's a file name in Unix.
    is_create : bool
        Whether to create the shared memory or use the one created by somewhere else.
    shape : tuple of int
        The shape of the array
    dtype : type or str
        The data type of the array.

    Returns
    -------
    arr : dgl.nd.NDArray
        The array dgl supported.
    """
    c_name = ctypes.c_char_p(name.encode("utf-8"))
    c_shape = c_array(dgl_shape_index_t, shape)
    c_ndim = ctypes.c_int(len(c_shape))
    out_handle = DGLArrayHandle()
    elem_type = DGLDataType(dtype)
    status = _LIB.DGLArrayAllocSharedMem(
        c_name,
        c_shape,
        c_ndim,
        ctypes.c_int(elem_type.type_code),
        ctypes.c_int(elem_type.bits),
        ctypes.c_int(elem_type.lanes),
        is_create,
        ctypes.byref(out_handle),
    )
    check_call(status)
    return _make_array(out_handle, False)
class NDArrayBase(_NDArrayBase):
    """A simple Device/CPU Array object in runtime."""

    @property
    def shape(self):
        """Shape of this array"""
        return tuple(
            self.handle.contents.shape[i]
            for i in range(self.handle.contents.ndim)
        )

    @property
    def dtype(self):
        """Type of this array"""
        return str(self.handle.contents.dtype)

    @property
    def ctx(self):
        """context of this array"""
        return self.handle.contents.ctx

    @property
    def context(self):
        """context of this array"""
        return self.ctx

    def __hash__(self):
        # Identity hash: the numeric address stored in the handle.
        return ctypes.cast(self.handle, ctypes.c_void_p).value

    def __eq__(self, other):
        return self.same_as(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def same_as(self, other):
        """Check object identity equality

        Parameters
        ----------
        other : object
            The other object to compare to

        Returns
        -------
        same : bool
            Whether other is same as self.
        """
        if not isinstance(other, NDArrayBase):
            return False
        return self.__hash__() == other.__hash__()

    def __setitem__(self, in_slice, value):
        """Set ndarray value

        Only the full slice ``arr[:]`` is accepted as the index.
        """
        if (
            not isinstance(in_slice, slice)
            or in_slice.start is not None
            or in_slice.stop is not None
        ):
            raise ValueError("Array only support set from numpy array")
        if isinstance(value, NDArrayBase):
            if value.handle is not self.handle:
                value.copyto(self)
        elif isinstance(value, (np.ndarray, np.generic)):
            self.copyfrom(value)
        else:
            raise TypeError("type %s not supported" % str(type(value)))

    def _numpy_shape_and_dtype(self):
        """Return the ``(shape, dtype)`` numpy should use for this array.

        A vector type with ``lanes > 1`` is exposed to numpy as one extra
        trailing axis of the scalar type. The ``DGLDataType`` returned by the
        constructor is a shared cached instance, so it is only read here and
        never modified.
        """
        shape, dtype = self.shape, self.dtype
        lanes = DGLDataType(dtype).lanes
        if lanes > 1:
            shape = shape + (lanes,)
            # "<scalar>x<lanes>" -> "<scalar>"
            dtype = dtype.split("x")[0]
        return shape, dtype

    def copyfrom(self, source_array):
        """Perform a synchronized copy from the array.

        Parameters
        ----------
        source_array : array_like
            The data source we should like to copy from.

        Returns
        -------
        arr : NDArray
            Reference to self.

        Raises
        ------
        TypeError
            If ``source_array`` cannot be converted to a numpy array.
        ValueError
            If the shape of ``source_array`` does not match this array.
        """
        if isinstance(source_array, NDArrayBase):
            source_array.copyto(self)
            return self

        if not isinstance(source_array, np.ndarray):
            try:
                source_array = np.asarray(source_array, dtype=self.dtype)
            except Exception:
                # Only ordinary errors are translated; KeyboardInterrupt and
                # SystemExit must keep propagating unchanged.
                raise TypeError(
                    "array must be an array_like data,"
                    + "type %s is not supported" % str(type(source_array))
                )
        shape, dtype = self._numpy_shape_and_dtype()

        if source_array.shape != shape:
            raise ValueError(
                "array shape do not match the shape of NDArray {0} vs {1}".format(
                    source_array.shape, shape
                )
            )
        source_array = np.ascontiguousarray(source_array, dtype=dtype)
        assert source_array.flags["C_CONTIGUOUS"]
        data = source_array.ctypes.data_as(ctypes.c_void_p)
        nbytes = ctypes.c_size_t(
            source_array.size * source_array.dtype.itemsize
        )
        check_call(_LIB.DGLArrayCopyFromBytes(self.handle, data, nbytes))
        return self

    def __repr__(self):
        res = "dgl.{0}@{1}".format(self.asnumpy().__repr__(), self.context)
        return res

    def __str__(self):
        return str(self.asnumpy())

    def asnumpy(self):
        """Convert this array to numpy array

        Returns
        -------
        np_arr : numpy.ndarray
            The corresponding numpy array.
        """
        shape, dtype = self._numpy_shape_and_dtype()
        np_arr = np.empty(shape, dtype=dtype)
        assert np_arr.flags["C_CONTIGUOUS"]
        data = np_arr.ctypes.data_as(ctypes.c_void_p)
        nbytes = ctypes.c_size_t(np_arr.size * np_arr.dtype.itemsize)
        check_call(_LIB.DGLArrayCopyToBytes(self.handle, data, nbytes))
        return np_arr

    def copyto(self, target):
        """Copy array to target

        Parameters
        ----------
        target : NDArray or DGLContext
            The target array to be copied, must have same shape as this array.
            When a context is given, a new array with this array's shape and
            dtype is allocated on it first.

        Returns
        -------
        target : NDArray
            The array that received the data.

        Raises
        ------
        ValueError
            If ``target`` is neither an array nor a context.
        """
        if isinstance(target, DGLContext):
            target = empty(self.shape, self.dtype, target)
        if isinstance(target, NDArrayBase):
            check_call(_LIB.DGLArrayCopyFromTo(self.handle, target.handle))
        else:
            raise ValueError("Unsupported target type %s" % str(type(target)))
        return target

    def pin_memory_(self):
        """Pin host memory and map into GPU address space (in-place)"""
        check_call(_LIB.DGLArrayPinData(self.handle))

    def unpin_memory_(self):
        """Unpin host memory pinned by pin_memory_()"""
        check_call(_LIB.DGLArrayUnpinData(self.handle))

    def record_stream(self, stream):
        """Record the stream that is using this tensor.

        Note
        ----
        This API is more for testing. Users should call ``record_stream``
        on torch.Tensor or dgl.graph directly.

        Parameters
        ----------
        stream : DGLStreamHandle

        """
        check_call(_LIB.DGLArrayRecordStream(self.handle, stream))
class ObjectBase(_ObjectBase):
    """Base class of every Python object backed by the DGL C API.

    The core attribute is ``handle``, which is a C raw pointer. It must be
    initialized via ``__init_handle_by_constructor__``.

    Note that the same handle **CANNOT** be shared across multiple ObjectBase
    instances.
    """

    def __dir__(self):
        names_ptr = ctypes.POINTER(ctypes.c_char_p)()
        count = ctypes.c_uint()
        status = _LIB.DGLObjectListAttrNames(
            self.handle, ctypes.byref(count), ctypes.byref(names_ptr)
        )
        check_call(status)
        return [py_str(names_ptr[i]) for i in range(count.value)]

    def __hash__(self):
        return _api_internal._raw_ptr(self)

    def __eq__(self, other):
        return self.same_as(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __reduce__(self):
        # ``__getstate__`` raises, so pickling an object fails right here.
        return (_new_object, (type(self),), self.__getstate__())

    def __getstate__(self):
        # TODO(minjie): TVM assumes that a Node (Object in DGL) can be
        # serialized to json. That does not hold in DGL, where an Object is a
        # runtime API value and may contain binary data such as NDArray.
        # Please raise an RFC on the DGL issue tracker if this is required.
        raise RuntimeError("__getstate__ is not supported for object type")

    def __setstate__(self, state):
        # pylint: disable=assigning-non-slot
        # TODO(minjie): see ``__getstate__``; deserialization is unsupported
        # for the same reason.
        raise RuntimeError("__setstate__ is not supported for object type")

    def same_as(self, other):
        """Return whether ``other`` is an ObjectBase with the same hash."""
        return (
            isinstance(other, ObjectBase)
            and self.__hash__() == other.__hash__()
        )
class ObjectGeneric(object):
    """Interface for Python values that know how to turn into an object.

    Subclasses are expected to override ``asobject``.
    """

    def asobject(self):
        """Return the object form of this value; the base version raises."""
        raise NotImplementedError
""" if isinstance(value, _CLASS_OBJECT_BASE): return value if isinstance(value, (list, tuple)): value = [convert_to_object(x) for x in value] return _api_internal._List(*value) if isinstance(value, dict): vlist = [] for item in value.items(): if not isinstance(item[0], _CLASS_OBJECT_BASE) and not isinstance( item[0], string_types ): raise ValueError( "key of map must already been a container type" ) vlist.append(item[0]) vlist.append(convert_to_object(item[1])) return _api_internal._Map(*vlist) if isinstance(value, ObjectGeneric): return value.asobject() return _api_internal._Value(value) ================================================ FILE: python/dgl/_ffi/runtime_ctypes.py ================================================ """Common runtime ctypes.""" # pylint: disable=invalid-name, super-init-not-called from __future__ import absolute_import import ctypes import json import numpy as np from .. import _api_internal from .base import _LIB, check_call dgl_shape_index_t = ctypes.c_int64 class TypeCode(object): """Type code used in API calls""" INT = 0 UINT = 1 FLOAT = 2 HANDLE = 3 NULL = 4 DGL_DATA_TYPE = 5 DGL_CONTEXT = 6 ARRAY_HANDLE = 7 OBJECT_HANDLE = 8 MODULE_HANDLE = 9 FUNC_HANDLE = 10 STR = 11 BYTES = 12 NDARRAY_CONTAINER = 13 EXT_BEGIN = 15 class DGLByteArray(ctypes.Structure): """Temp data structure for byte array.""" _fields_ = [ ("data", ctypes.POINTER(ctypes.c_byte)), ("size", ctypes.c_size_t), ] class DGLDataType(ctypes.Structure): """DGL datatype structure""" _fields_ = [ ("type_code", ctypes.c_uint8), ("bits", ctypes.c_uint8), ("lanes", ctypes.c_uint16), ] CODE2STR = {0: "int", 1: "uint", 2: "float", 4: "handle"} _cache = {} def __new__(cls, type_str): if type_str in cls._cache: return cls._cache[type_str] inst = super(DGLDataType, cls).__new__(DGLDataType) if isinstance(type_str, np.dtype): type_str = str(type_str) arr = type_str.split("x") head = arr[0] inst.lanes = int(arr[1]) if len(arr) > 1 else 1 bits = 32 if head.startswith("int"): 
# Device types at or above this value encode an RPC session index in
# their high part (see ``DGLContext.__repr__``).
RPC_SESS_MASK = 128


class DGLContext(ctypes.Structure):
    """DGL context structure.

    A context is a ``(device_type, device_id)`` pair. Instances are cached
    per pair, so constructing the same context twice returns the same
    object.

    Parameters
    ----------
    device_type : int
        The device type code (see ``MASK2STR`` / ``STR2MASK``).
    device_id : int
        The index of the device of that type.
    """

    _fields_ = [("device_type", ctypes.c_int), ("device_id", ctypes.c_int)]
    # Device type code -> canonical device name.
    MASK2STR = {
        1: "cpu",
        2: "gpu",
        4: "opencl",
        5: "aocl",
        6: "sdaccel",
        7: "vulkan",
        8: "metal",
        9: "vpi",
        10: "rocm",
        11: "opengl",
        12: "ext_dev",
    }
    # Device/target name (including aliases) -> device type code.
    STR2MASK = {
        "llvm": 1,
        "stackvm": 1,
        "cpu": 1,
        "gpu": 2,
        "cuda": 2,
        "nvptx": 2,
        "cl": 4,
        "opencl": 4,
        "aocl": 5,
        "aocl_sw_emu": 5,
        "sdaccel": 6,
        "vulkan": 7,
        "metal": 8,
        "vpi": 9,
        "rocm": 10,
        "opengl": 11,
        "ext_dev": 12,
    }
    # (device_type, device_id) -> shared DGLContext instance.
    _cache = {}

    def __new__(cls, device_type, device_id):
        key = (device_type, device_id)
        if key in cls._cache:
            return cls._cache[key]

        inst = super(DGLContext, cls).__new__(DGLContext)
        inst.device_type = device_type
        inst.device_id = device_id
        cls._cache[key] = inst
        return inst

    def __init__(self, device_type, device_id):
        # Fields are populated in __new__; the default Structure.__init__
        # must not run again on a cached instance.
        pass

    @property
    def exist(self):
        """Whether this device exists."""
        return (
            _api_internal._GetDeviceAttr(self.device_type, self.device_id, 0)
            != 0
        )

    @property
    def max_threads_per_block(self):
        """Maximum number of threads on each block."""
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 1)

    @property
    def warp_size(self):
        """Number of threads that execute concurrently."""
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 2)

    @property
    def max_shared_memory_per_block(self):
        """Total amount of shared memory per block in bytes."""
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 3)

    @property
    def compute_version(self):
        """Get the compute version number as a string.

        Currently used to get compute capability of CUDA device.

        Returns
        -------
        version : str
            The version string in `major.minor` format.
        """
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 4)

    @property
    def device_name(self):
        """Return the string name of device."""
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 5)

    @property
    def max_clock_rate(self):
        """Return the max clock frequency of device."""
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 6)

    @property
    def multi_processor_count(self):
        """Return the number of compute units of device."""
        return _api_internal._GetDeviceAttr(self.device_type, self.device_id, 7)

    @property
    def max_thread_dimensions(self):
        """Return the maximum size of each thread axis

        Returns
        -------
        dims: List of int
            The maximum length of threadIdx.x, threadIdx.y, threadIdx.z
        """
        # The attribute is returned as a JSON-encoded string.
        return json.loads(
            _api_internal._GetDeviceAttr(self.device_type, self.device_id, 8)
        )

    def sync(self):
        """Synchronize until jobs finished at the context."""
        check_call(_LIB.DGLSynchronize(self.device_type, self.device_id, None))

    def __eq__(self, other):
        return (
            isinstance(other, DGLContext)
            and self.device_id == other.device_id
            and self.device_type == other.device_type
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        if self.device_type >= RPC_SESS_MASK:
            # Floor division keeps the session table index an integer; true
            # division produced a float that only printed correctly because
            # "%d" truncates it.
            tbl_id = self.device_type // RPC_SESS_MASK - 1
            dev_type = self.device_type % RPC_SESS_MASK
            return "remote[%d]:%s(%d)" % (
                tbl_id,
                DGLContext.MASK2STR[dev_type],
                self.device_id,
            )
        return "%s(%d)" % (
            DGLContext.MASK2STR[self.device_type],
            self.device_id,
        )

    def __hash__(self):
        return hash((self.device_type, self.device_id))
def infer_broadcast_shape(op, shp1, shp2):
    r"""Validate two feature shapes and infer the broadcast output shape.

    Both inputs and the returned value are feature shapes, i.e. the first
    dimension (number of nodes, number of edges, etc.) has already been
    removed. Operands with different shapes are combined following the
    broadcasting semantics of Numpy/Scipy:
    https://numpy.org/doc/stable/user/basics.broadcasting.html

    Parameters
    ----------
    op : str
        The binary op's name, could be `add`, `sub`, `mul`, `div`, `dot`,
        `copy_lhs`, `copy_rhs`.
    shp1 : tuple[int]
        The shape of lhs operand.
    shp2 : tuple[int]
        The shape of rhs operand.

    Returns
    -------
    tuple[int]
        The shape after broadcasting. For `copy_lhs` / `copy_rhs` the
        corresponding input shape is returned as is; for `dot` the last
        dimension of the result is 1.

    Raises
    ------
    DGLError
        If `op` is `dot` and the last dimensions differ, or if the shapes
        cannot be broadcast against each other.
    """
    if op == "dot" and shp1[-1] != shp2[-1]:
        raise DGLError(
            "Dot operator is only available for arrays with the "
            "same size on last dimension, but got {} and {}.".format(
                shp1, shp2
            )
        )
    if op == "copy_lhs":
        return shp1
    if op == "copy_rhs":
        return shp2

    # The shorter shape is left-padded with 1's so both have the same rank.
    lhs_dims, rhs_dims = shp1, shp2
    rank_gap = len(shp1) - len(shp2)
    if rank_gap > 0:
        rhs_dims = (1,) * rank_gap + shp2
    elif rank_gap < 0:
        lhs_dims = (1,) * (-rank_gap) + shp1

    merged = []
    for lhs_dim, rhs_dim in zip(lhs_dims, rhs_dims):
        compatible = lhs_dim == rhs_dim or lhs_dim == 1 or rhs_dim == 1
        if not compatible:
            raise DGLError(
                "Feature shapes {} and {} are not valid for broadcasting.".format(
                    shp1, shp2
                )
            )
        merged.append(max(lhs_dim, rhs_dim))

    result = tuple(merged)
    if op == "dot":
        # The dot product reduces the last dimension to size 1.
        return result[:-1] + (1,)
    return result
""" if F.ndim(e) == 1: e = F.unsqueeze(e, -1) expand = True else: expand = False myout = F.zeros_like(e) _CAPI_DGLKernelEdge_softmax_forward( gidx, op, to_dgl_nd(None), to_dgl_nd(e), to_dgl_nd_for_write(myout) ) myout = F.squeeze(myout, -1) if expand else myout return myout def _gspmm(gidx, op, reduce_op, u, e): r"""Generalized Sparse Matrix Multiplication interface. It takes the result of :attr:`op` on source node feature and edge feature, leads to a message on edge. Then aggregates the message by :attr:`reduce_op` on destination nodes. .. math:: x_v = \psi_{(u, v, e)\in \mathcal{G}}(\rho(x_u, x_e)) where :math:`x_v` is the returned feature on destination nodes, and :math`x_u`, :math:`x_e` refers to :attr:`u`, :attr:`e` respectively. :math:`\rho` means binary operator :attr:`op` and :math:`\psi` means reduce operator :attr:`reduce_op`, :math:`\mathcal{G}` is the graph we apply gspmm on: :attr:`g`. Note that this function does not handle gradients. Parameters ---------- gidx : HeteroGraphIndex The input graph index. op : str The binary op's name, could be ``add``, ``sub``, ``mul``, ``div``, ``copy_lhs``, ``copy_rhs``. reduce_op : str Reduce operator, could be ``sum``, ``max``, ``min``. u : tensor or None The feature on source nodes, could be None if op is ``copy_rhs``. e : tensor or None The feature on edges, could be None if op is ``copy_lhs``. Returns ------- tuple The returned tuple is composed of two elements: - The first element refers to the result tensor. - The second element refers to a tuple composed of arg_u and arg_e (which is useful when reducer is `min`/`max`). Notes ----- This function does not handle gradients. 
""" if gidx.number_of_etypes() != 1: raise DGLError("We only support gspmm on graph with one edge type") use_u = op != "copy_rhs" use_e = op != "copy_lhs" if use_u and use_e: if F.dtype(u) != F.dtype(e): raise DGLError( "The node features' data type {} doesn't match edge" " features' data type {}, please convert them to the" " same type.".format(F.dtype(u), F.dtype(e)) ) # deal with scalar features. expand_u, expand_e = False, False if use_u: if F.ndim(u) == 1: u = F.unsqueeze(u, -1) expand_u = True if use_e: if F.ndim(e) == 1: e = F.unsqueeze(e, -1) expand_e = True ctx = F.context(u) if use_u else F.context(e) dtype = F.dtype(u) if use_u else F.dtype(e) u_shp = F.shape(u) if use_u else (0,) e_shp = F.shape(e) if use_e else (0,) _, dsttype = gidx.metagraph.find_edge(0) v_shp = (gidx.num_nodes(dsttype),) + infer_broadcast_shape( op, u_shp[1:], e_shp[1:] ) v = F.zeros(v_shp, dtype, ctx) use_cmp = reduce_op in ["max", "min"] arg_u, arg_e = None, None idtype = getattr(F, gidx.dtype) if use_cmp: if use_u: arg_u = F.zeros(v_shp, idtype, ctx) if use_e: arg_e = F.zeros(v_shp, idtype, ctx) arg_u_nd = to_dgl_nd_for_write(arg_u) arg_e_nd = to_dgl_nd_for_write(arg_e) if gidx.num_edges(0) > 0: _CAPI_DGLKernelSpMM( gidx, op, reduce_op, to_dgl_nd(u if use_u else None), to_dgl_nd(e if use_e else None), to_dgl_nd_for_write(v), arg_u_nd, arg_e_nd, ) # NOTE(zihao): actually we can avoid the following step, because arg_*_nd # refers to the data that stores arg_*. After we call _CAPI_DGLKernelSpMM, # arg_* should have already been changed. But we found this doesn't work # under Tensorflow when index type is int32. (arg_u and arg_e would be # all zero). # The workaround is proposed by Jinjing, and we still need to investigate # where the problem is. arg_u = None if arg_u is None else F.zerocopy_from_dgl_ndarray(arg_u_nd) arg_e = None if arg_e is None else F.zerocopy_from_dgl_ndarray(arg_e_nd) # To deal with scalar node/edge features. 
def _gspmm_hetero(gidx, op, reduce_op, u_len, u_and_e_tuple):
    r"""Generalized Sparse Matrix Multiplication interface on heterogeneous graphs.

    It handles multiple node and edge types of the graph. For each edge type, it takes
    the result of :attr:`op` on source node feature and edge feature, and leads to a
    message on edge. Then it aggregates the message by :attr:`reduce_op` on the destination
    nodes of the etype.

    .. math::
        x_v = \psi_{(u, v, e)\in \mathcal{G}}(\rho(x_u, x_e))

    where :math:`x_v` is the returned feature on destination nodes, and :math:`x_u`,
    :math:`x_e` refers to :attr:`u`, :attr:`e` respectively. :math:`\rho` means binary
    operator :attr:`op` and :math:`\psi` means reduce operator :attr:`reduce_op`,
    :math:`\mathcal{G}` is the graph we apply gspmm on: :attr:`g`.

    Note that this function does not handle gradients.

    Parameters
    ----------
    gidx : HeteroGraphIndex
        The input graph index.
    op : str
        The binary op's name, could be ``add``, ``sub``, ``mul``, ``div``,
        ``copy_lhs``, ``copy_rhs``.
    reduce_op : str
        Reduce operator, could be ``sum``, ``max``, ``min``.
    u_len : int
        The number of tensors in ``u`` (source node features)
    u_and_e_tuple : Tuple of tensors
        Tuple of source nodes' features and edges' features. ``u_and_e_tuple[:u_len]``
        stores the source nodes' features of all source node types.
        ``u_and_e_tuple[u_len:]`` stores the edges' features of all the edge types.
        The source nodes' features of the source node types could be None if op is
        ``copy_rhs``. The edges' features of the edge types could be None if op is
        ``copy_lhs``.

    Returns
    -------
    tuple
        The returned tuple is composed of two elements:

        - The first element refers to the tuple of result tensors, one entry
          per node type (an empty tensor where no result was produced).
        - The second element refers to a tuple composed of four lists:
          arg_u, arg_e, arg_u_ntype and arg_e_etype (which is useful when
          reducer is `min`/`max`).

    Notes
    -----
    This function does not handle gradients.
    """
    # Split the flat tuple back into node features and edge features.
    u_tuple, e_tuple = u_and_e_tuple[:u_len], u_and_e_tuple[u_len:]
    use_u = op != "copy_rhs"
    use_e = op != "copy_lhs"
    # TODO (Israt): Add check - F.dtype(u) != F.dtype(e):

    # deal with scalar features.
    expand_u, expand_e = False, False
    num_ntypes = gidx.number_of_ntypes()
    num_etypes = gidx.number_of_etypes()
    # Inputs are indexed by source node type (u) and edge type (e);
    # outputs (v) are indexed by destination node type.
    list_u = [None] * num_ntypes
    list_v = [None] * num_ntypes
    list_e = [None] * num_etypes

    # Argmin/argmax buffers. Each "*_nd" list holds the DGL ndarray views
    # handed to the C kernel for the tensors in the matching list.
    list_arg_u_nd = [None] * num_ntypes
    list_arg_u = [None] * num_ntypes
    list_arg_u_ntype_nd = [None] * num_ntypes
    list_arg_u_ntype = [None] * num_ntypes
    # TODO(Israt): double check ntype or etype
    list_arg_e_nd = [None] * num_ntypes
    list_arg_e = [None] * num_ntypes
    list_arg_e_etype_nd = [None] * num_ntypes
    list_arg_e_etype = [None] * num_ntypes

    use_cmp = reduce_op in ["max", "min"]
    idtype = getattr(F, gidx.dtype)

    for etid in range(num_etypes):
        src_id, dst_id = gidx.metagraph.find_edge(etid)
        u = u_tuple[src_id] if use_u else None
        e = e_tuple[etid] if use_e else None
        if use_u:
            # 1-D (scalar) features get a trailing unit dimension.
            if u is not None and F.ndim(u) == 1:
                u = F.unsqueeze(u, -1)
                expand_u = True
            list_u[src_id] = u if use_u else None
        if use_e:
            if e is not None and F.ndim(e) == 1:
                e = F.unsqueeze(e, -1)
                expand_e = True
            list_e[etid] = e if use_e else None
        ctx = (
            F.context(u) if use_u else F.context(e)
        )  # TODO(Israt): Put outside of loop
        dtype = (
            F.dtype(u) if use_u else F.dtype(e)
        )  # TODO(Israt): Put outside of loop
        u_shp = F.shape(u) if use_u else (0,)
        e_shp = F.shape(e) if use_e else (0,)
        v_shp = (gidx.num_nodes(dst_id),) + infer_broadcast_shape(
            op, u_shp[1:], e_shp[1:]
        )
        # NOTE(review): edge types sharing a destination type overwrite the
        # same slot here — presumably all of them yield the same shape; confirm.
        list_v[dst_id] = F.zeros(v_shp, dtype, ctx)
        if use_cmp:
            if use_u:
                list_arg_u[dst_id] = F.zeros(v_shp, idtype, ctx)
                list_arg_u_ntype[dst_id] = F.zeros(v_shp, idtype, ctx)
            if use_e:
                list_arg_e[dst_id] = F.zeros(v_shp, idtype, ctx)
                list_arg_e_etype[dst_id] = F.zeros(v_shp, idtype, ctx)
        # When the buffer above is None this yields the null ndarray
        # returned by to_dgl_nd_for_write.
        list_arg_u_nd[dst_id] = to_dgl_nd_for_write(list_arg_u[dst_id])
        list_arg_u_ntype_nd[dst_id] = to_dgl_nd_for_write(
            list_arg_u_ntype[dst_id]
        )
        list_arg_e_nd[dst_id] = to_dgl_nd_for_write(list_arg_e[dst_id])
        list_arg_e_etype_nd[dst_id] = to_dgl_nd_for_write(
            list_arg_e_etype[dst_id]
        )

    # NOTE(review): only edge type 0 is checked for emptiness before the
    # kernel is invoked — confirm this is intended for graphs whose first
    # edge type is empty while others are not.
    if gidx.num_edges(0) > 0:
        _CAPI_DGLKernelSpMMHetero(
            gidx,
            op,
            reduce_op,
            [to_dgl_nd(u_i) for u_i in list_u],
            [to_dgl_nd(e_i) for e_i in list_e],
            [to_dgl_nd_for_write(v_i) for v_i in list_v],
            list_arg_u_nd,
            list_arg_e_nd,
            list_arg_u_ntype_nd,
            list_arg_e_etype_nd,
        )

    # Read the arg buffers back from the ndarrays passed to the kernel.
    for l, arg_u_nd in enumerate(list_arg_u_nd):
        # TODO(Israt): l or src_id as index of lhs
        list_arg_u[l] = (
            None
            if list_arg_u[l] is None
            else F.zerocopy_from_dgl_ndarray(arg_u_nd)
        )
        if list_arg_u[l] is not None and expand_u and use_cmp:
            list_arg_u[l] = F.squeeze(list_arg_u[l], -1)
    for l, arg_e_nd in enumerate(list_arg_e_nd):
        list_arg_e[l] = (
            None
            if list_arg_e[l] is None
            else F.zerocopy_from_dgl_ndarray(arg_e_nd)
        )
        if list_arg_e[l] is not None and expand_e and use_cmp:
            list_arg_e[l] = F.squeeze(list_arg_e[l], -1)
    for l, arg_u_ntype_nd in enumerate(list_arg_u_ntype_nd):
        list_arg_u_ntype[l] = (
            None
            if arg_u_ntype_nd is None
            else F.zerocopy_from_dgl_ndarray(arg_u_ntype_nd)
        )
    for l, arg_e_etype_nd in enumerate(list_arg_e_etype_nd):
        list_arg_e_etype[l] = (
            None
            if arg_e_etype_nd is None
            else F.zerocopy_from_dgl_ndarray(arg_e_etype_nd)
        )

    # To deal with scalar node/edge features.
    for l in range(num_ntypes):
        # replace None by empty tensor. Forward func doesn't accept None in tuple.
        v = list_v[l]
        v = F.tensor([]) if v is None else v
        if (expand_u or not use_u) and (expand_e or not use_e):
            v = F.squeeze(v, -1)  # To deal with scalar node/edge features.
        list_v[l] = v
    out = tuple(list_v)
    return out, (list_arg_u, list_arg_e, list_arg_u_ntype, list_arg_e_etype)
def _gsddmm_hetero(
    gidx, op, lhs_len, lhs_target="u", rhs_target="v", lhs_and_rhs_tuple=None
):
    r"""Generalized Sampled-Dense-Dense Matrix Multiplication interface on
    heterogeneous graphs.

    For every edge type, applies :attr:`op` to the left and right operand
    features selected by :attr:`lhs_target` / :attr:`rhs_target` and stores
    the result as a feature on the edges of that type.

    Parameters
    ----------
    gidx : HeteroGraphIndex
        The input graph index.
    op : str
        Binary operator. ``copy_lhs`` ignores the right operand and
        ``copy_rhs`` ignores the left operand.
    lhs_len : int
        The number of tensors in ``lhs_and_rhs_tuple`` that belong to the
        left operand.
    lhs_target : str
        The target of left hand operand, could be ``src``, ``edge``, ``dst``
        or their alias ``u``, ``e``, ``v``.
    rhs_target : str
        The target of right hand operand, could be ``src``, ``edge``, ``dst``
        or their alias ``u``, ``e``, ``v``.
    lhs_and_rhs_tuple : tuple of tensors
        ``lhs_and_rhs_tuple[:lhs_len]`` stores the left operand features and
        ``lhs_and_rhs_tuple[lhs_len:]`` the right operand features, indexed
        by node type or edge type according to the respective target.

    Returns
    -------
    tuple of tensors
        One result tensor per edge type.

    Notes
    -----
    This function does not handle gradients.
    """
    lhs_tuple, rhs_tuple = (
        lhs_and_rhs_tuple[:lhs_len],
        lhs_and_rhs_tuple[lhs_len:],
    )

    use_lhs = op != "copy_rhs"
    use_rhs = op != "copy_lhs"

    # TODO (Israt): Add check - F.dtype(u) != F.dtype(e):

    # deal with scalar features.
    expand_lhs, expand_rhs = False, False

    num_ntype = gidx.number_of_ntypes()
    num_etype = gidx.number_of_etypes()

    # Resolve the targets to their integer codes *before* sizing the operand
    # lists, so that the long aliases ("src"/"dst"/"edge") are sized the same
    # way as the short ones ("u"/"v"/"e"). Edge-targeted operands are indexed
    # by edge type, all others by node type.
    lhs_target = target_mapping[lhs_target]
    rhs_target = target_mapping[rhs_target]
    edge_target = target_mapping["e"]
    lhs_list = [None] * (num_etype if lhs_target == edge_target else num_ntype)
    rhs_list = [None] * (num_etype if rhs_target == edge_target else num_ntype)
    out_list = [None] * num_etype

    for etid in range(num_etype):
        lhs_id = get_typeid_by_target(gidx, etid, lhs_target)
        rhs_id = get_typeid_by_target(gidx, etid, rhs_target)
        lhs = lhs_tuple[lhs_id]
        rhs = rhs_tuple[rhs_id]
        if use_lhs:
            # 1-D (scalar) features get a trailing unit dimension.
            if lhs is not None and F.ndim(lhs) == 1:
                lhs = F.unsqueeze(lhs, -1)
                expand_lhs = True
        if use_rhs:
            if rhs is not None and F.ndim(rhs) == 1:
                rhs = F.unsqueeze(rhs, -1)
                expand_rhs = True
        ctx = F.context(lhs) if use_lhs else F.context(rhs)
        dtype = F.dtype(lhs) if use_lhs else F.dtype(rhs)
        lhs_shp = F.shape(lhs) if use_lhs else (0,)
        rhs_shp = F.shape(rhs) if use_rhs else (0,)
        lhs_list[lhs_id] = lhs if use_lhs else None
        rhs_list[rhs_id] = rhs if use_rhs else None
        out_shp = (gidx.num_edges(etid),) + infer_broadcast_shape(
            op, lhs_shp[1:], rhs_shp[1:]
        )
        out_list[etid] = F.empty(out_shp, dtype, ctx)

    # NOTE(review): only edge type 0 is checked for emptiness before the
    # kernel is invoked — confirm this is intended for graphs whose first
    # edge type is empty while others are not.
    if gidx.num_edges(0) > 0:
        _CAPI_DGLKernelSDDMMHetero(
            gidx,
            op,
            [to_dgl_nd(lhs) for lhs in lhs_list],
            [to_dgl_nd(rhs) for rhs in rhs_list],
            [to_dgl_nd_for_write(out) for out in out_list],
            lhs_target,
            rhs_target,
        )

    for l in range(num_etype):
        # Replace None by empty tensor. Forward func doesn't accept None in tuple.
        e = out_list[l]
        e = F.tensor([]) if e is None else e
        # Undo the trailing unit dimension added for scalar features.
        if (expand_lhs or not use_lhs) and (expand_rhs or not use_rhs):
            e = F.squeeze(e, -1)
        out_list[l] = e
    out = tuple(out_list)
    return out
def _scatter_add(x, idx, m):
    r"""Scatter-add rows of ``x`` into a zero-initialized output.

    Math: y[idx[i], *] += x[i, *]

    Parameters
    ----------
    x : Tensor
        The input feature.
    idx : Tensor
        The indices array.
    m : int
        The length of output.

    Returns
    -------
    Tensor
        The output tensor, whose first dimension is ``m`` and whose
        remaining dimensions, dtype and context follow ``x``.
    """
    feat_shape = F.shape(x)[1:]
    device = F.context(x)
    value_type = F.dtype(x)
    result = F.zeros((m,) + feat_shape, value_type, device)
    # The kernel accumulates into ``result`` through the writable view.
    _CAPI_DGLKernelScatterAdd(
        to_dgl_nd(x), to_dgl_nd(idx), to_dgl_nd_for_write(result)
    )
    return result
def _bwd_segment_cmp(feat, arg, m):
    r"""Backward phase of segment reduction (for 'min'/'max' reduction).

    Computes the gradient of the input feature from the output gradient of
    the segment reduction result.

    Parameters
    ----------
    feat : Tensor
        The output gradient
    arg : Tensor
        The ArgMin/Max tensor produced by segment_reduce op.
    m : int
        The length of input gradients' first dimension.

    Returns
    -------
    Tensor
        The input gradient, whose first dimension is ``m`` and whose
        remaining dimensions, dtype and context follow ``feat``.
    """
    trailing_shape = F.shape(feat)[1:]
    device = F.context(feat)
    value_type = F.dtype(feat)
    grad_in = F.zeros((m,) + trailing_shape, value_type, device)
    # The kernel writes into ``grad_in`` through the writable view.
    _CAPI_DGLKernelBwdSegmentCmp(
        to_dgl_nd(feat), to_dgl_nd(arg), to_dgl_nd_for_write(grad_in)
    )
    return grad_in
def _csrsum(As, A_weights):
    """Return a graph whose adjacency matrix is the sparse matrix summation
    of the given list of graphs.

    The edge weights of all graphs must be scalar, i.e. the arrays in
    :attr:`A_weights` must be 1D vectors.

    Parameters
    ----------
    As : list[HeteroGraphIndex]
        The input graph indices.
    A_weights : list[Tensor]
        The edge weights of the input graphs, each as a 1D tensor.

    Returns
    -------
    C : HeteroGraphIndex
        The output graph index.
    C_weights : Tensor
        The edge weights of the output graph.
    """
    # Convert every weight tensor to a DGL ndarray before the C call.
    weight_arrays = [F.to_dgl_nd(weight) for weight in A_weights]
    summed_graph, summed_weights = _CAPI_DGLCSRSum(As, weight_arrays)
    return summed_graph, F.from_dgl_nd(summed_weights)
def libra2dgl_build_dict(
    a,
    b,
    indices,
    ldt_key,
    gdt_key,
    gdt_value,
    node_map,
    offset,
    nc,
    c,
    fsize,
    dataset,
):
    """
    This function invokes C/C++ code for pre-processing Libra output.
    After graph partitioning using Libra, during conversion from Libra output to DGL/DistGNN input,
    this function creates dictionaries to assign local node ids to the partitioned nodes
    and also to create a database of the split nodes.
    Parameter details are present in dgl/src/array/libra_partition.cc

    All eight array arguments are handed to the C/C++ code as writable
    buffers; the remaining arguments are forwarded unchanged. The value
    returned by the C/C++ call is returned to the caller.
    """
    array_args = (
        a,
        b,
        indices,
        ldt_key,
        gdt_key,
        gdt_value,
        node_map,
        offset,
    )
    writable = [to_dgl_nd_for_write(array) for array in array_args]
    return _CAPI_DGLLibra2dglBuildDict(*writable, nc, c, fsize, dataset)
Parameter details are present in dgl/src/array/libra_partition.cc """ _CAPI_DGLLibra2dglBuildAdjlist( to_dgl_nd(feat), to_dgl_nd_for_write(gfeat), to_dgl_nd_for_write(adj), to_dgl_nd_for_write(inner_node), to_dgl_nd(ldt), to_dgl_nd(gdt_key), to_dgl_nd(gdt_value), to_dgl_nd(node_map), to_dgl_nd_for_write(lr), to_dgl_nd(lrtensor), num_nodes, nc, c, feat_size, to_dgl_nd(labels), to_dgl_nd(trainm), to_dgl_nd(testm), to_dgl_nd(valm), to_dgl_nd_for_write(glabels), to_dgl_nd_for_write(gtrainm), to_dgl_nd_for_write(gtestm), to_dgl_nd_for_write(gvalm), feat_shape, ) def libra2dgl_set_lr(gdt_key, gdt_value, lrtensor, nc, Nn): """ This function invokes C/C++ code for pre-processing Libra output. To prepare the graph partitions for DistGNN input, this function sets the leaf and root (1-level tree) among the split copies (across different partitions) of a node from input graph. Parameter details are present in dgl/src/array/libra_partition.cc """ _CAPI_DGLLibra2dglSetLR( to_dgl_nd(gdt_key), to_dgl_nd(gdt_value), to_dgl_nd_for_write(lrtensor), nc, Nn, ) _init_api("dgl.sparse", __name__) ================================================ FILE: python/dgl/backend/__init__.py ================================================ from __future__ import absolute_import import importlib import json import logging import os import sys from . import backend from .set_default_backend import set_default_backend _enabled_apis = set() logger = logging.getLogger("dgl-core") def _gen_missing_api(api, mod_name): def _missing_api(*args, **kwargs): raise ImportError( 'API "%s" is not supported by backend "%s".' " You can switch to other backends by setting" " the DGLBACKEND environment." % (api, mod_name) ) return _missing_api def load_backend(mod_name): # Load backend does four things: # (1) Import backend framework (PyTorch, MXNet, Tensorflow, etc.) # (2) Import DGL C library. DGL imports it *after* PyTorch/MXNet/Tensorflow. 
Otherwise # DGL will crash with errors like `munmap_chunk(): invalid pointer`. # (3) Sets up the tensoradapter library path. # (4) Import the Python wrappers of the backend framework. DGL does this last because # it already depends on both the backend framework and the DGL C library. if mod_name == "pytorch": import torch mod = torch elif mod_name == "mxnet": import mxnet mod = mxnet elif mod_name == "tensorflow": import tensorflow mod = tensorflow else: raise NotImplementedError("Unsupported backend: %s" % mod_name) from .._ffi.base import load_tensor_adapter # imports DGL C library version = mod.__version__ load_tensor_adapter(mod_name, version) logger.debug("Using backend: %s" % mod_name) mod = importlib.import_module(".%s" % mod_name, __name__) thismod = sys.modules[__name__] for api in backend.__dict__.keys(): if api.startswith("__"): # ignore python builtin attributes continue if api == "data_type_dict": # load data type if api not in mod.__dict__: raise ImportError( 'API "data_type_dict" is required but missing for' ' backend "%s".' 
def get_preferred_backend():
    """Return the name of the backend DGL should load.

    The backend is looked up in this order:

    1. The ``DGLBACKEND`` environment variable, if it is set.
    2. The ``"backend"`` entry of ``config.json`` located in the directory
       given by the ``DGLDEFAULTDIR`` environment variable, or in ``~/.dgl``
       when that variable is not set.

    The selected name is compared case-insensitively and with surrounding
    whitespace ignored, for both sources. If no valid backend is found
    (nothing selected, unknown name, unreadable or malformed config file),
    a message is printed to stderr, ``set_default_backend`` is called to
    record ``"pytorch"`` as the default, and ``"pytorch"`` is returned.

    Returns
    -------
    str
        One of ``"tensorflow"``, ``"mxnet"`` or ``"pytorch"``.
    """
    supported_backends = ("tensorflow", "mxnet", "pytorch")

    if "DGLDEFAULTDIR" in os.environ:
        default_dir = os.getenv("DGLDEFAULTDIR")
    else:
        default_dir = os.path.join(os.path.expanduser("~"), ".dgl")
    config_path = os.path.join(default_dir, "config.json")

    backend_name = None
    if "DGLBACKEND" in os.environ:
        backend_name = os.getenv("DGLBACKEND")
    elif os.path.exists(config_path):
        try:
            with open(config_path, "r") as config_file:
                config_dict = json.load(config_file)
        except (OSError, ValueError):
            # An unreadable or malformed config must not break importing
            # DGL; treat it as "no backend selected" (json.JSONDecodeError
            # is a ValueError subclass).
            config_dict = None
        if isinstance(config_dict, dict):
            backend_name = config_dict.get("backend")

    # Normalize the value from either source the same way so that e.g.
    # DGLBACKEND=PyTorch is accepted just like "PyTorch" in config.json.
    if isinstance(backend_name, str):
        backend_name = backend_name.strip().lower()

    if backend_name in supported_backends:
        return backend_name

    print(
        "DGL backend not selected or invalid.  "
        "Assuming PyTorch for now.",
        file=sys.stderr,
    )
    set_default_backend(default_dir, "pytorch")
    return "pytorch"
def data_type_dict():
    """Returns a dictionary from data type string to the data type.

    The dictionary should include at least:
    bfloat16
    float16
    float32
    float64
    uint8
    int8
    int16
    int32
    int64
    bool

    This function will be called only *once* during the initialization of the
    backend module. The returned dictionary will become the attributes of the
    backend module.

    Examples
    --------
    >>> import torch as th
    >>> def data_type_dict():
    >>>     return { 'float16' : th.float16, 'float32' : th.float32, ... }

    After the module is initialized.

    >>> import backend as F
    >>> F.float16  # this will point to torch.float16

    Returns
    -------
    dict of str to data type
        The data type dict.
    """
    pass
def get_preferred_sparse_format():
    """Get the preferred sparse matrix format supported by the backend.

    Different backends have their own preferred sparse matrix format. This
    info is useful when constructing a sparse matrix.

    Returns
    -------
    string
        The name of the preferred sparse matrix format.
    """
    pass
def is_tensor(obj):
    """Returns true if the given object is a framework-specific tensor.

    Parameters
    ----------
    obj : object
        The object to check.

    Returns
    -------
    bool
        True if ``obj`` is a framework-specific tensor.
    """
    pass
def floor_div(in1, in2):
    """Element-wise integer division and rounds each quotient towards zero.

    Parameters
    ----------
    in1 : Tensor
        The input tensor
    in2 : Tensor or integer
        The input

    Returns
    -------
    Tensor
        A framework-specific tensor.
    """
    # NOTE(review): the name says "floor" while the summary says the quotient
    # is rounded towards zero; these differ for negative operands. Confirm
    # the intended rounding against the backend implementations.
    # Interface stub only: each backend supplies the real implementation.
    pass
def argsort(input, dim, descending):
    """Return the indices that would sort the input along the given dim.

    Parameters
    ----------
    input : Tensor
        The input tensor.
    dim : int
        The dim to sort along.
    descending : bool
        Controls the sorting order (False: ascending, True: descending)

    Returns
    -------
    Tensor
        A framework-specific tensor.
    """
    # Interface stub only: each backend supplies the real implementation.
    pass
def take(data, indices, dim):
    """Takes elements from an input array along the given dim.

    Parameters
    ----------
    data : Tensor
        The data tensor.
    indices : Tensor
        The indices tensor.
    dim : int
        The dimension to gather along.
    """
    pass
def index_add_inplace(data, row_idx, value):
    """Add the values into the data tensor using the row index inplace.

    If two row indices are the same, the corresponding values are summed up
    before adding to the data tensor.

    Examples
    --------
    >>> import torch as th
    >>> arr = th.zeros((10))
    >>> F.index_add_inplace(arr, th.tensor([0, 1, 1]), th.tensor([1.0, 1.0, 1.0]))
    >>> arr
    tensor([1., 2., 0., 0., 0., 0., 0., 0., 0., 0.])

    Parameters
    ----------
    data : Tensor
        The data tensor to be updated.
    row_idx : Tensor
        A 1-D integer tensor containing which rows to be updated.
    value : Tensor
        The new value.
    """
    pass
def empty(shape, dtype, ctx):
    """Create a tensor filled with uninitialized data.

    Parameters
    ----------
    shape : tuple of int
        The tensor shape.
    dtype : data type
        It should be one of the values in the data type dict.
    ctx : context
        The device of the result tensor.

    Returns
    -------
    Tensor
        The empty tensor.
    """
    pass
def randint(shape, dtype, ctx, low, high):
    """Create a tensor with random value in a uniform integer
    distribution between low (inclusive) and high (exclusive)

    Parameters
    ----------
    shape : tuple of int
        The tensor shape.
    dtype : data type
        It should be one of the values in the data type dict.
    ctx : context
        The device of the result tensor.
    low : number
        The lower bound of the distribution (inclusive).
    high : number
        The upper bound of the distribution (exclusive).

    Returns
    -------
    Tensor
        The random tensor.
    """
    pass
def clamp(data, min_val, max_val):
    """Clamp all elements in :attr:`data` into the range [min_val, max_val]
    and return a resulting tensor.

    Parameters
    ----------
    data : Tensor
        Input tensor
    min_val : Scalar
        Min value.
    max_val : Scalar
        Max value.

    Returns
    -------
    Tensor
        The result.
    """
    pass
def full_1d(length, fill_value, dtype, ctx):
    """Create a 1D tensor full of the fill_value.

    Parameters
    ----------
    length : int
        The length of the vector.
    fill_value : int
        The filled value.
    dtype : data type
        It should be one of the values in the data type dict.
    ctx : context
        The device of the result tensor.

    Returns
    -------
    Tensor
        A result 1D tensor
    """
    pass
def zerocopy_to_dgl_ndarray(input):
    """Zerocopy a framework-specific Tensor to dgl.ndarray.NDArray

    Parameters
    ----------
    input : Tensor
        The framework-specific tensor to convert.

    Returns
    -------
    dgl.ndarray.NDArray
        The resulting DGL NDArray.
    """
    pass
def gspmm(gidx, op, reduce_op, lhs_data, rhs_data):
    r"""Generalized Sparse Matrix Multiplication interface.
    It fuses two steps into one kernel.
    (1) Computes messages by :attr:`op` source node and edge features.
    (2) Aggregate the messages by :attr:`reduce_op` as the features on destination nodes.

    .. math::
        x_v = \psi_{(u, v, e)\in \mathcal{G}}(\rho(x_u, x_e))

    where :math:`x_v` is the returned feature on destination nodes, and :math:`x_u`,
    :math:`x_e` refers to :attr:`u`, :attr:`e` respectively. :math:`\rho` means binary
    operator :attr:`op` and :math:`\psi` means reduce operator :attr:`reduce_op`,
    :math:`\mathcal{G}` is the graph we apply gspmm on: :attr:`gidx`.

    Note that this function does not handle gradients.

    Parameters
    ----------
    gidx : HeteroGraphIndex
        The input graph.
    op : str
        The binary op's name, could be ``add``, ``sub``, ``mul``, ``div``,
        ``copy_lhs``, ``copy_rhs``.
    reduce_op : str
        Reduce operator, could be ``sum``, ``max``, ``min``.
    lhs_data : tensor or None
        The left operand, could be None if it's not required by the op.
    rhs_data : tensor or None
        The right operand, could be None if it's not required by the op.

    Returns
    -------
    tensor
        The result tensor.
    """
    pass
def gsddmm_hetero(
    g, op, lhs_len, lhs_target="u", rhs_target="v", *lhs_and_rhs_tuple
):
    r"""Generalized Sampled-Dense-Dense Matrix Multiplication interface on
    heterogeneous graph. All the relation types of the heterogeneous graph
    will be processed together.
    It computes edge features by :attr:`op` lhs features and rhs features.

    .. math::
        x_{e} = \phi(x_{lhs}, x_{rhs}), \forall (u,e,v)\in \mathcal{G}

    where :math:`x_{e}` is the returned feature on edges and :math:`x_u`,
    :math:`x_v` refers to :attr:`u`, :attr:`v` respectively. :math:`\phi`
    is the binary operator :attr:`op`, and :math:`\mathcal{G}` is the graph
    we apply gsddmm on: :attr:`g`. $lhs$ and $rhs$ are one of $u,v,e$'s.

    Parameters
    ----------
    g : HeteroGraphIndex
        The input graph.
    op : str
        Binary operator, could be ``add``, ``sub``, ``mul``, ``div``, ``dot``,
        ``copy_lhs``, ``copy_rhs``.
    lhs_len : int
        Length of the lhs data
    lhs_target: str
        Choice of `u`(source), `e`(edge) or `v`(destination) for left operand.
    rhs_target: str
        Choice of `u`(source), `e`(edge) or `v`(destination) for right operand.
    lhs_and_rhs_tuple : tuple of tensors
        lhs_data and rhs_data are concatenated to one tuple. lhs_data is
        also a tuple of tensors of size number of ntypes. Same is true for
        rhs_data.
        The tensor(s) in the tuple could be None

    Returns
    -------
    tuple of tensor
        The resulting tuple of tensor.
    """
    pass
def edge_softmax_hetero(gidx, eids, norm_by, *logits):
    r"""Compute edge softmax.

    For a node :math:`i`, edge softmax is an operation of computing

    .. math::
      a_{ij} = \frac{\exp(z_{ij})}{\sum_{j\in\mathcal{N}(i)}\exp(z_{ij})}

    where :math:`z_{ij}` is a signal of edge :math:`j\rightarrow i`, also
    called logits in the context of softmax. :math:`\mathcal{N}(i)` is
    the set of nodes that have an edge to :math:`i`.

    Edge softmax can be normalized by destination nodes (i.e. :math:`ij`
    are incoming edges of `i` in the formula above) or by source nodes
    (i.e. :math:`ij` are outgoing edges of `i` in the formula). The former
    case corresponds to softmax in GAT and Transformer, and the latter case
    corresponds to softmax in Capsule network.

    Parameters
    ----------
    gidx : HeteroGraphIndex
        The graph to perform edge softmax on.
    eids : dict of tensors
        Each tensor has the edges on which to apply edge softmax for a
        corresponding relation type.
    norm_by : str, could be `src` or `dst`
        Normalized by source nodes or destination nodes.
    logits : tuple of tensors
        The input edge features of different relation types.

    Returns
    -------
    Tensor
        Softmax value
    """
    pass
Math: y[idx[i], *] += x[i, *] Parameters ---------- x : Tensor The input feature. idx : Tensor The indices array. m : int The length of output. Returns ------- Tensor The output tensor. """ pass def csrmm(A, A_weights, B, B_weights, num_vtypes): """Compute weighted adjacency matrix multiplication. Notes ----- Both A and B must allow creation of CSR representations, and must be simple graphs (i.e. having at most one edge between two nodes). The output unit graph has no format restriction. Parameters ---------- A : HeteroGraphIndex The unit graph as left operand. A_weights : Tensor The edge weights of A. Must be a 1D vector. B : HeteroGraphIndex The unit graph as right operand. B_weights : Tensor The edge weights of B. Must be a 1D vector. num_vtypes : int The number of node types of the output graph. Must be either 1 or 2. Returns ------- HeteroGraphIndex The output unit graph. Tensor The output edge weights. """ pass def csrsum(gidxs, weights): """Compute weighted adjacency matrix summation. Notes ----- All unit graphs must allow creation of CSR representations, and must be simple graphs (i.e. having at most one edge between two nodes). The output unit graph has no format restriction. Parameters ---------- gidxs : list[HeteroGraphIndex] The unit graphs. weights : list[Tensor] The edge weights of each graph. Must be 1D vectors. Returns ------- HeteroGraphIndex The output unit graph. Tensor The output edge weights. """ pass def csrmask(A, A_weights, B): """Retrieve the values in the weighted adjacency matrix of graph :attr:`A` at the non-zero positions of graph :attr:`B`'s adjacency matrix. In scipy, this is equivalent to ``A[B != 0]``. Notes ----- Both A and B must allow creation of CSR representations, and must be simple graphs (i.e. having at most one edge between two nodes). Parameters ---------- A : HeteroGraphIndex The unit graph as left operand. A_weights : Tensor The edge weights of A. Must be a 1D vector. B : HeteroGraphIndex The unit graph as right operand. 
Returns ------- Tensor The output tensor. """ pass def gather_mm(A, B, idx_a, idx_b): r"""Dense Matrix Multiplication interface. It multiplies 2D dense tensor A and 3D dense tensor B according to their relation types. A is unsorted and the relation type is fetched from idx_b. Parameters ---------- A : tensor 2-D tensor of shape (N, D1) B : tensor 3-D tensor of shape (R, D1, D2) idx_a : Tensor, optional If specified, must be a 1-D integer tensor of shape (K,). idx_b : Tensor, optional If specified, must be a 1-D integer tensor of shape (K,). Returns ------- Tensor The output dense matrix of shape (N, D2) """ pass def segment_mm(A, B, seglen_A): r"""Dense Matrix Multiplication interface. It multiplies dense tensor A and dense tensor B according to relation types. A is sorted and concatenated according to relation types. Parameters ---------- A : tensor 2-D tensor of shape (N, D1) B : tensor 3-D tensor of shape (R, D1, D2) seglen_A : Tensor An integer tensor of shape (R,). Each element is the length of segments of input ``A``. The summation of all elements must be equal to N. Returns ------- Tensor The output dense matrix of shape (N, D2) """ pass ############################################################################### # Other interfaces # ---------------- # These are not related to tensors. Some of them are temporary workarounds that # should be included in DGL in the future. def sync(): """Synchronize computation. In DL frameworks such as MXNet and TensorFlow, the computation in operators are done asynchronously. This is to synchronize computation and makes sure that all computation is complete after this function call. 
""" pass def attach_grad(tensor): """Attach gradients to the input tensor""" pass def backward(x, head_gradient=None): """Invoke backward computation with an optional head gradient.""" pass def grad(x): """Fetches the gradient from the tensor after backward computation.""" pass def is_no_grad(x): """Test if the input tensor has gradient""" pass def is_recording(): """Test if the execution is recording gradients.""" pass class record_grad(object): """Context manager that records the gradients""" def __init__(self): pass def __enter__(self): pass def __exit__(self, exc_type, exc_value, exc_traceback): pass class no_grad(object): """Context manager that explicitly disables gradient computation""" def __init__(self): pass def __enter__(self): pass def __exit__(self, exc_type, exc_value, exc_traceback): pass class NodeEmbedding(object): """Sparse node embeddings""" def __init__(self): pass def __enter__(self): pass def __exit__(self, exc_type, exc_value, exc_traceback): pass ================================================ FILE: python/dgl/backend/mxnet/__init__.py ================================================ from .sparse import * from .tensor import * ================================================ FILE: python/dgl/backend/mxnet/sparse.py ================================================ import mxnet as mx import numpy as np from mxnet import nd from ..._sparse_ops import ( _bwd_segment_cmp, _csrmask, _csrmm, _csrsum, _gsddmm, _gspmm, _scatter_add, _segment_reduce, ) from ...base import ALL, dgl_warning, is_all from ...heterograph_index import create_unitgraph_from_csr from .tensor import ( asnumpy, context, copy_to, to_backend_ctx, zerocopy_from_numpy, ) __all__ = [ "gspmm", "gsddmm", "edge_softmax", "segment_reduce", "scatter_add", "csrmm", "csrsum", "csrmask", ] def _scatter_nd(index, src, n_rows): """Similar to PyTorch's scatter nd on first dimension.""" assert index.shape == src.shape dgl_warning("MXNet do not support scatter_add, fallback to numpy.") ctx = 
context(src) index = asnumpy(index) src = asnumpy(src) shp = index.shape ndim = src.ndim offsets = [] stride = 1 for i in reversed(range(1, ndim)): di = shp[i] offset_i = np.arange(di, dtype=index.dtype) offsets.append( (stride * offset_i).reshape( (1,) * i + (di,) + (1,) * (ndim - 1 - i) ) ) stride *= di if ndim > 1: new_idx = index * stride + sum(offsets) else: new_idx = index src = src.reshape(-1) new_idx = new_idx.reshape(-1) rst = np.zeros((stride * n_rows,), dtype=src.dtype) np.add.at(rst, new_idx, src) rst = rst.reshape(n_rows, *shp[1:]) rst = copy_to(zerocopy_from_numpy(rst), ctx) return rst def _gather_nd(index, src): """Similar to PyTorch's gather nd on first dimension.""" ctx = context(src) shp = index.shape ndim = src.ndim offsets = [] stride = 1 for i in reversed(range(1, ndim)): di = shp[i] offset_i = nd.arange(di, dtype=index.dtype) offsets.append( (stride * offset_i).reshape( (1,) * i + (di,) + (1,) * (ndim - 1 - i) ) ) stride *= di if ndim > 1: new_idx = index * stride + copy_to(sum(offsets), ctx) else: new_idx = index src = src.reshape(-1) new_idx = new_idx.reshape(-1) rst = nd.take(src, new_idx).reshape(shp) return rst def _reduce_grad(grad, shape): """Reduce gradient on the broadcast dimension If there is broadcast in forward pass, gradients need to be reduced on broadcast dimension. This function checks the input tensor shape and gradient shape and perform the reduction. 
Parameters ---------- grad: Tensor Gradient tensor shape: tuple Shape of input tensor Returns ------- Tensor """ grad_shape = grad.shape[1:] in_shape = shape[1:] if in_shape == grad_shape: # no need to reduce return grad num_to_squeeze = len(grad_shape) - len(in_shape) # pad inshape in_shape = (1,) * num_to_squeeze + in_shape # pad in_shape in_shape = (1,) * num_to_squeeze + in_shape reduce_idx = np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))[0] reduce_idx += 1 # skip batch dim grad = grad.sum(axis=tuple(reduce_idx), keepdims=True) return grad.reshape(shape) def _need_reduce_last_dim(ufeat, efeat): """Indicates whether to reduce the last dimension on edges in the backward pass of spmm, if so, use dot instead of mul.""" ushp = ufeat.shape eshp = efeat.shape return ushp[1:-1] == eshp[1:-1] and eshp[-1] == 1 and ushp[-1] > 1 def _muldiv(op, x): return 1.0 / x if op == "div" else x def _addsub(op, x): return -x if op == "sub" else x def _expand(x, shape): return x.broadcast_to((x.shape[0], *shape)) class GSpMM(mx.autograd.Function): def __init__(self, gidx, op, reduce_op): super(GSpMM, self).__init__() self.gidx = gidx self.op = op self.reduce_op = reduce_op def forward(self, X, Y): out, (argX, argY) = _gspmm(self.gidx, self.op, self.reduce_op, X, Y) self.save_for_backward(X, Y, argX, argY) return out def backward(self, dZ): ctx = context(dZ) X, Y, argX, argY = self.saved_tensors gidx, op, reduce_op = self.gidx, self.op, self.reduce_op if op != "copy_rhs": g_rev = gidx.reverse() if reduce_op == "sum": if op in ["mul", "div"]: dX = _gspmm(g_rev, "mul", "sum", dZ, _muldiv(op, Y))[0] elif op in ["add", "sub"]: dX = _gspmm(g_rev, "copy_lhs", "sum", dZ, Y)[0] elif op == "copy_lhs": dX = _gspmm(g_rev, "copy_lhs", "sum", dZ, None)[0] else: if op in ["mul", "div"]: dX = _scatter_nd( argX, _muldiv(op, _gather_nd(argY, _expand(Y, dZ.shape[1:]))) * dZ, X.shape[0], ) elif op in ["add", "sub", "copy_lhs"]: dX = _scatter_nd(argX, dZ, X.shape[0]) dX = _reduce_grad(dX, 
def gspmm(gidx, op, reduce_op, lhs_data, rhs_data):
    """Run generalized SpMM through the ``GSpMM`` autograd function.

    Parameters
    ----------
    gidx : HeteroGraphIndex
        The input graph.
    op : str
        Name of the binary operator.
    reduce_op : str
        Name of the reduce operator.
    lhs_data : NDArray or None
        The left operand; ``None`` is replaced by a placeholder.
    rhs_data : NDArray or None
        The right operand; ``None`` is replaced by a placeholder.

    Returns
    -------
    NDArray
        The output of ``GSpMM`` applied to both operands.
    """
    spmm_fn = GSpMM(gidx, op, reduce_op)
    device = to_backend_ctx(gidx.ctx)

    def _placeholder():
        # XXX(minjie): MXNet's autograd misbehaves when one of the inputs does
        #   not require gradient: backward is still invoked, but the gradient
        #   is not written to the correct buffer, so all input gradients end
        #   up zero. Work around it by making the stand-in operand require
        #   gradients as well.
        dummy = nd.zeros((1,), ctx=device)
        dummy.attach_grad()
        return dummy

    lhs = _placeholder() if lhs_data is None else lhs_data
    rhs = _placeholder() if rhs_data is None else rhs_data
    return spmm_fn(lhs, rhs)
class EdgeSoftmax(mx.autograd.Function):
    """Autograd function computing softmax over the edges of a graph.

    Parameters
    ----------
    gidx : HeteroGraphIndex
        The graph to perform edge softmax on.
    eids : tensor or ALL
        Edges to apply the softmax on. Unless it is ALL, the graph is first
        restricted to the edge subgraph induced by ``eids``.
    norm_by : str
        ``"src"`` normalizes over outgoing edges (implemented by reversing
        the graph); any other value keeps the graph as is, i.e. normalizes
        over incoming edges.
    """

    def __init__(self, gidx, eids, norm_by):
        super(EdgeSoftmax, self).__init__()
        if not is_all(eids):
            gidx = gidx.edge_subgraph([eids], True).graph
        if norm_by == "src":
            # Normalizing by source equals normalizing by destination on the
            # reversed graph.
            gidx = gidx.reverse()
        self.gidx = gidx

    def forward(self, score):
        """Forward function.

        Pseudo-code:

        .. code:: python

            score = dgl.EData(g, score)
            score_max = score.dst_max()  # of type dgl.NData
            score = score - score_max  # edge_sub_dst, ret dgl.EData
            score_sum = score.dst_sum()  # of type dgl.NData
            out = score / score_sum    # edge_div_dst, ret dgl.EData
            return out.data
        """
        gidx = self.gidx
        # Subtract the per-node maximum before exponentiating.
        score_max = _gspmm(gidx, "copy_rhs", "max", None, score)[0]
        score = mx.nd.exp(_gsddmm(gidx, "sub", score, score_max, "e", "v"))
        score_sum = _gspmm(gidx, "copy_rhs", "sum", None, score)[0]
        out = _gsddmm(gidx, "div", score, score_sum, "e", "v")
        self.save_for_backward(out)
        return out

    def backward(self, grad_out):
        """Backward function.

        Pseudo-code:

        .. code:: python

            g, out = ctx.backward_cache
            grad_out = dgl.EData(g, grad_out)
            out = dgl.EData(g, out)
            sds = out * grad_out  # type dgl.EData
            sds_sum = sds.dst_sum()  # type dgl.NData
            grad_score = sds - out * sds_sum  # multiple expressions
        """
        (out,) = self.saved_tensors
        gidx = self.gidx
        sds = out * grad_out
        accum = gspmm(gidx, "copy_rhs", "sum", None, sds)
        grad_score = sds - gsddmm(gidx, "mul", out, accum, "e", "v")
        # Drop the reference to the saved output, as the other autograd
        # functions in this module do. The attribute is ``saved_tensors``;
        # assigning to a misspelled name would keep the tensor alive.
        self.saved_tensors = None
        return grad_score
def csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes):
    """Multiply two weighted adjacency matrices through the ``CSRMM`` op.

    Parameters
    ----------
    gidxA : HeteroGraphIndex
        The unit graph as left operand.
    A_weights : NDArray
        The edge weights of the left operand.
    gidxB : HeteroGraphIndex
        The unit graph as right operand.
    B_weights : NDArray
        The edge weights of the right operand.
    num_vtypes : int
        The number of node types of the output graph.

    Returns
    -------
    HeteroGraphIndex
        The output unit graph, rebuilt from the CSR arrays returned by the op.
    NDArray
        The output edge weights.
    """
    outputs = CSRMM(gidxA, gidxB, num_vtypes)(A_weights, B_weights)
    n_rows, n_cols, indptr, indices, eids, out_weights = outputs
    # n_rows / n_cols come back from the op as arrays; convert them to scalars.
    out_graph = create_unitgraph_from_csr(
        num_vtypes,
        n_rows.asscalar(),
        n_cols.asscalar(),
        indptr,
        indices,
        eids,
        ["coo", "csr", "csc"],
    )
    return out_graph, out_weights
def csrsum(gidxs, weights):
    """Sum weighted adjacency matrices through the ``CSRSum`` op.

    Parameters
    ----------
    gidxs : list[HeteroGraphIndex]
        The unit graphs.
    weights : list[NDArray]
        The edge weights of each graph.

    Returns
    -------
    HeteroGraphIndex
        The output unit graph, rebuilt from the CSR arrays returned by the op.
    NDArray
        The output edge weights.
    """
    outputs = CSRSum(gidxs)(*weights)
    n_rows, n_cols, indptr, indices, eids, out_weights = outputs
    # The output graph takes its number of node types from the first operand.
    n_vtypes = gidxs[0].number_of_ntypes()
    out_graph = create_unitgraph_from_csr(
        n_vtypes,
        n_rows.asscalar(),
        n_cols.asscalar(),
        indptr,
        indices,
        eids,
        ["coo", "csr", "csc"],
    )
    return out_graph, out_weights
# DGL_MXNET_SET_NP_SHAPE is a boolean switch that defaults to enabled. Environment
# values are strings, and bool() of any non-empty string (including "0" or
# "false") is True, so parse the conventional "off" spellings explicitly.
# An empty value keeps disabling the flag, as before.
mx.set_np_shape(
    os.environ.get("DGL_MXNET_SET_NP_SHAPE", "1").strip().lower()
    not in ("", "0", "false", "no", "off")
)
def to_backend_ctx(dglctx):
    """Convert a DGL device context into the corresponding MXNet context.

    Parameters
    ----------
    dglctx : DGL context
        Object exposing ``device_type`` and ``device_id``.

    Returns
    -------
    MXNet context
        ``mx.cpu()`` when the device type is 1, ``mx.gpu(device_id)`` when it
        is 2.

    Raises
    ------
    ValueError
        If the device type is neither 1 nor 2.
    """
    dev_type = dglctx.device_type
    if dev_type == 1:
        return mx.cpu()
    elif dev_type == 2:
        return mx.gpu(dglctx.device_id)
    else:
        # Format the context into the message; passing it as a second
        # positional argument makes the error render as a tuple repr.
        raise ValueError("Unsupported DGL device context: %s" % (dglctx,))
def argtopk(input, k, dim, descending=True):
    """Return the indices of the top-``k`` entries of ``input`` along ``dim``.

    Parameters
    ----------
    input : NDArray
        Tensor to rank.
    k : int
        Number of indices to keep along ``dim``.
    dim : int
        Axis along which to sort.
    descending : bool, optional
        If True (default) the largest entries come first, otherwise the
        smallest.

    Returns
    -------
    NDArray
        int64 indices, with size ``k`` along ``dim``.
    """
    idx = nd.argsort(input, dim, is_ascend=not descending)
    # Slice the sorted *indices*; the previous code sliced ``input`` itself and
    # therefore returned the first k values instead of the top-k positions.
    idx = nd.slice_axis(idx, dim, 0, k)
    # Cast to int64 like ``argsort`` and ``sort_1d`` in this module do.
    return nd.cast(idx, dtype="int64")
def pack_padded_tensor(input, lengths):
    """Drop the padding rows of a padded batch and concatenate what is left.

    Parameters
    ----------
    input : NDArray
        Padded tensor whose first two dimensions are (batch_size, max_len).
    lengths : NDArray or sequence of int
        Number of valid rows of each batch item.

    Returns
    -------
    NDArray
        The valid rows of all batch items stacked along the first axis; the
        trailing dimensions are flattened by the ``reshape(..., -1)`` below.
    """
    batch_size, max_len = input.shape[:2]
    ctx = input.context
    if isinstance(lengths, nd.NDArray):
        # Same conversion pad_packed_tensor performs: range() below needs
        # plain Python integers, not NDArray elements.
        lengths = [int(l) for l in lengths.asnumpy()]
    index = []
    for i, l in enumerate(lengths):
        # Row i of the batch occupies [i * max_len, (i + 1) * max_len) once the
        # first two dimensions are merged; keep only its first l rows.
        index.extend(range(i * max_len, i * max_len + l))
    index = nd.array(index, ctx=ctx)
    return gather_row(input.reshape(batch_size * max_len, -1), index)
def unique(input, return_inverse=False, return_counts=False):
    """Return the unique entries of ``input`` (computed through NumPy).

    Depending on the flags, the result is the unique values alone, or a tuple
    of the unique values followed by the inverse indices and/or the counts,
    in that order. The unique values keep the dtype of ``input``; the extra
    arrays are created with ``nd.array``'s default dtype.
    """
    # TODO: fallback to numpy is unfortunate
    host_values = input.asnumpy()
    ctx = input.context
    if not (return_inverse or return_counts):
        return nd.array(np.unique(host_values), ctx=ctx, dtype=input.dtype)
    results = np.unique(
        host_values,
        return_inverse=bool(return_inverse),
        return_counts=bool(return_counts),
    )
    uniq = nd.array(results[0], ctx=ctx, dtype=input.dtype)
    extras = tuple(nd.array(extra, ctx=ctx) for extra in results[1:])
    return (uniq,) + extras
def zerocopy_to_dgl_ndarray(arr):
    """Wrap an MXNet NDArray as a DGL NDArray through a read-only DLPack capsule.

    Parameters
    ----------
    arr : NDArray
        MXNet array to export.

    Returns
    -------
    The DGL NDArray built by ``dglnd.from_dlpack`` from the exported capsule.
    """
    # Export exactly once. The previous code called to_dlpack_for_read() twice
    # and threw the first capsule away.
    return dglnd.from_dlpack(arr.to_dlpack_for_read())
class no_grad:
    """Context manager that does nothing on entry or exit.

    ``with no_grad():`` simply runs its body; nothing is bound by ``as`` and
    exceptions raised inside the body are not suppressed.
    """

    def __init__(self):
        """Create the (stateless) context manager."""

    def __enter__(self):
        """Enter the block; there is nothing to set up."""
        return None

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """Leave the block; returning None lets any exception propagate."""
        return None
def spmm_cache_Y(binary_op, reduce_op, req_grad_X, req_grad_Y):
    """Decide whether the SpMM forward pass has to keep Y for backward.

    Y is only kept when X requires a gradient and the operator is not
    ``copy_rhs``: for a ``sum`` reduction with ``mul`` or ``add``, and for
    any other reduction with ``mul`` only.
    """
    if binary_op == "copy_rhs" or not req_grad_X:
        return False
    ops_needing_y = ("mul", "add") if reduce_op == "sum" else ("mul",)
    return binary_op in ops_needing_y
def _cast_if_autocast_enabled(*args):
    """Cast ``args`` to the autocast GPU dtype when autocast is active.

    Outside of an autocast region the arguments are handed back untouched
    (as the tuple of positional arguments).
    """
    if th.is_autocast_enabled():
        target_dtype = th.get_autocast_gpu_dtype()
        return th.cuda.amp.autocast_mode._cast(args, target_dtype)
    return args
if op == "mul": grad = _expand(Y, dZ.shape[1:]).gather(0, argY.long()) * dZ dX.scatter_add_(0, argX.long(), grad) elif op in ["add", "copy_lhs"]: dX.scatter_add_(0, argX.long(), dZ) dX = _reduce_grad(dX, X_shape) else: # X has not gradient dX = None if op != "copy_lhs" and ctx.needs_input_grad[4]: if reduce_op == "sum": if op == "mul" and reduce_last: dY = gsddmm(gidx, "dot", X, dZ) elif op == "mul": dY = gsddmm(gidx, "mul", X, dZ) elif op in ["add", "copy_rhs"]: dY = gsddmm(gidx, "copy_rhs", X, dZ) else: # max/min dY = th.zeros( (Y_shape[0],) + dZ.shape[1:], dtype=dtype, device=device ) if op == "mul": grad = _expand(X, dZ.shape[1:]).gather(0, argX.long()) * dZ dY.scatter_add_(0, argY.long(), grad) elif op in ["add", "copy_rhs"]: dY.scatter_add_(0, argY.long(), dZ) dY = _reduce_grad(dY, Y_shape) else: # Y has no gradient dY = None return None, None, None, dX, dY class GSpMM_hetero(th.autograd.Function): @staticmethod def forward( ctx, gidx, op, reduce_op, X_len, *feats ): # feats = lhs_data + rhs_data out, (argX, argY, argX_ntype, argY_etype) = _gspmm_hetero( gidx, op, reduce_op, X_len, feats ) X, Y = feats[:X_len], feats[X_len:] # TODO (Israt): check target to decide src_id/dst_id? 
src_id, dst_id = gidx.metagraph.find_edge(0) reduce_last = _need_reduce_last_dim(X[src_id], Y[dst_id]) X_shape = tuple( [X[i].shape if X[i] is not None else None for i in range(X_len)] ) Y_shape = tuple( [Y[i].shape if Y[i] is not None else None for i in range(len(Y))] ) dtype = X[src_id].dtype if X[src_id] is not None else Y[dst_id].dtype device = X[src_id].device if X[src_id] is not None else Y[dst_id].device ctx.backward_cache = ( gidx, op, reduce_op, X_shape, Y_shape, dtype, device, reduce_last, X_len, ) req_grad_X = tuple( [ X[i].requires_grad if X[i] is not None else False for i in range(X_len) ] ) req_grad_Y = tuple( [ Y[i].requires_grad if Y[i] is not None else False for i in range(len(Y)) ] ) # checking the first relation to decide for all the relations if not spmm_cache_argX( op, reduce_op, req_grad_X[src_id], req_grad_Y[dst_id] ): argX = tuple([None] * len(X)) if not spmm_cache_argY( op, reduce_op, req_grad_X[src_id], req_grad_Y[dst_id] ): argY = tuple([None] * len(X)) ctx.save_for_backward(*feats, *argX, *argX_ntype, *argY, *argY_etype) return out @staticmethod def backward(ctx, *dZ): ( gidx, op, reduce_op, X_shape, Y_shape, dtype, device, reduce_last, X_len, ) = ctx.backward_cache num_ntypes = gidx.number_of_ntypes() feats = ctx.saved_tensors[: -(4 * num_ntypes)] argX = ctx.saved_tensors[-(4 * num_ntypes) : -(3 * num_ntypes)] argX_ntype = ctx.saved_tensors[-(3 * num_ntypes) : -(2 * num_ntypes)] argY = ctx.saved_tensors[-(2 * num_ntypes) : -num_ntypes] argY_etype = ctx.saved_tensors[-num_ntypes:] X, Y = feats[:X_len], feats[X_len:] if op != "copy_rhs" and any([x is not None for x in X]): g_rev = gidx.reverse() if reduce_op == "sum": if op == "mul": dX = gspmm_hetero( g_rev, "mul", "sum", len(X), *tuple(dZ + Y) ) elif op == "add": dX = gspmm_hetero( g_rev, "copy_lhs", "sum", len(X), *tuple(dZ + Y) ) elif op == "copy_lhs": tpl_None = tuple([None] * len(Y)) dX = gspmm_hetero( g_rev, "copy_lhs", "sum", len(X), *tuple(dZ + tpl_None) ) else: # max/min # 
Assuming that the features are of the same dimension (enforced by the forward function) src_id, dst_id = gidx.metagraph.find_edge(0) dX = tuple( [ th.zeros( (X_shape[i][0],) + dZ[dst_id].shape[1:], dtype=dtype, device=device, ) if X[i] is not None else None for i in range(len(X)) ] ) if op == "mul": grad = _expand(Y, dZ.shape[1:]).gather(0, argY.long()) * dZ dX.scatter_add_(0, argX.long(), grad) elif op in ["add", "copy_lhs"]: dX = _update_grad_minmax_hetero( g_rev, op, dZ, argX, argX_ntype, dX ) dX = tuple( [ _reduce_grad(dX[i], X_shape[i]) if X[i] is not None else None for i in range(len(X)) ] ) else: # X has not gradient dX = tuple([None] * len(X)) if op != "copy_lhs" and any([y is not None for y in Y]): # TODO(Israt): implement other combinations of reduce functions if reduce_op == "sum": tpl_dZ = tuple( [ dZ[i] if dZ[i] is not None else None for i in range(len(dZ)) ] ) tpl_X_dZ = tuple(X + tpl_dZ) if op == "mul" and reduce_last: dY = gsddmm_hetero(gidx, "dot", X_len, "u", "v", *tpl_X_dZ) elif op == "mul": dY = gsddmm_hetero(gidx, "mul", X_len, "u", "v", *tpl_X_dZ) elif op in ["add", "copy_rhs"]: dY = gsddmm_hetero( gidx, "copy_rhs", X_len, "u", "v", *tpl_X_dZ ) else: # max/min src_id, dst_id = gidx.metagraph.find_edge(0) dY = tuple( [ th.zeros( (Y_shape[i][0],) + dZ[dst_id].shape[1:], dtype=dtype, device=device, ) if Y[i] is not None else None for i in range(len(Y)) ] ) if op == "mul": grad = _expand(X, dZ.shape[1:]).gather(0, argX.long()) * dZ dY.scatter_add_(0, argY.long(), grad) elif op in ["add", "copy_rhs"]: dY = _update_grad_minmax_hetero( gidx.reverse(), op, dZ, argY, argY_etype, dY ) dY = tuple( [ _reduce_grad(dY[i], Y_shape[i]) if dY[i] is not None else None for i in range(len(dY)) ] ) else: # Y has no gradient dY = tuple([None] * len(Y)) return (None, None, None, None) + dX + dY def sddmm_cache_X(op, req_grad_X, req_grad_Y): """Rules to identify whether to cache X in SDDMM forward stage.""" if op in ["mul", "dot"] and req_grad_Y: return True return 
def sddmm_cache_Y(op, req_grad_X, req_grad_Y):
    """Decide whether the SDDMM forward pass has to keep Y for backward.

    Y is kept only for the ``mul`` and ``dot`` operators, and only when X
    requires a gradient.
    """
    return bool(op in ("mul", "dot") and req_grad_X)
else: if op in ["add", "copy_rhs"]: dY = dZ else: # mul, dot dY = gsddmm(gidx, "mul", dZ, X, "e", lhs_target) dY = _reduce_grad(dY, Y_shape) else: dY = None return None, None, dX, dY, None, None class GSDDMM_hetero(th.autograd.Function): @staticmethod def forward( ctx, gidx, op, X_len, lhs_target, rhs_target, *feats ): # feats = X+Y out = _gsddmm_hetero(gidx, op, X_len, lhs_target, rhs_target, feats) X, Y = feats[:X_len], feats[X_len:] X_shape = tuple( [X[i].shape if X[i] is not None else None for i in range(len(X))] ) Y_shape = tuple( [Y[i].shape if Y[i] is not None else None for i in range(len(Y))] ) ctx.backward_cache = ( gidx, op, lhs_target, rhs_target, X_shape, Y_shape, X_len, ) req_grad_X = tuple( [ X[i].requires_grad if X[i] is not None else False for i in range(len(X)) ] ) req_grad_Y = tuple( [ Y[i].requires_grad if Y[i] is not None else False for i in range(len(Y)) ] ) ctx.save_for_backward(*feats) return out @staticmethod # TODO(Israt): Implement the complete backward operator def backward(ctx, *dZ): ( gidx, op, lhs_target, rhs_target, X_shape, Y_shape, X_len, ) = ctx.backward_cache feats = ctx.saved_tensors X, Y = feats[:X_len], feats[X_len:] if op != "copy_rhs" and any([x is not None for x in X]): if lhs_target in ["u", "v"]: _gidx = gidx if lhs_target == "v" else gidx.reverse() tpl_of_None = tuple([None] * len(X)) if op in ["add", "copy_lhs"]: dX = gspmm_hetero( _gidx, "copy_rhs", "sum", len(X), *(tuple(tpl_of_None + dZ)) ) else: # mul, dot if rhs_target == lhs_target: dX = ( gspmm_hetero( _gidx, "copy_rhs", "sum", len(X), *(tuple(tpl_of_None + dZ)) ) * Y ) elif rhs_target == "e": dZ_mul_Y = tuple( [ dZ[i] * Y[i] if dZ[i] is not None else None for i in range(len(Y)) ] ) dX = gspmm_hetero( _gidx, "copy_rhs", "sum", len(X), *(tuple(tpl_of_None + dZ_mul_Y)) ) else: # rhs_target = !lhs_target dX = gspmm_hetero( _gidx, "mul", "sum", len(X), *tuple(Y + dZ) ) else: # lhs_target == 'e' if op in ["add", "copy_lhs"]: dX = dZ else: # mul, dot num_etype = 
class EdgeSoftmax(th.autograd.Function):
    """Autograd function computing a softmax over edge scores."""

    @staticmethod
    def forward(ctx, gidx, score, eids, norm_by):
        """Forward function.

        ``gidx`` is first restricted to the edge subgraph of ``eids`` (unless
        ``eids`` selects all edges) and reversed when ``norm_by == "src"``;
        that processed graph is what gets cached for the backward pass.

        Pseudo-code:

        .. code:: python

            score = dgl.EData(g, score)
            score_max = score.dst_max()  # of type dgl.NData
            score = score - score_max  # edge_sub_dst, ret dgl.EData
            score_sum = score.dst_sum()  # of type dgl.NData
            out = score / score_sum    # edge_div_dst, ret dgl.EData
            return out.data
        """
        # remember to save the graph to backward cache before making it
        # a local variable
        if not is_all(eids):
            gidx = gidx.edge_subgraph([eids], True).graph
        if norm_by == "src":
            # The kernels below reduce onto the "v" side of the graph, so the
            # graph is reversed here (presumably so that the normalization
            # groups edges by source node -- confirm against the kernels).
            gidx = gidx.reverse()
        # Note: Now _edge_softmax_forward op only supports CPU
        # TODO(Zhejiang): We will support GPU in the future
        if score.is_cuda:
            # Composite path: max-reduce, exp(score - max), sum-reduce, divide.
            score_max = _gspmm(gidx, "copy_rhs", "max", None, score)[0]
            score = th.exp(_gsddmm(gidx, "sub", score, score_max, "e", "v"))
            score_sum = _gspmm(gidx, "copy_rhs", "sum", None, score)[0]
            out = _gsddmm(gidx, "div", score, score_sum, "e", "v")
        else:
            # Non-CUDA tensors use the dedicated softmax forward op.
            out = _edge_softmax_forward(gidx, score, "copy_rhs")
        ctx.backward_cache = gidx
        ctx.save_for_backward(out)
        return out

    @staticmethod
    def backward(ctx, grad_out):
        """Backward function.

        The graph is read from ``ctx.backward_cache`` and the forward output
        from ``ctx.saved_tensors``. Only ``score`` receives a gradient; the
        other three forward inputs get ``None``.

        Pseudo-code:

        .. code:: python

            g = ctx.backward_cache
            out, = ctx.saved_tensors
            grad_out = dgl.EData(g, grad_out)
            out = dgl.EData(g, out)
            sds = out * grad_out  # type dgl.EData
            sds_sum = sds.dst_sum()  # type dgl.NData
            grad_score = sds - out * sds_sum  # multiple expressions
            return grad_score.data
        """
        gidx = ctx.backward_cache
        (out,) = ctx.saved_tensors
        sds = out * grad_out
        # Note: Now _edge_softmax_backward op only supports CPU
        # TODO(Zhejiang): We will support GPU in the future
        if out.is_cuda:
            # Composite path: sum-reduce sds, then sds - out * accum.
            accum = gspmm(gidx, "copy_rhs", "sum", None, sds)
            grad_score = sds - gsddmm(gidx, "mul", out, accum, "e", "v")
        else:
            grad_score = _edge_softmax_backward(gidx, out, sds)
        # One entry per forward input: gidx, score, eids, norm_by.
        return None, grad_score, None, None
code:: python score = dgl.EData(g, score) score_max = score.dst_max() # of type dgl.NData score = score - score_max # edge_sub_dst, ret dgl.EData score_sum = score.dst_sum() # of type dgl.NData out = score / score_sum # edge_div_dst, ret dgl.EData return out.data """ # remember to save the graph to backward cache before making it # a local variable if not is_all(eids): gidx = gidx.edge_subgraph([eids], True).graph if norm_by == "src": gidx = gidx.reverse() u_len = gidx.number_of_ntypes() e_len = gidx.number_of_etypes() lhs = [None] * u_len feats = tuple(lhs + list(score)) score_max = _gspmm_hetero(gidx, "copy_rhs", "max", u_len, feats)[0] out_tmp = _gsddmm_hetero( gidx, "sub", e_len, "e", "v", tuple(list(score) + list(score_max)) ) score = tuple( [ th.exp(out_tmp[i]) if out_tmp[i] is not None else None for i in range(len(out_tmp)) ] ) score_sum = _gspmm_hetero( gidx, "copy_rhs", "sum", u_len, tuple(lhs + list(score)) )[0] out = _gsddmm_hetero( gidx, "div", e_len, "e", "v", tuple(list(score) + list(score_sum)) ) ctx.backward_cache = gidx ctx.save_for_backward(*out) return out @staticmethod def backward(ctx, *grad_out): """Backward function. Pseudo-code: .. 
class ScatterAdd(th.autograd.Function):
    """Autograd function wrapping the ``_scatter_add`` kernel."""

    @staticmethod
    def forward(ctx, x, idx, m):
        """Run ``_scatter_add(x, idx, m)`` and remember ``idx`` for backward."""
        y = _scatter_add(x, idx, m)
        ctx.save_for_backward(idx)
        return y

    @staticmethod
    def backward(ctx, dy):
        """Gather the output gradient back to the rows of ``x``.

        Returns one entry per forward input: the gradient for ``x`` and
        ``None`` for ``idx`` and ``m``.
        """
        # saved_tensors is a tuple; unpack it instead of indexing ``dy`` with
        # the tuple itself (which only worked because a 1-tuple index is
        # equivalent to indexing with its single element).
        (idx,) = ctx.saved_tensors
        return dy[idx], None, None
C_indices and C_eids tensors MUST be the same # as the underlying tensors of the created graph gidxC. ctx.backward_cache = gidxA, gidxB, gidxC ctx.save_for_backward(A_weights, B_weights) return ( th.tensor(nrows), th.tensor(ncols), C_indptr, C_indices, C_eids, C_weights, ) @staticmethod def backward( ctx, dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights ): # Only the last argument is meaningful. gidxA, gidxB, gidxC = ctx.backward_cache A_weights, B_weights = ctx.saved_tensors dgidxA, dA_weights = csrmm( gidxC, dC_weights, gidxB.reverse(), B_weights, gidxA.number_of_ntypes(), ) dgidxB, dB_weights = csrmm( gidxA.reverse(), A_weights, gidxC, dC_weights, gidxB.number_of_ntypes(), ) dA_weights = csrmask(dgidxA, dA_weights, gidxA) dB_weights = csrmask(dgidxB, dB_weights, gidxB) return None, dA_weights, None, dB_weights, None class CSRSum(th.autograd.Function): @staticmethod def forward(ctx, gidxs, *weights): # PyTorch tensors must be explicit arguments of the forward function gidxC, C_weights = _csrsum(gidxs, weights) ( nrows, ncols, C_indptr, C_indices, C_eids, ) = gidxC.adjacency_matrix_tensors(0, False, "csr") # Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same # as the underlying tensors of the created graph gidxC. ctx.backward_cache = gidxs, gidxC return ( th.tensor(nrows), th.tensor(ncols), C_indptr, C_indices, C_eids, C_weights, ) @staticmethod def backward( ctx, dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights ): # Only the last argument is meaningful. 
class SEGMENTMM(th.autograd.Function):
    """Autograd function around the ``_segment_mm`` kernels."""

    @staticmethod
    def forward(ctx, A, B, seglen_A):
        """Compute the segment matmul of ``A`` with the 3D tensor ``B``.

        Raises ``ValueError`` when ``B`` is not three-dimensional. The inputs
        are cached on ``ctx`` for the backward pass.
        """
        if B.dim() != 3:
            raise ValueError("segment_mm expects B to be a 3D tensor.")
        out_shape = (A.shape[0], B.shape[2])
        out_buffer = th.empty(out_shape, device=A.device, dtype=A.dtype)
        result = _segment_mm(A, B, out_buffer, seglen_A)
        ctx.backward_cache = A, B, seglen_A
        return result

    @staticmethod
    def backward(ctx, dZ):
        """Return gradients for ``A`` and ``B`` (``None`` for ``seglen_A``).

        A gradient is only computed for the inputs that require one.
        """
        A, B, seglen_A = ctx.backward_cache
        grad_A = None
        grad_B = None
        if ctx.needs_input_grad[0]:
            # Compute A_grad = Out_grad * B^T
            buf_A = th.empty(A.shape, device=A.device, dtype=A.dtype)
            grad_A = _segment_mm(dZ, B, buf_A, seglen_A, b_trans=True)
        if ctx.needs_input_grad[1]:
            # Compute B_grad = A^T * Out_grad
            buf_B = th.empty(B.shape, device=B.device, dtype=B.dtype)
            grad_B = _segment_mm_backward_B(A, dZ, buf_B, seglen_A)
        return grad_A, grad_B, None
def gspmm(gidx, op, reduce_op, lhs_data, rhs_data):
    """Run the generalized SpMM autograd function ``GSpMM``.

    ``sub`` and ``div`` are rewritten as ``add`` with a negated right operand
    and ``mul`` with a reciprocal right operand before dispatch. Arguments are
    cast for autocast first and the call itself runs with autocast disabled.
    """
    if op == "sub":
        op, rhs_data = "add", -rhs_data
    elif op == "div":
        op, rhs_data = "mul", 1.0 / rhs_data
    casted = _cast_if_autocast_enabled(gidx, op, reduce_op, lhs_data, rhs_data)
    with _disable_autocast_if_enabled():
        return GSpMM.apply(*casted)
def gsddmm_hetero(
    g, op, lhs_len, lhs_target="u", rhs_target="v", *lhs_and_rhs_tuple
):
    """Dispatch a heterogeneous generalized SDDMM to ``GSDDMM_hetero``.

    The variadic tail holds ``lhs_len`` left-hand entries followed by the
    right-hand entries. ``"sub"`` / ``"div"`` are rewritten as ``"add"`` /
    ``"mul"`` on negated / reciprocal right-hand entries; ``None`` entries
    are passed through untouched.
    """
    lhs_tuple = lhs_and_rhs_tuple[:lhs_len]
    rhs_tuple = lhs_and_rhs_tuple[lhs_len:]

    if op == "sub":
        op = "add"
        rhs_tuple = tuple(None if rhs is None else -rhs for rhs in rhs_tuple)
    elif op == "div":
        op = "mul"
        rhs_tuple = tuple(
            None if rhs is None else (1.0 / rhs) for rhs in rhs_tuple
        )

    if op in ("add", "mul"):
        # Re-join so the (possibly rewritten) right-hand entries are used.
        lhs_and_rhs_tuple = lhs_tuple + rhs_tuple

    casted = _cast_if_autocast_enabled(
        g, op, lhs_len, lhs_target, rhs_target, *lhs_and_rhs_tuple
    )
    with _disable_autocast_if_enabled():
        return GSDDMM_hetero.apply(*casted)
def segment_mm(A, B, seglen_A):
    """Multiply consecutive row segments of ``A`` by the matching ``B[i]``.

    On CPU the product is computed segment by segment with plain matmuls and
    concatenated; on other devices the work is delegated to ``SEGMENTMM``
    (with autocast handled the same way as the other wrappers in this file).

    Parameters
    ----------
    A : Tensor
        Row-stacked left operand; segment ``i`` is the next ``seglen_A[i]``
        rows.
    B : Tensor
        Stack of right operands, indexed along its first dimension.
    seglen_A : Tensor
        Segment lengths; only the first ``B.shape[0]`` entries are read on
        the CPU path.

    Returns
    -------
    Tensor
        The concatenated per-segment products.
    """
    if A.device.type == "cpu":
        num_segments = B.shape[0]
        if num_segments == 0:
            # th.cat() rejects an empty list. With no segments there are no
            # output rows; building the empty result through matmul keeps it
            # attached to the autograd graph of both operands.
            return A[:0] @ B.sum(dim=0, dtype=B.dtype)
        pieces = []
        start = 0
        for i in range(num_segments):
            stop = start + seglen_A[i]
            pieces.append(A[start:stop] @ B[i])
            start = stop
        return th.cat(pieces)
    else:
        args = _cast_if_autocast_enabled(A, B, seglen_A)
        with _disable_autocast_if_enabled():
            return SEGMENTMM.apply(*args)
def tensor(data, dtype=None):
    """Create a torch tensor from a number, a sequence or another tensor.

    Parameters
    ----------
    data : number, list, Tensor or array-like
        A bare number is wrapped into a one-element list first. A non-empty
        list whose first element is a zero-dimensional tensor is stacked
        directly, which avoids device round trips for scalar tensors.
    dtype : torch.dtype, optional
        Requested data type; ``None`` keeps the inferred one.

    Returns
    -------
    Tensor
        For tensor input the result stays on the input's device.
    """
    if isinstance(data, numbers.Number):
        data = [data]
    if (
        isinstance(data, list)
        and len(data) > 0
        and isinstance(data[0], th.Tensor)
        and data[0].ndim == 0
    ):
        # Zero-dimensional scalar tensors: stack them where they live
        # (prevents GPU->CPU->GPU copies) and still honour ``dtype``.
        stacked = th.stack(data)
        return stacked if dtype is None else stacked.to(dtype)
    if isinstance(data, th.Tensor):
        return th.as_tensor(data, dtype=dtype, device=data.device)
    return th.as_tensor(data, dtype=dtype)
def asnumpy(input):
    """Return the content of a tensor as a NumPy array on the host.

    Tensors that do not use the dense (strided) layout are densified first.
    The layout is checked directly instead of via the legacy
    ``th.sparse.FloatTensor`` type, so sparse tensors of any dtype or device
    are handled. The result is detached from autograd.
    """
    if input.layout != th.strided:
        input = input.to_dense()
    return input.cpu().detach().numpy()
def take(data, indices, dim):
    """Select entries of ``data`` along ``dim`` using an index tensor.

    Parameters
    ----------
    data : Tensor
        Source tensor.
    indices : Tensor
        Index tensor of any shape; it is flattened for the lookup.
    dim : int
        Dimension to index. Negative values count from the last dimension.

    Returns
    -------
    Tensor
        Shape ``data.shape[:dim] + indices.shape + data.shape[dim + 1:]``.
    """
    if dim < 0:
        # The shape arithmetic below needs a non-negative dimension;
        # index_select itself already accepts negative values.
        dim += data.dim()
    new_shape = data.shape[:dim] + indices.shape + data.shape[dim + 1 :]
    # reshape (not view) so non-contiguous index tensors are accepted too.
    flat_indices = indices.reshape(-1)
    return th.index_select(data, dim, flat_indices).reshape(new_shape)
def count_nonzero(input):
    """Return the number of non-zero entries in ``input``.

    Tensors are counted with ``th.count_nonzero`` so that tensors requiring
    grad, and tensors that do not live in host memory, work as well.
    Non-tensor inputs still go through NumPy.
    """
    if isinstance(input, th.Tensor):
        return int(th.count_nonzero(input))
    return np.count_nonzero(input)
def zerocopy_to_dgl_ndarray_for_write(input):
    """Convert a tensor to a DGL ndarray that will be written to.

    Non-empty tensors must be contiguous and must pass ``check_is_view``;
    empty tensors skip both checks. The conversion itself is delegated to
    ``zerocopy_to_dgl_ndarray``.

    Raises
    ------
    AssertionError
        If a non-empty tensor is not contiguous (or ``check_is_view``
        rejects it).
    """
    if input.numel() > 0:
        # only check non-empty tensors
        if not input.is_contiguous():
            # Raised explicitly (same exception type as before) so the check
            # is not stripped when Python runs with -O.
            raise AssertionError(
                "Cannot convert non-contiguous tensors "
                "to dgl ndarray for write. Call .contiguous() first."
            )
        check_is_view(input)
    return zerocopy_to_dgl_ndarray(input)
def backward(x, head_gradient=None):
    """Run ``x.backward`` with an optional head gradient.

    A one-element 1-D head gradient is reshaped to ``x``'s shape when ``x``
    itself holds a single element, because ``Tensor.backward`` requires the
    gradient to have exactly the shape of ``x``. The reshape keeps the
    gradient's dtype and device.

    Parameters
    ----------
    x : Tensor
        Tensor to differentiate.
    head_gradient : Tensor, optional
        Gradient with respect to ``x``.
    """
    if (
        head_gradient is not None
        # Check the rank first: a 0-dim gradient has no shape[0].
        and head_gradient.dim() == 1
        and head_gradient.shape[0] == 1
        and x.numel() == 1
        and head_gradient.shape != x.shape
    ):
        head_gradient = head_gradient.reshape(x.shape)
    x.backward(head_gradient)
if __name__ == "__main__":
    # Command-line entry point: [default_dir] backend
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "default_dir",
        # argparse only applies ``default`` to a positional argument when it
        # is optional; without nargs="?" the default below was never used.
        nargs="?",
        type=str,
        default=os.path.join(os.path.expanduser("~"), ".dgl"),
        help="Directory that holds config.json (default: ~/.dgl)",
    )
    parser.add_argument(
        "backend",
        nargs=1,
        type=str,
        choices=["pytorch", "tensorflow", "mxnet"],
        help="Set default backend",
    )
    args = parser.parse_args()
    set_default_backend(args.default_dir, args.backend[0])
def gspmm_real(gidx, op, reduce_op, X, Y):
    """Run ``_gspmm`` and build the matching gradient function.

    Returns the forward output together with a ``grad(dZ)`` closure that
    yields ``(dX, dY)``, the pair shape expected by ``tf.custom_gradient``.

    For the ``"sum"`` reducer the gradients are computed with ``_gspmm`` on
    the reversed graph (for ``X``) and ``_gsddmm`` (for ``Y``). For any other
    reducer they are routed through ``argX`` / ``argY``, the auxiliary
    outputs of ``_gspmm`` (presumably the positions selected by the
    reducer -- confirm against ``_gspmm``). An operand that does not take
    part in the op (``copy_rhs`` for ``X``, ``copy_lhs`` for ``Y``) gets a
    zero gradient.
    """
    out, (argX, argY) = _gspmm(gidx, op, reduce_op, X, Y)

    def grad(dZ):
        dZ = tensor(dZ)

        # ---- gradient w.r.t. X ----
        if op != "copy_rhs":
            g_rev = gidx.reverse()
            if reduce_op == "sum":
                if op in ["mul", "div"]:
                    dX = _gspmm(g_rev, "mul", "sum", dZ, _muldiv(op, Y))[0]
                elif op in ["add", "sub"]:
                    dX = _gspmm(g_rev, "copy_lhs", "sum", dZ, Y)[0]
                elif op == "copy_lhs":
                    dX = _gspmm(g_rev, "copy_lhs", "sum", dZ, None)[0]
            else:
                if op in ["mul", "div"]:
                    dX = _scatter_nd(
                        argX,
                        _muldiv(op, _gather_nd(argY, _expand(Y, dZ.shape[1:])))
                        * dZ,
                        X.shape[0],
                    )
                elif op in ["add", "sub", "copy_lhs"]:
                    dX = _scatter_nd(argX, dZ, X.shape[0])
            # Undo any broadcasting that happened in the forward pass.
            dX = _reduce_grad(dX, X.shape)
        else:
            dX = tf.zeros_like(X)

        # ---- gradient w.r.t. Y ----
        if op != "copy_lhs":
            if reduce_op == "sum":
                if op == "mul" and _need_reduce_last_dim(X, Y):
                    dY = _gsddmm(gidx, "dot", X, dZ)
                elif op in ["mul", "div"]:
                    dY = _gsddmm(gidx, "mul", X, dZ)
                    if op == "div":
                        dY = -dY / (Y**2)
                elif op in ["add", "sub", "copy_rhs"]:
                    dY = _gsddmm(gidx, "copy_rhs", X, _addsub(op, dZ))
            else:
                if op in ["mul", "div"]:
                    dY = _scatter_nd(
                        argY,
                        _gather_nd(argX, _expand(X, dZ.shape[1:])) * dZ,
                        Y.shape[0],
                    )
                    if op == "div":
                        dY = -dY / (Y**2)
                elif op in ["add", "sub", "copy_rhs"]:
                    dY = _scatter_nd(argY, _addsub(op, dZ), Y.shape[0])
            # Undo any broadcasting that happened in the forward pass.
            dY = _reduce_grad(dY, Y.shape)
        else:
            dY = tf.zeros_like(Y)

        return dX, dY

    return out, grad
def edge_softmax_real(gidx, score, eids=ALL, norm_by="dst"):
    """Compute edge softmax and return it with its gradient function.

    When ``eids`` does not select all edges the computation runs on the
    corresponding edge subgraph; ``norm_by == "src"`` runs it on the reversed
    graph. Returns ``(out, backward_fn)`` where ``backward_fn(grad_out)``
    yields the gradient with respect to ``score``.
    """
    graph = gidx
    if not is_all(eids):
        graph = graph.edge_subgraph([eids], True).graph
    if norm_by == "src":
        graph = graph.reverse()

    # Shift by the reduced maximum before exponentiating (presumably for
    # numerical stability), then normalise by the reduced sum.
    reduced_max = _gspmm(graph, "copy_rhs", "max", None, score)[0]
    shifted = _gsddmm(graph, "sub", score, reduced_max, "e", "v")
    exp_score = tf.math.exp(shifted)
    reduced_sum = _gspmm(graph, "copy_rhs", "sum", None, exp_score)[0]
    out = _gsddmm(graph, "div", exp_score, reduced_sum, "e", "v")

    def edge_softmax_backward(grad_out):
        weighted = out * grad_out
        reduced = gspmm(graph, "copy_rhs", "sum", None, weighted)
        return weighted - gsddmm(graph, "mul", out, reduced, "e", "v")

    return out, edge_softmax_backward
def segment_reduce_real(op, x, offsets):
    """Run ``_segment_reduce`` and build the matching gradient function.

    Returns ``(y, backward_fn)``. For ``op == "sum"`` the backward pass
    gathers, for every input row, the output gradient of the segment the row
    falls into; any other op is delegated to ``_bwd_segment_cmp`` using the
    auxiliary ``arg`` output of the forward call.
    """
    y, arg = _segment_reduce(op, x, offsets)

    def segment_reduce_backward(dy):
        num_rows = x.shape[0]
        if op != "sum":
            return _bwd_segment_cmp(dy, arg, num_rows)

        # Mark every segment end, then a running sum turns the marks into
        # "number of segment ends at or before this row" -- the index of the
        # output row to gather for each input row.
        segment_ends = asnumpy(offsets[1:])
        marks = np.zeros((num_rows + 1,), dtype=segment_ends.dtype)
        np.add.at(marks, segment_ends, np.ones_like(segment_ends))
        row_to_segment = np.cumsum(marks, -1)[:-1]
        return tf.gather(dy, zerocopy_from_numpy(row_to_segment))

    return y, segment_reduce_backward
def csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes):
    """Differentiable wrapper around ``csrmm_real``.

    Only the two weight tensors are exposed to ``tf.custom_gradient``; the
    graph indices and ``num_vtypes`` are captured by the closure. The CSR
    pieces returned by ``csrmm_real`` are assembled into a unit graph.

    Returns
    -------
    tuple
        ``(gidxC, C_weights)``.
    """

    @tf.custom_gradient
    def _product(A_weights, B_weights):
        return csrmm_real(gidxA, A_weights, gidxB, B_weights, num_vtypes)

    outputs = _product(A_weights, B_weights)
    nrows, ncols, C_indptr, C_indices, C_eids, C_weights = outputs
    formats = ["coo", "csr", "csc"]
    gidxC = create_unitgraph_from_csr(
        num_vtypes,
        nrows.numpy(),
        ncols.numpy(),
        C_indptr,
        C_indices,
        C_eids,
        formats,
    )
    return gidxC, C_weights
def csrsum(gidxs, weights):
    """Differentiable wrapper around ``csrsum_real``.

    The weight tensors are the differentiable inputs; the graph indices are
    captured by the closure. The number of node types is taken from the first
    graph index, and the CSR pieces returned by ``csrsum_real`` are assembled
    into a unit graph.

    Returns
    -------
    tuple
        ``(gidxC, C_weights)``.
    """

    @tf.custom_gradient
    def _total(*weights):
        return csrsum_real(gidxs, weights)

    outputs = _total(*weights)
    nrows, ncols, C_indptr, C_indices, C_eids, C_weights = outputs
    num_vtypes = gidxs[0].number_of_ntypes()
    formats = ["coo", "csr", "csc"]
    gidxC = create_unitgraph_from_csr(
        num_vtypes,
        nrows.numpy(),
        ncols.numpy(),
        C_indptr,
        C_indices,
        C_eids,
        formats,
    )
    return gidxC, C_weights
def tensor(data, dtype=None):
    """Create a TensorFlow tensor from a number, a sequence or a tensor.

    An existing ``tf.Tensor`` is returned unchanged when no ``dtype`` is
    requested or it already has that dtype; otherwise it is cast. Any other
    input is converted with ``tf.convert_to_tensor``, with a bare number
    wrapped into a one-element list first.
    """
    if not isinstance(data, tf.Tensor):
        payload = [data] if isinstance(data, numbers.Number) else data
        return tf.convert_to_tensor(payload, dtype=dtype)

    already_matches = dtype is None or data.dtype == dtype
    if already_matches:
        return data
    return tf.cast(data, dtype=dtype)
def floor_div(in1, in2):
    """Return the element-wise floor division ``in1 // in2``.

    Uses ``tf.math.floordiv`` so the result is rounded toward negative
    infinity and integer inputs never take a detour through floating point.
    The previous "true divide, then cast" form truncated toward zero for
    integers and did not floor floating-point inputs at all. The op runs on
    ``in1``'s device, as before.
    """
    with tf.device(in1.device):
        return tf.math.floordiv(in1, in2)
def argtopk(input, k, dim, descending=True):
    """Return the indices of the ``k`` largest (or smallest) entries.

    Parameters
    ----------
    input : Tensor
        Input tensor.
    k : int
        Number of entries to select.
    dim : int
        Dimension along which to select.
    descending : bool, optional
        ``True`` selects the largest entries, ``False`` the smallest.

    Returns
    -------
    Tensor
        Indices along ``dim``, as produced by ``tf.math.top_k``.
    """
    if not descending:
        # top_k only selects the largest values; negate to get the smallest.
        input = -input
    # top_k works on the last axis, so swap ``dim`` with it and swap back.
    perm = np.arange(input.ndim)
    perm[dim], perm[-1] = perm[-1], perm[dim]
    moved = tf.transpose(input, perm=perm)
    indices = tf.math.top_k(moved, k=k, sorted=True)[1]
    # Indices are positions, not values: they must not be negated back even
    # when the input was negated above.
    return tf.transpose(indices, perm)
def swapaxes(input, axis1, axis2):
    """Return ``input`` with ``axis1`` and ``axis2`` exchanged.

    Implemented as a ``tf.transpose`` with an identity permutation in which
    the two positions are swapped. The modulo normalises negative axis
    numbers inside the permutation.
    """
    rank = input.ndim
    perm = list(range(rank))
    new_first, new_second = axis2 % rank, axis1 % rank
    perm[axis1] = new_first
    perm[axis2] = new_second
    return tf.transpose(input, perm=perm)
= [] cum_row = 0 pad_nparray = np.zeros((ndim, 2), dtype=np.int32) for l in lengths: t = input[cum_row : cum_row + l] pad_nparray[0, 1] = max_len - l t = tf.pad( t, tf.constant(pad_nparray), mode="CONSTANT", constant_values=value ) tensor_list.append(t) cum_row += l return tf.stack(tensor_list, axis=0) def pack_padded_tensor(input, lengths): out_list = [] for i, l in enumerate(lengths): t = input[i] out = t[:l] out_list.append(out) return tf.concat(out_list, axis=0) def boolean_mask(input, mask): return tf.boolean_mask(input, mask) def equal(x, y): return x == y def allclose(x, y, rtol=1e-4, atol=1e-4): return np.allclose( tf.convert_to_tensor(x).numpy(), tf.convert_to_tensor(y).numpy(), rtol=rtol, atol=atol, ) def logical_not(input): return ~input def logical_and(input1, input2): return tf.math.logical_and(input1, input2) def clone(input): # TF tensor is always immutable so returning the input is safe. return input def clamp(data, min_val, max_val): return tf.clip_by_value(data, min_val, max_val) def replace_inf_with_zero(x): return tf.where(tf.abs(x) == np.inf, 0, x) def count_nonzero(input): return int(tf.math.count_nonzero(input)) def unique(input, return_inverse=False, return_counts=False): if return_inverse and return_counts: return tf.unique_with_counts(input) elif return_counts: result = tf.unique_with_counts(input) return result.y, result.count elif return_inverse: return tf.unique(input) else: return tf.unique(input).y def full_1d(length, fill_value, dtype, ctx): with tf.device(ctx): t = tf.fill([length], value=fill_value) t = tf.cast(t, dtype=dtype) return t def nonzero_1d(input): nonzero_bool = tf.cast(input, tf.bool) return tf.reshape(tf.where(nonzero_bool), (-1,)) def sort_1d(input): return tf.sort(input), tf.cast(tf.argsort(input), dtype=tf.int64) def arange(start, stop, dtype=tf.int64, ctx=None): if not ctx: ctx = "/cpu:0" with tf.device(ctx): t = tf.range(start, stop, dtype=dtype) return t def rand_shuffle(arr): return tf.random.shuffle(arr) def 
def sync():
    """Wait until all asynchronously dispatched TensorFlow eager ops finish.

    Returns
    -------
    None
    """
    # The eager-context module is imported under a private alias on purpose:
    # binding a local called ``context`` while also calling ``context()``
    # makes the name local to the function and raises UnboundLocalError
    # before anything is synchronized.
    # NOTE(review): ``tensorflow.python.eager.context`` is not part of the
    # public TF API -- confirm ``async_wait`` exists in the supported versions.
    from tensorflow.python.eager import context as _eager_context

    _eager_context.async_wait()
def attach_grad(x):
    """Register ``x`` with the module-level gradient context and return it.

    The tensor is handed to ``cgrad.add_tensor`` so that its gradient can be
    looked up later through :func:`grad` / :func:`is_no_grad`.

    Parameters
    ----------
    x : Tensor
        The tensor whose gradient should be tracked.

    Returns
    -------
    Tensor
        The same object ``x`` that was passed in.
    """
    cgrad.add_tensor(x)
    return x
ALL = "__ALL__"
# An alias for [:]
SLICE_FULL = slice(None, None, None)
# Reserved column names for storing parent node/edge types and IDs in flattened heterographs
NTYPE = "_TYPE"
NID = "_ID"
ETYPE = "_TYPE"
EID = "_ID"

_INTERNAL_COLUMNS = {NTYPE, NID, ETYPE, EID}


def is_internal_column(name):
    """Return true if the column name is reserved by DGL."""
    return name in _INTERNAL_COLUMNS


def is_all(arg):
    """Return true if the argument is a special symbol for all nodes or edges."""
    return isinstance(arg, str) and arg == ALL


# pylint: disable=invalid-name
_default_formatwarning = warnings.formatwarning


class DGLWarning(UserWarning):
    """DGL Warning class."""


# pylint: disable=unused-argument
def dgl_warning_format(message, category, filename, lineno, line=None):
    """Format DGL warnings.

    Warnings whose category is :class:`DGLWarning` (or a subclass of it) are
    rendered as a short ``"DGL Warning: <message>"`` line. Every other
    category is delegated to the formatter that was installed when this
    module was imported.

    Parameters
    ----------
    message : Warning or str
        The warning message.
    category : type
        The warning category, i.e. a class and not an instance.
    filename : str
        File the warning is attributed to.
    lineno : int
        Line number the warning is attributed to.
    line : str, optional
        Source line to show; forwarded unchanged to the default formatter.

    Returns
    -------
    str
        The formatted warning text.
    """
    # ``category`` is a class, so the check has to be issubclass; an
    # isinstance test against DGLWarning can never succeed for it.
    if isinstance(category, type) and issubclass(category, DGLWarning):
        return "DGL Warning: {}\n".format(message)
    return _default_formatwarning(
        message, category, filename, lineno, line=line
    )


def dgl_warning(message, category=DGLWarning, stacklevel=2):
    """DGL warning wrapper that defaults to ``DGLWarning`` instead of
    ``UserWarning`` category."""
    return warnings.warn(message, category=category, stacklevel=stacklevel)
The nodes and edges are relabeled to be disjoint segments: ================= ========= ================= === ========= graphs[0] graphs[1] ... graphs[k] ================= ========= ================= === ========= Original node ID 0 ~ N_0 0 ~ N_1 ... 0 ~ N_k New node ID 0 ~ N_0 N_0 ~ N_0+N_1 ... \sum_{i=0}^{k-1} N_i ~ \sum_{i=0}^k N_i ================= ========= ================= === ========= Because of this, many of the computations on a batched graph are the same as if performed on each graph individually, but become much more efficient since they can be parallelized easily. This makes ``dgl.batch`` very useful for tasks dealing with many graph samples such as graph classification tasks. For heterograph inputs, they must share the same set of relations (i.e., node types and edge types) and the function will perform batching on each relation one by one. Thus, the result is also a heterograph and has the same set of relations as the inputs. The numbers of nodes and edges of the input graphs are accessible via the :func:`DGLGraph.batch_num_nodes` and :func:`DGLGraph.batch_num_edges` attributes of the resulting graph. For homogeneous graphs, they are 1D integer tensors, with each element being the number of nodes/edges of the corresponding input graph. For heterographs, they are dictionaries of 1D integer tensors, with node type or edge type as the keys. The function supports batching batched graphs. The batch size of the result graph is the sum of the batch sizes of all the input graphs. By default, node/edge features are batched by concatenating the feature tensors of all input graphs. This thus requires features of the same name to have the same data type and feature size. One can pass ``None`` to the ``ndata`` or ``edata`` argument to prevent feature batching, or pass a list of strings to specify which features to batch. To unbatch the graph back to a list, use the :func:`dgl.unbatch` function. Parameters ---------- graphs : list[DGLGraph] Input graphs. 
ndata : list[str], None, optional Node features to batch. edata : list[str], None, optional Edge features to batch. Returns ------- DGLGraph Batched graph. Examples -------- Batch homogeneous graphs >>> import dgl >>> import torch as th >>> # 4 nodes, 3 edges >>> g1 = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 3]))) >>> # 3 nodes, 4 edges >>> g2 = dgl.graph((th.tensor([0, 0, 0, 1]), th.tensor([0, 1, 2, 0]))) >>> bg = dgl.batch([g1, g2]) >>> bg Graph(num_nodes=7, num_edges=7, ndata_schemes={} edata_schemes={}) >>> bg.batch_size 2 >>> bg.batch_num_nodes() tensor([4, 3]) >>> bg.batch_num_edges() tensor([3, 4]) >>> bg.edges() (tensor([0, 1, 2, 4, 4, 4, 5], tensor([1, 2, 3, 4, 5, 6, 4])) Batch batched graphs >>> bbg = dgl.batch([bg, bg]) >>> bbg.batch_size 4 >>> bbg.batch_num_nodes() tensor([4, 3, 4, 3]) >>> bbg.batch_num_edges() tensor([3, 4, 3, 4]) Batch graphs with feature data >>> g1.ndata['x'] = th.zeros(g1.num_nodes(), 3) >>> g1.edata['w'] = th.ones(g1.num_edges(), 2) >>> g2.ndata['x'] = th.ones(g2.num_nodes(), 3) >>> g2.edata['w'] = th.zeros(g2.num_edges(), 2) >>> bg = dgl.batch([g1, g2]) >>> bg.ndata['x'] tensor([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 1, 1], [1, 1, 1], [1, 1, 1]]) >>> bg.edata['w'] tensor([[1, 1], [1, 1], [1, 1], [0, 0], [0, 0], [0, 0], [0, 0]]) Batch heterographs >>> hg1 = dgl.heterograph({ ... ('user', 'plays', 'game') : (th.tensor([0, 1]), th.tensor([0, 0]))}) >>> hg2 = dgl.heterograph({ ... 
('user', 'plays', 'game') : (th.tensor([0, 0, 0]), th.tensor([1, 0, 2]))}) >>> bhg = dgl.batch([hg1, hg2]) >>> bhg Graph(num_nodes={'user': 3, 'game': 4}, num_edges={('user', 'plays', 'game'): 5}, metagraph=[('drug', 'game')]) >>> bhg.batch_size 2 >>> bhg.batch_num_nodes() {'user' : tensor([2, 1]), 'game' : tensor([1, 3])} >>> bhg.batch_num_edges() {('user', 'plays', 'game') : tensor([2, 3])} See Also -------- unbatch """ if len(graphs) == 0: raise DGLError("The input list of graphs cannot be empty.") if not (is_all(ndata) or isinstance(ndata, list) or ndata is None): raise DGLError( "Invalid argument ndata: must be a string list but got {}.".format( type(ndata) ) ) if not (is_all(edata) or isinstance(edata, list) or edata is None): raise DGLError( "Invalid argument edata: must be a string list but got {}.".format( type(edata) ) ) if any(g.is_block for g in graphs): raise DGLError("Batching a MFG is not supported.") relations = list(graphs[0].canonical_etypes) relation_ids = [graphs[0].get_etype_id(r) for r in relations] ntypes = list(graphs[0].ntypes) ntype_ids = [graphs[0].get_ntype_id(n) for n in ntypes] etypes = [etype for _, etype, _ in relations] gidx = disjoint_union( graphs[0]._graph.metagraph, [g._graph for g in graphs] ) retg = DGLGraph(gidx, ntypes, etypes) # Compute batch num nodes bnn = {} for ntype in ntypes: bnn[ntype] = F.cat([g.batch_num_nodes(ntype) for g in graphs], 0) retg.set_batch_num_nodes(bnn) # Compute batch num edges bne = {} for etype in relations: bne[etype] = F.cat([g.batch_num_edges(etype) for g in graphs], 0) retg.set_batch_num_edges(bne) # Batch node feature if ndata is not None: for ntype_id, ntype in zip(ntype_ids, ntypes): all_empty = all(g._graph.num_nodes(ntype_id) == 0 for g in graphs) frames = [ g._node_frames[ntype_id] for g in graphs if g._graph.num_nodes(ntype_id) > 0 or all_empty ] # TODO: do we require graphs with no nodes/edges to have the same schema? 
def unbatch(g, node_split=None, edge_split=None):
    """Revert the batch operation by splitting the given graph into a list of
    small ones.

    This is the reverse operation of :func:`dgl.batch`. If the ``node_split``
    or the ``edge_split`` is not given, it calls :func:`DGLGraph.batch_num_nodes`
    and :func:`DGLGraph.batch_num_edges` of the input graph to get the
    information.

    If the ``node_split`` or the ``edge_split`` arguments are given,
    it will partition the graph according to the given segments. One must
    ensure that the partition is valid -- edges of the i^th graph only connect
    nodes belonging to the i^th graph. Otherwise, DGL will throw an error.

    The function supports heterograph input, in which case the two split
    section arguments shall be of dictionary type -- similar to the
    :func:`DGLGraph.batch_num_nodes` and :func:`DGLGraph.batch_num_edges`
    attributes of a heterograph.

    Parameters
    ----------
    g : DGLGraph
        Input graph to unbatch.
    node_split : Tensor, dict[str, Tensor], optional
        Number of nodes of each result graph.
    edge_split : Tensor, dict[str, Tensor], optional
        Number of edges of each result graph.

    Returns
    -------
    list[DGLGraph]
        Unbatched list of graphs.

    Raises
    ------
    DGLError
        If a non-dictionary split is given for a graph with multiple
        node/edge types, if a split does not cover every node type or
        canonical edge type, or if the splits disagree on the number of
        result graphs.

    Examples
    --------

    Unbatch a batched graph

    >>> import dgl
    >>> import torch as th
    >>> # 4 nodes, 3 edges
    >>> g1 = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 3])))
    >>> # 3 nodes, 4 edges
    >>> g2 = dgl.graph((th.tensor([0, 0, 0, 1]), th.tensor([0, 1, 2, 0])))
    >>> # add features
    >>> g1.ndata['x'] = th.zeros(g1.num_nodes(), 3)
    >>> g1.edata['w'] = th.ones(g1.num_edges(), 2)
    >>> g2.ndata['x'] = th.ones(g2.num_nodes(), 3)
    >>> g2.edata['w'] = th.zeros(g2.num_edges(), 2)
    >>> bg = dgl.batch([g1, g2])
    >>> f1, f2 = dgl.unbatch(bg)
    >>> f1
    Graph(num_nodes=4, num_edges=3,
          ndata_schemes={'x' : Scheme(shape=(3,), dtype=torch.float32)}
          edata_schemes={'w' : Scheme(shape=(2,), dtype=torch.float32)})
    >>> f2
    Graph(num_nodes=3, num_edges=4,
          ndata_schemes={'x' : Scheme(shape=(3,), dtype=torch.float32)}
          edata_schemes={'w' : Scheme(shape=(2,), dtype=torch.float32)})

    With provided split arguments:

    >>> g1 = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 3])))
    >>> g2 = dgl.graph((th.tensor([0, 0, 0, 1]), th.tensor([0, 1, 2, 0])))
    >>> g3 = dgl.graph((th.tensor([0]), th.tensor([1])))
    >>> bg = dgl.batch([g1, g2, g3])
    >>> bg.batch_num_nodes()
    tensor([4, 3, 2])
    >>> bg.batch_num_edges()
    tensor([3, 4, 1])
    >>> # unbatch but merge g2 and g3
    >>> f1, f2 = dgl.unbatch(bg, th.tensor([4, 5]), th.tensor([3, 5]))
    >>> f1
    Graph(num_nodes=4, num_edges=3,
          ndata_schemes={}
          edata_schemes={})
    >>> f2
    Graph(num_nodes=5, num_edges=5,
          ndata_schemes={}
          edata_schemes={})

    Heterograph input

    >>> hg1 = dgl.heterograph({
    ...     ('user', 'plays', 'game') : (th.tensor([0, 1]), th.tensor([0, 0]))})
    >>> hg2 = dgl.heterograph({
    ...     ('user', 'plays', 'game') : (th.tensor([0, 0, 0]), th.tensor([1, 0, 2]))})
    >>> bhg = dgl.batch([hg1, hg2])
    >>> f1, f2 = dgl.unbatch(bhg)
    >>> f1
    Graph(num_nodes={'user': 2, 'game': 1},
          num_edges={('user', 'plays', 'game'): 2},
          metagraph=[('user', 'game', 'plays')])
    >>> f2
    Graph(num_nodes={'user': 1, 'game': 3},
          num_edges={('user', 'plays', 'game'): 3},
          metagraph=[('user', 'game', 'plays')])

    See Also
    --------
    batch
    """
    # Number of result graphs; every split must agree on it.
    num_split = None

    # Parse node_split
    if node_split is None:
        node_split = {ntype: g.batch_num_nodes(ntype) for ntype in g.ntypes}
    elif not isinstance(node_split, Mapping):
        if len(g.ntypes) != 1:
            raise DGLError(
                "Must provide a dictionary for argument node_split when"
                " there are multiple node types."
            )
        node_split = {g.ntypes[0]: node_split}
    if node_split.keys() != set(g.ntypes):
        raise DGLError("Must specify node_split for each node type.")
    for split in node_split.values():
        if num_split is not None and num_split != len(split):
            raise DGLError(
                "All node_split and edge_split must specify the same number"
                " of split sizes."
            )
        num_split = len(split)

    # Parse edge_split
    if edge_split is None:
        edge_split = {
            etype: g.batch_num_edges(etype) for etype in g.canonical_etypes
        }
    elif not isinstance(edge_split, Mapping):
        if len(g.etypes) != 1:
            raise DGLError(
                "Must provide a dictionary for argument edge_split when"
                " there are multiple edge types."
            )
        edge_split = {g.canonical_etypes[0]: edge_split}
    if edge_split.keys() != set(g.canonical_etypes):
        raise DGLError("Must specify edge_split for each canonical edge type.")
    for split in edge_split.values():
        if num_split is not None and num_split != len(split):
            raise DGLError(
                "All node_split and edge_split must specify the same number"
                " of split sizes."
            )
        num_split = len(split)

    # Work with plain Python lists of sizes from here on.
    node_split = {
        k: F.asnumpy(split).tolist() for k, split in node_split.items()
    }
    edge_split = {
        k: F.asnumpy(split).tolist() for k, split in edge_split.items()
    }

    # Split edges for each relation
    edge_dict_per = [{} for _ in range(num_split)]
    for rel in g.canonical_etypes:
        srctype, _, dsttype = rel
        srcnid_off = dstnid_off = 0
        u, v = g.edges(order="eid", etype=rel)
        us = F.split(u, edge_split[rel], 0)
        vs = F.split(v, edge_split[rel], 0)
        for i, (subu, subv) in enumerate(zip(us, vs)):
            # Shift node IDs back so each result graph starts from 0.
            edge_dict_per[i][rel] = (subu - srcnid_off, subv - dstnid_off)
            srcnid_off += node_split[srctype][i]
            dstnid_off += node_split[dsttype][i]
    num_nodes_dict_per = [
        {k: split[i] for k, split in node_split.items()}
        for i in range(num_split)
    ]

    # Create graphs
    gs = [
        convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
        for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)
    ]

    # Unbatch node features
    for ntype in g.ntypes:
        for key, feat in g.nodes[ntype].data.items():
            subfeats = F.split(feat, node_split[ntype], 0)
            for subg, subf in zip(gs, subfeats):
                subg.nodes[ntype].data[key] = subf

    # Unbatch edge features
    for etype in g.canonical_etypes:
        for key, feat in g.edges[etype].data.items():
            subfeats = F.split(feat, edge_split[etype], 0)
            for subg, subf in zip(gs, subfeats):
                subg.edges[etype].data[key] = subf

    return gs
>>> g1 = dgl.graph(([0, 1], [2, 3])) >>> g2 = dgl.graph(([1], [2])) >>> bg = dgl.batch([g1, g2]) Get the second component graph. >>> g = dgl.slice_batch(bg, 1) >>> print(g) Graph(num_nodes=3, num_edges=1, ndata_schemes={} edata_schemes={}) """ start_nid = [] num_nodes = [] for ntype in g.ntypes: batch_num_nodes = g.batch_num_nodes(ntype) num_nodes.append(F.as_scalar(batch_num_nodes[gid])) if gid == 0: start_nid.append(0) else: start_nid.append( F.as_scalar(F.sum(F.slice_axis(batch_num_nodes, 0, 0, gid), 0)) ) start_eid = [] num_edges = [] for etype in g.canonical_etypes: batch_num_edges = g.batch_num_edges(etype) num_edges.append(F.as_scalar(batch_num_edges[gid])) if gid == 0: start_eid.append(0) else: start_eid.append( F.as_scalar(F.sum(F.slice_axis(batch_num_edges, 0, 0, gid), 0)) ) # Slice graph structure gidx = slice_gidx( g._graph, utils.toindex(num_nodes), utils.toindex(start_nid), utils.toindex(num_edges), utils.toindex(start_eid), ) retg = DGLGraph(gidx, g.ntypes, g.etypes) # Slice node features for ntid, ntype in enumerate(g.ntypes): stnid = start_nid[ntid] for key, feat in g.nodes[ntype].data.items(): subfeats = F.slice_axis(feat, 0, stnid, stnid + num_nodes[ntid]) retg.nodes[ntype].data[key] = subfeats if store_ids: retg.nodes[ntype].data[NID] = F.arange( stnid, stnid + num_nodes[ntid], retg.idtype, retg.device ) # Slice edge features for etid, etype in enumerate(g.canonical_etypes): steid = start_eid[etid] for key, feat in g.edges[etype].data.items(): subfeats = F.slice_axis(feat, 0, steid, steid + num_edges[etid]) retg.edges[etype].data[key] = subfeats if store_ids: retg.edges[etype].data[EID] = F.arange( steid, steid + num_edges[etid], retg.idtype, retg.device ) return retg ================================================ FILE: python/dgl/container.py ================================================ """Container data structures used in DGL runtime. reference: tvm/python/tvm/collections.py """ from __future__ import absolute_import as _abs from . 
@register_object
class List(ObjectBase):
    """List container of DGL.

    You do not need to create List explicitly.
    Normally python list and tuple will be converted automatically
    to List during dgl function call.
    You may get List in return values of DGL function call.
    """

    def __getitem__(self, i):
        """Return the item at index ``i`` or a Python list for a slice.

        Parameters
        ----------
        i : int or slice
            Position to read. Negative integers count from the end. Slices
            follow the semantics of built-in sequences.

        Returns
        -------
        object or list
            The stored item (unwrapped through ``.data`` when it is a
            ``Value``), or a list of such items for a slice.

        Raises
        ------
        IndexError
            If an integer index is out of range.
        """
        # Query the size once instead of on every comparison.
        size = len(self)
        if isinstance(i, slice):
            # slice.indices() resolves omitted, negative and out-of-range
            # bounds as well as negative steps the way built-in sequences do.
            return [self[idx] for idx in range(*i.indices(size))]

        if i < -size or i >= size:
            raise IndexError(
                "List index out of range. List size: {}, got index {}".format(
                    size, i
                )
            )
        if i < 0:
            i += size
        ret = _api_internal._ListGetItem(self, i)
        if isinstance(ret, Value):
            ret = ret.data
        return ret

    def __len__(self):
        """Return the number of items in the list."""
        return _api_internal._ListSize(self)
""" def items(self): """Get the items from the map""" akvs = _api_internal._MapItems(self) return [(akvs[i], akvs[i + 1]) for i in range(0, len(akvs), 2)] @register_object class Value(ObjectBase): """Object wrapper for various values.""" @property def data(self): """Return the value data.""" return _api_internal._ValueGet(self) def convert_to_strmap(value): """Convert a python dictionary to a dgl.contrainer.StrMap""" assert isinstance(value, dict), "Only support dict" if len(value) == 0: return _api_internal._EmptyStrMap() else: return convert_to_object(value) ================================================ FILE: python/dgl/convert.py ================================================ """Module for converting graph from/to other object.""" from collections import defaultdict from collections.abc import Mapping import networkx as nx import numpy as np from scipy.sparse import spmatrix from . import backend as F, graph_index, heterograph_index, utils from .base import DGLError, EID, ETYPE, NID, NTYPE from .heterograph import combine_frames, DGLBlock, DGLGraph __all__ = [ "graph", "hetero_from_shared_memory", "heterograph", "create_block", "block_to_graph", "to_heterogeneous", "to_homogeneous", "from_scipy", "bipartite_from_scipy", "from_networkx", "bipartite_from_networkx", "to_networkx", "from_cugraph", "to_cugraph", ] def graph( data, *, num_nodes=None, idtype=None, device=None, row_sorted=False, col_sorted=False, ): """Create a graph and return. Parameters ---------- data : graph data The data for constructing a graph, which takes the form of :math:`(U, V)`. :math:`(U[i], V[i])` forms the edge with ID :math:`i` in the graph. The allowed data formats are: - ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs. DGL calls this format "tuple of node-tensors". The tensors should have the same data type of int32/int64 and device context (see below the descriptions of :attr:`idtype` and :attr:`device`). 
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``. - ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation of the graph's adjacency matrix. The first one is the row index pointer. The second one is the column indices. The third one is the edge IDs, which can be empty to represent consecutive integer IDs starting from 0. - ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation of the graph's adjacency matrix. The first one is the column index pointer. The second one is the row indices. The third one is the edge IDs, which can be empty to represent consecutive integer IDs starting from 0. The tensors can be replaced with any iterable of integers (e.g. list, tuple, numpy.ndarray). num_nodes : int, optional The number of nodes in the graph. If not given, this will be the largest node ID plus 1 from the :attr:`data` argument. If given and the value is no greater than the largest node ID from the :attr:`data` argument, DGL will raise an error. idtype : int32 or int64, optional The data type for storing the structure-related graph information such as node and edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``). If ``None`` (default), DGL infers the ID type from the :attr:`data` argument. See "Notes" for more details. device : device context, optional The device of the returned graph, which should be a framework-specific device object (e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of the :attr:`data` argument. If :attr:`data` is not a tuple of node-tensors, the returned graph is on CPU. If the specified :attr:`device` differs from that of the provided tensors, it casts the given tensors to the specified device first. row_sorted : bool, optional Whether or not the rows of the COO are in ascending order. col_sorted : bool, optional Whether or not the columns of the COO are in ascending order within each row. 
This only has an effect when ``row_sorted`` is True. Returns ------- DGLGraph The created graph. Notes ----- 1. If the :attr:`idtype` argument is not given then: - in the case of the tuple of node-tensor format, DGL uses the data type of the given ID tensors. - in the case of the tuple of sequence format, DGL uses int64. Once the graph has been created, you can change the data type by using :func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`. If the specified :attr:`idtype` argument differs from the data type of the provided tensors, it casts the given tensors to the specified data type first. 2. The most efficient construction approach is to provide a tuple of node tensors without specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares the storage with the input node-tensors in this case. 3. DGL internally maintains multiple copies of the graph structure in different `sparse formats `_ and chooses the most efficient one depending on the computation invoked. If memory usage becomes an issue in the case of large graphs, use :func:`dgl.DGLGraph.formats` to restrict the allowed formats. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a small three-edge graph. >>> # Source nodes for edges (2, 1), (3, 2), (4, 3) >>> src_ids = torch.tensor([2, 3, 4]) >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3) >>> dst_ids = torch.tensor([1, 2, 3]) >>> g = dgl.graph((src_ids, dst_ids)) Explicitly specify the number of nodes in the graph. >>> g = dgl.graph((src_ids, dst_ids), num_nodes=100) Create a graph on the first GPU with data type int32. >>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0') Creating a graph with CSR representation: >>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], []))) Create the same graph with CSR representation and edge IDs. 
def hetero_from_shared_memory(name):
    """Create a heterograph from shared memory with the given name.

    The newly created graph will have the same node types and edge types as the
    original graph. But it does not have node features or edge features.

    Parameters
    ----------
    name : str
        The name of the shared memory.

    Returns
    -------
    DGLGraph
        The graph built from the index, node types and edge types read from
        the shared memory.
    """
    g, ntypes, etypes = heterograph_index.create_heterograph_from_shared_memory(
        name
    )
    return DGLGraph(g, ntypes, etypes)
def heterograph(data_dict, num_nodes_dict=None, idtype=None, device=None):
    """Create a heterogeneous graph and return.

    Parameters
    ----------
    data_dict : graph data
        The dictionary data for constructing a heterogeneous graph. The keys are in the form of
        string triplets (src_type, edge_type, dst_type), specifying the source node,
        edge, and destination node types. The values are graph data in the form of
        :math:`(U, V)`, where :math:`(U[i], V[i])` forms the edge with ID :math:`i`.
        The allowed graph data formats are:

        - ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs. DGL calls
          this format "tuple of node-tensors". The tensors should have the same data type,
          which must be either int32 or int64. They should also have the same device context
          (see below the descriptions of :attr:`idtype` and :attr:`device`).
        - ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
        - ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
          of the graph's adjacency matrix. The first one is the row index pointer. The
          second one is the column indices. The third one is the edge IDs, which can be empty
          (i.e. with 0 elements) to represent consecutive integer IDs starting from 0.
        - ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
          of the graph's adjacency matrix. The first one is the column index pointer. The
          second one is the row indices. The third one is the edge IDs, which can be empty
          to represent consecutive integer IDs starting from 0.

        The tensors can be replaced with any iterable of integers (e.g. list, tuple,
        numpy.ndarray).
    num_nodes_dict : dict[str, int], optional
        The number of nodes for some node types, which is a dictionary mapping a node type
        :math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
        :math:`T`, DGL finds the largest ID appearing in *every* graph data whose source
        or destination node type is :math:`T`, and sets the number of nodes to be that ID
        plus one. If given and the value is no greater than the largest ID for some node type,
        DGL will raise an error. By default, DGL infers the number of nodes for all node types.
        The given dictionary is never modified.
    idtype : int32 or int64, optional
        The data type for storing the structure-related graph information such as node and
        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
        If ``None`` (default), DGL infers the ID type from the :attr:`data_dict` argument.
    device : device context, optional
        The device of the returned graph, which should be a framework-specific device object
        (e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
        the :attr:`data_dict` argument. If :attr:`data_dict` is not a tuple of node-tensors,
        the returned graph is on CPU.  If the specified :attr:`device` differs from that of the
        provided tensors, it casts the given tensors to the specified device first.

    Returns
    -------
    DGLGraph
        The created graph.

    Raises
    ------
    DGLError
        If a value of :attr:`data_dict` is a SciPy sparse matrix or a NetworkX graph, or if
        a count given in :attr:`num_nodes_dict` is too small for the IDs found in the data.

    Notes
    -----
    1. If the :attr:`idtype` argument is not given then:

       - in the case of the tuple of node-tensor format, DGL uses
         the data type of the given ID tensors.
       - in the case of the tuple of sequence format, DGL uses int64.

       Once the graph has been created, you can change the data type by using
       :func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.

       If the specified :attr:`idtype` argument differs from the data type of the provided
       tensors, it casts the given tensors to the specified data type first.
    2. The most efficient construction approach is to provide a tuple of node tensors without
       specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
       the storage with the input node-tensors in this case.
    3. DGL internally maintains multiple copies of the graph structure in different sparse
       formats and chooses the most efficient one depending on the computation invoked.
       If memory usage becomes an issue in the case of large graphs, use
       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.
    4. DGL internally decides a deterministic order for the same set of node types and canonical
       edge types, which does not necessarily follow the order in :attr:`data_dict`.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a heterograph with three canonical edge types.

    >>> data_dict = {
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'follows', 'topic'): (torch.tensor([1, 1]), torch.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 3]), torch.tensor([3, 4]))
    ... }
    >>> g = dgl.heterograph(data_dict)
    >>> g
    Graph(num_nodes={'game': 5, 'topic': 3, 'user': 4},
          num_edges={('user', 'follows', 'topic'): 2, ('user', 'follows', 'user'): 2,
                     ('user', 'plays', 'game'): 2},
          metagraph=[('user', 'topic', 'follows'), ('user', 'user', 'follows'),
                     ('user', 'game', 'plays')])

    Explicitly specify the number of nodes for each node type in the graph.

    >>> num_nodes_dict = {'user': 4, 'topic': 4, 'game': 6}
    >>> g = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)

    Create a graph on the first GPU with data type int32.

    >>> g = dgl.heterograph(data_dict, idtype=torch.int32, device='cuda:0')
    """
    # Counts supplied by the caller are validated; counts for every other node
    # type are inferred. Both live in a private copy so that the argument is
    # never mutated and a partially specified dictionary no longer raises a
    # KeyError.
    given_num_nodes = {} if num_nodes_dict is None else dict(num_nodes_dict)
    num_nodes = dict(given_num_nodes)

    # Convert all data to node tensors first
    node_tensor_dict = {}
    for (sty, ety, dty), data in data_dict.items():
        if isinstance(data, spmatrix):
            raise DGLError(
                "dgl.heterograph no longer supports graph construction from a SciPy "
                "sparse matrix, use dgl.from_scipy instead."
            )

        if isinstance(data, nx.Graph):
            raise DGLError(
                "dgl.heterograph no longer supports graph construction from a NetworkX "
                "graph, use dgl.from_networkx instead."
            )
        is_bipartite = sty != dty
        (sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
            data, idtype, bipartite=is_bipartite
        )
        node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
        for ntype, id_range in ((sty, urange), (dty, vrange)):
            if ntype not in given_num_nodes:
                # Not specified by the caller: keep the running maximum of
                # (largest ID + 1) over all relations touching this type.
                num_nodes[ntype] = max(num_nodes.get(ntype, 0), id_range)
            elif given_num_nodes[ntype] < id_range:
                # sanity check of a caller-supplied count
                raise DGLError(
                    "The given number of nodes of node type {} must be larger than"
                    " the max ID in the data, but got {} and {}.".format(
                        ntype, given_num_nodes[ntype], id_range - 1
                    )
                )

    # Create the graph
    (
        metagraph,
        ntypes,
        etypes,
        relations,
    ) = heterograph_index.create_metagraph_index(
        num_nodes.keys(), node_tensor_dict.keys()
    )
    num_nodes_per_type = utils.toindex(
        [num_nodes[ntype] for ntype in ntypes], "int64"
    )
    rel_graphs = []
    for srctype, etype, dsttype in relations:
        sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
        g = create_from_edges(
            sparse_fmt,
            arrays,
            srctype,
            etype,
            dsttype,
            num_nodes[srctype],
            num_nodes[dsttype],
        )
        rel_graphs.append(g)

    # create graph index
    hgidx = heterograph_index.create_heterograph_from_relations(
        metagraph, [rgrh._graph for rgrh in rel_graphs], num_nodes_per_type
    )
    retg = DGLGraph(hgidx, ntypes, etypes)

    return retg.to(device)
def create_block(
    data_dict,
    num_src_nodes=None,
    num_dst_nodes=None,
    idtype=None,
    device=None,
    node_count_check=True,
):
    """Create a message flow graph (MFG) as a :class:`DGLBlock` object.

    Parameters
    ----------
    data_dict : graph data
        The dictionary data for constructing a MFG. The keys are in the form of
        string triplets (src_type, edge_type, dst_type), specifying the source node type,
        edge type, and destination node type. The values are graph data in the form of
        :math:`(U, V)`, where :math:`(U[i], V[i])` forms the edge with ID :math:`i`.
        The allowed graph data formats are:

        - ``(Tensor, Tensor)``: Each tensor must be a 1D tensor containing node IDs. DGL calls
          this format "tuple of node-tensors". The tensors should have the same data type,
          which must be either int32 or int64. They should also have the same device context
          (see below the descriptions of :attr:`idtype` and :attr:`device`).
        - ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
        - ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
          of the graph's adjacency matrix. The first one is the row index pointer. The
          second one is the column indices. The third one is the edge IDs, which can be empty
          to represent consecutive integer IDs starting from 0.
        - ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
          of the graph's adjacency matrix. The first one is the column index pointer. The
          second one is the row indices. The third one is the edge IDs, which can be empty
          to represent consecutive integer IDs starting from 0.

        The tensors can be replaced with any iterable of integers (e.g. list, tuple,
        numpy.ndarray).

        If you would like to create a MFG with a single source node type, a single destination
        node type, and a single edge type, then you can pass in the graph data directly
        without wrapping it as a dictionary.
    num_src_nodes : dict[str, int] or int, optional
        The number of nodes for each source node type, which is a dictionary mapping a node type
        :math:`T` to the number of :math:`T`-typed source nodes.

        If not given for a node type :math:`T`, DGL finds the largest ID appearing in *every*
        graph data whose source node type is :math:`T`, and sets the number of nodes to
        be that ID plus one. If given and the value is no greater than the largest ID for some
        source node type, DGL will raise an error. By default, DGL infers the number of nodes for
        all source node types.

        If you would like to create a MFG with a single source node type, a single destination
        node type, and a single edge type, then you can pass in an integer to directly
        represent the number of source nodes.
    num_dst_nodes : dict[str, int] or int, optional
        The number of nodes for each destination node type, which is a dictionary mapping a node
        type :math:`T` to the number of :math:`T`-typed destination nodes.

        If not given for a node type :math:`T`, DGL finds the largest ID appearing in *every*
        graph data whose destination node type is :math:`T`, and sets the number of nodes to
        be that ID plus one. If given and the value is no greater than the largest ID for some
        destination node type, DGL will raise an error. By default, DGL infers the number of
        nodes for all destination node types.

        If you would like to create a MFG with a single source node type, a single
        destination node type, and a single edge type, then you can pass in an integer to
        directly represent the number of destination nodes.
    idtype : int32 or int64, optional
        The data type for storing the structure-related graph information such as node and
        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
        If ``None`` (default), DGL infers the ID type from the :attr:`data_dict` argument.
    device : device context, optional
        The device of the returned graph, which should be a framework-specific device object
        (e.g., ``torch.device``). If ``None`` (default), DGL uses the device of the tensors of
        the :attr:`data_dict` argument. If :attr:`data_dict` is not a tuple of node-tensors,
        the returned graph is on CPU.  If the specified :attr:`device` differs from that of the
        provided tensors, it casts the given tensors to the specified device first.
    node_count_check : bool, optional
        When num_src_nodes and num_dst_nodes are passed, whether we should perform
        sanity checks to ensure they are valid.

    Returns
    -------
    DGLBlock
        The created MFG.

    Notes
    -----
    1. If the :attr:`idtype` argument is not given then:

       - in the case of the tuple of node-tensor format, DGL uses
         the data type of the given ID tensors.
       - in the case of the tuple of sequence format, DGL uses int64.

       Once the graph has been created, you can change the data type by using
       :func:`dgl.DGLGraph.long` or :func:`dgl.DGLGraph.int`.

       If the specified :attr:`idtype` argument differs from the data type of the provided
       tensors, it casts the given tensors to the specified data type first.
    2. The most efficient construction approach is to provide a tuple of node tensors without
       specifying :attr:`idtype` and :attr:`device`. This is because the returned graph shares
       the storage with the input node-tensors in this case.
    3. DGL internally maintains multiple copies of the graph structure in different sparse
       formats and chooses the most efficient one depending on the computation invoked.
       If memory usage becomes an issue in the case of large graphs, use
       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.
    4. DGL internally decides a deterministic order for the same set of node types and canonical
       edge types, which does not necessarily follow the order in :attr:`data_dict`.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> block = dgl.create_block(([0, 1, 2], [1, 2, 3]), num_src_nodes=3, num_dst_nodes=4)
    >>> block
    Block(num_src_nodes=3, num_dst_nodes=4, num_edges=3)

    >>> block = dgl.create_block({
    ...     ('A', 'AB', 'B'): ([1, 2, 3], [2, 1, 0]),
    ...     ('B', 'BA', 'A'): ([2, 1], [2, 3])},
    ...     num_src_nodes={'A': 6, 'B': 5},
    ...     num_dst_nodes={'A': 4, 'B': 3})
    >>> block
    Block(num_src_nodes={'A': 6, 'B': 5},
          num_dst_nodes={'A': 4, 'B': 3},
          num_edges={('A', 'AB', 'B'): 3, ('B', 'BA', 'A'): 2},
          metagraph=[('A', 'B', 'AB'), ('B', 'A', 'BA')])

    See also
    --------
    to_block
    """
    # Node counts are inferred only when neither side was supplied.
    infer_counts = num_src_nodes is None and num_dst_nodes is None
    single_relation = not isinstance(data_dict, Mapping)
    if single_relation:
        # Bare graph data stands for one relation over the default type names.
        data_dict = {("_N", "_E", "_N"): data_dict}

    if infer_counts:
        num_src_nodes = defaultdict(int)
        num_dst_nodes = defaultdict(int)
    elif single_relation:
        assert isinstance(
            num_src_nodes, int
        ), "num_src_nodes must be a pair of integers if data_dict is not a dict"
        assert isinstance(
            num_dst_nodes, int
        ), "num_dst_nodes must be a pair of integers if data_dict is not a dict"
        num_src_nodes = {"_N": num_src_nodes}
        num_dst_nodes = {"_N": num_dst_nodes}
    else:
        assert isinstance(
            num_src_nodes, Mapping
        ), "num_src_nodes must be a dict if data_dict is a dict"
        assert isinstance(
            num_dst_nodes, Mapping
        ), "num_dst_nodes must be a dict if data_dict is a dict"

    # Convert every relation's graph data to node tensors.
    node_tensor_dict = {}
    for canonical_etype, graph_data in data_dict.items():
        src_type, _, dst_type = canonical_etype
        (sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
            graph_data,
            idtype,
            bipartite=True,
            infer_node_count=infer_counts or node_count_check,
        )
        node_tensor_dict[canonical_etype] = (sparse_fmt, arrays)
        if infer_counts:
            num_src_nodes[src_type] = max(num_src_nodes[src_type], urange)
            num_dst_nodes[dst_type] = max(num_dst_nodes[dst_type], vrange)
            continue
        if not node_count_check:
            continue
        # Sanity check of the caller-supplied counts.
        if num_src_nodes[src_type] < urange:
            raise DGLError(
                "The given number of nodes of source node type {} must be larger"
                " than the max ID in the data, but got {} and {}.".format(
                    src_type, num_src_nodes[src_type], urange - 1
                )
            )
        if num_dst_nodes[dst_type] < vrange:
            raise DGLError(
                "The given number of nodes of destination node type {} must be"
                " larger than the max ID in the data, but got {} and {}.".format(
                    dst_type, num_dst_nodes[dst_type], vrange - 1
                )
            )

    # Sort the type names and relation tuples so that the same set of names
    # always yields the same order.
    srctypes = sorted(num_src_nodes.keys())
    dsttypes = sorted(num_dst_nodes.keys())
    relations = sorted(node_tensor_dict.keys())

    src_counts = [num_src_nodes[ntype] for ntype in srctypes]
    dst_counts = [num_dst_nodes[ntype] for ntype in dsttypes]
    num_nodes_per_type = utils.toindex(src_counts + dst_counts, "int64")

    # In the metagraph, source types take the first IDs and destination types
    # follow right after them.
    src_type_ids = {ntype: idx for idx, ntype in enumerate(srctypes)}
    dst_type_ids = {
        ntype: idx for idx, ntype in enumerate(dsttypes, len(srctypes))
    }

    meta_edges_src = []
    meta_edges_dst = []
    etypes = []
    rel_graphs = []
    for relation in relations:
        srctype, etype, dsttype = relation
        meta_edges_src.append(src_type_ids[srctype])
        meta_edges_dst.append(dst_type_ids[dsttype])
        etypes.append(etype)
        sparse_fmt, arrays = node_tensor_dict[relation]
        rel_graphs.append(
            create_from_edges(
                sparse_fmt,
                arrays,
                "SRC/" + srctype,
                etype,
                "DST/" + dsttype,
                num_src_nodes[srctype],
                num_dst_nodes[dsttype],
            )
        )

    # metagraph is DGLGraph, currently still using int64 as index dtype
    metagraph = graph_index.from_coo(
        len(srctypes) + len(dsttypes), meta_edges_src, meta_edges_dst, True
    )
    # create graph index
    hgidx = heterograph_index.create_heterograph_from_relations(
        metagraph, [rel._graph for rel in rel_graphs], num_nodes_per_type
    )
    block = DGLBlock(hgidx, (srctypes, dsttypes), etypes)

    return block.to(device)
def block_to_graph(block):
    """Convert a message flow graph (MFG) as a :class:`DGLBlock` object to a :class:`DGLGraph`.

    DGL will rename all the source node types by suffixing with ``_src``, and
    all the destination node types by suffixing with ``_dst``.

    Features on the returned graph will be preserved.

    Parameters
    ----------
    block : DGLBlock
        The MFG.

    Returns
    -------
    DGLGraph
        The graph.

    Examples
    --------
    >>> block = dgl.create_block({
    ...     ('A', 'AB', 'B'): ([1, 2, 3], [2, 1, 0]),
    ...     ('B', 'BA', 'A'): ([2, 1], [2, 3])})
    >>> g = dgl.block_to_graph(block)
    >>> g
    Graph(num_nodes={'A_src': 4, 'B_src': 3, 'A_dst': 4, 'B_dst': 3},
          num_edges={('A_src', 'AB', 'B_dst'): 3, ('B_src', 'BA', 'A_dst'): 2},
          metagraph=[('A_src', 'B_dst', 'AB'), ('B_src', 'A_dst', 'BA')])
    """
    # Source types come first, then destination types, each with its suffix.
    renamed_src = [name + "_src" for name in block.srctypes]
    renamed_dst = [name + "_dst" for name in block.dsttypes]
    graph_out = DGLGraph(block._graph, renamed_src + renamed_dst, block.etypes)

    # Carry node features over under the renamed types.
    for name, renamed in zip(block.srctypes, renamed_src):
        graph_out.nodes[renamed].data.update(block.srcnodes[name].data)
    for name, renamed in zip(block.dsttypes, renamed_dst):
        graph_out.nodes[renamed].data.update(block.dstnodes[name].data)

    # Carry edge features over under the renamed canonical edge types.
    for src_name, rel_name, dst_name in block.canonical_etypes:
        edge_feats = block.edges[src_name, rel_name, dst_name].data
        graph_out.edges[
            src_name + "_src", rel_name, dst_name + "_dst"
        ].data.update(edge_feats)

    return graph_out
def to_heterogeneous(
    G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, metagraph=None
):
    """Convert a homogeneous graph to a heterogeneous graph and return.

    The input graph should have only one type of nodes and edges. Each node and edge
    stores an integer feature as its type ID
    (specified by :attr:`ntype_field` and :attr:`etype_field`).
    DGL uses it to retrieve the type names stored in the given
    :attr:`ntypes` and :attr:`etypes` arguments.

    The function will automatically distinguish edge types that have the same given
    type IDs but different src and dst type IDs. For example, it allows both edges A and B
    to have the same type ID 0, but one has (0, 1) and the other as (2, 3) as the
    (src, dst) type IDs. In this case, the function will "split" edge type 0 into two types:
    (0, ty_A, 1) and (2, ty_B, 3). In other words, these two edges share the same edge
    type name, but can be distinguished by an edge type triplet.

    The function stores the node and edge IDs in the input graph using the ``dgl.NID``
    and ``dgl.EID`` names in the ``ndata`` and ``edata`` of the resulting graph.
    It also copies any node/edge features from :attr:`G` to the returned heterogeneous
    graph, except for reserved fields for storing type IDs (``dgl.NTYPE`` and ``dgl.ETYPE``)
    and node/edge IDs (``dgl.NID`` and ``dgl.EID``).

    Parameters
    ----------
    G : DGLGraph
        The homogeneous graph.
    ntypes : list[str]
        The node type names.
    etypes : list[str]
        The edge type names.
    ntype_field : str, optional
        The feature field used to store node type. (Default: ``dgl.NTYPE``)
    etype_field : str, optional
        The feature field used to store edge type. (Default: ``dgl.ETYPE``)
    metagraph : networkx MultiDiGraph, optional
        Metagraph of the returned heterograph.
        If provided, DGL assumes that G can indeed be described with the given metagraph.
        If None, DGL will infer the metagraph from the given inputs, which could be
        costly for large graphs.

    Returns
    -------
    DGLGraph
        A heterogeneous graph.

    Raises
    ------
    DGLError
        If :attr:`G` has more than one node type or more than one edge type.

    Notes
    -----
    * The returned node and edge types may not necessarily be in the same order as
      ``ntypes`` and ``etypes``.
    * Calling :func:`~dgl.to_homogeneous` then calling :func:`~dgl.to_heterogeneous` again
      yields the same result.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    >>> hg = dgl.heterograph({
    ...     ('user', 'develops', 'activity'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]), torch.tensor([0, 1]))
    ... })
    >>> print(hg)
    Graph(num_nodes={'activity': 3, 'developer': 2, 'game': 2, 'user': 2},
          num_edges={('developer', 'develops', 'game'): 2, ('user', 'develops', 'activity'): 2},
          metagraph=[('developer', 'game', 'develops'), ('user', 'activity', 'develops')])

    We first convert the heterogeneous graph to a homogeneous graph.

    >>> g = dgl.to_homogeneous(hg)
    >>> print(g)
    Graph(num_nodes=9, num_edges=4,
          ndata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> g.ndata
    {'_TYPE': tensor([0, 0, 0, 1, 1, 2, 2, 3, 3]), '_ID': tensor([0, 1, 2, 0, 1, 0, 1, 0, 1])}
    Nodes 0, 1, 2 for 'activity', 3, 4 for 'developer', 5, 6 for 'game', 7, 8 for 'user'
    >>> g.edata
    {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
    Edges 0, 1 for ('developer', 'develops', 'game'), 2, 3 for ('user', 'develops', 'activity')

    Now convert the homogeneous graph back to a heterogeneous graph.

    >>> hg_2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)
    >>> print(hg_2)
    Graph(num_nodes={'activity': 3, 'developer': 2, 'game': 2, 'user': 2},
          num_edges={('developer', 'develops', 'game'): 2, ('user', 'develops', 'activity'): 2},
          metagraph=[('developer', 'game', 'develops'), ('user', 'activity', 'develops')])

    Retrieve the original node/edge IDs.

    >>> hg_2.ndata[dgl.NID]
    {'activity': tensor([0, 1, 2]),
     'developer': tensor([3, 4]),
     'game': tensor([5, 6]),
     'user': tensor([7, 8])}
    >>> hg_2.edata[dgl.EID]
    {('developer', 'develops', 'game'): tensor([0, 1]),
     ('user', 'develops', 'activity'): tensor([2, 3])}

    See Also
    --------
    to_homogeneous
    """
    # Reject inputs that expose more than one node type or edge type.
    if (
        hasattr(G, "ntypes")
        and len(G.ntypes) > 1
        or hasattr(G, "etypes")
        and len(G.etypes) > 1
    ):
        raise DGLError(
            "The input graph should be homogeneous and have only one "
            " type of nodes and edges."
        )

    num_ntypes = len(ntypes)
    idtype = G.idtype
    device = G.device

    # Per-node and per-edge type IDs as numpy arrays.
    ntype_ids = F.asnumpy(G.ndata[ntype_field])
    etype_ids = F.asnumpy(G.edata[etype_field])

    # relabel nodes to per-type local IDs
    ntype_count = np.bincount(ntype_ids, minlength=num_ntypes)
    ntype_offset = np.insert(np.cumsum(ntype_count), 0, 0)
    # A stable sort keeps nodes of the same type in their original relative order.
    ntype_ids_sortidx = np.argsort(ntype_ids, kind="stable")
    ntype_local_ids = np.zeros_like(ntype_ids)
    node_groups = []
    for i in range(num_ntypes):
        # Original IDs of the nodes of type i; their local IDs are 0..count-1.
        node_group = ntype_ids_sortidx[ntype_offset[i] : ntype_offset[i + 1]]
        node_groups.append(node_group)
        ntype_local_ids[node_group] = np.arange(ntype_count[i])

    src, dst = G.all_edges(order="eid")
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
    src_local = ntype_local_ids[src]
    dst_local = ntype_local_ids[dst]
    # a 2D tensor of shape (E, 3). Each row represents the (stid, etid, dtid) tuple.
    edge_ctids = np.stack([ntype_ids[src], etype_ids, ntype_ids[dst]], 1)

    # infer metagraph and canonical edge types
    # No matter which branch it takes, the code will generate a 2D tensor of shape (E_m, 3),
    # where E_m is the number of canonical edge tuples. Each row represents the
    # (stid, etid, dtid) tuple. We then compute a 2D boolean tensor of shape (E_m, E) using
    # the above ``edge_ctids`` matrix. Each element j,i indicates whether the edge i is of the
    # canonical edge type j. We can then group the edges of the same type together.
    if metagraph is None:
        canonical_etids, _, etype_remapped = utils.make_invmap(
            list(tuple(_) for _ in edge_ctids), False
        )
        etype_mask = (
            etype_remapped[None, :] == np.arange(len(canonical_etids))[:, None]
        )
    else:
        ntypes_invmap = {nt: i for i, nt in enumerate(ntypes)}
        etypes_invmap = {et: i for i, et in enumerate(etypes)}
        canonical_etids = []
        # The metagraph yields (src type, dst type, key) triples; the key is
        # used as the edge type name.
        for i, (srctype, dsttype, etype) in enumerate(
            metagraph.edges(keys=True)
        ):
            srctype_id = ntypes_invmap[srctype]
            etype_id = etypes_invmap[etype]
            dsttype_id = ntypes_invmap[dsttype]
            canonical_etids.append((srctype_id, etype_id, dsttype_id))
        canonical_etids = np.asarray(canonical_etids)
        etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
    # Original edge IDs belonging to each canonical edge type.
    edge_groups = [
        etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))
    ]

    data_dict = dict()
    canonical_etypes = []
    for i, (stid, etid, dtid) in enumerate(canonical_etids):
        src_of_etype = src_local[edge_groups[i]]
        dst_of_etype = dst_local[edge_groups[i]]
        canonical_etypes.append((ntypes[stid], etypes[etid], ntypes[dtid]))
        data_dict[canonical_etypes[-1]] = (src_of_etype, dst_of_etype)
    hg = heterograph(
        data_dict, dict(zip(ntypes, ntype_count)), idtype=idtype, device=device
    )

    # Node groups are looked up by type name because ``hg.ntypes`` may be
    # ordered differently from ``ntypes``.
    ntype2ngrp = {ntype: node_groups[ntid] for ntid, ntype in enumerate(ntypes)}

    # features
    for key, data in G.ndata.items():
        if key in [ntype_field, NID]:
            continue
        for ntid, ntype in enumerate(hg.ntypes):
            rows = F.copy_to(F.tensor(ntype2ngrp[ntype]), F.context(data))
            hg._node_frames[ntid][key] = F.gather_row(data, rows)

    for key, data in G.edata.items():
        if key in [etype_field, EID]:
            continue
        for etid in range(len(hg.canonical_etypes)):
            rows = F.copy_to(F.tensor(edge_groups[etid]), F.context(data))
            hg._edge_frames[hg.get_etype_id(canonical_etypes[etid])][
                key
            ] = F.gather_row(data, rows)

    # Record the original IDs of the nodes/edges
    for ntid, ntype in enumerate(hg.ntypes):
        hg._node_frames[ntid][NID] = F.copy_to(
            F.tensor(ntype2ngrp[ntype]), device
        )
    for etid in range(len(hg.canonical_etypes)):
        hg._edge_frames[hg.get_etype_id(canonical_etypes[etid])][
            EID
        ] = F.copy_to(F.tensor(edge_groups[etid]), device)

    return hg
def to_homogeneous(
    G, ndata=None, edata=None, store_type=True, return_count=False
):
    """Convert a heterogeneous graph to a homogeneous graph and return.

    By default, the function stores the node and edge types of the input graph as
    the ``dgl.NTYPE`` and ``dgl.ETYPE`` features in the returned graph.
    Each feature is an integer representing the type id, determined by the
    :meth:`DGLGraph.get_ntype_id` and :meth:`DGLGraph.get_etype_id` methods.
    One can omit it by specifying ``store_type=False``.

    The result graph assigns nodes and edges of the same type with IDs in continuous range
    (i.e., nodes of the first type have IDs 0 ~ ``G.num_nodes(G.ntypes[0])``; nodes
    of the second type come after; so on and so forth). Therefore, a more memory-efficient
    format for type information is an integer list; the i^th corresponds to
    the number of nodes/edges of the i^th type. One can choose this format by
    specifying ``return_count=True``.

    Parameters
    ----------
    G : DGLGraph
        The heterogeneous graph.
    ndata : list[str], optional
        The node features to combine across all node types. For each feature ``feat`` in
        :attr:`ndata`, it concatenates ``G.nodes[T].data[feat]`` across all node types ``T``.
        As a result, the feature ``feat`` of all node types should have the same shape and
        data type. By default, the returned graph will not have any node features.
    edata : list[str], optional
        The edge features to combine across all edge types. For each feature ``feat`` in
        :attr:`edata`, it concatenates ``G.edges[T].data[feat]`` across all edge types ``T``.
        As a result, the feature ``feat`` of all edge types should have the same shape and
        data type. By default, the returned graph will not have any edge features.
    store_type : bool, optional
        If True, store type information as the ``dgl.NTYPE`` and ``dgl.ETYPE`` features
        in the returned graph.
    return_count : bool, optional
        If True, return type information as an integer list; the i^th element corresponds to
        the number of nodes/edges of the i^th type.

    Returns
    -------
    DGLGraph
        A homogeneous graph.
    ntype_count : list[int], optional
        Number of nodes of each type. Return when ``return_count`` is True.
    etype_count : list[int], optional
        Number of edges of each type. Return when ``return_count`` is True.

    Notes
    -----

    * Calculating type information may introduce noticeable cost. Setting both ``store_type``
      and ``return_count`` to False can avoid such cost if type information is not needed.
      Otherwise, DGL recommends to use ``store_type=False`` and ``return_count=True`` due
      to its memory efficiency.
    * The ``ntype_count`` and ``etype_count`` lists can help speed up some operations.
      See :class:`~dgl.nn.pytorch.conv.RelGraphConv` for such an example.
    * Calling :func:`~dgl.to_homogeneous` then calling :func:`~dgl.to_heterogeneous` again
      yields the same result.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    >>> hg = dgl.heterograph({
    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
    ...     ('developer', 'develops', 'game'): ([0, 1], [0, 1])
    ...     })
    >>> hg.nodes['user'].data['h'] = torch.ones(3, 1)
    >>> hg.nodes['developer'].data['h'] = torch.zeros(2, 1)
    >>> hg.nodes['game'].data['h'] = torch.ones(2, 1)
    >>> g = dgl.to_homogeneous(hg)
    >>> # Types follow the order of ``hg.ntypes``: the first two nodes are for
    >>> # 'developer', the next two are for 'game', and the last three are for 'user'
    >>> g.ndata
    {'_TYPE': tensor([0, 0, 1, 1, 2, 2, 2]), '_ID': tensor([0, 1, 0, 1, 0, 1, 2])}
    >>> # The first two edges are for 'develops', and the next two are for 'follows' edges.
    >>> g.edata
    {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}

    Combine feature 'h' across all node types in the conversion.

    >>> g = dgl.to_homogeneous(hg, ndata=['h'])
    >>> g.ndata['h']
    tensor([[0.], [0.], [1.], [1.], [1.], [1.], [1.]])

    See Also
    --------
    to_heterogeneous
    """
    # Query the node counts once; they drive both the ID offsets and the
    # per-type bookkeeping below.
    num_nodes_per_ntype = [G.num_nodes(ntype) for ntype in G.ntypes]
    # offset_per_ntype[i] is the first homogeneous ID given to nodes of type i.
    offset_per_ntype = np.insert(np.cumsum(num_nodes_per_ntype), 0, 0)
    total_num_nodes = sum(num_nodes_per_ntype)
    srcs = []
    dsts = []
    nids = []
    eids = []
    if store_type:
        ntype_ids = []
        etype_ids = []
    if return_count:
        ntype_count = []
        etype_count = []

    for ntype_id, num_nodes in enumerate(num_nodes_per_ntype):
        if store_type:
            # Type ID is always in int64
            ntype_ids.append(F.full_1d(num_nodes, ntype_id, F.int64, G.device))
        if return_count:
            ntype_count.append(num_nodes)
        nids.append(F.arange(0, num_nodes, G.idtype, G.device))

    for etype_id, etype in enumerate(G.canonical_etypes):
        srctype, _, dsttype = etype
        src, dst = G.all_edges(etype=etype, order="eid")
        num_edges = len(src)
        # Shift the per-type node IDs into the homogeneous ID space.
        srcs.append(src + int(offset_per_ntype[G.get_ntype_id(srctype)]))
        dsts.append(dst + int(offset_per_ntype[G.get_ntype_id(dsttype)]))
        if store_type:
            # Type ID is always in int64
            etype_ids.append(F.full_1d(num_edges, etype_id, F.int64, G.device))
        if return_count:
            etype_count.append(num_edges)
        eids.append(F.arange(0, num_edges, G.idtype, G.device))

    retg = graph(
        (F.cat(srcs, 0), F.cat(dsts, 0)),
        num_nodes=total_num_nodes,
        idtype=G.idtype,
        device=G.device,
    )

    # copy features
    if ndata is None:
        ndata = []
    if edata is None:
        edata = []
    comb_nf = combine_frames(
        G._node_frames, range(len(G.ntypes)), col_names=ndata
    )
    comb_ef = combine_frames(
        G._edge_frames, range(len(G.etypes)), col_names=edata
    )
    if comb_nf is not None:
        retg.ndata.update(comb_nf)
    if comb_ef is not None:
        retg.edata.update(comb_ef)

    # Record each node's/edge's ID within its original type.
    retg.ndata[NID] = F.cat(nids, 0)
    retg.edata[EID] = F.cat(eids, 0)
    if store_type:
        retg.ndata[NTYPE] = F.cat(ntype_ids, 0)
        retg.edata[ETYPE] = F.cat(etype_ids, 0)

    if return_count:
        return retg, ntype_count, etype_count
    else:
        return retg
def from_scipy(sp_mat, eweight_name=None, idtype=None, device=None):
    """Create a graph from a SciPy sparse matrix and return.

    Parameters
    ----------
    sp_mat : scipy.sparse.spmatrix
        The graph adjacency matrix. Each nonzero entry ``sp_mat[i, j]`` represents an edge from
        node ``i`` to ``j``. The matrix must have square shape ``(N, N)``, where ``N`` is the
        number of nodes in the graph.
    eweight_name : str, optional
        The edata name for storing the nonzero values of :attr:`sp_mat`. If given, DGL will
        store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]`` of the returned
        graph.
    idtype : int32 or int64, optional
        The data type for storing the structure-related graph information such as node and
        edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``).
        By default, DGL uses int64.
    device : device context, optional
        The device of the resulting graph. It should be a framework-specific device object
        (e.g., ``torch.device``). By default, DGL stores the graph on CPU.

    Returns
    -------
    DGLGraph
        The created graph.

    Raises
    ------
    DGLError
        If :attr:`sp_mat` is not square.

    Notes
    -----
    1. The function supports all kinds of SciPy sparse matrix classes (e.g.,
       :class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate
       format using :func:`scipy.sparse.spmatrix.tocoo` before creating a :class:`DGLGraph`.
       Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way.
    2. DGL internally maintains multiple copies of the graph structure in different sparse
       formats and chooses the most efficient one depending on the computation invoked.
       If memory usage becomes an issue in the case of large graphs, use
       :func:`dgl.DGLGraph.formats` to restrict the allowed formats.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import numpy as np
    >>> import torch
    >>> from scipy.sparse import coo_matrix

    Create a small three-edge graph.

    >>> # Source nodes for edges (2, 1), (3, 2), (4, 3)
    >>> src_ids = np.array([2, 3, 4])
    >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3)
    >>> dst_ids = np.array([1, 2, 3])
    >>> # Weight for edges (2, 1), (3, 2), (4, 3)
    >>> eweight = np.array([0.2, 0.3, 0.5])
    >>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids)), shape=(5, 5))
    >>> g = dgl.from_scipy(sp_mat)

    Retrieve the edge weights.

    >>> g = dgl.from_scipy(sp_mat, eweight_name='w')
    >>> g.edata['w']
    tensor([0.2000, 0.3000, 0.5000], dtype=torch.float64)

    Create a graph on the first GPU with data type int32.

    >>> g = dgl.from_scipy(sp_mat, idtype=torch.int32, device='cuda:0')

    See Also
    --------
    graph
    from_networkx
    """
    # A homogeneous graph needs a square adjacency matrix.
    n_rows, n_cols = sp_mat.shape[0], sp_mat.shape[1]
    if n_rows != n_cols:
        raise DGLError(
            "Expect the number of rows to be the same as the number of columns for "
            "sp_mat, got {:d} and {:d}.".format(n_rows, n_cols)
        )

    adjacency, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
    sparse_fmt, arrays = adjacency
    result = create_from_edges(
        sparse_fmt, arrays, "_N", "_E", "_N", urange, vrange
    )

    if eweight_name is not None:
        # Store the matrix's nonzero values as an edge feature.
        result.edata[eweight_name] = F.tensor(sp_mat.data)

    return result.to(device)
vtype : str, optional The name of the destination node type. eweight_name : str, optional The edata name for storing the nonzero values of :attr:`sp_mat`. If given, DGL will store the nonzero values of :attr:`sp_mat` in ``edata[eweight_name]`` of the returned graph. idtype : int32 or int64, optional The data type for storing the structure-related graph information such as node and edge IDs. It should be a framework-specific data type object (e.g., ``torch.int32``). By default, DGL uses int64. device : device context, optional The device of the resulting graph. It should be a framework-specific device object (e.g., ``torch.device``). By default, DGL stores the graph on CPU. Returns ------- DGLGraph The created graph. Notes ----- 1. The function supports all kinds of SciPy sparse matrix classes (e.g., :class:`scipy.sparse.csr.csr_matrix`). It converts the input matrix to the COOrdinate format using :func:`scipy.sparse.spmatrix.tocoo` before creates a :class:`DGLGraph`. Creating from a :class:`scipy.sparse.coo.coo_matrix` is hence the most efficient way. 2. DGL internally maintains multiple copies of the graph structure in different sparse formats and chooses the most efficient one depending on the computation invoked. If memory usage becomes an issue in the case of large graphs, use :func:`dgl.DGLGraph.formats` to restrict the allowed formats. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import numpy as np >>> import torch >>> from scipy.sparse import coo_matrix Create a small three-edge graph. >>> # Source nodes for edges (2, 1), (3, 2), (4, 3) >>> src_ids = np.array([2, 3, 4]) >>> # Destination nodes for edges (2, 1), (3, 2), (4, 3) >>> dst_ids = np.array([1, 2, 3]) >>> # Weight for edges (2, 1), (3, 2), (4, 3) >>> eweight = np.array([0.2, 0.3, 0.5]) >>> sp_mat = coo_matrix((eweight, (src_ids, dst_ids))) >>> g = dgl.bipartite_from_scipy(sp_mat, utype='_U', etype='_E', vtype='_V') Retrieve the edge weights. 
def _batcher(lst):
    """Stack a list of per-node/per-edge attribute values into one tensor.

    The type of the first element decides how the whole list is batched,
    so ``lst`` must be non-empty.

    Parameters
    ----------
    lst : list
        Attribute values, one entry per node or edge.

    Returns
    -------
    Tensor
        Backend tensors get a new leading dimension and are concatenated
        along it; ``numpy.ndarray`` values are stacked with ``np.array``
        before conversion; anything else is handed to ``F.tensor`` as is.
    """
    if F.is_tensor(lst[0]):
        return F.cat([F.unsqueeze(x, 0) for x in lst], dim=0)
    if isinstance(lst[0], np.ndarray):
        return F.tensor(np.array(lst))
    return F.tensor(lst)


def from_networkx(
    nx_graph,
    node_attrs=None,
    edge_attrs=None,
    edge_id_attr_name=None,
    idtype=None,
    device=None,
):
    """Create a graph from a NetworkX graph and return.

    .. note::
        Creating a DGLGraph from a NetworkX graph is not fast especially for
        large scales. It is recommended to first convert a NetworkX graph into
        a tuple of node-tensors and then construct a DGLGraph with
        :func:`dgl.graph`.

    Parameters
    ----------
    nx_graph : networkx.Graph
        The NetworkX graph holding the graph structure and the node/edge
        attributes. DGL will relabel the nodes using consecutive integers
        starting from zero if it is not the case. If the input graph is
        undirected, DGL converts it to a directed graph by
        :func:`networkx.Graph.to_directed`.
    node_attrs : list[str], optional
        The names of the node attributes to retrieve from the NetworkX graph.
        If given, DGL stores the retrieved node attributes in ``ndata`` of the
        returned graph using their original names. The attribute data must be
        convertible to Tensor type (e.g., scalar, ``numpy.ndarray``, list,
        etc.).
    edge_attrs : list[str], optional
        The names of the edge attributes to retrieve from the NetworkX graph.
        If given, DGL stores the retrieved edge attributes in ``edata`` of the
        returned graph using their original names. The attribute data must be
        convertible to Tensor type (e.g., scalar, ``numpy.ndarray``, list,
        etc.). It must be None if :attr:`nx_graph` is undirected.
    edge_id_attr_name : str, optional
        The name of the edge attribute that stores the edge IDs. If given, DGL
        will assign edge IDs accordingly when creating the graph, so the
        attribute must be valid IDs, i.e. consecutive integers starting from
        zero. By default, the edge IDs of the returned graph can be arbitrary.
        It must be None if :attr:`nx_graph` is undirected.
    idtype : int32 or int64, optional
        The data type for storing the structure-related graph information such
        as node and edge IDs. It should be a framework-specific data type
        object (e.g., ``torch.int32``). By default, DGL uses int64.
    device : device context, optional
        The device of the resulting graph. It should be a framework-specific
        device object (e.g., ``torch.device``). By default, DGL stores the
        graph on CPU.

    Returns
    -------
    DGLGraph
        The created graph.

    Raises
    ------
    DGLError
        If :attr:`edge_id_attr_name` is given but missing from the first edge
        of :attr:`nx_graph`; if :attr:`nx_graph` is undirected while
        :attr:`edge_id_attr_name` or :attr:`edge_attrs` is given; if a
        pre-specified edge ID is not smaller than the number of edges; or if
        some edge lacks one of the requested :attr:`edge_attrs`.

    Notes
    -----
    DGL internally maintains multiple copies of the graph structure in
    different sparse formats and chooses the most efficient one depending on
    the computation invoked. If memory usage becomes an issue in the case of
    large graphs, use :func:`dgl.DGLGraph.formats` to restrict the allowed
    formats.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import networkx as nx
    >>> import numpy as np
    >>> import torch

    Create a 2-edge NetworkX graph.

    >>> nx_g = nx.DiGraph()
    >>> # Add 3 nodes and two features for them
    >>> nx_g.add_nodes_from([0, 1, 2], feat1=np.zeros((3, 1)), feat2=np.ones((3, 1)))
    >>> # Add 2 edges (1, 2) and (2, 1) with two features, one being edge IDs
    >>> nx_g.add_edge(1, 2, weight=np.ones((1, 1)), eid=np.array([1]))
    >>> nx_g.add_edge(2, 1, weight=np.ones((1, 1)), eid=np.array([0]))

    Convert it into a DGLGraph with structure only.

    >>> g = dgl.from_networkx(nx_g)

    Retrieve the node/edge features of the graph.

    >>> g = dgl.from_networkx(nx_g, node_attrs=['feat1', 'feat2'], edge_attrs=['weight'])

    Use a pre-specified ordering of the edges.

    >>> g.edges()
    (tensor([1, 2]), tensor([2, 1]))
    >>> g = dgl.from_networkx(nx_g, edge_id_attr_name='eid')
    >>> g.edges()
    (tensor([2, 1]), tensor([1, 2]))

    Create a graph on the first GPU with data type int32.

    >>> g = dgl.from_networkx(nx_g, idtype=torch.int32, device='cuda:0')

    See Also
    --------
    graph
    from_scipy
    """
    # Sanity check
    if edge_id_attr_name is not None:
        # Only the first edge is inspected. The ``None`` default keeps an
        # edgeless graph from leaking a bare StopIteration to the caller.
        first_edge = next(iter(nx_graph.edges(data=True)), None)
        if first_edge is not None and edge_id_attr_name not in first_edge[-1]:
            raise DGLError(
                "Failed to find the pre-specified edge IDs in the edge features of "
                "the NetworkX graph with name {}".format(edge_id_attr_name)
            )

    if not nx_graph.is_directed() and not (
        edge_id_attr_name is None and edge_attrs is None
    ):
        raise DGLError(
            "Expect edge_id_attr_name and edge_attrs to be None when nx_graph is "
            "undirected, got {} and {}".format(edge_id_attr_name, edge_attrs)
        )

    # Relabel nodes using consecutive integers starting from 0
    nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering="sorted")
    if not nx_graph.is_directed():
        nx_graph = nx_graph.to_directed()

    (sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
        nx_graph, idtype, edge_id_attr_name=edge_id_attr_name
    )

    g = create_from_edges(sparse_fmt, arrays, "_N", "_E", "_N", urange, vrange)

    # nx_graph.edges(data=True) returns src, dst, attr_dict
    has_edge_id = (
        nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
    )

    # handle features
    # copy attributes
    if node_attrs is not None:
        # mapping from feature name to a list of tensors to be concatenated
        attr_dict = defaultdict(list)
        for nid in range(g.num_nodes()):
            for attr in node_attrs:
                attr_dict[attr].append(nx_graph.nodes[nid][attr])
        for attr in node_attrs:
            g.ndata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)

    if edge_attrs is not None:
        # mapping from feature name to a list of tensors to be concatenated
        attr_dict = defaultdict(lambda: [None] * g.num_edges())
        # each defaultdict value is initialized to be a list of None
        # None here serves as placeholder to be replaced by feature with
        # corresponding edge id
        if has_edge_id:
            num_edges = g.num_edges()
            for _, _, attrs in nx_graph.edges(data=True):
                eid = attrs[edge_id_attr_name]
                if eid >= num_edges:
                    # Report the offending ID under the user-chosen attribute
                    # name; a hard-coded "id" key would raise KeyError here.
                    raise DGLError(
                        "Expect the pre-specified edge ids to be"
                        " smaller than the number of edges --"
                        " {}, got {}.".format(num_edges, eid)
                    )
                for key in edge_attrs:
                    attr_dict[key][eid] = attrs[key]
        else:
            # XXX: assuming networkx iteration order is deterministic
            #      so the order is the same as graph_index.from_networkx
            for eid, (_, _, attrs) in enumerate(nx_graph.edges(data=True)):
                for key in edge_attrs:
                    attr_dict[key][eid] = attrs[key]
        for attr in edge_attrs:
            # A leftover placeholder means some edge never supplied a value.
            if any(val is None for val in attr_dict[attr]):
                raise DGLError(
                    "Not all edges have attribute {}.".format(attr)
                )
            g.edata[attr] = F.copy_to(_batcher(attr_dict[attr]), g.device)

    return g.to(device)
The graph must follow `NetworkX's bipartite graph convention `_, and furthermore the edges must be from nodes with attribute ``bipartite=0`` to nodes with attribute ``bipartite=1``. utype : str, optional The name of the source node type. etype : str, optional The name of the edge type. vtype : str, optional The name of the destination node type. u_attrs : list[str], optional The names of the node attributes for node type :attr:`utype` to retrieve from the NetworkX graph. If given, DGL stores the retrieved node attributes in ``nodes[utype].data`` of the returned graph using their original names. The attribute data must be convertible to Tensor type (e.g., scalar, ``numpy.ndarray``, list, etc.). e_attrs : list[str], optional The names of the edge attributes to retrieve from the NetworkX graph. If given, DGL stores the retrieved edge attributes in ``edata`` of the returned graph using their original names. The attribute data must be convertible to Tensor type (e.g., scalar, numpy.ndarray, list, etc.). v_attrs : list[str], optional The names of the node attributes for node type :attr:`vtype` to retrieve from the NetworkX graph. If given, DGL stores the retrieved node attributes in ``nodes[vtype].data`` of the returned graph using their original names. The attribute data must be convertible to Tensor type (e.g., scalar, numpy.array, list, etc.). edge_id_attr_name : str, optional The name of the edge attribute that stores the edge IDs. If given, DGL will assign edge IDs accordingly when creating the graph, so the attribute must be valid IDs, i.e. consecutive integers starting from zero. By default, the edge IDs of the returned graph can be arbitrary. idtype : int32 or int64, optional The data type for storing the structure-related graph information such as node and edge IDs. It should be a framework-specific data type object (e.g., torch.int32). By default, DGL uses int64. device : device context, optional The device of the resulting graph. 
It should be a framework-specific device object (e.g., torch.device). By default, DGL stores the graph on CPU. Returns ------- DGLGraph The created graph. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import networkx as nx >>> import numpy as np >>> import torch Create a 2-edge unidirectional bipartite graph. >>> nx_g = nx.DiGraph() >>> # Add nodes for the source type >>> nx_g.add_nodes_from([1, 3], bipartite=0, feat1=np.zeros((2, 1)), feat2=np.ones((2, 1))) >>> # Add nodes for the destination type >>> nx_g.add_nodes_from([2, 4, 5], bipartite=1, feat3=np.zeros((3, 1))) >>> nx_g.add_edge(1, 4, weight=np.ones((1, 1)), eid=np.array([1])) >>> nx_g.add_edge(3, 5, weight=np.ones((1, 1)), eid=np.array([0])) Convert it into a DGLGraph with structure only. >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V') Retrieve the node/edge features of the graph. >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V', ... u_attrs=['feat1', 'feat2'], ... e_attrs=['weight'], ... v_attrs=['feat3']) Use a pre-specified ordering of the edges. >>> g.edges() (tensor([0, 1]), tensor([1, 2])) >>> g = dgl.bipartite_from_networkx(nx_g, ... utype='_U', etype='_E', vtype='_V', ... edge_id_attr_name='eid') (tensor([1, 0]), tensor([2, 1])) Create a graph on the first GPU with data type int32. >>> g = dgl.bipartite_from_networkx(nx_g, utype='_U', etype='_E', vtype='_V', ... 
idtype=torch.int32, device='cuda:0') See Also -------- heterograph bipartite_from_scipy """ if not nx_graph.is_directed(): raise DGLError("Expect nx_graph to be a directed NetworkX graph.") if ( edge_id_attr_name is not None and not edge_id_attr_name in next(iter(nx_graph.edges(data=True)))[-1] ): raise DGLError( "Failed to find the pre-specified edge IDs in the edge features " "of the NetworkX graph with name {}".format(edge_id_attr_name) ) # Get the source and destination node sets top_nodes = set() bottom_nodes = set() for n, ndata in nx_graph.nodes(data=True): if "bipartite" not in ndata: raise DGLError( "Expect the node {} to have attribute bipartite".format(n) ) if ndata["bipartite"] == 0: top_nodes.add(n) elif ndata["bipartite"] == 1: bottom_nodes.add(n) else: raise ValueError( "Expect the bipartite attribute of the node {} to be 0 or 1, " "got {}".format(n, ndata["bipartite"]) ) # Separately relabel the source and destination nodes. top_nodes = sorted(top_nodes) bottom_nodes = sorted(bottom_nodes) top_map = {n: i for i, n in enumerate(top_nodes)} bottom_map = {n: i for i, n in enumerate(bottom_nodes)} # Get the node tensors and the number of nodes (sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors( nx_graph, idtype, bipartite=True, edge_id_attr_name=edge_id_attr_name, top_map=top_map, bottom_map=bottom_map, ) g = create_from_edges( sparse_fmt, arrays, utype, etype, vtype, urange, vrange ) # nx_graph.edges(data=True) returns src, dst, attr_dict has_edge_id = ( nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None ) # handle features # copy attributes if u_attrs is not None: # mapping from feature name to a list of tensors to be concatenated src_attr_dict = defaultdict(list) for nid in top_map.keys(): for attr in u_attrs: src_attr_dict[attr].append(nx_graph.nodes[nid][attr]) for attr in u_attrs: g.srcdata[attr] = F.copy_to(_batcher(src_attr_dict[attr]), g.device) if v_attrs is not None: # mapping from feature name to a list of tensors 
def _to_networkx_homogeneous(g, node_attrs, edge_attrs):
    """Convert a homogeneous graph into a ``networkx.MultiDiGraph``.

    Nodes are added as ``range(g.num_nodes())``. Each edge is added with an
    ``'id'`` attribute equal to its position in ``g.edges()``.

    Parameters
    ----------
    g : DGLGraph
        The graph to convert.
    node_attrs : iterable of str or None
        Node feature names to copy onto the NetworkX nodes.
    edge_attrs : iterable of str or None
        Edge feature names to copy onto the NetworkX edges.

    Returns
    -------
    networkx.MultiDiGraph
        The converted graph.
    """
    # TODO: consider adding an eid_attr parameter as in
    # `_to_networkx_heterogeneous` when this function is properly tested
    # (see GitHub issue #5735)
    src, dst = g.edges()
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
    # xiangsx: Always treat graph as multigraph
    nx_graph = nx.MultiDiGraph()
    nx_graph.add_nodes_from(range(g.num_nodes()))
    for eid, (u, v) in enumerate(zip(src, dst)):
        nx_graph.add_edge(u, v, id=eid)

    if node_attrs is not None:
        for nid, attr in nx_graph.nodes(data=True):
            # First argument 0: presumably the single node type's ID -- confirm.
            feat_dict = g._get_n_repr(0, nid)
            attr.update(
                {key: F.squeeze(feat_dict[key], 0) for key in node_attrs}
            )
    if edge_attrs is not None:
        for _, _, attr in nx_graph.edges(data=True):
            eid = attr["id"]
            feat_dict = g._get_e_repr(0, eid)
            attr.update(
                {key: F.squeeze(feat_dict[key], 0) for key in edge_attrs}
            )
    return nx_graph


def _to_networkx_heterogeneous(
    g, node_attrs, edge_attrs, ntype_attr, etype_attr, eid_attr
):
    """Convert a heterogeneous graph into a ``networkx.MultiDiGraph``.

    The graph is first flattened with ``to_homogeneous``; NetworkX node keys
    and the stored edge IDs are the IDs of that homogeneous graph. Features
    are read from the original graph ``g`` and are only attached for the
    types that actually define them.

    Parameters
    ----------
    g : DGLGraph
        The graph to convert.
    node_attrs : iterable of str or None
        Node feature names to copy onto the NetworkX nodes.
    edge_attrs : iterable of str or None
        Edge feature names to copy onto the NetworkX edges.
    ntype_attr : str
        Name of the node attribute receiving the node type name.
    etype_attr : str
        Name of the edge attribute receiving the canonical edge type.
    eid_attr : str
        Name of the edge attribute receiving the edge ID.

    Returns
    -------
    networkx.MultiDiGraph
        The converted graph.

    Raises
    ------
    AssertionError
        If ``ntype_attr`` collides with a name in ``node_attrs``, or
        ``etype_attr``/``eid_attr`` collides with a name in ``edge_attrs``
        (checked while visiting nodes/edges).
    """
    nx_graph = nx.MultiDiGraph()

    # This implementation does not use `ndata` and `edata` in the call to
    # `to_homogeneous` because the function expects node and edge attributes
    # both to be defined for every type and to have the same shape.
    # If the `to_homogeneous` function is updated to support non-uniform node
    # and edge attributes, the implementation can be simplified.
    hom_g = to_homogeneous(g, store_type=True, return_count=False)
    ntypes = g.ntypes
    etypes = g.canonical_etypes

    node_info = zip(hom_g.ndata[NID], hom_g.ndata[NTYPE])
    for hom_nid, (orig_nid, ntype) in enumerate(node_info):
        attrs = {ntype_attr: ntypes[ntype]}

        if node_attrs is not None:
            assert ntype_attr not in node_attrs, (
                f"'{ntype_attr}' already used as node type attribute, "
                f"please provide a different value for ntype_attr"
            )

            feat_dict = g._get_n_repr(ntype, orig_nid)
            attrs.update(
                {
                    key: F.squeeze(feat_dict[key], 0)
                    for key in node_attrs
                    if key in feat_dict
                }
            )

        nx_graph.add_node(hom_nid, **attrs)

    # Fetch all endpoints with one call instead of one `find_edges` query per
    # edge. As in `_to_networkx_homogeneous`, position i of `edges()` is
    # taken to be the edge with ID i.
    src, dst = hom_g.edges()
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)

    edge_info = zip(hom_g.edata[EID], hom_g.edata[ETYPE])
    for hom_eid, (orig_eid, etype) in enumerate(edge_info):
        attrs = {eid_attr: hom_eid, etype_attr: etypes[etype]}

        if edge_attrs is not None:
            assert etype_attr not in edge_attrs, (
                f"'{etype_attr}' already used as edge type attribute, "
                f"please provide a different value for etype_attr"
            )
            assert eid_attr not in edge_attrs, (
                f"'{eid_attr}' already used as edge ID attribute, "
                f"please provide a different value for eid_attr"
            )

            feat_dict = g._get_e_repr(etype, orig_eid)
            attrs.update(
                {
                    key: F.squeeze(feat_dict[key], 0)
                    for key in edge_attrs
                    if key in feat_dict
                }
            )

        nx_graph.add_edge(int(src[hom_eid]), int(dst[hom_eid]), **attrs)

    return nx_graph
The resulting NetworkX graph also contains the node/edge features of the input graph. Additionally, DGL saves the edge IDs as the ``'id'`` edge attribute in the returned NetworkX graph. Parameters ---------- g : DGLGraph A homogeneous or heterogeneous graph. node_attrs : iterable of str, optional The node attributes to copy from ``g.ndata``. (Default: None) edge_attrs : iterable of str, optional The edge attributes to copy from ``g.edata``. (Default: None) ntype_attr : str, optional The name of the node attribute to store the node types in the NetworkX object. (Default: "ntype") etype_attr : str, optional The name of the edge attribute to store the edge canonical types in the NetworkX object. (Default: "etype") eid_attr : str, optional The name of the edge attribute to store the original edge ID in the NetworkX object. (Default: "id") Returns ------- networkx.DiGraph The converted NetworkX graph. Notes ----- The function only supports CPU graph input. Examples -------- The following examples use the PyTorch backend. >>> import dgl >>> import torch With a homogeneous graph: >>> g = dgl.graph((torch.tensor([1, 2]), torch.tensor([1, 3]))) >>> g.ndata['h'] = torch.zeros(4, 1) >>> g.edata['h1'] = torch.ones(2, 1) >>> g.edata['h2'] = torch.zeros(2, 2) >>> nx_g = dgl.to_networkx(g, node_attrs=['h'], edge_attrs=['h1', 'h2']) >>> nx_g.nodes(data=True) NodeDataView({ 0: {'h': tensor([0.])}, 1: {'h': tensor([0.])}, 2: {'h': tensor([0.])}, 3: {'h': tensor([0.])} }) >>> nx_g.edges(data=True) OutMultiEdgeDataView([ (1, 1, {'id': 0, 'h1': tensor([1.]), 'h2': tensor([0., 0.])}), (2, 3, {'id': 1, 'h1': tensor([1.]), 'h2': tensor([0., 0.])}) ]) With a heterogeneous graph: >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'follows', 'topic'): (torch.tensor([1, 1]), torch.tensor([1, 2])), ... ('user', 'plays', 'game'): (torch.tensor([0, 3]), torch.tensor([3, 4])) ... }) >>> g.ndata['n'] = { ... 
def to_cugraph(g):
    """Convert a DGL graph to a :class:`cugraph.Graph` and return.

    Parameters
    ----------
    g : DGLGraph
        A homogeneous graph.

    Returns
    -------
    cugraph.Graph
        The converted cugraph graph.

    Raises
    ------
    DGLError
        If ``g`` is not on a CUDA device or is not homogeneous.
    ModuleNotFoundError
        If ``cudf`` or ``cugraph`` cannot be imported.

    Notes
    -----
    The function only supports GPU graph input.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import cugraph
    >>> import torch

    >>> g = dgl.graph((torch.tensor([1, 2]), torch.tensor([1, 3]))).to('cuda')
    >>> cugraph_g = g.to_cugraph()
    >>> cugraph_g.edges()
       src  dst
    0    2    3
    1    1    1
    """
    if g.device.type != "cuda":
        # The trailing space keeps the two sentences from running together.
        raise DGLError(
            f"Cannot convert a {g.device.type} graph to cugraph. "
            "Call g.to('cuda') first."
        )
    if not g.is_homogeneous:
        raise DGLError("dgl.to_cugraph only supports homogeneous graphs.")

    # Imported lazily so that the optional dependencies are only required
    # when this conversion is actually used.
    try:
        import cudf
        import cugraph
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            "to_cugraph requires cugraph which could not be imported"
        ) from err

    edgelist = g.edges()
    src_ser = cudf.from_dlpack(F.zerocopy_to_dlpack(edgelist[0]))
    dst_ser = cudf.from_dlpack(F.zerocopy_to_dlpack(edgelist[1]))
    cudf_data = cudf.DataFrame({"source": src_ser, "destination": dst_ser})
    g_cugraph = cugraph.Graph(directed=True)
    g_cugraph.from_cudf_edgelist(
        cudf_data, source="source", destination="destination"
    )
    return g_cugraph


DGLGraph.to_cugraph = to_cugraph


def from_cugraph(cugraph_graph):
    """Create a graph from a :class:`cugraph.Graph` object.

    Parameters
    ----------
    cugraph_graph : cugraph.Graph
        The cugraph graph object holding the graph structure. Node and edge
        attributes are dropped.

        If the input graph is undirected, DGL converts it to a directed graph
        by :func:`cugraph.Graph.to_directed`.

    Returns
    -------
    DGLGraph
        The created graph.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import cugraph
    >>> import cudf

    Create a cugraph graph.

    >>> cugraph_g = cugraph.Graph(directed=True)
    >>> df = cudf.DataFrame({"source":[0, 1, 2, 3],
    ...                      "destination":[1, 2, 3, 0]})
    >>> cugraph_g.from_cudf_edgelist(df)

    Convert it into a DGLGraph

    >>> g = dgl.from_cugraph(cugraph_g)
    >>> g.edges()
    (tensor([1, 2, 3, 0], device='cuda:0'), tensor([2, 3, 0, 1], device='cuda:0'))
    """
    if not cugraph_graph.is_directed():
        cugraph_graph = cugraph_graph.to_directed()

    # The edge list exposes its endpoints under the "src"/"dst" columns.
    edges = cugraph_graph.edges()
    src_t = F.zerocopy_from_dlpack(edges["src"].to_dlpack())
    dst_t = F.zerocopy_from_dlpack(edges["dst"].to_dlpack())
    return graph((src_t, dst_t))
def is_builtin(func):
    """Tell whether ``func`` is an instance of ``fn.BuiltinFunction``."""
    builtin_cls = fn.BuiltinFunction
    return isinstance(func, builtin_cls)


def invoke_node_udf(graph, nid, ntype, func, *, ndata=None, orig_nid=None):
    """Run a user-defined node function on a set of nodes.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    nid : Tensor
        IDs of the nodes to run the UDF on (may be the ``ALL`` marker).
    ntype : str
        Node type.
    func : callable
        The user-defined function; it receives one ``NodeBatch``.
    ndata : dict[str, Tensor], optional
        Node data to expose to the UDF. When omitted, the data is taken from
        the graph's node frame of ``ntype`` (the whole frame for ``ALL``,
        otherwise the sub-frame selected by ``nid``).
    orig_nid : Tensor, optional
        Node IDs to report in the batch instead of ``nid``. Useful if the
        input graph is an extracted subgraph.

    Returns
    -------
    dict[str, Tensor]
        Whatever the UDF returns.
    """
    type_id = graph.get_ntype_id(ntype)
    if ndata is None:
        frame = graph._node_frames[type_id]
        if is_all(nid):
            # Materialize the ID tensor so the batch carries concrete IDs.
            ndata = frame
            nid = graph.nodes(ntype=ntype)
        else:
            ndata = frame.subframe(nid)
    batch_ids = orig_nid if orig_nid is not None else nid
    return func(NodeBatch(graph, batch_ids, ntype, ndata))


def invoke_edge_udf(graph, eid, etype, func, *, orig_eid=None):
    """Run a user-defined edge function on a set of edges.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    eid : Tensor
        IDs of the edges to run the UDF on (may be the ``ALL`` marker).
    etype : (str, str, str)
        Edge type.
    func : callable
        The user-defined function; it receives one ``EdgeBatch``.
    orig_eid : Tensor, optional
        Edge IDs to report in the batch instead of ``eid``. Useful if the
        input graph is an extracted subgraph.

    Returns
    -------
    dict[str, Tensor]
        Whatever the UDF returns.
    """
    type_id = graph.get_etype_id(etype)
    src_type_id, dst_type_id = graph._graph.metagraph.find_edge(type_id)
    edge_frame = graph._edge_frames[type_id]
    # NOTE(review): `edges`/`find_edges` are called without an edge type, so
    # `graph` presumably has a single edge type here -- confirm with callers.
    if is_all(eid):
        src, dst, eid = graph.edges(form="all")
        edata = edge_frame
    else:
        src, dst = graph.find_edges(eid)
        edata = edge_frame.subframe(eid)
    if len(src) == 0:
        dgl_warning(
            "The input graph for the user-defined edge function "
            "does not contain valid edges"
        )
    srcdata = graph._node_frames[src_type_id].subframe(src)
    dstdata = graph._node_frames[dst_type_id].subframe(dst)
    batch_ids = orig_eid if orig_eid is not None else eid
    return func(EdgeBatch(graph, batch_ids, etype, srcdata, edata, dstdata))


def invoke_udf_reduce(graph, func, msgdata, *, orig_nid=None):
    """Run a user-defined reduce function on all destination nodes.

    Destination nodes are grouped by in-degree and the UDF is called once
    per group -- a strategy called *degree-bucketing*. Zero-degree nodes
    are never passed to the UDF.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    func : callable
        The user-defined function; it receives one ``NodeBatch`` per bucket.
    msgdata : dict[str, Tensor]
        Message data, indexed by edge ID.
    orig_nid : Tensor, optional
        Node IDs to report to the UDF instead of the graph's destination
        node IDs. Useful if the input graph is an extracted subgraph.

    Returns
    -------
    Frame
        A frame with one row per destination node holding the merged UDF
        outputs. It reuses the initializers of the destination node frame.
    """
    in_degs = graph.in_degrees()
    dst_nodes = graph.dstnodes()
    if orig_nid is None:
        orig_nid = dst_nodes
    ntype = graph.dsttypes[0]
    dst_frame = graph._node_frames[graph.get_ntype_id_from_dst(ntype)]
    msg_frame = Frame(msgdata)

    # Split both ID tensors into per-degree buckets.
    distinct_degs, split = _bucketing(in_degs)
    results = []
    reduced_nodes = []
    buckets = zip(distinct_degs, split(dst_nodes), split(orig_nid))
    for deg, members, orig_members in buckets:
        if deg == 0:
            # Nothing to reduce for nodes without incoming messages.
            continue
        num_members = len(members)
        reduced_nodes.append(members)
        member_frame = dst_frame.subframe(members)

        # Sort each node's incoming edges by edge ID. The reshape presumes
        # `in_edges` lists the edges node by node -- confirm.
        eids = F.zerocopy_to_numpy(graph.in_edges(members, form="eid"))
        assert len(eids) == deg * num_members
        eids = np.sort(eids.reshape((num_members, deg)), 1)
        eids = F.zerocopy_from_numpy(eids.flatten())

        # Give every message tensor the layout (num_nodes, degree, ...).
        mailbox = {
            name: F.reshape(msg, (num_members, deg) + F.shape(msg)[1:])
            for name, msg in msg_frame.subframe(eids).items()
        }
        batch = NodeBatch(
            graph, orig_members, ntype, member_frame, msgs=mailbox
        )
        results.append(func(batch))

    # Result frame shares the initializers of the destination frame.
    out_frame = Frame(num_rows=len(dst_nodes))
    out_frame._initializers = dst_frame._initializers
    out_frame._default_initializer = dst_frame._default_initializer

    # With only zero-degree nodes there is nothing to merge.
    if results:
        merged = {
            name: F.cat([res[name] for res in results], dim=0)
            for name in results[0].keys()
        }
        out_frame.update_row(F.cat(reduced_nodes, dim=0), merged)

    return out_frame


def _bucketing(val):
    """Group the positions of ``val`` by value.

    Parameters
    ----------
    val : Tensor
        Value tensor.

    Returns
    -------
    unique_val : numpy.ndarray
        The distinct values of ``val``.
    bucketor : callable[Tensor -> list[Tensor]]
        Splits any tensor indexed like ``val`` into one tensor per distinct
        value, in the same order as ``unique_val``.
    """
    ordered, perm = F.sort_1d(val)
    distinct = F.asnumpy(F.unique(ordered))
    # For each distinct value, the original positions holding that value.
    groups = [
        F.gather_row(perm, F.nonzero_1d(F.equal(ordered, value)))
        for value in distinct
    ]

    def bucketor(data):
        return [F.gather_row(data, members) for members in groups]

    return distinct, bucketor
def invoke_gsddmm(graph, func):
    """Invoke g-SDDMM computation on the graph.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    func : dgl.function.BaseMessageFunction
        Built-in message function.

    Returns
    -------
    dict[str, Tensor]
        Results from the g-SDDMM computation, keyed by ``func.out_field``.
    """
    # Indexed by the target codes carried by the message function
    # (``func.lhs`` / ``func.rhs`` / ``func.target``).
    frames = [graph.srcdata, graph.dstdata, graph.edata]

    if not isinstance(func, fn.BinaryMessageFunction):
        # Unary message function: a single operand is forwarded to the kernel.
        feat = frames[func.target][func.in_field]
        kernel = getattr(ops, func.name)
        if graph._graph.number_of_etypes() > 1:
            # Convert to list as dict is unordered.
            target = "u" if func.name == "copy_u" else "e"  # else: "copy_e"
            feat = data_dict_to_list(graph, feat, func, target)
        result = kernel(graph, feat)
        return {func.out_field: result}

    lhs = frames[func.lhs][func.lhs_field]
    rhs = frames[func.rhs][func.rhs_field]
    kernel = getattr(ops, func.name)
    if graph._graph.number_of_etypes() > 1:
        # The function name is split on "_" into
        # <lhs target>, <operator>, <rhs target>.
        lhs_target, _, rhs_target = func.name.split("_", 2)
        lhs = data_dict_to_list(graph, lhs, func, lhs_target)
        rhs = data_dict_to_list(graph, rhs, func, rhs_target)
    result = kernel(graph, lhs, rhs)
    return {func.out_field: result}


def invoke_gspmm(
    graph, mfunc, rfunc, *, srcdata=None, dstdata=None, edata=None
):
    """Invoke g-SPMM computation on the graph.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    mfunc : dgl.function.BaseMessageFunction
        Built-in message function.
    rfunc : dgl.function.BaseReduceFunction
        Built-in reduce function.
    srcdata : dict[str, Tensor], optional
        Source node feature data. If not provided, ``graph.srcdata`` is used.
    dstdata : dict[str, Tensor], optional
        Destination node feature data. If not provided, ``graph.dstdata`` is
        used.
    edata : dict[str, Tensor], optional
        Edge feature data. If not provided, ``graph.edata`` is used.

    Returns
    -------
    dict[str, Tensor]
        Results from the g-SPMM computation, keyed by ``rfunc.out_field``.

    Raises
    ------
    DGLError
        If the output field of ``mfunc`` differs from the message field of
        ``rfunc``.
    """
    # sanity check: the reducer must consume exactly what the message
    # function produces.
    if mfunc.out_field != rfunc.msg_field:
        raise DGLError(
            "Invalid message ({}) and reduce ({}) function pairs."
            " The output field of the message function must be equal to the"
            " message field of the reduce function.".format(mfunc, rfunc)
        )

    # Fall back to the graph's own frames for anything not supplied.
    edata = graph.edata if edata is None else edata
    srcdata = graph.srcdata if srcdata is None else srcdata
    dstdata = graph.dstdata if dstdata is None else dstdata
    frames = [srcdata, dstdata, edata]

    if not isinstance(mfunc, fn.BinaryMessageFunction):
        feat = frames[mfunc.target][mfunc.in_field]
        kernel = getattr(ops, "{}_{}".format(mfunc.name, rfunc.name))
        # A tuple operand is passed through untouched.
        if graph._graph.number_of_etypes() > 1 and not isinstance(feat, tuple):
            target = "u" if mfunc.name == "copy_u" else "e"  # else: "copy_e"
            feat = data_dict_to_list(graph, feat, mfunc, target)
        result = kernel(graph, feat)
        return {rfunc.out_field: result}

    lhs = frames[mfunc.lhs][mfunc.lhs_field]
    rhs = frames[mfunc.rhs][mfunc.rhs_field]
    kernel = getattr(ops, "{}_{}".format(mfunc.name, rfunc.name))
    if graph._graph.number_of_etypes() > 1:
        lhs_target, _, rhs_target = mfunc.name.split("_", 2)
        lhs = data_dict_to_list(graph, lhs, mfunc, lhs_target)
        rhs = data_dict_to_list(graph, rhs, mfunc, rhs_target)
    result = kernel(graph, lhs, rhs)
    return {rfunc.out_field: result}
""" if ( is_builtin(mfunc) and is_builtin(rfunc) and getattr(ops, "{}_{}".format(mfunc.name, rfunc.name), None) is not None ): # invoke fused message passing ndata = invoke_gspmm(g, mfunc, rfunc) else: # invoke message passing in two separate steps # message phase if is_builtin(mfunc): msgdata = invoke_gsddmm(g, mfunc) else: orig_eid = g.edata.get(EID, None) msgdata = invoke_edge_udf( g, ALL, g.canonical_etypes[0], mfunc, orig_eid=orig_eid ) # reduce phase if is_builtin(rfunc): msg = rfunc.msg_field ndata = invoke_gspmm(g, fn.copy_e(msg, msg), rfunc, edata=msgdata) else: orig_nid = g.dstdata.get(NID, None) ndata = invoke_udf_reduce(g, rfunc, msgdata, orig_nid=orig_nid) # apply phase if afunc is not None: for k, v in g.dstdata.items(): # include original node features if k not in ndata: ndata[k] = v orig_nid = g.dstdata.get(NID, None) ndata = invoke_node_udf( g, ALL, g.dsttypes[0], afunc, ndata=ndata, orig_nid=orig_nid ) return ndata ================================================ FILE: python/dgl/cuda/__init__.py ================================================ """ CUDA wrappers """ from .. import backend as F from .gpu_cache import GPUCache if F.get_preferred_backend() == "pytorch": from . import nccl ================================================ FILE: python/dgl/cuda/gpu_cache.py ================================================ """API wrapping HugeCTR gpu_cache.""" # Copyright (c) 2022, NVIDIA Corporation # All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
class GPUCache(object):
    """High-level wrapper for GPU embedding cache.

    The wrapper also keeps two running counters, ``total_queries`` and
    ``total_miss``, which are updated by :meth:`query` and exposed as a ratio
    through :attr:`miss_rate`.

    Parameters
    ----------
    num_items : int
        Forwarded to the underlying cache constructor.
    num_feats : int
        Forwarded to the underlying cache constructor.
    idtype : backend integer dtype, optional
        Type of the keys; must be ``F.int32`` or ``F.int64``.
    """

    def __init__(self, num_items, num_feats, idtype=F.int64):
        assert idtype in [F.int32, F.int64], "idtype must be int32 or int64"
        # The last argument selects between 32 and 64 according to the key
        # dtype (presumably the key width in bits expected by the C API).
        self._cache = _CAPI_DGLGpuCacheCreate(
            num_items, num_feats, 32 if idtype == F.int32 else 64
        )
        self.idtype = idtype
        self.total_miss = 0
        self.total_queries = 0

    def query(self, keys):
        """Queries the GPU cache.

        Parameters
        ----------
        keys : Tensor
            The keys to query the GPU cache with.

        Returns
        -------
        tuple(Tensor, Tensor, Tensor)
            A tuple containing (values, missing_indices, missing_keys) where
            values[missing_indices] corresponds to cache misses that should be
            filled by querying another source with missing_keys.
        """
        self.total_queries += keys.shape[0]
        # Keys are cast to the id type the cache was created with.
        keys = F.astype(keys, self.idtype)
        values, missing_index, missing_keys = _CAPI_DGLGpuCacheQuery(
            self._cache, F.to_dgl_nd(keys)
        )
        self.total_miss += missing_keys.shape[0]
        return (
            F.from_dgl_nd(values),
            F.from_dgl_nd(missing_index),
            F.from_dgl_nd(missing_keys),
        )

    def replace(self, keys, values):
        """Inserts key-value pairs into the GPU cache using the Least-Recently
        Used (LRU) algorithm to remove old key-value pairs if it is full.

        Parameters
        ----------
        keys: Tensor
            The keys to insert to the GPU cache.
        values: Tensor
            The values to insert to the GPU cache. They are cast to float32
            before insertion.
        """
        keys = F.astype(keys, self.idtype)
        values = F.astype(values, F.float32)
        _CAPI_DGLGpuCacheReplace(
            self._cache, F.to_dgl_nd(keys), F.to_dgl_nd(values)
        )

    @property
    def miss_rate(self):
        """Returns the cache miss rate since creation.

        Returns 0.0 when no key has been queried yet, instead of raising
        ``ZeroDivisionError``.
        """
        if self.total_queries == 0:
            return 0.0
        return self.total_miss / self.total_queries
So if we have an array `global_idxs` of all of the neighbors updated during mini-batch processing, and an array `global_values` containing the new values associated with the neighbors, we communicate them to the own processes via: >>> my_idxs, my_values = nccl.sparse_all_to_all_push(global_idxs, global_values, part) This communication pattern is common when communicating gradient updates for node embeddings. Indices the current process owns, do not need to treated specially, as internally they will be copied to the output array. If we have a set of indices in process 0 '[0, 3, 8, 9, 10]` and for process 1 '[0, 2, 4, 5, 8, 8, 9]'. Using a remainder partition will result indices for processe 0 of '[0, 8, 10, 0, 2, 4, 8, 8]', and for process 1 of '[3, 9, 5, 9]'. """ if not dist.is_initialized() or dist.get_world_size() == 1: return idx, value assert ( dist.get_backend() == "nccl" ), "requires NCCL backend to communicate CUDA tensors." perm, send_splits = partition.generate_permutation(idx) perm = perm.long() # Get receive splits. recv_splits = torch.empty_like(send_splits) dist.all_to_all_single(recv_splits, send_splits) # Use pinned memory to speedup D2H copy. recv_splits = recv_splits.to("cpu", non_blocking=True) send_splits = send_splits.to("cpu", non_blocking=True) send_idx = idx[perm] send_value = value[perm] # Wait D2H copy finish. torch.cuda.current_stream().synchronize() recv_sum = recv_splits.sum() recv_splits = recv_splits.tolist() send_splits = send_splits.tolist() # Send idx. recv_idx = torch.empty((recv_sum,), dtype=idx.dtype, device=idx.device) dist.all_to_all_single(recv_idx, send_idx, recv_splits, send_splits) # Send value. 
def sparse_all_to_all_pull(req_idx, value, partition):
    """Perform an all-to-all-v operation, whereby all processors request
    the values corresponding to their set of indices.

    Note: This method requires 'torch.distributed.get_backend() == "nccl"'.

    Parameters
    ----------
    req_idx : torch.Tensor
        The set of indices this processor is requesting.
    value : torch.Tensor
        The multi-dimension set of values that can be requested from this
        processor.
    partition : NDArrayPartition
        The object containing information for assigning indices to
        processors.

    Returns
    -------
    torch.Tensor
        The set of received values, corresponding to `req_idx`.

    Examples
    --------

    To perform a sparse_all_to_all_pull(), a partition object must be
    provided. A partition of a homogeneous graph, where the vertices are
    striped across processes can be generated via:

    >>> from dgl.partition import NDArrayPartition
    >>> part = NDArrayPartition(g.num_nodes(), world_size, mode='remainder')

    With this partition, each processor can request values/features
    associated with vertices in the graph. So in the case where we have
    a set of neighbors 'nbr_idxs' we need features for, and each process
    has a tensor 'node_feat' storing the features of nodes it owns in
    the partition, the features can be requested via:

    >>> nbr_values = nccl.sparse_all_to_all_pull(nbr_idxs, node_feat, part)

    Then the two arrays 'nbr_idxs' and 'nbr_values' form the sparse
    set of features, where 'nbr_idxs[i]' is the global node id, and
    'nbr_values[i]' is the feature vector for that node. This
    communication pattern is useful for node features or node
    embeddings.
    """
    # Without an initialized process group, or with a single rank, the
    # lookup is a plain local index.
    if not dist.is_initialized() or dist.get_world_size() == 1:
        return value[req_idx.long()]
    assert (
        dist.get_backend() == "nccl"
    ), "requires NCCL backend to communicate CUDA tensors."

    order, out_counts = partition.generate_permutation(req_idx)
    order = order.long()

    # Exchange the per-rank request counts to learn the response counts.
    in_counts = torch.empty_like(out_counts)
    dist.all_to_all_single(in_counts, out_counts)

    # Start non-blocking copies of both count tensors to the CPU.
    in_counts = in_counts.to("cpu", non_blocking=True)
    out_counts = out_counts.to("cpu", non_blocking=True)
    req_idx = req_idx[order]

    # Wait for the copies above before reading the counts on the host.
    torch.cuda.current_stream().synchronize()
    num_incoming = in_counts.sum()
    in_counts = in_counts.tolist()
    out_counts = out_counts.tolist()

    # Gather the indices requested from this rank.
    incoming_idx = torch.empty(
        (num_incoming,), dtype=req_idx.dtype, device=req_idx.device
    )
    dist.all_to_all_single(incoming_idx, req_idx, in_counts, out_counts)

    # Convert requested indices to local indices depending on partition.
    if num_incoming > 0:
        incoming_idx = partition.map_to_local(incoming_idx)

    # Send the requested rows back; split sizes are swapped relative to the
    # index exchange above.
    feat_shape = value.shape[1:]
    gathered = torch.empty(
        (req_idx.size(0), *feat_shape),
        dtype=value.dtype,
        device=value.device,
    )
    dist.all_to_all_single(gathered, value[incoming_idx], out_counts, in_counts)

    # Permute the value back into the requested order.
    result = torch.empty_like(gathered)
    result[order] = gathered
    return result
import citation_graph as citegrh from .actor import ActorDataset from .movielens import MovieLensDataset from .adapter import * from .bitcoinotc import BitcoinOTC, BitcoinOTCDataset from .citation_graph import ( CitationGraphDataset, CiteseerGraphDataset, CoraBinary, CoraGraphDataset, PubmedGraphDataset, ) from .csv_dataset import CSVDataset from .dgl_dataset import DGLBuiltinDataset, DGLDataset from .fakenews import FakeNewsDataset from .flickr import FlickrDataset from .fraud import FraudAmazonDataset, FraudDataset, FraudYelpDataset from .gdelt import GDELT, GDELTDataset from .gindt import GINDataset from .gnn_benchmark import ( AmazonCoBuy, AmazonCoBuyComputerDataset, AmazonCoBuyPhotoDataset, Coauthor, CoauthorCSDataset, CoauthorPhysicsDataset, CoraFull, CoraFullDataset, ) from .icews18 import ICEWS18, ICEWS18Dataset from .karate import KarateClub, KarateClubDataset from .knowledge_graph import FB15k237Dataset, FB15kDataset, WN18Dataset from .minigc import * from .ppi import LegacyPPIDataset, PPIDataset from .qm7b import QM7b, QM7bDataset from .qm9 import QM9, QM9Dataset from .qm9_edge import QM9Edge, QM9EdgeDataset from .rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset from .reddit import RedditDataset from .sbm import SBMMixture, SBMMixtureDataset from .synthetic import ( BA2MotifDataset, BACommunityDataset, BAShapeDataset, TreeCycleDataset, TreeGridDataset, ) from .tree import SST, SSTDataset from .tu import LegacyTUDataset, TUDataset from .utils import * from .cluster import CLUSTERDataset from .geom_gcn import ( ChameleonDataset, CornellDataset, SquirrelDataset, TexasDataset, WisconsinDataset, ) from .heterophilous_graphs import ( AmazonRatingsDataset, MinesweeperDataset, QuestionsDataset, RomanEmpireDataset, TolokersDataset, ) # RDKit is required for Peptides-Structural, Peptides-Functional dataset. # Exception handling was added to prevent crashes for users who are using other # datasets. 
def register_data_args(parser):
    """Add the ``--dataset`` command-line option to ``parser``.

    Parameters
    ----------
    parser : argparse.ArgumentParser
        The parser to extend. The option is an optional string.
    """
    dataset_help = "The input dataset. Can be cora, citeseer, pubmed, syn(synthetic dataset) or reddit"
    parser.add_argument(
        "--dataset", type=str, required=False, help=dataset_help
    )


def load_data(args):
    """Load the dataset selected by ``args.dataset``.

    Parameters
    ----------
    args : object
        Any object with a ``dataset`` attribute, e.g. the namespace produced
        by a parser extended with :func:`register_data_args`.

    Returns
    -------
    The result of the matching citation-graph loader for ``"cora"``,
    ``"citeseer"`` and ``"pubmed"``, or a :class:`RedditDataset` for any name
    starting with ``"reddit"``.

    Raises
    ------
    ValueError
        If ``args.dataset`` matches none of the names above (including
        ``None``).
    """
    name = args.dataset
    if name == "cora":
        return citegrh.load_cora()
    if name == "citeseer":
        return citegrh.load_citeseer()
    if name == "pubmed":
        return citegrh.load_pubmed()
    if name is not None and name.startswith("reddit"):
        # Self-loops are requested through the dataset name itself.
        with_self_loop = "self-loop" in name
        return RedditDataset(self_loop=with_self_loop)
    raise ValueError("Unknown dataset: {}".format(name))
Default: False verbose : bool, optional Whether to print progress information. Default: True transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Default: None Attributes ---------- num_classes : int Number of node classes Notes ----- The graph does not come with edges for both directions. """ def __init__( self, raw_dir=None, force_reload=False, verbose=True, transform=None ): super(ActorDataset, self).__init__( name="actor", url=_get_dgl_url("dataset/actor.zip"), raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): """Load and process the data.""" try: import torch except ImportError: raise ModuleNotFoundError( "This dataset requires PyTorch to be the backend." ) # Process node features and labels. with open(f"{self.raw_path}/out1_node_feature_label.txt", "r") as f: data = [x.split("\t") for x in f.read().split("\n")[1:-1]] rows, cols = [], [] labels = torch.empty(len(data), dtype=torch.long) for n_id, col, label in data: col = [int(x) for x in col.split(",")] rows += [int(n_id)] * len(col) cols += col labels[int(n_id)] = int(label) row, col = torch.tensor(rows), torch.tensor(cols) features = torch.zeros(len(data), int(col.max()) + 1) features[row, col] = 1.0 self._num_classes = int(labels.max().item()) + 1 # Process graph structure. with open(f"{self.raw_path}/out1_graph_edges.txt", "r") as f: data = f.read().split("\n")[1:-1] data = [[int(v) for v in r.split("\t")] for r in data] dst, src = torch.tensor(data, dtype=torch.long).t().contiguous() self._g = graph((src, dst), num_nodes=features.size(0)) self._g.ndata["feat"] = features self._g.ndata["label"] = labels # Process 10 train/val/test node splits. 
class AsNodePredDataset(DGLDataset):
    """Repurpose a dataset for a standard semi-supervised transductive
    node prediction task.

    The class converts a given dataset into a new dataset object such that:

      - Contains only one graph, accessible from ``dataset[0]``.
      - The graph stores:

        - Node labels in ``g.ndata['label']``.
        - Train/val/test masks in ``g.ndata['train_mask']``,
          ``g.ndata['val_mask']``, and ``g.ndata['test_mask']`` respectively.
      - In addition, the dataset contains the following attributes:

        - ``num_classes``, the number of classes to predict.
        - ``train_idx``, ``val_idx``, ``test_idx``, train/val/test indexes.

    If the input dataset contains heterogeneous graphs, users need to specify
    the ``target_ntype`` argument to indicate which node type to make
    predictions for. In this case:

      - Node labels are stored in ``g.nodes[target_ntype].data['label']``.
      - Training masks are stored in
        ``g.nodes[target_ntype].data['train_mask']``. So do validation and
        test masks.

    The class will keep only the first graph in the provided dataset and
    generate train/val/test masks according to the given split ratio. The
    generated masks will be cached to disk for fast re-loading. If the
    provided split ratio differs from the cached one, it will re-process the
    dataset properly.

    Parameters
    ----------
    dataset : DGLDataset
        The dataset to be converted.
    split_ratio : (float, float, float), optional
        Split ratios for training, validation and test sets. They must sum to
        one. Tuples and lists are treated alike when validating the cache.
    target_ntype : str, optional
        The node type to add split mask for.

    Attributes
    ----------
    num_classes : int
        Number of classes to predict.
    train_idx : Tensor
        An 1-D integer tensor of training node IDs.
    val_idx : Tensor
        An 1-D integer tensor of validation node IDs.
    test_idx : Tensor
        An 1-D integer tensor of test node IDs.

    Examples
    --------
    >>> ds = dgl.data.AmazonCoBuyComputerDataset()
    >>> print(ds)
    Dataset("amazon_co_buy_computer", num_graphs=1, save_path=...)
    >>> new_ds = dgl.data.AsNodePredDataset(ds, [0.8, 0.1, 0.1])
    >>> print(new_ds)
    Dataset("amazon_co_buy_computer-as-nodepred", num_graphs=1, save_path=...)
    >>> print('train_mask' in new_ds[0].ndata)
    True
    """

    def __init__(self, dataset, split_ratio=None, target_ntype=None, **kwargs):
        self.dataset = dataset
        self.split_ratio = split_ratio
        self.target_ntype = target_ntype
        super().__init__(
            self.dataset.name + "-as-nodepred",
            hash_key=(split_ratio, target_ntype, dataset.name, "nodepred"),
            **kwargs
        )

    def process(self):
        """Build the single graph with labels, split masks and split indices."""
        # Datasets exposing ``get_idx_split`` are treated as OGB-style: their
        # items are (graph, label) pairs and they ship a predefined split.
        is_ogb = hasattr(self.dataset, "get_idx_split")
        if is_ogb:
            g, label = self.dataset[0]
            self.g = g.clone()
            self.g.ndata["label"] = F.reshape(label, (g.num_nodes(),))
        else:
            self.g = self.dataset[0].clone()

        if "label" not in self.g.nodes[self.target_ntype].data:
            raise ValueError(
                "Missing node labels. Make sure labels are stored "
                "under name 'label'."
            )

        if self.split_ratio is None:
            if is_ogb:
                # Turn the predefined index split into boolean masks.
                split = self.dataset.get_idx_split()
                n = self.g.num_nodes()
                for mask_name, split_key in (
                    ("train_mask", "train"),
                    ("val_mask", "valid"),
                    ("test_mask", "test"),
                ):
                    self.g.ndata[mask_name] = utils.generate_mask_tensor(
                        utils.idx2mask(split[split_key], n)
                    )
            else:
                # Without a ratio the source graph must already carry masks.
                ndata = self.g.nodes[self.target_ntype].data
                for mask_name in ("train_mask", "val_mask", "test_mask"):
                    assert mask_name in ndata, (
                        "{} is not provided, please specify split_ratio "
                        "to generate the masks".format(mask_name)
                    )
        else:
            if self.verbose:
                print("Generating train/val/test masks...")
            utils.add_nodepred_split(self, self.split_ratio, self.target_ntype)

        self._set_split_index()

        # Prefer the class count advertised by the source dataset; otherwise
        # count the distinct labels present on the target node type.
        self.num_classes = getattr(self.dataset, "num_classes", None)
        if self.num_classes is None:
            self.num_classes = len(
                F.unique(self.g.nodes[self.target_ntype].data["label"])
            )

    def has_cache(self):
        """Return whether the cached graph file for this hash exists."""
        return os.path.isfile(
            os.path.join(self.save_path, "graph_{}.bin".format(self.hash))
        )

    def load(self):
        """Load the cached graph and metadata.

        Raises
        ------
        ValueError
            If the cached split ratio or target node type differs from the
            one this object was constructed with.
        """
        with open(
            os.path.join(self.save_path, "info_{}.json".format(self.hash)), "r"
        ) as f:
            info = json.load(f)

        # JSON has no tuple type, so a ratio saved as a tuple is read back as
        # a list. Normalize before comparing; otherwise a tuple ratio would
        # never match its own cache and the dataset would be re-processed on
        # every load.
        requested_ratio = self.split_ratio
        if requested_ratio is not None:
            requested_ratio = list(requested_ratio)
        if (
            info["split_ratio"] != requested_ratio
            or info["target_ntype"] != self.target_ntype
        ):
            raise ValueError(
                "Provided split ratio is different from the cached file. "
                "Re-process the dataset."
            )
        self.split_ratio = info["split_ratio"]
        self.target_ntype = info["target_ntype"]
        self.num_classes = info["num_classes"]
        gs, _ = utils.load_graphs(
            os.path.join(self.save_path, "graph_{}.bin".format(self.hash))
        )
        self.g = gs[0]
        self._set_split_index()

    def save(self):
        """Write the graph and its metadata to the cache directory."""
        utils.save_graphs(
            os.path.join(self.save_path, "graph_{}.bin".format(self.hash)),
            [self.g],
        )
        with open(
            os.path.join(self.save_path, "info_{}.json".format(self.hash)), "w"
        ) as f:
            json.dump(
                {
                    "split_ratio": self.split_ratio,
                    "target_ntype": self.target_ntype,
                    "num_classes": self.num_classes,
                },
                f,
            )

    def __getitem__(self, idx):
        # The dataset holds a single graph; ``idx`` is not inspected.
        return self.g

    def __len__(self):
        return 1

    def _set_split_index(self):
        """Add train_idx/val_idx/test_idx as dataset attributes according to
        corresponding mask."""
        ndata = self.g.nodes[self.target_ntype].data
        self.train_idx = F.nonzero_1d(ndata["train_mask"])
        self.val_idx = F.nonzero_1d(ndata["val_mask"])
        self.test_idx = F.nonzero_1d(ndata["test_mask"])
edges.shape[1] >= num_samples: edges = edges[:, :num_samples] return edges class AsLinkPredDataset(DGLDataset): """Repurpose a dataset for link prediction task. The created dataset will include data needed for link prediction. Currently it only supports homogeneous graphs. It will keep only the first graph in the provided dataset and generate train/val/test edges according to the given split ratio, and the correspondent negative edges based on the neg_ratio. The generated edges will be cached to disk for fast re-loading. If the provided split ratio differs from the cached one, it will re-process the dataset properly. Parameters ---------- dataset : DGLDataset The dataset to be converted. split_ratio : (float, float, float), optional Split ratios for training, validation and test sets. Must sum to one. neg_ratio : int, optional Indicate how much negative samples to be sampled The number of the negative samples will be equal or less than neg_ratio * num_positive_edges. Attributes ------- feat_size: int The size of the feature dimension in the graph train_graph: DGLGraph The DGLGraph for training val_edges: Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor]] The validation set edges, encoded as ((positive_edge_src, positive_edge_dst), (negative_edge_src, negative_edge_dst)) test_edges: Tuple[Tuple[Tensor, Tensor], Tuple[Tensor, Tensor]] The test set edges, encoded as ((positive_edge_src, positive_edge_dst), (negative_edge_src, negative_edge_dst)) Examples -------- >>> ds = dgl.data.CoraGraphDataset() >>> print(ds) Dataset("cora_v2", num_graphs=1, save_path=...) 
>>> new_ds = dgl.data.AsLinkPredDataset(ds, [0.8, 0.1, 0.1]) >>> print(new_ds) Dataset("cora_v2-as-linkpred", num_graphs=1, save_path=/home/ubuntu/.dgl/cora_v2-as-linkpred) >>> print(hasattr(new_ds, "test_edges")) True """ def __init__(self, dataset, split_ratio=None, neg_ratio=3, **kwargs): self.g = dataset[0] self.num_nodes = self.g.num_nodes() self.dataset = dataset self.split_ratio = split_ratio self.neg_ratio = neg_ratio super().__init__( dataset.name + "-as-linkpred", hash_key=(neg_ratio, split_ratio, dataset.name, "linkpred"), **kwargs ) def process(self): if self.split_ratio is None: # Handle logics for OGB link prediction dataset assert hasattr( self.dataset, "get_edge_split" ), "dataset doesn't have get_edge_split method, please specify split_ratio and neg_ratio to generate the split" # This is likely to be an ogb dataset self.edge_split = self.dataset.get_edge_split() self._train_graph = self.g if "source_node" in self.edge_split["test"]: # Probably ogbl-citation2 pos_e = ( self.edge_split["valid"]["source_node"], self.edge_split["valid"]["target_node"], ) neg_e_size = self.edge_split["valid"]["target_node_neg"].shape[ -1 ] neg_e_src = np.repeat( self.edge_split["valid"]["source_node"], neg_e_size ) neg_e_dst = np.reshape( self.edge_split["valid"]["target_node_neg"], -1 ) self._val_edges = pos_e, (neg_e_src, neg_e_dst) pos_e = ( self.edge_split["test"]["source_node"], self.edge_split["test"]["target_node"], ) neg_e_size = self.edge_split["test"]["target_node_neg"].shape[ -1 ] neg_e_src = np.repeat( self.edge_split["test"]["source_node"], neg_e_size ) neg_e_dst = np.reshape( self.edge_split["test"]["target_node_neg"], -1 ) self._test_edges = pos_e, (neg_e_src, neg_e_dst) elif "edge" in self.edge_split["test"]: # Probably ogbl-collab pos_e_tensor, neg_e_tensor = ( self.edge_split["valid"]["edge"], self.edge_split["valid"]["edge_neg"], ) pos_e = (pos_e_tensor[:, 0], pos_e_tensor[:, 1]) neg_e = (neg_e_tensor[:, 0], neg_e_tensor[:, 1]) self._val_edges = 
pos_e, neg_e pos_e_tensor, neg_e_tensor = ( self.edge_split["test"]["edge"], self.edge_split["test"]["edge_neg"], ) pos_e = (pos_e_tensor[:, 0], pos_e_tensor[:, 1]) neg_e = (neg_e_tensor[:, 0], neg_e_tensor[:, 1]) self._test_edges = pos_e, neg_e # delete edge split to save memory self.edge_split = None else: assert self.split_ratio is not None, "Need to specify split_ratio" assert self.neg_ratio is not None, "Need to specify neg_ratio" ratio = self.split_ratio graph = self.dataset[0] n = graph.num_edges() src, dst = graph.edges() src, dst = F.asnumpy(src), F.asnumpy(dst) n_train, n_val, n_test = ( int(n * ratio[0]), int(n * ratio[1]), int(n * ratio[2]), ) idx = np.random.permutation(n) train_pos_idx = idx[:n_train] val_pos_idx = idx[n_train : n_train + n_val] test_pos_idx = idx[n_train + n_val :] neg_src, neg_dst = negative_sample( graph, self.neg_ratio * (n_val + n_test) ) neg_n_val, neg_n_test = ( self.neg_ratio * n_val, self.neg_ratio * n_test, ) neg_val_src, neg_val_dst = neg_src[:neg_n_val], neg_dst[:neg_n_val] neg_test_src, neg_test_dst = ( neg_src[neg_n_val:], neg_dst[neg_n_val:], ) self._val_edges = ( F.tensor(src[val_pos_idx]), F.tensor(dst[val_pos_idx]), ), (F.tensor(neg_val_src), F.tensor(neg_val_dst)) self._test_edges = ( F.tensor(src[test_pos_idx]), F.tensor(dst[test_pos_idx]), ), (F.tensor(neg_test_src), F.tensor(neg_test_dst)) self._train_graph = create_dgl_graph( (src[train_pos_idx], dst[train_pos_idx]), num_nodes=self.num_nodes, ) self._train_graph.ndata["feat"] = graph.ndata["feat"] def has_cache(self): return os.path.isfile( os.path.join(self.save_path, "graph_{}.bin".format(self.hash)) ) def load(self): gs, tensor_dict = utils.load_graphs( os.path.join(self.save_path, "graph_{}.bin".format(self.hash)) ) self.g = gs[0] self._train_graph = self.g self._val_edges = ( tensor_dict["val_pos_src"], tensor_dict["val_pos_dst"], ), (tensor_dict["val_neg_src"], tensor_dict["val_neg_dst"]) self._test_edges = ( tensor_dict["test_pos_src"], 
class AsGraphPredDataset(DGLDataset):
    """Repurpose a dataset for standard graph property prediction task.

    The created dataset will include data needed for graph property prediction.
    Currently it only supports homogeneous graphs.

    The class converts a given dataset into a new dataset object such that:

      - It stores ``len(dataset)`` graphs.
      - The i-th graph and its label is accessible from ``dataset[i]``.

    The class will generate a train/val/test split if :attr:`split_ratio` is provided.
    The generated split will be cached to disk for fast re-loading. If the provided split
    ratio differs from the cached one, it will re-process the dataset properly.

    Parameters
    ----------
    dataset : DGLDataset
        The dataset to be converted.
    split_ratio : (float, float, float), optional
        Split ratios for training, validation and test sets. They must sum to one.

    Attributes
    ----------
    num_tasks : int
        Number of tasks to predict.
    num_classes : int
        Number of classes to predict per task, None for regression datasets.
    train_idx : Tensor
        An 1-D integer tensor of training graph IDs.
    val_idx : Tensor
        An 1-D integer tensor of validation graph IDs.
    test_idx : Tensor
        An 1-D integer tensor of test graph IDs.
    node_feat_size : int
        Input node feature size, None if not applicable.
    edge_feat_size : int
        Input edge feature size, None if not applicable.

    Examples
    --------

    >>> from dgl.data import AsGraphPredDataset
    >>> from ogb.graphproppred import DglGraphPropPredDataset
    >>> dataset = DglGraphPropPredDataset(name='ogbg-molhiv')
    >>> new_dataset = AsGraphPredDataset(dataset)
    >>> print(new_dataset)
    Dataset("ogbg-molhiv-as-graphpred", num_graphs=41127, save_path=...)
    >>> print(len(new_dataset))
    41127
    >>> print(new_dataset[0])
    (Graph(num_nodes=19, num_edges=40,
           ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
           edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)}), tensor([0]))
    """

    def __init__(self, dataset, split_ratio=None, **kwargs):
        self.dataset = dataset
        self.split_ratio = split_ratio
        super().__init__(
            dataset.name + "-as-graphpred",
            hash_key=(split_ratio, dataset.name, "graphpred"),
            **kwargs
        )

    @staticmethod
    def _ratio_as_list(ratio):
        """Return ``ratio`` as a plain list (or None) so that a tuple given by
        the caller compares equal to the list decoded from the JSON cache."""
        return None if ratio is None else list(ratio)

    def process(self):
        """Populate the train/val/test indices, ``num_classes`` and ``num_tasks``.

        Raises
        ------
        DGLError
            If no ``split_ratio`` was given and the wrapped dataset provides
            neither ``get_idx_split`` nor usable train/val/test masks.
        """
        is_ogb = hasattr(self.dataset, "get_idx_split")
        if self.split_ratio is None:
            if is_ogb:
                split = self.dataset.get_idx_split()
                self.train_idx = split["train"]
                self.val_idx = split["valid"]
                self.test_idx = split["test"]
            else:
                # Handle FakeNewsDataset
                try:
                    self.train_idx = F.nonzero_1d(self.dataset.train_mask)
                    self.val_idx = F.nonzero_1d(self.dataset.val_mask)
                    self.test_idx = F.nonzero_1d(self.dataset.test_mask)
                except Exception as err:
                    # ``Exception`` rather than a bare ``except`` so that
                    # KeyboardInterrupt/SystemExit are not masked.
                    raise DGLError(
                        "The input dataset does not have default train/val/test "
                        "split. Please specify split_ratio to generate the split."
                    ) from err
        else:
            if self.verbose:
                print("Generating train/val/test split...")
            train_ratio, val_ratio, _ = self.split_ratio
            num_graphs = len(self.dataset)
            num_train = int(num_graphs * train_ratio)
            num_val = int(num_graphs * val_ratio)

            # The test set takes whatever remains after train and val, so
            # every graph is assigned to exactly one subset.
            idx = np.random.permutation(num_graphs)
            self.train_idx = F.tensor(idx[:num_train])
            self.val_idx = F.tensor(idx[num_train : num_train + num_val])
            self.test_idx = F.tensor(idx[num_train + num_val :])

        if hasattr(self.dataset, "num_classes"):
            # GINDataset, MiniGCDataset, FakeNewsDataset, TUDataset,
            # LegacyTUDataset, BA2MotifDataset
            self.num_classes = self.dataset.num_classes
        else:
            # None for multi-label classification and regression
            self.num_classes = None

        if hasattr(self.dataset, "num_tasks"):
            # OGB datasets
            self.num_tasks = self.dataset.num_tasks
        else:
            self.num_tasks = 1

    def has_cache(self):
        """Return whether the cached info JSON for this hash exists."""
        return os.path.isfile(
            os.path.join(self.save_path, "info_{}.json".format(self.hash))
        )

    def load(self):
        """Restore the split and metadata from the on-disk cache.

        Raises
        ------
        ValueError
            If the cached split ratio differs from the requested one.
        """
        with open(
            os.path.join(self.save_path, "info_{}.json".format(self.hash)), "r"
        ) as f:
            info = json.load(f)
        # JSON stores sequences as arrays, which decode to lists. Compare in
        # list form so that a tuple ``split_ratio`` matches its own cache.
        cached_ratio = self._ratio_as_list(info["split_ratio"])
        if cached_ratio != self._ratio_as_list(self.split_ratio):
            raise ValueError(
                "Provided split ratio is different from the cached file. "
                "Re-process the dataset."
            )
        self.split_ratio = info["split_ratio"]
        self.num_tasks = info["num_tasks"]
        self.num_classes = info["num_classes"]

        split = np.load(
            os.path.join(self.save_path, "split_{}.npz".format(self.hash))
        )
        self.train_idx = F.zerocopy_from_numpy(split["train_idx"])
        self.val_idx = F.zerocopy_from_numpy(split["val_idx"])
        self.test_idx = F.zerocopy_from_numpy(split["test_idx"])

    def save(self):
        """Write the metadata (JSON) and split indices (NPZ) to ``save_path``."""
        # ``exist_ok`` avoids the check-then-create race of a separate
        # ``os.path.exists`` test.
        os.makedirs(self.save_path, exist_ok=True)
        with open(
            os.path.join(self.save_path, "info_{}.json".format(self.hash)), "w"
        ) as f:
            json.dump(
                {
                    "split_ratio": self.split_ratio,
                    "num_tasks": self.num_tasks,
                    "num_classes": self.num_classes,
                },
                f,
            )
        np.savez(
            os.path.join(self.save_path, "split_{}.npz".format(self.hash)),
            train_idx=F.zerocopy_to_numpy(self.train_idx),
            val_idx=F.zerocopy_to_numpy(self.val_idx),
            test_idx=F.zerocopy_to_numpy(self.test_idx),
        )

    def __getitem__(self, idx):
        """Return item ``idx`` of the wrapped dataset unchanged."""
        return self.dataset[idx]

    def __len__(self):
        """Return the number of items in the wrapped dataset."""
        return len(self.dataset)

    @property
    def node_feat_size(self):
        """Last dimension of ``ndata['feat']`` of the first graph, or None."""
        g = self[0][0]
        return g.ndata["feat"].shape[-1] if "feat" in g.ndata else None

    @property
    def edge_feat_size(self):
        """Last dimension of ``edata['feat']`` of the first graph, or None."""
        g = self[0][0]
        return g.edata["feat"].shape[-1] if "feat" in g.edata else None
def download(self):
    """Download the gzipped CSV, verify its SHA-1 digest and extract it.

    The archive is stored as ``<name>.csv.gz`` under ``self.raw_dir`` and
    extracted into ``self.raw_path``.

    Raises
    ------
    UserWarning
        If the downloaded file's SHA-1 digest does not match ``_sha1_str``.
    """
    gz_file_path = os.path.join(self.raw_dir, self.name + ".csv.gz")
    download(self.url, path=gz_file_path)
    if not check_sha1(gz_file_path, self._sha1_str):
        # Each fragment ends with a space so the concatenated sentences do
        # not run together in the final message.
        raise UserWarning(
            "File {} is downloaded but the content hash does not match. "
            "The repo may be outdated or download may be incomplete. "
            "Otherwise you can create an issue for it.".format(
                self.name + ".csv.gz"
            )
        )
    self._extract_gz(gz_file_path, self.raw_path)
def __getitem__(self, item):
    r"""Get graph by index

    Parameters
    ----------
    item : int
        Item index

    Returns
    -------
    :class:`dgl.DGLGraph`

        The graph contains:

        - ``edata['h']`` : edge weights
    """
    transform = self._transform
    graph = self.graphs[item]
    # Apply the optional transform on every access; without one the stored
    # graph is handed back as is.
    return graph if transform is None else transform(graph)
def _pickle_load(pkl_file):
    """Unpickle an object from the open binary file ``pkl_file``.

    DeprecationWarnings raised while unpickling are suppressed. On Python 3
    the payload is decoded with ``encoding="latin1"``.
    """
    # Only Python 3's ``pickle.load`` takes the ``encoding`` keyword.
    load_kwargs = {"encoding": "latin1"} if sys.version_info > (3, 0) else {}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=DeprecationWarning)
        return pkl.load(pkl_file, **load_kwargs)
def __init__(
    self,
    name,
    raw_dir=None,
    force_reload=False,
    verbose=True,
    reverse_edge=True,
    transform=None,
    reorder=False,
):
    """Validate the dataset name, resolve its download URL and initialize.

    Parameters
    ----------
    name : str
        One of 'cora', 'citeseer' or 'pubmed' (case-insensitive).
    raw_dir, force_reload, verbose, transform
        Forwarded unchanged to the parent initializer.
    reverse_edge : bool
        Stored as ``self._reverse_edge``.
    reorder : bool
        Stored as ``self._reorder``.
    """
    # Normalize once so that validation, the ``_urls`` lookup and the stored
    # name all use the same lower-case spelling. Previously a mixed-case
    # name such as "Citeseer" passed the check and then failed with KeyError.
    name = name.lower()
    assert name in ["cora", "citeseer", "pubmed"]

    # Previously we use the pre-processing in pygcn (https://github.com/tkipf/pygcn)
    # for Cora, which is slightly different from the one used in the GCN paper
    if name == "cora":
        name = "cora_v2"

    url = _get_dgl_url(self._urls[name])
    self._reverse_edge = reverse_edge
    self._reorder = reorder

    super(CitationGraphDataset, self).__init__(
        name,
        url=url,
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
""" root = self.raw_path objnames = ["x", "y", "tx", "ty", "allx", "ally", "graph"] objects = [] for i in range(len(objnames)): with open( "{}/ind.{}.{}".format(root, self.name, objnames[i]), "rb" ) as f: objects.append(_pickle_load(f)) x, y, tx, ty, allx, ally, graph = tuple(objects) test_idx_reorder = _parse_index_file( "{}/ind.{}.test.index".format(root, self.name) ) test_idx_range = np.sort(test_idx_reorder) if self.name == "citeseer": # Fix citeseer dataset (there are some isolated nodes in the graph) # Find isolated nodes, add them as zero-vecs into the right position test_idx_range_full = range( min(test_idx_reorder), max(test_idx_reorder) + 1 ) tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) tx_extended[test_idx_range - min(test_idx_range), :] = tx tx = tx_extended ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) ty_extended[test_idx_range - min(test_idx_range), :] = ty ty = ty_extended features = sp.vstack((allx, tx)).tolil() features[test_idx_reorder, :] = features[test_idx_range, :] if self.reverse_edge: graph = nx.DiGraph(nx.from_dict_of_lists(graph)) g = from_networkx(graph) else: graph = nx.Graph(nx.from_dict_of_lists(graph)) edges = list(graph.edges()) u, v = map(list, zip(*edges)) g = dgl_graph((u, v)) onehot_labels = np.vstack((ally, ty)) onehot_labels[test_idx_reorder, :] = onehot_labels[test_idx_range, :] labels = np.argmax(onehot_labels, 1) idx_test = test_idx_range.tolist() idx_train = range(len(y)) idx_val = range(len(y), len(y) + 500) train_mask = generate_mask_tensor( _sample_mask(idx_train, labels.shape[0]) ) val_mask = generate_mask_tensor(_sample_mask(idx_val, labels.shape[0])) test_mask = generate_mask_tensor( _sample_mask(idx_test, labels.shape[0]) ) g.ndata["train_mask"] = train_mask g.ndata["val_mask"] = val_mask g.ndata["test_mask"] = test_mask g.ndata["label"] = F.tensor(labels) g.ndata["feat"] = F.tensor( _preprocess_features(features), dtype=F.data_type_dict["float32"] ) self._num_classes = 
def has_cache(self):
    """Return True only when both the graph file and the info file exist."""
    graph_cached = os.path.exists(self.graph_path)
    # ``and`` short-circuits, so the info path is only checked once the
    # graph file is known to exist.
    return graph_cached and os.path.exists(self.info_path)
def _parse_index_file(filename):
    """Parse index file.

    Parameters
    ----------
    filename : str
        Path to a text file holding one integer per line.

    Returns
    -------
    list of int
        The integers in file order.
    """
    # The context manager closes the handle deterministically; the previous
    # bare ``open()`` left it to the garbage collector.
    with open(filename) as index_file:
        return [int(line.strip()) for line in index_file]
def __init__(
    self,
    raw_dir=None,
    force_reload=False,
    verbose=True,
    reverse_edge=True,
    transform=None,
    reorder=False,
):
    """Initialize the parent citation dataset with the fixed name ``"cora"``.

    All arguments are handed to the parent initializer unchanged.
    """
    super(CoraGraphDataset, self).__init__(
        "cora",
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        reverse_edge=reverse_edge,
        transform=transform,
        reorder=reorder,
    )
Nodes mean scientific publications and edges mean citation relationships. Each node has a predefined feature with 3703 dimensions. The dataset is designed for the node classification task. The task is to predict the category of certain publication. Statistics: - Nodes: 3327 - Edges: 9228 - Number of Classes: 6 - Label Split: - Train: 120 - Valid: 500 - Test: 1000 Parameters ----------- raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: True. reverse_edge : bool Whether to add reverse edges in graph. Default: True. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. reorder : bool Whether to reorder the graph using :func:`~dgl.reorder_graph`. Default: False. Attributes ---------- num_classes: int Number of label classes Notes ----- The node feature is row-normalized. In citeseer dataset, there are some isolated nodes in the graph. These isolated nodes are added as zero-vecs into the right position. 
class PubmedGraphDataset(CitationGraphDataset):
    r"""Pubmed citation network dataset.

    Nodes mean scientific publications and edges mean citation
    relationships. Each node has a predefined feature with 500 dimensions.
    The dataset is designed for the node classification task. The task is
    to predict the category of certain publication.

    Statistics:

    - Nodes: 19717
    - Edges: 88651
    - Number of Classes: 3
    - Label Split:

        - Train: 60
        - Valid: 500
        - Test: 1000

    Parameters
    -----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: True.
    reverse_edge : bool
        Whether to add reverse edges in graph. Default: True.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.
    reorder : bool
        Whether to reorder the graph using :func:`~dgl.reorder_graph`. Default: False.

    Attributes
    ----------
    num_classes: int
        Number of label classes

    Notes
    -----
    The node feature is row-normalized.

    Examples
    --------
    >>> dataset = PubmedGraphDataset()
    >>> g = dataset[0]
    >>> num_class = dataset.num_classes
    >>>
    >>> # get node feature
    >>> feat = g.ndata['feat']
    >>>
    >>> # get data split
    >>> train_mask = g.ndata['train_mask']
    >>> val_mask = g.ndata['val_mask']
    >>> test_mask = g.ndata['test_mask']
    >>>
    >>> # get labels
    >>> label = g.ndata['label']

    """

    def __init__(
        self,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        reverse_edge=True,
        transform=None,
        reorder=False,
    ):
        # The dataset name is fixed; every other argument is forwarded
        # positionally to the parent initializer.
        name = "pubmed"

        super(PubmedGraphDataset, self).__init__(
            name,
            raw_dir,
            force_reload,
            verbose,
            reverse_edge,
            transform,
            reorder,
        )

    def __getitem__(self, idx):
        r"""Gets the graph object

        Parameters
        -----------
        idx: int
            Item index, PubmedGraphDataset has only one graph object

        Return
        ------
        :class:`dgl.DGLGraph`

            graph structure, node features and labels.

            - ``ndata['train_mask']``: mask for training node set
            - ``ndata['val_mask']``: mask for validation node set
            - ``ndata['test_mask']``: mask for test node set
            - ``ndata['feat']``: node feature
            - ``ndata['label']``: ground truth labels
        """
        # Pure delegation to the parent implementation.
        return super(PubmedGraphDataset, self).__getitem__(idx)

    def __len__(self):
        r"""The number of graphs in the dataset."""
        # Pure delegation to the parent implementation.
        return super(PubmedGraphDataset, self).__len__()
def load_citeseer(
    raw_dir=None,
    force_reload=False,
    verbose=True,
    reverse_edge=True,
    transform=None,
):
    """Get CiteseerGraphDataset

    Parameters
    -----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: True.
    reverse_edge : bool
        Whether to add reverse edges in graph. Default: True.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Return
    -------
    CiteseerGraphDataset
    """
    return CiteseerGraphDataset(
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        reverse_edge=reverse_edge,
        transform=transform,
    )
def process(self):
    """Build ``graphs``, ``pmpds`` and ``labels`` from the raw files.

    Reads ``graphs.txt``, ``pmpds.pkl`` and ``labels.txt`` under
    ``self.raw_path``. In both text files a line starting with ``graph``
    closes the record collected so far; every other line contributes one
    edge (two space-separated integers) or one integer label.
    """
    raw_root = self.raw_path

    # load graphs
    self.graphs = []
    with open("{}/graphs.txt".format(raw_root), "r") as graph_file:
        edge_buf = []
        for row in graph_file:
            if not row.startswith("graph"):
                src, dst = row.strip().split(" ")
                edge_buf.append((int(src), int(dst)))
            elif edge_buf:
                # A header line closes the previous, non-empty edge list.
                self.graphs.append(dgl_graph(tuple(zip(*edge_buf))))
                edge_buf = []
        # Flush the final graph, which has no header line after it.
        if edge_buf:
            self.graphs.append(dgl_graph(tuple(zip(*edge_buf))))

    with open("{}/pmpds.pkl".format(raw_root), "rb") as pmpd_file:
        self.pmpds = _pickle_load(pmpd_file)

    self.labels = []
    with open("{}/labels.txt".format(raw_root), "r") as label_file:
        label_buf = []
        for row in label_file:
            if not row.startswith("graph"):
                label_buf.append(int(row.strip()))
            elif label_buf:
                self.labels.append(np.asarray(label_buf))
                label_buf = []
        if label_buf:
            self.labels.append(np.asarray(label_buf))

    # sanity check
    assert len(self.graphs) == len(self.pmpds)
    assert len(self.graphs) == len(self.labels)
""" if self._transform is None: g = self.graphs[i] else: g = self._transform(self.graphs[i]) return (g, self.pmpds[i], self.labels[i]) @property def save_name(self): return self.name + "_dgl_graph" @staticmethod def collate_fn(cur): graphs, pmpds, labels = zip(*cur) batched_graphs = batch_graphs(graphs) batched_pmpds = sp.block_diag(pmpds) batched_labels = np.concatenate(labels, axis=0) return batched_graphs, batched_pmpds, batched_labels def _normalize(mx): """Row-normalize sparse matrix""" rowsum = np.asarray(mx.sum(1)) mask = np.equal(rowsum, 0.0).flatten() rowsum[mask] = np.nan r_inv = np.power(rowsum, -1).flatten() r_inv[mask] = 0.0 r_mat_inv = sp.diags(r_inv) return r_mat_inv.dot(mx) def _encode_onehot(labels): classes = list(sorted(set(labels))) classes_dict = { c: np.identity(len(classes))[i, :] for i, c in enumerate(classes) } labels_onehot = np.asarray( list(map(classes_dict.get, labels)), dtype=np.int32 ) return labels_onehot ================================================ FILE: python/dgl/data/cluster.py ================================================ """ CLUSTERDataset for inductive learning. """ import os from .dgl_dataset import DGLBuiltinDataset from .utils import _get_dgl_url, load_graphs class CLUSTERDataset(DGLBuiltinDataset): r"""CLUSTER dataset for semi-supervised clustering task. Each graph contains 6 SBM clusters with sizes randomly selected between [5, 35] and probabilities p = 0.55, q = 0.25. The graphs are of sizes 40 -190 nodes. Each node can take an input feature value in {0, 1, 2, ..., 6} and values 1~6 correspond to classes 0~5 respectively, while value 0 means that the class of the node is unknown. There is only one labeled node that is randomly assigned to each community and most node features are set to 0. Reference ``_ Statistics: - Train examples: 10,000 - Valid examples: 1,000 - Test examples: 1,000 - Number of classes for each node: 6 Parameters ---------- mode : str Must be one of ('train', 'valid', 'test'). 
Default: 'train' raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: False transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_classes : int Number of classes for each node. Examples -------- >>> from dgl.data import CLUSTERDataset >>> >>> trainset = CLUSTERDataset(mode='train') >>> >>> trainset.num_classes 6 >>> len(trainset) 10000 >>> trainset[0] Graph(num_nodes=117, num_edges=4104, ndata_schemes={'label': Scheme(shape=(), dtype=torch.int16), 'feat': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}) """ def __init__( self, mode="train", raw_dir=None, force_reload=False, verbose=False, transform=None, ): self._url = _get_dgl_url("dataset/SBM_CLUSTER.zip") self.mode = mode super(CLUSTERDataset, self).__init__( name="cluster", url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): self.load() def has_cache(self): graph_path = os.path.join( self.save_path, "CLUSTER_{}.bin".format(self.mode) ) return os.path.exists(graph_path) def load(self): graph_path = os.path.join( self.save_path, "CLUSTER_{}.bin".format(self.mode) ) self._graphs, _ = load_graphs(graph_path) @property def num_classes(self): r"""Number of classes for each node.""" return 6 def __len__(self): r"""The number of examples in the dataset.""" return len(self._graphs) def __getitem__(self, idx): r"""Get the idx^th sample. Parameters --------- idx : int The sample index. Returns ------- :class:`dgl.DGLGraph` graph structure, node features, node labels and edge features. 
- ``ndata['feat']``: node features - ``ndata['label']``: node labels - ``edata['feat']``: edge features """ if self._transform is None: return self._graphs[idx] else: return self._transform(self._graphs[idx]) ================================================ FILE: python/dgl/data/csv_dataset.py ================================================ import os import numpy as np from .. import backend as F from ..base import DGLError from .dgl_dataset import DGLDataset from .utils import load_graphs, save_graphs, Subset class CSVDataset(DGLDataset): """Dataset class that loads and parses graph data from CSV files. This class requires the following additional packages: - pyyaml >= 5.4.1 - pandas >= 1.1.5 - pydantic >= 1.9.0 The parsed graph and feature data will be cached for faster reloading. If the source CSV files are modified, please specify ``force_reload=True`` to re-parse from them. Parameters ---------- data_path : str Directory which contains 'meta.yaml' and CSV files force_reload : bool, optional Whether to reload the dataset. Default: False verbose: bool, optional Whether to print out progress information. Default: True. ndata_parser : dict[str, callable] or callable, optional Callable object which takes in the ``pandas.DataFrame`` object created from CSV file, parses node data and returns a dictionary of parsed data. If given a dictionary, the key is node type and the value is a callable object which is used to parse data of corresponding node type. If given a single callable object, such object is used to parse data of all node type data. Default: None. If None, a default data parser is applied which load data directly and tries to convert list into array. edata_parser : dict[(str, str, str), callable], or callable, optional Callable object which takes in the ``pandas.DataFrame`` object created from CSV file, parses edge data and returns a dictionary of parsed data. 
If given a dictionary, the key is edge type and the value is a callable object which is used to parse data of corresponding edge type. If given a single callable object, such object is used to parse data of all edge type data. Default: None. If None, a default data parser is applied which load data directly and tries to convert list into array. gdata_parser : callable, optional Callable object which takes in the ``pandas.DataFrame`` object created from CSV file, parses graph data and returns a dictionary of parsed data. Default: None. If None, a default data parser is applied which load data directly and tries to convert list into array. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- graphs : :class:`dgl.DGLGraph` Graphs of the dataset data : dict any available graph-level data such as graph-level feature, labels. Examples -------- Please refer to :ref:`guide-data-pipeline-loadcsv`. 
""" META_YAML_NAME = "meta.yaml" def __init__( self, data_path, force_reload=False, verbose=True, ndata_parser=None, edata_parser=None, gdata_parser=None, transform=None, ): from .csv_dataset_base import ( DefaultDataParser, load_yaml_with_sanity_check, ) self.graphs = None self.data = None self.ndata_parser = {} if ndata_parser is None else ndata_parser self.edata_parser = {} if edata_parser is None else edata_parser self.gdata_parser = gdata_parser self.default_data_parser = DefaultDataParser() meta_yaml_path = os.path.join(data_path, CSVDataset.META_YAML_NAME) if not os.path.exists(meta_yaml_path): raise DGLError( "'{}' cannot be found under {}.".format( CSVDataset.META_YAML_NAME, data_path ) ) self.meta_yaml = load_yaml_with_sanity_check(meta_yaml_path) ds_name = self.meta_yaml.dataset_name super().__init__( ds_name, raw_dir=os.path.dirname(meta_yaml_path), force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): """Parse node/edge data from CSV files and construct DGL.Graphs""" from .csv_dataset_base import ( DGLGraphConstructor, EdgeData, GraphData, NodeData, ) meta_yaml = self.meta_yaml base_dir = self.raw_dir node_data = [] for meta_node in meta_yaml.node_data: if meta_node is None: continue ntype = meta_node.ntype data_parser = ( self.ndata_parser if callable(self.ndata_parser) else self.ndata_parser.get(ntype, self.default_data_parser) ) ndata = NodeData.load_from_csv( meta_node, base_dir=base_dir, separator=meta_yaml.separator, data_parser=data_parser, ) node_data.append(ndata) edge_data = [] for meta_edge in meta_yaml.edge_data: if meta_edge is None: continue etype = tuple(meta_edge.etype) data_parser = ( self.edata_parser if callable(self.edata_parser) else self.edata_parser.get(etype, self.default_data_parser) ) edata = EdgeData.load_from_csv( meta_edge, base_dir=base_dir, separator=meta_yaml.separator, data_parser=data_parser, ) edge_data.append(edata) graph_data = None if meta_yaml.graph_data is not None: meta_graph = 
meta_yaml.graph_data data_parser = ( self.default_data_parser if self.gdata_parser is None else self.gdata_parser ) graph_data = GraphData.load_from_csv( meta_graph, base_dir=base_dir, separator=meta_yaml.separator, data_parser=data_parser, ) # construct graphs self.graphs, self.data = DGLGraphConstructor.construct_graphs( node_data, edge_data, graph_data ) if len(self.data) == 1: self.labels = list(self.data.values())[0] def has_cache(self): graph_path = os.path.join(self.save_path, self.name + ".bin") if os.path.exists(graph_path): return True return False def save(self): if self.graphs is None: raise DGLError("No graphs available in dataset") graph_path = os.path.join(self.save_path, self.name + ".bin") save_graphs(graph_path, self.graphs, labels=self.data) def load(self): graph_path = os.path.join(self.save_path, self.name + ".bin") self.graphs, self.data = load_graphs(graph_path) if len(self.data) == 1: self.labels = list(self.data.values())[0] def __getitem__(self, i): if F.is_tensor(i) and F.ndim(i) == 1: return Subset(self, F.copy_to(i, F.cpu())) if self._transform is None: g = self.graphs[i] else: g = self._transform(self.graphs[i]) if len(self.data) == 1: return g, self.labels[i] elif len(self.data) > 0: data = {k: v[i] for (k, v) in self.data.items()} return g, data else: return g def __len__(self): return len(self.graphs) ================================================ FILE: python/dgl/data/csv_dataset_base.py ================================================ import ast import os from typing import Callable, List, Optional import numpy as np import pandas as pd import pydantic as dt import yaml from .. import backend as F from ..base import dgl_warning, DGLError from ..convert import heterograph as dgl_heterograph class MetaNode(dt.BaseModel): """Class of node_data in YAML. 
class MetaYaml(dt.BaseModel):
    """Schema of the whole ``meta.yaml`` file, validated by pydantic.

    Internal use only."""

    # Format version; ``load_yaml_with_sanity_check`` accepts only "1.0.0".
    version: Optional[str] = "1.0.0"
    # Required dataset name (no default).
    dataset_name: str
    # Column separator handed to the CSV readers; a comma unless overridden.
    separator: Optional[str] = ","
    # One entry per node CSV file.
    node_data: List[MetaNode]
    # One entry per edge CSV file.
    edge_data: List[MetaEdge]
    # Optional description of the graph-level CSV file.
    graph_data: Optional[MetaGraph] = None
class BaseData:
    """Class of base data which is inherited by Node/Edge/GraphData.
    Internal use only."""

    @staticmethod
    def read_csv(file_name, base_dir, separator):
        """Read a CSV file into a ``pandas.DataFrame``.

        Parameters
        ----------
        file_name : str
            Path of the CSV file, joined onto ``base_dir`` when one is given.
        base_dir : str or None
            Directory prepended to ``file_name``; ``None`` uses
            ``file_name`` as is.
        separator : str
            Column separator passed to ``pandas.read_csv``.
        """
        csv_path = file_name
        if base_dir is not None:
            csv_path = os.path.join(base_dir, csv_path)
        return pd.read_csv(csv_path, sep=separator)

    @staticmethod
    def pop_from_dataframe(df: pd.DataFrame, item: str):
        """Remove column ``item`` from ``df`` and return it as a 1-D array.

        Returns ``None`` when the column does not exist.  The result is
        always one-dimensional, including for a single-row frame, so
        callers can safely use ``len()`` and iterate over it.
        """
        try:
            column = df.pop(item)
        except KeyError:
            return None
        # No ``squeeze()`` here: a column is already 1-D, and squeezing a
        # one-row column would produce a 0-d array with no length.
        return column.to_numpy()
Internal use only.""" def __init__(self, node_id, data, type=None, graph_id=None): self.id = np.array(node_id) self.data = data self.type = type if type is not None else "_V" self.graph_id = ( np.array(graph_id) if graph_id is not None else np.full(len(node_id), 0) ) _validate_data_length( {**{"id": self.id, "graph_id": self.graph_id}, **self.data} ) @staticmethod def load_from_csv( meta: MetaNode, data_parser: Callable, base_dir=None, separator="," ): df = BaseData.read_csv(meta.file_name, base_dir, separator) node_ids = BaseData.pop_from_dataframe(df, meta.node_id_field) graph_ids = BaseData.pop_from_dataframe(df, meta.graph_id_field) if node_ids is None: raise DGLError( "Missing node id field [{}] in file [{}].".format( meta.node_id_field, meta.file_name ) ) ntype = meta.ntype ndata = data_parser(df) return NodeData(node_ids, ndata, type=ntype, graph_id=graph_ids) @staticmethod def to_dict(node_data: List["NodeData"]) -> dict: # node_ids could be numeric or non-numeric values, but duplication is not allowed. node_dict = {} for n_data in node_data: graph_ids = np.unique(n_data.graph_id) for graph_id in graph_ids: idx = n_data.graph_id == graph_id ids = n_data.id[idx] u_ids, u_indices, u_counts = np.unique( ids, return_index=True, return_counts=True ) if len(ids) > len(u_ids): raise DGLError( "Node IDs are required to be unique but the following ids are duplicate: {}".format( u_ids[u_counts > 1] ) ) if graph_id not in node_dict: node_dict[graph_id] = {} node_dict[graph_id][n_data.type] = { "mapping": { index: i for i, index in enumerate(ids[u_indices]) }, "data": { k: _tensor(v[idx][u_indices]) for k, v in n_data.data.items() }, "dtype": ids.dtype, } return node_dict class EdgeData(BaseData): """Class of edge data which is used for DGLGraph construction. 
Internal use only.""" def __init__(self, src_id, dst_id, data, type=None, graph_id=None): self.src = np.array(src_id) self.dst = np.array(dst_id) self.data = data self.type = type if type is not None else ("_V", "_E", "_V") self.graph_id = ( np.array(graph_id) if graph_id is not None else np.full(len(src_id), 0) ) _validate_data_length( { **{"src": self.src, "dst": self.dst, "graph_id": self.graph_id}, **self.data, } ) @staticmethod def load_from_csv( meta: MetaEdge, data_parser: Callable, base_dir=None, separator="," ): df = BaseData.read_csv(meta.file_name, base_dir, separator) src_ids = BaseData.pop_from_dataframe(df, meta.src_id_field) if src_ids is None: raise DGLError( "Missing src id field [{}] in file [{}].".format( meta.src_id_field, meta.file_name ) ) dst_ids = BaseData.pop_from_dataframe(df, meta.dst_id_field) if dst_ids is None: raise DGLError( "Missing dst id field [{}] in file [{}].".format( meta.dst_id_field, meta.file_name ) ) graph_ids = BaseData.pop_from_dataframe(df, meta.graph_id_field) etype = tuple(meta.etype) edata = data_parser(df) return EdgeData(src_ids, dst_ids, edata, type=etype, graph_id=graph_ids) @staticmethod def to_dict(edge_data: List["EdgeData"], node_dict: dict) -> dict: edge_dict = {} for e_data in edge_data: (src_type, e_type, dst_type) = e_data.type graph_ids = np.unique(e_data.graph_id) for graph_id in graph_ids: if graph_id in edge_dict and e_data.type in edge_dict[graph_id]: raise DGLError( f"Duplicate edge type[{e_data.type}] for same graph[{graph_id}], please place the same edge_type for same graph into single EdgeData." 
class DGLGraphConstructor:
    """Class for constructing DGLGraph from Node/Edge/Graph data.
    Internal use only."""

    @staticmethod
    def construct_graphs(node_data, edge_data, graph_data=None):
        """Build graphs and graph-level data from parsed CSV records.

        Parameters
        ----------
        node_data : NodeData or list of NodeData
        edge_data : EdgeData or list of EdgeData
        graph_data : GraphData, optional
            When omitted, a single graph with id ``0`` and no graph-level
            data is assumed.

        Returns
        -------
        (list, dict)
            The graphs and the graph-level data, as returned by
            ``GraphData.to_dict``.
        """
        if not isinstance(node_data, list):
            node_data = [node_data]
        if not isinstance(edge_data, list):
            edge_data = [edge_data]
        node_dict = NodeData.to_dict(node_data)
        edge_dict = EdgeData.to_dict(edge_data, node_dict)
        graph_dict = DGLGraphConstructor._construct_graphs(node_dict, edge_dict)
        if graph_data is None:
            graph_data = GraphData(np.full(1, 0), {})
        graphs, data = GraphData.to_dict(graph_data, graph_dict)
        return graphs, data

    @staticmethod
    def _construct_graphs(node_dict, edge_dict):
        """Create one heterograph per graph id found in ``node_dict``.

        ``node_dict`` maps ``graph_id -> ntype -> {"mapping", "data", ...}``
        and ``edge_dict`` maps ``graph_id -> etype -> {"edges", "data"}``.
        A graph id that has nodes but no edges gets an empty
        ``("_V", "_E", "_V")`` relation.  Neither input dict is modified.

        Returns
        -------
        dict
            ``graph_id -> DGLGraph`` with node and edge features assigned.
        """
        graph_dict = {}
        for graph_id, ntype_entries in node_dict.items():
            etype_entries = edge_dict.get(graph_id)
            if etype_entries is None:
                # Indexing ``edge_dict[graph_id]`` here would raise
                # KeyError, so build the placeholder as a fresh dict.  It
                # carries an empty "data" entry for the feature loop below.
                # NOTE(review): the placeholder uses the default node type
                # "_V"; graphs whose nodes use other types may still be
                # rejected by the heterograph constructor -- confirm.
                etype_entries = {
                    ("_V", "_E", "_V"): {"edges": ([], []), "data": {}}
                }
            graph = dgl_heterograph(
                {
                    etype: edata["edges"]
                    for etype, edata in etype_entries.items()
                },
                num_nodes_dict={
                    ntype: len(ndata["mapping"])
                    for ntype, ndata in ntype_entries.items()
                },
            )

            for ntype, ndata in ntype_entries.items():
                for key, value in ndata["data"].items():
                    graph.nodes[ntype].data[key] = value
            for etype, edata in etype_entries.items():
                for key, value in edata["data"].items():
                    graph.edges[etype].data[key] = value
            graph_dict[graph_id] = graph
        return graph_dict
Ignored...") continue dt = df[header].to_numpy().squeeze() if len(dt) > 0 and isinstance(dt[0], str): # probably consists of list of numeric values dt = np.array([ast.literal_eval(row) for row in dt]) data[header] = dt return data ================================================ FILE: python/dgl/data/dgl_dataset.py ================================================ """Basic DGL Dataset """ from __future__ import absolute_import import abc import hashlib import os import traceback from ..utils import retry_method_with_fix from .utils import download, extract_archive, get_download_dir, makedirs class DGLDataset(object): r"""The basic DGL dataset for creating graph datasets. This class defines a basic template class for DGL Dataset. The following steps will be executed automatically: 1. Check whether there is a dataset cache on disk (already processed and stored on the disk) by invoking ``has_cache()``. If true, goto 5. 2. Call ``download()`` to download the data if ``url`` is not None. 3. Call ``process()`` to process the data. 4. Call ``save()`` to save the processed dataset on disk and goto 6. 5. Call ``load()`` to load the processed dataset from disk. 6. Done. Users can overwite these functions with their own data processing logic. Parameters ---------- name : str Name of the dataset url : str Url to download the raw dataset. Default: None raw_dir : str Specifying the directory that will store the downloaded data or the directory that already stores the input data. Default: ~/.dgl/ save_dir : str Directory to save the processed dataset. Default: same as raw_dir hash_key : tuple A tuple of values as the input for the hash function. Users can distinguish instances (and their caches on the disk) from the same dataset class by comparing the hash values. Default: (), the corresponding hash value is ``'f9065fa7'``. force_reload : bool Whether to reload the dataset. 
Default: False verbose : bool Whether to print out progress information transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- url : str The URL to download the dataset name : str The dataset name raw_dir : str Directory to store all the downloaded raw datasets. raw_path : str Path to the downloaded raw dataset folder. An alias for ``os.path.join(self.raw_dir, self.name)``. save_dir : str Directory to save all the processed datasets. save_path : str Path to the processed dataset folder. An alias for ``os.path.join(self.save_dir, self.name)``. verbose : bool Whether to print more runtime information. hash : str Hash value for the dataset and the setting. """ def __init__( self, name, url=None, raw_dir=None, save_dir=None, hash_key=(), force_reload=False, verbose=False, transform=None, ): self._name = name self._url = url self._force_reload = force_reload self._verbose = verbose self._hash_key = hash_key self._hash = self._get_hash() self._transform = transform # if no dir is provided, the default dgl download dir is used. if raw_dir is None: self._raw_dir = get_download_dir() else: self._raw_dir = raw_dir if save_dir is None: self._save_dir = self._raw_dir else: self._save_dir = save_dir self._load() def download(self): r"""Overwite to realize your own logic of downloading data. It is recommended to download the to the :obj:`self.raw_dir` folder. Can be ignored if the dataset is already in :obj:`self.raw_dir`. """ pass def save(self): r"""Overwite to realize your own logic of saving the processed dataset into files. It is recommended to use ``dgl.data.utils.save_graphs`` to save dgl graph into files and use ``dgl.data.utils.save_info`` to save extra information into files. """ pass def load(self): r"""Overwite to realize your own logic of loading the saved dataset from files. 
It is recommended to use ``dgl.data.utils.load_graphs`` to load dgl graph from files and use ``dgl.data.utils.load_info`` to load extra information into python dict object. """ pass @abc.abstractmethod def process(self): r"""Overwrite to realize your own logic of processing the input data.""" pass def has_cache(self): r"""Overwrite to realize your own logic of deciding whether there exists a cached dataset. By default False. """ return False @retry_method_with_fix(download) def _download(self): """Download dataset by calling ``self.download()`` if the dataset does not exists under ``self.raw_path``. By default ``self.raw_path = os.path.join(self.raw_dir, self.name)`` One can overwrite ``raw_path()`` function to change the path. """ if os.path.exists(self.raw_path): # pragma: no cover return makedirs(self.raw_dir) self.download() def _load(self): """Entry point from __init__ to load the dataset. If cache exists: - Load the dataset from saved dgl graph and information files. - If loadin process fails, re-download and process the dataset. else: - Download the dataset if needed. - Process the dataset and build the dgl graph. - Save the processed dataset into files. 
""" load_flag = not self._force_reload and self.has_cache() if load_flag: try: self.load() if self.verbose: print("Done loading data from cached files.") except KeyboardInterrupt: raise except: load_flag = False if self.verbose: print(traceback.format_exc()) print("Loading from cache failed, re-processing.") if not load_flag: self._download() self.process() self.save() if self.verbose: print("Done saving data into cached files.") def _get_hash(self): """Compute the hash of the input tuple Example ------- Assume `self._hash_key = (10, False, True)` >>> hash_value = self._get_hash() >>> hash_value 'a770b222' """ hash_func = hashlib.sha1() hash_func.update(str(self._hash_key).encode("utf-8")) return hash_func.hexdigest()[:8] def _get_hash_url_suffix(self): """Get the suffix based on the hash value of the url.""" if self._url is None: return "" else: hash_func = hashlib.sha1() hash_func.update(str(self._url).encode("utf-8")) return "_" + hash_func.hexdigest()[:8] @property def url(self): r"""Get url to download the raw dataset.""" return self._url @property def name(self): r"""Name of the dataset.""" return self._name @property def raw_dir(self): r"""Raw file directory contains the input data folder.""" return self._raw_dir @property def raw_path(self): r"""Directory contains the input data files. 
class DGLBuiltinDataset(DGLDataset):
    r"""Base class of the datasets that ship with DGL.

    Parameters
    ----------
    name : str
        Name of the dataset.
    url : str
        Url to download the raw dataset.
    raw_dir : str
        Directory that will store the downloaded data, or that already
        stores the input data.
        Default: ~/.dgl/
    hash_key : tuple
        Values fed to the hash function. Instances of the same dataset
        class (and their caches on the disk) can be told apart by
        comparing the resulting hash values.
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and
        returns a transformed version. The :class:`~dgl.DGLGraph` object
        will be transformed before every access.
    """

    def __init__(
        self,
        name,
        url,
        raw_dir=None,
        hash_key=(),
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        # ``save_dir`` is always None here, so the base class stores the
        # processed files under the raw directory.
        base_options = dict(
            url=url,
            raw_dir=raw_dir,
            save_dir=None,
            hash_key=hash_key,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
        super().__init__(name, **base_options)

    def download(self):
        r"""Download the archive at ``self.url`` and extract it.

        The archive is saved as ``<name>.zip`` under ``self.raw_dir`` and
        extracted into ``self.raw_path``.  Nothing happens when no url is
        set.
        """
        if self.url is None:
            return
        archive_path = os.path.join(self.raw_dir, self.name + ".zip")
        download(self.url, path=archive_path)
        extract_archive(archive_path, self.raw_path)
Reference: Note: this dataset is for academic use only, and commercial use is prohibited. Statistics: Politifact: - Graphs: 314 - Nodes: 41,054 - Edges: 40,740 - Classes: - Fake: 157 - Real: 157 - Node feature size: - bert: 768 - content: 310 - profile: 10 - spacy: 300 Gossipcop: - Graphs: 5,464 - Nodes: 314,262 - Edges: 308,798 - Classes: - Fake: 2,732 - Real: 2,732 - Node feature size: - bert: 768 - content: 310 - profile: 10 - spacy: 300 Parameters ---------- name : str Name of the dataset (gossipcop, or politifact) feature_name : str Name of the feature (bert, content, profile, or spacy) raw_dir : str Specifying the directory that will store the downloaded data or the directory that already stores the input data. Default: ~/.dgl/ transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- name : str Name of the dataset (gossipcop, or politifact) num_classes : int Number of label classes num_graphs : int Number of graphs graphs : list A list of DGLGraph objects labels : Tensor Graph labels feature_name : str Name of the feature (bert, content, profile, or spacy) feature : Tensor Node features train_mask : Tensor Mask of training set val_mask : Tensor Mask of validation set test_mask : Tensor Mask of testing set Examples -------- >>> dataset = FakeNewsDataset('gossipcop', 'bert') >>> graph, label = dataset[0] >>> num_classes = dataset.num_classes >>> feat = dataset.feature >>> labels = dataset.labels """ file_urls = { "gossipcop": "dataset/FakeNewsGOS.zip", "politifact": "dataset/FakeNewsPOL.zip", } def __init__(self, name, feature_name, raw_dir=None, transform=None): assert name in [ "gossipcop", "politifact", ], "Only supports 'gossipcop' or 'politifact'." 
url = _get_dgl_url(self.file_urls[name]) assert feature_name in [ "bert", "content", "profile", "spacy", ], "Only supports 'bert', 'content', 'profile', or 'spacy'" self.feature_name = feature_name super(FakeNewsDataset, self).__init__( name=name, url=url, raw_dir=raw_dir, transform=transform ) def process(self): """process raw data to graph, labels and masks""" self.labels = F.tensor( np.load(os.path.join(self.raw_path, "graph_labels.npy")) ) num_graphs = self.labels.shape[0] node_graph_id = np.load( os.path.join(self.raw_path, "node_graph_id.npy") ) edges = np.genfromtxt( os.path.join(self.raw_path, "A.txt"), delimiter=",", dtype=int ) src = edges[:, 0] dst = edges[:, 1] g = graph((src, dst)) node_idx_list = [] for idx in range(np.max(node_graph_id) + 1): node_idx = np.where(node_graph_id == idx) node_idx_list.append(node_idx[0]) self.graphs = [g.subgraph(node_idx) for node_idx in node_idx_list] train_idx = np.load(os.path.join(self.raw_path, "train_idx.npy")) val_idx = np.load(os.path.join(self.raw_path, "val_idx.npy")) test_idx = np.load(os.path.join(self.raw_path, "test_idx.npy")) train_mask = np.zeros(num_graphs, dtype=np.bool_) val_mask = np.zeros(num_graphs, dtype=np.bool_) test_mask = np.zeros(num_graphs, dtype=np.bool_) train_mask[train_idx] = True val_mask[val_idx] = True test_mask[test_idx] = True self.train_mask = F.tensor(train_mask) self.val_mask = F.tensor(val_mask) self.test_mask = F.tensor(test_mask) feature_file = "new_" + self.feature_name + "_feature.npz" self.feature = F.tensor( sp.load_npz(os.path.join(self.raw_path, feature_file)).todense() ) def save(self): """save the graph list and the labels""" save_graphs(str(self.graph_path), self.graphs) save_info( self.info_path, { "label": self.labels, "feature": self.feature, "train_mask": self.train_mask, "val_mask": self.val_mask, "test_mask": self.test_mask, }, ) @property def graph_path(self): return os.path.join(self.save_path, self.name + "_dgl_graph.bin") @property def info_path(self): 
return os.path.join(self.save_path, self.name + "_dgl_graph.pkl") def has_cache(self): """check whether there are processed data in `self.save_path`""" return os.path.exists(self.graph_path) and os.path.exists( self.info_path ) def load(self): """load processed data from directory `self.save_path`""" graphs, _ = load_graphs(str(self.graph_path)) info = load_info(str(self.info_path)) self.graphs = graphs self.labels = info["label"] self.feature = info["feature"] self.train_mask = info["train_mask"] self.val_mask = info["val_mask"] self.test_mask = info["test_mask"] @property def num_classes(self): """Number of classes for each graph, i.e. number of prediction tasks.""" return 2 @property def num_graphs(self): """Number of graphs.""" return self.labels.shape[0] def __getitem__(self, i): r"""Get graph and label by index Parameters ---------- i : int Item index Returns ------- (:class:`dgl.DGLGraph`, Tensor) """ if self._transform is None: g = self.graphs[i] else: g = self._transform(self.graphs[i]) return g, self.labels[i] def __len__(self): r"""Number of graphs in the dataset. Return ------- int """ return len(self.graphs) ================================================ FILE: python/dgl/data/flickr.py ================================================ """Flickr Dataset""" import json import os import numpy as np import scipy.sparse as sp from .. import backend as F from ..convert import from_scipy from ..transforms import reorder_graph from .dgl_dataset import DGLBuiltinDataset from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs class FlickrDataset(DGLBuiltinDataset): r"""Flickr dataset for node classification from `GraphSAINT: Graph Sampling Based Inductive Learning Method `_ The task of this dataset is categorizing types of images based on the descriptions and common properties of online images. 
def __init__(
    self,
    raw_dir=None,
    force_reload=False,
    verbose=False,
    transform=None,
    reorder=False,
):
    """Set up the Flickr dataset.

    Stores the ``reorder`` flag and forwards the remaining arguments,
    together with the fixed dataset name and download URL, to the base
    class constructor.
    """
    download_url = _get_dgl_url("dataset/flickr.zip")
    # Kept ahead of the base constructor call, as in the original ordering;
    # ``process`` reads this flag.
    self._reorder = reorder
    base_kwargs = dict(
        name="flickr",
        raw_dir=raw_dir,
        url=download_url,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
    super().__init__(**base_kwargs)
@property
def graph_path(self):
    """Path of the cached processed graph inside ``self.save_path``.

    ``process`` produces a different graph depending on ``self._reorder``,
    so the two variants use different file names. The non-reordered name
    is the historical ``dgl_graph.bin``.
    """
    file_name = "dgl_graph_reordered.bin" if self._reorder else "dgl_graph.bin"
    return os.path.join(self.save_path, file_name)

def has_cache(self):
    """Return True if a processed graph for this ``reorder`` setting exists."""
    return os.path.exists(self.graph_path)

def save(self):
    """Write the processed graph to ``self.graph_path``."""
    save_graphs(self.graph_path, self._graph)

def load(self):
    """Read the processed graph back from ``self.graph_path``."""
    graphs, _ = load_graphs(self.graph_path)
    self._graph = graphs[0]
class FraudDataset(DGLBuiltinDataset):
    r"""Fraud node prediction dataset.

    The dataset includes two multi-relational graphs extracted from Yelp and
    Amazon where nodes represent fraudulent reviews or fraudulent reviewers.

    It was first proposed in a CIKM'20 paper and has been used by a recent
    WWW'21 paper as a benchmark. Another paper also takes the dataset as an
    example to study the non-homophilous graphs. This dataset is built upon
    industrial data and has rich relational information and unique properties
    like class-imbalance and feature inconsistency, which makes the dataset be
    a good instance to investigate how GNNs perform on real-world noisy
    graphs. These graphs are bidirected and not self connected.

    Parameters
    ----------
    name : str
        Name of the dataset
    raw_dir : str
        Specifying the directory that will store the
        downloaded data or the directory that
        already stores the input data.
        Default: ~/.dgl/
    random_seed : int
        Specifying the random seed in splitting the dataset.
        Default: 717
    train_size : float
        training set size of the dataset.
        Default: 0.7
    val_size : float
        validation set size of the dataset, and the
        size of testing set is (1 - train_size - val_size)
        Default: 0.1
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: True.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of label classes
    graph : dgl.DGLGraph
        Graph structure, etc.
    seed : int
        Random seed in splitting the dataset.
    train_size : float
        Training set size of the dataset.
    val_size : float
        Validation set size of the dataset

    Examples
    --------
    >>> dataset = FraudDataset('yelp')
    >>> graph = dataset[0]
    >>> num_classes = dataset.num_classes
    >>> feat = graph.ndata['feature']
    >>> label = graph.ndata['label']
    """

    file_urls = {
        "yelp": "dataset/FraudYelp.zip",
        "amazon": "dataset/FraudAmazon.zip",
    }
    relations = {
        "yelp": ["net_rsr", "net_rtr", "net_rur"],
        "amazon": ["net_upu", "net_usu", "net_uvu"],
    }
    file_names = {"yelp": "YelpChi.mat", "amazon": "Amazon.mat"}
    node_name = {"yelp": "review", "amazon": "user"}

    def __init__(
        self,
        name,
        raw_dir=None,
        random_seed=717,
        train_size=0.7,
        val_size=0.1,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        assert name in ["yelp", "amazon"], "only supports 'yelp', or 'amazon'"
        url = _get_dgl_url(self.file_urls[name])
        self.seed = random_seed
        self.train_size = train_size
        self.val_size = val_size
        # The split parameters go into the hash key, so each split
        # configuration gets its own cache file (see ``graph_path``).
        super().__init__(
            name=name,
            url=url,
            raw_dir=raw_dir,
            hash_key=(random_seed, train_size, val_size),
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Process raw data into a graph with labels and splitting masks."""
        file_path = os.path.join(self.raw_path, self.file_names[self.name])
        data = io.loadmat(file_path)

        node_features = data["features"].todense()
        # Remove the additional dimension of length 1 in the raw .mat file.
        node_labels = data["label"].squeeze()

        ntype = self.node_name[self.name]
        graph_data = {}
        for relation in self.relations[self.name]:
            adj = data[relation].tocoo()
            graph_data[(ntype, relation, ntype)] = (adj.row, adj.col)
        g = heterograph(graph_data)

        g.ndata["feature"] = F.tensor(
            node_features, dtype=F.data_type_dict["float32"]
        )
        g.ndata["label"] = F.tensor(
            node_labels, dtype=F.data_type_dict["int64"]
        )
        self.graph = g

        self._random_split(
            g.ndata["feature"], self.seed, self.train_size, self.val_size
        )

    def __getitem__(self, idx):
        r"""Get graph object

        Parameters
        ----------
        idx : int
            Item index

        Returns
        -------
        :class:`dgl.DGLGraph`
            graph structure, node features, node labels and masks

            - ``ndata['feature']``: node features
            - ``ndata['label']``: node labels
            - ``ndata['train_mask']``: mask of training set
            - ``ndata['val_mask']``: mask of validation set
            - ``ndata['test_mask']``: mask of testing set
        """
        assert idx == 0, "This dataset has only one graph"
        if self._transform is None:
            return self.graph
        return self._transform(self.graph)

    def __len__(self):
        """Number of data examples.

        The dataset holds exactly one graph (``__getitem__`` only accepts
        index 0), so the length is 1 rather than ``len`` of the graph.
        """
        return 1

    @property
    def num_classes(self):
        """Number of classes.

        Return
        -------
        int
        """
        return 2

    @property
    def graph_path(self):
        """Path of the cached graph for the current split configuration."""
        return os.path.join(
            self.save_path, self.name + "_dgl_graph_{}.bin".format(self.hash)
        )

    def save(self):
        """Save processed data to directory ``self.save_path``."""
        save_graphs(str(self.graph_path), self.graph)

    def load(self):
        """Load processed data from directory ``self.save_path``."""
        graph_list, _ = load_graphs(str(self.graph_path))
        self.graph = graph_list[0]

    def has_cache(self):
        """Check whether processed data exists in ``self.save_path``."""
        return os.path.exists(self.graph_path)

    def _random_split(self, x, seed=717, train_size=0.7, val_size=0.1):
        """Split the nodes into training, validation and testing sets.

        The shuffled index is cut into three parts: the first
        ``train_size`` fraction is the training set, the last ``val_size``
        fraction is the validation set, and whatever lies between is the
        testing set. The masks are stored in ``self.graph.ndata``.
        """
        assert 0 <= train_size + val_size <= 1, (
            "The sum of valid training set size and validation set size "
            "must between 0 and 1 (inclusive)."
        )

        N = x.shape[0]
        if self.name == "amazon":
            # 0-3304 are unlabeled nodes
            index = np.arange(3305, N)
        else:
            index = np.arange(N)

        index = np.random.RandomState(seed).permutation(index)
        total = len(index)
        train_end = int(train_size * total)
        val_start = total - int(val_size * total)

        splits = {
            "train_mask": index[:train_end],
            "val_mask": index[val_start:],
            "test_mask": index[train_end:val_start],
        }
        for key, idx in splits.items():
            mask = np.zeros(N, dtype=np.bool_)
            mask[idx] = True
            self.graph.ndata[key] = F.tensor(mask)
class FraudAmazonDataset(FraudDataset):
    r"""Fraud Amazon Dataset

    The Amazon dataset includes product reviews under the Musical Instruments
    category. Users with more than 80% helpful votes are labelled as benign
    entities and users with less than 20% helpful votes are labelled as
    fraudulent entities. A fraudulent user detection task can be conducted on
    the Amazon dataset, which is a binary classification task. 25 handcrafted
    features are taken as the raw node features.

    Users are nodes in the graph, and three relations are:

    1. U-P-U : it connects users reviewing at least one same product
    2. U-S-U : it connects users having at least one same star rating within
       one week
    3. U-V-U : it connects users with top 5% mutual review text similarities
       (measured by TF-IDF) among all users.

    Statistics:

    - Nodes: 11,944
    - Edges:

      - U-P-U: 351,216
      - U-S-U: 7,132,958
      - U-V-U: 2,073,474

    - Classes:

      - Positive (fraudulent): 821
      - Negative (benign): 7,818
      - Unlabeled: 3,305

    - Positive-Negative ratio: 1 : 10.5
    - Node feature size: 25

    Parameters
    ----------
    raw_dir : str
        Specifying the directory that will store the
        downloaded data or the directory that
        already stores the input data.
        Default: ~/.dgl/
    random_seed : int
        Specifying the random seed in splitting the dataset.
        Default: 717
    train_size : float
        training set size of the dataset.
        Default: 0.7
    val_size : float
        validation set size of the dataset, and the
        size of testing set is (1 - train_size - val_size)
        Default: 0.1
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: True.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Examples
    --------
    >>> dataset = FraudAmazonDataset()
    >>> graph = dataset[0]
    >>> num_classes = dataset.num_classes
    >>> feat = graph.ndata['feature']
    >>> label = graph.ndata['label']
    """

    def __init__(
        self,
        raw_dir=None,
        random_seed=717,
        train_size=0.7,
        val_size=0.1,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        # Everything is forwarded unchanged; only the dataset name is fixed.
        forwarded = dict(
            raw_dir=raw_dir,
            random_seed=random_seed,
            train_size=train_size,
            val_size=val_size,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
        super().__init__(name="amazon", **forwarded)
class GDELTDataset(DGLBuiltinDataset):
    r"""GDELT dataset for event-based temporal graph

    The Global Database of Events, Language, and Tone (GDELT) dataset.
    This contains events that happened all over the world (i.e. every protest
    held anywhere in Russia on a given day is collapsed to a single entry).
    This dataset consists of events collected from 1/1/2018 to 1/31/2018
    (15 minutes time granularity).

    Reference:

    - Recurrent Event Network for Reasoning over Temporal Knowledge Graphs
    - The Global Database of Events, Language, and Tone (GDELT)

    Statistics:

    - Train examples: 2,304
    - Valid examples: 288
    - Test examples: 384

    Parameters
    ----------
    mode : str
        Must be one of ('train', 'valid', 'test').
        Default: 'train'
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    start_time : int
        Start time of the temporal graph
    end_time : int
        End time of the temporal graph
    is_temporal : bool
        Does the dataset contain temporal graphs

    Examples
    --------
    >>> # get train, valid, test dataset
    >>> train_data = GDELTDataset()
    >>> valid_data = GDELTDataset(mode='valid')
    >>> test_data = GDELTDataset(mode='test')
    >>>
    >>> # length of train set
    >>> train_size = len(train_data)
    >>>
    >>> for g in train_data:
    ...     e_feat = g.edata['rel_type']
    ...     # your code here
    ...
    >>>
    """

    def __init__(
        self,
        mode="train",
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        mode = mode.lower()
        assert mode in ["train", "valid", "test"], "Mode not valid."
        self.mode = mode
        self.num_nodes = 23033
        _url = _get_dgl_url("dataset/gdelt.zip")
        super().__init__(
            name="GDELT",
            url=_url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Load the event table for ``self.mode`` and derive time indices."""
        file_path = os.path.join(self.raw_path, self.mode + ".txt")
        self.data = loadtxt(file_path, delimiter="\t").astype(np.int64)

        # The source code is not released, but the paper indicates there're
        # totally 137 samples. The cutoff below has exactly 137 samples.
        # Column 3 is divided by 15 and floored to give the time index.
        self.time_index = np.floor(self.data[:, 3] / 15).astype(np.int64)
        self._start_time = self.time_index.min()
        self._end_time = self.time_index.max()

    @property
    def info_path(self):
        """Path of the cached data for the current mode."""
        return os.path.join(self.save_path, self.mode + "_info.pkl")

    def has_cache(self):
        """Check whether cached data for the current mode exists."""
        return os.path.exists(self.info_path)

    def save(self):
        """Save the event table and time information to ``info_path``."""
        save_info(
            self.info_path,
            {
                "data": self.data,
                "time_index": self.time_index,
                "start_time": self.start_time,
                "end_time": self.end_time,
            },
        )

    def load(self):
        """Load the event table and time information from ``info_path``."""
        info = load_info(self.info_path)
        self.data = info["data"]
        self.time_index = info["time_index"]
        self._start_time = info["start_time"]
        self._end_time = info["end_time"]

    @property
    def start_time(self):
        r"""Start time of events in the temporal graph

        Returns
        -------
        int
        """
        return self._start_time

    @property
    def end_time(self):
        r"""End time of events in the temporal graph

        Returns
        -------
        int
        """
        return self._end_time

    def __getitem__(self, t):
        r"""Get the graph of all events up to time `t + self.start_time`

        Parameters
        ----------
        t : int
            Time, its value must be in range
            [0, `self.end_time` - `self.start_time`]

        Returns
        -------
        :class:`dgl.DGLGraph`

            The graph contains:

            - ``edata['rel_type']``: edge type

        Raises
        ------
        IndexError
            If ``t`` is negative or not smaller than ``len(self)``.
        """
        if t >= len(self) or t < 0:
            raise IndexError("Index out of range")
        i = t + self.start_time
        # Apply the boolean mask once: every masked indexing copies the rows.
        events = self.data[self.time_index <= i]
        # Columns 0 and 2 are the edge endpoints, column 1 the relation type.
        src, rel_type, dst = events[:, 0], events[:, 1], events[:, 2]
        g = dgl_graph((src, dst))
        g.edata["rel_type"] = F.tensor(
            rel_type.reshape(-1, 1), dtype=F.data_type_dict["int64"]
        )
        if self._transform is not None:
            g = self._transform(g)
        return g

    def __len__(self):
        r"""Number of graphs in the dataset.

        Return
        -------
        int
        """
        return self._end_time - self._start_time + 1

    @property
    def is_temporal(self):
        r"""Does the dataset contain temporal graphs

        Returns
        -------
        bool
        """
        return True


GDELT = GDELTDataset
def has_cache(self):
    """Report whether the raw data directory ``self.raw_path`` exists.

    No processed copy is stored for this dataset: ``load`` just calls
    ``process`` again, so having the raw files counts as being cached.
    """
    raw_location = self.raw_path
    return os.path.exists(raw_location)
class SquirrelDataset(GeomGCNDataset):
    r"""Wikipedia page-page network on squirrels.

    Introduced in "Multi-scale Attributed Node Embedding" and later modified
    by "Geom-GCN: Geometric Graph Convolutional Networks".

    Nodes represent articles from the English Wikipedia, edges reflect mutual
    links between them. Node features indicate the presence of particular
    nouns in the articles. The nodes were classified into 5 classes in terms
    of their average monthly traffic.

    Statistics:

    - Nodes: 5201
    - Edges: 217073
    - Number of Classes: 5
    - 10 train/val/test splits

        - Train: 2496
        - Val: 1664
        - Test: 1041

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Notes
    -----
    The graph does not come with edges for both directions.

    Examples
    --------

    >>> from dgl.data import SquirrelDataset
    >>> dataset = SquirrelDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get data split
    >>> train_mask = g.ndata["train_mask"]
    >>> val_mask = g.ndata["val_mask"]
    >>> test_mask = g.ndata["test_mask"]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        # Only the dataset name is fixed here; the rest is passed through.
        options = {
            "raw_dir": raw_dir,
            "force_reload": force_reload,
            "verbose": verbose,
            "transform": transform,
        }
        super().__init__(name="squirrel", **options)
class TexasDataset(GeomGCNDataset):
    r"""Texas subset of WebKB.

    Later modified by "Geom-GCN: Geometric Graph Convolutional Networks".

    Nodes represent web pages. Edges represent hyperlinks between them. Node
    features are the bag-of-words representation of web pages. The web pages
    are manually classified into the five categories, student, project,
    course, staff, and faculty.

    Statistics:

    - Nodes: 183
    - Edges: 325
    - Number of Classes: 5
    - 10 train/val/test splits

        - Train: 87
        - Val: 59
        - Test: 37

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Notes
    -----
    The graph does not come with edges for both directions.

    Examples
    --------

    >>> from dgl.data import TexasDataset
    >>> dataset = TexasDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get data split
    >>> train_mask = g.ndata["train_mask"]
    >>> val_mask = g.ndata["val_mask"]
    >>> test_mask = g.ndata["test_mask"]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        # Only the dataset name is fixed here; the rest is passed through.
        options = {
            "raw_dir": raw_dir,
            "force_reload": force_reload,
            "verbose": verbose,
            "transform": transform,
        }
        super().__init__(name="texas", **options)
Default: None Attributes ---------- num_classes : int Number of node classes Notes ----- The graph does not come with edges for both directions. Examples -------- >>> from dgl.data import WisconsinDataset >>> dataset = WisconsinDataset() >>> g = dataset[0] >>> num_classes = dataset.num_classes >>> # get node features >>> feat = g.ndata["feat"] >>> # get data split >>> train_mask = g.ndata["train_mask"] >>> val_mask = g.ndata["val_mask"] >>> test_mask = g.ndata["test_mask"] >>> # get labels >>> label = g.ndata['label'] """ def __init__( self, raw_dir=None, force_reload=False, verbose=True, transform=None ): super(WisconsinDataset, self).__init__( name="wisconsin", raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) ================================================ FILE: python/dgl/data/gindt.py ================================================ """Datasets used in How Powerful Are Graph Neural Networks? (chen jun) Datasets include: MUTAG, COLLAB, IMDBBINARY, IMDBMULTI, NCI1, PROTEINS, PTC, REDDITBINARY, REDDITMULTI5K https://github.com/weihua916/powerful-gnns/blob/master/dataset.zip """ import os import numpy as np from .. import backend as F from ..convert import graph as dgl_graph from ..utils import retry_method_with_fix from .dgl_dataset import DGLBuiltinDataset from .utils import ( download, extract_archive, load_graphs, load_info, loadtxt, save_graphs, save_info, ) class GINDataset(DGLBuiltinDataset): """Dataset Class for `How Powerful Are Graph Neural Networks? `_. This is adapted from ``_. The class provides an interface for nine datasets used in the paper along with the paper-specific settings. The datasets are ``'MUTAG'``, ``'COLLAB'``, ``'IMDBBINARY'``, ``'IMDBMULTI'``, ``'NCI1'``, ``'PROTEINS'``, ``'PTC'``, ``'REDDITBINARY'``, ``'REDDITMULTI5K'``. If ``degree_as_nlabel`` is set to ``False``, then ``ndata['label']`` stores the provided node label, otherwise ``ndata['label']`` stores the node in-degrees. 
For graphs that have node attributes, ``ndata['attr']`` stores the node attributes. For graphs that have no attribute, ``ndata['attr']`` stores the corresponding one-hot encoding of ``ndata['label']``. Parameters --------- name: str dataset name, one of (``'MUTAG'``, ``'COLLAB'``, \ ``'IMDBBINARY'``, ``'IMDBMULTI'``, \ ``'NCI1'``, ``'PROTEINS'``, ``'PTC'``, \ ``'REDDITBINARY'``, ``'REDDITMULTI5K'``) self_loop: bool add self to self edge if true degree_as_nlabel: bool take node degree as label and feature if true transform: callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_classes : int Number of classes for multiclass classification Examples -------- >>> data = GINDataset(name='MUTAG', self_loop=False) The dataset instance is an iterable >>> len(data) 188 >>> g, label = data[128] >>> g Graph(num_nodes=13, num_edges=26, ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)} edata_schemes={}) >>> label tensor(1) Batch the graphs and labels for mini-batch training >>> graphs, labels = zip(*[data[i] for i in range(16)]) >>> batched_graphs = dgl.batch(graphs) >>> batched_labels = torch.tensor(labels) >>> batched_graphs Graph(num_nodes=330, num_edges=748, ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(7,), dtype=torch.float32)} edata_schemes={}) """ def __init__( self, name, self_loop, degree_as_nlabel=False, raw_dir=None, force_reload=False, verbose=False, transform=None, ): self._name = name # MUTAG gin_url = "https://raw.githubusercontent.com/weihua916/powerful-gnns/master/dataset.zip" self.ds_name = "nig" self.self_loop = self_loop self.graphs = [] self.labels = [] # relabel self.glabel_dict = {} self.nlabel_dict = {} self.elabel_dict = {} self.ndegree_dict = {} # global num self.N = 0 # total graphs number self.n = 0 
# total nodes number self.m = 0 # total edges number # global num of classes self.gclasses = 0 self.nclasses = 0 self.eclasses = 0 self.dim_nfeats = 0 # flags self.degree_as_nlabel = degree_as_nlabel self.nattrs_flag = False self.nlabels_flag = False super(GINDataset, self).__init__( name=name, url=gin_url, hash_key=(name, self_loop, degree_as_nlabel), raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) @property def raw_path(self): return os.path.join(self.raw_dir, "GINDataset") def download(self): r"""Automatically download data and extract it.""" zip_file_path = os.path.join(self.raw_dir, "GINDataset.zip") download(self.url, path=zip_file_path) extract_archive(zip_file_path, self.raw_path) def __len__(self): """Return the number of graphs in the dataset.""" return len(self.graphs) def __getitem__(self, idx): """Get the idx-th sample. Parameters --------- idx : int The sample index. Returns ------- (:class:`dgl.Graph`, Tensor) The graph and its label. """ if self._transform is None: g = self.graphs[idx] else: g = self._transform(self.graphs[idx]) return g, self.labels[idx] def _file_path(self): return os.path.join( self.raw_dir, "GINDataset", "dataset", self.name, "{}.txt".format(self.name), ) def process(self): """Loads input dataset from dataset/NAME/NAME.txt file""" if self.verbose: print("loading data...") self.file = self._file_path() with open(self.file, "r") as f: # line_1 == N, total number of graphs self.N = int(f.readline().strip()) for i in range(self.N): if (i + 1) % 10 == 0 and self.verbose is True: print("processing graph {}...".format(i + 1)) grow = f.readline().strip().split() # line_2 == [n_nodes, l] is equal to # [node number of a graph, class label of a graph] n_nodes, glabel = [int(w) for w in grow] # relabel graphs if glabel not in self.glabel_dict: mapped = len(self.glabel_dict) self.glabel_dict[glabel] = mapped self.labels.append(self.glabel_dict[glabel]) g = dgl_graph(([], [])) g.add_nodes(n_nodes) nlabels = 
[] # node labels nattrs = [] # node attributes if it has m_edges = 0 for j in range(n_nodes): nrow = f.readline().strip().split() # handle edges and attributes(if has) tmp = int(nrow[1]) + 2 # tmp == 2 + #edges if tmp == len(nrow): # no node attributes nrow = [int(w) for w in nrow] elif tmp > len(nrow): nrow = [int(w) for w in nrow[:tmp]] nattr = [float(w) for w in nrow[tmp:]] nattrs.append(nattr) else: raise Exception("edge number is incorrect!") # relabel nodes if it has labels # if it doesn't have node labels, then every nrow[0]==0 if not nrow[0] in self.nlabel_dict: mapped = len(self.nlabel_dict) self.nlabel_dict[nrow[0]] = mapped nlabels.append(self.nlabel_dict[nrow[0]]) m_edges += nrow[1] g.add_edges(j, nrow[2:]) # add self loop if self.self_loop: m_edges += 1 g.add_edges(j, j) if (j + 1) % 10 == 0 and self.verbose is True: print( "processing node {} of graph {}...".format( j + 1, i + 1 ) ) print("this node has {} edgs.".format(nrow[1])) if nattrs != []: nattrs = np.stack(nattrs) g.ndata["attr"] = F.tensor(nattrs, F.float32) self.nattrs_flag = True g.ndata["label"] = F.tensor(nlabels) if len(self.nlabel_dict) > 1: self.nlabels_flag = True assert g.num_nodes() == n_nodes # update statistics of graphs self.n += n_nodes self.m += m_edges self.graphs.append(g) self.labels = F.tensor(self.labels) # if no attr if not self.nattrs_flag: if self.verbose: print("there are no node features in this dataset!") # generate node attr by node degree if self.degree_as_nlabel: if self.verbose: print("generate node features by node degree...") for g in self.graphs: # actually this label shouldn't be updated # in case users want to keep it # but usually no features means no labels, fine. 
g.ndata["label"] = g.in_degrees() # extracting unique node labels # in case the labels/degrees are not continuous number nlabel_set = set([]) for g in self.graphs: nlabel_set = nlabel_set.union( set([F.as_scalar(nl) for nl in g.ndata["label"]]) ) nlabel_set = list(nlabel_set) is_label_valid = all( [label in self.nlabel_dict for label in nlabel_set] ) if ( is_label_valid and len(nlabel_set) == np.max(nlabel_set) + 1 and np.min(nlabel_set) == 0 ): # Note this is different from the author's implementation. In weihua916's implementation, # the labels are relabeled anyway. But here we didn't relabel it if the labels are contiguous # to make it consistent with the original dataset label2idx = self.nlabel_dict else: label2idx = {nlabel_set[i]: i for i in range(len(nlabel_set))} # generate node attr by node label for g in self.graphs: attr = np.zeros((g.num_nodes(), len(label2idx))) attr[ range(g.num_nodes()), [ label2idx[nl] for nl in F.asnumpy(g.ndata["label"]).tolist() ], ] = 1 g.ndata["attr"] = F.tensor(attr, F.float32) # after load, get the #classes and #dim self.gclasses = len(self.glabel_dict) self.nclasses = len(self.nlabel_dict) self.eclasses = len(self.elabel_dict) self.dim_nfeats = len(self.graphs[0].ndata["attr"][0]) if self.verbose: print("Done.") print( """ -------- Data Statistics --------' #Graphs: %d #Graph Classes: %d #Nodes: %d #Node Classes: %d #Node Features Dim: %d #Edges: %d #Edge Classes: %d Avg. of #Nodes: %.2f Avg. 
of #Edges: %.2f Graph Relabeled: %s Node Relabeled: %s Degree Relabeled(If degree_as_nlabel=True): %s \n """ % ( self.N, self.gclasses, self.n, self.nclasses, self.dim_nfeats, self.m, self.eclasses, self.n / self.N, self.m / self.N, self.glabel_dict, self.nlabel_dict, self.ndegree_dict, ) ) def save(self): label_dict = {"labels": self.labels} info_dict = { "N": self.N, "n": self.n, "m": self.m, "self_loop": self.self_loop, "gclasses": self.gclasses, "nclasses": self.nclasses, "eclasses": self.eclasses, "dim_nfeats": self.dim_nfeats, "degree_as_nlabel": self.degree_as_nlabel, "glabel_dict": self.glabel_dict, "nlabel_dict": self.nlabel_dict, "elabel_dict": self.elabel_dict, "ndegree_dict": self.ndegree_dict, } save_graphs(str(self.graph_path), self.graphs, label_dict) save_info(str(self.info_path), info_dict) def load(self): graphs, label_dict = load_graphs(str(self.graph_path)) info_dict = load_info(str(self.info_path)) self.graphs = graphs self.labels = label_dict["labels"] self.N = info_dict["N"] self.n = info_dict["n"] self.m = info_dict["m"] self.self_loop = info_dict["self_loop"] self.gclasses = info_dict["gclasses"] self.nclasses = info_dict["nclasses"] self.eclasses = info_dict["eclasses"] self.dim_nfeats = info_dict["dim_nfeats"] self.glabel_dict = info_dict["glabel_dict"] self.nlabel_dict = info_dict["nlabel_dict"] self.elabel_dict = info_dict["elabel_dict"] self.ndegree_dict = info_dict["ndegree_dict"] self.degree_as_nlabel = info_dict["degree_as_nlabel"] @property def graph_path(self): return os.path.join( self.save_path, "gin_{}_{}.bin".format(self.name, self.hash) ) @property def info_path(self): return os.path.join( self.save_path, "gin_{}_{}.pkl".format(self.name, self.hash) ) def has_cache(self): if os.path.exists(self.graph_path) and os.path.exists(self.info_path): return True return False @property def num_classes(self): return self.gclasses ================================================ FILE: python/dgl/data/gnn_benchmark.py 
__all__ = [
    "AmazonCoBuyComputerDataset",
    "AmazonCoBuyPhotoDataset",
    "CoauthorPhysicsDataset",
    "CoauthorCSDataset",
    "CoraFullDataset",
    "AmazonCoBuy",
    "Coauthor",
    "CoraFull",
]


def eliminate_self_loops(A):
    """Remove self-loops from the adjacency matrix.

    Parameters
    ----------
    A : scipy sparse matrix
        Adjacency matrix; it is converted with ``tolil()`` first.

    Returns
    -------
    scipy.sparse.csr_matrix
        CSR copy of ``A`` with a zero diagonal and explicit zeros removed.
    """
    A = A.tolil()
    A.setdiag(0)
    A = A.tocsr()
    A.eliminate_zeros()
    return A


class GNNBenchmarkDataset(DGLBuiltinDataset):
    r"""Base Class for GNN Benchmark dataset

    Reference: https://github.com/shchur/gnn-benchmark#datasets

    Subclasses supply the dataset ``name`` and implement ``num_classes``.
    """

    def __init__(
        self,
        name,
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        _url = _get_dgl_url("dataset/" + name + ".zip")
        super(GNNBenchmarkDataset, self).__init__(
            name=name,
            url=_url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def _cached_graph_file(self):
        """Return the path of the processed-graph cache file."""
        return os.path.join(self.save_path, "dgl_graph_v1.bin")

    def process(self):
        """Build the graph from ``<raw_path>/<name>.npz`` and reorder it."""
        npz_path = os.path.join(self.raw_path, self.name + ".npz")
        g = self._load_npz(npz_path)
        g = transforms.reorder_graph(
            g,
            node_permute_algo="rcmk",
            edge_permute_algo="dst",
            store_ids=False,
        )
        self._graph = g
        self._data = [g]
        self._print_info()

    def has_cache(self):
        """Return True if the processed graph is already cached on disk."""
        return os.path.exists(self._cached_graph_file())

    def save(self):
        """Write the processed graph to the cache file."""
        save_graphs(self._cached_graph_file(), self._graph)

    def load(self):
        """Load the processed graph from the cache file."""
        graphs, _ = load_graphs(self._cached_graph_file())
        self._graph = graphs[0]
        self._data = [graphs[0]]
        self._print_info()

    def _print_info(self):
        """Print basic graph statistics when ``verbose`` is set."""
        if self.verbose:
            print("  NumNodes: {}".format(self._graph.num_nodes()))
            print("  NumEdges: {}".format(self._graph.num_edges()))
            print("  NumFeats: {}".format(self._graph.ndata["feat"].shape[-1]))
            print("  NumbClasses: {}".format(self.num_classes))

    def _load_npz(self, file_name):
        """Read an ``.npz`` archive and turn it into a bidirected graph.

        Parameters
        ----------
        file_name : str
            Path of the ``.npz`` file.

        Returns
        -------
        :class:`dgl.DGLGraph`
            Graph with ``ndata['feat']`` (float32) and ``ndata['label']``
            (int64) set.
        """
        # NOTE(review): allow_pickle=True can execute arbitrary code when
        # loading an untrusted file; only use this on trusted archives.
        with np.load(file_name, allow_pickle=True) as npz:
            # Materialise every array so nothing is read after the file closes.
            loader = dict(npz)

        adj_matrix = sp.csr_matrix(
            (
                loader["adj_data"],
                loader["adj_indices"],
                loader["adj_indptr"],
            ),
            shape=loader["adj_shape"],
        ).tocoo()

        if "attr_data" in loader:
            # Attributes are stored as a sparse CSR matrix
            attr_matrix = sp.csr_matrix(
                (
                    loader["attr_data"],
                    loader["attr_indices"],
                    loader["attr_indptr"],
                ),
                shape=loader["attr_shape"],
            ).todense()
        elif "attr_matrix" in loader:
            # Attributes are stored as a (dense) np.ndarray
            attr_matrix = loader["attr_matrix"]
        else:
            attr_matrix = None

        if "labels_data" in loader:
            # Labels are stored as a CSR matrix
            labels = sp.csr_matrix(
                (
                    loader["labels_data"],
                    loader["labels_indices"],
                    loader["labels_indptr"],
                ),
                shape=loader["labels_shape"],
            ).todense()
        elif "labels" in loader:
            # Labels are stored as a numpy array
            labels = loader["labels"]
        else:
            labels = None

        g = dgl_graph((adj_matrix.row, adj_matrix.col))
        g = transforms.to_bidirected(g)
        # NOTE(review): attr_matrix / labels may be None when the archive has
        # neither encoding; the conversions below do not guard against that.
        g.ndata["feat"] = F.tensor(attr_matrix, F.data_type_dict["float32"])
        g.ndata["label"] = F.tensor(labels, F.data_type_dict["int64"])
        return g

    @property
    def num_classes(self):
        """Number of classes."""
        raise NotImplementedError

    def __getitem__(self, idx):
        r"""Get graph by index

        Parameters
        ----------
        idx : int
            Item index

        Returns
        -------
        :class:`dgl.DGLGraph`

            The graph contains:

            - ``ndata['feat']``: node features
            - ``ndata['label']``: node labels
        """
        assert idx == 0, "This dataset has only one graph"
        if self._transform is None:
            return self._graph
        else:
            return self._transform(self._graph)

    def __len__(self):
        r"""Number of graphs in the dataset"""
        return 1


class CoraFullDataset(GNNBenchmarkDataset):
    r"""CORA-Full dataset for node classification task.

    Extended Cora dataset. Nodes represent paper and edges represent
    citations.

    Reference: `<https://github.com/shchur/gnn-benchmark#datasets>`_

    Statistics:

    - Nodes: 19,793
    - Edges: 126,842 (note that the original dataset has 65,311 edges but DGL
      adds the reverse edges and removes the duplicates, hence the different
      number)
    - Number of Classes: 70
    - Node feature size: 8,710

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of classes for each node.

    Examples
    --------
    >>> data = CoraFullDataset()
    >>> g = data[0]
    >>> num_class = data.num_classes
    >>> feat = g.ndata['feat']  # get node feature
    >>> label = g.ndata['label']  # get node labels
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=False, transform=None
    ):
        super(CoraFullDataset, self).__init__(
            name="cora_full",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def num_classes(self):
        """Number of classes.

        Returns
        -------
        int
        """
        return 70


class CoauthorCSDataset(GNNBenchmarkDataset):
    r"""'Computer Science (CS)' part of the Coauthor dataset for node
    classification task.

    Coauthor CS and Coauthor Physics are co-authorship graphs based on the
    Microsoft Academic Graph from the KDD Cup 2016 challenge. Here, nodes are
    authors, that are connected by an edge if they co-authored a paper; node
    features represent paper keywords for each author's papers, and class
    labels indicate most active fields of study for each author.

    Reference: `<https://github.com/shchur/gnn-benchmark#datasets>`_

    Statistics:

    - Nodes: 18,333
    - Edges: 163,788 (note that the original dataset has 81,894 edges but DGL
      adds the reverse edges and removes the duplicates, hence the different
      number)
    - Number of classes: 15
    - Node feature size: 6,805

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of classes for each node.

    Examples
    --------
    >>> data = CoauthorCSDataset()
    >>> g = data[0]
    >>> num_class = data.num_classes
    >>> feat = g.ndata['feat']  # get node feature
    >>> label = g.ndata['label']  # get node labels
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=False, transform=None
    ):
        super(CoauthorCSDataset, self).__init__(
            name="coauthor_cs",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def num_classes(self):
        """Number of classes.

        Returns
        -------
        int
        """
        return 15


class CoauthorPhysicsDataset(GNNBenchmarkDataset):
    r"""'Physics' part of the Coauthor dataset for node classification task.

    Coauthor CS and Coauthor Physics are co-authorship graphs based on the
    Microsoft Academic Graph from the KDD Cup 2016 challenge. Here, nodes are
    authors, that are connected by an edge if they co-authored a paper; node
    features represent paper keywords for each author's papers, and class
    labels indicate most active fields of study for each author.

    Reference: `<https://github.com/shchur/gnn-benchmark#datasets>`_

    Statistics:

    - Nodes: 34,493
    - Edges: 495,924 (note that the original dataset has 247,962 edges but
      DGL adds the reverse edges and removes the duplicates, hence the
      different number)
    - Number of classes: 5
    - Node feature size: 8,415

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of classes for each node.

    Examples
    --------
    >>> data = CoauthorPhysicsDataset()
    >>> g = data[0]
    >>> num_class = data.num_classes
    >>> feat = g.ndata['feat']  # get node feature
    >>> label = g.ndata['label']  # get node labels
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=False, transform=None
    ):
        super(CoauthorPhysicsDataset, self).__init__(
            name="coauthor_physics",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def num_classes(self):
        """Number of classes.

        Returns
        -------
        int
        """
        return 5


class AmazonCoBuyComputerDataset(GNNBenchmarkDataset):
    r"""'Computer' part of the AmazonCoBuy dataset for node classification
    task.

    Amazon Computers and Amazon Photo are segments of the Amazon co-purchase
    graph [McAuley et al., 2015], where nodes represent goods, edges indicate
    that two goods are frequently bought together, node features are
    bag-of-words encoded product reviews, and class labels are given by the
    product category.

    Reference: `<https://github.com/shchur/gnn-benchmark#datasets>`_

    Statistics:

    - Nodes: 13,752
    - Edges: 491,722 (note that the original dataset has 245,778 edges but
      DGL adds the reverse edges and removes the duplicates, hence the
      different number)
    - Number of classes: 10
    - Node feature size: 767

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of classes for each node.

    Examples
    --------
    >>> data = AmazonCoBuyComputerDataset()
    >>> g = data[0]
    >>> num_class = data.num_classes
    >>> feat = g.ndata['feat']  # get node feature
    >>> label = g.ndata['label']  # get node labels
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=False, transform=None
    ):
        super(AmazonCoBuyComputerDataset, self).__init__(
            name="amazon_co_buy_computer",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def num_classes(self):
        """Number of classes.

        Returns
        -------
        int
        """
        return 10


class AmazonCoBuyPhotoDataset(GNNBenchmarkDataset):
    r"""'Photo' part of the AmazonCoBuy dataset for node classification task.

    Amazon Computers and Amazon Photo are segments of the Amazon co-purchase
    graph [McAuley et al., 2015], where nodes represent goods, edges indicate
    that two goods are frequently bought together, node features are
    bag-of-words encoded product reviews, and class labels are given by the
    product category.

    Reference: `<https://github.com/shchur/gnn-benchmark#datasets>`_

    Statistics:

    - Nodes: 7,650
    - Edges: 238,163 (note that the original dataset has 119,043 edges but
      DGL adds the reverse edges and removes the duplicates, hence the
      different number)
    - Number of classes: 8
    - Node feature size: 745

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of classes for each node.

    Examples
    --------
    >>> data = AmazonCoBuyPhotoDataset()
    >>> g = data[0]
    >>> num_class = data.num_classes
    >>> feat = g.ndata['feat']  # get node feature
    >>> label = g.ndata['label']  # get node labels
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=False, transform=None
    ):
        super(AmazonCoBuyPhotoDataset, self).__init__(
            name="amazon_co_buy_photo",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def num_classes(self):
        """Number of classes.

        Returns
        -------
        int
        """
        return 8


class CoraFull(CoraFullDataset):
    """Deprecated alias of :class:`CoraFullDataset`."""

    def __init__(self, **kwargs):
        deprecate_class("CoraFull", "CoraFullDataset")
        super(CoraFull, self).__init__(**kwargs)


def AmazonCoBuy(name):
    """Deprecated factory for the AmazonCoBuy datasets.

    Parameters
    ----------
    name : str
        ``"computers"`` or ``"photo"``.

    Returns
    -------
    AmazonCoBuyComputerDataset or AmazonCoBuyPhotoDataset

    Raises
    ------
    ValueError
        If ``name`` is neither ``"computers"`` nor ``"photo"``.
    """
    if name == "computers":
        deprecate_class("AmazonCoBuy", "AmazonCoBuyComputerDataset")
        return AmazonCoBuyComputerDataset()
    elif name == "photo":
        deprecate_class("AmazonCoBuy", "AmazonCoBuyPhotoDataset")
        return AmazonCoBuyPhotoDataset()
    else:
        raise ValueError('Dataset name should be "computers" or "photo".')


def Coauthor(name):
    """Deprecated factory for the Coauthor datasets.

    Parameters
    ----------
    name : str
        ``"cs"`` or ``"physics"``.

    Returns
    -------
    CoauthorCSDataset or CoauthorPhysicsDataset

    Raises
    ------
    ValueError
        If ``name`` is neither ``"cs"`` nor ``"physics"``.
    """
    if name == "cs":
        deprecate_class("Coauthor", "CoauthorCSDataset")
        return CoauthorCSDataset()
    elif name == "physics":
        deprecate_class("Coauthor", "CoauthorPhysicsDataset")
        return CoauthorPhysicsDataset()
    else:
        raise ValueError('Dataset name should be "cs" or "physics".')
_init_api("dgl.data.graph_serialize")

__all__ = ["save_graphs", "load_graphs", "load_labels"]


@register_object("graph_serialize.StorageMetaData")
class StorageMetaData(ObjectBase):
    """StorageMetaData Object

    attributes available:
      num_graph [int]: return numbers of graphs
      nodes_num_list Value of NDArray: return number of nodes for each graph
      edges_num_list Value of NDArray: return number of edges for each graph
      labels [dict of backend tensors]: return dict of labels
      graph_data [list of GraphData]: return list of GraphData Object
    """


def is_local_path(filepath):
    """Return True unless ``filepath`` starts with a remote scheme.

    The schemes treated as remote are ``hdfs://``, ``viewfs://`` and
    ``s3://``.
    """
    return not filepath.startswith(("hdfs://", "viewfs://", "s3://"))


def check_local_file_exists(filename):
    """Raise :class:`DGLError` if a local ``filename`` does not exist.

    Remote paths (see :func:`is_local_path`) are not checked.
    """
    if is_local_path(filename) and not os.path.exists(filename):
        raise DGLError("File {} does not exist.".format(filename))


@register_object("graph_serialize.GraphData")
class GraphData(ObjectBase):
    """GraphData Object"""

    @staticmethod
    def create(g):
        """Create GraphData from a non-batched graph ``g``."""
        # TODO(zihao): support serialize batched graph in the future.
        assert (
            g.batch_size == 1
        ), "Batched DGLGraph is not supported for serialization"
        ghandle = g._graph
        # Empty feature dicts are passed on as None rather than as {}.
        if len(g.ndata) != 0:
            node_tensors = {
                key: F.zerocopy_to_dgl_ndarray(value)
                for key, value in g.ndata.items()
            }
        else:
            node_tensors = None

        if len(g.edata) != 0:
            edge_tensors = {
                key: F.zerocopy_to_dgl_ndarray(value)
                for key, value in g.edata.items()
            }
        else:
            edge_tensors = None

        return _CAPI_MakeGraphData(ghandle, node_tensors, edge_tensors)

    def get_graph(self):
        """Get DGLGraph from GraphData"""
        ghandle = _CAPI_GDataGraphHandle(self)
        hgi = _CAPI_DGLAsHeteroGraph(ghandle)
        g = DGLGraph(hgi, ["_U"], ["_E"])
        node_tensors_items = _CAPI_GDataNodeTensors(self).items()
        edge_tensors_items = _CAPI_GDataEdgeTensors(self).items()
        for k, v in node_tensors_items:
            g.ndata[k] = F.zerocopy_from_dgl_ndarray(v)
        for k, v in edge_tensors_items:
            g.edata[k] = F.zerocopy_from_dgl_ndarray(v)
        return g


def save_graphs(filename, g_list, labels=None, formats=None):
    r"""Save graphs and optionally their labels to file.

    Besides saving to local files, DGL supports writing the graphs directly
    to S3 (by providing a ``"s3://..."`` path) or to HDFS (by providing
    ``"hdfs://..."`` a path).

    The function saves both the graph structure and node/edge features to
    file in DGL's own binary format. For graph-level features, pass them via
    the :attr:`labels` argument.

    Parameters
    ----------
    filename : str
        The file name to store the graphs and labels.
    g_list: list
        The graphs to be saved.
    labels: dict[str, Tensor]
        labels should be dict of tensors, with str as keys
    formats: str or list[str]
        Save graph in specified formats. It could be any combination of
        ``coo``, ``csc`` and ``csr``. If not specified, save one format only
        according to what format is available. If multiple formats are
        available, selection priority from high to low is ``coo``, ``csc``,
        ``csr``.

    Raises
    ------
    DGLError
        If ``filename`` is an existing local directory, or if ``g_list`` is
        neither a :class:`DGLGraph` nor a list starting with one.

    Examples
    --------
    >>> import dgl
    >>> import torch as th

    Create :class:`DGLGraph` objects and initialize node and edge features.

    >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
    >>> g2 = dgl.graph(([0, 2], [2, 3]))
    >>> g2.edata["e"] = th.ones(2, 4)

    Save Graphs into file

    >>> from dgl.data.utils import save_graphs
    >>> graph_labels = {"glabel": th.tensor([0, 1])}
    >>> save_graphs("./data.bin", [g1, g2], graph_labels)

    See Also
    --------
    load_graphs
    """
    # if it is local file, do some sanity check
    if is_local_path(filename):
        if os.path.isdir(filename):
            raise DGLError(
                "Filename {} is an existing directory.".format(filename)
            )
        f_path = os.path.dirname(filename)
        if f_path and not os.path.exists(f_path):
            os.makedirs(f_path)

    g_sample = g_list[0] if isinstance(g_list, list) else g_list
    # Exact type match on purpose: DGLGraph's derived classes are not
    # supported.
    if type(g_sample) == DGLGraph:
        save_heterographs(filename, g_list, labels, formats)
    else:
        raise DGLError(
            "Invalid argument g_list. Must be a DGLGraph or a list of DGLGraphs."
        )


def load_graphs(filename, idx_list=None):
    """Load graphs and optionally their labels from file saved by
    :func:`save_graphs`.

    Besides loading from local files, DGL supports loading the graphs
    directly from S3 (by providing a ``"s3://..."`` path) or from HDFS (by
    providing ``"hdfs://..."`` a path).

    Parameters
    ----------
    filename: str
        The file name to load graphs from.
    idx_list: list[int], optional
        The indices of the graphs to be loaded if the file contains multiple
        graphs. Default is loading all the graphs stored in the file.

    Returns
    -------
    graph_list: list[DGLGraph]
        The loaded graphs.
    labels: dict[str, Tensor]
        The graph labels stored in file. If no label is stored, the
        dictionary is empty. Regardless of whether the ``idx_list`` argument
        is given or not, the returned dictionary always contains the labels
        of all the graphs.

    Raises
    ------
    DGLError
        If a local ``filename`` does not exist or the file's format version
        is not recognised.

    Examples
    --------
    Following the example in :func:`save_graphs`.

    >>> from dgl.data.utils import load_graphs
    >>> glist, label_dict = load_graphs("./data.bin")  # glist will be [g1, g2]
    >>> glist, label_dict = load_graphs("./data.bin", [0])  # glist will be [g1]

    See Also
    --------
    save_graphs
    """
    # if it is local file, do some sanity check
    check_local_file_exists(filename)

    version = _CAPI_GetFileVersion(filename)
    if version == 1:
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the message.
        dgl_warning(
            "You are loading a graph file saved by old version of dgl. "
            "Please consider saving it again with the current format."
        )
        return load_graph_v1(filename, idx_list)
    elif version == 2:
        return load_graph_v2(filename, idx_list)
    else:
        raise DGLError("Invalid DGL Version Number.")


def load_graph_v2(filename, idx_list=None):
    """Internal function for loading DGLGraphs from the V2 format."""
    if idx_list is None:
        idx_list = []
    assert isinstance(idx_list, list)
    heterograph_list = _CAPI_LoadGraphFiles_V2(filename, idx_list)
    label_dict = load_labels_v2(filename)
    return [gdata.get_graph() for gdata in heterograph_list], label_dict


def load_graph_v1(filename, idx_list=None):
    """Internal function for loading DGLGraphs from the V1 format."""
    if idx_list is None:
        idx_list = []
    assert isinstance(idx_list, list)
    metadata = _CAPI_LoadGraphFiles_V1(filename, idx_list, False)
    label_dict = {
        k: F.zerocopy_from_dgl_ndarray(v) for k, v in metadata.labels.items()
    }
    return [gdata.get_graph() for gdata in metadata.graph_data], label_dict


def load_labels(filename):
    """
    Load label dict from file

    Parameters
    ----------
    filename: str
        filename to load DGLGraphs

    Returns
    -------
    labels: dict
        dict of labels stored in file (empty dict returned if no
        label stored)

    Raises
    ------
    DGLError
        If a local ``filename`` does not exist or the file's format version
        is not recognised.

    Examples
    --------
    Following the example in save_graphs.

    >>> from dgl.data.utils import load_labels
    >>> label_dict = load_labels("./data.bin")
    """
    # if it is local file, do some sanity check
    check_local_file_exists(filename)

    version = _CAPI_GetFileVersion(filename)
    if version == 1:
        return load_labels_v1(filename)
    elif version == 2:
        return load_labels_v2(filename)
    else:
        # DGLError, as in load_graphs, for the same condition.
        raise DGLError("Invalid DGL Version Number")


def load_labels_v2(filename):
    """Internal functions for loading labels from V2 format"""
    nd_dict = _CAPI_LoadLabels_V2(filename)
    return {k: F.zerocopy_from_dgl_ndarray(v) for k, v in nd_dict.items()}


def load_labels_v1(filename):
    """Internal functions for loading labels from V1 format"""
    metadata = _CAPI_LoadGraphFiles_V1(filename, [], True)
    return {
        k: F.zerocopy_from_dgl_ndarray(v) for k, v in metadata.labels.items()
    }
def tensor_dict_to_ndarray_dict(tensor_dict):
    """Turn a ``dict[str, tensor]`` into a ``StrMap[NDArray]``.

    Each value goes through ``F.zerocopy_to_dgl_ndarray`` and the resulting
    dict is wrapped by ``convert_to_strmap``.
    """
    return convert_to_strmap(
        {
            name: F.zerocopy_to_dgl_ndarray(tensor)
            for name, tensor in tensor_dict.items()
        }
    )


def save_heterographs(filename, g_list, labels, formats):
    """Write one or several heterographs, plus optional labels, to a file.

    Parameters
    ----------
    filename : str
        Destination file.
    g_list : DGLGraph or list of DGLGraph
        Graph(s) to store; a single graph is wrapped in a list.
    labels : dict or None
        Label tensors keyed by name; ``None`` means no labels.
    formats : str, list or None
        Passed to the C API as a list; ``None`` becomes an empty list and a
        single string becomes a one-element list.
    """
    labels = {} if labels is None else labels
    if isinstance(g_list, DGLGraph):
        g_list = [g_list]
    # The exact-type comparison is kept on purpose: subclasses are rejected.
    assert all(
        type(g) == DGLGraph for g in g_list
    ), "Invalid DGLGraph in g_list argument"
    packed = [HeteroGraphData.create(g) for g in g_list]
    if formats is None:
        formats = []
    elif isinstance(formats, str):
        formats = [formats]
    _CAPI_SaveHeteroGraphData(
        filename, packed, tensor_dict_to_ndarray_dict(labels), formats
    )


@register_object("heterograph_serialize.HeteroGraphData")
class HeteroGraphData(ObjectBase):
    """Object to hold the data to be stored for DGLGraph"""

    @staticmethod
    def create(g):
        """Pack the structure and the node/edge features of ``g``.

        Edge features are collected per canonical edge type and node
        features per node type, each converted with
        ``tensor_dict_to_ndarray_dict``.
        """
        edata_list = [
            tensor_dict_to_ndarray_dict(g.edges[etype].data)
            for etype in g.canonical_etypes
        ]
        ndata_list = [
            tensor_dict_to_ndarray_dict(g.nodes[ntype].data)
            for ntype in g.ntypes
        ]
        return _CAPI_MakeHeteroGraphData(
            g._graph, ndata_list, edata_list, g.ntypes, g.etypes
        )

    def get_graph(self):
        """Rebuild a :class:`DGLGraph` from the stored data."""
        ntensor_list = list(_CAPI_GetNDataFromHeteroGraphData(self))
        etensor_list = list(_CAPI_GetEDataFromHeteroGraphData(self))
        ntype_names = list(_CAPI_GetNtypesFromHeteroGraphData(self))
        etype_names = list(_CAPI_GetEtypesFromHeteroGraphData(self))
        gidx = _CAPI_GetGindexFromHeteroGraphData(self)

        def _unflatten(flat):
            # ``flat`` alternates key, value, key, value, ...
            return {
                flat[pos]: F.zerocopy_from_dgl_ndarray(flat[pos + 1])
                for pos in range(0, len(flat), 2)
            }

        nframes = [
            Frame(_unflatten(flat), num_rows=gidx.num_nodes(ntid))
            for ntid, flat in enumerate(ntensor_list)
        ]
        eframes = [
            Frame(_unflatten(flat), num_rows=gidx.num_edges(etid))
            for etid, flat in enumerate(etensor_list)
        ]
        return DGLGraph(gidx, ntype_names, etype_names, nframes, eframes)
class HeterophilousGraphDataset(DGLBuiltinDataset):
    r"""Base class for the datasets of the paper "A Critical Look at the
    Evaluation of GNNs under Heterophily: Are We Really Making Progress?".

    Parameters
    ----------
    name : str
        Name of the dataset. One of 'roman-empire', 'amazon-ratings',
        'minesweeper', 'tolokers', 'questions'.
    raw_dir : str
        Raw file directory to store the processed data.
    force_reload : bool
        Whether to re-download the data source.
    verbose : bool
        Whether to print progress information.
    transform : callable
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.
    """

    def __init__(
        self,
        name,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        # Lower-case the name and use underscores: this is the stem of the
        # ``.npz`` file in the URL below.
        name = name.lower().replace("-", "_")
        url = f"https://github.com/yandex-research/heterophilous-graphs/raw/main/data/{name}.npz"
        super().__init__(
            name=name,
            url=url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        """Fetch ``<name>.npz`` into the raw directory."""
        target = os.path.join(self.raw_path, f"{self.name}.npz")
        download(url=self.url, path=target)

    def process(self):
        """Load and process the data."""
        try:
            import torch
        except ImportError:
            raise ModuleNotFoundError(
                "This dataset requires PyTorch to be the backend."
            )

        npz_path = os.path.join(self.raw_path, f"{self.name}.npz")
        data = np.load(npz_path)

        edge_array = data["edges"]
        src = torch.from_numpy(edge_array[:, 0])
        dst = torch.from_numpy(edge_array[:, 1])
        labels = torch.from_numpy(data["node_labels"])

        self._num_classes = len(labels.unique())

        g = to_bidirected(graph((src, dst), num_nodes=len(labels)))
        g.ndata["feat"] = torch.from_numpy(data["node_features"])
        g.ndata["label"] = labels
        # The stored masks are transposed before being attached as node data.
        g.ndata["train_mask"] = torch.from_numpy(data["train_masks"].T)
        g.ndata["val_mask"] = torch.from_numpy(data["val_masks"].T)
        g.ndata["test_mask"] = torch.from_numpy(data["test_masks"].T)
        self._g = g

    def has_cache(self):
        """Report whether the raw data directory already exists."""
        return os.path.exists(self.raw_path)

    def load(self):
        """Rebuild the graph from the raw file by running :meth:`process`."""
        self.process()

    def __getitem__(self, idx):
        assert idx == 0, "This dataset has only one graph."
        if self._transform is not None:
            return self._transform(self._g)
        return self._g

    def __len__(self):
        return 1

    @property
    def num_classes(self):
        """Number of distinct node labels found while processing."""
        return self._num_classes
class RomanEmpireDataset(HeterophilousGraphDataset):
    r"""Roman-empire dataset from the paper "A Critical Look at the
    Evaluation of GNNs under Heterophily: Are We Really Making Progress?".

    The graph is derived from the Roman Empire article of English Wikipedia,
    picked because it is one of the longest articles there. Every
    (non-unique) word of the text is a node, so the number of nodes equals
    the length of the article. Two words are linked when they are adjacent
    in the text or when one depends on the other in the dependency tree of
    their sentence. The result is a chain graph with extra shortcut edges
    for syntactic dependencies. A node's class is its syntactic role: the
    17 most frequent roles are separate classes and every other role falls
    into an 18th class. Node features are word embeddings.

    Statistics:

    - Nodes: 22662
    - Edges: 65854
    - Classes: 18
    - Node features: 300
    - 10 train/val/test splits

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------

    >>> from dgl.data import RomanEmpireDataset
    >>> dataset = RomanEmpireDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get the first data split
    >>> train_mask = g.ndata["train_mask"][:, 0]
    >>> val_mask = g.ndata["val_mask"][:, 0]
    >>> test_mask = g.ndata["test_mask"][:, 0]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        super().__init__(
            name="roman-empire",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
class AmazonRatingsDataset(HeterophilousGraphDataset):
    r"""Amazon-ratings dataset from the paper "A Critical Look at the
    Evaluation of GNNs under Heterophily: Are We Really Making Progress?".

    Built from Amazon product co-purchasing data. Nodes are products (books,
    music CDs, DVDs, VHS video tapes) and an edge joins two products that
    are frequently bought together. The target is the average rating
    reviewers gave to a product, with all possible rating values grouped
    into five classes. Node features are the mean of the word embeddings of
    the words in the product description.

    Statistics:

    - Nodes: 24492
    - Edges: 186100
    - Classes: 5
    - Node features: 300
    - 10 train/val/test splits

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------

    >>> from dgl.data import AmazonRatingsDataset
    >>> dataset = AmazonRatingsDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get the first data split
    >>> train_mask = g.ndata["train_mask"][:, 0]
    >>> val_mask = g.ndata["val_mask"][:, 0]
    >>> test_mask = g.ndata["test_mask"][:, 0]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        super().__init__(
            name="amazon-ratings",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
class MinesweeperDataset(HeterophilousGraphDataset):
    r"""Minesweeper dataset from the paper "A Critical Look at the
    Evaluation of GNNs under Heterophily: Are We Really Making Progress?".

    Inspired by the Minesweeper game. The graph is a regular 100x100 grid in
    which every node (cell) is linked to its eight neighbours, except for
    nodes on the border of the grid, which have fewer. 20% of the nodes are
    randomly chosen as mines and the task is to predict which nodes are
    mines. Node features are the one-hot-encoded number of neighbouring
    mines. For a randomly selected 50% of the nodes the features are
    unknown, which is signalled by a separate binary feature.

    Statistics:

    - Nodes: 10000
    - Edges: 78804
    - Classes: 2
    - Node features: 7
    - 10 train/val/test splits

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------

    >>> from dgl.data import MinesweeperDataset
    >>> dataset = MinesweeperDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get the first data split
    >>> train_mask = g.ndata["train_mask"][:, 0]
    >>> val_mask = g.ndata["val_mask"][:, 0]
    >>> test_mask = g.ndata["test_mask"][:, 0]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        super().__init__(
            name="minesweeper",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
class TolokersDataset(HeterophilousGraphDataset):
    r"""Tolokers dataset from the paper "A Critical Look at the
    Evaluation of GNNs under Heterophily: Are We Really Making Progress?".

    Built from data of the Toloka crowdsourcing platform. Nodes are tolokers
    (workers) and an edge joins two tolokers who have worked on the same
    task. The goal is to predict which tolokers have been banned in one of
    the projects. Node features come from the worker's profile information
    and task performance statistics.

    Statistics:

    - Nodes: 11758
    - Edges: 1038000
    - Classes: 2
    - Node features: 10
    - 10 train/val/test splits

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------

    >>> from dgl.data import TolokersDataset
    >>> dataset = TolokersDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get the first data split
    >>> train_mask = g.ndata["train_mask"][:, 0]
    >>> val_mask = g.ndata["val_mask"][:, 0]
    >>> test_mask = g.ndata["test_mask"][:, 0]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        super().__init__(
            name="tolokers",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
class QuestionsDataset(HeterophilousGraphDataset):
    r"""Questions dataset from the paper "A Critical Look at the
    Evaluation of GNNs under Heterophily: Are We Really Making Progress?".

    Built from data of the question-answering website Yandex Q. Nodes are
    users and an edge joins two users when one of them answered the other's
    question. The task is to predict which users remained active on the
    website (were not deleted or blocked). Node features are the mean of the
    word embeddings of the words in the user description. Users without a
    description are flagged by a separate binary feature.

    Statistics:

    - Nodes: 48921
    - Edges: 307080
    - Classes: 2
    - Node features: 301
    - 10 train/val/test splits

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------

    >>> from dgl.data import QuestionsDataset
    >>> dataset = QuestionsDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get the first data split
    >>> train_mask = g.ndata["train_mask"][:, 0]
    >>> val_mask = g.ndata["val_mask"][:, 0]
    >>> test_mask = g.ndata["test_mask"][:, 0]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        super().__init__(
            name="questions",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
class ICEWS18Dataset(DGLBuiltinDataset):
    r"""ICEWS18 dataset for temporal graph

    Integrated Crisis Early Warning System (ICEWS18)

    Event data consists of coded interactions between socio-political
    actors (i.e., cooperative or hostile actions between individuals,
    groups, sectors and nation states). This Dataset consists of events
    from 1/1/2018 to 10/31/2018 (24 hours time granularity).

    Reference:

    - Recurrent Event Network for Reasoning over Temporal Knowledge Graphs
    - ICEWS Coded Event Data

    Statistics:

    - Train examples: 240
    - Valid examples: 30
    - Test examples: 34
    - Nodes per graph: 23033

    Parameters
    ----------
    mode: str
        Load train/valid/test data. Has to be one of ['train', 'valid', 'test']
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    is_temporal : bool
        Is the dataset contains temporal graphs

    Examples
    --------
    >>> # get train, valid, test set
    >>> train_data = ICEWS18Dataset()
    >>> valid_data = ICEWS18Dataset(mode='valid')
    >>> test_data = ICEWS18Dataset(mode='test')
    >>>
    >>> train_size = len(train_data)
    >>> for g in train_data:
    ...     e_feat = g.edata['rel_type']
    ...     # your code here
    ...
    >>>
    """

    def __init__(
        self,
        mode="train",
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        mode = mode.lower()
        assert mode in ["train", "valid", "test"], "Mode not valid"
        # The mode must be set before the base constructor runs: process(),
        # has_cache(), save() and load() all build file names from it.
        self.mode = mode
        _url = _get_dgl_url("dataset/icews18.zip")
        super(ICEWS18Dataset, self).__init__(
            name="ICEWS18",
            url=_url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def _graph_path(self):
        """Path of the cached graph file for the current mode."""
        return os.path.join(
            self.save_path, "{}_dgl_graph.bin".format(self.mode)
        )

    def process(self):
        """Build one graph per time step from ``<mode>.txt``.

        Columns 0 and 2 of the tab-separated file are used as edge end
        points, column 1 as the relation type and column 3, divided by 24
        and floored, as the time index. The graph for time step ``i`` holds
        every row whose time index is ``<= i``.
        """
        data = loadtxt(
            os.path.join(self.save_path, "{}.txt".format(self.mode)),
            delimiter="\t",
        ).astype(np.int64)
        # NOTE(review): the original set an unused ``num_nodes = 23033`` here
        # and never passed it to the graph constructor, so the node count is
        # derived from the edge end points. Confirm whether ``num_nodes``
        # should be passed explicitly.

        # The source code is not released, but the paper indicates there're
        # totally 137 samples. The cutoff below has exactly 137 samples.
        time_index = np.floor(data[:, 3] / 24).astype(np.int64)
        start_time = time_index[time_index != -1].min()
        end_time = time_index.max()
        self._graphs = []
        for i in range(start_time, end_time + 1):
            # Select the rows once and slice both edges and relations from it.
            rows = data[time_index <= i]
            g = dgl_graph((rows[:, 0], rows[:, 2]))
            g.edata["rel_type"] = F.tensor(
                rows[:, 1].reshape(-1, 1), dtype=F.data_type_dict["int64"]
            )
            self._graphs.append(g)

    def has_cache(self):
        """Return True when the cached graph file for this mode exists."""
        return os.path.exists(self._graph_path())

    def save(self):
        """Store the processed graphs in the cache file for this mode."""
        save_graphs(self._graph_path(), self._graphs)

    def load(self):
        """Read the graphs back from the cache file for this mode."""
        self._graphs = load_graphs(self._graph_path())[0]

    def __getitem__(self, idx):
        r"""Get graph by index

        Parameters
        ----------
        idx : int
            Item index

        Returns
        -------
        :class:`dgl.DGLGraph`

            The graph contains:

            - ``edata['rel_type']``: edge type
        """
        if self._transform is None:
            return self._graphs[idx]
        else:
            return self._transform(self._graphs[idx])

    def __len__(self):
        r"""Number of graphs in the dataset.

        Return
        -------
        int
        """
        return len(self._graphs)

    @property
    def is_temporal(self):
        r"""Is the dataset contains temporal graphs

        Returns
        -------
        bool
        """
        return True


ICEWS18 = ICEWS18Dataset
class KarateClubDataset(DGLDataset):
    r"""Karate Club dataset for Node Classification

    Zachary's karate club is a social network of a university karate club,
    described in the paper "An Information Flow Model for Conflict and
    Fission in Small Groups" by Wayne W. Zachary. The network became a
    popular example of community structure in networks after its use by
    Michelle Girvan and Mark Newman in 2002.

    Karate Club dataset statistics:

    - Nodes: 34
    - Edges: 156
    - Number of Classes: 2

    Parameters
    ----------
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------
    >>> dataset = KarateClubDataset()
    >>> num_classes = dataset.num_classes
    >>> g = dataset[0]
    >>> labels = g.ndata['label']
    """

    def __init__(self, transform=None):
        super().__init__(name="karate_club", transform=transform)

    def process(self):
        """Build the graph from networkx and attach the node labels.

        A node gets label 0 when its ``club`` attribute equals "Mr. Hi" and
        label 1 otherwise.
        """
        kc_graph = nx.karate_club_graph()
        not_mr_hi = [
            kc_graph.nodes[node]["club"] != "Mr. Hi" for node in kc_graph.nodes
        ]
        label = F.tensor(np.asarray(not_mr_hi).astype(np.int64))
        g = from_networkx(kc_graph)
        g.ndata["label"] = label
        self._graph = g
        self._data = [g]

    @property
    def num_classes(self):
        """Number of classes."""
        return 2

    def __getitem__(self, idx):
        r"""Get graph object

        Parameters
        ----------
        idx : int
            Item index, KarateClubDataset has only one graph object

        Returns
        -------
        :class:`dgl.DGLGraph`

            graph structure and labels.

            - ``ndata['label']``: ground truth labels
        """
        assert idx == 0, "This dataset has only one graph"
        if self._transform is not None:
            return self._transform(self._graph)
        return self._graph

    def __len__(self):
        r"""The number of graphs in the dataset."""
        return 1
class KnowledgeGraphDataset(DGLBuiltinDataset):
    """KnowledgeGraph link prediction dataset

    The dataset contains a graph depicting the connectivity of a knowledge
    base. Currently, the knowledge bases from the RGCN paper supported are
    FB15k-237, FB15k, wn18

    Parameters
    -----------
    name : str
        Name can be 'FB15k-237', 'FB15k' or 'wn18'.
    reverse : bool
        Whether add reverse edges. Default: True.
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: True.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.
    """

    def __init__(
        self,
        name,
        reverse=True,
        raw_dir=None,
        force_reload=False,
        verbose=True,
        transform=None,
    ):
        self._name = name
        self.reverse = reverse
        archive_url = _get_dgl_url("dataset/") + "{}.tgz".format(name)
        super().__init__(
            name,
            url=archive_url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def download(self):
        r"""Automatically download data and extract it."""
        tgz_path = os.path.join(self.raw_dir, self.name + ".tgz")
        download(self.url, path=tgz_path)
        extract_archive(tgz_path, self.raw_path)

    def process(self):
        """
        The original knowledge base is stored in triplets.
        This function will parse these triplets and build the DGLGraph.
        """
        root_path = self.raw_path
        entity_dict = _read_dictionary(
            os.path.join(root_path, "entities.dict")
        )
        relation_dict = _read_dictionary(
            os.path.join(root_path, "relations.dict")
        )

        def _load_split(file_name):
            # Triplets of one split, as an array of [subject, relation, object] ids.
            return np.asarray(
                _read_triplets_as_list(
                    os.path.join(root_path, file_name),
                    entity_dict,
                    relation_dict,
                )
            )

        train = _load_split("train.txt")
        valid = _load_split("valid.txt")
        test = _load_split("test.txt")
        num_nodes = len(entity_dict)
        num_rels = len(relation_dict)
        if self.verbose:
            print("# entities: {}".format(num_nodes))
            print("# relations: {}".format(num_rels))
            print("# training edges: {}".format(train.shape[0]))
            print("# validation edges: {}".format(valid.shape[0]))
            print("# testing edges: {}".format(test.shape[0]))

        # for compatibility
        self._train = train
        self._valid = valid
        self._test = test

        self._num_nodes = num_nodes
        self._num_rels = num_rels
        # build graph
        g, data = build_knowledge_graph(
            num_nodes, num_rels, train, valid, test, reverse=self.reverse
        )
        (
            etype,
            ntype,
            train_edge_mask,
            valid_edge_mask,
            test_edge_mask,
            train_mask,
            val_mask,
            test_mask,
        ) = data
        g.edata["train_edge_mask"] = train_edge_mask
        g.edata["valid_edge_mask"] = valid_edge_mask
        g.edata["test_edge_mask"] = test_edge_mask
        g.edata["train_mask"] = train_mask
        g.edata["val_mask"] = val_mask
        g.edata["test_mask"] = test_mask
        g.edata["etype"] = etype
        g.ndata["ntype"] = ntype
        self._g = g

    @property
    def graph_path(self):
        """Path of the cached graph file."""
        return os.path.join(self.save_path, self.save_name + ".bin")

    @property
    def info_path(self):
        """Path of the cached info (node/relation counts) file."""
        return os.path.join(self.save_path, self.save_name + ".pkl")

    def has_cache(self):
        """Return True only when both the graph and the info file exist."""
        return os.path.exists(self.graph_path) and os.path.exists(
            self.info_path
        )

    def __getitem__(self, idx):
        assert idx == 0, "This dataset has only one graph"
        if self._transform is not None:
            return self._transform(self._g)
        return self._g

    def __len__(self):
        return 1

    def save(self):
        """save the graph list and the labels"""
        save_graphs(str(self.graph_path), self._g)
        save_info(
            str(self.info_path),
            {"num_nodes": self.num_nodes, "num_rels": self.num_rels},
        )

    def load(self):
        """Restore the graph and the counts from the cache files.

        Besides reading the files, every mask in ``edata`` is passed through
        ``generate_mask_tensor`` and the ``_train``/``_valid``/``_test``
        triplet arrays are rebuilt from the positive-edge masks.
        """
        graphs, _ = load_graphs(str(self.graph_path))

        info = load_info(str(self.info_path))
        self._num_nodes = info["num_nodes"]
        self._num_rels = info["num_rels"]
        self._g = graphs[0]
        edata = self._g.edata

        # Read the positive-edge masks before they are converted below.
        positive_masks = {
            "train": edata["train_edge_mask"].numpy(),
            "valid": edata["valid_edge_mask"].numpy(),
            "test": edata["test_edge_mask"].numpy(),
        }

        # convert mask tensor into bool tensor if possible
        for key in (
            "train_edge_mask",
            "valid_edge_mask",
            "test_edge_mask",
            "train_mask",
            "val_mask",
            "test_mask",
        ):
            self._g.edata[key] = generate_mask_tensor(
                self._g.edata[key].numpy()
            )

        # for compatibility (with 0.4.x) generate train_idx, valid_idx and test_idx
        etype = self._g.edata["etype"].numpy()
        self._etype = etype
        u, v = self._g.all_edges(form="uv")
        u = u.numpy()
        v = v.numpy()

        def _triplets(mask):
            # (source, relation, destination) rows of the edges selected by mask.
            idx = np.nonzero(mask == 1)
            return np.column_stack((u[idx], etype[idx], v[idx]))

        self._train = _triplets(positive_masks["train"])
        self._valid = _triplets(positive_masks["valid"])
        self._test = _triplets(positive_masks["test"])

        if self.verbose:
            print("# entities: {}".format(self.num_nodes))
            print("# relations: {}".format(self.num_rels))
            print("# training edges: {}".format(self._train.shape[0]))
            print("# validation edges: {}".format(self._valid.shape[0]))
            print("# testing edges: {}".format(self._test.shape[0]))

    @property
    def num_nodes(self):
        """Number of entities."""
        return self._num_nodes

    @property
    def num_rels(self):
        """Number of relation types."""
        return self._num_rels

    @property
    def save_name(self):
        """Base name (without extension) of the cache files."""
        return self.name + "_dgl_graph"
def _read_dictionary(filename):
    """Read an ``<id>\\t<name>`` file into a ``{name: id}`` dict.

    Parameters
    ----------
    filename : str
        Path of the tab-separated dictionary file.

    Returns
    -------
    dict
        Maps the second column (name) to the first column parsed as ``int``.
    """
    mapping = {}
    # The file is only read, so open it read-only ("r+" would demand write
    # permission and fail on read-only data).
    with open(filename, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            fields = line.split("\t")
            mapping[fields[1]] = int(fields[0])
    return mapping


def _read_triplets(filename):
    """Yield the tab-separated fields of every non-blank line of a file.

    Parameters
    ----------
    filename : str
        Path of the tab-separated triplet file.

    Yields
    ------
    list of str
        The fields of one line, with surrounding whitespace stripped first.
    """
    with open(filename, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            yield line.split("\t")


def _read_triplets_as_list(filename, entity_dict, relation_dict):
    """Read a triplet file and translate its names into integer ids.

    Parameters
    ----------
    filename : str
        Path of the tab-separated triplet file.
    entity_dict : dict
        Maps entity names to ids; used for the first and third field.
    relation_dict : dict
        Maps relation names to ids; used for the second field.

    Returns
    -------
    list of list
        One ``[subject_id, relation_id, object_id]`` entry per triplet.

    Raises
    ------
    KeyError
        If a name is missing from the corresponding dictionary.
    """
    return [
        [
            entity_dict[triplet[0]],
            relation_dict[triplet[1]],
            entity_dict[triplet[2]],
        ]
        for triplet in _read_triplets(filename)
    ]
def build_knowledge_graph(
    num_nodes, num_rels, train, valid, test, reverse=True
):
    """Create a DGL Homogeneous graph with heterograph info stored as node or edge features.

    Parameters
    ----------
    num_nodes : int
        Number of nodes of the resulting graph.
    num_rels : int
        Number of relation types; every relation id must be smaller.
    train, valid, test : iterable of (s, r, d)
        Triplets of source id, relation id and destination id.
    reverse : bool
        When exactly ``True``, a reverse edge ``d -> s`` with relation id
        ``r + num_rels`` is added for every triplet.

    Returns
    -------
    g : DGLGraph
        Graph whose edges are ordered as: forward edges grouped by relation
        (groups in order of first appearance), then reverse edges grouped the
        same way.
    data : tuple
        ``(etype, ntype, train_edge_mask, valid_edge_mask, test_edge_mask,
        train_mask, valid_mask, test_mask)``. The ``*_edge_mask`` entries
        select forward edges only; the other masks also select the reverse
        edges of the same split when ``reverse`` is ``True``.
    """
    # Split tags stored per edge.
    TRAIN, VALID, TEST = 1, 2, 3

    # relation key -> ([src], [dst], [split tag], [relation id]).
    # Dicts keep first-seen order, which fixes the final edge order.
    forward = {}
    backward = {}

    def _push(table, key, u, v, tag, rel_id):
        srcs, dsts, tags, rels = table.setdefault(key, ([], [], [], []))
        srcs.append(u)
        dsts.append(v)
        tags.append(tag)
        rels.append(rel_id)

    def _ingest(triplets, tag):
        for s, r, d in triplets:
            assert r < num_rels
            _push(forward, str(r), s, d, tag, r)
            if reverse is True:
                _push(backward, str(r + num_rels), d, s, tag, r + num_rels)

    _ingest(train, TRAIN)
    _ingest(valid, VALID)
    _ingest(test, TEST)

    src_parts = []
    dst_parts = []
    rel_parts = []
    tag_parts = []

    def _flush(table):
        for srcs, dsts, tags, rels in table.values():
            src_parts.append(np.asarray(srcs))
            dst_parts.append(np.asarray(dsts))
            rel_parts.append(np.asarray(rels))
            tag_parts.append(np.asarray(tags))

    _flush(forward)

    # Masks of the original (forward) edges. With reverse edges the tag
    # vector is padded with zeros so reverse edges are never selected.
    forward_tags = np.concatenate(tag_parts)
    if reverse is True:
        forward_tags = np.concatenate(
            [forward_tags, np.full((forward_tags.shape[0]), 0)]
        )
    train_edge_mask = generate_mask_tensor(forward_tags == TRAIN)
    valid_edge_mask = generate_mask_tensor(forward_tags == VALID)
    test_edge_mask = generate_mask_tensor(forward_tags == TEST)

    _flush(backward)

    s = np.concatenate(src_parts)
    d = np.concatenate(dst_parts)
    g = dgl_graph((s, d), num_nodes=num_nodes)
    etype = F.tensor(
        np.concatenate(rel_parts), dtype=F.data_type_dict["int64"]
    )

    if reverse is True:
        # Masks over forward and reverse edges of each split.
        all_tags = np.concatenate(tag_parts)
        train_mask = generate_mask_tensor(all_tags == TRAIN)
        valid_mask = generate_mask_tensor(all_tags == VALID)
        test_mask = generate_mask_tensor(all_tags == TEST)
    else:
        train_mask = train_edge_mask
        valid_mask = valid_edge_mask
        test_mask = test_edge_mask

    # here there is only one node type
    ntype = F.full_1d(
        num_nodes, 0, dtype=F.data_type_dict["int64"], ctx=F.cpu()
    )

    return g, (
        etype,
        ntype,
        train_edge_mask,
        valid_edge_mask,
        test_edge_mask,
        train_mask,
        valid_mask,
        test_mask,
    )
Attributes ---------- num_nodes: int Number of nodes num_rels: int Number of relation types Examples ---------- >>> dataset = FB15k237Dataset() >>> g = dataset.graph >>> e_type = g.edata['e_type'] >>> >>> # get data split >>> train_mask = g.edata['train_mask'] >>> val_mask = g.edata['val_mask'] >>> test_mask = g.edata['test_mask'] >>> >>> train_set = th.arange(g.num_edges())[train_mask] >>> val_set = th.arange(g.num_edges())[val_mask] >>> >>> # build train_g >>> train_edges = train_set >>> train_g = g.edge_subgraph(train_edges, relabel_nodes=False) >>> train_g.edata['e_type'] = e_type[train_edges]; >>> >>> # build val_g >>> val_edges = th.cat([train_edges, val_edges]) >>> val_g = g.edge_subgraph(val_edges, relabel_nodes=False) >>> val_g.edata['e_type'] = e_type[val_edges]; >>> >>> # Train, Validation and Test """ def __init__( self, reverse=True, raw_dir=None, force_reload=False, verbose=True, transform=None, ): name = "FB15k-237" super(FB15k237Dataset, self).__init__( name, reverse, raw_dir, force_reload, verbose, transform ) def __getitem__(self, idx): r"""Gets the graph object Parameters ----------- idx: int Item index, FB15k237Dataset has only one graph object Return ------- :class:`dgl.DGLGraph` The graph contains - ``edata['e_type']``: edge relation type - ``edata['train_edge_mask']``: positive training edge mask - ``edata['val_edge_mask']``: positive validation edge mask - ``edata['test_edge_mask']``: positive testing edge mask - ``edata['train_mask']``: training edge set mask (include reversed training edges) - ``edata['val_mask']``: validation edge set mask (include reversed validation edges) - ``edata['test_mask']``: testing edge set mask (include reversed testing edges) - ``ndata['ntype']``: node type. 
All 0 in this dataset """ return super(FB15k237Dataset, self).__getitem__(idx) def __len__(self): r"""The number of graphs in the dataset.""" return super(FB15k237Dataset, self).__len__() class FB15kDataset(KnowledgeGraphDataset): r"""FB15k link prediction dataset. The FB15K dataset was introduced in `Translating Embeddings for Modeling Multi-relational Data `_. It is a subset of Freebase which contains about 14,951 entities with 1,345 different relations. When creating the dataset, a reverse edge with reversed relation types are created for each edge by default. FB15k dataset statistics: - Nodes: 14,951 - Number of relation types: 1,345 - Number of reversed relation types: 1,345 - Label Split: - Train: 483142 - Valid: 50000 - Test: 59071 Parameters ---------- reverse : bool Whether to add reverse edge. Default True. raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: True. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. 
Attributes ---------- num_nodes: int Number of nodes num_rels: int Number of relation types Examples ---------- >>> dataset = FB15kDataset() >>> g = dataset.graph >>> e_type = g.edata['e_type'] >>> >>> # get data split >>> train_mask = g.edata['train_mask'] >>> val_mask = g.edata['val_mask'] >>> >>> train_set = th.arange(g.num_edges())[train_mask] >>> val_set = th.arange(g.num_edges())[val_mask] >>> >>> # build train_g >>> train_edges = train_set >>> train_g = g.edge_subgraph(train_edges, relabel_nodes=False) >>> train_g.edata['e_type'] = e_type[train_edges]; >>> >>> # build val_g >>> val_edges = th.cat([train_edges, val_edges]) >>> val_g = g.edge_subgraph(val_edges, relabel_nodes=False) >>> val_g.edata['e_type'] = e_type[val_edges]; >>> >>> # Train, Validation and Test >>> """ def __init__( self, reverse=True, raw_dir=None, force_reload=False, verbose=True, transform=None, ): name = "FB15k" super(FB15kDataset, self).__init__( name, reverse, raw_dir, force_reload, verbose, transform ) def __getitem__(self, idx): r"""Gets the graph object Parameters ----------- idx: int Item index, FB15kDataset has only one graph object Return ------- :class:`dgl.DGLGraph` The graph contains - ``edata['e_type']``: edge relation type - ``edata['train_edge_mask']``: positive training edge mask - ``edata['val_edge_mask']``: positive validation edge mask - ``edata['test_edge_mask']``: positive testing edge mask - ``edata['train_mask']``: training edge set mask (include reversed training edges) - ``edata['val_mask']``: validation edge set mask (include reversed validation edges) - ``edata['test_mask']``: testing edge set mask (include reversed testing edges) - ``ndata['ntype']``: node type. All 0 in this dataset """ return super(FB15kDataset, self).__getitem__(idx) def __len__(self): r"""The number of graphs in the dataset.""" return super(FB15kDataset, self).__len__() class WN18Dataset(KnowledgeGraphDataset): r"""WN18 link prediction dataset. 
The WN18 dataset was introduced in `Translating Embeddings for Modeling Multi-relational Data `_. It included the full 18 relations scraped from WordNet for roughly 41,000 synsets. When creating the dataset, a reverse edge with reversed relation types are created for each edge by default. WN18 dataset statistics: - Nodes: 40943 - Number of relation types: 18 - Number of reversed relation types: 18 - Label Split: - Train: 141442 - Valid: 5000 - Test: 5000 Parameters ---------- reverse : bool Whether to add reverse edge. Default True. raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: True. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_nodes: int Number of nodes num_rels: int Number of relation types Examples ---------- >>> dataset = WN18Dataset() >>> g = dataset.graph >>> e_type = g.edata['e_type'] >>> >>> # get data split >>> train_mask = g.edata['train_mask'] >>> val_mask = g.edata['val_mask'] >>> >>> train_set = th.arange(g.num_edges())[train_mask] >>> val_set = th.arange(g.num_edges())[val_mask] >>> >>> # build train_g >>> train_edges = train_set >>> train_g = g.edge_subgraph(train_edges, relabel_nodes=False) >>> train_g.edata['e_type'] = e_type[train_edges]; >>> >>> # build val_g >>> val_edges = th.cat([train_edges, val_edges]) >>> val_g = g.edge_subgraph(val_edges, relabel_nodes=False) >>> val_g.edata['e_type'] = e_type[val_edges]; >>> >>> # Train, Validation and Test >>> """ def __init__( self, reverse=True, raw_dir=None, force_reload=False, verbose=True, transform=None, ): name = "wn18" super(WN18Dataset, self).__init__( name, reverse, raw_dir, force_reload, verbose, transform ) def __getitem__(self, idx): 
def load_data(dataset):
    r"""Load knowledge graph dataset for RGCN link prediction tasks

    It supports three datasets: wn18, FB15k and FB15k-237

    Parameters
    ----------
    dataset: str
        The name of the dataset to load. Names are matched exactly
        (case-sensitive): ``"wn18"``, ``"FB15k"`` or ``"FB15k-237"``.

    Return
    ------
    The dataset object.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    if dataset == "wn18":
        return WN18Dataset()
    if dataset == "FB15k":
        return FB15kDataset()
    if dataset == "FB15k-237":
        return FB15k237Dataset()
    # Falling off the end used to return None, which only surfaced later as an
    # unrelated AttributeError at the call site; fail here with a clear message.
    raise ValueError(
        "Unknown dataset {!r}; expected one of 'wn18', 'FB15k', "
        "'FB15k-237'.".format(dataset)
    )
DGL dataset of Peptides-struct in the LRGB benchmark which contains 15,535 small peptides represented as their molecular graph (SMILES) with 11 regression targets derived from the peptide's 3D structure. The 11 regression targets were precomputed from molecules' 3D structure: - Inertia_mass_[a-c]: The principal component of the inertia of the mass, with some normalizations. (Sorted) - Inertia_valence_[a-c]: The principal component of the inertia of the Hydrogen atoms. This is basically a measure of the 3D distribution of hydrogens. (Sorted) - length_[a-c]: The length around the 3 main geometric axis of the 3D objects (without considering atom types). (Sorted) - Spherocity: SpherocityIndex descriptor computed by rdkit.Chem.rdMolDescriptors.CalcSpherocityIndex - Plane_best_fit: Plane of best fit (PBF) descriptor computed by rdkit.Chem.rdMolDescriptors.CalcPBF Reference ``_ Statistics: - Train examples: 10,873 - Valid examples: 2,331 - Test examples: 2,331 - Average number of nodes: 150.94 - Average number of edges: 307.30 - Number of atom types: 9 - Number of bond types: 3 Parameters ---------- raw_dir : str Directory to store all the downloaded raw datasets. Default: "~/.dgl/". force_reload : bool Whether to reload the dataset. Default: False. verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. smiles2graph : callable A callable function that converts a SMILES string into a graph object. 
* The default smiles2graph requires rdkit to be installed * Examples --------- >>> from dgl.data import PeptidesStructuralDataset >>> dataset = PeptidesStructuralDataset() >>> len(dataset) 15535 >>> dataset.num_atom_types 9 >>> graph, label = dataset[0] >>> graph Graph(num_nodes=119, num_edges=244, ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)} edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)}) >>> # support tensor to be index when transform is None >>> # see details in __getitem__ function >>> # get train dataset >>> split_dict = dataset.get_idx_split() >>> trainset = dataset[split_dict["train"]] >>> graph, label = trainset[0] >>> graph Graph(num_nodes=338, num_edges=682, ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)} edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)}) >>> # get subset of dataset >>> import torch >>> idx = torch.tensor([0, 1, 2]) >>> dataset_subset = dataset[idx] >>> graph, label = dataset_subset[0] >>> graph Graph(num_nodes=119, num_edges=244, ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)} edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)}) """ def __init__( self, raw_dir=None, force_reload=None, verbose=None, transform=None, smiles2graph=smiles2graph_OGB, ): self.smiles2graph = smiles2graph # MD5 hash of the dataset file. 
self.md5sum_data = "9786061a34298a0684150f2e4ff13f47" self.url_stratified_split = """ https://www.dropbox.com/s/9dfifzft1hqgow6/splits_random_stratified_peptide_structure.pickle?dl=1 """ self.md5sum_stratified_split = "5a0114bdadc80b94fc7ae974f13ef061" self.graphs = [] self.labels = [] super().__init__( name="Peptides-struc", raw_dir=raw_dir, url=""" https://www.dropbox.com/s/464u3303eu2u4zp/peptide_structure_dataset.csv.gz?dl=1 """, force_reload=force_reload, verbose=verbose, transform=transform, ) @property def raw_data_path(self): r"""Path to save the raw dataset file.""" return os.path.join(self.raw_path, "peptide_structure_dataset.csv.gz") @property def split_data_path(self): r"""Path to save the dataset split file.""" return os.path.join( self.raw_path, "splits_random_stratified_peptide_structure.pickle" ) @property def graph_path(self): r"""Path to save the processed dataset file.""" return os.path.join(self.save_path, "Peptides-struc.bin") @property def num_atom_types(self): r"""Number of atom types.""" return 9 @property def num_bond_types(self): r"""Number of bond types.""" return 3 def _md5sum(self, path): hash_md5 = hashlib.md5() with open(path, "rb") as file: buffer = file.read() hash_md5.update(buffer) return hash_md5.hexdigest() def download(self): path = download(self.url, path=self.raw_data_path) # Save to disk the MD5 hash of the downloaded file. hash_data = self._md5sum(path) if hash_data != self.md5sum_data: raise ValueError("Unexpected MD5 hash of the downloaded file") open(os.path.join(self.raw_path, hash_data), "w").close() # Download train/val/test splits. 
def process(self):
    """Convert the raw CSV into DGL graphs and a label tensor.

    Reads the CSV at ``self.raw_data_path``, standardizes the 11 target
    columns in place, turns every SMILES string into a graph through
    ``self.smiles2graph`` and appends the results to ``self.graphs`` /
    ``self.labels``. ``self.labels`` is finally replaced by a float32 tensor.
    """
    frame = pd.read_csv(self.raw_data_path)
    smiles_column = frame["smiles"]
    target_names = [
        "Inertia_mass_a",
        "Inertia_mass_b",
        "Inertia_mass_c",
        "Inertia_valence_a",
        "Inertia_valence_b",
        "Inertia_valence_c",
        "length_a",
        "length_b",
        "length_c",
        "Spherocity",
        "Plane_best_fit",
    ]

    def _standardize(column):
        # Zero mean and unit standard deviation, computed per target column.
        return (column - column.mean()) / column.std()

    frame.loc[:, target_names] = frame.loc[:, target_names].apply(
        _standardize, axis=0
    )

    if self.verbose:
        print("Converting SMILES strings into graphs...")

    for row in tqdm(range(len(smiles_column))):
        smiles = smiles_column[row]
        targets = frame.iloc[row][target_names]
        mol = self.smiles2graph(smiles)

        edge_index = mol["edge_index"]
        node_count = mol["num_nodes"]

        # The converter must return one feature row per edge and per node.
        assert len(mol["edge_feat"]) == edge_index.shape[1]
        assert len(mol["node_feat"]) == node_count

        dgl_g = dgl_graph(
            (edge_index[0], edge_index[1]),
            num_nodes=node_count,
        )
        edge_feat = F.zerocopy_from_numpy(mol["edge_feat"])
        dgl_g.edata["feat"] = edge_feat.to(F.int64)
        node_feat = F.zerocopy_from_numpy(mol["node_feat"])
        dgl_g.ndata["feat"] = node_feat.to(F.int64)

        self.graphs.append(dgl_g)
        self.labels.append(targets)

    self.labels = F.tensor(self.labels, dtype=F.float32)
class PeptidesFunctionalDataset(DGLDataset):
    r"""Peptides functional dataset for the graph classification task.

    DGL dataset of Peptides-func in the LRGB benchmark which contains
    15,535 peptides represented as their molecular graph (SMILES) with
    10-way multi-task binary classification of their functional classes.

    The 10 classes represent the following functional classes (in order):
        ['antifungal', 'cell_cell_communication', 'anticancer',
        'drug_delivery_vehicle', 'antimicrobial', 'antiviral',
        'antihypertensive', 'antibacterial', 'antiparasitic', 'toxic']

    Reference `<https://arxiv.org/abs/2206.08164.pdf>`_

    Statistics:

    - Train examples: 10,873
    - Valid examples: 2,331
    - Test examples: 2,331
    - Average number of nodes: 150.94
    - Average number of edges: 307.30
    - Number of atom types: 9
    - Number of bond types: 3

    Parameters
    ----------
    raw_dir : str
        Directory to store all the downloaded raw datasets.
        Default: "~/.dgl/".
    force_reload : bool
        Whether to reload the dataset.
        Default: False.
    verbose : bool
        Whether to print out progress information.
        Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.
    smiles2graph : callable
        A callable function that converts a SMILES string into a graph object.
        * The default smiles2graph requires rdkit to be installed *

    Examples
    ---------
    >>> from dgl.data import PeptidesFunctionalDataset

    >>> dataset = PeptidesFunctionalDataset()
    >>> len(dataset)
    15535
    >>> dataset.num_classes
    10
    >>> graph, label = dataset[0]
    >>> graph
    Graph(num_nodes=119, num_edges=244,
        ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
        edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})

    >>> # support tensor to be index when transform is None
    >>> # see details in __getitem__ function
    >>> # get train dataset
    >>> split_dict = dataset.get_idx_split()
    >>> trainset = dataset[split_dict["train"]]
    >>> graph, label = trainset[0]
    >>> graph
    Graph(num_nodes=338, num_edges=682,
        ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
        edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})

    >>> # get subset of dataset
    >>> import torch
    >>> idx = torch.tensor([0, 1, 2])
    >>> dataset_subset = dataset[idx]
    >>> graph, label = dataset_subset[0]
    >>> graph
    Graph(num_nodes=119, num_edges=244,
        ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
        edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
    """

    def __init__(
        self,
        raw_dir=None,
        force_reload=None,
        verbose=None,
        transform=None,
        smiles2graph=smiles2graph_OGB,
    ):
        self.smiles2graph = smiles2graph
        # MD5 hash of the dataset file.
        self.md5sum_data = "701eb743e899f4d793f0e13c8fa5a1b4"
        self.url_stratified_split = """
        https://www.dropbox.com/s/j4zcnx2eipuo0xz/splits_random_stratified_peptide.pickle?dl=1
        """
        self.md5sum_stratified_split = "5a0114bdadc80b94fc7ae974f13ef061"

        # These containers are assigned before the base-class constructor
        # runs. NOTE(review): presumably the base constructor triggers
        # download/process/load, which fill them - confirm against DGLDataset.
        self.graphs = []
        self.labels = []

        super().__init__(
            name="Peptides-func",
            raw_dir=raw_dir,
            url="""
            https://www.dropbox.com/s/ol2v01usvaxbsr8/peptide_multi_class_dataset.csv.gz?dl=1
            """,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def raw_data_path(self):
        r"""Path to save the raw dataset file."""
        return os.path.join(self.raw_path, "peptide_multi_class_dataset.csv.gz")

    @property
    def split_data_path(self):
        r"""Path to save the dataset split file."""
        return os.path.join(
            self.raw_path, "splits_random_stratified_peptide.pickle"
        )

    @property
    def graph_path(self):
        r"""Path to save the processed dataset file."""
        return os.path.join(self.save_path, "Peptides-func.bin")

    @property
    def num_atom_types(self):
        r"""Number of atom types."""
        return 9

    @property
    def num_bond_types(self):
        r"""Number of bond types."""
        return 3

    @property
    def num_classes(self):
        r"""Number of graph classes."""
        return 10

    def _md5sum(self, path):
        r"""Return the hexadecimal MD5 digest of the file at ``path``."""
        hash_md5 = hashlib.md5()
        with open(path, "rb") as file:
            # Hash in fixed-size chunks so the whole file never has to be
            # held in memory; the digest is the same as a single update.
            for chunk in iter(lambda: file.read(1 << 20), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    def download(self):
        r"""Download the raw CSV and the split file, checking their MD5.

        Raises
        ------
        ValueError
            If either downloaded file does not match its expected MD5 hash.
        """
        path = download(self.url, path=self.raw_data_path)
        # Save to disk the MD5 hash of the downloaded file.
        hash_data = self._md5sum(path)
        if hash_data != self.md5sum_data:
            raise ValueError("Unexpected MD5 hash of the downloaded file")
        # An empty marker file named after the digest is left in raw_path.
        open(os.path.join(self.raw_path, hash_data), "w").close()
        # Download train/val/test splits.
        path_split = download(
            self.url_stratified_split, path=self.split_data_path
        )
        hash_split = self._md5sum(path_split)
        if hash_split != self.md5sum_stratified_split:
            raise ValueError("Unexpected MD5 hash of the split file")

    def process(self):
        r"""Convert the raw CSV into DGL graphs and a float32 label tensor."""
        # Function-scope import keeps this block self-contained.
        import ast

        data_df = pd.read_csv(self.raw_data_path)
        smiles_list = data_df["smiles"]
        label_column = data_df["labels"]
        if self.verbose:
            print("Converting SMILES strings into graphs...")
        for i in tqdm(range(len(smiles_list))):
            smiles = smiles_list[i]
            graph = self.smiles2graph(smiles)

            # The converter must return one feature row per edge and node.
            assert len(graph["edge_feat"]) == graph["edge_index"].shape[1]
            assert len(graph["node_feat"]) == graph["num_nodes"]

            DGLgraph = dgl_graph(
                (graph["edge_index"][0], graph["edge_index"][1]),
                num_nodes=graph["num_nodes"],
            )
            DGLgraph.edata["feat"] = F.zerocopy_from_numpy(
                graph["edge_feat"]
            ).to(F.int64)
            DGLgraph.ndata["feat"] = F.zerocopy_from_numpy(
                graph["node_feat"]
            ).to(F.int64)
            self.graphs.append(DGLgraph)
            # Each cell stores the label list as text. Parse it as a Python
            # literal instead of eval() so that file contents can never be
            # executed as code.
            self.labels.append(ast.literal_eval(label_column.iloc[i]))

        self.labels = F.tensor(self.labels, dtype=F.float32)

    def load(self):
        r"""Load the cached graphs and labels from ``graph_path``."""
        self.graphs, label_dict = load_graphs(self.graph_path)
        self.labels = label_dict["labels"]

    def save(self):
        r"""Save the processed graphs and labels to ``graph_path``."""
        save_graphs(
            self.graph_path, self.graphs, labels={"labels": self.labels}
        )

    def has_cache(self):
        r"""Whether the processed dataset file already exists on disk."""
        return os.path.exists(self.graph_path)

    def get_idx_split(self):
        """Get dataset splits.

        Returns:
            Dict with 'train', 'val', 'test', splits indices.
        """
        # NOTE(review): pickle.load can execute arbitrary code. The file is
        # MD5-checked in download(), but nothing re-verifies it here.
        with open(self.split_data_path, "rb") as file:
            split_dict = pickle.load(file)
        for key in split_dict.keys():
            split_dict[key] = F.zerocopy_from_numpy(split_dict[key])
        return split_dict

    def __len__(self):
        r"""The number of examples in the dataset."""
        return len(self.graphs)

    def __getitem__(self, idx):
        """Get the idx-th sample.

        Parameters
        ---------
        idx : int or tensor
            The sample index.
            1-D tensor as `idx` is allowed when transform is None.

        Returns
        -------
        (:class:`dgl.DGLGraph`, Tensor)
            Graph with node feature stored in ``feat`` field and its label.
        or
        :class:`dgl.data.utils.Subset`
            Subset of the dataset at specified indices

        Raises
        ------
        ValueError
            If ``idx`` is a 1-D tensor while a transform is set.
        """
        if F.is_tensor(idx) and idx.dim() == 1:
            if self._transform is None:
                return Subset(self, idx.cpu())
            raise ValueError(
                "Tensor idx not supported when transform is not None."
            )

        if self._transform is None:
            return self.graphs[idx], self.labels[idx]
        return self._transform(self.graphs[idx]), self.labels[idx]
force_reload : bool Whether to reload the dataset. Default: False. verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Examples --------- >>> from dgl.data import VOCSuperpixelsDataset >>> train_dataset = VOCSuperpixelsDataset(split="train") >>> len(train_dataset) 8498 >>> train_dataset.num_classes 21 >>> graph = train_dataset[0] >>> graph Graph(num_nodes=460, num_edges=2632, ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int32)} edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)}) >>> # support tensor to be index when transform is None >>> # see details in __getitem__ function >>> import torch >>> idx = torch.tensor([0, 1, 2]) >>> train_dataset_subset = train_dataset[idx] >>> train_dataset_subset[0] Graph(num_nodes=460, num_edges=2632, ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int32)} edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)}) """ urls = { 10: { "edge_wt_only_coord": """ https://www.dropbox.com/s/rk6pfnuh7tq3t37/voc_superpixels_edge_wt_only_coord.zip?dl=1 """, "edge_wt_coord_feat": """ https://www.dropbox.com/s/2a53nmfp6llqg8y/voc_superpixels_edge_wt_coord_feat.zip?dl=1 """, "edge_wt_region_boundary": """ https://www.dropbox.com/s/6pfz2mccfbkj7r3/voc_superpixels_edge_wt_region_boundary.zip?dl=1 """, }, 30: { "edge_wt_only_coord": """ https://www.dropbox.com/s/toqulkdpb1jrswk/voc_superpixels_edge_wt_only_coord.zip?dl=1 """, "edge_wt_coord_feat": """ https://www.dropbox.com/s/xywki8ysj63584d/voc_superpixels_edge_wt_coord_feat.zip?dl=1 """, "edge_wt_region_boundary": """ https://www.dropbox.com/s/8x722ai272wqwl4/voc_superpixels_edge_wt_region_boundary.zip?dl=1 """, }, } def __init__( self, 
def download(self):
    r"""Fetch the zip archive, unpack it and move it into ``save_path``.

    The archive is downloaded into ``raw_path``, extracted there, the
    extracted ``voc_superpixels_<construct_format>`` directory is renamed to
    ``save_path`` and the zip file is deleted afterwards.
    """
    # The archive and the directory it unpacks to share the same stem.
    archive_stem = "voc_superpixels_" + self.construct_format
    zip_file_path = os.path.join(self.raw_path, archive_stem + ".zip")

    # ``download`` here is the module-level helper, not this method.
    fetched = download(self.url, path=zip_file_path)
    extract_archive(fetched, self.raw_path, overwrite=True)

    makedirs(self.save_path)
    extracted_dir = os.path.join(self.raw_path, archive_stem)
    os.rename(extracted_dir, self.save_path)

    os.unlink(fetched)
def __getitem__(self, idx):
    r"""Get the idx-th sample.

    Parameters
    ---------
    idx : int or tensor
        The sample index.
        1-D tensor as `idx` is allowed when transform is None.

    Returns
    -------
    :class:`dgl.DGLGraph`
        graph structure, node features, node labels and edge features.

        - ``ndata['feat']``: node features
        - ``ndata['label']``: node labels
        - ``edata['feat']``: edge features
    or
    :class:`dgl.data.utils.Subset`
        Subset of the dataset at specified indices
    """
    index_is_1d_tensor = F.is_tensor(idx) and idx.dim() == 1
    if index_is_1d_tensor:
        # A tensor of indices selects a subset, which is only available when
        # no transform has been set.
        if self._transform is not None:
            raise ValueError(
                "Tensor idx not supported when transform is not None."
            )
        return Subset(self, idx.cpu())

    if self._transform is not None:
        return self._transform(self.graphs[idx])
    return self.graphs[idx]
split : str Should be chosen from ["train", "val", "test"] Default: "train". construct_format : str, optional Option to select the graph construction format. Should be chosen from the following formats: - "edge_wt_only_coord": the graphs are 8-nn graphs with the edge weights computed based on only spatial coordinates of superpixel nodes. - "edge_wt_coord_feat": the graphs are 8-nn graphs with the edge weights computed based on combination of spatial coordinates and feature values of superpixel nodes. - "edge_wt_region_boundary": the graphs region boundary graphs where two regions (i.e. superpixel nodes) have an edge between them if they share a boundary in the original image. Default: "edge_wt_region_boundary". slic_compactness : int, optional Option to select compactness of slic that was used for superpixels Should be chosen from [10, 30] Default: 30. force_reload : bool Whether to reload the dataset. Default: False. verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. 
Examples --------- >>> from dgl.data import COCOSuperpixelsDataset >>> train_dataset = COCOSuperpixelsDataset(split="train") >>> len(train_dataset) 113286 >>> train_dataset.num_classes 81 >>> graph = train_dataset[0] >>> graph Graph(num_nodes=488, num_edges=2766, ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.uint8)} edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)}) >>> # support tensor to be index when transform is None >>> # see details in __getitem__ function >>> import torch >>> idx = torch.tensor([0, 1, 2]) >>> train_dataset_subset = train_dataset[idx] >>> train_dataset_subset[0] Graph(num_nodes=488, num_edges=2766, ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.uint8)} edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)}) """ urls = { 10: { "edge_wt_only_coord": """ https://www.dropbox.com/s/prqizdep8gk0ndk/coco_superpixels_edge_wt_only_coord.zip?dl=1 """, "edge_wt_coord_feat": """ https://www.dropbox.com/s/zftoyln1pkcshcg/coco_superpixels_edge_wt_coord_feat.zip?dl=1 """, "edge_wt_region_boundary": """ https://www.dropbox.com/s/fhihfcyx2y978u8/coco_superpixels_edge_wt_region_boundary.zip?dl=1 """, }, 30: { "edge_wt_only_coord": """ https://www.dropbox.com/s/hrbfkxmc5z9lsaz/coco_superpixels_edge_wt_only_coord.zip?dl=1 """, "edge_wt_coord_feat": """ https://www.dropbox.com/s/4rfa2d5ij1gfu9b/coco_superpixels_edge_wt_coord_feat.zip?dl=1 """, "edge_wt_region_boundary": """ https://www.dropbox.com/s/r6ihg1f4pmyjjy0/coco_superpixels_edge_wt_region_boundary.zip?dl=1 """, }, } def __init__( self, raw_dir=None, split="train", construct_format="edge_wt_region_boundary", slic_compactness=30, force_reload=None, verbose=None, transform=None, ): assert split in ["train", "val", "test"], "split not valid." assert construct_format in [ "edge_wt_only_coord", "edge_wt_coord_feat", "edge_wt_region_boundary", ], "construct_format not valid." 
def label_remap(self):
    """Return a dict mapping raw label ids to contiguous class indices.

    The raw label ids used by the dataset have gaps (e.g. 12 and 26 are
    absent), so each id is mapped to its position in the ascending list of
    ids that do occur.

    Returns
    -------
    dict
        ``{raw_label_id: contiguous_index}`` with one entry per raw id.
    """
    # fmt: off
    raw_label_ids = (
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
        23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36,
        37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48,
        49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
        60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74,
        75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86,
        87, 88, 89, 90,
    )
    # fmt: on
    return {raw_id: position for position, raw_id in enumerate(raw_label_ids)}
def save(self):
    """Pickle the processed graph list into the file at ``self.graph_path``."""
    destination = self.graph_path
    with open(destination, "wb") as sink:
        pickle.dump(self.graphs, sink)
) if self._transform is None: return self.graphs[idx] return self._transform(self.graphs[idx]) ================================================ FILE: python/dgl/data/minigc.py ================================================ """A mini synthetic dataset for graph classification benchmark.""" import math import os import networkx as nx import numpy as np from .. import backend as F from ..convert import from_networkx from ..transforms import add_self_loop from .dgl_dataset import DGLDataset from .utils import load_graphs, makedirs, save_graphs __all__ = ["MiniGCDataset"] class MiniGCDataset(DGLDataset): """The synthetic graph classification dataset class. The dataset contains 8 different types of graphs. - class 0 : cycle graph - class 1 : star graph - class 2 : wheel graph - class 3 : lollipop graph - class 4 : hypercube graph - class 5 : grid graph - class 6 : clique graph - class 7 : circular ladder graph Parameters ---------- num_graphs: int Number of graphs in this dataset. min_num_v: int Minimum number of nodes for graphs max_num_v: int Maximum number of nodes for graphs seed: int, default is 0 Random seed for data generation transform: callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. 
def __init__(
    self,
    num_graphs,
    min_num_v,
    max_num_v,
    seed=0,
    save_graph=True,
    force_reload=False,
    verbose=False,
    transform=None,
):
    """Record the generation settings and hand over to the base dataset.

    Parameters
    ----------
    num_graphs : int
        Number of graphs in the dataset.
    min_num_v : int
        Lower bound passed to the random node-count draw.
    max_num_v : int
        Upper bound passed to the random node-count draw.
    seed : int, optional
        Seed stored on the instance and used by ``process``.
    save_graph : bool, optional
        When false, ``save`` does not write the graphs to disk.
    force_reload, verbose, transform
        Forwarded unchanged to the base class constructor.
    """
    # The same four values identify the dataset configuration (hash key)
    # and are kept as instance attributes.
    generation_config = (num_graphs, min_num_v, max_num_v, seed)
    (
        self.num_graphs,
        self.min_num_v,
        self.max_num_v,
        self.seed,
    ) = generation_config
    self.save_graph = save_graph

    # Attributes are assigned before calling the base constructor, exactly
    # as before; presumably the base class may start processing right away
    # -- confirm against DGLDataset.
    base_kwargs = dict(
        name="minigc",
        hash_key=generation_config,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
    super(MiniGCDataset, self).__init__(**base_kwargs)
def has_cache(self):
    """Tell whether the serialized graph file for this configuration exists.

    The file name is derived from ``self.hash`` and looked up inside
    ``self.save_path``.
    """
    cache_file = os.path.join(
        self.save_path, "dgl_graph_{}.bin".format(self.hash)
    )
    return os.path.exists(cache_file)
def _gen_grid(self, n):
    """Append ``n`` grid graphs (class label 5) to the dataset.

    For every graph a node budget ``num_v`` is drawn from
    ``[self.min_num_v, self.max_num_v)``, a row count is chosen, and the
    column count is ``num_v // n_rows``; the resulting grid may therefore
    hold fewer than ``num_v`` nodes.

    Parameters
    ----------
    n : int
        Number of grid graphs to generate.

    Raises
    ------
    AssertionError
        If a drawn node budget is smaller than 4.
    """
    for _ in range(n):
        num_v = np.random.randint(self.min_num_v, self.max_num_v)
        assert num_v >= 4, (
            "We require a grid graph to contain at least two "
            "rows and two columns, thus 4 nodes, got {:d} "
            "nodes".format(num_v)
        )
        # np.random.randint(low, high) needs low < high, so the draw from
        # [2, num_v // 2) only works for num_v >= 6.  A budget of 4 or 5
        # nodes passes the assertion above but used to fail in randint;
        # fall back to the minimal 2-row layout in that case.  Budgets of
        # 6 or more consume the random stream exactly as before.
        row_upper_bound = num_v // 2
        if row_upper_bound > 2:
            n_rows = np.random.randint(2, row_upper_bound)
        else:
            n_rows = 2
        n_cols = num_v // n_rows
        g = nx.grid_graph([n_rows, n_cols])
        # grid_graph labels nodes with coordinate tuples; relabel them with
        # consecutive integers.
        g = nx.convert_node_labels_to_integers(g)
        self.graphs.append(g)
        self.labels.append(5)
# Probe for PyTorch once at import time; HAS_TORCH records the outcome.
HAS_TORCH = True
try:
    import torch
except ImportError:
    HAS_TORCH = False


def check_pytorch():
    """Raise ``ModuleNotFoundError`` unless PyTorch could be imported."""
    if HAS_TORCH:
        return
    raise ModuleNotFoundError(
        "MovieLensDataset requires PyTorch to be the backend."
    )
The :class:`~dgl.DGLGraph` object will be transformed before every access. random_state : int, optional Random seed used for random dataset split. Default: 0 Notes ----- - When :obj:`name` is :obj:`"ml-100k"`, the :obj:`test_ratio` is invalid, and the training ratio is equal to 1-:obj:`valid_ratio`. When :obj:`name` is :obj:`"ml-1m"` or :obj:`"ml-10m"`, the :obj:`test_ratio` is valid, and the training ratio is equal to 1-:obj:`valid_ratio`-:obj:`test_ratio`. - The number of edges is doubled to form an undirected(bidirected) graph structure. Examples -------- >>> from dgl.data import MovieLensDataset >>> dataset = MovieLensDataset(name='ml-100k', valid_ratio=0.2) >>> g = dataset[0] >>> g Graph(num_nodes={'movie': 1682, 'user': 943}, num_edges={('movie', 'movie-user', 'user'): 100000, ('user', 'user-movie', 'movie'): 100000}, metagraph=[('movie', 'user', 'movie-user'), ('user', 'movie', 'user-movie')]) >>> # get ratings of edges in the training graph. >>> rate = g.edges['user-movie'].data['rate'] # or rate = g.edges['movie-user'].data['rate'] >>> rate tensor([5., 5., 3., ..., 3., 3., 5.]) >>> # get train, valid and test mask of edges >>> train_mask = g.edges['user-movie'].data['train_mask'] >>> valid_mask = g.edges['user-movie'].data['valid_mask'] >>> test_mask = g.edges['user-movie'].data['test_mask'] >>> # get train, valid and test ratings >>> train_ratings = rate[train_mask] >>> valid_ratings = rate[valid_mask] >>> test_ratings = rate[test_mask] >>> # get input features of users >>> g.nodes["user"].data["feat"] # or g.nodes["movie"].data["feat"] for movie nodes tensor([[0.4800, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], [1.0600, 1.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], [0.4600, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], ..., [0.4000, 0.0000, 1.0000, ..., 0.0000, 0.0000, 0.0000], [0.9600, 1.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000], [0.4400, 0.0000, 1.0000, ..., 0.0000, 0.0000, 0.0000]]) """ _url = { "ml-100k": "dataset/ml-100k.zip", "ml-1m": 
def __init__(
    self,
    name,
    valid_ratio,
    test_ratio=None,
    raw_dir=None,
    force_reload=None,
    verbose=None,
    transform=None,
    random_state=0,
):
    """Validate the split settings, pick the genre list and build the dataset.

    Parameters
    ----------
    name : str
        One of ``"ml-100k"``, ``"ml-1m"`` or ``"ml-10m"``.
    valid_ratio : float
        Share of samples used for validation; must lie in (0.0, 1.0).
    test_ratio : float, optional
        Share of samples used for testing.  Required (and checked) for
        ``"ml-1m"`` and ``"ml-10m"``; for ``"ml-100k"`` a non-None value
        only triggers a warning.
    raw_dir, force_reload, verbose, transform
        Forwarded unchanged to the base class constructor.
    random_state : int, optional
        Seed stored for the random dataset split.
    """
    check_pytorch()
    assert name in (
        "ml-100k",
        "ml-1m",
        "ml-10m",
    ), f"currently movielens does not support {name}"

    # Sanity checks on the validation / test split ratios.
    assert (
        0.0 < valid_ratio < 1.0
    ), f"valid_ratio {valid_ratio} must be in (0.0, 1.0)"

    needs_test_ratio = name in ("ml-1m", "ml-10m")
    if needs_test_ratio:
        assert (
            test_ratio is not None and 0.0 < test_ratio < 1.0
        ), f"test_ratio({test_ratio}) must be set to a value in (0.0, 1.0) when using ml-1m and ml-10m"
        assert (
            0.0 < test_ratio + valid_ratio < 1.0
        ), f"test_ratio({test_ratio}) + valid_ratio({valid_ratio}) must be set to (0.0, 1.0) when using ml-1m and ml-10m"

    if name == "ml-100k" and test_ratio is not None:
        dgl_warning(
            f"test_ratio ({test_ratio}) is not set to None for ml-100k. "
            "Note that dataset split would not be affected by the test_ratio since "
            "testing samples of ml-100k have been pre-specified."
        )

    self.valid_ratio = valid_ratio
    self.test_ratio = test_ratio
    self.random_state = random_state

    # Select the genre vocabulary that belongs to the requested variant.
    genre_choices = (
        ("ml-100k", GENRES_ML_100K),
        ("ml-1m", GENRES_ML_1M),
        ("ml-10m", GENRES_ML_10M),
    )
    for variant, genre_list in genre_choices:
        if name == variant:
            self.genres = genre_list
            break
    else:
        raise NotImplementedError

    super(MovieLensDataset, self).__init__(
        name=name,
        url=_get_dgl_url(self._url[name]),
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
def process(self):
    """Build ``self.graph`` from the raw MovieLens files.

    Steps, in order:

    0. Load the movie feature tensor from ``movie_feat.pkl``.
    1. Load the ratings and split them into train / valid / test frames.
       For ``ml-100k`` the test ratings come from ``u1.test`` and only
       ``u1.base`` is split into train / valid; for ``ml-1m`` and
       ``ml-10m`` the single ``ratings.dat`` file is split three ways.
    2. Load the user and movie tables and keep only the ids that occur in
       the ratings.
    3. Map raw ids to consecutive indices, build the heterograph with
       ``construct_g`` and attach the split masks with ``add_masks``.

    Side effects: sets ``self._global_user_id_map``,
    ``self._global_movie_id_map`` and ``self.graph``, and prints a start
    and an end message.
    """
    # NOTE(review): these messages are printed unconditionally, whereas
    # save() checks self.verbose first -- confirm whether that is intended.
    print(f"Starting processing {self.name} ...")

    # 0. loading movie features
    # NOTE(review): movie_feat is used exactly as loaded, while movie_data
    # below is filtered by _drop_unseen_nodes; presumably the pickle rows
    # already correspond to the retained movies -- confirm.
    movie_feat = load_info(
        os.path.join(self.raw_path, "movie_feat.pkl")
    ).to(torch.float)

    # 1. dataset split: train + (valid + ) test
    if self.name == "ml-100k":
        train_rating_data = self._load_raw_rates(
            os.path.join(self.raw_path, "u1.base"), "\t"
        )
        test_rating_data = self._load_raw_rates(
            os.path.join(self.raw_path, "u1.test"), "\t"
        )
        indices = np.arange(len(train_rating_data))
        # The third fraction is 0.0 because the test set is the separate
        # u1.test file; the third split result is discarded.
        train, valid, _ = split_dataset(
            indices,
            [1 - self.valid_ratio, self.valid_ratio, 0.0],
            shuffle=True,
            random_state=self.random_state,
        )
        train_rating_data, valid_rating_data = (
            train_rating_data.iloc[train.indices],
            train_rating_data.iloc[valid.indices],
        )
        all_rating_data = pd.concat(
            [train_rating_data, valid_rating_data, test_rating_data]
        )
    elif self.name == "ml-1m" or self.name == "ml-10m":
        all_rating_data = self._load_raw_rates(
            os.path.join(self.raw_path, "ratings.dat"), "::"
        )
        indices = np.arange(len(all_rating_data))
        train, valid, test = split_dataset(
            indices,
            [
                1 - self.valid_ratio - self.test_ratio,
                self.valid_ratio,
                self.test_ratio,
            ],
            shuffle=True,
            random_state=self.random_state,
        )
        train_rating_data, valid_rating_data, test_rating_data = (
            all_rating_data.iloc[train.indices],
            all_rating_data.iloc[valid.indices],
            all_rating_data.iloc[test.indices],
        )
    # There is no else branch: any other name would leave all_rating_data
    # undefined here (the constructor asserts the name is one of the three).

    # 2. load user and movie data, and drop those unseen in rating_data
    user_data = self._load_raw_user_data()
    movie_data = self._load_raw_movie_data()
    user_data = self._drop_unseen_nodes(
        data_df=user_data,
        col_name="id",
        reserved_ids_set=set(all_rating_data["user_id"].values),
    )
    movie_data = self._drop_unseen_nodes(
        data_df=movie_data,
        col_name="id",
        reserved_ids_set=set(all_rating_data["movie_id"].values),
    )

    user_feat = Tensor(self._process_user_feat(user_data))

    # 3. generate rating pairs
    # Map each raw user/movie id to its row position in the filtered table.
    # These maps must exist before _generate_pair_value is called, which
    # reads them.
    self._global_user_id_map = {
        ele: i for i, ele in enumerate(user_data["id"])
    }
    self._global_movie_id_map = {
        ele: i for i, ele in enumerate(movie_data["id"])
    }

    # pair value is idx rather than id
    u_indices, v_indices, labels = self._generate_pair_value(
        all_rating_data
    )
    all_rating_pairs = (
        LongTensor(u_indices),
        LongTensor(v_indices),
    )
    all_rating_values = Tensor(labels)

    graph = self.construct_g(
        all_rating_pairs, all_rating_values, user_feat, movie_feat
    )
    # Attach train / valid / test edge masks for both edge directions.
    self.graph = self.add_masks(
        graph, train_rating_data, valid_rating_data, test_rating_data
    )
    print(f"End processing {self.name} ...")
def has_cache(self):
    """Tell whether a usable processed graph is already on disk.

    The cache counts only if the graph file exists, the version file
    exists, and ``check_version()`` accepts the stored split settings.
    The checks run in that order and stop at the first failure.
    """
    if not os.path.exists(self.graph_path):
        return False
    if not os.path.exists(self.version_path):
        return False
    if not self.check_version():
        return False
    return True
def _process_user_feat(self, user_data):
    """Turn the user table into a float32 feature matrix.

    For ``ml-100k`` and ``ml-1m`` each row is
    ``[age / 50, is_female, one-hot(occupation)]``.  For ``ml-10m`` a
    single all-zero column is returned.

    Parameters
    ----------
    user_data : pandas.DataFrame
        User table; the ``age``, ``gender`` and ``occupation`` columns are
        read for ``ml-100k`` / ``ml-1m``, only the row count for ``ml-10m``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_users, num_features)`` with dtype float32.

    Raises
    ------
    NotImplementedError
        If ``self.name`` is not one of the supported dataset names.
    """
    num_users = user_data.shape[0]
    if self.name == "ml-100k" or self.name == "ml-1m":
        ages = user_data["age"].values.astype(np.float32)
        gender = (user_data["gender"] == "F").values.astype(np.float32)

        # Sort the distinct occupations so every occupation always lands in
        # the same one-hot column.  Iterating a bare set of strings yields
        # an order that depends on Python's per-process hash randomization,
        # which made the feature layout differ between runs.
        all_occupations = sorted(set(user_data["occupation"]))
        occupation_map = {ele: i for i, ele in enumerate(all_occupations)}
        # Explicit integer dtype keeps the index array valid even when the
        # table is empty.
        occupation_idx = np.array(
            [occupation_map[ele] for ele in user_data["occupation"]],
            dtype=np.int64,
        )
        occupation_one_hot = np.zeros(
            shape=(num_users, len(all_occupations)),
            dtype=np.float32,
        )
        occupation_one_hot[np.arange(num_users), occupation_idx] = 1

        user_features = np.concatenate(
            [
                ages.reshape((num_users, 1)) / 50.0,
                gender.reshape((num_users, 1)),
                occupation_one_hot,
            ],
            axis=1,
        )
    elif self.name == "ml-10m":
        # The ml-10m branch builds no user attributes; use a single zero
        # column as a placeholder feature.
        user_features = np.zeros(shape=(num_users, 1), dtype=np.float32)
    else:
        raise NotImplementedError
    return user_features
def _load_raw_rates(self, file_path, sep):
    """Read a header-less ratings file into a DataFrame.

    Parameters
    ----------
    file_path : str
        Path of the ratings file.
    sep : str
        Field separator; it may span several characters (e.g. ``"::"``),
        so the pure-Python parser engine is used.

    Returns
    -------
    pandas.DataFrame
        Columns ``user_id`` (int32), ``movie_id`` (int32), ``rating``
        (float32) and ``timestamp`` (int64), indexed from 0.
    """
    rating_data = pd.read_csv(
        file_path,
        sep=sep,
        header=None,
        names=["user_id", "movie_id", "rating", "timestamp"],
        dtype={
            "user_id": np.int32,
            "movie_id": np.int32,
            # The key has to match the column name "rating"; the former
            # key "ratings" matched no column, so float32 was never
            # applied to the rating values.
            "rating": np.float32,
            "timestamp": np.int64,
        },
        engine="python",
    )
    return rating_data.reset_index(drop=True)
def __repr__(self):
    """Return a one-line summary of the dataset and its split settings.

    The closing parenthesis now follows ``test_ratio``; it used to be
    emitted right after ``save_path``, leaving the two ratios outside the
    ``Dataset(...)`` group.
    """
    return (
        f'Dataset("{self.name}", num_graphs={len(self)},'
        f" save_path={self.raw_path}, valid_ratio={self.valid_ratio},"
        f" test_ratio={self.test_ratio})"
    )
Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: False transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_classes : int Number of classes for each node. Examples -------- >>> from dgl.data import PATTERNDataset >>> data = PATTERNDataset(mode='train') >>> data.num_classes 2 >>> len(data) 10000 >>> data[0] Graph(num_nodes=108, num_edges=4884, ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64), 'label': Scheme(shape=(), dtype=torch.int16)} edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)}) """ def __init__( self, mode="train", raw_dir=None, force_reload=False, verbose=False, transform=None, ): assert mode in ["train", "valid", "test"] self.mode = mode _url = _get_dgl_url("dataset/SBM_PATTERN.zip") super(PATTERNDataset, self).__init__( name="pattern", url=_url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): self.load() @property def graph_path(self): return os.path.join( self.save_path, "SBM_PATTERN_{}.bin".format(self.mode) ) def has_cache(self): return os.path.exists(self.graph_path) def load(self): self._graphs, _ = load_graphs(self.graph_path) @property def num_classes(self): r"""Number of classes for each node.""" return 2 def __len__(self): r"""The number of examples in the dataset.""" return len(self._graphs) def __getitem__(self, idx): r"""Get the idx^th sample. Parameters --------- idx : int The sample index. Returns ------- :class:`dgl.DGLGraph` graph structure, node features, node labels and edge features. 
- ``ndata['feat']``: node features - ``ndata['label']``: node labels - ``edata['feat']``: edge features """ if self._transform is None: return self._graphs[idx] else: return self._transform(self._graphs[idx]) ================================================ FILE: python/dgl/data/ppi.py ================================================ """ PPIDataset for inductive learning. """ import json import os import networkx as nx import numpy as np from networkx.readwrite import json_graph from .. import backend as F from ..convert import from_networkx from .dgl_dataset import DGLBuiltinDataset from .utils import _get_dgl_url, load_graphs, load_info, save_graphs, save_info class PPIDataset(DGLBuiltinDataset): r"""Protein-Protein Interaction dataset for inductive node classification A toy Protein-Protein Interaction network dataset. The dataset contains 24 graphs. The average number of nodes per graph is 2372. Each node has 50 features and 121 labels. 20 graphs for training, 2 for validation and 2 for testing. Reference: ``_ Statistics: - Train examples: 20 - Valid examples: 2 - Test examples: 2 Parameters ---------- mode : str Must be one of ('train', 'valid', 'test'). Default: 'train' raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_labels : int Number of labels for each node labels : Tensor Node labels features : Tensor Node features Examples -------- >>> dataset = PPIDataset(mode='valid') >>> num_classes = dataset.num_classes >>> for g in dataset: .... feat = g.ndata['feat'] .... label = g.ndata['label'] .... 
def process(self):
    """Load the raw PPI files and split them into per-graph subgraphs.

    Reads ``{mode}_graph.json``, ``{mode}_labels.npy``,
    ``{mode}_feats.npy`` and ``{mode}_graph_id.npy`` from
    ``self.save_path``, builds one large graph from the JSON node-link
    data, then extracts one subgraph per graph id in the range that
    belongs to ``self.mode``.

    Side effects: sets ``self._labels``, ``self._feats``, ``self.graph``
    and ``self.graphs``.  Each subgraph receives float32 ``feat`` and
    ``label`` node data.
    """
    graph_file = os.path.join(
        self.save_path, "{}_graph.json".format(self.mode)
    )
    label_file = os.path.join(
        self.save_path, "{}_labels.npy".format(self.mode)
    )
    feat_file = os.path.join(
        self.save_path, "{}_feats.npy".format(self.mode)
    )
    graph_id_file = os.path.join(
        self.save_path, "{}_graph_id.npy".format(self.mode)
    )

    # Use a context manager so the JSON file handle is closed as soon as
    # parsing finishes instead of being left to the garbage collector.
    with open(graph_file) as json_file:
        g_data = json.load(json_file)
    self._labels = np.load(label_file)
    self._feats = np.load(feat_file)
    self.graph = from_networkx(
        nx.DiGraph(json_graph.node_link_graph(g_data))
    )
    graph_id = np.load(graph_id_file)

    # lo, hi means the range of graph ids for different portion of the dataset,
    # 20 graphs for training, 2 for validation and 2 for testing.
    lo, hi = 1, 21
    if self.mode == "valid":
        lo, hi = 21, 23
    elif self.mode == "test":
        lo, hi = 23, 25

    self.graphs = []
    for g_id in range(lo, hi):
        # Indices of the nodes that carry this graph id.
        g_mask = np.where(graph_id == g_id)[0]
        g = self.graph.subgraph(g_mask)
        g.ndata["feat"] = F.tensor(
            self._feats[g_mask], dtype=F.data_type_dict["float32"]
        )
        g.ndata["label"] = F.tensor(
            self._labels[g_mask], dtype=F.data_type_dict["float32"]
        )
        self.graphs.append(g)
Parameters ---------- item : int The sample index. Returns ------- (dgl.DGLGraph, Tensor, Tensor) The graph, its node features and its node labels. """ if self._transform is None: g = self.graphs[item] else: g = self._transform(self.graphs[item]) return g, g.ndata["feat"], g.ndata["label"] ================================================ FILE: python/dgl/data/qm7b.py ================================================ """QM7b dataset for graph property prediction (regression).""" import os from scipy import io from .. import backend as F from ..convert import graph as dgl_graph from .dgl_dataset import DGLDataset from .utils import check_sha1, download, load_graphs, save_graphs class QM7bDataset(DGLDataset): r"""QM7b dataset for graph property prediction (regression) This dataset consists of 7,211 molecules with 14 regression targets. Nodes represent atoms and edges represent bonds. Edge data 'h' holds the corresponding entry of the Coulomb matrix. Reference: ``_ Statistics: - Number of graphs: 7,211 - Number of regression targets: 14 - Average number of nodes: 15 - Average number of edges: 245 - Edge feature size: 1 Parameters ---------- raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_tasks : int Number of prediction tasks num_labels : int (DEPRECATED, use num_tasks instead) Number of prediction tasks Raises ------ UserWarning If the raw data is changed in the remote server by the author. Examples -------- >>> data = QM7bDataset() >>> data.num_tasks 14 >>> >>> # iterate over the dataset >>> for g, label in data: ... edge_feat = g.edata['h'] # get edge feature ... # your code here... ...
>>> """ _url = ( "http://deepchem.io.s3-website-us-west-1.amazonaws.com/" "datasets/qm7b.mat" ) _sha1_str = "4102c744bb9d6fd7b40ac67a300e49cd87e28392" def __init__( self, raw_dir=None, force_reload=False, verbose=False, transform=None ): super(QM7bDataset, self).__init__( name="qm7b", url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): mat_path = os.path.join(self.raw_dir, self.name + ".mat") self.graphs, self.label = self._load_graph(mat_path) def _load_graph(self, filename): data = io.loadmat(filename) labels = F.tensor(data["T"], dtype=F.data_type_dict["float32"]) feats = data["X"] num_graphs = labels.shape[0] graphs = [] for i in range(num_graphs): edge_list = feats[i].nonzero() g = dgl_graph(edge_list) g.edata["h"] = F.tensor( feats[i][edge_list[0], edge_list[1]].reshape(-1, 1), dtype=F.data_type_dict["float32"], ) graphs.append(g) return graphs, labels def save(self): """save the graph list and the labels""" graph_path = os.path.join(self.save_path, "dgl_graph.bin") save_graphs(str(graph_path), self.graphs, {"labels": self.label}) def has_cache(self): graph_path = os.path.join(self.save_path, "dgl_graph.bin") return os.path.exists(graph_path) def load(self): graphs, label_dict = load_graphs( os.path.join(self.save_path, "dgl_graph.bin") ) self.graphs = graphs self.label = label_dict["labels"] def download(self): file_path = os.path.join(self.raw_dir, self.name + ".mat") download(self.url, path=file_path) if not check_sha1(file_path, self._sha1_str): raise UserWarning( "File {} is downloaded but the content hash does not match." "The repo may be outdated or download may be incomplete. 
" "Otherwise you can create an issue for it.".format(self.name) ) @property def num_tasks(self): """Number of prediction tasks.""" return self.num_labels @property def num_labels(self): """Number of prediction tasks.""" return 14 @property def num_classes(self): """Number of prediction tasks.""" return 14 def __getitem__(self, idx): r"""Get graph and label by index Parameters ---------- idx : int Item index Returns ------- (:class:`dgl.DGLGraph`, Tensor) """ if self._transform is None: g = self.graphs[idx] else: g = self._transform(self.graphs[idx]) return g, self.label[idx] def __len__(self): r"""Number of graphs in the dataset. Return ------- int """ return len(self.graphs) QM7b = QM7bDataset ================================================ FILE: python/dgl/data/qm9.py ================================================ """QM9 dataset for graph property prediction (regression).""" import os import numpy as np import scipy.sparse as sp from .. import backend as F from ..convert import graph as dgl_graph from ..transforms import to_bidirected from .dgl_dataset import DGLDataset from .utils import _get_dgl_url, download class QM9Dataset(DGLDataset): r"""QM9 dataset for graph property prediction (regression) This dataset consists of 130,831 molecules with 12 regression targets. Nodes correspond to atoms and edges correspond to close atom pairs. This dataset differs from :class:`~dgl.data.QM9EdgeDataset` in the following aspects: 1. Edges in this dataset are purely distance-based. 2. It only provides atoms' coordinates and atomic numbers as node features 3. It only provides 12 regression targets. 
Reference: - `"Quantum-Machine.org" `_, - `"Directional Message Passing for Molecular Graphs" `_ Statistics: - Number of graphs: 130,831 - Number of regression targets: 12 +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | Keys | Property | Description | Unit | +========+==================================+===================================================================================+=============================================+ | mu | :math:`\mu` | Dipole moment | :math:`\textrm{D}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | alpha | :math:`\alpha` | Isotropic polarizability | :math:`{a_0}^3` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | homo | :math:`\epsilon_{\textrm{HOMO}}` | Highest occupied molecular orbital energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | lumo | :math:`\epsilon_{\textrm{LUMO}}` | Lowest unoccupied molecular orbital energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | gap | :math:`\Delta \epsilon` | Gap between :math:`\epsilon_{\textrm{HOMO}}` and :math:`\epsilon_{\textrm{LUMO}}` | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | r2 | :math:`\langle R^2 \rangle` | Electronic spatial extent 
| :math:`{a_0}^2` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | zpve | :math:`\textrm{ZPVE}` | Zero point vibrational energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U0 | :math:`U_0` | Internal energy at 0K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U | :math:`U` | Internal energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | H | :math:`H` | Enthalpy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | G | :math:`G` | Free energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | Cv | :math:`c_{\textrm{v}}` | Heat capacity at 298.15K | :math:`\frac{\textrm{cal}}{\textrm{mol K}}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ Parameters ---------- label_keys : list Names of the regression properties, which should be a subset of the keys in the table above. cutoff : float Cutoff distance for interatomic interactions, i.e.
two atoms are connected in the corresponding graph if the distance between them is no larger than this. Default: 5.0 Angstrom raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_tasks : int Number of prediction tasks num_labels : int (DEPRECATED, use num_tasks instead) Number of prediction tasks Raises ------ UserWarning If the raw data is changed in the remote server by the author. Examples -------- >>> data = QM9Dataset(label_keys=['mu', 'gap'], cutoff=5.0) >>> data.num_tasks 2 >>> >>> # iterate over the dataset >>> for g, label in data: ... R = g.ndata['R'] # get coordinates of each atom ... Z = g.ndata['Z'] # get atomic numbers of each atom ... # your code here... >>> """ def __init__( self, label_keys, cutoff=5.0, raw_dir=None, force_reload=False, verbose=False, transform=None, ): self.cutoff = cutoff self.label_keys = label_keys self._url = _get_dgl_url("dataset/qm9_eV.npz") super(QM9Dataset, self).__init__( name="qm9", url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): npz_path = f"{self.raw_dir}/qm9_eV.npz" data_dict = np.load(npz_path, allow_pickle=True) # data_dict['N'] contains the number of atoms in each molecule. # Atomic properties (Z and R) of all molecules are concatenated as single tensors, # so you need this value to select the correct atoms for each molecule.
self.N = data_dict["N"] self.R = data_dict["R"] self.Z = data_dict["Z"] self.label = np.stack( [data_dict[key] for key in self.label_keys], axis=1 ) self.N_cumsum = np.concatenate([[0], np.cumsum(self.N)]) def download(self): file_path = f"{self.raw_dir}/qm9_eV.npz" if not os.path.exists(file_path): download(self._url, path=file_path) @property def num_labels(self): r""" Returns -------- int Number of prediction tasks. """ return self.label.shape[1] @property def num_classes(self): r""" Returns -------- int Number of prediction tasks. """ return self.label.shape[1] @property def num_tasks(self): r""" Returns -------- int Number of prediction tasks. """ return self.label.shape[1] def __getitem__(self, idx): r"""Get graph and label by index Parameters ---------- idx : int Item index Returns ------- dgl.DGLGraph The graph contains: - ``ndata['R']``: the coordinates of each atom - ``ndata['Z']``: the atomic number Tensor Property values of molecular graphs """ label = F.tensor(self.label[idx], dtype=F.data_type_dict["float32"]) n_atoms = self.N[idx] R = self.R[self.N_cumsum[idx] : self.N_cumsum[idx + 1]] dist = np.linalg.norm(R[:, None, :] - R[None, :, :], axis=-1) adj = sp.csr_matrix(dist <= self.cutoff) - sp.eye( n_atoms, dtype=np.bool_ ) adj = adj.tocoo() u, v = F.tensor(adj.row), F.tensor(adj.col) g = dgl_graph((u, v)) g = to_bidirected(g) g.ndata["R"] = F.tensor(R, dtype=F.data_type_dict["float32"]) g.ndata["Z"] = F.tensor( self.Z[self.N_cumsum[idx] : self.N_cumsum[idx + 1]], dtype=F.data_type_dict["int64"], ) if self._transform is not None: g = self._transform(g) return g, label def __len__(self): r"""Number of graphs in the dataset. Return ------- int """ return self.label.shape[0] QM9 = QM9Dataset ================================================ FILE: python/dgl/data/qm9_edge.py ================================================ """ QM9 dataset for graph property prediction (regression) """ import os import numpy as np from .. 
import backend as F from ..convert import graph as dgl_graph from .dgl_dataset import DGLDataset from .utils import _get_dgl_url, download, extract_archive class QM9EdgeDataset(DGLDataset): r"""QM9Edge dataset for graph property prediction (regression) This dataset consists of 130,831 molecules with 19 regression targets. Nodes correspond to atoms and edges correspond to bonds. This dataset differs from :class:`~dgl.data.QM9Dataset` in the following aspects: 1. It includes the bonds in a molecule in the edges of the corresponding graph while the edges in :class:`~dgl.data.QM9Dataset` are purely distance-based. 2. It provides edge features, and node features in addition to the atoms' coordinates and atomic numbers. 3. It provides another 7 regression tasks (from 12 to 19). This class is built based on a preprocessed version of the dataset, and we provide the preprocessing details `here `_. Reference: - `"MoleculeNet: A Benchmark for Molecular Machine Learning" `_ - `"Neural Message Passing for Quantum Chemistry" `_ Statistics: - Number of graphs: 130,831. - Number of regression targets: 19. Node attributes: - pos: the 3D coordinates of each atom. - attr: the 11D atom features. Edge attributes: - edge_attr: the 4D bond features.
Regression targets: +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | Keys | Property | Description | Unit | +========+==================================+===================================================================================+=============================================+ | mu | :math:`\mu` | Dipole moment | :math:`\textrm{D}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | alpha | :math:`\alpha` | Isotropic polarizability | :math:`{a_0}^3` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | homo | :math:`\epsilon_{\textrm{HOMO}}` | Highest occupied molecular orbital energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | lumo | :math:`\epsilon_{\textrm{LUMO}}` | Lowest unoccupied molecular orbital energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | gap | :math:`\Delta \epsilon` | Gap between :math:`\epsilon_{\textrm{HOMO}}` and :math:`\epsilon_{\textrm{LUMO}}` | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | r2 | :math:`\langle R^2 \rangle` | Electronic spatial extent | :math:`{a_0}^2` | 
+--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | zpve | :math:`\textrm{ZPVE}` | Zero point vibrational energy | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U0 | :math:`U_0` | Internal energy at 0K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U | :math:`U` | Internal energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | H | :math:`H` | Enthalpy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | G | :math:`G` | Free energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | Cv | :math:`c_{\textrm{v}}` | Heat capacity at 298.15K | :math:`\frac{\textrm{cal}}{\textrm{mol K}}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U0_atom| :math:`U_0^{\textrm{ATOM}}` | Atomization energy at 0K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | U_atom | :math:`U^{\textrm{ATOM}}` |
Atomization energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | H_atom | :math:`H^{\textrm{ATOM}}` | Atomization enthalpy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | G_atom | :math:`G^{\textrm{ATOM}}` | Atomization free energy at 298.15K | :math:`\textrm{eV}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | A | :math:`A` | Rotational constant | :math:`\textrm{GHz}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | B | :math:`B` | Rotational constant | :math:`\textrm{GHz}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ | C | :math:`C` | Rotational constant | :math:`\textrm{GHz}` | +--------+----------------------------------+-----------------------------------------------------------------------------------+---------------------------------------------+ Parameters ---------- label_keys : list Names of the regression property, which should be a subset of the keys in the table above. If not provided, it will load all the labels. raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False. verbose : bool Whether to print out progress information. Default: True. 
transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_tasks : int Number of prediction tasks num_labels : int (DEPRECATED, use num_tasks instead) Number of prediction tasks Raises ------ UserWarning If the raw data is changed in the remote server by the author. Examples -------- >>> data = QM9EdgeDataset(label_keys=['mu', 'alpha']) >>> data.num_tasks 2 >>> # iterate over the dataset >>> for graph, labels in data: ... print(graph) # get information of each graph ... print(labels) # get labels of the corresponding graph ... # your code here... >>> """ keys = [ "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "U0", "U", "H", "G", "Cv", "U0_atom", "U_atom", "H_atom", "G_atom", "A", "B", "C", ] map_dict = {} for i, key in enumerate(keys): map_dict[key] = i def __init__( self, label_keys=None, raw_dir=None, force_reload=False, verbose=True, transform=None, ): if label_keys is None: self.label_keys = None self.num_labels = 19 else: self.label_keys = [self.map_dict[i] for i in label_keys] self.num_labels = len(label_keys) self._url = _get_dgl_url("dataset/qm9_edge.npz") super(QM9EdgeDataset, self).__init__( name="qm9Edge", raw_dir=raw_dir, url=self._url, force_reload=force_reload, verbose=verbose, transform=transform, ) def download(self): if not os.path.exists(self.npz_path): download(self._url, path=self.npz_path) def process(self): self.load() @property def npz_path(self): return f"{self.raw_dir}/qm9_edge.npz" def has_cache(self): return os.path.exists(self.npz_path) def save(self): np.savez_compressed( self.npz_path, n_node=self.n_node, n_edge=self.n_edge, node_attr=self.node_attr, node_pos=self.node_pos, edge_attr=self.edge_attr, src=self.src, dst=self.dst, targets=self.targets, ) def load(self): data_dict = np.load(self.npz_path, allow_pickle=True) self.n_node = data_dict["n_node"] self.n_edge = 
data_dict["n_edge"] self.node_attr = data_dict["node_attr"] self.node_pos = data_dict["node_pos"] self.edge_attr = data_dict["edge_attr"] self.targets = data_dict["targets"] self.src = data_dict["src"] self.dst = data_dict["dst"] self.n_cumsum = np.concatenate([[0], np.cumsum(self.n_node)]) self.ne_cumsum = np.concatenate([[0], np.cumsum(self.n_edge)]) def __getitem__(self, idx): r"""Get graph and label by index Parameters ---------- idx : int Item index Returns ------- dgl.DGLGraph The graph contains: - ``ndata['pos']``: the coordinates of each atom - ``ndata['attr']``: the features of each atom - ``edata['edge_attr']``: the features of each bond Tensor Property values of molecular graphs """ pos = self.node_pos[self.n_cumsum[idx] : self.n_cumsum[idx + 1]] src = self.src[self.ne_cumsum[idx] : self.ne_cumsum[idx + 1]] dst = self.dst[self.ne_cumsum[idx] : self.ne_cumsum[idx + 1]] g = dgl_graph((src, dst)) g.ndata["pos"] = F.tensor(pos, dtype=F.data_type_dict["float32"]) g.ndata["attr"] = F.tensor( self.node_attr[self.n_cumsum[idx] : self.n_cumsum[idx + 1]], dtype=F.data_type_dict["float32"], ) g.edata["edge_attr"] = F.tensor( self.edge_attr[self.ne_cumsum[idx] : self.ne_cumsum[idx + 1]], dtype=F.data_type_dict["float32"], ) label = F.tensor( self.targets[idx][self.label_keys], dtype=F.data_type_dict["float32"], ) if self._transform is not None: g = self._transform(g) return g, label def __len__(self): r"""Number of graphs in the dataset. 
Returns ------- int """ return self.n_node.shape[0] @property def num_tasks(self): r""" Returns ------- int Number of prediction tasks """ return self.num_labels QM9Edge = QM9EdgeDataset ================================================ FILE: python/dgl/data/rdf.py ================================================ """RDF datasets Datasets from "A Collection of Benchmark Datasets for Systematic Evaluations of Machine Learning on the Semantic Web" """ import abc import itertools import os import re from collections import OrderedDict import networkx as nx import numpy as np import dgl import dgl.backend as F from .dgl_dataset import DGLBuiltinDataset from .utils import ( _get_dgl_url, generate_mask_tensor, idx2mask, load_graphs, load_info, save_graphs, save_info, ) __all__ = ["AIFBDataset", "MUTAGDataset", "BGSDataset", "AMDataset"] # Dictionary for renaming reserved node/edge type names to the ones # that are allowed by nn.Module. RENAME_DICT = { "type": "rdftype", "rev-type": "rev-rdftype", } class Entity: """Class for entities Parameters ---------- id : str ID of this entity cls : str Type of this entity """ def __init__(self, e_id, cls): self.id = e_id self.cls = cls def __str__(self): return "{}/{}".format(self.cls, self.id) class Relation: """Class for relations Parameters ---------- cls : str Type of this relation """ def __init__(self, cls): self.cls = cls def __str__(self): return str(self.cls) class RDFGraphDataset(DGLBuiltinDataset): """Base graph dataset class from RDF tuples. To derive from this, implement the following abstract methods: * ``parse_entity`` * ``parse_relation`` * ``process_tuple`` * ``process_idx_file_line`` * ``predict_category`` Preprocessed graph and other data will be cached in the download folder to speedup data loading. The dataset should contain a "trainingSet.tsv" and a "testSet.tsv" file for training and testing samples. 
Attributes ---------- num_classes : int Number of classes to predict predict_category : str The entity category (node type) that has labels for prediction Parameters ---------- name : str Name of the dataset url : str or path URL to download the raw dataset. predict_category : str Predict category. print_every : int, optional Preprocessing log for every X tuples. insert_reverse : bool, optional If true, add reverse edge and reverse relations to the final graph. raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool, optional If true, force load and process from raw data. Ignore cached pre-processed data. verbose : bool Whether to print out progress information. Default: True. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. """ def __init__( self, name, url, predict_category, print_every=10000, insert_reverse=True, raw_dir=None, force_reload=False, verbose=True, transform=None, ): self._insert_reverse = insert_reverse self._print_every = print_every self._predict_category = predict_category super(RDFGraphDataset, self).__init__( name, url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): raw_tuples = self.load_raw_tuples(self.raw_path) self.process_raw_tuples(raw_tuples, self.raw_path) def load_raw_tuples(self, root_path): """Loading raw RDF dataset Parameters ---------- root_path : str Root path containing the data Returns ------- Loaded rdf data """ import rdflib as rdf raw_rdf_graphs = [] for _, filename in enumerate(os.listdir(root_path)): fmt = None if filename.endswith("nt"): fmt = "nt" elif filename.endswith("n3"): fmt = "n3" if fmt is None: continue g = rdf.Graph() print("Parsing file %s ..." 
% filename) g.parse(os.path.join(root_path, filename), format=fmt) raw_rdf_graphs.append(g) return itertools.chain(*raw_rdf_graphs) def process_raw_tuples(self, raw_tuples, root_path): """Processing raw RDF dataset Parameters ---------- raw_tuples: Raw rdf tuples root_path: str Root path containing the data """ mg = nx.MultiDiGraph() ent_classes = OrderedDict() rel_classes = OrderedDict() entities = OrderedDict() src = [] dst = [] ntid = [] etid = [] sorted_tuples = [] for t in raw_tuples: sorted_tuples.append(t) sorted_tuples.sort() for i, (sbj, pred, obj) in enumerate(sorted_tuples): if self.verbose and i % self._print_every == 0: print( "Processed %d tuples, found %d valid tuples." % (i, len(src)) ) sbjent = self.parse_entity(sbj) rel = self.parse_relation(pred) objent = self.parse_entity(obj) processed = self.process_tuple( (sbj, pred, obj), sbjent, rel, objent ) if processed is None: # ignored continue # meta graph sbjclsid = _get_id(ent_classes, sbjent.cls) objclsid = _get_id(ent_classes, objent.cls) relclsid = _get_id(rel_classes, rel.cls) mg.add_edge(sbjent.cls, objent.cls, key=rel.cls) if self._insert_reverse: mg.add_edge(objent.cls, sbjent.cls, key="rev-%s" % rel.cls) # instance graph src_id = _get_id(entities, str(sbjent)) if len(entities) > len(ntid): # found new entity ntid.append(sbjclsid) dst_id = _get_id(entities, str(objent)) if len(entities) > len(ntid): # found new entity ntid.append(objclsid) src.append(src_id) dst.append(dst_id) etid.append(relclsid) src = np.asarray(src) dst = np.asarray(dst) ntid = np.asarray(ntid) etid = np.asarray(etid) ntypes = list(ent_classes.keys()) etypes = list(rel_classes.keys()) # add reverse edge with reverse relation if self._insert_reverse: if self.verbose: print("Adding reverse edges ...") newsrc = np.hstack([src, dst]) newdst = np.hstack([dst, src]) src = newsrc dst = newdst etid = np.hstack([etid, etid + len(etypes)]) etypes.extend(["rev-%s" % t for t in etypes]) hg = self.build_graph(mg, src, dst, ntid, etid, 
ntypes, etypes) if self.verbose: print("Load training/validation/testing split ...") idmap = F.asnumpy(hg.nodes[self.predict_category].data[dgl.NID]) glb2lcl = {glbid: lclid for lclid, glbid in enumerate(idmap)} def findidfn(ent): if ent not in entities: return None else: return glb2lcl[entities[ent]] self._hg = hg train_idx, test_idx, labels, num_classes = self.load_data_split( findidfn, root_path ) train_mask = idx2mask( train_idx, self._hg.num_nodes(self.predict_category) ) test_mask = idx2mask( test_idx, self._hg.num_nodes(self.predict_category) ) labels = F.tensor(labels, F.data_type_dict["int64"]) train_mask = generate_mask_tensor(train_mask) test_mask = generate_mask_tensor(test_mask) self._hg.nodes[self.predict_category].data["train_mask"] = train_mask self._hg.nodes[self.predict_category].data["test_mask"] = test_mask # TODO(minjie): Deprecate 'labels', use 'label' for consistency. self._hg.nodes[self.predict_category].data["labels"] = labels self._hg.nodes[self.predict_category].data["label"] = labels self._num_classes = num_classes def build_graph(self, mg, src, dst, ntid, etid, ntypes, etypes): """Build the graphs Parameters ---------- mg: MultiDiGraph Input graph src: Numpy array Source nodes dst: Numpy array Destination nodes ntid: Numpy array Node types for each node etid: Numpy array Edge types for each edge ntypes: list Node types etypes: list Edge types Returns ------- g: DGLGraph """ # create homo graph if self.verbose: print("Creating one whole graph ...") g = dgl.graph((src, dst)) g.ndata[dgl.NTYPE] = F.tensor(ntid) g.edata[dgl.ETYPE] = F.tensor(etid) if self.verbose: print("Total #nodes:", g.num_nodes()) print("Total #edges:", g.num_edges()) # rename names such as 'type' so that they an be used as keys # to nn.ModuleDict etypes = [RENAME_DICT.get(ty, ty) for ty in etypes] mg_edges = mg.edges(keys=True) mg = nx.MultiDiGraph() for sty, dty, ety in mg_edges: mg.add_edge(sty, dty, key=RENAME_DICT.get(ety, ety)) # convert to heterograph if 
def load_data_split(self, ent2id, root_path):
    """Read the training and testing split files.

    Parameters
    ----------
    ent2id: func
        A function mapping entity to id
    root_path: str
        Root path containing ``trainingSet.tsv`` and ``testSet.tsv``

    Return
    ------
    train_idx: Numpy array
        Training set
    test_idx: Numpy array
        Testing set
    labels: Numpy array
        Labels, one slot per node of the prediction category; slots that
        neither file assigns keep the value -1
    num_classes: int
        Number of classes
    """
    category_size = self._hg.num_nodes(self.predict_category)
    label_dict = {}
    # Every node starts unlabeled (-1); parse_idx_file fills slots in place.
    labels = np.full((category_size,), -1.0)

    # Training file first, then testing file: both share label_dict, so
    # label ids are handed out in that order.
    split_indices = []
    for split_file in ("trainingSet.tsv", "testSet.tsv"):
        found = self.parse_idx_file(
            os.path.join(root_path, split_file), ent2id, label_dict, labels
        )
        split_indices.append(np.array(found))

    train_idx, test_idx = split_indices
    return train_idx, test_idx, np.array(labels), len(label_dict)
def has_cache(self):
    """Tell whether both processed files (``.bin`` graph and ``.pkl`` info)
    are already present under ``save_path``."""
    cached_files = [
        os.path.join(self.save_path, self.save_name + extension)
        for extension in (".bin", ".pkl")
    ]
    # The cache only counts when the graph file and the info file both exist.
    return all(os.path.exists(path) for path in cached_files)
""" pass @abc.abstractmethod def parse_relation(self, term): """Parse one relation from an RDF term. Return None if the term does not represent a valid relation and the whole tuple should be ignored. Parameters ---------- term : rdflib.term.Identifier RDF term Returns ------- Relation or None A relation """ pass @abc.abstractmethod def process_tuple(self, raw_tuple, sbj, rel, obj): """Process the tuple. Return (Entity, Relation, Entity) tuple for as the final tuple. Return None if the tuple should be ignored. Parameters ---------- raw_tuple : tuple of rdflib.term.Identifier (subject, predicate, object) tuple sbj : Entity Subject entity rel : Relation Relation obj : Entity Object entity Returns ------- (Entity, Relation, Entity) The final tuple or None if should be ignored """ pass @abc.abstractmethod def process_idx_file_line(self, line): """Process one line of ``trainingSet.tsv`` or ``testSet.tsv``. Parameters ---------- line : str One line of the file Returns ------- (str, str) One sample and its label """ pass def _get_id(dict, key): id = dict.get(key, None) if id is None: id = len(dict) dict[key] = id return id class AIFBDataset(RDFGraphDataset): r"""AIFB dataset for node classification task AIFB DataSet is a Semantic Web (RDF) dataset used as a benchmark in data mining. It records the organizational structure of AIFB at the University of Karlsruhe. AIFB dataset statistics: - Nodes: 7262 - Edges: 48810 (including reverse edges) - Target Category: Personen - Number of Classes: 4 - Label Split: - Train: 140 - Test: 36 Parameters ----------- print_every : int Preprocessing log for every X tuples. Default: 10000. insert_reverse : bool If true, add reverse edge and reverse relations to the final graph. Default: True. raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: True. 
def __init__(
    self,
    print_every=10000,
    insert_reverse=True,
    raw_dir=None,
    force_reload=False,
    verbose=True,
    transform=None,
):
    """Set up the AIFB dataset; see the class docstring for the parameters."""
    from rdflib.term import URIRef

    # These two terms must exist before the base-class constructor runs,
    # because parse_relation compares incoming predicates against them.
    self.employs = URIRef("http://swrc.ontoware.org/ontology#employs")
    self.affiliation = URIRef(
        "http://swrc.ontoware.org/ontology#affiliation"
    )
    super(AIFBDataset, self).__init__(
        "aifb-hetero",
        _get_dgl_url("dataset/rdf/aifb-hetero.zip"),
        "Personen",
        print_every=print_every,
        insert_reverse=insert_reverse,
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
def process_idx_file_line(self, line):
    """Split one tab-separated line of the AIFB split file.

    Parameters
    ----------
    line : str
        One line of the file; it must hold exactly three tab-separated
        fields, of which the middle one is ignored.

    Returns
    -------
    (str, str)
        The sample (first field) and its label (third field)
    """
    columns = line.strip().split("\t")
    person, _ignored, label = columns
    return person, label
def __init__(
    self,
    print_every=10000,
    insert_reverse=True,
    raw_dir=None,
    force_reload=False,
    verbose=True,
    transform=None,
):
    """Set up the MUTAG dataset; see the class docstring for the parameters."""
    from rdflib.term import URIRef

    # RDF terms are stored on the instance before the base-class constructor
    # runs; parse_relation compares predicates against is_mutagenic.
    self.is_mutagenic = URIRef(
        "http://dl-learner.org/carcinogenesis#isMutagenic"
    )
    self.rdf_type = URIRef("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    self.rdf_subclassof = URIRef(
        "http://www.w3.org/2000/01/rdf-schema#subClassOf"
    )
    self.rdf_domain = URIRef("http://www.w3.org/2000/01/rdf-schema#domain")
    super(MUTAGDataset, self).__init__(
        "mutag-hetero",
        _get_dgl_url("dataset/rdf/mutag-hetero.zip"),
        "d",
        print_every=print_every,
        insert_reverse=insert_reverse,
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
def process_tuple(self, raw_tuple, sbj, rel, obj):
    """Finalize one MUTAG tuple, filling in missing entity classes.

    Parameters
    ----------
    raw_tuple : tuple of rdflib.term.Identifier
        (subject, predicate, object) tuple
    sbj : Entity
        Subject entity
    rel : Relation
        Relation
    obj : Entity
        Object entity

    Returns
    -------
    (Entity, Relation, Entity)
        The same three objects (``cls`` possibly updated in place), or None
        when any of them is None and the tuple should be ignored
    """
    if any(part is None for part in (sbj, rel, obj)):
        return None

    predicate = raw_tuple[1]
    # Objects of predicates outside the carcinogenesis namespace are
    # classified as schema nodes.
    if not predicate.startswith("http://dl-learner.org/carcinogenesis#"):
        obj.cls = "SCHEMA"
        if sbj.cls is None:
            sbj.cls = "SCHEMA"
    # An object that is still unclassified takes the relation's class.
    if obj.cls is None:
        obj.cls = rel.cls

    assert sbj.cls is not None and obj.cls is not None
    return (sbj, rel, obj)
def __init__(
    self,
    print_every=10000,
    insert_reverse=True,
    raw_dir=None,
    force_reload=False,
    verbose=True,
    transform=None,
):
    """Set up the BGS dataset; see the class docstring for the parameters."""
    from rdflib.term import URIRef

    # parse_relation compares predicates against this term, so it is set
    # before the base-class constructor runs.
    self.lith = URIRef("http://data.bgs.ac.uk/ref/Lexicon/hasLithogenesis")
    super(BGSDataset, self).__init__(
        "bgs-hetero",
        _get_dgl_url("dataset/rdf/bgs-hetero.zip"),
        "Lexicon/NamedRockUnit",
        print_every=print_every,
        insert_reverse=insert_reverse,
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
r"""The number of graphs in the dataset. Return ------- int """ return super(BGSDataset, self).__len__() def parse_entity(self, term): import rdflib as rdf if isinstance(term, rdf.Literal): return None elif isinstance(term, rdf.BNode): return None entstr = str(term) if entstr.startswith(self.status_prefix): return None if entstr.startswith(self.entity_prefix): sp = entstr.split("/") if len(sp) != 7: return None # instance cls = "%s/%s" % (sp[4], sp[5]) inst = sp[6] return Entity(e_id=inst, cls=cls) else: return None def parse_relation(self, term): if term == self.lith: return None relstr = str(term) if relstr.startswith(self.relation_prefix): sp = relstr.split("/") if len(sp) < 6: return None assert len(sp) == 6, relstr cls = "%s/%s" % (sp[4], sp[5]) return Relation(cls=cls) else: relstr = relstr.replace(".", "_") return Relation(cls=relstr) def process_tuple(self, raw_tuple, sbj, rel, obj): if sbj is None or rel is None or obj is None: return None return (sbj, rel, obj) def process_idx_file_line(self, line): _, rock, label = line.strip().split("\t") return rock, label class AMDataset(RDFGraphDataset): """AM dataset. for node classification task Namespace convention: - Instance: ``http://purl.org/collections/nl/am/-`` - Relation: ``http://purl.org/collections/nl/am/`` We ignored all literal nodes and the relations connecting them in the output graph. AM dataset statistics: - Nodes: 881680 - Edges: 5668682 (including reverse edges) - Target Category: proxy - Number of Classes: 11 - Label Split: - Train: 802 - Test: 198 Parameters ----------- print_every : int Preprocessing log for every X tuples. Default: 10000. insert_reverse : bool If true, add reverse edge and reverse relations to the final graph. Default: True. raw_dir : str Raw file directory to download/contains the input data directory. Default: ~/.dgl/ force_reload : bool Whether to reload the dataset. Default: False verbose : bool Whether to print out progress information. Default: True. 
def __init__(
    self,
    print_every=10000,
    insert_reverse=True,
    raw_dir=None,
    force_reload=False,
    verbose=True,
    transform=None,
):
    """Set up the AM dataset; see the class docstring for the parameters."""
    from rdflib.term import URIRef

    # parse_relation compares predicates against these two terms, so they
    # are set before the base-class constructor runs.
    self.objectCategory = URIRef(
        "http://purl.org/collections/nl/am/objectCategory"
    )
    self.material = URIRef("http://purl.org/collections/nl/am/material")
    super(AMDataset, self).__init__(
        "am-hetero",
        _get_dgl_url("dataset/rdf/am-hetero.zip"),
        "proxy",
        print_every=print_every,
        insert_reverse=insert_reverse,
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
def parse_entity(self, term):
    """Turn an RDF term into an AM entity.

    Parameters
    ----------
    term : rdflib.term.Identifier
        RDF term

    Returns
    -------
    Entity or None
        None for literals and for URIs outside the AM namespace; a
        ``_BNode`` entity for blank nodes; otherwise an entity whose class
        and id come from the last URI segment
    """
    import rdflib as rdf

    if isinstance(term, rdf.Literal):
        return None
    if isinstance(term, rdf.BNode):
        return Entity(e_id=str(term), cls="_BNode")

    uri = str(term)
    if not uri.startswith(self.entity_prefix):
        return None

    segments = uri.split("/")
    assert len(segments) == 7, uri
    name_parts = segments[6].split("-")
    if len(name_parts) == 2:
        # instance
        cls, inst = name_parts
    else:
        # NOTE(review): here the id is the list of split parts rather than
        # a string — confirm this is intended before changing it.
        cls, inst = "TYPE", name_parts
    return Entity(e_id=inst, cls=cls)
def __init__(
    self,
    self_loop=False,
    raw_dir=None,
    force_reload=False,
    verbose=False,
    transform=None,
):
    """Set up the Reddit dataset.

    ``self_loop`` selects the archive variant: when true, the
    ``_self_loop`` suffix is added to both the dataset name and the
    download URL. The remaining arguments are forwarded to the base class.
    """
    suffix = "_self_loop" if self_loop else ""
    # process() reads this suffix to locate the graph file.
    self._self_loop_str = suffix
    super(RedditDataset, self).__init__(
        name="reddit{}".format(suffix),
        url=_get_dgl_url("dataset/reddit{}.zip".format(suffix)),
        raw_dir=raw_dir,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
def load(self):
    """Load the processed graph from ``dgl_graph.bin`` and rebuild its masks.

    The three split masks are converted to numpy arrays and passed through
    ``generate_mask_tensor`` again before the summary is printed.
    """
    graph_path = os.path.join(self.save_path, "dgl_graph.bin")
    graphs, _ = load_graphs(graph_path)
    self._graph = graphs[0]
    for mask_name in ("train_mask", "val_mask", "test_mask"):
        # F.asnumpy is the backend-neutral conversion; calling ``.numpy()``
        # directly relies on a framework-specific tensor method.
        self._graph.ndata[mask_name] = generate_mask_tensor(
            F.asnumpy(self._graph.ndata[mask_name])
        )
    self._print_info()
def sbm(n_blocks, block_size, p, q, rng=None):
    """(Symmetric) Stochastic Block Model

    Parameters
    ----------
    n_blocks : int
        Number of blocks.
    block_size : int
        Block size.
    p : float
        Probability for intra-community edge; it is divided by the total
        number of nodes before being used as a block density.
    q : float
        Probability for inter-community edge; scaled the same way as ``p``.
    rng : numpy.random.RandomState, optional
        Random number generator. A fresh ``RandomState()`` is created when
        omitted.

    Returns
    -------
    scipy sparse matrix
        The adjacency matrix of generated graph.
    """
    n = n_blocks * block_size
    intra_density = p / n
    inter_density = q / n
    if rng is None:
        rng = np.random.RandomState()

    # Sample only the blocks on or above the block diagonal; the result is
    # mirrored below to make it symmetric.
    row_chunks = []
    col_chunks = []
    for i in range(n_blocks):
        row_offset = i * block_size
        for j in range(i, n_blocks):
            block = sp.sparse.random(
                block_size,
                block_size,
                intra_density if i == j else inter_density,
                random_state=rng,
                data_rvs=lambda count: np.ones(count),
            )
            row_chunks.append(block.row + row_offset)
            col_chunks.append(block.col + j * block_size)

    rows = np.hstack(row_chunks)
    cols = np.hstack(col_chunks)
    sampled = sp.sparse.coo_matrix(
        (np.ones(rows.shape[0]), (rows, cols)), shape=(n, n)
    )
    # Upper triangle (with diagonal) plus the transposed strict upper
    # triangle gives a symmetric adjacency matrix.
    upper = sp.sparse.triu(sampled)
    strict_upper = sp.sparse.triu(sampled, 1)
    return upper + strict_upper.transpose()
def process(self):
    """Generate the graphs, their line graphs, degrees and edge lists.

    ``self._pq`` is either a list with one ``(p, q)`` pair per graph or the
    name of a built-in generator (only ``"Appendix_C"``).

    Raises
    ------
    RuntimeError
        If ``pq`` is neither a list nor a string.
    """
    pq = self._pq
    if isinstance(pq, list):
        assert len(pq) == self._n_graphs, "pq must hold one pair per graph"
    elif isinstance(pq, str):
        generator = {"Appendix_C": self._appendix_c}[pq]
        pq = [generator() for _ in range(self._n_graphs)]
    else:
        raise RuntimeError(
            "pq must be a list of (p, q) pairs or a str, got {}".format(
                type(pq).__name__
            )
        )
    # Forward the user-supplied generator; sbm falls back to a fresh
    # RandomState when it is None, which matches the previous behavior.
    self._graphs = [
        from_scipy(
            sbm(self._n_communities, self._block_size, *x, rng=self._rng)
        )
        for x in pq
    ]
    self._line_graphs = [
        g.line_graph(backtracking=False) for g in self._graphs
    ]
    in_degrees = lambda g: g.in_degrees().float()
    self._graph_degrees = [in_degrees(g) for g in self._graphs]
    self._line_graph_degrees = [in_degrees(lg) for lg in self._line_graphs]
    # zip(*...) regroups the per-graph (src, dst) pairs; element 0 keeps
    # the first component of every graph's edge list.
    self._pm_pds = list(zip(*[g.edges() for g in self._graphs]))[0]
def _appendix_c(self):
    """Draw one pair of densities for a single graph.

    ``q`` is uniform on ``[0, avg_deg - sqrt(avg_deg))`` and
    ``p = k * avg_deg - q``; the two are returned in either order with
    equal probability.

    When the dataset was given an ``rng``, both draws come from it so the
    result is reproducible. Otherwise the global ``numpy.random`` and
    ``random`` states are used.

    Returns
    -------
    (float, float)
        ``(p, q)`` or ``(q, p)``
    """
    upper = self._avg_deg - math.sqrt(self._avg_deg)
    rng = self._rng
    if rng is None:
        q = npr.uniform(0, upper)
        keep_order = random.random() < 0.5
    else:
        q = rng.uniform(0, upper)
        keep_order = rng.random_sample() < 0.5
    p = self._k * self._avg_deg - q
    return (p, q) if keep_order else (q, p)
def sigma(dists, kth=8):
    """Compute a per-node distance scale from a pairwise distance matrix.

    For every row, the ``kth + 1`` smallest distances are summed and divided
    by ``kth``. A small constant is added so the result can safely be used
    as a divisor.

    Parameters
    ----------
    dists : numpy.ndarray
        Pairwise distance matrix with one row per node. The callers in this
        module pass the square output of ``cdist(x, x)``.
    kth : int, optional
        Neighbor rank used for the partition. Default: 8

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_nodes, 1)`` holding the scale of each node.
    """
    num_nodes = dists.shape[0]
    # ``np.partition`` needs ``kth`` to be a valid index, i.e. strictly less
    # than the number of columns, so graphs with exactly ``kth`` nodes must
    # also take the constant fallback.
    if kth >= num_nodes:
        scale = np.ones((num_nodes, 1))
    else:
        # Partition so the first ``kth + 1`` columns hold the smallest
        # distances of each row.
        nearest = np.partition(dists, kth, axis=-1)[:, : kth + 1]
        scale = nearest.sum(axis=1).reshape((nearest.shape[0], 1)) / kth
    return scale + 1e-8
def process(self):
    """Build one graph per superpixel sample from the raw pickle file.

    The method loads ``(labels, sp_data)`` from
    ``<raw_path>/superpixels/<prefix>_<split>.pkl``, derives an adjacency
    matrix and a neighbor list for every sample, and then turns each sample
    into a graph whose node and edge features are stored under ``"feat"``.
    The graphs are appended to ``self.graphs`` and the labels are stored in
    ``self.labels``.
    """
    # Pick the file prefix for the selected dataset. ``plk_file`` stays
    # unbound for any other name.
    if self._dataset_name == "MNIST":
        plk_file = "mnist_75sp"
    elif self._dataset_name == "CIFAR10":
        plk_file = "cifar10_150sp"

    # NOTE(review): pickle.load runs on a file extracted from the downloaded
    # archive, so the archive must be trusted.
    with open(
        os.path.join(
            self.raw_path, "superpixels", f"{plk_file}_{self.split}.pkl"
        ),
        "rb",
    ) as f:
        self.labels, self.sp_data = pickle.load(f)
        self.labels = F.tensor(self.labels)

    # Per-sample intermediate results, kept as attributes.
    self.Adj_matrices = []
    self.node_features = []
    self.edges_lists = []
    self.edge_features = []

    # First pass: compute adjacency, neighbor lists and feature arrays.
    for index, sample in enumerate(
        tqdm(self.sp_data, desc=f"Processing {self.split} dataset")
    ):
        # The first two entries of a sample are the superpixel values and
        # their coordinates.
        mean_px, coord = sample[:2]
        # Scale coordinates by the image side length.
        coord = coord / self.img_size

        if self.use_feature:
            A = compute_adjacency_matrix_images(
                coord, mean_px
            )  # using super-pixel locations + features
        else:
            A = compute_adjacency_matrix_images(
                coord, mean_px, False
            )  # using only super-pixel locations
        edges_list, edge_values_list = compute_edges_list(A)

        N_nodes = A.shape[0]

        # Node feature = superpixel values concatenated with 2-D coordinates.
        mean_px = mean_px.reshape(N_nodes, -1)
        coord = coord.reshape(N_nodes, 2)
        x = np.concatenate((mean_px, coord), axis=1)

        # Flatten the per-node neighbor values into one edge-feature vector.
        edge_values_list = edge_values_list.reshape(-1)

        self.node_features.append(x)
        self.edge_features.append(edge_values_list)
        self.Adj_matrices.append(A)
        self.edges_lists.append(edges_list)

    # Second pass: assemble the graph objects.
    for index in tqdm(
        range(len(self.sp_data)), desc=f"Dump {self.split} dataset"
    ):
        N = self.node_features[index].shape[0]

        src_nodes = []
        dst_nodes = []
        for src, dsts in enumerate(self.edges_lists[index]):
            # handling for 1 node where the self loop would be the only edge
            if N == 1:
                src_nodes.append(src)
                dst_nodes.append(dsts)
            else:
                # Drop self loops from the neighbor list.
                # NOTE(review): edge features are not filtered here, so this
                # presumably relies on compute_edges_list never returning a
                # self loop when N > 1 -- TODO confirm.
                dsts = dsts[dsts != src]
                srcs = [src] * len(dsts)
                src_nodes.extend(srcs)
                dst_nodes.extend(dsts)

        src_nodes = F.tensor(src_nodes)
        dst_nodes = F.tensor(dst_nodes)

        g = dgl_graph((src_nodes, dst_nodes), num_nodes=N)
        g.ndata["feat"] = F.zerocopy_from_numpy(
            self.node_features[index]
        ).to(F.float32)
        # unsqueeze(1) gives the edge features an explicit trailing axis.
        g.edata["feat"] = (
            F.zerocopy_from_numpy(self.edge_features[index])
            .to(F.float32)
            .unsqueeze(1)
        )

        self.graphs.append(g)
class MNISTSuperPixelDataset(SuperPixelDataset):
    r"""MNIST superpixel dataset for the graph classification task.

    DGL dataset of MNIST in the benchmark-gnn, which contains graphs
    converted from the original MNIST images.

    Reference `Benchmarking Graph Neural Networks
    <https://arxiv.org/abs/2003.00982>`_

    Statistics:

    - Train examples: 60,000
    - Test examples: 10,000
    - Size of dataset images: 28

    Parameters
    ----------
    raw_dir : str
        Directory to store all the downloaded raw datasets.
        Default: "~/.dgl/".
    split : str
        Should be chosen from ["train", "test"]
        Default: "train".
    use_feature: bool

        - True: Adj matrix defined from super-pixel locations + features
        - False: Adj matrix defined from super-pixel locations (only)

        Default: False.
    force_reload : bool
        Whether to reload the dataset.
        Default: False.
    verbose : bool
        Whether to print out progress information.
        Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Examples
    --------
    >>> from dgl.data import MNISTSuperPixelDataset

    >>> # MNIST dataset
    >>> train_dataset = MNISTSuperPixelDataset(split="train")
    >>> len(train_dataset)
    60000
    >>> graph, label = train_dataset[0]
    >>> graph
    Graph(num_nodes=71, num_edges=568,
          ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)})

    >>> # support tensor to be index when transform is None
    >>> # see details in __getitem__ function
    >>> import torch
    >>> idx = torch.tensor([0, 1, 2])
    >>> train_dataset_subset = train_dataset[idx]
    >>> train_dataset_subset[0]
    Graph(num_nodes=71, num_edges=568,
          ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32)}
          edata_schemes={'feat': Scheme(shape=(1,), dtype=torch.float32)})
    """

    def __init__(
        self,
        raw_dir=None,
        split="train",
        use_feature=False,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        # Pin the dataset name to "MNIST"; every other argument is forwarded
        # unchanged to SuperPixelDataset.
        super().__init__(
            raw_dir=raw_dir,
            name="MNIST",
            split=split,
            use_feature=use_feature,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
def __init__(
    self,
    raw_dir=None,
    split="train",
    use_feature=False,
    force_reload=False,
    verbose=False,
    transform=None,
):
    """Initialize the CIFAR10 variant of the superpixel dataset.

    All arguments are forwarded unchanged to the parent initializer; only
    the dataset name is fixed to ``"CIFAR10"``.
    """
    super().__init__(
        raw_dir=raw_dir,
        name="CIFAR10",
        split=split,
        use_feature=use_feature,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
- Attach the motifs to randomly selected nodes of the base graph. - Perturb the graph by adding random edges. - Nodes are assigned to 4 classes. Nodes of label 0 belong to the base BA graph. Nodes of label 1, 2, 3 are separately at the middle, bottom, or top of houses. - Generate constant feature for all nodes, which is 1. Parameters ---------- num_base_nodes : int, optional Number of nodes in the base BA graph. Default: 300 num_base_edges_per_node : int, optional Number of edges to attach from a new node to existing nodes in constructing the base BA graph. Default: 5 num_motifs : int, optional Number of house-structured network motifs to use. Default: 80 perturb_ratio : float, optional Number of random edges to add in perturbation divided by the number of edges in the original graph. Default: 0.01 seed : integer, random_state, or None, optional Indicator of random number generation state. Default: None raw_dir : str, optional Raw file directory to store the processed data. Default: ~/.dgl/ force_reload : bool, optional Whether to always generate the data from scratch rather than load a cached version. Default: False verbose : bool, optional Whether to print progress information. Default: True transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. 
def process(self):
    """Generate the BA-SHAPES graph and store it in ``self._graph``.

    A Barabasi-Albert base graph is created first. Five-node house motifs
    are then appended and each one is linked to an evenly spaced base node.
    Finally random non-self-loop edges are added, node labels and a constant
    feature are attached, and the graph is reordered.
    """
    base = nx.barabasi_albert_graph(
        self.num_base_nodes, self.num_base_edges_per_node, self.seed
    )
    src, dst = (list(endpoints) for endpoints in zip(*base.edges()))

    total = self.num_base_nodes
    # Every node of the base BA graph is labelled 0.
    node_labels = [0] * total

    # Distance between the base nodes that receive a motif.
    spacing = math.floor(total / self.num_motifs)

    # House motif as (source, destination) offsets from its first node.
    house_offsets = ((0, 1), (1, 2), (2, 3), (3, 0), (4, 0), (4, 1))
    # Labels: 1 = middle of the house, 2 = bottom, 3 = top.
    house_labels = (1, 1, 2, 2, 3)

    for motif_id in range(self.num_motifs):
        src.extend(total + tail for tail, _ in house_offsets)
        dst.extend(total + head for _, head in house_offsets)
        node_labels.extend(house_labels)

        # Link the first motif node to its anchor in the base graph.
        src.append(total)
        dst.append(int(motif_id * spacing))
        total += 5

    g = graph((src, dst), num_nodes=total)

    # Perturbation: add random edges that are neither self loops nor
    # duplicates of an existing edge.
    num_real_edges = g.num_edges()
    max_ratio = (total * (total - 1) - num_real_edges) / num_real_edges
    assert (
        self.perturb_ratio <= max_ratio
    ), "perturb_ratio cannot exceed {:.4f}".format(max_ratio)
    num_random_edges = int(num_real_edges * self.perturb_ratio)

    if self.seed is not None:
        np.random.seed(self.seed)

    added = 0
    while added < num_random_edges:
        u = np.random.randint(0, total)
        v = np.random.randint(0, total)
        if g.has_edges_between(u, v) or u == v:
            # Rejected candidate: draw a fresh pair.
            continue
        g.add_edges(u, v)
        added += 1

    g.ndata["label"] = F.tensor(node_labels, F.int64)
    g.ndata["feat"] = F.ones((total, 1), F.float32, F.cpu())
    self._graph = reorder_graph(
        g,
        node_permute_algo="rcmk",
        edge_permute_algo="dst",
        store_ids=False,
    )
if self._transform is None: return self._graph else: return self._transform(self._graph) def __len__(self): return 1 @property def num_classes(self): return 4 class BACommunityDataset(DGLBuiltinDataset): r"""BA-COMMUNITY dataset from `GNNExplainer: Generating Explanations for Graph Neural Networks `__ This is a synthetic dataset for node classification. It is generated by performing the following steps in order. - Construct a base Barabási–Albert (BA) graph. - Construct a set of five-node house-structured network motifs. - Attach the motifs to randomly selected nodes of the base graph. - Perturb the graph by adding random edges. - Nodes are assigned to 4 classes. Nodes of label 0 belong to the base BA graph. Nodes of label 1, 2, 3 are separately at the middle, bottom, or top of houses. - Generate normally distributed features of length 10 - Repeat the above steps to generate another graph. Its nodes are assigned to class 4, 5, 6, 7. Its node features are generated with a distinct normal distribution. - Join the two graphs by randomly adding edges between them. Parameters ---------- num_base_nodes : int, optional Number of nodes in each base BA graph. Default: 300 num_base_edges_per_node : int, optional Number of edges to attach from a new node to existing nodes in constructing a base BA graph. Default: 4 num_motifs : int, optional Number of house-structured network motifs to use in constructing each graph. Default: 80 perturb_ratio : float, optional Number of random edges to add to a graph in perturbation divided by the number of original edges in it. Default: 0.01 num_inter_edges : int, optional Number of random edges to add between the two graphs. Default: 350 seed : integer, random_state, or None, optional Indicator of random number generation state. Default: None raw_dir : str, optional Raw file directory to store the processed data. 
def process(self):
    """Generate the BA-COMMUNITY graph and store it in ``self._graph``.

    Two BA-SHAPES graphs are generated and batched into one graph. Random
    edges are then added from nodes of the first half to nodes of the second
    half, the labels of the second graph are shifted by 4, and 10-dimensional
    normally distributed node features are attached.
    """
    # Seed both global generators up front. The nested BAShapeDataset
    # instances below are created without an explicit seed.
    if self.seed is not None:
        random.seed(self.seed)
        np.random.seed(self.seed)

    # Construct two BA-SHAPES graphs
    g1 = BAShapeDataset(
        self.num_base_nodes,
        self.num_base_edges_per_node,
        self.num_motifs,
        self.perturb_ratio,
        force_reload=True,
        verbose=False,
    )[0]
    g2 = BAShapeDataset(
        self.num_base_nodes,
        self.num_base_edges_per_node,
        self.num_motifs,
        self.perturb_ratio,
        force_reload=True,
        verbose=False,
    )[0]

    # Join them and randomly add edges between them
    g = batch([g1, g2])
    # Number of nodes per component; sources are drawn from the first half
    # of the node IDs and destinations from the second half.
    num_nodes = g.num_nodes() // 2
    src = np.random.randint(0, num_nodes, (self.num_inter_edges,))
    dst = np.random.randint(
        num_nodes, 2 * num_nodes, (self.num_inter_edges,)
    )
    src = F.astype(F.zerocopy_from_numpy(src), g.idtype)
    dst = F.astype(F.zerocopy_from_numpy(dst), g.idtype)
    g.add_edges(src, dst)
    # Labels of the second graph are offset by 4.
    g.ndata["label"] = F.cat(
        [g1.ndata["label"], g2.ndata["label"] + 4], dim=0
    )

    # feature generation
    # The first two feature dimensions have mean -1 / +1 depending on the
    # component; the remaining eight have mean 0 in both.
    random_mu = [0.0] * 8
    random_sigma = [1.0] * 8

    # The ``sigma`` arrays are placed on the diagonal of the matrix passed as
    # the covariance argument of multivariate_normal.
    mu_1, sigma_1 = np.array([-1.0] * 2 + random_mu), np.array(
        [0.5] * 2 + random_sigma
    )
    feat1 = np.random.multivariate_normal(mu_1, np.diag(sigma_1), num_nodes)

    mu_2, sigma_2 = np.array([1.0] * 2 + random_mu), np.array(
        [0.5] * 2 + random_sigma
    )
    feat2 = np.random.multivariate_normal(mu_2, np.diag(sigma_2), num_nodes)

    feat = np.concatenate([feat1, feat2])
    g.ndata["feat"] = F.zerocopy_from_numpy(feat)
    self._graph = reorder_graph(
        g,
        node_permute_algo="rcmk",
        edge_permute_algo="dst",
        store_ids=False,
    )
def process(self):
    """Generate the TREE-CYCLES graph and store it in ``self._graph``.

    A balanced binary tree is built as the base graph. Cycle motifs with
    ``self.cycle_size`` nodes are then appended and each is linked to an
    evenly spaced tree node. With probability one half a second random link
    is added between the motif and the neighborhood of its anchor. Finally
    random non-self-loop edges are added, node labels and a constant feature
    are attached, and the graph is reordered.

    Raises
    ------
    ValueError
        If ``self.cycle_size`` is smaller than 3.
    AssertionError
        If ``self.perturb_ratio`` asks for more random edges than the graph
        can hold.
    """
    cycle_size = self.cycle_size
    if cycle_size < 3:
        raise ValueError(
            "cycle_size must be at least 3, got {}".format(cycle_size)
        )

    if self.seed is not None:
        np.random.seed(self.seed)

    g = nx.balanced_tree(r=2, h=self.tree_height)
    edges = list(g.edges())
    src, dst = map(list, zip(*edges))
    n = nx.number_of_nodes(g)

    # Nodes in the base tree graph belong to class 0
    node_labels = [0] * n

    # The motifs will be evenly attached to the nodes in the base graph.
    spacing = math.floor(n / self.num_motifs)

    for motif_id in range(self.num_motifs):
        # Construct a cycle over ``cycle_size`` consecutive node IDs. The
        # ring follows the configured size, so the edges always stay inside
        # the nodes allocated to this motif.
        src.extend(n + i for i in range(cycle_size))
        dst.extend(n + (i + 1) % cycle_size for i in range(cycle_size))

        # Nodes in cycles belong to class 1
        node_labels.extend([1] * cycle_size)

        # Attach the motif to the base tree graph
        anchor = int(motif_id * spacing)
        src.append(n)
        dst.append(anchor)

        if np.random.random() > 0.5:
            a = np.random.randint(1, 4)
            b = np.random.randint(1, 4)
            # Wrap the motif offset so it stays within the cycle. It is
            # unchanged for cycle sizes of four or more.
            src.append(n + a % cycle_size)
            dst.append(anchor + b)

        n += cycle_size

    g = graph((src, dst), num_nodes=n)

    # Perturb the graph by adding non-self-loop random edges
    num_real_edges = g.num_edges()
    max_ratio = (n * (n - 1) - num_real_edges) / num_real_edges
    assert (
        self.perturb_ratio <= max_ratio
    ), "perturb_ratio cannot exceed {:.4f}".format(max_ratio)
    num_random_edges = int(num_real_edges * self.perturb_ratio)

    for _ in range(num_random_edges):
        # Redraw until the pair is a new edge between two distinct nodes.
        while True:
            u = np.random.randint(0, n)
            v = np.random.randint(0, n)
            if (not g.has_edges_between(u, v)) and (u != v):
                break
        g.add_edges(u, v)

    g.ndata["label"] = F.tensor(node_labels, F.int64)
    g.ndata["feat"] = F.ones((n, 1), F.float32, F.cpu())
    self._graph = reorder_graph(
        g,
        node_permute_algo="rcmk",
        edge_permute_algo="dst",
        store_ids=False,
    )
if self._transform is None: return self._graph else: return self._transform(self._graph) def __len__(self): return 1 @property def num_classes(self): return 2 class TreeGridDataset(DGLBuiltinDataset): r"""TREE-GRIDS dataset from `GNNExplainer: Generating Explanations for Graph Neural Networks `__ This is a synthetic dataset for node classification. It is generated by performing the following steps in order. - Construct a balanced binary tree as the base graph. - Construct a set of n-by-n grid motifs. - Attach the motifs to randomly selected nodes of the base graph. - Perturb the graph by adding random edges. - Generate constant feature for all nodes, which is 1. - Nodes in the tree belong to class 0 and nodes in grids belong to class 1. Parameters ---------- tree_height : int, optional Height of the balanced binary tree. Default: 8 num_motifs : int, optional Number of grid motifs to use. Default: 80 grid_size : int, optional The number of nodes in a grid motif will be grid_size ^ 2. Default: 3 perturb_ratio : float, optional Number of random edges to add in perturbation divided by the number of original edges in the graph. Default: 0.1 seed : integer, random_state, or None, optional Indicator of random number generation state. Default: None raw_dir : str, optional Raw file directory to store the processed data. Default: ~/.dgl/ force_reload : bool, optional Whether to always generate the data from scratch rather than load a cached version. Default: False verbose : bool, optional Whether to print progress information. Default: True transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. 
def process(self):
    """Generate the TREE-GRIDS graph and store it in ``self._graph``.

    A balanced binary tree is built as the base graph. Copies of a square
    grid motif are then appended and each is linked to an evenly spaced tree
    node. Finally random non-self-loop edges are added, node labels and a
    constant feature are attached, and the graph is reordered.
    """
    if self.seed is not None:
        np.random.seed(self.seed)

    tree = nx.balanced_tree(r=2, h=self.tree_height)
    src, dst = (list(endpoints) for endpoints in zip(*tree.edges()))
    total = nx.number_of_nodes(tree)

    # Tree nodes are labelled 0.
    node_labels = [0] * total

    # Distance between the tree nodes that receive a motif.
    spacing = math.floor(total / self.num_motifs)

    # Build the grid template once, relabelled to consecutive integers
    # starting at 0.
    grid = nx.grid_graph([self.grid_size, self.grid_size])
    nodes_per_grid = nx.number_of_nodes(grid)
    grid = nx.convert_node_labels_to_integers(grid, first_label=0)
    cell_src, cell_dst = (list(endpoints) for endpoints in zip(*grid.edges()))

    for motif_id in range(self.num_motifs):
        # Shift the template so it occupies the next free node IDs.
        src.extend(offset + total for offset in cell_src)
        dst.extend(offset + total for offset in cell_dst)

        # Grid nodes are labelled 1.
        node_labels.extend([1] * nodes_per_grid)

        # Link the first grid node to its anchor in the tree.
        src.append(total)
        dst.append(int(motif_id * spacing))
        total += nodes_per_grid

    g = graph((src, dst), num_nodes=total)

    # Perturbation: add random edges that are neither self loops nor
    # duplicates of an existing edge.
    num_real_edges = g.num_edges()
    max_ratio = (total * (total - 1) - num_real_edges) / num_real_edges
    assert (
        self.perturb_ratio <= max_ratio
    ), "perturb_ratio cannot exceed {:.4f}".format(max_ratio)
    num_random_edges = int(num_real_edges * self.perturb_ratio)

    added = 0
    while added < num_random_edges:
        u = np.random.randint(0, total)
        v = np.random.randint(0, total)
        if g.has_edges_between(u, v) or u == v:
            # Rejected candidate: draw a fresh pair.
            continue
        g.add_edges(u, v)
        added += 1

    g.ndata["label"] = F.tensor(node_labels, F.int64)
    g.ndata["feat"] = F.ones((total, 1), F.float32, F.cpu())
    self._graph = reorder_graph(
        g,
        node_permute_algo="rcmk",
        edge_permute_algo="dst",
        store_ids=False,
    )
if self._transform is None: return self._graph else: return self._transform(self._graph) def __len__(self): return 1 @property def num_classes(self): return 2 class BA2MotifDataset(DGLBuiltinDataset): r"""BA-2motifs dataset from `Parameterized Explainer for Graph Neural Network `__ This is a synthetic dataset for graph classification. It was generated by performing the following steps in order. - Construct 1000 base Barabási–Albert (BA) graphs. - Attach house-structured network motifs to half of the base BA graphs. - Attach five-node cycle motifs to the rest base BA graphs. - Assign each graph to one of two classes according to the type of the attached motif. Parameters ---------- raw_dir : str, optional Raw file directory to download and store the data. Default: ~/.dgl/ force_reload : bool, optional Whether to reload the dataset. Default: False verbose : bool, optional Whether to print progress information. Default: True transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. 
def process(self):
    """Load the raw pickle and convert every adjacency matrix into a graph.

    The pickle holds three parallel collections: adjacency matrices, node
    feature arrays and graph labels. Each adjacency matrix becomes a graph
    built from its non-zero entries, with the matching features stored under
    ``"feat"``. The labels are kept as one int64 tensor in ``self.labels``.
    """
    raw_file = os.path.join(self.raw_dir, self.name + ".pkl")
    # NOTE: pickle.load runs on the downloaded file, so the download source
    # must be trusted.
    with open(raw_file, "rb") as stream:
        adjs, features, labels = pickle.load(stream)

    self.graphs = []
    self.labels = F.tensor(labels, F.int64)

    for position, adjacency in enumerate(adjs):
        motif_graph = graph(adjacency.nonzero())
        motif_graph.ndata["feat"] = F.zerocopy_from_numpy(features[position])
        self.graphs.append(motif_graph)
def save_tensors(filename, tensor_dict):
    """
    Write a dictionary of tensors to a file.

    Parameters
    ----------
    filename : str
        File name to store dict of tensors.
    tensor_dict: dict of dgl NDArray or backend tensor
        Python dict using string as key and tensor as value

    Returns
    ----------
    status : bool
        Return whether save operation succeeds
    """
    # Passed to the C API explicitly alongside the converted dict.
    is_empty_dict = len(tensor_dict) == 0

    nd_dict = {}
    for name, tensor in tensor_dict.items():
        if not isinstance(name, str):
            raise Exception("Dict key has to be str")

        if F.is_tensor(tensor):
            # Backend tensors are converted to DGL NDArrays first.
            tensor = F.zerocopy_to_dgl_ndarray(tensor)
        elif not isinstance(tensor, NDArray):
            raise Exception(
                "Dict value has to be backend tensor or dgl ndarray"
            )
        nd_dict[name] = tensor

    return _CAPI_SaveNDArrayDict(filename, nd_dict, is_empty_dict)
class SSTDataset(DGLBuiltinDataset):
    r"""Stanford Sentiment Treebank dataset.

    Each sample is the constituency tree of a sentence. The leaf nodes
    represent words. The word is a int value stored in the ``x`` feature field.
    The non-leaf node has a special value ``PAD_WORD`` in the ``x`` field.
    Each node also has a sentiment annotation: 5 classes (very negative,
    negative, neutral, positive and very positive). The sentiment label is a
    int value stored in the ``y`` feature field.
    Official site: `<http://nlp.stanford.edu/sentiment/index.html>`_

    Statistics:

    - Train examples: 8,544
    - Dev examples: 1,101
    - Test examples: 2,210
    - Number of classes for each node: 5

    Parameters
    ----------
    mode : str, optional
        Should be one of ['train', 'dev', 'test', 'tiny']
        Default: train
    glove_embed_file : str, optional
        The path to pretrained glove embedding file. It is only used when
        ``mode`` is ``'train'``.
        Default: None
    vocab_file : str, optional
        Optional vocabulary file. If not given, the default vocabulary file
        is used.
        Default: None
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset. Default: False
    verbose : bool
        Whether to print out progress information. Default: False.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    vocab : OrderedDict
        Vocabulary of the dataset
    num_classes : int
        Number of classes for each node
    pretrained_emb: Tensor
        Pretrained glove embedding with respect the vocabulary.
    vocab_size : int
        The size of the vocabulary

    Notes
    -----
    All the samples will be loaded and preprocessed in the memory first.

    Examples
    --------
    >>> # get dataset
    >>> train_data = SSTDataset()
    >>> dev_data = SSTDataset(mode='dev')
    >>> test_data = SSTDataset(mode='test')
    >>> tiny_data = SSTDataset(mode='tiny')
    >>>
    >>> len(train_data)
    8544
    >>> train_data.num_classes
    5
    >>> glove_embed = train_data.pretrained_emb
    >>> train_data.vocab_size
    19536
    >>> train_data[0]
    Graph(num_nodes=71, num_edges=70,
      ndata_schemes={'x': Scheme(shape=(), dtype=torch.int64), 'y': Scheme(shape=(), dtype=torch.int64), 'mask': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={})
    >>> for tree in train_data:
    ...     input_ids = tree.ndata['x']
    ...     labels = tree.ndata['y']
    ...     mask = tree.ndata['mask']
    ...     # your code here
    """

    PAD_WORD = -1  # special pad word id
    UNK_WORD = -1  # out-of-vocabulary word id

    def __init__(
        self,
        mode="train",
        glove_embed_file=None,
        vocab_file=None,
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        assert mode in ["train", "dev", "test", "tiny"]
        _url = _get_dgl_url("dataset/sst.zip")
        # The embedding file is deliberately ignored for non-train modes.
        self._glove_embed_file = glove_embed_file if mode == "train" else None
        self.mode = mode
        self._vocab_file = vocab_file
        super(SSTDataset, self).__init__(
            name="sst",
            url=_url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        r"""Load the vocabulary, optional GloVe vectors and the parsed trees."""
        from nltk.corpus.reader import BracketParseCorpusReader

        # load vocab file
        self._vocab = OrderedDict()
        vocab_file = (
            self._vocab_file
            if self._vocab_file is not None
            else os.path.join(self.raw_path, "vocab.txt")
        )
        with open(vocab_file, encoding="utf-8") as vf:
            for line in vf:
                line = line.strip()
                self._vocab[line] = len(self._vocab)

        # Decide once whether GloVe is usable, so that every later step
        # agrees even if the file appears/disappears while processing.
        use_glove = self._glove_embed_file is not None and os.path.exists(
            self._glove_embed_file
        )

        # filter glove: keep only the vectors of words in the vocabulary
        glove_emb = {}
        if use_glove:
            with open(self._glove_embed_file, "r", encoding="utf-8") as pf:
                for line in pf:
                    sp = line.split(" ")
                    word = sp[0].lower()
                    if word in self._vocab:
                        glove_emb[word] = np.asarray(
                            [float(x) for x in sp[1:]]
                        )

        files = ["{}.txt".format(self.mode)]
        corpus = BracketParseCorpusReader(self.raw_path, files)
        sents = corpus.parsed_sents(files[0])

        # initialize with glove; words without a GloVe vector get a small
        # random vector instead
        self._pretrained_emb = None
        if use_glove:
            pretrained_emb = []
            fail_cnt = 0
            for word in self._vocab.keys():
                key = word.lower()
                if key not in glove_emb:
                    fail_cnt += 1
                pretrained_emb.append(
                    glove_emb.get(key, np.random.uniform(-0.05, 0.05, 300))
                )
            self._pretrained_emb = F.tensor(np.stack(pretrained_emb, 0))
            print(
                "Miss word in GloVe {0:.4f}".format(
                    1.0 * fail_cnt / len(self._pretrained_emb)
                )
            )

        # build trees
        self._trees = [self._build_tree(sent) for sent in sents]

    def _build_tree(self, root):
        r"""Convert one parsed sentence into a graph with child->parent edges."""
        g = nx.DiGraph()

        def _rec_build(nid, node):
            for child in node:
                # New nodes get consecutive ids in creation order.
                cid = g.number_of_nodes()
                if isinstance(child[0], (str, bytes)):
                    # leaf node
                    word = self.vocab.get(child[0].lower(), self.UNK_WORD)
                    g.add_node(cid, x=word, y=int(child.label()), mask=1)
                else:
                    g.add_node(
                        cid, x=SSTDataset.PAD_WORD, y=int(child.label()), mask=0
                    )
                    _rec_build(cid, child)
                g.add_edge(cid, nid)

        # add root
        g.add_node(0, x=SSTDataset.PAD_WORD, y=int(root.label()), mask=0)
        _rec_build(0, root)
        ret = from_networkx(g, node_attrs=["x", "y", "mask"])
        return ret

    @property
    def graph_path(self):
        r"""Path of the cached graphs for the current mode."""
        return os.path.join(self.save_path, self.mode + "_dgl_graph.bin")

    @property
    def vocab_path(self):
        r"""Path of the cached vocabulary."""
        return os.path.join(self.save_path, "vocab.pkl")

    def has_cache(self):
        r"""Tell whether both the graph and the vocabulary caches exist."""
        return os.path.exists(self.graph_path) and os.path.exists(
            self.vocab_path
        )

    def save(self):
        r"""Cache the trees, the vocabulary and, if present, the embedding."""
        save_graphs(self.graph_path, self._trees)
        save_info(self.vocab_path, {"vocab": self.vocab})
        # Compare against None explicitly: the truth value of a tensor with
        # more than one element is ambiguous and raises in some backends.
        if self.pretrained_emb is not None:
            emb_path = os.path.join(self.save_path, "emb.pkl")
            save_info(emb_path, {"embed": self.pretrained_emb})

    def load(self):
        r"""Restore the trees, the vocabulary and any cached embedding."""
        emb_path = os.path.join(self.save_path, "emb.pkl")

        self._trees = load_graphs(self.graph_path)[0]
        self._vocab = load_info(self.vocab_path)["vocab"]
        self._pretrained_emb = None
        if os.path.exists(emb_path):
            self._pretrained_emb = load_info(emb_path)["embed"]

    @property
    def vocab(self):
        r"""Vocabulary

        Returns
        -------
        OrderedDict
        """
        return self._vocab

    @property
    def pretrained_emb(self):
        r"""Pre-trained word embedding, if given."""
        return self._pretrained_emb

    def __getitem__(self, idx):
        r"""Get graph by index

        Parameters
        ----------
        idx : int

        Returns
        -------
        :class:`dgl.DGLGraph`

            graph structure, word id for each node, node labels and masks.

            - ``ndata['x']``: word id of the node
            - ``ndata['y']:`` label of the node
            - ``ndata['mask']``: 1 if the node is a leaf, otherwise 0
        """
        if self._transform is None:
            return self._trees[idx]
        else:
            return self._transform(self._trees[idx])

    def __len__(self):
        r"""Number of graphs in the dataset."""
        return len(self._trees)

    @property
    def vocab_size(self):
        r"""Vocabulary size."""
        return len(self._vocab)

    @property
    def num_classes(self):
        r"""Number of classes for each node."""
        return 5
Default : 10 max_allow_node : int Remove graphs that contains more nodes than ``max_allow_node``. Default : None transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- max_num_node : int Maximum number of nodes num_classes : int Number of classes num_labels : numpy.int64 (DEPRECATED, use num_classes instead) Number of classes Notes ----- LegacyTUDataset uses provided node feature by default. If no feature provided, it uses one-hot node label instead. If neither labels provided, it uses constant for node feature. The dataset sorts graphs by their labels. Shuffle is preferred before manual train/val split. Examples -------- >>> data = LegacyTUDataset('DD') The dataset instance is an iterable >>> len(data) 1178 >>> g, label = data[1024] >>> g Graph(num_nodes=88, num_edges=410, ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) >>> label tensor(1) Batch the graphs and labels for mini-batch training >>> graphs, labels = zip(*[data[i] for i in range(16)]) >>> batched_graphs = dgl.batch(graphs) >>> batched_labels = torch.tensor(labels) >>> batched_graphs Graph(num_nodes=9539, num_edges=47382, ndata_schemes={'feat': Scheme(shape=(89,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) """ _url = r"https://www.chrsmrrs.com/graphkerneldatasets/{}.zip" def __init__( self, name, use_pandas=False, hidden_size=10, max_allow_node=None, raw_dir=None, force_reload=False, verbose=False, transform=None, ): url = self._url.format(name) self.hidden_size = hidden_size self.max_allow_node = max_allow_node self.use_pandas = use_pandas super(LegacyTUDataset, self).__init__( name=name, url=url, raw_dir=raw_dir, hash_key=(name, 
    def process(self):
        """Parse the raw TU text files into per-graph DGL graphs and labels.

        Reads the edge list (``A``), the node-to-graph assignment
        (``graph_indicator``) and either ``graph_labels`` or
        ``graph_attributes``. One large graph is built and then split into
        one subgraph per graph id. Node features stored in ``feat`` come
        from, in order, one-hot node labels, node attributes (which
        overwrite the one-hot labels when both files exist), or a constant
        all-ones matrix of width ``hidden_size``. ``data_mode`` records which
        source was used last.

        Raises
        ------
        Exception
            If neither a graph label nor a graph attribute file exists.
        """
        self.data_mode = None

        if self.use_pandas:
            import pandas as pd

            DS_edge_list = self._idx_from_zero(
                pd.read_csv(
                    self._file_path("A"), delimiter=",", dtype=int, header=None
                ).values
            )
        else:
            DS_edge_list = self._idx_from_zero(
                np.genfromtxt(self._file_path("A"), delimiter=",", dtype=int)
            )

        # Graph id of every node, shifted so that the smallest id is 0.
        DS_indicator = self._idx_from_zero(
            np.genfromtxt(self._file_path("graph_indicator"), dtype=int)
        )

        if os.path.exists(self._file_path("graph_labels")):
            # Labels are shifted by their minimum, not compacted.
            DS_graph_labels = self._idx_from_zero(
                np.genfromtxt(self._file_path("graph_labels"), dtype=int)
            )
            self.num_labels = max(DS_graph_labels) + 1
            self.graph_labels = DS_graph_labels
        elif os.path.exists(self._file_path("graph_attributes")):
            # Float-valued graph targets: there is no class count.
            DS_graph_labels = np.genfromtxt(
                self._file_path("graph_attributes"), dtype=float
            )
            self.num_labels = None
            self.graph_labels = DS_graph_labels
        else:
            raise Exception("Unknown graph label or graph attributes")

        # Build one graph holding every node and edge of the dataset.
        g = dgl_graph(([], []))
        g.add_nodes(int(DS_edge_list.max()) + 1)
        g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])

        # Collect the node ids of each graph and track the largest graph.
        node_idx_list = []
        self.max_num_node = 0
        for idx in range(np.max(DS_indicator) + 1):
            node_idx = np.where(DS_indicator == idx)
            node_idx_list.append(node_idx[0])
            if len(node_idx[0]) > self.max_num_node:
                self.max_num_node = len(node_idx[0])

        self.graph_lists = [g.subgraph(node_idx) for node_idx in node_idx_list]

        try:
            DS_node_labels = self._idx_from_zero(
                np.loadtxt(self._file_path("node_labels"), dtype=int)
            )
            # NOTE(review): this is assigned on the big graph after the
            # subgraphs above were extracted; confirm whether the subgraphs
            # are expected to expose ``node_label``.
            g.ndata["node_label"] = F.tensor(DS_node_labels)
            one_hot_node_labels = self._to_onehot(DS_node_labels)
            # This loop rebinds ``g`` to each subgraph in turn.
            for idxs, g in zip(node_idx_list, self.graph_lists):
                g.ndata["feat"] = F.tensor(
                    one_hot_node_labels[idxs, :], F.float32
                )
            self.data_mode = "node_label"
        except IOError:
            print("No Node Label Data")

        try:
            DS_node_attr = np.loadtxt(
                self._file_path("node_attributes"), delimiter=","
            )
            # A single attribute column is loaded as 1-D; make it 2-D.
            if DS_node_attr.ndim == 1:
                DS_node_attr = np.expand_dims(DS_node_attr, -1)
            for idxs, g in zip(node_idx_list, self.graph_lists):
                g.ndata["feat"] = F.tensor(DS_node_attr[idxs, :], F.float32)
            self.data_mode = "node_attr"
        except IOError:
            print("No Node Attribute Data")

        # ``g`` is the last subgraph if either loop above ran, otherwise it
        # is still the big graph, which never receives a ``feat`` field.
        if "feat" not in g.ndata.keys():
            for idxs, g in zip(node_idx_list, self.graph_lists):
                g.ndata["feat"] = F.ones(
                    (g.num_nodes(), self.hidden_size), F.float32, F.cpu()
                )
            self.data_mode = "constant"
            if self.verbose:
                print(
                    "Use Constant one as Feature with hidden size {}".format(
                        self.hidden_size
                    )
                )

        # remove graphs that are too large by user given standard
        # optional pre-processing step in conformity with Rex Ying's original
        # DiffPool implementation
        if self.max_allow_node:
            preserve_idx = []
            if self.verbose:
                print("original dataset length : ", len(self.graph_lists))
            for i, g in enumerate(self.graph_lists):
                if g.num_nodes() <= self.max_allow_node:
                    preserve_idx.append(i)
            self.graph_lists = [self.graph_lists[i] for i in preserve_idx]
            if self.verbose:
                print(
                    "after pruning graphs that are too big : ",
                    len(self.graph_lists),
                )
            self.graph_labels = [self.graph_labels[i] for i in preserve_idx]
            # The size limit is reported instead of the largest kept graph.
            self.max_num_node = self.max_allow_node
        self.graph_labels = F.tensor(self.graph_labels)
class TUDataset(DGLBuiltinDataset):
    r"""
    TUDataset contains lots of graph kernel datasets for graph classification.

    Parameters
    ----------
    name : str
        Dataset Name, such as ``ENZYMES``, ``DD``, ``COLLAB``, ``MUTAG``, can be the
        name of any dataset on `<https://chrsmrrs.github.io/datasets/docs/datasets/>`_.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    max_num_node : int
        Maximum number of nodes
    num_classes : int
        Number of classes
    num_labels : int
        (DEPRECATED, use num_classes instead) Number of classes

    Notes
    -----
    **IMPORTANT:** Some of the datasets have duplicate edges exist in the graphs, e.g.
    the edges in ``IMDB-BINARY`` are all duplicated.  DGL faithfully keeps the duplicates
    as per the original data.  Other frameworks such as PyTorch Geometric removes the
    duplicates by default.  You can remove the duplicate edges with :func:`dgl.to_simple`.

    Graphs may have node labels, node attributes, edge labels, and edge attributes,
    varying from dataset to dataset. When the corresponding raw file exists they are
    stored in ``ndata['node_labels']``, ``ndata['node_attr']``,
    ``edata['edge_labels']`` and ``edata['edge_attr']`` respectively.

    Labels are mapped to :math:`\lbrace 0,\cdots,n-1 \rbrace` where :math:`n` is the
    number of labels (some datasets have raw labels :math:`\lbrace -1, 1 \rbrace` which
    will be mapped to :math:`\lbrace 0, 1 \rbrace`). In previous versions, the minimum
    label was added so that :math:`\lbrace -1, 1 \rbrace` was mapped to
    :math:`\lbrace 0, 2 \rbrace`.

    The dataset sorts graphs by their labels.
    Shuffle is preferred before manual train/val split.

    Examples
    --------
    >>> data = TUDataset('DD')

    The dataset instance is an iterable

    >>> len(data)
    1178
    >>> g, label = data[1024]
    >>> g
    Graph(num_nodes=88, num_edges=410,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64), 'node_labels': Scheme(shape=(1,), dtype=torch.int64)}
          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> label
    tensor([1])

    Batch the graphs and labels for mini-batch training

    >>> graphs, labels = zip(*[data[i] for i in range(16)])
    >>> batched_graphs = dgl.batch(graphs)
    >>> batched_labels = torch.tensor(labels)
    >>> batched_graphs
    Graph(num_nodes=9539, num_edges=47382,
          ndata_schemes={'node_labels': Scheme(shape=(1,), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
    """

    _url = r"https://www.chrsmrrs.com/graphkerneldatasets/{}.zip"

    def __init__(
        self,
        name,
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
    ):
        url = self._url.format(name)
        super(TUDataset, self).__init__(
            name=name,
            url=url,
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Parse the raw TU text files into per-graph DGL graphs and labels.

        Edge attributes are stored under ``edata['edge_attr']``. Caches
        written by earlier versions stored them under
        ``edata['node_labels']``; pass ``force_reload=True`` to rebuild.

        Raises
        ------
        Exception
            If neither a graph label nor a graph attribute file exists.
        """
        DS_edge_list = self._idx_from_zero(
            loadtxt(self._file_path("A"), delimiter=",").astype(int)
        )
        DS_indicator = self._idx_from_zero(
            loadtxt(self._file_path("graph_indicator"), delimiter=",").astype(
                int
            )
        )

        if os.path.exists(self._file_path("graph_labels")):
            DS_graph_labels = self._idx_reset(
                loadtxt(self._file_path("graph_labels"), delimiter=",").astype(
                    int
                )
            )
            # np.max works for both 1-D and (n, 1) shaped label arrays,
            # whereas builtin max() would yield a one-element array for the
            # latter.
            self.num_labels = int(np.max(DS_graph_labels)) + 1
            self.graph_labels = F.tensor(DS_graph_labels)
        elif os.path.exists(self._file_path("graph_attributes")):
            # Float-valued graph targets: there is no class count.
            DS_graph_labels = loadtxt(
                self._file_path("graph_attributes"), delimiter=","
            ).astype(float)
            self.num_labels = None
            self.graph_labels = F.tensor(DS_graph_labels)
        else:
            raise Exception("Unknown graph label or graph attributes")

        # Build one graph holding every node and edge of the dataset.
        g = dgl_graph(([], []))
        g.add_nodes(int(DS_edge_list.max()) + 1)
        g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])

        # Collect the node ids of each graph and track the largest graph.
        node_idx_list = []
        self.max_num_node = 0
        for idx in range(np.max(DS_indicator) + 1):
            node_idx = np.where(DS_indicator == idx)
            node_idx_list.append(node_idx[0])
            if len(node_idx[0]) > self.max_num_node:
                self.max_num_node = len(node_idx[0])

        # raw file category -> (frame attribute of the graph, field name)
        self.attr_dict = {
            "node_labels": ("ndata", "node_labels"),
            "node_attributes": ("ndata", "node_attr"),
            "edge_labels": ("edata", "edge_labels"),
            "edge_attributes": ("edata", "edge_attr"),
        }
        for filename, (frame_name, field) in self.attr_dict.items():
            try:
                data = loadtxt(self._file_path(filename), delimiter=",")
            except IOError:
                # Every one of these files is optional for a TU dataset.
                continue
            if "label" in filename:
                data = F.tensor(self._idx_from_zero(data))
            else:
                data = F.tensor(data)
            getattr(g, frame_name)[field] = data

        self.graph_lists = [g.subgraph(node_idx) for node_idx in node_idx_list]

    @property
    def graph_path(self):
        """Path of the cached graphs and graph labels."""
        return os.path.join(self.save_path, "tu_{}.bin".format(self.name))

    @property
    def info_path(self):
        """Path of the cached dataset statistics."""
        return os.path.join(self.save_path, "tu_{}.pkl".format(self.name))

    def save(self):
        """Cache the graphs, the graph labels and the dataset statistics."""
        label_dict = {"labels": self.graph_labels}
        info_dict = {
            "max_num_node": self.max_num_node,
            "num_labels": self.num_labels,
        }
        save_graphs(str(self.graph_path), self.graph_lists, label_dict)
        save_info(str(self.info_path), info_dict)

    def load(self):
        """Restore the graphs, the graph labels and the dataset statistics."""
        graphs, label_dict = load_graphs(str(self.graph_path))
        info_dict = load_info(str(self.info_path))

        self.graph_lists = graphs
        self.graph_labels = label_dict["labels"]
        self.max_num_node = info_dict["max_num_node"]
        self.num_labels = info_dict["num_labels"]

    def has_cache(self):
        """Tell whether both cache files exist."""
        return os.path.exists(self.graph_path) and os.path.exists(
            self.info_path
        )

    def __getitem__(self, idx):
        """Get the idx-th sample.

        Parameters
        ---------
        idx : int
            The sample index.

        Returns
        -------
        (:class:`dgl.DGLGraph`, Tensor)
            The graph, carrying whichever of ``ndata['node_labels']``,
            ``ndata['node_attr']``, ``edata['edge_labels']`` and
            ``edata['edge_attr']`` the raw dataset provides, and its label.
        """
        g = self.graph_lists[idx]
        if self._transform is not None:
            g = self._transform(g)
        return g, self.graph_labels[idx]

    def __len__(self):
        """Return the number of graphs in the dataset."""
        return len(self.graph_lists)

    def _file_path(self, category):
        """Path of the raw ``<name>_<category>.txt`` file."""
        return os.path.join(
            self.raw_path, self.name, "{}_{}.txt".format(self.name, category)
        )

    @staticmethod
    def _idx_from_zero(idx_tensor):
        """Shift the values so that the smallest one becomes 0."""
        return idx_tensor - np.min(idx_tensor)

    @staticmethod
    def _idx_reset(idx_tensor):
        """Maps n unique labels to {0, ..., n-1} in an ordered fashion."""
        labels = np.unique(idx_tensor)
        relabel_map = {x: i for i, x in enumerate(labels)}
        new_idx_tensor = np.vectorize(relabel_map.get)(idx_tensor)
        return new_idx_tensor

    def statistics(self):
        """Return ``(feature size, number of labels, max number of nodes)``.

        NOTE(review): this reads ``ndata['feat']``, a field that
        :meth:`process` in this class never writes; confirm whether callers
        are expected to add it before calling this method.
        """
        return (
            self.graph_lists[0].ndata["feat"].shape[1],
            self.num_labels,
            self.max_num_node,
        )

    @property
    def num_classes(self):
        """Number of graph classes, or None for attribute-valued targets."""
        return self.num_labels
def loadtxt(path, delimiter, dtype=None):
    """Load a delimited, header-less text file into a NumPy array.

    Pandas is used when it is installed; otherwise the function warns and
    falls back to :func:`numpy.loadtxt`.

    Parameters
    ----------
    path : str
        File to read.
    delimiter : str
        Column separator.
    dtype : data-type, optional
        If given, the loaded array is cast to this type with ``astype``.
        By default the type inferred by the reader is kept.

    Returns
    -------
    numpy.ndarray
        The file content.
    """
    # Only the import is guarded, so that an error raised while reading the
    # file is not mistaken for a missing pandas installation.
    try:
        import pandas as pd
    except ImportError:
        warnings.warn(
            "Pandas is not installed, now using numpy.loadtxt to load data, "
            "which could be extremely slow. Accelerate by installing pandas"
        )
        data = np.loadtxt(path, delimiter=delimiter)
    else:
        data = pd.read_csv(path, delimiter=delimiter, header=None).values

    if dtype is not None:
        data = data.astype(dtype)
    return data
def download(
    url,
    path=None,
    overwrite=True,
    sha1_hash=None,
    retries=5,
    verify_ssl=True,
    log=True,
):
    """Download a given URL.

    Codes borrowed from mxnet/gluon/utils.py

    Parameters
    ----------
    url : str
        URL to download.
    path : str, optional
        Destination path to store downloaded file. By default stores to the
        current directory with the same name as in url.
    overwrite : bool, optional
        Whether to overwrite the destination file if it already exists.
        By default always overwrites the downloaded file.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
        but doesn't match.
    retries : integer, default 5
        The number of times to attempt downloading in case of failure or non 200 return codes.
        Every failure decrements the counter and the error is re-raised once
        it reaches zero, so at least one attempt is always made.
    verify_ssl : bool, default True
        Verify SSL certificates.
    log : bool, default True
        Whether to print the progress for download

    Returns
    -------
    str
        The file path of the downloaded file.

    Raises
    ------
    Exception
        The error of the last failed attempt (for example a ``RuntimeError``
        for a non-200 status code or a ``UserWarning`` for a hash mismatch).
    """
    if path is None:
        fname = url.split("/")[-1]
        # Empty filenames are invalid
        assert fname, (
            "Can't construct file-name from this URL. "
            "Please set the `path` option manually."
        )
    else:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split("/")[-1])
        else:
            fname = path
    assert retries >= 0, "Number of retries should be at least 0"

    if not verify_ssl:
        warnings.warn(
            "Unverified HTTPS request is being made (verify_ssl=False). "
            "Adding certificate verification is strongly advised."
        )

    if (
        overwrite
        or not os.path.exists(fname)
        or (sha1_hash and not check_sha1(fname, sha1_hash))
    ):
        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
        # exist_ok avoids failing when another process creates the
        # directory between a check and the creation.
        os.makedirs(dirname, exist_ok=True)
        while retries + 1 > 0:
            # Disable pyling too broad Exception
            # pylint: disable=W0703
            try:
                if log:
                    print("Downloading %s from %s..." % (fname, url))
                # The context manager releases the streamed connection even
                # when the status check or the write below fails.
                with requests.get(url, stream=True, verify=verify_ssl) as r:
                    if r.status_code != 200:
                        raise RuntimeError("Failed downloading url %s" % url)
                    # Get the total file size.
                    total_size = int(r.headers.get("content-length", 0))
                    with tqdm(
                        total=total_size, unit="B", unit_scale=True, desc=fname
                    ) as bar:
                        with open(fname, "wb") as f:
                            for chunk in r.iter_content(chunk_size=1024):
                                if chunk:  # filter out keep-alive new chunks
                                    f.write(chunk)
                                    bar.update(len(chunk))
                if sha1_hash and not check_sha1(fname, sha1_hash):
                    raise UserWarning(
                        "File {} is downloaded but the content hash does not match."
                        " The repo may be outdated or download may be incomplete. "
                        'If the "repo_url" is overridden, consider switching to '
                        "the default repo.".format(fname)
                    )
                break
            except Exception:
                retries -= 1
                if retries <= 0:
                    raise
                if log:
                    print(
                        "download failed, retrying, {} attempt{} left".format(
                            retries, "s" if retries > 1 else ""
                        )
                    )

    return fname
def extract_archive(file, target_dir, overwrite=True):
    """Extract archive file.

    Supports tar archives (``.tar``, ``.tar.gz``, ``.tgz``), single gzip
    files (``.gz``) and zip archives (``.zip``).

    Parameters
    ----------
    file : str
        Absolute path of the archive file.
    target_dir : str
        Target directory of the archive to be uncompressed.
    overwrite : bool, default True
        Whether to overwrite the contents inside the directory.
        By default always overwrites.

    Raises
    ------
    Exception
        If the file type is not recognized, or if a tar member would be
        written outside of ``target_dir``.
    """
    if os.path.exists(target_dir) and not overwrite:
        return
    print("Extracting file to {}".format(target_dir))
    if file.endswith((".tar.gz", ".tar", ".tgz")):
        import tarfile

        def is_within_directory(directory, target):
            abs_directory = os.path.abspath(directory)
            abs_target = os.path.abspath(target)
            # commonpath compares whole path components. A character-wise
            # prefix test would accept "<dir>side/x" as being inside
            # "<dir>".
            try:
                common = os.path.commonpath([abs_directory, abs_target])
            except ValueError:
                # e.g. the two paths are on different drives
                return False
            return common == abs_directory

        with tarfile.open(file, "r") as archive:
            # Validate every member before anything is written to disk.
            for member in archive.getmembers():
                member_path = os.path.join(target_dir, member.name)
                if not is_within_directory(target_dir, member_path):
                    raise Exception("Attempted Path Traversal in Tar File")
            archive.extractall(target_dir)
    elif file.endswith(".gz"):
        import gzip
        import shutil

        # Unlike the tar and zip extractors, a plain open() does not create
        # the destination directory.
        os.makedirs(target_dir, exist_ok=True)
        with gzip.open(file, "rb") as f_in:
            # Drop the ".gz" suffix to name the decompressed file.
            target_file = os.path.join(target_dir, os.path.basename(file)[:-3])
            with open(target_file, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
    elif file.endswith(".zip"):
        import zipfile

        with zipfile.ZipFile(file, "r") as archive:
            archive.extractall(path=target_dir)
    else:
        raise Exception("Unrecognized file type: " + file)
def makedirs(path):
    """Create a directory and all missing parents.

    The path is normalized and ``~`` is expanded first. Nothing happens if
    the directory already exists.

    Parameters
    ----------
    path : str
        Directory to create.

    Raises
    ------
    OSError
        If the directory cannot be created, e.g. because ``path`` exists
        but is not a directory or because of missing permissions.
    """
    # exist_ok only tolerates an already existing *directory*; every other
    # failure is reported to the caller instead of being swallowed.
    os.makedirs(os.path.expanduser(os.path.normpath(path)), exist_ok=True)
def add_nodepred_split(dataset, ratio, ntype=None):
    """Split the given dataset into training, validation and test sets for
    transductive node prediction task.

    It adds three node mask arrays ``'train_mask'``, ``'val_mask'`` and
    ``'test_mask'``, to each graph in the dataset. Each sample in the dataset
    thus must be a :class:`DGLGraph`.

    The training and validation sets receive ``int(n * ratio[0])`` and
    ``int(n * ratio[1])`` nodes respectively; the test set receives all
    remaining nodes, so rounding never leaves a node unassigned.

    Fix the random seed of NumPy to make the result deterministic::

        numpy.random.seed(42)

    Parameters
    ----------
    dataset : DGLDataset
        The dataset to modify.
    ratio : (float, float, float)
        Split ratios for training, validation and test sets. Must sum to one.
    ntype : str, optional
        The node type to add mask for.

    Raises
    ------
    ValueError
        If ``ratio`` does not contain exactly three values.

    Examples
    --------
    >>> dataset = dgl.data.AmazonCoBuyComputerDataset()
    >>> print('train_mask' in dataset[0].ndata)
    False
    >>> dgl.data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1])
    >>> print('train_mask' in dataset[0].ndata)
    True
    """
    if len(ratio) != 3:
        raise ValueError(
            f"Split ratio must be a float triplet but got {ratio}."
        )
    # Index explicitly rather than iterating the dataset: single-graph
    # datasets may assert on an out-of-range index instead of raising
    # IndexError, which would break plain iteration.
    for i in range(len(dataset)):
        g = dataset[i]
        n = g.num_nodes(ntype)
        idx = np.arange(0, n)
        np.random.shuffle(idx)
        n_train = int(n * ratio[0])
        n_val = int(n * ratio[1])
        val_end = n_train + n_val
        train_mask = generate_mask_tensor(idx2mask(idx[:n_train], n))
        val_mask = generate_mask_tensor(idx2mask(idx[n_train:val_end], n))
        # The test split is whatever is left after train and validation.
        test_mask = generate_mask_tensor(idx2mask(idx[val_end:], n))
        g.nodes[ntype].data["train_mask"] = train_mask
        g.nodes[ntype].data["val_mask"] = val_mask
        g.nodes[ntype].data["test_mask"] = test_mask
def add_node_property_split(
    dataset, part_ratios, property_name, ascending=True, random_seed=None
):
    """Add a property-based node split with distributional shift to every
    graph of a dataset.

    The split follows the paper "Evaluating Robustness and Uncertainty of
    Graph Models Under Structural Distributional Shifts". For each graph a
    structural node property is computed, and the nodes are divided into 5
    non-intersecting parts according to it. The in-distribution (ID) subset
    holds training, validation and testing parts; the out-of-distribution
    (OOD) subset holds validation and testing parts. Five boolean-style
    node masks are written to ``ndata`` of each graph:

    - ID nodes: ``'in_train_mask'``, ``'in_valid_mask'``, ``'in_test_mask'``
    - OOD nodes: ``'out_valid_mask'``, ``'out_test_mask'``

    Three strategies for inducing the shift are available, based on
    **popularity**, **locality** or **density**.

    Parameters
    ----------
    dataset : :class:`~DGLDataset` or list of :class:`~dgl.DGLGraph`
        The dataset to induce structural distributional shift.
    part_ratios : list
        A list of 5 ratio values for training, ID validation, ID test,
        OOD validation and OOD test parts. The values must sum to 1.0.
    property_name : str
        The name of the node property to be used, which must be
        ``'popularity'``, ``'locality'`` or ``'density'``.
    ascending : bool, optional
        Whether to sort nodes in the ascending order of the node property,
        so that nodes with greater values of the property are considered
        to be OOD (default: True)
    random_seed : int, optional
        Random seed to fix for the initial permutation of nodes. It is
        used to create a random order for the nodes that have the same
        property values or belong to the ID subset. (default: None)

    Examples
    --------
    >>> dataset = dgl.data.AmazonCoBuyComputerDataset()
    >>> print('in_valid_mask' in dataset[0].ndata)
    False
    >>> part_ratios = [0.3, 0.1, 0.1, 0.3, 0.2]
    >>> property_name = 'popularity'
    >>> dgl.data.utils.add_node_property_split(dataset, part_ratios, property_name)
    >>> print('in_valid_mask' in dataset[0].ndata)
    True
    """
    supported_properties = ("popularity", "locality", "density")
    assert (
        property_name in supported_properties
    ), "The name of property has to be 'popularity', 'locality', or 'density'"
    assert len(part_ratios) == 5, "part_ratios must contain 5 values"

    # Imported lazily so networkx is only needed when this function is used.
    import networkx as nx

    for position in range(len(dataset)):
        dgl_graph = dataset[position]
        # Wrap the converted graph in a plain nx.Graph before computing
        # the structural property.
        simple_graph = nx.Graph(dgl_graph.to_networkx())
        compute_values = _property_name_to_compute_fn[property_name]
        values = compute_values(simple_graph, ascending)
        masks = mask_nodes_by_property(values, part_ratios, random_seed)
        for name in masks:
            dgl_graph.ndata[name] = masks[name]
class WikiCSDataset(DGLBuiltinDataset):
    r"""Wiki-CS, a Wikipedia-based dataset for node classification.

    Introduced in "Wiki-CS: A Wikipedia-Based Benchmark for Graph Neural
    Networks".

    The dataset consists of nodes corresponding to Computer Science articles,
    with edges based on hyperlinks and 10 classes representing different
    branches of the field.

    WikiCS dataset statistics:

    - Nodes: 11,701
    - Edges: 431,726 (note that the original dataset has 216,123 edges but DGL
      adds the reverse edges and removes the duplicate edges, hence with a
      different number)
    - Number of classes: 10
    - Node feature size: 300
    - Number of different train, validation, stopping splits: 20
    - Number of test split: 1

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset.
        Default: False
    verbose : bool
        Whether to print out progress information.
        Default: False
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------
    >>> from dgl.data import WikiCSDataset
    >>> dataset = WikiCSDataset()
    >>> dataset.num_classes
    10
    >>> g = dataset[0]
    >>> # get node feature
    >>> feat = g.ndata['feat']
    >>> # get node labels
    >>> labels = g.ndata['label']
    >>> # get data split
    >>> train_mask = g.ndata['train_mask']
    >>> val_mask = g.ndata['val_mask']
    >>> stopping_mask = g.ndata['stopping_mask']
    >>> test_mask = g.ndata['test_mask']
    >>> # The shape of train, val and stopping masks are (num_nodes, num_splits).
    >>> # The num_splits is the number of different train, validation, stopping splits.
    >>> # Since there is only one test split, the shape of test mask is (num_nodes,).
    >>> print(train_mask.shape, val_mask.shape, stopping_mask.shape)
    (11701, 20) (11701, 20) (11701, 20)
    >>> print(test_mask.shape)
    (11701,)
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=False, transform=None
    ):
        download_url = _get_dgl_url("dataset/wiki_cs.zip")
        super().__init__(
            name="wiki_cs",
            raw_dir=raw_dir,
            url=download_url,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Build the graph, features, labels and split masks from raw JSON."""
        json_path = os.path.join(self.raw_path, "data.json")
        with open(json_path) as f:
            data = json.load(f)

        features = F.tensor(np.array(data["features"]), dtype=F.float32)
        labels = F.tensor(np.array(data["labels"]), dtype=F.int64)

        # The multi-split masks are transposed so that nodes come first.
        train_masks = np.array(data["train_masks"], dtype=bool).T
        val_masks = np.array(data["val_masks"], dtype=bool).T
        stopping_masks = np.array(data["stopping_masks"], dtype=bool).T
        test_mask = np.array(data["test_mask"], dtype=bool)

        # data["links"][i] lists the destinations of node i; flatten the
        # adjacency lists into (src, dst) pairs.
        pairs = np.array(
            [
                (src_id, dst_id)
                for src_id, neighbors in enumerate(data["links"])
                for dst_id in neighbors
            ]
        )
        g = to_bidirected(graph((pairs[:, 0], pairs[:, 1])))

        g.ndata["feat"] = features
        g.ndata["label"] = labels
        g.ndata["train_mask"] = generate_mask_tensor(train_masks)
        g.ndata["val_mask"] = generate_mask_tensor(val_masks)
        g.ndata["stopping_mask"] = generate_mask_tensor(stopping_masks)
        g.ndata["test_mask"] = generate_mask_tensor(test_mask)

        self._graph = reorder_graph(
            g,
            node_permute_algo="rcmk",
            edge_permute_algo="dst",
            store_ids=False,
        )

    def has_cache(self):
        """Return whether the processed graph file exists in ``save_path``."""
        return os.path.exists(os.path.join(self.save_path, "dgl_graph.bin"))

    def save(self):
        """Write the processed graph to ``save_path``."""
        save_graphs(os.path.join(self.save_path, "dgl_graph.bin"), self._graph)

    def load(self):
        """Read the processed graph back from ``save_path``."""
        graphs, _ = load_graphs(os.path.join(self.save_path, "dgl_graph.bin"))
        self._graph = graphs[0]

    @property
    def num_classes(self):
        """Number of node classes."""
        return 10

    def __len__(self):
        r"""The number of graphs in the dataset."""
        return 1

    def __getitem__(self, idx):
        r"""Get graph object

        Parameters
        ----------
        idx : int
            Item index, WikiCSDataset has only one graph object

        Returns
        -------
        :class:`dgl.DGLGraph`

            The graph contains:

            - ``ndata['feat']``: node features
            - ``ndata['label']``: node labels
            - ``ndata['train_mask']``: train mask is for retrieving the nodes for training.
            - ``ndata['val_mask']``: val mask is for retrieving the nodes for hyperparameter tuning.
            - ``ndata['stopping_mask']``: stopping mask is for retrieving the nodes for early stopping criterion.
            - ``ndata['test_mask']``: test mask is for retrieving the nodes for testing.

        """
        assert idx == 0, "This dataset has only one graph"
        if self._transform is not None:
            return self._transform(self._graph)
        return self._graph
class YelpDataset(DGLBuiltinDataset):
    r"""Yelp dataset for node classification.

    Introduced in "GraphSAINT: Graph Sampling Based Inductive Learning
    Method".

    The task of this dataset is categorizing types of businesses based on
    customer reviewers and friendship.

    Yelp dataset statistics:

    - Nodes: 716,847
    - Edges: 13,954,819
    - Number of classes: 100 (Multi-class)
    - Node feature size: 300

    Parameters
    ----------
    raw_dir : str
        Raw file directory to download/contains the input data directory.
        Default: ~/.dgl/
    force_reload : bool
        Whether to reload the dataset.
        Default: False
    verbose : bool
        Whether to print out progress information.
        Default: False
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.
    reorder : bool
        Whether to reorder the graph using :func:`~dgl.reorder_graph`.
        Default: False.

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Examples
    --------
    >>> dataset = YelpDataset()
    >>> dataset.num_classes
    100
    >>> g = dataset[0]
    >>> # get node feature
    >>> feat = g.ndata['feat']
    >>> # get node labels
    >>> labels = g.ndata['label']
    >>> # get data split
    >>> train_mask = g.ndata['train_mask']
    >>> val_mask = g.ndata['val_mask']
    >>> test_mask = g.ndata['test_mask']
    """

    def __init__(
        self,
        raw_dir=None,
        force_reload=False,
        verbose=False,
        transform=None,
        reorder=False,
    ):
        download_url = _get_dgl_url("dataset/yelp.zip")
        # Stored before calling the base constructor, as process() reads it.
        # NOTE(review): presumably the base constructor can trigger
        # process(); keep this assignment first.
        self._reorder = reorder
        super().__init__(
            name="yelp",
            raw_dir=raw_dir,
            url=download_url,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    def process(self):
        """Build the graph, features, labels and split masks from raw files."""
        raw_path = self.raw_path
        g = from_scipy(sp.load_npz(os.path.join(raw_path, "adj_full.npz")))

        features = np.load(os.path.join(raw_path, "feats.npy"))
        features = F.tensor(features, dtype=F.float32)
        num_nodes = features.shape[0]

        # Nodes absent from class_map.json keep the placeholder -1.
        node_labels = [-1] * num_nodes
        with open(os.path.join(raw_path, "class_map.json")) as f:
            class_map = json.load(f)
        for node_id, node_label in class_map.items():
            node_labels[int(node_id)] = node_label
        labels = F.tensor(np.array(node_labels), dtype=F.int64)

        # role.json lists the node IDs of each split under "tr"/"va"/"te".
        with open(os.path.join(raw_path, "role.json")) as f:
            role = json.load(f)
        split_masks = {}
        for mask_name, role_key in (
            ("train_mask", "tr"),
            ("val_mask", "va"),
            ("test_mask", "te"),
        ):
            mask = np.zeros(num_nodes, dtype=bool)
            mask[role[role_key]] = True
            split_masks[mask_name] = mask

        g.ndata["feat"] = features
        g.ndata["label"] = labels
        for mask_name, mask in split_masks.items():
            g.ndata[mask_name] = generate_mask_tensor(mask)

        if not self._reorder:
            self._graph = g
            return
        self._graph = reorder_graph(
            g,
            node_permute_algo="rcmk",
            edge_permute_algo="dst",
            store_ids=False,
        )

    def has_cache(self):
        """Return whether the processed graph file exists in ``save_path``."""
        return os.path.exists(os.path.join(self.save_path, "dgl_graph.bin"))

    def save(self):
        """Write the processed graph to ``save_path``."""
        save_graphs(os.path.join(self.save_path, "dgl_graph.bin"), self._graph)

    def load(self):
        """Read the processed graph back from ``save_path``."""
        graphs, _ = load_graphs(os.path.join(self.save_path, "dgl_graph.bin"))
        self._graph = graphs[0]

    @property
    def num_classes(self):
        """Number of node classes."""
        return 100

    def __len__(self):
        r"""The number of graphs in the dataset."""
        return 1

    def __getitem__(self, idx):
        r"""Get graph object

        Parameters
        ----------
        idx : int
            Item index, YelpDataset has only one graph object

        Returns
        -------
        :class:`dgl.DGLGraph`

            The graph contains:

            - ``ndata['label']``: node label
            - ``ndata['feat']``: node feature
            - ``ndata['train_mask']``: mask for training node set
            - ``ndata['val_mask']``: mask for validation node set
            - ``ndata['test_mask']``: mask for test node set

        """
        assert idx == 0, "This dataset has only one graph"
        if self._transform is not None:
            return self._transform(self._graph)
        return self._graph
verbose : bool Whether to print out progress information. Default: False. transform : callable, optional A transform that takes in a :class:`~dgl.DGLGraph` object and returns a transformed version. The :class:`~dgl.DGLGraph` object will be transformed before every access. Attributes ---------- num_atom_types : int Number of atom types. num_bond_types : int Number of bond types. Examples --------- >>> from dgl.data import ZINCDataset >>> training_set = ZINCDataset(mode="train") >>> training_set.num_atom_types 28 >>> len(training_set) 10000 >>> graph, label = training_set[0] >>> graph Graph(num_nodes=29, num_edges=64, ndata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'feat': Scheme(shape=(), dtype=torch.int64)}) """ def __init__( self, mode="train", raw_dir=None, force_reload=False, verbose=False, transform=None, ): self._url = _get_dgl_url("dataset/ZINC12k.zip") self.mode = mode super(ZINCDataset, self).__init__( name="zinc", url=self._url, raw_dir=raw_dir, force_reload=force_reload, verbose=verbose, transform=transform, ) def process(self): self.load() @property def graph_path(self): return os.path.join(self.save_path, "ZincDGL_{}.bin".format(self.mode)) def has_cache(self): return os.path.exists(self.graph_path) def load(self): self._graphs, self._labels = load_graphs(self.graph_path) @property def num_atom_types(self): return 28 @property def num_bond_types(self): return 4 def __len__(self): return len(self._graphs) def __getitem__(self, idx): r"""Get one example by index. Parameters ---------- idx : int The sample index. 
Returns ------- dgl.DGLGraph Each graph contains: - ``ndata['feat']``: Types of heavy atoms as node features - ``edata['feat']``: Types of bonds as edge features Tensor Constrained solubility as graph label """ labels = self._labels["g_label"] if self._transform is None: return self._graphs[idx], labels[idx] else: return self._transform(self._graphs[idx]), labels[idx] ================================================ FILE: python/dgl/dataloading/__init__.py ================================================ """Package for dataloaders and samplers.""" from .. import backend as F from . import negative_sampler from .base import * from .cluster_gcn import * from .graphsaint import * from .labor_sampler import * from .neighbor_sampler import * from .shadow import * if F.get_preferred_backend() == "pytorch": from .spot_target import * from .dataloader import * ================================================ FILE: python/dgl/dataloading/base.py ================================================ """Base classes and functionalities for dataloaders""" import inspect from collections.abc import Mapping from .. import backend as F from ..base import EID, NID from ..convert import heterograph from ..frame import LazyFeature from ..transforms import compact_graphs from ..utils import context_of, recursive_apply def _set_lazy_features(x, xdata, feature_names): if feature_names is None: return if not isinstance(feature_names, Mapping): xdata.update({k: LazyFeature(k) for k in feature_names}) else: for type_, names in feature_names.items(): x[type_].data.update({k: LazyFeature(k) for k in names}) def set_node_lazy_features(g, feature_names): """Assign lazy features to the ``ndata`` of the input graph for prefetching optimization. When used in a :class:`~dgl.dataloading.Sampler`, lazy features mark which data should be fetched before computation in model. See :ref:`guide-minibatch-prefetching` for a detailed explanation. If the graph is homogeneous, this is equivalent to: .. 
def set_edge_lazy_features(g, feature_names):
    """Assign lazy features to the ``edata`` of the input graph for
    prefetching optimization.

    When used in a :class:`~dgl.dataloading.Sampler`, lazy features mark
    which data should be fetched before computation in model. See
    :ref:`guide-minibatch-prefetching` for a detailed explanation.

    If ``feature_names`` is a list, this is equivalent to:

    .. code:: python

       g.edata.update({k: LazyFeature(k) for k in feature_names})

    If ``feature_names`` is a dict keyed by edge type, this is equivalent to:

    .. code:: python

       for type_, names in feature_names.items():
           g.edges[type_].data.update({k: LazyFeature(k) for k in names})

    Parameters
    ----------
    g : DGLGraph
        The graph.
    feature_names : list[str] or dict[etype, list[str]]
        The feature names to prefetch. The ``etype`` key is either a string
        or a triplet.

    See also
    --------
    dgl.LazyFeature
    """
    edge_view, edge_frame = g.edges, g.edata
    return _set_lazy_features(edge_view, edge_frame, feature_names)
class Sampler:
    """Base class for graph samplers.

    Every graph sampler derives from this class and provides its own
    ``sample`` method.

    .. code:: python

       from dgl.dataloading import Sampler

       class SubgraphSampler(Sampler):
           def __init__(self):
               super().__init__()

           def sample(self, g, indices):
               return g.subgraph(indices)
    """

    def sample(self, g, indices):
        """Abstract sample method; subclasses must override it.

        Parameters
        ----------
        g : DGLGraph
            The graph.
        indices : object
            Any object representing the indices selected in the current
            minibatch.

        Raises
        ------
        NotImplementedError
            Always, in this base implementation.
        """
        raise NotImplementedError()
""" raise NotImplementedError def assign_lazy_features(self, result): """Assign lazy features for prefetching.""" input_nodes, output_nodes, blocks = result set_src_lazy_features(blocks[0], self.prefetch_node_feats) set_dst_lazy_features(blocks[-1], self.prefetch_labels) for block in blocks: set_edge_lazy_features(block, self.prefetch_edge_feats) return input_nodes, output_nodes, blocks def sample( self, g, seed_nodes, exclude_eids=None ): # pylint: disable=arguments-differ """Sample a list of blocks from the given seed nodes.""" result = self.sample_blocks(g, seed_nodes, exclude_eids=exclude_eids) return self.assign_lazy_features(result) def _find_exclude_eids_with_reverse_id(g, eids, reverse_eid_map): if isinstance(eids, Mapping): eids = {g.to_canonical_etype(k): v for k, v in eids.items()} exclude_eids = { k: F.cat([v, F.gather_row(reverse_eid_map[k], v)], 0) for k, v in eids.items() } else: exclude_eids = F.cat([eids, F.gather_row(reverse_eid_map, eids)], 0) return exclude_eids def _find_exclude_eids_with_reverse_types(g, eids, reverse_etype_map): exclude_eids = {g.to_canonical_etype(k): v for k, v in eids.items()} reverse_etype_map = { g.to_canonical_etype(k): g.to_canonical_etype(v) for k, v in reverse_etype_map.items() } for k, v in reverse_etype_map.items(): if k in exclude_eids: if v in exclude_eids: exclude_eids[v] = F.unique( F.cat((exclude_eids[k], exclude_eids[v]), dim=0) ) else: exclude_eids[v] = exclude_eids[k] return exclude_eids def _find_exclude_eids(g, exclude_mode, eids, **kwargs): if exclude_mode is None: return None elif callable(exclude_mode): return exclude_mode(eids) elif F.is_tensor(exclude_mode) or ( isinstance(exclude_mode, Mapping) and all(F.is_tensor(v) for v in exclude_mode.values()) ): return exclude_mode elif exclude_mode == "self": return eids elif exclude_mode == "reverse_id": return _find_exclude_eids_with_reverse_id( g, eids, kwargs["reverse_eid_map"] ) elif exclude_mode == "reverse_types": return 
_find_exclude_eids_with_reverse_types( g, eids, kwargs["reverse_etype_map"] ) else: raise ValueError("unsupported mode {}".format(exclude_mode)) def find_exclude_eids( g, seed_edges, exclude, reverse_eids=None, reverse_etypes=None, output_device=None, ): """Find all edge IDs to exclude according to :attr:`exclude_mode`. Parameters ---------- g : DGLGraph The graph. exclude : Can be either of the following, None (default) Does not exclude any edge. 'self' Exclude the given edges themselves but nothing else. 'reverse_id' Exclude all edges specified in ``eids``, as well as their reverse edges of the same edge type. The mapping from each edge ID to its reverse edge ID is specified in the keyword argument ``reverse_eid_map``. This mode assumes that the reverse of an edge with ID ``e`` and type ``etype`` will have ID ``reverse_eid_map[e]`` and type ``etype``. 'reverse_types' Exclude all edges specified in ``eids``, as well as their reverse edges of the corresponding edge types. The mapping from each edge type to its reverse edge type is specified in the keyword argument ``reverse_etype_map``. This mode assumes that the reverse of an edge with ID ``e`` and type ``etype`` will have ID ``e`` and type ``reverse_etype_map[etype]``. callable Any function that takes in a single argument :attr:`seed_edges` and returns a tensor or dict of tensors. eids : Tensor or dict[etype, Tensor] The edge IDs. reverse_eids : Tensor or dict[etype, Tensor] The mapping from edge ID to its reverse edge ID. reverse_etypes : dict[etype, etype] The mapping from edge etype to its reverse edge type. output_device : device The device of the output edge IDs. 
""" exclude_eids = _find_exclude_eids( g, exclude, seed_edges, reverse_eid_map=reverse_eids, reverse_etype_map=reverse_etypes, ) if exclude_eids is not None and output_device is not None: exclude_eids = recursive_apply( exclude_eids, lambda x: F.copy_to(x, output_device) ) return exclude_eids class EdgePredictionSampler(Sampler): """Sampler class that wraps an existing sampler for node classification into another one for edge classification or link prediction. See also -------- as_edge_prediction_sampler """ def __init__( self, sampler, exclude=None, reverse_eids=None, reverse_etypes=None, negative_sampler=None, prefetch_labels=None, ): super().__init__() # Check if the sampler's sample method has an optional third argument. argspec = inspect.getfullargspec(sampler.sample) if len(argspec.args) < 4: # ['self', 'g', 'indices', 'exclude_eids'] raise TypeError( "This sampler does not support edge or link prediction; please add an" "optional third argument for edge IDs to exclude in its sample() method." ) self.reverse_eids = reverse_eids self.reverse_etypes = reverse_etypes self.exclude = exclude self.sampler = sampler self.negative_sampler = negative_sampler self.prefetch_labels = prefetch_labels or [] self.output_device = sampler.output_device def _build_neg_graph(self, g, seed_edges): neg_srcdst = self.negative_sampler(g, seed_edges) if not isinstance(neg_srcdst, Mapping): assert len(g.canonical_etypes) == 1, ( "graph has multiple or no edge types; " "please return a dict in negative sampler." 
) neg_srcdst = {g.canonical_etypes[0]: neg_srcdst} dtype = F.dtype(list(neg_srcdst.values())[0][0]) ctx = context_of(seed_edges) if seed_edges is not None else g.device neg_edges = { etype: neg_srcdst.get( etype, ( F.copy_to(F.tensor([], dtype), ctx=ctx), F.copy_to(F.tensor([], dtype), ctx=ctx), ), ) for etype in g.canonical_etypes } neg_pair_graph = heterograph( neg_edges, {ntype: g.num_nodes(ntype) for ntype in g.ntypes} ) return neg_pair_graph def assign_lazy_features(self, result): """Assign lazy features for prefetching.""" pair_graph = result[1] set_edge_lazy_features(pair_graph, self.prefetch_labels) # In-place updates return result def sample(self, g, seed_edges): # pylint: disable=arguments-differ """Samples a list of blocks, as well as a subgraph containing the sampled edges from the original graph. If :attr:`negative_sampler` is given, also returns another graph containing the negative pairs as edges. """ if isinstance(seed_edges, Mapping): seed_edges = { g.to_canonical_etype(k): v for k, v in seed_edges.items() } exclude = self.exclude pair_graph = g.edge_subgraph( seed_edges, relabel_nodes=False, output_device=self.output_device ) eids = pair_graph.edata[EID] if self.negative_sampler is not None: neg_graph = self._build_neg_graph(g, seed_edges) pair_graph, neg_graph = compact_graphs([pair_graph, neg_graph]) else: pair_graph = compact_graphs(pair_graph) pair_graph.edata[EID] = eids seed_nodes = pair_graph.ndata[NID] exclude_eids = find_exclude_eids( g, seed_edges, exclude, self.reverse_eids, self.reverse_etypes, self.output_device, ) input_nodes, _, blocks = self.sampler.sample( g, seed_nodes, exclude_eids ) if self.negative_sampler is None: return self.assign_lazy_features((input_nodes, pair_graph, blocks)) else: return self.assign_lazy_features( (input_nodes, pair_graph, neg_graph, blocks) ) def as_edge_prediction_sampler( sampler, exclude=None, reverse_eids=None, reverse_etypes=None, negative_sampler=None, prefetch_labels=None, ): """Create an 
edge-wise sampler from a node-wise sampler. For each batch of edges, the sampler applies the provided node-wise sampler to their source and destination nodes to extract subgraphs. It also generates negative edges if a negative sampler is provided, and extract subgraphs for their incident nodes as well. For each iteration, the sampler will yield * A tensor of input nodes necessary for computing the representation on edges, or a dictionary of node type names and such tensors. * A subgraph that contains only the edges in the minibatch and their incident nodes. Note that the graph has an identical metagraph with the original graph. * If a negative sampler is given, another graph that contains the "negative edges", connecting the source and destination nodes yielded from the given negative sampler. * The subgraphs or MFGs returned by the provided node-wise sampler, generated from the incident nodes of the edges in the minibatch (as well as those of the negative edges if applicable). Parameters ---------- sampler : Sampler The node-wise sampler object. It additionally requires that the :attr:`sample` method must have an optional third argument :attr:`exclude_eids` representing the edge IDs to exclude from neighborhood. The argument will be either a tensor for homogeneous graphs or a dict of edge types and tensors for heterogeneous graphs. exclude : Union[str, callable], optional Whether and how to exclude dependencies related to the sampled edges in the minibatch. Possible values are * None, for not excluding any edges. * ``self``, for excluding the edges in the current minibatch. * ``reverse_id``, for excluding not only the edges in the current minibatch but also their reverse edges according to the ID mapping in the argument :attr:`reverse_eids`. * ``reverse_types``, for excluding not only the edges in the current minibatch but also their reverse edges stored in another type according to the argument :attr:`reverse_etypes`. * User-defined exclusion rule. 
It is a callable with edges in the current minibatch as a single argument and should return the edges to be excluded. reverse_eids : Tensor or dict[etype, Tensor], optional A tensor of reverse edge ID mapping. The i-th element indicates the ID of the i-th edge's reverse edge. If the graph is heterogeneous, this argument requires a dictionary of edge types and the reverse edge ID mapping tensors. reverse_etypes : dict[etype, etype], optional The mapping from the original edge types to their reverse edge types. negative_sampler : callable, optional The negative sampler. prefetch_labels : list[str] or dict[etype, list[str]], optional The edge labels to prefetch for the returned positive pair graph. See :ref:`guide-minibatch-prefetching` for a detailed explanation of prefetching. Examples -------- The following example shows how to train a 3-layer GNN for edge classification on a set of edges ``train_eid`` on a homogeneous undirected graph. Each node takes messages from all neighbors. Given an array of source node IDs ``src`` and another array of destination node IDs ``dst``, the following code creates a bidirectional graph: >>> g = dgl.graph((torch.cat([src, dst]), torch.cat([dst, src]))) Edge :math:`i`'s reverse edge in the graph above is edge :math:`i + |E|`. Therefore, we can create a reverse edge mapping ``reverse_eids`` by: >>> E = len(src) >>> reverse_eids = torch.cat([torch.arange(E, 2 * E), torch.arange(0, E)]) By passing ``reverse_eids`` to the edge sampler, the edges in the current mini-batch and their reversed edges will be excluded from the extracted subgraphs to avoid information leakage. >>> sampler = dgl.dataloading.as_edge_prediction_sampler( ... dgl.dataloading.NeighborSampler([15, 10, 5]), ... exclude='reverse_id', reverse_eids=reverse_eids) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_eid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pair_graph, blocks in dataloader: ... 
train_on(input_nodes, pair_graph, blocks) For link prediction, one can provide a negative sampler to sample negative edges. The code below uses DGL's :class:`~dgl.dataloading.negative_sampler.Uniform` to generate 5 negative samples per edge: >>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5) >>> sampler = dgl.dataloading.as_edge_prediction_sampler( ... dgl.dataloading.NeighborSampler([15, 10, 5]), ... exclude='reverse_id', reverse_eids=reverse_eids, ... negative_sampler=neg_sampler) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_eid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader: ... train_on(input_nodes, pos_pair_graph, neg_pair_graph, blocks) For heterogeneous graphs, reverse edges may belong to a different relation. For example, the relations "user-click-item" and "item-clicked-by-user" in the graph below are reverses of each other. >>> g = dgl.heterograph({ ... ('user', 'click', 'item'): (user, item), ... ('item', 'clicked-by', 'user'): (item, user)}) To correctly exclude edges from each mini-batch, set ``exclude='reverse_types'`` and pass a dictionary ``{'click': 'clicked-by', 'clicked-by': 'click'}`` to the ``reverse_etypes`` argument. >>> sampler = dgl.dataloading.as_edge_prediction_sampler( ... dgl.dataloading.NeighborSampler([15, 10, 5]), ... exclude='reverse_types', ... reverse_etypes={'click': 'clicked-by', 'clicked-by': 'click'}) >>> dataloader = dgl.dataloading.DataLoader( ... g, {'click': train_eid}, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pair_graph, blocks in dataloader: ... train_on(input_nodes, pair_graph, blocks) For link prediction, provide a negative sampler to generate negative samples: >>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5) >>> sampler = dgl.dataloading.as_edge_prediction_sampler( ... dgl.dataloading.NeighborSampler([15, 10, 5]), ...
class CappedNeighborSampler(Sampler):
    """Subgraph sampler that sets an upper bound on the number of nodes included in
    each layer of the sampled subgraph. At each layer, the frontier is randomly
    subsampled. Rare node types can also be upsampled by taking the scaled square
    root of the sampling probabilities. The sampler returns the subgraph induced by
    all the sampled nodes.

    This code was contributed by a community member
    ([@ayushnoori](https://github.com/ayushnoori)). There aren't currently any unit
    tests in place to verify its functionality, so please be cautious if you need
    to make any changes to the code's logic.

    Parameters
    ----------
    fanouts : list[int] or dict[etype, int]
        List of neighbors to sample per edge type for each GNN layer, with the i-th
        element being the fanout for the i-th GNN layer.

        - If only a single integer is provided, DGL assumes that every edge type
          will have the same fanout.

        - If -1 is provided for one edge type on one layer, then all inbound edges
          of that edge type will be included.
    fixed_k : int
        The number of nodes to sample for each GNN layer.
    upsample_rare_types : bool
        Whether or not to upsample rare node types.
    replace : bool, default False
        Whether to sample with replacement.
    prob : str, optional
        If given, the probability of each neighbor being sampled is proportional
        to the edge feature value with the given name in ``g.edata``.
        The feature must be a scalar on each edge.
    prefetch_node_feats : optional
        Forwarded to ``set_node_lazy_features`` for the sampled subgraph.
    prefetch_edge_feats : optional
        Forwarded to ``set_edge_lazy_features`` for the sampled subgraph.
    output_device : device, optional
        Forwarded as ``output_device`` to ``g.sample_neighbors`` and
        ``g.subgraph``.
    """

    def __init__(
        self,
        fanouts,
        fixed_k,
        upsample_rare_types,
        replace=False,
        prob=None,
        prefetch_node_feats=None,
        prefetch_edge_feats=None,
        output_device=None,
    ):
        super().__init__()
        self.fanouts = fanouts
        self.replace = replace
        self.fixed_k = fixed_k
        self.upsample_rare_types = upsample_rare_types
        self.prob = prob
        self.prefetch_node_feats = prefetch_node_feats
        self.prefetch_edge_feats = prefetch_edge_feats
        self.output_device = output_device

    def sample(
        self, g, indices, exclude_eids=None
    ):  # pylint: disable=arguments-differ
        """Sampling function.

        Parameters
        ----------
        g : DGLGraph
            The graph to sample from.
        indices : dict[str, Tensor]
            Seed nodes, keyed by node type. The merge step below looks the
            seeds up by node type, so a dict is required.
        exclude_eids : Tensor or dict[etype, Tensor], optional
            The edges to exclude from the sampled subgraph.

        Returns
        -------
        input_nodes : dict[str, Tensor]
            The (capped) frontier reached by the last sampling step.
        output_nodes : dict[str, Tensor]
            The seed nodes that were passed in as :attr:`indices`.
        subg : DGLGraph
            The subgraph induced by the seeds and every capped frontier.
        """
        # Keep the seeds and every capped frontier so they can be merged later.
        output_nodes = indices
        all_reached_nodes = [indices]

        # Iterate over fanout.
        for fanout in reversed(self.fanouts):
            # Sample frontier.
            frontier = g.sample_neighbors(
                indices,
                fanout,
                output_device=self.output_device,
                replace=self.replace,
                prob=self.prob,
                exclude_edges=exclude_eids,
            )

            # Get reached nodes: the source endpoints of each sampled edge type.
            curr_reached = defaultdict(list)
            for c_etype in frontier.canonical_etypes:
                (src_type, _, _) = c_etype
                src, _ = frontier.edges(etype=c_etype)
                curr_reached[src_type].append(src)

            # De-duplication.
            curr_reached = {
                ntype: torch.unique(torch.cat(srcs))
                for ntype, srcs in curr_reached.items()
            }

            # Generate type sampling probabilities.
            type_count = {
                node_type: node_ids.shape[0]
                for node_type, node_ids in curr_reached.items()
            }
            # NOTE(review): total_count is 0 when the frontier has no edges,
            # in which case the division below raises ZeroDivisionError.
            total_count = sum(type_count.values())
            probs = {
                node_type: count / total_count
                for node_type, count in type_count.items()
            }

            # Upsample rare node types.
            if self.upsample_rare_types:
                # Take scaled square root of probabilities.
                prob_dist = list(probs.values())
                prob_dist = np.sqrt(prob_dist)
                prob_dist = prob_dist / prob_dist.sum()

                # Update probabilities.
                probs = {
                    node_type: prob_dist[i]
                    for i, node_type in enumerate(probs.keys())
                }

            # Generate node counts per type.
            n_per_type = {
                node_type: int(self.fixed_k * prob)
                for node_type, prob in probs.items()
            }
            # int() truncates, so hand out the leftover slots one by one,
            # drawing the receiving type from the same distribution.
            remainder = self.fixed_k - sum(n_per_type.values())
            for _ in range(remainder):
                node_type = np.random.choice(
                    list(probs.keys()), p=list(probs.values())
                )
                n_per_type[node_type] += 1

            # Downsample nodes.
            curr_reached_k = {}
            for node_type, node_ids in curr_reached.items():
                # Get number of total nodes and number to sample.
                num_nodes = node_ids.shape[0]
                n_to_sample = min(num_nodes, n_per_type[node_type])

                # Downsample nodes of current type.
                random_indices = torch.randperm(num_nodes)[:n_to_sample]
                curr_reached_k[node_type] = node_ids[random_indices]

            # Update seed nodes.
            indices = curr_reached_k
            all_reached_nodes.append(curr_reached_k)

        # Merge all reached nodes before sending to `DGLGraph.subgraph`.
        # A node type may be absent from some (or all) of the dicts, e.g. when
        # the seeds cover only one node type.  Only concatenate the tensors
        # that exist: torch.cat() rejects a Python list placeholder.  Node
        # types that were never reached are left out of the dict.
        merged_nodes = {}
        for ntype in g.ntypes:
            parts = [
                reached[ntype]
                for reached in all_reached_nodes
                if ntype in reached
            ]
            if parts:
                merged_nodes[ntype] = torch.unique(torch.cat(parts))

        subg = g.subgraph(
            merged_nodes, relabel_nodes=True, output_device=self.output_device
        )
        if exclude_eids is not None:
            subg = EidExcluder(exclude_eids)(subg)

        set_node_lazy_features(subg, self.prefetch_node_feats)
        set_edge_lazy_features(subg, self.prefetch_edge_feats)

        return indices, output_nodes, subg
output_device : device, optional The device of the output subgraphs or MFGs. Default is the same as the minibatch of partition indices. Examples -------- **Node classification** With this sampler, the data loader will accept the list of partition IDs as indices to iterate over. For instance, the following code first splits the graph into 1000 partitions using METIS, and at each iteration it gets a subgraph induced by the nodes covered by 20 randomly selected partitions. >>> num_parts = 1000 >>> sampler = dgl.dataloading.ClusterGCNSampler(g, num_parts) >>> dataloader = dgl.dataloading.DataLoader( ... g, torch.arange(num_parts), sampler, ... batch_size=20, shuffle=True, drop_last=False, num_workers=4) >>> for subg in dataloader: ... train_on(subg) """ def __init__( self, g, k, cache_path="cluster_gcn.pkl", balance_ntypes=None, balance_edges=False, mode="k-way", prefetch_ndata=None, prefetch_edata=None, output_device=None, ): super().__init__() if os.path.exists(cache_path): try: with open(cache_path, "rb") as f: ( self.partition_offset, self.partition_node_ids, ) = pickle.load(f) except (EOFError, TypeError, ValueError): raise DGLError( f"The contents in the cache file {cache_path} is invalid. " f"Please remove the cache file {cache_path} or specify another path." ) if len(self.partition_offset) != k + 1: raise DGLError( f"Number of partitions in the cache does not match the value of k. " f"Please remove the cache file {cache_path} or specify another path." ) if len(self.partition_node_ids) != g.num_nodes(): raise DGLError( f"Number of nodes in the cache does not match the given graph. " f"Please remove the cache file {cache_path} or specify another path." 
) else: partition_ids = metis_partition_assignment( g, k, balance_ntypes=balance_ntypes, balance_edges=balance_edges, mode=mode, ) partition_ids = F.asnumpy(partition_ids) partition_node_ids = np.argsort(partition_ids) partition_size = F.zerocopy_from_numpy( np.bincount(partition_ids, minlength=k) ) partition_offset = F.zerocopy_from_numpy( np.insert(np.cumsum(partition_size), 0, 0) ) partition_node_ids = F.zerocopy_from_numpy(partition_node_ids) with open(cache_path, "wb") as f: pickle.dump((partition_offset, partition_node_ids), f) self.partition_offset = partition_offset self.partition_node_ids = partition_node_ids self.prefetch_ndata = prefetch_ndata or [] self.prefetch_edata = prefetch_edata or [] self.output_device = output_device def sample(self, g, partition_ids): # pylint: disable=arguments-differ """Sampling function. Parameters ---------- g : DGLGraph The graph to sample from. partition_ids : Tensor A 1-D integer tensor of partition IDs. Returns ------- DGLGraph The sampled subgraph. """ node_ids = F.cat( [ self.partition_node_ids[ self.partition_offset[i] : self.partition_offset[i + 1] ] for i in F.asnumpy(partition_ids) ], 0, ) sg = g.subgraph( node_ids, relabel_nodes=True, output_device=self.output_device ) set_node_lazy_features(sg, self.prefetch_ndata) set_edge_lazy_features(sg, self.prefetch_edata) return sg ================================================ FILE: python/dgl/dataloading/dataloader.py ================================================ """DGL PyTorch DataLoaders""" import atexit import inspect import itertools import math import operator import os import re import threading from collections.abc import Mapping, Sequence from contextlib import contextmanager from functools import reduce from queue import Empty, Full, Queue import numpy as np import psutil import torch import torch.distributed as dist from torch.utils.data.distributed import DistributedSampler from .. 
class _TensorizedDatasetIter(object):
    """Iterator that yields minibatches sliced from an index tensor.

    When ``mapping_keys`` is None each batch is a cloned slice of ``dataset``.
    Otherwise ``dataset`` is read as rows of ``(type_id, index)`` pairs and
    each batch is a dict from ``mapping_keys[type_id]`` to the indices of
    that type.
    """

    def __init__(self, dataset, batch_size, drop_last, mapping_keys, shuffle):
        self.dataset = dataset
        self.batch_size = batch_size
        self.drop_last = drop_last
        self.mapping_keys = mapping_keys
        self.index = 0
        self.shuffle = shuffle

    # For PyTorch Lightning compatibility
    def __iter__(self):
        return self

    def _next_indices(self):
        """Return the next slice of ``dataset`` and advance the cursor."""
        total = self.dataset.shape[0]
        start = self.index
        if start >= total:
            raise StopIteration

        stop = start + self.batch_size
        if stop > total:
            # The last batch is incomplete: drop it or truncate it.
            if self.drop_last:
                raise StopIteration
            stop = total

        self.index = start + self.batch_size
        return self.dataset[start:stop]

    def __next__(self):
        batch = self._next_indices()
        if self.mapping_keys is None:
            # clone() fixes #3755, probably. Not sure why. Need to take a look afterwards.
            return batch.clone()

        # Group the (type_id, index) rows by type; the stable sort keeps the
        # original order of the indices within each type.
        order = torch.sort(batch[:, 0], stable=True)[1]
        sorted_types = batch[:, 0][order]
        sorted_ids = batch[:, 1][order]

        uniq_types, counts = torch.unique_consecutive(
            sorted_types, return_counts=True
        )
        # bounds[i]:bounds[i + 1] is the span of the i-th distinct type.
        bounds = [0] + counts.cumsum(0).tolist()
        return {
            self.mapping_keys[type_id]: sorted_ids[lo:hi].clone()
            for type_id, lo, hi in zip(
                uniq_types.tolist(), bounds[:-1], bounds[1:]
            )
        }
def _divide_by_worker(dataset, batch_size, drop_last):
    """Return the part of ``dataset`` assigned to the current DataLoader worker.

    Outside a worker process (``get_worker_info()`` is falsy) the dataset is
    returned unchanged.  Inside a worker, whole batches are split as evenly
    as possible: the first ``num_batches % num_workers`` workers get one
    extra batch each.
    """
    total = dataset.shape[0]
    info = torch.utils.data.get_worker_info()
    if not info:
        return dataset

    # Round up unless incomplete trailing batches are dropped.
    rounding = 0 if drop_last else batch_size - 1
    base, extra = divmod((total + rounding) // batch_size, info.num_workers)

    first_batch = base * info.id + min(extra, info.id)
    last_batch = first_batch + base + (1 if info.id < extra else 0)

    lo = first_batch * batch_size
    hi = min(last_batch * batch_size, total)
    return dataset[lo:hi]
def _decompose_one_dimension(length, world_size, rank, drop_last):
    """Return the ``[start, end)`` range of items owned by ``rank``.

    Every rank gets the same number of items: ``length / world_size`` rounded
    down when ``drop_last`` is true and rounded up otherwise.  In the latter
    case ``end`` of the last ranks may exceed ``length``.

    Integer arithmetic is used instead of float division so the result stays
    exact for lengths beyond the 53-bit float mantissa.

    Parameters
    ----------
    length : int
        Total number of items.
    world_size : int
        Number of ranks.
    rank : int
        Rank whose range is requested.
    drop_last : bool
        Whether to round the per-rank count down (True) or up (False).

    Returns
    -------
    tuple[int, int]
        ``(rank * num_samples, (rank + 1) * num_samples)``.
    """
    if drop_last:
        num_samples = length // world_size
    else:
        # Ceiling division without going through floats.
        num_samples = -(-length // world_size)
    sta = rank * num_samples
    end = (rank + 1) * num_samples
    return sta, end
""" def __init__(self, indices, batch_size, drop_last, ddp_seed, shuffle): if isinstance(indices, Mapping): self._mapping_keys = list(indices.keys()) len_indices = sum(len(v) for v in indices.values()) else: self._mapping_keys = None len_indices = len(indices) self.rank = dist.get_rank() self.num_replicas = dist.get_world_size() self.seed = ddp_seed self.epoch = 0 self.batch_size = batch_size self.drop_last = drop_last self._shuffle = shuffle ( self.local_lower_bound, self.local_upper_bound, ) = _decompose_one_dimension( len_indices, self.num_replicas, self.rank, drop_last ) self.num_samples = self.local_upper_bound - self.local_lower_bound self.local_num_indices = self.num_samples if self.local_upper_bound > len_indices: assert not drop_last self.local_num_indices = len_indices - self.local_lower_bound if isinstance(indices, Mapping): self._id_tensor = _split_to_local_id_tensor_from_mapping( indices, self._mapping_keys, self.local_lower_bound, self.local_upper_bound, ) else: self._id_tensor = _split_to_local_id_tensor( indices, self.local_lower_bound, self.local_upper_bound ) self._device = self._id_tensor.device # padding self._indices when drop_last = False (self._indices always on cpu) self._indices = torch.empty(self.num_samples, dtype=torch.int64) torch.arange( self.local_num_indices, out=self._indices[: self.local_num_indices] ) if not drop_last: torch.arange( self.num_samples - self.local_num_indices, out=self._indices[self.local_num_indices :], ) assert len(self._id_tensor) == self.num_samples def shuffle(self): """Shuffles the dataset.""" np.random.shuffle(self._indices[: self.local_num_indices].numpy()) if not self.drop_last: # pad extra from local indices self._indices[self.local_num_indices :] = self._indices[ : self.num_samples - self.local_num_indices ] def __iter__(self): indices = _divide_by_worker( self._indices, self.batch_size, self.drop_last ) id_tensor = self._id_tensor[indices] return _TensorizedDatasetIter( id_tensor, self.batch_size, 
def _prefetch_update_feats(
    feats,
    frames,
    types,
    get_storage_func,
    id_name,
    device,
    pin_prefetcher,
    gpu_caches,
):
    """Fetch every ``LazyFeature`` column found in ``frames`` into ``feats``.

    Parameters
    ----------
    feats : dict
        Output dictionary, updated in place.  Keys are ``(type_id, key)``
        pairs and values are whatever the storage's ``fetch`` returns (or the
        cache-assembled tensor when a GPU cache is used).
    frames : list
        The node or edge frames of a subgraph, one per type.
    types : list
        Node types or canonical edge types, indexed like ``frames``.
    get_storage_func : callable
        ``get_storage_func(name, type_)`` returns the feature storage.
    id_name : str
        Column holding the default IDs to fetch (``dgl.NID`` or ``dgl.EID``).
    device : device
        Target device passed to ``fetch``.
    pin_prefetcher : bool
        Passed through to ``fetch``.
    gpu_caches : dict
        Maps ``(name, type_)`` to a ``(cache, item_shape)`` pair.

    Raises
    ------
    DGLError
        If a ``LazyFeature`` has no IDs and the frame has no ``id_name``
        column either.
    """
    for tid, frame in enumerate(frames):
        type_ = types[tid]
        default_id = frame.get(id_name, None)
        for key in frame.keys():
            column = frame._columns[key]
            if not isinstance(column, LazyFeature):
                continue

            parent_key = column.name or key
            if column.id_ is None and default_id is None:
                raise DGLError(
                    "Found a LazyFeature with no ID specified, "
                    "and the graph does not have dgl.NID or dgl.EID columns"
                )
            # Compare against None explicitly: the IDs are tensors, and
            # truth-testing a tensor is ambiguous for more than one element
            # (and wrong for a single element equal to 0).
            ids = default_id if column.id_ is None else column.id_

            if (parent_key, type_) in gpu_caches:
                cache, item_shape = gpu_caches[parent_key, type_]
                values, missing_index, missing_keys = cache.query(ids)
                # Fetch only the cache misses from the real storage, then
                # feed them back into the cache.
                missing_values = get_storage_func(parent_key, type_).fetch(
                    missing_keys, device, pin_prefetcher
                )
                cache.replace(
                    missing_keys, F.astype(missing_values, F.float32)
                )
                values = F.astype(values, F.dtype(missing_values))
                F.scatter_row_inplace(values, missing_index, missing_values)
                # Reshape the flattened result to match the original shape.
                # F.reshape returns the reshaped tensor, so keep the result.
                values = F.reshape(
                    values, (values.shape[0],) + tuple(item_shape)
                )
                values.__cache_miss__ = missing_keys.shape[0] / ids.shape[0]
                feats[tid, key] = values
            else:
                feats[tid, key] = get_storage_func(parent_key, type_).fetch(
                    ids, device, pin_prefetcher
                )
def _put_if_event_not_set(queue, result, event):
    """Put ``result`` on ``queue`` unless ``event`` gets set first.

    Each ``put`` attempt waits at most one second, so a full queue cannot
    block forever once ``event`` is set.
    """
    while True:
        if event.is_set():
            return
        try:
            queue.put(result, timeout=1.0)
        except Full:
            # Queue still full: re-check the event and try again.
            pass
        else:
            return
def remove_parent_storage_columns(item, g):
    """Detach column storages that a subgraph shares with its parent graph.

    For every column of ``item`` whose ``storage`` is the very same object as
    the storage of the same-named column in the corresponding frame of ``g``,
    the subgraph column's ``storage`` is set to None, so that the storages are
    not serialized during IPC from PyTorch DataLoader workers.

    ``item`` is returned (modified in place).  It is returned untouched when
    either ``item`` or ``g`` is not a DGLGraph.
    """
    if not (isinstance(item, DGLGraph) and isinstance(g, DGLGraph)):
        return item

    # Node frames first, then edge frames, paired positionally with the parent's.
    child_frames = itertools.chain(item._node_frames, item._edge_frames)
    parent_frames = itertools.chain(g._node_frames, g._edge_frames)

    for child, parent in zip(child_frames, parent_frames):
        for name in list(child.keys()):
            # Read the column object directly from the frame's column table.
            child_col = child._columns[name]
            if isinstance(child_col, LazyFeature):
                continue
            parent_col = parent._columns.get(name, None)
            if parent_col is not None and parent_col.storage is child_col.storage:
                child_col.storage = None
    return item
""" if not isinstance(item, DGLGraph) or not isinstance(g, DGLGraph): return item for subframe, frame in zip( itertools.chain(item._node_frames, item._edge_frames), itertools.chain(g._node_frames, g._edge_frames), ): for key in subframe.keys(): subcol = subframe._columns[key] if isinstance(subcol, LazyFeature): continue col = frame._columns.get(key, None) if col is None: continue if subcol.storage is None: subcol.storage = col.storage return item class _PrefetchingIter(object): def __init__(self, dataloader, dataloader_it, num_threads=None): self.queue = Queue(1) self.dataloader_it = dataloader_it self.dataloader = dataloader self.num_threads = num_threads self.use_thread = dataloader.use_prefetch_thread self.use_alternate_streams = dataloader.use_alternate_streams self.device = self.dataloader.device if self.use_alternate_streams and self.device.type == "cuda": self.stream = torch.cuda.Stream(device=self.device) else: self.stream = None self._shutting_down = False if self.use_thread: self._done_event = threading.Event() thread = threading.Thread( target=_prefetcher_entry, args=( dataloader_it, dataloader, self.queue, num_threads, self.stream, self._done_event, ), daemon=True, ) thread.start() self.thread = thread def __iter__(self): return self def _shutdown(self): # Sometimes when Python is exiting complicated operations like # self.queue.get_nowait() will hang. So we set it to no-op and let Python handle # the rest since the thread is daemonic. # PyTorch takes the same solution. if PYTHON_EXIT_STATUS is True or PYTHON_EXIT_STATUS is None: return if not self._shutting_down: try: self._shutting_down = True self._done_event.set() try: self.queue.get_nowait() # In case the thread is blocking on put(). 
except: # pylint: disable=bare-except pass self.thread.join() except: # pylint: disable=bare-except pass def __del__(self): if self.use_thread: self._shutdown() def _next_non_threaded(self): batch = next(self.dataloader_it) batch = recursive_apply( batch, restore_parent_storage_columns, self.dataloader.graph ) batch, feats, stream_event = _prefetch( batch, self.dataloader, self.stream ) return batch, feats, stream_event def _next_threaded(self): try: batch, feats, stream_event, exception = self.queue.get( timeout=prefetcher_timeout ) except Empty: raise RuntimeError( f"Prefetcher thread timed out at {prefetcher_timeout} seconds." ) if batch is None: self.thread.join() if exception is None: raise StopIteration exception.reraise() return batch, feats, stream_event def __next__(self): batch, feats, stream_event = ( self._next_non_threaded() if not self.use_thread else self._next_threaded() ) batch = recursive_apply_pair(batch, feats, _assign_for) if stream_event is not None: stream_event.wait() return batch # Make them classes to work with pickling in mp.spawn class CollateWrapper(object): """Wraps a collate function with :func:`remove_parent_storage_columns` for serializing from PyTorch DataLoader workers. """ def __init__(self, sample_func, g, use_uva, device): self.sample_func = sample_func self.g = g self.use_uva = use_uva self.device = device def __call__(self, items): graph_device = getattr(self.g, "device", None) if self.use_uva or (graph_device != torch.device("cpu")): # Only copy the indices to the given device if in UVA mode or the graph # is not on CPU. items = recursive_apply(items, lambda x: x.to(self.device)) batch = self.sample_func(self.g, items) return recursive_apply(batch, remove_parent_storage_columns, self.g) class WorkerInitWrapper(object): """Wraps the :attr:`worker_init_fn` argument of the DataLoader to set the number of DGL OMP threads to 1 for PyTorch DataLoader workers. 
""" def __init__(self, func): self.func = func def __call__(self, worker_id): set_num_threads(1) if self.func is not None: self.func(worker_id) def create_tensorized_dataset( indices, batch_size, drop_last, use_ddp, ddp_seed, shuffle, use_shared_memory, ): """Converts a given indices tensor to a TensorizedDataset, an IterableDataset that returns views of the original tensor, to reduce overhead from having a list of scalar tensors in default PyTorch DataLoader implementation. """ if use_ddp: # DDP always uses shared memory return DDPTensorizedDataset( indices, batch_size, drop_last, ddp_seed, shuffle ) else: return TensorizedDataset( indices, batch_size, drop_last, shuffle, use_shared_memory ) def _get_device(device): device = torch.device(device) if device.type == "cuda" and device.index is None: device = torch.device("cuda", torch.cuda.current_device()) return device class DataLoader(torch.utils.data.DataLoader): """Sampled graph data loader. Wrap a :class:`~dgl.DGLGraph` and a :class:`~dgl.dataloading.Sampler` into an iterable over mini-batches of samples. DGL's ``DataLoader`` extends PyTorch's ``DataLoader`` by handling creation and transmission of graph samples. It supports iterating over a set of nodes, edges or any kinds of indices to get samples in the form of ``DGLGraph``, message flow graphs (MFGS), or any other structures necessary to train a graph neural network. Parameters ---------- graph : DGLGraph The graph. indices : Tensor or dict[ntype, Tensor] The set of indices. It can either be a tensor of integer indices or a dictionary of types and indices. The actual meaning of the indices is defined by the :meth:`sample` method of :attr:`graph_sampler`. graph_sampler : dgl.dataloading.Sampler The subgraph sampler. device : device context, optional The device of the generated MFGs in each iteration, which should be a PyTorch device object (e.g., ``torch.device``). By default this value is None. 
If :attr:`use_uva` is True, MFGs and graphs will generated in torch.cuda.current_device(), otherwise generated in the same device of :attr:`g`. use_ddp : boolean, optional If True, tells the DataLoader to split the training set for each participating process appropriately using :class:`torch.utils.data.distributed.DistributedSampler`. Overrides the :attr:`sampler` argument of :class:`torch.utils.data.DataLoader`. ddp_seed : int, optional The seed for shuffling the dataset in :class:`torch.utils.data.distributed.DistributedSampler`. Only effective when :attr:`use_ddp` is True. use_uva : bool, optional Whether to use Unified Virtual Addressing (UVA) to directly sample the graph and slice the features from CPU into GPU. Setting it to True will pin the graph and feature tensors into pinned memory. If True, requires that :attr:`indices` must have the same device as the :attr:`device` argument. Default: False. use_prefetch_thread : bool, optional (Advanced option) Spawns a new Python thread to perform feature slicing asynchronously. Can make things faster at the cost of GPU memory. Default: True if the graph is on CPU and :attr:`device` is CUDA. False otherwise. use_alternate_streams : bool, optional (Advanced option) Whether to slice and transfers the features to GPU on a non-default stream. Default: True if the graph is on CPU, :attr:`device` is CUDA, and :attr:`use_uva` is False. False otherwise. pin_prefetcher : bool, optional (Advanced option) Whether to pin the feature tensors into pinned memory. Default: True if the graph is on CPU and :attr:`device` is CUDA. False otherwise. gpu_cache : dict[dict], optional Which node and edge features to cache using HugeCTR gpu_cache. Example: {"node": {"features": 500000}, "edge": {"types": 4000000}} would indicate that we want to cache 500k of the node "features" and 4M of the edge "types" in GPU caches. Is supported only on NVIDIA GPUs with compute capability 70 or above. 
The dictionary holds the keys of features along with the corresponding cache sizes. Please see https://github.com/NVIDIA-Merlin/HugeCTR/blob/main/gpu_cache/ReadMe.md for further reference. kwargs : dict Key-word arguments to be passed to the parent PyTorch :py:class:`torch.utils.data.DataLoader` class. Common arguments are: - ``batch_size`` (int): The number of indices in each batch. - ``drop_last`` (bool): Whether to drop the last incomplete batch. - ``shuffle`` (bool): Whether to randomly shuffle the indices at each epoch. Examples -------- To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on a homogeneous graph where each node takes messages from 15 neighbors on the first layer, 10 neighbors on the second, and 5 neighbors on the third (assume the backend is PyTorch): >>> sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5]) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_nid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, output_nodes, blocks in dataloader: ... train_on(input_nodes, output_nodes, blocks) **Using with Distributed Data Parallel** If you are using PyTorch's distributed training (e.g. when using :mod:`torch.nn.parallel.DistributedDataParallel`), you can train the model by turning on the `use_ddp` option: >>> sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5]) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_nid, sampler, use_ddp=True, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for epoch in range(start_epoch, n_epochs): ... for input_nodes, output_nodes, blocks in dataloader: ... train_on(input_nodes, output_nodes, blocks) Notes ----- Please refer to :doc:`Minibatch Training Tutorials ` and :ref:`User Guide Section 6 ` for usage. **Tips for selecting the proper device** * If the input graph :attr:`g` is on GPU, the output device :attr:`device` must be the same GPU and :attr:`num_workers` must be zero. 
In this case, the sampling and subgraph construction will take place on the GPU. This is the recommended setting when using a single-GPU and the whole graph fits in GPU memory. * If the input graph :attr:`g` is on CPU while the output device :attr:`device` is GPU, then depending on the value of :attr:`use_uva`: - If :attr:`use_uva` is set to True, the sampling and subgraph construction will happen on GPU even if the GPU itself cannot hold the entire graph. This is the recommended setting unless there are operations not supporting UVA. :attr:`num_workers` must be 0 in this case. - Otherwise, both the sampling and subgraph construction will take place on the CPU. """ def __init__( self, graph, indices, graph_sampler, device=None, use_ddp=False, ddp_seed=0, batch_size=1, drop_last=False, shuffle=False, use_prefetch_thread=None, use_alternate_streams=None, pin_prefetcher=None, use_uva=False, gpu_cache=None, **kwargs, ): # (BarclayII) PyTorch Lightning sometimes will recreate a DataLoader from an existing # DataLoader with modifications to the original arguments. The arguments are retrieved # from the attributes with the same name, and because we change certain arguments # when calling super().__init__() (e.g. batch_size attribute is None even if the # batch_size argument is not, so the next DataLoader's batch_size argument will be # None), we cannot reinitialize the DataLoader with attributes from the previous # DataLoader directly. # A workaround is to check whether "collate_fn" appears in kwargs. If "collate_fn" # is indeed in kwargs and it's already a CollateWrapper object, we can assume that # the arguments come from a previously created DGL DataLoader, and directly initialize # the new DataLoader from kwargs without any changes. 
if isinstance(kwargs.get("collate_fn", None), CollateWrapper): assert batch_size is None # must be None # restore attributes self.graph = graph self.indices = indices self.graph_sampler = graph_sampler self.device = device self.use_ddp = use_ddp self.ddp_seed = ddp_seed self.shuffle = shuffle self.drop_last = drop_last self.use_prefetch_thread = use_prefetch_thread self.use_alternate_streams = use_alternate_streams self.pin_prefetcher = pin_prefetcher self.use_uva = use_uva kwargs["batch_size"] = None super().__init__(**kwargs) return # (BarclayII) I hoped that pin_prefetcher can be merged into PyTorch's native # pin_memory argument. But our neighbor samplers and subgraph samplers # return indices, which could be CUDA tensors (e.g. during UVA sampling) # hence cannot be pinned. PyTorch's native pin memory thread does not ignore # CUDA tensors when pinning and will crash. To enable pin memory for prefetching # features and disable pin memory for sampler's return value, I had to use # a different argument. Of course I could change the meaning of pin_memory # to pinning prefetched features and disable pin memory for sampler's returns # no matter what, but I doubt if it's reasonable. self.graph = graph self.indices = indices # For PyTorch-Lightning num_workers = kwargs.get("num_workers", 0) indices_device = None try: if isinstance(indices, Mapping): indices = { k: (torch.tensor(v) if not torch.is_tensor(v) else v) for k, v in indices.items() } indices_device = next(iter(indices.values())).device else: indices = ( torch.tensor(indices) if not torch.is_tensor(indices) else indices ) indices_device = indices.device except: # pylint: disable=bare-except # ignore when it fails to convert to torch Tensors. pass if indices_device is None: if not hasattr(indices, "device"): raise AttributeError( 'Custom indices dataset requires a "device" \ attribute indicating where the indices is.' 
) indices_device = indices.device if device is None: if use_uva: device = torch.cuda.current_device() else: device = self.graph.device self.device = _get_device(device) # Sanity check - we only check for DGLGraphs. if isinstance(self.graph, DGLGraph): # Check graph and indices device as well as num_workers if use_uva: if self.graph.device.type != "cpu": raise ValueError( "Graph must be on CPU if UVA sampling is enabled." ) if num_workers > 0: raise ValueError( "num_workers must be 0 if UVA sampling is enabled." ) # Create all the formats and pin the features - custom GraphStorages # will need to do that themselves. self.graph.create_formats_() self.graph.pin_memory_() else: if self.graph.device != indices_device: raise ValueError( "Expect graph and indices to be on the same device when use_uva=False. " ) if self.graph.device.type == "cuda" and num_workers > 0: raise ValueError( "num_workers must be 0 if graph and indices are on CUDA." ) if self.graph.device.type == "cpu" and num_workers > 0: # Instantiate all the formats if the number of workers is greater than 0. self.graph.create_formats_() # Check pin_prefetcher and use_prefetch_thread - should be only effective # if performing CPU sampling but output device is CUDA if ( self.device.type == "cuda" and self.graph.device.type == "cpu" and not use_uva ): if pin_prefetcher is None: pin_prefetcher = True if use_prefetch_thread is None: use_prefetch_thread = True else: if pin_prefetcher is True: raise ValueError( "pin_prefetcher=True is only effective when device=cuda and " "sampling is performed on CPU." ) if pin_prefetcher is None: pin_prefetcher = False if use_prefetch_thread is True: raise ValueError( "use_prefetch_thread=True is only effective when device=cuda and " "sampling is performed on CPU." 
) if use_prefetch_thread is None: use_prefetch_thread = False # Check use_alternate_streams if use_alternate_streams is None: use_alternate_streams = ( self.device.type == "cuda" and self.graph.device.type == "cpu" and not use_uva and is_tensor_adaptor_enabled() ) elif use_alternate_streams and not is_tensor_adaptor_enabled(): dgl_warning( "use_alternate_streams is turned off because " "TensorAdaptor is not available." ) use_alternate_streams = False if torch.is_tensor(indices) or ( isinstance(indices, Mapping) and all(torch.is_tensor(v) for v in indices.values()) ): self.dataset = create_tensorized_dataset( indices, batch_size, drop_last, use_ddp, ddp_seed, shuffle, kwargs.get("persistent_workers", False), ) else: self.dataset = indices self.ddp_seed = ddp_seed self.use_ddp = use_ddp self.use_uva = use_uva self.shuffle = shuffle self.drop_last = drop_last self.graph_sampler = graph_sampler self.use_alternate_streams = use_alternate_streams self.pin_prefetcher = pin_prefetcher self.use_prefetch_thread = use_prefetch_thread self.cpu_affinity_enabled = False worker_init_fn = WorkerInitWrapper(kwargs.pop("worker_init_fn", None)) self.other_storages = {} _init_gpu_caches(self.graph, gpu_cache) super().__init__( self.dataset, collate_fn=CollateWrapper( self.graph_sampler.sample, graph, self.use_uva, self.device ), batch_size=None, pin_memory=self.pin_prefetcher, worker_init_fn=worker_init_fn, **kwargs, ) def __iter__(self): if ( self.device.type == "cpu" and hasattr(psutil.Process, "cpu_affinity") and not self.cpu_affinity_enabled ): link = "https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html" dgl_warning( f"Dataloader CPU affinity opt is not enabled, consider switching it on " f"(see enable_cpu_affinity() or CPU best practices for DGL [{link}])" ) if self.shuffle: self.dataset.shuffle() # When using multiprocessing PyTorch sometimes set the number of PyTorch threads to 1 # when spawning new Python threads. This drastically slows down pinning features. 
num_threads = torch.get_num_threads() if self.num_workers > 0 else None return _PrefetchingIter( self, super().__iter__(), num_threads=num_threads ) @contextmanager def enable_cpu_affinity( self, loader_cores=None, compute_cores=None, verbose=True ): """Helper method for enabling cpu affinity for compute threads and dataloader workers Only for CPU devices Uses only NUMA node 0 by default for multi-node systems Parameters ---------- loader_cores : [int] (optional) List of cpu cores to which dataloader workers should affinitize to. default: node0_cores[0:num_workers] compute_cores : [int] (optional) List of cpu cores to which compute threads should affinitize to default: node0_cores[num_workers:] verbose : bool (optional) If True, affinity information will be printed to the console Usage ----- with dataloader.enable_cpu_affinity(): """ if self.device.type == "cpu": if not self.num_workers > 0: raise Exception( "ERROR: affinity should be used with at least one DL worker" ) if loader_cores and len(loader_cores) != self.num_workers: raise Exception( "ERROR: cpu_affinity incorrect " "number of loader_cores={} for num_workers={}".format( loader_cores, self.num_workers ) ) # False positive E0203 (access-member-before-definition) linter warning worker_init_fn_old = self.worker_init_fn # pylint: disable=E0203 affinity_old = psutil.Process().cpu_affinity() nthreads_old = get_num_threads() compute_cores = compute_cores[:] if compute_cores else [] loader_cores = loader_cores[:] if loader_cores else [] def init_fn(worker_id): try: psutil.Process().cpu_affinity([loader_cores[worker_id]]) except: raise Exception( "ERROR: cannot use affinity id={} cpu={}".format( worker_id, loader_cores ) ) worker_init_fn_old(worker_id) if not loader_cores or not compute_cores: numa_info = get_numa_nodes_cores() if numa_info and len(numa_info[0]) > self.num_workers: # take one thread per each node 0 core node0_cores = [cpus[0] for core_id, cpus in numa_info[0]] else: node0_cores = 
list(range(psutil.cpu_count(logical=False))) if len(node0_cores) < self.num_workers: raise Exception("ERROR: more workers than available cores") loader_cores = loader_cores or node0_cores[0 : self.num_workers] compute_cores = [ cpu for cpu in node0_cores if cpu not in loader_cores ] try: psutil.Process().cpu_affinity(compute_cores) set_num_threads(len(compute_cores)) self.worker_init_fn = init_fn self.cpu_affinity_enabled = True if verbose: print( f"{self.num_workers} DL workers are assigned to cpus " f"{loader_cores}, main process will use cpus " f"{compute_cores}" ) yield finally: # restore omp_num_threads and cpu affinity psutil.Process().cpu_affinity(affinity_old) set_num_threads(nthreads_old) self.worker_init_fn = worker_init_fn_old self.cpu_affinity_enabled = False else: yield # To allow data other than node/edge data to be prefetched. def attach_data(self, name, data): """Add a data other than node and edge features for prefetching.""" self.other_storages[name] = wrap_storage(data) ######## Graph DataLoaders ######## # GraphDataLoader loads a set of graphs so it's not relevant to the above. They are currently # copied from the old DataLoader implementation. def _create_dist_sampler(dataset, dataloader_kwargs, ddp_seed): # Note: will change the content of dataloader_kwargs dist_sampler_kwargs = {"shuffle": dataloader_kwargs.get("shuffle", False)} dataloader_kwargs["shuffle"] = False dist_sampler_kwargs["seed"] = ddp_seed dist_sampler_kwargs["drop_last"] = dataloader_kwargs.get("drop_last", False) dataloader_kwargs["drop_last"] = False return DistributedSampler(dataset, **dist_sampler_kwargs) class GraphCollator(object): """Given a set of graphs as well as their graph-level data, the collate function will batch the graphs into a batched graph, and stack the tensors into a single bigger tensor. If the example is a container (such as sequences or mapping), the collate function preserves the structure and collates each of the elements recursively. 
If the set of graphs has no graph-level data, the collate function will yield a batched graph. Examples -------- To train a GNN for graph classification on a set of graphs in ``dataset`` (assume the backend is PyTorch): >>> dataloader = dgl.dataloading.GraphDataLoader( ... dataset, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for batched_graph, labels in dataloader: ... train_on(batched_graph, labels) """ def __init__(self): self.graph_collate_err_msg_format = ( "graph_collate: batch must contain DGLGraph, tensors, numpy arrays, " "numbers, dicts or lists; found {}" ) self.np_str_obj_array_pattern = re.compile(r"[SaUO]") # This implementation is based on torch.utils.data._utils.collate.default_collate def collate(self, items): """This function is similar to ``torch.utils.data._utils.collate.default_collate``. It combines the sampled graphs and corresponding graph-level data into a batched graph and tensors. Parameters ---------- items : list of data points or tuples Elements in the list are expected to have the same length. Each sub-element will be batched as a batched graph, or a batched tensor correspondingly. Returns ------- A tuple of the batching results. 
""" elem = items[0] elem_type = type(elem) if isinstance(elem, DGLGraph): batched_graphs = batch_graphs(items) return batched_graphs elif F.is_tensor(elem): return F.stack(items, 0) elif ( elem_type.__module__ == "numpy" and elem_type.__name__ != "str_" and elem_type.__name__ != "string_" ): if ( elem_type.__name__ == "ndarray" or elem_type.__name__ == "memmap" ): # array of string classes and object if ( self.np_str_obj_array_pattern.search(elem.dtype.str) is not None ): raise TypeError( self.graph_collate_err_msg_format.format(elem.dtype) ) return self.collate([F.tensor(b) for b in items]) elif elem.shape == (): # scalars return F.tensor(items) elif isinstance(elem, float): return F.tensor(items, dtype=F.float64) elif isinstance(elem, int): return F.tensor(items) elif isinstance(elem, (str, bytes)): return items elif isinstance(elem, Mapping): return {key: self.collate([d[key] for d in items]) for key in elem} elif isinstance(elem, tuple) and hasattr(elem, "_fields"): # namedtuple return elem_type( *(self.collate(samples) for samples in zip(*items)) ) elif isinstance(elem, Sequence): # check to make sure that the elements in batch have consistent size item_iter = iter(items) elem_size = len(next(item_iter)) if not all(len(elem) == elem_size for elem in item_iter): raise RuntimeError( "each element in list of batch should be of equal size" ) transposed = zip(*items) return [self.collate(samples) for samples in transposed] raise TypeError(self.graph_collate_err_msg_format.format(elem_type)) class GraphDataLoader(torch.utils.data.DataLoader): """Batched graph data loader. PyTorch dataloader for batch-iterating over a set of graphs, generating the batched graph and corresponding label tensor (if provided) of the said minibatch. Parameters ---------- dataset : torch.utils.data.Dataset The dataset to load graphs from. collate_fn : Function, default is None The customized collate function. Will use the default collate function if not given. 
use_ddp : boolean, optional If True, tells the DataLoader to split the training set for each participating process appropriately using :class:`torch.utils.data.distributed.DistributedSampler`. Overrides the :attr:`sampler` argument of :class:`torch.utils.data.DataLoader`. ddp_seed : int, optional The seed for shuffling the dataset in :class:`torch.utils.data.distributed.DistributedSampler`. Only effective when :attr:`use_ddp` is True. kwargs : dict Key-word arguments to be passed to the parent PyTorch :py:class:`torch.utils.data.DataLoader` class. Common arguments are: - ``batch_size`` (int): The number of indices in each batch. - ``drop_last`` (bool): Whether to drop the last incomplete batch. - ``shuffle`` (bool): Whether to randomly shuffle the indices at each epoch. Examples -------- To train a GNN for graph classification on a set of graphs in ``dataset``: >>> dataloader = dgl.dataloading.GraphDataLoader( ... dataset, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for batched_graph, labels in dataloader: ... train_on(batched_graph, labels) **With Distributed Data Parallel** If you are using PyTorch's distributed training (e.g. when using :mod:`torch.nn.parallel.DistributedDataParallel`), you can train the model by turning on the :attr:`use_ddp` option: >>> dataloader = dgl.dataloading.GraphDataLoader( ... dataset, use_ddp=True, batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for epoch in range(start_epoch, n_epochs): ... dataloader.set_epoch(epoch) ... for batched_graph, labels in dataloader: ... 
train_on(batched_graph, labels) """ collator_arglist = inspect.getfullargspec(GraphCollator).args def __init__( self, dataset, collate_fn=None, use_ddp=False, ddp_seed=0, **kwargs ): collator_kwargs = {} dataloader_kwargs = {} for k, v in kwargs.items(): if k in self.collator_arglist: collator_kwargs[k] = v else: dataloader_kwargs[k] = v self.use_ddp = use_ddp if use_ddp: self.dist_sampler = _create_dist_sampler( dataset, dataloader_kwargs, ddp_seed ) dataloader_kwargs["sampler"] = self.dist_sampler if collate_fn is None and kwargs.get("batch_size", 1) is not None: collate_fn = GraphCollator(**collator_kwargs).collate super().__init__( dataset=dataset, collate_fn=collate_fn, **dataloader_kwargs ) def set_epoch(self, epoch): """Sets the epoch number for the underlying sampler which ensures all replicas to use a different ordering for each epoch. Only available when :attr:`use_ddp` is True. Calls :meth:`torch.utils.data.distributed.DistributedSampler.set_epoch`. Parameters ---------- epoch : int The epoch number. """ if self.use_ddp: self.dist_sampler.set_epoch(epoch) else: raise DGLError("set_epoch is only available when use_ddp is True.") class NodeCollator: """Deprecated. Please use :class:`~dgl.distributed.NodeCollator` instead.""" def __new__(cls, *args, **kwargs): dgl_warning( "NodeCollator is defined in dgl.distributed This class is for " "backward compatibility and will be removed soon. Please update " "your code to use `dgl.distributed.NodeCollator`." ) from ..distributed import NodeCollator as NewNodeCollator return NewNodeCollator(*args, **kwargs) class EdgeCollator: """Deprecated. Please use :class:`~dgl.distributed.EdgeCollator` instead.""" def __new__(cls, *args, **kwargs): dgl_warning( "EdgeCollator is defined in dgl.distributed This class is for " "backward compatibility and will be removed soon. Please update " "your code to use `dgl.distributed.EdgeCollator`." 
) from ..distributed import EdgeCollator as NewEdgeCollator return NewEdgeCollator(*args, **kwargs) def _remove_kwargs_dist(kwargs): """Deprecated.""" if "num_workers" in kwargs: del kwargs["num_workers"] if "pin_memory" in kwargs: del kwargs["pin_memory"] print("Distributed DataLoaders do not support pin_memory.") return kwargs class DistDataLoader: """Deprecated. Please use :class:`~dgl.distributed.DistDataLoader` instead.""" def __new__(cls, *args, **kwargs): dgl_warning( "DistDataLoader is defined in dgl.distributed This class is for " "backward compatibility and will be removed soon. Please update " "your code to use `dgl.distributed.DistDataLoader`." ) from ..distributed import DistDataLoader as NewDistDataLoader return NewDistDataLoader(*args, **kwargs) class DistNodeDataLoader: """Deprecated. Please use :class:`~dgl.distributed.DistNodeDataLoader` instead. """ def __new__(cls, *args, **kwargs): dgl_warning( "dgl.dataloading.DistNodeDataLoader has been moved to " "dgl.distributed.DistNodeDataLoader. This old class is deprecated " "and will be removed soon. Please update your code to use the new " "class." ) from ..distributed import DistNodeDataLoader as NewDistNodeDataLoader return NewDistNodeDataLoader(*args, **kwargs) class DistEdgeDataLoader: """Deprecated. Please use :class:`~dgl.distributed.DistEdgeDataLoader` instead. """ def __new__(cls, *args, **kwargs): dgl_warning( "dgl.dataloading.DistEdgeDataLoader has been moved to " "dgl.distributed.DistEdgeDataLoader. This old class is deprecated " "and will be removed soon. Please update your code to use the new " "class." 
class SAINTSampler(Sampler):
    """Random node/edge/walk sampler from
    `GraphSAINT: Graph Sampling Based Inductive Learning Method
    <https://arxiv.org/abs/1907.04931>`__

    For each call, the sampler samples a node subset and then returns a node induced
    subgraph. There are three options for sampling node subsets:

    - For :attr:`'node'` sampler, the probability to sample a node is in proportion
      to its out-degree.
    - The :attr:`'edge'` sampler first samples an edge subset and then use the
      end nodes of the edges.
    - The :attr:`'walk'` sampler uses the nodes visited by random walks. It uniformly
      selects a number of root nodes and then performs a fixed-length random walk from
      each root node.

    Parameters
    ----------
    mode : str
        The sampler to use, which can be :attr:`'node'`, :attr:`'edge'`, or
        :attr:`'walk'`.
    budget : int or tuple[int]
        Sampler configuration.

        - For :attr:`'node'` sampler, budget specifies the number of nodes
          in each sampled subgraph.
        - For :attr:`'edge'` sampler, budget specifies the number of edges
          to sample for inducing a subgraph.
        - For :attr:`'walk'` sampler, budget is a tuple. budget[0] specifies
          the number of root nodes to generate random walks. budget[1] specifies
          the length of a random walk.

    cache : bool, optional
        If False, it will not cache the probability arrays for sampling. Setting
        it to False is required if you want to use the sampler across different graphs.
    prefetch_ndata : list[str], optional
        The node data to prefetch for the subgraph.

        See :ref:`guide-minibatch-prefetching` for a detailed explanation of prefetching.
    prefetch_edata : list[str], optional
        The edge data to prefetch for the subgraph.

        See :ref:`guide-minibatch-prefetching` for a detailed explanation of prefetching.
    output_device : device, optional
        The device of the output subgraphs.

    Raises
    ------
    DGLError
        If ``mode`` is not one of ``'node'``, ``'edge'`` or ``'walk'``.

    Examples
    --------

    >>> import torch
    >>> from dgl.dataloading import SAINTSampler, DataLoader
    >>> num_iters = 1000
    >>> sampler = SAINTSampler(mode='node', budget=6000)
    >>> # Assume g.ndata['feat'] and g.ndata['label'] hold node features and labels
    >>> dataloader = DataLoader(g, torch.arange(num_iters), sampler, num_workers=4)
    >>> for subg in dataloader:
    ...     train_on(subg)
    """

    def __init__(
        self,
        mode,
        budget,
        cache=True,
        prefetch_ndata=None,
        prefetch_edata=None,
        output_device="cpu",
    ):
        super().__init__()
        self.budget = budget
        # Bind the node-ID sampling strategy selected by ``mode``.
        if mode == "node":
            self.sampler = self.node_sampler
        elif mode == "edge":
            self.sampler = self.edge_sampler
        elif mode == "walk":
            self.sampler = self.walk_sampler
        else:
            raise DGLError(
                f"Expect mode to be 'node', 'edge' or 'walk', got {mode}."
            )

        self.cache = cache
        # Cached sampling probabilities; filled lazily when ``cache`` is True.
        self.prob = None
        self.prefetch_ndata = prefetch_ndata or []
        self.prefetch_edata = prefetch_edata or []
        self.output_device = output_device

    def node_sampler(self, g):
        """Node ID sampler for random node sampler"""
        # Alternatively, this can be realized by uniformly sampling an edge subset,
        # and then take the src node of the sampled edges. However, the number of edges
        # is typically much larger than the number of nodes.
        if self.cache and self.prob is not None:
            prob = self.prob
        else:
            # Weight nodes by out-degree, clamped so isolated nodes stay samplable.
            prob = g.out_degrees().float().clamp(min=1)
            if self.cache:
                self.prob = prob
        return (
            torch.multinomial(prob, num_samples=self.budget, replacement=True)
            .unique()
            .type(g.idtype)
        )

    def edge_sampler(self, g):
        """Node ID sampler for random edge sampler"""
        src, dst = g.edges()
        if self.cache and self.prob is not None:
            prob = self.prob
        else:
            in_deg = g.in_degrees().float().clamp(min=1)
            out_deg = g.out_degrees().float().clamp(min=1)
            # We can reduce the sample space by half if graphs are always symmetric.
            prob = 1.0 / in_deg[dst.long()] + 1.0 / out_deg[src.long()]
            prob /= prob.sum()
            if self.cache:
                self.prob = prob
        sampled_edges = torch.unique(
            choice(len(prob), size=self.budget, prob=prob)
        )
        # The sampled node set is the union of the sampled edges' end points.
        sampled_nodes = torch.cat([src[sampled_edges], dst[sampled_edges]])
        return sampled_nodes.unique().type(g.idtype)

    def walk_sampler(self, g):
        """Node ID sampler for random walk sampler"""
        num_roots, walk_length = self.budget
        # Draw the roots directly in the graph's ID dtype: random_walk() requires
        # the seed tensor to have the same dtype as the graph IDs, so default
        # int64 roots would be rejected on int32 graphs.
        sampled_roots = torch.randint(
            0, g.num_nodes(), (num_roots,), dtype=g.idtype
        )
        traces, types = random_walk(g, nodes=sampled_roots, length=walk_length)
        sampled_nodes, _, _, _ = pack_traces(traces, types)
        return sampled_nodes.unique().type(g.idtype)

    def sample(self, g, indices):
        """Sampling function

        Parameters
        ----------
        g : DGLGraph
            The graph to sample from.
        indices : Tensor
            Placeholder not used.

        Returns
        -------
        DGLGraph
            The sampled subgraph.
        """
        node_ids = self.sampler(g)
        sg = g.subgraph(
            node_ids, relabel_nodes=True, output_device=self.output_device
        )
        # Mark the requested node/edge features for lazy prefetching.
        set_node_lazy_features(sg, self.prefetch_ndata)
        set_edge_lazy_features(sg, self.prefetch_edata)
        return sg
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # Based off of neighbor_sampler.py # """Data loading components for labor sampling""" from numpy.random import default_rng from .. import backend as F from ..base import EID, NID from ..random import choice from ..transforms import to_block from .base import BlockSampler class LaborSampler(BlockSampler): """Sampler that builds computational dependency of node representations via labor sampling for multilayer GNN from the NeurIPS 2023 paper `Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs `__ This sampler will make every node gather messages from a fixed number of neighbors per edge type. The neighbors are picked uniformly with default parameters. For every vertex t that will be considered to be sampled, there will be a single random variate r_t. Parameters ---------- fanouts : list[int] or list[dict[etype, int]] List of neighbors to sample per edge type for each GNN layer, with the i-th element being the fanout for the i-th GNN layer. If only a single integer is provided, DGL assumes that every edge type will have the same fanout. If -1 is provided for one edge type on one layer, then all inbound edges of that edge type will be included. edge_dir : str, default ``'in'`` Can be either ``'in'`` where the neighbors will be sampled according to incoming edges, or ``'out'`` otherwise, same as :func:`dgl.sampling.sample_neighbors`. prob : str, optional If given, the probability of each neighbor being sampled is proportional to the edge feature value with the given name in ``g.edata``. The feature must be a scalar on each edge. 
In this case, the returned blocks edata include ``'edge_weights'`` that needs to be used in the message passing operation. importance_sampling : int, default ``0`` Whether to use importance sampling or uniform sampling, use of negative values optimizes importance sampling probabilities until convergence while use of positive values runs optimization steps that many times. If the value is i, then LABOR-i variant is used. When used with a nonzero parameter, the returned blocks edata include ``'edge_weights'`` that needs to be used in the message passing operation. layer_dependency : bool, default ``False`` Specifies whether different layers should use same random variates. Results into a reduction in the number of vertices sampled, but may degrade the quality slightly. batch_dependency : int, default ``1`` Specifies whether different minibatches should use similar random variates. Results in a higher temporal access locality of sampled vertices, but may degrade the quality slightly. prefetch_node_feats : list[str] or dict[ntype, list[str]], optional The source node data to prefetch for the first MFG, corresponding to the input node features necessary for the first GNN layer. prefetch_labels : list[str] or dict[ntype, list[str]], optional The destination node data to prefetch for the last MFG, corresponding to the node labels of the minibatch. prefetch_edge_feats : list[str] or dict[etype, list[str]], optional The edge data names to prefetch for all the MFGs, corresponding to the edge features necessary for all GNN layers. output_device : device, optional The device of the output subgraphs or MFGs. Default is the same as the minibatch of seed nodes. 
Examples -------- **Node classification** To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on a homogeneous graph where each node takes messages from 5, 10, 15 neighbors for the first, second, and third layer respectively (assuming the backend is PyTorch): >>> sampler = dgl.dataloading.LaborSampler([5, 10, 15]) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_nid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, output_nodes, blocks in dataloader: ... train_on(blocks) If training on a heterogeneous graph and you want different number of neighbors for each edge type, one should instead provide a list of dicts. Each dict would specify the number of neighbors to pick per edge type. >>> sampler = dgl.dataloading.LaborSampler([ ... {('user', 'follows', 'user'): 5, ... ('user', 'plays', 'game'): 4, ... ('game', 'played-by', 'user'): 3}] * 3) If you would like non-uniform labor sampling: >>> # any non-negative 1D vector works >>> g.edata['p'] = torch.rand(g.num_edges()) >>> sampler = dgl.dataloading.LaborSampler([5, 10, 15], prob='p') **Edge classification and link prediction** This class can also work for edge classification and link prediction together with :func:`as_edge_prediction_sampler`. >>> sampler = dgl.dataloading.LaborSampler([5, 10, 15]) >>> sampler = dgl.dataloading.as_edge_prediction_sampler(sampler) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_eid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) See the documentation :func:`as_edge_prediction_sampler` for more details. Notes ----- For the concept of MFGs, please refer to :ref:`User Guide Section 6 ` and :doc:`Minibatch Training Tutorials `. 
""" def __init__( self, fanouts, edge_dir="in", prob=None, importance_sampling=0, layer_dependency=False, batch_dependency=1, prefetch_node_feats=None, prefetch_labels=None, prefetch_edge_feats=None, output_device=None, ): super().__init__( prefetch_node_feats=prefetch_node_feats, prefetch_labels=prefetch_labels, prefetch_edge_feats=prefetch_edge_feats, output_device=output_device, ) self.fanouts = fanouts self.edge_dir = edge_dir self.prob = prob self.importance_sampling = importance_sampling self.layer_dependency = layer_dependency self.cnt = F.zeros(2, F.int64, F.cpu()) self.cnt[0] = -1 self.cnt[1] = batch_dependency self.random_seed = F.zeros( 2 if self.cnt[1] > 1 else 1, F.int64, F.cpu() ) self.set_seed(None if batch_dependency > 0 else choice(1e18, 1).item()) def set_seed(self, random_seed=None): """Updates the underlying seed for the sampler Calling this function enforces the sampling algorithm to use the same seed on every edge type. This can reduce the number of nodes being sampled because the passed random_seed makes it so that for any seed vertex ``s`` and its neighbor ``t``, the rolled random variate ``r_t`` is the same for any instance of this class with the same random seed. When sampling as part of the same batch, one would want identical seeds so that LABOR can globally sample. One example is that for heterogenous graphs, there is a single random seed passed for each edge type. This will sample much fewer vertices compared to having unique random seeds for each edge type. If one called this function individually for each edge type for a heterogenous graph with different random seeds, then it would run LABOR locally for each edge type, resulting into a larger number of vertices being sampled. If this function is called without any parameters, we get the random seed by getting a random number from DGL. Call this function if multiple instances of LaborSampler are used to sample as part of a single batch. 
Parameters ---------- random_seed : int, default ``None`` The random seed to be used for next sampling call. """ if random_seed is None: self.cnt[0] += 1 if self.cnt[1] > 0 and self.cnt[0] % self.cnt[1] == 0: if self.cnt[0] <= 0 or self.cnt[1] <= 1: if not hasattr(self, "rng"): self.rng = default_rng(choice(1e18, 1).item()) self.random_seed[0] = self.rng.integers(1e18) if self.cnt[1] > 1: self.random_seed[1] = self.rng.integers(1e18) else: self.random_seed[0] = self.random_seed[1] self.random_seed[1] = self.rng.integers(1e18) else: self.rng = default_rng(random_seed) self.random_seed[0] = self.rng.integers(1e18) if self.cnt[1] > 1: self.random_seed[1] = self.rng.integers(1e18) self.cnt[0] = 0 def sample_blocks(self, g, seed_nodes, exclude_eids=None): output_nodes = seed_nodes blocks = [] for i, fanout in enumerate(reversed(self.fanouts)): random_seed_i = F.zerocopy_to_dgl_ndarray( self.random_seed + (i if not self.layer_dependency else 0) ) if self.cnt[1] <= 1: seed2_contr = 0 else: seed2_contr = ((self.cnt[0] % self.cnt[1]) / self.cnt[1]).item() frontier, importances = g.sample_labors( seed_nodes, fanout, edge_dir=self.edge_dir, prob=self.prob, importance_sampling=self.importance_sampling, random_seed=random_seed_i, seed2_contribution=seed2_contr, output_device=self.output_device, exclude_edges=exclude_eids, ) eid = frontier.edata[EID] block = to_block( frontier, seed_nodes, include_dst_in_src=True, src_nodes=None ) block.edata[EID] = eid if len(g.canonical_etypes) > 1: for etype, importance in zip(g.canonical_etypes, importances): if importance.shape[0] == block.num_edges(etype): block.edata["edge_weights"][etype] = importance elif importances[0].shape[0] == block.num_edges(): block.edata["edge_weights"] = importances[0] seed_nodes = block.srcdata[NID] blocks.insert(0, block) self.set_seed() return seed_nodes, output_nodes, blocks ================================================ FILE: python/dgl/dataloading/negative_sampler.py 
================================================ """Negative samplers""" from collections.abc import Mapping from .. import backend as F class _BaseNegativeSampler(object): def _generate(self, g, eids, canonical_etype): raise NotImplementedError def __call__(self, g, eids): """Returns negative samples. Parameters ---------- g : DGLGraph The graph. eids : Tensor or dict[etype, Tensor] The sampled edges in the minibatch. Returns ------- tuple[Tensor, Tensor] or dict[etype, tuple[Tensor, Tensor]] The returned source-destination pairs as negative samples. """ if isinstance(eids, Mapping): eids = {g.to_canonical_etype(k): v for k, v in eids.items()} neg_pair = {k: self._generate(g, v, k) for k, v in eids.items()} else: assert ( len(g.canonical_etypes) == 1 ), "please specify a dict of etypes and ids for graphs with multiple edge types" neg_pair = self._generate(g, eids, g.canonical_etypes[0]) return neg_pair class PerSourceUniform(_BaseNegativeSampler): """Negative sampler that randomly chooses negative destination nodes for each source node according to a uniform distribution. For each edge ``(u, v)`` of type ``(srctype, etype, dsttype)``, DGL generates :attr:`k` pairs of negative edges ``(u, v')``, where ``v'`` is chosen uniformly from all the nodes of type ``dsttype``. The resulting edges will also have type ``(srctype, etype, dsttype)``. Parameters ---------- k : int The number of negative samples per edge. 
Examples -------- >>> g = dgl.graph(([0, 1, 2], [1, 2, 3])) >>> neg_sampler = dgl.dataloading.negative_sampler.PerSourceUniform(2) >>> neg_sampler(g, torch.tensor([0, 1])) (tensor([0, 0, 1, 1]), tensor([1, 0, 2, 3])) """ def __init__(self, k): self.k = k def _generate(self, g, eids, canonical_etype): _, _, vtype = canonical_etype shape = F.shape(eids) dtype = F.dtype(eids) ctx = F.context(eids) shape = (shape[0] * self.k,) src, _ = g.find_edges(eids, etype=canonical_etype) src = F.repeat(src, self.k, 0) dst = F.randint(shape, dtype, ctx, 0, g.num_nodes(vtype)) return src, dst # Alias Uniform = PerSourceUniform class GlobalUniform(_BaseNegativeSampler): """Negative sampler that randomly chooses negative source-destination pairs according to a uniform distribution. For each edge ``(u, v)`` of type ``(srctype, etype, dsttype)``, DGL generates at most :attr:`k` pairs of negative edges ``(u', v')``, where ``u'`` is chosen uniformly from all the nodes of type ``srctype`` and ``v'`` is chosen uniformly from all the nodes of type ``dsttype``. The resulting edges will also have type ``(srctype, etype, dsttype)``. DGL guarantees that the sampled pairs will not have edges in between. Parameters ---------- k : int The desired number of negative samples to generate per edge. exclude_self_loops : bool, optional Whether to exclude self-loops from negative samples. (Default: True) replace : bool, optional Whether to sample with replacement. Setting it to True will make things faster. (Default: False) Notes ----- This negative sampler will try to generate as many negative samples as possible, but it may rarely return less than :attr:`k` negative samples per edge. This is more likely to happen if a graph is so small or dense that not many unique negative samples exist. 
Examples -------- >>> g = dgl.graph(([0, 1, 2], [1, 2, 3])) >>> neg_sampler = dgl.dataloading.negative_sampler.GlobalUniform(2, True) >>> neg_sampler(g, torch.LongTensor([0, 1])) (tensor([0, 1, 3, 2]), tensor([2, 0, 2, 1])) """ def __init__(self, k, exclude_self_loops=True, replace=False): self.k = k self.exclude_self_loops = exclude_self_loops self.replace = replace def _generate(self, g, eids, canonical_etype): return g.global_uniform_negative_sampling( len(eids) * self.k, self.exclude_self_loops, self.replace, canonical_etype, ) ================================================ FILE: python/dgl/dataloading/neighbor_sampler.py ================================================ """Data loading components for neighbor sampling""" from .. import backend as F from ..base import EID, NID from ..heterograph import DGLGraph from ..transforms import to_block from ..utils import get_num_threads from .base import BlockSampler class NeighborSampler(BlockSampler): """Sampler that builds computational dependency of node representations via neighbor sampling for multilayer GNN. This sampler will make every node gather messages from a fixed number of neighbors per edge type. The neighbors are picked uniformly. Parameters ---------- fanouts : list[int] or list[dict[etype, int]] List of neighbors to sample per edge type for each GNN layer, with the i-th element being the fanout for the i-th GNN layer. If only a single integer is provided, DGL assumes that every edge type will have the same fanout. If -1 is provided for one edge type on one layer, then all inbound edges of that edge type will be included. edge_dir : str, default ``'in'`` Can be either ``'in' `` where the neighbors will be sampled according to incoming edges, or ``'out'`` otherwise, same as :func:`dgl.sampling.sample_neighbors`. prob : str, optional If given, the probability of each neighbor being sampled is proportional to the edge feature value with the given name in ``g.edata``. 
The feature must be a scalar on each edge. This argument is mutually exclusive with :attr:`mask`. If you want to specify both a mask and a probability, consider multiplying the probability with the mask instead. mask : str, optional If given, a neighbor could be picked only if the edge mask with the given name in ``g.edata`` is True. The data must be boolean on each edge. This argument is mutually exclusive with :attr:`prob`. If you want to specify both a mask and a probability, consider multiplying the probability with the mask instead. replace : bool, default False Whether to sample with replacement prefetch_node_feats : list[str] or dict[ntype, list[str]], optional The source node data to prefetch for the first MFG, corresponding to the input node features necessary for the first GNN layer. prefetch_labels : list[str] or dict[ntype, list[str]], optional The destination node data to prefetch for the last MFG, corresponding to the node labels of the minibatch. prefetch_edge_feats : list[str] or dict[etype, list[str]], optional The edge data names to prefetch for all the MFGs, corresponding to the edge features necessary for all GNN layers. output_device : device, optional The device of the output subgraphs or MFGs. Default is the same as the minibatch of seed nodes. fused : bool, default True If True and device is CPU fused sample neighbors is invoked. This version requires seed_nodes to be unique Examples -------- **Node classification** To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on a homogeneous graph where each node takes messages from 5, 10, 15 neighbors for the first, second, and third layer respectively (assuming the backend is PyTorch): >>> sampler = dgl.dataloading.NeighborSampler([5, 10, 15]) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_nid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, output_nodes, blocks in dataloader: ... 
train_on(blocks) If training on a heterogeneous graph and you want different number of neighbors for each edge type, one should instead provide a list of dicts. Each dict would specify the number of neighbors to pick per edge type. >>> sampler = dgl.dataloading.NeighborSampler([ ... {('user', 'follows', 'user'): 5, ... ('user', 'plays', 'game'): 4, ... ('game', 'played-by', 'user'): 3}] * 3) If you would like non-uniform neighbor sampling: >>> g.edata['p'] = torch.rand(g.num_edges()) # any non-negative 1D vector works >>> sampler = dgl.dataloading.NeighborSampler([5, 10, 15], prob='p') Or sampling on edge masks: >>> g.edata['mask'] = torch.rand(g.num_edges()) < 0.2 # any 1D boolean mask works >>> sampler = dgl.dataloading.NeighborSampler([5, 10, 15], prob='mask') **Edge classification and link prediction** This class can also work for edge classification and link prediction together with :func:`as_edge_prediction_sampler`. >>> sampler = dgl.dataloading.NeighborSampler([5, 10, 15]) >>> sampler = dgl.dataloading.as_edge_prediction_sampler(sampler) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_eid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) See the documentation :func:`as_edge_prediction_sampler` for more details. Notes ----- For the concept of MFGs, please refer to :ref:`User Guide Section 6 ` and :doc:`Minibatch Training Tutorials `. """ def __init__( self, fanouts, edge_dir="in", prob=None, mask=None, replace=False, prefetch_node_feats=None, prefetch_labels=None, prefetch_edge_feats=None, output_device=None, fused=True, ): super().__init__( prefetch_node_feats=prefetch_node_feats, prefetch_labels=prefetch_labels, prefetch_edge_feats=prefetch_edge_feats, output_device=output_device, ) self.fanouts = fanouts self.edge_dir = edge_dir if mask is not None and prob is not None: raise ValueError( "Mask and probability arguments are mutually exclusive. 
" "Consider multiplying the probability with the mask " "to achieve the same goal." ) self.prob = prob or mask self.replace = replace self.fused = fused self.mapping = {} self.g = None def sample_blocks(self, g, seed_nodes, exclude_eids=None): output_nodes = seed_nodes blocks = [] # sample_neighbors_fused function requires multithreading to be more efficient # than sample_neighbors if self.fused and get_num_threads() > 1: cpu = F.device_type(g.device) == "cpu" if isinstance(seed_nodes, dict): for ntype in list(seed_nodes.keys()): if not cpu: break cpu = ( cpu and F.device_type(seed_nodes[ntype].device) == "cpu" ) else: cpu = cpu and F.device_type(seed_nodes.device) == "cpu" if cpu and isinstance(g, DGLGraph) and F.backend_name == "pytorch": if self.g != g: self.mapping = {} self.g = g for fanout in reversed(self.fanouts): block = g.sample_neighbors_fused( seed_nodes, fanout, edge_dir=self.edge_dir, prob=self.prob, replace=self.replace, exclude_edges=exclude_eids, mapping=self.mapping, ) seed_nodes = block.srcdata[NID] blocks.insert(0, block) return seed_nodes, output_nodes, blocks for fanout in reversed(self.fanouts): frontier = g.sample_neighbors( seed_nodes, fanout, edge_dir=self.edge_dir, prob=self.prob, replace=self.replace, output_device=self.output_device, exclude_edges=exclude_eids, ) block = to_block(frontier, seed_nodes) # If sampled from graphbolt-backed DistGraph, `EID` may not be in # the block. If not exists, we should remove it from the block. if EID in frontier.edata.keys(): block.edata[EID] = frontier.edata[EID] else: del block.edata[EID] seed_nodes = block.srcdata[NID] blocks.insert(0, block) return seed_nodes, output_nodes, blocks MultiLayerNeighborSampler = NeighborSampler class MultiLayerFullNeighborSampler(NeighborSampler): """Sampler that builds computational dependency of node representations by taking messages from all neighbors for multilayer GNN. This sampler will make every node gather messages from every single neighbor per edge type. 
Parameters ---------- num_layers : int The number of GNN layers to sample. kwargs : Passed to :class:`dgl.dataloading.NeighborSampler`. Examples -------- To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on a homogeneous graph where each node takes messages from all neighbors for the first, second, and third layer respectively (assuming the backend is PyTorch): >>> sampler = dgl.dataloading.MultiLayerFullNeighborSampler(3) >>> dataloader = dgl.dataloading.DataLoader( ... g, train_nid, sampler, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, output_nodes, blocks in dataloader: ... train_on(blocks) Notes ----- For the concept of MFGs, please refer to :ref:`User Guide Section 6 ` and :doc:`Minibatch Training Tutorials `. """ def __init__(self, num_layers, **kwargs): super().__init__([-1] * num_layers, **kwargs) ================================================ FILE: python/dgl/dataloading/shadow.py ================================================ """ShaDow-GNN subgraph samplers.""" from .. import transforms from ..base import NID from ..sampling.utils import EidExcluder from .base import Sampler, set_edge_lazy_features, set_node_lazy_features class ShaDowKHopSampler(Sampler): """K-hop subgraph sampler from `Deep Graph Neural Networks with Shallow Subgraph Samplers `__. It performs node-wise neighbor sampling and returns the subgraph induced by all the sampled nodes. The seed nodes from which the neighbors are sampled will appear the first in the induced nodes of the subgraph. Parameters ---------- fanouts : list[int] or list[dict[etype, int]] List of neighbors to sample per edge type for each GNN layer, with the i-th element being the fanout for the i-th GNN layer. If only a single integer is provided, DGL assumes that every edge type will have the same fanout. If -1 is provided for one edge type on one layer, then all inbound edges of that edge type will be included. 
replace : bool, default True Whether to sample with replacement prob : str, optional If given, the probability of each neighbor being sampled is proportional to the edge feature value with the given name in ``g.edata``. The feature must be a scalar on each edge. Examples -------- **Node classification** To train a 3-layer GNN for node classification on a set of nodes ``train_nid`` on a homogeneous graph where each node takes messages from 5, 10, 15 neighbors for the first, second, and third layer respectively (assuming the backend is PyTorch): >>> g = dgl.data.CoraFullDataset()[0] >>> sampler = dgl.dataloading.ShaDowKHopSampler([5, 10, 15]) >>> dataloader = dgl.dataloading.DataLoader( ... g, torch.arange(g.num_nodes()), sampler, ... batch_size=5, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, output_nodes, subgraph in dataloader: ... print(subgraph) ... assert torch.equal(input_nodes, subgraph.ndata[dgl.NID]) ... assert torch.equal(input_nodes[:output_nodes.shape[0]], output_nodes) ... break Graph(num_nodes=529, num_edges=3796, ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'feat': Scheme(shape=(8710,), dtype=torch.float32), '_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) If training on a heterogeneous graph and you want different number of neighbors for each edge type, one should instead provide a list of dicts. Each dict would specify the number of neighbors to pick per edge type. >>> sampler = dgl.dataloading.ShaDowKHopSampler([ ... {('user', 'follows', 'user'): 5, ... ('user', 'plays', 'game'): 4, ... 
('game', 'played-by', 'user'): 3}] * 3) If you would like non-uniform neighbor sampling: >>> g.edata['p'] = torch.rand(g.num_edges()) # any non-negative 1D vector works >>> sampler = dgl.dataloading.ShaDowKHopSampler([5, 10, 15], prob='p') """ def __init__( self, fanouts, replace=False, prob=None, prefetch_node_feats=None, prefetch_edge_feats=None, output_device=None, ): super().__init__() self.fanouts = fanouts self.replace = replace self.prob = prob self.prefetch_node_feats = prefetch_node_feats self.prefetch_edge_feats = prefetch_edge_feats self.output_device = output_device def sample( self, g, seed_nodes, exclude_eids=None ): # pylint: disable=arguments-differ """Sampling function. Parameters ---------- g : DGLGraph The graph to sample nodes from. seed_nodes : Tensor or dict[str, Tensor] The nodes sampled in the current minibatch. exclude_eids : Tensor or dict[etype, Tensor], optional The edges to exclude from neighborhood expansion. Returns ------- input_nodes, output_nodes, subg A triplet containing (1) the node IDs inducing the subgraph, (2) the node IDs that are sampled in this minibatch, and (3) the subgraph itself. 
""" output_nodes = seed_nodes for fanout in reversed(self.fanouts): frontier = g.sample_neighbors( seed_nodes, fanout, output_device=self.output_device, replace=self.replace, prob=self.prob, exclude_edges=exclude_eids, ) block = transforms.to_block(frontier, seed_nodes) seed_nodes = block.srcdata[NID] subg = g.subgraph( seed_nodes, relabel_nodes=True, output_device=self.output_device ) if exclude_eids is not None: subg = EidExcluder(exclude_eids)(subg) set_node_lazy_features(subg, self.prefetch_node_feats) set_edge_lazy_features(subg, self.prefetch_edge_feats) return seed_nodes, output_nodes, subg ================================================ FILE: python/dgl/dataloading/spot_target.py ================================================ """SpotTarget: Target edge excluder for link prediction""" import torch from .base import find_exclude_eids class SpotTarget(object): """Callable excluder object to exclude the edges by the degree threshold. Besides excluding all the edges or given edges in the edge sampler ``dgl.dataloading.as_edge_prediction_sampler`` in link prediction training, this excluder can extend the exclusion function by only excluding the edges incident to low-degree nodes in the graph to bring the performance increase in training link prediction model. This function will exclude the edge if incident to at least one node with degree larger or equal to ``degree_threshold``. The performance boost by excluding the target edges incident to low-degree nodes can be found in this paper: https://arxiv.org/abs/2306.00899 Parameters ---------- g : DGLGraph The graph. exclude : Union[str, callable] Whether and how to exclude dependencies related to the sampled edges in the minibatch. Possible values are * ``self``, for excluding the edges in the current minibatch. * ``reverse_id``, for excluding not only the edges in the current minibatch but also their reverse edges according to the ID mapping in the argument :attr:`reverse_eids`. 
* ``reverse_types``, for excluding not only the edges in the current minibatch but also their reverse edges stored in another type according to the argument :attr:`reverse_etypes`. * User-defined exclusion rule. It is a callable with edges in the current minibatch as a single argument and should return the edges to be excluded. degree_threshold : int The threshold of node degrees, if the source or target node of an edge incident to has larger or equal degrees than ``degree_threshold``, this edge will be excluded from the graph reverse_eids : Tensor or dict[etype, Tensor], optional A tensor of reverse edge ID mapping. The i-th element indicates the ID of the i-th edge's reverse edge. If the graph is heterogeneous, this argument requires a dictionary of edge types and the reverse edge ID mapping tensors. reverse_etypes : dict[etype, etype], optional The mapping from the original edge types to their reverse edge types. Examples -------- .. code:: python low_degree_excluder = SpotTarget(g, degree_threshold=10) sampler = as_edge_prediction_sampler(sampler, exclude=low_degree_excluder, reverse_eids=reverse_eids, negative_sampler=negative_sampler.Uniform(1)) """ def __init__( self, g, exclude, degree_threshold=10, reverse_eids=None, reverse_etypes=None, ): self.g = g self.exclude = exclude self.degree_threshold = degree_threshold self.reverse_eids = reverse_eids self.reverse_etypes = reverse_etypes def __call__(self, seed_edges): g = self.g src, dst = g.find_edges(seed_edges) head_degree = g.in_degrees(src) tail_degree = g.in_degrees(dst) degree = torch.min(head_degree, tail_degree) degree_mask = degree < self.degree_threshold edges_need_to_exclude = seed_edges[degree_mask] return find_exclude_eids( g, edges_need_to_exclude, self.exclude, self.reverse_eids, self.reverse_etypes, ) ================================================ FILE: python/dgl/distgnn/__init__.py ================================================ """ This package contains DistGNN and Libra based graph 
def libra_partition(num_community, G, resultdir):
    """
    Performs vertex-cut based graph partitioning and converts the partitioning
    output to DGL input format.

    The node data of ``G`` is read under the keys ``feat`` (falling back to
    ``features``), ``label`` (falling back to ``labels``), ``train_mask``,
    ``test_mask`` and ``val_mask``.

    Parameters
    ----------
    num_community : Number of partitions to create
    G : Input graph to be partitioned
    resultdir : Output location for storing the partitioned graphs

    Output
    ------
    1. Creates X partition folder as XCommunities (say, X=2, so, 2Communities)
       XCommunities contains file name communityZ.txt per partition Z (Z <- 0 .. X-1);
       each such file contains a list of edges assigned to that partition.
       These files constitute the output of Libra graph partitioner
       (An intermediate result of this function).
    2. The folder also contains partZ folders, each of these folders stores
       DGL/DistGNN graphs for the Z partitions;
       these graph files are used as input to DistGNN.
    3. The folder also contains a json file which contains partitions' information.
    """

    num_nodes = G.num_nodes()  # number of nodes
    num_edges = G.num_edges()  # number of edges
    print("Number of nodes in the graph: ", num_nodes)
    print("Number of edges in the graph: ", num_edges)

    in_d = G.in_degrees()
    out_d = G.out_degrees()
    node_degree = in_d + out_d
    edgenum_unassigned = node_degree.clone()

    u_t, v_t = G.edges()
    weight_ = th.ones(u_t.shape[0], dtype=th.int64)
    community_weights = th.zeros(num_community, dtype=th.int64)

    ## call to C/C++ code
    # NOTE(review): ``community_weights`` is read right after this call, so the
    # kernel presumably fills it (and ``out``) in place - confirm against the
    # C++ implementation.
    out = th.zeros(u_t.shape[0], dtype=th.int32)
    libra_vertex_cut(
        num_community,
        node_degree,
        edgenum_unassigned,
        community_weights,
        u_t,
        v_t,
        weight_,
        out,
        num_nodes,
        num_edges,
        resultdir,
    )

    print("Max partition size: ", int(community_weights.max()))
    print(" ** Converting libra partitions to dgl graphs **")
    # Buffer length used for every per-partition edge/node tensor below:
    # the largest partition weight plus 1024 entries of slack.
    fsize = int(community_weights.max()) + 1024  ## max edges in partition

    node_map = th.zeros(num_community, dtype=th.int64)
    indices = th.zeros(num_nodes, dtype=th.int64)
    lrtensor = th.zeros(num_nodes, dtype=th.int64)
    gdt_key = th.zeros(num_nodes, dtype=th.int64)
    gdt_value = th.zeros([num_nodes, num_community], dtype=th.int64)
    offset = th.zeros(1, dtype=th.int64)
    ldt_ar = []

    gg_ar = [DGLGraph() for i in range(num_community)]
    part_nodes = []

    print(">>> ", "num_nodes ", " ", "num_edges")
    ## Iterator over number of partitions
    for i in range(num_community):
        g = gg_ar[i]

        a_t = th.zeros(fsize, dtype=th.int64)
        b_t = th.zeros(fsize, dtype=th.int64)
        ldt_key = th.zeros(fsize, dtype=th.int64)
        ldt_ar.append(ldt_key)

        ## building node, partition dictionary
        ## Assign local node ids and mapping to global node ids
        ret = libra2dgl_build_dict(
            a_t,
            b_t,
            indices,
            ldt_key,
            gdt_key,
            gdt_value,
            node_map,
            offset,
            num_community,
            i,
            fsize,
            resultdir,
        )

        # ret[0] / ret[1] are used as the node and edge counts of partition i.
        num_nodes_partition = int(ret[0])
        num_edges_partition = int(ret[1])
        part_nodes.append(num_nodes_partition)
        print(">>> ", num_nodes_partition, " ", num_edges_partition)
        # Only the first ``num_edges_partition`` entries of the buffers are used.
        g.add_edges(a_t[0:num_edges_partition], b_t[0:num_edges_partition])

    ########################################################
    ## fixing lr - 1-level tree for the split-nodes
    libra2dgl_set_lr(gdt_key, gdt_value, lrtensor, num_community, num_nodes)
    ########################################################

    # The graph name is derived from the output path: the text after the last
    # "_" and before the following "/".
    graph_name = resultdir.split("_")[-1].split("/")[0]
    part_method = "Libra"
    num_parts = num_community  ## number of partitions/communities
    num_hops = 0
    node_map_val = node_map.tolist()
    edge_map_val = 0
    out_path = resultdir

    part_metadata = {
        "graph_name": graph_name,
        "num_nodes": G.num_nodes(),
        "num_edges": G.num_edges(),
        "part_method": part_method,
        "num_parts": num_parts,
        "halo_hops": num_hops,
        "node_map": node_map_val,
        "edge_map": edge_map_val,
    }
    ############################################################

    for i in range(num_community):
        # Always take the head of the lists: the entry is deleted at the end
        # of each iteration, so index 0 is the graph of partition ``i``.
        g = gg_ar[0]
        num_nodes_partition = part_nodes[i]
        adj = th.zeros([num_nodes_partition, num_community - 1], dtype=th.int64)
        inner_node = th.zeros(num_nodes_partition, dtype=th.int32)
        lr_t = th.zeros(num_nodes_partition, dtype=th.int64)
        ldt = ldt_ar[0]

        # Accept both spellings of the feature / label columns.
        try:
            feat = G.ndata["feat"]
        except KeyError:
            feat = G.ndata["features"]

        try:
            labels = G.ndata["label"]
        except KeyError:
            labels = G.ndata["labels"]

        trainm = G.ndata["train_mask"].int()
        testm = G.ndata["test_mask"].int()
        valm = G.ndata["val_mask"].int()

        feat_size = feat.shape[1]
        gfeat = th.zeros([num_nodes_partition, feat_size], dtype=feat.dtype)

        glabels = th.zeros(num_nodes_partition, dtype=labels.dtype)
        gtrainm = th.zeros(num_nodes_partition, dtype=trainm.dtype)
        gtestm = th.zeros(num_nodes_partition, dtype=testm.dtype)
        gvalm = th.zeros(num_nodes_partition, dtype=valm.dtype)

        ## build remote node database per local node
        ## gather feats, train, test, val, and labels for each partition
        libra2dgl_build_adjlist(
            feat,
            gfeat,
            adj,
            inner_node,
            ldt,
            gdt_key,
            gdt_value,
            node_map,
            lr_t,
            lrtensor,
            num_nodes_partition,
            num_community,
            i,
            feat_size,
            labels,
            trainm,
            testm,
            valm,
            glabels,
            gtrainm,
            gtestm,
            gvalm,
            feat.shape[0],
        )

        g.ndata["adj"] = adj  ## database of remote clones
        g.ndata["inner_node"] = inner_node  ## split node '0' else '1'
        g.ndata["feat"] = gfeat  ## gathered features
        g.ndata["lf"] = lr_t  ## 1-level tree among split nodes

        g.ndata["label"] = glabels
        g.ndata["train_mask"] = gtrainm
        g.ndata["test_mask"] = gtestm
        g.ndata["val_mask"] = gvalm

        # Validation code, run only small graphs
        # for l in range(num_nodes_partition):
        #     index = int(ldt[l])
        #     assert glabels[l] == labels[index]
        #     assert gtrainm[l] == trainm[index]
        #     assert gtestm[l] == testm[index]
        #     for j in range(feat_size):
        #         assert gfeat[l][j] == feat[index][j]

        print("Writing partition {} to file".format(i), flush=True)

        part = g
        part_id = i
        part_dir = os.path.join(out_path, "part" + str(part_id))
        node_feat_file = os.path.join(part_dir, "node_feat.dgl")
        edge_feat_file = os.path.join(part_dir, "edge_feat.dgl")
        part_graph_file = os.path.join(part_dir, "graph.dgl")
        # The edge feature path is recorded in the metadata only; this
        # function writes node features and the graph, not edge features.
        part_metadata["part-{}".format(part_id)] = {
            "node_feats": node_feat_file,
            "edge_feats": edge_feat_file,
            "part_graph": part_graph_file,
        }
        os.makedirs(part_dir, mode=0o775, exist_ok=True)
        save_tensors(node_feat_file, part.ndata)
        save_graphs(part_graph_file, [part])

        # Drop the references to the partition that was just written.
        del g
        del gg_ar[0]
        del ldt
        del ldt_ar[0]

    with open("{}/{}.json".format(out_path, graph_name), "w") as outfile:
        json.dump(part_metadata, outfile, sort_keys=True, indent=4)

    print("Conversion libra2dgl completed !!!")
def partition_graph(num_community, G, resultdir):
    """
    Performs vertex-cut based graph partitioning and converts the partitioning
    output to DGL input format.

    Given a graph, this function will create a folder named ``XCommunities``
    where ``X`` stands for the number of communities.  It will contain ``X``
    files named ``communityZ.txt`` for each partition Z (from 0 to X-1);
    each such file contains a list of edges assigned to that partition.
    These files constitute the output of Libra graph partitioner.

    The folder also contains X subfolders named ``partZ``, each of these
    folders stores DGL/DistGNN graphs for partition Z; these graph files are
    used as input to DistGNN.

    The folder also contains a json file which contains partitions'
    information.

    Currently we require the graph's node data to contain the following
    columns:

    * ``feat`` (or ``features``) for node features.
    * ``label`` (or ``labels``) for node labels.
    * ``train_mask`` as a boolean mask of training node set.
    * ``val_mask`` as a boolean mask of validation node set.
    * ``test_mask`` as a boolean mask of test node set.

    Parameters
    ----------
    num_community : int
        Number of partitions to create.
    G : DGLGraph
        Input graph to be partitioned.
    resultdir : str
        Output location for storing the partitioned graphs.

    Raises
    ------
    DGLError
        If the output directory or its ``XCommunities`` sub-directory cannot
        be created.
    """

    print("num partitions: ", num_community)
    print("output location: ", resultdir)

    ## create output directory
    # ``except Exception`` keeps converting ordinary failures to DGLError but,
    # unlike a bare ``except``, lets KeyboardInterrupt/SystemExit propagate.
    try:
        os.makedirs(resultdir, mode=0o775, exist_ok=True)
    except Exception as err:
        raise DGLError(
            "Error: Could not create directory: {}".format(resultdir)
        ) from err

    tic = time.time()
    print(
        "####################################################################"
    )
    print("Executing parititons: ", num_community)
    ltic = time.time()
    try:
        resultdir = os.path.join(resultdir, str(num_community) + "Communities")
        os.makedirs(resultdir, mode=0o775, exist_ok=True)
    except Exception as err:
        raise DGLError(
            "Error: Could not create sub-directory: {}".format(resultdir)
        ) from err

    ## Libra partitioning
    libra_partition(num_community, G, resultdir)

    ltoc = time.time()
    print(
        "Time taken by {} partitions {:0.4f} sec".format(
            num_community, ltoc - ltic
        )
    )
    print()

    toc = time.time()
    print(
        "Generated ",
        num_community,
        " partitions in {:0.4f} sec".format(toc - tic),
        flush=True,
    )
    print("Partitioning completed successfully !!!")
""" print("num partitions: ", num_community) print("output location: ", resultdir) ## create ouptut directory try: os.makedirs(resultdir, mode=0o775, exist_ok=True) except: raise DGLError("Error: Could not create directory: ", resultdir) tic = time.time() print( "####################################################################" ) print("Executing parititons: ", num_community) ltic = time.time() try: resultdir = os.path.join(resultdir, str(num_community) + "Communities") os.makedirs(resultdir, mode=0o775, exist_ok=True) except: raise DGLError("Error: Could not create sub-directory: ", resultdir) ## Libra partitioning libra_partition(num_community, G, resultdir) ltoc = time.time() print( "Time taken by {} partitions {:0.4f} sec".format( num_community, ltoc - ltic ) ) print() toc = time.time() print( "Generated ", num_community, " partitions in {:0.4f} sec".format(toc - tic), flush=True, ) print("Partitioning completed successfully !!!") ================================================ FILE: python/dgl/distgnn/tools/__init__.py ================================================ """ This package contains extra routines related to Libra graph partitioner. """ from .tools import load_proteins ================================================ FILE: python/dgl/distgnn/tools/tools.py ================================================ r""" Copyright (c) 2021 Intel Corporation \file distgnn/tools/tools.py \brief Tools for use in Libra graph partitioner. \author Vasimuddin Md """ import os import random import requests import torch as th from scipy.io import mmread import dgl from dgl.base import DGLError from dgl.data.utils import load_graphs, save_graphs, save_tensors def rep_per_node(prefix, num_community): """ Used on Libra partitioned data. 
def download_proteins():
    """
    Downloads the proteins dataset and stores it as ``proteins.mtx`` in the
    current working directory.

    Raises
    ------
    DGLError
        If the request fails or the server answers with an HTTP error status.
    """
    print("Downloading dataset...")
    print("This might a take while..")
    url = "https://portal.nersc.gov/project/m1982/GNN/"
    file_name = "subgraph3_iso_vs_iso_30_70length_ALL.m100.propermm.mtx"
    url = url + file_name
    try:
        req = requests.get(url)
        # Without this check an HTTP error page would be written to disk as
        # if it were the dataset.
        req.raise_for_status()
    except requests.RequestException as err:
        raise DGLError(
            "Error: Failed to download Proteins dataset!! Aborting.."
        ) from err

    with open("proteins.mtx", "wb") as handle:
        handle.write(req.content)


def proteins_mtx2dgl():
    """
    This function converts Proteins dataset from mtx to dgl format.

    Reads ``proteins.mtx`` from the current working directory and returns a
    graph whose node data holds ``feat``, ``label`` and the train/test/val
    masks. Features and labels are arbitrary placeholders. The masks mark
    consecutive node ranges (train, then test, then val); ranges beyond the
    number of nodes are clipped.
    """
    print("Converting mtx2dgl..")
    print("This might a take while..")
    a_mtx = mmread("proteins.mtx")
    coo = a_mtx.tocoo()
    u = th.tensor(coo.row, dtype=th.int64)
    v = th.tensor(coo.col, dtype=th.int64)
    g = dgl.DGLGraph()

    g.add_edges(u, v)

    n = g.num_nodes()
    feat_size = 128  ## arbitrary number
    feats = th.empty([n, feat_size], dtype=th.float32)

    ## arbitrary numbers
    train_size = 1000000
    test_size = 500000
    val_size = 5000
    nlabels = 256

    train_mask = th.zeros(n, dtype=th.bool)
    test_mask = th.zeros(n, dtype=th.bool)
    val_mask = th.zeros(n, dtype=th.bool)

    # Slice assignment replaces the element-by-element Python loops.
    test_end = train_size + test_size
    train_mask[:train_size] = True
    test_mask[train_size:test_end] = True
    val_mask[test_end : test_end + val_size] = True

    # One uniformly drawn label per node, still taken from the ``random``
    # module.
    label = th.tensor(random.choices(range(nlabels), k=n), dtype=th.int64)

    g.ndata["feat"] = feats
    g.ndata["train_mask"] = train_mask
    g.ndata["test_mask"] = test_mask
    g.ndata["val_mask"] = val_mask
    g.ndata["label"] = label

    return g
""" print("Converting mtx2dgl..") print("This might a take while..") a_mtx = mmread("proteins.mtx") coo = a_mtx.tocoo() u = th.tensor(coo.row, dtype=th.int64) v = th.tensor(coo.col, dtype=th.int64) g = dgl.DGLGraph() g.add_edges(u, v) n = g.num_nodes() feat_size = 128 ## arbitrary number feats = th.empty([n, feat_size], dtype=th.float32) ## arbitrary numbers train_size = 1000000 test_size = 500000 val_size = 5000 nlabels = 256 train_mask = th.zeros(n, dtype=th.bool) test_mask = th.zeros(n, dtype=th.bool) val_mask = th.zeros(n, dtype=th.bool) label = th.zeros(n, dtype=th.int64) for i in range(train_size): train_mask[i] = True for i in range(test_size): test_mask[train_size + i] = True for i in range(val_size): val_mask[train_size + test_size + i] = True for i in range(n): label[i] = random.choice(range(nlabels)) g.ndata["feat"] = feats g.ndata["train_mask"] = train_mask g.ndata["test_mask"] = test_mask g.ndata["val_mask"] = val_mask g.ndata["label"] = label return g def save(g, dataset): """ This function saves input dataset to dgl format Parameters ---------- g : graph to be saved dataset : output folder name """ print("Saving dataset..") part_dir = os.path.join("./" + dataset) node_feat_file = os.path.join(part_dir, "node_feat.dgl") part_graph_file = os.path.join(part_dir, "graph.dgl") os.makedirs(part_dir, mode=0o775, exist_ok=True) save_tensors(node_feat_file, g.ndata) save_graphs(part_graph_file, [g]) print("Graph saved successfully !!") def load_proteins(dataset): """ This function downloads, converts, and load Proteins graph dataset Parameter --------- dataset: output folder name """ part_dir = dataset graph_file = os.path.join(part_dir + "/graph.dgl") if not os.path.exists("proteins.mtx"): download_proteins() if not os.path.exists(graph_file): g = proteins_mtx2dgl() save(g, dataset) ## load graph = load_graphs(graph_file)[0][0] return graph ================================================ FILE: python/dgl/distributed/__init__.py 
================================================ """DGL distributed module""" from . import optim from .dist_context import exit_client, initialize from .dist_dataloader import ( DistDataLoader, DistEdgeDataLoader, DistNodeDataLoader, EdgeCollator, NodeCollator, ) from .dist_graph import DistGraph, DistGraphServer, edge_split, node_split from .dist_tensor import DistTensor from .graph_partition_book import GraphPartitionBook, PartitionPolicy from .graph_services import * from .kvstore import KVClient, KVServer from .nn import * from .partition import ( dgl_partition_to_graphbolt, gb_convert_single_dgl_partition, load_partition, load_partition_book, load_partition_feats, partition_graph, ) from .rpc import * from .rpc_client import connect_to_server from .rpc_server import start_server from .server_state import ServerState from .constants import * ================================================ FILE: python/dgl/distributed/constants.py ================================================ """Define all the constants used by DGL rpc""" # Maximum size of message queue in bytes MAX_QUEUE_SIZE = 20 * 1024 * 1024 * 1024 SERVER_EXIT = "server_exit" DEFAULT_NTYPE = "_N" DEFAULT_ETYPE = (DEFAULT_NTYPE, "_E", DEFAULT_NTYPE) DGL2GB_EID = "_dgl2gb_eid" GB_DST_ID = "_gb_dst_id" ================================================ FILE: python/dgl/distributed/dist_context.py ================================================ """Initialize the distributed services""" # pylint: disable=line-too-long import atexit import gc import multiprocessing as mp import os import queue import sys import time import traceback from enum import Enum from .. import utils from ..base import dgl_warning, DGLError from . 
SAMPLER_POOL = None
NUM_SAMPLER_WORKERS = 0
INITIALIZED = False


def set_initialized(value=True):
    """Record whether rpc has been initialized."""
    global INITIALIZED
    INITIALIZED = value


def get_sampler_pool():
    """Return the ``(sampler pool, number of sampler workers)`` pair."""
    return SAMPLER_POOL, NUM_SAMPLER_WORKERS


def _init_rpc(
    ip_config,
    num_servers,
    max_queue_size,
    role,
    num_threads,
    group_id,
):
    """Set up rpc, role and kvstore inside a worker process."""
    try:
        utils.set_num_threads(num_threads)
        dist_mode = os.environ.get("DGL_DIST_MODE", "standalone")
        if dist_mode != "standalone":
            connect_to_server(ip_config, num_servers, max_queue_size, group_id)
        init_role(role)
        init_kvstore(ip_config, num_servers, role)
    except Exception as err:
        # Report in the worker before propagating the failure.
        print(err, flush=True)
        traceback.print_exc()
        raise


class MpCommand(Enum):
    """Commands exchanged with the sampler worker processes."""

    INIT_RPC = 0  # Not used in the task queue
    SET_COLLATE_FN = 1
    CALL_BARRIER = 2
    DELETE_COLLATE_FN = 3
    CALL_COLLATE_FN = 4
    CALL_FN_ALL_WORKERS = 5
    FINALIZE_POOL = 6


def init_process(rpc_config, mp_contexts):
    """Entry point of a worker: initialize rpc, then serve queued commands.

    ``mp_contexts`` is the ``(data_queue, task_queue, barrier)`` triple; the
    loop ends once ``MpCommand.FINALIZE_POOL`` has been handled.
    """
    try:
        _init_rpc(*rpc_config)
        data_queue, task_queue, barrier = mp_contexts
        collate_fns = {}
        while True:
            try:
                # Follow https://github.com/pytorch/pytorch/blob/d57ce8cf8989c0b737e636d8d7abe16c1f08f70b/torch/utils/data/_utils/worker.py#L260
                command, args = task_queue.get(timeout=5)
            except queue.Empty:
                continue

            if command == MpCommand.SET_COLLATE_FN:
                loader_name, func = args
                collate_fns[loader_name] = func
            elif command == MpCommand.CALL_BARRIER:
                barrier.wait()
            elif command == MpCommand.DELETE_COLLATE_FN:
                (loader_name,) = args
                del collate_fns[loader_name]
            elif command == MpCommand.CALL_COLLATE_FN:
                loader_name, collate_args = args
                batch = collate_fns[loader_name](collate_args)
                data_queue.put((loader_name, batch))
            elif command == MpCommand.CALL_FN_ALL_WORKERS:
                func, func_args = args
                func(func_args)
            elif command == MpCommand.FINALIZE_POOL:
                _exit()
                break
            else:
                raise Exception("Unknown command")
    except Exception:
        traceback.print_exc()
        raise
class CustomPool:
    """Customized worker pool

    Each worker process runs ``init_process`` and has its own task queue;
    all workers share a single result queue.
    """

    def __init__(self, num_workers, rpc_config):
        """
        Customized worker pool init function

        Parameters
        ----------
        num_workers : int
            Number of worker processes to spawn.
        rpc_config : tuple
            Arguments forwarded to the rpc initialization in every worker.
        """
        ctx = mp.get_context("spawn")
        self.num_workers = num_workers
        # As pool could be used by any number of dataloaders, queues
        # should be able to take infinite elements to avoid dead lock.
        self.queue_size = 0
        self.result_queue = ctx.Queue(self.queue_size)
        self.results = {}  # key is dataloader name, value is fetched batch.
        self.task_queues = []
        self.process_list = []
        self.current_proc_id = 0
        self.cache_result_dict = {}
        self.barrier = ctx.Barrier(num_workers)
        for _ in range(num_workers):
            task_queue = ctx.Queue(self.queue_size)
            self.task_queues.append(task_queue)
            proc = ctx.Process(
                target=init_process,
                args=(
                    rpc_config,
                    (self.result_queue, task_queue, self.barrier),
                ),
            )
            proc.daemon = True
            proc.start()
            self.process_list.append(proc)

    def set_collate_fn(self, func, dataloader_name):
        """Set collate function in subprocess"""
        for task_queue in self.task_queues:
            task_queue.put(
                (MpCommand.SET_COLLATE_FN, (dataloader_name, func))
            )
        self.results[dataloader_name] = []

    def submit_task(self, dataloader_name, args):
        """Submit task to workers"""
        # Round robin
        self.task_queues[self.current_proc_id].put(
            (MpCommand.CALL_COLLATE_FN, (dataloader_name, args))
        )
        self.current_proc_id = (self.current_proc_id + 1) % self.num_workers

    def submit_task_to_all_workers(self, func, args):
        """Submit task to all workers"""
        for task_queue in self.task_queues:
            task_queue.put((MpCommand.CALL_FN_ALL_WORKERS, (func, args)))

    def get_result(self, dataloader_name, timeout=1800):
        """Get result from result queue

        Results of other dataloaders that are fetched while waiting are
        buffered for them. Results that belong to a dataloader which has
        already been removed with :meth:`delete_collate_fn` are dropped.

        Raises
        ------
        DGLError
            If ``dataloader_name`` is not registered in this pool.
        queue.Empty
            If no result arrives within ``timeout`` seconds.
        """
        if dataloader_name not in self.results:
            raise DGLError(
                f"Got result from an unknown dataloader {dataloader_name}."
            )
        pending = self.results[dataloader_name]
        while len(pending) == 0:
            dl_name, data = self.result_queue.get(timeout=timeout)
            bucket = self.results.get(dl_name)
            if bucket is None:
                # Late result of a deleted dataloader: nobody will read it,
                # and it must not break the dataloader that is waiting.
                continue
            bucket.append(data)
        return pending.pop(0)

    def delete_collate_fn(self, dataloader_name):
        """Delete collate function"""
        for task_queue in self.task_queues:
            task_queue.put(
                (MpCommand.DELETE_COLLATE_FN, (dataloader_name,))
            )
        del self.results[dataloader_name]

    def call_barrier(self):
        """Call barrier at all workers"""
        for task_queue in self.task_queues:
            task_queue.put((MpCommand.CALL_BARRIER, tuple()))

    def close(self):
        """Close worker pool"""
        for task_queue in self.task_queues:
            task_queue.put((MpCommand.FINALIZE_POOL, tuple()), block=False)
        time.sleep(0.5)  # Fix for early python version

    def join(self):
        """Join the close process of worker pool"""
        for proc in self.process_list:
            proc.join()
def initialize(
    ip_config,
    max_queue_size=MAX_QUEUE_SIZE,
    net_type=None,
    num_worker_threads=1,
    use_graphbolt=False,
):
    """Initialize DGL's distributed module

    This function initializes DGL's distributed module. It acts differently in server
    or client modes. In the server mode, it runs the server code and never returns.
    In the client mode, it builds connections with servers for communication and
    creates worker processes for distributed sampling.

    The mode is taken from the ``DGL_ROLE`` environment variable (``client``
    when unset). Whether remote connections are made is taken from
    ``DGL_DIST_MODE`` (``standalone`` when unset).

    Parameters
    ----------
    ip_config: str
        File path of ip_config file
    max_queue_size : int
        Maximal size (bytes) of client queue buffer (~20 GB on default).

        Note that the 20 GB is just an upper-bound and DGL uses zero-copy and
        it will not allocate 20GB memory at once.
    net_type : str, optional
        [Deprecated] Networking type, can be 'socket' only.
    num_worker_threads: int
        The number of OMP threads in each sampler process.
    use_graphbolt: bool, optional
        Whether to use GraphBolt for distributed train.

    Note
    ----
    Users have to invoke this API before any DGL's distributed API and framework-specific
    distributed API. For example, when used with Pytorch, users have to invoke this function
    before Pytorch's `pytorch.distributed.init_process_group`.
    """
    print(
        f"Initialize the distributed services with graphbolt: {use_graphbolt}"
    )
    if net_type is not None:
        dgl_warning(
            "net_type is deprecated and will be removed in future release."
        )
    if os.environ.get("DGL_ROLE", "client") == "server":
        # Server mode: every setting below comes from the environment.
        from .dist_graph import DistGraphServer

        assert (
            os.environ.get("DGL_SERVER_ID") is not None
        ), "Please define DGL_SERVER_ID to run DistGraph server"
        assert (
            os.environ.get("DGL_IP_CONFIG") is not None
        ), "Please define DGL_IP_CONFIG to run DistGraph server"
        assert (
            os.environ.get("DGL_NUM_SERVER") is not None
        ), "Please define DGL_NUM_SERVER to run DistGraph server"
        assert (
            os.environ.get("DGL_NUM_CLIENT") is not None
        ), "Please define DGL_NUM_CLIENT to run DistGraph server"
        assert (
            os.environ.get("DGL_CONF_PATH") is not None
        ), "Please define DGL_CONF_PATH to run DistGraph server"
        # Comma-separated list of graph formats, "csc" when unset.
        formats = os.environ.get("DGL_GRAPH_FORMAT", "csc").split(",")
        formats = [f.strip() for f in formats]
        rpc.reset()
        serv = DistGraphServer(
            int(os.environ.get("DGL_SERVER_ID")),
            os.environ.get("DGL_IP_CONFIG"),
            int(os.environ.get("DGL_NUM_SERVER")),
            int(os.environ.get("DGL_NUM_CLIENT")),
            os.environ.get("DGL_CONF_PATH"),
            graph_format=formats,
            use_graphbolt=use_graphbolt,
        )
        serv.start()
        # The process ends here once the server has stopped.
        sys.exit()
    else:
        # Client mode.
        num_workers = int(os.environ.get("DGL_NUM_SAMPLER", 0))
        num_servers = int(os.environ.get("DGL_NUM_SERVER", 1))
        group_id = int(os.environ.get("DGL_GROUP_ID", 0))
        rpc.reset()
        global SAMPLER_POOL
        global NUM_SAMPLER_WORKERS
        is_standalone = (
            os.environ.get("DGL_DIST_MODE", "standalone") == "standalone"
        )
        # Sampler worker processes are only created outside standalone mode.
        if num_workers > 0 and not is_standalone:
            SAMPLER_POOL = CustomPool(
                num_workers,
                (
                    ip_config,
                    num_servers,
                    max_queue_size,
                    "sampler",
                    num_worker_threads,
                    group_id,
                ),
            )
        else:
            SAMPLER_POOL = None
        # The requested worker count is recorded even when no pool is created.
        NUM_SAMPLER_WORKERS = num_workers
        if not is_standalone:
            assert (
                num_servers is not None and num_servers > 0
            ), "The number of servers per machine must be specified with a positive number."
            connect_to_server(
                ip_config,
                num_servers,
                max_queue_size,
                group_id=group_id,
            )
        init_role("default")
        init_kvstore(ip_config, num_servers, "default")
def finalize_client():
    """Release resources of this client.

    The rpc sender and receiver are only finalized outside standalone mode.
    """
    if os.environ.get("DGL_DIST_MODE", "standalone") != "standalone":
        rpc.finalize_sender()
        rpc.finalize_receiver()


def _exit():
    """Run ``exit_client`` and then sleep for one second."""
    exit_client()
    time.sleep(1)


def finalize_worker():
    """Finalize workers
    Python's multiprocessing pool will not call atexit function when close
    """
    global SAMPLER_POOL
    if SAMPLER_POOL is not None:
        SAMPLER_POOL.close()


def join_finalize_worker():
    """join the worker close process"""
    global SAMPLER_POOL
    if SAMPLER_POOL is not None:
        SAMPLER_POOL.join()
    # Forget the pool so that a later ``initialize`` starts from scratch.
    SAMPLER_POOL = None


def is_initialized():
    """Is RPC initialized?"""
    return INITIALIZED


def _shutdown_servers():
    """Mark rpc as uninitialized and, on rank 0, ask every server to shut down."""
    set_initialized(False)
    # send ShutDownRequest to servers
    if rpc.get_rank() == 0:  # Only client_0 issue this command
        req = rpc.ShutDownRequest(rpc.get_rank())
        for server_id in range(rpc.get_num_server()):
            rpc.send_request(server_id, req)


def exit_client():
    """Trainer exits

    This function is called automatically when a Python process exits. Normally,
    the training script does not need to invoke this function at the end.

    In the case that the training script needs to initialize the distributed module
    multiple times (so far, this is needed in the unit tests), the training script
    needs to call `exit_client` before calling `initialize` again.
    """
    # Only client with rank_0 will send shutdown request to servers.
    print(
        "Client[{}] in group[{}] is exiting...".format(
            rpc.get_rank(), rpc.get_group_id()
        )
    )
    finalize_worker()  # finalize workers should be earlier than barrier, and non-blocking
    # collect data such as DistTensor before exit
    gc.collect()
    if os.environ.get("DGL_DIST_MODE", "standalone") != "standalone":
        # All clients reach the barrier before the servers are shut down.
        rpc.client_barrier()
        _shutdown_servers()
    finalize_client()
    join_finalize_worker()
    close_kvstore()
    # Cleanup is done; make sure it is not run a second time at exit.
    atexit.unregister(exit_client)
class DistDataLoader:
    """DGL customized multiprocessing dataloader.

    DistDataLoader provides a similar interface to Pytorch's DataLoader to generate mini-batches
    with multiprocessing. It utilizes the worker processes created by
    :func:`dgl.distributed.initialize` to parallelize sampling.

    Parameters
    ----------
    dataset: a tensor
        Tensors of node IDs or edge IDs.
    batch_size: int
        The number of samples per batch to load.
    shuffle: bool, optional
        Set to ``True`` to have the data reshuffled at every epoch (default: ``False``).
    collate_fn: callable, optional
        The function is typically used to sample neighbors of the nodes in a batch
        or the endpoint nodes of the edges in a batch.
    drop_last: bool, optional
        Set to ``True`` to drop the last incomplete batch, if the dataset size is not
        divisible by the batch size. If ``False`` and the size of dataset is not divisible
        by the batch size, then the last batch will be smaller. (default: ``False``)
    queue_size: int, optional
        Size of multiprocessing queue

    Examples
    --------
    >>> g = dgl.distributed.DistGraph('graph-name')
    >>> def sample(seeds):
    ...     seeds = th.LongTensor(np.asarray(seeds))
    ...     frontier = dgl.distributed.sample_neighbors(g, seeds, 10)
    ...     return dgl.to_block(frontier, seeds)
    >>> dataloader = dgl.distributed.DistDataLoader(dataset=nodes, batch_size=1000,
    ...                                             collate_fn=sample, shuffle=True)
    >>> for block in dataloader:
    ...     feat = g.ndata['features'][block.srcdata[dgl.NID]]
    ...     labels = g.ndata['labels'][block.dstdata[dgl.NID]]
    ...     pred = model(block, feat)

    Note
    ----
    When performing DGL's distributed sampling with multiprocessing, users have to use this class
    instead of Pytorch's DataLoader because DGL's RPC requires that all processes establish
    connections with servers before invoking any DGL's distributed API. Therefore, this dataloader
    uses the worker processes created in :func:`dgl.distributed.initialize`.

    Note
    ----
    This dataloader does not guarantee the iteration order. For example,
    if dataset = [1, 2, 3, 4], batch_size = 2 and shuffle = False, the order of [1, 2]
    and [3, 4] is not guaranteed.
    """

    def __init__(
        self,
        dataset,
        batch_size,
        shuffle=False,
        collate_fn=None,
        drop_last=False,
        queue_size=None,
    ):
        self.pool, self.num_workers = get_sampler_pool()
        if queue_size is None:
            queue_size = self.num_workers * 4 if self.num_workers > 0 else 4
        self.queue_size = queue_size  # prefetch size
        self.batch_size = batch_size
        self.num_pending = 0
        self.collate_fn = collate_fn
        self.current_pos = 0
        self.queue = []  # Only used when pool is None
        self.drop_last = drop_last
        self.recv_idxs = 0
        self.shuffle = shuffle
        self.is_closed = False
        self.dataset = dataset
        self.data_idx = F.arange(0, len(dataset))
        # Number of batches one epoch yields.
        self.expected_idxs = len(dataset) // self.batch_size
        if not self.drop_last and len(dataset) % self.batch_size != 0:
            self.expected_idxs += 1

        # We need to have a unique ID for each data loader to identify itself
        # in the sampler processes.
        global DATALOADER_ID
        self.name = "dataloader-" + str(DATALOADER_ID)
        DATALOADER_ID += 1

        if self.pool is not None:
            self.pool.set_collate_fn(self.collate_fn, self.name)

    def __del__(self):
        # ``__init__`` may have failed before ``name`` was assigned; in that
        # case nothing was registered with the pool and there is nothing to
        # clean up.
        name = getattr(self, "name", None)
        if name is None:
            return
        # When the process exits, the process pool may have been closed. We should try
        # and get the process pool again and see if we need to clean up the process pool.
        self.pool, self.num_workers = get_sampler_pool()
        if self.pool is not None:
            self.pool.delete_collate_fn(name)

    def __next__(self):
        """Return the next batch, raising ``StopIteration`` after the last one."""
        if self.pool is None:
            num_reqs = 1
        else:
            # Keep up to ``queue_size`` batches in flight.
            num_reqs = self.queue_size - self.num_pending
        for _ in range(num_reqs):
            self._request_next_batch()
        if self.recv_idxs < self.expected_idxs:
            result = self._get_data_from_result_queue()
            self.recv_idxs += 1
            self.num_pending -= 1
            return result
        else:
            assert self.num_pending == 0
            raise StopIteration

    def _get_data_from_result_queue(self, timeout=1800):
        """Fetch one batch from the local queue or from the worker pool."""
        if self.pool is None:
            ret = self.queue.pop(0)
        else:
            ret = self.pool.get_result(self.name, timeout=timeout)
        return ret

    def __iter__(self):
        """Start a new epoch: optionally reshuffle and reset the counters."""
        if self.shuffle:
            self.data_idx = F.rand_shuffle(self.data_idx)
        self.recv_idxs = 0
        self.current_pos = 0
        self.num_pending = 0
        return self

    def _request_next_batch(self):
        """Submit the next batch to the pool, or collate it in-process."""
        next_data = self._next_data()
        if next_data is None:
            return
        elif self.pool is not None:
            self.pool.submit_task(self.name, next_data)
        else:
            result = self.collate_fn(next_data)
            self.queue.append(result)
        self.num_pending += 1

    def _next_data(self):
        """Return the items of the next batch, or ``None`` when exhausted."""
        if self.current_pos == len(self.dataset):
            return None

        if self.current_pos + self.batch_size > len(self.dataset):
            if self.drop_last:
                return None
            else:
                end_pos = len(self.dataset)
        else:
            end_pos = self.current_pos + self.batch_size
        idx = self.data_idx[self.current_pos : end_pos].tolist()
        ret = [self.dataset[i] for i in idx]
        # Sharing large number of tensors between processes will consume too many
        # file descriptors, so let's convert each tensor to scalar value beforehand.
        if isinstance(ret[0], tuple):
            ret = [(item_type, F.as_scalar(item)) for (item_type, item) in ret]
        else:
            ret = [F.as_scalar(item) for item in ret]
        self.current_pos = end_pos
        return ret
if isinstance(ret[0], tuple): ret = [(type, F.as_scalar(id)) for (type, id) in ret] else: ret = [F.as_scalar(id) for id in ret] self.current_pos = end_pos return ret # [Note] As implementation of ``dgl.distributed.DistDataLoader`` is independent # of ``dgl.dataloading.DataLoader`` currently, dedicated collators are defined # here instead of using ``dgl.dataloading.CollateWrapper``. def _find_exclude_eids_with_reverse_id(g, eids, reverse_eid_map): if isinstance(eids, Mapping): eids = {g.to_canonical_etype(k): v for k, v in eids.items()} exclude_eids = { k: F.cat([v, F.gather_row(reverse_eid_map[k], v)], 0) for k, v in eids.items() } else: exclude_eids = F.cat([eids, F.gather_row(reverse_eid_map, eids)], 0) return exclude_eids def _find_exclude_eids_with_reverse_types(g, eids, reverse_etype_map): exclude_eids = {g.to_canonical_etype(k): v for k, v in eids.items()} reverse_etype_map = { g.to_canonical_etype(k): g.to_canonical_etype(v) for k, v in reverse_etype_map.items() } exclude_eids.update( {reverse_etype_map[k]: v for k, v in exclude_eids.items()} ) return exclude_eids def _find_exclude_eids(g, exclude_mode, eids, **kwargs): """Find all edge IDs to exclude according to :attr:`exclude_mode`. Parameters ---------- g : DGLGraph The graph. exclude_mode : str, optional Can be either of the following, None (default) Does not exclude any edge. 'self' Exclude the given edges themselves but nothing else. 'reverse_id' Exclude all edges specified in ``eids``, as well as their reverse edges of the same edge type. The mapping from each edge ID to its reverse edge ID is specified in the keyword argument ``reverse_eid_map``. This mode assumes that the reverse of an edge with ID ``e`` and type ``etype`` will have ID ``reverse_eid_map[e]`` and type ``etype``. 'reverse_types' Exclude all edges specified in ``eids``, as well as their reverse edges of the corresponding edge types. 
class Collator(ABC):
    """Abstract base class of the DGL collators used for stochastic GNN training.

    A concrete collator exposes a :attr:`dataset` holding all the nodes or
    edges to iterate over, and a :attr:`collate` method that turns a set of
    items drawn from :attr:`dataset` into message flow graphs (MFGs).

    Notes
    -----
    For the concept of MFGs, please refer to User Guide Section 6 and the
    Minibatch Training Tutorials.
    """

    @property
    @abstractmethod
    def dataset(self):
        """Returns the dataset object of the collator."""
        raise NotImplementedError

    @abstractmethod
    def collate(self, items):
        """Combine items from the dataset object and obtain the list of MFGs.

        Parameters
        ----------
        items : list[str, int]
            The list of node or edge IDs or type-ID pairs.
        """
        raise NotImplementedError

    @staticmethod
    def add_edge_attribute_to_graph(g, data_name, gb_padding):
        """Register ``data_name`` as an edge attribute of a GraphBolt-backed graph.

        Used for cases such as prob/mask-based sampling on GraphBolt
        partitions, where the data has to be available as an edge attribute
        before any sampling is kicked off. Nothing happens when the graph does
        not use GraphBolt or when ``data_name`` is empty or ``None``.

        Parameters
        ----------
        g : DistGraph
            The graph.
        data_name : str
            The name of data that's stored in DistGraph.ndata/edata.
        gb_padding : int, optional
            The padding value for GraphBolt partitions' new edge_attributes.
        """
        if not g._use_graphbolt or not data_name:
            return
        g.add_edge_attribute(data_name, gb_padding)
def collate(self, items):
    """Sample the MFGs needed to compute representations of the given nodes.

    Parameters
    ----------
    items : list[int] or list[tuple[str, int]]
        Either a list of node IDs (for homogeneous graphs), or a list of node
        type-ID pairs (for heterogeneous graphs).

    Returns
    -------
    input_nodes : Tensor or dict[ntype, Tensor]
        The input nodes necessary for computation in this minibatch.
    output_nodes : Tensor or dict[ntype, Tensor]
        The nodes whose representations are to be computed in this minibatch.
    MFGs : list[DGLGraph]
        The list of MFGs necessary for computing the representation.
    """
    # A list of (type, ID) pairs is regrouped into a dict keyed by node type.
    grouped = utils.group_as_dict(items) if isinstance(items[0], tuple) else items
    seeds = utils.prepare_tensor_or_dict(self.g, grouped, "items")

    sampled = self.graph_sampler.sample_blocks(self.g, seeds)
    input_nodes, output_nodes, blocks = sampled
    return input_nodes, output_nodes, blocks
If None, assume to be the same as :attr:`g`. exclude : str, optional Whether and how to exclude dependencies related to the sampled edges in the minibatch. Possible values are * None, which excludes nothing. * ``'self'``, which excludes the sampled edges themselves but nothing else. * ``'reverse_id'``, which excludes the reverse edges of the sampled edges. The said reverse edges have the same edge type as the sampled edges. Only works on edge types whose source node type is the same as its destination node type. * ``'reverse_types'``, which excludes the reverse edges of the sampled edges. The said reverse edges have different edge types from the sampled edges. If ``g_sampling`` is given, ``exclude`` is ignored and will be always ``None``. reverse_eids : Tensor or dict[etype, Tensor], optional A tensor of reverse edge ID mapping. The i-th element indicates the ID of the i-th edge's reverse edge. If the graph is heterogeneous, this argument requires a dictionary of edge types and the reverse edge ID mapping tensors. Required and only used when ``exclude`` is set to ``reverse_id``. For heterogeneous graph this will be a dict of edge type and edge IDs. Note that only the edge types whose source node type is the same as destination node type are needed. reverse_etypes : dict[etype, etype], optional The mapping from the edge type to its reverse edge type. Required and only used when ``exclude`` is set to ``reverse_types``. negative_sampler : callable, optional The negative sampler. Can be omitted if no negative sampling is needed. The negative sampler must be a callable that takes in the following arguments: * The original (heterogeneous) graph. * The ID array of sampled edges in the minibatch, or the dictionary of edge types and ID array of sampled edges in the minibatch if the graph is heterogeneous. It should return * A pair of source and destination node ID arrays as negative samples, or a dictionary of edge types and such pairs if the graph is heterogenenous. 
A set of builtin negative samplers are provided in the negative sampling module (``dgl.dataloading.negative_sampler``). gb_padding : int, optional The padding value for GraphBolt partitions' new edge_attributes if the attributes in DistGraph are None. e.g. prob/mask-based sampling. Only when the mask of one edge is set as 1, an edge will be sampled in dgl.graphbolt.FusedCSCSamplingGraph.sample_neighbors. The argument will be used in add_edge_attribute_to_graph to add new edge_attributes in graphbolt. Examples -------- The following example shows how to train a 3-layer GNN for edge classification on a set of edges ``train_eid`` on a homogeneous undirected graph. Each node takes messages from all neighbors. Say that you have an array of source node IDs ``src`` and another array of destination node IDs ``dst``. One can make it bidirectional by adding another set of edges that connects from ``dst`` to ``src``: >>> g = dgl.graph((torch.cat([src, dst]), torch.cat([dst, src]))) One can then know that the ID difference of an edge and its reverse edge is ``|E|``, where ``|E|`` is the length of your source/destination array. The reverse edge mapping can be obtained by >>> E = len(src) >>> reverse_eids = torch.cat([torch.arange(E, 2 * E), torch.arange(0, E)]) Note that the sampled edges as well as their reverse edges are removed from computation dependencies of the incident nodes. This is a common trick to avoid information leakage. >>> sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5]) >>> collator = dgl.dataloading.EdgeCollator( ... g, train_eid, sampler, exclude='reverse_id', ... reverse_eids=reverse_eids) >>> dataloader = torch.utils.data.DataLoader( ... collator.dataset, collate_fn=collator.collate, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pair_graph, blocks in dataloader: ...
train_on(input_nodes, pair_graph, blocks) To train a 3-layer GNN for link prediction on a set of edges ``train_eid`` on a homogeneous graph where each node takes messages from all neighbors (assume the backend is PyTorch), with 5 uniformly chosen negative samples per edge: >>> sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5]) >>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5) >>> collator = dgl.dataloading.EdgeCollator( ... g, train_eid, sampler, exclude='reverse_id', ... reverse_eids=reverse_eids, negative_sampler=neg_sampler) >>> dataloader = torch.utils.data.DataLoader( ... collator.dataset, collate_fn=collator.collate, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader: ... train_on(input_nodes, pos_pair_graph, neg_pair_graph, blocks) For heterogeneous graphs, the reverse of an edge may have a different edge type from the original edge. For instance, consider that you have an array of user-item clicks, represented by a user array ``user`` and an item array ``item``. You may want to build a heterogeneous graph with a user-click-item relation and an item-clicked-by-user relation. >>> g = dgl.heterograph({ ... ('user', 'click', 'item'): (user, item), ... ('item', 'clicked-by', 'user'): (item, user)}) To train a 3-layer GNN for edge classification on a set of edges ``train_eid`` with type ``click``, you can write >>> sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5]) >>> collator = dgl.dataloading.EdgeCollator( ... g, {'click': train_eid}, sampler, exclude='reverse_types', ... reverse_etypes={'click': 'clicked-by', 'clicked-by': 'click'}) >>> dataloader = torch.utils.data.DataLoader( ... collator.dataset, collate_fn=collator.collate, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pair_graph, blocks in dataloader: ...
train_on(input_nodes, pair_graph, blocks) To train a 3-layer GNN for link prediction on a set of edges ``train_eid`` with type ``click``, you can write >>> sampler = dgl.dataloading.MultiLayerNeighborSampler([15, 10, 5]) >>> neg_sampler = dgl.dataloading.negative_sampler.Uniform(5) >>> collator = dgl.dataloading.EdgeCollator( ... g, train_eid, sampler, exclude='reverse_types', ... reverse_etypes={'click': 'clicked-by', 'clicked-by': 'click'}, ... negative_sampler=neg_sampler) >>> dataloader = torch.utils.data.DataLoader( ... collator.dataset, collate_fn=collator.collate, ... batch_size=1024, shuffle=True, drop_last=False, num_workers=4) >>> for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader: ... train_on(input_nodes, pair_graph, neg_pair_graph, blocks) Notes ----- For the concept of MFGs, please refer to :ref:`User Guide Section 6 ` and :doc:`Minibatch Training Tutorials `. """ def __init__( self, g, eids, graph_sampler, g_sampling=None, exclude=None, reverse_eids=None, reverse_etypes=None, negative_sampler=None, gb_padding=1, ): self.g = g if not isinstance(eids, Mapping): assert ( len(g.etypes) == 1 ), "eids should be a dict of etype and ids for graph with multiple etypes" self.graph_sampler = graph_sampler # One may wish to iterate over the edges in one graph while perform sampling in # another graph. This may be the case for iterating over validation and test # edge set while perform neighborhood sampling on the graph formed by only # the training edge set. # See GCMC for an example usage. if g_sampling is not None: self.g_sampling = g_sampling self.exclude = None else: self.g_sampling = self.g self.exclude = exclude self.reverse_eids = reverse_eids self.reverse_etypes = reverse_etypes self.negative_sampler = negative_sampler self.eids = utils.prepare_tensor_or_dict(g, eids, "eids") self._dataset = utils.maybe_flatten_dict(self.eids) # Add prob/mask into graphbolt partition's edge attributes if needed. 
def _collate_with_negative_sampling(self, items):
    """Collate a minibatch of edges together with negative samples.

    Parameters
    ----------
    items : list[int] or list[tuple[str, int]]
        Either a list of edge IDs, or a list of edge type-ID pairs.

    Returns
    -------
    tuple
        ``(input_nodes, pair_graph, neg_pair_graph, blocks)``: the input
        nodes returned by the sampler, the compacted subgraph of the minibatch
        edges, the compacted graph of negative edges, and the sampled MFGs.
    """
    graph = self.g

    if isinstance(items[0], tuple):
        # A list of (type, ID) pairs is regrouped into a dict keyed by type.
        items = utils.group_as_dict(items)
    items = utils.prepare_tensor_or_dict(self.g_sampling, items, "items")

    # Node IDs are not relabeled here; both graphs are compacted together below.
    pair_graph = graph.edge_subgraph(items, relabel_nodes=False)
    induced_edges = pair_graph.edata[EID]

    neg_srcdst = self.negative_sampler(graph, items)
    if not isinstance(neg_srcdst, Mapping):
        assert len(graph.etypes) == 1, (
            "graph has multiple or no edge types; "
            "please return a dict in negative sampler."
        )
        neg_srcdst = {graph.canonical_etypes[0]: neg_srcdst}

    # The dtype is read from the first source tensor of the negative samples.
    first_pair = list(neg_srcdst.values())[0]
    dtype = F.dtype(first_pair[0])
    ctx = F.context(pair_graph)

    # Edge types the negative sampler did not return get an empty edge list.
    neg_edges = {}
    for etype in graph.canonical_etypes:
        no_edges = (
            F.copy_to(F.tensor([], dtype), ctx),
            F.copy_to(F.tensor([], dtype), ctx),
        )
        neg_edges[etype] = neg_srcdst.get(etype, no_edges)
    num_nodes_per_type = {
        ntype: graph.num_nodes(ntype) for ntype in graph.ntypes
    }
    neg_pair_graph = heterograph(neg_edges, num_nodes_per_type)

    pair_graph, neg_pair_graph = transforms.compact_graphs(
        [pair_graph, neg_pair_graph]
    )
    # Re-attach the original edge IDs to the compacted positive graph.
    pair_graph.edata[EID] = induced_edges

    seed_nodes = pair_graph.ndata[NID]
    exclude_eids = _find_exclude_eids(
        self.g_sampling,
        self.exclude,
        items,
        reverse_eid_map=self.reverse_eids,
        reverse_etype_map=self.reverse_etypes,
    )
    input_nodes, _, blocks = self.graph_sampler.sample_blocks(
        self.g_sampling, seed_nodes, exclude_eids=exclude_eids
    )
    return input_nodes, pair_graph, neg_pair_graph, blocks
def _remove_kwargs_dist(kwargs):
    """Drop the keyword arguments that are removed for distributed dataloaders.

    ``num_workers`` is removed silently; ``pin_memory`` is removed with a
    printed notice. The dictionary is modified in place.

    Parameters
    ----------
    kwargs : dict
        Keyword arguments intended for the dataloader.

    Returns
    -------
    dict
        The same ``kwargs`` object, after removal.
    """
    pin_memory_given = "pin_memory" in kwargs
    kwargs.pop("num_workers", None)
    kwargs.pop("pin_memory", None)
    if pin_memory_given:
        print("Distributed DataLoaders do not support pin_memory.")
    return kwargs
class DistEdgeDataLoader(DistDataLoader):
    """Sampled graph data loader over edges for distributed graph storage.

    It wraps an iterable over a set of edges, generating the list of message
    flow graphs (MFGs) as computation dependency of the said minibatch for
    edge classification, edge regression, and link prediction, on a
    distributed graph.

    All the arguments have the same meaning as the single-machine counterpart
    :class:`dgl.dataloading.DataLoader` except the first argument :attr:`g`,
    which must be a :class:`dgl.distributed.DistGraph`.

    Parameters
    ----------
    g : DistGraph
        The distributed graph.
    eids, graph_sampler, device, kwargs :
        See :class:`dgl.dataloading.DataLoader`. Only ``device="cpu"`` (or
        ``None``, which defaults to it) is accepted.

    See also
    --------
    dgl.dataloading.DataLoader
    """

    def __init__(self, g, eids, graph_sampler, device=None, **kwargs):
        # Keyword arguments named after ``EdgeCollator`` constructor
        # parameters configure the collator; the rest go to the base loader.
        collator_argnames = inspect.getfullargspec(EdgeCollator).args
        collator_kwargs = {
            key: value
            for key, value in kwargs.items()
            if key in collator_argnames
        }
        dataloader_kwargs = {
            key: value
            for key, value in kwargs.items()
            if key not in collator_argnames
        }

        # For the distributed case default to the CPU.
        if device is None:
            device = "cpu"
        assert (
            device == "cpu"
        ), "Only cpu is supported in the case of a DistGraph."

        # Original author's note: Distributed DataLoader currently does not
        # support heterogeneous graphs and does not copy features. Fallback
        # to normal solution.
        self.collator = EdgeCollator(g, eids, graph_sampler, **collator_kwargs)
        _remove_kwargs_dist(dataloader_kwargs)
        super().__init__(
            self.collator.dataset,
            collate_fn=self.collator.collate,
            **dataloader_kwargs
        )
        self.device = device
class QueryIfUseGraphBoltResponse(rpc.Response):
    """Reply to a GraphBolt query, carrying the server's ``use_graphbolt`` flag.

    Parameters
    ----------
    use_graphbolt : object
        The value to send back; ``QueryIfUseGraphBoltRequest`` passes
        ``server_state.use_graphbolt``.
    """

    def __init__(self, use_graphbolt):
        self._use_graphbolt = use_graphbolt

    def __getstate__(self):
        # The flag is the entire serialized state of this response.
        return self._use_graphbolt

    def __setstate__(self, state):
        # Mirror of ``__getstate__``: the state is the flag itself.
        self._use_graphbolt = state
class AddEdgeAttributeFromKVRequest(rpc.Request):
    """Add edge attribute from kvstore to local GraphBolt partition.

    Parameters
    ----------
    name : str
        Name of the edge attribute to create on the partition graph.
    kv_names : list
        One kvstore key per canonical edge type, in the order of
        ``partition_book.canonical_etypes``. A falsy entry means there is no
        data for that edge type, and its edges keep the padding value.
    padding : int or float
        Fill value for edges that receive no data from the kvstore.
    """

    def __init__(self, name, kv_names, padding):
        self._name = name
        self._kv_names = kv_names
        self._padding = padding

    def __getstate__(self):
        return self._name, self._kv_names, self._padding

    def __setstate__(self, state):
        self._name, self._kv_names, self._padding = state

    def process_request(self, server_state):
        """Build the attribute on the server's graph unless it already exists.

        Returns
        -------
        AddEdgeAttributeFromKVResponse
            Acknowledgement carrying the attribute name.
        """
        # For now, this is only used to add prob/mask data to the graph.
        name = self._name
        g = server_state.graph
        if name not in g.edge_attributes:
            # Fetch target data from kvstore.
            kv_store = server_state.kv_store
            data = [
                kv_store.data_store[kv_name] if kv_name else None
                for kv_name in self._kv_names
            ]
            # Due to data type limitation in GraphBolt's sampling, we only
            # support float32.
            data_type = torch.float32
            gpb = server_state.partition_book
            # Initialize the edge attribute.
            num_edges = g.total_num_edges

            # Padding is used to fill missing edge attributes (e.g., 'prob' or
            # 'mask') for certain edge types. In DGLGraph, some edges may lack
            # these attributes or have them set to None, but DGL will still
            # sample these edges. In contrast, GraphBolt samples edges based
            # on specific attributes (e.g., 'mask' == 1) and will skip edges
            # with missing attributes. To ensure consistent sampling behavior
            # in GraphBolt, we pad missing attributes with default values
            # (e.g., 'mask' = 1), allowing all edges to be sampled, even if
            # their attributes were missing or None in DGLGraph.
            attr_data = torch.full(
                (num_edges,), self._padding, dtype=data_type
            )

            # Map data from kvstore to the local partition for inner edges only.
            num_inner_edges = gpb.metadata()[gpb.partid]["num_edges"]
            homo_eids = g.edge_attributes[EID][:num_inner_edges]
            etype_ids, typed_eids = gpb.map_to_per_etype(homo_eids)
            for etype_id, c_etype in enumerate(gpb.canonical_etypes):
                etype_data = data[etype_id]
                if etype_data is None:
                    # No data for this edge type: keep the padding and skip
                    # the ID mapping altogether.
                    continue
                # ``squeeze(-1)`` drops only the trailing coordinate axis, so
                # the index stays 1-D even when exactly one edge matches (a
                # bare ``squeeze()`` would collapse it to a 0-d tensor).
                curr_indices = torch.nonzero(etype_ids == etype_id).squeeze(-1)
                curr_typed_eids = typed_eids[curr_indices]
                curr_local_eids = gpb.eid2localeid(
                    curr_typed_eids, gpb.partid, etype=c_etype
                )
                attr_data[curr_indices] = etype_data[curr_local_eids].to(
                    data_type
                )
            # Copy data to shared memory.
            attr_data = _copy_data_to_shared_mem(attr_data, "__edge__" + name)
            g.add_edge_attribute(name, attr_data)
        return AddEdgeAttributeFromKVResponse(name)
def _get_shared_mem_ndata(g, graph_name, name):
    """Get shared-memory node data from DistGraph server.

    This is called by the DistGraph client to access the node data in the
    DistGraph server with shared memory.

    Parameters
    ----------
    g : DGLGraph
        The partition graph; its node count gives the length of the array.
    graph_name : str
        Name of the graph, used to derive the shared-memory path.
    name : str
        Name of the reserved node field; must be a key of
        ``RESERVED_FIELD_DTYPE``.

    Returns
    -------
    Tensor
        A zero-copy view (via DLPack) of the shared-memory array.
    """
    length = (g.num_nodes(),)
    storage_dtype = DTYPE_DICT[RESERVED_FIELD_DTYPE[name]]
    # The ``False`` flag presumably means "attach to an existing array rather
    # than create one" -- TODO confirm against ``empty_shared_mem``.
    shared_array = empty_shared_mem(
        _get_ndata_path(graph_name, name), False, length, storage_dtype
    )
    return F.zerocopy_from_dlpack(shared_array.to_dlpack())
class HeteroNodeView(object):
    """A NodeView class to act as G.nodes for a DistGraph.

    Indexing with a node type name returns a ``NodeSpace`` whose ``data``
    field is the ``NodeDataView`` of that node type.
    """

    __slots__ = ["_graph"]

    def __init__(self, graph):
        self._graph = graph

    def __getitem__(self, key):
        # Only node type names are accepted as keys.
        assert isinstance(key, str)
        node_data = NodeDataView(self._graph, key)
        return NodeSpace(data=node_data)
class NodeDataView(MutableMapping):
    """Mutable mapping over the node data store of a distributed graph.

    The view wraps ``g._ndata_store`` directly when no node type is given or
    the graph has a single node type; otherwise it wraps the entry of
    ``g._ndata_store`` for the requested node type.
    """

    __slots__ = ["_graph", "_data"]

    def __init__(self, g, ntype=None):
        self._graph = g
        if ntype is not None and len(g.ntypes) != 1:
            # Several node types: the store is keyed by node type first.
            if ntype not in g.ntypes:
                raise DGLError(f"Node type {ntype} does not exist.")
            self._data = g._ndata_store[ntype]
        else:
            self._data = g._ndata_store

    def _get_names(self):
        return [*self._data.keys()]

    def __getitem__(self, key):
        return self._data[key]

    def __setitem__(self, key, val):
        self._data[key] = val

    def __delitem__(self, key):
        del self._data[key]

    def __len__(self):
        # Counted on every call because entries may be added or removed.
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __repr__(self):
        template = "DistTensor(shape={}, dtype={})"
        summary = {}
        for name in self._data:
            tensor = self._data[name]
            dtype = F.dtype(tensor)
            shape = F.shape(tensor)
            summary[name] = template.format(str(shape), str(dtype))
        return repr(summary)
def _format_partition(graph, graph_format):
    """Cast the reserved fields and materialize the requested graph formats.

    A ``gb.FusedCSCSamplingGraph`` is returned untouched. For any other graph,
    every field listed in ``RESERVED_FIELD_DTYPE`` that is present in the
    node or edge data is cast to its reserved dtype, and the formats in
    ``graph_format`` are then created.
    """
    if isinstance(graph, gb.FusedCSCSamplingGraph):
        return graph

    # TODO(Rui) Casting forcibly is not a perfect solution.
    # We'd better store all dtypes when mapping to shared memory
    # and map back with original dtypes.
    for field, dtype in RESERVED_FIELD_DTYPE.items():
        for frame in (graph.ndata, graph.edata):
            if field in frame:
                frame[field] = F.astype(frame[field], dtype)

    # Create the graph formats specified by the user.
    print(
        "Start to create specified graph formats which may take non-trivial time."
    )
    formatted = graph.formats(graph_format)
    formatted.create_formats_()
    print(f"Finished creating specified graph formats: {graph_format}")
    return formatted
    def __init__(
        self,
        server_id,
        ip_config,
        num_servers,
        num_clients,
        part_config,
        disable_shared_mem=False,
        graph_format=("csc", "coo"),
        use_graphbolt=False,
    ):
        """Load the partition, register partition policies and publish features.

        A backup server only loads the partition book; the main server also
        loads the graph structure, formats it and, unless
        ``disable_shared_mem`` is set, copies it to shared memory. Node and
        edge features are loaded afterwards, one kind at a time, and passed
        to ``init_data``.
        """
        super(DistGraphServer, self).__init__(
            server_id=server_id,
            ip_config=ip_config,
            num_servers=num_servers,
            num_clients=num_clients,
        )
        self.ip_config = ip_config
        self.num_servers = num_servers
        self.use_graphbolt = use_graphbolt
        # Load graph partition data.
        if self.is_backup_server():
            # The backup server doesn't load the graph partition. It will be
            # initialized afterwards.
            self.gpb, graph_name, ntypes, etypes = load_partition_book(
                part_config, self.part_id
            )
            self.client_g = None
        else:
            # Loading of node/edge_feats is deferred to lower the peak memory consumption.
            (
                self.client_g,
                _,
                _,
                self.gpb,
                graph_name,
                ntypes,
                etypes,
            ) = load_partition(
                part_config,
                self.part_id,
                load_feats=False,
                use_graphbolt=use_graphbolt,
            )
            print("load " + graph_name)
            self.client_g = _format_partition(self.client_g, graph_format)
            if not disable_shared_mem:
                self.client_g = _copy_graph_to_shared_mem(
                    self.client_g, graph_name, graph_format, use_graphbolt
                )

        # The partition book is shared by main and backup servers alike.
        if not disable_shared_mem:
            self.gpb.shared_memory(graph_name)
        assert self.gpb.partid == self.part_id
        # Register one partition policy per node type and per edge type.
        for ntype in ntypes:
            node_name = HeteroDataName(True, ntype, "")
            self.add_part_policy(
                PartitionPolicy(node_name.policy_str, self.gpb)
            )
        for etype in etypes:
            edge_name = HeteroDataName(False, etype, "")
            self.add_part_policy(
                PartitionPolicy(edge_name.policy_str, self.gpb)
            )

        if not self.is_backup_server():
            # Node features first; edge features are only loaded after the
            # node features have been released.
            node_feats, _ = load_partition_feats(
                part_config, self.part_id, load_nodes=True, load_edges=False
            )
            for name in node_feats:
                # The feature name has the following format: node_type + "/" + feature_name to avoid
                # feature name collision for different node types.
                ntype, feat_name = name.split("/")
                data_name = HeteroDataName(True, ntype, feat_name)
                self.init_data(
                    name=str(data_name),
                    policy_str=data_name.policy_str,
                    data_tensor=node_feats[name],
                )
                self.orig_data.add(str(data_name))
            # Let's free once node features are copied to shared memory
            del node_feats
            gc.collect()
            _, edge_feats = load_partition_feats(
                part_config, self.part_id, load_nodes=False, load_edges=True
            )
            for name in edge_feats:
                # The feature name has the following format: edge_type + "/" + feature_name to avoid
                # feature name collision for different edge types.
                etype, feat_name = name.split("/")
                # The edge type arrives as a string; convert it to a tuple.
                etype = _etype_str_to_tuple(etype)
                data_name = HeteroDataName(False, etype, feat_name)
                self.init_data(
                    name=str(data_name),
                    policy_str=data_name.policy_str,
                    data_tensor=edge_feats[name],
                )
                self.orig_data.add(str(data_name))
            # Let's free once edge features are copied to shared memory
            del edge_feats
            gc.collect()
Currently, the ``DistGraph`` servers and clients run on the same set of machines in the distributed mode. ``DistGraph`` uses shared-memory to access the partition data in the local machine. This gives the best performance for distributed training. Users may want to run ``DistGraph`` servers and clients on separate sets of machines. In this case, a user may want to disable shared memory by passing ``disable_shared_mem=True`` when creating ``DistGraphServer``. When shared memory is disabled, a user has to pass a partition book. Parameters ---------- graph_name : str The name of the graph. This name has to be the same as the one used for partitioning a graph in :py:meth:`dgl.distributed.partition.partition_graph`. gpb : GraphPartitionBook, optional The partition book object. Normally, users do not need to provide the partition book. This argument is necessary only when users want to run server process and trainer processes on different machines. part_config : str, optional The path of partition configuration file generated by :py:meth:`dgl.distributed.partition.partition_graph`. It's used in the standalone mode. Examples -------- The example shows the creation of ``DistGraph`` in the standalone mode. >>> dgl.distributed.partition_graph(g, 'graph_name', 1, num_hops=1, part_method='metis', ... out_path='output/') >>> g = dgl.distributed.DistGraph('graph_name', part_config='output/graph_name.json') The example shows the creation of ``DistGraph`` in the distributed mode. >>> g = dgl.distributed.DistGraph('graph-name') The code below shows the mini-batch training using ``DistGraph``. >>> def sample(seeds): ... seeds = th.LongTensor(np.asarray(seeds)) ... frontier = dgl.distributed.sample_neighbors(g, seeds, 10) ... return dgl.to_block(frontier, seeds) >>> dataloader = dgl.distributed.DistDataLoader(dataset=nodes, batch_size=1000, ... collate_fn=sample, shuffle=True) >>> for block in dataloader: ... feat = g.ndata['features'][block.srcdata[dgl.NID]] ... 
labels = g.ndata['labels'][block.dstdata[dgl.NID]] ... pred = model(block, feat) Note ---- DGL's distributed training by default runs server processes and trainer processes on the same set of machines. If users need to run them on different sets of machines, it requires manually setting up servers and trainers. The setup is not fully tested yet. """ def __init__(self, graph_name, gpb=None, part_config=None): self.graph_name = graph_name self._added_edge_attributes = [] # For prob/mask sampling on GB. if os.environ.get("DGL_DIST_MODE", "standalone") == "standalone": # "GraphBolt is not supported in standalone mode." self._use_graphbolt = False assert ( part_config is not None ), "When running in the standalone model, the partition config file is required" self._client = get_kvstore() assert ( self._client is not None ), "Distributed module is not initialized. Please call dgl.distributed.initialize." # Load graph partition data. g, node_feats, edge_feats, self._gpb, _, _, _ = load_partition( part_config, 0 ) assert ( self._gpb.num_partitions() == 1 ), "The standalone mode can only work with the graph data with one partition" if self._gpb is None: self._gpb = gpb self._g = g for name in node_feats: # The feature name has the following format: node_type + "/" + feature_name. ntype, feat_name = name.split("/") self._client.add_data( str(HeteroDataName(True, ntype, feat_name)), node_feats[name], NodePartitionPolicy(self._gpb, ntype=ntype), ) for name in edge_feats: # The feature name has the following format: edge_type + "/" + feature_name. etype, feat_name = name.split("/") etype = _etype_str_to_tuple(etype) self._client.add_data( str(HeteroDataName(False, etype, feat_name)), edge_feats[name], EdgePartitionPolicy(self._gpb, etype=etype), ) self._client.map_shared_data(self._gpb) rpc.set_num_client(1) else: # Query the main server about whether GraphBolt is used. 
rpc.send_request(0, QueryIfUseGraphBoltRequest()) self._use_graphbolt = rpc.recv_response()._use_graphbolt self._init(gpb) # Tell the backup servers to load the graph structure from shared memory. for server_id in range(self._client.num_servers): rpc.send_request(server_id, InitGraphRequest(graph_name)) for server_id in range(self._client.num_servers): rpc.recv_response() self._client.barrier() self._init_ndata_store() self._init_edata_store() self._init_metadata() def _init(self, gpb): self._client = get_kvstore() assert ( self._client is not None ), "Distributed module is not initialized. Please call dgl.distributed.initialize." self._g = _get_graph_from_shared_mem( self.graph_name, self._use_graphbolt ) self._gpb = get_shared_mem_partition_book(self.graph_name) if self._gpb is None: self._gpb = gpb self._client.map_shared_data(self._gpb) def _init_ndata_store(self): """Initialize node data store.""" self._ndata_store = {} for ntype in self.ntypes: names = self._get_ndata_names(ntype) data = {} for name in names: assert name.is_node() policy = PartitionPolicy( name.policy_str, self.get_partition_book() ) dtype, shape, _ = self._client.get_data_meta(str(name)) # We create a wrapper on the existing tensor in the kvstore. data[name.get_name()] = DistTensor( shape, dtype, name.get_name(), part_policy=policy, attach=False, ) if len(self.ntypes) == 1: self._ndata_store = data else: self._ndata_store[ntype] = data def _init_edata_store(self): """Initialize edge data store.""" self._edata_store = {} for etype in self.canonical_etypes: names = self._get_edata_names(etype) data = {} for name in names: assert name.is_edge() policy = PartitionPolicy( name.policy_str, self.get_partition_book() ) dtype, shape, _ = self._client.get_data_meta(str(name)) # We create a wrapper on the existing tensor in the kvstore. 
data[name.get_name()] = DistTensor( shape, dtype, name.get_name(), part_policy=policy, attach=False, ) if len(self.canonical_etypes) == 1: self._edata_store = data else: self._edata_store[etype] = data def _init_metadata(self): self._num_nodes = 0 self._num_edges = 0 for part_md in self._gpb.metadata(): self._num_nodes += int(part_md["num_nodes"]) self._num_edges += int(part_md["num_edges"]) # When we store node/edge types in a list, they are stored in the order of type IDs. self._ntype_map = {ntype: i for i, ntype in enumerate(self.ntypes)} self._etype_map = { etype: i for i, etype in enumerate(self.canonical_etypes) } def __getstate__(self): return ( self.graph_name, self._gpb, self._use_graphbolt, self._added_edge_attributes, ) def __setstate__(self, state): ( self.graph_name, gpb, self._use_graphbolt, self._added_edge_attributes, ) = state self._init(gpb) self._init_ndata_store() self._init_edata_store() self._init_metadata() # For prob/mask sampling on GB only. if self._use_graphbolt and len(self._added_edge_attributes) > 0: # Add edge attribute from main server's shared memory. for name in self._added_edge_attributes: data = _copy_data_from_shared_mem( "__edge__" + name, (self.local_partition.total_num_edges,) ) self.local_partition.add_edge_attribute(name, data) @property def local_partition(self): """Return the local partition on the client DistGraph provides a global view of the distributed graph. Internally, it may contains a partition of the graph if it is co-located with the server. When servers and clients run on separate sets of machines, this returns None. Returns ------- DGLGraph The local partition """ return self._g @property def nodes(self): """Return a node view""" return HeteroNodeView(self) @property def edges(self): """Return an edge view""" return HeteroEdgeView(self) @property def ndata(self): """Return the data view of all the nodes. Returns ------- NodeDataView The data view in the distributed graph storage. 
""" assert ( len(self.ntypes) == 1 ), "ndata only works for a graph with one node type." return NodeDataView(self) @property def edata(self): """Return the data view of all the edges. Returns ------- EdgeDataView The data view in the distributed graph storage. """ assert ( len(self.etypes) == 1 ), "edata only works for a graph with one edge type." return EdgeDataView(self) @property def idtype(self): """The dtype of graph index Returns ------- backend dtype object th.int32/th.int64 or tf.int32/tf.int64 etc. See Also -------- long int """ # TODO(da?): describe when self._g is None and idtype shouldn't be called. # For GraphBolt partition, we use the global node ID's dtype. return ( self.get_partition_book().global_nid_dtype if self._use_graphbolt else F.int64 ) @property def device(self): """Get the device context of this graph. Examples -------- The following example uses PyTorch backend. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]) ... }) >>> print(g.device) device(type='cpu') >>> g = g.to('cuda:0') >>> print(g.device) device(type='cuda', index=0) Returns ------- Device context object """ # TODO(da?): describe when self._g is None and device shouldn't be called. return F.cpu() def is_pinned(self): """Check if the graph structure is pinned to the page-locked memory. Returns ------- bool True if the graph structure is pinned. """ # (Xin Yao): Currently we don't support pinning a DistGraph. return False @property def ntypes(self): """Return the list of node types of this graph. Returns ------- list of str Examples -------- >>> g = DistGraph("test") >>> g.ntypes ['_U'] """ return self._gpb.ntypes @property def etypes(self): """Return the list of edge types of this graph. Returns ------- list of str Examples -------- >>> g = DistGraph("test") >>> g.etypes ['_E'] """ return self._gpb.etypes @property def canonical_etypes(self): """Return all the canonical edge types in the graph. 
A canonical edge type is a string triplet ``(str, str, str)`` for source node type, edge type and destination node type. Returns ------- list[(str, str, str)] All the canonical edge type triplets in a list. Notes ----- DGL internally assigns an integer ID for each edge type. The returned edge type names are sorted according to their IDs. See Also -------- etypes Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> g = DistGraph("test") >>> g.canonical_etypes [('user', 'follows', 'user'), ('user', 'follows', 'game'), ('user', 'plays', 'game')] """ return self._gpb.canonical_etypes def to_canonical_etype(self, etype): """Convert an edge type to the corresponding canonical edge type in the graph. A canonical edge type is a string triplet ``(str, str, str)`` for source node type, edge type and destination node type. The function expects the given edge type name can uniquely identify a canonical edge type. DGL will raise error if this is not the case. Parameters ---------- etype : str or (str, str, str) If :attr:`etype` is an edge type (str), it returns the corresponding canonical edge type in the graph. If :attr:`etype` is already a canonical edge type, it directly returns the input unchanged. Returns ------- (str, str, str) The canonical edge type corresponding to the edge type. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> g = DistGraph("test") >>> g.canonical_etypes [('user', 'follows', 'user'), ('user', 'follows', 'game'), ('user', 'plays', 'game')] >>> g.to_canonical_etype('plays') ('user', 'plays', 'game') >>> g.to_canonical_etype(('user', 'plays', 'game')) ('user', 'plays', 'game') See Also -------- canonical_etypes """ return self._gpb.to_canonical_etype(etype) def get_ntype_id(self, ntype): """Return the ID of the given node type. ntype can also be None. If so, there should be only one node type in the graph. 
Parameters ---------- ntype : str Node type Returns ------- int """ if ntype is None: if len(self._ntype_map) != 1: raise DGLError( "Node type name must be specified if there are more than one " "node types." ) return 0 return self._ntype_map[ntype] def get_etype_id(self, etype): """Return the id of the given edge type. etype can also be None. If so, there should be only one edge type in the graph. Parameters ---------- etype : str or tuple of str Edge type Returns ------- int """ if etype is None: if len(self._etype_map) != 1: raise DGLError( "Edge type name must be specified if there are more than one " "edge types." ) return 0 etype = self.to_canonical_etype(etype) return self._etype_map[etype] def number_of_nodes(self, ntype=None): """Alias of :func:`num_nodes`""" return self.num_nodes(ntype) def number_of_edges(self, etype=None): """Alias of :func:`num_edges`""" return self.num_edges(etype) def num_nodes(self, ntype=None): """Return the total number of nodes in the distributed graph. Parameters ---------- ntype : str, optional The node type name. If given, it returns the number of nodes of the type. If not given (default), it returns the total number of nodes of all types. Returns ------- int The number of nodes Examples -------- >>> g = dgl.distributed.DistGraph('ogb-product') >>> print(g.num_nodes()) 2449029 """ if ntype is None: if len(self.ntypes) == 1: return self._gpb._num_nodes(self.ntypes[0]) else: return sum( [self._gpb._num_nodes(ntype) for ntype in self.ntypes] ) return self._gpb._num_nodes(ntype) def num_edges(self, etype=None): """Return the total number of edges in the distributed graph. Parameters ---------- etype : str or (str, str, str), optional The type name of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. 
    def out_degrees(self, u=ALL):
        """Return the out-degree(s) of the given nodes.

        It computes the out-degree(s).
        It does not support heterogeneous graphs yet.

        Parameters
        ----------
        u : node IDs
            The node IDs. The allowed formats are:

            * ``int``: A single node.
            * Int Tensor: Each element is a node ID. The tensor must have the same device type
              and ID data type as the graph's.
            * iterable[int]: Each element is a node ID.

            If not given, return the out-degrees of all the nodes.

        Returns
        -------
        int or Tensor
            The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
            of the i-th input node. If :attr:`u` is an ``int``, return an ``int`` too.

        Examples
        --------
        The following example uses PyTorch backend.

        >>> import dgl
        >>> import torch

        Query for all nodes.

        >>> g.out_degrees()
        tensor([2, 2, 0, 0])

        Query for nodes 1 and 2.

        >>> g.out_degrees(torch.tensor([1, 2]))
        tensor([2, 0])

        See Also
        --------
        in_degrees
        """
        if is_all(u):
            # No node IDs given: build the ID range from 0 up to num_nodes().
            u = F.arange(0, self.num_nodes())
        return dist_out_degrees(self, u)
If :attr:`v` is an ``int``, return an ``int`` too. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Query for all nodes. >>> g.in_degrees() tensor([0, 2, 1, 1]) Query for nodes 1 and 2. >>> g.in_degrees(torch.tensor([1, 2])) tensor([2, 1]) See Also -------- out_degrees """ if is_all(v): v = F.arange(0, self.num_nodes()) return dist_in_degrees(self, v) def node_attr_schemes(self): """Return the node feature schemes. Each feature scheme is a named tuple that stores the shape and data type of the node feature. Returns ------- dict of str to schemes The schemes of node feature columns. Examples -------- The following uses PyTorch backend. >>> g.node_attr_schemes() {'h': Scheme(shape=(4,), dtype=torch.float32)} See Also -------- edge_attr_schemes """ schemes = {} for key in self.ndata: schemes[key] = infer_scheme(self.ndata[key]) return schemes def edge_attr_schemes(self): """Return the edge feature schemes. Each feature scheme is a named tuple that stores the shape and data type of the edge feature. Returns ------- dict of str to schemes The schemes of edge feature columns. Examples -------- The following uses PyTorch backend. >>> g.edge_attr_schemes() {'h': Scheme(shape=(4,), dtype=torch.float32)} See Also -------- node_attr_schemes """ schemes = {} for key in self.edata: schemes[key] = infer_scheme(self.edata[key]) return schemes def rank(self): """The rank of the current DistGraph. This returns a unique number to identify the DistGraph object among all of the client processes. Returns ------- int The rank of the current DistGraph. """ return role.get_global_rank() def find_edges(self, edges, etype=None): """Given an edge ID array, return the source and destination node ID array ``s`` and ``d``. ``s[i]`` and ``d[i]`` are source and destination node ID for edge ``eid[i]``. Parameters ---------- edges : Int Tensor Each element is an ID. The tensor must have the same device type and ID data type as the graph's. 
etype : str or (str, str, str), optional The type names of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Returns ------- tensor The source node ID array. tensor The destination node ID array. """ if etype is None: assert ( len(self.etypes) == 1 ), "find_edges requires etype for heterogeneous graphs." gpb = self.get_partition_book() if len(gpb.etypes) > 1: edges = gpb.map_to_homo_eid(edges, etype) src, dst = dist_find_edges(self, edges) if len(gpb.ntypes) > 1: _, src = gpb.map_to_per_ntype(src) _, dst = gpb.map_to_per_ntype(dst) return src, dst def edge_subgraph(self, edges, relabel_nodes=True, store_ids=True): """Return a subgraph induced on the given edges. An edge-induced subgraph is equivalent to creating a new graph using the given edges. In addition to extracting the subgraph, DGL also copies the features of the extracted nodes and edges to the resulting graph. The copy is *lazy* and incurs data movement only when needed. If the graph is heterogeneous, DGL extracts a subgraph per relation and composes them as the resulting graph. Thus, the resulting graph has the same set of relations as the input one. Parameters ---------- edges : Int Tensor or dict[(str, str, str), Int Tensor] The edges to form the subgraph. Each element is an edge ID. The tensor must have the same device type and ID data type as the graph's. If the graph is homogeneous, one can directly pass an Int Tensor. Otherwise, the argument must be a dictionary with keys being edge types and values being the edge IDs in the above formats. relabel_nodes : bool, optional If True, it will remove the isolated nodes and relabel the incident nodes in the extracted subgraph. 
store_ids : bool, optional If True, it will store the raw IDs of the extracted edges in the ``edata`` of the resulting graph under name ``dgl.EID``; if ``relabel_nodes`` is ``True``, it will also store the raw IDs of the incident nodes in the ``ndata`` of the resulting graph under name ``dgl.NID``. Returns ------- G : DGLGraph The subgraph. """ if isinstance(edges, dict): # TODO(zhengda) we need to directly generate subgraph of all relations with # one invocation. subg = {} for etype, edge in edges.items(): etype = self.to_canonical_etype(etype) subg[etype] = self.find_edges(edge, etype) num_nodes = {ntype: self.num_nodes(ntype) for ntype in self.ntypes} subg = dgl_heterograph(subg, num_nodes_dict=num_nodes) for etype in edges: subg.edges[etype].data[EID] = edges[etype] else: assert len(self.etypes) == 1 subg = self.find_edges(edges) subg = dgl_graph(subg, num_nodes=self.num_nodes()) subg.edata[EID] = edges if relabel_nodes: subg = compact_graphs(subg) assert store_ids, "edge_subgraph always stores original node/edge IDs." return subg def get_partition_book(self): """Get the partition information. Returns ------- GraphPartitionBook Object that stores all graph partition information. """ return self._gpb def get_node_partition_policy(self, ntype): """Get the partition policy for a node type. When creating a new distributed tensor, we need to provide a partition policy that indicates how to distribute data of the distributed tensor in a cluster of machines. When we load a distributed graph in the cluster, we have pre-defined partition policies for each node type and each edge type. By providing the node type, we can reference to the pre-defined partition policy for the node type. Parameters ---------- ntype : str The node type Returns ------- PartitionPolicy The partition policy for the node type. """ return NodePartitionPolicy(self.get_partition_book(), ntype) def get_edge_partition_policy(self, etype): """Get the partition policy for an edge type. 
When creating a new distributed tensor, we need to provide a partition policy that indicates how to distribute data of the distributed tensor in a cluster of machines. When we load a distributed graph in the cluster, we have pre-defined partition policies for each node type and each edge type. By providing the edge type, we can reference to the pre-defined partition policy for the edge type. Parameters ---------- etype : str or (str, str, str) The edge type Returns ------- PartitionPolicy The partition policy for the edge type. """ etype = self.to_canonical_etype(etype) return EdgePartitionPolicy(self.get_partition_book(), etype) def barrier(self): """Barrier for all client nodes. This API blocks the current process untill all the clients invoke this API. Please use this API with caution. """ self._client.barrier() def sample_neighbors( self, seed_nodes, fanout, edge_dir="in", prob=None, exclude_edges=None, replace=False, etype_sorted=True, output_device=None, ): # pylint: disable=unused-argument """Sample neighbors from a distributed graph.""" if exclude_edges is not None: # Convert exclude edge IDs to homogeneous edge IDs. 
gpb = self.get_partition_book() if isinstance(exclude_edges, Mapping): exclude_eids = [] for c_etype, eids in exclude_edges.items(): exclude_eids.append(gpb.map_to_homo_eid(eids, c_etype)) exclude_edges = torch.cat(exclude_eids) if len(self.etypes) > 1: frontier = graph_services.sample_etype_neighbors( self, seed_nodes, fanout, replace=replace, etype_sorted=etype_sorted, prob=prob, exclude_edges=exclude_edges, use_graphbolt=self._use_graphbolt, ) else: frontier = graph_services.sample_neighbors( self, seed_nodes, fanout, replace=replace, prob=prob, exclude_edges=exclude_edges, use_graphbolt=self._use_graphbolt, ) return frontier def _get_ndata_names(self, ntype=None): """Get the names of all node data.""" names = self._client.gdata_name_list() ndata_names = [] for name in names: name = parse_hetero_data_name(name) right_type = ( (name.get_type() == ntype) if ntype is not None else True ) if name.is_node() and right_type: ndata_names.append(name) return ndata_names def _get_edata_names(self, etype=None): """Get the names of all edge data.""" if etype is not None: etype = self.to_canonical_etype(etype) names = self._client.gdata_name_list() edata_names = [] for name in names: name = parse_hetero_data_name(name) right_type = ( (name.get_type() == etype) if etype is not None else True ) if name.is_edge() and right_type: edata_names.append(name) return edata_names def add_edge_attribute(self, name, padding): """Add an edge attribute into GraphBolt partition from edge data. Parameters ---------- name : str The name of the edge attribute. padding : int, optional The padding value for the new edge attribute. """ # Sanity checks. if not self._use_graphbolt: raise DGLError("GraphBolt is not used.") # Send add request to main server on the same machine. 
def _get_overlap(mask_arr, ids):
    """Keep only the IDs whose entry in a boolean mask array is set.

    Parameters
    ----------
    mask_arr : 1D tensor
        A boolean mask array. It may be a ``DistTensor``.
    ids : 1D tensor
        A vector with IDs.

    Returns
    -------
    1D tensor
        The selected IDs.
    """
    if isinstance(mask_arr, DistTensor):
        # A DistTensor is indexed directly with the IDs.
        selected = mask_arr[ids]
    else:
        selected = F.gather_row(F.tensor(mask_arr), ids)
    return F.boolean_mask(ids, selected)
#Trainers: {}".format( rank, num_clients ) # all ranks of the clients in the same machine are in a contiguous range. client_id_in_part = rank % num_client_per_part local_eles = _get_overlap(elements, local_eles) # get a subset for the local client. size = len(local_eles) // num_client_per_part # if this isn't the last client in the partition. if client_id_in_part + 1 < num_client_per_part: return local_eles[ (size * client_id_in_part) : (size * (client_id_in_part + 1)) ] else: return local_eles[(size * client_id_in_part) :] def _even_offset(n, k): """Split an array of length n into k segments and the difference of thier length is at most 1. Return the offset of each segment. """ eles_per_part = n // k offset = np.array([0] + [eles_per_part] * k, dtype=int) offset[1 : n - eles_per_part * k + 1] += 1 return np.cumsum(offset) def _split_even_to_part(partition_book, elements): """Split the input element list evenly.""" # here we divide the element list as evenly as possible. If we use range partitioning, # the split results also respect the data locality. Range partitioning is the default # strategy. # TODO(zhengda) we need another way to divide the list for other partitioning strategy. if isinstance(elements, DistTensor): nonzero_count = elements.count_nonzero() else: elements = F.tensor(elements) nonzero_count = F.count_nonzero(elements) # compute the offset of each split and ensure that the difference of each partition size # is 1. offsets = _even_offset(nonzero_count, partition_book.num_partitions()) assert offsets[-1] == nonzero_count # Get the elements that belong to the partition. 
def _split_random_within_part(partition_book, rank, part_eles):
    """Randomly pick a disjoint subset of a partition's elements for one client.

    Parameters
    ----------
    partition_book : GraphPartitionBook
        Provides ``num_partitions()`` and ``partid``.
    rank : int or None
        The trainer rank. If None, the rank of the current process is used.
    part_eles : 1D tensor
        The elements assigned to the partition.

    Returns
    -------
    1D tensor
        ``part_eles`` itself when there is one client per partition, otherwise
        the subset selected for the client (in the order of ``part_eles``).
    """
    # If there are more than one client in a partition, we need to randomly select a subset of
    # elements in the partition for a client. We have to make sure that the set of elements
    # for different clients are disjoint.
    num_clients = role.get_num_trainers()
    num_client_per_part = num_clients // partition_book.num_partitions()
    if num_client_per_part == 1:
        return part_eles
    if rank is None:
        rank = role.get_trainer_rank()
    assert (
        rank < num_clients
    ), "The input rank ({}) is incorrect. #Trainers: {}".format(
        rank, num_clients
    )
    client_id_in_part = rank % num_client_per_part
    offset = _even_offset(len(part_eles), num_client_per_part)

    # Every process (client) of a partition must permute the elements in the
    # same way so that the clients get disjoint subsets; the permutation is
    # therefore seeded with the partition ID. A private RandomState yields the
    # same permutation as seeding the global generator, without clobbering the
    # process-wide NumPy random state.
    rng = np.random.RandomState(partition_book.partid)
    rand_idx = rng.permutation(len(part_eles))
    rand_idx = rand_idx[
        offset[client_id_in_part] : offset[client_id_in_part + 1]
    ]
    # Sort the picked positions so the result keeps the order of ``part_eles``.
    idx, _ = F.sort_1d(F.tensor(rand_idx))
    return F.gather_row(part_eles, idx)
def _split_by_trainer_id(
    partition_book,
    part_eles,
    trainer_id,
    num_client_per_part,
    client_id_in_part,
):
    """Pick one client's share of a partition's elements using trainer IDs.

    Elements whose trainer ID maps to the local partition are ordered by their
    trainer slot and split evenly among the clients; the remaining (remote)
    elements are split evenly as well. The client's local share comes first
    in the returned tensor, followed by its remote share.
    """
    # TODO(zhengda): MXNet cannot deal with empty tensors, which makes the implementation
    # much more difficult. Let's just use numpy for the computation for now. We just
    # perform operations on vectors. It shouldn't be too difficult.
    assigned = F.asnumpy(trainer_id)
    candidates = F.asnumpy(part_eles)
    owner_part = assigned // num_client_per_part
    slot_in_part = assigned % num_client_per_part

    # Partition that owns each candidate element (candidates are global IDs).
    owner_of = owner_part[candidates]
    local = candidates[np.flatnonzero(owner_of == partition_book.partid)]
    remote = candidates[np.flatnonzero(owner_of != partition_book.partid)]

    # Group the local elements by trainer slot: the positions belonging to
    # slot 0 first, then slot 1, and so on. The result is a permutation of
    # 0...(len(local)-1).
    local_slots = slot_in_part[local]
    grouped = np.concatenate(
        [
            np.flatnonzero(local_slots == slot)
            for slot in range(num_client_per_part)
        ],
        axis=0,
    )
    local = local[grouped]

    def _share(eles):
        # Evenly split ``eles`` among the clients and keep this client's part.
        bounds = _even_offset(len(eles), num_client_per_part)
        return eles[bounds[client_id_in_part] : bounds[client_id_in_part + 1]]

    mine = np.concatenate([_share(local), _share(remote)], axis=0)
    return F.tensor(mine)
def node_split(
    nodes,
    partition_book=None,
    ntype="_N",
    rank=None,
    force_even=True,
    node_trainer_ids=None,
):
    """Split nodes and return a subset for the local rank.

    This function splits the input nodes based on the partition book and
    returns a subset of nodes for the local rank. This method is used for
    dividing workloads for distributed training.

    The input nodes are stored as a vector of masks. The length of the vector is
    the same as the number of nodes in a graph; 1 indicates that the vertex in
    the corresponding location exists.

    There are two strategies to split the nodes. If ``force_even`` is True (the
    default), the nodes are split evenly so that each process gets almost the
    same number of nodes. If ``force_even`` is False, the nodes are split in a
    way to maximize data locality: the nodes that belong to the local partition
    are divided among the processes of that partition.

    When ``force_even`` is True, the data locality is still preserved if a graph
    is partitioned with Metis and the node/edge IDs are shuffled. In this case,
    majority of the nodes returned for a process are the ones that belong to the
    process. If node/edge IDs are not shuffled, data locality is not guaranteed.

    Parameters
    ----------
    nodes : 1D tensor or DistTensor
        A boolean mask vector that indicates input nodes.
    partition_book : GraphPartitionBook, optional
        The graph partition book. Required when ``nodes`` is not a DistTensor.
    ntype : str, optional
        The node type of the input nodes.
    rank : int, optional
        The rank of a process. If not given, the rank of the current process is used.
    force_even : bool, optional
        Force the nodes are split evenly.
    node_trainer_ids : 1D tensor or DistTensor, optional
        If not None, split the nodes to the trainers on the same machine according to
        trainer IDs assigned to each node. Otherwise, split randomly. The input
        is never modified.

    Returns
    -------
    1D-tensor
        The vector of node IDs that belong to the rank.
    """
    if not isinstance(nodes, DistTensor):
        assert (
            partition_book is not None
        ), "Regular tensor requires a partition book."
    elif partition_book is None:
        partition_book = nodes.part_policy.partition_book

    assert len(nodes) == partition_book._num_nodes(
        ntype
    ), "The length of boolean mask vector should be the number of nodes in the graph."
    if rank is None:
        rank = role.get_trainer_rank()
    if force_even:
        num_clients = role.get_num_trainers()
        num_client_per_part = num_clients // partition_book.num_partitions()
        assert (
            num_clients % partition_book.num_partitions() == 0
        ), "The total number of clients should be multiple of the number of partitions."
        part_nid = _split_even_to_part(partition_book, nodes)
        if num_client_per_part == 1:
            return part_nid
        elif node_trainer_ids is None:
            return _split_random_within_part(partition_book, rank, part_nid)
        else:
            trainer_id = node_trainer_ids[0 : len(node_trainer_ids)]
            max_trainer_id = F.as_scalar(F.reduce_max(trainer_id)) + 1

            if max_trainer_id > num_clients:
                # We hope the partition scheme with trainer_id could be used when the number of
                # trainers is less than the `num_trainers_per_machine` previously assigned during
                # partitioning.
                assert max_trainer_id % num_clients == 0
                # Slicing a regular tensor may return a view of the caller's
                # data, so rescale out of place instead of with ``//=``.
                trainer_id = trainer_id // (max_trainer_id // num_clients)

            client_id_in_part = rank % num_client_per_part
            return _split_by_trainer_id(
                partition_book,
                part_nid,
                trainer_id,
                num_client_per_part,
                client_id_in_part,
            )
    else:
        # Get all nodes that belong to the rank.
        local_nids = partition_book.partid2nids(
            partition_book.partid, ntype=ntype
        )
        return _split_local(partition_book, rank, nodes, local_nids)
def edge_split(
    edges,
    partition_book=None,
    etype="_E",
    rank=None,
    force_even=True,
    edge_trainer_ids=None,
):
    """Split edges and return a subset for the local rank.

    This function splits the input edges based on the partition book and
    returns a subset of edges for the local rank. This method is used for
    dividing workloads for distributed training.

    The input edges can be stored as a vector of masks. The length of the vector is
    the same as the number of edges in a graph; 1 indicates that the edge in
    the corresponding location exists.

    There are two strategies to split the edges. If ``force_even`` is True (the
    default), the edges are split evenly so that each process gets almost the
    same number of edges. If ``force_even`` is False, the edges are split in a
    way to maximize data locality: the edges that belong to the local partition
    are divided among the processes of that partition.

    When ``force_even`` is True, the data locality is still preserved if a graph
    is partitioned with Metis and the node/edge IDs are shuffled. In this case,
    majority of the edges returned for a process are the ones that belong to the
    process. If node/edge IDs are not shuffled, data locality is not guaranteed.

    Parameters
    ----------
    edges : 1D tensor or DistTensor
        A boolean mask vector that indicates input edges.
    partition_book : GraphPartitionBook, optional
        The graph partition book. Required when ``edges`` is not a DistTensor.
    etype : str or (str, str, str), optional
        The edge type of the input edges.
    rank : int, optional
        The rank of a process. If not given, the rank of the current process is used.
    force_even : bool, optional
        Force the edges are split evenly.
    edge_trainer_ids : 1D tensor or DistTensor, optional
        If not None, split the edges to the trainers on the same machine according to
        trainer IDs assigned to each edge. Otherwise, split randomly. The input
        is never modified.

    Returns
    -------
    1D-tensor
        The vector of edge IDs that belong to the rank.
    """
    if not isinstance(edges, DistTensor):
        assert (
            partition_book is not None
        ), "Regular tensor requires a partition book."
    elif partition_book is None:
        partition_book = edges.part_policy.partition_book
    assert len(edges) == partition_book._num_edges(
        etype
    ), "The length of boolean mask vector should be the number of edges in the graph."
    if rank is None:
        rank = role.get_trainer_rank()
    if force_even:
        num_clients = role.get_num_trainers()
        num_client_per_part = num_clients // partition_book.num_partitions()
        assert (
            num_clients % partition_book.num_partitions() == 0
        ), "The total number of clients should be multiple of the number of partitions."
        part_eid = _split_even_to_part(partition_book, edges)
        if num_client_per_part == 1:
            return part_eid
        elif edge_trainer_ids is None:
            return _split_random_within_part(partition_book, rank, part_eid)
        else:
            trainer_id = edge_trainer_ids[0 : len(edge_trainer_ids)]
            max_trainer_id = F.as_scalar(F.reduce_max(trainer_id)) + 1

            if max_trainer_id > num_clients:
                # We hope the partition scheme with trainer_id could be used when the number of
                # trainers is less than the `num_trainers_per_machine` previously assigned during
                # partitioning.
                assert max_trainer_id % num_clients == 0
                # Slicing a regular tensor may return a view of the caller's
                # data, so rescale out of place instead of with ``//=``.
                trainer_id = trainer_id // (max_trainer_id // num_clients)

            client_id_in_part = rank % num_client_per_part
            return _split_by_trainer_id(
                partition_book,
                part_eid,
                trainer_id,
                num_client_per_part,
                client_id_in_part,
            )
    else:
        # Get all edges that belong to the rank.
        local_eids = partition_book.partid2eids(
            partition_book.partid, etype=etype
        )
        return _split_local(partition_book, rank, edges, local_eids)
def _default_init_data(shape, dtype):
    """Default initializer: a zero tensor of the given shape and dtype on CPU."""
    return F.zeros(shape, dtype, F.cpu())


# These IDs can identify the anonymous distributed tensors.
DIST_TENSOR_ID = 0


class DistTensor:
    """Distributed tensor.

    ``DistTensor`` references to a distributed tensor sharded and stored in a cluster of machines.
    It has the same interface as Pytorch Tensor to access its metadata (e.g., shape and data type).
    To access data in a distributed tensor, it supports slicing rows and writing data to rows.
    It does not support any operators of a deep learning framework, such as addition and
    multiplication.

    Currently, distributed tensors are designed to store node data and edge data of a distributed
    graph. Therefore, their first dimensions have to be the number of nodes or edges in the graph.
    The tensors are sharded in the first dimension based on the partition policy of nodes
    or edges. When a distributed tensor is created, the partition policy is automatically
    determined based on the first dimension if the partition policy is not provided. If the first
    dimension matches the number of nodes of a node type, ``DistTensor`` will use the partition
    policy for this particular node type; if the first dimension matches the number of edges of
    an edge type, ``DistTensor`` will use the partition policy for this particular edge type.
    If DGL cannot determine the partition policy automatically (e.g., multiple node types or
    edge types have the same number of nodes or edges), users have to explicitly provide
    the partition policy.

    A distributed tensor can be either named or anonymous.
    When a distributed tensor has a name, the tensor can be persistent if ``persistent=True``.
    Normally, DGL destroys the distributed tensor in the system when the ``DistTensor`` object
    goes away. However, a persistent tensor lives in the system even if
    the ``DistTensor`` object disappears in the trainer process. The persistent tensor has
    the same life span as the DGL servers. DGL does not allow an anonymous tensor to be persistent.

    When a ``DistTensor`` object is created, it may reference to an existing distributed tensor or
    create a new one. A distributed tensor is identified by the name passed to the constructor.
    If the name exists, ``DistTensor`` will reference the existing one.
    In this case, the shape and the data type must match the existing tensor.
    If the name doesn't exist, a new tensor will be created in the kvstore.

    When a distributed tensor is created, its values are initialized to zero. Users
    can define an initialization function to control how the values are initialized.
    The init function has two input arguments: shape and data type and returns a tensor.
    Below shows an example of an init function:

    .. highlight:: python
    .. code-block:: python

        def init_func(shape, dtype):
            return torch.ones(shape, dtype=dtype)

    Parameters
    ----------
    shape : tuple
        The shape of the tensor. The first dimension has to be the number of nodes or
        the number of edges of a distributed graph.
    dtype : dtype
        The dtype of the tensor. The data type has to be the one in the deep learning framework.
    name : string, optional
        The name of the embeddings. The name can uniquely identify embeddings in a system
        so that another ``DistTensor`` object can refer to the distributed tensor.
    init_func : callable, optional
        The function to initialize data in the tensor. If the init function is not provided,
        the values of the embeddings are initialized to zero.
    part_policy : PartitionPolicy, optional
        The partition policy of the rows of the tensor to different machines in the cluster.
        Currently, it only supports node partition policy or edge partition policy.
        The system determines the right partition policy automatically.
    persistent : bool
        Whether the created tensor lives after the ``DistTensor`` object is destroyed.
    is_gdata : bool
        Whether the created tensor is a ndata/edata or not.
    attach : bool
        Whether to attach group ID into name to be globally unique.

    Examples
    --------
    >>> init = lambda shape, dtype: th.ones(shape, dtype=dtype)
    >>> arr = dgl.distributed.DistTensor((g.num_nodes(), 2), th.int32, init_func=init)
    >>> print(arr[0:3])
    tensor([[1, 1],
            [1, 1],
            [1, 1]], dtype=torch.int32)
    >>> arr[0:3] = th.ones((3, 2), dtype=th.int32) * 2
    >>> print(arr[0:3])
    tensor([[2, 2],
            [2, 2],
            [2, 2]], dtype=torch.int32)

    Note
    ----
    The creation of ``DistTensor`` is a synchronized operation. When a trainer process tries to
    create a ``DistTensor`` object, the creation succeeds only when all trainer processes
    do the same.
    """

    def __init__(
        self,
        shape,
        dtype,
        name=None,
        init_func=None,
        part_policy=None,
        persistent=False,
        is_gdata=True,
        attach=True,
    ):
        self.kvstore = get_kvstore()
        assert (
            self.kvstore is not None
        ), "Distributed module is not initialized. Please call dgl.distributed.initialize."
        self._shape = shape
        self._dtype = dtype
        self._attach = attach
        self._is_gdata = is_gdata

        part_policies = self.kvstore.all_possible_part_policy
        # If a user doesn't provide a partition policy, we should find one based on
        # the input shape.
        if part_policy is None:
            for policy_name in part_policies:
                policy = part_policies[policy_name]
                if policy.get_size() == shape[0]:
                    # If multiple partition policies match the input shape, we cannot
                    # decide which is the right one automatically. We should ask users
                    # to provide one.
                    assert part_policy is None, (
                        "Multiple partition policies match the input shape. "
                        + "Please provide a partition policy explicitly."
                    )
                    part_policy = policy
            assert part_policy is not None, (
                "Cannot find a right partition policy. It is either because "
                + "its first dimension does not match the number of nodes or edges "
                + "of a distributed graph or there does not exist a distributed graph."
            )

        self._part_policy = part_policy
        assert (
            part_policy.get_size() == shape[0]
        ), "The partition policy does not match the input shape."

        if init_func is None:
            init_func = _default_init_data
        exist_names = self.kvstore.data_name_list()
        # If a user doesn't provide a name, we generate a name ourselves.
        # We need to generate the name in a deterministic way.
        if name is None:
            assert (
                not persistent
            ), "We cannot generate anonymous persistent distributed tensors"
            global DIST_TENSOR_ID
            # All processes of the same role should create DistTensor synchronously.
            # Thus, all of them should have the same IDs.
            name = "anonymous-" + get_role() + "-" + str(DIST_TENSOR_ID)
            DIST_TENSOR_ID += 1
        assert isinstance(name, str), "name {} is type {}".format(
            name, type(name)
        )
        name = self._attach_group_id(name)
        self._tensor_name = name
        data_name = part_policy.get_data_name(name)
        self._name = str(data_name)
        self._persistent = persistent
        if self._name not in exist_names:
            # The tensor does not exist yet: this object creates and owns it.
            self._owner = True
            self.kvstore.init_data(
                self._name, shape, dtype, part_policy, init_func, is_gdata
            )
        else:
            # Reference an existing tensor; its metadata has to agree.
            self._owner = False
            dtype1, shape1, _ = self.kvstore.get_data_meta(self._name)
            assert (
                dtype == dtype1
            ), "The dtype does not match with the existing tensor"
            assert (
                shape == shape1
            ), "The shape does not match with the existing tensor"

    def __del__(self):
        # ``__init__`` may have raised before ``_persistent``/``_owner`` were
        # assigned; such a half-constructed object owns nothing in the kvstore
        # and must not raise AttributeError while being finalized.
        persistent = getattr(self, "_persistent", True)
        owner = getattr(self, "_owner", False)
        if persistent or not owner:
            return
        initialized = (
            os.environ.get("DGL_DIST_MODE", "standalone") == "standalone"
            or is_initialized()
        )
        if initialized:
            self.kvstore.delete_data(self._name)

    def __getitem__(self, idx):
        idx = utils.toindex(idx)
        idx = idx.tousertensor()
        return self.kvstore.pull(name=self._name, id_tensor=idx)

    def __setitem__(self, idx, val):
        idx = utils.toindex(idx)
        idx = idx.tousertensor()
        # TODO(zhengda) how do we want to support broadcast (e.g., G.ndata['h'][idx] = 1).
        self.kvstore.push(name=self._name, id_tensor=idx, data_tensor=val)

    @property
    def kvstore_key(self):
        """Return the key string of this DistTensor in the associated KVStore."""
        return self._name

    @property
    def local_partition(self):
        """Return the local partition of this DistTensor."""
        return self.kvstore.data_store[self._name]

    def __or__(self, other):
        # The result is a new anonymous tensor with the same metadata; the
        # kvstore fills it through its ``union`` operation on the two inputs.
        new_dist_tensor = DistTensor(
            self._shape,
            self._dtype,
            part_policy=self._part_policy,
            persistent=self._persistent,
            is_gdata=self._is_gdata,
            attach=self._attach,
        )
        kvstore = self.kvstore
        kvstore.union(self._name, other._name, new_dist_tensor._name)
        return new_dist_tensor

    def __len__(self):
        return self._shape[0]

    @property
    def part_policy(self):
        """Return the partition policy

        Returns
        -------
        PartitionPolicy
            The partition policy of the distributed tensor.
        """
        return self._part_policy

    @property
    def shape(self):
        """Return the shape of the distributed tensor.

        Returns
        -------
        tuple
            The shape of the distributed tensor.
        """
        return self._shape

    @property
    def dtype(self):
        """Return the data type of the distributed tensor.

        Returns
        -------
        dtype
            The data type of the tensor.
        """
        return self._dtype

    @property
    def name(self):
        """Return the name of the distributed tensor

        Returns
        -------
        str
            The kvstore data name of the tensor, without the group ID suffix.
        """
        return self._detach_group_id(self._name)

    @property
    def tensor_name(self):
        """Return the tensor name

        Returns
        -------
        str
            The name of the tensor, without the group ID suffix.
        """
        return self._detach_group_id(self._tensor_name)

    def count_nonzero(self):
        """Count and return the number of nonzero value

        Returns
        -------
        int
            the number of nonzero value
        """
        return self.kvstore.count_nonzero(name=self._name)

    def _attach_group_id(self, name):
        """Attach group ID if needed

        Returns
        -------
        str
            new name with group ID attached
        """
        if not self._attach:
            return name
        return "{}_{}".format(name, get_group_id())

    def _detach_group_id(self, name):
        """Detach group ID if needed

        Returns
        -------
        str
            original name without group ID
        """
        if not self._attach:
            return name
        # The suffix has the same form that ``_attach_group_id`` appends.
        suffix = "_{}".format(get_group_id())
        return name[: -len(suffix)]
Examples -------- >>> c_etype_str = 'user:like:item' >>> c_etype = _etype_str_to_tuple(c_etype_str) >>> print(c_etype) ('user', 'like', 'item') """ ret = tuple(c_etype.split(CANONICAL_ETYPE_DELIMITER)) assert len(ret) == 3, ( "Passed-in canonical etype should be in format of 'str:str:str'. " f"But got {c_etype}." ) return ret def _move_metadata_to_shared_mem( graph_name, num_nodes, num_edges, part_id, num_partitions, node_map, edge_map, is_range_part, ): """Move all metadata of the partition book to the shared memory. These metadata will be used to construct graph partition book. Parameters ---------- graph_name : str The name of the graph num_nodes : int The total number of nodes num_edges : int The total number of edges part_id : int The partition ID. num_partitions : int The number of physical partitions generated for the graph. node_map : Tensor It stores the mapping information from node IDs to partitions. With range partitioning, the tensor stores the serialized result of partition ranges. edge_map : Tensor It stores the mapping information from edge IDs to partitions. With range partitioning, the tensor stores the serialized result of partition ranges. is_range_part : bool Indicate that we use a range partition. This is important for us to deserialize data in node_map and edge_map. Returns ------- (Tensor, Tensor, Tensor) The first tensor stores the serialized metadata, the second tensor stores the serialized node map and the third tensor stores the serialized edge map. All tensors are stored in shared memory. 
""" meta = _to_shared_mem( F.tensor( [ int(is_range_part), num_nodes, num_edges, num_partitions, part_id, len(node_map), len(edge_map), ] ), _get_ndata_path(graph_name, "meta"), ) node_map = _to_shared_mem(node_map, _get_ndata_path(graph_name, "node_map")) edge_map = _to_shared_mem(edge_map, _get_edata_path(graph_name, "edge_map")) return meta, node_map, edge_map def _get_shared_mem_metadata(graph_name): """Get the metadata of the graph from shared memory. The server serializes the metadata of a graph and store them in shared memory. The client needs to deserialize the data in shared memory and get the metadata of the graph. Parameters ---------- graph_name : str The name of the graph. We can use the graph name to find the shared memory name. Returns ------- (bool, int, int, Tensor, Tensor) The first element indicates whether it is range partitioning; the second element is the partition ID; the third element is the number of partitions; the fourth element is the tensor that stores the serialized result of node maps; the fifth element is the tensor that stores the serialized result of edge maps. """ # The metadata has 7 elements: is_range_part, num_nodes, num_edges, num_partitions, part_id, # the length of node map and the length of the edge map. 
def _get_shared_mem_metadata(graph_name):
    """Read the graph metadata that the server placed in shared memory.

    The server serializes the metadata of a graph and store them in shared memory.
    The client needs to deserialize the data in shared memory and get the metadata
    of the graph.

    Parameters
    ----------
    graph_name : str
        The name of the graph. We can use the graph name to find the shared memory name.

    Returns
    -------
    (bool, int, int, Tensor, Tensor)
        The first element indicates whether it is range partitioning;
        the second element is the partition ID;
        the third element is the number of partitions;
        the fourth element is the tensor that stores the serialized result of node maps;
        the fifth element is the tensor that stores the serialized result of edge maps.
    """
    shm_dtype = DTYPE_DICT[F.int64]

    def _open(path, length):
        # Attach to an existing shared-memory array and expose it as a tensor.
        shared = empty_shared_mem(path, False, (length,), shm_dtype)
        return F.zerocopy_from_dlpack(shared.to_dlpack())

    # The metadata has 7 elements: is_range_part, num_nodes, num_edges, num_partitions, part_id,
    # the length of node map and the length of the edge map.
    header = F.asnumpy(_open(_get_ndata_path(graph_name, "meta"), 7))
    is_range_part = header[0]
    num_partitions = header[3]
    part_id = header[4]
    node_map_len = header[5]
    edge_map_len = header[6]

    node_map = _open(_get_ndata_path(graph_name, "node_map"), node_map_len)
    edge_map = _open(_get_edata_path(graph_name, "edge_map"), edge_map_len)

    return is_range_part, part_id, num_partitions, node_map, edge_map


def get_shared_mem_partition_book(graph_name):
    """Rebuild a graph partition book from shared memory.

    A graph partition book of a specific graph can be serialized to shared memory.
    We can reconstruct a graph partition book from shared memory.

    Parameters
    ----------
    graph_name : str
        The name of the graph.

    Returns
    -------
    GraphPartitionBook
        A graph partition book for a particular partition, or None when the
        metadata array of the graph does not exist in shared memory.
    """
    if not exist_shared_mem_array(_get_ndata_path(graph_name, "meta")):
        return None

    metadata = _get_shared_mem_metadata(graph_name)
    is_range_part, part_id, num_parts, node_map_data, edge_map_data = metadata
    if is_range_part != 1:
        raise TypeError("Only RangePartitionBook is supported currently.")

    # node_map_data and edge_map_data were serialized with pickle and converted into
    # a list of bytes and then stored in a numpy array before being placed in shared
    # memory. To deserialize, we need to reverse the process.
    # node ID ranges and edge ID ranges are stored in the order of node type IDs
    # and edge type IDs, so the position of an entry is its type ID.
    node_entries = pickle.loads(bytes(F.asnumpy(node_map_data).tolist()))
    node_map = {}
    ntypes = {}
    for type_id, entry in enumerate(node_entries):
        ntype, nid_range = entry
        ntypes[ntype] = type_id
        node_map[ntype] = nid_range

    edge_entries = pickle.loads(bytes(F.asnumpy(edge_map_data).tolist()))
    edge_map = {}
    etypes = {}
    for type_id, entry in enumerate(edge_entries):
        etype, eid_range = entry
        etypes[etype] = type_id
        edge_map[etype] = eid_range

    return RangePartitionBook(
        part_id, num_parts, node_map, edge_map, ntypes, etypes
    )
def get_node_partition_from_book(book, device):
    """Build an NDArrayPartition of the nodes from a RangePartitionBook.

    Parameters
    ----------
    book : RangePartitionBook
        The partition book to extract the node partition from.
    device : Device context object.
        The location to node partition is to be used.

    Returns
    -------
    NDarrayPartition
        The NDArrayPartition object for the nodes in the graph.
    """
    assert isinstance(
        book, RangePartitionBook
    ), "Can only convert RangePartitionBook to NDArrayPartition."
    # Build the range boundaries on the host: a leading 0 followed by each
    # entry of ``book._max_node_ids`` plus one.
    last_ids = F.zerocopy_from_numpy(book._max_node_ids)
    leading_zero = F.tensor([0], dtype=F.dtype(last_ids))
    host_ranges = F.cat([leading_zero, last_ids + 1], dim=0)
    device_ranges = F.copy_to(host_ranges, ctx=device)

    # The final boundary is the size of the partitioned array.
    total = int(F.as_scalar(host_ranges[-1]))
    return NDArrayPartition(
        total,
        book.num_partitions(),
        mode="range",
        part_ranges=device_ranges,
    )


class GraphPartitionBook(ABC):
    """The base class of the graph partition book.

    For distributed training, a graph is partitioned into multiple parts and is loaded
    in multiple machines. The partition book contains all necessary information to locate
    nodes and edges in the cluster.

    The partition book contains various partition information, including

    * the number of partitions,
    * the partition ID that a node or edge belongs to,
    * the node IDs and the edge IDs that a partition has.
    * the local IDs of nodes and edges in a partition.

    Currently, only one class implements ``GraphPartitionBook``:
    ``RangePartitionBook``. It calculates the mapping between node/edge IDs
    and partition IDs based on some small metadata because nodes/edges have been
    relabeled to have IDs in the same partition fall in a contiguous ID range.

    A graph partition book is constructed automatically when a graph is partitioned.
    When a graph partition is loaded, a graph partition book is loaded as well.
    Please see :py:meth:`~dgl.distributed.partition.partition_graph`,
    :py:meth:`~dgl.distributed.partition.load_partition` and
    :py:meth:`~dgl.distributed.partition.load_partition_book` for more details.

    The methods of this base class have no implementation.
    """

    def shared_memory(self, graph_name):
        """Move the partition book to shared memory.

        Parameters
        ----------
        graph_name : str
            The graph name. This name will be used to read the partition book from shared
            memory in another process.
        """

    def num_partitions(self):
        """Return the number of partitions.

        Returns
        -------
        int
            number of partitions
        """

    def metadata(self):
        """Return the partition meta data.

        The meta data includes:

        * The machine ID.
        * Number of nodes and edges of each partition.

        Examples
        --------
        >>> print(g.get_partition_book().metadata())
        >>> [{'machine_id' : 0, 'num_nodes' : 3000, 'num_edges' : 5000},
        ...  {'machine_id' : 1, 'num_nodes' : 2000, 'num_edges' : 4888},
        ...  ...]

        Returns
        -------
        list[dict[str, any]]
            Meta data of each partition.
        """

    def nid2partid(self, nids, ntype):
        """Map global node IDs to the IDs of their partitions.

        Parameters
        ----------
        nids : tensor
            global node IDs
        ntype : str
            The node type

        Returns
        -------
        tensor
            partition IDs
        """

    def eid2partid(self, eids, etype):
        """Map global edge IDs to the IDs of their partitions.

        Parameters
        ----------
        eids : tensor
            global edge IDs
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        tensor
            partition IDs
        """

    def partid2nids(self, partid, ntype):
        """Return the global node IDs of a partition.

        Parameters
        ----------
        partid : int
            partition id
        ntype : str
            The node type

        Returns
        -------
        tensor
            node IDs
        """

    def partid2eids(self, partid, etype):
        """Return the global edge IDs of a partition.

        Parameters
        ----------
        partid : int
            partition id
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        tensor
            edge IDs
        """

    def nid2localnid(self, nids, partid, ntype):
        """Get local node IDs within the given partition.

        Parameters
        ----------
        nids : tensor
            global node IDs
        partid : int
            partition ID
        ntype : str
            The node type

        Returns
        -------
        tensor
             local node IDs
        """

    def eid2localeid(self, eids, partid, etype):
        """Get the local edge ids within the given partition.

        Parameters
        ----------
        eids : tensor
            global edge IDs
        partid : int
            partition ID
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        tensor
             local edge IDs
        """

    @property
    def partid(self):
        """Get the current partition ID

        Return
        ------
        int
            The partition ID of current machine
        """

    @property
    def ntypes(self):
        """Get the list of node types"""

    @property
    def etypes(self):
        """Get the list of edge types"""

    @property
    def canonical_etypes(self):
        """Get the list of canonical edge types

        Returns
        -------
        list[(str, str, str)]
            A list of canonical etypes
        """

    def to_canonical_etype(self, etype):
        """Convert an edge type to the corresponding canonical edge type.

        Parameters
        ----------
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        (str, str, str)
            The corresponding canonical edge type
        """

    @property
    def is_homogeneous(self):
        """Whether the graph has at most one edge type and at most one node type."""
        return len(self.etypes) <= 1 and len(self.ntypes) <= 1

    def map_to_per_ntype(self, ids):
        """Map homogeneous node IDs to type-wise IDs and node types.

        Parameters
        ----------
        ids : tensor
            Homogeneous node IDs.

        Returns
        -------
        (tensor, tensor)
            node type IDs and type-wise node IDs.
        """

    def map_to_per_etype(self, ids):
        """Map homogeneous edge IDs to type-wise IDs and edge types.

        Parameters
        ----------
        ids : tensor
            Homogeneous edge IDs.

        Returns
        -------
        (tensor, tensor)
            edge type IDs and type-wise edge IDs.
        """

    def map_to_homo_nid(self, ids, ntype):
        """Map type-wise node IDs and type IDs to homogeneous node IDs.

        Parameters
        ----------
        ids : tensor
            Type-wise node Ids
        ntype : str
            node type

        Returns
        -------
        Tensor
            Homogeneous node IDs.
        """

    def map_to_homo_eid(self, ids, etype):
        """Map type-wise edge IDs and type IDs to homogeneous edge IDs.

        Parameters
        ----------
        ids : tensor
            Type-wise edge Ids
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        Tensor
            Homogeneous edge IDs.
        """
class RangePartitionBook(GraphPartitionBook):
    """This partition book supports more efficient storage of partition information.

    This partition book is used if the nodes and edges of a graph partition are
    assigned with contiguous IDs. It uses a very small amount of memory to store
    the partition information.

    Parameters
    ----------
    part_id : int
        partition ID of current partition book
    num_parts : int
        number of total partitions
    node_map : dict[str, Tensor]
        Global node ID ranges within partitions for each node type. The key is
        the node type name in string. The value is a tensor of shape
        :math:`(K, 2)`, where :math:`K` is the number of partitions. Each row
        has two integers: the starting and the ending IDs for a particular
        node type in a partition. For example, all nodes of type ``"T"`` in
        partition ``i`` have ID range ``node_map["T"][i][0]`` to
        ``node_map["T"][i][1]``.
    edge_map : dict[(str, str, str), Tensor]
        Global edge ID ranges within partitions for each edge type. The key is
        the canonical edge type. The value is a tensor of shape
        :math:`(K, 2)`, where :math:`K` is the number of partitions. Each row
        has two integers: the starting and the ending IDs for a particular
        edge type in a partition. For example, all edges of type ``T`` in
        partition ``i`` have ID range ``edge_map[T][i][0]`` to
        ``edge_map[T][i][1]``.
    ntypes : dict[str, int]
        map ntype strings to ntype IDs.
    etypes : dict[(str, str, str), int]
        map canonical etypes to etype IDs.
    """

    def __init__(self, part_id, num_parts, node_map, edge_map, ntypes, etypes):
        assert part_id >= 0, "part_id cannot be a negative number."
        assert num_parts > 0, "num_parts must be greater than zero."
        self._partid = part_id
        self._num_partitions = num_parts
        self._ntypes = [None] * len(ntypes)
        self._etypes = [None] * len(etypes)
        self._canonical_etypes = [None] * len(etypes)
        # map etypes to canonical ones
        self._etype2canonical = {}
        for ntype, ntype_id in ntypes.items():
            self._ntypes[ntype_id] = ntype
        assert all(
            ntype is not None for ntype in self._ntypes
        ), "The node types have invalid IDs."
        for c_etype, etype_id in etypes.items():
            assert isinstance(c_etype, tuple) and len(c_etype) == 3, (
                "Expect canonical edge type in a triplet of string, but got "
                f"{c_etype}."
            )
            etype = c_etype[1]
            self._etypes[etype_id] = etype
            self._canonical_etypes[etype_id] = c_etype
            if etype in self._etype2canonical:
                # If one etype maps to multiple canonical etypes, empty tuple
                # is used to indicate such ambiguity caused by etype. See more
                # details in self.to_canonical_etype().
                self._etype2canonical[etype] = tuple()
            else:
                self._etype2canonical[etype] = c_etype
        assert all(
            etype is not None for etype in self._etypes
        ), "The edge types have invalid IDs."

        # _typed_nid_range: node type -> NumPy matrix with two columns, in
        #   which each row holds the start and the end of the node ID range in
        #   a partition (global IDs in the homogeneous representation).
        # _typed_max_node_ids: node type -> NumPy vector with the cumulative
        #   per-type node count up to and including each partition.
        # _max_node_ids: vector that indicates the last node ID in each
        #   partition (global ID in the homogeneous representation).
        (
            self._typed_nid_range,
            self._typed_max_node_ids,
            self._max_node_ids,
        ) = self._build_id_ranges(node_map, ntypes, num_parts, "ntype")
        # Same three structures for edges.
        (
            self._typed_eid_range,
            self._typed_max_edge_ids,
            self._max_edge_ids,
        ) = self._build_id_ranges(edge_map, etypes, num_parts, "etype")

        # These two are map functions that map node/edge IDs to node/edge type IDs.
        self._nid_map = IdMap(self._typed_nid_range)
        self._eid_map = IdMap(self._typed_eid_range)

        # Local node/edge type offset that maps the local homogenized node/edge IDs
        # to local heterogenized node/edge IDs. One can do the mapping by binary search
        # on these arrays.
        self._local_ntype_offset = np.cumsum(
            [0]
            + [
                v[self._partid, 1] - v[self._partid, 0]
                for v in self._typed_nid_range.values()
            ]
        ).tolist()
        self._local_etype_offset = np.cumsum(
            [0]
            + [
                v[self._partid, 1] - v[self._partid, 0]
                for v in self._typed_eid_range.values()
            ]
        ).tolist()

        # Get meta data of the partition book
        self._partition_meta_data = []
        for partid in range(self._num_partitions):
            nrange_start = self._max_node_ids[partid - 1] if partid > 0 else 0
            nrange_end = self._max_node_ids[partid]
            erange_start = self._max_edge_ids[partid - 1] if partid > 0 else 0
            erange_end = self._max_edge_ids[partid]
            self._partition_meta_data.append(
                {
                    "machine_id": partid,
                    "num_nodes": int(nrange_end - nrange_start),
                    "num_edges": int(erange_end - erange_start),
                }
            )

    @staticmethod
    def _build_id_ranges(id_map, type_ids, num_parts, kind):
        """Build the per-type and homogeneous range lookups for nodes or edges.

        The caller's ``id_map`` is left untouched: values that are not NumPy
        arrays are converted into a local copy instead of being written back.

        Parameters
        ----------
        id_map : dict
            Type -> ``(num_parts, 2)`` array/tensor of ID ranges.
        type_ids : dict
            The known types; every key of ``id_map`` must be in it.
        num_parts : int
            Number of partitions.
        kind : str
            ``"ntype"`` or ``"etype"``; only used in the assertion message.

        Returns
        -------
        (dict, dict, numpy.ndarray)
            Type -> range matrix, type -> cumulative per-type counts, and the
            element-wise maximum of the range ends over all types.
        """
        typed_range = {}
        typed_max_ids = {}
        max_ids = np.zeros((num_parts,), dtype=np.int64)
        for key, id_range in id_map.items():
            assert key in type_ids, "Unexpected {}: {}.".format(kind, key)
            if not isinstance(id_range, np.ndarray):
                id_range = F.asnumpy(id_range)
            assert id_range.shape == (num_parts, 2)
            typed_range[key] = id_range
            # This is used for per-type lookup.
            typed_max_ids[key] = np.cumsum(id_range[:, 1] - id_range[:, 0])
            # This is used for homogeneous ID lookup.
            max_ids = np.maximum(id_range[:, 1], max_ids)
        return typed_range, typed_max_ids, max_ids

    def shared_memory(self, graph_name):
        """Move data to shared memory."""
        # we need to store the nid ranges and eid ranges of different types in the order defined
        # by type IDs.
        nid_range = [
            (ntype, self._typed_nid_range[ntype]) for ntype in self.ntypes
        ]
        nid_range_pickle = list(pickle.dumps(nid_range))
        eid_range = [
            (etype, self._typed_eid_range[etype])
            for etype in self.canonical_etypes
        ]
        eid_range_pickle = list(pickle.dumps(eid_range))

        self._meta = _move_metadata_to_shared_mem(
            graph_name,
            0,  # We don't need to provide the number of nodes
            0,  # We don't need to provide the number of edges
            self._partid,
            self._num_partitions,
            F.tensor(nid_range_pickle),
            F.tensor(eid_range_pickle),
            True,
        )

    def num_partitions(self):
        """Return the number of partitions."""
        return self._num_partitions

    def _num_nodes(self, ntype=DEFAULT_NTYPE):
        """The total number of nodes"""
        if ntype == DEFAULT_NTYPE:
            return int(self._max_node_ids[-1])
        else:
            return int(self._typed_max_node_ids[ntype][-1])

    def _num_edges(self, etype=DEFAULT_ETYPE):
        """The total number of edges"""
        if etype in (DEFAULT_ETYPE, DEFAULT_ETYPE[1]):
            return int(self._max_edge_ids[-1])
        else:
            c_etype = self.to_canonical_etype(etype)
            return int(self._typed_max_edge_ids[c_etype][-1])

    def metadata(self):
        """Return the partition meta data."""
        return self._partition_meta_data

    def map_to_per_ntype(self, ids):
        """Map global homogeneous node IDs to node type IDs.

        Returns
            type_ids, per_type_ids
        """
        return self._nid_map(ids)

    def map_to_per_etype(self, ids):
        """Map global homogeneous edge IDs to edge type IDs.

        Returns
            type_ids, per_type_ids
        """
        return self._eid_map(ids)

    def map_to_homo_nid(self, ids, ntype):
        """Map per-node-type IDs to global node IDs in the homogeneous format."""
        ids = utils.toindex(ids).tousertensor()
        partids = self.nid2partid(ids, ntype)
        typed_max_nids = F.zerocopy_from_numpy(self._typed_max_node_ids[ntype])
        # Distance from each ID to the end of its partition's per-type range.
        end_diff = F.gather_row(typed_max_nids, partids) - ids
        typed_nid_range = F.zerocopy_from_numpy(
            self._typed_nid_range[ntype][:, 1]
        )
        return F.gather_row(typed_nid_range, partids) - end_diff

    def map_to_homo_eid(self, ids, etype):
        """Map per-edge-type IDs to global edge IDs in the homogeneous format."""
        ids = utils.toindex(ids).tousertensor()
        c_etype = self.to_canonical_etype(etype)
        partids = self.eid2partid(ids, c_etype)
        typed_max_eids = F.zerocopy_from_numpy(
            self._typed_max_edge_ids[c_etype]
        )
        # Distance from each ID to the end of its partition's per-type range.
        end_diff = F.gather_row(typed_max_eids, partids) - ids
        typed_eid_range = F.zerocopy_from_numpy(
            self._typed_eid_range[c_etype][:, 1]
        )
        return F.gather_row(typed_eid_range, partids) - end_diff

    def nid2partid(self, nids, ntype=DEFAULT_NTYPE):
        """From global node IDs to partition IDs"""
        nids = utils.toindex(nids)
        if ntype == DEFAULT_NTYPE:
            ret = np.searchsorted(
                self._max_node_ids, nids.tonumpy(), side="right"
            )
        else:
            ret = np.searchsorted(
                self._typed_max_node_ids[ntype], nids.tonumpy(), side="right"
            )
        ret = utils.toindex(ret)
        return ret.tousertensor()

    def eid2partid(self, eids, etype=DEFAULT_ETYPE):
        """From global edge IDs to partition IDs"""
        eids = utils.toindex(eids)
        if etype in (DEFAULT_ETYPE, DEFAULT_ETYPE[1]):
            ret = np.searchsorted(
                self._max_edge_ids, eids.tonumpy(), side="right"
            )
        else:
            c_etype = self.to_canonical_etype(etype)
            ret = np.searchsorted(
                self._typed_max_edge_ids[c_etype], eids.tonumpy(), side="right"
            )
        ret = utils.toindex(ret)
        return ret.tousertensor()

    def partid2nids(self, partid, ntype=DEFAULT_NTYPE):
        """From partition ID to global node IDs"""
        # TODO do we need to cache it?
        if ntype == DEFAULT_NTYPE:
            start = self._max_node_ids[partid - 1] if partid > 0 else 0
            end = self._max_node_ids[partid]
            return F.arange(start, end)
        else:
            start = (
                self._typed_max_node_ids[ntype][partid - 1] if partid > 0 else 0
            )
            end = self._typed_max_node_ids[ntype][partid]
            return F.arange(start, end)

    def partid2eids(self, partid, etype=DEFAULT_ETYPE):
        """From partition ID to global edge IDs"""
        # TODO do we need to cache it?
        if etype in (DEFAULT_ETYPE, DEFAULT_ETYPE[1]):
            start = self._max_edge_ids[partid - 1] if partid > 0 else 0
            end = self._max_edge_ids[partid]
            return F.arange(start, end)
        else:
            c_etype = self.to_canonical_etype(etype)
            start = (
                self._typed_max_edge_ids[c_etype][partid - 1]
                if partid > 0
                else 0
            )
            end = self._typed_max_edge_ids[c_etype][partid]
            return F.arange(start, end)

    def nid2localnid(self, nids, partid, ntype=DEFAULT_NTYPE):
        """Get local node IDs within the given partition.

        Raises
        ------
        RuntimeError
            If ``partid`` is not the partition this book belongs to.
        """
        if partid != self._partid:
            # Adjacent literals instead of a backslash continuation, so the
            # message does not carry source indentation.
            raise RuntimeError(
                "Now RangePartitionBook does not support "
                "getting remote tensor of nid2localnid."
            )

        nids = utils.toindex(nids)
        nids = nids.tousertensor()
        if ntype == DEFAULT_NTYPE:
            start = self._max_node_ids[partid - 1] if partid > 0 else 0
        else:
            start = (
                self._typed_max_node_ids[ntype][partid - 1] if partid > 0 else 0
            )
        return nids - int(start)

    def eid2localeid(self, eids, partid, etype=DEFAULT_ETYPE):
        """Get the local edge IDs within the given partition.

        Raises
        ------
        RuntimeError
            If ``partid`` is not the partition this book belongs to.
        """
        if partid != self._partid:
            # Adjacent literals instead of a backslash continuation, so the
            # message does not carry source indentation.
            raise RuntimeError(
                "Now RangePartitionBook does not support "
                "getting remote tensor of eid2localeid."
            )

        eids = utils.toindex(eids)
        eids = eids.tousertensor()
        if etype in (DEFAULT_ETYPE, DEFAULT_ETYPE[1]):
            start = self._max_edge_ids[partid - 1] if partid > 0 else 0
        else:
            c_etype = self.to_canonical_etype(etype)
            start = (
                self._typed_max_edge_ids[c_etype][partid - 1]
                if partid > 0
                else 0
            )
        return eids - int(start)

    @property
    def partid(self):
        """Get the current partition ID."""
        return self._partid

    @property
    def ntypes(self):
        """Get the list of node types"""
        return self._ntypes

    @property
    def etypes(self):
        """Get the list of edge types"""
        return self._etypes

    @property
    def canonical_etypes(self):
        """Get the list of canonical edge types

        Returns
        -------
        list[(str, str, str)]
            A list of canonical etypes, ordered by etype ID. The constructor
            asserts that every key of ``etypes`` is a triplet.
        """
        return self._canonical_etypes

    @property
    def local_ntype_offset(self):
        """Get the node type offset array of the local partition.

        The i-th element indicates the starting position of the i-th node type.
        """
        return self._local_ntype_offset

    @property
    def local_etype_offset(self):
        """Get the edge type offset array of the local partition.

        The i-th element indicates the starting position of the i-th edge type.
        """
        return self._local_etype_offset

    def to_canonical_etype(self, etype):
        """Convert an edge type to the corresponding canonical edge type.

        Parameters
        ----------
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        (str, str, str)
            The corresponding canonical edge type

        Raises
        ------
        DGLError
            If the edge type does not exist, or if a plain edge type name
            corresponds to more than one canonical edge type.
        """
        if isinstance(etype, tuple):
            if etype not in self.canonical_etypes:
                raise DGLError('Edge type "{}" does not exist.'.format(etype))
            return etype

        ret = self._etype2canonical.get(etype, None)
        if ret is None:
            raise DGLError('Edge type "{}" does not exist.'.format(etype))
        # An empty tuple marks an etype shared by several canonical etypes.
        if len(ret) == 0:
            raise DGLError(
                'Edge type "%s" is ambiguous. Please use canonical edge type '
                "in the form of (srctype, etype, dsttype)" % etype
            )
        return ret

    @property
    def global_nid_dtype(self):
        """Get the node ID's dtype"""
        return self._nid_map.torch_dtype

    @property
    def global_eid_dtype(self):
        """Get the edge ID's dtype"""
        return self._eid_map.torch_dtype
NODE_PART_POLICY = "node"
EDGE_PART_POLICY = "edge"
POLICY_DELIMITER = "~"


class PartitionPolicy(object):
    """This defines a partition policy for a distributed tensor or distributed embedding.

    When DGL shards tensors and stores them in a cluster of machines, it requires
    partition policies that map rows of the tensors to machines in the cluster.

    Although an arbitrary partition policy can be defined, DGL currently supports
    two partition policies for mapping nodes and edges to machines. To define a partition
    policy from a graph partition book, users need to specify the policy name ('node' or 'edge').

    Parameters
    ----------
    policy_str : str
        Partition policy name, e.g., 'edge~_N:_E:_N' or 'node~_N'. A bare
        'node' or 'edge' is expanded with the default node/edge type.
    partition_book : GraphPartitionBook
        A graph partition book
    """

    def __init__(self, policy_str, partition_book):
        assert policy_str.startswith((NODE_PART_POLICY, EDGE_PART_POLICY)), (
            f"policy_str must start with {NODE_PART_POLICY} or "
            f"{EDGE_PART_POLICY}, but got {policy_str}."
        )
        # A bare policy name refers to the default node/edge type.
        if policy_str == NODE_PART_POLICY:
            policy_str = POLICY_DELIMITER.join(
                (NODE_PART_POLICY, DEFAULT_NTYPE)
            )
        if policy_str == EDGE_PART_POLICY:
            policy_str = POLICY_DELIMITER.join(
                (EDGE_PART_POLICY, DEFAULT_ETYPE[1])
            )
        self._policy_str = policy_str
        self._part_id = partition_book.partid
        self._partition_book = partition_book
        # Everything after the first delimiter is the type name.
        part_policy, type_name = policy_str.split(POLICY_DELIMITER, 1)
        if part_policy == EDGE_PART_POLICY:
            type_name = _etype_str_to_tuple(type_name)
        self._type_name = type_name
        self._is_node = policy_str.startswith(NODE_PART_POLICY)

    @property
    def policy_str(self):
        """Get the policy name

        Returns
        -------
        str
            The name of the partition policy.
        """
        return self._policy_str

    @property
    def type_name(self):
        """Get the type name: ntype or etype

        Returns
        -------
        str or (str, str, str)
            The ntype or etype.
        """
        return self._type_name

    @property
    def part_id(self):
        """Get partition ID

        Returns
        -------
        int
            The partition ID
        """
        return self._part_id

    @property
    def partition_book(self):
        """Get partition book

        Returns
        -------
        GraphPartitionBook
            The graph partition book
        """
        return self._partition_book

    @property
    def is_node(self):
        """Indicate whether the policy is for node or edge

        Returns
        -------
        bool
            True for a node policy, False for an edge policy.
        """
        return self._is_node

    def get_data_name(self, name):
        """Get HeteroDataName"""
        return HeteroDataName(self.is_node, self.type_name, name)

    def to_local(self, id_tensor):
        """Mapping global ID to local ID.

        Parameters
        ----------
        id_tensor : tensor
            Global ID tensor

        Returns
        -------
        tensor
            local ID tensor
        """
        book = self._partition_book
        convert = book.nid2localnid if self.is_node else book.eid2localeid
        return convert(id_tensor, self._part_id, self.type_name)

    def to_partid(self, id_tensor):
        """Mapping global ID to partition ID.

        Parameters
        ----------
        id_tensor : tensor
            Global ID tensor

        Returns
        -------
        tensor
            partition ID
        """
        book = self._partition_book
        convert = book.nid2partid if self.is_node else book.eid2partid
        return convert(id_tensor, self.type_name)

    def get_part_size(self):
        """Get data size of current partition.

        Returns
        -------
        int
            data size
        """
        book = self._partition_book
        lookup = book.partid2nids if self.is_node else book.partid2eids
        return len(lookup(self._part_id, self.type_name))

    def get_size(self):
        """Get the full size of the data.

        Returns
        -------
        int
            data size
        """
        book = self._partition_book
        count = book._num_nodes if self.is_node else book._num_edges
        return count(self.type_name)
class NodePartitionPolicy(PartitionPolicy):
    """Partition policy for nodes.

    Builds the policy string ``"node~<ntype>"`` and hands it to
    :class:`PartitionPolicy`.
    """

    def __init__(self, partition_book, ntype=DEFAULT_NTYPE):
        policy_str = NODE_PART_POLICY + POLICY_DELIMITER + ntype
        super().__init__(policy_str, partition_book)


class EdgePartitionPolicy(PartitionPolicy):
    """Partition policy for edges.

    Builds the policy string ``"edge~<etype string>"`` from a canonical edge
    type and hands it to :class:`PartitionPolicy`.
    """

    def __init__(self, partition_book, etype=DEFAULT_ETYPE):
        is_canonical = isinstance(etype, tuple) and len(etype) == 3
        assert (
            is_canonical
        ), f"Expect canonical edge type in a triplet of string, but got {etype}."
        policy_str = (
            EDGE_PART_POLICY + POLICY_DELIMITER + _etype_tuple_to_str(etype)
        )
        super().__init__(policy_str, partition_book)
class HeteroDataName(object):
    """The data name in a heterogeneous graph.

    A unique data name has three components:

    * indicate it's node data or edge data.
    * indicate the node/edge type.
    * the name of the data.

    Parameters
    ----------
    is_node : bool
        Indicate whether it's node data or edge data.
    entity_type : str or (str, str, str)
        The type of the node/edge. Edge data requires a canonical triplet.
    data_name : str
        The name of the data.
    """

    def __init__(self, is_node, entity_type, data_name):
        if is_node:
            self._policy = NODE_PART_POLICY
        else:
            self._policy = EDGE_PART_POLICY
            assert isinstance(entity_type, tuple) and len(entity_type) == 3, (
                "Expect canonical edge type in a triplet of string, but got "
                f"{entity_type}."
            )
        self._entity_type = entity_type
        self.data_name = data_name

    @property
    def policy_str(self):
        """concatenate policy and entity type into string"""
        type_str = self.get_type()
        if self.is_edge():
            # Edge types are stored as triplets and need flattening first.
            type_str = _etype_tuple_to_str(type_str)
        return POLICY_DELIMITER.join((self._policy, type_str))

    def is_node(self):
        """Is this the name of node data"""
        return self._policy == NODE_PART_POLICY

    def is_edge(self):
        """Is this the name of edge data"""
        return self._policy == EDGE_PART_POLICY

    def get_type(self):
        """The type of the node/edge.

        This is only meaningful in a heterogeneous graph.
        In homogeneous graph, type is '_N' for a node and '_N:_E:_N' for an edge.
        """
        return self._entity_type

    def get_name(self):
        """The name of the data."""
        return self.data_name

    def __str__(self):
        """The full name of the data.

        The full name is used as the key in the KVStore.
        """
        return POLICY_DELIMITER.join((self.policy_str, self.data_name))


def parse_hetero_data_name(name):
    """Parse data name and create HeteroDataName.

    The data name has a specialized format. We can parse the name to determine if
    it's node data or edge data, node/edge type and its actual name. The data name
    has three fields and they are separated by ``POLICY_DELIMITER`` ("~").

    Parameters
    ----------
    name : str
        The data name

    Returns
    -------
    HeteroDataName
    """
    fields = name.split(POLICY_DELIMITER)
    assert len(fields) == 3, "{} is not a valid heterograph data name".format(
        name
    )
    policy, entity_type, data_name = fields
    assert policy in (
        NODE_PART_POLICY,
        EDGE_PART_POLICY,
    ), "{} is not a valid heterograph data name".format(name)
    is_node = policy == NODE_PART_POLICY
    if not is_node:
        entity_type = _etype_str_to_tuple(entity_type)
    return HeteroDataName(is_node, entity_type, data_name)
class SubgraphResponse(Response):
    """The response for sampling and in_subgraph.

    Carries the global source/destination node IDs of the returned edges and,
    optionally, their global edge IDs and edge type IDs.
    """

    def __init__(
        self, global_src, global_dst, *, global_eids=None, etype_ids=None
    ):
        self.__setstate__((global_src, global_dst, global_eids, etype_ids))

    def __setstate__(self, state):
        global_src, global_dst, global_eids, etype_ids = state
        self.global_src = global_src
        self.global_dst = global_dst
        self.global_eids = global_eids
        self.etype_ids = etype_ids

    def __getstate__(self):
        state = (
            self.global_src,
            self.global_dst,
            self.global_eids,
            self.etype_ids,
        )
        return state


class FindEdgeResponse(Response):
    """The response for find_edges.

    Carries the global source/destination node IDs of the requested edges and
    the ``order_id`` that was passed in with the request.
    """

    def __init__(self, global_src, global_dst, order_id):
        self.__setstate__((global_src, global_dst, order_id))

    def __setstate__(self, state):
        global_src, global_dst, order_id = state
        self.global_src = global_src
        self.global_dst = global_dst
        self.order_id = order_id

    def __getstate__(self):
        state = (self.global_src, self.global_dst, self.order_id)
        return state
def _sample_neighbors_graphbolt(
    g,
    gpb,
    nodes,
    fanout,
    edge_dir="in",
    prob=None,
    exclude_edges=None,
    replace=False,
):
    """Sample from local partition via graphbolt.

    The input nodes use global IDs. We need to map the global node IDs to local
    node IDs, perform sampling and map the sampled results to the global IDs
    space again.

    Parameters
    ----------
    g : FusedCSCSamplingGraph
        The local partition.
    gpb : GraphPartitionBook
        The graph partition book.
    nodes : tensor
        The nodes to sample neighbors from.
    fanout : tensor or int
        The number of edges to be sampled for each node.
    edge_dir : str, optional
        Determines whether to sample inbound or outbound edges. Only ``"in"``
        is accepted.
    prob : str, optional
        Key into ``g.edge_attributes`` holding the probability (or mask)
        associated with each neighboring edge of a node.
    exclude_edges : tensor, optional
        The edges to exclude when sampling. Must be None.
    replace : bool, optional
        If True, sample with replacement.

    Returns
    -------
    LocalSampledGraph
        Built from the global source node IDs, the global destination node
        IDs, the global edge IDs (None when ``g`` carries no ``EID`` edge
        attribute) and the sampled subgraph's ``type_per_edge``.
    """
    assert (
        edge_dir == "in"
    ), f"GraphBolt only supports inbound edge sampling but got {edge_dir}."
    assert exclude_edges is None, "GraphBolt does not support excluding edges."
    # Check the graph type before touching any of its attributes, otherwise a
    # wrong object fails with an unrelated AttributeError first.
    assert isinstance(
        g, gb.FusedCSCSamplingGraph
    ), "Expect a FusedCSCSamplingGraph."

    # 1. Map global node IDs to local node IDs.
    nodes = gpb.nid2localnid(nodes, gpb.partid)
    assert isinstance(nodes, torch.Tensor), "Expect a tensor of nodes."
    # Local partition may be saved in torch.int32 even though the global graph
    # is in torch.int64.
    nodes = nodes.to(dtype=g.indices.dtype)

    # 2. Perform sampling.
    probs_or_mask = None
    if prob is not None:
        probs_or_mask = g.edge_attributes[prob]
    if isinstance(fanout, int):
        fanout = torch.LongTensor([fanout])
    assert isinstance(fanout, torch.Tensor), "Expect a tensor of fanout."

    subgraph = g._sample_neighbors(
        nodes,
        None,
        fanout,
        replace=replace,
        probs_or_mask=probs_or_mask,
    )

    # 3. Map local node IDs to global node IDs.
    local_src = subgraph.indices
    local_dst = gb.expand_indptr(
        subgraph.indptr,
        dtype=local_src.dtype,
        node_ids=subgraph.original_column_node_ids,
        output_size=local_src.shape[0],
    )
    global_nid_mapping = g.node_attributes[NID]
    global_src = global_nid_mapping[local_src]
    global_dst = global_nid_mapping[local_dst]

    global_eids = None
    if g.edge_attributes is not None and EID in g.edge_attributes:
        global_eids = g.edge_attributes[EID][subgraph.original_edge_ids]
    return LocalSampledGraph(
        global_src, global_dst, global_eids, subgraph.type_per_edge
    )
def _sample_neighbors_dgl(
    local_g,
    partition_book,
    seed_nodes,
    fan_out,
    edge_dir="in",
    prob=None,
    exclude_edges=None,
    replace=False,
):
    """Sample from local partition.

    The seed nodes arrive as global IDs. They are translated to local IDs,
    sampling runs on the local graph, and the sampled edges are translated
    back to global node and edge IDs, which are returned wrapped in a
    ``LocalSampledGraph``.
    """
    local_seeds = F.astype(
        partition_book.nid2localnid(seed_nodes, partition_book.partid),
        local_g.idtype,
    )
    sampled = local_sample_neighbors(
        local_g,
        local_seeds,
        fan_out,
        edge_dir=edge_dir,
        prob=prob,
        exclude_edges=exclude_edges,
        replace=replace,
        _dist_training=True,
    )
    nid_mapping = local_g.ndata[NID]
    local_src, local_dst = sampled.edges()
    global_src = F.gather_row(nid_mapping, local_src)
    global_dst = F.gather_row(nid_mapping, local_dst)
    global_eids = F.gather_row(local_g.edata[EID], sampled.edata[EID])
    return LocalSampledGraph(global_src, global_dst, global_eids)


def _sample_neighbors(use_graphbolt, *args, **kwargs):
    """Wrapper for sampling neighbors.

    The actual sampling function depends on whether to use GraphBolt.

    Parameters
    ----------
    use_graphbolt : bool
        Whether to use GraphBolt for sampling.
    args : list
        The arguments for the sampling function.
    kwargs : dict
        The keyword arguments for the sampling function.

    Returns
    -------
    The value returned by the selected sampling function
    (``_sample_neighbors_graphbolt`` or ``_sample_neighbors_dgl``).
    """
    if use_graphbolt:
        return _sample_neighbors_graphbolt(*args, **kwargs)
    return _sample_neighbors_dgl(*args, **kwargs)
def _sample_etype_neighbors_dgl(
    local_g,
    partition_book,
    seed_nodes,
    fan_out,
    edge_dir="in",
    prob=None,
    exclude_edges=None,
    replace=False,
    etype_offset=None,
    etype_sorted=False,
):
    """Sample from local partition.

    The seed nodes arrive as global IDs. They are translated to local IDs,
    per-etype sampling runs on the local graph, and the sampled edges are
    translated back to global node and edge IDs, which are returned wrapped
    in a ``LocalSampledGraph``.
    """
    assert etype_offset is not None, "The etype offset is not provided."

    local_seeds = F.astype(
        partition_book.nid2localnid(seed_nodes, partition_book.partid),
        local_g.idtype,
    )
    sampled = local_sample_etype_neighbors(
        local_g,
        local_seeds,
        etype_offset,
        fan_out,
        edge_dir=edge_dir,
        prob=prob,
        exclude_edges=exclude_edges,
        replace=replace,
        etype_sorted=etype_sorted,
        _dist_training=True,
    )
    nid_mapping = local_g.ndata[NID]
    local_src, local_dst = sampled.edges()
    global_src = F.gather_row(nid_mapping, local_src)
    global_dst = F.gather_row(nid_mapping, local_dst)
    global_eids = F.gather_row(local_g.edata[EID], sampled.edata[EID])
    return LocalSampledGraph(global_src, global_dst, global_eids)


def _sample_etype_neighbors(use_graphbolt, *args, **kwargs):
    """Wrapper for sampling etype neighbors.

    The actual sampling function depends on whether to use GraphBolt.

    Parameters
    ----------
    use_graphbolt : bool
        Whether to use GraphBolt for sampling.
    args : list
        The arguments for the sampling function.
    kwargs : dict
        The keyword arguments for the sampling function.

    Returns
    -------
    The value returned by the selected sampling function
    (``_sample_neighbors_graphbolt`` or ``_sample_etype_neighbors_dgl``).
    """
    if use_graphbolt:
        # GraphBolt does not require `etype_offset` and `etype_sorted`.
        for unused_key in ("etype_offset", "etype_sorted"):
            kwargs.pop(unused_key, None)
        return _sample_neighbors_graphbolt(*args, **kwargs)
    return _sample_etype_neighbors_dgl(*args, **kwargs)


def _find_edges(local_g, partition_book, seed_edges):
    """Given an edge ID array, return the source
    and destination node ID array ``s`` and ``d`` in the local partition.

    The returned node IDs are global IDs, looked up through the partition's
    ``NID`` node attribute.
    """
    local_eids = partition_book.eid2localeid(seed_edges, partition_book.partid)
    if not isinstance(local_g, gb.FusedCSCSamplingGraph):
        local_eids = F.astype(local_eids, local_g.idtype)
        local_src, local_dst = local_g.find_edges(local_eids)
        nid_mapping = local_g.ndata[NID]
    else:
        # When converting from DGLGraph to FusedCSCSamplingGraph, the edge IDs
        # are re-ordered. In order to find the correct node pairs, we need to
        # map the DGL edge IDs back to GraphBolt edge IDs.
        has_mappings = (
            DGL2GB_EID in local_g.edge_attributes
            and GB_DST_ID in local_g.edge_attributes
        )
        if not has_mappings:
            raise ValueError(
                "The edge attributes DGL2GB_EID and GB_DST_ID are not found. "
                "Please make sure `coo` format is available when generating "
                "partitions in GraphBolt format."
            )
        local_eids = local_g.edge_attributes[DGL2GB_EID][local_eids]
        local_src = local_g.indices[local_eids]
        local_dst = local_g.edge_attributes[GB_DST_ID][local_eids]
        nid_mapping = local_g.node_attributes[NID]
    return nid_mapping[local_src], nid_mapping[local_dst]
def _in_degrees(local_g, partition_book, n):
    """Get in-degree of the nodes in the local partition."""
    local_nids = F.astype(
        partition_book.nid2localnid(n, partition_book.partid), local_g.idtype
    )
    return local_g.in_degrees(local_nids)


def _out_degrees(local_g, partition_book, n):
    """Get out-degree of the nodes in the local partition."""
    local_nids = F.astype(
        partition_book.nid2localnid(n, partition_book.partid), local_g.idtype
    )
    return local_g.out_degrees(local_nids)


def _in_subgraph(local_g, partition_book, seed_nodes):
    """Get in subgraph from local partition.

    The seed nodes arrive as global IDs. They are translated to local IDs,
    the in-subgraph is extracted from the local graph, and its edges are
    translated back to global node and edge IDs, which are returned wrapped
    in a ``LocalSampledGraph``.
    """
    local_seeds = F.astype(
        partition_book.nid2localnid(seed_nodes, partition_book.partid),
        local_g.idtype,
    )
    subg = local_in_subgraph(local_g, local_seeds)
    nid_mapping = local_g.ndata[NID]
    local_src, local_dst = subg.edges()
    global_src = nid_mapping[local_src]
    global_dst = nid_mapping[local_dst]
    global_eids = F.gather_row(local_g.edata[EID], subg.edata[EID])
    return LocalSampledGraph(global_src, global_dst, global_eids)
class SamplingRequest(Request):
    """Request asking a server to sample neighbors of the given seed nodes.

    Parameters
    ----------
    nodes : tensor
        Seed nodes whose neighbors are sampled.
    fan_out : int
        Fanout forwarded to the sampling routine.
    edge_dir : str, optional
        Edge direction forwarded to the sampling routine.
    prob : optional
        When GraphBolt is not used, the key under which the probability data
        is stored in the server's KVStore; otherwise forwarded unchanged.
    exclude_edges : optional
        Forwarded to the sampling routine.
    replace : bool, optional
        Forwarded to the sampling routine.
    use_graphbolt : bool, optional
        Whether the GraphBolt sampling path is used.
    """

    def __init__(
        self,
        nodes,
        fan_out,
        edge_dir="in",
        prob=None,
        exclude_edges=None,
        replace=False,
        use_graphbolt=False,
    ):
        self.seed_nodes = nodes
        self.fan_out = fan_out
        self.edge_dir = edge_dir
        self.prob = prob
        self.exclude_edges = exclude_edges
        self.replace = replace
        self.use_graphbolt = use_graphbolt

    def __getstate__(self):
        # The field order here must mirror the unpacking in __setstate__.
        return (
            self.seed_nodes,
            self.edge_dir,
            self.prob,
            self.exclude_edges,
            self.replace,
            self.fan_out,
            self.use_graphbolt,
        )

    def __setstate__(self, state):
        (
            self.seed_nodes,
            self.edge_dir,
            self.prob,
            self.exclude_edges,
            self.replace,
            self.fan_out,
            self.use_graphbolt,
        ) = state

    def process_request(self, server_state):
        """Sample on the server's local partition and wrap the result."""
        graph = server_state.graph
        book = server_state.partition_book
        store = server_state.kv_store

        if self.prob is None or self.use_graphbolt:
            prob = self.prob
        else:
            # Resolve the KVStore key to the actual data held by the server.
            prob = [store.data_store[self.prob]]

        sampled = _sample_neighbors(
            self.use_graphbolt,
            graph,
            book,
            self.seed_nodes,
            self.fan_out,
            edge_dir=self.edge_dir,
            prob=prob,
            exclude_edges=self.exclude_edges,
            replace=self.replace,
        )
        return SubgraphResponse(
            sampled.global_src,
            sampled.global_dst,
            global_eids=sampled.global_eids,
            etype_ids=sampled.etype_ids,
        )
class EdgesRequest(Request):
    """Request asking a server for the end points of the given edges.

    Parameters
    ----------
    edge_ids : tensor
        IDs of the edges to look up.
    order_id : int
        Opaque tag echoed back in the response so that the caller can put
        the result back in its original position.
    """

    def __init__(self, edge_ids, order_id):
        self.edge_ids = edge_ids
        self.order_id = order_id

    def __getstate__(self):
        return self.edge_ids, self.order_id

    def __setstate__(self, state):
        self.edge_ids, self.order_id = state

    def process_request(self, server_state):
        """Look up the edges on the server's local partition."""
        src, dst = _find_edges(
            server_state.graph,
            server_state.partition_book,
            self.edge_ids,
        )
        return FindEdgeResponse(src, dst, self.order_id)
class InSubgraphRequest(Request):
    """Request asking a server for the in-subgraph of the given seed nodes.

    Parameters
    ----------
    nodes : tensor
        Seed nodes whose inbound edges are extracted.
    """

    def __init__(self, nodes):
        self.seed_nodes = nodes

    def __setstate__(self, state):
        self.seed_nodes = state

    def __getstate__(self):
        return self.seed_nodes

    def process_request(self, server_state):
        """Extract the in-subgraph on the server's local partition.

        Returns
        -------
        SubgraphResponse
            Global source IDs, destination IDs and edge IDs of the extracted
            edges.
        """
        local_g = server_state.graph
        partition_book = server_state.partition_book
        res = _in_subgraph(local_g, partition_book, self.seed_nodes)
        # ``_in_subgraph`` returns a ``LocalSampledGraph`` namedtuple, which
        # has four fields (the last one being ``etype_ids``). Unpacking it
        # directly into three names raises ``ValueError``, so only take the
        # first three entries.
        global_src, global_dst, global_eids = res[:3]
        return SubgraphResponse(global_src, global_dst, global_eids=global_eids)
def _distributed_access(
    g, nodes, issue_remote_req, local_access, exclude_edges=None
):
    """Fetch the neighborhoods of ``nodes`` from a distributed graph.

    Seed nodes are grouped by the partition that owns them. Requests for
    remotely owned seeds are sent out first, then the locally owned seeds are
    processed while the remote requests are in flight, and finally all partial
    results are merged into a single graph. Ordering the work this way hides
    the latency of the remote accesses.

    Parameters
    ----------
    g : DistGraph
        The distributed graph
    nodes : tensor
        The nodes whose neighborhood are to be fetched.
    issue_remote_req : callable
        Called with the seed nodes of one remote partition; returns the
        request to send to that partition.
    local_access : callable
        Called as ``local_access(local_partition, partition_book, nodes)`` to
        read the data on the local machine.
    exclude_edges : tensor
        The edges to exclude after sampling.

    Returns
    -------
    DGLGraph
        The subgraph that contains the neighborhoods of all input nodes.
    """
    gpb = g.get_partition_book()
    if not isinstance(nodes, torch.Tensor):
        nodes = toindex(nodes).tousertensor()
    owner = gpb.nid2partid(nodes)

    remote_reqs = []
    local_seeds = None
    for part in range(gpb.num_partitions()):
        seeds = F.boolean_mask(nodes, owner == part)
        # When the client is co-located with a server, its own partition is
        # read directly instead of going through RPC. A client that is not
        # co-located has no local partition and treats every partition as
        # remote.
        if part == gpb.partid and g.local_partition is not None:
            assert local_seeds is None
            local_seeds = seeds
        elif len(seeds) != 0:
            remote_reqs.append((part, issue_remote_req(seeds)))

    # Fire off the remote requests before doing any local work.
    pending = send_requests_to_machine(remote_reqs) if remote_reqs else None

    partial_results = []
    if local_seeds is not None:
        partial_results.append(
            local_access(g.local_partition, gpb, local_seeds)
        )
    if pending is not None:
        partial_results.extend(recv_responses(pending))

    return merge_graphs(
        partial_results, g.num_nodes(), exclude_edges=exclude_edges
    )
def sample_etype_neighbors(
    g,
    nodes,
    fanout,
    edge_dir="in",
    prob=None,
    exclude_edges=None,
    replace=False,
    etype_sorted=True,
    use_graphbolt=False,
):
    """Sample neighbors per edge type from a distributed graph.

    For each seed node, a number of inbound (or outbound when
    ``edge_dir == 'out'``) edges are randomly chosen for every edge type.
    The returned graph contains all the nodes of the original graph, but only
    the sampled edges. Node/edge features are not preserved. The original IDs
    of the sampled edges are stored as the `dgl.EID` feature in the returned
    graph.

    This function assumes the input is a homogeneous ``DGLGraph`` with the
    edges ordered by their edge types, and the sampling itself works on
    *homogeneous IDs*, i.e. every node and edge has one unique integer ID
    regardless of its type. Use
    :func:`dgl.distributed.GraphPartitionBook.map_to_per_ntype` and
    :func:`dgl.distributed.GraphPartitionBook.map_to_per_etype` to recover the
    type and type-wise ID from a homogeneous ID.

    Parameters
    ----------
    g : DistGraph
        The distributed graph.
    nodes : tensor or dict
        Node IDs to sample neighbors from. If a dict keyed by node type is
        given, the IDs of every entry are converted to homogeneous node IDs
        and concatenated before sampling.
    fanout : int or dict[etype, int]
        The number of edges to be sampled for each node per edge type. If an
        integer is given, the same fanout is applied to every edge type. If a
        dict is given, it must cover every edge type of the graph.

        If -1 is given, all of the neighbors will be selected.
    edge_dir : str, optional
        Determines whether to sample inbound or outbound edges.

        Can take either ``in`` for inbound edges or ``out`` for outbound edges.
    prob : str, optional
        Feature name used as the (unnormalized) probabilities associated with
        each neighboring edge of a node.

        The feature must have only one element for each edge. The features
        must be non-negative floats, and the sum of the features of
        inbound/outbound edges for every node must be positive (though they
        don't have to sum up to one). Otherwise, the result will be undefined.
    exclude_edges : tensor, optional
        The edges to exclude, given as homogeneous edge IDs. They are filtered
        out of the merged sampling result.
    replace : bool, optional
        If True, sample with replacement.

        When sampling with replacement, the sampled subgraph could have
        parallel edges.

        For sampling without replacement, if fanout > the number of neighbors,
        all the neighbors are sampled. If fanout == -1, all neighbors are
        collected.
    etype_sorted : bool, optional
        Indicates whether etypes are sorted.
    use_graphbolt : bool, optional
        Whether to use GraphBolt for sampling.

    Returns
    -------
    DGLGraph
        A sampled subgraph containing only the sampled neighboring edges. It
        is on CPU. It is converted to a heterogeneous graph when the partition
        book is not homogeneous.
    """
    canonical_etypes = g.canonical_etypes
    if isinstance(fanout, int):
        fanout_tensor = F.full_1d(
            len(canonical_etypes), fanout, F.int64, F.cpu()
        )
    else:
        # Lay the per-etype fanouts out in canonical edge type order.
        position = {etype: i for i, etype in enumerate(canonical_etypes)}
        per_etype = [None] * len(canonical_etypes)
        for etype, count in fanout.items():
            per_etype[position[g.to_canonical_etype(etype)]] = count
        assert all(v is not None for v in per_etype), (
            "Not all etypes have valid fanout. Please make sure passed-in "
            "fanout in dict includes all the etypes in graph. Passed-in "
            f"fanout: {fanout}, graph etypes: {g.canonical_etypes}."
        )
        fanout_tensor = F.tensor(per_etype, dtype=F.int64)

    gpb = g.get_partition_book()
    if isinstance(nodes, dict):
        homo_nids = []
        for ntype, ids in nodes.items():
            assert (
                ntype in g.ntypes
            ), "The sampled node type {} does not exist in the input graph".format(
                ntype
            )
            if not F.is_tensor(ids):
                ids = toindex(ids).tousertensor()
            homo_nids.append(gpb.map_to_homo_nid(ids, ntype))
        nodes = F.cat(homo_nids, 0)

    # Edge probabilities are only looked up from the DistGraph when the
    # non-GraphBolt path is used; otherwise ``prob`` is forwarded untouched.
    lookup_prob = prob is not None and (not use_graphbolt)

    def issue_remote_req(node_ids):
        if lookup_prob:
            # See NOTE 1
            # NOTE (BarclayII)
            # Currently DistGraph.edges[] does not accept canonical etype.
            # An empty key marks an edge type that lacks the feature.
            remote_prob = []
            for etype in g.canonical_etypes:
                edge_data = g.edges[etype].data
                remote_prob.append(
                    edge_data[prob].kvstore_key if prob in edge_data else ""
                )
        else:
            remote_prob = prob
        return SamplingRequestEtype(
            node_ids,
            fanout_tensor,
            edge_dir=edge_dir,
            prob=remote_prob,
            exclude_edges=None,
            replace=replace,
            etype_sorted=etype_sorted,
            use_graphbolt=use_graphbolt,
        )

    def local_access(local_g, partition_book, local_nids):
        etype_offset = gpb.local_etype_offset
        if lookup_prob:
            # See NOTE 1
            local_prob = []
            for etype in g.canonical_etypes:
                edge_data = g.edges[etype].data
                local_prob.append(
                    edge_data[prob].local_partition
                    if prob in edge_data
                    else None
                )
        else:
            local_prob = prob
        return _sample_etype_neighbors(
            use_graphbolt,
            local_g,
            partition_book,
            local_nids,
            fanout_tensor,
            edge_dir=edge_dir,
            prob=local_prob,
            exclude_edges=None,
            replace=replace,
            etype_offset=etype_offset,
            etype_sorted=etype_sorted,
        )

    # Edge exclusion is applied once on the merged result rather than inside
    # each per-partition sampling call, hence ``exclude_edges=None`` above.
    frontier = _distributed_access(
        g, nodes, issue_remote_req, local_access, exclude_edges=exclude_edges
    )
    if gpb.is_homogeneous:
        return frontier
    return _frontier_to_heterogeneous_graph(g, frontier, gpb)
If -1 is given, all of the neighbors will be selected. edge_dir : str, optional Determines whether to sample inbound or outbound edges. Can take either ``in`` for inbound edges or ``out`` for outbound edges. prob : str, optional Feature name used as the (unnormalized) probabilities associated with each neighboring edge of a node. The feature must have only one element for each edge. The features must be non-negative floats, and the sum of the features of inbound/outbound edges for every node must be positive (though they don't have to sum up to one). Otherwise, the result will be undefined. exclude_edges: tensor or dict, optional Edge IDs to exclude during sampling neighbors for the seed nodes. This argument can take a single ID tensor or a dictionary of edge types and ID tensors. If a single tensor is given, the graph must only have one type of nodes. replace : bool, optional If True, sample with replacement. When sampling with replacement, the sampled subgraph could have parallel edges. For sampling without replacement, if fanout > the number of neighbors, all the neighbors are sampled. If fanout == -1, all neighbors are collected. use_graphbolt : bool, optional Whether to use GraphBolt for sampling. Returns ------- DGLGraph A sampled subgraph containing only the sampled neighboring edges. It is on CPU. 
""" gpb = g.get_partition_book() if not gpb.is_homogeneous: assert isinstance(nodes, dict) homo_nids = [] for ntype in nodes: assert ( ntype in g.ntypes ), "The sampled node type does not exist in the input graph" if F.is_tensor(nodes[ntype]): typed_nodes = nodes[ntype] else: typed_nodes = toindex(nodes[ntype]).tousertensor() homo_nids.append(gpb.map_to_homo_nid(typed_nodes, ntype)) nodes = F.cat(homo_nids, 0) elif isinstance(nodes, dict): assert len(nodes) == 1 nodes = list(nodes.values())[0] def issue_remote_req(node_ids): if prob is not None and (not use_graphbolt): # See NOTE 1 _prob = g.edata[prob].kvstore_key else: _prob = prob return SamplingRequest( node_ids, fanout, edge_dir=edge_dir, prob=_prob, exclude_edges=None, replace=replace, use_graphbolt=use_graphbolt, ) def local_access(local_g, partition_book, local_nids): # See NOTE 1 _prob = ( [g.edata[prob].local_partition] if prob is not None and (not use_graphbolt) else prob ) return _sample_neighbors( use_graphbolt, local_g, partition_book, local_nids, fanout, edge_dir=edge_dir, prob=_prob, exclude_edges=None, replace=replace, ) frontier = _distributed_access( g, nodes, issue_remote_req, local_access, exclude_edges=exclude_edges ) if not gpb.is_homogeneous: return _frontier_to_heterogeneous_graph(g, frontier, gpb) else: return frontier def _distributed_edge_access(g, edges, issue_remote_req, local_access): """A routine that fetches local edges from distributed graph. The source and destination nodes of local edges are stored in the local machine and others are stored on remote machines. This code will issue remote access requests first before fetching data from the local machine. In the end, we combine the data from the local machine and remote machines. Parameters ---------- g : DistGraph The distributed graph edges : tensor The edges to find their source and destination nodes. issue_remote_req : callable The function that issues requests to access remote data. 
def find_edges(g, edge_ids):
    """Return the end points of the given edges of a distributed graph.

    Given an edge ID array, return the source and destination node ID arrays
    ``s`` and ``d``, where ``s[i]`` and ``d[i]`` are the source and destination
    node IDs of edge ``edge_ids[i]``.

    Parameters
    ----------
    g : DistGraph
        The distributed graph.
    edge_ids : tensor
        The edge ID array.

    Returns
    -------
    tensor
        The source node ID array.
    tensor
        The destination node ID array.
    """
    # ``EdgesRequest(edge_ids, order_id)`` builds the remote request and
    # ``_find_edges(local_g, partition_book, edge_ids)`` serves the local
    # partition; both already have the positional signatures expected by
    # ``_distributed_edge_access``.
    return _distributed_edge_access(g, edge_ids, EdgesRequest, _find_edges)
def _distributed_get_node_property(g, n, issue_remote_req, local_access):
    """Fetch a per-node value for nodes spread over several partitions.

    Nodes are grouped by the partition that owns them. Requests for remotely
    owned nodes are sent first, the locally owned nodes are then processed,
    and finally every partial result is scattered back to the positions its
    nodes had in ``n``.

    Parameters
    ----------
    g : DistGraph
        The distributed graph.
    n : tensor
        The global IDs of the nodes to query.
    issue_remote_req : callable
        Called as ``issue_remote_req(nids, pid)``; returns the request to send
        to partition ``pid``. The response must carry ``val`` and
        ``order_id`` attributes, with ``order_id`` equal to ``pid``.
    local_access : callable
        Called as ``local_access(local_partition, partition_book, nids)`` to
        compute the values on the local machine.

    Returns
    -------
    tensor or None
        The values aligned with ``n``. ``None`` if neither the local
        partition nor any remote partition produced a result.
    """
    gpb = g.get_partition_book()
    n = toindex(n).tousertensor()
    owner = gpb.nid2partid(n)

    def _result_buffer(sample):
        # A zero buffer shaped like ``sample`` but with one row per queried
        # node.
        shape = list(F.shape(sample))
        shape[0] = len(n)
        return F.zeros(shape, F.dtype(sample), F.cpu())

    remote_reqs = []
    # positions[pid] holds the indices in ``n`` of the nodes owned by ``pid``.
    positions = []
    local_query = None
    for part in range(gpb.num_partitions()):
        selected = owner == part
        part_nids = F.boolean_mask(n, selected)
        positions.append(F.nonzero_1d(selected))
        if part == gpb.partid and g.local_partition is not None:
            assert local_query is None
            local_query = part_nids
        elif len(part_nids) != 0:
            remote_reqs.append((part, issue_remote_req(part_nids, part)))

    # send requests to the remote machine.
    pending = send_requests_to_machine(remote_reqs) if remote_reqs else None

    # handle nodes in local partition.
    vals = None
    if local_query is not None:
        local_vals = local_access(g.local_partition, gpb, local_query)
        vals = F.scatter_row(
            _result_buffer(local_vals), positions[gpb.partid], local_vals
        )

    # receive responses from remote machines.
    if pending is not None:
        results = recv_responses(pending)
        if vals is None and len(results) > 0:
            vals = _result_buffer(results[0].val)
        for result in results:
            vals = F.scatter_row(vals, positions[result.order_id], result.val)
    return vals
Therefore, a node/edge can be uniquely identified by an ID pair, ``(type_id, type_wise_id)``. To make it convenient for distributed processing, DGL further encodes the ID pair into one integer ID, which we refer to as *homogeneous ID*. DGL arranges nodes and edges so that all nodes of the same type have contiguous homogeneous IDs. If the graph is partitioned, the nodes/edges of the same type within a partition have contiguous homogeneous IDs. Below is an example adjancency matrix of an unpartitioned heterogeneous graph stored using the above ID assignment. Here, the graph has two types of nodes (``T0`` and ``T1``), and four types of edges (``R0``, ``R1``, ``R2``, ``R3``). There are a total of 400 nodes in the graph and each type has 200 nodes. Nodes of type 0 have IDs in [0,200), while nodes of type 1 have IDs in [200, 400). ``` 0 <- T0 -> 200 <- T1 -> 400 0 +-----------+------------+ | | | ^ | R0 | R1 | T0 | | | v | | | 200 +-----------+------------+ | | | ^ | R2 | R3 | T1 | | | v | | | 400 +-----------+------------+ ``` Below shows the adjacency matrix after the graph is partitioned into two. Note that each partition still has two node types and four edge types, and nodes/edges of the same type have contiguous IDs. ``` partition 0 partition 1 0 <- T0 -> 100 <- T1 -> 200 <- T0 -> 300 <- T1 -> 400 0 +-----------+------------+-----------+------------+ | | | | ^ | R0 | R1 | | T0 | | | | v | | | | 100 +-----------+------------+ | | | | | ^ | R2 | R3 | | T1 | | | | v | | | | 200 +-----------+------------+-----------+------------+ | | | | ^ | | R0 | R1 | T0 | | | | v | | | | 100 | +-----------+------------+ | | | | ^ | | R2 | R3 | T1 | | | | v | | | | 200 +-----------+------------+-----------+------------+ ``` The following table is an alternative way to represent the above ID assignments. It is easy to see that the homogeneous ID range [0, 100) is used for nodes of type 0 in partition 0, [100, 200) is used for nodes of type 1 in partition 0, and so on. 
``` +---------+------+---------- range | type | partition [0, 100) | 0 | 0 [100,200) | 1 | 0 [200,300) | 0 | 1 [300,400) | 1 | 1 ``` The goal of this class is to, given a node's homogenous ID, convert it into the ID pair ``(type_id, type_wise_id)``. For example, homogeneous node ID 90 is mapped to (0, 90); homogeneous node ID 201 is mapped to (0, 101). Parameters ---------- id_ranges : dict[str, Tensor]. Node ID ranges within partitions for each node type. The key is the node type name in string. The value is a tensor of shape :math:`(K, 2)`, where :math:`K` is the number of partitions. Each row has two integers: the starting and the ending IDs for a particular node type in a partition. For example, all nodes of type ``"T"`` in partition ``i`` has ID range ``id_ranges["T"][i][0]`` to ``id_ranges["T"][i][1]``. It is the same as the `node_map` argument in `RangePartitionBook`. """ def __init__(self, id_ranges): id_ranges_values = list(id_ranges.values()) assert isinstance( id_ranges_values[0], np.ndarray ), "id_ranges should be a dict of numpy arrays." 
self.num_parts = id_ranges_values[0].shape[0] self.dtype = id_ranges_values[0].dtype self.dtype_str = "int32" if self.dtype == np.int32 else "int64" self.num_types = len(id_ranges) ranges = np.zeros( (self.num_parts * self.num_types, 2), dtype=self.dtype ) typed_map = [] id_ranges = id_ranges_values id_ranges.sort(key=lambda a: a[0, 0]) for i, id_range in enumerate(id_ranges): ranges[i :: self.num_types] = id_range map1 = np.cumsum(id_range[:, 1] - id_range[:, 0], dtype=self.dtype) typed_map.append(map1) assert np.all(np.diff(ranges[:, 0]) >= 0) assert np.all(np.diff(ranges[:, 1]) >= 0) self.range_start = utils.toindex( np.ascontiguousarray(ranges[:, 0]), dtype=self.dtype_str ) self.range_end = utils.toindex( np.ascontiguousarray(ranges[:, 1]) - 1, dtype=self.dtype_str ) self.typed_map = utils.toindex( np.concatenate(typed_map), dtype=self.dtype_str ) def __call__(self, ids): """Convert the homogeneous IDs to (type_id, type_wise_id). Parameters ---------- ids : 1D tensor The homogeneous ID. Returns ------- type_ids : Tensor Type IDs per_type_ids : Tensor Type-wise IDs """ if self.num_types == 0: return F.zeros((len(ids),), F.dtype(ids), F.cpu()), ids if len(ids) == 0: return ids, ids ids = utils.toindex(ids, dtype=self.dtype_str) ret = _CAPI_DGLHeteroMapIds( ids.todgltensor(), self.range_start.todgltensor(), self.range_end.todgltensor(), self.typed_map.todgltensor(), self.num_parts, self.num_types, ) ret = utils.toindex(ret, dtype=self.dtype_str).tousertensor() return ret[: len(ids)], ret[len(ids) :] @property def torch_dtype(self): """Return the data type of the ID map.""" # [TODO][Rui] Use torch instead of numpy. return torch.int32 if self.dtype == np.int32 else torch.int64 _init_api("dgl.distributed.id_map") ================================================ FILE: python/dgl/distributed/kvstore.py ================================================ """Define distributed kvstore""" import os import numpy as np from .. 
############################ Register KVStore Requests and Responses ###############################

KVSTORE_PULL = 901231


def _ensure_registered(kv_store, name):
    """Raise ``RuntimeError`` unless ``name`` has a partition policy and a data tensor."""
    if name not in kv_store.part_policy:
        raise RuntimeError(
            "KVServer cannot find partition policy with name: %s" % name
        )
    if name not in kv_store.data_store:
        raise RuntimeError(
            "KVServer Cannot find data tensor with name: %s" % name
        )


class PullResponse(rpc.Response):
    """Reply to a :class:`PullRequest` carrying the sliced data tensor.

    Parameters
    ----------
    server_id : int
        ID of the server that produced the reply.
    data_tensor : tensor
        The rows that were pulled.
    """

    def __init__(self, server_id, data_tensor):
        self.server_id, self.data_tensor = server_id, data_tensor

    def __getstate__(self):
        return (self.server_id, self.data_tensor)

    def __setstate__(self, state):
        server_id, data_tensor = state
        self.server_id = server_id
        self.data_tensor = data_tensor


class PullRequest(rpc.Request):
    """Ask a server for the rows of a named tensor.

    Parameters
    ----------
    name : str
        Name of the data to read.
    id_tensor : tensor
        Vector of data IDs; the server converts it with the partition
        policy's ``to_local`` before reading.
    """

    def __init__(self, name, id_tensor):
        self.name, self.id_tensor = name, id_tensor

    def __getstate__(self):
        return (self.name, self.id_tensor)

    def __setstate__(self, state):
        name, id_tensor = state
        self.name = name
        self.id_tensor = id_tensor

    def process_request(self, server_state):
        """Run the registered pull handler and wrap its result in a response."""
        store = server_state.kv_store
        _ensure_registered(store, self.name)
        rows = store.part_policy[self.name].to_local(self.id_tensor)
        handler = store.pull_handlers[self.name]
        pulled = handler(store.data_store, self.name, rows)
        return PullResponse(store.server_id, pulled)


KVSTORE_PUSH = 901232


class PushRequest(rpc.Request):
    """Send IDs and data to a server to update a named tensor.

    This request has no response.

    Parameters
    ----------
    name : str
        Name of the data to update.
    id_tensor : tensor
        Vector of data IDs.
    data_tensor : tensor
        Tensor with one row per entry of ``id_tensor``.
    """

    def __init__(self, name, id_tensor, data_tensor):
        self.name = name
        self.id_tensor = id_tensor
        self.data_tensor = data_tensor

    def __getstate__(self):
        return (self.name, self.id_tensor, self.data_tensor)

    def __setstate__(self, state):
        name, id_tensor, data_tensor = state
        self.name = name
        self.id_tensor = id_tensor
        self.data_tensor = data_tensor

    def process_request(self, server_state):
        """Run the registered push handler; nothing is returned."""
        store = server_state.kv_store
        _ensure_registered(store, self.name)
        rows = store.part_policy[self.name].to_local(self.id_tensor)
        handler = store.push_handlers[self.name]
        handler(store.data_store, self.name, rows, self.data_tensor)


INIT_DATA = 901233
INIT_MSG = "Init"


class InitDataResponse(rpc.Response):
    """Confirmation (a short string message) of an InitDataRequest.

    Parameters
    ----------
    msg : string
        string message
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        # The state is the bare message, not a tuple.
        return self.msg

    def __setstate__(self, state):
        self.msg = state
class InitDataRequest(rpc.Request):
    """Send meta data to server and init data tensor
    on server using UDF init function.

    Parameters
    ----------
    name : str
        data name
    shape : tuple
        data shape
    dtype : str
        data type string, e.g., 'int64', 'float32', etc.
    policy_str : str
        partition-policy string, e.g., 'edge' or 'node'.
    init_func : function
        UDF init function, called as ``init_func(shape, dtype)``.
    """

    def __init__(self, name, shape, dtype, policy_str, init_func):
        self.name = name
        self.shape = shape
        self.dtype = dtype
        self.policy_str = policy_str
        self.init_func = init_func

    def __getstate__(self):
        return (
            self.name,
            self.shape,
            self.dtype,
            self.policy_str,
            self.init_func,
        )

    def __setstate__(self, state):
        name, shape, dtype, policy_str, init_func = state
        self.name = name
        self.shape = shape
        self.dtype = dtype
        self.policy_str = policy_str
        self.init_func = init_func

    def process_request(self, server_state):
        """Create the tensor on the server, or verify a duplicated request."""
        store = server_state.kv_store
        backend_dtype = F.data_type_dict[self.dtype]
        if self.name not in store.data_store:
            if store.is_backup_server():
                # Backup servers only record the meta data.
                store.init_data(name=self.name, policy_str=self.policy_str)
            else:
                tensor = self.init_func(self.shape, backend_dtype)
                store.init_data(
                    name=self.name,
                    policy_str=self.policy_str,
                    data_tensor=tensor,
                )
            return InitDataResponse(INIT_MSG)

        # The same request arrives from multiple clients. A duplicate is
        # ignored, but it must describe the tensor that already exists.
        existing = store.data_store[self.name]
        assert tuple(F.shape(existing)) == tuple(self.shape)
        assert F.reverse_data_type_dict[F.dtype(existing)] == self.dtype
        assert store.part_policy[self.name].policy_str == self.policy_str
        return InitDataResponse(INIT_MSG)


BARRIER = 901234
BARRIER_MSG = "Barrier"


class BarrierResponse(rpc.Response):
    """Confirmation signal (a short string message) of a BarrierRequest.

    Parameters
    ----------
    msg : string
        string msg
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        return self.msg

    def __setstate__(self, state):
        self.msg = state


class BarrierRequest(rpc.Request):
    """Send a barrier signal (just a short string message) to server.

    Parameters
    ----------
    role : string
        client role
    """

    def __init__(self, role):
        self.role = role
        # The group ID is captured when the request is created.
        self.group_id = rpc.get_group_id()

    def __getstate__(self):
        return (self.role, self.group_id)

    def __setstate__(self, state):
        role, group_id = state
        self.role = role
        self.group_id = group_id

    def process_request(self, server_state):
        """Count this arrival; reply to every client of the role once all arrived.

        Returns ``None`` while clients are still missing, otherwise a list
        of ``(client_id, BarrierResponse)`` pairs.
        """
        group_roles = server_state.roles[self.group_id]
        counters = server_state.kv_store.barrier_count[self.group_id]
        counters[self.role] = counters[self.role] + 1
        if counters[self.role] != len(group_roles[self.role]):
            return None
        # Everybody arrived: reset the counter for the next barrier.
        counters[self.role] = 0
        return [
            (client_id, BarrierResponse(BARRIER_MSG))
            for client_id, _ in group_roles[self.role]
        ]


REGISTER_PULL = 901235
REGISTER_PULL_MSG = "Register_Pull"


class RegisterPullHandlerResponse(rpc.Response):
    """Confirmation signal (a short string message) of a
    RegisterPullHandlerRequest.

    Parameters
    ----------
    msg : string
        string message
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        return self.msg

    def __setstate__(self, state):
        self.msg = state


class RegisterPullHandlerRequest(rpc.Request):
    """Send an UDF and register Pull handler on server.

    Parameters
    ----------
    name : str
        Name of the data the handler is registered for.
    pull_func : func
        UDF pull handler
    """

    def __init__(self, name, pull_func):
        self.name, self.pull_func = name, pull_func

    def __getstate__(self):
        return (self.name, self.pull_func)

    def __setstate__(self, state):
        name, pull_func = state
        self.name = name
        self.pull_func = pull_func

    def process_request(self, server_state):
        """Store the handler under the data name and confirm."""
        server_state.kv_store.pull_handlers[self.name] = self.pull_func
        return RegisterPullHandlerResponse(REGISTER_PULL_MSG)


REGISTER_PUSH = 901236
REGISTER_PUSH_MSG = "Register_Push"
class RegisterPushHandlerResponse(rpc.Response):
    """Confirmation signal (a short string message) of a
    RegisterPushHandlerRequest.

    Parameters
    ----------
    msg : string
        string message
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        return self.msg

    def __setstate__(self, state):
        self.msg = state


class RegisterPushHandlerRequest(rpc.Request):
    """Send an UDF to register Push handler on server.

    Parameters
    ----------
    name : str
        Name of the data the handler is registered for.
    push_func : func
        UDF push handler
    """

    def __init__(self, name, push_func):
        self.name, self.push_func = name, push_func

    def __getstate__(self):
        return (self.name, self.push_func)

    def __setstate__(self, state):
        name, push_func = state
        self.name = name
        self.push_func = push_func

    def process_request(self, server_state):
        """Store the handler under the data name and confirm."""
        server_state.kv_store.push_handlers[self.name] = self.push_func
        return RegisterPushHandlerResponse(REGISTER_PUSH_MSG)


GET_SHARED = 901237
GET_SHARED_MSG = "Get_Shared"


class GetSharedDataResponse(rpc.Response):
    """Send meta data of shared-memory tensor to client.

    Parameters
    ----------
    meta : dict
        a dict of meta, e.g.,

        {'data_0' : (shape, dtype, policy_str),
         'data_1' : (shape, dtype, policy_str)}
    """

    def __init__(self, meta):
        self.meta = meta

    def __getstate__(self):
        return self.meta

    def __setstate__(self, state):
        self.meta = state


class GetSharedDataRequest(rpc.Request):
    """Send a signal (just a short string message) to get the
    meta data of shared-tensor from server.

    Parameters
    ----------
    msg : string
        string message
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        return self.msg

    def __setstate__(self, state):
        self.msg = state

    def process_request(self, server_state):
        """Describe every tensor of the server as (shape, dtype string, policy string)."""
        assert self.msg == GET_SHARED_MSG
        kv_store = server_state.kv_store
        meta = {
            name: (
                F.shape(data),
                F.reverse_data_type_dict[F.dtype(data)],
                kv_store.part_policy[name].policy_str,
            )
            for name, data in kv_store.data_store.items()
        }
        return GetSharedDataResponse(meta)


GET_PART_SHAPE = 901238


class GetPartShapeResponse(rpc.Response):
    """Send the partitioned data shape back to client.

    Parameters
    ----------
    shape : tuple
        shape of tensor
    """

    def __init__(self, shape):
        self.shape = shape

    def __getstate__(self):
        return self.shape

    def __setstate__(self, state):
        # When the shape has only one dimension, state is an integer.
        self.shape = (state,) if isinstance(state, int) else state


class GetPartShapeRequest(rpc.Request):
    """Send data name to get the partitioned data shape from server.

    Parameters
    ----------
    name : str
        data name
    """

    def __init__(self, name):
        self.name = name

    def __getstate__(self):
        return self.name

    def __setstate__(self, state):
        self.name = state

    def process_request(self, server_state):
        """Return the shape of the named tensor stored on this server."""
        tensors = server_state.kv_store.data_store
        if self.name not in tensors:
            raise RuntimeError(
                "KVServer Cannot find data tensor with name: %s" % self.name
            )
        return GetPartShapeResponse(F.shape(tensors[self.name]))


SEND_META_TO_BACKUP = 901239
SEND_META_TO_BACKUP_MSG = "Send_Meta_TO_Backup"


class SendMetaToBackupResponse(rpc.Response):
    """Confirmation signal (a short string message) of a
    SendMetaToBackupRequest.

    Parameters
    ----------
    msg : string
        string message
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        return self.msg

    def __setstate__(self, state):
        self.msg = state
class SendMetaToBackupRequest(rpc.Request):
    """Send meta data to backup server and backup server
    will use this meta data to read shared-memory tensor.

    Parameters
    ----------
    name : str
        data name
    dtype : str
        data type string
    shape : tuple of int
        data shape
    policy_str : str
        partition-policy string, e.g., 'edge' or 'node'.
    pull_handler : callable
        The callback function when data is pulled from kvstore.
    push_handler : callable
        The callback function when data is pushed to kvstore.
    """

    def __init__(
        self, name, dtype, shape, policy_str, pull_handler, push_handler
    ):
        self.name = name
        self.dtype = dtype
        self.shape = shape
        self.policy_str = policy_str
        self.pull_handler = pull_handler
        self.push_handler = push_handler

    def __getstate__(self):
        return (
            self.name,
            self.dtype,
            self.shape,
            self.policy_str,
            self.pull_handler,
            self.push_handler,
        )

    def __setstate__(self, state):
        (
            self.name,
            self.dtype,
            self.shape,
            self.policy_str,
            self.pull_handler,
            self.push_handler,
        ) = state

    def process_request(self, server_state):
        """Attach the shared-memory tensor on a backup server.

        A repeated request for a known name is only checked for
        consistency with what is already registered.
        """
        kv_store = server_state.kv_store
        assert kv_store.is_backup_server()
        if self.name not in kv_store.data_store:
            # ``False``: open the existing shared-memory buffer instead of
            # creating it (the creating call in this file passes ``True``).
            shared_data = empty_shared_mem(
                self.name + "-kvdata-", False, self.shape, self.dtype
            )
            dlpack = shared_data.to_dlpack()
            kv_store.data_store[self.name] = F.zerocopy_from_dlpack(dlpack)
            kv_store.part_policy[self.name] = kv_store.find_policy(
                self.policy_str
            )
            kv_store.pull_handlers[self.name] = self.pull_handler
            kv_store.push_handlers[self.name] = self.push_handler
        else:
            existing = kv_store.data_store[self.name]
            assert tuple(F.shape(existing)) == tuple(self.shape)
            assert F.reverse_data_type_dict[F.dtype(existing)] == self.dtype
            assert kv_store.part_policy[self.name].policy_str == self.policy_str
            assert kv_store.pull_handlers[self.name] == self.pull_handler
            assert kv_store.push_handlers[self.name] == self.push_handler
        return SendMetaToBackupResponse(SEND_META_TO_BACKUP_MSG)


DELETE_DATA = 901240
DELETE_MSG = "Delete_Data"


class DeleteDataResponse(rpc.Response):
    """Confirmation signal (a short string message) of a DeleteDataRequest.

    Parameters
    ----------
    msg : string
        string message
    """

    def __init__(self, msg):
        self.msg = msg

    def __getstate__(self):
        return self.msg

    def __setstate__(self, state):
        self.msg = state


class DeleteDataRequest(rpc.Request):
    """Send message to server to delete data tensor

    Parameters
    ----------
    name : str
        data name
    """

    def __init__(self, name):
        self.name = name

    def __getstate__(self):
        return self.name

    def __setstate__(self, state):
        self.name = state

    def process_request(self, server_state):
        """Forget everything registered under the name and confirm.

        Unknown names are ignored, so the request can be repeated.
        """
        kv_store = server_state.kv_store
        # The policy and the handlers can be registered without a tensor
        # (``KVServer.init_data`` with ``data_tensor=None``). Every registry
        # is therefore cleaned independently instead of only when the name
        # is found in ``data_store``; ``pop`` also tolerates a name that is
        # only partially registered.
        for registry in (
            kv_store.data_store,
            kv_store.part_policy,
            kv_store.push_handlers,
            kv_store.pull_handlers,
        ):
            registry.pop(self.name, None)
        return DeleteDataResponse(DELETE_MSG)


COUNT_LOCAL_NONZERO = 901241


class CountLocalNonzeroResponse(rpc.Response):
    """Send the number of nonzero value in local data"""

    def __init__(self, num_local_nonzero):
        self.num_local_nonzero = num_local_nonzero

    def __getstate__(self):
        return self.num_local_nonzero

    def __setstate__(self, state):
        self.num_local_nonzero = state


class CountLocalNonzeroRequest(rpc.Request):
    """Send data name to server to count local nonzero value

    Parameters
    ----------
    name : str
        data name
    """

    def __init__(self, name):
        self.name = name

    def __getstate__(self):
        return self.name

    def __setstate__(self, state):
        self.name = state

    def process_request(self, server_state):
        """Delegate to ``kv_store.count_local_nonzero`` and wrap the result."""
        kv_store = server_state.kv_store
        num_local_nonzero = kv_store.count_local_nonzero(self.name)
        return CountLocalNonzeroResponse(num_local_nonzero)
############################ KVServer ###############################


def default_push_handler(target, name, id_tensor, data_tensor):
    """Default handler for PUSH message.

    By default the rows of ``target[name]`` selected by ``id_tensor`` are
    overwritten with ``data_tensor``.

    Parameters
    ----------
    target : dict
        Maps data names to tensors.
    name : str
        data name
    id_tensor : tensor
        a vector storing the ID list.
    data_tensor : tensor
        a tensor with the same row size of id
    """
    # TODO(chao): support Tensorflow backend
    target[name][id_tensor] = data_tensor


def default_pull_handler(target, name, id_tensor):
    """Default handler for PULL operation.

    By default the rows of ``target[name]`` selected by ``id_tensor`` are
    returned.

    Parameters
    ----------
    target : dict
        Maps data names to tensors.
    name : str
        data name
    id_tensor : tensor
        a vector storing the ID list.

    Return
    ------
    tensor
        a tensor with the same row size of ID.
    """
    # TODO(chao): support Tensorflow backend
    return target[name][id_tensor]


class KVServer(object):
    """KVServer is a lightweight key-value store service for DGL distributed training.

    In practice, developers can use KVServer to hold large-scale graph features or
    graph embeddings across machines in a distributed setting. KVServer depends on DGL rpc
    infrastructure that supports backup servers, which means we can launch many KVServers
    on the same machine for load-balancing.

    DO NOT use KVServer in multi-threads because this behavior is not defined. For now, KVServer
    can only support CPU-to-CPU communication. We may support GPU-communication in the future.

    Parameters
    ----------
    server_id : int
        ID of current server (starts from 0).
    ip_config : str
        Path of IP configuration file.
    num_servers : int
        Server count on each machine.
    num_clients : int
        Total number of KVClients that will be connected to the KVServer.
    """

    def __init__(self, server_id, ip_config, num_servers, num_clients):
        assert server_id >= 0, (
            "server_id (%d) cannot be a negative number." % server_id
        )
        assert num_servers > 0, (
            "num_servers (%d) must be a positive number." % num_servers
        )
        assert os.path.exists(ip_config), "Cannot open file: %s" % ip_config
        assert num_clients >= 0, (
            "num_clients (%d) cannot be a negative number." % num_clients
        )
        # Register services on server
        services = (
            (KVSTORE_PULL, PullRequest, PullResponse),
            (KVSTORE_PUSH, PushRequest, None),
            (INIT_DATA, InitDataRequest, InitDataResponse),
            (BARRIER, BarrierRequest, BarrierResponse),
            (
                REGISTER_PUSH,
                RegisterPushHandlerRequest,
                RegisterPushHandlerResponse,
            ),
            (
                REGISTER_PULL,
                RegisterPullHandlerRequest,
                RegisterPullHandlerResponse,
            ),
            (GET_SHARED, GetSharedDataRequest, GetSharedDataResponse),
            (GET_PART_SHAPE, GetPartShapeRequest, GetPartShapeResponse),
            (
                SEND_META_TO_BACKUP,
                SendMetaToBackupRequest,
                SendMetaToBackupResponse,
            ),
            (DELETE_DATA, DeleteDataRequest, DeleteDataResponse),
            (
                COUNT_LOCAL_NONZERO,
                CountLocalNonzeroRequest,
                CountLocalNonzeroResponse,
            ),
        )
        for service_id, request_cls, response_cls in services:
            rpc.register_service(service_id, request_cls, response_cls)
        # Store the tensor data with specified data name
        self._data_store = {}
        # Store original tensor data names when instantiating DistGraphServer
        self._orig_data = set()
        # Store the partition information with specified data name
        self._policy_set = set()
        self._part_policy = {}
        # Basic information
        self._server_id = server_id
        self._server_namebook = rpc.read_ip_config(ip_config, num_servers)
        assert (
            server_id in self._server_namebook
        ), "Trying to start server {}, but there are {} servers in the config file".format(
            server_id, len(self._server_namebook)
        )
        self._machine_id = self._server_namebook[server_id][0]
        self._group_count = self._server_namebook[server_id][3]
        # We assume partition_id is equal to machine_id
        self._part_id = self._machine_id
        self._num_clients = num_clients
        self._barrier_count = {}
        # push and pull handler
        self._push_handlers = {}
        self._pull_handlers = {}

    @property
    def server_id(self):
        """Get server ID"""
        return self._server_id

    @property
    def barrier_count(self):
        """Get barrier count"""
        return self._barrier_count

    @barrier_count.setter
    def barrier_count(self, count):
        """Set barrier count"""
        self._barrier_count = count

    @property
    def num_clients(self):
        """Get number of clients"""
        return self._num_clients

    @property
    def data_store(self):
        """Get data store"""
        return self._data_store

    @property
    def orig_data(self):
        """Get original data"""
        return self._orig_data

    @property
    def part_policy(self):
        """Get part policy"""
        return self._part_policy

    @property
    def part_id(self):
        """Get part ID"""
        return self._part_id

    @property
    def push_handlers(self):
        """Get push handler"""
        return self._push_handlers

    @property
    def pull_handlers(self):
        """Get pull handler"""
        return self._pull_handlers

    def is_backup_server(self):
        """Return True if current server is a backup server."""
        # Only server IDs that are a multiple of the group count are
        # treated as non-backup servers.
        return self._server_id % self._group_count != 0

    def add_part_policy(self, policy):
        """Add partition policy to kvserver.

        Parameters
        ----------
        policy : PartitionPolicy
            Store the partition information
        """
        self._policy_set.add(policy)

    def init_data(self, name, policy_str, data_tensor=None):
        """Init data tensor on kvserver.

        Nothing is registered when the validation fails.

        Parameters
        ----------
        name : str
            data name
        policy_str : str
            partition-policy string, e.g., 'edge' or 'node'.
        data_tensor : tensor
            If the data_tensor is None, KVServer will
            read shared-memory when client invoking get_shared_data().

        Raises
        ------
        RuntimeError
            If ``name`` already has a tensor or ``policy_str`` is unknown.
        """
        assert len(name) > 0, "name cannot be empty."
        if name in self._data_store:
            raise RuntimeError("Data %s has already exists!" % name)
        policy = self.find_policy(policy_str)
        if data_tensor is not None:
            # Validate the row count before allocating shared memory, so a
            # mismatching tensor is never left registered on the server.
            assert (
                policy.get_part_size() == data_tensor.shape[0]
            ), "kvserver expect partition {} for {} has {} rows, but gets {} rows".format(
                policy.part_id,
                policy_str,
                policy.get_part_size(),
                data_tensor.shape[0],
            )
            # Create shared-tensor
            data_type = F.reverse_data_type_dict[F.dtype(data_tensor)]
            shared_data = empty_shared_mem(
                name + "-kvdata-", True, data_tensor.shape, data_type
            )
            dlpack = shared_data.to_dlpack()
            shared_tensor = F.zerocopy_from_dlpack(dlpack)
            rpc.copy_data_to_shared_memory(shared_tensor, data_tensor)
            self._data_store[name] = shared_tensor
        self._part_policy[name] = policy
        self._pull_handlers[name] = default_pull_handler
        self._push_handlers[name] = default_push_handler

    def find_policy(self, policy_str):
        """Find a partition policy from existing policy set

        Parameters
        ----------
        policy_str : str
            partition-policy string, e.g., 'edge' or 'node'.

        Raises
        ------
        RuntimeError
            If no added policy has this ``policy_str``.
        """
        for policy in self._policy_set:
            if policy_str == policy.policy_str:
                return policy
        raise RuntimeError(
            "Cannot find policy_str: %s from kvserver." % policy_str
        )

    def count_local_nonzero(self, name):
        """Count nonzero in local data

        Parameters
        ----------
        name : str
            data name.

        Returns
        -------
        int
            the number of nonzero in local data.
        """
        assert len(name) > 0, "name cannot be empty."
        if name not in self._data_store:
            raise RuntimeError("Data %s has not be created!" % name)
        return F.count_nonzero(self._data_store[name])
def __init__(self, ip_config, num_servers, role="default"):
    """Register the kvstore services and read the server layout.

    Parameters
    ----------
    ip_config : str
        Path of IP configuration file.
    num_servers : int
        Server count on each machine.
    role : str
        Role of this client; it is sent with every barrier request.
    """
    assert (
        rpc.get_rank() != -1
    ), "Please invoke rpc.connect_to_server() before creating KVClient."
    assert os.path.exists(ip_config), "Cannot open file: %s" % ip_config
    assert num_servers > 0, (
        "num_servers (%d) must be a positive number." % num_servers
    )
    # Register services on client
    services = (
        (KVSTORE_PULL, PullRequest, PullResponse),
        (KVSTORE_PUSH, PushRequest, None),
        (INIT_DATA, InitDataRequest, InitDataResponse),
        (BARRIER, BarrierRequest, BarrierResponse),
        (
            REGISTER_PUSH,
            RegisterPushHandlerRequest,
            RegisterPushHandlerResponse,
        ),
        (
            REGISTER_PULL,
            RegisterPullHandlerRequest,
            RegisterPullHandlerResponse,
        ),
        (GET_SHARED, GetSharedDataRequest, GetSharedDataResponse),
        (GET_PART_SHAPE, GetPartShapeRequest, GetPartShapeResponse),
        (
            SEND_META_TO_BACKUP,
            SendMetaToBackupRequest,
            SendMetaToBackupResponse,
        ),
        (DELETE_DATA, DeleteDataRequest, DeleteDataResponse),
        (
            COUNT_LOCAL_NONZERO,
            CountLocalNonzeroRequest,
            CountLocalNonzeroResponse,
        ),
    )
    for service_id, request_cls, response_cls in services:
        rpc.register_service(service_id, request_cls, response_cls)
    # Store the tensor data with specified data name
    self._data_store = {}
    # Store the partition information with specified data name
    self._part_policy = {}
    # This stores all unique partition policies in the kvstore. The key is the policy name.
    self._all_possible_part_policy = {}
    # Store the full data shape across kvserver
    self._full_data_shape = {}
    # Store all the data name
    self._data_name_list = set()
    # Store all graph data name
    self._gdata_name_list = set()
    # Basic information
    self._server_namebook = rpc.read_ip_config(ip_config, num_servers)
    self._server_count = len(self._server_namebook)
    self._group_count = self._server_namebook[0][3]
    # Integer division: both operands are counts, so there is no reason to
    # round-trip through a float.
    self._machine_count = self._server_count // self._group_count
    self._client_id = rpc.get_rank()
    self._machine_id = rpc.get_machine_id()
    self._part_id = self._machine_id
    # First server ID of the group that belongs to this machine.
    self._main_server_id = self._machine_id * self._group_count
    # push and pull handler
    self._pull_handlers = {}
    self._push_handlers = {}
    # The role is sent to server 0 with every barrier request.
    self._role = role


@property
def all_possible_part_policy(self):
    """Get all possible partition policies"""
    return self._all_possible_part_policy


@property
def client_id(self):
    """Get client ID"""
    return self._client_id


@property
def role(self):
    """Get client role"""
    return self._role


@property
def machine_id(self):
    """Get machine ID"""
    return self._machine_id


@property
def num_servers(self):
    """Get the total number of servers listed in the IP config"""
    return self._server_count


@property
def group_count(self):
    """Get the group count read from the server namebook.

    It is the stride between the first server IDs of two consecutive
    machines.
    """
    return self._group_count


def barrier(self):
    """Barrier for all client nodes.

    This API will be blocked until all the clients invoke this API.
    """
    request = BarrierRequest(self._role)
    # Barrier requests always go to server 0.
    rpc.send_request(0, request)
    response = rpc.recv_response()
    assert response.msg == BARRIER_MSG
def register_push_handler(self, name, func):
    """Register UDF push function.

    This UDF is triggered for every push. The signature of the UDF is

    ```
    def push_handler(data_store, name, local_offset, data)
    ```

    ``data_store`` is a dict that contains all tensors in the kvstore. ``name`` is the name
    of the tensor where new data is pushed to. ``local_offset`` is the offset where new
    data should be written in the tensor in the local partition. ``data`` is the new data
    to be written.

    Parameters
    ----------
    name : str
        The name of the tensor
    func : callable
        The function to be called.
    """
    self.barrier()
    request = RegisterPushHandlerRequest(name, func)
    # send request to all the server nodes
    for server_id in range(self._server_count):
        rpc.send_request(server_id, request)
    # recv response from all the server nodes
    for _ in range(self._server_count):
        response = rpc.recv_response()
        assert response.msg == REGISTER_PUSH_MSG
    self._push_handlers[name] = func
    self.barrier()


def register_pull_handler(self, name, func):
    """Register UDF pull function.

    This UDF is triggered for every pull. The signature of the UDF is

    ```
    def pull_handler(data_store, name, local_offset)
    ```

    ``data_store`` is a dict that contains all tensors in the kvstore. ``name`` is the name
    of the tensor the data is pulled from. ``local_offset`` is the offset of the requested
    data in the tensor in the local partition. The UDF returns the pulled data.

    Parameters
    ----------
    name : str
        The name of the tensor
    func : callable
        The function to be called.
    """
    self.barrier()
    request = RegisterPullHandlerRequest(name, func)
    # send request to all the server nodes
    for server_id in range(self._server_count):
        rpc.send_request(server_id, request)
    # recv response from all the server nodes
    for _ in range(self._server_count):
        response = rpc.recv_response()
        assert response.msg == REGISTER_PULL_MSG
    self._pull_handlers[name] = func
    self.barrier()


def init_data(
    self, name, shape, dtype, part_policy, init_func, is_gdata=True
):
    """Send message to kvserver to initialize new data tensor and mapping this
    data from server side to client side.

    Parameters
    ----------
    name : str
        data name
    shape : list or tuple of int
        data shape
    dtype : dtype
        data type
    part_policy : PartitionPolicy
        partition policy.
    init_func : func
        UDF init function
    is_gdata : bool
        Whether the created tensor is a ndata/edata or not.

    Raises
    ------
    RuntimeError
        If the client already holds a policy, a tensor or a full shape
        under ``name``.
    """
    assert len(name) > 0, "name cannot be empty."
    assert len(shape) > 0, "shape cannot be empty"
    assert name not in self._data_name_list, (
        "data name: %s already exists." % name
    )
    # Fail fast: reject a clashing name before any server is asked to
    # create the tensor.
    if name in self._part_policy:
        raise RuntimeError("Policy %s has already exists!" % name)
    if name in self._data_store:
        raise RuntimeError("Data %s has already exists!" % name)
    if name in self._full_data_shape:
        raise RuntimeError("Data shape %s has already exists!" % name)

    self.barrier()
    shape = list(shape)
    dtype_str = F.reverse_data_type_dict[dtype]

    # Shape of the local partition: the first dimension is replaced by the
    # partition size, the other dimensions are kept.
    part_shape = shape.copy()
    part_shape[0] = part_policy.get_part_size()

    # Send request to the servers to initialize data.
    # The servers may handle the duplicated initializations.
    request = InitDataRequest(
        name,
        tuple(part_shape),
        dtype_str,
        part_policy.policy_str,
        init_func,
    )
    # The request is sent to the servers in one group, which are on the same machine.
    for n in range(self._group_count):
        server_id = part_policy.part_id * self._group_count + n
        rpc.send_request(server_id, request)
    for _ in range(self._group_count):
        response = rpc.recv_response()
        assert response.msg == INIT_MSG

    self.barrier()
    # Create local shared-data
    self._part_policy[name] = part_policy
    self._all_possible_part_policy[part_policy.policy_str] = part_policy
    shared_data = empty_shared_mem(
        name + "-kvdata-",
        False,
        part_shape,
        dtype_str,
    )
    dlpack = shared_data.to_dlpack()
    self._data_store[name] = F.zerocopy_from_dlpack(dlpack)
    self._data_name_list.add(name)
    if is_gdata:
        self._gdata_name_list.add(name)
    self._full_data_shape[name] = tuple(shape)
    self._pull_handlers[name] = default_pull_handler
    self._push_handlers[name] = default_push_handler

    # Now we need to tell the backup server the new tensor.
    request = SendMetaToBackupRequest(
        name,
        dtype_str,
        part_shape,
        part_policy.policy_str,
        self._pull_handlers[name],
        self._push_handlers[name],
    )
    # send request to all the backup server nodes
    for i in range(self._group_count - 1):
        server_id = self._machine_id * self._group_count + i + 1
        rpc.send_request(server_id, request)
    # recv response from all the backup server nodes
    for _ in range(self._group_count - 1):
        response = rpc.recv_response()
        assert response.msg == SEND_META_TO_BACKUP_MSG
    self.barrier()


def delete_data(self, name):
    """Send message to kvserver to delete tensor and clear the meta data

    Parameters
    ----------
    name : str
        data name
    """
    assert len(name) > 0, "name cannot be empty."
    assert name in self._data_name_list, "data name: %s not exists." % name
    self.barrier()
    part_policy = self._part_policy[name]

    # send request to every server of the group that holds the partition
    request = DeleteDataRequest(name)
    for n in range(self._group_count):
        server_id = part_policy.part_id * self._group_count + n
        rpc.send_request(server_id, request)
    for _ in range(self._group_count):
        response = rpc.recv_response()
        assert response.msg == DELETE_MSG

    self.barrier()
    # Clear the client-side bookkeeping of the tensor.
    self._data_name_list.remove(name)
    if name in self._gdata_name_list:
        self._gdata_name_list.remove(name)
    del self._data_store[name]
    del self._full_data_shape[name]
    del self._part_policy[name]
    del self._pull_handlers[name]
    del self._push_handlers[name]
    self.barrier()
Parameters ---------- partition_book : GraphPartitionBook Store the partition information """ # Get all partition policies for ntype in partition_book.ntypes: policy = NodePartitionPolicy(partition_book, ntype) self._all_possible_part_policy[policy.policy_str] = policy for etype in partition_book.canonical_etypes: policy = EdgePartitionPolicy(partition_book, etype) self._all_possible_part_policy[policy.policy_str] = policy # Get shared data from server side self.barrier() request = GetSharedDataRequest(GET_SHARED_MSG) rpc.send_request(self._main_server_id, request) response = rpc.recv_response() for name, meta in response.meta.items(): if name not in self._data_name_list: shape, dtype, policy_str = meta assert policy_str in self._all_possible_part_policy shared_data = empty_shared_mem( name + "-kvdata-", False, shape, dtype ) dlpack = shared_data.to_dlpack() self._data_store[name] = F.zerocopy_from_dlpack(dlpack) self._part_policy[name] = self._all_possible_part_policy[ policy_str ] self._pull_handlers[name] = default_pull_handler self._push_handlers[name] = default_push_handler # Get full data shape across servers for name, meta in response.meta.items(): if name not in self._data_name_list: shape, _, _ = meta data_shape = list(shape) data_shape[0] = 0 request = GetPartShapeRequest(name) # send request to all main server nodes for machine_id in range(self._machine_count): server_id = machine_id * self._group_count rpc.send_request(server_id, request) # recv response from all the main server nodes for _ in range(self._machine_count): res = rpc.recv_response() data_shape[0] += res.shape[0] self._full_data_shape[name] = tuple(data_shape) # Send meta data to backup servers for name, meta in response.meta.items(): shape, dtype, policy_str = meta request = SendMetaToBackupRequest( name, dtype, shape, policy_str, self._pull_handlers[name], self._push_handlers[name], ) # send request to all the backup server nodes for i in range(self._group_count - 1): server_id = 
def get_partid(self, name, id_tensor):
    """Look up the partition ID of every global ID in ``id_tensor``.

    The IDs are normalised through ``utils.toindex(...).tousertensor()`` and
    then handed to the ``to_partid`` method of the partition policy
    registered for ``name``.

    Parameters
    ----------
    name : str
        data name
    id_tensor : tensor
        a vector storing the global data ID

    Returns
    -------
    tensor
        Whatever ``to_partid`` of the partition policy returns for the IDs.
    """
    assert len(name) > 0, "name cannot be empty."
    ids = utils.toindex(id_tensor).tousertensor()
    assert F.ndim(ids) == 1, "ID must be a vector."
    return self._part_policy[name].to_partid(ids)
def pull(self, name, id_tensor):
    """Fetch the rows of ``name`` selected by ``id_tensor``.

    When the pull handler registered for ``name`` is ``default_pull_handler``
    the whole request is delegated to ``rpc.fast_pull``.  Otherwise the IDs
    are grouped by partition, one ``PullRequest`` is sent per remote machine,
    the local share is served by the registered handler, and the responses
    are concatenated and put back into the order of ``id_tensor``.

    Parameters
    ----------
    name : str
        data name
    id_tensor : tensor
        a vector storing the ID list

    Returns
    -------
    tensor
        a data tensor with the same row size of id_tensor.
    """
    assert len(name) > 0, "name cannot be empty."
    id_tensor = utils.toindex(id_tensor).tousertensor()
    assert F.ndim(id_tensor) == 1, "ID must be a vector."

    handler = self._pull_handlers[name]
    policy = self._part_policy[name]
    part_ids = policy.to_partid(id_tensor)

    if handler is default_pull_handler:
        # Fast path: the default handler is served by rpc.fast_pull.
        return rpc.fast_pull(
            name,
            id_tensor,
            part_ids,
            KVSTORE_PULL,
            self._machine_count,
            self._group_count,
            self._machine_id,
            self._client_id,
            self._data_store[name],
            policy,
        )

    # Group the IDs by target machine; ``restore`` undoes the permutation.
    part_ids_np = F.asnumpy(part_ids)
    order = F.tensor(np.argsort(part_ids_np))
    restore = F.tensor(np.argsort(F.asnumpy(order)))
    id_tensor = id_tensor[order]
    machines, counts = np.unique(part_ids_np, return_counts=True)

    local_ids = None
    num_remote = 0
    begin = 0
    for machine_idx, num_ids in zip(machines, counts):
        stop = begin + num_ids
        if begin == stop:
            # No data for target machine
            continue
        chunk = id_tensor[begin:stop]
        if machine_idx == self._machine_id:
            # Only remember the local IDs here: the local pull runs after
            # all remote requests are on the wire so both can overlap.
            local_ids = policy.to_local(chunk)
        else:
            rpc.send_request_to_machine(machine_idx, PullRequest(name, chunk))
            num_remote += 1
        begin = stop

    responses = []
    if local_ids is not None:
        local_data = handler(self._data_store, name, local_ids)
        responses.append(PullResponse(self._main_server_id, local_data))
    # One response is expected for every remote request sent above.
    responses.extend(rpc.recv_response() for _ in range(num_remote))

    # Order the pieces by server ID, stitch them together and restore the
    # caller's original ID order.
    responses.sort(key=self._take_id)
    gathered = F.cat(seq=[resp.data_tensor for resp in responses], dim=0)
    return gathered[restore]
def count_nonzero(self, name):
    """Count the nonzero values of ``name`` over all machines.

    The share of this machine is counted directly on ``self._data_store``
    (rows ``0 .. get_part_size() - 1`` of the local tensor); every other
    machine receives a ``CountLocalNonzeroRequest`` and reports its own
    count in ``num_local_nonzero``.

    Parameters
    ----------
    name : str
        data name

    Returns
    -------
    int
        the number of nonzero in this data.
    """
    total = 0
    outstanding = 0
    for target in range(self._machine_count):
        if target != self._machine_id:
            rpc.send_request_to_machine(target, CountLocalNonzeroRequest(name))
            outstanding += 1
            continue
        part_size = self._part_policy[name].get_part_size()
        local_rows = F.tensor(np.arange(part_size, dtype=np.int64))
        total += F.count_nonzero(self._data_store[name][local_rows])

    # Add up the counts reported by the remote machines.
    for _ in range(outstanding):
        total += rpc.recv_response().num_local_nonzero
    return total
class DistEmbedding:
    """Distributed node embeddings.

    DGL provides a distributed embedding to support models that require learnable
    embeddings. DGL's distributed embeddings are mainly used for learning node
    embeddings of graph models. Because distributed embeddings are part of a model,
    they are updated by mini-batches. The distributed embeddings have to be updated
    by DGL's optimizers instead of the optimizers provided by the deep learning
    frameworks (e.g., Pytorch and MXNet).

    To support efficient training on a graph with many nodes, the embeddings support
    sparse updates. That is, only the embeddings involved in a mini-batch computation
    are updated. Please refer to the optimizers in ``dgl.distributed.optim``
    (``SparseAdagrad`` and ``SparseAdam``) for available optimizers in DGL.

    Distributed embeddings are sharded and stored in a cluster of machines in the
    same way as :class:`dgl.distributed.DistTensor`, except that distributed
    embeddings are trainable. Because distributed embeddings are sharded in the
    same way as nodes and edges of a distributed graph, it is usually much more
    efficient to access than the sparse embeddings provided by the deep learning
    frameworks.

    Parameters
    ----------
    num_embeddings : int
        The number of embeddings. Currently, the number of embeddings has to be
        the same as the number of nodes or the number of edges.
    embedding_dim : int
        The dimension size of embeddings.
    name : str, optional
        The name of the embeddings. The name can uniquely identify embeddings in
        a system so that another DistEmbedding object can refer to the same
        embeddings.
    init_func : callable, optional
        The function to create the initial data. If the init function is not
        provided, the values of the embeddings are initialized to zero.
    part_policy : PartitionPolicy, optional
        The partition policy that assigns embeddings to different machines in the
        cluster. Currently, it only supports node partition policy or edge
        partition policy. The system determines the right partition policy
        automatically.

    Examples
    --------
    >>> def initializer(shape, dtype):
    ...     arr = th.zeros(shape, dtype=dtype)
    ...     arr.uniform_(-1, 1)
    ...     return arr
    >>> emb = dgl.distributed.DistEmbedding(g.num_nodes(), 10, init_func=initializer)
    >>> optimizer = dgl.distributed.optim.SparseAdagrad([emb], lr=0.001)
    >>> for blocks in dataloader:
    ...     feats = emb(nids)
    ...     loss = F.sum(feats + 1, 0)
    ...     loss.backward()
    ...     optimizer.step()

    Note
    ----
    When a ``DistEmbedding`` object is used in the forward computation, users
    have to invoke :py:meth:`~dgl.distributed.optim.SparseAdagrad.step`
    afterwards. Otherwise, there will be some memory leak.
    """

    def __init__(
        self,
        num_embeddings,
        embedding_dim,
        name=None,
        init_func=None,
        part_policy=None,
    ):
        # The embedding weights live in a float32 DistTensor.
        self._tensor = DistTensor(
            (num_embeddings, embedding_dim),
            F.float32,
            name,
            init_func=init_func,
            part_policy=part_policy,
        )
        # (index, embedding) pairs recorded by __call__ while gradients are
        # being tracked; cleared by reset_trace().
        self._trace = []
        self._name = name
        self._num_embeddings = num_embeddings
        self._embedding_dim = embedding_dim

        # Check whether it is multi-gpu/distributed training or not.  When
        # torch.distributed has not been initialized fall back to a single
        # process (rank 0 of 1) so that both attributes always exist.
        if th.distributed.is_initialized():
            self._rank = th.distributed.get_rank()
            self._world_size = th.distributed.get_world_size()
        else:
            self._rank = 0
            self._world_size = 1

        self._optm_state = None  # track optimizer state
        self._part_policy = part_policy

    def __call__(self, idx, device=th.device("cpu")):
        """Collect the embeddings stored at the given indices.

        While ``F.is_recording()`` is true, a gradient is attached to the
        returned tensor and the ``(idx, embedding)`` pair is appended to the
        internal trace.

        Parameters
        ----------
        idx : th.tensor
            Index of the embeddings to collect.
        device : th.device
            Target device to put the collected embeddings.

        Returns
        -------
        Tensor
            The requested node embeddings
        """
        idx = utils.toindex(idx).tousertensor()
        emb = self._tensor[idx].to(device, non_blocking=True)
        if F.is_recording():
            emb = F.attach_grad(emb)
            self._trace.append((idx.to(device, non_blocking=True), emb))
        return emb

    def reset_trace(self):
        """Reset the traced data."""
        self._trace = []

    @property
    def part_policy(self):
        """Return the partition policy

        Returns
        -------
        PartitionPolicy
            partition policy
        """
        return self._part_policy

    @property
    def name(self):
        """Return the name of the embeddings

        Returns
        -------
        str
            The name of the embeddings
        """
        return self._tensor.tensor_name

    @property
    def data_name(self):
        """Return the data name of the embeddings

        Returns
        -------
        str
            The data name of the embeddings
        """
        return self._tensor._name

    @property
    def kvstore(self):
        """Return the kvstore client

        Returns
        -------
        KVClient
            The kvstore client
        """
        return self._tensor.kvstore

    @property
    def num_embeddings(self):
        """Return the number of embeddings

        Returns
        -------
        int
            The number of embeddings
        """
        return self._num_embeddings

    @property
    def embedding_dim(self):
        """Return the dimension of embeddings

        Returns
        -------
        int
            The dimension of embeddings
        """
        return self._embedding_dim

    @property
    def optm_state(self):
        """Return the optimizer related state tensor.

        Returns
        -------
        tuple of torch.Tensor
            The optimizer related state.
        """
        return self._optm_state

    @property
    def weight(self):
        """Return the tensor storing the node embeddings

        Returns
        -------
        torch.Tensor
            The tensor storing the node embeddings
        """
        return self._tensor
def __init__(self, params, lr):
    """Store the embeddings and learning rate and detect the process layout.

    Parameters
    ----------
    params : list of DistEmbedding
        The list of DistEmbedding.
    lr : float
        The learning rate.
    """
    self._params = params
    self._lr = lr
    self._rank = None
    self._world_size = None
    self._shared_cache = {}
    self._clean_grad = False
    self._opt_meta = {}
    self._state = {}
    # Hyper parameters that are written out together with the state.
    self._defaults = {}

    # A process without an initialized torch.distributed group is treated
    # as rank 0 of a world of size 1.
    dist = th.distributed
    if dist.is_initialized():
        rank, world_size = dist.get_rank(), dist.get_world_size()
    else:
        rank, world_size = 0, 1
    self._rank = rank
    self._world_size = world_size
def load_local_state_dict(self, local_state_dict):
    """Load the local state from the input state_dict,
    updating the optimizer as needed.

    For every embedding in ``local_state_dict[EMB_STATES]`` the stored state
    rows are written into the matching optimizer state tensors at the saved
    IDs.  The entries of ``local_state_dict[PARAMS]`` are then merged into
    ``self._defaults`` and set as attributes on the optimizer.

    Parameters
    ----------
    local_state_dict : dict
        Optimizer state; should be an object returned
        from a call to local_state_dict().

    Raises
    ------
    ValueError
        If the number of saved states of an embedding differs from the number
        of states the optimizer keeps for it, or if a saved state name is not
        known to the optimizer.

    See Also
    --------
    local_state_dict
    """
    for emb_name, emb_state in local_state_dict[EMB_STATES].items():
        idx = emb_state[IDS]
        # The state of an embedding is a single DistTensor for some
        # optimizers (Adagrad) and a tuple of them for others (Adam);
        # normalise to a list that references the original DistTensor(s).
        registered = self._state[emb_name]
        states = (
            list(registered) if isinstance(registered, tuple) else [registered]
        )
        if len(emb_state[STATES]) != len(states):
            raise ValueError(
                "loaded state dict has a different number of states"
                f" of embedding {emb_name}"
            )
        name_to_index = {
            state.name: index for index, state in enumerate(states)
        }
        for name, state in emb_state[STATES].items():
            if name not in name_to_index:
                # The offending state name is interpolated into the message.
                raise ValueError(
                    f"loaded state dict contains a state {name} "
                    "that can't be found in the optimizer states"
                )
            target = states[name_to_index[name]]
            # Bring the saved rows to the CPU and to the dtype of the
            # destination state before writing them back.
            target[idx] = state.to(th.device("cpu"), target.dtype)
    self._defaults.update(local_state_dict[PARAMS])
    self.__dict__.update(local_state_dict[PARAMS])
def load(self, f):
    """Load the local state of the optimizer from the file on per rank.

    The state of this rank is read from ``"{f}_{rank}"``.  If the file was
    written by a larger world (device number scale-in), the files of the
    ranks ``rank + world_size, rank + 2 * world_size, ...`` below the old
    world size are loaded as well.  A missing rank file only triggers a
    warning so that scaling out after a reload keeps working.

    NOTE: This needs to be called on all ranks.

    Parameters
    ----------
    f : Union[str, os.PathLike]
        The path of the file to load from.  Bytes-like paths are decoded as
        UTF-8.

    See Also
    --------
    save
    """
    import os

    if self._world_size > 1:
        th.distributed.barrier()

    if not isinstance(f, str):
        # ``str(obj, "UTF-8")`` only accepts bytes-like objects, so path
        # objects (e.g. ``pathlib.Path``) must go through ``os.fspath``.
        if isinstance(f, os.PathLike):
            f = os.fspath(f)
        if not isinstance(f, str):
            f = str(f, "UTF-8")
    f_attach_rank = f"{f}_{self._rank}"

    # Don't throw error here to support device number scale-out
    # after reloading, but make sure your hyper parameter is same
    # as before because new added local optimizers will be filled
    # in nothing
    if not exists(f_attach_rank):
        warnings.warn(f"File {f_attach_rank} can't be found, load nothing.")
    else:
        old_world_size = self._load_state_from(f_attach_rank)
        # Device number scale-in
        if self._world_size < old_world_size:
            for rank in range(
                self._rank + self._world_size,
                old_world_size,
                self._world_size,
            ):
                self._load_state_from(f"{f}_{rank}")

    if self._world_size > 1:
        th.distributed.barrier()
""" with th.no_grad(): # [Rui] # As `gloo` supports CPU tensors only while `nccl` supports GPU # tensors only, we firstly create tensors on the corresponding # devices and then copy the data to target device if needed. # Please note that the target device can be different from the # preferred device. target_device = None preferred_device = ( th.device(f"cuda:{self._rank}") if th.distributed.get_backend() == "nccl" else th.device("cpu") ) local_indics = {emb.name: [] for emb in self._params} local_grads = {emb.name: [] for emb in self._params} for emb in self._params: name = emb.weight.name kvstore = emb.weight.kvstore trainers_per_server = self._world_size // kvstore.num_servers idics = [] grads = [] for trace in emb._trace: if trace[1].grad is not None: idics.append(trace[0]) grads.append(trace[1].grad.data) else: assert len(trace[0]) == 0 # If the sparse embedding is not used in the previous forward step # The idx and grad will be empty, initialize them as empty tensors to # avoid crashing the optimizer step logic. # # Note: we cannot skip the gradient exchange and update steps as other # working processes may send gradient update requests corresponding # to certain embedding to this process. # # [WARNING][TODO][Rui] # For empty idx and grad, we blindly create data on the # preferred device, which may not be the device where the # embedding is stored. 
idics = ( th.cat(idics, dim=0) if len(idics) != 0 else th.zeros((0,), dtype=th.int64, device=preferred_device) ) grads = ( th.cat(grads, dim=0) if len(grads) != 0 else th.zeros( (0, emb.embedding_dim), dtype=th.float32, device=preferred_device, ) ) target_device = grads.device # will send grad to each corresponding trainer if self._world_size > 1: # get idx split from kvstore idx_split = kvstore.get_partid(emb.data_name, idics) idx_split_size = [] idics_list = [] grad_list = [] # split idx and grad first for i in range(kvstore.num_servers): mask = idx_split == i idx_i = idics[mask] grad_i = grads[mask] if trainers_per_server <= 1: idx_split_size.append( th.tensor( [idx_i.shape[0]], dtype=th.int64, device=preferred_device, ) ) idics_list.append(idx_i) grad_list.append(grad_i) else: kv_idx_split = th.remainder( idx_i, trainers_per_server ).long() for j in range(trainers_per_server): mask = kv_idx_split == j idx_j = idx_i[mask] grad_j = grad_i[mask] idx_split_size.append( th.tensor( [idx_j.shape[0]], dtype=th.int64, device=preferred_device, ) ) idics_list.append(idx_j) grad_list.append(grad_j) # if one machine launch multiple KVServer, they share the same storage. 
# For each machine, the pytorch rank is num_trainers * # machine_id + i # use scatter to sync across trainers about the p2p tensor size # Note: If we have GPU nccl support, we can use all_to_all to # sync information here gather_list = list( th.empty( [self._world_size], dtype=th.int64, device=preferred_device, ).chunk(self._world_size) ) alltoall( self._rank, self._world_size, gather_list, idx_split_size, ) idx_gather_list = [ th.empty( (int(num_emb),), dtype=idics.dtype, device=preferred_device, ) for num_emb in gather_list ] alltoallv( self._rank, self._world_size, idx_gather_list, idics_list, ) local_indics[name] = idx_gather_list grad_gather_list = [ th.empty( (int(num_emb), grads.shape[1]), dtype=grads.dtype, device=preferred_device, ) for num_emb in gather_list ] alltoallv( self._rank, self._world_size, grad_gather_list, grad_list, ) local_grads[name] = grad_gather_list else: local_indics[name] = [idics] local_grads[name] = [grads] if self._clean_grad: # clean gradient track for emb in self._params: emb.reset_trace() self._clean_grad = False # do local update for emb in self._params: name = emb.weight.name idx = th.cat(local_indics[name], dim=0) grad = th.cat(local_grads[name], dim=0) self.update( idx.to(target_device, non_blocking=True), grad.to(target_device, non_blocking=True), emb, ) # synchronized gradient update if self._world_size > 1: th.distributed.barrier() @abstractmethod def update(self, idx, grad, emb): """Update embeddings in a sparse manner Sparse embeddings are updated in mini batches. We maintain gradient states for each embedding so they can be updated separately. Parameters ---------- idx : tensor Index of the embeddings to be updated. grad : tensor Gradient of each embedding. emb : dgl.distributed.DistEmbedding Sparse node embedding to update. 
def initializer(shape, dtype):
    """Create an all-zero tensor used as initial sparse optimizer state.

    Parameters
    ----------
    shape : tuple of ints
        The shape of the state tensor
    dtype : torch dtype
        The data type of the state tensor

    Returns
    -------
    torch.Tensor
        A tensor of the given shape and dtype filled with zeros.
    """
    return th.zeros(shape, dtype=dtype)
def update(self, idx, grad, emb):
    """Update embeddings in a sparse manner

    Sparse embeddings are updated in mini batches. We maintain gradient states
    for each embedding so they can be updated separately.

    Gradients of duplicated indices are averaged first.  The squared averaged
    gradient is then accumulated into the state registered for ``emb.name``
    and ``lr * grad / (sqrt(state) + eps)`` is subtracted from the embedding
    rows.

    Parameters
    ----------
    idx : tensor
        Index of the embeddings to be updated.
    grad : tensor
        Gradient of each embedding.
    emb : dgl.distributed.DistEmbedding
        Sparse embedding to update.
    """
    eps = self._eps
    clr = self._lr

    state_dev = th.device("cpu")
    exec_dev = grad.device
    # Results computed on another device are copied to the CPU with
    # non-blocking copies; the host then has to wait for those copies
    # before it reads the destination tensors.
    state_block = exec_dev != state_dev

    # the update is non-linear so indices must be unique
    grad_indices, inverse, cnt = th.unique(
        idx, return_inverse=True, return_counts=True
    )
    grad_values = th.zeros(
        (grad_indices.shape[0], grad.shape[1]), device=exec_dev
    )
    grad_values.index_add_(0, inverse, grad)
    grad_values = grad_values / cnt.unsqueeze(1)
    grad_sum = grad_values * grad_values

    # update grad state
    grad_state = self._state[emb.name][grad_indices].to(exec_dev)
    grad_state += grad_sum
    grad_state_dst = grad_state.to(state_dev, non_blocking=True)
    if state_block:
        # use events to try and overlap CPU and GPU as much as possible
        update_event = th.cuda.Event()
        update_event.record()

    # update emb
    # NOTE: the square root must NOT be taken in place.  When the gradients
    # already live on the CPU, ``grad_state.to(state_dev)`` returns
    # ``grad_state`` itself, so an in-place ``sqrt_()`` would overwrite the
    # accumulated sum that is written back to the optimizer state below.
    std_values = grad_state.sqrt().add_(eps)
    tmp = clr * grad_values / std_values
    tmp_dst = tmp.to(state_dev, non_blocking=True)

    if state_block:
        std_event = th.cuda.Event()
        std_event.record()
        # Block the host until the transfer of the new state from exec_dev
        # to state_dev has finished (Event.wait() would only order streams).
        update_event.synchronize()
    self._state[emb.name][grad_indices] = grad_state_dst

    if state_block:
        # Block the host until the transfer of the embedding delta has
        # finished before it is applied.
        std_event.synchronize()
    emb._tensor[grad_indices] -= tmp_dst
self._beta1 = betas[0] self._beta2 = betas[1] self._defaults = { "_lr": lr, "_eps": eps, "_beta1": betas[0], "_beta2": betas[1], } for emb in params: assert isinstance( emb, DistEmbedding ), "SparseAdam only supports dgl.distributed.DistEmbedding" state_step = DistTensor( (emb.num_embeddings,), th.float32, emb.name + "_step", init_func=initializer, part_policy=emb.part_policy, is_gdata=False, ) state_mem = DistTensor( (emb.num_embeddings, emb.embedding_dim), th.float32, emb.name + "_mem", init_func=initializer, part_policy=emb.part_policy, is_gdata=False, ) state_power = DistTensor( (emb.num_embeddings, emb.embedding_dim), th.float32, emb.name + "_power", init_func=initializer, part_policy=emb.part_policy, is_gdata=False, ) state = (state_step, state_mem, state_power) assert ( emb.name not in self._state ), "{} already registered in the optimizer".format(emb.name) self._state[emb.name] = state def update(self, idx, grad, emb): """Update embeddings in a sparse manner Sparse embeddings are updated in mini batches. We maintain gradient states for each embedding so they can be updated separately. Parameters ---------- idx : tensor Index of the embeddings to be updated. grad : tensor Gradient of each embedding. emb : dgl.distributed.DistEmbedding Sparse embedding to update. """ beta1 = self._beta1 beta2 = self._beta2 eps = self._eps clr = self._lr state_step, state_mem, state_power = self._state[emb.name] state_dev = th.device("cpu") exec_dev = grad.device # only perform async copies cpu -> gpu, or gpu-> gpu, but block # when copying to the cpu, so as to ensure the copy is finished # before operating on the data on the cpu state_block = state_dev == th.device("cpu") and exec_dev != state_dev # the update is non-linear so indices must be unique grad_indices, inverse, cnt = th.unique( idx, return_inverse=True, return_counts=True ) # update grad state state_idx = grad_indices.to(state_dev) # The original implementation will cause read/write contension. 
# state_step[state_idx] += 1 # state_step = state_step[state_idx].to(exec_dev, non_blocking=True) # In a distributed environment, the first line of code will send write requests to # kvstore servers to update the state_step which is asynchronous and the second line # of code will also send read requests to kvstore servers. The write and read requests # may be handled by different kvstore servers managing the same portion of the # state_step dist tensor in the same node. So that, the read request may read an old # value (i.e., 0 in the first iteration) which will cause # update_power_corr to be NaN state_val = state_step[state_idx] + 1 state_step[state_idx] = state_val state_step = state_val.to(exec_dev) orig_mem = state_mem[state_idx].to(exec_dev) orig_power = state_power[state_idx].to(exec_dev) grad_values = th.zeros( (grad_indices.shape[0], grad.shape[1]), device=exec_dev ) grad_values.index_add_(0, inverse, grad) grad_values = grad_values / cnt.unsqueeze(1) grad_mem = grad_values grad_power = grad_values * grad_values update_mem = beta1 * orig_mem + (1.0 - beta1) * grad_mem update_power = beta2 * orig_power + (1.0 - beta2) * grad_power update_mem_dst = update_mem.to(state_dev, non_blocking=True) update_power_dst = update_power.to(state_dev, non_blocking=True) if state_block: # use events to try and overlap CPU and GPU as much as possible update_event = th.cuda.Event() update_event.record() update_mem_corr = update_mem / ( 1.0 - th.pow(th.tensor(beta1, device=exec_dev), state_step) ).unsqueeze(1) update_power_corr = update_power / ( 1.0 - th.pow(th.tensor(beta2, device=exec_dev), state_step) ).unsqueeze(1) std_values = clr * update_mem_corr / (th.sqrt(update_power_corr) + eps) std_values_dst = std_values.to(state_dev, non_blocking=True) if state_block: std_event = th.cuda.Event() std_event.record() # wait for our transfers from exec_dev to state_dev to finish # before we can use them update_event.wait() state_mem[state_idx] = update_mem_dst state_power[state_idx] 
= update_power_dst if state_block: # wait for the transfer of std_values to finish before we # can use it std_event.wait() emb._tensor[state_idx] -= std_values_dst ================================================ FILE: python/dgl/distributed/optim/pytorch/utils.py ================================================ """Provide utils for distributed sparse optimizers """ import torch as th import torch.distributed as dist def alltoall_cpu(rank, world_size, output_tensor_list, input_tensor_list): """Each process scatters list of input tensors to all processes in a cluster and return gathered list of tensors in output list. The tensors should have the same shape. Parameters ---------- rank : int The rank of current worker world_size : int The size of the entire communicator output_tensor_list : List of tensor The received tensors input_tensor_list : List of tensor The tensors to exchange """ input_tensor_list = [ tensor.to(th.device("cpu")) for tensor in input_tensor_list ] for i in range(world_size): dist.scatter( output_tensor_list[i], input_tensor_list if i == rank else [], src=i ) def alltoallv_cpu(rank, world_size, output_tensor_list, input_tensor_list): """Each process scatters list of input tensors to all processes in a cluster and return gathered list of tensors in output list. 
def alltoall(rank, world_size, output_tensor_list, input_tensor_list):
    """Exchange one tensor with every process in the cluster.

    Each process scatters its list of input tensors to all processes and
    gathers the tensors sent to it into the output list. The tensors should
    have the same shape. The native ``all_to_all`` collective is used with
    the "nccl" backend; any other backend goes through ``alltoall_cpu``.

    Parameters
    ----------
    rank : int
        The rank of current worker
    world_size : int
        The size of the entire communicator
    output_tensor_list : List of tensor
        The received tensors
    input_tensor_list : List of tensor
        The tensors to exchange
    """
    backend = th.distributed.get_backend()
    if backend != "nccl":
        alltoall_cpu(
            rank,
            world_size,
            output_tensor_list,
            input_tensor_list,
        )
        return
    th.distributed.all_to_all(output_tensor_list, input_tensor_list)
Parameters ---------- rank : int The rank of current worker world_size : int The size of the entire communicator output_tensor_list : List of tensor The received tensors input_tensor_list : List of tensor The tensors to exchange """ if th.distributed.get_backend() == "nccl": th.distributed.all_to_all(output_tensor_list, input_tensor_list) else: alltoallv_cpu( rank, world_size, output_tensor_list, input_tensor_list, ) ================================================ FILE: python/dgl/distributed/optim/tensorflow/__init__.py ================================================ ================================================ FILE: python/dgl/distributed/partition.py ================================================ """Functions for partitions. """ import concurrent import concurrent.futures import copy import json import logging import multiprocessing as mp import os import time from functools import partial import numpy as np import torch from .. import backend as F, graphbolt as gb from ..base import dgl_warning, DGLError, EID, ETYPE, NID, NTYPE from ..convert import heterograph, to_homogeneous from ..data.utils import load_graphs, load_tensors, save_graphs, save_tensors from ..partition import ( get_peak_mem, metis_partition_assignment, partition_graph_with_halo, ) from ..random import choice as random_choice from ..transforms import sort_csc_by_tag, sort_csr_by_tag from .constants import DEFAULT_ETYPE, DEFAULT_NTYPE, DGL2GB_EID, GB_DST_ID from .graph_partition_book import ( _etype_str_to_tuple, _etype_tuple_to_str, RangePartitionBook, ) RESERVED_FIELD_DTYPE = { "inner_node": ( F.uint8 ), # A flag indicates whether the node is inside a partition. "inner_edge": ( F.uint8 ), # A flag indicates whether the edge is inside a partition. NID: F.int64, EID: F.int64, NTYPE: F.int16, # `sort_csr_by_tag` and `sort_csc_by_tag` works on int32/64 only. 
ETYPE: F.int32, } def _format_part_metadata(part_metadata, formatter): """Format etypes with specified formatter.""" for key in ["edge_map", "etypes"]: if key not in part_metadata: continue orig_data = part_metadata[key] if not isinstance(orig_data, dict): continue new_data = {} for etype, data in orig_data.items(): etype = formatter(etype) new_data[etype] = data part_metadata[key] = new_data return part_metadata def _load_part_config(part_config): """Load part config and format.""" try: with open(part_config) as f: part_metadata = _format_part_metadata( json.load(f), _etype_str_to_tuple ) except AssertionError as e: raise DGLError( f"Failed to load partition config due to {e}. " "Probably caused by outdated config. If so, please refer to " "https://github.com/dmlc/dgl/tree/master/tools#change-edge-" "type-to-canonical-edge-type-for-partition-configuration-json" ) return part_metadata def _dump_part_config(part_config, part_metadata): """Format and dump part config.""" part_metadata = _format_part_metadata(part_metadata, _etype_tuple_to_str) with open(part_config, "w") as outfile: json.dump(part_metadata, outfile, sort_keys=False, indent=4) def process_partitions(g, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. 
""" for k, dtype in RESERVED_FIELD_DTYPE.items(): if k in g.ndata: g.ndata[k] = F.astype(g.ndata[k], dtype) if k in g.edata: g.edata[k] = F.astype(g.edata[k], dtype) if (sort_etypes) and (formats is not None): if "csr" in formats: g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") return g def _save_dgl_graphs(filename, g_list, formats=None): save_graphs(filename, g_list, formats=formats) def _get_inner_node_mask(graph, ntype_id, gpb=None): ndata = ( graph.node_attributes if isinstance(graph, gb.FusedCSCSamplingGraph) else graph.ndata ) assert "inner_node" in ndata, "'inner_node' is not in nodes' data" if NTYPE in ndata or gpb is not None: ntype = ( gpb.map_to_per_ntype(ndata[NID])[0] if gpb is not None else ndata[NTYPE] ) dtype = F.dtype(ndata["inner_node"]) return ndata["inner_node"] * F.astype(ntype == ntype_id, dtype) == 1 else: return ndata["inner_node"] == 1 def _get_inner_edge_mask( graph, etype_id, ): edata = ( graph.edge_attributes if isinstance(graph, gb.FusedCSCSamplingGraph) else graph.edata ) assert "inner_edge" in edata, "'inner_edge' is not in edges' data" etype = ( graph.type_per_edge if isinstance(graph, gb.FusedCSCSamplingGraph) else (graph.edata[ETYPE] if ETYPE in graph.edata else None) ) if etype is not None: dtype = F.dtype(edata["inner_edge"]) return edata["inner_edge"] * F.astype(etype == etype_id, dtype) == 1 else: return edata["inner_edge"] == 1 def _get_part_ranges(id_ranges): res = {} for key in id_ranges: # Normally, each element has two values that represent the starting ID and the ending ID # of the ID range in a partition. # If not, the data is probably still in the old format, in which only the ending ID is # stored. We need to convert it to the format we expect. 
def _verify_dgl_partition(graph, part_id, gpb, ntypes, etypes):
    """Check that the inner nodes/edges of a DGL partition map to ``part_id``.

    For every node type and edge type, the global IDs of the partition's
    inner nodes/edges are looked up in the partition book ``gpb`` twice: once
    as global homogeneous IDs and once as type-wise IDs. Both lookups must
    return ``part_id`` for every ID, otherwise an ``AssertionError`` is raised.
    """
    assert (
        NID in graph.ndata
    ), "the partition graph should contain node mapping to global node ID"
    assert (
        EID in graph.edata
    ), "the partition graph should contain edge mapping to global edge ID"

    for ntype, ntype_id in ntypes.items():
        # graph.ndata[NID] holds global homogeneous node IDs.
        inner_nids = F.boolean_mask(
            graph.ndata[NID], _get_inner_node_mask(graph, ntype_id)
        )
        owners_by_global_id = gpb.nid2partid(inner_nids)
        _, type_wise_nids = gpb.map_to_per_ntype(inner_nids)
        owners_by_type_id = gpb.nid2partid(type_wise_nids, ntype)
        assert np.all(F.asnumpy(owners_by_global_id == part_id)), (
            "Unexpected partition IDs are found in the loaded partition "
            "while querying via global homogeneous node IDs."
        )
        assert np.all(F.asnumpy(owners_by_type_id == part_id)), (
            "Unexpected partition IDs are found in the loaded partition "
            "while querying via type-wise node IDs."
        )

    for etype, etype_id in etypes.items():
        # graph.edata[EID] holds global homogeneous edge IDs.
        inner_eids = F.boolean_mask(
            graph.edata[EID], _get_inner_edge_mask(graph, etype_id)
        )
        owners_by_global_id = gpb.eid2partid(inner_eids)
        _, type_wise_eids = gpb.map_to_per_etype(inner_eids)
        owners_by_type_id = gpb.eid2partid(type_wise_eids, etype)
        assert np.all(F.asnumpy(owners_by_global_id == part_id)), (
            "Unexpected partition IDs are found in the loaded partition "
            "while querying via global homogeneous edge IDs."
        )
        assert np.all(F.asnumpy(owners_by_type_id == part_id)), (
            "Unexpected partition IDs are found in the loaded partition "
            "while querying via type-wise edge IDs."
        )
def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False):
    """Load data of a partition from the data path.

    A partition data includes a graph structure of the partition, a dict of node tensors,
    a dict of edge tensors and some metadata. The partition may contain the HALO nodes,
    which are the nodes replicated from other partitions. However, the dict of node tensors
    only contains the node data that belongs to the local partition. Similarly, edge tensors
    only contains the edge data that belongs to the local partition. The metadata include
    the information of the global graph (not the local partition), which includes the number
    of nodes, the number of edges as well as the node assignment of the global graph.

    The function currently loads data through the local filesystem interface.

    The graph format is decided by the files found in the partition folder:
    exactly one of ``graph.dgl`` and ``fused_csc_sampling_graph.pt`` must
    exist, and whichever exists overrides the ``use_graphbolt`` argument.

    Parameters
    ----------
    part_config : str
        The path of the partition config file.
    part_id : int
        The partition ID.
    load_feats : bool, optional
        Whether to load node/edge feats. If False, the returned node/edge feature
        dictionaries will be empty. Default: True.
    use_graphbolt : bool, optional
        Whether to load GraphBolt partition. Default: False.

    Returns
    -------
    DGLGraph
        The graph partition structure.
    Dict[str, Tensor]
        Node features.
    Dict[(str, str, str), Tensor]
        Edge features.
    GraphPartitionBook
        The graph partition information.
    str
        The graph name
    List[str]
        The node types
    List[(str, str, str)]
        The edge types

    Raises
    ------
    ValueError
        If neither or both of the DGL and GraphBolt graph files exist.
    """
    config_dir = os.path.dirname(part_config)

    with open(part_config) as conf_f:
        part_metadata = json.load(conf_f)
    part_key = "part-{}".format(part_id)
    assert part_key in part_metadata, "part-{} does not exist".format(part_id)
    part_files = part_metadata[part_key]

    # Probe the partition folder for the two supported graph files.
    part_dir = os.path.join(config_dir, f"part{part_id}")
    exist_dgl_graph = os.path.exists(os.path.join(part_dir, "graph.dgl"))
    exist_graphbolt_graph = os.path.exists(
        os.path.join(part_dir, "fused_csc_sampling_graph.pt")
    )

    # Exactly one of the two graph files is allowed to exist.
    if not (exist_dgl_graph or exist_graphbolt_graph):
        raise ValueError("The graph object doesn't exist.")
    if exist_dgl_graph and exist_graphbolt_graph:
        raise ValueError(
            "Both DGL graph and GraphBolt graph exist. Please remove one."
        )
    # The file found on disk decides the format, not the caller's argument.
    use_graphbolt = exist_graphbolt_graph

    part_graph_field = "part_graph_graphbolt" if use_graphbolt else "part_graph"
    assert (
        part_graph_field in part_files
    ), f"the partition does not contain graph structure: {part_graph_field}"
    partition_path = os.path.join(config_dir, part_files[part_graph_field])
    logging.info(
        "Start to load partition from %s which is "
        "%d bytes. It may take non-trivial "
        "time for large partition.",
        partition_path,
        os.path.getsize(partition_path),
    )
    if use_graphbolt:
        graph = torch.load(partition_path, weights_only=False)
    else:
        graph = load_graphs(partition_path)[0][0]
    logging.info("Finished loading partition from %s.", partition_path)

    gpb, graph_name, ntypes, etypes = load_partition_book(part_config, part_id)
    ntypes_list = list(ntypes.keys())
    etypes_list = list(etypes.keys())

    # Optional consistency check, enabled through the environment.
    if "DGL_DIST_DEBUG" in os.environ:
        if use_graphbolt:
            _verify_graphbolt_partition(graph, part_id, gpb, ntypes, etypes)
        else:
            _verify_dgl_partition(graph, part_id, gpb, ntypes, etypes)

    if load_feats:
        node_feats, edge_feats = load_partition_feats(part_config, part_id)
    else:
        node_feats, edge_feats = {}, {}

    return (
        graph,
        node_feats,
        edge_feats,
        gpb,
        graph_name,
        ntypes_list,
        etypes_list,
    )
def load_partition_book(part_config, part_id, part_metadata=None):
    """Load a graph partition book from the partition config file.

    Parameters
    ----------
    part_config : str
        The path of the partition config file.
    part_id : int
        The partition ID.
    part_metadata : dict
        The meta data of partition. When ``None``, it is loaded from
        ``part_config``.

    Returns
    -------
    GraphPartitionBook
        The global partition information.
    str
        The graph name
    dict
        The node types
    dict
        The edge types

    Raises
    ------
    TypeError
        If the node map does not describe a range partitioning.
    """
    if part_metadata is None:
        part_metadata = _load_part_config(part_config)
    assert "num_parts" in part_metadata, "num_parts does not exist."
    assert (
        part_metadata["num_parts"] > part_id
    ), "part {} is out of range (#parts: {})".format(
        part_id, part_metadata["num_parts"]
    )
    num_parts = part_metadata["num_parts"]
    assert (
        "num_nodes" in part_metadata
    ), "cannot get the number of nodes of the global graph."
    assert (
        "num_edges" in part_metadata
    ), "cannot get the number of edges of the global graph."
    assert "node_map" in part_metadata, "cannot get the node map."
    assert "edge_map" in part_metadata, "cannot get the edge map."
    assert "graph_name" in part_metadata, "cannot get the graph name"

    # If this is a range partitioning, node_map actually stores a list, whose elements
    # indicate the boundary of range partitioning. Otherwise, node_map stores a filename
    # that contains node map in a NumPy array.
    node_map = part_metadata["node_map"]
    edge_map = part_metadata["edge_map"]
    if isinstance(node_map, dict):
        # Only the first entry is inspected. An empty dict carries no range
        # information, so it is treated as "not a range partitioning" instead
        # of leaving ``is_range_part`` unbound.
        first_ranges = next(iter(node_map.values()), None)
        is_range_part = isinstance(first_ranges, list)
    elif isinstance(node_map, list):
        is_range_part = True
        node_map = {DEFAULT_NTYPE: node_map}
    else:
        is_range_part = False
    if isinstance(edge_map, list):
        edge_map = {DEFAULT_ETYPE: edge_map}

    ntypes = {DEFAULT_NTYPE: 0}
    etypes = {DEFAULT_ETYPE: 0}
    if "ntypes" in part_metadata:
        ntypes = part_metadata["ntypes"]
    if "etypes" in part_metadata:
        etypes = part_metadata["etypes"]

    if isinstance(node_map, dict):
        for key in node_map:
            assert key in ntypes, "The node type {} is invalid".format(key)
    if isinstance(edge_map, dict):
        for key in edge_map:
            assert key in etypes, "The edge type {} is invalid".format(key)

    if not is_range_part:
        raise TypeError("Only RangePartitionBook is supported currently.")

    node_map = _get_part_ranges(node_map)
    edge_map = _get_part_ranges(edge_map)

    # Format dtype of node/edge map if dtype is specified.
    def _format_node_edge_map(part_metadata, map_type, data):
        dtype_key = f"{map_type}_map_dtype"
        if dtype_key not in part_metadata:
            return data
        dtype = part_metadata[dtype_key]
        assert dtype in ["int32", "int64"], (
            f"The {map_type} map dtype should be either int32 or int64, "
            f"but got {dtype}."
        )
        for type_name in data:
            data[type_name] = data[type_name].astype(dtype)
        return data

    node_map = _format_node_edge_map(part_metadata, "node", node_map)
    edge_map = _format_node_edge_map(part_metadata, "edge", edge_map)

    # Sort the node/edge maps by the node/edge type ID.
    node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]]))
    edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]]))

    def _assert_is_sorted(id_map):
        # Flatten the ranges partition by partition (and type by type within
        # a partition) and require the resulting sequence to be non-decreasing.
        id_ranges = np.array(list(id_map.values()))
        ids = []
        for i in range(num_parts):
            ids.append(id_ranges[:, i, :])
        ids = np.array(ids).flatten()
        assert np.all(
            ids[:-1] <= ids[1:]
        ), f"The node/edge map is not sorted: {ids}"

    _assert_is_sorted(node_map)
    _assert_is_sorted(edge_map)

    return (
        RangePartitionBook(
            part_id, num_parts, node_map, edge_map, ntypes, etypes
        ),
        part_metadata["graph_name"],
        ntypes,
        etypes,
    )
Returns ------- tensor or dict of tensors, tensor or dict of tensors """ is_hetero = not g.is_homogeneous if is_hetero: # Get the type IDs orig_ntype = F.gather_row(sim_g.ndata[NTYPE], orig_nids) orig_etype = F.gather_row(sim_g.edata[ETYPE], orig_eids) # Mapping between shuffled global IDs to original per-type IDs orig_nids = F.gather_row(sim_g.ndata[NID], orig_nids) orig_eids = F.gather_row(sim_g.edata[EID], orig_eids) orig_nids = { ntype: F.boolean_mask( orig_nids, orig_ntype == g.get_ntype_id(ntype) ) for ntype in g.ntypes } orig_eids = { etype: F.boolean_mask( orig_eids, orig_etype == g.get_etype_id(etype) ) for etype in g.canonical_etypes } return orig_nids, orig_eids def _set_trainer_ids(g, sim_g, node_parts): """Set the trainer IDs for each node and edge on the input graph. The trainer IDs will be stored as node data and edge data in the input graph. Parameters ---------- g : DGLGraph The input graph for partitioning. sim_g : DGLGraph The homogeneous version of the input graph. node_parts : tensor The node partition ID for each node in `sim_g`. """ if g.is_homogeneous: g.ndata["trainer_id"] = node_parts # An edge is assigned to a partition based on its destination node. g.edata["trainer_id"] = F.gather_row(node_parts, g.edges()[1]) else: for ntype_id, ntype in enumerate(g.ntypes): type_idx = sim_g.ndata[NTYPE] == ntype_id orig_nid = F.boolean_mask(sim_g.ndata[NID], type_idx) trainer_id = F.zeros((len(orig_nid),), F.dtype(node_parts), F.cpu()) F.scatter_row_inplace( trainer_id, orig_nid, F.boolean_mask(node_parts, type_idx) ) g.nodes[ntype].data["trainer_id"] = trainer_id for c_etype in g.canonical_etypes: # An edge is assigned to a partition based on its destination node. 
_, _, dst_type = c_etype trainer_id = F.gather_row( g.nodes[dst_type].data["trainer_id"], g.edges(etype=c_etype)[1] ) g.edges[c_etype].data["trainer_id"] = trainer_id def _partition_to_graphbolt( parts, part_i, part_config, part_metadata, *, store_eids=True, store_inner_node=False, store_inner_edge=False, graph_formats=None, ): gpb, _, ntypes, etypes = load_partition_book( part_config=part_config, part_id=part_i, part_metadata=part_metadata ) graph = parts[part_i] csc_graph = _convert_dgl_partition_to_gb( ntypes=ntypes, etypes=etypes, gpb=gpb, part_meta=part_metadata, graph=graph, store_eids=store_eids, store_inner_edge=store_inner_edge, store_inner_node=store_inner_node, graph_formats=graph_formats, ) rel_path_result = _save_graph_gb( part_config=part_config, part_id=part_i, csc_graph=csc_graph ) part_metadata[f"part-{part_i}"]["part_graph_graphbolt"] = rel_path_result def _update_node_edge_map(node_map_val, edge_map_val, g, num_parts): """ If the original graph contains few nodes or edges for specific node/edge types, the partitioned graph may have empty partitions for these types. And the node_map_val and edge_map_val will have -1 for the start and end ID of these types. This function updates the node_map_val and edge_map_val to be contiguous. Example case: Suppose we have a heterogeneous graph with 3 node/edge types and the number of partitions is 3. A possible node_map_val or edge_map_val is as follows: | part_id\\Node/Edge Type| Type A | Type B | Type C | |------------------------|--------|---------|--------| | 0 | 0, 1 | -1, -1 | 2, 3 | | 1 | -1, -1 | 3, 4 | 4, 5 | | 2 | 5, 6 | 7, 8 | -1, -1| As node/edge IDs are contiguous in node/edge type for each partition, we can update the node_map_val and edge_map_val via updating the start and end ID in row-wise order. 
Updated node_map_val or edge_map_val: | part_id\\Node/Edge Type| Type A | Type B | Type C | |------------------------|--------|---------|--------| | 0 | 0, 1 | 1, 1 | 2, 3 | | 1 | 3, 3 | 3, 4 | 4, 5 | | 2 | 5, 6 | 7, 8 | 8, 8 | """ # Update the node_map_val to be contiguous. ntype_ids = {ntype: g.get_ntype_id(ntype) for ntype in g.ntypes} ntype_ids_reverse = {v: k for k, v in ntype_ids.items()} for part_id in range(num_parts): for ntype_id in list(ntype_ids.values()): ntype = ntype_ids_reverse[ntype_id] start_id = node_map_val[ntype][part_id][0] end_id = node_map_val[ntype][part_id][1] if not (start_id == -1 and end_id == -1): continue prev_ntype_id = ( ntype_ids[ntype] - 1 if ntype_ids[ntype] > 0 else max(ntype_ids.values()) ) prev_ntype = ntype_ids_reverse[prev_ntype_id] if ntype_ids[ntype] == 0: if part_id == 0: node_map_val[ntype][part_id][0] = 0 else: node_map_val[ntype][part_id][0] = node_map_val[prev_ntype][ part_id - 1 ][1] else: node_map_val[ntype][part_id][0] = node_map_val[prev_ntype][ part_id ][1] node_map_val[ntype][part_id][1] = node_map_val[ntype][part_id][0] # Update the edge_map_val to be contiguous. 
etype_ids = {etype: g.get_etype_id(etype) for etype in g.canonical_etypes} etype_ids_reverse = {v: k for k, v in etype_ids.items()} for part_id in range(num_parts): for etype_id in list(etype_ids.values()): etype = etype_ids_reverse[etype_id] start_id = edge_map_val[etype][part_id][0] end_id = edge_map_val[etype][part_id][1] if not (start_id == -1 and end_id == -1): continue prev_etype_id = ( etype_ids[etype] - 1 if etype_ids[etype] > 0 else max(etype_ids.values()) ) prev_etype = etype_ids_reverse[prev_etype_id] if etype_ids[etype] == 0: if part_id == 0: edge_map_val[etype][part_id][0] = 0 else: edge_map_val[etype][part_id][0] = edge_map_val[prev_etype][ part_id - 1 ][1] else: edge_map_val[etype][part_id][0] = edge_map_val[prev_etype][ part_id ][1] edge_map_val[etype][part_id][1] = edge_map_val[etype][part_id][0] def partition_graph( g, graph_name, num_parts, out_path, num_hops=1, part_method="metis", balance_ntypes=None, balance_edges=False, return_mapping=False, num_trainers_per_machine=1, objtype="cut", graph_formats=None, use_graphbolt=False, **kwargs, ): """Partition a graph for distributed training and store the partitions on files. The partitioning occurs in three steps: 1) run a partition algorithm (e.g., Metis) to assign nodes to partitions; 2) construct partition graph structure based on the node assignment; 3) split the node features and edge features based on the partition result. When a graph is partitioned, each partition can contain *HALO* nodes, which are assigned to other partitions but are included in this partition for efficiency purpose. In this document, *local nodes/edges* refers to the nodes and edges that truly belong to a partition. The rest are "HALO nodes/edges". The partitioned data is stored into multiple files organized as follows: .. 
code-block:: none data_root_dir/ |-- graph_name.json # partition configuration file in JSON |-- node_map.npy # partition id of each node stored in a numpy array (optional) |-- edge_map.npy # partition id of each edge stored in a numpy array (optional) |-- part0/ # data for partition 0 |-- node_feats.dgl # node features stored in binary format |-- edge_feats.dgl # edge features stored in binary format |-- graph.dgl # graph structure of this partition stored in binary format |-- part1/ # data for partition 1 |-- node_feats.dgl |-- edge_feats.dgl |-- graph.dgl First, the metadata of the original graph and the partitioning is stored in a JSON file named after ``graph_name``. This JSON file contains the information of the original graph as well as the path of the files that store each partition. Below show an example. .. code-block:: none { "graph_name" : "test", "part_method" : "metis", "num_parts" : 2, "halo_hops" : 1, "node_map": { "_N": [ [ 0, 1261310 ], [ 1261310, 2449029 ] ] }, "edge_map": { "_N:_E:_N": [ [ 0, 62539528 ], [ 62539528, 123718280 ] ] }, "etypes": { "_N:_E:_N": 0 }, "ntypes": { "_N": 0 }, "num_nodes" : 1000000, "num_edges" : 52000000, "part-0" : { "node_feats" : "data_root_dir/part0/node_feats.dgl", "edge_feats" : "data_root_dir/part0/edge_feats.dgl", "part_graph" : "data_root_dir/part0/graph.dgl", }, "part-1" : { "node_feats" : "data_root_dir/part1/node_feats.dgl", "edge_feats" : "data_root_dir/part1/edge_feats.dgl", "part_graph" : "data_root_dir/part1/graph.dgl", }, } Here are the definition of the fields in the partition configuration file: * ``graph_name`` is the name of the graph given by a user. * ``part_method`` is the method used to assign nodes to partitions. Currently, it supports "random" and "metis". * ``num_parts`` is the number of partitions. * ``halo_hops`` is the number of hops of nodes we include in a partition as HALO nodes. * ``node_map`` is the node assignment map, which tells the partition ID a node is assigned to. 
The format of ``node_map`` is described below. * ``edge_map`` is the edge assignment map, which tells the partition ID an edge is assigned to. * ``num_nodes`` is the number of nodes in the global graph. * ``num_edges`` is the number of edges in the global graph. * `part-*` stores the data of a partition. As node/edge IDs are reshuffled, ``node_map`` and ``edge_map`` contain the information for mapping between global node/edge IDs and partition-local node/edge IDs. For heterogeneous graphs, the information in ``node_map`` and ``edge_map`` can also be used to compute node types and edge types. The format of the data in ``node_map`` and ``edge_map`` is as follows: .. code-block:: none { "node_type": [ [ part1_start, part1_end ], [ part2_start, part2_end ], ... ], ... }, Essentially, ``node_map`` and ``edge_map`` are dictionaries. The keys are node types and canonical edge types respectively. The values are lists of pairs containing the start and end of the ID range for the corresponding types in a partition. The length of the list is the number of partitions; each element in the list is a tuple that stores the start and the end of an ID range for a particular node/edge type in the partition. The graph structure of a partition is stored in a file with the DGLGraph format. Nodes in each partition are *relabeled* to always start with zero. We call the node ID in the original graph, *global ID*, while the relabeled ID in each partition, *local ID*. Each partition graph has an integer node data tensor stored under name `dgl.NID` and each value is the node's global ID. Similarly, edges are relabeled too and the mapping from local ID to global ID is stored as an integer edge data tensor under name `dgl.EID`. For a heterogeneous graph, the DGLGraph also contains a node data `dgl.NTYPE` for node type and an edge data `dgl.ETYPE` for the edge type.
The partition graph contains additional node data ("inner_node") and edge data ("inner_edge"): * "inner_node" indicates whether a node belongs to a partition. * "inner_edge" indicates whether an edge belongs to a partition. Node and edge features are split and stored together with each graph partition. All node/edge features in a partition are stored in a file with DGL format. The node/edge features are stored in dictionaries, in which the key is the node/edge data name and the value is a tensor. We do not store features of HALO nodes and edges. When performing Metis partitioning, we can put some constraints on the partitioning. Currently, it supports two constraints to balance the partitioning. By default, Metis always tries to balance the number of nodes in each partition. * ``balance_ntypes`` balances the number of nodes of different types in each partition. * ``balance_edges`` balances the number of edges in each partition. To balance the node types, a user needs to pass a vector of N elements to indicate the type of each node. N is the number of nodes in the input graph. Parameters ---------- g : DGLGraph The input graph to partition graph_name : str The name of the graph. The name will be used to construct :py:meth:`~dgl.distributed.DistGraph`. num_parts : int The number of partitions out_path : str The path to store the files for all partitioned data. num_hops : int, optional The number of hops of HALO nodes we construct on a partition graph structure. The default value is 1. part_method : str, optional The partition method. It supports "random" and "metis". The default value is "metis". balance_ntypes : tensor, optional Node type of each node. This is a 1D-array of integers. Its values indicate the node type of each node. This argument is used by Metis partition. When the argument is specified, the Metis algorithm will try to partition the input graph into partitions where each partition has roughly the same number of nodes for each node type.
The default value is None, which means Metis partitions the graph to only balance the number of nodes. balance_edges : bool Indicate whether to balance the edges in each partition. This argument is used by the Metis algorithm. return_mapping : bool Indicate whether to return the mapping between shuffled node/edge IDs and the original node/edge IDs. num_trainers_per_machine : int, optional The number of trainers per machine. If it is not 1, the whole graph will be first partitioned to each trainer, that is num_parts*num_trainers_per_machine parts. And the trainer ids of each node will be stored in the node feature 'trainer_id'. Then the partitions of trainers on the same machine will be coalesced into one larger partition. The final number of partitions is `num_parts`. objtype : str, "cut" or "vol" Set the objective as edge-cut minimization or communication volume minimization. This argument is used by the Metis algorithm. graph_formats : str or list[str] Save partitions in specified formats. It could be any combination of ``coo``, ``csc`` and ``csr``. If not specified, save one format only according to what format is available. If multiple formats are available, selection priority from high to low is ``coo``, ``csc``, ``csr``. use_graphbolt : bool, optional Whether to save partitions in GraphBolt format. Default: False. kwargs : dict Other keyword arguments for converting DGL partitions to GraphBolt. Returns ------- Tensor or dict of tensors, optional If `return_mapping=True`, return a 1D tensor that indicates the mapping between shuffled node IDs and the original node IDs for a homogeneous graph; return a dict of 1D tensors whose key is the node type and value is a 1D tensor mapping between shuffled node IDs and the original node IDs for each node type for a heterogeneous graph.
Tensor or dict of tensors, optional If `return_mapping=True`, return a 1D tensor that indicates the mapping between shuffled edge IDs and the original edge IDs for a homogeneous graph; return a dict of 1D tensors whose key is the edge type and value is a 1D tensor mapping between shuffled edge IDs and the original edge IDs for each edge type for a heterogeneous graph. Examples -------- >>> dgl.distributed.partition_graph(g, 'test', 4, num_hops=1, part_method='metis', ... out_path='output/', ... balance_ntypes=g.ndata['train_mask'], ... balance_edges=True) >>> ( ... g, node_feats, edge_feats, gpb, graph_name, ntypes_list, etypes_list, ... ) = dgl.distributed.load_partition('output/test.json', 0) """ # 'coo' is required for partition assert "coo" in np.concatenate( list(g.formats().values()) ), "'coo' format should be allowed for partitioning graph." def get_homogeneous(g, balance_ntypes): if g.is_homogeneous: sim_g = to_homogeneous(g) if isinstance(balance_ntypes, dict): assert len(balance_ntypes) == 1 bal_ntypes = list(balance_ntypes.values())[0] else: bal_ntypes = balance_ntypes elif isinstance(balance_ntypes, dict): # Here we assign node types for load balancing. # The new node types includes the ones provided by users. num_ntypes = 0 for key in g.ntypes: if key in balance_ntypes: g.nodes[key].data["bal_ntype"] = ( F.astype(balance_ntypes[key], F.int32) + num_ntypes ) uniq_ntypes = F.unique(balance_ntypes[key]) assert np.all( F.asnumpy(uniq_ntypes) == np.arange(len(uniq_ntypes)) ) num_ntypes += len(uniq_ntypes) else: g.nodes[key].data["bal_ntype"] = ( F.ones((g.num_nodes(key),), F.int32, F.cpu()) * num_ntypes ) num_ntypes += 1 sim_g = to_homogeneous(g, ndata=["bal_ntype"]) bal_ntypes = sim_g.ndata["bal_ntype"] print( "The graph has {} node types and balance among {} types".format( len(g.ntypes), len(F.unique(bal_ntypes)) ) ) # We now no longer need them. 
for key in g.ntypes: del g.nodes[key].data["bal_ntype"] del sim_g.ndata["bal_ntype"] else: sim_g = to_homogeneous(g) bal_ntypes = sim_g.ndata[NTYPE] return sim_g, bal_ntypes if objtype not in ["cut", "vol"]: raise ValueError if num_parts == 1: start = time.time() sim_g, balance_ntypes = get_homogeneous(g, balance_ntypes) print( "Converting to homogeneous graph takes {:.3f}s, peak mem: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) assert num_trainers_per_machine >= 1 if num_trainers_per_machine > 1: # First partition the whole graph to each trainer and save the trainer ids in # the node feature "trainer_id". start = time.time() node_parts = metis_partition_assignment( sim_g, num_parts * num_trainers_per_machine, balance_ntypes=balance_ntypes, balance_edges=balance_edges, mode="k-way", ) _set_trainer_ids(g, sim_g, node_parts) print( "Assigning nodes to METIS partitions takes {:.3f}s, peak mem: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) node_parts = F.zeros((sim_g.num_nodes(),), F.int64, F.cpu()) parts = {0: sim_g.clone()} orig_nids = parts[0].ndata[NID] = F.arange(0, sim_g.num_nodes()) orig_eids = parts[0].edata[EID] = F.arange(0, sim_g.num_edges()) # For one partition, we don't really shuffle nodes and edges. We just need to simulate # it and set node data and edge data of orig_id. 
parts[0].ndata["orig_id"] = orig_nids parts[0].edata["orig_id"] = orig_eids if return_mapping: if g.is_homogeneous: orig_nids = F.arange(0, sim_g.num_nodes()) orig_eids = F.arange(0, sim_g.num_edges()) else: orig_nids = { ntype: F.arange(0, g.num_nodes(ntype)) for ntype in g.ntypes } orig_eids = { etype: F.arange(0, g.num_edges(etype)) for etype in g.canonical_etypes } parts[0].ndata["inner_node"] = F.ones( (sim_g.num_nodes(),), RESERVED_FIELD_DTYPE["inner_node"], F.cpu(), ) parts[0].edata["inner_edge"] = F.ones( (sim_g.num_edges(),), RESERVED_FIELD_DTYPE["inner_edge"], F.cpu(), ) elif part_method in ("metis", "random"): start = time.time() sim_g, balance_ntypes = get_homogeneous(g, balance_ntypes) print( "Converting to homogeneous graph takes {:.3f}s, peak mem: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) if part_method == "metis": assert num_trainers_per_machine >= 1 start = time.time() if num_trainers_per_machine > 1: # First partition the whole graph to each trainer and save the trainer ids in # the node feature "trainer_id". node_parts = metis_partition_assignment( sim_g, num_parts * num_trainers_per_machine, balance_ntypes=balance_ntypes, balance_edges=balance_edges, mode="k-way", objtype=objtype, ) _set_trainer_ids(g, sim_g, node_parts) # And then coalesce the partitions of trainers on the same machine into one # larger partition. 
node_parts = F.floor_div(node_parts, num_trainers_per_machine) else: node_parts = metis_partition_assignment( sim_g, num_parts, balance_ntypes=balance_ntypes, balance_edges=balance_edges, objtype=objtype, ) print( "Assigning nodes to METIS partitions takes {:.3f}s, peak mem: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) else: node_parts = random_choice(num_parts, sim_g.num_nodes()) start = time.time() parts, orig_nids, orig_eids = partition_graph_with_halo( sim_g, node_parts, num_hops, reshuffle=True ) print( "Splitting the graph into partitions takes {:.3f}s, peak mem: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) if return_mapping: orig_nids, orig_eids = _get_orig_ids(g, sim_g, orig_nids, orig_eids) else: raise Exception("Unknown partitioning method: " + part_method) # If the input is a heterogeneous graph, get the original node types and original node IDs. # `part' has three types of node data at this point. # NTYPE: the node type. # orig_id: the global node IDs in the homogeneous version of input graph. # NID: the global node IDs in the reshuffled homogeneous version of the input graph. if not g.is_homogeneous: for name in parts: orig_ids = parts[name].ndata["orig_id"] ntype = F.gather_row(sim_g.ndata[NTYPE], orig_ids) parts[name].ndata[NTYPE] = F.astype( ntype, RESERVED_FIELD_DTYPE[NTYPE] ) assert np.all( F.asnumpy(ntype) == F.asnumpy(parts[name].ndata[NTYPE]) ) # Get the original edge types and original edge IDs. orig_ids = parts[name].edata["orig_id"] etype = F.gather_row(sim_g.edata[ETYPE], orig_ids) parts[name].edata[ETYPE] = F.astype( etype, RESERVED_FIELD_DTYPE[ETYPE] ) assert np.all( F.asnumpy(etype) == F.asnumpy(parts[name].edata[ETYPE]) ) # Calculate the global node IDs to per-node IDs mapping. 
inner_ntype = F.boolean_mask( parts[name].ndata[NTYPE], parts[name].ndata["inner_node"] == 1 ) inner_nids = F.boolean_mask( parts[name].ndata[NID], parts[name].ndata["inner_node"] == 1 ) for ntype in g.ntypes: inner_ntype_mask = inner_ntype == g.get_ntype_id(ntype) if F.sum(F.astype(inner_ntype_mask, F.int64), 0) == 0: # Skip if there is no node of this type in this partition. continue typed_nids = F.boolean_mask(inner_nids, inner_ntype_mask) # inner node IDs are in a contiguous ID range. expected_range = np.arange( int(F.as_scalar(typed_nids[0])), int(F.as_scalar(typed_nids[-1])) + 1, ) assert np.all(F.asnumpy(typed_nids) == expected_range) # Calculate the global edge IDs to per-edge IDs mapping. inner_etype = F.boolean_mask( parts[name].edata[ETYPE], parts[name].edata["inner_edge"] == 1 ) inner_eids = F.boolean_mask( parts[name].edata[EID], parts[name].edata["inner_edge"] == 1 ) for etype in g.canonical_etypes: inner_etype_mask = inner_etype == g.get_etype_id(etype) if F.sum(F.astype(inner_etype_mask, F.int64), 0) == 0: # Skip if there is no edge of this type in this partition. continue typed_eids = np.sort( F.asnumpy(F.boolean_mask(inner_eids, inner_etype_mask)) ) assert np.all( typed_eids == np.arange(int(typed_eids[0]), int(typed_eids[-1]) + 1) ) os.makedirs(out_path, mode=0o775, exist_ok=True) tot_num_inner_edges = 0 out_path = os.path.abspath(out_path) # With reshuffling, we can ensure that all nodes and edges are reshuffled # and are in contiguous ID space. 
if num_parts > 1: node_map_val = {} edge_map_val = {} for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) val = [] node_map_val[ntype] = [] for i in parts: inner_node_mask = _get_inner_node_mask(parts[i], ntype_id) val.append( F.as_scalar(F.sum(F.astype(inner_node_mask, F.int64), 0)) ) if F.sum(F.astype(inner_node_mask, F.int64), 0) == 0: node_map_val[ntype].append([-1, -1]) continue inner_nids = F.boolean_mask( parts[i].ndata[NID], inner_node_mask ) node_map_val[ntype].append( [ int(F.as_scalar(inner_nids[0])), int(F.as_scalar(inner_nids[-1])) + 1, ] ) val = np.cumsum(val).tolist() assert val[-1] == g.num_nodes(ntype) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) val = [] edge_map_val[etype] = [] for i in parts: inner_edge_mask = _get_inner_edge_mask(parts[i], etype_id) val.append( F.as_scalar(F.sum(F.astype(inner_edge_mask, F.int64), 0)) ) if F.sum(F.astype(inner_edge_mask, F.int64), 0) == 0: edge_map_val[etype].append([-1, -1]) continue inner_eids = np.sort( F.asnumpy( F.boolean_mask(parts[i].edata[EID], inner_edge_mask) ) ) edge_map_val[etype].append( [int(inner_eids[0]), int(inner_eids[-1]) + 1] ) val = np.cumsum(val).tolist() assert val[-1] == g.num_edges(etype) # Update the node_map_val and edge_map_val to be contiguous. 
_update_node_edge_map(node_map_val, edge_map_val, g, num_parts) else: node_map_val = {} edge_map_val = {} for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask(parts[0], ntype_id) inner_nids = F.boolean_mask(parts[0].ndata[NID], inner_node_mask) node_map_val[ntype] = [ [ int(F.as_scalar(inner_nids[0])), int(F.as_scalar(inner_nids[-1])) + 1, ] ] for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(parts[0], etype_id) inner_eids = F.boolean_mask(parts[0].edata[EID], inner_edge_mask) edge_map_val[etype] = [ [ int(F.as_scalar(inner_eids[0])), int(F.as_scalar(inner_eids[-1])) + 1, ] ] # Double check that the node IDs in the global ID space are sorted. for ntype in node_map_val: val = np.concatenate([np.array(l) for l in node_map_val[ntype]]) assert np.all(val[:-1] <= val[1:]) for etype in edge_map_val: val = np.concatenate([np.array(l) for l in edge_map_val[etype]]) assert np.all(val[:-1] <= val[1:]) start = time.time() ntypes = {ntype: g.get_ntype_id(ntype) for ntype in g.ntypes} etypes = {etype: g.get_etype_id(etype) for etype in g.canonical_etypes} part_metadata = { "graph_name": graph_name, "num_nodes": g.num_nodes(), "num_edges": g.num_edges(), "part_method": part_method, "num_parts": num_parts, "halo_hops": num_hops, "node_map": node_map_val, "edge_map": edge_map_val, "ntypes": ntypes, "etypes": etypes, } part_config = os.path.join(out_path, graph_name + ".json") for part_id in range(num_parts): part = parts[part_id] # Get the node/edge features of each partition. node_feats = {} edge_feats = {} if num_parts > 1: for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) # To get the edges in the input graph, we should use original node IDs. # Both orig_id and NID stores the per-node-type IDs. ndata_name = "orig_id" inner_node_mask = _get_inner_node_mask(part, ntype_id) # This is global node IDs. 
local_nodes = F.boolean_mask( part.ndata[ndata_name], inner_node_mask ) if len(g.ntypes) > 1: # If the input is a heterogeneous graph. local_nodes = F.gather_row(sim_g.ndata[NID], local_nodes) print( "part {} has {} nodes of type {} and {} are inside the partition".format( part_id, F.as_scalar( F.sum(part.ndata[NTYPE] == ntype_id, 0) ), ntype, len(local_nodes), ) ) else: print( "part {} has {} nodes and {} are inside the partition".format( part_id, part.num_nodes(), len(local_nodes) ) ) for name in g.nodes[ntype].data: if name in [NID, "inner_node"]: continue node_feats[ntype + "/" + name] = F.gather_row( g.nodes[ntype].data[name], local_nodes ) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) edata_name = "orig_id" inner_edge_mask = _get_inner_edge_mask(part, etype_id) # This is global edge IDs. local_edges = F.boolean_mask( part.edata[edata_name], inner_edge_mask ) if not g.is_homogeneous: local_edges = F.gather_row(sim_g.edata[EID], local_edges) print( "part {} has {} edges of type {} and {} are inside the partition".format( part_id, F.as_scalar( F.sum(part.edata[ETYPE] == etype_id, 0) ), etype, len(local_edges), ) ) else: print( "part {} has {} edges and {} are inside the partition".format( part_id, part.num_edges(), len(local_edges) ) ) tot_num_inner_edges += len(local_edges) for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue edge_feats[ _etype_tuple_to_str(etype) + "/" + name ] = F.gather_row(g.edges[etype].data[name], local_edges) else: for ntype in g.ntypes: if len(g.ntypes) > 1: ndata_name = "orig_id" ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask(part, ntype_id) # This is global node IDs. 
local_nodes = F.boolean_mask( part.ndata[ndata_name], inner_node_mask ) local_nodes = F.gather_row(sim_g.ndata[NID], local_nodes) else: local_nodes = sim_g.ndata[NID] for name in g.nodes[ntype].data: if name in [NID, "inner_node"]: continue node_feats[ntype + "/" + name] = F.gather_row( g.nodes[ntype].data[name], local_nodes ) for etype in g.canonical_etypes: if not g.is_homogeneous: edata_name = "orig_id" etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(part, etype_id) # This is global edge IDs. local_edges = F.boolean_mask( part.edata[edata_name], inner_edge_mask ) local_edges = F.gather_row(sim_g.edata[EID], local_edges) else: local_edges = sim_g.edata[EID] for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue edge_feats[ _etype_tuple_to_str(etype) + "/" + name ] = F.gather_row(g.edges[etype].data[name], local_edges) # delete `orig_id` from ndata/edata del part.ndata["orig_id"] del part.edata["orig_id"] part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") os.makedirs(part_dir, mode=0o775, exist_ok=True) save_tensors(node_feat_file, node_feats) save_tensors(edge_feat_file, edge_feats) part_metadata["part-{}".format(part_id)] = { "node_feats": os.path.relpath(node_feat_file, out_path), "edge_feats": os.path.relpath(edge_feat_file, out_path), } sort_etypes = len(g.etypes) > 1 part = process_partitions(part, graph_formats, sort_etypes) # transmit to graphbolt and save graph if use_graphbolt: # save FusedCSCSamplingGraph kwargs["graph_formats"] = graph_formats n_jobs = kwargs.pop("n_jobs", 1) mp_ctx = mp.get_context("spawn") with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg max_workers=min(num_parts, n_jobs), mp_context=mp_ctx, ) as executor: for part_id in range(num_parts): executor.submit( _partition_to_graphbolt( part_i=part_id, part_config=part_config, 
part_metadata=part_metadata, parts=parts, **kwargs, ) ) part_metadata["node_map_dtype"] = "int64" part_metadata["edge_map_dtype"] = "int64" else: for part_id, part in parts.items(): part_dir = os.path.join(out_path, "part" + str(part_id)) part_graph_file = os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)][ "part_graph" ] = os.path.relpath(part_graph_file, out_path) # save DGLGraph _save_dgl_graphs( part_graph_file, [part], formats=graph_formats, ) _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: num_cuts = 0 print( "There are {} edges in the graph and {} edge cuts for {} partitions.".format( g.num_edges(), num_cuts, num_parts ) ) print( "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) if return_mapping: return orig_nids, orig_eids # [TODO][Rui] Due to int64_t is expected in RPC, we have to limit the data type # of node/edge IDs to int64_t. See more details in #7175. DTYPES_TO_CHECK = { "default": [torch.int32, torch.int64], NID: [torch.int64], EID: [torch.int64], NTYPE: [torch.int8, torch.int16, torch.int32, torch.int64], ETYPE: [torch.int8, torch.int16, torch.int32, torch.int64], "inner_node": [torch.uint8], "inner_edge": [torch.uint8], "part_id": [torch.int8, torch.int16, torch.int32, torch.int64], } def _cast_to_minimum_dtype(predicate, data, field=None): if data is None: return data dtypes_to_check = DTYPES_TO_CHECK.get(field, DTYPES_TO_CHECK["default"]) if data.dtype not in dtypes_to_check: dgl_warning( f"Skipping as the data type of field {field} is {data.dtype}, " f"while supported data types are {dtypes_to_check}." ) return data for dtype in dtypes_to_check: if predicate < torch.iinfo(dtype).max: return data.to(dtype) return data # Utility functions. 
def is_homogeneous(ntypes, etypes):
    """Check whether the given node/edge types describe a homogeneous graph.

    Parameters
    ----------
    ntypes : sized collection
        The node types.
    etypes : sized collection
        The edge types.

    Returns
    -------
    bool
        True if there is exactly one node type and exactly one edge type.
    """
    return len(ntypes) == 1 and len(etypes) == 1


def init_type_per_edge(graph, gpb):
    """Initialize edge ids for every edge type.

    Passes ``graph.edata[EID]`` to ``gpb.map_to_per_etype`` and returns the
    first element of the result.
    """
    etype_ids = gpb.map_to_per_etype(graph.edata[EID])[0]
    return etype_ids


def _load_part(part_config, part_id, parts=None):
    """Return the graph of one partition, from memory or from disk.

    Parameters
    ----------
    part_config : str
        The path of the partition config file.
    part_id : int
        The partition ID.
    parts : mapping, optional
        Already-loaded partition graphs indexed by partition ID. If None,
        the graph is loaded with ``load_partition`` (without features).

    Returns
    -------
    The partition graph.
    """
    if parts is None:
        graph, _, _, _, _, _, _ = load_partition(
            part_config, part_id, load_feats=False
        )
    else:
        graph = parts[part_id]
    return graph


def _save_graph_gb(part_config, part_id, csc_graph):
    """Save ``csc_graph`` next to the partition config and return its path.

    The graph is written with ``torch.save`` to
    ``<config dir>/part<part_id>/fused_csc_sampling_graph.pt``.

    Returns
    -------
    str
        The saved path, relative to the directory of ``part_config``.
    """
    csc_graph_save_dir = os.path.join(
        os.path.dirname(part_config),
        f"part{part_id}",
    )
    csc_graph_path = os.path.join(
        csc_graph_save_dir, "fused_csc_sampling_graph.pt"
    )
    torch.save(csc_graph, csc_graph_path)

    return os.path.relpath(csc_graph_path, os.path.dirname(part_config))


def cast_various_to_minimum_dtype_gb(
    num_parts,
    indptr,
    indices,
    type_per_edge,
    etypes,
    ntypes,
    node_attributes,
    edge_attributes,
    part_meta=None,
    graph=None,
    edge_count=None,
    node_count=None,
    tot_edge_count=None,
    tot_node_count=None,
):
    """Cast various data to minimum dtype.

    The counts used as upper bounds come either from ``graph`` and
    ``part_meta`` (when ``graph`` is given) or from the four explicit
    ``*_count`` arguments, which must then all be provided.

    ``node_attributes`` and ``edge_attributes`` are updated in place for the
    keys that have a known bound; other keys are left untouched.

    Returns
    -------
    tuple
        The cast ``(indptr, indices, type_per_edge)``.
    """
    if graph is not None:
        assert part_meta is not None
        tot_edge_count = graph.num_edges()
        tot_node_count = graph.num_nodes()
        node_count = part_meta["num_nodes"]
        edge_count = part_meta["num_edges"]
    else:
        assert tot_edge_count is not None
        assert tot_node_count is not None
        assert edge_count is not None
        assert node_count is not None

    # Cast 1: indptr.
    indptr = _cast_to_minimum_dtype(tot_edge_count, indptr)
    # Cast 2: indices.
    indices = _cast_to_minimum_dtype(tot_node_count, indices)
    # Cast 3: type_per_edge.
    type_per_edge = _cast_to_minimum_dtype(
        len(etypes), type_per_edge, field=ETYPE
    )
    # Cast 4: node/edge_attributes.
    predicates = {
        NID: node_count,
        "part_id": num_parts,
        NTYPE: len(ntypes),
        EID: edge_count,
        ETYPE: len(etypes),
        DGL2GB_EID: edge_count,
        GB_DST_ID: node_count,
    }
    for attributes in [node_attributes, edge_attributes]:
        for key in attributes:
            if key not in predicates:
                continue
            attributes[key] = _cast_to_minimum_dtype(
                predicates[key], attributes[key], field=key
            )
    return indptr, indices, type_per_edge


def _create_attributes_gb(
    graph,
    gpb,
    edge_ids,
    is_homo,
    store_inner_node,
    store_inner_edge,
    store_eids,
    debug_mode,
):
    """Collect the node/edge attributes to store in the GraphBolt graph.

    Edge attributes (and ``type_per_edge``) are indexed with ``edge_ids``.
    In debug mode every node/edge data field of ``graph`` is kept.

    Returns
    -------
    tuple
        ``(node_attributes, edge_attributes, type_per_edge)``;
        ``type_per_edge`` is None when ``is_homo`` is true.
    """
    # Save node attributes. Detailed attributes are shown below.
    #  DGL_GB\Attributes  dgl.NID("_ID")  dgl.NTYPE("_TYPE")  "inner_node"  "part_id"
    #  DGL_Homograph         ✅               🚫                 ✅           ✅
    #  GB_Homograph          ✅               🚫               optional       🚫
    #  DGL_Heterograph       ✅               ✅                 ✅           ✅
    #  GB_Heterograph        ✅               🚫               optional       🚫
    required_node_attrs = [NID]
    if store_inner_node:
        required_node_attrs.append("inner_node")
    if debug_mode:
        required_node_attrs = list(graph.ndata.keys())
    node_attributes = {attr: graph.ndata[attr] for attr in required_node_attrs}

    # Save edge attributes. Detailed attributes are shown below.
    #  DGL_GB\Attributes  dgl.EID("_ID")  dgl.ETYPE("_TYPE")  "inner_edge"
    #  DGL_Homograph         ✅               🚫                 ✅
    #  GB_Homograph         optional          🚫               optional
    #  DGL_Heterograph       ✅               ✅                 ✅
    #  GB_Heterograph       optional          ✅               optional
    type_per_edge = None
    if not is_homo:
        type_per_edge = init_type_per_edge(graph, gpb)[edge_ids]
        type_per_edge = type_per_edge.to(RESERVED_FIELD_DTYPE[ETYPE])
    required_edge_attrs = []
    if store_eids:
        required_edge_attrs.append(EID)
    if store_inner_edge:
        required_edge_attrs.append("inner_edge")
    if debug_mode:
        required_edge_attrs = list(graph.edata.keys())
    edge_attributes = {
        attr: graph.edata[attr][edge_ids] for attr in required_edge_attrs
    }
    return node_attributes, edge_attributes, type_per_edge


def _convert_dgl_partition_to_gb(
    ntypes,
    etypes,
    gpb,
    part_meta,
    graph,
    graph_formats=None,
    store_eids=False,
    store_inner_node=False,
    store_inner_edge=False,
):
    """Converts a single DGL partition to GraphBolt.

    Parameters
    ----------
    ntypes : dict
        The node types
    etypes : dict
        The edge types
    gpb : GraphPartitionBook
        The global partition information.
    part_meta : dict
        Contain the meta data of the partition.
    graph : DGLGraph
        The graph to be converted to graphbolt graph.
    graph_formats : str or list[str], optional
        Save partitions in specified formats. It could be any combination of
        `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`,
        it is not necessary to specify this argument. It's mainly for
        specifying `coo` format to save edge ID mapping and destination node
        IDs. If not specified, whether to save `coo` format is determined by
        the availability of the format in DGL partitions. Default: None.
    store_eids : bool, optional
        Whether to store edge IDs in the new graph. Default: False.
    store_inner_node : bool, optional
        Whether to store inner node mask in the new graph. Default: False.
    store_inner_edge : bool, optional
        Whether to store inner edge mask in the new graph. Default: False.

    Returns
    -------
    The graph built by ``gb.fused_csc_sampling_graph``.
    """
    debug_mode = "DGL_DIST_DEBUG" in os.environ
    if debug_mode:
        dgl_warning(
            "Running in debug mode which means all attributes of DGL partitions"
            " will be saved to the new format."
        )

    num_parts = part_meta["num_parts"]

    is_homo = is_homogeneous(ntypes, etypes)
    # NOTE(review): node type IDs are taken from the iteration order of
    # `ntypes`, whereas edge type IDs below are read from the values of
    # `etypes`. Presumably `ntypes` iterates in ID order - confirm with callers.
    node_type_to_id = (
        None
        if is_homo
        else {ntype: ntid for ntid, ntype in enumerate(ntypes)}
    )
    edge_type_to_id = (
        None
        if is_homo
        else {
            gb.etype_tuple_to_str(etype): etid
            for etype, etid in etypes.items()
        }
    )
    # Obtain CSC indptr and indices.
    indptr, indices, edge_ids = graph.adj_tensors("csc")

    node_attributes, edge_attributes, type_per_edge = _create_attributes_gb(
        graph,
        gpb,
        edge_ids,
        is_homo,
        store_inner_node,
        store_inner_edge,
        store_eids,
        debug_mode,
    )
    # When converting DGLGraph to FusedCSCSamplingGraph, edge IDs are
    # re-ordered(actually FusedCSCSamplingGraph does not have edge IDs
    # in nature). So we need to save such re-order info for any
    # operations that uses original local edge IDs. For now, this is
    # required by `DistGraph.find_edges()` for link prediction tasks.
    #
    # What's more, in order to find the dst nodes efficiently, we save
    # dst nodes directly in the edge attributes.
    #
    # So we require additional `(2 * E) * dtype` space in total.
    if isinstance(graph_formats, str):
        graph_formats = [graph_formats]
    save_coo = (
        graph_formats is None and "coo" in graph.formats()["created"]
    ) or (graph_formats is not None and "coo" in graph_formats)
    if save_coo:
        edge_attributes[DGL2GB_EID] = torch.argsort(edge_ids)
        edge_attributes[GB_DST_ID] = gb.expand_indptr(
            indptr, dtype=indices.dtype
        )

    indptr, indices, type_per_edge = cast_various_to_minimum_dtype_gb(
        graph=graph,
        part_meta=part_meta,
        num_parts=num_parts,
        indptr=indptr,
        indices=indices,
        type_per_edge=type_per_edge,
        etypes=etypes,
        ntypes=ntypes,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
    )

    csc_graph = gb.fused_csc_sampling_graph(
        indptr,
        indices,
        node_type_offset=None,
        type_per_edge=type_per_edge,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
    )
    return csc_graph


def gb_convert_single_dgl_partition(
    part_id,
    graph_formats,
    part_config,
    store_eids=True,
    store_inner_node=True,
    store_inner_edge=True,
):
    """
    The pipeline converting single partition to graphbolt.

    Parameters
    ----------
    part_id : int
        The partition ID.
    graph_formats : str or list[str]
        Save partitions in specified formats. It could be any combination of
        `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`,
        it is not necessary to specify this argument. It's mainly for
        specifying `coo` format to save edge ID mapping and destination node
        IDs. If None, whether to save `coo` format is determined by
        the availability of the format in DGL partitions.
    part_config : str
        The path of the partition config file.
    store_eids : bool, optional
        Whether to store edge IDs in the new graph. Default: True.
    store_inner_node : bool, optional
        Whether to store inner node mask in the new graph. Default: True.
    store_inner_edge : bool, optional
        Whether to store inner edge mask in the new graph. Default: True.

    Returns
    -------
    str
        The path csc_graph to save.
    """
    gpb, _, ntypes, etypes = load_partition_book(
        part_config=part_config, part_id=part_id
    )
    part = _load_part(part_config, part_id)
    part_meta = copy.deepcopy(_load_part_config(part_config))
    csc_graph = _convert_dgl_partition_to_gb(
        graph=part,
        ntypes=ntypes,
        etypes=etypes,
        gpb=gpb,
        part_meta=part_meta,
        graph_formats=graph_formats,
        store_eids=store_eids,
        store_inner_node=store_inner_node,
        store_inner_edge=store_inner_edge,
    )
    rel_path = _save_graph_gb(part_config, part_id, csc_graph)
    return rel_path


def _convert_partition_to_graphbolt_wrapper(
    graph_formats,
    part_config,
    store_eids,
    store_inner_node,
    store_inner_edge,
    n_jobs,
    num_parts,
):
    """Convert every partition to GraphBolt and return the updated metadata.

    Each partition is converted with ``gb_convert_single_dgl_partition``,
    either in worker processes (``n_jobs > 1`` and ``num_parts > 1``) or
    sequentially in the current process.

    Returns
    -------
    dict
        The partition config loaded from ``part_config`` with the
        ``part_graph_graphbolt`` path of every partition and the
        ``node_map_dtype`` / ``edge_map_dtype`` entries set. The config file
        itself is not written here.
    """
    # [Rui] DGL partitions are always saved as homogeneous graphs even though
    # the original graph is heterogeneous. But heterogeneous information like
    # node/edge types are saved as node/edge data alongside with partitions.
    # What needs more attention is that due to the existence of HALO nodes in
    # each partition, the local node IDs are not sorted according to the node
    # types. So we fail to assign ``node_type_offset`` as required by GraphBolt.
    # But this is not a problem since such information is not used in sampling.
    # We can simply pass None to it.

    # Iterate over partitions.
    convert_with_format = partial(
        gb_convert_single_dgl_partition,
        part_config=part_config,
        graph_formats=graph_formats,
        store_eids=store_eids,
        store_inner_node=store_inner_node,
        store_inner_edge=store_inner_edge,
    )
    # Need to create entirely new interpreters, because we call C++ downstream
    # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods
    # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil
    rel_path_results = []
    if n_jobs > 1 and num_parts > 1:
        mp_ctx = mp.get_context("spawn")
        with concurrent.futures.ProcessPoolExecutor(  # pylint: disable=unexpected-keyword-arg
            max_workers=min(num_parts, n_jobs),
            mp_context=mp_ctx,
        ) as executor:
            # Submit every job before waiting on any of them. Calling
            # `.result()` right after each `submit()` would block until that
            # partition is done and make the pool run one job at a time.
            futures = [
                executor.submit(convert_with_format, part_id=part_id)
                for part_id in range(num_parts)
            ]
            # Collect in submission order so index == partition ID.
            rel_path_results = [future.result() for future in futures]
    else:
        # If running single-threaded, avoid spawning new interpreter, which is slow
        for part_id in range(num_parts):
            rel_path = convert_with_format(part_id=part_id)
            rel_path_results.append(rel_path)

    part_meta = _load_part_config(part_config)
    for part_id in range(num_parts):
        # Update graph path.
        part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[
            part_id
        ]

    # Save dtype info into partition config.
    # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more
    # details in #7175.
    part_meta["node_map_dtype"] = "int64"
    part_meta["edge_map_dtype"] = "int64"
    return part_meta


def dgl_partition_to_graphbolt(
    part_config,
    *,
    store_eids=True,
    store_inner_node=False,
    store_inner_edge=False,
    graph_formats=None,
    n_jobs=1,
):
    """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt.

    This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is
    dedicated for sampling in `GraphBolt`. New graphs will be stored alongside
    original graph as `fused_csc_sampling_graph.pt`.

    In the near future, partitions are supposed to be saved as
    `FusedCSCSamplingGraph` directly. At that time, this API should be deprecated.

    Parameters
    ----------
    part_config : str
        The partition configuration JSON file.
    store_eids : bool, optional
        Whether to store edge IDs in the new graph. Default: True.
    store_inner_node : bool, optional
        Whether to store inner node mask in the new graph. Default: False.
    store_inner_edge : bool, optional
        Whether to store inner edge mask in the new graph. Default: False.
    graph_formats : str or list[str], optional
        Save partitions in specified formats. It could be any combination of
        `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`,
        it is not necessary to specify this argument. It's mainly for
        specifying `coo` format to save edge ID mapping and destination node
        IDs. If not specified, whether to save `coo` format is determined by
        the availability of the format in DGL partitions. Default: None.
    n_jobs: int
        Number of parallel jobs to run during partition conversion. Max
        parallelism is determined by the partition count.
    """
    debug_mode = "DGL_DIST_DEBUG" in os.environ
    if debug_mode:
        dgl_warning(
            "Running in debug mode which means all attributes of DGL partitions"
            " will be saved to the new format."
        )
    part_meta = _load_part_config(part_config)
    num_parts = part_meta["num_parts"]
    part_meta = _convert_partition_to_graphbolt_wrapper(
        graph_formats=graph_formats,
        part_config=part_config,
        store_eids=store_eids,
        store_inner_node=store_inner_node,
        store_inner_edge=store_inner_edge,
        n_jobs=n_jobs,
        num_parts=num_parts,
    )
    # Persist the updated metadata back to the partition config file.
    _dump_part_config(part_config, part_meta)
import rpc REGISTER_ROLE = 700001 REG_ROLE_MSG = "Register_Role" class RegisterRoleResponse(rpc.Response): """Send a confirmation signal (just a short string message) of RegisterRoleRequest to client. """ def __init__(self, msg): self.msg = msg def __getstate__(self): return self.msg def __setstate__(self, state): self.msg = state class RegisterRoleRequest(rpc.Request): """Send client id and role to server Parameters ---------- client_id : int ID of client role : str role of client """ def __init__(self, client_id, machine_id, role): self.client_id = client_id self.machine_id = machine_id self.role = role self.group_id = rpc.get_group_id() def __getstate__(self): return self.client_id, self.machine_id, self.role, self.group_id def __setstate__(self, state): self.client_id, self.machine_id, self.role, self.group_id = state def process_request(self, server_state): kv_store = server_state.kv_store role = server_state.roles.setdefault(self.group_id, {}) if self.role not in role: role[self.role] = set() if kv_store is not None: barrier_count = kv_store.barrier_count.setdefault( self.group_id, {} ) barrier_count[self.role] = 0 role[self.role].add((self.client_id, self.machine_id)) total_count = 0 for key in role: total_count += len(role[key]) # Clients are blocked util all clients register their roles. 
def init_role(role):
    """Initialize the role of the current process.

    Each process is associated with a role so that we can determine what
    function can be invoked in a process. For example, we do not allow some
    functions in sampler processes.

    In standalone mode (``DGL_DIST_MODE`` unset or equal to ``"standalone"``)
    no RPC is issued: the role is recorded, rank 0 is assigned when the role
    is ``"default"`` and the function returns.

    Otherwise the role of the current process is registered with server 0
    (this blocks until all clients have registered), the roles of all client
    processes are fetched, and the per-role and global ranks of every client
    are computed in a deterministic way so that all clients derive the same
    rank for the same client process.

    Parameters
    ----------
    role : str
        Role name of the current process, e.g. ``"default"`` for trainers.
    """
    global CUR_ROLE
    global PER_ROLE_RANK
    global GLOBAL_RANK
    global IS_STANDALONE

    CUR_ROLE = role

    if os.environ.get("DGL_DIST_MODE", "standalone") == "standalone":
        if role == "default":
            GLOBAL_RANK[0] = 0
            PER_ROLE_RANK["default"] = {0: 0}
        IS_STANDALONE = True
        return

    PER_ROLE_RANK = {}
    GLOBAL_RANK = {}

    # Register the current role. This blocks until all clients register
    # themselves.
    request = RegisterRoleRequest(rpc.get_rank(), rpc.get_machine_id(), role)
    rpc.send_request(0, request)
    response = rpc.recv_response()
    assert response.msg == REG_ROLE_MSG

    # Get all clients on all machines.
    rpc.send_request(0, GetRoleRequest())
    response = rpc.recv_response()
    assert response.msg == GET_ROLE_MSG

    # Compute a new rank for each client:
    # * per-role ranks are contiguous within a machine;
    # * global ranks are contiguous within a machine and within a role.
    # "default" is processed first so that trainer global ranks start at 0.
    role_names = ["default"]
    role_names.extend(name for name in response.role if name != "default")

    global_rank = 0
    for role_name in role_names:
        # Group the clients of this role by the machine they run on.
        machines = {}
        for client_id, machine_id in response.role[role_name]:
            machines.setdefault(machine_id, []).append(client_id)

        per_role = PER_ROLE_RANK[role_name] = {}
        # Visit the machine IDs that actually host this role in ascending
        # order instead of assuming they are exactly 0..N-1; a role that
        # runs on only a subset of machines would otherwise hit a KeyError.
        for machine_id in sorted(machines):
            for client_id in sorted(machines[machine_id]):
                GLOBAL_RANK[client_id] = global_rank
                global_rank += 1
                per_role[client_id] = len(per_role)
For the client processes of the same role, their ranks are in a contiguous range. """ if IS_STANDALONE: return 0 else: return GLOBAL_RANK[rpc.get_rank()] def get_rank(role): """Get the role-specific rank""" if IS_STANDALONE: return 0 else: return PER_ROLE_RANK[role][rpc.get_rank()] def get_trainer_rank(): """Get the rank of the current trainer process. This function can only be called in the trainer process. It will result in an error if it's called in the process of other roles. """ assert CUR_ROLE == "default" if IS_STANDALONE: return 0 else: return PER_ROLE_RANK["default"][rpc.get_rank()] def get_role(): """Get the role of the current process""" return CUR_ROLE def get_num_trainers(): """Get the number of trainer processes""" return len(PER_ROLE_RANK["default"]) rpc.register_service(REGISTER_ROLE, RegisterRoleRequest, RegisterRoleResponse) rpc.register_service(GET_ROLE, GetRoleRequest, GetRoleResponse) ================================================ FILE: python/dgl/distributed/rpc.py ================================================ """RPC components. They are typically functions or utilities used by both server and clients.""" import abc import os import pickle import random import numpy as np from .. 
def read_ip_config(filename, num_servers):
    """Read network configuration information of server from file.

    For example, the following TXT shows a 4-machine configuration:

        172.31.40.143
        172.31.36.140
        172.31.47.147
        172.31.30.180

    Users can also set user-specified port for this network configuration.
    For example:

        172.31.40.143 20090
        172.31.36.140 20090
        172.31.47.147 20090
        172.31.30.180 20090

    Each non-blank line describes one machine. Blank lines are ignored.

    Note that, DGL supports multiple backup servers that share data with
    each other on the same machine via shared-memory tensor. The
    num_servers should be >= 1. For example, if we set num_servers to 5, it
    means that we have 1 main server and 4 backup servers on current
    machine. The i-th server of a machine listens on ``port + i``.

    Parameters
    ----------
    filename : str
        Path of IP configuration file.
    num_servers : int
        Server count on each machine.

    Returns
    -------
    dict
        server namebook.
        The key is server_id (int)
        The value is [machine_id, ip, port, num_servers]
        ([int, str, int, int])

        e.g.,

          {0:[0, '172.31.40.143', 30050, 2],
           1:[0, '172.31.40.143', 30051, 2],
           2:[1, '172.31.36.140', 30050, 2],
           3:[1, '172.31.36.140', 30051, 2]}

        If a line has more than two fields, an error message is printed and
        the entries parsed so far are returned.
    """
    assert len(filename) > 0, "filename cannot be empty."
    assert num_servers > 0, (
        "num_servers (%d) must be a positive number." % num_servers
    )
    server_namebook = {}
    server_id = 0
    machine_id = 0
    try:
        # The context manager guarantees the file handle is closed, also
        # when a malformed line aborts parsing.
        with open(filename) as config_file:
            for line in config_file:
                fields = line.split()
                if not fields:
                    # A blank line carries no machine; skip it rather than
                    # reporting a format error and truncating the namebook.
                    continue
                if len(fields) == 2:
                    port = int(fields[1])
                elif len(fields) == 1:
                    port = DEFUALT_PORT
                else:
                    raise RuntimeError("length of result can only be 1 or 2.")
                ip_addr = fields[0]
                for s_count in range(num_servers):
                    server_namebook[server_id] = [
                        machine_id,
                        ip_addr,
                        port + s_count,
                        num_servers,
                    ]
                    server_id += 1
                machine_id += 1
    except RuntimeError:
        print("Error: data format on each line should be: [ip] [port]")
    return server_namebook
def connect_receiver(ip_addr, port, recv_id, group_id=-1):
    """Connect to target receiver

    Parameters
    ----------
    ip_addr : str
        receiver's IP address, e,g, '192.168.8.12'
    port : int
        receiver's listening port
    recv_id : int
        receiver's ID
    group_id : int, optional
        When left at -1, ``recv_id`` is used as the target ID directly.
        Otherwise the target ID is obtained by calling
        ``register_client(recv_id, group_id)``.

    Returns
    -------
    The value returned by the underlying C API call.

    Raises
    ------
    DGLError
        If the resolved target ID is negative.
    """
    if group_id == -1:
        resolved_id = recv_id
    else:
        resolved_id = register_client(recv_id, group_id)

    if resolved_id < 0:
        raise DGLError(f"Invalid target id: {resolved_id}")
    return _CAPI_DGLRPCConnectReceiver(ip_addr, int(port), int(resolved_id))
def set_num_server_per_machine(num_server):
    """Set the total number of server per machine

    Parameters
    ----------
    num_server : int
        Number of servers on each machine.
    """
    # Normalize to a Python int before crossing into the C API, exactly as
    # the other setters in this module (set_num_server, set_num_client,
    # set_machine_id, ...) do.
    _CAPI_DGLRPCSetNumServerPerMachine(int(num_server))
Parameter --------- service_id : int Service ID. req_cls : class Request class. res_cls : class, optional Response class. If none, the service has no response. """ REQUEST_CLASS_TO_SERVICE_ID[req_cls] = service_id if res_cls is not None: RESPONSE_CLASS_TO_SERVICE_ID[res_cls] = service_id SERVICE_ID_TO_PROPERTY[service_id] = (req_cls, res_cls) def get_service_property(service_id): """Get service property. Parameters ---------- service_id : int Service ID. Returns ------- (class, class) (Request class, Response class) """ return SERVICE_ID_TO_PROPERTY[service_id] class Request: """Base request class""" @abc.abstractmethod def __getstate__(self): """Get serializable states. Must be inherited by subclasses. For array members, return them as individual return values (i.e., do not put them in containers like dictionary or list). """ @abc.abstractmethod def __setstate__(self, state): """Construct the request object from serialized states. Must be inherited by subclasses. """ @abc.abstractmethod def process_request(self, server_state): """Server-side function to process the request. Must be inherited by subclasses. Parameters ---------- server_state : ServerState Server state data. Returns ------- Response Response of this request or None if no response. """ @property def service_id(self): """Get service ID.""" cls = self.__class__ sid = REQUEST_CLASS_TO_SERVICE_ID.get(cls, None) if sid is None: raise DGLError( "Request class {} has not been registered as a service.".format( cls ) ) return sid class Response: """Base response class""" @abc.abstractmethod def __getstate__(self): """Get serializable states. Must be inherited by subclasses. For array members, return them as individual return values (i.e., do not put them in containers like dictionary or list). """ @abc.abstractmethod def __setstate__(self, state): """Construct the response object from serialized states. Must be inherited by subclasses. 
def deserialize_from_payload(cls, data, tensors):
    """Deserialize and reconstruct the object from payload.

    The object must have implemented the __setstate__ function.

    ``data`` is expected to unpickle to a pair ``(positions, values)``
    giving, for every non-tensor state element, its index in the original
    state and its value. The remaining indices are filled with ``tensors``
    in order. A one-element state is handed to ``__setstate__`` as the bare
    element, any other state as a tuple.

    Parameters
    ----------
    cls : class
        The object class.
    data : bytearray
        Serialized data buffer.
    tensors : list[Tensor]
        A list of tensor payloads.

    Returns
    -------
    object
        De-serialized object of class cls.
    """
    # NOTE(security): pickle.loads executes arbitrary code embedded in the
    # payload; ``data`` must only ever come from trusted peers.
    positions, nonarray_state = pickle.loads(data)

    # A private sentinel marks the slots not yet filled. It cannot collide
    # with any deserialized value, including None.
    unfilled = object()
    state = [unfilled] * (len(nonarray_state) + len(tensors))
    for index, value in zip(positions, nonarray_state):
        state[index] = value

    # Slots still unfilled belong to the tensors, in their original order.
    if tensors:
        remaining = iter(tensors)
        state = [
            next(remaining) if item is unfilled else item for item in state
        ]

    state = state[0] if len(state) == 1 else tuple(state)

    # Bypass __init__: the object is rebuilt purely from its state.
    obj = cls.__new__(cls)
    obj.__setstate__(state)
    return obj
def send_request(target, request):
    """Send one request to the target server.

    The request is serialized into an :class:`RPCMessage` stamped with a
    fresh message sequence number, this process's rank as client ID and the
    current group ID, and is then handed to the sender.

    The operation is non-blocking -- it does not guarantee the payloads have
    reached the target or even have left the sender process. However,
    all the payloads (i.e., data and arrays) can be safely freed after this
    function returns.

    Parameters
    ----------
    target : int
        ID of target server.
    request : Request
        The request to send.

    Raises
    ------
    ConnectionError if there is any problem with the connection.
    """
    # Resolve the service first: an unregistered request class raises before
    # a sequence number is consumed.
    sid = request.service_id
    seq = incr_msg_seq()
    sender_rank = get_rank()
    payload, tensor_payloads = serialize_to_payload(request)
    message = RPCMessage(
        sid,
        seq,
        sender_rank,
        target,
        payload,
        tensor_payloads,
        group_id=get_group_id(),
    )
    send_rpc_message(message, target)
""" service_id = response.service_id msg_seq = get_msg_seq() client_id = target server_id = get_rank() data, tensors = serialize_to_payload(response) msg = RPCMessage( service_id, msg_seq, client_id, server_id, data, tensors, group_id ) send_rpc_message(msg, get_client(client_id, group_id)) def recv_request(timeout=0): """Receive one request. Receive one :class:`RPCMessage` and de-serialize it into a proper Request object. The operation is blocking -- it returns when it receives any message or it times out. Parameters ---------- timeout : int, optional The timeout value in milliseconds. If zero, wait indefinitely. Returns ------- req : request One request received from the target, or None if it times out. client_id : int Client' ID received from the target, or -1 if it times out. group_id : int Group' ID received from the target, or -1 if it times out. Raises ------ ConnectionError if there is any problem with the connection. """ msg = recv_rpc_message(timeout) if msg is None: return None, -1, -1 set_msg_seq(msg.msg_seq) req_cls, _ = SERVICE_ID_TO_PROPERTY[msg.service_id] if req_cls is None: raise DGLError( "Got request message from service ID {}, " "but no request class is registered.".format(msg.service_id) ) req = deserialize_from_payload(req_cls, msg.data, msg.tensors) if msg.server_id != get_rank(): raise DGLError( "Got request sent to server {}, " "different from my rank {}!".format(msg.server_id, get_rank()) ) return req, msg.client_id, msg.group_id def recv_response(timeout=0): """Receive one response. Receive one :class:`RPCMessage` and de-serialize it into a proper Response object. The operation is blocking -- it returns when it receives any message or it times out. Parameters ---------- timeout : int, optional The timeout value in milliseconds. If zero, wait indefinitely. Returns ------- res : Response One response received from the target, or None if it times out. Raises ------ ConnectionError if there is any problem with the connection. 
def remote_call(target_and_requests, timeout=0):
    """Invoke registered services on remote servers and collect responses.

    Every ``target`` is treated as a machine ID: the request is sent to a
    server picked uniformly at random among the servers of that machine.

    The operation is blocking -- it returns when it receives all responses
    or it times out.

    Parameters
    ----------
    target_and_requests : list[(int, Request)]
        A list of requests and the machine they should be sent to.
    timeout : int, optional
        The timeout value in milliseconds. If zero, wait indefinitely.

    Returns
    -------
    list[Response]
        Responses for each target-request pair. If the request does not have
        response, None is placed.

    Raises
    ------
    DGLError
        If a receive times out, if a response arrives for a service without
        a registered response class, or if a response is addressed to a
        different client.
    ConnectionError if there is any problem with the connection.
    """
    if not target_and_requests:
        return []

    all_res = [None] * len(target_and_requests)
    msgseq2pos = {}

    # These values do not change while the batch is being sent; query the
    # RPC context once instead of on every iteration.
    myrank = get_rank()
    group_id = get_group_id()
    servers_per_machine = get_num_server_per_machine()

    for pos, (target, request) in enumerate(target_and_requests):
        # send request
        service_id = request.service_id
        msg_seq = incr_msg_seq()
        server_id = random.randint(
            target * servers_per_machine,
            (target + 1) * servers_per_machine - 1,
        )
        data, tensors = serialize_to_payload(request)
        msg = RPCMessage(
            service_id, msg_seq, myrank, server_id, data, tensors, group_id
        )
        send_rpc_message(msg, server_id)
        # Only services with a response class are awaited below.
        if get_service_property(service_id)[1] is not None:
            msgseq2pos[msg_seq] = pos

    # Exactly one message is awaited per request that expects a response.
    for _ in range(len(msgseq2pos)):
        msg = recv_rpc_message(timeout)
        if msg is None:
            raise DGLError(
                f"Timed out for receiving message within {timeout} milliseconds"
            )
        _, res_cls = SERVICE_ID_TO_PROPERTY[msg.service_id]
        if res_cls is None:
            raise DGLError(
                "Got response message from service ID {}, "
                "but no response class is registered.".format(msg.service_id)
            )
        res = deserialize_from_payload(res_cls, msg.data, msg.tensors)
        if msg.client_id != myrank:
            raise DGLError(
                "Got reponse of request sent by client {}, "
                "different from my rank {}!".format(msg.client_id, myrank)
            )
        # Responses may arrive out of order; the sequence number maps each
        # one back to the position of its request.
        all_res[msgseq2pos[msg.msg_seq]] = res
    return all_res
def recv_responses(msgseq2pos, timeout=0):
    """Receive responses

    It returns the responses in the same order as the requests. The order of
    requests are stored in msgseq2pos.

    The operation is blocking -- it returns when it receives all responses
    or it times out.

    Parameters
    ----------
    msgseq2pos : dict
        map the message sequence number to its position in the input list.
    timeout : int, optional
        The timeout value in milliseconds. If zero, wait indefinitely.

    Returns
    -------
    list[Response]
        Responses placed at the positions recorded in ``msgseq2pos``. The
        list is as long as the largest recorded position plus one, and
        positions without a recorded request hold None. An empty mapping
        yields an empty list.

    Raises
    ------
    DGLError
        If a receive times out, if a response arrives for a service without
        a registered response class, or if a response is addressed to a
        different client.
    """
    if not msgseq2pos:
        # Nothing is awaited (no request expects a response); taking the
        # maximum of an empty collection would raise.
        return []

    myrank = get_rank()
    all_res = [None] * (max(msgseq2pos.values()) + 1)

    # Exactly one message is awaited per recorded request.
    for _ in range(len(msgseq2pos)):
        msg = recv_rpc_message(timeout)
        if msg is None:
            raise DGLError(
                f"Timed out for receiving message within {timeout} milliseconds"
            )
        _, res_cls = SERVICE_ID_TO_PROPERTY[msg.service_id]
        if res_cls is None:
            raise DGLError(
                "Got response message from service ID {}, "
                "but no response class is registered.".format(msg.service_id)
            )
        res = deserialize_from_payload(res_cls, msg.data, msg.tensors)
        if msg.client_id != myrank:
            raise DGLError(
                "Got reponse of request sent by client {}, "
                "different from my rank {}!".format(msg.client_id, myrank)
            )
        # Responses may arrive out of order; the sequence number maps each
        # one back to the position of its request.
        all_res[msgseq2pos[msg.msg_seq]] = res
    return all_res
The data buffer in the requst will be copied to internal buffer for actual transmission, while no memory copy for tensor payloads (a.k.a. zero-copy). The underlying sending threads will hold references to the tensors until the contents have been transmitted. Parameters ---------- msg : RPCMessage The message to send. target : int target ID Raises ------ ConnectionError if there is any problem with the connection. """ _CAPI_DGLRPCSendRPCMessage(msg, int(target)) def recv_rpc_message(timeout=0): """Receive one message. The operation is blocking -- it returns when it receives any message or it times out. Parameters ---------- timeout : int, optional The timeout value in milliseconds. If zero, wait indefinitely. Returns ------- msg : RPCMessage One rpc message received from the target, or None if it times out. Raises ------ ConnectionError if there is any problem with the connection. """ msg = _CAPI_DGLRPCCreateEmptyRPCMessage() status = _CAPI_DGLRPCRecvRPCMessage(timeout, msg) return msg if status == 0 else None def client_barrier(): """Barrier all client processes""" req = ClientBarrierRequest() send_request(0, req) res = recv_response() assert res.msg == "barrier" def finalize_server(): """Finalize resources of current server""" finalize_sender() finalize_receiver() print("Server (%d) shutdown." % get_rank()) def fast_pull( name, id_tensor, part_id, service_id, machine_count, group_count, machine_id, client_id, local_data, policy, ): """Fast-pull api used by kvstore. 
Parameters ---------- name : str data name id_tensor : tensor data ID part_id : tensor partition ID of id_tensor service_id : int service_id of pull request machine_count : int total number of machine group_count : int total number of server inside machine machine_id : int current machine ID client_id : int current client ID local_data : tensor local data tensor policy : PartitionPolicy store the partition information """ msg_seq = incr_msg_seq() pickle_data = bytearray(pickle.dumps(([0], [name]))) global_id = _CAPI_DGLRPCGetGlobalIDFromLocalPartition( F.zerocopy_to_dgl_ndarray(id_tensor), F.zerocopy_to_dgl_ndarray(part_id), machine_id, ) global_id = F.zerocopy_from_dgl_ndarray(global_id) g2l_id = policy.to_local(global_id) res_tensor = _CAPI_DGLRPCFastPull( name, int(machine_id), int(machine_count), int(group_count), int(client_id), int(service_id), int(msg_seq), pickle_data, F.zerocopy_to_dgl_ndarray(id_tensor), F.zerocopy_to_dgl_ndarray(part_id), F.zerocopy_to_dgl_ndarray(g2l_id), F.zerocopy_to_dgl_ndarray(local_data), ) return F.zerocopy_from_dgl_ndarray(res_tensor) def register_sig_handler(): """Register for handling signal event.""" _CAPI_DGLRPCHandleSignal() def copy_data_to_shared_memory(dst, source): """Copy tensor data to shared-memory tensor""" F.zerocopy_to_dgl_ndarray(dst).copyfrom(F.zerocopy_to_dgl_ndarray(source)) ############### Some basic services will be defined here ############# CLIENT_REGISTER = 22451 class ClientRegisterRequest(Request): """This request will send client's ip to server. Parameters ---------- ip_addr : str client's IP address """ def __init__(self, ip_addr): self.ip_addr = ip_addr def __getstate__(self): return self.ip_addr def __setstate__(self, state): self.ip_addr = state def process_request(self, server_state): return None # do nothing class ClientRegisterResponse(Response): """This response will send assigned ID to client. 
class ShutDownRequest(Request):
    """Request a client sends to shut a server down.

    No response is sent back for this request.

    Parameters
    ----------
    client_id : int
        client's ID
    force_shutdown_server : bool, optional
        Flag stored on the request and serialized together with the client
        ID. ``process_request`` of this class does not read it.
    """

    def __init__(self, client_id, force_shutdown_server=False):
        self.force_shutdown_server = force_shutdown_server
        self.client_id = client_id

    def __getstate__(self):
        return (self.client_id, self.force_shutdown_server)

    def __setstate__(self, state):
        client_id, force_shutdown_server = state
        self.client_id = client_id
        self.force_shutdown_server = force_shutdown_server

    def process_request(self, server_state):
        """Finalize the current server and signal the serving loop to exit."""
        # Only the client with ID 0 is expected to issue a shutdown.
        assert self.client_id == 0
        finalize_server()
        return SERVER_EXIT
class DistConnectError(DGLError):
    """Exception raised when a connection to a peer cannot be established.

    Parameters
    ----------
    max_try_times : int
        The retry limit that was exhausted; reported in the message.
    ip : str, optional
        IP address of the peer. When left empty, the peer is reported
        without an address.
    port : str or int, optional
        Port of the peer. Only shown when ``ip`` is non-empty.

    Attributes
    ----------
    message : str
        The formatted error message, also passed to the base exception.
    """

    def __init__(self, max_try_times, ip="", port=""):
        # Name the peer by address only when the caller supplied one.
        peer_str = "peer[{}:{}]".format(ip, port) if ip != "" else "peer"
        self.message = (
            "Failed to build connection with {} after {} retries. "
            "Please check network availability or increase max try "
            "times via 'DGL_DIST_MAX_TRY_TIMES'.".format(
                peer_str, max_try_times
            )
        )
        super().__init__(self.message)
def get_local_usable_addr(probe_addr):
    """Get local usable IP and port

    The IP is found by connecting a datagram socket towards ``probe_addr``
    and reading back the local address chosen for that socket. The port is
    found by binding a stream socket to port 0 and reading back the port
    that was assigned.

    Parameters
    ----------
    probe_addr : str
        Address used as the connect target when probing for the local IP.

    Returns
    -------
    str
        IP address, e.g., '192.168.8.12:50051'

    Notes
    -----
    The probing sockets are closed before returning, so the reported port is
    only known to have been free at the time of the call.
    """
    # `with` guarantees both sockets are closed even if connect/bind/listen
    # raises, so no file descriptor is leaked on the error paths.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
        try:
            # should get the address on the same subnet as probe_addr's
            probe.connect((probe_addr, 1))
            ip_addr = probe.getsockname()[0]
        except ValueError:
            ip_addr = "127.0.0.1"

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as listener:
        listener.bind(("", 0))
        listener.listen(1)
        port = listener.getsockname()[1]

    return ip_addr + ":" + str(port)
group_id : int Indicates which group this client belongs to. Clients that are booted together in each launch are gathered as a group and should have same unique group_id. Raises ------ ConnectionError : If anything wrong with the connection. """ assert num_servers > 0, ( "num_servers (%d) must be a positive number." % num_servers ) assert max_queue_size > 0, ( "queue_size (%d) cannot be a negative number." % max_queue_size ) # Register some basic service rpc.register_service( rpc.CLIENT_REGISTER, rpc.ClientRegisterRequest, rpc.ClientRegisterResponse, ) rpc.register_service(rpc.SHUT_DOWN_SERVER, rpc.ShutDownRequest, None) rpc.register_service( rpc.GET_NUM_CLIENT, rpc.GetNumberClientsRequest, rpc.GetNumberClientsResponse, ) rpc.register_service( rpc.CLIENT_BARRIER, rpc.ClientBarrierRequest, rpc.ClientBarrierResponse ) rpc.register_sig_handler() server_namebook = rpc.read_ip_config(ip_config, num_servers) num_servers = len(server_namebook) rpc.set_num_server(num_servers) # group_count means how many servers # (main_server + bakcup_server) in total inside a machine. 
group_count = [] max_machine_id = 0 for server_info in server_namebook.values(): group_count.append(server_info[3]) if server_info[0] > max_machine_id: max_machine_id = server_info[0] rpc.set_num_server_per_machine(group_count[0]) num_machines = max_machine_id + 1 rpc.set_num_machines(num_machines) machine_id = get_local_machine_id(server_namebook) rpc.set_machine_id(machine_id) rpc.set_group_id(group_id) rpc.create_sender(max_queue_size) rpc.create_receiver(max_queue_size) # Get connected with all server nodes max_try_times = int(os.environ.get("DGL_DIST_MAX_TRY_TIMES", 1024)) for server_id, addr in server_namebook.items(): server_ip = addr[1] server_port = addr[2] try_times = 0 while not rpc.connect_receiver(server_ip, server_port, server_id): try_times += 1 if try_times % 200 == 0: print( "Client is trying to connect server receiver: {}:{}".format( server_ip, server_port ) ) if try_times >= max_try_times: raise rpc.DistConnectError( max_try_times, server_ip, server_port ) time.sleep(3) if not rpc.connect_receiver_finalize(max_try_times): raise rpc.DistConnectError(max_try_times) # Get local usable IP address and port ip_addr = get_local_usable_addr(server_ip) client_ip, client_port = ip_addr.split(":") # Register client on server register_req = rpc.ClientRegisterRequest(ip_addr) for server_id in range(num_servers): rpc.send_request(server_id, register_req) # wait server connect back rpc.wait_for_senders(client_ip, client_port, num_servers) print( "Client [{}] waits on {}:{}".format(os.getpid(), client_ip, client_port) ) # recv client ID from server res = rpc.recv_response() rpc.set_rank(res.client_id) print( "Machine (%d) group (%d) client (%d) connect to server successfuly!" 
% (machine_id, group_id, rpc.get_rank()) ) # get total number of client get_client_num_req = rpc.GetNumberClientsRequest(rpc.get_rank()) rpc.send_request(0, get_client_num_req) res = rpc.recv_response() rpc.set_num_client(res.num_client) from .dist_context import exit_client, set_initialized atexit.register(exit_client) set_initialized(True) ================================================ FILE: python/dgl/distributed/rpc_server.py ================================================ """Functions used by server.""" import os import time from ..base import DGLError from . import rpc from .constants import MAX_QUEUE_SIZE, SERVER_EXIT def start_server( server_id, ip_config, num_servers, num_clients, server_state, max_queue_size=MAX_QUEUE_SIZE, ): """Start DGL server, which will be shared with all the rpc services. This is a blocking function -- it returns only when the server shutdown. Parameters ---------- server_id : int Current server ID (starts from 0). ip_config : str Path of IP configuration file. num_servers : int Server count on each machine. num_clients : int Total number of clients that will be connected to the server. Note that, we do not support dynamic connection for now. It means that when all the clients connect to server, no client will can be added to the cluster. server_state : ServerSate object Store in main data used by server. max_queue_size : int Maximal size (bytes) of server queue buffer (~20 GB on default). Note that the 20 GB is just an upper-bound because DGL uses zero-copy and it will not allocate 20GB memory at once. """ assert server_id >= 0, ( "server_id (%d) cannot be a negative number." % server_id ) assert num_servers > 0, ( "num_servers (%d) must be a positive number." % num_servers ) assert num_clients >= 0, ( "num_client (%d) cannot be a negative number." % num_clients ) assert max_queue_size > 0, ( "queue_size (%d) cannot be a negative number." % max_queue_size ) # Register signal handler. 
rpc.register_sig_handler() # Register some basic services rpc.register_service( rpc.CLIENT_REGISTER, rpc.ClientRegisterRequest, rpc.ClientRegisterResponse, ) rpc.register_service(rpc.SHUT_DOWN_SERVER, rpc.ShutDownRequest, None) rpc.register_service( rpc.GET_NUM_CLIENT, rpc.GetNumberClientsRequest, rpc.GetNumberClientsResponse, ) rpc.register_service( rpc.CLIENT_BARRIER, rpc.ClientBarrierRequest, rpc.ClientBarrierResponse ) rpc.set_rank(server_id) server_namebook = rpc.read_ip_config(ip_config, num_servers) machine_id = server_namebook[server_id][0] rpc.set_machine_id(machine_id) ip_addr = server_namebook[server_id][1] port = server_namebook[server_id][2] rpc.create_sender(max_queue_size) rpc.create_receiver(max_queue_size) # wait all the senders connect to server. # Once all the senders connect to server, server will not # accept new sender's connection print( "Server is waiting for connections on [{}:{}]...".format(ip_addr, port) ) rpc.wait_for_senders(ip_addr, port, num_clients) rpc.set_num_client(num_clients) recv_clients = {} while True: # go through if any client group is ready for connection for group_id in list(recv_clients.keys()): ips = recv_clients[group_id] if len(ips) < rpc.get_num_client(): continue del recv_clients[group_id] # a new client group is ready ips.sort() client_namebook = dict(enumerate(ips)) time.sleep(3) # wait for clients' receivers ready max_try_times = int(os.environ.get("DGL_DIST_MAX_TRY_TIMES", 120)) for client_id, addr in client_namebook.items(): client_ip, client_port = addr.split(":") try_times = 0 while not rpc.connect_receiver( client_ip, client_port, client_id, group_id ): try_times += 1 if try_times % 200 == 0: print( "Server~{} is trying to connect client receiver: {}:{}".format( server_id, client_ip, client_port ) ) if try_times >= max_try_times: raise rpc.DistConnectError( max_try_times, client_ip, client_port ) time.sleep(1) if not rpc.connect_receiver_finalize(max_try_times): raise rpc.DistConnectError(max_try_times) if 
class ServerState:
    """Container for the data held by one DGL server.

    In a distributed setting, DGL splits all graph-related data (node and
    edge features, graph structure, etc.) into partitions, each handled by
    one DGL server. ServerState therefore bundles everything associated with
    one graph partition.

    Under some setups servers may be deployed heterogeneously -- divided
    into groups that serve node/edge data and groups that serve sampling or
    structure queries. In that case a ServerState can be configured with
    only node/edge data or only the graph structure.

    Each machine can run multiple server and client processes, but only one
    server is the *master* server; the others are backup servers. Clients
    and backup servers share the master's state through shared memory, so
    this class must be serializable and bulk data (e.g., node/edge features)
    must be stored in NDArray to leverage shared memory.

    Attributes
    ----------
    kv_store : KVServer
        reference for KVServer
    graph : DGLGraph
        Graph structure of one partition
    total_num_nodes : int
        Total number of nodes
    total_num_edges : int
        Total number of edges
    partition_book : GraphPartitionBook
        Graph Partition book
    use_graphbolt : bool
        Whether to use graphbolt for dataloading.
    """

    def __init__(self, kv_store, local_g, partition_book, use_graphbolt=False):
        self.partition_book = partition_book
        self._kv_store = kv_store
        self._graph = local_g
        self._use_graphbolt = use_graphbolt
        # Starts empty; exposed read-only through the `roles` property.
        self._roles = {}

    @property
    def kv_store(self):
        """Get data store."""
        return self._kv_store

    @kv_store.setter
    def kv_store(self, value):
        self._kv_store = value

    @property
    def graph(self):
        """Get graph data."""
        return self._graph

    @graph.setter
    def graph(self, value):
        self._graph = value

    @property
    def roles(self):
        """Roles of the client processes"""
        return self._roles

    @property
    def use_graphbolt(self):
        """Whether to use graphbolt for dataloading."""
        return self._use_graphbolt
class KVClient(object):
    """The fake KVStore client.

    It mimics the distributed KVStore client and is used for DistGraph in
    standalone mode. All data is kept in a dictionary on this object.
    """

    def __init__(self):
        self._data = {}
        # Store all graph data name
        self._gdata_name_list = set()
        self._all_possible_part_policy = {}
        self._pull_handlers = {}
        self._push_handlers = {}

    def _remember_policy(self, part_policy):
        """Record the policy under its ``policy_str`` unless already known."""
        self._all_possible_part_policy.setdefault(
            part_policy.policy_str, part_policy
        )

    @property
    def all_possible_part_policy(self):
        """Get all possible partition policies"""
        return self._all_possible_part_policy

    @property
    def num_servers(self):
        """Get the number of servers"""
        return 1

    @property
    def data_store(self):
        """Return the local partition of the data storage.

        Returns
        -------
        dict[str, Tensor]
            The tensor storages of the local partition.
        """
        return self._data

    def barrier(self):
        """barrier"""

    def map_shared_data(self, partition_book):
        """Mapping shared-memory tensor from server to client."""

    def register_push_handler(self, name, func):
        """register push handler"""
        self._push_handlers[name] = func

    def register_pull_handler(self, name, func):
        """register pull handler"""
        self._pull_handlers[name] = func

    def add_data(self, name, tensor, part_policy):
        """add data to the client"""
        self._data[name] = tensor
        self._gdata_name_list.add(name)
        self._remember_policy(part_policy)

    def init_data(
        self, name, shape, dtype, part_policy, init_func, is_gdata=True
    ):
        """add new data to the client"""
        self._data[name] = init_func(shape, dtype)
        self._remember_policy(part_policy)
        if is_gdata:
            self._gdata_name_list.add(name)

    def delete_data(self, name):
        """delete the data"""
        del self._data[name]
        # The name is only tracked as graph data if it was added as such.
        self._gdata_name_list.discard(name)

    def data_name_list(self):
        """get the names of all data"""
        return list(self._data)

    def gdata_name_list(self):
        """get the names of graph data"""
        return list(self._gdata_name_list)

    def get_data_meta(self, name):
        """get the metadata of data"""
        tensor = self._data[name]
        return F.dtype(tensor), F.shape(tensor), None

    def push(self, name, id_tensor, data_tensor):
        """push data to kvstore"""
        if name not in self._push_handlers:
            # No custom handler: write the rows into the stored tensor.
            F.scatter_row_inplace(self._data[name], id_tensor, data_tensor)
            return
        handler = self._push_handlers[name]
        handler(self._data, name, id_tensor, data_tensor)

    def pull(self, name, id_tensor):
        """pull data from kvstore"""
        if name not in self._pull_handlers:
            # No custom handler: gather the requested rows directly.
            return F.gather_row(self._data[name], id_tensor)
        handler = self._pull_handlers[name]
        return handler(self._data, name, id_tensor)

    def count_nonzero(self, name):
        """Count nonzero value by pull request from KVServers.

        Parameters
        ----------
        name : str
            data name

        Returns
        -------
        int
            the number of nonzero in this data.
        """
        return F.count_nonzero(self._data[name])

    def union(self, operand1_name, operand2_name, output_name):
        """Compute the union of two mask arrays in the KVStore."""
        left = self._data[operand1_name]
        right = self._data[operand2_name]
        self._data[output_name][:] = left | right
import backend as F from .base import dgl_warning, DGLError from .init import zero_initializer from .storages import TensorStorage from .utils import gather_pinned_tensor_rows, pin_memory_inplace class _LazyIndex(object): def __init__(self, index): if isinstance(index, list): self._indices = index else: self._indices = [index] def __len__(self): return len(self._indices[-1]) def slice(self, index): """Create a new _LazyIndex object sliced by the given index tensor.""" # if our indices are in the same context, lets just slice now and free # memory, otherwise do nothing until we have to if F.context(self._indices[-1]) == F.context(index): return _LazyIndex( self._indices[:-1] + [F.gather_row(self._indices[-1], index)] ) return _LazyIndex(self._indices + [index]) def flatten(self): """Evaluate the chain of indices, and return a single index tensor.""" flat_index = self._indices[0] # here we actually need to resolve it for index in self._indices[1:]: if F.context(index) != F.context(flat_index): index = F.copy_to(index, F.context(flat_index)) flat_index = F.gather_row(flat_index, index) return flat_index def record_stream(self, stream): """Record stream for index. Parameters ---------- stream : torch.cuda.Stream. """ for index in self._indices: if F.context(index) != F.cpu(): index.record_stream(stream) class LazyFeature(object): """Placeholder for feature prefetching. One can assign this object to ``ndata`` or ``edata`` of the graphs returned by various samplers' :attr:`sample` method. When DGL's dataloader receives the subgraphs returned by the sampler, it will automatically look up all the ``ndata`` and ``edata`` whose data is a LazyFeature, replacing them with the actual data of the corresponding nodes/edges from the original graph instead. In particular, for a subgraph returned by the sampler has a LazyFeature with name ``k`` in ``subgraph.ndata[key]``: .. 
class Scheme(namedtuple("Scheme", ["shape", "dtype"])):
    """The column scheme.

    Parameters
    ----------
    shape : tuple of int
        The feature shape.
    dtype : backend-specific type object
        The feature data type.
    """

    # Pickling torch dtypes can be problematic, so the dtype is pickled by
    # its string name instead and mapped back on load. The backend's
    # data_type_dict / reverse_data_type_dict exist for this workaround.
    # Upstream issue: https://github.com/pytorch/pytorch/issues/14057
    def __reduce__(self):
        shape = self.shape
        dtype_name = F.reverse_data_type_dict[self.dtype]
        return (self._reconstruct_scheme, (shape, dtype_name))

    @classmethod
    def _reconstruct_scheme(cls, shape, dtype_str):
        # Inverse of __reduce__: turn the string name back into a dtype.
        return cls(shape, F.data_type_dict[dtype_str])
    @property
    def data(self):
        """Return the feature data. Perform index selecting if needed.

        Reading this property applies any pending lazy operations, in this
        order, and stores the result back on the column:

        1. If an index is set, the rows are selected from the storage and
           the index is cleared.
        2. If a target device is set, the storage is copied there and the
           pending device is cleared.
        3. If a deferred dtype is set, the storage is converted and the
           deferred dtype is cleared.

        Returns
        -------
        Tensor
            The storage tensor after the pending operations were applied.
        """
        if self.index is not None:
            # A chained lazy index is first collapsed into one index tensor.
            if isinstance(self.index, _LazyIndex):
                self.index = self.index.flatten()

            storage_ctx = F.context(self.storage)
            index_ctx = F.context(self.index)
            # If under the special case where the storage is pinned and the index is on
            # CUDA, directly call UVA slicing (even if they are not in the same context).
            if (
                storage_ctx != index_ctx
                and storage_ctx == F.cpu()
                and F.is_pinned(self.storage)
            ):
                self.storage = gather_pinned_tensor_rows(
                    self.storage, self.index
                )
            else:
                # If index and storage is not in the same context,
                # copy index to the same context of storage.
                # Copy index is usually cheaper than copy data
                if storage_ctx != index_ctx:
                    kwargs = {}
                    # Reuse the keyword arguments of the pending device
                    # transfer, if any, for the index copy.
                    if self.device is not None:
                        kwargs = self.device[1]
                    self.index = F.copy_to(self.index, storage_ctx, **kwargs)
                self.storage = F.gather_row(self.storage, self.index)
            # The selection is now baked into the storage.
            self.index = None

        # move data to the right device
        if self.device is not None:
            # self.device is a (device, kwargs) pair.
            self.storage = F.copy_to(
                self.storage, self.device[0], **self.device[1]
            )
            self.device = None

        # convert data to the right type
        if self.deferred_dtype is not None:
            self.storage = F.astype(self.storage, self.deferred_dtype)
            self.deferred_dtype = None
        return self.storage
self.pinned_by_dgl = False def to(self, device, **kwargs): # pylint: disable=invalid-name """Return a new column with columns copy to the targeted device (cpu/gpu). Parameters ---------- device : Framework-specific device context object The context to move data to. kwargs : Key-word arguments. Key-word arguments fed to the framework copy function. Returns ------- Column A new column """ col = self.clone() col.device = (device, kwargs) return col @property def dtype(self): """Return the effective data type of this Column""" if self.deferred_dtype is not None: return self.deferred_dtype return self.storage.dtype def astype(self, new_dtype): """Return a new column such that when its data is requested, it will be converted to new_dtype. Parameters ---------- new_dtype : Framework-specific type object The type to convert the data to. Returns ------- Column A new column """ col = self.clone() if col.dtype != new_dtype: # If there is already a pending conversion, ensure that the pending # conversion and transfer/sampling are done before this new conversion. if col.deferred_dtype is not None: _ = col.data if (col.device is None) and (col.index is None): # Do the conversion immediately if no device transfer or index # sampling is pending. The assumption is that this is most # likely to be the desired behaviour, such as converting an # entire graph's feature data to float16 (half) before transfer # to device when training, or converting back to float32 (float) # after fetching the data to a device. col.storage = F.astype(col.storage, new_dtype) else: # Defer the conversion if there is a pending transfer or sampling. # This is so that feature data that never gets accessed on the # device never needs to be transferred or sampled or converted. col.deferred_dtype = new_dtype return col def __getitem__(self, rowids): """Return the feature data given the rowids. The operation triggers index selection. Parameters ---------- rowids : Tensor Row ID tensor. 
Returns ------- Tensor The feature data """ return F.gather_row(self.data, rowids) def __setitem__(self, rowids, feats): """Update the feature data given the index. The update is performed out-placely so it can be used in autograd mode. The operation triggers index selection. Parameters ---------- rowids : Tensor Row IDs. feats : Tensor New features. """ self.update(rowids, feats) def update(self, rowids, feats): """Update the feature data given the index. Parameters ---------- rowids : Tensor Row IDs. feats : Tensor New features. """ feat_scheme = infer_scheme(feats) if feat_scheme != self.scheme: raise DGLError( "Cannot update column of scheme %s using feature of scheme %s." % (feat_scheme, self.scheme) ) self.data = F.scatter_row(self.data, rowids, feats) def extend(self, feats, feat_scheme=None): """Extend the feature data. The operation triggers index selection. Parameters ---------- feats : Tensor The new features. feat_scheme : Scheme, optional The scheme """ if feat_scheme is None: feat_scheme = infer_scheme(feats) if feat_scheme != self.scheme: raise DGLError( "Cannot update column of scheme %s using feature of scheme %s." % (feat_scheme, self.scheme) ) self.data = F.cat([self.data, feats], dim=0) def clone(self): """Return a shallow copy of this column.""" return Column( self.storage, self.scheme, self.index, self.device, self.deferred_dtype, ) def deepclone(self): """Return a deepcopy of this column. The operation triggers index selection. """ return Column(F.clone(self.data), copy.deepcopy(self.scheme)) def subcolumn(self, rowids): """Return a subcolumn. The resulting column will share the same storage as this column so this operation is quite efficient. If the current column is also a sub-column (i.e., the index tensor is not None), the current index tensor will be sliced by 'rowids', if they are on the same context. Otherwise, both index tensors are saved, and only applied when the data is accessed. Parameters ---------- rowids : Tensor Row IDs. 
Returns ------- Column Sub-column """ if self.index is None: return Column( self.storage, self.scheme, rowids, self.device, self.deferred_dtype, ) else: index = self.index if not isinstance(index, _LazyIndex): index = _LazyIndex(self.index) index = index.slice(rowids) return Column( self.storage, self.scheme, index, self.device, self.deferred_dtype, ) @staticmethod def create(data): """Create a new column using the given data.""" if isinstance(data, Column): return data.clone() else: return Column(data) def __repr__(self): return repr(self.data) def __getstate__(self): if self.storage is not None: # flush any deferred operations _ = self.data state = self.__dict__.copy() # data pinning does not get serialized, so we need to remove that from # the state state["_data_nd"] = None state["pinned_by_dgl"] = False return state def __setstate__(self, state): index = None device = None if "storage" in state and state["storage"] is not None: assert "index" not in state or state["index"] is None assert "device" not in state or state["device"] is None else: # we may have a column with only index information, and that is # valid index = None if "index" not in state else state["index"] device = None if "device" not in state else state["device"] assert "deferred_dtype" not in state or state["deferred_dtype"] is None assert "pinned_by_dgl" not in state or state["pinned_by_dgl"] is False assert "_data_nd" not in state or state["_data_nd"] is None self.__dict__ = state # properly initialize this object self._init( self.scheme if hasattr(self, "scheme") else None, index=index, device=device, ) def _init(self, scheme=None, index=None, device=None, deferred_dtype=None): self.scheme = scheme if scheme else infer_scheme(self.storage) self.index = index self.device = device self.deferred_dtype = deferred_dtype self.pinned_by_dgl = False self._data_nd = None def __copy__(self): return self.clone() def fetch(self, indices, device, pin_memory=False, **kwargs): _ = self.data # materialize in 
def record_stream(self, stream):
    """Record stream that is using the storage.

    Only the PyTorch backend is supported; with any other preferred backend
    the call raises instead of silently returning.

    Parameters
    ----------
    stream : torch.cuda.Stream.

    Raises
    ------
    DGLError
        If the preferred backend is not PyTorch.
    """
    if F.get_preferred_backend() != "pytorch":
        raise DGLError("record_stream only supports the PyTorch backend.")
    # The index is recorded when it is a lazy index or does not live on CPU.
    if self.index is not None and (
        isinstance(self.index, _LazyIndex)
        or F.context(self.index) != F.cpu()
    ):
        self.index.record_stream(stream)
    # Storage that lives on CPU is skipped.
    if F.context(self.storage) != F.cpu():
        self.storage.record_stream(stream)
""" def __init__(self, data=None, num_rows=None): if data is None: self._columns = dict() self._num_rows = 0 if num_rows is None else num_rows else: assert not isinstance(data, Frame) # sanity check for code refactor # Note that we always create a new column for the given data. # This avoids two frames accidentally sharing the same column. self._columns = { k: v if isinstance(v, LazyFeature) else Column.create(v) for k, v in data.items() } self._num_rows = num_rows # infer num_rows & sanity check for name, col in self._columns.items(): if isinstance(col, LazyFeature): continue if self._num_rows is None: self._num_rows = len(col) elif len(col) != self._num_rows: raise DGLError( "Expected all columns to have same # rows (%d), " "got %d on %r." % (self._num_rows, len(col), name) ) # Initializer for empty values. Initializer is a callable. # If is none, then a warning will be raised # in the first call and zero initializer will be used later. self._initializers = {} # per-column initializers self._default_initializer = None def _set_zero_default_initializer(self): """Set the default initializer to be zero initializer.""" self._default_initializer = zero_initializer def get_initializer(self, column=None): """Get the initializer for empty values for the given column. Parameters ---------- column : str The column Returns ------- callable The initializer """ return self._initializers.get(column, self._default_initializer) def set_initializer(self, initializer, column=None): """Set the initializer for empty values, for a given column or all future columns. Initializer is a callable that returns a tensor given the shape and data type. Parameters ---------- initializer : callable The initializer. 
def add_column(self, name, scheme, ctx):
    """Add a new column to the frame.

    The column values are produced by the initializer registered for
    ``name`` (the zero initializer is installed as default when none is
    available). An already existing column is left as is and a warning is
    emitted.

    Parameters
    ----------
    name : str
        The column name.
    scheme : Scheme
        The column scheme.
    ctx : DGLContext
        The column context.
    """
    if name in self:
        dgl_warning(
            'Column "%s" already exists. Ignore adding this column again.'
            % name
        )
        return

    # Make sure some initializer can be resolved for this column.
    if self.get_initializer(name) is None:
        self._set_zero_default_initializer()
    make_values = self.get_initializer(name)

    n_rows = self.num_rows
    values = make_values(
        (n_rows,) + scheme.shape,
        scheme.dtype,
        ctx,
        slice(0, n_rows),
    )
    self._columns[name] = Column(values, scheme)
def update_column(self, name, data):
    """Add or replace the column with the given name and data.

    A ``LazyFeature`` is stored as given, without any row check. Anything
    else is turned into a column whose length must equal the frame's number
    of rows.

    Parameters
    ----------
    name : str
        The column name.
    data : Column or data convertible to Column
        The column data.

    Raises
    ------
    DGLError
        If the data does not have as many rows as the frame.
    """
    new_entry = data
    if not isinstance(data, LazyFeature):
        new_entry = Column.create(data)
        n_given = len(new_entry)
        n_expected = self.num_rows
        if n_given != n_expected:
            raise DGLError(
                "Expected data to have %d rows, got %d."
                % (n_expected, n_given)
            )
    self._columns[name] = new_entry
def _append(self, other):
    """Append ``other`` frame to ``self`` frame.

    Columns of ``self`` that ``other`` does not provide are padded with
    values from the column's initializer. Columns that only ``other`` has
    are first created on ``self``. ``other`` itself is not modified.

    Parameters
    ----------
    other : Frame
        The frame whose rows are appended.

    Raises
    ------
    DGLError
        If an initializer returns padding with an unexpected number of rows.
    """
    # Collect the incoming columns in a private dict so that the padding
    # added below never leaks into the caller's frame.
    incoming = dict(other._columns)

    # pad columns that are not provided in the other frame with initial values
    for key, col in self._columns.items():
        if key in incoming:
            continue
        scheme = col.scheme
        ctx = F.context(col.data)
        if self.get_initializer(key) is None:
            self._set_zero_default_initializer()
        initializer = self.get_initializer(key)
        new_data = initializer(
            (other.num_rows,) + scheme.shape,
            scheme.dtype,
            ctx,
            slice(self._num_rows, self._num_rows + other.num_rows),
        )
        pad_col = Column.create(new_data)
        # Same row check that assigning the column into ``other`` performed.
        if len(pad_col) != other.num_rows:
            raise DGLError(
                "Expected data to have %d rows, got %d."
                % (other.num_rows, len(pad_col))
            )
        incoming[key] = pad_col

    # append other to self
    for key, col in incoming.items():
        if key not in self._columns:
            # the column does not exist; init a new column
            self.add_column(key, col.scheme, F.context(col.data))
        self._columns[key].extend(col.data, col.scheme)
However, they still share the tensor contents so any mutable operation on the column tensor are visible to each other. Hence, the function does not allocate extra tensor memory. Use :func:`~dgl.Frame.deepclone` for cloning a frame that does not share any data. Returns ------- Frame A cloned frame. """ newframe = Frame(self._columns, self._num_rows) newframe._initializers = self._initializers newframe._default_initializer = self._default_initializer return newframe def deepclone(self): """Return a deep clone of this frame. The clone frame has an copy of this frame and any modification to the clone frame is not visible to this frame. The function allocate new tensors and copy the contents from this frame. Use :func:`~dgl.Frame.clone` for cloning a frame that does not allocate extra tensor memory. Returns ------- Frame A deep-cloned frame. """ newframe = Frame( {k: col.deepclone() for k, col in self._columns.items()}, self._num_rows, ) newframe._initializers = self._initializers newframe._default_initializer = self._default_initializer return newframe def subframe(self, rowids): """Return a new frame whose columns are subcolumns of this frame. The given row IDs should be within range [0, self.num_rows), and allow duplicate IDs. Parameters ---------- rowids : Tensor Row IDs Returns ------- Frame A new subframe. """ subcols = {k: col.subcolumn(rowids) for k, col in self._columns.items()} subf = Frame(subcols, len(rowids)) subf._initializers = self._initializers subf._default_initializer = self._default_initializer return subf def to(self, device, **kwargs): # pylint: disable=invalid-name """Return a new frame with columns copy to the targeted device (cpu/gpu). Parameters ---------- device : Framework-specific device context object The context to move data to. kwargs : Key-word arguments. Key-word arguments fed to the framework copy function. 
def _astype_float(self, new_type):
    """Return a new frame whose floating-point columns use ``new_type``.

    Columns whose dtype is not one of float64/float32/float16/bfloat16, or
    already equals ``new_type``, are carried over as they are.

    Parameters
    ----------
    new_type : Framework-specific type object
        The target type; must be one of the four floating-point types above.

    Returns
    -------
    Frame
        A new frame
    """
    float_types = [F.float64, F.float32, F.float16, F.bfloat16]
    assert (
        new_type in float_types
    ), "'new_type' must be floating-point type: %s" % str(new_type)

    def _cast(column):
        # Only floating-point columns of a different type are converted.
        dtype = column.dtype
        if dtype != new_type and dtype in float_types:
            return column.astype(new_type)
        return column

    newframe = self.clone()
    newframe._columns = {
        name: _cast(column) for name, column in self._columns.items()
    }
    return newframe
class TargetCode(object):
    """Code for target

    Note: must be consistent with the target code definition in C++ side:
        src/kernel/binary_reduce_common.h
    """

    # Integer codes of the three targets.
    SRC, DST, EDGE = 0, 1, 2

    # One-letter name of each target code.
    CODE2STR = {
        SRC: "u",
        DST: "v",
        EDGE: "e",
    }
def _gen_message_builtin(lhs, rhs, binary_op):
    """Create the builtin message function ``<lhs>_<binary_op>_<rhs>``.

    Parameters
    ----------
    lhs : str
        Target of the left operand; a key of ``_TARGET_MAP``.
    rhs : str
        Target of the right operand; a key of ``_TARGET_MAP``.
    binary_op : str
        Name of the binary operator.

    Returns
    -------
    callable
        A function ``(lhs_field, rhs_field, out)`` returning a
        ``BinaryMessageFunction``, with its name, qualified name and
        docstring set for the generated builtin.
    """
    name = "{}_{}_{}".format(lhs, binary_op, rhs)
    lhs_code = _TARGET_MAP[lhs]
    rhs_code = _TARGET_MAP[rhs]
    docstring = """Builtin message function that computes a message on an edge by
    performing element-wise {} between features of {} and {}
    if the features have the same shape; otherwise, it first broadcasts
    the features to a new shape and performs the element-wise operation.

    Broadcasting follows NumPy semantics. Please see
    https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html
    for more details about the NumPy broadcasting semantics.

    Parameters
    ----------
    lhs_field : str
        The feature field of {}.
    rhs_field : str
        The feature field of {}.
    out : str
        The output message field.

    Examples
    --------
    >>> import dgl
    >>> message_func = dgl.function.{}('h', 'h', 'm')
    """.format(
        binary_op,
        TargetCode.CODE2STR[lhs_code],
        TargetCode.CODE2STR[rhs_code],
        TargetCode.CODE2STR[lhs_code],
        TargetCode.CODE2STR[rhs_code],
        name,
    )

    def func(lhs_field, rhs_field, out):
        return BinaryMessageFunction(
            binary_op,
            lhs_code,
            rhs_code,
            lhs_field,
            rhs_field,
            out,
        )

    func.__name__ = name
    # Without this the function keeps the
    # "_gen_message_builtin.<locals>.func" qualified name, which shows up in
    # reprs and prevents pickling by reference. The reduce builtins already
    # set it.
    func.__qualname__ = name
    func.__doc__ = docstring
    return func
def _register_builtin_reduce_func():
    """Register builtin reduce functions

    Generates the ``max``, ``min``, ``sum`` and ``mean`` reducers, exposes
    each one as an attribute of this module and appends its name to
    ``__all__``.
    """
    this_module = sys.modules[__name__]
    for op_name in ("max", "min", "sum", "mean"):
        setattr(this_module, op_name, _gen_reduce_builtin(op_name))
        __all__.append(op_name)
def rand_bipartite(
    utype,
    etype,
    vtype,
    num_src_nodes,
    num_dst_nodes,
    num_edges,
    idtype=F.int64,
    device=F.cpu(),
):
    """Generate a random uni-directional bipartite graph and return.

    ``num_edges`` distinct (source, destination) pairs are drawn without
    replacement from all ``num_src_nodes * num_dst_nodes`` possible pairs, so
    the resulting graph has no multi-edge.

    To control the randomness, set the random seed via :func:`dgl.seed`.

    Parameters
    ----------
    utype : str
        The name of the source node type.
    etype : str
        The name of the edge type.
    vtype : str
        The name of the destination node type.
    num_src_nodes : int
        The number of source nodes.
    num_dst_nodes : int
        The number of destination nodes.
    num_edges : int
        The number of edges
    idtype : int32, int64, optional
        The data type for storing the structure-related graph information
        such as node and edge IDs. It should be a framework-specific data type
        object (e.g., torch.int32). By default, DGL uses int64.
    device : Device context, optional
        The device of the resulting graph. It should be a framework-specific
        device object (e.g., torch.device). By default, DGL stores the graph
        on CPU.

    Returns
    -------
    DGLGraph
        The generated random bipartite graph.

    See Also
    --------
    rand_graph

    Examples
    --------
    >>> import dgl
    >>> dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
    Graph(num_nodes={'game': 100, 'user': 50},
          num_edges={('user', 'buys', 'game'): 10},
          metagraph=[('user', 'game', 'buys')])
    """
    # TODO(minjie): support RNG as one of the arguments.

    def _as_node_ids(ids_np):
        # NumPy ids -> backend tensor of the requested id type and device.
        tensor = F.zerocopy_from_numpy(ids_np)
        return F.copy_to(F.astype(tensor, idtype), device)

    # Each drawn number encodes one (source, destination) pair.
    pair_codes = F.zerocopy_to_numpy(
        random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
    )
    src_ids = _as_node_ids(pair_codes // num_dst_nodes)
    dst_ids = _as_node_ids(pair_codes % num_dst_nodes)

    edges = {(utype, etype, vtype): (src_ids, dst_ids)}
    node_counts = {utype: num_src_nodes, vtype: num_dst_nodes}
    return convert.heterograph(
        edges, node_counts, idtype=idtype, device=device
    )
def _neighbor_matching(
    graph_idx, num_nodes, edge_weights=None, relabel_idx=True
):
    """
    Description
    -----------
    The neighbor matching procedure of edge coarsening used in Metis and
    Graclus for homogeneous graph coarsening. This procedure keeps picking an
    unmarked vertex and matching it with one its unmarked neighbors (that
    maximizes its edge weight) until no match can be done.

    If no edge weight is given, this procedure will randomly pick neighbor for
    each vertex.

    The GPU implementation is based on "A GPU Algorithm for Greedy Graph
    Matching".

    NOTE: The input graph must be bi-directed (undirected) graph. Call
    :obj:`dgl.to_bidirected` if you are not sure your graph is bi-directed.

    Parameters
    ----------
    graph_idx : HeteroGraphIndex
        The input homogeneous graph.
    num_nodes : int
        The number of nodes in this homogeneous graph.
    edge_weights : tensor, optional
        The edge weight tensor holding non-negative scalar weight for each
        edge. default: :obj:`None`
    relabel_idx : bool, optional
        If true, relabel resulting node labels to have consecutive node ids.
        default: :obj:`True`

    Returns
    -------
    a 1-D tensor
        A vector with each element that indicates the cluster ID of a vertex.

    Raises
    ------
    DGLError
        If any node still has a negative label after matching.
    """
    # A null array tells the C API that no edge weight was supplied.
    edge_weight_capi = nd.NULL["int64"]
    if edge_weights is not None:
        edge_weight_capi = F.zerocopy_to_dgl_ndarray(edge_weights)
    # Labels start at -1 and are filled in by the C API through the
    # writable view created below.
    node_label = F.full_1d(
        num_nodes,
        -1,
        getattr(F, graph_idx.dtype),
        F.to_backend_ctx(graph_idx.ctx),
    )
    node_label_capi = F.zerocopy_to_dgl_ndarray_for_write(node_label)
    _CAPI_NeighborMatching(graph_idx, edge_weight_capi, node_label_capi)
    if F.reduce_sum(node_label < 0).item() != 0:
        raise DGLError("Find unmatched node")

    # reorder node id
    # TODO: actually we can add `return_inverse` option for `unique`
    #       function in backend for efficiency.
    if relabel_idx:
        # NOTE(review): the relabelled result is rebuilt from a NumPy array
        # with F.tensor, so its dtype/device may differ from ``node_label``
        # -- confirm against callers.
        node_label_np = F.zerocopy_to_numpy(node_label)
        _, node_label_np = np.unique(node_label_np, return_inverse=True)
        return F.tensor(node_label_np)
    else:
        return node_label
def farthest_point_sampler(pos, npoints, start_idx=None):
    """Farthest Point Sampler without the need to compute all pairs of distance.

    In each batch, the algorithm starts with the sample index specified by
    ``start_idx``. Then for each point, we maintain the minimum to-sample
    distance. Finally, we pick the point with the maximum such distance. This
    process will be repeated for ``npoints`` - 1 times.

    Parameters
    ----------
    pos : tensor
        The positional tensor of shape (B, N, C)
    npoints : int
        The number of points to sample in each batch.
    start_idx : int, optional
        If given, appoint the index of the starting point, otherwise randomly
        select a point as the start point. (default: None)

    Returns
    -------
    tensor of shape (B, npoints)
        The sampled indices in each batch.

    Raises
    ------
    DGLError
        If ``start_idx`` is given and is not within ``[0, N)``.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import torch
    >>> from dgl.geometry import farthest_point_sampler
    >>> x = torch.rand((2, 10, 3))
    >>> point_idx = farthest_point_sampler(x, 2)
    >>> print(point_idx)
        tensor([[5, 6],
                [7, 8]])
    """
    ctx = F.context(pos)
    B, N, C = pos.shape
    # Flatten the batch so that the sampler sees one (B * N, C) point list.
    pos = pos.reshape(-1, C)
    dist = F.zeros((B * N), dtype=pos.dtype, ctx=ctx)
    if start_idx is None:
        # The upper bound of randint is exclusive, so ``high`` must be N for
        # every index in [0, N) to be eligible. ``N - 1`` would exclude the
        # last point and fail outright for single-point batches.
        start_idx = F.randint(
            shape=(B,), dtype=F.int64, ctx=ctx, low=0, high=N
        )
    else:
        if start_idx >= N or start_idx < 0:
            raise DGLError(
                "Invalid start_idx, expected 0 <= start_idx < {}, got {}".format(
                    N, start_idx
                )
            )
        start_idx = F.full_1d(B, start_idx, dtype=F.int64, ctx=ctx)
    result = F.zeros((npoints * B), dtype=F.int64, ctx=ctx)
    # ``result`` is filled in place by the sampler.
    _farthest_point_sampler(pos, B, npoints, dist, start_idx, result)
    return result.reshape(B, npoints)
def __setstate__(self, state):
    """Restore the graph index from its pickle state.

    The current state is a 4-tuple
    ``(num_nodes, readonly, src_nodes, dst_nodes)``. A legacy 5-tuple is
    also accepted; its second element is discarded and a warning is issued.

    Parameters
    ----------
    state : tuple
        The pickled state.

    Raises
    ------
    IOError
        If ``state`` is not a tuple of length 4 or 5.
    """
    # Pickle compatibility check
    # TODO: we should store a storage version number in later releases.
    if isinstance(state, tuple) and len(state) == 5:
        dgl_warning(
            "The object is pickled pre-0.4.2. Multigraph flag is ignored in 0.4.3"
        )
        # Legacy layout: the second field is unpacked and dropped.
        num_nodes, _, readonly, src, dst = state
    elif isinstance(state, tuple) and len(state) == 4:
        # post-0.4.3.
        num_nodes, readonly, src, dst = state
    else:
        raise IOError("Unrecognized storage format.")

    # Reset the python-side caches before rebuilding the underlying handle.
    self._cache = {}
    self._readonly = readonly
    self.__init_handle_by_constructor__(
        _CAPI_DGLGraphCreate,
        src.todgltensor(),
        dst.todgltensor(),
        int(num_nodes),
        readonly,
    )
def is_readonly(self):
    """Tell whether this graph index is read-only.

    The answer is fetched from the C API on first use and then kept in the
    python-side ``_readonly`` attribute.

    Returns
    -------
    bool
        True if it is a read-only graph, False otherwise.
    """
    flag = self._readonly
    if flag is None:
        # Not cached yet: query the C API and remember the answer.
        flag = bool(_CAPI_DGLGraphIsReadonly(self))
        self._readonly = flag
    return flag
def has_nodes(self, vids):
    """Return whether each of the given nodes exists.

    Parameters
    ----------
    vids : utils.Index
        The nodes

    Returns
    -------
    utils.Index
        0-1 array indicating existence
    """
    # Convert the index to a DGL tensor before handing it to the C API.
    vid_array = vids.todgltensor()
    return utils.toindex(_CAPI_DGLGraphHasVertices(self, vid_array))
def out_edges(self, v):
    """Return the out edges of the node(s).

    Parameters
    ----------
    v : utils.Index
        The node(s).

    Returns
    -------
    utils.Index
        The src nodes.
    utils.Index
        The dst nodes.
    utils.Index
        The edge ids.
    """
    if len(v) == 1:
        # Single node: use the scalar entry point of the C API.
        packed = _CAPI_DGLGraphOutEdges_1(self, int(v[0]))
    else:
        packed = _CAPI_DGLGraphOutEdges_2(self, v.todgltensor())
    # Fields 0, 1 and 2 of the result are src, dst and edge ids.
    return tuple(utils.toindex(packed(field)) for field in range(3))
def node_halo_subgraph(self, v, num_hops):
    """Return an induced subgraph with halo nodes.

    Parameters
    ----------
    v : utils.Index
        The nodes.
    num_hops : int
        The number of hops in which a HALO node can be accessed.

    Returns
    -------
    SubgraphIndex
        The subgraph index.
    DGLTensor
        Indicate if a node belongs to a partition.

    Notes
    -----
    Only the two values above are returned; no per-edge indicator is
    produced by this method.
    """
    v_array = v.todgltensor()
    subg = _CAPI_DGLGetSubgraphWithHalo(self, v_array, num_hops)
    # Inner-node information is queried from the halo subgraph itself.
    inner_nodes = _CAPI_GetHaloSubgraphInnerNodes(subg)
    return subg, inner_nodes
@utils.cached_member(cache="_cache", prefix="immu_gidx")
def get_immutable_gidx(self, ctx):
    """Build an immutable copy of this graph index on the given device.

    Note: this internal function is for DGL scheduler use only

    Parameters
    ----------
    ctx : DGLContext
        The context of the returned graph.

    Returns
    -------
    GraphIndex
    """
    # Step 1: immutable version of this index.
    frozen = self.to_immutable()
    # Step 2: store it with the bit width reported by ``bits_needed``.
    compact = frozen.asbits(self.bits_needed())
    # Step 3: move it to the requested device context.
    return compact.copy_to(ctx)
% (type(transpose)) ) fmt = F.get_preferred_sparse_format() rst = _CAPI_DGLGraphGetAdj(self, transpose, fmt) if fmt == "csr": indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) shuffle = utils.toindex(rst(2)) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) spmat = F.sparse_matrix( dat, ("csr", indices, indptr), (self.num_nodes(), self.num_nodes()), )[0] return spmat, shuffle elif fmt == "coo": ## FIXME(minjie): data type idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) m = self.num_edges() idx = F.reshape(idx, (2, m)) dat = F.ones((m,), dtype=F.float32, ctx=ctx) n = self.num_nodes() adj, shuffle_idx = F.sparse_matrix(dat, ("coo", idx), (n, n)) shuffle_idx = ( utils.toindex(shuffle_idx) if shuffle_idx is not None else None ) return adj, shuffle_idx else: raise Exception("unknown format") def incidence_matrix(self, typestr, ctx): """Return the incidence matrix representation of this graph. An incidence matrix is an n x m sparse matrix, where n is the number of nodes and m is the number of edges. Each nnz value indicating whether the edge is incident to the node or not. There are three types of an incidence matrix `I`: * "in": - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e); - I[v, e] = 0 otherwise. * "out": - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e); - I[v, e] = 0 otherwise. * "both": - I[v, e] = 1 if e is the in-edge of v; - I[v, e] = -1 if e is the out-edge of v; - I[v, e] = 0 otherwise (including self-loop). Parameters ---------- typestr : str Can be either "in", "out" or "both" ctx : context The context of returned incidence matrix. Returns ------- SparseTensor The incidence matrix. utils.Index A index for data shuffling due to sparse format change. Return None if shuffle is not required. 
def line_graph(self, backtracking=True):
    """Return the line graph of this graph.

    Parameters
    ----------
    backtracking : bool, optional (default=True)
        Whether (i, j) ~ (j, i) in L(G).
        (i, j) ~ (j, i) is the behavior of networkx.line_graph.

    Returns
    -------
    GraphIndex
        The line graph of this graph.
    """
    return _CAPI_DGLGraphLineGraph(self, backtracking)
def from_coo(num_nodes, src, dst, readonly):
    """Build a graph index from COO-style source/destination arrays.

    Parameters
    ----------
    num_nodes : int
        Number of nodes.
    src : Tensor
        Src end nodes of the edges.
    dst : Tensor
        Dst end nodes of the edges.
    readonly : bool
        True if the returned graph is readonly.

    Returns
    -------
    GraphIndex
        The graph index.
    """
    src_index = utils.toindex(src)
    dst_index = utils.toindex(dst)

    if not readonly:
        # Mutable graphs start empty and are populated incrementally.
        mutable = _CAPI_DGLGraphCreateMutable()
        mutable.add_nodes(num_nodes)
        mutable.add_edges(src_index, dst_index)
        return mutable

    # Read-only graphs are created in a single C API call.
    return _CAPI_DGLGraphCreate(
        src_index.todgltensor(),
        dst_index.todgltensor(),
        int(num_nodes),
        readonly,
    )
def from_shared_mem_graph_index(shared_mem_name):
    """Load a graph index from the shared memory.

    Parameters
    ----------
    shared_mem_name : string
        the name of shared memory

    Returns
    -------
    GraphIndex
        The graph index
    """
    # Thin wrapper: the name is passed straight to the C API.
    return _CAPI_DGLGraphCSRCreateMMap(shared_mem_name)
def from_edge_list(elist, readonly):
    """Convert from an edge list.

    Parameters
    ----------
    elist : list, tuple
        List of (u, v) edge tuple, or a tuple of src/dst lists
    readonly : bool
        True if the returned graph is readonly.

    Returns
    -------
    GraphIndex
        The graph index. The number of nodes is one more than the largest
        node id found in the edge list, or 0 when there are no edges.
    """
    if isinstance(elist, tuple):
        src, dst = elist
    else:
        pairs = list(elist)
        # ``zip(*[])`` yields nothing to unpack, so an empty edge list needs
        # its own branch instead of failing with an unpacking error.
        src, dst = zip(*pairs) if pairs else ((), ())
    src = np.asarray(src)
    dst = np.asarray(dst)
    src_ids = utils.toindex(src)
    dst_ids = utils.toindex(dst)
    if src.size == 0 and dst.size == 0:
        # ``max`` of an empty array raises; a graph without edges has no
        # node ids to infer a size from.
        # NOTE(review): whether an empty read-only graph is accepted is
        # decided by ``from_coo``; confirm against the C API.
        num_nodes = 0
    else:
        num_nodes = max(src.max(), dst.max()) + 1
    return from_coo(num_nodes, src_ids, dst_ids, readonly)
def create_graph_index(graph_data, readonly):
    """Create a graph index object.

    Parameters
    ----------
    graph_data : graph data
        Data to initialize graph. Same as networkx's semantics.
    readonly : bool
        Whether the graph structure is read-only.

    Returns
    -------
    GraphIndex
        ``graph_data`` itself when it already is a ``GraphIndex``; otherwise
        a graph index built from the given data.

    Raises
    ------
    Exception
        If ``graph_data`` is None while ``readonly`` is true.
    DGLError
        If the fallback networkx conversion fails. The original exception is
        attached as ``__cause__``.
    """
    if isinstance(graph_data, GraphIndex):
        # FIXME(minjie): this return is not correct for mutable graph index
        return graph_data

    if graph_data is None:
        if readonly:
            raise Exception("can't create an empty immutable graph")
        return _CAPI_DGLGraphCreateMutable()
    if isinstance(graph_data, (list, tuple)):
        # edge list
        return from_edge_list(graph_data, readonly)
    if isinstance(graph_data, scipy.sparse.spmatrix):
        # scipy format
        return from_scipy_sparse_matrix(graph_data, readonly)

    # networkx - any format
    try:
        return from_networkx(graph_data, readonly)
    except Exception as err:  # pylint: disable=broad-except
        # Chain the underlying failure so the real cause stays in the
        # traceback instead of being replaced by the generic message.
        raise DGLError(
            'Error while creating graph from input of type "%s".'
            % type(graph_data)
        ) from err
def load_graphbolt():
    """Load Graphbolt C++ library.

    The library file name is derived from the running PyTorch version (with
    any ``+local`` suffix removed) and the current platform. It is looked up
    in the ``graphbolt`` directory next to the DGL library.

    Raises
    ------
    NotImplementedError
        If the platform is not Linux, macOS or Windows.
    FileNotFoundError
        If the expected library file does not exist.
    ImportError
        If the library exists but cannot be loaded. The original exception
        is attached as ``__cause__``.
    """
    vers = torch.__version__.split("+", maxsplit=1)[0]

    if sys.platform.startswith("linux"):
        basename = f"libgraphbolt_pytorch_{vers}.so"
    elif sys.platform.startswith("darwin"):
        basename = f"libgraphbolt_pytorch_{vers}.dylib"
    elif sys.platform.startswith("win"):
        basename = f"graphbolt_pytorch_{vers}.dll"
    else:
        raise NotImplementedError("Unsupported system: %s" % sys.platform)

    dirname = os.path.dirname(libinfo.find_lib_path()[0])
    path = os.path.join(dirname, "graphbolt", basename)
    if not os.path.exists(path):
        raise FileNotFoundError(
            f"Unable to locate the DGL C++ GraphBolt library at {path}. This "
            "error typically occurs due to a version mismatch between the "
            "installed DGL and the PyTorch version you are currently using. "
            "Please ensure that your DGL installation is compatible with your "
            "PyTorch version. For more information, refer to the installation "
            "guide at https://www.dgl.ai/pages/start.html."
        )

    try:
        torch.classes.load_library(path)
    except Exception as err:  # pylint: disable=W0703
        # Keep the loader's own error in the traceback; it usually names the
        # missing symbol or dependency.
        raise ImportError("Cannot load Graphbolt C++ library") from err
def isin(elements, test_elements):
    """Check, for every entry of ``elements``, membership in ``test_elements``.

    The result is a boolean tensor with the same shape as ``elements``; an
    entry is True when the corresponding element occurs in ``test_elements``
    and False otherwise.

    Parameters
    ----------
    elements : torch.Tensor
        A 1D tensor holding the values to look up.
    test_elements : torch.Tensor
        A 1D tensor holding the values to test against.

    Examples
    --------
    >>> isin(torch.tensor([1, 2, 3, 4]), torch.tensor([2, 3]))
    tensor([False,  True,  True, False])
    """
    # Both inputs must be one dimensional; they are validated in argument
    # order so the first offending argument is the one reported.
    for tensor, message in (
        (elements, "Elements should be 1D tensor."),
        (test_elements, "Test_elements should be 1D tensor."),
    ):
        assert tensor.dim() == 1, message
    return torch.ops.graphbolt.isin(elements, test_elements)
def indptr_edge_ids(indptr, dtype=None, offset=None, output_size=None):
    """Converts a given indptr offset tensor to a COO format tensor for the
    edge ids.

    For a given indptr [0, 2, 5, 7] and offset tensor [0, 100, 200], the
    output will be [0, 1, 100, 101, 102, 200, 201]. If offset was not
    provided, the output would be [0, 1, 0, 1, 2, 0, 1].

    Parameters
    ----------
    indptr : torch.Tensor
        A 1D tensor represents the csc_indptr tensor.
    dtype : Optional[torch.dtype]
        The dtype of the returned output tensor. When omitted, the dtype of
        ``offset`` is used, so at least one of ``dtype`` and ``offset`` has
        to be given.
    offset : Optional[torch.Tensor]
        A 1D tensor represents the offsets that the returned tensor will be
        populated with.
    output_size : Optional[int]
        The size of the output tensor. Should be equal to indptr[-1]. Using
        this argument avoids a stream synchronization to calculate the output
        shape.

    Returns
    -------
    torch.Tensor
        The converted COO edge ids tensor.
    """
    assert indptr.dim() == 1, "Indptr should be 1D tensor."
    # Without this check, omitting both arguments failed later with an
    # opaque AttributeError on ``offset.dtype``.
    assert not (
        offset is None and dtype is None
    ), "One of offset or dtype must be given."
    assert offset is None or offset.dim() == 1, "Offset should be 1D tensor."
    if dtype is None:
        dtype = offset.dtype
    return torch.ops.graphbolt.indptr_edge_ids(
        indptr, dtype, offset, output_size
    )
def apply_to(x, device, non_blocking=False):
    """Move ``x`` to ``device`` when it supports that, else return it as is.

    * If ``device`` equals the string ``"pinned"`` and ``x`` has a
      ``pin_memory`` method, the result of ``x.pin_memory()`` is returned.
    * Otherwise, if ``x`` has a ``to`` method, ``x.to(device)`` is returned;
      ``non_blocking=True`` is forwarded only when requested.
    * Objects without a ``to`` method are returned unchanged.
    """
    wants_pinning = device == "pinned"
    if wants_pinning and hasattr(x, "pin_memory"):
        return x.pin_memory()
    if hasattr(x, "to"):
        if non_blocking:
            return x.to(device, non_blocking=True)
        return x.to(device)
    return x
@functional_datapipe("mark_end")
class EndMarker(IterDataPipe):
    """Pass-through datapipe that marks the end of a pipeline.

    It performs no transformation: iterating over it yields exactly the
    items of the wrapped datapipe, in order. Functional name:
    :obj:`mark_end`.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe whose end is being marked.
    """

    def __init__(self, datapipe):
        self.datapipe = datapipe

    def __iter__(self):
        # Delegate directly to the wrapped datapipe; nothing is altered.
        upstream = self.datapipe
        yield from upstream
@dataclass
class CSCFormatBase:
    r"""Basic class representing data in Compressed Sparse Column (CSC) format.

    Examples
    --------
    >>> indptr = torch.tensor([0, 1, 3])
    >>> indices = torch.tensor([1, 4, 2])
    >>> csc_format_base = CSCFormatBase(indptr=indptr, indices=indices)
    >>> print(csc_format_base.indptr)
    tensor([0, 1, 3])
    >>> print(csc_format_base.indices)
    tensor([1, 4, 2])
    """

    # Offsets into ``indices``; the last entry must equal ``len(indices)``.
    indptr: torch.Tensor = None
    # The index values referenced through ``indptr``.
    indices: torch.Tensor = None

    def __init__(self, indptr: torch.Tensor, indices: torch.Tensor):
        self.indptr = indptr
        self.indices = indices
        # The consistency check is skipped for CUDA tensors.
        if not indptr.is_cuda:
            assert self.indptr[-1] == len(
                self.indices
            ), "The last element of indptr should be the same as the length of indices."

    def __repr__(self) -> str:
        return _csc_format_base_str(self)

    def to(  # pylint: disable=invalid-name
        self, device: torch.device, non_blocking=False
    ) -> "CSCFormatBase":
        """Copy `CSCFormatBase` to the specified device using reflection.

        Every non-callable attribute whose name does not start with ``__``
        is replaced in place by its copy on ``device``.

        Returns
        -------
        CSCFormatBase
            ``self``, so that calls can be chained.
        """
        for attr in dir(self):
            # Only copy member variables.
            if not callable(getattr(self, attr)) and not attr.startswith("__"):
                setattr(
                    self,
                    attr,
                    recursive_apply(
                        getattr(self, attr),
                        apply_to,
                        device,
                        non_blocking=non_blocking,
                    ),
                )

        return self

    def pin_memory(self):
        """Copy `CSCFormatBase` to the pinned memory using reflection."""
        return self.to("pinned")

    def is_pinned(self) -> bool:
        """Check whether `CSCFormatBase` is pinned using reflection."""
        return is_object_pinned(self)
class MultiprocessingWrapper(torch_data.IterDataPipe):
    """Datapipe that iterates another datapipe through a PyTorch DataLoader.

    The wrapped datapipe is handed to ``torch.utils.data.DataLoader`` with
    ``batch_size=None``; iterating this object yields whatever that loader
    yields.

    Parameters
    ----------
    datapipe : DataPipe
        The data pipeline.
    num_workers : int, optional
        The number of worker processes. Default is 0, meaning that there
        will be no multiprocessing.
    persistent_workers : bool, optional
        If True, the data loader will not shut down the worker processes
        after a dataset has been consumed once. Only passed on as True when
        ``num_workers`` is positive.
    """

    def __init__(self, datapipe, num_workers=0, persistent_workers=True):
        self.datapipe = datapipe
        has_workers = num_workers > 0
        loader_options = {
            "batch_size": None,
            "num_workers": num_workers,
            # Persistence and the worker init hook are enabled only when
            # worker processes are actually used.
            "persistent_workers": has_workers and persistent_workers,
            "worker_init_fn": _set_worker_id if has_workers else None,
        }
        self.dataloader = torch_data.DataLoader(datapipe, **loader_options)

    def __iter__(self):
        loader = self.dataloader
        yield from loader
Setting it to a too high value will limit the amount of overlap while setting it too low may cause the PCI-e bandwidth to not get fully utilized. Manually tuned default is 10240, meaning around 5-7 Streaming Multiprocessors. """ def __init__( self, datapipe, num_workers=0, persistent_workers=True, max_uva_threads=10240, ): # Multiprocessing requires two modifications to the datapipe: # # 1. Insert a stage after ItemSampler to distribute the # minibatches evenly across processes. # 2. Cut the datapipe at FeatureFetcher, and wrap the inner datapipe # of the FeatureFetcher with a multiprocessing PyTorch DataLoader. datapipe = datapipe.mark_end() datapipe_graph = traverse_dps(datapipe) if num_workers > 0: # (1) Insert minibatch distribution. # TODO(BarclayII): Currently I'm using sharding_filter() as a # concept demonstration. Later on minibatch distribution should be # merged into ItemSampler to maximize efficiency. item_samplers = find_dps( datapipe_graph, ItemSampler, ) for item_sampler in item_samplers: datapipe_graph = replace_dp( datapipe_graph, item_sampler, item_sampler.sharding_filter(), ) # (2) Cut datapipe at FeatureFetcher and wrap. datapipe_graph = _find_and_wrap_parent( datapipe_graph, FeatureFetcherStartMarker, MultiprocessingWrapper, num_workers=num_workers, persistent_workers=persistent_workers, ) # (3) Limit the number of UVA threads used if the feature_fetcher # or any of the samplers have overlapping optimization enabled. if num_workers == 0 and torch.cuda.is_available(): feature_fetchers = find_dps( datapipe_graph, FeatureFetcher, ) for feature_fetcher in feature_fetchers: if feature_fetcher.max_num_stages > 0: # Overlap enabled. 
torch.ops.graphbolt.set_max_uva_threads(max_uva_threads) if num_workers == 0 and torch.cuda.is_available(): samplers = find_dps( datapipe_graph, SamplePerLayer, ) for sampler in samplers: if sampler.overlap_fetch: torch.ops.graphbolt.set_max_uva_threads(max_uva_threads) # (4) Cut datapipe at CopyTo and wrap with pinning and prefetching # before it. This enables enables non_blocking copies to the device. # Prefetching enables the data pipeline up to the CopyTo to run in a # separate thread. copiers = find_dps(datapipe_graph, CopyTo) if len(copiers) > 1: gb_warning( "Multiple CopyTo operations were found in the datapipe graph." " This case is not officially supported." ) for copier in copiers: # We enable the prefetch at all times for good CPU only performance. datapipe_graph = replace_dp( datapipe_graph, copier, # Add prefetch so that CPU and GPU can run concurrently. copier.datapipe.prefetch(2).copy_to( copier.device, non_blocking=True ), ) super().__init__(datapipe) ================================================ FILE: python/dgl/graphbolt/datapipes/__init__.py ================================================ """GraphBolt's datapipes, mostly copied from "torchdata==0.7.1".""" from .utils import * from .visualization import * ================================================ FILE: python/dgl/graphbolt/datapipes/utils.py ================================================ """DataPipe utilities""" import threading import time from collections import deque from typing import final, List, Set, Type # pylint: disable=no-name-in-module from torch.utils.data import functional_datapipe, IterDataPipe, MapDataPipe from torch.utils.data.graph import DataPipe, DataPipeGraph, traverse_dps __all__ = [ "datapipe_graph_to_adjlist", "find_dps", "replace_dp", "traverse_dps", ] # Copied from: # https://github.com/pytorch/data/blob/88c8bdc6662f37649b7ea5df0bd90a4b24a56876/torchdata/datapipes/iter/util/prefetcher.py#L19-L20 # Interval between buffer fulfillment checks 
PRODUCER_SLEEP_INTERVAL = 0.0001 # Interval between checking items availability in buffer CONSUMER_SLEEP_INTERVAL = 0.0001 def _get_parents(result_dict, datapipe_graph): for k, (v, parents) in datapipe_graph.items(): if k not in result_dict: result_dict[k] = (v, list(parents.keys())) _get_parents(result_dict, parents) def datapipe_graph_to_adjlist(datapipe_graph): """Given a DataPipe graph returned by :func:`torch.utils.data.graph.traverse_dps` in DAG form, convert it into adjacency list form. Namely, :func:`torch.utils.data.graph.traverse_dps` returns the following data structure: .. code:: { id(datapipe): ( datapipe, { id(parent1_of_datapipe): (parent1_of_datapipe, {...}), id(parent2_of_datapipe): (parent2_of_datapipe, {...}), ... } ) } We convert it into the following for easier access: .. code:: { id(datapipe1): ( datapipe1, [id(parent1_of_datapipe1), id(parent2_of_datapipe1), ...] ), id(datapipe2): ( datapipe2, [id(parent1_of_datapipe2), id(parent2_of_datapipe2), ...] ), ... } """ result_dict = {} _get_parents(result_dict, datapipe_graph) return result_dict # Copied from: # https://github.com/pytorch/data/blob/88c8bdc6662f37649b7ea5df0bd90a4b24a56876/torchdata/dataloader2/graph/utils.py#L16-L35 def find_dps(graph: DataPipeGraph, dp_type: Type[DataPipe]) -> List[DataPipe]: r""" Given the graph of DataPipe generated by ``traverse_dps`` function, return DataPipe instances with the provided DataPipe type. """ dps: List[DataPipe] = [] cache: Set[int] = set() def helper(g) -> None: # pyre-ignore for dp_id, (dp, src_graph) in g.items(): if dp_id in cache: continue cache.add(dp_id) # Please not use `isinstance`, there is a bug. 
def replace_dp(
    graph: DataPipeGraph, old_datapipe: DataPipe, new_datapipe: DataPipe
) -> DataPipeGraph:
    r"""
    Swap ``old_datapipe`` for ``new_datapipe`` inside a DataPipe graph.

    ``graph`` is the graph generated by ``traverse_dps`` and must have
    exactly one top-level entry. If ``old_datapipe`` is that top-level entry,
    the graph of ``new_datapipe`` is traversed instead. The references held
    by the datapipes are then rewritten and a freshly traversed graph of the
    resulting top-level DataPipe is returned.
    """
    assert len(graph) == 1

    replacing_root = id(old_datapipe) in graph
    if replacing_root:
        graph = traverse_dps(new_datapipe)

    entries = list(graph.values())
    root_datapipe = entries[0][0]
    for receiver, sources in entries:
        _replace_dp(receiver, sources, old_datapipe, new_datapipe)
    return traverse_dps(root_datapipe)
@functional_datapipe("prefetch")
class PrefetcherIterDataPipe(IterDataPipe):
    r"""
    Prefetches elements from the source DataPipe and puts them into a buffer
    (functional name: ``prefetch``). Prefetching performs the operations (e.g.
    I/O, computations) of the DataPipes up to this one ahead of time and stores
    the result in the buffer, ready to be consumed by the subsequent DataPipe.
    It has no effect aside from getting the sample ready ahead of time.

    A daemon producer thread fills the buffer while ``__iter__`` consumes it.
    An exception raised by the source is put into the buffer and re-raised in
    the consuming thread, except ``StopIteration`` which ends the iteration.

    Args:
        source_datapipe: IterDataPipe from which samples are prefetched
        buffer_size: the size of the buffer which stores the prefetched samples

    Raises:
        ValueError: if ``buffer_size`` is not positive.

    Example:
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> dp = IterableWrapper(file_paths).open_files().prefetch(5)
    """

    def __init__(self, source_datapipe, buffer_size: int = 10):
        self.source_datapipe = source_datapipe
        if buffer_size <= 0:
            raise ValueError(
                "'buffer_size' is required to be a positive integer."
            )
        self.buffer_size = buffer_size
        # Producer thread and the state shared with it; both are created
        # lazily by ``__iter__``.
        self.thread = None
        self.prefetch_data = None

    @staticmethod
    def thread_worker(
        prefetch_data: "_PrefetchData",
    ):  # pylint: disable=missing-function-docstring
        itr = iter(prefetch_data.source_datapipe)
        while not prefetch_data.stop_iteration:
            # Run if not paused
            while prefetch_data.run_prefetcher:
                if (
                    len(prefetch_data.prefetch_buffer)
                    < prefetch_data.buffer_size
                ):
                    try:
                        item = next(itr)
                        prefetch_data.prefetch_buffer.append(item)
                    except Exception as e:  # pylint: disable=broad-except
                        # Hand the exception (including StopIteration) over
                        # to the consumer and stop producing.
                        prefetch_data.run_prefetcher = False
                        prefetch_data.stop_iteration = True
                        prefetch_data.prefetch_buffer.append(e)
                else:  # Buffer is full, waiting for main thread to consume items
                    # TODO: Calculate sleep interval based on previous consumption speed
                    time.sleep(PRODUCER_SLEEP_INTERVAL)
            prefetch_data.paused = True
            # Sleep longer when this prefetcher thread is paused
            time.sleep(PRODUCER_SLEEP_INTERVAL * 10)

    def __iter__(self):
        try:
            prefetch_data = _PrefetchData(
                self.source_datapipe, self.buffer_size
            )
            self.prefetch_data = prefetch_data
            thread = threading.Thread(
                target=PrefetcherIterDataPipe.thread_worker,
                args=(prefetch_data,),
                daemon=True,
            )
            thread.start()
            self.thread = thread

            # Keep consuming until the producer has stopped AND the buffer
            # has been drained.
            while (
                not prefetch_data.stop_iteration
                or len(prefetch_data.prefetch_buffer) > 0
            ):
                if len(prefetch_data.prefetch_buffer) > 0:
                    data = prefetch_data.prefetch_buffer.popleft()
                    if isinstance(data, Exception):
                        if isinstance(data, StopIteration):
                            break
                        raise data
                    yield data
                else:
                    time.sleep(CONSUMER_SLEEP_INTERVAL)
        finally:
            # Always signal the producer to stop and wait for it, including
            # when the consumer abandons the iteration early.
            if "prefetch_data" in locals():
                prefetch_data.run_prefetcher = False
                prefetch_data.stop_iteration = True
                prefetch_data.paused = False
            if "thread" in locals():
                thread.join()

    def __getstate__(self):
        """
        Getting state in threading environment requires next operations:
            1) Stopping of the producer thread.
            2) Saving buffer.
            3) Adding lazy restart of producer thread when __next__ is called
               again (this will guarantee that you only change state of the
               source_datapipe after entire state of the graph is saved).

        Currently only the source datapipe and the buffer size are saved.
        """
        # TODO: Update __getstate__ and __setstate__ to support snapshotting and restoration
        return {
            "source_datapipe": self.source_datapipe,
            "buffer_size": self.buffer_size,
        }

    def __setstate__(self, state):
        self.source_datapipe = state["source_datapipe"]
        self.buffer_size = state["buffer_size"]
        self.thread = None
        # Mirror ``__init__`` so a restored instance has both attributes.
        self.prefetch_data = None

    @final
    def reset(self):  # pylint: disable=missing-function-docstring
        self.shutdown()

    def pause(self):  # pylint: disable=missing-function-docstring
        if self.thread is not None:
            assert self.prefetch_data is not None
            self.prefetch_data.run_prefetcher = False
            if self.thread.is_alive():
                # Blocking until the thread is paused
                while not self.prefetch_data.paused:
                    time.sleep(PRODUCER_SLEEP_INTERVAL * 10)

    @final
    def resume(self):  # pylint: disable=missing-function-docstring
        if (
            self.thread is not None
            and self.prefetch_data is not None
            and (
                not self.prefetch_data.stop_iteration
                or len(self.prefetch_data.prefetch_buffer) > 0
            )
        ):
            self.prefetch_data.run_prefetcher = True
            self.prefetch_data.paused = False

    @final
    def shutdown(self):  # pylint: disable=missing-function-docstring
        # ``hasattr`` guards keep this safe when called from ``__del__`` on a
        # partially constructed instance.
        if hasattr(self, "prefetch_data") and self.prefetch_data is not None:
            self.prefetch_data.run_prefetcher = False
            self.prefetch_data.stop_iteration = True
            self.prefetch_data.paused = False
            self.prefetch_data = None
        if hasattr(self, "thread") and self.thread is not None:
            self.thread.join()
            self.thread = None

    def __del__(self):
        self.shutdown()

    def __len__(self) -> int:
        # Imported locally: the module does not import ``Sized`` at the top,
        # so the bare name used to raise NameError here.
        from collections.abc import Sized

        if isinstance(self.source_datapipe, Sized):
            return len(self.source_datapipe)
        raise TypeError(
            f"{type(self).__name__} instance doesn't have valid length"
        )
def to_graph(dp, *, debug: bool = False) -> "graphviz.Digraph":
    """Build a :class:`graphviz.Digraph` picturing a DataPipe pipeline.

    Each node produced by ``to_nodes`` becomes a box in the graph, with an
    edge drawn to every one of its children. Nodes without parents are filled
    light blue, nodes without children light green, and all others get no
    fill colour.

    .. note::
        The package :mod:`graphviz` is required to use this function.

    .. note::
        The most common interfaces for the returned graph object are:

        - :meth:`~graphviz.Digraph.render`: Save the graph to a file.
        - :meth:`~graphviz.Digraph.view`: Open the graph in a viewer.

    Args:
        dp: DataPipe that you would like to visualize (generally the last one
            in a chain of DataPipes).
        debug (bool): If ``True``, renders internal datapipes that are usually
            hidden from the user (such as ``ChildDataPipe`` of `demux` and
            `fork`). Defaults to ``False``.

    Raises:
        ModuleNotFoundError: if :mod:`graphviz` is not installed.

    Example:
        >>> from torchdata.datapipes.iter import IterableWrapper
        >>> from torchdata.datapipes.utils import to_graph
        >>> dp = IterableWrapper(range(10))
        >>> dp1, dp2 = dp.demux(num_instances=2, classifier_fn=lambda x: x % 2)
        >>> dp1 = dp1.map(lambda x: x + 1)
        >>> dp2 = dp2.filter(lambda _: True)
        >>> dp3 = dp1.zip(dp2).map(lambda t: t[0] + t[1])
        >>> g = to_graph(dp3)
        >>> g.view()  # This will open the graph in a viewer
    """
    try:
        import graphviz
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            "The package `graphviz` is required to be installed to use this function. "
            "Please `pip install graphviz` or `conda install -c conda-forge graphviz`."
        ) from None

    # The graph style as well as the color scheme below was copied from https://github.com/szagoruyko/pytorchviz/
    # https://github.com/szagoruyko/pytorchviz/blob/0adcd83af8aa7ab36d6afd139cabbd9df598edb7/torchviz/dot.py#L78-L85
    box_style = {
        "style": "filled",
        "shape": "box",
        "align": "left",
        "fontsize": "10",
        "ranksep": "0.1",
        "height": "0.2",
        "fontname": "monospace",
    }
    canvas = {"size": "12,12"}
    graph = graphviz.Digraph(node_attr=box_style, graph_attr=canvas)

    for node in to_nodes(dp, debug=debug):
        # Sources are blue, sinks are green, intermediate nodes are plain.
        if not node.parents:
            fillcolor = "lightblue"
        else:
            fillcolor = None if node.childs else "darkolivegreen1"

        graph.node(name=repr(node), label=str(node), fillcolor=fillcolor)

        for child in node.childs:
            graph.edge(repr(node), repr(child))

    return graph
class Dataset:
    """Abstract interface for the data needed during training.

    The backing storage is left to the implementation: it could be a native
    CPU memory block, a shared memory block, a file handle of an opened file
    on disk, a service exposing an access API, etc.

    A dataset has three primary components:

    * Task
      A task consists of several meta information and the
      Train/Validation/Test Set. A dataset could have multiple tasks.

    * Feature Storage
      A key-value store which stores node/edge/graph features.

    * Graph Topology
      Graph topology is used by the subgraph sampling algorithm to generate
      a subgraph.

    Every property here raises ``NotImplementedError``; concrete datasets are
    expected to override them.
    """

    @property
    def tasks(self) -> List[Task]:
        """The tasks defined on this dataset."""
        raise NotImplementedError

    @property
    def graph(self) -> SamplingGraph:
        """The graph topology."""
        raise NotImplementedError

    @property
    def feature(self) -> FeatureStore:
        """The feature store."""
        raise NotImplementedError

    @property
    def dataset_name(self) -> str:
        """The name of the dataset."""
        raise NotImplementedError

    @property
    def all_nodes_set(self) -> Union[ItemSet, HeteroItemSet]:
        """The itemset containing all nodes."""
        raise NotImplementedError
""" def __init__( self, datapipe, include_reverse_edges: bool = False, reverse_etypes_mapping: Dict[str, str] = None, asynchronous=False, ): exclude_seed_edges_fn = partial( exclude_seed_edges, include_reverse_edges=include_reverse_edges, reverse_etypes_mapping=reverse_etypes_mapping, async_op=asynchronous, ) datapipe = datapipe.transform(exclude_seed_edges_fn) if asynchronous: datapipe = datapipe.buffer() datapipe = datapipe.transform(self._wait_for_sampled_subgraphs) super().__init__(datapipe) @staticmethod def _wait_for_sampled_subgraphs(minibatch): minibatch.sampled_subgraphs = [ subgraph.wait() for subgraph in minibatch.sampled_subgraphs ] return minibatch def add_reverse_edges( edges: Union[Dict[str, torch.Tensor], torch.Tensor], reverse_etypes_mapping: Dict[str, str] = None, ): r""" This function finds the reverse edges of the given `edges` and returns the composition of them. In a homogeneous graph, reverse edges have inverted source and destination node IDs. While in a heterogeneous graph, reversing also involves swapping node IDs and their types. This function could be used before `exclude_edges` function to help find targeting edges. Note: The found reverse edges may not really exists in the original graph. And repeat edges could be added becasue reverse edges may already exists in the `edges`. Parameters ---------- edges : Union[Dict[str, torch.Tensor], torch.Tensor] - If sampled subgraph is homogeneous, then `edges` should be a N*2 tensors. - If sampled subgraph is heterogeneous, then `edges` should be a dictionary of edge types and the corresponding edges to exclude. reverse_etypes_mapping : Dict[str, str], optional The mapping from the original edge types to their reverse edge types. Returns ------- Union[Dict[str, torch.Tensor], torch.Tensor] The node pairs contain both the original edges and their reverse counterparts. 
Examples -------- >>> edges = {"A:r:B": torch.tensor([[0, 1],[1, 2]]))} >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"})) {'A:r:B': torch.tensor([[0, 1],[1, 2]]), 'B:rr:A': torch.tensor([[1, 0],[2, 1]])} >>> edges = torch.tensor([[0, 1],[1, 2]]) >>> print(gb.add_reverse_edges(edges)) torch.tensor([[1, 0],[2, 1]]) """ if isinstance(edges, torch.Tensor): assert edges.ndim == 2 and edges.shape[1] == 2, ( "Only tensor with shape N*2 is supported now, but got " + f"{edges.shape}." ) reverse_edges = edges.flip(dims=(1,)) return torch.cat((edges, reverse_edges)) else: combined_edges = edges.copy() for etype, reverse_etype in reverse_etypes_mapping.items(): if etype in edges: assert edges[etype].ndim == 2 and edges[etype].shape[1] == 2, ( "Only tensor with shape N*2 is supported now, but got " + f"{edges[etype].shape}." ) if reverse_etype in combined_edges: combined_edges[reverse_etype] = torch.cat( ( combined_edges[reverse_etype], edges[etype].flip(dims=(1,)), ) ) else: combined_edges[reverse_etype] = edges[etype].flip(dims=(1,)) return combined_edges def exclude_seed_edges( minibatch: MiniBatch, include_reverse_edges: bool = False, reverse_etypes_mapping: Dict[str, str] = None, async_op: bool = False, ): """ Exclude seed edges with or without their reverse edges from the sampled subgraphs in the minibatch. Parameters ---------- minibatch : MiniBatch The minibatch. include_reverse_edges : bool Whether reverse edges should be excluded as well. Default is False. reverse_etypes_mapping : Dict[str, str] = None The mapping from the original edge types to their reverse edge types. async_op: bool Boolean indicating whether the call is asynchronous. If so, the result can be obtained by calling wait on the modified sampled_subgraphs. 
""" edges_to_exclude = minibatch.seeds if include_reverse_edges: edges_to_exclude = add_reverse_edges( edges_to_exclude, reverse_etypes_mapping ) minibatch.sampled_subgraphs = [ subgraph.exclude_edges(edges_to_exclude, async_op=async_op) for subgraph in minibatch.sampled_subgraphs ] return minibatch ================================================ FILE: python/dgl/graphbolt/feature_fetcher.py ================================================ """Feature fetchers""" from functools import partial from typing import Dict import torch from torch.utils.data import functional_datapipe from .base import etype_tuple_to_str from .impl.cooperative_conv import CooperativeConvFunction from .minibatch_transformer import MiniBatchTransformer __all__ = [ "FeatureFetcher", "FeatureFetcherStartMarker", ] def get_feature_key_list(feature_keys, domain): """Processes node_feature_keys and extracts their feature keys to a list.""" if isinstance(feature_keys, Dict): return [ (domain, type_name, feature_name) for type_name, feature_names in feature_keys.items() for feature_name in feature_names ] elif feature_keys is not None: return [(domain, None, feature_name) for feature_name in feature_keys] else: return [] @functional_datapipe("mark_feature_fetcher_start") class FeatureFetcherStartMarker(MiniBatchTransformer): """Used to mark the start of a FeatureFetcher and is a no-op. All the datapipes created during a FeatureFetcher instantiation are guarenteed to be contained between FeatureFetcherStartMarker and FeatureFetcher instances in the datapipe graph. """ def __init__(self, datapipe): super().__init__(datapipe, self._identity) @functional_datapipe("fetch_feature") class FeatureFetcher(MiniBatchTransformer): """A feature fetcher used to fetch features for node/edge in graphbolt. Functional name: :obj:`fetch_feature`. Parameters ---------- datapipe : DataPipe The datapipe. feature_store : FeatureStore A storage for features, support read and update. 
node_feature_keys : List[str] or Dict[str, List[str]] Node features keys indicates the node features need to be read. - If `node_features` is a list: It means the graph is homogeneous graph, and the 'str' inside are feature names. - If `node_features` is a dictionary: The keys should be node type and the values are lists of feature names. edge_feature_keys : List[str] or Dict[str, List[str]] Edge features name indicates the edge features need to be read. - If `edge_features` is a list: It means the graph is homogeneous graph, and the 'str' inside are feature names. - If `edge_features` is a dictionary: The keys are edge types, following the format 'str:str:str', and the values are lists of feature names. overlap_fetch : bool, optional If True, the feature fetcher will overlap the UVA feature fetcher operations with the rest of operations by using an alternative CUDA stream or utilizing asynchronous operations. Default is True. cooperative: bool, optional Boolean indicating whether Cooperative Minibatching, which was initially proposed in `Deep Graph Library PR#4337`__ and was later first fully described in `Cooperative Minibatching in Graph Neural Networks `__. Cooperation between the GPUs eliminates duplicate work performed across the GPUs due to the overlapping sampled k-hop neighborhoods of seed nodes when performing GNN minibatching. 
""" def __init__( self, datapipe, feature_store, node_feature_keys=None, edge_feature_keys=None, overlap_fetch=True, cooperative=False, ): datapipe = datapipe.mark_feature_fetcher_start() self.feature_store = feature_store self.node_feature_keys = node_feature_keys self.edge_feature_keys = edge_feature_keys max_val = 0 if overlap_fetch: for feature_key_list in [ get_feature_key_list(node_feature_keys, "node"), get_feature_key_list(edge_feature_keys, "edge"), ]: for feature_key in feature_key_list: if feature_key not in feature_store: continue for device_str in ["cpu", "cuda"]: try: max_val = max( feature_store[ feature_key ].read_async_num_stages( torch.device(device_str) ), max_val, ) except AssertionError: pass datapipe = datapipe.transform(self._read) for i in range(max_val, 0, -1): datapipe = datapipe.transform( partial(self._execute_stage, i) ).buffer(1) if max_val > 0: datapipe = datapipe.transform(self._final_stage) if cooperative: datapipe = datapipe.transform(self._cooperative_exchange) datapipe = datapipe.buffer() super().__init__(datapipe) # A positive value indicates that the overlap optimization is enabled. 
def _cooperative_exchange(self, data):
    """Run the cooperative exchange on the fetched node features.

    Every node feature in ``data.node_features`` is passed through
    :class:`CooperativeConvFunction` together with the first sampled
    subgraph, and the result is written back under the same key.

    Parameters
    ----------
    data : MiniBatch
        The minibatch whose ``node_features`` were already materialized by
        the earlier stages of this fetcher.

    Returns
    -------
    MiniBatch
        The same ``data`` object with ``node_features`` replaced in place.
    """
    subgraph = data.sampled_subgraphs[0]
    is_heterogeneous = isinstance(
        self.node_feature_keys, Dict
    ) or isinstance(self.edge_feature_keys, Dict)
    if is_heterogeneous:
        # ``_read`` stores heterogeneous node features under
        # ``(type_name, feature_name)`` keys. The exchange operates on one
        # feature at a time and expects a dictionary keyed by node type, so
        # group the tensors by feature name first.
        features_by_name = {}
        for (ntype, feature_name), feature in data.node_features.items():
            features_by_name.setdefault(feature_name, {})[ntype] = feature
        for feature_name, typed_features in features_by_name.items():
            exchanged = CooperativeConvFunction.apply(
                subgraph, typed_features
            )
            for ntype, tensor in exchanged.items():
                data.node_features[(ntype, feature_name)] = tensor
    else:
        # Only existing keys are reassigned, so iterating the dictionary
        # while updating it is safe.
        for key in data.node_features:
            data.node_features[key] = CooperativeConvFunction.apply(
                subgraph, data.node_features[key]
            )
    return data
""" node_features = {} num_layers = data.num_layers() edge_features = [{} for _ in range(num_layers)] is_heterogeneous = isinstance( self.node_feature_keys, Dict ) or isinstance(self.edge_feature_keys, Dict) # Read Node features. input_nodes = data.node_ids() def read_helper(feature_key, index): if self.max_num_stages > 0: feature = self.feature_store[feature_key] num_stages = feature.read_async_num_stages(index.device) if num_stages > 0: return (feature.read_async(index), num_stages) else: # Asynchronicity is not needed, compute in _final_stage. class _Waiter: def __init__(self, feature, index): self.feature = feature self.index = index def wait(self): """Returns the stored value when invoked.""" result = self.feature.read(self.index) # Ensure there is no memory leak. self.feature = self.index = None return result return (_Waiter(feature, index), 0) else: domain, type_name, feature_name = feature_key return self.feature_store.read( domain, type_name, feature_name, index ) if self.node_feature_keys and input_nodes is not None: if is_heterogeneous: for type_name, nodes in input_nodes.items(): if type_name not in self.node_feature_keys or nodes is None: continue for feature_name in self.node_feature_keys[type_name]: node_features[(type_name, feature_name)] = read_helper( ("node", type_name, feature_name), nodes ) else: for feature_name in self.node_feature_keys: node_features[feature_name] = read_helper( ("node", None, feature_name), input_nodes ) # Read Edge features. if self.edge_feature_keys and num_layers > 0: for i in range(num_layers): original_edge_ids = data.edge_ids(i) if is_heterogeneous: # Convert edge type to string. 
class FeatureKey(NamedTuple):
    """A named tuple class to represent feature keys in FeatureStore classes.

    The fields are domain, type and name, all of which take string values.
    Being a tuple, an instance compares equal to the plain
    ``(domain, type, name)`` tuple with the same contents.
    """

    # The domain of the feature such as "node", "edge" or "graph".
    domain: str
    # The node or edge type name.
    type: str
    # The feature name.
    name: str
The return result can be accessed by calling `.wait()`. on the returned future object. It is undefined behavior to call `.wait()` more than once. Example Usage -------- >>> import dgl.graphbolt as gb >>> feature = gb.Feature(...) >>> ids = torch.tensor([0, 2]) >>> for stage, future in enumerate(feature.read_async(ids)): ... pass >>> assert stage + 1 == feature.read_async_num_stages(ids.device) >>> result = future.wait() # result contains the read values. """ raise NotImplementedError def read_async_num_stages(self, ids_device: torch.device): """The number of stages of the read_async operation. See read_async function for directions on its use. This function is required to return the number of yield operations when read_async is used with a tensor residing on ids_device. Parameters ---------- ids_device : torch.device The device of the ids parameter passed into read_async. Returns ------- int The number of stages of the read_async operation. """ raise NotImplementedError def size(self): """Get the size of the feature. Returns ------- torch.Size The size of the feature. """ raise NotImplementedError def count(self): """Get the count of the feature. Returns ------- int The count of the feature. """ raise NotImplementedError def update(self, value: torch.Tensor, ids: torch.Tensor = None): """Update the feature. Parameters ---------- value : torch.Tensor The updated value of the feature. ids : torch.Tensor, optional The indices of the feature to update. If specified, only the specified indices of the feature will be updated. For the feature, the `ids[i]` row is updated to `value[i]`. So the indices and value must have the same length. If None, the entire feature will be updated. """ raise NotImplementedError def metadata(self): """Get the metadata of the feature. Returns ------- Dict The metadata of the feature. 
""" return {} class FeatureStore: r"""A store to manage multiple features for access.""" def __init__(self): pass def __getitem__(self, feature_key: FeatureKey) -> Feature: """Access the underlying `Feature` with its (domain, type, name) as the feature_key. """ raise NotImplementedError def __setitem__(self, feature_key: FeatureKey, feature: Feature): """Set the underlying `Feature` with its (domain, type, name) as the feature_key and feature as the value. """ raise NotImplementedError def __contains__(self, feature_key: FeatureKey) -> bool: """Checks whether the provided (domain, type, name) as the feature_key is container in the FeatureStore.""" raise NotImplementedError def read( self, domain: str, type_name: str, feature_name: str, ids: torch.Tensor = None, ): """Read from the feature store. Parameters ---------- domain : str The domain of the feature such as "node", "edge" or "graph". type_name : str The node or edge type name. feature_name : str The feature name. ids : torch.Tensor, optional The index of the feature. If specified, only the specified indices of the feature are read. If None, the entire feature is returned. Returns ------- torch.Tensor The read feature. """ return self.__getitem__((domain, type_name, feature_name)).read(ids) def size( self, domain: str, type_name: str, feature_name: str, ): """Get the size of the specified feature in the feature store. Parameters ---------- domain : str The domain of the feature such as "node", "edge" or "graph". type_name : str The node or edge type name. feature_name : str The feature name. Returns ------- torch.Size The size of the specified feature in the feature store. """ return self.__getitem__((domain, type_name, feature_name)).size() def count( self, domain: str, type_name: str, feature_name: str, ): """Get the count the specified feature in the feature store. Parameters ---------- domain : str The domain of the feature such as "node", "edge" or "graph". type_name : str The node or edge type name. 
feature_name : str The feature name. Returns ------- int The count of the specified feature in the feature store. """ return self.__getitem__((domain, type_name, feature_name)).count() def metadata( self, domain: str, type_name: str, feature_name: str, ): """Get the metadata of the specified feature in the feature store. Parameters ---------- domain : str The domain of the feature such as "node", "edge" or "graph". type_name : str The node or edge type name. feature_name : str The feature name. Returns ------- Dict The metadata of the feature. """ return self.__getitem__((domain, type_name, feature_name)).metadata() def update( self, domain: str, type_name: str, feature_name: str, value: torch.Tensor, ids: torch.Tensor = None, ): """Update the feature store. Parameters ---------- domain : str The domain of the feature such as "node", "edge" or "graph". type_name : str The node or edge type name. feature_name : str The feature name. value : torch.Tensor The updated value of the feature. ids : torch.Tensor, optional The indices of the feature to update. If specified, only the specified indices of the feature will be updated. For the feature, the `ids[i]` row is updated to `value[i]`. So the indices and value must have the same length. If None, the entire feature will be updated. """ self.__getitem__((domain, type_name, feature_name)).update(value, ids) def keys(self): """Get the keys of the features. Returns ------- List[tuple] The keys of the features. The tuples are in `(domain, type_name, feat_name)` format. """ raise NotImplementedError def bytes_to_number_of_items(cache_capacity_in_bytes, single_item): """Returns the number of rows to be cached.""" item_bytes = single_item.nbytes # Round up so that we never get a size of 0, unless bytes is 0. 
def wrap_with_cached_feature(
    cached_feature_type,
    fallback_features: Union[Feature, Dict[FeatureKey, Feature]],
    max_cache_size_in_bytes: int,
    *args,
    **kwargs,
) -> Union[Feature, Dict[FeatureKey, Feature]]:
    """Wraps the given features with the given cached feature type using
    a single cache instance.

    Parameters
    ----------
    cached_feature_type
        The wrapper class. Its ``_cache_type`` attribute is called once to
        build the shared cache, and the class itself is called as
        ``cached_feature_type(feature, cache=..., offset=...)`` per feature.
    fallback_features : Union[Feature, Dict[FeatureKey, Feature]]
        A single feature or a dictionary of features to wrap.
    max_cache_size_in_bytes : int
        The cache capacity in bytes, converted to a number of rows with
        ``bytes_to_number_of_items``.
    *args, **kwargs
        Forwarded to ``cached_feature_type._cache_type``.

    Returns
    -------
    Union[Feature, Dict[FeatureKey, Feature]]
        The wrapped feature for a single input, otherwise a dictionary with
        the same keys holding the wrapped features.
    """
    if isinstance(fallback_features, dict):
        shared_cache = None
        expected_row_bytes = None
        next_offset = 0
        wrapped = {}
        for key, fallback in fallback_features.items():
            # Row 0 is read to learn the row shape and dtype of the feature.
            first_row = fallback.read(torch.tensor([0]))
            if expected_row_bytes is None:
                expected_row_bytes = first_row.nbytes
            else:
                assert (
                    expected_row_bytes == first_row.nbytes
                ), "The # bytes of a single row of the features should match."
            num_items = bytes_to_number_of_items(
                max_cache_size_in_bytes, first_row
            )
            if shared_cache is None:
                # Built lazily from the first feature and then reused.
                shared_cache = cached_feature_type._cache_type(
                    *args,
                    cache_shape=(num_items,) + first_row.shape[1:],
                    dtype=first_row.dtype,
                    **kwargs,
                )
            wrapped[key] = cached_feature_type(
                fallback, cache=shared_cache, offset=next_offset
            )
            # Each feature gets its own id range inside the shared cache.
            next_offset += fallback.count()
        return wrapped

    # A lone feature is wrapped through the dictionary path.
    assert isinstance(fallback_features, Feature)
    single = wrap_with_cached_feature(
        cached_feature_type,
        {"a": fallback_features},
        max_cache_size_in_bytes,
        *args,
        **kwargs,
    )
    return single["a"]
.gpu_graph_cache import * from .cpu_feature_cache import * from .cpu_cached_feature import * from .cooperative_conv import * ================================================ FILE: python/dgl/graphbolt/impl/basic_feature_store.py ================================================ """Basic feature store for GraphBolt.""" from typing import Dict, Tuple from ..feature_store import Feature, FeatureKey, FeatureStore __all__ = ["BasicFeatureStore"] class BasicFeatureStore(FeatureStore): r"""A basic feature store to manage multiple features for access.""" def __init__(self, features: Dict[Tuple[str, str, str], Feature]): r"""Initiate a basic feature store. Parameters ---------- features : Dict[Tuple[str, str, str], Feature] The dict of features served by the feature store, in which the key is tuple of (domain, type_name, feature_name). Returns ------- The feature stores. """ super().__init__() self._features = features def __getitem__(self, feature_key: FeatureKey) -> Feature: """Access the underlying `Feature` with its (domain, type, name) as the feature_key. """ return self._features[feature_key] def __setitem__(self, feature_key: FeatureKey, feature: Feature): """Set the underlying `Feature` with its (domain, type, name) as the feature_key and feature as the value. """ self._features[feature_key] = feature def __contains__(self, feature_key: FeatureKey) -> bool: """Checks whether the provided (domain, type, name) as the feature_key is container in the BasicFeatureStore.""" return feature_key in self._features def __len__(self): """Return the number of features.""" return len(self._features) def keys(self): """Get the keys of the features. Returns ------- List[tuple] The keys of the features. The tuples are in `(domain, type_name, feat_name)` format. 
""" return list(self._features.keys()) ================================================ FILE: python/dgl/graphbolt/impl/cooperative_conv.py ================================================ """Graphbolt cooperative convolution.""" from typing import Dict, Union import torch from ..sampled_subgraph import SampledSubgraph from ..subgraph_sampler import all_to_all, convert_to_hetero, revert_to_homo __all__ = ["CooperativeConvFunction", "CooperativeConv"] class CooperativeConvFunction(torch.autograd.Function): """Cooperative convolution operation from Cooperative Minibatching. Implements the `all-to-all` message passing algorithm in Cooperative Minibatching, which was initially proposed in `Deep Graph Library PR#4337`__ and was later first fully described in `Cooperative Minibatching in Graph Neural Networks `__. Cooperation between the GPUs eliminates duplicate work performed across the GPUs due to the overlapping sampled k-hop neighborhoods of seed nodes when performing GNN minibatching. This reduces the redundant computations across GPUs at the expense of communication. 
""" @staticmethod def forward( ctx, subgraph: SampledSubgraph, tensor: Union[torch.Tensor, Dict[str, torch.Tensor]], ): """Implements the forward pass.""" counts_sent = convert_to_hetero(subgraph._counts_sent) counts_received = convert_to_hetero(subgraph._counts_received) seed_inverse_ids = convert_to_hetero(subgraph._seed_inverse_ids) seed_sizes = convert_to_hetero(subgraph._seed_sizes) ctx.communication_variables = ( counts_sent, counts_received, seed_inverse_ids, seed_sizes, ) outs = {} for ntype, typed_tensor in convert_to_hetero(tensor).items(): out = typed_tensor.new_empty( (sum(counts_sent[ntype]),) + typed_tensor.shape[1:] ) all_to_all( torch.split(out, counts_sent[ntype]), torch.split( typed_tensor[seed_inverse_ids[ntype]], counts_received[ntype], ), ) outs[ntype] = out return revert_to_homo(out) @staticmethod def backward( ctx, grad_output: Union[torch.Tensor, Dict[str, torch.Tensor]] ): """Implements the backward pass.""" ( counts_sent, counts_received, seed_inverse_ids, seed_sizes, ) = ctx.communication_variables delattr(ctx, "communication_variables") outs = {} for ntype, typed_grad_output in convert_to_hetero(grad_output).items(): out = typed_grad_output.new_empty( (sum(counts_received[ntype]),) + typed_grad_output.shape[1:] ) all_to_all( torch.split(out, counts_received[ntype]), torch.split(typed_grad_output, counts_sent[ntype]), ) i = out.new_empty(2, out.shape[0], dtype=torch.int64) i[0] = seed_inverse_ids[ntype] # src i[1] = torch.arange( out.shape[0], device=typed_grad_output.device ) # dst coo = torch.sparse_coo_tensor( i, torch.ones( i.shape[1], dtype=grad_output.dtype, device=i.device ), size=(seed_sizes[ntype], i.shape[1]), ) outs[ntype] = torch.sparse.mm(coo, out) return None, revert_to_homo(outs) class CooperativeConv(torch.nn.Module): """Cooperative convolution operation from Cooperative Minibatching. 
def forward(
    self,
    subgraph: SampledSubgraph,
    x: Union[torch.Tensor, Dict[str, torch.Tensor]],
):
    """Apply :class:`CooperativeConvFunction` to ``x``.

    Parameters
    ----------
    subgraph : SampledSubgraph
        The sampled subgraph handed to the function unchanged.
    x : Union[torch.Tensor, Dict[str, torch.Tensor]]
        A tensor, or a dictionary of tensors, to exchange.

    Returns
    -------
    The value produced by ``CooperativeConvFunction.apply``.
    """
    exchanged = CooperativeConvFunction.apply(subgraph, x)
    return exchanged
    def read_async(self, ids: torch.Tensor):
        r"""Read the feature by index asynchronously.

        The work is split into stages separated by ``yield`` statements. One
        of three code paths is taken:

        - ``ids`` is on a CUDA device and the cache storage is pinned,
        - ``ids`` is on a CUDA device and the cache storage is not pinned,
        - ``ids`` is not on a CUDA device.

        Every path queries the caching policy, reads the cache hits from the
        cache, reads the cache misses through the fallback feature's own
        ``read_async`` and writes the missing rows into the cache.

        Parameters
        ----------
        ids : torch.Tensor
            The index of the feature. Only the specified indices of the
            feature are read.

        Returns
        -------
        A generator object.
            The returned generator object returns a future on
            ``read_async_num_stages(ids.device)``\ th invocation. The return
            result can be accessed by calling ``.wait()`` on the returned
            future object. It is undefined behavior to call ``.wait()`` more
            than once.

        Examples
        --------
        >>> import dgl.graphbolt as gb
        >>> feature = gb.Feature(...)
        >>> ids = torch.tensor([0, 2])
        >>> for stage, future in enumerate(feature.read_async(ids)):
        ...     pass
        >>> assert stage + 1 == feature.read_async_num_stages(ids.device)
        >>> result = future.wait()  # result contains the read values.
        """
        policy = self._feature._policy
        cache = self._feature._cache
        if ids.is_cuda and self.is_pinned():
            # Path 1: CUDA ids and pinned cache storage. The hits and the
            # misses are moved to the device separately and merged in
            # _Waiter.wait().
            ids_device = ids.device
            current_stream = torch.cuda.current_stream()
            device_to_host_stream = get_device_to_host_uva_stream()
            # The copy stream must not start before the work already queued
            # on the current stream.
            device_to_host_stream.wait_stream(current_stream)
            with torch.cuda.stream(device_to_host_stream):
                # Inside this block the current stream is the copy stream.
                ids.record_stream(torch.cuda.current_stream())
                ids = ids.to("cpu", non_blocking=True)
                ids_copy_event = torch.cuda.Event()
                ids_copy_event.record()

            yield  # first stage is done.

            # The copy above is non-blocking; wait for it before the ids are
            # consumed on the host.
            ids_copy_event.synchronize()
            policy_future = policy.query_and_replace_async(ids, self._offset)

            yield

            (
                positions,
                index,
                pointers,
                missing_keys,
                found_offsets,
                missing_offsets,
            ) = policy_future.wait()
            self._feature.total_queries += ids.shape[0]
            self._feature.total_miss += missing_keys.shape[0]
            # The first found_cnt entries of positions and pointers are used
            # for the cache hits, the remaining entries for the misses.
            found_cnt = ids.size(0) - missing_keys.size(0)
            found_positions = positions[:found_cnt]
            missing_positions = positions[found_cnt:]
            found_pointers = pointers[:found_cnt]
            missing_pointers = pointers[found_cnt:]
            host_to_device_stream = get_host_to_device_uva_stream()
            with torch.cuda.stream(host_to_device_stream):
                found_positions = found_positions.to(
                    ids_device, non_blocking=True
                )
                # NOTE(review): presumably this reads the pinned cache rows
                # from the device side; confirm against the cache backend.
                values_from_cpu = cache.index_select(found_positions)
                values_from_cpu.record_stream(current_stream)
                values_from_cpu_copy_event = torch.cuda.Event()
                values_from_cpu_copy_event.record()

            # The cache misses are read through the fallback feature. Its
            # stages are forwarded one by one; the value obtained last is kept
            # as the future holding the missing rows.
            fallback_reader = self._fallback_feature.read_async(missing_keys)
            for _ in range(
                self._fallback_feature.read_async_num_stages(
                    missing_keys.device
                )
            ):
                missing_values_future = next(fallback_reader, None)
                yield  # fallback feature stages.

            # The hits must have been read before the policy is told that
            # reading has completed.
            values_from_cpu_copy_event.synchronize()
            reading_completed = policy.reading_completed_async(
                found_pointers, found_offsets
            )

            missing_values = missing_values_future.wait()
            replace_future = cache.replace_async(
                missing_positions, missing_values
            )

            host_to_device_stream = get_host_to_device_uva_stream()
            with torch.cuda.stream(host_to_device_stream):
                index = index.to(ids_device, non_blocking=True)
                missing_values = missing_values.to(
                    ids_device, non_blocking=True
                )
                index.record_stream(current_stream)
                missing_values.record_stream(current_stream)
                missing_values_copy_event = torch.cuda.Event()
                missing_values_copy_event.record()

            yield

            reading_completed.wait()
            replace_future.wait()
            # Writing is reported as completed only after the replacement of
            # the cache rows has finished.
            writing_completed = policy.writing_completed_async(
                missing_pointers, missing_offsets
            )

            class _Waiter:
                # Future-like object merging the hits (existing) and the
                # misses (missing) into a single tensor on ids_device.
                def __init__(self, events, existing, missing, index):
                    self.events = events
                    self.existing = existing
                    self.missing = missing
                    self.index = index

                def wait(self):
                    """Returns the stored value when invoked."""
                    for event in self.events:
                        event.wait()
                    values = torch.empty(
                        (self.index.shape[0],) + self.missing.shape[1:],
                        dtype=self.missing.dtype,
                        device=ids_device,
                    )
                    # index holds the output rows of the hits first, followed
                    # by the output rows of the misses.
                    num_found = self.existing.size(0)
                    found_index = self.index[:num_found]
                    missing_index = self.index[num_found:]
                    values[found_index] = self.existing
                    values[missing_index] = self.missing
                    # Ensure there is no memory leak.
                    self.events = self.existing = None
                    self.missing = self.index = None
                    return values

            yield _Waiter(
                [
                    writing_completed,
                    values_from_cpu_copy_event,
                    missing_values_copy_event,
                ],
                values_from_cpu,
                missing_values,
                index,
            )
        elif ids.is_cuda:
            # Path 2: CUDA ids, cache storage not pinned. The result is
            # assembled before a single copy to the device.
            ids_device = ids.device
            current_stream = torch.cuda.current_stream()
            device_to_host_stream = get_device_to_host_uva_stream()
            device_to_host_stream.wait_stream(current_stream)
            with torch.cuda.stream(device_to_host_stream):
                ids.record_stream(torch.cuda.current_stream())
                ids = ids.to("cpu", non_blocking=True)
                ids_copy_event = torch.cuda.Event()
                ids_copy_event.record()

            yield  # first stage is done.

            ids_copy_event.synchronize()
            policy_future = policy.query_and_replace_async(ids, self._offset)

            yield

            (
                positions,
                index,
                pointers,
                missing_keys,
                found_offsets,
                missing_offsets,
            ) = policy_future.wait()
            self._feature.total_queries += ids.shape[0]
            self._feature.total_miss += missing_keys.shape[0]
            found_cnt = ids.size(0) - missing_keys.size(0)
            found_positions = positions[:found_cnt]
            missing_positions = positions[found_cnt:]
            found_pointers = pointers[:found_cnt]
            missing_pointers = pointers[found_cnt:]
            values_future = cache.query_async(
                found_positions, index, ids.shape[0]
            )

            fallback_reader = self._fallback_feature.read_async(missing_keys)
            for _ in range(
                self._fallback_feature.read_async_num_stages(
                    missing_keys.device
                )
            ):
                missing_values_future = next(fallback_reader, None)
                yield  # fallback feature stages.

            values = values_future.wait()
            reading_completed = policy.reading_completed_async(
                found_pointers, found_offsets
            )

            missing_index = index[found_cnt:]

            missing_values = missing_values_future.wait()
            replace_future = cache.replace_async(
                missing_positions, missing_values
            )
            # The missing rows are written into the rows of `values` given by
            # missing_index; the result is a future.
            values = torch.ops.graphbolt.scatter_async(
                values, missing_index, missing_values
            )

            yield

            host_to_device_stream = get_host_to_device_uva_stream()
            with torch.cuda.stream(host_to_device_stream):
                values = values.wait().to(ids_device, non_blocking=True)
                values.record_stream(current_stream)
                values_copy_event = torch.cuda.Event()
                values_copy_event.record()

            reading_completed.wait()
            replace_future.wait()
            writing_completed = policy.writing_completed_async(
                missing_pointers, missing_offsets
            )

            class _Waiter:
                # Future-like object returning the tensor copied to the device
                # once every event has been waited on.
                def __init__(self, events, values):
                    self.events = events
                    self.values = values

                def wait(self):
                    """Returns the stored value when invoked."""
                    for event in self.events:
                        event.wait()
                    values = self.values
                    # Ensure there is no memory leak.
                    self.events = self.values = None
                    return values

            yield _Waiter([values_copy_event, writing_completed], values)
        else:
            # Path 3: ids are not on a CUDA device; no stream handling and one
            # stage fewer than the CUDA paths.
            policy_future = policy.query_and_replace_async(ids, self._offset)

            yield

            (
                positions,
                index,
                pointers,
                missing_keys,
                found_offsets,
                missing_offsets,
            ) = policy_future.wait()
            self._feature.total_queries += ids.shape[0]
            self._feature.total_miss += missing_keys.shape[0]
            found_cnt = ids.size(0) - missing_keys.size(0)
            found_positions = positions[:found_cnt]
            missing_positions = positions[found_cnt:]
            found_pointers = pointers[:found_cnt]
            missing_pointers = pointers[found_cnt:]
            values_future = cache.query_async(
                found_positions, index, ids.shape[0]
            )

            fallback_reader = self._fallback_feature.read_async(missing_keys)
            for _ in range(
                self._fallback_feature.read_async_num_stages(
                    missing_keys.device
                )
            ):
                missing_values_future = next(fallback_reader, None)
                yield  # fallback feature stages.

            values = values_future.wait()
            reading_completed = policy.reading_completed_async(
                found_pointers, found_offsets
            )

            missing_index = index[found_cnt:]

            missing_values = missing_values_future.wait()
            replace_future = cache.replace_async(
                missing_positions, missing_values
            )
            values = torch.ops.graphbolt.scatter_async(
                values, missing_index, missing_values
            )

            yield

            reading_completed.wait()
            replace_future.wait()
            writing_completed = policy.writing_completed_async(
                missing_pointers, missing_offsets
            )

            class _Waiter:
                # Future-like object; `values` is still the future returned by
                # scatter_async and is resolved inside wait().
                def __init__(self, event, values):
                    self.event = event
                    self.values = values

                def wait(self):
                    """Returns the stored value when invoked."""
                    self.event.wait()
                    values = self.values.wait()
                    # Ensure there is no memory leak.
                    self.event = self.values = None
                    return values

            yield _Waiter(writing_completed, values)
This function is required to return the number of yield operations when read_async is used with a tensor residing on ids_device. Parameters ---------- ids_device : torch.device The device of the ids parameter passed into read_async. Returns ------- int The number of stages of the read_async operation. """ if ids_device.type == "cuda": return 4 + self._fallback_feature.read_async_num_stages( torch.device("cpu") ) else: return 3 + self._fallback_feature.read_async_num_stages(ids_device) def size(self): """Get the size of the feature. Returns ------- torch.Size The size of the feature. """ return self._fallback_feature.size() def count(self): """Get the count of the feature. Returns ------- int The count of the feature. """ return self._fallback_feature.count() def update(self, value: torch.Tensor, ids: torch.Tensor = None): """Update the feature. Parameters ---------- value : torch.Tensor The updated value of the feature. ids : torch.Tensor, optional The indices of the feature to update. If specified, only the specified indices of the feature will be updated. For the feature, the `ids[i]` row is updated to `value[i]`. So the indices and value must have the same length. If None, the entire feature will be updated. """ if ids is None: feat0 = value[:1] self._fallback_feature.update(value) cache_size = min( bytes_to_number_of_items(self.cache_size_in_bytes, feat0), value.shape[0], ) self._feature = None # Destroy the existing cache first. 
self._feature = self._cache_type( (cache_size,) + feat0.shape[1:], feat0.dtype ) else: self._fallback_feature.update(value, ids) self._feature.replace(ids, value, None, self._offset) def is_pinned(self): """Returns True if the cache storage is pinned.""" return self._feature.is_pinned() @property def cache_size_in_bytes(self): """Return the size taken by the cache in bytes.""" return self._feature.max_size_in_bytes @property def miss_rate(self): """Returns the cache miss rate since creation.""" return self._feature.miss_rate def cpu_cached_feature( fallback_features: Union[Feature, Dict[FeatureKey, Feature]], max_cache_size_in_bytes: int, policy: Optional[str] = None, pin_memory: bool = False, ) -> Union[CPUCachedFeature, Dict[FeatureKey, CPUCachedFeature]]: r"""CPU cached feature wrapping a fallback feature. Parameters ---------- fallback_features : Union[Feature, Dict[FeatureKey, Feature]] The fallback feature(s). max_cache_size_in_bytes : int The capacity of the cache in bytes. The size should be a few factors larger than the size of each read request. Otherwise, the caching policy will hang due to all cache entries being read and/or write locked, resulting in a deadlock. policy : str, optional The cache eviction policy algorithm name. The available policies are ["s3-fifo", "sieve", "lru", "clock"]. Default is "sieve". pin_memory : bool, optional Whether the cache storage should be allocated on system pinned memory. Default is False. Returns ------- Union[CPUCachedFeature, Dict[FeatureKey, CPUCachedFeature]] New feature(s) wrapped with CPUCachedFeature. 
class CPUFeatureCache(object):
    r"""High level wrapper for the CPU feature cache.

    Parameters
    ----------
    cache_shape : List[int]
        The shape of the cache. cache_shape[0] gives us the capacity.
    dtype : torch.dtype
        The data type of the elements stored in the cache.
    policy: str, optional
        The cache policy. Default is "sieve". "s3-fifo", "lru" and "clock" are
        also available.
    num_parts: int, optional
        The number of cache partitions for parallelism. Default is
        `torch.get_num_threads()`.
    pin_memory: bool, optional
        Whether the cache storage should be pinned.
    """

    def __init__(
        self,
        cache_shape,
        dtype,
        policy=None,
        num_parts=None,
        pin_memory=False,
    ):
        if policy is None:
            policy = "sieve"
        assert (
            policy in caching_policies
        ), f"{list(caching_policies.keys())} are the available caching policies."
        if num_parts is None:
            num_parts = torch.get_num_threads()
        min_num_cache_items = num_parts * (10 if policy == "s3-fifo" else 1)
        # Since we partition the cache, each partition needs to have a positive
        # number of slots. In addition, each "s3-fifo" partition needs at least
        # 10 slots since the small queue is 10% and the small queue needs a
        # positive size.
        if cache_shape[0] < min_num_cache_items:
            # tuple() keeps the concatenation valid when cache_shape is a list,
            # which is the documented parameter type.
            cache_shape = (min_num_cache_items,) + tuple(cache_shape[1:])
        self._policy = caching_policies[policy](cache_shape[0], num_parts)
        self._cache = torch.ops.graphbolt.feature_cache(
            cache_shape, dtype, pin_memory
        )
        self.total_miss = 0
        self.total_queries = 0

    def is_pinned(self):
        """Returns True if the cache storage is pinned."""
        return self._cache.is_pinned()

    @property
    def max_size_in_bytes(self):
        """Return the size taken by the cache in bytes."""
        return self._cache.nbytes

    def query(self, keys, offset=0):
        """Queries the cache.

        Parameters
        ----------
        keys : Tensor
            The keys to query the cache with.
        offset : int
            The offset to be added to the keys. Default is 0.

        Returns
        -------
        tuple(Tensor, Tensor, Tensor, Tensor)
            A tuple containing
            (values, missing_indices, missing_keys, missing_offsets) where
            values[missing_indices] corresponds to cache misses that should be
            filled by querying another source with missing_keys. If keys is
            pinned, then the returned values tensor is pinned as well. The
            missing_offsets tensor has the partition offsets of missing_keys.
        """
        self.total_queries += keys.shape[0]
        (
            positions,
            index,
            missing_keys,
            found_pointers,
            found_offsets,
            missing_offsets,
        ) = self._policy.query(keys, offset)
        values = self._cache.query(positions, index, keys.shape[0])
        # Tell the policy that the found entries are no longer being read.
        self._policy.reading_completed(found_pointers, found_offsets)
        self.total_miss += missing_keys.shape[0]
        # The entries of index past the found positions belong to the misses.
        missing_index = index[positions.size(0) :]
        return values, missing_index, missing_keys, missing_offsets

    def query_and_replace(self, keys, reader_fn, offset=0):
        """Queries the cache. Then inserts the keys that are not found by
        reading them by calling `reader_fn(missing_keys)`, which are then
        inserted into the cache using the selected caching policy algorithm
        to remove the old entries if it is full.

        Parameters
        ----------
        keys : Tensor
            The keys to query the cache with.
        reader_fn : reader_fn(keys: torch.Tensor) -> torch.Tensor
            A function that will take a missing keys tensor and will return
            their values.
        offset : int
            The offset to be added to the keys. Default is 0.

        Returns
        -------
        Tensor
            A tensor containing values corresponding to the keys. Should equal
            `reader_fn(keys)`, computed in a faster way.
        """
        self.total_queries += keys.shape[0]
        (
            positions,
            index,
            pointers,
            missing_keys,
            found_offsets,
            missing_offsets,
        ) = self._policy.query_and_replace(keys, offset)
        # positions and pointers list the found entries first, followed by the
        # entries assigned to the missing keys.
        found_cnt = keys.size(0) - missing_keys.size(0)
        found_positions = positions[:found_cnt]
        values = self._cache.query(found_positions, index, keys.shape[0])
        found_pointers = pointers[:found_cnt]
        self._policy.reading_completed(found_pointers, found_offsets)
        self.total_miss += missing_keys.shape[0]
        missing_index = index[found_cnt:]
        missing_values = reader_fn(missing_keys)
        values[missing_index] = missing_values
        missing_positions = positions[found_cnt:]
        self._cache.replace(missing_positions, missing_values)
        missing_pointers = pointers[found_cnt:]
        self._policy.writing_completed(missing_pointers, missing_offsets)
        return values

    def replace(self, keys, values, offsets=None, offset=0):
        """Inserts key-value pairs into the cache using the selected caching
        policy algorithm to remove old key-value pairs if it is full.

        Parameters
        ----------
        keys : Tensor
            The keys to insert to the cache.
        values : Tensor
            The values to insert to the cache.
        offsets : Tensor, optional
            The partition offsets of the keys.
        offset : int
            The offset to be added to the keys. Default is 0.
        """
        positions, pointers, offsets = self._policy.replace(
            keys, offsets, offset
        )
        self._cache.replace(positions, values)
        self._policy.writing_completed(pointers, offsets)

    @property
    def miss_rate(self):
        """Returns the cache miss rate since creation.

        Returns 0.0 when the cache has not been queried yet, instead of
        dividing by zero.
        """
        if self.total_queries == 0:
            return 0.0
        return self.total_miss / self.total_queries
self.fn = self.future = self.seed_offsets = None self.fetching_original_edge_ids_is_optional = None return fn( C_sampled_subgraph, seed_offsets, fetching_original_edge_ids_is_optional, ) class FusedCSCSamplingGraph(SamplingGraph): r"""A sampling graph in CSC format.""" def __repr__(self): final_str = ( "{classname}(csc_indptr={csc_indptr},\n" "indices={indices},\n" "{metadata})" ) classname_str = self.__class__.__name__ csc_indptr_str = str(self.csc_indptr) indices_str = str(self.indices) meta_str = f"total_num_nodes={self.total_num_nodes}, num_edges={self.num_edges}," if self.node_type_offset is not None: meta_str += f"\nnode_type_offset={self.node_type_offset}," if self.type_per_edge is not None: meta_str += f"\ntype_per_edge={self.type_per_edge}," if self.node_type_to_id is not None: meta_str += f"\nnode_type_to_id={self.node_type_to_id}," if self.edge_type_to_id is not None: meta_str += f"\nedge_type_to_id={self.edge_type_to_id}," if self.node_attributes is not None: meta_str += f"\nnode_attributes={self.node_attributes}," if self.edge_attributes is not None: meta_str += f"\nedge_attributes={self.edge_attributes}," final_str = final_str.format( classname=classname_str, csc_indptr=csc_indptr_str, indices=indices_str, metadata=meta_str, ) return textwrap.indent( final_str, " " * (len(classname_str) + 1) ).strip() def __init__( self, c_csc_graph: torch.ScriptObject, ): super().__init__() self._c_csc_graph = c_csc_graph def __del__(self): # torch.Tensor.pin_memory() is not an inplace operation. To make it # truly in-place, we need to use cudaHostRegister. Then, we need to use # cudaHostUnregister to unpin the tensor in the destructor. # https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842 if hasattr(self, "_is_inplace_pinned"): for tensor in self._is_inplace_pinned: assert self._inplace_unpinner(tensor.data_ptr()) == 0 @property def total_num_nodes(self) -> int: """Returns the number of nodes in the graph. 
Returns ------- int The number of rows in the dense format. """ return self._c_csc_graph.num_nodes() @property def total_num_edges(self) -> int: """Returns the number of edges in the graph. Returns ------- int The number of edges in the graph. """ return self._c_csc_graph.num_edges() @property def num_nodes(self) -> Union[int, Dict[str, int]]: """The number of nodes in the graph. - If the graph is homogenous, returns an integer. - If the graph is heterogenous, returns a dictionary. Returns ------- Union[int, Dict[str, int]] The number of nodes. Integer indicates the total nodes number of a homogenous graph; dict indicates nodes number per node types of a heterogenous graph. Examples -------- >>> import dgl.graphbolt as gb, torch >>> total_num_nodes = 5 >>> total_num_edges = 12 >>> ntypes = {"N0": 0, "N1": 1} >>> etypes = {"N0:R0:N0": 0, "N0:R1:N1": 1, ... "N1:R2:N0": 2, "N1:R3:N1": 3} >>> indptr = torch.LongTensor([0, 3, 5, 7, 9, 12]) >>> indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4]) >>> node_type_offset = torch.LongTensor([0, 2, 5]) >>> type_per_edge = torch.LongTensor( ... [0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3]) >>> graph = gb.fused_csc_sampling_graph(indptr, indices, ... node_type_offset=node_type_offset, ... type_per_edge=type_per_edge, ... node_type_to_id=ntypes, ... edge_type_to_id=etypes) >>> print(graph.num_nodes) {'N0': 2, 'N1': 3} """ offset = self._node_type_offset_list # Homogenous. if offset is None or self.node_type_to_id is None: return self._c_csc_graph.num_nodes() # Heterogenous else: num_nodes_per_type = { _type: offset[_idx + 1] - offset[_idx] for _type, _idx in self.node_type_to_id.items() } return num_nodes_per_type @property def num_edges(self) -> Union[int, Dict[str, int]]: """The number of edges in the graph. - If the graph is homogenous, returns an integer. - If the graph is heterogenous, returns a dictionary. Returns ------- Union[int, Dict[str, int]] The number of edges. 
Integer indicates the total edges number of a homogenous graph; dict indicates edges number per edge types of a heterogenous graph. Examples -------- >>> import dgl.graphbolt as gb, torch >>> total_num_nodes = 5 >>> total_num_edges = 12 >>> ntypes = {"N0": 0, "N1": 1} >>> etypes = {"N0:R0:N0": 0, "N0:R1:N1": 1, ... "N1:R2:N0": 2, "N1:R3:N1": 3} >>> indptr = torch.LongTensor([0, 3, 5, 7, 9, 12]) >>> indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4]) >>> node_type_offset = torch.LongTensor([0, 2, 5]) >>> type_per_edge = torch.LongTensor( ... [0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3]) >>> metadata = gb.GraphMetadata(ntypes, etypes) >>> graph = gb.fused_csc_sampling_graph(indptr, indices, node_type_offset, ... type_per_edge, None, metadata) >>> print(graph.num_edges) {'N0:R0:N0': 2, 'N0:R1:N1': 1, 'N1:R2:N0': 2, 'N1:R3:N1': 3} """ type_per_edge = self.type_per_edge # Homogenous. if type_per_edge is None or self.edge_type_to_id is None: return self._c_csc_graph.num_edges() # Heterogenous bincount = torch.bincount(type_per_edge) num_edges_per_type = {} for etype, etype_id in self.edge_type_to_id.items(): if etype_id < len(bincount): num_edges_per_type[etype] = bincount[etype_id].item() else: num_edges_per_type[etype] = 0 return num_edges_per_type @property def csc_indptr(self) -> torch.tensor: """Returns the indices pointer in the CSC graph. Returns ------- torch.tensor The indices pointer in the CSC graph. An integer tensor with shape `(total_num_nodes+1,)`. """ return self._c_csc_graph.csc_indptr() @csc_indptr.setter def csc_indptr(self, csc_indptr: torch.tensor) -> None: """Sets the indices pointer in the CSC graph.""" self._c_csc_graph.set_csc_indptr(csc_indptr) @property def indices(self) -> torch.tensor: """Returns the indices in the CSC graph. Returns ------- torch.tensor The indices in the CSC graph. An integer tensor with shape `(total_num_edges,)`. Notes ------- It is assumed that edges of each node are already sorted by edge type ids. 
""" return self._c_csc_graph.indices() @indices.setter def indices(self, indices: torch.tensor) -> None: """Sets the indices in the CSC graph.""" self._c_csc_graph.set_indices(indices) @property def node_type_offset(self) -> Optional[torch.Tensor]: """Returns the node type offset tensor if present. Do not modify the returned tensor in place. Returns ------- torch.Tensor or None If present, returns a 1D integer tensor of shape `(num_node_types + 1,)`. The tensor is in ascending order as nodes of the same type have continuous IDs, and larger node IDs are paired with larger node type IDs. The first value is 0 and last value is the number of nodes. And nodes with IDs between `node_type_offset_[i]~node_type_offset_[i+1]` are of type id 'i'. """ return self._c_csc_graph.node_type_offset() @property def _node_type_offset_list(self) -> Optional[list]: """Returns the node type offset list if present. Returns ------- list or None If present, returns a 1D integer list of shape `(num_node_types + 1,)`. The list is in ascending order as nodes of the same type have continuous IDs, and larger node IDs are paired with larger node type IDs. The first value is 0 and last value is the number of nodes. And nodes with IDs between `node_type_offset_[i]~node_type_offset_[i+1]` are of type id 'i'. 
""" if ( not hasattr(self, "_node_type_offset_cached_list") or self._node_type_offset_cached_list is None ): self._node_type_offset_cached_list = self.node_type_offset if self._node_type_offset_cached_list is not None: self._node_type_offset_cached_list = ( self._node_type_offset_cached_list.tolist() ) return self._node_type_offset_cached_list @node_type_offset.setter def node_type_offset( self, node_type_offset: Optional[torch.Tensor] ) -> None: """Sets the node type offset tensor if present.""" self._c_csc_graph.set_node_type_offset(node_type_offset) self._node_type_offset_cached_list = None @property def _indptr_node_type_offset_list(self) -> Optional[list]: """Returns the indptr node type offset list which presents the column id space when it does not match the global id space. It is useful when we slice a subgraph from another FusedCSCSamplingGraph. Returns ------- list or None If present, returns a 1D integer list of shape `(num_node_types + 1,)`. The list is in ascending order as nodes of the same type have continuous IDs, and larger node IDs are paired with larger node type IDs. The first value is 0 and last value is the number of nodes. And nodes with IDs between `node_type_offset_[i]~node_type_offset_[i+1]` are of type id 'i'. """ return ( self._indptr_node_type_offset_list_ if hasattr(self, "_indptr_node_type_offset_list_") else None ) @_indptr_node_type_offset_list.setter def _indptr_node_type_offset_list( self, indptr_node_type_offset_list: Optional[torch.Tensor] ): """Sets the indptr node type offset list if present.""" self._indptr_node_type_offset_list_ = indptr_node_type_offset_list @property def _gpu_graph_cache(self) -> Optional[GPUGraphCache]: return ( self._gpu_graph_cache_ if hasattr(self, "_gpu_graph_cache_") else None ) @property def type_per_edge(self) -> Optional[torch.Tensor]: """Returns the edge type tensor if present. 
    @property
    def node_attributes(self) -> Optional[Dict[str, torch.Tensor]]:
        """Returns the node attributes dictionary.

        Returns
        -------
        Dict[str, torch.Tensor] or None
            If present, returns a dictionary of node attributes. Each key
            represents the attribute's name, while the corresponding value
            holds the attribute's specific value. The length of each value
            should match the total number of nodes.
        """
        return self._c_csc_graph.node_attributes()

    @node_attributes.setter
    def node_attributes(
        self, node_attributes: Optional[Dict[str, torch.Tensor]]
    ) -> None:
        """Sets the node attributes dictionary."""
        self._c_csc_graph.set_node_attributes(node_attributes)

    @property
    def edge_attributes(self) -> Optional[Dict[str, torch.Tensor]]:
        """Returns the edge attributes dictionary.

        Returns
        -------
        Dict[str, torch.Tensor] or None
            If present, returns a dictionary of edge attributes. Each key
            represents the attribute's name, while the corresponding value
            holds the attribute's specific value. The length of each value
            should match the total number of edges.
        """
        return self._c_csc_graph.edge_attributes()

    @edge_attributes.setter
    def edge_attributes(
        self, edge_attributes: Optional[Dict[str, torch.Tensor]]
    ) -> None:
        """Sets the edge attributes dictionary."""
        self._c_csc_graph.set_edge_attributes(edge_attributes)
def in_subgraph(
    self,
    nodes: Union[torch.Tensor, Dict[str, torch.Tensor]],
) -> SampledSubgraphImpl:
    """Return the subgraph induced on the inbound edges of the given nodes.

    An in subgraph is equivalent to creating a new graph using the
    incoming edges of the given nodes. The subgraph is compacted
    according to the order of the passed-in `nodes`.

    Parameters
    ----------
    nodes: torch.Tensor or Dict[str, torch.Tensor]
        IDs of the given seed nodes.
          - If `nodes` is a tensor: the graph is treated as homogeneous
            and the ids inside are homogeneous ids.
          - If `nodes` is a dictionary: the keys should be node types and
            the ids inside are heterogeneous ids.

    Returns
    -------
    SampledSubgraphImpl
        The in subgraph.

    Examples
    --------
    >>> import dgl.graphbolt as gb
    >>> import torch
    >>> total_num_nodes = 5
    >>> total_num_edges = 12
    >>> ntypes = {"N0": 0, "N1": 1}
    >>> etypes = {
    ...     "N0:R0:N0": 0, "N0:R1:N1": 1, "N1:R2:N0": 2, "N1:R3:N1": 3}
    >>> indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    >>> indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    >>> node_type_offset = torch.LongTensor([0, 2, 5])
    >>> type_per_edge = torch.LongTensor(
    ...     [0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3])
    >>> graph = gb.fused_csc_sampling_graph(indptr, indices,
    ...     node_type_offset=node_type_offset,
    ...     type_per_edge=type_per_edge,
    ...     node_type_to_id=ntypes,
    ...     edge_type_to_id=etypes)
    >>> nodes = {"N0":torch.LongTensor([1]), "N1":torch.LongTensor([1, 2])}
    >>> in_subgraph = graph.in_subgraph(nodes)
    >>> print(in_subgraph.sampled_csc)
    {'N0:R0:N0': CSCFormatBase(indptr=tensor([0, 0]),
          indices=tensor([], dtype=torch.int64),
    ), 'N0:R1:N1': CSCFormatBase(indptr=tensor([0, 1, 2]),
                indices=tensor([1, 0]),
    ), 'N1:R2:N0': CSCFormatBase(indptr=tensor([0, 2]),
                indices=tensor([0, 1]),
    ), 'N1:R3:N1': CSCFormatBase(indptr=tensor([0, 1, 3]),
                indices=tensor([0, 1, 2]),
    )}
    """
    if isinstance(nodes, dict):
        # Per-type ids are flattened into homogeneous ids; the per-type
        # offsets returned alongside are not needed here.
        homogeneous_nodes, _ = self._convert_to_homogeneous_nodes(nodes)
    else:
        homogeneous_nodes = nodes
    # Reject anything that is not a flat (1-D) tensor of node IDs.
    assert homogeneous_nodes.dim() == 1, "Nodes should be 1-D tensor."
    return self._convert_to_sampled_subgraph(
        self._c_csc_graph.in_subgraph(homogeneous_nodes)
    )
general struct 'SampledSubgraphImpl'.""" indptr = C_sampled_subgraph.indptr indices = C_sampled_subgraph.indices type_per_edge = C_sampled_subgraph.type_per_edge column = C_sampled_subgraph.original_column_node_ids edge_ids_in_fused_csc_sampling_graph = ( C_sampled_subgraph.original_edge_ids ) etype_offsets = C_sampled_subgraph.etype_offsets if etype_offsets is not None: etype_offsets = etype_offsets.tolist() has_original_eids = ( self.edge_attributes is not None and ORIGINAL_EDGE_ID in self.edge_attributes ) original_edge_ids = ( ( torch.ops.graphbolt.index_select( self.edge_attributes[ORIGINAL_EDGE_ID], edge_ids_in_fused_csc_sampling_graph, ) if not fetching_original_edge_ids_is_optional or not edge_ids_in_fused_csc_sampling_graph.is_cuda or not self.edge_attributes[ORIGINAL_EDGE_ID].is_pinned() else None ) if has_original_eids else edge_ids_in_fused_csc_sampling_graph ) if type_per_edge is None and etype_offsets is None: # The sampled graph is already a homogeneous graph. sampled_csc = CSCFormatBase(indptr=indptr, indices=indices) if indices is not None and original_edge_ids is not None: # Only needed to fetch indices or original_edge_ids. edge_ids_in_fused_csc_sampling_graph = None else: offset = self._node_type_offset_list original_hetero_edge_ids = {} sub_indices = {} sub_indptr = {} if etype_offsets is None: # UVA sampling requires us to move node_type_offset to GPU. self.node_type_offset = self.node_type_offset.to(column.device) # 1. Find node types for each nodes in column. node_types = ( torch.searchsorted( self.node_type_offset, column, right=True ) - 1 ) for ntype, ntype_id in self.node_type_to_id.items(): # Get all nodes of a specific node type in column. nids = torch.nonzero(node_types == ntype_id).view(-1) nids_original_indptr = indptr[nids + 1] for etype, etype_id in self.edge_type_to_id.items(): src_ntype, _, dst_ntype = etype_str_to_tuple(etype) if dst_ntype != ntype: continue # Get all edge ids of a specific edge type. 
eids = torch.nonzero(type_per_edge == etype_id).view(-1) src_ntype_id = self.node_type_to_id[src_ntype] sub_indices[etype] = ( indices[eids] - offset[src_ntype_id] ) cum_edges = torch.searchsorted( eids, nids_original_indptr, right=False ) sub_indptr[etype] = torch.cat( (torch.tensor([0], device=indptr.device), cum_edges) ) original_hetero_edge_ids[etype] = original_edge_ids[ eids ] sampled_hetero_edge_ids_in_fused_csc_sampling_graph = None else: sampled_hetero_edge_ids_in_fused_csc_sampling_graph = {} edge_offsets = [0] for etype, etype_id in self.edge_type_to_id.items(): src_ntype, _, dst_ntype = etype_str_to_tuple(etype) ntype_id = self.node_type_to_id[dst_ntype] edge_offsets.append( edge_offsets[-1] + seed_offsets[ntype_id + 1] - seed_offsets[ntype_id] + 1 ) for etype, etype_id in self.edge_type_to_id.items(): src_ntype, _, dst_ntype = etype_str_to_tuple(etype) ntype_id = self.node_type_to_id[dst_ntype] sub_indptr[etype] = indptr[ edge_offsets[etype_id] : edge_offsets[etype_id + 1] ] sub_indices[etype] = ( None if indices is None else indices[ etype_offsets[etype_id] : etype_offsets[ etype_id + 1 ] ] ) original_hetero_edge_ids[etype] = ( None if original_edge_ids is None else original_edge_ids[ etype_offsets[etype_id] : etype_offsets[ etype_id + 1 ] ] ) if indices is None or original_edge_ids is None: # Only needed to fetch indices or original edge ids. 
def sample_neighbors(
    self,
    seeds: Union[torch.Tensor, Dict[str, torch.Tensor]],
    fanouts: torch.Tensor,
    replace: bool = False,
    probs_name: Optional[str] = None,
    returning_indices_and_original_edge_ids_are_optional: bool = False,
    async_op: bool = False,
) -> SampledSubgraphImpl:
    """Sample neighboring edges of the given nodes and return the induced
    subgraph.

    Parameters
    ----------
    seeds: torch.Tensor or Dict[str, torch.Tensor]
        IDs of the given seed nodes.
          - If `seeds` is a tensor: the graph is treated as homogeneous
            and the ids inside are homogeneous ids.
          - If `seeds` is a dictionary: the keys should be node types and
            the ids inside are heterogeneous ids.
    fanouts: torch.Tensor
        The number of edges to be sampled for each node with or without
        considering edge types.
          - When the length is 1, it indicates that the fanout applies to
            all neighbors of the node as a collective, regardless of the
            edge type.
          - Otherwise, the length should equal to the number of edge
            types, and each fanout value corresponds to a specific edge
            type of the nodes.
        The value of each fanout should be >= 0 or = -1.
          - When the value is -1, all neighbors (with non-zero
            probability, if weighted) will be sampled once regardless of
            replacement. It is equivalent to selecting all neighbors with
            non-zero probability when the fanout is >= the number of
            neighbors (and replace is set to false).
          - When the value is a non-negative integer, it serves as a
            minimum threshold for selecting neighbors.
    replace: bool
        Boolean indicating whether the sample is performed with or
        without replacement. If True, a value can be selected multiple
        times. Otherwise, each value can be selected only once.
    probs_name: str, optional
        An optional string specifying the name of an edge attribute used.
        This attribute tensor should contain (unnormalized)
        probabilities corresponding to each neighboring edge of a node.
        It must be a 1D floating-point or boolean tensor, with the number
        of elements equalling the total number of edges.
    returning_indices_and_original_edge_ids_are_optional: bool
        Boolean indicating whether it is okay for the call to this
        function to leave the indices and the original edge ids tensors
        uninitialized. In this case, it is the user's responsibility to
        gather them using _edge_ids_in_fused_csc_sampling_graph if either
        is missing.
    async_op: bool
        Boolean indicating whether the call is asynchronous. If so, the
        result can be obtained by calling wait on the returned future.

    Returns
    -------
    SampledSubgraphImpl
        The sampled subgraph.

    Examples
    --------
    >>> import dgl.graphbolt as gb
    >>> import torch
    >>> ntypes = {"n1": 0, "n2": 1}
    >>> etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
    >>> indptr = torch.LongTensor([0, 2, 4, 6, 7, 9])
    >>> indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1])
    >>> node_type_offset = torch.LongTensor([0, 2, 5])
    >>> type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0])
    >>> graph = gb.fused_csc_sampling_graph(indptr, indices,
    ...     node_type_offset=node_type_offset,
    ...     type_per_edge=type_per_edge,
    ...     node_type_to_id=ntypes,
    ...     edge_type_to_id=etypes)
    >>> nodes = {'n1': torch.LongTensor([0]), 'n2': torch.LongTensor([0])}
    >>> fanouts = torch.tensor([1, 1])
    >>> subgraph = graph.sample_neighbors(nodes, fanouts)
    >>> print(subgraph.sampled_csc)
    {'n1:e1:n2': CSCFormatBase(indptr=tensor([0, 1]),
                indices=tensor([0]),
    ), 'n2:e2:n1': CSCFormatBase(indptr=tensor([0, 1]),
                indices=tensor([2]),
    )}
    """
    # Work out the per-node-type offsets that accompany the seeds.
    if isinstance(seeds, dict):
        seeds, seed_offsets = self._convert_to_homogeneous_nodes(seeds)
    elif seeds is None:
        seed_offsets = self._indptr_node_type_offset_list
    else:
        seed_offsets = None

    probs_or_mask = None
    if probs_name:
        probs_or_mask = self.edge_attributes[probs_name]

    fetching_is_optional = (
        returning_indices_and_original_edge_ids_are_optional
    )
    C_sampled_subgraph = self._sample_neighbors(
        seeds,
        seed_offsets,
        fanouts,
        replace=replace,
        probs_or_mask=probs_or_mask,
        returning_indices_is_optional=fetching_is_optional,
        async_op=async_op,
    )

    # The same arguments feed the conversion whether it happens right
    # away or is handed to the waiter object.
    conversion_args = (
        C_sampled_subgraph,
        seed_offsets,
        fetching_is_optional,
    )
    if not async_op:
        return self._convert_to_sampled_subgraph(*conversion_args)
    return _SampleNeighborsWaiter(
        self._convert_to_sampled_subgraph, *conversion_args
    )
def _sample_neighbors(
    self,
    seeds: torch.Tensor,
    seed_offsets: Optional[list],
    fanouts: torch.Tensor,
    replace: bool = False,
    probs_or_mask: Optional[torch.Tensor] = None,
    returning_indices_is_optional: bool = False,
    async_op: bool = False,
) -> torch.ScriptObject:
    """Sample neighboring edges of the given nodes and return the induced
    subgraph.

    This validates the arguments and forwards them to the C graph
    object's `sample_neighbors` (or `sample_neighbors_async`) with the
    LABOR-specific options disabled.

    Parameters
    ----------
    seeds: torch.Tensor
        IDs of the given seed nodes.
    seed_offsets: list, optional
        The offsets of the given seeds,
        seeds[seed_offsets[i]: seed_offsets[i + 1]] has node type i.
    fanouts: torch.Tensor
        The number of edges to be sampled for each node with or without
        considering edge types.
          - When the length is 1, it indicates that the fanout applies to
            all neighbors of the node as a collective, regardless of the
            edge type.
          - Otherwise, the length should equal to the number of edge
            types, and each fanout value corresponds to a specific edge
            type of the nodes.
        The value of each fanout should be >= 0 or = -1.
          - When the value is -1, all neighbors (with non-zero
            probability, if weighted) will be sampled once regardless of
            replacement. It is equivalent to selecting all neighbors with
            non-zero probability when the fanout is >= the number of
            neighbors (and replace is set to false).
          - When the value is a non-negative integer, it serves as a
            minimum threshold for selecting neighbors.
    replace: bool
        Boolean indicating whether the sample is performed with or
        without replacement. If True, a value can be selected multiple
        times. Otherwise, each value can be selected only once.
    probs_or_mask: torch.Tensor, optional
        An optional tensor of edge attribute for probability or masks.
        This attribute tensor should contain (unnormalized)
        probabilities corresponding to each neighboring edge of a node.
        It must be a 1D floating-point or boolean tensor, with the number
        of elements equalling the total number of edges.
    returning_indices_is_optional: bool
        Boolean indicating whether it is okay for the call to this
        function to leave the indices tensor uninitialized. In this case,
        it is the user's responsibility to gather it using the edge ids.
    async_op: bool
        Boolean indicating whether the call is asynchronous. If so, the
        result can be obtained by calling wait on the returned future.

    Returns
    -------
    torch.classes.graphbolt.SampledSubgraph
        The sampled C subgraph.
    """
    # Validate seeds, fanouts and probabilities before calling into the
    # C graph object.
    self._check_sampler_arguments(seeds, fanouts, probs_or_mask)
    sampling_fn = (
        self._c_csc_graph.sample_neighbors_async
        if async_op
        else self._c_csc_graph.sample_neighbors
    )
    # Arguments are positional; the trailing comments name the ones that
    # are fixed for plain (non-LABOR) neighbor sampling.
    return sampling_fn(
        seeds,
        seed_offsets,
        fanouts.tolist(),
        replace,
        False,  # is_labor
        returning_indices_is_optional,
        probs_or_mask,
        None,  # random_seed, labor parameter
        0,  # seed2_contribution, labor_parameter
    )
- If `nodes` is a tensor: It means the graph is homogeneous graph, and ids inside are homogeneous ids. - If `nodes` is a dictionary: The keys should be node type and ids inside are heterogeneous ids. fanouts: torch.Tensor The number of edges to be sampled for each node with or without considering edge types. - When the length is 1, it indicates that the fanout applies to all neighbors of the node as a collective, regardless of the edge type. - Otherwise, the length should equal to the number of edge types, and each fanout value corresponds to a specific edge type of the nodes. The value of each fanout should be >= 0 or = -1. - When the value is -1, all neighbors (with non-zero probability, if weighted) will be sampled once regardless of replacement. It is equivalent to selecting all neighbors with non-zero probability when the fanout is >= the number of neighbors (and replace is set to false). - When the value is a non-negative integer, it serves as a minimum threshold for selecting neighbors. replace: bool Boolean indicating whether the sample is preformed with or without replacement. If True, a value can be selected multiple times. Otherwise, each value can be selected only once. probs_name: str, optional An optional string specifying the name of an edge attribute. This attribute tensor should contain (unnormalized) probabilities corresponding to each neighboring edge of a node. It must be a 1D floating-point or boolean tensor, with the number of elements equalling the total number of edges. returning_indices_and_original_edge_ids_are_optional: bool Boolean indicating whether it is okay for the call to this function to leave the indices and the original edge ids tensors uninitialized. In this case, it is the user's responsibility to gather them using _edge_ids_in_fused_csc_sampling_graph if either is missing. random_seed: torch.Tensor, optional An int64 tensor with one or two elements. 
The passed random_seed makes it so that for any seed node ``s`` and its neighbor ``t``, the rolled random variate ``r_t`` is the same for any call to this function with the same random seed. When sampling as part of the same batch, one would want identical seeds so that LABOR can globally sample. One example is that for heterogenous graphs, there is a single random seed passed for each edge type. This will sample much fewer nodes compared to having unique random seeds for each edge type. If one called this function individually for each edge type for a heterogenous graph with different random seeds, then it would run LABOR locally for each edge type, resulting into a larger number of nodes being sampled. If this function is called without a ``random_seed``, we get the random seed by getting a random number from GraphBolt. Use this argument with identical random_seed if multiple calls to this function are used to sample as part of a single batch. If given two numbers, then the ``seed2_contribution`` argument determines the interpolation between the two random seeds. seed2_contribution: float, optional A float value between [0, 1) that determines the contribution of the second random seed, ``random_seed[-1]``, to generate the random variates. async_op: bool Boolean indicating whether the call is asynchronous. If so, the result can be obtained by calling wait on the returned future. Returns ------- SampledSubgraphImpl The sampled subgraph. Examples -------- >>> import dgl.graphbolt as gb >>> import torch >>> ntypes = {"n1": 0, "n2": 1} >>> etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1} >>> indptr = torch.LongTensor([0, 2, 4, 6, 7, 9]) >>> indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1]) >>> node_type_offset = torch.LongTensor([0, 2, 5]) >>> type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0]) >>> graph = gb.fused_csc_sampling_graph(indptr, indices, ... node_type_offset=node_type_offset, ... type_per_edge=type_per_edge, ... node_type_to_id=ntypes, ... 
def temporal_sample_neighbors(
    self,
    seeds: Union[torch.Tensor, Dict[str, torch.Tensor]],
    seeds_timestamp: Union[torch.Tensor, Dict[str, torch.Tensor]],
    fanouts: torch.Tensor,
    replace: bool = False,
    seeds_pre_time_window: Optional[
        Union[torch.Tensor, Dict[str, torch.Tensor]]
    ] = None,
    probs_name: Optional[str] = None,
    node_timestamp_attr_name: Optional[str] = None,
    edge_timestamp_attr_name: Optional[str] = None,
) -> SampledSubgraphImpl:
    """Temporally sample neighboring edges of the given nodes and return
    the induced subgraph.

    If `node_timestamp_attr_name` or `edge_timestamp_attr_name` is given,
    the sampled neighbor or edge of a seed node must have a timestamp
    that is smaller than that of the seed node.

    Parameters
    ----------
    seeds: torch.Tensor or Dict[str, torch.Tensor]
        IDs of the given seed nodes. A dictionary is keyed by node type
        and is converted to homogeneous ids before sampling.
    seeds_timestamp: torch.Tensor or Dict[str, torch.Tensor]
        Timestamps of the given seed nodes. When `seeds` is a dictionary
        this is indexed with the same node type keys.
    fanouts: torch.Tensor
        The number of edges to be sampled for each node with or without
        considering edge types.
          - When the length is 1, it indicates that the fanout applies to
            all neighbors of the node as a collective, regardless of the
            edge type.
          - Otherwise, the length should equal to the number of edge
            types, and each fanout value corresponds to a specific edge
            type of the nodes.
        The value of each fanout should be >= 0 or = -1.
          - When the value is -1, all neighbors (with non-zero
            probability, if weighted) will be sampled once regardless of
            replacement. It is equivalent to selecting all neighbors with
            non-zero probability when the fanout is >= the number of
            neighbors (and replace is set to false).
          - When the value is a non-negative integer, it serves as a
            minimum threshold for selecting neighbors.
    replace: bool
        Boolean indicating whether the sample is performed with or
        without replacement. If True, a value can be selected multiple
        times. Otherwise, each value can be selected only once.
    seeds_pre_time_window: torch.Tensor or Dict[str, torch.Tensor], optional
        The time window of the nodes represents a period of time before
        `seeds_timestamp`. If provided, only neighbors and related edges
        whose timestamps fall within `[seeds_timestamp -
        seeds_pre_time_window, seeds_timestamp]` will be filtered.
    probs_name: str, optional
        An optional string specifying the name of an edge attribute.
        This attribute tensor should contain (unnormalized)
        probabilities corresponding to each neighboring edge of a node.
        It must be a 1D floating-point or boolean tensor, with the number
        of elements equalling the total number of edges.
    node_timestamp_attr_name: str, optional
        An optional string specifying the name of a node attribute.
    edge_timestamp_attr_name: str, optional
        An optional string specifying the name of an edge attribute.

    Returns
    -------
    SampledSubgraphImpl
        The sampled subgraph.
    """
    seed_offsets = None
    if isinstance(seeds, dict):
        # Flatten per-type seeds, timestamps and time windows together so
        # that they stay aligned element by element.
        (
            seeds,
            seed_offsets,
            seeds_timestamp,
            seeds_pre_time_window,
        ) = self._convert_to_homogeneous_nodes(
            seeds, seeds_timestamp, seeds_pre_time_window
        )
    elif seeds is None:
        seed_offsets = self._indptr_node_type_offset_list

    # Look up the optional edge probabilities, then validate the sampler
    # arguments (including that seeds is a 1-D tensor).
    probs_or_mask = self.edge_attributes[probs_name] if probs_name else None
    self._check_sampler_arguments(seeds, fanouts, probs_or_mask)
    C_sampled_subgraph = self._c_csc_graph.temporal_sample_neighbors(
        seeds,
        seed_offsets,
        seeds_timestamp,
        fanouts.tolist(),
        replace,
        False,  # is_labor
        False,  # returning_indices_is_optional
        seeds_pre_time_window,
        probs_or_mask,
        node_timestamp_attr_name,
        edge_timestamp_attr_name,
        None,  # random_seed, labor parameter
        0,  # seed2_contribution, labor_parameter
    )
    return self._convert_to_sampled_subgraph(
        C_sampled_subgraph, seed_offsets
    )
timestamp that is smaller than that of the seed node. Parameters ---------- seeds: torch.Tensor IDs of the given seed nodes. seeds_timestamp: torch.Tensor Timestamps of the given seed nodes. fanouts: torch.Tensor The number of edges to be sampled for each node with or without considering edge types. - When the length is 1, it indicates that the fanout applies to all neighbors of the node as a collective, regardless of the edge type. - Otherwise, the length should equal to the number of edge types, and each fanout value corresponds to a specific edge type of the nodes. The value of each fanout should be >= 0 or = -1. - When the value is -1, all neighbors (with non-zero probability, if weighted) will be sampled once regardless of replacement. It is equivalent to selecting all neighbors with non-zero probability when the fanout is >= the number of neighbors (and replace is set to false). - When the value is a non-negative integer, it serves as a minimum threshold for selecting neighbors. replace: bool Boolean indicating whether the sample is preformed with or without replacement. If True, a value can be selected multiple times. Otherwise, each value can be selected only once. seeds_pre_time_window: torch.Tensor The time window of the nodes represents a period of time before `seeds_timestamp`. If provided, only neighbors and related edges whose timestamps fall within `[seeds_timestamp - seeds_pre_time_window, seeds_timestamp]` will be filtered. probs_name: str, optional An optional string specifying the name of an edge attribute. This attribute tensor should contain (unnormalized) probabilities corresponding to each neighboring edge of a node. It must be a 1D floating-point or boolean tensor, with the number of elements equalling the total number of edges. node_timestamp_attr_name: str, optional An optional string specifying the name of an node attribute. edge_timestamp_attr_name: str, optional An optional string specifying the name of an edge attribute. 
random_seed: torch.Tensor, optional An int64 tensor with one or two elements. The passed random_seed makes it so that for any seed node ``s`` and its neighbor ``t``, the rolled random variate ``r_t`` is the same for any call to this function with the same random seed. When sampling as part of the same batch, one would want identical seeds so that LABOR can globally sample. One example is that for heterogenous graphs, there is a single random seed passed for each edge type. This will sample much fewer nodes compared to having unique random seeds for each edge type. If one called this function individually for each edge type for a heterogenous graph with different random seeds, then it would run LABOR locally for each edge type, resulting into a larger number of nodes being sampled. If this function is called without a ``random_seed``, we get the random seed by getting a random number from GraphBolt. Use this argument with identical random_seed if multiple calls to this function are used to sample as part of a single batch. If given two numbers, then the ``seed2_contribution`` argument determines the interpolation between the two random seeds. seed2_contribution: float, optional A float value between [0, 1) that determines the contribution of the second random seed, ``random_seed[-1]``, to generate the random variates. Returns ------- SampledSubgraphImpl The sampled subgraph. """ seed_offsets = None if isinstance(seeds, dict): ( seeds, seed_offsets, seeds_timestamp, seeds_pre_time_window, ) = self._convert_to_homogeneous_nodes( seeds, seeds_timestamp, seeds_pre_time_window ) elif seeds is None: seed_offsets = self._indptr_node_type_offset_list # Ensure nodes is 1-D tensor. 
def sample_negative_edges_uniform(
    self, edge_type, node_pairs, negative_ratio
):
    """
    Sample negative edges by randomly choosing negative
    source-destination edges according to a uniform distribution. For
    each edge ``(u, v)``, it is supposed to generate `negative_ratio`
    pairs of negative edges ``(u, v')``, where ``v'`` is chosen uniformly
    from all the nodes in the graph. ``u`` is exactly same as the
    corresponding positive edges. It returns positive edges concatenated
    with negative edges. In negative edges, negative sources are
    constructed from the corresponding positive edges.

    Parameters
    ----------
    edge_type: str
        The type of edges in the provided node_pairs. Any negative edges
        sampled will also have the same type. If set to None, it will be
        considered as a homogeneous graph.
    node_pairs : torch.Tensor
        A 2D tensors that represent the N pairs of positive edges in
        source-destination format, with 'positive' indicating that these
        edges are present in the graph. It's important to note that
        within the context of a heterogeneous graph, the ids in these
        tensors signify heterogeneous ids.
    negative_ratio: int
        The ratio of the number of negative samples to positive samples.

    Returns
    -------
    torch.Tensor
        A 2D tensors represents the N pairs of positive and negative
        source-destination node pairs. In the context of a heterogeneous
        graph, both the input nodes and the selected nodes are
        represented by heterogeneous IDs, and the formed edges are of the
        input type `edge_type`. Note that negative refers to false
        negatives, which means the edge could be present or not present
        in the graph.
    """
    # Destinations are drawn from [0, upper_bound): the node count of the
    # destination type when an edge type is given, else of the whole graph.
    if not edge_type:
        upper_bound = self.total_num_nodes
    else:
        _src_ntype, _relation, dst_ntype = etype_str_to_tuple(edge_type)
        upper_bound = self.num_nodes[dst_ntype]

    num_negative = node_pairs.shape[0] * negative_ratio
    # Every positive source is repeated `negative_ratio` times in a row.
    negative_src = node_pairs[:, 0].repeat_interleave(negative_ratio)
    negative_dst = torch.randint(
        0,
        upper_bound,
        (num_negative,),
        dtype=node_pairs.dtype,
        device=node_pairs.device,
    )
    # Pair the columns up into rows of (source, destination).
    negative_pairs = torch.stack((negative_src, negative_dst), dim=1)
    return torch.cat((node_pairs, negative_pairs))
def pin_memory_(self):
    """Copy `FusedCSCSamplingGraph` to the pinned memory in-place.

    On WSL in-place pinning is not supported; a warning is emitted and an
    out-of-place pinned copy (``self.to("pinned")``) is returned instead.

    Returns
    -------
    FusedCSCSamplingGraph
        The same object modified in-place (or a pinned copy on WSL).
    """
    if is_wsl():
        gb_warning(
            "In place pinning is not supported on WSL. "
            "Returning the out of place pinned `FusedCSCSamplingGraph`."
        )
        return self.to("pinned")
    # torch.Tensor.pin_memory() is not an inplace operation. To make it
    # truly in-place, we need to use cudaHostRegister. Then, we need to use
    # cudaHostUnregister to unpin the tensor in the destructor.
    # https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842
    cudart = torch.cuda.cudart()
    if not hasattr(self, "_is_inplace_pinned"):
        self._is_inplace_pinned = set()

    def _pin(x):
        if hasattr(x, "pin_memory_"):
            # Objects that know how to pin themselves in-place do so.
            x.pin_memory_()
        elif (
            isinstance(x, torch.Tensor)
            and not x.is_pinned()
            and x.device.type == "cpu"
        ):
            assert (
                x.is_contiguous()
            ), "Tensor pinning is only supported for contiguous tensors."
            # The registration call is kept out of the `assert` expression
            # so that it still happens when asserts are stripped
            # (`python -O`); otherwise the tensor would be recorded as
            # pinned below without ever having been registered.
            status = cudart.cudaHostRegister(
                x.data_ptr(), x.numel() * x.element_size(), 0
            )
            assert (
                status == 0
            ), f"cudaHostRegister failed with error code {status}."
            self._is_inplace_pinned.add(x)
            # Remember the matching unregister function for later cleanup.
            self._inplace_unpinner = cudart.cudaHostUnregister

        return x

    return self._apply_to_members(_pin)
def fused_csc_sampling_graph(
    csc_indptr: torch.Tensor,
    indices: torch.Tensor,
    node_type_offset: Optional[torch.Tensor] = None,
    type_per_edge: Optional[torch.Tensor] = None,
    node_type_to_id: Optional[Dict[str, int]] = None,
    edge_type_to_id: Optional[Dict[str, int]] = None,
    node_attributes: Optional[Dict[str, torch.Tensor]] = None,
    edge_attributes: Optional[Dict[str, torch.Tensor]] = None,
) -> "FusedCSCSamplingGraph":
    """Create a FusedCSCSamplingGraph object from a CSC representation.

    The type mappings are validated only when both ``node_type_to_id`` and
    ``edge_type_to_id`` are provided.

    Parameters
    ----------
    csc_indptr : torch.Tensor
        Offsets delimiting each node's neighborhood in `indices`. An
        integer tensor with shape `(total_num_nodes+1,)`.
    indices : torch.Tensor
        Column indices of the non-zero elements in the CSC graph. An
        integer tensor with shape `(total_num_edges,)`.
    node_type_offset : Optional[torch.Tensor], optional
        Offset of node types in the graph, by default None.
    type_per_edge : Optional[torch.Tensor], optional
        Type ids of each edge in the graph, by default None. If provided, it
        is required that the edge types in each vertex neighborhood are in
        sorted order. To be more precise, For each i in [0,
        csc_indptr.size(0) - 1), `type_per_edge[indptr[i]: indptr[i + 1]]`
        is expected to be monotonically nondecreasing.
    node_type_to_id : Optional[Dict[str, int]], optional
        Map node types to ids, by default None.
    edge_type_to_id : Optional[Dict[str, int]], optional
        Map edge types to ids, by default None.
    node_attributes: Optional[Dict[str, torch.Tensor]], optional
        Node attributes of the graph, by default None.
    edge_attributes: Optional[Dict[str, torch.Tensor]], optional
        Edge attributes of the graph, by default None.

    Returns
    -------
    FusedCSCSamplingGraph
        The created FusedCSCSamplingGraph object.

    Raises
    ------
    AssertionError
        If the type mappings are malformed (non-string names, non-int or
        duplicated ids, unknown node types inside an edge type) or if
        ``node_type_offset`` does not have ``|ntypes| + 1`` entries.

    Examples
    --------
    >>> ntypes = {'n1': 0, 'n2': 1, 'n3': 2}
    >>> etypes = {'n1:e1:n2': 0, 'n1:e2:n3': 1}
    >>> csc_indptr = torch.tensor([0, 2, 5, 7, 8])
    >>> indices = torch.tensor([1, 3, 0, 1, 2, 0, 3, 2])
    >>> node_type_offset = torch.tensor([0, 1, 2, 4])
    >>> type_per_edge = torch.tensor([0, 1, 0, 1, 1, 0, 0, 0])
    >>> graph = graphbolt.fused_csc_sampling_graph(csc_indptr, indices,
    ...             node_type_offset=node_type_offset,
    ...             type_per_edge=type_per_edge,
    ...             node_type_to_id=ntypes, edge_type_to_id=etypes,
    ...             node_attributes=None, edge_attributes=None,)
    >>> print(graph)
    FusedCSCSamplingGraph(csc_indptr=tensor([0, 2, 5, 7, 8]),
                     indices=tensor([1, 3, 0, 1, 2, 0, 3, 2]),
                     total_num_nodes=4, num_edges={'n1:e1:n2': 5, 'n1:e2:n3': 3},
                     node_type_offset=tensor([0, 1, 2, 4]),
                     type_per_edge=tensor([0, 1, 0, 1, 1, 0, 0, 0]),
                     node_type_to_id={'n1': 0, 'n2': 1, 'n3': 2},
                     edge_type_to_id={'n1:e1:n2': 0, 'n1:e2:n3': 1},)
    """
    if node_type_to_id is not None and edge_type_to_id is not None:
        node_type_ids = list(node_type_to_id.values())
        edge_type_ids = list(edge_type_to_id.values())

        # Validate node_type_to_id.
        assert all(
            isinstance(x, str) for x in node_type_to_id
        ), "Node type name should be string."
        assert all(
            isinstance(x, int) for x in node_type_ids
        ), "Node type id should be int."
        assert len(node_type_ids) == len(
            set(node_type_ids)
        ), "Multiple node types shoud not be mapped to a same id."
        # Validate edge_type_to_id.
        for edge_type in edge_type_to_id:
            src, edge, dst = etype_str_to_tuple(edge_type)
            assert isinstance(edge, str), "Edge type name should be string."
            # Membership is tested against the dict itself (hash lookup)
            # rather than a list copy of its keys.
            assert (
                src in node_type_to_id
            ), f"Unrecognized node type {src} in edge type {edge_type}"
            assert (
                dst in node_type_to_id
            ), f"Unrecognized node type {dst} in edge type {edge_type}"
        assert all(
            isinstance(x, int) for x in edge_type_ids
        ), "Edge type id should be int."
        assert len(edge_type_ids) == len(
            set(edge_type_ids)
        ), "Multiple edge types shoud not be mapped to a same id."

        if node_type_offset is not None:
            assert len(node_type_to_id) + 1 == node_type_offset.size(
                0
            ), "node_type_offset length should be |ntypes| + 1."
    return FusedCSCSamplingGraph(
        torch.ops.graphbolt.fused_csc_sampling_graph(
            csc_indptr,
            indices,
            node_type_offset,
            type_per_edge,
            node_type_to_id,
            edge_type_to_id,
            node_attributes,
            edge_attributes,
        ),
    )
class GPUCachedFeature(Feature):
    r"""GPU cached feature wrapping a fallback feature. It uses the least
    recently used (LRU) algorithm as the cache eviction policy. Use
    `gpu_cached_feature` to construct an instance of this class.

    Places the GPU cache to torch.cuda.current_device().

    Parameters
    ----------
    fallback_feature : Feature
        The fallback feature.
    cache : GPUFeatureCache
        A GPUFeatureCache instance to serve as the cache backend.
    offset : int, optional
        The offset value to add to the given ids before using the cache.
        This parameter is useful if multiple `GPUCachedFeature`s are sharing
        a single GPUFeatureCache object.

    Examples
    --------
    >>> import torch
    >>> from dgl import graphbolt as gb
    >>> torch_feat = torch.arange(10).reshape(2, -1).to("cuda")
    >>> cache_size = 5
    >>> fallback_feature = gb.TorchBasedFeature(torch_feat)
    >>> feature = gb.gpu_cached_feature(fallback_feature, cache_size)
    >>> feature.read()
    tensor([[0, 1, 2, 3, 4],
            [5, 6, 7, 8, 9]], device='cuda:0')
    >>> feature.read(torch.tensor([0]).to("cuda"))
    tensor([[0, 1, 2, 3, 4]], device='cuda:0')
    >>> feature.update(torch.tensor([[1 for _ in range(5)]]).to("cuda"),
    ...                torch.tensor([1]).to("cuda"))
    >>> feature.read(torch.tensor([0, 1]).to("cuda"))
    tensor([[0, 1, 2, 3, 4],
            [1, 1, 1, 1, 1]], device='cuda:0')
    >>> feature.size()
    torch.Size([5])
    """

    _cache_type = GPUFeatureCache

    def __init__(
        self,
        fallback_feature: Feature,
        cache: GPUFeatureCache,
        offset: int = 0,
    ):
        super().__init__()
        assert isinstance(fallback_feature, Feature), (
            f"The fallback_feature must be an instance of Feature, but got "
            f"{type(fallback_feature)}."
        )
        self._fallback_feature = fallback_feature
        self._feature = cache
        self._offset = offset

    def _to_cache_keys(self, ids):
        """Translate feature ids into the key space of the cache."""
        return ids if self._offset == 0 else ids + self._offset

    def _to_fallback_ids(self, keys):
        """Translate cache keys back into ids of the fallback feature."""
        return keys if self._offset == 0 else keys - self._offset

    def read(self, ids: torch.Tensor = None):
        """Read the feature by index.

        The returned tensor is always in GPU memory, no matter whether the
        fallback feature is in memory or on disk.

        Parameters
        ----------
        ids : torch.Tensor, optional
            The index of the feature. If specified, only the specified
            indices of the feature are read. If None, the entire feature is
            returned.

        Returns
        -------
        torch.Tensor
            The read feature.
        """
        if ids is None:
            # Reading everything bypasses the cache entirely.
            return self._fallback_feature.read()
        values, missing_index, missing_keys = self._feature.query(
            self._to_cache_keys(ids)
        )
        missing_values = self._fallback_feature.read(
            self._to_fallback_ids(missing_keys)
        )
        # Fill in the cache misses and insert them into the cache.
        values[missing_index] = missing_values
        self._feature.replace(missing_keys, missing_values)
        return values

    def read_async(self, ids: torch.Tensor):
        r"""Read the feature by index asynchronously.

        Parameters
        ----------
        ids : torch.Tensor
            The index of the feature. Only the specified indices of the
            feature are read.

        Returns
        -------
        A generator object.
            The returned generator object returns a future on
            ``read_async_num_stages(ids.device)``\ th invocation. The return
            result can be accessed by calling ``.wait()``. on the returned
            future object. It is undefined behavior to call ``.wait()`` more
            than once.

        Examples
        --------
        >>> import dgl.graphbolt as gb
        >>> feature = gb.Feature(...)
        >>> ids = torch.tensor([0, 2])
        >>> for stage, future in enumerate(feature.read_async(ids)):
        ...     pass
        >>> assert stage + 1 == feature.read_async_num_stages(ids.device)
        >>> result = future.wait()  # result contains the read values.
        """
        future = self._feature.query(self._to_cache_keys(ids), async_op=True)

        yield  # cache query stage.

        values, missing_index, missing_keys = future.wait()

        fallback_reader = self._fallback_feature.read_async(
            self._to_fallback_ids(missing_keys)
        )
        fallback_num_stages = self._fallback_feature.read_async_num_stages(
            missing_keys.device
        )
        for i in range(fallback_num_stages):
            missing_values_future = next(fallback_reader, None)
            if i < fallback_num_stages - 1:
                yield  # fallback feature stages.

        class _Waiter:
            def __init__(
                self,
                feature,
                values,
                missing_index,
                missing_keys,
                missing_values_future,
            ):
                self.feature = feature
                self.values = values
                self.missing_index = missing_index
                self.missing_keys = missing_keys
                self.missing_values_future = missing_values_future

            def wait(self):
                """Returns the stored value when invoked."""
                missing_values = self.missing_values_future.wait()
                self.feature.replace(self.missing_keys, missing_values)
                self.values[self.missing_index] = missing_values
                values = self.values
                # Ensure there is no memory leak.
                self.feature = self.values = self.missing_index = None
                self.missing_keys = self.missing_values_future = None
                return values

        # The last fallback stage is merged with the final yield, which
        # hands out the future combining cached and fetched values.
        yield _Waiter(
            self._feature,
            values,
            missing_index,
            missing_keys,
            missing_values_future,
        )

    def read_async_num_stages(self, ids_device: torch.device):
        """The number of stages of the read_async operation. See read_async
        function for directions on its use. This function is required to
        return the number of yield operations when read_async is used with a
        tensor residing on ids_device.

        Parameters
        ----------
        ids_device : torch.device
            The device of the ids parameter passed into read_async.

        Returns
        -------
        int
            The number of stages of the read_async operation.
        """
        assert ids_device.type == "cuda"
        # One stage for the cache query plus the fallback's own stages.
        return 1 + self._fallback_feature.read_async_num_stages(ids_device)

    def size(self):
        """Get the size of the feature.

        Returns
        -------
        torch.Size
            The size of the feature.
        """
        return self._fallback_feature.size()

    def count(self):
        """Get the count of the feature.

        Returns
        -------
        int
            The count of the feature.
        """
        return self._fallback_feature.count()

    def update(self, value: torch.Tensor, ids: torch.Tensor = None):
        """Update the feature.

        Parameters
        ----------
        value : torch.Tensor
            The updated value of the feature.
        ids : torch.Tensor, optional
            The indices of the feature to update. If specified, only the
            specified indices of the feature will be updated. For the
            feature, the `ids[i]` row is updated to `value[i]`. So the
            indices and value must have the same length. If None, the
            entire feature will be updated.
        """
        if ids is None:
            feat0 = value[:1]
            self._fallback_feature.update(value)
            # The capacity must be read before the old cache is dropped.
            cache_size = min(
                bytes_to_number_of_items(self.cache_size_in_bytes, feat0),
                value.shape[0],
            )
            self._feature = None  # Destroy the existing cache first.
            self._feature = self._cache_type(
                (cache_size,) + feat0.shape[1:], feat0.dtype
            )
        else:
            self._fallback_feature.update(value, ids)
            # The cache is keyed by offset ids (see `read`), so the same
            # translation must be applied here; using the raw ids would
            # update the wrong entries whenever `offset` is non-zero.
            self._feature.replace(self._to_cache_keys(ids), value)

    @property
    def cache_size_in_bytes(self):
        """Return the size taken by the cache in bytes."""
        return self._feature.max_size_in_bytes

    @property
    def miss_rate(self):
        """Returns the cache miss rate since creation."""
        return self._feature.miss_rate
""" return wrap_with_cached_feature( GPUCachedFeature, fallback_features, max_cache_size_in_bytes ) ================================================ FILE: python/dgl/graphbolt/impl/gpu_feature_cache.py ================================================ """HugeCTR gpu_cache wrapper for graphbolt.""" from functools import reduce from operator import mul import torch class GPUFeatureCache(object): """High-level wrapper for GPU embedding cache""" def __init__(self, cache_shape, dtype): major, _ = torch.cuda.get_device_capability() assert ( major >= 7 ), "GPUFeatureCache is supported only on CUDA compute capability >= 70 (Volta)." self._cache = torch.ops.graphbolt.gpu_cache(cache_shape, dtype) element_size = torch.tensor([], dtype=dtype).element_size() self.max_size_in_bytes = reduce(mul, cache_shape) * element_size self.total_miss = 0 self.total_queries = 0 def query(self, keys, async_op=False): """Queries the GPU cache. Parameters ---------- keys : Tensor The keys to query the GPU cache with. async_op: bool Boolean indicating whether the call is asynchronous. If so, the result can be obtained by calling wait on the returned future. Returns ------- tuple(Tensor, Tensor, Tensor) A tuple containing (values, missing_indices, missing_keys) where values[missing_indices] corresponds to cache misses that should be filled by quering another source with missing_keys. """ class _Waiter: def __init__(self, gpu_cache, future): self.gpu_cache = gpu_cache self.future = future def wait(self): """Returns the stored value when invoked.""" gpu_cache = self.gpu_cache values, missing_index, missing_keys = ( self.future.wait() if async_op else self.future ) # Ensure there is no leak. 
class GPUGraphCache:
    r"""High-level wrapper for GPU graph cache.

    Places the GPU graph cache to torch.cuda.current_device().

    Parameters
    ----------
    num_edges : int
        Upperbound on number of edges to cache.
    threshold : int
        The number of accesses before the neighborhood of a vertex is cached.
    indptr_dtype : torch.dtype
        The dtype of the indptr tensor of the graph.
    dtypes : list[torch.dtype]
        The dtypes of the edge tensors that are going to be cached.
    has_original_edge_ids : bool
        Whether the graph to be cached has original edge ids.
    """

    def __init__(
        self, num_edges, threshold, indptr_dtype, dtypes, has_original_edge_ids
    ):
        major, _ = torch.cuda.get_device_capability()
        assert (
            major >= 7
        ), "GPUGraphCache is supported only on CUDA compute capability >= 70 (Volta)."
        self._cache = torch.ops.graphbolt.gpu_graph_cache(
            num_edges, threshold, indptr_dtype, dtypes, has_original_edge_ids
        )
        # Running statistics used by `miss_rate`.
        self.total_miss = 0
        self.total_queries = 0

    def query(self, keys):
        """Queries the GPU cache.

        Parameters
        ----------
        keys : Tensor
            The keys to query the GPU graph cache with.

        Returns
        -------
        tuple(Tensor, func)
            A tuple containing (missing_keys, replace_fn) where replace_fn is a
            function that should be called with the graph structure
            corresponding to the missing keys. Its arguments are
            (Tensor, list(Tensor)), where the first tensor is the missing indptr
            and the second list is the missing edge tensors.
        """
        self.total_queries += keys.shape[0]
        (
            index,
            position,
            num_hit,
            num_threshold,
        ) = self._cache.query(keys)
        self.total_miss += keys.shape[0] - num_hit

        def replace_functional(missing_indptr, missing_edge_tensors):
            # Forward the query result together with the fetched structure.
            return self._cache.replace(
                keys,
                index,
                position,
                num_hit,
                num_threshold,
                missing_indptr,
                missing_edge_tensors,
            )

        # Entries of `index` from `num_hit` onwards select the missed keys.
        return keys[index[num_hit:]], replace_functional

    def query_async(self, keys):
        """Queries the GPU cache asynchronously.

        Parameters
        ----------
        keys : Tensor
            The keys to query the GPU graph cache with.

        Returns
        -------
        A generator object.
            The returned generator object returns the missing keys on the second
            invocation and expects the fetched indptr and edge tensors on the
            next invocation. The third and last invocation returns a future
            object and the return result can be accessed by calling `.wait()`
            on the returned future object. It is undefined behavior to call
            `.wait()` more than once.
        """
        future = self._cache.query_async(keys)

        yield

        index, position, num_hit, num_threshold = future.wait()

        self.total_queries += keys.shape[0]
        self.total_miss += keys.shape[0] - num_hit

        # Hand out the missing keys and receive the fetched structure via
        # `generator.send((missing_indptr, missing_edge_tensors))`.
        missing_indptr, missing_edge_tensors = yield keys[index[num_hit:]]

        yield self._cache.replace_async(
            keys,
            index,
            position,
            num_hit,
            num_threshold,
            missing_indptr,
            missing_edge_tensors,
        )

    @property
    def miss_rate(self):
        """Returns the cache miss rate since creation.

        Returns 0.0 when no query has been made yet instead of raising
        ``ZeroDivisionError``.
        """
        if self.total_queries == 0:
            return 0.0
        return self.total_miss / self.total_queries
class LegacyDataset(Dataset):
    """A Graphbolt dataset for legacy DGLDataset.

    Wraps a single-graph legacy dataset and exposes its node prediction
    task, graph, features and the set of all nodes through the GraphBolt
    dataset interface.
    """

    def __init__(self, legacy):
        # Only supports single graph cases.
        assert len(legacy) == 1
        graph = legacy[0]
        # Handle OGB Dataset, whose items are (graph, labels) tuples.
        if isinstance(graph, tuple):
            graph, _ = graph
        initializer = (
            self._init_as_homogeneous_node_pred
            if graph.is_homogeneous
            else self._init_as_heterogeneous_node_pred
        )
        initializer(legacy)

    @staticmethod
    def _as_2d(tensor):
        """Return 1D tensors as a single column; others are left as-is."""
        return tensor.view(-1, 1) if tensor.dim() == 1 else tensor

    def _init_as_heterogeneous_node_pred(self, legacy):
        """Populate the dataset from a heterogeneous OGB-style dataset."""

        def _init_item_set_dict(idx, labels):
            # One labeled item set per key of the split.
            return HeteroItemSet(
                {
                    key: ItemSet(
                        (idx[key], labels[key][idx[key]]),
                        names=("seeds", "labels"),
                    )
                    for key in idx.keys()
                }
            )

        # OGB Dataset has the idx split.
        if not hasattr(legacy, "get_idx_split"):
            raise NotImplementedError(
                "Only support heterogeneous ogn node pred dataset"
            )

        graph, labels = legacy[0]
        split_idx = legacy.get_idx_split()

        # Initialize tasks.
        metadata = {
            "num_classes": legacy.num_classes,
            "name": "node_classification",
        }
        train_set = _init_item_set_dict(split_idx["train"], labels)
        validation_set = _init_item_set_dict(split_idx["valid"], labels)
        test_set = _init_item_set_dict(split_idx["test"], labels)
        self._tasks = [
            OnDiskTask(metadata, train_set, validation_set, test_set)
        ]

        # Item set covering every node of every node type.
        self._all_nodes_set = HeteroItemSet(
            {
                ntype: ItemSet(graph.num_nodes(ntype), names="seeds")
                for ntype in graph.ntypes
            }
        )

        # Collect node and edge features, keyed by (domain, type, name).
        features = {}
        for ntype in graph.ntypes:
            for name in graph.nodes[ntype].data.keys():
                node_feat = self._as_2d(graph.nodes[ntype].data[name])
                features[("node", ntype, name)] = TorchBasedFeature(node_feat)
        for etype in graph.canonical_etypes:
            for name in graph.edges[etype].data.keys():
                edge_feat = self._as_2d(graph.edges[etype].data[name])
                gb_etype = etype_tuple_to_str(etype)
                features[("edge", gb_etype, name)] = TorchBasedFeature(
                    edge_feat
                )
        self._feature = BasicFeatureStore(features)
        self._graph = from_dglgraph(graph, is_homogeneous=False)
        self._dataset_name = legacy.name

    def _init_as_homogeneous_node_pred(self, legacy):
        """Populate the dataset from a homogeneous node prediction dataset."""
        from dgl.data import AsNodePredDataset

        legacy = AsNodePredDataset(legacy)

        # Initialize tasks.
        metadata = {
            "num_classes": legacy.num_classes,
            "name": "node_classification",
        }
        train_labels = legacy[0].ndata["label"][legacy.train_idx]
        validation_labels = legacy[0].ndata["label"][legacy.val_idx]
        test_labels = legacy[0].ndata["label"][legacy.test_idx]
        seed_label_names = ("seeds", "labels")
        train_set = ItemSet(
            (legacy.train_idx, train_labels), names=seed_label_names
        )
        validation_set = ItemSet(
            (legacy.val_idx, validation_labels), names=seed_label_names
        )
        test_set = ItemSet(
            (legacy.test_idx, test_labels), names=seed_label_names
        )
        self._tasks = [
            OnDiskTask(metadata, train_set, validation_set, test_set)
        ]

        num_nodes = legacy[0].num_nodes()
        self._all_nodes_set = ItemSet(num_nodes, names="seeds")

        # Collect node and edge features; the type slot is None here.
        features = {}
        for name in legacy[0].ndata.keys():
            node_feat = self._as_2d(legacy[0].ndata[name])
            features[("node", None, name)] = TorchBasedFeature(node_feat)
        for name in legacy[0].edata.keys():
            edge_feat = self._as_2d(legacy[0].edata[name])
            features[("edge", None, name)] = TorchBasedFeature(edge_feat)
        self._feature = BasicFeatureStore(features)
        self._graph = from_dglgraph(legacy[0], is_homogeneous=True)
        self._dataset_name = legacy.name

    @property
    def tasks(self) -> List[Task]:
        """The list of tasks built from the legacy dataset."""
        return self._tasks

    @property
    def graph(self) -> SamplingGraph:
        """The graph converted from the legacy dataset."""
        return self._graph

    @property
    def feature(self) -> BasicFeatureStore:
        """The feature store holding node and edge features."""
        return self._feature

    @property
    def dataset_name(self) -> str:
        """The name of the legacy dataset."""
        return self._dataset_name

    @property
    def all_nodes_set(self) -> Union[ItemSet, HeteroItemSet]:
        """The itemset containing all nodes."""
        return self._all_nodes_set
@functional_datapipe("fetch_cached_insubgraph_data")
class FetchCachedInsubgraphData(Mapper):
    """Looks up the minibatch seeds in the GPUGraphCache.

    The query is started in one pipeline stage and completed in the next;
    afterwards the minibatch carries only the seeds missing from the cache,
    together with the generator handle that later accepts the fetched
    graph structure.
    """

    def __init__(self, datapipe, gpu_graph_cache):
        # Stage 1 launches the asynchronous query; the buffer lets it
        # overlap with the stage that waits for the result.
        launched = datapipe.transform(self._fetch_per_layer)
        buffered = launched.buffer()
        super().__init__(buffered, self._wait_query_future)
        self.cache = gpu_graph_cache

    def _fetch_per_layer(self, minibatch):
        """Start the asynchronous cache query for the minibatch seeds."""
        handle = self.cache.query_async(minibatch._seeds)
        minibatch._async_handle = handle
        # Advance the generator once to kick off its first stage.
        next(handle)
        return minibatch

    @staticmethod
    def _wait_query_future(minibatch):
        """Replace the seeds with the ones the cache could not serve."""
        missing_seeds = next(minibatch._async_handle)
        minibatch._seeds = missing_seeds
        return minibatch
""" def __init__(self, datapipe, prob_name): datapipe = datapipe.transform(self._combine_per_layer).buffer() super().__init__(datapipe, self._wait_replace_future) self.prob_name = prob_name def _combine_per_layer(self, minibatch): subgraph = minibatch._sliced_sampling_graph edge_tensors = [subgraph.indices] if subgraph.type_per_edge is not None: edge_tensors.append(subgraph.type_per_edge) probs_or_mask = subgraph.edge_attribute(self.prob_name) if probs_or_mask is not None: edge_tensors.append(probs_or_mask) edge_tensors.append(subgraph.edge_attribute(ORIGINAL_EDGE_ID)) minibatch._future = minibatch._async_handle.send( (subgraph.csc_indptr, edge_tensors) ) delattr(minibatch, "_async_handle") return minibatch def _wait_replace_future(self, minibatch): subgraph = minibatch._sliced_sampling_graph subgraph.csc_indptr, edge_tensors = minibatch._future.wait() delattr(minibatch, "_future") subgraph.indices = edge_tensors[0] edge_tensors = edge_tensors[1:] if subgraph.type_per_edge is not None: subgraph.type_per_edge = edge_tensors[0] edge_tensors = edge_tensors[1:] probs_or_mask = subgraph.edge_attribute(self.prob_name) if probs_or_mask is not None: subgraph.add_edge_attribute(self.prob_name, edge_tensors[0]) edge_tensors = edge_tensors[1:] subgraph.add_edge_attribute(ORIGINAL_EDGE_ID, edge_tensors[0]) edge_tensors = edge_tensors[1:] assert len(edge_tensors) == 0 return minibatch @functional_datapipe("fetch_insubgraph_data") class FetchInsubgraphData(MiniBatchTransformer): """Fetches the insubgraph and wraps it in a FusedCSCSamplingGraph object. If the provided sample_per_layer_obj has a valid prob_name, then it reads the probabilies of all the fetched edges. 
@functional_datapipe("fetch_insubgraph_data")
class FetchInsubgraphData(MiniBatchTransformer):
    """Fetches the insubgraph and wraps it in a FusedCSCSamplingGraph object. If
    a valid prob_name is provided, then it reads the probabilities of all the
    fetched edges. Furthermore, if the type_per_edge tensor exists in the
    underlying graph, then the types of all the fetched edges are read as
    well.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe producing minibatches that carry ``_seed_nodes``.
    graph : FusedCSCSamplingGraph
        The graph whose ``csc_indptr``, ``indices``, ``type_per_edge`` and
        ``edge_attributes`` are sliced for the seeds.
    prob_name : str or None
        Name of the edge attribute holding probabilities or masks.
    """

    def __init__(
        self,
        datapipe,
        graph,
        prob_name,
    ):
        datapipe = datapipe.transform(self._concat_hetero_seeds)
        # With a GPU graph cache, the cache is queried first; that stage
        # overwrites `minibatch._seeds` before the fetch below runs.
        if graph._gpu_graph_cache is not None:
            datapipe = datapipe.fetch_cached_insubgraph_data(
                graph._gpu_graph_cache
            )
        # The fetch is split into two stages around a buffer: stage 1 submits
        # the slicing, stage 2 waits for it and builds the subgraph.
        datapipe = datapipe.transform(self._fetch_per_layer_stage_1)
        datapipe = datapipe.buffer()
        datapipe = datapipe.transform(self._fetch_per_layer_stage_2)
        if graph._gpu_graph_cache is not None:
            datapipe = datapipe.combine_cached_and_fetched_insubgraph(prob_name)
        super().__init__(datapipe)
        self.graph = graph
        self.prob_name = prob_name

    def _concat_hetero_seeds(self, minibatch):
        """Concatenates the seeds into a single tensor in the hetero case.

        Stores the result in ``minibatch._seeds`` and the per-type offsets
        (``None`` when the seeds are not a dict) in
        ``minibatch._seed_offsets``.
        """
        seeds = minibatch._seed_nodes
        if isinstance(seeds, dict):
            (
                seeds,
                seed_offsets,
            ) = self.graph._convert_to_homogeneous_nodes(seeds)
        else:
            seed_offsets = None
        minibatch._seeds = seeds
        minibatch._seed_offsets = seed_offsets
        return minibatch

    def _fetch_per_layer_stage_1(self, minibatch):
        """Creates the fetch generator and runs it up to its first yield."""
        minibatch._async_handle_fetch = self._fetch_per_layer_async(minibatch)
        next(minibatch._async_handle_fetch)
        return minibatch

    def _fetch_per_layer_stage_2(self, minibatch):
        """Resumes the fetch generator; its second yield is the minibatch."""
        minibatch = next(minibatch._async_handle_fetch)
        delattr(minibatch, "_async_handle_fetch")
        return minibatch

    def _fetch_per_layer_async(self, minibatch):
        """Two-step generator that slices the graph for the minibatch seeds.

        The first step submits the batched slicing while the stream returned
        by ``get_host_to_device_uva_stream()`` is current and yields ``None``.
        The second step waits for the result, wraps it into a
        FusedCSCSamplingGraph stored as ``minibatch._sliced_sampling_graph``
        and yields the minibatch.
        """
        stream = torch.cuda.current_stream()
        uva_stream = get_host_to_device_uva_stream()
        uva_stream.wait_stream(stream)
        with torch.cuda.stream(uva_stream):
            seeds = minibatch._seeds
            seed_offsets = minibatch._seed_offsets
            delattr(minibatch, "_seeds")
            delattr(minibatch, "_seed_offsets")

            seeds.record_stream(torch.cuda.current_stream())

            # Packs tensors for batch slicing. The order used here must be
            # mirrored when the sliced tensors are unpacked below.
            tensors_to_be_sliced = [self.graph.indices]

            has_type_per_edge = False
            if self.graph.type_per_edge is not None:
                tensors_to_be_sliced.append(self.graph.type_per_edge)
                has_type_per_edge = True

            has_probs_or_mask = False
            has_original_edge_ids = False
            if self.graph.edge_attributes is not None:
                probs_or_mask = self.graph.edge_attributes.get(
                    self.prob_name, None
                )
                if probs_or_mask is not None:
                    tensors_to_be_sliced.append(probs_or_mask)
                    has_probs_or_mask = True
                original_edge_ids = self.graph.edge_attributes.get(
                    ORIGINAL_EDGE_ID, None
                )
                if original_edge_ids is not None:
                    tensors_to_be_sliced.append(original_edge_ids)
                    has_original_edge_ids = True

            # Slices the batched tensors.
            future = torch.ops.graphbolt.index_select_csc_batched_async(
                self.graph.csc_indptr,
                tensors_to_be_sliced,
                seeds,
                # When there are no edge ids, we assume it is arange(num_edges).
                not has_original_edge_ids,
                None,
            )

        yield

        # graphbolt::async has already recorded a CUDAEvent for us and
        # called CUDAStreamWaitEvent for us on the current stream.
        indptr, sliced_tensors = future.wait()

        for tensor in [indptr] + sliced_tensors:
            tensor.record_stream(stream)

        # Unpacks the sliced tensors.
        indices = sliced_tensors[0]
        sliced_tensors = sliced_tensors[1:]

        type_per_edge = None
        if has_type_per_edge:
            type_per_edge = sliced_tensors[0]
            sliced_tensors = sliced_tensors[1:]

        probs_or_mask = None
        if has_probs_or_mask:
            probs_or_mask = sliced_tensors[0]
            sliced_tensors = sliced_tensors[1:]

        # The last tensor is always read as the edge ids, whether or not the
        # graph stores ORIGINAL_EDGE_ID (see the flag passed above).
        edge_ids = sliced_tensors[0]
        sliced_tensors = sliced_tensors[1:]
        assert len(sliced_tensors) == 0

        subgraph = fused_csc_sampling_graph(
            indptr,
            indices,
            node_type_offset=self.graph.node_type_offset,
            type_per_edge=type_per_edge,
            node_type_to_id=self.graph.node_type_to_id,
            edge_type_to_id=self.graph.edge_type_to_id,
        )
        if self.prob_name is not None and probs_or_mask is not None:
            subgraph.add_edge_attribute(self.prob_name, probs_or_mask)
        subgraph.add_edge_attribute(ORIGINAL_EDGE_ID, edge_ids)

        subgraph._indptr_node_type_offset_list = seed_offsets
        minibatch._sliced_sampling_graph = subgraph

        yield minibatch
@functional_datapipe("sample_per_layer")
class SamplePerLayer(MiniBatchTransformer):
    """Sample neighbor edges from a graph for a single layer.

    Depending on ``overlap_fetch`` and on where the graph tensors live, one
    of three pipelines is assembled:

    * sample first and fetch the ``indices`` / original edge ids afterwards
      on the host-to-device stream,
    * fetch the in-subgraph of the seeds first and sample from it, or
    * sample directly from the graph.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe producing minibatches.
    sampler : callable
        A bound sampling method of the graph; the graph itself is taken from
        ``sampler.__self__``.
    fanout : torch.Tensor
        The fanout passed to the sampler.
    replace : bool
        Passed to the sampler as is.
    prob_name : str or None
        Name of the edge attribute holding probabilities or masks.
    overlap_fetch : bool
        Whether one of the two fetching pipelines may be used.
    asynchronous : bool
        If True, the sampler is called with ``async_op=True`` and its result
        is waited on one buffered stage later.
    """

    def __init__(
        self,
        datapipe,
        sampler,
        fanout,
        replace,
        prob_name,
        overlap_fetch,
        asynchronous=False,
    ):
        graph = sampler.__self__
        self.returning_indices_and_original_edge_ids_are_optional = False
        if graph.edge_attributes is None:
            original_edge_ids = None
        else:
            original_edge_ids = graph.edge_attributes.get(
                ORIGINAL_EDGE_ID, None
            )

        def wait_if_async(pipe):
            # An asynchronous sampler returns a future; resolve it one
            # buffered stage later.
            if asynchronous:
                pipe = pipe.buffer()
                pipe = pipe.transform(self._wait_subgraph_future)
            return pipe

        fetch_after_sampling = (
            overlap_fetch
            and sampler.__name__ == "sample_neighbors"
            and (
                graph.indices.is_pinned()
                or (
                    original_edge_ids is not None
                    and original_edge_ids.is_pinned()
                )
            )
            and graph._gpu_graph_cache is None
        )
        if fetch_after_sampling:
            datapipe = wait_if_async(
                datapipe.transform(self._sample_per_layer)
            )
            fetch_fn = partial(
                self._fetch_indices_and_original_edge_ids,
                graph.indices,
                original_edge_ids,
            )
            datapipe = datapipe.transform(fetch_fn)
            datapipe = datapipe.buffer()
            datapipe = datapipe.wait()
            if graph.type_per_edge is not None:
                # Hetero case.
                subtract_fn = partial(
                    self._subtract_hetero_indices_offset,
                    graph._node_type_offset_list,
                    graph.node_type_to_id,
                )
                datapipe = datapipe.transform(subtract_fn)
            self.returning_indices_and_original_edge_ids_are_optional = True
        elif overlap_fetch:
            datapipe = datapipe.fetch_insubgraph_data(graph, prob_name)
            datapipe = wait_if_async(
                datapipe.transform(self._sample_per_layer_from_fetched_subgraph)
            )
        else:
            datapipe = wait_if_async(
                datapipe.transform(self._sample_per_layer)
            )
        super().__init__(datapipe)
        self.sampler = sampler
        self.fanout = fanout
        self.replace = replace
        self.prob_name = prob_name
        self.overlap_fetch = overlap_fetch
        self.asynchronous = asynchronous

    @staticmethod
    def _dependency_kwargs(minibatch):
        """Collect the optional seed attributes as sampler keyword args.

        The leading underscore of each attribute name is dropped to form the
        keyword name.
        """
        kwargs = {}
        for attr in ["_random_seed", "_seed2_contribution"]:
            if hasattr(minibatch, attr):
                kwargs[attr[1:]] = getattr(minibatch, attr)
        return kwargs

    def _sample_per_layer(self, minibatch):
        """Sample from the full graph and prepend the result."""
        kwargs = self._dependency_kwargs(minibatch)
        sampled = self.sampler(
            minibatch._seed_nodes,
            self.fanout,
            self.replace,
            self.prob_name,
            self.returning_indices_and_original_edge_ids_are_optional,
            async_op=self.asynchronous,
            **kwargs,
        )
        minibatch.sampled_subgraphs.insert(0, sampled)
        return minibatch

    def _sample_per_layer_from_fetched_subgraph(self, minibatch):
        """Sample from the previously fetched in-subgraph.

        The method with the same name as ``self.sampler`` is looked up on the
        fetched subgraph and called with ``None`` as its first argument.
        """
        fetched = minibatch._sliced_sampling_graph
        delattr(minibatch, "_sliced_sampling_graph")
        kwargs = self._dependency_kwargs(minibatch)
        sample_fn = getattr(fetched, self.sampler.__name__)
        sampled = sample_fn(
            None,
            self.fanout,
            self.replace,
            self.prob_name,
            async_op=self.asynchronous,
            **kwargs,
        )
        minibatch.sampled_subgraphs.insert(0, sampled)
        return minibatch

    @staticmethod
    def _wait_subgraph_future(minibatch):
        """Replace the newest sampled subgraph future with its result."""
        pending = minibatch.sampled_subgraphs[0]
        minibatch.sampled_subgraphs[0] = pending.wait()
        return minibatch

    @staticmethod
    def _fetch_indices_and_original_edge_ids(indices, orig_edge_ids, minibatch):
        """Fill in the indices / original edge ids the sampler left as None.

        The gathers are issued while the stream returned by
        ``get_host_to_device_uva_stream()`` is current. ``minibatch.wait`` is
        set to the ``wait`` method of an event recorded on that stream.
        """
        consumer_stream = torch.cuda.current_stream()
        copy_stream = get_host_to_device_uva_stream()
        copy_stream.wait_stream(consumer_stream)

        def gather(source, positions):
            # `positions` is used on the current (copy) stream, the gathered
            # tensor is used later on the consumer stream.
            positions.record_stream(torch.cuda.current_stream())
            gathered = index_select(source, positions)
            gathered.record_stream(consumer_stream)
            return gathered

        with torch.cuda.stream(copy_stream):
            minibatch._indices_needs_offset_subtraction = False
            subgraph = minibatch.sampled_subgraphs[0]
            if isinstance(subgraph.sampled_csc, dict):
                for etype, pair in subgraph.sampled_csc.items():
                    if pair.indices is None:
                        pair.indices = gather(
                            indices,
                            subgraph._edge_ids_in_fused_csc_sampling_graph[
                                etype
                            ],
                        )
                        minibatch._indices_needs_offset_subtraction = True
                    if (
                        orig_edge_ids is not None
                        and subgraph.original_edge_ids[etype] is None
                    ):
                        subgraph.original_edge_ids[etype] = gather(
                            orig_edge_ids,
                            subgraph._edge_ids_in_fused_csc_sampling_graph[
                                etype
                            ],
                        )
            else:
                if subgraph.sampled_csc.indices is None:
                    subgraph.sampled_csc.indices = gather(
                        indices,
                        subgraph._edge_ids_in_fused_csc_sampling_graph,
                    )
                if (
                    orig_edge_ids is not None
                    and subgraph.original_edge_ids is None
                ):
                    subgraph.original_edge_ids = gather(
                        orig_edge_ids,
                        subgraph._edge_ids_in_fused_csc_sampling_graph,
                    )
            subgraph._edge_ids_in_fused_csc_sampling_graph = None
            minibatch.wait = torch.cuda.current_stream().record_event().wait

        return minibatch

    @staticmethod
    def _subtract_hetero_indices_offset(
        node_type_offset, node_type_to_id, minibatch
    ):
        """Subtract the source node type offset from freshly fetched indices.

        Runs only when the fetch stage flagged the minibatch; the flag is
        removed from the minibatch in either case.
        """
        if minibatch._indices_needs_offset_subtraction:
            newest = minibatch.sampled_subgraphs[0]
            for etype, pair in newest.sampled_csc.items():
                src_ntype = etype_str_to_tuple(etype)[0]
                offset = node_type_offset[node_type_to_id[src_ntype]]
                pair.indices -= offset
        delattr(minibatch, "_indices_needs_offset_subtraction")
        return minibatch
@functional_datapipe("compact_per_layer")
class CompactPerLayer(MiniBatchTransformer):
    """Compact the sampled edges for a single layer.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe producing minibatches whose newest sampled subgraph is
        ``minibatch.sampled_subgraphs[0]``.
    deduplicate : bool
        Whether ``unique_and_compact_csc_formats`` (True) or
        ``compact_csc_format`` (False) is used.
    cooperative : bool
        If True, the seed exchange stages are appended. They are only wired
        in on the asynchronous, deduplicating path.
    asynchronous : bool
        If True (together with ``deduplicate``), compaction is split into a
        submit stage and a wait stage separated by a buffer.
    """

    def __init__(
        self, datapipe, deduplicate, cooperative=False, asynchronous=False
    ):
        self.deduplicate = deduplicate
        self.cooperative = cooperative
        if not (asynchronous and deduplicate):
            super().__init__(datapipe, self._compact_per_layer)
            return
        datapipe = datapipe.transform(self._compact_per_layer_async)
        datapipe = datapipe.buffer()
        datapipe = datapipe.transform(self._compact_per_layer_wait_future)
        if cooperative:
            exchange_stages = [
                self._seeds_cooperative_exchange_1,
                self._seeds_cooperative_exchange_2,
                self._seeds_cooperative_exchange_3,
                self._seeds_cooperative_exchange_4,
            ]
            for position, stage in enumerate(exchange_stages):
                # Consecutive exchange stages are separated by a buffer.
                if position > 0:
                    datapipe = datapipe.buffer()
                datapipe = datapipe.transform(stage)
        super().__init__(datapipe)

    def _compact_per_layer(self, minibatch):
        """Compact the newest sampled subgraph synchronously."""
        sampled = minibatch.sampled_subgraphs[0]
        seeds = minibatch._seed_nodes
        if self.deduplicate:
            row_node_ids, compacted, _ = unique_and_compact_csc_formats(
                sampled.sampled_csc, seeds
            )
        else:
            row_node_ids, compacted = compact_csc_format(
                sampled.sampled_csc, seeds
            )
        compacted_subgraph = SampledSubgraphImpl(
            sampled_csc=compacted,
            original_column_node_ids=seeds,
            original_row_node_ids=row_node_ids,
            original_edge_ids=sampled.original_edge_ids,
        )
        # The row nodes of this layer are the seeds of the next one.
        minibatch._seed_nodes = row_node_ids
        minibatch.sampled_subgraphs[0] = compacted_subgraph
        return minibatch

    def _compact_per_layer_async(self, minibatch):
        """Submit the deduplicating compaction and keep its future."""
        sampled = minibatch.sampled_subgraphs[0]
        seeds = minibatch._seed_nodes
        assert self.deduplicate
        if self.cooperative:
            rank = thd.get_rank()
            world_size = thd.get_world_size()
        else:
            rank = 0
            world_size = 1
        minibatch._future = unique_and_compact_csc_formats(
            sampled.sampled_csc, seeds, rank, world_size, async_op=True
        )
        return minibatch

    def _compact_per_layer_wait_future(self, minibatch):
        """Wait for the compaction future and install the compacted subgraph."""
        sampled = minibatch.sampled_subgraphs[0]
        seeds = minibatch._seed_nodes
        row_node_ids, compacted, seeds_offsets = minibatch._future.wait()
        delattr(minibatch, "_future")
        compacted_subgraph = SampledSubgraphImpl(
            sampled_csc=compacted,
            original_column_node_ids=seeds,
            original_row_node_ids=row_node_ids,
            original_edge_ids=sampled.original_edge_ids,
        )
        minibatch._seed_nodes = row_node_ids
        minibatch.sampled_subgraphs[0] = compacted_subgraph
        if self.cooperative:
            compacted_subgraph._seeds_offsets = seeds_offsets
        return minibatch

    @staticmethod
    def _seeds_cooperative_exchange_1(minibatch):
        """Start exchanging the per-rank, per-node-type seed counts."""
        world_size = thd.get_world_size()
        subgraph = minibatch.sampled_subgraphs[0]
        offsets_by_ntype = subgraph._seeds_offsets
        if not isinstance(offsets_by_ntype, dict):
            # Homogeneous case: treat it as a single node type.
            offsets_by_ntype = {"_N": offsets_by_ntype}
        num_ntypes = len(offsets_by_ntype)
        total = world_size * num_ntypes
        counts_sent = torch.empty(total, dtype=torch.int64)
        # Layout: the count for (rank r, type i) is at r * num_ntypes + i.
        for i, offsets in enumerate(offsets_by_ntype.values()):
            counts_sent[torch.arange(i, total, num_ntypes)] = offsets.diff()
        counts_received = torch.empty_like(counts_sent)
        subgraph._counts_future = all_to_all(
            counts_received.split(num_ntypes),
            counts_sent.split(num_ntypes),
            async_op=True,
        )
        subgraph._counts_sent = counts_sent
        subgraph._counts_received = counts_received
        return minibatch

    @staticmethod
    def _seeds_cooperative_exchange_2(minibatch):
        """Wait for the counts, then exchange the seeds of every node type."""
        world_size = thd.get_world_size()
        seeds_by_ntype = minibatch._seed_nodes
        if not isinstance(seeds_by_ntype, dict):
            seeds_by_ntype = {"_N": seeds_by_ntype}
        subgraph = minibatch.sampled_subgraphs[0]
        subgraph._counts_future.wait()
        delattr(subgraph, "_counts_future")
        num_ntypes = len(seeds_by_ntype.keys())
        total = world_size * num_ntypes
        received_seeds = {}
        sent_counts = {}
        received_counts = {}
        for i, (ntype, typed_seeds) in enumerate(seeds_by_ntype.items()):
            idx = torch.arange(i, total, num_ntypes)
            num_sent = subgraph._counts_sent[idx].tolist()
            num_received = subgraph._counts_received[idx].tolist()
            incoming = typed_seeds.new_empty(sum(num_received))
            all_to_all(
                incoming.split(num_received),
                typed_seeds.split(num_sent),
            )
            received_seeds[ntype] = incoming
            sent_counts[ntype] = num_sent
            received_counts[ntype] = num_received
        minibatch._seed_nodes = received_seeds
        subgraph._counts_sent = revert_to_homo(sent_counts)
        subgraph._counts_received = revert_to_homo(received_counts)
        return minibatch

    @staticmethod
    def _seeds_cooperative_exchange_3(minibatch):
        """Submit the deduplication of the received seeds."""
        nodes = {}
        for ntype, typed_seeds in minibatch._seed_nodes.items():
            nodes[ntype] = [typed_seeds]
        minibatch._unique_future = unique_and_compact(
            nodes, 0, 1, async_op=True
        )
        return minibatch

    @staticmethod
    def _seeds_cooperative_exchange_4(minibatch):
        """Wait for the deduplication and record sizes and inverse ids."""
        unique_seeds, inverse_seeds, _ = minibatch._unique_future.wait()
        delattr(minibatch, "_unique_future")
        # One tensor per node type was submitted, so keep the first inverse.
        first_inverse = {}
        for ntype, typed_inv in inverse_seeds.items():
            first_inverse[ntype] = typed_inv[0]
        minibatch._seed_nodes = revert_to_homo(unique_seeds)
        subgraph = minibatch.sampled_subgraphs[0]
        sizes = {}
        for ntype, typed_seeds in unique_seeds.items():
            sizes[ntype] = typed_seeds.size(0)
        subgraph._seed_sizes = revert_to_homo(sizes)
        subgraph._seed_inverse_ids = revert_to_homo(first_inverse)
        return minibatch
class NeighborSamplerImpl(SubgraphSampler):
    # pylint: disable=abstract-method
    """Base class for NeighborSamplers.

    Initializes the GPU graph cache on demand, sets up the random seed state
    when the sampler is ``sample_layer_neighbors`` and assembles the
    per-layer sampling and compaction stages.
    """

    # pylint: disable=useless-super-delegation
    def __init__(
        self,
        datapipe,
        graph,
        fanouts,
        replace,
        prob_name,
        deduplicate,
        sampler,
        overlap_fetch,
        num_gpu_cached_edges,
        gpu_cache_threshold,
        cooperative,
        asynchronous,
        layer_dependency=None,
        batch_dependency=None,
    ):
        needs_gpu_cache = (
            overlap_fetch
            and num_gpu_cached_edges > 0
            and graph._gpu_graph_cache is None
        )
        if needs_gpu_cache:
            graph._initialize_gpu_graph_cache(
                num_gpu_cached_edges, gpu_cache_threshold, prob_name
            )
        # The seed state must exist before the stages are assembled by the
        # parent constructor.
        if sampler.__name__ == "sample_layer_neighbors":
            self._init_seed(batch_dependency)
        super().__init__(
            datapipe,
            graph,
            fanouts,
            replace,
            prob_name,
            deduplicate,
            sampler,
            overlap_fetch,
            cooperative=cooperative,
            asynchronous=asynchronous,
            layer_dependency=layer_dependency,
        )

    def _init_seed(self, batch_dependency):
        """Create the generator, the counters and the random seed tensor."""
        initial_seed = torch.randint(0, int(1e18), size=tuple())
        self.rng = torch.random.manual_seed(initial_seed)
        # cnt[0] counts minibatches, cnt[1] is the batch dependency.
        self.cnt = [-1, int(batch_dependency)]
        num_seeds = 2 if self.cnt[1] > 1 else 1
        self.random_seed = torch.empty(num_seeds, dtype=torch.int64)
        self.random_seed.random_(generator=self.rng)

    def _set_seed(self, minibatch):
        """Attach the current random seed state to the minibatch."""
        self.cnt[0] += 1
        iteration, period = self.cnt
        if period > 0 and iteration % period == 0:
            # Shift the last seed to the front and draw a new last one.
            self.random_seed[0] = self.random_seed[-1]
            self.random_seed[-1:].random_(generator=self.rng)
        minibatch._random_seed = self.random_seed.clone()
        if period <= 1:
            minibatch._seed2_contribution = 0.0
        else:
            minibatch._seed2_contribution = (iteration % period) / period
        minibatch._iter = iteration
        return minibatch

    @staticmethod
    def _increment_seed(minibatch):
        """Shift the random seed by one."""
        current_seed = minibatch._random_seed
        minibatch._random_seed = 1 + current_seed
        return minibatch

    @staticmethod
    def _delattr_dependency(minibatch):
        """Remove the seed attributes from the minibatch."""
        for attr in ("_random_seed", "_seed2_contribution"):
            delattr(minibatch, attr)
        return minibatch

    @staticmethod
    def _prepare(node_type_to_id, minibatch):
        """Complete the seeds with all node types and reset the subgraphs."""
        seeds = minibatch._seed_nodes
        # Enrich seeds with all node types.
        if isinstance(seeds, dict):
            # Take the device and dtype from the first seed tensor, if any.
            first_seed = next(iter(seeds.values()), None)
            if first_seed is None:
                device = None
                dtype = None
            else:
                device = first_seed.device
                dtype = first_seed.dtype
            empty_seeds = torch.tensor([], dtype=dtype, device=device)
            enriched = {}
            for ntype in list(node_type_to_id.keys()):
                enriched[ntype] = seeds.get(ntype, empty_seeds)
            seeds = enriched
        minibatch._seed_nodes = seeds
        minibatch.sampled_subgraphs = []
        return minibatch

    @staticmethod
    def _set_input_nodes(minibatch):
        """Expose the final seeds as the input nodes of the minibatch."""
        minibatch.input_nodes = minibatch._seed_nodes
        return minibatch

    # pylint: disable=arguments-differ
    def sampling_stages(
        self,
        datapipe,
        graph,
        fanouts,
        replace,
        prob_name,
        deduplicate,
        sampler,
        overlap_fetch,
        cooperative,
        asynchronous,
        layer_dependency,
    ):
        """Chain one sampling and one compaction stage per fanout.

        The fanouts are processed in reverse order. For the
        ``sample_layer_neighbors`` sampler, seed handling stages are added
        around and, without ``layer_dependency``, after each layer.
        """
        prepare_fn = partial(self._prepare, graph.node_type_to_id)
        pipe = datapipe.transform(prepare_fn)
        is_labor = sampler.__name__ == "sample_layer_neighbors"
        if is_labor:
            pipe = pipe.transform(self._set_seed)
        for fanout in reversed(fanouts):
            # Convert fanout to tensor.
            if not isinstance(fanout, torch.Tensor):
                fanout = torch.LongTensor([int(fanout)])
            pipe = pipe.sample_per_layer(
                sampler, fanout, replace, prob_name, overlap_fetch, asynchronous
            )
            pipe = pipe.compact_per_layer(
                deduplicate, cooperative, asynchronous
            )
            if is_labor and not layer_dependency:
                pipe = pipe.transform(self._increment_seed)
        if is_labor:
            pipe = pipe.transform(self._delattr_dependency)
        return pipe.transform(self._set_input_nodes)
@functional_datapipe("sample_neighbor")
class NeighborSampler(NeighborSamplerImpl):
    # pylint: disable=abstract-method
    """Sample neighbor edges from a graph and return a subgraph.

    Functional name: :obj:`sample_neighbor`.

    Neighbor sampler is responsible for sampling a subgraph from given data. It
    returns an induced subgraph along with compacted information. In the
    context of a node classification task, the neighbor sampler directly
    utilizes the nodes provided as seed nodes. However, in scenarios involving
    link prediction, the process needs another pre-process operation. That is,
    gathering unique nodes from the given node pairs, encompassing both
    positive and negative node pairs, and employing these nodes as the seed
    nodes for subsequent steps. When the graph is hetero, sampled subgraphs in
    minibatch will contain every edge type even though it is empty after
    sampling.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe.
    graph : FusedCSCSamplingGraph
        The graph on which to perform subgraph sampling.
    fanouts: list[torch.Tensor] or list[int]
        The number of edges to be sampled for each node with or without
        considering edge types. The length of this parameter implicitly
        signifies the layer of sampling being conducted.
        Note: The fanout order is from the outermost layer to innermost layer.
        For example, the fanout '[15, 10, 5]' means that 15 to the outermost
        layer, 10 to the intermediate layer and 5 corresponds to the innermost
        layer.
    replace: bool
        Boolean indicating whether the sample is performed with or
        without replacement. If True, a value can be selected multiple
        times. Otherwise, each value can be selected only once.
    prob_name: str, optional
        The name of an edge attribute used as the weights of sampling for
        each node. This attribute tensor should contain (unnormalized)
        probabilities corresponding to each neighboring edge of a node.
        It must be a 1D floating-point or boolean tensor, with the number
        of elements equalling the total number of edges.
    deduplicate: bool
        Boolean indicating whether seeds between hops will be deduplicated.
        If True, the same elements in seeds will be deleted to only one.
        Otherwise, the same elements will be retained.
    overlap_fetch : bool, optional
        If True, the data loader will overlap the UVA graph fetching operations
        with the rest of operations by using an alternative CUDA stream. This
        option should be enabled if you have moved your graph to the pinned
        memory for optimal performance. Default is False.
    num_gpu_cached_edges : int, optional
        If positive and overlap_fetch is True, then the GPU will cache
        frequently accessed vertex neighborhoods to reduce the PCI-e bandwidth
        demand due to pinned graph accesses.
    gpu_cache_threshold : int, optional
        Determines how many times a vertex needs to be accessed before its
        neighborhood ends up being cached on the GPU.
    cooperative: bool, optional
        Boolean indicating whether Cooperative Minibatching, which was initially
        proposed in
        `Deep Graph Library PR#4337 <https://github.com/dmlc/dgl/pull/4337>`__
        and was later first fully described in
        `Cooperative Minibatching in Graph Neural Networks
        <https://arxiv.org/abs/2310.12403>`__. Cooperation between the GPUs
        eliminates duplicate work performed across the GPUs due to the
        overlapping sampled k-hop neighborhoods of seed nodes when performing
        GNN minibatching.
    asynchronous: bool
        Boolean indicating whether sampling and compaction stages should run
        in background threads to hide the latency of CPU GPU synchronization.
        Should be enabled only when sampling on the GPU.

    Examples
    --------
    >>> import torch
    >>> import dgl.graphbolt as gb
    >>> indptr = torch.LongTensor([0, 2, 4, 5, 6, 7 ,8])
    >>> indices = torch.LongTensor([1, 2, 0, 3, 5, 4, 3, 5])
    >>> graph = gb.fused_csc_sampling_graph(indptr, indices)
    >>> seeds = torch.LongTensor([[0, 1], [1, 2]])
    >>> item_set = gb.ItemSet(seeds, names="seeds")
    >>> datapipe = gb.ItemSampler(item_set, batch_size=1)
    >>> datapipe = datapipe.sample_uniform_negative(graph, 2)
    >>> datapipe = datapipe.sample_neighbor(graph, [5, 10, 15])
    >>> next(iter(datapipe)).sampled_subgraphs
    [SampledSubgraphImpl(sampled_csc=CSCFormatBase(
            indptr=tensor([0, 2, 4, 5, 6, 7, 8]),
            indices=tensor([1, 4, 0, 5, 5, 3, 3, 2]),
        ),
        original_row_node_ids=tensor([0, 1, 4, 5, 2, 3]),
        original_edge_ids=None,
        original_column_node_ids=tensor([0, 1, 4, 5, 2, 3]),
    ),
    SampledSubgraphImpl(sampled_csc=CSCFormatBase(
            indptr=tensor([0, 2, 4, 5, 6, 7, 8]),
            indices=tensor([1, 4, 0, 5, 5, 3, 3, 2]),
        ),
        original_row_node_ids=tensor([0, 1, 4, 5, 2, 3]),
        original_edge_ids=None,
        original_column_node_ids=tensor([0, 1, 4, 5, 2, 3]),
    ),
    SampledSubgraphImpl(sampled_csc=CSCFormatBase(
            indptr=tensor([0, 2, 4, 5, 6]),
            indices=tensor([1, 4, 0, 5, 5, 3]),
        ),
        original_row_node_ids=tensor([0, 1, 4, 5, 2, 3]),
        original_edge_ids=None,
        original_column_node_ids=tensor([0, 1, 4, 5]),
    )]
    """

    # pylint: disable=useless-super-delegation
    def __init__(
        self,
        datapipe,
        graph,
        fanouts,
        replace=False,
        prob_name=None,
        deduplicate=True,
        overlap_fetch=False,
        num_gpu_cached_edges=0,
        gpu_cache_threshold=1,
        cooperative=False,
        asynchronous=False,
    ):
        # Delegates to the shared implementation with the graph's
        # `sample_neighbors` bound method as the sampler.
        super().__init__(
            datapipe,
            graph,
            fanouts,
            replace,
            prob_name,
            deduplicate,
            graph.sample_neighbors,
            overlap_fetch,
            num_gpu_cached_edges,
            gpu_cache_threshold,
            cooperative,
            asynchronous,
        )
@functional_datapipe("sample_layer_neighbor")
class LayerNeighborSampler(NeighborSamplerImpl):
    # pylint: disable=abstract-method
    """Sample layer neighbor edges from a graph and return a subgraph.

    Functional name: :obj:`sample_layer_neighbor`.

    Sampler that builds computational dependency of node representations via
    labor sampling for multilayer GNN from the NeurIPS 2023 paper
    `Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs
    <https://arxiv.org/abs/2210.13339>`__

    Layer-Neighbor sampler is responsible for sampling a subgraph from given
    data. It returns an induced subgraph along with compacted information. In
    the context of a node classification task, the neighbor sampler directly
    utilizes the nodes provided as seed nodes. However, in scenarios involving
    link prediction, the process needs another pre-process operation. That is,
    gathering unique nodes from the given node pairs, encompassing both
    positive and negative node pairs, and employing these nodes as the seed
    nodes for subsequent steps. When the graph is hetero, sampled subgraphs in
    minibatch will contain every edge type even though it is empty after
    sampling.

    Implements the approach described in Appendix A.3 of the paper. Similar to
    dgl.dataloading.LaborSampler but this uses sequential poisson sampling
    instead of poisson sampling to keep the count of sampled edges per vertex
    deterministic like NeighborSampler. Thus, it is a drop-in replacement for
    NeighborSampler. However, unlike NeighborSampler, it samples fewer vertices
    and edges for multilayer GNN scenario without harming convergence speed with
    respect to training iterations.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe.
    graph : FusedCSCSamplingGraph
        The graph on which to perform subgraph sampling.
    fanouts: list[torch.Tensor]
        The number of edges to be sampled for each node with or without
        considering edge types. The length of this parameter implicitly
        signifies the layer of sampling being conducted.
    replace: bool
        Boolean indicating whether the sample is performed with or
        without replacement. If True, a value can be selected multiple
        times. Otherwise, each value can be selected only once.
    prob_name: str, optional
        The name of an edge attribute used as the weights of sampling for
        each node. This attribute tensor should contain (unnormalized)
        probabilities corresponding to each neighboring edge of a node.
        It must be a 1D floating-point or boolean tensor, with the number
        of elements equalling the total number of edges.
    deduplicate: bool
        Boolean indicating whether seeds between hops will be deduplicated.
        If True, the same elements in seeds will be deleted to only one.
        Otherwise, the same elements will be retained.
    layer_dependency: bool
        Boolean indicating whether different layers should use the same random
        variates. Results in a reduction in the number of nodes sampled and
        turns LayerNeighborSampler into a subgraph sampling method. Later layers
        will be guaranteed to sample overlapping neighbors as the previous
        layers.
    batch_dependency: int
        Specifies whether consecutive minibatches should use similar random
        variates. Results in a higher temporal access locality of sampled
        nodes and edges. Setting it to :math:`\\kappa` slows down the change in
        the random variates proportional to :math:`\\frac{1}{\\kappa}`.
        Implements the dependent minibatching approach in
        `arXiv:2310.12403 <https://arxiv.org/abs/2310.12403>`__.
    overlap_fetch : bool, optional
        If True, the data loader will overlap the UVA graph fetching operations
        with the rest of operations by using an alternative CUDA stream. This
        option should be enabled if you have moved your graph to the pinned
        memory for optimal performance. Default is False.
    num_gpu_cached_edges : int, optional
        If positive and overlap_fetch is True, then the GPU will cache
        frequently accessed vertex neighborhoods to reduce the PCI-e bandwidth
        demand due to pinned graph accesses.
    gpu_cache_threshold : int, optional
        Determines how many times a vertex needs to be accessed before its
        neighborhood ends up being cached on the GPU.
    cooperative: bool, optional
        Boolean indicating whether Cooperative Minibatching, which was initially
        proposed in
        `Deep Graph Library PR#4337 <https://github.com/dmlc/dgl/pull/4337>`__
        and was later first fully described in
        `Cooperative Minibatching in Graph Neural Networks
        <https://arxiv.org/abs/2310.12403>`__. Cooperation between the GPUs
        eliminates duplicate work performed across the GPUs due to the
        overlapping sampled k-hop neighborhoods of seed nodes when performing
        GNN minibatching.
    asynchronous: bool
        Boolean indicating whether sampling and compaction stages should run
        in background threads to hide the latency of CPU GPU synchronization.
        Should be enabled only when sampling on the GPU.

    Examples
    --------
    >>> import dgl.graphbolt as gb
    >>> import torch
    >>> indptr = torch.LongTensor([0, 2, 4, 5, 6, 7 ,8])
    >>> indices = torch.LongTensor([1, 2, 0, 3, 5, 4, 3, 5])
    >>> graph = gb.fused_csc_sampling_graph(indptr, indices)
    >>> seeds = torch.LongTensor([[0, 1], [1, 2]])
    >>> item_set = gb.ItemSet(seeds, names="seeds")
    >>> item_sampler = gb.ItemSampler(item_set, batch_size=1,)
    >>> neg_sampler = gb.UniformNegativeSampler(item_sampler, graph, 2)
    >>> fanouts = [torch.LongTensor([5]),
    ...     torch.LongTensor([10]),torch.LongTensor([15])]
    >>> subgraph_sampler = gb.LayerNeighborSampler(neg_sampler, graph, fanouts)
    >>> next(iter(subgraph_sampler)).sampled_subgraphs
    [SampledSubgraphImpl(sampled_csc=CSCFormatBase(
            indptr=tensor([0, 2, 4, 5, 6, 7, 8]),
            indices=tensor([1, 3, 0, 4, 2, 2, 5, 4]),
        ),
        original_row_node_ids=tensor([0, 1, 5, 2, 3, 4]),
        original_edge_ids=None,
        original_column_node_ids=tensor([0, 1, 5, 2, 3, 4]),
    ),
    SampledSubgraphImpl(sampled_csc=CSCFormatBase(
            indptr=tensor([0, 2, 4, 5, 6, 7]),
            indices=tensor([1, 3, 0, 4, 2, 2, 5]),
        ),
        original_row_node_ids=tensor([0, 1, 5, 2, 3, 4]),
        original_edge_ids=None,
        original_column_node_ids=tensor([0, 1, 5, 2, 3]),
    ),
    SampledSubgraphImpl(sampled_csc=CSCFormatBase(
            indptr=tensor([0, 2, 4, 5, 6]),
            indices=tensor([1, 3, 0, 4, 2, 2]),
        ),
        original_row_node_ids=tensor([0, 1, 5, 2, 3]),
        original_edge_ids=None,
        original_column_node_ids=tensor([0, 1, 5, 2]),
    )]
    >>> next(iter(subgraph_sampler)).compacted_seeds
    tensor([[0, 1], [0, 2], [0, 3]])
    >>> next(iter(subgraph_sampler)).labels
    tensor([1., 0., 0.])
    >>> next(iter(subgraph_sampler)).indexes
    tensor([0, 0, 0])
    """

    def __init__(
        self,
        datapipe,
        graph,
        fanouts,
        replace=False,
        prob_name=None,
        deduplicate=True,
        layer_dependency=False,
        batch_dependency=1,
        overlap_fetch=False,
        num_gpu_cached_edges=0,
        gpu_cache_threshold=1,
        cooperative=False,
        asynchronous=False,
    ):
        # Delegates to the shared implementation with the graph's
        # `sample_layer_neighbors` bound method as the sampler; note that the
        # dependency arguments come last in the parent signature.
        super().__init__(
            datapipe,
            graph,
            fanouts,
            replace,
            prob_name,
            deduplicate,
            graph.sample_layer_neighbors,
            overlap_fetch,
            num_gpu_cached_edges,
            gpu_cache_threshold,
            cooperative,
            asynchronous,
            layer_dependency,
            batch_dependency,
        )
def _graph_data_to_fused_csc_sampling_graph(
    dataset_dir: str,
    graph_data: Dict,
    include_original_edge_id: bool,
    auto_cast_to_optimal_dtype: bool,
) -> FusedCSCSamplingGraph:
    """Convert the raw graph data into FusedCSCSamplingGraph.

    The graph is treated as homogeneous when it has exactly one node entry
    and one edge entry and neither carries a ``type`` key; otherwise it is
    treated as heterogeneous and all node/edge types are merged into a single
    ID space (types sorted by name, IDs shifted by per-type offsets).

    Note that in the heterogeneous case ``graph_data["nodes"]`` and
    ``graph_data["edges"]`` are sorted in place.

    Parameters
    ----------
    dataset_dir : str
        The path to the dataset directory.
    graph_data : Dict
        The raw data read from yaml file.
    include_original_edge_id : bool
        Whether to include the original edge id in the FusedCSCSamplingGraph.
    auto_cast_to_optimal_dtype : bool
        Casts the dtypes of tensors in the dataset into smallest possible
        dtypes for reduced storage requirements and potentially increased
        performance.

    Returns
    -------
    sampling_graph : FusedCSCSamplingGraph
        The FusedCSCSamplingGraph constructed from the raw data.
    """
    # Imported lazily inside the function rather than at module level.
    from ...sparse import spmatrix

    is_homogeneous = (
        len(graph_data["nodes"]) == 1
        and len(graph_data["edges"]) == 1
        and "type" not in graph_data["nodes"][0]
        and "type" not in graph_data["edges"][0]
    )

    if is_homogeneous:
        # Homogeneous graph.
        edge_fmt = graph_data["edges"][0]["format"]
        edge_path = graph_data["edges"][0]["path"]
        src, dst = read_edges(dataset_dir, edge_fmt, edge_path)
        num_nodes = graph_data["nodes"][0]["num"]
        num_edges = len(src)
        coo_tensor = torch.tensor(np.array([src, dst]))
        sparse_matrix = spmatrix(coo_tensor, shape=(num_nodes, num_nodes))
        # Intermediates are dropped as soon as they are no longer needed.
        del coo_tensor
        indptr, indices, edge_ids = sparse_matrix.csc()
        del sparse_matrix

        if auto_cast_to_optimal_dtype:
            # `indices` is narrowed based on the node count, while `indptr`
            # and `edge_ids` are narrowed based on the edge count.
            if num_nodes <= torch.iinfo(torch.int32).max:
                indices = indices.to(torch.int32)
            if num_edges <= torch.iinfo(torch.int32).max:
                indptr = indptr.to(torch.int32)
                edge_ids = edge_ids.to(torch.int32)

        node_type_offset = None
        type_per_edge = None
        node_type_to_id = None
        edge_type_to_id = None
        node_attributes = {}
        edge_attributes = {}
        if include_original_edge_id:
            edge_attributes[ORIGINAL_EDGE_ID] = edge_ids
    else:
        # Heterogeneous graph.
        # Sort graph_data by ntype/etype lexicographically to ensure ordering.
        graph_data["nodes"].sort(key=lambda x: x["type"])
        graph_data["edges"].sort(key=lambda x: x["type"])
        # Construct node_type_offset and node_type_to_id.
        node_type_offset = [0]
        node_type_to_id = {}
        for ntype_id, node_info in enumerate(graph_data["nodes"]):
            node_type_to_id[node_info["type"]] = ntype_id
            node_type_offset.append(node_type_offset[-1] + node_info["num"])
        total_num_nodes = node_type_offset[-1]
        # Construct edge_type_offset, edge_type_to_id and coo_tensor.
        edge_type_offset = [0]
        edge_type_to_id = {}
        coo_src_list = []
        coo_dst_list = []
        coo_etype_list = []
        for etype_id, edge_info in enumerate(graph_data["edges"]):
            edge_type_to_id[edge_info["type"]] = etype_id
            edge_fmt = edge_info["format"]
            edge_path = edge_info["path"]
            src, dst = read_edges(dataset_dir, edge_fmt, edge_path)
            edge_type_offset.append(edge_type_offset[-1] + len(src))
            src_type, _, dst_type = etype_str_to_tuple(edge_info["type"])
            # Shift per-type node IDs by the offset of their node type.
            src += node_type_offset[node_type_to_id[src_type]]
            dst += node_type_offset[node_type_to_id[dst_type]]
            coo_src_list.append(torch.tensor(src))
            coo_dst_list.append(torch.tensor(dst))
            coo_etype_list.append(torch.full((len(src),), etype_id))
        total_num_edges = edge_type_offset[-1]

        coo_src = torch.cat(coo_src_list)
        del coo_src_list
        coo_dst = torch.cat(coo_dst_list)
        del coo_dst_list
        if auto_cast_to_optimal_dtype:
            # Pick the first dtype in the list whose max value can hold the
            # largest edge type ID.
            dtypes = [torch.uint8, torch.int16, torch.int32, torch.int64]
            dtype_maxes = [torch.iinfo(dtype).max for dtype in dtypes]
            dtype_id = bisect.bisect_left(dtype_maxes, len(edge_type_to_id) - 1)
            etype_dtype = dtypes[dtype_id]
            coo_etype_list = [
                tensor.to(etype_dtype) for tensor in coo_etype_list
            ]
        coo_etype = torch.cat(coo_etype_list)
        del coo_etype_list

        sparse_matrix = spmatrix(
            indices=torch.stack((coo_src, coo_dst), dim=0),
            shape=(total_num_nodes, total_num_nodes),
        )
        del coo_src, coo_dst
        indptr, indices, edge_ids = sparse_matrix.csc()
        del sparse_matrix

        if auto_cast_to_optimal_dtype:
            if total_num_nodes <= torch.iinfo(torch.int32).max:
                indices = indices.to(torch.int32)
            if total_num_edges <= torch.iinfo(torch.int32).max:
                indptr = indptr.to(torch.int32)
                edge_ids = edge_ids.to(torch.int32)

        # From here on `node_type_offset` is a tensor, no longer a list.
        node_type_offset = torch.tensor(node_type_offset, dtype=indices.dtype)
        # Reorder the per-edge types with `edge_ids`; presumably this maps
        # CSC positions back to positions in the concatenated COO input.
        type_per_edge = torch.index_select(coo_etype, dim=0, index=edge_ids)
        del coo_etype
        node_attributes = {}
        edge_attributes = {}
        if include_original_edge_id:
            # If uint8 or int16 was chosen above for etypes, we cast to int.
            temp_etypes = (
                type_per_edge.int()
                if type_per_edge.element_size() < 4
                else type_per_edge
            )
            # Subtract the start offset of each edge's type so that the
            # stored IDs are relative to the edge type.
            edge_ids -= torch.index_select(
                torch.tensor(edge_type_offset, dtype=edge_ids.dtype),
                dim=0,
                index=temp_etypes,
            )
            del temp_etypes
            edge_attributes[ORIGINAL_EDGE_ID] = edge_ids

    # Load the sampling related node/edge features and add them to
    # the sampling-graph.
    if graph_data.get("feature_data", None):
        if is_homogeneous:
            # Homogeneous graph.
            for graph_feature in graph_data["feature_data"]:
                # `in_memory` defaults to True when not specified.
                in_memory = (
                    True
                    if "in_memory" not in graph_feature
                    else graph_feature["in_memory"]
                )
                if graph_feature["domain"] == "node":
                    node_data = read_data(
                        os.path.join(dataset_dir, graph_feature["path"]),
                        graph_feature["format"],
                        in_memory=in_memory,
                    )
                    assert node_data.shape[0] == num_nodes
                    node_attributes[graph_feature["name"]] = node_data
                elif graph_feature["domain"] == "edge":
                    edge_data = read_data(
                        os.path.join(dataset_dir, graph_feature["path"]),
                        graph_feature["format"],
                        in_memory=in_memory,
                    )
                    assert edge_data.shape[0] == num_edges
                    edge_attributes[graph_feature["name"]] = edge_data
        else:
            # Heterogeneous graph.
            # Collect features as {feature name: {type: data}} first, then
            # merge each feature into one tensor spanning all types.
            node_feature_collector = {}
            edge_feature_collector = {}
            for graph_feature in graph_data["feature_data"]:
                in_memory = (
                    True
                    if "in_memory" not in graph_feature
                    else graph_feature["in_memory"]
                )
                if graph_feature["domain"] == "node":
                    node_data = read_data(
                        os.path.join(dataset_dir, graph_feature["path"]),
                        graph_feature["format"],
                        in_memory=in_memory,
                    )
                    if graph_feature["name"] not in node_feature_collector:
                        node_feature_collector[graph_feature["name"]] = {}
                    node_feature_collector[graph_feature["name"]][
                        graph_feature["type"]
                    ] = node_data
                elif graph_feature["domain"] == "edge":
                    edge_data = read_data(
                        os.path.join(dataset_dir, graph_feature["path"]),
                        graph_feature["format"],
                        in_memory=in_memory,
                    )
                    if graph_feature["name"] not in edge_feature_collector:
                        edge_feature_collector[graph_feature["name"]] = {}
                    edge_feature_collector[graph_feature["name"]][
                        graph_feature["type"]
                    ] = edge_data

            # For heterogenous, a node/edge feature must cover all node/edge types.
            all_node_types = set(node_type_to_id.keys())
            for feat_name, feat_data in node_feature_collector.items():
                existing_node_type = set(feat_data.keys())
                assert all_node_types == existing_node_type, (
                    f"Node feature {feat_name} does not cover all node types. "
                    f"Existing types: {existing_node_type}. "
                    f"Expected types: {all_node_types}."
                )
            all_edge_types = set(edge_type_to_id.keys())
            for feat_name, feat_data in edge_feature_collector.items():
                existing_edge_type = set(feat_data.keys())
                assert all_edge_types == existing_edge_type, (
                    f"Edge feature {feat_name} does not cover all edge types. "
                    f"Existing types: {existing_edge_type}. "
                    f"Expected types: {all_edge_types}."
                )

            for feat_name, feat_data in node_feature_collector.items():
                # Trailing shape and dtype are taken from the first per-type
                # tensor of this feature.
                _feat = next(iter(feat_data.values()))
                feat_tensor = torch.empty(
                    ([total_num_nodes] + list(_feat.shape[1:])),
                    dtype=_feat.dtype,
                )
                for ntype, feat in feat_data.items():
                    # Write each type's rows into its [offset, next offset)
                    # slice of the merged tensor.
                    feat_tensor[
                        node_type_offset[
                            node_type_to_id[ntype]
                        ] : node_type_offset[node_type_to_id[ntype] + 1]
                    ] = feat
                node_attributes[feat_name] = feat_tensor
            del node_feature_collector
            for feat_name, feat_data in edge_feature_collector.items():
                _feat = next(iter(feat_data.values()))
                feat_tensor = torch.empty(
                    ([total_num_edges] + list(_feat.shape[1:])),
                    dtype=_feat.dtype,
                )
                for etype, feat in feat_data.items():
                    feat_tensor[
                        edge_type_offset[
                            edge_type_to_id[etype]
                        ] : edge_type_offset[edge_type_to_id[etype] + 1]
                    ] = feat
                edge_attributes[feat_name] = feat_tensor
            del edge_feature_collector

    # Empty attribute dicts are passed on as None.
    if not bool(node_attributes):
        node_attributes = None
    if not bool(edge_attributes):
        edge_attributes = None

    # Construct the FusedCSCSamplingGraph.
    return fused_csc_sampling_graph(
        csc_indptr=indptr,
        indices=indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
    )
def _replace_pt_suffix_with_npy(path: str) -> str:
    """Return ``path`` with a trailing ``.pt`` extension swapped for ``.npy``.

    Only the file extension is touched. A plain ``str.replace("pt", "npy")``
    would also rewrite every other "pt" in the path (e.g. a directory named
    ``adapt``), silently changing where preprocessed files are written.
    """
    root, ext = os.path.splitext(path)
    return root + ".npy" if ext == ".pt" else path


def preprocess_ondisk_dataset(
    dataset_dir: str,
    include_original_edge_id: bool = False,
    force_preprocess: bool = None,
    auto_cast_to_optimal_dtype: bool = True,
) -> str:
    """Preprocess the on-disk dataset. Parse the input config file,
    load the data, and save the data in the format that GraphBolt supports.

    Parameters
    ----------
    dataset_dir : str
        The path to the dataset directory.
    include_original_edge_id : bool, optional
        Whether to include the original edge id in the FusedCSCSamplingGraph.
    force_preprocess: bool, optional
        Whether to force reload the ondisk dataset. When ``None`` and a
        preprocessed dataset already exists, preprocessing is redone only if
        ``include_original_edge_id`` differs from the recorded value or the
        dataset is detected to have changed.
    auto_cast_to_optimal_dtype: bool, optional
        Casts the dtypes of tensors in the dataset into smallest possible
        dtypes for reduced storage requirements and potentially increased
        performance. Default is True.

    Returns
    -------
    output_config_path : str
        The path to the output config file.

    Raises
    ------
    RuntimeError
        If ``dataset_dir`` does not exist or is not a directory, if
        ``metadata.yaml`` is missing, or if the config has no ``graph`` field.
    """
    # Check if the dataset path is valid.
    if not os.path.exists(dataset_dir):
        raise RuntimeError(f"Invalid dataset path: {dataset_dir}")

    # Check if the dataset_dir is a directory.
    if not os.path.isdir(dataset_dir):
        raise RuntimeError(
            f"The dataset must be a directory. But got {dataset_dir}"
        )

    # 0. Check if the dataset is already preprocessed.
    processed_dir_prefix = "preprocessed"
    preprocess_metadata_path = os.path.join(
        processed_dir_prefix, "metadata.yaml"
    )
    if os.path.exists(os.path.join(dataset_dir, preprocess_metadata_path)):
        if force_preprocess is None:
            with open(
                os.path.join(dataset_dir, preprocess_metadata_path), "r"
            ) as f:
                # An empty YAML file loads as None; treat it as an empty
                # config so that it triggers re-preprocessing instead of an
                # AttributeError.
                preprocess_config = yaml.safe_load(f) or {}
            if (
                preprocess_config.get("include_original_edge_id", None)
                == include_original_edge_id
            ):
                force_preprocess = check_dataset_change(
                    dataset_dir, processed_dir_prefix
                )
            else:
                force_preprocess = True
        if force_preprocess:
            shutil.rmtree(os.path.join(dataset_dir, processed_dir_prefix))
            print(
                "The on-disk dataset is re-preprocessing, so the existing "
                + "preprocessed dataset has been removed."
            )
        else:
            print("The dataset is already preprocessed.")
            return os.path.join(dataset_dir, preprocess_metadata_path)

    print("Start to preprocess the on-disk dataset.")

    # Check if the metadata.yaml exists.
    metadata_file_path = os.path.join(dataset_dir, "metadata.yaml")
    if not os.path.exists(metadata_file_path):
        raise RuntimeError("metadata.yaml does not exist.")

    # Read the input config.
    with open(metadata_file_path, "r") as f:
        input_config = yaml.safe_load(f)

    # 1. Make `processed_dir_abs` directory if it does not exist.
    os.makedirs(os.path.join(dataset_dir, processed_dir_prefix), exist_ok=True)
    output_config = deepcopy(input_config)

    # 2. Load the data and create a FusedCSCSamplingGraph.
    if "graph" not in input_config:
        raise RuntimeError("Invalid config: does not contain graph field.")

    sampling_graph = _graph_data_to_fused_csc_sampling_graph(
        dataset_dir,
        input_config["graph"],
        include_original_edge_id,
        auto_cast_to_optimal_dtype,
    )

    # 3. Record value of include_original_edge_id.
    output_config["include_original_edge_id"] = include_original_edge_id

    # 4. Save the FusedCSCSamplingGraph and modify the output_config.
    output_config["graph_topology"] = {}
    output_config["graph_topology"]["type"] = "FusedCSCSamplingGraph"
    output_config["graph_topology"]["path"] = os.path.join(
        processed_dir_prefix, "fused_csc_sampling_graph.pt"
    )

    # Remember whether node IDs fit into int32 before the graph is released;
    # seed tensors are cast accordingly in step 6.
    node_ids_within_int32 = (
        sampling_graph.indices.dtype == torch.int32
        and auto_cast_to_optimal_dtype
    )
    torch.save(
        sampling_graph,
        os.path.join(
            dataset_dir,
            output_config["graph_topology"]["path"],
        ),
    )
    del sampling_graph
    del output_config["graph"]

    # 5. Load the node/edge features and do necessary conversion.
    if input_config.get("feature_data", None):
        has_edge_feature_data = False
        for feature, out_feature in zip(
            input_config["feature_data"], output_config["feature_data"]
        ):
            # Always save the feature in numpy format.
            out_feature["format"] = "numpy"
            out_feature["path"] = os.path.join(
                processed_dir_prefix,
                _replace_pt_suffix_with_npy(feature["path"]),
            )
            in_memory = (
                True if "in_memory" not in feature else feature["in_memory"]
            )
            if not has_edge_feature_data and feature["domain"] == "edge":
                has_edge_feature_data = True
            copy_or_convert_data(
                os.path.join(dataset_dir, feature["path"]),
                os.path.join(dataset_dir, out_feature["path"]),
                feature["format"],
                output_format=out_feature["format"],
                in_memory=in_memory,
                is_feature=True,
            )
        if has_edge_feature_data and not include_original_edge_id:
            gb_warning("Edge feature is stored, but edge IDs are not saved.")

    # 6. Save tasks and train/val/test split according to the output_config.
    if input_config.get("tasks", None):
        for input_task, output_task in zip(
            input_config["tasks"], output_config["tasks"]
        ):
            for set_name in ["train_set", "validation_set", "test_set"]:
                if set_name not in input_task:
                    continue
                for input_set_per_type, output_set_per_type in zip(
                    input_task[set_name], output_task[set_name]
                ):
                    for input_data, output_data in zip(
                        input_set_per_type["data"], output_set_per_type["data"]
                    ):
                        # Always save the feature in numpy format.
                        output_data["format"] = "numpy"
                        output_data["path"] = os.path.join(
                            processed_dir_prefix,
                            _replace_pt_suffix_with_npy(input_data["path"]),
                        )
                        name = input_data.get("name")
                        # Only data known to hold node IDs is narrowed to
                        # int32, and only if the graph itself uses int32.
                        copy_or_convert_data(
                            os.path.join(dataset_dir, input_data["path"]),
                            os.path.join(dataset_dir, output_data["path"]),
                            input_data["format"],
                            output_data["format"],
                            within_int32=node_ids_within_int32
                            and name in NAMES_INDICATING_NODE_IDS,
                        )

    # 7. Save the output_config.
    output_config_path = os.path.join(dataset_dir, preprocess_metadata_path)
    with open(output_config_path, "w") as f:
        yaml.dump(output_config, f)
    print("Finish preprocessing the on-disk dataset.")

    # 8. Calculate and save the hash value of the dataset directory.
    hash_value_file = "dataset_hash_value.txt"
    hash_value_file_path = os.path.join(
        dataset_dir, processed_dir_prefix, hash_value_file
    )
    if os.path.exists(hash_value_file_path):
        os.remove(hash_value_file_path)
    dir_hash = calculate_dir_hash(dataset_dir)
    with open(hash_value_file_path, "w") as f:
        f.write(json.dumps(dir_hash, indent=4))

    # 9. Return the absolute path of the preprocessing yaml file.
    return output_config_path
class OnDiskTask:
    """A single task belonging to an ``OnDiskDataset``.

    Bundles the task metadata together with its train/validation/test
    item sets and exposes them through read-only properties.
    """

    def __init__(
        self,
        metadata: Dict,
        train_set: Union[ItemSet, HeteroItemSet],
        validation_set: Union[ItemSet, HeteroItemSet],
        test_set: Union[ItemSet, HeteroItemSet],
    ):
        """Store the metadata and the three splits of the task.

        Parameters
        ----------
        metadata : Dict
            Metadata.
        train_set : Union[ItemSet, HeteroItemSet]
            Training set.
        validation_set : Union[ItemSet, HeteroItemSet]
            Validation set.
        test_set : Union[ItemSet, HeteroItemSet]
            Test set.
        """
        self._metadata = metadata
        self._train_set = train_set
        self._validation_set = validation_set
        self._test_set = test_set

    @property
    def metadata(self) -> Dict:
        """Return the task metadata."""
        return self._metadata

    @property
    def train_set(self) -> Union[ItemSet, HeteroItemSet]:
        """Return the training set."""
        return self._train_set

    @property
    def validation_set(self) -> Union[ItemSet, HeteroItemSet]:
        """Return the validation set."""
        return self._validation_set

    @property
    def test_set(self) -> Union[ItemSet, HeteroItemSet]:
        """Return the test set."""
        return self._test_set

    def __repr__(self) -> str:
        # Attributes are listed in reversed `get_attributes` order; names
        # starting with an underscore are left out.
        names = get_attributes(self)
        names.reverse()
        rendered = "".join(
            f"{name}={getattr(self, name)},\n"
            for name in names
            if name[0] != "_"
        )
        # Align continuation lines under the opening parenthesis.
        padding = " " * len("OnDiskTask(")
        rendered = textwrap.indent(rendered, padding).strip()
        return "{Classname}({attributes})".format(
            Classname=self.__class__.__name__, attributes=rendered
        )
class OnDiskDataset(Dataset):
    """An on-disk dataset which reads graph topology, feature data and
    Train/Validation/Test set from disk.

    Due to limited resources, the data which are too large to fit into RAM will
    remain on disk while others reside in RAM once ``OnDiskDataset`` is
    initialized. This behavior could be controlled by user via ``in_memory``
    field in YAML file. All paths in YAML file are relative paths to the
    dataset directory.

    A full example of YAML file is as follows:

    .. code-block:: yaml

        dataset_name: graphbolt_test
        graph:
          nodes:
            - type: paper # could be omitted for homogeneous graph.
              num: 1000
            - type: author
              num: 1000
          edges:
            - type: author:writes:paper # could be omitted for homogeneous graph.
              format: csv # Can be csv only.
              path: edge_data/author-writes-paper.csv
            - type: paper:cites:paper
              format: csv
              path: edge_data/paper-cites-paper.csv
        feature_data:
          - domain: node
            type: paper # could be omitted for homogeneous graph.
            name: feat
            format: numpy
            in_memory: false # If not specified, default to true.
            path: node_data/paper-feat.npy
          - domain: edge
            type: "author:writes:paper"
            name: feat
            format: numpy
            in_memory: false
            path: edge_data/author-writes-paper-feat.npy
        tasks:
          - name: "edge_classification"
            num_classes: 10
            train_set:
              - type: paper # could be omitted for homogeneous graph.
                data: # multiple data sources could be specified.
                  - name: seeds
                    format: numpy # Can be numpy or torch.
                    in_memory: true # If not specified, default to true.
                    path: set/paper-train-seeds.npy
                  - name: labels
                    format: numpy
                    path: set/paper-train-labels.npy
            validation_set:
              - type: paper
                data:
                  - name: seeds
                    format: numpy
                    path: set/paper-validation-seeds.npy
                  - name: labels
                    format: numpy
                    path: set/paper-validation-labels.npy
            test_set:
              - type: paper
                data:
                  - name: seeds
                    format: numpy
                    path: set/paper-test-seeds.npy
                  - name: labels
                    format: numpy
                    path: set/paper-test-labels.npy

    Parameters
    ----------
    path: str
        The path of the dataset directory that holds the YAML file.
    include_original_edge_id: bool, optional
        Whether to include the original edge id in the FusedCSCSamplingGraph.
    force_preprocess: bool, optional
        Whether to force reload the ondisk dataset.
    auto_cast_to_optimal_dtype: bool, optional
        Casts the dtypes of tensors in the dataset into smallest possible
        dtypes for reduced storage requirements and potentially increased
        performance. Default is True.
    """

    def __init__(
        self,
        path: str,
        include_original_edge_id: bool = False,
        force_preprocess: bool = None,
        auto_cast_to_optimal_dtype: bool = True,
    ) -> None:
        # Always call the preprocess function first. If already preprocessed,
        # the function will return the original path directly.
        self._dataset_dir = path
        yaml_path = preprocess_ondisk_dataset(
            path,
            include_original_edge_id,
            force_preprocess,
            auto_cast_to_optimal_dtype,
        )
        with open(yaml_path) as f:
            self._yaml_data = yaml.load(f, Loader=yaml.loader.SafeLoader)
        # Tracks whether the paths in `_yaml_data` were already prefixed with
        # the dataset directory, so that repeated `load()` calls are safe.
        self._yaml_paths_resolved = False
        self._loaded = False

    def _convert_yaml_path_to_absolute_path(self):
        """Prefix every path in the YAML data with the dataset directory.

        The conversion mutates ``self._yaml_data`` in place and is performed
        at most once: joining the dataset directory a second time onto an
        already-joined relative path would produce a non-existent location.
        """
        if getattr(self, "_yaml_paths_resolved", False):
            return
        if "graph_topology" in self._yaml_data:
            self._yaml_data["graph_topology"]["path"] = os.path.join(
                self._dataset_dir, self._yaml_data["graph_topology"]["path"]
            )
        if "feature_data" in self._yaml_data:
            for feature in self._yaml_data["feature_data"]:
                feature["path"] = os.path.join(
                    self._dataset_dir, feature["path"]
                )
        if "tasks" in self._yaml_data:
            for task in self._yaml_data["tasks"]:
                for set_name in ["train_set", "validation_set", "test_set"]:
                    if set_name not in task:
                        continue
                    for set_per_type in task[set_name]:
                        for data in set_per_type["data"]:
                            data["path"] = os.path.join(
                                self._dataset_dir, data["path"]
                            )
        self._yaml_paths_resolved = True

    def load(self, tasks: List[str] = None):
        """Load the dataset.

        May be called more than once, e.g. to load a different selection of
        tasks.

        Parameters
        ----------
        tasks: List[str] = None
            The name of the tasks to be loaded. For single task, the type of
            tasks can be both string and List[str]. For multiple tasks, only
            List[str] is acceptable.

        Returns
        -------
        OnDiskDataset
            The dataset itself, so that calls can be chained.

        Examples
        --------
        1. Loading via single task name "node_classification".

        >>> dataset = gb.OnDiskDataset(base_dir).load(
        ...     tasks="node_classification")
        >>> len(dataset.tasks)
        1
        >>> dataset.tasks[0].metadata["name"]
        "node_classification"

        2. Loading via single task name ["node_classification"].

        >>> dataset = gb.OnDiskDataset(base_dir).load(
        ...     tasks=["node_classification"])
        >>> len(dataset.tasks)
        1
        >>> dataset.tasks[0].metadata["name"]
        "node_classification"

        3. Loading via multiple task names ["node_classification",
        "link_prediction"].

        >>> dataset = gb.OnDiskDataset(base_dir).load(
        ...     tasks=["node_classification","link_prediction"])
        >>> len(dataset.tasks)
        2
        >>> dataset.tasks[0].metadata["name"]
        "node_classification"
        >>> dataset.tasks[1].metadata["name"]
        "link_prediction"
        """
        self._convert_yaml_path_to_absolute_path()
        self._meta = OnDiskMetaData(**self._yaml_data)
        self._dataset_name = self._meta.dataset_name
        self._graph = self._load_graph(self._meta.graph_topology)
        self._feature = TorchBasedFeatureStore(self._meta.feature_data)
        self._tasks = self._init_tasks(self._meta.tasks, tasks)
        self._all_nodes_set = self._init_all_nodes_set(self._graph)
        self._loaded = True
        return self

    @property
    def yaml_data(self) -> Dict:
        """Return the YAML data."""
        return self._yaml_data

    @property
    def tasks(self) -> List[Task]:
        """Return the tasks."""
        self._check_loaded()
        return self._tasks

    @property
    def graph(self) -> SamplingGraph:
        """Return the graph."""
        self._check_loaded()
        return self._graph

    @property
    def feature(self) -> TorchBasedFeatureStore:
        """Return the feature."""
        self._check_loaded()
        return self._feature

    @property
    def dataset_name(self) -> str:
        """Return the dataset name."""
        self._check_loaded()
        return self._dataset_name

    @property
    def all_nodes_set(self) -> Union[ItemSet, HeteroItemSet]:
        """Return the itemset containing all nodes."""
        self._check_loaded()
        return self._all_nodes_set

    def _init_tasks(
        self, tasks: List[OnDiskTaskData], selected_tasks: List[str]
    ) -> List[OnDiskTask]:
        """Initialize the tasks.

        All tasks are built when ``selected_tasks`` is ``None``; otherwise
        only those whose ``name`` is selected. Selected names that do not
        appear in the YAML produce a warning and are skipped.
        """
        if isinstance(selected_tasks, str):
            selected_tasks = [selected_tasks]
        if selected_tasks and not isinstance(selected_tasks, list):
            raise TypeError(
                f"The type of selected_task should be list, but got {type(selected_tasks)}"
            )
        ret = []
        if tasks is None:
            return ret
        task_names = set()
        for task in tasks:
            task_name = task.extra_fields.get("name", None)
            if selected_tasks is None or task_name in selected_tasks:
                ret.append(
                    OnDiskTask(
                        task.extra_fields,
                        self._init_tvt_set(task.train_set),
                        self._init_tvt_set(task.validation_set),
                        self._init_tvt_set(task.test_set),
                    )
                )
                if selected_tasks:
                    task_names.add(task_name)
        if selected_tasks:
            not_found_tasks = set(selected_tasks) - task_names
            if len(not_found_tasks):
                gb_warning(
                    f"Below tasks are not found in YAML: {not_found_tasks}. Skipped."
                )
        return ret

    def _check_loaded(self):
        """Assert that ``load()`` has been called."""
        assert self._loaded, (
            "Please ensure that you have called the OnDiskDataset.load() method"
            + " to properly load the data."
        )

    def _load_graph(
        self, graph_topology: OnDiskGraphTopology
    ) -> FusedCSCSamplingGraph:
        """Load the graph topology.

        Returns ``None`` when no topology is configured and raises
        ``NotImplementedError`` for unsupported topology types.
        """
        if graph_topology is None:
            return None
        if graph_topology.type == "FusedCSCSamplingGraph":
            # NOTE: `weights_only=False` unpickles arbitrary objects; the
            # file is expected to be the one written during preprocessing.
            return torch.load(graph_topology.path, weights_only=False)
        raise NotImplementedError(
            f"Graph topology type {graph_topology.type} is not supported."
        )

    def _init_tvt_set(
        self, tvt_set: List[OnDiskTVTSet]
    ) -> Union[ItemSet, HeteroItemSet]:
        """Initialize the TVT set.

        Returns ``None`` for a missing or empty set, an ``ItemSet`` when the
        first entry has no type, and a ``HeteroItemSet`` keyed by type
        otherwise.
        """

        def _build_itemset(data_list) -> ItemSet:
            # One tensor per data source, named after the source.
            return ItemSet(
                tuple(
                    read_data(data.path, data.format, data.in_memory)
                    for data in data_list
                ),
                names=tuple(data.name for data in data_list),
            )

        if (tvt_set is None) or (len(tvt_set) == 0):
            return None
        if tvt_set[0].type is None:
            assert (
                len(tvt_set) == 1
            ), "Only one TVT set is allowed if type is not specified."
            return _build_itemset(tvt_set[0].data)
        return HeteroItemSet(
            {tvt.type: _build_itemset(tvt.data) for tvt in tvt_set}
        )

    def _init_all_nodes_set(self, graph) -> Union[ItemSet, HeteroItemSet]:
        """Build the item set covering all nodes of ``graph``.

        Each node count is wrapped in a scalar tensor that uses the dtype of
        ``graph.indices``. Returns ``None`` (with a warning) if there is no
        graph.
        """
        if graph is None:
            gb_warning(
                "`all_nodes_set` is returned as None, since graph is None."
            )
            return None
        num_nodes = graph.num_nodes
        dtype = graph.indices.dtype
        if isinstance(num_nodes, int):
            return ItemSet(
                torch.tensor(num_nodes, dtype=dtype),
                names="seeds",
            )
        # Otherwise `num_nodes` is a mapping from node type to node count.
        data = {
            node_type: ItemSet(
                torch.tensor(num_node, dtype=dtype),
                names="seeds",
            )
            for node_type, num_node in num_nodes.items()
        }
        return HeteroItemSet(data)
class BuiltinDataset(OnDiskDataset):
    """A utility class to download built-in dataset from AWS S3 and load it as
    :class:`OnDiskDataset`.

    Available built-in datasets include:

    **cora**
        The cora dataset is a homogeneous citation network dataset, which is
        designed for the node classification task.

    **ogbn-mag**
        The ogbn-mag dataset is a heterogeneous network composed of a subset of
        the Microsoft Academic Graph (MAG). See more details in
        `ogbn-mag <https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag>`_.

        .. note::
            Reverse edges are added to the original graph and duplicated
            edges are removed.

    **ogbl-citation2**
        The ogbl-citation2 dataset is a directed graph, representing the
        citation network between a subset of papers extracted from MAG. See
        more details in `ogbl-citation2
        <https://ogb.stanford.edu/docs/linkprop/#ogbl-citation2>`_.

        .. note::
            Reverse edges are added to the original graph and duplicated
            edges are removed.

    **ogbn-arxiv**
        The ogbn-arxiv dataset is a directed graph, representing the citation
        network between all Computer Science (CS) arXiv papers indexed by MAG.
        See more details in `ogbn-arxiv
        <https://ogb.stanford.edu/docs/nodeprop/#ogbn-arxiv>`_.

        .. note::
            Reverse edges are added to the original graph and duplicated
            edges are removed.

    **ogbn-papers100M**
        The ogbn-papers100M dataset is a directed citation graph of about 111
        million papers indexed by MAG. See more details in `ogbn-papers100M
        <https://ogb.stanford.edu/docs/nodeprop/#ogbn-papers100M>`_.

        .. note::
            Reverse edges are added to the original graph and duplicated
            edges are removed.

    **ogbn-products**
        The ogbn-products dataset is an undirected and unweighted graph,
        representing an Amazon product co-purchasing network. See more details
        in `ogbn-products
        <https://ogb.stanford.edu/docs/nodeprop/#ogbn-products>`_.

        .. note::
            Reverse edges are added to the original graph.
            Node features are stored as float32.

    **ogb-lsc-mag240m**
        The ogb-lsc-mag240m dataset is a heterogeneous academic graph extracted
        from the Microsoft Academic Graph (MAG). See more details in
        `ogb-lsc-mag240m <https://ogb.stanford.edu/docs/lsc/mag240m/>`_.

        .. note::
            Reverse edges are added to the original graph.

    **igb-hom and igb-hom-[tiny|small|medium|large]**
        The igb-hom-[tiny|small|medium|large] and igb-hom dataset is a
        homogeneous citation network, which is designed for developers to train
        and evaluate GNN models with high fidelity. See more details in
        `igb-hom-[tiny|small|medium|large]
        <https://github.com/IllinoisGraphBenchmark/IGB-Datasets>`_.

        .. note::
            Self edges are added to the original graph.
            Node features are stored as float32.

    **igb-het-[tiny|small|medium]**
        The igb-het-[tiny|small|medium] dataset is a heterogeneous citation
        network, which is designed for developers to train and evaluate GNN
        models with high fidelity. See more details in
        `igb-het-[tiny|small|medium]
        <https://github.com/IllinoisGraphBenchmark/IGB-Datasets>`_.

        .. note::
            Four Reverse edge types are added to the original graph.
            Node features are stored as float32.

    Parameters
    ----------
    name : str
        The name of the builtin dataset. Names without the ``seeds`` marker
        are mapped to their ``-seeds`` variant.
    root : str, optional
        The root directory of the dataset. Default to ``datasets``.

    Raises
    ------
    RuntimeError
        If the dataset is not present under ``root`` and ``name`` is not one
        of the available built-in datasets.
    """

    # For dataset that is smaller than 30GB, we use the base url.
    # Otherwise, we use the accelerated url.
    _base_url = "https://data.dgl.ai/dataset/graphbolt/"
    _accelerated_url = (
        "https://dgl-data.s3-accelerate.amazonaws.com/dataset/graphbolt/"
    )
    _datasets = [
        "cora",
        "cora-seeds",
        "ogbn-mag",
        "ogbn-mag-seeds",
        "ogbl-citation2",
        "ogbl-citation2-seeds",
        "ogbn-products",
        "ogbn-products-seeds",
        "ogbn-arxiv",
        "ogbn-arxiv-seeds",
        "igb-hom-tiny",
        "igb-hom-tiny-seeds",
        "igb-hom-small",
        "igb-hom-small-seeds",
        "igb-het-tiny",
        "igb-het-tiny-seeds",
        "igb-het-small",
        "igb-het-small-seeds",
    ]
    _large_datasets = [
        "ogb-lsc-mag240m",
        "ogb-lsc-mag240m-seeds",
        "ogbn-papers100M",
        "ogbn-papers100M-seeds",
        "igb-hom-medium",
        "igb-hom-medium-seeds",
        "igb-hom-large",
        "igb-hom-large-seeds",
        "igb-hom",
        "igb-hom-seeds",
        "igb-het-medium",
        "igb-het-medium-seeds",
    ]
    _all_datasets = _datasets + _large_datasets

    def __init__(self, name: str, root: str = "datasets") -> None:
        # For user using DGL 2.2 or later version, we prefer them to use
        # datasets with `seeds` suffix. This hack should be removed, when the
        # datasets with `seeds` suffix have covered previous ones.
        if "seeds" not in name:
            name += "-seeds"
        dataset_dir = os.path.join(root, name)
        # Download only when the dataset directory is not there yet; an
        # existing directory is used as is, without validating `name`.
        if not os.path.exists(dataset_dir):
            if name not in self._all_datasets:
                raise RuntimeError(
                    f"Dataset {name} is not available. Available datasets are "
                    f"{self._all_datasets}."
                )
            url = (
                self._accelerated_url
                if name in self._large_datasets
                else self._base_url
            )
            url += name + ".zip"
            os.makedirs(root, exist_ok=True)
            zip_file_path = os.path.join(root, name + ".zip")
            download(url, path=zip_file_path)
            extract_archive(zip_file_path, root, overwrite=True)
            # The archive is no longer needed once extracted.
            os.remove(zip_file_path)
        super().__init__(dataset_dir, force_preprocess=False)
class ExtraMetaData(pydantic.BaseModel, extra="allow"):
    """Group extra fields into metadata. Internal use only.

    Any input key that is not a declared model field is moved into the
    ``extra_fields`` dictionary before validation.
    """

    # Holds every input key that does not match a declared field.
    extra_fields: Optional[Dict[str, Any]] = {}

    # As pydantic 2.0 has changed the API of validators, we need to use
    # different validators for different versions to be compatible with
    # previous versions.
    if version.parse(pydantic.__version__) >= version.parse("2.0"):

        @pydantic.model_validator(mode="before")
        @classmethod
        def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
            """Move undeclared keys of ``values`` into ``extra_fields``."""
            # Iterate over a copy of the keys since `values` is mutated.
            for key in list(values.keys()):
                if key not in cls.model_fields:
                    values["extra_fields"] = values.get("extra_fields", {})
                    values["extra_fields"][key] = values.pop(key)
            return values

    else:

        @pydantic.root_validator(pre=True)
        @classmethod
        def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
            """Move undeclared keys of ``values`` into ``extra_fields``."""
            # Iterate over a copy of the keys since `values` is mutated.
            for key in list(values.keys()):
                if key not in cls.__fields__:
                    values["extra_fields"] = values.get("extra_fields", {})
                    values["extra_fields"][key] = values.pop(key)
            return values


class OnDiskFeatureDataFormat(str, Enum):
    """Enum of data format."""

    TORCH = "torch"
    NUMPY = "numpy"


class OnDiskTVTSetData(pydantic.BaseModel):
    """Train-Validation-Test set data.

    Describes one data source (name, storage format, path) of a set.
    """

    name: Optional[str] = None
    format: OnDiskFeatureDataFormat
    # Defaults to True when not specified.
    in_memory: Optional[bool] = True
    path: str


class OnDiskTVTSet(pydantic.BaseModel):
    """Train-Validation-Test set.

    A list of data sources, optionally tagged with a type.
    """

    type: Optional[str] = None
    data: List[OnDiskTVTSetData]


class OnDiskFeatureDataDomain(str, Enum):
    """Enum of feature data domain."""

    NODE = "node"
    EDGE = "edge"
    GRAPH = "graph"


class OnDiskFeatureData(ExtraMetaData):
    r"""The description of an on-disk feature."""

    domain: OnDiskFeatureDataDomain
    type: Optional[str] = None
    name: str
    format: OnDiskFeatureDataFormat
    path: str
    # Defaults to True when not specified.
    in_memory: Optional[bool] = True


class OnDiskGraphTopologyType(str, Enum):
    """Enum of graph topology type."""

    FUSED_CSC_SAMPLING = "FusedCSCSamplingGraph"


class OnDiskGraphTopology(pydantic.BaseModel):
    """The description of an on-disk graph topology."""

    type: OnDiskGraphTopologyType
    path: str


class OnDiskTaskData(ExtraMetaData):
    """Task specification in YAML.

    Keys other than the three sets below end up in ``extra_fields``.
    """

    train_set: Optional[List[OnDiskTVTSet]] = []
    validation_set: Optional[List[OnDiskTVTSet]] = []
    test_set: Optional[List[OnDiskTVTSet]] = []


class OnDiskMetaData(pydantic.BaseModel):
    """Metadata specification in YAML.

    As multiple node/edge types and multiple splits are supported, each TVT
    set of a task is a list of ``OnDiskTVTSet``.
    """

    dataset_name: Optional[str] = None
    graph_topology: Optional[OnDiskGraphTopology] = None
    feature_data: Optional[List[OnDiskFeatureData]] = []
    tasks: Optional[List[OnDiskTaskData]] = []
@dataclass
class SampledSubgraphImpl(SampledSubgraph):
    r"""Sampled subgraph of CSCSamplingGraph.

    Examples
    --------
    >>> sampled_csc = {"A:relation:B": CSCFormatBase(indptr=torch.tensor([0, 1, 2, 3]),
    ... indices=torch.tensor([0, 1, 2]))}
    >>> original_column_node_ids = {'B': torch.tensor([10, 11, 12])}
    >>> original_row_node_ids = {'A': torch.tensor([13, 14, 15])}
    >>> original_edge_ids = {"A:relation:B": torch.tensor([19, 20, 21])}
    >>> subgraph = gb.SampledSubgraphImpl(
    ... sampled_csc=sampled_csc,
    ... original_column_node_ids=original_column_node_ids,
    ... original_row_node_ids=original_row_node_ids,
    ... original_edge_ids=original_edge_ids
    ... )
    >>> print(subgraph.sampled_csc)
    {"A:relation:B": CSCFormatBase(indptr=torch.tensor([0, 1, 2, 3]),
    ... indices=torch.tensor([0, 1, 2]))}
    >>> print(subgraph.original_column_node_ids)
    {'B': tensor([10, 11, 12])}
    >>> print(subgraph.original_row_node_ids)
    {'A': tensor([13, 14, 15])}
    >>> print(subgraph.original_edge_ids)
    {"A:relation:B": tensor([19, 20, 21])}
    """

    sampled_csc: Union[CSCFormatBase, Dict[str, CSCFormatBase]] = None
    original_column_node_ids: Union[
        Dict[str, torch.Tensor], torch.Tensor
    ] = None
    original_row_node_ids: Union[Dict[str, torch.Tensor], torch.Tensor] = None
    original_edge_ids: Union[Dict[str, torch.Tensor], torch.Tensor] = None
    # Used to fetch sampled_csc.indices if it is missing.
    _edge_ids_in_fused_csc_sampling_graph: Union[
        Dict[str, torch.Tensor], torch.Tensor
    ] = None

    def __post_init__(self):
        """Validate ``sampled_csc`` (and the fallback edge ids) after init.

        ``indices`` may only be absent when ``indptr`` lives on CUDA; in that
        case the sampled edge ids must be given instead, because the indices
        will be fetched later.
        """
        csc = self.sampled_csc
        if not isinstance(csc, dict):
            # Homogeneous case: a single CSC structure.
            assert isinstance(
                csc.indptr, torch.Tensor
            ), "Node pair should be have torch.Tensor indptr."
            if csc.indptr.is_cuda and csc.indices is None:
                assert isinstance(
                    self._edge_ids_in_fused_csc_sampling_graph, torch.Tensor
                ), "When indices is missing, sampled edge ids needs to be provided."
            else:
                assert isinstance(
                    csc.indices, torch.Tensor
                ), "Node pair should have a torch.Tensor indices."
            return

        # Heterogeneous case: one CSC structure per edge type.
        for etype, pair in csc.items():
            assert (
                isinstance(etype, str) and len(etype_str_to_tuple(etype)) == 3
            ), "Edge type should be a string in format of str:str:str."
            assert isinstance(
                pair.indptr, torch.Tensor
            ), "Node pair should be have indptr of type torch.Tensor."
            if pair.indptr.is_cuda and pair.indices is None:
                assert isinstance(
                    self._edge_ids_in_fused_csc_sampling_graph.get(
                        etype, None
                    ),
                    torch.Tensor,
                ), "When indices is missing, sampled edge ids needs to be provided."
            else:
                assert isinstance(
                    pair.indices, torch.Tensor
                ), "Node pair should be have indices of type torch.Tensor."

    def __repr__(self) -> str:
        return _sampled_subgraph_str(self, "SampledSubgraphImpl")
def _sampled_subgraph_str(
    sampled_subgraph: "SampledSubgraph", classname
) -> str:
    """Render ``sampled_subgraph`` as ``classname(attr=value, ...)``.

    Attribute names are taken from ``get_attributes`` and emitted in the
    reverse of the order it returns them. Every attribute is written as
    ``name=value,`` on its own line; continuation lines of a multi-line
    value are indented so that they line up under the value's first line.

    Parameters
    ----------
    sampled_subgraph : SampledSubgraph
        The object whose attributes are printed.
    classname : str
        The name printed in front of the opening parenthesis.

    Returns
    -------
    str
        The formatted representation. ``classname()`` is returned when
        there is no attribute to print.
    """
    # Internal field that is deliberately left out of the printed output.
    hidden_name = "_edge_ids_in_fused_csc_sampling_graph"
    entries = []
    for name in reversed(get_attributes(sampled_subgraph)):
        # Use ``==`` here. ``name in "<literal>"`` is a substring test, so it
        # would also hide any attribute whose name happens to be contained in
        # the literal (for example ``graph`` or ``edge_ids``).
        if name == hidden_name:
            continue
        first_line, *other_lines = str(
            getattr(sampled_subgraph, name)
        ).split("\n")
        indent = " " * (len(name) + len(classname) + 1)
        value = "\n".join(
            [first_line] + [indent + line for line in other_lines]
        )
        entries.append(f"{name}={value},\n")
    if not entries:
        # Without this guard the old trailing-padding slice removed part of
        # the class name instead of padding that was never added.
        return classname + "()"
    # Attributes after the first one are preceded by ``len(classname)``
    # spaces; no padding follows the last one.
    return classname + "(" + (" " * len(classname)).join(entries) + ")"
def sample_subgraphs(
    self, seeds, seeds_timestamp, seeds_pre_time_window=None
):
    """Sample one subgraph per stored fanout, starting from ``seeds``.

    Parameters
    ----------
    seeds : torch.Tensor or dict
        The seed nodes. When a dict is given, it is extended so that it has
        one entry for every key of ``self.graph.node_type_to_id``.
    seeds_timestamp : torch.Tensor or dict
        The timestamps of the seeds. Must not be None.
    seeds_pre_time_window : torch.Tensor or dict, optional
        Forwarded to the sampler; extended like ``seeds`` when it is a
        non-empty dict and ``seeds`` is a dict.

    Returns
    -------
    tuple
        ``(seeds, subgraphs)`` where ``seeds`` are the row node ids produced
        by the last sampling step and ``subgraphs`` lists the sampled
        subgraphs with the most recently sampled one first.
    """
    assert (
        seeds_timestamp is not None
    ), "seeds_timestamp must be provided for temporal neighbor sampling."

    if isinstance(seeds, dict):
        # Missing node types get empty tensors whose dtype and device match
        # the first seed tensor that was provided.
        reference = next(iter(seeds.values()))
        all_ntypes = list(self.graph.node_type_to_id.keys())
        enriched_seeds = {}
        for ntype in all_ntypes:
            placeholder = torch.tensor(
                [], dtype=reference.dtype, device=reference.device
            )
            enriched_seeds[ntype] = seeds.get(ntype, placeholder)
        seeds = enriched_seeds
        no_timestamps = torch.tensor(
            [], dtype=torch.int64, device=reference.device
        )
        seeds_timestamp = {
            ntype: seeds_timestamp.get(ntype, no_timestamps)
            for ntype in all_ntypes
        }
        if seeds_pre_time_window:
            seeds_pre_time_window = {
                ntype: seeds_pre_time_window.get(ntype, no_timestamps)
                for ntype in all_ntypes
            }

    sampled_layers = []
    for fanout in self.fanouts:
        raw_subgraph = self.sampler(
            seeds,
            seeds_timestamp,
            fanout,
            self.replace,
            seeds_pre_time_window,
            self.prob_name,
            self.node_timestamp_attr_name,
            self.edge_timestamp_attr_name,
        )
        row_node_ids, compacted_csc, row_timestamps = compact_csc_format(
            raw_subgraph.sampled_csc, seeds, seeds_timestamp
        )
        sampled_layers.append(
            SampledSubgraphImpl(
                sampled_csc=compacted_csc,
                original_column_node_ids=seeds,
                original_row_node_ids=row_node_ids,
                original_edge_ids=raw_subgraph.original_edge_ids,
            )
        )
        # The rows of this layer become the seeds of the next one.
        seeds = row_node_ids
        seeds_timestamp = row_timestamps

    # Callers receive the layers with the last sampled one in front.
    sampled_layers.reverse()
    return seeds, sampled_layers
TemporalNeighborSampler(TemporalNeighborSamplerImpl): """Temporally sample neighbor edges from a graph and return sampled subgraphs. Functional name: :obj:`temporal_sample_neighbor`. Neighbor sampler is responsible for sampling a subgraph from given data. It returns an induced subgraph along with compacted information. In the context of a node classification task, the neighbor sampler directly utilizes the nodes provided as seed nodes. However, in scenarios involving link prediction, the process needs another pre-process operation. That is, gathering unique nodes from the given node pairs, encompassing both positive and negative node pairs, and employing these nodes as the seed nodes for subsequent steps. Parameters ---------- datapipe : DataPipe The datapipe. graph : FusedCSCSamplingGraph The graph on which to perform subgraph sampling. fanouts: list[torch.Tensor] or list[int] The number of edges to be sampled for each node with or without considering edge types. The length of this parameter implicitly signifies the layer of sampling being conducted. Note: The fanout order is from the outermost layer to innermost layer. For example, the fanout '[15, 10, 5]' means that 15 corresponds to the outermost layer, 10 to the intermediate layer and 5 corresponds to the innermost layer. replace: bool Boolean indicating whether the sample is performed with or without replacement. If True, a value can be selected multiple times. Otherwise, each value can be selected only once. prob_name: str, optional The name of an edge attribute used as the weights of sampling for each node. This attribute tensor should contain (unnormalized) probabilities corresponding to each neighboring edge of a node. It must be a 1D floating-point or boolean tensor, with the number of elements equalling the total number of edges. node_timestamp_attr_name: str, optional The name of a node attribute used as the timestamps of nodes.
def __init__(
    self,
    datapipe,
    graph,
    fanouts,
    replace=False,
    prob_name=None,
    node_timestamp_attr_name=None,
    edge_timestamp_attr_name=None,
):
    """Set up the sampler with ``graph.temporal_sample_neighbors``.

    All arguments are handed to the parent initializer unchanged; the only
    thing added here is the sampling callable, which is the
    ``temporal_sample_neighbors`` attribute of ``graph``.
    """
    super().__init__(
        datapipe=datapipe,
        graph=graph,
        fanouts=fanouts,
        replace=replace,
        prob_name=prob_name,
        node_timestamp_attr_name=node_timestamp_attr_name,
        edge_timestamp_attr_name=edge_timestamp_attr_name,
        sampler=graph.temporal_sample_neighbors,
    )
Similar to dgl.dataloading.LaborSampler but this uses sequential Poisson sampling instead of Poisson sampling to keep the count of sampled edges per vertex deterministic like NeighborSampler. Thus, it is a drop-in replacement for NeighborSampler. However, unlike NeighborSampler, it samples fewer vertices and edges in the multilayer GNN scenario without harming convergence speed with respect to training iterations. Parameters ---------- datapipe : DataPipe The datapipe. graph : FusedCSCSamplingGraph The graph on which to perform subgraph sampling. fanouts: list[torch.Tensor] or list[int] The number of edges to be sampled for each node with or without considering edge types. The length of this parameter implicitly signifies the layer of sampling being conducted. Note: The fanout order is from the outermost layer to innermost layer. For example, the fanout '[15, 10, 5]' means that 15 corresponds to the outermost layer, 10 to the intermediate layer and 5 corresponds to the innermost layer. replace: bool Boolean indicating whether the sample is performed with or without replacement. If True, a value can be selected multiple times. Otherwise, each value can be selected only once. prob_name: str, optional The name of an edge attribute used as the weights of sampling for each node. This attribute tensor should contain (unnormalized) probabilities corresponding to each neighboring edge of a node. It must be a 1D floating-point or boolean tensor, with the number of elements equalling the total number of edges. node_timestamp_attr_name: str, optional The name of a node attribute used as the timestamps of nodes. It must be a 1D integer tensor, with the number of elements equalling the total number of nodes. edge_timestamp_attr_name: str, optional The name of an edge attribute used as the timestamps of edges. It must be a 1D integer tensor, with the number of elements equalling the total number of edges.
class _Waiter:
    """Pair an event with the values that are ready once it completes.

    ``event`` only needs a ``wait()`` method. ``wait()`` on this object is
    meant to be called once: it drops both references afterwards.
    """

    def __init__(self, event, values):
        self.event, self.values = event, values

    def wait(self):
        """Block on the event, then hand out the stored values."""
        self.event.wait()
        # Read the values only after the event finished, then release both
        # references so that nothing is kept alive by this object.
        result, self.event, self.values = self.values, None, None
        return result
def __del__(self):
    """Undo the in-place pinning performed by ``pin_memory_``.

    torch.Tensor.pin_memory() is not an inplace operation. To make it truly
    in-place, ``pin_memory_`` uses cudaHostRegister, so the tensors recorded
    in ``_is_inplace_pinned`` have to be released with cudaHostUnregister
    (stored as ``_inplace_unpinner``) here.
    https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842

    Raises
    ------
    AssertionError
        If the unpinner reports a non-zero status for a tensor.
    """
    # ``__del__`` also runs when ``__init__`` failed before the set was
    # created, so the attribute may legitimately be missing.
    pinned = getattr(self, "_is_inplace_pinned", None)
    # Pop entries instead of iterating: shallow copies made by ``to`` share
    # this very set, and popping guarantees that each tensor is unregistered
    # only once no matter how many of those copies get destroyed.
    while pinned:
        tensor = pinned.pop()
        # Keep the call out of the ``assert`` expression. ``python -O``
        # removes assert statements entirely, which would silently skip the
        # unregistration.
        status = self._inplace_unpinner(tensor.data_ptr())
        assert (
            status == 0
        ), f"cudaHostUnregister failed with error code {status}."
def read_async_num_stages(self, ids_device: torch.device):
    """Return how many times ``read_async`` yields for ``ids_device``.

    See read_async for how the value is meant to be used.

    Parameters
    ----------
    ids_device : torch.device
        The device of the ids parameter passed into read_async.

    Returns
    -------
    int
        1 when the ids are not on CUDA. For CUDA ids: 0 when the stored
        tensor is on CUDA as well, 1 when the feature is pinned and 3
        otherwise.
    """
    if ids_device.type != "cuda":
        return 1
    if self._tensor.is_cuda:
        # If the ids and the tensor are on cuda, no need for async.
        return 0
    if self.is_pinned():
        return 1
    return 3
def to(self, device):  # pylint: disable=invalid-name
    """Copy `TorchBasedFeature` to the specified device.

    Parameters
    ----------
    device : str or torch.device
        The target passed to ``torch.Tensor.to``. The special value
        ``"pinned"`` requests ``torch.Tensor.pin_memory()`` instead.

    Returns
    -------
    TorchBasedFeature
        A new feature object; the object this is called on is not modified.
    """
    # copy.copy is a shallow copy so it does not copy tensor memory.
    self2 = copy.copy(self)
    # The shallow copy would otherwise share the ``_is_inplace_pinned`` set
    # with ``self``. The tensors in that set were registered by ``self``, so
    # only ``self`` may unregister them in its destructor; a shared set makes
    # the destructor of the copy release memory the original still uses.
    self2._is_inplace_pinned = set()
    if device == "pinned":
        self2._tensor = self2._tensor.pin_memory()
    else:
        self2._tensor = self2._tensor.to(device)
    return self2
def read(self, ids: torch.Tensor = None):
    """Read the feature by index.

    The returned tensor will be on CPU.

    Parameters
    ----------
    ids : torch.Tensor, optional
        The index of the feature. If specified, only the specified indices
        of the feature are read. If None, the whole stored tensor is
        returned.

    Returns
    -------
    torch.Tensor
        The read feature.

    Raises
    ------
    IndexError
        If the io_uring based reader fails with a RuntimeError.
    """
    if ids is None:
        return self._tensor
    if torch.ops.graphbolt.detect_io_uring():
        try:
            return self._ondisk_npy_array.index_select(ids).wait()
        except RuntimeError as err:
            # Callers expect an IndexError here. Keep the original error as
            # the explicit cause and carry its text over so that the failure
            # stays diagnosable.
            raise IndexError(
                f"Failed to read the requested ids from the feature: {err}"
            ) from err
    return index_select(self._tensor, ids)
def read_async_num_stages(self, ids_device: torch.device):
    """Return how many times ``read_async`` yields for ``ids_device``.

    See read_async for how the value is meant to be used.

    Parameters
    ----------
    ids_device : torch.device
        The device of the ids parameter passed into read_async.

    Returns
    -------
    int
        3 when the ids are on CUDA, 1 otherwise.
    """
    if ids_device.type == "cuda":
        return 3
    return 1
def metadata(self):
    """Get the metadata of the feature.

    Returns
    -------
    Dict
        The metadata given at construction time; when none was given, the
        result of the parent class' ``metadata()``.
    """
    own_metadata = self._metadata
    if own_metadata is None:
        return super().metadata()
    return own_metadata
def __init__(self, feat_data: List[OnDiskFeatureData]):
    """Build one feature object per entry of ``feat_data``.

    Every entry is stored under the key ``(domain, type, name)``. Entries
    with format ``"torch"`` must be in memory and are loaded with
    ``torch.load``; entries with format ``"numpy"`` are loaded with
    ``np.load`` when in memory and wrapped in a ``DiskBasedFeature``
    otherwise.

    Raises
    ------
    ValueError
        If an entry has a format other than ``"torch"`` or ``"numpy"``.
    """
    loaded = {}
    for spec in feat_data:
        key = (spec.domain, spec.type, spec.name)
        extra = spec.extra_fields
        if spec.format == "torch":
            assert spec.in_memory, (
                f"Pytorch tensor can only be loaded in memory, "
                f"but the feature {key} is loaded on disk."
            )
            # NOTE(review): ``weights_only=False`` lets ``torch.load``
            # unpickle arbitrary objects, so ``spec.path`` has to come from
            # a trusted source.
            feature = TorchBasedFeature(
                torch.load(spec.path, weights_only=False), metadata=extra
            )
        elif spec.format == "numpy" and spec.in_memory:
            # TorchBasedFeature is always in memory by default.
            feature = TorchBasedFeature(
                torch.as_tensor(np.load(spec.path)), metadata=extra
            )
        elif spec.format == "numpy":
            # DiskBasedFeature is always out of memory by default.
            feature = DiskBasedFeature(spec.path, metadata=extra)
        else:
            raise ValueError(f"Unknown feature format {spec.format}")
        loaded[key] = feature
    super().__init__(loaded)
def to(self, device):  # pylint: disable=invalid-name
    """Copy `TorchBasedFeatureStore` to the specified device.

    Returns a new store whose features are the results of calling
    ``to(device)`` on each stored feature; the store this is called on
    keeps its own feature dictionary.
    """
    # copy.copy is a shallow copy so it does not copy tensor memory.
    clone = copy.copy(self)
    moved = {}
    for key, feature in clone._features.items():
        moved[key] = feature.to(device)
    clone._features = moved
    return clone
def __init__(
    self,
    datapipe,
    graph,
    negative_ratio,
):
    """Initialize the sampler.

    ``datapipe`` and ``negative_ratio`` are passed on to the parent
    initializer; ``graph`` is kept on the instance.
    """
    super().__init__(datapipe, negative_ratio)
    # Kept for ``_sample_with_etype``, which calls
    # ``self.graph.sample_negative_edges_uniform``.
    self.graph = graph
def count_split(total, num_workers, worker_id, batch_size=1):
    """Return one worker's share of ``total`` items and the items before it.

    Items are dealt out in units of ``batch_size``. Every worker first
    receives the same number of full rounds; the leftover items are then
    handed out one batch at a time starting from worker 0, so at most one
    worker ends up holding a partial batch.

    Parameters
    ----------
    total : int
        Number of items to split.
    num_workers : int
        Number of workers sharing the items.
    worker_id : int
        Index of the worker whose share is computed.
    batch_size : int, optional
        Granularity of the split. Defaults to 1.

    Returns
    -------
    tuple of (int, int)
        ``(assigned, prefix_sum)``: the number of items given to
        ``worker_id`` and the number of items given to all workers with a
        smaller id.
    """
    full_rounds, leftover = divmod(total, num_workers * batch_size)

    if batch_size == 1:
        # One leftover item goes to each of the first `leftover` workers.
        assigned = full_rounds + (worker_id < leftover)
    else:
        whole_batches, partial_batch = divmod(leftover, batch_size)
        if worker_id < whole_batches:
            extra = batch_size
        elif worker_id == whole_batches:
            extra = partial_batch
        else:
            extra = 0
        assigned = full_rounds * batch_size + extra

    items_before = full_rounds * worker_id * batch_size + min(
        worker_id * batch_size, leftover
    )
    return (assigned, items_before)


def calculate_range(
    distributed,
    total,
    num_replicas,
    rank,
    num_workers,
    worker_id,
    batch_size,
    drop_last,
    drop_uneven_inputs,
):
    """Compute the slice of items handled by the current worker.

    ``total`` items are first split across ``num_replicas`` replicas in
    units of ``batch_size``; each replica's share is then split again across
    its ``num_workers`` workers. ``drop_last`` discards incomplete batches
    and ``drop_uneven_inputs`` caps the output of every replica so that no
    replica emits more batches than the last one.

    Args:
        distributed (bool): Whether it's in distributed mode. When False the
            whole range is returned and only ``drop_last`` is honoured.
        total (int): The total number of items.
        num_replicas (int): The total number of replicas.
        rank (int): The rank of the current replica.
        num_workers (int): The number of workers per replica.
        worker_id (int): The ID of the current worker.
        batch_size (int): The desired batch size.
        drop_last (bool): Whether to drop incomplete batches.
        drop_uneven_inputs (bool): Whether to drop excessive batches for
            some replicas.

    Returns:
        tuple: ``(start_offset, assigned_count, output_count)`` where
            ``start_offset`` is where the worker's range begins,
            ``assigned_count`` is the length of that range and
            ``output_count`` is how many of those items are actually
            produced after dropping.
    """
    # Single-process mode: one range covering everything.
    if not distributed:
        kept = total // batch_size * batch_size if drop_last else total
        return (0, total, kept)

    # Step 1: split the items between replicas.
    assigned_count, start_offset = count_split(
        total, num_replicas, rank, batch_size
    )

    # Step 2: decide how many of the assigned items this replica outputs.
    if drop_uneven_inputs and drop_last:
        output_count = total // (batch_size * num_replicas) * batch_size
    elif drop_uneven_inputs:
        # The last replica receives the fewest items; cap everyone at its
        # number of (possibly partial) batches.
        fewest_items, _ = count_split(
            total, num_replicas, num_replicas - 1, batch_size
        )
        fewest_batches = (fewest_items + batch_size - 1) // batch_size
        output_count = min(fewest_batches * batch_size, assigned_count)
    elif drop_last:
        output_count = assigned_count // batch_size * batch_size
    else:
        output_count = assigned_count

    # Step 3: split the replica's share between its workers, spreading the
    # dropped items over the workers as well.
    if num_workers > 1:
        worker_dropped, dropped_before = count_split(
            assigned_count - output_count, num_workers, worker_id
        )
        output_count, output_before = count_split(
            output_count,
            num_workers,
            worker_id,
            batch_size,
        )
        assigned_count = output_count + worker_dropped
        start_offset += output_before + dropped_before

    return (start_offset, assigned_count, output_count)
When `world_size` is greater than 1, then the given ids are partitioned between the available ranks. The ids corresponding to the given rank are guaranteed to come before the ids of other ranks. To do this, the partitioned ids are rotated backwards by the given rank so that the ids are ordered as: `[rank, rank + 1, world_size, 0, ..., rank - 1]`. This is supported only for Volta and later generation NVIDIA GPUs. Parameters ---------- nodes : List[torch.Tensor] or Dict[str, List[torch.Tensor]] List of nodes for compacting. the unique_and_compact will be done per type - If `nodes` is a list of tensor: All the tensors will do unique and compact together, usually it is used for homogeneous graph. - If `nodes` is a list of dictionary: The keys should be node type and the values should be corresponding nodes, the unique and compact will be done per type, usually it is used for heterogeneous graph. rank : int The rank of the current process. world_size : int The number of processes. async_op: bool Boolean indicating whether the call is asynchronous. If so, the result can be obtained by calling wait on the returned future. Returns ------- Tuple[unique_nodes, compacted_node_list, unique_nodes_offsets] The Unique nodes (per type) of all nodes in the input. And the compacted nodes list, where IDs inside are replaced with compacted node IDs. "Compacted node list" indicates that the node IDs in the input node list are replaced with mapped node IDs, where each type of node is mapped to a contiguous space of IDs ranging from 0 to N. The unique nodes offsets tensor partitions the unique_nodes tensor. Has size `world_size + 1` and `unique_nodes[offsets[i]: offsets[i + 1]]` belongs to the rank `(rank + i) % world_size`. 
def compact_temporal_nodes(nodes, nodes_timestamp):
    """Concatenate temporal nodes and number them consecutively.

    No deduplication is performed: the node tensors and their timestamps are
    simply concatenated, and the compacted IDs are the positions
    ``0, 1, 2, ...`` in the concatenated tensor, split back into chunks of
    the same sizes as the inputs.

    Parameters
    ----------
    nodes : List[torch.Tensor] or Dict[str, List[torch.Tensor]]
        Nodes to compact.
        - If `nodes` is a list of tensors, they are compacted together
        (homogeneous graph).
        - If `nodes` is a dictionary, keys are node types and each value is
        compacted independently (heterogeneous graph).
    nodes_timestamp : List[torch.Tensor] or Dict[str, List[torch.Tensor]]
        Timestamps matching `nodes`, with the same structure.

    Returns
    -------
    Tuple[nodes, nodes_timestamp, compacted_node_list]
        The concatenated nodes, the concatenated timestamps, and the list of
        compacted ID tensors (each per type when the input is a dictionary).
        The compacted IDs share the dtype and device of the concatenated
        nodes.
    """

    def _concat_and_number(node_chunks, timestamp_chunks):
        chunk_sizes = [chunk.size(0) for chunk in node_chunks]
        all_nodes = torch.cat(node_chunks)
        all_timestamps = torch.cat(timestamp_chunks)
        new_ids = torch.arange(
            0,
            all_nodes.numel(),
            dtype=all_nodes.dtype,
            device=all_nodes.device,
        )
        return all_nodes, all_timestamps, list(new_ids.split(chunk_sizes))

    # Homogeneous input: a single pass is enough.
    if not isinstance(nodes, dict):
        return _concat_and_number(nodes, nodes_timestamp)

    # Heterogeneous input: handle every node type on its own.
    ret_nodes, ret_timestamp, compacted = {}, {}, {}
    for ntype, chunks in nodes.items():
        merged = _concat_and_number(chunks, nodes_timestamp[ntype])
        ret_nodes[ntype], ret_timestamp[ntype], compacted[ntype] = merged
    return ret_nodes, ret_timestamp, compacted
Cooperation between the GPUs eliminates duplicate work performed across the GPUs due to the overlapping sampled k-hop neighborhoods of seed nodes when performing GNN minibatching. When `world_size` is greater than 1, then the given ids are partitioned between the available ranks. The ids corresponding to the given rank are guaranteed to come before the ids of other ranks. To do this, the partitioned ids are rotated backwards by the given rank so that the ids are ordered as: `[rank, rank + 1, world_size, 0, ..., rank - 1]`. This is supported only for Volta and later generation NVIDIA GPUs. Parameters ---------- csc_formats : Union[CSCFormatBase, Dict(str, CSCFormatBase)] CSC formats representing source-destination edges. - If `csc_formats` is a CSCFormatBase: It means the graph is homogeneous. Also, indptr and indice in it should be torch.tensor representing source and destination pairs in csc format. And IDs inside are homogeneous ids. - If `csc_formats` is a Dict[str, CSCFormatBase]: The keys should be edge type and the values should be csc format node pairs. And IDs inside are heterogeneous ids. unique_dst_nodes: torch.Tensor or Dict[str, torch.Tensor] Unique nodes of all destination nodes in the node pairs. - If `unique_dst_nodes` is a tensor: It means the graph is homogeneous. - If `csc_formats` is a dictionary: The keys are node type and the values are corresponding nodes. And IDs inside are heterogeneous ids. rank : int The rank of the current process. world_size : int The number of processes. async_op: bool Boolean indicating whether the call is asynchronous. If so, the result can be obtained by calling wait on the returned future. Returns ------- Tuple[unique_nodes, csc_formats, unique_nodes_offsets] The compacted csc formats, where node IDs are replaced with mapped node IDs, and the unique nodes (per type). 
"Compacted csc formats" indicates that the node IDs in the input node pairs are replaced with mapped node IDs, where each type of node is mapped to a contiguous space of IDs ranging from 0 to N. The unique nodes offsets tensor partitions the unique_nodes tensor. Has size `world_size + 1` and `unique_nodes[offsets[i]: offsets[i + 1]]` belongs to the rank `(rank + i) % world_size`. Examples -------- >>> import dgl.graphbolt as gb >>> N1 = torch.LongTensor([1, 2, 2]) >>> N2 = torch.LongTensor([5, 5, 6]) >>> unique_dst = { ... "n1": torch.LongTensor([1, 2]), ... "n2": torch.LongTensor([5, 6])} >>> csc_formats = { ... "n1:e1:n2": gb.CSCFormatBase(indptr=torch.tensor([0, 2, 3]),indices=N1), ... "n2:e2:n1": gb.CSCFormatBase(indptr=torch.tensor([0, 1, 3]),indices=N2)} >>> unique_nodes, compacted_csc_formats, _ = gb.unique_and_compact_csc_formats( ... csc_formats, unique_dst ... ) >>> print(unique_nodes) {'n1': tensor([1, 2]), 'n2': tensor([5, 6])} >>> print(compacted_csc_formats) {"n1:e1:n2": CSCFormatBase(indptr=torch.tensor([0, 2, 3]), indices=torch.tensor([0, 1, 1])), "n2:e2:n1": CSCFormatBase(indptr=torch.tensor([0, 1, 3]), indices=torch.Longtensor([0, 0, 1]))} """ is_homogeneous = not isinstance(csc_formats, dict) if is_homogeneous: csc_formats = {"_N:_E:_N": csc_formats} if unique_dst_nodes is not None: assert isinstance( unique_dst_nodes, torch.Tensor ), "Edge type not supported in homogeneous graph." unique_dst_nodes = {"_N": unique_dst_nodes} # Collect all source and destination nodes for each node type. indices = defaultdict(list) device = None for etype, csc_format in csc_formats.items(): if device is None: device = csc_format.indices.device src_type, _, dst_type = etype_str_to_tuple(etype) assert len(unique_dst_nodes.get(dst_type, [])) + 1 == len( csc_format.indptr ), "The seed nodes should correspond to indptr." 
indices[src_type].append(csc_format.indices) indices = {ntype: torch.cat(nodes) for ntype, nodes in indices.items()} ntypes = set(indices.keys()) dtype = list(indices.values())[0].dtype default_tensor = torch.tensor([], dtype=dtype, device=device) indice_list = [] unique_dst_list = [] for ntype in ntypes: indice_list.append(indices.get(ntype, default_tensor)) unique_dst_list.append(unique_dst_nodes.get(ntype, default_tensor)) dst_list = [torch.tensor([], dtype=dtype, device=device)] * len( unique_dst_list ) uniq_fn = ( torch.ops.graphbolt.unique_and_compact_batched_async if async_op else torch.ops.graphbolt.unique_and_compact_batched ) results = uniq_fn(indice_list, dst_list, unique_dst_list, rank, world_size) class _Waiter: def __init__(self, future, csc_formats): self.future = future self.csc_formats = csc_formats def wait(self): """Returns the stored value when invoked.""" results = self.future.wait() if async_op else self.future csc_formats = self.csc_formats # Ensure there is no memory leak. self.future = self.csc_formats = None unique_nodes = {} compacted_indices = {} offsets = {} for i, ntype in enumerate(ntypes): ( unique_nodes[ntype], compacted_indices[ntype], _, offsets[ntype], ) = results[i] compacted_csc_formats = {} # Map back with the same order. for etype, csc_format in csc_formats.items(): num_elem = csc_format.indices.size(0) src_type, _, _ = etype_str_to_tuple(etype) indice = compacted_indices[src_type][:num_elem] indptr = csc_format.indptr compacted_csc_formats[etype] = CSCFormatBase( indptr=indptr, indices=indice ) compacted_indices[src_type] = compacted_indices[src_type][ num_elem: ] # Return singleton for a homogeneous graph. 
def _broadcast_timestamps(csc, dst_timestamps):
    """Expand per-destination timestamps to one timestamp per source entry.

    Delegates to ``expand_indptr`` with ``csc.indptr`` as the row pointer,
    ``dst_timestamps`` as the values to repeat and ``len(csc.indices)`` as
    the output size.
    """
    num_sources = len(csc.indices)
    return expand_indptr(
        csc.indptr, node_ids=dst_timestamps, output_size=num_sources
    )
- If `dst_nodes` is a dictionary: The keys are node type and the values are corresponding nodes. And IDs inside are heterogeneous ids. dst_timestamps: Optional[Union[torch.Tensor, Dict[str, torch.Tensor]]] Timestamps of all destination nodes in the csc formats. If given, the timestamp of each destination node will be broadcasted to its corresponding source nodes. Returns ------- Tuple[original_row_node_ids, compacted_csc_formats, ...] A tensor of original row node IDs (per type) of all nodes in the input. The compacted CSC formats, where node IDs are replaced with mapped node IDs ranging from 0 to N. The source timestamps (per type) of all nodes in the input if `dst_timestamps` is given. Examples -------- >>> import dgl.graphbolt as gb >>> csc_formats = { ... "n2:e2:n1": gb.CSCFormatBase( ... indptr=torch.tensor([0, 1, 3]), indices=torch.tensor([5, 4, 6]) ... ), ... "n1:e1:n1": gb.CSCFormatBase( ... indptr=torch.tensor([0, 1, 3]), indices=torch.tensor([1, 2, 3]) ... ), ... } >>> dst_nodes = {"n1": torch.LongTensor([2, 4])} >>> original_row_node_ids, compacted_csc_formats = gb.compact_csc_format( ... csc_formats, dst_nodes ... ) >>> original_row_node_ids {'n1': tensor([2, 4, 1, 2, 3]), 'n2': tensor([5, 4, 6])} >>> compacted_csc_formats {'n2:e2:n1': CSCFormatBase(indptr=tensor([0, 1, 3]), indices=tensor([0, 1, 2]), ), 'n1:e1:n1': CSCFormatBase(indptr=tensor([0, 1, 3]), indices=tensor([2, 3, 4]), )} >>> csc_formats = { ... "n2:e2:n1": gb.CSCFormatBase( ... indptr=torch.tensor([0, 1, 3]), indices=torch.tensor([5, 4, 6]) ... ), ... "n1:e1:n1": gb.CSCFormatBase( ... indptr=torch.tensor([0, 1, 3]), indices=torch.tensor([1, 2, 3]) ... ), ... } >>> dst_nodes = {"n1": torch.LongTensor([2, 4])} >>> original_row_node_ids, compacted_csc_formats = gb.compact_csc_format( ... csc_formats, dst_nodes ... 
) >>> original_row_node_ids {'n1': tensor([2, 4, 1, 2, 3]), 'n2': tensor([5, 4, 6])} >>> compacted_csc_formats {'n2:e2:n1': CSCFormatBase(indptr=tensor([0, 1, 3]), indices=tensor([0, 1, 2]), ), 'n1:e1:n1': CSCFormatBase(indptr=tensor([0, 1, 3]), indices=tensor([2, 3, 4]), )} >>> dst_timestamps = {"n1": torch.LongTensor([10, 20])} >>> ( ... original_row_node_ids, ... compacted_csc_formats, ... src_timestamps, ... ) = gb.compact_csc_format(csc_formats, dst_nodes, dst_timestamps) >>> src_timestamps {'n1': tensor([10, 20, 10, 20, 20]), 'n2': tensor([10, 20, 20])} """ is_homogeneous = not isinstance(csc_formats, dict) has_timestamp = dst_timestamps is not None if is_homogeneous: if dst_nodes is not None: assert isinstance( dst_nodes, torch.Tensor ), "Edge type not supported in homogeneous graph." assert len(dst_nodes) + 1 == len( csc_formats.indptr ), "The seed nodes should correspond to indptr." offset = dst_nodes.size(0) original_row_ids = torch.cat((dst_nodes, csc_formats.indices)) compacted_csc_formats = CSCFormatBase( indptr=csc_formats.indptr, indices=( torch.arange( 0, csc_formats.indices.size(0), device=csc_formats.indices.device, ) + offset ), ) src_timestamps = None if has_timestamp: src_timestamps = torch.cat( [ dst_timestamps, _broadcast_timestamps( compacted_csc_formats, dst_timestamps ), ] ) else: compacted_csc_formats = {} src_timestamps = None original_row_ids = {key: val.clone() for key, val in dst_nodes.items()} if has_timestamp: src_timestamps = { key: val.clone() for key, val in dst_timestamps.items() } for etype, csc_format in csc_formats.items(): src_type, _, dst_type = etype_str_to_tuple(etype) assert len(dst_nodes.get(dst_type, [])) + 1 == len( csc_format.indptr ), "The seed nodes should correspond to indptr." 
def numpy_save_aligned(*args, **kwargs):
    """Call ``numpy.save`` while requesting 4KiB alignment of the array data.

    When ``np.lib.format`` exposes ``ARRAY_ALIGN`` it is temporarily set to
    4096 for the duration of the save and restored afterwards, even if the
    save raises. All arguments are forwarded to ``numpy.save`` unchanged.
    """
    # https://github.com/numpy/numpy/blob/2093a6d5b933f812d15a3de0eafeeb23c61f948a/numpy/lib/format.py#L179
    if not hasattr(np.lib.format, "ARRAY_ALIGN"):
        np.save(*args, **kwargs)
        return
    default_alignment = np.lib.format.ARRAY_ALIGN
    # The maximum allowed alignment by the numpy code linked above is 4K.
    # Most filesystems work with block sizes of 4K so in practice, the file
    # size on the disk won't be larger.
    np.lib.format.ARRAY_ALIGN = 4096
    try:
        np.save(*args, **kwargs)
    finally:
        # Always undo the global override so a failed save cannot leak the
        # 4K alignment into unrelated numpy.save calls.
        np.lib.format.ARRAY_ALIGN = default_alignment


def _read_torch_data(path):
    """Load an object saved with ``torch.save`` from ``path``."""
    # NOTE(review): weights_only=False unpickles arbitrary objects; only use
    # this on files that come from a trusted source.
    return torch.load(path, weights_only=False)


def _read_numpy_data(path, in_memory=True):
    """Load a ``.npy`` file as a tensor, memory-mapped if not ``in_memory``."""
    if in_memory:
        return torch.from_numpy(np.load(path))
    return torch.as_tensor(np.load(path, mmap_mode="r+"))


def read_data(path, fmt, in_memory=True):
    """Read data from disk.

    Parameters
    ----------
    path : str
        File to read.
    fmt : str
        Either ``"torch"`` or ``"numpy"``.
    in_memory : bool, optional
        Only used for the numpy format: when False the file is memory-mapped
        instead of being loaded.

    Raises
    ------
    RuntimeError
        If ``fmt`` is not a supported format.
    """
    if fmt == "torch":
        return _read_torch_data(path)
    elif fmt == "numpy":
        return _read_numpy_data(path, in_memory=in_memory)
    else:
        raise RuntimeError(f"Unsupported format: {fmt}")


def save_data(data, path, fmt):
    """Save data into disk.

    The parent directory of ``path`` is created if needed. Tensors are
    converted to ndarrays for the numpy format and ndarrays to tensors for
    the torch format. Non-contiguous data is copied to contiguous memory
    before saving, and a warning is emitted when that happens.

    Raises
    ------
    RuntimeError
        If ``fmt`` is neither ``"numpy"`` nor ``"torch"``.
    """
    # Local import: this module does not import `warnings` at file level.
    import warnings

    # Make sure the directory exists. A bare file name has no directory
    # component and os.makedirs("") would fail.
    directory = os.path.dirname(path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    if fmt not in ["numpy", "torch"]:
        raise RuntimeError(f"Unsupported format: {fmt}")

    # Perform necessary conversion.
    if fmt == "numpy" and isinstance(data, torch.Tensor):
        data = data.cpu().numpy()
    elif fmt == "torch" and isinstance(data, np.ndarray):
        data = torch.from_numpy(data).cpu()

    # Save the data.
    if fmt == "numpy":
        if not data.flags["C_CONTIGUOUS"]:
            warnings.warn(
                "The ndarray saved to disk is not contiguous, "
                "so it will be copied to contiguous memory."
            )
            data = np.ascontiguousarray(data)
        numpy_save_aligned(path, data)
    elif fmt == "torch":
        if not data.is_contiguous():
            warnings.warn(
                "The tensor saved to disk is not contiguous, "
                "so it will be copied to contiguous memory."
            )
            data = data.contiguous()
        torch.save(data, path)


def get_npy_dim(npy_path):
    """Get the dim of numpy file.

    Only the header of the ``.npy`` file is parsed; the array data is not
    loaded.

    Raises
    ------
    ValueError
        If the file does not carry a parsable ``.npy`` header.
    """
    with open(npy_path, "rb") as f:
        try:
            # The first 8 bytes hold the magic string and the format version.
            # Reading them both positions the file at the header and tells us
            # which header layout follows, so no parser is run on a header it
            # cannot understand.
            npy_version = np.lib.format.read_magic(f)
            if npy_version == (1, 0):
                shape, _, _ = read_array_header_1_0(f)
            else:
                # Later versions use the 4-byte header length read by the
                # 2.0 parser.
                shape, _, _ = read_array_header_2_0(f)
        except ValueError as err:
            raise ValueError("Invalid file format") from err
    return len(shape)


def _to_int32(data):
    """Cast a torch tensor or numpy array to 32-bit integers."""
    if isinstance(data, torch.Tensor):
        return data.to(torch.int32)
    elif isinstance(data, np.ndarray):
        return data.astype(np.int32)
    else:
        raise TypeError(
            "Unsupported input type. Please provide a torch tensor or numpy array."
        )


def copy_or_convert_data(
    input_path,
    output_path,
    input_format,
    output_format="numpy",
    in_memory=True,
    is_feature=False,
    within_int32=False,
):
    """Copy or convert the data from input_path to output_path.

    The output is always written in numpy format. One-dimensional features
    are reshaped to ``n * 1``, data is cast to int32 when ``within_int32`` is
    set, and an unmodified contiguous numpy input is copied file-to-file
    instead of being re-saved.
    """
    assert (
        output_format == "numpy"
    ), "The output format of the data should be numpy."
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # We read the data always in case we need to cast its type.
    data = read_data(input_path, input_format, in_memory)
    if within_int32:
        data = _to_int32(data)
    if input_format == "numpy":
        # If dim of the data is 1, reshape it to n * 1 and save it to output_path.
        if is_feature and get_npy_dim(input_path) == 1:
            data = data.reshape(-1, 1)
        # If the data does not need to be modified, just copy the file.
        elif not within_int32 and data.numpy().flags["C_CONTIGUOUS"]:
            shutil.copyfile(input_path, output_path)
            return
    else:
        # If dim of the data is 1, reshape it to n * 1 and save it to output_path.
        if is_feature and data.dim() == 1:
            data = data.reshape(-1, 1)
    save_data(data, output_path, output_format)
def calculate_file_hash(file_path, hash_algo="md5"):
    """Calculate the hash value of a file.

    Parameters
    ----------
    file_path : str
        File whose content is hashed.
    hash_algo : str, optional
        One of ``md5``, ``sha1``, ``sha224``, ``sha256``, ``sha384`` or
        ``sha512``. Defaults to ``md5``.

    Returns
    -------
    str
        Hexadecimal digest of the file content.

    Raises
    ------
    ValueError
        If ``hash_algo`` is not one of the supported algorithms.
    """
    hash_algos = ["md5", "sha1", "sha224", "sha256", "sha384", "sha512"]
    if hash_algo not in hash_algos:
        raise ValueError(
            f"Hash algorithm must be one of: {hash_algos}, but got `{hash_algo}`."
        )
    hash_obj = getattr(hashlib, hash_algo)()
    with open(file_path, "rb") as file:
        # Stream the file in small chunks so it is never fully loaded.
        for chunk in iter(lambda: file.read(4096), b""):
            hash_obj.update(chunk)
    return hash_obj.hexdigest()


def calculate_dir_hash(
    dir_path, hash_algo="md5", ignore: Union[str, List[str]] = None
):
    """Calculate the hash values of all files under the directory.

    Parameters
    ----------
    dir_path : str
        Directory that is walked recursively.
    hash_algo : str, optional
        Algorithm passed to :func:`calculate_file_hash`.
    ignore : str or list of str, optional
        File name(s) to skip. Names are compared exactly against each file's
        base name.

    Returns
    -------
    dict
        Mapping from file path (directory joined with file name, as produced
        by ``os.walk``) to the file's hexadecimal digest.
    """
    # A single name must be wrapped: `filename in "some_name.txt"` would be a
    # substring test and silently skip files such as "name" or ".txt".
    if ignore is None:
        ignored_names = ()
    elif isinstance(ignore, str):
        ignored_names = (ignore,)
    else:
        ignored_names = ignore

    hashes = {}
    for dirpath, _, filenames in os.walk(dir_path):
        for filename in filenames:
            if filename in ignored_names:
                continue
            filepath = os.path.join(dirpath, filename)
            hashes[filepath] = calculate_file_hash(
                filepath, hash_algo=hash_algo
            )
    return hashes


def check_dataset_change(dataset_dir, processed_dir):
    """Check whether dataset has been changed by checking its hash value.

    The hashes stored as JSON in
    ``<dataset_dir>/<processed_dir>/dataset_hash_value.txt`` are compared
    with freshly computed hashes of every file under ``dataset_dir`` (the
    hash file itself excluded).

    Returns
    -------
    bool
        True when no stored hash file exists or when the stored and current
        hashes differ, False when they are equal.
    """
    hash_value_file = "dataset_hash_value.txt"
    hash_value_file_path = os.path.join(
        dataset_dir, processed_dir, hash_value_file
    )
    if not os.path.exists(hash_value_file_path):
        return True
    with open(hash_value_file_path, "r") as f:
        original_hash_value = json.load(f)
    present_hash_value = calculate_dir_hash(dataset_dir, ignore=hash_value_file)
    return original_hash_value != present_hash_value
@functools.lru_cache(maxsize=None)
def is_wsl(v: str = platform.uname().release) -> int:
    """Detects if Python is running in WSL.

    Returns 1 when the release string ends with ``-Microsoft``, 2 when it
    ends with ``microsoft-standard-WSL2`` and 0 otherwise.
    """
    if v.endswith("-Microsoft"):
        return 1
    elif v.endswith("microsoft-standard-WSL2"):
        return 2

    return 0


# Keep a handle on the formatter that was installed before this module
# replaces it, so non-GraphBolt warnings keep their usual format.
# pylint: disable=invalid-name
_default_formatwarning = warnings.formatwarning


def built_with_cuda():
    """Returns whether GraphBolt was built with CUDA support."""
    # This op is defined if graphbolt is built with CUDA support.
    return hasattr(torch.ops.graphbolt, "set_max_uva_threads")


class GBWarning(UserWarning):
    """GraphBolt Warning class."""


# pylint: disable=unused-argument
def gb_warning_format(message, category, filename, lineno, line=None):
    """Format GraphBolt warnings.

    Warnings whose category is ``GBWarning`` (or a subclass) are rendered as
    ``"GraphBolt Warning: <message>\\n"``; every other warning is delegated
    to the formatter that was active before this module was imported.
    """
    # `category` is a warning *class*, so it must be tested with issubclass;
    # isinstance(category, GBWarning) can never be true.
    if isinstance(category, type) and issubclass(category, GBWarning):
        return "GraphBolt Warning: {}\n".format(message)
    # Forward `line` so the source line supplied by the caller is kept.
    return _default_formatwarning(
        message, category, filename, lineno, line=line
    )


def gb_warning(message, category=GBWarning, stacklevel=2):
    """GraphBolt warning wrapper that defaults to ``GBWarning`` instead
    of ``UserWarning`` category.
    """
    return warnings.warn(message, category=category, stacklevel=stacklevel)


warnings.formatwarning = gb_warning_format


def is_listlike(data):
    """Return if the data is a sequence but not a string."""
    return isinstance(data, Sequence) and not isinstance(data, str)


def recursive_apply(data, fn, *args, **kwargs):
    """Recursively apply a function to every element in a container.

    If the input data is a list or any sequence other than a string, returns
    a list whose elements are the same elements applied with the given
    function. A tuple input yields a tuple.

    If the input data is a dict or any mapping, returns a dict whose keys are
    the same and values are the elements applied with the given function.

    If the input data is a nested container, the result will have the same
    nested structure where each element is transformed recursively.

    The first argument of the function will be passed with the individual
    elements from the input data, followed by the arguments in :attr:`args`
    and :attr:`kwargs`.

    Parameters
    ----------
    data : any
        Any object.
    fn : callable
        Any function.
    args, kwargs :
        Additional arguments and keyword-arguments passed to the function.

    Examples
    --------
    Applying a ReLU function to a dictionary of tensors:

    >>> h = {k: torch.randn(3) for k in ['A', 'B', 'C']}
    >>> h = recursive_apply(h, torch.nn.functional.relu)
    >>> assert all((v >= 0).all() for v in h.values())
    """
    if isinstance(data, Mapping):
        return {
            k: recursive_apply(v, fn, *args, **kwargs) for k, v in data.items()
        }
    elif isinstance(data, tuple):
        return tuple(recursive_apply(v, fn, *args, **kwargs) for v in data)
    elif is_listlike(data):
        return [recursive_apply(v, fn, *args, **kwargs) for v in data]
    else:
        return fn(data, *args, **kwargs)


def recursive_apply_reduce_all(data, fn, *args, **kwargs):
    """Recursively apply a function to every element in a container and
    reduce the boolean results with all.

    If the input data is a list or any sequence other than a string, returns
    True if and only if the given function returns True for all elements.

    If the input data is a dict or any mapping, returns True if and only if
    the given function returns True for values.

    If the input data is a nested container, the result will be reduced over
    the nested structure where each element is tested recursively.

    The first argument of the function will be passed with the individual
    elements from the input data, followed by the arguments in :attr:`args`
    and :attr:`kwargs`.

    Parameters
    ----------
    data : any
        Any object.
    fn : callable
        Any function returning a boolean.
    args, kwargs :
        Additional arguments and keyword-arguments passed to the function.
    """
    if isinstance(data, Mapping):
        return all(
            recursive_apply_reduce_all(v, fn, *args, **kwargs)
            for v in data.values()
        )
    elif isinstance(data, tuple) or is_listlike(data):
        return all(
            recursive_apply_reduce_all(v, fn, *args, **kwargs) for v in data
        )
    else:
        return fn(data, *args, **kwargs)


def get_nonproperty_attributes(_obj) -> list:
    """Get attributes of the class except for the properties.

    Names starting with ``__``, names defined as a ``property`` on the
    object's type and names whose value is callable are left out. The result
    follows the order of ``dir(_obj)``.
    """
    attributes = [
        attribute
        for attribute in dir(_obj)
        if not attribute.startswith("__")
        and (
            not hasattr(type(_obj), attribute)
            or not isinstance(getattr(type(_obj), attribute), property)
        )
        and not callable(getattr(_obj, attribute))
    ]
    return attributes


def get_attributes(_obj) -> list:
    """Get attributes of the class.

    Names starting with ``__`` and names whose value is callable are left
    out. The result follows the order of ``dir(_obj)``.
    """
    attributes = [
        attribute
        for attribute in dir(_obj)
        if not attribute.startswith("__")
        and not callable(getattr(_obj, attribute))
    ]
    return attributes
def download(
    url,
    path=None,
    overwrite=True,
    sha1_hash=None,
    retries=5,
    verify_ssl=True,
    log=True,
):
    """Download a given URL.

    Codes borrowed from mxnet/gluon/utils.py

    Parameters
    ----------
    url : str
        URL to download.
    path : str, optional
        Destination path to store downloaded file. By default stores to the
        current directory with the same name as in url.  If it points to an
        existing directory, the file name is taken from the URL.
    overwrite : bool, optional
        Whether to overwrite the destination file if it already exists.
        By default always overwrites the downloaded file.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore existing file
        when hash is specified but doesn't match.
    retries : integer, default 5
        The number of times to attempt downloading in case of failure or non
        200 return codes.  At least one attempt is always made.
    verify_ssl : bool, default True
        Verify SSL certificates.
    log : bool, default True
        Whether to print the progress for download

    Returns
    -------
    str
        The file path of the downloaded file.

    Raises
    ------
    RuntimeError
        If the server keeps answering with a non-200 status code.
    UserWarning
        If the downloaded content does not match :attr:`sha1_hash`.
    """
    if path is None:
        fname = url.split("/")[-1]
        # Empty filenames are invalid
        assert fname, (
            "Can't construct file-name from this URL. "
            "Please set the `path` option manually."
        )
    else:
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split("/")[-1])
        else:
            fname = path
    assert retries >= 0, "Number of retries should be at least 0"

    if not verify_ssl:
        warnings.warn(
            "Unverified HTTPS request is being made (verify_ssl=False). "
            "Adding certificate verification is strongly advised."
        )

    if (
        overwrite
        or not os.path.exists(fname)
        or (sha1_hash and not check_sha1(fname, sha1_hash))
    ):
        dirname = os.path.dirname(os.path.abspath(os.path.expanduser(fname)))
        # `exist_ok` avoids a race with another process creating the folder.
        os.makedirs(dirname, exist_ok=True)
        while retries + 1 > 0:
            # Disable pyling too broad Exception
            # pylint: disable=W0703
            try:
                if log:
                    print("Downloading %s from %s..." % (fname, url))
                # With `stream=True` the connection is only released once the
                # body is fully consumed or the response is closed; the
                # context manager guarantees the latter on every exit path
                # (bad status code, I/O error while writing, ...).
                with requests.get(
                    url, stream=True, verify=verify_ssl
                ) as r:
                    if r.status_code != 200:
                        raise RuntimeError(
                            "Failed downloading url %s" % url
                        )
                    # Get the total file size.
                    total_size = int(r.headers.get("content-length", 0))
                    with tqdm(
                        total=total_size,
                        unit="B",
                        unit_scale=True,
                        desc=fname,
                    ) as progress_bar, open(fname, "wb") as f:
                        for chunk in r.iter_content(chunk_size=1024):
                            if chunk:  # filter out keep-alive new chunks
                                f.write(chunk)
                                progress_bar.update(len(chunk))
                if sha1_hash and not check_sha1(fname, sha1_hash):
                    raise UserWarning(
                        "File {} is downloaded but the content hash does not match."
                        " The repo may be outdated or download may be incomplete. "
                        'If the "repo_url" is overridden, consider switching to '
                        "the default repo.".format(fname)
                    )
                break
            except Exception:
                retries -= 1
                if retries <= 0:
                    # Out of attempts: propagate the last failure unchanged.
                    raise
                if log:
                    print(
                        "download failed, retrying, {} attempt{} left".format(
                            retries, "s" if retries > 1 else ""
                        )
                    )

    return fname


def check_sha1(filename, sha1_hash):
    """Check whether the sha1 hash of the file content matches the expected hash.

    Codes borrowed from mxnet/gluon/utils.py

    Parameters
    ----------
    filename : str
        Path to the file.
    sha1_hash : str
        Expected sha1 hash in hexadecimal digits.

    Returns
    -------
    bool
        Whether the file content matches the expected hash.
    """
    sha1 = hashlib.sha1()
    with open(filename, "rb") as f:
        # Hash in 1 MiB chunks so large files are never fully loaded.
        for data in iter(lambda: f.read(1048576), b""):
            sha1.update(data)

    return sha1.hexdigest() == sha1_hash


def extract_archive(file, target_dir, overwrite=True):
    """Extract archive file.

    Supported formats are tar archives (``.tar``, ``.tar.gz``, ``.tgz``),
    single gzip-compressed files (``.gz``) and zip archives (``.zip``).

    Parameters
    ----------
    file : str
        Absolute path of the archive file.
    target_dir : str
        Target directory of the archive to be uncompressed.
    overwrite : bool, default True
        Whether to overwrite the contents inside the directory.
        By default always overwrites.

    Raises
    ------
    Exception
        If a tar member would be written outside :attr:`target_dir`, or if
        the file extension is not recognized.
    """
    if os.path.exists(target_dir) and not overwrite:
        return
    print("Extracting file to {}".format(target_dir))
    if file.endswith((".tar.gz", ".tar", ".tgz")):
        import tarfile

        def is_within_directory(directory, target):
            abs_directory = os.path.abspath(directory)
            abs_target = os.path.abspath(target)
            # `commonprefix` compares character by character, so a sibling
            # such as "<dir>side/x" would wrongly count as being inside
            # "<dir>"; `commonpath` compares whole path components.
            try:
                common = os.path.commonpath([abs_directory, abs_target])
            except ValueError:
                # E.g. paths on different drives: certainly not inside.
                return False
            return common == abs_directory

        with tarfile.open(file, "r") as archive:
            # Validate every member before anything is written to disk.
            for member in archive.getmembers():
                member_path = os.path.join(target_dir, member.name)
                if not is_within_directory(target_dir, member_path):
                    raise Exception("Attempted Path Traversal in Tar File")
            archive.extractall(target_dir)
    elif file.endswith(".gz"):
        import gzip
        import shutil

        # The output file is opened directly, so the folder must exist.
        os.makedirs(target_dir, exist_ok=True)
        target_file = os.path.join(target_dir, os.path.basename(file)[:-3])
        with gzip.open(file, "rb") as f_in, open(target_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
    elif file.endswith(".zip"):
        import zipfile

        with zipfile.ZipFile(file, "r") as archive:
            archive.extractall(path=target_dir)
    else:
        raise Exception("Unrecognized file type: " + file)
def minibatcher_default(batch, names):
    """Default minibatcher which maps a list of items to a `MiniBatch` with the
    same names as the items.

    Each name is expected to match a data attribute of `MiniBatch`.  For a
    name that `MiniBatch` does not have, a warning is emitted through
    `gb_warning` and the item is still attached under that name.  When no
    names are given at all, a warning is emitted and the item list is handed
    back untouched.

    Parameters
    ----------
    batch : list
        List of items.
    names : Tuple[str] or None
        Names of items in `batch` with same length. The order should align
        with `batch`.

    Returns
    -------
    MiniBatch
        A minibatch (or `batch` itself when `names` is None).
    """
    if names is None:
        gb_warning(
            "Failed to map item list to `MiniBatch` as the names of items are "
            "not provided. Please provide a customized `MiniBatcher`. "
            "The item list is returned as is."
        )
        return batch

    if len(names) == 1:
        # A single item arrives un-tupled, e.g. batch = tensor([0, 1, 2, 3])
        # with names = ("seeds",); zipping would iterate over the tensor
        # itself rather than over a tuple of items.
        init_data = {names[0]: batch}
    elif isinstance(batch, Mapping):
        # Heterogeneous batch: regroup {type: (item0, item1, ...)} into
        # {name: {type: item}}.
        init_data = {
            name: {k: v[i] for k, v in batch.items()}
            for i, name in enumerate(names)
        }
    else:
        init_data = dict(zip(names, batch))

    minibatch = MiniBatch()

    # TODO(#7254): Hacks for original `seed_nodes` and `node_pairs`, which need
    # to be cleaned up later.
    if "node_pairs" in names:
        pos_seeds = init_data["node_pairs"]
        # Collect whichever negative endpoints were supplied.
        negatives = {}
        if "negative_srcs" in names:
            negatives["neg_srcs"] = init_data["negative_srcs"]
        if "negative_dsts" in names:
            negatives["neg_dsts"] = init_data["negative_dsts"]
        if negatives:
            # Build negative graph.
            (
                init_data["seeds"],
                init_data["labels"],
                init_data["indexes"],
            ) = _construct_seeds(pos_seeds, **negatives)
        else:
            init_data["seeds"] = pos_seeds

    legacy_names = ("node_pairs", "negative_srcs", "negative_dsts")
    for name, item in init_data.items():
        if not hasattr(minibatch, name):
            gb_warning(
                f"Unknown item name '{name}' is detected and added into "
                "`MiniBatch`. You probably need to provide a customized "
                "`MiniBatcher`."
            )
        # TODO(#7254): Hacks for original `seed_nodes` and `node_pairs`, which
        # need to be cleaned up later.
        attribute = "seeds" if name == "seed_nodes" else name
        if attribute not in legacy_names:
            setattr(minibatch, attribute, item)
    return minibatch
class ItemSampler(IterDataPipe):
    """A sampler to iterate over input items and create minibatches.

    Input items could be node IDs, node pairs with or without labels, node
    pairs with negative sources/destinations.

    Note: This class `ItemSampler` is not decorated with
    `torch.utils.data.functional_datapipe` on purpose. This indicates it
    does not support function-like call. But any iterable datapipes from
    `torch.utils.data.datapipes` can be further appended.

    Parameters
    ----------
    item_set : Union[ItemSet, HeteroItemSet]
        Data to be sampled.
    batch_size : int
        The size of each batch.
    minibatcher : Optional[Callable]
        A callable that takes in a list of items and returns a `MiniBatch`.
    drop_last : bool
        Option to drop the last batch if it's not full.
    shuffle : bool
        Option to shuffle before sample.
    seed: int
        The seed for reproducible stochastic shuffling. If None, a random seed
        will be generated.

    Examples
    --------
    1. Node IDs.

    >>> import torch
    >>> from dgl import graphbolt as gb
    >>> item_set = gb.ItemSet(torch.arange(0, 10), names="seeds")
    >>> item_sampler = gb.ItemSampler(
    ...     item_set, batch_size=4, shuffle=False, drop_last=False
    ... )
    >>> next(iter(item_sampler))
    MiniBatch(seeds=tensor([0, 1, 2, 3]), sampled_subgraphs=None,
        node_features=None, labels=None, input_nodes=None,
        indexes=None, edge_features=None, compacted_seeds=None,
        blocks=None,)

    2. Node pairs.

    >>> item_set = gb.ItemSet(torch.arange(0, 20).reshape(-1, 2),
    ...     names="seeds")
    >>> item_sampler = gb.ItemSampler(
    ...     item_set, batch_size=4, shuffle=False, drop_last=False
    ... )
    >>> next(iter(item_sampler))
    MiniBatch(seeds=tensor([[0, 1], [2, 3], [4, 5], [6, 7]]),
        sampled_subgraphs=None, node_features=None, labels=None,
        input_nodes=None, indexes=None, edge_features=None,
        compacted_seeds=None, blocks=None,)

    3. Node pairs and labels.

    >>> item_set = gb.ItemSet(
    ...     (torch.arange(0, 20).reshape(-1, 2), torch.arange(10, 20)),
    ...     names=("seeds", "labels")
    ... )
    >>> item_sampler = gb.ItemSampler(
    ...     item_set, batch_size=4, shuffle=False, drop_last=False
    ... )
    >>> next(iter(item_sampler))
    MiniBatch(seeds=tensor([[0, 1], [2, 3], [4, 5], [6, 7]]),
        sampled_subgraphs=None, node_features=None,
        labels=tensor([10, 11, 12, 13]), input_nodes=None,
        indexes=None, edge_features=None, compacted_seeds=None,
        blocks=None,)

    4. Node pairs, labels and indexes.

    >>> seeds = torch.arange(0, 20).reshape(-1, 2)
    >>> labels = torch.tensor([1, 1, 0, 0, 0, 0, 0, 0, 0, 0])
    >>> indexes = torch.tensor([0, 1, 0, 0, 0, 0, 1, 1, 1, 1])
    >>> item_set = gb.ItemSet((seeds, labels, indexes), names=("seeds",
    ...     "labels", "indexes"))
    >>> item_sampler = gb.ItemSampler(
    ...     item_set, batch_size=4, shuffle=False, drop_last=False
    ... )
    >>> next(iter(item_sampler))
    MiniBatch(seeds=tensor([[0, 1], [2, 3], [4, 5], [6, 7]]),
        sampled_subgraphs=None, node_features=None,
        labels=tensor([1, 1, 0, 0]), input_nodes=None,
        indexes=tensor([0, 1, 0, 0]), edge_features=None,
        compacted_seeds=None, blocks=None,)

    5. Further process batches with other datapipes such as
    :class:`torch.utils.data.datapipes.iter.Mapper`.

    >>> item_set = gb.ItemSet(torch.arange(0, 10))
    >>> data_pipe = gb.ItemSampler(item_set, 4)
    >>> def add_one(batch):
    ...     return batch + 1
    >>> data_pipe = data_pipe.map(add_one)
    >>> list(data_pipe)
    [tensor([1, 2, 3, 4]), tensor([5, 6, 7, 8]), tensor([ 9, 10])]

    6. Heterogeneous node IDs.

    >>> ids = {
    ...     "user": gb.ItemSet(torch.arange(0, 5), names="seeds"),
    ...     "item": gb.ItemSet(torch.arange(0, 6), names="seeds"),
    ... }
    >>> item_set = gb.HeteroItemSet(ids)
    >>> item_sampler = gb.ItemSampler(item_set, batch_size=4)
    >>> next(iter(item_sampler))
    MiniBatch(seeds={'user': tensor([0, 1, 2, 3])}, sampled_subgraphs=None,
        node_features=None, labels=None, input_nodes=None, indexes=None,
        edge_features=None, compacted_seeds=None, blocks=None,)

    7. Heterogeneous node pairs.

    >>> seeds_like = torch.arange(0, 10).reshape(-1, 2)
    >>> seeds_follow = torch.arange(10, 20).reshape(-1, 2)
    >>> item_set = gb.HeteroItemSet({
    ...     "user:like:item": gb.ItemSet(
    ...         seeds_like, names="seeds"),
    ...     "user:follow:user": gb.ItemSet(
    ...         seeds_follow, names="seeds"),
    ... })
    >>> item_sampler = gb.ItemSampler(item_set, batch_size=4)
    >>> next(iter(item_sampler))
    MiniBatch(seeds={'user:like:item':
        tensor([[0, 1], [2, 3], [4, 5], [6, 7]])}, sampled_subgraphs=None,
        node_features=None, labels=None, input_nodes=None, indexes=None,
        edge_features=None, compacted_seeds=None, blocks=None,)

    8. Heterogeneous node pairs and labels.

    >>> seeds_like = torch.arange(0, 10).reshape(-1, 2)
    >>> labels_like = torch.arange(0, 5)
    >>> seeds_follow = torch.arange(10, 20).reshape(-1, 2)
    >>> labels_follow = torch.arange(5, 10)
    >>> item_set = gb.HeteroItemSet({
    ...     "user:like:item": gb.ItemSet((seeds_like, labels_like),
    ...         names=("seeds", "labels")),
    ...     "user:follow:user": gb.ItemSet((seeds_follow, labels_follow),
    ...         names=("seeds", "labels")),
    ... })
    >>> item_sampler = gb.ItemSampler(item_set, batch_size=4)
    >>> next(iter(item_sampler))
    MiniBatch(seeds={'user:like:item':
        tensor([[0, 1], [2, 3], [4, 5], [6, 7]])}, sampled_subgraphs=None,
        node_features=None, labels={'user:like:item': tensor([0, 1, 2, 3])},
        input_nodes=None, indexes=None, edge_features=None,
        compacted_seeds=None, blocks=None,)

    9. Heterogeneous node pairs, labels and indexes.

    >>> seeds_like = torch.arange(0, 10).reshape(-1, 2)
    >>> labels_like = torch.tensor([1, 1, 0, 0, 0])
    >>> indexes_like = torch.tensor([0, 1, 0, 0, 1])
    >>> seeds_follow = torch.arange(20, 30).reshape(-1, 2)
    >>> labels_follow = torch.tensor([1, 1, 0, 0, 0])
    >>> indexes_follow = torch.tensor([0, 1, 0, 0, 1])
    >>> item_set = gb.HeteroItemSet({
    ...     "user:like:item": gb.ItemSet((seeds_like, labels_like,
    ...         indexes_like), names=("seeds", "labels", "indexes")),
    ...     "user:follow:user": gb.ItemSet((seeds_follow,labels_follow,
    ...         indexes_follow), names=("seeds", "labels", "indexes")),
    ... })
    >>> item_sampler = gb.ItemSampler(item_set, batch_size=4)
    >>> next(iter(item_sampler))
    MiniBatch(seeds={'user:like:item':
        tensor([[0, 1], [2, 3], [4, 5], [6, 7]])}, sampled_subgraphs=None,
        node_features=None, labels={'user:like:item': tensor([1, 1, 0, 0])},
        input_nodes=None, indexes={'user:like:item': tensor([0, 1, 0, 0])},
        edge_features=None, compacted_seeds=None, blocks=None,)
    """

    def __init__(
        self,
        item_set: Union[ItemSet, HeteroItemSet],
        batch_size: int,
        minibatcher: Optional[Callable] = minibatcher_default,
        drop_last: Optional[bool] = False,
        shuffle: Optional[bool] = False,
        seed: Optional[int] = None,
    ) -> None:
        super().__init__()
        # What to sample from and how to name the sampled items.
        self._item_set = item_set
        self._names = item_set.names
        # Batching behaviour.
        self._batch_size = batch_size
        self._minibatcher = minibatcher
        self._drop_last = drop_last
        self._shuffle = shuffle
        # Single-process defaults; `DistributedItemSampler` overrides these.
        self._distributed = False
        self._drop_uneven_inputs = False
        self._world_size = None
        self._rank = None
        # For the sake of reproducibility, the seed should be allowed to be
        # manually set by the user.
        self._seed = (
            np.random.randint(0, np.iinfo(np.int32).max)
            if seed is None
            else seed
        )
        # The attribute `self._epoch` is added to make shuffling work properly
        # across multiple epochs. Otherwise, the same ordering will always be
        # used in every epoch.
        self._epoch = 0

    def __iter__(self) -> Iterator:
        # Inside a DataLoader worker process the item range is further
        # divided among workers; otherwise act as the only worker.
        worker_info = torch.utils.data.get_worker_info()
        if worker_info is None:
            num_workers, worker_id = 1, 0
        else:
            num_workers, worker_id = worker_info.num_workers, worker_info.id

        total = len(self._item_set)
        start_offset, assigned_count, output_count = calculate_range(
            self._distributed,
            total,
            self._world_size,
            self._rank,
            num_workers,
            worker_id,
            self._batch_size,
            self._drop_last,
            self._drop_uneven_inputs,
        )
        stop_offset = start_offset + assigned_count

        if self._shuffle:
            # Seed + epoch gives a different, yet reproducible, permutation
            # of the whole item set in every epoch.
            generator = torch.Generator().manual_seed(
                self._seed + self._epoch
            )
            indices = torch.randperm(total, generator=generator)[
                start_offset:stop_offset
            ]
        else:
            indices = torch.arange(start_offset, stop_offset)

        remaining = output_count
        for begin in range(0, assigned_count, self._batch_size):
            if remaining <= 0:
                break
            # The final batch may be shorter than `batch_size`.
            end = begin + min(self._batch_size, remaining)
            yield self._minibatcher(
                self._item_set[indices[begin:end]],
                self._names,
            )
            remaining -= self._batch_size

        self._epoch += 1
class DistributedItemSampler(ItemSampler):
    """A sampler to iterate over input items and create subsets distributedly.

    This sampler creates a distributed subset of items from the given data set,
    which can be used for training with PyTorch's Distributed Data Parallel
    (DDP). The items can be node IDs, node pairs with or without labels, node
    pairs with negative sources/destinations, DGLGraphs, or heterogeneous
    counterparts. The original item set is split such that each replica
    (process) receives an exclusive subset.

    Note: The world size and the rank of the current process are read from
    the default process group (``torch.distributed.get_world_size()`` and
    ``torch.distributed.get_rank()``), so the process group must be
    initialized before this sampler is constructed.

    Note: When shuffling is enabled, the shuffling seed is synchronized
    across replicas (broadcast from rank 0) so that every replica draws the
    same permutation of the whole item set and then takes its own assigned
    range of that permutation.

    Note: This class `DistributedItemSampler` is not decorated with
    `torch.utils.data.functional_datapipe` on purpose. This indicates it
    does not support function-like call. But any iterable datapipes from
    `torch.utils.data.datapipes` can be further appended.

    Parameters
    ----------
    item_set : Union[ItemSet, HeteroItemSet]
        Data to be sampled.
    batch_size : int
        The size of each batch.
    minibatcher : Optional[Callable]
        A callable that takes in a list of items and returns a `MiniBatch`.
    drop_last : bool
        Option to drop the last batch if it's not full.
    shuffle : bool
        Option to shuffle before sample.
    drop_uneven_inputs : bool
        Option to make sure the numbers of batches for each replica are the
        same. If some of the replicas have more batches than the others, the
        redundant batches of those replicas will be dropped. If the drop_last
        parameter is also set to True, the last batch will be dropped before the
        redundant batches are dropped.
        Note: When using Distributed Data Parallel (DDP) training, the program
        may hang or error if a replica has fewer inputs. It is recommended to
        use the Join Context Manager provided by PyTorch to solve this problem.
        Please refer to
        https://pytorch.org/tutorials/advanced/generic_join.html. However, this
        option can be used if the Join Context Manager is not helpful for any
        reason.
    seed: int
        The seed for reproducible stochastic shuffling. If None, a random seed
        will be generated.

    Raises
    ------
    RuntimeError
        If the ``torch.distributed`` package is not available.

    Examples
    --------
    0. Preparation: DistributedItemSampler needs multi-processing environment to
    work. You need to spawn subprocesses and initialize processing group before
    executing following examples. Due to randomness, the output is not always
    the same as listed below.

    >>> import torch
    >>> from dgl import graphbolt as gb
    >>> item_set = gb.ItemSet(torch.arange(15))
    >>> num_replicas = 4
    >>> batch_size = 2
    >>> mp.spawn(...)

    1. shuffle = False, drop_last = False, drop_uneven_inputs = False.

    >>> item_sampler = gb.DistributedItemSampler(
    ...     item_set, batch_size=2, shuffle=False, drop_last=False,
    ...     drop_uneven_inputs=False
    ... )
    >>> data_loader = gb.DataLoader(item_sampler)
    >>> print(f"Replica#{proc_id}: {list(data_loader)}")
    Replica#0: [tensor([0, 1]), tensor([2, 3])]
    Replica#1: [tensor([4, 5]), tensor([6, 7])]
    Replica#2: [tensor([8, 9]), tensor([10, 11])]
    Replica#3: [tensor([12, 13]), tensor([14])]

    2. shuffle = False, drop_last = True, drop_uneven_inputs = False.

    >>> item_sampler = gb.DistributedItemSampler(
    ...     item_set, batch_size=2, shuffle=False, drop_last=True,
    ...     drop_uneven_inputs=False
    ... )
    >>> data_loader = gb.DataLoader(item_sampler)
    >>> print(f"Replica#{proc_id}: {list(data_loader)}")
    Replica#0: [tensor([0, 1]), tensor([2, 3])]
    Replica#1: [tensor([4, 5]), tensor([6, 7])]
    Replica#2: [tensor([8, 9]), tensor([10, 11])]
    Replica#3: [tensor([12, 13])]

    3. shuffle = False, drop_last = False, drop_uneven_inputs = True.

    >>> item_sampler = gb.DistributedItemSampler(
    ...     item_set, batch_size=2, shuffle=False, drop_last=False,
    ...     drop_uneven_inputs=True
    ... )
    >>> data_loader = gb.DataLoader(item_sampler)
    >>> print(f"Replica#{proc_id}: {list(data_loader)}")
    Replica#0: [tensor([0, 1]), tensor([2, 3])]
    Replica#1: [tensor([4, 5]), tensor([6, 7])]
    Replica#2: [tensor([8, 9]), tensor([10, 11])]
    Replica#3: [tensor([12, 13]), tensor([14])]

    4. shuffle = False, drop_last = True, drop_uneven_inputs = True.

    >>> item_sampler = gb.DistributedItemSampler(
    ...     item_set, batch_size=2, shuffle=False, drop_last=True,
    ...     drop_uneven_inputs=True
    ... )
    >>> data_loader = gb.DataLoader(item_sampler)
    >>> print(f"Replica#{proc_id}: {list(data_loader)}")
    Replica#0: [tensor([0, 1])]
    Replica#1: [tensor([4, 5])]
    Replica#2: [tensor([8, 9])]
    Replica#3: [tensor([12, 13])]

    5. shuffle = True, drop_last = True, drop_uneven_inputs = False.

    >>> item_sampler = gb.DistributedItemSampler(
    ...     item_set, batch_size=2, shuffle=True, drop_last=True,
    ...     drop_uneven_inputs=False
    ... )
    >>> data_loader = gb.DataLoader(item_sampler)
    >>> print(f"Replica#{proc_id}: {list(data_loader)}")
    (One possible output:)
    Replica#0: [tensor([3, 2]), tensor([0, 1])]
    Replica#1: [tensor([6, 5]), tensor([7, 4])]
    Replica#2: [tensor([8, 10])]
    Replica#3: [tensor([14, 12])]

    6. shuffle = True, drop_last = True, drop_uneven_inputs = True.

    >>> item_sampler = gb.DistributedItemSampler(
    ...     item_set, batch_size=2, shuffle=True, drop_last=True,
    ...     drop_uneven_inputs=True
    ... )
    >>> data_loader = gb.DataLoader(item_sampler)
    >>> print(f"Replica#{proc_id}: {list(data_loader)}")
    (One possible output:)
    Replica#0: [tensor([1, 3])]
    Replica#1: [tensor([7, 5])]
    Replica#2: [tensor([11, 9])]
    Replica#3: [tensor([13, 14])]
    """

    def __init__(
        self,
        item_set: Union[ItemSet, HeteroItemSet],
        batch_size: int,
        minibatcher: Optional[Callable] = minibatcher_default,
        drop_last: Optional[bool] = False,
        shuffle: Optional[bool] = False,
        drop_uneven_inputs: Optional[bool] = False,
        seed: Optional[int] = None,
    ) -> None:
        super().__init__(
            item_set,
            batch_size,
            minibatcher,
            drop_last,
            shuffle,
            seed,
        )
        self._distributed = True
        self._drop_uneven_inputs = drop_uneven_inputs
        if not dist.is_available():
            raise RuntimeError(
                "Distributed item sampler requires distributed package."
            )
        self._world_size = dist.get_world_size()
        self._rank = dist.get_rank()
        if self._world_size > 1:
            # For the sake of reproducibility, the seed should be allowed to be
            # manually set by the user.
            self._align_seeds(src=0, seed=seed)

    def _align_seeds(
        self, src: Optional[int] = 0, seed: Optional[int] = None
    ) -> None:
        """Aligns seeds across distributed processes.

        This method synchronizes seeds across distributed processes, ensuring
        consistent randomness.  It is a collective operation: every process
        of the group has to call it.

        Parameters
        ----------
        src: int, optional
            The source process rank. Defaults to 0.
        seed: int, optional
            The seed value to synchronize. If None, a random seed will be
            generated. Defaults to None.
        """
        # NCCL can only communicate CUDA tensors; other backends use CPU.
        device = (
            torch.cuda.current_device()
            if torch.cuda.is_available() and dist.get_backend() == "nccl"
            else "cpu"
        )
        if seed is None:
            seed = np.random.randint(0, np.iinfo(np.int32).max)
        # The seed travels as int64: a user-provided seed >= 2**31 is valid
        # for `ItemSampler`, but building an int32 tensor from it would raise
        # on the source rank while the other ranks wait in `broadcast`.
        if self._rank == src:
            seed_tensor = torch.tensor(seed, dtype=torch.int64, device=device)
        else:
            seed_tensor = torch.empty([], dtype=torch.int64, device=device)
        dist.broadcast(seed_tensor, src=src)
        self._seed = seed_tensor.item()
If None, a random seed will be generated. Defaults to None. """ device = ( torch.cuda.current_device() if torch.cuda.is_available() and dist.get_backend() == "nccl" else "cpu" ) if seed is None: seed = np.random.randint(0, np.iinfo(np.int32).max) if self._rank == src: seed_tensor = torch.tensor(seed, dtype=torch.int32, device=device) else: seed_tensor = torch.empty([], dtype=torch.int32, device=device) dist.broadcast(seed_tensor, src=src) self._seed = seed_tensor.item() def _construct_seeds(pos_seeds, neg_srcs=None, neg_dsts=None): # For homogeneous graph. if isinstance(pos_seeds, torch.Tensor): negative_ratio = neg_srcs.size(1) if neg_srcs else neg_dsts.size(1) neg_srcs = ( neg_srcs if neg_srcs is not None else pos_seeds[:, 0].repeat_interleave(negative_ratio) ).view(-1) neg_dsts = ( neg_dsts if neg_dsts is not None else pos_seeds[:, 1].repeat_interleave(negative_ratio) ).view(-1) neg_seeds = torch.cat((neg_srcs, neg_dsts)).view(2, -1).T seeds = torch.cat((pos_seeds, neg_seeds)) pos_seeds_num = pos_seeds.size(0) labels = torch.empty(seeds.size(0), device=pos_seeds.device) labels[:pos_seeds_num] = 1 labels[pos_seeds_num:] = 0 pos_indexes = torch.arange( 0, pos_seeds_num, device=pos_seeds.device, ) neg_indexes = pos_indexes.repeat_interleave(negative_ratio) indexes = torch.cat((pos_indexes, neg_indexes)) # For heterogeneous graph. 
def is_scalar(x):
    """Checks if the input is a scalar.

    A tensor counts as a scalar when it is zero-dimensional; any other
    object counts only if it is an ``int``.
    """
    if isinstance(x, torch.Tensor):
        return len(x.shape) == 0
    return isinstance(x, int)


class ItemSet:
    r"""A wrapper of a tensor or tuple of tensors.

    Parameters
    ----------
    items: Union[int, torch.Tensor, Tuple[torch.Tensor]]
        The tensors to be wrapped.
        - If it is a single scalar (an integer or a tensor that holds a single
          value), the item would be considered as a range_tensor created by
          `torch.arange`.
        - If it is a multi-dimensional tensor, the indexing will be performed
          along the first dimension.
        - If it is a tuple, each item in the tuple must be a tensor.

    names: Union[str, Tuple[str]], optional
        The names of the items. If it is a tuple, each name must corresponds to
        an item in the `items` parameter. The naming is arbitrary, but in
        general practice, the names should be chosen from ['labels', 'seeds',
        'indexes'] to align with the attributes of class
        `dgl.graphbolt.MiniBatch`.

    Examples
    --------
    >>> import torch
    >>> from dgl import graphbolt as gb

    1. Integer: number of nodes.

    >>> num = 10
    >>> item_set = gb.ItemSet(num, names="seeds")
    >>> list(item_set)
    [tensor(0), tensor(1), tensor(2), tensor(3), tensor(4), tensor(5),
     tensor(6), tensor(7), tensor(8), tensor(9)]
    >>> item_set[:]
    tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> item_set.names
    ('seeds',)

    2. Torch scalar: number of nodes. Customizable dtype compared to Integer.

    >>> num = torch.tensor(10, dtype=torch.int32)
    >>> item_set = gb.ItemSet(num, names="seeds")
    >>> list(item_set)
    [tensor(0, dtype=torch.int32), tensor(1, dtype=torch.int32),
     tensor(2, dtype=torch.int32), tensor(3, dtype=torch.int32),
     tensor(4, dtype=torch.int32), tensor(5, dtype=torch.int32),
     tensor(6, dtype=torch.int32), tensor(7, dtype=torch.int32),
     tensor(8, dtype=torch.int32), tensor(9, dtype=torch.int32)]
    >>> item_set[:]
    tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.int32)
    >>> item_set.names
    ('seeds',)

    3. Single tensor: seed nodes.

    >>> node_ids = torch.arange(0, 5)
    >>> item_set = gb.ItemSet(node_ids, names="seeds")
    >>> list(item_set)
    [tensor(0), tensor(1), tensor(2), tensor(3), tensor(4)]
    >>> item_set[:]
    tensor([0, 1, 2, 3, 4])
    >>> item_set.names
    ('seeds',)

    4. Tuple of tensors with same shape: seed nodes and labels.

    >>> node_ids = torch.arange(0, 5)
    >>> labels = torch.arange(5, 10)
    >>> item_set = gb.ItemSet(
    ...     (node_ids, labels), names=("seeds", "labels"))
    >>> list(item_set)
    [(tensor(0), tensor(5)), (tensor(1), tensor(6)), (tensor(2), tensor(7)),
     (tensor(3), tensor(8)), (tensor(4), tensor(9))]
    >>> item_set[:]
    (tensor([0, 1, 2, 3, 4]), tensor([5, 6, 7, 8, 9]))
    >>> item_set.names
    ('seeds', 'labels')

    5. Tuple of tensors with different shape: seeds and labels.

    >>> seeds = torch.arange(0, 10).reshape(-1, 2)
    >>> labels = torch.tensor([1, 1, 0, 0, 0])
    >>> item_set = gb.ItemSet(
    ...     (seeds, labels), names=("seeds", "labels"))
    >>> list(item_set)
    [(tensor([0, 1]), tensor(1)),
     (tensor([2, 3]), tensor(1)),
     (tensor([4, 5]), tensor(0)),
     (tensor([6, 7]), tensor(0)),
     (tensor([8, 9]), tensor(0))]
    >>> item_set[:]
    (tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),
     tensor([1, 1, 0, 0, 0]))
    >>> item_set.names
    ('seeds', 'labels')

    6. Tuple of tensors with different shape: hyperlink and labels.

    >>> seeds = torch.arange(0, 10).reshape(-1, 5)
    >>> labels = torch.tensor([1, 0])
    >>> item_set = gb.ItemSet(
    ...     (seeds, labels), names=("seeds", "labels"))
    >>> list(item_set)
    [(tensor([0, 1, 2, 3, 4]), tensor(1)),
     (tensor([5, 6, 7, 8, 9]), tensor(0))]
    >>> item_set[:]
    (tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]),
     tensor([1, 0]))
    >>> item_set.names
    ('seeds', 'labels')
    """

    def __init__(
        self,
        items: Union[int, torch.Tensor, Tuple[torch.Tensor]],
        names: Union[str, Tuple[str]] = None,
    ) -> None:
        if is_scalar(items):
            # A scalar stands for the implicit range [0, items); it is kept
            # as is (not wrapped) so that lookups can recognise this mode.
            self._length = int(items)
            self._items = items
        elif isinstance(items, tuple):
            self._length = len(items[0])
            if any(self._length != len(item) for item in items):
                raise ValueError("Size mismatch between items.")
            self._items = items
        else:
            # A single non-scalar item is normalised to a 1-tuple.
            self._length = len(items)
            self._items = (items,)

        if isinstance(self._items, tuple):
            self._num_items = len(self._items)
        else:
            self._num_items = 1

        if names is None:
            self._names = None
        else:
            self._names = names if isinstance(names, tuple) else (names,)
            assert self._num_items == len(self._names), (
                f"Number of items ({self._num_items}) and "
                f"names ({len(self._names)}) don't match."
            )

    def __len__(self) -> int:
        return self._length

    def __getitem__(self, index: Union[int, slice, Iterable[int]]):
        if not is_scalar(self._items):
            # Tensor-backed storage: delegate indexing to the tensors.
            if self._num_items == 1:
                return self._items[0][index]
            return tuple(item[index] for item in self._items)

        # Range mode: the i-th item is simply the value i, so results are
        # synthesised instead of being looked up.
        dtype = getattr(self._items, "dtype", torch.int64)
        if isinstance(index, slice):
            start, stop, step = index.indices(self._length)
            return torch.arange(start, stop, step, dtype=dtype)
        if isinstance(index, int):
            position = index + self._length if index < 0 else index
            if not 0 <= position < self._length:
                raise IndexError(
                    f"{type(self).__name__} index out of range."
                )
            return torch.tensor(position, dtype=dtype)
        if isinstance(index, torch.Tensor):
            # The index values are returned as given, only cast to `dtype`.
            return index.to(dtype)
        raise TypeError(
            f"{type(self).__name__} indices must be int, slice, or "
            f"torch.Tensor, not {type(index)}."
        )

    @property
    def names(self) -> Tuple[str]:
        """Return the names of the items."""
        return self._names

    @property
    def num_items(self) -> int:
        """Return the number of the items."""
        return self._num_items

    def __repr__(self) -> str:
        pieces = (
            f"{self.__class__.__name__}(\n",
            f" items={self._items},\n",
            f" names={self._names},\n",
            f")",
        )
        return "".join(pieces)
"item": gb.ItemSet(node_ids_item, names="seeds")}) >>> list(item_set) [{"user": tensor(0)}, {"user": tensor(1)}, {"user": tensor(2)}, {"user": tensor(3)}, {"user": tensor(4)}, {"item": tensor(5)}, {"item": tensor(6)}, {"item": tensor(7)}, {"item": tensor(8)}, {"item": tensor(9)}}] >>> item_set[:] {"user": tensor([0, 1, 2, 3, 4]), "item": tensor([5, 6, 7, 8, 9])} >>> item_set.names ('seeds',) 2. Each itemset is a tuple of tensors with same shape: seed nodes and labels. >>> node_ids_user = torch.arange(0, 2) >>> labels_user = torch.arange(0, 2) >>> node_ids_item = torch.arange(2, 5) >>> labels_item = torch.arange(2, 5) >>> item_set = gb.HeteroItemSet({ ... "user": gb.ItemSet( ... (node_ids_user, labels_user), ... names=("seeds", "labels")), ... "item": gb.ItemSet( ... (node_ids_item, labels_item), ... names=("seeds", "labels"))}) >>> list(item_set) [{"user": (tensor(0), tensor(0))}, {"user": (tensor(1), tensor(1))}, {"item": (tensor(2), tensor(2))}, {"item": (tensor(3), tensor(3))}, {"item": (tensor(4), tensor(4))}}] >>> item_set[:] {"user": (tensor([0, 1]), tensor([0, 1])), "item": (tensor([2, 3, 4]), tensor([2, 3, 4]))} >>> item_set.names ('seeds', 'labels') 3. Each itemset is a tuple of tensors with different shape: seeds and labels. >>> seeds_like = torch.arange(0, 4).reshape(-1, 2) >>> labels_like = torch.tensor([1, 0]) >>> seeds_follow = torch.arange(0, 6).reshape(-1, 2) >>> labels_follow = torch.tensor([1, 1, 0]) >>> item_set = gb.HeteroItemSet({ ... "user:like:item": gb.ItemSet( ... (seeds_like, labels_like), ... names=("seeds", "labels")), ... "user:follow:user": gb.ItemSet( ... (seeds_follow, labels_follow), ... 
names=("seeds", "labels"))}) >>> list(item_set) [{'user:like:item': (tensor([0, 1]), tensor(1))}, {'user:like:item': (tensor([2, 3]), tensor(0))}, {'user:follow:user': (tensor([0, 1]), tensor(1))}, {'user:follow:user': (tensor([2, 3]), tensor(1))}, {'user:follow:user': (tensor([4, 5]), tensor(0))}] >>> item_set[:] {'user:like:item': (tensor([[0, 1], [2, 3]]), tensor([1, 0])), 'user:follow:user': (tensor([[0, 1], [2, 3], [4, 5]]), tensor([1, 1, 0]))} >>> item_set.names ('seeds', 'labels') 4. Each itemset is a tuple of tensors with different shape: hyperlink and labels. >>> first_seeds = torch.arange(0, 6).reshape(-1, 3) >>> first_labels = torch.tensor([1, 0]) >>> second_seeds = torch.arange(0, 2).reshape(-1, 1) >>> second_labels = torch.tensor([1, 0]) >>> item_set = gb.HeteroItemSet({ ... "query:user:item": gb.ItemSet( ... (first_seeds, first_labels), ... names=("seeds", "labels")), ... "user": gb.ItemSet( ... (second_seeds, second_labels), ... names=("seeds", "labels"))}) >>> list(item_set) [{'query:user:item': (tensor([0, 1, 2]), tensor(1))}, {'query:user:item': (tensor([3, 4, 5]), tensor(0))}, {'user': (tensor([0]), tensor(1))}, {'user': (tensor([1]), tensor(0))}] >>> item_set[:] {'query:user:item': (tensor([[0, 1, 2], [3, 4, 5]]), tensor([1, 0])), 'user': (tensor([[0], [1]]),tensor([1, 0]))} >>> item_set.names ('seeds', 'labels') """ def __init__(self, itemsets: Dict[str, ItemSet]) -> None: self._itemsets = itemsets self._names = next(iter(itemsets.values())).names assert all( self._names == itemset.names for itemset in itemsets.values() ), "All itemsets must have the same names." 
offset = [0] + [len(itemset) for itemset in self._itemsets.values()] self._offsets = torch.tensor(offset).cumsum(0) self._length = int(self._offsets[-1]) self._keys = list(self._itemsets.keys()) def __len__(self) -> int: return self._length def __getitem__(self, index: Union[int, slice, Iterable[int]]): if isinstance(index, int): if index < 0: index += self._length if index < 0 or index >= self._length: raise IndexError(f"{type(self).__name__} index out of range.") offset_idx = torch.searchsorted(self._offsets, index, right=True) offset_idx -= 1 index -= self._offsets[offset_idx] key = self._keys[offset_idx] return {key: self._itemsets[key][index]} elif isinstance(index, slice): start, stop, step = index.indices(self._length) if step != 1: return self.__getitem__(torch.arange(start, stop, step)) assert start < stop, "Start must be smaller than stop." data = {} offset_idx_start = max( 1, torch.searchsorted(self._offsets, start, right=False) ) for offset_idx in range(offset_idx_start, len(self._offsets)): key = self._keys[offset_idx - 1] data[key] = self._itemsets[key][ max(0, start - self._offsets[offset_idx - 1]) : stop - self._offsets[offset_idx - 1] ] if stop <= self._offsets[offset_idx]: break return data elif isinstance(index, Iterable): if not isinstance(index, torch.Tensor): index = torch.tensor(index) assert torch.all((index >= 0) & (index < self._length)) key_indices = ( torch.searchsorted(self._offsets, index, right=True) - 1 ) data = {} for key_id, key in enumerate(self._keys): mask = (key_indices == key_id).nonzero().squeeze(1) if len(mask) == 0: continue data[key] = self._itemsets[key][ index[mask] - self._offsets[key_id] ] return data else: raise TypeError( f"{type(self).__name__} indices must be int, slice, or " f"iterable of int, not {type(index)}." 
class ItemSetDict:
    """`ItemSetDict` is a deprecated class and will be removed in a future
    version. Please use `HeteroItemSet` instead.

    This class is an alias for `HeteroItemSet` and serves as a wrapper to
    provide a smooth transition for users of the old class name. It issues a
    deprecation warning upon instantiation and forwards all attribute access
    and method calls to an instance of `HeteroItemSet`.
    """

    def __init__(self, itemsets: Dict[str, "ItemSet"]) -> None:
        gb_warning(
            "ItemSetDict is deprecated and will be removed in the future. "
            "Please use HeteroItemSet instead.",
            category=DeprecationWarning,
        )
        self._new_instance = HeteroItemSet(itemsets)

    def __getattr__(self, name: str):
        # `__getattr__` only runs when normal lookup fails. If the wrapped
        # instance itself is missing (e.g. while `copy` or `pickle` probes a
        # freshly created, not yet populated object for `__setstate__`),
        # forwarding would look up `_new_instance` again and recurse until
        # the interpreter gives up. Report the attribute as absent instead.
        if name == "_new_instance":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._new_instance, name)

    def __getitem__(self, index):
        return self._new_instance[index]

    def __len__(self) -> int:
        return len(self._new_instance)

    def __repr__(self) -> str:
        ret = (
            "{Classname}(\n"
            "    itemsets={itemsets},\n"
            "    names={names},\n"
            ")"
        )
        # Read the state from the wrapped instance explicitly rather than
        # relying on `__getattr__` forwarding of private attributes.
        wrapped = self._new_instance
        itemsets_str = textwrap.indent(
            repr(wrapped._itemsets), " " * len("    itemsets=")
        ).strip()
        return ret.format(
            Classname=self.__class__.__name__,
            itemsets=itemsets_str,
            names=wrapped.names,
        )
@dataclass
class MiniBatch:
    r"""A composite data class for data structure in the graphbolt.

    It is designed to facilitate the exchange of data among different
    components involved in processing data. The purpose of this class is to
    unify the representation of input and output data across different
    stages, ensuring consistency and ease of use throughout the loading
    process."""

    labels: Union[torch.Tensor, Dict[str, torch.Tensor]] = None
    """
    Labels associated with seeds in the graph.
    - If `labels` is a tensor: It indicates the graph is homogeneous. The value
      should be corresponding labels to given 'seeds'.
    - If `labels` is a dictionary: The keys should be node or edge type and the
      value should be corresponding labels to given 'seeds'.
    """

    seeds: Union[
        torch.Tensor,
        Dict[str, torch.Tensor],
    ] = None
    """
    Representation of seed items utilized in node classification tasks, link
    prediction tasks and hyperlinks tasks.
    - If `seeds` is a tensor: it indicates that the seeds originate from a
      homogeneous graph. It can be either a 1-dimensional or 2-dimensional
      tensor:
        - 1-dimensional tensor: Each element directly represents a seed node
          within the graph.
        - 2-dimensional tensor: Each row designates a seed item, which can
          encompass various entities such as edges, hyperlinks, or other graph
          components depending on the specific context.
    - If `seeds` is a dictionary: it indicates that the seeds originate from a
      heterogeneous graph. The keys should be edge or node type, and the value
      should be a tensor, which can be either a 1-dimensional or 2-dimensional
      tensor:
        - 1-dimensional tensor: Each element directly represents a seed node
          of the given type within the graph.
        - 2-dimensional tensor: Each row designates a seed item of the given
          type, which can encompass various entities such as edges,
          hyperlinks, or other graph components depending on the specific
          context.
    """

    indexes: Union[torch.Tensor, Dict[str, torch.Tensor]] = None
    """
    Indexes associated with seeds in the graph, which indicate to which query
    a seed belongs.
    - If `indexes` is a tensor: It indicates the graph is homogeneous. The
      value should be corresponding query to given 'seeds'.
    - If `indexes` is a dictionary: It indicates the graph is heterogeneous.
      The keys should be node or edge type and the value should be
      corresponding query to given 'seeds'. For each key, indexes are
      consecutive integers starting from zero.
    """

    sampled_subgraphs: List[SampledSubgraph] = None
    """A list of 'SampledSubgraph's, each one corresponding to one layer,
    representing a subset of a larger graph structure.
    """

    input_nodes: Union[torch.Tensor, Dict[str, torch.Tensor]] = None
    """A representation of input nodes in the outermost layer. Contains all
    nodes in the 'sampled_subgraphs'.
    - If `input_nodes` is a tensor: It indicates the graph is homogeneous.
    - If `input_nodes` is a dictionary: The keys should be node type and the
      value should be corresponding heterogeneous node id.
    """

    node_features: Union[
        Dict[str, torch.Tensor], Dict[Tuple[str, str], torch.Tensor]
    ] = None
    """A representation of node features.
      - If keys are single strings: It means the graph is homogeneous, and the
      keys are feature names.
      - If keys are tuples: It means the graph is heterogeneous, and the keys
      are tuples of '(node_type, feature_name)'.
    """

    edge_features: List[
        Union[Dict[str, torch.Tensor], Dict[Tuple[str, str], torch.Tensor]]
    ] = None
    """Edge features associated with the 'sampled_subgraphs'.
      - If keys are single strings: It means the graph is homogeneous, and the
      keys are feature names.
      - If keys are tuples: It means the graph is heterogeneous, and the keys
      are tuples of '(edge_type, feature_name)'. Note, edge type is single
      string of format 'str:str:str'.
    """

    compacted_seeds: Union[
        torch.Tensor,
        Dict[str, torch.Tensor],
    ] = None
    """
    Representation of compacted seeds corresponding to 'seeds', where
    all node ids inside are compacted.
    """

    _blocks: list = None
    """
    A list of `DGLBlock`s.
    """

    def __repr__(self) -> str:
        return _minibatch_str(self)

    def node_ids(self) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        """A representation of input nodes in the outermost layer. Contains all
        nodes in the `sampled_subgraphs`.
        - If `input_nodes` is a tensor: It indicates the graph is homogeneous.
        - If `input_nodes` is a dictionary: The keys should be node type and
          the value should be corresponding heterogeneous node id.
        """
        return self.input_nodes

    def num_layers(self) -> int:
        """Return the number of layers."""
        if self.sampled_subgraphs is None:
            return 0
        return len(self.sampled_subgraphs)

    def edge_ids(
        self, layer_id: int
    ) -> Union[Dict[str, torch.Tensor], torch.Tensor]:
        """Get the edge ids of a layer."""
        return self.sampled_subgraphs[layer_id].original_edge_ids

    def set_node_features(
        self,
        node_features: Union[
            Dict[str, torch.Tensor], Dict[Tuple[str, str], torch.Tensor]
        ],
    ) -> None:
        """Set node features."""
        self.node_features = node_features

    def set_edge_features(
        self,
        edge_features: List[
            Union[Dict[str, torch.Tensor], Dict[Tuple[str, str], torch.Tensor]]
        ],
    ) -> None:
        """Set edge features."""
        self.edge_features = edge_features

    @property
    def blocks(self) -> list:
        """DGL blocks extracted from `MiniBatch` containing graphical
        structures and ID mappings. Returns ``None`` when there are no sampled
        subgraphs; otherwise the blocks are computed on first access and
        cached in `_blocks`.
        """
        if not self.sampled_subgraphs:
            return None

        if self._blocks is None:
            self._blocks = self.compute_blocks()
        return self._blocks

    def compute_blocks(self) -> list:
        """Extracts DGL blocks from `MiniBatch` to construct graphical
        structures and ID mappings.
        """
        from dgl.convert import create_block, EID, NID

        is_heterogeneous = isinstance(
            self.sampled_subgraphs[0].sampled_csc, Dict
        )

        # Casts to minimum dtype in-place and returns self.
        def cast_to_minimum_dtype(v: CSCFormatBase):
            # Checks if number of vertices and edges fit into an int32.
            dtype = (
                torch.int32
                if max(v.indptr.size(0) - 2, v.indices.size(0))
                <= torch.iinfo(torch.int32).max
                else torch.int64
            )
            v.indptr = v.indptr.to(dtype)
            v.indices = v.indices.to(dtype)
            return v

        blocks = []
        for subgraph in self.sampled_subgraphs:
            original_row_node_ids = subgraph.original_row_node_ids
            assert (
                original_row_node_ids is not None
            ), "Missing `original_row_node_ids` in sampled subgraph."
            original_column_node_ids = subgraph.original_column_node_ids
            assert (
                original_column_node_ids is not None
            ), "Missing `original_column_node_ids` in sampled subgraph."
            if is_heterogeneous:
                node_types = set()
                sampled_csc = {}
                for v in subgraph.sampled_csc.values():
                    cast_to_minimum_dtype(v)
                for etype, v in subgraph.sampled_csc.items():
                    etype_tuple = etype_str_to_tuple(etype)
                    node_types.add(etype_tuple[0])
                    node_types.add(etype_tuple[2])
                    sampled_csc[etype_tuple] = (
                        "csc",
                        (
                            v.indptr,
                            v.indices,
                            torch.arange(
                                0,
                                len(v.indices),
                                device=v.indptr.device,
                                dtype=v.indptr.dtype,
                            ),
                        ),
                    )
                num_src_nodes = {
                    ntype: (
                        original_row_node_ids[ntype].size(0)
                        if original_row_node_ids.get(ntype) is not None
                        else 0
                    )
                    for ntype in node_types
                }
                num_dst_nodes = {
                    ntype: (
                        original_column_node_ids[ntype].size(0)
                        if original_column_node_ids.get(ntype) is not None
                        else 0
                    )
                    for ntype in node_types
                }
            else:
                sampled_csc = cast_to_minimum_dtype(subgraph.sampled_csc)
                sampled_csc = (
                    "csc",
                    (
                        sampled_csc.indptr,
                        sampled_csc.indices,
                        torch.arange(
                            0,
                            len(sampled_csc.indices),
                            device=sampled_csc.indptr.device,
                            dtype=sampled_csc.indptr.dtype,
                        ),
                    ),
                )
                num_src_nodes = original_row_node_ids.size(0)
                num_dst_nodes = original_column_node_ids.size(0)
            blocks.append(
                create_block(
                    sampled_csc,
                    num_src_nodes=num_src_nodes,
                    num_dst_nodes=num_dst_nodes,
                    node_count_check=False,
                )
            )

        if is_heterogeneous:
            # Assign reverse node ids to the outermost layer's source nodes.
            for node_type, reverse_ids in self.sampled_subgraphs[
                0
            ].original_row_node_ids.items():
                blocks[0].srcnodes[node_type].data[NID] = reverse_ids
            # Assign reverse edges ids.
            for block, subgraph in zip(blocks, self.sampled_subgraphs):
                if subgraph.original_edge_ids is not None:
                    for (
                        edge_type,
                        reverse_ids,
                    ) in subgraph.original_edge_ids.items():
                        block.edges[etype_str_to_tuple(edge_type)].data[
                            EID
                        ] = reverse_ids
        else:
            blocks[0].srcdata[NID] = self.sampled_subgraphs[
                0
            ].original_row_node_ids
            # Assign reverse edges ids.
            for block, subgraph in zip(blocks, self.sampled_subgraphs):
                if subgraph.original_edge_ids is not None:
                    block.edata[EID] = subgraph.original_edge_ids
        return blocks

    def to_pyg_data(self):
        """Construct a PyG Data from `MiniBatch`. This function only supports
        node classification task on a homogeneous graph and the number of
        features cannot be more than one.

        The `edge_index` of the result is ``None`` when `sampled_subgraphs` is
        ``None``, empty, or contains only ``None`` entries.
        """
        from torch_geometric.data import Data

        edge_index = None
        if self.sampled_subgraphs:
            col_nodes = []
            row_nodes = []
            for subgraph in self.sampled_subgraphs:
                if subgraph is None:
                    continue
                sampled_csc = subgraph.sampled_csc
                indptr = sampled_csc.indptr
                indices = sampled_csc.indices
                expanded_indptr = expand_indptr(
                    indptr, dtype=indices.dtype, output_size=len(indices)
                )
                col_nodes.append(expanded_indptr)
                row_nodes.append(indices)
            # `torch.cat` rejects an empty list, so only build the edge index
            # when at least one layer contributed edges.
            if col_nodes:
                col_nodes = torch.cat(col_nodes)
                row_nodes = torch.cat(row_nodes)
                edge_index = torch.unique(
                    torch.stack((row_nodes, col_nodes)), dim=1
                ).long()

        if self.node_features is None:
            node_features = None
        else:
            assert (
                len(self.node_features) == 1
            ), "`to_pyg_data` only supports single feature homogeneous graph."
            node_features = next(iter(self.node_features.values()))

        if self.seeds is not None:
            if isinstance(self.seeds, Dict):
                batch_size = len(next(iter(self.seeds.values())))
            else:
                batch_size = len(self.seeds)
        else:
            batch_size = None
        pyg_data = Data(
            x=node_features,
            edge_index=edge_index,
            y=self.labels,
            batch_size=batch_size,
            n_id=self.node_ids(),
        )
        return pyg_data

    def to(
        self, device: torch.device, non_blocking=False
    ):  # pylint: disable=invalid-name
        """Copy `MiniBatch` to the specified device using reflection.

        The attributes are replaced in place and `self` is returned.
        """

        def copy_fn(x):
            return apply_to(x, device, non_blocking=non_blocking)

        transfer_attrs = get_nonproperty_attributes(self)

        for attr in transfer_attrs:
            # Only copy member variables.
            setattr(self, attr, recursive_apply(getattr(self, attr), copy_fn))

        return self

    def pin_memory(self):
        """Copy `MiniBatch` to the pinned memory using reflection."""
        return self.to("pinned")

    def is_pinned(self) -> bool:
        """Check whether `MiniBatch` is pinned using reflection."""
        return is_object_pinned(self)
@functional_datapipe("transform")
class MiniBatchTransformer(Mapper):
    """Datapipe stage that rewrites every mini-batch flowing through it.

    Functional name: :obj:`transform`.

    Parameters
    ----------
    datapipe : DataPipe
        The upstream datapipe that yields mini-batches.
    transformer:
        Callable applied to each mini-batch; it must return a `MiniBatch`.
        When omitted (or falsy) the mini-batch is passed through unchanged.
    """

    def __init__(
        self,
        datapipe,
        transformer=None,
    ):
        # The mapper always calls the checking wrapper; the user callable is
        # stored separately so that subclasses can swap it.
        super().__init__(datapipe, self._transformer)
        self.transformer = transformer if transformer else self._identity

    def _transformer(self, minibatch):
        """Apply the stored callable and verify the type of its result."""
        transformed = self.transformer(minibatch)
        assert isinstance(
            transformed, MiniBatch
        ), "The transformer output should be an instance of MiniBatch"
        return transformed

    @staticmethod
    def _identity(minibatch):
        """Hand the mini-batch back untouched."""
        return minibatch
@functional_datapipe("sample_negative")
class NegativeSampler(MiniBatchTransformer):
    """Base datapipe that augments the positive seeds of a mini-batch with
    negative samples and returns the mixture.

    Functional name: :obj:`sample_negative`.

    Parameters
    ----------
    datapipe : DataPipe
        The datapipe.
    negative_ratio : int
        The proportion of negative samples to positive samples.
    """

    def __init__(
        self,
        datapipe,
        negative_ratio,
    ):
        super().__init__(datapipe, self._sample)
        assert negative_ratio > 0, "Negative_ratio should be positive Integer."
        self.negative_ratio = negative_ratio

    def _sample(self, minibatch):
        """Replace `seeds`, `labels` and `indexes` of the mini-batch with the
        output of `_sample_with_etype`.

        Parameters
        ----------
        minibatch : MiniBatch
            A mini-batch whose `seeds` field holds the positive items. When
            `seeds` is a mapping it is processed per edge type, and missing
            `labels` / `indexes` are initialised to empty dictionaries first.

        Returns
        -------
        MiniBatch
            The same mini-batch object, updated in place.
        """
        seeds = minibatch.seeds
        if not isinstance(seeds, Mapping):
            sampled = self._sample_with_etype(seeds)
            minibatch.seeds, minibatch.labels, minibatch.indexes = sampled
            return minibatch

        if minibatch.indexes is None:
            minibatch.indexes = {}
        if minibatch.labels is None:
            minibatch.labels = {}
        for etype, pos_pairs in seeds.items():
            new_seeds, new_labels, new_indexes = self._sample_with_etype(
                pos_pairs, etype
            )
            minibatch.seeds[etype] = new_seeds
            minibatch.labels[etype] = new_labels
            minibatch.indexes[etype] = new_indexes
        return minibatch

    def _sample_with_etype(self, seeds, etype=None):
        """Produce the mixed positive/negative seeds for one edge type.

        Subclasses must override this method.

        Parameters
        ----------
        seeds : Tensor
            A N*2 tensor of source-destination node pairs of positive edges,
            where positive means the edge must exist in the graph.
        etype : str
            Canonical edge type.

        Returns
        -------
        Tensor
            A collection of positive and negative node pairs.
        Tensor
            Corresponding labels. If label is True, corresponding edge is
            positive. If label is False, corresponding edge is negative.
        Tensor
            Corresponding indexes, indicating to which query an edge belongs.
        """
        raise NotImplementedError
If label is False, corresponding edge is negative. Tensor Corresponding indexes, indicates to which query an edge belongs. """ raise NotImplementedError ================================================ FILE: python/dgl/graphbolt/sampled_subgraph.py ================================================ """Graphbolt sampled subgraph.""" # pylint: disable= invalid-name from typing import Dict, NamedTuple, Tuple, Union import torch from .base import ( apply_to, CSCFormatBase, etype_str_to_tuple, expand_indptr, is_object_pinned, isin, ) from .internal_utils import recursive_apply __all__ = ["SampledSubgraph"] class _ExcludeEdgesWaiter: def __init__(self, sampled_subgraph, index): self.sampled_subgraph = sampled_subgraph self.index = index def wait(self): """Returns the stored value when invoked.""" sampled_subgraph = self.sampled_subgraph index = self.index # Ensure there is no memory leak. self.sampled_subgraph = self.index = None if isinstance(index, dict): for k in list(index.keys()): index[k] = index[k].wait() else: index = index.wait() return type(sampled_subgraph)(*_slice_subgraph(sampled_subgraph, index)) class PyGLayerData(NamedTuple): """A named tuple class to represent homogenous inputs to a PyG model layer. The fields are x (input features), edge_index and size (source and destination sizes). """ x: torch.Tensor edge_index: torch.Tensor size: Tuple[int, int] class PyGLayerHeteroData(NamedTuple): """A named tuple class to represent heterogenous inputs to a PyG model layer. The fields are x (input features), edge_index and size (source and destination sizes), and all fields are dictionaries. """ x: Dict[str, torch.Tensor] edge_index: Dict[str, torch.Tensor] size: Dict[str, Tuple[int, int]] class SampledSubgraph: r"""An abstract class for sampled subgraph. In the context of a heterogeneous graph, each field should be of `Dict` type. 
class SampledSubgraph:
    r"""An abstract class for sampled subgraph. In the context of a
    heterogeneous graph, each field should be of `Dict` type. Otherwise,
    for homogeneous graphs, each field should correspond to its respective
    value type."""

    @property
    def sampled_csc(
        self,
    ) -> Union[CSCFormatBase, Dict[str, CSCFormatBase],]:
        """Returns the node pairs representing edges in csc format.
          - If `sampled_csc` is a CSCFormatBase: It should be in the csc
            format. `indptr` stores the index in the data array where each
            column starts. `indices` stores the row indices of the non-zero
            elements.
          - If `sampled_csc` is a dictionary: The keys should be edge type and
            the values should be corresponding node pairs. The ids inside are
            heterogeneous ids.

        The base class raises `NotImplementedError`.

        Examples
        --------
        1. Homogeneous graph.

        >>> import dgl.graphbolt as gb
        >>> import torch
        >>> sampled_csc = gb.CSCFormatBase(
        ...     indptr=torch.tensor([0, 1, 2, 3]),
        ...     indices=torch.tensor([0, 1, 2]))
        >>> print(sampled_csc)
        CSCFormatBase(indptr=tensor([0, 1, 2, 3]),
                    indices=tensor([0, 1, 2]),
        )

        2. Heterogeneous graph.

        >>> sampled_csc = {"A:relation:B": gb.CSCFormatBase(
        ...     indptr=torch.tensor([0, 1, 2, 3]),
        ...     indices=torch.tensor([0, 1, 2]))}
        >>> print(sampled_csc)
        {'A:relation:B': CSCFormatBase(indptr=tensor([0, 1, 2, 3]),
                    indices=tensor([0, 1, 2]),
        )}
        """
        raise NotImplementedError

    @property
    def original_column_node_ids(
        self,
    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        """Returns corresponding reverse column node ids in the original
        graph. Column's reverse node ids in the original graph. A graph
        structure can be treated as a coordinated row and column pair, and
        this is the mapped ids of the column.

        - If `original_column_node_ids` is a tensor: It represents the
          original node ids.
        - If `original_column_node_ids` is a dictionary: The keys should be
          node type and the values should be corresponding original
          heterogeneous node ids.

        If present, it means column IDs are compacted, and `sampled_csc`
        column IDs match these compacted ones. The base class returns `None`.
        """
        return None

    @property
    def original_row_node_ids(
        self,
    ) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        """Returns corresponding reverse row node ids in the original graph.
        Row's reverse node ids in the original graph. A graph structure can be
        treated as a coordinated row and column pair, and this is the mapped
        ids of the row.

        - If `original_row_node_ids` is a tensor: It represents the original
          node ids.
        - If `original_row_node_ids` is a dictionary: The keys should be node
          type and the values should be corresponding original heterogeneous
          node ids.

        If present, it means row IDs are compacted, and `sampled_csc` row IDs
        match these compacted ones. The base class returns `None`."""
        return None

    @property
    def original_edge_ids(self) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
        """Returns corresponding reverse edge ids in the original graph.
        Reverse edge ids in the original graph. This is useful when edge
        features are needed.

        - If `original_edge_ids` is a tensor: It represents the original edge
          ids.
        - If `original_edge_ids` is a dictionary: The keys should be edge type
          and the values should be corresponding original heterogeneous edge
          ids.

        The base class returns `None`.
        """
        return None

    def exclude_edges(
        self,
        edges: Union[
            Dict[str, torch.Tensor],
            torch.Tensor,
        ],
        assume_num_node_within_int32: bool = True,
        async_op: bool = False,
    ):
        r"""Exclude edges from the sampled subgraph.

        This function can be used with sampled subgraphs, regardless of
        whether they have compacted row/column nodes or not. If the original
        subgraph has compacted row or column nodes, the corresponding row or
        column nodes in the returned subgraph will also be compacted.

        Parameters
        ----------
        self : SampledSubgraph
            The sampled subgraph.
        edges : Union[torch.Tensor, Dict[str, torch.Tensor]]
            Edges to exclude. If sampled subgraph is homogeneous, then `edges`
            should be a N*2 tensor representing the edges to exclude. If
            sampled subgraph is heterogeneous, then `edges` should be a
            dictionary of edge types and the corresponding edges to exclude.
            Edge types of the subgraph that are missing from the dictionary
            are left untouched.
        assume_num_node_within_int32: bool
            If True, assumes the value of node IDs in the provided `edges`
            fall within the int32 range, which can significantly enhance
            computation speed. Default: True. Passing False currently fails
            the assertion at the top of this method.
        async_op: bool
            Boolean indicating whether the call is asynchronous. If so, the
            result can be obtained by calling wait on the returned future.

        Returns
        -------
        SampledSubgraph
            An instance of a class that inherits from `SampledSubgraph` (the
            same class as `self`). When `async_op` is True, an object whose
            `wait()` method returns that instance is returned instead.

        Examples
        --------
        >>> import dgl.graphbolt as gb
        >>> import torch
        >>> sampled_csc = {"A:relation:B": gb.CSCFormatBase(
        ...     indptr=torch.tensor([0, 1, 2, 3]),
        ...     indices=torch.tensor([0, 1, 2]))}
        >>> original_column_node_ids = {"B": torch.tensor([10, 11, 12])}
        >>> original_row_node_ids = {"A": torch.tensor([13, 14, 15])}
        >>> original_edge_ids = {"A:relation:B": torch.tensor([19, 20, 21])}
        >>> subgraph = gb.SampledSubgraphImpl(
        ...     sampled_csc=sampled_csc,
        ...     original_column_node_ids=original_column_node_ids,
        ...     original_row_node_ids=original_row_node_ids,
        ...     original_edge_ids=original_edge_ids
        ... )
        >>> edges_to_exclude = {"A:relation:B": torch.tensor([[14, 11], [15, 12]])}
        >>> result = subgraph.exclude_edges(edges_to_exclude)
        >>> print(result.sampled_csc)
        {'A:relation:B': CSCFormatBase(indptr=tensor([0, 1, 1, 1]),
                    indices=tensor([0]),
        )}
        >>> print(result.original_column_node_ids)
        {'B': tensor([10, 11, 12])}
        >>> print(result.original_row_node_ids)
        {'A': tensor([13, 14, 15])}
        >>> print(result.original_edge_ids)
        {'A:relation:B': tensor([19])}
        """
        # TODO: Add support for value > int32, then remove this line.
        assert (
            assume_num_node_within_int32
        ), "Values > int32 are not supported yet."
        # A homogeneous subgraph must come with a tensor of edges and a
        # heterogeneous one with a dictionary.
        assert (isinstance(self.sampled_csc, CSCFormatBase)) == isinstance(
            edges, torch.Tensor
        ), (
            "The sampled subgraph and the edges to exclude should be both "
            "homogeneous or both heterogeneous."
        )
        # Get type of calling class.
        calling_class = type(self)

        # Three steps to exclude edges:
        # 1. Convert the node pairs to the original ids if they are compacted.
        # 2. Exclude the edges and get the index of the edges to keep.
        # 3. Slice the subgraph according to the index.
        if isinstance(self.sampled_csc, CSCFormatBase):
            reverse_edges = _to_reverse_ids(
                self.sampled_csc,
                self.original_row_node_ids,
                self.original_column_node_ids,
            )
            index = _exclude_homo_edges(
                reverse_edges, edges, assume_num_node_within_int32, async_op
            )
        else:
            index = {}
            for etype, pair in self.sampled_csc.items():
                if etype not in edges:
                    # No edges need to be excluded.
                    index[etype] = None
                    continue
                src_type, _, dst_type = etype_str_to_tuple(etype)
                original_row_node_ids = (
                    None
                    if self.original_row_node_ids is None
                    else self.original_row_node_ids.get(src_type)
                )
                original_column_node_ids = (
                    None
                    if self.original_column_node_ids is None
                    else self.original_column_node_ids.get(dst_type)
                )
                reverse_edges = _to_reverse_ids(
                    pair,
                    original_row_node_ids,
                    original_column_node_ids,
                )
                index[etype] = _exclude_homo_edges(
                    reverse_edges,
                    edges[etype],
                    assume_num_node_within_int32,
                    async_op,
                )
        if async_op:
            # The slicing is deferred until `wait()` is called on the result.
            return _ExcludeEdgesWaiter(self, index)
        else:
            return calling_class(*_slice_subgraph(self, index))

    def to_pyg(
        self, x: Union[torch.Tensor, Dict[str, torch.Tensor]]
    ) -> Union[PyGLayerData, PyGLayerHeteroData]:
        """
        Process layer inputs so that they can be consumed by a PyG model layer.

        Parameters
        ----------
        x : Union[torch.Tensor, Dict[str, torch.Tensor]]
            The input node features to the GNN layer. A tensor selects the
            homogeneous code path; anything else is treated as a dictionary
            keyed by node type.

        Returns
        -------
        Union[PyGLayerData, PyGLayerHeteroData]
            A named tuple class with `x`, `edge_index` and `size` fields.
            Typically, a PyG GNN layer's forward method will accept these as
            arguments.
        """
        if isinstance(x, torch.Tensor):
            # Homogenous
            src = self.sampled_csc.indices
            dst = expand_indptr(
                self.sampled_csc.indptr,
                dtype=src.dtype,
                output_size=src.size(0),
            )
            edge_index = torch.stack([src, dst], dim=0).long()
            # `indptr` has one more entry than there are destination nodes.
            dst_size = self.sampled_csc.indptr.size(0) - 1
            # x and x[:dst_size] correspond to source and destination features resp.
            return PyGLayerData(
                (x, x[:dst_size]), edge_index, (x.size(0), dst_size)
            )
        else:
            # Heterogenous
            x_dst_dict = {}
            edge_index_dict = {}
            sizes_dict = {}
            for etype, sampled_csc in self.sampled_csc.items():
                src = sampled_csc.indices
                dst = expand_indptr(
                    sampled_csc.indptr,
                    dtype=src.dtype,
                    output_size=src.size(0),
                )
                edge_index = torch.stack([src, dst], dim=0).long()
                dst_size = sampled_csc.indptr.size(0) - 1
                # x and x[:dst_size] correspond to source and destination features resp.
                src_ntype, _, dst_ntype = etype_str_to_tuple(etype)
                x_dst_dict[dst_ntype] = x[dst_ntype][:dst_size]
                edge_index_dict[etype] = edge_index
                sizes_dict[etype] = (x[src_ntype].size(0), dst_size)

            return PyGLayerHeteroData(
                (x, x_dst_dict), edge_index_dict, sizes_dict
            )

    def to(
        self, device: torch.device, non_blocking=False
    ) -> "SampledSubgraph":  # pylint: disable=invalid-name
        """Copy `SampledSubgraph` to the specified device using reflection.

        Every non-callable attribute whose name does not start with a double
        underscore is replaced in place, and `self` is returned.
        """
        for attr in dir(self):
            # Only copy member variables.
            if not callable(getattr(self, attr)) and not attr.startswith("__"):
                setattr(
                    self,
                    attr,
                    recursive_apply(
                        getattr(self, attr),
                        apply_to,
                        device,
                        non_blocking=non_blocking,
                    ),
                )

        return self

    def pin_memory(self):
        """Copy `SampledSubgraph` to the pinned memory using reflection."""
        return self.to("pinned")

    def is_pinned(self) -> bool:
        """Check whether `SampledSubgraph` is pinned using reflection."""
        return is_object_pinned(self)
lhs_array.numel()], mapping[lhs_array.numel() :] def _exclude_homo_edges( edges: Tuple[torch.Tensor, torch.Tensor], edges_to_exclude: torch.Tensor, assume_num_node_within_int32: bool, async_op: bool, ): """Return the indices of edges to be included.""" if assume_num_node_within_int32: val = edges[0].long() << 32 | edges[1].long() edges_to_exclude_trans = edges_to_exclude.T val_to_exclude = ( edges_to_exclude_trans[0].long() << 32 | edges_to_exclude_trans[1].long() ) else: # TODO: Add support for value > int32. raise NotImplementedError( "Values out of range int32 are not supported yet" ) if async_op: return torch.ops.graphbolt.is_not_in_index_async(val, val_to_exclude) else: mask = ~isin(val, val_to_exclude) return torch.nonzero(mask, as_tuple=True)[0] def _slice_subgraph(subgraph: SampledSubgraph, index: torch.Tensor): """Slice the subgraph according to the index.""" def _index_select(obj, index): if obj is None: return None if index is None: return obj if isinstance(obj, CSCFormatBase): new_indices = obj.indices[index] new_indptr = torch.searchsorted(index, obj.indptr) return CSCFormatBase( indptr=new_indptr, indices=new_indices, ) if isinstance(obj, torch.Tensor): return obj[index] # Handle the case when obj is a dictionary. assert isinstance(obj, dict) assert isinstance(index, dict) ret = {} for k, v in obj.items(): ret[k] = _index_select(v, index[k]) return ret return ( _index_select(subgraph.sampled_csc, index), subgraph.original_column_node_ids, subgraph.original_row_node_ids, _index_select(subgraph.original_edge_ids, index), ) ================================================ FILE: python/dgl/graphbolt/sampling_graph.py ================================================ """Sampling Graphs.""" from typing import Dict, Union import torch __all__ = ["SamplingGraph"] class SamplingGraph: r"""Class for sampling graph.""" def __init__(self): pass def __repr__(self) -> str: """Return a string representation of the graph. 
Returns ------- str String representation of the graph. """ raise NotImplementedError @property def num_nodes(self) -> Union[int, Dict[str, int]]: """The number of nodes in the graph. - If the graph is homogenous, returns an integer. - If the graph is heterogenous, returns a dictionary. Returns ------- Union[int, Dict[str, int]] The number of nodes. Integer indicates the total nodes number of a homogenous graph; dict indicates nodes number per node types of a heterogenous graph. """ raise NotImplementedError @property def num_edges(self) -> Union[int, Dict[str, int]]: """The number of edges in the graph. - If the graph is homogenous, returns an integer. - If the graph is heterogenous, returns a dictionary. Returns ------- Union[int, Dict[str, int]] The number of edges. Integer indicates the total edges number of a homogenous graph; dict indicates edges number per edge types of a heterogenous graph. """ raise NotImplementedError def copy_to_shared_memory(self, shared_memory_name: str) -> "SamplingGraph": """Copy the graph to shared memory. Parameters ---------- shared_memory_name : str Name of the shared memory. Returns ------- SamplingGraph The copied SamplingGraph object on shared memory. """ raise NotImplementedError # pylint: disable=invalid-name def to(self, device: torch.device) -> "SamplingGraph": """Copy graph to the specified device. Parameters ---------- device : torch.device The destination device. Returns ------- SamplingGraph The graph on the specified device. 
""" raise NotImplementedError ================================================ FILE: python/dgl/graphbolt/subgraph_sampler.py ================================================ """Subgraph samplers""" from collections import defaultdict from functools import partial from typing import Dict import torch import torch.distributed as thd from torch.utils.data import functional_datapipe from .base import seed_type_str_to_ntypes from .internal import compact_temporal_nodes, unique_and_compact from .minibatch import MiniBatch from .minibatch_transformer import MiniBatchTransformer __all__ = [ "SubgraphSampler", "all_to_all", "convert_to_hetero", "revert_to_homo", ] class _NoOpWaiter: def __init__(self, result): self.result = result def wait(self): """Returns the stored value when invoked.""" result = self.result # Ensure there is no memory leak. self.result = None return result def _shift(inputs: list, group=None): cutoff = len(inputs) - thd.get_rank(group) return inputs[cutoff:] + inputs[:cutoff] def all_to_all(outputs, inputs, group=None, async_op=False): """Wrapper for thd.all_to_all that permuted outputs and inputs before calling it. The arguments have the permutation `rank, ..., world_size - 1, 0, ..., rank - 1` and we make it `0, world_size - 1` before calling `thd.all_to_all`.""" shift_fn = partial(_shift, group=group) outputs = shift_fn(list(outputs)) inputs = shift_fn(list(inputs)) if outputs[0].is_cuda: return thd.all_to_all(outputs, inputs, group, async_op) # gloo backend will be used. 
outputs_single = torch.cat(outputs) output_split_sizes = [o.size(0) for o in outputs] handle = thd.all_to_all_single( outputs_single, torch.cat(inputs), output_split_sizes, [i.size(0) for i in inputs], group, async_op, ) temp_outputs = outputs_single.split(output_split_sizes) class _Waiter: def __init__(self, handle, outputs, temp_outputs): self.handle = handle self.outputs = outputs self.temp_outputs = temp_outputs def wait(self): """Returns the stored value when invoked.""" handle = self.handle outputs = self.outputs temp_outputs = self.temp_outputs # Ensure that there is no leak self.handle = self.outputs = self.temp_outputs = None if handle is not None: handle.wait() for output, temp_output in zip(outputs, temp_outputs): output.copy_(temp_output) post_processor = _Waiter(handle, outputs, temp_outputs) return post_processor if async_op else post_processor.wait() def revert_to_homo(d: dict): """Utility function to convert a dictionary that stores homogenous data.""" is_homogenous = len(d) == 1 and "_N" in d return list(d.values())[0] if is_homogenous else d def convert_to_hetero(item): """Utility function to convert homogenous data to heterogenous with a single node type.""" is_heterogenous = isinstance(item, dict) return item if is_heterogenous else {"_N": item} @functional_datapipe("sample_subgraph") class SubgraphSampler(MiniBatchTransformer): """A subgraph sampler used to sample a subgraph from a given set of nodes from a larger graph. Functional name: :obj:`sample_subgraph`. This class is the base class of all subgraph samplers. Any subclass of SubgraphSampler should implement either the :meth:`sample_subgraphs` method or the :meth:`sampling_stages` method to define the fine-grained sampling stages to take advantage of optimizations provided by the GraphBolt DataLoader. Parameters ---------- datapipe : DataPipe The datapipe. args : Non-Keyword Arguments Arguments to be passed into sampling_stages. 
kwargs : Keyword Arguments
        Arguments to be passed into sampling_stages. Preprocessing stage makes
        use of the `asynchronous` and `cooperative` parameters before they are
        passed to the sampling stages.
    """

    def __init__(
        self,
        datapipe,
        *args,
        **kwargs,
    ):
        """Chain preprocessing, sampling and postprocessing onto ``datapipe``.

        ``asynchronous`` and ``cooperative`` are read from ``kwargs`` (both
        default to False) to decide which preprocessing stages are inserted;
        ``args`` and ``kwargs`` are then forwarded unchanged to
        :meth:`sampling_stages`.
        """
        async_op = kwargs.get("asynchronous", False)
        cooperative = kwargs.get("cooperative", False)
        # Seed preprocessing always runs first.
        preprocess_fn = partial(
            self._preprocess, cooperative=cooperative, async_op=async_op
        )
        datapipe = datapipe.transform(preprocess_fn)
        if async_op:
            # In asynchronous mode the preprocessing result is awaited in a
            # separate stage placed after a buffer.
            fn = partial(self._wait_preprocess_future, cooperative=cooperative)
            datapipe = datapipe.buffer().transform(fn)
        if cooperative:
            # The cooperative seed exchange is split into numbered stages with
            # a buffer between consecutive ones.
            # NOTE(review): buffer() presumably lets neighbouring stages
            # overlap - confirm against the datapipe implementation.
            datapipe = datapipe.transform(self._seeds_cooperative_exchange_1)
            datapipe = datapipe.buffer()
            datapipe = datapipe.transform(
                self._seeds_cooperative_exchange_1_wait_future
            ).buffer()
            datapipe = datapipe.transform(self._seeds_cooperative_exchange_2)
            datapipe = datapipe.buffer()
            datapipe = datapipe.transform(self._seeds_cooperative_exchange_3)
            datapipe = datapipe.buffer()
            datapipe = datapipe.transform(self._seeds_cooperative_exchange_4)
        # Sampling stages see the same args/kwargs this constructor received.
        datapipe = self.sampling_stages(datapipe, *args, **kwargs)
        datapipe = datapipe.transform(self._postprocess)
        super().__init__(datapipe)

    @staticmethod
    def _postprocess(minibatch):
        """Remove the temporary ``_seed_nodes`` and ``_seeds_timestamp``
        attributes from ``minibatch`` and return it."""
        delattr(minibatch, "_seed_nodes")
        delattr(minibatch, "_seeds_timestamp")
        return minibatch

    @staticmethod
    def _preprocess(minibatch, cooperative: bool, async_op: bool):
        if minibatch.seeds is None:
            raise ValueError(
                f"Invalid minibatch {minibatch}: `seeds` should have a value."
) rank = thd.get_rank() if cooperative else 0 world_size = thd.get_world_size() if cooperative else 1 results = SubgraphSampler._seeds_preprocess( minibatch, rank, world_size, async_op ) if async_op: minibatch._preprocess_future = results else: ( minibatch._seed_nodes, minibatch._seeds_timestamp, minibatch.compacted_seeds, offsets, ) = results if cooperative: minibatch._seeds_offsets = offsets return minibatch @staticmethod def _wait_preprocess_future(minibatch, cooperative: bool): ( minibatch._seed_nodes, minibatch._seeds_timestamp, minibatch.compacted_seeds, offsets, ) = minibatch._preprocess_future.wait() delattr(minibatch, "_preprocess_future") if cooperative: minibatch._seeds_offsets = offsets return minibatch @staticmethod def _seeds_cooperative_exchange_1(minibatch): rank = thd.get_rank() world_size = thd.get_world_size() seeds = minibatch._seed_nodes is_homogeneous = not isinstance(seeds, dict) if is_homogeneous: seeds = {"_N": seeds} if minibatch._seeds_offsets is None: assert minibatch.compacted_seeds is None minibatch._rank_sort_future = torch.ops.graphbolt.rank_sort_async( list(seeds.values()), rank, world_size ) return minibatch @staticmethod def _seeds_cooperative_exchange_1_wait_future(minibatch): world_size = thd.get_world_size() seeds = minibatch._seed_nodes is_homogeneous = not isinstance(seeds, dict) if is_homogeneous: seeds = {"_N": seeds} num_ntypes = len(seeds.keys()) if minibatch._seeds_offsets is None: result = minibatch._rank_sort_future.wait() delattr(minibatch, "_rank_sort_future") sorted_seeds, sorted_compacted, sorted_offsets = {}, {}, {} for i, ( seed_type, (typed_sorted_seeds, typed_index, typed_offsets), ) in enumerate(zip(seeds.keys(), result)): sorted_seeds[seed_type] = typed_sorted_seeds sorted_compacted[seed_type] = typed_index sorted_offsets[seed_type] = typed_offsets minibatch._seed_nodes = sorted_seeds minibatch.compacted_seeds = revert_to_homo(sorted_compacted) minibatch._seeds_offsets = sorted_offsets else: 
minibatch._seeds_offsets = {"_N": minibatch._seeds_offsets} counts_sent = torch.empty(world_size * num_ntypes, dtype=torch.int64) for i, offsets in enumerate(minibatch._seeds_offsets.values()): counts_sent[ torch.arange(i, world_size * num_ntypes, num_ntypes) ] = offsets.diff() delattr(minibatch, "_seeds_offsets") counts_received = torch.empty_like(counts_sent) minibatch._counts_future = all_to_all( counts_received.split(num_ntypes), counts_sent.split(num_ntypes), async_op=True, ) minibatch._counts_sent = counts_sent minibatch._counts_received = counts_received return minibatch @staticmethod def _seeds_cooperative_exchange_2(minibatch): world_size = thd.get_world_size() seeds = minibatch._seed_nodes minibatch._counts_future.wait() delattr(minibatch, "_counts_future") num_ntypes = len(seeds.keys()) seeds_received = {} counts_sent = {} counts_received = {} for i, (ntype, typed_seeds) in enumerate(seeds.items()): idx = torch.arange(i, world_size * num_ntypes, num_ntypes) typed_counts_sent = minibatch._counts_sent[idx].tolist() typed_counts_received = minibatch._counts_received[idx].tolist() typed_seeds_received = typed_seeds.new_empty( sum(typed_counts_received) ) all_to_all( typed_seeds_received.split(typed_counts_received), typed_seeds.split(typed_counts_sent), ) seeds_received[ntype] = typed_seeds_received counts_sent[ntype] = typed_counts_sent counts_received[ntype] = typed_counts_received minibatch._seed_nodes = seeds_received minibatch._counts_sent = revert_to_homo(counts_sent) minibatch._counts_received = revert_to_homo(counts_received) return minibatch @staticmethod def _seeds_cooperative_exchange_3(minibatch): nodes = { ntype: [typed_seeds] for ntype, typed_seeds in minibatch._seed_nodes.items() } minibatch._unique_future = unique_and_compact( nodes, 0, 1, async_op=True ) return minibatch @staticmethod def _seeds_cooperative_exchange_4(minibatch): unique_seeds, inverse_seeds, _ = minibatch._unique_future.wait() delattr(minibatch, "_unique_future") 
inverse_seeds = { ntype: typed_inv[0] for ntype, typed_inv in inverse_seeds.items() } minibatch._seed_nodes = revert_to_homo(unique_seeds) sizes = { ntype: typed_seeds.size(0) for ntype, typed_seeds in unique_seeds.items() } minibatch._seed_sizes = revert_to_homo(sizes) minibatch._seed_inverse_ids = revert_to_homo(inverse_seeds) return minibatch def _sample(self, minibatch): ( minibatch.input_nodes, minibatch.sampled_subgraphs, ) = self.sample_subgraphs( minibatch._seed_nodes, minibatch._seeds_timestamp ) return minibatch def sampling_stages(self, datapipe): """The sampling stages are defined here by chaining to the datapipe. The default implementation expects :meth:`sample_subgraphs` to be implemented. To define fine-grained stages, this method should be overridden. """ return datapipe.transform(self._sample) @staticmethod def _seeds_preprocess( minibatch: MiniBatch, rank: int = 0, world_size: int = 1, async_op: bool = False, ): """Preprocess `seeds` in a minibatch to construct `unique_seeds`, `node_timestamp` and `compacted_seeds` for further sampling. It optionally incorporates timestamps for temporal graphs, organizing and compacting seeds based on their types and timestamps. In heterogeneous graph, `seeds` with same node type will be unqiued together. Parameters ---------- minibatch: MiniBatch The minibatch. rank : int The rank of the current process among cooperating processes. world_size : int The number of cooperating (`arXiv:2210.13339`__) processes. async_op: bool Boolean indicating whether the call is asynchronous. If so, the result can be obtained by calling wait on the returned future. Returns ------- unique_seeds: torch.Tensor or Dict[str, torch.Tensor] A tensor or a dictionary of tensors representing the unique seeds. In heterogeneous graphs, seeds are returned for each node type. nodes_timestamp: None or a torch.Tensor or Dict[str, torch.Tensor] Containing timestamps for each seed. 
This is only returned if `minibatch` includes timestamps and the graph is temporal. compacted_seeds: torch.tensor or a Dict[str, torch.Tensor] Representation of compacted seeds corresponding to 'seeds', where all node ids inside are compacted. offsets: None or torch.Tensor or Dict[src, torch.Tensor] The unique nodes offsets tensor partitions the unique_nodes tensor. Has size `world_size + 1` and `unique_nodes[offsets[i]: offsets[i + 1]]` belongs to the rank `(rank + i) % world_size`. """ use_timestamp = hasattr(minibatch, "timestamp") assert ( not use_timestamp or world_size == 1 ), "Temporal code path does not currently support Cooperative Minibatching" seeds = minibatch.seeds is_heterogeneous = isinstance(seeds, Dict) if is_heterogeneous: # Collect nodes from all types of input. nodes = defaultdict(list) nodes_timestamp = None if use_timestamp: nodes_timestamp = defaultdict(list) for seed_type, typed_seeds in seeds.items(): # When typed_seeds is a one-dimensional tensor, it represents # seed nodes, which does not need to do unique and compact. if typed_seeds.ndim == 1: nodes_timestamp = ( minibatch.timestamp if hasattr(minibatch, "timestamp") else None ) result = _NoOpWaiter((seeds, nodes_timestamp, None, None)) break result = None assert typed_seeds.ndim == 2, ( "Only tensor with shape 1*N and N*M is " + f"supported now, but got {typed_seeds.shape}." ) ntypes = seed_type_str_to_ntypes( seed_type, typed_seeds.shape[1] ) if use_timestamp: negative_ratio = ( typed_seeds.shape[0] // minibatch.timestamp[seed_type].shape[0] - 1 ) neg_timestamp = minibatch.timestamp[ seed_type ].repeat_interleave(negative_ratio) for i, ntype in enumerate(ntypes): nodes[ntype].append(typed_seeds[:, i]) if use_timestamp: nodes_timestamp[ntype].append( minibatch.timestamp[seed_type] ) nodes_timestamp[ntype].append(neg_timestamp) class _Waiter: def __init__(self, nodes, nodes_timestamp, seeds): # Unique and compact the collected nodes. 
if use_timestamp: self.future = compact_temporal_nodes( nodes, nodes_timestamp ) else: self.future = unique_and_compact( nodes, rank, world_size, async_op ) self.seeds = seeds def wait(self): """Returns the stored value when invoked.""" if use_timestamp: unique_seeds, nodes_timestamp, compacted = self.future offsets = None else: unique_seeds, compacted, offsets = ( self.future.wait() if async_op else self.future ) nodes_timestamp = None seeds = self.seeds # Ensure there is no memory leak. self.future = self.seeds = None compacted_seeds = {} # Map back in same order as collect. for seed_type, typed_seeds in seeds.items(): ntypes = seed_type_str_to_ntypes( seed_type, typed_seeds.shape[1] ) compacted_seed = [] for ntype in ntypes: compacted_seed.append(compacted[ntype].pop(0)) compacted_seeds[seed_type] = ( torch.cat(compacted_seed).view(len(ntypes), -1).T ) return ( unique_seeds, nodes_timestamp, compacted_seeds, offsets, ) # When typed_seeds is not a one-dimensional tensor if result is None: result = _Waiter(nodes, nodes_timestamp, seeds) else: # When seeds is a one-dimensional tensor, it represents seed nodes, # which does not need to do unique and compact. if seeds.ndim == 1: nodes_timestamp = ( minibatch.timestamp if hasattr(minibatch, "timestamp") else None ) result = _NoOpWaiter((seeds, nodes_timestamp, None, None)) else: # Collect nodes from all types of input. nodes = [seeds.view(-1)] nodes_timestamp = None if use_timestamp: # Timestamp for source and destination nodes are the same. negative_ratio = ( seeds.shape[0] // minibatch.timestamp.shape[0] - 1 ) neg_timestamp = minibatch.timestamp.repeat_interleave( negative_ratio ) seeds_timestamp = torch.cat( (minibatch.timestamp, neg_timestamp) ) nodes_timestamp = [ seeds_timestamp for _ in range(seeds.shape[1]) ] class _Waiter: def __init__(self, nodes, nodes_timestamp, seeds): # Unique and compact the collected nodes. 
if use_timestamp: self.future = compact_temporal_nodes( nodes, nodes_timestamp ) else: self.future = unique_and_compact( nodes, async_op=async_op ) self.seeds = seeds def wait(self): """Returns the stored value when invoked.""" if use_timestamp: ( unique_seeds, nodes_timestamp, compacted, ) = self.future offsets = None else: unique_seeds, compacted, offsets = ( self.future.wait() if async_op else self.future ) nodes_timestamp = None seeds = self.seeds # Ensure there is no memory leak. self.future = self.seeds = None # Map back in same order as collect. compacted_seeds = compacted[0].view(seeds.shape) return ( unique_seeds, nodes_timestamp, compacted_seeds, offsets, ) result = _Waiter(nodes, nodes_timestamp, seeds) return result if async_op else result.wait() def sample_subgraphs( self, seeds, seeds_timestamp, seeds_pre_time_window=None ): """Sample subgraphs from the given seeds, possibly with temporal constraints. Any subclass of SubgraphSampler should implement this method. Parameters ---------- seeds : Union[torch.Tensor, Dict[str, torch.Tensor]] The seed nodes. seeds_timestamp : Union[torch.Tensor, Dict[str, torch.Tensor]] The timestamps of the seed nodes. If given, the sampled subgraphs should not contain any nodes or edges that are newer than the timestamps of the seed nodes. Default: None. seeds_pre_time_window : Union[torch.Tensor, Dict[str, torch.Tensor]] The time window of the nodes represents a period of time before `seeds_timestamp`. If provided, only neighbors and related edges whose timestamps fall within `[seeds_timestamp - seeds_pre_time_window, seeds_timestamp]` will be filtered. Returns ------- Union[torch.Tensor, Dict[str, torch.Tensor]] The input nodes. List[SampledSubgraph] The sampled subgraphs. 
Examples -------- >>> @functional_datapipe("my_sample_subgraph") >>> class MySubgraphSampler(SubgraphSampler): >>> def __init__(self, datapipe, graph, fanouts): >>> super().__init__(datapipe) >>> self.graph = graph >>> self.fanouts = fanouts >>> def sample_subgraphs(self, seeds): >>> # Sample subgraphs from the given seeds. >>> subgraphs = [] >>> subgraphs_nodes = [] >>> for fanout in reversed(self.fanouts): >>> subgraph = self.graph.sample_neighbors(seeds, fanout) >>> subgraphs.insert(0, subgraph) >>> subgraphs_nodes.append(subgraph.nodes) >>> seeds = subgraph.nodes >>> subgraphs_nodes = torch.unique(torch.cat(subgraphs_nodes)) >>> return subgraphs_nodes, subgraphs """ raise NotImplementedError ================================================ FILE: python/dgl/heterograph.py ================================================ """Classes for heterogeneous graphs.""" import copy import itertools import numbers # pylint: disable= too-many-lines from collections import defaultdict from collections.abc import Iterable, Mapping from contextlib import contextmanager import networkx as nx import numpy as np from . import backend as F, core, graph_index, heterograph_index, utils from ._ffi.function import _init_api from .base import ( ALL, dgl_warning, DGLError, EID, ETYPE, is_all, NID, NTYPE, SLICE_FULL, ) from .frame import Frame from .ops import segment from .view import ( HeteroEdgeDataView, HeteroEdgeView, HeteroNodeDataView, HeteroNodeView, ) __all__ = ["DGLGraph", "combine_names"] class DGLGraph(object): """Class for storing graph structure and node/edge feature data. There are a few ways to create a DGLGraph: * To create a homogeneous graph from Tensor data, use :func:`dgl.graph`. * To create a heterogeneous graph from Tensor data, use :func:`dgl.heterograph`. * To create a graph from other data sources, use ``dgl.*`` create ops. See :ref:`api-graph-create-ops`. Read the user guide chapter :ref:`guide-graph` for an in-depth explanation about its usage. 
""" is_block = False # pylint: disable=unused-argument, dangerous-default-value def __init__( self, gidx=[], ntypes=["_N"], etypes=["_E"], node_frames=None, edge_frames=None, **deprecate_kwargs ): """Internal constructor for creating a DGLGraph. Parameters ---------- gidx : HeteroGraphIndex Graph index object. ntypes : list of str, pair of list of str Node type list. ``ntypes[i]`` stores the name of node type i. If a pair is given, the graph created is a uni-directional bipartite graph, and its SRC node types and DST node types are given as in the pair. etypes : list of str Edge type list. ``etypes[i]`` stores the name of edge type i. node_frames : list[Frame], optional Node feature storage. If None, empty frame is created. Otherwise, ``node_frames[i]`` stores the node features of node type i. (default: None) edge_frames : list[Frame], optional Edge feature storage. If None, empty frame is created. Otherwise, ``edge_frames[i]`` stores the edge features of edge type i. (default: None) """ if isinstance(gidx, DGLGraph): raise DGLError( "The input is already a DGLGraph. No need to create it again." ) if not isinstance(gidx, heterograph_index.HeteroGraphIndex): dgl_warning( "Recommend creating graphs by `dgl.graph(data)`" " instead of `dgl.DGLGraph(data)`." 
) (sparse_fmt, arrays), num_src, num_dst = utils.graphdata2tensors( gidx ) if sparse_fmt == "coo": gidx = heterograph_index.create_unitgraph_from_coo( 1, num_src, num_dst, arrays[0], arrays[1], ["coo", "csr", "csc"], ) else: gidx = heterograph_index.create_unitgraph_from_csr( 1, num_src, num_dst, arrays[0], arrays[1], arrays[2], ["coo", "csr", "csc"], sparse_fmt == "csc", ) if len(deprecate_kwargs) != 0: dgl_warning( "Keyword arguments {} are deprecated in v0.5, and can be safely" " removed in all cases.".format(list(deprecate_kwargs.keys())) ) self._init(gidx, ntypes, etypes, node_frames, edge_frames) def _init(self, gidx, ntypes, etypes, node_frames, edge_frames): """Init internal states.""" self._graph = gidx self._canonical_etypes = None self._batch_num_nodes = None self._batch_num_edges = None # Handle node types if isinstance(ntypes, tuple): if len(ntypes) != 2: errmsg = "Invalid input. Expect a pair (srctypes, dsttypes) but got {}".format( ntypes ) raise TypeError(errmsg) if not self._graph.is_metagraph_unibipartite(): raise ValueError( "Invalid input. The metagraph must be a uni-directional" " bipartite graph." 
) self._ntypes = ntypes[0] + ntypes[1] self._srctypes_invmap = {t: i for i, t in enumerate(ntypes[0])} self._dsttypes_invmap = { t: i + len(ntypes[0]) for i, t in enumerate(ntypes[1]) } self._is_unibipartite = True if len(ntypes[0]) == 1 and len(ntypes[1]) == 1 and len(etypes) == 1: self._canonical_etypes = [ (ntypes[0][0], etypes[0], ntypes[1][0]) ] else: self._ntypes = ntypes if len(ntypes) == 1: src_dst_map = None else: src_dst_map = find_src_dst_ntypes( self._ntypes, self._graph.metagraph ) self._is_unibipartite = src_dst_map is not None if self._is_unibipartite: self._srctypes_invmap, self._dsttypes_invmap = src_dst_map else: self._srctypes_invmap = { t: i for i, t in enumerate(self._ntypes) } self._dsttypes_invmap = self._srctypes_invmap # Handle edge types self._etypes = etypes if self._canonical_etypes is None: if len(etypes) == 1 and len(ntypes) == 1: self._canonical_etypes = [(ntypes[0], etypes[0], ntypes[0])] else: self._canonical_etypes = make_canonical_etypes( self._etypes, self._ntypes, self._graph.metagraph ) # An internal map from etype to canonical etype tuple. # If two etypes have the same name, an empty tuple is stored instead to indicate # ambiguity. 
self._etype2canonical = {} for i, ety in enumerate(self._etypes): if ety in self._etype2canonical: self._etype2canonical[ety] = tuple() else: self._etype2canonical[ety] = self._canonical_etypes[i] self._etypes_invmap = { t: i for i, t in enumerate(self._canonical_etypes) } # node and edge frame if node_frames is None: node_frames = [None] * len(self._ntypes) node_frames = [ Frame(num_rows=self._graph.num_nodes(i)) if frame is None else frame for i, frame in enumerate(node_frames) ] self._node_frames = node_frames if edge_frames is None: edge_frames = [None] * len(self._etypes) edge_frames = [ Frame(num_rows=self._graph.num_edges(i)) if frame is None else frame for i, frame in enumerate(edge_frames) ] self._edge_frames = edge_frames def __setstate__(self, state): # Compatibility check # TODO: version the storage if isinstance(state, dict): # Since 0.5 we use the default __dict__ method self.__dict__.update(state) elif isinstance(state, tuple) and len(state) == 5: # DGL == 0.4.3 dgl_warning( "The object is pickled with DGL == 0.4.3. " "Some of the original attributes are ignored." ) self._init(*state) elif isinstance(state, dict): # DGL <= 0.4.2 dgl_warning( "The object is pickled with DGL <= 0.4.2. " "Some of the original attributes are ignored." 
) self._init( state["_graph"], state["_ntypes"], state["_etypes"], state["_node_frames"], state["_edge_frames"], ) else: raise IOError("Unrecognized pickle format.") def __repr__(self): if len(self.ntypes) == 1 and len(self.etypes) == 1: ret = ( "Graph(num_nodes={node}, num_edges={edge},\n" " ndata_schemes={ndata}\n" " edata_schemes={edata})" ) return ret.format( node=self.num_nodes(), edge=self.num_edges(), ndata=str(self.node_attr_schemes()), edata=str(self.edge_attr_schemes()), ) else: ret = ( "Graph(num_nodes={node},\n" " num_edges={edge},\n" " metagraph={meta})" ) nnode_dict = { self.ntypes[i]: self._graph.num_nodes(i) for i in range(len(self.ntypes)) } nedge_dict = { self.canonical_etypes[i]: self._graph.num_edges(i) for i in range(len(self.etypes)) } meta = str(self.metagraph().edges(keys=True)) return ret.format(node=nnode_dict, edge=nedge_dict, meta=meta) def __copy__(self): """Shallow copy implementation.""" # TODO(minjie): too many states in python; should clean up and lower to C cls = type(self) obj = cls.__new__(cls) obj.__dict__.update(self.__dict__) return obj ################################################################# # Mutation operations ################################################################# def add_nodes(self, num, data=None, ntype=None): r"""Add new nodes of the same node type Parameters ---------- num : int Number of nodes to add. data : dict, optional Feature data of the added nodes. ntype : str, optional The type of the new nodes. Can be omitted if there is only one node type in the graph. Notes ----- * Inplace update is applied to the current graph. * If the key of ``data`` does not contain some existing feature fields, those features for the new nodes will be created by initializers defined with :func:`set_n_initializer` (default initializer fills zeros). 
* If the key of ``data`` contains new feature fields, those features for the old nodes will be created by initializers defined with :func:`set_n_initializer` (default initializer fills zeros). * This function discards the batch information. Please use :func:`dgl.DGLGraph.set_batch_num_nodes` and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph to maintain the information. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type** >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g.num_nodes() 3 >>> g.add_nodes(2) >>> g.num_nodes() 5 If the graph has some node features and new nodes are added without features, their features will be created by initializers defined with :func:`set_n_initializer`. >>> g.ndata['h'] = torch.ones(5, 1) >>> g.add_nodes(1) >>> g.ndata['h'] tensor([[1.], [1.], [1.], [1.], [1.], [0.]]) We can also assign features for the new nodes in adding new nodes. >>> g.add_nodes(1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)}) >>> g.ndata['h'] tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]]) Since ``data`` contains new feature fields, the features for old nodes will be created by initializers defined with :func:`set_n_initializer`. >>> g.ndata['w'] tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]]) **Heterogeneous Graphs with Multiple Node Types** >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... torch.tensor([0, 0, 1, 1])), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... torch.tensor([0, 1])) ... }) >>> g.add_nodes(2) DGLError: Node type name must be specified if there are more than one node types. 
>>> g.num_nodes('user') 3 >>> g.add_nodes(2, ntype='user') >>> g.num_nodes('user') 5 See Also -------- remove_nodes add_edges remove_edges """ # TODO(xiangsx): block do not support add_nodes if ntype is None: if self._graph.number_of_ntypes() != 1: raise DGLError( "Node type name must be specified if there are more than one " "node types." ) # nothing happen if num == 0: return assert num > 0, "Number of new nodes should be larger than one." ntid = self.get_ntype_id(ntype) # update graph idx metagraph = self._graph.metagraph num_nodes_per_type = [] for c_ntype in self.ntypes: if self.get_ntype_id(c_ntype) == ntid: num_nodes_per_type.append(self.num_nodes(c_ntype) + num) else: num_nodes_per_type.append(self.num_nodes(c_ntype)) relation_graphs = [] for c_etype in self.canonical_etypes: # src or dst == ntype, update the relation graph if ( self.get_ntype_id(c_etype[0]) == ntid or self.get_ntype_id(c_etype[2]) == ntid ): u, v = self.edges(form="uv", order="eid", etype=c_etype) hgidx = heterograph_index.create_unitgraph_from_coo( 1 if c_etype[0] == c_etype[2] else 2, self.num_nodes(c_etype[0]) + (num if self.get_ntype_id(c_etype[0]) == ntid else 0), self.num_nodes(c_etype[2]) + (num if self.get_ntype_id(c_etype[2]) == ntid else 0), u, v, ["coo", "csr", "csc"], ) relation_graphs.append(hgidx) else: # do nothing relation_graphs.append( self._graph.get_relation_graph(self.get_etype_id(c_etype)) ) hgidx = heterograph_index.create_heterograph_from_relations( metagraph, relation_graphs, utils.toindex(num_nodes_per_type, "int64"), ) self._graph = hgidx # update data frames if data is None: # Initialize feature with :func:`set_n_initializer` self._node_frames[ntid].add_rows(num) else: self._node_frames[ntid].append(data) self._reset_cached_info() def add_edges(self, u, v, data=None, etype=None): r"""Add multiple new edges for the specified edge type The i-th new edge will be from ``u[i]`` to ``v[i]``. 
        Parameters
        ----------
        u : int, tensor, numpy.ndarray, list
            Source node IDs, ``u[i]`` gives the source node for the i-th new edge.
        v : int, tensor, numpy.ndarray, list
            Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge.
        data : dict, optional
            Feature data of the added edges. The i-th row of the feature data
            corresponds to the i-th new edge.
        etype : str or tuple of str, optional
            The type of the new edges. Can be omitted if there is
            only one edge type in the graph.

        Notes
        -----

        * Inplace update is applied to the current graph.
        * If the end nodes of the added edges do not exist, add_nodes is invoked
          to add new nodes. The node features of the new nodes will be created
          by initializers defined with :func:`set_n_initializer` (default
          initializer fills zeros). In certain cases, it is recommended to
          add_nodes first and then add_edges.
        * If the key of ``data`` does not contain some existing feature fields,
          those features for the new edges will be created by initializers
          defined with :func:`set_e_initializer` (default initializer fills zeros).
        * If the key of ``data`` contains new feature fields, those features for
          the old edges will be created by initializers defined with
          :func:`set_e_initializer` (default initializer fills zeros).
        * This function discards the batch information. Please use
          :func:`dgl.DGLGraph.set_batch_num_nodes`
          and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
          to maintain the information.

        Examples
        --------

        The following example uses PyTorch backend.

        >>> import dgl
        >>> import torch

        **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type**

        >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
        >>> g.num_edges()
        2
        >>> g.add_edges(torch.tensor([1, 3]), torch.tensor([0, 1]))
        >>> g.num_edges()
        4

        Since ``u`` or ``v`` contains a non-existing node ID, the nodes are
        added implicitly.
>>> g.num_nodes() 4 If the graph has some edge features and new edges are added without features, their features will be created by initializers defined with :func:`set_n_initializer`. >>> g.edata['h'] = torch.ones(4, 1) >>> g.add_edges(torch.tensor([1]), torch.tensor([1])) >>> g.edata['h'] tensor([[1.], [1.], [1.], [1.], [0.]]) We can also assign features for the new edges in adding new edges. >>> g.add_edges(torch.tensor([0, 0]), torch.tensor([2, 2]), ... {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)}) >>> g.edata['h'] tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]]) Since ``data`` contains new feature fields, the features for old edges will be created by initializers defined with :func:`set_n_initializer`. >>> g.edata['w'] tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]]) **Heterogeneous Graphs with Multiple Edge Types** >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... torch.tensor([0, 0, 1, 1])), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... torch.tensor([0, 1])) ... }) >>> g.add_edges(torch.tensor([3]), torch.tensor([3])) DGLError: Edge type name must be specified if there are more than one edge types. >>> g.num_edges('plays') 4 >>> g.add_edges(torch.tensor([3]), torch.tensor([3]), etype='plays') >>> g.num_edges('plays') 5 See Also -------- add_nodes remove_nodes remove_edges """ # TODO(xiangsx): block do not support add_edges u = utils.prepare_tensor(self, u, "u") v = utils.prepare_tensor(self, v, "v") if etype is None: if self._graph.number_of_etypes() != 1: raise DGLError( "Edge type name must be specified if there are more than one " "edge types." ) # nothing changed if len(u) == 0 or len(v) == 0: return assert len(u) == len(v) or len(u) == 1 or len(v) == 1, ( "The number of source nodes and the number of destination nodes should be same, " "or either the number of source nodes or the number of destination nodes is 1." 
) if len(u) == 1 and len(v) > 1: u = F.full_1d( len(v), F.as_scalar(u), dtype=F.dtype(u), ctx=F.context(u) ) if len(v) == 1 and len(u) > 1: v = F.full_1d( len(u), F.as_scalar(v), dtype=F.dtype(v), ctx=F.context(v) ) u_type, e_type, v_type = self.to_canonical_etype(etype) # if end nodes of adding edges does not exists # use add_nodes to add new nodes first. num_of_u = self.num_nodes(u_type) num_of_v = self.num_nodes(v_type) u_max = F.as_scalar(F.max(u, dim=0)) + 1 v_max = F.as_scalar(F.max(v, dim=0)) + 1 if u_type == v_type: num_nodes = max(u_max, v_max) if num_nodes > num_of_u: self.add_nodes(num_nodes - num_of_u, ntype=u_type) else: if u_max > num_of_u: self.add_nodes(u_max - num_of_u, ntype=u_type) if v_max > num_of_v: self.add_nodes(v_max - num_of_v, ntype=v_type) # metagraph is not changed metagraph = self._graph.metagraph num_nodes_per_type = [] for ntype in self.ntypes: num_nodes_per_type.append(self.num_nodes(ntype)) # update graph idx relation_graphs = [] for c_etype in self.canonical_etypes: # the target edge type if c_etype == (u_type, e_type, v_type): old_u, old_v = self.edges(form="uv", order="eid", etype=c_etype) hgidx = heterograph_index.create_unitgraph_from_coo( 1 if u_type == v_type else 2, self.num_nodes(u_type), self.num_nodes(v_type), F.cat([old_u, u], dim=0), F.cat([old_v, v], dim=0), ["coo", "csr", "csc"], ) relation_graphs.append(hgidx) else: # do nothing # Note: node range change has been handled in add_nodes() relation_graphs.append( self._graph.get_relation_graph(self.get_etype_id(c_etype)) ) hgidx = heterograph_index.create_heterograph_from_relations( metagraph, relation_graphs, utils.toindex(num_nodes_per_type, "int64"), ) self._graph = hgidx # handle data etid = self.get_etype_id(etype) if data is None: self._edge_frames[etid].add_rows(len(u)) else: self._edge_frames[etid].append(data) self._reset_cached_info() def remove_edges(self, eids, etype=None, store_ids=False): r"""Remove multiple edges with the specified edge type Nodes will 
not be removed. After removing edges, the rest edges will be re-indexed using consecutive integers from 0, with their relative order preserved. The features for the removed edges will be removed accordingly. Parameters ---------- eids : int, tensor, numpy.ndarray, list IDs for the edges to remove. etype : str or tuple of str, optional The type of the edges to remove. Can be omitted if there is only one edge type in the graph. store_ids : bool, optional If True, it will store the raw IDs of the extracted nodes and edges in the ``ndata`` and ``edata`` of the resulting graph under name ``dgl.NID`` and ``dgl.EID``, respectively. Notes ----- This function preserves the batch information. Examples -------- >>> import dgl >>> import torch **Homogeneous Graphs or Heterogeneous Graphs with A Single Edge Type** >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2]))) >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1) >>> g.remove_edges(torch.tensor([0, 1])) >>> g Graph(num_nodes=3, num_edges=1, ndata_schemes={} edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)}) >>> g.edges('all') (tensor([2]), tensor([2]), tensor([0])) >>> g.edata['he'] tensor([[2.]]) Removing edges from a batched graph preserves batch information. >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2]))) >>> g2 = dgl.graph((torch.tensor([1, 2, 3]), torch.tensor([1, 3, 4]))) >>> bg = dgl.batch([g, g2]) >>> bg.batch_num_edges() tensor([3, 3]) >>> bg.remove_edges([1, 4]) >>> bg.batch_num_edges() tensor([2, 2]) **Heterogeneous Graphs with Multiple Edge Types** >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... torch.tensor([0, 0, 1, 1])), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... torch.tensor([0, 1])) ... }) >>> g.remove_edges(torch.tensor([0, 1])) DGLError: Edge type name must be specified if there are more than one edge types. 
def remove_nodes(self, nids, ntype=None, store_ids=False):
    r"""Remove multiple nodes with the specified node type

    Edges that connect to the nodes will be removed as well. After removing
    nodes and edges, the remaining nodes and edges will be re-indexed using
    consecutive integers from 0, with their relative order preserved.

    The features for the removed nodes/edges will be removed accordingly.

    Parameters
    ----------
    nids : int, tensor, numpy.ndarray, list
        Nodes to remove.
    ntype : str, optional
        The type of the nodes to remove. Can be omitted if there is
        only one node type in the graph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted nodes and edges in the
        ``ndata`` and ``edata`` of the resulting graph under name ``dgl.NID`` and
        ``dgl.EID``, respectively.

    Raises
    ------
    DGLError
        If ``ntype`` is omitted while the graph has more than one node type.
    AssertionError
        If the largest ID in ``nids`` is not smaller than the number of nodes
        of the given type.

    Notes
    -----
    This function preserves the batch information. Nothing happens if
    ``nids`` is empty.

    Examples
    --------

    >>> import dgl
    >>> import torch

    **Homogeneous Graphs or Heterogeneous Graphs with A Single Node Type**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g.remove_nodes(torch.tensor([0, 1]))
    >>> g
    Graph(num_nodes=1, num_edges=1,
        ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.ndata['hv']
    tensor([[2.]])
    >>> g.edata['he']
    tensor([[2.]])

    Removing nodes from a batched graph preserves batch information.

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g2 = dgl.graph((torch.tensor([1, 2, 3]), torch.tensor([1, 3, 4])))
    >>> bg = dgl.batch([g, g2])
    >>> bg.batch_num_nodes()
    tensor([3, 5])
    >>> bg.remove_nodes([1, 4])
    >>> bg.batch_num_nodes()
    tensor([2, 4])
    >>> bg.batch_num_edges()
    tensor([2, 2])

    **Heterogeneous Graphs with Multiple Node Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.remove_nodes(torch.tensor([0, 1]))
    DGLError: Node type name must be specified
    if there are more than one node types.
    >>> g.remove_nodes(torch.tensor([0, 1]), ntype='game')
    >>> g.num_nodes('user')
    3
    >>> g.num_nodes('game')
    0
    >>> g.num_edges('plays')
    0

    See Also
    --------
    add_nodes
    add_edges
    remove_edges
    """
    # TODO(xiangsx): block do not support remove_nodes
    if ntype is None:
        if self._graph.number_of_ntypes() != 1:
            raise DGLError(
                "Node type name must be specified if there are more than one "
                "node types."
            )

    # Label the tensor with the real argument name ("nids", not "u").
    nids = utils.prepare_tensor(self, nids, "nids")
    if len(nids) == 0:
        # no node to delete
        return

    # Compute the largest requested ID once; it is used both by the check
    # and by its failure message.
    max_nid = F.as_scalar(F.max(nids, dim=0))
    num_nodes = self.num_nodes(ntype)
    assert (
        num_nodes > max_nid
    ), "The input nids {} is out of the range [0:{})".format(
        max_nid, num_nodes
    )

    ntid = self.get_ntype_id(ntype)
    # For every node type, collect the IDs of the nodes to KEEP.
    nodes = {}
    for c_ntype in self.ntypes:
        if self.get_ntype_id(c_ntype) == ntid:
            target_ntype = c_ntype
            original_nids = self.nodes(c_ntype)
            nodes[c_ntype] = utils.compensate(nids, original_nids)
        else:
            nodes[c_ntype] = self.nodes(c_ntype)

    # If the graph is batched, update batch_num_nodes
    batched = self._batch_num_nodes is not None
    if batched:
        # Mark removed nodes with 1.0 and sum the marks per batch segment
        # to get the number of removed nodes in each graph of the batch.
        one_hot_removed_nodes = F.zeros(
            (self.num_nodes(target_ntype),), F.float32, self.device
        )
        one_hot_removed_nodes = F.scatter_row(
            one_hot_removed_nodes,
            nids,
            F.full_1d(len(nids), 1.0, F.float32, self.device),
        )
        c_ntype_batch_num_nodes = self._batch_num_nodes[target_ntype]
        batch_num_removed_nodes = segment.segment_reduce(
            c_ntype_batch_num_nodes, one_hot_removed_nodes, reducer="sum"
        )
        self._batch_num_nodes[
            target_ntype
        ] = c_ntype_batch_num_nodes - F.astype(
            batch_num_removed_nodes, self.idtype
        )
        # Record old num_edges to check later whether some edges were removed
        old_num_edges = {
            c_etype: self._graph.num_edges(self.get_etype_id(c_etype))
            for c_etype in self.canonical_etypes
        }

    # node_subgraph
    # If batch_num_edges is to be updated, record the original edge IDs
    sub_g = self.subgraph(nodes, store_ids=store_ids or batched)
    self._graph = sub_g._graph
    self._node_frames = sub_g._node_frames
    self._edge_frames = sub_g._edge_frames

    # If the graph is batched, update batch_num_edges
    if batched:
        # Only edge types that actually lost edges need a recount.
        canonical_etypes = [
            c_etype
            for c_etype in self.canonical_etypes
            if self._graph.num_edges(self.get_etype_id(c_etype))
            != old_num_edges[c_etype]
        ]

        for c_etype in canonical_etypes:
            if self._graph.num_edges(self.get_etype_id(c_etype)) == 0:
                self._batch_num_edges[c_etype] = F.zeros(
                    (self.batch_size,), self.idtype, self.device
                )
                continue

            # Mark the surviving edges (by their original IDs) and sum the
            # marks per batch segment.
            one_hot_left_edges = F.zeros(
                (old_num_edges[c_etype],), F.float32, self.device
            )
            eids = self.edges[c_etype].data[EID]
            one_hot_left_edges = F.scatter_row(
                one_hot_left_edges,
                eids,
                F.full_1d(len(eids), 1.0, F.float32, self.device),
            )
            batch_num_left_edges = segment.segment_reduce(
                self._batch_num_edges[c_etype],
                one_hot_left_edges,
                reducer="sum",
            )
            self._batch_num_edges[c_etype] = F.astype(
                batch_num_left_edges, self.idtype
            )

    # The original IDs were stored only for the batch bookkeeping above;
    # drop them again unless the caller asked for them.
    if batched and not store_ids:
        for c_ntype in self.ntypes:
            self.nodes[c_ntype].data.pop(NID)
        for c_etype in self.canonical_etypes:
            self.edges[c_etype].data.pop(EID)
@property
def ntypes(self):
    """List the names of every node type in the graph.

    Returns
    -------
    list[str]
        The node type names.

    Notes
    -----
    Each node type has an integer ID assigned internally by DGL; the names
    are returned in the order of those IDs.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
    ... })
    >>> g.ntypes
    ['game', 'user']
    """
    node_type_names = self._ntypes
    return node_type_names
@property
def canonical_etypes(self):
    """List every canonical edge type in the graph.

    A canonical edge type is the string triplet
    ``(source node type, edge type, destination node type)``.

    Returns
    -------
    list[(str, str, str)]
        The canonical edge type triplets.

    Notes
    -----
    Each edge type has an integer ID assigned internally by DGL; the triplets
    are returned in the order of those IDs.

    See Also
    --------
    etypes

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
    ... })
    >>> g.canonical_etypes
    [('user', 'follows', 'user'),
     ('user', 'follows', 'game'),
     ('user', 'plays', 'game')]
    """
    triplets = self._canonical_etypes
    return triplets
@property
def dsttypes(self):
    """Return all the destination node type names in this graph.

    If the graph can further divide its node types into two subsets A and B where
    all the edges are from nodes of types in A to nodes of types in B, we call
    this graph a *uni-bipartite* graph and the nodes in A being the *source*
    nodes and the ones in B being the *destination* nodes. If the graph is not
    uni-bipartite, the source and destination nodes are just the entire set of
    nodes in the graph.

    Returns
    -------
    list[str]
        All the destination node type names in a list. For a uni-bipartite
        graph the names are sorted alphabetically; otherwise this is the
        same list as :attr:`ntypes`.

    See Also
    --------
    srctypes
    is_unibipartite

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Query for a uni-bipartite graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
    ... })
    >>> g.dsttypes
    ['game']

    Query for a graph that is not uni-bipartite.

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2]))
    ... })
    >>> g.dsttypes
    ['developer', 'game', 'user']
    """
    if self.is_unibipartite:
        # sorted() iterates the mapping's keys directly and already returns
        # a new list, so no list(...keys()) wrapper is needed.
        return sorted(self._dsttypes_invmap)
    return self.ntypes
def to_canonical_etype(self, etype):
    """Resolve an edge type to its canonical edge type in the graph.

    A canonical edge type is a string triplet ``(str, str, str)``
    for source node type, edge type and destination node type.

    The given edge type name must identify exactly one canonical edge
    type; otherwise an error is raised.

    Parameters
    ----------
    etype : str or (str, str, str)
        If :attr:`etype` is an edge type (str), the corresponding canonical
        edge type in the graph is returned. If :attr:`etype` is a tuple, it
        is returned unchanged. If it is None, the graph must have exactly
        one edge type, which is then used.

    Returns
    -------
    (str, str, str)
        The canonical edge type corresponding to the edge type.

    Raises
    ------
    DGLError
        If :attr:`etype` is None and the graph does not have exactly one
        edge type, if the name does not exist, or if the name is ambiguous.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a heterograph.

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
    ...     ('developer', 'follows', 'game'): ([0, 1], [0, 1])
    ... })

    Map an edge type to its corresponding canonical edge type.

    >>> g.to_canonical_etype('plays')
    ('user', 'plays', 'game')
    >>> g.to_canonical_etype(('user', 'plays', 'game'))
    ('user', 'plays', 'game')

    See Also
    --------
    canonical_etypes
    """
    if etype is None:
        # Fall back to the only edge type, if there is exactly one.
        if len(self.etypes) != 1:
            raise DGLError(
                "Edge type name must be specified if there are more than one "
                "edge types."
            )
        etype = self.etypes[0]

    # Tuples are taken to be canonical already and pass through untouched.
    if isinstance(etype, tuple):
        return etype

    canonical = self._etype2canonical.get(etype, None)
    if canonical is None:
        raise DGLError('Edge type "{}" does not exist.'.format(etype))
    # An empty entry is treated as "name maps to several canonical types".
    if len(canonical) == 0:
        raise DGLError(
            'Edge type "%s" is ambiguous. Please use canonical edge type '
            "in the form of (srctype, etype, dsttype)" % etype
        )
    return canonical
def get_etype_id(self, etype):
    """Look up the integer ID of an edge type.

    ``etype`` may be None, in which case the graph must have exactly one
    edge type and its ID (0) is returned.

    Parameters
    ----------
    etype : str or tuple of str
        Edge type

    Returns
    -------
    int

    Raises
    ------
    DGLError
        If ``etype`` is None and the graph does not have exactly one edge
        type, or if the edge type is not found.
    """
    if etype is not None:
        canonical = self.to_canonical_etype(etype)
        etype_id = self._etypes_invmap.get(canonical, None)
        if etype_id is None:
            raise DGLError('Edge type "{}" does not exist.'.format(etype))
        return etype_id

    # No edge type given: only valid when there is a single one.
    if self._graph.number_of_etypes() != 1:
        raise DGLError(
            "Edge type name must be specified if there are more than one "
            "edge types."
        )
    return 0
def batch_num_nodes(self, ntype=None):
    """Return the per-graph node counts of the batch for a node type.

    Parameters
    ----------
    ntype : str, optional
        The node type for query. If the graph has multiple node types,
        one must specify the argument. Otherwise, it can be omitted.

    Returns
    -------
    Tensor
        The number of nodes with the specified type for each graph in the batch.
        The i-th element of it is the number of nodes with the specified type
        for the i-th graph. If no batch information has been set, a
        length-1 tensor holding the number of nodes in the graph is
        created and returned.

    Raises
    ------
    DGLError
        If ``ntype`` is given but is not one of the graph's node types, or
        if it is omitted while the graph has more than one node type.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Query for homogeneous graphs.

    >>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
    >>> g1.batch_num_nodes()
    tensor([4])
    >>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
    >>> bg = dgl.batch([g1, g2])
    >>> bg.batch_num_nodes()
    tensor([4, 3])

    Query for heterogeneous graphs.

    >>> hg1 = dgl.heterograph({
    ...       ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
    >>> hg2 = dgl.heterograph({
    ...       ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
    >>> bg = dgl.batch([hg1, hg2])
    >>> bg.batch_num_nodes('user')
    tensor([2, 1])
    """
    if ntype is not None and ntype not in self.ntypes:
        raise DGLError(
            "Expect ntype in {}, got {}".format(self.ntypes, ntype)
        )

    counts = self._batch_num_nodes
    if counts is None:
        # Lazily treat the graph as a batch of one.
        counts = self._batch_num_nodes = {}
        for name in self.ntypes:
            total = F.tensor([self.num_nodes(name)], self.idtype)
            counts[name] = F.copy_to(total, self.device)

    if ntype is None:
        if len(self.ntypes) != 1:
            raise DGLError(
                "Node type name must be specified if there are more than one "
                "node types."
            )
        ntype = self.ntypes[0]
    return counts[ntype]
def batch_num_edges(self, etype=None):
    """Return the per-graph edge counts of the batch for an edge type.

    Parameters
    ----------
    etype : str or tuple of str, optional
        The edge type for query, which can be an edge type (str) or a canonical edge type
        (3-tuple of str). When an edge type appears in multiple canonical edge types, one
        must use a canonical edge type. If the graph has multiple edge types, one must
        specify the argument. Otherwise, it can be omitted.

    Returns
    -------
    Tensor
        The number of edges with the specified type for each graph in the batch.
        The i-th element of it is the number of edges with the specified type
        for the i-th graph. If no batch information has been set, a
        length-1 tensor holding the number of edges in the graph is
        created and returned.

    Raises
    ------
    DGLError
        If ``etype`` is omitted while the graph has more than one edge type.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Query for homogeneous graphs.

    >>> g1 = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
    >>> g1.batch_num_edges()
    tensor([3])
    >>> g2 = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([0, 1, 2, 0])))
    >>> bg = dgl.batch([g1, g2])
    >>> bg.batch_num_edges()
    tensor([3, 4])

    Query for heterogeneous graphs.

    >>> hg1 = dgl.heterograph({
    ...       ('user', 'plays', 'game') : (torch.tensor([0, 1]), torch.tensor([0, 0]))})
    >>> hg2 = dgl.heterograph({
    ...       ('user', 'plays', 'game') : (torch.tensor([0, 0]), torch.tensor([1, 0]))})
    >>> bg = dgl.batch([hg1, hg2])
    >>> bg.batch_num_edges('plays')
    tensor([2, 2])
    """
    counts = self._batch_num_edges
    if counts is None:
        # Lazily treat the graph as a batch of one.
        counts = self._batch_num_edges = {}
        for c_etype in self.canonical_etypes:
            total = F.tensor([self.num_edges(c_etype)], self.idtype)
            counts[c_etype] = F.copy_to(total, self.device)

    if etype is not None:
        key = self.to_canonical_etype(etype)
    elif len(self.etypes) == 1:
        key = self.canonical_etypes[0]
    else:
        raise DGLError(
            "Edge type name must be specified if there are more than one "
            "edge types."
        )
    return counts[key]
>>> g = dgl.graph(([0, 1, 2, 3, 4, 5], [1, 2, 0, 4, 5, 3])) Manually set batch information >>> g.set_batch_num_nodes(torch.tensor([3, 3])) >>> g.set_batch_num_edges(torch.tensor([3, 3])) Unbatch the graph. >>> dgl.unbatch(g) [Graph(num_nodes=3, num_edges=3, ndata_schemes={} edata_schemes={}), Graph(num_nodes=3, num_edges=3, ndata_schemes={} edata_schemes={})] Create a heterogeneous graph. >>> hg = dgl.heterograph({ ... ('user', 'plays', 'game') : ([0, 1, 2, 3, 4, 5], [0, 1, 1, 3, 3, 2]), ... ('developer', 'develops', 'game') : ([0, 1, 2, 3], [1, 0, 3, 2])}) Manually set batch information. >>> hg.set_batch_num_nodes({ ... 'user': torch.tensor([3, 3]), ... 'game': torch.tensor([2, 2]), ... 'developer': torch.tensor([2, 2])}) >>> hg.set_batch_num_edges( ... {('user', 'plays', 'game'): torch.tensor([3, 3]), ... ('developer', 'develops', 'game'): torch.tensor([2, 2])}) Unbatch the graph. >>> g1, g2 = dgl.unbatch(hg) >>> g1 Graph(num_nodes={'developer': 2, 'game': 2, 'user': 3}, num_edges={('developer', 'develops', 'game'): 2, ('user', 'plays', 'game'): 3}, metagraph=[('developer', 'game', 'develops'), ('user', 'game', 'plays')]) >>> g2 Graph(num_nodes={'developer': 2, 'game': 2, 'user': 3}, num_edges={('developer', 'develops', 'game'): 2, ('user', 'plays', 'game'): 3}, metagraph=[('developer', 'game', 'develops'), ('user', 'game', 'plays')]) See Also -------- set_batch_num_nodes batch unbatch """ val = utils.prepare_tensor_or_dict(self, val, "batch_num_edges") if not isinstance(val, Mapping): if len(self.etypes) != 1: raise DGLError( "Must provide a dictionary when there are multiple edge types." 
) val = {self.canonical_etypes[0]: val} self._batch_num_edges = val ################################################################# # View ################################################################# def get_node_storage(self, key, ntype=None): """Get storage object of node feature of type :attr:`ntype` and name :attr:`key`.""" return self._node_frames[self.get_ntype_id(ntype)]._columns[key] def get_edge_storage(self, key, etype=None): """Get storage object of edge feature of type :attr:`etype` and name :attr:`key`.""" return self._edge_frames[self.get_etype_id(etype)]._columns[key] @property def nodes(self): """Return a node view One can use it for: 1. Getting the node IDs for a single node type. 2. Setting/getting features for all nodes of a single node type. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a homogeneous graph and a heterogeneous graph of two node types. >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> hg = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6])) ... }) Get the node IDs of the homogeneous graph. >>> g.nodes() tensor([0, 1, 2]) Get the node IDs of the heterogeneous graph. With multiple node types introduced, one needs to specify the node type for query. >>> hg.nodes('user') tensor([0, 1, 2, 3, 4]) Set and get a feature 'h' for all nodes of a single type in the heterogeneous graph. >>> hg.nodes['user'].data['h'] = torch.ones(5, 1) >>> hg.nodes['user'].data['h'] tensor([[1.], [1.], [1.], [1.], [1.]]) To set node features for a graph with a single node type, use :func:`DGLGraph.ndata`. See Also -------- ndata """ # Todo (Mufei) Replace the syntax g.nodes[...].ndata[...] with g.nodes[...][...] 
return HeteroNodeView(self, self.get_ntype_id) @property def srcnodes(self): """Return a node view for source nodes If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference), this is :func:`nodes` restricted to source node types. Otherwise, it is an alias for :func:`nodes`. One can use it for: 1. Getting the node IDs for a single node type. 2. Setting/getting features for all nodes of a single node type. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a uni-bipartite graph. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])), ... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2])) ... }) Get the node IDs for source node types. >>> g.srcnodes('user') tensor([0]) >>> g.srcnodes('developer') tensor([0, 1]) Set/get features for source node types. >>> g.srcnodes['user'].data['h'] = torch.ones(1, 1) >>> g.srcnodes['user'].data['h'] tensor([[1.]]) Create a graph that is not uni-bipartite. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])), ... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2])) ... }) :func:`dgl.DGLGraph.srcnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can get the node IDs for both source and destination node types. >>> g.srcnodes('game') tensor([0, 1, 2]) One can also set/get features for destination node types in this case. >>> g.srcnodes['game'].data['h'] = torch.ones(3, 1) >>> g.srcnodes['game'].data['h'] tensor([[1.], [1.], [1.]]) See Also -------- srcdata """ return HeteroNodeView(self, self.get_ntype_id_from_src) @property def dstnodes(self): """Return a node view for destination nodes If the graph is a uni-bipartite graph (see :func:`is_unibipartite` for reference), this is :func:`nodes` restricted to destination node types. Otherwise, it is an alias for :func:`nodes`. One can use it for: 1. 
Getting the node IDs for a single node type. 2. Setting/getting features for all nodes of a single node type. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a uni-bipartite graph. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0]), torch.tensor([1])), ... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2])) ... }) Get the node IDs for destination node types. >>> g.dstnodes('game') tensor([0, 1, 2]) Set/get features for destination node types. >>> g.dstnodes['game'].data['h'] = torch.ones(3, 1) >>> g.dstnodes['game'].data['h'] tensor([[1.], [1.], [1.]]) Create a graph that is not uni-bipartite. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0]), torch.tensor([1])), ... ('developer', 'develops', 'game'): (torch.tensor([1]), torch.tensor([2])) ... }) :func:`dgl.DGLGraph.dstnodes` falls back to :func:`dgl.DGLGraph.nodes` and one can get the node IDs for both source and destination node types. >>> g.dstnodes('developer') tensor([0, 1]) One can also set/get features for source node types in this case. >>> g.dstnodes['developer'].data['h'] = torch.ones(2, 1) >>> g.dstnodes['developer'].data['h'] tensor([[1.], [1.]]) See Also -------- dstdata """ return HeteroNodeView(self, self.get_ntype_id_from_dst) @property def ndata(self): """Return a node data view for setting/getting node features Let ``g`` be a DGLGraph. If ``g`` is a graph of a single node type, ``g.ndata[feat]`` returns the node feature associated with the name ``feat``. One can also set a node feature associated with the name ``feat`` by setting ``g.ndata[feat]`` to a tensor. If ``g`` is a graph of multiple node types, ``g.ndata[feat]`` returns a dict[str, Tensor] mapping node types to the node features associated with the name ``feat`` for the corresponding type. 
One can also set a node feature associated with the name ``feat`` for some node type(s) by setting ``g.ndata[feat]`` to a dictionary as described. Notes ----- For setting features, the device of the features must be the same as the device of the graph. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Set and get feature 'h' for a graph of a single node type. >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g.ndata['h'] = torch.ones(3, 1) >>> g.ndata['h'] tensor([[1.], [1.], [1.]]) Set and get feature 'h' for a graph of multiple node types. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])), ... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1])) ... }) >>> g.ndata['h'] = {'game': torch.zeros(2, 1), 'player': torch.ones(3, 1)} >>> g.ndata['h'] {'game': tensor([[0.], [0.]]), 'player': tensor([[1.], [1.], [1.]])} >>> g.ndata['h'] = {'game': torch.ones(2, 1)} >>> g.ndata['h'] {'game': tensor([[1.], [1.]]), 'player': tensor([[1.], [1.], [1.]])} See Also -------- nodes """ if len(self.ntypes) == 1: ntid = self.get_ntype_id(None) ntype = self.ntypes[0] return HeteroNodeDataView(self, ntype, ntid, ALL) else: ntids = [self.get_ntype_id(ntype) for ntype in self.ntypes] ntypes = self.ntypes return HeteroNodeDataView(self, ntypes, ntids, ALL) @property def srcdata(self): """Return a node data view for setting/getting source node features. Let ``g`` be a DGLGraph. If ``g`` is a graph of a single source node type, ``g.srcdata[feat]`` returns the source node feature associated with the name ``feat``. One can also set a source node feature associated with the name ``feat`` by setting ``g.srcdata[feat]`` to a tensor. If ``g`` is a graph of multiple source node types, ``g.srcdata[feat]`` returns a dict[str, Tensor] mapping source node types to the node features associated with the name ``feat`` for the corresponding type. 
One can also set a node feature associated with the name ``feat`` for some source node type(s) by setting ``g.srcdata[feat]`` to a dictionary as described. Notes ----- For setting features, the device of the features must be the same as the device of the graph. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Set and get feature 'h' for a graph of a single source node type. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))}) >>> g.srcdata['h'] = torch.ones(2, 1) >>> g.srcdata['h'] tensor([[1.], [1.]]) Set and get feature 'h' for a graph of multiple source node types. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([3, 4])), ... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1])) ... }) >>> g.srcdata['h'] = {'user': torch.zeros(3, 1), 'player': torch.ones(3, 1)} >>> g.srcdata['h'] {'player': tensor([[1.], [1.], [1.]]), 'user': tensor([[0.], [0.], [0.]])} >>> g.srcdata['h'] = {'user': torch.ones(3, 1)} >>> g.srcdata['h'] {'player': tensor([[1.], [1.], [1.]]), 'user': tensor([[1.], [1.], [1.]])} See Also -------- nodes ndata srcnodes """ if len(self.srctypes) == 1: ntype = self.srctypes[0] ntid = self.get_ntype_id_from_src(ntype) return HeteroNodeDataView(self, ntype, ntid, ALL) else: ntypes = self.srctypes ntids = [self.get_ntype_id_from_src(ntype) for ntype in ntypes] return HeteroNodeDataView(self, ntypes, ntids, ALL) @property def dstdata(self): """Return a node data view for setting/getting destination node features. Let ``g`` be a DGLGraph. If ``g`` is a graph of a single destination node type, ``g.dstdata[feat]`` returns the destination node feature associated with the name ``feat``. One can also set a destination node feature associated with the name ``feat`` by setting ``g.dstdata[feat]`` to a tensor. 
If ``g`` is a graph of multiple destination node types, ``g.dstdata[feat]`` returns a dict[str, Tensor] mapping destination node types to the node features associated with the name ``feat`` for the corresponding type. One can also set a node feature associated with the name ``feat`` for some destination node type(s) by setting ``g.dstdata[feat]`` to a dictionary as described. Notes ----- For setting features, the device of the features must be the same as the device of the graph. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Set and get feature 'h' for a graph of a single destination node type. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2]))}) >>> g.dstdata['h'] = torch.ones(3, 1) >>> g.dstdata['h'] tensor([[1.], [1.], [1.]]) Set and get feature 'h' for a graph of multiple destination node types. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([1, 2]), torch.tensor([1, 2])), ... ('user', 'watches', 'movie'): (torch.tensor([2, 2]), torch.tensor([1, 1])) ... }) >>> g.dstdata['h'] = {'game': torch.zeros(3, 1), 'movie': torch.ones(2, 1)} >>> g.dstdata['h'] {'game': tensor([[0.], [0.], [0.]]), 'movie': tensor([[1.], [1.]])} >>> g.dstdata['h'] = {'game': torch.ones(3, 1)} >>> g.dstdata['h'] {'game': tensor([[1.], [1.], [1.]]), 'movie': tensor([[1.], [1.]])} See Also -------- nodes ndata dstnodes """ if len(self.dsttypes) == 1: ntype = self.dsttypes[0] ntid = self.get_ntype_id_from_dst(ntype) return HeteroNodeDataView(self, ntype, ntid, ALL) else: ntypes = self.dsttypes ntids = [self.get_ntype_id_from_dst(ntype) for ntype in ntypes] return HeteroNodeDataView(self, ntypes, ntids, ALL) @property def edges(self): """Return an edge view One can use it for: 1. Getting the edges for a single edge type. 
In this case, it can take the following optional arguments: - form : str, optional The return form, which can be one of the following: - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`, representing the source and destination nodes of all edges. For each :math:`i`, :math:`(U[i], V[i])` forms an edge. - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing the IDs of all edges. - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`, representing the source nodes, destination nodes and IDs of all edges. For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`. - order : str, optional The order of the returned edges, which can be one of the following: - ``'eid'`` (default): The edges are sorted by their IDs. - ``'srcdst'``: The edges are sorted first by their source node IDs and then by their destination node IDs to break ties. - etype : str or tuple of str, optional The edge type for query, which can be an edge type (str) or a canonical edge type (3-tuple of str). When an edge type appears in multiple canonical edge types, one must use a canonical edge type. If the graph has multiple edge types, one must specify the argument. Otherwise, it can be omitted. 2. Setting/getting features for all edges of a single edge type. To set/get a feature ``feat`` for edges of type ``etype`` in a graph ``g``, one can use ``g.edges[etype].data[feat]``. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch **Get the Edges for a Single Edge Type** Create a graph with a single edge type. >>> g = dgl.graph((torch.tensor([1, 0, 0]), torch.tensor([1, 1, 0]))) >>> g.edges() (tensor([1, 0, 0]), tensor([1, 1, 0])) Specify a different value for :attr:`form` and :attr:`order`. >>> g.edges(form='all', order='srcdst') (tensor([0, 0, 1]), tensor([0, 1, 1]), tensor([2, 1, 0])) For a graph of multiple edge types, it is required to specify the edge type in query. 
>>> hg = dgl.heterograph({
        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
        ... })
        >>> hg.edges(etype='plays')
        (tensor([3, 4]), tensor([5, 6]))

        **Set/get Features for All Edges of a Single Edge Type**

        Create a heterogeneous graph of two edge types.

        >>> hg = dgl.heterograph({
        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
        ... })

        Set and get a feature 'h' for all edges of a single type in the heterogeneous graph.

        >>> hg.edges['follows'].data['h'] = torch.ones(2, 1)
        >>> hg.edges['follows'].data['h']
        tensor([[1.], [1.]])

        To set edge features for a graph with a single edge type, use :func:`DGLGraph.edata`.

        See Also
        --------
        edata
        """
        # TODO(Mufei): Replace the syntax g.edges[...].edata[...] with g.edges[...][...]
        return HeteroEdgeView(self)

    @property
    def edata(self):
        """Return an edge data view for setting/getting edge features.

        Let ``g`` be a DGLGraph. If ``g`` is a graph of a single edge type, ``g.edata[feat]``
        returns the edge feature associated with the name ``feat``. One can also set an
        edge feature associated with the name ``feat`` by setting ``g.edata[feat]`` to a tensor.

        If ``g`` is a graph of multiple edge types, ``g.edata[feat]`` returns a
        dict[str, Tensor] mapping canonical edge types to the edge features associated with
        the name ``feat`` for the corresponding type. One can also set an edge feature
        associated with the name ``feat`` for some edge type(s) by setting
        ``g.edata[feat]`` to a dictionary as described.

        Notes
        -----
        For setting features, the device of the features must be the same as the device
        of the graph.

        Examples
        --------
        The following example uses PyTorch backend.

        >>> import dgl
        >>> import torch

        Set and get feature 'h' for a graph of a single edge type.
>>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g.edata['h'] = torch.ones(2, 1) >>> g.edata['h'] tensor([[1.], [1.]]) Set and get feature 'h' for a graph of multiple edge types. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])), ... ('user', 'plays', 'user'): (torch.tensor([2, 2]), torch.tensor([1, 1])), ... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1])) ... }) >>> g.edata['h'] = {('user', 'follows', 'user'): torch.zeros(2, 1), ... ('user', 'plays', 'user'): torch.ones(2, 1)} >>> g.edata['h'] {('user', 'follows', 'user'): tensor([[0.], [0.]]), ('user', 'plays', 'user'): tensor([[1.], [1.]])} >>> g.edata['h'] = {('user', 'follows', 'user'): torch.ones(2, 1)} >>> g.edata['h'] {('user', 'follows', 'user'): tensor([[1.], [1.]]), ('user', 'plays', 'user'): tensor([[1.], [1.]])} See Also -------- edges """ if len(self.canonical_etypes) == 1: return HeteroEdgeDataView(self, None, ALL) else: return HeteroEdgeDataView(self, self.canonical_etypes, ALL) def _find_etypes(self, key): etypes = [ i for i, (srctype, etype, dsttype) in enumerate( self._canonical_etypes ) if (key[0] == SLICE_FULL or key[0] == srctype) and (key[1] == SLICE_FULL or key[1] == etype) and (key[2] == SLICE_FULL or key[2] == dsttype) ] return etypes def __getitem__(self, key): """Return the relation slice of this graph. You can get a relation slice with ``self[srctype, etype, dsttype]``, where ``srctype``, ``etype``, and ``dsttype`` can be either a string or a full slice (``:``) representing wildcard (i.e. any source/edge/destination type). A relation slice is a homogeneous (with one node type and one edge type) or bipartite (with two node types and one edge type) graph, transformed from the original heterogeneous graph. If there is only one canonical edge type found, then the returned relation slice would be a subgraph induced from the original graph. 
That is, it is
        equivalent to ``self.edge_type_subgraph(etype)``. The node and edge features
        of the returned graph would be shared with the original graph.

        If there are multiple canonical edge types found, then the source/edge/destination
        node types would be a *concatenation* of original node/edge types. The
        new source/destination node type would have the concatenation determined by
        :func:`dgl.combine_names` called on original source/destination types as its name.
        The source/destination node would be formed by concatenating the common features of
        the original source/destination types. Therefore they are not shared with the
        original graph. Edge type is similar.

        Parameters
        ----------
        key : str or tuple
            Either a string representing the edge type name, or a tuple in the form of
            ``(srctype, etype, dsttype)`` where ``srctype``, ``etype``, ``dsttype`` can be either
            strings representing type names or a full slice object (`:`).

        Returns
        -------
        DGLGraph
            The relation slice.

        Notes
        -----
        This function returns a new graph. Changing the content of this graph does not reflect
        onto the original graph.

        If the graph combines multiple node types or edge types together, it will have the
        mapping of node/edge types and IDs from the new graph to the original graph.
        The mappings have the name ``dgl.NTYPE``, ``dgl.NID``, ``dgl.ETYPE`` and ``dgl.EID``,
        similar to the function :func:`dgl.to_homogeneous`.

        Examples
        --------
        >>> g = dgl.heterograph({
        ...     ('A1', 'AB1', 'B'): ([0, 1, 2], [1, 2, 3]),
        ...     ('A1', 'AB2', 'B'): ([1, 2, 3], [3, 4, 5]),
        ...
('A2', 'AB2', 'B'): ([1, 3, 5], [2, 4, 6])})
        >>> new_g = g['A1', :, 'B']         # combines all edge types between A1 and B
        >>> new_g
        Graph(num_nodes={'A1': 4, 'B': 7},
              num_edges={('A1', 'AB1+AB2', 'B'): 6},
              metagraph=[('A1', 'B', 'AB1+AB2')])
        >>> new_g.edges()
        (tensor([0, 1, 2, 1, 2, 3]), tensor([1, 2, 3, 3, 4, 5]))

        >>> new_g2 = g[:, 'AB2', 'B']        # combines all node types that are source of AB2
        >>> new_g2
        Graph(num_nodes={'A1+A2': 10, 'B': 7},
              num_edges={('A1+A2', 'AB2+AB2', 'B'): 6},
              metagraph=[('A1+A2', 'B', 'AB2+AB2')])
        >>> new_g2.edges()
        (tensor([1, 2, 3, 5, 7, 9]), tensor([3, 4, 5, 2, 4, 6]))

        If a combination of multiple node types and edge types occur, one can find
        the mapping to the original node type and IDs like the following:

        >>> new_g.edges['AB1+AB2'].data[dgl.EID]
        tensor([0, 1, 2, 0, 1, 2])
        >>> new_g.edges['AB1+AB2'].data[dgl.ETYPE]
        tensor([0, 0, 0, 1, 1, 1])
        >>> new_g2.nodes['A1+A2'].data[dgl.NID]
        tensor([0, 1, 2, 3, 0, 1, 2, 3, 4, 5])
        >>> new_g2.nodes['A1+A2'].data[dgl.NTYPE]
        tensor([0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
        """
        err_msg = (
            "Invalid slice syntax. Use G['etype'] or G['srctype', 'etype', 'dsttype'] "
            + "to get view of one relation type. Use : to slice multiple types (e.g. "
            + "G['srctype', :, 'dsttype'])."
        )

        # Keep the caller's key for the error message; a bare edge type name is
        # expanded to a triple with wildcards for the source and destination.
        orig_key = key
        if not isinstance(key, tuple):
            key = (SLICE_FULL, key, SLICE_FULL)

        if len(key) != 3:
            raise DGLError(err_msg)

        etypes = self._find_etypes(key)

        if len(etypes) == 0:
            raise DGLError(
                'Invalid key "{}". Must be one of the edge types.'.format(
                    orig_key
                )
            )

        if len(etypes) == 1:
            # no ambiguity: return the unitgraph itself
            srctype, etype, dsttype = self._canonical_etypes[etypes[0]]
            stid = self.get_ntype_id_from_src(srctype)
            etid = self.get_etype_id((srctype, etype, dsttype))
            dtid = self.get_ntype_id_from_dst(dsttype)
            new_g = self._graph.get_relation_graph(etid)

            # The existing frame objects are passed on as-is (not copied).
            if stid == dtid:
                new_ntypes = [srctype]
                new_nframes = [self._node_frames[stid]]
            else:
                # NOTE(review): a (src types, dst types) pair is presumably how
                # the constructor is told the result is bipartite -- confirm.
                new_ntypes = ([srctype], [dsttype])
                new_nframes = [self._node_frames[stid], self._node_frames[dtid]]
            new_etypes = [etype]
            new_eframes = [self._edge_frames[etid]]

            return self.__class__(
                new_g, new_ntypes, new_etypes, new_nframes, new_eframes
            )
        else:
            # Several canonical edge types matched: flatten them into a single
            # relation and build combined names and frames for it.
            flat = self._graph.flatten_relations(etypes)
            new_g = flat.graph

            # merge frames
            stids = flat.induced_srctype_set.asnumpy()
            dtids = flat.induced_dsttype_set.asnumpy()
            etids = flat.induced_etype_set.asnumpy()
            new_ntypes = [combine_names(self.ntypes, stids)]
            if new_g.number_of_ntypes() == 2:
                new_ntypes.append(combine_names(self.ntypes, dtids))
                new_nframes = [
                    combine_frames(self._node_frames, stids),
                    combine_frames(self._node_frames, dtids),
                ]
            else:
                # One node type in the result: source and destination type
                # sets are expected to coincide.
                assert np.array_equal(stids, dtids)
                new_nframes = [combine_frames(self._node_frames, stids)]
            new_etypes = [combine_names(self.etypes, etids)]
            new_eframes = [combine_frames(self._edge_frames, etids)]

            # create new heterograph
            new_hg = self.__class__(
                new_g, new_ntypes, new_etypes, new_nframes, new_eframes
            )

            src = new_ntypes[0]
            dst = new_ntypes[1] if new_g.number_of_ntypes() == 2 else src
            # put the parent node/edge type and IDs
            new_hg.nodes[src].data[NTYPE] = F.zerocopy_from_dgl_ndarray(
                flat.induced_srctype
            )
            new_hg.nodes[src].data[NID] = F.zerocopy_from_dgl_ndarray(
                flat.induced_srcid
            )
            new_hg.nodes[dst].data[NTYPE] = F.zerocopy_from_dgl_ndarray(
                flat.induced_dsttype
            )
            new_hg.nodes[dst].data[NID] = F.zerocopy_from_dgl_ndarray(
                flat.induced_dstid
            )
            new_hg.edata[ETYPE] = F.zerocopy_from_dgl_ndarray(
                flat.induced_etype
            )
            new_hg.edata[EID] = F.zerocopy_from_dgl_ndarray(flat.induced_eid)

            return new_hg

    #################################################################
    # Graph query
    #################################################################

    def number_of_nodes(self, ntype=None):
        """Alias of :meth:`num_nodes`"""
        return self.num_nodes(ntype)

    def num_nodes(self, ntype=None):
        """Return the number of nodes in the graph.

        Parameters
        ----------
        ntype : str, optional
            The node type name. If given, it returns the number of nodes of the
            type. If not given (default), it returns the total number of nodes of all types.

        Returns
        -------
        int
            The number of nodes.

        Examples
        --------

        The following example uses PyTorch backend.

        >>> import dgl
        >>> import torch

        Create a graph with two node types -- 'user' and 'game'.

        >>> g = dgl.heterograph({
        ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
        ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
        ... })

        Query for the number of nodes.

        >>> g.num_nodes('user')
        5
        >>> g.num_nodes('game')
        7
        >>> g.num_nodes()
        12
        """
        if ntype is None:
            # No type given: add up the counts of every node type ID.
            return sum(
                [
                    self._graph.num_nodes(ntid)
                    for ntid in range(len(self.ntypes))
                ]
            )
        else:
            return self._graph.num_nodes(self.get_ntype_id(ntype))

    def number_of_src_nodes(self, ntype=None):
        """Alias of :meth:`num_src_nodes`"""
        return self.num_src_nodes(ntype)

    def num_src_nodes(self, ntype=None):
        """Return the number of source nodes in the graph.

        If the graph can further divide its node types into two subsets A and B where
        all the edges are from nodes of types in A to nodes of types in B, we call
        this graph a *uni-bipartite* graph and the nodes in A being the *source*
        nodes and the ones in B being the *destination* nodes. If the graph is not
        uni-bipartite, the source and destination nodes are just the entire set of
        nodes in the graph.

        Parameters
        ----------
        ntype : str, optional
            The source node type name. If given, it returns the number of nodes for
            the source node type.
If not given (default), it returns the number of nodes summed over all source node types. Returns ------- int The number of nodes See Also -------- num_dst_nodes is_unibipartite Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a homogeneous graph for query. >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g.num_src_nodes() 3 Create a heterogeneous graph with two source node types -- 'developer' and 'user'. >>> g = dgl.heterograph({ ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6])) ... }) Query for the number of nodes. >>> g.num_src_nodes('developer') 2 >>> g.num_src_nodes('user') 5 >>> g.num_src_nodes() 7 """ if ntype is None: return sum( [ self._graph.num_nodes(self.get_ntype_id_from_src(nty)) for nty in self.srctypes ] ) else: return self._graph.num_nodes(self.get_ntype_id_from_src(ntype)) def number_of_dst_nodes(self, ntype=None): """Alias of :func:`num_dst_nodes`""" return self.num_dst_nodes(ntype) def num_dst_nodes(self, ntype=None): """Return the number of destination nodes in the graph. If the graph can further divide its node types into two subsets A and B where all the edeges are from nodes of types in A to nodes of types in B, we call this graph a *uni-bipartite* graph and the nodes in A being the *source* nodes and the ones in B being the *destination* nodes. If the graph is not uni-bipartite, the source and destination nodes are just the entire set of nodes in the graph. Parameters ---------- ntype : str, optional The destination node type name. If given, it returns the number of nodes of the destination node type. If not given (default), it returns the number of nodes summed over all the destination node types. Returns ------- int The number of nodes See Also -------- num_src_nodes is_unibipartite Examples -------- The following example uses PyTorch backend. 
>>> import dgl >>> import torch Create a homogeneous graph for query. >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g.num_dst_nodes() 3 Create a heterogeneous graph with two destination node types -- 'user' and 'game'. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6])) ... }) Query for the number of nodes. >>> g.num_dst_nodes('user') 5 >>> g.num_dst_nodes('game') 7 >>> g.num_dst_nodes() 12 """ if ntype is None: return sum( [ self._graph.num_nodes(self.get_ntype_id_from_dst(nty)) for nty in self.dsttypes ] ) else: return self._graph.num_nodes(self.get_ntype_id_from_dst(ntype)) def number_of_edges(self, etype=None): """Alias of :func:`num_edges`""" return self.num_edges(etype) def num_edges(self, etype=None): """Return the number of edges in the graph. Parameters ---------- etype : str or (str, str, str), optional The type name of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. If not provided, return the total number of edges regardless of the types in the graph. Returns ------- int The number of edges. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a graph with three canonical edge types. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])), ... ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3])) ... }) Query for the number of edges. >>> g.num_edges('plays') 2 >>> g.num_edges() 7 Use a canonical edge type instead when there is ambiguity for an edge type. 
>>> g.num_edges(('user', 'follows', 'user')) 2 >>> g.num_edges(('user', 'follows', 'game')) 3 """ if etype is None: return sum( [ self._graph.num_edges(etid) for etid in range(len(self.canonical_etypes)) ] ) else: return self._graph.num_edges(self.get_etype_id(etype)) @property def is_multigraph(self): """Return whether the graph is a multigraph with parallel edges. A multigraph has more than one edges between the same pair of nodes, called *parallel edges*. For heterogeneous graphs, parallel edge further requires the canonical edge type to be the same (see :meth:`canonical_etypes` for the definition). Returns ------- bool True if the graph is a multigraph. Notes ----- Checking whether the graph is a multigraph could be expensive for a large one. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Check for homogeneous graphs. >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 3]))) >>> g.is_multigraph False >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 3, 3]))) >>> g.is_multigraph True Check for heterogeneous graphs. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])) ... }) >>> g.is_multigraph False >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1, 1]), torch.tensor([1, 2, 2])), ... ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])) ... }) >>> g.is_multigraph True """ return self._graph.is_multigraph() @property def is_homogeneous(self): """Return whether the graph is a homogeneous graph. A homogeneous graph only has one node type and one edge type. Returns ------- bool True if the graph is a homogeneous graph. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a homogeneous graph for check. 
>>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))
        >>> g.is_homogeneous
        True

        Create a heterogeneous graph for check. A graph with two node types is
        not homogeneous even when it has a single edge type.

        >>> g = dgl.heterograph({
        ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))})
        >>> g.is_homogeneous
        False
        """
        return len(self.ntypes) == 1 and len(self.etypes) == 1

    @property
    def idtype(self):
        """The data type for storing the structure-related graph information
        such as node and edge IDs.

        Returns
        -------
        Framework-specific data type object
            For example, this can be ``torch.int32`` or ``torch.int64`` for PyTorch.

        Examples
        --------
        The following example uses PyTorch backend.

        >>> import dgl
        >>> import torch

        >>> src_ids = torch.tensor([0, 0, 1])
        >>> dst_ids = torch.tensor([1, 2, 2])
        >>> g = dgl.graph((src_ids, dst_ids))
        >>> g.idtype
        torch.int64
        >>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32)
        >>> g.idtype
        torch.int32

        See Also
        --------
        long
        int
        """
        # ``self._graph.dtype`` is used as an attribute name that is looked up
        # on the backend module ``F``.
        return getattr(F, self._graph.dtype)

    @property
    def _idtype_str(self):
        """The dtype of the graph index, as stored on the underlying graph.

        Returns
        -------
        The raw ``self._graph.dtype`` value
            :attr:`idtype` uses this value as an attribute name on the backend
            to obtain the framework dtype (th.int32/th.int64 or
            tf.int32/tf.int64 etc.), so it is presumably a string such as
            ``'int32'`` or ``'int64'``.
        """
        return self._graph.dtype

    def has_nodes(self, vid, ntype=None):
        """Return whether the graph contains the given nodes.

        Parameters
        ----------
        vid : node ID(s)
            The nodes IDs. The allowed nodes ID formats are:

            * ``int``: The ID of a single node.
            * Int Tensor: Each element is a node ID. The tensor must have the same device type
              and ID data type as the graph's.
            * iterable[int]: Each element is a node ID.

        ntype : str, optional
            The node type name. Can be omitted if there is
            only one type of nodes in the graph.

        Returns
        -------
        bool or bool Tensor
            A tensor of bool flags where each element is True if the node is in the graph.
            If the input is a single node, return one bool value.

        Examples
        --------

        The following example uses PyTorch backend.
def has_edges_between(self, u, v, etype=None):
    """Return whether the graph contains the given edges.

    Parameters
    ----------
    u : node IDs
        The source node IDs of the edges. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

    v : node IDs
        The destination node IDs of the edges. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    bool or bool Tensor
        A tensor of bool flags where the i-th element is True if there is an
        edge from ``u[i]`` to ``v[i]`` in the graph. If both :attr:`u` and
        :attr:`v` are a single ``int``, return one bool value.

    Raises
    ------
    DGLError
        If :attr:`u` or :attr:`v` contains node IDs that are not in the graph.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))

    Query for the edges.

    >>> g.has_edges_between(1, 2)
    True
    >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]))
    tensor([ True, False])

    If the graph has multiple edge types, one need to specify the edge type.

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
    ... })
    >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]), 'plays')
    tensor([ True, False])

    Use a canonical edge type instead when there is ambiguity for an edge type.

    >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
    ...                     ('user', 'follows', 'user'))
    tensor([ True, False])
    >>> g.has_edges_between(torch.tensor([1, 2]), torch.tensor([2, 3]),
    ...                     ('user', 'follows', 'game'))
    tensor([True, True])
    """
    src_ntype, _, dst_ntype = self.to_canonical_etype(etype)

    # Every endpoint must exist: the number of "present" flags has to match
    # the number of IDs given. Sources are checked before destinations.
    src_ids = utils.prepare_tensor(self, u, "u")
    num_known_src = F.as_scalar(
        F.sum(self.has_nodes(src_ids, ntype=src_ntype), dim=0)
    )
    if num_known_src != len(src_ids):
        raise DGLError("u contains invalid node IDs")

    dst_ids = utils.prepare_tensor(self, v, "v")
    num_known_dst = F.as_scalar(
        F.sum(self.has_nodes(dst_ids, ntype=dst_ntype), dim=0)
    )
    if num_known_dst != len(dst_ids):
        raise DGLError("v contains invalid node IDs")

    found = self._graph.has_edges_between(
        self.get_etype_id(etype), src_ids, dst_ids
    )

    # A scalar query (both endpoints given as plain ints) gets a plain bool.
    scalar_query = isinstance(u, numbers.Integral) and isinstance(
        v, numbers.Integral
    )
    if not scalar_query:
        return F.astype(found, F.bool)
    return bool(F.as_scalar(found))
Parameters ---------- v : int The node ID. If the graph has multiple edge types, the ID is for the destination type corresponding to the edge type. etype : str or (str, str, str), optional The type names of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Returns ------- Tensor The predecessors of :attr:`v` with the specified edge type. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a homogeneous graph. >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3]))) Query for node 1. >>> g.predecessors(1) tensor([0, 0]) For a graph of multiple edge types, it is required to specify the edge type in query. >>> hg = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6])) ... }) >>> hg.predecessors(1, etype='follows') tensor([0]) See Also -------- successors """ if not self.has_nodes(v, self.to_canonical_etype(etype)[-1]): raise DGLError("Non-existing node ID {}".format(v)) return self._graph.predecessors(self.get_etype_id(etype), v) def successors(self, v, etype=None): """Return the successor(s) of a particular node with the specified edge type. Node ``u`` is a successor of node ``v`` if there is an edge ``(v, u)`` with type ``etype`` in the graph. Parameters ---------- v : int The node ID. If the graph has multiple edge types, the ID is for the source type corresponding to the edge type. etype : str or (str, str, str), optional The type names of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. 
def edge_ids(self, u, v, return_uv=False, etype=None):
    """Return the edge ID(s) given the two endpoints of the edge(s).

    Parameters
    ----------
    u : node IDs
        The source node IDs of the edges. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

    v : node IDs
        The destination node IDs of the edges. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.
    return_uv : bool, optional
        Whether to return the source and destination node IDs along with the edges. If
        False (default), it assumes that the graph is a simple graph and there is only
        one edge from one node to another. If True, there can be multiple edges found
        from one node to another.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    Tensor, or (Tensor, Tensor, Tensor)

        * If ``return_uv=False``, it returns the edge IDs in a tensor, where the i-th
          element is the ID of the edge ``(u[i], v[i])``. If both :attr:`u` and
          :attr:`v` are a single ``int``, the edge ID is returned as a scalar.
        * If ``return_uv=True``, it returns a tuple of three 1D tensors ``(eu, ev, e)``.
          ``e[i]`` is the ID of an edge from ``eu[i]`` to ``ev[i]``. It returns all edges
          (including parallel edges) from ``eu[i]`` to ``ev[i]`` in this case.

    Raises
    ------
    DGLError
        If :attr:`u` or :attr:`v` contains node IDs that are not in the graph,
        or if ``return_uv=False`` and some pair ``(u[i], v[i])`` is not
        connected by an edge. In the latter case the message names the first
        such pair.

    Notes
    -----
    If ``return_uv=False`` and there are no edges between some pairs of node(s),
    it will raise an error.

    If the graph is a multigraph, ``return_uv=False``, and there are multiple edges
    between some pairs of node(s), it returns an arbitrary one from them.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 0, 1, 1, 1]), torch.tensor([1, 0, 2, 3, 2])))

    Query for the edges.

    >>> g.edge_ids(0, 0)
    1
    >>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]))
    tensor([3, 0])

    Get all edges for pairs of nodes.

    >>> g.edge_ids(torch.tensor([1, 0]), torch.tensor([3, 1]), return_uv=True)
    (tensor([1, 0]), tensor([3, 1]), tensor([3, 0]))

    If the graph has multiple edge types, one need to specify the edge type.

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'follows', 'game'): (torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])),
    ...     ('user', 'plays', 'game'): (torch.tensor([1, 3]), torch.tensor([2, 3]))
    ... })
    >>> g.edge_ids(torch.tensor([1]), torch.tensor([2]), etype='plays')
    tensor([0])

    Use a canonical edge type instead when there is ambiguity for an edge type.

    >>> g.edge_ids(torch.tensor([0, 1]), torch.tensor([1, 2]),
    ...            etype=('user', 'follows', 'user'))
    tensor([0, 1])
    >>> g.edge_ids(torch.tensor([1, 2]), torch.tensor([2, 3]),
    ...            etype=('user', 'follows', 'game'))
    tensor([1, 2])
    """
    # Record the input kind now; ``u`` and ``v`` are rebound to tensors below.
    is_int = isinstance(u, numbers.Integral) and isinstance(
        v, numbers.Integral
    )
    srctype, _, dsttype = self.to_canonical_etype(etype)

    # Every endpoint must exist: the count of "present" flags has to equal
    # the number of IDs supplied.
    u = utils.prepare_tensor(self, u, "u")
    if F.as_scalar(F.sum(self.has_nodes(u, ntype=srctype), dim=0)) != len(u):
        raise DGLError("u contains invalid node IDs")
    v = utils.prepare_tensor(self, v, "v")
    if F.as_scalar(F.sum(self.has_nodes(v, ntype=dsttype), dim=0)) != len(v):
        raise DGLError("v contains invalid node IDs")

    etid = self.get_etype_id(etype)
    if return_uv:
        return self._graph.edge_ids_all(etid, u, v)

    eid = self._graph.edge_ids_one(etid, u, v)
    # An ID of -1 marks a (u, v) pair that is not a valid edge.
    is_neg_one = F.equal(eid, -1)
    if F.as_scalar(F.sum(is_neg_one, 0)):
        # Report only the FIRST offending pair. ``F.as_scalar`` needs a
        # single element, so gathering every failing row would break as soon
        # as two or more pairs are missing and would hide the real error.
        first_missing = F.as_scalar(F.nonzero_1d(is_neg_one)[0])
        raise DGLError(
            "Error: (%d, %d) does not form a valid edge."
            % (
                F.as_scalar(u[first_missing]),
                F.as_scalar(v[first_missing]),
            )
        )
    return F.as_scalar(eid) if is_int else eid
def in_edges(self, v, form="uv", etype=None):
    """Return the incoming edges of the given nodes.

    Parameters
    ----------
    v : node ID(s)
        The node IDs. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.
    form : str, optional
        The result format, which can be one of the following:

        - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
          the IDs of all edges.
        - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
          representing the source and destination nodes of all edges. For each :math:`i`,
          :math:`(U[i], V[i])` forms an edge.
        - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
          representing the source nodes, destination nodes and IDs of all edges.
          For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
        All incoming edges of the nodes with the specified type. For a description of the
        returned result, see the description of :attr:`form`.

    Raises
    ------
    DGLError
        If :attr:`form` is not one of ``'all'``, ``'uv'`` or ``'eid'``.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))

    Query for the nodes 1 and 0.

    >>> g.in_edges(torch.tensor([1, 0]))
    (tensor([0, 0]), tensor([1, 0]))

    Specify a different value for :attr:`form`.

    >>> g.in_edges(torch.tensor([1, 0]), form='all')
    (tensor([0, 0]), tensor([1, 0]), tensor([0, 1]))

    For a graph of multiple edge types, it is required to specify the edge type in query.

    >>> hg = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
    ... })
    >>> hg.in_edges(torch.tensor([1, 0]), etype='follows')
    (tensor([0]), tensor([1]))

    See Also
    --------
    edges
    out_edges
    """
    dst_ids = utils.prepare_tensor(self, v, "v")
    # The graph index is queried first; ``form`` only selects which part of
    # the (src, dst, eid) triple is handed back.
    src, dst, eid = self._graph.in_edges(self.get_etype_id(etype), dst_ids)
    if form == "all":
        return src, dst, eid
    if form == "uv":
        return src, dst
    if form == "eid":
        return eid
    raise DGLError(
        'Invalid form: {}. Must be "all", "uv" or "eid".'.format(form)
    )
def all_edges(self, form="uv", order="eid", etype=None):
    """Return all edges with the specified edge type.

    Parameters
    ----------
    form : str, optional
        The return form, which can be one of the following:

        - ``'eid'``: The returned result is a 1D tensor :math:`EID`, representing
          the IDs of all edges.
        - ``'uv'`` (default): The returned result is a 2-tuple of 1D tensors :math:`(U, V)`,
          representing the source and destination nodes of all edges. For each :math:`i`,
          :math:`(U[i], V[i])` forms an edge.
        - ``'all'``: The returned result is a 3-tuple of 1D tensors :math:`(U, V, EID)`,
          representing the source nodes, destination nodes and IDs of all edges.
          For each :math:`i`, :math:`(U[i], V[i])` forms an edge with ID :math:`EID[i]`.
    order : str, optional
        The order of the returned edges, which can be one of the following:

        - ``'srcdst'``: The edges are sorted first by their source node IDs and then
          by their destination node IDs to break ties.
        - ``'eid'`` (default): The edges are sorted by their IDs.
    etype : str or tuple of str, optional
        The edge type for query, which can be an edge type (str) or a canonical edge type
        (3-tuple of str). When an edge type appears in multiple canonical edge types, one
        must use a canonical edge type. If the graph has multiple edge types, one must
        specify the argument. Otherwise, it can be omitted.

    Returns
    -------
    Tensor or (Tensor, Tensor) or (Tensor, Tensor, Tensor)
        All edges of the specified edge type. For a description of the returned result,
        see the description of :attr:`form`.

    Raises
    ------
    DGLError
        If :attr:`form` is not one of ``'all'``, ``'uv'`` or ``'eid'``.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 0, 2, 3])))

    Query for edges.

    >>> g.all_edges()
    (tensor([0, 0, 1, 1]), tensor([1, 0, 2, 3]))

    Specify a different value for :attr:`form` and :attr:`order`.

    >>> g.all_edges(form='all', order='srcdst')
    (tensor([0, 0, 1, 1]), tensor([0, 1, 2, 3]), tensor([1, 0, 2, 3]))

    For a graph of multiple edge types, it is required to specify the edge type in query.

    >>> hg = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
    ... })
    >>> hg.all_edges(etype='plays')
    (tensor([3, 4]), tensor([5, 6]))

    See Also
    --------
    edges
    in_edges
    out_edges
    """
    # The graph index always yields the full (src, dst, eid) triple;
    # ``form`` only selects which part of it is handed back.
    src, dst, eid = self._graph.edges(self.get_etype_id(etype), order)
    if form == "all":
        return src, dst, eid
    if form == "uv":
        return src, dst
    if form == "eid":
        return eid
    raise DGLError(
        'Invalid form: {}. Must be "all", "uv" or "eid".'.format(form)
    )
It computes the in-degree(s) w.r.t. to the edges of the given edge type. Parameters ---------- v : node IDs The node IDs. The allowed formats are: * ``int``: A single node. * Int Tensor: Each element is a node ID. The tensor must have the same device type and ID data type as the graph's. * iterable[int]: Each element is a node ID. If not given, return the in-degrees of all the nodes. etype : str or (str, str, str), optional The type name of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Returns ------- int or Tensor The in-degree(s) of the node(s) in a Tensor. The i-th element is the in-degree of the i-th input node. If :attr:`v` is an ``int``, return an ``int`` too. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a homogeneous graph. >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3]))) Query for all nodes. >>> g.in_degrees() tensor([0, 2, 1, 1]) Query for nodes 1 and 2. >>> g.in_degrees(torch.tensor([1, 2])) tensor([2, 1]) For a graph of multiple edge types, it is required to specify the edge type in query. >>> hg = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])), ... ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6])) ... 
def out_degrees(self, u=ALL, etype=None):
    """Return the out-degree(s) of the given nodes.

    It computes the out-degree(s) w.r.t. the edges of the given edge type.

    Parameters
    ----------
    u : node IDs
        The node IDs. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

        If not given, return the out-degrees of all the nodes.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    int or Tensor
        The out-degree(s) of the node(s) in a Tensor. The i-th element is the out-degree
        of the i-th input node. If :attr:`u` is an ``int``, return an ``int`` too.

    Raises
    ------
    DGLError
        If :attr:`u` contains node IDs that are not in the graph.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 0, 1, 1]), torch.tensor([1, 1, 2, 3])))

    Query for all nodes.

    >>> g.out_degrees()
    tensor([2, 2, 0, 0])

    Query for nodes 1 and 2.

    >>> g.out_degrees(torch.tensor([1, 2]))
    tensor([2, 0])

    For a graph of multiple edge types, it is required to specify the edge type in query.

    >>> hg = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (torch.tensor([3, 4]), torch.tensor([5, 6]))
    ... })
    >>> hg.out_degrees(torch.tensor([1, 0]), etype='follows')
    tensor([1, 1])

    See Also
    --------
    in_degrees
    """
    srctype = self.to_canonical_etype(etype)[0]
    etid = self.get_etype_id(etype)
    if is_all(u):
        # No explicit IDs: query every node of the source type.
        u = self.srcnodes(srctype)
    u_tensor = utils.prepare_tensor(self, u, "u")
    # Every queried node must exist: the count of "present" flags has to
    # equal the number of IDs supplied.
    if F.as_scalar(
        F.sum(self.has_nodes(u_tensor, ntype=srctype), dim=0)
    ) != len(u_tensor):
        raise DGLError("u contains invalid node IDs")
    # Reuse the tensor that was just validated instead of converting ``u``
    # a second time (this mirrors how ``in_degrees`` passes its tensor).
    deg = self._graph.out_degrees(etid, u_tensor)
    if isinstance(u, numbers.Integral):
        return F.as_scalar(deg)
    return deg
}) >>> g.adj(etype='develops') SparseMatrix(indices=tensor([[0, 1], [0, 2]]), values=tensor([1., 1.]), shape=(2, 3), nnz=2) >>> g.edata['h'] = {('user', 'follows', 'user'): torch.tensor([3, 2])} >>> g.adj(etype='follows', eweight_name='h') SparseMatrix(indices=tensor([[0, 1], [0, 1]]), values=tensor([3, 2]), shape=(2, 2), nnz=2) """ assert F.backend_name == "pytorch", "Only PyTorch backend supports adj." # Temporal fix to introduce a dependency on torch import torch from .sparse import spmatrix etype = self.to_canonical_etype(etype) indices = torch.stack(self.all_edges(etype=etype)) shape = (self.num_nodes(etype[0]), self.number_of_nodes(etype[2])) if eweight_name is not None: val = self.edata[eweight_name][etype] else: val = None return spmatrix( indices, val=val, shape=shape, ) def adj_external( self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None ): """Return the adjacency matrix in an external format, such as Scipy or backend dependent sparse tensor. By default, a row of returned adjacency matrix represents the source of an edge and the column represents the destination. When transpose is True, a row represents the destination and a column represents the source. Parameters ---------- transpose : bool, optional A flag to transpose the returned adjacency matrix. (Default: False) ctx : context, optional The context of returned adjacency matrix. (Default: cpu) scipy_fmt : str, optional If specified, return a scipy sparse matrix in the given format. Otherwise, return a backend dependent sparse tensor. (Default: None) etype : str or (str, str, str), optional The type names of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Returns ------- SparseTensor or scipy.sparse.spmatrix Adjacency matrix. 
def adj_tensors(self, fmt, etype=None):
    """Return the adjacency matrix of edges of the given edge type as tensors of
    a sparse matrix representation.

    By default, a row of returned adjacency matrix represents the
    source of an edge and the column represents the destination.

    Parameters
    ----------
    fmt : str
        Either ``coo``, ``csr`` or ``csc``.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    tuple[Tensor]
        If :attr:`fmt` is ``coo``, returns a pair of source and destination node ID
        tensors.

        If :attr:`fmt` is ``csr`` or ``csc``, return the CSR or CSC representation
        of the adjacency matrix as a triplet of tensors
        ``(indptr, indices, edge_ids)``. Note that ``edge_ids`` could be an empty
        tensor with 0 elements, in which case the edge IDs are consecutive
        integers starting from 0.

    Examples
    --------
    >>> g = dgl.graph(([0, 1, 2], [1, 2, 3]))
    >>> g.adj_tensors('coo')
    (tensor([0, 1, 2]), tensor([1, 2, 3]))
    >>> g.adj_tensors('csr')
    (tensor([0, 1, 2, 3, 3]), tensor([1, 2, 3]), tensor([0, 1, 2]))
    """
    etid = self.get_etype_id(etype)
    # CSC is requested from the graph index as the CSR form with the
    # transpose flag set; every other format is passed through unchanged.
    transpose, layout = (True, "csr") if fmt == "csc" else (False, fmt)
    tensors = self._graph.adjacency_matrix_tensors(etid, transpose, layout)
    # The first two elements are number of rows and columns; drop them.
    return tensors[2:]
def node_attr_schemes(self, ntype=None):
    """Return the node feature schemes for the specified type.

    The scheme of a feature describes the shape and data type of it.

    Parameters
    ----------
    ntype : str, optional
        The node type name. Can be omitted if there is only one type of nodes
        in the graph.

    Returns
    -------
    dict[str, Scheme]
        A dictionary mapping a feature name to its associated feature scheme.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Query for a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.ndata['h1'] = torch.randn(3, 1)
    >>> g.ndata['h2'] = torch.randn(3, 2)
    >>> g.node_attr_schemes()
    {'h1': Scheme(shape=(1,), dtype=torch.float32),
     'h2': Scheme(shape=(2,), dtype=torch.float32)}

    Query for a heterogeneous graph of multiple node types.

    >>> g = dgl.heterograph({('user', 'plays', 'game'):
    ...                      (torch.tensor([1, 2]), torch.tensor([3, 4]))})
    >>> g.nodes['user'].data['h1'] = torch.randn(3, 1)
    >>> g.nodes['user'].data['h2'] = torch.randn(3, 2)
    >>> g.node_attr_schemes('user')
    {'h1': Scheme(shape=(1,), dtype=torch.float32),
     'h2': Scheme(shape=(2,), dtype=torch.float32)}

    See Also
    --------
    edge_attr_schemes
    """
    # Look up the per-type feature frame, then expose its scheme mapping.
    node_type_id = self.get_ntype_id(ntype)
    frame = self._node_frames[node_type_id]
    return frame.schemes
def set_n_initializer(self, initializer, field=None, ntype=None):
    """Set the initializer for node features.

    When only part of the nodes have a feature (e.g. new nodes are added,
    features are set for a subset of nodes), the initializer initializes
    features for the rest nodes.

    Parameters
    ----------
    initializer : callable
        A function of signature ``func(shape, dtype, ctx, id_range) -> Tensor``.
        The tensor will be the initialized features. The arguments are:

        - ``shape``: The shape of the tensor to return, which is a tuple of int.
          The first dimension is the number of nodes for feature initialization.
        - ``dtype``: The data type of the tensor to return, which is a
          framework-specific data type object.
        - ``ctx``: The device of the tensor to return, which is a
          framework-specific device object.
        - ``id_range``: The start and end ID of the nodes for feature
          initialization, which is a slice.
    field : str, optional
        The name of the feature that the initializer applies. If not given, the
        initializer applies to all features.
    ntype : str, optional
        The type name of the nodes. Can be omitted if the graph has only one
        type of nodes.

    Notes
    -----
    Without setting a node feature initializer, zero tensors are generated
    for nodes without a feature.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Define a function for initializer.

    >>> def init_feats(shape, dtype, device, id_range):
    ...     return torch.ones(shape, dtype=dtype, device=device)

    An example for a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0]), torch.tensor([1])))
    >>> g.ndata['h1'] = torch.zeros(2, 2)
    >>> g.ndata['h2'] = torch.ones(2, 1)
    >>> # Apply the initializer to feature 'h2' only.
    >>> g.set_n_initializer(init_feats, field='h2')
    >>> g.add_nodes(1)
    >>> print(g.ndata['h1'])
    tensor([[0., 0.],
            [0., 0.],
            [0., 0.]])
    >>> print(g.ndata['h2'])
    tensor([[1.],
            [1.],
            [1.]])

    An example for a heterogeneous graph of multiple node types.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.nodes['user'].data['h'] = torch.zeros(3, 2)
    >>> g.nodes['game'].data['w'] = torch.ones(2, 2)
    >>> g.set_n_initializer(init_feats, ntype='game')
    >>> g.add_nodes(1, ntype='user')
    >>> # Initializer not set for 'user', use zero tensors by default
    >>> g.nodes['user'].data['h']
    tensor([[0., 0.],
            [0., 0.],
            [0., 0.],
            [0., 0.]])
    >>> # Initializer set for 'game'
    >>> g.add_nodes(1, ntype='game')
    >>> g.nodes['game'].data['w']
    tensor([[1., 1.],
            [1., 1.],
            [1., 1.]])
    """
    # The initializer is stored on the feature frame of the resolved node type.
    node_type_id = self.get_ntype_id(ntype)
    target_frame = self._node_frames[node_type_id]
    target_frame.set_initializer(initializer, field)
- ``id_range``: The start and end ID of the edges for feature initialization, which is a slice. field : str, optional The name of the feature that the initializer applies. If not given, the initializer applies to all features. etype : str or (str, str, str), optional The type names of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Notes ----- Without setting an edge feature initializer, zero tensors are generated for edges without a feature. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Define a function for initializer. >>> def init_feats(shape, dtype, device, id_range): ... return torch.ones(shape, dtype=dtype, device=device) An example for a homogeneous graph. >>> g = dgl.graph((torch.tensor([0]), torch.tensor([1]))) >>> g.edata['h1'] = torch.zeros(1, 2) >>> g.edata['h2'] = torch.ones(1, 1) >>> # Apply the initializer to feature 'h2' only. >>> g.set_e_initializer(init_feats, field='h2') >>> g.add_edges(torch.tensor([1]), torch.tensor([1])) >>> print(g.edata['h1']) tensor([[0., 0.], [0., 0.]]) >>> print(g.edata['h2']) tensor([[1.], [1.]]) An example for a heterogeneous graph of multiple edge types. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1]), ... torch.tensor([0, 0])), ... ('developer', 'develops', 'game'): (torch.tensor([0, 1]), ... torch.tensor([0, 1])) ... 
}) >>> g.edges['plays'].data['h'] = torch.zeros(2, 2) >>> g.edges['develops'].data['w'] = torch.ones(2, 2) >>> g.set_e_initializer(init_feats, etype='plays') >>> # Initializer not set for 'develops', use zero tensors by default >>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='develops') >>> g.edges['develops'].data['w'] tensor([[1., 1.], [1., 1.], [0., 0.]]) >>> # Initializer set for 'plays' >>> g.add_edges(torch.tensor([1]), torch.tensor([1]), etype='plays') >>> g.edges['plays'].data['h'] tensor([[0., 0.], [0., 0.], [1., 1.]]) """ etid = self.get_etype_id(etype) self._edge_frames[etid].set_initializer(initializer, field) def _set_n_repr(self, ntid, u, data): """Internal API to set node features. `data` is a dictionary from the feature name to feature tensor. Each tensor is of shape (B, D1, D2, ...), where B is the number of nodes to be updated, and (D1, D2, ...) be the shape of the node representation tensor. The length of the given node ids must match B (i.e, len(u) == B). All updates will be done out of place to work with autograd. Parameters ---------- ntid : int Node type id. u : node, container or tensor The node(s). data : dict of tensor Node representation. """ if is_all(u): num_nodes = self._graph.num_nodes(ntid) else: u = utils.prepare_tensor(self, u, "u") num_nodes = len(u) for key, val in data.items(): nfeats = F.shape(val)[0] if nfeats != num_nodes: raise DGLError( "Expect number of features to match number of nodes (len(u))." " Got %d and %d instead." % (nfeats, num_nodes) ) if F.context(val) != self.device: raise DGLError( 'Cannot assign node feature "{}" on device {} to a graph on' " device {}. Call DGLGraph.to() to copy the graph to the" " same device.".format(key, F.context(val), self.device) ) # To prevent users from doing things like: # # g.pin_memory_() # g.ndata['x'] = torch.randn(...) 
# sg = g.sample_neighbors(torch.LongTensor([...]).cuda()) # sg.ndata['x'] # Becomes a CPU tensor even if sg is on GPU due to lazy slicing if ( self.is_pinned() and F.context(val) == "cpu" and not F.is_pinned(val) ): raise DGLError( "Pinned graph requires the node data to be pinned as well. " "Please pin the node data before assignment." ) if is_all(u): self._node_frames[ntid].update(data) else: self._node_frames[ntid].update_row(u, data) def _get_n_repr(self, ntid, u): """Get node(s) representation of a single node type. The returned feature tensor batches multiple node features on the first dimension. Parameters ---------- ntid : int Node type id. u : node, container or tensor The node(s). Returns ------- dict Representation dict from feature name to feature tensor. """ if is_all(u): return self._node_frames[ntid] else: u = utils.prepare_tensor(self, u, "u") return self._node_frames[ntid].subframe(u) def _pop_n_repr(self, ntid, key): """Internal API to get and remove the specified node feature. Parameters ---------- ntid : int Node type id. key : str The attribute name. Returns ------- Tensor The popped representation """ return self._node_frames[ntid].pop(key) def _set_e_repr(self, etid, edges, data): """Internal API to set edge(s) features. `data` is a dictionary from the feature name to feature tensor. Each tensor is of shape (B, D1, D2, ...), where B is the number of edges to be updated, and (D1, D2, ...) be the shape of the edge representation tensor. All update will be done out of place to work with autograd. Parameters ---------- etid : int Edge type id. edges : edges Edges can be either * A pair of endpoint nodes (u, v), where u is the node ID of source node type and v is that of destination node type. * A tensor of edge ids of the given type. The default value is all the edges. data : tensor or dict of tensor Edge representation. 
""" # parse argument if not is_all(edges): eid = utils.parse_edges_arg_to_eid(self, edges, etid, "edges") # sanity check if not utils.is_dict_like(data): raise DGLError( "Expect dictionary type for feature data." ' Got "%s" instead.' % type(data) ) if is_all(edges): num_edges = self._graph.num_edges(etid) else: num_edges = len(eid) for key, val in data.items(): nfeats = F.shape(val)[0] if nfeats != num_edges: raise DGLError( "Expect number of features to match number of edges." " Got %d and %d instead." % (nfeats, num_edges) ) if F.context(val) != self.device: raise DGLError( 'Cannot assign edge feature "{}" on device {} to a graph on' " device {}. Call DGLGraph.to() to copy the graph to the" " same device.".format(key, F.context(val), self.device) ) # To prevent users from doing things like: # # g.pin_memory_() # g.edata['x'] = torch.randn(...) # sg = g.sample_neighbors(torch.LongTensor([...]).cuda()) # sg.edata['x'] # Becomes a CPU tensor even if sg is on GPU due to lazy slicing if ( self.is_pinned() and F.context(val) == "cpu" and not F.is_pinned(val) ): raise DGLError( "Pinned graph requires the edge data to be pinned as well. " "Please pin the edge data before assignment." ) # set if is_all(edges): self._edge_frames[etid].update(data) else: self._edge_frames[etid].update_row(eid, data) def _get_e_repr(self, etid, edges): """Internal API to get edge features. Parameters ---------- etid : int Edge type id. edges : edges Edges can be a pair of endpoint nodes (u, v), or a tensor of edge ids. The default value is all the edges. Returns ------- dict Representation dict """ # parse argument if is_all(edges): return self._edge_frames[etid] else: eid = utils.parse_edges_arg_to_eid(self, edges, etid, "edges") return self._edge_frames[etid].subframe(eid) def _pop_e_repr(self, etid, key): """Get and remove the specified edge repr of a single edge type. Parameters ---------- etid : int Edge type id. key : str The attribute name. 
Returns ------- Tensor The popped representation """ self._edge_frames[etid].pop(key) ################################################################# # Message passing ################################################################# def apply_nodes(self, func, v=ALL, ntype=None): """Update the features of the specified nodes by the provided function. Parameters ---------- func : callable The function to update node features. It must be a :ref:`apiudf`. v : node IDs The node IDs. The allowed formats are: * ``int``: A single node. * Int Tensor: Each element is a node ID. The tensor must have the same device type and ID data type as the graph's. * iterable[int]: Each element is a node ID. If not given (default), use all the nodes in the graph. ntype : str, optional The node type name. Can be omitted if there is only one type of nodes in the graph. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch **Homogeneous graph** >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4])) >>> g.ndata['h'] = torch.ones(5, 2) >>> g.apply_nodes(lambda nodes: {'x' : nodes.data['h'] * 2}) >>> g.ndata['x'] tensor([[2., 2.], [2., 2.], [2., 2.], [2., 2.], [2., 2.]]) **Heterogeneous graph** >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])}) >>> g.nodes['user'].data['h'] = torch.ones(3, 5) >>> g.apply_nodes(lambda nodes: {'h': nodes.data['h'] * 2}, ntype='user') >>> g.nodes['user'].data['h'] tensor([[2., 2., 2., 2., 2.], [2., 2., 2., 2., 2.], [2., 2., 2., 2., 2.]]) See Also -------- apply_edges """ ntid = self.get_ntype_id(ntype) ntype = self.ntypes[ntid] if is_all(v): v_id = self.nodes(ntype) else: v_id = utils.prepare_tensor(self, v, "v") ndata = core.invoke_node_udf(self, v_id, ntype, func, orig_nid=v_id) self._set_n_repr(ntid, v, ndata) def apply_edges(self, func, edges=ALL, etype=None): """Update the features of the specified edges by the provided function. 
Parameters ---------- func : dgl.function.BuiltinFunction or callable The function to generate new edge features. It must be either a :ref:`api-built-in` or a :ref:`apiudf`. edges : edges The edges to update features on. The allowed input formats are: * ``int``: A single edge ID. * Int Tensor: Each element is an edge ID. The tensor must have the same device type and ID data type as the graph's. * iterable[int]: Each element is an edge ID. * (Tensor, Tensor): The node-tensors format where the i-th elements of the two tensors specify an edge. * (iterable[int], iterable[int]): Similar to the node-tensors format but stores edge endpoints in python iterables. Default value specifies all the edges in the graph. etype : str or (str, str, str), optional The type name of the edges. The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Notes ----- DGL recommends using DGL's built-in function for the :attr:`func` argument, because DGL will invoke efficient kernels that avoid copying node features to edge features in this case. Examples -------- The following example uses PyTorch backend. 
>>> import dgl >>> import torch **Homogeneous graph** >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4])) >>> g.ndata['h'] = torch.ones(5, 2) >>> g.apply_edges(lambda edges: {'x' : edges.src['h'] + edges.dst['h']}) >>> g.edata['x'] tensor([[2., 2.], [2., 2.], [2., 2.], [2., 2.]]) Use built-in function >>> import dgl.function as fn >>> g.apply_edges(fn.u_add_v('h', 'h', 'x')) >>> g.edata['x'] tensor([[2., 2.], [2., 2.], [2., 2.], [2., 2.]]) **Heterogeneous graph** >>> g = dgl.heterograph({('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1])}) >>> g.edges[('user', 'plays', 'game')].data['h'] = torch.ones(4, 5) >>> g.apply_edges(lambda edges: {'h': edges.data['h'] * 2}) >>> g.edges[('user', 'plays', 'game')].data['h'] tensor([[2., 2., 2., 2., 2.], [2., 2., 2., 2., 2.], [2., 2., 2., 2., 2.], [2., 2., 2., 2., 2.]]) See Also -------- apply_nodes """ # Graph with one relation type if self._graph.number_of_etypes() == 1 or etype is not None: etid = self.get_etype_id(etype) etype = self.canonical_etypes[etid] g = self if etype is None else self[etype] else: # heterogeneous graph with number of relation types > 1 if not core.is_builtin(func): raise DGLError( "User defined functions are not yet " "supported in apply_edges for heterogeneous graphs. " "Please use (apply_edges(func), etype = rel) instead." 
def send_and_recv(
    self, edges, message_func, reduce_func, apply_node_func=None, etype=None
):
    """Send messages along the specified edges and reduce them on
    the destination nodes to update their features.

    Parameters
    ----------
    edges : edges
        The edges to send and receive messages on. The allowed input formats are:

        * ``int``: A single edge ID.
        * Int Tensor: Each element is an edge ID.  The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is an edge ID.
        * (Tensor, Tensor): The node-tensors format where the i-th elements
          of the two tensors specify an edge.
        * (iterable[int], iterable[int]): Similar to the node-tensors format but
          stores edge endpoints in python iterables.

    message_func : dgl.function.BuiltinFunction or callable
        The message function to generate messages along the edges.
        It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
    reduce_func : dgl.function.BuiltinFunction or callable
        The reduce function to aggregate the messages.
        It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
    apply_node_func : callable, optional
        An optional apply function to further update the node features
        after the message reduction. It must be a :ref:`apiudf`.
    etype : str or (str, str, str), optional
        The type name of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Notes
    -----
    DGL recommends using DGL's built-in function for the :attr:`message_func`
    and the :attr:`reduce_func` arguments,
    because DGL will invoke efficient kernels that avoid copying node features to
    edge features in this case.

    Examples
    --------

    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch

    **Homogeneous graph**

    >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
    >>> g.ndata['x'] = torch.ones(5, 2)
    >>> # Specify edges using (Tensor, Tensor).
    >>> g.send_and_recv(([1, 2], [2, 3]), fn.copy_u('x', 'm'), fn.sum('m', 'h'))
    >>> g.ndata['h']
    tensor([[0., 0.],
            [0., 0.],
            [1., 1.],
            [1., 1.],
            [0., 0.]])
    >>> # Specify edges using IDs.
    >>> g.send_and_recv([0, 2, 3], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
    >>> g.ndata['h']
    tensor([[0., 0.],
            [1., 1.],
            [0., 0.],
            [1., 1.],
            [1., 1.]])

    **Heterogeneous graph**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])
    ... })
    >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])
    >>> g.send_and_recv(g['follows'].edges(), fn.copy_u('h', 'm'),
    ...                 fn.sum('m', 'h'), etype='follows')
    >>> g.nodes['user'].data['h']
    tensor([[0.],
            [0.],
            [1.]])

    **``send_and_recv`` using user-defined functions**

    >>> import torch as th
    >>> g = dgl.graph(([0, 1], [1, 2]))
    >>> g.ndata['x'] = th.tensor([[1.], [2.], [3.]])

    >>> # Define the function for sending node features as messages.
    >>> def send_source(edges):
    ...     return {'m': edges.src['x']}
    >>> # Sum the messages received and use this to replace the original node feature.
    >>> def simple_reduce(nodes):
    ...     return {'x': nodes.mailbox['m'].sum(1)}

    Send and receive messages.

    >>> g.send_and_recv(g.edges(), send_source, simple_reduce)
    >>> g.ndata['x']
    tensor([[1.],
            [1.],
            [2.]])

    Note that the feature of node 0 remains the same as it has no
    incoming edges.
    """
    # edge type
    etid = self.get_etype_id(etype)
    _, dtid = self._graph.metagraph.find_edge(etid)
    # From here on ``etype`` is the canonical (src, edge, dst) triplet.
    etype = self.canonical_etypes[etid]
    # edge IDs
    eid = utils.parse_edges_arg_to_eid(self, edges, etid, "edges")
    if len(eid) == 0:
        # no computation
        return
    u, v = self.find_edges(eid, etype=etype)
    # call message passing on subgraph
    # ``etype`` is never None at this point, so always slice by edge type.
    g = self[etype]
    compute_graph, _, dstnodes, _ = _create_compute_graph(g, u, v, eid)
    ndata = core.message_passing(
        compute_graph, message_func, reduce_func, apply_node_func
    )
    # Write the reduced features back onto the destination node type.
    self._set_n_repr(dtid, dstnodes, ndata)
def push(
    self, u, message_func, reduce_func, apply_node_func=None, etype=None
):
    """Send message from the specified node(s) to their successors
    along the specified edge type and update their node features.

    Parameters
    ----------
    u : node IDs
        The node IDs. The allowed formats are:

        * ``int``: A single node.
        * Int Tensor: Each element is a node ID.  The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

    message_func : dgl.function.BuiltinFunction or callable
        The message function to generate messages along the edges.
        It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
    reduce_func : dgl.function.BuiltinFunction or callable
        The reduce function to aggregate the messages.
        It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
    apply_node_func : callable, optional
        An optional apply function to further update the node features
        after the message reduction. It must be a :ref:`apiudf`.
    etype : str or (str, str, str), optional
        The type name of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Notes
    -----
    DGL recommends using DGL's built-in function for the :attr:`message_func`
    and the :attr:`reduce_func` arguments,
    because DGL will invoke efficient kernels that avoid copying node features to
    edge features in this case.

    Examples
    --------

    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch

    **Homogeneous graph**

    >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]))
    >>> g.ndata['x'] = torch.ones(5, 2)
    >>> g.push([0, 1], fn.copy_u('x', 'm'), fn.sum('m', 'h'))
    >>> g.ndata['h']
    tensor([[0., 0.],
            [1., 1.],
            [1., 1.],
            [0., 0.],
            [0., 0.]])

    **Heterogeneous graph**

    >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 0], [1, 2])})
    >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])

    Push.

    >>> g['follows'].push(0, fn.copy_u('h', 'm'), fn.sum('m', 'h'), etype='follows')
    >>> g.nodes['user'].data['h']
    tensor([[0.],
            [0.],
            [0.]])
    """
    # Collect the IDs of the out-edges of ``u`` and delegate the actual
    # message passing to ``send_and_recv`` on exactly those edges.
    edges = self.out_edges(u, form="eid", etype=etype)
    self.send_and_recv(
        edges, message_func, reduce_func, apply_node_func, etype=etype
    )
* DGL recommends using DGL's built-in function for the :attr:`message_func` and the :attr:`reduce_func` arguments, because DGL will invoke efficient kernels that avoid copying node features to edge features in this case. Examples -------- >>> import dgl >>> import dgl.function as fn >>> import torch **Homogeneous graph** >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4])) >>> g.ndata['x'] = torch.ones(5, 2) >>> g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h')) >>> g.ndata['h'] tensor([[0., 0.], [1., 1.], [1., 1.], [1., 1.], [1., 1.]]) **Heterogeneous graph** >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2], [1, 2, 2])}) Update all. >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]]) >>> g['follows'].update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'), etype='follows') >>> g.nodes['user'].data['h'] tensor([[0.], [0.], [3.]]) **Heterogeneous graph (number of relation types > 1)** >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): ([0, 1], [1, 1]), ... ('game', 'attracts', 'user'): ([0], [1]) ... }) Update all. 
>>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.]]) >>> g.nodes['game'].data['h'] = torch.tensor([[1.]]) >>> g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h')) >>> g.nodes['user'].data['h'] tensor([[0.], [4.]]) """ # Graph with one relation type if self._graph.number_of_etypes() == 1 or etype is not None: etid = self.get_etype_id(etype) etype = self.canonical_etypes[etid] _, dtid = self._graph.metagraph.find_edge(etid) g = self if etype is None else self[etype] ndata = core.message_passing( g, message_func, reduce_func, apply_node_func ) if ( core.is_builtin(reduce_func) and reduce_func.name in ["min", "max"] and ndata ): # Replace infinity with zero for isolated nodes key = list(ndata.keys())[0] ndata[key] = F.replace_inf_with_zero(ndata[key]) self._set_n_repr(dtid, ALL, ndata) else: # heterogeneous graph with number of relation types > 1 if not core.is_builtin(message_func) or not core.is_builtin( reduce_func ): raise DGLError( "User defined functions are not yet " "supported in update_all for heterogeneous graphs. " "Please use multi_update_all instead." ) if reduce_func.name in ["mean"]: raise NotImplementedError( "Cannot set both intra-type and inter-type reduce " "operators as 'mean' using update_all. Please use " "multi_update_all instead." 
def multi_update_all(self, etype_dict, cross_reducer, apply_node_func=None):
    r"""Send messages along all the edges, reduce them by first type-wisely
    then across different types, and then update the node features of all
    the nodes.

    Parameters
    ----------
    etype_dict : dict
        Arguments for edge-type-wise message passing. The keys are edge types
        while the values are message passing arguments.

        The allowed key formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        The value must be a tuple ``(message_func, reduce_func, [apply_node_func])``, where

        * message_func : dgl.function.BuiltinFunction or callable
            The message function to generate messages along the edges.
            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        * reduce_func : dgl.function.BuiltinFunction or callable
            The reduce function to aggregate the messages.
            It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
        * apply_node_func : callable, optional
            An optional apply function to further update the node features
            after the message reduction. It must be a :ref:`apiudf`.

    cross_reducer : str or callable function
        Cross type reducer. One of ``"sum"``, ``"min"``, ``"max"``, ``"mean"``, ``"stack"``
        or a callable function. If a callable function is provided, the input argument must be
        a single list of tensors containing aggregation results from each edge type, and the
        output of function must be a single tensor.
    apply_node_func : callable, optional
        An optional apply function after the messages are reduced both
        type-wisely and across different types.
        It must be a :ref:`apiudf`.

    Raises
    ------
    DGLError
        If the value given for an edge type has more than three elements.

    Notes
    -----
    DGL recommends using DGL's built-in function for the message_func
    and the reduce_func in the type-wise message passing arguments,
    because DGL will invoke efficient kernels that avoids copying node features to
    edge features in this case.

    Infinite values are replaced with zero for a destination node type when
    at least one of the edge types pointing to it was reduced with the
    built-in ``min`` or ``max`` (isolated nodes produce ``inf`` there).

    Examples
    --------
    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch

    Instantiate a heterograph.

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): ([0, 1], [1, 1]),
    ...     ('game', 'attracts', 'user'): ([0], [1])
    ... })
    >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.]])
    >>> g.nodes['game'].data['h'] = torch.tensor([[1.]])

    Update all.

    >>> g.multi_update_all(
    ...     {'follows': (fn.copy_u('h', 'm'), fn.sum('m', 'h')),
    ...      'attracts': (fn.copy_u('h', 'm'), fn.sum('m', 'h'))},
    ... "sum")
    >>> g.nodes['user'].data['h']
    tensor([[0.],
            [4.]])

    User-defined cross reducer equivalent to "sum".

    >>> def cross_sum(flist):
    ...     return torch.sum(torch.stack(flist, dim=0), dim=0) if len(flist) > 1 else flist[0]

    Use the user-defined cross reducer.

    >>> g.multi_update_all(
    ...     {'follows': (fn.copy_u('h', 'm'), fn.sum('m', 'h')),
    ...      'attracts': (fn.copy_u('h', 'm'), fn.sum('m', 'h'))},
    ... cross_sum)
    """
    all_out = defaultdict(list)
    merge_order = defaultdict(list)
    # Destination node type IDs that were reduced by a built-in min/max on
    # at least one incoming edge type. Tracked per destination type so the
    # decision below does not depend on the iteration order of etype_dict.
    minmax_dtids = set()
    for etype, args in etype_dict.items():
        etid = self.get_etype_id(etype)
        _, dtid = self._graph.metagraph.find_edge(etid)
        args = pad_tuple(args, 3)
        if args is None:
            raise DGLError(
                'Invalid arguments for edge type "{}". Should be '
                "(msg_func, reduce_func, [apply_node_func])".format(etype)
            )
        mfunc, rfunc, afunc = args
        g = self if etype is None else self[etype]
        all_out[dtid].append(core.message_passing(g, mfunc, rfunc, afunc))
        merge_order[dtid].append(
            etid
        )  # use edge type id as merge order hint
        if core.is_builtin(rfunc) and rfunc.name in ("min", "max"):
            minmax_dtids.add(dtid)
    for dtid, frames in all_out.items():
        # merge by cross_reducer
        out = reduce_dict_data(frames, cross_reducer, merge_order[dtid])
        # Replace infinity with zero for isolated nodes when reducer is min/max
        if dtid in minmax_dtids:
            for key in out.keys():
                out[key] = (
                    F.replace_inf_with_zero(out[key])
                    if out[key] is not None
                    else None
                )
        self._node_frames[dtid].update(out)
        # apply
        if apply_node_func is not None:
            self.apply_nodes(apply_node_func, ALL, self.ntypes[dtid])
def prop_edges(
    self,
    edges_generator,
    message_func,
    reduce_func,
    apply_node_func=None,
    etype=None,
):
    """Propagate messages by calling :func:`send_and_recv()` once per edge
    frontier, in the order the frontiers are generated.

    Every item produced by ``edges_generator`` is one edge frontier. The
    edge frontiers should be of *valid edges type*; see :func:`send` for
    more details. Edges of one frontier are triggered together, and
    successive frontiers are triggered one after another.

    Parameters
    ----------
    edges_generator : generator
        The generator of edge frontiers.
    message_func : dgl.function.BuiltinFunction or callable
        The message function to generate messages along the edges.
        It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
    reduce_func : dgl.function.BuiltinFunction or callable
        The reduce function to aggregate the messages.
        It must be either a :ref:`api-built-in` or a :ref:`apiudf`.
    apply_node_func : callable, optional
        An optional apply function to further update the node features
        after the message reduction. It must be a :ref:`apiudf`.
    etype : str or (str, str, str), optional
        The type name of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Examples
    --------
    >>> import torch
    >>> import dgl
    >>> import dgl.function as fn

    Instantiate a heterograph and perform multiple rounds of message passing.

    >>> g = dgl.heterograph({('user', 'follows', 'user'): ([0, 1, 2, 3], [2, 3, 4, 4])})
    >>> g.nodes['user'].data['h'] = torch.tensor([[1.], [2.], [3.], [4.], [5.]])
    >>> g['follows'].prop_edges([[0, 1], [2, 3]], fn.copy_u('h', 'm'),
    ...                         fn.sum('m', 'h'), etype='follows')
    >>> g.nodes['user'].data['h']
    tensor([[1.],
            [2.],
            [1.],
            [2.],
            [3.]])

    See Also
    --------
    prop_nodes
    """
    # One send_and_recv round per frontier; ordering is that of the generator.
    for frontier in edges_generator:
        self.send_and_recv(
            frontier, message_func, reduce_func, apply_node_func, etype=etype
        )
The allowed formats are: - Tensor: A 1D tensor that contains the node(s) for query, whose data type and device should be the same as the :py:attr:`idtype` and device of the graph. - iterable[int] : Similar to the tensor, but stores node IDs in a sequence (e.g. list, tuple, numpy.ndarray). By default, it considers all nodes. ntype : str, optional The node type for query. If the graph has multiple node types, one must specify the argument. Otherwise, it can be omitted. Returns ------- Tensor A 1D tensor that contains the ID(s) of the node(s) that satisfy the predicate. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Define a predicate function. >>> def nodes_with_feature_one(nodes): ... # Whether a node has feature 1 ... return (nodes.data['h'] == 1.).squeeze(1) Filter nodes for a homogeneous graph. >>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3]))) >>> g.ndata['h'] = torch.tensor([[0.], [1.], [1.], [0.]]) >>> print(g.filter_nodes(nodes_with_feature_one)) tensor([1, 2]) Filter on nodes with IDs 0 and 1 >>> print(g.filter_nodes(nodes_with_feature_one, nodes=torch.tensor([0, 1]))) tensor([1]) Filter nodes for a heterogeneous graph. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]), ... 
def filter_edges(self, predicate, edges=ALL, etype=None):
    """Return the IDs of the edges with the given edge type that satisfy
    the given predicate.

    Parameters
    ----------
    predicate : callable
        A function of signature ``func(edges) -> Tensor``.
        ``edges`` are :class:`dgl.EdgeBatch` objects.
        Its output tensor should be a 1D boolean tensor with
        each element indicating whether the corresponding edge in
        the batch satisfies the predicate.
    edges : edges
        The edges to query. The allowed input formats are:

        * Int Tensor: Each element is an edge ID. The tensor must have the same device type
          and ID data type as the graph's.
        * iterable[int]: Each element is an edge ID.
        * (Tensor, Tensor): The node-tensors format where the i-th elements
          of the two tensors specify an edge.
        * (iterable[int], iterable[int]): Similar to the node-tensors format but
          stores edge endpoints in python iterables.

        By default, it considers all the edges.
    etype : str or (str, str, str), optional
        The type name of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    Tensor
        A 1D tensor that contains the ID(s) of the edge(s) that satisfy the predicate.

    Raises
    ------
    DGLError
        If an endpoint node ID or an edge ID is out of range.
    ValueError
        If ``edges`` is of an unsupported type.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Define a predicate function.

    >>> def edges_with_feature_one(edges):
    ...     # Whether an edge has feature 1
    ...     return (edges.data['h'] == 1.).squeeze(1)

    Filter edges for a homogeneous graph.

    >>> g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
    >>> g.edata['h'] = torch.tensor([[0.], [1.], [1.]])
    >>> print(g.filter_edges(edges_with_feature_one))
    tensor([1, 2])

    Filter on edges with IDs 0 and 1

    >>> print(g.filter_edges(edges_with_feature_one, edges=torch.tensor([0, 1])))
    tensor([1])

    Filter edges for a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2]))})
    >>> g.edges['plays'].data['h'] = torch.tensor([[0.], [1.], [1.], [0.]])
    >>> # Filter for 'plays' edges
    >>> print(g.filter_edges(edges_with_feature_one, etype='plays'))
    tensor([1, 2])
    """
    if is_all(edges):
        pass
    elif isinstance(edges, tuple):
        u, v = edges
        srctype, _, dsttype = self.to_canonical_etype(etype)
        u = utils.prepare_tensor(self, u, "u")
        if F.as_scalar(
            F.sum(self.has_nodes(u, ntype=srctype), dim=0)
        ) != len(u):
            raise DGLError("edges[0] contains invalid node IDs")
        v = utils.prepare_tensor(self, v, "v")
        if F.as_scalar(
            F.sum(self.has_nodes(v, ntype=dsttype), dim=0)
        ) != len(v):
            raise DGLError("edges[1] contains invalid node IDs")
    elif isinstance(edges, Iterable) or F.is_tensor(edges):
        edges = utils.prepare_tensor(self, edges, "edges")
        if len(edges) == 0:
            # Nothing to filter. Return the (empty) ID tensor right away;
            # min/max reductions are not defined on an empty tensor.
            return edges
        min_eid = F.as_scalar(F.min(edges, 0))
        if min_eid < 0:
            raise DGLError("Invalid edge ID {:d}".format(min_eid))
        max_eid = F.as_scalar(F.max(edges, 0))
        if max_eid >= self.num_edges(etype):
            raise DGLError("Invalid edge ID {:d}".format(max_eid))
    else:
        raise ValueError(
            "Unsupported type of edges: {}".format(type(edges))
        )

    # The mask is written inside a local scope so that the temporary
    # "_mask" feature never leaks into the graph's edge data.
    with self.local_scope():
        self.apply_edges(
            lambda ebatch: {"_mask": predicate(ebatch)}, edges, etype
        )
        etype = self.canonical_etypes[0] if etype is None else etype
        mask = self.edges[etype].data["_mask"]
        if is_all(edges):
            return F.nonzero_1d(mask)
        else:
            if isinstance(edges, tuple):
                e = self.edge_ids(edges[0], edges[1], etype=etype)
            else:
                e = utils.prepare_tensor(self, edges, "edges")
            return F.boolean_mask(e, F.gather_row(mask, e))
def to(self, device, **kwargs):  # pylint: disable=invalid-name
    """Move ndata, edata and graph structure to the targeted device (cpu/gpu).

    When ``device`` is None or equals the graph's current device, the graph
    itself is returned. Otherwise a shallow copy is made whose structure,
    feature frames and batch information are placed on ``device``.

    Note that data of node and edge features are not moved to the specified
    device before being accessed or `materialize_data()` is called.

    Parameters
    ----------
    device : Framework-specific device context object
        The context to move data to (e.g., ``torch.device``).
    kwargs : Key-word arguments.
        Key-word arguments fed to the framework copy function.

    Returns
    -------
    DGLGraph
        The graph on the specified device.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    >>> g = dgl.graph((torch.tensor([1, 0]), torch.tensor([1, 2])))
    >>> g.ndata['h'] = torch.ones(3, 1)
    >>> g.edata['h'] = torch.zeros(2, 2)
    >>> g1 = g.to(torch.device('cuda:0'))
    >>> print(g1.device)
    device(type='cuda', index=0)
    >>> print(g1.ndata['h'].device)
    device(type='cuda', index=0)
    >>> print(g1.nodes().device)
    device(type='cuda', index=0)

    The original graph is still on CPU.

    >>> print(g.device)
    device(type='cpu')
    >>> print(g.ndata['h'].device)
    device(type='cpu')
    >>> print(g.nodes().device)
    device(type='cpu')

    The case of heterogeneous graphs is the same.
    """
    nothing_to_do = device is None or self.device == device
    if nothing_to_do:
        return self

    moved = copy.copy(self)

    # 1. Copy graph structure
    moved._graph = self._graph.copy_to(utils.to_dgl_context(device))

    # 2. Copy features
    # TODO(minjie): handle initializer
    moved._node_frames = [
        frame.to(device, **kwargs) for frame in self._node_frames
    ]
    moved._edge_frames = [
        frame.to(device, **kwargs) for frame in self._edge_frames
    ]

    # 3. Copy misc info (per-type batch sizes, when the graph is batched)
    if self._batch_num_nodes is not None:
        moved._batch_num_nodes = {
            key: F.copy_to(count, device, **kwargs)
            for key, count in self._batch_num_nodes.items()
        }
    if self._batch_num_edges is not None:
        moved._batch_num_edges = {
            key: F.copy_to(count, device, **kwargs)
            for key, count in self._batch_num_edges.items()
        }

    return moved


def cpu(self):
    """Return a new copy of this graph on CPU.

    Returns
    -------
    DGLGraph
        Graph on CPU.

    See Also
    --------
    to
    """
    cpu_device = F.cpu()
    return self.to(cpu_device)


def materialize_data(self):
    """Materialize the graph data on the current device.

    Every column of every node and edge frame has its ``data`` attribute
    accessed once. This method is a no-op if the graph data is already
    materialized.

    Returns
    -------
    DGLGraph
        The graph on the current device.
    """
    every_frame = itertools.chain(self._node_frames, self._edge_frames)
    for frame in every_frame:
        for column in frame._columns.values():
            # Reading the attribute is what triggers materialization.
            _ = column.data
    return self
def unpin_memory_(self):
    """Unpin the graph structure and node/edge data from the page-locked memory.

    This is an **inplace** method.

    If the graph structure is not pinned, e.g., on CPU or GPU, the structure
    is left untouched.

    Returns
    -------
    DGLGraph
        The unpinned graph.
    """
    # Only unpin the structure when it is actually pinned.
    if self._graph.is_pinned():
        self._graph.unpin_memory_()
    # NOTE(review): the feature columns are unpinned for every node/edge
    # frame; confirm against the original layout whether this loop is meant
    # to run regardless of the structure's pinned state.
    for frame in itertools.chain(self._node_frames, self._edge_frames):
        for col in frame._columns.values():
            col.unpin_memory_()
    return self


def is_pinned(self):
    """Check if the graph structure is pinned to the page-locked memory.

    Only the graph structure is queried; feature columns are not inspected.

    Returns
    -------
    bool
        True if the graph structure is pinned.
    """
    return self._graph.is_pinned()


def record_stream(self, stream):
    """Record the stream that is using this graph.
    This method only supports the PyTorch backend and requires graphs on the GPU.

    The stream is recorded on the graph structure and on every column of
    every node and edge frame.

    Parameters
    ----------
    stream : torch.cuda.Stream
        The stream that is using this graph.

    Returns
    -------
    DGLGraph
        self.

    Raises
    ------
    DGLError
        If the preferred backend is not PyTorch, or the graph's device type
        is not ``"cuda"``.
    """
    if F.get_preferred_backend() != "pytorch":
        raise DGLError("record_stream only support the PyTorch backend.")
    if F.device_type(self.device) != "cuda":
        raise DGLError("The graph must be on GPU to be recorded.")
    self._graph.record_stream(stream)
    # Feature columns hold their own storage, so each one records the stream too.
    for frame in itertools.chain(self._node_frames, self._edge_frames):
        for col in frame._columns.values():
            col.record_stream(stream)
    return self
def local_var(self):
    """Return a graph object for usage in a local function scope.

    The returned graph is a shallow copy of this graph whose node and edge
    frames are clones of the originals. It shares the feature data and graph
    structure of this graph, but any out-place mutation to the feature data
    will not reflect to this graph, thus making it easier to use in a
    function scope (e.g. forward computation of a model).

    If set, the local graph object will use same initializers for node
    features and edge features.

    Returns
    -------
    DGLGraph
        The graph object for a local variable.

    Notes
    -----
    Inplace operations do reflect to the original graph. This function also
    has little overhead when the number of feature tensors in this graph is
    small.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a function for computation on graphs.

    >>> def foo(g):
    ...     g = g.local_var()
    ...     g.edata['h'] = torch.ones((g.num_edges(), 3))
    ...     g.edata['h2'] = torch.ones((g.num_edges(), 3))
    ...     return g.edata['h']

    ``local_var`` avoids changing the graph features when exiting the function.

    >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
    >>> g.edata['h'] = torch.zeros((g.num_edges(), 3))
    >>> newh = foo(g)
    >>> print(g.edata['h'])  # still get tensor of all zeros
    tensor([[0., 0., 0.],
            [0., 0., 0.],
            [0., 0., 0.]])
    >>> 'h2' in g.edata      # new feature set in the function scope is not found
    False

    In-place operations will still reflect to the original graph.

    >>> def foo(g):
    ...     g = g.local_var()
    ...     # in-place operation
    ...     g.edata['h'] += 1
    ...     return g.edata['h']

    >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([0, 0, 2])))
    >>> g.edata['h'] = torch.zeros((g.num_edges(), 1))
    >>> newh = foo(g)
    >>> print(g.edata['h'])  # the result changes
    tensor([[1.],
            [1.],
            [1.]])

    See Also
    --------
    local_scope
    """
    local_graph = copy.copy(self)
    # Give the copy its own frame objects so that adding or replacing a
    # feature on it does not touch this graph's frames.
    node_clones = []
    for frame in self._node_frames:
        node_clones.append(frame.clone())
    local_graph._node_frames = node_clones
    edge_clones = []
    for frame in self._edge_frames:
        edge_clones.append(frame.clone())
    local_graph._edge_frames = edge_clones
    return local_graph
def formats(self, formats=None):
    r"""Get a cloned graph with the specified allowed sparse format(s) or
    query for the usage status of sparse formats.

    The API copies both the graph structure and the features.

    If the input graph has multiple edge types, they will have the same
    sparse format.

    When ``formats`` is not None, if the intersection between `formats` and
    the current graph's created sparse format(s) is not empty, the returned
    cloned graph only retains all sparse format(s) in the intersection. If
    the intersection is empty, a sparse format will be selected to be created
    following the order of ``'coo' -> 'csr' -> 'csc'``.

    Parameters
    ----------
    formats : str or list of str or None

        * If formats is None, return the usage status of sparse formats
        * Otherwise, it can be ``'coo'``/``'csr'``/``'csc'`` or a sublist of
          them, specifying the sparse formats to use.

    Returns
    -------
    dict or DGLGraph

        * If formats is None, the result will be a dict recording the usage
          status of sparse formats.
        * Otherwise, a DGLGraph will be returned, which is a clone of the
          original graph with the specified allowed sparse format(s)
          ``formats``.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    **Homographs or Heterographs with A Single Edge Type**

    >>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
    >>> g.ndata['h'] = torch.ones(4, 1)
    >>> # Check status of format usage.
    >>> g.formats()
    {'created': ['coo'], 'not created': ['csr', 'csc']}
    >>> # Get a clone of the graph with 'csr' format.
    >>> csr_g = g.formats('csr')
    >>> # Only allowed formats will be displayed in the status query.
    >>> csr_g.formats()
    {'created': ['csr'], 'not created': []}
    >>> # Features are copied as well.
    >>> csr_g.ndata['h']
    tensor([[1.],
            [1.],
            [1.],
            [1.]])

    **Heterographs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.formats()
    {'created': ['coo'], 'not created': ['csr', 'csc']}
    >>> # Get a clone of the graph with 'csr' format.
    >>> csr_g = g.formats('csr')
    >>> # Only allowed formats will be displayed in the status query.
    >>> csr_g.formats()
    {'created': ['csr'], 'not created': []}

    **When formats intersects with created formats**

    >>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
    >>> g = g.formats(['coo', 'csr'])
    >>> g.create_formats_()
    >>> g.formats()
    {'created': ['coo', 'csr'], 'not created': []}
    >>> # Get a clone of the graph allowed formats 'csr' and 'csc'.
    >>> csr_csc_g = g.formats(['csr', 'csc'])
    >>> # Only the intersection 'csr' will be retained.
    >>> csr_csc_g.formats()
    {'created': ['csr'], 'not created': ['csc']}

    **When formats doesn't intersect with created formats**

    >>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
    >>> g = g.formats('coo')
    >>> g.formats()
    {'created': ['coo'], 'not created': []}
    >>> # Get a clone of the graph allowed formats 'csr' and 'csc'.
    >>> csr_csc_g = g.formats(['csr', 'csc'])
    >>> # Since the intersection is empty, 'csr' will be created as it is
    >>> # first in the order of 'coo' -> 'csr' -> 'csc'.
    >>> csr_csc_g.formats()
    {'created': ['csr'], 'not created': ['csc']}
    """
    # Query mode: report the format status of the underlying graph index.
    if formats is None:
        return self._graph.formats()

    # Conversion mode: shallow-copy this graph and swap in a graph index
    # restricted to the requested formats.
    converted = copy.copy(self)
    converted._graph = self._graph.formats(formats)
    return converted


def create_formats_(self):
    r"""Create all sparse matrices allowed for the graph.

    By default, we create sparse matrices for a graph only when necessary.
    In some cases we may want to create them immediately (e.g. in a
    multi-process data loader), which can be achieved via this API.

    The call is forwarded to the underlying graph index and its result is
    returned as is.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    **Homographs or Heterographs with A Single Edge Type**

    >>> g = dgl.graph(([0, 0, 1], [2, 3, 2]))
    >>> g.formats()
    {'created': ['coo'], 'not created': ['csr', 'csc']}
    >>> g.create_formats_()
    >>> g.formats()
    {'created': ['coo', 'csr', 'csc'], 'not created': []}

    **Heterographs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.formats()
    {'created': ['coo'], 'not created': ['csr', 'csc']}
    >>> g.create_formats_()
    >>> g.formats()
    {'created': ['coo', 'csr', 'csc'], 'not created': []}
    """
    graph_index = self._graph
    return graph_index.create_formats_()
Parameters ---------- idtype : Data type object. New ID type. Can only be int32 or int64. Returns ------- DGLGraph Graph in the new ID type. """ if idtype is None: return self utils.check_valid_idtype(idtype) if self.idtype == idtype: return self bits = 32 if idtype == F.int32 else 64 ret = copy.copy(self) ret._graph = self._graph.asbits(bits) return ret # TODO: Formats should not be specified, just saving all the materialized formats def shared_memory(self, name, formats=("coo", "csr", "csc")): """Return a copy of this graph in shared memory, without node data or edge data. It moves the graph index to shared memory and returns a DGLGraph object which has the same graph structure, node types and edge types but does not contain node data or edge data. Parameters ---------- name : str The name of the shared memory. formats : str or a list of str (optional) Desired formats to be materialized. Returns ------- DGLGraph The graph in shared memory """ assert len(name) > 0, "The name of shared memory cannot be empty" assert len(formats) > 0 if isinstance(formats, str): formats = [formats] for fmt in formats: assert fmt in ( "coo", "csr", "csc", ), "{} is not coo, csr or csc".format(fmt) gidx = self._graph.shared_memory( name, self.ntypes, self.etypes, formats ) return DGLGraph(gidx, self.ntypes, self.etypes) def long(self): """Cast the graph to one with idtype int64 If the graph already has idtype int64, the function directly returns it. Otherwise, it returns a cloned graph of idtype int64 with features copied (shallow copy). Returns ------- DGLGraph The graph of idtype int64. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Create a graph of idtype int32. >>> # (0, 1), (0, 2), (1, 2) >>> g = dgl.graph((torch.tensor([0, 0, 1]).int(), torch.tensor([1, 2, 2]).int())) >>> g.ndata['feat'] = torch.ones(3, 1) >>> g.idtype torch.int32 Cast the graph to one of idtype int64. 
def int(self):
    """Cast the graph to one with idtype int32

    This is a shorthand for ``self.astype(F.int32)``: the result of that
    call is returned unchanged.

    Returns
    -------
    DGLGraph
        The graph of idtype int32.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Create a graph of idtype int64.

    >>> # (0, 1), (0, 2), (1, 2)
    >>> g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 2, 2])))
    >>> g.ndata['feat'] = torch.ones(3, 1)
    >>> g.idtype
    torch.int64

    Cast the graph to one of idtype int32.

    >>> # A cloned graph with an idtype of int32
    >>> g_int = g.int()
    >>> g_int.idtype
    torch.int32
    >>> # The idtype of the original graph does not change.
    >>> g.idtype
    torch.int64
    >>> g_int.edges()
    (tensor([0, 0, 1], dtype=torch.int32), tensor([1, 2, 2], dtype=torch.int32))
    >>> g_int.ndata
    {'feat': tensor([[1.],
                     [1.],
                     [1.]])}

    See Also
    --------
    long
    idtype
    """
    target_idtype = F.int32
    return self.astype(target_idtype)
def pad_tuple(tup, length, pad_val=None):
    """Right-pad a tuple with ``pad_val`` until it has ``length`` items.

    A non-tuple input is first wrapped into a one-element tuple.

    Returns None when the (wrapped) input is already longer than
    ``length``; returns the tuple itself when it already has exactly
    ``length`` items.
    """
    items = tup if isinstance(tup, tuple) else (tup,)
    missing = length - len(items)
    if missing < 0:
        # Too long: padding cannot shrink it.
        return None
    if missing == 0:
        return items
    return items + (pad_val,) * missing
def combine_frames(frames, ids, col_names=None):
    """Merge the frames into one frame, taking the common columns.

    Frames with zero rows are ignored. Return None if there is no common
    column, or if every selected frame is empty.

    Parameters
    ----------
    frames : List[Frame]
        List of frames
    ids : List[int]
        List of frame IDs
    col_names : List[str], optional
        Column names to consider. If not given, it considers all columns
        of the first non-empty frame.

    Returns
    -------
    Frame or None
        The resulting frame, whose columns are the concatenation (along
        dimension 0) of the common columns in the order given by ``ids``.

    Raises
    ------
    DGLError
        If a common column has different schemes in two frames.
    """
    # Empty frames contribute neither columns nor rows.
    nonempty = [frames[i] for i in ids if frames[i].num_rows > 0]
    if not nonempty:
        # Nothing to merge; previously this fell through to len(None).
        return None

    # Seed the candidate columns from the first non-empty frame. Work on a
    # private dict so that pruning below never touches the frame's own data.
    first_schemes = nonempty[0].schemes
    if col_names is None:
        schemes = dict(first_schemes)
    else:
        schemes = {key: first_schemes[key] for key in col_names}

    # Keep only columns present in every frame and check their schemes match.
    for frame in nonempty[1:]:
        frame_schemes = frame.schemes
        for key, scheme in list(schemes.items()):
            if key not in frame_schemes:
                del schemes[key]
            elif frame_schemes[key] != scheme:
                raise DGLError(
                    "Cannot concatenate column %s with shape %s and shape %s"
                    % (key, frame_schemes[key], scheme)
                )

    if not schemes:
        return None

    # concatenate the columns
    cols = {
        key: F.cat([frame[key] for frame in nonempty], dim=0)
        for key in schemes
    }
    return Frame(cols)
def _create_compute_graph(graph, u, v, eid, recv_nodes=None):
    """Build the bipartite computation graph induced by a set of edges.

    The result is a uni-directional bipartite graph with a single edge
    type, taken from the first canonical edge type of ``graph``. Source
    and destination endpoints are relabeled to consecutive IDs; the
    original node IDs are stored under ``NID`` in the source/destination
    frames and the original edge IDs under ``EID`` in the edge frame.
    Edges themselves are not relabeled.

    This function is typically used during message passing to generate a
    graph that contains only the active set of edges.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    u : Tensor
        Src nodes.
    v : Tensor
        Dst nodes.
    eid : Tensor
        Edge IDs.
    recv_nodes : Tensor
        Nodes that receive messages. If None, the destination set is
        derived from ``v``. Otherwise it is derived from ``recv_nodes``,
        which must then cover every node in ``v`` and may contain nodes
        without incoming edges. Required when ``u`` is empty.

    Returns
    -------
    tuple
        ``(compute_graph, unique_src, unique_dst, eid)`` where
        ``compute_graph`` is the new DGLGraph and ``unique_src`` /
        ``unique_dst`` are the relabeled source / destination node sets.
    """
    if len(u) > 0:
        # relabel u and v to starting from 0
        unique_src, src_map = utils.relabel(u)
        dst_basis = v if recv_nodes is None else recv_nodes
        unique_dst, dst_map = utils.relabel(dst_basis)
        new_u = F.gather_row(src_map, u)
        new_v = F.gather_row(dst_map, v)
    else:
        # No edge means no message passing, but the apply-node phase still
        # needs a graph, so construct an empty one over the receivers.
        unique_src = new_u = new_v = u
        assert recv_nodes is not None
        unique_dst, _ = utils.relabel(recv_nodes)

    srctype, etype, dsttype = graph.canonical_etypes[0]

    # graph structure
    unit_index = heterograph_index.create_unitgraph_from_coo(
        2,
        len(unique_src),
        len(unique_dst),
        new_u,
        new_v,
        ["coo", "csr", "csc"],
    )

    # feature frames, restricted to the participating nodes and edges
    src_frames = graph._node_frames[graph.get_ntype_id(srctype)]
    srcframe = src_frames.subframe(unique_src)
    srcframe[NID] = unique_src

    dst_frames = graph._node_frames[graph.get_ntype_id(dsttype)]
    dstframe = dst_frames.subframe(unique_dst)
    dstframe[NID] = unique_dst

    eframe = graph._edge_frames[0].subframe(eid)
    eframe[EID] = eid

    compute_graph = DGLGraph(
        unit_index,
        ([srctype], [dsttype]),
        [etype],
        node_frames=[srcframe, dstframe],
        edge_frames=[eframe],
    )
    return compute_graph, unique_src, unique_dst, eid
def __setstate__(self, state):
    """Restore the graph index from a pickled ``state``.

    Two layouts are recognized:

    * a ``HeteroPickleStates`` object, which is handed to the C API
      unpickler;
    * a legacy 3-tuple ``(metagraph, num_nodes, edges)``, from which one
      unit graph per edge type is rebuilt out of its COO arrays before
      the heterograph is created.

    Parameters
    ----------
    state : HeteroPickleStates or tuple
        The pickled state.
    """
    self._cache = {}

    # Pickle compatibility check
    # TODO: we should store a storage version number in later releases.
    if isinstance(state, HeteroPickleStates):
        # post-0.4.3
        self.__init_handle_by_constructor__(_CAPI_DGLHeteroUnpickle, state)
    elif isinstance(state, tuple) and len(state) == 3:
        # pre-0.4.2
        metagraph, num_nodes, edges = state

        # loop over etypes and recover unit graphs
        rel_graphs = []
        for i, edges_per_type in enumerate(edges):
            src_ntype, dst_ntype = metagraph.find_edge(i)
            num_src = num_nodes[src_ntype]
            num_dst = num_nodes[dst_ntype]
            src_id, dst_id, _ = edges_per_type
            rel_graphs.append(
                create_unitgraph_from_coo(
                    # one node type if src and dst coincide, else bipartite
                    1 if src_ntype == dst_ntype else 2,
                    num_src,
                    num_dst,
                    src_id,
                    dst_id,
                    # was " csc" (leading space), which is not a format name
                    ["coo", "csr", "csc"],
                )
            )
        self.__init_handle_by_constructor__(
            _CAPI_DGLHeteroCreateHeteroGraph, metagraph, rel_graphs
        )
    # NOTE(review): any other state layout is silently ignored and leaves
    # the object without a handle -- confirm whether this should raise.
def bits_needed(self, etype):
    """Return the integer width needed to index the given relation.

    The answer is 64 as soon as the edge count of ``etype``, or the node
    count of its source or destination node type, reaches ``0x80000000``
    (2**31); otherwise it is 32.

    Parameters
    ----------
    etype : int
        The edge type.

    Returns
    -------
    int
        The number of bits needed (32 or 64).
    """
    limit = 0x80000000  # first value that no longer fits a signed 32-bit int
    src_ntype, dst_ntype = self.metagraph.find_edge(etype)
    exceeds_int32 = (
        self.num_edges(etype) >= limit
        or self.num_nodes(src_ntype) >= limit
        or self.num_nodes(dst_ntype) >= limit
    )
    return 64 if exceeds_int32 else 32
def pin_memory_(self):
    """Pin this graph to the page-locked memory.

    NOTE: This is an inplace method to pin the current graph index, i.e.,
    it does not require new memory allocation but simply flags the
    existing graph structure to be page-locked. The graph structure must be
    on CPU to be pinned. If the graph structure is already pinned, the
    function directly returns it.

    Returns
    -------
    HeteroGraphIndex
        The pinned graph index.
    """
    # Thin wrapper: all the work happens in the C API call.
    return _CAPI_DGLHeteroPinMemory_(self)
def shared_memory(
    self, name, ntypes=None, etypes=None, formats=("coo", "csr", "csc")
):
    """Return a copy of this graph in shared memory

    Parameters
    ----------
    name : str
        The name of the shared memory. Must be non-empty.
    ntypes : list of str
        Name of node types. ``None`` is passed on as an empty list.
    etypes : list of str
        Name of edge types. ``None`` is passed on as an empty list.
    formats : str or list of str
        Desired formats to be materialized. Each entry must be one of
        ``'coo'``, ``'csr'`` or ``'csc'``. A single string is treated as a
        one-element list.

    Returns
    -------
    HeteroGraphIndex
        The graph index in shared memory
    """
    assert len(name) > 0, "The name of shared memory cannot be empty"
    assert len(formats) > 0
    # Without this, a bare string such as "coo" would be validated one
    # character at a time and always rejected.
    if isinstance(formats, str):
        formats = [formats]
    for fmt in formats:
        assert fmt in (
            "coo",
            "csr",
            "csc",
        ), "{} is not coo, csr or csc".format(fmt)
    ntypes = [] if ntypes is None else ntypes
    etypes = [] if etypes is None else etypes
    return _CAPI_DGLHeteroCopyToSharedMem(
        self, name, ntypes, etypes, formats
    )
def has_nodes(self, ntype, vids):
    """Return true if the nodes exist.

    Parameters
    ----------
    ntype : int
        Node type
    vids : Tensor
        Node IDs

    Returns
    -------
    Tensor
        0-1 array indicating existence
    """
    # Convert the IDs to a DGL NDArray for the C API, then convert the
    # answer back to a backend tensor.
    return F.from_dgl_nd(
        _CAPI_DGLHeteroHasVertices(self, int(ntype), F.to_dgl_nd(vids))
    )
def edge_ids_one(self, etype, u, v):
    """Return an array of edge IDs for the given endpoint pairs.

    Parameters
    ----------
    etype : int
        Edge type
    u : Tensor
        The src nodes.
    v : Tensor
        The dst nodes.

    Returns
    -------
    Tensor
        The edge ids.
    """
    etype_id = int(etype)
    src_nd = F.to_dgl_nd(u)
    dst_nd = F.to_dgl_nd(v)
    eid_nd = _CAPI_DGLHeteroEdgeIdsOne(self, etype_id, src_nd, dst_nd)
    return F.from_dgl_nd(eid_nd)
def adjacency_matrix(self, etype, transpose, ctx):
    """Return the adjacency matrix of one edge type as a backend sparse matrix.

    Rows correspond to source nodes and columns to destination nodes;
    with ``transpose=True`` the roles are swapped. The sparse format is
    the one reported by ``F.get_preferred_sparse_format()`` and every
    stored entry is a float32 one.

    Parameters
    ----------
    etype : int
        Edge type
    transpose : bool
        A flag to transpose the returned adjacency matrix.
    ctx : context
        The context of the returned matrix.

    Returns
    -------
    SparseTensor
        The adjacency matrix.
    Tensor
        A index for data shuffling due to sparse format change. Return None
        if shuffle is not required.

    Raises
    ------
    DGLError
        If ``transpose`` is not a bool.
    Exception
        If the preferred sparse format is neither ``'csr'`` nor ``'coo'``.
    """
    if not isinstance(transpose, bool):
        raise DGLError(
            'Expect bool value for "transpose" arg, but got %s.'
            % (type(transpose))
        )

    fmt = F.get_preferred_sparse_format()
    rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)

    # convert to framework-specific sparse matrix
    srctype, dsttype = self.metagraph.find_edge(etype)
    row_type, col_type = (dsttype, srctype) if transpose else (srctype, dsttype)
    shape = (self.num_nodes(row_type), self.num_nodes(col_type))
    nnz = self.num_edges(etype)

    def fetch(position):
        # Pull one array out of the C API result onto the target context.
        return F.copy_to(F.from_dgl_nd(rst(position)), ctx)

    if fmt == "csr":
        indptr = fetch(0)
        indices = fetch(1)
        shuffle = fetch(2)
        # FIXME(minjie): data type
        ones = F.ones(nnz, dtype=F.float32, ctx=ctx)
        spmat = F.sparse_matrix(ones, ("csr", indices, indptr), shape)[0]
        return spmat, shuffle

    if fmt == "coo":
        coords = F.reshape(fetch(0), (2, nnz))
        ones = F.ones((nnz,), dtype=F.float32, ctx=ctx)
        adj, shuffle_idx = F.sparse_matrix(ones, ("coo", coords), shape)
        return adj, shuffle_idx

    raise Exception("unknown format")
def adjacency_matrix_scipy(
    self, etype, transpose, fmt, return_edge_ids=None
):
    """Return the scipy adjacency matrix representation of this graph.

    By default, a row of returned adjacency matrix represents the source
    of an edge and the column represents the destination.

    When transpose is True, a row represents the destination and a column
    represents the source.

    Parameters
    ----------
    etype : int
        Edge type
    transpose : bool
        A flag to transpose the returned adjacency matrix.
    fmt : str
        Indicates the format of returned adjacency matrix
        (``'csr'`` or ``'coo'``).
    return_edge_ids : bool
        Indicates whether to return edge IDs or 1 as elements.
        If None, a FutureWarning is emitted and edge IDs are returned.

    Returns
    -------
    scipy.sparse.spmatrix
        The scipy representation of adjacency matrix.

    Raises
    ------
    ValueError
        If ``fmt`` is neither ``'csr'`` nor ``'coo'``.
    """
    if return_edge_ids is None:
        dgl_warning(
            "Adjacency matrix by default currently returns edge IDs."
            "  As a result there is one 0 entry which is not eliminated."
            "  In the next release it will return 1s by default,"
            " and 0 will be eliminated otherwise.",
            FutureWarning,
        )
        return_edge_ids = True

    if fmt == "csr":
        nrows, ncols, indptr, indices, data = self.adjacency_matrix_tensors(
            etype, transpose, fmt
        )
        indptr = F.asnumpy(indptr)
        indices = F.asnumpy(indices)
        data = F.asnumpy(data)
        if not return_edge_ids:
            data = np.ones_like(indices)
        elif data.shape[0] == 0:
            # An empty data array stands for consecutive edge IDs.
            data = np.arange(self.num_edges(etype))
        # Otherwise ``data`` already holds the edge IDs; keep it. It used
        # to be overwritten with ones here.
        return scipy.sparse.csr_matrix(
            (data, indices, indptr), shape=(nrows, ncols)
        )
    elif fmt == "coo":
        nrows, ncols, row, col = self.adjacency_matrix_tensors(
            etype, transpose, fmt
        )
        row = F.asnumpy(row)
        col = F.asnumpy(col)
        # NOTE(review): assumes the COO arrays are ordered by edge ID, so
        # position equals edge ID -- confirm against the C API.
        data = (
            np.arange(self.num_edges(etype))
            if return_edge_ids
            else np.ones_like(row)
        )
        return scipy.sparse.coo_matrix(
            (data, (row, col)), shape=(nrows, ncols)
        )
    else:
        raise ValueError("unknown format")
Parameters ---------- etype : int Edge type typestr : str Can be either "in", "out" or "both" ctx : context The context of returned incidence matrix. Returns ------- SparseTensor The incidence matrix. utils.Index A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ src, dst, eid = self.edges(etype) srctype, dsttype = self.metagraph.find_edge(etype) m = self.num_edges(etype) if typestr == "in": n = self.num_nodes(dsttype) row = F.unsqueeze(dst, 0) col = F.unsqueeze(eid, 0) idx = F.copy_to(F.cat([row, col], dim=0), ctx) # FIXME(minjie): data type dat = F.ones((m,), dtype=F.float32, ctx=ctx) inc, shuffle_idx = F.sparse_matrix(dat, ("coo", idx), (n, m)) elif typestr == "out": n = self.num_nodes(srctype) row = F.unsqueeze(src, 0) col = F.unsqueeze(eid, 0) idx = F.copy_to(F.cat([row, col], dim=0), ctx) # FIXME(minjie): data type dat = F.ones((m,), dtype=F.float32, ctx=ctx) inc, shuffle_idx = F.sparse_matrix(dat, ("coo", idx), (n, m)) elif typestr == "both": assert ( srctype == dsttype ), "'both' is supported only if source and destination type are the same" n = self.num_nodes(srctype) # first remove entries for self loops mask = F.logical_not(F.equal(src, dst)) src = F.boolean_mask(src, mask) dst = F.boolean_mask(dst, mask) eid = F.boolean_mask(eid, mask) n_entries = F.shape(src)[0] # create index row = F.unsqueeze(F.cat([src, dst], dim=0), 0) col = F.unsqueeze(F.cat([eid, eid], dim=0), 0) idx = F.copy_to(F.cat([row, col], dim=0), ctx) # FIXME(minjie): data type x = -F.ones((n_entries,), dtype=F.float32, ctx=ctx) y = F.ones((n_entries,), dtype=F.float32, ctx=ctx) dat = F.cat([x, y], dim=0) inc, shuffle_idx = F.sparse_matrix(dat, ("coo", idx), (n, m)) else: raise DGLError("Invalid incidence matrix type: %s" % str(typestr)) return inc, shuffle_idx def node_subgraph(self, induced_nodes): """Return the induced node subgraph. Parameters ---------- induced_nodes : list of utils.Index Induced nodes. 
def edge_subgraph(self, induced_edges, preserve_nodes):
    """Return the subgraph induced by the given edges.

    Parameters
    ----------
    induced_edges : list of utils.Index
        Induced edges, one entry per edge type of this heterograph.
    preserve_nodes : bool
        If true, nodes that have no edge in the subgraph are kept;
        if false, such nodes are removed.

    Returns
    -------
    SubgraphIndex
        The subgraph index.
    """
    # The C API expects DGL NDArrays, so convert every per-type ID array.
    converted = []
    for edges in induced_edges:
        converted.append(F.to_dgl_nd(edges))
    return _CAPI_DGLHeteroEdgeSubgraph(self, converted, preserve_nodes)
def get_csr_shuffle_order(self, etype):
    """Return the edge shuffling orders of the CSR forms of an edge type.

    Parameters
    ----------
    etype : int
        The edge type.

    Returns
    -------
    tuple of two utils.Index
        The first element is the shuffle order of the outward graph
        (non-transposed CSR); the second element is the shuffle order
        of the inward graph (transposed CSR).
    """
    etype_id = int(etype)

    # Item 2 of the adjacency query result holds the edge order.
    out_adj = _CAPI_DGLHeteroGetAdj(self, etype_id, False, "csr")
    out_order = out_adj(2)
    in_adj = _CAPI_DGLHeteroGetAdj(self, etype_id, True, "csr")
    in_order = in_adj(2)

    out_index = utils.toindex(out_order, self.dtype)
    in_index = utils.toindex(in_order, self.dtype)
    return out_index, in_index
def create_metagraph_index(ntypes, canonical_etypes):
    """Build the metagraph index from node types and canonical edge types.

    Both the node types and the canonical edge types are sorted first, so
    the same set of type names always yields the same ordering.

    Parameters
    ----------
    ntypes : Iterable[str]
        The node types.
    canonical_etypes : Iterable[tuple[str, str, str]]
        The canonical edge types, as (source type, edge type,
        destination type) triples.

    Returns
    -------
    GraphIndex
        The index object for the metagraph.
    list[str]
        The sorted node types, one per metagraph node.
    list[str]
        The edge type names, one per metagraph edge, in sorted
        canonical-edge-type order.
    list[tuple[str, str, str]]
        The sorted canonical edge types, one per metagraph edge.
    """
    sorted_ntypes = sorted(ntypes)
    sorted_relations = sorted(canonical_etypes)

    # Position of each node type in the sorted order = metagraph node ID.
    type_to_id = dict(
        (ntype, position) for position, ntype in enumerate(sorted_ntypes)
    )

    # Resolve every relation to (src node ID, edge name, dst node ID).
    resolved = [
        (type_to_id[srctype], etype, type_to_id[dsttype])
        for srctype, etype, dsttype in sorted_relations
    ]
    src_ids = [triple[0] for triple in resolved]
    etype_names = [triple[1] for triple in resolved]
    dst_ids = [triple[2] for triple in resolved]

    # metagraph is DGLGraph, currently still using int64 as index dtype
    metagraph = from_coo(len(sorted_ntypes), src_ids, dst_ids, True)
    return metagraph, sorted_ntypes, etype_names, sorted_relations
def create_heterograph_from_relations(
    metagraph, rel_graphs, num_nodes_per_type
):
    """Assemble a heterograph from a metagraph and per-relation graphs.

    Parameters
    ----------
    metagraph : GraphIndex
        Meta-graph.
    rel_graphs : list of HeteroGraphIndex
        Bipartite graph of each relation.
    num_nodes_per_type : utils.Index or None
        Number of nodes per node type. When None, the variant of the
        C API that takes no node counts is used.

    Returns
    -------
    HeteroGraphIndex
    """
    if num_nodes_per_type is not None:
        node_counts = num_nodes_per_type.todgltensor()
        return _CAPI_DGLHeteroCreateHeteroGraphWithNumNodes(
            metagraph, rel_graphs, node_counts
        )
    return _CAPI_DGLHeteroCreateHeteroGraph(metagraph, rel_graphs)
def disjoint_partition(graph, bnn_all_types, bne_all_types):
    """Partition a batched graph back into disjoint graphs.

    Parameters
    ----------
    graph : HeteroGraphIndex
        The graph to be partitioned.
    bnn_all_types : list of list of int
        bnn_all_types[t] gives the number of nodes with t-th type in the
        batch.
    bne_all_types : list of list of int
        bne_all_types[t] gives the number of edges with t-th type in the
        batch.

    Returns
    -------
    list of HeteroGraphIndex
        Heterographs unbatched.
    """
    # The C API takes one flat array per quantity, so the per-type lists
    # are concatenated in type order.
    flat_node_counts = [
        count for per_type in bnn_all_types for count in per_type
    ]
    flat_edge_counts = [
        count for per_type in bne_all_types for count in per_type
    ]
    node_index = utils.toindex(flat_node_counts)
    edge_index = utils.toindex(flat_edge_counts)
    return _CAPI_DGLHeteroDisjointPartitionBySizes_v2(
        graph, node_index.todgltensor(), edge_index.todgltensor()
    )
@register_object("graph.HeteroPickleStates")
class HeteroPickleStates(ObjectBase):
    """Pickle states object class in C++ backend."""

    @property
    def version(self):
        """Version number of the pickle states.

        Returns
        -------
        int
            version number
        """
        return _CAPI_DGLHeteroPickleStatesGetVersion(self)

    @property
    def meta(self):
        """Serialized meta information.

        Returns
        -------
        bytearray
            Serialized meta info
        """
        raw_meta = _CAPI_DGLHeteroPickleStatesGetMeta(self)
        return bytearray(raw_meta)

    @property
    def arrays(self):
        """Arrays representing the graph structure (COO or CSR).

        Returns
        -------
        list of dgl.ndarray.NDArray
            Arrays
        """
        count = _CAPI_DGLHeteroPickleStatesGetArraysNum(self)
        getter = _CAPI_DGLHeteroPickleStatesGetArrays(self)
        collected = []
        for position in range(count):
            collected.append(getter(position))
        return collected

    def __getstate__(self):
        """Return ``(version, meta, arrays)`` for pickling.

        Issue: https://github.com/pytorch/pytorch/issues/32351
        The tensors created here are stored on the object as
        ``_pk_arrays`` to avoid potential bugs.
        """
        backend_arrays = []
        for arr in self.arrays:
            backend_arrays.append(F.zerocopy_from_dgl_ndarray(arr))
        self._pk_arrays = backend_arrays
        return self.version, self.meta, self._pk_arrays

    def __setstate__(self, state):
        """Rebuild the backend object from a pickled state.

        Two layouts are accepted: ``(version, meta, arrays)`` when the
        first item is an int, otherwise the older
        ``(metagraph, num_nodes_per_type, adjs)`` layout.
        """
        if not isinstance(state[0], int):
            # Old layout.
            metagraph, num_nodes_per_type, adjs = state
            num_nodes_per_type = F.zerocopy_to_dgl_ndarray(num_nodes_per_type)
            self.__init_handle_by_constructor__(
                _CAPI_DGLCreateHeteroPickleStatesOld,
                metagraph,
                num_nodes_per_type,
                adjs,
            )
            return

        version, meta, backend_arrays = state
        dgl_arrays = [
            F.zerocopy_to_dgl_ndarray(arr) for arr in backend_arrays
        ]
        self.__init_handle_by_constructor__(
            _CAPI_DGLCreateHeteroPickleStates, version, meta, dgl_arrays
        )
def node_homophily(graph, y):
    r"""Compute the node homophily of a labeled graph.

    The measure comes from *Geom-GCN: Geometric Graph Convolutional
    Networks*; the name "node homophily" follows the later paper *Large
    Scale Learning on Non-Homophilous Graphs: New Benchmarks and Strong
    Simple Methods*.

    Mathematically it is defined as follows:

    .. math::
      \frac{1}{|\mathcal{V}|} \sum_{v \in \mathcal{V}} \frac{ | \{u
      \in \mathcal{N}(v): y_v = y_u \} |  } { |\mathcal{N}(v)| },

    where :math:`\mathcal{V}` is the set of nodes, :math:`\mathcal{N}(v)` is
    the predecessors of node :math:`v`, and :math:`y_v` is the class of node
    :math:`v`.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).

    Returns
    -------
    float
        The node homophily value.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([1, 2, 0, 4], [0, 1, 2, 3]))
    >>> y = torch.tensor([0, 0, 0, 0, 1])
    >>> dgl.node_homophily(graph, y)
    0.6000000238418579
    """
    check_pytorch()
    with graph.local_scope():
        # Endpoints are cast to int64 so that int32 graphs can index y.
        src, dst = get_long_edges(graph)
        # 1.0 on edges whose endpoints share a label, 0.0 elsewhere.
        same_class = y[src] == y[dst]
        graph.edata["same_class"] = same_class.float()
        # Average the indicator over the incoming edges of every node.
        message_fn = fn.copy_e("same_class", "m")
        reduce_fn = fn.mean("m", "same_class_deg")
        graph.update_all(message_fn, reduce_fn)
        per_node_ratio = graph.ndata["same_class_deg"]
        return per_node_ratio.mean(dim=0).item()
def adjusted_homophily(graph, y):
    r"""Compute the adjusted homophily of a labeled graph.

    The measure is recommended in *Characterizing Graph Datasets for Node
    Classification: Homophily-Heterophily Dichotomy and Beyond*.

    Adjusted homophily is edge homophily adjusted for the expected number of
    edges connecting nodes with the same class label (taking into account the
    number of classes, their sizes, and the distribution of node degrees among
    them).

    Mathematically it is defined as follows:

    .. math::
        \frac{h_{edge} - \sum_{k=1}^C \bar{p}(k)^2}
        {1 - \sum_{k=1}^C \bar{p}(k)^2},

    where :math:`h_{edge}` denotes edge homophily, :math:`C` denotes the
    number of classes, and :math:`\bar{p}(\cdot)` is the empirical
    degree-weighted distribution of classes:
    :math:`\bar{p}(k) = \frac{\sum_{v\,:\,y_v = k} d(v)}{2|E|}`,
    where :math:`d(v)` is the degree of node :math:`v`.

    It has been shown that adjusted homophily satisfies more desirable
    properties than other homophily measures, which makes it appropriate for
    comparing the levels of homophily across datasets with different numbers
    of classes, different class sizes, and different degree distributions
    among classes.

    Adjusted homophily can be negative. If adjusted homophily is zero, then
    the edge pattern in the graph is independent of node class labels. If it
    is positive, then the nodes in the graph tend to connect to nodes of the
    same class more often, and if it is negative, then the nodes in the graph
    tend to connect to nodes of different classes more often (compared to the
    null model where edges are independent of node class labels).

    Parameters
    ----------
    graph : DGLGraph
        The graph. It is converted with ``to_bidirected`` on CPU and then
        moved to the device of ``y`` before any quantity is computed.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).

    Returns
    -------
    float
        The adjusted homophily value.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([1, 2, 0, 4], [0, 1, 2, 3]))
    >>> y = torch.tensor([0, 0, 0, 0, 1])
    >>> dgl.adjusted_homophily(graph, y)
    -0.1428571492433548
    """
    check_pytorch()

    bidirected = to_bidirected(graph.cpu()).to(y.device)

    edge_level = edge_homophily(bidirected, y)

    # Accumulate the total degree of every class.
    node_degrees = bidirected.in_degrees().float()
    class_count = y.max().item() + 1
    per_class_degree = torch.zeros(class_count).to(y.device)
    per_class_degree.index_add_(dim=0, index=y, source=node_degrees)

    # Sum over classes of the squared degree-weighted class probability.
    squared_total = (per_class_degree**2).sum()
    expected = squared_total / bidirected.num_edges() ** 2

    adjusted = (edge_level - expected) / (1 - expected)

    return adjusted.item()
def zero_initializer(
    shape, dtype, ctx, id_range
):  # pylint: disable=unused-argument
    """Feature initializer that fills new features with zeros.

    Parameters
    ----------
    shape : tuple of int
        The shape of the result features.
    dtype : data type object
        The data type of the returned features.
    ctx : context object
        The device context of the returned features.
    id_range : slice
        Unused by this initializer.

    Returns
    -------
    Tensor
        The result of ``F.zeros(shape, dtype, ctx)``.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.DGLGraph()
    >>> g.set_n_initializer(dgl.init.zero_initializer)

    See Also
    --------
    dgl.DGLGraph.set_n_initializer
    dgl.DGLGraph.set_e_initializer
    """
    zero_features = F.zeros(shape, dtype, ctx)
    return zero_features
def edge_label_informativeness(graph, y, eps=1e-8):
    r"""Compute the edge label informativeness of a labeled graph.

    Label informativeness (:math:`\mathrm{LI}`) is a characteristic of labeled
    graphs proposed in *Characterizing Graph Datasets for Node Classification:
    Homophily-Heterophily Dichotomy and Beyond*.

    Label informativeness shows how much information about a node's label we
    get from knowing its neighbor's label. Formally, assume that we sample an
    edge :math:`(\xi,\eta) \in E`. The class labels of nodes :math:`\xi` and
    :math:`\eta` are then random variables :math:`y_\xi` and :math:`y_\eta`.
    We want to measure the amount of knowledge the label :math:`y_\eta` gives
    for predicting :math:`y_\xi`. The entropy :math:`H(y_\xi)` measures the
    "hardness" of predicting the label of :math:`\xi` without knowing
    :math:`y_\eta`. Given :math:`y_\eta`, this value is reduced to the
    conditional entropy :math:`H(y_\xi|y_\eta)`. In other words, :math:`y_\eta`
    reveals :math:`I(y_\xi,y_\eta) = H(y_\xi) - H(y_\xi|y_\eta)` information
    about the label. To make the obtained quantity comparable across different
    datasets, label informativeness is defined as the normalized mutual
    information of :math:`y_{\xi}` and :math:`y_{\eta}`:

    .. math::
      \mathrm{LI} = \frac{I(y_\xi,y_\eta)}{H(y_\xi)}

    Depending on the distribution used for sampling an edge
    :math:`(\xi, \eta)`, several variants of label informativeness can be
    obtained. In edge label informativeness (:math:`\mathrm{LI}_{edge}`),
    edges are sampled uniformly at random, so high-degree nodes are given
    more weight. In node label informativeness (:math:`\mathrm{LI}_{node}`),
    first a node is sampled uniformly at random and then an edge incident to
    it is sampled uniformly at random, so all nodes are weighted equally.

    This function computes edge label informativeness.

    Parameters
    ----------
    graph : DGLGraph
        The graph. It is converted with ``to_bidirected`` on CPU and then
        moved to the device of ``y`` before any quantity is computed.
    y : torch.Tensor
        The node labels, which is a tensor of shape (|V|).
    eps : float, optional
        A small constant for numerical stability. (default: 1e-8)

    Returns
    -------
    float
        The edge label informativeness value.

    Notes
    -----
    Entropy terms use the convention :math:`0 \log 0 = 0`, so a class id
    whose nodes have zero total degree (or a label id that never occurs)
    contributes nothing instead of turning the result into NaN.

    Examples
    --------
    >>> import dgl
    >>> import torch

    >>> graph = dgl.graph(([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]))
    >>> y = torch.tensor([0, 0, 0, 0, 1, 1])
    >>> dgl.edge_label_informativeness(graph, y)
    0.25177597999572754
    """
    check_pytorch()

    graph = to_bidirected(graph.cpu()).to(y.device)

    degrees = graph.in_degrees().float()
    # Plain Python int, consistent with the homophily utilities.
    num_classes = int(y.max().item()) + 1

    # Degree-weighted class distribution: p(k) is proportional to the
    # total degree of the nodes labeled k.
    class_degree_weighted_probs = torch.zeros(num_classes).to(y.device)
    class_degree_weighted_probs.index_add_(dim=0, index=y, source=degrees)
    class_degree_weighted_probs /= class_degree_weighted_probs.sum()

    # Joint distribution of the endpoint labels of a uniformly sampled edge.
    src, dst = graph.edges()
    labels_u = y[src.long()]
    labels_v = y[dst.long()]
    edge_probs = torch.zeros(num_classes, num_classes).to(y.device)
    edge_probs.index_put_(
        indices=(labels_u, labels_v),
        values=torch.ones(graph.num_edges()).to(y.device),
        accumulate=True,
    )
    edge_probs /= edge_probs.sum()

    edge_probs += eps

    # xlogy(p, p) equals p * log(p) for p > 0 and is exactly 0 for p == 0,
    # whereas p * log(p) evaluates to 0 * -inf = NaN in that case.
    numerator = torch.xlogy(edge_probs, edge_probs).sum()
    denominator = torch.xlogy(
        class_degree_weighted_probs, class_degree_weighted_probs
    ).sum()
    # numerator = -H(y_xi, y_eta) and denominator = -H(y_xi), hence
    # LI = (2 H(y_xi) - H(y_xi, y_eta)) / H(y_xi) = 2 - numerator/denominator.
    li_edge = 2 - numerator / denominator

    return li_edge.item()
We want to measure the amount of knowledge the label :math:`y_\eta` gives for predicting :math:`y_\xi`. The entropy :math:`H(y_\xi)` measures the `hardness' of predicting the label of :math:`\xi` without knowing :math:`y_\eta`. Given :math:`y_\eta`, this value is reduced to the conditional entropy :math:`H(y_\xi|y_\eta)`. In other words, :math:`y_\eta` reveals :math:`I(y_\xi,y_\eta) = H(y_\xi) - H(y_\xi|y_\eta)` information about the label. To make the obtained quantity comparable across different datasets, label informativeness is defined as the normalized mutual information of :math:`y_{\xi}` and :math:`y_{\eta}`: .. math:: \mathrm{LI} = \frac{I(y_\xi,y_\eta)}{H(y_\xi)} Depending on the distribution used for sampling an edge :math:`(\xi, \eta)`, several variants of label informativeness can be obtained. Two of them are particularly intuitive: in edge label informativeness (:math:`\mathrm{LI}_{edge}`), edges are sampled uniformly at random, and in node label informativeness (:math:`\mathrm{LI}_{node}`), first a node is sampled uniformly at random and then an edge incident to it is sampled uniformly at random. These two versions of label informativeness differ in how they weight high/low-degree nodes. In edge label informativeness, averaging is over the edges, thus high-degree nodes are given more weight. In node label informativeness, averaging is over the nodes, so all nodes are weighted equally. This function computes node label informativeness. Parameters ---------- graph : DGLGraph The graph. y : torch.Tensor The node labels, which is a tensor of shape (|V|). eps : float, optional A small constant for numerical stability. (default: 1e-8) Returns ------- float The node label informativeness value. 
def enable_verbose_logging():
    """
    Enable debug level logging for DGL

    Sets the ``DMLC_LOG_DEBUG`` environment variable to ``"1"`` and raises
    the level of the ``"dgl-core"`` logger to ``logging.DEBUG``. A
    confirmation message is then emitted through that same logger.
    """
    os.environ["DMLC_LOG_DEBUG"] = "1"
    logger = logging.getLogger("dgl-core")
    logger.setLevel(logging.DEBUG)
    # Log through the DGL logger rather than the root logger: the root
    # logger drops INFO records at its default level, and calling
    # ``logging.info`` would implicitly install a root handler via
    # ``logging.basicConfig``.
    logger.info("DGL's logging level is set to DEBUG")
def merge(graphs):
    r"""Merge a sequence of graphs together into a single graph.

    Nodes and edges that exist in ``graphs[i+1]`` but not in
    ``dgl.merge(graphs[0:i+1])`` will be added to ``dgl.merge(graphs[0:i+1])``
    along with their data. Nodes that exist in both
    ``dgl.merge(graphs[0:i+1])`` and ``graphs[i+1]`` will be updated with
    ``graphs[i+1]``'s data if they do not match.

    Parameters
    ----------
    graphs : list[DGLGraph]
        Input graphs.

    Returns
    -------
    DGLGraph
        The merged graph.

    Notes
    -----
    * Inplace updates are applied to a new, empty graph.
    * Features that exist in ``dgl.graphs[i+1]`` will be created in
      ``dgl.merge(dgl.graphs[i+1])`` if they do not already exist.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch
    >>> g = dgl.graph((torch.tensor([0,1]), torch.tensor([2,3])))
    >>> g.ndata["x"] = torch.zeros(4)
    >>> h = dgl.graph((torch.tensor([1,2]), torch.tensor([0,4])))
    >>> h.ndata["x"] = torch.ones(5)
    >>> m = dgl.merge([g, h])

    ``m`` now contains edges and nodes from ``h`` and ``g``.

    >>> m.edges()
    (tensor([0, 1, 1, 2]), tensor([2, 3, 0, 4]))
    >>> m.nodes()
    tensor([0, 1, 2, 3, 4])

    ``g``'s data has updated with ``h``'s in ``m``.

    >>> m.ndata["x"]
    tensor([1., 1., 1., 1., 1.])

    See Also
    --------
    add_nodes
    add_edges
    """
    if len(graphs) == 0:
        raise DGLError("The input list of graphs cannot be empty.")

    # The first graph supplies the node/edge types, index dtype and device
    # of the result, which starts out empty.
    ref = graphs[0]
    ntypes = ref.ntypes
    etypes = ref.canonical_etypes
    merged = dgl.heterograph(
        {etype: ([], []) for etype in etypes},
        dict.fromkeys(ntypes, 0),
        ref.idtype,
        ref.device,
    )

    def _concat_ids(chunks):
        # Concatenate per-graph endpoint IDs, then align dtype and device
        # with the reference graph.
        return F.copy_to(
            F.astype(F.cat(chunks, dim=0), ref.idtype), ref.device
        )

    # Merge edges and edge data, one canonical edge type at a time.
    for etype in etypes:
        src_chunks, dst_chunks, frames = [], [], []
        for graph in graphs:
            etype_id = graph.get_etype_id(etype)
            src, dst = graph.edges(etype=etype)
            src_chunks.append(src)
            dst_chunks.append(dst)
            frames.append(graph._edge_frames[etype_id])

        # Only the edge features present on the reference graph are carried.
        feat_names = ref.edges[etype].data.keys()
        edge_feats = (
            None
            if len(feat_names) == 0
            else {
                name: F.cat([frame[name] for frame in frames], dim=0)
                for name in feat_names
            }
        )
        merged.add_edges(
            _concat_ids(src_chunks), _concat_ids(dst_chunks), edge_feats, etype
        )

    # Add isolated nodes, then overwrite node data graph by graph so that
    # later graphs take precedence.
    for graph in graphs:
        for ntype in ntypes:
            merged_frame_id = merged.get_ntype_id(ntype)
            incoming = graph._node_frames[graph.get_ntype_id(ntype)]
            count = graph.num_nodes(ntype=ntype)
            shortfall = count - merged.num_nodes(ntype=ntype)
            merged.add_nodes(max(0, shortfall), ntype=ntype)
            rows = F.arange(0, count, merged.idtype, merged.device)
            merged._node_frames[merged_frame_id].update_row(rows, incoming)

    return merged
Examples -------- **Homogeneous graph** >>> import torch, dgl >>> g = dgl.rand_graph(100, 500) # a random graph of 100 nodes, 500 edges >>> x = torch.randn(g.num_nodes(), 5) # 5 features >>> y = dgl.copy_u(g, x) >>> print(y.shape) (500, 5) **Heterogeneous graph** >>> hg = dgl.heterograph({ ... ('user', 'follow', 'user'): ([0, 1, 2], [2, 3, 4]), ... ('user', 'like', 'movie'): ([3, 3, 1, 2], [0, 0, 1, 1]) ... }) >>> x = torch.randn(hg.num_nodes('user'), 5) >>> y = dgl.copy_u(hg, x, etype='like') >>> print(y.shape) (4, 5) """ etype_subg = g if etype is None else g[etype] return ops.gsddmm(etype_subg, "copy_lhs", x_node, None) def copy_v(g, x_node, etype=None): """Compute new edge data by fetching from destination node data. Given an input graph :math:`G(V, E)` (or a unidirectional bipartite graph :math:`G(V_{src}, V_{dst}, E)`) and an input tensor :math:`X`, the operator computes a tensor :math:`Y` storing the new edge data. For each edge :math:`e=(u,v) \\in E`, it computes: .. math: Y_e = X_v Parameters ---------- g : DGLGraph The input graph. x_node : Tensor The tensor storing the destination node data. Shape :math:`(|V_{dst}|, *)`. etype : str or (str, str, str), optional Edge type. If not specified, the input graph must have only one type of edges. Returns ------- Tensor The tensor storing the new edge data. Shape :math:`(|E|, *)`. Examples -------- **Homogeneous graph** >>> import torch, dgl >>> g = dgl.rand_graph(100, 500) # a random graph of 100 nodes, 500 edges >>> x = torch.randn(g.num_nodes(), 5) # 5 features >>> y = dgl.copy_v(g, x) >>> print(y.shape) (500, 5) **Heterogeneous graph** >>> hg = dgl.heterograph({ ... ('user', 'follow', 'user'): ([0, 1, 2], [2, 3, 4]), ... ('user', 'like', 'movie'): ([3, 3, 1, 2], [0, 0, 1, 1]) ... 
def _gen_u_op_v(op):
    """Internal helper function to create binary edge-wise operators.

    The returned function combines source-node data with destination-node
    data on every edge by calling ``ops.gsddmm``. It carries:

    - Name (``__name__`` and ``__qualname__``): u_{op}_v
    - A docstring generated from the template below

    Parameters
    ----------
    op : str
        Binary operator name. Must be 'add', 'sub', 'mul', 'div' or 'dot'.

    Returns
    -------
    callable
        A function with signature ``func(g, x_node, y_node, etype=None)``.

    Raises
    ------
    KeyError
        If ``op`` is not one of the supported operator names.
    """
    name = f"u_{op}_v"
    op_verb = {
        "add": "adding",
        "sub": "subtracting",
        "mul": "multiplying",
        "div": "dividing",
        "dot": "dot-product",
    }
    # NOTE: the directive must be spelled ``.. math::`` -- with a single colon
    # reST treats the line as a comment and the formula is silently dropped.
    docstring = f"""Compute new edge data by {op_verb[op]} the source node data and destination node data.

    Given an input graph :math:`G(V, E)` (or a unidirectional bipartite graph
    :math:`G(V_{{src}}, V_{{dst}}, E)`) and two input tensors :math:`X` and :math:`Y`,
    the operator computes a tensor :math:`Z` storing the new edge data.
    For each edge :math:`e=(u,v) \\in E`, it computes:

    .. math::
        Z_e = {op}(X_u, Y_v)

    If :math:`X_u` and :math:`Y_v` are vectors or high-dimensional tensors, the
    operation is element-wise and supports shape broadcasting. Read more about
    `NumPy's broadcasting semantics
    <https://numpy.org/doc/stable/user/basics.broadcasting.html>`_.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    x_node : Tensor
        The tensor storing the source node data. Shape :math:`(|V_{{src}}|, *)`.
    y_node : Tensor
        The tensor storing the destination node data. Shape :math:`(|V_{{dst}}|, *)`.
    etype : str or (str, str, str), optional
        Edge type. If not specified, the input graph must have only one type of
        edges.

    Returns
    -------
    Tensor
        The tensor storing the new edge data. Shape :math:`(|E|, *)`.

    Examples
    --------

    **Homogeneous graph**

    >>> import torch, dgl
    >>> g = dgl.rand_graph(100, 500)  # a random graph of 100 nodes, 500 edges
    >>> x = torch.randn(g.num_nodes(), 5)  # 5 features
    >>> y = torch.randn(g.num_nodes(), 5)  # 5 features
    >>> z = dgl.{name}(g, x, y)
    >>> print(z.shape)
    (500, 5)

    **Heterogeneous graph**

    >>> hg = dgl.heterograph({{
    ...     ('user', 'follow', 'user'): ([0, 1, 2], [2, 3, 4]),
    ...     ('user', 'like', 'movie'): ([3, 3, 1, 2], [0, 0, 1, 1])
    ... }})
    >>> x = torch.randn(hg.num_nodes('user'), 5)
    >>> y = torch.randn(hg.num_nodes('user'), 5)
    >>> z = dgl.{name}(hg, x, y, etype='follow')
    >>> print(z.shape)
    (3, 5)

    **Shape broadcasting**

    >>> x = torch.randn(g.num_nodes(), 5)  # 5 features
    >>> y = torch.randn(g.num_nodes(), 1)  # one feature
    >>> z = dgl.{name}(g, x, y)
    >>> print(z.shape)
    (500, 5)
    """

    def func(g, x_node, y_node, etype=None):
        # Restrict to the requested relation; with no etype the graph itself
        # is passed through unchanged.
        etype_subg = g if etype is None else g[etype]
        return ops.gsddmm(
            etype_subg, op, x_node, y_node, lhs_target="u", rhs_target="v"
        )

    func.__name__ = name
    # Keep the qualified name in sync so reprs and tracebacks show
    # ``u_{op}_v`` instead of ``_gen_u_op_v.<locals>.func``.
    func.__qualname__ = name
    func.__doc__ = docstring
    return func
def thread_wrapped_func(func):
    """Wrap a process entry point to make it work with OpenMP.

    The wrapped callable runs ``func`` in a freshly started thread and hands
    its outcome back to the calling thread through a multiprocessing queue.

    Parameters
    ----------
    func : callable
        The function to wrap.

    Returns
    -------
    callable
        A function with the same signature as ``func``. It returns what
        ``func`` returns. If ``func`` raises an ``Exception``, an exception
        of the same class is raised in the caller with the formatted
        traceback of the worker thread as its message and the original
        exception as ``__cause__``. When that class cannot be constructed
        from a single string, ``RuntimeError`` is raised instead.
    """

    @wraps(func)
    def decorated_function(*args, **kwargs):
        queue = mp.Queue()

        def _queue_result():
            exception, trace, res = None, None, None
            try:
                res = func(*args, **kwargs)
            except Exception as e:  # pylint: disable=broad-except
                exception = e
                # Capture the traceback text here: the traceback object
                # itself does not survive the trip through the queue.
                trace = traceback.format_exc()
            queue.put((res, exception, trace))

        start_new_thread(_queue_result, ())
        result, exception, trace = queue.get()
        if exception is None:
            return result

        # Rebuild an exception of the same class carrying the worker's
        # traceback text. Some exception types (e.g. UnicodeDecodeError)
        # cannot be constructed from one string; fall back to RuntimeError
        # so that the real failure is not masked by a constructor TypeError.
        try:
            reraised = exception.__class__(trace)
        except Exception:  # pylint: disable=broad-except
            reraised = RuntimeError(trace)
        raise reraised from exception

    return decorated_function
def shared_tensor(shape, dtype=torch.float32):
    """Create a tensor in shared memory accessible by all processes within
    the same ``torch.distributed`` process group.

    The content is uninitialized.

    Parameters
    ----------
    shape : tuple[int]
        The shape of the tensor.
    dtype : torch.dtype, optional
        The dtype of the tensor.

    Returns
    -------
    Tensor
        The shared tensor.
    """

    def _allocate():
        # Source of the initial (uninitialized) content.
        return torch.empty(*shape, dtype=dtype)

    return call_once_and_share(_allocate, shape, dtype)
def gpu(dev_id=0):
    """Construct a GPU device

    Parameters
    ----------
    dev_id : int, optional
        The integer device id

    Returns
    -------
    ctx : DGLContext
        The created context
    """
    # Device-type code 2; the sibling ``cpu`` constructor in this module
    # passes 1. Presumably these follow the backend's device enumeration.
    return DGLContext(2, dev_id)
@register_object("aten.SparseMatrix")
class SparseMatrix(ObjectBase):
    """Sparse matrix object class in C++ backend."""

    @property
    def format(self):
        """Sparse format enum

        Returns
        -------
        int
            Format code; ``__repr__`` translates it with
            ``SparseFormat.FORMAT2STR``.
        """
        return _CAPI_DGLSparseMatrixGetFormat(self)

    @property
    def num_rows(self):
        """Number of rows.

        Returns
        -------
        int
        """
        return _CAPI_DGLSparseMatrixGetNumRows(self)

    @property
    def num_cols(self):
        """Number of columns.

        Returns
        -------
        int
        """
        return _CAPI_DGLSparseMatrixGetNumCols(self)

    @property
    def indices(self):
        """Index arrays.

        Returns
        -------
        list of ndarrays
            Three arrays, converted to backend tensors.
        """
        # Fetch all three arrays from the C API first, then convert them.
        raw_arrays = [
            _CAPI_DGLSparseMatrixGetIndices(self, pos) for pos in range(3)
        ]
        return list(map(F.zerocopy_from_dgl_ndarray, raw_arrays))

    @property
    def flags(self):
        """Flag arrays

        Returns
        -------
        list of boolean
        """
        return _CAPI_DGLSparseMatrixGetFlags(self)

    def __getstate__(self):
        # Pickle as plain values; the handle is rebuilt in __setstate__.
        state = (
            self.format,
            self.num_rows,
            self.num_cols,
            self.indices,
            self.flags,
        )
        return state

    def __setstate__(self, state):
        fmt, nrows, ncols, index_tensors, flags = state
        dgl_indices = [
            F.zerocopy_to_dgl_ndarray(tensor) for tensor in index_tensors
        ]
        self.__init_handle_by_constructor__(
            _CAPI_DGLCreateSparseMatrix, fmt, nrows, ncols, dgl_indices, flags
        )

    def __repr__(self):
        template = 'SparseMatrix(fmt="{}", shape=({},{}))'
        fmt_name = SparseFormat.FORMAT2STR[self.format]
        return template.format(fmt_name, self.num_rows, self.num_cols)
def _load_backend(mod_name):
    """Import a backend sub-module and mirror its namespace onto this module.

    Every entry of the imported module's ``__dict__`` is set as an attribute
    of the current module, so the backend's names become importable from
    here.

    Parameters
    ----------
    mod_name : str
        Name of the sub-module, relative to this module, to import.
    """
    backend_mod = importlib.import_module(".%s" % mod_name, __name__)
    this_mod = sys.modules[__name__]
    for attr_name, attr_value in vars(backend_mod).items():
        setattr(this_mod, attr_name, attr_value)
"EdgeConv", "GINConv", "GMMConv", "NNConv", "SGConv", ] ================================================ FILE: python/dgl/nn/mxnet/conv/agnnconv.py ================================================ """MXNet Module for Attention-based Graph Neural Network layer""" # pylint: disable= no-member, arguments-differ, invalid-name import mxnet as mx from mxnet.gluon import nn from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair from ...functional import edge_softmax from ..utils import normalize class AGNNConv(nn.Block): r"""Attention-based Graph Neural Network layer from `Attention-based Graph Neural Network for Semi-Supervised Learning `__ .. math:: H^{l+1} = P H^{l} where :math:`P` is computed as: .. math:: P_{ij} = \mathrm{softmax}_i ( \beta \cdot \cos(h_i^l, h_j^l)) where :math:`\beta` is a single scalar parameter. Parameters ---------- init_beta : float, optional The :math:`\beta` in the formula, a single scalar parameter. learn_beta : bool, optional If True, :math:`\beta` will be learnable parameter. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... 
# a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. Example ------- >>> import dgl >>> import numpy as np >>> import mxnet as mx >>> from dgl.nn import AGNNConv >>> >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = mx.nd.ones((6, 10)) >>> conv = AGNNConv() >>> conv.initialize(ctx=mx.cpu(0)) >>> res = conv(g, feat) >>> res [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.] [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]] """ def __init__( self, init_beta=1.0, learn_beta=True, allow_zero_in_degree=False ): super(AGNNConv, self).__init__() self._allow_zero_in_degree = allow_zero_in_degree with self.name_scope(): self.beta = self.params.get( "beta", shape=(1,), grad_req="write" if learn_beta else "null", init=mx.init.Constant(init_beta), ) def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat): r""" Description ----------- Compute AGNN layer. Parameters ---------- graph : DGLGraph The graph. feat : mxnet.NDArray The input feature of shape :math:`(N, *)` :math:`N` is the number of nodes, and :math:`*` could be of any shape. If a pair of mxnet.NDArray is given, the pair must contain two tensors of shape :math:`(N_{in}, *)` and :math:`(N_{out}, *)`, the :math:`*` in the later tensor must equal the previous one. 
class APPNPConv(nn.Block):
    r"""Approximate Personalized Propagation of Neural Predictions layer from `Predict then
    Propagate: Graph Neural Networks meet Personalized PageRank
    <https://arxiv.org/pdf/1810.05997.pdf>`__

    .. math::
        H^{0} &= X

        H^{l+1} &= (1-\alpha)\left(\tilde{D}^{-1/2}
        \tilde{A} \tilde{D}^{-1/2} H^{l}\right) + \alpha H^{0}

    where :math:`\tilde{A}` is :math:`A` + :math:`I`.

    Parameters
    ----------
    k : int
        The number of iterations :math:`K`.
    alpha : float
        The teleport probability :math:`\alpha`.
    edge_drop : float, optional
        The dropout rate on edges that controls the
        messages received by each node. Default: ``0``.

    Example
    -------
    >>> import dgl
    >>> import numpy as np
    >>> import mxnet as mx
    >>> from dgl.nn import APPNPConv
    >>>
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> feat = mx.nd.ones((6, 10))
    >>> conv = APPNPConv(k=3, alpha=0.5)
    >>> conv.initialize(ctx=mx.cpu(0))
    >>> res = conv(g, feat)
    >>> res
    [[1.         1.         1.         1.         1.         1.
      1.         1.         1.         1.        ]
     [1.         1.         1.         1.         1.         1.
      1.         1.         1.         1.        ]
     [1.         1.         1.         1.         1.         1.
      1.         1.         1.         1.        ]
     [1.0303301  1.0303301  1.0303301  1.0303301  1.0303301  1.0303301
      1.0303301  1.0303301  1.0303301  1.0303301 ]
     [0.86427665 0.86427665 0.86427665 0.86427665 0.86427665 0.86427665
      0.86427665 0.86427665 0.86427665 0.86427665]
     [0.5        0.5        0.5        0.5        0.5        0.5
      0.5        0.5        0.5        0.5       ]]
    <NDArray 6x10 @cpu(0)>
    """

    def __init__(self, k, alpha, edge_drop=0.0):
        super(APPNPConv, self).__init__()
        self._k = k
        self._alpha = alpha
        with self.name_scope():
            # Dropout applied to per-edge weights in ``forward``.
            self.edge_drop = nn.Dropout(edge_drop)

    def forward(self, graph, feat):
        r"""

        Description
        -----------
        Compute APPNP layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mx.NDArray
            The input feature of shape :math:`(N, *)`. :math:`N` is the
            number of nodes, and :math:`*` could be of any shape.

        Returns
        -------
        mx.NDArray
            The output feature of shape :math:`(N, *)` where :math:`*`
            should be the same as input shape.
        """
        with graph.local_scope():
            # norm = in_degree ** -0.5, with degrees clipped to at least 1
            # so that zero-in-degree nodes do not divide by zero.
            norm = mx.nd.power(
                mx.nd.clip(
                    graph.in_degrees().astype(feat.dtype),
                    a_min=1,
                    a_max=float("inf"),
                ),
                -0.5,
            )
            # Append singleton axes so that norm broadcasts against feat.
            shp = norm.shape + (1,) * (feat.ndim - 1)
            norm = norm.reshape(shp).as_in_context(feat.context)
            # Keep the input around for the teleport term of every step.
            feat_0 = feat
            for _ in range(self._k):
                # normalization by src node
                feat = feat * norm
                graph.ndata["h"] = feat
                # Unit edge weights passed through dropout: a fresh mask is
                # drawn on every propagation step.
                graph.edata["w"] = self.edge_drop(
                    nd.ones((graph.num_edges(), 1), ctx=feat.context)
                )
                # Weighted sum of incoming messages.
                graph.update_all(fn.u_mul_e("h", "w", "m"), fn.sum("m", "h"))
                feat = graph.ndata.pop("h")
                # normalization by dst node
                feat = feat * norm
                # Mix the propagated features with the original input.
                feat = (1 - self._alpha) * feat + self._alpha * feat_0
            return feat
Example ------- >>> import dgl >>> import numpy as np >>> import mxnet as mx >>> from dgl.nn import ChebConv >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = mx.nd.ones((6, 10)) >>> conv = ChebConv(10, 2, 2) >>> conv.initialize(ctx=mx.cpu(0)) >>> res = conv(g, feat) >>> res [[ 0.832592 -0.738757 ] [ 0.832592 -0.738757 ] [ 0.832592 -0.738757 ] [ 0.43377423 -1.0455742 ] [ 1.1145986 -0.5218046 ] [ 1.7954229 0.00196505]] """ def __init__(self, in_feats, out_feats, k, bias=True): super(ChebConv, self).__init__() self._in_feats = in_feats self._out_feats = out_feats self._k = k with self.name_scope(): self.fc = nn.Sequential() for _ in range(k): self.fc.add( nn.Dense( out_feats, use_bias=False, weight_initializer=mx.init.Xavier( magnitude=math.sqrt(2.0) ), in_units=in_feats, ) ) if bias: self.bias = self.params.get( "bias", shape=(out_feats,), init=mx.init.Zero() ) else: self.bias = None def forward(self, graph, feat, lambda_max=None): r""" Description ----------- Compute ChebNet layer. Parameters ---------- graph : DGLGraph The graph. feat : mxnet.NDArray The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. lambda_max : list or tensor or None, optional. A list(tensor) with length :math:`B`, stores the largest eigenvalue of the normalized laplacian of each individual graph in ``graph``, where :math:`B` is the batch size of the input graph. Default: None. If None, this method would set the default value to 2. One can use :func:`dgl.laplacian_lambda_max` to compute this value. Returns ------- mxnet.NDArray The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is size of output feature. 
class DenseChebConv(nn.Block):
    r"""Chebyshev Spectral Graph Convolution layer from `Convolutional
    Neural Networks on Graphs with Fast Localized Spectral Filtering
    <https://arxiv.org/pdf/1606.09375.pdf>`__

    We recommend to use this module when applying ChebConv on dense graphs.

    Parameters
    ----------
    in_feats: int
        Dimension of input features :math:`h_i^{(l)}`.
    out_feats: int
        Dimension of output features :math:`h_i^{(l+1)}`.
    k : int
        Chebyshev filter size.
    bias : bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.

    See also
    --------
    :class:`~dgl.nn.mxnet.conv.ChebConv`
    """

    def __init__(self, in_feats, out_feats, k, bias=True):
        super(DenseChebConv, self).__init__()
        self._in_feats = in_feats
        self._out_feats = out_feats
        self._k = k
        with self.name_scope():
            # One bias-free projection per Chebyshev order.
            self.fc = nn.Sequential()
            for _ in range(k):
                self.fc.add(
                    nn.Dense(
                        out_feats,
                        in_units=in_feats,
                        use_bias=False,
                        weight_initializer=mx.init.Xavier(
                            magnitude=math.sqrt(2.0)
                        ),
                    )
                )
            if bias:
                self.bias = self.params.get(
                    "bias", shape=(out_feats,), init=mx.init.Zero()
                )
            else:
                self.bias = None

    def forward(self, adj, feat, lambda_max=None):
        r"""

        Description
        -----------
        Compute (Dense) Chebyshev Spectral Graph Convolution layer.

        Parameters
        ----------
        adj : mxnet.NDArray
            The adjacency matrix of the graph to apply Graph Convolution on,
            should be of shape :math:`(N, N)`, where a row represents the
            destination and a column represents the source.
        feat : mxnet.NDArray
            The input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the
            number of nodes.
        lambda_max : float or None, optional
            A float value indicates the largest eigenvalue of given graph.
            Default: None. If None, it is computed from the normalized
            Laplacian with a symmetric eigen-solver, which is only valid
            when ``adj`` is symmetric.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is size of output feature.
        """
        A = adj.astype(feat.dtype).as_in_context(feat.context)
        num_nodes = A.shape[0]

        # Row sums are in-degrees (rows index destinations); clip so that
        # isolated nodes do not divide by zero.
        in_degree = 1.0 / nd.clip(A.sum(axis=1), 1, float("inf")).sqrt()
        D_invsqrt = nd.diag(in_degree)
        # Match the dtype of A so the arithmetic below does not mix dtypes.
        I = nd.eye(num_nodes, ctx=A.context, dtype=A.dtype)
        L = I - nd.dot(D_invsqrt, nd.dot(A, D_invsqrt))

        if lambda_max is None:
            # NOTE(zihao): syevd assumes a symmetric input, so this only
            # works for undirected graphs (symmetric adjacency matrices).
            lambda_max = (nd.linalg.syevd(L)[1]).max()

        # Rescale the spectrum of L into [-1, 1].
        L_hat = 2 * L / lambda_max - I

        # Order-0 term: T_0(L_hat) = I, so the projection is used directly.
        Zh = self.fc[0](feat)

        # Only the two most recent polynomials are needed by the recurrence
        # T_i = 2 * L_hat * T_{i-1} - T_{i-2}; keep just those.
        Z_prev, Z_curr = None, I
        for i in range(1, self._k):
            if i == 1:
                Z_next = L_hat
            else:
                Z_next = 2 * nd.dot(L_hat, Z_curr) - Z_prev
            Zh = Zh + nd.dot(Z_next, self.fc[i](feat))
            Z_prev, Z_curr = Z_curr, Z_next

        if self.bias is not None:
            Zh = Zh + self.bias.data(feat.context)
        return Zh
def __init__(
    self, in_feats, out_feats, norm="both", bias=True, activation=None
):
    """Register the weight matrix, the optional bias and the activation.

    Parameters
    ----------
    in_feats : int
        Input feature size.
    out_feats : int
        Output feature size.
    norm : str, optional
        Normalization mode stored for use in ``forward``.
        Default: ``'both'``.
    bias : bool, optional
        If True, register a zero-initialised bias of shape
        ``(out_feats,)``. Default: ``True``.
    activation : callable or None, optional
        Stored as given and applied to the output in ``forward`` when not
        None. Default: ``None``.
    """
    super(DenseGraphConv, self).__init__()
    self._in_feats, self._out_feats = in_feats, out_feats
    self._norm = norm

    with self.name_scope():
        weight_init = mx.init.Xavier(magnitude=math.sqrt(2.0))
        self.weight = self.params.get(
            "weight", shape=(in_feats, out_feats), init=weight_init
        )
        self.bias = (
            self.params.get(
                "bias", shape=(out_feats,), init=mx.init.Zero()
            )
            if bias
            else None
        )
        self._activation = activation
class DenseSAGEConv(nn.Block):
    """GraphSAGE layer from `Inductive Representation Learning on Large Graphs
    <https://arxiv.org/abs/1706.02216>`__

    We recommend to use this module when applying GraphSAGE on dense graphs.

    Note that we only support gcn aggregator in DenseSAGEConv.

    Parameters
    ----------
    in_feats : int
        Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`.
    out_feats : int
        Output feature size; i.e, the number of dimensions of
        :math:`h_i^{(l+1)}`.
    feat_drop : float, optional
        Dropout rate on features. Default: 0.
    bias : bool
        If True, adds a learnable bias to the output. Default: ``True``.
    norm : callable activation function/layer or None, optional
        If not None, applies normalization to the updated node features.
    activation : callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node
        features. Default: ``None``.

    See also
    --------
    :class:`~dgl.nn.mxnet.conv.SAGEConv`
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        feat_drop=0.0,
        bias=True,
        norm=None,
        activation=None,
    ):
        super(DenseSAGEConv, self).__init__()
        self._in_feats, self._out_feats = in_feats, out_feats
        self._norm = norm
        with self.name_scope():
            self.feat_drop = nn.Dropout(feat_drop)
            self.activation = activation
            xavier = mx.init.Xavier(magnitude=math.sqrt(2.0))
            self.fc = nn.Dense(
                out_feats,
                in_units=in_feats,
                use_bias=bias,
                weight_initializer=xavier,
            )

    def forward(self, adj, feat):
        r"""

        Description
        -----------
        Compute (Dense) Graph SAGE layer.

        Parameters
        ----------
        adj : mxnet.NDArray
            The adjacency matrix of the graph to apply SAGE Convolution on.
            When applied to a unidirectional bipartite graph, ``adj`` should
            be of shape :math:`(N_{out}, N_{in})`; when applied to a homo
            graph, ``adj`` should be of shape :math:`(N, N)`. In both cases,
            a row represents a destination node while a column represents a
            source node.
        feat : mxnet.NDArray or a pair of mxnet.NDArray
            If a mxnet.NDArray is given, the input feature of shape
            :math:`(N, D_{in})` where :math:`D_{in}` is size of input
            feature, :math:`N` is the number of nodes.
            If a pair of mxnet.NDArray is given, the pair must contain two
            tensors of shape :math:`(N_{in}, D_{in})` and
            :math:`(N_{out}, D_{in})`.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is size of output feature.
        """
        check_eq_shape(feat)

        if not isinstance(feat, tuple):
            dropped = self.feat_drop(feat)
            src_h, dst_h = dropped, dropped
        else:
            src_h = self.feat_drop(feat[0])
            dst_h = self.feat_drop(feat[1])

        dense_adj = adj.astype(src_h.dtype).as_in_context(src_h.context)
        row_degrees = dense_adj.sum(axis=1, keepdims=True)

        # gcn aggregator: mean over the neighbours plus the node itself.
        pooled = (nd.dot(dense_adj, src_h) + dst_h) / (row_degrees + 1)
        out = self.fc(pooled)

        # Activation first, then normalization.
        for post_op in (self.activation, self._norm):
            if post_op is not None:
                out = post_op(out)
        return out
def __init__(
    self, in_feat, out_feat, batch_norm=False, allow_zero_in_degree=False
):
    """Create the two linear maps and the optional batch normalization.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    batch_norm : bool
        If truthy, a ``BatchNorm`` layer over ``out_feat`` channels is
        created as ``self.bn``. Default: ``False``.
    allow_zero_in_degree : bool, optional
        Initial value of the zero-in-degree flag. Default: ``False``.
    """
    super(EdgeConv, self).__init__()
    self._allow_zero_in_degree = allow_zero_in_degree
    self.batch_norm = batch_norm

    def _linear():
        # theta and phi share the same layer configuration.
        return nn.Dense(
            out_feat, in_units=in_feat, weight_initializer=mx.init.Xavier()
        )

    with self.name_scope():
        self.theta = _linear()
        self.phi = _linear()

        if batch_norm:
            self.bn = nn.BatchNorm(in_channels=out_feat)
class GATConv(nn.Block):
    r"""Graph attention layer from `Graph Attention Network
    <https://arxiv.org/pdf/1710.10903.pdf>`__

    .. math::
        h_i^{(l+1)} = \sum_{j\in \mathcal{N}(i)} \alpha_{i,j} W^{(l)} h_j^{(l)}

    where :math:`\alpha_{ij}` is the attention score between node :math:`i`
    and node :math:`j`:

    .. math::
        \alpha_{ij}^{l} &= \mathrm{softmax_i} (e_{ij}^{l})

        e_{ij}^{l} &= \mathrm{LeakyReLU}\left(\vec{a}^T [W h_{i} \| W h_{j}]\right)

    Parameters
    ----------
    in_feats : int, or pair of ints
        Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`.
        GATConv can be applied on homogeneous graph and unidirectional
        bipartite graph.
        If the layer is to be applied to a unidirectional bipartite graph,
        ``in_feats`` specifies the input feature size on both the source and
        destination nodes. If a scalar is given, the source and destination
        node feature size would take the same value.
    out_feats : int
        Output feature size; i.e, the number of dimensions of
        :math:`h_i^{(l+1)}`.
    num_heads : int
        Number of heads in Multi-Head Attention.
    feat_drop : float, optional
        Dropout rate on feature. Defaults: ``0``.
    attn_drop : float, optional
        Dropout rate on attention weight. Defaults: ``0``.
    negative_slope : float, optional
        LeakyReLU angle of negative slope. Defaults: ``0.2``.
    residual : bool, optional
        If True, use residual connection. Defaults: ``False``.
    activation : callable activation function/layer or None, optional.
        If not None, applies an activation function to the updated node
        features. Default: ``None``.
    allow_zero_in_degree : bool, optional
        If there are 0-in-degree nodes in the graph, output for those nodes
        will be invalid since no message will be passed to those nodes. This
        is harmful for some applications causing silent performance
        regression. This module will raise a DGLError if it detects
        0-in-degree nodes in input graph. By setting ``True``, it will
        suppress the check and let the users handle it by themselves.
        Defaults: ``False``.

    Note
    ----
    Zero in-degree nodes will lead to invalid output value. This is because
    no message will be passed to those nodes, the aggregation function will
    be applied on empty input. A common practice to avoid this is to add a
    self-loop for each node in the graph if it is homogeneous, which can be
    achieved by:

    >>> g = ... # a DGLGraph
    >>> g = dgl.add_self_loop(g)

    Calling ``add_self_loop`` will not work for some graphs, for example,
    heterogeneous graph since the edge type can not be decided for self_loop
    edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to
    unblock the code and handle zero-in-degree nodes manually. A common
    practice to handle this is to filter out the nodes with zero-in-degree
    when use after conv.

    Examples
    --------
    >>> import dgl
    >>> import numpy as np
    >>> import mxnet as mx
    >>> from mxnet import gluon
    >>> from dgl.nn import GATConv
    >>>
    >>> # Case 1: Homogeneous graph
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = dgl.add_self_loop(g)
    >>> feat = mx.nd.ones((6, 10))
    >>> gatconv = GATConv(10, 2, num_heads=3)
    >>> gatconv.initialize(ctx=mx.cpu(0))
    >>> res = gatconv(g, feat)
    >>> res
    [[[ 0.32368395 -0.10501936]
    [ 1.0839728   0.92690575]
    [-0.54581136 -0.84279203]]
    [[ 0.32368395 -0.10501936]
    [ 1.0839728   0.92690575]
    [-0.54581136 -0.84279203]]
    [[ 0.32368395 -0.10501936]
    [ 1.0839728   0.92690575]
    [-0.54581136 -0.84279203]]
    [[ 0.32368395 -0.10501937]
    [ 1.0839728   0.9269058 ]
    [-0.5458114  -0.8427921 ]]
    [[ 0.32368395 -0.10501936]
    [ 1.0839728   0.92690575]
    [-0.54581136 -0.84279203]]
    [[ 0.32368395 -0.10501936]
    [ 1.0839728   0.92690575]
    [-0.54581136 -0.84279203]]]
    <NDArray 6x3x2 @cpu(0)>

    >>> # Case 2: Unidirectional bipartite graph
    >>> u = [0, 1, 0, 0, 1]
    >>> v = [0, 1, 2, 3, 2]
    >>> g = dgl.heterograph({('A', 'r', 'B'): (u, v)})
    >>> u_feat = mx.nd.random.randn(2, 5)
    >>> v_feat = mx.nd.random.randn(4, 10)
    >>> gatconv = GATConv((5,10), 2, 3)
    >>> gatconv.initialize(ctx=mx.cpu(0))
    >>> res = gatconv(g, (u_feat, v_feat))
    >>> res
    [[[-1.01624     1.8138596 ]
    [ 1.2322129  -0.8410206 ]
    [-1.9325689   1.3824553 ]]
    [[ 0.9915016  -1.6564168 ]
    [-0.32610354  0.42505783]
    [ 1.5278397  -0.92114615]]
    [[-0.32592064  0.62067866]
    [ 0.6162219  -0.3405491 ]
    [-1.356375    0.9988818 ]]
    [[-1.01624     1.8138596 ]
    [ 1.2322129  -0.8410206 ]
    [-1.9325689   1.3824553 ]]]
    <NDArray 4x3x2 @cpu(0)>
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        num_heads,
        feat_drop=0.0,
        attn_drop=0.0,
        negative_slope=0.2,
        residual=False,
        activation=None,
        allow_zero_in_degree=False,
    ):
        super(GATConv, self).__init__()
        self._num_heads = num_heads
        self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
        self._in_feats = in_feats
        self._out_feats = out_feats
        self._allow_zero_in_degree = allow_zero_in_degree
        with self.name_scope():
            if isinstance(in_feats, tuple):
                # Bipartite input: separate projections for each side.
                self.fc_src = nn.Dense(
                    out_feats * num_heads,
                    use_bias=False,
                    weight_initializer=mx.init.Xavier(
                        magnitude=math.sqrt(2.0)
                    ),
                    in_units=self._in_src_feats,
                )
                self.fc_dst = nn.Dense(
                    out_feats * num_heads,
                    use_bias=False,
                    weight_initializer=mx.init.Xavier(
                        magnitude=math.sqrt(2.0)
                    ),
                    in_units=self._in_dst_feats,
                )
            else:
                self.fc = nn.Dense(
                    out_feats * num_heads,
                    use_bias=False,
                    weight_initializer=mx.init.Xavier(
                        magnitude=math.sqrt(2.0)
                    ),
                    in_units=in_feats,
                )
            self.attn_l = self.params.get(
                "attn_l",
                shape=(1, num_heads, out_feats),
                init=mx.init.Xavier(magnitude=math.sqrt(2.0)),
            )
            self.attn_r = self.params.get(
                "attn_r",
                shape=(1, num_heads, out_feats),
                init=mx.init.Xavier(magnitude=math.sqrt(2.0)),
            )
            self.feat_drop = nn.Dropout(feat_drop)
            self.attn_drop = nn.Dropout(attn_drop)
            self.leaky_relu = nn.LeakyReLU(negative_slope)
            if residual:
                # The residual is taken from the destination features, so
                # its size is the destination input size. Using the raw
                # ``in_feats`` here would pass a tuple as ``in_units`` for
                # bipartite layers.
                if self._in_dst_feats != out_feats:
                    self.res_fc = nn.Dense(
                        out_feats * num_heads,
                        use_bias=False,
                        weight_initializer=mx.init.Xavier(
                            magnitude=math.sqrt(2.0)
                        ),
                        in_units=self._in_dst_feats,
                    )
                else:
                    self.res_fc = Identity()
            else:
                self.res_fc = None
            self.activation = activation

    def set_allow_zero_in_degree(self, set_value):
        r"""

        Description
        -----------
        Set allow_zero_in_degree flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat, get_attention=False):
        r"""

        Description
        -----------
        Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray or pair of mxnet.NDArray
            If a mxnet.NDArray is given, the input feature of shape
            :math:`(N, *, D_{in})` where :math:`D_{in}` is size of input
            feature, :math:`N` is the number of nodes.
            If a pair of mxnet.NDArray is given, the pair must contain two
            tensors of shape :math:`(N_{in}, *, D_{in_{src}})` and
            :math:`(N_{out}, *, D_{in_{dst}})`.
        get_attention : bool, optional
            Whether to return the attention values. Default to False.

        Returns
        -------
        mxnet.NDArray
            The output feature of shape :math:`(N, *, H, D_{out})` where
            :math:`H` is the number of heads, and :math:`D_{out}` is size of
            output feature.
        mxnet.NDArray, optional
            The attention values of shape :math:`(E, *, H, 1)`, where
            :math:`E` is the number of edges. This is returned only when
            :attr:`get_attention` is ``True``.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise
            DGLError since no message will be passed to those nodes. This
            will cause invalid output. The error can be ignored by setting
            ``allow_zero_in_degree`` parameter to ``True``.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                if graph.in_degrees().min() == 0:
                    raise DGLError(
                        "There are 0-in-degree nodes in the graph, "
                        "output for those nodes will be invalid. "
                        "This is harmful for some applications, "
                        "causing silent performance regression. "
                        "Adding self-loop on the input graph by "
                        "calling `g = dgl.add_self_loop(g)` will resolve "
                        "the issue. Setting ``allow_zero_in_degree`` "
                        "to be `True` when constructing this module will "
                        "suppress the check and let the code run."
                    )

            if isinstance(feat, tuple):
                src_prefix_shape = feat[0].shape[:-1]
                dst_prefix_shape = feat[1].shape[:-1]
                # Source and destination features may differ in size, so
                # each side is flattened with its own last dimension.
                src_feat_dim = feat[0].shape[-1]
                dst_feat_dim = feat[1].shape[-1]
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                # A layer built with a scalar ``in_feats`` only owns
                # ``self.fc``; use it for both sides without registering it
                # under additional attribute names.
                if hasattr(self, "fc_src"):
                    fc_src, fc_dst = self.fc_src, self.fc_dst
                else:
                    fc_src = fc_dst = self.fc
                feat_src = fc_src(h_src.reshape(-1, src_feat_dim)).reshape(
                    *src_prefix_shape, self._num_heads, self._out_feats
                )
                feat_dst = fc_dst(h_dst.reshape(-1, dst_feat_dim)).reshape(
                    *dst_prefix_shape, self._num_heads, self._out_feats
                )
            else:
                src_prefix_shape = dst_prefix_shape = feat.shape[:-1]
                # Read the size from the shape directly; indexing a row
                # first would fail on an input with zero nodes.
                src_feat_dim = dst_feat_dim = feat.shape[-1]
                h_src = h_dst = self.feat_drop(feat)
                feat_src = feat_dst = self.fc(
                    h_src.reshape(-1, src_feat_dim)
                ).reshape(*src_prefix_shape, self._num_heads, self._out_feats)
                if graph.is_block:
                    # In a block, the destination nodes are the leading
                    # rows of the source nodes.
                    feat_dst = feat_src[: graph.number_of_dst_nodes()]
                    h_dst = h_dst[: graph.number_of_dst_nodes()]
                    dst_prefix_shape = (
                        graph.number_of_dst_nodes(),
                    ) + dst_prefix_shape[1:]

            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is much efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # addition could be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and saves memory footprint.
            el = (
                (feat_src * self.attn_l.data(feat_src.context))
                .sum(axis=-1)
                .expand_dims(-1)
            )
            er = (
                (feat_dst * self.attn_r.data(feat_src.context))
                .sum(axis=-1)
                .expand_dims(-1)
            )
            graph.srcdata.update({"ft": feat_src, "el": el})
            graph.dstdata.update({"er": er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j
            # respectively.
            graph.apply_edges(fn.u_add_v("el", "er", "e"))
            e = self.leaky_relu(graph.edata.pop("e"))
            # compute softmax
            graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))
            graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
            rst = graph.dstdata["ft"]
            # residual
            if self.res_fc is not None:
                resval = self.res_fc(
                    h_dst.reshape(-1, dst_feat_dim)
                ).reshape(*dst_prefix_shape, -1, self._out_feats)
                rst = rst + resval
            # activation
            if self.activation is not None:
                rst = self.activation(rst)

            if get_attention:
                return rst, graph.edata["a"]
            else:
                return rst
def forward(self, graph, feat, etypes):
    """Compute Gated Graph Convolution layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : mxnet.NDArray
        The input feature of shape :math:`(N, D_{in})` where :math:`N`
        is the number of nodes of the graph and :math:`D_{in}` is the
        input feature size.
    etypes : mxnet.NDArray
        The edge type tensor of shape :math:`(E,)` where :math:`E` is
        the number of edges of the graph.

    Returns
    -------
    mxnet.NDArray
        The output feature of shape :math:`(N, D_{out})` where
        :math:`D_{out}` is the output feature size.
    """
    with graph.local_scope():
        assert graph.is_homogeneous, (
            "not a homogeneous graph; convert it with to_homogeneous "
            "and pass in the edge type as argument"
        )

        # Right-pad the input with zeros up to the hidden size. Nothing is
        # allocated when the sizes already match.
        pad_width = self._out_feats - feat.shape[1]
        if pad_width != 0:
            zero_pad = nd.zeros(
                (feat.shape[0], pad_width),
                ctx=feat.context,
                dtype=feat.dtype,
            )
            feat = nd.concat(feat, zero_pad, dim=-1)

        # The edge IDs of every edge type do not change between steps, so
        # copy ``etypes`` to host once and group the edges up front
        # instead of redoing it inside the recurrent loop.
        etypes_np = etypes.asnumpy()
        typed_edges = []
        for i in range(self._n_etypes):
            eids = (etypes_np == i).nonzero()[0]
            if len(eids) == 0:
                continue
            eids = (
                nd.from_numpy(eids, zero_copy=True)
                .as_in_context(feat.context)
                .astype(graph.idtype)
            )
            typed_edges.append((self.linears[i], eids))

        for _ in range(self._n_steps):
            graph.ndata["h"] = feat
            for linear, eids in typed_edges:
                # Bind ``linear`` as a default argument so the message
                # function does not depend on the loop variable.
                graph.apply_edges(
                    lambda edges, linear=linear: {
                        "W_e*h": linear(edges.src["h"])
                    },
                    eids,
                )
            graph.update_all(fn.copy_e("W_e*h", "m"), fn.sum("m", "a"))
            a = graph.ndata.pop("a")
            feat = self.gru(a, [feat])[0]
        return feat
def __init__(
    self, apply_func, aggregator_type, init_eps=0, learn_eps=False
):
    """Select the neighbour reducer and register the epsilon parameter.

    Parameters
    ----------
    apply_func : callable activation function/layer or None
        Stored as given; applied to the updated node feature in
        ``forward`` when not None.
    aggregator_type : str
        One of ``sum``, ``max`` or ``mean``.
    init_eps : float, optional
        Initial value of epsilon. Default: ``0``.
    learn_eps : bool, optional
        If True, epsilon receives gradients; otherwise its ``grad_req``
        is ``"null"``. Default: ``False``.

    Raises
    ------
    KeyError
        If ``aggregator_type`` is not one of the supported names.
    """
    super(GINConv, self).__init__()

    supported = (("sum", fn.sum), ("max", fn.max), ("mean", fn.mean))
    for type_name, reducer in supported:
        if aggregator_type == type_name:
            self._reducer = reducer
            break
    else:
        raise KeyError(
            "Aggregator type {} not recognized.".format(aggregator_type)
        )

    with self.name_scope():
        self.apply_func = apply_func
        eps_grad_req = "write" if learn_eps else "null"
        self.eps = self.params.get(
            "eps",
            shape=(1,),
            grad_req=eps_grad_req,
            init=mx.init.Constant(init_eps),
        )
If a pair of mxnet.NDArray is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`. If ``apply_func`` is not None, :math:`D_{in}` should fit the input dimensionality requirement of ``apply_func``. Returns ------- mxnet.NDArray The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output dimensionality of ``apply_func``. If ``apply_func`` is None, :math:`D_{out}` should be the same as input dimensionality. """ with graph.local_scope(): feat_src, feat_dst = expand_as_pair(feat, graph) graph.srcdata["h"] = feat_src graph.update_all(fn.copy_u("h", "m"), self._reducer("m", "neigh")) rst = ( 1 + self.eps.data(feat_dst.context) ) * feat_dst + graph.dstdata["neigh"] if self.apply_func is not None: rst = self.apply_func(rst) return rst ================================================ FILE: python/dgl/nn/mxnet/conv/gmmconv.py ================================================ """Torch Module for GMM Conv""" # pylint: disable= no-member, arguments-differ, invalid-name import math import mxnet as mx from mxnet import nd from mxnet.gluon import nn from mxnet.gluon.contrib.nn import Identity from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair class GMMConv(nn.Block): r"""Gaussian Mixture Model Convolution layer from `Geometric Deep Learning on Graphs and Manifolds using Mixture Model CNNs `__ .. math:: u_{ij} &= f(x_i, x_j), x_j \in \mathcal{N}(i) w_k(u) &= \exp\left(-\frac{1}{2}(u-\mu_k)^T \Sigma_k^{-1} (u - \mu_k)\right) h_i^{l+1} &= \mathrm{aggregate}\left(\left\{\frac{1}{K} \sum_{k}^{K} w_k(u_{ij}), \forall j\in \mathcal{N}(i)\right\}\right) where :math:`u` denotes the pseudo-coordinates between a vertex and one of its neighbor, computed using function :math:`f`, :math:`\Sigma_k^{-1}` and :math:`\mu_k` are learnable parameters representing the covariance matrix and mean vector of a Gaussian kernel. 
Parameters ---------- in_feats : int Number of input features; i.e., the number of dimensions of :math:`x_i`. out_feats : int Number of output features; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. dim : int Dimensionality of pseudo-coordinte; i.e, the number of dimensions of :math:`u_{ij}`. n_kernels : int Number of kernels :math:`K`. aggregator_type : str Aggregator type (``sum``, ``mean``, ``max``). Default: ``sum``. residual : bool If True, use residual connection inside this layer. Default: ``False``. bias : bool If True, adds a learnable bias to the output. Default: ``True``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. 
Examples -------- >>> import dgl >>> import numpy as np >>> import mxnet as mx >>> from dgl.nn import GMMConv >>> >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = mx.nd.ones((6, 10)) >>> conv = GMMConv(10, 2, 3, 2, 'mean') >>> conv.initialize(ctx=mx.cpu(0)) >>> pseudo = mx.nd.ones((12, 3)) >>> res = conv(g, feat, pseudo) >>> res [[-0.05083769 -0.1567954 ] [-0.05083769 -0.1567954 ] [-0.05083769 -0.1567954 ] [-0.05083769 -0.1567954 ] [-0.05083769 -0.1567954 ] [-0.05083769 -0.1567954 ]] >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) >>> u_fea = mx.nd.random.randn(2, 5) >>> v_fea = mx.nd.random.randn(4, 10) >>> pseudo = mx.nd.ones((5, 3)) >>> conv = GMMConv((5, 10), 2, 3, 2, 'mean') >>> conv.initialize(ctx=mx.cpu(0)) >>> res = conv(g, (u_fea, v_fea), pseudo) >>> res [[-0.1005067 -0.09494358] [-0.0023314 -0.07597432] [-0.05141905 -0.08545895] [-0.1005067 -0.09494358]] """ def __init__( self, in_feats, out_feats, dim, n_kernels, aggregator_type="sum", residual=False, bias=True, allow_zero_in_degree=False, ): super(GMMConv, self).__init__() self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._dim = dim self._n_kernels = n_kernels self._allow_zero_in_degree = allow_zero_in_degree if aggregator_type == "sum": self._reducer = fn.sum elif aggregator_type == "mean": self._reducer = fn.mean elif aggregator_type == "max": self._reducer = fn.max else: raise KeyError( "Aggregator type {} not recognized.".format(aggregator_type) ) with self.name_scope(): self.mu = self.params.get( "mu", shape=(n_kernels, dim), init=mx.init.Normal(0.1) ) self.inv_sigma = self.params.get( "inv_sigma", shape=(n_kernels, dim), init=mx.init.Constant(1) ) self.fc = nn.Dense( n_kernels * out_feats, in_units=self._in_src_feats, use_bias=False, 
weight_initializer=mx.init.Xavier(magnitude=math.sqrt(2.0)), ) if residual: if self._in_dst_feats != out_feats: self.res_fc = nn.Dense( out_feats, in_units=self._in_dst_feats, use_bias=False ) else: self.res_fc = Identity() else: self.res_fc = None if bias: self.bias = self.params.get( "bias", shape=(out_feats,), init=mx.init.Zero() ) else: self.bias = None def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, pseudo): """ Description ----------- Compute Gaussian Mixture Model Convolution layer. Parameters ---------- graph : DGLGraph The graph. feat : mxnet.NDArray If a single tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of tensors are given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. pseudo : mxnet.NDArray The pseudo coordinate tensor of shape :math:`(E, D_{u})` where :math:`E` is the number of edges of the graph and :math:`D_{u}` is the dimensionality of pseudo coordinate. Returns ------- mxnet.NDArray The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output feature size. Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. """ if not self._allow_zero_in_degree: if graph.in_degrees().min() == 0: raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. 
" "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." ) feat_src, feat_dst = expand_as_pair(feat, graph) with graph.local_scope(): graph.srcdata["h"] = self.fc(feat_src).reshape( -1, self._n_kernels, self._out_feats ) E = graph.num_edges() # compute gaussian weight gaussian = -0.5 * ( ( pseudo.reshape(E, 1, self._dim) - self.mu.data(feat_src.context).reshape( 1, self._n_kernels, self._dim ) ) ** 2 ) gaussian = gaussian * ( self.inv_sigma.data(feat_src.context).reshape( 1, self._n_kernels, self._dim ) ** 2 ) gaussian = nd.exp(gaussian.sum(axis=-1, keepdims=True)) # (E, K, 1) graph.edata["w"] = gaussian graph.update_all(fn.u_mul_e("h", "w", "m"), self._reducer("m", "h")) rst = graph.dstdata["h"].sum(1) # residual connection if self.res_fc is not None: rst = rst + self.res_fc(feat_dst) # bias if self.bias is not None: rst = rst + self.bias.data(feat_dst.context) return rst ================================================ FILE: python/dgl/nn/mxnet/conv/graphconv.py ================================================ """MXNet modules for graph convolutions(GCN)""" # pylint: disable= no-member, arguments-differ, invalid-name import math import mxnet as mx from mxnet import gluon from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair class GraphConv(gluon.Block): r"""Graph convolutional layer from `Semi-Supervised Classification with Graph Convolutional Networks `__ Mathematically it is defined as follows: .. 
math:: h_i^{(l+1)} = \sigma(b^{(l)} + \sum_{j\in\mathcal{N}(i)}\frac{1}{c_{ij}}h_j^{(l)}W^{(l)}) where :math:`\mathcal{N}(i)` is the set of neighbors of node :math:`i`, :math:`c_{ij}` is the product of the square root of node degrees (i.e., :math:`c_{ij} = \sqrt{|\mathcal{N}(i)|}\sqrt{|\mathcal{N}(j)|}`), and :math:`\sigma` is an activation function. Parameters ---------- in_feats : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. out_feats : int Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. norm : str, optional How to apply the normalizer. Can be one of the following values: * ``right``, to divide the aggregated messages by each node's in-degrees, which is equivalent to averaging the received messages. * ``none``, where no normalization is applied. * ``both`` (default), where the messages are scaled with :math:`1/c_{ji}` above, equivalent to symmetric normalization. * ``left``, to divide the messages sent out from each node by its out-degrees, equivalent to random walk normalization. weight : bool, optional If True, apply a linear layer. Otherwise, aggregating the messages without a weight matrix. bias : bool, optional If True, adds a learnable bias to the output. Default: ``True``. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Attributes ---------- weight : torch.Tensor The learnable weight tensor. bias : torch.Tensor The learnable bias tensor. 
Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. Examples -------- >>> import dgl >>> import mxnet as mx >>> from mxnet import gluon >>> import numpy as np >>> from dgl.nn import GraphConv >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = mx.nd.ones((6, 10)) >>> conv = GraphConv(10, 2, norm='both', weight=True, bias=True) >>> conv.initialize(ctx=mx.cpu(0)) >>> res = conv(g, feat) >>> print(res) [[1.0209361 0.22472616] [1.1240715 0.24742813] [1.0209361 0.22472616] [1.2924911 0.28450024] [1.3568745 0.29867214] [0.7948386 0.17495811]] >>> # allow_zero_in_degree example >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> conv = GraphConv(10, 2, norm='both', weight=True, bias=True, allow_zero_in_degree=True) >>> res = conv(g, feat) >>> print(res) [[1.0209361 0.22472616] [1.1240715 0.24742813] [1.0209361 0.22472616] [1.2924911 0.28450024] [1.3568745 0.29867214] [0. 
0.]] >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) >>> u_fea = mx.nd.random.randn(2, 5) >>> v_fea = mx.nd.random.randn(4, 5) >>> conv = GraphConv(5, 2, norm='both', weight=True, bias=True) >>> conv.initialize(ctx=mx.cpu(0)) >>> res = conv(g, (u_fea, v_fea)) >>> res [[ 0.26967263 0.308129 ] [ 0.05143356 -0.11355402] [ 0.22705637 0.1375853 ] [ 0.26967263 0.308129 ]] """ def __init__( self, in_feats, out_feats, norm="both", weight=True, bias=True, activation=None, allow_zero_in_degree=False, ): super(GraphConv, self).__init__() if norm not in ("none", "both", "right", "left"): raise DGLError( 'Invalid norm value. Must be either "none", "both", "right" or "left".' ' But got "{}".'.format(norm) ) self._in_feats = in_feats self._out_feats = out_feats self._norm = norm self._allow_zero_in_degree = allow_zero_in_degree with self.name_scope(): if weight: self.weight = self.params.get( "weight", shape=(in_feats, out_feats), init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ) else: self.weight = None if bias: self.bias = self.params.get( "bias", shape=(out_feats,), init=mx.init.Zero() ) else: self.bias = None self._activation = activation def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, weight=None): r""" Description ----------- Compute graph convolution. Parameters ---------- graph : DGLGraph The graph. feat : mxnet.NDArray or pair of mxnet.NDArray If a single tensor is given, it represents the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of tensors are given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. 
Note that in the special case of graph convolutional networks, if a pair of tensors is given, the latter element will not participate in computation. weight : torch.Tensor, optional Optional external weight tensor. Returns ------- mxnet.NDArray The output feature Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. Note ---- * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional dimensions, :math:`N` is the number of nodes. * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are the same shape as the input. * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`. """ with graph.local_scope(): if not self._allow_zero_in_degree: if graph.in_degrees().min() == 0: raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." ) feat_src, feat_dst = expand_as_pair(feat, graph) if self._norm in ["both", "left"]: degs = ( graph.out_degrees() .as_in_context(feat_dst.context) .astype("float32") ) degs = mx.nd.clip(degs, a_min=1, a_max=float("inf")) if self._norm == "both": norm = mx.nd.power(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1,) * (feat_src.ndim - 1) norm = norm.reshape(shp) feat_src = feat_src * norm if weight is not None: if self.weight is not None: raise DGLError( "External weight is provided while at the same time the" " module has defined its own weight parameter. Please" " create the module with flag weight=False." 
) else: weight = self.weight.data(feat_src.context) if self._in_feats > self._out_feats: # mult W first to reduce the feature size for aggregation. if weight is not None: feat_src = mx.nd.dot(feat_src, weight) graph.srcdata["h"] = feat_src graph.update_all( fn.copy_u(u="h", out="m"), fn.sum(msg="m", out="h") ) rst = graph.dstdata.pop("h") else: # aggregate first then mult W graph.srcdata["h"] = feat_src graph.update_all( fn.copy_u(u="h", out="m"), fn.sum(msg="m", out="h") ) rst = graph.dstdata.pop("h") if weight is not None: rst = mx.nd.dot(rst, weight) if self._norm in ["both", "right"]: degs = ( graph.in_degrees() .as_in_context(feat_dst.context) .astype("float32") ) degs = mx.nd.clip(degs, a_min=1, a_max=float("inf")) if self._norm == "both": norm = mx.nd.power(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1,) * (feat_dst.ndim - 1) norm = norm.reshape(shp) rst = rst * norm if self.bias is not None: rst = rst + self.bias.data(rst.context) if self._activation is not None: rst = self._activation(rst) return rst def __repr__(self): summary = "GraphConv(" summary += "in={:d}, out={:d}, normalization={}, activation={}".format( self._in_feats, self._out_feats, self._norm, self._activation ) summary += ")" return summary ================================================ FILE: python/dgl/nn/mxnet/conv/nnconv.py ================================================ """MXNet Module for NNConv layer""" # pylint: disable= no-member, arguments-differ, invalid-name import mxnet as mx from mxnet.gluon import nn from mxnet.gluon.contrib.nn import Identity from .... import function as fn from ....utils import expand_as_pair class NNConv(nn.Block): r"""Graph Convolution layer from `Neural Message Passing for Quantum Chemistry `__ .. math:: h_{i}^{l+1} = h_{i}^{l} + \mathrm{aggregate}\left(\left\{ f_\Theta (e_{ij}) \cdot h_j^{l}, j\in \mathcal{N}(i) \right\}\right) where :math:`e_{ij}` is the edge feature, :math:`f_\Theta` is a function with learnable parameters. 
Parameters ---------- in_feats : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. NN can be applied on homogeneous graph and unidirectional `bipartite graph `__. If the layer is to be applied on a unidirectional bipartite graph, ``in_feats`` specifies the input feature size on both the source and destination nodes. If a scalar is given, the source and destination node feature size would take the same value. out_feats : int Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. edge_func : callable activation function/layer Maps each edge feature to a vector of shape ``(in_feats * out_feats)`` as weight to compute messages. Also is the :math:`f_\Theta` in the formula. aggregator_type : str Aggregator type to use (``sum``, ``mean`` or ``max``). residual : bool, optional If True, use residual connection. Default: ``False``. bias : bool, optional If True, adds a learnable bias to the output. Default: ``True``. Examples -------- >>> import dgl >>> import numpy as np >>> import mxnet as mx >>> from mxnet import gluon >>> from dgl.nn import NNConv >>> >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = mx.nd.ones((6, 10)) >>> lin = gluon.nn.Dense(20) >>> lin.initialize(ctx=mx.cpu(0)) >>> def edge_func(efeat): >>> return lin(efeat) >>> efeat = mx.nd.ones((12, 5)) >>> conv = NNConv(10, 2, edge_func, 'mean') >>> conv.initialize(ctx=mx.cpu(0)) >>> res = conv(g, feat, efeat) >>> res [[0.39946803 0.32098457] [0.39946803 0.32098457] [0.39946803 0.32098457] [0.39946803 0.32098457] [0.39946803 0.32098457] [0.39946803 0.32098457]] >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) >>> u_feat = mx.nd.random.randn(2, 10) >>> v_feat = mx.nd.random.randn(4, 10) >>> conv = NNConv(10, 2, edge_func, 'mean') >>> conv.initialize(ctx=mx.cpu(0)) >>> efeat = mx.nd.ones((5, 5)) >>> res 
= conv(g, (u_feat, v_feat), efeat) >>> res [[ 0.24425688 0.3238042 ] [-0.11651017 -0.01738572] [ 0.06387337 0.15320925] [ 0.24425688 0.3238042 ]] """ def __init__( self, in_feats, out_feats, edge_func, aggregator_type, residual=False, bias=True, ): super(NNConv, self).__init__() self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats if aggregator_type == "sum": self.reducer = fn.sum elif aggregator_type == "mean": self.reducer = fn.mean elif aggregator_type == "max": self.reducer = fn.max else: raise KeyError( "Aggregator type {} not recognized: ".format(aggregator_type) ) self._aggre_type = aggregator_type with self.name_scope(): self.edge_nn = edge_func if residual: if self._in_dst_feats != out_feats: self.res_fc = nn.Dense( out_feats, in_units=self._in_dst_feats, use_bias=False, weight_initializer=mx.init.Xavier(), ) else: self.res_fc = Identity() else: self.res_fc = None if bias: self.bias = self.params.get( "bias", shape=(out_feats,), init=mx.init.Zero() ) else: self.bias = None def forward(self, graph, feat, efeat): r"""Compute MPNN Graph Convolution layer. Parameters ---------- graph : DGLGraph The graph. feat : mxnet.NDArray or pair of mxnet.NDArray The input feature of shape :math:`(N, D_{in})` where :math:`N` is the number of nodes of the graph and :math:`D_{in}` is the input feature size. efeat : mxnet.NDArray The edge feature of shape :math:`(N, *)`, should fit the input shape requirement of ``edge_nn``. Returns ------- mxnet.NDArray The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output feature size. 
""" with graph.local_scope(): feat_src, feat_dst = expand_as_pair(feat, graph) # (n, d_in, 1) graph.srcdata["h"] = feat_src.expand_dims(-1) # (n, d_in, d_out) graph.edata["w"] = self.edge_nn(efeat).reshape( -1, self._in_src_feats, self._out_feats ) # (n, d_in, d_out) graph.update_all( fn.u_mul_e("h", "w", "m"), self.reducer("m", "neigh") ) rst = graph.dstdata.pop("neigh").sum(axis=1) # (n, d_out) # residual connection if self.res_fc is not None: rst = rst + self.res_fc(feat_dst) # bias if self.bias is not None: rst = rst + self.bias.data(feat_dst.context) return rst ================================================ FILE: python/dgl/nn/mxnet/conv/relgraphconv.py ================================================ """MXNet module for RelGraphConv""" # pylint: disable= no-member, arguments-differ, invalid-name import math import mxnet as mx import numpy as np from mxnet import gluon, nd from mxnet.gluon import nn from .... import function as fn from .. import utils class RelGraphConv(gluon.Block): r"""Relational graph convolution layer from `Modeling Relational Data with Graph Convolutional Networks `__ It can be described as below: .. math:: h_i^{(l+1)} = \sigma(\sum_{r\in\mathcal{R}} \sum_{j\in\mathcal{N}^r(i)}\frac{1}{c_{i,r}}W_r^{(l)}h_j^{(l)}+W_0^{(l)}h_i^{(l)}) where :math:`\mathcal{N}^r(i)` is the neighbor set of node :math:`i` w.r.t. relation :math:`r`. :math:`c_{i,r}` is the normalizer equal to :math:`|\mathcal{N}^r(i)|`. :math:`\sigma` is an activation function. :math:`W_0` is the self-loop weight. The basis regularization decomposes :math:`W_r` by: .. math:: W_r^{(l)} = \sum_{b=1}^B a_{rb}^{(l)}V_b^{(l)} where :math:`B` is the number of bases, :math:`V_b^{(l)}` are linearly combined with coefficients :math:`a_{rb}^{(l)}`. The block-diagonal-decomposition regularization decomposes :math:`W_r` into :math:`B` number of block diagonal matrices. We refer :math:`B` as the number of bases. The block regularization decomposes :math:`W_r` by: .. 
math:: W_r^{(l)} = \oplus_{b=1}^B Q_{rb}^{(l)} where :math:`B` is the number of bases, :math:`Q_{rb}^{(l)}` are block bases with shape :math:`R^{(d^{(l+1)}/B)*(d^{l}/B)}`. Parameters ---------- in_feat : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. out_feat : int Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. num_rels : int Number of relations. . regularizer : str Which weight regularizer to use "basis" or "bdd". "basis" is short for basis-diagonal-decomposition. "bdd" is short for block-diagonal-decomposition. num_bases : int, optional Number of bases. If is none, use number of relations. Default: ``None``. bias : bool, optional True if bias is added. Default: ``True``. activation : callable, optional Activation function. Default: ``None``. self_loop : bool, optional True to include self loop message. Default: ``True``. low_mem : bool, optional True to use low memory implementation of relation message passing function. Default: False. This option trades speed with memory consumption, and will slowdown the forward/backward. Turn it on when you encounter OOM problem during training or evaluation. Default: ``False``. dropout : float, optional Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. 
Default: ``False`` Examples -------- >>> import dgl >>> import numpy as np >>> import mxnet as mx >>> from mxnet import gluon >>> from dgl.nn import RelGraphConv >>> >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = mx.nd.ones((6, 10)) >>> conv = RelGraphConv(10, 2, 3, regularizer='basis', num_bases=2) >>> conv.initialize(ctx=mx.cpu(0)) >>> etype = mx.nd.array(np.array([0,1,2,0,1,2]).astype(np.int64)) >>> res = conv(g, feat, etype) [[ 0.561324 0.33745846] [ 0.61585337 0.09992217] [ 0.561324 0.33745846] [-0.01557937 0.01227859] [ 0.61585337 0.09992217] [ 0.056508 -0.00307822]] """ def __init__( self, in_feat, out_feat, num_rels, regularizer="basis", num_bases=None, bias=True, activation=None, self_loop=True, low_mem=False, dropout=0.0, layer_norm=False, ): super(RelGraphConv, self).__init__() self.in_feat = in_feat self.out_feat = out_feat self.num_rels = num_rels self.regularizer = regularizer self.num_bases = num_bases if ( self.num_bases is None or self.num_bases > self.num_rels or self.num_bases < 0 ): self.num_bases = self.num_rels self.bias = bias self.activation = activation self.self_loop = self_loop assert ( low_mem is False ), "MXNet currently does not support low-memory implementation." assert ( layer_norm is False ), "MXNet currently does not support layer norm." if regularizer == "basis": # add basis weights self.weight = self.params.get( "weight", shape=(self.num_bases, self.in_feat, self.out_feat), init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ) if self.num_bases < self.num_rels: # linear combination coefficients self.w_comp = self.params.get( "w_comp", shape=(self.num_rels, self.num_bases), init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ) # message func self.message_func = self.basis_message_func elif regularizer == "bdd": if in_feat % num_bases != 0 or out_feat % num_bases != 0: raise ValueError( "Feature size must be a multiplier of num_bases." 
) # add block diagonal weights self.submat_in = in_feat // self.num_bases self.submat_out = out_feat // self.num_bases # assuming in_feat and out_feat are both divisible by num_bases self.weight = self.params.get( "weight", shape=( self.num_rels, self.num_bases * self.submat_in * self.submat_out, ), init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ) # message func self.message_func = self.bdd_message_func else: raise ValueError("Regularizer must be either 'basis' or 'bdd'") # bias if self.bias: self.h_bias = self.params.get( "bias", shape=(out_feat,), init=mx.init.Zero() ) # weight for self loop if self.self_loop: self.loop_weight = self.params.get( "W_0", shape=(in_feat, out_feat), init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ) self.dropout = nn.Dropout(dropout) def basis_message_func(self, edges): """Message function for basis regularizer""" ctx = edges.src["h"].context if self.num_bases < self.num_rels: # generate all weights from bases weight = self.weight.data(ctx).reshape( self.num_bases, self.in_feat * self.out_feat ) weight = nd.dot(self.w_comp.data(ctx), weight).reshape( self.num_rels, self.in_feat, self.out_feat ) else: weight = self.weight.data(ctx) msg = utils.bmm_maybe_select(edges.src["h"], weight, edges.data["type"]) if "norm" in edges.data: msg = msg * edges.data["norm"] return {"msg": msg} def bdd_message_func(self, edges): """Message function for block-diagonal-decomposition regularizer""" ctx = edges.src["h"].context if ( edges.src["h"].dtype in (np.int32, np.int64) and len(edges.src["h"].shape) == 1 ): raise TypeError( "Block decomposition does not allow integer ID feature." 
) weight = self.weight.data(ctx)[edges.data["type"], :].reshape( -1, self.submat_in, self.submat_out ) node = edges.src["h"].reshape(-1, 1, self.submat_in) msg = nd.batch_dot(node, weight).reshape(-1, self.out_feat) if "norm" in edges.data: msg = msg * edges.data["norm"] return {"msg": msg} def forward(self, g, x, etypes, norm=None): """ Description ----------- Forward computation Parameters ---------- g : DGLGraph The graph. feat : mx.ndarray.NDArray Input node features. Could be either * :math:`(|V|, D)` dense tensor * :math:`(|V|,)` int64 vector, representing the categorical values of each node. It then treat the input feature as an one-hot encoding feature. etypes : mx.ndarray.NDArray Edge type tensor. Shape: :math:`(|E|,)` norm : mx.ndarray.NDArray Optional edge normalizer tensor. Shape: :math:`(|E|, 1)`. Returns ------- mx.ndarray.NDArray New node features. """ assert g.is_homogeneous, ( "not a homogeneous graph; convert it with to_homogeneous " "and pass in the edge type as argument" ) with g.local_scope(): g.ndata["h"] = x g.edata["type"] = etypes if norm is not None: g.edata["norm"] = norm if self.self_loop: loop_message = utils.matmul_maybe_select( x, self.loop_weight.data(x.context) ) # message passing g.update_all(self.message_func, fn.sum(msg="msg", out="h")) # apply bias and activation node_repr = g.ndata["h"] if self.bias: node_repr = node_repr + self.h_bias.data(x.context) if self.self_loop: node_repr = node_repr + loop_message if self.activation: node_repr = self.activation(node_repr) node_repr = self.dropout(node_repr) return node_repr ================================================ FILE: python/dgl/nn/mxnet/conv/sageconv.py ================================================ """MXNet Module for GraphSAGE layer""" # pylint: disable= no-member, arguments-differ, invalid-name import math import mxnet as mx from mxnet import nd from mxnet.gluon import nn from .... 
class SAGEConv(nn.Block):
    r"""GraphSAGE layer from `Inductive Representation Learning on Large Graphs`.

    .. math::
        h_{\mathcal{N}(i)}^{(l+1)} &= \mathrm{aggregate}
        \left(\{h_{j}^{l}, \forall j \in \mathcal{N}(i) \}\right)

        h_{i}^{(l+1)} &= \sigma \left(W \cdot \mathrm{concat}
        (h_{i}^{l}, h_{\mathcal{N}(i)}^{l+1}) \right)

        h_{i}^{(l+1)} &= \mathrm{norm}(h_{i}^{(l+1)})

    Parameters
    ----------
    in_feats : int, or pair of ints
        Input feature size. A pair gives the source and destination feature
        sizes separately (unidirectional bipartite graph); a scalar is used
        for both. With the ``gcn`` aggregator both sizes must be equal.
    out_feats : int
        Output feature size.
    aggregator_type : str
        One of ``mean``, ``gcn``, ``pool``, ``lstm``. ``lstm`` is accepted by
        the validation but raises ``NotImplementedError``.
    feat_drop : float
        Dropout rate applied to the input features. Default: ``0``.
    bias : bool
        If True, the dense layers learn a bias. Default: ``True``.
    norm : callable or None, optional
        If not None, applied to the output after the activation.
    activation : callable or None, optional
        If not None, applied to the updated node features. Default: ``None``.

    Examples
    --------
    >>> import dgl
    >>> import mxnet as mx
    >>> from dgl.nn import SAGEConv
    >>> g = dgl.add_self_loop(dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])))
    >>> conv = SAGEConv(10, 2, 'pool')
    >>> conv.initialize(ctx=mx.cpu(0))
    >>> res = conv(g, mx.nd.ones((6, 10)))   # shape (6, 2)
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        aggregator_type="mean",
        feat_drop=0.0,
        bias=True,
        norm=None,
        activation=None,
    ):
        super(SAGEConv, self).__init__()
        valid_aggre_types = {"mean", "gcn", "pool", "lstm"}
        if aggregator_type not in valid_aggre_types:
            raise DGLError(
                "Invalid aggregator_type. Must be one of {}. "
                "But got {!r} instead.".format(
                    valid_aggre_types, aggregator_type
                )
            )

        src_size, dst_size = expand_as_pair(in_feats)
        self._in_src_feats = src_size
        self._in_dst_feats = dst_size
        self._out_feats = out_feats
        self._aggre_type = aggregator_type

        # Children are created in a fixed order inside the name scope.
        with self.name_scope():
            self.norm = norm
            self.feat_drop = nn.Dropout(feat_drop)
            self.activation = activation
            if aggregator_type == "pool":
                # Per-node transform applied before max pooling.
                self.fc_pool = nn.Dense(
                    src_size,
                    use_bias=bias,
                    weight_initializer=mx.init.Xavier(
                        magnitude=math.sqrt(2.0)
                    ),
                    in_units=src_size,
                )
            if aggregator_type == "lstm":
                raise NotImplementedError
            if aggregator_type != "gcn":
                # The gcn aggregator folds the self feature into the
                # neighbourhood mean, so it has no separate self transform.
                self.fc_self = nn.Dense(
                    out_feats,
                    use_bias=bias,
                    weight_initializer=mx.init.Xavier(
                        magnitude=math.sqrt(2.0)
                    ),
                    in_units=dst_size,
                )
            self.fc_neigh = nn.Dense(
                out_feats,
                use_bias=bias,
                weight_initializer=mx.init.Xavier(magnitude=math.sqrt(2.0)),
                in_units=src_size,
            )

    def forward(self, graph, feat):
        r"""Compute the GraphSAGE layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray or pair of mxnet.NDArray
            A single tensor of shape :math:`(N, D_{in})`, or a pair of
            tensors of shape :math:`(N_{in}, D_{in_{src}})` and
            :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        mxnet.NDArray
            Output features of shape :math:`(N, D_{out})`.
        """
        with graph.local_scope():
            if isinstance(feat, tuple):
                feat_src = self.feat_drop(feat[0])
                feat_dst = self.feat_drop(feat[1])
            else:
                feat_src = feat_dst = self.feat_drop(feat)
                if graph.is_block:
                    # Destination features are taken as the leading slice
                    # of the source features.
                    feat_dst = feat_src[: graph.number_of_dst_nodes()]

            h_self = feat_dst

            # Graphs without edges: pre-fill the aggregate with zeros.
            if graph.num_edges() == 0:
                zeros = mx.nd.zeros(
                    (graph.number_of_dst_nodes(), self._in_src_feats)
                )
                graph.dstdata["neigh"] = zeros.as_in_context(
                    feat_dst.context
                )

            aggre = self._aggre_type
            if aggre == "mean":
                graph.srcdata["h"] = feat_src
                graph.update_all(fn.copy_u("h", "m"), fn.mean("m", "neigh"))
                h_neigh = graph.dstdata["neigh"]
            elif aggre == "gcn":
                check_eq_shape(feat)
                graph.srcdata["h"] = feat_src
                graph.dstdata["h"] = feat_dst  # same as above if homogeneous
                graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "neigh"))
                # Average over the neighbours plus the node itself.
                degs = graph.in_degrees().astype(feat_dst.dtype)
                degs = degs.as_in_context(feat_dst.context)
                total = graph.dstdata["neigh"] + graph.dstdata["h"]
                h_neigh = total / (degs.expand_dims(-1) + 1)
            elif aggre == "pool":
                graph.srcdata["h"] = nd.relu(self.fc_pool(feat_src))
                graph.update_all(fn.copy_u("h", "m"), fn.max("m", "neigh"))
                h_neigh = graph.dstdata["neigh"]
            elif aggre == "lstm":
                raise NotImplementedError
            else:
                raise KeyError(
                    "Aggregator type {} not recognized.".format(
                        self._aggre_type
                    )
                )

            if aggre == "gcn":
                out = self.fc_neigh(h_neigh)
            else:
                out = self.fc_self(h_self) + self.fc_neigh(h_neigh)

            # activation first, then normalization
            if self.activation is not None:
                out = self.activation(out)
            if self.norm is not None:
                out = self.norm(out)
            return out
class SGConv(nn.Block):
    r"""SGC layer from `Simplifying Graph Convolutional Networks`.

    .. math::
        H^{K} = (\tilde{D}^{-1/2} \tilde{A} \tilde{D}^{-1/2})^K X \Theta

    where :math:`\tilde{A}` is :math:`A` + :math:`I`, so the input graph is
    expected to already contain self-loop edges.

    Parameters
    ----------
    in_feats : int
        Number of input features.
    out_feats : int
        Number of output features.
    k : int
        Number of hops :math:`K`. Default: ``1``.
    cached : bool
        If True, the propagated features (after the optional ``norm`` and
        before the final dense layer) are stored on the first forward call
        and reused afterwards. Only use this in a transductive setting.
    bias : bool
        If True, the dense layer learns a bias. Default: ``True``.
    norm : callable or None, optional
        If not None, applied to the propagated features. Default: ``None``.
    allow_zero_in_degree : bool, optional
        If False, a ``DGLError`` is raised when the graph contains
        0-in-degree nodes, since no message reaches them. Set to ``True``
        to skip the check. Default: ``False``.

    Note
    ----
    A common way to avoid zero in-degree nodes on a homogeneous graph is

    >>> g = dgl.add_self_loop(g)

    Example
    -------
    >>> import dgl
    >>> import mxnet as mx
    >>> from dgl.nn import SGConv
    >>> g = dgl.add_self_loop(dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])))
    >>> conv = SGConv(10, 2, k=2, cached=True)
    >>> conv.initialize(ctx=mx.cpu(0))
    >>> res = conv(g, mx.nd.ones((6, 10)))   # shape (6, 2)
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        k=1,
        cached=False,
        bias=True,
        norm=None,
        allow_zero_in_degree=False,
    ):
        super(SGConv, self).__init__()
        self._cached = cached
        self._cached_h = None  # filled on the first forward when cached
        self._k = k
        self._allow_zero_in_degree = allow_zero_in_degree
        with self.name_scope():
            self.norm = norm
            self.fc = nn.Dense(
                out_feats,
                in_units=in_feats,
                use_bias=bias,
                weight_initializer=mx.init.Xavier(),
            )

    def set_allow_zero_in_degree(self, set_value):
        r"""Set the ``allow_zero_in_degree`` flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat):
        r"""Compute the Simplifying Graph Convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input features of shape :math:`(N, D_{in})`.

        Returns
        -------
        mxnet.NDArray
            Output features of shape :math:`(N, D_{out})`.

        Raises
        ------
        DGLError
            If the graph has 0-in-degree nodes and ``allow_zero_in_degree``
            is False.

        Note
        ----
        Once features are cached, ``feat`` and ``graph`` are ignored for the
        propagation step, so they must not change between calls.
        """
        with graph.local_scope():
            if (
                not self._allow_zero_in_degree
                and graph.in_degrees().min() == 0
            ):
                raise DGLError(
                    "There are 0-in-degree nodes in the graph, "
                    "output for those nodes will be invalid. "
                    "This is harmful for some applications, "
                    "causing silent performance regression. "
                    "Adding self-loop on the input graph by "
                    "calling `g = dgl.add_self_loop(g)` will resolve "
                    "the issue. Setting ``allow_zero_in_degree`` "
                    "to be `True` when constructing this module will "
                    "suppress the check and let the code run."
                )

            if self._cached_h is None:
                # Symmetric normalizer D^-1/2, with degrees clipped to >= 1.
                degs = nd.clip(
                    graph.in_degrees().astype(feat.dtype), 1, float("inf")
                )
                deg_inv_sqrt = nd.power(degs, -0.5).expand_dims(1)
                deg_inv_sqrt = deg_inv_sqrt.as_in_context(feat.context)

                # compute (D^-1/2 A D^-1/2)^k X
                for _ in range(self._k):
                    graph.ndata["h"] = feat * deg_inv_sqrt
                    graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
                    feat = graph.ndata.pop("h") * deg_inv_sqrt

                if self.norm is not None:
                    feat = self.norm(feat)

                if self._cached:
                    self._cached_h = feat
            else:
                feat = self._cached_h

            return self.fc(feat)
class TAGConv(gluon.Block):
    r"""Topology Adaptive Graph Convolutional layer from
    `Topology Adaptive Graph Convolutional Networks`.

    .. math::
        H^{K} = {\sum}_{k=0}^K (D^{-1/2} A D^{-1/2})^{k} X {\Theta}_{k},

    where :math:`A` denotes the adjacency matrix,
    :math:`D_{ii} = \sum_{j=0} A_{ij}` its diagonal degree matrix and
    :math:`{\Theta}_{k}` the linear weights that combine the results of the
    different hops.

    Parameters
    ----------
    in_feats : int
        Input feature size, i.e. the number of dimensions of :math:`X`.
    out_feats : int
        Output feature size, i.e. the number of dimensions of :math:`H^{K}`.
    k : int, optional
        Number of hops :math:`K`. Default: ``2``.
    bias : bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.
    activation : callable or None, optional
        If not None, applied to the updated node features. Default: ``None``.

    Attributes
    ----------
    lin : mxnet.gluon.Parameter
        Learnable weight of shape ``(in_feats * (k + 1), out_feats)``.
    h_bias : mxnet.gluon.Parameter
        Learnable bias of shape ``(out_feats,)``; only created when
        ``bias`` is True.

    Example
    -------
    >>> import dgl
    >>> import mxnet as mx
    >>> from dgl.nn import TAGConv
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> conv = TAGConv(10, 2, k=2)
    >>> conv.initialize(ctx=mx.cpu(0))
    >>> res = conv(g, mx.nd.ones((6, 10)))   # shape (6, 2)
    """

    def __init__(self, in_feats, out_feats, k=2, bias=True, activation=None):
        super(TAGConv, self).__init__()
        self.out_feats = out_feats
        self.k = k
        self.bias = bias
        self.activation = activation
        self.in_feats = in_feats

        # One weight slab per hop (0..k), stacked along the input axis.
        self.lin = self.params.get(
            "weight",
            shape=(self.in_feats * (self.k + 1), self.out_feats),
            init=mx.init.Xavier(magnitude=math.sqrt(2.0)),
        )
        if self.bias:
            self.h_bias = self.params.get(
                "bias", shape=(out_feats,), init=mx.init.Zero()
            )

    def forward(self, graph, feat):
        r"""Compute topology adaptive graph convolution.

        Parameters
        ----------
        graph : DGLGraph
            The graph; must be homogeneous.
        feat : mxnet.NDArray
            Input features of shape :math:`(N, D_{in})`.

        Returns
        -------
        mxnet.NDArray
            Output features of shape :math:`(N, D_{out})`.
        """
        with graph.local_scope():
            assert graph.is_homogeneous, "Graph is not homogeneous"

            # D^-1/2 with degrees clipped to >= 1, reshaped so it
            # broadcasts over all trailing feature axes.
            degs = graph.in_degrees().astype("float32")
            norm = mx.nd.power(
                mx.nd.clip(degs, a_min=1, a_max=float("inf")), -0.5
            )
            shp = norm.shape + (1,) * (feat.ndim - 1)
            norm = norm.reshape(shp).as_in_context(feat.context)

            # Concatenate X, ÂX, ..., Â^k X along the feature axis.
            rst = feat
            for _ in range(self.k):
                rst = rst * norm
                graph.ndata["h"] = rst
                graph.update_all(
                    fn.copy_u(u="h", out="m"), fn.sum(msg="m", out="h")
                )
                rst = graph.ndata["h"]
                rst = rst * norm
                feat = mx.nd.concat(feat, rst, dim=-1)

            rst = mx.nd.dot(feat, self.lin.data(feat.context))
            # ``self.bias`` is a bool flag, never None: test its truth value
            # so that bias=False does not touch the non-existent ``h_bias``.
            if self.bias:
                rst = rst + self.h_bias.data(rst.context)

            if self.activation is not None:
                rst = self.activation(rst)
            return rst
class SumPooling(nn.Block):
    r"""Sum the node features of each graph in the batch.

    .. math::
        r^{(i)} = \sum_{k=1}^{N_i} x^{(i)}_k
    """

    def __init__(self):
        super().__init__()

    def forward(self, graph, feat):
        r"""Compute sum pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input features of shape :math:`(N, *)`, :math:`N` being the
            number of nodes in the graph.

        Returns
        -------
        mxnet.NDArray
            Output of shape :math:`(B, *)`, :math:`B` being the batch size.
        """
        with graph.local_scope():
            graph.ndata["h"] = feat
            pooled = sum_nodes(graph, "h")
            graph.ndata.pop("h")
            return pooled

    def __repr__(self):
        return "SumPooling()"


class AvgPooling(nn.Block):
    r"""Average the node features of each graph in the batch.

    .. math::
        r^{(i)} = \frac{1}{N_i}\sum_{k=1}^{N_i} x^{(i)}_k
    """

    def __init__(self):
        super().__init__()

    def forward(self, graph, feat):
        r"""Compute average pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input features of shape :math:`(N, *)`, :math:`N` being the
            number of nodes in the graph.

        Returns
        -------
        mxnet.NDArray
            Output of shape :math:`(B, *)`, :math:`B` being the batch size.
        """
        with graph.local_scope():
            graph.ndata["h"] = feat
            pooled = mean_nodes(graph, "h")
            graph.ndata.pop("h")
            return pooled

    def __repr__(self):
        return "AvgPooling()"


class MaxPooling(nn.Block):
    r"""Take the element-wise maximum of the node features of each graph.

    .. math::
        r^{(i)} = \max_{k=1}^{N_i} \left( x^{(i)}_k \right)
    """

    def __init__(self):
        super().__init__()

    def forward(self, graph, feat):
        r"""Compute max pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input features of shape :math:`(N, *)`, :math:`N` being the
            number of nodes in the graph.

        Returns
        -------
        mxnet.NDArray
            Output of shape :math:`(B, *)`, :math:`B` being the batch size.
        """
        with graph.local_scope():
            graph.ndata["h"] = feat
            pooled = max_nodes(graph, "h")
            graph.ndata.pop("h")
            return pooled

    def __repr__(self):
        return "MaxPooling()"


class SortPooling(nn.Block):
    r"""Pooling layer from `An End-to-End Deep Learning Architecture for
    Graph Classification`.

    Parameters
    ----------
    k : int
        The number of nodes to hold for each graph.
    """

    def __init__(self, k):
        super().__init__()
        self.k = k

    def forward(self, graph, feat):
        r"""Compute sort pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input node features of shape :math:`(N, D)`.

        Returns
        -------
        mxnet.NDArray
            Output of shape :math:`(B, k * D)`, :math:`B` being the batch
            size.
        """
        with graph.local_scope():
            # Sort the feature of each node in ascending order.
            sorted_feat = feat.sort(axis=-1)
            graph.ndata["h"] = sorted_feat
            # Keep the top-k nodes ranked by their last feature, then
            # flatten the k kept rows of every graph into one vector.
            top_nodes = topk_nodes(graph, "h", self.k, sortby=-1)[0]
            return top_nodes.reshape(-1, self.k * sorted_feat.shape[-1])

    def __repr__(self):
        return "SortPooling(k={})".format(self.k)
class GlobalAttentionPooling(nn.Block):
    r"""Global Attention Pooling layer from
    `Gated Graph Sequence Neural Networks`.

    .. math::
        r^{(i)} = \sum_{k=1}^{N_i}\mathrm{softmax}\left(f_{gate}
        \left(x^{(i)}_k\right)\right) f_{feat}\left(x^{(i)}_k\right)

    Parameters
    ----------
    gate_nn : gluon.nn.Block
        Network that computes an attention score for each feature; its
        output must have size 1 on the last axis.
    feat_nn : gluon.nn.Block, optional
        Network applied to each feature before it is weighted by the
        attention scores.
    """

    def __init__(self, gate_nn, feat_nn=None):
        super().__init__()
        with self.name_scope():
            self.gate_nn = gate_nn
            self.feat_nn = feat_nn

    def forward(self, graph, feat):
        r"""Compute global attention pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input node features of shape :math:`(N, D)`.

        Returns
        -------
        mxnet.NDArray
            Output of shape :math:`(B, D)`, :math:`B` being the batch size.
        """
        with graph.local_scope():
            scores = self.gate_nn(feat)
            assert (
                scores.shape[-1] == 1
            ), "The output of gate_nn should have size 1 at the last axis."
            if self.feat_nn:
                feat = self.feat_nn(feat)

            # Softmax of the scores within each graph, then weighted sum.
            graph.ndata["gate"] = scores
            attention = softmax_nodes(graph, "gate")
            graph.ndata["r"] = feat * attention
            return sum_nodes(graph, "r")


class Set2Set(nn.Block):
    r"""Set2Set operator from `Order Matters: Sequence to sequence for sets`.

    For each individual graph in the batch, set2set computes

    .. math::
        q_t &= \mathrm{LSTM} (q^*_{t-1})

        \alpha_{i,t} &= \mathrm{softmax}(x_i \cdot q_t)

        r_t &= \sum_{i=1}^N \alpha_{i,t} x_i

        q^*_t &= q_t \Vert r_t

    Parameters
    ----------
    input_dim : int
        Size of each input sample.
    n_iters : int
        Number of iterations.
    n_layers : int
        Number of recurrent layers.
    """

    def __init__(self, input_dim, n_iters, n_layers):
        super().__init__()
        self.input_dim = input_dim
        # q* is the concatenation of q and the readout, hence twice as wide.
        self.output_dim = 2 * input_dim
        self.n_iters = n_iters
        self.n_layers = n_layers
        with self.name_scope():
            self.lstm = gluon.rnn.LSTM(
                self.input_dim, num_layers=n_layers, input_size=self.output_dim
            )

    def forward(self, graph, feat):
        r"""Compute set2set pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : mxnet.NDArray
            Input node features of shape :math:`(N, D)`.

        Returns
        -------
        mxnet.NDArray
            The final :math:`q^*`, of shape ``(batch_size, 2 * input_dim)``.
        """
        with graph.local_scope():
            batch_size = graph.batch_size
            ctx = feat.context
            state_shape = (self.n_layers, batch_size, self.input_dim)

            # Two zero-initialised recurrent states for the LSTM.
            states = (
                nd.zeros(state_shape, ctx=ctx),
                nd.zeros(state_shape, ctx=ctx),
            )
            q_star = nd.zeros((batch_size, self.output_dim), ctx=ctx)

            for _ in range(self.n_iters):
                query, states = self.lstm(q_star.expand_dims(axis=0), states)
                query = query.reshape((batch_size, self.input_dim))

                # Attention logits: dot product of each node with the query
                # of the graph it belongs to.
                logits = (feat * broadcast_nodes(graph, query)).sum(
                    axis=-1, keepdims=True
                )
                graph.ndata["e"] = logits
                attention = softmax_nodes(graph, "e")
                graph.ndata["r"] = feat * attention
                readout = sum_nodes(graph, "r")
                q_star = nd.concat(query, readout, dim=-1)

            return q_star

    def __repr__(self):
        body = "in={}, out={}, n_iters={}, n_layers={}".format(
            self.input_dim, self.output_dim, self.n_iters, self.n_layers
        )
        return "Set2Set(" + body + ")"
class HeteroGraphConv(nn.Block):
    r"""A generic module for computing convolution on heterogeneous graphs.

    Each sub-module is applied to the relation graph it is registered for:
    it reads features from the source nodes and writes updated ones to the
    destination nodes. When several relations share a destination node type,
    their results are combined by the chosen aggregation.

    Pseudo-code:

    .. code::

        outputs = {nty : [] for nty in g.dsttypes}
        # Apply sub-modules on their associating relation graphs
        for relation in g.canonical_etypes:
            stype, etype, dtype = relation
            dstdata = relation_submodule(g[relation], ...)
            outputs[dtype].append(dstdata)

        # Aggregate the results for each destination node type
        rsts = {}
        for ntype, ntype_outputs in outputs.items():
            if len(ntype_outputs) != 0:
                rsts[ntype] = aggregate(ntype_outputs)
        return rsts

    Examples
    --------
    >>> import dgl
    >>> import dgl.nn.mxnet as dglnn
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user') : edges1,
    ...     ('user', 'plays', 'game') : edges2,
    ...     ('store', 'sells', 'game') : edges3})
    >>> conv = dglnn.HeteroGraphConv({
    ...     'follows' : dglnn.GraphConv(...),
    ...     'plays' : dglnn.GraphConv(...),
    ...     'sells' : dglnn.SAGEConv(...)},
    ...     aggregate='sum')

    A pair of input dictionaries is also accepted; every sub-module is then
    invoked with a pair of inputs.

    >>> x_src = {'user' : ..., 'store' : ...}
    >>> x_dst = {'user' : ..., 'game' : ...}
    >>> y_dst = conv(g, (x_src, x_dst))
    >>> print(y_dst.keys())
    dict_keys(['user', 'game'])

    Parameters
    ----------
    mods : dict[str, nn.Block]
        Modules keyed by edge type. Each module's forward takes a
        ``DGLGraph`` as first argument and, as second argument, either a
        tensor of node features or a (source, destination) pair of tensors.
    aggregate : str, callable, optional
        How to combine the results of different relations. Allowed strings
        are 'sum', 'max', 'min', 'mean', 'stack' ('stack' stacks along the
        second dimension). A callable ``f(tensors, dsttype)`` may be given
        instead, e.g. the equivalent of 'sum':

        .. code::

            def my_agg_func(tensors, dsttype):
                # tensors: list of tensors to aggregate
                # dsttype: name of the destination node type
                stacked = mx.nd.stack(*tensors, axis=0)
                return mx.nd.sum(stacked, axis=0)

    Attributes
    ----------
    mods : dict[str, nn.Block]
        Modules associated with every edge type.
    """

    def __init__(self, mods, aggregate="sum"):
        super(HeteroGraphConv, self).__init__()
        with self.name_scope():
            for name, mod in mods.items():
                self.register_child(mod, name)
            self.mods = mods

        # Do not break if graph has 0-in-degree nodes, because there is no
        # general rule to add self-loops to a heterograph.
        for mod in self.mods.values():
            allow_fn = getattr(mod, "set_allow_zero_in_degree", None)
            if callable(allow_fn):
                allow_fn(True)

        self.agg_fn = (
            get_aggregate_fn(aggregate)
            if isinstance(aggregate, str)
            else aggregate
        )

    def forward(self, g, inputs, mod_args=None, mod_kwargs=None):
        """Invoke every sub-module on its relation and aggregate the results.

        Parameters
        ----------
        g : DGLGraph
            Graph data.
        inputs : dict[str, Tensor] or pair of dict[str, Tensor]
            Input node features, keyed by node type.
        mod_args : dict[str, tuple[any]], optional
            Extra positional arguments for the sub-modules, keyed by edge
            type.
        mod_kwargs : dict[str, dict[str, any]], optional
            Extra keyword arguments for the sub-modules, keyed by edge type.

        Returns
        -------
        dict[str, Tensor]
            Output representations for every node type that received at
            least one result.
        """
        mod_args = {} if mod_args is None else mod_args
        mod_kwargs = {} if mod_kwargs is None else mod_kwargs

        collected = {ntype: [] for ntype in g.dsttypes}
        paired = isinstance(inputs, tuple)
        if paired:
            src_inputs, dst_inputs = inputs

        for stype, etype, dtype in g.canonical_etypes:
            rel_graph = g[stype, etype, dtype]
            if paired:
                # Skip relations lacking features on either endpoint.
                if stype not in src_inputs or dtype not in dst_inputs:
                    continue
                feat_pair = (src_inputs[stype], dst_inputs[dtype])
            else:
                # Only the source type is checked here; the destination
                # features are read from the same dictionary.
                if stype not in inputs:
                    continue
                feat_pair = (inputs[stype], inputs[dtype])
            dstdata = self.mods[etype](
                rel_graph,
                feat_pair,
                *mod_args.get(etype, ()),
                **mod_kwargs.get(etype, {})
            )
            collected[dtype].append(dstdata)

        return {
            ntype: self.agg_fn(results, ntype)
            for ntype, results in collected.items()
            if len(results) != 0
        }

    def __repr__(self):
        lines = [
            " {} : {},\n".format(name, mod) for name, mod in self.mods.items()
        ]
        return "HeteroGraphConv({\n" + "".join(lines) + "\n})"
def get_aggregate_fn(agg):
    """Internal function to get the aggregation function for node data
    generated from different relations.

    Parameters
    ----------
    agg : str
        Method for aggregating node features generated by different
        relations. Allowed values are 'sum', 'max', 'min', 'mean', 'stack'.

    Returns
    -------
    callable
        Aggregator ``f(inputs, dsttype)`` that takes a list of tensors and
        returns one aggregated tensor, or ``None`` for an empty list.

    Raises
    ------
    DGLError
        If ``agg`` is not one of the allowed values.
    """
    if agg == "stack":

        def stack_agg(inputs, dsttype):  # pylint: disable=unused-argument
            if len(inputs) == 0:
                return None
            # Relations are stacked along the second dimension.
            return nd.stack(*inputs, axis=1)

        return stack_agg

    if agg == "sum":
        fn = nd.sum
    elif agg == "max":
        fn = nd.max
    elif agg == "min":
        fn = nd.min
    elif agg == "mean":
        fn = nd.mean
    else:
        # This module does not import DGLError at the top level, so the
        # bare name used to raise NameError; import it where it is needed.
        # pylint: disable=import-outside-toplevel
        from ...base import DGLError

        # Wrap ``agg`` in a tuple so a tuple argument is reported verbatim
        # instead of being unpacked by the % operator.
        raise DGLError(
            "Invalid cross type aggregator. Must be one of "
            '"sum", "max", "min", "mean" or "stack". But got "%s"' % (agg,)
        )

    def aggfn(inputs, dsttype):  # pylint: disable=unused-argument
        if len(inputs) == 0:
            return None
        # Stack on a new leading axis and reduce over it.
        stacked = nd.stack(*inputs, axis=0)
        return fn(stacked, axis=0)

    return aggfn
def matmul_maybe_select(A, B):
    """Multiply ``A`` by ``B``, where ``A`` may be an integer id vector.

    A 1-D int32/int64 ``A`` is treated as a one-hot encoded matrix, so the
    product reduces to looking up rows of ``B``. For example, with ::

        A = [2, 0, 1],
        B = [[0.1, 0.2],
             [0.3, 0.4],
             [0.5, 0.6]]

    the result equals ::

        [[0, 0, 1],     [[0.1, 0.2],
         [1, 0, 0],  *   [0.3, 0.4],
         [0, 1, 0]]      [0.5, 0.6]]

    In all other cases a normal matrix product is performed.

    Parameters
    ----------
    A : mxnet.NDArray
        lhs tensor
    B : mxnet.NDArray
        rhs tensor

    Returns
    -------
    C : mxnet.NDArray
        result tensor
    """
    is_id_vector = A.dtype in (np.int32, np.int64) and len(A.shape) == 1
    if not is_id_vector:
        return nd.dot(A, B)
    return nd.take(B, A, axis=0)


def bmm_maybe_select(A, B, index):
    """Select matrices of ``B`` by ``index`` and multiply them with ``A``.

    ``B`` is a 3D tensor of shape (N, D1, D2), i.e. a stack of N matrices of
    shape (D1, D2), and ``index`` is an integer vector of length M. ``A`` is
    either

    (1) a dense tensor of shape (M, D1), or
    (2) an integer vector of length M.

    The result C is a 2D matrix of shape (M, D2).

    For case (1), C is computed by bmm: ::

        C[i, :] = matmul(A[i, :], B[index[i], :, :])

    For case (2), C is computed by index select: ::

        C[i, :] = B[index[i], A[i], :]

    Parameters
    ----------
    A : mxnet.NDArray
        lhs tensor
    B : mxnet.NDArray
        rhs tensor
    index : mxnet.NDArray
        index tensor

    Returns
    -------
    C : mxnet.NDArray
        return tensor
    """
    is_id_vector = A.dtype in (np.int32, np.int64) and len(A.shape) == 1
    if is_id_vector:
        return B[index, A, :]
    selected = nd.take(B, index, axis=0)
    # Treat every row of A as a 1 x D1 matrix for the batched product.
    product = nd.batch_dot(A.expand_dims(1), selected)
    return product.squeeze(1)
def normalize(x, p=2, axis=1, eps=1e-12):
    r"""Perform :math:`L_p` normalization of the input over one axis.

    Each vector :math:`v` along ``axis`` is transformed as

    .. math::
        v = \frac{v}{\max(\lVert v \rVert_p, \epsilon)}.

    With the default arguments this is the Euclidean norm over axis 1.

    Args:
        x: input ndarray of any shape
        p (float): the exponent value in the norm formulation. Default: 2
        axis (int): the axis to reduce. Default: 1
        eps (float): lower bound on the norm, to avoid division by zero.
            Default: 1e-12
    """
    vec_norm = nd.norm(x, ord=p, axis=axis, keepdims=True)
    # Clamp from below only; the upper bound is infinite.
    safe_norm = nd.clip(vec_norm, eps, float("inf"))
    return x / safe_norm
class Sequential(gluon.nn.Sequential):
    r"""A sequential container for stacking graph neural network blocks.

    Two calling modes are supported:

    * a single :class:`DGLGraph` -- every block is applied on that same graph;
    * a list of graphs -- the i-th block is applied on the i-th graph (blocks
      and graphs are paired with ``zip``).

    In both modes the output of one block is fed as the feature argument(s)
    of the next block.

    Examples
    --------
    Mode 1: sequentially apply GNN modules on the same graph

    >>> import dgl
    >>> from mxnet import nd
    >>> from mxnet.gluon import nn
    >>> import dgl.function as fn
    >>> from dgl.nn.mxnet import Sequential
    >>> class ExampleLayer(nn.Block):
    >>>     def __init__(self, **kwargs):
    >>>         super().__init__(**kwargs)
    >>>     def forward(self, graph, n_feat, e_feat):
    >>>         with graph.local_scope():
    >>>             graph.ndata['h'] = n_feat
    >>>             graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
    >>>             n_feat += graph.ndata['h']
    >>>             graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
    >>>             e_feat += graph.edata['e']
    >>>             return n_feat, e_feat
    >>>
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(3)
    >>> g.add_edges([0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2])
    >>> net = Sequential()
    >>> net.add(ExampleLayer())
    >>> net.add(ExampleLayer())
    >>> net.initialize()
    >>> n_feat = nd.random.randn(3, 4)
    >>> e_feat = nd.random.randn(9, 4)
    >>> n_out, e_out = net(g, n_feat, e_feat)

    Mode 2: sequentially apply GNN modules on different graphs

    >>> import networkx as nx
    >>> class PoolLayer(nn.Block):
    >>>     def __init__(self, **kwargs):
    >>>         super().__init__(**kwargs)
    >>>     def forward(self, graph, n_feat):
    >>>         with graph.local_scope():
    >>>             graph.ndata['h'] = n_feat
    >>>             graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
    >>>             n_feat += graph.ndata['h']
    >>>             return n_feat.reshape(graph.num_nodes() // 2, 2, -1).sum(1)
    >>>
    >>> g1 = dgl.DGLGraph(nx.erdos_renyi_graph(32, 0.05))
    >>> g2 = dgl.DGLGraph(nx.erdos_renyi_graph(16, 0.2))
    >>> net = Sequential()
    >>> net.add(PoolLayer())
    >>> net.add(PoolLayer())
    >>> net.initialize()
    >>> out = net([g1, g2], nd.random.randn(32, 4))
    """

    def __init__(self, prefix=None, params=None):
        super().__init__(prefix=prefix, params=params)

    def forward(self, graph, *feats):
        r"""Sequentially apply modules to the input.

        Parameters
        ----------
        graph : DGLGraph or list of DGLGraphs
            The graph(s) to apply modules on.
        *feats :
            Input features. The output of the :math:`i`-th block should match
            the input of the :math:`(i+1)`-th block.

        Returns
        -------
        The output of the last applied block (the input features, as a tuple,
        when no block is applied).

        Raises
        ------
        TypeError
            If ``graph`` is neither a DGLGraph nor a list.
        """
        # Build the (graph, block) schedule first; both modes then share the
        # same application loop.
        if isinstance(graph, list):
            schedule = zip(graph, self)
        elif isinstance(graph, DGLGraph):
            schedule = ((graph, module) for module in self)
        else:
            raise TypeError(
                "The first argument of forward must be a DGLGraph"
                " or a list of DGLGraph s"
            )

        for current_graph, module in schedule:
            # A block may return a single value; re-wrap it so that it can be
            # unpacked as positional arguments of the next block.
            if not isinstance(feats, tuple):
                feats = (feats,)
            feats = module(current_graph, *feats)
        return feats
class AGNNConv(nn.Module):
    r"""Attention-based Graph Neural Network layer from `Attention-based
    Graph Neural Network for Semi-Supervised Learning`.

    .. math::
        H^{l+1} = P H^{l}

    where :math:`P` is computed as:

    .. math::
        P_{ij} = \mathrm{softmax}_i ( \beta \cdot \cos(h_i^l, h_j^l))

    and :math:`\beta` is a single scalar parameter.

    Parameters
    ----------
    init_beta : float, optional
        The :math:`\beta` in the formula, a single scalar parameter.
    learn_beta : bool, optional
        If True, :math:`\beta` is a learnable parameter; otherwise it is
        registered as a (non-trainable) buffer.
    allow_zero_in_degree : bool, optional
        If False (default), :meth:`forward` raises a DGLError when the input
        graph contains 0-in-degree nodes, because no message reaches those
        nodes and their output is invalid. Setting ``True`` suppresses the
        check and leaves the handling to the caller.

    Note
    ----
    A common way to avoid 0-in-degree nodes on a homogeneous graph is to add
    a self-loop to every node:

    >>> g = ... # a DGLGraph
    >>> g = dgl.add_self_loop(g)

    When that is not applicable (for example on heterogeneous graphs, where
    the edge type of a self-loop is undefined), set ``allow_zero_in_degree``
    to ``True`` and handle the affected nodes manually, e.g. by filtering
    them out after the convolution.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import AGNNConv
    >>>
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = dgl.add_self_loop(g)
    >>> feat = th.ones(6, 10)
    >>> conv = AGNNConv()
    >>> res = conv(g, feat)   # shape (6, 10), all ones for this input
    """

    def __init__(
        self, init_beta=1.0, learn_beta=True, allow_zero_in_degree=False
    ):
        super().__init__()
        self._allow_zero_in_degree = allow_zero_in_degree
        beta = th.Tensor([init_beta])
        if learn_beta:
            self.beta = nn.Parameter(beta)
        else:
            # A buffer follows the module across devices and is saved in the
            # state dict, but is never updated by an optimizer.
            self.register_buffer("beta", beta)

    def set_allow_zero_in_degree(self, set_value):
        r"""Set the ``allow_zero_in_degree`` flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat):
        r"""Compute the AGNN layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            The input feature of shape :math:`(N, *)` where :math:`N` is the
            number of nodes and :math:`*` could be of any shape. If a pair is
            given, it must contain two tensors of shape :math:`(N_{in}, *)`
            and :math:`(N_{out}, *)` whose trailing shapes agree.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, *)` where :math:`*` is the
            same as the input shape.

        Raises
        ------
        DGLError
            If the graph has 0-in-degree nodes and the check has not been
            disabled through ``allow_zero_in_degree``.
        """
        with graph.local_scope():
            check_degrees = not self._allow_zero_in_degree
            if check_degrees and (graph.in_degrees() == 0).any():
                raise DGLError(
                    "There are 0-in-degree nodes in the graph, "
                    "output for those nodes will be invalid. "
                    "This is harmful for some applications, "
                    "causing silent performance regression. "
                    "Adding self-loop on the input graph by "
                    "calling `g = dgl.add_self_loop(g)` will resolve "
                    "the issue. Setting ``allow_zero_in_degree`` "
                    "to be `True` when constructing this module will "
                    "suppress the check and let the code run."
                )

            src_feat, dst_feat = expand_as_pair(feat, graph)
            graph.srcdata["h"] = src_feat
            graph.srcdata["norm_h"] = F.normalize(src_feat, p=2, dim=-1)
            # Destination features are stored separately only for a feature
            # pair or a block graph.
            if isinstance(feat, tuple) or graph.is_block:
                graph.dstdata["norm_h"] = F.normalize(dst_feat, p=2, dim=-1)

            # The dot product of unit vectors is the cosine similarity.
            graph.apply_edges(fn.u_dot_v("norm_h", "norm_h", "cos"))
            cosine = graph.edata.pop("cos")
            scores = self.beta * cosine
            graph.edata["p"] = edge_softmax(graph, scores)

            # Attention-weighted sum of the source features.
            graph.update_all(fn.u_mul_e("h", "p", "m"), fn.sum("m", "h"))
            return graph.dstdata.pop("h")
class APPNPConv(nn.Module):
    r"""Approximate Personalized Propagation of Neural Predictions layer from
    `Predict then Propagate: Graph Neural Networks meet Personalized PageRank`.

    .. math::
        H^{0} &= X

        H^{l+1} &= (1-\alpha)\left(\tilde{D}^{-1/2}
        \tilde{A} \tilde{D}^{-1/2} H^{l}\right) + \alpha H^{0}

    where :math:`\tilde{A}` is :math:`A` + :math:`I`.

    Parameters
    ----------
    k : int
        The number of iterations :math:`K`.
    alpha : float
        The teleport probability :math:`\alpha`.
    edge_drop : float, optional
        The dropout rate on edges that controls the messages received by each
        node. Default: ``0``.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import APPNPConv
    >>>
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> feat = th.ones(6, 10)
    >>> conv = APPNPConv(k=3, alpha=0.5)
    >>> res = conv(g, feat)   # shape (6, 10)
    """

    def __init__(self, k, alpha, edge_drop=0.0):
        super().__init__()
        self._k = k
        self._alpha = alpha
        self.edge_drop = nn.Dropout(edge_drop)

    def forward(self, graph, feat, edge_weight=None):
        r"""Compute the APPNP layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor
            The input feature of shape :math:`(N, *)`. :math:`N` is the
            number of nodes, and :math:`*` could be of any shape.
        edge_weight : torch.Tensor, optional
            Edge weights to use in the message passing process. This is
            equivalent to using a weighted adjacency matrix in the equation
            above; the normalization
            :math:`\tilde{D}^{-1/2}\tilde{A} \tilde{D}^{-1/2}` is then
            computed by :class:`dgl.nn.pytorch.conv.graphconv.EdgeWeightNorm`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, *)` where :math:`*` is the
            same as the input shape.
        """
        with graph.local_scope():
            use_degree_norm = edge_weight is None
            if use_degree_norm:
                # Per-node norms get trailing singleton dims so that they
                # broadcast over every feature dimension.
                trailing = (1,) * (feat.dim() - 1)
                src_norm = th.pow(
                    graph.out_degrees().to(feat).clamp(min=1), -0.5
                )
                src_norm = th.reshape(src_norm, src_norm.shape + trailing)
                dst_norm = th.pow(
                    graph.in_degrees().to(feat).clamp(min=1), -0.5
                )
                dst_norm = th.reshape(dst_norm, dst_norm.shape + trailing)
                # Unit weights are loop-invariant: build them once, directly
                # with the dtype and device of the features, instead of on
                # the CPU in the default dtype at every propagation step.
                weight = th.ones(
                    graph.num_edges(),
                    1,
                    dtype=feat.dtype,
                    device=feat.device,
                )
            else:
                weight = EdgeWeightNorm("both")(graph, edge_weight)

            feat_0 = feat
            for _ in range(self._k):
                # normalization by src node
                if use_degree_norm:
                    feat = feat * src_norm
                graph.ndata["h"] = feat
                # Dropout is re-sampled at every step, so it stays in the loop.
                graph.edata["w"] = self.edge_drop(weight).to(feat.device)
                graph.update_all(fn.u_mul_e("h", "w", "m"), fn.sum("m", "h"))
                feat = graph.ndata.pop("h")
                # normalization by dst node
                if use_degree_norm:
                    feat = feat * dst_norm
                # Teleport back to the initial features with probability alpha.
                feat = (1 - self._alpha) * feat + self._alpha * feat_0
            return feat
class RadialPooling(nn.Module):
    r"""Radial pooling from `Atomic Convolutional Networks for Predicting
    Protein-Ligand Binding Affinity`.

    We denote the distance between atom :math:`i` and :math:`j` by
    :math:`r_{ij}`. For the radial filter indexed by :math:`k`, edge
    distances are projected with

    .. math::
        h_{ij}^{k} = \exp(-\gamma_{k}|r_{ij}-r_{k}|^2)

    If :math:`r_{ij} \le c_k`,

    .. math::
        f_{ij}^{k} = 0.5 * \left(\cos(\frac{\pi r_{ij}}{c_k}) + 1\right),

    else,

    .. math::
        f_{ij}^{k} = 0.

    Finally,

    .. math::
        e_{ij}^{k} = h_{ij}^{k} * f_{ij}^{k}

    Parameters
    ----------
    interaction_cutoffs : float32 tensor of shape (K)
        :math:`c_k` in the equations above. Roughly they can be considered as
        learnable cutoffs and two atoms are considered as connected if the
        distance between them is smaller than the cutoffs. K for the number
        of radial filters.
    rbf_kernel_means : float32 tensor of shape (K)
        :math:`r_k` in the equations above.
    rbf_kernel_scaling : float32 tensor of shape (K)
        :math:`\gamma_k` in the equations above.
    """

    def __init__(
        self, interaction_cutoffs, rbf_kernel_means, rbf_kernel_scaling
    ):
        super().__init__()

        def as_filter_parameter(values):
            # One learnable scalar per filter, shaped (K, 1, 1) so that it
            # broadcasts against the distances passed to forward().
            return nn.Parameter(values.reshape(-1, 1, 1), requires_grad=True)

        self.interaction_cutoffs = as_filter_parameter(interaction_cutoffs)
        self.rbf_kernel_means = as_filter_parameter(rbf_kernel_means)
        self.rbf_kernel_scaling = as_filter_parameter(rbf_kernel_scaling)

    def forward(self, distances):
        """Apply the layer to transform edge distances.

        Parameters
        ----------
        distances : Float32 tensor of shape (E, 1)
            Distance between end nodes of edges. E for the number of edges.

        Returns
        -------
        Float32 tensor of shape (K, E, 1)
            Transformed edge distances. K for the number of radial filters.
        """
        cutoffs = self.interaction_cutoffs

        # Gaussian radial basis response, (K, E, 1).
        exponent = (
            -self.rbf_kernel_scaling
            * (distances - self.rbf_kernel_means) ** 2
        )
        rbf_response = th.exp(exponent)

        # Cosine envelope, zeroed for distances beyond the cutoff, (K, E, 1).
        envelope = 0.5 * (th.cos(np.pi * distances / cutoffs) + 1)
        within_cutoff = distances <= cutoffs
        envelope = th.where(within_cutoff, envelope, th.zeros_like(envelope))

        # Note that there appears to be an inconsistency between the paper
        # and DeepChem's implementation. In the paper, the scaled distance is
        # first multiplied by the cutoff values and then exponentiated. Here
        # we follow the practice of DeepChem.
        return rbf_response * envelope
def msg_func(edges):
    """Send messages along edges.

    The message of an edge is the outer product of the source node feature
    ``hv`` and the edge feature ``he``, flattened to a single vector.

    Parameters
    ----------
    edges : EdgeBatch
        A batch of edges.

    Returns
    -------
    dict mapping 'm' to Float32 tensor of shape (E, K * T)
        Messages computed. E for the number of edges, K for the number of
        radial filters and T for the number of features to use (types of
        atomic number in the paper).
    """
    src_feats = edges.src["hv"]
    edge_feats = edges.data["he"]
    # Per-edge outer product: (E, a) x (E, b) -> (E, a, b).
    outer = th.einsum("ij,ik->ijk", src_feats, edge_feats)
    return {"m": outer.view(len(edges), -1)}


def reduce_func(nodes):
    """Collect messages and update node representations.

    Parameters
    ----------
    nodes : NodeBatch
        A batch of nodes.

    Returns
    -------
    dict mapping 'hv_new' to Float32 tensor of shape (V, K * T)
        Updated node representations. V for the number of nodes, K for the
        number of radial filters and T for the number of features to use
        (types of atomic number in the paper).
    """
    incoming = nodes.mailbox["m"]
    # Dimension 1 of the mailbox is summed, i.e. the messages of each node.
    return {"hv_new": incoming.sum(dim=1)}
class AtomicConv(nn.Module):
    r"""Atomic Convolution Layer from `Atomic Convolutional Networks for
    Predicting Protein-Ligand Binding Affinity`.

    Denoting the type of atom :math:`i` by :math:`z_i` and the distance
    between atom :math:`i` and :math:`j` by :math:`r_{ij}`.

    **Distance Transformation**

    Distances are first transformed by the radial filters of
    :class:`RadialPooling`, which yields one value :math:`e_{ij}^{k}` per
    edge and per radial filter :math:`k`.

    **Aggregation**

    For each type :math:`t`, each atom collects distance information from all
    neighbor atoms of type :math:`t`:

    .. math::
        p_{i, t}^{k} = \sum_{j\in N(i)} e_{ij}^{k} * 1(z_j == t)

    The results for all RBF kernels and atom types are then concatenated.

    Parameters
    ----------
    interaction_cutoffs : float32 tensor of shape (K)
        :math:`c_k` of the radial filters. Roughly they can be considered as
        learnable cutoffs and two atoms are considered as connected if the
        distance between them is smaller than the cutoffs. K for the number
        of radial filters.
    rbf_kernel_means : float32 tensor of shape (K)
        :math:`r_k` of the radial filters.
    rbf_kernel_scaling : float32 tensor of shape (K)
        :math:`\gamma_k` of the radial filters.
    features_to_use : None or float tensor of shape (T)
        In the original paper, these are atomic numbers to consider,
        representing the types of atoms. T for the number of types of atomic
        numbers. Default to None.

    Note
    ----
    * This convolution operation is designed for molecular graphs in
      Chemistry, but it might be possible to extend it to more general graphs.
    * There seems to be an inconsistency about the definition of
      :math:`e_{ij}^{k}` in the paper and the author's implementation. We
      follow the author's implementation. In the paper, :math:`e_{ij}^{k}`
      was defined as :math:`\exp(-\gamma_{k}|r_{ij}-r_{k}|^2 * f_{ij}^{k})`.
    * :math:`\gamma_{k}`, :math:`r_k` and :math:`c_k` are all learnable.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import AtomicConv
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> feat = th.ones(6, 1)
    >>> edist = th.ones(6, 1)
    >>> interaction_cutoffs = th.ones(3).float() * 2
    >>> rbf_kernel_means = th.ones(3).float()
    >>> rbf_kernel_scaling = th.ones(3).float()
    >>> conv = AtomicConv(interaction_cutoffs, rbf_kernel_means, rbf_kernel_scaling)
    >>> res = conv(g, feat, edist)   # shape (6, 3)
    """

    def __init__(
        self,
        interaction_cutoffs,
        rbf_kernel_means,
        rbf_kernel_scaling,
        features_to_use=None,
    ):
        super().__init__()

        self.radial_pooling = RadialPooling(
            interaction_cutoffs=interaction_cutoffs,
            rbf_kernel_means=rbf_kernel_means,
            rbf_kernel_scaling=rbf_kernel_scaling,
        )

        if features_to_use is not None:
            self.num_channels = len(features_to_use)
            # Stored as a frozen parameter: it follows the module but is not
            # trained.
            self.features_to_use = nn.Parameter(
                features_to_use, requires_grad=False
            )
        else:
            self.num_channels = 1
            self.features_to_use = None

    def forward(self, graph, feat, distances):
        """Apply the atomic convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            Topology based on which message passing is performed.
        feat : Float32 tensor of shape :math:`(V, 1)`
            Initial node features, which are atomic numbers in the paper.
            :math:`V` for the number of nodes.
        distances : Float32 tensor of shape :math:`(E, 1)`
            Distance between end nodes of edges. E for the number of edges.

        Returns
        -------
        Float32 tensor of shape :math:`(V, K * T)`
            Updated node representations. :math:`V` for the number of nodes,
            :math:`K` for the number of radial filters, and :math:`T` for the
            number of types of atomic numbers.
        """
        with graph.local_scope():
            # (K, E, 1), cast to the dtype/device of the node features.
            pooled = self.radial_pooling(distances).to(feat)

            node_feat = feat
            if self.features_to_use is not None:
                # Indicator of which listed type each node matches, (V, T).
                node_feat = (feat == self.features_to_use).to(feat)

            graph.ndata["hv"] = node_feat
            # (K, E, 1) -> (E, K)
            graph.edata["he"] = pooled.transpose(1, 0).squeeze(-1)
            graph.update_all(msg_func, reduce_func)

            # (V, K * T)
            updated = graph.ndata["hv_new"]
            return updated.view(graph.num_nodes(), -1)
class ShiftedSoftplus(nn.Module):
    r"""Applies the element-wise function:

    .. math::
        \text{SSP}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) - \log(\text{shift})

    Attributes
    ----------
    beta : int
        :math:`\beta` value for the mathematical formulation. Default to 1.
    shift : int
        :math:`\text{shift}` value for the mathematical formulation.
        Default to 2.
    """

    def __init__(self, beta=1, shift=2, threshold=20):
        super().__init__()
        self.shift = shift
        self.softplus = nn.Softplus(beta=beta, threshold=threshold)

    def forward(self, inputs):
        """Apply the activation function.

        Parameters
        ----------
        inputs : float32 tensor of shape (N, *)
            * denotes any number of additional dimensions.

        Returns
        -------
        float32 tensor of shape (N, *)
            Result of applying the activation function to the input.
        """
        # log(shift) is read from self.shift at call time, so a later change
        # of the attribute is honoured.
        offset = np.log(float(self.shift))
        activated = self.softplus(inputs)
        return activated - offset


class CFConv(nn.Module):
    r"""CFConv from `SchNet: A continuous-filter convolutional neural network
    for modeling quantum interactions`.

    It combines node and edge features in message passing and updates node
    representations.

    .. math::
        h_i^{(l+1)} = \sum_{j\in \mathcal{N}(i)} h_j^{l} \circ W^{(l)}e_{ij}

    where :math:`\circ` represents element-wise multiplication. The
    projections use the shifted softplus activation :math:`\text{SSP}`:

    .. math::
        \text{SSP}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) - \log(\text{shift})

    Parameters
    ----------
    node_in_feats : int
        Size for the input node features :math:`h_j^{(l)}`.
    edge_in_feats : int
        Size for the input edge features :math:`e_{ij}`.
    hidden_feats : int
        Size for the hidden representations.
    out_feats : int
        Size for the output representations :math:`h_j^{(l+1)}`.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import CFConv
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> nfeat = th.ones(6, 10)
    >>> efeat = th.ones(6, 5)
    >>> conv = CFConv(10, 5, 3, 2)
    >>> res = conv(g, nfeat, efeat)   # shape (6, 2)
    """

    def __init__(self, node_in_feats, edge_in_feats, hidden_feats, out_feats):
        super().__init__()

        # The sub-modules are created in a fixed order (edge, node, out) so
        # that their random initialization is reproducible for a given seed.
        edge_layers = [
            nn.Linear(edge_in_feats, hidden_feats),
            ShiftedSoftplus(),
            nn.Linear(hidden_feats, hidden_feats),
            ShiftedSoftplus(),
        ]
        self.project_edge = nn.Sequential(*edge_layers)
        self.project_node = nn.Linear(node_in_feats, hidden_feats)
        out_layers = [nn.Linear(hidden_feats, out_feats), ShiftedSoftplus()]
        self.project_out = nn.Sequential(*out_layers)

    def forward(self, g, node_feats, edge_feats):
        """Perform message passing and update node representations.

        Parameters
        ----------
        g : DGLGraph
            The graph.
        node_feats : torch.Tensor or pair of torch.Tensor
            The input node features. If a torch.Tensor is given, it
            represents the input node feature of shape :math:`(N, D_{in})`
            where :math:`D_{in}` is size of input feature, :math:`N` is the
            number of nodes. If a pair of torch.Tensor is given, which is the
            case for bipartite graph, the pair must contain two tensors of
            shape :math:`(N_{src}, D_{in_{src}})` and
            :math:`(N_{dst}, D_{in_{dst}})` separately for the source and
            destination nodes. Only the source tensor is used.
        edge_feats : torch.Tensor
            The input edge feature of shape :math:`(E, edge_in_feats)`
            where :math:`E` is the number of edges.

        Returns
        -------
        torch.Tensor
            The output node feature of shape :math:`(N_{out}, out_feats)`
            where :math:`N_{out}` is the number of destination nodes.
        """
        with g.local_scope():
            src_feats = node_feats
            if isinstance(node_feats, tuple):
                # The destination features of a pair are not needed here.
                src_feats, _ = node_feats

            g.srcdata["hv"] = self.project_node(src_feats)
            g.edata["he"] = self.project_edge(edge_feats)
            # Each message is the source feature gated by the edge filter.
            g.update_all(fn.u_mul_e("hv", "he", "m"), fn.sum("m", "h"))
            aggregated = g.dstdata["h"]
            return self.project_out(aggregated)
class ChebConv(nn.Module):
    r"""Chebyshev Spectral Graph Convolution layer from `Convolutional Neural
    Networks on Graphs with Fast Localized Spectral Filtering`.

    .. math::
        h_i^{l+1} &= \sum_{k=0}^{K-1} W^{k, l}z_i^{k, l}

        Z^{0, l} &= H^{l}

        Z^{1, l} &= \tilde{L} \cdot H^{l}

        Z^{k, l} &= 2 \cdot \tilde{L} \cdot Z^{k-1, l} - Z^{k-2, l}

        \tilde{L} &= 2\left(I - \tilde{D}^{-1/2} \tilde{A} \tilde{D}^{-1/2}\right)/\lambda_{max} - I

    where :math:`\tilde{A}` is :math:`A` + :math:`I`, :math:`W` is learnable
    weight.

    Parameters
    ----------
    in_feats: int
        Dimension of input features; i.e, the number of dimensions of
        :math:`h_i^{(l)}`.
    out_feats: int
        Dimension of output features :math:`h_i^{(l+1)}`.
    k : int
        Chebyshev filter size :math:`K`.
    activation : function, optional
        Activation function. Default ``ReLu``.
    bias : bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import ChebConv
    >>>
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> feat = th.ones(6, 10)
    >>> conv = ChebConv(10, 2, 2)
    >>> res = conv(g, feat)   # shape (6, 2)
    """

    def __init__(self, in_feats, out_feats, k, activation=F.relu, bias=True):
        super().__init__()
        self._k = k
        self._in_feats = in_feats
        self._out_feats = out_feats
        self.activation = activation
        # The k Chebyshev terms are concatenated along the feature axis
        # before the projection, hence k * in_feats input features.
        self.linear = nn.Linear(k * in_feats, out_feats, bias)

    def forward(self, graph, feat, lambda_max=None):
        r"""Compute ChebNet layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor
            The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
            is size of input feature, :math:`N` is the number of nodes.
        lambda_max : list or tensor or None, optional.
            A list(tensor) with length :math:`B`, stores the largest eigenvalue
            of the normalized laplacian of each individual graph in ``graph``,
            where :math:`B` is the batch size of the input graph. Default: None.

            If None, this method would set the default value to 2.
            One can use :func:`dgl.laplacian_lambda_max` to compute this value.
            A tensor is converted to the dtype and device of ``feat``.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """

        def unnLaplacian(feat, D_invsqrt, graph):
            """Operation Feat * D^-1/2 A D^-1/2"""
            graph.ndata["h"] = feat * D_invsqrt
            graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
            return graph.ndata.pop("h") * D_invsqrt

        with graph.local_scope():
            D_invsqrt = th.pow(
                graph.in_degrees().to(feat).clamp(min=1), -0.5
            ).unsqueeze(-1)

            if lambda_max is None:
                dgl_warning(
                    "lambda_max is not provided, using default value of 2.  "
                    "Please use dgl.laplacian_lambda_max to compute the eigenvalues."
                )
                lambda_max = [2] * graph.batch_size

            if isinstance(lambda_max, list):
                lambda_max = th.Tensor(lambda_max)
            # Align with the features in every case: a tensor passed by the
            # caller may live on another device or have another dtype, which
            # would otherwise break the arithmetic and concatenation below.
            lambda_max = lambda_max.to(feat)
            if lambda_max.dim() == 1:
                lambda_max = lambda_max.unsqueeze(-1)  # (B,) to (B, 1)

            # broadcast from (B, 1) to (N, 1)
            lambda_max = broadcast_nodes(graph, lambda_max)
            re_norm = 2.0 / lambda_max

            # X_0 is the raw feature, Xt is the list of X_0, X_1, ... X_t
            X_0 = feat
            Xt = [X_0]

            # X_1 = L~ X_0
            if self._k > 1:
                h = unnLaplacian(X_0, D_invsqrt, graph)
                X_1 = -re_norm * h + X_0 * (re_norm - 1)
                Xt.append(X_1)

            # X_i = 2 L~ X_{i-1} - X_{i-2}, i = 2...k-1
            for _ in range(2, self._k):
                h = unnLaplacian(X_1, D_invsqrt, graph)
                X_i = -2 * re_norm * h + X_1 * 2 * (re_norm - 1) - X_0
                # Append X_i to Xt and shift the two-term recurrence window.
                Xt.append(X_i)
                X_1, X_0 = X_i, X_1

            # Create the concatenation
            Xt = th.cat(Xt, dim=1)

            # linear projection
            h = self.linear(Xt)

            # activation
            if self.activation:
                h = self.activation(h)

        return h
class CuGraphBaseConv(nn.Module):
    r"""An abstract base class for cugraph-ops nn module."""

    def __init__(self):
        super().__init__()
        # Scratch buffer reused across pad_offsets() calls; allocated lazily.
        self._cached_offsets_fg = None

    def reset_parameters(self):
        r"""Resets all learnable parameters of the module."""
        raise NotImplementedError

    def forward(self, *args):
        r"""Runs the forward pass of the module."""
        raise NotImplementedError

    def pad_offsets(self, offsets: torch.Tensor, size: int) -> torch.Tensor:
        r"""Pad zero-in-degree nodes to the end of offsets to reach size.

        cugraph-ops often provides two variants of aggregation functions for a
        specific model: one intended for sampled-graph use cases, one for
        full-graph ones. The former is in general more performant, however, it
        only works when the sample size (the max of in-degrees) is small (<200),
        due to the limit of GPU shared memory. For graphs with a larger max
        in-degree, we need to fall back to the full-graph option, which requires
        to convert a DGL block to a full graph. With the csc-representation,
        this is equivalent to pad zero-in-degree nodes to the end of the offsets
        array (also called indptr or colptr).

        Parameters
        ----------
        offsets :
            The (monotonically increasing) index pointer array in a CSC-format
            graph.
        size : int
            The length of offsets after padding.

        Returns
        -------
        torch.Tensor
            The augmented offsets array. It is a view of an internal buffer
            that is reused (and overwritten) by subsequent calls.
        """
        cache = self._cached_offsets_fg
        stale = cache is None or (
            cache.dtype != offsets.dtype or cache.device != offsets.device
        )
        if stale:
            # (Re)allocate when there is no buffer yet or when the buffer no
            # longer matches the dtype/device of the offsets; otherwise the
            # result would silently come back with the old dtype/device.
            cache = torch.empty(
                size, dtype=offsets.dtype, device=offsets.device
            )
            self._cached_offsets_fg = cache
        elif cache.numel() < size:
            cache.resize_(size)

        num_offsets = offsets.numel()
        cache[:num_offsets] = offsets
        # Repeating the last offset gives the padded nodes an empty
        # neighbourhood.
        cache[num_offsets:size] = offsets[-1]
        return cache[:size]
This module depends on :code:`pylibcugraphops` package, which can be installed via :code:`conda install -c nvidia pylibcugraphops=23.04`. :code:`pylibcugraphops` 23.04 requires python 3.8.x or 3.10.x. .. note:: This is an **experimental** feature. Parameters ---------- in_feats : int Input feature size. out_feats : int Output feature size. num_heads : int Number of heads in Multi-Head Attention. feat_drop : float, optional Dropout rate on feature. Defaults: ``0``. negative_slope : float, optional LeakyReLU angle of negative slope. Defaults: ``0.2``. residual : bool, optional If True, use residual connection. Defaults: ``False``. activation : callable activation function/layer or None, optional. If not None, applies an activation function to the updated node features. Default: ``None``. bias : bool, optional If True, learns a bias term. Defaults: ``True``. Examples -------- >>> import dgl >>> import torch >>> from dgl.nn import CuGraphGATConv >>> device = 'cuda' >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) >>> g = dgl.add_self_loop(g) >>> feat = torch.ones(6, 10).to(device) >>> conv = CuGraphGATConv(10, 2, num_heads=3).to(device) >>> res = conv(g, feat) >>> res tensor([[[ 0.2340, 1.9226], [ 1.6477, -1.9986], [ 1.1138, -1.9302]], [[ 0.2340, 1.9226], [ 1.6477, -1.9986], [ 1.1138, -1.9302]], [[ 0.2340, 1.9226], [ 1.6477, -1.9986], [ 1.1138, -1.9302]], [[ 0.2340, 1.9226], [ 1.6477, -1.9986], [ 1.1138, -1.9302]], [[ 0.2340, 1.9226], [ 1.6477, -1.9986], [ 1.1138, -1.9302]], [[ 0.2340, 1.9226], [ 1.6477, -1.9986], [ 1.1138, -1.9302]]], device='cuda:0', grad_fn=) """ MAX_IN_DEGREE_MFG = 200 def __init__( self, in_feats, out_feats, num_heads, feat_drop=0.0, negative_slope=0.2, residual=False, activation=None, bias=True, ): if HAS_PYLIBCUGRAPHOPS is False: raise ModuleNotFoundError( f"{self.__class__.__name__} requires pylibcugraphops=23.04. " f"Install via `conda install -c nvidia 'pylibcugraphops=23.04'`." f"pylibcugraphops requires Python 3.8 or 3.10." 
) super().__init__() self.in_feats = in_feats self.out_feats = out_feats self.num_heads = num_heads self.feat_drop = nn.Dropout(feat_drop) self.negative_slope = negative_slope self.activation = activation self.fc = nn.Linear(in_feats, out_feats * num_heads, bias=False) self.attn_weights = nn.Parameter( torch.Tensor(2 * num_heads * out_feats) ) if bias: self.bias = nn.Parameter(torch.Tensor(num_heads * out_feats)) else: self.register_buffer("bias", None) if residual: if in_feats == out_feats * num_heads: self.res_fc = nn.Identity() else: self.res_fc = nn.Linear( in_feats, out_feats * num_heads, bias=False ) else: self.register_buffer("res_fc", None) self.reset_parameters() def reset_parameters(self): r"""Reinitialize learnable parameters.""" gain = nn.init.calculate_gain("relu") nn.init.xavier_normal_(self.fc.weight, gain=gain) nn.init.xavier_normal_( self.attn_weights.view(2, self.num_heads, self.out_feats), gain=gain ) if self.bias is not None: nn.init.zeros_(self.bias) if isinstance(self.res_fc, nn.Linear): self.res_fc.reset_parameters() def forward(self, g, feat, max_in_degree=None): r"""Forward computation. Parameters ---------- g : DGLGraph The graph. feat : torch.Tensor Input features of shape :math:`(N, D_{in})`. max_in_degree : int Maximum in-degree of destination nodes. It is only effective when :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. When :attr:`g` is generated from a neighbor sampler, the value should be set to the corresponding :attr:`fanout`. If not given, :attr:`max_in_degree` will be calculated on-the-fly. Returns ------- torch.Tensor The output feature of shape :math:`(N, H, D_{out})` where :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. 
class CuGraphRelGraphConv(CuGraphBaseConv):
    r"""An accelerated relational graph convolution layer from `Modeling
    Relational Data with Graph Convolutional Networks
    <https://arxiv.org/abs/1703.06103>`__ that leverages the highly-optimized
    aggregation primitives in cugraph-ops.

    See :class:`dgl.nn.pytorch.conv.RelGraphConv` for mathematical model.

    This module depends on :code:`pylibcugraphops` package, which can be
    installed via :code:`conda install -c nvidia pylibcugraphops=23.04`.
    :code:`pylibcugraphops` 23.04 requires python 3.8.x or 3.10.x.

    .. note::
        This is an **experimental** feature.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    num_rels : int
        Number of relations.
    regularizer : str, optional
        Which weight regularizer to use ("basis" or ``None``):
         - "basis" is for basis-decomposition.
         - ``None`` applies no regularization.
        Default: ``None``.
    num_bases : int, optional
        Number of bases. It comes into effect when a regularizer is applied.
        Default: ``None``.
    bias : bool, optional
        True if bias is added. Default: ``True``.
    self_loop : bool, optional
        True to include self loop message. Default: ``True``.
    dropout : float, optional
        Dropout rate. Default: ``0.0``.
    apply_norm : bool, optional
        True to normalize aggregation output by the in-degree of the
        destination node per edge type, i.e. :math:`|\mathcal{N}^r_i|`.
        Default: ``False``.

    Examples
    --------
    >>> import dgl
    >>> import torch
    >>> from dgl.nn import CuGraphRelGraphConv
    ...
    >>> device = 'cuda'
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device)
    >>> feat = torch.ones(6, 10).to(device)
    >>> conv = CuGraphRelGraphConv(
    ...     10, 2, 3, regularizer='basis', num_bases=2).to(device)
    >>> etype = torch.tensor([0,1,2,0,1,2]).to(device)
    >>> res = conv(g, feat, etype)
    >>> res
    tensor([[-1.7774, -2.0184],
            [-1.4335, -2.3758],
            [-1.7774, -2.0184],
            [-0.4698, -3.0876],
            [-1.4335, -2.3758],
            [-1.4331, -2.3295]], device='cuda:0', grad_fn=<AddBackward0>)
    """

    # Blocks whose max in-degree reaches this value get their offsets padded
    # to the number of source nodes before the cugraph-ops graph is built.
    MAX_IN_DEGREE_MFG = 500

    def __init__(
        self,
        in_feat,
        out_feat,
        num_rels,
        regularizer=None,
        num_bases=None,
        bias=True,
        self_loop=True,
        dropout=0.0,
        apply_norm=False,
    ):
        if HAS_PYLIBCUGRAPHOPS is False:
            raise ModuleNotFoundError(
                f"{self.__class__.__name__} requires pylibcugraphops=23.04. "
                "Install via `conda install -c nvidia "
                "'pylibcugraphops=23.04'`. "
                "pylibcugraphops requires Python 3.8 or 3.10."
            )
        super().__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
        self.num_rels = num_rels
        self.apply_norm = apply_norm
        self.dropout = nn.Dropout(dropout)

        # The self-loop weight, when present, is stored as one extra slice at
        # the end of ``W``.
        dim_self_loop = 1 if self_loop else 0
        self.self_loop = self_loop
        if regularizer is None:
            self.W = nn.Parameter(
                torch.Tensor(num_rels + dim_self_loop, in_feat, out_feat)
            )
            self.coeff = None
        elif regularizer == "basis":
            if num_bases is None:
                raise ValueError(
                    'Missing "num_bases" for basis regularization.'
                )
            self.W = nn.Parameter(
                torch.Tensor(num_bases + dim_self_loop, in_feat, out_feat)
            )
            self.coeff = nn.Parameter(torch.Tensor(num_rels, num_bases))
            self.num_bases = num_bases
        else:
            raise ValueError(
                f"Supported regularizer options: 'basis' or None, but got "
                f"'{regularizer}'."
            )
        self.regularizer = regularizer

        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_feat))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        r"""Reinitialize learnable parameters."""
        bound = 1 / math.sqrt(self.in_feat)
        # Exclude the trailing self-loop slice, which is initialized below.
        end = -1 if self.self_loop else None
        nn.init.uniform_(self.W[:end], -bound, bound)
        if self.regularizer == "basis":
            nn.init.xavier_uniform_(
                self.coeff, gain=nn.init.calculate_gain("relu")
            )
        if self.self_loop:
            nn.init.xavier_uniform_(self.W[-1], nn.init.calculate_gain("relu"))
        if self.bias is not None:
            nn.init.zeros_(self.bias)

    def forward(self, g, feat, etypes, max_in_degree=None):
        r"""Forward computation.

        Parameters
        ----------
        g : DGLGraph
            The graph.
        feat : torch.Tensor
            A 2D tensor of node features. Shape: :math:`(|V|, D_{in})`.
        etypes : torch.Tensor
            A 1D integer tensor of edge types. Shape: :math:`(|E|,)`.
            Note that cugraph-ops only accepts edge type tensors in int32,
            so any input of other integer types will be cast into int32,
            thus introducing some overhead. Pass in int32 tensors directly
            for best performance.
        max_in_degree : int, optional
            Maximum in-degree of destination nodes. It is only effective when
            :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. When
            :attr:`g` is generated from a neighbor sampler, the value should be
            set to the corresponding :attr:`fanout`. If not given,
            :attr:`max_in_degree` will be calculated on-the-fly.

        Returns
        -------
        torch.Tensor
            New node features. Shape: :math:`(|V|, D_{out})`.
        """
        offsets, indices, edge_ids = g.adj_tensors("csc")
        # Reorder the edge types to follow the CSC edge order.
        edge_types_perm = etypes[edge_ids.long()].int()

        if g.is_block:
            if max_in_degree is None:
                max_in_degree = g.in_degrees().max().item()
            if max_in_degree >= self.MAX_IN_DEGREE_MFG:
                # Large in-degree: pad the offsets so that they cover every
                # source node (see ``pad_offsets``).
                offsets = self.pad_offsets(offsets, g.num_src_nodes() + 1)

        _graph = HeteroCSC(
            offsets,
            indices,
            edge_types_perm,
            g.num_src_nodes(),
            self.num_rels,
        )

        # Keep only the rows that belong to destination nodes.
        h = RelGraphConvAgg(
            feat,
            self.coeff,
            _graph,
            concat_own=self.self_loop,
            norm_by_out_degree=self.apply_norm,
        )[: g.num_dst_nodes()]
        h = h @ self.W.view(-1, self.out_feat)
        if self.bias is not None:
            h = h + self.bias
        h = self.dropout(h)

        return h
math:: h_{\mathcal{N}(i)}^{(l+1)} &= \mathrm{aggregate} \left(\{h_{j}^{l}, \forall j \in \mathcal{N}(i) \}\right) h_{i}^{(l+1)} &= W \cdot \mathrm{concat} (h_{i}^{l}, h_{\mathcal{N}(i)}^{(l+1)}) This module depends on :code:`pylibcugraphops` package, which can be installed via :code:`conda install -c nvidia pylibcugraphops=23.04`. :code:`pylibcugraphops` 23.04 requires python 3.8.x or 3.10.x. .. note:: This is an **experimental** feature. Parameters ---------- in_feats : int Input feature size. out_feats : int Output feature size. aggregator_type : str Aggregator type to use (``mean``, ``sum``, ``min``, ``max``). feat_drop : float Dropout rate on features, default: ``0``. bias : bool If True, adds a learnable bias to the output. Default: ``True``. Examples -------- >>> import dgl >>> import torch >>> from dgl.nn import CuGraphSAGEConv >>> device = 'cuda' >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])).to(device) >>> g = dgl.add_self_loop(g) >>> feat = torch.ones(6, 10).to(device) >>> conv = CuGraphSAGEConv(10, 2, 'mean').to(device) >>> res = conv(g, feat) >>> res tensor([[-1.1690, 0.1952], [-1.1690, 0.1952], [-1.1690, 0.1952], [-1.1690, 0.1952], [-1.1690, 0.1952], [-1.1690, 0.1952]], device='cuda:0', grad_fn=) """ MAX_IN_DEGREE_MFG = 500 def __init__( self, in_feats, out_feats, aggregator_type="mean", feat_drop=0.0, bias=True, ): if HAS_PYLIBCUGRAPHOPS is False: raise ModuleNotFoundError( f"{self.__class__.__name__} requires pylibcugraphops=23.04. " f"Install via `conda install -c nvidia 'pylibcugraphops=23.04'`." f"pylibcugraphops requires Python 3.8 or 3.10." ) valid_aggr_types = {"max", "min", "mean", "sum"} if aggregator_type not in valid_aggr_types: raise ValueError( f"Invalid aggregator_type. Must be one of {valid_aggr_types}. " f"But got '{aggregator_type}' instead." 
class DenseChebConv(nn.Module):
    r"""Chebyshev Spectral Graph Convolution layer from `Convolutional
    Neural Networks on Graphs with Fast Localized Spectral Filtering
    <https://arxiv.org/pdf/1606.09375.pdf>`__

    We recommend to use this module when applying ChebConv on dense graphs.

    Parameters
    ----------
    in_feats: int
        Dimension of input features :math:`h_i^{(l)}`.
    out_feats: int
        Dimension of output features :math:`h_i^{(l+1)}`.
    k : int
        Chebyshev filter size.
    bias : bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.

    Example
    -------
    >>> import dgl
    >>> import numpy as np
    >>> import torch as th
    >>> from dgl.nn import DenseChebConv
    >>>
    >>> feat = th.ones(6, 10)
    >>> adj = th.tensor([[0., 0., 1., 0., 0., 0.],
    ...         [1., 0., 0., 0., 0., 0.],
    ...         [0., 1., 0., 0., 0., 0.],
    ...         [0., 0., 1., 0., 0., 1.],
    ...         [0., 0., 0., 1., 0., 0.],
    ...         [0., 0., 0., 0., 0., 0.]])
    >>> conv = DenseChebConv(10, 2, 2)
    >>> res = conv(adj, feat)
    >>> res
    tensor([[-3.3516, -2.4797],
            [-3.3516, -2.4797],
            [-3.3516, -2.4797],
            [-4.5192, -3.0835],
            [-2.5259, -2.0527],
            [-0.5327, -1.0219]], grad_fn=<AddBackward0>)

    See also
    --------
    :class:`dgl.nn.pytorch.conv.ChebConv`
    """

    def __init__(self, in_feats, out_feats, k, bias=True):
        super(DenseChebConv, self).__init__()
        self._in_feats = in_feats
        self._out_feats = out_feats
        self._k = k
        # One (in_feats, out_feats) projection per Chebyshev order.
        self.W = nn.Parameter(th.Tensor(k, in_feats, out_feats))
        if bias:
            self.bias = nn.Parameter(th.Tensor(out_feats))
        else:
            self.register_buffer("bias", None)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        if self.bias is not None:
            init.zeros_(self.bias)
        for i in range(self._k):
            init.xavier_normal_(self.W[i], init.calculate_gain("relu"))

    def forward(self, adj, feat, lambda_max=None):
        r"""Compute (Dense) Chebyshev Spectral Graph Convolution layer

        Parameters
        ----------
        adj : torch.Tensor
            The adjacency matrix of the graph to apply Graph Convolution on,
            should be of shape :math:`(N, N)`, where a row represents the
            destination and a column represents the source.
        feat : torch.Tensor
            The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
            is size of input feature, :math:`N` is the number of nodes.
        lambda_max : float or None, optional
            A float value indicates the largest eigenvalue of given graph.
            If ``None``, it is computed from the normalized Laplacian.
            Default: None.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is size of output feature.
        """
        A = adj.to(feat)
        num_nodes = A.shape[0]

        # Symmetrically normalized Laplacian; degrees are clamped to 1 so that
        # isolated nodes do not cause a division by zero.
        in_degree = 1 / A.sum(dim=1).clamp(min=1).sqrt()
        D_invsqrt = th.diag(in_degree)
        eye = th.eye(num_nodes).to(A)
        L = eye - D_invsqrt @ A @ D_invsqrt

        if lambda_max is None:
            # ``th.eig`` has been removed from PyTorch. ``eigvals`` returns
            # complex eigenvalues; keep their real parts, which is what
            # ``th.eig(L)[0][:, 0]`` used to provide.
            lambda_max = th.linalg.eigvals(L).real.max()

        # Rescale the spectrum to [-1, 1] for the Chebyshev recurrence.
        L_hat = 2 * L / lambda_max - eye

        # T_0 = I, T_1 = L_hat, T_i = 2 * L_hat @ T_{i-1} - T_{i-2}
        Z = [eye]
        if self._k > 1:
            Z.append(L_hat)
        for _ in range(2, self._k):
            Z.append(2 * L_hat @ Z[-1] - Z[-2])

        Zs = th.stack(Z, 0)  # (k, n, n)

        # (k, n, n) @ (1, n, d_in) @ (k, d_in, d_out) -> (k, n, d_out)
        Zh = Zs @ feat.unsqueeze(0) @ self.W
        Zh = Zh.sum(0)

        if self.bias is not None:
            Zh = Zh + self.bias
        return Zh
activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. Notes ----- Zero in-degree nodes will lead to all-zero output. A common practice to avoid this is to add a self-loop for each node in the graph, which can be achieved by setting the diagonal of the adjacency matrix to be 1. Example ------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import DenseGraphConv >>> >>> feat = th.ones(6, 10) >>> adj = th.tensor([[0., 0., 1., 0., 0., 0.], ... [1., 0., 0., 0., 0., 0.], ... [0., 1., 0., 0., 0., 0.], ... [0., 0., 1., 0., 0., 1.], ... [0., 0., 0., 1., 0., 0.], ... [0., 0., 0., 0., 0., 0.]]) >>> conv = DenseGraphConv(10, 2) >>> res = conv(adj, feat) >>> res tensor([[0.2159, 1.9027], [0.3053, 2.6908], [0.3053, 2.6908], [0.3685, 3.2481], [0.3053, 2.6908], [0.0000, 0.0000]], grad_fn=) See also -------- `GraphConv `__ """ def __init__( self, in_feats, out_feats, norm="both", bias=True, activation=None ): super(DenseGraphConv, self).__init__() self._in_feats = in_feats self._out_feats = out_feats self._norm = norm self.weight = nn.Parameter(th.Tensor(in_feats, out_feats)) if bias: self.bias = nn.Parameter(th.Tensor(out_feats)) else: self.register_buffer("bias", None) self.reset_parameters() self._activation = activation def reset_parameters(self): """Reinitialize learnable parameters.""" init.xavier_uniform_(self.weight) if self.bias is not None: init.zeros_(self.bias) def forward(self, adj, feat): r"""Compute (Dense) Graph Convolution layer. Parameters ---------- adj : torch.Tensor The adjacency matrix of the graph to apply Graph Convolution on, when applied to a unidirectional bipartite graph, ``adj`` should be of shape should be of shape :math:`(N_{out}, N_{in})`; when applied to a homo graph, ``adj`` should be of shape :math:`(N, N)`. In both cases, a row represents a destination node while a column represents a source node. 
class DenseSAGEConv(nn.Module):
    """GraphSAGE layer from `Inductive Representation Learning on Large Graphs
    <https://arxiv.org/abs/1706.02216>`__

    We recommend to use this module when applying GraphSAGE on dense graphs.

    Note that we only support gcn aggregator in DenseSAGEConv.

    Parameters
    ----------
    in_feats : int
        Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`.
    out_feats : int
        Output feature size; i.e, the number of dimensions of
        :math:`h_i^{(l+1)}`.
    feat_drop : float, optional
        Dropout rate on features. Default: 0.
    bias : bool
        If True, adds a learnable bias to the output. Default: ``True``.
    norm : callable activation function/layer or None, optional
        If not None, applies normalization to the updated node features.
    activation : callable activation function/layer or None, optional
        If not None, applies an activation function to the updated node
        features. Default: ``None``.

    Example
    -------
    >>> import dgl
    >>> import numpy as np
    >>> import torch as th
    >>> from dgl.nn import DenseSAGEConv
    >>>
    >>> feat = th.ones(6, 10)
    >>> adj = th.tensor([[0., 0., 1., 0., 0., 0.],
    ...         [1., 0., 0., 0., 0., 0.],
    ...         [0., 1., 0., 0., 0., 0.],
    ...         [0., 0., 1., 0., 0., 1.],
    ...         [0., 0., 0., 1., 0., 0.],
    ...         [0., 0., 0., 0., 0., 0.]])
    >>> conv = DenseSAGEConv(10, 2)
    >>> res = conv(adj, feat)
    >>> res
    tensor([[1.0401, 2.1008],
            [1.0401, 2.1008],
            [1.0401, 2.1008],
            [1.0401, 2.1008],
            [1.0401, 2.1008],
            [1.0401, 2.1008]], grad_fn=<AddmmBackward>)

    See also
    --------
    :class:`dgl.nn.pytorch.conv.SAGEConv`
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        feat_drop=0.0,
        bias=True,
        norm=None,
        activation=None,
    ):
        super(DenseSAGEConv, self).__init__()
        self._in_feats, self._out_feats = in_feats, out_feats
        # Attribute order below fixes the sub-module registration order.
        self._norm = norm
        self.feat_drop = nn.Dropout(feat_drop)
        self.activation = activation
        self.fc = nn.Linear(in_feats, out_feats, bias=bias)
        self.reset_parameters()

    def reset_parameters(self):
        r"""

        Description
        -----------
        Reinitialize learnable parameters.

        Notes
        -----
        The linear weights :math:`W^{(l)}` are initialized using Glorot uniform
        initialization.
        """
        nn.init.xavier_uniform_(
            self.fc.weight, gain=nn.init.calculate_gain("relu")
        )

    def forward(self, adj, feat):
        r"""

        Description
        -----------
        Compute (Dense) Graph SAGE layer.

        Parameters
        ----------
        adj : torch.Tensor
            The adjacency matrix of the graph to apply SAGE Convolution on.
            When applied to a unidirectional bipartite graph, ``adj`` should
            be of shape :math:`(N_{out}, N_{in})`; when applied to a homo
            graph, ``adj`` should be of shape :math:`(N, N)`. In both cases,
            a row represents a destination node while a column represents a
            source node.
        feat : torch.Tensor or a pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape
            :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature,
            :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two
            tensors of shape :math:`(N_{in}, D_{in})` and
            :math:`(N_{out}, D_{in})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is size of output feature.
        """
        check_eq_shape(feat)
        if not isinstance(feat, tuple):
            dropped = self.feat_drop(feat)
            h_src, h_dst = dropped, dropped
        else:
            # Source features go through dropout first, then destination.
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])

        dense_adj = adj.to(h_src)
        degs = dense_adj.sum(dim=1, keepdim=True)
        # Mean over the neighbours and the node itself, hence the "+ 1".
        aggregated = dense_adj @ h_src + h_dst
        out = self.fc(aggregated / (degs + 1))

        # activation
        if self.activation is not None:
            out = self.activation(out)
        # normalization
        if self._norm is not None:
            out = self._norm(out)
        return out
class DGNConv(PNAConv):
    r"""Directional Graph Network Layer from `Directional Graph Networks
    <https://arxiv.org/abs/2010.02863>`__

    DGN introduces two special directional aggregators according to the vector
    field :math:`F`, which is defined as the gradient of the low-frequency
    eigenvectors of graph laplacian.

    The directional average aggregator is defined as
    :math:`h_i' = \sum_{j\in\mathcal{N}(i)}\frac{|F_{i,j}|\cdot h_j}{||F_{i,:}||_1+\epsilon}`

    The directional derivative aggregator is defined as
    :math:`h_i' = \sum_{j\in\mathcal{N}(i)}\frac{F_{i,j}\cdot h_j}{||F_{i,:}||_1+\epsilon}
    -h_i\cdot\sum_{j\in\mathcal{N}(i)}\frac{F_{i,j}}{||F_{i,:}||_1+\epsilon}`

    :math:`\epsilon` is the infinitesimal to keep the computation numerically
    stable.

    Parameters
    ----------
    in_size : int
        Input feature size; i.e. the size of :math:`h_i^l`.
    out_size : int
        Output feature size; i.e. the size of :math:`h_i^{l+1}`.
    aggregators : list of str
        List of aggregation function names(each aggregator specifies a way to
        aggregate messages from neighbours), selected from:

        * ``mean``: the mean of neighbour messages

        * ``max``: the maximum of neighbour messages

        * ``min``: the minimum of neighbour messages

        * ``std``: the standard deviation of neighbour messages

        * ``var``: the variance of neighbour messages

        * ``sum``: the sum of neighbour messages

        * ``moment3``, ``moment4``, ``moment5``: the normalized moments
          aggregation :math:`(E[(X-E[X])^n])^{1/n}`

        * ``dir{k}-av``: directional average aggregation with directions
          defined by the k-th smallest eigenvectors. k can be selected from
          1, 2, 3.

        * ``dir{k}-dx``: directional derivative aggregation with directions
          defined by the k-th smallest eigenvectors. k can be selected from
          1, 2, 3.

        Note that using directional aggregation requires the LaplacianPE
        transform on the input graph for eigenvector computation (the PE size
        must be >= k above).
    scalers: list of str
        List of scaler function names, selected from:

        * ``identity``: no scaling

        * ``amplification``: multiply the aggregated message by
          :math:`\log(d+1)/\delta`, where :math:`d` is the in-degree of the
          node.

        * ``attenuation``: multiply the aggregated message by
          :math:`\delta/\log(d+1)`
    delta: float
        The in-degree-related normalization factor computed over the training
        set, used by scalers for normalization. :math:`E[\log(d+1)]`, where
        :math:`d` is the in-degree for each node in the training set.
    dropout: float, optional
        The dropout ratio. Default: 0.0.
    num_towers: int, optional
        The number of towers used. Default: 1. Note that in_size and out_size
        must be divisible by num_towers.
    edge_feat_size: int, optional
        The edge feature size. Default: 0.
    residual : bool, optional
        The bool flag that determines whether to add a residual connection for
        the output. Default: True. If in_size and out_size of the DGN conv
        layer are not the same, this flag will be set as False forcibly.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import DGNConv
    >>> from dgl import LaplacianPE
    >>>
    >>> # DGN requires precomputed eigenvectors, with 'eig' as feature name.
    >>> transform = LaplacianPE(k=3, feat_name='eig')
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = transform(g)
    >>> eig = g.ndata['eig']
    >>> feat = th.ones(6, 10)
    >>> conv = DGNConv(10, 10, ['dir1-av', 'dir1-dx', 'sum'],
    ...     ['identity', 'amplification'], 2.5)
    >>> ret = conv(g, feat, eig_vec=eig)
    """

    def __init__(
        self,
        in_size,
        out_size,
        aggregators,
        scalers,
        delta,
        dropout=0.0,
        num_towers=1,
        edge_feat_size=0,
        residual=True,
    ):
        super().__init__(
            in_size,
            out_size,
            aggregators,
            scalers,
            delta,
            dropout,
            num_towers,
            edge_feat_size,
            residual,
        )

        # Replace the towers built by the parent class with DGN towers.
        dgn_towers = []
        for _ in range(num_towers):
            dgn_towers.append(
                DGNConvTower(
                    self.tower_in_size,
                    self.tower_out_size,
                    aggregators,
                    scalers,
                    delta,
                    dropout=dropout,
                    edge_feat_size=edge_feat_size,
                )
            )
        self.towers = nn.ModuleList(dgn_towers)

        # Eigenvectors are only needed if a directional aggregator is used.
        self.use_eig_vec = any(
            name.startswith("dir") for name in aggregators
        )

    def forward(self, graph, node_feat, edge_feat=None, eig_vec=None):
        r"""
        Description
        -----------
        Compute DGN layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        node_feat : torch.Tensor
            The input feature of shape :math:`(N, h_n)`. :math:`N` is the
            number of nodes, and :math:`h_n` must be the same as in_size.
        edge_feat : torch.Tensor, optional
            The edge feature of shape :math:`(M, h_e)`. :math:`M` is the
            number of edges, and :math:`h_e` must be the same as
            edge_feat_size.
        eig_vec : torch.Tensor, optional
            K smallest non-trivial eigenvectors of Graph Laplacian of shape
            :math:`(N, K)`. It is only required when :attr:`aggregators`
            contains directional aggregators.

        Returns
        -------
        torch.Tensor
            The output node feature of shape :math:`(N, h_n')` where
            :math:`h_n'` should be the same as out_size.
        """
        with graph.local_scope():
            # Stored under "eig" only for the duration of this call.
            if self.use_eig_vec:
                graph.ndata["eig"] = eig_vec
            result = super().forward(graph, node_feat, edge_feat)
        return result
class DotGatConv(nn.Module):
    r"""Apply dot product version of self attention in `Graph Attention Network
    <https://arxiv.org/pdf/1710.10903.pdf>`__

    .. math::
        h_i^{(l+1)} = \sum_{j\in \mathcal{N}(i)} \alpha_{i, j} h_j^{(l)}

    where :math:`\alpha_{ij}` is the attention score between node :math:`i`
    and node :math:`j`:

    .. math::
        \alpha_{i, j} &= \mathrm{softmax_i}(e_{ij}^{l})

        e_{ij}^{l} &= ({W_i^{(l)} h_i^{(l)}})^T \cdot {W_j^{(l)} h_j^{(l)}}

    where :math:`W_i` and :math:`W_j` transform node :math:`i`'s and node
    :math:`j`'s features into the same dimension, so that the similarity of
    node features can be computed with a dot product.

    In this implementation the scores :math:`e_{ij}` are divided by the
    square root of ``out_feats`` before the softmax, and the aggregated
    values are the projected source features.

    Parameters
    ----------
    in_feats : int, or pair of ints
        Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`.
        DotGatConv can be applied on homogeneous graph and unidirectional
        bipartite graph.
        If the layer is to be applied to a unidirectional bipartite graph,
        ``in_feats`` specifies the input feature size on both the source and
        destination nodes. If a scalar is given, the source and destination
        node feature size would take the same value.
    out_feats : int
        Output feature size; i.e, the number of dimensions of
        :math:`h_i^{(l+1)}`.
    num_heads : int
        Number of heads in Multi-Head Attention.
    allow_zero_in_degree : bool, optional
        If there are 0-in-degree nodes in the graph, output for those nodes
        will be invalid since no message will be passed to those nodes. This
        is harmful for some applications causing silent performance
        regression. This module will raise a DGLError if it detects
        0-in-degree nodes in input graph. By setting ``True``, it will
        suppress the check and let the users handle it by themselves.
        Default: ``False``.

    Note
    ----
    Zero in-degree nodes will lead to invalid output value. This is because
    no message will be passed to those nodes, the aggregation function will
    be applied on empty input. A common practice to avoid this is to add a
    self-loop for each node in the graph if it is homogeneous, which can be
    achieved by:

    >>> g = ... # a DGLGraph
    >>> g = dgl.add_self_loop(g)

    Calling ``add_self_loop`` will not work for some graphs, for example,
    heterogeneous graph since the edge type can not be decided for self_loop
    edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to
    unblock the code and handle zero-in-degree nodes manually. A common
    practice to handle this is to filter out the nodes with zero-in-degree
    when use after conv.

    Examples
    --------
    >>> import dgl
    >>> import numpy as np
    >>> import torch as th
    >>> from dgl.nn import DotGatConv

    >>> # Case 1: Homogeneous graph
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = dgl.add_self_loop(g)
    >>> feat = th.ones(6, 10)
    >>> dotgatconv = DotGatConv(10, 2, num_heads=3)
    >>> res = dotgatconv(g, feat)
    >>> res
    tensor([[[ 3.4570,  1.8634],
            [ 1.3805, -0.0762],
            [ 1.0390, -1.1479]],
            [[ 3.4570,  1.8634],
            [ 1.3805, -0.0762],
            [ 1.0390, -1.1479]],
            [[ 3.4570,  1.8634],
            [ 1.3805, -0.0762],
            [ 1.0390, -1.1479]],
            [[ 3.4570,  1.8634],
            [ 1.3805, -0.0762],
            [ 1.0390, -1.1479]],
            [[ 3.4570,  1.8634],
            [ 1.3805, -0.0762],
            [ 1.0390, -1.1479]],
            [[ 3.4570,  1.8634],
            [ 1.3805, -0.0762],
            [ 1.0390, -1.1479]]], grad_fn=<...>)

    >>> # Case 2: Unidirectional bipartite graph
    >>> u = [0, 1, 0, 0, 1]
    >>> v = [0, 1, 2, 3, 2]
    >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)})
    >>> u_feat = th.tensor(np.random.rand(2, 5).astype(np.float32))
    >>> v_feat = th.tensor(np.random.rand(4, 10).astype(np.float32))
    >>> dotgatconv = DotGatConv((5,10), 2, 3)
    >>> res = dotgatconv(g, (u_feat, v_feat))
    >>> res
    tensor([[[-0.6066,  1.0268],
            [-0.5945, -0.4801],
            [ 0.1594,  0.3825]],
            [[ 0.0268,  1.0783],
            [ 0.5041, -1.3025],
            [ 0.6568,  0.7048]],
            [[-0.2688,  1.0543],
            [-0.0315, -0.9016],
            [ 0.3943,  0.5347]],
            [[-0.6066,  1.0268],
            [-0.5945, -0.4801],
            [ 0.1594,  0.3825]]], grad_fn=<...>)
    """

    def __init__(
        self, in_feats, out_feats, num_heads, allow_zero_in_degree=False
    ):
        super().__init__()
        self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
        self._out_feats = out_feats
        self._allow_zero_in_degree = allow_zero_in_degree
        self._num_heads = num_heads

        # Every projection maps to all heads at once; heads are split with
        # a ``view`` in ``_project``.
        proj_size = self._out_feats * self._num_heads
        if isinstance(in_feats, tuple):
            # Separate projections for source and destination node types.
            self.fc_src = nn.Linear(self._in_src_feats, proj_size, bias=False)
            self.fc_dst = nn.Linear(self._in_dst_feats, proj_size, bias=False)
        else:
            # A single projection shared by source and destination nodes.
            self.fc = nn.Linear(self._in_src_feats, proj_size, bias=False)

    def set_allow_zero_in_degree(self, set_value):
        r"""

        Description
        -----------
        Set allow_zero_in_degree flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def _project(self, feat):
        """Project input features and split them into attention heads.

        Returns a ``(feat_src, feat_dst)`` pair, each viewed as
        ``(-1, num_heads, out_feats)``. When ``feat`` is a single tensor both
        entries are the same tensor.
        """
        head_shape = (-1, self._num_heads, self._out_feats)
        if isinstance(feat, tuple):
            if hasattr(self, "fc_src"):
                proj_src, proj_dst = self.fc_src, self.fc_dst
            else:
                # The layer was built with a scalar ``in_feats``: reuse the
                # shared projection for both sides instead of failing on the
                # missing ``fc_src`` / ``fc_dst`` attributes.
                proj_src = proj_dst = self.fc
            feat_src = proj_src(feat[0]).view(*head_shape)
            feat_dst = proj_dst(feat[1]).view(*head_shape)
            return feat_src, feat_dst
        feat_src = self.fc(feat).view(*head_shape)
        return feat_src, feat_src

    def forward(self, graph, feat, get_attention=False):
        r"""

        Description
        -----------
        Apply dot product version of self attention in GAT.

        Parameters
        ----------
        graph : DGLGraph or bipartite graph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape
            :math:`(N, D_{in})` where :math:`D_{in}` is size of input
            feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two
            tensors of shape :math:`(N_{in}, D_{in_{src}})` and
            :math:`(N_{out}, D_{in_{dst}})`.
        get_attention : bool, optional
            Whether to return the attention values. Default to False.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, H, D_{out})` where
            :math:`H` is the number of heads and :math:`D_{out}` is size of
            output feature.
        torch.Tensor, optional
            The attention values of shape :math:`(E, H, 1)`, where :math:`E`
            is the number of edges. This is returned only when
            :attr:`get_attention` is ``True``.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise
            DGLError since no message will be passed to those nodes. This
            will cause invalid output. The error can be ignored by setting
            ``allow_zero_in_degree`` parameter to ``True``.
        """
        # The local scope keeps the temporary "ft" / "a" / "sa" / "agg_u"
        # fields from leaking onto the caller's graph.
        with graph.local_scope():
            if (
                not self._allow_zero_in_degree
                and (graph.in_degrees() == 0).any()
            ):
                raise DGLError(
                    "There are 0-in-degree nodes in the graph, "
                    "output for those nodes will be invalid. "
                    "This is harmful for some applications, "
                    "causing silent performance regression. "
                    "Adding self-loop on the input graph by "
                    "calling `g = dgl.add_self_loop(g)` will resolve "
                    "the issue. Setting ``allow_zero_in_degree`` "
                    "to be `True` when constructing this module will "
                    "suppress the check and let the code run."
                )

            feat_src, feat_dst = self._project(feat)
            if not isinstance(feat, tuple) and graph.is_block:
                # In a block the destination nodes are the leading source
                # nodes, so their features are a prefix of the source ones.
                feat_dst = feat_src[: graph.number_of_dst_nodes()]

            # Assign features to nodes
            graph.srcdata.update({"ft": feat_src})
            graph.dstdata.update({"ft": feat_dst})

            # Step 1. dot product
            graph.apply_edges(fn.u_dot_v("ft", "ft", "a"))

            # Step 2. edge softmax over scores scaled by 1/sqrt(out_feats)
            graph.edata["sa"] = edge_softmax(
                graph, graph.edata["a"] / self._out_feats**0.5
            )

            # Step 3. weight the source features by the attention of each
            # edge and sum them on the destination nodes
            graph.update_all(
                fn.u_mul_e("ft", "sa", "attn"), fn.sum("attn", "agg_u")
            )

            # output results to the destination nodes
            rst = graph.dstdata["agg_u"]

            if get_attention:
                return rst, graph.edata["sa"]
            return rst
class EdgeConv(nn.Module):
    r"""EdgeConv layer from `Dynamic Graph CNN for Learning on Point Clouds
    <https://arxiv.org/pdf/1801.07829>`__

    It can be described as follows:

    .. math::
       h_i^{(l+1)} = \max_{j \in \mathcal{N}(i)} (
       \Theta \cdot (h_j^{(l)} - h_i^{(l)}) + \Phi \cdot h_i^{(l)})

    where :math:`\mathcal{N}(i)` is the neighbor of :math:`i`.
    :math:`\Theta` and :math:`\Phi` are linear layers.

    .. note::

       The original formulation includes a ReLU inside the maximum operator.
       This is equivalent to first applying a maximum operator then applying
       the ReLU.

    Parameters
    ----------
    in_feat : int
        Input feature size; i.e, the number of dimensions of
        :math:`h_j^{(l)}`.
    out_feat : int
        Output feature size; i.e., the number of dimensions of
        :math:`h_i^{(l+1)}`.
    batch_norm : bool
        Whether to include batch normalization on messages.
        Default: ``False``.
    allow_zero_in_degree : bool, optional
        If there are 0-in-degree nodes in the graph, output for those nodes
        will be invalid since no message will be passed to those nodes. This
        is harmful for some applications causing silent performance
        regression. This module will raise a DGLError if it detects
        0-in-degree nodes in input graph. By setting ``True``, it will
        suppress the check and let the users handle it by themselves.
        Default: ``False``.

    Note
    ----
    Zero in-degree nodes will lead to invalid output value. This is because
    no message will be passed to those nodes, the aggregation function will
    be applied on empty input. A common practice to avoid this is to add a
    self-loop for each node in the graph if it is homogeneous, which can be
    achieved by:

    >>> g = ... # a DGLGraph
    >>> g = dgl.add_self_loop(g)

    Calling ``add_self_loop`` will not work for some graphs, for example,
    heterogeneous graph since the edge type can not be decided for self_loop
    edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to
    unblock the code and handle zero-in-degree nodes manually. A common
    practice to handle this is to filter out the nodes with zero-in-degree
    when use after conv.

    Examples
    --------
    >>> import dgl
    >>> import numpy as np
    >>> import torch as th
    >>> from dgl.nn import EdgeConv

    >>> # Case 1: Homogeneous graph
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = dgl.add_self_loop(g)
    >>> feat = th.ones(6, 10)
    >>> conv = EdgeConv(10, 2)
    >>> res = conv(g, feat)
    >>> res
    tensor([[-0.2347,  0.5849],
            [-0.2347,  0.5849],
            [-0.2347,  0.5849],
            [-0.2347,  0.5849],
            [-0.2347,  0.5849],
            [-0.2347,  0.5849]], grad_fn=<...>)

    >>> # Case 2: Unidirectional bipartite graph
    >>> u = [0, 1, 0, 0, 1]
    >>> v = [0, 1, 2, 3, 2]
    >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)})
    >>> u_fea = th.rand(2, 5)
    >>> v_fea = th.rand(4, 5)
    >>> # NOTE: the third positional argument is ``batch_norm`` (truthy here)
    >>> conv = EdgeConv(5, 2, 3)
    >>> res = conv(g, (u_fea, v_fea))
    >>> res
    tensor([[ 1.6375,  0.2085],
            [-1.1925, -1.2852],
            [ 0.2101,  1.3466],
            [ 0.2342, -0.9868]], grad_fn=<...>)
    """

    def __init__(
        self, in_feat, out_feat, batch_norm=False, allow_zero_in_degree=False
    ):
        super().__init__()
        self.batch_norm = batch_norm
        self._allow_zero_in_degree = allow_zero_in_degree

        # Theta acts on the feature difference along an edge, Phi on the
        # destination node's own feature.
        self.theta = nn.Linear(in_feat, out_feat)
        self.phi = nn.Linear(in_feat, out_feat)

        # The norm layer only exists when batch normalization is requested.
        if batch_norm:
            self.bn = nn.BatchNorm1d(out_feat)

    def set_allow_zero_in_degree(self, set_value):
        r"""

        Description
        -----------
        Set allow_zero_in_degree flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def forward(self, g, feat):
        """

        Description
        -----------
        Forward computation

        Parameters
        ----------
        g : DGLGraph
            The graph.
        feat : Tensor or pair of tensors
            :math:`(N, D)` where :math:`N` is the number of nodes and
            :math:`D` is the number of feature dimensions.

            If a pair of tensors is given, the graph must be a uni-bipartite
            graph with only one edge type, and the two tensors must have the
            same dimensionality on all except the first axis.

        Returns
        -------
        torch.Tensor
            New node features.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise
            DGLError since no message will be passed to those nodes. This
            will cause invalid output. The error can be ignored by setting
            ``allow_zero_in_degree`` parameter to ``True``.
        """
        with g.local_scope():
            if not self._allow_zero_in_degree and (g.in_degrees() == 0).any():
                raise DGLError(
                    "There are 0-in-degree nodes in the graph, "
                    "output for those nodes will be invalid. "
                    "This is harmful for some applications, "
                    "causing silent performance regression. "
                    "Adding self-loop on the input graph by "
                    "calling `g = dgl.add_self_loop(g)` will resolve "
                    "the issue. Setting ``allow_zero_in_degree`` "
                    "to be `True` when constructing this module will "
                    "suppress the check and let the code run."
                )

            src_feat, dst_feat = expand_as_pair(feat, g)
            g.srcdata["x"] = src_feat
            g.dstdata["x"] = dst_feat

            # ``v_sub_u`` stores (destination - source) on each edge, i.e.
            # the opposite operand order from the class-level formula; the
            # result is then passed through the learned Theta layer.
            g.apply_edges(fn.v_sub_u("x", "x", "theta"))
            g.edata["theta"] = self.theta(g.edata["theta"])
            g.dstdata["phi"] = self.phi(g.dstdata["x"])

            if self.batch_norm:
                g.apply_edges(fn.e_add_v("theta", "phi", "e"))
                # The official implementation applies a per-edge batch norm
                # inside EdgeConv. A global batch norm is used here instead:
                #
                # (1) When the point clouds in a batch do not all have the
                #     same number of points, per-edge batch norm would not
                #     work.
                #
                # (2) Even with equally sized point clouds, the points may be
                #     shuffled for the same (type of) object (the official
                #     implementation *does* shuffle the points of the same
                #     example for each epoch).
                #
                #     For example, the first point of an airplane point cloud
                #     is not necessarily at its nose.
                #
                #     Per-position statistics learned by batch norm are then
                #     not as meaningful as those learned from images.
                g.edata["e"] = self.bn(g.edata["e"])
                g.update_all(fn.copy_e("e", "e"), fn.max("e", "x"))
            else:
                g.update_all(fn.e_add_v("theta", "phi", "e"), fn.max("e", "x"))

            return g.dstdata["x"]
class EdgeGATConv(nn.Module):
    r"""Graph attention layer with edge features from the SCENE paper.

    .. math::
        \mathbf{v}_i^\prime = \mathbf{\Theta}_\mathrm{s} \cdot \mathbf{v}_i +
        \sum\limits_{j \in \mathcal{N}(v_i)} \alpha_{j, i} \left(
        \mathbf{\Theta}_\mathrm{n} \cdot \mathbf{v}_j +
        \mathbf{\Theta}_\mathrm{e} \cdot \mathbf{e}_{j,i} \right)

    where :math:`\mathbf{\Theta}` is used to denote learnable weight matrices
    for the transformation of features of the node to update (s=self),
    neighboring nodes (n=neighbor) and edge features (e=edge).
    Attention weights are obtained by

    .. math::
        \alpha_{j, i} = \mathrm{softmax}_i \Big( \mathrm{LeakyReLU}
        \big( \mathbf{a}^T [ \mathbf{\Theta}_\mathrm{n} \cdot \mathbf{v}_i ||
        \mathbf{\Theta}_\mathrm{n} \cdot \mathbf{v}_j ||
        \mathbf{\Theta}_\mathrm{e} \cdot \mathbf{e}_{j,i} ] \big) \Big)

    with :math:`\mathbf{a}` corresponding to a learnable vector.
    :math:`\mathrm{softmax_i}` stands for the normalization by all incoming
    edges of node :math:`i`.

    Parameters
    ----------
    in_feats : int, or pair of ints
        Input feature size; i.e, the number of dimensions of
        :math:`\mathbf{v}_i`.
        EdgeGATConv can be applied on homogeneous graph and unidirectional
        bipartite graph.
        If the layer is to be applied to a unidirectional bipartite graph,
        ``in_feats`` specifies the input feature size on both the source and
        destination nodes. If a scalar is given, the source and destination
        node feature size would take the same value.
    edge_feats : int
        Edge feature size; i.e., the number of dimensions of
        :math:`\mathbf{e}_{j,i}`.
    out_feats : int
        Output feature size; i.e, the number of dimensions of
        :math:`\mathbf{v}_i^\prime`.
    num_heads : int
        Number of heads in Multi-Head Attention.
    feat_drop : float, optional
        Dropout rate on feature. Defaults: ``0``.
    attn_drop : float, optional
        Dropout rate on attention weight. Defaults: ``0``.
    negative_slope : float, optional
        LeakyReLU angle of negative slope. Defaults: ``0.2``.
    residual : bool, optional
        If True, use residual connection. Defaults: ``True``.
    activation : callable activation function/layer or None, optional.
        If not None, applies an activation function to the updated node
        features. Default: ``None``.
    allow_zero_in_degree : bool, optional
        If there are 0-in-degree nodes in the graph, output for those nodes
        will be invalid since no message will be passed to those nodes. This
        is harmful for some applications causing silent performance
        regression. This module will raise a DGLError if it detects
        0-in-degree nodes in input graph. By setting ``True``, it will
        suppress the check and let the users handle it by themselves.
        Defaults: ``False``.
    bias : bool, optional
        If True, learns a bias term. Defaults: ``True``.

    Note
    ----
    Zero in-degree nodes will lead to invalid output value. This is because
    no message will be passed to those nodes, the aggregation function will
    be applied on empty input. A common practice to avoid this is to add a
    self-loop for each node in the graph if it is homogeneous, which can be
    achieved by:

    >>> g = ... # a DGLGraph
    >>> g = dgl.add_self_loop(g)

    Calling ``add_self_loop`` will not work for some graphs, for example,
    heterogeneous graph since the edge type can not be decided for self_loop
    edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to
    unblock the code and handle zero-in-degree nodes manually. A common
    practice to handle this is to filter out the nodes with zero-in-degree
    when use after conv.

    Examples
    --------
    >>> import dgl
    >>> import numpy as np
    >>> import torch as th
    >>> from dgl.nn import EdgeGATConv

    >>> # Case 1: Homogeneous graph.
    >>> num_nodes, num_edges = 8, 30
    >>> # Generate a graph.
    >>> graph = dgl.rand_graph(num_nodes,num_edges)
    >>> node_feats = th.rand((num_nodes, 20))
    >>> edge_feats = th.rand((num_edges, 12))
    >>> edge_gat = EdgeGATConv(
    ...     in_feats=20,
    ...     edge_feats=12,
    ...     out_feats=15,
    ...     num_heads=3,
    ... )
    >>> # Forward pass.
    >>> new_node_feats = edge_gat(graph, node_feats, edge_feats)
    >>> new_node_feats.shape
    torch.Size([8, 3, 15])

    >>> # Case 2: Unidirectional bipartite graph.
    >>> u = [0, 1, 0, 0, 1]
    >>> v = [0, 1, 2, 3, 2]
    >>> g = dgl.heterograph({('A', 'r', 'B'): (u, v)})
    >>> u_feat = th.tensor(np.random.rand(2, 25).astype(np.float32))
    >>> v_feat = th.tensor(np.random.rand(4, 30).astype(np.float32))
    >>> nfeats = (u_feat,v_feat)
    >>> efeats = th.tensor(np.random.rand(5, 15).astype(np.float32))
    >>> in_feats = (25,30)
    >>> edge_feats = 15
    >>> out_feats = 10
    >>> num_heads = 3
    >>> egat_model = EdgeGATConv(
    ...     in_feats,
    ...     edge_feats,
    ...     out_feats,
    ...     num_heads,
    ... )
    >>> # Forward pass.
    >>> new_node_feats, attention_weights = egat_model(g, nfeats, efeats, get_attention=True)
    >>> new_node_feats.shape, attention_weights.shape
    (torch.Size([4, 3, 10]), torch.Size([5, 3, 1]))
    """

    def __init__(
        self,
        in_feats,
        edge_feats,
        out_feats,
        num_heads,
        feat_drop=0.0,
        attn_drop=0.0,
        negative_slope=0.2,
        residual=True,
        activation=None,
        allow_zero_in_degree=False,
        bias=True,
    ):
        super().__init__()
        self._num_heads = num_heads
        self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
        self._out_feats = out_feats
        self._allow_zero_in_degree = allow_zero_in_degree

        # Node projections: separate source/destination layers for a pair of
        # input sizes, one shared layer otherwise.
        if isinstance(in_feats, tuple):
            self.fc_src = nn.Linear(
                self._in_src_feats, out_feats * num_heads, bias=False
            )
            self.fc_dst = nn.Linear(
                self._in_dst_feats, out_feats * num_heads, bias=False
            )
        else:
            self.fc = nn.Linear(
                self._in_src_feats, out_feats * num_heads, bias=False
            )

        # Per-head attention vectors for source and destination nodes.
        self.attn_l = nn.Parameter(
            th.FloatTensor(size=(1, num_heads, out_feats))
        )
        self.attn_r = nn.Parameter(
            th.FloatTensor(size=(1, num_heads, out_feats))
        )
        self.feat_drop = nn.Dropout(feat_drop)
        self.attn_drop = nn.Dropout(attn_drop)
        self.leaky_relu = nn.LeakyReLU(negative_slope)

        if bias:
            self.bias = nn.Parameter(
                th.FloatTensor(size=(num_heads * out_feats,))
            )
        else:
            # Registered as an empty buffer so ``self.bias`` always exists.
            self.register_buffer("bias", None)

        if residual:
            self.res_fc = nn.Linear(
                self._in_dst_feats, num_heads * out_feats, bias=False
            )
        else:
            self.register_buffer("res_fc", None)

        # Edge projection and the per-head attention vector for edges.
        self._edge_feats = edge_feats
        self.fc_edge = nn.Linear(edge_feats, out_feats * num_heads, bias=False)
        self.attn_edge = nn.Parameter(
            th.FloatTensor(size=(1, num_heads, out_feats))
        )

        # The parameters above are allocated uninitialized; fill them now.
        self.reset_parameters()
        self.activation = activation

    def reset_parameters(self):
        r"""

        Description
        -----------
        Reinitialize learnable parameters.

        Note
        ----
        The fc weights :math:`\mathbf{\Theta}` and the attention weights are
        initialized with the Xavier normal method (gain for ReLU). The bias,
        when present, is set to zero.
        """
        gain = nn.init.calculate_gain("relu")
        if hasattr(self, "fc"):
            nn.init.xavier_normal_(self.fc.weight, gain=gain)
        else:
            nn.init.xavier_normal_(self.fc_src.weight, gain=gain)
            nn.init.xavier_normal_(self.fc_dst.weight, gain=gain)
        nn.init.xavier_normal_(self.attn_l, gain=gain)
        nn.init.xavier_normal_(self.attn_r, gain=gain)
        nn.init.xavier_normal_(self.fc_edge.weight, gain=gain)
        nn.init.xavier_normal_(self.attn_edge, gain=gain)
        if self.bias is not None:
            nn.init.constant_(self.bias, 0)
        if isinstance(self.res_fc, nn.Linear):
            nn.init.xavier_normal_(self.res_fc.weight, gain=gain)

    def set_allow_zero_in_degree(self, set_value):
        r"""

        Description
        -----------
        Set allow_zero_in_degree flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat, edge_feat, get_attention=False):
        r"""

        Description
        -----------
        Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape
            :math:`(N, *, D_{in})` where :math:`D_{in}` is size of input
            feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two
            tensors of shape :math:`(N_{in}, *, D_{in_{src}})` and
            :math:`(N_{out}, *, D_{in_{dst}})`.
        edge_feat : torch.Tensor
            The input edge feature of shape :math:`(E, D_{in_{edge}})`,
            where :math:`E` is the number of edges and :math:`D_{in_{edge}}`
            the size of the edge features.
        get_attention : bool, optional
            Whether to return the attention values. Default to False.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, *, H, D_{out})` where
            :math:`H` is the number of heads, and :math:`D_{out}` is size of
            output feature.
        torch.Tensor, optional
            The attention values of shape :math:`(E, *, H, 1)`. This is
            returned only when :attr:`get_attention` is ``True``.

        Raises
        ------
        DGLError
            If there are 0-in-degree nodes in the input graph, it will raise
            DGLError since no message will be passed to those nodes. This
            will cause invalid output. The error can be ignored by setting
            ``allow_zero_in_degree`` parameter to ``True``.
        """
        with graph.local_scope():
            if (
                not self._allow_zero_in_degree
                and (graph.in_degrees() == 0).any()
            ):
                raise DGLError(
                    "There are 0-in-degree nodes in the graph, "
                    "output for those nodes will be invalid. "
                    "This is harmful for some applications, "
                    "causing silent performance regression. "
                    "Adding self-loop on the input graph by "
                    "calling `g = dgl.add_self_loop(g)` will resolve "
                    "the issue. Setting ``allow_zero_in_degree`` "
                    "to be `True` when constructing this module will "
                    "suppress the check and let the code run."
                )

            head_shape = (self._num_heads, self._out_feats)

            if isinstance(feat, tuple):
                src_prefix_shape = feat[0].shape[:-1]
                dst_prefix_shape = feat[1].shape[:-1]
                h_src = self.feat_drop(feat[0])
                h_dst = self.feat_drop(feat[1])
                if hasattr(self, "fc_src"):
                    proj_src, proj_dst = self.fc_src, self.fc_dst
                else:
                    # Layer built with a scalar ``in_feats``: one shared
                    # projection serves both sides.
                    proj_src = proj_dst = self.fc
                feat_src = proj_src(h_src).view(
                    *src_prefix_shape, *head_shape
                )
                feat_dst = proj_dst(h_dst).view(
                    *dst_prefix_shape, *head_shape
                )
            else:
                src_prefix_shape = dst_prefix_shape = feat.shape[:-1]
                h_src = h_dst = self.feat_drop(feat)
                feat_src = feat_dst = self.fc(h_src).view(
                    *src_prefix_shape, *head_shape
                )
                if graph.is_block:
                    # Destination nodes of a block are the leading source
                    # nodes, so slice instead of projecting twice.
                    num_dst = graph.number_of_dst_nodes()
                    feat_dst = feat_src[:num_dst]
                    h_dst = h_dst[:num_dst]
                    dst_prefix_shape = (num_dst,) + dst_prefix_shape[1:]

            # Linearly transform the edge features.
            n_edges = edge_feat.shape[:-1]
            feat_edge = self.fc_edge(edge_feat).view(*n_edges, *head_shape)

            # Add edge features to graph.
            graph.edata["ft_edge"] = feat_edge

            # Scalar attention contributions of source nodes, destination
            # nodes and edges (one value per head).
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            ee = (feat_edge * self.attn_edge).sum(dim=-1).unsqueeze(-1)
            graph.edata["ee"] = ee

            graph.srcdata.update({"ft": feat_src, "el": el})
            graph.dstdata.update({"er": er})

            # "e_tmp" combines the attention terms of the source and
            # destination node of every edge ...
            graph.apply_edges(fn.u_add_v("el", "er", "e_tmp"))
            # ... and the edge's own term is added on top.
            graph.edata["e"] = graph.edata["e_tmp"] + graph.edata["ee"]

            # Per-edge message: source node features plus edge features.
            graph.apply_edges(fn.u_add_e("ft", "ft_edge", "ft_combined"))

            e = self.leaky_relu(graph.edata.pop("e"))

            # Normalize the scores over the incoming edges of each node.
            graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))

            # Weight each edge message by its attention coefficient.
            graph.edata["m_combined"] = (
                graph.edata["ft_combined"] * graph.edata["a"]
            )

            # Copy the weighted edge messages and sum them per destination.
            graph.update_all(fn.copy_e("m_combined", "m"), fn.sum("m", "ft"))

            rst = graph.dstdata["ft"]

            # Residual (skipped when there are no destination features).
            if self.res_fc is not None and h_dst.numel() != 0:
                # Use -1 rather than self._num_heads to handle broadcasting.
                resval = self.res_fc(h_dst).view(
                    *dst_prefix_shape, -1, self._out_feats
                )
                rst = rst + resval

            # Bias.
            if self.bias is not None:
                rst = rst + self.bias.view(
                    *((1,) * len(dst_prefix_shape)),
                    self._num_heads,
                    self._out_feats
                )

            # Activation.
            if self.activation is not None:
                rst = self.activation(rst)

            if get_attention:
                return rst, graph.edata["a"]
            return rst
if self.activation: rst = self.activation(rst) if get_attention: return rst, graph.edata["a"] else: return rst ================================================ FILE: python/dgl/nn/pytorch/conv/egatconv.py ================================================ """Torch modules for graph attention networks with fully valuable edges (EGAT).""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th from torch import nn from torch.nn import init from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair from ...functional import edge_softmax # pylint: enable=W0235 class EGATConv(nn.Module): r"""Graph attention layer that handles edge features from `Rossmann-Toolbox `__ (see supplementary data) The difference lies in how unnormalized attention scores :math:`e_{ij}` are obtained: .. math:: e_{ij} &= \vec{F} (f_{ij}^{\prime}) f_{ij}^{\prime} &= \mathrm{LeakyReLU}\left(A [ h_{i} \| f_{ij} \| h_{j}]\right) where :math:`f_{ij}^{\prime}` are edge features, :math:`\mathrm{A}` is weight matrix and :math:`\vec{F}` is weight vector. After that, resulting node features :math:`h_{i}^{\prime}` are updated in the same way as in regular GAT. Parameters ---------- in_node_feats : int, or pair of ints Input feature size; i.e, the number of dimensions of :math:`h_{i}`. EGATConv can be applied on homogeneous graph and unidirectional `bipartite graph `__. If the layer is to be applied to a unidirectional bipartite graph, ``in_feats`` specifies the input feature size on both the source and destination nodes. If a scalar is given, the source and destination node feature size would take the same value. in_edge_feats : int Input edge feature size :math:`f_{ij}`. out_node_feats : int Output node feature size. out_edge_feats : int Output edge feature size :math:`f_{ij}^{\prime}`. num_heads : int Number of attention heads. bias : bool, optional If True, add bias term to :math:`f_{ij}^{\prime}`. Defaults: ``True``. 
Examples ---------- >>> import dgl >>> import torch as th >>> from dgl.nn import EGATConv >>> # Case 1: Homogeneous graph >>> num_nodes, num_edges = 8, 30 >>> # generate a graph >>> graph = dgl.rand_graph(num_nodes,num_edges) >>> node_feats = th.rand((num_nodes, 20)) >>> edge_feats = th.rand((num_edges, 12)) >>> egat = EGATConv(in_node_feats=20, ... in_edge_feats=12, ... out_node_feats=15, ... out_edge_feats=10, ... num_heads=3) >>> #forward pass >>> new_node_feats, new_edge_feats = egat(graph, node_feats, edge_feats) >>> new_node_feats.shape, new_edge_feats.shape torch.Size([8, 3, 15]) torch.Size([30, 3, 10]) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('A', 'r', 'B'): (u, v)}) >>> u_feat = th.tensor(np.random.rand(2, 25).astype(np.float32)) >>> v_feat = th.tensor(np.random.rand(4, 30).astype(np.float32)) >>> nfeats = (u_feat,v_feat) >>> efeats = th.tensor(np.random.rand(5, 15).astype(np.float32)) >>> in_node_feats = (25,30) >>> in_edge_feats = 15 >>> out_node_feats = 10 >>> out_edge_feats = 5 >>> num_heads = 3 >>> egat_model = EGATConv(in_node_feats, ... in_edge_feats, ... out_node_feats, ... out_edge_feats, ... num_heads, ... 
bias=True) >>> #forward pass >>> new_node_feats, >>> new_edge_feats, >>> attentions = egat_model(g, nfeats, efeats, get_attention=True) >>> new_node_feats.shape, new_edge_feats.shape, attentions.shape (torch.Size([4, 3, 10]), torch.Size([5, 3, 5]), torch.Size([5, 3, 1])) """ def __init__( self, in_node_feats, in_edge_feats, out_node_feats, out_edge_feats, num_heads, bias=True, ): super().__init__() self._num_heads = num_heads self._in_src_node_feats, self._in_dst_node_feats = expand_as_pair( in_node_feats ) self._out_node_feats = out_node_feats self._out_edge_feats = out_edge_feats if isinstance(in_node_feats, tuple): self.fc_node_src = nn.Linear( self._in_src_node_feats, out_node_feats * num_heads, bias=False ) self.fc_ni = nn.Linear( self._in_src_node_feats, out_edge_feats * num_heads, bias=False ) self.fc_nj = nn.Linear( self._in_dst_node_feats, out_edge_feats * num_heads, bias=False ) else: self.fc_node_src = nn.Linear( self._in_src_node_feats, out_node_feats * num_heads, bias=False ) self.fc_ni = nn.Linear( self._in_src_node_feats, out_edge_feats * num_heads, bias=False ) self.fc_nj = nn.Linear( self._in_src_node_feats, out_edge_feats * num_heads, bias=False ) self.fc_fij = nn.Linear( in_edge_feats, out_edge_feats * num_heads, bias=False ) self.attn = nn.Parameter( th.FloatTensor(size=(1, num_heads, out_edge_feats)) ) if bias: self.bias = nn.Parameter( th.FloatTensor(size=(num_heads * out_edge_feats,)) ) else: self.register_buffer("bias", None) self.reset_parameters() def reset_parameters(self): """ Reinitialize learnable parameters. 
""" gain = init.calculate_gain("relu") init.xavier_normal_(self.fc_node_src.weight, gain=gain) init.xavier_normal_(self.fc_ni.weight, gain=gain) init.xavier_normal_(self.fc_fij.weight, gain=gain) init.xavier_normal_(self.fc_nj.weight, gain=gain) init.xavier_normal_(self.attn, gain=gain) init.constant_(self.bias, 0) def forward( self, graph, nfeats, efeats, edge_weight=None, get_attention=False ): r""" Compute new node and edge features. Parameters ---------- graph : DGLGraph The graph. nfeat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where: :math:`D_{in}` is size of input node feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. efeats: torch.Tensor The input edge feature of shape :math:`(E, F_{in})` where: :math:`F_{in}` is size of input node feature, :math:`E` is the number of edges. edge_weight : torch.Tensor, optional A 1D tensor of edge weight values. Shape: :math:`(|E|,)`. get_attention : bool, optional Whether to return the attention values. Default to False. Returns ------- pair of torch.Tensor node output features followed by edge output features. The node output feature is of shape :math:`(N, H, D_{out})` The edge output feature is of shape :math:`(F, H, F_{out})` where: :math:`H` is the number of heads, :math:`D_{out}` is size of output node feature, :math:`F_{out}` is size of output edge feature. torch.Tensor, optional The attention values of shape :math:`(E, H, 1)`. This is returned only when :attr:`get_attention` is ``True``. """ with graph.local_scope(): if (graph.in_degrees() == 0).any(): raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. 
class EGNNConv(nn.Module):
    r"""Equivariant Graph Convolutional Layer from the paper
    *E(n) Equivariant Graph Neural Networks*.

    .. math::

        m_{ij}=\phi_e(h_i^l, h_j^l, ||x_i^l-x_j^l||^2, a_{ij})

        x_i^{l+1} = x_i^l + C\sum_{j\in\mathcal{N}(i)}(x_i^l-x_j^l)\phi_x(m_{ij})

        m_i = \sum_{j\in\mathcal{N}(i)} m_{ij}

        h_i^{l+1} = \phi_h(h_i^l, m_i)

    where :math:`h_i`, :math:`x_i`, :math:`a_{ij}` are node features,
    coordinate features, and edge features respectively. :math:`\phi_e`,
    :math:`\phi_h`, and :math:`\phi_x` are two-layer MLPs. :math:`C` is a
    constant for normalization, computed as :math:`1/|\mathcal{N}(i)|`.

    Parameters
    ----------
    in_size : int
        Input feature size; i.e. the size of :math:`h_i^l`.
    hidden_size : int
        Hidden feature size; i.e. the size of hidden layer in the two-layer
        MLPs in :math:`\phi_e, \phi_x, \phi_h`.
    out_size : int
        Output feature size; i.e. the size of :math:`h_i^{l+1}`.
    edge_feat_size : int, optional
        Edge feature size; i.e. the size of :math:`a_{ij}`. Default: 0.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import EGNNConv
    >>>
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> node_feat, coord_feat, edge_feat = th.ones(6, 10), th.ones(6, 3), th.ones(6, 2)
    >>> conv = EGNNConv(10, 10, 10, 2)
    >>> h, x = conv(g, node_feat, coord_feat, edge_feat)
    """

    def __init__(self, in_size, hidden_size, out_size, edge_feat_size=0):
        super().__init__()

        self.in_size = in_size
        self.hidden_size = hidden_size
        self.out_size = out_size
        self.edge_feat_size = edge_feat_size

        # One activation module instance is shared by all three MLPs.
        silu = nn.SiLU()

        # \phi_e: inputs are both endpoint features, the optional edge
        # features, and one extra scalar for the radial term ||x_i - x_j||^2.
        edge_in_size = 2 * in_size + edge_feat_size + 1
        self.edge_mlp = nn.Sequential(
            nn.Linear(edge_in_size, hidden_size),
            silu,
            nn.Linear(hidden_size, hidden_size),
            silu,
        )

        # \phi_h: node feature concatenated with the aggregated messages.
        self.node_mlp = nn.Sequential(
            nn.Linear(in_size + hidden_size, hidden_size),
            silu,
            nn.Linear(hidden_size, out_size),
        )

        # \phi_x: maps a message to a single scalar per edge.
        self.coord_mlp = nn.Sequential(
            nn.Linear(hidden_size, hidden_size),
            silu,
            nn.Linear(hidden_size, 1, bias=False),
        )

    def message(self, edges):
        """message function for EGNN"""
        # Assemble the edge MLP input; edge features go last and only when
        # the layer was configured with a positive edge feature size.
        pieces = [edges.src["h"], edges.dst["h"], edges.data["radial"]]
        if self.edge_feat_size > 0:
            pieces.append(edges.data["a"])
        edge_input = torch.cat(pieces, dim=-1)

        msg_h = self.edge_mlp(edge_input)
        # Scale the (normalized) coordinate difference by the learned scalar.
        msg_x = self.coord_mlp(msg_h) * edges.data["x_diff"]

        return {"msg_x": msg_x, "msg_h": msg_h}

    def forward(self, graph, node_feat, coord_feat, edge_feat=None):
        r"""

        Description
        -----------
        Compute EGNN layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        node_feat : torch.Tensor
            The input feature of shape :math:`(N, h_n)`. :math:`N` is the
            number of nodes, and :math:`h_n` must be the same as in_size.
        coord_feat : torch.Tensor
            The coordinate feature of shape :math:`(N, h_x)`. :math:`N` is the
            number of nodes, and :math:`h_x` can be any positive integer.
        edge_feat : torch.Tensor, optional
            The edge feature of shape :math:`(M, h_e)`. :math:`M` is the
            number of edges, and :math:`h_e` must be the same as
            edge_feat_size.

        Returns
        -------
        node_feat_out : torch.Tensor
            The output node feature of shape :math:`(N, h_n')` where
            :math:`h_n'` is the same as out_size.
        coord_feat_out: torch.Tensor
            The output coordinate feature of shape :math:`(N, h_x)` where
            :math:`h_x` is the same as the input coordinate feature dimension.
        """
        with graph.local_scope():
            # Stash node, coordinate and (optional) edge inputs on the graph.
            graph.ndata["h"] = node_feat
            graph.ndata["x"] = coord_feat
            if self.edge_feat_size > 0:
                assert edge_feat is not None, "Edge features must be provided."
                graph.edata["a"] = edge_feat

            # Per-edge coordinate difference and its squared length.
            graph.apply_edges(fn.u_sub_v("x", "x", "x_diff"))
            diff = graph.edata["x_diff"]
            radial = diff.square().sum(dim=1).unsqueeze(-1)
            graph.edata["radial"] = radial

            # Normalize the difference; the tiny constant keeps the
            # denominator non-zero for coincident endpoints.
            graph.edata["x_diff"] = diff / (radial.sqrt() + 1e-30)

            graph.apply_edges(self.message)
            # Coordinates use a mean over incoming edges, features a sum.
            graph.update_all(fn.copy_e("msg_x", "m"), fn.mean("m", "x_neigh"))
            graph.update_all(fn.copy_e("msg_h", "m"), fn.sum("m", "h_neigh"))

            h_neigh = graph.ndata["h_neigh"]
            x_neigh = graph.ndata["x_neigh"]

            new_h = self.node_mlp(torch.cat([node_feat, h_neigh], dim=-1))
            new_x = coord_feat + x_neigh

            return new_h, new_x
out_feats : int Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. num_heads : int Number of heads in Multi-Head Attention. feat_drop : float, optional Dropout rate on feature. Defaults: ``0``. attn_drop : float, optional Dropout rate on attention weight. Defaults: ``0``. negative_slope : float, optional LeakyReLU angle of negative slope. Defaults: ``0.2``. residual : bool, optional If True, use residual connection. Defaults: ``False``. activation : callable activation function/layer or None, optional. If not None, applies an activation function to the updated node features. Default: ``None``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Defaults: ``False``. bias : bool, optional If True, learns a bias term. Defaults: ``True``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. 
Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import GATConv >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> gatconv = GATConv(10, 2, num_heads=3) >>> res = gatconv(g, feat) >>> res tensor([[[ 3.4570, 1.8634], [ 1.3805, -0.0762], [ 1.0390, -1.1479]], [[ 3.4570, 1.8634], [ 1.3805, -0.0762], [ 1.0390, -1.1479]], [[ 3.4570, 1.8634], [ 1.3805, -0.0762], [ 1.0390, -1.1479]], [[ 3.4570, 1.8634], [ 1.3805, -0.0762], [ 1.0390, -1.1479]], [[ 3.4570, 1.8634], [ 1.3805, -0.0762], [ 1.0390, -1.1479]], [[ 3.4570, 1.8634], [ 1.3805, -0.0762], [ 1.0390, -1.1479]]], grad_fn=) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('A', 'r', 'B'): (u, v)}) >>> u_feat = th.tensor(np.random.rand(2, 5).astype(np.float32)) >>> v_feat = th.tensor(np.random.rand(4, 10).astype(np.float32)) >>> gatconv = GATConv((5,10), 2, 3) >>> res = gatconv(g, (u_feat, v_feat)) >>> res tensor([[[-0.6066, 1.0268], [-0.5945, -0.4801], [ 0.1594, 0.3825]], [[ 0.0268, 1.0783], [ 0.5041, -1.3025], [ 0.6568, 0.7048]], [[-0.2688, 1.0543], [-0.0315, -0.9016], [ 0.3943, 0.5347]], [[-0.6066, 1.0268], [-0.5945, -0.4801], [ 0.1594, 0.3825]]], grad_fn=) """ def __init__( self, in_feats, out_feats, num_heads, feat_drop=0.0, attn_drop=0.0, negative_slope=0.2, residual=False, activation=None, allow_zero_in_degree=False, bias=True, ): super(GATConv, self).__init__() self._num_heads = num_heads self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._allow_zero_in_degree = allow_zero_in_degree if isinstance(in_feats, tuple): self.fc_src = nn.Linear( self._in_src_feats, out_feats * num_heads, bias=False ) self.fc_dst = nn.Linear( self._in_dst_feats, out_feats * num_heads, bias=False ) else: self.fc = nn.Linear( self._in_src_feats, out_feats * num_heads, bias=False ) self.attn_l = 
def forward(self, graph, feat, edge_weight=None, get_attention=False):
    r"""

    Description
    -----------
    Compute graph attention network layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : torch.Tensor or pair of torch.Tensor
        If a torch.Tensor is given, the input feature of shape
        :math:`(N, *, D_{in})` where :math:`D_{in}` is size of input
        feature, :math:`N` is the number of nodes.
        If a pair of torch.Tensor is given, the pair must contain two
        tensors of shape :math:`(N_{in}, *, D_{in_{src}})` and
        :math:`(N_{out}, *, D_{in_{dst}})`.
    edge_weight : torch.Tensor, optional
        A 1D tensor of edge weight values. Shape: :math:`(|E|,)`. Each
        edge's attention is multiplied by its weight after the softmax.
    get_attention : bool, optional
        Whether to return the attention values. Default to False.

    Returns
    -------
    torch.Tensor
        The output feature of shape :math:`(N, *, H, D_{out})` where
        :math:`H` is the number of heads, and :math:`D_{out}` is size of
        output feature.
    torch.Tensor, optional
        The attention values of shape :math:`(E, *, H, 1)`, where
        :math:`E` is the number of edges. This is returned only when
        :attr:`get_attention` is ``True``.

    Raises
    ------
    DGLError
        If there are 0-in-degree nodes in the input graph, it will raise
        DGLError since no message will be passed to those nodes. This will
        cause invalid output. The error can be ignored by setting
        ``allow_zero_in_degree`` parameter to ``True``.
    """
    with graph.local_scope():
        if not self._allow_zero_in_degree:
            if (graph.in_degrees() == 0).any():
                raise DGLError(
                    "There are 0-in-degree nodes in the graph, "
                    "output for those nodes will be invalid. "
                    "This is harmful for some applications, "
                    "causing silent performance regression. "
                    "Adding self-loop on the input graph by "
                    "calling `g = dgl.add_self_loop(g)` will resolve "
                    "the issue. Setting ``allow_zero_in_degree`` "
                    "to be `True` when constructing this module will "
                    "suppress the check and let the code run."
                )

        if isinstance(feat, tuple):
            src_prefix_shape = feat[0].shape[:-1]
            dst_prefix_shape = feat[1].shape[:-1]
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            # A layer built with a scalar ``in_feats`` only owns ``fc``;
            # it is then applied to both sides of the pair.
            if not hasattr(self, "fc_src"):
                feat_src = self.fc(h_src).view(
                    *src_prefix_shape, self._num_heads, self._out_feats
                )
                feat_dst = self.fc(h_dst).view(
                    *dst_prefix_shape, self._num_heads, self._out_feats
                )
            else:
                feat_src = self.fc_src(h_src).view(
                    *src_prefix_shape, self._num_heads, self._out_feats
                )
                feat_dst = self.fc_dst(h_dst).view(
                    *dst_prefix_shape, self._num_heads, self._out_feats
                )
        else:
            src_prefix_shape = dst_prefix_shape = feat.shape[:-1]
            h_src = h_dst = self.feat_drop(feat)
            feat_src = feat_dst = self.fc(h_src).view(
                *src_prefix_shape, self._num_heads, self._out_feats
            )
            if graph.is_block:
                # Only the leading rows are kept for the destination side.
                feat_dst = feat_src[: graph.number_of_dst_nodes()]
                h_dst = h_dst[: graph.number_of_dst_nodes()]
                dst_prefix_shape = (
                    graph.number_of_dst_nodes(),
                ) + dst_prefix_shape[1:]

        # NOTE: GAT paper uses "first concatenation then linear projection"
        # to compute attention scores, while ours is "first projection then
        # addition", the two approaches are mathematically equivalent:
        # We decompose the weight vector a mentioned in the paper into
        # [a_l || a_r], then
        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        # Our implementation is much efficient because we do not need to
        # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
        # addition could be optimized with DGL's built-in function u_add_v,
        # which further speeds up computation and saves memory footprint.
        el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
        er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
        graph.srcdata.update({"ft": feat_src, "el": el})
        graph.dstdata.update({"er": er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j
        # respectively.
        graph.apply_edges(fn.u_add_v("el", "er", "e"))
        e = self.leaky_relu(graph.edata.pop("e"))
        # compute softmax
        graph.edata["a"] = self.attn_drop(edge_softmax(graph, e))
        if edge_weight is not None:
            # Shape the per-edge weight as (E, 1, ..., 1) so it broadcasts
            # over the heads, the trailing axis and any extra feature
            # dimensions. For attention of shape (E, H, 1) this equals the
            # explicit tiling to (E, H, 1).
            attention = graph.edata["a"]
            graph.edata["a"] = attention * edge_weight.reshape(
                (-1,) + (1,) * (attention.dim() - 1)
            )
        # message passing
        graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft"))
        rst = graph.dstdata["ft"]
        # residual
        if self.res_fc is not None:
            # Use -1 rather than self._num_heads to handle broadcasting
            if h_dst.numel() != 0:
                resval = self.res_fc(h_dst).view(
                    *dst_prefix_shape, -1, self._out_feats
                )
                rst = rst + resval
        # bias
        if self.has_explicit_bias:
            rst = rst + self.bias.view(
                *((1,) * len(dst_prefix_shape)),
                self._num_heads,
                self._out_feats
            )
        # activation
        if self.activation:
            rst = self.activation(rst)

        if get_attention:
            return rst, graph.edata["a"]
        else:
            return rst
Parameters ---------- input_feats : int Input feature size; i.e, the number of dimensions of :math:`h_{i}^{l}`. edge_feats: int Edge feature size; i.e., the number of dimensions of :math:`e_{ij}^{l}`. output_feats : int Output feature size; i.e., the number of dimensions of :math:`h_{i}^{l+1}`. dropout : float, optional Dropout rate on node and edge feature. Default: ``0``. batch_norm : bool, optional Whether to include batch normalization on node and edge feature. Default: ``True``. residual : bool, optional Whether to include residual connections. Default: ``True``. activation : callable activation function/layer or None, optional If not None, apply an activation function to the updated node features. Default: ``F.relu``. Example ------- >>> import dgl >>> import torch as th >>> import torch.nn.functional as F >>> from dgl.nn import GatedGCNConv >>> num_nodes, num_edges = 8, 30 >>> graph = dgl.rand_graph(num_nodes,num_edges) >>> node_feats = th.rand(num_nodes, 20) >>> edge_feats = th.rand(num_edges, 12) >>> gatedGCN = GatedGCNConv(20, 12, 20) >>> new_node_feats, new_edge_feats = gatedGCN(graph, node_feats, edge_feats) >>> new_node_feats.shape, new_edge_feats.shape (torch.Size([8, 20]), torch.Size([30, 20])) """ def __init__( self, input_feats, edge_feats, output_feats, dropout=0, batch_norm=True, residual=True, activation=F.relu, ): super(GatedGCNConv, self).__init__() self.dropout = nn.Dropout(dropout) self.batch_norm = batch_norm self.residual = residual if input_feats != output_feats or edge_feats != output_feats: self.residual = False # Linearly transform the node features. self.A = nn.Linear(input_feats, output_feats, bias=True) self.B = nn.Linear(input_feats, output_feats, bias=True) self.D = nn.Linear(input_feats, output_feats, bias=True) self.E = nn.Linear(input_feats, output_feats, bias=True) # Linearly transform the edge features. self.C = nn.Linear(edge_feats, output_feats, bias=True) # Batch normalization on the node/edge features. 
self.bn_node = nn.BatchNorm1d(output_feats) self.bn_edge = nn.BatchNorm1d(output_feats) self.activation = activation def forward(self, graph, feat, edge_feat): """ Description ----------- Compute gated graph convolution layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor The input feature of shape :math:`(N, D_{in})` where :math:`N` is the number of nodes of the graph and :math:`D_{in}` is the input feature size. edge_feat : torch.Tensor The input edge feature of shape :math:`(E, D_{edge})`, where :math:`E` is the number of edges and :math:`D_{edge}` is the size of the edge features. Returns ------- torch.Tensor The output node feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output feature size. torch.Tensor The output edge feature of shape :math:`(E, D_{out})` where :math:`D_{out}` is the output feature size. """ with graph.local_scope(): # For residual connection h_in = feat e_in = edge_feat graph.ndata["Ah"] = self.A(feat) graph.ndata["Bh"] = self.B(feat) graph.ndata["Dh"] = self.D(feat) graph.ndata["Eh"] = self.E(feat) graph.edata["Ce"] = self.C(edge_feat) graph.apply_edges(fn.u_add_v("Dh", "Eh", "DEh")) # Get edge feature graph.edata["e"] = graph.edata["DEh"] + graph.edata["Ce"] graph.edata["sigma"] = torch.sigmoid(graph.edata["e"]) graph.update_all( fn.u_mul_e("Bh", "sigma", "m"), fn.sum("m", "sum_sigma_h") ) graph.update_all(fn.copy_e("sigma", "m"), fn.sum("m", "sum_sigma")) graph.ndata["h"] = graph.ndata["Ah"] + graph.ndata[ "sum_sigma_h" ] / (graph.ndata["sum_sigma"] + 1e-6) # Result of graph convolution. feat = graph.ndata["h"] edge_feat = graph.edata["e"] # Batch normalization. if self.batch_norm: feat = self.bn_node(feat) edge_feat = self.bn_edge(edge_feat) # Non-linear activation. if self.activation: feat = self.activation(feat) edge_feat = self.activation(edge_feat) # Residual connection. 
class GatedGraphConv(nn.Module):
    r"""Gated Graph Convolution layer from the paper
    *Gated Graph Sequence Neural Networks*.

    .. math::
        h_{i}^{0} &= [ x_i \| \mathbf{0} ]

        a_{i}^{t} &= \sum_{j\in\mathcal{N}(i)} W_{e_{ij}} h_{j}^{t}

        h_{i}^{t+1} &= \mathrm{GRU}(a_{i}^{t}, h_{i}^{t})

    Parameters
    ----------
    in_feats : int
        Input feature size; i.e, the number of dimensions of :math:`x_i`.
        Must not exceed ``out_feats``.
    out_feats : int
        Output feature size; i.e., the number of dimensions of
        :math:`h_i^{(t+1)}`.
    n_steps : int
        Number of recurrent steps; i.e, the :math:`t` in the above formula.
    n_etypes : int
        Number of edge types.
    bias : bool
        Passed to the GRU cell: if True, the GRU cell uses learnable
        biases. The per-edge-type linear layers always have a bias.
        Default: ``True``.

    Example
    -------
    >>> import dgl
    >>> import numpy as np
    >>> import torch as th
    >>> from dgl.nn import GatedGraphConv
    >>>
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> feat = th.ones(6, 10)
    >>> conv = GatedGraphConv(10, 10, 2, 3)
    >>> etype = th.tensor([0,1,2,0,1,2])
    >>> res = conv(g, feat, etype)
    >>> res.shape
    torch.Size([6, 10])
    """

    def __init__(self, in_feats, out_feats, n_steps, n_etypes, bias=True):
        super(GatedGraphConv, self).__init__()
        assert in_feats <= out_feats, "out_feats must be not less than in_feats"
        self._in_feats = in_feats
        self._out_feats = out_feats
        self._n_steps = n_steps
        self._n_etypes = n_etypes
        # Give the flag a defined value before ``set_allow_zero_in_degree``
        # is ever called.
        self._allow_zero_in_degree = False
        # One linear map per edge type, acting on the padded hidden state.
        self.linears = nn.ModuleList(
            [nn.Linear(out_feats, out_feats) for _ in range(n_etypes)]
        )
        self.gru = nn.GRUCell(out_feats, out_feats, bias=bias)
        self.reset_parameters()

    def reset_parameters(self):
        r"""

        Description
        -----------
        Reinitialize learnable parameters.

        Note
        ----
        The GRU cell is reset with its own ``reset_parameters``. The weights
        of the per-edge-type linear layers are initialized using Xavier
        (Glorot) normal initialization with the ReLU gain, and their biases
        are initialized to zero.
        """
        gain = init.calculate_gain("relu")
        self.gru.reset_parameters()
        for linear in self.linears:
            init.xavier_normal_(linear.weight, gain=gain)
            init.zeros_(linear.bias)

    def set_allow_zero_in_degree(self, set_value):
        r"""

        Description
        -----------
        Set allow_zero_in_degree flag.

        Parameters
        ----------
        set_value : bool
            The value to be set to the flag.
        """
        self._allow_zero_in_degree = set_value

    def forward(self, graph, feat, etypes=None):
        """

        Description
        -----------
        Compute Gated Graph Convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor
            The input feature of shape :math:`(N, D_{in})` where :math:`N`
            is the number of nodes of the graph and :math:`D_{in}` is the
            input feature size.
        etypes : torch.LongTensor, or None
            The edge type tensor of shape :math:`(E,)` where :math:`E` is
            the number of edges of the graph. When there's only one edge
            type, this argument can be skipped

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is the output feature size.

        Raises
        ------
        ValueError
            If ``etypes`` is omitted although the layer was built with more
            than one edge type.
        """
        # Without edge types there is no way to pick a linear map per edge.
        if self._n_etypes != 1 and etypes is None:
            raise ValueError(
                "etypes must be provided when the layer has more than one "
                "edge type (n_etypes={})".format(self._n_etypes)
            )

        with graph.local_scope():
            assert graph.is_homogeneous, (
                "not a homogeneous graph; convert it with to_homogeneous "
                "and pass in the edge type as argument"
            )
            if self._n_etypes != 1:
                assert (
                    etypes.min() >= 0 and etypes.max() < self._n_etypes
                ), "edge type indices out of range [0, {})".format(
                    self._n_etypes
                )

            # h^0 = [x || 0]: pad the input up to the hidden size.
            zero_pad = feat.new_zeros(
                (feat.shape[0], self._out_feats - feat.shape[1])
            )
            feat = th.cat([feat, zero_pad], -1)

            # Both the path choice and the edge ids of each type are the
            # same for every recurrent step, so compute them once.
            single_type_fast_path = self._n_etypes == 1 and etypes is None
            if not single_type_fast_path:
                eids_per_type = [
                    th.nonzero(etypes == i, as_tuple=False)
                    .view(-1)
                    .type(graph.idtype)
                    for i in range(self._n_etypes)
                ]

            for _ in range(self._n_steps):
                if single_type_fast_path:
                    # Fast path when graph has only one edge type
                    graph.ndata["h"] = self.linears[0](feat)
                    graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "a"))
                    a = graph.ndata.pop("a")  # (N, D)
                else:
                    graph.ndata["h"] = feat
                    for linear, eids in zip(self.linears, eids_per_type):
                        if len(eids) > 0:
                            # Bind ``linear`` as a default so the callback
                            # does not depend on the loop variable.
                            graph.apply_edges(
                                lambda edges, linear=linear: {
                                    "W_e*h": linear(edges.src["h"])
                                },
                                eids,
                            )
                    graph.update_all(fn.copy_e("W_e*h", "m"), fn.sum("m", "a"))
                    a = graph.ndata.pop("a")  # (N, D)
                feat = self.gru(a, feat)
            return feat
arguments-differ, invalid-name import torch as th from torch import nn from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair from ...functional import edge_softmax from ..utils import Identity # pylint: enable=W0235 class GATv2Conv(nn.Module): r"""GATv2 from `How Attentive are Graph Attention Networks? `__ .. math:: h_i^{(l+1)} = \sum_{j\in \mathcal{N}(i)} \alpha_{ij}^{(l)} W^{(l)}_{right} h_j^{(l)} where :math:`\alpha_{ij}` is the attention score bewteen node :math:`i` and node :math:`j`: .. math:: \alpha_{ij}^{(l)} &= \mathrm{softmax_i} (e_{ij}^{(l)}) e_{ij}^{(l)} &= {\vec{a}^T}^{(l)}\mathrm{LeakyReLU}\left( W^{(l)}_{left} h_{i} + W^{(l)}_{right} h_{j}\right) Parameters ---------- in_feats : int, or pair of ints Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`. If the layer is to be applied to a unidirectional bipartite graph, `in_feats` specifies the input feature size on both the source and destination nodes. If a scalar is given, the source and destination node feature size would take the same value. out_feats : int Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. num_heads : int Number of heads in Multi-Head Attention. feat_drop : float, optional Dropout rate on feature. Defaults: ``0``. attn_drop : float, optional Dropout rate on attention weight. Defaults: ``0``. negative_slope : float, optional LeakyReLU angle of negative slope. Defaults: ``0.2``. residual : bool, optional If True, use residual connection. Defaults: ``False``. activation : callable activation function/layer or None, optional. If not None, applies an activation function to the updated node features. Default: ``None``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. 
This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Defaults: ``False``. bias : bool, optional If set to :obj:`False`, the layer will not learn an additive bias. (default: :obj:`True`) share_weights : bool, optional If set to :obj:`True`, the same matrix for :math:`W_{left}` and :math:`W_{right}` in the above equations, will be applied to the source and the target node of every edge. (default: :obj:`False`) Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be applied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. 
Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import GATv2Conv >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> gatv2conv = GATv2Conv(10, 2, num_heads=3) >>> res = gatv2conv(g, feat) >>> res tensor([[[ 1.9599, 1.0239], [ 3.2015, -0.5512], [ 2.3700, -2.2182]], [[ 1.9599, 1.0239], [ 3.2015, -0.5512], [ 2.3700, -2.2182]], [[ 1.9599, 1.0239], [ 3.2015, -0.5512], [ 2.3700, -2.2182]], [[ 1.9599, 1.0239], [ 3.2015, -0.5512], [ 2.3700, -2.2182]], [[ 1.9599, 1.0239], [ 3.2015, -0.5512], [ 2.3700, -2.2182]], [[ 1.9599, 1.0239], [ 3.2015, -0.5512], [ 2.3700, -2.2182]]], grad_fn=) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('A', 'r', 'B'): (u, v)}) >>> u_feat = th.tensor(np.random.rand(2, 5).astype(np.float32)) >>> v_feat = th.tensor(np.random.rand(4, 10).astype(np.float32)) >>> gatv2conv = GATv2Conv((5,10), 2, 3) >>> res = gatv2conv(g, (u_feat, v_feat)) >>> res tensor([[[-0.0935, -0.4273], [-1.1850, 0.1123], [-0.2002, 0.1155]], [[ 0.1908, -1.2095], [-0.0129, 0.6408], [-0.8135, 0.1157]], [[ 0.0596, -0.8487], [-0.5421, 0.4022], [-0.4805, 0.1156]], [[-0.0935, -0.4273], [-1.1850, 0.1123], [-0.2002, 0.1155]]], grad_fn=) """ def __init__( self, in_feats, out_feats, num_heads, feat_drop=0.0, attn_drop=0.0, negative_slope=0.2, residual=False, activation=None, allow_zero_in_degree=False, bias=True, share_weights=False, ): super(GATv2Conv, self).__init__() self._num_heads = num_heads self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._allow_zero_in_degree = allow_zero_in_degree if isinstance(in_feats, tuple): self.fc_src = nn.Linear( self._in_src_feats, out_feats * num_heads, bias=bias ) self.fc_dst = nn.Linear( self._in_dst_feats, out_feats * num_heads, bias=bias ) else: self.fc_src = nn.Linear( self._in_src_feats, out_feats 
* num_heads, bias=bias ) if share_weights: self.fc_dst = self.fc_src else: self.fc_dst = nn.Linear( self._in_src_feats, out_feats * num_heads, bias=bias ) self.attn = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats))) self.feat_drop = nn.Dropout(feat_drop) self.attn_drop = nn.Dropout(attn_drop) self.leaky_relu = nn.LeakyReLU(negative_slope) if residual: if self._in_dst_feats != out_feats * num_heads: self.res_fc = nn.Linear( self._in_dst_feats, num_heads * out_feats, bias=bias ) else: self.res_fc = Identity() else: self.register_buffer("res_fc", None) self.activation = activation self.share_weights = share_weights self.bias = bias self.reset_parameters() def reset_parameters(self): """ Description ----------- Reinitialize learnable parameters. Note ---- The fc weights :math:`W^{(l)}` are initialized using Glorot uniform initialization. The attention weights are using xavier initialization method. """ gain = nn.init.calculate_gain("relu") nn.init.xavier_normal_(self.fc_src.weight, gain=gain) if self.bias: nn.init.constant_(self.fc_src.bias, 0) if not self.share_weights: nn.init.xavier_normal_(self.fc_dst.weight, gain=gain) if self.bias: nn.init.constant_(self.fc_dst.bias, 0) nn.init.xavier_normal_(self.attn, gain=gain) if isinstance(self.res_fc, nn.Linear): nn.init.xavier_normal_(self.res_fc.weight, gain=gain) if self.bias: nn.init.constant_(self.res_fc.bias, 0) def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, get_attention=False): r""" Description ----------- Compute graph attention network layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. 
If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. get_attention : bool, optional Whether to return the attention values. Default to False. Returns ------- torch.Tensor The output feature of shape :math:`(N, H, D_{out})` where :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. torch.Tensor, optional The attention values of shape :math:`(E, H, 1)`, where :math:`E` is the number of edges. This is returned only when :attr:`get_attention` is ``True``. Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. """ with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." 
) if isinstance(feat, tuple): h_src = self.feat_drop(feat[0]) h_dst = self.feat_drop(feat[1]) feat_src = self.fc_src(h_src).view( -1, self._num_heads, self._out_feats ) feat_dst = self.fc_dst(h_dst).view( -1, self._num_heads, self._out_feats ) else: h_src = h_dst = self.feat_drop(feat) feat_src = self.fc_src(h_src).view( -1, self._num_heads, self._out_feats ) if self.share_weights: feat_dst = feat_src else: feat_dst = self.fc_dst(h_dst).view( -1, self._num_heads, self._out_feats ) if graph.is_block: feat_dst = feat_dst[: graph.number_of_dst_nodes()] h_dst = h_dst[: graph.number_of_dst_nodes()] graph.srcdata.update( {"el": feat_src} ) # (num_src_edge, num_heads, out_dim) graph.dstdata.update({"er": feat_dst}) graph.apply_edges(fn.u_add_v("el", "er", "e")) e = self.leaky_relu( graph.edata.pop("e") ) # (num_src_edge, num_heads, out_dim) e = ( (e * self.attn).sum(dim=-1).unsqueeze(dim=2) ) # (num_edge, num_heads, 1) # compute softmax graph.edata["a"] = self.attn_drop( edge_softmax(graph, e) ) # (num_edge, num_heads) # message passing graph.update_all(fn.u_mul_e("el", "a", "m"), fn.sum("m", "ft")) rst = graph.dstdata["ft"] # residual if self.res_fc is not None: if h_dst.numel() != 0: resval = self.res_fc(h_dst).view( h_dst.shape[0], -1, self._out_feats ) rst = rst + resval # activation if self.activation: rst = self.activation(rst) if get_attention: return rst, graph.edata["a"] else: return rst ================================================ FILE: python/dgl/nn/pytorch/conv/gcn2conv.py ================================================ """Torch Module for Graph Convolutional Network via Initial residual and Identity mapping (GCNII) layer""" # pylint: disable= no-member, arguments-differ, invalid-name import math import torch as th from torch import nn from .... 
import function as fn from ....base import DGLError from .graphconv import EdgeWeightNorm class GCN2Conv(nn.Module): r"""Graph Convolutional Network via Initial residual and Identity mapping (GCNII) from `Simple and Deep Graph Convolutional Networks `__ It is mathematically is defined as follows: .. math:: \mathbf{h}^{(l+1)} =\left( (1 - \alpha)(\mathbf{D}^{-1/2} \mathbf{\hat{A}} \mathbf{D}^{-1/2})\mathbf{h}^{(l)} + \alpha {\mathbf{h}^{(0)}} \right) \left( (1 - \beta_l) \mathbf{I} + \beta_l \mathbf{W} \right) where :math:`\mathbf{\hat{A}}` is the adjacency matrix with self-loops, :math:`\mathbf{D}_{ii} = \sum_{j=0} \mathbf{A}_{ij}` is its diagonal degree matrix, :math:`\mathbf{h}^{(0)}` is the initial node features, :math:`\mathbf{h}^{(l)}` is the feature of layer :math:`l`, :math:`\alpha` is the fraction of initial node features, and :math:`\beta_l` is the hyperparameter to tune the strength of identity mapping. It is defined by :math:`\beta_l = \log(\frac{\lambda}{l}+1)\approx\frac{\lambda}{l}`, where :math:`\lambda` is a hyperparameter. :math:`\beta` ensures that the decay of the weight matrix adaptively increases as we stack more layers. Parameters ---------- in_feats : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. layer : int the index of current layer. alpha : float The fraction of the initial input features. Default: ``0.1`` lambda_ : float The hyperparameter to ensure the decay of the weight matrix adaptively increases. Default: ``1`` project_initial_features : bool Whether to share a weight matrix between initial features and smoothed features. Default: ``True`` bias : bool, optional If True, adds a learnable bias to the output. Default: ``True``. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. 
allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, so the aggregation function will be applied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practice to handle this is to filter out the nodes with zero-in-degree when using the output of the conv. Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import GCN2Conv >>> # Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = th.ones(6, 3) >>> g = dgl.add_self_loop(g) >>> conv1 = GCN2Conv(3, layer=1, alpha=0.5, \ ... project_initial_features=True, allow_zero_in_degree=True) >>> conv2 = GCN2Conv(3, layer=2, alpha=0.5, \ ...
project_initial_features=True, allow_zero_in_degree=True) >>> res = feat >>> res = conv1(g, res, feat) >>> res = conv2(g, res, feat) >>> print(res) tensor([[1.3803, 3.3191, 2.9572], [1.3803, 3.3191, 2.9572], [1.3803, 3.3191, 2.9572], [1.4770, 3.8326, 3.2451], [1.3623, 3.2102, 2.8679], [1.3803, 3.3191, 2.9572]], grad_fn=) """ def __init__( self, in_feats, layer, alpha=0.1, lambda_=1, project_initial_features=True, allow_zero_in_degree=False, bias=True, activation=None, ): super().__init__() self._in_feats = in_feats self._project_initial_features = project_initial_features self.alpha = alpha self.beta = math.log(lambda_ / layer + 1) self._bias = bias self._activation = activation self._allow_zero_in_degree = allow_zero_in_degree self.weight1 = nn.Parameter(th.Tensor(self._in_feats, self._in_feats)) if self._project_initial_features: self.register_parameter("weight2", None) else: self.weight2 = nn.Parameter( th.Tensor(self._in_feats, self._in_feats) ) if self._bias: self.bias = nn.Parameter(th.Tensor(self._in_feats)) else: self.register_parameter("bias", None) self.reset_parameters() def reset_parameters(self): r""" Description ----------- Reinitialize learnable parameters. """ nn.init.normal_(self.weight1) if not self._project_initial_features: nn.init.normal_(self.weight2) if self._bias: nn.init.zeros_(self.bias) def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, feat_0, edge_weight=None): r""" Description ----------- Compute graph convolution. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is the size of input feature and :math:`N` is the number of nodes. 
feat_0 : torch.Tensor The initial feature of shape :math:`(N, D_{in})` edge_weight: torch.Tensor, optional edge_weight to use in the message passing process. This is equivalent to using weighted adjacency matrix in the equation above, and :math:`\tilde{D}^{-1/2}\tilde{A} \tilde{D}^{-1/2}` is based on :class:`dgl.nn.pytorch.conv.graphconv.EdgeWeightNorm`. Returns ------- torch.Tensor The output feature Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. Note ---- * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional dimensions, :math:`N` is the number of nodes. * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are the same shape as the input. * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`. """ with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." 
) # normalize to get smoothed representation if edge_weight is None: degs = graph.in_degrees().to(feat).clamp(min=1) norm = th.pow(degs, -0.5) norm = norm.to(feat.device).unsqueeze(1) else: edge_weight = EdgeWeightNorm("both")(graph, edge_weight) if edge_weight is None: feat = feat * norm graph.ndata["h"] = feat msg_func = fn.copy_u("h", "m") if edge_weight is not None: graph.edata["_edge_weight"] = edge_weight msg_func = fn.u_mul_e("h", "_edge_weight", "m") graph.update_all(msg_func, fn.sum("m", "h")) feat = graph.ndata.pop("h") if edge_weight is None: feat = feat * norm # scale feat = feat * (1 - self.alpha) # initial residual connection to the first layer feat_0 = feat_0[: feat.size(0)] * self.alpha feat_sum = feat + feat_0 if self._project_initial_features: feat_proj_sum = feat_sum @ self.weight1 else: feat_proj_sum = feat @ self.weight1 + feat_0 @ self.weight2 rst = (1 - self.beta) * feat_sum + self.beta * feat_proj_sum if self._bias: rst = rst + self.bias if self._activation is not None: rst = self._activation(rst) return rst def extra_repr(self): """Set the extra representation of the module, which will come into effect when printing the model. """ summary = "in={_in_feats}" summary += ", alpha={alpha}, beta={beta}" if "self._bias" in self.__dict__: summary += ", bias={bias}" if "self._activation" in self.__dict__: summary += ", activation={_activation}" return summary.format(**self.__dict__) ================================================ FILE: python/dgl/nn/pytorch/conv/ginconv.py ================================================ """Torch Module for Graph Isomorphism Network layer""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th from torch import nn from .... import function as fn from ....utils import expand_as_pair class GINConv(nn.Module): r"""Graph Isomorphism Network layer from `How Powerful are Graph Neural Networks? `__ .. 
math:: h_i^{(l+1)} = f_\Theta \left((1 + \epsilon) h_i^{l} + \mathrm{aggregate}\left(\left\{h_j^{l}, j\in\mathcal{N}(i) \right\}\right)\right) If a weight tensor on each edge is provided, the weighted graph convolution is defined as: .. math:: h_i^{(l+1)} = f_\Theta \left((1 + \epsilon) h_i^{l} + \mathrm{aggregate}\left(\left\{e_{ji} h_j^{l}, j\in\mathcal{N}(i) \right\}\right)\right) where :math:`e_{ji}` is the weight on the edge from node :math:`j` to node :math:`i`. Please make sure that `e_{ji}` is broadcastable with `h_j^{l}`. Parameters ---------- apply_func : callable activation function/layer or None If not None, apply this function to the updated node feature, the :math:`f_\Theta` in the formula, default: None. aggregator_type : str Aggregator type to use (``sum``, ``max`` or ``mean``), default: 'sum'. init_eps : float, optional Initial :math:`\epsilon` value, default: ``0``. learn_eps : bool, optional If True, :math:`\epsilon` will be a learnable parameter. Default: ``False``. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. 
Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import GINConv >>> >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = th.ones(6, 10) >>> lin = th.nn.Linear(10, 10) >>> conv = GINConv(lin, 'max') >>> res = conv(g, feat) >>> res tensor([[-0.4821, 0.0207, -0.7665, 0.5721, -0.4682, -0.2134, -0.5236, 1.2855, 0.8843, -0.8764], [-0.4821, 0.0207, -0.7665, 0.5721, -0.4682, -0.2134, -0.5236, 1.2855, 0.8843, -0.8764], [-0.4821, 0.0207, -0.7665, 0.5721, -0.4682, -0.2134, -0.5236, 1.2855, 0.8843, -0.8764], [-0.4821, 0.0207, -0.7665, 0.5721, -0.4682, -0.2134, -0.5236, 1.2855, 0.8843, -0.8764], [-0.4821, 0.0207, -0.7665, 0.5721, -0.4682, -0.2134, -0.5236, 1.2855, 0.8843, -0.8764], [-0.1804, 0.0758, -0.5159, 0.3569, -0.1408, -0.1395, -0.2387, 0.7773, 0.5266, -0.4465]], grad_fn=) >>> # With activation >>> from torch.nn.functional import relu >>> conv = GINConv(lin, 'max', activation=relu) >>> res = conv(g, feat) >>> res tensor([[5.0118, 0.0000, 0.0000, 3.9091, 1.3371, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [5.0118, 0.0000, 0.0000, 3.9091, 1.3371, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [5.0118, 0.0000, 0.0000, 3.9091, 1.3371, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [5.0118, 0.0000, 0.0000, 3.9091, 1.3371, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [5.0118, 0.0000, 0.0000, 3.9091, 1.3371, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000], [2.5011, 0.0000, 0.0089, 2.0541, 0.8262, 0.0000, 0.0000, 0.1371, 0.0000, 0.0000]], grad_fn=) """ def __init__( self, apply_func=None, aggregator_type="sum", init_eps=0, learn_eps=False, activation=None, ): super(GINConv, self).__init__() self.apply_func = apply_func self._aggregator_type = aggregator_type self.activation = activation if aggregator_type not in ("sum", "max", "mean"): raise KeyError( "Aggregator type {} not recognized.".format(aggregator_type) ) # to specify whether eps is trainable or not. 
if learn_eps: self.eps = th.nn.Parameter(th.FloatTensor([init_eps])) else: self.register_buffer("eps", th.FloatTensor([init_eps])) def forward(self, graph, feat, edge_weight=None): r""" Description ----------- Compute Graph Isomorphism Network layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`. If ``apply_func`` is not None, :math:`D_{in}` should fit the input dimensionality requirement of ``apply_func``. edge_weight : torch.Tensor, optional Optional tensor on the edge. If given, the convolution will weight with regard to the message. Returns ------- torch.Tensor The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output dimensionality of ``apply_func``. If ``apply_func`` is None, :math:`D_{out}` should be the same as input dimensionality. 
""" _reducer = getattr(fn, self._aggregator_type) with graph.local_scope(): aggregate_fn = fn.copy_u("h", "m") if edge_weight is not None: assert edge_weight.shape[0] == graph.num_edges() graph.edata["_edge_weight"] = edge_weight aggregate_fn = fn.u_mul_e("h", "_edge_weight", "m") feat_src, feat_dst = expand_as_pair(feat, graph) graph.srcdata["h"] = feat_src graph.update_all(aggregate_fn, _reducer("m", "neigh")) rst = (1 + self.eps) * feat_dst + graph.dstdata["neigh"] if self.apply_func is not None: rst = self.apply_func(rst) # activation if self.activation is not None: rst = self.activation(rst) return rst ================================================ FILE: python/dgl/nn/pytorch/conv/gineconv.py ================================================ """Torch Module for Graph Isomorphism Network layer variant with edge features""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th import torch.nn.functional as F from torch import nn from .... import function as fn from ....utils import expand_as_pair class GINEConv(nn.Module): r"""Graph Isomorphism Network with Edge Features, introduced by `Strategies for Pre-training Graph Neural Networks `__ .. math:: h_i^{(l+1)} = f_\Theta \left((1 + \epsilon) h_i^{l} + \sum_{j\in\mathcal{N}(i)}\mathrm{ReLU}(h_j^{l} + e_{j,i}^{l})\right) where :math:`e_{j,i}^{l}` is the edge feature. Parameters ---------- apply_func : callable module or None The :math:`f_\Theta` in the formula. If not None, it will be applied to the updated node features. The default value is None. init_eps : float, optional Initial :math:`\epsilon` value, default: ``0``. learn_eps : bool, optional If True, :math:`\epsilon` will be a learnable parameter. Default: ``False``. 
Examples -------- >>> import dgl >>> import torch >>> import torch.nn as nn >>> from dgl.nn import GINEConv >>> g = dgl.graph(([0, 1, 2], [1, 1, 3])) >>> in_feats = 10 >>> out_feats = 20 >>> nfeat = torch.randn(g.num_nodes(), in_feats) >>> efeat = torch.randn(g.num_edges(), in_feats) >>> conv = GINEConv(nn.Linear(in_feats, out_feats)) >>> res = conv(g, nfeat, efeat) >>> print(res.shape) torch.Size([4, 20]) """ def __init__(self, apply_func=None, init_eps=0, learn_eps=False): super(GINEConv, self).__init__() self.apply_func = apply_func # to specify whether eps is trainable or not. if learn_eps: self.eps = nn.Parameter(th.FloatTensor([init_eps])) else: self.register_buffer("eps", th.FloatTensor([init_eps])) def message(self, edges): r"""User-defined Message Function""" return {"m": F.relu(edges.src["hn"] + edges.data["he"])} def forward(self, graph, node_feat, edge_feat): r"""Forward computation. Parameters ---------- graph : DGLGraph The graph. node_feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, it is the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`. If ``apply_func`` is not None, :math:`D_{in}` should fit the input feature size requirement of ``apply_func``. edge_feat : torch.Tensor Edge feature. It is a tensor of shape :math:`(E, D_{in})` where :math:`E` is the number of edges. Returns ------- torch.Tensor The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output feature size of ``apply_func``. If ``apply_func`` is None, :math:`D_{out}` should be the same as :math:`D_{in}`. 
""" with graph.local_scope(): feat_src, feat_dst = expand_as_pair(node_feat, graph) graph.srcdata["hn"] = feat_src graph.edata["he"] = edge_feat graph.update_all(self.message, fn.sum("m", "neigh")) rst = (1 + self.eps) * feat_dst + graph.dstdata["neigh"] if self.apply_func is not None: rst = self.apply_func(rst) return rst ================================================ FILE: python/dgl/nn/pytorch/conv/gmmconv.py ================================================ """Torch Module for GMM Conv""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th from torch import nn from torch.nn import init from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair from ..utils import Identity class GMMConv(nn.Module): r"""Gaussian Mixture Model Convolution layer from `Geometric Deep Learning on Graphs and Manifolds using Mixture Model CNNs `__ .. math:: u_{ij} &= f(x_i, x_j), x_j \in \mathcal{N}(i) w_k(u) &= \exp\left(-\frac{1}{2}(u-\mu_k)^T \Sigma_k^{-1} (u - \mu_k)\right) h_i^{l+1} &= \mathrm{aggregate}\left(\left\{\frac{1}{K} \sum_{k}^{K} w_k(u_{ij}), \forall j\in \mathcal{N}(i)\right\}\right) where :math:`u` denotes the pseudo-coordinates between a vertex and one of its neighbor, computed using function :math:`f`, :math:`\Sigma_k^{-1}` and :math:`\mu_k` are learnable parameters representing the covariance matrix and mean vector of a Gaussian kernel. Parameters ---------- in_feats : int Number of input features; i.e., the number of dimensions of :math:`x_i`. out_feats : int Number of output features; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. dim : int Dimensionality of pseudo-coordinte; i.e, the number of dimensions of :math:`u_{ij}`. n_kernels : int Number of kernels :math:`K`. aggregator_type : str Aggregator type (``sum``, ``mean``, ``max``). Default: ``sum``. residual : bool If True, use residual connection inside this layer. Default: ``False``. 
bias : bool If True, adds a learnable bias to the output. Default: ``True``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, so the aggregation function will be applied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practice to handle this is to filter out the nodes with zero-in-degree when using the output of the conv.
Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import GMMConv >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> conv = GMMConv(10, 2, 3, 2, 'mean') >>> pseudo = th.ones(12, 3) >>> res = conv(g, feat, pseudo) >>> res tensor([[-0.3462, -0.2654], [-0.3462, -0.2654], [-0.3462, -0.2654], [-0.3462, -0.2654], [-0.3462, -0.2654], [-0.3462, -0.2654]], grad_fn=) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) >>> u_fea = th.rand(2, 5) >>> v_fea = th.rand(4, 10) >>> pseudo = th.ones(5, 3) >>> conv = GMMConv((10, 5), 2, 3, 2, 'mean') >>> res = conv(g, (u_fea, v_fea), pseudo) >>> res tensor([[-0.1107, -0.1559], [-0.1646, -0.2326], [-0.1377, -0.1943], [-0.1107, -0.1559]], grad_fn=) """ def __init__( self, in_feats, out_feats, dim, n_kernels, aggregator_type="sum", residual=False, bias=True, allow_zero_in_degree=False, ): super(GMMConv, self).__init__() self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._dim = dim self._n_kernels = n_kernels self._allow_zero_in_degree = allow_zero_in_degree if aggregator_type == "sum": self._reducer = fn.sum elif aggregator_type == "mean": self._reducer = fn.mean elif aggregator_type == "max": self._reducer = fn.max else: raise KeyError( "Aggregator type {} not recognized.".format(aggregator_type) ) self.mu = nn.Parameter(th.Tensor(n_kernels, dim)) self.inv_sigma = nn.Parameter(th.Tensor(n_kernels, dim)) self.fc = nn.Linear( self._in_src_feats, n_kernels * out_feats, bias=False ) if residual: if self._in_dst_feats != out_feats: self.res_fc = nn.Linear( self._in_dst_feats, out_feats, bias=False ) else: self.res_fc = Identity() else: self.register_buffer("res_fc", None) if bias: self.bias = nn.Parameter(th.Tensor(out_feats)) else: self.register_buffer("bias", None) 
self.reset_parameters() def reset_parameters(self): r""" Description ----------- Reinitialize learnable parameters. Note ---- The fc parameters are initialized using Glorot uniform initialization and the bias is initialized to be zero. The mu weight is initialized using normal distribution and inv_sigma is initialized with constant value 1.0. """ gain = init.calculate_gain("relu") init.xavier_normal_(self.fc.weight, gain=gain) if isinstance(self.res_fc, nn.Linear): init.xavier_normal_(self.res_fc.weight, gain=gain) init.normal_(self.mu.data, 0, 0.1) init.constant_(self.inv_sigma.data, 1) if self.bias is not None: init.zeros_(self.bias.data) def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, pseudo): """ Description ----------- Compute Gaussian Mixture Model Convolution layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor If a single tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of tensors are given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. pseudo : torch.Tensor The pseudo coordinate tensor of shape :math:`(E, D_{u})` where :math:`E` is the number of edges of the graph and :math:`D_{u}` is the dimensionality of pseudo coordinate. Returns ------- torch.Tensor The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output feature size. Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. 
""" with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." ) feat_src, feat_dst = expand_as_pair(feat, graph) graph.srcdata["h"] = self.fc(feat_src).view( -1, self._n_kernels, self._out_feats ) E = graph.num_edges() # compute gaussian weight gaussian = -0.5 * ( ( pseudo.view(E, 1, self._dim) - self.mu.view(1, self._n_kernels, self._dim) ) ** 2 ) gaussian = gaussian * ( self.inv_sigma.view(1, self._n_kernels, self._dim) ** 2 ) gaussian = th.exp(gaussian.sum(dim=-1, keepdim=True)) # (E, K, 1) graph.edata["w"] = gaussian graph.update_all(fn.u_mul_e("h", "w", "m"), self._reducer("m", "h")) rst = graph.dstdata["h"].sum(1) # residual connection if self.res_fc is not None: rst = rst + self.res_fc(feat_dst) # bias if self.bias is not None: rst = rst + self.bias return rst ================================================ FILE: python/dgl/nn/pytorch/conv/graphconv.py ================================================ """Torch modules for graph convolutions(GCN).""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th from torch import nn from torch.nn import init from .... import function as fn from ....base import DGLError from ....convert import block_to_graph from ....heterograph import DGLBlock from ....transforms import reverse from ....utils import expand_as_pair class EdgeWeightNorm(nn.Module): r"""This module normalizes positive scalar edge weights on a graph following the form in `GCN `__. Mathematically, setting ``norm='both'`` yields the following normalization term: .. 
math:: c_{ji} = (\sqrt{\sum_{k\in\mathcal{N}(j)}e_{jk}}\sqrt{\sum_{k\in\mathcal{N}(i)}e_{ki}}) And, setting ``norm='right'`` yields the following normalization term: .. math:: c_{ji} = (\sum_{k\in\mathcal{N}(i)}e_{ki}) where :math:`e_{ji}` is the scalar weight on the edge from node :math:`j` to node :math:`i`. The module returns the normalized weight :math:`e_{ji} / c_{ji}`. Parameters ---------- norm : str, optional The normalizer as specified above. Default is `'both'`. eps : float, optional A small offset value in the denominator. Default is 0. Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import EdgeWeightNorm, GraphConv >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> edge_weight = th.tensor([0.5, 0.6, 0.4, 0.7, 0.9, 0.1, 1, 1, 1, 1, 1, 1]) >>> norm = EdgeWeightNorm(norm='both') >>> norm_edge_weight = norm(g, edge_weight) >>> conv = GraphConv(10, 2, norm='none', weight=True, bias=True) >>> res = conv(g, feat, edge_weight=norm_edge_weight) >>> print(res) tensor([[-1.1849, -0.7525], [-1.3514, -0.8582], [-1.2384, -0.7865], [-1.9949, -1.2669], [-1.3658, -0.8674], [-0.8323, -0.5286]], grad_fn=) """ def __init__(self, norm="both", eps=0.0): super(EdgeWeightNorm, self).__init__() self._norm = norm self._eps = eps def forward(self, graph, edge_weight): r""" Description ----------- Compute normalized edge weight for the GCN model. Parameters ---------- graph : DGLGraph The graph. edge_weight : torch.Tensor Unnormalized scalar weights on the edges. The shape is expected to be :math:`(|E|)`. Returns ------- torch.Tensor The normalized edge weight. Raises ------ DGLError Case 1: The edge weight is multi-dimensional. Currently this module only supports a scalar weight on each edge. Case 2: The edge weight has non-positive values with ``norm='both'``. This will trigger square root and division by a non-positive number. 
""" with graph.local_scope(): if isinstance(graph, DGLBlock): graph = block_to_graph(graph) if len(edge_weight.shape) > 1: raise DGLError( "Currently the normalization is only defined " "on scalar edge weight. Please customize the " "normalization for your high-dimensional weights." ) if self._norm == "both" and th.any(edge_weight <= 0).item(): raise DGLError( 'Non-positive edge weight detected with `norm="both"`. ' "This leads to square root of zero or negative values." ) dev = graph.device dtype = edge_weight.dtype graph.srcdata["_src_out_w"] = th.ones( graph.number_of_src_nodes(), dtype=dtype, device=dev ) graph.dstdata["_dst_in_w"] = th.ones( graph.number_of_dst_nodes(), dtype=dtype, device=dev ) graph.edata["_edge_w"] = edge_weight if self._norm == "both": reversed_g = reverse(graph) reversed_g.edata["_edge_w"] = edge_weight reversed_g.update_all( fn.copy_e("_edge_w", "m"), fn.sum("m", "out_weight") ) degs = reversed_g.dstdata["out_weight"] + self._eps norm = th.pow(degs, -0.5) graph.srcdata["_src_out_w"] = norm if self._norm != "none": graph.update_all( fn.copy_e("_edge_w", "m"), fn.sum("m", "in_weight") ) degs = graph.dstdata["in_weight"] + self._eps if self._norm == "both": norm = th.pow(degs, -0.5) else: norm = 1.0 / degs graph.dstdata["_dst_in_w"] = norm graph.apply_edges( lambda e: { "_norm_edge_weights": e.src["_src_out_w"] * e.dst["_dst_in_w"] * e.data["_edge_w"] } ) return graph.edata["_norm_edge_weights"] # pylint: disable=W0235 class GraphConv(nn.Module): r"""Graph convolutional layer from `Semi-Supervised Classification with Graph Convolutional Networks `__ Mathematically it is defined as follows: .. math:: h_i^{(l+1)} = \sigma(b^{(l)} + \sum_{j\in\mathcal{N}(i)}\frac{1}{c_{ji}}h_j^{(l)}W^{(l)}) where :math:`\mathcal{N}(i)` is the set of neighbors of node :math:`i`, :math:`c_{ji}` is the product of the square root of node degrees (i.e., :math:`c_{ji} = \sqrt{|\mathcal{N}(j)|}\sqrt{|\mathcal{N}(i)|}`), and :math:`\sigma` is an activation function. 
If a weight tensor on each edge is provided, the weighted graph convolution is defined as: .. math:: h_i^{(l+1)} = \sigma(b^{(l)} + \sum_{j\in\mathcal{N}(i)}\frac{e_{ji}}{c_{ji}}h_j^{(l)}W^{(l)}) where :math:`e_{ji}` is the scalar weight on the edge from node :math:`j` to node :math:`i`. This is NOT equivalent to the weighted graph convolutional network formulation in the paper. To customize the normalization term :math:`c_{ji}`, one can first set ``norm='none'`` for the model, and send the pre-normalized :math:`e_{ji}` to the forward computation. We provide :class:`~dgl.nn.pytorch.EdgeWeightNorm` to normalize scalar edge weight following the GCN paper. Parameters ---------- in_feats : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. out_feats : int Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. norm : str, optional How to apply the normalizer. Can be one of the following values: * ``right``, to divide the aggregated messages by each node's in-degrees, which is equivalent to averaging the received messages. * ``none``, where no normalization is applied. * ``both`` (default), where the messages are scaled with :math:`1/c_{ji}` above, equivalent to symmetric normalization. * ``left``, to divide the messages sent out from each node by its out-degrees, equivalent to random walk normalization. weight : bool, optional If True, apply a linear layer. Otherwise, aggregating the messages without a weight matrix. bias : bool, optional If True, adds a learnable bias to the output. Default: ``True``. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. 
This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Attributes ---------- weight : torch.Tensor The learnable weight tensor. bias : torch.Tensor The learnable bias tensor. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. 
Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import GraphConv >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> conv = GraphConv(10, 2, norm='both', weight=True, bias=True) >>> res = conv(g, feat) >>> print(res) tensor([[ 1.3326, -0.2797], [ 1.4673, -0.3080], [ 1.3326, -0.2797], [ 1.6871, -0.3541], [ 1.7711, -0.3717], [ 1.0375, -0.2178]], grad_fn=) >>> # allow_zero_in_degree example >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> conv = GraphConv(10, 2, norm='both', weight=True, bias=True, allow_zero_in_degree=True) >>> res = conv(g, feat) >>> print(res) tensor([[-0.2473, -0.4631], [-0.3497, -0.6549], [-0.3497, -0.6549], [-0.4221, -0.7905], [-0.3497, -0.6549], [ 0.0000, 0.0000]], grad_fn=) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_U', '_E', '_V') : (u, v)}) >>> u_fea = th.rand(2, 5) >>> v_fea = th.rand(4, 5) >>> conv = GraphConv(5, 2, norm='both', weight=True, bias=True) >>> res = conv(g, (u_fea, v_fea)) >>> res tensor([[-0.2994, 0.6106], [-0.4482, 0.5540], [-0.5287, 0.8235], [-0.2994, 0.6106]], grad_fn=) """ def __init__( self, in_feats, out_feats, norm="both", weight=True, bias=True, activation=None, allow_zero_in_degree=False, ): super(GraphConv, self).__init__() if norm not in ("none", "both", "right", "left"): raise DGLError( 'Invalid norm value. Must be either "none", "both", "right" or "left".' 
' But got "{}".'.format(norm) ) self._in_feats = in_feats self._out_feats = out_feats self._norm = norm self._allow_zero_in_degree = allow_zero_in_degree if weight: self.weight = nn.Parameter(th.Tensor(in_feats, out_feats)) else: self.register_parameter("weight", None) if bias: self.bias = nn.Parameter(th.Tensor(out_feats)) else: self.register_parameter("bias", None) self.reset_parameters() self._activation = activation def reset_parameters(self): r""" Description ----------- Reinitialize learnable parameters. Note ---- The model parameters are initialized as in the `original implementation `__ where the weight :math:`W^{(l)}` is initialized using Glorot uniform initialization and the bias is initialized to be zero. """ if self.weight is not None: init.xavier_uniform_(self.weight) if self.bias is not None: init.zeros_(self.bias) def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, weight=None, edge_weight=None): r""" Description ----------- Compute graph convolution. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, it represents the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, which is the case for bipartite graph, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. weight : torch.Tensor, optional Optional external weight tensor. edge_weight : torch.Tensor, optional Optional tensor on the edge. If given, the convolution will weight with regard to the message. 
Returns ------- torch.Tensor The output feature Raises ------ DGLError Case 1: If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. Case 2: External weight is provided while at the same time the module has defined its own weight parameter. Note ---- * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional dimensions, :math:`N` is the number of nodes. * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are the same shape as the input. * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`. """ with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." ) aggregate_fn = fn.copy_u("h", "m") if edge_weight is not None: assert edge_weight.shape[0] == graph.num_edges() graph.edata["_edge_weight"] = edge_weight aggregate_fn = fn.u_mul_e("h", "_edge_weight", "m") # (BarclayII) For RGCN on heterogeneous graphs we need to support GCN on bipartite. 
feat_src, feat_dst = expand_as_pair(feat, graph) if self._norm in ["left", "both"]: degs = graph.out_degrees().to(feat_src).clamp(min=1) if self._norm == "both": norm = th.pow(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1,) * (feat_src.dim() - 1) norm = th.reshape(norm, shp) feat_src = feat_src * norm if weight is not None: if self.weight is not None: raise DGLError( "External weight is provided while at the same time the" " module has defined its own weight parameter. Please" " create the module with flag weight=False." ) else: weight = self.weight if self._in_feats > self._out_feats: # mult W first to reduce the feature size for aggregation. if weight is not None: feat_src = th.matmul(feat_src, weight) graph.srcdata["h"] = feat_src graph.update_all(aggregate_fn, fn.sum(msg="m", out="h")) rst = graph.dstdata["h"] else: # aggregate first then mult W graph.srcdata["h"] = feat_src graph.update_all(aggregate_fn, fn.sum(msg="m", out="h")) rst = graph.dstdata["h"] if weight is not None: rst = th.matmul(rst, weight) if self._norm in ["right", "both"]: degs = graph.in_degrees().to(feat_dst).clamp(min=1) if self._norm == "both": norm = th.pow(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1,) * (feat_dst.dim() - 1) norm = th.reshape(norm, shp) rst = rst * norm if self.bias is not None: rst = rst + self.bias if self._activation is not None: rst = self._activation(rst) return rst def extra_repr(self): """Set the extra representation of the module, which will come into effect when printing the model. 
""" summary = "in={_in_feats}, out={_out_feats}" summary += ", normalization={_norm}" if "_activation" in self.__dict__: summary += ", activation={_activation}" return summary.format(**self.__dict__) ================================================ FILE: python/dgl/nn/pytorch/conv/grouprevres.py ================================================ """Torch module for grouped reversible residual connections for GNNs""" # pylint: disable= no-member, arguments-differ, invalid-name, C0116, R1728 from copy import deepcopy import numpy as np import torch import torch.nn as nn class InvertibleCheckpoint(torch.autograd.Function): r"""Extension of torch.autograd""" @staticmethod def forward(ctx, fn, fn_inverse, num_inputs, *inputs_and_weights): ctx.fn = fn ctx.fn_inverse = fn_inverse ctx.weights = inputs_and_weights[num_inputs:] inputs = inputs_and_weights[:num_inputs] ctx.input_requires_grad = [] with torch.no_grad(): # Make a detached copy, which shares the storage x = [] for element in inputs: if isinstance(element, torch.Tensor): x.append(element.detach()) ctx.input_requires_grad.append(element.requires_grad) else: x.append(element) ctx.input_requires_grad.append(None) # Detach the output, which then allows discarding the intermediary results outputs = ctx.fn(*x).detach_() # clear memory of input node features inputs[1].untyped_storage().resize_(0) # store for backward pass ctx.inputs = [inputs] ctx.outputs = [outputs] return outputs @staticmethod def backward(ctx, *grad_outputs): if not torch.autograd._is_checkpoint_valid(): raise RuntimeError( "InvertibleCheckpoint is not compatible with .grad(), \ please use .backward() if possible" ) # retrieve input and output tensor nodes if len(ctx.outputs) == 0: raise RuntimeError( "Trying to perform backward on the InvertibleCheckpoint \ for more than once." 
) inputs = ctx.inputs.pop() outputs = ctx.outputs.pop() # reconstruct input node features with torch.no_grad(): # inputs[0] is DGLGraph and inputs[1] is input node features inputs_inverted = ctx.fn_inverse( *((inputs[0], outputs) + inputs[2:]) ) # clear memory of outputs outputs.untyped_storage().resize_(0) x = inputs[1] x.untyped_storage().resize_(int(np.prod(x.size()))) x.set_(inputs_inverted) # compute gradients with torch.set_grad_enabled(True): detached_inputs = [] for i, element in enumerate(inputs): if isinstance(element, torch.Tensor): element = element.detach() element.requires_grad = ctx.input_requires_grad[i] detached_inputs.append(element) detached_inputs = tuple(detached_inputs) temp_output = ctx.fn(*detached_inputs) filtered_detached_inputs = tuple( filter( lambda x: getattr(x, "requires_grad", False), detached_inputs ) ) gradients = torch.autograd.grad( outputs=(temp_output,), inputs=filtered_detached_inputs + ctx.weights, grad_outputs=grad_outputs, ) input_gradients = [] i = 0 for rg in ctx.input_requires_grad: if rg: input_gradients.append(gradients[i]) i += 1 else: input_gradients.append(None) gradients = tuple(input_gradients) + gradients[-len(ctx.weights) :] return (None, None, None) + gradients class GroupRevRes(nn.Module): r"""Grouped reversible residual connections for GNNs, as introduced in `Training Graph Neural Networks with 1000 Layers `__ It uniformly partitions an input node feature :math:`X` into :math:`C` groups :math:`X_1, X_2, \cdots, X_C` across the channel dimension. Besides, it makes :math:`C` copies of the input GNN module :math:`f_{w1}, \cdots, f_{wC}`. In the forward pass, each GNN module only takes the corresponding group of node features. The output node representations :math:`X^{'}` are computed as follows. .. 
math:: X_0^{'} = \sum_{i=2}^{C}X_i X_i^{'} = f_{wi}(X_{i-1}^{'}, g, U) + X_i, i\in\{1,\cdots,C\} X^{'} = X_1^{'} \, \Vert \, \ldots \, \Vert \, X_C^{'} where :math:`g` is the input graph, :math:`U` is arbitrary additional input arguments like edge features, and :math:`\, \Vert \,` is concatenation. Parameters ---------- gnn_module : nn.Module GNN module for message passing. :attr:`GroupRevRes` will clone the module for :attr:`groups`-1 number of times, yielding :attr:`groups` copies in total. The input and output node representation size need to be the same. Its forward function needs to take a DGLGraph and the associated input node features in order, optionally followed by additional arguments like edge features. groups : int, optional The number of groups. Examples -------- >>> import dgl >>> import torch >>> import torch.nn as nn >>> from dgl.nn import GraphConv, GroupRevRes >>> class GNNLayer(nn.Module): ... def __init__(self, feats, dropout=0.2): ... super(GNNLayer, self).__init__() ... # Use BatchNorm and dropout to prevent gradient vanishing ... # In particular if you use a large number of GNN layers ... self.norm = nn.BatchNorm1d(feats) ... self.conv = GraphConv(feats, feats) ... self.dropout = nn.Dropout(dropout) ... ... def forward(self, g, x): ... x = self.norm(x) ... x = self.dropout(x) ... 
return self.conv(g, x) >>> num_nodes = 5 >>> num_edges = 20 >>> feats = 32 >>> groups = 2 >>> g = dgl.rand_graph(num_nodes, num_edges) >>> x = torch.randn(num_nodes, feats) >>> conv = GNNLayer(feats // groups) >>> model = GroupRevRes(conv, groups) >>> out = model(g, x) """ def __init__(self, gnn_module, groups=2): super(GroupRevRes, self).__init__() self.gnn_modules = nn.ModuleList() for i in range(groups): if i == 0: self.gnn_modules.append(gnn_module) else: self.gnn_modules.append(deepcopy(gnn_module)) self.groups = groups def _forward(self, g, x, *args): xs = torch.chunk(x, self.groups, dim=-1) if len(args) == 0: args_chunks = [()] * self.groups else: chunked_args = list( map(lambda arg: torch.chunk(arg, self.groups, dim=-1), args) ) args_chunks = list(zip(*chunked_args)) y_in = sum(xs[1:]) ys = [] for i in range(self.groups): y_in = xs[i] + self.gnn_modules[i](g, y_in, *args_chunks[i]) ys.append(y_in) out = torch.cat(ys, dim=-1) return out def _inverse(self, g, y, *args): ys = torch.chunk(y, self.groups, dim=-1) if len(args) == 0: args_chunks = [()] * self.groups else: chunked_args = list( map(lambda arg: torch.chunk(arg, self.groups, dim=-1), args) ) args_chunks = list(zip(*chunked_args)) xs = [] for i in range(self.groups - 1, -1, -1): if i != 0: y_in = ys[i - 1] else: y_in = sum(xs) x = ys[i] - self.gnn_modules[i](g, y_in, *args_chunks[i]) xs.append(x) x = torch.cat(xs[::-1], dim=-1) return x def forward(self, g, x, *args): r"""Apply the GNN module with grouped reversible residual connection. Parameters ---------- g : DGLGraph The graph. x : torch.Tensor The input feature of shape :math:`(N, D_{in})`, where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. args Additional arguments to pass to :attr:`gnn_module`. Returns ------- torch.Tensor The output feature of shape :math:`(N, D_{in})`. 
""" args = (g, x) + args y = InvertibleCheckpoint.apply( self._forward, self._inverse, len(args), *(args + tuple([p for p in self.parameters() if p.requires_grad])) ) return y ================================================ FILE: python/dgl/nn/pytorch/conv/hgtconv.py ================================================ """Heterogeneous Graph Transformer""" # pylint: disable= no-member, arguments-differ, invalid-name import math import torch import torch.nn as nn from .... import function as fn from ..linear import TypedLinear from ..softmax import edge_softmax class HGTConv(nn.Module): r"""Heterogeneous graph transformer convolution from `Heterogeneous Graph Transformer `__ Given a graph :math:`G(V, E)` and input node features :math:`H^{(l-1)}`, it computes the new node features as follows: Compute a multi-head attention score for each edge :math:`(s, e, t)` in the graph: .. math:: Attention(s, e, t) = \text{Softmax}\left(||_{i\in[1,h]}ATT-head^i(s, e, t)\right) \\ ATT-head^i(s, e, t) = \left(K^i(s)W^{ATT}_{\phi(e)}Q^i(t)^{\top}\right)\cdot \frac{\mu_{(\tau(s),\phi(e),\tau(t)}}{\sqrt{d}} \\ K^i(s) = \text{K-Linear}^i_{\tau(s)}(H^{(l-1)}[s]) \\ Q^i(t) = \text{Q-Linear}^i_{\tau(t)}(H^{(l-1)}[t]) \\ Compute the message to send on each edge :math:`(s, e, t)`: .. math:: Message(s, e, t) = ||_{i\in[1, h]} MSG-head^i(s, e, t) \\ MSG-head^i(s, e, t) = \text{M-Linear}^i_{\tau(s)}(H^{(l-1)}[s])W^{MSG}_{\phi(e)} \\ Send messages to target nodes :math:`t` and aggregate: .. math:: \tilde{H}^{(l)}[t] = \sum_{\forall s\in \mathcal{N}(t)}\left( Attention(s,e,t) \cdot Message(s,e,t)\right) Compute new node features: .. math:: H^{(l)}[t]=\text{A-Linear}_{\tau(t)}(\sigma(\tilde(H)^{(l)}[t])) + H^{(l-1)}[t] Parameters ---------- in_size : int Input node feature size. head_size : int Output head size. The output node feature size is ``head_size * num_heads``. num_heads : int Number of heads. The output node feature size is ``head_size * num_heads``. num_ntypes : int Number of node types. 
num_etypes : int Number of edge types. dropout : optional, float Dropout rate. use_norm : optiona, bool If true, apply a layer norm on the output node feature. Examples -------- """ def __init__( self, in_size, head_size, num_heads, num_ntypes, num_etypes, dropout=0.2, use_norm=False, ): super().__init__() self.in_size = in_size self.head_size = head_size self.num_heads = num_heads self.sqrt_d = math.sqrt(head_size) self.use_norm = use_norm self.linear_k = TypedLinear(in_size, head_size * num_heads, num_ntypes) self.linear_q = TypedLinear(in_size, head_size * num_heads, num_ntypes) self.linear_v = TypedLinear(in_size, head_size * num_heads, num_ntypes) self.linear_a = TypedLinear( head_size * num_heads, head_size * num_heads, num_ntypes ) self.relation_pri = nn.ParameterList( [nn.Parameter(torch.ones(num_etypes)) for i in range(num_heads)] ) self.relation_att = nn.ModuleList( [ TypedLinear(head_size, head_size, num_etypes) for i in range(num_heads) ] ) self.relation_msg = nn.ModuleList( [ TypedLinear(head_size, head_size, num_etypes) for i in range(num_heads) ] ) self.skip = nn.Parameter(torch.ones(num_ntypes)) self.drop = nn.Dropout(dropout) if use_norm: self.norm = nn.LayerNorm(head_size * num_heads) if in_size != head_size * num_heads: self.residual_w = nn.Parameter( torch.Tensor(in_size, head_size * num_heads) ) nn.init.xavier_uniform_(self.residual_w) def forward(self, g, x, ntype, etype, *, presorted=False): """Forward computation. Parameters ---------- g : DGLGraph The input graph. x : torch.Tensor A 2D tensor of node features. Shape: :math:`(|V|, D_{in})`. ntype : torch.Tensor An 1D integer tensor of node types. Shape: :math:`(|V|,)`. etype : torch.Tensor An 1D integer tensor of edge types. Shape: :math:`(|E|,)`. presorted : bool, optional Whether *both* the nodes and the edges of the input graph have been sorted by their types. Forward on pre-sorted graph may be faster. Graphs created by :func:`~dgl.to_homogeneous` automatically satisfy the condition. 
Also see :func:`~dgl.reorder_graph` for manually reordering the nodes and edges. Returns ------- torch.Tensor New node features. Shape: :math:`(|V|, D_{head} * N_{head})`. """ self.presorted = presorted if g.is_block: x_src = x x_dst = x[: g.num_dst_nodes()] srcntype = ntype dstntype = ntype[: g.num_dst_nodes()] else: x_src = x x_dst = x srcntype = ntype dstntype = ntype with g.local_scope(): k = self.linear_k(x_src, srcntype, presorted).view( -1, self.num_heads, self.head_size ) q = self.linear_q(x_dst, dstntype, presorted).view( -1, self.num_heads, self.head_size ) v = self.linear_v(x_src, srcntype, presorted).view( -1, self.num_heads, self.head_size ) g.srcdata["k"] = k g.dstdata["q"] = q g.srcdata["v"] = v g.edata["etype"] = etype g.apply_edges(self.message) g.edata["m"] = g.edata["m"] * edge_softmax( g, g.edata["a"] ).unsqueeze(-1) g.update_all(fn.copy_e("m", "m"), fn.sum("m", "h")) h = g.dstdata["h"].view(-1, self.num_heads * self.head_size) # target-specific aggregation h = self.drop(self.linear_a(h, dstntype, presorted)) alpha = torch.sigmoid(self.skip[dstntype]).unsqueeze(-1) if x_dst.shape != h.shape: h = h * alpha + (x_dst @ self.residual_w) * (1 - alpha) else: h = h * alpha + x_dst * (1 - alpha) if self.use_norm: h = self.norm(h) return h def message(self, edges): """Message function.""" a, m = [], [] etype = edges.data["etype"] k = torch.unbind(edges.src["k"], dim=1) q = torch.unbind(edges.dst["q"], dim=1) v = torch.unbind(edges.src["v"], dim=1) for i in range(self.num_heads): kw = self.relation_att[i](k[i], etype, self.presorted) # (E, O) a.append( (kw * q[i]).sum(-1) * self.relation_pri[i][etype] / self.sqrt_d ) # (E,) m.append( self.relation_msg[i](v[i], etype, self.presorted) ) # (E, O) return {"a": torch.stack(a, dim=1), "m": torch.stack(m, dim=1)} ================================================ FILE: python/dgl/nn/pytorch/conv/nnconv.py ================================================ """Torch Module for NNConv layer""" # pylint: disable= 
class NNConv(nn.Module):
    r"""Graph Convolution layer from `Neural Message Passing
    for Quantum Chemistry <https://arxiv.org/abs/1704.01212>`__

    .. math::
        h_{i}^{l+1} = h_{i}^{l} + \mathrm{aggregate}\left(\left\{
        f_\Theta (e_{ij}) \cdot h_j^{l}, j\in \mathcal{N}(i) \right\}\right)

    where :math:`e_{ij}` is the edge feature, :math:`f_\Theta` is a function
    with learnable parameters.

    Parameters
    ----------
    in_feats : int
        Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`.
        NNConv can be applied on homogeneous graph and unidirectional
        bipartite graph. If the layer is to be applied on a unidirectional
        bipartite graph, ``in_feats`` specifies the input feature size on both
        the source and destination nodes. If a scalar is given, the source and
        destination node feature size would take the same value.
    out_feats : int
        Output feature size; i.e., the number of dimensions of
        :math:`h_i^{(l+1)}`.
    edge_func : callable activation function/layer
        Maps each edge feature to a vector of shape
        ``(in_feats * out_feats)`` as weight to compute
        messages.
        Also is the :math:`f_\Theta` in the formula.
    aggregator_type : str
        Aggregator type to use (``sum``, ``mean`` or ``max``).
    residual : bool, optional
        If True, use residual connection. Default: ``False``.
    bias : bool, optional
        If True, adds a learnable bias to the output. Default: ``True``.

    Examples
    --------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import NNConv

    >>> # Case 1: Homogeneous graph
    >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>> g = dgl.add_self_loop(g)
    >>> feat = th.ones(6, 10)
    >>> lin = th.nn.Linear(5, 20)
    >>> def edge_func(efeat):
    ...     return lin(efeat)
    >>> efeat = th.ones(6+6, 5)
    >>> conv = NNConv(10, 2, edge_func, 'mean')
    >>> res = conv(g, feat, efeat)

    >>> # Case 2: Unidirectional bipartite graph
    >>> u = [0, 1, 0, 0, 1]
    >>> v = [0, 1, 2, 3, 2]
    >>> g = dgl.heterograph({('_U', '_E', '_V'): (u, v)})
    >>> u_feat = th.rand(2, 10)
    >>> v_feat = th.rand(4, 10)
    >>> conv = NNConv(10, 2, edge_func, 'mean')
    >>> efeat = th.ones(5, 5)
    >>> res = conv(g, (u_feat, v_feat), efeat)
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        edge_func,
        aggregator_type="mean",
        residual=False,
        bias=True,
    ):
        super().__init__()
        self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
        self._out_feats = out_feats
        self.edge_func = edge_func

        # Pick the built-in reduce function that matches the requested name.
        known_reducers = (("sum", fn.sum), ("mean", fn.mean), ("max", fn.max))
        for name, reducer in known_reducers:
            if aggregator_type == name:
                self.reducer = reducer
                break
        else:
            raise KeyError(
                "Aggregator type {} not recognized: ".format(aggregator_type)
            )
        self._aggre_type = aggregator_type

        if not residual:
            self.register_buffer("res_fc", None)
        elif self._in_dst_feats != out_feats:
            # Sizes differ, so the skip path needs a projection.
            self.res_fc = nn.Linear(self._in_dst_feats, out_feats, bias=False)
        else:
            self.res_fc = Identity()

        if bias:
            self.bias = nn.Parameter(th.Tensor(out_feats))
        else:
            self.register_buffer("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        r"""Reinitialize learnable parameters.

        Note
        ----
        The bias, when present, is set to zero. When the residual path is a
        linear layer, its weight is drawn with Glorot (Xavier) normal
        initialization using the ReLU gain.
        """
        if self.bias is not None:
            init.zeros_(self.bias)
        if isinstance(self.res_fc, nn.Linear):
            init.xavier_normal_(
                self.res_fc.weight, gain=init.calculate_gain("relu")
            )

    def forward(self, graph, feat, efeat):
        r"""Compute MPNN Graph Convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            The input feature of shape :math:`(N, D_{in})` where :math:`N`
            is the number of nodes of the graph and :math:`D_{in}` is the
            input feature size.
        efeat : torch.Tensor
            The edge feature of shape :math:`(E, *)`, which should fit the
            input shape requirement of ``edge_func``. :math:`E` is the number
            of edges of the graph.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is the output feature size.
        """
        with graph.local_scope():
            src_feat, dst_feat = expand_as_pair(feat, graph)

            # Trailing unit axis so the feature broadcasts against the
            # per-edge weight matrix: (n, d_in, 1).
            graph.srcdata["h"] = src_feat.unsqueeze(-1)

            # One (d_in, d_out) weight matrix per edge.
            edge_weight = self.edge_func(efeat)
            graph.edata["w"] = edge_weight.view(
                -1, self._in_src_feats, self._out_feats
            )

            message_fn = fn.u_mul_e("h", "w", "m")
            reduce_fn = self.reducer("m", "neigh")
            graph.update_all(message_fn, reduce_fn)

            # Summing over the input-feature axis completes the
            # vector-matrix product: (n, d_out).
            out = graph.dstdata["neigh"].sum(dim=1)

            # residual connection
            if self.res_fc is not None:
                out = out + self.res_fc(dst_feat)
            # bias
            if self.bias is not None:
                out = out + self.bias
            return out
"""standard deviation aggregation""" return torch.sqrt(aggregate_var(h) + 1e-30) def aggregate_var(h): """variance aggregation""" h_mean_squares = torch.mean(h * h, dim=1) h_mean = torch.mean(h, dim=1) var = torch.relu(h_mean_squares - h_mean * h_mean) return var def _aggregate_moment(h, n): """moment aggregation: for each node (E[(X-E[X])^n])^{1/n}""" h_mean = torch.mean(h, dim=1, keepdim=True) h_n = torch.mean(torch.pow(h - h_mean, n), dim=1) rooted_h_n = torch.sign(h_n) * torch.pow(torch.abs(h_n) + 1e-30, 1.0 / n) return rooted_h_n def aggregate_moment_3(h): """moment aggregation with n=3""" return _aggregate_moment(h, n=3) def aggregate_moment_4(h): """moment aggregation with n=4""" return _aggregate_moment(h, n=4) def aggregate_moment_5(h): """moment aggregation with n=5""" return _aggregate_moment(h, n=5) def scale_identity(h): """identity scaling (no scaling operation)""" return h def scale_amplification(h, D, delta): """amplification scaling""" return h * (np.log(D + 1) / delta) def scale_attenuation(h, D, delta): """attenuation scaling""" return h * (delta / np.log(D + 1)) AGGREGATORS = { "mean": aggregate_mean, "sum": aggregate_sum, "max": aggregate_max, "min": aggregate_min, "std": aggregate_std, "var": aggregate_var, "moment3": aggregate_moment_3, "moment4": aggregate_moment_4, "moment5": aggregate_moment_5, } SCALERS = { "identity": scale_identity, "amplification": scale_amplification, "attenuation": scale_attenuation, } class PNAConvTower(nn.Module): """A single PNA tower in PNA layers""" def __init__( self, in_size, out_size, aggregators, scalers, delta, dropout=0.0, edge_feat_size=0, ): super(PNAConvTower, self).__init__() self.in_size = in_size self.out_size = out_size self.aggregators = aggregators self.scalers = scalers self.delta = delta self.edge_feat_size = edge_feat_size self.M = nn.Linear(2 * in_size + edge_feat_size, in_size) self.U = nn.Linear( (len(aggregators) * len(scalers) + 1) * in_size, out_size ) self.dropout = nn.Dropout(dropout) 
self.batchnorm = nn.BatchNorm1d(out_size) def reduce_func(self, nodes): """reduce function for PNA layer: tensordot of multiple aggregation and scaling operations""" msg = nodes.mailbox["msg"] degree = msg.size(1) h = torch.cat( [AGGREGATORS[agg](msg) for agg in self.aggregators], dim=1 ) h = torch.cat( [ SCALERS[scaler](h, D=degree, delta=self.delta) if scaler != "identity" else h for scaler in self.scalers ], dim=1, ) return {"h_neigh": h} def message(self, edges): """message function for PNA layer""" if self.edge_feat_size > 0: f = torch.cat( [edges.src["h"], edges.dst["h"], edges.data["a"]], dim=-1 ) else: f = torch.cat([edges.src["h"], edges.dst["h"]], dim=-1) return {"msg": self.M(f)} def forward(self, graph, node_feat, edge_feat=None): """compute the forward pass of a single tower in PNA convolution layer""" # calculate graph normalization factors snorm_n = torch.cat( [ torch.ones(N, 1).to(node_feat) / N for N in graph.batch_num_nodes() ], dim=0, ).sqrt() with graph.local_scope(): graph.ndata["h"] = node_feat if self.edge_feat_size > 0: assert edge_feat is not None, "Edge features must be provided." graph.edata["a"] = edge_feat graph.update_all(self.message, self.reduce_func) h = self.U(torch.cat([node_feat, graph.ndata["h_neigh"]], dim=-1)) h = h * snorm_n return self.dropout(self.batchnorm(h)) class PNAConv(nn.Module): r"""Principal Neighbourhood Aggregation Layer from `Principal Neighbourhood Aggregation for Graph Nets `__ A PNA layer is composed of multiple PNA towers. Each tower takes as input a split of the input features, and computes the message passing as below. .. math:: h_i^(l+1) = U(h_i^l, \oplus_{(i,j)\in E}M(h_i^l, e_{i,j}, h_j^l)) where :math:`h_i` and :math:`e_{i,j}` are node features and edge features, respectively. :math:`M` and :math:`U` are MLPs, taking the concatenation of input for computing output features. :math:`\oplus` represents the combination of various aggregators and scalers. 
Aggregators aggregate messages from neighbours and scalers scale the aggregated messages in different ways. :math:`\oplus` concatenates the output features of each combination. The output of multiple towers are concatenated and fed into a linear mixing layer for the final output. Parameters ---------- in_size : int Input feature size; i.e. the size of :math:`h_i^l`. out_size : int Output feature size; i.e. the size of :math:`h_i^{l+1}`. aggregators : list of str List of aggregation function names(each aggregator specifies a way to aggregate messages from neighbours), selected from: * ``mean``: the mean of neighbour messages * ``max``: the maximum of neighbour messages * ``min``: the minimum of neighbour messages * ``std``: the standard deviation of neighbour messages * ``var``: the variance of neighbour messages * ``sum``: the sum of neighbour messages * ``moment3``, ``moment4``, ``moment5``: the normalized moments aggregation :math:`(E[(X-E[X])^n])^{1/n}` scalers: list of str List of scaler function names, selected from: * ``identity``: no scaling * ``amplification``: multiply the aggregated message by :math:`\log(d+1)/\delta`, where :math:`d` is the degree of the node. * ``attenuation``: multiply the aggregated message by :math:`\delta/\log(d+1)` delta: float The degree-related normalization factor computed over the training set, used by scalers for normalization. :math:`E[\log(d+1)]`, where :math:`d` is the degree for each node in the training set. dropout: float, optional The dropout ratio. Default: 0.0. num_towers: int, optional The number of towers used. Default: 1. Note that in_size and out_size must be divisible by num_towers. edge_feat_size: int, optional The edge feature size. Default: 0. residual : bool, optional The bool flag that determines whether to add a residual connection for the output. Default: True. If in_size and out_size of the PNA conv layer are not the same, this flag will be set as False forcibly. 
def forward(self, graph, node_feat, edge_feat=None):
    r"""
    Description
    -----------
    Compute PNA layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    node_feat : torch.Tensor
        The input feature of shape :math:`(N, h_n)`. :math:`N` is the number of
        nodes, and :math:`h_n` must be the same as in_size.
    edge_feat : torch.Tensor, optional
        The edge feature of shape :math:`(M, h_e)`. :math:`M` is the number of
        edges, and :math:`h_e` must be the same as edge_feat_size.

    Returns
    -------
    torch.Tensor
        The output node feature of shape :math:`(N, h_n')` where :math:`h_n'`
        should be the same as out_size.
    """
    # Tower ``idx`` only sees its own contiguous slice of the feature columns.
    tower_outputs = []
    for idx, tower in enumerate(self.towers):
        start = idx * self.tower_in_size
        stop = start + self.tower_in_size
        tower_outputs.append(tower(graph, node_feat[:, start:stop], edge_feat))

    h_cat = torch.cat(tower_outputs, dim=1)
    h_out = self.mixing_layer(h_cat)

    # Optional residual connection on the mixed output.
    if self.residual:
        return h_out + node_feat
    return h_out
num_rels : int Number of relations. regularizer : str, optional Which weight regularizer to use ("basis", "bdd" or ``None``): - "basis" is for basis-decomposition. - "bdd" is for block-diagonal-decomposition. - ``None`` applies no regularization. Default: ``None``. num_bases : int, optional Number of bases. It comes into effect when a regularizer is applied. If ``None``, it uses number of relations (``num_rels``). Default: ``None``. Note that ``in_feat`` and ``out_feat`` must be divisible by ``num_bases`` when applying "bdd" regularizer. bias : bool, optional True if bias is added. Default: ``True``. activation : callable, optional Activation function. Default: ``None``. self_loop : bool, optional True to include self loop message. Default: ``True``. dropout : float, optional Dropout rate. Default: ``0.0`` layer_norm: bool, optional True to add layer norm. Default: ``False`` Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import RelGraphConv >>> >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = th.ones(6, 10) >>> conv = RelGraphConv(10, 2, 3, regularizer='basis', num_bases=2) >>> etype = th.tensor([0,1,2,0,1,2]) >>> res = conv(g, feat, etype) >>> res tensor([[ 0.3996, -2.3303], [-0.4323, -0.1440], [ 0.3996, -2.3303], [ 2.1046, -2.8654], [-0.4323, -0.1440], [-0.1309, -1.0000]], grad_fn=) """ def __init__( self, in_feat, out_feat, num_rels, regularizer=None, num_bases=None, bias=True, activation=None, self_loop=True, dropout=0.0, layer_norm=False, ): super().__init__() if regularizer is not None and num_bases is None: num_bases = num_rels self.linear_r = TypedLinear( in_feat, out_feat, num_rels, regularizer, num_bases ) self.bias = bias self.activation = activation self.self_loop = self_loop self.layer_norm = layer_norm # bias if self.bias: self.h_bias = nn.Parameter(th.Tensor(out_feat)) nn.init.zeros_(self.h_bias) # TODO(minjie): consider remove those options in the future to make # the module only about 
def forward(self, g, feat, etypes, norm=None, *, presorted=False):
    """Forward computation.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    feat : torch.Tensor
        A 2D tensor of node features. Shape: :math:`(|V|, D_{in})`.
    etypes : torch.Tensor or list[int]
        An 1D integer tensor of edge types. Shape: :math:`(|E|,)`.
    norm : torch.Tensor, optional
        An 1D tensor of edge norm value. Shape: :math:`(|E|,)`.
    presorted : bool, optional
        Whether the edges of the input graph have been sorted by their types.
        Forward on pre-sorted graph may be faster. Graphs created
        by :func:`~dgl.to_homogeneous` automatically satisfy the condition.
        Also see :func:`~dgl.reorder_graph` for sorting edges manually.

    Returns
    -------
    torch.Tensor
        New node features. Shape: :math:`(|V|, D_{out})`.
    """
    # ``message`` reads this attribute, so it has to be stored on the module
    # before ``update_all`` is invoked.
    self.presorted = presorted

    with g.local_scope():
        g.srcdata["h"] = feat
        if norm is not None:
            g.edata["norm"] = norm
        g.edata["etype"] = etypes

        # Sum the per-edge messages on every destination node.
        g.update_all(self.message, fn.sum("m", "h"))
        out = g.dstdata["h"]

        # Optional post-processing, applied in this fixed order.
        if self.layer_norm:
            out = self.layer_norm_weight(out)
        if self.bias:
            out = out + self.h_bias
        if self.self_loop:
            # Only the first ``g.num_dst_nodes()`` rows of ``feat`` take part
            # in the self-loop term.
            dst_feat = feat[: g.num_dst_nodes()]
            out = out + th.matmul(dst_feat, self.loop_weight)
        if self.activation:
            out = self.activation(out)
        return self.dropout(out)
If the layer applies on a unidirectional bipartite graph, ``in_feats`` specifies the input feature size on both the source and destination nodes. If a scalar is given, the source and destination node feature size would take the same value. If aggregator type is ``gcn``, the feature size of source and destination nodes are required to be the same. out_feats : int Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. aggregator_type : str Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). feat_drop : float Dropout rate on features, default: ``0``. bias : bool If True, adds a learnable bias to the output. Default: ``True``. norm : callable activation function/layer or None, optional If not None, applies normalization to the updated node features. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. Examples -------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import SAGEConv >>> # Case 1: Homogeneous graph >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> conv = SAGEConv(10, 2, 'pool') >>> res = conv(g, feat) >>> res tensor([[-1.0888, -2.1099], [-1.0888, -2.1099], [-1.0888, -2.1099], [-1.0888, -2.1099], [-1.0888, -2.1099], [-1.0888, -2.1099]], grad_fn=) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) >>> u_fea = th.rand(2, 5) >>> v_fea = th.rand(4, 10) >>> conv = SAGEConv((5, 10), 2, 'mean') >>> res = conv(g, (u_fea, v_fea)) >>> res tensor([[ 0.3163, 3.1166], [ 0.3866, 2.5398], [ 0.5873, 1.6597], [-0.2502, 2.8068]], grad_fn=) """ def __init__( self, in_feats, out_feats, aggregator_type, feat_drop=0.0, bias=True, norm=None, activation=None, ): super(SAGEConv, self).__init__() valid_aggre_types = {"mean", "gcn", "pool", "lstm"} if aggregator_type 
def reset_parameters(self):
    r"""
    Description
    -----------
    Reinitialize learnable parameters.

    Note
    ----
    The weights of the linear layers (``fc_pool`` for the ``pool`` aggregator,
    ``fc_self`` for every aggregator except ``gcn``, and ``fc_neigh``) are
    initialized using Glorot uniform initialization with the ReLU gain.
    The LSTM module of the ``lstm`` aggregator is reinitialized through its own
    ``reset_parameters`` method.
    """
    relu_gain = nn.init.calculate_gain("relu")
    kind = self._aggre_type

    if kind == "lstm":
        self.lstm.reset_parameters()

    # Keep the order pool -> self -> neigh so the random stream is consumed
    # in a fixed sequence.
    layers = []
    if kind == "pool":
        layers.append(self.fc_pool)
    if kind != "gcn":
        layers.append(self.fc_self)
    layers.append(self.fc_neigh)

    for layer in layers:
        nn.init.xavier_uniform_(layer.weight, gain=relu_gain)
""" m = nodes.mailbox["m"] # (B, L, D) batch_size = m.shape[0] h = ( m.new_zeros((1, batch_size, self._in_src_feats)), m.new_zeros((1, batch_size, self._in_src_feats)), ) _, (rst, _) = self.lstm(m, h) return {"neigh": rst.squeeze(0)} def forward(self, graph, feat, edge_weight=None): r""" Description ----------- Compute GraphSAGE layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, it represents the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. edge_weight : torch.Tensor, optional Optional tensor on the edge. If given, the convolution will weight with regard to the message. Returns ------- torch.Tensor The output feature of shape :math:`(N_{dst}, D_{out})` where :math:`N_{dst}` is the number of destination nodes in the input graph, :math:`D_{out}` is the size of the output feature. 
""" with graph.local_scope(): if isinstance(feat, tuple): feat_src = self.feat_drop(feat[0]) feat_dst = self.feat_drop(feat[1]) else: feat_src = feat_dst = self.feat_drop(feat) if graph.is_block: feat_dst = feat_src[: graph.number_of_dst_nodes()] msg_fn = fn.copy_u("h", "m") if edge_weight is not None: assert edge_weight.shape[0] == graph.num_edges() graph.edata["_edge_weight"] = edge_weight msg_fn = fn.u_mul_e("h", "_edge_weight", "m") h_self = feat_dst # Handle the case of graphs without edges if graph.num_edges() == 0: graph.dstdata["neigh"] = torch.zeros( feat_dst.shape[0], self._in_src_feats ).to(feat_dst) # Determine whether to apply linear transformation before message passing A(XW) lin_before_mp = self._in_src_feats > self._out_feats # Message Passing if self._aggre_type == "mean": graph.srcdata["h"] = ( self.fc_neigh(feat_src) if lin_before_mp else feat_src ) graph.update_all(msg_fn, fn.mean("m", "neigh")) h_neigh = graph.dstdata["neigh"] if not lin_before_mp: h_neigh = self.fc_neigh(h_neigh) elif self._aggre_type == "gcn": check_eq_shape(feat) graph.srcdata["h"] = ( self.fc_neigh(feat_src) if lin_before_mp else feat_src ) if isinstance(feat, tuple): # heterogeneous graph.dstdata["h"] = ( self.fc_neigh(feat_dst) if lin_before_mp else feat_dst ) else: if graph.is_block: graph.dstdata["h"] = graph.srcdata["h"][ : graph.num_dst_nodes() ] else: graph.dstdata["h"] = graph.srcdata["h"] graph.update_all(msg_fn, fn.sum("m", "neigh")) # divide in_degrees degs = graph.in_degrees().to(feat_dst) h_neigh = (graph.dstdata["neigh"] + graph.dstdata["h"]) / ( degs.unsqueeze(-1) + 1 ) if not lin_before_mp: h_neigh = self.fc_neigh(h_neigh) elif self._aggre_type == "pool": graph.srcdata["h"] = F.relu(self.fc_pool(feat_src)) graph.update_all(msg_fn, fn.max("m", "neigh")) h_neigh = self.fc_neigh(graph.dstdata["neigh"]) elif self._aggre_type == "lstm": graph.srcdata["h"] = feat_src graph.update_all(msg_fn, self._lstm_reducer) h_neigh = self.fc_neigh(graph.dstdata["neigh"]) else: 
raise KeyError( "Aggregator type {} not recognized.".format( self._aggre_type ) ) # GraphSAGE GCN does not require fc_self. if self._aggre_type == "gcn": rst = h_neigh # add bias manually for GCN if self.bias is not None: rst = rst + self.bias else: rst = self.fc_self(h_self) + h_neigh # activation if self.activation is not None: rst = self.activation(rst) # normalization if self.norm is not None: rst = self.norm(rst) return rst ================================================ FILE: python/dgl/nn/pytorch/conv/sgconv.py ================================================ """Torch Module for Simplifying Graph Convolution layer""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th from torch import nn from .... import function as fn from ....base import DGLError from .graphconv import EdgeWeightNorm class SGConv(nn.Module): r"""SGC layer from `Simplifying Graph Convolutional Networks `__ .. math:: H^{K} = (\tilde{D}^{-1/2} \tilde{A} \tilde{D}^{-1/2})^K X \Theta where :math:`\tilde{A}` is :math:`A` + :math:`I`. Thus the graph input is expected to have self-loop edges added. Parameters ---------- in_feats : int Number of input features; i.e, the number of dimensions of :math:`X`. out_feats : int Number of output features; i.e, the number of dimensions of :math:`H^{K}`. k : int Number of hops :math:`K`. Defaults:``1``. cached : bool If True, the module would cache .. math:: (\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}})^K X\Theta at the first forward call. This parameter should only be set to ``True`` in Transductive Learning setting. bias : bool If True, adds a learnable bias to the output. Default: ``True``. norm : callable activation function/layer or None, optional If not None, applies normalization to the updated node features. Default: ``False``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. 
This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. Example ------- >>> import dgl >>> import numpy as np >>> import torch as th >>> from dgl.nn import SGConv >>> >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = th.ones(6, 10) >>> conv = SGConv(10, 2, k=2) >>> res = conv(g, feat) >>> res tensor([[-1.9441, -0.9343], [-1.9441, -0.9343], [-1.9441, -0.9343], [-2.7709, -1.3316], [-1.9297, -0.9273], [-1.9441, -0.9343]], grad_fn=) """ def __init__( self, in_feats, out_feats, k=1, cached=False, bias=True, norm=None, allow_zero_in_degree=False, ): super(SGConv, self).__init__() self.fc = nn.Linear(in_feats, out_feats, bias=bias) self._cached = cached self._cached_h = None self._k = k self.norm = norm self._allow_zero_in_degree = allow_zero_in_degree self.reset_parameters() def reset_parameters(self): r""" Description ----------- Reinitialize learnable parameters. Note ---- The model parameters are initialized using xavier initialization and the bias is initialized to be zero. 
""" nn.init.xavier_uniform_(self.fc.weight) if self.fc.bias is not None: nn.init.zeros_(self.fc.bias) def set_allow_zero_in_degree(self, set_value): r""" Description ----------- Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def forward(self, graph, feat, edge_weight=None): r""" Description ----------- Compute Simplifying Graph Convolution layer. Parameters ---------- graph : DGLGraph The graph. feat : torch.Tensor The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. edge_weight: torch.Tensor, optional edge_weight to use in the message passing process. This is equivalent to using weighted adjacency matrix in the equation above, and :math:`\tilde{D}^{-1/2}\tilde{A} \tilde{D}^{-1/2}` is based on :class:`dgl.nn.pytorch.conv.graphconv.EdgeWeightNorm`. Returns ------- torch.Tensor The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is size of output feature. Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. Note ---- If ``cache`` is set to True, ``feat`` and ``graph`` should not change during training, or you will get wrong results. """ with graph.local_scope(): if not self._allow_zero_in_degree: if (graph.in_degrees() == 0).any(): raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." 
) msg_func = fn.copy_u("h", "m") if edge_weight is not None: graph.edata["_edge_weight"] = EdgeWeightNorm("both")( graph, edge_weight ) msg_func = fn.u_mul_e("h", "_edge_weight", "m") if self._cached_h is not None: feat = self._cached_h else: if edge_weight is None: # compute normalization degs = graph.in_degrees().to(feat).clamp(min=1) norm = th.pow(degs, -0.5) norm = norm.to(feat.device).unsqueeze(1) # compute (D^-1 A^k D)^k X for _ in range(self._k): if edge_weight is None: feat = feat * norm graph.ndata["h"] = feat graph.update_all(msg_func, fn.sum("m", "h")) feat = graph.ndata.pop("h") if edge_weight is None: feat = feat * norm if self.norm is not None: feat = self.norm(feat) # cache feature if self._cached: self._cached_h = feat return self.fc(feat) ================================================ FILE: python/dgl/nn/pytorch/conv/tagconv.py ================================================ """Torch Module for Topology Adaptive Graph Convolutional layer""" # pylint: disable= no-member, arguments-differ, invalid-name import torch as th from torch import nn from .... import function as fn from .graphconv import EdgeWeightNorm class TAGConv(nn.Module): r"""Topology Adaptive Graph Convolutional layer from `Topology Adaptive Graph Convolutional Networks `__ .. math:: H^{K} = {\sum}_{k=0}^K (D^{-1/2} A D^{-1/2})^{k} X {\Theta}_{k}, where :math:`A` denotes the adjacency matrix, :math:`D_{ii} = \sum_{j=0} A_{ij}` its diagonal degree matrix, :math:`{\Theta}_{k}` denotes the linear weights to sum the results of different hops together. Parameters ---------- in_feats : int Input feature size. i.e, the number of dimensions of :math:`X`. out_feats : int Output feature size. i.e, the number of dimensions of :math:`H^{K}`. k: int, optional Number of hops :math:`K`. Default: ``2``. bias: bool, optional If True, adds a learnable bias to the output. Default: ``True``. 
def reset_parameters(self):
    r"""
    Description
    -----------
    Reinitialize learnable parameters.

    Note
    ----
    The weight of the linear layer is initialized using Glorot (Xavier)
    normal initialization with the ReLU gain. The bias is left untouched.
    """
    nn.init.xavier_normal_(
        self.lin.weight, gain=nn.init.calculate_gain("relu")
    )
""" with graph.local_scope(): assert graph.is_homogeneous, "Graph is not homogeneous" if edge_weight is None: norm = th.pow(graph.in_degrees().to(feat).clamp(min=1), -0.5) shp = norm.shape + (1,) * (feat.dim() - 1) norm = th.reshape(norm, shp).to(feat.device) msg_func = fn.copy_u("h", "m") if edge_weight is not None: graph.edata["_edge_weight"] = EdgeWeightNorm("both")( graph, edge_weight ) msg_func = fn.u_mul_e("h", "_edge_weight", "m") # D-1/2 A D -1/2 X fstack = [feat] for _ in range(self._k): if edge_weight is None: rst = fstack[-1] * norm else: rst = fstack[-1] graph.ndata["h"] = rst graph.update_all(msg_func, fn.sum(msg="m", out="h")) rst = graph.ndata["h"] if edge_weight is None: rst = rst * norm fstack.append(rst) rst = self.lin(th.cat(fstack, dim=-1)) if self._activation is not None: rst = self._activation(rst) return rst ================================================ FILE: python/dgl/nn/pytorch/conv/twirlsconv.py ================================================ """Torch modules for TWIRLS""" # pylint: disable=invalid-name, useless-super-delegation, no-member import torch as tc import torch.nn as nn import torch.nn.functional as F from .... import function as fn class TWIRLSConv(nn.Module): r"""Convolution together with iteratively reweighting least squre from `Graph Neural Networks Inspired by Classical Iterative Algorithms `__ Parameters ---------- input_d : int Number of input features. output_d : int Number of output features. hidden_d : int Size of hidden layers. prop_step : int Number of propagation steps num_mlp_before : int Number of mlp layers before propagation. Default: ``1``. num_mlp_after : int Number of mlp layers after propagation. Default: ``1``. norm : str The type of norm layers inside mlp layers. Can be ``'batch'``, ``'layer'`` or ``'none'``. Default: ``'none'`` precond : str If True, use pre conditioning and unormalized laplacian, else not use pre conditioning and use normalized laplacian. 
Default: ``True`` alp : float The :math:`\alpha` in paper. If equal to :math:`0`, will be automatically decided based on other hyper prameters. Default: ``0``. lam : float The :math:`\lambda` in paper. Default: ``1``. attention : bool If ``True``, add an attention layer inside propagations. Default: ``False``. tau : float The :math:`\tau` in paper. Default: ``0.2``. T : float The :math:`T` in paper. If < 0, :math:`T` will be set to `\infty`. Default: ``-1``. p : float The :math:`p` in paper. Default: ``1``. use_eta : bool If ``True``, add a learnable weight on each dimension in attention. Default: ``False``. attn_bef : bool If ``True``, add another attention layer before propagation. Default: ``False``. dropout : float The dropout rate in mlp layers. Default: ``0.0``. attn_dropout : float The dropout rate of attention values. Default: ``0.0``. inp_dropout : float The dropout rate on input features. Default: ``0.0``. Note ---- ``add_self_loop`` will be automatically called before propagation. 
def __init__(
    self,
    input_d,
    output_d,
    hidden_d,
    prop_step,
    num_mlp_before=1,
    num_mlp_after=1,
    norm="none",
    precond=True,
    alp=0,
    lam=1,
    attention=False,
    tau=0.2,
    T=-1,
    p=1,
    use_eta=False,
    attn_bef=False,
    dropout=0.0,
    attn_dropout=0.0,
    inp_dropout=0.0,
):
    """Build the pre-MLP, the unfolding/attention module and the post-MLP.

    Parameters
    ----------
    input_d, output_d, hidden_d : int
        Input, output and hidden feature sizes handed to the two MLPs.
    prop_step : int
        Number of propagation steps of the unfolding module.
    num_mlp_before, num_mlp_after : int
        Number of MLP layers before / after the unfolding module.
    norm : str
        Norm type forwarded to both MLPs.
    precond : bool
        Forwarded to the unfolding module (selects the propagation rule).
    alp, lam, tau, T, p, use_eta, attn_dropout
        Forwarded unchanged to the unfolding module.
    attention : bool
        If ``True`` an attention layer is placed after ``prop_step // 2``
        propagation steps; otherwise no such layer is used.
    attn_bef : bool
        Forwarded as ``init_att`` (extra attention before propagation).
    dropout : float
        Dropout rate used inside the MLPs.
    inp_dropout : float
        Dropout rate applied to the input features in ``forward``.
    """
    super().__init__()
    self.input_d = input_d
    self.output_d = output_d
    self.hidden_d = hidden_d
    self.prop_step = prop_step
    self.num_mlp_before = num_mlp_before
    self.num_mlp_after = num_mlp_after
    self.norm = norm
    self.precond = precond
    self.attention = attention
    self.alp = alp
    self.lam = lam
    self.tau = tau
    self.T = T
    self.p = p
    self.use_eta = use_eta
    self.init_att = attn_bef
    self.dropout = dropout
    self.attn_dropout = attn_dropout
    self.inp_dropout = inp_dropout

    # ----- initialization of some variables -----
    # where to put attention
    self.attn_aft = prop_step // 2 if attention else -1

    # The unfolding result can be cached only when nothing trainable or
    # stochastic sits in front of it.
    self.cacheable = (
        (not self.attention)
        and self.num_mlp_before == 0
        and self.inp_dropout <= 0
    )
    if self.cacheable:
        self.cached_unfolding = None

    # if only one layer, then no hidden size
    self.size_bef_unf = self.hidden_d
    self.size_aft_unf = self.hidden_d
    if self.num_mlp_before == 0:
        self.size_aft_unf = self.input_d  # as the input of mlp_aft
    if self.num_mlp_after == 0:
        self.size_bef_unf = self.output_d  # as the output of mlp_bef

    # Width of the features that really flow through the unfolding module:
    # the output of mlp_bef when it has layers, otherwise the raw input.
    # The unfolding module sizes its per-dimension ``etas`` vector from it,
    # so passing ``hidden_d`` unconditionally breaks ``use_eta`` whenever
    # one of the MLPs is disabled.
    unfolding_d = (
        self.size_bef_unf if self.num_mlp_before > 0 else self.input_d
    )

    # ----- computational modules -----
    self.mlp_bef = MLP(
        self.input_d,
        self.hidden_d,
        self.size_bef_unf,
        self.num_mlp_before,
        self.dropout,
        self.norm,
        init_activate=False,
    )

    self.unfolding = TWIRLSUnfoldingAndAttention(
        unfolding_d,
        self.alp,
        self.lam,
        self.prop_step,
        self.attn_aft,
        self.tau,
        self.T,
        self.p,
        self.use_eta,
        self.init_att,
        self.attn_dropout,
        self.precond,
    )

    # if there are really transformations before unfolding, then do
    # init_activate in mlp_aft
    self.mlp_aft = MLP(
        self.size_aft_unf,
        self.hidden_d,
        self.output_d,
        self.num_mlp_after,
        self.dropout,
        self.norm,
        init_activate=(self.num_mlp_before > 0)
        and (self.num_mlp_after > 0),
    )
class PropagateNoPrecond(nn.Module):
    r"""

    Description
    -----------
    Propagation step that uses neither pre-conditioning nor
    reparameterization and relies on the normalized adjacency instead.
    Corresponds to eq.30 in the paper.
    """

    def __init__(self):
        super().__init__()

    def forward(self, graph, Y, X, alp, lam):
        r"""

        Description
        -----------
        Run one propagation step.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        Y : torch.Tensor
            The feature under propagation. Corresponds to :math:`Y^{(k)}` in eq.30 in the paper.
        X : torch.Tensor
            The original feature. Corresponds to :math:`Y^{(0)}` in eq.30 in the paper.
        alp : float
            The step size. Corresponds to :math:`\alpha` in the paper.
        lam : torch.Tensor
            The coefficient of smoothing term. Corresponds to :math:`\lambda` in the paper.

        Returns
        -------
        torch.Tensor
            Propagated feature. :math:`Y^{(k+1)}` in eq.30 in the paper.
        """
        # weight kept on the current iterate
        self_weight = 1 - alp * lam - alp
        # neighbourhood smoothing through D^{-1/2} A D^{-1/2}
        smoothed = alp * lam * normalized_AX(graph, Y)
        # pull towards the original features
        anchor = alp * X
        return self_weight * Y + smoothed + anchor
def forward(self, graph, Y, etas=None):
    r"""

    Description
    -----------
    Attention forward. Overwrites ``graph.edata['w']`` and
    ``graph.ndata['deg']``; the intermediate fields ``'h'``, ``'h_norm'``,
    ``'dot_'`` and ``'norm_'`` are also written to the graph.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    Y : torch.Tensor
        The feature to compute attention.
    etas : torch.Tensor, optional
        Per-dimension weights multiplied onto ``Y`` (flattened with
        ``view(-1)``). If ``None``, every dimension has weight 1.
        Default: ``None``.

    Returns
    -------
    DGLGraph
        The graph.
    """
    feats = Y if etas is None else Y * etas.view(-1)

    # Per-edge squared distance, assembled as
    # |h_u|^2 + |h_v|^2 - 2 <h_u, h_v>.
    graph.srcdata["h"] = feats
    graph.srcdata["h_norm"] = (feats**2).sum(-1)
    graph.apply_edges(fn.u_dot_v("h", "h", "dot_"))
    graph.apply_edges(fn.u_add_v("h_norm", "h_norm", "norm_"))
    for field in ("dot_", "norm_"):
        graph.edata[field] = graph.edata[field].view(-1)
    graph.edata["w"] = graph.edata["norm_"] - 2 * graph.edata["dot_"]

    # turn the edge distance into an edge weight
    self.reweighting(graph)

    # node degrees are the summed incoming edge weights
    graph.update_all(fn.copy_e("w", "m"), fn.sum("m", "deg"))
    graph.ndata["deg"] = graph.ndata["deg"].view(-1)

    # Attention dropout is applied after the degrees were computed, so the
    # stored degrees are those of the undropped weights.
    # FIXME: consider if there is a better way
    if self.attn_dropout > 0:
        dropped = F.dropout(
            graph.edata["w"], self.attn_dropout, training=self.training
        )
        graph.edata["w"] = dropped

    return graph
Example ------- >>> import dgl >>> from dgl.nn import TWIRLSUnfoldingAndAttention >>> import torch as th >>> g = dgl.graph(([0, 1, 2, 3, 2, 5], [1, 2, 3, 4, 0, 3])).add_self_loop() >>> feat = th.ones(6,5) >>> prop = TWIRLSUnfoldingAndAttention(10, 1, 1, prop_step=3) >>> res = prop(g,feat) >>> res tensor([[2.5000, 2.5000, 2.5000, 2.5000, 2.5000], [2.5000, 2.5000, 2.5000, 2.5000, 2.5000], [2.5000, 2.5000, 2.5000, 2.5000, 2.5000], [3.7656, 3.7656, 3.7656, 3.7656, 3.7656], [2.5217, 2.5217, 2.5217, 2.5217, 2.5217], [4.0000, 4.0000, 4.0000, 4.0000, 4.0000]]) """ def __init__( self, d, alp, lam, prop_step, attn_aft=-1, tau=0.2, T=-1, p=1, use_eta=False, init_att=False, attn_dropout=0, precond=True, ): super().__init__() self.d = d self.alp = alp if alp > 0 else 1 / (lam + 1) # automatic set alpha self.lam = lam self.tau = tau self.p = p self.prop_step = prop_step self.attn_aft = attn_aft self.use_eta = use_eta self.init_att = init_att prop_method = Propagate if precond else PropagateNoPrecond self.prop_layers = nn.ModuleList( [prop_method() for _ in range(prop_step)] ) self.init_attn = ( Attention(tau, T, p, attn_dropout) if self.init_att else None ) self.attn_layer = ( Attention(tau, T, p, attn_dropout) if self.attn_aft >= 0 else None ) self.etas = nn.Parameter(tc.ones(d)) if self.use_eta else None def forward(self, g, X): r""" Description ----------- Compute forward pass of propagation & attention. Parameters ---------- g : DGLGraph The graph. X : torch.Tensor Init features. Returns ------- torch.Tensor The graph. """ Y = X g.edata["w"] = tc.ones(g.num_edges(), 1, device=g.device) g.ndata["deg"] = g.in_degrees().to(X) if self.init_att: g = self.init_attn(g, Y, self.etas) for k, layer in enumerate(self.prop_layers): # do unfolding Y = layer(g, Y, X, self.alp, self.lam) # do attention at certain layer if k == self.attn_aft - 1: g = self.attn_layer(g, Y, self.etas) return Y class MLP(nn.Module): r""" Description ----------- An MLP module. 
Parameters ---------- input_d : int Number of input features. output_d : int Number of output features. hidden_d : int Size of hidden layers. num_layers : int Number of mlp layers. dropout : float The dropout rate in mlp layers. norm : str The type of norm layers inside mlp layers. Can be ``'batch'``, ``'layer'`` or ``'none'``. init_activate : bool If add a relu at the beginning. """ def __init__( self, input_d, hidden_d, output_d, num_layers, dropout, norm, init_activate, ): super().__init__() self.init_activate = init_activate self.norm = norm self.dropout = dropout self.layers = nn.ModuleList([]) if num_layers == 1: self.layers.append(nn.Linear(input_d, output_d)) elif num_layers > 1: self.layers.append(nn.Linear(input_d, hidden_d)) for _ in range(num_layers - 2): self.layers.append(nn.Linear(hidden_d, hidden_d)) self.layers.append(nn.Linear(hidden_d, output_d)) # how many norm layers we have self.norm_cnt = num_layers - 1 + int(init_activate) if norm == "batch": self.norms = nn.ModuleList( [nn.BatchNorm1d(hidden_d) for _ in range(self.norm_cnt)] ) elif norm == "layer": self.norms = nn.ModuleList( [nn.LayerNorm(hidden_d) for _ in range(self.norm_cnt)] ) self.reset_params() def reset_params(self): """reset mlp parameters using xavier_norm""" for layer in self.layers: nn.init.xavier_normal_(layer.weight.data) nn.init.constant_(layer.bias.data, 0) def activate(self, x): """do normlaization and activation""" if self.norm != "none": x = self.norms[self.cur_norm_idx](x) # use the last norm layer self.cur_norm_idx += 1 x = F.relu(x) x = F.dropout(x, self.dropout, training=self.training) return x def forward(self, x): """The forward pass of mlp.""" self.cur_norm_idx = 0 if self.init_activate: x = self.activate(x) for i, layer in enumerate(self.layers): x = layer(x) if i != len(self.layers) - 1: # do not activate in the last layer x = self.activate(x) return x ================================================ FILE: python/dgl/nn/pytorch/explain/__init__.py 
================================================ """Torch modules for explanation models.""" # pylint: disable= no-member, arguments-differ, invalid-name from .gnnexplainer import * from .subgraphx import * from .pgexplainer import * ================================================ FILE: python/dgl/nn/pytorch/explain/gnnexplainer.py ================================================ """Torch Module for GNNExplainer""" # pylint: disable= no-member, arguments-differ, invalid-name from math import sqrt import torch from torch import nn from tqdm.auto import tqdm from ....base import EID, NID from ....subgraph import khop_in_subgraph __all__ = ["GNNExplainer", "HeteroGNNExplainer"] class GNNExplainer(nn.Module): r"""GNNExplainer model from `GNNExplainer: Generating Explanations for Graph Neural Networks `__ It identifies compact subgraph structures and small subsets of node features that play a critical role in GNN-based node classification and graph classification. To generate an explanation, it learns an edge mask :math:`M` and a feature mask :math:`F` by optimizing the following objective function. .. math:: l(y, \hat{y}) + \alpha_1 \|M\|_1 + \alpha_2 H(M) + \beta_1 \|F\|_1 + \beta_2 H(F) where :math:`l` is the loss function, :math:`y` is the original model prediction, :math:`\hat{y}` is the model prediction with the edge and feature mask applied, :math:`H` is the entropy function. Parameters ---------- model : nn.Module The GNN model to explain. * The required arguments of its forward function are graph and feat. The latter one is for input node features. * It should also optionally take an eweight argument for edge weights and multiply the messages by it in message passing. * The output of its forward function is the logits for the predicted node/graph classes. See also the example in :func:`explain_node` and :func:`explain_graph`. num_hops : int The number of hops for GNN information aggregation. lr : float, optional The learning rate to use, default to 0.01. 
num_epochs : int, optional The number of epochs to train. alpha1 : float, optional A higher value will make the explanation edge masks more sparse by decreasing the sum of the edge mask. alpha2 : float, optional A higher value will make the explanation edge masks more sparse by decreasing the entropy of the edge mask. beta1 : float, optional A higher value will make the explanation node feature masks more sparse by decreasing the mean of the node feature mask. beta2 : float, optional A higher value will make the explanation node feature masks more sparse by decreasing the entropy of the node feature mask. log : bool, optional If True, it will log the computation process, default to True. """ def __init__( self, model, num_hops, lr=0.01, num_epochs=100, *, alpha1=0.005, alpha2=1.0, beta1=1.0, beta2=0.1, log=True, ): super(GNNExplainer, self).__init__() self.model = model self.num_hops = num_hops self.lr = lr self.num_epochs = num_epochs self.alpha1 = alpha1 self.alpha2 = alpha2 self.beta1 = beta1 self.beta2 = beta2 self.log = log def _init_masks(self, graph, feat): r"""Initialize learnable feature and edge mask. Parameters ---------- graph : DGLGraph Input graph. feat : Tensor Input node features. Returns ------- feat_mask : Tensor Feature mask of shape :math:`(1, D)`, where :math:`D` is the feature size. edge_mask : Tensor Edge mask of shape :math:`(E)`, where :math:`E` is the number of edges. """ num_nodes, feat_size = feat.size() num_edges = graph.num_edges() device = feat.device std = 0.1 feat_mask = nn.Parameter(torch.randn(1, feat_size, device=device) * std) std = nn.init.calculate_gain("relu") * sqrt(2.0 / (2 * num_nodes)) edge_mask = nn.Parameter(torch.randn(num_edges, device=device) * std) return feat_mask, edge_mask def _loss_regularize(self, loss, feat_mask, edge_mask): r"""Add regularization terms to the loss. Parameters ---------- loss : Tensor Loss value. feat_mask : Tensor Feature mask of shape :math:`(1, D)`, where :math:`D` is the feature size. 
def explain_node(self, node_id, graph, feat, **kwargs):
    r"""Learn and return a node feature mask and subgraph that play a
    crucial role to explain the prediction made by the GNN for node
    :attr:`node_id`.

    Parameters
    ----------
    node_id : int
        The node to explain.
    graph : DGLGraph
        A homogeneous graph.
    feat : Tensor
        The input feature of shape :math:`(N, D)`. :math:`N` is the
        number of nodes, and :math:`D` is the feature size.
    kwargs : dict
        Additional arguments passed to the GNN model. Tensors whose
        first dimension is the number of nodes or edges will be
        assumed to be node/edge features. Scalar (0-dim) tensors and
        non-tensor values are passed through unchanged.

    Returns
    -------
    new_node_id : Tensor
        The new ID of the input center node.
    sg : DGLGraph
        The subgraph induced on the k-hop in-neighborhood of the input center node.
    feat_mask : Tensor
        Learned node feature importance mask of shape :math:`(D)`, where :math:`D` is the
        feature size. The values are within range :math:`(0, 1)`.
        The higher, the more important.
    edge_mask : Tensor
        Learned importance mask of the edges in the subgraph, which is a tensor
        of shape :math:`(E)`, where :math:`E` is the number of edges in the
        subgraph. The values are within range :math:`(0, 1)`.
        The higher, the more important.

    Examples
    --------

    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch
    >>> import torch.nn as nn
    >>> from dgl.data import CoraGraphDataset
    >>> from dgl.nn import GNNExplainer

    >>> # Load dataset
    >>> data = CoraGraphDataset()
    >>> g = data[0]
    >>> features = g.ndata['feat']
    >>> labels = g.ndata['label']
    >>> train_mask = g.ndata['train_mask']

    >>> # Define a model
    >>> class Model(nn.Module):
    ...     def __init__(self, in_feats, out_feats):
    ...         super(Model, self).__init__()
    ...         self.linear = nn.Linear(in_feats, out_feats)
    ...
    ...     def forward(self, graph, feat, eweight=None):
    ...         with graph.local_scope():
    ...             feat = self.linear(feat)
    ...             graph.ndata['h'] = feat
    ...             if eweight is None:
    ...                 graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
    ...             else:
    ...                 graph.edata['w'] = eweight
    ...                 graph.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h'))
    ...             return graph.ndata['h']

    >>> # Train the model
    >>> model = Model(features.shape[1], data.num_classes)
    >>> criterion = nn.CrossEntropyLoss()
    >>> optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
    >>> for epoch in range(10):
    ...     logits = model(g, features)
    ...     loss = criterion(logits[train_mask], labels[train_mask])
    ...     optimizer.zero_grad()
    ...     loss.backward()
    ...     optimizer.step()

    >>> # Explain the prediction for node 10
    >>> explainer = GNNExplainer(model, num_hops=1)
    >>> new_center, sg, feat_mask, edge_mask = explainer.explain_node(10, g, features)
    >>> new_center
    tensor([1])
    >>> sg.num_edges()
    12
    >>> # Old IDs of the nodes in the subgraph
    >>> sg.ndata[dgl.NID]
    tensor([ 9, 10, 11, 12])
    >>> # Old IDs of the edges in the subgraph
    >>> sg.edata[dgl.EID]
    tensor([51, 53, 56, 48, 52, 57, 47, 50, 55, 46, 49, 54])
    >>> feat_mask
    tensor([0.2638, 0.2738, 0.3039,  ..., 0.2794, 0.2643, 0.2733])
    >>> edge_mask
    tensor([0.0937, 0.1496, 0.8287, 0.8132, 0.8825, 0.8515, 0.8146, 0.0915, 0.1145,
            0.9011, 0.1311, 0.8437])
    """
    self.model = self.model.to(graph.device)
    self.model.eval()
    num_nodes = graph.num_nodes()
    num_edges = graph.num_edges()

    # Extract node-centered k-hop subgraph and
    # its associated node and edge features.
    sg, inverse_indices = khop_in_subgraph(graph, node_id, self.num_hops)
    sg_nodes = sg.ndata[NID].long()
    sg_edges = sg.edata[EID].long()
    feat = feat[sg_nodes]
    for key, item in kwargs.items():
        # Only tensors with at least one dimension can be node/edge
        # features; calling ``size(0)`` on a 0-dim tensor would raise.
        if not (torch.is_tensor(item) and item.dim() > 0):
            continue
        if item.size(0) == num_nodes:
            kwargs[key] = item[sg_nodes]
        elif item.size(0) == num_edges:
            kwargs[key] = item[sg_edges]

    # Get the initial prediction.
    with torch.no_grad():
        logits = self.model(graph=sg, feat=feat, **kwargs)
        pred_label = logits.argmax(dim=-1)

    feat_mask, edge_mask = self._init_masks(sg, feat)

    # Only the two masks are optimised; the model itself is not stepped.
    params = [feat_mask, edge_mask]
    optimizer = torch.optim.Adam(params, lr=self.lr)

    if self.log:
        pbar = tqdm(total=self.num_epochs)
        pbar.set_description(f"Explain node {node_id}")

    for _ in range(self.num_epochs):
        optimizer.zero_grad()
        h = feat * feat_mask.sigmoid()
        logits = self.model(
            graph=sg, feat=h, eweight=edge_mask.sigmoid(), **kwargs
        )
        log_probs = logits.log_softmax(dim=-1)
        # negative log-likelihood of the original prediction for the
        # center node
        loss = -log_probs[inverse_indices, pred_label[inverse_indices]]
        loss = self._loss_regularize(loss, feat_mask, edge_mask)
        loss.backward()
        optimizer.step()

        if self.log:
            pbar.update(1)

    if self.log:
        pbar.close()

    feat_mask = feat_mask.detach().sigmoid().squeeze()
    edge_mask = edge_mask.detach().sigmoid()

    return inverse_indices, sg, feat_mask, edge_mask
Examples -------- >>> import dgl.function as fn >>> import torch >>> import torch.nn as nn >>> from dgl.data import GINDataset >>> from dgl.dataloading import GraphDataLoader >>> from dgl.nn import AvgPooling, GNNExplainer >>> # Load dataset >>> data = GINDataset('MUTAG', self_loop=True) >>> dataloader = GraphDataLoader(data, batch_size=64, shuffle=True) >>> # Define a model >>> class Model(nn.Module): ... def __init__(self, in_feats, out_feats): ... super(Model, self).__init__() ... self.linear = nn.Linear(in_feats, out_feats) ... self.pool = AvgPooling() ... ... def forward(self, graph, feat, eweight=None): ... with graph.local_scope(): ... feat = self.linear(feat) ... graph.ndata['h'] = feat ... if eweight is None: ... graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h')) ... else: ... graph.edata['w'] = eweight ... graph.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h')) ... return self.pool(graph, graph.ndata['h']) >>> # Train the model >>> feat_size = data[0][0].ndata['attr'].shape[1] >>> model = Model(feat_size, data.gclasses) >>> criterion = nn.CrossEntropyLoss() >>> optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) >>> for bg, labels in dataloader: ... logits = model(bg, bg.ndata['attr']) ... loss = criterion(logits, labels) ... optimizer.zero_grad() ... loss.backward() ... optimizer.step() >>> # Explain the prediction for graph 0 >>> explainer = GNNExplainer(model, num_hops=1) >>> g, _ = data[0] >>> features = g.ndata['attr'] >>> feat_mask, edge_mask = explainer.explain_graph(g, features) >>> feat_mask tensor([0.2362, 0.2497, 0.2622, 0.2675, 0.2649, 0.2962, 0.2533]) >>> edge_mask tensor([0.2154, 0.2235, 0.8325, ..., 0.7787, 0.1735, 0.1847]) """ self.model = self.model.to(graph.device) self.model.eval() # Get the initial prediction. 
with torch.no_grad(): logits = self.model(graph=graph, feat=feat, **kwargs) pred_label = logits.argmax(dim=-1) feat_mask, edge_mask = self._init_masks(graph, feat) params = [feat_mask, edge_mask] optimizer = torch.optim.Adam(params, lr=self.lr) if self.log: pbar = tqdm(total=self.num_epochs) pbar.set_description("Explain graph") for _ in range(self.num_epochs): optimizer.zero_grad() h = feat * feat_mask.sigmoid() logits = self.model( graph=graph, feat=h, eweight=edge_mask.sigmoid(), **kwargs ) log_probs = logits.log_softmax(dim=-1) loss = -log_probs[0, pred_label[0]] loss = self._loss_regularize(loss, feat_mask, edge_mask) loss.backward() optimizer.step() if self.log: pbar.update(1) if self.log: pbar.close() feat_mask = feat_mask.detach().sigmoid().squeeze() edge_mask = edge_mask.detach().sigmoid() return feat_mask, edge_mask class HeteroGNNExplainer(nn.Module): r"""GNNExplainer model from `GNNExplainer: Generating Explanations for Graph Neural Networks `__, adapted for heterogeneous graphs It identifies compact subgraph structures and small subsets of node features that play a critical role in GNN-based node classification and graph classification. To generate an explanation, it learns an edge mask :math:`M` and a feature mask :math:`F` by optimizing the following objective function. .. math:: l(y, \hat{y}) + \alpha_1 \|M\|_1 + \alpha_2 H(M) + \beta_1 \|F\|_1 + \beta_2 H(F) where :math:`l` is the loss function, :math:`y` is the original model prediction, :math:`\hat{y}` is the model prediction with the edge and feature mask applied, :math:`H` is the entropy function. Parameters ---------- model : nn.Module The GNN model to explain. * The required arguments of its forward function are graph and feat. The latter one is for input node features. * It should also optionally take an eweight argument for edge weights and multiply the messages by it in message passing. * The output of its forward function is the logits for the predicted node/graph classes. 
See also the example in :func:`explain_node` and :func:`explain_graph`. num_hops : int The number of hops for GNN information aggregation. lr : float, optional The learning rate to use, default to 0.01. num_epochs : int, optional The number of epochs to train. alpha1 : float, optional A higher value will make the explanation edge masks more sparse by decreasing the sum of the edge mask. alpha2 : float, optional A higher value will make the explanation edge masks more sparse by decreasing the entropy of the edge mask. beta1 : float, optional A higher value will make the explanation node feature masks more sparse by decreasing the mean of the node feature mask. beta2 : float, optional A higher value will make the explanation node feature masks more sparse by decreasing the entropy of the node feature mask. log : bool, optional If True, it will log the computation process, default to True. """ def __init__( self, model, num_hops, lr=0.01, num_epochs=100, *, alpha1=0.005, alpha2=1.0, beta1=1.0, beta2=0.1, log=True, ): super(HeteroGNNExplainer, self).__init__() self.model = model self.num_hops = num_hops self.lr = lr self.num_epochs = num_epochs self.alpha1 = alpha1 self.alpha2 = alpha2 self.beta1 = beta1 self.beta2 = beta2 self.log = log def _init_masks(self, graph, feat): r"""Initialize learnable feature and edge mask. Parameters ---------- graph : DGLGraph Input graph. feat : dict[str, Tensor] The dictionary that associates input node features (values) with the respective node types (keys) present in the graph. Returns ------- feat_masks : dict[str, Tensor] The dictionary that associates the node feature masks (values) with the respective node types (keys). The feature masks are of shape :math:`(1, D_t)`, where :math:`D_t` is the feature size for node type :math:`t`. edge_masks : dict[tuple[str], Tensor] The dictionary that associates the edge masks (values) with the respective canonical edge types (keys). 
The edge masks are of shape :math:`(E_t)`, where :math:`E_t` is the number of edges for canonical edge type :math:`t`. """ device = graph.device feat_masks = {} std = 0.1 for node_type, feature in feat.items(): _, feat_size = feature.size() feat_masks[node_type] = nn.Parameter( torch.randn(1, feat_size, device=device) * std ) edge_masks = {} for canonical_etype in graph.canonical_etypes: src_num_nodes = graph.num_nodes(canonical_etype[0]) dst_num_nodes = graph.num_nodes(canonical_etype[-1]) num_nodes_sum = src_num_nodes + dst_num_nodes num_edges = graph.num_edges(canonical_etype) std = nn.init.calculate_gain("relu") if num_nodes_sum > 0: std *= sqrt(2.0 / num_nodes_sum) edge_masks[canonical_etype] = nn.Parameter( torch.randn(num_edges, device=device) * std ) return feat_masks, edge_masks def _loss_regularize(self, loss, feat_masks, edge_masks): r"""Add regularization terms to the loss. Parameters ---------- loss : Tensor Loss value. feat_masks : dict[str, Tensor] The dictionary that associates the node feature masks (values) with the respective node types (keys). The feature masks are of shape :math:`(1, D_t)`, where :math:`D_t` is the feature size for node type :math:`t`. edge_masks : dict[tuple[str], Tensor] The dictionary that associates the edge masks (values) with the respective canonical edge types (keys). The edge masks are of shape :math:`(E_t)`, where :math:`E_t` is the number of edges for canonical edge type :math:`t`. Returns ------- Tensor Loss value with regularization terms added. 
""" # epsilon for numerical stability eps = 1e-15 for edge_mask in edge_masks.values(): edge_mask = edge_mask.sigmoid() # Edge mask sparsity regularization loss = loss + self.alpha1 * torch.sum(edge_mask) # Edge mask entropy regularization ent = -edge_mask * torch.log(edge_mask + eps) - ( 1 - edge_mask ) * torch.log(1 - edge_mask + eps) loss = loss + self.alpha2 * ent.mean() for feat_mask in feat_masks.values(): feat_mask = feat_mask.sigmoid() # Feature mask sparsity regularization loss = loss + self.beta1 * torch.mean(feat_mask) # Feature mask entropy regularization ent = -feat_mask * torch.log(feat_mask + eps) - ( 1 - feat_mask ) * torch.log(1 - feat_mask + eps) loss = loss + self.beta2 * ent.mean() return loss def explain_node(self, ntype, node_id, graph, feat, **kwargs): r"""Learn and return node feature masks and a subgraph that play a crucial role to explain the prediction made by the GNN for node :attr:`node_id` of type :attr:`ntype`. It requires :attr:`model` to return a dictionary mapping node types to type-specific predictions. Parameters ---------- ntype : str The type of the node to explain. :attr:`model` must be trained to make predictions for this particular node type. node_id : int The ID of the node to explain. graph : DGLGraph A heterogeneous graph. feat : dict[str, Tensor] The dictionary that associates input node features (values) with the respective node types (keys) present in the graph. The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is the number of nodes for node type :math:`t`, and :math:`D_t` is the feature size for node type :math:`t` kwargs : dict Additional arguments passed to the GNN model. Returns ------- new_node_id : Tensor The new ID of the input center node. sg : DGLGraph The subgraph induced on the k-hop in-neighborhood of the input center node. feat_mask : dict[str, Tensor] The dictionary that associates the learned node feature importance masks (values) with the respective node types (keys). 
def explain_node(self, ntype, node_id, graph, feat, **kwargs):
    r"""Explain the prediction made for a single node of a heterogeneous graph.

    Soft node feature masks and edge masks are learned on the
    :attr:`num_hops`-hop in-neighborhood of node :attr:`node_id` of type
    :attr:`ntype`, such that the masked subgraph keeps the label that the GNN
    originally predicted for that node.

    It requires :attr:`model` to return a dictionary mapping node types to
    type-specific predictions.

    Parameters
    ----------
    ntype : str
        The type of the node to explain. :attr:`model` must be trained to
        make predictions for this particular node type.
    node_id : int
        The ID of the node to explain.
    graph : DGLGraph
        A heterogeneous graph.
    feat : dict[str, Tensor]
        The dictionary that associates input node features (values) with
        the respective node types (keys) present in the graph.
        The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is
        the number of nodes for node type :math:`t`, and :math:`D_t` is the
        feature size for node type :math:`t`
    kwargs : dict
        Additional arguments passed to the GNN model.

    Returns
    -------
    new_node_id : Tensor
        The new ID of the input center node.
    sg : DGLGraph
        The subgraph induced on the k-hop in-neighborhood of the input
        center node.
    feat_mask : dict[str, Tensor]
        The dictionary that associates the learned node feature importance
        masks (values) with the respective node types (keys). The masks are
        of shape :math:`(D_t)`, where :math:`D_t` is the node feature size
        for node type :attr:`t`. The values are within range
        :math:`(0, 1)`. The higher, the more important.
    edge_mask : dict[Tuple[str], Tensor]
        The dictionary that associates the learned edge importance masks
        (values) with the respective canonical edge types (keys). The masks
        are of shape :math:`(E_t)`, where :math:`E_t` is the number of edges
        for canonical edge type :math:`t` in the subgraph. The values are
        within range :math:`(0, 1)`. The higher, the more important.

    Examples
    --------

    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch as th
    >>> import torch.nn as nn
    >>> import torch.nn.functional as F
    >>> from dgl.nn import HeteroGNNExplainer

    >>> class Model(nn.Module):
    ...     def __init__(self, in_dim, num_classes, canonical_etypes):
    ...         super(Model, self).__init__()
    ...         self.etype_weights = nn.ModuleDict({
    ...             '_'.join(c_etype): nn.Linear(in_dim, num_classes)
    ...             for c_etype in canonical_etypes
    ...         })
    ...
    ...     def forward(self, graph, feat, eweight=None):
    ...         with graph.local_scope():
    ...             c_etype_func_dict = {}
    ...             for c_etype in graph.canonical_etypes:
    ...                 src_type, etype, dst_type = c_etype
    ...                 wh = self.etype_weights['_'.join(c_etype)](feat[src_type])
    ...                 graph.nodes[src_type].data[f'h_{c_etype}'] = wh
    ...                 if eweight is None:
    ...                     c_etype_func_dict[c_etype] = (fn.copy_u(f'h_{c_etype}', 'm'),
    ...                         fn.mean('m', 'h'))
    ...                 else:
    ...                     graph.edges[c_etype].data['w'] = eweight[c_etype]
    ...                     c_etype_func_dict[c_etype] = (
    ...                         fn.u_mul_e(f'h_{c_etype}', 'w', 'm'), fn.mean('m', 'h'))
    ...             graph.multi_update_all(c_etype_func_dict, 'sum')
    ...             return graph.ndata['h']

    >>> input_dim = 5
    >>> num_classes = 2
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])})
    >>> g.nodes['user'].data['h'] = th.randn(g.num_nodes('user'), input_dim)
    >>> g.nodes['game'].data['h'] = th.randn(g.num_nodes('game'), input_dim)

    >>> transform = dgl.transforms.AddReverse()
    >>> g = transform(g)

    >>> # define and train the model
    >>> model = Model(input_dim, num_classes, g.canonical_etypes)
    >>> feat = g.ndata['h']
    >>> optimizer = th.optim.Adam(model.parameters())
    >>> for epoch in range(10):
    ...     logits = model(g, feat)['user']
    ...     loss = F.cross_entropy(logits, th.tensor([1, 1, 1]))
    ...     optimizer.zero_grad()
    ...     loss.backward()
    ...     optimizer.step()

    >>> # Explain the prediction for node 0 of type 'user'
    >>> explainer = HeteroGNNExplainer(model, num_hops=1)
    >>> new_center, sg, feat_mask, edge_mask = explainer.explain_node('user', 0, g, feat)
    >>> new_center
    tensor([0])
    >>> sg
    Graph(num_nodes={'game': 1, 'user': 1},
          num_edges={('game', 'rev_plays', 'user'): 1, ('user', 'plays', 'game'): 1,
          ('user', 'rev_rev_plays', 'game'): 1},
          metagraph=[('game', 'user', 'rev_plays'), ('user', 'game', 'plays'),
          ('user', 'game', 'rev_rev_plays')])
    >>> feat_mask
    {'game': tensor([0.2348, 0.2780, 0.2611, 0.2513, 0.2823]),
     'user': tensor([0.2716, 0.2450, 0.2658, 0.2876, 0.2738])}
    >>> edge_mask
    {('game', 'rev_plays', 'user'): tensor([0.0630]),
     ('user', 'plays', 'game'): tensor([0.1939]),
     ('user', 'rev_rev_plays', 'game'): tensor([0.9166])}
    """
    self.model = self.model.to(graph.device)
    self.model.eval()

    # Work on the k-hop in-neighborhood of the target node only, and gather
    # the input features of the nodes that made it into that subgraph.
    sg, inverse_indices = khop_in_subgraph(
        graph, {ntype: node_id}, self.num_hops
    )
    inverse_indices = inverse_indices[ntype]
    parent_ids = sg.ndata[NID]
    sg_feat = {
        node_type: feat[node_type][parent_ids[node_type].long()]
        for node_type in parent_ids.keys()
    }

    # Label predicted without any mask; the masks are optimized to keep it.
    with torch.no_grad():
        logits = self.model(graph=sg, feat=sg_feat, **kwargs)[ntype]
        pred_label = logits.argmax(dim=-1)

    feat_mask, edge_mask = self._init_masks(sg, sg_feat)
    optimizer = torch.optim.Adam(
        [*feat_mask.values(), *edge_mask.values()], lr=self.lr
    )

    if self.log:
        pbar = tqdm(total=self.num_epochs)
        pbar.set_description(f"Explain node {node_id} with type {ntype}")

    for _ in range(self.num_epochs):
        optimizer.zero_grad()

        # Squash the raw mask parameters into (0, 1) before applying them.
        masked_feat = {
            node_type: type_feat * feat_mask[node_type].sigmoid()
            for node_type, type_feat in sg_feat.items()
        }
        eweight = {
            canonical_etype: raw_mask.sigmoid()
            for canonical_etype, raw_mask in edge_mask.items()
        }

        logits = self.model(
            graph=sg, feat=masked_feat, eweight=eweight, **kwargs
        )[ntype]
        log_probs = logits.log_softmax(dim=-1)
        # Negative log-likelihood of the original label at the center node.
        loss = -log_probs[inverse_indices, pred_label[inverse_indices]]
        loss = self._loss_regularize(loss, feat_mask, edge_mask)
        loss.backward()
        optimizer.step()

        if self.log:
            pbar.update(1)

    if self.log:
        pbar.close()

    # Replace the trainable parameters by plain importance scores in (0, 1).
    for node_type, raw_mask in feat_mask.items():
        feat_mask[node_type] = raw_mask.detach().sigmoid().squeeze()
    for canonical_etype, raw_mask in edge_mask.items():
        edge_mask[canonical_etype] = raw_mask.detach().sigmoid()

    return inverse_indices, sg, feat_mask, edge_mask
def explain_graph(self, graph, feat, **kwargs):
    r"""Explain the prediction made for a whole heterogeneous graph.

    Soft node feature masks and edge masks are learned such that the masked
    graph keeps the label that the GNN originally predicted for the graph.

    Parameters
    ----------
    graph : DGLGraph
        A heterogeneous graph that will be explained.
    feat : dict[str, Tensor]
        The dictionary that associates input node features (values) with
        the respective node types (keys) present in the graph.
        The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is
        the number of nodes for node type :math:`t`, and :math:`D_t` is the
        feature size for node type :math:`t`
    kwargs : dict
        Additional arguments passed to the GNN model.

    Returns
    -------
    feat_mask : dict[str, Tensor]
        The dictionary that associates the learned node feature importance
        masks (values) with the respective node types (keys). The masks are
        of shape :math:`(D_t)`, where :math:`D_t` is the node feature size
        for node type :attr:`t`. The values are within range
        :math:`(0, 1)`. The higher, the more important.
    edge_mask : dict[Tuple[str], Tensor]
        The dictionary that associates the learned edge importance masks
        (values) with the respective canonical edge types (keys). The masks
        are of shape :math:`(E_t)`, where :math:`E_t` is the number of edges
        for canonical edge type :math:`t` in the graph. The values are
        within range :math:`(0, 1)`. The higher, the more important.

    Examples
    --------

    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch as th
    >>> import torch.nn as nn
    >>> import torch.nn.functional as F
    >>> from dgl.nn import HeteroGNNExplainer

    >>> class Model(nn.Module):
    ...     def __init__(self, in_dim, num_classes, canonical_etypes):
    ...         super(Model, self).__init__()
    ...         self.etype_weights = nn.ModuleDict({
    ...             '_'.join(c_etype): nn.Linear(in_dim, num_classes)
    ...             for c_etype in canonical_etypes
    ...         })
    ...
    ...     def forward(self, graph, feat, eweight=None):
    ...         with graph.local_scope():
    ...             c_etype_func_dict = {}
    ...             for c_etype in graph.canonical_etypes:
    ...                 src_type, etype, dst_type = c_etype
    ...                 wh = self.etype_weights['_'.join(c_etype)](feat[src_type])
    ...                 graph.nodes[src_type].data[f'h_{c_etype}'] = wh
    ...                 if eweight is None:
    ...                     c_etype_func_dict[c_etype] = (fn.copy_u(f'h_{c_etype}', 'm'),
    ...                         fn.mean('m', 'h'))
    ...                 else:
    ...                     graph.edges[c_etype].data['w'] = eweight[c_etype]
    ...                     c_etype_func_dict[c_etype] = (
    ...                         fn.u_mul_e(f'h_{c_etype}', 'w', 'm'), fn.mean('m', 'h'))
    ...             graph.multi_update_all(c_etype_func_dict, 'sum')
    ...             hg = 0
    ...             for ntype in graph.ntypes:
    ...                 if graph.num_nodes(ntype):
    ...                     hg = hg + dgl.mean_nodes(graph, 'h', ntype=ntype)
    ...             return hg

    >>> input_dim = 5
    >>> num_classes = 2
    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])})
    >>> g.nodes['user'].data['h'] = th.randn(g.num_nodes('user'), input_dim)
    >>> g.nodes['game'].data['h'] = th.randn(g.num_nodes('game'), input_dim)

    >>> transform = dgl.transforms.AddReverse()
    >>> g = transform(g)

    >>> # define and train the model
    >>> model = Model(input_dim, num_classes, g.canonical_etypes)
    >>> feat = g.ndata['h']
    >>> optimizer = th.optim.Adam(model.parameters())
    >>> for epoch in range(10):
    ...     logits = model(g, feat)
    ...     loss = F.cross_entropy(logits, th.tensor([1]))
    ...     optimizer.zero_grad()
    ...     loss.backward()
    ...     optimizer.step()

    >>> # Explain for the graph
    >>> explainer = HeteroGNNExplainer(model, num_hops=1)
    >>> feat_mask, edge_mask = explainer.explain_graph(g, feat)
    >>> feat_mask
    {'game': tensor([0.2684, 0.2597, 0.3135, 0.2976, 0.2607]),
     'user': tensor([0.2216, 0.2908, 0.2644, 0.2738, 0.2663])}
    >>> edge_mask
    {('game', 'rev_plays', 'user'): tensor([0.8922, 0.1966, 0.8371, 0.1330]),
     ('user', 'plays', 'game'): tensor([0.1785, 0.1696, 0.8065, 0.2167])}
    """
    self.model = self.model.to(graph.device)
    self.model.eval()

    # Label predicted without any mask; the masks are optimized to keep it.
    with torch.no_grad():
        logits = self.model(graph=graph, feat=feat, **kwargs)
        pred_label = logits.argmax(dim=-1)

    feat_mask, edge_mask = self._init_masks(graph, feat)
    optimizer = torch.optim.Adam(
        [*feat_mask.values(), *edge_mask.values()], lr=self.lr
    )

    if self.log:
        pbar = tqdm(total=self.num_epochs)
        pbar.set_description("Explain graph")

    for _ in range(self.num_epochs):
        optimizer.zero_grad()

        # Squash the raw mask parameters into (0, 1) before applying them.
        masked_feat = {
            node_type: type_feat * feat_mask[node_type].sigmoid()
            for node_type, type_feat in feat.items()
        }
        eweight = {
            canonical_etype: raw_mask.sigmoid()
            for canonical_etype, raw_mask in edge_mask.items()
        }

        logits = self.model(
            graph=graph, feat=masked_feat, eweight=eweight, **kwargs
        )
        log_probs = logits.log_softmax(dim=-1)
        # Negative log-likelihood of the original label of the first
        # (index 0) row of the model output.
        loss = -log_probs[0, pred_label[0]]
        loss = self._loss_regularize(loss, feat_mask, edge_mask)
        loss.backward()
        optimizer.step()

        if self.log:
            pbar.update(1)

    if self.log:
        pbar.close()

    # Replace the trainable parameters by plain importance scores in (0, 1).
    for node_type, raw_mask in feat_mask.items():
        feat_mask[node_type] = raw_mask.detach().sigmoid().squeeze()
    for canonical_etype, raw_mask in edge_mask.items():
        edge_mask[canonical_etype] = raw_mask.detach().sigmoid()

    return feat_mask, edge_mask
class PGExplainer(nn.Module):
    r"""PGExplainer from `Parameterized Explainer for Graph Neural Network
    <https://arxiv.org/pdf/2011.04573>`__

    PGExplainer adopts a deep neural network (explanation network) to
    parameterize the generation process of explanations, which enables it to
    explain multiple instances collectively. PGExplainer models the underlying
    structure as edge distributions, from which the explanatory graph is
    sampled.

    Parameters
    ----------
    model : nn.Module
        The GNN model to explain that tackles multiclass graph classification

        * Its forward function must have the form
          :attr:`forward(self, graph, nfeat, embed, edge_weight)`.
        * The output of its forward function is the logits if embed=False else
          the intermediate node embeddings.
    num_features : int
        Node embedding size used by :attr:`model`.
    num_hops : int, optional
        The number of hops for GNN information aggregation, which must match
        the number of message passing layers employed by the GNN to be
        explained. Required by :meth:`explain_node`; not used for graph-level
        explanations.
    explain_graph : bool, optional
        Whether to initialize the model for graph-level or node-level
        predictions. Default: True (graph-level).
    coff_budget : float, optional
        Size regularization to constrain the explanation size.
        Default: 0.01.
    coff_connect : float, optional
        Entropy regularization to constrain the connectivity of explanation.
        Default: 5e-4.
    sample_bias : float, optional
        Margin applied to the uniform noise used when sampling edges during
        training: the noise is drawn from
        :math:`[\text{sample\_bias}, 1 - \text{sample\_bias})`.
        Default: 0.0.
    """

    def __init__(
        self,
        model,
        num_features,
        num_hops=None,
        explain_graph=True,
        coff_budget=0.01,
        coff_connect=5e-4,
        sample_bias=0.0,
    ):
        super().__init__()

        self.model = model
        self.graph_explanation = explain_graph
        # An edge is described by the embeddings of its two end nodes; node
        # explanation additionally appends the embedding of the center node.
        self.num_features = num_features * (2 if self.graph_explanation else 3)
        self.num_hops = num_hops

        # training hyperparameters for PGExplainer
        self.coff_budget = coff_budget
        self.coff_connect = coff_connect
        self.sample_bias = sample_bias

        self.init_bias = 0.0

        # Explanation network in PGExplainer
        self.elayers = nn.Sequential(
            nn.Linear(self.num_features, 64), nn.ReLU(), nn.Linear(64, 1)
        )

    def set_masks(self, graph, edge_mask=None):
        r"""Set the edge mask that plays a crucial role to explain the
        prediction made by the GNN for a graph. Initialize a random edge mask
        if it is None.

        Parameters
        ----------
        graph : DGLGraph
            A homogeneous graph.
        edge_mask : Tensor, optional
            Learned importance mask of the edges in the graph, which is a
            tensor of shape :math:`(E)`, where :math:`E` is the number of
            edges in the graph. The values are within range :math:`(0, 1)`.
            The higher, the more important. Default: None.
        """
        if edge_mask is None:
            num_nodes = graph.num_nodes()
            num_edges = graph.num_edges()
            # max(..., 1) avoids a ZeroDivisionError on a graph without
            # nodes; the mask is empty in that case anyway.
            std = nn.init.calculate_gain("relu") * math.sqrt(
                2.0 / (2 * max(num_nodes, 1))
            )
            edge_mask = torch.randn(num_edges) * std + self.init_bias

        self.edge_mask = edge_mask.to(graph.device)

    def clear_masks(self):
        r"""Clear the edge mask that plays a crucial role to explain the
        prediction made by the GNN for a graph.
        """
        self.edge_mask = None

    def parameters(self, recurse=True):
        r"""Return the trainable parameters of the explanation network.

        Only the parameters of :attr:`elayers` are exposed, so an optimizer
        built from ``explainer.parameters()`` never updates the GNN model
        that is being explained.

        Parameters
        ----------
        recurse : bool, optional
            Same meaning as in :meth:`torch.nn.Module.parameters`; forwarded
            to ``self.elayers.parameters``. Default: True.

        Returns
        -------
        iterator
            An iterator over the `Parameter` objects of the `nn.Linear`
            layers in the `self.elayers` sequential module.
        """
        return self.elayers.parameters(recurse=recurse)

    def loss(self, prob, ori_pred):
        r"""The loss function that is used to learn the edge
        distribution.

        It is the sum of three terms: the negative log-likelihood of the
        original prediction, a size penalty on the sampled edge values and an
        entropy penalty on the edge mask. It reads
        ``self.sparse_mask_values`` and ``self.edge_mask``, which are set by
        :meth:`explain_graph` / :meth:`explain_node`.

        Parameters
        ----------
        prob : Tensor
            Tensor contains a set of probabilities for each possible
            class label of some model for all the batched graphs,
            which is of shape :math:`(B, L)`, where :math:`L` is the
            different types of label in the dataset and :math:`B` is
            the batch size.
        ori_pred : Tensor
            Tensor of shape :math:`(B)`, holding the class index originally
            predicted for each instance, where :math:`B` is the batch size.

        Returns
        -------
        Tensor
            A scalar tensor representing the total loss.
        """
        # 1e-6 added to prob to avoid taking the logarithm of zero
        target_prob = prob.gather(-1, ori_pred.unsqueeze(-1)) + 1e-6
        # negative log-likelihood of the original prediction
        pred_loss = torch.mean(-torch.log(target_prob))

        # size
        mask_size = torch.sum(self.sparse_mask_values)
        if self.coff_budget <= 0:
            size_loss = self.coff_budget * mask_size
        else:
            # coff_budget acts both as the weight and as the threshold
            # below which the mask size is not penalized.
            size_loss = self.coff_budget * F.relu(mask_size - self.coff_budget)

        # entropy; the mask is squeezed into [0.01, 0.99] so that neither
        # logarithm below is evaluated at zero.
        scale = 0.99
        edge_mask = self.edge_mask * (2 * scale - 1.0) + (1.0 - scale)
        mask_ent = -edge_mask * torch.log(edge_mask) - (
            1 - edge_mask
        ) * torch.log(1 - edge_mask)
        mask_ent_loss = self.coff_connect * torch.mean(mask_ent)

        return pred_loss + size_loss + mask_ent_loss

    def concrete_sample(self, w, beta=1.0, training=True):
        r"""Sample from the instantiation of concrete distribution when
        training.

        Parameters
        ----------
        w : Tensor
            A tensor representing the log of the prior probability of
            choosing the edges.
        beta : float, optional
            Temperature dividing the noisy logits before the sigmoid; only
            used when :attr:`training` is True.
        training : bool, optional
            Randomness is injected during training.

        Returns
        -------
        Tensor
            A tensor with the same shape as :attr:`w` and values in
            :math:`[0, 1]`. When :attr:`training` is True it is
            ``sigmoid((logistic_noise + w) / beta)``; otherwise it is simply
            ``sigmoid(w)`` without any noise.
        """
        if not training:
            return torch.sigmoid(w)

        bias = self.sample_bias
        random_noise = torch.rand(w.size()).to(w.device)
        # Restrict the uniform noise to [bias, 1 - bias).
        random_noise = bias + (1 - 2 * bias) * random_noise
        # Turn the uniform noise into logistic noise.
        gate_inputs = torch.log(random_noise) - torch.log(1.0 - random_noise)
        return torch.sigmoid((gate_inputs + w) / beta)

    @staticmethod
    def _as_node_list(nodes):
        r"""Convert an int or a tensor of node IDs into a list of node IDs;
        any other iterable is returned unchanged."""
        if isinstance(nodes, torch.Tensor):
            nodes = nodes.tolist()
        if isinstance(nodes, int):
            nodes = [nodes]
        return nodes

    def _sample_edge_mask(self, graph, edge_emb, temperature, training):
        r"""Score every edge with the explanation network, sample the edge
        values, average each edge with its reverse edge and store the result
        through :meth:`set_masks`. Returns the averaged edge mask."""
        values = self.elayers(edge_emb).reshape(-1)
        values = self.concrete_sample(
            values, beta=temperature, training=training
        )
        self.sparse_mask_values = values

        col, row = graph.edges()
        # IDs of the edges running in the opposite direction.
        # NOTE(review): presumably this needs every edge to have a reverse
        # counterpart in ``graph`` -- confirm against ``edge_ids``.
        reverse_eids = graph.edge_ids(row, col).long()
        edge_mask = (values + values[reverse_eids]) / 2

        self.set_masks(graph, edge_mask)
        return edge_mask

    def train_step(self, graph, feat, temperature, **kwargs):
        r"""Compute the loss of the explanation network for graph classification

        Parameters
        ----------
        graph : DGLGraph
            Input batched homogeneous graph.
        feat : Tensor
            The input feature of shape :math:`(N, D)`. :math:`N` is the
            number of nodes, and :math:`D` is the feature size.
        temperature : float
            The temperature parameter fed to the sampling procedure.
        kwargs : dict
            Additional arguments passed to the GNN model.

        Returns
        -------
        Tensor
            A scalar tensor representing the loss.
        """
        assert (
            self.graph_explanation
        ), '"explain_graph" must be True when initializing the module.'

        self.model = self.model.to(graph.device)
        self.elayers = self.elayers.to(graph.device)

        # Only the predicted class index is needed, so no autograd graph is
        # built for this forward pass.
        with torch.no_grad():
            pred = self.model(graph, feat, embed=False, **kwargs).argmax(-1)

        prob, _ = self.explain_graph(
            graph, feat, temperature, training=True, **kwargs
        )

        return self.loss(prob, pred)

    def train_step_node(self, nodes, graph, feat, temperature, **kwargs):
        r"""Compute the loss of the explanation network for node classification

        Parameters
        ----------
        nodes : int, iterable[int], tensor
            The nodes from the graph used to train the explanation network,
            which cannot have any duplicate value.
        graph : DGLGraph
            Input homogeneous graph.
        feat : Tensor
            The input feature of shape :math:`(N, D)`. :math:`N` is the
            number of nodes, and :math:`D` is the feature size.
        temperature : float
            The temperature parameter fed to the sampling procedure.
        kwargs : dict
            Additional arguments passed to the GNN model.

        Returns
        -------
        Tensor
            A scalar tensor representing the loss.
        """
        assert (
            not self.graph_explanation
        ), '"explain_graph" must be False when initializing the module.'

        self.model = self.model.to(graph.device)
        self.elayers = self.elayers.to(graph.device)

        nodes = self._as_node_list(nodes)

        prob, _, batched_graph, inverse_indices = self.explain_node(
            nodes, graph, feat, temperature, training=True, **kwargs
        )

        # Only the predicted class index is needed, so no autograd graph is
        # built for this forward pass. ``self.batched_feats`` was stored by
        # ``explain_node`` because ``training=True``.
        with torch.no_grad():
            pred = self.model(
                batched_graph, self.batched_feats, embed=False, **kwargs
            ).argmax(-1)

        # Only the center nodes of the subgraphs contribute to the loss.
        return self.loss(prob[inverse_indices], pred[inverse_indices])

    def explain_graph(
        self, graph, feat, temperature=1.0, training=False, **kwargs
    ):
        r"""Learn and return an edge mask that plays a crucial role to
        explain the prediction made by the GNN for a graph. Also, return
        the prediction made with the edges chosen based on the edge mask.

        Parameters
        ----------
        graph : DGLGraph
            A homogeneous graph.
        feat : Tensor
            The input feature of shape :math:`(N, D)`. :math:`N` is the
            number of nodes, and :math:`D` is the feature size.
        temperature : float
            The temperature parameter fed to the sampling procedure.
        training : bool
            Training the explanation network.
        kwargs : dict
            Additional arguments passed to the GNN model.

        Returns
        -------
        Tensor
            Classification probabilities given the masked graph. It is a tensor of
            shape :math:`(B, L)`, where :math:`L` is the different types of label
            in the dataset, and :math:`B` is the batch size.
        Tensor
            Edge weights which is a tensor of shape :math:`(E)`, where :math:`E`
            is the number of edges in the graph. A higher weight suggests a
            larger contribution of the edge.

        Examples
        --------

        >>> import torch as th
        >>> import torch.nn as nn
        >>> import dgl
        >>> from dgl.data import GINDataset
        >>> from dgl.dataloading import GraphDataLoader
        >>> from dgl.nn import GraphConv, PGExplainer
        >>> import numpy as np

        >>> # Define the model
        >>> class Model(nn.Module):
        ...     def __init__(self, in_feats, out_feats):
        ...         super().__init__()
        ...         self.conv = GraphConv(in_feats, out_feats)
        ...         self.fc = nn.Linear(out_feats, out_feats)
        ...         nn.init.xavier_uniform_(self.fc.weight)
        ...
        ...     def forward(self, g, h, embed=False, edge_weight=None):
        ...         h = self.conv(g, h, edge_weight=edge_weight)
        ...
        ...         if embed:
        ...             return h
        ...
        ...         with g.local_scope():
        ...             g.ndata['h'] = h
        ...             hg = dgl.mean_nodes(g, 'h')
        ...             return self.fc(hg)

        >>> # Load dataset
        >>> data = GINDataset('MUTAG', self_loop=True)
        >>> dataloader = GraphDataLoader(data, batch_size=64, shuffle=True)

        >>> # Train the model
        >>> feat_size = data[0][0].ndata['attr'].shape[1]
        >>> model = Model(feat_size, data.gclasses)
        >>> criterion = nn.CrossEntropyLoss()
        >>> optimizer = th.optim.Adam(model.parameters(), lr=1e-2)
        >>> for bg, labels in dataloader:
        ...     preds = model(bg, bg.ndata['attr'])
        ...     loss = criterion(preds, labels)
        ...     optimizer.zero_grad()
        ...     loss.backward()
        ...     optimizer.step()

        >>> # Initialize the explainer
        >>> explainer = PGExplainer(model, data.gclasses)

        >>> # Train the explainer
        >>> # Define explainer temperature parameter
        >>> init_tmp, final_tmp = 5.0, 1.0
        >>> optimizer_exp = th.optim.Adam(explainer.parameters(), lr=0.01)
        >>> for epoch in range(20):
        ...     tmp = float(init_tmp * np.power(final_tmp / init_tmp, epoch / 20))
        ...     for bg, labels in dataloader:
        ...          loss = explainer.train_step(bg, bg.ndata['attr'], tmp)
        ...          optimizer_exp.zero_grad()
        ...          loss.backward()
        ...          optimizer_exp.step()

        >>> # Explain the prediction for graph 0
        >>> graph, l = data[0]
        >>> graph_feat = graph.ndata.pop("attr")
        >>> probs, edge_weight = explainer.explain_graph(graph, graph_feat)
        """
        assert (
            self.graph_explanation
        ), '"explain_graph" must be True when initializing the module.'

        self.model = self.model.to(graph.device)
        self.elayers = self.elayers.to(graph.device)

        # Node embeddings are inputs of the explanation network only; they
        # are detached so the explained model receives no gradient from them.
        embed = self.model(graph, feat, embed=True, **kwargs).detach()

        # Describe every edge by the embeddings of its two end nodes.
        col, row = graph.edges()
        edge_emb = torch.cat([embed[col.long()], embed[row.long()]], dim=-1)
        edge_mask = self._sample_edge_mask(
            graph, edge_emb, temperature, training
        )

        # the model prediction with the updated edge mask
        logits = self.model(graph, feat, edge_weight=self.edge_mask, **kwargs)
        probs = F.softmax(logits, dim=-1)

        if training:
            # The masks are kept because ``loss`` reads them.
            # NOTE(review): the probabilities are detached here, so the
            # prediction term of ``loss`` carries no gradient back to the
            # explanation network -- confirm this is intended.
            probs = probs.detach()
        else:
            self.clear_masks()

        return (probs, edge_mask)

    def explain_node(
        self, nodes, graph, feat, temperature=1.0, training=False, **kwargs
    ):
        r"""Learn and return an edge mask that plays a crucial role to
        explain the prediction made by the GNN for provided set of node IDs.
        Also, return the prediction made with the graph and edge mask.

        Parameters
        ----------
        nodes : int, iterable[int], tensor
            The nodes from the graph, which cannot have any duplicate value.
        graph : DGLGraph
            A homogeneous graph.
        feat : Tensor
            The input feature of shape :math:`(N, D)`. :math:`N` is the
            number of nodes, and :math:`D` is the feature size.
        temperature : float
            The temperature parameter fed to the sampling procedure.
        training : bool
            Training the explanation network.
        kwargs : dict
            Additional arguments passed to the GNN model.

        Returns
        -------
        Tensor
            Classification probabilities given the masked graph. It is a tensor of
            shape :math:`(N, L)`, where :math:`L` is the different types
            of node labels in the dataset, and :math:`N` is the number of nodes in
            the graph.
        Tensor
            Edge weights which is a tensor of shape :math:`(E)`, where :math:`E`
            is the number of edges in the graph. A higher weight suggests a
            larger contribution of the edge.
        DGLGraph
            The batched set of subgraphs induced on the k-hop in-neighborhood
            of the input center nodes.
        Tensor
            The new IDs of the subgraph center nodes.

        Examples
        --------

        >>> import dgl
        >>> import numpy as np
        >>> import torch

        >>> # Define the model
        >>> class Model(torch.nn.Module):
        ...     def __init__(self, in_feats, out_feats):
        ...         super().__init__()
        ...         self.conv1 = dgl.nn.GraphConv(in_feats, out_feats)
        ...         self.conv2 = dgl.nn.GraphConv(out_feats, out_feats)
        ...
        ...     def forward(self, g, h, embed=False, edge_weight=None):
        ...         h = self.conv1(g, h, edge_weight=edge_weight)
        ...         if embed:
        ...             return h
        ...         return self.conv2(g, h)

        >>> # Load dataset
        >>> data = dgl.data.CoraGraphDataset(verbose=False)
        >>> g = data[0]
        >>> features = g.ndata["feat"]
        >>> labels = g.ndata["label"]

        >>> # Train the model
        >>> model = Model(features.shape[1], data.num_classes)
        >>> criterion = torch.nn.CrossEntropyLoss()
        >>> optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
        >>> for epoch in range(20):
        ...     logits = model(g, features)
        ...     loss = criterion(logits, labels)
        ...     optimizer.zero_grad()
        ...     loss.backward()
        ...     optimizer.step()

        >>> # Initialize the explainer
        >>> explainer = dgl.nn.PGExplainer(
        ...     model, data.num_classes, num_hops=2, explain_graph=False
        ... )

        >>> # Train the explainer
        >>> # Define explainer temperature parameter
        >>> init_tmp, final_tmp = 5.0, 1.0
        >>> optimizer_exp = torch.optim.Adam(explainer.parameters(), lr=0.01)
        >>> epochs = 10
        >>> for epoch in range(epochs):
        ...     tmp = float(init_tmp * np.power(final_tmp / init_tmp, epoch / epochs))
        ...     loss = explainer.train_step_node(g.nodes(), g, features, tmp)
        ...     optimizer_exp.zero_grad()
        ...     loss.backward()
        ...     optimizer_exp.step()

        >>> # Explain the prediction for node 0
        >>> probs, edge_weight, bg, inverse_indices = explainer.explain_node(
        ...     0, g, features
        ... )
        """
        assert (
            not self.graph_explanation
        ), '"explain_graph" must be False when initializing the module.'
        assert (
            self.num_hops is not None
        ), '"num_hops" must be provided when initializing the module.'

        nodes = self._as_node_list(nodes)

        self.model = self.model.to(graph.device)
        self.elayers = self.elayers.to(graph.device)

        subgraphs = []
        edge_embeds = []
        for node_id in nodes:
            sg, inverse_indices = khop_in_subgraph(
                graph, node_id, self.num_hops
            )
            sg.ndata["feat"] = feat[sg.ndata[NID].long()]

            # Flag the center node(s) of the subgraph with one vectorized
            # assignment instead of a per-node Python membership test.
            center_mask = torch.zeros(
                sg.num_nodes(), dtype=torch.bool, device=sg.device
            )
            center_mask[inverse_indices.long()] = True
            sg.ndata["train"] = center_mask

            embed = self.model(
                sg, sg.ndata["feat"], embed=True, **kwargs
            ).detach()

            # Describe every edge by the embeddings of its two end nodes
            # plus the embedding of the center node.
            col, row = sg.edges()
            self_emb = embed[inverse_indices[0]].repeat(sg.num_edges(), 1)
            edge_embeds.append(
                torch.cat(
                    [embed[col.long()], embed[row.long()], self_emb], dim=-1
                )
            )
            subgraphs.append(sg)

        batched_graph = batch(subgraphs)
        edge_mask = self._sample_edge_mask(
            batched_graph, torch.cat(edge_embeds), temperature, training
        )

        batched_feats = batched_graph.ndata["feat"]
        # the model prediction with the updated edge mask
        logits = self.model(
            batched_graph, batched_feats, edge_weight=self.edge_mask, **kwargs
        )
        probs = F.softmax(logits, dim=-1)

        batched_inverse_indices = (
            batched_graph.ndata["train"].nonzero().squeeze(1)
        )

        if training:
            # Kept for ``train_step_node``, which re-runs the model on them.
            self.batched_feats = batched_feats
            # NOTE(review): the probabilities are detached here, so the
            # prediction term of ``loss`` carries no gradient back to the
            # explanation network -- confirm this is intended.
            probs = probs.detach()
        else:
            self.clear_masks()

        return (
            probs,
            edge_mask,
            batched_graph,
            batched_inverse_indices,
        )
def train_step(self, graph, feat, temperature, **kwargs):
    # pylint: disable=useless-super-delegation
    r"""Compute the loss of the explanation network for graph classification

    The computation is fully delegated to the parent class; this method
    documents the argument types expected for heterogeneous graphs.

    Parameters
    ----------
    graph : DGLGraph
        Input batched heterogeneous graph.
    feat : dict[str, Tensor]
        A dict mapping node types (keys) to feature tensors (values).
        The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is
        the number of nodes for node type :math:`t`, and :math:`D_t` is the
        feature size for node type :math:`t`
    temperature : float
        The temperature parameter fed to the sampling procedure.
    kwargs : dict
        Additional arguments passed to the GNN model.

    Returns
    -------
    Tensor
        A scalar tensor representing the loss.
    """
    return super().train_step(graph, feat, temperature, **kwargs)

def train_step_node(self, nodes, graph, feat, temperature, **kwargs):
    r"""Compute the loss of the explanation network for node classification

    Parameters
    ----------
    nodes : dict[str, Iterable[int]]
        A dict mapping node types (keys) to an iterable set of node ids
        (values).
    graph : DGLGraph
        Input heterogeneous graph.
    feat : dict[str, Tensor]
        A dict mapping node types (keys) to feature tensors (values).
        The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is
        the number of nodes for node type :math:`t`, and :math:`D_t` is the
        feature size for node type :math:`t`
    temperature : float
        The temperature parameter fed to the sampling procedure.
    kwargs : dict
        Additional arguments passed to the GNN model.

    Returns
    -------
    Tensor
        A scalar tensor representing the loss.
    """
    assert (
        not self.graph_explanation
    ), '"explain_graph" must be False when initializing the module.'

    device = graph.device
    self.model = self.model.to(device)
    self.elayers = self.elayers.to(device)

    prob, _, batched_graph, inverse_indices = self.explain_node(
        nodes, graph, feat, temperature, training=True, **kwargs
    )

    # Class indices predicted by the model on the batched subgraphs, per
    # node type. ``self.batched_feats`` is read here after the
    # ``explain_node`` call above.
    type_logits = self.model(
        batched_graph, self.batched_feats, embed=False, **kwargs
    )
    pred = {
        ntype: logits.argmax(-1).data for ntype, logits in type_logits.items()
    }

    # Keep only the rows selected by ``inverse_indices``, concatenated over
    # node types in the iteration order of that dict.
    center_probs = []
    center_preds = []
    for ntype, nid in inverse_indices.items():
        center_probs.append(prob[ntype][nid])
        center_preds.append(pred[ntype][nid])

    return self.loss(torch.cat(center_probs), torch.cat(center_preds))
:math:`N_t` is the number of nodes for node type :math:`t`, and :math:`D_t` is the feature size for node type :math:`t` temperature : float The temperature parameter fed to the sampling procedure. training : bool Training the explanation network. kwargs : dict Additional arguments passed to the GNN model. Returns ------- Tensor Classification probabilities given the masked graph. It is a tensor of shape :math:`(B, L)`, where :math:`L` is the different types of label in the dataset, and :math:`B` is the batch size. dict[str, Tensor] A dict mapping edge types (keys) to edge tensors (values) of shape :math:`(E_t)`, where :math:`E_t` is the number of edges in the graph for edge type :math:`t`. A higher weight suggests a larger contribution of the edge. Examples -------- >>> import dgl >>> import torch as th >>> import torch.nn as nn >>> import numpy as np >>> # Define the model >>> class Model(nn.Module): ... def __init__(self, in_feats, hid_feats, out_feats, rel_names): ... super().__init__() ... self.conv = dgl.nn.HeteroGraphConv( ... {rel: dgl.nn.GraphConv(in_feats, hid_feats) for rel in rel_names}, ... aggregate="sum", ... ) ... self.fc = nn.Linear(hid_feats, out_feats) ... nn.init.xavier_uniform_(self.fc.weight) ... ... def forward(self, g, h, embed=False, edge_weight=None): ... if edge_weight: ... mod_kwargs = { ... etype: {"edge_weight": mask} for etype, mask in edge_weight.items() ... } ... h = self.conv(g, h, mod_kwargs=mod_kwargs) ... else: ... h = self.conv(g, h) ... ... if embed: ... return h ... ... with g.local_scope(): ... g.ndata["h"] = h ... hg = 0 ... for ntype in g.ntypes: ... hg = hg + dgl.mean_nodes(g, "h", ntype=ntype) ... 
return self.fc(hg) >>> # Load dataset >>> input_dim = 5 >>> hidden_dim = 5 >>> num_classes = 2 >>> g = dgl.heterograph({("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 1, 1])}) >>> g.nodes["user"].data["h"] = th.randn(g.num_nodes("user"), input_dim) >>> g.nodes["game"].data["h"] = th.randn(g.num_nodes("game"), input_dim) >>> transform = dgl.transforms.AddReverse() >>> g = transform(g) >>> # define and train the model >>> model = Model(input_dim, hidden_dim, num_classes, g.canonical_etypes) >>> optimizer = th.optim.Adam(model.parameters()) >>> for epoch in range(10): ... logits = model(g, g.ndata["h"]) ... loss = th.nn.functional.cross_entropy(logits, th.tensor([1])) ... optimizer.zero_grad() ... loss.backward() ... optimizer.step() >>> # Initialize the explainer >>> explainer = dgl.nn.HeteroPGExplainer(model, hidden_dim) >>> # Train the explainer >>> # Define explainer temperature parameter >>> init_tmp, final_tmp = 5.0, 1.0 >>> optimizer_exp = th.optim.Adam(explainer.parameters(), lr=0.01) >>> for epoch in range(20): ... tmp = float(init_tmp * np.power(final_tmp / init_tmp, epoch / 20)) ... loss = explainer.train_step(g, g.ndata["h"], tmp) ... optimizer_exp.zero_grad() ... loss.backward() ... optimizer_exp.step() >>> # Explain the graph >>> feat = g.ndata.pop("h") >>> probs, edge_mask = explainer.explain_graph(g, feat) """ assert ( self.graph_explanation ), '"explain_graph" must be True when initializing the module.' 
def explain_node(
    self, nodes, graph, feat, temperature=1.0, training=False, **kwargs
):
    r"""Learn and return an edge mask that plays a crucial role to explain
    the prediction made by the GNN for provided set of node IDs.
    Also, return the prediction made with the batched graph and edge mask.

    For every requested center node a k-hop in-subgraph is extracted
    (``k = self.num_hops``), the wrapped model is asked for node embeddings
    on it, and one row per edge is assembled from the source, destination
    and center-node embeddings. All subgraphs are then batched, the rows
    are scored by ``self.elayers`` and turned into an edge mask.

    Parameters
    ----------
    nodes : dict[str, Iterable[int]]
        A dict mapping node types (keys) to an iterable set of node ids (values).
    graph : DGLGraph
        A heterogeneous graph.
    feat : dict[str, Tensor]
        A dict mapping node types (keys) to feature tensors (values).
        The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is the
        number of nodes for node type :math:`t`, and :math:`D_t` is the feature size
        for node type :math:`t`
    temperature : float
        The temperature parameter fed to the sampling procedure.
    training : bool
        Training the explanation network.
    kwargs : dict
        Additional arguments passed to the GNN model.

    Returns
    -------
    dict[str, Tensor]
        A dict mapping node types (keys) to classification probabilities
        for node labels (values). They are computed on the batched
        subgraphs, so the values are tensors of shape :math:`(N_t, L)`,
        where :math:`L` is the different types of node labels in the
        dataset, and :math:`N_t` is the number of nodes of type :math:`t`
        in the batched graph.
    dict[str, Tensor]
        A dict mapping edge types (keys) to edge tensors (values) of shape
        :math:`(E_t)`, where :math:`E_t` is the number of edges of type
        :math:`t` in the batched graph. A higher weight suggests a larger
        contribution of the edge.
    DGLGraph
        The batched set of subgraphs induced on the k-hop in-neighborhood
        of the input center nodes.
    dict[str, Tensor]
        A dict mapping node types (keys) to a tensor of node IDs (values)
        which correspond to the subgraph center nodes.

    Examples
    --------

    >>> import dgl
    >>> import torch as th
    >>> import torch.nn as nn
    >>> import numpy as np

    >>> # Define the model
    >>> class Model(nn.Module):
    ...     def __init__(self, in_feats, hid_feats, out_feats, rel_names):
    ...         super().__init__()
    ...         self.conv = dgl.nn.HeteroGraphConv(
    ...             {rel: dgl.nn.GraphConv(in_feats, hid_feats) for rel in rel_names},
    ...             aggregate="sum",
    ...         )
    ...         self.fc = nn.Linear(hid_feats, out_feats)
    ...         nn.init.xavier_uniform_(self.fc.weight)
    ...
    ...     def forward(self, g, h, embed=False, edge_weight=None):
    ...         if edge_weight:
    ...             mod_kwargs = {
    ...                 etype: {"edge_weight": mask} for etype, mask in edge_weight.items()
    ...             }
    ...             h = self.conv(g, h, mod_kwargs=mod_kwargs)
    ...         else:
    ...             h = self.conv(g, h)
    ...
    ...         return h

    >>> # Load dataset
    >>> input_dim = 5
    >>> hidden_dim = 5
    >>> num_classes = 2
    >>> g = dgl.heterograph({("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 1, 1])})
    >>> g.nodes["user"].data["h"] = th.randn(g.num_nodes("user"), input_dim)
    >>> g.nodes["game"].data["h"] = th.randn(g.num_nodes("game"), input_dim)

    >>> transform = dgl.transforms.AddReverse()
    >>> g = transform(g)

    >>> # define and train the model
    >>> model = Model(input_dim, hidden_dim, num_classes, g.canonical_etypes)
    >>> optimizer = th.optim.Adam(model.parameters())
    >>> for epoch in range(10):
    ...     logits = model(g, g.ndata["h"])['user']
    ...     loss = th.nn.functional.cross_entropy(logits, th.tensor([1,1,1]))
    ...     optimizer.zero_grad()
    ...     loss.backward()
    ...     optimizer.step()

    >>> # Initialize the explainer
    >>> explainer = dgl.nn.HeteroPGExplainer(
    ...     model, hidden_dim, num_hops=2, explain_graph=False
    ... )

    >>> # Train the explainer
    >>> # Define explainer temperature parameter
    >>> init_tmp, final_tmp = 5.0, 1.0
    >>> optimizer_exp = th.optim.Adam(explainer.parameters(), lr=0.01)
    >>> for epoch in range(20):
    ...     tmp = float(init_tmp * np.power(final_tmp / init_tmp, epoch / 20))
    ...     loss = explainer.train_step_node(
    ...         { ntype: g.nodes(ntype) for ntype in g.ntypes },
    ...         g, g.ndata["h"], tmp
    ...     )
    ...     optimizer_exp.zero_grad()
    ...     loss.backward()
    ...     optimizer_exp.step()

    >>> # Explain the graph
    >>> feat = g.ndata.pop("h")
    >>> probs, edge_mask, bg, inverse_indices = explainer.explain_node(
    ...     { "user": [0] }, g, feat
    ... )
    """
    assert (
        not self.graph_explanation
    ), '"explain_graph" must be False when initializing the module.'
    assert (
        self.num_hops is not None
    ), '"num_hops" must be provided when initializing the module.'

    # Keep the explained model and the explanation network on the same
    # device as the input graph.
    self.model = self.model.to(graph.device)
    self.elayers = self.elayers.to(graph.device)

    # One entry per center node; merged into batches after the loop.
    batched_embed = []
    batched_homo_graph = []
    batched_hetero_graph = []
    for target_ntype, target_nids in nodes.items():
        if isinstance(target_nids, torch.Tensor):
            target_nids = target_nids.tolist()

        for target_nid in target_nids:
            sg, inverse_indices = khop_in_subgraph(
                graph, {target_ntype: target_nid}, self.num_hops
            )

            for sg_ntype in sg.ntypes:
                # Gather the input features of the nodes kept in the
                # subgraph via their IDs in the original graph.
                sg_feat = feat[sg_ntype][sg.ndata[NID][sg_ntype].long()]
                # Flag the subgraph nodes listed in `inverse_indices`,
                # i.e. the center node(s) of this subgraph.
                train_mask = [
                    sg_ntype in inverse_indices
                    and node_id in inverse_indices[sg_ntype]
                    for node_id in sg.nodes(sg_ntype)
                ]

                sg.nodes[sg_ntype].data["feat"] = sg_feat
                sg.nodes[sg_ntype].data["train"] = torch.tensor(
                    train_mask, device=sg.device
                )

            embed = self.model(sg, sg.ndata["feat"], embed=True, **kwargs)
            for ntype in embed.keys():
                # `.data` stores the embedding without its autograd history.
                sg.nodes[ntype].data["emb"] = embed[ntype].data

            homo_sg = to_homogeneous(sg, ndata=["emb"])
            homo_sg_embed = homo_sg.ndata["emb"]

            col, row = homo_sg.edges()
            col_emb = homo_sg_embed[col.long()]
            row_emb = homo_sg_embed[row.long()]
            # NOTE(review): `inverse_indices[target_ntype][0]` is an ID
            # local to `target_ntype` in `sg`, whereas `homo_sg_embed` is
            # indexed by homogeneous node ID - confirm the two coincide
            # when `target_ntype` is not the first node type of `sg`.
            self_emb = homo_sg_embed[
                inverse_indices[target_ntype][0]
            ].repeat(sg.num_edges(), 1)
            # One row per edge: [source emb | destination emb | center emb].
            emb = torch.cat([col_emb, row_emb, self_emb], dim=-1)
            batched_embed.append(emb)
            batched_homo_graph.append(homo_sg)
            batched_hetero_graph.append(sg)

    batched_homo_graph = batch(batched_homo_graph)
    batched_hetero_graph = batch(batched_hetero_graph)

    # Score every edge of the batch with the explanation network.
    batched_embed = torch.cat(batched_embed)
    batched_embed = self.elayers(batched_embed)
    values = batched_embed.reshape(-1)

    values = self.concrete_sample(
        values, beta=temperature, training=training
    )
    self.sparse_mask_values = values

    # Average each edge's value with the value of its reverse edge so the
    # mask is symmetric.
    # NOTE(review): this presumably requires every edge to have a reverse
    # edge in the batched graph - confirm `edge_ids` behaviour otherwise.
    col, row = batched_homo_graph.edges()
    reverse_eids = batched_homo_graph.edge_ids(row, col).long()
    edge_mask = (values + values[reverse_eids]) / 2

    self.set_masks(batched_homo_graph, edge_mask)

    # Convert the edge mask back into heterogeneous format.
    hetero_edge_mask = self._edge_mask_to_heterogeneous(
        edge_mask=edge_mask,
        homograph=batched_homo_graph,
        heterograph=batched_hetero_graph,
    )

    batched_feats = {
        ntype: batched_hetero_graph.nodes[ntype].data["feat"]
        for ntype in batched_hetero_graph.ntypes
    }

    # The model prediction with the updated edge mask.
    logits = self.model(
        batched_hetero_graph,
        batched_feats,
        edge_weight=hetero_edge_mask,
        **kwargs,
    )

    probs = {
        ntype: F.softmax(logits[ntype], dim=-1) for ntype in logits.keys()
    }

    # Positions of the flagged center nodes inside the batched graph.
    batched_inverse_indices = {
        ntype: batched_hetero_graph.nodes[ntype]
        .data["train"]
        .nonzero()
        .squeeze(1)
        for ntype in batched_hetero_graph.ntypes
    }

    if training:
        # Keep the batched features on the module and return the
        # probabilities without autograd history.
        self.batched_feats = batched_feats
        probs = {ntype: probs[ntype].data for ntype in probs.keys()}
    else:
        self.clear_masks()

    return (
        probs,
        hetero_edge_mask,
        batched_hetero_graph,
        batched_inverse_indices,
    )
class MCTSNode:
    r"""A single vertex of the Monte Carlo search tree.

    Parameters
    ----------
    nodes : Tensor
        IDs of the graph nodes forming the subgraph that this tree node
        stands for
    """

    def __init__(self, nodes):
        self.nodes = nodes
        # Search statistics all start from zero; they are filled in by the
        # code that walks the tree.
        self.num_visit, self.total_reward, self.immediate_reward = 0, 0.0, 0.0
        self.children = list()

    def __repr__(self):
        r"""Describe the tree node by the graph nodes it holds.

        Returns
        -------
        str
            ``str()`` of the stored node IDs
        """
        held_nodes = self.nodes
        return str(held_nodes)
def shapley(self, subgraph_nodes):
    r"""Compute Shapley value with Monte Carlo approximation.

    The subgraph is first grown by ``self.num_hops - 1`` rounds of in- and
    out-neighbors to obtain its local region. In each of
    ``self.shapley_steps`` rounds a random coalition is drawn from the
    region nodes outside the subgraph. The model is evaluated twice with
    the features of all other region nodes zeroed out: once with the
    subgraph's own features zeroed too and once with them kept. The
    Shapley estimate is the mean difference of the target-class
    probability between the two evaluations.

    Parameters
    ----------
    subgraph_nodes : tensor
        The tensor node ids of the subgraph that are associated with this
        tree node

    Returns
    -------
    float
        Shapley value
    """
    num_nodes = self.graph.num_nodes()
    subgraph_nodes = subgraph_nodes.tolist()

    # Obtain neighboring nodes of the subgraph g_i, P'.
    local_region = subgraph_nodes
    for _ in range(self.num_hops - 1):
        in_neighbors, _ = self.graph.in_edges(local_region)
        _, out_neighbors = self.graph.out_edges(local_region)
        neighbors = torch.cat([in_neighbors, out_neighbors]).tolist()
        local_region = list(set(local_region + neighbors))

    # `num_nodes` is not a valid node ID, so it can serve as a sentinel:
    # whatever precedes it in a random permutation is the coalition.
    split_point = num_nodes
    coalition_space = list(set(local_region) - set(subgraph_nodes)) + [
        split_point
    ]

    device = self.feat.device
    # Index tensors that do not change between sampling steps.
    region_idx = torch.as_tensor(
        local_region, dtype=torch.long, device=device
    )
    subgraph_idx = torch.as_tensor(
        subgraph_nodes, dtype=torch.long, device=device
    )

    marginal_contributions = []
    for _ in range(self.shapley_steps):
        permuted_space = np.random.permutation(coalition_space)
        # Take a scalar element explicitly: converting a one-element array
        # with `int()` is deprecated since NumPy 1.25.
        split_idx = int(np.flatnonzero(permuted_space == split_point)[0])
        selected_idx = torch.as_tensor(
            permuted_space[:split_idx], dtype=torch.long, device=device
        )

        # Mask for coalition set S_i
        exclude_mask = torch.ones(num_nodes, device=device)
        exclude_mask[region_idx] = 0.0
        exclude_mask[selected_idx] = 1.0

        # Mask for set S_i and g_i
        include_mask = exclude_mask.clone()
        include_mask[subgraph_idx] = 1.0

        exclude_feat = self.feat * exclude_mask.unsqueeze(1)
        include_feat = self.feat * include_mask.unsqueeze(1)

        with torch.no_grad():
            exclude_probs = self.model(
                self.graph, exclude_feat, **self.kwargs
            ).softmax(dim=-1)
            exclude_value = exclude_probs[:, self.target_class]
            include_probs = self.model(
                self.graph, include_feat, **self.kwargs
            ).softmax(dim=-1)
            include_value = include_probs[:, self.target_class]
        marginal_contributions.append(include_value - exclude_value)

    return torch.cat(marginal_contributions).mean().item()
def mcts_rollout(self, mcts_node):
    r"""Run one rollout of the tree search starting at ``mcts_node``.

    The search descends recursively, always following the child with the
    highest selection score, until it reaches a tree node whose subgraph
    has at most ``self.node_min`` nodes. On the way back the visit count
    and accumulated reward of every chosen child are updated.

    Parameters
    ----------
    mcts_node : MCTSNode
        Tree node the rollout starts from

    Returns
    -------
    float
        Reward collected by this rollout
    """
    # Subgraphs that are small enough are leaves of the search tree.
    if not len(mcts_node.nodes) > self.node_min:
        return mcts_node.immediate_reward

    candidates = self.get_mcts_children(mcts_node)

    total_visits = 0
    for candidate in candidates:
        total_visits += candidate.num_visit
    sqrt_total_visits = math.sqrt(total_visits)

    def selection_score(child):
        # Average reward so far plus an exploration bonus that shrinks as
        # the child gets visited more often.
        exploitation = child.total_reward / max(child.num_visit, 1)
        exploration = (
            self.coef
            * child.immediate_reward
            * sqrt_total_visits
            / (1 + child.num_visit)
        )
        return exploitation + exploration

    best_child = max(candidates, key=selection_score)
    reward = self.mcts_rollout(best_child)
    best_child.num_visit += 1
    best_child.total_reward += reward
    return reward
return self.pool(g, h) >>> # Load dataset >>> data = GINDataset('MUTAG', self_loop=True) >>> dataloader = GraphDataLoader(data, batch_size=64, shuffle=True) >>> # Train the model >>> feat_size = data[0][0].ndata['attr'].shape[1] >>> model = Model(feat_size, data.gclasses) >>> criterion = nn.CrossEntropyLoss() >>> optimizer = torch.optim.Adam(model.parameters(), lr=1e-2) >>> for bg, labels in dataloader: ... logits = model(bg, bg.ndata['attr']) ... loss = criterion(logits, labels) ... optimizer.zero_grad() ... loss.backward() ... optimizer.step() >>> # Initialize the explainer >>> explainer = SubgraphX(model, num_hops=2) >>> # Explain the prediction for graph 0 >>> graph, l = data[0] >>> graph_feat = graph.ndata.pop("attr") >>> g_nodes_explain = explainer.explain_graph(graph, graph_feat, ... target_class=l) """ self.model.eval() assert ( graph.num_nodes() > self.node_min ), f"The number of nodes in the\ graph {graph.num_nodes()} should be bigger than {self.node_min}." self.graph = graph self.feat = feat self.target_class = target_class self.kwargs = kwargs # book all nodes in MCTS self.mcts_node_maps = dict() root = MCTSNode(graph.nodes()) self.mcts_node_maps[str(root)] = root for i in range(self.num_rollouts): if self.log: print( f"Rollout {i}/{self.num_rollouts}, \ {len(self.mcts_node_maps)} subgraphs have been explored." ) self.mcts_rollout(root) best_leaf = None best_immediate_reward = float("-inf") for mcts_node in self.mcts_node_maps.values(): if len(mcts_node.nodes) > self.node_min: continue if mcts_node.immediate_reward > best_immediate_reward: best_leaf = mcts_node best_immediate_reward = best_leaf.immediate_reward return best_leaf.nodes class HeteroSubgraphX(nn.Module): r"""SubgraphX from `On Explainability of Graph Neural Networks via Subgraph Explorations `__, adapted for heterogeneous graphs It identifies the most important subgraph from the original graph that plays a critical role in GNN-based graph classification. 
def shapley(self, subgraph_nodes):
    r"""Compute Shapley value with Monte Carlo approximation.

    The subgraph is first grown along every canonical edge type for
    ``self.num_hops - 1`` rounds to obtain its local region. In each of
    ``self.shapley_steps`` rounds a random coalition is drawn per node
    type from the region nodes outside the subgraph. The model is
    evaluated twice with the features of all other region nodes zeroed
    out: once with the subgraph's own features zeroed too ("exclude") and
    once with them kept ("include"). The Shapley estimate is the mean of
    ``include - exclude`` for the target-class probability.

    Parameters
    ----------
    subgraph_nodes : dict[str, Tensor]
        subgraph_nodes[nty] gives the tensor node IDs of node type nty
        in the subgraph, which are associated with this tree node

    Returns
    -------
    float
        Shapley value
    """
    subgraph_ids = {
        ntype: nodes.tolist() for ntype, nodes in subgraph_nodes.items()
    }

    # Obtain neighboring nodes of the subgraph g_i, P'.
    local_regions = dict(subgraph_ids)
    for _ in range(self.num_hops - 1):
        for c_etype in self.graph.canonical_etypes:
            src_ntype, _etype, dst_ntype = c_etype
            if (
                src_ntype not in local_regions
                or dst_ntype not in local_regions
            ):
                continue

            in_neighbors, _ = self.graph.in_edges(
                local_regions[dst_ntype], etype=c_etype
            )
            _, out_neighbors = self.graph.out_edges(
                local_regions[src_ntype], etype=c_etype
            )
            # Rebinding (not mutating) keeps `subgraph_ids` untouched.
            local_regions[src_ntype] = list(
                set(local_regions[src_ntype] + in_neighbors.tolist())
            )
            local_regions[dst_ntype] = list(
                set(local_regions[dst_ntype] + out_neighbors.tolist())
            )

    # The total node count exceeds every per-type node ID, so it can act
    # as a sentinel: whatever precedes it in a random permutation is the
    # coalition.
    split_point = self.graph.num_nodes()
    coalition_space = {
        ntype: list(set(local_regions[ntype]) - set(subgraph_ids[ntype]))
        + [split_point]
        for ntype in subgraph_nodes.keys()
    }

    # Index tensors that do not change between sampling steps. CPU index
    # tensors are accepted for masks on any device.
    region_idx = {
        ntype: torch.as_tensor(region, dtype=torch.long)
        for ntype, region in local_regions.items()
    }
    subgraph_idx = {
        ntype: torch.as_tensor(ids, dtype=torch.long)
        for ntype, ids in subgraph_ids.items()
    }

    marginal_contributions = []
    for _ in range(self.shapley_steps):
        selected_idx = {}
        for ntype, nodes in coalition_space.items():
            permuted_space = np.random.permutation(nodes)
            # Take a scalar element explicitly: converting a one-element
            # array with `int()` is deprecated since NumPy 1.25.
            split_idx = int(
                np.flatnonzero(permuted_space == split_point)[0]
            )
            selected_idx[ntype] = torch.as_tensor(
                permuted_space[:split_idx], dtype=torch.long
            )

        # Mask for coalition set S_i
        exclude_mask = {
            ntype: torch.ones(
                self.graph.num_nodes(ntype),
                device=self.feat[ntype].device,
            )
            for ntype in self.graph.ntypes
        }
        for ntype, idx in region_idx.items():
            exclude_mask[ntype][idx] = 0.0
        for ntype, idx in selected_idx.items():
            exclude_mask[ntype][idx] = 1.0

        # Mask for set S_i and g_i. The subgraph nodes must be switched on
        # in the *include* mask only; writing them into the exclude mask
        # would swap the two masks and flip the sign of the estimate.
        include_mask = {
            ntype: exclude_mask[ntype].clone()
            for ntype in self.graph.ntypes
        }
        for ntype, idx in subgraph_idx.items():
            include_mask[ntype][idx] = 1.0

        exclude_feat = {
            ntype: self.feat[ntype] * exclude_mask[ntype].unsqueeze(1)
            for ntype in self.graph.ntypes
        }
        include_feat = {
            ntype: self.feat[ntype] * include_mask[ntype].unsqueeze(1)
            for ntype in self.graph.ntypes
        }

        with torch.no_grad():
            exclude_probs = self.model(
                self.graph, exclude_feat, **self.kwargs
            ).softmax(dim=-1)
            exclude_value = exclude_probs[:, self.target_class]
            include_probs = self.model(
                self.graph, include_feat, **self.kwargs
            ).softmax(dim=-1)
            include_value = include_probs[:, self.target_class]
        marginal_contributions.append(include_value - exclude_value)

    return torch.cat(marginal_contributions).mean().item()
nx_graph = to_networkx(new_subg_homo.cpu()) largest_cc_nids = list( max(nx.weakly_connected_components(nx_graph), key=len) ) largest_cc_homo = node_subgraph(new_subg_homo, largest_cc_nids) largest_cc_hetero = to_heterogeneous( largest_cc_homo, new_subg.ntypes, new_subg.etypes ) # Follow steps for backtracking to original graph node ids # 1. retrieve instanced homograph from connected-component homograph # 2. retrieve instanced heterograph from instanced homograph # 3. retrieve hetero-subgraph from instanced heterograph # 4. retrieve orignal graph ids from subgraph node ids cc_nodes = { ntype: subg.ndata[NID][ntype][ new_subg.ndata[NID][ntype][ new_subg_homo.ndata[NID][ largest_cc_homo.ndata[NID][indicies] ] ] ] for ntype, indicies in largest_cc_hetero.ndata[NID].items() } else: available_ntypes = [ ntype for ntype in new_subg.ntypes if new_subg.num_nodes(ntype) > 0 ] chosen_ntype = np.random.choice(available_ntypes) # backtrack from subgraph node ids to entire graph chosen_node = subg.ndata[NID][chosen_ntype][ np.random.choice(new_subg.nodes[chosen_ntype].data[NID]) ] cc_nodes = { chosen_ntype: torch.tensor( [chosen_node], device=subg.device, ) } if str(cc_nodes) not in self.mcts_node_maps: child_mcts_node = MCTSNode(cc_nodes) self.mcts_node_maps[str(child_mcts_node)] = child_mcts_node else: child_mcts_node = self.mcts_node_maps[str(cc_nodes)] if str(child_mcts_node) not in mcts_children_maps: mcts_children_maps[str(child_mcts_node)] = child_mcts_node mcts_node.children = list(mcts_children_maps.values()) for child_mcts_node in mcts_node.children: if child_mcts_node.immediate_reward == 0: child_mcts_node.immediate_reward = self.shapley( child_mcts_node.nodes ) return mcts_node.children def mcts_rollout(self, mcts_node): r"""Perform a MCTS rollout. 
def explain_graph(self, graph, feat, target_class, **kwargs):
    r"""Find the most important subgraph from the original graph for
    the model to classify the graph into the target class.

    The search runs ``self.num_rollouts`` MCTS rollouts from a root that
    contains every node of ``graph`` and then returns, among all explored
    subgraphs whose *total* node count (summed over node types) does not
    exceed ``self.node_min``, the one with the highest immediate reward.

    Parameters
    ----------
    graph : DGLGraph
        A heterogeneous graph
    feat : dict[str, Tensor]
        The dictionary that associates input node features (values) with
        the respective node types (keys) present in the graph.
        The input features are of shape :math:`(N_t, D_t)`. :math:`N_t` is the
        number of nodes for node type :math:`t`, and :math:`D_t` is the feature size
        for node type :math:`t`
    target_class : int
        The target class to explain
    kwargs : dict
        Additional arguments passed to the GNN model

    Returns
    -------
    dict[str, Tensor]
        The dictionary associating tensor node ids (values) to
        node types (keys) that represents the most important subgraph

    Examples
    --------

    >>> import dgl
    >>> import dgl.function as fn
    >>> import torch as th
    >>> import torch.nn as nn
    >>> import torch.nn.functional as F
    >>> from dgl.nn import HeteroSubgraphX

    >>> class Model(nn.Module):
    ...     def __init__(self, in_dim, num_classes, canonical_etypes):
    ...         super(Model, self).__init__()
    ...         self.etype_weights = nn.ModuleDict(
    ...             {
    ...                 "_".join(c_etype): nn.Linear(in_dim, num_classes)
    ...                 for c_etype in canonical_etypes
    ...             }
    ...         )
    ...
    ...     def forward(self, graph, feat):
    ...         with graph.local_scope():
    ...             c_etype_func_dict = {}
    ...             for c_etype in graph.canonical_etypes:
    ...                 src_type, etype, dst_type = c_etype
    ...                 wh = self.etype_weights["_".join(c_etype)](feat[src_type])
    ...                 graph.nodes[src_type].data[f"h_{c_etype}"] = wh
    ...                 c_etype_func_dict[c_etype] = (
    ...                     fn.copy_u(f"h_{c_etype}", "m"),
    ...                     fn.mean("m", "h"),
    ...                 )
    ...             graph.multi_update_all(c_etype_func_dict, "sum")
    ...             hg = 0
    ...             for ntype in graph.ntypes:
    ...                 if graph.num_nodes(ntype):
    ...                     hg = hg + dgl.mean_nodes(graph, "h", ntype=ntype)
    ...             return hg

    >>> input_dim = 5
    >>> num_classes = 2
    >>> g = dgl.heterograph({("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 1, 1])})
    >>> g.nodes["user"].data["h"] = th.randn(g.num_nodes("user"), input_dim)
    >>> g.nodes["game"].data["h"] = th.randn(g.num_nodes("game"), input_dim)

    >>> transform = dgl.transforms.AddReverse()
    >>> g = transform(g)

    >>> # define and train the model
    >>> model = Model(input_dim, num_classes, g.canonical_etypes)
    >>> feat = g.ndata["h"]
    >>> optimizer = th.optim.Adam(model.parameters())
    >>> for epoch in range(10):
    ...     logits = model(g, feat)
    ...     loss = F.cross_entropy(logits, th.tensor([1]))
    ...     optimizer.zero_grad()
    ...     loss.backward()
    ...     optimizer.step()

    >>> # Explain for the graph
    >>> explainer = HeteroSubgraphX(model, num_hops=1)
    >>> # Returns a dict such as {'game': tensor([0]), 'user': tensor([1, 2])};
    >>> # the exact node ids depend on the randomly initialised model.
    >>> nodes = explainer.explain_graph(g, feat, target_class=1)
    """
    self.model.eval()
    # Adjacent f-strings instead of a backslash continuation, so that the
    # source indentation does not leak into the message.
    assert graph.num_nodes() > self.node_min, (
        f"The number of nodes in the graph {graph.num_nodes()} "
        f"should be bigger than {self.node_min}."
    )

    self.graph = graph
    self.feat = feat
    self.target_class = target_class
    self.kwargs = kwargs

    # book all nodes in MCTS
    self.mcts_node_maps = dict()

    root_dict = {ntype: graph.nodes(ntype) for ntype in graph.ntypes}
    root = MCTSNode(root_dict)
    self.mcts_node_maps[str(root)] = root

    for i in range(self.num_rollouts):
        if self.log:
            print(
                f"Rollout {i}/{self.num_rollouts}, "
                f"{len(self.mcts_node_maps)} subgraphs have been explored."
            )
        self.mcts_rollout(root)

    best_leaf = None
    best_immediate_reward = float("-inf")
    for mcts_node in self.mcts_node_maps.values():
        # ``mcts_node.nodes`` maps node type -> node-id tensor, so ``len`` of
        # the dict would count node *types*.  Count the nodes themselves,
        # exactly as the stopping criterion in ``mcts_rollout`` does.
        total_nodes = sum(len(nids) for nids in mcts_node.nodes.values())
        if total_nodes > self.node_min:
            continue

        if mcts_node.immediate_reward > best_immediate_reward:
            best_leaf = mcts_node
            best_immediate_reward = best_leaf.immediate_reward

    return best_leaf.nodes
The predecessors of each node are the k-nearest neighbors of the corresponding point. Parameters ---------- k : int The number of neighbors. Notes ----- The nearest neighbors found for a node include the node itself. Examples -------- The following example uses PyTorch backend. >>> import torch >>> from dgl.nn.pytorch.factory import KNNGraph >>> >>> kg = KNNGraph(2) >>> x = torch.tensor([[0,1], [1,2], [1,3], [100, 101], [101, 102], [50, 50]]) >>> g = kg(x) >>> print(g.edges()) (tensor([0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5]), tensor([0, 0, 1, 2, 1, 2, 5, 3, 4, 3, 4, 5])) """ def __init__(self, k): super(KNNGraph, self).__init__() self.k = k # pylint: disable=invalid-name def forward( self, x, algorithm="bruteforce-blas", dist="euclidean", exclude_self=False, ): r""" Forward computation. Parameters ---------- x : Tensor :math:`(M, D)` or :math:`(N, M, D)` where :math:`N` means the number of point sets, :math:`M` means the number of points in each point set, and :math:`D` means the size of features. algorithm : str, optional Algorithm used to compute the k-nearest neighbors. * 'bruteforce-blas' will first compute the distance matrix using BLAS matrix multiplication operation provided by backend frameworks. Then use topk algorithm to get k-nearest neighbors. This method is fast when the point set is small but has :math:`O(N^2)` memory complexity where :math:`N` is the number of points. * 'bruteforce' will compute distances pair by pair and directly select the k-nearest neighbors during distance computation. This method is slower than 'bruteforce-blas' but has less memory overhead (i.e., :math:`O(Nk)` where :math:`N` is the number of points, :math:`k` is the number of nearest neighbors per node) since we do not need to store all distances. * 'bruteforce-sharemem' (CUDA only) is similar to 'bruteforce' but use shared memory in CUDA devices for buffer. This method is faster than 'bruteforce' when the dimension of input points is not large. 
class SegmentedKNNGraph(nn.Module):
    r"""Layer that transforms one point set into a graph, or a batch of
    point sets with different number of points into a batched union of those
    graphs.

    If a batch of point sets is provided, then the point :math:`j` in the point
    set :math:`i` is mapped to graph node ID:
    :math:`\sum_{p<i} |V_p| + j`, where :math:`|V_p|` means the number of
    points in the point set :math:`p`.

    The predecessors of each node are the k-nearest neighbors of the
    corresponding point.

    Parameters
    ----------
    k : int
        The number of neighbors.

    Notes
    -----
    The nearest neighbors found for a node include the node itself.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import torch
    >>> from dgl.nn.pytorch.factory import SegmentedKNNGraph
    >>>
    >>> kg = SegmentedKNNGraph(2)
    >>> x = torch.tensor([[0,1],
    ...                   [1,2],
    ...                   [1,3],
    ...                   [100, 101],
    ...                   [101, 102],
    ...                   [50, 50],
    ...                   [24,25],
    ...                   [25,24]])
    >>> g = kg(x, [3,3,2])
    >>> print(g.edges())
    (tensor([0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 6, 6, 7, 7]),
     tensor([0, 0, 1, 2, 1, 2, 3, 4, 5, 3, 4, 5, 6, 7, 6, 7]))
    >>>
    """

    def __init__(self, k):
        super().__init__()
        # Number of neighbours requested for every point.
        self.k = k

    # pylint: disable=invalid-name
    def forward(
        self,
        x,
        segs,
        algorithm="bruteforce-blas",
        dist="euclidean",
        exclude_self=False,
    ):
        r"""Forward computation.

        Delegates to :func:`segmented_knn_graph` with the ``k`` stored on
        this layer.

        Parameters
        ----------
        x : Tensor
            :math:`(M, D)` where :math:`M` means the total number of points
            in all point sets, and :math:`D` means the size of features.
        segs : iterable of int
            :math:`(N)` integers where :math:`N` means the number of point
            sets.  The number of elements must sum up to :math:`M`, and each
            point-set size should be :math:`\ge k`.
        algorithm : str, optional
            Algorithm used to compute the k-nearest neighbors.

            * 'bruteforce-blas' will first compute the distance matrix
              using BLAS matrix multiplication operation provided by
              backend frameworks. Then use topk algorithm to get
              k-nearest neighbors. This method is fast when the point
              set is small but has :math:`O(N^2)` memory complexity where
              :math:`N` is the number of points.

            * 'bruteforce' will compute distances pair by pair and
              directly select the k-nearest neighbors during distance
              computation. This method is slower than 'bruteforce-blas'
              but has less memory overhead (i.e., :math:`O(Nk)` where :math:`N`
              is the number of points, :math:`k` is the number of nearest
              neighbors per node) since we do not need to store all distances.

            * 'bruteforce-sharemem' (CUDA only) is similar to 'bruteforce'
              but use shared memory in CUDA devices for buffer. This method is
              faster than 'bruteforce' when the dimension of input points
              is not large. This method is only available on CUDA device.

            * 'kd-tree' will use the kd-tree algorithm (CPU only).
              This method is suitable for low-dimensional data (e.g. 3D
              point clouds)

            * 'nn-descent' is an approximate approach from the paper
              *Efficient k-nearest neighbor graph construction for generic
              similarity measures* (Dong et al., WWW 2011). This method
              will search for nearest neighbor candidates in "neighbors'
              neighbors".

            (default: 'bruteforce-blas')
        dist : str, optional
            The distance metric used to compute distance between points. It
            can be the following metrics:

            * 'euclidean': Use Euclidean distance (L2 norm)
              :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
            * 'cosine': Use cosine distance.

            (default: 'euclidean')
        exclude_self : bool, optional
            If True, the output graph will not contain self loop edges, and
            each node will not be counted as one of its own k neighbors.
            If False, the output graph will contain self loop edges, and a
            node will be counted as one of its own k neighbors.

        Returns
        -------
        DGLGraph
            A batched DGLGraph without features.
        """
        num_neighbors = self.k
        batched_graph = segmented_knn_graph(
            x,
            num_neighbors,
            segs,
            algorithm=algorithm,
            dist=dist,
            exclude_self=exclude_self,
        )
        return batched_graph
(default: False) compute_mode : str, optional ``use_mm_for_euclid_dist_if_necessary`` - will use matrix multiplication approach to calculate euclidean distance (p = 2) if P > 25 or R > 25 ``use_mm_for_euclid_dist`` - will always use matrix multiplication approach to calculate euclidean distance (p = 2) ``donot_use_mm_for_euclid_dist`` - will never use matrix multiplication approach to calculate euclidean distance (p = 2). (default: donot_use_mm_for_euclid_dist) Examples -------- The following examples uses PyTorch backend. >>> import dgl >>> from dgl.nn.pytorch.factory import RadiusGraph >>> x = torch.tensor([[0.0, 0.0, 1.0], ... [1.0, 0.5, 0.5], ... [0.5, 0.2, 0.2], ... [0.3, 0.2, 0.4]]) >>> rg = RadiusGraph(0.75) >>> g = rg(x) # Each node has neighbors within 0.75 distance >>> g.edges() (tensor([0, 1, 2, 2, 3, 3]), tensor([3, 2, 1, 3, 0, 2])) When :attr:`get_distances` is True, forward pass returns the radius graph and distances for the corresponding edges. >>> x = torch.tensor([[0.0, 0.0, 1.0], ... [1.0, 0.5, 0.5], ... [0.5, 0.2, 0.2], ... [0.3, 0.2, 0.4]]) >>> rg = RadiusGraph(0.75) >>> g, dist = rg(x, get_distances=True) >>> g.edges() (tensor([0, 1, 2, 2, 3, 3]), tensor([3, 2, 1, 3, 0, 2])) >>> dist tensor([[0.7000], [0.6557], [0.6557], [0.2828], [0.7000], [0.2828]]) """ # pylint: disable=invalid-name def __init__( self, r, p=2, self_loop=False, compute_mode="donot_use_mm_for_euclid_dist", ): super(RadiusGraph, self).__init__() self.r = r self.p = p self.self_loop = self_loop self.compute_mode = compute_mode # pylint: disable=invalid-name def forward(self, x, get_distances=False): r""" Forward computation. Parameters ---------- x : Tensor The point coordinates. :math:`(N, D)` where :math:`N` means the number of points in the point set, and :math:`D` means the size of the features. It can be either on CPU or GPU. Device of the point coordinates specifies device of the radius graph. 
class SumPooling(nn.Module):
    r"""Apply sum pooling over the nodes in a graph.

    .. math::
        r^{(i)} = \sum_{k=1}^{N_i} x^{(i)}_k

    Notes
    -----
    Input: Could be one graph, or a batch of graphs. If using a batch of graphs,
    make sure nodes in all graphs have the same feature size, and concatenate
    nodes' feature together as the input.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import SumPooling
    >>>
    >>> g1 = dgl.rand_graph(3, 4)  # g1 is a random graph with 3 nodes and 4 edges
    >>> g1_node_feats = th.rand(3, 5)  # feature size is 5
    >>> g1_node_feats
    tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637],
            [0.8137, 0.8938, 0.8377, 0.4249, 0.6118],
            [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]])
    >>>
    >>> g2 = dgl.rand_graph(4, 6)  # g2 is a random graph with 4 nodes and 6 edges
    >>> g2_node_feats = th.rand(4, 5)  # feature size is 5
    >>> g2_node_feats
    tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658],
            [0.5278, 0.6365, 0.9990, 0.2351, 0.8945],
            [0.3134, 0.0580, 0.4349, 0.7949, 0.3891],
            [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]])
    >>>
    >>> sumpool = SumPooling()  # create a sum pooling layer

    Case 1: Input a single graph

    >>> sumpool(g1, g1_node_feats)
    tensor([[2.2282, 1.8667, 2.4338, 1.7540, 1.4511]])

    Case 2: Input a batch of graphs

    Build a batch of DGL graphs and concatenate all graphs' node features into one tensor.

    >>> batch_g = dgl.batch([g1, g2])
    >>> batch_f = th.cat([g1_node_feats, g2_node_feats])
    >>>
    >>> sumpool(batch_g, batch_f)
    tensor([[2.2282, 1.8667, 2.4338, 1.7540, 1.4511],
            [1.0608, 1.2080, 2.1780, 2.7849, 2.5420]])
    """

    def __init__(self):
        super().__init__()

    def forward(self, graph, feat):
        r"""

        Compute sum pooling.

        Parameters
        ----------
        graph : DGLGraph
            a DGLGraph or a batch of DGLGraphs
        feat : torch.Tensor
            The input feature with shape :math:`(N, D)`, where :math:`N` is the number
            of nodes in the graph, and :math:`D` means the size of features.

        Returns
        -------
        torch.Tensor
            The output feature with shape :math:`(B, D)`, where :math:`B` refers to the
            batch size of input graphs.
        """
        with graph.local_scope():
            # Stage the features under a scratch key inside the local scope
            # and let the readout helper reduce them.
            graph.ndata["h"] = feat
            return sum_nodes(graph, "h")
If using a batch of graphs, make sure nodes in all graphs have the same feature size, and concatenate nodes' feature together as the input. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch as th >>> from dgl.nn import AvgPooling >>> >>> g1 = dgl.rand_graph(3, 4) # g1 is a random graph with 3 nodes and 4 edges >>> g1_node_feats = th.rand(3, 5) # feature size is 5 >>> g1_node_feats tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637], [0.8137, 0.8938, 0.8377, 0.4249, 0.6118], [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]]) >>> >>> g2 = dgl.rand_graph(4, 6) # g2 is a random graph with 4 nodes and 6 edges >>> g2_node_feats = th.rand(4, 5) # feature size is 5 >>> g2_node_feats tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658], [0.5278, 0.6365, 0.9990, 0.2351, 0.8945], [0.3134, 0.0580, 0.4349, 0.7949, 0.3891], [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]]) >>> >>> avgpool = AvgPooling() # create an average pooling layer Case 1: Input single graph >>> avgpool(g1, g1_node_feats) tensor([[0.7427, 0.6222, 0.8113, 0.5847, 0.4837]]) Case 2: Input a batch of graphs Build a batch of DGL graphs and concatenate all graphs' note features into one tensor. >>> batch_g = dgl.batch([g1, g2]) >>> batch_f = th.cat([g1_node_feats, g2_node_feats]) >>> >>> avgpool(batch_g, batch_f) tensor([[0.7427, 0.6222, 0.8113, 0.5847, 0.4837], [0.2652, 0.3020, 0.5445, 0.6962, 0.6355]]) """ def __init__(self): super(AvgPooling, self).__init__() def forward(self, graph, feat): r""" Compute average pooling. Parameters ---------- graph : DGLGraph A DGLGraph or a batch of DGLGraphs. feat : torch.Tensor The input feature with shape :math:`(N, D)`, where :math:`N` is the number of nodes in the graph, and :math:`D` means the size of features. Returns ------- torch.Tensor The output feature with shape :math:`(B, D)`, where :math:`B` refers to the batch size of input graphs. 
class MaxPooling(nn.Module):
    r"""Apply max pooling over the nodes in a graph.

    .. math::
        r^{(i)} = \max_{k=1}^{N_i}\left( x^{(i)}_k \right)

    Notes
    -----
    Input: Could be one graph, or a batch of graphs. If using a batch of graphs,
    make sure nodes in all graphs have the same feature size, and concatenate
    nodes' feature together as the input.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import MaxPooling
    >>>
    >>> g1 = dgl.rand_graph(3, 4)  # g1 is a random graph with 3 nodes and 4 edges
    >>> g1_node_feats = th.rand(3, 5)  # feature size is 5
    >>> g1_node_feats
    tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637],
            [0.8137, 0.8938, 0.8377, 0.4249, 0.6118],
            [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]])
    >>>
    >>> g2 = dgl.rand_graph(4, 6)  # g2 is a random graph with 4 nodes and 6 edges
    >>> g2_node_feats = th.rand(4, 5)  # feature size is 5
    >>> g2_node_feats
    tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658],
            [0.5278, 0.6365, 0.9990, 0.2351, 0.8945],
            [0.3134, 0.0580, 0.4349, 0.7949, 0.3891],
            [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]])
    >>>
    >>> maxpool = MaxPooling()  # create a max pooling layer

    Case 1: Input a single graph

    >>> maxpool(g1, g1_node_feats)
    tensor([[0.8948, 0.9030, 0.9137, 0.7567, 0.6118]])

    Case 2: Input a batch of graphs

    Build a batch of DGL graphs and concatenate all graphs' node features into one tensor.

    >>> batch_g = dgl.batch([g1, g2])
    >>> batch_f = th.cat([g1_node_feats, g2_node_feats])
    >>>
    >>> maxpool(batch_g, batch_f)
    tensor([[0.8948, 0.9030, 0.9137, 0.7567, 0.6118],
            [0.5278, 0.6365, 0.9990, 0.9028, 0.8945]])
    """

    def __init__(self):
        super().__init__()

    def forward(self, graph, feat):
        r"""Compute max pooling.

        Parameters
        ----------
        graph : DGLGraph
            A DGLGraph or a batch of DGLGraphs.
        feat : torch.Tensor
            The input feature with shape :math:`(N, *)`, where
            :math:`N` is the number of nodes in the graph.

        Returns
        -------
        torch.Tensor
            The output feature with shape :math:`(B, *)`, where
            :math:`B` refers to the batch size.
        """
        with graph.local_scope():
            # Stage the features under a scratch key inside the local scope
            # and let the readout helper reduce them.
            graph.ndata["h"] = feat
            return max_nodes(graph, "h")
class GlobalAttentionPooling(nn.Module):
    r"""Global Attention Pooling from `Gated Graph Sequence Neural Networks
    <https://arxiv.org/abs/1511.05493>`__

    .. math::
        r^{(i)} = \sum_{k=1}^{N_i}\mathrm{softmax}\left(f_{gate}
        \left(x^{(i)}_k\right)\right) f_{feat}\left(x^{(i)}_k\right)

    Parameters
    ----------
    gate_nn : torch.nn.Module
        A neural network that computes attention scores for each feature.
        Its output must have size 1 on the last axis.
    feat_nn : torch.nn.Module, optional
        A neural network applied to each feature before combining them with attention
        scores.  When ``None`` (the default) the input features are used as is.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import GlobalAttentionPooling
    >>>
    >>> g1 = dgl.rand_graph(3, 4)  # g1 is a random graph with 3 nodes and 4 edges
    >>> g1_node_feats = th.rand(3, 5)  # feature size is 5
    >>> g1_node_feats
    tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637],
            [0.8137, 0.8938, 0.8377, 0.4249, 0.6118],
            [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]])
    >>>
    >>> g2 = dgl.rand_graph(4, 6)  # g2 is a random graph with 4 nodes and 6 edges
    >>> g2_node_feats = th.rand(4, 5)  # feature size is 5
    >>> g2_node_feats
    tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658],
            [0.5278, 0.6365, 0.9990, 0.2351, 0.8945],
            [0.3134, 0.0580, 0.4349, 0.7949, 0.3891],
            [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]])
    >>>
    >>> gate_nn = th.nn.Linear(5, 1)  # the gate layer that maps node feature to scalar
    >>> gap = GlobalAttentionPooling(gate_nn)  # create a Global Attention Pooling layer

    Case 1: Input a single graph

    >>> gap(g1, g1_node_feats)
    tensor([[0.7410, 0.6032, 0.8111, 0.5942, 0.4762]],
           grad_fn=<...>)

    Case 2: Input a batch of graphs

    Build a batch of DGL graphs and concatenate all graphs' node features into one tensor.

    >>> batch_g = dgl.batch([g1, g2])
    >>> batch_f = th.cat([g1_node_feats, g2_node_feats], 0)
    >>>
    >>> gap(batch_g, batch_f)
    tensor([[0.7410, 0.6032, 0.8111, 0.5942, 0.4762],
            [0.2417, 0.2743, 0.5054, 0.7356, 0.6146]],
           grad_fn=<...>)

    Notes
    -----
    See the GGNN example in the DGL repository on how to use GatedGraphConv
    and GlobalAttentionPooling layer to build a Graph Neural Network that
    can solve Sudoku.
    """

    def __init__(self, gate_nn, feat_nn=None):
        super(GlobalAttentionPooling, self).__init__()
        self.gate_nn = gate_nn
        self.feat_nn = feat_nn

    def forward(self, graph, feat, get_attention=False):
        r"""

        Compute global attention pooling.

        Parameters
        ----------
        graph : DGLGraph
            A DGLGraph or a batch of DGLGraphs.
        feat : torch.Tensor
            The input node feature with shape :math:`(N, D)` where :math:`N` is the
            number of nodes in the graph, and :math:`D` means the size of features.
        get_attention : bool, optional
            Whether to return the attention values from gate_nn. Default to False.

        Returns
        -------
        torch.Tensor
            The output feature with shape :math:`(B, D)`, where :math:`B` refers
            to the batch size.
        torch.Tensor, optional
            The attention values of shape :math:`(N, 1)`, where :math:`N` is the number of
            nodes in the graph. This is returned only when :attr:`get_attention` is ``True``.
        """
        with graph.local_scope():
            gate = self.gate_nn(feat)
            assert (
                gate.shape[-1] == 1
            ), "The output of gate_nn should have size 1 at the last axis."
            # Compare against None explicitly: container modules define
            # ``__len__``, so a plain truthiness test would skip an empty
            # container instead of calling it.
            if self.feat_nn is not None:
                feat = self.feat_nn(feat)

            # Normalise the gate scores over the nodes of each graph.
            graph.ndata["gate"] = gate
            gate = softmax_nodes(graph, "gate")
            graph.ndata.pop("gate")

            # Attention-weighted sum of the (optionally transformed) features.
            graph.ndata["r"] = feat * gate
            readout = sum_nodes(graph, "r")
            graph.ndata.pop("r")

            if get_attention:
                return readout, gate
            return readout
>>> import dgl >>> import torch as th >>> from dgl.nn import Set2Set >>> >>> g1 = dgl.rand_graph(3, 4) # g1 is a random graph with 3 nodes and 4 edges >>> g1_node_feats = th.rand(3, 5) # feature size is 5 >>> g1_node_feats tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637], [0.8137, 0.8938, 0.8377, 0.4249, 0.6118], [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]]) >>> >>> g2 = dgl.rand_graph(4, 6) # g2 is a random graph with 4 nodes and 6 edges >>> g2_node_feats = th.rand(4, 5) # feature size is 5 >>> g2_node_feats tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658], [0.5278, 0.6365, 0.9990, 0.2351, 0.8945], [0.3134, 0.0580, 0.4349, 0.7949, 0.3891], [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]]) >>> >>> s2s = Set2Set(5, 2, 1) # create a Set2Set layer(n_iters=2, n_layers=1) Case 1: Input a single graph >>> s2s(g1, g1_node_feats) tensor([[-0.0235, -0.2291, 0.2654, 0.0376, 0.1349, 0.7560, 0.5822, 0.8199, 0.5960, 0.4760]], grad_fn=) Case 2: Input a batch of graphs Build a batch of DGL graphs and concatenate all graphs' node features into one tensor. >>> batch_g = dgl.batch([g1, g2]) >>> batch_f = th.cat([g1_node_feats, g2_node_feats], 0) >>> >>> s2s(batch_g, batch_f) tensor([[-0.0235, -0.2291, 0.2654, 0.0376, 0.1349, 0.7560, 0.5822, 0.8199, 0.5960, 0.4760], [-0.0483, -0.2010, 0.2324, 0.0145, 0.1361, 0.2703, 0.3078, 0.5529, 0.6876, 0.6399]], grad_fn=) Notes ----- Set2Set is widely used in molecular property predictions, see `dgl-lifesci's MPNN example `__ on how to use DGL's Set2Set layer in graph property prediction applications. """ def __init__(self, input_dim, n_iters, n_layers): super(Set2Set, self).__init__() self.input_dim = input_dim self.output_dim = 2 * input_dim self.n_iters = n_iters self.n_layers = n_layers self.lstm = th.nn.LSTM(self.output_dim, self.input_dim, n_layers) self.reset_parameters() def reset_parameters(self): """Reinitialize learnable parameters.""" self.lstm.reset_parameters() def forward(self, graph, feat): r""" Compute set2set pooling. 
Parameters ---------- graph : DGLGraph The input graph. feat : torch.Tensor The input feature with shape :math:`(N, D)` where :math:`N` is the number of nodes in the graph, and :math:`D` means the size of features. Returns ------- torch.Tensor The output feature with shape :math:`(B, D)`, where :math:`B` refers to the batch size, and :math:`D` means the size of features. """ with graph.local_scope(): batch_size = graph.batch_size h = ( feat.new_zeros((self.n_layers, batch_size, self.input_dim)), feat.new_zeros((self.n_layers, batch_size, self.input_dim)), ) q_star = feat.new_zeros(batch_size, self.output_dim) for _ in range(self.n_iters): q, h = self.lstm(q_star.unsqueeze(0), h) q = q.view(batch_size, self.input_dim) e = (feat * broadcast_nodes(graph, q)).sum(dim=-1, keepdim=True) graph.ndata["e"] = e alpha = softmax_nodes(graph, "e") graph.ndata["r"] = feat * alpha readout = sum_nodes(graph, "r") q_star = th.cat([q, readout], dim=-1) return q_star def extra_repr(self): """Set the extra representation of the module. which will come into effect when printing the model. """ summary = "n_iters={n_iters}" return summary.format(**self.__dict__) def _gen_mask(lengths_x, lengths_y, max_len_x, max_len_y): """Generate binary mask array for given x and y input pairs. Parameters ---------- lengths_x : Tensor The int tensor indicates the segment information of x. lengths_y : Tensor The int tensor indicates the segment information of y. max_len_x : int The maximum element in lengths_x. max_len_y : int The maximum element in lengths_y. 
class MultiHeadAttention(nn.Module):
    r"""Multi-Head Attention block, used in Transformer, Set Transformer and so on

    Queries are computed from one packed tensor and keys/values from another;
    both are segmented into per-graph sets by explicit length arrays.

    Parameters
    ----------
    d_model : int
        The feature size (input and output) in Multi-Head Attention layer.
    num_heads : int
        The number of heads.
    d_head : int
        The hidden size per head.
    d_ff : int
        The inner hidden size in the Feed-Forward Neural Network.
    dropouth : float
        The dropout rate of each sublayer.
    dropouta : float
        The dropout rate of attention heads.

    Notes
    -----
    This module was used in SetTransformer layer.
    """

    def __init__(
        self, d_model, num_heads, d_head, d_ff, dropouth=0.0, dropouta=0.0
    ):
        super().__init__()
        self.d_model = d_model
        self.num_heads = num_heads
        self.d_head = d_head
        self.d_ff = d_ff

        # Width of the concatenated per-head representation.
        inner_dim = num_heads * d_head

        # NOTE: submodules are registered in a fixed order; reset_parameters()
        # walks them in that order, so it determines seeded initialisation.
        self.proj_q = nn.Linear(d_model, inner_dim, bias=False)
        self.proj_k = nn.Linear(d_model, inner_dim, bias=False)
        self.proj_v = nn.Linear(d_model, inner_dim, bias=False)
        self.proj_o = nn.Linear(inner_dim, d_model, bias=False)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.ReLU(),
            nn.Dropout(dropouth),
            nn.Linear(d_ff, d_model),
        )
        # NOTE(review): these two dropout modules are constructed here but
        # forward() below does not call them.
        self.droph = nn.Dropout(dropouth)
        self.dropa = nn.Dropout(dropouta)
        self.norm_in = nn.LayerNorm(d_model)
        self.norm_inter = nn.LayerNorm(d_model)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        # Only matrices (dim > 1) get Xavier init; vectors such as biases and
        # LayerNorm parameters keep their defaults.
        matrices = (param for param in self.parameters() if param.dim() > 1)
        for weight in matrices:
            nn.init.xavier_uniform_(weight)

    def forward(self, x, mem, lengths_x, lengths_mem):
        """
        Compute multi-head self-attention.

        Parameters
        ----------
        x : torch.Tensor
            The input tensor used to compute queries.
        mem : torch.Tensor
            The memory tensor used to compute keys and values.
        lengths_x : list
            The array of node numbers, used to segment x.
        lengths_mem : list
            The array of node numbers, used to segment mem.

        Returns
        -------
        torch.Tensor
            The packed output, obtained by adding the attention result to
            ``x``, normalizing, then applying the feed-forward sublayer with a
            second residual connection and normalization.
        """
        n_sets = len(lengths_x)
        longest_x = max(lengths_x)
        longest_mem = max(lengths_mem)
        dev = x.device
        lengths_x = th.as_tensor(lengths_x, dtype=th.int64, device=dev)
        lengths_mem = th.as_tensor(lengths_mem, dtype=th.int64, device=dev)

        # Split the projected features into heads, then pad every set to the
        # longest one: (B, max_len_x/mem, num_heads, d_head).
        per_head = (-1, self.num_heads, self.d_head)
        q = F.pad_packed_tensor(self.proj_q(x).view(*per_head), lengths_x, 0)
        k = F.pad_packed_tensor(
            self.proj_k(mem).view(*per_head), lengths_mem, 0
        )
        v = F.pad_packed_tensor(
            self.proj_v(mem).view(*per_head), lengths_mem, 0
        )

        # Scaled attention scores: (B, num_heads, max_len_x, max_len_mem).
        scores = th.einsum("bxhd,byhd->bhxy", q, k) / np.sqrt(self.d_head)

        # Padded positions are excluded from the softmax.
        padded = _gen_mask(lengths_x, lengths_mem, longest_x, longest_mem) == 0
        weights = th.softmax(scores.masked_fill(padded, -float("inf")), dim=-1)
        # Rows that are entirely padding turn into NaN after softmax; zero
        # them out, see https://github.com/dmlc/dgl/issues/2657
        weights = weights.masked_fill(padded, 0.0)

        # Weighted sum of values, heads concatenated, then output projection.
        attended = th.einsum("bhxy,byhd->bxhd", weights, v)
        attended = attended.contiguous().view(
            n_sets, longest_x, self.num_heads * self.d_head
        )
        # Drop the padding again so the result lines up with ``x``.
        attended = F.pack_padded_tensor(self.proj_o(attended), lengths_x)

        # intra norm
        x = self.norm_in(x + attended)
        # inter norm
        return self.norm_inter(x + self.ffn(x))
class InducedSetAttentionBlock(nn.Module):
    r"""ISAB block from `Set Transformer: A Framework for Attention-based
    Permutation-Invariant Neural Networks <https://arxiv.org/pdf/1810.00825.pdf>`__

    Parameters
    ----------
    m : int
        The number of induced vectors.
    d_model : int
        The feature size (input and output) in Multi-Head Attention layer.
    num_heads : int
        The number of heads.
    d_head : int
        The hidden size per head.
    d_ff : int
        The inner hidden size in the Feed-Forward Neural Network.
    dropouth : float
        The dropout rate of each sublayer.
    dropouta : float
        The dropout rate of attention heads.

    Notes
    -----
    This module was used in SetTransformer layer.
    """

    def __init__(
        self, m, d_model, num_heads, d_head, d_ff, dropouth=0.0, dropouta=0.0
    ):
        super().__init__()
        self.m = m
        if m == 1:
            dgl_warning(
                "if m is set to 1, the parameters corresponding to query and key "
                "projections would not get updated during training."
            )
        self.d_model = d_model
        # Allocated uninitialised; filled in by reset_parameters() below.
        self.inducing_points = nn.Parameter(th.FloatTensor(m, d_model))

        # Two identically configured attention blocks: index 0 is called with
        # the inducing points as queries, index 1 with the input as queries.
        attention_pair = []
        for _ in range(2):
            attention_pair.append(
                MultiHeadAttention(
                    d_model,
                    num_heads,
                    d_head,
                    d_ff,
                    dropouth=dropouth,
                    dropouta=dropouta,
                )
            )
        self.mha = nn.ModuleList(attention_pair)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        nn.init.xavier_uniform_(self.inducing_points)

    def forward(self, feat, lengths):
        """
        Compute an Induced Set Attention Block.

        Parameters
        ----------
        feat : torch.Tensor
            The input feature.
        lengths : list
            The array of node numbers, used to segment feat tensor.

        Returns
        -------
        torch.Tensor
            The output feature
        """
        n_sets = len(lengths)
        # Every set gets its own copy of the ``m`` inducing points.
        induced = self.inducing_points.repeat(n_sets, 1)
        # Step 1: inducing points are the queries, ``feat`` is the memory.
        summary = self.mha[0](induced, feat, [self.m] * n_sets, lengths)
        # Step 2: ``feat`` is the query, the step-1 result is the memory.
        return self.mha[1](feat, summary, lengths, [self.m] * n_sets)

    def extra_repr(self):
        """Set the extra representation of the module.
        which will come into effect when printing the model.
        """
        rows, cols = self.inducing_points.shape[0], self.inducing_points.shape[1]
        return f"InducedVector: ({rows}, {cols})"
class SetTransformerEncoder(nn.Module):
    r"""The Encoder module from `Set Transformer: A Framework for Attention-based
    Permutation-Invariant Neural Networks <https://arxiv.org/pdf/1810.00825.pdf>`__

    Parameters
    ----------
    d_model : int
        The hidden size of the model.
    n_heads : int
        The number of heads.
    d_head : int
        The hidden size of each head.
    d_ff : int
        The kernel size in FFN (Positionwise Feed-Forward Network) layer.
    n_layers : int
        The number of layers.
    block_type : str
        Building block type: 'sab' (Set Attention Block) or 'isab' (Induced
        Set Attention Block).
    m : int or None
        The number of induced vectors in ISAB Block. Set to None if block type
        is 'sab'.
    dropouth : float
        The dropout rate of each sublayer.
    dropouta : float
        The dropout rate of attention heads.

    Raises
    ------
    KeyError
        If ``block_type`` is 'isab' and ``m`` is None, or if a layer has to be
        built for a ``block_type`` other than 'sab' or 'isab'.

    Examples
    --------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import SetTransformerEncoder
    >>>
    >>> g1 = dgl.rand_graph(3, 4)  # g1 is a random graph with 3 nodes and 4 edges
    >>> g1_node_feats = th.rand(3, 5)  # feature size is 5
    >>> g1_node_feats
    tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637],
            [0.8137, 0.8938, 0.8377, 0.4249, 0.6118],
            [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]])
    >>>
    >>> g2 = dgl.rand_graph(4, 6)  # g2 is a random graph with 4 nodes and 6 edges
    >>> g2_node_feats = th.rand(4, 5)  # feature size is 5
    >>> g2_node_feats
    tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658],
            [0.5278, 0.6365, 0.9990, 0.2351, 0.8945],
            [0.3134, 0.0580, 0.4349, 0.7949, 0.3891],
            [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]])
    >>>
    >>> set_trans_enc = SetTransformerEncoder(5, 4, 4, 20)  # create a settrans encoder.

    Case 1: Input a single graph

    >>> set_trans_enc(g1, g1_node_feats)
    tensor([[ 0.1262, -1.9081,  0.7287,  0.1678,  0.8854],
            [-0.0634, -1.1996,  0.6955, -0.9230,  1.4904],
            [-0.9972, -0.7924,  0.6907, -0.5221,  1.6211]],
           grad_fn=<NativeLayerNormBackward>)

    Case 2: Input a batch of graphs

    Build a batch of DGL graphs and concatenate all graphs' node features into one tensor.

    >>> batch_g = dgl.batch([g1, g2])
    >>> batch_f = th.cat([g1_node_feats, g2_node_feats])
    >>>
    >>> set_trans_enc(batch_g, batch_f)
    tensor([[ 0.1262, -1.9081,  0.7287,  0.1678,  0.8854],
            [-0.0634, -1.1996,  0.6955, -0.9230,  1.4904],
            [-0.9972, -0.7924,  0.6907, -0.5221,  1.6211],
            [-0.7973, -1.3203,  0.0634,  0.5237,  1.5306],
            [-0.4497, -1.0920,  0.8470, -0.8030,  1.4977],
            [-0.4940, -1.6045,  0.2363,  0.4885,  1.3737],
            [-0.9840, -1.0913, -0.0099,  0.4653,  1.6199]],
           grad_fn=<NativeLayerNormBackward>)

    See Also
    --------
    SetTransformerDecoder

    Notes
    -----
    SetTransformerEncoder is not a readout layer, the tensor it returned is nodewise
    representation instead of graphwise representation, and the SetTransformerDecoder
    would return a graph readout tensor.
    """

    def __init__(
        self,
        d_model,
        n_heads,
        d_head,
        d_ff,
        n_layers=1,
        block_type="sab",
        m=None,
        dropouth=0.0,
        dropouta=0.0,
    ):
        super(SetTransformerEncoder, self).__init__()
        self.n_layers = n_layers
        self.block_type = block_type
        self.m = m
        layers = []
        if block_type == "isab" and m is None:
            raise KeyError(
                "The number of inducing points is not specified in ISAB block."
            )

        for _ in range(n_layers):
            if block_type == "sab":
                layers.append(
                    SetAttentionBlock(
                        d_model,
                        n_heads,
                        d_head,
                        d_ff,
                        dropouth=dropouth,
                        dropouta=dropouta,
                    )
                )
            elif block_type == "isab":
                layers.append(
                    InducedSetAttentionBlock(
                        m,
                        d_model,
                        n_heads,
                        d_head,
                        d_ff,
                        dropouth=dropouth,
                        dropouta=dropouta,
                    )
                )
            else:
                # The placeholder used to be left unformatted, so the message
                # showed a literal "{}" instead of the offending value.
                raise KeyError(
                    "Unrecognized block type {}: we only support sab/isab".format(
                        block_type
                    )
                )

        self.layers = nn.ModuleList(layers)

    def forward(self, graph, feat):
        """
        Compute the Encoder part of Set Transformer.

        Parameters
        ----------
        graph : DGLGraph
            The input graph.
        feat : torch.Tensor
            The input feature with shape :math:`(N, D)`, where :math:`N` is the
            number of nodes in the graph.

        Returns
        -------
        torch.Tensor
            The output feature with shape :math:`(N, D)`.
        """
        # Per-graph node counts tell each block how to segment ``feat``.
        lengths = graph.batch_num_nodes()
        for layer in self.layers:
            feat = layer(feat, lengths)
        return feat
Examples -------- >>> import dgl >>> import torch as th >>> from dgl.nn import SetTransformerDecoder >>> >>> g1 = dgl.rand_graph(3, 4) # g1 is a random graph with 3 nodes and 4 edges >>> g1_node_feats = th.rand(3, 5) # feature size is 5 >>> g1_node_feats tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637], [0.8137, 0.8938, 0.8377, 0.4249, 0.6118], [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]]) >>> >>> g2 = dgl.rand_graph(4, 6) # g2 is a random graph with 4 nodes and 6 edges >>> g2_node_feats = th.rand(4, 5) # feature size is 5 >>> g2_node_feats tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658], [0.5278, 0.6365, 0.9990, 0.2351, 0.8945], [0.3134, 0.0580, 0.4349, 0.7949, 0.3891], [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]]) >>> >>> set_trans_dec = SetTransformerDecoder(5, 4, 4, 20, 1, 3) # define the layer Case 1: Input a single graph >>> set_trans_dec(g1, g1_node_feats) tensor([[-0.5538, 1.8726, -1.0470, 0.0276, -0.2994, -0.6317, 1.6754, -1.3189, 0.2291, 0.0461, -0.4042, 0.8387, -1.7091, 1.0845, 0.1902]], grad_fn=) Case 2: Input a batch of graphs Build a batch of DGL graphs and concatenate all graphs' node features into one tensor. 
class WeightAndSum(nn.Module):
    """Compute importance weights for atoms and perform a weighted sum.

    Parameters
    ----------
    in_feats : int
        Input atom feature size

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import WeightAndSum
    >>>
    >>> g1 = dgl.rand_graph(3, 4)  # g1 is a random graph with 3 nodes and 4 edges
    >>> g1_node_feats = th.rand(3, 5)  # feature size is 5
    >>> g1_node_feats
    tensor([[0.8948, 0.0699, 0.9137, 0.7567, 0.3637],
            [0.8137, 0.8938, 0.8377, 0.4249, 0.6118],
            [0.5197, 0.9030, 0.6825, 0.5725, 0.4755]])
    >>>
    >>> g2 = dgl.rand_graph(4, 6)  # g2 is a random graph with 4 nodes and 6 edges
    >>> g2_node_feats = th.rand(4, 5)  # feature size is 5
    >>> g2_node_feats
    tensor([[0.2053, 0.2426, 0.4111, 0.9028, 0.5658],
            [0.5278, 0.6365, 0.9990, 0.2351, 0.8945],
            [0.3134, 0.0580, 0.4349, 0.7949, 0.3891],
            [0.0142, 0.2709, 0.3330, 0.8521, 0.6925]])
    >>>
    >>> weight_and_sum = WeightAndSum(5)  # create a weight and sum layer(in_feats=5)

    Case 1: Input a single graph

    >>> weight_and_sum(g1, g1_node_feats)
    tensor([[1.2194, 0.9490, 1.3235, 0.9609, 0.7710]], grad_fn=<SegmentReduceBackward>)

    Case 2: Input a batch of graphs

    Build a batch of DGL graphs and concatenate all graphs' node features into one tensor.

    >>> batch_g = dgl.batch([g1, g2])
    >>> batch_f = th.cat([g1_node_feats, g2_node_feats])
    >>>
    >>> weight_and_sum(batch_g, batch_f)
    tensor([[1.2194, 0.9490, 1.3235, 0.9609, 0.7710],
            [0.5322, 0.5840, 1.0729, 1.3665, 1.2360]], grad_fn=<SegmentReduceBackward>)

    Notes
    -----
    WeightAndSum module was commonly used in molecular property prediction networks,
    see the GCN predictor in `dgl-lifesci <https://github.com/awslabs/dgl-lifesci>`__
    to understand how to use WeightAndSum layer to get the graph readout output.
    """

    def __init__(self, in_feats):
        super().__init__()
        self.in_feats = in_feats
        # One scalar weight per atom, squashed into (0, 1) by the sigmoid.
        scorer = nn.Linear(in_feats, 1)
        self.atom_weighting = nn.Sequential(scorer, nn.Sigmoid())

    def forward(self, g, feats):
        """Compute molecule representations out of atom representations

        Parameters
        ----------
        g : DGLGraph
            DGLGraph with batch size B for processing multiple molecules in parallel
        feats : FloatTensor of shape (N, self.in_feats)
            Representations for all atoms in the molecules
            * N is the total number of atoms in all molecules

        Returns
        -------
        FloatTensor of shape (B, self.in_feats)
            Representations for B molecules
        """
        # The "h" and "w" node fields are written inside a local scope.
        with g.local_scope():
            g.ndata["h"] = feats
            g.ndata["w"] = self.atom_weighting(g.ndata["h"])
            # Sum the "h" field over nodes, weighted by the "w" field.
            return sum_nodes(g, "h", "w")
:math:`b` is attention bias, which can be additive or multiplicative according to the operator :math:`\circ`. Parameters ---------- feat_size : int Feature size. num_heads : int Number of attention heads, by which :attr:`feat_size` is divisible. bias : bool, optional If True, it uses bias for linear projection. Default: True. attn_bias_type : str, optional The type of attention bias used for modifying attention. Selected from 'add' or 'mul'. Default: 'add'. * 'add' is for additive attention bias. * 'mul' is for multiplicative attention bias. attn_drop : float, optional Dropout probability on attention weights. Defalt: 0.1. Examples -------- >>> import torch as th >>> from dgl.nn import BiasedMHA >>> ndata = th.rand(16, 100, 512) >>> bias = th.rand(16, 100, 100, 8) >>> net = BiasedMHA(feat_size=512, num_heads=8) >>> out = net(ndata, bias) """ def __init__( self, feat_size, num_heads, bias=True, attn_bias_type="add", attn_drop=0.1, ): super().__init__() self.feat_size = feat_size self.num_heads = num_heads self.head_dim = feat_size // num_heads assert ( self.head_dim * num_heads == feat_size ), "feat_size must be divisible by num_heads" self.scaling = self.head_dim**-0.5 self.attn_bias_type = attn_bias_type self.q_proj = nn.Linear(feat_size, feat_size, bias=bias) self.k_proj = nn.Linear(feat_size, feat_size, bias=bias) self.v_proj = nn.Linear(feat_size, feat_size, bias=bias) self.out_proj = nn.Linear(feat_size, feat_size, bias=bias) self.dropout = nn.Dropout(p=attn_drop) self.reset_parameters() def reset_parameters(self): """ Initialize parameters of projection matrices, the same settings as in the original implementation of the paper. 
""" nn.init.xavier_uniform_(self.q_proj.weight, gain=2**-0.5) nn.init.xavier_uniform_(self.k_proj.weight, gain=2**-0.5) nn.init.xavier_uniform_(self.v_proj.weight, gain=2**-0.5) nn.init.xavier_uniform_(self.out_proj.weight) if self.out_proj.bias is not None: nn.init.constant_(self.out_proj.bias, 0.0) def forward(self, ndata, attn_bias=None, attn_mask=None): """Forward computation. Parameters ---------- ndata : torch.Tensor A 3D input tensor. Shape: (batch_size, N, :attr:`feat_size`), where N is the maximum number of nodes. attn_bias : torch.Tensor, optional The attention bias used for attention modification. Shape: (batch_size, N, N, :attr:`num_heads`). attn_mask : torch.Tensor, optional The attention mask used for avoiding computation on invalid positions, where invalid positions are indicated by `True` values. Shape: (batch_size, N, N). Note: For rows corresponding to unexisting nodes, make sure at least one entry is set to `False` to prevent obtaining NaNs with softmax. Returns ------- y : torch.Tensor The output tensor. 
class DegreeEncoder(nn.Module):
    r"""Degree Encoder, as introduced in
    `Do Transformers Really Perform Bad for Graph Representation?
    <https://proceedings.neurips.cc/paper/2021/file/f1c1592588411002af340cbaedd6fc33-Paper.pdf>`__

    This module is a learnable degree embedding module.

    Parameters
    ----------
    max_degree : int
        Upper bound of degrees to be encoded.
        Each degree will be clamped into the range [0, ``max_degree``].
    embedding_dim : int
        Output dimension of embedding vectors.
    direction : str, optional
        Degrees of which direction to be encoded,
        selected from ``in``, ``out`` and ``both``.
        ``both`` encodes degrees from both directions
        and output the addition of them.
        Default : ``both``.

    Raises
    ------
    ValueError
        If ``direction`` is not one of ``in``, ``out`` or ``both``.

    Example
    -------
    >>> import dgl
    >>> from dgl.nn import DegreeEncoder
    >>> import torch as th
    >>> from torch.nn.utils.rnn import pad_sequence

    >>> g1 = dgl.graph(([0,0,0,1,1,2,3,3], [1,2,3,0,3,0,0,1]))
    >>> g2 = dgl.graph(([0,1], [1,0]))
    >>> in_degree = pad_sequence([g1.in_degrees(), g2.in_degrees()], batch_first=True)
    >>> out_degree = pad_sequence([g1.out_degrees(), g2.out_degrees()], batch_first=True)
    >>> print(in_degree.shape)
    torch.Size([2, 4])
    >>> degree_encoder = DegreeEncoder(5, 16)
    >>> degree_embedding = degree_encoder(th.stack((in_degree, out_degree)))
    >>> print(degree_embedding.shape)
    torch.Size([2, 4, 16])
    """

    def __init__(self, max_degree, embedding_dim, direction="both"):
        super(DegreeEncoder, self).__init__()
        # Fail at construction time rather than on the first forward() call,
        # so a mistyped direction is reported where it was written.
        if direction not in ("in", "out", "both"):
            raise ValueError(
                f'Supported direction options: "in", "out" and "both", '
                f"but got {direction}"
            )
        self.direction = direction
        # Index 0 is the padding index, so degree 0 (including zero-padded
        # slots) maps to an all-zero embedding.
        if direction == "both":
            self.encoder1 = nn.Embedding(
                max_degree + 1, embedding_dim, padding_idx=0
            )
            self.encoder2 = nn.Embedding(
                max_degree + 1, embedding_dim, padding_idx=0
            )
        else:
            self.encoder = nn.Embedding(
                max_degree + 1, embedding_dim, padding_idx=0
            )
        self.max_degree = max_degree

    def forward(self, degrees):
        """
        Parameters
        ----------
        degrees : Tensor
            If :attr:`direction` is ``both``, it should be stacked in degrees and out degrees
            of the batched graph with zero padding, a tensor of shape :math:`(2, B, N)`.
            Otherwise, it should be zero-padded in degrees or out degrees of the batched
            graph, a tensor of shape :math:`(B, N)`, where :math:`B` is the batch size
            of the batched graph, and :math:`N` is the maximum number of nodes.

        Returns
        -------
        Tensor
            Return degree embedding vectors of shape :math:`(B, N, d)`,
            where :math:`d` is :attr:`embedding_dim`.
        """
        # Degrees above ``max_degree`` share the embedding of ``max_degree``.
        degrees = th.clamp(degrees, min=0, max=self.max_degree)

        if self.direction in ("in", "out"):
            # Both single-direction modes use the same embedding table.
            assert len(degrees.shape) == 2
            return self.encoder(degrees)
        if self.direction == "both":
            assert len(degrees.shape) == 3 and degrees.shape[0] == 2
            # degrees[0] goes through encoder1, degrees[1] through encoder2.
            return self.encoder1(degrees[0]) + self.encoder2(degrees[1])
        # Only reachable if ``direction`` was changed after construction.
        raise ValueError(
            f'Supported direction options: "in", "out" and "both", '
            f"but got {self.direction}"
        )
Examples -------- >>> import torch as th >>> from dgl.nn import EGTLayer >>> batch_size = 16 >>> num_nodes = 100 >>> feat_size, edge_feat_size = 128, 32 >>> nfeat = th.rand(batch_size, num_nodes, feat_size) >>> efeat = th.rand(batch_size, num_nodes, num_nodes, edge_feat_size) >>> net = EGTLayer( feat_size=feat_size, edge_feat_size=edge_feat_size, num_heads=8, num_virtual_nodes=4, ) >>> out = net(nfeat, efeat) """ def __init__( self, feat_size, edge_feat_size, num_heads, num_virtual_nodes, dropout=0, attn_dropout=0, activation=nn.ELU(), edge_update=True, ): super().__init__() self.num_heads = num_heads self.num_virtual_nodes = num_virtual_nodes self.edge_update = edge_update assert ( feat_size % num_heads == 0 ), "feat_size must be divisible by num_heads" self.dot_dim = feat_size // num_heads self.mha_ln_h = nn.LayerNorm(feat_size) self.mha_ln_e = nn.LayerNorm(edge_feat_size) self.edge_input = nn.Linear(edge_feat_size, num_heads) self.qkv_proj = nn.Linear(feat_size, feat_size * 3) self.gate = nn.Linear(edge_feat_size, num_heads) self.attn_dropout = nn.Dropout(attn_dropout) self.node_output = nn.Linear(feat_size, feat_size) self.mha_dropout_h = nn.Dropout(dropout) self.node_ffn = nn.Sequential( nn.LayerNorm(feat_size), nn.Linear(feat_size, feat_size), activation, nn.Linear(feat_size, feat_size), nn.Dropout(dropout), ) if self.edge_update: self.edge_output = nn.Linear(num_heads, edge_feat_size) self.mha_dropout_e = nn.Dropout(dropout) self.edge_ffn = nn.Sequential( nn.LayerNorm(edge_feat_size), nn.Linear(edge_feat_size, edge_feat_size), activation, nn.Linear(edge_feat_size, edge_feat_size), nn.Dropout(dropout), ) def forward(self, nfeat, efeat, mask=None): """Forward computation. Note: :attr:`nfeat` and :attr:`efeat` should be padded with embedding of virtual nodes if :attr:`num_virtual_nodes` > 0, while :attr:`mask` should be padded with `0` values for virtual nodes. The padding should be put at the beginning. 
Parameters ---------- nfeat : torch.Tensor A 3D input tensor. Shape: (batch_size, N, :attr:`feat_size`), where N is the sum of the maximum number of nodes and the number of virtual nodes. efeat : torch.Tensor Edge embedding used for attention computation and self update. Shape: (batch_size, N, N, :attr:`edge_feat_size`). mask : torch.Tensor, optional The attention mask used for avoiding computation on invalid positions, where valid positions are indicated by `0` and invalid positions are indicated by `-inf`. Shape: (batch_size, N, N). Default: None. Returns ------- nfeat : torch.Tensor The output node embedding. Shape: (batch_size, N, :attr:`feat_size`). efeat : torch.Tensor, optional The output edge embedding. Shape: (batch_size, N, N, :attr:`edge_feat_size`). It is returned only if :attr:`edge_update` is True. """ nfeat_r1 = nfeat efeat_r1 = efeat nfeat_ln = self.mha_ln_h(nfeat) efeat_ln = self.mha_ln_e(efeat) qkv = self.qkv_proj(nfeat_ln) e_bias = self.edge_input(efeat_ln) gates = self.gate(efeat_ln) bsz, N, _ = qkv.shape q_h, k_h, v_h = qkv.view(bsz, N, -1, self.num_heads).split( self.dot_dim, dim=2 ) attn_hat = torch.einsum("bldh,bmdh->blmh", q_h, k_h) attn_hat = attn_hat.clamp(-5, 5) + e_bias if mask is None: gates = torch.sigmoid(gates) attn_tild = F.softmax(attn_hat, dim=2) * gates else: gates = torch.sigmoid(gates + mask.unsqueeze(-1)) attn_tild = F.softmax(attn_hat + mask.unsqueeze(-1), dim=2) * gates attn_tild = self.attn_dropout(attn_tild) v_attn = torch.einsum("blmh,bmkh->blkh", attn_tild, v_h) # Scale the aggregated values by degree. 
degrees = torch.sum(gates, dim=2, keepdim=True) degree_scalers = torch.log(1 + degrees) degree_scalers[:, : self.num_virtual_nodes] = 1.0 v_attn = v_attn * degree_scalers v_attn = v_attn.reshape(bsz, N, self.num_heads * self.dot_dim) nfeat = self.node_output(v_attn) nfeat = self.mha_dropout_h(nfeat) nfeat.add_(nfeat_r1) nfeat_r2 = nfeat nfeat = self.node_ffn(nfeat) nfeat.add_(nfeat_r2) if self.edge_update: efeat = self.edge_output(attn_hat) efeat = self.mha_dropout_e(efeat) efeat.add_(efeat_r1) efeat_r2 = efeat efeat = self.edge_ffn(efeat) efeat.add_(efeat_r2) return nfeat, efeat return nfeat ================================================ FILE: python/dgl/nn/pytorch/gt/graphormer.py ================================================ """Graphormer Layer""" import torch.nn as nn from .biased_mha import BiasedMHA class GraphormerLayer(nn.Module): r"""Graphormer Layer with Dense Multi-Head Attention, as introduced in `Do Transformers Really Perform Bad for Graph Representation? `__ Parameters ---------- feat_size : int Feature size. hidden_size : int Hidden size of feedforward layers. num_heads : int Number of attention heads, by which :attr:`feat_size` is divisible. attn_bias_type : str, optional The type of attention bias used for modifying attention. Selected from 'add' or 'mul'. Default: 'add'. * 'add' is for additive attention bias. * 'mul' is for multiplicative attention bias. norm_first : bool, optional If True, it performs layer normalization before attention and feedforward operations. Otherwise, it applies layer normalization afterwards. Default: False. dropout : float, optional Dropout probability. Default: 0.1. attn_dropout : float, optional Attention dropout probability. Default: 0.1. activation : callable activation layer, optional Activation function. Default: nn.ReLU(). 
Examples -------- >>> import torch as th >>> from dgl.nn import GraphormerLayer >>> batch_size = 16 >>> num_nodes = 100 >>> feat_size = 512 >>> num_heads = 8 >>> nfeat = th.rand(batch_size, num_nodes, feat_size) >>> bias = th.rand(batch_size, num_nodes, num_nodes, num_heads) >>> net = GraphormerLayer( feat_size=feat_size, hidden_size=2048, num_heads=num_heads ) >>> out = net(nfeat, bias) """ def __init__( self, feat_size, hidden_size, num_heads, attn_bias_type="add", norm_first=False, dropout=0.1, attn_dropout=0.1, activation=nn.ReLU(), ): super().__init__() self.norm_first = norm_first self.attn = BiasedMHA( feat_size=feat_size, num_heads=num_heads, attn_bias_type=attn_bias_type, attn_drop=attn_dropout, ) self.ffn = nn.Sequential( nn.Linear(feat_size, hidden_size), activation, nn.Dropout(p=dropout), nn.Linear(hidden_size, feat_size), nn.Dropout(p=dropout), ) self.dropout = nn.Dropout(p=dropout) self.attn_layer_norm = nn.LayerNorm(feat_size) self.ffn_layer_norm = nn.LayerNorm(feat_size) def forward(self, nfeat, attn_bias=None, attn_mask=None): """Forward computation. Parameters ---------- nfeat : torch.Tensor A 3D input tensor. Shape: (batch_size, N, :attr:`feat_size`), where N is the maximum number of nodes. attn_bias : torch.Tensor, optional The attention bias used for attention modification. Shape: (batch_size, N, N, :attr:`num_heads`). attn_mask : torch.Tensor, optional The attention mask used for avoiding computation on invalid positions, where invalid positions are indicated by `True` values. Shape: (batch_size, N, N). Note: For rows corresponding to unexisting nodes, make sure at least one entry is set to `False` to prevent obtaining NaNs with softmax. Returns ------- y : torch.Tensor The output tensor. 
class LapPosEncoder(nn.Module):
    r"""Laplacian Positional Encoder (LPE), as introduced in
    `GraphGPS: General Powerful Scalable Graph Transformers
    <https://arxiv.org/abs/2205.12454>`__

    This module is a learned laplacian positional encoding module using
    Transformer or DeepSet.

    Parameters
    ----------
    model_type : str
        Encoder model type for LPE, can only be "Transformer" or "DeepSet".
    num_layer : int
        Number of layers in Transformer/DeepSet Encoder.
    k : int
        Number of smallest non-trivial eigenvectors.
    dim : int
        Output size of final laplacian encoding.
    n_head : int, optional
        Number of heads in Transformer Encoder.
        Default : 1.
    batch_norm : bool, optional
        If True, apply batch normalization on raw laplacian positional
        encoding. Default : False.
    num_post_layer : int, optional
        If num_post_layer > 0, apply an MLP of ``num_post_layer`` layers after
        pooling. Default : 0.

    Example
    -------
    >>> import dgl
    >>> from dgl import LapPE
    >>> from dgl.nn import LapPosEncoder

    >>> transform = LapPE(k=5, feat_name='eigvec', eigval_name='eigval', padding=True)
    >>> g = dgl.graph(([0,1,2,3,4,2,3,1,4,0], [2,3,1,4,0,0,1,2,3,4]))
    >>> g = transform(g)
    >>> eigvals, eigvecs = g.ndata['eigval'], g.ndata['eigvec']
    >>> transformer_encoder = LapPosEncoder(
            model_type="Transformer", num_layer=3, k=5, dim=16, n_head=4
        )
    >>> pos_encoding = transformer_encoder(eigvals, eigvecs)
    >>> deepset_encoder = LapPosEncoder(
            model_type="DeepSet", num_layer=3, k=5, dim=16, num_post_layer=2
        )
    >>> pos_encoding = deepset_encoder(eigvals, eigvecs)
    """

    def __init__(
        self,
        model_type,
        num_layer,
        k,
        dim,
        n_head=1,
        batch_norm=False,
        num_post_layer=0,
    ):
        super().__init__()
        self.model_type = model_type
        # Lifts every (eigenvector entry, eigenvalue) pair to the model width.
        self.linear = nn.Linear(2, dim)

        if self.model_type == "Transformer":
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=dim, nhead=n_head, batch_first=True
            )
            self.pe_encoder = nn.TransformerEncoder(
                encoder_layer, num_layers=num_layer
            )
        elif self.model_type == "DeepSet":
            layers = []
            if num_layer == 1:
                layers.append(nn.ReLU())
            else:
                # Deeper DeepSets work at width 2 * dim internally, so the
                # input projection is replaced by a wider one.
                self.linear = nn.Linear(2, 2 * dim)
                layers.append(nn.ReLU())
                for _ in range(num_layer - 2):
                    layers.append(nn.Linear(2 * dim, 2 * dim))
                    layers.append(nn.ReLU())
                layers.append(nn.Linear(2 * dim, dim))
                layers.append(nn.ReLU())
            self.pe_encoder = nn.Sequential(*layers)
        else:
            raise ValueError(
                f"model_type '{model_type}' is not allowed, must be "
                "'Transformer' or 'DeepSet'."
            )

        if batch_norm:
            self.raw_norm = nn.BatchNorm1d(k)
        else:
            self.raw_norm = None

        if num_post_layer > 0:
            layers = []
            if num_post_layer == 1:
                layers.append(nn.Linear(dim, dim))
                layers.append(nn.ReLU())
            else:
                layers.append(nn.Linear(dim, 2 * dim))
                layers.append(nn.ReLU())
                for _ in range(num_post_layer - 2):
                    layers.append(nn.Linear(2 * dim, 2 * dim))
                    layers.append(nn.ReLU())
                layers.append(nn.Linear(2 * dim, dim))
                layers.append(nn.ReLU())
            self.post_mlp = nn.Sequential(*layers)
        else:
            self.post_mlp = None

    def forward(self, eigvals, eigvecs):
        r"""Compute the learned Laplacian positional encoding.

        Parameters
        ----------
        eigvals : Tensor
            Laplacian Eigenvalues of shape :math:`(N, k)`, k different
            eigenvalues repeat N times, can be obtained by using the
            ``LapPE`` transform. NaN entries are treated as padding.
        eigvecs : Tensor
            Laplacian Eigenvectors of shape :math:`(N, k)`, can be obtained by
            using the ``LapPE`` transform. NaN entries are treated as padding.

        Returns
        -------
        Tensor
            Return the laplacian positional encodings of shape :math:`(N, d)`,
            where :math:`N` is the number of nodes in the input graph,
            :math:`d` is :attr:`dim`.
        """
        # Shape (N, k, 2): last axis is (eigenvector entry, eigenvalue).
        pos_encoding = th.cat(
            (eigvecs.unsqueeze(2), eigvals.unsqueeze(2)), dim=2
        ).float()
        # NaNs mark padded eigen-pairs; neutralise them before any arithmetic.
        empty_mask = th.isnan(pos_encoding)
        pos_encoding = pos_encoding.masked_fill(empty_mask, 0.0)
        if self.raw_norm is not None:
            pos_encoding = self.raw_norm(pos_encoding)
        pos_encoding = self.linear(pos_encoding)

        # A position counts as padding when its eigenvalue was NaN.
        padding_mask = empty_mask[:, :, 1]
        if self.model_type == "Transformer":
            pos_encoding = self.pe_encoder(
                src=pos_encoding, src_key_padding_mask=padding_mask
            )
        else:
            pos_encoding = self.pe_encoder(pos_encoding)

        # Remove masked sequences. This must be out-of-place: the DeepSet
        # encoder ends in a ReLU whose output autograd keeps for the backward
        # pass, so overwriting it in place makes backward() fail.
        pos_encoding = pos_encoding.masked_fill(
            padding_mask.unsqueeze(-1), 0.0
        )

        # Sum pooling.
        pos_encoding = th.sum(pos_encoding, 1, keepdim=False)

        # MLP post pooling.
        if self.post_mlp is not None:
            pos_encoding = self.post_mlp(pos_encoding)

        return pos_encoding
>>> edata = th.cat( (edata, th.zeros(1, 16)), dim=0 ) >>> dist, path = shortest_dist(g, root=None, return_paths=True) >>> path_data = edata[path[:, :, :2]] >>> path_encoder = PathEncoder(2, 16, num_heads=8) >>> out = path_encoder(dist.unsqueeze(0), path_data.unsqueeze(0)) >>> print(out.shape) torch.Size([1, 4, 4, 8]) """ def __init__(self, max_len, feat_dim, num_heads=1): super().__init__() self.max_len = max_len self.feat_dim = feat_dim self.num_heads = num_heads self.embedding_table = nn.Embedding(max_len * num_heads, feat_dim) def forward(self, dist, path_data): """ Parameters ---------- dist : Tensor Shortest path distance matrix of the batched graph with zero padding, of shape :math:`(B, N, N)`, where :math:`B` is the batch size of the batched graph, and :math:`N` is the maximum number of nodes. path_data : Tensor Edge feature along the shortest path with zero padding, of shape :math:`(B, N, N, L, d)`, where :math:`L` is the maximum length of the shortest paths, and :math:`d` is :attr:`feat_dim`. Returns ------- torch.Tensor Return attention bias as path encoding, of shape :math:`(B, N, N, H)`, where :math:`B` is the batch size of the input graph, :math:`N` is the maximum number of nodes, and :math:`H` is :attr:`num_heads`. 
def gaussian(x, mean, std):
    """Compute the Gaussian basis kernel function.

    Evaluates the normal probability density
    ``exp(-0.5 * ((x - mean) / std) ** 2) / (sqrt(2 * pi) * std)``
    element-wise, following the usual broadcasting rules between ``x``,
    ``mean`` and ``std``.

    Parameters
    ----------
    x : torch.Tensor
        Points at which the kernel is evaluated.
    mean : torch.Tensor or float
        Kernel center(s).
    std : torch.Tensor or float
        Kernel standard deviation(s); callers are expected to pass
        non-zero values.

    Returns
    -------
    torch.Tensor
        Kernel values with the broadcast shape of the inputs.
    """
    # Function-scope import keeps this block self-contained; the module is
    # cached by the interpreter after the first call.
    import math

    # Use the full-precision constant instead of a truncated literal so the
    # normalisation is exact to floating-point accuracy.
    normalizer = math.sqrt(2 * math.pi)
    return th.exp(-0.5 * (((x - mean) / std) ** 2)) / (normalizer * std)
Examples -------- >>> import torch as th >>> import dgl >>> from dgl.nn import SpatialEncoder >>> from dgl import shortest_dist >>> g1 = dgl.graph(([0,0,0,1,1,2,3,3], [1,2,3,0,3,0,0,1])) >>> g2 = dgl.graph(([0,1], [1,0])) >>> n1, n2 = g1.num_nodes(), g2.num_nodes() >>> # use -1 padding since shortest_dist returns -1 for unreachable node pairs >>> dist = -th.ones((2, 4, 4), dtype=th.long) >>> dist[0, :n1, :n1] = shortest_dist(g1, root=None, return_paths=False) >>> dist[1, :n2, :n2] = shortest_dist(g2, root=None, return_paths=False) >>> spatial_encoder = SpatialEncoder(max_dist=2, num_heads=8) >>> out = spatial_encoder(dist) >>> print(out.shape) torch.Size([2, 4, 4, 8]) """ def __init__(self, max_dist, num_heads=1): super().__init__() self.max_dist = max_dist self.num_heads = num_heads # deactivate node pair between which the distance is -1 self.embedding_table = nn.Embedding( max_dist + 2, num_heads, padding_idx=0 ) def forward(self, dist): """ Parameters ---------- dist : Tensor Shortest path distance of the batched graph with -1 padding, a tensor of shape :math:`(B, N, N)`, where :math:`B` is the batch size of the batched graph, and :math:`N` is the maximum number of nodes. Returns ------- torch.Tensor Return attention bias as spatial encoding of shape :math:`(B, N, N, H)`, where :math:`H` is :attr:`num_heads`. 
""" spatial_encoding = self.embedding_table( th.clamp( dist, min=-1, max=self.max_dist, ) + 1 ) return spatial_encoding class SpatialEncoder3d(nn.Module): r"""3D Spatial Encoder, as introduced in `One Transformer Can Understand Both 2D & 3D Molecular Data `__ This module encodes pair-wise relation between node pair :math:`(i,j)` in the 3D geometric space, according to the Gaussian Basis Kernel function: :math:`\psi _{(i,j)} ^k = \frac{1}{\sqrt{2\pi} \lvert \sigma^k \rvert} \exp{\left ( -\frac{1}{2} \left( \frac{\gamma_{(i,j)} \lvert \lvert r_i - r_j \rvert \rvert + \beta_{(i,j)} - \mu^k}{\lvert \sigma^k \rvert} \right) ^2 \right)},k=1,...,K,` where :math:`K` is the number of Gaussian Basis kernels. :math:`r_i` is the Cartesian coordinate of node :math:`i`. :math:`\gamma_{(i,j)}, \beta_{(i,j)}` are learnable scaling factors and biases determined by node types. :math:`\mu^k, \sigma^k` are learnable centers and standard deviations of the Gaussian Basis kernels. Parameters ---------- num_kernels : int Number of Gaussian Basis Kernels to be applied. Each Gaussian Basis Kernel contains a learnable kernel center and a learnable standard deviation. num_heads : int, optional Number of attention heads if multi-head attention mechanism is applied. Default : 1. max_node_type : int, optional Maximum number of node types. Each node type has a corresponding learnable scaling factor and a bias. Default : 100. Examples -------- >>> import torch as th >>> import dgl >>> from dgl.nn import SpatialEncoder3d >>> coordinate = th.rand(1, 4, 3) >>> node_type = th.tensor([[1, 0, 2, 1]]) >>> spatial_encoder = SpatialEncoder3d(num_kernels=4, ... num_heads=8, ... 
max_node_type=3) >>> out = spatial_encoder(coordinate, node_type=node_type) >>> print(out.shape) torch.Size([1, 4, 4, 8]) """ def __init__(self, num_kernels, num_heads=1, max_node_type=100): super().__init__() self.num_kernels = num_kernels self.num_heads = num_heads self.max_node_type = max_node_type self.means = nn.Parameter(th.empty(num_kernels)) self.stds = nn.Parameter(th.empty(num_kernels)) self.linear_layer_1 = nn.Linear(num_kernels, num_kernels) self.linear_layer_2 = nn.Linear(num_kernels, num_heads) # There are 2 * max_node_type + 3 pairs of gamma and beta parameters: # 1. Parameters at position 0 are for default gamma/beta when no node # type is given # 2. Parameters at position 1 to max_node_type+1 are for src node types. # (position 1 is for padded unexisting nodes) # 3. Parameters at position max_node_type+2 to 2*max_node_type+2 are # for tgt node types. (position max_node_type+2 is for padded) # unexisting nodes) self.gamma = nn.Embedding(2 * max_node_type + 3, 1, padding_idx=0) self.beta = nn.Embedding(2 * max_node_type + 3, 1, padding_idx=0) nn.init.uniform_(self.means, 0, 3) nn.init.uniform_(self.stds, 0, 3) nn.init.constant_(self.gamma.weight, 1) nn.init.constant_(self.beta.weight, 0) def forward(self, coord, node_type=None): """ Parameters ---------- coord : torch.Tensor 3D coordinates of nodes in shape :math:`(B, N, 3)`, where :math:`B` is the batch size, :math:`N`: is the maximum number of nodes. node_type : torch.Tensor, optional Node type ids of nodes. Default : None. * If specified, :attr:`node_type` should be a tensor in shape :math:`(B, N,)`. The scaling factors in gaussian kernels of each pair of nodes are determined by their node types. * Otherwise, :attr:`node_type` will be set to zeros of the same shape by default. Returns ------- torch.Tensor Return attention bias as 3D spatial encoding of shape :math:`(B, N, N, H)`, where :math:`H` is :attr:`num_heads`. 
class HeteroGraphConv(nn.Module):
    r"""A generic module for computing convolution on heterogeneous graphs.

    The heterograph convolution applies sub-modules on their associating
    relation graphs, which reads the features from source nodes and writes the
    updated ones to destination nodes. If multiple relations have the same
    destination node types, their results are aggregated by the specified method.
    If the relation graph has no edge, the corresponding module will not be called.

    Pseudo-code:

    .. code::

        outputs = {nty : [] for nty in g.dsttypes}
        # Apply sub-modules on their associating relation graphs in parallel
        for relation in g.canonical_etypes:
            stype, etype, dtype = relation
            dstdata = relation_submodule(g[relation], ...)
            outputs[dtype].append(dstdata)

        # Aggregate the results for each destination node type
        rsts = {}
        for ntype, ntype_outputs in outputs.items():
            if len(ntype_outputs) != 0:
                rsts[ntype] = aggregate(ntype_outputs)
        return rsts

    Examples
    --------

    Create a heterograph with three types of relations and nodes.

    >>> import dgl
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user') : edges1,
    ...     ('user', 'plays', 'game') : edges2,
    ...     ('store', 'sells', 'game')  : edges3})

    Create a ``HeteroGraphConv`` that applies different convolution modules to
    different relations. Note that the modules for ``'follows'`` and ``'plays'``
    do not share weights.

    >>> import dgl.nn.pytorch as dglnn
    >>> conv = dglnn.HeteroGraphConv({
    ...     'follows' : dglnn.GraphConv(...),
    ...     'plays' : dglnn.GraphConv(...),
    ...     'sells' : dglnn.SAGEConv(...)},
    ...     aggregate='sum')

    Call forward with some ``'user'`` features. This computes new features for both
    ``'user'`` and ``'game'`` nodes.

    >>> import torch as th
    >>> h1 = {'user' : th.randn((g.num_nodes('user'), 5))}
    >>> h2 = conv(g, h1)
    >>> print(h2.keys())
    dict_keys(['user', 'game'])

    Call forward with both ``'user'`` and ``'store'`` features. Because both the
    ``'plays'`` and ``'sells'`` relations will update the ``'game'`` features,
    their results are aggregated by the specified method (i.e., summation here).

    >>> f1 = {'user' : ..., 'store' : ...}
    >>> f2 = conv(g, f1)
    >>> print(f2.keys())
    dict_keys(['user', 'game'])

    Call forward with some ``'store'`` features. This only computes new features
    for ``'game'`` nodes.

    >>> g1 = {'store' : ...}
    >>> g2 = conv(g, g1)
    >>> print(g2.keys())
    dict_keys(['game'])

    Call forward with a pair of inputs is allowed and each submodule will also
    be invoked with a pair of inputs.

    >>> x_src = {'user' : ..., 'store' : ...}
    >>> x_dst = {'user' : ..., 'game' : ...}
    >>> y_dst = conv(g, (x_src, x_dst))
    >>> print(y_dst.keys())
    dict_keys(['user', 'game'])

    Parameters
    ----------
    mods : dict[str, nn.Module]
        Modules associated with every edge types. The forward function of each
        module must have a `DGLGraph` object as the first argument, and
        its second argument is either a tensor object representing the node
        features or a pair of tensor object representing the source and destination
        node features.
    aggregate : str, callable, optional
        Method for aggregating node features generated by different relations.
        Allowed string values are 'sum', 'max', 'min', 'mean', 'stack'.
        The 'stack' aggregation is performed along the second dimension, whose order
        is deterministic.
        User can also customize the aggregator by providing a callable instance.
        For example, aggregation by summation is equivalent to the follows:

        .. code::

            def my_agg_func(tensors, dsttype):
                # tensors: is a list of tensors to aggregate
                # dsttype: string name of the destination node type for which the
                #          aggregation is performed
                stacked = torch.stack(tensors, dim=0)
                return torch.sum(stacked, dim=0)

    Attributes
    ----------
    mods : dict[str, nn.Module]
        Modules associated with every edge types.
    """

    def __init__(self, mods, aggregate="sum"):
        super().__init__()
        # nn.ModuleDict has no get() and only accepts string keys, so the
        # caller's original mapping is kept for lookups (by edge type or by
        # canonical edge type) while a stringified copy registers the
        # sub-modules as children.
        self.mod_dict = mods
        self.mods = nn.ModuleDict({str(key): mod for key, mod in mods.items()})

        # Do not break if graph has 0-in-degree nodes.
        # Because there is no general rule to add self-loop for heterograph.
        for submodule in self.mods.values():
            allow_zero_in_degree = getattr(
                submodule, "set_allow_zero_in_degree", None
            )
            if callable(allow_zero_in_degree):
                allow_zero_in_degree(True)

        self.agg_fn = (
            get_aggregate_fn(aggregate)
            if isinstance(aggregate, str)
            else aggregate
        )

    def _get_module(self, etype):
        """Look up the sub-module for ``etype``.

        ``etype`` is tried as given first; a canonical ``(src, etype, dst)``
        triple then falls back to its middle element. A ``KeyError`` is raised
        when nothing matches.
        """
        found = self.mod_dict.get(etype, None)
        if found is not None:
            return found
        if not isinstance(etype, tuple):
            raise KeyError("Cannot find module with edge type %s" % etype)
        # etype is canonical: retry with the bare edge-type name.
        _, rel_name, _ = etype
        return self.mod_dict[rel_name]

    def forward(self, g, inputs, mod_args=None, mod_kwargs=None):
        """Forward computation

        Invoke the forward function with each module and aggregate their results.

        Parameters
        ----------
        g : DGLGraph
            Graph data.
        inputs : dict[str, Tensor] or pair of dict[str, Tensor]
            Input node features.
        mod_args : dict[str, tuple[any]], optional
            Extra positional arguments for the sub-modules.
        mod_kwargs : dict[str, dict[str, any]], optional
            Extra key-word arguments for the sub-modules.

        Returns
        -------
        dict[str, Tensor]
            Output representations for every types of nodes.

        Notes
        -----
        Relations whose source type has no input features are skipped. When
        ``inputs`` is a pair or ``g`` is a block, relations whose destination
        type has no features are skipped as well. For a single dict on a
        non-block graph the destination features are read from the same dict,
        so a missing destination type raises ``KeyError``.
        """
        mod_args = {} if mod_args is None else mod_args
        mod_kwargs = {} if mod_kwargs is None else mod_kwargs
        collected = {ntype: [] for ntype in g.dsttypes}

        # Normalise the accepted input forms to a (source, destination)
        # pair of feature dicts.
        if isinstance(inputs, tuple):
            src_feats, dst_feats = inputs
            dst_required = False
        elif g.is_block:
            src_feats = inputs
            # In a block, the destination nodes are the leading rows of the
            # input features.
            dst_feats = {
                ntype: feat[: g.number_of_dst_nodes(ntype)]
                for ntype, feat in inputs.items()
            }
            dst_required = False
        else:
            src_feats = dst_feats = inputs
            dst_required = True

        for stype, etype, dtype in g.canonical_etypes:
            rel_graph = g[stype, etype, dtype]
            if stype not in src_feats:
                continue
            if not dst_required and dtype not in dst_feats:
                continue
            dstdata = self._get_module((stype, etype, dtype))(
                rel_graph,
                (src_feats[stype], dst_feats[dtype]),
                *mod_args.get(etype, ()),
                **mod_kwargs.get(etype, {})
            )
            collected[dtype].append(dstdata)

        # Only node types that received at least one result are reported.
        return {
            ntype: self.agg_fn(results, ntype)
            for ntype, results in collected.items()
            if len(results) != 0
        }
class HeteroLinear(nn.Module):
    """Apply linear transformations on heterogeneous inputs.

    One independent ``torch.nn.Linear`` is kept per input key; all of them
    map to the same output size.

    Parameters
    ----------
    in_size : dict[key, int]
        Input feature size for heterogeneous inputs. A key can be a string or a tuple of strings.
    out_size : int
        Output feature size.
    bias : bool, optional
        If True, learns a bias term. Defaults: ``True``.

    Examples
    --------

    >>> import dgl
    >>> import torch
    >>> from dgl.nn import HeteroLinear

    >>> layer = HeteroLinear({'user': 1, ('user', 'follows', 'user'): 2}, 3)
    >>> in_feats = {'user': torch.randn(2, 1), ('user', 'follows', 'user'): torch.randn(3, 2)}
    >>> out_feats = layer(in_feats)
    >>> print(out_feats['user'].shape)
    torch.Size([2, 3])
    >>> print(out_feats[('user', 'follows', 'user')].shape)
    torch.Size([3, 3])
    """

    def __init__(self, in_size, out_size, bias=True):
        super().__init__()

        # ModuleDict only accepts string keys, so every key (including tuple
        # keys) is registered under its str() form.
        self.linears = nn.ModuleDict()
        for key, key_in_size in in_size.items():
            layer = nn.Linear(key_in_size, out_size, bias=bias)
            self.linears[str(key)] = layer

    def forward(self, feat):
        """Forward function

        Parameters
        ----------
        feat : dict[key, Tensor]
            Heterogeneous input features. It maps keys to features.

        Returns
        -------
        dict[key, Tensor]
            Transformed features, stored under the same keys as the inputs.
        """
        return {
            key: self.linears[str(key)](key_feat)
            for key, key_feat in feat.items()
        }
It internally contains multiple ``torch.nn.Embedding`` with different dictionary sizes. Parameters ---------- num_embeddings : dict[key, int] Size of the dictionaries. A key can be a string or a tuple of strings. embedding_dim : int Size of each embedding vector. Examples -------- >>> import dgl >>> import torch >>> from dgl.nn import HeteroEmbedding >>> layer = HeteroEmbedding({'user': 2, ('user', 'follows', 'user'): 3}, 4) >>> # Get the heterogeneous embedding table >>> embeds = layer.weight >>> print(embeds['user'].shape) torch.Size([2, 4]) >>> print(embeds[('user', 'follows', 'user')].shape) torch.Size([3, 4]) >>> # Get the embeddings for a subset >>> input_ids = {'user': torch.LongTensor([0]), ... ('user', 'follows', 'user'): torch.LongTensor([0, 2])} >>> embeds = layer(input_ids) >>> print(embeds['user'].shape) torch.Size([1, 4]) >>> print(embeds[('user', 'follows', 'user')].shape) torch.Size([2, 4]) """ def __init__(self, num_embeddings, embedding_dim): super(HeteroEmbedding, self).__init__() self.embeds = nn.ModuleDict() self.raw_keys = dict() for typ, typ_num_rows in num_embeddings.items(): self.embeds[str(typ)] = nn.Embedding(typ_num_rows, embedding_dim) self.raw_keys[str(typ)] = typ @property def weight(self): """Get the heterogeneous embedding table Returns ------- dict[key, Tensor] Heterogeneous embedding table """ return { self.raw_keys[typ]: emb.weight for typ, emb in self.embeds.items() } def reset_parameters(self): """ Use the xavier method in nn.init module to make the parameters uniformly distributed """ for typ in self.embeds.keys(): nn.init.xavier_uniform_(self.embeds[typ].weight) def forward(self, input_ids): """Forward function Parameters ---------- input_ids : dict[key, Tensor] The row IDs to retrieve embeddings. It maps a key to key-specific IDs. Returns ------- dict[key, Tensor] The retrieved embeddings. 
class TypedLinear(nn.Module):
    r"""Linear transformation according to types.

    For each sample of the input batch :math:`x \in X`, apply linear transformation
    :math:`xW_t`, where :math:`t` is the type of :math:`x`.

    The module supports two regularization methods (basis-decomposition and
    block-diagonal-decomposition) proposed by "`Modeling Relational Data
    with Graph Convolutional Networks <https://arxiv.org/abs/1703.06103>`__"

    The basis regularization decomposes :math:`W_t` by:

    .. math::

       W_t^{(l)} = \sum_{b=1}^B a_{tb}^{(l)}V_b^{(l)}

    where :math:`B` is the number of bases, :math:`V_b^{(l)}` are linearly combined
    with coefficients :math:`a_{tb}^{(l)}`.

    The block-diagonal-decomposition regularization decomposes :math:`W_t` into :math:`B`
    block-diagonal matrices. We refer to :math:`B` as the number of bases:

    .. math::

       W_t^{(l)} = \oplus_{b=1}^B Q_{tb}^{(l)}

    where :math:`B` is the number of bases, :math:`Q_{tb}^{(l)}` are block bases with shape
    :math:`R^{(d^{(l+1)}/B)\times(d^{l}/B)}`.

    Parameters
    ----------
    in_size : int
        Input feature size.
    out_size : int
        Output feature size.
    num_types : int
        Total number of types.
    regularizer : str, optional
        Which weight regularizer to use "basis" or "bdd":

         - "basis" is short for basis-decomposition.
         - "bdd" is short for block-diagonal-decomposition.

        Default applies no regularization.
    num_bases : int, optional
        Number of bases. Needed when ``regularizer`` is specified. Typically smaller
        than ``num_types``.
        Default: ``None``.

    Examples
    --------

    No regularization.

    >>> from dgl.nn import TypedLinear
    >>> import torch
    >>>
    >>> x = torch.randn(100, 32)
    >>> x_type = torch.randint(0, 5, (100,))
    >>> m = TypedLinear(32, 64, 5)
    >>> y = m(x, x_type)
    >>> print(y.shape)
    torch.Size([100, 64])

    With basis regularization

    >>> x = torch.randn(100, 32)
    >>> x_type = torch.randint(0, 5, (100,))
    >>> m = TypedLinear(32, 64, 5, regularizer='basis', num_bases=4)
    >>> y = m(x, x_type)
    >>> print(y.shape)
    torch.Size([100, 64])
    """

    def __init__(
        self, in_size, out_size, num_types, regularizer=None, num_bases=None
    ):
        super().__init__()
        self.in_size = in_size
        self.out_size = out_size
        self.num_types = num_types
        if regularizer is None:
            # One dense (in_size, out_size) matrix per type.
            self.W = nn.Parameter(torch.Tensor(num_types, in_size, out_size))
        elif regularizer == "basis":
            if num_bases is None:
                raise ValueError(
                    'Missing "num_bases" for basis regularization.'
                )
            # Shared bases plus per-type mixing coefficients.
            self.W = nn.Parameter(torch.Tensor(num_bases, in_size, out_size))
            self.coeff = nn.Parameter(torch.Tensor(num_types, num_bases))
            self.num_bases = num_bases
        elif regularizer == "bdd":
            if num_bases is None:
                raise ValueError('Missing "num_bases" for bdd regularization.')
            if in_size % num_bases != 0 or out_size % num_bases != 0:
                raise ValueError(
                    "Input and output sizes must be divisible by num_bases."
                )
            self.submat_in = in_size // num_bases
            self.submat_out = out_size // num_bases
            # Each type stores its num_bases diagonal blocks flattened into
            # a single row.
            self.W = nn.Parameter(
                torch.Tensor(
                    num_types, num_bases * self.submat_in * self.submat_out
                )
            )
            self.num_bases = num_bases
        else:
            raise ValueError(
                f'Supported regularizer options: "basis", "bdd", but got {regularizer}'
            )
        self.regularizer = regularizer
        self.reset_parameters()

    def reset_parameters(self):
        """Reset parameters

        Raises
        ------
        ValueError
            If :attr:`regularizer` holds an unsupported value.
        """
        with torch.no_grad():
            # Follow torch.nn.Linear 's initialization to use kaiming_uniform_ on in_size
            if self.regularizer is None:
                nn.init.uniform_(
                    self.W,
                    -1 / math.sqrt(self.in_size),
                    1 / math.sqrt(self.in_size),
                )
            elif self.regularizer == "basis":
                nn.init.uniform_(
                    self.W,
                    -1 / math.sqrt(self.in_size),
                    1 / math.sqrt(self.in_size),
                )
                nn.init.xavier_uniform_(
                    self.coeff, gain=nn.init.calculate_gain("relu")
                )
            elif self.regularizer == "bdd":
                # The fan-in of every block is the block's input width.
                nn.init.uniform_(
                    self.W,
                    -1 / math.sqrt(self.submat_in),
                    1 / math.sqrt(self.submat_in),
                )
            else:
                # The value lives on the instance; there is no local named
                # ``regularizer`` in this method.
                raise ValueError(
                    f'Supported regularizer options: "basis", "bdd", but got {self.regularizer}'
                )

    def get_weight(self):
        """Get type-wise weight

        Returns
        -------
        torch.Tensor
            Shape (num_types, in_size, out_size) without regularization or
            with "basis" regularization. With "bdd" regularization the raw
            parameter of shape (num_types, num_bases * submat_in * submat_out)
            is returned.

        Raises
        ------
        ValueError
            If :attr:`regularizer` holds an unsupported value.
        """
        if self.regularizer is None:
            return self.W
        elif self.regularizer == "basis":
            # Mix the flattened bases with the per-type coefficients.
            W = self.W.view(self.num_bases, self.in_size * self.out_size)
            return (self.coeff @ W).view(
                self.num_types, self.in_size, self.out_size
            )
        elif self.regularizer == "bdd":
            return self.W
        else:
            raise ValueError(
                f'Supported regularizer options: "basis", "bdd", but got {self.regularizer}'
            )

    def forward(self, x, x_type, sorted_by_type=False):
        """Forward computation.

        Parameters
        ----------
        x : torch.Tensor
            A 2D input tensor. Shape: (N, D1)
        x_type : torch.Tensor
            A 1D integer tensor storing the type of the elements in ``x`` with one-to-one
            correspondence. Shape: (N,)
        sorted_by_type : bool, optional
            Whether the inputs have been sorted by the types. Forward on pre-sorted inputs may
            be faster.

        Returns
        -------
        y : torch.Tensor
            The transformed output tensor. Shape: (N, D2)
        """
        w = self.get_weight()
        if self.regularizer == "bdd":
            # Gather each sample's blocks and multiply block by block.
            w = w.index_select(0, x_type).view(
                -1, self.submat_in, self.submat_out
            )
            x = x.view(-1, 1, self.submat_in)
            return torch.bmm(x, w).view(-1, self.out_size)
        elif sorted_by_type:
            # Segment boundaries of each type inside the sorted type vector.
            pos_l = torch.searchsorted(
                x_type, torch.arange(self.num_types, device=x.device)
            )
            pos_r = torch.cat(
                [pos_l[1:], torch.tensor([len(x_type)], device=x.device)]
            )
            seglen = (
                pos_r - pos_l
            ).cpu()  # XXX(minjie): cause device synchronize
            return segment_mm(x, w, seglen_a=seglen)
        else:
            return gather_mm(x, w, idx_b=x_type)

    def __repr__(self):
        if self.regularizer is None:
            return (
                f"TypedLinear(in_size={self.in_size}, out_size={self.out_size}, "
                f"num_types={self.num_types})"
            )
        else:
            return (
                f"TypedLinear(in_size={self.in_size}, out_size={self.out_size}, "
                f"num_types={self.num_types}, regularizer={self.regularizer}, "
                f"num_bases={self.num_bases})"
            )
math:: h_i \Vert h_j Optionally, it passes the combined results to a linear layer for the final prediction. Parameters ---------- op : str The operation to apply. It can be 'dot', 'cos', 'ele', or 'cat', corresponding to the equations above in order. in_feats : int, optional The input feature size of :math:`h_i` and :math:`h_j`. It is required only if a linear layer is to be applied. out_feats : int, optional The output feature size. It is reuiqred only if a linear layer is to be applied. bias : bool, optional Whether to use bias for the linear layer if it applies. Examples -------- >>> import dgl >>> import torch as th >>> from dgl.nn import EdgePredictor >>> num_nodes = 2 >>> num_edges = 3 >>> in_feats = 4 >>> g = dgl.rand_graph(num_nodes=num_nodes, num_edges=num_edges) >>> h = th.randn(num_nodes, in_feats) >>> src, dst = g.edges() >>> h_src = h[src] >>> h_dst = h[dst] Case1: dot product >>> predictor = EdgePredictor('dot') >>> predictor(h_src, h_dst).shape torch.Size([3, 1]) >>> predictor = EdgePredictor('dot', in_feats, out_feats=3) >>> predictor.reset_parameters() >>> predictor(h_src, h_dst).shape torch.Size([3, 3]) Case2: cosine similarity >>> predictor = EdgePredictor('cos') >>> predictor(h_src, h_dst).shape torch.Size([3, 1]) >>> predictor = EdgePredictor('cos', in_feats, out_feats=3) >>> predictor.reset_parameters() >>> predictor(h_src, h_dst).shape torch.Size([3, 3]) Case3: elementwise product >>> predictor = EdgePredictor('ele') >>> predictor(h_src, h_dst).shape torch.Size([3, 4]) >>> predictor = EdgePredictor('ele', in_feats, out_feats=3) >>> predictor.reset_parameters() >>> predictor(h_src, h_dst).shape torch.Size([3, 3]) Case4: concatenation >>> predictor = EdgePredictor('cat') >>> predictor(h_src, h_dst).shape torch.Size([3, 8]) >>> predictor = EdgePredictor('cat', in_feats, out_feats=3) >>> predictor.reset_parameters() >>> predictor(h_src, h_dst).shape torch.Size([3, 3]) """ def __init__(self, op, in_feats=None, out_feats=None, bias=False): 
super(EdgePredictor, self).__init__() assert op in [ "dot", "cos", "ele", "cat", ], "Expect op to be in ['dot', 'cos', 'ele', 'cat'], got {}".format(op) self.op = op if (in_feats is not None) and (out_feats is not None): if op in ["dot", "cos"]: in_feats = 1 elif op == "cat": in_feats = 2 * in_feats self.linear = nn.Linear(in_feats, out_feats, bias=bias) else: self.linear = None def reset_parameters(self): r""" Description ----------- Reinitialize learnable parameters. """ if self.linear is not None: self.linear.reset_parameters() def forward(self, h_src, h_dst): r""" Description ----------- Predict for pairs of node representations. Parameters ---------- h_src : torch.Tensor Source node features. The tensor is of shape :math:`(E, D_{in})`, where :math:`E` is the number of edges/node pairs, and :math:`D_{in}` is the input feature size. h_dst : torch.Tensor Destination node features. The tensor is of shape :math:`(E, D_{in})`, where :math:`E` is the number of edges/node pairs, and :math:`D_{in}` is the input feature size. Returns ------- torch.Tensor The output features. """ if self.op == "dot": N, D = h_src.shape h = torch.bmm(h_src.view(N, 1, D), h_dst.view(N, D, 1)).squeeze(-1) elif self.op == "cos": h = F.cosine_similarity(h_src, h_dst).unsqueeze(-1) elif self.op == "ele": h = h_src * h_dst else: h = torch.cat([h_src, h_dst], dim=-1) if self.linear is not None: h = self.linear(h) return h ================================================ FILE: python/dgl/nn/pytorch/link/transe.py ================================================ """TransE.""" # pylint: disable= no-member, arguments-differ, invalid-name, W0235 import torch import torch.nn as nn class TransE(nn.Module): r"""Similarity measure from `Translating Embeddings for Modeling Multi-relational Data `__ Mathematically, it is defined as follows: .. math:: - {\| h + r - t \|}_p where :math:`h` is the head embedding, :math:`r` is the relation embedding, and :math:`t` is the tail embedding. 
def forward(self, h_head, h_tail, rels):
    r"""Score triples with the negative Lp distance of TransE.

    Parameters
    ----------
    h_head : torch.Tensor
        Head entity features. The tensor is of shape :math:`(E, D)`, where
        :math:`E` is the number of triples, and :math:`D` is the feature size.
    h_tail : torch.Tensor
        Tail entity features. The tensor is of shape :math:`(E, D)`, where
        :math:`E` is the number of triples, and :math:`D` is the feature size.
    rels : torch.Tensor
        Relation types. It is a LongTensor of shape :math:`(E)`, where
        :math:`E` is the number of triples.

    Returns
    -------
    torch.Tensor
        The triple scores. The tensor is of shape :math:`(E)`.
    """
    # Translate each head by the embedding of its relation ...
    translated = h_head + self.rel_emb(rels)
    # ... and measure how far the translated head lands from the tail.
    distance = torch.norm(translated - h_tail, p=self.p, dim=-1)
    return -distance
def forward(self, h_head, h_tail, rels):
    r"""Score triples with the negative Lp distance of TransR.

    Parameters
    ----------
    h_head : torch.Tensor
        Head entity features. The tensor is of shape :math:`(E, D)`, where
        :math:`E` is the number of triples, and :math:`D` is the feature size.
    h_tail : torch.Tensor
        Tail entity features. The tensor is of shape :math:`(E, D)`, where
        :math:`E` is the number of triples, and :math:`D` is the feature size.
    rels : torch.Tensor
        Relation types. It is a LongTensor of shape :math:`(E)`, where
        :math:`E` is the number of triples.

    Returns
    -------
    torch.Tensor
        The triple scores. The tensor is of shape :math:`(E)`.
    """
    relation = self.rel_emb(rels)
    # One (nfeats, rfeats) projection matrix per triple, selected by relation.
    projection = self.rel_project(rels).reshape(-1, self.nfeats, self.rfeats)

    # Project both entities into the relation space with the same matrices.
    head_proj = torch.matmul(h_head.unsqueeze(1), projection).squeeze(1)
    tail_proj = torch.matmul(h_tail.unsqueeze(1), projection).squeeze(1)

    return -torch.norm(head_proj + relation - tail_proj, p=self.p, dim=-1)
def __init__(
    self,
    g,
    emb_dim=128,
    walk_length=40,
    window_size=5,
    neg_weight=1,
    negative_size=5,
    fast_neg=True,
    sparse=True,
):
    """Create the embedding tables and precompute positive index pairs.

    Parameters
    ----------
    g : DGLGraph
        Graph for learning node embeddings.
    emb_dim : int, optional
        Size of each embedding vector.
    walk_length : int, optional
        Number of nodes in a random walk sequence. Must be at least
        ``window_size + 1``.
    window_size : int, optional
        Maximum distance inside a walk between a node and its context.
    neg_weight : float, optional
        Weight of the negative-sample term in the loss.
    negative_size : int, optional
        Number of negative samples per positive sample.
    fast_neg : bool, optional
        If False, a degree-based negative sampling distribution
        ``self.neg_prob`` is precomputed from ``g.out_degrees()``.
    sparse : bool, optional
        Passed to both ``nn.Embedding`` tables.
    """
    super().__init__()

    assert (
        walk_length >= window_size + 1
    ), f"Expect walk_length >= window_size + 1, got {walk_length} and {window_size + 1}"

    self.g = g
    self.emb_dim = emb_dim
    self.window_size = window_size
    self.walk_length = walk_length
    self.neg_weight = neg_weight
    self.negative_size = negative_size
    self.fast_neg = fast_neg

    # One table for center nodes and one for context nodes.
    node_count = g.num_nodes()
    self.node_embed = nn.Embedding(node_count, emb_dim, sparse=sparse)
    self.context_embed = nn.Embedding(node_count, emb_dim, sparse=sparse)
    self.reset_parameters()

    if not fast_neg:
        # categorical distribution for true negative sampling
        degree_weight = g.out_degrees().pow(0.75)
        self.neg_prob = degree_weight / degree_weight.sum()

    # Positions (context, center) inside one walk that form positive pairs:
    # for a center i these are (i - window_size, i), ..., (i - 1, i),
    # (i + 1, i), ..., (i + window_size, i), clipped to the walk.
    position_pairs = [
        (context, center)
        for center in range(walk_length)
        for context in range(
            max(0, center - window_size),
            min(walk_length, center + 1 + window_size),
        )
        if context != center
    ]
    self.idx_list_src = torch.LongTensor(
        [context for context, _ in position_pairs]
    )
    self.idx_list_dst = torch.LongTensor(
        [center for _, center in position_pairs]
    )
class MetaPath2Vec(nn.Module):
    r"""metapath2vec module from the paper *metapath2vec: Scalable
    Representation Learning for Heterogeneous Networks*

    To achieve efficient optimization, we leverage the negative sampling
    technique for the training process. Repeatedly for each node in meta-path,
    we treat it as the center node and sample nearby positive nodes within
    context size and draw negative samples among all types of nodes from all
    meta-paths. Then we can use the center-context paired nodes and
    context-negative paired nodes to update the network.

    Parameters
    ----------
    g : DGLGraph
        Graph for learning node embeddings. Two different canonical edge types
        :attr:`(utype, etype, vtype)` are not allowed to have same
        :attr:`etype`.
    metapath : list[str]
        A sequence of edge types in the form of a string. It defines a new
        edge type by composing multiple edge types in order. Note that the
        start node type and the end one are commonly the same.
    window_size : int
        In a random walk :attr:`w`, a node :attr:`w[j]` is considered close to
        a node :attr:`w[i]` if :attr:`i - window_size <= j <= i + window_size`.
    emb_dim : int, optional
        Size of each embedding vector. Default: 128
    negative_size : int, optional
        Number of negative samples to use for each positive sample. Default: 5
    sparse : bool, optional
        If True, gradients with respect to the learnable weights will be
        sparse. Default: True

    Attributes
    ----------
    node_embed : nn.Embedding
        Embedding table of all nodes
    local_to_global_nid : dict[str, list]
        Mapping from type-specific node IDs to global node IDs

    Examples
    --------

    >>> import torch
    >>> import dgl
    >>> from torch.optim import SparseAdam
    >>> from torch.utils.data import DataLoader
    >>> from dgl.nn.pytorch import MetaPath2Vec

    >>> # Define a model
    >>> g = dgl.heterograph({
    ...     ('user', 'uc', 'company'): dgl.rand_graph(100, 1000).edges(),
    ...     ('company', 'cp', 'product'): dgl.rand_graph(100, 1000).edges(),
    ...     ('company', 'cu', 'user'): dgl.rand_graph(100, 1000).edges(),
    ...     ('product', 'pc', 'company'): dgl.rand_graph(100, 1000).edges()
    ... })
    >>> model = MetaPath2Vec(g, ['uc', 'cu'], window_size=1)

    >>> # Use the source node type of etype 'uc'
    >>> dataloader = DataLoader(torch.arange(g.num_nodes('user')), batch_size=128,
    ...                         shuffle=True, collate_fn=model.sample)
    >>> optimizer = SparseAdam(model.parameters(), lr=0.025)

    >>> for (pos_u, pos_v, neg_v) in dataloader:
    ...     loss = model(pos_u, pos_v, neg_v)
    ...     optimizer.zero_grad()
    ...     loss.backward()
    ...     optimizer.step()

    >>> # Get the embeddings of all user nodes
    >>> user_nids = torch.LongTensor(model.local_to_global_nid['user'])
    >>> user_emb = model.node_embed(user_nids)
    """

    def __init__(
        self,
        g,
        metapath,
        window_size,
        emb_dim=128,
        negative_size=5,
        sparse=True,
    ):
        super().__init__()

        assert (
            len(metapath) + 1 >= window_size
        ), f"Expect len(metapath) >= window_size - 1, got {metapath} and {window_size}"

        self.hg = g
        self.emb_dim = emb_dim
        self.metapath = metapath
        self.window_size = window_size
        self.negative_size = negative_size

        # convert edge metapath to node metapath
        # get initial source node type
        src_type, _, _ = g.to_canonical_etype(metapath[0])
        node_metapath = [src_type]
        for etype in metapath:
            _, _, dst_type = g.to_canonical_etype(etype)
            node_metapath.append(dst_type)
        self.node_metapath = node_metapath

        # Convert the graph into a homogeneous one for global to local node ID mapping
        g = to_homogeneous(g)
        # Convert it back to the hetero one for local to global node ID mapping
        hg = to_heterogeneous(g, self.hg.ntypes, self.hg.etypes)
        local_to_global_nid = hg.ndata[NID]
        for key, val in local_to_global_nid.items():
            local_to_global_nid[key] = list(val.cpu().numpy())
        self.local_to_global_nid = local_to_global_nid

        num_nodes_total = hg.num_nodes()
        node_frequency = torch.zeros(num_nodes_total)
        # random walk
        for idx in trange(hg.num_nodes(node_metapath[0])):
            traces, _ = random_walk(g=hg, nodes=[idx], metapath=metapath)
            for tr in traces.cpu().numpy():
                visited = torch.LongTensor(self._trace_to_global_nids(tr))
                # ``node_frequency[visited] += 1`` would count a node only
                # once per walk even if the walk visits it several times
                # (e.g. user -> company -> same user); index_add_ accumulates
                # every occurrence.
                node_frequency.index_add_(
                    0, visited, torch.ones(len(visited))
                )

        neg_prob = node_frequency.pow(0.75)
        self.neg_prob = neg_prob / neg_prob.sum()

        # center node embedding
        self.node_embed = nn.Embedding(
            num_nodes_total, self.emb_dim, sparse=sparse
        )
        self.context_embed = nn.Embedding(
            num_nodes_total, self.emb_dim, sparse=sparse
        )
        self.reset_parameters()

    def _trace_to_global_nids(self, trace):
        """Map the type-local node IDs of one walk to global node IDs."""
        return [
            self.local_to_global_nid[self.node_metapath[pos]][nid]
            for pos, nid in enumerate(trace)
        ]

    @staticmethod
    def _context_pairs(tr_nids, window_size):
        """List (center, context) pairs of one walk within ``window_size``.

        Position ``j`` is a context of position ``i`` when ``j != i`` and
        ``i - window_size <= j <= i + window_size``.
        """
        centers = []
        contexts = []
        length = len(tr_nids)
        for i, center in enumerate(tr_nids):
            first = max(i - window_size, 0)
            last = min(i + window_size, length - 1)
            for j in range(first, last + 1):
                if j == i:
                    continue
                centers.append(center)
                contexts.append(tr_nids[j])
        return centers, contexts

    def reset_parameters(self):
        """Reinitialize learnable parameters"""
        init_range = 1.0 / self.emb_dim
        init.uniform_(self.node_embed.weight.data, -init_range, init_range)
        init.constant_(self.context_embed.weight.data, 0)

    def sample(self, indices):
        """Sample positive and negative samples

        Parameters
        ----------
        indices : torch.Tensor
            Node IDs of the source node type from which we perform random walks

        Returns
        -------
        torch.Tensor
            Positive center nodes
        torch.Tensor
            Positive context nodes
        torch.Tensor
            Negative context nodes
        """
        traces, _ = random_walk(
            g=self.hg, nodes=indices, metapath=self.metapath
        )
        u_list = []
        v_list = []
        for tr in traces.cpu().numpy():
            # The previous implementation enumerated a slice of the walk and
            # compared the slice-relative index with the absolute center
            # index, which produced self-pairs and dropped real neighbours;
            # it also excluded the node at distance ``window_size`` on the
            # right. The helper works on absolute positions instead.
            centers, contexts = self._context_pairs(
                self._trace_to_global_nids(tr), self.window_size
            )
            u_list.extend(centers)
            v_list.extend(contexts)

        neg_v = choice(
            self.hg.num_nodes(),
            size=len(u_list) * self.negative_size,
            prob=self.neg_prob,
        ).reshape(len(u_list), self.negative_size)

        return torch.LongTensor(u_list), torch.LongTensor(v_list), neg_v

    def forward(self, pos_u, pos_v, neg_v):
        r"""Compute the loss for the batch of positive and negative samples

        Parameters
        ----------
        pos_u : torch.Tensor
            Positive center nodes
        pos_v : torch.Tensor
            Positive context nodes
        neg_v : torch.Tensor
            Negative context nodes

        Returns
        -------
        torch.Tensor
            Loss value
        """
        emb_u = self.node_embed(pos_u)
        emb_v = self.context_embed(pos_v)
        emb_neg_v = self.context_embed(neg_v)

        score = torch.sum(torch.mul(emb_u, emb_v), dim=1)
        score = torch.clamp(score, max=10, min=-10)
        score = -F.logsigmoid(score)

        # Squeeze only the trailing singleton produced by bmm; a bare
        # squeeze() would also drop the batch or negative-sample dimension
        # when either has size 1 and break the sum over dim=1.
        neg_score = torch.bmm(emb_neg_v, emb_u.unsqueeze(2)).squeeze(2)
        neg_score = torch.clamp(neg_score, max=10, min=-10)
        neg_score = -torch.sum(F.logsigmoid(-neg_score), dim=1)

        return torch.mean(score + neg_score)
def __init__(
    self,
    num_embeddings,
    embedding_dim,
    name,
    init_func=None,
    device=None,
    partition=None,
):
    """Allocate the embedding storage and set up cross-process metadata.

    Parameters
    ----------
    num_embeddings : int
        The number of embeddings.
    embedding_dim : int
        The dimension size of embeddings.
    name : str
        Name of the embedding; used as the shared-memory array name and as
        the key written to / awaited on the TCPStore.
    init_func : callable, optional
        Called with the freshly created tensor and expected to return the
        initialized tensor.
    device : th.device, optional
        Device to store the embeddings on. Defaults to CPU.
    partition : NDArrayPartition, optional
        Partition describing how the embeddings are split. When missing and
        the device is not the CPU, a "remainder" partition is created.
    """
    global _STORE

    if device is None:
        device = th.device("cpu")

    # Check whether it is multi-gpu training or not.
    # rank -1 / world_size 0 mark the "no process group" case below.
    if th.distributed.is_initialized():
        rank = th.distributed.get_rank()
        world_size = th.distributed.get_world_size()
    else:
        rank = -1
        world_size = 0
    self._rank = rank
    self._world_size = world_size
    self._store = None
    self._comm = None
    self._partition = partition

    host_name = "127.0.0.1"
    port = 12346

    if rank >= 0:
        # for multi-gpu training, setup a TCPStore for
        # embedding status synchronization across GPU processes
        # The store is created once per process and cached in the
        # module-level _STORE so later embeddings reuse it.
        if _STORE is None:
            _STORE = th.distributed.TCPStore(
                host_name,
                port,
                world_size,
                rank == 0,
                timedelta(seconds=10 * 60),
            )
        self._store = _STORE

    # embeddings is stored in CPU memory.
    if th.device(device) == th.device("cpu"):
        # Rank 0 (or the single non-distributed process) creates and
        # initializes the shared-memory array.
        if rank <= 0:
            emb = create_shared_mem_array(
                name, (num_embeddings, embedding_dim), th.float32
            )
            if init_func is not None:
                emb = init_func(emb)
        if rank == 0:  # the master gpu process
            # NOTE(review): the same key is set world_size - 1 times; a
            # single set looks sufficient for the wait() below — confirm.
            for _ in range(1, world_size):
                # send embs
                self._store.set(name, name)
        elif rank > 0:
            # receive
            # Block until rank 0 has published the key, then attach to the
            # shared-memory array it created.
            self._store.wait([name])
            emb = get_shared_mem_array(
                name, (num_embeddings, embedding_dim), th.float32
            )
        self._tensor = emb
    else:  # embeddings is stored in GPU memory.
        self._comm = True

        if not self._partition:
            # for communication we need a partition
            self._partition = NDArrayPartition(
                num_embeddings,
                self._world_size if self._world_size > 0 else 1,
                mode="remainder",
            )

        # create local tensors for the weights
        # max(rank, 0) maps the non-distributed rank -1 onto part 0.
        local_size = self._partition.local_size(max(self._rank, 0))

        # TODO(dlasalle): support 16-bit/half embeddings
        emb = th.empty(
            [local_size, embedding_dim],
            dtype=th.float32,
            requires_grad=False,
            device=device,
        )
        if init_func:
            emb = init_func(emb)
        self._tensor = emb

    self._num_embeddings = num_embeddings
    self._embedding_dim = embedding_dim
    self._name = name
    self._optm_state = None  # track optimizer state
    self._trace = []  # track minibatch
@property
def partition(self):
    """Return the partition identifying how the tensor is split across
    processes.

    Returns
    -------
    NDArrayPartition or None
        The partition object held by this embedding (not a mode string):
        the one passed to the constructor, or the "remainder" partition the
        constructor creates for non-CPU devices when none is given.
        ``None`` when the embedding is stored on the CPU and no partition
        was passed.
    """
    return self._partition
def all_set_embedding(self, values):
    """Set the values of the embedding. This method must be called by all
    processes sharing the embedding with identical tensors for
    :attr:`values`.

    NOTE: This method must be called by all processes sharing the
    embedding, or it may result in a deadlock.

    Parameters
    ----------
    values : Tensor
        The global tensor to pull values from.
    """
    if self._partition:
        # Each process copies only the rows that belong to its own part.
        idxs = F.copy_to(
            self._partition.get_local_indices(
                max(self._rank, 0),
                ctx=F.context(self._tensor),
            ),
            F.context(values),
        )
        self._tensor[:] = F.copy_to(
            F.gather_row(values, idxs), ctx=F.context(self._tensor)
        )[:]
    else:
        # The tensor is a single CPU array shared by all processes, so one
        # writer is enough. ``<= 0`` also covers rank -1, i.e. running
        # without a process group; checking ``== 0`` silently skipped the
        # assignment in that case. This matches _all_set_optm_state.
        if self._rank <= 0:
            self._tensor[:] = F.copy_to(
                values, ctx=F.context(self._tensor)
            )[:]
    # Keep readers from observing a partially written tensor.
    if th.distributed.is_initialized():
        th.distributed.barrier()
def all_get_embedding(self):
    """Return a copy of the embedding stored in CPU memory. If this is a
    multi-processing instance, the tensor will be returned in shared
    memory. If the embedding is currently stored on multiple GPUs, all
    processes must call this method in the same order.

    NOTE: This method must be called by all processes sharing the
    embedding, or it may result in a deadlock.

    Returns
    -------
    torch.Tensor
        The tensor storing the node embeddings.
    """
    if not self._partition:
        # already stored in CPU memory
        return self._tensor

    if self._world_size == 0:
        # non-multiprocessing
        cpu_device = th.device("cpu")
        return self._tensor.to(cpu_device)

    shared_name = f"{self._name}_gather"
    full_shape = (self._num_embeddings, self._embedding_dim)
    return self._all_get_tensor(shared_name, self._tensor, full_shape)
def _all_set_optm_state(self, states):
    """Set the optimizer states of the embedding. This method must be
    called by all processes sharing the embedding with identical
    :attr:`states`.

    NOTE: This method must be called by all processes sharing the
    embedding, or it may result in a deadlock.

    Parameters
    ----------
    states : tuple of torch.Tensor
        The global states to pull values from.
    """
    if self._partition:
        # Rows owned by this process, placed on the device of the inputs.
        local_rows = self._partition.get_local_indices(
            max(self._rank, 0), ctx=F.context(self._tensor)
        )
        idxs = F.copy_to(local_rows, F.context(states[0]))
        for state, new_state in zip(self._optm_state, states):
            local_values = F.gather_row(new_state, idxs)
            state[:] = F.copy_to(local_values, ctx=F.context(self._tensor))[:]
    elif self._rank <= 0:
        # stored in CPU memory
        for state, new_state in zip(self._optm_state, states):
            state[:] = F.copy_to(new_state, ctx=F.context(self._tensor))[:]

    if th.distributed.is_initialized():
        th.distributed.barrier()
def bmm_maybe_select(A, B, index):
    """Slice submatrices of B by the given index and perform bmm.

    B is a 3D tensor of shape (N, D1, D2), which can be viewed as a stack of
    N matrices of shape (D1, D2). The input index is an integer vector of
    length M. A could be either:
    (1) a dense tensor of shape (M, D1),
    (2) an integer vector of length M.
    The result C is a 2D matrix of shape (M, D2)

    For case (1), C is computed by bmm:
    ::

        C[i, :] = matmul(A[i, :], B[index[i], :, :])

    For case (2), C is computed by index select:
    ::

        C[i, :] = B[index[i], A[i], :]

    Parameters
    ----------
    A : torch.Tensor
        lhs tensor
    B : torch.Tensor
        rhs tensor
    index : torch.Tensor
        index tensor

    Returns
    -------
    C : torch.Tensor
        return tensor of shape (M, D2)
    """
    if A.dtype == th.int64 and len(A.shape) == 1:
        # Faster version of B[index, A, :]: flatten the first two axes and
        # address row (index, A) as index * D1 + A. D1 must be read before
        # flattening, because afterwards shape[1] is D2.
        rows_per_matrix = B.shape[1]
        flat_B = B.reshape(-1, B.shape[2])
        flatidx = index * rows_per_matrix + A
        return flat_B.index_select(0, flatidx)

    BB = B.index_select(0, index)
    # (M, 1, D1) @ (M, D1, D2) -> (M, 1, D2). Only the inserted axis is
    # removed, so the result stays 2D even when M == 1 or D2 == 1.
    return th.bmm(A.unsqueeze(1), BB).squeeze(1)
In the second case, the number of graphs equals the number of modules inside this container. Parameters ---------- *args : Sub-modules of torch.nn.Module that will be added to the container in the order by which they are passed in the constructor. Examples -------- The following example uses PyTorch backend. Mode 1: sequentially apply GNN modules on the same graph >>> import torch >>> import dgl >>> import torch.nn as nn >>> import dgl.function as fn >>> from dgl.nn.pytorch import Sequential >>> class ExampleLayer(nn.Module): >>> def __init__(self): >>> super().__init__() >>> def forward(self, graph, n_feat, e_feat): >>> with graph.local_scope(): >>> graph.ndata['h'] = n_feat >>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h')) >>> n_feat += graph.ndata['h'] >>> graph.apply_edges(fn.u_add_v('h', 'h', 'e')) >>> e_feat += graph.edata['e'] >>> return n_feat, e_feat >>> >>> g = dgl.DGLGraph() >>> g.add_nodes(3) >>> g.add_edges([0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2]) >>> net = Sequential(ExampleLayer(), ExampleLayer(), ExampleLayer()) >>> n_feat = torch.rand(3, 4) >>> e_feat = torch.rand(9, 4) >>> net(g, n_feat, e_feat) (tensor([[39.8597, 45.4542, 25.1877, 30.8086], [40.7095, 45.3985, 25.4590, 30.0134], [40.7894, 45.2556, 25.5221, 30.4220]]), tensor([[80.3772, 89.7752, 50.7762, 60.5520], [80.5671, 89.3736, 50.6558, 60.6418], [80.4620, 89.5142, 50.3643, 60.3126], [80.4817, 89.8549, 50.9430, 59.9108], [80.2284, 89.6954, 50.0448, 60.1139], [79.7846, 89.6882, 50.5097, 60.6213], [80.2654, 90.2330, 50.2787, 60.6937], [80.3468, 90.0341, 50.2062, 60.2659], [80.0556, 90.2789, 50.2882, 60.5845]])) Mode 2: sequentially apply GNN modules on different graphs >>> import torch >>> import dgl >>> import torch.nn as nn >>> import dgl.function as fn >>> import networkx as nx >>> from dgl.nn.pytorch import Sequential >>> class ExampleLayer(nn.Module): >>> def __init__(self): >>> super().__init__() >>> def forward(self, graph, n_feat): >>> with graph.local_scope(): 
def forward(self, graph, *feats):
    r"""Run the contained modules one after another.

    Parameters
    ----------
    graph : DGLGraph or list of DGLGraphs
        The graph(s) to apply modules on. A list is paired with the
        modules position by position; a single graph is reused for every
        module.
    *feats :
        Input features.
        The output of the :math:`i`-th module should match the input
        of the :math:`(i+1)`-th module in the sequential.

    Returns
    -------
    The output of the last module applied. If no module is applied, the
    input features are returned as a tuple.
    """
    # Decide which graph each module sees; the loop below is then shared.
    if isinstance(graph, list):
        schedule = zip(graph, self)
    elif isinstance(graph, DGLGraph):
        schedule = ((graph, layer) for layer in self)
    else:
        raise TypeError(
            "The first argument of forward must be a DGLGraph"
            " or a list of DGLGraph s"
        )

    for layer_graph, layer in schedule:
        # A module may return a single value; wrap it so it can be unpacked
        # as positional arguments for the next module.
        if not isinstance(feats, tuple):
            feats = (feats,)
        feats = layer(layer_graph, *feats)
    return feats
class JumpingKnowledge(nn.Module):
    r"""The Jumping Knowledge aggregation module from `Representation Learning on
    Graphs with Jumping Knowledge Networks <https://arxiv.org/abs/1806.03536>`__

    It aggregates the output representations of multiple GNN layers with

    **concatenation**

    .. math::

        h_i^{(1)} \, \Vert \, \ldots \, \Vert \, h_i^{(T)}

    or **max pooling**

    .. math::

        \max \left( h_i^{(1)}, \ldots, h_i^{(T)} \right)

    or **LSTM**

    .. math::

        \sum_{t=1}^T \alpha_i^{(t)} h_i^{(t)}

    with attention scores :math:`\alpha_i^{(t)}` obtained from a BiLSTM

    Parameters
    ----------
    mode : str
        The aggregation to apply. It can be 'cat', 'max', or 'lstm',
        corresponding to the equations above in order.
    in_feats : int, optional
        This argument is only required if :attr:`mode` is ``'lstm'``.
        The output representation size of a single GNN layer. Note that
        all GNN layers need to have the same output representation size.
    num_layers : int, optional
        This argument is only required if :attr:`mode` is ``'lstm'``.
        The number of GNN layers for output aggregation.

    Examples
    --------
    >>> import dgl
    >>> import torch as th
    >>> from dgl.nn import JumpingKnowledge

    >>> # Output representations of two GNN layers
    >>> num_nodes = 3
    >>> in_feats = 4
    >>> feat_list = [th.zeros(num_nodes, in_feats), th.ones(num_nodes, in_feats)]

    >>> # Case1
    >>> model = JumpingKnowledge()
    >>> model(feat_list).shape
    torch.Size([3, 8])

    >>> # Case2
    >>> model = JumpingKnowledge(mode='max')
    >>> model(feat_list).shape
    torch.Size([3, 4])

    >>> # Case3
    >>> model = JumpingKnowledge(mode='lstm', in_feats=in_feats, num_layers=len(feat_list))
    >>> model(feat_list).shape
    torch.Size([3, 4])
    """

    def __init__(self, mode="cat", in_feats=None, num_layers=None):
        super(JumpingKnowledge, self).__init__()
        assert mode in [
            "cat",
            "max",
            "lstm",
        ], "Expect mode to be 'cat', or 'max' or 'lstm', got {}".format(mode)
        self.mode = mode

        if mode == "lstm":
            assert in_feats is not None, "in_feats is required for lstm mode"
            assert (
                num_layers is not None
            ), "num_layers is required for lstm mode"
            # The BiLSTM emits 2 * hidden_size features per layer, which
            # the linear layer reduces to one attention score per layer.
            hidden_size = (num_layers * in_feats) // 2
            self.lstm = nn.LSTM(
                in_feats, hidden_size, bidirectional=True, batch_first=True
            )
            self.att = nn.Linear(2 * hidden_size, 1)

    def reset_parameters(self):
        r"""

        Description
        -----------
        Reinitialize learnable parameters. This comes into effect only for
        the lstm mode.
        """
        if self.mode == "lstm":
            self.lstm.reset_parameters()
            self.att.reset_parameters()

    def forward(self, feat_list):
        r"""

        Description
        -----------
        Aggregate output representations across multiple GNN layers.

        Parameters
        ----------
        feat_list : list[Tensor]
            feat_list[i] is the output representations of a GNN layer.

        Returns
        -------
        Tensor
            The aggregated representations.
        """
        if self.mode == "cat":
            return th.cat(feat_list, dim=-1)
        if self.mode == "max":
            return th.stack(feat_list, dim=-1).max(dim=-1).values

        # LSTM: score every layer per node, then take the weighted sum.
        stacked_feat_list = th.stack(
            feat_list, dim=1
        )  # (N, num_layers, in_feats)
        alpha, _ = self.lstm(stacked_feat_list)
        alpha = self.att(alpha).squeeze(-1)  # (N, num_layers)
        alpha = th.softmax(alpha, dim=-1)
        return (stacked_feat_list * alpha.unsqueeze(-1)).sum(dim=1)
def forward(self, g, labels, mask=None):
    r"""Compute the label propagation process.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    labels : torch.Tensor
        The input node labels. There are three cases supported.

        * A LongTensor of shape :math:`(N, 1)` or :math:`(N,)` for node class labels in
          multiclass classification, where :math:`N` is the number of nodes.
        * A LongTensor of shape :math:`(N, C)` for one-hot encoding of node class labels
          in multiclass classification, where :math:`C` is the number of classes.
        * A LongTensor of shape :math:`(N, L)` for node labels in multilabel binary
          classification, where :math:`L` is the number of labels.
    mask : torch.Tensor
        The bool indicators of shape :math:`(N,)` with True denoting labeled nodes.
        Default: None, indicating all nodes are labeled.

    Returns
    -------
    torch.Tensor
        The propagated node labels of shape :math:`(N, D)` with float type, where :math:`D`
        is the number of classes or labels.
    """
    with g.local_scope():
        # A matrix with more than one column is used as given
        # (multi-label / one-hot); anything else is treated as class ids
        # and one-hot encoded.
        is_label_matrix = len(labels.size()) > 1 and labels.size(1) > 1
        if is_label_matrix:
            labels = labels.to(th.float32)
        else:
            labels = F.one_hot(labels.view(-1)).to(th.float32)

        # Unlabeled rows start at zero.
        if mask is None:
            y = labels
        else:
            y = th.zeros_like(labels)
            y[mask] = labels[mask]

        init = (1 - self.alpha) * y
        in_degs = g.in_degrees().float().clamp(min=1)
        out_degs = g.out_degrees().float().clamp(min=1)

        symmetric = self.norm_type == "sym"
        if symmetric:
            dst_norm = th.pow(in_degs, -0.5).to(labels.device).unsqueeze(1)
            src_norm = th.pow(out_degs, -0.5).to(labels.device).unsqueeze(1)
        elif self.norm_type == "row":
            dst_norm = th.pow(in_degs, -1.0).to(labels.device).unsqueeze(1)
        else:
            raise ValueError(
                f"Expect norm_type to be 'sym' or 'row', got {self.norm_type}"
            )

        for _ in range(self.k):
            # Source-side scaling exists only for symmetric normalization.
            g.ndata["h"] = y * src_norm if symmetric else y
            g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
            y = init + self.alpha * g.ndata["h"] * dst_norm

            if self.clamp:
                y = y.clamp_(0.0, 1.0)
            if self.normalize:
                y = F.normalize(y, p=1)
            if self.reset:
                y[mask] = labels[mask]

        return y
class APPNPConv(layers.Layer):
    r"""Approximate Personalized Propagation of Neural Predictions
    layer from `Predict then Propagate: Graph Neural Networks
    meet Personalized PageRank <https://arxiv.org/pdf/1810.05997.pdf>`__

    .. math::
        H^{0} & = X

        H^{t+1} & = (1-\alpha)\left(\hat{D}^{-1/2}
        \hat{A} \hat{D}^{-1/2} H^{t}\right) + \alpha H^{0}

    Parameters
    ----------
    k : int
        Number of iterations :math:`K`.
    alpha : float
        The teleport probability :math:`\alpha`.
    edge_drop : float, optional
        Dropout rate on edges that controls the
        messages received by each node. Default: ``0``.
    """

    def __init__(self, k, alpha, edge_drop=0.0):
        super(APPNPConv, self).__init__()
        self._k = k
        self._alpha = alpha
        self.edge_drop = layers.Dropout(edge_drop)

    def call(self, graph, feat):
        r"""Compute APPNP layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : tf.Tensor
            The input feature of shape :math:`(N, *)` :math:`N` is the
            number of nodes, and :math:`*` could be of any shape.

        Returns
        -------
        tf.Tensor
            The output feature of shape :math:`(N, *)` where :math:`*`
            should be the same as input shape.
        """
        with graph.local_scope():
            degs = tf.clip_by_value(
                tf.cast(graph.in_degrees(), tf.float32),
                clip_value_min=1,
                clip_value_max=np.inf,
            )
            norm = tf.pow(degs, -0.5)
            # Pad the per-node factor with trailing unit axes so that it
            # broadcasts against features of any rank.
            shp = norm.shape + (1,) * (feat.ndim - 1)
            norm = tf.reshape(norm, shp)
            feat_0 = feat
            for _ in range(self._k):
                # normalization by src node
                feat = feat * norm
                graph.ndata["h"] = feat
                # One weight per edge, passed through the dropout layer on
                # every step. The shape is given as a tuple and the dtype
                # by keyword because the second positional parameter of
                # `tf.ones` is `dtype`, not a second dimension.
                graph.edata["w"] = self.edge_drop(
                    tf.ones((graph.num_edges(),), dtype=tf.float32)
                )
                graph.update_all(fn.u_mul_e("h", "w", "m"), fn.sum("m", "h"))
                feat = graph.ndata.pop("h")
                # normalization by dst node
                feat = feat * norm
                feat = (1 - self._alpha) * feat + self._alpha * feat_0
            return feat
def call(self, graph, feat, lambda_max=None):
    r"""Compute ChebNet layer.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : tf.Tensor
        The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
        is size of input feature, :math:`N` is the number of nodes.
    lambda_max : list or tensor or None, optional.
        A list(tensor) with length :math:`B`, stores the largest eigenvalue
        of the normalized laplacian of each individual graph in ``graph``,
        where :math:`B` is the batch size of the input graph. Default: None.
        If None, this method would set the default value to 2.
        One can use :func:`dgl.laplacian_lambda_max` to compute this value.

    Returns
    -------
    tf.Tensor
        The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
        is size of output feature.
    """
    with graph.local_scope():
        in_degrees = tf.clip_by_value(
            tf.cast(graph.in_degrees(), tf.float32),
            clip_value_min=1,
            clip_value_max=np.inf,
        )
        D_invsqrt = tf.expand_dims(tf.pow(in_degrees, -0.5), axis=-1)

        def propagate(x):
            """Operation Feat * D^-1/2 A D^-1/2"""
            graph.ndata["h"] = x * D_invsqrt
            graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
            return graph.ndata.pop("h") * D_invsqrt

        if lambda_max is None:
            dgl_warning(
                "lambda_max is not provided, using default value of 2. "
                "Please use dgl.laplacian_lambda_max to compute the eigenvalues."
            )
            lambda_max = [2] * graph.batch_size

        if isinstance(lambda_max, list):
            lambda_max = tf.constant(lambda_max, dtype=tf.float32)
        if lambda_max.ndim == 1:
            # (B,) to (B, 1)
            lambda_max = tf.expand_dims(lambda_max, axis=-1)

        # broadcast from (B, 1) to (N, 1)
        lambda_max = broadcast_nodes(graph, lambda_max)
        re_norm = 2.0 / lambda_max

        # terms[i] is X_i; X_0 is the raw feature.
        terms = [feat]

        # X_1
        if self._k > 1:
            h = propagate(feat)
            terms.append(-re_norm * h + feat * (re_norm - 1))

        # X_i for i = 2 ... k - 1, built from the two most recent terms
        for _ in range(2, self._k):
            newest, older = terms[-1], terms[-2]
            h = propagate(newest)
            terms.append(
                -2 * re_norm * h + newest * 2 * (re_norm - 1) - older
            )

        # concatenate all terms along the feature axis, then project
        h = self.linear(tf.concat(terms, 1))

        if self.activation:
            h = self.activation(h)

    return h
def call(self, adj, feat, lambda_max=None):
    r"""Compute (Dense) Chebyshev Spectral Graph Convolution layer.

    Parameters
    ----------
    adj : tf.Tensor
        The adjacency matrix of the graph to apply Graph Convolution on,
        should be of shape :math:`(N, N)`, where a row represents the destination
        and a column represents the source.
    feat : tf.Tensor
        The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
        is size of input feature, :math:`N` is the number of nodes.
    lambda_max : float or None, optional
        A float value indicates the largest eigenvalue of given graph.
        Default: None, in which case it is computed from the normalized
        Laplacian of ``adj``.

    Returns
    -------
    tf.Tensor
        The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
        is size of output feature.
    """
    A = adj
    num_nodes = A.shape[0]

    # Row sums are clipped below at 1 before taking the inverse square
    # root, so all-zero rows do not divide by zero.
    in_degree = 1 / tf.sqrt(
        tf.clip_by_value(
            tf.reduce_sum(A, 1), clip_value_min=1, clip_value_max=np.inf
        )
    )
    D_invsqrt = tf.linalg.diag(in_degree)
    I = tf.eye(num_nodes)
    L = I - D_invsqrt @ A @ D_invsqrt

    if lambda_max is None:
        # `tf.linalg.eig` returns (eigenvalues, eigenvectors) and the
        # eigenvalues form a complex vector of shape (N,). Take the real
        # parts; the vector cannot be indexed with two indices.
        lambda_ = tf.math.real(tf.linalg.eig(L)[0])
        lambda_max = tf.reduce_max(lambda_)

    L_hat = 2 * L / lambda_max - I

    # Chebyshev terms: Z_0 = I, Z_1 = L_hat, Z_i = 2 L_hat Z_{i-1} - Z_{i-2}
    Z = [tf.eye(num_nodes)]
    for i in range(1, self._k):
        if i == 1:
            Z.append(L_hat)
        else:
            Z.append(2 * L_hat @ Z[-1] - Z[-2])

    Zs = tf.stack(Z, 0)  # (k, n, n)

    # Apply every term to the features and its own weight, then sum
    # over the k terms.
    Zh = Zs @ tf.expand_dims(feat, axis=0) @ self.W
    Zh = tf.reduce_sum(Zh, 0)

    if self.bias is not None:
        Zh = Zh + self.bias
    return Zh
def call(self, g, feat):
    """Forward computation

    Parameters
    ----------
    g : DGLGraph
        The graph.
    feat : tf.Tensor or pair of tf.Tensor
        :math:`(N, D)` where :math:`N` is the number of nodes and
        :math:`D` is the number of feature dimensions.

        If a pair of tensors is given, the graph must be a uni-bipartite graph
        with only one edge type, and the two tensors must have the same
        dimensionality on all except the first axis.

    Returns
    -------
    tf.Tensor or pair of tf.Tensor
        New node features.

    Raises
    ------
    DGLError
        If there are 0-in-degree nodes in the input graph, it will raise DGLError
        since no message will be passed to those nodes. This will cause invalid output.
        The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``.
    """
    with g.local_scope():
        must_check = not self._allow_zero_in_degree
        if must_check and tf.math.count_nonzero(g.in_degrees() == 0) > 0:
            raise DGLError(
                "There are 0-in-degree nodes in the graph, "
                "output for those nodes will be invalid. "
                "This is harmful for some applications, "
                "causing silent performance regression. "
                "Adding self-loop on the input graph by "
                "calling `g = dgl.add_self_loop(g)` will resolve "
                "the issue. Setting ``allow_zero_in_degree`` "
                "to be `True` when constructing this module will "
                "suppress the check and let the code run."
            )

        src_feat, dst_feat = expand_as_pair(feat, g)
        g.srcdata["x"] = src_feat
        g.dstdata["x"] = dst_feat

        # Per-edge difference (destination minus source), then the theta
        # projection on edges and the phi projection on destination nodes.
        g.apply_edges(fn.v_sub_u("x", "x", "theta"))
        g.edata["theta"] = self.theta(g.edata["theta"])
        g.dstdata["phi"] = self.phi(g.dstdata["x"])

        if self.batch_norm:
            # The messages must be materialized on the edges so that batch
            # normalization can be applied before the max reduction.
            # for more comments on why global batch norm instead
            # of batch norm within EdgeConv go to
            # https://github.com/dmlc/dgl/blob/master/python/dgl/nn/pytorch/conv/edgeconv.py
            g.apply_edges(fn.e_add_v("theta", "phi", "e"))
            g.edata["e"] = self.bn(g.edata["e"])
            g.update_all(fn.copy_e("e", "e"), fn.max("e", "x"))
        else:
            g.update_all(fn.e_add_v("theta", "phi", "e"), fn.max("e", "x"))

        return g.dstdata["x"]
If a scalar is given, the source and destination node feature size would take the same value. out_feats : int Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. num_heads : int Number of heads in Multi-Head Attention. feat_drop : float, optional Dropout rate on feature. Defaults: ``0``. attn_drop : float, optional Dropout rate on attention weight. Defaults: ``0``. negative_slope : float, optional LeakyReLU angle of negative slope. Defaults: ``0.2``. residual : bool, optional If True, use residual connection. Defaults: ``False``. activation : callable activation function/layer or None, optional. If not None, applies an activation function to the updated node features. Default: ``None``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Defaults: ``False``. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be appied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practise to handle this is to filter out the nodes with zero-in-degree when use after conv. 
def __init__(
    self,
    in_feats,
    out_feats,
    num_heads,
    feat_drop=0.0,
    attn_drop=0.0,
    negative_slope=0.2,
    residual=False,
    activation=None,
    allow_zero_in_degree=False,
):
    """Create the projection layers, attention vectors and dropouts.

    A tuple ``in_feats`` gets separate source and destination projections
    (``fc_src`` / ``fc_dst``); otherwise a single shared ``fc`` is created.
    ``res_fc`` is ``None`` without a residual connection, an ``Identity``
    when ``in_feats == out_feats``, and a bias-free dense layer otherwise.
    """
    super(GATConv, self).__init__()
    self._num_heads = num_heads
    self._in_feats = in_feats
    self._out_feats = out_feats
    self._allow_zero_in_degree = allow_zero_in_degree

    xinit = tf.keras.initializers.VarianceScaling(
        scale=np.sqrt(2), mode="fan_avg", distribution="untruncated_normal"
    )
    # All heads are produced by one dense layer of this width.
    multi_head_width = out_feats * num_heads

    def make_projection():
        # Bias-free dense layer shared by every projection below.
        return layers.Dense(
            multi_head_width, use_bias=False, kernel_initializer=xinit
        )

    def make_attention_vector():
        # One trainable attention vector of size out_feats per head.
        return tf.Variable(
            initial_value=xinit(
                shape=(1, num_heads, out_feats), dtype="float32"
            ),
            trainable=True,
        )

    if isinstance(in_feats, tuple):
        self.fc_src = make_projection()
        self.fc_dst = make_projection()
    else:
        self.fc = make_projection()

    self.attn_l = make_attention_vector()
    self.attn_r = make_attention_vector()

    self.feat_drop = layers.Dropout(rate=feat_drop)
    self.attn_drop = layers.Dropout(rate=attn_drop)
    self.leaky_relu = layers.LeakyReLU(alpha=negative_slope)

    if not residual:
        self.res_fc = None
    elif in_feats != out_feats:
        self.res_fc = make_projection()
    else:
        self.res_fc = Identity()

    self.activation = activation
Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." ) if isinstance(feat, tuple): src_prefix_shape = tuple(feat[0].shape[:-1]) dst_prefix_shape = tuple(feat[1].shape[:-1]) h_src = self.feat_drop(feat[0]) h_dst = self.feat_drop(feat[1]) if not hasattr(self, "fc_src"): self.fc_src, self.fc_dst = self.fc, self.fc feat_src = tf.reshape( self.fc_src(h_src), src_prefix_shape + (self._num_heads, self._out_feats), ) feat_dst = tf.reshape( self.fc_dst(h_dst), dst_prefix_shape + (self._num_heads, self._out_feats), ) else: src_prefix_shape = dst_prefix_shape = tuple(feat.shape[:-1]) h_src = h_dst = self.feat_drop(feat) feat_src = feat_dst = tf.reshape( self.fc(h_src), src_prefix_shape + (self._num_heads, self._out_feats), ) if graph.is_block: feat_dst = feat_src[: graph.number_of_dst_nodes()] h_dst = h_dst[: graph.number_of_dst_nodes()] dst_prefix_shape = ( graph.number_of_dst_nodes(), ) + dst_prefix_shape[1:] # NOTE: GAT paper uses "first concatenation then linear projection" # to compute attention scores, while ours is "first projection then # addition", the two approaches are mathematically equivalent: # We decompose the weight vector a mentioned in the paper into # [a_l || a_r], then # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j # Our implementation is much efficient because we do not need to # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus, # addition could be optimized with DGL's built-in function u_add_v, # which further speeds up computation and saves memory footprint. el = tf.reduce_sum(feat_src * self.attn_l, axis=-1, keepdims=True) er = tf.reduce_sum(feat_dst * self.attn_r, axis=-1, keepdims=True) graph.srcdata.update({"ft": feat_src, "el": el}) graph.dstdata.update({"er": er}) # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively. 
graph.apply_edges(fn.u_add_v("el", "er", "e")) e = self.leaky_relu(graph.edata.pop("e")) # compute softmax graph.edata["a"] = self.attn_drop(edge_softmax(graph, e)) # message passing graph.update_all(fn.u_mul_e("ft", "a", "m"), fn.sum("m", "ft")) rst = graph.dstdata["ft"] # residual if self.res_fc is not None: resval = tf.reshape( self.res_fc(h_dst), dst_prefix_shape + (-1, self._out_feats) ) rst = rst + resval # activation if self.activation: rst = self.activation(rst) if get_attention: return rst, graph.edata["a"] else: return rst ================================================ FILE: python/dgl/nn/tensorflow/conv/ginconv.py ================================================ """Tensorflow Module for Graph Isomorphism Network layer""" # pylint: disable= no-member, arguments-differ, invalid-name import tensorflow as tf from tensorflow.keras import layers from .... import function as fn from ....utils import expand_as_pair class GINConv(layers.Layer): r"""Graph Isomorphism Network layer from `How Powerful are Graph Neural Networks? `__ .. math:: h_i^{(l+1)} = f_\Theta \left((1 + \epsilon) h_i^{l} + \mathrm{aggregate}\left(\left\{h_j^{l}, j\in\mathcal{N}(i) \right\}\right)\right) Parameters ---------- apply_func : callable activation function/layer or None If not None, apply this function to the updated node feature, the :math:`f_\Theta` in the formula. aggregator_type : str Aggregator type to use (``sum``, ``max`` or ``mean``). init_eps : float, optional Initial :math:`\epsilon` value, default: ``0``. learn_eps : bool, optional If True, :math:`\epsilon` will be a learnable parameter. Default: ``False``. 
Example ------- >>> import dgl >>> import numpy as np >>> import tensorflow as tf >>> from dgl.nn import GINConv >>> >>> with tf.device("CPU:0"): >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = tf.ones((6, 10)) >>> lin = tf.keras.layers.Dense(10) >>> conv = GINConv(lin, 'max') >>> res = conv(g, feat) >>> res """ def __init__( self, apply_func, aggregator_type, init_eps=0, learn_eps=False ): super(GINConv, self).__init__() self.apply_func = apply_func if aggregator_type == "sum": self._reducer = fn.sum elif aggregator_type == "max": self._reducer = fn.max elif aggregator_type == "mean": self._reducer = fn.mean else: raise KeyError( "Aggregator type {} not recognized.".format(aggregator_type) ) # to specify whether eps is trainable or not. self.eps = tf.Variable( initial_value=[init_eps], dtype=tf.float32, trainable=learn_eps ) def call(self, graph, feat): r"""Compute Graph Isomorphism Network layer. Parameters ---------- graph : DGLGraph The graph. feat : tf.Tensor or pair of tf.Tensor If a tf.Tensor is given, the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of tf.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`. If ``apply_func`` is not None, :math:`D_{in}` should fit the input dimensionality requirement of ``apply_func``. Returns ------- tf.Tensor The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is the output dimensionality of ``apply_func``. If ``apply_func`` is None, :math:`D_{out}` should be the same as input dimensionality. 
""" with graph.local_scope(): feat_src, feat_dst = expand_as_pair(feat, graph) graph.srcdata["h"] = feat_src graph.update_all(fn.copy_u("h", "m"), self._reducer("m", "neigh")) rst = (1 + self.eps) * feat_dst + graph.dstdata["neigh"] if self.apply_func is not None: rst = self.apply_func(rst) return rst ================================================ FILE: python/dgl/nn/tensorflow/conv/graphconv.py ================================================ """Tensorflow modules for graph convolutions(GCN).""" import numpy as np # pylint: disable= no-member, arguments-differ, invalid-name import tensorflow as tf from tensorflow.keras import layers from .... import function as fn from ....base import DGLError from ....utils import expand_as_pair # pylint: disable=W0235 class GraphConv(layers.Layer): r"""Graph convolution from `Semi-Supervised Classification with Graph Convolutional Networks `__ Mathematically it is defined as follows: .. math:: h_i^{(l+1)} = \sigma(b^{(l)} + \sum_{j\in\mathcal{N}(i)}\frac{1}{c_{ij}}h_j^{(l)}W^{(l)}) where :math:`\mathcal{N}(i)` is the set of neighbors of node :math:`i`, :math:`c_{ij}` is the product of the square root of node degrees (i.e., :math:`c_{ij} = \sqrt{|\mathcal{N}(i)|}\sqrt{|\mathcal{N}(j)|}`), and :math:`\sigma` is an activation function. Parameters ---------- in_feats : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. out_feats : int Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. norm : str, optional How to apply the normalizer. Can be one of the following values: * ``right``, to divide the aggregated messages by each node's in-degrees, which is equivalent to averaging the received messages. * ``none``, where no normalization is applied. * ``both`` (default), where the messages are scaled with :math:`1/c_{ji}` above, equivalent to symmetric normalization. * ``left``, to divide the messages sent out from each node by its out-degrees, equivalent to random walk normalization. 
weight : bool, optional If True, apply a linear layer. Otherwise, aggregate the messages without a weight matrix. bias : bool, optional If True, adds a learnable bias to the output. Default: ``True``. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. allow_zero_in_degree : bool, optional If there are 0-in-degree nodes in the graph, output for those nodes will be invalid since no message will be passed to those nodes. This is harmful for some applications causing silent performance regression. This module will raise a DGLError if it detects 0-in-degree nodes in input graph. By setting ``True``, it will suppress the check and let the users handle it by themselves. Default: ``False``. Attributes ---------- weight : tf.Variable The learnable weight tensor. bias : tf.Variable The learnable bias tensor. Note ---- Zero in-degree nodes will lead to invalid output value. This is because no message will be passed to those nodes, the aggregation function will be applied on empty input. A common practice to avoid this is to add a self-loop for each node in the graph if it is homogeneous, which can be achieved by: >>> g = ... # a DGLGraph >>> g = dgl.add_self_loop(g) Calling ``add_self_loop`` will not work for some graphs, for example, heterogeneous graph since the edge type can not be decided for self_loop edges. Set ``allow_zero_in_degree`` to ``True`` for those cases to unblock the code and handle zero-in-degree nodes manually. A common practice to handle this is to filter out the nodes with zero-in-degree when using the output of the conv layer. Examples -------- >>> import dgl >>> import numpy as np >>> import tensorflow as tf >>> from dgl.nn import GraphConv >>> # Case 1: Homogeneous graph >>> with tf.device("CPU:0"): ... g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) ... g = dgl.add_self_loop(g) ... feat = tf.ones((6, 10)) ... 
conv = GraphConv(10, 2, norm='both', weight=True, bias=True) ... res = conv(g, feat) >>> print(res) >>> # allow_zero_in_degree example >>> with tf.device("CPU:0"): ... g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) ... conv = GraphConv(10, 2, norm='both', weight=True, bias=True, allow_zero_in_degree=True) ... res = conv(g, feat) >>> print(res) >>> # Case 2: Unidirectional bipartite graph >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> with tf.device("CPU:0"): ... g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) ... u_fea = tf.convert_to_tensor(np.random.rand(2, 5)) ... v_fea = tf.convert_to_tensor(np.random.rand(4, 5)) ... conv = GraphConv(5, 2, norm='both', weight=True, bias=True) ... res = conv(g, (u_fea, v_fea)) >>> res """ def __init__( self, in_feats, out_feats, norm="both", weight=True, bias=True, activation=None, allow_zero_in_degree=False, ): super(GraphConv, self).__init__() if norm not in ("none", "both", "right", "left"): raise DGLError( 'Invalid norm value. Must be either "none", "both", "right" or "left".' ' But got "{}".'.format(norm) ) self._in_feats = in_feats self._out_feats = out_feats self._norm = norm self._allow_zero_in_degree = allow_zero_in_degree if weight: xinit = tf.keras.initializers.glorot_uniform() self.weight = tf.Variable( initial_value=xinit( shape=(in_feats, out_feats), dtype="float32" ), trainable=True, ) else: self.weight = None if bias: zeroinit = tf.keras.initializers.zeros() self.bias = tf.Variable( initial_value=zeroinit(shape=(out_feats), dtype="float32"), trainable=True, ) else: self.bias = None self._activation = activation def set_allow_zero_in_degree(self, set_value): r"""Set allow_zero_in_degree flag. Parameters ---------- set_value : bool The value to be set to the flag. """ self._allow_zero_in_degree = set_value def call(self, graph, feat, weight=None): r"""Compute graph convolution. Parameters ---------- graph : DGLGraph The graph. 
feat : torch.Tensor or pair of torch.Tensor If a torch.Tensor is given, it represents the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of torch.Tensor is given, which is the case for bipartite graph, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. weight : torch.Tensor, optional Optional external weight tensor. Returns ------- torch.Tensor The output feature Raises ------ DGLError If there are 0-in-degree nodes in the input graph, it will raise DGLError since no message will be passed to those nodes. This will cause invalid output. The error can be ignored by setting ``allow_zero_in_degree`` parameter to ``True``. Note ---- * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional dimensions, :math:`N` is the number of nodes. * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are the same shape as the input. * Weight shape: :math:`(\text{in_feats}, \text{out_feats})`. """ with graph.local_scope(): if not self._allow_zero_in_degree: if tf.math.count_nonzero(graph.in_degrees() == 0) > 0: raise DGLError( "There are 0-in-degree nodes in the graph, " "output for those nodes will be invalid. " "This is harmful for some applications, " "causing silent performance regression. " "Adding self-loop on the input graph by " "calling `g = dgl.add_self_loop(g)` will resolve " "the issue. Setting ``allow_zero_in_degree`` " "to be `True` when constructing this module will " "suppress the check and let the code run." 
) feat_src, feat_dst = expand_as_pair(feat, graph) if self._norm in ["both", "left"]: degs = tf.clip_by_value( tf.cast(graph.out_degrees(), tf.float32), clip_value_min=1, clip_value_max=np.inf, ) if self._norm == "both": norm = tf.pow(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1,) * (feat_dst.ndim - 1) norm = tf.reshape(norm, shp) feat_src = feat_src * norm if weight is not None: if self.weight is not None: raise DGLError( "External weight is provided while at the same time the" " module has defined its own weight parameter. Please" " create the module with flag weight=False." ) else: weight = self.weight if self._in_feats > self._out_feats: # mult W first to reduce the feature size for aggregation. if weight is not None: feat_src = tf.matmul(feat_src, weight) graph.srcdata["h"] = feat_src graph.update_all( fn.copy_u(u="h", out="m"), fn.sum(msg="m", out="h") ) rst = graph.dstdata["h"] else: # aggregate first then mult W graph.srcdata["h"] = feat_src graph.update_all( fn.copy_u(u="h", out="m"), fn.sum(msg="m", out="h") ) rst = graph.dstdata["h"] if weight is not None: rst = tf.matmul(rst, weight) if self._norm in ["both", "right"]: degs = tf.clip_by_value( tf.cast(graph.in_degrees(), tf.float32), clip_value_min=1, clip_value_max=np.inf, ) if self._norm == "both": norm = tf.pow(degs, -0.5) else: norm = 1.0 / degs shp = norm.shape + (1,) * (feat_dst.ndim - 1) norm = tf.reshape(norm, shp) rst = rst * norm if self.bias is not None: rst = rst + self.bias if self._activation is not None: rst = self._activation(rst) return rst def extra_repr(self): """Set the extra representation of the module, which will come into effect when printing the model. 
""" summary = "in={_in_feats}, out={_out_feats}" summary += ", normalization={_norm}" if "_activation" in self.__dict__: summary += ", activation={_activation}" return summary.format(**self.__dict__) ================================================ FILE: python/dgl/nn/tensorflow/conv/relgraphconv.py ================================================ """Tensorflow Module for Relational graph convolution layer""" # pylint: disable= no-member, arguments-differ, invalid-name import tensorflow as tf from tensorflow.keras import layers from .... import function as fn from .. import utils class RelGraphConv(layers.Layer): r"""Relational graph convolution layer from `Modeling Relational Data with Graph Convolutional Networks `__ It can be described as below: .. math:: h_i^{(l+1)} = \sigma(\sum_{r\in\mathcal{R}} \sum_{j\in\mathcal{N}^r(i)}\frac{1}{c_{i,r}}W_r^{(l)}h_j^{(l)}+W_0^{(l)}h_i^{(l)}) where :math:`\mathcal{N}^r(i)` is the neighbor set of node :math:`i` w.r.t. relation :math:`r`. :math:`c_{i,r}` is the normalizer equal to :math:`|\mathcal{N}^r(i)|`. :math:`\sigma` is an activation function. :math:`W_0` is the self-loop weight. The basis regularization decomposes :math:`W_r` by: .. math:: W_r^{(l)} = \sum_{b=1}^B a_{rb}^{(l)}V_b^{(l)} where :math:`B` is the number of bases, :math:`V_b^{(l)}` are linearly combined with coefficients :math:`a_{rb}^{(l)}`. The block-diagonal-decomposition regularization decomposes :math:`W_r` into :math:`B` number of block diagonal matrices. We refer :math:`B` as the number of bases. The block regularization decomposes :math:`W_r` by: .. math:: W_r^{(l)} = \oplus_{b=1}^B Q_{rb}^{(l)} where :math:`B` is the number of bases, :math:`Q_{rb}^{(l)}` are block bases with shape :math:`R^{(d^{(l+1)}/B)*(d^{l}/B)}`. Parameters ---------- in_feat : int Input feature size; i.e, the number of dimensions of :math:`h_j^{(l)}`. out_feat : int Output feature size; i.e., the number of dimensions of :math:`h_i^{(l+1)}`. num_rels : int Number of relations. . 
regularizer : str Which weight regularizer to use "basis" or "bdd". "basis" is short for basis-diagonal-decomposition. "bdd" is short for block-diagonal-decomposition. num_bases : int, optional Number of bases. If is none, use number of relations. Default: ``None``. bias : bool, optional True if bias is added. Default: ``True``. activation : callable, optional Activation function. Default: ``None``. self_loop : bool, optional True to include self loop message. Default: ``True``. low_mem : bool, optional True to use low memory implementation of relation message passing function. Default: False. This option trades speed with memory consumption, and will slowdown the forward/backward. Turn it on when you encounter OOM problem during training or evaluation. Default: ``False``. dropout : float, optional Dropout rate. Default: ``0.0`` layer_norm: float, optional Add layer norm. Default: ``False`` Examples -------- >>> import dgl >>> import numpy as np >>> import tensorflow as tf >>> from dgl.nn import RelGraphConv >>> >>> with tf.device("CPU:0"): >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> feat = tf.ones((6, 10)) >>> conv = RelGraphConv(10, 2, 3, regularizer='basis', num_bases=2) >>> etype = tf.convert_to_tensor(np.array([0,1,2,0,1,2]).astype(np.int64)) >>> res = conv(g, feat, etype) >>> res >>> # One-hot input >>> with tf.device("CPU:0"): >>> one_hot_feat = tf.convert_to_tensor(np.array([0,1,2,3,4,5]).astype(np.int64)) >>> res = conv(g, one_hot_feat, etype) >>> res """ def __init__( self, in_feat, out_feat, num_rels, regularizer="basis", num_bases=None, bias=True, activation=None, self_loop=True, low_mem=False, dropout=0.0, layer_norm=False, ): super(RelGraphConv, self).__init__() self.in_feat = in_feat self.out_feat = out_feat self.num_rels = num_rels self.regularizer = regularizer self.num_bases = num_bases if ( self.num_bases is None or self.num_bases > self.num_rels or self.num_bases < 0 ): self.num_bases = self.num_rels self.bias = bias self.activation = 
activation self.self_loop = self_loop self.low_mem = low_mem assert ( layer_norm is False ), "TensorFlow currently does not support layer norm." xinit = tf.keras.initializers.glorot_uniform() zeroinit = tf.keras.initializers.zeros() if regularizer == "basis": # add basis weights self.weight = tf.Variable( initial_value=xinit( shape=(self.num_bases, self.in_feat, self.out_feat), dtype="float32", ), trainable=True, ) if self.num_bases < self.num_rels: # linear combination coefficients self.w_comp = tf.Variable( initial_value=xinit( shape=(self.num_rels, self.num_bases), dtype="float32" ), trainable=True, ) # message func self.message_func = self.basis_message_func elif regularizer == "bdd": if in_feat % num_bases != 0 or out_feat % num_bases != 0: raise ValueError( "Feature size must be a multiplier of num_bases." ) # add block diagonal weights self.submat_in = in_feat // self.num_bases self.submat_out = out_feat // self.num_bases # assuming in_feat and out_feat are both divisible by num_bases self.weight = tf.Variable( initial_value=xinit( shape=( self.num_rels, self.num_bases * self.submat_in * self.submat_out, ), dtype="float32", ), trainable=True, ) # message func self.message_func = self.bdd_message_func else: raise ValueError("Regularizer must be either 'basis' or 'bdd'") # bias if self.bias: self.h_bias = tf.Variable( initial_value=zeroinit(shape=(out_feat), dtype="float32"), trainable=True, ) # weight for self loop if self.self_loop: self.loop_weight = tf.Variable( initial_value=xinit(shape=(in_feat, out_feat), dtype="float32"), trainable=True, ) self.dropout = layers.Dropout(rate=dropout) def basis_message_func(self, edges): """Message function for basis regularizer""" if self.num_bases < self.num_rels: # generate all weights from bases weight = tf.reshape( self.weight, (self.num_bases, self.in_feat * self.out_feat) ) weight = tf.reshape( tf.matmul(self.w_comp, weight), (self.num_rels, self.in_feat, self.out_feat), ) else: weight = self.weight # calculate 
msg @ W_r before put msg into edge # if src is th.int64 we expect it is an index select if edges.src["h"].dtype != tf.int64 and self.low_mem: etypes, _ = tf.unique(edges.data["type"]) msg = tf.zeros([edges.src["h"].shape[0], self.out_feat]) idx = tf.range(edges.src["h"].shape[0]) for etype in etypes: loc = edges.data["type"] == etype w = weight[etype] src = tf.boolean_mask(edges.src["h"], loc) sub_msg = tf.matmul(src, w) indices = tf.reshape(tf.boolean_mask(idx, loc), (-1, 1)) msg = tf.tensor_scatter_nd_update(msg, indices, sub_msg) else: msg = utils.bmm_maybe_select( edges.src["h"], weight, edges.data["type"] ) if "norm" in edges.data: msg = msg * edges.data["norm"] return {"msg": msg} def bdd_message_func(self, edges): """Message function for block-diagonal-decomposition regularizer""" if (edges.src["h"].dtype == tf.int64) and len( edges.src["h"].shape ) == 1: raise TypeError( "Block decomposition does not allow integer ID feature." ) # calculate msg @ W_r before put msg into edge # if src is th.int64 we expect it is an index select if self.low_mem: etypes, _ = tf.unique(edges.data["type"]) msg = tf.zeros([edges.src["h"].shape[0], self.out_feat]) idx = tf.range(edges.src["h"].shape[0]) for etype in etypes: loc = edges.data["type"] == etype w = tf.reshape( self.weight[etype], (self.num_bases, self.submat_in, self.submat_out), ) src = tf.reshape( tf.boolean_mask(edges.src["h"], loc), (-1, self.num_bases, self.submat_in), ) sub_msg = tf.einsum("abc,bcd->abd", src, w) sub_msg = tf.reshape(sub_msg, (-1, self.out_feat)) indices = tf.reshape(tf.boolean_mask(idx, loc), (-1, 1)) msg = tf.tensor_scatter_nd_update(msg, indices, sub_msg) else: weight = tf.reshape( tf.gather(self.weight, edges.data["type"]), (-1, self.submat_in, self.submat_out), ) node = tf.reshape(edges.src["h"], (-1, 1, self.submat_in)) msg = tf.reshape(tf.matmul(node, weight), (-1, self.out_feat)) if "norm" in edges.data: msg = msg * edges.data["norm"] return {"msg": msg} def call(self, g, x, etypes, 
norm=None): """Forward computation Parameters ---------- g : DGLGraph The graph. x : tf.Tensor Input node features. Could be either * :math:`(|V|, D)` dense tensor * :math:`(|V|,)` int64 vector, representing the categorical values of each node. We then treat the input feature as an one-hot encoding feature. etypes : tf.Tensor Edge type tensor. Shape: :math:`(|E|,)` norm : tf.Tensor Optional edge normalizer tensor. Shape: :math:`(|E|, 1)` Returns ------- tf.Tensor New node features. """ assert g.is_homogeneous, ( "not a homogeneous graph; convert it with to_homogeneous " "and pass in the edge type as argument" ) with g.local_scope(): g.ndata["h"] = x g.edata["type"] = tf.cast(etypes, tf.int64) if norm is not None: g.edata["norm"] = norm if self.self_loop: loop_message = utils.matmul_maybe_select(x, self.loop_weight) # message passing g.update_all(self.message_func, fn.sum(msg="msg", out="h")) # apply bias and activation node_repr = g.ndata["h"] if self.bias: node_repr = node_repr + self.h_bias if self.self_loop: node_repr = node_repr + loop_message if self.activation: node_repr = self.activation(node_repr) node_repr = self.dropout(node_repr) return node_repr ================================================ FILE: python/dgl/nn/tensorflow/conv/sageconv.py ================================================ """Tensorflow Module for GraphSAGE layer""" # pylint: disable= no-member, arguments-differ, invalid-name import tensorflow as tf from tensorflow.keras import layers from .... import function as fn from ....base import DGLError from ....utils import check_eq_shape, expand_as_pair class SAGEConv(layers.Layer): r"""GraphSAGE layer from `Inductive Representation Learning on Large Graphs `__ .. 
math:: h_{\mathcal{N}(i)}^{(l+1)} &= \mathrm{aggregate} \left(\{h_{j}^{l}, \forall j \in \mathcal{N}(i) \}\right) h_{i}^{(l+1)} &= \sigma \left(W \cdot \mathrm{concat} (h_{i}^{l}, h_{\mathcal{N}(i)}^{l+1}) \right) h_{i}^{(l+1)} &= \mathrm{norm}(h_{i}^{(l+1)}) Parameters ---------- in_feats : int, or pair of ints Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`. SAGEConv can be applied on homogeneous graph and unidirectional `bipartite graph `__. If the layer applies on a unidirectional bipartite graph, ``in_feats`` specifies the input feature size on both the source and destination nodes. If a scalar is given, the source and destination node feature size would take the same value. If aggregator type is ``gcn``, the feature size of source and destination nodes are required to be the same. out_feats : int Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. aggregator_type : str Aggregator type to use (``mean``, ``gcn``, ``pool``, ``lstm``). feat_drop : float Dropout rate on features, default: ``0``. bias : bool If True, adds a learnable bias to the output. Default: ``True``. norm : callable activation function/layer or None, optional If not None, applies normalization to the updated node features. activation : callable activation function/layer or None, optional If not None, applies an activation function to the updated node features. Default: ``None``. 
Examples -------- >>> import dgl >>> import numpy as np >>> import tensorflow as tf >>> from dgl.nn import SAGEConv >>> >>> # Case 1: Homogeneous graph >>> with tf.device("CPU:0"): >>> g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3])) >>> g = dgl.add_self_loop(g) >>> feat = tf.ones((6, 10)) >>> conv = SAGEConv(10, 2, 'pool') >>> res = conv(g, feat) >>> res >>> # Case 2: Unidirectional bipartite graph >>> with tf.device("CPU:0"): >>> u = [0, 1, 0, 0, 1] >>> v = [0, 1, 2, 3, 2] >>> g = dgl.heterograph({('_N', '_E', '_N'):(u, v)}) >>> u_fea = tf.convert_to_tensor(np.random.rand(2, 5)) >>> v_fea = tf.convert_to_tensor(np.random.rand(4, 5)) >>> conv = SAGEConv((5, 10), 2, 'mean') >>> res = conv(g, (u_fea, v_fea)) >>> res """ def __init__( self, in_feats, out_feats, aggregator_type, feat_drop=0.0, bias=True, norm=None, activation=None, ): super(SAGEConv, self).__init__() valid_aggre_types = {"mean", "gcn", "pool", "lstm"} if aggregator_type not in valid_aggre_types: raise DGLError( "Invalid aggregator_type. Must be one of {}. " "But got {!r} instead.".format( valid_aggre_types, aggregator_type ) ) self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) self._out_feats = out_feats self._aggre_type = aggregator_type self.norm = norm self.feat_drop = layers.Dropout(feat_drop) self.activation = activation # aggregator type: mean/pool/lstm/gcn if aggregator_type == "pool": self.fc_pool = layers.Dense(self._in_src_feats) if aggregator_type == "lstm": self.lstm = layers.LSTM(units=self._in_src_feats) if aggregator_type != "gcn": self.fc_self = layers.Dense(out_feats, use_bias=bias) self.fc_neigh = layers.Dense(out_feats, use_bias=bias) def _lstm_reducer(self, nodes): """LSTM reducer NOTE(zihao): lstm reducer with default schedule (degree bucketing) is slow, we could accelerate this with degree padding in the future. """ m = nodes.mailbox["m"] # (B, L, D) rst = self.lstm(m) return {"neigh": rst} def call(self, graph, feat): r"""Compute GraphSAGE layer. 
Parameters ---------- graph : DGLGraph The graph. feat : tf.Tensor or pair of tf.Tensor If a tf.Tensor is given, it represents the input feature of shape :math:`(N, D_{in})` where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes. If a pair of tf.Tensor is given, the pair must contain two tensors of shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`. Returns ------- tf.Tensor The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}` is size of output feature. """ with graph.local_scope(): if isinstance(feat, tuple): feat_src = self.feat_drop(feat[0]) feat_dst = self.feat_drop(feat[1]) else: feat_src = feat_dst = self.feat_drop(feat) if graph.is_block: feat_dst = feat_src[: graph.number_of_dst_nodes()] h_self = feat_dst # Handle the case of graphs without edges if graph.num_edges() == 0: graph.dstdata["neigh"] = tf.cast( tf.zeros((graph.number_of_dst_nodes(), self._in_src_feats)), tf.float32, ) if self._aggre_type == "mean": graph.srcdata["h"] = feat_src graph.update_all(fn.copy_u("h", "m"), fn.mean("m", "neigh")) h_neigh = graph.dstdata["neigh"] elif self._aggre_type == "gcn": check_eq_shape(feat) graph.srcdata["h"] = feat_src graph.dstdata["h"] = feat_dst # same as above if homogeneous graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "neigh")) # divide in_degrees degs = tf.cast(graph.in_degrees(), tf.float32) h_neigh = (graph.dstdata["neigh"] + graph.dstdata["h"]) / ( tf.expand_dims(degs, -1) + 1 ) elif self._aggre_type == "pool": graph.srcdata["h"] = tf.nn.relu(self.fc_pool(feat_src)) graph.update_all(fn.copy_u("h", "m"), fn.max("m", "neigh")) h_neigh = graph.dstdata["neigh"] elif self._aggre_type == "lstm": graph.srcdata["h"] = feat_src graph.update_all(fn.copy_u("h", "m"), self._lstm_reducer) h_neigh = graph.dstdata["neigh"] else: raise KeyError( "Aggregator type {} not recognized.".format( self._aggre_type ) ) # GraphSAGE GCN does not require fc_self. 
class SGConv(layers.Layer):
    r"""Simplifying Graph Convolution (SGC) layer from `Simplifying Graph
    Convolutional Networks <https://arxiv.org/pdf/1902.07153>`__

    .. math::
        H^{K} = (\tilde{D}^{-1/2} \tilde{A} \tilde{D}^{-1/2})^K X \Theta

    where :math:`\tilde{A}` is :math:`A` + :math:`I`. The layer does not add
    the self-loops itself, so the input graph is expected to already contain
    them.

    Parameters
    ----------
    in_feats : int
        Number of input features; i.e, the number of dimensions of :math:`X`.
    out_feats : int
        Number of output features; i.e, the number of dimensions of
        :math:`H^{K}`.
    k : int
        Number of hops :math:`K`. Default: ``1``.
    cached : bool
        If True, the module caches the propagated (and, when ``norm`` is
        given, normalized) features

        .. math::
            (\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}})^K X

        at the first forward call and reuses them afterwards; only the final
        linear projection is recomputed. This should only be set to ``True``
        in the transductive learning setting. Default: ``False``.
    bias : bool
        If True, adds a learnable bias to the output. Default: ``True``.
    norm : callable activation function/layer or None, optional
        If not None, applies normalization to the propagated node features
        before the linear projection. Default: ``None``.
    allow_zero_in_degree : bool, optional
        If there are 0-in-degree nodes in the graph, the output for those
        nodes will be invalid since no message is passed to them. By default
        the module raises a DGLError when it detects such nodes. Setting this
        to ``True`` suppresses the check and leaves the handling to the user.
        Default: ``False``.

    Note
    ----
    Zero in-degree nodes lead to invalid output values because the aggregation
    function is applied on an empty input. A common practice to avoid this is
    to add a self-loop for each node if the graph is homogeneous:

    >>> g = ... # a DGLGraph
    >>> g = dgl.add_self_loop(g)

    Calling ``add_self_loop`` does not work for some graphs, for example
    heterogeneous graphs, since the edge type of the self-loop edges cannot be
    decided. Set ``allow_zero_in_degree`` to ``True`` in those cases and handle
    the zero-in-degree nodes manually, e.g. by filtering them out after the
    convolution.

    Example
    -------
    >>> import dgl
    >>> import numpy as np
    >>> import tensorflow as tf
    >>> from dgl.nn import SGConv
    >>>
    >>> with tf.device("CPU:0"):
    >>>     g = dgl.graph(([0,1,2,3,2,5], [1,2,3,4,0,3]))
    >>>     g = dgl.add_self_loop(g)
    >>>     feat = tf.ones((6, 10))
    >>>     conv = SGConv(10, 2, k=2, cached=True)
    >>>     res = conv(g, feat)
    >>>     res.shape
    TensorShape([6, 2])
    """

    def __init__(
        self,
        in_feats,
        out_feats,
        k=1,
        cached=False,
        bias=True,
        norm=None,
        allow_zero_in_degree=False,
    ):
        super().__init__()
        # ``fc`` is assigned before ``norm`` so that sub-layer tracking order
        # stays the same when ``norm`` is itself a Keras layer.
        self.fc = layers.Dense(out_feats, use_bias=bias)
        self.norm = norm
        self._k = k
        self._cached = cached
        self._cached_h = None  # filled on the first call when cached=True
        self._allow_zero_in_degree = allow_zero_in_degree

    def set_allow_zero_in_degree(self, set_value):
        r"""Enable or disable the zero-in-degree check.

        Parameters
        ----------
        set_value : bool
            New value of the ``allow_zero_in_degree`` flag.
        """
        self._allow_zero_in_degree = set_value

    def call(self, graph, feat):
        r"""Compute Simplifying Graph Convolution layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : tf.Tensor
            The input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number
            of nodes.

        Returns
        -------
        tf.Tensor
            The output feature of shape :math:`(N, D_{out})` where
            :math:`D_{out}` is size of output feature.

        Raises
        ------
        DGLError
            If the graph contains 0-in-degree nodes and
            ``allow_zero_in_degree`` is ``False``.

        Note
        ----
        If ``cached`` is set to True, ``feat`` and ``graph`` should not change
        during training: after the first call the cached features are used
        and both arguments are ignored for the propagation step.
        """
        with graph.local_scope():
            if not self._allow_zero_in_degree:
                has_isolated = (
                    tf.math.count_nonzero(graph.in_degrees() == 0) > 0
                )
                if has_isolated:
                    raise DGLError(
                        "There are 0-in-degree nodes in the graph, "
                        "output for those nodes will be invalid. "
                        "This is harmful for some applications, "
                        "causing silent performance regression. "
                        "Adding self-loop on the input graph by "
                        "calling `g = dgl.add_self_loop(g)` will resolve "
                        "the issue. Setting ``allow_zero_in_degree`` "
                        "to be `True` when constructing this module will "
                        "suppress the check and let the code run."
                    )

            if self._cached_h is None:
                # Symmetric normalization D^{-1/2}; degrees are clipped to at
                # least 1 so isolated nodes do not divide by zero.
                in_deg = tf.cast(graph.in_degrees(), tf.float32)
                in_deg = tf.clip_by_value(
                    in_deg, clip_value_min=1, clip_value_max=np.inf
                )
                scale = tf.expand_dims(tf.pow(in_deg, -0.5), 1)

                # K rounds of (D^{-1/2} A D^{-1/2}) applied to the features.
                for _ in range(self._k):
                    graph.ndata["h"] = feat * scale
                    graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
                    feat = graph.ndata.pop("h") * scale

                if self.norm is not None:
                    feat = self.norm(feat)

                if self._cached:
                    self._cached_h = feat
            else:
                feat = self._cached_h

            return self.fc(feat)
class MaxPooling(layers.Layer):
    r"""Readout layer taking the maximum over the nodes of each graph.

    .. math::
        r^{(i)} = \max_{k=1}^{N_i}\left( x^{(i)}_k \right)
    """

    def __init__(self):
        super().__init__()

    def call(self, graph, feat):
        r"""Compute max pooling.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : tf.Tensor
            The input feature with shape :math:`(N, *)` where
            :math:`N` is the number of nodes in the graph.

        Returns
        -------
        tf.Tensor
            The output feature with shape :math:`(B, *)`, where
            :math:`B` refers to the batch size.
        """
        # local_scope keeps the temporary "h" field from leaking to the caller.
        with graph.local_scope():
            graph.ndata["h"] = feat
            return max_nodes(graph, "h")
class WeightAndSum(layers.Layer):
    """Compute importance weights for atoms and perform a weighted sum.

    Each atom representation is mapped to a scalar weight by a dense layer
    followed by a sigmoid; the readout is the weighted sum of the atom
    representations within each graph.

    Parameters
    ----------
    in_feats : int
        Input atom feature size
    """

    def __init__(self, in_feats):
        super(WeightAndSum, self).__init__()
        self.in_feats = in_feats
        # The layers must be passed as a single list: the second positional
        # argument of ``Sequential`` is not a layer, so passing the activation
        # positionally would drop the sigmoid from the model.
        self.atom_weighting = tf.keras.Sequential(
            [layers.Dense(1), layers.Activation(tf.nn.sigmoid)]
        )

    def call(self, g, feats):
        """Compute molecule representations out of atom representations

        Parameters
        ----------
        g : DGLGraph
            DGLGraph with batch size B for processing multiple molecules in
            parallel
        feats : FloatTensor of shape (N, self.in_feats)
            Representations for all atoms in the molecules
            * N is the total number of atoms in all molecules

        Returns
        -------
        FloatTensor of shape (B, self.in_feats)
            Representations for B molecules
        """
        with g.local_scope():
            g.ndata["h"] = feats
            # One weight per atom, produced by the dense + sigmoid stack.
            g.ndata["w"] = self.atom_weighting(g.ndata["h"])
            h_g_sum = sum_nodes(g, "h", "w")

        return h_g_sum
outputs[dtype].append(dstdata) # Aggregate the results for each destination node type rsts = {} for ntype, ntype_outputs in outputs.items(): if len(ntype_outputs) != 0: rsts[ntype] = aggregate(ntype_outputs) return rsts Examples -------- Create a heterograph with three types of relations and nodes. >>> import dgl >>> g = dgl.heterograph({ ... ('user', 'follows', 'user') : edges1, ... ('user', 'plays', 'game') : edges2, ... ('store', 'sells', 'game') : edges3}) Create a ``HeteroGraphConv`` that applies different convolution modules to different relations. Note that the modules for ``'follows'`` and ``'plays'`` do not share weights. >>> import dgl.nn.pytorch as dglnn >>> conv = dglnn.HeteroGraphConv({ ... 'follows' : dglnn.GraphConv(...), ... 'plays' : dglnn.GraphConv(...), ... 'sells' : dglnn.SAGEConv(...)}, ... aggregate='sum') Call forward with some ``'user'`` features. This computes new features for both ``'user'`` and ``'game'`` nodes. >>> import tensorflow as tf >>> h1 = {'user' : tf.random.normal((g.num_nodes('user'), 5))} >>> h2 = conv(g, h1) >>> print(h2.keys()) dict_keys(['user', 'game']) Call forward with both ``'user'`` and ``'store'`` features. Because both the ``'plays'`` and ``'sells'`` relations will update the ``'game'`` features, their results are aggregated by the specified method (i.e., summation here). >>> f1 = {'user' : ..., 'store' : ...} >>> f2 = conv(g, f1) >>> print(f2.keys()) dict_keys(['user', 'game']) Call forward with some ``'store'`` features. This only computes new features for ``'game'`` nodes. >>> g1 = {'store' : ...} >>> g2 = conv(g, g1) >>> print(g2.keys()) dict_keys(['game']) Call forward with a pair of inputs is allowed and each submodule will also be invoked with a pair of inputs. 
>>> x_src = {'user' : ..., 'store' : ...} >>> x_dst = {'user' : ..., 'game' : ...} >>> y_dst = conv(g, (x_src, x_dst)) >>> print(y_dst.keys()) dict_keys(['user', 'game']) Notes ----- HeteroGraphConv requires that there is a module for every ``'etype'`` in an input graph. If you want to apply HeteroGraphConv to a subset of a graph's ``'etypes'``, you must create a new graph using for example :func:`~dgl.edge_type_subgraph()`. Parameters ---------- mods : dict[str, nn.Module] Modules associated with every edge types. The forward function of each module must have a `DGLGraph` object as the first argument, and its second argument is either a tensor object representing the node features or a pair of tensor object representing the source and destination node features. aggregate : str, callable, optional Method for aggregating node features generated by different relations. Allowed string values are 'sum', 'max', 'min', 'mean', 'stack'. The 'stack' aggregation is performed along the second dimension, whose order is deterministic. User can also customize the aggregator by providing a callable instance. For example, aggregation by summation is equivalent to the follows: .. code:: def my_agg_func(tensors, dsttype): # tensors: is a list of tensors to aggregate # dsttype: string name of the destination node type for which the # aggregation is performed stacked = tf.stack(tensors, axis=0) return tf.reduce_sum(stacked, axis=0) Attributes ---------- mods : dict[str, nn.Module] Modules associated with every edge types. """ def __init__(self, mods, aggregate="sum"): super(HeteroGraphConv, self).__init__() self.mods = mods # Do not break if graph has 0-in-degree nodes. # Because there is no general rule to add self-loop for heterograph. 
def get_aggregate_fn(agg):
    """Internal function to get the aggregation function for node data
    generated from different relations.

    Parameters
    ----------
    agg : str
        Method for aggregating node features generated by different relations.
        Allowed values are 'sum', 'max', 'min', 'mean', 'stack'.

    Returns
    -------
    callable
        Aggregator function that takes a list of tensors to aggregate
        and the destination node type, and returns one aggregated tensor
        (or None if the list is empty).

    Raises
    ------
    DGLError
        If ``agg`` is not one of the allowed values.
    """
    if agg == "stack":

        def stack_agg(inputs, dsttype):  # pylint: disable=unused-argument
            if len(inputs) == 0:
                return None
            # Relations are stacked along a new second dimension.
            return tf.stack(inputs, axis=1)

        return stack_agg

    if agg == "sum":
        fn = tf.reduce_sum
    elif agg == "max":
        fn = tf.reduce_max
    elif agg == "min":
        fn = tf.reduce_min
    elif agg == "mean":
        fn = tf.reduce_mean
    else:
        # This module does not import DGLError at the top level, so raising it
        # directly used to fail with NameError; import it where it is needed.
        from ...base import DGLError  # pylint: disable=import-outside-toplevel

        raise DGLError(
            "Invalid cross type aggregator. Must be one of "
            '"sum", "max", "min", "mean" or "stack". But got "%s"' % agg
        )

    def aggfn(inputs, dsttype):  # pylint: disable=unused-argument
        if len(inputs) == 0:
            return None
        # Stack the per-relation results and reduce over the relation axis.
        stacked = tf.stack(inputs, axis=0)
        return fn(stacked, axis=0)

    return aggfn
def bmm_maybe_select(A, B, index):
    """Slice submatrices of B by the given index and perform bmm.

    B is a 3D tensor of shape (N, D1, D2), which can be viewed as a stack of
    N matrices of shape (D1, D2). The input index is an integer vector of
    length M. A could be either:
    (1) a dense tensor of shape (M, D1),
    (2) an integer vector of length M.
    The result C is a 2D matrix of shape (M, D2)

    For case (1), C is computed by bmm:
    ::

        C[i, :] = matmul(A[i, :], B[index[i], :, :])

    For case (2), C is computed by index select:
    ::

        C[i, :] = B[index[i], A[i], :]

    Parameters
    ----------
    A : tf.Tensor
        lhs tensor
    B : tf.Tensor
        rhs tensor
    index : tf.Tensor
        index tensor

    Returns
    -------
    C : tf.Tensor
        return tensor
    """
    if A.dtype == tf.int64 and len(A.shape) == 1:
        # Faster version of B[index, A, :]: flatten B from (N, D1, D2) to
        # (N * D1, D2), where row ``n * D1 + d`` holds B[n, d, :].
        # D1 must be read before reshaping; afterwards shape[1] is D2, which
        # would give wrong rows whenever D1 != D2.
        rows_per_matrix = B.shape[1]
        flat_B = tf.reshape(B, (-1, B.shape[2]))
        flatidx = index * rows_per_matrix + A
        return tf.gather(flat_B, flatidx)
    else:
        BB = tf.gather(B, index)
        return tf.squeeze(tf.matmul(tf.expand_dims(A, 1), BB), 1)
edge_softmax(graph, logits, eids=ALL, norm_by="dst"): r"""Compute softmax over weights of incoming edges for every node. For a node :math:`i`, edge softmax is an operation that computes .. math:: a_{ij} = \frac{\exp(z_{ij})}{\sum_{j\in\mathcal{N}(i)}\exp(z_{ij})} where :math:`z_{ij}` is a signal of edge :math:`j\rightarrow i`, also called logits in the context of softmax. :math:`\mathcal{N}(i)` is the set of nodes that have an edge to :math:`i`. By default edge softmax is normalized by destination nodes(i.e. :math:`ij` are incoming edges of `i` in the formula above). We also support edge softmax normalized by source nodes(i.e. :math:`ij` are outgoing edges of `i` in the formula). The former case corresponds to softmax in GAT and Transformer, and the latter case corresponds to softmax in Capsule network. An example of using edge softmax is in `Graph Attention Network `__ where the attention weights are computed with this operation. Other non-GNN examples using this are `Transformer `__, `Capsule `__, etc. Parameters ---------- graph : DGLGraph The graph over which edge softmax will be performed. logits : torch.Tensor or dict of torch.Tensor The input edge feature. Heterogeneous graphs can have dict of tensors where each tensor stores the edge features of the corresponding relation type. eids : torch.Tensor or ALL, optional The IDs of the edges to apply edge softmax. If ALL, it will apply edge softmax to all edges in the graph. Default: ALL. norm_by : str, could be `src` or `dst` Normalized by source nodes or destination nodes. Default: `dst`. Returns ------- Tensor or tuple of tensors Softmax value. Notes ----- * Input shape: :math:`(E, *, 1)` where * means any number of additional dimensions, :math:`E` equals the length of eids. If the `eids` is ALL, :math:`E` equals the number of edges in the graph. * Return shape: :math:`(E, *, 1)` Examples on a homogeneous graph ------------------------------- The following example uses PyTorch backend. 
>>> from dgl.nn.functional import edge_softmax >>> import dgl >>> import torch as th Create a :code:`DGLGraph` object and initialize its edge features. >>> g = dgl.graph((th.tensor([0, 0, 0, 1, 1, 2]), th.tensor([0, 1, 2, 1, 2, 2]))) >>> edata = th.ones(6, 1).float() >>> edata tensor([[1.], [1.], [1.], [1.], [1.], [1.]]) Apply edge softmax over g: >>> edge_softmax(g, edata) tensor([[1.0000], [0.5000], [0.3333], [0.5000], [0.3333], [0.3333]]) Apply edge softmax over g normalized by source nodes: >>> edge_softmax(g, edata, norm_by='src') tensor([[0.3333], [0.3333], [0.3333], [0.5000], [0.5000], [1.0000]]) Apply edge softmax to first 4 edges of g: >>> edge_softmax(g, edata[:4], th.Tensor([0,1,2,3])) tensor([[1.0000], [0.5000], [1.0000], [0.5000]]) Examples on a heterogeneous graph --------------------------------- Create a heterogeneous graph and initialize its edge features. >>> hg = dgl.heterograph({ ... ('user', 'follows', 'user'): ([0, 0, 1], [0, 1, 2]), ... ('developer', 'develops', 'game'): ([0, 1], [0, 1]) ... }) >>> edata_follows = th.ones(3, 1).float() >>> edata_develops = th.ones(2, 1).float() >>> edata_dict = {('user', 'follows', 'user'): edata_follows, ... 
def gather_mm(a, b, *, idx_b):
    r"""Multiply every row of ``a`` with the matrix of ``b`` picked by ``idx_b``.

    Let the result tensor be ``c``, the operator conducts the following
    computation:

      c[i] = a[i] @ b[idx_b[i]]

    , where len(c) == len(idx_b)

    Parameters
    ----------
    a : Tensor
        A 2-D tensor of shape ``(N, D1)``
    b : Tensor
        A 3-D tensor of shape ``(R, D1, D2)``
    idx_b : Tensor
        An 1-D integer tensor of shape ``(N,)``. Keyword-only.

    Returns
    -------
    Tensor
        The output dense matrix of shape ``(N, D2)``
    """
    num_rows, in_dim = F.shape(a)
    num_mats, _, out_dim = F.shape(b)

    small_workload = num_rows <= 1000000 and in_dim <= 8 and out_dim <= 8
    if small_workload:
        return F.gather_mm(a, b, None, idx_b)

    # Large workload: group the rows of ``a`` by their matrix index, run one
    # segment_mm over the groups, then restore the original row order.
    import torch

    sorted_idx, order = torch.sort(idx_b)
    inverse_order = torch.sort(order)[1]
    grouped_a = torch.index_select(a, 0, order)

    # Start offset of each of the R segments inside the sorted index vector.
    seg_start = torch.searchsorted(
        sorted_idx, torch.arange(num_mats, device=a.device)
    )
    total = torch.tensor([len(idx_b)], device=a.device)
    seg_end = torch.cat([seg_start[1:], total])
    # XXX(minjie): cause device synchronize
    seglen = (seg_end - seg_start).cpu()

    grouped_out = F.segment_mm(grouped_a, b, seglen)
    return torch.index_select(grouped_out, 0, inverse_order)
def gsddmm(g, op, lhs_data, rhs_data, lhs_target="u", rhs_target="v"):
    r"""Generalized Sampled-Dense-Dense Matrix Multiplication interface.

    Computes edge features by applying :attr:`op` to the lhs and rhs features.

    .. math::
        x_{e} = \phi(x_{lhs}, x_{rhs}), \forall (u,e,v)\in \mathcal{G}

    where :math:`x_{e}` is the returned feature on edges and :math:`x_u`,
    :math:`x_v` refers to :attr:`u`, :attr:`v` respectively. :math:`\phi`
    is the binary operator :attr:`op`, and :math:`\mathcal{G}` is the graph
    we apply gsddmm on: :attr:`g`. :math:`lhs` and :math:`rhs` are one of
    :math:`u,v,e`'s.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    op : str
        Binary operator, could be ``add``, ``sub``, ``mul``, ``div``, ``dot``,
        ``copy_lhs``, ``copy_rhs``.
    lhs_data : tensor or None
        The left operand, could be None if it's not required by op.
    rhs_data : tensor or None
        The right operand, could be None if it's not required by op.
    lhs_target: str
        Choice of ``u``(source), ``e``(edge) or ``v``(destination) for left
        operand.
    rhs_target: str
        Choice of ``u``(source), ``e``(edge) or ``v``(destination) for right
        operand.

    Returns
    -------
    tensor
        The result tensor.
    """
    graph_index = g._graph

    if graph_index.number_of_etypes() != 1:
        # Multiple edge types: operands are sequences of tensors. The copy
        # ops only need one side, so the other side is filled with None.
        if op == "copy_lhs":
            rhs_data = [None] * graph_index.number_of_etypes()
        elif op == "copy_rhs":
            lhs_data = [None] * graph_index.number_of_ntypes()

        # TODO (Israt): Call reshape_lhs_rhs() on lhs and rhs data to match their dimension
        # and avoid broadcasting issue. Handle the case where different nodes have
        # different dimensions, and different etypes may need different broadcasting
        # dims for the same node.
        operands = (*lhs_data, *rhs_data)
        return gsddmm_internal_hetero(
            graph_index, op, len(lhs_data), lhs_target, rhs_target, *operands
        )

    # Single edge type: pad the operand with fewer dimensions unless the op
    # just copies one side.
    if op not in ("copy_lhs", "copy_rhs"):
        lhs_data, rhs_data = reshape_lhs_rhs(lhs_data, rhs_data)
    return gsddmm_internal(
        graph_index, op, lhs_data, rhs_data, lhs_target, rhs_target
    )
def copy_e(g, x):
    r"""Generalized SDDMM function that returns the edge features unchanged.

    The graph argument is not used; the ``(g, x)`` signature mirrors
    :func:`copy_u` and :func:`copy_v`.

    Parameters
    ----------
    g : DGLGraph
        The input graph (unused).
    x : tensor
        The edge features.

    Returns
    -------
    tensor
        ``x`` itself.
    """
    return x
Its summation must be equal to the first dimension of the ``value`` tensor. Zero-length segments are allowed. Parameters ---------- seglen : Tensor Segment lengths. value : Tensor Value to aggregate. reducer : str, optional Aggregation method. Can be 'sum', 'max', 'min', 'mean'. Returns ------- Tensor Aggregated tensor of shape ``(len(seglen), value.shape[1:])``. Examples -------- >>> import dgl >>> import torch as th >>> val = th.ones(10, 3) >>> seg = th.tensor([1, 0, 5, 4]) # 4 segments >>> dgl.segment_reduce(seg, val) tensor([[1., 1., 1.], [0., 0., 0.], [5., 5., 5.], [4., 4., 4.]]) """ offsets = F.cumsum( F.cat([F.zeros((1,), F.dtype(seglen), F.context(seglen)), seglen], 0), 0 ) if reducer == "mean": rst = F.segment_reduce("sum", value, offsets) rst_shape = F.shape(rst) z = F.astype(F.clamp(seglen, 1, len(value)), F.dtype(rst)) z_shape = (rst_shape[0],) + (1,) * (len(rst_shape) - 1) return rst / F.reshape(z, z_shape) elif reducer in ["min", "sum", "max"]: rst = F.segment_reduce(reducer, value, offsets) if reducer in ["min", "max"]: rst = F.replace_inf_with_zero(rst) return rst else: raise DGLError("reducer {} not recognized.".format(reducer)) def segment_softmax(seglen, value): """Performa softmax on each segment. The first argument ``seglen`` stores the length of each segment. Its summation must be equal to the first dimension of the ``value`` tensor. Zero-length segments are allowed. Parameters ---------- seglen : Tensor Segment lengths. value : Tensor Value to aggregate. Returns ------- Tensor Result tensor of the same shape as the ``value`` tensor. 
def reshape_lhs_rhs(lhs_data, rhs_data):
    r"""Expand dims so that there will be no broadcasting issues with different
    number of dimensions. For example, given two shapes (N, 3, 1), (E, 5, 3, 4)
    that are valid broadcastable shapes, change them to (N, 1, 3, 1) and
    (E, 5, 3, 4)

    Parameters
    ----------
    lhs_data : tensor or None
        The left operand, could be None if it's not required by op.
    rhs_data : tensor or None
        The right operand, could be None if it's not required by op.

    Returns
    -------
    tuple
        ``(lhs_data, rhs_data)``. When both operands are given and their
        number of dimensions differ, the one with fewer dimensions is reshaped
        by inserting size-1 axes right after its first dimension. Otherwise
        the inputs are returned unchanged.
    """
    # A missing operand has no shape to align with; return the pair as is
    # rather than querying the shape of ``None``.
    if lhs_data is None or rhs_data is None:
        return lhs_data, rhs_data

    lhs_shape = F.shape(lhs_data)
    rhs_shape = F.shape(rhs_data)
    if len(lhs_shape) != len(rhs_shape):
        max_ndims = max(len(lhs_shape), len(rhs_shape))
        lhs_pad_ndims = max_ndims - len(lhs_shape)
        rhs_pad_ndims = max_ndims - len(rhs_shape)
        # Keep the leading (node/edge) dimension in place and pad right after
        # it, so trailing feature dimensions stay right-aligned.
        new_lhs_shape = (lhs_shape[0],) + (1,) * lhs_pad_ndims + lhs_shape[1:]
        new_rhs_shape = (rhs_shape[0],) + (1,) * rhs_pad_ndims + rhs_shape[1:]
        lhs_data = F.reshape(lhs_data, new_lhs_shape)
        rhs_data = F.reshape(rhs_data, new_rhs_shape)
    return lhs_data, rhs_data
def _attach_zerodeg_note(docstring, reducer):
    """Append the zero-in-degree note for ``reducer`` to ``docstring``.

    Every reducer gets a sentence saying that nodes without incoming messages
    receive zero. For ``min`` and ``max`` a second sentence is added that
    names the infinity value being replaced.

    Parameters
    ----------
    docstring : str
        The docstring to extend.
    reducer : str
        Name of the reduce operator the docstring describes.

    Returns
    -------
    str
        The extended docstring.
    """
    zero_note = """
    The {} function will return zero for nodes with no incoming messages.""".format(
        reducer
    )
    if reducer not in ("min", "max"):
        return docstring + zero_note

    inf_name = "infinity" if reducer == "min" else "negative infinity"
    impl_note = """
    This is implemented by replacing all {} values to zero.
    """.format(
        inf_name
    )
    return docstring + zero_note + impl_note
Notes ----- This function supports autograd (computing input gradients given the output gradient). If the feature shape of two input operands do not match, we first broadcasts the features to a unified shape (note that the memory usage will not increase accordingly) and then performs the operation. Broadcasting follows NumPy semantics. Please see https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html for more details about the NumPy broadcasting semantics. """.format( binary_op, reduce_op ) docstring = _attach_zerodeg_note(docstring, reduce_op) def func(g, x, y): return gspmm(g, binary_op, reduce_op, x, y) func.__name__ = name func.__doc__ = docstring return func def _gen_copy_reduce_func(binary_op, reduce_op): name = "{}_{}".format(binary_op, reduce_op) binary_str = { "copy_u": "It copies node feature to edge as the message.", "copy_e": "It regards edge feature as message.", } x_str = {"copy_u": "source node", "copy_e": "edge"} docstring = lambda binary_op: _attach_zerodeg_note( """Generalized SpMM function. {} Then aggregates the message by {} on destination nodes. Parameters ---------- g : DGLGraph The input graph x : tensor The {} features. Returns ------- tensor The result tensor. Notes ----- This function supports autograd (computing input gradients given the output gradient). 
def _load_backend(mod_name):
    """Import the backend submodule ``mod_name`` and mirror its namespace here.

    The submodule is imported relative to this package and every entry of its
    module dictionary is set as an attribute on this module.

    Parameters
    ----------
    mod_name : str
        Name of the backend submodule to import.
    """
    backend_mod = importlib.import_module(".%s" % mod_name, __name__)
    host_mod = sys.modules[__name__]
    # NOTE(review): this copies *all* entries, including dunder attributes such
    # as ``__name__``; confirm whether that is intended.
    for attr_name, attr_value in vars(backend_mod).items():
        setattr(host_mod, attr_name, attr_value)
import NDArrayPartition from ...utils import ( create_shared_mem_array, gather_pinned_tensor_rows, get_shared_mem_array, pin_memory_inplace, scatter_pinned_tensor_rows, ) class SparseGradOptimizer(abc.ABC): r"""The abstract sparse optimizer. Note: dgl sparse optimizer only work with dgl.NodeEmbedding Parameters ---------- params : list of NodeEmbedding The list of NodeEmbeddings. lr : float The learning rate. """ def __init__(self, params, lr): self._params = params self._lr = lr self._rank = None self._world_size = None self._shared_cache = {} self._clean_grad = False self._opt_meta = {} self._comm = None self._first_step = True self._device = None # hold released shared memory to let other process to munmap it first # otherwise it will crash the training self.shmem_buffer_holder = [] assert len(params) > 0, "Empty parameters" # if we are using shared memory for communication for emb in params: assert isinstance( emb, NodeEmbedding ), "DGL SparseOptimizer only supports dgl.nn.NodeEmbedding" if self._rank is None: self._rank = emb.rank self._world_size = emb.world_size else: assert ( self._rank == emb.rank ), "MultiGPU rank for each embedding should be same." assert ( self._world_size == emb.world_size ), "MultiGPU world_size for each embedding should be same." assert not self._rank is None assert not self._world_size is None def step(self): """The step function. 
The step function is invoked at the end of every batch to update embeddings """ # on the first step, check to see if the grads are on the GPU if self._first_step: for emb in self._params: for _, data in emb._trace: if data.grad.device.type == "cuda": # create a communicator if self._device: assert ( self._device == data.grad.device ), "All gradients must be on the same device" else: self._device = data.grad.device else: assert ( not self._device ), "All gradients must be on the same device" # distributed backend use nccl if self._device and ( not th.distributed.is_initialized() or th.distributed.get_backend() == "nccl" ): # device is only set if the grads are on a GPU self._comm_setup() else: self._shared_setup() self._first_step = False if self._comm: self._comm_step() else: self._shared_step() @abstractmethod def setup(self, params): """This is function where subclasses can perform any setup they need to. It will be called during the first step, and communicators or shared memory will have been setup before this call. Parameters ---------- params : list of NodeEmbedding The list of NodeEmbeddings. 
""" def _comm_setup(self): self._comm = True def _shared_setup(self): for emb in self._params: emb_name = emb.name if self._rank == 0: # the master gpu process opt_meta = create_shared_mem_array( emb_name + "_opt_meta", (self._world_size, self._world_size), th.int32, ).zero_() if self._rank == 0: emb.store.set(emb_name + "_opt_meta", emb_name) self._opt_meta[emb_name] = opt_meta elif self._rank > 0: # receive emb.store.wait([emb_name + "_opt_meta"]) opt_meta = get_shared_mem_array( emb_name + "_opt_meta", (self._world_size, self._world_size), th.int32, ) self._opt_meta[emb_name] = opt_meta def _comm_step(self): with th.no_grad(): idx_in = {} grad_in = {} for emb in self._params: # pylint: disable=too-many-nested-blocks emb_name = emb.name partition = emb.partition if not partition: # use default partitioning partition = NDArrayPartition( emb.num_embeddings, self._world_size if self._world_size > 0 else 1, mode="remainder", ) # we need to combine gradients from multiple forward paths if len(emb._trace) == 0: idx = th.zeros((0,), dtype=th.long, device=self._device) grad = th.zeros( (0, emb.embedding_dim), dtype=th.float32, device=self._device, ) elif len(emb._trace) == 1: # the special case where we can use the tensors as is # without any memcpy's idx, grad = emb._trace[0] grad = grad.grad.data else: idx = [] grad = [] for i, data in emb._trace: idx.append(i) grad.append(data.grad.data) idx = th.cat(idx, dim=0) grad = th.cat(grad, dim=0) ( idx_in[emb_name], grad_in[emb_name], ) = nccl.sparse_all_to_all_push(idx, grad, partition=partition) if emb.partition: # if the embedding is partitioned, map back to indexes # into the local tensor idx_in[emb_name] = partition.map_to_local(idx_in[emb_name]) if self._clean_grad: # clean gradient track for emb in self._params: emb.reset_trace() self._clean_grad = False for emb in self._params: emb_name = emb.name idx = idx_in[emb_name] grad = grad_in[emb_name] self.update(idx, grad, emb) def _shared_step(self): with th.no_grad(): # 
Frequently alloc and free shared memory to hold intermediate tensor is expensive # We cache shared memory buffers in shared_emb. shared_emb = {emb.name: ([], []) for emb in self._params} # Go through all sparse embeddings for emb in self._params: # pylint: disable=too-many-nested-blocks emb_name = emb.name # we need to combine gradients from multiple forward paths idx = [] grad = [] for i, data in emb._trace: idx.append(i) grad.append(data.grad.data) # If the sparse embedding is not used in the previous forward step # The idx and grad will be empty, initialize them as empty tensors to # avoid crashing the optimizer step logic. # # Note: we cannot skip the gradient exchange and update steps as other # working processes may send gradient update requests corresponding # to certain embedding to this process. idx = ( th.cat(idx, dim=0) if len(idx) != 0 else th.zeros((0,), dtype=th.long, device=th.device("cpu")) ) grad = ( th.cat(grad, dim=0) if len(grad) != 0 else th.zeros( (0, emb.embedding_dim), dtype=th.float32, device=th.device("cpu"), ) ) device = grad.device idx_dtype = idx.dtype grad_dtype = grad.dtype grad_dim = grad.shape[1] if self._world_size > 1: if emb_name not in self._shared_cache: self._shared_cache[emb_name] = {} # Each training process takes the resposibility of updating a range # of node embeddings, thus we can parallel the gradient update. # The overall progress includes: # 1. In each training process: # 1.a Deciding which process a node embedding belongs to according # to the formula: process_id = node_idx mod num_of_process(N) # 1.b Split the node index tensor and gradient tensor into N parts # according to step 1. # 1.c Write each node index sub-tensor and gradient sub-tensor into # different DGL shared memory buffers. # 2. Cross training process synchronization # 3. In each traning process: # 3.a Collect node index sub-tensors and gradient sub-tensors # 3.b Do gradient update # 4. 
Done idx_split = th.remainder(idx, self._world_size).long() for i in range(self._world_size): mask = idx_split == i idx_i = idx[mask] grad_i = grad[mask] if i == self._rank: shared_emb[emb_name][0].append(idx_i) shared_emb[emb_name][1].append(grad_i) else: # currently nccl does not support Alltoallv operation # we need to use CPU shared memory to share gradient # across processes idx_i = idx_i.to(th.device("cpu")) grad_i = grad_i.to(th.device("cpu")) idx_shmem_name = "idx_{}_{}_{}".format( emb_name, self._rank, i ) grad_shmem_name = "grad_{}_{}_{}".format( emb_name, self._rank, i ) # Create shared memory to hold temporary index and gradient tensor for # cross-process send and recv. if ( idx_shmem_name not in self._shared_cache[emb_name] or self._shared_cache[emb_name][ idx_shmem_name ].shape[0] < idx_i.shape[0] ): if ( idx_shmem_name in self._shared_cache[emb_name] ): self.shmem_buffer_holder.append( self._shared_cache[emb_name][ idx_shmem_name ] ) self.shmem_buffer_holder.append( self._shared_cache[emb_name][ grad_shmem_name ] ) # The total number of buffers is the number of NodeEmbeddings * # world_size * (world_size - 1). The minimun buffer size is 128. # # We extend the buffer by idx_i.shape[0] * 2 to avoid # frequent shared memory allocation. # The overall buffer cost will be smaller than three times # the maximum memory requirement for sharing gradients. 
buffer_size = ( 128 if idx_i.shape[0] < 128 else idx_i.shape[0] * 2 ) idx_shmem = create_shared_mem_array( "{}_{}".format(idx_shmem_name, buffer_size), (buffer_size,), idx_dtype, ) grad_shmem = create_shared_mem_array( "{}_{}".format( grad_shmem_name, buffer_size ), (buffer_size, grad_dim), grad_dtype, ) self._shared_cache[emb_name][ idx_shmem_name ] = idx_shmem self._shared_cache[emb_name][ grad_shmem_name ] = grad_shmem # Fill shared memory with temporal index tensor and gradient tensor self._shared_cache[emb_name][idx_shmem_name][ : idx_i.shape[0] ] = idx_i self._shared_cache[emb_name][grad_shmem_name][ : idx_i.shape[0] ] = grad_i self._opt_meta[emb_name][self._rank][ i ] = idx_i.shape[0] else: shared_emb[emb_name][0].append(idx) shared_emb[emb_name][1].append(grad) # make sure the idx shape is passed to each process through opt_meta if self._world_size > 1: th.distributed.barrier() for emb in self._params: # pylint: disable=too-many-nested-blocks emb_name = emb.name if self._world_size > 1: # The first element in shared_emb[emb_name][0] is the local idx device = shared_emb[emb_name][0][0].device # gather gradients from all other processes for i in range(self._world_size): if i != self._rank: idx_shmem_name = "idx_{}_{}_{}".format( emb_name, i, self._rank ) grad_shmem_name = "grad_{}_{}_{}".format( emb_name, i, self._rank ) size = self._opt_meta[emb_name][i][self._rank] # Retrive shared memory holding the temporal index and gradient # tensor that is sent to current training process if ( idx_shmem_name not in self._shared_cache[emb_name] or self._shared_cache[emb_name][ idx_shmem_name ].shape[0] < size ): buffer_size = 128 if size < 128 else size * 2 idx_shmem = get_shared_mem_array( "{}_{}".format(idx_shmem_name, buffer_size), (buffer_size,), idx_dtype, ) grad_shmem = get_shared_mem_array( "{}_{}".format( grad_shmem_name, buffer_size ), (buffer_size, grad_dim), grad_dtype, ) self._shared_cache[emb_name][ idx_shmem_name ] = idx_shmem self._shared_cache[emb_name][ 
grad_shmem_name ] = grad_shmem idx_i = self._shared_cache[emb_name][ idx_shmem_name ][:size] grad_i = self._shared_cache[emb_name][ grad_shmem_name ][:size] shared_emb[emb_name][0].append( idx_i.to(device, non_blocking=True) ) shared_emb[emb_name][1].append( grad_i.to(device, non_blocking=True) ) if self._clean_grad: # clean gradient track for emb in self._params: emb.reset_trace() self._clean_grad = False for emb in self._params: emb_name = emb.name idx = th.cat(shared_emb[emb_name][0], dim=0) grad = th.cat(shared_emb[emb_name][1], dim=0) self.update(idx, grad, emb) # synchronized gradient update if self._world_size > 1: th.distributed.barrier() @abstractmethod def update(self, idx, grad, emb): """Update embeddings in a sparse manner Sparse embeddings are updated in mini batches. We maintain gradient states for each embedding so they can be updated separately. Parameters ---------- idx : tensor Index of the embeddings to be updated. grad : tensor Gradient of each embedding. emb : dgl.nn.NodeEmbedding Sparse node embedding to update. """ def zero_grad(self): """clean grad cache""" self._clean_grad = True def state_dict(self, **kwargs): # pylint: disable=unused-argument """Return a copy of the whole optimizer states stored in CPU memory. If this is a multi-processing instance, the states will be returned in shared memory. If the underlying embedding is currently stored on multiple GPUs, all processes must call this method in the same order. NOTE: This method must be called by all processes sharing the underlying embedding, or it may result in a deadlock. Returns ------- dictionary of optimizer states The optimizer states stored in CPU memory. """ return { "state": { emb.name: emb._all_get_optm_state() for emb in self._params }, "param_groups": self.param_groups, } def load_state_dict( self, state_dict, **kwargs ): # pylint: disable=unused-argument """Load the optimizer states. 
This method must be called by all processes sharing the underlying embedding with identical :attr:`state_dict`. NOTE: This method must be called by all processes sharing the underlying embedding, or it may result in a deadlock. Parameters ---------- state_dict : dictionary of optimizer states The global states to pull values from. """ for emb in self._params: emb._all_set_optm_state(state_dict["state"][emb.name]) self._set_param_groups(state_dict["param_groups"]) @property @abstractmethod def param_groups(self): """Emulate 'param_groups' of torch.optim.Optimizer. Different from that, the returned 'param_groups' doesn't contain parameters because getting the whole embedding is very expensive. It contains other attributes, e.g., lr, eps, for debugging. """ @abstractmethod def _set_param_groups(self, groups): """A helper method to load param_groups from saved state_dict.""" class SparseAdagrad(SparseGradOptimizer): r"""Node embedding optimizer using the Adagrad algorithm. This optimizer implements a sparse version of Adagrad algorithm for optimizing :class:`dgl.nn.NodeEmbedding`. Being sparse means it only updates the embeddings whose gradients have updates, which are usually a very small portion of the total embeddings. Adagrad maintains a :math:`G_{t,i,j}` for every parameter in the embeddings, where :math:`G_{t,i,j}=G_{t-1,i,j} + g_{t,i,j}^2` and :math:`g_{t,i,j}` is the gradient of the dimension :math:`j` of embedding :math:`i` at step :math:`t`. NOTE: The support of sparse Adagrad optimizer is experimental. Parameters ---------- params : list[dgl.nn.NodeEmbedding] The list of dgl.nn.NodeEmbedding. lr : float The learning rate. 
eps : float, Optional The term added to the denominator to improve numerical stability Default: 1e-10 Examples -------- >>> def initializer(emb): th.nn.init.xavier_uniform_(emb) return emb >>> emb = dgl.nn.NodeEmbedding(g.num_nodes(), 10, 'emb', init_func=initializer) >>> optimizer = dgl.optim.SparseAdagrad([emb], lr=0.001) >>> for blocks in dataloader: ... ... ... feats = emb(nids, gpu_0) ... loss = F.sum(feats + 1, 0) ... loss.backward() ... optimizer.step() """ def __init__(self, params, lr, eps=1e-10): super(SparseAdagrad, self).__init__(params, lr) self._eps = eps # setup tensors for optimizer states self.setup(self._params) def setup(self, params): # We need to register a state sum for each embedding in the kvstore. for emb in params: assert isinstance( emb, NodeEmbedding ), "SparseAdagrad only supports dgl.nn.NodeEmbedding" emb_name = emb.name if th.device(emb.weight.device) == th.device("cpu"): # if our embedding is on the CPU, our state also has to be if self._rank < 0: state = th.empty( emb.weight.shape, dtype=th.float32, device=th.device("cpu"), ).zero_() elif self._rank == 0: state = create_shared_mem_array( emb_name + "_state", emb.weight.shape, th.float32 ).zero_() if self._world_size > 1: emb.store.set(emb_name + "_opt", emb_name) elif self._rank > 0: # receive emb.store.wait([emb_name + "_opt"]) state = get_shared_mem_array( emb_name + "_state", emb.weight.shape, th.float32 ) else: # distributed state on on gpu state = th.empty( emb.weight.shape, dtype=th.float32, device=emb.weight.device, ).zero_() emb.set_optm_state((state,)) def update(self, idx, grad, emb): """Update embeddings in a sparse manner Sparse embeddings are updated in mini batches. We maintain gradient states for each embedding so they can be updated separately. Parameters ---------- idx : tensor Index of the embeddings to be updated. grad : tensor Gradient of each embedding. emb : dgl.nn.NodeEmbedding Sparse embedding to update. 
""" eps = self._eps clr = self._lr # the update is non-linear so indices must be unique grad_indices, inverse, cnt = th.unique( idx, return_inverse=True, return_counts=True ) grad_values = th.zeros( (grad_indices.shape[0], grad.shape[1]), device=grad.device ) grad_values.index_add_(0, inverse, grad) grad_values = grad_values / cnt.unsqueeze(1) grad_sum = grad_values * grad_values (state,) = emb.optm_state state_dev = state.device state_idx = grad_indices.to(state_dev) grad_state = state[state_idx].to(grad.device) grad_state += grad_sum state[state_idx] = grad_state.to(state_dev) std_values = grad_state.add_(eps).sqrt_() tmp = clr * grad_values / std_values emb.weight[state_idx] -= tmp.to(state_dev) @property def param_groups(self): """Emulate 'param_groups' of torch.optim.Optimizer. Different from that, the returned 'param_groups' doesn't contain parameters because getting the whole embedding is very expensive. It contains other attributes, e.g., lr, eps, for debugging. """ return [{"lr": self._lr, "eps": self._eps}] def _set_param_groups(self, groups): """A helper method to load param_groups from saved state_dict.""" self._lr = groups[0]["lr"] self._eps = groups[0]["eps"] class SparseAdam(SparseGradOptimizer): r"""Node embedding optimizer using the Adam algorithm. This optimizer implements a sparse version of Adagrad algorithm for optimizing :class:`dgl.nn.NodeEmbedding`. Being sparse means it only updates the embeddings whose gradients have updates, which are usually a very small portion of the total embeddings. Adam maintains a :math:`Gm_{t,i,j}` and `Gp_{t,i,j}` for every parameter in the embeddings, where :math:`Gm_{t,i,j}=beta1 * Gm_{t-1,i,j} + (1-beta1) * g_{t,i,j}`, :math:`Gp_{t,i,j}=beta2 * Gp_{t-1,i,j} + (1-beta2) * g_{t,i,j}^2`, :math:`g_{t,i,j} = lr * Gm_{t,i,j} / (1 - beta1^t) / \sqrt{Gp_{t,i,j} / (1 - beta2^t)}` and :math:`g_{t,i,j}` is the gradient of the dimension :math:`j` of embedding :math:`i` at step :math:`t`. 
NOTE: The support of sparse Adam optimizer is experimental. Parameters ---------- params : list[dgl.nn.NodeEmbedding] The list of dgl.nn.NodeEmbeddings. lr : float The learning rate. betas : tuple[float, float], Optional Coefficients used for computing running averages of gradient and its square. Default: (0.9, 0.999) eps : float, Optional The term added to the denominator to improve numerical stability Default: 1e-8 use_uva : bool, Optional Whether to use pinned memory for storing 'mem' and 'power' parameters, when the embedding is stored on the CPU. This will improve training speed, but will require locking a large number of virtual memory pages. For embeddings which are stored in GPU memory, this setting will have no effect. Default: True if the gradients are generated on the GPU, and False if the gradients are on the CPU. dtype : torch.dtype, Optional The type to store optimizer state with. Default: th.float32. Examples -------- >>> def initializer(emb): th.nn.init.xavier_uniform_(emb) return emb >>> emb = dgl.nn.NodeEmbedding(g.num_nodes(), 10, 'emb', init_func=initializer) >>> optimizer = dgl.optim.SparseAdam([emb], lr=0.001) >>> for blocks in dataloader: ... ... ... feats = emb(nids, gpu_0) ... loss = F.sum(feats + 1, 0) ... loss.backward() ... optimizer.step() """ def __init__( self, params, lr, betas=(0.9, 0.999), eps=1e-08, use_uva=None, dtype=th.float32, ): super(SparseAdam, self).__init__(params, lr) self._lr = lr self._beta1 = betas[0] self._beta2 = betas[1] self._eps = eps self._use_uva = use_uva self._nd_handle = {} self._is_using_uva = {} assert dtype in [th.float16, th.float32], ( "Unsupported dtype {}. 
Valid choices are th.float32 " "and th.float32".format(dtype) ) self._dtype = dtype # setup tensors for optimizer states self.setup(self._params) def _setup_uva(self, name, mem, power): self._is_using_uva[name] = True mem_nd = pin_memory_inplace(mem) power_nd = pin_memory_inplace(power) self._nd_handle[name] = [mem_nd, power_nd] def setup(self, params): # We need to register a state sum for each embedding in the kvstore. for emb in params: assert isinstance( emb, NodeEmbedding ), "SparseAdam only supports dgl.nn.NodeEmbedding" emb_name = emb.name self._is_using_uva[emb_name] = self._use_uva if th.device(emb.weight.device) == th.device("cpu"): # if our embedding is on the CPU, our state also has to be if self._rank < 0: state_step = th.empty( (emb.weight.shape[0],), dtype=th.int32, device=th.device("cpu"), ).zero_() state_mem = th.empty( emb.weight.shape, dtype=self._dtype, device=th.device("cpu"), ).zero_() state_power = th.empty( emb.weight.shape, dtype=self._dtype, device=th.device("cpu"), ).zero_() elif self._rank == 0: state_step = create_shared_mem_array( emb_name + "_step", (emb.weight.shape[0],), th.int32 ).zero_() state_mem = create_shared_mem_array( emb_name + "_mem", emb.weight.shape, self._dtype ).zero_() state_power = create_shared_mem_array( emb_name + "_power", emb.weight.shape, self._dtype ).zero_() if self._world_size > 1: emb.store.set(emb_name + "_opt", emb_name) elif self._rank > 0: # receive emb.store.wait([emb_name + "_opt"]) state_step = get_shared_mem_array( emb_name + "_step", (emb.weight.shape[0],), th.int32 ) state_mem = get_shared_mem_array( emb_name + "_mem", emb.weight.shape, self._dtype ) state_power = get_shared_mem_array( emb_name + "_power", emb.weight.shape, self._dtype ) if self._is_using_uva[emb_name]: # if use_uva has been explicitly set to true, otherwise # wait until first step to decide self._setup_uva(emb_name, state_mem, state_power) else: # make sure we don't use UVA when data is on the GPU self._is_using_uva[emb_name] = 
def update(self, idx, grad, emb):
    """Update embeddings in a sparse manner.

    Sparse embeddings are updated in mini batches. Per-row Adam state
    (step count, first moment, second moment) is read from
    ``emb.optm_state`` so that only the rows listed in ``idx`` are touched.

    Parameters
    ----------
    idx : tensor
        Index of the embeddings to be updated. May contain duplicates; the
        gradients of duplicated indices are averaged.
    grad : tensor
        Gradient of each embedding, one row per entry of ``idx``. Its dtype
        and device are used for the intermediate computation.
    emb : dgl.nn.NodeEmbedding
        Sparse embedding to update.
    """
    with th.no_grad():
        state_step, state_mem, state_power = emb.optm_state
        exec_dtype = grad.dtype
        exec_dev = grad.device
        state_dev = state_step.device

        # whether or not we need to transfer data from the GPU to the CPU
        # while updating the weights
        is_d2h = state_dev.type == "cpu" and exec_dev.type == "cuda"

        # only perform async copies cpu -> gpu, or gpu-> gpu, but block
        # when copying to the cpu, so as to ensure the copy is finished
        # before operating on the data on the cpu
        state_block = is_d2h

        if self._is_using_uva[emb.name] is None and is_d2h:
            # we should use UVA going forward
            self._setup_uva(emb.name, state_mem, state_power)
        elif self._is_using_uva[emb.name] is None:
            # we shouldn't use UVA going forward
            self._is_using_uva[emb.name] = False

        use_uva = self._is_using_uva[emb.name]

        beta1 = self._beta1
        beta2 = self._beta2
        eps = self._eps

        clr = self._lr
        # There can be duplicated indices due to sampling.
        # Thus unique them here and average the gradient here.
        grad_indices, inverse, cnt = th.unique(
            idx, return_inverse=True, return_counts=True
        )
        state_idx = grad_indices.to(state_dev)
        state_step[state_idx] += 1
        # From here on ``state_step`` only holds the steps of the touched rows.
        state_step = state_step[state_idx].to(exec_dev)
        if use_uva:
            orig_mem = gather_pinned_tensor_rows(state_mem, grad_indices)
            orig_power = gather_pinned_tensor_rows(
                state_power, grad_indices
            )
        else:
            orig_mem = state_mem[state_idx].to(exec_dev)
            orig_power = state_power[state_idx].to(exec_dev)
        # convert to exec dtype
        orig_mem = orig_mem.to(dtype=exec_dtype)
        orig_power = orig_power.to(dtype=exec_dtype)

        # The accumulator must share the gradient's dtype: ``index_add_``
        # rejects a source whose dtype differs from the destination, which
        # made every non-float32 gradient fail here.
        grad_values = th.zeros(
            (grad_indices.shape[0], grad.shape[1]),
            dtype=exec_dtype,
            device=exec_dev,
        )
        grad_values.index_add_(0, inverse, grad)
        grad_values = grad_values / cnt.unsqueeze(1)

        grad_mem = grad_values
        grad_power = grad_values * grad_values

        update_mem = beta1 * orig_mem + (1.0 - beta1) * grad_mem
        update_power = beta2 * orig_power + (1.0 - beta2) * grad_power

        if use_uva:
            scatter_pinned_tensor_rows(
                state_mem, grad_indices, update_mem.to(dtype=self._dtype)
            )
            scatter_pinned_tensor_rows(
                state_power,
                grad_indices,
                update_power.to(dtype=self._dtype),
            )
        else:
            update_mem_dst = update_mem.to(dtype=self._dtype).to(
                state_dev, non_blocking=True
            )
            update_power_dst = update_power.to(dtype=self._dtype).to(
                state_dev, non_blocking=True
            )
            if state_block:
                # use events to try and overlap CPU and GPU as much as possible
                update_event = th.cuda.Event()
                update_event.record()

        # Bias-corrected first and second moments.
        update_mem_corr = update_mem / (
            1.0 - th.pow(th.tensor(beta1, device=exec_dev), state_step)
        ).unsqueeze(1)
        update_power_corr = update_power / (
            1.0 - th.pow(th.tensor(beta2, device=exec_dev), state_step)
        ).unsqueeze(1)
        std_values = (
            clr * update_mem_corr / (th.sqrt(update_power_corr) + eps)
        )

        std_values_dst = std_values.to(state_dev, non_blocking=True)
        if state_block:
            std_event = th.cuda.Event()
            std_event.record()

        if not use_uva:
            if state_block:
                # Block the host until the transfers from exec_dev to
                # state_dev have finished. ``Event.wait()`` would only make
                # later *stream* work wait; the indexing below runs on the
                # CPU, so the host itself has to synchronize.
                update_event.synchronize()
            state_mem[state_idx] = update_mem_dst
            state_power[state_idx] = update_power_dst

        if state_block:
            # Block the host until the transfer of std_values has finished
            # before it is consumed on the CPU.
            std_event.synchronize()
        emb.weight[state_idx] -= std_values_dst
def reshuffle_graph(g, node_part=None):
    """Reshuffle node IDs and edge IDs of a graph.

    This function reshuffles nodes and edges in a graph so that all
    nodes/edges of the same type have contiguous IDs. If a graph is
    partitioned and nodes are assigned to different partitions, all
    nodes/edges in a partition get contiguous IDs; within a partition, all
    nodes/edges of the same type have contiguous IDs.

    Parameters
    ----------
    g : DGLGraph
        The input graph. When ``node_part`` is None, the graph is not
        reshuffled and identity ``'orig_id'`` node/edge data is written to
        it in place.
    node_part : Tensor
        A vector whose length is the same as the number of nodes in the
        input graph. Each element indicates the partition ID the
        corresponding node is assigned to.

    Returns
    -------
    (DGLGraph, Tensor)
        The graph whose nodes and edges are reshuffled, carrying ``'orig_id'``
        node and edge data, and the 1D tensor that indicates the partition
        IDs of the nodes in the reshuffled graph (None when ``node_part`` is
        None).
    """
    # In this case, we don't need to reshuffle node IDs and edge IDs.
    if node_part is None:
        g.ndata["orig_id"] = F.arange(0, g.num_nodes())
        g.edata["orig_id"] = F.arange(0, g.num_edges())
        return g, None

    start = time.time()
    # ``node_part`` is known to be non-None from here on, so the former
    # "no partition" fallbacks were unreachable and have been dropped.
    node_part = utils.toindex(node_part).tousertensor()

    is_hetero = NTYPE in g.ndata and len(F.unique(g.ndata[NTYPE])) > 1
    if is_hetero:
        # Sort by (partition, node type): fold both into a single key, then
        # recover the partition ID from the sorted key.
        num_node_types = F.max(g.ndata[NTYPE], 0) + 1
        sorted_part, new2old_map = F.sort_1d(
            node_part * num_node_types + g.ndata[NTYPE]
        )
        sorted_part = F.floor_div(sorted_part, num_node_types)
    else:
        sorted_part, new2old_map = F.sort_1d(node_part)

    # Invert the permutation: new_node_ids[old_id] == new_id.
    new_node_ids = np.zeros((g.num_nodes(),), dtype=np.int64)
    new_node_ids[F.asnumpy(new2old_map)] = np.arange(0, g.num_nodes())
    # If the input graph is homogeneous, we only need to create an empty
    # array, so that _CAPI_DGLReassignEdges_Hetero knows how to handle it.
    # This must be read before ``g`` is replaced by the reordered graph.
    etype = (
        g.edata[ETYPE]
        if ETYPE in g.edata
        else F.zeros((0), F.dtype(sorted_part), F.cpu())
    )
    g = reorder_nodes(g, new_node_ids)
    node_part = utils.toindex(sorted_part)
    # We reassign edges in in-CSR. In this way, after partitioning, we can
    # ensure that all edges in a partition are in the contiguous ID space.
    etype_idx = utils.toindex(etype)
    orig_eids = _CAPI_DGLReassignEdges_Hetero(
        g._graph, etype_idx.todgltensor(), node_part.todgltensor(), True
    )
    g.edata["orig_id"] = utils.toindex(orig_eids).tousertensor()

    print(
        "Reshuffle nodes and edges: {:.3f} seconds".format(time.time() - start)
    )
    return g, node_part.tousertensor()
A subgraph may contain HALO nodes which does not belong to the partition of a subgraph but are connected to the nodes in the partition within a fixed number of hops. If `reshuffle` is turned on, the function reshuffles node IDs and edge IDs of the input graph before partitioning. After reshuffling, all nodes and edges in a partition fall in a contiguous ID range in the input graph. The partitioend subgraphs have node data 'orig_id', which stores the node IDs in the original input graph. Parameters ------------ g: DGLGraph The graph to be partitioned node_part: 1D tensor Specify which partition a node is assigned to. The length of this tensor needs to be the same as the number of nodes of the graph. Each element indicates the partition ID of a node. extra_cached_hops: int The number of hops a HALO node can be accessed. reshuffle : bool Resuffle nodes so that nodes in the same partition are in the same ID range. Returns -------- a dict of DGLGraphs The key is the partition ID and the value is the DGLGraph of the partition. Tensor 1D tensor that stores the mapping between the reshuffled node IDs and the original node IDs if 'reshuffle=True'. Otherwise, return None. Tensor 1D tensor that stores the mapping between the reshuffled edge IDs and the original edge IDs if 'reshuffle=True'. Otherwise, return None. """ assert len(node_part) == g.num_nodes() if reshuffle: g, node_part = reshuffle_graph(g, node_part) orig_nids = g.ndata["orig_id"] orig_eids = g.edata["orig_id"] node_part = utils.toindex(node_part) start = time.time() subgs = _CAPI_DGLPartitionWithHalo_Hetero( g._graph, node_part.todgltensor(), extra_cached_hops ) # g is no longer needed. Free memory. g = None print("Split the graph: {:.3f} seconds".format(time.time() - start)) subg_dict = {} node_part = node_part.tousertensor() start = time.time() # This function determines whether an edge belongs to a partition. # An edge is assigned to a partition based on its destination node. 
If its destination node # is assigned to a partition, we assign the edge to the partition as well. def get_inner_edge(subg, inner_node): inner_edge = F.zeros((subg.num_edges(),), F.int8, F.cpu()) inner_nids = F.nonzero_1d(inner_node) # TODO(zhengda) we need to fix utils.toindex() to avoid the dtype cast below. inner_nids = F.astype(inner_nids, F.int64) inner_eids = subg.in_edges(inner_nids, form="eid") inner_edge = F.scatter_row( inner_edge, inner_eids, F.ones((len(inner_eids),), F.dtype(inner_edge), F.cpu()), ) return inner_edge # This creaets a subgraph from subgraphs returned from the CAPI above. def create_subgraph(subg, induced_nodes, induced_edges, inner_node): subg1 = DGLGraph(gidx=subg.graph, ntypes=["_N"], etypes=["_E"]) # If IDs are shuffled, we should shuffled edges. This will help us collect edge data # from the distributed graph after training. if reshuffle: # When we shuffle edges, we need to make sure that the inner edges are assigned with # contiguous edge IDs and their ID range starts with 0. In other words, we want to # place these edge IDs in the front of the edge list. To ensure that, we add the IDs # of outer edges with a large value, so we will get the sorted list as we want. 
def get_peak_mem():
    """Get the peak memory size.

    The value is read from the ``VmPeak`` entry of ``/proc/self/status``.

    Returns
    -------
    float
        The peak memory size in GB, or 0.0 when ``/proc/self/status`` does
        not exist or has no ``VmPeak`` entry.
    """
    status_path = "/proc/self/status"
    if not os.path.exists(status_path):
        return 0.0
    # Use a context manager so the handle is closed even on the early return.
    with open(status_path, "r") as status_file:
        for line in status_file:
            if "VmPeak" in line:
                mem = re.findall(r"\d+", line)[0]
                # The first number on the line is divided by 1024 twice
                # (presumably kB -> GB; confirm against proc(5)).
                return int(mem) / 1024 / 1024
    return 0.0
* `balance_ntypes` balances the number of nodes of different types in each partition. * `balance_edges` balances the number of edges in each partition. To balance the node types, a user needs to pass a vector of N elements to indicate the type of each node. N is the number of nodes in the input graph. After the partition assignment, we construct partitions. Parameters ---------- g : DGLGraph The graph to be partitioned k : int The number of partitions. balance_ntypes : tensor Node type of each node balance_edges : bool Indicate whether to balance the edges. mode : str, "k-way" or "recursive" Whether use multilevel recursive bisection or multilevel k-way paritioning. objtype : str, "cut" or "vol" Set the objective as edge-cut minimization or communication volume minimization. This argument is used by the Metis algorithm. Returns ------- a 1-D tensor A vector with each element that indicates the partition ID of a vertex. """ assert mode in ( "k-way", "recursive", ), "'mode' can only be 'k-way' or 'recursive'" assert ( g.idtype == F.int64 ), "IdType of graph is required to be int64 for now." # METIS works only on symmetric graphs. # The METIS runs on the symmetric graph to generate the node assignment to partitions. start = time.time() sym_gidx = _CAPI_DGLMakeSymmetric_Hetero(g._graph) sym_g = DGLGraph(gidx=sym_gidx) print( "Convert a graph into a bidirected graph: {:.3f} seconds, peak memory: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) vwgt = [] # To balance the node types in each partition, we can take advantage of the vertex weights # in Metis. When vertex weights are provided, Metis will tries to generate partitions with # balanced vertex weights. A vertex can be assigned with multiple weights. The vertex weights # are stored in a vector of N * w elements, where N is the number of vertices and w # is the number of weights per vertex. Metis tries to balance the first weight, and then # the second weight, and so on. 
def metis_partition(
    g,
    k,
    extra_cached_hops=0,
    reshuffle=False,
    balance_ntypes=None,
    balance_edges=False,
    mode="k-way",
    objtype="cut",
):
    """This is to partition a graph with Metis partitioning.

    Metis assigns vertices to partitions. This API constructs subgraphs with
    the vertices assigned to the partitions and their incoming edges. A
    subgraph may contain HALO nodes which do not belong to the partition of a
    subgraph but are connected to the nodes in the partition within a fixed
    number of hops.

    When performing Metis partitioning, we can put some constraints on the
    partitioning. Currently, it supports two constraints to balance the
    partitioning. By default, Metis always tries to balance the number of
    nodes in each partition.

    * `balance_ntypes` balances the number of nodes of different types in
      each partition.
    * `balance_edges` balances the number of edges in each partition.

    To balance the node types, a user needs to pass a vector of N elements to
    indicate the type of each node. N is the number of nodes in the input
    graph.

    If `reshuffle` is turned on, the function reshuffles node IDs and edge
    IDs of the input graph before partitioning. After reshuffling, all nodes
    and edges in a partition fall in a contiguous ID range in the input
    graph. The partitioned subgraphs have node data 'orig_id', which stores
    the node IDs in the original input graph.

    The partitioned subgraph is stored in DGLGraph. The DGLGraph has the
    `part_id` node data that indicates the partition a node belongs to. The
    subgraphs do not contain the node/edge data in the input graph.

    Parameters
    ------------
    g: DGLGraph
        The graph to be partitioned
    k: int
        The number of partitions.
    extra_cached_hops: int
        The number of hops a HALO node can be accessed.
    reshuffle : bool
        Reshuffle nodes so that nodes in the same partition are in the same
        ID range.
    balance_ntypes : tensor
        Node type of each node
    balance_edges : bool
        Indicate whether to balance the edges.
    mode : str, "k-way" or "recursive"
        Whether use multilevel recursive bisection or multilevel k-way
        partitioning.
    objtype : str, "cut" or "vol"
        Set the objective as edge-cut minimization or communication volume
        minimization. Forwarded to :func:`metis_partition_assignment`.

    Returns
    --------
    a dict of DGLGraphs, or None
        The key is the partition ID and the value is the DGLGraph of the
        partition. None is returned when no partition assignment is
        produced.
    """
    assert mode in (
        "k-way",
        "recursive",
    ), "'mode' can only be 'k-way' or 'recursive'"
    assert objtype in (
        "cut",
        "vol",
    ), "'objtype' can only be 'cut' or 'vol'"
    node_part = metis_partition_assignment(
        g, k, balance_ntypes, balance_edges, mode, objtype=objtype
    )
    if node_part is None:
        return None

    # Then we split the original graph into parts based on the METIS
    # partitioning results. Only the dict of subgraphs is returned; the
    # original-ID mappings are dropped.
    return partition_graph_with_halo(
        g, node_part, extra_cached_hops, reshuffle
    )[0]
from a dgl.distributed.graph_partition_book.RangePartitionBook) 'max_node_map', to an NDArrayPartition 'part'. >>> part_range = [0] >>> for part in part_book.metadata(): >>> part_range.append(part_range[-1] + part['num_nodes']) >>> part = NDArrayPartition(g.num_nodes(), num_parts, mode='range', ... part_ranges=part_range) """ def __init__( self, array_size, num_parts, mode="remainder", part_ranges=None ): assert num_parts > 0, 'Invalid "num_parts", must be > 0.' if mode == "remainder": assert part_ranges is None, ( "When using remainder-based " 'partitioning, "part_ranges" should not be specified.' ) self._partition = _CAPI_DGLNDArrayPartitionCreateRemainderBased( array_size, num_parts ) elif mode == "range": assert part_ranges is not None, ( "When using range-based " 'partitioning, "part_ranges" must not be None.' ) assert part_ranges[0] == 0 and part_ranges[-1] == array_size, ( "part_ranges[0] must be 0, and part_ranges[-1] must be " '"array_size".' ) if F.is_tensor(part_ranges): part_ranges = F.zerocopy_to_dgl_ndarray(part_ranges) assert isinstance(part_ranges, NDArray), ( '"part_ranges" must ' "be Tensor or dgl.NDArray." 
) self._partition = _CAPI_DGLNDArrayPartitionCreateRangeBased( array_size, num_parts, part_ranges ) else: assert False, 'Unknown partition mode "{}"'.format(mode) self._array_size = array_size self._num_parts = num_parts def num_parts(self): """Get the number of partitions.""" return self._num_parts def array_size(self): """Get the total size of the first dimension of the partitioned array.""" return self._array_size def get(self): """Get the C-handle for this object.""" return self._partition def get_local_indices(self, part, ctx): """Get the set of global indices in this given partition.""" return self.map_to_global( F.arange(0, self.local_size(part), ctx=ctx), part ) def local_size(self, part): """Get the number of rows/items assigned to the given part.""" return _CAPI_DGLNDArrayPartitionGetPartSize(self._partition, part) def map_to_local(self, idxs): """Convert the set of global indices to local indices""" return F.zerocopy_from_dgl_ndarray( _CAPI_DGLNDArrayPartitionMapToLocal( self._partition, F.zerocopy_to_dgl_ndarray(idxs) ) ) def map_to_global(self, idxs, part_id): """Convert the set of local indices ot global indices""" return F.zerocopy_from_dgl_ndarray( _CAPI_DGLNDArrayPartitionMapToGlobal( self._partition, F.zerocopy_to_dgl_ndarray(idxs), part_id ) ) def generate_permutation(self, idxs): """Produce a scheme that maps the given indices to separate partitions and the counts of how many indices are in each partition. Parameters ---------- idxs: torch.Tensor. A tensor with shape (`num_indices`,), representing global indices. Return ------ torch.Tensor. A tensor with shape (`num_indices`,), representing the permutation to re-order the indices by partition. torch.Tensor. A tensor with shape (`num_partition`,), representing the number of indices per partition. 
def prop_nodes(
    graph,
    nodes_generator,
    message_func="default",
    reduce_func="default",
    apply_node_func="default",
):
    """Functional method for :func:`dgl.DGLGraph.prop_nodes`.

    Parameters
    ----------
    graph : DGLGraph
        The graph object whose ``prop_nodes`` method is invoked.
    nodes_generator : generator
        The generator of node frontiers.
    message_func : callable, optional
        The message function.
    reduce_func : callable, optional
        The reduce function.
    apply_node_func : callable, optional
        The update function.

    See Also
    --------
    dgl.DGLGraph.prop_nodes
    """
    # Pure delegation: arguments are forwarded positionally, nothing is
    # returned.
    propagate = graph.prop_nodes
    propagate(nodes_generator, message_func, reduce_func, apply_node_func)
def prop_nodes_topo(
    graph, message_func, reduce_func, reverse=False, apply_node_func=None
):
    """Message propagation using node frontiers generated by topological order.

    Parameters
    ----------
    graph : DGLGraph
        The graph object. Must have exactly one canonical edge type.
    message_func : callable
        The message function.
    reduce_func : callable
        The reduce function.
    reverse : bool, optional
        If true, traverse following the in-edge direction.
    apply_node_func : callable, optional
        The update function.

    See Also
    --------
    dgl.traversal.topological_nodes_generator
    """
    assert isinstance(
        graph, DGLGraph
    ), "DGLHeteroGraph is merged with DGLGraph, Please use DGLGraph"
    assert (
        len(graph.canonical_etypes) == 1
    ), "prop_nodes_topo only support homogeneous graph"
    # TODO(murphy): Graph traversal currently is only supported on
    # CPP graphs. Move graph to CPU as a workaround,
    # which should be fixed in the future.
    cpu_frontiers = trv.topological_nodes_generator(graph.cpu(), reverse)
    # Copy every frontier back to the device of the input graph.
    frontiers = []
    for frontier in cpu_frontiers:
        frontiers.append(F.copy_to(frontier, graph.device))
    prop_nodes(graph, frontiers, message_func, reduce_func, apply_node_func)
def choice(a, size, replace=True, prob=None):  # pylint: disable=invalid-name
    """An equivalent to :func:`numpy.random.choice`.

    Use this function if you:

    * Perform a non-uniform sampling (probability tensor is given).
    * Sample a small set from a very large population (ratio <5%) uniformly
      *without* replacement.
    * Have a backend tensor on hand and do not want to convert it to numpy
      back and forth.

    Compared to :func:`numpy.random.choice`, it is slower when replace is
    True and is comparable when replace is False. It wins when the population
    is very large and the number of draws is quite small (e.g., draw <5%).
    The reasons are twofold:

    * When ``a`` is a large integer, it avoids creating a large range array
      as numpy does.
    * When draw ratio is small, it switches to a hashmap based
      implementation.

    It out-performs numpy for non-uniform sampling in general cases.

    Parameters
    ----------
    a : 1-D tensor or int
        If a tensor, a random sample is generated from its elements. If an
        int, the random sample is generated as if a were F.arange(a)
    size : int or tuple of ints
        Output shape. E.g., for size ``(m, n, k)``, then ``m * n * k``
        samples are drawn.
    replace : bool, optional
        If true, sample with replacement.
    prob : 1-D tensor, optional
        The probabilities associated with each entry in a. If not given the
        sample assumes a uniform distribution over all entries in a.

    Returns
    -------
    samples : tensor
        The generated random samples; reshaped to ``size`` when ``size`` is
        a tuple, 1-D otherwise.
    """
    # TODO(minjie): support RNG as one of the arguments.
    shaped = isinstance(size, tuple)
    from_tensor = F.is_tensor(a)

    num_draws = np.prod(size) if shaped else size
    population = F.shape(a)[0] if from_tensor else a
    # A null int64 array stands for "no probabilities given".
    prob_nd = (
        nd.NULL["int64"] if prob is None else F.zerocopy_to_dgl_ndarray(prob)
    )

    bits = 64  # index array is in 64-bit
    picked = F.zerocopy_from_dgl_ndarray(
        _CAPI_Choice(
            int(num_draws), int(population), prob_nd, bool(replace), bits
        )
    )

    # Map sampled positions back onto the elements of ``a`` when it is a
    # tensor; otherwise the positions themselves are the samples.
    samples = F.gather_row(a, picked) if from_tensor else picked
    if shaped:
        return F.reshape(samples, size)
    return samples
def readout_edges(graph, feat, weight=None, *, op="sum", etype=None):
    """Generate a graph-level representation by aggregating edge features
    :attr:`feat`, optionally multiplying them by an edge :attr:`weight` first.

    The function is commonly used as a *readout* function on a batch of
    graphs to generate graph-level representation. Thus, the result tensor
    shape depends on the batch size of the input graph. Given a graph of
    batch size :math:`B`, and a feature size of :math:`D`, the result shape
    will be :math:`(B, D)`, with each row being the aggregated edge features
    of each graph.

    Parameters
    ----------
    graph : DGLGraph.
        The input graph.
    feat : str
        The edge feature name.
    weight : str, optional
        The edge weight feature name. If None, no weighting will be
        performed, otherwise, each edge feature is multiplied by the edge
        feature :attr:`weight` before aggregation. The weight feature shape
        must be compatible with an element-wise multiplication with the
        feature tensor.
    op : str, optional
        Readout operator. Can be 'sum', 'max', 'min', 'mean'.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination
          node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    Tensor
        Result tensor.

    Examples
    --------

    >>> import dgl
    >>> import torch as th

    Create two :class:`~dgl.DGLGraph` objects and initialize their
    edge features.

    >>> g1 = dgl.graph(([0, 1], [1, 0]))              # Graph 1
    >>> g1.edata['h'] = th.tensor([1., 2.])
    >>> g2 = dgl.graph(([0, 1], [1, 2]))              # Graph 2
    >>> g2.edata['h'] = th.tensor([2., 3.])

    Sum over one graph:

    >>> dgl.readout_edges(g1, 'h')
    tensor([3.])  # 1 + 2

    Sum over a batched graph:

    >>> bg = dgl.batch([g1, g2])
    >>> dgl.readout_edges(bg, 'h')
    tensor([3., 5.])  # [1 + 2, 2 + 3]

    Weighted sum:

    >>> bg.edata['w'] = th.tensor([.1, .2, .1, .5])
    >>> dgl.readout_edges(bg, 'h', 'w')
    tensor([.5, 1.7])

    Readout by max:

    >>> dgl.readout_edges(bg, 'h', op='max')
    tensor([2., 3.])

    See Also
    --------
    readout_nodes
    """
    values = graph.edges[etype].data[feat]
    if weight is not None:
        values = values * graph.edges[etype].data[weight]
    # One segment per graph in the batch, sized by its number of edges.
    segment_sizes = graph.batch_num_edges(etype)
    return segment.segment_reduce(segment_sizes, values, reducer=op)
def softmax_nodes(graph, feat, *, ntype=None):
    r"""Apply a softmax to the node feature :attr:`feat`, normalizing over the
    nodes of each graph separately.

    For each node :math:`v\in\mathcal{V}` and its feature :math:`x_v`,
    calculate its normalized feature as follows:

    .. math::
        z_v = \frac{\exp(x_v)}{\sum_{u\in\mathcal{V}}\exp(x_u)}

    If the graph is a batch of multiple graphs, each graph computes softmax
    independently. The result tensor has the same shape as the original node
    feature.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    feat : str
        The node feature name.
    ntype : str, optional
        The node type name. Can be omitted if there is only one node type
        in the graph.

    Returns
    -------
    Tensor
        Result tensor.

    Examples
    --------

    >>> import dgl
    >>> import torch as th

    Create two :class:`~dgl.DGLGraph` objects and initialize their
    node features.

    >>> g1 = dgl.graph(([0, 1], [1, 0]))              # Graph 1
    >>> g1.ndata['h'] = th.tensor([1., 1.])
    >>> g2 = dgl.graph(([0, 1], [1, 2]))              # Graph 2
    >>> g2.ndata['h'] = th.tensor([1., 1., 1.])

    Softmax over one graph:

    >>> dgl.softmax_nodes(g1, 'h')
    tensor([.5000, .5000])

    Softmax over a batched graph:

    >>> bg = dgl.batch([g1, g2])
    >>> dgl.softmax_nodes(bg, 'h')
    tensor([.5000, .5000, .3333, .3333, .3333])

    See Also
    --------
    softmax_edges
    """
    node_values = graph.nodes[ntype].data[feat]
    # Each graph in the batch forms one softmax segment.
    segment_sizes = graph.batch_num_nodes(ntype)
    return segment.segment_softmax(segment_sizes, node_values)
def broadcast_nodes(graph, graph_feat, *, ntype=None):
    """Expand the graph-level feature :attr:`graph_feat` into a node feature
    by repeating each graph's row once per node of that graph.

    The operation is similar to ``numpy.repeat`` (or
    ``torch.repeat_interleave``). It is commonly used to normalize node
    features by a global vector. For example, to normalize node features
    across graph to range :math:`[0, 1)`:

    >>> g = dgl.batch([...])  # batch multiple graphs
    >>> g.ndata['h'] = ...  # some node features
    >>> h_sum = dgl.broadcast_nodes(g, dgl.sum_nodes(g, 'h'))
    >>> g.ndata['h'] /= h_sum  # normalize by summation

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    graph_feat : tensor
        The feature to broadcast. Tensor shape is :math:`(B, *)` for batched
        graph, where :math:`B` is the batch size.
    ntype : str, optional
        Node type. Can be omitted if there is only one node type.

    Returns
    -------
    Tensor
        The node features tensor with shape :math:`(N, *)`, where :math:`N`
        is the number of nodes.

    Notes
    -----
    For a graph of batch size 1, a feature whose first dimension is not 1 is
    still accepted: a warning is issued and a leading dimension is added
    before repeating.

    Examples
    --------

    >>> import dgl
    >>> import torch as th

    Create two :class:`~dgl.DGLGraph` objects and a graph-level feature.

    >>> g1 = dgl.graph(([0], [1]))                    # Graph 1
    >>> g2 = dgl.graph(([0, 1], [1, 2]))              # Graph 2
    >>> bg = dgl.batch([g1, g2])
    >>> feat = th.rand(2, 5)
    >>> feat
    tensor([[0.4325, 0.7710, 0.5541, 0.0544, 0.9368],
            [0.2721, 0.4629, 0.7269, 0.0724, 0.1014]])

    Broadcast feature to all nodes in the batched graph, feat[i] is broadcast
    to nodes in the i-th example in the batch.

    >>> dgl.broadcast_nodes(bg, feat)
    tensor([[0.4325, 0.7710, 0.5541, 0.0544, 0.9368],
            [0.4325, 0.7710, 0.5541, 0.0544, 0.9368],
            [0.2721, 0.4629, 0.7269, 0.0724, 0.1014],
            [0.2721, 0.4629, 0.7269, 0.0724, 0.1014],
            [0.2721, 0.4629, 0.7269, 0.0724, 0.1014]])

    Broadcast feature to all nodes in the single graph (the feature tensor
    shape to broadcast should be :math:`(1, *)`).

    >>> feat0 = th.unsqueeze(feat[0], 0)
    >>> dgl.broadcast_nodes(g1, feat0)
    tensor([[0.4325, 0.7710, 0.5541, 0.0544, 0.9368],
            [0.4325, 0.7710, 0.5541, 0.0544, 0.9368]])

    See Also
    --------
    broadcast_edges
    """
    leading_dim = F.shape(graph_feat)[0]
    single_graph = graph.batch_size == 1
    if single_graph and leading_dim != 1:
        # Legacy path: an un-batched (*) feature was passed for a single graph.
        dgl_warning(
            "For a single graph, use a tensor of shape (1, *) for graph_feat."
            " The support of shape (*) will be deprecated."
        )
        graph_feat = F.unsqueeze(graph_feat, dim=0)
    repeats = graph.batch_num_nodes(ntype)
    return F.repeat(graph_feat, repeats, dim=0)
>>> g1 = dgl.graph(([0], [1])) # Graph 1 >>> g2 = dgl.graph(([0, 1], [1, 2])) # Graph 2 >>> bg = dgl.batch([g1, g2]) >>> feat = th.rand(2, 5) >>> feat tensor([[0.4325, 0.7710, 0.5541, 0.0544, 0.9368], [0.2721, 0.4629, 0.7269, 0.0724, 0.1014]]) Broadcast feature to all edges in the batched graph, feat[i] is broadcast to edges in the i-th example in the batch. >>> dgl.broadcast_edges(bg, feat) tensor([[0.4325, 0.7710, 0.5541, 0.0544, 0.9368], [0.2721, 0.4629, 0.7269, 0.0724, 0.1014], [0.2721, 0.4629, 0.7269, 0.0724, 0.1014]]) Broadcast feature to all edges in the single graph (the feature tensor shape to broadcast should be :math:`(1, *)`). >>> feat1 = th.unsqueeze(feat[1], 0) >>> dgl.broadcast_edges(g2, feat1) tensor([[0.2721, 0.4629, 0.7269, 0.0724, 0.1014], [0.2721, 0.4629, 0.7269, 0.0724, 0.1014]]) See Also -------- broadcast_nodes """ if F.shape(graph_feat)[0] != graph.batch_size and graph.batch_size == 1: dgl_warning( "For a single graph, use a tensor of shape (1, *) for graph_feat." " The support of shape (*) will be deprecated." ) graph_feat = F.unsqueeze(graph_feat, dim=0) return F.repeat(graph_feat, graph.batch_num_edges(etype), dim=0) READOUT_ON_ATTRS = { "nodes": ("ndata", "batch_num_nodes", "number_of_nodes"), "edges": ("edata", "batch_num_edges", "number_of_edges"), } def _topk_torch(keys, k, descending, x): """Internal function to take graph-wise top-k node/edge features according to the rank given by keys, this function is PyTorch only. Parameters ---------- keys : Tensor The key for ranking. k : int The :math:`k` in "top-:math:`k`". descending : bool Indicates whether to return the feature corresponding to largest or smallest elements. x : Tensor The padded feature with shape (batch, max_len, *) Returns ------- sorted_feat : Tensor A tensor with shape :math:`(batch, k, *)`. sorted_idx : Tensor A tensor with shape :math:`(batch, k)`. 
""" import torch as th batch_size, max_len = x.shape[0], x.shape[1] topk_indices = keys.topk(k, -1, largest=descending)[1] # (batch_size, k) x = x.view((batch_size * max_len), -1) shift = ( th.arange(0, batch_size, device=x.device).view(batch_size, 1) * max_len ) topk_indices_ = topk_indices + shift x = x[topk_indices_].view(batch_size, k, -1) return th.masked_fill(x, th.isinf(x), 0), topk_indices def _topk_on(graph, typestr, feat, k, descending, sortby, ntype_or_etype): """Internal function to take graph-wise top-k node/edge features of field :attr:`feat` in :attr:`graph` ranked by keys at given index :attr:`sortby`. If :attr:`descending` is set to False, return the k smallest elements instead. Parameters --------- graph : DGLGraph The graph typestr : str 'nodes' or 'edges' feat : str The feature field name. k : int The :math:`k` in "top-:math`k`". descending : bool Controls whether to return the largest or smallest elements, defaults to True. sortby : int The key index we sort :attr:`feat` on, if set to None, we sort the whole :attr:`feat`. ntype_or_etype : str, tuple of str Node/edge type. Returns ------- sorted_feat : Tensor A tensor with shape :math:`(B, K, D)`, where :math:`B` is the batch size of the input graph. sorted_idx : Tensor A tensor with shape :math:`(B, K)`(:math:`(B, K, D)` if sortby is set to None), where :math:`B` is the batch size of the input graph, :math:`D` is the feature size. 
def topk_nodes(graph, feat, k, *, descending=True, sortby=None, ntype=None):
    """Return a graph-level representation by a graph-wise top-k on node
    features :attr:`feat` in :attr:`graph`, ranked by the feature at index
    :attr:`sortby`.

    If :attr:`descending` is set to False, return the k smallest elements
    instead.

    If :attr:`sortby` is set to None, the function performs top-k on every
    feature dimension independently, within each graph of the batch.

    Parameters
    ----------
    graph : DGLGraph
        The graph.
    feat : str
        The feature field.
    k : int
        The k in "top-k"
    descending : bool
        Controls whether to return the largest or smallest elements.
    sortby : int, optional
        Index of the feature column used as the sort key. If None, all
        features are sorted independently.
    ntype : str, optional
        Node type. Can be omitted if there is only one node type in the graph.

    Returns
    -------
    sorted_feat : Tensor
        A tensor with shape :math:`(B, K, D)`, where :math:`B` is the batch
        size of the input graph.
    sorted_idx : Tensor
        A tensor with shape :math:`(B, K)` (:math:`(B, K, D)` if sortby is set
        to None), where :math:`B` is the batch size of the input graph and
        :math:`D` is the feature size.

    Notes
    -----
    If an example has :math:`n` nodes and :math:`n<k`, the rows :math:`n+1`
    to :math:`k` of the first returned tensor are padded with zeros; the
    corresponding entries of the second returned tensor should not be relied
    upon.

    Examples
    --------

    >>> import dgl
    >>> import torch as th

    Create two :class:`~dgl.DGLGraph` objects and initialize their
    node features.

    >>> g1 = dgl.graph(([0, 1], [2, 3]))              # Graph 1
    >>> g1.ndata['h'] = th.rand(4, 5)
    >>> g1.ndata['h']
    tensor([[0.0297, 0.8307, 0.9140, 0.6702, 0.3346],
            [0.5901, 0.3030, 0.9280, 0.6893, 0.7997],
            [0.0880, 0.6515, 0.4451, 0.7507, 0.5297],
            [0.5171, 0.6379, 0.2695, 0.8954, 0.5197]])

    >>> g2 = dgl.graph(([0, 1, 2], [2, 3, 4]))        # Graph 2
    >>> g2.ndata['h'] = th.rand(5, 5)
    >>> g2.ndata['h']
    tensor([[0.3168, 0.3174, 0.5303, 0.0804, 0.3808],
            [0.1323, 0.2766, 0.4318, 0.6114, 0.1458],
            [0.1752, 0.9105, 0.5692, 0.8489, 0.0539],
            [0.1931, 0.4954, 0.3455, 0.3934, 0.0857],
            [0.5065, 0.5182, 0.5418, 0.1520, 0.3872]])

    Top-k over node attribute :attr:`h` in a batched graph.

    >>> bg = dgl.batch([g1, g2], ndata=['h'])
    >>> dgl.topk_nodes(bg, 'h', 3)
    (tensor([[[0.5901, 0.8307, 0.9280, 0.8954, 0.7997],
              [0.5171, 0.6515, 0.9140, 0.7507, 0.5297],
              [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]],
             [[0.5065, 0.9105, 0.5692, 0.8489, 0.3872],
              [0.3168, 0.5182, 0.5418, 0.6114, 0.3808],
              [0.1931, 0.4954, 0.5303, 0.3934, 0.1458]]]),
     tensor([[[1, 0, 1, 3, 1],
              [3, 2, 0, 2, 2],
              [2, 3, 2, 1, 3]],
             [[4, 2, 2, 2, 4],
              [0, 4, 4, 1, 0],
              [3, 3, 0, 3, 1]]]))

    Top-k over node attribute :attr:`h` along the last dimension in a batched
    graph. (used in SortPooling)

    >>> dgl.topk_nodes(bg, 'h', 3, sortby=-1)
    (tensor([[[0.5901, 0.3030, 0.9280, 0.6893, 0.7997],
              [0.0880, 0.6515, 0.4451, 0.7507, 0.5297],
              [0.5171, 0.6379, 0.2695, 0.8954, 0.5197]],
             [[0.5065, 0.5182, 0.5418, 0.1520, 0.3872],
              [0.3168, 0.3174, 0.5303, 0.0804, 0.3808],
              [0.1323, 0.2766, 0.4318, 0.6114, 0.1458]]]),
     tensor([[1, 2, 3],
             [4, 0, 1]]))

    Top-k over node attribute :attr:`h` in a single graph.

    >>> dgl.topk_nodes(g1, 'h', 3)
    (tensor([[[0.5901, 0.8307, 0.9280, 0.8954, 0.7997],
              [0.5171, 0.6515, 0.9140, 0.7507, 0.5297],
              [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]),
     tensor([[[1, 0, 1, 3, 1],
              [3, 2, 0, 2, 2],
              [2, 3, 2, 1, 3]]]))
    """
    # Shared node/edge implementation; "nodes" selects node data and counts.
    return _topk_on(graph, "nodes", feat, k, descending, sortby, ntype)
sortby : int, optional Sort according to which feature. If None, all features are sorted independently. etype : str, tuple of str, optional Edge type. Can be omitted if there is only one edge type in the graph. Returns ------- sorted_feat : Tensor A tensor with shape :math:`(B, K, D)`, where :math:`B` is the batch size of the input graph. sorted_idx : Tensor A tensor with shape :math:`(B, K)`(:math:`(B, K, D)` if sortby is set to None), where :math:`B` is the batch size of the input graph, :math:`D` is the feature size. Notes ----- If an example has :math:`n` edges and :math:`n<k`, in the first returned tensor the :math:`n+1` to :math:`k`th rows would be padded with all zero; in the second returned tensor, the behavior of :math:`n+1` to :math:`k`th elements is not defined. Examples -------- >>> import dgl >>> import torch as th Create two :class:`~dgl.DGLGraph` objects and initialize their edge features. >>> g1 = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0])) # Graph 1 >>> g1.edata['h'] = th.rand(4, 5) >>> g1.edata['h'] tensor([[0.0297, 0.8307, 0.9140, 0.6702, 0.3346], [0.5901, 0.3030, 0.9280, 0.6893, 0.7997], [0.0880, 0.6515, 0.4451, 0.7507, 0.5297], [0.5171, 0.6379, 0.2695, 0.8954, 0.5197]]) >>> g2 = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # Graph 2 >>> g2.edata['h'] = th.rand(5, 5) >>> g2.edata['h'] tensor([[0.3168, 0.3174, 0.5303, 0.0804, 0.3808], [0.1323, 0.2766, 0.4318, 0.6114, 0.1458], [0.1752, 0.9105, 0.5692, 0.8489, 0.0539], [0.1931, 0.4954, 0.3455, 0.3934, 0.0857], [0.5065, 0.5182, 0.5418, 0.1520, 0.3872]]) Top-k over edge attribute :attr:`h` in a batched graph. >>> bg = dgl.batch([g1, g2], edata=['h']) >>> dgl.topk_edges(bg, 'h', 3) (tensor([[[0.5901, 0.8307, 0.9280, 0.8954, 0.7997], [0.5171, 0.6515, 0.9140, 0.7507, 0.5297], [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]], [[0.5065, 0.9105, 0.5692, 0.8489, 0.3872], [0.3168, 0.5182, 0.5418, 0.6114, 0.3808], [0.1931, 0.4954, 0.5303, 0.3934, 0.1458]]]), tensor([[[1, 0, 1, 3, 1], [3, 2, 0, 2, 2], [2, 3, 2, 1, 3]], [[4, 2, 2, 2, 4], [0, 4, 4, 1, 0], [3, 3, 0, 3, 1]]])) Top-k over edge attribute :attr:`h` along index -1 in a batched graph.
(used in SortPooling) >>> dgl.topk_edges(bg, 'h', 3, sortby=-1) (tensor([[[0.5901, 0.3030, 0.9280, 0.6893, 0.7997], [0.0880, 0.6515, 0.4451, 0.7507, 0.5297], [0.5171, 0.6379, 0.2695, 0.8954, 0.5197]], [[0.5065, 0.5182, 0.5418, 0.1520, 0.3872], [0.3168, 0.3174, 0.5303, 0.0804, 0.3808], [0.1323, 0.2766, 0.4318, 0.6114, 0.1458]]]), tensor([[1, 2, 3], [4, 0, 1]])) Top-k over edge attribute :attr:`h` in a single graph. >>> dgl.topk_edges(g1, 'h', 3) (tensor([[[0.5901, 0.8307, 0.9280, 0.8954, 0.7997], [0.5171, 0.6515, 0.9140, 0.7507, 0.5297], [0.0880, 0.6379, 0.4451, 0.6893, 0.5197]]]), tensor([[[1, 0, 1, 3, 1], [3, 2, 0, 2, 2], [2, 3, 2, 1, 3]]])) """ return _topk_on( graph, "edges", feat, k, descending=descending, sortby=sortby, ntype_or_etype=etype, ) ================================================ FILE: python/dgl/sampling/__init__.py ================================================ """The ``dgl.sampling`` package contains operators and utilities for sampling from a graph via random walks, neighbor sampling, etc. They are typically used together with the ``DataLoader`` s in the ``dgl.dataloading`` package. The user guide :ref:`guide-minibatch` gives a holistic explanation on how different components work together. """ from .randomwalks import * from .pinsage import * from .neighbor import * from .labor import * from .node2vec_randomwalk import * from .negative import * from . import utils ================================================ FILE: python/dgl/sampling/labor.py ================================================ # # Copyright (c) 2022 by Contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
def sample_labors(
    g,
    nodes,
    fanout,
    edge_dir="in",
    prob=None,
    importance_sampling=0,
    random_seed=None,
    seed2_contribution=0,
    copy_ndata=True,
    copy_edata=True,
    exclude_edges=None,
    output_device=None,
):
    """Sampler that builds computational dependency of node representations via
    labor sampling for multilayer GNN from the NeurIPS 2023 paper
    `Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs
    <https://arxiv.org/abs/2210.13339>`__

    This sampler will make every node gather messages from a fixed number of
    neighbors per edge type. The neighbors are picked uniformly with default
    parameters. For every vertex t that will be considered to be sampled,
    there will be a single random variate r_t.

    For each node, a number of inbound (or outbound when
    ``edge_dir == 'out'``) edges will be randomly chosen. The graph returned
    will then contain all the nodes in the original graph, but only the
    sampled edges.

    Node/edge features are carried over according to :attr:`copy_ndata` and
    :attr:`copy_edata`. The original IDs of the sampled edges are stored as
    the `dgl.EID` feature in the returned graph.

    Parameters
    ----------
    g : DGLGraph
        The graph, allowed to have multiple node or edge types. Can be either
        on CPU or GPU.
    nodes : tensor or dict
        Node IDs to sample neighbors from.

        This argument can take a single ID tensor or a dictionary of node
        types and ID tensors. If a single tensor is given, the graph must only
        have one type of nodes.
    fanout : int or dict[etype, int]
        The number of edges to be sampled for each node on each edge type.

        This argument can take a single int or a dictionary of edge types and
        ints. If a single int is given, DGL will sample this number of edges
        for each node for every edge type.

        If -1 is given for a single edge type, all the neighboring edges with
        that edge type will be selected.
    edge_dir : str, optional
        Determines whether to sample inbound or outbound edges.

        Can take either ``in`` for inbound edges or ``out`` for outbound edges.
    prob : str, optional
        Feature name used as the (unnormalized) probabilities associated with
        each neighboring edge of a node. The feature must have only one
        element for each edge.

        The features must be non-negative floats, and the sum of the features
        of inbound/outbound edges for every node must be positive (though they
        don't have to sum up to one). Otherwise, the result will be undefined.

        If :attr:`prob` is not None, GPU sampling is not supported.
    importance_sampling : int, optional
        Whether to use importance sampling or uniform sampling. Negative
        values optimize the importance sampling probabilities until
        convergence, while a positive value runs that many optimization steps.
        If the value is i, then the LABOR-i variant is used.
    random_seed : tensor
        An int64 tensor with one element.

        The passed random_seed makes it so that for any seed vertex ``s`` and
        its neighbor ``t``, the rolled random variate ``r_t`` is the same for
        any call to this function with the same random seed. When sampling as
        part of the same batch, one would want identical seeds so that LABOR
        can globally sample. For example, on a heterogeneous graph a single
        random seed is used for all edge types, which samples far fewer
        vertices than running LABOR separately per edge type with distinct
        seeds.

        If this function is called without a ``random_seed``, a random number
        drawn from DGL is used as the seed. Pass identical seeds when several
        calls to this function sample parts of one batch.
    seed2_contribution : float, optional
        A float value between [0, 1) that determines the contribution of the
        second random seed to generate the random variates for the LABOR
        sampling algorithm.
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from the
        original graph. If False, the new graph will not have any node
        features.

        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the new graph are copied from the
        original graph. If False, the new graph will not have any edge
        features.

        (Default: True)
    exclude_edges: tensor or dict
        Edge IDs to exclude during sampling neighbors for the seed nodes.

        This argument can take a single ID tensor or a dictionary of edge
        types and ID tensors. If a single tensor is given, the graph must only
        have one type of edges.
    output_device : Framework-specific device context object, optional
        The output device. Default is the same as the input graph.

    Returns
    -------
    tuple(DGLGraph, list[Tensor])
        A sampled subgraph containing only the sampled neighboring edges
        along with edge weights.

    Notes
    -----
    If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors are used
    as the node or edge features of the original graph and the new graph. As a
    result, users should avoid performing in-place operations on the node
    features of the new graph to avoid feature corruption.

    Examples
    --------
    Assume that you have the following graph

    >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))

    And the weights

    >>> g.edata['prob'] = torch.FloatTensor([0., 1., 0., 1., 0., 1.])

    To sample one inbound edge for node 0 and node 1:

    >>> sg = dgl.sampling.sample_labors(g, [0, 1], 1)
    >>> sg.edges(order='eid')
    (tensor([1, 0]), tensor([0, 1]))
    >>> sg.edata[dgl.EID]
    tensor([2, 0])

    To sample one inbound edge for node 0 and node 1 with probability in edge
    feature ``prob``:

    >>> sg = dgl.sampling.sample_labors(g, [0, 1], 1, prob='prob')
    >>> sg.edges(order='eid')
    (tensor([2, 1]), tensor([0, 1]))

    With ``fanout`` greater than the number of actual neighbors and without
    replacement, DGL will take all neighbors instead:

    >>> sg = dgl.sampling.sample_labors(g, [0, 1], 3)
    >>> sg.edges(order='eid')
    (tensor([1, 2, 0, 1]), tensor([0, 0, 1, 1]))

    To exclude certain EID's during sampling for the seed nodes:

    >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))
    >>> g_edges = g.all_edges(form='all')
    (tensor([0, 0, 1, 1, 2, 2]), tensor([1, 2, 0, 1, 2, 0]), tensor([0, 1, 2, 3, 4, 5]))
    >>> sg = dgl.sampling.sample_labors(g, [0, 1], 3, exclude_edges=[0, 1, 2])
    >>> sg.all_edges(form='all')
    (tensor([2, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> sg.has_edges_between(g_edges[0][:3],g_edges[1][:3])
    tensor([False, False, False])
    >>> g = dgl.heterograph({
    ...   ('drug', 'interacts', 'drug'): ([0, 0, 1, 1, 3, 2], [1, 2, 0, 1, 2, 0]),
    ...   ('drug', 'interacts', 'gene'): ([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]),
    ...   ('drug', 'treats', 'disease'): ([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0])})
    >>> g_edges = g.all_edges(form='all', etype=('drug', 'interacts', 'drug'))
    (tensor([0, 0, 1, 1, 3, 2]), tensor([1, 2, 0, 1, 2, 0]), tensor([0, 1, 2, 3, 4, 5]))
    >>> excluded_edges  = {('drug', 'interacts', 'drug'): g_edges[2][:3]}
    >>> sg = dgl.sampling.sample_labors(g, {'drug':[0, 1]}, 3, exclude_edges=excluded_edges)
    >>> sg.all_edges(form='all', etype=('drug', 'interacts', 'drug'))
    (tensor([2, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> sg.has_edges_between(g_edges[0][:3],g_edges[1][:3],etype=('drug', 'interacts', 'drug'))
    tensor([False, False, False])
    """
    # A non-pinned CPU graph hands the exclusion list to the sampler itself;
    # any other graph is sampled first and filtered afterwards.
    exclude_inside_sampler = (
        F.device_type(g.device) == "cpu" and not g.is_pinned()
    )
    frontier, importances = _sample_labors(
        g,
        nodes,
        fanout,
        edge_dir=edge_dir,
        prob=prob,
        importance_sampling=importance_sampling,
        random_seed=random_seed,
        seed2_contribution=seed2_contribution,
        copy_ndata=copy_ndata,
        copy_edata=copy_edata,
        exclude_edges=exclude_edges if exclude_inside_sampler else None,
    )
    if not exclude_inside_sampler and exclude_edges is not None:
        drop_excluded = EidExcluder(exclude_edges)
        frontier, importances = drop_excluded(frontier, importances)

    if output_device is None:
        return (frontier, importances)
    return (
        frontier.to(output_device),
        [weights.to(output_device) for weights in importances],
    )
) nodes = {g.ntypes[0]: nodes} nodes = utils.prepare_tensor_dict(g, nodes, "nodes") if len(nodes) == 0: raise ValueError( "Got an empty dictionary in the nodes argument. " "Please pass in a dictionary with empty tensors as values instead." ) ctx = utils.to_dgl_context(F.context(next(iter(nodes.values())))) nodes_all_types = [] # nids_all_types is needed if one wants labor to work for subgraphs whose vertices have # been renamed and the rolled randoms should be rolled for global vertex ids. # It is disabled for now below by passing empty ndarrays. nids_all_types = [nd.array([], ctx=ctx) for _ in g.ntypes] for ntype in g.ntypes: if ntype in nodes: nodes_all_types.append(F.to_dgl_nd(nodes[ntype])) else: nodes_all_types.append(nd.array([], ctx=ctx)) if isinstance(fanout, nd.NDArray): fanout_array = fanout else: if not isinstance(fanout, dict): fanout_array = [int(fanout)] * len(g.etypes) else: if len(fanout) != len(g.etypes): raise DGLError( "Fan-out must be specified for each edge type " "if a dict is provided." ) fanout_array = [None] * len(g.etypes) for etype, value in fanout.items(): fanout_array[g.get_etype_id(etype)] = value fanout_array = F.to_dgl_nd(F.tensor(fanout_array, dtype=F.int64)) if ( isinstance(prob, list) and len(prob) > 0 and isinstance(prob[0], nd.NDArray) ): prob_arrays = prob elif prob is None: prob_arrays = [nd.array([], ctx=nd.cpu())] * len(g.etypes) else: prob_arrays = [] for etype in g.canonical_etypes: if prob in g.edges[etype].data: prob_arrays.append(F.to_dgl_nd(g.edges[etype].data[prob])) else: prob_arrays.append(nd.array([], ctx=nd.cpu())) excluded_edges_all_t = [] if exclude_edges is not None: if not isinstance(exclude_edges, dict): if len(g.etypes) > 1: raise DGLError( "Must specify etype when the graph is not homogeneous." 
) exclude_edges = {g.canonical_etypes[0]: exclude_edges} exclude_edges = utils.prepare_tensor_dict(g, exclude_edges, "edges") for etype in g.canonical_etypes: if etype in exclude_edges: excluded_edges_all_t.append(F.to_dgl_nd(exclude_edges[etype])) else: excluded_edges_all_t.append(nd.array([], ctx=ctx)) ret_val = _CAPI_DGLSampleLabors( g._graph, nodes_all_types, fanout_array, edge_dir, prob_arrays, excluded_edges_all_t, importance_sampling, random_seed, seed2_contribution, nids_all_types, ) subgidx = ret_val[0] importances = [F.from_dgl_nd(importance) for importance in ret_val[1:]] induced_edges = subgidx.induced_edges ret = DGLGraph(subgidx.graph, g.ntypes, g.etypes) if copy_ndata: node_frames = utils.extract_node_subframes(g, None) utils.set_new_frames(ret, node_frames=node_frames) if copy_edata: edge_frames = utils.extract_edge_subframes(g, induced_edges) utils.set_new_frames(ret, edge_frames=edge_frames) return ret, importances DGLGraph.sample_labors = utils.alias_func(sample_labors) _init_api("dgl.sampling.labor", __name__) ================================================ FILE: python/dgl/sampling/negative.py ================================================ """Negative sampling APIs""" from numpy.polynomial import polynomial from .. import backend as F, utils from .._ffi.function import _init_api from ..heterograph import DGLGraph __all__ = ["global_uniform_negative_sampling"] def _calc_redundancy( k_hat, num_edges, num_pairs, r=3 ): # pylint: disable=invalid-name # pylint: disable=invalid-name # Calculates the number of samples required based on a lower-bound # of the expected number of negative samples, based on N draws from # a binomial distribution. 
Solves the following equation for N: # # k_hat = N*p_k - r * np.sqrt(N*p_k*(1-p_k)) # # where p_k is the probability that a node pairing is a negative edge # and r is the number of standard deviations to construct the lower bound # # Credits to @zjost p_m = num_edges / num_pairs p_k = 1 - p_m a = p_k**2 b = -p_k * (2 * k_hat + r**2 * p_m) c = k_hat**2 poly = polynomial.Polynomial([c, b, a]) N = poly.roots()[-1] redundancy = N / k_hat - 1.0 return redundancy def global_uniform_negative_sampling( g, num_samples, exclude_self_loops=True, replace=False, etype=None, redundancy=None, ): """Performs negative sampling, which generate source-destination pairs such that edges with the given type do not exist. Specifically, this function takes in an edge type and a number of samples. It returns two tensors ``src`` and ``dst``, the former in the range of ``[0, num_src)`` and the latter in the range of ``[0, num_dst)``, where ``num_src`` and ``num_dst`` represents the number of nodes with the source and destination node type respectively. It guarantees that no edge will exist between the corresponding pairs of ``src`` with the source node type and ``dst`` with the destination node type. .. note:: This negative sampler will try to generate as many negative samples as possible, but it may rarely return less than :attr:`num_samples` negative samples. This is more likely to happen when a graph is so small or dense that not many unique negative samples exist. Parameters ---------- g : DGLGraph The graph. num_samples : int The number of desired negative samples to generate. exclude_self_loops : bool, optional Whether to exclude self-loops from the negative samples. Only impacts the edge types whose source and destination node types are the same. Default: True. replace : bool, optional Whether to sample with replacement. Setting it to True will make things faster. (Default: False) etype : str or tuple of str, optional The edge type. Can be omitted if the graph only has one edge type. 
redundancy : float, optional Indicates how much more negative samples to actually generate during rejection sampling before finding the unique pairs. Increasing it will increase the likelihood of getting :attr:`num_samples` negative samples, but will also take more time and memory. (Default: automatically determined by the density of graph) Returns ------- tuple[Tensor, Tensor] The source and destination pairs. Examples -------- >>> g = dgl.graph(([0, 1, 2], [1, 2, 3])) >>> dgl.sampling.global_uniform_negative_sampling(g, 3) (tensor([0, 1, 3]), tensor([2, 0, 2])) """ if etype is None: etype = g.etypes[0] utype, _, vtype = g.to_canonical_etype(etype) exclude_self_loops = exclude_self_loops and (utype == vtype) redundancy = _calc_redundancy( num_samples, g.num_edges(etype), g.num_nodes(utype) * g.num_nodes(vtype) ) etype_id = g.get_etype_id(etype) src, dst = _CAPI_DGLGlobalUniformNegativeSampling( g._graph, etype_id, num_samples, 3, exclude_self_loops, replace, redundancy, ) return F.from_dgl_nd(src), F.from_dgl_nd(dst) DGLGraph.global_uniform_negative_sampling = utils.alias_func( global_uniform_negative_sampling ) _init_api("dgl.sampling.negative", __name__) ================================================ FILE: python/dgl/sampling/neighbor.py ================================================ """Neighbor sampling APIs""" import os import torch from .. import backend as F, ndarray as nd, utils from .._ffi.function import _init_api from ..base import DGLError, EID from ..heterograph import DGLBlock, DGLGraph from .utils import EidExcluder __all__ = [ "sample_etype_neighbors", "sample_neighbors", "sample_neighbors_fused", "sample_neighbors_biased", "select_topk", ] def _prepare_edge_arrays(g, arg): """Converts the argument into a list of NDArrays. If the argument is already a list of array-like objects, directly do the conversion. If the argument is a string, converts g.edata[arg] into a list of NDArrays ordered by the edge types. 
""" if isinstance(arg, list) and len(arg) > 0: if isinstance(arg[0], nd.NDArray): return arg else: # The list can have None as placeholders for empty arrays with # undetermined data type. dtype = None ctx = None result = [] for entry in arg: if F.is_tensor(entry): result.append(F.to_dgl_nd(entry)) dtype = F.dtype(entry) ctx = F.context(entry) else: result.append(None) result = [ ( F.to_dgl_nd(F.copy_to(F.tensor([], dtype=dtype), ctx)) if x is None else x ) for x in result ] return result elif arg is None: return [nd.array([], ctx=nd.cpu())] * len(g.etypes) else: arrays = [] for etype in g.canonical_etypes: if arg in g.edges[etype].data: arrays.append(F.to_dgl_nd(g.edges[etype].data[arg])) else: arrays.append(nd.array([], ctx=nd.cpu())) return arrays def sample_etype_neighbors( g, nodes, etype_offset, fanout, edge_dir="in", prob=None, exclude_edges=None, replace=False, copy_ndata=True, copy_edata=True, etype_sorted=False, _dist_training=False, output_device=None, ): """Sample neighboring edges of the given nodes and return the induced subgraph. For each node, a number of inbound (or outbound when ``edge_dir == 'out'``) edges will be randomly chosen. The graph returned will then contain all the nodes in the original graph, but only the sampled edges. Node/edge features are not preserved. The original IDs of the sampled edges are stored as the `dgl.EID` feature in the returned graph. Parameters ---------- g : DGLGraph The graph. Can only be in CPU. Should only have one node type and one edge type. nodes : tensor or dict Node IDs to sample neighbors from. This argument can take a single ID tensor or a dictionary of node types and ID tensors. If a single tensor is given, the graph must only have one type of nodes. etype_offset : list[int] The offset of each edge type ID. fanout : Tensor The number of edges to be sampled for each node per edge type. Must be a 1D tensor with the number of elements same as the number of edge types. 
If -1 is given, all of the neighbors with non-zero probability will be selected. edge_dir : str, optional Determines whether to sample inbound or outbound edges. Can take either ``in`` for inbound edges or ``out`` for outbound edges. prob : list[Tensor], optional The (unnormalized) probabilities associated with each neighboring edge of a node. The features must be non-negative floats or boolean. Otherwise, the result will be undefined. exclude_edges: tensor or dict Edge IDs to exclude during sampling neighbors for the seed nodes. This argument can take a single ID tensor or a dictionary of edge types and ID tensors. If a single tensor is given, the graph must only have one type of nodes. replace : bool, optional If True, sample with replacement. copy_ndata: bool, optional If True, the node features of the new graph are copied from the original graph. If False, the new graph will not have any node features. (Default: True) copy_edata: bool, optional If True, the edge features of the new graph are copied from the original graph. If False, the new graph will not have any edge features. (Default: True) _dist_training : bool, optional Internal argument. Do not use. (Default: False) etype_sorted: bool, optional A hint telling whether the etypes are already sorted. (Default: False) output_device : Framework-specific device context object, optional The output device. Default is the same as the input graph. Returns ------- DGLGraph A sampled subgraph containing only the sampled neighboring edges, with the same device as the input graph. Notes ----- If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors are used as the node or edge features of the original graph and the new graph. As a result, users should avoid performing in-place operations on the node features of the new graph to avoid feature corruption. 
""" if exclude_edges is not None: raise DGLError( "exclude_edges is not supported for sample_etype_neighbors" ) if g.device != F.cpu(): raise DGLError("The graph should be in cpu.") # (BarclayII) because the homogenized graph no longer contains the *name* of edge # types, the fanout argument can no longer be a dict of etypes and ints, as opposed # to sample_neighbors. if not F.is_tensor(fanout): raise DGLError("The fanout should be a tensor") if isinstance(nodes, dict): assert len(nodes) == 1, "The input graph should not have node types" nodes = list(nodes.values())[0] nodes = utils.prepare_tensor(g, nodes, "nodes") device = utils.context_of(nodes) nodes = F.to_dgl_nd(nodes) # treat etypes as int32, it is much cheaper than int64 # TODO(xiangsx): int8 can be a better choice. fanout = F.to_dgl_nd(fanout) prob_array = _prepare_edge_arrays(g, prob) subgidx = _CAPI_DGLSampleNeighborsEType( g._graph, nodes, etype_offset, fanout, edge_dir, prob_array, replace, etype_sorted, ) induced_edges = subgidx.induced_edges ret = DGLGraph(subgidx.graph, g.ntypes, g.etypes) # handle features # (TODO) (BarclayII) DGL distributed fails with bus error, freezes, or other # incomprehensible errors with lazy feature copy. # So in distributed training context, we fall back to old behavior where we # only set the edge IDs. 
def sample_neighbors(
    g,
    nodes,
    fanout,
    edge_dir="in",
    prob=None,
    replace=False,
    copy_ndata=True,
    copy_edata=True,
    _dist_training=False,
    exclude_edges=None,
    output_device=None,
):
    """Sample neighboring edges of the given nodes and return the induced subgraph.

    For every seed node, a number of inbound edges (outbound when
    ``edge_dir == 'out'``) is drawn at random.  The returned graph keeps all
    the nodes of the original graph but contains only the sampled edges.

    The original IDs of the sampled edges are stored as the `dgl.EID` feature
    in the returned graph.

    GPU sampling is supported for this function. Refer to
    :ref:`guide-minibatch-gpu-sampling` for more details.

    Parameters
    ----------
    g : DGLGraph
        The graph.  Can be either on CPU or GPU.
    nodes : tensor or dict
        Node IDs to sample neighbors from.

        Either a single ID tensor or a dictionary mapping node types to ID
        tensors.  If a single tensor is given, the graph must only have one
        type of nodes.
    fanout : int or dict[etype, int]
        The number of edges to be sampled for each node on each edge type.

        Either a single int or a dictionary mapping edge types to ints.  A
        single int is applied to every edge type.

        If -1 is given for a single edge type, all the neighboring edges with
        that edge type and non-zero probability will be selected.
    edge_dir : str, optional
        Determines whether to sample inbound or outbound edges.

        Can take either ``in`` for inbound edges or ``out`` for outbound edges.
    prob : str, optional
        Feature name used as the (unnormalized) probabilities associated with
        each neighboring edge of a node.  The feature must have only one
        element for each edge.

        The features must be non-negative floats or boolean.  Otherwise, the
        result will be undefined.
    exclude_edges: tensor or dict
        Edge IDs to exclude during sampling neighbors for the seed nodes.

        Either a single ID tensor or a dictionary mapping edge types to ID
        tensors.  If a single tensor is given, the graph must only have one
        edge type.
    replace : bool, optional
        If True, sample with replacement.
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from
        the original graph. If False, the new graph will not have any
        node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the new graph are copied from
        the original graph.  If False, the new graph will not have any
        edge features.

        (Default: True)
    _dist_training : bool, optional
        Internal argument.  Do not use.

        (Default: False)
    output_device : Framework-specific device context object, optional
        The output device.  Default is the same as the input graph.

    Returns
    -------
    DGLGraph
        A sampled subgraph containing only the sampled neighboring edges.

    Notes
    -----
    If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors are used as
    the node or edge features of the original graph and the new graph.
    As a result, users should avoid performing in-place operations
    on the node features of the new graph to avoid feature corruption.

    Examples
    --------
    Assume that you have the following graph

    >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))

    And the weights

    >>> g.edata['prob'] = torch.FloatTensor([0., 1., 0., 1., 0., 1.])

    To sample one inbound edge for node 0 and node 1:

    >>> sg = dgl.sampling.sample_neighbors(g, [0, 1], 1)
    >>> sg.edges(order='eid')
    (tensor([1, 0]), tensor([0, 1]))
    >>> sg.edata[dgl.EID]
    tensor([2, 0])

    To sample one inbound edge for node 0 and node 1 with probability in edge feature
    ``prob``:

    >>> sg = dgl.sampling.sample_neighbors(g, [0, 1], 1, prob='prob')
    >>> sg.edges(order='eid')
    (tensor([2, 1]), tensor([0, 1]))

    With ``fanout`` greater than the number of actual neighbors and without replacement,
    DGL will take all neighbors instead:

    >>> sg = dgl.sampling.sample_neighbors(g, [0, 1], 3)
    >>> sg.edges(order='eid')
    (tensor([1, 2, 0, 1]), tensor([0, 0, 1, 1]))

    To exclude certain EID's during sampling for the seed nodes:

    >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]))
    >>> g_edges = g.all_edges(form='all')
    (tensor([0, 0, 1, 1, 2, 2]), tensor([1, 2, 0, 1, 2, 0]), tensor([0, 1, 2, 3, 4, 5]))
    >>> sg = dgl.sampling.sample_neighbors(g, [0, 1], 3, exclude_edges=[0, 1, 2])
    >>> sg.all_edges(form='all')
    (tensor([2, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> sg.has_edges_between(g_edges[0][:3],g_edges[1][:3])
    tensor([False, False, False])
    >>> g = dgl.heterograph({
    ...   ('drug', 'interacts', 'drug'): ([0, 0, 1, 1, 3, 2], [1, 2, 0, 1, 2, 0]),
    ...   ('drug', 'interacts', 'gene'): ([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0]),
    ...   ('drug', 'treats', 'disease'): ([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0])})
    >>> g_edges = g.all_edges(form='all', etype=('drug', 'interacts', 'drug'))
    (tensor([0, 0, 1, 1, 3, 2]), tensor([1, 2, 0, 1, 2, 0]), tensor([0, 1, 2, 3, 4, 5]))
    >>> excluded_edges  = {('drug', 'interacts', 'drug'): g_edges[2][:3]}
    >>> sg = dgl.sampling.sample_neighbors(g, {'drug':[0, 1]}, 3, exclude_edges=excluded_edges)
    >>> sg.all_edges(form='all', etype=('drug', 'interacts', 'drug'))
    (tensor([2, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> sg.has_edges_between(g_edges[0][:3],g_edges[1][:3],etype=('drug', 'interacts', 'drug'))
    tensor([False, False, False])
    """
    # NOTE(review): ``_dist_training`` is accepted here but is not forwarded
    # to ``_sample_neighbors``.

    # For an unpinned CPU graph the exclusion is handed to the sampler itself;
    # otherwise the edges are sampled first and filtered afterwards.
    exclude_in_sampler = (
        F.device_type(g.device) == "cpu" and not g.is_pinned()
    )
    subgraph = _sample_neighbors(
        g,
        nodes,
        fanout,
        edge_dir=edge_dir,
        prob=prob,
        replace=replace,
        copy_ndata=copy_ndata,
        copy_edata=copy_edata,
        exclude_edges=exclude_edges if exclude_in_sampler else None,
    )
    if not exclude_in_sampler and exclude_edges is not None:
        subgraph = EidExcluder(exclude_edges)(subgraph)

    if output_device is None:
        return subgraph
    return subgraph.to(output_device)
If a single tensor is given, the graph must only have one type of nodes. fanout : int or dict[etype, int] The number of edges to be sampled for each node on each edge type. This argument can take a single int or a dictionary of edge types and ints. If a single int is given, DGL will sample this number of edges for each node for every edge type. If -1 is given for a single edge type, all the neighboring edges with that edge type and non-zero probability will be selected. edge_dir : str, optional Determines whether to sample inbound or outbound edges. Can take either ``in`` for inbound edges or ``out`` for outbound edges. prob : str, optional Feature name used as the (unnormalized) probabilities associated with each neighboring edge of a node. The feature must have only one element for each edge. The features must be non-negative floats or boolean. Otherwise, the result will be undefined. exclude_edges: tensor or dict Edge IDs to exclude during sampling neighbors for the seed nodes. This argument can take a single ID tensor or a dictionary of edge types and ID tensors. If a single tensor is given, the graph must only have one type of nodes. replace : bool, optional If True, sample with replacement. copy_ndata: bool, optional If True, the node features of the new graph are copied from the original graph. If False, the new graph will not have any node features. (Default: True) copy_edata: bool, optional If True, the edge features of the new graph are copied from the original graph. If False, the new graph will not have any edge features. (Default: False) mapping : dictionary, optional Used by fused version of NeighborSampler. To avoid constant data allocation provide empty dictionary ({}) that will be allocated once with proper data and reused by each function call (Default: None) Returns ------- DGLGraph A sampled subgraph containing only the sampled neighboring edges. 
def _sample_neighbors(
    g,
    nodes,
    fanout,
    edge_dir="in",
    prob=None,
    replace=False,
    copy_ndata=True,
    copy_edata=True,
    _dist_training=False,
    exclude_edges=None,
    fused=False,
    mapping=None,
):
    """Shared implementation behind the neighbor-sampling entry points.

    Normalizes ``nodes``, ``fanout``, ``prob`` and ``exclude_edges`` into
    per-type lists of DGL NDArrays, calls the C sampling routine (the fused
    variant when ``fused`` is True) and attaches features to the result.

    Parameters
    ----------
    g : DGLGraph
        The graph to sample from.
    nodes : tensor or dict
        Seed node IDs, either a single tensor (graph must have one node type)
        or a dictionary of node types and ID tensors.
    fanout : int, dict[etype, int] or NDArray
        Number of edges to sample per node for each edge type.  An NDArray is
        passed through unchanged; a dict must contain one entry per edge type.
    edge_dir : str, optional
        ``in`` or ``out``; forwarded to the C API.
    prob : str, list or None, optional
        Edge probabilities, converted by ``_prepare_edge_arrays``.
    replace : bool, optional
        Forwarded to the C API.
    copy_ndata, copy_edata : bool, optional
        Whether node / edge frames are extracted from ``g`` and set on the
        result.  Ignored when ``_dist_training`` is True.
    _dist_training : bool, optional
        If True, only ``dgl.EID`` is set on the result's edges.  Not allowed
        together with ``fused``.
    exclude_edges : tensor or dict, optional
        Edge IDs handed to the C API for exclusion, either a single tensor
        (graph must have one edge type) or a dictionary of edge types and
        ID tensors.
    fused : bool, optional
        If True, use the fused sampler and return a ``DGLBlock``.
    mapping : dict, optional
        Cache used by the fused sampler.  It stores, under a per-process key,
        one tensor per node type filled with -1; the entries touched by a
        call are reset to -1 before returning.

    Returns
    -------
    DGLGraph or DGLBlock
        The sampled subgraph (a ``DGLBlock`` when ``fused`` is True).

    Raises
    ------
    DGLError
        If a type must be specified but is not, if a fanout dict does not
        cover every edge type, or if fused sampling is requested in an
        unsupported setting.
    ValueError
        If ``nodes`` is an empty dictionary.
    """
    if not isinstance(nodes, dict):
        if len(g.ntypes) > 1:
            raise DGLError(
                "Must specify node type when the graph is not homogeneous."
            )
        nodes = {g.ntypes[0]: nodes}

    # From here on ``nodes`` is always a dict of tensors.
    nodes = utils.prepare_tensor_dict(g, nodes, "nodes")
    if len(nodes) == 0:
        raise ValueError(
            "Got an empty dictionary in the nodes argument. "
            "Please pass in a dictionary with empty tensors as values instead."
        )
    device = utils.context_of(nodes)
    ctx = utils.to_dgl_context(device)
    # One array per node type, in graph order; empty for types without seeds.
    nodes_all_types = [
        F.to_dgl_nd(nodes[ntype]) if ntype in nodes else nd.array([], ctx=ctx)
        for ntype in g.ntypes
    ]

    if isinstance(fanout, nd.NDArray):
        fanout_array = fanout
    else:
        if not isinstance(fanout, dict):
            fanout_array = [int(fanout)] * len(g.etypes)
        else:
            if len(fanout) != len(g.etypes):
                raise DGLError(
                    "Fan-out must be specified for each edge type "
                    "if a dict is provided."
                )
            fanout_array = [None] * len(g.etypes)
            for etype, value in fanout.items():
                fanout_array[g.get_etype_id(etype)] = value
        fanout_array = F.to_dgl_nd(F.tensor(fanout_array, dtype=F.int64))

    prob_arrays = _prepare_edge_arrays(g, prob)

    excluded_edges_all_t = []
    if exclude_edges is not None:
        if not isinstance(exclude_edges, dict):
            if len(g.etypes) > 1:
                raise DGLError(
                    "Must specify etype when the graph is not homogeneous."
                )
            exclude_edges = {g.canonical_etypes[0]: exclude_edges}
        exclude_edges = utils.prepare_tensor_dict(g, exclude_edges, "edges")
        # One array per canonical edge type; empty when nothing is excluded.
        excluded_edges_all_t = [
            F.to_dgl_nd(exclude_edges[etype])
            if etype in exclude_edges
            else nd.array([], ctx=ctx)
            for etype in g.canonical_etypes
        ]

    if fused:
        if _dist_training:
            raise DGLError(
                "distributed training not supported in fused sampling"
            )
        # The graph and every seed tensor must live on CPU.
        on_cpu = F.device_type(g.device) == "cpu" and all(
            F.device_type(seeds.device) == "cpu" for seeds in nodes.values()
        )
        if not on_cpu or F.backend_name != "pytorch":
            raise DGLError(
                "Only PyTorch backend and cpu is supported in fused sampling"
            )

        if mapping is None:
            mapping = {}
        # The cache entry is keyed by process ID.
        mapping_name = "__mapping" + str(os.getpid())
        if mapping_name not in mapping:
            mapping[mapping_name] = [
                torch.LongTensor(g.num_nodes(ntype)).fill_(-1)
                for ntype in g.ntypes
            ]

        subgidx, induced_nodes, induced_edges = _CAPI_DGLSampleNeighborsFused(
            g._graph,
            nodes_all_types,
            [F.to_dgl_nd(m) for m in mapping[mapping_name]],
            fanout_array,
            edge_dir,
            prob_arrays,
            excluded_edges_all_t,
            replace,
        )
        # Reset the entries used by this call so the cache can be reused.
        for mapping_vector, src_nodes in zip(
            mapping[mapping_name], induced_nodes
        ):
            mapping_vector[F.from_dgl_nd(src_nodes).type(F.int64)] = -1

        new_ntypes = (g.ntypes, g.ntypes)
        ret = DGLBlock(subgidx, new_ntypes, g.etypes)
        assert ret.is_unibipartite

    else:
        subgidx = _CAPI_DGLSampleNeighbors(
            g._graph,
            nodes_all_types,
            fanout_array,
            edge_dir,
            prob_arrays,
            excluded_edges_all_t,
            replace,
        )
        ret = DGLGraph(subgidx.graph, g.ntypes, g.etypes)
        induced_edges = subgidx.induced_edges

    # handle features
    # (TODO) (BarclayII) DGL distributed fails with bus error, freezes, or other
    # incomprehensible errors with lazy feature copy.
    # So in distributed training context, we fall back to old behavior where we
    # only set the edge IDs.
    if not _dist_training:
        if copy_ndata:
            if fused:
                src_node_ids = [F.from_dgl_nd(src) for src in induced_nodes]
                dst_node_ids = [
                    utils.toindex(
                        nodes.get(ntype, []), g._idtype_str
                    ).tousertensor(ctx=F.to_backend_ctx(g._graph.ctx))
                    for ntype in g.ntypes
                ]
                node_frames = utils.extract_node_subframes_for_block(
                    g, src_node_ids, dst_node_ids
                )
            else:
                node_frames = utils.extract_node_subframes(g, device)
            utils.set_new_frames(ret, node_frames=node_frames)

        if copy_edata:
            if fused:
                edge_ids = [F.from_dgl_nd(eid) for eid in induced_edges]
            else:
                edge_ids = induced_edges
            edge_frames = utils.extract_edge_subframes(g, edge_ids)
            utils.set_new_frames(ret, edge_frames=edge_frames)
    else:
        for i, etype in enumerate(ret.canonical_etypes):
            ret.edges[etype].data[EID] = induced_edges[i]

    return ret
This version of neighbor sampling can support the scenario where adjacent nodes with different types have different sampling probability. Each node is assigned an integer (called a *tag*) which represents its type. Tag is an analogue of node type under the framework of homogeneous graphs. Nodes with the same tag share the same probability. For example, assume a node has :math:`N+M` neighbors, and :math:`N` of them have tag 0 while :math:`M` of them have tag 1. Assume a node of tag 0 has an unnormalized probability :math:`p` to be picked while a node of tag 1 has :math:`q`. This function first chooses a tag according to the unnormalized probability distribution :math:`\frac{P(tag=0)}{P(tag=1)}=\frac{Np}{Mq}`, and then run a uniform sampling to get a node of the chosen tag. In order to make sampling more efficient, the input graph must have its CSC matrix (or CSR matrix if ``edge_dir='out'``) sorted according to the tag. The API :func:`~dgl.sort_csc_by_tag` and :func:`~dgl.sort_csr_by_tag` are designed for this purpose, which will internally reorder the neighbors by tags so that neighbors of the same tags are stored in a consecutive range. The two APIs will also store the offsets of these ranges in a node feature with :attr:`tag_offset_name` as its name. **Please make sure that the CSR (or CSC) matrix of the graph has been sorted before calling this function.** This function itself will not check whether the input graph is sorted. Note that the input :attr:`tag_offset_name` should be consistent with that in the sorting function. Only homogeneous or bipartite graphs are supported. For bipartite graphs, the tag offsets of the source nodes when ``edge_dir='in'`` (or the destination nodes when ``edge_dir='out'``) will be used in sampling. Node/edge features are not preserved. The original IDs of the sampled edges are stored as the ``dgl.EID`` feature in the returned graph. Parameters ---------- g : DGLGraph The graph. 
Must be homogeneous or bipartite (only one edge type). Must be on CPU. nodes : tensor or list Node IDs to sample neighbors from. fanout : int The number of edges to be sampled for each node on each edge type. If -1 is given, all the neighboring edges with non-zero probability will be selected. bias : tensor or list The (unnormalized) probabilities associated with each tag. Its length should be equal to the number of tags. Entries of this array must be non-negative floats. Otherwise, the result will be undefined. edge_dir : str, optional Determines whether to sample inbound or outbound edges. Can take either ``in`` for inbound edges or ``out`` for outbound edges. tag_offset_name : str, optional The name of the node feature storing tag offsets. (Default: "_TAG_OFFSET") replace : bool, optional If True, sample with replacement. copy_ndata: bool, optional If True, the node features of the new graph are copied from the original graph. If False, the new graph will not have any node features. (Default: True) copy_edata: bool, optional If True, the edge features of the new graph are copied from the original graph. If False, the new graph will not have any edge features. (Default: True) output_device : Framework-specific device context object, optional The output device. Default is the same as the input graph. Returns ------- DGLGraph A sampled subgraph containing only the sampled neighboring edges. It is on CPU. Notes ----- If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors are used as the node or edge features of the original graph and the new graph. As a result, users should avoid performing in-place operations on the node features of the new graph to avoid feature corruption. See Also -------- dgl.sort_csc_by_tag dgl.sort_csr_by_tag Examples -------- Assume that you have the following graph >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0])) And the tags >>> tag = torch.IntTensor([0, 0, 1]) Sort the graph (necessary!) 
>>> g_sorted = dgl.transforms.sort_csr_by_tag(g, tag) >>> g_sorted.ndata['_TAG_OFFSET'] tensor([[0, 1, 2], [0, 2, 2], [0, 1, 2]]) Set the probability of each tag: >>> bias = torch.tensor([1.0, 0.001]) >>> # node 2 is almost impossible to be sampled because it has tag 1. To sample one out bound edge for node 0 and node 2: >>> sg = dgl.sampling.sample_neighbors_biased(g_sorted, [0, 2], 1, bias, edge_dir='out') >>> sg.edges(order='eid') (tensor([0, 2]), tensor([1, 0])) >>> sg.edata[dgl.EID] tensor([0, 5]) With ``fanout`` greater than the number of actual neighbors and without replacement, DGL will take all neighbors instead: >>> sg = dgl.sampling.sample_neighbors_biased(g_sorted, [0, 2], 3, bias, edge_dir='out') >>> sg.edges(order='eid') (tensor([0, 0, 2, 2]), tensor([1, 2, 0, 2])) """ if isinstance(nodes, list): nodes = F.tensor(nodes) if isinstance(bias, list): bias = F.tensor(bias) device = utils.context_of(nodes) nodes_array = F.to_dgl_nd(nodes) bias_array = F.to_dgl_nd(bias) if edge_dir == "in": tag_offset_array = F.to_dgl_nd(g.dstdata[tag_offset_name]) elif edge_dir == "out": tag_offset_array = F.to_dgl_nd(g.srcdata[tag_offset_name]) else: raise DGLError("edge_dir can only be 'in' or 'out'") subgidx = _CAPI_DGLSampleNeighborsBiased( g._graph, nodes_array, fanout, bias_array, tag_offset_array, edge_dir, replace, ) induced_edges = subgidx.induced_edges ret = DGLGraph(subgidx.graph, g.ntypes, g.etypes) if copy_ndata: node_frames = utils.extract_node_subframes(g, device) utils.set_new_frames(ret, node_frames=node_frames) if copy_edata: edge_frames = utils.extract_edge_subframes(g, induced_edges) utils.set_new_frames(ret, edge_frames=edge_frames) ret.edata[EID] = induced_edges[0] return ret if output_device is None else ret.to(output_device) DGLGraph.sample_neighbors_biased = utils.alias_func(sample_neighbors_biased) def select_topk( g, k, weight, nodes=None, edge_dir="in", ascending=False, copy_ndata=True, copy_edata=True, output_device=None, ): """Select the 
neighboring edges with k-largest (or k-smallest) weights of the given nodes and return the induced subgraph. For each node, a number of inbound (or outbound when ``edge_dir == 'out'``) edges with the largest (or smallest when ``ascending == True``) weights will be chosen. The graph returned will then contain all the nodes in the original graph, but only the sampled edges. Node/edge features are not preserved. The original IDs of the sampled edges are stored as the `dgl.EID` feature in the returned graph. Parameters ---------- g : DGLGraph The graph. Must be on CPU. k : int or dict[etype, int] The number of edges to be selected for each node on each edge type. This argument can take a single int or a dictionary of edge types and ints. If a single int is given, DGL will select this number of edges for each node for every edge type. If -1 is given for a single edge type, all the neighboring edges with that edge type will be selected. weight : str Feature name of the weights associated with each edge. The feature should have only one element for each edge. The feature can be either int32/64 or float32/64. nodes : tensor or dict, optional Node IDs to sample neighbors from. This argument can take a single ID tensor or a dictionary of node types and ID tensors. If a single tensor is given, the graph must only have one type of nodes. If None, DGL will select the edges for all nodes. edge_dir : str, optional Determines whether to sample inbound or outbound edges. Can take either ``in`` for inbound edges or ``out`` for outbound edges. ascending : bool, optional If True, DGL will return edges with k-smallest weights instead of k-largest weights. copy_ndata: bool, optional If True, the node features of the new graph are copied from the original graph. If False, the new graph will not have any node features. (Default: True) copy_edata: bool, optional If True, the edge features of the new graph are copied from the original graph. 
If False, the new graph will not have any edge features. (Default: True) output_device : Framework-specific device context object, optional The output device. Default is the same as the input graph. Returns ------- DGLGraph A sampled subgraph containing only the sampled neighboring edges. It is on CPU. Notes ----- If :attr:`copy_ndata` or :attr:`copy_edata` is True, same tensors are used as the node or edge features of the original graph and the new graph. As a result, users should avoid performing in-place operations on the node features of the new graph to avoid feature corruption. Examples -------- >>> g = dgl.graph(([0, 0, 1, 1, 2, 2], [1, 2, 0, 1, 2, 0])) >>> g.edata['weight'] = torch.FloatTensor([0, 1, 0, 1, 0, 1]) >>> sg = dgl.sampling.select_topk(g, 1, 'weight') >>> sg.edges(order='eid') (tensor([2, 1, 0]), tensor([0, 1, 2])) """ # Rectify nodes to a dictionary if nodes is None: nodes = { ntype: F.astype(F.arange(0, g.num_nodes(ntype)), g.idtype) for ntype in g.ntypes } elif not isinstance(nodes, dict): if len(g.ntypes) > 1: raise DGLError( "Must specify node type when the graph is not homogeneous." ) nodes = {g.ntypes[0]: nodes} assert g.device == F.cpu(), "Graph must be on CPU." # Parse nodes into a list of NDArrays. nodes = utils.prepare_tensor_dict(g, nodes, "nodes") device = utils.context_of(nodes) nodes_all_types = [] for ntype in g.ntypes: if ntype in nodes: nodes_all_types.append(F.to_dgl_nd(nodes[ntype])) else: nodes_all_types.append(nd.array([], ctx=nd.cpu())) if not isinstance(k, dict): k_array = [int(k)] * len(g.etypes) else: if len(k) != len(g.etypes): raise DGLError( "K value must be specified for each edge type " "if a dict is provided." 
) k_array = [None] * len(g.etypes) for etype, value in k.items(): k_array[g.get_etype_id(etype)] = value k_array = F.to_dgl_nd(F.tensor(k_array, dtype=F.int64)) weight_arrays = [] for etype in g.canonical_etypes: if weight in g.edges[etype].data: weight_arrays.append(F.to_dgl_nd(g.edges[etype].data[weight])) else: raise DGLError( 'Edge weights "{}" do not exist for relation graph "{}".'.format( weight, etype ) ) subgidx = _CAPI_DGLSampleNeighborsTopk( g._graph, nodes_all_types, k_array, edge_dir, weight_arrays, bool(ascending), ) induced_edges = subgidx.induced_edges ret = DGLGraph(subgidx.graph, g.ntypes, g.etypes) # handle features if copy_ndata: node_frames = utils.extract_node_subframes(g, device) utils.set_new_frames(ret, node_frames=node_frames) if copy_edata: edge_frames = utils.extract_edge_subframes(g, induced_edges) utils.set_new_frames(ret, edge_frames=edge_frames) return ret if output_device is None else ret.to(output_device) DGLGraph.select_topk = utils.alias_func(select_topk) _init_api("dgl.sampling.neighbor", __name__) ================================================ FILE: python/dgl/sampling/node2vec_randomwalk.py ================================================ """Node2vec random walk""" from .. import backend as F, ndarray as nd, utils from .._ffi.function import _init_api # pylint: disable=invalid-name __all__ = ["node2vec_random_walk"] def node2vec_random_walk( g, nodes, p, q, walk_length, prob=None, return_eids=False ): """ Generate random walk traces from an array of starting nodes based on the node2vec model. Paper: `node2vec: Scalable Feature Learning for Networks `__. The returned traces all have length ``walk_length + 1``, where the first node is the starting node itself. Note that if a random walk stops in advance, DGL pads the trace with -1 to have the same length. Parameters ---------- g : DGLGraph The graph. Must be on CPU. Note that node2vec only support homogeneous graph. 
nodes : Tensor Node ID tensor from which the random walk traces starts. The tensor must be on CPU, and must have the same dtype as the ID type of the graph. p: float Likelihood of immediately revisiting a node in the walk. q: float Control parameter to interpolate between breadth-first strategy and depth-first strategy. walk_length: int Length of random walks. prob : str, optional The name of the edge feature tensor on the graph storing the (unnormalized) probabilities associated with each edge for choosing the next node. The feature tensor must be non-negative and the sum of the probabilities must be positive for the outbound edges of all nodes (although they don't have to sum up to one). The result will be undefined otherwise. If omitted, DGL assumes that the neighbors are picked uniformly. return_eids : bool, optional If True, additionally return the edge IDs traversed. Default: False. Returns ------- traces : Tensor A 2-dimensional node ID tensor with shape ``(num_seeds, walk_length + 1)``. eids : Tensor, optional A 2-dimensional edge ID tensor with shape ``(num_seeds, length)``. Only returned if :attr:`return_eids` is True. Examples -------- >>> g1 = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])) >>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4) tensor([[0, 1, 3, 0, 1], [1, 2, 0, 1, 3], [2, 0, 1, 3, 0], [0, 1, 2, 0, 1]]) >>> dgl.sampling.node2vec_random_walk(g1, [0, 1, 2, 0], 1, 1, walk_length=4, return_eids=True) (tensor([[0, 1, 3, 0, 1], [1, 2, 0, 1, 2], [2, 0, 1, 2, 0], [0, 1, 2, 0, 1]]), tensor([[0, 2, 4, 0], [1, 3, 0, 1], [3, 0, 1, 3], [0, 1, 3, 0]])) """ assert g.device == F.cpu(), "Graph must be on CPU." 
gidx = g._graph nodes = F.to_dgl_nd(utils.prepare_tensor(g, nodes, "nodes")) if prob is None: prob_nd = nd.array([], ctx=nodes.ctx) else: prob_nd = F.to_dgl_nd(g.edata[prob]) traces, eids = _CAPI_DGLSamplingNode2vec( gidx, nodes, p, q, walk_length, prob_nd ) traces = F.from_dgl_nd(traces) eids = F.from_dgl_nd(eids) return (traces, eids) if return_eids else traces _init_api("dgl.sampling.randomwalks", __name__) ================================================ FILE: python/dgl/sampling/pinsage.py ================================================ """PinSAGE sampler & related functions and classes""" import numpy as np from .. import backend as F, convert, utils from .._ffi.function import _init_api from .randomwalks import random_walk def _select_pinsage_neighbors(src, dst, num_samples_per_node, k): """Determine the neighbors for PinSAGE algorithm from the given random walk traces. This is fusing ``to_simple()``, ``select_topk()``, and counting the number of occurrences together. """ src = F.to_dgl_nd(src) dst = F.to_dgl_nd(dst) src, dst, counts = _CAPI_DGLSamplingSelectPinSageNeighbors( src, dst, num_samples_per_node, k ) src = F.from_dgl_nd(src) dst = F.from_dgl_nd(dst) counts = F.from_dgl_nd(counts) return (src, dst, counts) class RandomWalkNeighborSampler(object): """PinSage-like neighbor sampler extended to any heterogeneous graphs. Given a heterogeneous graph and a list of nodes, this callable will generate a homogeneous graph where the neighbors of each given node are the most commonly visited nodes of the same type by multiple random walks starting from that given node. Each random walk consists of multiple metapath-based traversals, with a probability of termination after each traversal. The edges of the returned homogeneous graph will connect to the given nodes from their most commonly visited nodes, with a feature indicating the number of visits. The metapath must have the same beginning and ending node type to make the algorithm work. 
This is a generalization of PinSAGE sampler which only works on bidirectional bipartite graphs. UVA and GPU sampling is supported for this sampler. Refer to :ref:`guide-minibatch-gpu-sampling` for more details. Parameters ---------- G : DGLGraph The graph. num_traversals : int The maximum number of metapath-based traversals for a single random walk. Usually considered a hyperparameter. termination_prob : float Termination probability after each metapath-based traversal. Usually considered a hyperparameter. num_random_walks : int Number of random walks to try for each given node. Usually considered a hyperparameter. num_neighbors : int Number of neighbors (or most commonly visited nodes) to select for each given node. metapath : list[str] or list[tuple[str, str, str]], optional The metapath. If not given, DGL assumes that the graph is homogeneous and the metapath consists of one step over the single edge type. weight_column : str, default "weights" The name of the edge feature to be stored on the returned graph with the number of visits. Examples -------- See examples in :any:`PinSAGESampler`. """ def __init__( self, G, num_traversals, termination_prob, num_random_walks, num_neighbors, metapath=None, weight_column="weights", ): self.G = G self.weight_column = weight_column self.num_random_walks = num_random_walks self.num_neighbors = num_neighbors self.num_traversals = num_traversals if metapath is None: if len(G.ntypes) > 1 or len(G.etypes) > 1: raise ValueError( "Metapath must be specified if the graph is homogeneous." ) metapath = [G.canonical_etypes[0]] start_ntype = G.to_canonical_etype(metapath[0])[0] end_ntype = G.to_canonical_etype(metapath[-1])[-1] if start_ntype != end_ntype: raise ValueError( "The metapath must start and end at the same node type." 
) self.ntype = start_ntype self.metapath_hops = len(metapath) self.metapath = metapath self.full_metapath = metapath * num_traversals restart_prob = np.zeros(self.metapath_hops * num_traversals) restart_prob[ self.metapath_hops :: self.metapath_hops ] = termination_prob restart_prob = F.tensor(restart_prob, dtype=F.float32) self.restart_prob = F.copy_to(restart_prob, G.device) # pylint: disable=no-member def __call__(self, seed_nodes): """ Parameters ---------- seed_nodes : Tensor A tensor of given node IDs of node type ``ntype`` to generate neighbors from. The node type ``ntype`` is the beginning and ending node type of the given metapath. It must be on the same device as the graph and have the same dtype as the ID type of the graph. Returns ------- g : DGLGraph A homogeneous graph constructed by selecting neighbors for each given node according to the algorithm above. """ seed_nodes = utils.prepare_tensor(self.G, seed_nodes, "seed_nodes") self.restart_prob = F.copy_to(self.restart_prob, F.context(seed_nodes)) seed_nodes = F.repeat(seed_nodes, self.num_random_walks, 0) paths, _ = random_walk( self.G, seed_nodes, metapath=self.full_metapath, restart_prob=self.restart_prob, ) src = F.reshape( paths[:, self.metapath_hops :: self.metapath_hops], (-1,) ) dst = F.repeat(paths[:, 0], self.num_traversals, 0) src, dst, counts = _select_pinsage_neighbors( src, dst, (self.num_random_walks * self.num_traversals), self.num_neighbors, ) neighbor_graph = convert.heterograph( {(self.ntype, "_E", self.ntype): (src, dst)}, {self.ntype: self.G.num_nodes(self.ntype)}, ) neighbor_graph.edata[self.weight_column] = counts return neighbor_graph class PinSAGESampler(RandomWalkNeighborSampler): """PinSAGE-like neighbor sampler. This callable works on a bidirectional bipartite graph with edge types ``(ntype, fwtype, other_type)`` and ``(other_type, bwtype, ntype)`` (where ``ntype``, ``fwtype``, ``bwtype`` and ``other_type`` could be arbitrary type names). 
It will generate a homogeneous graph of node type ``ntype`` where the neighbors of each given node are the most commonly visited nodes of the same type by multiple random walks starting from that given node. Each random walk consists of multiple metapath-based traversals, with a probability of termination after each traversal. The metapath is always ``[fwtype, bwtype]``, walking from node type ``ntype`` to node type ``other_type`` then back to ``ntype``. The edges of the returned homogeneous graph will connect to the given nodes from their most commonly visited nodes, with a feature indicating the number of visits. UVA and GPU sampling are supported for this sampler. Refer to :ref:`guide-minibatch-gpu-sampling` for more details. Parameters ---------- G : DGLGraph The bidirectional bipartite graph. The graph should only have two node types: ``ntype`` and ``other_type``. The graph should only have two edge types, one connecting from ``ntype`` to ``other_type``, and another connecting from ``other_type`` to ``ntype``. ntype : str The node type on which the graph is constructed. other_type : str The other node type. num_traversals : int The maximum number of metapath-based traversals for a single random walk. Usually considered a hyperparameter. termination_prob : float Termination probability after each metapath-based traversal. Usually considered a hyperparameter. num_random_walks : int Number of random walks to try for each given node. Usually considered a hyperparameter. num_neighbors : int Number of neighbors (or most commonly visited nodes) to select for each given node. weight_column : str, default "weights" The name of the edge feature to be stored on the returned graph with the number of visits. Examples -------- Generate a random bidirectional bipartite graph with 3000 "A" nodes and 5000 "B" nodes. >>> g = scipy.sparse.random(3000, 5000, 0.003) >>> G = dgl.heterograph({ ... ('A', 'AB', 'B'): g.nonzero(), ...
('B', 'BA', 'A'): g.T.nonzero()}) Then we create a PinSage neighbor sampler that samples a graph of node type "A". Each node would have (a maximum of) 10 neighbors. >>> sampler = dgl.sampling.PinSAGESampler(G, 'A', 'B', 3, 0.5, 200, 10) This is how we select the neighbors for node #0, #1 and #2 of type "A" according to PinSAGE algorithm: >>> seeds = torch.LongTensor([0, 1, 2]) >>> frontier = sampler(seeds) >>> frontier.all_edges(form='uv') (tensor([ 230, 0, 802, 47, 50, 1639, 1533, 406, 2110, 2687, 2408, 2823, 0, 972, 1230, 1658, 2373, 1289, 1745, 2918, 1818, 1951, 1191, 1089, 1282, 566, 2541, 1505, 1022, 812]), tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])) For an end-to-end example of PinSAGE model, including sampling on multiple layers and computing with the sampled graphs, please refer to our PinSage example in ``examples/pytorch/pinsage``. References ---------- Graph Convolutional Neural Networks for Web-Scale Recommender Systems Ying et al., 2018, https://arxiv.org/abs/1806.01973 """ def __init__( self, G, ntype, other_type, num_traversals, termination_prob, num_random_walks, num_neighbors, weight_column="weights", ): metagraph = G.metagraph() fw_etype = list(metagraph[ntype][other_type])[0] bw_etype = list(metagraph[other_type][ntype])[0] super().__init__( G, num_traversals, termination_prob, num_random_walks, num_neighbors, metapath=[fw_etype, bw_etype], weight_column=weight_column, ) _init_api("dgl.sampling.pinsage", __name__) ================================================ FILE: python/dgl/sampling/randomwalks.py ================================================ """Random walk routines """ from .. 
import backend as F, ndarray as nd, utils from .._ffi.function import _init_api from ..base import DGLError __all__ = ["random_walk", "pack_traces"] def random_walk( g, nodes, *, metapath=None, length=None, prob=None, restart_prob=None, return_eids=False ): """Generate random walk traces from an array of starting nodes based on the given metapath. Each starting node will have one trace generated, which 1. Start from the given node and set ``t`` to 0. 2. Pick and traverse along edge type ``metapath[t]`` from the current node. 3. If no edge can be found, halt. Otherwise, increment ``t`` and go to step 2. To generate multiple traces for a single node, you can specify the same node multiple times. The returned traces all have length ``len(metapath) + 1``, where the first node is the starting node itself. If a random walk stops in advance, DGL pads the trace with -1 to have the same length. This function supports the graph on GPU and UVA sampling. Parameters ---------- g : DGLGraph The graph. nodes : Tensor Node ID tensor from which the random walk traces starts. The tensor must have the same dtype as the ID type of the graph. The tensor must be on the same device as the graph or on the GPU when the graph is pinned (UVA sampling). metapath : list[str or tuple of str], optional Metapath, specified as a list of edge types. Mutually exclusive with :attr:`length`. If omitted, DGL assumes that ``g`` only has one node & edge type. In this case, the argument ``length`` specifies the length of random walk traces. length : int, optional Length of random walks. Mutually exclusive with :attr:`metapath`. Only used when :attr:`metapath` is None. prob : str, optional The name of the edge feature tensor on the graph storing the (unnormalized) probabilities associated with each edge for choosing the next node. The feature tensor must be non-negative and the sum of the probabilities must be positive for the outbound edges of all nodes (although they don't have to sum up to one). 
The result will be undefined otherwise. The feature tensor must be on the same device as the graph. If omitted, DGL assumes that the neighbors are picked uniformly. restart_prob : float or Tensor, optional Probability to terminate the current trace before each transition. If a tensor is given, :attr:`restart_prob` should be on the same device as the graph or on the GPU when the graph is pinned (UVA sampling), and have the same length as :attr:`metapath` or :attr:`length`. return_eids : bool, optional If True, additionally return the edge IDs traversed. Default: False. Returns ------- traces : Tensor A 2-dimensional node ID tensor with shape ``(num_seeds, len(metapath) + 1)`` or ``(num_seeds, length + 1)`` if :attr:`metapath` is None. eids : Tensor, optional A 2-dimensional edge ID tensor with shape ``(num_seeds, len(metapath))`` or ``(num_seeds, length)`` if :attr:`metapath` is None. Only returned if :attr:`return_eids` is True. types : Tensor A 1-dimensional node type ID tensor with shape ``(len(metapath) + 1)`` or ``(length + 1)``. The type IDs match the ones in the original graph ``g``. Examples -------- The following creates a homogeneous graph: >>> g1 = dgl.graph(([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])) Normal random walk: >>> dgl.sampling.random_walk(g1, [0, 1, 2, 0], length=4) (tensor([[0, 1, 2, 0, 1], [1, 3, 0, 1, 3], [2, 0, 1, 3, 0], [0, 1, 2, 0, 1]]), tensor([0, 0, 0, 0, 0])) Or returning edge IDs: >>> dgl.sampling.random_walk(g1, [0, 1, 2, 0], length=4, return_eids=True) (tensor([[0, 1, 2, 0, 1], [1, 3, 0, 1, 2], [2, 0, 1, 3, 0], [0, 1, 3, 0, 1]]), tensor([[0, 1, 3, 0], [2, 4, 0, 1], [3, 0, 2, 4], [0, 2, 4, 0]]), tensor([0, 0, 0, 0, 0])) The first tensor indicates the random walk path for each seed node. The j-th element in the second tensor indicates the node type ID of the j-th node in every path. In this case, it is returning all 0. 
Random walk with restart: >>> dgl.sampling.random_walk_with_restart(g1, [0, 1, 2, 0], length=4, restart_prob=0.5) (tensor([[ 0, -1, -1, -1, -1], [ 1, 3, 0, -1, -1], [ 2, -1, -1, -1, -1], [ 0, -1, -1, -1, -1]]), tensor([0, 0, 0, 0, 0])) Non-uniform random walk: >>> g1.edata['p'] = torch.FloatTensor([1, 0, 1, 1, 1]) # disallow going from 1 to 2 >>> dgl.sampling.random_walk(g1, [0, 1, 2, 0], length=4, prob='p') (tensor([[0, 1, 3, 0, 1], [1, 3, 0, 1, 3], [2, 0, 1, 3, 0], [0, 1, 3, 0, 1]]), tensor([0, 0, 0, 0, 0])) Metapath-based random walk: >>> g2 = dgl.heterograph({ ... ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]), ... ('user', 'view', 'item'): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]), ... ('item', 'viewed-by', 'user'): ([0, 1, 1, 2, 2, 1], [0, 0, 1, 2, 3, 3]) >>> dgl.sampling.random_walk( ... g2, [0, 1, 2, 0], metapath=['follow', 'view', 'viewed-by'] * 2) (tensor([[0, 1, 1, 1, 2, 2, 3], [1, 3, 1, 1, 2, 2, 2], [2, 0, 1, 1, 3, 1, 1], [0, 1, 1, 0, 1, 1, 3]]), tensor([0, 0, 1, 0, 0, 1, 0])) Metapath-based random walk, with restarts only on items (i.e. after traversing a "view" relationship): >>> dgl.sampling.random_walk( ... g2, [0, 1, 2, 0], metapath=['follow', 'view', 'viewed-by'] * 2, ... restart_prob=torch.FloatTensor([0, 0.5, 0, 0, 0.5, 0])) (tensor([[ 0, 1, -1, -1, -1, -1, -1], [ 1, 3, 1, 0, 1, 1, 0], [ 2, 0, 1, 1, 3, 2, 2], [ 0, 1, 1, 3, 0, 0, 0]]), tensor([0, 0, 1, 0, 0, 1, 0])) """ n_etypes = len(g.canonical_etypes) n_ntypes = len(g.ntypes) if metapath is None: if n_etypes > 1 or n_ntypes > 1: raise DGLError( "metapath not specified and the graph is not homogeneous." ) if length is None: raise ValueError( "Please specify either the metapath or the random walk length." 
) metapath = [0] * length else: metapath = [g.get_etype_id(etype) for etype in metapath] gidx = g._graph nodes = utils.prepare_tensor(g, nodes, "nodes") nodes = F.to_dgl_nd(nodes) # (Xin) Since metapath array is created by us, safe to skip the check # and keep it on CPU to make max_nodes sanity check easier. metapath = F.to_dgl_nd(F.astype(F.tensor(metapath), g.idtype)) # Load the probability tensor from the edge frames ctx = utils.to_dgl_context(g.device) if prob is None: p_nd = [nd.array([], ctx=ctx) for _ in g.canonical_etypes] else: p_nd = [] for etype in g.canonical_etypes: if prob in g.edges[etype].data: prob_nd = F.to_dgl_nd(g.edges[etype].data[prob]) else: prob_nd = nd.array([], ctx=ctx) p_nd.append(prob_nd) # Actual random walk if restart_prob is None: traces, eids, types = _CAPI_DGLSamplingRandomWalk( gidx, nodes, metapath, p_nd ) elif F.is_tensor(restart_prob): restart_prob = F.to_dgl_nd(restart_prob) traces, eids, types = _CAPI_DGLSamplingRandomWalkWithStepwiseRestart( gidx, nodes, metapath, p_nd, restart_prob ) elif isinstance(restart_prob, float): traces, eids, types = _CAPI_DGLSamplingRandomWalkWithRestart( gidx, nodes, metapath, p_nd, restart_prob ) else: raise TypeError("restart_prob should be float or Tensor.") traces = F.from_dgl_nd(traces) types = F.from_dgl_nd(types) eids = F.from_dgl_nd(eids) return (traces, eids, types) if return_eids else (traces, types) def pack_traces(traces, types): """Pack the padded traces returned by ``random_walk()`` into a concatenated array. The padding values (-1) are removed, and the length and offset of each trace is returned along with the concatenated node ID and node type arrays. Parameters ---------- traces : Tensor A 2-dimensional node ID tensor. Must be on CPU and either ``int32`` or ``int64``. types : Tensor A 1-dimensional node type ID tensor. Must be on CPU and either ``int32`` or ``int64``. Returns ------- concat_vids : Tensor An array of all node IDs concatenated and padding values removed. 
concat_types : Tensor An array of node types corresponding to each node in ``concat_vids``. Has the same length as ``concat_vids``. lengths : Tensor Length of each trace in the original traces tensor. offsets : Tensor Offset of each trace in the original traces tensor in the new concatenated tensor. Notes ----- The returned tensors are on CPU. Examples -------- >>> g2 = dgl.heterograph({ ... ('user', 'follow', 'user'): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]), ... ('user', 'view', 'item'): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]), ... ('item', 'viewed-by', 'user'): ([0, 1, 1, 2, 2, 1], [0, 0, 1, 2, 3, 3])}) >>> traces, types = dgl.sampling.random_walk( ... g2, [0, 0], metapath=['follow', 'view', 'viewed-by'] * 2, ... restart_prob=torch.FloatTensor([0, 0.5, 0, 0, 0.5, 0])) >>> traces, types (tensor([[ 0, 1, -1, -1, -1, -1, -1], [ 0, 1, 1, 3, 0, 0, 0]]), tensor([0, 0, 1, 0, 0, 1, 0])) >>> concat_vids, concat_types, lengths, offsets = dgl.sampling.pack_traces(traces, types) >>> concat_vids tensor([0, 1, 0, 1, 1, 3, 0, 0, 0]) >>> concat_types tensor([0, 0, 0, 0, 1, 0, 0, 1, 0]) >>> lengths tensor([2, 7]) >>> offsets tensor([0, 2]) The first tensor ``concat_vids`` is the concatenation of all paths, i.e. flattened array of ``traces``, excluding all padding values (-1). The second tensor ``concat_types`` stands for the node type IDs of all corresponding nodes in the first tensor. The third and fourth tensors indicate the length and the offset of each path.
With these tensors it is easy to obtain the i-th random walk path with: >>> vids = concat_vids.split(lengths.tolist()) >>> vtypes = concat_types.split(lengths.tolist()) >>> vids[1], vtypes[1] (tensor([0, 1, 1, 3, 0, 0, 0]), tensor([0, 0, 1, 0, 0, 1, 0])) """ assert ( F.is_tensor(traces) and F.context(traces) == F.cpu() ), "traces must be a CPU tensor" assert ( F.is_tensor(types) and F.context(types) == F.cpu() ), "types must be a CPU tensor" traces = F.to_dgl_nd(traces) types = F.to_dgl_nd(types) concat_vids, concat_types, lengths, offsets = _CAPI_DGLSamplingPackTraces( traces, types ) concat_vids = F.from_dgl_nd(concat_vids) concat_types = F.from_dgl_nd(concat_types) lengths = F.from_dgl_nd(lengths) offsets = F.from_dgl_nd(offsets) return concat_vids, concat_types, lengths, offsets _init_api("dgl.sampling.randomwalks", __name__) ================================================ FILE: python/dgl/sampling/utils.py ================================================ """Sampling utilities""" from collections.abc import Mapping import numpy as np from .. import backend as F, transforms, utils from ..base import EID from ..utils import recursive_apply, recursive_apply_pair def _locate_eids_to_exclude(frontier_parent_eids, exclude_eids): """Find the edges whose IDs in parent graph appeared in exclude_eids. Note that both arguments are numpy arrays or numpy dicts. """ if not isinstance(frontier_parent_eids, Mapping): return np.isin(frontier_parent_eids, exclude_eids).nonzero()[0] result = {} for k, v in frontier_parent_eids.items(): if k in exclude_eids: result[k] = np.isin(v, exclude_eids[k]).nonzero()[0] return recursive_apply(result, F.zerocopy_from_numpy) class EidExcluder(object): """Class that finds the edges whose IDs in parent graph appeared in exclude_eids. The edge IDs can be both CPU and GPU tensors.
""" def __init__(self, exclude_eids): device = None if isinstance(exclude_eids, Mapping): for _, v in exclude_eids.items(): if device is None: device = F.context(v) break else: device = F.context(exclude_eids) self._exclude_eids = None self._filter = None if device == F.cpu(): # TODO(nv-dlasalle): Once Filter is implemented for the CPU, we # should just use that irregardless of the device. self._exclude_eids = ( recursive_apply(exclude_eids, F.zerocopy_to_numpy) if exclude_eids is not None else None ) else: self._filter = recursive_apply(exclude_eids, utils.Filter) def _find_indices(self, parent_eids): """Find the set of edge indices to remove.""" if self._exclude_eids is not None: parent_eids_np = recursive_apply(parent_eids, F.zerocopy_to_numpy) return _locate_eids_to_exclude(parent_eids_np, self._exclude_eids) else: assert self._filter is not None func = lambda x, y: x.find_included_indices(y) return recursive_apply_pair(self._filter, parent_eids, func) def __call__(self, frontier, weights=None): parent_eids = frontier.edata[EID] located_eids = self._find_indices(parent_eids) if not isinstance(located_eids, Mapping): # (BarclayII) If frontier already has a EID field and located_eids is empty, # the returned graph will keep EID intact. Otherwise, EID will change # to the mapping from the new graph to the old frontier. # So we need to test if located_eids is empty, and do the remapping ourselves. if len(located_eids) > 0: frontier = transforms.remove_edges( frontier, located_eids, store_ids=True ) if ( weights is not None and weights[0].shape[0] == frontier.num_edges() ): weights[0] = F.gather_row(weights[0], frontier.edata[EID]) frontier.edata[EID] = F.gather_row( parent_eids, frontier.edata[EID] ) else: # (BarclayII) remove_edges only accepts removing one type of edges, # so I need to keep track of the edge IDs left one by one. 
new_eids = parent_eids.copy() for i, (k, v) in enumerate(located_eids.items()): if len(v) > 0: frontier = transforms.remove_edges( frontier, v, etype=k, store_ids=True ) new_eids[k] = F.gather_row( parent_eids[k], frontier.edges[k].data[EID] ) if weights is not None and weights[i].shape[ 0 ] == frontier.num_edges(k): weights[i] = F.gather_row( weights[i], frontier.edges[k].data[EID] ) frontier.edata[EID] = new_eids return frontier if weights is None else (frontier, weights) ================================================ FILE: python/dgl/sparse/__init__.py ================================================ """dgl sparse class.""" import os import sys import torch from .._ffi import libinfo from .broadcast import * from .elementwise_op import * from .elementwise_op_sp import * from .matmul import * from .reduction import * # pylint: disable=W0622 from .sddmm import * from .softmax import * from .sparse_matrix import * from .unary_op import * def load_dgl_sparse(): """Load DGL C++ sparse library""" version = torch.__version__.split("+", maxsplit=1)[0] if sys.platform.startswith("linux"): basename = f"libdgl_sparse_pytorch_{version}.so" elif sys.platform.startswith("darwin"): basename = f"libdgl_sparse_pytorch_{version}.dylib" elif sys.platform.startswith("win"): basename = f"dgl_sparse_pytorch_{version}.dll" else: raise NotImplementedError("Unsupported system: %s" % sys.platform) dirname = os.path.dirname(libinfo.find_lib_path()[0]) path = os.path.join(dirname, "dgl_sparse", basename) if not os.path.exists(path): raise FileNotFoundError(f"Cannot find DGL C++ sparse library at {path}") try: torch.classes.load_library(path) except Exception: # pylint: disable=W0703 raise ImportError("Cannot load DGL C++ sparse library") load_dgl_sparse() ================================================ FILE: python/dgl/sparse/broadcast.py ================================================ """DGL broadcast operator module.""" import operator import torch from .sparse_matrix import 
SparseMatrix, val_like def sp_broadcast_v(A: SparseMatrix, v: torch.Tensor, op: str) -> SparseMatrix: """Broadcast operator for sparse matrix and vector. :attr:`v` is broadcasted to the shape of :attr:`A` and then the operator is applied on the non-zero values of :attr:`A`. There are two cases regarding the shape of v: 1. :attr:`v` is a vector of shape ``(1, A.shape[1])`` or ``(A.shape[1])``. In this case, :attr:`v` is broadcasted on the row dimension of :attr:`A`. 2. :attr:`v` is a vector of shape ``(A.shape[0], 1)``. In this case, :attr:`v` is broadcasted on the column dimension of :attr:`A`. If ``A.val`` takes shape ``(nnz, D)``, then :attr:`v` will be broadcasted on the ``D`` dimension. Parameters ---------- A: SparseMatrix Sparse matrix v: torch.Tensor Vector op: str Operator in ["add", "sub", "mul", "truediv"] Returns ------- SparseMatrix Sparse matrix Examples -------- >>> indices = torch.tensor([[1, 0, 2], [0, 3, 2]]) >>> val = torch.tensor([10, 20, 30]) >>> A = dglsp.spmatrix(indices, val, shape=(3, 4)) >>> v = torch.tensor([1, 2, 3, 4]) >>> dglsp.sp_broadcast_v(A, v, "add") SparseMatrix(indices=tensor([[1, 0, 2], [0, 3, 2]]), values=tensor([11, 24, 33]), shape=(3, 4), nnz=3) >>> v = torch.tensor([1, 2, 3]).view(-1, 1) >>> dglsp.sp_broadcast_v(A, v, "add") SparseMatrix(indices=tensor([[1, 0, 2], [0, 3, 2]]), values=tensor([12, 21, 33]), shape=(3, 4), nnz=3) >>> indices = torch.tensor([[1, 0, 2], [0, 3, 2]]) >>> val = torch.tensor([[10, 20], [30, 40], [50, 60]]) >>> A = dglsp.spmatrix(indices, val, shape=(3, 4)) >>> v = torch.tensor([1, 2, 3]).view(-1, 1) >>> dglsp.sp_broadcast_v(A, v, "sub") SparseMatrix(indices=tensor([[1, 0, 2], [0, 3, 2]]), values=tensor([[ 8, 18], [29, 39], [47, 57]]), shape=(3, 4), nnz=3, val_size=(2,)) """ op = getattr(operator, op) if v.dim() == 1: v = v.view(1, -1) shape_error_message = ( f"Dimension mismatch for broadcasting. Got A.shape = {A.shape} and" f"v.shape = {v.shape}." 
) assert v.dim() <= 2 and (1 in v.shape), shape_error_message broadcast_dim = None # v can be broadcasted to A if exactly one dimension of v is 1 and the other # is the same as A. for d, (dim1, dim2) in enumerate(zip(A.shape, v.shape)): assert dim2 in (1, dim1), shape_error_message if dim1 != dim2: assert broadcast_dim is None, shape_error_message broadcast_dim = d # A and v has the same shape of (1, *) or (*, 1). if broadcast_dim is None: broadcast_dim = 0 if A.shape[0] == 1 else 1 if broadcast_dim == 0: v = v.view(-1)[A.col] else: v = v.view(-1)[A.row] if A.val.dim() > 1: v = v.view(-1, 1) ret_val = op(A.val, v) return val_like(A, ret_val) def sp_add_v(A: SparseMatrix, v: torch.Tensor) -> SparseMatrix: """Broadcast addition for sparse matrix and vector. See the definition of :func:`sp_broadcast_v` for details. """ return sp_broadcast_v(A, v, "add") def sp_sub_v(A: SparseMatrix, v: torch.Tensor) -> SparseMatrix: """Broadcast substraction for sparse matrix and vector. See the definition of :func:`sp_broadcast_v` for details. """ return sp_broadcast_v(A, v, "sub") def sp_mul_v(A: SparseMatrix, v: torch.Tensor) -> SparseMatrix: """Broadcast multiply for sparse matrix and vector. See the definition of :func:`sp_broadcast_v` for details. """ return sp_broadcast_v(A, v, "mul") def sp_div_v(A: SparseMatrix, v: torch.Tensor) -> SparseMatrix: """Broadcast division for sparse matrix and vector. See the definition of :func:`sp_broadcast_v` for details. 
""" return sp_broadcast_v(A, v, "truediv") ================================================ FILE: python/dgl/sparse/elementwise_op.py ================================================ # pylint: disable=anomalous-backslash-in-string """DGL elementwise operator module.""" from typing import Union from .sparse_matrix import SparseMatrix from .utils import Scalar __all__ = ["add", "sub", "mul", "div", "power"] def add(A: SparseMatrix, B: SparseMatrix) -> SparseMatrix: r"""Elementwise addition for ``SparseMatrix``, equivalent to ``A + B``. Parameters ---------- A : SparseMatrix Sparse matrix B : SparseMatrix Sparse matrix Returns ------- SparseMatrix Sparse matrix Examples -------- >>> indices = torch.tensor([[1, 0, 2], [0, 1, 2]]) >>> val = torch.tensor([10, 20, 30]) >>> A = dglsp.spmatrix(indices, val) >>> B = dglsp.diag(torch.arange(1, 4)) >>> dglsp.add(A, B) SparseMatrix(indices=tensor([[0, 0, 1, 1, 2], [0, 1, 0, 1, 2]]), values=tensor([1, 20, 10, 2, 33]), shape=(3, 3), nnz=5) """ return A + B def sub(A: SparseMatrix, B: SparseMatrix) -> SparseMatrix: r"""Elementwise subtraction for ``SparseMatrix``, equivalent to ``A - B``. Parameters ---------- A : SparseMatrix Sparse matrix B : SparseMatrix Sparse matrix Returns ------- SparseMatrix Sparse matrix Examples -------- >>> indices = torch.tensor([[1, 0, 2], [0, 1, 2]]) >>> val = torch.tensor([10, 20, 30]) >>> A = dglsp.spmatrix(indices, val) >>> B = dglsp.diag(torch.arange(1, 4)) >>> dglsp.sub(A, B) SparseMatrix(indices=tensor([[0, 0, 1, 1, 2], [0, 1, 0, 1, 2]]), values=tensor([-1, 20, 10, -2, 27]), shape=(3, 3), nnz=5) """ return A - B def mul( A: Union[SparseMatrix, Scalar], B: Union[SparseMatrix, Scalar] ) -> SparseMatrix: r"""Elementwise multiplication for ``SparseMatrix``, equivalent to ``A * B``. If both :attr:`A` and :attr:`B` are sparse matrices, both of them should be diagonal matrices. 
def div(A: SparseMatrix, B: Union[SparseMatrix, Scalar]) -> SparseMatrix:
    r"""Elementwise division for ``SparseMatrix``, equivalent to ``A / B``.

    This function only evaluates ``A / B``, so the operand requirements are
    those of ``SparseMatrix.__truediv__``: if :attr:`B` is a sparse matrix,
    both :attr:`A` and :attr:`B` must have the same sparsity, and the returned
    matrix has the same order of non-zero entries as :attr:`A`.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix
    B : SparseMatrix or Scalar
        Sparse matrix or scalar value

    Returns
    -------
    SparseMatrix
        Sparse matrix

    Examples
    --------
    >>> A = dglsp.diag(torch.arange(1, 4))
    >>> B = dglsp.diag(torch.arange(10, 13))
    >>> dglsp.div(A, B)
    SparseMatrix(indices=tensor([[0, 1, 2],
                                 [0, 1, 2]]),
                 values=tensor([0.1000, 0.1818, 0.2500]),
                 shape=(3, 3), nnz=3)

    >>> A = dglsp.diag(torch.arange(1, 4))
    >>> dglsp.div(A, 2)
    SparseMatrix(indices=tensor([[0, 1, 2],
                                 [0, 1, 2]]),
                 values=tensor([0.5000, 1.0000, 1.5000]),
                 shape=(3, 3), nnz=3)

    >>> indices = torch.tensor([[1, 0, 2], [0, 3, 2]])
    >>> val = torch.tensor([1, 2, 3])
    >>> A = dglsp.spmatrix(indices, val, shape=(3, 4))
    >>> dglsp.div(A, 2)
    SparseMatrix(indices=tensor([[1, 0, 2],
                                 [0, 3, 2]]),
                 values=tensor([0.5000, 1.0000, 1.5000]),
                 shape=(3, 4), nnz=3)
    """
    return A / B
def sp_add(A: SparseMatrix, B: SparseMatrix) -> SparseMatrix:
    """Add two sparse matrices elementwise (implements ``A + B``).

    Parameters
    ----------
    A : SparseMatrix
        Left operand
    B : SparseMatrix
        Right operand

    Returns
    -------
    SparseMatrix
        The elementwise sum, or ``NotImplemented`` when :attr:`B` is not a
        ``SparseMatrix``.

    Examples
    --------

    >>> indices = torch.tensor([[1, 0, 2], [0, 3, 2]])
    >>> val = torch.tensor([10, 20, 30])
    >>> A = dglsp.spmatrix(indices, val, shape=(3, 4))
    >>> A + A
    SparseMatrix(indices=tensor([[0, 1, 2],
                                 [3, 0, 2]]),
                 values=tensor([40, 20, 60]),
                 shape=(3, 4), nnz=3)
    """
    if not isinstance(B, SparseMatrix):
        # Returning NotImplemented lets Python try B.__radd__ and raise
        # TypeError if that is unavailable as well.
        return NotImplemented
    return spsp_add(A, B)
def sp_div(A: SparseMatrix, B: Union[SparseMatrix, Scalar]) -> SparseMatrix:
    """Elementwise division

    If :attr:`B` is a sparse matrix, both :attr:`A` and :attr:`B` must have the
    same sparsity. And the returned matrix has the same order of non-zero
    entries as :attr:`A`.

    Parameters
    ----------
    A : SparseMatrix
        First operand
    B : SparseMatrix or Scalar
        Second operand

    Returns
    -------
    SparseMatrix
        Result of A / B, or ``NotImplemented`` when :attr:`B` is neither a
        scalar nor a ``SparseMatrix``.

    Examples
    --------

    >>> indices = torch.tensor([[1, 0, 2], [0, 3, 2]])
    >>> val = torch.tensor([1, 2, 3])
    >>> A = dglsp.spmatrix(indices, val, shape=(3, 4))
    >>> A / 2
    SparseMatrix(indices=tensor([[1, 0, 2],
                                 [0, 3, 2]]),
                 values=tensor([0.5000, 1.0000, 1.5000]),
                 shape=(3, 4), nnz=3)
    """
    if is_scalar(B):
        # Scalar divisor: only the stored values change, sparsity is kept.
        return val_like(A, A.val / B)
    if not isinstance(B, SparseMatrix):
        # Same convention as sp_add/sp_sub/sp_power: Python falls back to
        # B.__rtruediv__ then TypeError when NotImplemented is returned,
        # instead of failing on the missing ``c_sparse_matrix`` attribute.
        return NotImplemented
    return spsp_div(A, B)
Parameters ---------- A : SparseMatrix Sparse matrix scalar : float or int Exponent Returns ------- SparseMatrix Sparse matrix Examples -------- >>> indices = torch.tensor([[1, 0, 2], [0, 3, 2]]) >>> val = torch.tensor([10, 20, 30]) >>> A = dglsp.spmatrix(indices, val) >>> A ** 2 SparseMatrix(indices=tensor([[1, 0, 2], [0, 3, 2]]), values=tensor([100, 400, 900]), shape=(3, 4), nnz=3) """ # Python falls back to scalar.__rpow__ then TypeError when NotImplemented # is returned. return val_like(A, A.val**scalar) if is_scalar(scalar) else NotImplemented SparseMatrix.__add__ = sp_add SparseMatrix.__sub__ = sp_sub SparseMatrix.__mul__ = sp_mul SparseMatrix.__rmul__ = sp_mul SparseMatrix.__truediv__ = sp_div SparseMatrix.__pow__ = sp_power ================================================ FILE: python/dgl/sparse/matmul.py ================================================ """Matmul ops for SparseMatrix""" # pylint: disable=invalid-name from typing import Union import torch from .sparse_matrix import SparseMatrix __all__ = ["spmm", "bspmm", "spspmm", "matmul"] def spmm(A: SparseMatrix, X: torch.Tensor) -> torch.Tensor: """Multiplies a sparse matrix by a dense matrix, equivalent to ``A @ X``. Parameters ---------- A : SparseMatrix Sparse matrix of shape ``(L, M)`` with scalar values X : torch.Tensor Dense matrix of shape ``(M, N)`` or ``(M)`` Returns ------- torch.Tensor The dense matrix of shape ``(L, N)`` or ``(L)`` Examples -------- >>> indices = torch.tensor([[0, 1, 1], [1, 0, 1]]) >>> val = torch.randn(indices.shape[1]) >>> A = dglsp.spmatrix(indices, val) >>> X = torch.randn(2, 3) >>> result = dglsp.spmm(A, X) >>> type(result) >>> result.shape torch.Size([2, 3]) """ assert isinstance( A, SparseMatrix ), f"Expect arg1 to be a SparseMatrix object, got {type(A)}." assert isinstance( X, torch.Tensor ), f"Expect arg2 to be a torch.Tensor, got {type(X)}." 
def bspmm(A: SparseMatrix, X: torch.Tensor) -> torch.Tensor:
    """Multiplies a sparse matrix by a dense matrix by batches, equivalent to
    ``A @ X``.

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix of shape ``(L, M)`` with vector values of length ``K``
    X : torch.Tensor
        Dense matrix of shape ``(M, N, K)``

    Returns
    -------
    torch.Tensor
        Dense matrix of shape ``(L, N, K)``

    Raises
    ------
    AssertionError
        If :attr:`A` is not a ``SparseMatrix`` or :attr:`X` is not a
        ``torch.Tensor``.

    Examples
    --------

    >>> indices = torch.tensor([[0, 1, 1], [1, 0, 2]])
    >>> val = torch.randn(indices.shape[1], 2)
    >>> A = dglsp.spmatrix(indices, val, shape=(3, 3))
    >>> X = torch.randn(3, 3, 2)
    >>> result = dglsp.bspmm(A, X)
    >>> type(result)
    <class 'torch.Tensor'>
    >>> result.shape
    torch.Size([3, 3, 2])
    """
    assert isinstance(
        A, SparseMatrix
    ), f"Expect arg1 to be a SparseMatrix object, got {type(A)}."
    assert isinstance(
        X, torch.Tensor
    ), f"Expect arg2 to be a torch.Tensor, got {type(X)}."
    # The batched case is delegated to the same spmm entry point.
    return spmm(A, X)
def matmul(
    A: Union[torch.Tensor, SparseMatrix], B: Union[torch.Tensor, SparseMatrix]
) -> Union[torch.Tensor, SparseMatrix]:
    """Multiplies two dense/sparse matrices, equivalent to ``A @ B``.

    This function does not support the case where :attr:`A` is a
    ``torch.Tensor`` and :attr:`B` is a ``SparseMatrix``.

    * If both matrices are torch.Tensor, it calls
      :func:`torch.matmul()`. The result is a dense matrix.

    * If both matrices are sparse, it calls :func:`dgl.sparse.spspmm`. The
      result is a sparse matrix.

    * If :attr:`A` is sparse while :attr:`B` is dense, it calls
      :func:`dgl.sparse.spmm`. The result is a dense matrix.

    * The operator supports batched sparse-dense matrix multiplication. In
      this case, the sparse matrix :attr:`A` should have shape ``(L, M)``,
      where the non-zero values have a batch dimension ``K``. The dense
      matrix :attr:`B` should have shape ``(M, N, K)``. The output
      is a dense matrix of shape ``(L, N, K)``.

    * Sparse-sparse matrix multiplication does not support batched computation.

    Parameters
    ----------
    A : torch.Tensor or SparseMatrix
        The first matrix.
    B : torch.Tensor or SparseMatrix
        The second matrix.

    Returns
    -------
    torch.Tensor or SparseMatrix
        The result matrix

    Raises
    ------
    AssertionError
        If either operand is neither a ``torch.Tensor`` nor a
        ``SparseMatrix``, or if :attr:`A` is dense while :attr:`B` is sparse.

    Examples
    --------

    Multiplies a diagonal matrix with a dense matrix.

    >>> val = torch.randn(3)
    >>> A = dglsp.diag(val)
    >>> B = torch.randn(3, 2)
    >>> result = dglsp.matmul(A, B)
    >>> type(result)
    <class 'torch.Tensor'>
    >>> result.shape
    torch.Size([3, 2])

    Multiplies a sparse matrix with a dense matrix.

    >>> indices = torch.tensor([[0, 1, 1], [1, 0, 1]])
    >>> val = torch.randn(indices.shape[1])
    >>> A = dglsp.spmatrix(indices, val)
    >>> X = torch.randn(2, 3)
    >>> result = dglsp.matmul(A, X)
    >>> type(result)
    <class 'torch.Tensor'>
    >>> result.shape
    torch.Size([2, 3])

    Multiplies a sparse matrix with a sparse matrix.

    >>> indices1 = torch.tensor([[0, 1, 1], [1, 0, 1]])
    >>> val1 = torch.ones(indices1.shape[1])
    >>> A = dglsp.spmatrix(indices1, val1)
    >>> indices2 = torch.tensor([[0, 1, 1], [0, 2, 1]])
    >>> val2 = torch.ones(indices2.shape[1])
    >>> B = dglsp.spmatrix(indices2, val2)
    >>> result = dglsp.matmul(A, B)
    >>> type(result)
    <class 'dgl.sparse.sparse_matrix.SparseMatrix'>
    >>> result.shape
    (2, 3)
    """
    assert isinstance(
        A, (torch.Tensor, SparseMatrix)
    ), f"Expect arg1 to be a torch.Tensor or SparseMatrix, got {type(A)}."
    # The two fragments are concatenated, hence the trailing space.
    assert isinstance(B, (torch.Tensor, SparseMatrix)), (
        "Expect arg2 to be a torch Tensor or SparseMatrix "
        f"object, got {type(B)}."
    )
    # dense @ dense
    if isinstance(A, torch.Tensor) and isinstance(B, torch.Tensor):
        return torch.matmul(A, B)
    # dense @ sparse is not supported.
    assert not isinstance(A, torch.Tensor), (
        f"Expect arg2 to be a torch Tensor if arg 1 is torch Tensor, "
        f"got {type(B)}."
    )
    # sparse @ dense
    if isinstance(B, torch.Tensor):
        return spmm(A, B)
    # sparse @ sparse
    return spspmm(A, B)
def sum(input: SparseMatrix, dim: Optional[int] = None):
    """Sums the non-zero values of the :attr:`input` sparse matrix, optionally
    along the dimension :attr:`dim`.

    Parameters
    ----------
    input : SparseMatrix
        The input sparse matrix
    dim : int, optional
        The dimension to reduce: 0 (by rows), 1 (by columns) or None (rows
        and columns at the same time).

        With ``dim=None`` the result has shape ``input.val.shape[1:]``. With
        ``dim=0`` it has shape ``(input.shape[1],) + input.val.shape[1:]``
        and with ``dim=1`` shape ``(input.shape[0],) + input.val.shape[1:]``.

    Returns
    -------
    torch.Tensor
        Reduced tensor

    Examples
    --------

    Case1: scalar-valued sparse matrix

    >>> indices = torch.tensor([[0, 1, 1], [0, 0, 2]])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.spmatrix(indices, val, shape=(4, 3))
    >>> dglsp.sum(A)
    tensor(4)
    >>> dglsp.sum(A, 0)
    tensor([2, 0, 2])
    >>> dglsp.sum(A, 1)
    tensor([1, 3, 0, 0])

    Case2: vector-valued sparse matrix

    >>> indices = torch.tensor([[0, 1, 1], [0, 0, 2]])
    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
    >>> A = dglsp.spmatrix(indices, val, shape=(4, 3))
    >>> dglsp.sum(A)
    tensor([5, 5])
    >>> dglsp.sum(A, 0)
    tensor([[3, 3],
            [0, 0],
            [2, 2]])
    """
    # Hand the underlying C++ matrix object to the registered sparse op.
    c_matrix = input.c_sparse_matrix
    return torch.ops.dgl_sparse.sum(c_matrix, dim)
def smin(input: SparseMatrix, dim: Optional[int] = None):
    """Takes the minimum over the non-zero values of the :attr:`input` sparse
    matrix, optionally along the dimension :attr:`dim`.

    Zero values are not counted by the reduction. A row or column without any
    non-zero value yields 0.

    Parameters
    ----------
    input : SparseMatrix
        The input sparse matrix
    dim : int, optional
        The dimension to reduce: 0 (by rows), 1 (by columns) or None (rows
        and columns at the same time).

        With ``dim=None`` the result has shape ``input.val.shape[1:]``. With
        ``dim=0`` it has shape ``(input.shape[1],) + input.val.shape[1:]``
        and with ``dim=1`` shape ``(input.shape[0],) + input.val.shape[1:]``.

    Returns
    -------
    torch.Tensor
        Reduced tensor

    Examples
    --------

    Case1: scalar-valued sparse matrix

    >>> indices = torch.tensor([[0, 1, 1], [0, 0, 2]])
    >>> val = torch.tensor([1, 1, 2])
    >>> A = dglsp.spmatrix(indices, val, shape=(4, 3))
    >>> dglsp.smin(A)
    tensor(1)
    >>> dglsp.smin(A, 0)
    tensor([1, 0, 2])
    >>> dglsp.smin(A, 1)
    tensor([1, 1, 0, 0])

    Case2: vector-valued sparse matrix

    >>> indices = torch.tensor([[0, 1, 1], [0, 0, 2]])
    >>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
    >>> A = dglsp.spmatrix(indices, val, shape=(4, 3))
    >>> dglsp.smin(A)
    tensor([1, 1])
    >>> dglsp.smin(A, 0)
    tensor([[1, 1],
            [0, 0],
            [2, 2]])
    >>> dglsp.smin(A, 1)
    tensor([[1, 2],
            [2, 1],
            [0, 0],
            [0, 0]])
    """
    # Hand the underlying C++ matrix object to the registered sparse op.
    c_matrix = input.c_sparse_matrix
    return torch.ops.dgl_sparse.smin(c_matrix, dim)
Returns ---------- torch.Tensor Reduced tensor Examples ---------- Case1: scalar-valued sparse matrix >>> indices = torch.tensor([[0, 1, 1], [0, 0, 2]]) >>> val = torch.tensor([1., 1., 2.]) >>> A = dglsp.spmatrix(indices, val, shape=(4, 3)) >>> dglsp.smean(A) tensor(1.3333) >>> dglsp.smean(A, 0) tensor([1., 0., 2.]) >>> dglsp.smean(A, 1) tensor([1.0000, 1.5000, 0.0000, 0.0000]) Case2: vector-valued sparse matrix >>> indices = torch.tensor([[0, 1, 1], [0, 0, 2]]) >>> val = torch.tensor([[1., 2.], [2., 1.], [2., 2.]]) >>> A = dglsp.spmatrix(indices, val, shape=(4, 3)) >>> dglsp.smean(A) tensor([1.6667, 1.6667]) >>> dglsp.smean(A, 0) tensor([[1.5000, 1.5000], [0.0000, 0.0000], [2.0000, 2.0000]]) >>> dglsp.smean(A, 1) tensor([[1.0000, 2.0000], [2.0000, 1.5000], [0.0000, 0.0000], [0.0000, 0.0000]]) """ return torch.ops.dgl_sparse.smean(input.c_sparse_matrix, dim) def sprod(input: SparseMatrix, dim: Optional[int] = None): """Computes the product of non-zero values of the :attr:`input` sparse matrix along the given dimension :attr:`dim`. The reduction does not count zero values. If the row or column to be reduced does not have any non-zero value, the result will be 0. Parameters ---------- input : SparseMatrix The input sparse matrix dim : int, optional The dimension to reduce, must be either 0 (by rows) or 1 (by columns) or None (on both rows and columns simultaneously) If :attr:`dim` is None, it reduces both the rows and the columns in the sparse matrix, producing a tensor of shape ``input.val.shape[1:]``. Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension, producing a tensor of shape ``(input.shape[1],) + input.val.shape[1:]`` or ``(input.shape[0],) + input.val.shape[1:]``. 
def sddmm(A: SparseMatrix, X1: torch.Tensor, X2: torch.Tensor) -> SparseMatrix:
    r"""Sampled-Dense-Dense Matrix Multiplication (SDDMM).

    ``sddmm`` matrix-multiplies the dense matrices :attr:`X1` and :attr:`X2`
    and then multiplies the product elementwise with the sparse matrix
    :attr:`A` at its nonzero locations.

    Mathematically ``sddmm`` is formulated as:

    .. math::
        out = (X1 @ X2) * A

    :attr:`X1` and :attr:`X2` may also be 1-D, in which case ``X1 @ X2`` is
    the outer product of the two vectors (which results in a matrix).

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix of shape ``(L, N)``
    X1 : torch.Tensor
        Dense matrix of shape ``(L, M)`` or ``(L,)``
    X2 : torch.Tensor
        Dense matrix of shape ``(M, N)`` or ``(N,)``

    Returns
    -------
    SparseMatrix
        Sparse matrix of shape ``(L, N)``

    Examples
    --------

    >>> indices = torch.tensor([[1, 1, 2], [2, 3, 3]])
    >>> val = torch.arange(1, 4).float()
    >>> A = dglsp.spmatrix(indices, val, (3, 4))
    >>> X1 = torch.randn(3, 5)
    >>> X2 = torch.randn(5, 4)
    >>> dglsp.sddmm(A, X1, X2)
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [2, 3, 3]]),
                 values=tensor([-1.6585, -3.9714, -0.5406]),
                 shape=(3, 4), nnz=3)
    """
    # The op works on the C++ matrix object; wrap its result for Python.
    c_result = torch.ops.dgl_sparse.sddmm(A.c_sparse_matrix, X1, X2)
    return SparseMatrix(c_result)
def softmax(input: SparseMatrix, dim: int = 1) -> SparseMatrix:
    """Applies softmax to the non-zero elements of the sparse matrix on the
    dimension :attr:`dim`. dim = 0 or 1 indicates column-wise or row-wise
    softmax respectively.

    If :attr:`input.val` takes shape ``(nnz, D)``, then the output matrix
    :attr:`output` and :attr:`output.val` take the same shape as :attr:`input`
    and :attr:`input.val`. :attr:`output.val[:, i]` is calculated based on
    :attr:`input.val[:, i]`.

    Parameters
    ----------
    input : SparseMatrix
        The input sparse matrix
    dim : int, optional
        The dimension to apply softmax on: 0 for column-wise and 1 for
        row-wise softmax. Defaults to 1.

    Returns
    -------
    SparseMatrix
        The output sparse matrix

    Examples
    --------

    Case1: row-wise softmax on matrix with values of shape (nnz)

    >>> indices = torch.tensor([[0, 0, 1, 2], [1, 2, 2, 0]])
    >>> val = torch.tensor([0., 1., 2., 3.])
    >>> A = dglsp.spmatrix(indices, val)
    >>> dglsp.softmax(A)
    SparseMatrix(indices=tensor([[0, 0, 1, 2],
                                 [1, 2, 2, 0]]),
                 values=tensor([0.2689, 0.7311, 1.0000, 1.0000]),
                 shape=(3, 3), nnz=4)

    Case2: row-wise softmax on matrix with values of shape (nnz, D)

    >>> indices = torch.tensor([[0, 0, 1, 2], [1, 2, 2, 0]])
    >>> val = torch.tensor([[0., 7.], [1., 3.], [2., 2.], [3., 1.]])
    >>> A = dglsp.spmatrix(indices, val)
    >>> dglsp.softmax(A)
    SparseMatrix(indices=tensor([[0, 0, 1, 2],
                                 [1, 2, 2, 0]]),
                 values=tensor([[0.2689, 0.9820],
                                [0.7311, 0.0180],
                                [1.0000, 1.0000],
                                [1.0000, 1.0000]]),
                 shape=(3, 3), nnz=4, val_size=(2,))

    Case3: column-wise softmax on matrix with values of shape (nnz)

    >>> indices = torch.tensor([[0, 0, 1, 2], [1, 2, 2, 0]])
    >>> val = torch.tensor([0., 1., 2., 3.])
    >>> A = dglsp.spmatrix(indices, val)
    >>> dglsp.softmax(A, 0)
    SparseMatrix(indices=tensor([[0, 0, 1, 2],
                                 [1, 2, 2, 0]]),
                 values=tensor([1.0000, 0.2689, 0.7311, 1.0000]),
                 shape=(3, 3), nnz=4)
    """
    return SparseMatrix(
        torch.ops.dgl_sparse.softmax(input.c_sparse_matrix, dim)
    )
@property
def shape(self) -> Tuple[int, int]:
    """Returns the shape of the sparse matrix.

    Returns
    -------
    Tuple[int, int]
        The shape of the sparse matrix as ``(num_rows, num_cols)``
    """
    # The C++ object returns a sequence; expose it as an immutable tuple.
    return tuple(self.c_sparse_matrix.shape())
def to_dense(self) -> torch.Tensor:
    """Returns a copy in dense matrix format of the sparse matrix.

    Non-zero elements that share the same ``(row, col)`` position are summed,
    which matches the summation performed by :meth:`coalesce`.

    Returns
    -------
    torch.Tensor
        The copy in dense matrix format. Its shape is the matrix shape
        followed by the trailing dimensions of the values, if any.
    """
    row, col = self.coo()
    val = self.val
    # Vector-valued entries add their trailing dimension to the dense shape.
    shape = self.shape + val.shape[1:]
    mat = torch.zeros(shape, device=self.device, dtype=self.dtype)
    # A plain ``mat[row, col] = val`` keeps only one of several duplicate
    # entries; accumulate instead so duplicates add up.
    mat.index_put_((row, col), val, accumulate=True)
    return mat
def float(self):
    """Converts the matrix values to float32 data type.

    If the matrix already uses float data type, the original matrix will be
    returned.

    Returns
    -------
    SparseMatrix
        The matrix with float values

    Examples
    --------

    >>> indices = torch.tensor([[1, 1, 2], [1, 2, 0]])
    >>> val = torch.ones(indices.shape[1]).long()
    >>> A = dglsp.spmatrix(indices, val, shape=(3, 4))
    >>> A.float()
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [1, 2, 0]]),
                 values=tensor([1., 1., 1.]),
                 shape=(3, 4), nnz=3)
    """
    return self.to(dtype=torch.float)
def int(self):
    """Converts the matrix values to int32 data type.

    If the matrix already uses int data type, the original matrix will be
    returned.

    Returns
    -------
    SparseMatrix
        The matrix with int values

    Examples
    --------

    >>> indices = torch.tensor([[1, 1, 2], [1, 2, 0]])
    >>> A = dglsp.spmatrix(indices, shape=(3, 4))
    >>> A.int()
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [1, 2, 0]]),
                 values=tensor([1, 1, 1], dtype=torch.int32),
                 shape=(3, 4), nnz=3)
    """
    return self.to(dtype=torch.int)

def long(self):
    """Converts the matrix values to long data type.

    If the matrix already uses long data type, the original matrix will be
    returned.

    Returns
    -------
    SparseMatrix
        The matrix with long values

    Examples
    --------

    >>> indices = torch.tensor([[1, 1, 2], [1, 2, 0]])
    >>> A = dglsp.spmatrix(indices, shape=(3, 4))
    >>> A.long()
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [1, 2, 0]]),
                 values=tensor([1, 1, 1]),
                 shape=(3, 4), nnz=3)
    """
    return self.to(dtype=torch.long)
def index_select(self, dim: int, index: torch.Tensor):
    """Returns a sub-matrix selected according to the given index.

    The function does not support autograd.

    Parameters
    ----------
    dim : int
        The dim to select from matrix, should be 0 or 1. `dim = 0` for
        rowwise selection and `dim = 1` for columnwise selection.
    index : torch.Tensor
        The selection index indicates which IDs from the `dim` should
        be chosen from the matrix.
        Note that duplicated ids are allowed.

    Returns
    -------
    SparseMatrix
        The sub-matrix which contains selected rows or columns.

    Raises
    ------
    ValueError
        If :attr:`dim` is neither 0 nor 1.
    TypeError
        If :attr:`index` is not a ``torch.Tensor``.

    Examples
    --------

    >>> indices = torch.tensor([[0, 1, 1, 2, 3, 4], [0, 2, 4, 3, 5, 0]])
    >>> val = torch.tensor([0, 1, 2, 3, 4, 5])
    >>> A = dglsp.spmatrix(indices, val)

    Case 1: Select rows by IDs.

    >>> row_ids = torch.tensor([0, 1, 4])
    >>> A.index_select(0, row_ids)
    SparseMatrix(indices=tensor([[0, 1, 1, 2],
                                 [0, 2, 4, 0]]),
                 values=tensor([0, 1, 2, 5]),
                 shape=(3, 6), nnz=4)

    Case 2: Select columns by IDs.

    >>> column_ids = torch.tensor([0, 4, 5])
    >>> A.index_select(1, column_ids)
    SparseMatrix(indices=tensor([[0, 4, 1, 3],
                                 [0, 0, 1, 2]]),
                 values=tensor([0, 5, 2, 4]),
                 shape=(5, 3), nnz=4)
    """
    # Validate the arguments here so callers get a Python-level error
    # before the underlying sparse-matrix object is invoked.
    if dim not in (0, 1):
        raise ValueError("The selection dimension should be 0 or 1.")
    if isinstance(index, torch.Tensor):
        return SparseMatrix(self.c_sparse_matrix.index_select(dim, index))
    raise TypeError(f"{type(index).__name__} is unsupported input type.")
def sample(
    self,
    dim: int,
    fanout: int,
    ids: Optional[torch.Tensor] = None,
    replace: Optional[bool] = False,
    bias: Optional[bool] = False,
):
    """Returns a sampled matrix on the given dimension and sample arguments.

    The function does not support autograd.

    Parameters
    ----------
    dim : int
        The dimension for sampling, should be 0 or 1. `dim = 0` for
        rowwise selection and `dim = 1` for columnwise selection.
    fanout : int
        The number of elements to randomly sample on each row or column.
    ids : torch.Tensor, optional
        An optional tensor containing row or column IDs from which to
        sample elements.
        NOTE: If `ids` is not provided (i.e., `ids = None`), the function
        will sample from all rows or columns, i.e. from the IDs
        ``0, 1, ..., size - 1`` of the chosen dimension.
    replace : bool, optional
        Indicates whether repeated sampling of the same element
        is allowed. When `replace = True`, repeated sampling is permitted;
        when `replace = False`, it is not allowed.
        NOTE: If `replace = False` and there are fewer elements than
        `fanout`, all non-zero elements will be sampled.
    bias : bool, optional
        A boolean flag indicating whether to enable biasing during
        sampling. When `bias = True`, the values of the sparse matrix
        will be used as bias weights.

    Returns
    -------
    SparseMatrix
        A submatrix containing the randomly sampled non-zero elements
        (see the examples below for the resulting shapes).

    Examples
    --------

    >>> indices = torch.tensor([[0, 0, 1, 1, 2, 2, 2], [0, 2, 0, 1, 0, 1, 2]])
    >>> val = torch.tensor([0, 1, 2, 3, 4, 5, 6])
    >>> A = dglsp.spmatrix(indices, val)

    Case 1: Sample rows with the given number and disable repeated sampling.

    >>> row_ids = torch.tensor([0, 2])
    >>> A.sample(0, 2, row_ids)
    SparseMatrix(indices=tensor([[0, 0, 1, 1],
                                 [0, 2, 0, 2]]),
                 values=tensor([0, 1, 4, 6]),
                 shape=(2, 3), nnz=4)

    Case 2: Sample cols with the given number and disable repeated sampling.

    >>> col_ids = torch.tensor([0, 2])
    >>> A.sample(1, 2, col_ids)
    SparseMatrix(indices=tensor([[0, 1, 0, 2],
                                 [0, 0, 1, 1]]),
                 values=tensor([0, 2, 1, 6]),
                 shape=(3, 2), nnz=4)

    Case 3: Sample rows with the given number and enable repeated sampling.

    >>> row_ids = torch.tensor([0, 1])
    >>> A.sample(0, 2, row_ids, True)
    SparseMatrix(indices=tensor([[0, 0, 1, 1],
                                 [0, 2, 0, 0]]),
                 values=tensor([0, 1, 2, 2]),
                 shape=(2, 3), nnz=4)

    Case 4: Sample cols with the given number and enable repeated sampling.

    >>> col_ids = torch.tensor([0, 1])
    >>> A.sample(1, 2, col_ids, True)
    SparseMatrix(indices=tensor([[0, 1, 1, 1],
                                 [0, 0, 1, 1]]),
                 values=tensor([0, 2, 3, 3]),
                 shape=(3, 2), nnz=4)
    """
    if ids is None:
        dim_size = self.shape[0] if dim == 0 else self.shape[1]
        # ``torch.arange`` excludes the end point, so this yields exactly the
        # valid IDs 0 .. dim_size - 1. The deprecated ``torch.range`` is
        # end-inclusive and would add the out-of-range ID ``dim_size``.
        ids = torch.arange(dim_size, dtype=torch.int64, device=self.device)

    return SparseMatrix(
        self.c_sparse_matrix.sample(dim, fanout, ids, replace, bias)
    )
To be precise, all rows selected by the specified indices will be remapped from 0 to length(indices) - 1. Rows that are not selected and contain any non-zero elements will be positioned after those remapped rows while maintaining their original order. This function mimics 'dgl.to_block', a method used to compress a sampled subgraph by eliminating redundant nodes. The 'leading_indices' parameter replicates the behavior of 'include_dst_in_src' in 'dgl.to_block', adding destination node information for message passing. Setting 'leading_indices' to column IDs when relabeling the row dimension, for example, achieves the same effect as including destination nodes in source nodes. Parameters ---------- dim : int The dimension to relabel. Should be 0 or 1. Use `dim = 0` for rowwise relabeling and `dim = 1` for columnwise relabeling. leading_indices : torch.Tensor, optional An optional tensor containing row or column ids that should be placed at the beginning of the relabeled dimension. Returns ------- Tuple[SparseMatrix, torch.Tensor] A tuple containing the relabeled sparse matrix and the index mapping of the relabeled dimension from the new index to the original index. Examples -------- >>> indices = torch.tensor([[0, 2], [1, 2]]) >>> A = dglsp.spmatrix(indices) >>> print(A.to_dense()) tensor([[0., 1., 0.], [0., 0., 0.], [0., 0., 1.]]) Case 1: Compact rows without indices. >>> B, original_rows = A.compact(dim=0, leading_indices=None) >>> print(B.to_dense()) tensor([[0., 1., 0.], [0., 0., 1.]]) >>> print(original_rows) torch.Tensor([0, 2]) Case 2: Compact rows with indices. 
def spmatrix(
    indices: torch.Tensor,
    val: Optional[torch.Tensor] = None,
    shape: Optional[Tuple[int, int]] = None,
) -> SparseMatrix:
    r"""Creates a sparse matrix from Coordinate format indices.

    Parameters
    ----------
    indices : torch.Tensor
        The indices are the coordinates of the non-zero elements in the
        matrix, which should have shape of ``(2, N)`` where the first row is
        the row indices and the second row is the column indices of non-zero
        elements.
    val : torch.Tensor, optional
        The values of shape ``(nnz)`` or ``(nnz, D)``. If None, it will be a
        tensor of shape ``(nnz)`` filled by 1.
    shape : tuple[int, int], optional
        If not specified, it will be inferred from :attr:`indices`, i.e.,
        ``(indices[0].max() + 1, indices[1].max() + 1)``. Otherwise,
        :attr:`shape` should be no smaller than this.

    Returns
    -------
    SparseMatrix
        Sparse matrix

    Examples
    --------

    Case1: Sparse matrix with row and column indices without values.

    >>> indices = torch.tensor([[1, 1, 2], [2, 4, 3]])
    >>> A = dglsp.spmatrix(indices)
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [2, 4, 3]]),
                 values=tensor([1., 1., 1.]),
                 shape=(3, 5), nnz=3)
    >>> # Specify shape
    >>> A = dglsp.spmatrix(indices, shape=(5, 5))
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [2, 4, 3]]),
                 values=tensor([1., 1., 1.]),
                 shape=(5, 5), nnz=3)

    Case2: Sparse matrix with scalar values.

    >>> indices = torch.tensor([[1, 1, 2], [2, 4, 3]])
    >>> val = torch.tensor([[1.], [2.], [3.]])
    >>> A = dglsp.spmatrix(indices, val)
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [2, 4, 3]]),
                 values=tensor([[1.],
                                [2.],
                                [3.]]),
                 shape=(3, 5), nnz=3, val_size=(1,))

    Case3: Sparse matrix with vector values.

    >>> indices = torch.tensor([[1, 1, 2], [2, 4, 3]])
    >>> val = torch.tensor([[1., 1.], [2., 2.], [3., 3.]])
    >>> A = dglsp.spmatrix(indices, val)
    SparseMatrix(indices=tensor([[1, 1, 2],
                                 [2, 4, 3]]),
                 values=tensor([[1., 1.],
                                [2., 2.],
                                [3., 3.]]),
                 shape=(3, 5), nnz=3, val_size=(2,))
    """
    if shape is None:
        # Smallest shape that contains every given coordinate.
        shape = (
            torch.max(indices[0]).item() + 1,
            torch.max(indices[1]).item() + 1,
        )
    if val is None:
        # Allocate directly on the indices' device rather than creating the
        # tensor on CPU and copying it over.
        val = torch.ones(indices.shape[1], device=indices.device)

    assert (
        val.dim() <= 2
    ), "The values of a SparseMatrix can only be scalars or vectors."

    return SparseMatrix(torch.ops.dgl_sparse.from_coo(indices, val, shape))
def from_csr(
    indptr: torch.Tensor,
    indices: torch.Tensor,
    val: Optional[torch.Tensor] = None,
    shape: Optional[Tuple[int, int]] = None,
) -> SparseMatrix:
    r"""Creates a sparse matrix from compressed sparse row (CSR) format.

    See the Wikipedia article on sparse matrices for a description of CSR.

    For row i of the sparse matrix

    - the column indices of the non-zero elements are stored in
      ``indices[indptr[i]: indptr[i+1]]``
    - the corresponding values are stored in ``val[indptr[i]: indptr[i+1]]``

    Parameters
    ----------
    indptr : torch.Tensor
        Pointer to the column indices of shape ``(N + 1)``, where ``N`` is the
        number of rows
    indices : torch.Tensor
        The column indices of shape ``(nnz)``
    val : torch.Tensor, optional
        The values of shape ``(nnz)`` or ``(nnz, D)``. If None, it will be a
        tensor of shape ``(nnz)`` filled by 1.
    shape : tuple[int, int], optional
        If not specified, it will be inferred from :attr:`indptr` and
        :attr:`indices`, i.e., ``(len(indptr) - 1, indices.max() + 1)``.
        Otherwise, :attr:`shape` should be no smaller than this.

    Returns
    -------
    SparseMatrix
        Sparse matrix

    Examples
    --------

    Case1: Sparse matrix without values

    .. code::

        [[0, 1, 0],
         [0, 0, 1],
         [1, 1, 1]]

    >>> indptr = torch.tensor([0, 1, 2, 5])
    >>> indices = torch.tensor([1, 2, 0, 1, 2])
    >>> A = dglsp.from_csr(indptr, indices)
    SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
                                 [1, 2, 0, 1, 2]]),
                 values=tensor([1., 1., 1., 1., 1.]),
                 shape=(3, 3), nnz=5)
    >>> # Specify shape
    >>> A = dglsp.from_csr(indptr, indices, shape=(3, 5))
    SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
                                 [1, 2, 0, 1, 2]]),
                 values=tensor([1., 1., 1., 1., 1.]),
                 shape=(3, 5), nnz=5)

    Case2: Sparse matrix with scalar/vector values. Following example is with
    vector data.

    >>> indptr = torch.tensor([0, 1, 2, 5])
    >>> indices = torch.tensor([1, 2, 0, 1, 2])
    >>> val = torch.tensor([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
    >>> A = dglsp.from_csr(indptr, indices, val)
    SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
                                 [1, 2, 0, 1, 2]]),
                 values=tensor([[1, 1],
                                [2, 2],
                                [3, 3],
                                [4, 4],
                                [5, 5]]),
                 shape=(3, 3), nnz=5, val_size=(2,))
    """
    if shape is None:
        # ``.item()`` converts the 0-dim max tensor to a plain Python int so
        # the inferred shape is a tuple of ints, as in ``spmatrix``.
        shape = (indptr.shape[0] - 1, torch.max(indices).item() + 1)
    if val is None:
        # Allocate directly on the target device instead of CPU-then-copy.
        val = torch.ones(indices.shape[0], device=indptr.device)

    assert (
        val.dim() <= 2
    ), "The values of a SparseMatrix can only be scalars or vectors."

    return SparseMatrix(
        torch.ops.dgl_sparse.from_csr(indptr, indices, val, shape)
    )
If None, it will be a tensor of shape ``(nnz)`` filled by 1. shape : tuple[int, int], optional If not specified, it will be inferred from :attr:`indptr` and :attr:`indices`, i.e., ``(indices.max() + 1, len(indptr) - 1)``. Otherwise, :attr:`shape` should be no smaller than this. Returns ------- SparseMatrix Sparse matrix Examples -------- Case1: Sparse matrix without values .. code:: [[0, 1, 0], [0, 0, 1], [1, 1, 1]] >>> indptr = torch.tensor([0, 1, 3, 5]) >>> indices = torch.tensor([2, 0, 2, 1, 2]) >>> A = dglsp.from_csc(indptr, indices) SparseMatrix(indices=tensor([[2, 0, 2, 1, 2], [0, 1, 1, 2, 2]]), values=tensor([1., 1., 1., 1., 1.]), shape=(3, 3), nnz=5) >>> # Specify shape >>> A = dglsp.from_csc(indptr, indices, shape=(5, 3)) SparseMatrix(indices=tensor([[2, 0, 2, 1, 2], [0, 1, 1, 2, 2]]), values=tensor([1., 1., 1., 1., 1.]), shape=(5, 3), nnz=5) Case2: Sparse matrix with scalar/vector values. Following example is with vector data. >>> indptr = torch.tensor([0, 1, 3, 5]) >>> indices = torch.tensor([2, 0, 2, 1, 2]) >>> val = torch.tensor([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]) >>> A = dglsp.from_csc(indptr, indices, val) SparseMatrix(indices=tensor([[2, 0, 2, 1, 2], [0, 1, 1, 2, 2]]), values=tensor([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]]), shape=(3, 3), nnz=5, val_size=(2,)) """ if shape is None: shape = (torch.max(indices) + 1, indptr.shape[0] - 1) if val is None: val = torch.ones(indices.shape[0]).to(indptr.device) assert ( val.dim() <= 2 ), "The values of a SparseMatrix can only be scalars or vectors." return SparseMatrix( torch.ops.dgl_sparse.from_csc(indptr, indices, val, shape) ) def val_like(mat: SparseMatrix, val: torch.Tensor) -> SparseMatrix: """Creates a sparse matrix from an existing sparse matrix using new values. The new sparse matrix will have the same non-zero indices as the given sparse matrix and use the given values as the new non-zero values. 
def diag(
    val: torch.Tensor, shape: Optional[Tuple[int, int]] = None
) -> SparseMatrix:
    """Creates a sparse matrix based on the diagonal values.

    Parameters
    ----------
    val : torch.Tensor
        Diagonal of the matrix, in shape ``(N)`` or ``(N, D)``
    shape : tuple[int, int], optional
        If specified, :attr:`len(val)` must be equal to :attr:`min(shape)`,
        otherwise, it will be inferred from :attr:`val`, i.e., ``(N, N)``

    Returns
    -------
    SparseMatrix
        Sparse matrix

    Examples
    --------

    Case1: 5-by-5 diagonal matrix with scalar values on the diagonal

    >>> import torch
    >>> val = torch.ones(5)
    >>> dglsp.diag(val)
    SparseMatrix(indices=tensor([[0, 1, 2, 3, 4],
                                 [0, 1, 2, 3, 4]]),
                 values=tensor([1., 1., 1., 1., 1.]),
                 shape=(5, 5), nnz=5)

    Case2: 5-by-10 diagonal matrix with scalar values on the diagonal

    >>> val = torch.ones(5)
    >>> dglsp.diag(val, shape=(5, 10))
    SparseMatrix(indices=tensor([[0, 1, 2, 3, 4],
                                 [0, 1, 2, 3, 4]]),
                 values=tensor([1., 1., 1., 1., 1.]),
                 shape=(5, 10), nnz=5)

    Case3: 5-by-5 diagonal matrix with vector values on the diagonal

    >>> val = torch.randn(5, 3)
    >>> D = dglsp.diag(val)
    >>> D.shape
    (5, 5)
    >>> D.nnz
    5
    """
    assert (
        val.dim() <= 2
    ), "The values of a DiagMatrix can only be scalars or vectors."
    len_val = len(val)
    if shape is not None:
        # The trailing space after "got" keeps the two adjacent f-string
        # fragments from running together in the rendered message.
        assert len_val == min(shape), (
            f"Expect len(val) to be min(shape) for a diagonal matrix, got "
            f"{len_val} for len(val) and {shape} for shape."
        )
    else:
        # Without an explicit shape the matrix is square.
        shape = (len_val, len_val)
    return SparseMatrix(torch.ops.dgl_sparse.from_diag(val, shape))
Parameters ---------- torch_sparse_tensor : torch.Tensor Torch sparse tensor Returns ------- SparseMatrix Sparse matrix Examples -------- >>> indices = torch.tensor([[1, 1, 2], [2, 4, 3]]) >>> val = torch.ones(3) >>> torch_coo = torch.sparse_coo_tensor(indices, val) >>> dglsp.from_torch_sparse(torch_coo) SparseMatrix(indices=tensor([[1, 1, 2], [2, 4, 3]]), values=tensor([1., 1., 1.]), shape=(3, 5), nnz=3) """ assert torch_sparse_tensor.layout in ( torch.sparse_coo, torch.sparse_csr, torch.sparse_csc, ), ( f"Cannot convert Pytorch sparse tensor with layout " f"{torch_sparse_tensor.layout} to DGL sparse." ) if torch_sparse_tensor.layout == torch.sparse_coo: # Use ._indices() and ._values() to access uncoalesced indices and # values. return spmatrix( torch_sparse_tensor._indices(), torch_sparse_tensor._values(), torch_sparse_tensor.shape[:2], ) elif torch_sparse_tensor.layout == torch.sparse_csr: return from_csr( torch_sparse_tensor.crow_indices(), torch_sparse_tensor.col_indices(), torch_sparse_tensor.values(), torch_sparse_tensor.shape[:2], ) else: return from_csc( torch_sparse_tensor.ccol_indices(), torch_sparse_tensor.row_indices(), torch_sparse_tensor.values(), torch_sparse_tensor.shape[:2], ) def to_torch_sparse_coo(spmat: SparseMatrix) -> torch.Tensor: """Creates a torch sparse coo tensor from a sparse matrix. 
def to_torch_sparse_csr(spmat: SparseMatrix) -> torch.Tensor:
    """Creates a torch sparse csr tensor from a sparse matrix.

    Note that converting a sparse matrix to torch csr tensor could change the
    order of non-zero values.

    Parameters
    ----------
    spmat : SparseMatrix
        Sparse matrix

    Returns
    -------
    torch.Tensor
        Torch tensor with torch.sparse_csr layout

    Examples
    --------

    >>> indices = torch.tensor([[1, 2, 1], [2, 4, 3]])
    >>> val = torch.arange(3)
    >>> spmat = dglsp.spmatrix(indices, val)
    >>> dglsp.to_torch_sparse_csr(spmat)
    tensor(crow_indices=tensor([0, 0, 2, 3]),
           col_indices=tensor([2, 3, 4]),
           values=tensor([0, 2, 1]), size=(3, 5), nnz=3,
           layout=torch.sparse_csr)
    """
    values = spmat.val
    dense_shape = spmat.shape
    # Vector-valued entries contribute their trailing dimensions to the size.
    if values.dim() > 1:
        dense_shape = dense_shape + values.shape[1:]

    crow, cols, perm = spmat.csr()
    # A permutation, when present, gives the order of the values in CSR.
    if perm is not None:
        values = values[perm]
    return torch.sparse_csr_tensor(crow, cols, values, dense_shape)
Parameters ---------- spmat : SparseMatrix Sparse matrix Returns ------- torch.Tensor Torch tensor with torch.sparse_csc layout Examples -------- >>> indices = torch.tensor([[1, 2, 1], [2, 4, 3]]) >>> val = torch.arange(3) >>> spmat = dglsp.spmatrix(indices, val) >>> dglsp.to_torch_sparse_csc(spmat) tensor(ccol_indices=tensor([0, 0, 0, 1, 2, 3]), row_indices=tensor([1, 1, 2]), values=tensor([0, 2, 1]), size=(3, 5), nnz=3, layout=torch.sparse_csc) """ shape = spmat.shape if spmat.val.dim() > 1: shape += spmat.val.shape[1:] indptr, indices, value_indices = spmat.csc() val = spmat.val if value_indices is not None: val = val[value_indices] return torch.sparse_csc_tensor(indptr, indices, val, shape) def _sparse_matrix_str(spmat: SparseMatrix) -> str: """Internal function for converting a sparse matrix to string representation. """ indices_str = str(torch.stack(spmat.coo())) values_str = str(spmat.val) meta_str = f"shape={spmat.shape}, nnz={spmat.nnz}" if spmat.val.dim() > 1: val_size = tuple(spmat.val.shape[1:]) meta_str += f", val_size={val_size}" prefix = f"{type(spmat).__name__}(" def _add_indent(_str, indent): lines = _str.split("\n") lines = [lines[0]] + [" " * indent + line for line in lines[1:]] return "\n".join(lines) final_str = ( "indices=" + _add_indent(indices_str, len("indices=")) + ",\n" + "values=" + _add_indent(values_str, len("values=")) + ",\n" + meta_str + ")" ) final_str = prefix + _add_indent(final_str, len(prefix)) return final_str ================================================ FILE: python/dgl/sparse/unary_op.py ================================================ """DGL unary operators for sparse matrix module.""" from .sparse_matrix import diag, SparseMatrix, val_like def neg(A: SparseMatrix) -> SparseMatrix: """Returns a new sparse matrix with the negation of the original nonzero values, equivalent to ``-A``. 
def inv(A: SparseMatrix) -> SparseMatrix:
    """Returns the inverse of the sparse matrix.

    This function only supports square diagonal matrices with scalar nonzero
    values. The inverse is built by taking the element-wise reciprocal of the
    stored values.

    Returns
    -------
    SparseMatrix
        Inverse of the sparse matrix

    Examples
    --------

    >>> val = torch.arange(1, 4).float()
    >>> D = dglsp.diag(val)
    >>> D.inv()
    SparseMatrix(indices=tensor([[0, 1, 2],
                                 [0, 1, 2]]),
                 values=tensor([1.0000, 0.5000, 0.3333]),
                 shape=(3, 3), nnz=3)
    """
    num_rows, num_cols = A.shape
    assert A.is_diag(), "Non-diagonal sparse matrix does not support inversion."
    assert num_rows == num_cols, f"Expect a square matrix, got shape {A.shape}"
    assert len(A.val.shape) == 1, "inv only supports 1D nonzero val"

    # Inverting a diagonal matrix amounts to inverting each diagonal entry.
    return diag(1.0 / A.val, A.shape)
class _FuncWrapper(object):
    """Callable adapter that runs ``func`` and records its outcome.

    The return value is written into slot 0 of the buffer passed at call time.
    If ``func`` raises, the exception is kept in :attr:`error` so that the
    code waiting on the thread can re-raise it.
    """

    def __init__(self, func):
        self.func = func
        # Exception raised by ``func``; stays None while no failure occurred.
        self.error = None

    def __call__(self, buf, *args):
        try:
            buf[0] = self.func(*args)
        except Exception as exc:  # pylint: disable=broad-except
            # This is the entry point of the worker thread. Without capturing
            # the error here, the waiter would just observe ``None``.
            self.error = exc


class ThreadedFuture(object):
    """Wraps a function into a future asynchronously executed by a Python
    ``threading.Thread``.  The function is being executed upon instantiation of
    this object.

    Parameters
    ----------
    target : callable
        The function to run in the background thread.
    args : iterable
        Positional arguments passed to ``target``.
    """

    def __init__(self, target, args):
        # Single-slot buffer that receives the return value of ``target``.
        self.buf = [None]
        self._wrapper = _FuncWrapper(target)
        thread = threading.Thread(
            target=self._wrapper,
            args=[self.buf] + list(args),
            daemon=True,
        )
        thread.start()
        self.thread = thread

    def wait(self):
        """Blocks the current thread until the result becomes available and
        returns it.

        Raises
        ------
        Exception
            Whatever exception ``target`` raised in the worker thread.
        """
        self.thread.join()
        if self._wrapper.error is not None:
            raise self._wrapper.error
        return self.buf[0]
@register_storage_wrapper(np.memmap)
class NumpyStorage(FeatureStorage):
    """FeatureStorage that asynchronously reads features from a
    ``numpy.memmap`` object.

    ``fetch`` does not return the features directly; it returns a
    ``ThreadedFuture`` running the read in a background thread.
    """

    def __init__(self, arr):
        # The wrapped array; it is indexed with ``indices`` on every fetch.
        self.arr = arr

    # pylint: disable=unused-argument
    def _fetch(self, indices, device, pin_memory=False):
        """Index the array, convert it to a backend tensor and copy it to
        ``device``. ``pin_memory`` is accepted but not used.
        """
        rows = self.arr[indices]
        return F.copy_to(F.zerocopy_from_numpy(rows), device)

    # pylint: disable=unused-argument
    def fetch(self, indices, device, pin_memory=False, **kwargs):
        """Start the read on a worker thread and return its future.

        Extra keyword arguments are accepted for interface compatibility and
        ignored.
        """
        call_args = (indices, device, pin_memory)
        return ThreadedFuture(target=self._fetch, args=call_args)
@register_storage_wrapper(torch.Tensor)
class PyTorchTensorStorage(BaseTensorStorage):
    """Feature storages for slicing a PyTorch tensor."""

    def fetch(self, indices, device, pin_memory=False, **kwargs):
        """Gather the rows of the stored tensor selected by ``indices``.

        The strategy depends on where the stored tensor and the indices live:

        * storage on CUDA: ``_fetch_cuda`` (target may be CUDA or CPU);
        * storage and indices both off CUDA: ``_fetch_cpu``, which can use
          ``pin_memory`` for the transfer;
        * storage off CUDA, indices on CUDA: only allowed when the storage is
          pinned, in which case ``gather_pinned_tensor_rows`` is used.

        Raises
        ------
        ValueError
            If the indices are on CUDA while the storage is neither on CUDA
            nor pinned.
        """
        device = torch.device(device)
        feats = self.storage
        storage_on_cuda = feats.device.type == "cuda"
        indices_on_cuda = indices.device.type == "cuda"

        if storage_on_cuda:
            # CUDA to CUDA or CPU
            return _fetch_cuda(indices, feats, device, **kwargs)

        if not indices_on_cuda:
            # CPU to CPU or CUDA - use pin_memory and async transfer if possible
            return _fetch_cpu(
                indices,
                feats,
                feats.shape[1:],
                device,
                pin_memory,
                **kwargs,
            )

        if feats.is_pinned():
            return gather_pinned_tensor_rows(feats, indices)

        raise ValueError(
            f"Got indices on device {indices.device} whereas the feature tensor "
            f"is on {self.storage.device}. Please either (1) move the graph "
            f"to GPU with to() method, or (2) pin the graph with "
            f"pin_memory_() method."
        )
def node_subgraph(
    graph, nodes, *, relabel_nodes=True, store_ids=True, output_device=None
):
    """Return a subgraph induced on the given nodes.

    A node-induced subgraph is a graph with edges whose endpoints are both in
    the specified node set. In addition to extracting the subgraph, DGL also
    copies the features of the extracted nodes and edges to the resulting
    graph. The copy is *lazy* and incurs data movement only when needed.

    If the graph is heterogeneous, DGL extracts a subgraph per relation and
    composes them as the resulting graph. Thus, the resulting graph has the
    same set of relations as the input one.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
    nodes : nodes or dict[str, nodes]
        The nodes to form the subgraph, which cannot have any duplicate value.
        The result will be undefined otherwise. The allowed nodes formats are:

        * Int Tensor: Each element is a node ID. The tensor must have the same
          device type and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.
        * Bool Tensor: Each :math:`i^{th}` element is a bool flag indicating
          whether node :math:`i` is in the subgraph.

        If the graph is homogeneous, one can directly pass the above formats.
        Otherwise, the argument must be a dictionary with keys being node
        types and values being the node IDs in the above formats.
    relabel_nodes : bool, optional
        If True, the extracted subgraph will only have the nodes in the
        specified node set and it will relabel the nodes in order.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted edges in the
        ``edata`` of the resulting graph under name ``dgl.EID``; if
        ``relabel_nodes`` is ``True``, it will also store the raw IDs of the
        specified nodes in the ``ndata`` of the resulting graph under name
        ``dgl.NID``.
    output_device : Framework-specific device context object, optional
        The output device.  Default is the same as the input graph.

    Returns
    -------
    G : DGLGraph
        The subgraph.

    Raises
    ------
    DGLError
        If ``graph`` is a block.

    Notes
    -----

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Extract a subgraph from a homogeneous graph.

    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
    >>> sg = dgl.node_subgraph(g, [0, 1, 4])
    >>> sg
    Graph(num_nodes=3, num_edges=2,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.edges()
    (tensor([0, 2]), tensor([1, 0]))
    >>> sg.ndata[dgl.NID]  # original node IDs
    tensor([0, 1, 4])
    >>> sg.edata[dgl.EID]  # original edge IDs
    tensor([0, 4])

    Specify nodes using a boolean mask.

    >>> nodes = torch.tensor([True, True, False, False, True])  # choose nodes [0, 1, 4]
    >>> dgl.node_subgraph(g, nodes)
    Graph(num_nodes=3, num_edges=2,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})

    The resulting subgraph also copies features from the parent graph.

    >>> g.ndata['x'] = torch.arange(10).view(5, 2)
    >>> sg = dgl.node_subgraph(g, [0, 1, 4])
    >>> sg
    Graph(num_nodes=3, num_edges=2,
          ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.ndata['x']
    tensor([[0, 1],
            [2, 3],
            [8, 9]])

    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    >>>     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    >>>     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    >>> })
    >>> sub_g = dgl.node_subgraph(g, {'user': [1, 2]})
    >>> sub_g
    Graph(num_nodes={'game': 0, 'user': 2},
          num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 0},
          metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')])

    See Also
    --------
    edge_subgraph
    """
    if graph.is_block:
        raise DGLError("Extracting subgraph from a block graph is not allowed.")

    if not isinstance(nodes, Mapping):
        assert (
            len(graph.ntypes) == 1
        ), "need a dict of node type and IDs for graph with multiple node types"
        nodes = {graph.ntypes[0]: nodes}

    def _to_id_tensor(ntype, ids):
        # Boolean masks are turned into the positions of their True entries;
        # every other format goes through the generic tensor preparation.
        is_mask = F.is_tensor(ids) and F.dtype(ids) == F.bool
        if not is_mask:
            return utils.prepare_tensor(graph, ids, 'nodes["{}"]'.format(ntype))
        mask_on_graph_device = F.copy_to(ids, graph.device)
        return F.astype(F.nonzero_1d(mask_on_graph_device), graph.idtype)

    nodes = {ntype: _to_id_tensor(ntype, ids) for ntype, ids in nodes.items()}
    device = context_of(nodes)

    # One ID tensor per node type, in ``graph.ntypes`` order; node types that
    # were not specified contribute an empty tensor.
    induced_nodes = []
    for ntype in graph.ntypes:
        empty_ids = F.copy_to(F.tensor([], graph.idtype), device)
        induced_nodes.append(nodes.get(ntype, empty_ids))

    sgi = graph._graph.node_subgraph(induced_nodes)
    induced_edges = sgi.induced_edges
    if relabel_nodes:
        # (BarclayII) should not write induced_nodes = sgi.induced_nodes due to
        # the same bug in #1453.
        induced_nodes_or_device = induced_nodes
    else:
        # Re-extract by edges so that the node set is not compacted.
        sgi = graph._graph.edge_subgraph(induced_edges, True)
        induced_nodes_or_device = device

    subg = _create_hetero_subgraph(
        graph, sgi, induced_nodes_or_device, induced_edges, store_ids=store_ids
    )
    if output_device is None:
        return subg
    return subg.to(output_device)
output_device : Framework-specific device context object, optional The output device. Default is the same as the input graph. Returns ------- G : DGLGraph The subgraph. Notes ----- This function discards the batch information. Please use :func:`dgl.DGLGraph.set_batch_num_nodes` and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph to maintain the information. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Extract a subgraph from a homogeneous graph. >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle >>> sg = dgl.edge_subgraph(g, [0, 4]) >>> sg Graph(num_nodes=3, num_edges=2, ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) >>> sg.edges() (tensor([0, 1]), tensor([2, 0])) >>> sg.ndata[dgl.NID] # original node IDs tensor([0, 4, 1]) >>> sg.edata[dgl.EID] # original edge IDs tensor([0, 4]) Extract a subgraph without node relabeling. >>> sg = dgl.edge_subgraph(g, [0, 4], relabel_nodes=False) >>> sg Graph(num_nodes=5, num_edges=2, ndata_schemes={} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) >>> sg.edges() (tensor([0, 4]), tensor([1, 0])) Specify edges using a boolean mask. >>> nodes = torch.tensor([True, False, False, False, True]) # choose edges [0, 4] >>> dgl.edge_subgraph(g, nodes) Graph(num_nodes=3, num_edges=2, ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) The resulting subgraph also copies features from the parent graph. 
>>> g.ndata['x'] = torch.arange(10).view(5, 2) >>> sg = dgl.edge_subgraph(g, [0, 4]) >>> sg Graph(num_nodes=3, num_edges=2, ndata_schemes={'x': Scheme(shape=(2,), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}) >>> sg.ndata[dgl.NID] tensor([0, 4, 1]) >>> sg.ndata['x'] tensor([[0, 1], [8, 9], [2, 3]]) Extract a subgraph from a hetergeneous graph. >>> g = dgl.heterograph({ >>> ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), >>> ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2]) >>> }) >>> sub_g = dgl.edge_subgraph(g, {('user', 'follows', 'user'): [1, 2], ... ('user', 'plays', 'game'): [2]}) >>> print(sub_g) Graph(num_nodes={'game': 1, user': 2}, num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 1}, metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')]) See Also -------- node_subgraph """ if graph.is_block and relabel_nodes: raise DGLError("Extracting subgraph from a block graph is not allowed.") if not isinstance(edges, Mapping): assert ( len(graph.canonical_etypes) == 1 ), "need a dict of edge type and IDs for graph with multiple edge types" edges = {graph.canonical_etypes[0]: edges} def _process_edges(etype, e): if F.is_tensor(e) and F.dtype(e) == F.bool: return F.astype( F.nonzero_1d(F.copy_to(e, graph.device)), graph.idtype ) else: return utils.prepare_tensor(graph, e, 'edges["{}"]'.format(etype)) edges = {graph.to_canonical_etype(etype): e for etype, e in edges.items()} edges = {etype: _process_edges(etype, e) for etype, e in edges.items()} device = context_of(edges) induced_edges = [ edges.get(cetype, F.copy_to(F.tensor([], graph.idtype), device)) for cetype in graph.canonical_etypes ] sgi = graph._graph.edge_subgraph(induced_edges, not relabel_nodes) induced_nodes_or_device = sgi.induced_nodes if relabel_nodes else device subg = _create_hetero_subgraph( graph, sgi, induced_nodes_or_device, induced_edges, store_ids=store_ids ) return subg if 
def in_subgraph(
    graph, nodes, *, relabel_nodes=False, store_ids=True, output_device=None
):
    """Return the subgraph induced on the inbound edges of all the edge types
    of the given nodes.

    An in subgraph is equivalent to creating a new graph using the incoming
    edges of the given nodes. In addition to extracting the subgraph, DGL also
    copies the features of the extracted nodes and edges to the resulting
    graph. The copy is *lazy* and incurs data movement only when needed.

    If the graph is heterogeneous, DGL extracts a subgraph per relation and
    composes them as the resulting graph. Thus, the resulting graph has the
    same set of relations as the input one.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    nodes : nodes or dict[str, nodes]
        The nodes to form the subgraph, which cannot have any duplicate value.
        The result will be undefined otherwise. The allowed nodes formats are:

        * Int Tensor: Each element is a node ID. The tensor must have the same
          device type and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

        If the graph is homogeneous, one can directly pass the above formats.
        Otherwise, the argument must be a dictionary (any mapping is accepted)
        with keys being node types and values being the node IDs in the above
        formats.
    relabel_nodes : bool, optional
        If True, it will remove the isolated nodes and relabel the rest nodes
        in the extracted subgraph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted edges in the
        ``edata`` of the resulting graph under name ``dgl.EID``; if
        ``relabel_nodes`` is ``True``, it will also store the raw IDs of the
        extracted nodes in the ``ndata`` of the resulting graph under name
        ``dgl.NID``.
    output_device : Framework-specific device context object, optional
        The output device.  Default is the same as the input graph.

    Returns
    -------
    DGLGraph
        The subgraph.

    Raises
    ------
    DGLError
        If ``graph`` is a block, or if ``nodes`` is not a mapping while the
        graph has more than one node type.

    Notes
    -----

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Extract a subgraph from a homogeneous graph.

    >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0]))  # 5-node cycle
    >>> g.edata['w'] = torch.arange(10).view(5, 2)
    >>> sg = dgl.in_subgraph(g, [2, 0])
    >>> sg
    Graph(num_nodes=5, num_edges=2,
          ndata_schemes={}
          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.edges()
    (tensor([1, 4]), tensor([2, 0]))
    >>> sg.edata[dgl.EID]  # original edge IDs
    tensor([1, 4])
    >>> sg.edata['w']  # also extract the features
    tensor([[2, 3],
            [8, 9]])

    Extract a subgraph with node labeling.

    >>> sg = dgl.in_subgraph(g, [2, 0], relabel_nodes=True)
    >>> sg
    Graph(num_nodes=4, num_edges=2,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.edges()
    (tensor([1, 3]), tensor([2, 0]))
    >>> sg.edata[dgl.EID]  # original edge IDs
    tensor([1, 4])
    >>> sg.ndata[dgl.NID]  # original node IDs
    tensor([0, 1, 2, 4])

    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
    >>> sub_g = g.in_subgraph({'user': [2], 'game': [2]})
    >>> sub_g
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'plays', 'game'): 1, ('user', 'follows', 'user'): 2},
          metagraph=[('user', 'game', 'plays'), ('user', 'user', 'follows')])

    See also
    --------
    out_subgraph
    """
    if graph.is_block:
        raise DGLError("Extracting subgraph of a block graph is not allowed.")
    # Recognize any Mapping, as node_subgraph/edge_subgraph in this module do,
    # so that non-``dict`` mappings are not mistaken for a plain ID container.
    # NOTE(review): assumes utils.prepare_tensor_dict only iterates the
    # mapping's items -- confirm against its implementation.
    if not isinstance(nodes, Mapping):
        if len(graph.ntypes) > 1:
            raise DGLError(
                "Must specify node type when the graph is not homogeneous."
            )
        nodes = {graph.ntypes[0]: nodes}
    nodes = utils.prepare_tensor_dict(graph, nodes, "nodes")
    device = context_of(nodes)
    # One ID array per node type, in ``graph.ntypes`` order; unspecified node
    # types contribute an empty array.
    nodes_all_types = [
        F.to_dgl_nd(
            nodes.get(ntype, F.copy_to(F.tensor([], graph.idtype), device))
        )
        for ntype in graph.ntypes
    ]

    sgi = _CAPI_DGLInSubgraph(graph._graph, nodes_all_types, relabel_nodes)
    # Without relabeling there are no induced nodes to record, so only the
    # device is forwarded.
    induced_nodes_or_device = sgi.induced_nodes if relabel_nodes else device
    induced_edges = sgi.induced_edges
    subg = _create_hetero_subgraph(
        graph, sgi, induced_nodes_or_device, induced_edges, store_ids=store_ids
    )
    return subg if output_device is None else subg.to(output_device)
relabel_nodes : bool, optional If True, it will remove the isolated nodes and relabel the rest nodes in the extracted subgraph. store_ids : bool, optional If True, it will store the raw IDs of the extracted edges in the ``edata`` of the resulting graph under name ``dgl.EID``; if ``relabel_nodes`` is ``True``, it will also store the raw IDs of the extracted nodes in the ``ndata`` of the resulting graph under name ``dgl.NID``. output_device : Framework-specific device context object, optional The output device. Default is the same as the input graph. Returns ------- DGLGraph The subgraph. Notes ----- This function discards the batch information. Please use :func:`dgl.DGLGraph.set_batch_num_nodes` and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph to maintain the information. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Extract a subgraph from a homogeneous graph. >>> g = dgl.graph(([0, 1, 2, 3, 4], [1, 2, 3, 4, 0])) # 5-node cycle >>> g.edata['w'] = torch.arange(10).view(5, 2) >>> sg = dgl.out_subgraph(g, [2, 0]) >>> sg Graph(num_nodes=5, num_edges=2, ndata_schemes={} edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)}) >>> sg.edges() (tensor([2, 0]), tensor([3, 1])) >>> sg.edata[dgl.EID] # original edge IDs tensor([2, 0]) >>> sg.edata['w'] # also extract the features tensor([[4, 5], [0, 1]]) Extract a subgraph with node labeling. >>> sg = dgl.out_subgraph(g, [2, 0], relabel_nodes=True) >>> sg Graph(num_nodes=4, num_edges=2, ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)}) >>> sg.edges() (tensor([2, 0]), tensor([3, 1])) >>> sg.edata[dgl.EID] # original edge IDs tensor([2, 0]) >>> sg.ndata[dgl.NID] # original node IDs tensor([0, 1, 2, 3]) Extract a subgraph from a heterogeneous graph. >>> g = dgl.heterograph({ ... 
def khop_in_subgraph(
    graph, nodes, k, *, relabel_nodes=True, store_ids=True, output_device=None
):
    """Return the subgraph induced by k-hop in-neighborhood of the specified
    node(s).

    We can expand a set of nodes by including the predecessors of them. From a
    specified node set, a k-hop in subgraph is obtained by first repeating the
    node set expansion for k times and then creating a node induced subgraph.
    In addition to extracting the subgraph, DGL also copies the features of
    the extracted nodes and edges to the resulting graph. The copy is *lazy*
    and incurs data movement only when needed.

    If the graph is heterogeneous, DGL extracts a subgraph per relation and
    composes them as the resulting graph. Thus the resulting graph has the
    same set of relations as the input one.

    Parameters
    ----------
    graph : DGLGraph
        The input graph.
    nodes : nodes or dict[str, nodes]
        The starting node(s) to expand, which cannot have any duplicate value.
        The result will be undefined otherwise. The allowed formats are:

        * Int: ID of a single node.
        * Int Tensor: Each element is a node ID. The tensor must have the same
          device type and ID data type as the graph's.
        * iterable[int]: Each element is a node ID.

        If the graph is homogeneous, one can directly pass the above formats.
        Otherwise, the argument must be a dictionary with keys being node
        types and values being the node IDs in the above formats. The passed
        mapping is not modified.
    k : int
        The number of hops.
    relabel_nodes : bool, optional
        If True, it will remove the isolated nodes and relabel the rest nodes
        in the extracted subgraph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted edges in the
        ``edata`` of the resulting graph under name ``dgl.EID``; if
        ``relabel_nodes`` is ``True``, it will also store the raw IDs of the
        extracted nodes in the ``ndata`` of the resulting graph under name
        ``dgl.NID``.
    output_device : Framework-specific device context object, optional
        The output device.  Default is the same as the input graph.

    Returns
    -------
    DGLGraph
        The subgraph.
    Tensor or dict[str, Tensor], optional
        The new IDs of the input :attr:`nodes` after node relabeling. This is
        returned only when :attr:`relabel_nodes` is True. It is in the same
        form as :attr:`nodes`.

    Raises
    ------
    DGLError
        If ``graph`` is a block.

    Notes
    -----

    When k is 1, the result subgraph is different from the one obtained by
    :func:`dgl.in_subgraph`. The 1-hop in subgraph also includes the edges
    among the neighborhood.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Extract a two-hop subgraph from a homogeneous graph.

    >>> g = dgl.graph(([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]))
    >>> g.edata['w'] = torch.arange(10).view(5, 2)
    >>> sg, inverse_indices = dgl.khop_in_subgraph(g, 0, k=2)
    >>> sg
    Graph(num_nodes=4, num_edges=4,
          ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)}
          edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64),
                         '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> sg.edges()
    (tensor([1, 1, 2, 3]), tensor([0, 2, 0, 2]))
    >>> sg.edata[dgl.EID]  # original edge IDs
    tensor([0, 1, 2, 4])
    >>> sg.edata['w']  # also extract the features
    tensor([[0, 1],
            [2, 3],
            [4, 5],
            [8, 9]])
    >>> inverse_indices
    tensor([0])

    Extract a subgraph from a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])})
    >>> sg, inverse_indices = dgl.khop_in_subgraph(g, {'game': 0}, k=2)
    >>> sg
    Graph(num_nodes={'game': 1, 'user': 2},
          num_edges={('user', 'follows', 'user'): 1, ('user', 'plays', 'game'): 2},
          metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')])
    >>> inverse_indices
    {'game': tensor([0])}

    See also
    --------
    khop_out_subgraph
    """
    if graph.is_block:
        raise DGLError("Extracting subgraph of a block graph is not allowed.")

    is_mapping = isinstance(nodes, Mapping)
    if not is_mapping:
        assert (
            len(graph.ntypes) == 1
        ), "need a dict of node type and IDs for graph with multiple node types"
        nodes = {graph.ntypes[0]: nodes}
    # Build a fresh dict instead of assigning back into ``nodes``: the caller's
    # mapping must not be modified, and it may be read-only.
    nodes = {
        nty: utils.prepare_tensor(graph, nty_nodes, 'nodes["{}"]'.format(nty))
        for nty, nty_nodes in nodes.items()
    }

    last_hop_nodes = nodes
    # Node sets per hop; entry 0 holds the seeds so that they come first in
    # the concatenation below.
    k_hop_nodes_ = [last_hop_nodes]
    device = context_of(nodes)
    place_holder = F.copy_to(F.tensor([], dtype=graph.idtype), device)
    for _ in range(k):
        current_hop_nodes = {nty: [] for nty in graph.ntypes}
        # Collect, per source node type, the predecessors of the last hop.
        for cetype in graph.canonical_etypes:
            srctype, _, dsttype = cetype
            in_nbrs, _ = graph.in_edges(
                last_hop_nodes.get(dsttype, place_holder), etype=cetype
            )
            current_hop_nodes[srctype].append(in_nbrs)
        for nty in graph.ntypes:
            if len(current_hop_nodes[nty]) == 0:
                # No edge type has this node type as its source.
                current_hop_nodes[nty] = place_holder
                continue
            current_hop_nodes[nty] = F.unique(
                F.cat(current_hop_nodes[nty], dim=0)
            )
        k_hop_nodes_.append(current_hop_nodes)
        last_hop_nodes = current_hop_nodes

    k_hop_nodes = dict()
    inverse_indices = dict()
    for nty in graph.ntypes:
        k_hop_nodes[nty], inverse_indices[nty] = F.unique(
            F.cat(
                [
                    hop_nodes.get(nty, place_holder)
                    for hop_nodes in k_hop_nodes_
                ],
                dim=0,
            ),
            return_inverse=True,
        )

    sub_g = node_subgraph(
        graph, k_hop_nodes, relabel_nodes=relabel_nodes, store_ids=store_ids
    )
    if output_device is not None:
        sub_g = sub_g.to(output_device)
    if not relabel_nodes:
        return sub_g

    # The seeds were concatenated first, so the leading entries of the inverse
    # indices are the new IDs of the seeds.
    if is_mapping:
        seed_inverse_indices = dict()
        for nty in nodes:
            seed_inverse_indices[nty] = F.slice_axis(
                inverse_indices[nty], axis=0, begin=0, end=len(nodes[nty])
            )
    else:
        # Homogeneous input: name the single node type explicitly rather than
        # relying on a loop variable left over from an earlier loop.
        ntype = graph.ntypes[0]
        seed_inverse_indices = F.slice_axis(
            inverse_indices[ntype], axis=0, begin=0, end=len(nodes[ntype])
        )
    if output_device is not None:
        seed_inverse_indices = recursive_apply(
            seed_inverse_indices, lambda x: F.copy_to(x, output_device)
        )
    return sub_g, seed_inverse_indices
nodes : nodes or dict[str, nodes] The starting node(s) to expand, which cannot have any duplicate value. The result will be undefined otherwise. The allowed formats are: * Int: ID of a single node. * Int Tensor: Each element is a node ID. The tensor must have the same device type and ID data type as the graph's. * iterable[int]: Each element is a node ID. If the graph is homogeneous, one can directly pass the above formats. Otherwise, the argument must be a dictionary with keys being node types and values being the node IDs in the above formats. k : int The number of hops. relabel_nodes : bool, optional If True, it will remove the isolated nodes and relabel the rest nodes in the extracted subgraph. store_ids : bool, optional If True, it will store the raw IDs of the extracted edges in the ``edata`` of the resulting graph under name ``dgl.EID``; if ``relabel_nodes`` is ``True``, it will also store the raw IDs of the extracted nodes in the ``ndata`` of the resulting graph under name ``dgl.NID``. output_device : Framework-specific device context object, optional The output device. Default is the same as the input graph. Returns ------- DGLGraph The subgraph. Tensor or dict[str, Tensor], optional The new IDs of the input :attr:`nodes` after node relabeling. This is returned only when :attr:`relabel_nodes` is True. It is in the same form as :attr:`nodes`. Notes ----- When k is 1, the result subgraph is different from the one obtained by :func:`dgl.out_subgraph`. The 1-hop out subgraph also includes the edges among the neighborhood. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch Extract a two-hop subgraph from a homogeneous graph. 
>>> g = dgl.graph(([0, 2, 0, 4, 2], [1, 1, 2, 3, 4])) >>> g.edata['w'] = torch.arange(10).view(5, 2) >>> sg, inverse_indices = dgl.khop_out_subgraph(g, 0, k=2) >>> sg Graph(num_nodes=4, num_edges=4, ndata_schemes={'_ID': Scheme(shape=(), dtype=torch.int64)} edata_schemes={'w': Scheme(shape=(2,), dtype=torch.int64), '_ID': Scheme(shape=(), dtype=torch.int64)}) >>> sg.edges() (tensor([0, 0, 2, 2]), tensor([1, 2, 1, 3])) >>> sg.edata[dgl.EID] # original edge IDs tensor([0, 2, 1, 4]) >>> sg.edata['w'] # also extract the features tensor([[0, 1], [4, 5], [2, 3], [8, 9]]) >>> inverse_indices tensor([0]) Extract a subgraph from a heterogeneous graph. >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]), ... ('user', 'follows', 'user'): ([0, 1], [1, 3])}) >>> sg, inverse_indices = dgl.khop_out_subgraph(g, {'user': 0}, k=2) >>> sg Graph(num_nodes={'game': 2, 'user': 3}, num_edges={('user', 'follows', 'user'): 2, ('user', 'plays', 'game'): 2}, metagraph=[('user', 'user', 'follows'), ('user', 'game', 'plays')]) >>> inverse_indices {'user': tensor([0])} See also -------- khop_in_subgraph """ if graph.is_block: raise DGLError("Extracting subgraph of a block graph is not allowed.") is_mapping = isinstance(nodes, Mapping) if not is_mapping: assert ( len(graph.ntypes) == 1 ), "need a dict of node type and IDs for graph with multiple node types" nodes = {graph.ntypes[0]: nodes} for nty, nty_nodes in nodes.items(): nodes[nty] = utils.prepare_tensor( graph, nty_nodes, 'nodes["{}"]'.format(nty) ) last_hop_nodes = nodes k_hop_nodes_ = [last_hop_nodes] device = context_of(nodes) place_holder = F.copy_to(F.tensor([], dtype=graph.idtype), device) for _ in range(k): current_hop_nodes = {nty: [] for nty in graph.ntypes} for cetype in graph.canonical_etypes: srctype, _, dsttype = cetype _, out_nbrs = graph.out_edges( last_hop_nodes.get(srctype, place_holder), etype=cetype ) current_hop_nodes[dsttype].append(out_nbrs) for nty in graph.ntypes: if 
def node_type_subgraph(graph, ntypes, output_device=None):
    """Return the subgraph induced on given node types.

    The node-type-induced subgraph is made of every relation of the graph whose
    source and destination node types both belong to :attr:`ntypes`. The
    selected relations are handed to :func:`edge_type_subgraph`, which builds
    the result. Features of the extracted nodes and edges are copied to the
    resulting graph; the copy is *lazy* and incurs data movement only when
    needed.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
    ntypes : list[str]
        The type names of the nodes in the subgraph.
    output_device : Framework-specific device context object, optional
        The output device.  Default is the same as the input graph.

    Returns
    -------
    G : DGLGraph
        The subgraph.

    Raises
    ------
    DGLError
        If no relation of the graph connects two of the given node types.

    Notes
    -----
    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Instantiate a heterograph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    ... })
    >>> # Set node features
    >>> g.nodes['user'].data['h'] = torch.tensor([[0.], [1.], [2.]])

    Get subgraphs.

    >>> sub_g = g.node_type_subgraph(['user'])
    >>> print(sub_g)
    Graph(num_nodes=3, num_edges=3,
          ndata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)}
          edata_schemes={})

    Get the extracted node features.

    >>> sub_g.nodes['user'].data['h']
    tensor([[0.],
            [1.],
            [2.]])

    See Also
    --------
    edge_type_subgraph
    """
    wanted_ntids = [graph.get_ntype_id(ntype) for ntype in ntypes]
    # Metagraph edges: one (source type ID, destination type ID, edge type ID)
    # triple per relation of the graph.
    meta_src, meta_dst, meta_eid = graph._graph.metagraph.edges("eid")
    triples = zip(meta_src.tonumpy(), meta_dst.tonumpy(), meta_eid.tonumpy())
    kept_etypes = [
        graph.canonical_etypes[etid]
        for stid, dtid, etid in triples
        if stid in wanted_ntids and dtid in wanted_ntids
    ]
    if not kept_etypes:
        raise DGLError("There are no edges among nodes of the specified types.")
    return edge_type_subgraph(graph, kept_etypes, output_device=output_device)
def edge_type_subgraph(graph, etypes, output_device=None):
    """Return the subgraph induced on given edge types.

    The edge-type-induced subgraph keeps all the edges of the selected edge
    types. A node type is kept, with all of its nodes, whenever it is the
    source or destination type of one of the selected edge types. Features of
    the extracted nodes and edges are carried over to the resulting graph; the
    copy is *lazy* and incurs data movement only when needed.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract subgraphs from.
    etypes : list[str] or list[(str, str, str)]
        The type names of the edges in the subgraph. The allowed type name
        formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` for the edge type name if the name can uniquely identify a
          triplet format in the graph.
    output_device : Framework-specific device context object, optional
        The output device.  Default is the same as the input graph.

    Returns
    -------
    G : DGLGraph
        The subgraph.

    Notes
    -----
    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    Instantiate a heterograph.

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 2, 1]),
    ...     ('user', 'follows', 'user'): ([0, 1, 1], [1, 2, 2])
    ... })
    >>> # Set edge features
    >>> g.edges['follows'].data['h'] = torch.tensor([[0.], [1.], [2.]])

    Get subgraphs.

    >>> sub_g = g.edge_type_subgraph(['follows'])
    >>> sub_g
    Graph(num_nodes=3, num_edges=3,
          ndata_schemes={}
          edata_schemes={'h': Scheme(shape=(1,), dtype=torch.float32)})

    Get the shared edge features.

    >>> sub_g.edges['follows'].data['h']
    tensor([[0.],
            [1.],
            [2.]])

    See Also
    --------
    node_type_subgraph
    """
    selected_etids = [graph.get_etype_id(etype) for etype in etypes]

    # The metagraph is a homogeneous graph, so it is still queried with int64.
    src_ntids, dst_ntids, _ = graph._graph.metagraph.find_edges(
        utils.toindex(selected_etids, "int64")
    )
    rel_graphs = [
        graph._graph.get_relation_graph(etid) for etid in selected_etids
    ]
    src_ntids = src_ntids.tonumpy()
    dst_ntids = dst_ntids.tonumpy()

    # Renumber the node types touched by the selected relations as 0..n-1.
    old_to_new = {}
    for new_id, old_id in enumerate(set(src_ntids) | set(dst_ntids)):
        old_to_new[old_id] = new_id
    kept_ntids = list(old_to_new)

    new_meta_src = [old_to_new[v] for v in src_ntids]
    new_meta_dst = [old_to_new[v] for v in dst_ntids]

    node_frames = [graph._node_frames[i] for i in kept_ntids]
    edge_frames = [graph._edge_frames[i] for i in selected_etids]
    induced_ntypes = [graph._ntypes[i] for i in kept_ntids]
    # Only the "name" part of each edge type is needed here.
    induced_etypes = [graph._etypes[i] for i in selected_etids]
    node_counts = [graph.num_nodes(ntype) for ntype in induced_ntypes]

    metagraph = graph_index.from_edge_list((new_meta_src, new_meta_dst), True)
    # The per-type node counts must be int64.
    hgidx = heterograph_index.create_heterograph_from_relations(
        metagraph,
        rel_graphs,
        utils.toindex(node_counts, "int64"),
    )
    hg = DGLGraph(
        hgidx, induced_ntypes, induced_etypes, node_frames, edge_frames
    )
    if output_device is None:
        return hg
    return hg.to(output_device)
def _create_hetero_subgraph(
    parent,
    sgi,
    induced_nodes_or_device,
    induced_edges_or_device,
    store_ids=True,
):
    """Internal function to wrap a C API subgraph result into a DGLGraph.

    Parameters
    ----------
    parent : DGLGraph
        The parent DGLGraph.
    sgi : HeteroSubgraphIndex
        Subgraph object returned by CAPI.
    induced_nodes_or_device : list[Tensor] or device or None
        Induced node IDs or the device. Will store it as the dgl.NID ndata unless it
        is None, which means the induced node IDs are the same as the parent node IDs.
        If a device is given, the features will be copied to the given device.
    induced_edges_or_device : list[Tensor] or device or None
        Induced edge IDs or the device. Will store it as the dgl.EID edata unless it
        is None, which means the induced edge IDs are the same as the parent edge IDs.
        If a device is given, the features will be copied to the given device.
    store_ids : bool
        If True and induced_nodes is not None, it will store the raw IDs of the extracted
        nodes in the ``ndata`` of the resulting graph under name ``dgl.NID``.
        If True and induced_edges is not None, it will store the raw IDs of the extracted
        edges in the ``edata`` of the resulting graph under name ``dgl.EID``.

    Returns
    -------
    DGLGraph
        Graph
    """
    # (BarclayII) Accepting a device in place of the induced IDs is necessary for
    # UVA subgraphing, where the node features are not sliced but the device changes.
    # Without it we would get a subgraph on GPU but node features on CPU when the
    # nodes are not relabeled.
    sub_node_frames = utils.extract_node_subframes(
        parent, induced_nodes_or_device, store_ids
    )
    sub_edge_frames = utils.extract_edge_subframes(
        parent, induced_edges_or_device, store_ids
    )
    # The subgraph keeps the parent's full lists of node and edge types.
    sub_g = DGLGraph(sgi.graph, parent.ntypes, parent.etypes)
    utils.set_new_frames(
        sub_g, node_frames=sub_node_frames, edge_frames=sub_edge_frames
    )
    return sub_g
def pairwise_squared_distance(x):
    """Compute squared Euclidean distances between all pairs of points.

    Uses the expansion ``|a - b|^2 = |a|^2 + |b|^2 - 2 * a.b`` over the last
    two axes of :attr:`x`.

    x : (n_samples, n_points, dims)
    return : (n_samples, n_points, n_points)
    """
    # Squared norm of every point, keeping the reduced axis for broadcasting.
    sq_norms = F.sum(x * x, -1, True)
    # assuming that __matmul__ is always implemented (true for PyTorch, MXNet and Chainer)
    doubled_gram = (2 * x) @ F.swapaxes(x, -1, -2)
    return sq_norms + F.swapaxes(sq_norms, -1, -2) - doubled_gram
# pylint: disable=invalid-name
def knn_graph(
    x, k, algorithm="bruteforce-blas", dist="euclidean", exclude_self=False
):
    r"""Construct a graph from a set of points according to k-nearest-neighbor (KNN)
    and return.

    The function transforms the coordinates/features of a point set
    into a directed homogeneous graph. The coordinates of the point
    set is specified as a matrix whose rows correspond to points and
    columns correspond to coordinate/feature dimensions.

    The nodes of the returned graph correspond to the points, where the predecessors
    of each point are its k-nearest neighbors measured by the chosen distance.

    If :attr:`x` is a 3D tensor, then each submatrix will be transformed
    into a separate graph. DGL then composes the graphs into a large batched
    graph of multiple (:math:`shape(x)[0]`) connected components.

    See :doc:`the benchmark <../api/python/knn_benchmark>` for a complete benchmark result.

    Parameters
    ----------
    x : Tensor
        The point coordinates. It can be either on CPU or GPU.

        * If is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.

        * If is 3D, ``x[i]`` corresponds to the i-th KNN graph and
          ``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
    k : int
        The number of nearest neighbors per node.
    algorithm : str, optional
        Algorithm used to compute the k-nearest neighbors.

        * 'bruteforce-blas' will first compute the distance matrix
          using BLAS matrix multiplication operation provided by
          backend frameworks. Then use topk algorithm to get
          k-nearest neighbors. This method is fast when the point
          set is small but has :math:`O(N^2)` memory complexity where
          :math:`N` is the number of points.

        * 'bruteforce' will compute distances pair by pair and
          directly select the k-nearest neighbors during distance
          computation. This method is slower than 'bruteforce-blas'
          but has less memory overhead (i.e., :math:`O(Nk)` where :math:`N`
          is the number of points, :math:`k` is the number of nearest
          neighbors per node) since we do not need to store all distances.

        * 'bruteforce-sharemem' (CUDA only) is similar to 'bruteforce'
          but use shared memory in CUDA devices for buffer. This method is
          faster than 'bruteforce' when the dimension of input points
          is not large. This method is only available on CUDA device.

        * 'kd-tree' will use the kd-tree algorithm (CPU only).
          This method is suitable for low-dimensional data (e.g. 3D
          point clouds)

        * 'nn-descent' is an approximate approach from the paper
          *Efficient k-nearest neighbor graph construction for generic
          similarity measures*. This method will search for nearest
          neighbor candidates in "neighbors' neighbors".

        (default: 'bruteforce-blas')
    dist : str, optional
        The distance metric used to compute distance between points. It can be the following
        metrics:

        * 'euclidean': Use Euclidean distance (L2 norm)
          :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
        * 'cosine': Use cosine distance.

        (default: 'euclidean')
    exclude_self : bool, optional
        If True, the output graph will not contain self loop edges, and each node will not
        be counted as one of its own k neighbors.  If False, the output graph will contain
        self loop edges, and a node will be counted as one of its own k neighbors.

    Returns
    -------
    DGLGraph
        The constructed graph. The node IDs are in the same order as :attr:`x`.

    Raises
    ------
    DGLError
        If :attr:`k` is invalid or the first dimension of :attr:`x` is empty.

    Examples
    --------

    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch

    When :attr:`x` is a 2D tensor, a single KNN graph is constructed.

    >>> x = torch.tensor([[0.0, 0.0, 1.0],
    ...                   [1.0, 0.5, 0.5],
    ...                   [0.5, 0.2, 0.2],
    ...                   [0.3, 0.2, 0.4]])
    >>> knn_g = dgl.knn_graph(x, 2)  # Each node has two predecessors
    >>> knn_g.edges()
    (tensor([0, 1, 2, 2, 2, 3, 3, 3]), tensor([0, 1, 1, 2, 3, 0, 2, 3]))

    When :attr:`x` is a 3D tensor, DGL constructs multiple KNN graphs and
    then composes them into a graph of multiple connected components.

    >>> x1 = torch.tensor([[0.0, 0.0, 1.0],
    ...                    [1.0, 0.5, 0.5],
    ...                    [0.5, 0.2, 0.2],
    ...                    [0.3, 0.2, 0.4]])
    >>> x2 = torch.tensor([[0.0, 1.0, 1.0],
    ...                    [0.3, 0.3, 0.3],
    ...                    [0.4, 0.4, 1.0],
    ...                    [0.3, 0.8, 0.2]])
    >>> x = torch.stack([x1, x2], dim=0)
    >>> knn_g = dgl.knn_graph(x, 2)  # Each node has two predecessors
    >>> knn_g.edges()
    (tensor([0, 1, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 7]),
     tensor([0, 1, 1, 2, 3, 0, 2, 3, 4, 5, 6, 7, 4, 6, 5, 7]))
    """
    # Remember the caller's value so an error reports what was actually passed.
    requested_k = k
    if exclude_self:
        # add 1 to k, for the self edge, since it will be removed
        k = k + 1

    # check invalid k
    if k <= 0:
        raise DGLError(
            "Invalid k value. expect k > 0, got k = {}".format(requested_k)
        )

    # check empty point set
    x_size = tuple(F.shape(x))
    if x_size[0] == 0:
        raise DGLError("Find empty point set")

    d = F.ndim(x)
    # Number of points of each point set (one entry per graph in the batch).
    x_seg = x_size[0] * [x_size[1]] if d == 3 else [x_size[0]]
    if algorithm == "bruteforce-blas":
        result = _knn_graph_blas(x, k, dist=dist)
    else:
        if d == 3:
            # ``knn`` expects a 2D tensor plus the segment sizes.
            x = F.reshape(x, (x_size[0] * x_size[1], x_size[2]))
        out = knn(k, x, x_seg, algorithm=algorithm, dist=dist)
        row, col = out[1], out[0]
        result = convert.graph((row, col))

    if d == 3:
        # set batch information if x is 3D
        # ``F.copy_to`` is the backend-agnostic way of moving a tensor; a bare
        # ``.to()`` method only exists on tensors of some frameworks.
        num_nodes = F.copy_to(F.tensor(x_seg, dtype=F.int64), F.context(x))
        result.set_batch_num_nodes(num_nodes)
        # if any segment is too small for k, all algorithms reduce k for all segments
        clamped_k = min(k, np.min(x_seg))
        result.set_batch_num_edges(clamped_k * num_nodes)

    if exclude_self:
        # remove_self_loop will update batch_num_edges as needed
        result = remove_self_loop(result)

        # If there were more than k(+1) coincident points, there may not have been self loops on
        # all nodes, in which case there would still be one too many out edges on some nodes.
        # However, if every node had a self edge, the common case, every node would still have the
        # same degree as each other, so we can check that condition easily.
        # The -1 is for the self edge removal.
        clamped_k = min(k, np.min(x_seg)) - 1
        if result.num_edges() != clamped_k * result.num_nodes():
            # edges on any nodes with too high degree should all be length zero,
            # so pick an arbitrary one to remove from each such node
            degrees = result.in_degrees()
            node_indices = F.nonzero_1d(degrees > clamped_k)
            edges_to_remove_graph = sample_neighbors(
                result, node_indices, 1, edge_dir="in"
            )
            edge_ids = edges_to_remove_graph.edata[EID]
            result = remove_edges(result, edge_ids)

    return result
def _knn_graph_blas(x, k, dist="euclidean"):
    r"""Construct a graph from a set of points according to k-nearest-neighbor (KNN).

    This function first compute the distance matrix using BLAS matrix multiplication
    operation provided by backend frameworks. Then use topk algorithm to get
    k-nearest neighbors.

    Parameters
    ----------
    x : Tensor
        The point coordinates. It can be either on CPU or GPU.

        * If is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.

        * If is 3D, ``x[i]`` corresponds to the i-th KNN graph and
          ``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
    k : int
        The number of nearest neighbors per node. If it exceeds the number of
        points of a point set, a warning is issued and the number of points is
        used instead.
    dist : str, optional
        The distance metric used to compute distance between points. It can be the following
        metrics:

        * 'euclidean': Use Euclidean distance (L2 norm)
          :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
        * 'cosine': Use cosine distance.

        (default: 'euclidean')

    Returns
    -------
    DGLGraph
        The graph built with ``convert.graph`` whose edges go from each
        selected neighbor to the point it is a neighbor of.
    """
    if F.ndim(x) == 2:
        # Treat a single point set as a batch of one.
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    if k > n_points:
        # The two sentences are separated explicitly; adjacent string literals
        # would otherwise be glued together with nothing in between.
        dgl_warning(
            "'k' should be less than or equal to the number of points in 'x', "
            "expect k <= {0}, got k = {1}, use k = {0}".format(n_points, k)
        )
        k = n_points

    # if use cosine distance, normalize input points first
    # thus we can use euclidean distance to find knn equivalently.
    if dist == "cosine":
        l2_norm = lambda v: F.sqrt(F.sum(v * v, dim=2, keepdims=True))
        x = x / (l2_norm(x) + 1e-5)

    ctx = F.context(x)
    # Kept in its own name so the ``dist`` argument is not overwritten.
    dist_mat = pairwise_squared_distance(x)
    k_indices = F.astype(F.argtopk(dist_mat, k, 2, descending=False), F.int64)
    # index offset for each sample
    offset = F.arange(0, n_samples, ctx=ctx) * n_points
    offset = F.unsqueeze(offset, 1)
    src = F.reshape(k_indices, (n_samples, n_points * k))
    src = F.unsqueeze(src, 0) + offset
    dst = F.repeat(F.arange(0, n_points, ctx=ctx), k, dim=0)
    dst = F.unsqueeze(dst, 0) + offset
    return convert.graph((F.reshape(src, (-1,)), F.reshape(dst, (-1,))))
# pylint: disable=invalid-name
def segmented_knn_graph(
    x,
    k,
    segs,
    algorithm="bruteforce-blas",
    dist="euclidean",
    exclude_self=False,
):
    r"""Construct multiple graphs from multiple sets of points according to
    k-nearest-neighbor (KNN) and return.

    Compared with :func:`dgl.knn_graph`, this allows multiple point sets with
    different capacity. The points from different sets are stored contiguously
    in the :attr:`x` tensor.
    :attr:`segs` specifies the number of points in each point set. The
    function constructs a KNN graph for each point set, where the predecessors
    of each point are its k-nearest neighbors measured by the chosen distance
    (see :attr:`dist`). DGL then composes all KNN graphs
    into a batched graph with multiple (:math:`len(segs)`) connected components.

    Parameters
    ----------
    x : Tensor
        Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
    k : int
        The number of nearest neighbors per node.
    segs : list[int]
        Number of points in each point set. The numbers in :attr:`segs`
        must sum up to the number of rows in :attr:`x`.
    algorithm : str, optional
        Algorithm used to compute the k-nearest neighbors.

        * 'bruteforce-blas' will first compute the distance matrix
          using BLAS matrix multiplication operation provided by
          backend frameworks. Then use topk algorithm to get
          k-nearest neighbors. This method is fast when the point
          set is small but has :math:`O(N^2)` memory complexity where
          :math:`N` is the number of points.

        * 'bruteforce' will compute distances pair by pair and
          directly select the k-nearest neighbors during distance
          computation. This method is slower than 'bruteforce-blas'
          but has less memory overhead (i.e., :math:`O(Nk)` where :math:`N`
          is the number of points, :math:`k` is the number of nearest
          neighbors per node) since we do not need to store all distances.

        * 'bruteforce-sharemem' (CUDA only) is similar to 'bruteforce'
          but use shared memory in CUDA devices for buffer. This method is
          faster than 'bruteforce' when the dimension of input points
          is not large. This method is only available on CUDA device.

        * 'kd-tree' will use the kd-tree algorithm (CPU only).
          This method is suitable for low-dimensional data (e.g. 3D
          point clouds)

        * 'nn-descent' is an approximate approach from the paper
          *Efficient k-nearest neighbor graph construction for generic
          similarity measures*. This method will search for nearest
          neighbor candidates in "neighbors' neighbors".

        (default: 'bruteforce-blas')
    dist : str, optional
        The distance metric used to compute distance between points. It can be the following
        metrics:

        * 'euclidean': Use Euclidean distance (L2 norm)
          :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
        * 'cosine': Use cosine distance.

        (default: 'euclidean')
    exclude_self : bool, optional
        If True, the output graph will not contain self loop edges, and each node will not
        be counted as one of its own k neighbors.  If False, the output graph will contain
        self loop edges, and a node will be counted as one of its own k neighbors.

    Returns
    -------
    DGLGraph
        The batched graph. The node IDs are in the same order as :attr:`x`.

    Raises
    ------
    DGLError
        If :attr:`k` is invalid or :attr:`x` has no rows.

    Examples
    --------

    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch

    In the example below, the first point set has three points
    and the second point set has four points.

    >>> # Features/coordinates of the first point set
    >>> x1 = torch.tensor([[0.0, 0.5, 0.2],
    ...                    [0.1, 0.3, 0.2],
    ...                    [0.4, 0.2, 0.2]])
    >>> # Features/coordinates of the second point set
    >>> x2 = torch.tensor([[0.3, 0.2, 0.1],
    ...                    [0.5, 0.2, 0.3],
    ...                    [0.1, 0.1, 0.2],
    ...                    [0.6, 0.3, 0.3]])
    >>> x = torch.cat([x1, x2], dim=0)
    >>> segs = [x1.shape[0], x2.shape[0]]
    >>> knn_g = dgl.segmented_knn_graph(x, 2, segs)
    >>> knn_g.edges()
    (tensor([0, 0, 1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6]),
     tensor([0, 1, 0, 1, 2, 2, 3, 5, 4, 6, 3, 5, 4, 6]))
    """
    # Remember the caller's value so an error reports what was actually passed.
    requested_k = k
    if exclude_self:
        # add 1 to k, for the self edge, since it will be removed
        k = k + 1

    # check invalid k
    if k <= 0:
        raise DGLError(
            "Invalid k value. expect k > 0, got k = {}".format(requested_k)
        )

    # check empty point set
    if F.shape(x)[0] == 0:
        raise DGLError("Find empty point set")

    if algorithm == "bruteforce-blas":
        result = _segmented_knn_graph_blas(x, k, segs, dist=dist)
    else:
        out = knn(k, x, segs, algorithm=algorithm, dist=dist)
        row, col = out[1], out[0]
        result = convert.graph((row, col))

    # ``F.copy_to`` is the backend-agnostic way of moving a tensor; a bare
    # ``.to()`` method only exists on tensors of some frameworks.
    num_nodes = F.copy_to(F.tensor(segs, dtype=F.int64), F.context(x))
    result.set_batch_num_nodes(num_nodes)
    # if any segment is too small for k, all algorithms reduce k for all segments
    clamped_k = min(k, np.min(segs))
    result.set_batch_num_edges(clamped_k * num_nodes)

    if exclude_self:
        # remove_self_loop will update batch_num_edges as needed
        result = remove_self_loop(result)

        # If there were more than k(+1) coincident points, there may not have been self loops on
        # all nodes, in which case there would still be one too many out edges on some nodes.
        # However, if every node had a self edge, the common case, every node would still have the
        # same degree as each other, so we can check that condition easily.
        # The -1 is for the self edge removal.
        clamped_k = min(k, np.min(segs)) - 1
        if result.num_edges() != clamped_k * result.num_nodes():
            # edges on any nodes with too high degree should all be length zero,
            # so pick an arbitrary one to remove from each such node
            degrees = result.in_degrees()
            node_indices = F.nonzero_1d(degrees > clamped_k)
            edges_to_remove_graph = sample_neighbors(
                result, node_indices, 1, edge_dir="in"
            )
            edge_ids = edges_to_remove_graph.edata[EID]
            result = remove_edges(result, edge_ids)

    return result
def _segmented_knn_graph_blas(x, k, segs, dist="euclidean"):
    r"""Construct multiple graphs from multiple sets of points according to
    k-nearest-neighbor (KNN).

    This function first compute the distance matrix using BLAS matrix multiplication
    operation provided by backend frameworks. Then use topk algorithm to get
    k-nearest neighbors.

    Parameters
    ----------
    x : Tensor
        Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
    k : int
        The number of nearest neighbors per node. If it exceeds the size of the
        smallest point set, a warning is issued and that size is used instead
        for every point set.
    segs : list[int]
        Number of points in each point set. The numbers in :attr:`segs`
        must sum up to the number of rows in :attr:`x`.
    dist : str, optional
        The distance metric used to compute distance between points. It can be the following
        metrics:

        * 'euclidean': Use Euclidean distance (L2 norm)
          :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
        * 'cosine': Use cosine distance.

        (default: 'euclidean')

    Returns
    -------
    DGLGraph
        The graph built with ``convert.graph`` whose edges go from each
        selected neighbor to the point it is a neighbor of.
    """
    # if use cosine distance, normalize input points first
    # thus we can use euclidean distance to find knn equivalently.
    if dist == "cosine":
        l2_norm = lambda v: F.sqrt(F.sum(v * v, dim=1, keepdims=True))
        x = x / (l2_norm(x) + 1e-5)

    n_total_points, _ = F.shape(x)
    # offset[i] is the row index in ``x`` of the first point of segment i.
    offset = np.insert(np.cumsum(segs), 0, 0)
    min_seg_size = np.min(segs)
    if k > min_seg_size:
        # The two sentences are separated explicitly; adjacent string literals
        # would otherwise be glued together with nothing in between.
        dgl_warning(
            "'k' should be less than or equal to the number of points in 'x', "
            "expect k <= {0}, got k = {1}, use k = {0}".format(min_seg_size, k)
        )
        k = min_seg_size

    h_list = F.split(x, segs, 0)
    # Per-segment neighbor indices, shifted back to global row indices.
    src = [
        F.argtopk(pairwise_squared_distance(h_g), k, 1, descending=False)
        + int(offset[i])
        for i, h_g in enumerate(h_list)
    ]
    src = F.cat(src, 0)
    ctx = F.context(x)
    dst = F.repeat(F.arange(0, n_total_points, ctx=ctx), k, dim=0)
    return convert.graph((F.reshape(src, (-1,)), F.reshape(dst, (-1,))))
def _nndescent_knn_graph(
    x,
    k,
    segs,
    num_iters=None,
    max_candidates=None,
    delta=0.001,
    sample_rate=0.5,
    dist="euclidean",
):
    r"""Compute **approximate** k-nearest-neighbors for multiple sets of points
    using the NN-descent algorithm from the paper *Efficient k-nearest neighbor
    graph construction for generic similarity measures*.

    Parameters
    ----------
    x : Tensor
        Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
    k : int
        The number of nearest neighbors per node.
    segs : list[int]
        Number of points in each point set. The numbers in :attr:`segs`
        must sum up to the number of rows in :attr:`x`.
    num_iters : int, optional
        The maximum number of NN-descent iterations to perform. A value will be
        chosen based on the size of input by default.
        (Default: None)
    max_candidates : int, optional
        The maximum number of candidates to be considered during one iteration.
        Larger values will provide more accurate search results later, but
        potentially at non-negligible computation cost. A value will be chosen
        based on the number of neighbors by default.
        (Default: None)
    delta : float, optional
        A value controls the early abort. This function will abort if
        :math:`k * N * delta > c`, where :math:`N` is the number of points,
        :math:`c` is the number of updates during last iteration.
        (Default: 0.001)
    sample_rate : float, optional
        A value controls how many candidates sampled. It should be a float value
        between 0 and 1. Larger values will provide higher accuracy and converge
        speed but with higher time cost.
        (Default: 0.5)
    dist : str, optional
        The distance metric used to compute distance between points. It can be the following
        metrics:

        * 'euclidean': Use Euclidean distance (L2 norm)
          :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
        * 'cosine': Use cosine distance.

        (default: 'euclidean')

    Returns
    -------
    Tensor
        Tensor of shape ``(2, num_points * k)`` filled in by the NN-descent
        C API. It has the dtype and device of :attr:`segs` after it has been
        moved to the device of :attr:`x`.

    Raises
    ------
    DGLError
        If :attr:`k` is larger than the smallest point set, or :attr:`delta`
        is outside ``[0, 1]``.
    """
    num_points, _ = F.shape(x)
    if isinstance(segs, (tuple, list)):
        segs = F.tensor(segs)
    segs = F.copy_to(segs, F.context(x))

    if max_candidates is None:
        max_candidates = min(60, k)
    if num_iters is None:
        num_iters = max(10, int(round(np.log2(num_points))))
    max_candidates = int(sample_rate * max_candidates)

    # if use cosine distance, normalize input points first
    # thus we can use euclidean distance to find knn equivalently.
    if dist == "cosine":
        l2_norm = lambda v: F.sqrt(F.sum(v * v, dim=1, keepdims=True))
        x = x / (l2_norm(x) + 1e-5)

    # k must less than or equal to min(segs)
    min_seg_size = F.min(segs, dim=0)
    if k > min_seg_size:
        # The two sentences are separated explicitly; adjacent string literals
        # would otherwise be glued together with nothing in between.
        raise DGLError(
            "'k' must be less than or equal to the number of points in 'x', "
            "expect 'k' <= {}, got 'k' = {}".format(min_seg_size, k)
        )
    if delta < 0 or delta > 1:
        raise DGLError("'delta' must in [0, 1], got 'delta' = {}".format(delta))

    # offset[i] is the index of the first point of segment i; the last entry
    # is the total number of points.
    offset = F.zeros((F.shape(segs)[0] + 1,), F.dtype(segs), F.context(segs))
    offset[1:] = F.cumsum(segs, dim=0)
    out = F.zeros((2, num_points * k), F.dtype(segs), F.context(segs))

    # points, offsets, out, k, num_iters, max_candidates, delta
    _CAPI_DGLNNDescent(
        F.to_dgl_nd(x),
        F.to_dgl_nd(offset),
        F.zerocopy_to_dgl_ndarray_for_write(out),
        k,
        num_iters,
        max_candidates,
        delta,
    )
    return out
if dist == "cosine": l2_norm = lambda v: F.sqrt(F.sum(v * v, dim=1, keepdims=True)) x = x / (l2_norm(x) + 1e-5) # k must less than or equal to min(segs) if k > F.min(segs, dim=0): raise DGLError( "'k' must be less than or equal to the number of points in 'x'" "expect 'k' <= {}, got 'k' = {}".format(F.min(segs, dim=0), k) ) if delta < 0 or delta > 1: raise DGLError("'delta' must in [0, 1], got 'delta' = {}".format(delta)) offset = F.zeros((F.shape(segs)[0] + 1,), F.dtype(segs), F.context(segs)) offset[1:] = F.cumsum(segs, dim=0) out = F.zeros((2, num_points * k), F.dtype(segs), F.context(segs)) # points, offsets, out, k, num_iters, max_candidates, delta _CAPI_DGLNNDescent( F.to_dgl_nd(x), F.to_dgl_nd(offset), F.zerocopy_to_dgl_ndarray_for_write(out), k, num_iters, max_candidates, delta, ) return out def knn( k, x, x_segs, y=None, y_segs=None, algorithm="bruteforce", dist="euclidean" ): r"""For each element in each segment in :attr:`y`, find :attr:`k` nearest points in the same segment in :attr:`x`. If :attr:`y` is None, perform a self-query over :attr:`x`. This function allows multiple point sets with different capacity. The points from different sets are stored contiguously in the :attr:`x` and :attr:`y` tensor. :attr:`x_segs` and :attr:`y_segs` specifies the number of points in each point set. Parameters ---------- k : int The number of nearest neighbors per node. x : Tensor The point coordinates in x. It can be either on CPU or GPU (must be the same as :attr:`y`). Must be 2D. x_segs : Union[List[int], Tensor] Number of points in each point set in :attr:`x`. The numbers in :attr:`x_segs` must sum up to the number of rows in :attr:`x`. y : Tensor, optional The point coordinates in y. It can be either on CPU or GPU (must be the same as :attr:`x`). Must be 2D. (default: None) y_segs : Union[List[int], Tensor], optional Number of points in each point set in :attr:`y`. The numbers in :attr:`y_segs` must sum up to the number of rows in :attr:`y`. 
def knn(
    k, x, x_segs, y=None, y_segs=None, algorithm="bruteforce", dist="euclidean"
):
    r"""For each element in each segment in :attr:`y`, find :attr:`k` nearest
    points in the same segment in :attr:`x`. If :attr:`y` is None, perform a
    self-query over :attr:`x`.

    This function allows multiple point sets with different capacity. The points
    from different sets are stored contiguously in the :attr:`x` and :attr:`y`
    tensor. :attr:`x_segs` and :attr:`y_segs` specifies the number of points in
    each point set.

    Parameters
    ----------
    k : int
        The number of nearest neighbors per node. If it is larger than the
        smallest point set in :attr:`x`, a warning is issued and it is clamped
        to that size.
    x : Tensor
        The point coordinates in x. It can be either on CPU or GPU (must be the
        same as :attr:`y`). Must be 2D.
    x_segs : Union[List[int], Tensor]
        Number of points in each point set in :attr:`x`. The numbers in
        :attr:`x_segs` must sum up to the number of rows in :attr:`x`.
    y : Tensor, optional
        The point coordinates in y. It can be either on CPU or GPU (must be the
        same as :attr:`x`). Must be 2D.

        (default: None)
    y_segs : Union[List[int], Tensor], optional
        Number of points in each point set in :attr:`y`. The numbers in
        :attr:`y_segs` must sum up to the number of rows in :attr:`y`.
        Required whenever :attr:`y` is given.

        (default: None)
    algorithm : str, optional
        Algorithm used to compute the k-nearest neighbors.

        * 'bruteforce' will compute distances pair by pair and directly select
          the k-nearest neighbors during distance computation. This method is
          slower than 'bruteforce-blas' but has less memory overhead (i.e.,
          :math:`O(Nk)` where :math:`N` is the number of points, :math:`k` is
          the number of nearest neighbors per node) since we do not need to
          store all distances.

        * 'bruteforce-sharemem' (CUDA only) is similar to 'bruteforce' but use
          shared memory in CUDA devices for buffer. This method is faster than
          'bruteforce' when the dimension of input points is not large. This
          method is only available on CUDA device.

        * 'kd-tree' will use the kd-tree algorithm (CPU only). This method is
          suitable for low-dimensional data (e.g. 3D point clouds)

        * 'nn-descent' is an approximate approach from the paper *Efficient
          k-nearest neighbor graph construction for generic similarity
          measures*. This method will search for nearest neighbor candidates in
          "neighbors' neighbors".

        Note: Currently, 'nn-descent' only supports self-query cases, i.e.
        :attr:`y` is None.

        (default: 'bruteforce')
    dist : str, optional
        The distance metric used to compute distance between points
        (case-insensitive). It can be the following metrics:

        * 'euclidean': Use Euclidean distance (L2 norm)
          :math:`\sqrt{\sum_{i} (x_{i} - y_{i})^{2}}`.
        * 'cosine': Use cosine distance.

        (default: 'euclidean')

    Returns
    -------
    Tensor
        Tensor with size `(2, k * num_points(y))`.
        The first subtensor contains point indices in :attr:`y`. The second
        subtensor contains point indices in :attr:`x`.

    Raises
    ------
    DGLError
        If :attr:`dist` is not a supported metric, :attr:`k` is not positive,
        a point set is empty, :attr:`y` is given without :attr:`y_segs`, or
        'nn-descent' is requested for a non-self query.
    """
    # Validate the shared arguments up front so that every algorithm,
    # including the early 'nn-descent' dispatch below, sees a normalized
    # metric name and a sane k.
    dist = dist.lower()
    dist_metric_list = ["euclidean", "cosine"]
    if dist not in dist_metric_list:
        raise DGLError(
            "Only {} are supported for distance "
            "computation, got {}".format(dist_metric_list, dist)
        )
    if k <= 0:
        raise DGLError("Invalid k value. expect k > 0, got k = {}".format(k))

    # TODO(lygztq) add support for querying different point sets using nn-descent.
    if algorithm == "nn-descent":
        if y is not None or y_segs is not None:
            raise DGLError(
                "Currently 'nn-descent' only supports self-query cases."
            )
        return _nndescent_knn_graph(x, k, x_segs, dist=dist)

    # self query
    if y is None:
        y = x
        y_segs = x_segs
    elif y_segs is None:
        raise DGLError("'y_segs' must be provided when 'y' is given.")

    assert F.context(x) == F.context(y), "'x' and 'y' must be on the same device"
    if isinstance(x_segs, (tuple, list)):
        x_segs = F.tensor(x_segs)
    if isinstance(y_segs, (tuple, list)):
        y_segs = F.tensor(y_segs)
    x_segs = F.copy_to(x_segs, F.context(x))
    y_segs = F.copy_to(y_segs, F.context(y))

    # empty point set
    if F.shape(x)[0] == 0 or F.shape(y)[0] == 0:
        raise DGLError("Find empty point set")

    # k should be less than or equal to min(x_segs)
    min_num_points = F.as_scalar(F.min(x_segs, dim=0))
    if k > min_num_points:
        dgl_warning(
            "'k' should be less than or equal to the number of points in 'x', "
            "expect k <= {0}, got k = {1}, use k = {0}".format(
                min_num_points, k
            )
        )
        k = min_num_points
    # clamping may have produced k == 0 when some point set in x is empty
    if k <= 0:
        raise DGLError("Invalid k value. expect k > 0, got k = {}".format(k))

    # *_offset[i] is the index of the first point of the i-th point set.
    x_offset = F.zeros(
        (F.shape(x_segs)[0] + 1,), F.dtype(x_segs), F.context(x_segs)
    )
    x_offset[1:] = F.cumsum(x_segs, dim=0)
    y_offset = F.zeros(
        (F.shape(y_segs)[0] + 1,), F.dtype(y_segs), F.context(y_segs)
    )
    y_offset[1:] = F.cumsum(y_segs, dim=0)

    out = F.zeros((2, F.shape(y)[0] * k), F.dtype(x_segs), F.context(x_segs))

    # If cosine distance is requested, normalize the input points first so
    # that Euclidean distance finds the same neighbors.
    if dist == "cosine":
        x = x / (F.sqrt(F.sum(x * x, dim=1, keepdims=True)) + 1e-5)
        y = y / (F.sqrt(F.sum(y * y, dim=1, keepdims=True)) + 1e-5)

    _CAPI_DGLKNN(
        F.to_dgl_nd(x),
        F.to_dgl_nd(x_offset),
        F.to_dgl_nd(y),
        F.to_dgl_nd(y_offset),
        k,
        F.zerocopy_to_dgl_ndarray_for_write(out),
        algorithm,
    )
    return out
def to_bidirected(g, copy_ndata=False, readonly=None):
    r"""Convert the graph to a bi-directional simple graph and return.

    For an input graph :math:`G`, return a new graph :math:`G'` such that an
    edge :math:`(u, v)\in G'` exists if and only if :math:`(u, v)` or
    :math:`(v, u)` is an edge of :math:`G`. The resulting graph :math:`G'` is a
    simple graph, meaning there is no parallel edge.

    The operation only works for edges whose two endpoints belong to the same
    node type. DGL will raise error if the input graph is heterogeneous and
    contains edges with different types of endpoints.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the bidirected graph are copied from the
        original graph. If False, the bidirected graph will not have any node
        features.

        (Default: False)
    readonly : bool
        **DEPRECATED**. Passing any value other than None only triggers a
        warning.

    Returns
    -------
    DGLGraph
        The bidirected graph

    Raises
    ------
    AssertionError
        If any edge type connects two different node types.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node
    feature tensors with the input graph. Hence, users should try to avoid
    in-place operations which will be visible to both graphs.

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges()
    (tensor([0, 1, 2, 1, 2, 0]), tensor([1, 2, 0, 0, 1, 2]))

    The graph already have i->j and j->i

    >>> g = dgl.graph((th.tensor([0, 1, 2, 0]), th.tensor([1, 2, 0, 2])))
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges()
    (tensor([0, 1, 2, 1, 2, 0]), tensor([1, 2, 0, 0, 1, 2]))

    **Heterogeneous graphs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2]), th.tensor([1, 1, 2, 0])),
    ...     ('user', 'follows', 'user'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges(etype='wins')
    (tensor([0, 0, 1, 1, 2, 2]), tensor([1, 2, 0, 2, 0, 1]))
    >>> bg1.edges(etype='follows')
    (tensor([1, 1, 2]), tensor([1, 2, 1]))
    """
    if readonly is not None:
        dgl_warning(
            "Parameter readonly is deprecated. "
            "There will be no difference between readonly and non-readonly DGLGraph"
        )

    for c_etype in g.canonical_etypes:
        if c_etype[0] != c_etype[2]:
            # Raised explicitly (instead of ``assert False``) so the check
            # still runs under ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "to_bidirected is not well defined for "
                "unidirectional bipartite graphs"
                ", but {} is unidirectional bipartite".format(c_etype)
            )

    # First add every reverse edge, then collapse the parallel edges.
    g = add_reverse_edges(g, copy_ndata=copy_ndata, copy_edata=False)
    g = to_simple(
        g, return_counts=None, copy_ndata=copy_ndata, copy_edata=False
    )
    return g
def add_reverse_edges(
    g,
    readonly=None,
    copy_ndata=True,
    copy_edata=False,
    ignore_bipartite=False,
    exclude_self=True,
):
    r"""Add a reversed edge for each edge in the input graph and return a new graph.

    For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
    function creates a new graph with edges
    :math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.

    The returned graph may have duplicate edges. To create a bidirected graph without
    duplicate edges, use :func:`to_bidirected`.

    The operation only works for edges whose two endpoints belong to the same node type.
    DGL will raise error if the input graph is heterogeneous and contains edges
    with different types of endpoints. If :attr:`ignore_bipartite` is true, DGL will
    ignore those edges instead.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    readonly : bool, optional
        Deprecated. There will be no difference between readonly and
        non-readonly. Passing any value other than None only triggers a warning.

        (Default: None)
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from
        the original graph. If False, the new graph will not have any
        node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the features of the reversed edges will be identical to
        the original ones.

        If False, the new graph will not have any edge features.

        (Default: False)
    ignore_bipartite: bool, optional
        If True, unidirectional bipartite graphs are ignored and
        no error is raised. If False, an error will be raised if
        an edge type of the input heterogeneous graph is for a unidirectional
        bipartite graph.
    exclude_self: bool, optional
        If True, it does not add reverse edges for self-loops, which is likely
        meaningless in most cases.

    Returns
    -------
    DGLGraph
        The graph with reversed edges added.

    Raises
    ------
    AssertionError
        If :attr:`ignore_bipartite` is False and an edge type connects two
        different node types.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs. On the contrary, the two graphs do not share
    the same edge feature storage.

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    **Homogeneous graphs**

    The self-loop ``0 -> 0`` gets no reverse edge because :attr:`exclude_self`
    defaults to True.

    >>> g = dgl.graph((th.tensor([0, 0]), th.tensor([0, 1])))
    >>> bg1 = dgl.add_reverse_edges(g)
    >>> bg1.edges()
    (tensor([0, 0, 1]), tensor([0, 1, 0]))

    **Heterogeneous graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1])),
    ...     ('user', 'follows', 'user'): (th.tensor([1, 2, 1]), th.tensor([0, 0, 0]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['wins'].data['h'] = th.tensor([0, 1, 2, 3, 4])

    The :func:`add_reverse_edges` operation is applied to the edge type
    ``('user', 'wins', 'user')`` and the edge type ``('user', 'follows', 'user')``.
    The edge type ``('user', 'plays', 'game')`` is ignored.  Both the node features and
    edge features are shared.

    >>> bg = dgl.add_reverse_edges(g, copy_ndata=True, copy_edata=True, ignore_bipartite=True)
    >>> bg.edges(('user', 'wins', 'user'))
    (tensor([0, 2, 0, 2, 2, 1, 1, 2, 1, 0]), tensor([1, 1, 2, 1, 0, 0, 2, 0, 2, 2]))
    >>> bg.edges(('user', 'follows', 'user'))
    (tensor([1, 2, 1, 0, 0, 0]), tensor([0, 0, 0, 1, 2, 1]))
    >>> bg.edges(('user', 'plays', 'game'))
    (tensor([1, 2, 1]), tensor([2, 1, 1]))
    >>> bg.nodes['game'].data['hv']
    tensor([[1.],
            [1.],
            [1.]])
    >>> bg.edges[('user', 'wins', 'user')].data['h']
    tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
    """
    if readonly is not None:
        dgl_warning(
            "Parameter readonly is deprecated. "
            "There will be no difference between readonly and non-readonly DGLGraph"
        )

    canonical_etypes = g.canonical_etypes
    # node count for each ntype, so the new graph keeps isolated nodes
    num_nodes_dict = {ntype: g.num_nodes(ntype) for ntype in g.ntypes}
    # the original check is an identity test against False; keep it so that
    # other falsy values (e.g. None) are still treated as "ignore"
    strict = ignore_bipartite is False

    subgs = {}
    # rev_eids[etype][i] is the ID of the original edge whose feature the
    # i-th edge of the new graph takes when copy_edata is True.
    rev_eids = {}

    def add_for_etype(etype):
        """Record the original plus reversed edges of one edge type."""
        u, v = g.edges(form="uv", order="eid", etype=etype)
        rev_u, rev_v = v, u
        eid = F.copy_to(F.arange(0, g.num_edges(etype)), g.device)
        if exclude_self:
            # drop the reversed copy of every self-loop
            non_self_loop_mask = F.logical_not(F.equal(rev_u, rev_v))
            rev_u = F.boolean_mask(rev_u, non_self_loop_mask)
            rev_v = F.boolean_mask(rev_v, non_self_loop_mask)
            non_self_loop_eid = F.boolean_mask(eid, non_self_loop_mask)
            rev_eids[etype] = F.cat([eid, non_self_loop_eid], 0)
        else:
            rev_eids[etype] = F.cat([eid, eid], 0)
        subgs[etype] = (F.cat([u, rev_u], dim=0), F.cat([v, rev_v], dim=0))

    for c_etype in canonical_etypes:
        if c_etype[0] == c_etype[2]:
            add_for_etype(c_etype)
            continue
        if strict:
            # Raised explicitly (instead of ``assert False``) so the check
            # still runs under ``python -O``; the exception type is unchanged.
            raise AssertionError(
                "add_reverse_edges is not well defined for "
                "unidirectional bipartite graphs"
                ", but {} is unidirectional bipartite".format(c_etype)
            )
        # bipartite relation: keep its edges untouched
        u, v = g.edges(form="uv", order="eid", etype=c_etype)
        subgs[c_etype] = (u, v)

    new_g = convert.heterograph(subgs, num_nodes_dict=num_nodes_dict)

    # handle features
    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(new_g, node_frames=node_frames)

    if copy_edata:
        # find indices
        eids = []
        for c_etype in canonical_etypes:
            if c_etype[0] != c_etype[2]:
                eids.append(
                    F.copy_to(F.arange(0, g.num_edges(c_etype)), new_g.device)
                )
            else:
                eids.append(rev_eids[c_etype])

        edge_frames = utils.extract_edge_subframes(g, eids)
        utils.set_new_frames(new_g, edge_frames=edge_frames)

    return new_g
def line_graph(g, backtracking=True, shared=False):
    """Build the line graph ``L(G)`` of a homogeneous graph ``G``.

    Every edge of ``G`` becomes a node of ``L(G)``. For any pair of edges
    ``(u, v)`` and ``(v, w)`` in ``G``, the node standing for ``(u, v)`` gets
    an edge to the node standing for ``(v, w)``.

    Parameters
    ----------
    g : DGLGraph
        Input graph.  Must be homogeneous.
    backtracking : bool, optional
        If False, the line graph node corresponding to edge ``(u, v)`` will
        not have an edge connecting to the line graph node corresponding to
        edge ``(v, u)``.

        Default: True.
    shared : bool, optional
        Whether to copy the edge features of the original graph as the node
        features of the result line graph.

    Returns
    -------
    G : DGLGraph
        The line graph of this graph.

    Notes
    -----
    * If :attr:`shared` is True, the node features of the resulting graph
      share the same storage with the edge features of the input graph.
      Hence, users should try to avoid in-place operations which will be
      visible to both graphs.
    * The function supports input graph on GPU but copies it to CPU during
      computation.
    * This function discards the batch information. Please use
      :func:`dgl.DGLGraph.set_batch_num_nodes`
      and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
      to maintain the information.

    Examples
    --------
    Assume that the graph has the following adjacency matrix: ::

       A = [[0, 0, 1],
            [1, 0, 1],
            [1, 1, 0]]

    >>> g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 'user', 'follows')
    >>> lg = g.line_graph()
    >>> lg
    Graph(num_nodes=5, num_edges=8,
    ndata_schemes={}
    edata_schemes={})
    >>> lg.edges()
    (tensor([0, 0, 1, 2, 2, 3, 4, 4]), tensor([3, 4, 0, 3, 4, 0, 1, 2]))
    >>> lg = g.line_graph(backtracking=False)
    >>> lg
    Graph(num_nodes=5, num_edges=4,
    ndata_schemes={}
    edata_schemes={})
    >>> lg.edges()
    (tensor([0, 1, 2, 4]), tensor([4, 0, 3, 1]))
    """
    assert g.is_homogeneous, "only homogeneous graph is supported"

    original_device = g.device
    # The C routine is fed a CPU copy of the graph index; the result is moved
    # back to the device of the input graph afterwards.
    cpu_index = g._graph.copy_to(nd.cpu())
    result = DGLGraph(_CAPI_DGLHeteroLineGraph(cpu_index, backtracking))
    result = result.to(original_device)

    if not shared:
        return result

    # edge features of g become the node features of the line graph
    edge_frames = utils.extract_edge_subframes(g, None)
    utils.set_new_frames(result, node_frames=edge_frames)
    return result


DGLGraph.line_graph = utils.alias_func(line_graph)
def khop_adj(g, k):
    """Compute :math:`A^k`, the :attr:`k`-th power of the adjacency matrix
    :math:`A` of the graph :math:`g`.

    The result is built from a dense NumPy ``float32`` matrix. The graph must
    be homogeneous.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    k : int
        The :math:`k` in :math:`A^k`.

    Returns
    -------
    Tensor
        The returned tensor.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0]))
    >>> dgl.khop_adj(g, 1)
    tensor([[1., 1., 0., 0., 0.],
            [0., 1., 1., 0., 0.],
            [0., 0., 1., 1., 0.],
            [0., 0., 0., 1., 1.],
            [1., 0., 0., 0., 1.]])
    >>> dgl.khop_adj(g, 3)
    tensor([[1., 3., 3., 1., 0.],
            [0., 1., 3., 3., 1.],
            [1., 0., 1., 3., 3.],
            [3., 1., 0., 1., 3.],
            [3., 3., 1., 0., 1.]])
    """
    assert g.is_homogeneous, "only homogeneous graph is supported"
    # reuse a sparse format the graph has already materialized
    sparse_fmt = g.formats()["created"][0]
    adjacency = g.adj_external(transpose=False, scipy_fmt=sparse_fmt)
    powered = adjacency**k
    dense = powered.todense().astype(np.float32)
    return F.tensor(dense)
def khop_graph(g, k, copy_ndata=True):
    """Build the graph that links every node to its :attr:`k`-hop neighbors
    in the original graph.

    An edge from node ``u`` to node ``v`` exists in the new graph if and only
    if a path with length :attr:`k` exists from ``u`` to ``v`` in the original
    graph; one edge is created per such path.

    The adjacency matrix of the returned graph is :math:`A^k`
    (where :math:`A` is the adjacency matrix of :math:`g`).

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    k : int
        The :math:`k` in `k`-hop graph.
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from the
        original graph.

        If False, the new graph will not have any node features.

        (Default: True)

    Returns
    -------
    DGLGraph
        The returned graph.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node
    feature tensors with the input graph. Hence, users should try to avoid
    in-place operations which will be visible to both graphs.

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------

    Below gives an easy example:

    >>> import dgl
    >>> g = dgl.graph(([0, 1], [1, 2]))
    >>> g_2 = dgl.transforms.khop_graph(g, 2)
    >>> print(g_2.edges())
    (tensor([0]), tensor([2]))

    A more complicated example:

    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0]))
    >>> dgl.khop_graph(g, 1)
    DGLGraph(num_nodes=5, num_edges=10,
             ndata_schemes={}
             edata_schemes={})
    >>> dgl.khop_graph(g, 3)
    DGLGraph(num_nodes=5, num_edges=40,
             ndata_schemes={}
             edata_schemes={})
    """
    assert g.is_homogeneous, "only homogeneous graph is supported"
    node_count = g.num_nodes()

    sparse_fmt = g.formats()["created"][0]
    powered = g.adj_external(transpose=False, scipy_fmt=sparse_fmt) ** k
    coo = powered.tocoo()

    # Each stored value is used as a repeat count, so that the (row, col)
    # pair appears that many times in the edge list.
    path_counts = coo.data
    src = np.repeat(coo.row, path_counts)
    dst = np.repeat(coo.col, path_counts)

    # TODO(zihao): we should support creating multi-graph from scipy sparse matrix
    # in the future.
    new_g = convert.graph(
        (src, dst), num_nodes=node_count, idtype=g.idtype, device=g.device
    )

    # handle ndata
    if copy_ndata:
        frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(new_g, node_frames=frames)

    return new_g
def reverse(
    g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None
):
    r"""Return a new graph in which every edge of the input graph is reversed.

    The reverse (also called converse, transpose) of a graph with edges
    :math:`(i_1, j_1), (i_2, j_2), \cdots` of type ``(U, E, V)`` is a new graph with edges
    :math:`(j_1, i_1), (j_2, i_2), \cdots` of type ``(V, E, U)``.

    The returned graph shares the data structure with the original graph, i.e. dgl.reverse
    will not create extra storage for the reversed graph.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the reversed graph are copied from the
        original graph. If False, the reversed graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the reversed graph are copied from the
        original graph. If False, the reversed graph will not have any edge features.
        (Default: False)
    share_ndata: bool, optional
        Deprecated keyword-only name of :attr:`copy_ndata`. When not None, a
        warning is issued and its value overrides :attr:`copy_ndata`.
    share_edata: bool, optional
        Deprecated keyword-only name of :attr:`copy_edata`. When not None, a
        warning is issued and its value overrides :attr:`copy_edata`.

    Returns
    -------
    DGLGraph
        The reversed graph.

    Raises
    ------
    DGLError
        If :attr:`g` is a block.

    Notes
    -----
    If :attr:`copy_ndata` or :attr:`copy_edata` is True,
    the resulting graph will share the node or edge feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs.

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    **Homogeneous graphs**

    Create a graph to reverse.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.]])

    Reverse the graph.

    >>> rg = dgl.reverse(g, copy_edata=True)
    >>> rg.ndata['h']
    tensor([[0.],
            [1.],
            [2.]])

    The i-th edge in the reversed graph corresponds to the i-th edge in the
    original graph. When :attr:`copy_edata` is True, they have the same features.

    >>> rg.edges()
    (tensor([1, 2, 0]), tensor([0, 1, 2]))
    >>> rg.edata['h']
    tensor([[3.],
            [4.],
            [5.]])

    **Heterogenenous graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The resulting graph will have edge types
    ``('user', 'follows', 'user)`` and ``('game', 'plays', 'user')``.

    >>> rg = dgl.reverse(g, copy_ndata=True)
    >>> rg
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'follows', 'user'): 2, ('game', 'plays', 'user'): 3},
          metagraph=[('user', 'user'), ('game', 'user')])
    >>> rg.edges(etype='follows')
    (tensor([1, 2]), tensor([0, 2]))
    >>> rg.edges(etype='plays')
    (tensor([2, 1, 1]), tensor([1, 2, 1]))
    >>> rg.nodes['game'].data['hv']
    tensor([[1.],
            [1.],
            [1.]])
    >>> rg.edges['plays'].data
    {}
    """
    # honor the deprecated keyword names
    if share_ndata is not None:
        dgl_warning("share_ndata argument has been renamed to copy_ndata.")
        copy_ndata = share_ndata
    if share_edata is not None:
        dgl_warning("share_edata argument has been renamed to copy_edata.")
        copy_edata = share_edata

    if g.is_block:
        # TODO(0.5 release, xiangsx) need to handle BLOCK
        # currently reversing a block results in undefined behavior
        raise DGLError("Reversing a block graph is not supported.")

    reversed_index = g._graph.reverse()
    rg = DGLGraph(reversed_index, g.ntypes, g.etypes)

    # node features keep their node type
    if copy_ndata:
        for node_type in g.ntypes:
            rg.nodes[node_type].data.update(g.nodes[node_type].data)

    # edge features move to the relation with swapped endpoint types
    if copy_edata:
        for canonical in g.canonical_etypes:
            src_type, rel, dst_type = canonical
            rg.edges[dst_type, rel, src_type].data.update(
                g.edges[canonical].data
            )

    return rg
DGLGraph.reverse = utils.alias_func(reverse)


def to_simple_graph(g):
    """Convert the graph to a simple graph with no multi-edge.

    DEPRECATED: renamed to dgl.to_simple. A warning is issued and the call is
    forwarded to :func:`to_simple` with default arguments.

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    DGLGraph
        A simple graph.

    Notes
    -----
    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.
    """
    dgl_warning("dgl.to_simple_graph is renamed to dgl.to_simple in v0.5.")
    simple = to_simple(g)
    return simple


def laplacian_lambda_max(g):
    """Compute the largest eigenvalue of the normalized symmetric Laplacian
    of a graph.

    The Laplacian is formed as :math:`I - D^{-1/2} A D^{-1/2}`, where
    :math:`D` holds the in-degrees clipped to be at least 1.

    If the graph is batched from multiple graphs, return the list of the
    largest eigenvalue for each graph instead.

    Parameters
    ----------
    g : DGLGraph
        The input graph, it must be a bi-directed homogeneous graph, i.e., every edge
        should have an accompanied reverse edge in the graph.
        The graph can be batched from multiple graphs.

    Returns
    -------
    list[float]
        A list where the i-th item indicates the largest eigenvalue
        of i-th graph in :attr:`g`.

        In the case where the function takes a single graph, it will return a list
        consisting of a single element.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.graph(([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], [1, 2, 3, 4, 0, 4, 0, 1, 2, 3]))
    >>> dgl.laplacian_lambda_max(g)
    [1.809016994374948]
    """
    eigenvalues = []
    for component in batch.unbatch(g):
        node_count = component.num_nodes()
        sparse_fmt = component.formats()["created"][0]
        adjacency = component.adj_external(
            transpose=True, scipy_fmt=sparse_fmt
        ).astype(float)
        # D^{-1/2}; degrees are clipped to 1 to avoid dividing by zero
        inv_sqrt_degree = F.asnumpy(component.in_degrees()).clip(1) ** -0.5
        scaling = sparse.diags(inv_sqrt_degree, dtype=float)
        laplacian = sparse.eye(node_count) - scaling * adjacency * scaling
        # only the single largest-magnitude eigenvalue is requested
        largest = scipy.sparse.linalg.eigs(
            laplacian, 1, which="LM", return_eigenvectors=False
        )
        eigenvalues.append(largest[0].real)
    return eigenvalues
def metapath_reachable_graph(g, metapath):
    """Return a graph where the successors of any node ``u`` are nodes reachable from ``u`` by
    the given metapath.

    If the beginning node type ``s`` and ending node type ``t`` are the same, it will return
    a homogeneous graph with node type ``s = t``.  Otherwise, a unidirectional bipartite graph
    with source node type ``s`` and destination node type ``t`` is returned.

    In both cases, two nodes ``u`` and ``v`` will be connected with an edge ``(u, v)`` if
    there exists one path matching the metapath from ``u`` to ``v``.

    The result graph keeps the node set of type ``s`` and ``t`` in the original graph even if
    they might have no neighbor.

    The features of the source/destination node type in the original graph would be copied to
    the new graph.

    Parameters
    ----------
    g : DGLGraph
        The input graph
    metapath : list[str or tuple of str]
        Metapath in the form of a list of edge types. Must not be empty.

    Returns
    -------
    DGLGraph
        A homogeneous or unidirectional bipartite graph whose single edge type
        is named ``'_E'``. It is created with the ID type and device of the
        input graph.

    Raises
    ------
    DGLError
        If :attr:`metapath` is empty.

    Notes
    -----
    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes`
    and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    >>> g = dgl.heterograph({
    ...     ('A', 'AB', 'B'): ([0, 1, 2], [1, 2, 3]),
    ...     ('B', 'BA', 'A'): ([1, 2, 3], [0, 1, 2])})
    >>> new_g = dgl.metapath_reachable_graph(g, ['AB', 'BA'])
    >>> new_g.edges(order='eid')
    (tensor([0, 1, 2]), tensor([0, 1, 2]))
    """
    # Without this guard an empty metapath fails further down with an
    # unrelated AttributeError on a bool.
    if len(metapath) == 0:
        raise DGLError("Expect a non-empty metapath, got an empty one.")

    # Multiply the per-relation adjacency matrices along the metapath; an
    # entry is non-zero iff at least one matching path exists.
    adj = 1
    for etype in metapath:
        adj = adj * g.adj_external(
            etype=etype, scipy_fmt="csr", transpose=False
        )

    # only reachability matters, not the number of paths
    adj = (adj != 0).tocsr()
    srctype = g.to_canonical_etype(metapath[0])[0]
    dsttype = g.to_canonical_etype(metapath[-1])[2]
    new_g = convert.heterograph(
        {(srctype, "_E", dsttype): adj.nonzero()},
        {srctype: adj.shape[0], dsttype: adj.shape[1]},
        idtype=g.idtype,
        device=g.device,
    )

    # copy srcnode features
    new_g.nodes[srctype].data.update(g.nodes[srctype].data)
    # copy dstnode features
    if srctype != dsttype:
        new_g.nodes[dsttype].data.update(g.nodes[dsttype].data)

    return new_g
def add_nodes(g, num, data=None, ntype=None):
    r"""Return a copy of the graph with the given number of nodes appended.

    The input graph is cloned first and is left untouched. The new nodes will
    have IDs starting from ``g.num_nodes(ntype)``.

    Parameters
    ----------
    num : int
        The number of nodes to add.
    data : dict[str, Tensor], optional
        Feature data of the added nodes. The keys are feature names
        while the values are feature data.
    ntype : str, optional
        The node type name. Can be omitted if there is
        only one type of nodes in the graph.

    Returns
    -------
    DGLGraph
        The graph with newly added nodes.

    Notes
    -----
    * For features in :attr:`g` but not in :attr:`data`,
      DGL assigns zero features for the newly added nodes.
    * For feature in :attr:`data` but not in :attr:`g`, DGL assigns zero features
      for the existing nodes in the graph.
    * This function discards the batch information. Please use
      :func:`dgl.DGLGraph.set_batch_num_nodes`
      and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
      to maintain the information.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_nodes()
    3
    >>> g = dgl.add_nodes(g, 2)
    >>> g.num_nodes()
    5

    If the graph has some node features and new nodes are added without
    features, their features will be filled with zeros.

    >>> g.ndata['h'] = torch.ones(5, 1)
    >>> g = dgl.add_nodes(g, 1)
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.]])

    Assign features for the new nodes.

    >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])

    Since :attr:`data` contains new feature fields, the features for existing nodes
    will be filled with zeros.

    >>> g.ndata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.num_nodes('user')
    3
    >>> g = dgl.add_nodes(g, 2, ntype='user')
    >>> g.num_nodes('user')
    5

    See Also
    --------
    remove_nodes
    add_edges
    remove_edges
    """
    # mutate a clone so the caller's graph stays as it was
    extended = g.clone()
    extended.add_nodes(num, data=data, ntype=ntype)
    return extended
The allowed type name formats are: * ``(str, str, str)`` for source node type, edge type and destination node type. * or one ``str`` edge type name if the name can uniquely identify a triplet format in the graph. Can be omitted if the graph has only one type of edges. Return ------ DGLGraph The graph with newly added edges. Notes ----- * If the end nodes of the given edges do not exist in :attr:`g`, :func:`dgl.add_nodes` is invoked to add those nodes. The node features of the new nodes will be filled with zeros. * For features in :attr:`g` but not in :attr:`data`, DGL assigns zero features for the newly added nodes. * For feature in :attr:`data` but not in :attr:`g`, DGL assigns zero features for the existing nodes in the graph. * This function discards the batch information. Please use :func:`dgl.DGLGraph.set_batch_num_nodes` and :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph to maintain the information. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch **Homogeneous Graphs** >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2]))) >>> g.num_edges() 2 >>> g = dgl.add_edges(g, torch.tensor([1, 3]), torch.tensor([0, 1])) >>> g.num_edges() 4 Since ``u`` or ``v`` contains a non-existing node ID, the nodes are added implicitly. >>> g.num_nodes() 4 If the graph has some edge features and new edges are added without features, their features will be filled with zeros. >>> g.edata['h'] = torch.ones(4, 1) >>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1])) >>> g.edata['h'] tensor([[1.], [1.], [1.], [1.], [0.]]) You can also assign features for the new edges in adding new edges. >>> g = dgl.add_edges(g, torch.tensor([0, 0]), torch.tensor([2, 2]), ... {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)}) >>> g.edata['h'] tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]]) Since :attr:`data` contains new feature fields, the features for old edges will be filled with zeros. 
def remove_edges(g, eids, etype=None, store_ids=False):
    r"""Return a copy of the graph with the specified edges removed.

    The features of the removed edges are deleted as well. The edges must
    exist in the graph. The resulting graph has the same number of nodes as
    the input one, even if some nodes become isolated after the edge removal.

    Parameters
    ----------
    g : DGLGraph
        The graph to remove edges from.
    eids : int, Tensor, iterable[int]
        The IDs of the edges to remove.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and
          destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted nodes and edges
        in the ``ndata`` and ``edata`` of the resulting graph under name
        ``dgl.NID`` and ``dgl.EID``, respectively.

    Returns
    -------
    DGLGraph
        The graph with edges deleted.

    Notes
    -----
    This function preserves the batch information.

    Examples
    --------
    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g = dgl.remove_edges(g, torch.tensor([0, 1]))
    >>> g
    Graph(num_nodes=3, num_edges=1,
        ndata_schemes={}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.edges('all')
    (tensor([2]), tensor([2]), tensor([0]))
    >>> g.edata['he']
    tensor([[2.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g = dgl.remove_edges(g, torch.tensor([0, 1]), 'plays')
    >>> g.edges('all', etype='plays')
    (tensor([1, 2]), tensor([1, 1]), tensor([0, 1]))

    See Also
    --------
    add_nodes
    add_edges
    remove_nodes
    """
    # Mutate a clone so that the caller's graph object is not the one edited.
    new_g = g.clone()
    new_g.remove_edges(eids, etype=etype, store_ids=store_ids)
    return new_g
def remove_nodes(g, nids, ntype=None, store_ids=False):
    r"""Return a copy of the graph with the specified nodes removed.

    The features of the removed nodes are deleted as well. Edges that
    connect from/to the nodes will be removed too. After the removal, DGL
    re-labels the remaining nodes and edges with IDs from 0.

    Parameters
    ----------
    g : DGLGraph
        The graph to remove nodes from.
    nids : int, Tensor, iterable[int]
        The nodes to be removed.
    ntype : str, optional
        The type of the nodes to remove. Can be omitted if there is only one
        node type in the graph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted nodes and edges
        in the ``ndata`` and ``edata`` of the resulting graph under name
        ``dgl.NID`` and ``dgl.EID``, respectively.

    Returns
    -------
    DGLGraph
        The graph with nodes deleted.

    Notes
    -----
    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes` and
    :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g = dgl.remove_nodes(g, torch.tensor([0, 1]))
    >>> g
    Graph(num_nodes=1, num_edges=1,
        ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.ndata['hv']
    tensor([[2.]])
    >>> g.edata['he']
    tensor([[2.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g = dgl.remove_nodes(g, torch.tensor([0, 1]), ntype='game')
    >>> g.num_nodes('user')
    3
    >>> g.num_nodes('game')
    0
    >>> g.num_edges('plays')
    0

    See Also
    --------
    add_nodes
    add_edges
    remove_edges
    """
    # Mutate a clone so that the caller's graph object is not the one edited.
    new_g = g.clone()
    new_g.remove_nodes(nids, ntype=ntype, store_ids=store_ids)
    return new_g
def add_self_loop(g, edge_feat_names=None, fill_data=1.0, etype=None):
    r"""Add self-loops for each node in the graph and return a new graph.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    edge_feat_names : list[str], optional
        The names of the self-loop features to apply ``fill_data``. If None,
        it will apply ``fill_data`` to all self-loop features. Default: None.
    fill_data : int, float or str, optional
        The value to fill the self-loop features. Default: 1.

        * If ``fill_data`` is ``int`` or ``float``, self-loop features will
          be directly given by ``fill_data``.
        * If ``fill_data`` is ``str``, self-loop features will be generated
          by aggregating the features of the incoming edges of the
          corresponding nodes. The supported aggregation are: ``'mean'``,
          ``'sum'``, ``'max'``, ``'min'``.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and
          destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    DGLGraph
        The graph with self-loops.

    Raises
    ------
    DGLError
        If the source and destination node types of :attr:`etype` differ,
        or if ``fill_data`` is a string that is not a supported aggregation
        (checked while filling a feature).

    Notes
    -----
    * The function only supports homogeneous graphs or heterogeneous graphs
      but the relation graph specified by the :attr:`etype` argument is
      homogeneous.
    * The function adds self-loops regardless of whether they already exist
      or not. If one wishes to have exactly one self-loop for every node,
      call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
    * This function discards the batch information. Please use
      :func:`dgl.DGLGraph.set_batch_num_nodes` and
      :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
      to maintain the information.

    Examples
    --------
    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g = dgl.add_self_loop(g, fill_data='sum')
    >>> g
    Graph(num_nodes=3, num_edges=6,
        ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.edata['he']
    tensor([[0.], [1.], [2.], [2.], [1.], [0.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
    ...                                   torch.tensor([0, 1])),
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
    ...                                 torch.tensor([0, 1]))})
    >>> g = dgl.add_self_loop(g, etype='follows')
    >>> g
    Graph(num_nodes={'user': 3, 'game': 2},
        num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5},
        metagraph=[('user', 'user'), ('user', 'game')])
    """
    etype = g.to_canonical_etype(etype)
    srctype, dsttype = etype[0], etype[2]
    # A self-loop only makes sense when both endpoints share a node type.
    if srctype != dsttype:
        raise DGLError(
            "add_self_loop does not support unidirectional bipartite graphs: {}. "
            "Please make sure the types of head node and tail node are identical."
            "".format(etype)
        )

    reduce_funcs = {
        "sum": function.sum,
        "mean": function.mean,
        "max": function.max,
        "min": function.min,
    }
    if edge_feat_names is None:
        edge_feat_names = g.edges[etype].data.keys()

    data = {}
    for feat_name in edge_feat_names:
        if isinstance(fill_data, (int, float)):
            feat = g.edges[etype].data[feat_name]
            dtype = feat.dtype
            fill_value = F.copy_to(
                F.astype(F.tensor([fill_data]), dtype), g.device
            )
            # One row per node; trailing dims (possibly none) follow the
            # existing feature, so 1-D and N-D features share this path.
            loop_shape = (g.num_nodes(srctype), *feat.shape[1:])
            data[feat_name] = F.zeros(loop_shape, dtype, g.device) + fill_value
        elif isinstance(fill_data, str):
            if fill_data not in reduce_funcs:
                raise DGLError("Unsupported aggregation: {}".format(fill_data))
            reducer = reduce_funcs[fill_data]
            # local_scope keeps the temporary "h" node feature out of ``g``.
            with g.local_scope():
                g.update_all(
                    function.copy_e(feat_name, "h"),
                    reducer("h", "h"),
                    etype=etype,
                )
                data[feat_name] = g.nodes[srctype].data["h"]
        # Any other ``fill_data`` type leaves the feature unspecified, as
        # before; the self-loop edges are then added without that feature.

    nodes = g.nodes(srctype)
    # An empty dict is passed as None, i.e. "no features given".
    return add_edges(g, nodes, nodes, data=data or None, etype=etype)


DGLGraph.add_self_loop = utils.alias_func(add_self_loop)
def remove_self_loop(g, etype=None):
    r"""Remove self-loops for each node in the graph and return a new graph.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and
          destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Returns
    -------
    DGLGraph
        The graph with self-loops of the given edge type removed.

    Raises
    ------
    DGLError
        If the source and destination node types of :attr:`etype` differ.

    Notes
    -----
    If a node has multiple self-loops, remove them all. Do nothing for nodes
    without self-loops.

    This function preserves the batch information.

    Examples
    --------
    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
    >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
    >>> g = dgl.remove_self_loop(g)
    >>> g
    Graph(num_nodes=3, num_edges=2,
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.edata['he']
    tensor([[0.], [3.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
    ...                                   torch.tensor([0, 0, 1, 1, 1])),
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
    ...                                 torch.tensor([0, 1]))
    ...     })
    >>> g = dgl.remove_self_loop(g, etype='follows')
    >>> g.num_nodes('user')
    3
    >>> g.num_nodes('game')
    2
    >>> g.num_edges('follows')
    2
    >>> g.num_edges('plays')
    2

    See Also
    --------
    add_self_loop
    """
    etype = g.to_canonical_etype(etype)
    # Self-loops can only exist when both endpoints share a node type.
    if etype[0] != etype[2]:
        raise DGLError(
            "remove_self_loop does not support unidirectional bipartite graphs: {}. "
            "Please make sure the types of head node and tail node are identical."
            "".format(etype)
        )
    # Edges are listed in edge-ID order, so the positions where u == v are
    # exactly the IDs of the self-loop edges.
    u, v = g.edges(form="uv", order="eid", etype=etype)
    self_loop_eids = F.tensor(F.nonzero_1d(u == v), dtype=F.dtype(u))
    return remove_edges(g, self_loop_eids, etype=etype)


DGLGraph.remove_self_loop = utils.alias_func(remove_self_loop)
def compact_graphs(
    graphs, always_preserve=None, copy_ndata=True, copy_edata=True
):
    """Given a list of graphs with the same set of nodes, find and eliminate
    the common isolated nodes across all graphs.

    This function requires the graphs to have the same set of nodes (i.e. the
    node types must be the same, and the number of nodes of each node type
    must be the same). The metagraph does not have to be the same.

    It finds all the nodes that have zero in-degree and zero out-degree in
    all the given graphs, and eliminates them from all the graphs.

    Useful for graph sampling where you have a giant graph but you only wish
    to perform message passing on a smaller graph with a (tiny) subset of
    nodes.

    Parameters
    ----------
    graphs : DGLGraph or list[DGLGraph]
        The graph, or list of graphs.

        All graphs must be on the same devices.

        All graphs must have the same set of nodes.
    always_preserve : Tensor or dict[str, Tensor], optional
        If a dict of node types and node ID tensors is given, the nodes of
        given node types would not be removed, regardless of whether they
        are isolated.

        If a Tensor is given, DGL assumes that all the graphs have one
        (same) node type.
    copy_ndata: bool, optional
        If True, the node features of the returned graphs are copied from
        the original graphs.

        If False, the returned graphs will not have any node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the returned graphs are copied from
        the original graphs.

        If False, the returned graphs will not have any edge features.

        (Default: True)

    Returns
    -------
    DGLGraph or list[DGLGraph]
        The compacted graph or list of compacted graphs. An empty input
        list yields an empty list.

        Each returned graph would have a feature ``dgl.NID`` containing the
        mapping of node IDs for each type from the compacted graph(s) to the
        original graph(s). Note that the mapping is the same for all the
        compacted graphs.

        All the returned graphs are on CPU.

    Raises
    ------
    DGLError
        If the first graph is a block.
    ValueError
        If ``always_preserve`` is a tensor but multiple node types exist.
    AssertionError
        If the graphs disagree on node types, ID type or device.

    Notes
    -----
    This function currently requires that the same node type of all graphs
    should have the same node type ID, i.e. the node types are *ordered* the
    same.

    If :attr:`copy_edata` is True, the resulting graph will share the edge
    feature tensors with the input graph. Hence, users should try to avoid
    in-place operations which will be visible to both graphs.

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes` and
    :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    The following code constructs a bipartite graph with 20 users and 10
    games, but only user #1 and #3, as well as game #3 and #5, have
    connections:

    >>> g = dgl.heterograph({('user', 'plays', 'game'): ([1, 3], [3, 5])},
    ...                     {'user': 20, 'game': 10})

    The following would compact the graph above to another bipartite graph
    with only two users and two games.

    >>> new_g = dgl.compact_graphs(g)
    >>> new_g.ndata[dgl.NID]
    {'user': tensor([1, 3]), 'game': tensor([3, 5])}

    The mapping tells us that only user #1 and #3 as well as game #3 and #5
    are kept. Furthermore, the first user and second user in the compacted
    graph maps to user #1 and #3 in the original graph. Games are similar.

    One can verify that the edge connections are kept the same in the
    compacted graph.

    >>> new_g.edges(form='all', order='eid', etype='plays')
    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))

    When compacting multiple graphs, nodes that do not have any connections
    in any of the given graphs are removed. So if you compact ``g`` and the
    following ``g2`` graphs together:

    >>> g2 = dgl.heterograph({('user', 'plays', 'game'): ([1, 6], [6, 8])},
    ...                      {'user': 20, 'game': 10})
    >>> new_g, new_g2 = dgl.compact_graphs([g, g2])
    >>> new_g.ndata[dgl.NID]
    {'user': tensor([1, 3, 6]), 'game': tensor([3, 5, 6, 8])}

    Then one can see that user #1 from both graphs, users #3 from the first
    graph, as well as user #6 from the second graph, are kept. Games are
    similar.

    Similarly, one can also verify the connections:

    >>> new_g.edges(form='all', order='eid', etype='plays')
    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> new_g2.edges(form='all', order='eid', etype='plays')
    (tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
    """
    return_single = not isinstance(graphs, Iterable)
    # Materialize so that len()/indexing work for any iterable of graphs.
    graphs = [graphs] if return_single else list(graphs)
    if not graphs:
        return []
    if graphs[0].is_block:
        raise DGLError("Compacting a block graph is not allowed.")

    # Ensure the node types are ordered the same.
    # TODO(BarclayII): we ideally need to remove this constraint.
    ntypes = graphs[0].ntypes
    idtype = graphs[0].idtype
    device = graphs[0].device
    for g in graphs:
        # The format arguments must be one tuple; without the parentheses
        # ``%`` only receives ``ntypes`` and raises TypeError instead of
        # reporting the mismatch.
        assert ntypes == g.ntypes, (
            "All graphs should have the same node types in the same order, "
            "got %s and %s" % (ntypes, g.ntypes)
        )
        assert (
            idtype == g.idtype
        ), "Expect graph data type to be {}, but got {}".format(
            idtype, g.idtype
        )
        assert device == g.device, (
            "All graphs must be on the same devices. "
            "Expect graph device to be {}, but got {}".format(device, g.device)
        )

    # Process the dictionary or tensor of "always preserve" nodes
    if always_preserve is None:
        always_preserve = {}
    elif not isinstance(always_preserve, Mapping):
        if len(ntypes) > 1:
            raise ValueError(
                "Node type must be given if multiple node types exist."
            )
        always_preserve = {ntypes[0]: always_preserve}

    always_preserve = utils.prepare_tensor_dict(
        graphs[0], always_preserve, "always_preserve"
    )
    always_preserve_nd = []
    for ntype in ntypes:
        nodes = always_preserve.get(ntype, None)
        if nodes is None:
            # No preserved nodes for this type: pass an empty ID tensor.
            nodes = F.copy_to(F.tensor([], idtype), device)
        always_preserve_nd.append(F.to_dgl_nd(nodes))

    # Compact and construct heterographs
    new_graph_indexes, induced_nodes = _CAPI_DGLCompactGraphs(
        [g._graph for g in graphs], always_preserve_nd
    )
    induced_nodes = [F.from_dgl_nd(nodes) for nodes in induced_nodes]

    new_graphs = [
        DGLGraph(new_graph_index, graph.ntypes, graph.etypes)
        for new_graph_index, graph in zip(new_graph_indexes, graphs)
    ]

    if copy_ndata:
        for g, new_g in zip(graphs, new_graphs):
            node_frames = utils.extract_node_subframes(g, induced_nodes)
            utils.set_new_frames(new_g, node_frames=node_frames)
    if copy_edata:
        for g, new_g in zip(graphs, new_graphs):
            edge_frames = utils.extract_edge_subframes(g, None)
            utils.set_new_frames(new_g, edge_frames=edge_frames)

    return new_graphs[0] if return_single else new_graphs
def _coalesce_edge_frame(g, edge_maps, counts, aggregator):
    r"""Coalesce edge features of duplicate edges via given aggregator in g.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    edge_maps : List[Tensor]
        The edge mapping corresponding to each edge type in g.
    counts : List[Tensor]
        The number of duplicated edges from the original graph for each edge
        type.
    aggregator : str
        Indicates how to coalesce edge features, could be ``arbitrary``,
        ``sum`` or ``mean``.

    Returns
    -------
    List[Frame]
        The frames corresponding to each edge type.

    Raises
    ------
    DGLError
        If ``aggregator`` is not one of the supported values.
    """
    num_etypes = len(g.canonical_etypes)

    if aggregator == "arbitrary":
        eids = []
        for i in range(num_etypes):
            # ``return_index`` gives, for each distinct mapped edge ID, the
            # position of its first occurrence in the original edge list.
            _, first_occurrence = np.unique(
                F.asnumpy(edge_maps[i]), return_index=True
            )
            eids.append(F.zerocopy_from_numpy(first_occurrence))
        return utils.extract_edge_subframes(g, eids)

    if aggregator in ("sum", "mean"):
        edge_frames = []
        for i in range(num_etypes):
            feat_idx = edge_maps[i]
            # Number of rows of the coalesced frame = number of distinct
            # mapped edge IDs.
            num_rows = len(np.unique(F.asnumpy(feat_idx)))
            coalesced = {}
            for key, col in g._edge_frames[i]._columns.items():
                data = col.data
                new_data = F.scatter_add(data, feat_idx, num_rows)
                if aggregator == "mean":
                    norm = F.astype(counts[i], F.dtype(data))
                    # Append singleton dims so the per-edge count lines up
                    # with the feature's trailing dimensions.
                    norm = F.reshape(
                        norm, (F.shape(norm)[0],) + (1,) * (F.ndim(data) - 1)
                    )
                    new_data /= norm
                coalesced[key] = new_data
            edge_frames.append(Frame(data=coalesced, num_rows=num_rows))
        return edge_frames

    raise DGLError(
        "Aggregator {} not recognized, cannot coalesce edge feature in the "
        "specified way".format(aggregator)
    )
def to_simple(
    g,
    return_counts="count",
    writeback_mapping=False,
    copy_ndata=True,
    copy_edata=False,
    aggregator="arbitrary",
):
    r"""Convert a graph to a simple graph without parallel edges and return.

    For a heterogeneous graph with multiple edge types, DGL treats edges with
    the same edge type and endpoints as parallel edges and removes them.
    Optionally, one can get the number of parallel edges by specifying the
    :attr:`return_counts` argument. To get a mapping from the edge IDs in
    the input graph to the edge IDs in the resulting graph, set
    :attr:`writeback_mapping` to true.

    Parameters
    ----------
    g : DGLGraph
        The input graph. Must be on CPU.
    return_counts : str, optional
        If given, the count of each edge in the original graph will be
        stored as edge features under the name ``return_counts``. The old
        features with the same name will be replaced.

        (Default: "count")
    writeback_mapping: bool, optional
        If True, return an extra write-back mapping for each edge type. The
        write-back mapping is a tensor recording the mapping from the edge
        IDs in the input graph to the edge IDs in the result graph. If the
        graph is heterogeneous, DGL returns a dictionary of edge types and
        such tensors.

        If False, only the simple graph is returned.

        (Default: False)
    copy_ndata: bool, optional
        If True, the node features of the simple graph are copied from the
        original graph.

        If False, the simple graph will not have any node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the simple graph are copied from the
        original graph. If there exists duplicate edges between two nodes
        (u, v), the feature of the edge is the aggregation of edge feature
        of duplicate edges.

        If False, the simple graph will not have any edge features.

        (Default: False)
    aggregator: str, optional
        Indicate how to coalesce edge feature of duplicate edges. If
        ``arbitrary``, select one of the duplicate edges' feature. If
        ``sum``, compute the summation of duplicate edges' feature. If
        ``mean``, compute the average of duplicate edges' feature.

        (Default: ``arbitrary``)

    Returns
    -------
    DGLGraph
        The graph.
    tensor or dict of tensor
        The writeback mapping. Only when ``writeback_mapping`` is True.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node
    feature tensors with the input graph. Hence, users should try to avoid
    in-place operations which will be visible to both graphs.

    This function discards the batch information. Please use
    :func:`dgl.DGLGraph.set_batch_num_nodes` and
    :func:`dgl.DGLGraph.set_batch_num_edges` on the transformed graph
    to maintain the information.

    Examples
    --------
    **Homogeneous Graphs**

    Create a graph for demonstrating to_simple API. In the original graph,
    there are multiple edges between 1 and 2.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2, 1]), th.tensor([1, 2, 0, 2])))
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.], [6.]])

    Convert the graph to a simple graph. The return counts is stored in the
    edge feature 'cnt' and the writeback mapping is returned in a tensor.

    >>> sg, wm = dgl.to_simple(g, return_counts='cnt', writeback_mapping=True)
    >>> sg.ndata['h']
    tensor([[0.], [1.], [2.]])
    >>> u, v, eid = sg.edges(form='all')
    >>> u
    tensor([0, 1, 2])
    >>> v
    tensor([1, 2, 0])
    >>> eid
    tensor([0, 1, 2])
    >>> sg.edata['cnt']
    tensor([1, 2, 1])
    >>> wm
    tensor([0, 1, 2, 1])
    >>> 'h' in sg.edata
    False

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The return counts is stored in the default edge feature 'count' for each
    edge type.

    >>> sg, wm = dgl.to_simple(g, copy_ndata=False, writeback_mapping=True)
    >>> sg
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'wins', 'user'): 4, ('user', 'plays', 'game'): 3},
          metagraph=[('user', 'user'), ('user', 'game')])
    >>> sg.edges(etype='wins')
    (tensor([0, 2, 0, 2]), tensor([1, 1, 2, 0]))
    >>> wm[('user', 'wins', 'user')]
    tensor([0, 1, 2, 1, 3])
    >>> sg.edges(etype='plays')
    (tensor([2, 1, 1]), tensor([1, 2, 1]))
    >>> wm[('user', 'plays', 'game')]
    tensor([0, 1, 2])
    >>> 'hv' in sg.nodes['game'].data
    False
    >>> 'he' in sg.edges['plays'].data
    False
    >>> sg.edata['count']
    {('user', 'wins', 'user'): tensor([1, 2, 1, 1]),
     ('user', 'plays', 'game'): tensor([1, 1, 1])}
    """
    assert g.device == F.cpu(), "the graph must be on CPU"
    if g.is_block:
        raise DGLError("Cannot convert a block graph to a simple graph.")

    simple_graph_index, raw_counts, raw_edge_maps = _CAPI_DGLToSimpleHetero(
        g._graph
    )
    simple_graph = DGLGraph(simple_graph_index, g.ntypes, g.etypes)
    # Convert the per-edge-type results into backend tensors.
    counts = [F.from_dgl_nd(arr) for arr in raw_counts]
    edge_maps = [F.from_dgl_nd(arr) for arr in raw_edge_maps]

    if copy_ndata:
        utils.set_new_frames(
            simple_graph, node_frames=utils.extract_node_subframes(g, None)
        )
    if copy_edata:
        utils.set_new_frames(
            simple_graph,
            edge_frames=_coalesce_edge_frame(g, edge_maps, counts, aggregator),
        )

    # Written after the feature copy, so a copied feature of the same name
    # is replaced by the counts.
    if return_counts is not None:
        for count, canonical_etype in zip(counts, g.canonical_etypes):
            simple_graph.edges[canonical_etype].data[return_counts] = count

    if not writeback_mapping:
        return simple_graph
    if len(edge_maps) == 1:
        # single edge type: return the bare tensor
        return simple_graph, edge_maps[0]
    # multiple edge types: key each mapping by its canonical edge type
    return simple_graph, dict(zip(g.canonical_etypes, edge_maps))


DGLGraph.to_simple = utils.alias_func(to_simple)
>>> sg, wm = dgl.to_simple(g, copy_ndata=False, writeback_mapping=True) >>> sg Graph(num_nodes={'game': 3, 'user': 3}, num_edges={('user', 'wins', 'user'): 4, ('game', 'plays', 'user'): 3}, metagraph=[('user', 'user'), ('game', 'user')]) >>> sg.edges(etype='wins') (tensor([0, 2, 0, 2]), tensor([1, 1, 2, 0])) >>> wm[('user', 'wins', 'user')] tensor([0, 1, 2, 1, 3]) >>> sg.edges(etype='plays') (tensor([2, 1, 1]), tensor([1, 2, 1])) >>> wm[('user', 'plays', 'game')] tensor([0, 1, 2]) >>> 'hv' in sg.nodes['game'].data False >>> 'he' in sg.edges['plays'].data False >>> sg.edata['count'] {('user', 'wins', 'user'): tensor([1, 2, 1, 1]) ('user', 'plays', 'game'): tensor([1, 1, 1])} """ assert g.device == F.cpu(), "the graph must be on CPU" if g.is_block: raise DGLError("Cannot convert a block graph to a simple graph.") simple_graph_index, counts, edge_maps = _CAPI_DGLToSimpleHetero(g._graph) simple_graph = DGLGraph(simple_graph_index, g.ntypes, g.etypes) counts = [F.from_dgl_nd(count) for count in counts] edge_maps = [F.from_dgl_nd(edge_map) for edge_map in edge_maps] if copy_ndata: node_frames = utils.extract_node_subframes(g, None) utils.set_new_frames(simple_graph, node_frames=node_frames) if copy_edata: new_edge_frames = _coalesce_edge_frame(g, edge_maps, counts, aggregator) utils.set_new_frames(simple_graph, edge_frames=new_edge_frames) if return_counts is not None: for count, canonical_etype in zip(counts, g.canonical_etypes): simple_graph.edges[canonical_etype].data[return_counts] = count if writeback_mapping: # single edge type if len(edge_maps) == 1: return simple_graph, edge_maps[0] # multiple edge type else: wb_map = {} for edge_map, canonical_etype in zip(edge_maps, g.canonical_etypes): wb_map[canonical_etype] = edge_map return simple_graph, wb_map return simple_graph DGLGraph.to_simple = utils.alias_func(to_simple) def _unitgraph_less_than_int32(g): """Check if a graph with only one edge type has more than 2 ** 31 - 1 nodes or edges. 
""" num_edges = g.num_edges() num_nodes = max(g.num_nodes(g.ntypes[0]), g.num_nodes(g.ntypes[-1])) return max(num_nodes, num_edges) <= (1 << 31) - 1 def adj_product_graph(A, B, weight_name, etype="_E"): r"""Create a weighted graph whose adjacency matrix is the product of the adjacency matrices of the given two graphs. Namely, given two weighted graphs :attr:`A` and :attr:`B`, whose rows represent source nodes and columns represent destination nodes, this function returns a new graph whose weighted adjacency matrix is :math:`\mathrm{adj}(A) \times \mathrm{adj}(B)`. The two graphs must be simple graphs, and must have only one edge type. Moreover, the number of nodes of the destination node type of :attr:`A` must be the same as the number of nodes of the source node type of :attr:`B`. The source node type of the returned graph will be the same as the source node type of graph :attr:`A`. The destination node type of the returned graph will be the same as the destination node type of graph :attr:`B`. If the two node types are the same, the returned graph will be homogeneous. Otherwise, it will be a bipartite graph. Unlike ``scipy``, if an edge in the result graph has zero weight, it will not be removed from the graph. Notes ----- This function works on both CPU and GPU. For GPU, the number of nodes and edges must be less than the maximum of ``int32`` (i.e. ``2 ** 31 - 1``) due to restriction of cuSPARSE. The edge weights returned by this function is differentiable w.r.t. the input edge weights. If the graph format is restricted, both graphs must have CSR available. Parameters ---------- A : DGLGraph The graph as left operand. B : DGLGraph The graph as right operand. weight_name : str The feature name of edge weight of both graphs. The corresponding edge feature must be scalar. etype : str, optional The edge type of the returned graph. Returns ------- DGLGraph The new graph. The edge weight of the returned graph will have the same feature name as :attr:`weight_name`. 
def adj_product_graph(A, B, weight_name, etype="_E"):
    r"""Create a weighted graph whose adjacency matrix is the product of
    the adjacency matrices of the given two graphs.

    Namely, given two weighted graphs :attr:`A` and :attr:`B`, whose rows
    represent source nodes and columns represent destination nodes, this
    function returns a new graph whose weighted adjacency matrix is
    :math:`\mathrm{adj}(A) \times \mathrm{adj}(B)`.

    The two graphs must be simple graphs, and must have only one edge type.
    Moreover, the number of nodes of the destination node type of :attr:`A`
    must be the same as the number of nodes of the source node type of
    :attr:`B`.

    The source node type of the returned graph will be the same as the
    source node type of graph :attr:`A`. The destination node type of the
    returned graph will be the same as the destination node type of graph
    :attr:`B`. If the two node types are the same, the returned graph will
    be homogeneous. Otherwise, it will be a bipartite graph.

    Unlike ``scipy``, if an edge in the result graph has zero weight, it
    will not be removed from the graph.

    Parameters
    ----------
    A : DGLGraph
        The graph as left operand.
    B : DGLGraph
        The graph as right operand.
    weight_name : str
        The feature name of edge weight of both graphs.

        The corresponding edge feature must be scalar.
    etype : str, optional
        The edge type of the returned graph.

    Returns
    -------
    DGLGraph
        The new graph. The edge weight of the returned graph will have the
        same feature name as :attr:`weight_name`.

    Notes
    -----
    This function works on both CPU and GPU. For GPU, the number of nodes
    and edges must be less than the maximum of ``int32`` (i.e.
    ``2 ** 31 - 1``) due to restriction of cuSPARSE.

    The edge weights returned by this function is differentiable w.r.t. the
    input edge weights.

    If the graph format is restricted, both graphs must have CSR available.

    Examples
    --------
    The following shows weighted adjacency matrix multiplication between two
    bipartite graphs. You can also perform this between two homogeneous
    graphs, or one homogeneous graph and one bipartite graph, as long as the
    numbers of nodes of the same type match.

    >>> A = dgl.heterograph({
    ...     ('A', 'AB', 'B'): ([2, 2, 0, 2, 0, 1], [2, 1, 0, 0, 2, 2])},
    ...     num_nodes_dict={'A': 3, 'B': 4})
    >>> B = dgl.heterograph({
    ...     ('B', 'BA', 'A'): ([0, 3, 2, 1, 3, 3], [1, 2, 0, 2, 1, 0])},
    ...     num_nodes_dict={'A': 3, 'B': 4})

    If your graph is a multigraph, you will need to call
    :func:`dgl.to_simple` to convert it into a simple graph first.

    >>> A = dgl.to_simple(A)
    >>> B = dgl.to_simple(B)

    Initialize learnable edge weights.

    >>> A.edata['w'] = torch.randn(6).requires_grad_()
    >>> B.edata['w'] = torch.randn(6).requires_grad_()

    Take the product.

    >>> C = dgl.adj_product_graph(A, B, 'w')
    >>> C.edges()
    (tensor([0, 0, 1, 2, 2, 2]), tensor([0, 1, 0, 0, 2, 1]))

    >>> C.edata['w']
    tensor([0.6906, 0.2002, 0.0591, 0.3672, 0.1066, 0.1328],
           grad_fn=<CSRMMBackward>)

    Note that this function is differentiable:

    >>> C.edata['w'].sum().backward()
    >>> A.edata['w'].grad
    tensor([0.7153, 0.2775, 0.7141, 0.7141, 0.7153, 0.7153])

    >>> B.edata['w'].grad
    tensor([0.4664, 0.0000, 1.5614, 0.3840, 0.0000, 0.0000])

    If the source node type of the left operand is the same as the
    destination node type of the right operand, this function returns a
    homogeneous graph:

    >>> C.ntypes
    ['A']

    Otherwise, it returns a bipartite graph instead:

    >>> A = dgl.heterograph({
    ...     ('A', 'AB', 'B'): ([2, 2, 0, 2, 0, 1], [2, 1, 0, 0, 2, 2])},
    ...     num_nodes_dict={'A': 3, 'B': 4})
    >>> B = dgl.heterograph({
    ...     ('B', 'BC', 'C'): ([0, 3, 2, 1, 3, 3], [1, 2, 0, 2, 1, 0])},
    ...     num_nodes_dict={'C': 3, 'B': 4})
    >>> A.edata['w'] = torch.randn(6).requires_grad_()
    >>> B.edata['w'] = torch.randn(6).requires_grad_()
    >>> C = dgl.adj_product_graph(A, B, 'w')
    >>> C.ntypes
    ['A', 'C']
    """
    # Endpoint node types of the result: source of A, destination of B.
    srctype, _, _ = A.canonical_etypes[0]
    _, _, dsttype = B.canonical_etypes[0]
    if srctype == dsttype:
        num_vtypes = 1
        ntypes = [srctype]
    else:
        num_vtypes = 2
        ntypes = [srctype, dsttype]

    on_gpu = A.device != F.cpu()
    if on_gpu and not (
        _unitgraph_less_than_int32(A) and _unitgraph_less_than_int32(B)
    ):
        raise ValueError(
            "For GPU graphs the number of nodes and edges must be less than 2 ** 31 - 1."
        )

    left_weights = A.edata[weight_name]
    right_weights = B.edata[weight_name]
    product_gidx, product_weights = F.csrmm(
        A._graph, left_weights, B._graph, right_weights, num_vtypes
    )

    # When both types coincide the second entry overwrites the first.
    num_nodes_dict = {
        srctype: A.num_nodes(srctype),
        dsttype: B.num_nodes(dsttype),
    }
    metagraph, ntypes, etypes, _ = create_metagraph_index(
        ntypes, [(srctype, etype, dsttype)]
    )
    num_nodes_per_type = [num_nodes_dict[ntype] for ntype in ntypes]
    hetero_gidx = create_heterograph_from_relations(
        metagraph, [product_gidx], utils.toindex(num_nodes_per_type)
    )

    result = DGLGraph(hetero_gidx, ntypes, etypes)
    result.edata[weight_name] = product_weights
    return result
def adj_sum_graph(graphs, weight_name):
    r"""Create a weighted graph whose adjacency matrix is the sum of the
    adjacency matrices of the given graphs, whose rows represent source
    nodes and columns represent destination nodes.

    All the graphs must be simple graphs, and must have only one edge type.
    They also must have the same metagraph, i.e. have the same source node
    type and the same destination node type. Moreover, the number of nodes
    for every graph must also be the same.

    The metagraph of the returned graph will be the same as the input
    graphs.

    Unlike ``scipy``, if an edge in the result graph has zero weight, it
    will not be removed from the graph.

    Parameters
    ----------
    graphs : list[DGLGraph]
        The list of graphs. Must have at least one element.
    weight_name : str
        The feature name of edge weight of all the graphs.

        The corresponding edge feature must be scalar.

    Returns
    -------
    DGLGraph
        The new graph. The edge weight of the returned graph will have the
        same feature name as :attr:`weight_name`.

    Raises
    ------
    ValueError
        If the list of graphs is empty, or if the first graph is not on CPU
        and some graph has more than ``2 ** 31 - 1`` nodes or edges.

    Notes
    -----
    This function works on both CPU and GPU. For GPU, the number of nodes
    and edges must be less than the maximum of ``int32`` (i.e.
    ``2 ** 31 - 1``) due to restriction of cuSPARSE.

    The edge weights returned by this function is differentiable w.r.t. the
    input edge weights.

    If the graph format is restricted, both graphs must have CSR available.

    Examples
    --------
    The following shows weighted adjacency matrix summation between two
    bipartite graphs. You can also perform this between homogeneous graphs.

    >>> A = dgl.heterograph(
    ...     {('A', 'AB', 'B'): ([2, 2, 0, 2, 0, 1], [2, 1, 0, 0, 2, 2])},
    ...     num_nodes_dict={'A': 3, 'B': 4})
    >>> B = dgl.heterograph(
    ...     {('A', 'AB', 'B'): ([1, 2, 0, 2, 1, 0], [0, 3, 2, 1, 3, 3])},
    ...     num_nodes_dict={'A': 3, 'B': 4})

    If your graph is a multigraph, call :func:`dgl.to_simple` to convert it
    into a simple graph first.

    >>> A = dgl.to_simple(A)
    >>> B = dgl.to_simple(B)

    Initialize learnable edge weights.

    >>> A.edata['w'] = torch.randn(6).requires_grad_()
    >>> B.edata['w'] = torch.randn(6).requires_grad_()

    Take the sum.

    >>> C = dgl.adj_sum_graph([A, B], 'w')
    >>> C.edges()
    (tensor([0, 0, 0, 1, 1, 1, 2, 2, 2, 2]),
     tensor([0, 2, 3, 2, 0, 3, 0, 1, 2, 3]))

    Note that this function is differentiable:

    >>> C.edata['w'].sum().backward()
    >>> A.edata['w'].grad
    tensor([1., 1., 1., 1., 1., 1.])

    >>> B.edata['w'].grad
    tensor([1., 1., 1., 1., 1., 1.])
    """
    if len(graphs) == 0:
        raise ValueError("The list of graphs must not be empty.")

    first = graphs[0]
    on_gpu = first.device != F.cpu()
    if on_gpu and not all(_unitgraph_less_than_int32(A) for A in graphs):
        raise ValueError(
            "For GPU graphs the number of nodes and edges must be less than 2 ** 31 - 1."
        )

    # The result reuses the metagraph and node counts of the first graph.
    first_gidx = first._graph
    metagraph = first_gidx.metagraph
    nodes_per_type = []
    for ntype_id in range(first_gidx.number_of_ntypes()):
        nodes_per_type.append(first_gidx.num_nodes(ntype_id))
    num_nodes = utils.toindex(nodes_per_type)

    weights = []
    gidxs = []
    for graph in graphs:
        weights.append(graph.edata[weight_name])
    for graph in graphs:
        gidxs.append(graph._graph)

    sum_gidx, sum_weights = F.csrsum(gidxs, weights)
    sum_gidx = create_heterograph_from_relations(
        metagraph, [sum_gidx], num_nodes
    )
    result = DGLGraph(sum_gidx, first.ntypes, first.etypes)
    result.edata[weight_name] = sum_weights
    return result
def sort_csr_by_tag(g, tag, tag_offset_name="_TAG_OFFSET", tag_type="node"):
    r"""Return a new graph whose CSR matrix is sorted by the given tag.

    Sort the internal CSR matrix of the graph so that the adjacency list of
    each node, which contains the out-edges, is sorted by the tag of the
    out-neighbors. After sorting, edges sharing the same tag will be
    arranged in a consecutive range in a node's adjacency list. Following is
    an example:

        Consider a graph as follows::

            0 -> 0, 1, 2, 3, 4
            1 -> 0, 1, 2

        Given node tags ``[1, 1, 0, 2, 0]``, each node's adjacency list
        will be sorted as follows::

            0 -> 2, 4, 0, 1, 3
            1 -> 2, 0, 1

        Given edge tags ``[1, 1, 0, 2, 0, 1, 1, 0]`` has the same effect as
        above node tags.

    The function will also return the starting offsets of the tag segments
    in a tensor of shape :math:`(N, max\_tag+2)`. For node ``i``, its
    out-edges connecting to node tag ``j`` is stored between
    ``tag_offsets[i][j]`` ~ ``tag_offsets[i][j+1]``. Since the offsets can
    be viewed node data, we store it in the ``ndata`` of the returned graph.
    Users can specify the ndata name by the :attr:`tag_offset_name`
    argument.

    Note that the function will not change the edge ID neither how the edge
    features are stored. The input graph must allow CSR format. The graph
    must be on CPU.

    If the input graph is heterogenous, it must have only one edge type and
    two node types (i.e., source and destination node types). In this case,
    the provided node tags are for the destination nodes, and the tag
    offsets are stored in the source node data.

    The sorted graph and the calculated tag offsets are needed by certain
    operators that consider node tags. See
    :func:`~dgl.sampling.sample_neighbors_biased` for an example.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    tag : Tensor
        Integer tensor of shape :math:`(N,)`, :math:`N` being the number of
        (destination) nodes or edges.
    tag_offset_name : str
        The name of the node feature to store tag offsets.
    tag_type : str
        Tag type which could be ``node`` or ``edge``.

    Returns
    -------
    g_sorted : DGLGraph
        A new graph whose CSR is sorted. The node/edge features of the
        input graph is shallow-copied over.

        - ``g_sorted.ndata[tag_offset_name]`` : Tensor of shape
          :math:`(N, max\_tag + 2)`.
        - If ``g`` is heterogeneous, get from ``g_sorted.srcdata``.

    Examples
    --------
    ``tag_type`` is ``node``.

    >>> import dgl
    >>> import torch
    >>> g = dgl.graph(([0,0,0,0,0,1,1,1],[0,1,2,3,4,0,1,2]))
    >>> g.adj_external(scipy_fmt='csr').nonzero()
    (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
     array([0, 1, 2, 3, 4, 0, 1, 2], dtype=int32))
    >>> tag = torch.IntTensor([1,1,0,2,0])
    >>> g_sorted = dgl.sort_csr_by_tag(g, tag)
    >>> g_sorted.adj_external(scipy_fmt='csr').nonzero()
    (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
     array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
    >>> g_sorted.ndata['_TAG_OFFSET']
    tensor([[0, 2, 4, 5],
            [0, 1, 3, 3],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]])

    ``tag_type`` is ``edge``.

    >>> g = dgl.graph(([0,0,0,0,0,1,1,1],[0,1,2,3,4,0,1,2]))
    >>> g.edges()
    (tensor([0, 0, 0, 0, 0, 1, 1, 1]), tensor([0, 1, 2, 3, 4, 0, 1, 2]))
    >>> tag = torch.tensor([1, 1, 0, 2, 0, 1, 1, 0])
    >>> g_sorted = dgl.sort_csr_by_tag(g, tag, tag_type='edge')
    >>> g_sorted.adj_external(scipy_fmt='csr').nonzero()
    (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
     array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
    >>> g_sorted.srcdata['_TAG_OFFSET']
    tensor([[0, 2, 4, 5],
            [0, 1, 3, 3],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]])

    See Also
    --------
    dgl.sampling.sample_neighbors_biased
    """
    if len(g.etypes) > 1:
        raise DGLError("Only support homograph and bipartite graph")
    assert tag_type in (
        "node",
        "edge",
    ), "tag_type should be either 'node' or 'edge'."

    if tag_type == "node":
        # Turn node tags into per-edge tags by looking up each edge's
        # destination node.
        dst = g.edges()[1]
        tag = F.gather_row(tag, F.tensor(dst))
    assert len(tag) == g.num_edges()

    num_tags = 1 + int(F.asnumpy(F.max(tag, 0)))
    tag_arr = F.zerocopy_to_dgl_ndarray(tag)

    new_g = g.clone()
    sorted_gidx, tag_pos_arr = _CAPI_DGLHeteroSortOutEdges(
        g._graph, tag_arr, num_tags
    )
    new_g._graph = sorted_gidx
    new_g.srcdata[tag_offset_name] = F.from_dgl_nd(tag_pos_arr)
    return new_g
>>> g = dgl.graph(([0,0,0,0,0,1,1,1],[0,1,2,3,4,0,1,2])) >>> g.edges() (tensor([0, 0, 0, 0, 0, 1, 1, 1]), tensor([0, 1, 2, 3, 4, 0, 1, 2])) >>> tag = torch.tensor([1, 1, 0, 2, 0, 1, 1, 0]) >>> g_sorted = dgl.sort_csr_by_tag(g, tag, tag_type='edge') >>> g_sorted.adj_external(scipy_fmt='csr').nonzero() (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32), array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32)) >>> g_sorted.srcdata['_TAG_OFFSET'] tensor([[0, 2, 4, 5], [0, 1, 3, 3], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]) See Also -------- dgl.sampling.sample_neighbors_biased """ if len(g.etypes) > 1: raise DGLError("Only support homograph and bipartite graph") assert tag_type in [ "node", "edge", ], "tag_type should be either 'node' or 'edge'." if tag_type == "node": _, dst = g.edges() tag = F.gather_row(tag, F.tensor(dst)) assert len(tag) == g.num_edges() num_tags = int(F.asnumpy(F.max(tag, 0))) + 1 tag_arr = F.zerocopy_to_dgl_ndarray(tag) new_g = g.clone() new_g._graph, tag_pos_arr = _CAPI_DGLHeteroSortOutEdges( g._graph, tag_arr, num_tags ) new_g.srcdata[tag_offset_name] = F.from_dgl_nd(tag_pos_arr) return new_g def sort_csc_by_tag(g, tag, tag_offset_name="_TAG_OFFSET", tag_type="node"): r"""Return a new graph whose CSC matrix is sorted by the given tag. Sort the internal CSC matrix of the graph so that the adjacency list of each node , which contains the in-edges, is sorted by the tag of the in-neighbors. After sorting, edges sharing the same tag will be arranged in a consecutive range in a node's adjacency list. Following is an example: Consider a graph as follows:: 0 <- 0, 1, 2, 3, 4 1 <- 0, 1, 2 Given node tags ``[1, 1, 0, 2, 0]``, each node's adjacency list will be sorted as follows:: 0 <- 2, 4, 0, 1, 3 1 <- 2, 0, 1 Given edge tags ``[1, 1, 0, 2, 0, 1, 1, 0]`` has the same effect as above node tags. The function will also return the starting offsets of the tag segments in a tensor of shape :math:`(N, max\_tag+2)`. 
def sort_csc_by_tag(g, tag, tag_offset_name="_TAG_OFFSET", tag_type="node"):
    r"""Return a new graph whose CSC matrix is sorted by the given tag.

    The internal CSC matrix is rearranged so that the adjacency list of each
    node (its in-edges) is ordered by the tag of the in-neighbors.  After
    sorting, edges sharing the same tag occupy one consecutive range of a
    node's adjacency list.  Consider the graph ::

        0 <- 0, 1, 2, 3, 4
        1 <- 0, 1, 2

    Given node tags ``[1, 1, 0, 2, 0]``, the adjacency lists become ::

        0 <- 2, 4, 0, 1, 3
        1 <- 2, 0, 1

    Edge tags ``[1, 1, 0, 2, 0, 1, 1, 0]`` have the same effect as the node
    tags above.

    The function also computes the starting offsets of the tag segments as a
    tensor of shape :math:`(N, max\_tag+2)`.  For node ``i``, the in-edges
    connecting to tag ``j`` are stored between ``tag_offsets[i][j]`` and
    ``tag_offsets[i][j+1]``.  The offsets are stored as destination-node
    data of the returned graph under the name given by
    :attr:`tag_offset_name`.

    Neither the edge IDs nor the way edge features are stored is changed.
    The input graph must allow the CSC format and must be on CPU.

    If the input graph is heterogeneous, it must have only one edge type and
    two node types (source and destination).  In that case node tags refer
    to the source nodes and the offsets are stored in the destination node
    data.

    The sorted graph and the tag offsets are needed by operators that take
    node tags into account, e.g.
    :func:`~dgl.sampling.sample_neighbors_biased`.

    Parameters
    ------------
    g : DGLGraph
        The input graph.
    tag : Tensor
        Integer tensor of shape :math:`(N,)`, :math:`N` being the number of
        (source) nodes or edges.
    tag_offset_name : str
        The name of the node feature to store tag offsets.
    tag_type : str
        Tag type which could be ``node`` or ``edge``.

    Returns
    -------
    g_sorted : DGLGraph
        A new graph whose CSC matrix is sorted.  The node/edge features of
        the input graph are shallow-copied over.

        - ``g_sorted.ndata[tag_offset_name]`` : Tensor of shape
          :math:`(N, max\_tag + 2)`.
        - If ``g`` is heterogeneous, get it from ``g_sorted.dstdata``.

    Examples
    -----------

    ``tag_type`` is ``node``.

    >>> import dgl
    >>> import torch
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2],[0,0,0,0,0,1,1,1]))
    >>> g.adj_external(scipy_fmt='csr', transpose=True).nonzero()
    (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
     array([0, 1, 2, 3, 4, 0, 1, 2], dtype=int32))
    >>> tag = torch.IntTensor([1,1,0,2,0])
    >>> g_sorted = dgl.sort_csc_by_tag(g, tag)
    >>> g_sorted.adj_external(scipy_fmt='csr', transpose=True).nonzero()
    (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
     array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
    >>> g_sorted.ndata['_TAG_OFFSET']
    tensor([[0, 2, 4, 5],
            [0, 1, 3, 3],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]])

    ``tag_type`` is ``edge``.

    >>> g = dgl.graph(([0,1,2,3,4,0,1,2],[0,0,0,0,0,1,1,1]))
    >>> tag = torch.tensor([1, 1, 0, 2, 0, 1, 1, 0])
    >>> g_sorted = dgl.sort_csc_by_tag(g, tag, tag_type='edge')
    >>> g_sorted.adj_external(scipy_fmt='csr', transpose=True).nonzero()
    (array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
     array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
    >>> g_sorted.dstdata['_TAG_OFFSET']
    tensor([[0, 2, 4, 5],
            [0, 1, 3, 3],
            [0, 0, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 0]])

    See Also
    --------
    dgl.sampling.sample_neighbors_biased
    """
    if len(g.etypes) > 1:
        raise DGLError("Only support homograph and bipartite graph")
    assert tag_type in (
        "node",
        "edge",
    ), "tag_type should be either 'node' or 'edge'."

    edge_tag = tag
    if tag_type == "node":
        # Expand per-node tags into per-edge tags by looking up the tag of
        # every edge's source node.
        src_nodes = g.edges()[0]
        edge_tag = F.gather_row(tag, F.tensor(src_nodes))
    assert len(edge_tag) == g.num_edges()

    # The number of tag segments is derived from the largest tag value.
    tag_count = int(F.asnumpy(F.max(edge_tag, 0))) + 1
    tag_nd = F.zerocopy_to_dgl_ndarray(edge_tag)

    sorted_g = g.clone()
    sorted_gidx, offsets_nd = _CAPI_DGLHeteroSortInEdges(
        g._graph, tag_nd, tag_count
    )
    sorted_g._graph = sorted_gidx
    sorted_g.dstdata[tag_offset_name] = F.from_dgl_nd(offsets_nd)
    return sorted_g
def reorder_graph(
    g,
    node_permute_algo=None,
    edge_permute_algo="src",
    store_ids=True,
    permute_config=None,
):
    r"""Return a new graph with nodes and edges re-ordered/re-labeled
    according to the specified permute algorithm.

    Support homogeneous graph only for the moment.

    The re-ordering has two steps: first re-order nodes and then re-order
    edges.

    For node permutation, users can re-order by the
    :attr:`node_permute_algo` argument.  For edge permutation, users can
    re-arrange edges according to their source nodes or destination nodes by
    the :attr:`edge_permute_algo` argument.  Some of the permutation
    algorithms are only implemented on CPU, so if the input graph is on GPU,
    it will be copied to CPU first.  The storage order of the node and edge
    features in the graph is permuted accordingly.

    The input graph is left unchanged: the temporary edge column used
    internally to track original edge IDs is removed from it even when an
    error is raised, and it never appears in the returned graph.

    Parameters
    ----------
    g : DGLGraph
        The homogeneous graph.
    node_permute_algo: str, optional
        The permutation algorithm to re-order nodes. If given, the options
        are ``rcmk`` or ``metis`` or ``custom``.

        * ``None``: Keep the current node order.
        * ``rcmk``: Use the Reverse Cuthill-McKee ordering computed by
          ``scipy.sparse.csgraph.reverse_cuthill_mckee`` to generate the
          node permutation.
        * ``metis``: Use the :func:`~dgl.metis_partition_assignment` function
          to partition the input graph, which gives a cluster assignment of
          each node. DGL then sorts the assignment array so the new node
          order will put nodes of the same cluster together. Please note that
          the generated node permutation of ``metis`` is non-deterministic
          due to the algorithm's nature.
        * ``custom``: Reorder the graph according to the user-provided node
          permutation array (provided in :attr:`permute_config`).
    edge_permute_algo: str, optional
        The permutation algorithm to reorder edges. Options are ``src`` or
        ``dst`` or ``custom``. ``src`` is the default value.

        * ``src``: Edges are arranged according to their source nodes.
        * ``dst``: Edges are arranged according to their destination nodes.
        * ``custom``: Edges are arranged according to the user-provided edge
          permutation array (provided in :attr:`permute_config`).
    store_ids: bool, optional
        If True, DGL will store the original node and edge IDs in the ndata
        and edata of the resulting graph under name ``dgl.NID`` and
        ``dgl.EID``, respectively.
    permute_config: dict, optional
        Additional key-value config data for the specified permutation
        algorithm.

        * For ``rcmk``, this argument is not required.
        * For ``metis``, users should specify the number of partitions ``k``
          (e.g., ``permute_config={'k':10}`` to partition the graph to 10
          clusters).
        * For ``custom`` node reordering, users should provide a node
          permutation array ``nodes_perm``. The array must be an integer
          list or a tensor with the same device of the input graph.
        * For ``custom`` edge reordering, users should provide an edge
          permutation array ``edges_perm``. The array must be an integer
          list or a tensor with the same device of the input graph.

    Returns
    -------
    DGLGraph
        The re-ordered graph.

    Raises
    ------
    DGLError
        If the graph is not homogeneous, an unknown algorithm name is given,
        or the configuration required by the chosen algorithm is missing or
        has the wrong length.

    Examples
    --------
    >>> import dgl
    >>> import torch
    >>> g = dgl.graph((torch.tensor([0, 1, 2, 3, 4]), torch.tensor([2, 2, 3, 2, 3])))
    >>> g.ndata['h'] = torch.arange(g.num_nodes() * 2).view(g.num_nodes(), 2)
    >>> g.edata['w'] = torch.arange(g.num_edges() * 1).view(g.num_edges(), 1)
    >>> g.ndata
    {'h': tensor([[0, 1],
            [2, 3],
            [4, 5],
            [6, 7],
            [8, 9]])}
    >>> g.edata
    {'w': tensor([[0],
            [1],
            [2],
            [3],
            [4]])}

    Reorder according to ``'rcmk'`` permute algorithm.

    >>> rg = dgl.reorder_graph(g, node_permute_algo='rcmk')
    >>> rg.ndata
    {'h': tensor([[8, 9],
            [6, 7],
            [2, 3],
            [4, 5],
            [0, 1]]), '_ID': tensor([4, 3, 1, 2, 0])}
    >>> rg.edata
    {'w': tensor([[4],
            [3],
            [1],
            [2],
            [0]]), '_ID': tensor([4, 3, 1, 2, 0])}

    Reorder according to ``'metis'`` permute algorithm.

    >>> rg = dgl.reorder_graph(g, node_permute_algo='metis', permute_config={'k':2})
    >>> rg.ndata
    {'h': tensor([[4, 5],
            [2, 3],
            [0, 1],
            [8, 9],
            [6, 7]]), '_ID': tensor([2, 1, 0, 4, 3])}
    >>> rg.edata
    {'w': tensor([[2],
            [1],
            [0],
            [4],
            [3]]), '_ID': tensor([2, 1, 0, 4, 3])}

    Reorder according to ``'custom'`` permute algorithm with user-provided
    nodes_perm.

    >>> rg = dgl.reorder_graph(g, node_permute_algo='custom',
    ...                        permute_config={'nodes_perm': [3, 2, 0, 4, 1]})
    >>> rg.ndata
    {'h': tensor([[6, 7],
            [4, 5],
            [0, 1],
            [8, 9],
            [2, 3]]), '_ID': tensor([3, 2, 0, 4, 1])}
    >>> rg.edata
    {'w': tensor([[3],
            [2],
            [0],
            [4],
            [1]]), '_ID': tensor([3, 2, 0, 4, 1])}

    Reorder nodes according to ``'rcmk'`` and reorder edges according to
    ``dst`` edge permute algorithm.

    >>> rg = dgl.reorder_graph(g, node_permute_algo='rcmk', edge_permute_algo='dst')
    >>> print(rg.ndata)
    {'h': tensor([[8, 9],
            [6, 7],
            [2, 3],
            [4, 5],
            [0, 1]]), '_ID': tensor([4, 3, 1, 2, 0])}
    >>> print(rg.edata)
    {'w': tensor([[4],
            [2],
            [3],
            [1],
            [0]]), '_ID': tensor([4, 2, 3, 1, 0])}

    Nodes are not reordered but edges are reordered according to
    ``'custom'`` permute algorithm with user-provided edges_perm.

    >>> rg = dgl.reorder_graph(g, edge_permute_algo='custom',
    ...                        permute_config={'edges_perm': [1, 2, 3, 4, 0]})
    >>> print(rg.ndata)
    {'h': tensor([[0, 1],
            [2, 3],
            [4, 5],
            [6, 7],
            [8, 9]]), '_ID': tensor([0, 1, 2, 3, 4])}
    >>> print(rg.edata)
    {'w': tensor([[1],
            [2],
            [3],
            [4],
            [0]]), '_ID': tensor([1, 2, 3, 4, 0])}
    """
    # sanity checks
    if not g.is_homogeneous:
        raise DGLError("Only homogeneous graphs are supported.")
    expected_node_algo = ["rcmk", "metis", "custom"]
    if (
        node_permute_algo is not None
        and node_permute_algo not in expected_node_algo
    ):
        raise DGLError(
            "Unexpected node_permute_algo is specified: {}. Expected algos: {}".format(
                node_permute_algo, expected_node_algo
            )
        )
    expected_edge_algo = ["src", "dst", "custom"]
    if edge_permute_algo not in expected_edge_algo:
        raise DGLError(
            "Unexpected edge_permute_algo is specified: {}. Expected algos: {}".format(
                edge_permute_algo, expected_edge_algo
            )
        )

    # Temporarily tag every edge of the input graph with its original ID so
    # the IDs travel with the edges through the subgraph extractions below.
    g.edata["__orig__"] = F.arange(0, g.num_edges(), g.idtype, g.device)
    try:
        # reorder nodes
        if node_permute_algo == "rcmk":
            nodes_perm = rcmk_perm(g)
            rg = subgraph.node_subgraph(g, nodes_perm, store_ids=False)
        elif node_permute_algo == "metis":
            if permute_config is None or "k" not in permute_config:
                raise DGLError(
                    "Partition parts 'k' is required for metis. "
                    "Please specify in permute_config."
                )
            nodes_perm = metis_perm(g, permute_config["k"])
            rg = subgraph.node_subgraph(g, nodes_perm, store_ids=False)
        elif node_permute_algo == "custom":
            if permute_config is None or "nodes_perm" not in permute_config:
                raise DGLError(
                    "node_permute_algo is specified as custom, but no "
                    "'nodes_perm' is specified in permute_config."
                )
            nodes_perm = permute_config["nodes_perm"]
            if len(nodes_perm) != g.num_nodes():
                raise DGLError(
                    "Length of 'nodes_perm' ({}) does not "
                    "match graph num_nodes ({}).".format(
                        len(nodes_perm), g.num_nodes()
                    )
                )
            rg = subgraph.node_subgraph(g, nodes_perm, store_ids=False)
        else:
            nodes_perm = F.arange(0, g.num_nodes(), g.idtype, g.device)
            rg = g.clone()

        if store_ids:
            rg.ndata[NID] = F.copy_to(F.tensor(nodes_perm, g.idtype), g.device)
    finally:
        # Never leave the bookkeeping column on the caller's graph, even if
        # validation or one of the permutation routines raised.
        g.edata.pop("__orig__")

    # reorder edges
    if edge_permute_algo == "src":
        edges_perm = np.argsort(F.asnumpy(rg.edges()[0]))
        rg = subgraph.edge_subgraph(
            rg, edges_perm, relabel_nodes=False, store_ids=False
        )
    elif edge_permute_algo == "dst":
        edges_perm = np.argsort(F.asnumpy(rg.edges()[1]))
        rg = subgraph.edge_subgraph(
            rg, edges_perm, relabel_nodes=False, store_ids=False
        )
    elif edge_permute_algo == "custom":
        if permute_config is None or "edges_perm" not in permute_config:
            raise DGLError(
                "edge_permute_algo is specified as custom, but no "
                "'edges_perm' is specified in permute_config."
            )
        edges_perm = permute_config["edges_perm"]
        # First revert the edge reorder caused by node reorder and then
        # apply user-provided edge permutation
        rev_id = F.argsort(rg.edata["__orig__"], 0, False)
        edges_perm = F.astype(
            F.gather_row(rev_id, F.tensor(edges_perm)), rg.idtype
        )
        rg = subgraph.edge_subgraph(
            rg, edges_perm, relabel_nodes=False, store_ids=False
        )

    # The bookkeeping column is internal: always strip it from the result and
    # expose it under dgl.EID only when the caller asked for the IDs.
    orig_eids = rg.edata.pop("__orig__")
    if store_ids:
        rg.edata[EID] = orig_eids

    return rg


DGLGraph.reorder_graph = utils.alias_func(reorder_graph)


def metis_perm(g, k):
    r"""Return nodes permutation according to ``'metis'`` algorithm.

    For internal use.

    The graph is moved to CPU first if it lives on another device; the
    permutation is the argsort of the partition assignment, which groups
    nodes of the same partition together.

    Parameters
    ----------
    g : DGLGraph
        The homogeneous graph.
    k: int
        The partition parts number.

    Returns
    -------
    iterable[int]
        The nodes permutation.
    """
    pids = metis_partition_assignment(
        g if g.device == F.cpu() else g.to(F.cpu()), k
    )
    pids = F.asnumpy(pids)
    return np.argsort(pids).copy()


def rcmk_perm(g):
    r"""Return nodes permutation according to ``'rcmk'`` algorithm.

    For internal use.

    Parameters
    ----------
    g : DGLGraph
        The homogeneous graph.

    Returns
    -------
    iterable[int]
        The nodes permutation.
    """
    fmat = "csr"
    # g.formats() maps status -> list of formats; flatten it to every format
    # the graph is allowed to use.
    allowed_fmats = sum(g.formats().values(), [])
    if fmat not in allowed_fmats:
        # CSR is needed for the scipy export below, so extend the format set.
        g = g.formats(allowed_fmats + [fmat])
    csr_adj = g.adj_external(scipy_fmt=fmat)
    perm = sparse.csgraph.reverse_cuthill_mckee(csr_adj)
    return perm.copy()
def norm_by_dst(g, etype=None):
    r"""Calculate normalization coefficient per edge based on destination
    node degree.

    Each edge receives ``1 / c`` where ``c`` is the number of edges (of the
    selected type) that share its destination node.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    etype : str or (str, str, str), optional
        The type of the edges to calculate. The allowed edge type formats
        are:

        * ``(str, str, str)`` for source node type, edge type and
          destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        It can be omitted if the graph has a single edge type.

    Returns
    -------
    1D Tensor
        The normalization coefficient of the edges.

    Examples
    --------

    >>> import dgl
    >>> g = dgl.graph(([0, 1, 1], [1, 1, 2]))
    >>> print(dgl.norm_by_dst(g))
    tensor([0.5000, 0.5000, 1.0000])
    """
    dst_nodes = g.edges(form="all", etype=etype)[1]
    # counts[inverse] broadcasts each distinct destination's edge count back
    # onto the individual edges.
    _, inverse, counts = F.unique(
        dst_nodes, return_inverse=True, return_counts=True
    )
    per_edge_degree = F.astype(counts[inverse], F.float32)
    return F.replace_inf_with_zero(1.0 / per_edge_degree)
def radius_graph(
    x,
    r,
    p=2,
    self_loop=False,
    compute_mode="donot_use_mm_for_euclid_dist",
    get_distances=False,
):
    r"""Construct a graph from a set of points with neighbors within given
    distance.

    The function transforms the coordinates/features of a point set into a
    bidirected homogeneous graph. The coordinates of the point set are
    specified as a matrix whose rows correspond to points and columns
    correspond to coordinate/feature dimensions.

    The nodes of the returned graph correspond to the points, where the
    neighbors of each point are within the given distance.

    The function requires the PyTorch backend.

    Parameters
    ----------
    x : Tensor
        The point coordinates. It can be either on CPU or GPU. Device of the
        point coordinates specifies device of the radius graph and ``x[i]``
        corresponds to the i-th node in the radius graph.
    r : float
        Radius of the neighbors.
    p : float, optional
        Power parameter for the Minkowski metric. When :attr:`p = 1` it is
        the equivalent of Manhattan distance (L1 norm) and Euclidean
        distance (L2 norm) for :attr:`p = 2`.

        (default: 2)
    self_loop : bool, optional
        Whether the radius graph will contain self-loops.

        (default: False)
    compute_mode : str, optional
        ``use_mm_for_euclid_dist_if_necessary`` - will use matrix
        multiplication approach to calculate euclidean distance (p = 2) if
        P > 25 or R > 25
        ``use_mm_for_euclid_dist`` - will always use matrix multiplication
        approach to calculate euclidean distance (p = 2)
        ``donot_use_mm_for_euclid_dist`` - will never use matrix
        multiplication approach to calculate euclidean distance (p = 2).

        (default: donot_use_mm_for_euclid_dist)
    get_distances : bool, optional
        Whether to return the distances for the corresponding edges in the
        radius graph.

        (default: False)

    Returns
    -------
    DGLGraph
        The constructed graph. The node IDs are in the same order as
        :attr:`x`.
    torch.Tensor, optional
        The distances for the edges in the constructed graph. The distances
        are in the same order as edge IDs.

    Raises
    ------
    DGLError
        If :attr:`r` is not positive or :attr:`x` contains no points.

    Examples
    --------

    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch

    >>> x = torch.tensor([[0.0, 0.0, 1.0],
    ...                   [1.0, 0.5, 0.5],
    ...                   [0.5, 0.2, 0.2],
    ...                   [0.3, 0.2, 0.4]])
    >>> r_g = dgl.radius_graph(x, 0.75)  # Each node has neighbors within 0.75 distance
    >>> r_g.edges()
    (tensor([0, 1, 2, 2, 3, 3]), tensor([3, 2, 1, 3, 0, 2]))

    When :attr:`get_distances` is True, function returns the radius graph
    and distances for the corresponding edges.

    >>> x = torch.tensor([[0.0, 0.0, 1.0],
    ...                   [1.0, 0.5, 0.5],
    ...                   [0.5, 0.2, 0.2],
    ...                   [0.3, 0.2, 0.4]])
    >>> r_g, dist = dgl.radius_graph(x, 0.75, get_distances=True)
    >>> r_g.edges()
    (tensor([0, 1, 2, 2, 3, 3]), tensor([3, 2, 1, 3, 0, 2]))
    >>> dist
    tensor([[0.7000],
            [0.6557],
            [0.6557],
            [0.2828],
            [0.7000],
            [0.2828]])
    """
    # reject a non-positive radius
    if r <= 0:
        raise DGLError("Invalid r value. expect r > 0, got r = {}".format(r))

    # reject an empty point set
    if F.shape(x)[0] == 0:
        raise DGLError("Find empty point set")

    pairwise = th.cdist(x, x, p=p, compute_mode=compute_mode)

    if not self_loop:
        # Lift the diagonal above the threshold so that no point is selected
        # as its own neighbor.
        pairwise.fill_diagonal_(r + 1)

    endpoints = th.nonzero(pairwise <= r, as_tuple=True)
    graph = convert.graph(endpoints, num_nodes=x.shape[0], device=x.device)

    if not get_distances:
        return graph

    # One distance per edge, as a column vector aligned with the edge IDs.
    edge_distances = pairwise[endpoints].unsqueeze(-1)
    return graph, edge_distances
def random_walk_pe(g, k, eweight_name=None):
    r"""Random Walk Positional Encoding, as introduced in
    *Graph Neural Networks with Learnable Structural and Positional
    Representations*.

    This function computes the random walk positional encodings as landing
    probabilities from 1-step to k-step, starting from each node to itself.

    Parameters
    ----------
    g : DGLGraph
        The input graph. Must be homogeneous.
    k : int
        The number of random walk steps. The paper found the best value to
        be 16 and 20 for two experiments.
    eweight_name : str, optional
        The name to retrieve the edge weights. Default: None, not using the
        edge weights.

    Returns
    -------
    Tensor
        The random walk positional encodings of shape :math:`(N, k)`, where
        :math:`N` is the number of nodes in the input graph.

    Example
    -------
    >>> import dgl
    >>> g = dgl.graph(([0,1,1], [1,1,0]))
    >>> dgl.random_walk_pe(g, 2)
    tensor([[0.0000, 0.5000],
            [0.5000, 0.7500]])
    """
    num_nodes = g.num_nodes()
    num_edges = g.num_edges()
    adj = g.adj_external(scipy_fmt="csr")

    if eweight_name is not None:
        # Scale every adjacency entry by the weight of its edge.
        weight_mat = sparse.csr_matrix(
            (
                g.edata[eweight_name].squeeze(),
                g.find_edges(list(range(num_edges))),
            ),
            shape=(num_nodes, num_nodes),
        )
        adj = adj.multiply(weight_mat)

    # 1-step transition probabilities: rows divided by their sums, with a
    # tiny epsilon so that rows summing to zero do not divide by zero.
    if version.parse(scipy.__version__) < version.parse("1.11.0"):
        transition = np.array(adj / (adj.sum(1) + 1e-30))
    else:
        # Since scipy 1.11.0, dividing a sparse matrix by a dense array
        # yields a sparse matrix, which has to be densified explicitly.
        transition = (adj / (adj.sum(1) + 1e-30)).toarray()

    def _return_prob(walk_mat):
        # The diagonal holds, per node, the probability of being back at the
        # start node.
        return F.astype(F.tensor(np.array(walk_mat.diagonal())), F.float32)

    # Accumulate the diagonal of the 1st .. k-th power of the transition
    # matrix.
    walk = transition
    encodings = [_return_prob(walk)]
    for _ in range(k - 1):
        walk = walk @ transition
        encodings.append(_return_prob(walk))

    return F.stack(encodings, dim=-1)
def lap_pe(g, k, padding=False, return_eigval=False):
    r"""Laplacian Positional Encoding, as introduced in
    *Benchmarking Graph Neural Networks*.

    This function computes the laplacian positional encodings as the k
    smallest non-trivial eigenvectors of the symmetrically normalized
    Laplacian :math:`I - D^{-1/2} A D^{-1/2}`.

    Parameters
    ----------
    g : DGLGraph
        The input graph. Must be homogeneous and bidirected.
    k : int
        Number of smallest non-trivial eigenvectors to use for positional
        encoding.
    padding : bool, optional
        If False, raise an exception when k>=n.
        Otherwise, add zero paddings in the end of eigenvectors and 'nan'
        paddings in the end of eigenvalues when k>=n.
        Default: False.
        n is the number of nodes in the given graph.
    return_eigval : bool, optional
        If True, return laplacian eigenvalues together with eigenvectors.
        Otherwise, return laplacian eigenvectors only.
        Default: False.

    Returns
    -------
    Tensor or (Tensor, Tensor)
        Return the laplacian positional encodings of shape :math:`(N, k)`,
        where :math:`N` is the number of nodes in the input graph, when
        :attr:`return_eigval` is False. The eigenvalues of shape :math:`k`
        are additionally returned as the second element when
        :attr:`return_eigval` is True.

    Raises
    ------
    ValueError
        If :attr:`padding` is False and :attr:`k` is not smaller than the
        number of nodes.

    Notes
    -----
    The sign of every eigenvector is flipped at random using
    ``np.random.rand``, so repeated calls may differ in sign.

    Example
    -------
    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,1,2,3,0], [1,2,3,0,0,1,2,3]))
    >>> dgl.lap_pe(g, 2)
    tensor([[ 7.0711e-01, -6.4921e-17],
            [ 3.0483e-16, -7.0711e-01],
            [-7.0711e-01, -2.4910e-16],
            [ 9.9288e-17,  7.0711e-01]])
    >>> dgl.lap_pe(g, 5, padding=True)
    tensor([[ 7.0711e-01, -6.4921e-17,  5.0000e-01,  0.0000e+00,  0.0000e+00],
            [ 3.0483e-16, -7.0711e-01, -5.0000e-01,  0.0000e+00,  0.0000e+00],
            [-7.0711e-01, -2.4910e-16,  5.0000e-01,  0.0000e+00,  0.0000e+00],
            [ 9.9288e-17,  7.0711e-01, -5.0000e-01,  0.0000e+00,  0.0000e+00]])
    >>> dgl.lap_pe(g, 5, padding=True, return_eigval=True)
    (tensor([[-7.0711e-01,  6.4921e-17, -5.0000e-01,  0.0000e+00,  0.0000e+00],
             [-3.0483e-16,  7.0711e-01,  5.0000e-01,  0.0000e+00,  0.0000e+00],
             [ 7.0711e-01,  2.4910e-16, -5.0000e-01,  0.0000e+00,  0.0000e+00],
             [-9.9288e-17, -7.0711e-01,  5.0000e-01,  0.0000e+00,  0.0000e+00]]),
     tensor([1., 1., 2., nan, nan]))
    """
    # check for the "k < n" constraint
    n = g.num_nodes()
    if not padding and n <= k:
        # This used to be ``assert "<message>"``, which asserts a non-empty
        # string and therefore never fired; raise as documented instead.
        raise ValueError(
            "the number of eigenvectors k must be smaller than the number of "
            + f"nodes n, {k} and {n} detected."
        )

    # get laplacian matrix as I - D^-0.5 * A * D^-0.5
    A = g.adj_external(scipy_fmt="csr")  # adjacency matrix
    # Degrees are clipped to 1 so isolated nodes do not divide by zero.
    N = sparse.diags(
        F.asnumpy(g.in_degrees()).clip(1) ** -0.5, dtype=float
    )  # D^-1/2
    L = sparse.eye(n) - N * A * N

    # select eigenvectors with smaller eigenvalues O(n + klogk)
    if k + 1 < n - 1:
        # Use scipy if k + 1 < n - 1 for memory efficiency.
        # k + 1 eigenpairs are requested, and ARPACK needs
        # (k + 1) + 1 < ncv <= n.  A fixed ``4 * k`` can exceed n (e.g.
        # n=10, k=3), so clamp it into the valid range; the value is
        # unchanged whenever ``4 * k`` was already valid.
        ncv = min(n, max(4 * k, k + 3))
        EigVal, EigVec = scipy.sparse.linalg.eigs(
            L, k=k + 1, which="SR", ncv=ncv, tol=1e-2
        )
        max_freqs = k
        # Drop the smallest (trivial) eigenpair.
        topk_indices = EigVal.argsort()[1:]
    else:
        # Fallback to numpy since scipy.sparse do not support this case.
        EigVal, EigVec = np.linalg.eig(L.toarray())
        max_freqs = min(n - 1, k)
        kpartition_indices = np.argpartition(EigVal, max_freqs)[: max_freqs + 1]
        topk_eigvals = EigVal[kpartition_indices]
        # Sort the selected candidates and drop the smallest (trivial) one.
        topk_indices = kpartition_indices[topk_eigvals.argsort()][1:]
    # Since scipy may return complex value, to avoid crashing in NN code,
    # convert them to real number.
    topk_EigVal = EigVal[topk_indices].real
    topk_EigVec = EigVec[:, topk_indices].real
    eigvals = F.tensor(topk_EigVal, dtype=F.float32)

    # get random flip signs
    rand_sign = 2 * (np.random.rand(max_freqs) > 0.5) - 1.0
    PE = F.astype(F.tensor(rand_sign * topk_EigVec), F.float32)

    # add paddings
    if n <= k:
        temp_EigVec = F.zeros(
            [n, k - n + 1], dtype=F.float32, ctx=F.context(PE)
        )
        PE = F.cat([PE, temp_EigVec], dim=1)
        temp_EigVal = F.tensor(np.full(k - n + 1, np.nan), F.float32)
        eigvals = F.cat([eigvals, temp_EigVal], dim=0)

    if return_eigval:
        return PE, eigvals
    return PE


def laplacian_pe(g, k, padding=False, return_eigval=False):
    r"""Alias of `dgl.lap_pe`.

    Emits a deprecation warning and forwards all arguments to
    :func:`lap_pe` unchanged.
    """
    dgl_warning("dgl.laplacian_pe will be deprecated. Use dgl.lap_pe please.")
    return lap_pe(g, k, padding, return_eigval)


def _cast_float_features(g, cast_method):
    r"""Shallow-copy ``g`` and call ``cast_method`` on every feature frame.

    ``cast_method`` is the name of a zero-argument method of the frames
    (``"half"``, ``"bfloat16"``, ``"float"`` or ``"double"``).  The frame
    lists of the copy are rebound, so the original graph keeps its frames.
    """
    ret = copy.copy(g)
    ret._edge_frames = [
        getattr(frame, cast_method)() for frame in ret._edge_frames
    ]
    ret._node_frames = [
        getattr(frame, cast_method)() for frame in ret._node_frames
    ]
    return ret


def to_bfloat16(g):
    r"""Cast this graph to use bfloat16 for any
    floating-point edge and node feature data.

    A shallow copy is returned so that the original graph is not modified.
    Feature tensors that are not floating-point will not be modified.

    Returns
    -------
    DGLGraph
        Clone of graph with the feature data converted to bfloat16.
    """
    return _cast_float_features(g, "bfloat16")


def to_half(g):
    r"""Cast this graph to use float16 (half-precision) for any
    floating-point edge and node feature data.

    A shallow copy is returned so that the original graph is not modified.
    Feature tensors that are not floating-point will not be modified.

    Returns
    -------
    DGLGraph
        Clone of graph with the feature data converted to float16.
    """
    return _cast_float_features(g, "half")


def to_float(g):
    r"""Cast this graph to use float32 (single-precision) for any
    floating-point edge and node feature data.

    A shallow copy is returned so that the original graph is not modified.
    Feature tensors that are not floating-point will not be modified.

    Returns
    -------
    DGLGraph
        Clone of graph with the feature data converted to float32.
    """
    return _cast_float_features(g, "float")


def to_double(g):
    r"""Cast this graph to use float64 (double-precision) for any
    floating-point edge and node feature data.

    A shallow copy is returned so that the original graph is not modified.
    Feature tensors that are not floating-point will not be modified.

    Returns
    -------
    DGLGraph
        Clone of graph with the feature data converted to float64.
    """
    return _cast_float_features(g, "double")
def double_radius_node_labeling(g, src, dst):
    r"""Double Radius Node Labeling, as introduced in
    *Link Prediction Based on Graph Neural Networks*.

    This function computes the double radius node labeling for each node to
    mark nodes' different roles in an enclosing subgraph, given a target
    link.

    The node labels of source :math:`s` and destination :math:`t` are set to
    1 and those of unreachable nodes from source or destination are set to
    0. The labels of other nodes :math:`l` are defined according to the
    following hash function:

    :math:`l = 1 + min(d_s, d_t) + (d//2)[(d//2) + (d%2) - 1]`

    where :math:`d_s` and :math:`d_t` denote the shortest distance to the
    source and the target, respectively. :math:`d = d_s + d_t`.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    src : int
        The source node ID of the target link.
    dst : int
        The destination node ID of the target link.

    Returns
    -------
    Tensor
        Labels of all nodes. The tensor is of shape :math:`(N,)`, where
        :math:`N` is the number of nodes in the input graph.

    Example
    -------
    >>> import dgl
    >>> g = dgl.graph(([0,0,0,0,1,1,2,4], [1,2,3,6,3,4,4,5]))
    >>> dgl.double_radius_node_labeling(g, 0, 1)
    tensor([1, 1, 3, 2, 3, 7, 0])
    """
    adj = g.adj_external(scipy_fmt="csr")
    num_nodes = adj.shape[0]

    # Order the end points so that src is the smaller ID; the index shifts
    # below rely on it.
    if src > dst:
        src, dst = dst, src

    # Adjacency with the source node (row and column) removed.
    keep = [node for node in range(num_nodes) if node != src]
    adj_wo_src = adj[keep, :][:, keep]

    # Adjacency with the destination node (row and column) removed.
    keep = [node for node in range(num_nodes) if node != dst]
    adj_wo_dst = adj[keep, :][:, keep]

    # Distance to the source node, measured with the destination removed;
    # src < dst, so the source keeps its index.  The removed destination is
    # re-inserted with distance 0.
    ds = sparse.csgraph.shortest_path(
        adj_wo_dst, directed=False, unweighted=True, indices=src
    )
    ds = np.insert(ds, dst, 0, axis=0)

    # Distance to the destination node, measured with the source removed;
    # dropping the smaller ID shifts the destination down by one.
    dt = sparse.csgraph.shortest_path(
        adj_wo_src, directed=False, unweighted=True, indices=dst - 1
    )
    dt = np.insert(dt, src, 0, axis=0)

    d = ds + dt
    half = d // 2
    # suppress invalid value (nan) warnings
    with np.errstate(invalid="ignore"):
        z = 1 + np.stack([ds, dt]).min(axis=0) + half * (half + d % 2 - 1)
    z[src] = 1
    z[dst] = 1
    # unreachable nodes end up as nan above and get label 0
    z[np.isnan(z)] = 0

    return F.tensor(z, F.int64)
def shortest_dist(g, root=None, return_paths=False):
    r"""Compute shortest distance and paths on the given graph.

    Only unweighted cases are supported. Only directed paths (in which the
    edges are all oriented in the same direction) are considered effective.

    Parameters
    ----------
    g : DGLGraph
        The input graph. Must be homogeneous.
    root : int, optional
        Given a root node ID, it returns the shortest distance and paths
        (optional) between the root node and all the nodes. If None, it
        returns the results for all node pairs. Default: None.
    return_paths : bool, optional
        If True, it returns the shortest paths corresponding to the shortest
        distances. Default: False.

    Returns
    -------
    dist : Tensor
        The shortest distance tensor.

        * If :attr:`root` is a node ID, it is a tensor of shape
          :math:`(N,)`, where :math:`N` is the number of nodes.
          :attr:`dist[j]` gives the shortest distance from :attr:`root` to
          node :attr:`j`.
        * Otherwise, it is a tensor of shape :math:`(N, N)`.
          :attr:`dist[i][j]` gives the shortest distance from node
          :attr:`i` to node :attr:`j`.
        * The distance values of unreachable node pairs are filled with -1.
    paths : Tensor, optional
        The shortest path tensor. It is only returned when
        :attr:`return_paths` is True.

        * If :attr:`root` is a node ID, it is a tensor of shape
          :math:`(N, L)`, where :math:`L` is the length of the longest path.
          :attr:`path[j]` is the shortest path from node :attr:`root` to
          node :attr:`j`.
        * Otherwise, it is a tensor of shape :math:`(N, N, L)`.
          :attr:`path[i][j]` is the shortest path from node :attr:`i` to
          node :attr:`j`.
        * Each path is a vector that consists of edge IDs with paddings of
          -1 at the end.
        * Shortest path between a node and itself is a vector filled with
          -1's.

    Example
    -------
    >>> import dgl

    >>> g = dgl.graph(([0, 1, 1, 2], [2, 0, 3, 3]))
    >>> dgl.shortest_dist(g, root=0)
    tensor([ 0, -1,  1,  2])
    >>> dist, paths = dgl.shortest_dist(g, root=None, return_paths=True)
    >>> print(dist)
    tensor([[ 0, -1,  1,  2],
            [ 1,  0,  2,  1],
            [-1, -1,  0,  1],
            [-1, -1, -1,  0]])
    >>> print(paths)
    tensor([[[-1, -1],
             [-1, -1],
             [ 0, -1],
             [ 0,  3]],
            [[ 1, -1],
             [-1, -1],
             [ 1,  0],
             [ 2, -1]],
            [[-1, -1],
             [-1, -1],
             [-1, -1],
             [ 3, -1]],
            [[-1, -1],
             [-1, -1],
             [-1, -1],
             [-1, -1]]])
    """
    adj = g.adj_external(scipy_fmt="csr")
    if root is None:
        dist, pred = sparse.csgraph.shortest_path(
            adj,
            return_predecessors=True,
            unweighted=True,
            directed=True,
        )
    else:
        dist, pred = sparse.csgraph.dijkstra(
            adj,
            directed=True,
            indices=root,
            return_predecessors=True,
            unweighted=True,
        )
    # Unreachable pairs are reported as infinity; encode them as -1.
    dist[np.isinf(dist)] = -1

    def _as_int64_on_device(array):
        r"""convert a numpy array to an int64 tensor on the graph's device"""
        return F.copy_to(F.tensor(array, dtype=F.int64), g.device)

    if not return_paths:
        return _as_int64_on_device(dist)

    def _trace_nodes(pred_row, start, end):
        r"""return node IDs of a path from start to end given predecessors"""
        if start == end:
            return []
        # Walk backwards from ``end`` through the predecessors until the
        # start node is reached, then flip into forward order.
        chain = [end]
        while chain[-1] != start:
            chain.append(pred_row[chain[-1]])
        chain.reverse()
        return chain

    # construct paths with given predecessors
    max_len = int(dist[~np.isinf(dist)].max())
    num_nodes = g.num_nodes()
    roots = [root] if root is not None else list(range(num_nodes))
    paths = np.full([len(roots), num_nodes, max_len], -1, dtype=np.int64)

    masks = []
    path_src, path_dst = [], []
    for start in roots:
        pred_row = pred if root is not None else pred[start]
        mask = np.zeros([num_nodes, max_len], dtype=bool)
        for end in range(num_nodes):
            # A negative predecessor means there is no path to trace for
            # this target.
            if pred_row[end] < 0:
                continue
            nodes = _trace_nodes(pred_row, start, end)
            # Consecutive node pairs along the path are its edges.
            path_src.extend(nodes[:-1])
            path_dst.extend(nodes[1:])
            if nodes:
                mask[end, : len(nodes) - 1] = True
        masks.append(mask)
    masks = np.stack(masks, axis=0)

    # Resolve every (u, v) pair to an edge ID and scatter the IDs into the
    # slots marked by the masks, which follow the same order.
    edge_ids = g.edge_ids(np.array(path_src), np.array(path_dst))
    paths[masks] = F.asnumpy(edge_ids)

    if root is not None:
        paths = paths[0]

    return _as_int64_on_device(dist), _as_int64_on_device(paths)
Example ------- >>> import dgl >>> g = dgl.graph(([0,1,2,3,4,2,3,1,4,0], [2,3,1,4,0,0,1,2,3,4])) >>> dgl.svd_pe(g, k=2, padding=False, random_flip=True) tensor([[-6.3246e-01, -1.1373e-07, -6.3246e-01, 0.0000e+00], [-6.3246e-01, 7.6512e-01, -6.3246e-01, -7.6512e-01], [ 6.3246e-01, 4.7287e-01, 6.3246e-01, -4.7287e-01], [-6.3246e-01, -7.6512e-01, -6.3246e-01, 7.6512e-01], [ 6.3246e-01, -4.7287e-01, 6.3246e-01, 4.7287e-01]]) """ n = g.num_nodes() if not padding and n < k: raise ValueError( "The number of singular values k must be no greater than the " "number of nodes n, but " + f"got {k} and {n} respectively." ) a = g.adj_external(ctx=g.device, scipy_fmt="coo").toarray() u, d, vh = scipy.linalg.svd(a) v = vh.transpose() m = min(n, k) topm_u = u[:, 0:m] topm_v = v[:, 0:m] topm_sqrt_d = sparse.diags(np.sqrt(d[0:m])) encoding = np.concatenate( ((topm_u @ topm_sqrt_d), (topm_v @ topm_sqrt_d)), axis=1 ) # randomly flip row vectors if random_flip: rand_sign = 2 * (np.random.rand(n) > 0.5) - 1 flipped_encoding = F.tensor( rand_sign[:, np.newaxis] * encoding, dtype=F.float32 ) else: flipped_encoding = F.tensor(encoding, dtype=F.float32) if n < k: zero_padding = F.zeros( [n, 2 * (k - n)], dtype=F.float32, ctx=F.context(flipped_encoding) ) flipped_encoding = F.cat([flipped_encoding, zero_padding], dim=1) return flipped_encoding _init_api("dgl.transform", __name__) ================================================ FILE: python/dgl/transforms/module.py ================================================ ## # Copyright 2019-2021 Contributors # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
class RowFeatNormalizer(BaseTransform):
    r"""
    Row-normalizes the features given in ``node_feat_names`` and
    ``edge_feat_names``.

    The row normalization formula is:

    .. math::
      x = \frac{x}{\sum_i x_i}

    where :math:`x` denotes a row of the feature tensor.

    The divisor is clamped to a minimum of 1, so a row whose sum is smaller
    than 1 is returned unscaled instead of being blown up (or divided by
    zero).

    Parameters
    ----------
    subtract_min: bool
        If True, the minimum value of whole feature tensor will be subtracted
        before normalization. Default: False.
        Subtraction will make all values non-negative.
    node_feat_names : list[str], optional
        The names of the node feature tensors to be row-normalized.
        Default: `None`, which will not normalize any node feature tensor.
    edge_feat_names : list[str], optional
        The names of the edge feature tensors to be row-normalized.
        Default: `None`, which will not normalize any edge feature tensor.

    Example
    -------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch
    >>> from dgl import RowFeatNormalizer

    Case1: Row normalize features of a homogeneous graph.

    >>> transform = RowFeatNormalizer(subtract_min=True,
    ...                               node_feat_names=['h'], edge_feat_names=['w'])
    >>> g = dgl.rand_graph(5, 20)
    >>> g.ndata['h'] = torch.randn((g.num_nodes(), 5))
    >>> g.edata['w'] = torch.randn((g.num_edges(), 5))
    >>> g = transform(g)
    >>> print(g.ndata['h'].sum(1))
    tensor([1., 1., 1., 1., 1.])
    >>> print(g.edata['w'].sum(1))
    tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
            1., 1.])

    Case2: Row normalize features of a heterogeneous graph.

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])),
    ...     ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1]))
    ... })
    >>> g.ndata['h'] = {'game': torch.randn(2, 5), 'player': torch.randn(3, 5)}
    >>> g.edata['w'] = {
    ...     ('user', 'follows', 'user'): torch.randn(2, 5),
    ...     ('player', 'plays', 'game'): torch.randn(2, 5)
    ... }
    >>> g = transform(g)
    >>> print(g.ndata['h']['game'].sum(1), g.ndata['h']['player'].sum(1))
    tensor([1., 1.]) tensor([1., 1., 1.])
    >>> print(g.edata['w'][('user', 'follows', 'user')].sum(1),
    ...     g.edata['w'][('player', 'plays', 'game')].sum(1))
    tensor([1., 1.]) tensor([1., 1.])
    """

    def __init__(
        self, subtract_min=False, node_feat_names=None, edge_feat_names=None
    ):
        self.node_feat_names = (
            [] if node_feat_names is None else node_feat_names
        )
        self.edge_feat_names = (
            [] if edge_feat_names is None else edge_feat_names
        )
        self.subtract_min = subtract_min

    def row_normalize(self, feat):
        r"""

        Description
        -----------
        Row-normalize the given feature.

        The input tensor is never modified; a new tensor is returned.

        Parameters
        ----------
        feat : Tensor
            The feature to be normalized.

        Returns
        -------
        Tensor
            The normalized feature.
        """
        if self.subtract_min:
            feat = feat - feat.min()
        # Divisor is at least 1: rows summing to less than 1 stay unscaled.
        row_sum = feat.sum(dim=-1, keepdim=True).clamp(min=1.0)
        # Out-of-place on purpose. An in-place ``div_`` would rewrite the
        # tensor the caller stored in the graph (when ``subtract_min`` is
        # False) and cannot run on integer-typed features, whose quotient is
        # floating point.
        return feat / row_sum

    def __call__(self, g):
        """Normalize the configured node and edge features of ``g``.

        The features are written back onto ``g`` under the same names and the
        same graph object is returned.
        """
        for node_feat_name in self.node_feat_names:
            node_feat = g.ndata[node_feat_name]
            if isinstance(node_feat, torch.Tensor):
                # Single node type: ``ndata`` yields the tensor directly.
                g.ndata[node_feat_name] = self.row_normalize(node_feat)
            else:
                # Otherwise ``ndata`` yields a mapping keyed by node type.
                for ntype in node_feat.keys():
                    g.nodes[ntype].data[node_feat_name] = self.row_normalize(
                        g.nodes[ntype].data[node_feat_name]
                    )

        for edge_feat_name in self.edge_feat_names:
            edge_feat = g.edata[edge_feat_name]
            if isinstance(edge_feat, torch.Tensor):
                g.edata[edge_feat_name] = self.row_normalize(edge_feat)
            else:
                for etype in edge_feat.keys():
                    g.edges[etype].data[edge_feat_name] = self.row_normalize(
                        g.edges[etype].data[edge_feat_name]
                    )

        return g
edge_feat_names : list[str], optional The names of the edge features to be masked. Default: `None`, which will not mask any edge feature tensor. Example ------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> from dgl import FeatMask Case1 : Mask node and edge feature tensors of a homogeneous graph. >>> transform = FeatMask(node_feat_names=['h'], edge_feat_names=['w']) >>> g = dgl.rand_graph(5, 10) >>> g.ndata['h'] = torch.ones((g.num_nodes(), 10)) >>> g.edata['w'] = torch.ones((g.num_edges(), 10)) >>> g = transform(g) >>> print(g.ndata['h']) tensor([[0., 0., 1., 1., 0., 0., 1., 1., 1., 0.], [0., 0., 1., 1., 0., 0., 1., 1., 1., 0.], [0., 0., 1., 1., 0., 0., 1., 1., 1., 0.], [0., 0., 1., 1., 0., 0., 1., 1., 1., 0.], [0., 0., 1., 1., 0., 0., 1., 1., 1., 0.]]) >>> print(g.edata['w']) tensor([[1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.], [1., 1., 0., 1., 0., 1., 0., 0., 0., 1.]]) Case2 : Mask node and edge feature tensors of a heterogeneous graph. >>> g = dgl.heterograph({ ... ('user', 'follows', 'user'): (torch.tensor([1, 2]), torch.tensor([3, 4])), ... ('player', 'plays', 'game'): (torch.tensor([2, 2]), torch.tensor([1, 1])) ... 
}) >>> g.ndata['h'] = {'game': torch.ones(2, 5), 'player': torch.ones(3, 5)} >>> g.edata['w'] = {('user', 'follows', 'user'): torch.ones(2, 5)} >>> print(g.ndata['h']['game']) tensor([[1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.]]) >>> print(g.edata['w'][('user', 'follows', 'user')]) tensor([[1., 1., 1., 1., 1.], [1., 1., 1., 1., 1.]]) >>> g = transform(g) >>> print(g.ndata['h']['game']) tensor([[1., 1., 0., 1., 0.], [1., 1., 0., 1., 0.]]) >>> print(g.edata['w'][('user', 'follows', 'user')]) tensor([[0., 1., 0., 1., 0.], [0., 1., 0., 1., 0.]]) """ def __init__(self, p=0.5, node_feat_names=None, edge_feat_names=None): self.p = p self.node_feat_names = ( [] if node_feat_names is None else node_feat_names ) self.edge_feat_names = ( [] if edge_feat_names is None else edge_feat_names ) self.dist = Bernoulli(p) def __call__(self, g): # Fast path if self.p == 0: return g for node_feat_name in self.node_feat_names: if isinstance(g.ndata[node_feat_name], torch.Tensor): feat_mask = self.dist.sample( torch.Size( [ g.ndata[node_feat_name].shape[-1], ] ) ) g.ndata[node_feat_name][:, feat_mask.bool().to(g.device)] = 0 else: for ntype in g.ndata[node_feat_name].keys(): mask_shape = g.ndata[node_feat_name][ntype].shape[-1] feat_mask = self.dist.sample( torch.Size( [ mask_shape, ] ) ) g.ndata[node_feat_name][ntype][ :, feat_mask.bool().to(g.device) ] = 0 for edge_feat_name in self.edge_feat_names: if isinstance(g.edata[edge_feat_name], torch.Tensor): feat_mask = self.dist.sample( torch.Size( [ g.edata[edge_feat_name].shape[-1], ] ) ) g.edata[edge_feat_name][:, feat_mask.bool().to(g.device)] = 0 else: for etype in g.edata[edge_feat_name].keys(): mask_shape = g.edata[edge_feat_name][etype].shape[-1] feat_mask = self.dist.sample( torch.Size( [ mask_shape, ] ) ) g.edata[edge_feat_name][etype][ :, feat_mask.bool().to(g.device) ] = 0 return g class RandomWalkPE(BaseTransform): r"""Random Walk Positional Encoding, as introduced in `Graph Neural Networks with Learnable Structural and 
class LapPE(BaseTransform):
    r"""Laplacian Positional Encoding, as introduced in
    "Benchmarking Graph Neural Networks".

    This module only works for homogeneous bidirected graphs.

    Parameters
    ----------
    k : int
        Number of smallest non-trivial eigenvectors to use for positional
        encoding.
    feat_name : str, optional
        Name to store the computed positional encodings in ndata.
    eigval_name : str, optional
        If None, store laplacian eigenvectors only. Otherwise, it's the name
        to store corresponding laplacian eigenvalues in ndata.
        Default: None.
    padding : bool, optional
        If False, raise an exception when k>=n.
        Otherwise, add zero paddings in the end of eigenvectors and 'nan'
        paddings in the end of eigenvalues when k>=n.
        Default: False.
        n is the number of nodes in the given graph.

    Example
    -------
    >>> import dgl
    >>> from dgl import LapPE
    >>> transform1 = LapPE(k=3)
    >>> transform2 = LapPE(k=5, padding=True)
    >>> transform3 = LapPE(k=5, feat_name='eigvec', eigval_name='eigval', padding=True)
    >>> g = dgl.graph(([0,1,2,3,4,2,3,1,4,0], [2,3,1,4,0,0,1,2,3,4]))
    >>> g1 = transform1(g)
    >>> print(g1.ndata['PE'])
    tensor([[ 0.6325,  0.1039,  0.3489],
            [-0.5117,  0.2826,  0.6095],
            [ 0.1954,  0.6254, -0.5923],
            [-0.5117, -0.4508, -0.3938],
            [ 0.1954, -0.5612,  0.0278]])
    >>> g2 = transform2(g)
    >>> print(g2.ndata['PE'])
    tensor([[-0.6325, -0.1039,  0.3489, -0.2530,  0.0000],
            [ 0.5117, -0.2826,  0.6095,  0.4731,  0.0000],
            [-0.1954, -0.6254, -0.5923, -0.1361,  0.0000],
            [ 0.5117,  0.4508, -0.3938, -0.6295,  0.0000],
            [-0.1954,  0.5612,  0.0278,  0.5454,  0.0000]])
    >>> g3 = transform3(g)
    >>> print(g3.ndata['eigval'])
    tensor([[0.6910, 0.6910, 1.8090, 1.8090,    nan],
            [0.6910, 0.6910, 1.8090, 1.8090,    nan],
            [0.6910, 0.6910, 1.8090, 1.8090,    nan],
            [0.6910, 0.6910, 1.8090, 1.8090,    nan],
            [0.6910, 0.6910, 1.8090, 1.8090,    nan]])
    >>> print(g3.ndata['eigvec'])
    tensor([[ 0.6325, -0.1039,  0.3489,  0.2530,  0.0000],
            [-0.5117, -0.2826,  0.6095, -0.4731,  0.0000],
            [ 0.1954, -0.6254, -0.5923,  0.1361,  0.0000],
            [-0.5117,  0.4508, -0.3938,  0.6295,  0.0000],
            [ 0.1954,  0.5612,  0.0278, -0.5454,  0.0000]])
    """

    def __init__(self, k, feat_name="PE", eigval_name=None, padding=False):
        self.k = k
        self.feat_name = feat_name
        self.eigval_name = eigval_name
        self.padding = padding

    def __call__(self, g):
        """Compute the encoding for ``g``, store it in ``g.ndata`` and
        return ``g``.
        """
        if not self.eigval_name:
            pos_enc = functional.lap_pe(g, k=self.k, padding=self.padding)
        else:
            pos_enc, eigvals = functional.lap_pe(
                g, k=self.k, padding=self.padding, return_eigval=True
            )
            # Every node receives the same row of eigenvalues, so the vector
            # is turned into a single row and repeated once per node.
            eigval_row = F.reshape(eigvals, [1, -1])
            per_node_eigvals = F.repeat(eigval_row, g.num_nodes(), dim=0)
            g.ndata[self.eigval_name] = F.copy_to(per_node_eigvals, g.device)

        g.ndata[self.feat_name] = F.copy_to(pos_enc, g.device)
        return g
class AddSelfLoop(BaseTransform):
    r"""Add self-loops for each node in the graph and return a new graph.

    For heterogeneous graphs, self-loops are added only for edge types with
    same source and destination node types.

    Parameters
    ----------
    allow_duplicate : bool, optional
        If False, it will first remove self-loops to prevent duplicate
        self-loops.
    new_etypes : bool, optional
        If True, it will add an edge type 'self' per node type, which holds
        self-loops.
    edge_feat_names : list[str], optional
        The names of the self-loop features to apply `fill_data`. If None, it
        will apply `fill_data` to all self-loop features. Default: None.
    fill_data : int, float or str, optional
        The value to fill the self-loop features. Default: 1.

        * If ``fill_data`` is ``int`` or ``float``, self-loop features will be
          directly given by ``fill_data``.
        * if ``fill_data`` is ``str``, self-loop features will be generated by
          aggregating the features of the incoming edges of the corresponding
          nodes. The supported aggregation are: ``'mean'``, ``'sum'``,
          ``'max'``, ``'min'``.

    Example
    -------
    >>> import dgl
    >>> import torch
    >>> from dgl import AddSelfLoop

    Case1: Add self-loops for a homogeneous graph

    >>> transform = AddSelfLoop(fill_data='sum')
    >>> g = dgl.graph(([0, 0, 2], [2, 1, 0]))
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> new_g = transform(g)
    >>> print(new_g.edges())
    (tensor([0, 0, 2, 0, 1, 2]), tensor([2, 1, 0, 0, 1, 2]))
    >>> print(new_g.edata['he'])
    tensor([[0.],
            [1.],
            [2.],
            [2.],
            [1.],
            [0.]])

    Case2: Add self-loops for a heterogeneous graph

    >>> transform = AddSelfLoop(fill_data='sum')
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
    ...                                   torch.tensor([0, 1])),
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
    ...                                 torch.tensor([0, 1]))})
    >>> g.edata['feat'] = {('user', 'follows', 'user'): torch.randn(2, 5),
    ...                    ('user', 'plays', 'game'): torch.randn(2, 5)}
    >>> g.edata['feat1'] = {('user', 'follows', 'user'): torch.randn(2, 15),
    ...                     ('user', 'plays', 'game'): torch.randn(2, 15)}
    >>> new_g = transform(g)
    >>> print(new_g.edges(etype='plays'))
    (tensor([0, 1]), tensor([0, 1]))
    >>> print(new_g.edges(etype='follows'))
    (tensor([1, 2, 0, 1, 2]), tensor([0, 1, 0, 1, 2]))
    >>> print(new_g.edata['feat'][('user', 'follows', 'user')].shape)
    torch.Size([5, 5])

    Case3: Add self-etypes for a heterogeneous graph

    >>> transform = AddSelfLoop(new_etypes=True)
    >>> new_g = transform(g)
    >>> print(new_g.edges(etype='follows'))
    (tensor([1, 2, 0, 1, 2]), tensor([0, 1, 0, 1, 2]))
    >>> print(new_g.edges(etype=('game', 'self', 'game')))
    (tensor([0, 1]), tensor([0, 1]))
    """

    def __init__(
        self,
        allow_duplicate=False,
        new_etypes=False,
        edge_feat_names=None,
        fill_data=1.0,
    ):
        self.allow_duplicate = allow_duplicate
        self.new_etypes = new_etypes
        self.edge_feat_names = edge_feat_names
        self.fill_data = fill_data

    def transform_etype(self, c_etype, g):
        r"""

        Description
        -----------
        Transform the graph corresponding to a canonical edge type.

        Edge types whose source and destination node types differ are left
        untouched.

        Parameters
        ----------
        c_etype : tuple of str
            A canonical edge type.
        g : DGLGraph
            The graph.

        Returns
        -------
        DGLGraph
            The transformed graph.
        """
        src_ntype, _, dst_ntype = c_etype
        if src_ntype == dst_ntype:
            if not self.allow_duplicate:
                # Drop existing self-loops first so none ends up duplicated.
                g = functional.remove_self_loop(g, etype=c_etype)
            g = functional.add_self_loop(
                g,
                edge_feat_names=self.edge_feat_names,
                fill_data=self.fill_data,
                etype=c_etype,
            )
        return g

    def __call__(self, g):
        """Apply the transform to every canonical edge type of ``g`` and
        return the resulting graph.
        """
        for c_etype in g.canonical_etypes:
            g = self.transform_etype(c_etype, g)

        if not self.new_etypes:
            return g

        # One ``(ntype, 'self', ntype)`` relation per node type, linking each
        # node to itself.
        self_edges = {}
        for ntype in g.ntypes:
            nids = F.arange(0, g.num_nodes(ntype), g.idtype, g.device)
            self_edges[(ntype, "self", ntype)] = (nids, nids)

        # Existing relations are carried over as they are; on a key clash the
        # existing relation wins.
        kept_edges = {
            c_etype: g.edges(etype=c_etype) for c_etype in g.canonical_etypes
        }
        return update_graph_structure(g, {**self_edges, **kept_edges})
""" utype, _, vtype = c_etype if utype == vtype: g = functional.remove_self_loop(g, etype=c_etype) return g def __call__(self, g): for c_etype in g.canonical_etypes: g = self.transform_etype(c_etype, g) return g class AddReverse(BaseTransform): r"""Add a reverse edge :math:`(i,j)` for each edge :math:`(j,i)` in the input graph and return a new graph. For a heterogeneous graph, it adds a "reverse" edge type for each edge type to hold the reverse edges. For example, for a canonical edge type ('A', 'r', 'B'), it adds a canonical edge type ('B', 'rev_r', 'A'). Parameters ---------- copy_edata : bool, optional If True, the features of the reverse edges will be identical to the original ones. sym_new_etype : bool, optional If False, it will not add a reverse edge type if the source and destination node type in a canonical edge type are identical. Instead, it will directly add edges to the original edge type. Example ------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> from dgl import AddReverse Case1: Add reverse edges for a homogeneous graph >>> transform = AddReverse() >>> g = dgl.graph(([0], [1])) >>> g.edata['w'] = torch.ones(1, 2) >>> new_g = transform(g) >>> print(new_g.edges()) (tensor([0, 1]), tensor([1, 0])) >>> print(new_g.edata['w']) tensor([[1., 1.], [0., 0.]]) Case2: Add reverse edges for a homogeneous graph and copy edata >>> transform = AddReverse(copy_edata=True) >>> new_g = transform(g) >>> print(new_g.edata['w']) tensor([[1., 1.], [1., 1.]]) Case3: Add reverse edges for a heterogeneous graph >>> g = dgl.heterograph({ ... ('user', 'plays', 'game'): ([0, 1], [1, 1]), ... ('user', 'follows', 'user'): ([1, 2], [2, 2]) ... 
}) >>> new_g = transform(g) >>> print(new_g.canonical_etypes) [('game', 'rev_plays', 'user'), ('user', 'follows', 'user'), ('user', 'plays', 'game')] >>> print(new_g.edges(etype='rev_plays')) (tensor([1, 1]), tensor([0, 1])) >>> print(new_g.edges(etype='follows')) (tensor([1, 2, 2, 2]), tensor([2, 2, 1, 2])) """ def __init__(self, copy_edata=False, sym_new_etype=False): self.copy_edata = copy_edata self.sym_new_etype = sym_new_etype def transform_symmetric_etype(self, c_etype, g, data_dict): r"""Transform the graph corresponding to a symmetric canonical edge type. Parameters ---------- c_etype : tuple of str A canonical edge type. g : DGLGraph The graph. data_dict : dict The edge data to update. """ if self.sym_new_etype: self.transform_asymmetric_etype(c_etype, g, data_dict) else: src, dst = g.edges(etype=c_etype) src, dst = F.cat([src, dst], dim=0), F.cat([dst, src], dim=0) data_dict[c_etype] = (src, dst) def transform_asymmetric_etype(self, c_etype, g, data_dict): r"""Transform the graph corresponding to an asymmetric canonical edge type. Parameters ---------- c_etype : tuple of str A canonical edge type. g : DGLGraph The graph. data_dict : dict The edge data to update. """ utype, etype, vtype = c_etype src, dst = g.edges(etype=c_etype) data_dict.update( { c_etype: (src, dst), (vtype, "rev_{}".format(etype), utype): (dst, src), } ) def transform_etype(self, c_etype, g, data_dict): r"""Transform the graph corresponding to a canonical edge type. Parameters ---------- c_etype : tuple of str A canonical edge type. g : DGLGraph The graph. data_dict : dict The edge data to update. 
""" utype, _, vtype = c_etype if utype == vtype: self.transform_symmetric_etype(c_etype, g, data_dict) else: self.transform_asymmetric_etype(c_etype, g, data_dict) def __call__(self, g): data_dict = dict() for c_etype in g.canonical_etypes: self.transform_etype(c_etype, g, data_dict) new_g = update_graph_structure(g, data_dict, copy_edata=False) # Copy and expand edata for c_etype in g.canonical_etypes: utype, etype, vtype = c_etype if utype != vtype or self.sym_new_etype: rev_c_etype = (vtype, "rev_{}".format(etype), utype) for key, feat in g.edges[c_etype].data.items(): new_g.edges[c_etype].data[key] = feat if self.copy_edata: new_g.edges[rev_c_etype].data[key] = feat else: for key, feat in g.edges[c_etype].data.items(): new_feat = ( feat if self.copy_edata else F.zeros( F.shape(feat), F.dtype(feat), F.context(feat) ) ) new_g.edges[c_etype].data[key] = F.cat( [feat, new_feat], dim=0 ) return new_g class ToSimple(BaseTransform): r"""Convert a graph to a simple graph without parallel edges and return a new graph. Parameters ---------- return_counts : str, optional The edge feature name to hold the edge count in the original graph. aggregator : str, optional The way to coalesce features of duplicate edges. * ``'arbitrary'``: select arbitrarily from one of the duplicate edges * ``'sum'``: take the sum over the duplicate edges * ``'mean'``: take the mean over the duplicate edges Example ------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> from dgl import ToSimple Case1: Convert a homogeneous graph to a simple graph >>> transform = ToSimple() >>> g = dgl.graph(([0, 1, 1], [1, 2, 2])) >>> g.edata['w'] = torch.tensor([[0.1], [0.2], [0.3]]) >>> sg = transform(g) >>> print(sg.edges()) (tensor([0, 1]), tensor([1, 2])) >>> print(sg.edata['count']) tensor([1, 2]) >>> print(sg.edata['w']) tensor([[0.1000], [0.2000]]) Case2: Convert a heterogeneous graph to a simple graph >>> g = dgl.heterograph({ ... 
class LineGraph(BaseTransform):
    r"""Return the line graph of the input graph.

    The line graph :math:`L(G)` of a given graph :math:`G` is a graph where
    the nodes in :math:`L(G)` correspond to the edges in :math:`G`. For a pair
    of edges :math:`(u, v)` and :math:`(v, w)` in :math:`G`, there will be an
    edge from the node corresponding to :math:`(u, v)` to the node
    corresponding to :math:`(v, w)` in :math:`L(G)`.

    This module only works for homogeneous graphs.

    Parameters
    ----------
    backtracking : bool, optional
        If False, there will be no edge from the line graph node corresponding
        to :math:`(u, v)` to the line graph node corresponding to
        :math:`(v, u)`. If True (the default), such edges are kept. See Case2
        below, where the edges between the line graph nodes of :math:`(0, 1)`
        and :math:`(1, 0)` disappear.

    Example
    -------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch
    >>> from dgl import LineGraph

    Case1: Backtracking is True

    >>> transform = LineGraph()
    >>> g = dgl.graph(([0, 1, 1], [1, 0, 2]))
    >>> g.ndata['h'] = torch.tensor([[0.], [1.], [2.]])
    >>> g.edata['w'] = torch.tensor([[0.], [0.1], [0.2]])
    >>> new_g = transform(g)
    >>> print(new_g)
    Graph(num_nodes=3, num_edges=3,
          ndata_schemes={'w': Scheme(shape=(1,), dtype=torch.float32)}
          edata_schemes={})
    >>> print(new_g.edges())
    (tensor([0, 0, 1]), tensor([1, 2, 0]))

    Case2: Backtracking is False

    >>> transform = LineGraph(backtracking=False)
    >>> new_g = transform(g)
    >>> print(new_g.edges())
    (tensor([0]), tensor([2]))
    """

    def __init__(self, backtracking=True):
        self.backtracking = backtracking

    def __call__(self, g):
        """Build and return the line graph of ``g``.

        The work is delegated to ``functional.line_graph``, always called
        with ``shared=True``.
        """
        return functional.line_graph(
            g, backtracking=self.backtracking, shared=True
        )
Parameters ---------- metapaths : dict[str, list] The metapaths to add, mapping a metapath name to a metapath. For example, :attr:`{'co-author': [('person', 'author', 'paper'), ('paper', 'authored by', 'person')]}` keep_orig_edges : bool, optional If True, it will keep the edges of the original graph. Otherwise, it will drop them. Example ------- >>> import dgl >>> from dgl import AddMetaPaths >>> transform = AddMetaPaths({ ... 'accepted': [('person', 'author', 'paper'), ('paper', 'accepted', 'venue')], ... 'rejected': [('person', 'author', 'paper'), ('paper', 'rejected', 'venue')] ... }) >>> g = dgl.heterograph({ ... ('person', 'author', 'paper'): ([0, 0, 1], [1, 2, 2]), ... ('paper', 'accepted', 'venue'): ([1], [0]), ... ('paper', 'rejected', 'venue'): ([2], [1]) ... }) >>> new_g = transform(g) >>> print(new_g.edges(etype=('person', 'accepted', 'venue'))) (tensor([0]), tensor([0])) >>> print(new_g.edges(etype=('person', 'rejected', 'venue'))) (tensor([0, 1]), tensor([1, 1])) """ def __init__(self, metapaths, keep_orig_edges=True): self.metapaths = metapaths self.keep_orig_edges = keep_orig_edges def __call__(self, g): data_dict = dict() for meta_etype, metapath in self.metapaths.items(): meta_g = functional.metapath_reachable_graph(g, metapath) u_type = metapath[0][0] v_type = metapath[-1][-1] data_dict[(u_type, meta_etype, v_type)] = meta_g.edges() if self.keep_orig_edges: for c_etype in g.canonical_etypes: data_dict[c_etype] = g.edges(etype=c_etype) new_g = update_graph_structure(g, data_dict, copy_edata=True) else: new_g = update_graph_structure(g, data_dict, copy_edata=False) return new_g class Compose(BaseTransform): r"""Create a transform composed of multiple transforms in sequence. Parameters ---------- transforms : list of Callable A list of transform objects to apply in order. A transform object should inherit :class:`~dgl.BaseTransform` and implement :func:`~dgl.BaseTransform.__call__`. 
class GCNNorm(BaseTransform):
    r"""Apply symmetric adjacency normalization to an input graph and save the
    result edge weights, as described in "Semi-Supervised Classification with
    Graph Convolutional Networks".

    For a heterogeneous graph, this only applies to symmetric canonical edge
    types, whose source and destination node types are identical.

    Parameters
    ----------
    eweight_name : str, optional
        :attr:`edata` name to retrieve and store edge weights. The edge
        weights are optional.

    Example
    -------

    >>> import dgl
    >>> import torch
    >>> from dgl import GCNNorm
    >>> transform = GCNNorm()
    >>> g = dgl.graph(([0, 1, 2], [0, 0, 1]))

    Case1: Transform an unweighted graph

    >>> g = transform(g)
    >>> print(g.edata['w'])
    tensor([0.5000, 0.7071, 0.0000])

    Case2: Transform a weighted graph

    >>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3])
    >>> g = transform(g)
    >>> print(g.edata['w'])
    tensor([0.3333, 0.6667, 0.0000])
    """

    def __init__(self, eweight_name="w"):
        self.eweight_name = eweight_name

    def calc_etype(self, c_etype, g):
        r"""

        Description
        -----------
        Get edge weights for an edge type.

        All intermediate features are written inside ``g.local_scope()``.

        Parameters
        ----------
        c_etype : tuple of str
            A canonical edge type.
        g : DGLGraph
            The graph.

        Returns
        -------
        Tensor
            The normalized weight of every edge of type ``c_etype``.
        """
        ntype = c_etype[0]
        weight_key = self.eweight_name

        def _scale_weighted(edges):
            # Existing weight scaled by both endpoint factors.
            return {
                "w": edges.src["w"] * edges.data[weight_key] * edges.dst["w"]
            }

        def _scale_unweighted(edges):
            return {"w": edges.src["w"] * edges.dst["w"]}

        with g.local_scope():
            has_weight = weight_key in g.edges[c_etype].data
            if has_weight:
                # Weighted degree: sum of the incoming edge weights.
                g.update_all(
                    fn.copy_e(weight_key, "m"),
                    fn.sum("m", "deg"),
                    etype=c_etype,
                )
                degree = g.nodes[ntype].data["deg"]
            else:
                degree = F.astype(g.in_degrees(etype=c_etype), F.float32)

            # Zero-degree nodes give 1/sqrt(0) = inf, which is mapped to 0.
            inv_sqrt_degree = 1.0 / F.sqrt(degree)
            g.nodes[ntype].data["w"] = F.replace_inf_with_zero(
                inv_sqrt_degree
            )
            g.apply_edges(
                _scale_weighted if has_weight else _scale_unweighted,
                etype=c_etype,
            )
            return g.edges[c_etype].data["w"]

    def __call__(self, g):
        """Store the normalized weights of every symmetric edge type in
        ``g.edata`` under ``eweight_name`` and return ``g``.
        """
        # All weights are computed before any is written back to ``g``.
        normalized = {
            c_etype: self.calc_etype(c_etype, g)
            for c_etype in g.canonical_etypes
            if c_etype[0] == c_etype[2]
        }
        for c_etype, eweight in normalized.items():
            g.edges[c_etype].data[self.eweight_name] = eweight
        return g
This is the other way to control the sparsity of the result graph and will only be effective if :attr:`eps` is not given. Example ------- >>> import dgl >>> import torch >>> from dgl import PPR >>> transform = PPR(avg_degree=2) >>> g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3])) >>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]) >>> new_g = transform(g) >>> print(new_g.edata['w']) tensor([0.1500, 0.1500, 0.1500, 0.0255, 0.0163, 0.1500, 0.0638, 0.0383, 0.1500, 0.0510, 0.0217, 0.1500]) """ def __init__(self, alpha=0.15, eweight_name="w", eps=None, avg_degree=5): self.alpha = alpha self.eweight_name = eweight_name self.eps = eps self.avg_degree = avg_degree def get_eps(self, num_nodes, mat): r"""Get the threshold for graph sparsification.""" if self.eps is None: # Infer from self.avg_degree if self.avg_degree > num_nodes: return float("-inf") sorted_weights = torch.sort(mat.flatten(), descending=True).values return sorted_weights[self.avg_degree * num_nodes - 1] else: return self.eps def __call__(self, g): # Step1: PPR diffusion # (α - 1) A device = g.device eweight = (self.alpha - 1) * g.edata.get( self.eweight_name, F.ones((g.num_edges(),), F.float32, device) ) num_nodes = g.num_nodes() mat = F.zeros((num_nodes, num_nodes), F.float32, device) src, dst = g.edges() src, dst = F.astype(src, F.int64), F.astype(dst, F.int64) mat[dst, src] = eweight # I_n + (α - 1) A nids = F.astype(g.nodes(), F.int64) mat[nids, nids] = mat[nids, nids] + 1 # α (I_n + (α - 1) A)^-1 diff_mat = self.alpha * F.inverse(mat) # Step2: sparsification num_nodes = g.num_nodes() eps = self.get_eps(num_nodes, diff_mat) dst, src = (diff_mat >= eps).nonzero(as_tuple=False).t() data_dict = {g.canonical_etypes[0]: (src, dst)} new_g = update_graph_structure(g, data_dict, copy_edata=False) new_g.edata[self.eweight_name] = diff_mat[dst, src] return new_g def is_bidirected(g): """Return whether the graph is a bidirected graph. 
# pylint: disable=C0103
class HeatKernel(BaseTransform):
    r"""Apply heat kernel to an input graph for diffusion, as introduced in
    *Diffusion kernels on graphs and other discrete structures*.

    A sparsification will be applied to the weighted adjacency matrix after
    diffusion. Specifically, edges whose weight is below a threshold will be
    dropped.

    This module only works for homogeneous graphs.

    Parameters
    ----------
    t : float, optional
        Diffusion time, which commonly lies in :math:`[2, 10]`.
    eweight_name : str, optional
        :attr:`edata` name to retrieve and store edge weights. If it does
        not exist in an input graph, this module initializes a weight of 1
        for all edges. The edge weights should be a tensor of shape
        :math:`(E)`, where E is the number of edges.
    eps : float, optional
        The threshold to preserve edges in sparsification after diffusion.
        Edges of a weight smaller than eps will be dropped.
    avg_degree : int, optional
        The desired average node degree of the result graph. This is the
        other way to control the sparsity of the result graph and will only
        be effective if :attr:`eps` is not given.

    Example
    -------

    >>> import dgl
    >>> import torch
    >>> from dgl import HeatKernel

    >>> transform = HeatKernel(avg_degree=2)
    >>> g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]))
    >>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
    >>> new_g = transform(g)
    >>> print(new_g.edata['w'])
    tensor([0.1353, 0.1353, 0.1353, 0.0541, 0.0406, 0.1353, 0.1353, 0.0812,
            0.1353, 0.1083, 0.0541, 0.1353])
    """

    def __init__(self, t=2.0, eweight_name="w", eps=None, avg_degree=5):
        self.t = t
        self.eweight_name = eweight_name
        self.eps = eps
        self.avg_degree = avg_degree

    def get_eps(self, num_nodes, mat):
        r"""Get the threshold for graph sparsification.

        Parameters
        ----------
        num_nodes : int
            Number of nodes of the graph being transformed.
        mat : Tensor
            The dense diffusion matrix.

        Returns
        -------
        float or Tensor
            :attr:`eps` if it was given. Otherwise ``-inf`` when
            :attr:`avg_degree` exceeds :attr:`num_nodes` (keep everything),
            or the ``avg_degree * num_nodes``-th largest entry of
            :attr:`mat`.
        """
        if self.eps is None:
            # Infer from self.avg_degree
            if self.avg_degree > num_nodes:
                return float("-inf")
            sorted_weights = torch.sort(mat.flatten(), descending=True).values
            return sorted_weights[self.avg_degree * num_nodes - 1]
        else:
            return self.eps

    def __call__(self, g):
        # Step1: heat kernel diffusion
        # t A
        device = g.device
        eweight = self.t * g.edata.get(
            self.eweight_name, F.ones((g.num_edges(),), F.float32, device)
        )
        num_nodes = g.num_nodes()
        mat = F.zeros((num_nodes, num_nodes), F.float32, device)
        src, dst = g.edges()
        src, dst = F.astype(src, F.int64), F.astype(dst, F.int64)
        mat[dst, src] = eweight
        # t (A - I_n)
        nids = F.astype(g.nodes(), F.int64)
        mat[nids, nids] = mat[nids, nids] - self.t

        # ``eigh`` reads only one triangle of its input, so the
        # eigendecomposition shortcut is valid only when the weighted matrix
        # itself is symmetric. Having a reverse edge for every edge is not
        # sufficient: the two directions may carry different weights, in
        # which case the general matrix exponential must be used.
        if torch.allclose(mat, mat.t()):
            e, V = torch.linalg.eigh(mat, UPLO="U")
            diff_mat = V @ torch.diag(e.exp()) @ V.t()
        else:
            diff_mat_np = expm(mat.cpu().numpy())
            diff_mat = torch.Tensor(diff_mat_np).to(device)

        # Step2: sparsification
        eps = self.get_eps(num_nodes, diff_mat)
        dst, src = (diff_mat >= eps).nonzero(as_tuple=False).t()
        data_dict = {g.canonical_etypes[0]: (src, dst)}
        new_g = update_graph_structure(g, data_dict, copy_edata=False)
        new_g.edata[self.eweight_name] = diff_mat[dst, src]

        return new_g
`__. A sparsification will be applied to the weighted adjacency matrix after diffusion. Specifically, edges whose weight is below a threshold will be dropped. This module only works for homogeneous graphs. Parameters ---------- coefs : list[float], optional List of coefficients. :math:`\theta_k` for each power of the adjacency matrix. eweight_name : str, optional :attr:`edata` name to retrieve and store edge weights. If it does not exist in an input graph, this module initializes a weight of 1 for all edges. The edge weights should be a tensor of shape :math:`(E)`, where E is the number of edges. eps : float, optional The threshold to preserve edges in sparsification after diffusion. Edges of a weight smaller than eps will be dropped. avg_degree : int, optional The desired average node degree of the result graph. This is the other way to control the sparsity of the result graph and will only be effective if :attr:`eps` is not given. Example ------- >>> import dgl >>> import torch >>> from dgl import GDC >>> transform = GDC([0.3, 0.2, 0.1], avg_degree=2) >>> g = dgl.graph(([0, 1, 2, 3, 4], [2, 3, 4, 5, 3])) >>> g.edata['w'] = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5]) >>> new_g = transform(g) >>> print(new_g.edata['w']) tensor([0.3000, 0.3000, 0.0200, 0.3000, 0.0400, 0.3000, 0.1000, 0.0600, 0.3000, 0.0800, 0.0200, 0.3000]) """ def __init__(self, coefs, eweight_name="w", eps=None, avg_degree=5): self.coefs = coefs self.eweight_name = eweight_name self.eps = eps self.avg_degree = avg_degree def get_eps(self, num_nodes, mat): r"""Get the threshold for graph sparsification.""" if self.eps is None: # Infer from self.avg_degree if self.avg_degree > num_nodes: return float("-inf") sorted_weights = torch.sort(mat.flatten(), descending=True).values return sorted_weights[self.avg_degree * num_nodes - 1] else: return self.eps def __call__(self, g): # Step1: diffusion # A device = g.device eweight = g.edata.get( self.eweight_name, F.ones((g.num_edges(),), F.float32, device) ) 
class NodeShuffle(BaseTransform):
    r"""Randomly permute the node features of a graph.

    For every node type one random permutation is drawn and applied to all
    node features of that type. The transform works on a clone of the input
    graph.

    Example
    -------

    >>> import dgl
    >>> import torch
    >>> from dgl import NodeShuffle

    >>> transform = NodeShuffle()
    >>> g = dgl.graph(([0, 1], [1, 2]))
    >>> g.ndata['h1'] = torch.tensor([[1., 2.], [3., 4.], [5., 6.]])
    >>> g.ndata['h2'] = torch.tensor([[7., 8.], [9., 10.], [11., 12.]])
    >>> g = transform(g)
    >>> print(g.ndata['h1'])
    tensor([[5., 6.],
            [3., 4.],
            [1., 2.]])
    >>> print(g.ndata['h2'])
    tensor([[11., 12.],
            [ 9., 10.],
            [ 7.,  8.]])
    """

    def __call__(self, g):
        shuffled = g.clone()
        for ntype in shuffled.ntypes:
            # One permutation per node type, shared by all of its features.
            order = F.rand_shuffle(F.astype(shuffled.nodes(ntype), F.int64))
            # Snapshot the (name, tensor) pairs before overwriting them.
            features = list(shuffled.nodes[ntype].data.items())
            for name, values in features:
                shuffled.nodes[ntype].data[name] = values[order]
        return shuffled
# pylint: disable=C0103
class DropEdge(BaseTransform):
    r"""Randomly drop edges, as described in
    `DropEdge: Towards Deep Graph Convolutional Networks on Node Classification
    <https://arxiv.org/abs/1907.10903>`__ and `Graph Contrastive Learning with
    Augmentations <https://arxiv.org/abs/2010.13902>`__.

    Each edge of every canonical edge type is removed independently with
    probability :attr:`p`. The transform works on a clone of the input graph.

    Parameters
    ----------
    p : float, optional
        Probability of an edge to be dropped.

    Example
    -------

    >>> import dgl
    >>> import torch
    >>> from dgl import DropEdge

    >>> transform = DropEdge()
    >>> g = dgl.rand_graph(5, 20)
    >>> g.edata['h'] = torch.arange(g.num_edges())
    >>> new_g = transform(g)
    >>> print(new_g)
    Graph(num_nodes=5, num_edges=12,
          ndata_schemes={}
          edata_schemes={'h': Scheme(shape=(), dtype=torch.int64)})
    >>> print(new_g.edata['h'])
    tensor([0, 1, 3, 7, 8, 10, 11, 12, 13, 15, 18, 19])
    """

    def __init__(self, p=0.5):
        self.p = p
        self.dist = Bernoulli(p)

    def __call__(self, g):
        result = g.clone()

        # Nothing can be dropped when p is zero; skip the sampling entirely.
        if self.p != 0:
            for c_etype in result.canonical_etypes:
                edge_count = result.num_edges(c_etype)
                # 1 marks an edge to be removed.
                drop_flags = self.dist.sample(torch.Size([edge_count]))
                all_eids = result.edges(form="eid", etype=c_etype)
                drop_mask = drop_flags.bool().to(result.device)
                result.remove_edges(all_eids[drop_mask], etype=c_etype)

        return result
Specifically, this module provides four options for :math:`T`. **raw**: raw adjacency matrix :math:`A` **rw**: random walk (row-normalized) adjacency matrix :math:`D^{-1}A`, where :math:`D` is the degree matrix. **gcn**: symmetrically normalized adjacency matrix used by `GCN `__, :math:`D^{-1/2}AD^{-1/2}` **ppr**: approximate personalized PageRank used by `APPNP `__ .. math:: H^{0} &= X H^{l+1} &= (1-\alpha)\left(D^{-1/2}AD^{-1/2} H^{l}\right) + \alpha X This module only works for homogeneous graphs. Parameters ---------- k : int The maximum number of times for node feature diffusion. in_feat_name : str, optional :attr:`g.ndata[{in_feat_name}]` should store the input node features. Default: 'feat' out_feat_name : str, optional :attr:`g.ndata[{out_feat_name}_i]` will store the result of diffusing input node features for i times. Default: 'out_feat' eweight_name : str, optional Name to retrieve edge weights from :attr:`g.edata`. Default: None, treating the graph as unweighted. diffuse_op : str, optional The diffusion operator to use, which can be 'raw', 'rw', 'gcn', or 'ppr'. Default: 'raw' alpha : float, optional Restart probability if :attr:`diffuse_op` is :attr:`'ppr'`, which commonly lies in :math:`[0.05, 0.2]`. 
Default: 0.2 Example ------- >>> import dgl >>> import torch >>> from dgl import SIGNDiffusion >>> transform = SIGNDiffusion(k=2, eweight_name='w') >>> num_nodes = 5 >>> num_edges = 20 >>> g = dgl.rand_graph(num_nodes, num_edges) >>> g.ndata['feat'] = torch.randn(num_nodes, 10) >>> g.edata['w'] = torch.randn(num_edges) >>> transform(g) Graph(num_nodes=5, num_edges=20, ndata_schemes={'feat': Scheme(shape=(10,), dtype=torch.float32), 'out_feat_1': Scheme(shape=(10,), dtype=torch.float32), 'out_feat_2': Scheme(shape=(10,), dtype=torch.float32)} edata_schemes={'w': Scheme(shape=(), dtype=torch.float32)}) """ def __init__( self, k, in_feat_name="feat", out_feat_name="out_feat", eweight_name=None, diffuse_op="raw", alpha=0.2, ): self.k = k self.in_feat_name = in_feat_name self.out_feat_name = out_feat_name self.eweight_name = eweight_name self.diffuse_op = diffuse_op self.alpha = alpha if diffuse_op == "raw": self.diffuse = self.raw elif diffuse_op == "rw": self.diffuse = self.rw elif diffuse_op == "gcn": self.diffuse = self.gcn elif diffuse_op == "ppr": self.diffuse = self.ppr else: raise DGLError( "Expect diffuse_op to be from ['raw', 'rw', 'gcn', 'ppr'], \ got {}".format( diffuse_op ) ) def __call__(self, g): feat_list = self.diffuse(g) for i in range(1, self.k + 1): g.ndata[self.out_feat_name + "_" + str(i)] = feat_list[i - 1] return g def raw(self, g): use_eweight = False if (self.eweight_name is not None) and self.eweight_name in g.edata: use_eweight = True feat_list = [] with g.local_scope(): if use_eweight: message_func = fn.u_mul_e( self.in_feat_name, self.eweight_name, "m" ) else: message_func = fn.copy_u(self.in_feat_name, "m") for _ in range(self.k): g.update_all(message_func, fn.sum("m", self.in_feat_name)) feat_list.append(g.ndata[self.in_feat_name]) return feat_list def rw(self, g): use_eweight = False if (self.eweight_name is not None) and self.eweight_name in g.edata: use_eweight = True feat_list = [] with g.local_scope(): g.ndata["h"] = 
class ToLevi(BaseTransform):
    r"""Transform a graph into its heterogeneous Levi graph by turning every
    edge into an intermediate node. Only homogeneous directed graphs are
    supported.

    Example
    -------
    >>> import dgl
    >>> import torch as th
    >>> from dgl import ToLevi

    >>> transform = ToLevi()
    >>> g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]))
    >>> g.ndata['h'] = th.randn((g.num_nodes(), 2))
    >>> g.edata['w'] = th.randn((g.num_edges(), 2))
    >>> lg = transform(g)
    >>> lg
    Graph(num_nodes={'edge': 4, 'node': 4},
          num_edges={('edge', 'e2n', 'node'): 4,
                     ('node', 'n2e', 'edge'): 4},
          metagraph=[('edge', 'node', 'e2n'),
                     ('node', 'edge', 'n2e')])
    >>> lg.nodes('node')
    tensor([0, 1, 2, 3])
    >>> lg.nodes('edge')
    tensor([0, 1, 2, 3])
    >>> lg.nodes['node'].data['h'].shape
    torch.Size([4, 2])
    >>> lg.nodes['edge'].data['w'].shape
    torch.Size([4, 2])
    """

    def __init__(self):
        """The transform has no configuration."""

    def __call__(self, g):
        r"""
        Parameters
        ----------
        g : DGLGraph
            The input graph, should be a homogeneous directed graph.

        Returns
        -------
        DGLGraph
            The Levi graph of input, will be a heterogeneous graph, where
            nodes of ntypes ``'node'`` and ``'edge'`` have corresponding IDs
            of nodes and edges in the original graph. Edge features of the
            input graph are copied to corresponding new nodes of ntype
            ``'edge'``.
        """
        device, idtype = g.device, g.idtype
        sources, destinations = g.edges()

        # Original edge i becomes node i of type 'edge':
        #   src(i) --n2e--> i --e2n--> dst(i)
        node_to_edge = (sources, F.arange(0, g.num_edges(), idtype, device))
        edge_to_node = (
            F.arange(0, g.num_edges(), idtype, device),
            destinations,
        )
        levi_g = convert.heterograph(
            {
                ("node", "n2e", "edge"): node_to_edge,
                ("edge", "e2n", "node"): edge_to_node,
            },
            idtype=idtype,
            device=device,
        )

        # Copy ndata and edata
        # Since the node types in dgl.heterograph are in alphabetical order
        # ('edge' < 'node'), edge_frames should be in front of node_frames.
        frames_of_nodes = utils.extract_node_subframes(
            g, nodes_or_device=device
        )
        frames_of_edges = utils.extract_edge_subframes(
            g, edges_or_device=device
        )
        utils.set_new_frames(
            levi_g, node_frames=frames_of_edges + frames_of_nodes
        )

        return levi_g
def to_block(g, dst_nodes=None, include_dst_in_src=True, src_nodes=None):
    """Convert a graph into a bipartite-structured *block* for message passing.

    A block is a graph consisting of two sets of nodes: the
    *source* nodes and *destination* nodes.  The source and destination nodes
    can have multiple node types.  All the edges connect from source nodes to
    destination nodes.

    Specifically, the source nodes and destination nodes will have the same
    node types as the ones in the original graph.  DGL maps each edge
    ``(u, v)`` with edge type ``(utype, etype, vtype)`` in the original graph
    to the edge with type ``etype`` connecting from node ID ``u`` of type
    ``utype`` in the source side to node ID ``v`` of type ``vtype`` in the
    destination side.

    For blocks returned by :func:`to_block`, the destination nodes of the
    block will only contain the nodes that have at least one inbound edge of
    any type.  The source nodes of the block will only contain the nodes that
    appear in the destination nodes, as well as the nodes that have at least
    one outbound edge connecting to one of the destination nodes.

    The destination nodes are specified by the :attr:`dst_nodes` argument if
    it is not None.

    Parameters
    ----------
    g : DGLGraph
        The graph.  Can be either on CPU or GPU.
    dst_nodes : Tensor or dict[str, Tensor], optional
        The list of destination nodes.

        If a tensor is given, the graph must have only one node type.

        If given, it must be a superset of all the nodes that have at least
        one inbound edge.  An error will be raised otherwise.
    include_dst_in_src : bool
        If False, do not include destination nodes in source nodes.

        (Default: True)
    src_nodes : Tensor or dict[str, Tensor], optional
        The list of source nodes (and prefixed by destination nodes if
        `include_dst_in_src` is True).

        If a tensor is given, the graph must have only one node type.
        If a dict is given, node types missing from it get an empty list.

    Returns
    -------
    DGLBlock
        The new graph describing the block.

        The node IDs induced for each type in both sides would be stored in
        feature ``dgl.NID``.

        The edge IDs induced for each type would be stored in feature
        ``dgl.EID``.

    Raises
    ------
    DGLError
        If :attr:`dst_nodes` is specified but it is not a superset of all the
        nodes that have at least one inbound edge.

        If :attr:`dst_nodes` or :attr:`src_nodes` is a tensor while the graph
        has more than one node type.
    ValueError
        If :attr:`g` and the given node lists are not in the same context.

    Notes
    -----
    :func:`to_block` is most commonly used in customizing neighborhood
    sampling for stochastic training on a large graph.  Please refer to the
    user guide :ref:`guide-minibatch` for a more thorough discussion about
    the methodology of stochastic training.

    See also :func:`create_block` for more flexible construction of blocks.

    Examples
    --------
    Converting a homogeneous graph to a block as described above:

    >>> g = dgl.graph(([1, 2], [2, 3]))
    >>> block = dgl.to_block(g, torch.LongTensor([3, 2]))

    The destination nodes would be exactly the same as the ones given:
    [3, 2].

    >>> induced_dst = block.dstdata[dgl.NID]
    >>> induced_dst
    tensor([3, 2])

    The first few source nodes would also be exactly the same as
    the ones given.  The rest of the nodes are the ones necessary for message
    passing into nodes 3, 2.  This means that the node 1 would be included.

    >>> induced_src = block.srcdata[dgl.NID]
    >>> induced_src
    tensor([3, 2, 1])

    You can notice that the first two nodes are identical to the given nodes
    as well as the destination nodes.

    The induced edges can also be obtained by the following:

    >>> block.edata[dgl.EID]
    tensor([2, 1])

    This indicates that edge (2, 3) and (1, 2) are included in the result
    graph.  You can verify that the first edge in the block indeed maps to
    the edge (2, 3), and the second edge in the block indeed maps to the edge
    (1, 2):

    >>> src, dst = block.edges(order='eid')
    >>> induced_src[src], induced_dst[dst]
    (tensor([2, 1]), tensor([3, 2]))

    The destination nodes specified must be a superset of the nodes that have
    edges connecting to them.  For example, the following will raise an error
    since the destination nodes does not contain node 3, which has an edge
    connecting to it.

    >>> g = dgl.graph(([1, 2], [2, 3]))
    >>> dgl.to_block(g, torch.LongTensor([2]))     # error

    Converting a heterogeneous graph to a block is similar, except that when
    specifying the destination nodes, you have to give a dict:

    >>> g = dgl.heterograph({('A', '_E', 'B'): ([1, 2], [2, 3])})

    If you don't specify any node of type A on the destination side, the node
    type ``A`` in the block would have zero nodes on the destination side.

    >>> block = dgl.to_block(g, {'B': torch.LongTensor([3, 2])})
    >>> block.number_of_dst_nodes('A')
    0
    >>> block.number_of_dst_nodes('B')
    2
    >>> block.dstnodes['B'].data[dgl.NID]
    tensor([3, 2])

    The source side would contain all the nodes on the destination side:

    >>> block.srcnodes['B'].data[dgl.NID]
    tensor([3, 2])

    As well as all the nodes that have connections to the nodes on the
    destination side:

    >>> block.srcnodes['A'].data[dgl.NID]
    tensor([2, 1])

    See also
    --------
    create_block
    """
    if dst_nodes is None:
        # Find all nodes that appeared as destinations
        dst_nodes = defaultdict(list)
        for etype in g.canonical_etypes:
            _, dst = g.edges(etype=etype)
            dst_nodes[etype[2]].append(dst)
        dst_nodes = {
            ntype: F.unique(F.cat(values, 0))
            for ntype, values in dst_nodes.items()
        }
    elif not isinstance(dst_nodes, Mapping):
        # dst_nodes is a Tensor, check if the g has only one type.
        if len(g.ntypes) > 1:
            raise DGLError(
                "Graph has more than one node type; please specify a dict for dst_nodes."
            )
        dst_nodes = {g.ntypes[0]: dst_nodes}

    dst_node_ids = [
        utils.toindex(dst_nodes.get(ntype, []), g._idtype_str).tousertensor(
            ctx=F.to_backend_ctx(g._graph.ctx)
        )
        for ntype in g.ntypes
    ]
    dst_node_ids_nd = [F.to_dgl_nd(nodes) for nodes in dst_node_ids]

    for d in dst_node_ids_nd:
        if g._graph.ctx != d.ctx:
            raise ValueError("g and dst_nodes need to have the same context.")

    if src_nodes is not None:
        if not isinstance(src_nodes, Mapping):
            # src_nodes is a Tensor, check if the g has only one type.
            if len(g.ntypes) > 1:
                raise DGLError(
                    "Graph has more than one node type; please specify a dict for src_nodes."
                )
            src_nodes = {g.ntypes[0]: src_nodes}
        # Both the wrapped tensor and a user-supplied dict are converted
        # here, so a dict is honored rather than silently replaced by the
        # auto-generated source nodes.
        src_node_ids = [
            F.copy_to(
                F.tensor(src_nodes.get(ntype, []), dtype=g.idtype),
                F.to_backend_ctx(g._graph.ctx),
            )
            for ntype in g.ntypes
        ]
        src_node_ids_nd = [F.to_dgl_nd(nodes) for nodes in src_node_ids]

        for d in src_node_ids_nd:
            if g._graph.ctx != d.ctx:
                raise ValueError(
                    "g and src_nodes need to have the same context."
                )
    else:
        # use an empty list to signal we need to generate it
        src_node_ids_nd = []

    new_graph_index, src_nodes_ids_nd, induced_edges_nd = _CAPI_DGLToBlock(
        g._graph, dst_node_ids_nd, include_dst_in_src, src_node_ids_nd
    )

    # The new graph duplicates the original node types to SRC and DST sets.
    new_ntypes = (g.ntypes, g.ntypes)
    new_graph = DGLBlock(new_graph_index, new_ntypes, g.etypes)
    assert new_graph.is_unibipartite  # sanity check

    # The source node lists returned by the C API are the ones stored on the
    # block, whether they were supplied or generated.
    src_node_ids = [F.from_dgl_nd(src) for src in src_nodes_ids_nd]
    edge_ids = [F.from_dgl_nd(eid) for eid in induced_edges_nd]

    node_frames = utils.extract_node_subframes_for_block(
        g, src_node_ids, dst_node_ids
    )
    edge_frames = utils.extract_edge_subframes(g, edge_ids)
    utils.set_new_frames(
        new_graph, node_frames=node_frames, edge_frames=edge_frames
    )

    return new_graph
def bfs_edges_generator(graph, source, reverse=False):
    """Generate edge frontiers with a breadth-first search.

    Parameters
    ----------
    graph : DGLGraph
        The graph object. It must have a single canonical edge type.
    source : list, tensor of nodes
        Source nodes.
    reverse : bool, default False
        If True, traverse following the in-edge direction.

    Returns
    -------
    list of edge frontiers
        Each edge frontier is a list or tensor of edge ids.

    Examples
    --------
    Given a graph (directed, edges from small node id to large, sorted
    in lexicographical order of source-destination node id tuple):
    ::

                  2 - 4
                 / \\
            0 - 1 - 3 - 5

    >>> g = dgl.graph(([0, 1, 1, 2, 2, 3], [1, 2, 3, 3, 4, 5]))
    >>> list(dgl.bfs_edges_generator(g, 0))
    [tensor([0]), tensor([1, 2]), tensor([4, 5])]
    """
    assert isinstance(
        graph, DGLGraph
    ), "DGLHeteroGraph is merged with DGLGraph, Please use DGLGraph"
    assert (
        len(graph.canonical_etypes) == 1
    ), "bfs_edges_generator only support homogeneous graph"

    id_dtype = graph._idtype_str
    # Workaround before support for GPU graph: traverse a CPU copy.
    cpu_index = graph._graph.copy_to(utils.to_dgl_context(F.cpu()))
    start = utils.toindex(source, dtype=id_dtype)
    result = _CAPI_DGLBFSEdges_v2(cpu_index, start.todgltensor(), reverse)

    # result(0): all visited edge ids; result(1): the size of each frontier.
    flat_edges = utils.toindex(result(0), dtype=id_dtype).tousertensor()
    # TODO(minjie): how to support directly creating python list
    frontier_sizes = utils.toindex(result(1)).tonumpy().tolist()
    return F.split(flat_edges, frontier_sizes, dim=0)
Examples -------- Given a graph (directed, edges from small node id to large): :: 2 - 4 / \\ 0 - 1 - 3 - 5 >>> g = dgl.graph(([0, 1, 1, 2, 2, 3], [1, 2, 3, 3, 4, 5])) >>> list(dgl.topological_nodes_generator(g)) [tensor([0]), tensor([1]), tensor([2]), tensor([3, 4]), tensor([5])] """ assert isinstance( graph, DGLGraph ), "DGLHeteroGraph is merged with DGLGraph, Please use DGLGraph" assert ( len(graph.canonical_etypes) == 1 ), "topological_nodes_generator only support homogeneous graph" # Workaround before support for GPU graph gidx = graph._graph.copy_to(utils.to_dgl_context(F.cpu())) ret = _CAPI_DGLTopologicalNodes_v2(gidx, reverse) all_nodes = utils.toindex(ret(0), dtype=graph._idtype_str).tousertensor() # TODO(minjie): how to support directly creating python list sections = utils.toindex(ret(1)).tonumpy().tolist() return F.split(all_nodes, sections, dim=0) def dfs_edges_generator(graph, source, reverse=False): """Edge frontiers generator using depth-first-search (DFS). Multiple source nodes can be specified to start the DFS traversal. One needs to make sure that each source node belongs to different connected component, so the frontiers can be easily merged. Otherwise, the behavior is undefined. Parameters ---------- graph : DGLGraph The graph object. source : list, tensor of nodes Source nodes. reverse : bool, optional If True, traverse following the in-edge direction. Returns ------- list of edge frontiers Each edge frontier is a list or tensor of edge ids. 
Examples -------- Given a graph (directed, edges from small node id to large): :: 2 - 4 / \\ 0 - 1 - 3 - 5 Edge addition order [(0, 1), (1, 2), (1, 3), (2, 3), (2, 4), (3, 5)] >>> g = dgl.graph(([0, 1, 1, 2, 2, 3], [1, 2, 3, 3, 4, 5])) >>> list(dgl.dfs_edges_generator(g, 0)) [tensor([0]), tensor([1]), tensor([3]), tensor([5]), tensor([4])] """ assert isinstance( graph, DGLGraph ), "DGLHeteroGraph is merged with DGLGraph, Please use DGLGraph" assert ( len(graph.canonical_etypes) == 1 ), "dfs_edges_generator only support homogeneous graph" # Workaround before support for GPU graph gidx = graph._graph.copy_to(utils.to_dgl_context(F.cpu())) source = utils.toindex(source, dtype=graph._idtype_str) ret = _CAPI_DGLDFSEdges_v2(gidx, source.todgltensor(), reverse) all_edges = utils.toindex(ret(0), dtype=graph._idtype_str).tousertensor() # TODO(minjie): how to support directly creating python list sections = utils.toindex(ret(1)).tonumpy().tolist() return F.split(all_edges, sections, dim=0) def dfs_labeled_edges_generator( graph, source, reverse=False, has_reverse_edge=False, has_nontree_edge=False, return_labels=True, ): """Produce edges in a depth-first-search (DFS) labeled by type. There are three labels: FORWARD(0), REVERSE(1), NONTREE(2) A FORWARD edge is one in which `u` has been visited but `v` has not. A REVERSE edge is one in which both `u` and `v` have been visited and the edge is in the DFS tree. A NONTREE edge is one in which both `u` and `v` have been visited but the edge is NOT in the DFS tree. See ``networkx``'s :func:`dfs_labeled_edges ` for more details. Multiple source nodes can be specified to start the DFS traversal. One needs to make sure that each source node belongs to different connected component, so the frontiers can be easily merged. Otherwise, the behavior is undefined. Parameters ---------- graph : DGLGraph The graph object. source : list, tensor of nodes Source nodes. reverse : bool, optional If true, traverse following the in-edge direction. 
has_reverse_edge : bool, optional True to include reverse edges. has_nontree_edge : bool, optional True to include nontree edges. return_labels : bool, optional True to return the labels of each edge. Returns ------- list of edge frontiers Each edge frontier is a list or tensor of edge ids. list of list of int Label of each edge, organized in the same order as the edge frontiers. Examples -------- Given a graph (directed, edges from small node id to large): :: 2 - 4 / \\ 0 - 1 - 3 - 5 Edge addition order [(0, 1), (1, 2), (1, 3), (2, 3), (2, 4), (3, 5)] >>> g = dgl.graph(([0, 1, 1, 2, 2, 3], [1, 2, 3, 3, 4, 5])) >>> list(dgl.dfs_labeled_edges_generator(g, 0, has_nontree_edge=True)) (tensor([0]), tensor([1]), tensor([3]), tensor([5]), tensor([4]), tensor([2])), (tensor([0]), tensor([0]), tensor([0]), tensor([0]), tensor([0]), tensor([2])) """ assert isinstance( graph, DGLGraph ), "DGLHeteroGraph is merged with DGLGraph, Please use DGLGraph" assert ( len(graph.canonical_etypes) == 1 ), "dfs_labeled_edges_generator only support homogeneous graph" # Workaround before support for GPU graph gidx = graph._graph.copy_to(utils.to_dgl_context(F.cpu())) source = utils.toindex(source, dtype=graph._idtype_str) ret = _CAPI_DGLDFSLabeledEdges_v2( gidx, source.todgltensor(), reverse, has_reverse_edge, has_nontree_edge, return_labels, ) all_edges = utils.toindex(ret(0), dtype=graph._idtype_str).tousertensor() # TODO(minjie): how to support directly creating python list if return_labels: all_labels = utils.toindex(ret(1)).tousertensor() sections = utils.toindex(ret(2)).tonumpy().tolist() return ( F.split(all_edges, sections, dim=0), F.split(all_labels, sections, dim=0), ) else: sections = utils.toindex(ret(1)).tonumpy().tolist() return F.split(all_edges, sections, dim=0) _init_api("dgl.traversal") ================================================ FILE: python/dgl/udf.py ================================================ """User-defined function related data structures.""" from 
__future__ import absolute_import class EdgeBatch(object): """The class that can represent a batch of edges. Parameters ---------- graph : DGLGraph Graph object. eid : Tensor Edge IDs. etype : (str, str, str) Edge type. src_data : dict[str, Tensor] Src node features. edge_data : dict[str, Tensor] Edge features. dst_data : dict[str, Tensor] Dst node features. """ def __init__(self, graph, eid, etype, src_data, edge_data, dst_data): self._graph = graph self._eid = eid self._etype = etype self._src_data = src_data self._edge_data = edge_data self._dst_data = dst_data @property def src(self): """Return a view of the source node features for the edges in the batch. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph and set a node feature 'h'. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.ndata['h'] = torch.ones(2, 1) >>> # Define a UDF that retrieves the source node features for edges. >>> def edge_udf(edges): >>> # edges.src['h'] is a tensor of shape (E, 1), >>> # where E is the number of edges in the batch. >>> return {'src': edges.src['h']} >>> # Copy features from source nodes to edges. >>> g.apply_edges(edge_udf) >>> g.edata['src'] tensor([[1.], [1.], [1.]]) >>> # Use edge UDF in message passing, which is equivalent to >>> # dgl.function.copy_u. >>> import dgl.function as fn >>> g.update_all(edge_udf, fn.sum('src', 'h')) >>> g.ndata['h'] tensor([[1.], [2.]]) """ return self._src_data @property def dst(self): """Return a view of the destination node features for the edges in the batch. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph and set a node feature 'h'. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.ndata['h'] = torch.tensor([[0.], [1.]]) >>> # Define a UDF that retrieves the destination node features for >>> # edges. 
>>> def edge_udf(edges): >>> # edges.dst['h'] is a tensor of shape (E, 1), >>> # where E is the number of edges in the batch. >>> return {'dst': edges.dst['h']} >>> # Copy features from destination nodes to edges. >>> g.apply_edges(edge_udf) >>> g.edata['dst'] tensor([[1.], [1.], [1.]]) >>> # Use edge UDF in message passing. >>> import dgl.function as fn >>> g.update_all(edge_udf, fn.sum('dst', 'h')) >>> g.ndata['h'] tensor([[0.], [2.]]) """ return self._dst_data @property def data(self): """Return a view of the edge features for the edges in the batch. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph and set an edge feature 'h'. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.edata['h'] = torch.tensor([[1.], [1.], [1.]]) >>> # Define a UDF that retrieves the feature 'h' for all edges. >>> def edge_udf(edges): >>> # edges.data['h'] is a tensor of shape (E, 1), >>> # where E is the number of edges in the batch. >>> return {'data': edges.data['h']} >>> # Make a copy of the feature with name 'data'. >>> g.apply_edges(edge_udf) >>> g.edata['data'] tensor([[1.], [1.], [1.]]) >>> # Use edge UDF in message passing, which is equivalent to >>> # dgl.function.copy_e. >>> import dgl.function as fn >>> g.update_all(edge_udf, fn.sum('data', 'h')) >>> g.ndata['h'] tensor([[1.], [2.]]) """ return self._edge_data def edges(self): """Return the edges in the batch. Returns ------- (U, V, EID) : (Tensor, Tensor, Tensor) The edges in the batch. For each :math:`i`, :math:`(U[i], V[i])` is an edge from :math:`U[i]` to :math:`V[i]` with ID :math:`EID[i]`. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> # Define a UDF that retrieves and concatenates the end nodes of the >>> # edges. 
>>> def edge_udf(edges): >>> src, dst, _ = edges.edges() >>> return {'uv': torch.stack([src, dst], dim=1).float()} >>> # Create a feature 'uv' with the end nodes of the edges. >>> g.apply_edges(edge_udf) >>> g.edata['uv'] tensor([[0., 1.], [1., 1.], [1., 0.]]) >>> # Use edge UDF in message passing. >>> import dgl.function as fn >>> g.update_all(edge_udf, fn.sum('uv', 'h')) >>> g.ndata['h'] tensor([[1., 0.], [1., 2.]]) """ u, v = self._graph.find_edges(self._eid, etype=self.canonical_etype) return u, v, self._eid def batch_size(self): """Return the number of edges in the batch. Returns ------- int Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> # Define a UDF that returns one for each edge. >>> def edge_udf(edges): >>> return {'h': torch.ones(edges.batch_size(), 1)} >>> # Creates a feature 'h'. >>> g.apply_edges(edge_udf) >>> g.edata['h'] tensor([[1.], [1.], [1.]]) >>> # Use edge UDF in message passing. >>> import dgl.function as fn >>> g.update_all(edge_udf, fn.sum('h', 'h')) >>> g.ndata['h'] tensor([[1.], [2.]]) """ return len(self._eid) def __len__(self): """Return the number of edges in this edge batch. Returns ------- int """ return self.batch_size() @property def canonical_etype(self): """Return the canonical edge type (i.e. triplet of source, edge, and destination node type) for this edge batch.""" return self._etype class NodeBatch(object): """The class to represent a batch of nodes. Parameters ---------- graph : DGLGraph Graph object. nodes : Tensor Node ids. ntype : str, optional The node type of this node batch, data : dict[str, Tensor] Node feature data. msgs : dict[str, Tensor], optional Messages data. 
""" def __init__(self, graph, nodes, ntype, data, msgs=None): self._graph = graph self._nodes = nodes self._ntype = ntype self._data = data self._msgs = msgs @property def data(self): """Return a view of the node features for the nodes in the batch. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph and set a feature 'h'. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.ndata['h'] = torch.ones(2, 1) >>> # Define a UDF that computes the sum of the messages received and >>> # the original feature for each node. >>> def node_udf(nodes): >>> # nodes.data['h'] is a tensor of shape (N, 1), >>> # nodes.mailbox['m'] is a tensor of shape (N, D, 1), >>> # where N is the number of nodes in the batch, D is the number >>> # of messages received per node for this node batch. >>> return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)} >>> # Use node UDF in message passing. >>> import dgl.function as fn >>> g.update_all(fn.copy_u('h', 'm'), node_udf) >>> g.ndata['h'] tensor([[2.], [3.]]) """ return self._data @property def mailbox(self): """Return a view of the messages received. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph and set a feature 'h'. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.ndata['h'] = torch.ones(2, 1) >>> # Define a UDF that computes the sum of the messages received and >>> # the original feature for each node. >>> def node_udf(nodes): >>> # nodes.data['h'] is a tensor of shape (N, 1), >>> # nodes.mailbox['m'] is a tensor of shape (N, D, 1), >>> # where N is the number of nodes in the batch, D is the number >>> # of messages received per node for this node batch. >>> return {'h': nodes.data['h'] + nodes.mailbox['m'].sum(1)} >>> # Use node UDF in message passing. 
>>> import dgl.function as fn >>> g.update_all(fn.copy_u('h', 'm'), node_udf) >>> g.ndata['h'] tensor([[2.], [3.]]) """ return self._msgs def nodes(self): """Return the nodes in the batch. Returns ------- NID : Tensor The IDs of the nodes in the batch. :math:`NID[i]` gives the ID of the i-th node. Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph and set a feature 'h'. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.ndata['h'] = torch.ones(2, 1) >>> # Define a UDF that computes the sum of the messages received and >>> # the original ID for each node. >>> def node_udf(nodes): >>> # nodes.nodes() is a tensor of shape (N), >>> # nodes.mailbox['m'] is a tensor of shape (N, D, 1), >>> # where N is the number of nodes in the batch, D is the number >>> # of messages received per node for this node batch. >>> return {'h': nodes.nodes().unsqueeze(-1).float() >>> + nodes.mailbox['m'].sum(1)} >>> # Use node UDF in message passing. >>> import dgl.function as fn >>> g.update_all(fn.copy_u('h', 'm'), node_udf) >>> g.ndata['h'] tensor([[1.], [3.]]) """ return self._nodes def batch_size(self): """Return the number of nodes in the batch. Returns ------- int Examples -------- The following example uses PyTorch backend. >>> import dgl >>> import torch >>> # Instantiate a graph. >>> g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 1, 0]))) >>> g.ndata['h'] = torch.ones(2, 1) >>> # Define a UDF that computes the sum of the messages received for >>> # each node and increments the result by 1. >>> def node_udf(nodes): >>> return {'h': torch.ones(nodes.batch_size(), 1) >>> + nodes.mailbox['m'].sum(1)} >>> # Use node UDF in message passing. >>> import dgl.function as fn >>> g.update_all(fn.copy_u('h', 'm'), node_udf) >>> g.ndata['h'] tensor([[2.], [3.]]) """ return len(self._nodes) def __len__(self): """Return the number of nodes in this node batch. 
Returns ------- int """ return self.batch_size() @property def ntype(self): """Return the node type of this node batch, if available.""" return self._ntype ================================================ FILE: python/dgl/utils/__init__.py ================================================ """Internal utilities.""" from .checks import * from .data import * from .exception import * from .filter import * from .internal import * from .pin_memory import * from .shared_mem import * try: from packaging import version except ImportError: # If packaging isn't installed, try and use the vendored copy in setuptools from setuptools.extern.packaging import version ================================================ FILE: python/dgl/utils/checks.py ================================================ """Checking and logging utilities.""" # pylint: disable=invalid-name from __future__ import absolute_import, division from collections.abc import Mapping from .. import backend as F from .._ffi.function import _init_api from ..base import DGLError def prepare_tensor(g, data, name): """Convert the data to ID tensor and check its ID type and context. If the data is already in tensor type, raise error if its ID type and context does not match the graph's. Otherwise, convert it to tensor type of the graph's ID type and ctx and return. Parameters ---------- g : DGLGraph Graph. data : int, iterable of int, tensor Data. name : str Name of the data. Returns ------- Tensor Data in tensor object. """ if F.is_tensor(data): if F.dtype(data) != g.idtype: raise DGLError( f'Expect argument "{name}" to have data type {g.idtype}. ' f"But got {F.dtype(data)}." ) if F.context(data) != g.device and not g.is_pinned(): raise DGLError( f'Expect argument "{name}" to have device {g.device}. ' f"But got {F.context(data)}." 
) ret = data else: data = F.tensor(data) if not ( F.ndim(data) > 0 and F.shape(data)[0] == 0 ) and F.dtype( # empty tensor data ) not in ( F.int32, F.int64, ): raise DGLError( 'Expect argument "{}" to have data type int32 or int64,' " but got {}.".format(name, F.dtype(data)) ) ret = F.copy_to(F.astype(data, g.idtype), g.device) if F.ndim(ret) == 0: ret = F.unsqueeze(ret, 0) if F.ndim(ret) > 1: raise DGLError( 'Expect a 1-D tensor for argument "{}". But got {}.'.format( name, ret ) ) return ret def prepare_tensor_dict(g, data, name): """Convert a dictionary of data to a dictionary of ID tensors. Calls ``prepare_tensor`` on each key-value pair. Parameters ---------- g : DGLGraph Graph. data : dict[str, (int, iterable of int, tensor)] Data dict. name : str Name of the data. Returns ------- dict[str, tensor] """ return { key: prepare_tensor(g, val, '{}["{}"]'.format(name, key)) for key, val in data.items() } def prepare_tensor_or_dict(g, data, name): """Convert data to either a tensor or a dictionary depending on input type. Parameters ---------- g : DGLGraph Graph. data : dict[str, (int, iterable of int, tensor)] Data dict. name : str Name of the data. Returns ------- tensor or dict[str, tensor] """ return ( prepare_tensor_dict(g, data, name) if isinstance(data, Mapping) else prepare_tensor(g, data, name) ) def parse_edges_arg_to_eid(g, edges, etid, argname="edges"): """Parse the :attr:`edges` argument and return an edge ID tensor. The resulting edge ID tensor has the same ID type and device of :attr:`g`. Parameters ---------- g : DGLGraph Graph edges : pair of Tensor, Tensor, iterable[int] Argument for specifying edges. etid : int Edge type ID. argname : str, optional Argument name. 
Returns ------- Tensor Edge ID tensor """ if isinstance(edges, tuple): u, v = edges u = prepare_tensor(g, u, "{}[0]".format(argname)) v = prepare_tensor(g, v, "{}[1]".format(argname)) eid = g.edge_ids(u, v, etype=g.canonical_etypes[etid]) else: eid = prepare_tensor(g, edges, argname) return eid def check_all_same_idtype(glist, name): """Check all the graphs have the same idtype.""" if len(glist) == 0: return idtype = glist[0].idtype for i, g in enumerate(glist): if g.idtype != idtype: raise DGLError( "Expect {}[{}] to have {} type ID, but got {}.".format( name, i, idtype, g.idtype ) ) def check_device(data, device): """Check if data is on the target device. Parameters ---------- data : Tensor or dict[str, Tensor] device: Backend device. Returns ------- Bool: True if the data is on the target device. """ if isinstance(data, dict): for v in data.values(): if v.device != device: return False elif data.device != device: return False return True def check_all_same_device(glist, name): """Check all the graphs have the same device.""" if len(glist) == 0: return device = glist[0].device for i, g in enumerate(glist): if g.device != device: raise DGLError( "Expect {}[{}] to be on device {}, but got {}.".format( name, i, device, g.device ) ) def check_all_same_schema(schemas, name): """Check the list of schemas are the same.""" if len(schemas) == 0: return for i, schema in enumerate(schemas): if schema != schemas[0]: raise DGLError( "Expect all graphs to have the same schema on {}, " "but graph {} got\n\t{}\nwhich is different from\n\t{}.".format( name, i, schema, schemas[0] ) ) def check_all_same_schema_for_keys(schemas, keys, name): """Check the list of schemas are the same on the given keys.""" if len(schemas) == 0: return head = None keys = set(keys) for i, schema in enumerate(schemas): if not keys.issubset(schema.keys()): raise DGLError( "Expect all graphs to have keys {} on {}, " "but graph {} got keys {}.".format(keys, name, i, schema.keys()) ) if head is None: head = 
{k: schema[k] for k in keys} else: target = {k: schema[k] for k in keys} if target != head: raise DGLError( "Expect all graphs to have the same schema for keys {} on {}, " "but graph {} got \n\t{}\n which is different from\n\t{}.".format( keys, name, i, target, head ) ) def check_valid_idtype(idtype): """Check whether the value of the idtype argument is valid (int32/int64) Parameters ---------- idtype : data type The framework object of a data type. """ if idtype not in [None, F.int32, F.int64]: raise DGLError( "Expect idtype to be a framework object of int32/int64, " "got {}".format(idtype) ) def is_sorted_srcdst(src, dst, num_src=None, num_dst=None): """Checks whether an edge list is in ascending src-major order (e.g., first sorted by ``src`` and then by ``dst``). Parameters ---------- src : IdArray The tensor of source nodes for each edge. dst : IdArray The tensor of destination nodes for each edge. num_src : int, optional The number of source nodes. num_dst : int, optional The number of destination nodes. Returns ------- bool, bool Whether ``src`` is in ascending order, and whether ``dst`` is in ascending order with respect to ``src``. """ # for some versions of MXNET and TensorFlow, num_src and num_dst get # incorrectly marked as floats, so force them as integers here if num_src is None: num_src = int(F.as_scalar(F.max(src, dim=0) + 1)) if num_dst is None: num_dst = int(F.as_scalar(F.max(dst, dim=0) + 1)) src = F.zerocopy_to_dgl_ndarray(src) dst = F.zerocopy_to_dgl_ndarray(dst) sorted_status = _CAPI_DGLCOOIsSorted(src, dst, num_src, num_dst) row_sorted = sorted_status > 0 col_sorted = sorted_status > 1 return row_sorted, col_sorted _init_api("dgl.utils.checks") ================================================ FILE: python/dgl/utils/data.py ================================================ """Data utilities.""" from collections import namedtuple import networkx as nx import scipy as sp from .. import backend as F from ..base import DGLError from . 
import checks def elist2tensor(elist, idtype): """Function to convert an edge list to edge tensors. Parameters ---------- elist : iterable of int pairs List of (src, dst) node ID pairs. idtype : int32, int64, optional Integer ID type. Must be int32 or int64. Returns ------- (Tensor, Tensor) Edge tensors. """ if len(elist) == 0: u, v = [], [] else: u, v = zip(*elist) u = list(u) v = list(v) return F.tensor(u, idtype), F.tensor(v, idtype) def scipy2tensor(spmat, idtype): """Function to convert a scipy matrix to a sparse adjacency matrix tuple. Note that the data array of the scipy matrix is discarded. Parameters ---------- spmat : scipy.sparse.spmatrix SciPy sparse matrix. idtype : int32, int64, optional Integer ID type. Must be int32 or int64. Returns ------- (str, tuple[Tensor]) A tuple containing the format as well as the list of tensors representing the sparse matrix. """ if spmat.format in ["csr", "csc"]: indptr = F.tensor(spmat.indptr, idtype) indices = F.tensor(spmat.indices, idtype) data = F.tensor([], idtype) return SparseAdjTuple(spmat.format, (indptr, indices, data)) else: spmat = spmat.tocoo() row = F.tensor(spmat.row, idtype) col = F.tensor(spmat.col, idtype) return SparseAdjTuple("coo", (row, col)) def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None): """Function to convert a networkx graph to edge tensors. Parameters ---------- nx_graph : nx.Graph NetworkX graph. idtype : int32, int64, optional Integer ID type. Must be int32 or int64. edge_id_attr_name : str, optional Key name for edge ids in the NetworkX graph. If not found, we will consider the graph not to have pre-specified edge ids. (Default: None) Returns ------- (Tensor, Tensor) Edge tensors. 
""" if not nx_graph.is_directed(): nx_graph = nx_graph.to_directed() # Relabel nodes using consecutive integers nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering="sorted") has_edge_id = edge_id_attr_name is not None if has_edge_id: num_edges = nx_graph.number_of_edges() src = [0] * num_edges dst = [0] * num_edges for u, v, attr in nx_graph.edges(data=True): eid = int(attr[edge_id_attr_name]) if eid < 0 or eid >= nx_graph.number_of_edges(): raise DGLError( "Expect edge IDs to be a non-negative integer smaller than {:d}, " "got {:d}".format(num_edges, eid) ) src[eid] = u dst[eid] = v else: src = [] dst = [] for e in nx_graph.edges: src.append(e[0]) dst.append(e[1]) src = F.tensor(src, idtype) dst = F.tensor(dst, idtype) return src, dst SparseAdjTuple = namedtuple("SparseAdjTuple", ["format", "arrays"]) def graphdata2tensors( data, idtype=None, bipartite=False, infer_node_count=True, **kwargs ): """Function to convert various types of data to edge tensors and infer the number of nodes. Parameters ---------- data : graph data Various kinds of graph data. Possible data types are: - ``(row, col)`` - ``('coo', (row, col))`` - ``('csr', (indptr, indices, edge_ids))`` - ``('csc', (indptr, indices, edge_ids))`` - SciPy sparse matrix - NetworkX graph idtype : int32, int64, optional Integer ID type. If None, try infer from the data and if fail use int64. bipartite : bool, optional Whether infer number of nodes of a bipartite graph -- num_src and num_dst can be different. infer_node_count : bool, optional Whether infer number of nodes at all. If False, num_src and num_dst are returned as None. kwargs - edge_id_attr_name : The name (str) of the edge attribute that stores the edge IDs in the NetworkX graph. - top_map : The dictionary mapping the original IDs of the source nodes to the new ones. - bottom_map : The dictionary mapping the original IDs of the destination nodes to the new ones. 
Returns ------- data : SparseAdjTuple A tuple with the sparse matrix format and the adjacency matrix tensors. num_src : int Number of source nodes. num_dst : int Number of destination nodes. """ # Convert tuple to SparseAdjTuple if isinstance(data, tuple): if not isinstance(data[0], str): # (row, col) format, convert to ('coo', (row, col)) data = ("coo", data) data = SparseAdjTuple(*data) if idtype is None and not ( isinstance(data, SparseAdjTuple) and F.is_tensor(data.arrays[0]) ): # preferred default idtype is int64 # if data is tensor and idtype is None, infer the idtype from tensor idtype = F.int64 checks.check_valid_idtype(idtype) if isinstance(data, SparseAdjTuple) and ( not all(F.is_tensor(a) for a in data.arrays) ): # (Iterable, Iterable) type data, convert it to (Tensor, Tensor) if len(data.arrays[0]) == 0: # force idtype for empty list data = SparseAdjTuple( data.format, tuple(F.tensor(a, idtype) for a in data.arrays) ) else: # convert the iterable to tensor and keep its native data type so we can check # its validity later data = SparseAdjTuple( data.format, tuple(F.tensor(a) for a in data.arrays) ) num_src, num_dst = None, None if isinstance(data, SparseAdjTuple): if idtype is not None: data = SparseAdjTuple( data.format, tuple(F.astype(a, idtype) for a in data.arrays) ) if infer_node_count: num_src, num_dst = infer_num_nodes(data, bipartite=bipartite) elif isinstance(data, list): src, dst = elist2tensor(data, idtype) data = SparseAdjTuple("coo", (src, dst)) if infer_node_count: num_src, num_dst = infer_num_nodes(data, bipartite=bipartite) elif isinstance(data, sp.sparse.spmatrix): # We can get scipy matrix's number of rows and columns easily. if infer_node_count: num_src, num_dst = infer_num_nodes(data, bipartite=bipartite) data = scipy2tensor(data, idtype) elif isinstance(data, nx.Graph): # We can get networkx graph's number of sources and destinations easily. 
if infer_node_count: num_src, num_dst = infer_num_nodes(data, bipartite=bipartite) edge_id_attr_name = kwargs.get("edge_id_attr_name", None) if bipartite: top_map = kwargs.get("top_map") bottom_map = kwargs.get("bottom_map") src, dst = networkxbipartite2tensors( data, idtype, top_map=top_map, bottom_map=bottom_map, edge_id_attr_name=edge_id_attr_name, ) else: src, dst = networkx2tensor( data, idtype, edge_id_attr_name=edge_id_attr_name ) data = SparseAdjTuple("coo", (src, dst)) else: raise DGLError("Unsupported graph data type:", type(data)) return data, num_src, num_dst def networkxbipartite2tensors( nx_graph, idtype, top_map, bottom_map, edge_id_attr_name=None ): """Function to convert a networkx bipartite to edge tensors. Parameters ---------- nx_graph : nx.Graph NetworkX graph. It must follow the bipartite graph convention of networkx. Each node has an attribute ``bipartite`` with values 0 and 1 indicating which set it belongs to. top_map : dict The dictionary mapping the original node labels to the node IDs for the source type. bottom_map : dict The dictionary mapping the original node labels to the node IDs for the destination type. idtype : int32, int64, optional Integer ID type. Must be int32 or int64. edge_id_attr_name : str, optional Key name for edge ids in the NetworkX graph. If not found, we will consider the graph not to have pre-specified edge ids. (Default: None) Returns ------- (Tensor, Tensor) Edge tensors. 
""" has_edge_id = edge_id_attr_name is not None if has_edge_id: num_edges = nx_graph.number_of_edges() src = [0] * num_edges dst = [0] * num_edges for u, v, attr in nx_graph.edges(data=True): if u not in top_map: raise DGLError( "Expect the node {} to have attribute bipartite=0 " "with edge {}".format(u, (u, v)) ) if v not in bottom_map: raise DGLError( "Expect the node {} to have attribute bipartite=1 " "with edge {}".format(v, (u, v)) ) eid = int(attr[edge_id_attr_name]) if eid < 0 or eid >= nx_graph.number_of_edges(): raise DGLError( "Expect edge IDs to be a non-negative integer smaller than {:d}, " "got {:d}".format(num_edges, eid) ) src[eid] = top_map[u] dst[eid] = bottom_map[v] else: src = [] dst = [] for e in nx_graph.edges: u, v = e[0], e[1] if u not in top_map: raise DGLError( "Expect the node {} to have attribute bipartite=0 " "with edge {}".format(u, (u, v)) ) if v not in bottom_map: raise DGLError( "Expect the node {} to have attribute bipartite=1 " "with edge {}".format(v, (u, v)) ) src.append(top_map[u]) dst.append(bottom_map[v]) src = F.tensor(src, dtype=idtype) dst = F.tensor(dst, dtype=idtype) return src, dst def infer_num_nodes(data, bipartite=False): """Function for inferring the number of nodes. Parameters ---------- data : graph data Supported types are: * SparseTuple ``(sparse_fmt, arrays)`` where ``arrays`` can be either ``(src, dst)`` or ``(indptr, indices, data)``. * SciPy matrix. * NetworkX graph. bipartite : bool, optional Whether infer number of nodes of a bipartite graph -- num_src and num_dst can be different. Returns ------- num_src : int Number of source nodes. num_dst : int Number of destination nodes. or None If the inference failed. 
""" if isinstance(data, tuple) and len(data) == 2: if not isinstance(data[0], str): raise TypeError( "Expected sparse format as a str, but got %s" % type(data[0]) ) if data[0] == "coo": # ('coo', (src, dst)) format u, v = data[1] nsrc = F.as_scalar(F.max(u, dim=0)) + 1 if len(u) > 0 else 0 ndst = F.as_scalar(F.max(v, dim=0)) + 1 if len(v) > 0 else 0 elif data[0] == "csr": # ('csr', (indptr, indices, eids)) format indptr, indices, _ = data[1] nsrc = F.shape(indptr)[0] - 1 ndst = ( F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0 ) elif data[0] == "csc": # ('csc', (indptr, indices, eids)) format indptr, indices, _ = data[1] ndst = F.shape(indptr)[0] - 1 nsrc = ( F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0 ) else: raise ValueError("unknown format %s" % data[0]) elif isinstance(data, sp.sparse.spmatrix): nsrc, ndst = data.shape[0], data.shape[1] elif isinstance(data, nx.Graph): if data.number_of_nodes() == 0: nsrc = ndst = 0 elif not bipartite: nsrc = ndst = data.number_of_nodes() else: nsrc = len( {n for n, d in data.nodes(data=True) if d["bipartite"] == 0} ) ndst = data.number_of_nodes() - nsrc else: return None if not bipartite: nsrc = ndst = max(nsrc, ndst) return nsrc, ndst def to_device(data, device): """Transfer the tensor or dictionary of tensors to the given device. Nothing will happen if the device of the original tensor is the same as target device. Parameters ---------- data : Tensor or dict[str, Tensor] The data. device : device The target device. Returns ------- Tensor or dict[str, Tensor] The output data. """ if isinstance(data, dict): return {k: F.copy_to(v, device) for k, v in data.items()} else: return F.copy_to(data, device) ================================================ FILE: python/dgl/utils/exception.py ================================================ """Exception wrapper classes to properly display exceptions under multithreading or multiprocessing. 
class ExceptionWrapper(object):
    r"""Capture an exception and its formatted traceback so that it can be
    handed to, and re-raised in, another thread.

    Only the exception type and the traceback rendered as text are kept; the
    ``exc_info`` tuple itself is never stored, see
    NOTE [ Python Traceback Reference Cycle Problem ].

    Parameters
    ----------
    exc_info : tuple, optional
        A ``(type, value, traceback)`` triple. Defaults to ``sys.exc_info()``.
    where : str, optional
        Description of where the exception was caught; it is embedded in the
        message built by :meth:`reraise`.
    """

    def __init__(self, exc_info=None, where="in background"):
        # Deliberately keep only derived values, never the triple itself.
        captured = sys.exc_info() if exc_info is None else exc_info
        self.exc_type = captured[0]
        self.exc_msg = "".join(traceback.format_exception(*captured))
        self.where = where

    def reraise(self):
        r"""Raise the wrapped exception in the current thread.

        The new exception carries a message such as "Caught ValueError in
        DataLoader worker process 2. Original Traceback:", followed by the
        original traceback text. If the exception type cannot be constructed
        from a single message argument, a ``RuntimeError`` is raised instead.
        """
        exc_cls = self.exc_type
        msg = "Caught {} {}.\nOriginal {}".format(
            exc_cls.__name__, self.where, self.exc_msg
        )
        if exc_cls == KeyError:
            # KeyError calls repr() on its argument (usually a dict key),
            # which makes stack traces unreadable. This will not change in
            # Python (https://bugs.python.org/issue2651), so wrap the message
            # in a str subclass whose repr is the text itself.
            msg = KeyErrorMessage(msg)
        elif getattr(exc_cls, "message", None):
            # Some exceptions take a non-str first argument but explicitly
            # have a message field.
            raise exc_cls(message=msg)
        try:
            rebuilt = exc_cls(msg)
        except TypeError:
            # The constructor needs more arguments than we can supply.
            raise RuntimeError(msg) from None
        raise rebuilt
class InconsistentDtypeException(DGLError):
    """Exception class for inconsistent dtype between graph and tensor.

    Every message is prefixed with a fixed hint telling the user that the
    input tensor must have the same dtype as the graph index.

    Parameters
    ----------
    msg : str, optional
        Detail appended after the fixed hint.
    *args, **kwargs
        Forwarded unchanged to :class:`DGLError`.
    """

    def __init__(self, msg="", *args, **kwargs):  # pylint: disable=W1113
        # Built from adjacent literals rather than a backslash continuation,
        # which would embed the source indentation in the message.
        prefix_message = (
            "DGL now requires the input tensor to have "
            "the same dtype as the graph index's dtype "
            "(which you can get by g.idtype). "
        )
        super().__init__(prefix_message + msg, *args, **kwargs)
def tousertensor(self, ctx=None):
    """Convert to user tensor (defined in `backend`).

    Tensors are cached per context in ``self._user_tensor_data``; a tensor
    for a context that is not cached yet is produced with ``F.copy_to``.

    Parameters
    ----------
    ctx : backend context, optional
        Target context. Defaults to ``F.cpu()``.

    Returns
    -------
    Tensor
        The index data as a backend tensor on ``ctx``.
    """
    cache = self._user_tensor_data
    target = F.cpu() if ctx is None else ctx
    if not cache:
        # Nothing cached yet: build the CPU tensor without copying.
        if self._dgl_tensor_data is None:
            # zero copy from numpy array
            cpu_tensor = F.zerocopy_from_numpy(self.tonumpy())
        else:
            # zero copy from dgl tensor
            cpu_tensor = F.zerocopy_from_dlpack(
                self._dgl_tensor_data.to_dlpack()
            )
        cache[F.cpu()] = cpu_tensor
    if target not in cache:
        # copy from an already cached tensor to the requested context
        source = next(iter(cache.values()))
        cache[target] = F.copy_to(source, target)
    return cache[target]
def get_items(self, index):
    """Return values at given positions of an Index.

    Parameters
    ----------
    index : utils.Index
        Positions to read.

    Returns
    -------
    utils.Index
        The values at the given positions. When this Index wraps a slice
        starting at 0, ``index`` itself is returned.
    """
    own_slice = self._slice_data
    if own_slice is not None and own_slice.start == 0:
        # short-cut for identical mapping
        # NOTE: we don't check for out-of-bound error
        return index

    other_slice = index._slice_data
    if other_slice is None:
        # the provided index is not a slice
        # TODO(Allen): Change F.gather_row to dgl operation
        gathered = F.gather_row(self.tousertensor(), index.tousertensor())
        return Index(gathered, self.dtype)

    if own_slice is None:
        # the current index is not a slice but the provided is a slice
        # TODO(Allen): Change F.narrow_row to dgl operation
        rows = F.narrow_row(
            self.tousertensor(), other_slice.start, other_slice.stop
        )
        return Index(F.astype(rows, F.data_type_dict[self.dtype]), self.dtype)

    # both self and index wrap a slice object, then return another
    # Index wrapping a slice
    offset = own_slice.start
    return Index(
        slice(offset + other_slice.start, offset + other_slice.stop),
        self.dtype,
    )
def append_zeros(self, num):
    """Append zeros to an Index.

    The operation is not done in place; a new Index is returned unless
    ``num`` is 0.

    Parameters
    ----------
    num : int
        Number of zeros to append.

    Returns
    -------
    utils.Index
        ``self`` if ``num`` is 0, otherwise a new Index with ``num`` zeros
        appended and the same dtype as this Index.
    """
    if num == 0:
        return self
    # Allocate the padding in this Index's own dtype. A hard-coded int64
    # tensor is rejected by the dtype check in ``_dispatch`` whenever the
    # Index is int32.
    new_items = F.zeros(
        (num,), dtype=F.data_type_dict[self.dtype], ctx=F.cpu()
    )
    if len(self) == 0:
        return Index(new_items, self.dtype)
    tensor = F.cat((self.tousertensor(), new_items), dim=0)
    return Index(tensor, self.dtype)
class HybridDict(Mapping):
    """A readonly dictionary that merges several dict-like objects (python
    dict, LazyDict).

    If there are duplicate keys, earlier objects have priority over later
    ones.

    Parameters
    ----------
    *dict_like_list
        The dict-like objects to merge, in priority order.
    """

    def __init__(self, *dict_like_list):
        self._dict_like_list = dict_like_list
        # The key set is materialized once; the values stay in the sources.
        self._keys = set().union(*(src.keys() for src in dict_like_list))

    def keys(self):
        return self._keys

    def __getitem__(self, key):
        # The first source that has the key wins.
        for source in self._dict_like_list:
            if key not in source:
                continue
            return source[key]
        raise KeyError(key)

    def __contains__(self, key):
        return key in self.keys()

    def __iter__(self):
        return iter(self.keys())

    def __len__(self):
        return len(self.keys())
def cached_member(cache, prefix):
    """A member function decorator to memorize the result.

    The cache key is a string built from ``prefix`` and the ``str()`` of
    every positional and keyword argument, so arguments are distinguished
    only by their string form. The member function must be functional.
    Otherwise, the behavior is undefined.

    Parameters
    ----------
    cache : str
        The cache name. The cache should be a dictionary attribute in the
        class object.
    prefix : str
        The key prefix to save the result of the function.
    """

    def _creator(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            store = getattr(self, cache)
            positional = "-".join(map(str, args))
            named = "-".join(
                str(name) + ":" + str(val) for name, val in kwargs.items()
            )
            key = "%s-%s-%s" % (prefix, positional, named)
            if key in store:
                return store[key]
            result = func(self, *args, **kwargs)
            store[key] = result
            return result

        return wrapper

    return _creator
def make_invmap(array, use_numpy=True):
    """Find the unique elements of the array and return another array with
    indices to the array of unique elements.

    Parameters
    ----------
    array : iterable
        The input elements.
    use_numpy : bool, optional
        If True, the unique elements are computed with ``np.unique``;
        otherwise with ``list(set(array))``.

    Returns
    -------
    uniques : numpy.ndarray or list
        The unique elements.
    invmap : dict
        Mapping from each unique element to its position in ``uniques``.
    remapped : numpy.ndarray
        For each element of ``array``, its position in ``uniques``.
    """
    uniques = np.unique(array) if use_numpy else list(set(array))
    invmap = dict(zip(uniques, range(len(uniques))))
    remapped = np.asarray([invmap[item] for item in array])
    return uniques, invmap, remapped
def retry_method_with_fix(fix_method):
    """Decorator that executes a fix method before retrying again when the
    decorated method fails once with an exception.

    If the decorated method fails again, the execution fails with that
    exception. Only ``Exception`` subclasses trigger the fix and the retry;
    ``KeyboardInterrupt``, ``SystemExit`` and other ``BaseException``-only
    errors propagate immediately.

    Notes
    -----
    This decorator only works on class methods, and the fix function must
    also be a class method. It would not work on functions.

    Parameters
    ----------
    fix_method : callable
        The fix method to execute. It is called with the instance as its
        only argument. Its return values are ignored.
    """

    def _creator(func):
        @wraps(func)
        def wrapper(self, *args, **kwargs):
            try:
                return func(self, *args, **kwargs)
            except Exception:  # pylint: disable=broad-except
                # Any ordinary failure gets exactly one repair-and-retry.
                fix_method(self)
                return func(self, *args, **kwargs)

        return wrapper

    return _creator
class FlattenedDict(object):
    """Iterates over each item in a dictionary of groups.

    Items are numbered consecutively group by group, in the iteration order
    of ``groups``. An empty dictionary gives an empty sequence.

    Parameters
    ----------
    groups : dict
        The item groups.

    Examples
    --------
    >>> groups = FlattenedDict({'a': [1, 3], 'b': [2, 5, 8], 'c': [7]})
    >>> list(groups)
    [('a', 1), ('a', 3), ('b', 2), ('b', 5), ('b', 8), ('c', 7)]
    >>> groups[2]
    ('b', 2)
    >>> len(groups)
    6
    """

    def __init__(self, groups):
        self._groups = groups
        # Keys and sizes are collected separately (not via ``zip(*items)``)
        # so that an empty mapping yields empty tuples instead of failing.
        sized = [(key, len(group)) for key, group in groups.items()]
        self._group_keys = tuple(key for key, _ in sized)
        self._group_sizes = tuple(size for _, size in sized)
        # offsets[i] is the flat position of the first item of group i; the
        # explicit dtype keeps the array integral even when there is no group.
        self._group_offsets = np.insert(
            np.cumsum(self._group_sizes, dtype=np.int64), 0, 0
        )
        # TODO: this is faster (37s -> 21s per epoch compared to searchsorted
        # in GCMC) but takes O(E) memory.
        self._idx_to_group = np.repeat(
            np.arange(len(self._group_keys), dtype="int32"),
            np.asarray(self._group_sizes, dtype=np.int64),
        )

    def __len__(self):
        """Return the total number of items."""
        return int(self._group_offsets[-1])

    def __iter__(self):
        """Return the iterator of all items with the key of its original group."""
        for key, size in zip(self._group_keys, self._group_sizes):
            group = self._groups[key]
            for pos in range(size):
                yield key, group[pos]

    def __getitem__(self, idx):
        """Return the item at the given position with the key of its original
        group. Negative positions count from the end."""
        if idx < 0:
            # Normalize first; otherwise the in-group offset computed below
            # would be wrong for negative positions.
            idx = idx + len(self)
        i = self._idx_to_group[idx]
        k = self._group_keys[i]
        j = idx - self._group_offsets[i]
        g = self._groups[k]
        return k, g[j]
def relabel(x):
    """Relabel the input ids to continuous ids that start from zero.

    New ids follow the order of the unique values returned by ``F.unique``.

    Examples
    --------
    >>> x = [1, 5, 3, 6]
    >>> n2o, o2n = relabel(x)
    >>> n2o
    [1, 3, 5, 6]
    >>> o2n
    [n/a, 0, n/a, 1, n/a, 2, 3]

    "n/a" will be filled with 0

    Parameters
    ----------
    x : Tensor
        ID tensor.

    Returns
    -------
    new_to_old : Tensor
        The mapping from new id to old id.
    old_to_new : Tensor
        The mapping from old id to new id. It is a vector of length
        MAX(x) + 1. One can use advanced indexing to convert an old id
        tensor to a new id tensor: new_id = old_to_new[old_id]
    """
    ctx, dtype = F.context(x), F.dtype(x)
    unique_x = F.unique(x)
    # One slot per possible old id, from 0 up to the largest id present.
    map_len = F.as_scalar(F.max(unique_x, dim=0)) + 1
    new_ids = F.copy_to(F.arange(0, len(unique_x), dtype), ctx)
    old_to_new = F.scatter_row(
        F.zeros((map_len,), dtype=dtype, ctx=ctx), unique_x, new_ids
    )
    return unique_x, old_to_new
def extract_node_subframes_for_block(graph, srcnodes, dstnodes):
    """Extract the input node features and output node features of the given
    nodes from :attr:`graph` and return them in frames ready for a block.

    The features are obtained through ``subframe`` of each node frame of the
    graph, and every returned frame stores its node IDs in the ``dgl.NID``
    field.

    Parameters
    ----------
    graph : DGLGraph
        The graph to extract features from.
    srcnodes : list[Tensor]
        Input node IDs. The list length must be equal to the number of node
        types in the graph.
    dstnodes : list[Tensor]
        Output node IDs. The list length must be equal to the number of node
        types in the graph.

    Returns
    -------
    list[Frame]
        Extracted node frames: one per entry of :attr:`srcnodes`, followed by
        one per entry of :attr:`dstnodes`.
    """
    node_frames = []
    # Source side first, then destination side; within a side, the position
    # in the list selects the node frame of the graph.
    for side in (srcnodes, dstnodes):
        for ntype_id, ids in enumerate(side):
            frame = graph._node_frames[ntype_id].subframe(ids)
            frame[NID] = ids
            node_frames.append(frame)
    return node_frames
def alias_func(func):
    """Return an alias function with proper docstring.

    The alias forwards all arguments to ``func`` and carries over its
    metadata via ``functools.wraps``; only the docstring is replaced by a
    one-line pointer to ``dgl.<func name>``.

    Parameters
    ----------
    func : callable
        The function to alias.

    Returns
    -------
    callable
        The alias.
    """

    def _forward(*args, **kwargs):
        return func(*args, **kwargs)

    alias = wraps(func)(_forward)
    alias.__doc__ = """Alias of :func:`dgl.{}`.""".format(func.__name__)
    return alias
def recursive_apply(data, fn, *args, **kwargs):
    """Recursively apply a function to every element in a container.

    If the input data is a mapping, returns a dict with the same keys. If it
    is a tuple, returns a tuple. If it is any other sequence except a string,
    returns a list. Nested containers keep their nested structure, with each
    element transformed recursively. Anything else is passed to the function
    directly.

    The first argument of the function will be passed with the individual
    elements from the input data, followed by the arguments in :attr:`args`
    and :attr:`kwargs`.

    Parameters
    ----------
    data : any
        Any object.
    fn : callable
        Any function.
    args, kwargs :
        Additional arguments and keyword-arguments passed to the function.

    Examples
    --------
    Applying a ReLU function to a dictionary of tensors:

    >>> h = {k: torch.randn(3) for k in ['A', 'B', 'C']}
    >>> h = recursive_apply(h, torch.nn.functional.relu)
    >>> assert all((v >= 0).all() for v in h.values())
    """

    def _walk(node):
        if isinstance(node, Mapping):
            return {key: _walk(val) for key, val in node.items()}
        if isinstance(node, tuple):
            return tuple(_walk(item) for item in node)
        # Same condition as ``is_listlike``: a sequence that is not a string.
        if isinstance(node, Sequence) and not isinstance(node, str):
            return [_walk(item) for item in node]
        return fn(node, *args, **kwargs)

    return _walk(data)
def pin_memory_inplace(tensor):
    """Register the tensor into pinned memory in-place (i.e. without copying).

    Callers must keep the returned dgl.ndarray object alive; otherwise the
    tensor may be unpinned.

    Parameters
    ----------
    tensor : Tensor
        The tensor to be pinned.

    Returns
    -------
    dgl.ndarray
        The dgl.ndarray object that holds the pinning status and shares the
        same underlying data with the tensor.

    Raises
    ------
    DGLError
        If the backend is MXNet or TensorFlow, or if the conversion or the
        pinning itself fails for any reason.
    """
    unsupported_backends = ("mxnet", "tensorflow")
    if F.backend_name in unsupported_backends:
        message = (
            "The {} backend does not support pinning "
            "tensors in-place.".format(F.backend_name)
        )
        raise DGLError(message)

    try:
        # needs to be writable to allow in-place modification
        pinned = F.zerocopy_to_dgl_ndarray_for_write(tensor)
        pinned.pin_memory_()
    except Exception as e:
        raise DGLError("Failed to pin memory in-place due to: {}".format(e))
    else:
        return pinned
class HeteroNodeView(object):
    """Node view that backs ``G.nodes`` of a DGLGraph.

    Indexing the view yields a ``NodeSpace`` whose ``data`` field is a
    :class:`HeteroNodeDataView`; calling the view yields the node IDs of
    one node type.
    """

    __slots__ = ["_graph", "_typeid_getter"]

    def __init__(self, graph, typeid_getter):
        self._graph = graph
        self._typeid_getter = typeid_getter

    def __getitem__(self, key):
        """Resolve ``key`` into a (nodes, node type) pair and wrap it.

        Accepted keys: the full slice ``:``, a ``(nodes, ntype)`` tuple,
        ``None`` or a string (taken as the node type, selecting all nodes),
        or anything else (taken as the node selection itself).
        """
        if isinstance(key, slice):
            # Only the bare ``:`` slice is accepted.
            if (
                key.start is not None
                or key.stop is not None
                or key.step is not None
            ):
                raise DGLError('Currently only full slice ":" is supported')
            nodes, ntype = ALL, None
        elif isinstance(key, tuple):
            nodes, ntype = key
        elif key is None or isinstance(key, str):
            nodes, ntype = ALL, key
        else:
            nodes, ntype = key, None

        type_id = self._typeid_getter(ntype)
        data_view = HeteroNodeDataView(self._graph, ntype, type_id, nodes)
        return NodeSpace(data=data_view)

    def __call__(self, ntype=None):
        """Return the nodes."""
        type_id = self._typeid_getter(ntype)
        graph = self._graph
        return F.arange(
            0,
            graph._graph.num_nodes(type_id),
            dtype=graph.idtype,
            ctx=graph.device,
        )
called.""" __slots__ = ["_graph", "_ntype", "_ntid", "_nodes"] def __init__(self, graph, ntype, ntid, nodes): self._graph = graph self._ntype = ntype self._ntid = ntid self._nodes = nodes def __getitem__(self, key): if isinstance(self._ntype, list): ret = {} for (i, ntype) in enumerate(self._ntype): value = self._graph._get_n_repr(self._ntid[i], self._nodes).get( key, None ) if value is not None: ret[ntype] = value return ret else: return self._graph._get_n_repr(self._ntid, self._nodes)[key] def __setitem__(self, key, val): if isinstance(val, LazyFeature): self._graph._node_frames[self._ntid][key] = val elif isinstance(self._ntype, list): assert isinstance(val, dict), ( "Current HeteroNodeDataView has multiple node types, " "please passing the node type and the corresponding data through a dict." ) for (ntype, data) in val.items(): ntid = self._graph.get_ntype_id(ntype) self._graph._set_n_repr(ntid, self._nodes, {key: data}) else: assert isinstance(val, dict) is False, ( "The HeteroNodeDataView has only one node type. 
class HeteroEdgeView(object):
    """Edge view that backs ``G.edges`` of a DGLGraph.

    Indexing the view yields an ``EdgeSpace`` whose ``data`` field is a
    :class:`HeteroEdgeDataView`; calling the view forwards to
    ``graph.all_edges``.
    """

    __slots__ = ["_graph"]

    def __init__(self, graph):
        self._graph = graph

    def __getitem__(self, key):
        """Resolve ``key`` into an (edges, edge type) pair and wrap it.

        The full slice ``:`` and ``None`` select all edges with no explicit
        type. A string or a 3-tuple is taken as the edge type and selects
        all edges. Any other key (including tuples of other lengths) is
        taken as the edge selection itself.
        """
        if isinstance(key, slice) and (
            key.start is not None
            or key.stop is not None
            or key.step is not None
        ):
            raise DGLError('Currently only full slice ":" is supported')

        if key is None or isinstance(key, slice):
            edges, etype = ALL, None
        elif isinstance(key, str) or (isinstance(key, tuple) and len(key) == 3):
            edges, etype = ALL, key
        else:
            edges, etype = key, None

        data_view = HeteroEdgeDataView(self._graph, etype, edges)
        return EdgeSpace(data=data_view)

    def __call__(self, *args, **kwargs):
        """Return all the edges."""
        return self._graph.all_edges(*args, **kwargs)
edges): self._graph = graph self._etype = etype self._etid = ( [self._graph.get_etype_id(t) for t in etype] if isinstance(etype, list) else self._graph.get_etype_id(etype) ) self._edges = edges def __getitem__(self, key): if isinstance(self._etype, list): ret = {} for (i, etype) in enumerate(self._etype): value = self._graph._get_e_repr(self._etid[i], self._edges).get( key, None ) if value is not None: ret[etype] = value return ret else: return self._graph._get_e_repr(self._etid, self._edges)[key] def __setitem__(self, key, val): if isinstance(val, LazyFeature): self._graph._edge_frames[self._etid][key] = val elif isinstance(self._etype, list): assert isinstance(val, dict), ( "Current HeteroEdgeDataView has multiple edge types, " "please pass the edge type and the corresponding data through a dict." ) for (etype, data) in val.items(): etid = self._graph.get_etype_id(etype) self._graph._set_e_repr(etid, self._edges, {key: data}) else: assert isinstance(val, dict) is False, ( "The HeteroEdgeDataView has only one edge type. 
def get_lib_path():
    """Get library path, name and version.

    ``dgl/_ffi/libinfo.py`` is executed in an isolated namespace instead of
    being imported, because importing it would run ``dgl/__init__.py`` and
    pull in the package's dependencies.

    Returns
    -------
    tuple
        ``(libs, version)`` where ``libs`` is a one-element list holding the
        first path returned by ``find_lib_path()`` and ``version`` is the
        ``__version__`` value defined in ``libinfo.py``.
    """
    libinfo_py = os.path.join(CURRENT_DIR, "./dgl/_ffi/libinfo.py")
    libinfo = {"__file__": libinfo_py}
    # Read through a context manager so the file handle is closed right away
    # rather than whenever the garbage collector gets to it.
    with open(libinfo_py, "rb") as libinfo_file:
        libinfo_source = libinfo_file.read()
    exec(compile(libinfo_source, libinfo_py, "exec"), libinfo, libinfo)
    version = libinfo["__version__"]

    lib_path = libinfo["find_lib_path"]()
    libs = [lib_path[0]]

    return libs, version
def cleanup():
    """Remove the files and directories generated while building a wheel.

    Deletes ``MANIFEST.in``, the copies of the core libraries placed under
    ``dgl/``, the per-backend extension libraries, and the ``build`` and
    ``dgl.egg-info`` directories. File removal is best-effort: a file that
    is missing or cannot be removed is skipped.
    """
    # Wheel cleanup
    try:
        os.remove("MANIFEST.in")
    except OSError:
        # Best-effort: only file-system errors are ignored, so Ctrl-C and
        # SystemExit still interrupt the cleanup.
        pass

    for path in LIBS:
        _, libname = os.path.split(path)
        try:
            os.remove(os.path.join("dgl", libname))
        except OSError:
            pass

    for backend in BACKENDS:
        remove_lib("tensoradapter")

        if backend == "pytorch":
            remove_lib("dgl_sparse")
            remove_lib("graphbolt")

    # Remove build artifacts.
    dir_to_remove = ["build", "dgl.egg-info"]
    for dir_ in dir_to_remove:
        print(f"Removing {dir_}")
        if os.path.isdir(dir_):
            shutil.rmtree(dir_)
def copy_lib(lib_name, backend=""):
    """Copy the built libraries of ``lib_name`` into the ``dgl`` package tree.

    Every file under ``<dir_>/<lib_name>/<backend>`` that matches the
    platform library pattern is copied to ``dgl/<lib_name>/<backend>``
    (relative to this script) and listed in the manifest being written.

    Relies on two module-level names set by the wheel-building code: ``dir_``
    (directory holding the core library) and ``fo`` (open ``MANIFEST.in``).
    """
    src_dir = os.path.join(dir_, lib_name, backend)
    dst_dir = os.path.join(CURRENT_DIR, "dgl", lib_name, backend)
    search_pattern = os.path.join(src_dir, get_lib_pattern(lib_name))

    for matched_path in glob.glob(search_pattern):
        file_name = os.path.basename(matched_path)
        # The destination is created only once a library was actually found.
        os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(os.path.join(src_dir, file_name), dst_dir)
        fo.write(f"include dgl/{lib_name}/{backend}/{file_name}\n")
data_files.append(get_lib_file_path("graphbolt")) setup_kwargs = {"include_package_data": True, "data_files": data_files} # Configure dependencies. install_requires = [ "networkx>=2.1", "numpy>=1.14.0", "packaging", "pandas", "psutil>=5.8.0", "pydantic>=2.0", "pyyaml", "requests>=2.19.0", "scipy>=1.1.0", "tqdm", ] setup( name="dgl" + os.getenv("DGL_PACKAGE_SUFFIX", ""), version=VERSION, description="Deep Graph Library", zip_safe=False, maintainer="DGL Team", maintainer_email="wmjlyjemaine@gmail.com", packages=find_packages(), install_requires=install_requires, url="https://github.com/dmlc/dgl", distclass=BinaryDistribution, ext_modules=config_cython(), classifiers=[ "Development Status :: 3 - Alpha", "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", ], license="APACHE", **setup_kwargs, ) if wheel_include_libs: cleanup() ================================================ FILE: python/update_version.py ================================================ """ This is the global script that set the version information of DGL. This script runs and update all the locations that related to versions List of affected files: - dgl-root/python/dgl/_ffi/libinfo.py - dgl-root/include/dgl/runtime/c_runtime_api.h - dgl-root/conda/dgl/meta.yaml """ import os import re # current version # We use the version of the incoming release for code # that is under development # The environment variable DGL_PRERELEASE is the prerelase suffix # (usually "aYYMMDD") # The environment variable DGL_VERSION_SUFFIX is the local version label # suffix for indicating CPU and CUDA versions as in PEP 440 (e.g. 
def update(file_name, pattern, repl):
    """Replace the single version string in ``file_name`` with ``repl``.

    Each line is searched with ``pattern``. Exactly one line of the file must
    match; that match is replaced by ``repl`` and the file is rewritten only
    when its content actually changes. Progress is reported on stdout.

    Parameters
    ----------
    file_name : str
        Path of the file to update.
    pattern : str
        Regular expression matching the current version string.
    repl : str
        The new version string.

    Raises
    ------
    RuntimeError
        If the number of matching lines is not exactly one.
    """
    matcher = re.compile(pattern)

    # Read inside a context manager so the handle is closed before the file
    # is possibly reopened for writing below.
    with open(file_name) as input_file:
        original_lines = input_file.readlines()

    updated_lines = []
    hit_counter = 0
    need_update = False
    for line in original_lines:
        result = matcher.findall(line)
        if result:
            assert len(result) == 1
            hit_counter += 1
            if result[0] != repl:
                line = matcher.sub(repl, line)
                need_update = True
                print("%s: %s->%s" % (file_name, result[0], repl))
            else:
                print("%s: version is already %s" % (file_name, repl))

        updated_lines.append(line)

    if hit_counter != 1:
        raise RuntimeError("Cannot find version in %s" % file_name)

    if need_update:
        with open(file_name, "w") as output_file:
            output_file.writelines(updated_lines)
# Parse flags.
while getopts "ce:ghrt:" flag; do
  if [[ ${flag} == "c" ]]; then
    cuda="OFF"
  elif [[ ${flag} == "e" ]]; then
    extra_args=${OPTARG}
  elif [[ ${flag} == "g" ]]; then
    cuda="ON"
  elif [[ ${flag} == "r" ]]; then
    remove="YES"
  elif [[ ${flag} == "t" ]]; then
    build_type=${OPTARG}
  elif [[ ${flag} == "h" ]]; then
    usage
    exit 0
  else
    usage
    exit 1
  fi
done

if [[ -z ${DGL_HOME} ]]; then
  echo "ERROR: Please make sure environment variable DGL_HOME is set correctly."
  exit 1
fi

if [[ ! ${PWD} == ${DGL_HOME} ]]; then
  echo "ERROR: This script only works properly from DGL root directory."
  echo " Current: ${PWD}"
  echo "DGL_HOME: ${DGL_HOME}"
  exit 1
fi

# -r: drop every intermediate build directory before (re)building.
if [[ ${remove} == "YES" ]]; then
  rm -rf build
  rm -rf graphbolt/build
  rm -rf dgl_sparse/build
  rm -rf tensoradapter/pytorch/build
fi

if [[ -z ${build_type} ]]; then
  build_type="dev"
fi

if [[ -z ${cuda} ]]; then
  # Neither -c nor -g was given: incremental build, which needs an existing
  # build directory that has already been configured.
  if [[ -d build ]]; then
    cd build
  else
    echo "ERROR: No existing build status found, unable to build incrementally."
    usage
    exit 1
  fi
else
  mkdir -p build
  cd build
  # extra_args is intentionally left unquoted so that it can carry several
  # cmake arguments.
  cmake -DBUILD_TYPE=${build_type} -DUSE_CUDA=${cuda} ${extra_args} ..
fi

if [[ ${PWD} == "${DGL_HOME}/build" ]]; then
  make -j
else
  echo "ERROR: unexpected working directory."
  echo " Current: ${PWD}"
  echo "Expected: ${DGL_HOME}/build"
  # Nothing was built, so report failure instead of falling through to the
  # final 'exit 0'.
  exit 1
fi
exit 0
# PyTorch is the backend used when no backend flag is given.
backend="pytorch"

# Parse flags.
while getopts "hpmtr" flag; do
  case "${flag}" in
    p)
      backend="pytorch"
      ;;
    m)
      backend="mxnet"
      ;;
    t)
      backend="tensorflow"
      ;;
    r)
      remove="YES"
      ;;
    h)
      usage
      exit 0
      ;;
    *)
      usage
      exit 1
      ;;
  esac
done

if [[ -z ${DGL_HOME} ]]; then
  echo "ERROR: Please make sure environment variable DGL_HOME is set correctly."
  exit 1
fi

if [[ ! ${PWD} == ${DGL_HOME} ]]; then
  echo "ERROR: This script only works properly from DGL root directory."
  echo " Current: ${PWD}"
  echo "DGL_HOME: ${DGL_HOME}"
  exit 1
fi

cd ${DGL_HOME}/docs

# -r: wipe previous outputs before building.
if [[ ${remove} == "YES" ]]; then
  bash clean.sh
fi

# Environment read by the documentation build.
export DGLBACKEND=$backend
export DGL_LIBRARY_PATH=${DGL_HOME}/build
export PYTHONPATH=${DGL_HOME}/python:$PYTHONPATH

make $backend
exit 0
# Check whether a value is one of the entries of a comma-separated list.
#   $1: comma-separated list of allowed values, e.g. "11.8,12.1,12.4".
#   $2: the value to look for.
# Returns 0 when $2 equals one of the entries, 1 otherwise.
validate() {
  # Keep the loop variables out of the global scope of the script.
  local values value
  values=$(echo "$1" | tr "," "\n")
  for value in ${values}
  do
    # Quote $2: an unquoted right-hand side of == inside [[ ]] is a glob
    # pattern, so a value such as '*' would match every entry.
    if [[ "${value}" == "$2" ]]; then
      return 0
    fi
  done
  return 1
}
rand=$(echo "${RANDOM}" | md5sum | head -c 20) mkdir -p /tmp/${rand} yaml_path="/tmp/${rand}/dgl_dev.yml" cp script/dgl_dev.yml.template ${yaml_path} sed -i "s|__NAME__|${name}|g" ${yaml_path} sed -i "s|__PYTHON_VERSION__|${python_version}|g" ${yaml_path} sed -i "s|__TORCH_VERSION__|${torchversion}|g" ${yaml_path} sed -i "s|__DGL_HOME__|${PWD}|g" ${yaml_path} # Ask for final confirmation. echo "--------------------------------------------------" cat ${yaml_path} echo "--------------------------------------------------" echo "Create a conda enviroment with the config?" [[ -n "${always_yes}" ]] || confirm # Save YAML file to specified path if [[ -n "${output_path}" ]]; then cp ${yaml_path} ${output_path} echo "Environment YAML file has been saved to ${output_path}." fi # Create conda environment. if [[ -z "${dry_run}" ]]; then conda_args="" if [[ -n "${force_create}" ]]; then conda_args="${conda_args} --force " fi conda env create -f ${yaml_path} ${conda_args} else echo "Running in dry mode, so creation of conda environment is skipped." fi # Clean up created tmp conda environment yaml file. 
# Parse flags.
while getopts "cgh" flag; do
  if [[ ${flag} == "c" ]]; then
    device="cpu"
  elif [[ ${flag} == "g" ]]; then
    device="gpu"
  elif [[ ${flag} == "h" ]]; then
    usage
    exit 0
  else
    usage
    exit 1
  fi
done

if [[ -z ${DGL_HOME} ]]; then
  echo "ERROR: Please make sure environment variable DGL_HOME is set correctly."
  exit 1
fi

if [[ ! ${PWD} == ${DGL_HOME} ]]; then
  echo "ERROR: This script only works properly from DGL root directory."
  echo " Current: ${PWD}"
  echo "DGL_HOME: ${DGL_HOME}"
  exit 1
fi

if [[ -z ${device} ]]; then
  echo "ERROR: Test device unspecified."
  usage
  exit 1
fi

# Reset the index for non-option arguments.
shift $(($OPTIND-1))

# Environment read by DGL and by the test suite.
export DGLBACKEND=pytorch
export DGL_LIBRARY_PATH=${DGL_HOME}/build
export PYTHONPATH=${DGL_HOME}/python:${DGL_HOME}/tests:${DGL_HOME}/tests/python/pytorch/graphbolt:$PYTHONPATH
export DGLTESTDEV=${device}
export DGL_DOWNLOAD_DIR=${DGL_HOME}/_download

# "$*" joins the remaining arguments into one word; it is empty when no
# test target was given.
if [[ -z "$*" ]]; then
  echo "ERROR: Missing test targets."
  usage
  exit 1
fi

# Quote "$@" so that every target reaches pytest exactly as it was typed,
# without further word splitting or glob expansion.
python3 -m pytest -v "$@"
// Look up one entry of a map container and return its value.
//   args[0]: handle of the map object.
//   args[1]: the key. The first branch reads it as an object handle, the
//            second branch reads it as a std::string.
// A CHECK fails when the key is not present in the map.
// NOTE(review): the template arguments of is_type / static_cast are not
// visible in this copy of the file; presumably the first branch handles the
// object-keyed map and the second the string-keyed map. TODO confirm.
DGL_REGISTER_GLOBAL("_MapGetItem").set_body([](DGLArgs args, DGLRetValue* rv) {
  auto& sptr = args[0].obj_sptr();
  if (sptr->is_type()) {
    auto* o = static_cast(sptr.get());
    // Key is compared as an object pointer.
    auto it = o->data.find(args[1].obj_sptr());
    CHECK(it != o->data.end()) << "cannot find the key in the map";
    *rv = (*it).second;
  } else {
    // Any container that reaches this branch must pass the second type check.
    CHECK(sptr->is_type());
    auto* o = static_cast(sptr.get());
    // Key is compared as a string.
    auto it = o->data.find(args[1].operator std::string());
    CHECK(it != o->data.end()) << "cannot find the key in the map";
    *rv = (*it).second;
  }
});
// Test-only entry point: invokes a callback passed in from the caller.
//   args[0]: the callback, converted to a PackedFunc.
//   args[1]: the single argument forwarded to the callback.
// Whatever the callback writes into rv becomes the result of this API.
DGL_REGISTER_GLOBAL("_TestPythonCallback")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      LOG(INFO) << "Inside C API";
      PackedFunc fn = args[0];
      // View over the argument array that skips args[0] and exposes exactly
      // one value (the trailing 1 is the argument count).
      DGLArgs cb_args(args.values + 1, args.type_codes + 1, 1);
      fn.CallPacked(cb_args, rv);
    });
DGL_REGISTER_GLOBAL("_TestPythonCallbackThread") .set_body([](DGLArgs args, DGLRetValue* rv) { LOG(INFO) << "Inside C API"; PackedFunc fn = args[0]; auto thr = std::make_shared([fn, args, rv]() { LOG(INFO) << "Callback thread " << std::this_thread::get_id(); DGLArgs cb_args(args.values + 1, args.type_codes + 1, 1); fn.CallPacked(cb_args, rv); }); thr->join(); }); } // namespace runtime } // namespace dgl ================================================ FILE: src/array/arith.h ================================================ /** * Copyright (c) 2019 by Contributors * @file array/arith.h * @brief Arithmetic functors */ #ifndef DGL_ARRAY_ARITH_H_ #define DGL_ARRAY_ARITH_H_ #ifdef __CUDACC__ #define DGLDEVICE __device__ #define DGLINLINE __forceinline__ #else #define DGLDEVICE #define DGLINLINE inline #endif // __CUDACC__ namespace dgl { namespace aten { namespace arith { struct Add { template static DGLINLINE DGLDEVICE T Call(const T& t1, const T& t2) { return t1 + t2; } }; struct Sub { template static DGLINLINE DGLDEVICE T Call(const T& t1, const T& t2) { return t1 - t2; } }; struct Mul { template static DGLINLINE DGLDEVICE T Call(const T& t1, const T& t2) { return t1 * t2; } }; struct Div { template static DGLINLINE DGLDEVICE T Call(const T& t1, const T& t2) { return t1 / t2; } }; struct Mod { template static DGLINLINE DGLDEVICE T Call(const T& t1, const T& t2) { return t1 % t2; } }; struct GT { template static DGLINLINE DGLDEVICE bool Call(const T& t1, const T& t2) { return t1 > t2; } }; struct LT { template static DGLINLINE DGLDEVICE bool Call(const T& t1, const T& t2) { return t1 < t2; } }; struct GE { template static DGLINLINE DGLDEVICE bool Call(const T& t1, const T& t2) { return t1 >= t2; } }; struct LE { template static DGLINLINE DGLDEVICE bool Call(const T& t1, const T& t2) { return t1 <= t2; } }; struct EQ { template static DGLINLINE DGLDEVICE bool Call(const T& t1, const T& t2) { return t1 == t2; } }; struct NE { template static DGLINLINE DGLDEVICE bool 
Call(const T& t1, const T& t2) { return t1 != t2; } }; struct Neg { template static DGLINLINE DGLDEVICE T Call(const T& t1) { return -t1; } }; } // namespace arith } // namespace aten } // namespace dgl #endif // DGL_ARRAY_ARITH_H_ ================================================ FILE: src/array/array.cc ================================================ /** * Copyright (c) 2019-2022 by Contributors * @file array/array.cc * @brief DGL array utilities implementation */ #include #include #include #include #include #include #include #include #include "../c_api_common.h" #include "./arith.h" #include "./array_op.h" #include "./kernel_decl.h" using namespace dgl::runtime; namespace dgl { namespace aten { IdArray NewIdArray(int64_t length, DGLContext ctx, uint8_t nbits) { return IdArray::Empty({length}, DGLDataType{kDGLInt, nbits, 1}, ctx); } FloatArray NewFloatArray(int64_t length, DGLContext ctx, uint8_t nbits) { return FloatArray::Empty({length}, DGLDataType{kDGLFloat, nbits, 1}, ctx); } IdArray Clone(IdArray arr) { IdArray ret = NewIdArray(arr->shape[0], arr->ctx, arr->dtype.bits); ret.CopyFrom(arr); return ret; } IdArray Range(int64_t low, int64_t high, uint8_t nbits, DGLContext ctx) { IdArray ret; ATEN_XPU_SWITCH_CUDA(ctx.device_type, XPU, "Range", { if (nbits == 32) { ret = impl::Range(low, high, ctx); } else if (nbits == 64) { ret = impl::Range(low, high, ctx); } else { LOG(FATAL) << "Only int32 or int64 is supported."; } }); return ret; } IdArray Full(int64_t val, int64_t length, uint8_t nbits, DGLContext ctx) { IdArray ret; ATEN_XPU_SWITCH_CUDA(ctx.device_type, XPU, "Full", { if (nbits == 32) { ret = impl::Full(val, length, ctx); } else if (nbits == 64) { ret = impl::Full(val, length, ctx); } else { LOG(FATAL) << "Only int32 or int64 is supported."; } }); return ret; } template NDArray Full(DType val, int64_t length, DGLContext ctx) { NDArray ret; ATEN_XPU_SWITCH_CUDA(ctx.device_type, XPU, "Full", { ret = impl::Full(val, length, ctx); }); return ret; } template 
NDArray Full(int32_t val, int64_t length, DGLContext ctx); template NDArray Full(int64_t val, int64_t length, DGLContext ctx); template NDArray Full(float val, int64_t length, DGLContext ctx); template NDArray Full(double val, int64_t length, DGLContext ctx); IdArray AsNumBits(IdArray arr, uint8_t bits) { CHECK(bits == 32 || bits == 64) << "Invalid ID type. Must be int32 or int64, but got int" << static_cast(bits) << "."; if (arr->dtype.bits == bits) return arr; if (arr.NumElements() == 0) return NewIdArray(arr->shape[0], arr->ctx, bits); IdArray ret; ATEN_XPU_SWITCH_CUDA(arr->ctx.device_type, XPU, "AsNumBits", { ATEN_ID_TYPE_SWITCH( arr->dtype, IdType, { ret = impl::AsNumBits(arr, bits); }); }); return ret; } IdArray HStack(IdArray lhs, IdArray rhs) { IdArray ret; CHECK_SAME_CONTEXT(lhs, rhs); CHECK_SAME_DTYPE(lhs, rhs); CHECK_EQ(lhs->shape[0], rhs->shape[0]); auto device = runtime::DeviceAPI::Get(lhs->ctx); const auto& ctx = lhs->ctx; ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, { const int64_t len = lhs->shape[0]; ret = NewIdArray(2 * len, lhs->ctx, lhs->dtype.bits); device->CopyDataFromTo( lhs.Ptr(), 0, ret.Ptr(), 0, len * sizeof(IdType), ctx, ctx, lhs->dtype); device->CopyDataFromTo( rhs.Ptr(), 0, ret.Ptr(), len * sizeof(IdType), len * sizeof(IdType), ctx, ctx, lhs->dtype); }); return ret; } NDArray IndexSelect(NDArray array, IdArray index) { NDArray ret; CHECK_GE(array->ndim, 1) << "Only support array with at least 1 dimension"; CHECK_EQ(index->ndim, 1) << "Index array must be an 1D array."; // if array is not pinned, index has the same context as array // if array is pinned, op dispatching depends on the context of index CHECK_VALID_CONTEXT(array, index); ATEN_XPU_SWITCH_CUDA(index->ctx.device_type, XPU, "IndexSelect", { ATEN_DTYPE_SWITCH(array->dtype, DType, "values", { ATEN_ID_TYPE_SWITCH(index->dtype, IdType, { ret = impl::IndexSelect(array, index); }); }); }); return ret; } template ValueType IndexSelect(NDArray array, int64_t index) { CHECK_EQ(array->ndim, 
1) << "Only support select values from 1D array."; CHECK(index >= 0 && index < array.NumElements()) << "Index " << index << " is out of bound."; ValueType ret = 0; ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, "IndexSelect", { ATEN_DTYPE_SWITCH(array->dtype, DType, "values", { ret = impl::IndexSelect(array, index); }); }); return ret; } template int32_t IndexSelect(NDArray array, int64_t index); template int64_t IndexSelect(NDArray array, int64_t index); template uint32_t IndexSelect(NDArray array, int64_t index); template uint64_t IndexSelect(NDArray array, int64_t index); template float IndexSelect(NDArray array, int64_t index); template double IndexSelect(NDArray array, int64_t index); NDArray IndexSelect(NDArray array, int64_t start, int64_t end) { CHECK_EQ(array->ndim, 1) << "Only support select values from 1D array."; CHECK(start >= 0 && start < array.NumElements()) << "Index " << start << " is out of bound."; CHECK(end >= 0 && end <= array.NumElements()) << "Index " << end << " is out of bound."; CHECK_LE(start, end); auto device = runtime::DeviceAPI::Get(array->ctx); const int64_t len = end - start; NDArray ret = NDArray::Empty({len}, array->dtype, array->ctx); ATEN_DTYPE_SWITCH(array->dtype, DType, "values", { device->CopyDataFromTo( array->data, start * sizeof(DType), ret->data, 0, len * sizeof(DType), array->ctx, ret->ctx, array->dtype); }); return ret; } NDArray Scatter(NDArray array, IdArray indices) { NDArray ret; ATEN_XPU_SWITCH(array->ctx.device_type, XPU, "Scatter", { ATEN_DTYPE_SWITCH(array->dtype, DType, "values", { ATEN_ID_TYPE_SWITCH(indices->dtype, IdType, { ret = impl::Scatter(array, indices); }); }); }); return ret; } void Scatter_(IdArray index, NDArray value, NDArray out) { CHECK_SAME_DTYPE(value, out); CHECK_SAME_CONTEXT(index, value); CHECK_SAME_CONTEXT(index, out); CHECK_EQ(value->shape[0], index->shape[0]); if (index->shape[0] == 0) return; ATEN_XPU_SWITCH_CUDA(value->ctx.device_type, XPU, "Scatter_", { 
ATEN_DTYPE_SWITCH(value->dtype, DType, "values", { ATEN_ID_TYPE_SWITCH(index->dtype, IdType, { impl::Scatter_(index, value, out); }); }); }); } NDArray Repeat(NDArray array, IdArray repeats) { NDArray ret; ATEN_XPU_SWITCH(array->ctx.device_type, XPU, "Repeat", { ATEN_DTYPE_SWITCH(array->dtype, DType, "values", { ATEN_ID_TYPE_SWITCH(repeats->dtype, IdType, { ret = impl::Repeat(array, repeats); }); }); }); return ret; } IdArray Relabel_(const std::vector& arrays) { IdArray ret; ATEN_XPU_SWITCH_CUDA(arrays[0]->ctx.device_type, XPU, "Relabel_", { ATEN_ID_TYPE_SWITCH(arrays[0]->dtype, IdType, { ret = impl::Relabel_(arrays); }); }); return ret; } NDArray Concat(const std::vector& arrays) { IdArray ret; int64_t len = 0, offset = 0; for (size_t i = 0; i < arrays.size(); ++i) { len += arrays[i]->shape[0]; CHECK_SAME_DTYPE(arrays[0], arrays[i]); CHECK_SAME_CONTEXT(arrays[0], arrays[i]); } NDArray ret_arr = NDArray::Empty({len}, arrays[0]->dtype, arrays[0]->ctx); auto device = runtime::DeviceAPI::Get(arrays[0]->ctx); for (size_t i = 0; i < arrays.size(); ++i) { ATEN_DTYPE_SWITCH(arrays[i]->dtype, DType, "array", { device->CopyDataFromTo( static_cast(arrays[i]->data), 0, static_cast(ret_arr->data), offset, arrays[i]->shape[0] * sizeof(DType), arrays[i]->ctx, ret_arr->ctx, arrays[i]->dtype); offset += arrays[i]->shape[0] * sizeof(DType); }); } return ret_arr; } template std::tuple Pack(NDArray array, ValueType pad_value) { std::tuple ret; ATEN_XPU_SWITCH(array->ctx.device_type, XPU, "Pack", { ATEN_DTYPE_SWITCH(array->dtype, DType, "array", { ret = impl::Pack(array, static_cast(pad_value)); }); }); return ret; } template std::tuple Pack(NDArray, int32_t); template std::tuple Pack(NDArray, int64_t); template std::tuple Pack( NDArray, uint32_t); template std::tuple Pack( NDArray, uint64_t); template std::tuple Pack(NDArray, float); template std::tuple Pack(NDArray, double); std::pair ConcatSlices(NDArray array, IdArray lengths) { std::pair ret; 
ATEN_XPU_SWITCH(array->ctx.device_type, XPU, "ConcatSlices", { ATEN_DTYPE_SWITCH(array->dtype, DType, "array", { ATEN_ID_TYPE_SWITCH(lengths->dtype, IdType, { ret = impl::ConcatSlices(array, lengths); }); }); }); return ret; } IdArray CumSum(IdArray array, bool prepend_zero) { IdArray ret; ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, "CumSum", { ATEN_ID_TYPE_SWITCH(array->dtype, IdType, { ret = impl::CumSum(array, prepend_zero); }); }); return ret; } IdArray NonZero(NDArray array) { IdArray ret; ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, "NonZero", { ATEN_ID_TYPE_SWITCH( array->dtype, DType, { ret = impl::NonZero(array); }); }); return ret; } std::pair Sort(IdArray array, const int num_bits) { if (array.NumElements() == 0) { IdArray idx = NewIdArray(0, array->ctx, 64); return std::make_pair(array, idx); } std::pair ret; ATEN_XPU_SWITCH_CUDA(array->ctx.device_type, XPU, "Sort", { ATEN_ID_TYPE_SWITCH(array->dtype, IdType, { ret = impl::Sort(array, num_bits); }); }); return ret; } std::string ToDebugString(NDArray array) { std::ostringstream oss; NDArray a = array.CopyTo(DGLContext{kDGLCPU, 0}); oss << "array(["; ATEN_DTYPE_SWITCH(a->dtype, DType, "array", { for (int64_t i = 0; i < std::min(a.NumElements(), 10L); ++i) { oss << a.Ptr()[i] << ", "; } }); if (a.NumElements() > 10) oss << "..."; oss << "], dtype=" << array->dtype << ", ctx=" << array->ctx << ")"; return oss.str(); } ///////////////////////// CSR routines ////////////////////////// bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) { CHECK(row >= 0 && row < csr.num_rows) << "Invalid row index: " << row; CHECK(col >= 0 && col < csr.num_cols) << "Invalid col index: " << col; bool ret = false; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRIsNonZero", { ret = impl::CSRIsNonZero(csr, row, col); }); return ret; } NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) { NDArray ret; CHECK_SAME_DTYPE(csr.indices, row); CHECK_SAME_DTYPE(csr.indices, col); CHECK_SAME_CONTEXT(row, col); 
ATEN_CSR_SWITCH_CUDA_UVA(csr, row, XPU, IdType, "CSRIsNonZero", { ret = impl::CSRIsNonZero(csr, row, col); }); return ret; } bool CSRHasDuplicate(CSRMatrix csr) { bool ret = false; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRHasDuplicate", { ret = impl::CSRHasDuplicate(csr); }); return ret; } int64_t CSRGetRowNNZ(CSRMatrix csr, int64_t row) { CHECK(row >= 0 && row < csr.num_rows) << "Invalid row index: " << row; int64_t ret = 0; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRGetRowNNZ", { ret = impl::CSRGetRowNNZ(csr, row); }); return ret; } NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray row) { NDArray ret; CHECK_SAME_DTYPE(csr.indices, row); ATEN_CSR_SWITCH_CUDA_UVA(csr, row, XPU, IdType, "CSRGetRowNNZ", { ret = impl::CSRGetRowNNZ(csr, row); }); return ret; } NDArray CSRGetRowColumnIndices(CSRMatrix csr, int64_t row) { CHECK(row >= 0 && row < csr.num_rows) << "Invalid row index: " << row; NDArray ret; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRGetRowColumnIndices", { ret = impl::CSRGetRowColumnIndices(csr, row); }); return ret; } NDArray CSRGetRowData(CSRMatrix csr, int64_t row) { CHECK(row >= 0 && row < csr.num_rows) << "Invalid row index: " << row; NDArray ret; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRGetRowData", { ret = impl::CSRGetRowData(csr, row); }); return ret; } bool CSRIsSorted(CSRMatrix csr) { if (csr.indices->shape[0] <= 1) return true; bool ret = false; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRIsSorted", { ret = impl::CSRIsSorted(csr); }); return ret; } NDArray CSRGetData(CSRMatrix csr, NDArray rows, NDArray cols) { NDArray ret; CHECK_SAME_DTYPE(csr.indices, rows); CHECK_SAME_DTYPE(csr.indices, cols); CHECK_SAME_CONTEXT(rows, cols); ATEN_CSR_SWITCH_CUDA_UVA(csr, rows, XPU, IdType, "CSRGetData", { ret = impl::CSRGetData(csr, rows, cols); }); return ret; } template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, NDArray weights, DType filler) { NDArray ret; CHECK_SAME_DTYPE(csr.indices, rows); CHECK_SAME_DTYPE(csr.indices, cols); 
CHECK_SAME_CONTEXT(rows, cols); CHECK_SAME_CONTEXT(rows, weights); ATEN_CSR_SWITCH_CUDA_UVA(csr, rows, XPU, IdType, "CSRGetData", { ret = impl::CSRGetData(csr, rows, cols, weights, filler); }); return ret; } runtime::NDArray CSRGetFloatingData( CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols, runtime::NDArray weights, double filler) { if (weights->dtype.bits == 64) { return CSRGetData(csr, rows, cols, weights, filler); } else { CHECK(weights->dtype.bits == 32) << "CSRGetFloatingData only supports 32 or 64 bits floaring number"; return CSRGetData(csr, rows, cols, weights, filler); } } template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, NDArray weights, float filler); template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, NDArray weights, double filler); std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray rows, NDArray cols) { CHECK_SAME_DTYPE(csr.indices, rows); CHECK_SAME_DTYPE(csr.indices, cols); CHECK_SAME_CONTEXT(rows, cols); std::vector ret; ATEN_CSR_SWITCH_CUDA_UVA(csr, rows, XPU, IdType, "CSRGetDataAndIndices", { ret = impl::CSRGetDataAndIndices(csr, rows, cols); }); return ret; } CSRMatrix CSRTranspose(CSRMatrix csr) { CSRMatrix ret; ATEN_XPU_SWITCH_CUDA(csr.indptr->ctx.device_type, XPU, "CSRTranspose", { ATEN_ID_TYPE_SWITCH(csr.indptr->dtype, IdType, { ret = impl::CSRTranspose(csr); }); }); return ret; } COOMatrix CSRToCOO(CSRMatrix csr, bool data_as_order) { COOMatrix ret; if (data_as_order) { ATEN_XPU_SWITCH_CUDA( csr.indptr->ctx.device_type, XPU, "CSRToCOODataAsOrder", { ATEN_ID_TYPE_SWITCH(csr.indptr->dtype, IdType, { ret = impl::CSRToCOODataAsOrder(csr); }); }); } else { ATEN_XPU_SWITCH_CUDA(csr.indptr->ctx.device_type, XPU, "CSRToCOO", { ATEN_ID_TYPE_SWITCH(csr.indptr->dtype, IdType, { ret = impl::CSRToCOO(csr); }); }); } return ret; } CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) { CHECK(start >= 0 && start < csr.num_rows) << "Invalid start index: " << start; CHECK(end >= 0 && 
end <= csr.num_rows) << "Invalid end index: " << end; CHECK_GE(end, start); CSRMatrix ret; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRSliceRows", { ret = impl::CSRSliceRows(csr, start, end); }); return ret; } CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) { CHECK_SAME_DTYPE(csr.indices, rows); CSRMatrix ret; ATEN_CSR_SWITCH_CUDA_UVA(csr, rows, XPU, IdType, "CSRSliceRows", { ret = impl::CSRSliceRows(csr, rows); }); return ret; } CSRMatrix CSRSliceMatrix(CSRMatrix csr, NDArray rows, NDArray cols) { CHECK_SAME_DTYPE(csr.indices, rows); CHECK_SAME_DTYPE(csr.indices, cols); CHECK_SAME_CONTEXT(rows, cols); CSRMatrix ret; ATEN_CSR_SWITCH_CUDA_UVA(csr, rows, XPU, IdType, "CSRSliceMatrix", { ret = impl::CSRSliceMatrix(csr, rows, cols); }); return ret; } void CSRSort_(CSRMatrix* csr) { if (csr->sorted) return; ATEN_CSR_SWITCH_CUDA( *csr, XPU, IdType, "CSRSort_", { impl::CSRSort_(csr); }); } std::pair CSRSortByTag( const CSRMatrix& csr, IdArray tag, int64_t num_tags) { CHECK_EQ(csr.indices->shape[0], tag->shape[0]) << "The length of the tag array should be equal to the number of " "non-zero data."; CHECK_SAME_CONTEXT(csr.indices, tag); CHECK_INT(tag, "tag"); std::pair ret; ATEN_CSR_SWITCH(csr, XPU, IdType, "CSRSortByTag", { ATEN_ID_TYPE_SWITCH(tag->dtype, TagType, { ret = impl::CSRSortByTag(csr, tag, num_tags); }); }); return ret; } CSRMatrix CSRReorder( CSRMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids) { CSRMatrix ret; ATEN_CSR_SWITCH(csr, XPU, IdType, "CSRReorder", { ret = impl::CSRReorder(csr, new_row_ids, new_col_ids); }); return ret; } CSRMatrix CSRRemove(CSRMatrix csr, IdArray entries) { CSRMatrix ret; ATEN_CSR_SWITCH(csr, XPU, IdType, "CSRRemove", { ret = impl::CSRRemove(csr, entries); }); return ret; } std::pair CSRLaborSampling( CSRMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob, int importance_sampling, IdArray random_seed, float seed2_contribution, IdArray NIDs) { std::pair ret; ATEN_CSR_SWITCH_CUDA_UVA(mat, rows, XPU, 
IdType, "CSRLaborSampling", { const auto dtype = IsNullArray(prob) ? DGLDataTypeTraits::dtype : prob->dtype; ATEN_FLOAT_TYPE_SWITCH(dtype, FloatType, "probability", { ret = impl::CSRLaborSampling( mat, rows, num_samples, prob, importance_sampling, random_seed, seed2_contribution, NIDs); }); }); return ret; } COOMatrix CSRRowWiseSampling( CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray prob_or_mask, bool replace) { COOMatrix ret; if (IsNullArray(prob_or_mask)) { ATEN_CSR_SWITCH_CUDA_UVA( mat, rows, XPU, IdType, "CSRRowWiseSamplingUniform", { ret = impl::CSRRowWiseSamplingUniform( mat, rows, num_samples, replace); }); } else { // prob_or_mask is pinned and rows on GPU is valid CHECK_VALID_CONTEXT(prob_or_mask, rows); ATEN_CSR_SWITCH_CUDA_UVA(mat, rows, XPU, IdType, "CSRRowWiseSampling", { CHECK(!(prob_or_mask->dtype.bits == 8 && XPU == kDGLCUDA)) << "GPU sampling with masks is currently not supported yet."; ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH( prob_or_mask->dtype, FloatType, "probability or mask", { ret = impl::CSRRowWiseSampling( mat, rows, num_samples, prob_or_mask, replace); }); }); } return ret; } template std::pair CSRRowWiseSamplingFused( CSRMatrix mat, IdArray rows, IdArray seed_mapping, std::vector* new_seed_nodes, int64_t num_samples, NDArray prob_or_mask, bool replace) { std::pair ret; if (IsNullArray(prob_or_mask)) { ATEN_XPU_SWITCH( rows->ctx.device_type, XPU, "CSRRowWiseSamplingUniformFused", { ret = impl::CSRRowWiseSamplingUniformFused( mat, rows, seed_mapping, new_seed_nodes, num_samples, replace); }); } else { CHECK_VALID_CONTEXT(prob_or_mask, rows); ATEN_XPU_SWITCH(rows->ctx.device_type, XPU, "CSRRowWiseSamplingFused", { ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH( prob_or_mask->dtype, FloatType, "probability or mask", { ret = impl::CSRRowWiseSamplingFused< XPU, IdType, FloatType, map_seed_nodes>( mat, rows, seed_mapping, new_seed_nodes, num_samples, prob_or_mask, replace); }); }); } return ret; } template std::pair CSRRowWiseSamplingFused( CSRMatrix, 
IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); COOMatrix CSRRowWisePerEtypeSampling( CSRMatrix mat, IdArray rows, const std::vector& eid2etype_offset, const std::vector& num_samples, const std::vector& prob_or_mask, bool replace, bool rowwise_etype_sorted) { COOMatrix ret; CHECK(prob_or_mask.size() > 0) << "probability or mask array is empty"; ATEN_CSR_SWITCH(mat, XPU, IdType, "CSRRowWisePerEtypeSampling", { if (std::all_of(prob_or_mask.begin(), prob_or_mask.end(), IsNullArray)) { ret = impl::CSRRowWisePerEtypeSamplingUniform( mat, rows, eid2etype_offset, num_samples, replace, rowwise_etype_sorted); } else { ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH( prob_or_mask[0]->dtype, DType, "probability or mask", { ret = impl::CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, num_samples, prob_or_mask, replace, rowwise_etype_sorted); }); } }); return ret; } COOMatrix CSRRowWiseTopk( CSRMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending) { COOMatrix ret; ATEN_CSR_SWITCH(mat, XPU, IdType, "CSRRowWiseTopk", { ATEN_DTYPE_SWITCH(weight->dtype, DType, "weight", { ret = impl::CSRRowWiseTopk( mat, rows, k, weight, ascending); }); }); return ret; } COOMatrix CSRRowWiseSamplingBiased( CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray tag_offset, FloatArray bias, bool replace) { COOMatrix ret; ATEN_CSR_SWITCH(mat, XPU, IdType, "CSRRowWiseSamplingBiased", { ATEN_FLOAT_TYPE_SWITCH(bias->dtype, FloatType, "bias", { ret = impl::CSRRowWiseSamplingBiased( mat, rows, num_samples, tag_offset, bias, replace); }); }); return ret; } std::pair CSRGlobalUniformNegativeSampling( const CSRMatrix& csr, int64_t num_samples, int num_trials, 
bool exclude_self_loops, bool replace, double redundancy) { CHECK_GT(num_samples, 0) << "Number of samples must be positive"; CHECK_GT(num_trials, 0) << "Number of sampling trials must be positive"; std::pair result; ATEN_CSR_SWITCH_CUDA(csr, XPU, IdType, "CSRGlobalUniformNegativeSampling", { result = impl::CSRGlobalUniformNegativeSampling( csr, num_samples, num_trials, exclude_self_loops, replace, redundancy); }); return result; } CSRMatrix UnionCsr(const std::vector& csrs) { CSRMatrix ret; CHECK_GT(csrs.size(), 1) << "UnionCsr creates a union of multiple CSRMatrixes"; // sanity check for (size_t i = 1; i < csrs.size(); ++i) { CHECK_EQ(csrs[0].num_rows, csrs[i].num_rows) << "UnionCsr requires both CSRMatrix have same number of rows"; CHECK_EQ(csrs[0].num_cols, csrs[i].num_cols) << "UnionCsr requires both CSRMatrix have same number of cols"; CHECK_SAME_CONTEXT(csrs[0].indptr, csrs[i].indptr); CHECK_SAME_DTYPE(csrs[0].indptr, csrs[i].indptr); } ATEN_CSR_SWITCH(csrs[0], XPU, IdType, "UnionCsr", { ret = impl::UnionCsr(csrs); }); return ret; } std::tuple CSRToSimple(const CSRMatrix& csr) { std::tuple ret; CSRMatrix sorted_csr = (CSRIsSorted(csr)) ? 
csr : CSRSort(csr); ATEN_CSR_SWITCH(csr, XPU, IdType, "CSRToSimple", { ret = impl::CSRToSimple(sorted_csr); }); return ret; } ///////////////////////// COO routines ////////////////////////// bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col) { bool ret = false; ATEN_COO_SWITCH(coo, XPU, IdType, "COOIsNonZero", { ret = impl::COOIsNonZero(coo, row, col); }); return ret; } NDArray COOIsNonZero(COOMatrix coo, NDArray row, NDArray col) { NDArray ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOIsNonZero", { ret = impl::COOIsNonZero(coo, row, col); }); return ret; } bool COOHasDuplicate(COOMatrix coo) { bool ret = false; ATEN_COO_SWITCH(coo, XPU, IdType, "COOHasDuplicate", { ret = impl::COOHasDuplicate(coo); }); return ret; } int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) { int64_t ret = 0; ATEN_COO_SWITCH_CUDA(coo, XPU, IdType, "COOGetRowNNZ", { ret = impl::COOGetRowNNZ(coo, row); }); return ret; } NDArray COOGetRowNNZ(COOMatrix coo, NDArray row) { NDArray ret; ATEN_COO_SWITCH_CUDA(coo, XPU, IdType, "COOGetRowNNZ", { ret = impl::COOGetRowNNZ(coo, row); }); return ret; } std::pair COOGetRowDataAndIndices( COOMatrix coo, int64_t row) { std::pair ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOGetRowDataAndIndices", { ret = impl::COOGetRowDataAndIndices(coo, row); }); return ret; } std::vector COOGetDataAndIndices( COOMatrix coo, NDArray rows, NDArray cols) { std::vector ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOGetDataAndIndices", { ret = impl::COOGetDataAndIndices(coo, rows, cols); }); return ret; } NDArray COOGetData(COOMatrix coo, NDArray rows, NDArray cols) { NDArray ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOGetData", { ret = impl::COOGetData(coo, rows, cols); }); return ret; } COOMatrix COOTranspose(COOMatrix coo) { return COOMatrix(coo.num_cols, coo.num_rows, coo.col, coo.row, coo.data); } CSRMatrix COOToCSR(COOMatrix coo) { CSRMatrix ret; ATEN_XPU_SWITCH_CUDA(coo.row->ctx.device_type, XPU, "COOToCSR", { ATEN_ID_TYPE_SWITCH( coo.row->dtype, IdType, { ret = 
impl::COOToCSR(coo); }); }); return ret; } COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end) { COOMatrix ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOSliceRows", { ret = impl::COOSliceRows(coo, start, end); }); return ret; } COOMatrix COOSliceRows(COOMatrix coo, NDArray rows) { COOMatrix ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOSliceRows", { ret = impl::COOSliceRows(coo, rows); }); return ret; } COOMatrix COOSliceMatrix(COOMatrix coo, NDArray rows, NDArray cols) { COOMatrix ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOSliceMatrix", { ret = impl::COOSliceMatrix(coo, rows, cols); }); return ret; } void COOSort_(COOMatrix* mat, bool sort_column) { if ((mat->row_sorted && !sort_column) || mat->col_sorted) return; ATEN_XPU_SWITCH_CUDA(mat->row->ctx.device_type, XPU, "COOSort_", { ATEN_ID_TYPE_SWITCH(mat->row->dtype, IdType, { impl::COOSort_(mat, sort_column); }); }); } std::pair COOIsSorted(COOMatrix coo) { if (coo.row->shape[0] <= 1) return {true, true}; std::pair ret; ATEN_COO_SWITCH_CUDA(coo, XPU, IdType, "COOIsSorted", { ret = impl::COOIsSorted(coo); }); return ret; } COOMatrix COOReorder( COOMatrix coo, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids) { COOMatrix ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOReorder", { ret = impl::COOReorder(coo, new_row_ids, new_col_ids); }); return ret; } COOMatrix COORemove(COOMatrix coo, IdArray entries) { COOMatrix ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COORemove", { ret = impl::COORemove(coo, entries); }); return ret; } std::pair COOLaborSampling( COOMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob, int importance_sampling, IdArray random_seed, float seed2_contribution, IdArray NIDs) { std::pair ret; ATEN_COO_SWITCH(mat, XPU, IdType, "COOLaborSampling", { const auto dtype = IsNullArray(prob) ? 
DGLDataTypeTraits::dtype : prob->dtype; ATEN_FLOAT_TYPE_SWITCH(dtype, FloatType, "probability", { ret = impl::COOLaborSampling( mat, rows, num_samples, prob, importance_sampling, random_seed, seed2_contribution, NIDs); }); }); return ret; } COOMatrix COORowWiseSampling( COOMatrix mat, IdArray rows, int64_t num_samples, NDArray prob_or_mask, bool replace) { COOMatrix ret; ATEN_COO_SWITCH(mat, XPU, IdType, "COORowWiseSampling", { if (IsNullArray(prob_or_mask)) { ret = impl::COORowWiseSamplingUniform( mat, rows, num_samples, replace); } else { ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH( prob_or_mask->dtype, DType, "probability or mask", { ret = impl::COORowWiseSampling( mat, rows, num_samples, prob_or_mask, replace); }); } }); return ret; } COOMatrix COORowWisePerEtypeSampling( COOMatrix mat, IdArray rows, const std::vector& eid2etype_offset, const std::vector& num_samples, const std::vector& prob_or_mask, bool replace) { COOMatrix ret; CHECK(prob_or_mask.size() > 0) << "probability or mask array is empty"; ATEN_COO_SWITCH(mat, XPU, IdType, "COORowWisePerEtypeSampling", { if (std::all_of(prob_or_mask.begin(), prob_or_mask.end(), IsNullArray)) { ret = impl::COORowWisePerEtypeSamplingUniform( mat, rows, eid2etype_offset, num_samples, replace); } else { ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH( prob_or_mask[0]->dtype, DType, "probability or mask", { ret = impl::COORowWisePerEtypeSampling( mat, rows, eid2etype_offset, num_samples, prob_or_mask, replace); }); } }); return ret; } COOMatrix COORowWiseTopk( COOMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending) { COOMatrix ret; ATEN_COO_SWITCH(mat, XPU, IdType, "COORowWiseTopk", { ATEN_DTYPE_SWITCH(weight->dtype, DType, "weight", { ret = impl::COORowWiseTopk( mat, rows, k, weight, ascending); }); }); return ret; } std::pair COOCoalesce(COOMatrix coo) { std::pair ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOCoalesce", { ret = impl::COOCoalesce(coo); }); return ret; } COOMatrix DisjointUnionCoo(const std::vector& coos) { 
COOMatrix ret; ATEN_XPU_SWITCH_CUDA(coos[0].row->ctx.device_type, XPU, "DisjointUnionCoo", { ATEN_ID_TYPE_SWITCH(coos[0].row->dtype, IdType, { ret = impl::DisjointUnionCoo(coos); }); }); return ret; } COOMatrix COOLineGraph(const COOMatrix& coo, bool backtracking) { COOMatrix ret; ATEN_COO_SWITCH(coo, XPU, IdType, "COOLineGraph", { ret = impl::COOLineGraph(coo, backtracking); }); return ret; } COOMatrix UnionCoo(const std::vector& coos) { COOMatrix ret; CHECK_GT(coos.size(), 1) << "UnionCoo creates a union of multiple COOMatrixes"; // sanity check for (size_t i = 1; i < coos.size(); ++i) { CHECK_EQ(coos[0].num_rows, coos[i].num_rows) << "UnionCoo requires both COOMatrix have same number of rows"; CHECK_EQ(coos[0].num_cols, coos[i].num_cols) << "UnionCoo requires both COOMatrix have same number of cols"; CHECK_SAME_CONTEXT(coos[0].row, coos[i].row); CHECK_SAME_DTYPE(coos[0].row, coos[i].row); } // we assume the number of coos is not large in common cases std::vector coo_row; std::vector coo_col; bool has_data = false; for (size_t i = 0; i < coos.size(); ++i) { coo_row.push_back(coos[i].row); coo_col.push_back(coos[i].col); has_data |= COOHasData(coos[i]); } IdArray row = Concat(coo_row); IdArray col = Concat(coo_col); IdArray data = NullArray(); if (has_data) { std::vector eid_data; eid_data.push_back( COOHasData(coos[0]) ? coos[0].data : Range( 0, coos[0].row->shape[0], coos[0].row->dtype.bits, coos[0].row->ctx)); int64_t num_edges = coos[0].row->shape[0]; for (size_t i = 1; i < coos.size(); ++i) { eid_data.push_back( COOHasData(coos[i]) ? 
coos[i].data + num_edges : Range(
                               num_edges, num_edges + coos[i].row->shape[0],
                               coos[i].row->dtype.bits, coos[i].row->ctx));
      num_edges += coos[i].row->shape[0];
    }
    data = Concat(eid_data);
  }
  return COOMatrix(
      coos[0].num_rows, coos[0].num_cols, row, col, data, false, false);
}

/**
 * @brief Coalesce duplicate entries of a COO matrix.
 *
 * The matrix is sorted with COOSort(coo, true) and then coalesced with
 * COOCoalesce.
 *
 * @param coo The input COO matrix.
 * @return A tuple of
 *   - the coalesced COO matrix, constructed with a null data array and both
 *     trailing boolean flags set to true (presumably row_sorted/col_sorted --
 *     TODO confirm against the COOMatrix constructor),
 *   - the count array returned by COOCoalesce,
 *   - an array mapping each old edge ID to the new (coalesced) edge ID.
 */
std::tuple COOToSimple(const COOMatrix& coo) {
  // coo column sorted
  const COOMatrix sorted_coo = COOSort(coo, true);
  // Use the stored data array as edge IDs when present; otherwise the edge
  // IDs are simply 0 .. (number of entries - 1).
  const IdArray eids_shuffled =
      COOHasData(sorted_coo)
          ? sorted_coo.data
          : Range(
                0, sorted_coo.row->shape[0], sorted_coo.row->dtype.bits,
                sorted_coo.row->ctx);
  const auto& coalesced_result = COOCoalesce(sorted_coo);
  const COOMatrix& coalesced_adj = coalesced_result.first;
  const IdArray& count = coalesced_result.second;

  /**
   * eids_shuffled actually already contains the mapping from old edge space to
   * the new one:
   *
   * * eids_shuffled[0:count[0]] indicates the original edge IDs that coalesced
   *   into new edge #0.
   * * eids_shuffled[count[0]:count[0] + count[1]] indicates those that
   *   coalesced into new edge #1.
   * * eids_shuffled[count[0] + count[1]:count[0] + count[1] + count[2]]
   *   indicates those that coalesced into new edge #2.
   * * etc.
   *
   * Here, we need to translate eids_shuffled to an array "eids_remapped" such
   * that eids_remapped[i] indicates the new edge ID the old edge #i is mapped
   * to. The translation can simply be achieved by (in numpy code):
   *
   *     new_eid_for_eids_shuffled = np.arange(len(count)).repeat(count)
   *     eids_remapped = np.zeros_like(new_eid_for_eids_shuffled)
   *     eids_remapped[eids_shuffled] = new_eid_for_eids_shuffled
   */
  const IdArray new_eids = Range(
      0, coalesced_adj.row->shape[0], coalesced_adj.row->dtype.bits,
      coalesced_adj.row->ctx);
  const IdArray eids_remapped =
      Scatter(Repeat(new_eids, count), eids_shuffled);

  COOMatrix ret = COOMatrix(
      coalesced_adj.num_rows, coalesced_adj.num_cols, coalesced_adj.row,
      coalesced_adj.col, NullArray(), true, true);
  return std::make_tuple(ret, count, eids_remapped);
}

///////////////////////// Graph Traverse routines //////////////////////////

/**
 * @brief Dispatch to impl::BFSNodesFrontiers.
 *
 * Checks that the graph and the source array share device type and dtype and
 * that the CSR is square, then dispatches on the source's device type and
 * ID type.
 *
 * @param csr The CSR matrix of the graph.
 * @param source The source array.
 * @return The Frontiers produced by the implementation.
 */
Frontiers BFSNodesFrontiers(const CSRMatrix& csr, IdArray source) {
  Frontiers ret;
  CHECK_EQ(csr.indptr->ctx.device_type, source->ctx.device_type)
      << "Graph and source should in the same device context";
  CHECK_EQ(csr.indices->dtype, source->dtype)
      << "Graph and source should in the same dtype";
  CHECK_EQ(csr.num_rows, csr.num_cols)
      << "Graph traversal can only work on square-shaped CSR.";
  ATEN_XPU_SWITCH(source->ctx.device_type, XPU, "BFSNodesFrontiers", {
    ATEN_ID_TYPE_SWITCH(source->dtype, IdType, {
      ret = impl::BFSNodesFrontiers(csr, source);
    });
  });
  return ret;
}

/**
 * @brief Dispatch to impl::BFSEdgesFrontiers.
 *
 * Performs the same device/dtype/squareness checks as BFSNodesFrontiers and
 * dispatches on the source's device type and ID type.
 *
 * @param csr The CSR matrix of the graph.
 * @param source The source array.
 * @return The Frontiers produced by the implementation.
 */
Frontiers BFSEdgesFrontiers(const CSRMatrix& csr, IdArray source) {
  Frontiers ret;
  CHECK_EQ(csr.indptr->ctx.device_type, source->ctx.device_type)
      << "Graph and source should in the same device context";
  CHECK_EQ(csr.indices->dtype, source->dtype)
      << "Graph and source should in the same dtype";
  CHECK_EQ(csr.num_rows, csr.num_cols)
      << "Graph traversal can only work on square-shaped CSR.";
  ATEN_XPU_SWITCH(source->ctx.device_type, XPU, "BFSEdgesFrontiers", {
    ATEN_ID_TYPE_SWITCH(source->dtype, IdType, {
      ret = impl::BFSEdgesFrontiers(csr, source);
    });
  });
  return ret;
}

/**
 * @brief Dispatch to impl::TopologicalNodesFrontiers.
 *
 * There is no source array here, so the dispatch uses the device type of
 * csr.indptr and the dtype of csr.indices. The CSR must be square.
 *
 * @param csr The CSR matrix of the graph.
 * @return The Frontiers produced by the implementation.
 */
Frontiers TopologicalNodesFrontiers(const CSRMatrix& csr) {
  Frontiers ret;
  CHECK_EQ(csr.num_rows, csr.num_cols)
      << "Graph traversal can only work on square-shaped CSR.";
  ATEN_XPU_SWITCH(
      csr.indptr->ctx.device_type, XPU, "TopologicalNodesFrontiers", {
        ATEN_ID_TYPE_SWITCH(csr.indices->dtype, IdType, {
          ret = impl::TopologicalNodesFrontiers(csr);
        });
      });
  return ret;
}

/**
 * @brief Dispatch to impl::DGLDFSEdges.
 *
 * Performs the same device/dtype/squareness checks as BFSNodesFrontiers and
 * dispatches on the source's device type and ID type.
 *
 * @param csr The CSR matrix of the graph.
 * @param source The source array.
 * @return The Frontiers produced by the implementation.
 */
Frontiers DGLDFSEdges(const CSRMatrix& csr, IdArray source) {
  Frontiers ret;
  CHECK_EQ(csr.indptr->ctx.device_type, source->ctx.device_type)
      << "Graph and source should in the same device context";
  CHECK_EQ(csr.indices->dtype, source->dtype)
      << "Graph and source should in the same dtype";
  CHECK_EQ(csr.num_rows, csr.num_cols)
      << "Graph traversal can only work on square-shaped CSR.";
  ATEN_XPU_SWITCH(source->ctx.device_type, XPU, "DGLDFSEdges", {
    ATEN_ID_TYPE_SWITCH(source->dtype, IdType, {
      ret = impl::DGLDFSEdges(csr, source);
    });
  });
  return ret;
}

/**
 * @brief Dispatch to impl::DGLDFSLabeledEdges.
 *
 * Performs the same device/dtype/squareness checks as BFSNodesFrontiers and
 * forwards the three boolean options unchanged to the implementation.
 *
 * @param csr The CSR matrix of the graph.
 * @param source The source array.
 * @param has_reverse_edge Forwarded to the implementation.
 * @param has_nontree_edge Forwarded to the implementation.
 * @param return_labels Forwarded to the implementation.
 * @return The Frontiers produced by the implementation.
 */
Frontiers DGLDFSLabeledEdges(
    const CSRMatrix& csr, IdArray source, const bool has_reverse_edge,
    const bool has_nontree_edge, const bool return_labels) {
  Frontiers ret;
  CHECK_EQ(csr.indptr->ctx.device_type, source->ctx.device_type)
      << "Graph and source should in the same device context";
  CHECK_EQ(csr.indices->dtype, source->dtype)
      << "Graph and source should in the same dtype";
  CHECK_EQ(csr.num_rows, csr.num_cols)
      << "Graph traversal can only work on square-shaped CSR.";
  ATEN_XPU_SWITCH(source->ctx.device_type, XPU, "DGLDFSLabeledEdges", {
    ATEN_ID_TYPE_SWITCH(source->dtype, IdType, {
      ret = impl::DGLDFSLabeledEdges(
          csr, source, has_reverse_edge, has_nontree_edge, return_labels);
    });
  });
  return ret;
}

/**
 * @brief SpMM on a CSR matrix.
 *
 * Computes the broadcast offsets with CalcBcastOff(op, ufeat, efeat) and
 * dispatches to SpMMCsr on the device type of csr.indptr, the dtype of
 * csr.indptr and the dtype of `out`. Results are written through `out` and
 * `out_aux`; nothing is returned.
 */
void CSRSpMM(
    const std::string& op, const std::string& reduce, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux) {
  const auto& bcast = CalcBcastOff(op, ufeat, efeat);

  ATEN_XPU_SWITCH_CUDA(csr.indptr->ctx.device_type, XPU, "SpMM", {
    ATEN_ID_TYPE_SWITCH(csr.indptr->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(out->dtype, Dtype, XPU, "Feature data", {
        SpMMCsr(
            op, reduce, bcast, csr, ufeat, efeat, out, out_aux);
      });
    });
  });
}

/** @brief C-string convenience overload; forwards to the std::string one. */
void CSRSpMM(
    const char* op, const char* reduce, const CSRMatrix& csr, NDArray ufeat,
    NDArray efeat, NDArray out, std::vector out_aux) {
  CSRSpMM(
      std::string(op), std::string(reduce), csr, ufeat, efeat, out, out_aux);
}

/**
 * @brief SDDMM on a CSR matrix.
 *
 * Computes the broadcast offsets with CalcBcastOff(op, ufeat, efeat) and
 * dispatches to SDDMMCsr on the device type of csr.indptr, the dtype of
 * csr.indptr and the dtype of `out`. lhs_target and rhs_target are forwarded
 * unchanged.
 */
void CSRSDDMM(
    const std::string& op, const CSRMatrix& csr, NDArray ufeat, NDArray efeat,
    NDArray out, int lhs_target, int rhs_target) {
  const auto& bcast = CalcBcastOff(op, ufeat, efeat);

  ATEN_XPU_SWITCH_CUDA(csr.indptr->ctx.device_type, XPU, "SDDMM", {
    ATEN_ID_TYPE_SWITCH(csr.indptr->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(out->dtype, Dtype, XPU, "Feature data", {
        SDDMMCsr(
            op, bcast, csr, ufeat, efeat, out, lhs_target, rhs_target);
      });
    });
  });
}

/** @brief C-string convenience overload; forwards to the std::string one. */
void CSRSDDMM(
    const char* op, const CSRMatrix& csr, NDArray ufeat, NDArray efeat,
    NDArray out, int lhs_target, int rhs_target) {
  return CSRSDDMM(
      std::string(op), csr, ufeat, efeat, out, lhs_target, rhs_target);
}

/**
 * @brief SpMM on a COO matrix.
 *
 * Same structure as CSRSpMM, but dispatches on the device type and dtype of
 * coo.row and calls SpMMCoo.
 */
void COOSpMM(
    const std::string& op, const std::string& reduce, const COOMatrix& coo,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux) {
  const auto& bcast = CalcBcastOff(op, ufeat, efeat);

  ATEN_XPU_SWITCH_CUDA(coo.row->ctx.device_type, XPU, "SpMM", {
    ATEN_ID_TYPE_SWITCH(coo.row->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(out->dtype, Dtype, XPU, "Feature data", {
        SpMMCoo(
            op, reduce, bcast, coo, ufeat, efeat, out, out_aux);
      });
    });
  });
}

/** @brief C-string convenience overload; forwards to the std::string one. */
void COOSpMM(
    const char* op, const char* reduce, const COOMatrix& coo, NDArray ufeat,
    NDArray efeat, NDArray out, std::vector out_aux) {
  COOSpMM(
      std::string(op), std::string(reduce), coo, ufeat, efeat, out, out_aux);
}

/**
 * @brief SDDMM on a COO matrix.
 *
 * Same structure as CSRSDDMM, but dispatches on the device type and dtype of
 * coo.row and calls SDDMMCoo.
 */
void COOSDDMM(
    const std::string& op, const COOMatrix& coo, NDArray ufeat, NDArray efeat,
    NDArray out, int lhs_target, int rhs_target) {
  const auto& bcast = CalcBcastOff(op, ufeat, efeat);

  ATEN_XPU_SWITCH_CUDA(coo.row->ctx.device_type, XPU, "SDDMM", {
    ATEN_ID_TYPE_SWITCH(coo.row->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(out->dtype, Dtype, XPU, "Feature data", {
        SDDMMCoo(
            op, bcast, coo, ufeat, efeat, out, lhs_target, rhs_target);
      });
    });
  });
}

/** @brief C-string convenience overload; forwards to the std::string one. */
void COOSDDMM(
    const char* op, const COOMatrix& coo, NDArray ufeat, NDArray efeat,
    NDArray out, int lhs_target, int rhs_target) {
  COOSDDMM(std::string(op), coo, ufeat, efeat, out, lhs_target, rhs_target);
}

///////////////////////// C APIs /////////////////////////

// Returns the `format` field of the sparse matrix passed as args[0].
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLSparseMatrixGetFormat")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      SparseMatrixRef spmat = args[0];
      *rv = spmat->format;
    });

// Returns the `num_rows` field of the sparse matrix passed as args[0].
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLSparseMatrixGetNumRows")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      SparseMatrixRef spmat = args[0];
      *rv = spmat->num_rows;
    });

// Returns the `num_cols` field of the sparse matrix passed as args[0].
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLSparseMatrixGetNumCols")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      SparseMatrixRef spmat = args[0];
      *rv = spmat->num_cols;
    });

// Returns spmat->indices[args[1]]. The index is not range-checked here.
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLSparseMatrixGetIndices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      SparseMatrixRef spmat = args[0];
      const int64_t i = args[1];
      *rv = spmat->indices[i];
    });

// Returns the boolean flags of the sparse matrix as a List of Values.
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLSparseMatrixGetFlags")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      SparseMatrixRef spmat = args[0];
      List flags;
      for (bool flg : spmat->flags) {
        flags.push_back(Value(MakeValue(flg)));
      }
      *rv = flags;
    });

// Builds a SparseMatrix from (format, nrows, ncols, indices, flags) and
// returns a reference to it.
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLCreateSparseMatrix")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const int32_t format = args[0];
      const int64_t nrows = args[1];
      const int64_t ncols = args[2];
      const List indices = args[3];
      const List flags = args[4];
      std::shared_ptr spmat(new SparseMatrix(
          format, nrows, ncols, ListValueToVector(indices),
          ListValueToVector(flags)));
      *rv = SparseMatrixRef(spmat);
    });

// Returns SharedMemory::Exist(name); always false when built for Windows.
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLExistSharedMemArray")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const std::string name = args[0];
#ifndef _WIN32
      *rv = SharedMemory::Exist(name);
#else
      *rv = false;
#endif  // _WIN32
    });

// Checks that the array's dtype code is kDGLUInt and returns a view of the
// same shape (CreateView(shape, dtype, 0)) whose dtype code is kDGLInt.
DGL_REGISTER_GLOBAL("ndarray._CAPI_DGLArrayCastToSigned")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray array = args[0];
      CHECK_EQ(array->dtype.code, kDGLUInt);
      std::vector shape(array->shape, array->shape + array->ndim);
      DGLDataType dtype = array->dtype;
      dtype.code = kDGLInt;
      *rv = array.CreateView(shape, dtype, 0);
    });

}  // namespace aten
}  // namespace dgl

// Streams the debug string of an NDArray (dgl::aten::ToDebugString).
std::ostream& operator<<(std::ostream& os, dgl::runtime::NDArray array) {
  return os << dgl::aten::ToDebugString(array);
}

================================================
FILE: src/array/array_arith.cc
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file array/array_arith.cc
 * @brief DGL array arithmetic operations
 */
#include
#include
#include

#include "../c_api_common.h"
#include "./arith.h"
#include "./array_op.h"

using namespace dgl::runtime;

namespace dgl {
namespace aten {

// Generate operators with both operands being NDArrays.
// The generated function checks that both operands share dtype and context,
// then dispatches on lhs's device type and ID type to impl::BinaryElewise.
#define BINARY_ELEMENT_OP(name, op)                             \
  IdArray name(IdArray lhs, IdArray rhs) {                      \
    IdArray ret;                                                \
    CHECK_SAME_DTYPE(lhs, rhs);                                 \
    CHECK_SAME_CONTEXT(lhs, rhs);                               \
    ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, {    \
      ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, {                 \
        ret = impl::BinaryElewise(lhs, rhs);                    \
      });                                                       \
    });                                                         \
    return ret;                                                 \
  }

// Generate operators with only lhs being NDArray (rhs is an int64_t scalar).
#define BINARY_ELEMENT_OP_L(name, op)                           \
  IdArray name(IdArray lhs, int64_t rhs) {                      \
    IdArray ret;                                                \
    ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, {    \
      ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, {                 \
        ret = impl::BinaryElewise(lhs, rhs);                    \
      });                                                       \
    });                                                         \
    return ret;                                                 \
  }

// Generate operators with only rhs being NDArray (lhs is an int64_t scalar).
#define BINARY_ELEMENT_OP_R(name, op)                           \
  IdArray name(int64_t lhs, IdArray rhs) {                      \
    IdArray ret;                                                \
    ATEN_XPU_SWITCH_CUDA(rhs->ctx.device_type, XPU, #name, {    \
      ATEN_ID_TYPE_SWITCH(rhs->dtype, IdType, {                 \
        ret = impl::BinaryElewise(lhs, rhs);                    \
      });                                                       \
    });                                                         \
    return ret;                                                 \
  }

// Generate unary operators on an NDArray.
#define UNARY_ELEMENT_OP(name, op)                              \
  IdArray name(IdArray lhs) {                                   \
    IdArray ret;                                                \
    ATEN_XPU_SWITCH_CUDA(lhs->ctx.device_type, XPU, #name, {    \
      ATEN_ID_TYPE_SWITCH(lhs->dtype, IdType, {                 \
        ret = impl::UnaryElewise(lhs);                          \
      });                                                       \
    });                                                         \
    return ret;                                                 \
  }

// Array (op) array.
BINARY_ELEMENT_OP(Add, Add)
BINARY_ELEMENT_OP(Sub, Sub)
BINARY_ELEMENT_OP(Mul, Mul)
BINARY_ELEMENT_OP(Div, Div)
BINARY_ELEMENT_OP(Mod, Mod)
BINARY_ELEMENT_OP(GT, GT)
BINARY_ELEMENT_OP(LT, LT)
BINARY_ELEMENT_OP(GE, GE)
BINARY_ELEMENT_OP(LE, LE)
BINARY_ELEMENT_OP(EQ, EQ)
BINARY_ELEMENT_OP(NE, NE)

// Array (op) scalar.
BINARY_ELEMENT_OP_L(Add, Add)
BINARY_ELEMENT_OP_L(Sub, Sub)
BINARY_ELEMENT_OP_L(Mul, Mul)
BINARY_ELEMENT_OP_L(Div, Div)
BINARY_ELEMENT_OP_L(Mod, Mod)
BINARY_ELEMENT_OP_L(GT, GT)
BINARY_ELEMENT_OP_L(LT, LT)
BINARY_ELEMENT_OP_L(GE, GE)
BINARY_ELEMENT_OP_L(LE, LE)
BINARY_ELEMENT_OP_L(EQ, EQ)
BINARY_ELEMENT_OP_L(NE, NE)

// Scalar (op) array.
BINARY_ELEMENT_OP_R(Add, Add)
BINARY_ELEMENT_OP_R(Sub, Sub)
BINARY_ELEMENT_OP_R(Mul, Mul)
BINARY_ELEMENT_OP_R(Div, Div)
BINARY_ELEMENT_OP_R(Mod, Mod)
BINARY_ELEMENT_OP_R(GT, GT)
BINARY_ELEMENT_OP_R(LT, LT)
BINARY_ELEMENT_OP_R(GE, GE)
BINARY_ELEMENT_OP_R(LE, LE)
BINARY_ELEMENT_OP_R(EQ, EQ)
BINARY_ELEMENT_OP_R(NE, NE)

// Unary negation.
UNARY_ELEMENT_OP(Neg, Neg)

}  // namespace aten
}  // namespace dgl

///////////////// Operator overloading for NDArray /////////////////
// Each overload below forwards to the dgl::aten function of the same meaning.
// Note that the comparison operators return an NDArray, not a bool.

// Arithmetic: array (op) array.
NDArray operator+(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::Add(lhs, rhs);
}
NDArray operator-(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::Sub(lhs, rhs);
}
NDArray operator*(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::Mul(lhs, rhs);
}
NDArray operator/(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::Div(lhs, rhs);
}
NDArray operator%(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::Mod(lhs, rhs);
}

// Arithmetic: array (op) scalar.
NDArray operator+(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::Add(lhs, rhs);
}
NDArray operator-(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::Sub(lhs, rhs);
}
NDArray operator*(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::Mul(lhs, rhs);
}
NDArray operator/(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::Div(lhs, rhs);
}
NDArray operator%(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::Mod(lhs, rhs);
}

// Arithmetic: scalar (op) array.
NDArray operator+(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::Add(lhs, rhs);
}
NDArray operator-(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::Sub(lhs, rhs);
}
NDArray operator*(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::Mul(lhs, rhs);
}
NDArray operator/(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::Div(lhs, rhs);
}
NDArray operator%(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::Mod(lhs, rhs);
}

// Unary negation.
NDArray operator-(const NDArray& array) { return dgl::aten::Neg(array); }

// Comparison: array (op) array.
NDArray operator>(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::GT(lhs, rhs);
}
NDArray operator<(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::LT(lhs, rhs);
}
NDArray operator>=(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::GE(lhs, rhs);
}
NDArray operator<=(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::LE(lhs, rhs);
}
NDArray operator==(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::EQ(lhs, rhs);
}
NDArray operator!=(const NDArray& lhs, const NDArray& rhs) {
  return dgl::aten::NE(lhs, rhs);
}

// Comparison: array (op) scalar.
NDArray operator>(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::GT(lhs, rhs);
}
NDArray operator<(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::LT(lhs, rhs);
}
NDArray operator>=(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::GE(lhs, rhs);
}
NDArray operator<=(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::LE(lhs, rhs);
}
NDArray operator==(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::EQ(lhs, rhs);
}
NDArray operator!=(const NDArray& lhs, int64_t rhs) {
  return dgl::aten::NE(lhs, rhs);
}

// Comparison: scalar (op) array.
NDArray operator>(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::GT(lhs, rhs);
}
NDArray operator<(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::LT(lhs, rhs);
}
NDArray operator>=(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::GE(lhs, rhs);
}
NDArray operator<=(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::LE(lhs, rhs);
}
NDArray operator==(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::EQ(lhs, rhs);
}
NDArray operator!=(int64_t lhs, const NDArray& rhs) {
  return dgl::aten::NE(lhs, rhs);
}

================================================
FILE: src/array/array_op.h
================================================
/**
 *  Copyright (c) 2019-2022 by Contributors
 * @file array/array_op.h
 * @brief Array operator templates
 */
#ifndef DGL_ARRAY_ARRAY_OP_H_
#define DGL_ARRAY_ARRAY_OP_H_

#include
#include
#include
#include
#include

namespace dgl {
namespace aten {
namespace impl {

// Declarations of the templated array kernels. The dispatch code selects an
// instantiation by device type and element/ID type; CPU definitions live
// under array/cpu/.

// Create an array of `length` elements, all equal to `val`.
template
IdArray Full(IdType val, int64_t length, DGLContext ctx);

// Create the array [low, low + 1, ..., high - 1].
template
IdArray Range(IdType low, IdType high, DGLContext ctx);

// Convert an ID array to the given integer width (32 or 64 bits).
template
IdArray AsNumBits(IdArray arr, uint8_t bits);

// Element-wise binary operation: array/array, array/scalar, scalar/array.
template
IdArray BinaryElewise(IdArray lhs, IdArray rhs);

template
IdArray BinaryElewise(IdArray lhs, IdType rhs);

template
IdArray BinaryElewise(IdType lhs, IdArray rhs);

// Element-wise unary operation.
template
IdArray UnaryElewise(IdArray array);

// Gather array[index[i]] for every i.
template
NDArray IndexSelect(NDArray array, IdArray index);

// Read the single element array[index].
template
DType IndexSelect(NDArray array, int64_t index);

template
IdArray NonZero(BoolArray bool_arr);

// Return the positions of the non-zero elements.
template
IdArray NonZero(NDArray array);

template
std::pair Sort(IdArray array, int num_bits);

template
NDArray Scatter(NDArray array, IdArray indices);

template
void Scatter_(IdArray index, NDArray value, NDArray out);

// Repeat array[i] repeats[i] times.
template
NDArray Repeat(NDArray array, IdArray repeats);

// Relabel the given arrays in place with consecutive IDs and return the
// array mapping new IDs back to old IDs.
template
IdArray Relabel_(const std::vector& arrays);

template
NDArray Concat(const std::vector& arrays);

// Pack a padded 2D array: rows are cut at the first `pad_value`.
template
std::tuple Pack(NDArray array, DType pad_value);

// Concatenate the first lengths[i] elements of each row.
template
std::pair ConcatSlices(NDArray array, IdArray lengths);

// Cumulative sum, optionally with a leading zero.
template
IdArray CumSum(IdArray array, bool prepend_zero);

// sparse arrays

template
bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col);

template
runtime::NDArray CSRIsNonZero(
    CSRMatrix csr, runtime::NDArray row, runtime::NDArray col);

template
bool CSRHasDuplicate(CSRMatrix csr);

template
int64_t CSRGetRowNNZ(CSRMatrix csr, int64_t row);

template
runtime::NDArray CSRGetRowNNZ(CSRMatrix csr, runtime::NDArray row);

template
runtime::NDArray CSRGetRowColumnIndices(CSRMatrix csr, int64_t row);

template
runtime::NDArray CSRGetRowData(CSRMatrix csr, int64_t row);

template
bool CSRIsSorted(CSRMatrix csr);

template
runtime::NDArray CSRGetData(
    CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols,
    bool return_eids, runtime::NDArray weights, DType filler);

// Convenience overload: forwards with return_eids = false.
template
runtime::NDArray CSRGetData(
    CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols,
    runtime::NDArray weights, DType filler) {
  return CSRGetData(
      csr, rows, cols, false, weights, filler);
}

// Convenience overload: forwards with return_eids = true, a null weight array
// of the rows' dtype, and -1 as the filler.
template
NDArray CSRGetData(CSRMatrix csr, NDArray rows, NDArray cols) {
  return CSRGetData(
      csr, rows, cols, true, NullArray(rows->dtype), -1);
}

template
std::vector CSRGetDataAndIndices(
    CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);

template
CSRMatrix CSRTranspose(CSRMatrix csr);

// Convert CSR to COO
template
COOMatrix CSRToCOO(CSRMatrix csr);

// Convert CSR to COO using data array as order
template
COOMatrix CSRToCOODataAsOrder(CSRMatrix csr);

template
CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end);

template
CSRMatrix CSRSliceRows(CSRMatrix csr, runtime::NDArray rows);

template
CSRMatrix CSRSliceMatrix(
    CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);

template
void CSRSort_(CSRMatrix* csr);

template
std::pair CSRSortByTag(
    const CSRMatrix& csr, IdArray tag_array, int64_t num_tags);

template
CSRMatrix CSRReorder(
    CSRMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids);

template
COOMatrix COOReorder(
    COOMatrix coo, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids);

template
CSRMatrix CSRRemove(CSRMatrix csr, IdArray entries);

template
std::pair CSRLaborSampling(
    CSRMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob,
    int importance_sampling, IdArray random_seed, float seed2_contribution,
    IdArray NIDs);

// FloatType is the type of probability data.
template
COOMatrix CSRRowWiseSampling(
    CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray prob_or_mask,
    bool replace);

// FloatType is the type of probability data.
template <
    DGLDeviceType XPU, typename IdxType, typename DType, bool map_seed_nodes>
std::pair CSRRowWiseSamplingFused(
    CSRMatrix mat, IdArray rows, IdArray seed_mapping,
    std::vector* new_seed_nodes, int64_t num_samples, NDArray prob_or_mask,
    bool replace);

// FloatType is the type of probability data.
template
COOMatrix CSRRowWisePerEtypeSampling(
    CSRMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_samples, const std::vector& prob_or_mask,
    bool replace, bool rowwise_etype_sorted);

template
COOMatrix CSRRowWiseSamplingUniform(
    CSRMatrix mat, IdArray rows, int64_t num_samples, bool replace);

template
std::pair CSRRowWiseSamplingUniformFused(
    CSRMatrix mat, IdArray rows, IdArray seed_mapping,
    std::vector* new_seed_nodes, int64_t num_samples, bool replace);

template
COOMatrix CSRRowWisePerEtypeSamplingUniform(
    CSRMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_samples, bool replace, bool rowwise_etype_sorted);

// FloatType is the type of weight data.
template
COOMatrix CSRRowWiseTopk(
    CSRMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending);

template
COOMatrix CSRRowWiseSamplingBiased(
    CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray tag_offset,
    FloatArray bias, bool replace);

template
std::pair CSRGlobalUniformNegativeSampling(
    const CSRMatrix& csr, int64_t num_samples, int num_trials,
    bool exclude_self_loops, bool replace, double redundancy);

// Union CSRMatrixes
template
CSRMatrix UnionCsr(const std::vector& csrs);

template
std::tuple CSRToSimple(CSRMatrix csr);

////////////////////////////////////////////////////////////////////////////////
// COO counterparts of the CSR kernels declared earlier in this header.

template
bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col);

template
runtime::NDArray COOIsNonZero(
    COOMatrix coo, runtime::NDArray row, runtime::NDArray col);

template
bool COOHasDuplicate(COOMatrix coo);

template
int64_t COOGetRowNNZ(COOMatrix coo, int64_t row);

template
runtime::NDArray COOGetRowNNZ(COOMatrix coo, runtime::NDArray row);

template
std::pair COOGetRowDataAndIndices(
    COOMatrix coo, int64_t row);

template
std::vector COOGetDataAndIndices(
    COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);

template
runtime::NDArray COOGetData(
    COOMatrix mat, runtime::NDArray rows, runtime::NDArray cols);

template
COOMatrix COOTranspose(COOMatrix coo);

template
CSRMatrix COOToCSR(COOMatrix coo);

template
COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end);

template
COOMatrix COOSliceRows(COOMatrix coo, runtime::NDArray rows);

template
COOMatrix COOSliceMatrix(
    COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols);

template
std::pair COOCoalesce(COOMatrix coo);

template
COOMatrix DisjointUnionCoo(const std::vector& coos);

// In-place sort; the trailing underscore marks the in-place variant.
template
void COOSort_(COOMatrix* mat, bool sort_column);

template
std::pair COOIsSorted(COOMatrix coo);

template
COOMatrix COORemove(COOMatrix coo, IdArray entries);

template
std::pair COOLaborSampling(
    COOMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob,
    int importance_sampling, IdArray random_seed, float seed2_contribution,
    IdArray NIDs);

// FloatType is the type of probability data.
template
COOMatrix COORowWiseSampling(
    COOMatrix mat, IdArray rows, int64_t num_samples, NDArray prob_or_mask,
    bool replace);

// FloatType is the type of probability data.
template
COOMatrix COORowWisePerEtypeSampling(
    COOMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_samples, const std::vector& prob_or_mask,
    bool replace);

template
COOMatrix COORowWiseSamplingUniform(
    COOMatrix mat, IdArray rows, int64_t num_samples, bool replace);

template
COOMatrix COORowWisePerEtypeSamplingUniform(
    COOMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_samples, bool replace);

// FloatType is the type of weight data.
template
COOMatrix COORowWiseTopk(
    COOMatrix mat, IdArray rows, int64_t k, FloatArray weight, bool ascending);

///////////////////////// Graph Traverse routines //////////////////////////
// Device/ID-type specific implementations behind the dispatchers of the same
// names in namespace dgl::aten.

template
Frontiers BFSNodesFrontiers(const CSRMatrix& csr, IdArray source);

template
Frontiers BFSEdgesFrontiers(const CSRMatrix& csr, IdArray source);

template
Frontiers TopologicalNodesFrontiers(const CSRMatrix& csr);

template
Frontiers DGLDFSEdges(const CSRMatrix& csr, IdArray source);

template
Frontiers DGLDFSLabeledEdges(
    const CSRMatrix& csr, IdArray source, const bool has_reverse_edge,
    const bool has_nontree_edge, const bool return_labels);

template
COOMatrix COOLineGraph(const COOMatrix& coo, bool backtracking);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

#endif  // DGL_ARRAY_ARRAY_OP_H_

================================================
FILE: src/array/check.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file array/check.h
 * @brief DGL check utilities
 */
#ifndef DGL_ARRAY_CHECK_H_
#define DGL_ARRAY_CHECK_H_

#include
#include
#include
#include

namespace dgl {
namespace aten {

// Check whether the given arguments have the same context.
inline void CheckCtx( const DGLContext& ctx, const std::vector& arrays, const std::vector& names) { for (size_t i = 0; i < arrays.size(); ++i) { if (IsNullArray(arrays[i])) continue; CHECK_EQ(ctx, arrays[i]->ctx) << "Expected device context " << ctx << ". But got " << arrays[i]->ctx << " for " << names[i] << "."; } } // Check whether input tensors are contiguous. inline void CheckContiguous( const std::vector& arrays, const std::vector& names) { for (size_t i = 0; i < arrays.size(); ++i) { if (IsNullArray(arrays[i])) continue; CHECK(arrays[i].IsContiguous()) << "Expect " << names[i] << " to be a contiguous tensor"; } } // Check whether input tensors have valid shape. inline void CheckShape( const std::vector& gdim, const std::vector& uev_idx, const std::vector& arrays, const std::vector& names) { for (size_t i = 0; i < arrays.size(); ++i) { if (IsNullArray(arrays[i])) continue; CHECK_GE(arrays[i]->ndim, 2) << "Expect " << names[i] << " to have ndim >= 2, " << "Note that for scalar feature we expand its " << "dimension with an additional dimension of " << "length one."; CHECK_EQ(gdim[uev_idx[i]], arrays[i]->shape[0]) << "Expect " << names[i] << " to have size " << gdim[uev_idx[i]] << " on the first dimension, " << "but got " << arrays[i]->shape[0]; } } } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CHECK_H_ ================================================ FILE: src/array/cpu/array_cumsum.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/array_cumsum.cc * @brief Array cumsum CPU implementation */ #include namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template IdArray CumSum(IdArray array, bool prepend_zero) { const int64_t len = array.NumElements(); if (len == 0) return !prepend_zero ? 
array : aten::Full(0, 1, array->dtype.bits, array->ctx); if (prepend_zero) { IdArray ret = aten::NewIdArray(len + 1, array->ctx, array->dtype.bits); const IdType* in_d = array.Ptr(); IdType* out_d = ret.Ptr(); out_d[0] = 0; for (int64_t i = 0; i < len; ++i) out_d[i + 1] = out_d[i] + in_d[i]; return ret; } else { IdArray ret = aten::NewIdArray(len, array->ctx, array->dtype.bits); const IdType* in_d = array.Ptr(); IdType* out_d = ret.Ptr(); out_d[0] = in_d[0]; for (int64_t i = 1; i < len; ++i) out_d[i] = out_d[i - 1] + in_d[i]; return ret; } } template IdArray CumSum(IdArray, bool); template IdArray CumSum(IdArray, bool); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/array_index_select.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/array_index_select.cc * @brief Array index select CPU implementation */ #include namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template NDArray IndexSelect(NDArray array, IdArray index) { CHECK_EQ(array->shape[0], array.NumElements()) << "Only support tensor" << " whose first dimension equals number of elements, e.g. 
(5,), (5, 1)"; const DType* array_data = static_cast(array->data); const IdType* idx_data = static_cast(index->data); const int64_t arr_len = array->shape[0]; const int64_t len = index->shape[0]; NDArray ret = NDArray::Empty({len}, array->dtype, array->ctx); DType* ret_data = static_cast(ret->data); for (int64_t i = 0; i < len; ++i) { CHECK_LT(idx_data[i], arr_len) << "Index out of range."; ret_data[i] = array_data[idx_data[i]]; } return ret; } template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template DType IndexSelect(NDArray array, int64_t index) { const DType* data = static_cast(array->data); return data[index]; } template int32_t IndexSelect(NDArray array, int64_t index); template int64_t IndexSelect(NDArray array, int64_t index); template float IndexSelect(NDArray array, int64_t index); template double IndexSelect(NDArray array, int64_t index); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/array_nonzero.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/array_nonzero.cc * @brief Array nonzero CPU implementation */ #include namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template IdArray NonZero(IdArray array) { std::vector ret; const IdType* data = array.Ptr(); for (int64_t i = 0; i < array->shape[0]; ++i) if (data[i] != 0) ret.push_back(i); return NDArray::FromVector(ret, array->ctx); } template IdArray NonZero(IdArray); template IdArray NonZero(IdArray); } // namespace impl } // namespace aten } // namespace dgl ================================================ 
FILE: src/array/cpu/array_op_impl.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/array_op_impl.cc * @brief Array operator CPU implementation */ #include #include #include #include #include "../arith.h" namespace dgl { using runtime::NDArray; using runtime::parallel_for; namespace aten { namespace impl { ///////////////////////////// AsNumBits ///////////////////////////// template IdArray AsNumBits(IdArray arr, uint8_t bits) { CHECK(bits == 32 || bits == 64) << "invalid number of integer bits"; if (sizeof(IdType) * 8 == bits) { return arr; } const int64_t len = arr->shape[0]; IdArray ret = NewIdArray(len, arr->ctx, bits); const IdType* arr_data = static_cast(arr->data); if (bits == 32) { int32_t* ret_data = static_cast(ret->data); for (int64_t i = 0; i < len; ++i) { ret_data[i] = arr_data[i]; } } else { int64_t* ret_data = static_cast(ret->data); for (int64_t i = 0; i < len; ++i) { ret_data[i] = arr_data[i]; } } return ret; } template IdArray AsNumBits(IdArray arr, uint8_t bits); template IdArray AsNumBits(IdArray arr, uint8_t bits); ///////////////////////////// BinaryElewise ///////////////////////////// template IdArray BinaryElewise(IdArray lhs, IdArray rhs) { IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits); const IdType* lhs_data = static_cast(lhs->data); const IdType* rhs_data = static_cast(rhs->data); IdType* ret_data = static_cast(ret->data); // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, // scheduling, etc., especially since the workload is very light. Need to // replace with parallel_for. 
for (int64_t i = 0; i < lhs->shape[0]; i++) { ret_data[i] = Op::Call(lhs_data[i], rhs_data[i]); } return ret; } template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise(IdArray lhs, IdType rhs) { IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits); const IdType* lhs_data = static_cast(lhs->data); IdType* ret_data = static_cast(ret->data); // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, // scheduling, etc., especially since the workload is very light. Need to // replace with parallel_for. 
for (int64_t i = 0; i < lhs->shape[0]; i++) { ret_data[i] = Op::Call(lhs_data[i], rhs); } return ret; } template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise(IdType lhs, IdArray rhs) { IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits); const IdType* rhs_data = static_cast(rhs->data); IdType* ret_data = static_cast(ret->data); // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, // scheduling, etc., especially since the workload is very light. Need to // replace with parallel_for. 
for (int64_t i = 0; i < rhs->shape[0]; i++) { ret_data[i] = Op::Call(lhs, rhs_data[i]); } return ret; } template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray UnaryElewise(IdArray lhs) { IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits); const IdType* lhs_data = static_cast(lhs->data); IdType* ret_data = static_cast(ret->data); // TODO(BarclayII): this usually incurs lots of overhead in thread spawning, // scheduling, etc., especially since the workload is very light. Need to // replace with parallel_for. 
for (int64_t i = 0; i < lhs->shape[0]; i++) { ret_data[i] = Op::Call(lhs_data[i]); } return ret; } template IdArray UnaryElewise(IdArray lhs); template IdArray UnaryElewise(IdArray lhs); ///////////////////////////// Full ///////////////////////////// template NDArray Full(DType val, int64_t length, DGLContext ctx) { NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits::dtype, ctx); DType* ret_data = static_cast(ret->data); std::fill(ret_data, ret_data + length, val); return ret; } template NDArray Full( int32_t val, int64_t length, DGLContext ctx); template NDArray Full( int64_t val, int64_t length, DGLContext ctx); template NDArray Full( float val, int64_t length, DGLContext ctx); template NDArray Full( double val, int64_t length, DGLContext ctx); ///////////////////////////// Range ///////////////////////////// template IdArray Range(IdType low, IdType high, DGLContext ctx) { CHECK(high >= low) << "high must be bigger than low"; IdArray ret = NewIdArray(high - low, ctx, sizeof(IdType) * 8); IdType* ret_data = static_cast(ret->data); std::iota(ret_data, ret_data + high - low, low); return ret; } template IdArray Range(int32_t, int32_t, DGLContext); template IdArray Range(int64_t, int64_t, DGLContext); ///////////////////////////// Relabel_ ///////////////////////////// template IdArray Relabel_(const std::vector& arrays) { // build map & relabel IdType newid = 0; std::unordered_map oldv2newv; for (IdArray arr : arrays) { for (int64_t i = 0; i < arr->shape[0]; ++i) { const IdType id = static_cast(arr->data)[i]; if (!oldv2newv.count(id)) { oldv2newv[id] = newid++; } static_cast(arr->data)[i] = oldv2newv[id]; } } // map array IdArray maparr = NewIdArray(newid, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8); IdType* maparr_data = static_cast(maparr->data); for (const auto& kv : oldv2newv) { maparr_data[kv.second] = kv.first; } return maparr; } template IdArray Relabel_(const std::vector& arrays); template IdArray Relabel_(const std::vector& arrays); } // namespace 
impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/array_pack.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/array_index_select.cc * @brief Array index select CPU implementation */ #include #include #include #include namespace dgl { using runtime::NDArray; using runtime::parallel_for; namespace aten { namespace impl { template std::pair ConcatSlices(NDArray array, IdArray lengths) { const int64_t rows = lengths->shape[0]; const int64_t cols = (array->ndim == 1 ? array->shape[0] : array->shape[1]); const int64_t stride = (array->ndim == 1 ? 0 : cols); const DType *array_data = static_cast(array->data); const IdType *length_data = static_cast(lengths->data); IdArray offsets = NewIdArray(rows, array->ctx, sizeof(IdType) * 8); IdType *offsets_data = static_cast(offsets->data); for (int64_t i = 0; i < rows; ++i) offsets_data[i] = (i == 0 ? 0 : length_data[i - 1] + offsets_data[i - 1]); const int64_t total_length = offsets_data[rows - 1] + length_data[rows - 1]; NDArray concat = NDArray::Empty({total_length}, array->dtype, array->ctx); DType *concat_data = static_cast(concat->data); parallel_for(0, rows, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { for (int64_t j = 0; j < length_data[i]; ++j) concat_data[offsets_data[i] + j] = array_data[i * stride + j]; } }); return std::make_pair(concat, offsets); } template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::pair ConcatSlices( NDArray, IdArray); template std::tuple Pack(NDArray array, DType pad_value) { CHECK_NDIM(array, 2, "array"); const DType *array_data = 
static_cast(array->data); const int64_t rows = array->shape[0]; const int64_t cols = array->shape[1]; IdArray length = NewIdArray(rows, array->ctx); int64_t *length_data = static_cast(length->data); parallel_for(0, rows, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { int64_t j; for (j = 0; j < cols; ++j) { const DType val = array_data[i * cols + j]; if (val == pad_value) break; } length_data[i] = j; } }); auto ret = ConcatSlices(array, length); return std::make_tuple(ret.first, length, ret.second); } template std::tuple Pack( NDArray, int32_t); template std::tuple Pack( NDArray, int64_t); template std::tuple Pack( NDArray, float); template std::tuple Pack( NDArray, double); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/array_repeat.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/array_repeat.cc * @brief Array repeat CPU implementation */ #include #include namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template NDArray Repeat(NDArray array, IdArray repeats) { CHECK(array->shape[0] == repeats->shape[0]) << "shape of array and repeats mismatch"; const int64_t len = array->shape[0]; const DType *array_data = static_cast(array->data); const IdType *repeats_data = static_cast(repeats->data); IdType num_elements = 0; for (int64_t i = 0; i < len; ++i) num_elements += repeats_data[i]; NDArray result = NDArray::Empty({num_elements}, array->dtype, array->ctx); DType *result_data = static_cast(result->data); IdType curr = 0; for (int64_t i = 0; i < len; ++i) { std::fill( result_data + curr, result_data + curr + repeats_data[i], array_data[i]); curr += repeats_data[i]; } return result; } template NDArray Repeat(NDArray, IdArray); template NDArray Repeat(NDArray, IdArray); template NDArray Repeat(NDArray, IdArray); template NDArray Repeat(NDArray, IdArray); template NDArray Repeat(NDArray, IdArray); 
template NDArray Repeat(NDArray, IdArray); template NDArray Repeat(NDArray, IdArray); template NDArray Repeat(NDArray, IdArray); }; // namespace impl }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cpu/array_scatter.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/array_scatter.cc * @brief Array scatter CPU implementation */ #include #include namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template NDArray Scatter(NDArray array, IdArray indices) { NDArray result = NDArray::Empty({indices->shape[0]}, array->dtype, array->ctx); const DType *array_data = static_cast(array->data); const IdType *indices_data = static_cast(indices->data); DType *result_data = static_cast(result->data); for (int64_t i = 0; i < indices->shape[0]; ++i) result_data[indices_data[i]] = array_data[i]; return result; } template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template NDArray Scatter(NDArray, IdArray); template void Scatter_(IdArray index, NDArray value, NDArray out) { const int64_t len = index->shape[0]; const IdType *idx = index.Ptr(); const DType *val = value.Ptr(); DType *outd = out.Ptr(); runtime::parallel_for(0, len, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { outd[idx[i]] = val[i]; } }); } template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, 
namespace {

/**
 * @brief Proxy reference to one position of two parallel arrays.
 *
 * Assigning to a PairRef writes through to both underlying slots, which lets
 * standard algorithms permute the two arrays in lockstep.
 */
template <typename V1, typename V2>
struct PairRef {
  PairRef() = delete;
  PairRef(const PairRef& other) = default;
  PairRef(PairRef&& other) = default;
  PairRef(V1* const r, V2* const c) : row(r), col(c) {}

  // Copy the referenced values, not the pointers.
  PairRef& operator=(const PairRef& other) {
    *row = *other.row;
    *col = *other.col;
    return *this;
  }
  // Store a detached value (e.g. an algorithm's temporary) into the slots.
  PairRef& operator=(const std::pair<V1, V2>& val) {
    *row = std::get<0>(val);
    *col = std::get<1>(val);
    return *this;
  }

  // Snapshot the referenced values as a plain pair.
  operator std::pair<V1, V2>() const { return std::make_pair(*row, *col); }

  // Swap the referenced values; const because the proxy itself is unchanged.
  void Swap(const PairRef& other) const {
    std::swap(*row, *other.row);
    std::swap(*col, *other.col);
  }

  V1* row;
  V2* col;
};

using std::swap;
// Found by ADL when algorithms call swap(*a, *b) on proxy temporaries.
template <typename V1, typename V2>
void swap(const PairRef<V1, V2>& r1, const PairRef<V1, V2>& r2) {
  r1.Swap(r2);
}

/**
 * @brief Random access iterator over two parallel arrays.
 *
 * The iterator traits are declared as member types rather than by inheriting
 * from std::iterator, which is deprecated since C++17.
 */
template <typename V1, typename V2>
struct PairIterator {
  using iterator_category = std::random_access_iterator_tag;
  using value_type = std::pair<V1, V2>;
  using difference_type = std::ptrdiff_t;
  using pointer = std::pair<V1*, V2*>;
  using reference = PairRef<V1, V2>;

  PairIterator() = default;
  PairIterator(const PairIterator& other) = default;
  PairIterator(PairIterator&& other) = default;
  PairIterator(V1* r, V2* c) : row(r), col(c) {}

  PairIterator& operator=(const PairIterator& other) = default;
  PairIterator& operator=(PairIterator&& other) = default;
  ~PairIterator() = default;

  // Both pointers always move together, so comparing `row` is sufficient.
  bool operator==(const PairIterator& other) const { return row == other.row; }
  bool operator!=(const PairIterator& other) const { return row != other.row; }
  bool operator<(const PairIterator& other) const { return row < other.row; }
  bool operator>(const PairIterator& other) const { return row > other.row; }
  bool operator<=(const PairIterator& other) const { return row <= other.row; }
  bool operator>=(const PairIterator& other) const { return row >= other.row; }

  PairIterator& operator+=(const difference_type& movement) {
    row += movement;
    col += movement;
    return *this;
  }
  PairIterator& operator-=(const difference_type& movement) {
    row -= movement;
    col -= movement;
    return *this;
  }

  PairIterator& operator++() { return operator+=(1); }
  PairIterator& operator--() { return operator-=(1); }

  PairIterator operator++(int) {
    PairIterator ret(*this);
    operator++();
    return ret;
  }
  PairIterator operator--(int) {
    PairIterator ret(*this);
    operator--();
    return ret;
  }

  PairIterator operator+(const difference_type& movement) const {
    PairIterator ret(*this);
    ret += movement;
    return ret;
  }
  PairIterator operator-(const difference_type& movement) const {
    PairIterator ret(*this);
    ret -= movement;
    return ret;
  }

  difference_type operator-(const PairIterator& other) const {
    return row - other.row;
  }

  reference operator*() const { return reference(row, col); }

  // Required for random access iterators in VS2019. The offset is signed so
  // that negative offsets are not converted to a huge unsigned value.
  reference operator[](difference_type offset) const {
    return reference(row + offset, col + offset);
  }

  V1* row = nullptr;
  V2* col = nullptr;
};

}  // namespace
by Contributors * @file dgl/array_utils.h * @brief Utility classes and functions for DGL arrays. */ #ifndef DGL_ARRAY_CPU_ARRAY_UTILS_H_ #define DGL_ARRAY_CPU_ARRAY_UTILS_H_ #include #include #include #include #include #include "../../c_api_common.h" namespace dgl { namespace aten { /** * @brief A hashmap that maps each ids in the given array to new ids starting * from zero. * * Useful for relabeling integers and finding unique integers. * * Usually faster than std::unordered_map in existence checking. */ template class IdHashMap { public: // default ctor IdHashMap() : filter_(kFilterSize, false) {} // Construct the hashmap using the given id array. // The id array could contain duplicates. // If the id array has no duplicates, the array will be relabeled to // consecutive integers starting from 0. explicit IdHashMap(IdArray ids) : filter_(kFilterSize, false) { oldv2newv_.reserve(ids->shape[0]); Update(ids); } // copy ctor IdHashMap(const IdHashMap& other) = default; void Reserve(const int64_t size) { oldv2newv_.reserve(size); } // Update the hashmap with given id array. // The id array could contain duplicates. void Update(IdArray ids) { const IdType* ids_data = static_cast(ids->data); const int64_t len = ids->shape[0]; for (int64_t i = 0; i < len; ++i) { const IdType id = ids_data[i]; // Insertion will not happen if the key already exists. oldv2newv_.insert({id, oldv2newv_.size()}); filter_[id & kFilterMask] = true; } } // Return true if the given id is contained in this hashmap. bool Contains(IdType id) const { return filter_[id & kFilterMask] && oldv2newv_.count(id); } // Return the new id of the given id. If the given id is not contained // in the hash map, returns the default_val instead. IdType Map(IdType id, IdType default_val) const { if (filter_[id & kFilterMask]) { auto it = oldv2newv_.find(id); return (it == oldv2newv_.end()) ? default_val : it->second; } else { return default_val; } } // Return the new id of each id in the given array. 
/**
 * @brief Hash type for building maps/sets with pairs as keys.
 *
 * The two element hashes are mixed asymmetrically. A plain XOR would give
 * (a, b) and (b, a) the same hash, and every (a, a) the hash 0, whenever the
 * element hashes are equal.
 */
struct PairHash {
  template <class T1, class T2>
  std::size_t operator()(const std::pair<T1, T2>& pair) const {
    std::size_t seed = std::hash<T1>()(pair.first);
    // hash_combine-style mixing: the shifts make the result depend on the
    // order of the two elements.
    seed ^= std::hash<T2>()(pair.second) + 0x9e3779b9 + (seed << 6) +
            (seed >> 2);
    return seed;
  }
};
/**
 * @brief Get the hash table capacity for `num` input ids.
 *
 * The result is the smallest power of two strictly greater than `3 * num`,
 * i.e. `2^(1 + floor(log2(3 * num)))`. It is computed with integer arithmetic
 * so it is exact for every size, and it returns 1 for `num == 0` instead of
 * evaluating `log2(0)`.
 *
 * @param num Number of ids that will be inserted.
 *
 * @return The capacity, always a power of two.
 */
inline size_t GetMapSize(size_t num) {
  // The bit width of `3 * num` equals `1 + floor(log2(3 * num))` when the
  // value is non-zero, and 0 otherwise.
  size_t shift = 0;
  for (size_t remaining = num * 3; remaining != 0; remaining >>= 1) {
    ++shift;
  }
  return static_cast<size_t>(1) << shift;
}
parallel_for(0, num_seeds, kGrainSize, [&](int64_t s, int64_t e) { for (int64_t i = s; i < e; i++) { InsertAndSet(ids_data[i], static_cast(i)); } }); // Place the first `num_seeds` ids. device->CopyDataFromTo( ids_data, 0, unique_ids_data, 0, sizeof(IdType) * num_seeds, ctx, ctx, ids->dtype); // An auxiliary array indicates whether the corresponding elements // are inserted into hash map or not. Use `int16_t` instead of `bool` as // vector is unsafe when updating different elements from different // threads. See https://en.cppreference.com/w/cpp/container#Thread_safety. std::vector valid(num_ids); auto thread_num = compute_num_threads(0, num_ids, kGrainSize); std::vector block_offset(thread_num + 1, 0); // Insert all elements in this loop. parallel_for(num_seeds, num_ids, kGrainSize, [&](int64_t s, int64_t e) { size_t count = 0; for (int64_t i = s; i < e; i++) { valid[i] = Insert(ids_data[i]); count += valid[i]; } block_offset[omp_get_thread_num() + 1] = count; }); // Get ExclusiveSum of each block. std::partial_sum( block_offset.begin() + 1, block_offset.end(), block_offset.begin() + 1); unique_ids->shape[0] = num_seeds + block_offset.back(); // Get unique array from ids and set value for hash map. 
parallel_for(num_seeds, num_ids, kGrainSize, [&](int64_t s, int64_t e) { auto tid = omp_get_thread_num(); auto pos = block_offset[tid] + num_seeds; for (int64_t i = s; i < e; i++) { if (valid[i]) { unique_ids_data[pos] = ids_data[i]; Set(ids_data[i], pos); pos = pos + 1; } } }); return unique_ids; } template IdArray ConcurrentIdHashMap::MapIds(const IdArray& ids) const { CHECK_EQ(ids.defined(), true); const IdType* ids_data = ids.Ptr(); const size_t num_ids = static_cast(ids->shape[0]); CHECK_GT(num_ids, 0); DGLContext ctx = DGLContext{kDGLCPU, 0}; IdArray new_ids = NewIdArray(num_ids, ctx, sizeof(IdType) * 8); IdType* values_data = new_ids.Ptr(); parallel_for(0, num_ids, kGrainSize, [&](int64_t s, int64_t e) { for (int64_t i = s; i < e; i++) { values_data[i] = MapId(ids_data[i]); } }); return new_ids; } template inline void ConcurrentIdHashMap::Next( IdType* pos, IdType* delta) const { // Use Quadric probing. *pos = (*pos + (*delta) * (*delta)) & mask_; *delta = *delta + 1; } template inline IdType ConcurrentIdHashMap::MapId(IdType id) const { IdType pos = (id & mask_), delta = 1; IdType empty_key = static_cast(kEmptyKey); while (hash_map_[pos].key != empty_key && hash_map_[pos].key != id) { Next(&pos, &delta); } return hash_map_[pos].value; } template bool ConcurrentIdHashMap::Insert(IdType id) { IdType pos = (id & mask_), delta = 1; InsertState state = AttemptInsertAt(pos, id); while (state == InsertState::OCCUPIED) { Next(&pos, &delta); state = AttemptInsertAt(pos, id); } return state == InsertState::INSERTED; } template inline void ConcurrentIdHashMap::Set(IdType key, IdType value) { IdType pos = (key & mask_), delta = 1; while (hash_map_[pos].key != key) { Next(&pos, &delta); } hash_map_[pos].value = value; } template inline void ConcurrentIdHashMap::InsertAndSet(IdType id, IdType value) { IdType pos = (id & mask_), delta = 1; while (AttemptInsertAt(pos, id) == InsertState::OCCUPIED) { Next(&pos, &delta); } hash_map_[pos].value = value; } template inline 
/**
 * @brief Atomically replace the value at `ptr` with 0 if it currently is -1.
 *
 * @param ptr Pointer to the integer to test and modify.
 *
 * @return true if the value was -1 and has been replaced by 0, false
 *         otherwise (the value is then left untouched).
 */
template <typename IdType>
bool BoolCompareAndSwap(IdType* ptr) {
#if defined(_MSC_VER)
  if (sizeof(IdType) == 8) {
    return _InterlockedCompareExchange64(
               reinterpret_cast<long long*>(ptr), 0, -1) == -1;
  } else if (sizeof(IdType) == 4) {
    return _InterlockedCompareExchange(
               reinterpret_cast<long*>(ptr), 0, -1) == -1;
  } else {
    LOG(FATAL) << "ID can only be int32 or int64";
  }
#elif defined(__GNUC__)  // _MSC_VER
  const IdType expected = -1;
  const IdType replacement = 0;
  return __sync_bool_compare_and_swap(ptr, expected, replacement);
#else  // _MSC_VER
#error "CompareAndSwap is not supported on this platform."
#endif  // _MSC_VER
}

template bool BoolCompareAndSwap<int32_t>(int32_t*);
template bool BoolCompareAndSwap<int64_t>(int64_t*);
The first * is creating the hashmap, and the second is initializing it with an id array which is * divided into 2 parts: [`seed ids`, `sampled ids`]. `Seed ids` refer to * a set of ids chosen as the input for the sampling process and `sampled ids` are the * ids newly sampled from the process (note that the `seed ids` might also be * sampled in the process and included in the `sampled ids`). As a result `seed * ids` are mapped to [0, num_seed_ids) and `sampled ids` to [num_seed_ids, * num_unique_ids). Notice that mapping order is stable for `seed ids` while not * for the `sampled ids`. * * For example, for an array `A` having 4 seed ids with following entries: * [99, 98, 100, 97, 97, 101, 101, 102, 101] * Create the hashmap `H` with: * `H = ConcurrentIdHashMap()` (1) * And Init it with: * `U = H.Init(A, 4)` (2) (U is an id array used to store the unique * ids in A). * Then `U` should be (U is not the only valid result as the overall mapping is not stable): * [99, 98, 100, 97, 102, 101] * And the hashmap should generate following mappings: * * [ * {key: 99, value: 0}, * {key: 98, value: 1}, * {key: 100, value: 2}, * {key: 97, value: 3}, * {key: 102, value: 4}, * {key: 101, value: 5} * ] * Search the hashmap with array `I`=[98, 99, 102]: * R = H.MapIds(I) (3) * R should be: * [1, 0, 4] **/ template class ConcurrentIdHashMap { private: /** * @brief The result state of an attempt to insert. */ enum class InsertState { OCCUPIED, // Indicates that the space where an insertion is being // attempted is already occupied by another element. EXISTED, // Indicates that the element being inserted already exists in the // map, and thus no insertion is performed. INSERTED // Indicates that the insertion was successful and a new element // was added to the map. }; public: /** * @brief An entry in the hashtable. */ struct Mapping { /** * @brief The ID of the item inserted. */ IdType key; /** * @brief The value of the item inserted. */ IdType value; }; /** * @brief Cross platform CAS operation. 
* It is an atomic operation that compares the contents of a memory * location with a given value and, only if they are the same, modifies * the contents of that memory location to a new given value. * * @param ptr The pointer to the object to test and modify. * @param old_val The value expected to be found in `ptr`. * @param new_val The value to store in `ptr` if it is as expected. * * @return The old value pointed to by `ptr`. */ static IdType CompareAndSwap(IdType* ptr, IdType old_val, IdType new_val); ConcurrentIdHashMap(); ConcurrentIdHashMap(const ConcurrentIdHashMap& other) = delete; ConcurrentIdHashMap& operator=(const ConcurrentIdHashMap& other) = delete; /** * @brief Initialize the hashmap with an array of ids. The first `num_seeds` * ids are unique and must be mapped to a contiguous array starting * from 0. The remaining ids may contain duplicates and their mapping result is not stable. * * @param ids The array of the ids to be inserted. * @param num_seeds The number of seed ids. * * @return Unique ids from the input `ids`. */ IdArray Init(const IdArray& ids, size_t num_seeds); /** * @brief Find mappings of given keys. * * @param ids The keys to map for. * * @return Mapping results corresponding to `ids`. */ IdArray MapIds(const IdArray& ids) const; private: /** * @brief Get the next position and delta for probing. * * @param[in,out] pos Calculate the next position with quadratic probing. * @param[in,out] delta Calculate the next delta by adding 1. */ inline void Next(IdType* pos, IdType* delta) const; /** * @brief Find the mapping of a given key. * * @param id The key to map for. * * @return Mapping result corresponding to `id`. */ inline IdType MapId(const IdType id) const; /** * @brief Insert an id into the hash map. * * @param id The id to be inserted. * * @return Whether the `id` is inserted or not. */ inline bool Insert(IdType id); /** * @brief Set the value for the key in the hash map. * * @param key The key to set for. 
* @param value The value to be set for the `key`. * * @warning Key must exist. */ inline void Set(IdType key, IdType value); /** * @brief Insert a key into the hash map. * * @param id The key to be inserted. * @param value The value to be set for the `key`. * */ inline void InsertAndSet(IdType key, IdType value); /** * @brief Attempt to insert the key into the hash map at the given position. * * @param pos The position in the hash map to be inserted at. * @param key The key to be inserted. * * @return The state of the insertion. */ inline InsertState AttemptInsertAt(int64_t pos, IdType key); private: /** * @brief Hash maps which is used to store all elements. */ std::unique_ptr> hash_map_; /** * @brief Mask which is assisted to get the position in the table * for a key by performing `&` operation with it. */ IdType mask_; }; template bool BoolCompareAndSwap(IdType* ptr); } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_CONCURRENT_ID_HASH_MAP_H_ ================================================ FILE: src/array/cpu/coo_coalesce.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/coo_coalesce.cc * @brief COO coalescing */ #include #include namespace dgl { namespace aten { namespace impl { template std::pair COOCoalesce(COOMatrix coo) { const int64_t nnz = coo.row->shape[0]; const IdType* coo_row_data = static_cast(coo.row->data); const IdType* coo_col_data = static_cast(coo.col->data); if (!coo.row_sorted || !coo.col_sorted) coo = COOSort(coo, true); std::vector new_row, new_col, count; IdType prev_row = -1, prev_col = -1; for (int64_t i = 0; i < nnz; ++i) { const IdType curr_row = coo_row_data[i]; const IdType curr_col = coo_col_data[i]; if (curr_row == prev_row && curr_col == prev_col) { ++count[count.size() - 1]; } else { new_row.push_back(curr_row); new_col.push_back(curr_col); count.push_back(1); prev_row = curr_row; prev_col = curr_col; } } COOMatrix coo_result = COOMatrix{ coo.num_rows, 
coo.num_cols, NDArray::FromVector(new_row), NDArray::FromVector(new_col), NullArray(), true}; return std::make_pair(coo_result, NDArray::FromVector(count)); } template std::pair COOCoalesce(COOMatrix); template std::pair COOCoalesce(COOMatrix); }; // namespace impl }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cpu/coo_linegraph.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/coo_line_graph.cc * @brief COO LineGraph */ #include #include #include #include #include namespace dgl { namespace aten { namespace impl { template COOMatrix COOLineGraph(const COOMatrix& coo, bool backtracking) { const int64_t nnz = coo.row->shape[0]; IdType* coo_row = coo.row.Ptr(); IdType* coo_col = coo.col.Ptr(); IdArray data = COOHasData(coo) ? coo.data : Range(0, nnz, coo.row->dtype.bits, coo.row->ctx); IdType* data_data = data.Ptr(); std::vector new_row; std::vector new_col; for (int64_t i = 0; i < nnz; ++i) { IdType u = coo_row[i]; IdType v = coo_col[i]; for (int64_t j = 0; j < nnz; ++j) { // no self-loop if (i == j) continue; // succ_u == v // if not backtracking succ_u != u if (v == coo_row[j] && (backtracking || u != coo_col[j])) { new_row.push_back(data_data[i]); new_col.push_back(data_data[j]); } } } COOMatrix res = COOMatrix( nnz, nnz, NDArray::FromVector(new_row), NDArray::FromVector(new_col), NullArray(), false, false); return res; } template COOMatrix COOLineGraph( const COOMatrix& coo, bool backtracking); template COOMatrix COOLineGraph( const COOMatrix& coo, bool backtracking); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/coo_remove.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/coo_remove.cc * @brief COO matrix remove entries CPU implementation */ #include #include #include #include "array_utils.h" namespace 
dgl { using runtime::NDArray; namespace aten { namespace impl { namespace { /** @brief COORemove implementation for COOMatrix with default consecutive edge * IDs */ template void COORemoveConsecutive( COOMatrix coo, IdArray entries, std::vector *new_rows, std::vector *new_cols, std::vector *new_eids) { const int64_t nnz = coo.row->shape[0]; const int64_t n_entries = entries->shape[0]; const IdType *row_data = static_cast(coo.row->data); const IdType *col_data = static_cast(coo.col->data); const IdType *entry_data = static_cast(entries->data); std::vector entry_data_sorted(entry_data, entry_data + n_entries); std::sort(entry_data_sorted.begin(), entry_data_sorted.end()); int64_t j = 0; for (int64_t i = 0; i < nnz; ++i) { if (j < n_entries && entry_data_sorted[j] == i) { // Move on to the next different entry while (j < n_entries && entry_data_sorted[j] == i) ++j; continue; } new_rows->push_back(row_data[i]); new_cols->push_back(col_data[i]); new_eids->push_back(i); } } /** @brief COORemove implementation for COOMatrix with shuffled edge IDs */ template void COORemoveShuffled( COOMatrix coo, IdArray entries, std::vector *new_rows, std::vector *new_cols, std::vector *new_eids) { const int64_t nnz = coo.row->shape[0]; const IdType *row_data = static_cast(coo.row->data); const IdType *col_data = static_cast(coo.col->data); const IdType *eid_data = static_cast(coo.data->data); IdHashMap eid_map(entries); for (int64_t i = 0; i < nnz; ++i) { const IdType eid = eid_data[i]; if (eid_map.Contains(eid)) continue; new_rows->push_back(row_data[i]); new_cols->push_back(col_data[i]); new_eids->push_back(eid); } } }; // namespace template COOMatrix COORemove(COOMatrix coo, IdArray entries) { const int64_t nnz = coo.row->shape[0]; const int64_t n_entries = entries->shape[0]; if (n_entries == 0) return coo; std::vector new_rows, new_cols, new_eids; new_rows.reserve(nnz - n_entries); new_cols.reserve(nnz - n_entries); new_eids.reserve(nnz - n_entries); if (COOHasData(coo)) 
COORemoveShuffled( coo, entries, &new_rows, &new_cols, &new_eids); else // Removing from COO ordered by eid has more efficient implementation. COORemoveConsecutive( coo, entries, &new_rows, &new_cols, &new_eids); return COOMatrix( coo.num_rows, coo.num_cols, IdArray::FromVector(new_rows), IdArray::FromVector(new_cols), IdArray::FromVector(new_eids)); } template COOMatrix COORemove(COOMatrix coo, IdArray entries); template COOMatrix COORemove(COOMatrix coo, IdArray entries); }; // namespace impl }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cpu/coo_sort.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/coo_sort.cc * @brief COO sorting */ #include #ifdef PARALLEL_ALGORITHMS #include #endif #include #include #include #include #include namespace { template struct TupleRef { TupleRef() = delete; TupleRef(const TupleRef& other) = default; TupleRef(TupleRef&& other) = default; TupleRef(IdType* const r, IdType* const c, IdType* const d) : row(r), col(c), data(d) {} TupleRef& operator=(const TupleRef& other) { *row = *other.row; *col = *other.col; *data = *other.data; return *this; } TupleRef& operator=(const std::tuple& val) { *row = std::get<0>(val); *col = std::get<1>(val); *data = std::get<2>(val); return *this; } operator std::tuple() const { return std::make_tuple(*row, *col, *data); } void Swap(const TupleRef& other) const { std::swap(*row, *other.row); std::swap(*col, *other.col); std::swap(*data, *other.data); } IdType *row, *col, *data; }; using std::swap; template void swap(const TupleRef& r1, const TupleRef& r2) { r1.Swap(r2); } template struct CooIterator : public std::iterator< std::random_access_iterator_tag, std::tuple, std::ptrdiff_t, std::tuple, TupleRef> { CooIterator() = default; CooIterator(const CooIterator& other) = default; CooIterator(CooIterator&& other) = default; CooIterator(IdType* r, IdType* c, IdType* d) : row(r), col(c), 
data(d) {} CooIterator& operator=(const CooIterator& other) = default; CooIterator& operator=(CooIterator&& other) = default; ~CooIterator() = default; bool operator==(const CooIterator& other) const { return row == other.row; } bool operator!=(const CooIterator& other) const { return row != other.row; } bool operator<(const CooIterator& other) const { return row < other.row; } bool operator>(const CooIterator& other) const { return row > other.row; } bool operator<=(const CooIterator& other) const { return row <= other.row; } bool operator>=(const CooIterator& other) const { return row >= other.row; } CooIterator& operator+=(const std::ptrdiff_t& movement) { row += movement; col += movement; data += movement; return *this; } CooIterator& operator-=(const std::ptrdiff_t& movement) { row -= movement; col -= movement; data -= movement; return *this; } CooIterator& operator++() { return operator+=(1); } CooIterator& operator--() { return operator-=(1); } CooIterator operator++(int) { CooIterator ret(*this); operator++(); return ret; } CooIterator operator--(int) { CooIterator ret(*this); operator--(); return ret; } CooIterator operator+(const std::ptrdiff_t& movement) const { CooIterator ret(*this); ret += movement; return ret; } CooIterator operator-(const std::ptrdiff_t& movement) const { CooIterator ret(*this); ret -= movement; return ret; } std::ptrdiff_t operator-(const CooIterator& other) const { return row - other.row; } TupleRef operator*() const { return TupleRef(row, col, data); } TupleRef operator*() { return TupleRef(row, col, data); } // required for random access iterators in VS2019 TupleRef operator[](size_t offset) const { return TupleRef(row + offset, col + offset, data + offset); } IdType *row, *col, *data; }; } // namespace namespace dgl { namespace aten { namespace impl { ///////////////////////////// COOSort_ ///////////////////////////// template void COOSort_(COOMatrix* coo, bool sort_column) { const int64_t nnz = coo->row->shape[0]; IdType* 
coo_row = coo->row.Ptr(); IdType* coo_col = coo->col.Ptr(); if (!COOHasData(*coo)) coo->data = aten::Range(0, nnz, coo->row->dtype.bits, coo->row->ctx); IdType* coo_data = coo->data.Ptr(); typedef std::tuple Tuple; // Arg sort if (sort_column) { #ifdef PARALLEL_ALGORITHMS __gnu_parallel::sort( #else std::sort( #endif CooIterator(coo_row, coo_col, coo_data), CooIterator(coo_row, coo_col, coo_data) + nnz, [](const Tuple& a, const Tuple& b) { return (std::get<0>(a) != std::get<0>(b)) ? (std::get<0>(a) < std::get<0>(b)) : (std::get<1>(a) < std::get<1>(b)); }); } else { #ifdef PARALLEL_ALGORITHMS __gnu_parallel::sort( #else std::sort( #endif CooIterator(coo_row, coo_col, coo_data), CooIterator(coo_row, coo_col, coo_data) + nnz, [](const Tuple& a, const Tuple& b) { return std::get<0>(a) < std::get<0>(b); }); } coo->row_sorted = true; coo->col_sorted = sort_column; } template void COOSort_(COOMatrix*, bool); template void COOSort_(COOMatrix*, bool); ///////////////////////////// COOIsSorted ///////////////////////////// template std::pair COOIsSorted(COOMatrix coo) { const int64_t nnz = coo.row->shape[0]; IdType* row = coo.row.Ptr(); IdType* col = coo.col.Ptr(); bool row_sorted = true; bool col_sorted = true; for (int64_t i = 1; row_sorted && i < nnz; ++i) { row_sorted = (row[i - 1] <= row[i]); col_sorted = col_sorted && (row[i - 1] < row[i] || col[i - 1] <= col[i]); } if (!row_sorted) col_sorted = false; return {row_sorted, col_sorted}; } template std::pair COOIsSorted(COOMatrix coo); template std::pair COOIsSorted(COOMatrix coo); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/csr_get_data.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file array/cpu/csr_get_data.cc * @brief Retrieve entries of a CSR matrix */ #include #include #include #include #include #include "array_utils.h" namespace dgl { using runtime::NDArray; using 
runtime::parallel_for; namespace aten { namespace impl { template void CollectDataFromSorted( const IdType* indices_data, const IdType* data, const IdType start, const IdType end, const IdType col, std::vector* ret_vec) { const IdType* start_ptr = indices_data + start; const IdType* end_ptr = indices_data + end; auto it = std::lower_bound(start_ptr, end_ptr, col); // This might be a multi-graph. We need to collect all of the matched // columns. for (; it != end_ptr; it++) { // If the col exist if (*it == col) { IdType idx = it - indices_data; ret_vec->push_back(data ? data[idx] : idx); } else { // If we find a column that is different, we can stop searching now. break; } } } template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, DType filler) { const int64_t rowlen = rows->shape[0]; const int64_t collen = cols->shape[0]; CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1)) << "Invalid row and col id array."; const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1; const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1; const IdType* row_data = static_cast(rows->data); const IdType* col_data = static_cast(cols->data); const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); const IdType* data = CSRHasData(csr) ? static_cast(csr.data->data) : nullptr; const int64_t retlen = std::max(rowlen, collen); const DType* weight_data = return_eids ? nullptr : weights.Ptr(); if (return_eids) BUG_IF_FAIL(DGLDataTypeTraits::dtype == rows->dtype) << "DType does not match row's dtype."; NDArray ret = Full(filler, retlen, rows->ctx); DType* ret_data = ret.Ptr(); // NOTE: In most cases, the input csr is already sorted. If not, we might need // to // consider sorting it especially when the number of (row, col) pairs is // large. Need more benchmarks to justify the choice. 
if (csr.sorted) { // use binary search on each row parallel_for(0, retlen, [&](size_t b, size_t e) { for (auto p = b; p < e; ++p) { const IdType row_id = row_data[p * row_stride], col_id = col_data[p * col_stride]; CHECK(row_id >= 0 && row_id < csr.num_rows) << "Invalid row index: " << row_id; CHECK(col_id >= 0 && col_id < csr.num_cols) << "Invalid col index: " << col_id; const IdType* start_ptr = indices_data + indptr_data[row_id]; const IdType* end_ptr = indices_data + indptr_data[row_id + 1]; auto it = std::lower_bound(start_ptr, end_ptr, col_id); if (it != end_ptr && *it == col_id) { const IdType idx = it - indices_data; IdType eid = data ? data[idx] : idx; ret_data[p] = return_eids ? eid : weight_data[eid]; } } }); } else { // linear search on each row parallel_for(0, retlen, [&](size_t b, size_t e) { for (auto p = b; p < e; ++p) { const IdType row_id = row_data[p * row_stride], col_id = col_data[p * col_stride]; CHECK(row_id >= 0 && row_id < csr.num_rows) << "Invalid row index: " << row_id; CHECK(col_id >= 0 && col_id < csr.num_cols) << "Invalid col index: " << col_id; for (IdType idx = indptr_data[row_id]; idx < indptr_data[row_id + 1]; ++idx) { if (indices_data[idx] == col_id) { IdType eid = data ? data[idx] : idx; ret_data[p] = return_eids ? 
eid : weight_data[eid]; break; } } } }); } return ret; } template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, float filler); template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, float filler); template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, double filler); template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, double filler); // For CSRGetData(CSRMatrix, NDArray, NDArray) template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, int32_t filler); template NDArray CSRGetData( CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids, NDArray weights, int64_t filler); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/csr_mm.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/csr_mm.cc * @brief CSR Matrix Multiplication */ #include #include #include #include #include #include "array_utils.h" namespace dgl { using dgl::runtime::NDArray; using dgl::runtime::parallel_for; namespace aten { namespace { // TODO(BarclayII): avoid using map for sorted CSRs template void CountNNZPerRow( const IdType* A_indptr, const IdType* A_indices, const IdType* B_indptr, const IdType* B_indices, IdType* C_indptr_data, int64_t M) { parallel_for(0, M, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { tsl::robin_set set; for (IdType u = A_indptr[i]; u < A_indptr[i + 1]; ++u) { IdType w = A_indices[u]; for (IdType v = B_indptr[w]; v < B_indptr[w + 1]; ++v) set.insert(B_indices[v]); } C_indptr_data[i] = set.size(); } }); } template int64_t ComputeIndptrInPlace(IdType* C_indptr_data, int64_t M) { int64_t nnz = 0; IdType len = 0; for (IdType i = 0; i < M; ++i) { len = C_indptr_data[i]; 
C_indptr_data[i] = nnz; nnz += len; } C_indptr_data[M] = nnz; return nnz; } template void ComputeIndicesAndData( const IdType* A_indptr, const IdType* A_indices, const IdType* A_eids, const DType* A_data, const IdType* B_indptr, const IdType* B_indices, const IdType* B_eids, const DType* B_data, const IdType* C_indptr_data, IdType* C_indices_data, DType* C_weights_data, int64_t M) { parallel_for(0, M, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { tsl::robin_map map; for (IdType u = A_indptr[i]; u < A_indptr[i + 1]; ++u) { IdType w = A_indices[u]; DType vA = A_data[A_eids ? A_eids[u] : u]; for (IdType v = B_indptr[w]; v < B_indptr[w + 1]; ++v) { IdType t = B_indices[v]; DType vB = B_data[B_eids ? B_eids[v] : v]; map[t] += vA * vB; } } IdType v = C_indptr_data[i]; for (auto it : map) { C_indices_data[v] = it.first; C_weights_data[v] = it.second; ++v; } } }); } }; // namespace template std::pair CSRMM( const CSRMatrix& A, NDArray A_weights, const CSRMatrix& B, NDArray B_weights) { CHECK_EQ(A.num_cols, B.num_rows) << "A's number of columns must equal to B's number of rows"; const bool A_has_eid = !IsNullArray(A.data); const bool B_has_eid = !IsNullArray(B.data); const IdType* A_indptr = A.indptr.Ptr(); const IdType* A_indices = A.indices.Ptr(); const IdType* A_eids = A_has_eid ? A.data.Ptr() : nullptr; const IdType* B_indptr = B.indptr.Ptr(); const IdType* B_indices = B.indices.Ptr(); const IdType* B_eids = B_has_eid ? 
B.data.Ptr() : nullptr; const DType* A_data = A_weights.Ptr(); const DType* B_data = B_weights.Ptr(); const int64_t M = A.num_rows; const int64_t P = B.num_cols; IdArray C_indptr = IdArray::Empty({M + 1}, A.indptr->dtype, A.indptr->ctx); IdType* C_indptr_data = C_indptr.Ptr(); CountNNZPerRow( A_indptr, A_indices, B_indptr, B_indices, C_indptr_data, M); int64_t nnz = ComputeIndptrInPlace(C_indptr_data, M); // Allocate indices and weights array IdArray C_indices = IdArray::Empty({nnz}, A.indices->dtype, A.indices->ctx); NDArray C_weights = NDArray::Empty({nnz}, A_weights->dtype, A_weights->ctx); IdType* C_indices_data = C_indices.Ptr(); DType* C_weights_data = C_weights.Ptr(); ComputeIndicesAndData( A_indptr, A_indices, A_eids, A_data, B_indptr, B_indices, B_eids, B_data, C_indptr_data, C_indices_data, C_weights_data, M); return { CSRMatrix( M, P, C_indptr, C_indices, NullArray(C_indptr->dtype, C_indptr->ctx)), C_weights}; } template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cpu/csr_remove.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/coo_remove.cc * @brief CSR matrix remove entries CPU implementation */ #include #include #include #include "array_utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { namespace { template void CSRRemoveConsecutive( CSRMatrix csr, IdArray entries, std::vector *new_indptr, std::vector *new_indices, std::vector *new_eids) { CHECK_SAME_DTYPE(csr.indices, entries); const int64_t n_entries = entries->shape[0]; const IdType *indptr_data = static_cast(csr.indptr->data); const 
IdType *indices_data = static_cast(csr.indices->data); const IdType *entry_data = static_cast(entries->data); std::vector entry_data_sorted(entry_data, entry_data + n_entries); std::sort(entry_data_sorted.begin(), entry_data_sorted.end()); int64_t k = 0; new_indptr->push_back(0); for (int64_t i = 0; i < csr.num_rows; ++i) { for (IdType j = indptr_data[i]; j < indptr_data[i + 1]; ++j) { if (k < n_entries && entry_data_sorted[k] == j) { // Move on to the next different entry while (k < n_entries && entry_data_sorted[k] == j) ++k; continue; } new_indices->push_back(indices_data[j]); new_eids->push_back(k); } new_indptr->push_back(new_indices->size()); } } template void CSRRemoveShuffled( CSRMatrix csr, IdArray entries, std::vector *new_indptr, std::vector *new_indices, std::vector *new_eids) { CHECK_SAME_DTYPE(csr.indices, entries); const IdType *indptr_data = static_cast(csr.indptr->data); const IdType *indices_data = static_cast(csr.indices->data); const IdType *eid_data = static_cast(csr.data->data); IdHashMap eid_map(entries); new_indptr->push_back(0); for (int64_t i = 0; i < csr.num_rows; ++i) { for (IdType j = indptr_data[i]; j < indptr_data[i + 1]; ++j) { const IdType eid = eid_data ? 
eid_data[j] : j; if (eid_map.Contains(eid)) continue; new_indices->push_back(indices_data[j]); new_eids->push_back(eid); } new_indptr->push_back(new_indices->size()); } } }; // namespace template CSRMatrix CSRRemove(CSRMatrix csr, IdArray entries) { CHECK_SAME_DTYPE(csr.indices, entries); const int64_t nnz = csr.indices->shape[0]; const int64_t n_entries = entries->shape[0]; if (n_entries == 0) return csr; std::vector new_indptr, new_indices, new_eids; new_indptr.reserve(nnz - n_entries); new_indices.reserve(nnz - n_entries); new_eids.reserve(nnz - n_entries); if (CSRHasData(csr)) CSRRemoveShuffled( csr, entries, &new_indptr, &new_indices, &new_eids); else // Removing from CSR ordered by eid has more efficient implementation CSRRemoveConsecutive( csr, entries, &new_indptr, &new_indices, &new_eids); return CSRMatrix( csr.num_rows, csr.num_cols, IdArray::FromVector(new_indptr), IdArray::FromVector(new_indices), IdArray::FromVector(new_eids)); } template CSRMatrix CSRRemove(CSRMatrix csr, IdArray entries); template CSRMatrix CSRRemove(CSRMatrix csr, IdArray entries); }; // namespace impl }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cpu/csr_sort.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/csr_sort.cc * @brief CSR sorting */ #include #include #include #include #include namespace dgl { namespace aten { namespace impl { ///////////////////////////// CSRIsSorted ///////////////////////////// template bool CSRIsSorted(CSRMatrix csr) { const IdType *indptr = csr.indptr.Ptr(); const IdType *indices = csr.indices.Ptr(); return runtime::parallel_reduce( 0, csr.num_rows, 1, 1, [indptr, indices](size_t b, size_t e, bool ident) { for (size_t row = b; row < e; ++row) { for (IdType i = indptr[row] + 1; i < indptr[row + 1]; ++i) { if (indices[i - 1] > indices[i]) return false; } } return ident; }, [](bool a, bool b) { return a && b; }); } template bool 
CSRIsSorted(CSRMatrix csr); template bool CSRIsSorted(CSRMatrix csr); ///////////////////////////// CSRSort ///////////////////////////// template void CSRSort_(CSRMatrix *csr) { typedef std::pair ShufflePair; const int64_t num_rows = csr->num_rows; const int64_t nnz = csr->indices->shape[0]; const IdType *indptr_data = static_cast(csr->indptr->data); IdType *indices_data = static_cast(csr->indices->data); if (CSRIsSorted(*csr)) { csr->sorted = true; return; } if (!CSRHasData(*csr)) { csr->data = aten::Range(0, nnz, csr->indptr->dtype.bits, csr->indptr->ctx); } IdType *eid_data = static_cast(csr->data->data); runtime::parallel_for(0, num_rows, [=](size_t b, size_t e) { for (auto row = b; row < e; ++row) { const int64_t num_cols = indptr_data[row + 1] - indptr_data[row]; std::vector reorder_vec(num_cols); IdType *col = indices_data + indptr_data[row]; IdType *eid = eid_data + indptr_data[row]; for (int64_t i = 0; i < num_cols; i++) { reorder_vec[i].first = col[i]; reorder_vec[i].second = eid[i]; } std::sort( reorder_vec.begin(), reorder_vec.end(), [](const ShufflePair &e1, const ShufflePair &e2) { return e1.first < e2.first; }); for (int64_t i = 0; i < num_cols; i++) { col[i] = reorder_vec[i].first; eid[i] = reorder_vec[i].second; } } }); csr->sorted = true; } template void CSRSort_(CSRMatrix *csr); template void CSRSort_(CSRMatrix *csr); template std::pair CSRSortByTag( const CSRMatrix &csr, const IdArray tag_array, int64_t num_tags) { const auto indptr_data = static_cast(csr.indptr->data); const auto indices_data = static_cast(csr.indices->data); const auto eid_data = aten::CSRHasData(csr) ? 
static_cast(csr.data->data) : nullptr; const auto tag_data = static_cast(tag_array->data); const int64_t num_rows = csr.num_rows; NDArray tag_pos = NDArray::Empty( {csr.num_rows, num_tags + 1}, csr.indptr->dtype, csr.indptr->ctx); auto tag_pos_data = static_cast(tag_pos->data); std::fill(tag_pos_data, tag_pos_data + csr.num_rows * (num_tags + 1), 0); aten::CSRMatrix output( csr.num_rows, csr.num_cols, csr.indptr.Clone(), csr.indices.Clone(), NDArray::Empty( {csr.indices->shape[0]}, csr.indices->dtype, csr.indices->ctx), csr.sorted); auto out_indices_data = static_cast(output.indices->data); auto out_eid_data = static_cast(output.data->data); runtime::parallel_for(0, num_rows, [&](size_t b, size_t e) { for (auto src = b; src < e; ++src) { const IdType start = indptr_data[src]; const IdType end = indptr_data[src + 1]; auto tag_pos_row = tag_pos_data + src * (num_tags + 1); std::vector pointer(num_tags, 0); for (IdType ptr = start; ptr < end; ++ptr) { const IdType eid = eid_data ? eid_data[ptr] : ptr; const TagType tag = tag_data[eid]; CHECK_LT(tag, num_tags); ++tag_pos_row[tag + 1]; } // count for (TagType tag = 1; tag <= num_tags; ++tag) { tag_pos_row[tag] += tag_pos_row[tag - 1]; } // cumulate for (IdType ptr = start; ptr < end; ++ptr) { const IdType dst = indices_data[ptr]; const IdType eid = eid_data ? 
eid_data[ptr] : ptr; const TagType tag = tag_data[eid]; const IdType offset = tag_pos_row[tag] + pointer[tag]; CHECK_LT(offset, tag_pos_row[tag + 1]); ++pointer[tag]; out_indices_data[start + offset] = dst; out_eid_data[start + offset] = eid; } } }); output.sorted = false; return std::make_pair(output, tag_pos); } template std::pair CSRSortByTag( const CSRMatrix &csr, const IdArray tag, int64_t num_tags); template std::pair CSRSortByTag( const CSRMatrix &csr, const IdArray tag, int64_t num_tags); template std::pair CSRSortByTag( const CSRMatrix &csr, const IdArray tag, int64_t num_tags); template std::pair CSRSortByTag( const CSRMatrix &csr, const IdArray tag, int64_t num_tags); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/csr_sum.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/csr_sum.cc * @brief CSR Summation */ #include #include #include #include #include #include "array_utils.h" namespace dgl { using dgl::runtime::NDArray; namespace aten { namespace { // TODO(BarclayII): avoid using map for sorted CSRs template void CountNNZPerRow( const std::vector& A_indptr, const std::vector& A_indices, IdType* C_indptr_data, int64_t M) { int64_t n = A_indptr.size(); runtime::parallel_for(0, M, [=](size_t b, size_t e) { for (size_t i = b; i < e; ++i) { tsl::robin_set set; for (int64_t k = 0; k < n; ++k) { for (IdType u = A_indptr[k][i]; u < A_indptr[k][i + 1]; ++u) set.insert(A_indices[k][u]); } C_indptr_data[i] = set.size(); } }); } template int64_t ComputeIndptrInPlace(IdType* C_indptr_data, int64_t M) { int64_t nnz = 0; IdType len = 0; for (IdType i = 0; i < M; ++i) { len = C_indptr_data[i]; C_indptr_data[i] = nnz; nnz += len; } C_indptr_data[M] = nnz; return nnz; } template void ComputeIndicesAndData( const std::vector& A_indptr, const std::vector& A_indices, const std::vector& A_eids, const std::vector& A_data, const IdType* 
C_indptr_data, IdType* C_indices_data, DType* C_weights_data, int64_t M) { int64_t n = A_indptr.size(); runtime::parallel_for(0, M, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { tsl::robin_map map; for (int64_t k = 0; k < n; ++k) { for (IdType u = A_indptr[k][i]; u < A_indptr[k][i + 1]; ++u) { IdType kA = A_indices[k][u]; DType vA = A_data[k][A_eids[k] ? A_eids[k][u] : u]; map[kA] += vA; } } IdType j = C_indptr_data[i]; for (auto it : map) { C_indices_data[j] = it.first; C_weights_data[j] = it.second; ++j; } } }); } }; // namespace template std::pair CSRSum( const std::vector& A, const std::vector& A_weights) { CHECK(A.size() > 0) << "List of matrices can't be empty."; CHECK_EQ(A.size(), A_weights.size()) << "List of matrices and weights must have same length"; const int64_t M = A[0].num_rows; const int64_t N = A[0].num_cols; const int64_t n = A.size(); std::vector A_has_eid(n); std::vector A_indptr(n); std::vector A_indices(n); std::vector A_eids(n); std::vector A_data(n); for (int64_t i = 0; i < n; ++i) { const CSRMatrix& csr = A[i]; const NDArray& data = A_weights[i]; A_has_eid[i] = !IsNullArray(csr.data); A_indptr[i] = csr.indptr.Ptr(); A_indices[i] = csr.indices.Ptr(); A_eids[i] = A_has_eid[i] ? 
csr.data.Ptr() : nullptr; A_data[i] = data.Ptr(); } IdArray C_indptr = IdArray::Empty({M + 1}, A[0].indptr->dtype, A[0].indptr->ctx); IdType* C_indptr_data = C_indptr.Ptr(); CountNNZPerRow(A_indptr, A_indices, C_indptr_data, M); IdType nnz = ComputeIndptrInPlace(C_indptr_data, M); // Allocate indices and weights array IdArray C_indices = IdArray::Empty({nnz}, A[0].indices->dtype, A[0].indices->ctx); NDArray C_weights = NDArray::Empty({nnz}, A_weights[0]->dtype, A_weights[0]->ctx); IdType* C_indices_data = C_indices.Ptr(); DType* C_weights_data = C_weights.Ptr(); ComputeIndicesAndData( A_indptr, A_indices, A_eids, A_data, C_indptr_data, C_indices_data, C_weights_data, M); return { CSRMatrix( M, N, C_indptr, C_indices, NullArray(C_indptr->dtype, C_indptr->ctx)), C_weights}; } template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cpu/csr_to_simple.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/csr_to_simple.cc * @brief CSR sorting */ #include #include #include #include namespace dgl { namespace aten { namespace impl { template std::tuple CSRToSimple(CSRMatrix csr) { if (!csr.sorted) csr = CSRSort(csr); const IdType *indptr_data = static_cast(csr.indptr->data); const IdType *indices_data = static_cast(csr.indices->data); std::vector indptr; std::vector indices; std::vector count; indptr.resize(csr.indptr->shape[0]); indptr[0] = 0; for (int64_t i = 1; i < csr.indptr->shape[0]; ++i) { if (indptr_data[i - 1] == indptr_data[i]) { indptr[i] = indptr[i - 1]; continue; } int64_t cnt = 1; int64_t dup_cnt = 1; indices.push_back(indices_data[indptr_data[i - 1]]); for (int64_t j = indptr_data[i - 1] 
+ 1; j < indptr_data[i]; ++j) { if (indices_data[j - 1] == indices_data[j]) { ++dup_cnt; continue; } count.push_back(dup_cnt); dup_cnt = 1; indices.push_back(indices_data[j]); ++cnt; } count.push_back(dup_cnt); indptr[i] = indptr[i - 1] + cnt; } CSRMatrix res_csr = CSRMatrix( csr.num_rows, csr.num_cols, IdArray::FromVector(indptr), IdArray::FromVector(indices), NullArray(), true); const IdArray &edge_count = IdArray::FromVector(count); const IdArray new_eids = Range(0, res_csr.indices->shape[0], sizeof(IdType) * 8, csr.indptr->ctx); const IdArray eids_remapped = CSRHasData(csr) ? Scatter(Repeat(new_eids, edge_count), csr.data) : Repeat(new_eids, edge_count); return std::make_tuple(res_csr, edge_count, eids_remapped); } template std::tuple CSRToSimple( CSRMatrix); template std::tuple CSRToSimple( CSRMatrix); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/csr_union.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/csr_union.cc * @brief Union of CSR matrices */ #include #include #include #include #include #include namespace dgl { namespace aten { namespace impl { template CSRMatrix UnionCsr(const std::vector &csrs) { std::vector res_indptr; std::vector res_indices; std::vector res_data; // some preprocess // we assume the number of csrs is not large in common cases std::vector data; std::vector data_data; std::vector indptr_data; std::vector indices_data; int64_t num_edges = 0; bool sorted = true; for (size_t i = 0; i < csrs.size(); ++i) { // eids of csrs[0] remains unchanged // eids of csrs[1] will be increased by number of edges of csrs[0], etc. data.push_back( CSRHasData(csrs[i]) ?
csrs[i].data + num_edges : Range( num_edges, num_edges + csrs[i].indices->shape[0], csrs[i].indptr->dtype.bits, csrs[i].indptr->ctx)); data_data.push_back(data[i].Ptr()); indptr_data.push_back(csrs[i].indptr.Ptr()); indices_data.push_back(csrs[i].indices.Ptr()); num_edges += csrs[i].indices->shape[0]; sorted &= csrs[i].sorted; } res_indptr.resize(csrs[0].num_rows + 1); res_indices.resize(num_edges); res_data.resize(num_edges); res_indptr[0] = 0; if (sorted) { // all csrs are sorted #pragma omp for for (int64_t i = 1; i <= csrs[0].num_rows; ++i) { std::vector indices_off; res_indptr[i] = indptr_data[0][i]; indices_off.push_back(indptr_data[0][i - 1]); for (size_t j = 1; j < csrs.size(); ++j) { res_indptr[i] += indptr_data[j][i]; indices_off.push_back(indptr_data[j][i - 1]); } IdType off = res_indptr[i - 1]; while (off < res_indptr[i]) { IdType min = csrs[0].num_cols + 1; int64_t min_idx = -1; for (size_t j = 0; j < csrs.size(); ++j) { if (indices_off[j] < indptr_data[j][i]) { if (min <= indices_data[j][indices_off[j]]) { continue; } else { min = indices_data[j][indices_off[j]]; min_idx = j; } } // for check out of bound } // for res_indices[off] = min; res_data[off] = data_data[min_idx][indices_off[min_idx]]; indices_off[min_idx] += 1; ++off; } // while } // omp for } else { // some csrs are not sorted #pragma omp for for (int64_t i = 1; i <= csrs[0].num_rows; ++i) { IdType off = res_indptr[i - 1]; res_indptr[i] = 0; for (size_t j = 0; j < csrs.size(); ++j) { std::memcpy( &res_indices[off], &indices_data[j][indptr_data[j][i - 1]], sizeof(IdType) * (indptr_data[j][i] - indptr_data[j][i - 1])); std::memcpy( &res_data[off], &data_data[j][indptr_data[j][i - 1]], sizeof(IdType) * (indptr_data[j][i] - indptr_data[j][i - 1])); off += indptr_data[j][i] - indptr_data[j][i - 1]; } res_indptr[i] = off; } // omp for } return CSRMatrix( csrs[0].num_rows, csrs[0].num_cols, IdArray::FromVector(res_indptr), IdArray::FromVector(res_indices), IdArray::FromVector(res_data), sorted); } 
template CSRMatrix UnionCsr(const std::vector &); template CSRMatrix UnionCsr(const std::vector &); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/disjoint_union.cc ================================================ /** * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file array/cpu/disjoint_union.cc * @brief Disjoint union CPU implementation. */ #include #include #include namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template std::tuple _ComputePrefixSums( const std::vector& coos) { IdArray prefix_src_arr = NewIdArray(coos.size(), coos[0].row->ctx, coos[0].row->dtype.bits); IdArray prefix_dst_arr = NewIdArray(coos.size(), coos[0].row->ctx, coos[0].row->dtype.bits); IdArray prefix_elm_arr = NewIdArray(coos.size(), coos[0].row->ctx, coos[0].row->dtype.bits); auto prefix_src = prefix_src_arr.Ptr(); auto prefix_dst = prefix_dst_arr.Ptr(); auto prefix_elm = prefix_elm_arr.Ptr(); dgl::runtime::parallel_for(0, coos.size(), [&](IdType b, IdType e) { for (IdType i = b; i < e; ++i) { prefix_src[i] = coos[i].num_rows; prefix_dst[i] = coos[i].num_cols; prefix_elm[i] = coos[i].row->shape[0]; } }); return std::make_tuple( CumSum(prefix_src_arr, true), CumSum(prefix_dst_arr, true), CumSum(prefix_elm_arr, true)); } template COOMatrix DisjointUnionCoo(const std::vector& coos) { bool has_data = false; bool row_sorted = true; bool col_sorted = 
true; // check if data index array for (size_t i = 0; i < coos.size(); ++i) { CHECK_SAME_DTYPE(coos[0].row, coos[i].row); CHECK_SAME_CONTEXT(coos[0].row, coos[i].row); has_data |= COOHasData(coos[i]); } auto prefixes = _ComputePrefixSums(coos); auto prefix_src = static_cast(std::get<0>(prefixes)).Ptr(); auto prefix_dst = static_cast(std::get<1>(prefixes)).Ptr(); auto prefix_elm = static_cast(std::get<2>(prefixes)).Ptr(); IdArray result_src = NewIdArray( prefix_elm[coos.size()], coos[0].row->ctx, coos[0].row->dtype.bits); IdArray result_dst = NewIdArray( prefix_elm[coos.size()], coos[0].col->ctx, coos[0].col->dtype.bits); IdArray result_dat = NullArray(); if (has_data) { result_dat = NewIdArray( prefix_elm[coos.size()], coos[0].row->ctx, coos[0].row->dtype.bits); } auto res_src_data = result_src.Ptr(); auto res_dst_data = result_dst.Ptr(); auto res_dat_data = result_dat.Ptr(); // 32 is a number obtained from experience. If a user set the grain size // explicitly via env, use that value instead. size_t grain_size = dgl::runtime::DefaultGrainSizeT(32)(); dgl::runtime::parallel_for( 0, coos.size(), grain_size, [&](IdType b, IdType e) { for (IdType i = b; i < e; ++i) { const aten::COOMatrix& coo = coos[i]; if (!coo.row_sorted) row_sorted = false; if (!coo.col_sorted) col_sorted = false; auto edges_src = coo.row.Ptr(); auto edges_dst = coo.col.Ptr(); auto edges_dat = coo.data.Ptr(); for (IdType j = 0; j < coo.row->shape[0]; j++) { res_src_data[prefix_elm[i] + j] = edges_src[j] + prefix_src[i]; } for (IdType j = 0; j < coo.row->shape[0]; j++) { res_dst_data[prefix_elm[i] + j] = edges_dst[j] + prefix_dst[i]; } if (has_data) { for (IdType j = 0; j < coo.row->shape[0]; j++) { const auto d = (!COOHasData(coo)) ? 
j : edges_dat[j]; res_dat_data[prefix_elm[i] + j] = d + prefix_elm[i]; } } } }); return COOMatrix( prefix_src[coos.size()], prefix_dst[coos.size()], result_src, result_dst, result_dat, row_sorted, col_sorted); } template COOMatrix DisjointUnionCoo( const std::vector& coos); template COOMatrix DisjointUnionCoo( const std::vector& coos); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/gather_mm.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file kernel/cpu/gaher_mm.cc * @brief GatherMM C APIs and definitions. */ #include "./gather_mm.h" #include namespace dgl { namespace aten { /** @brief Generalized SegmentMM. */ template void SegmentMM( const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A, bool a_trans, bool b_trans) { LOG(FATAL) << "Unsupported CPU kernel for SegmentMM."; } template void SegmentMMBackwardB( const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen) { LOG(FATAL) << "Unsupported CPU kernel for SegmentMMBackwardB."; } /** @brief Generalized GatherMM. */ template void GatherMM( const NDArray A, const NDArray B, NDArray C, const NDArray idx_a, const NDArray idx_b) { LOG(FATAL) << "Unsupported CPU kernel for GatherMM."; } /** @brief Generalized GatherMM_scatter. 
 */
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c) {
  // CPU stub: unconditionally aborts through LOG(FATAL); none of the
  // arguments are read. Note that the message names "GatherMM" although this
  // is the GatherMMScatter entry point.
  LOG(FATAL) << "Unsupported CPU kernel for GatherMM.";
}

// Explicit instantiations of the GatherMM kernel.
// NOTE(review): the six statements in each group below are textually
// identical in this copy of the file; the template arguments that would tell
// them apart are not visible here -- confirm against the original source.
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);

// Explicit instantiations of the GatherMMScatter kernel (same signature as
// the stub above, with the extra idx_c index array).
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);

// Explicit instantiations of the SegmentMM kernel.
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);

// Explicit instantiations of the SegmentMMBackwardB kernel.
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);

}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cpu/gather_mm.h
================================================
/**
 *  Copyright (c) 2022 by Contributors
 * @file array/cpu/gather_mm.h
 * @brief GATHER_MM CPU kernel function header.
*/ #ifndef DGL_ARRAY_CPU_GATHER_MM_H_ #define DGL_ARRAY_CPU_GATHER_MM_H_ #include #include #include namespace dgl { namespace aten { namespace cpu { template void transpose(const DType *in, DType *out, const int N, const int M) { #pragma omp parallel for for (int n = 0; n < N * M; n++) { int i = n / N; int j = n % N; out[n] = in[M * j + i]; } } template void matmul( const DType *A, const DType *B, DType *C, const int M, const int N, const int K) { #pragma omp parallel { int i, j, k; #pragma omp for for (i = 0; i < M; i++) { for (j = 0; j < N; j++) { DType local_accum = 0; for (k = 0; k < K; k++) { local_accum += A[i * K + k] * B[k * N + j]; } C[i * N + j] = local_accum; } } } } /** * @brief CPU kernel of Gather_mm. The input matrix A is expected to be * sorted according to relation type. * @param A The input dense matrix of dimension m x k * @param B The input dense matrix of dimension k x n * @param C The output dense matrix od dimension m x n * @param A_dim1_per_rel The number of rows in each relation in A * @param B_dim1_per_rel The number of rows in each relation in B * @param a_trans Matrix A to be transposed * @param b_trans Matrix B to be transposed */ template void gatherMM_SortedEtype( const NDArray A, const NDArray B, NDArray C, const NDArray A_dim1_per_rel, const NDArray B_dim1_per_rel, bool a_trans, bool b_trans) { assert(A_dim1_per_rel.NumElements() == B_dim1_per_rel.NumElements()); int64_t num_rel = A_dim1_per_rel.NumElements(); const DType *A_data = A.Ptr(); const DType *B_data = B.Ptr(); const IdType *A_rel_data = A_dim1_per_rel.Ptr(); const IdType *B_rel_data = B_dim1_per_rel.Ptr(); DType *C_data = C.Ptr(); int64_t A_offset = 0, B_offset = 0, C_offset = 0; int64_t m, n, k, h_col, w_row; for (int etype = 0; etype < num_rel; ++etype) { assert( (a_trans) ? A_rel_data[etype] : A->shape[1] == (b_trans) ? 
B->shape[1] : B_rel_data[etype]); m = A_rel_data[etype]; // rows of A n = B->shape[1]; // cols of B k = B_rel_data[etype]; // rows of B == cols of A NDArray A_trans, B_trans; if (a_trans) { A_trans = NDArray::Empty({m * k}, A->dtype, A->ctx); transpose( A_data + A_offset, static_cast(A_trans->data), m, k); } if (b_trans) { B_trans = NDArray::Empty({k * n}, B->dtype, B->ctx); transpose( B_data + B_offset, static_cast(B_trans->data), k, n); } if (a_trans || b_trans) { int64_t tmp = k; if (a_trans) std::swap(m, k); if (b_trans) { k = tmp; std::swap(n, k); } } matmul( (a_trans) ? static_cast(A_trans->data) : A_data + A_offset, (b_trans) ? static_cast(B_trans->data) : B_data + B_offset, C_data + C_offset, m, n, k); A_offset += m * k; B_offset += k * n; C_offset += m * n; } } } // namespace cpu } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_GATHER_MM_H_ ================================================ FILE: src/array/cpu/labor_pick.h ================================================ /** * Copyright (c) 2022, NVIDIA Corporation * Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file array/cpu/labor_pick.h * @brief Template implementation for layerwise pick operators. 
 */
#ifndef DGL_ARRAY_CPU_LABOR_PICK_H_
#define DGL_ARRAY_CPU_LABOR_PICK_H_

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "../../random/continuous_seed.h"

namespace dgl {
namespace aten {
namespace impl {

using dgl::random::continuous_seed;

// Hash map type used for the per-vertex probability tables below.
template
using map_t = tsl::robin_map;

// Returns a writable reference to the mapped value an iterator points at
// (obtained through the iterator's value() accessor).
template
auto& mutable_value_ref(iterator it) {
  return it.value();
}

// Relative tolerance shared by the fixed-point iteration and the outer
// convergence check in compute_importance_sampling_probabilities.
constexpr double eps = 0.0001;

// Iteratively refines the per-row scaling factors cs (c_s in
// arXiv:2210.13339) and the per-vertex probability table derived from them.
//
// @param ctx, dtype          Context/dtype used for the scratch array ps.
// @param max_degree          Upper bound on the degree of any sampled row;
//                            sizes the scratch array.
// @param num_rows            Number of entries in rows_data / ds / cs.
// @param importance_sampling Maximum number of refinement iterations; a
//                            negative value iterates until the convergence
//                            check breaks out of the loop.
// @param weighted            Whether the edge weights A are used.
// @param rows_data           Row ids to sample from.
// @param indptr, indices     CSR structure of the graph.
// @param A                   Edge weights, indexed like indices (only read
//                            when weighted is true).
// @param num_picks           Fanout.
// @param ds                  Per-row (weighted) degree, read only.
// @param cs                  Per-row c values; read as the starting point and
//                            overwritten with the refined values.
// @return The final table mapping a column id to its accumulated probability
//         factor (empty if the loop body never builds it).
template
auto compute_importance_sampling_probabilities(
    DGLContext ctx, DGLDataType dtype, const IdxType max_degree,
    const IdxType num_rows, const int importance_sampling, const bool weighted,
    const IdxType* rows_data, const IdxType* indptr, const FloatType* A,
    const IdxType* indices, const IdxType num_picks, const FloatType* ds,
    FloatType* cs) {
  constexpr FloatType ONE = 1;
  // ps stands for \pi in arXiv:2210.13339
  FloatArray ps_array = NDArray::Empty({max_degree + 1}, dtype, ctx);
  FloatType* ps = ps_array.Ptr();

  // Initial value for the "expected number of sampled vertices" that the
  // convergence check compares against.
  // NOTE(review): the product is evaluated in IdxType before the conversion
  // to double -- confirm it cannot overflow for 32-bit index types.
  double prev_ex_nodes = max_degree * num_rows;

  map_t hop_map, hop_map2;
  for (int iters = 0; iters < importance_sampling || importance_sampling < 0;
       iters++) {
    // NOTE(mfbalin) When the graph is unweighted, the first c values in
    // the first iteration can be computed in O(1) as k / d where k is fanout
    // and d is the degree.
    // If the graph is weighted, the first c values are computed in the inner
    // for loop instead. Therefore the importance_sampling argument should be
    // increased by one in the caller.
    // The later iterations will have correct c values so the if block will be
    // executed.
    if (!weighted || iters) {
      hop_map2.clear();
      // For every column keep the largest c (times the edge weight on the
      // first weighted pass) over all rows that reference it.
      for (int64_t i = 0; i < num_rows; ++i) {
        const FloatType c = cs[i];
        const IdxType rid = rows_data[i];
        for (auto j = indptr[rid]; j < indptr[rid + 1]; j++) {
          const auto ct = c * (weighted && iters == 1 ? A[j] : 1);
          auto itb = hop_map2.emplace(indices[j], ct);
          if (!itb.second) {
            mutable_value_ref(itb.first) = std::max(ct, itb.first->second);
          }
        }
      }
      if (hop_map.empty())
        hop_map = std::move(hop_map2);
      else
        // Update the pi array according to Eq 18.
        for (auto it : hop_map2) hop_map[it.first] *= it.second;
    }

    // Compute c_s according to Equation (15), (17) is slower because sorting is
    // required.
    for (int64_t i = 0; i < num_rows; ++i) {
      const IdxType rid = rows_data[i];
      const auto d = indptr[rid + 1] - indptr[rid];
      // Rows without neighbors keep their current c value.
      if (d == 0) continue;

      const auto k = std::min(num_picks, d);

      // Gather the pi values of this row's neighbors into the scratch array,
      // indexed by position within the row.
      if (hop_map.empty()) {  // weighted first iter, pi = A
        for (auto j = indptr[rid]; j < indptr[rid + 1]; j++)
          ps[j - indptr[rid]] = A[j];
      } else {
        for (auto j = indptr[rid]; j < indptr[rid + 1]; j++)
          ps[j - indptr[rid]] = hop_map[indices[j]];
      }

      // stands for RHS of Equation (22) in arXiv:2210.13339 after moving the
      // other terms without c_s to RHS.
      double var_target = ds[i] * ds[i] / k;
      if (weighted) {
        var_target -= ds[i] * ds[i] / d;
        for (auto j = indptr[rid]; j < indptr[rid + 1]; j++)
          var_target += A[j] * A[j];
      }
      FloatType c = cs[i];
      // stands for left handside of Equation (22) in arXiv:2210.13339 after
      // moving the other terms without c_s to RHS.
      double var_1;
      // Compute c_s in Equation (22) via fixed-point iteration.
      // NOTE(review): the loop has no iteration cap; it ends only once var_1
      // and var_target agree to within the relative tolerance eps.
      do {
        var_1 = 0;
        if (weighted) {
          for (auto j = indptr[rid]; j < indptr[rid + 1]; j++)
            // The check for zero is necessary for numerical stability
            var_1 += A[j] > 0
                         ? A[j] * A[j] / std::min(ONE, c * ps[j - indptr[rid]])
                         : 0;
        } else {
          for (auto j = indptr[rid]; j < indptr[rid + 1]; j++)
            var_1 += ONE / std::min(ONE, c * ps[j - indptr[rid]]);
        }

        c *= var_1 / var_target;
      } while (std::min(var_1, var_target) / std::max(var_1, var_target) <
               1 - eps);

      cs[i] = c;
    }

    // Check convergence
    if (!weighted || iters) {
      // Sum of the (clamped) table entries; stop once it no longer shrinks by
      // more than the relative tolerance between two iterations.
      double cur_ex_nodes = 0;
      for (auto it : hop_map)
        cur_ex_nodes += std::min((FloatType)1, it.second);
      if (cur_ex_nodes / prev_ex_nodes >= 1 - eps) break;
      prev_ex_nodes = cur_ex_nodes;
    }
  }

  return hop_map;
}

// Template for picking non-zero values row-wise.
//
// For every row in `rows`, each neighbor is kept when a per-vertex random
// number (derived from the random seed and the vertex id) does not exceed
// that neighbor's inclusion probability.
//
// @param mat                 CSR matrix to sample from.
// @param rows                Row ids to sample.
// @param num_picks           Fanout.
// @param prob                Edge weights; a null array means unweighted.
// @param importance_sampling Number of importance-sampling iterations; 0
//                            disables it, a negative value iterates until
//                            convergence.
// @param random_seed_arr     Optional seed array; when null a seed is drawn
//                            from the thread-local random engine.
// @param seed2_contribution  Forwarded to continuous_seed together with
//                            random_seed_arr.
// @param NIDs                Optional id mapping applied to a column id before
//                            its random number is generated.
// @return A pair of (COO matrix of the picked edges, per-edge importance
//         array). The importance array is a null array when
//         importance_sampling is 0.
template
std::pair CSRLaborPick(
    CSRMatrix mat, IdArray rows, int64_t num_picks, FloatArray prob,
    int importance_sampling, IdArray random_seed_arr, float seed2_contribution,
    IdArray NIDs) {
  using namespace aten;
  const IdxType* indptr = mat.indptr.Ptr();
  const IdxType* indices = mat.indices.Ptr();
  const IdxType* data = CSRHasData(mat) ? mat.data.Ptr() : nullptr;
  const IdxType* rows_data = rows.Ptr();
  const IdxType* nids = IsNullArray(NIDs) ? nullptr : NIDs.Ptr();
  const auto num_rows = rows->shape[0];
  const auto& ctx = mat.indptr->ctx;

  const bool weighted = !IsNullArray(prob);
  // O(1) c computation not possible, so one more iteration is needed.
  if (importance_sampling >= 0) importance_sampling += weighted;
  // A stands for the same notation in arXiv:2210.13339, i.e. the edge weights.
  auto A_arr = prob;
  FloatType* A = A_arr.Ptr();
  constexpr FloatType ONE = 1;

  constexpr auto dtype = DGLDataTypeTraits::dtype;

  // cs stands for c_s in arXiv:2210.13339
  FloatArray cs_array = NDArray::Empty({num_rows}, dtype, ctx);
  FloatType* cs = cs_array.Ptr();
  // ds stands for A_{*s} in arXiv:2210.13339
  FloatArray ds_array = NDArray::Empty({num_rows}, dtype, ctx);
  FloatType* ds = ds_array.Ptr();

  // First pass over the rows: initial c values, (weighted) degrees, the
  // maximum degree, and an upper bound (hop_size) on the number of picked
  // edges used to size the output buffers.
  IdxType max_degree = 1;
  IdxType hop_size = 0;
  for (int64_t i = 0; i < num_rows; ++i) {
    const IdxType rid = rows_data[i];
    const auto act_degree = indptr[rid + 1] - indptr[rid];
    max_degree = std::max(act_degree, max_degree);
    double d = weighted
                   ? std::accumulate(A + indptr[rid], A + indptr[rid + 1], 0.0)
                   : act_degree;
    // O(1) c computation, samples more than needed for weighted case, mentioned
    // in the sentence between (10) and (11) in arXiv:2210.13339
    cs[i] = num_picks / d;
    ds[i] = d;
    hop_size += act_degree;
  }

  map_t hop_map;

  if (importance_sampling)
    hop_map = compute_importance_sampling_probabilities(
        ctx, dtype, max_degree, num_rows, importance_sampling, weighted,
        rows_data, indptr, A, indices, (IdxType)num_picks, ds, cs);

  constexpr auto vidtype = DGLDataTypeTraits::dtype;

  // Output buffers are allocated for the worst case (every edge picked) and
  // trimmed to num_edges with CreateView at the end.
  IdArray picked_row = NDArray::Empty({hop_size}, vidtype, ctx);
  IdArray picked_col = NDArray::Empty({hop_size}, vidtype, ctx);
  IdArray picked_idx = NDArray::Empty({hop_size}, vidtype, ctx);
  FloatArray picked_imp =
      importance_sampling ? NDArray::Empty({hop_size}, dtype, ctx)
                          : NullArray();
  IdxType* picked_rdata = picked_row.Ptr();
  IdxType* picked_cdata = picked_col.Ptr();
  IdxType* picked_idata = picked_idx.Ptr();
  FloatType* picked_imp_data = picked_imp.Ptr();

  const continuous_seed random_seed =
      IsNullArray(random_seed_arr)
          ? continuous_seed(RandomEngine::ThreadLocal()->RandInt(1000000000))
          : continuous_seed(random_seed_arr, seed2_contribution);

  // compute number of edges first and do sampling
  IdxType num_edges = 0;
  for (int64_t i = 0; i < num_rows; i++) {
    const IdxType rid = rows_data[i];
    const auto c = cs[i];

    FloatType norm_inv_p = 0;
    // Index of the first edge picked for this row.
    const auto off = num_edges;
    for (auto j = indptr[rid]; j < indptr[rid + 1]; j++) {
      const auto v = indices[j];
      const uint64_t t = nids ? nids[v] : v;  // t in the paper
      // rolled random number r_t is a function of the random_seed and t
      const auto rnd = random_seed.uniform(t);
      const auto w = (weighted ? A[j] : 1);
      // if hop_map is initialized, get ps from there, otherwise get it from the
      // alternative.
      // The condition parses as (importance_sampling - weighted), i.e. it
      // undoes the increment applied above for the weighted case.
      const auto ps = std::min(
          ONE, importance_sampling - weighted ? c * hop_map[v] : c * w);
      if (rnd <= ps) {
        picked_rdata[num_edges] = rid;
        picked_cdata[num_edges] = v;
        picked_idata[num_edges] = data ? data[j] : j;
        if (importance_sampling) {
          const auto edge_weight = w / ps;
          norm_inv_p += edge_weight;
          picked_imp_data[num_edges] = edge_weight;
        }
        num_edges++;
      }
    }

    if (importance_sampling) {
      // Rescale this row's importances so that they sum to the number of
      // edges picked for the row.
      // NOTE(review): the loop variable below shadows the row index `i` of
      // the enclosing loop; the row index is not used again afterwards.
      const auto norm_factor = (num_edges - off) / norm_inv_p;
      for (auto i = off; i < num_edges; i++)
        // so that fn.mean can be used
        picked_imp_data[i] *= norm_factor;
    }
  }

  picked_row = picked_row.CreateView({num_edges}, picked_row->dtype);
  picked_col = picked_col.CreateView({num_edges}, picked_col->dtype);
  picked_idx = picked_idx.CreateView({num_edges}, picked_idx->dtype);
  if (importance_sampling)
    picked_imp = picked_imp.CreateView({num_edges}, picked_imp->dtype);

  return std::make_pair(
      COOMatrix(mat.num_rows, mat.num_cols, picked_row, picked_col, picked_idx),
      picked_imp);
}

// Template for picking non-zero values row-wise. The implementation first
// slices out the corresponding rows and then converts it to CSR format. It then
// performs row-wise pick on the CSR matrix and rectifies the returned results.
template std::pair COOLaborPick( COOMatrix mat, IdArray rows, int64_t num_picks, FloatArray prob, int importance_sampling, IdArray random_seed, float seed2_contribution, IdArray NIDs) { using namespace aten; const auto& csr = COOToCSR(COOSliceRows(mat, rows)); const IdArray new_rows = Range(0, rows->shape[0], rows->dtype.bits, rows->ctx); const auto&& picked_importances = CSRLaborPick( csr, new_rows, num_picks, prob, importance_sampling, random_seed, seed2_contribution, NIDs); const auto& picked = picked_importances.first; const auto& importances = picked_importances.second; return std::make_pair( COOMatrix( mat.num_rows, mat.num_cols, IndexSelect( rows, picked.row), // map the row index to the correct one picked.col, picked.data), importances); } } // namespace impl } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_LABOR_PICK_H_ ================================================ FILE: src/array/cpu/labor_sampling.cc ================================================ /*! * Copyright (c) 2022, NVIDIA Corporation * Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 *
 * \file array/cpu/labor_sampling.cc
 * \brief labor sampling
 */
#include "./labor_pick.h"

namespace dgl {
namespace aten {
namespace impl {

/////////////////////////////// CSR ///////////////////////////////

// Labor sampling on a CSR matrix. Thin wrapper that forwards every argument
// unchanged to CSRLaborPick (num_samples is passed as its num_picks) and
// returns its result.
template
std::pair CSRLaborSampling(
    CSRMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob,
    int importance_sampling, IdArray random_seed, float seed2_contribution,
    IdArray NIDs) {
  return CSRLaborPick(
      mat, rows, num_samples, prob, importance_sampling, random_seed,
      seed2_contribution, NIDs);
}

// Explicit instantiations of CSRLaborSampling.
// NOTE(review): the four statements are textually identical in this copy of
// the file; the template arguments that distinguish them are not visible
// here -- confirm against the original source.
template std::pair CSRLaborSampling(
    CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);
template std::pair CSRLaborSampling(
    CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);
template std::pair CSRLaborSampling(
    CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);
template std::pair CSRLaborSampling(
    CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);

/////////////////////////////// COO ///////////////////////////////

// Labor sampling on a COO matrix. Thin wrapper that forwards every argument
// unchanged to COOLaborPick (num_samples is passed as its num_picks) and
// returns its result.
template
std::pair COOLaborSampling(
    COOMatrix mat, IdArray rows, int64_t num_samples, FloatArray prob,
    int importance_sampling, IdArray random_seed, float seed2_contribution,
    IdArray NIDs) {
  return COOLaborPick(
      mat, rows, num_samples, prob, importance_sampling, random_seed,
      seed2_contribution, NIDs);
}

// Explicit instantiations of COOLaborSampling (see the note on the CSR
// instantiations above the COO section divider regarding the missing
// template arguments).
template std::pair COOLaborSampling(
    COOMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);
template std::pair COOLaborSampling(
    COOMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);
template std::pair COOLaborSampling(
    COOMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);
template std::pair COOLaborSampling(
    COOMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cpu/negative_sampling.cc
================================================
/**
 *  Copyright (c) 2021 by
Contributors
 * @file array/cpu/negative_sampling.cc
 * @brief Uniform negative sampling on CSR.
 */

#include
#include
#include
#include
#include
#include

using namespace dgl::runtime;

namespace dgl {
namespace aten {
namespace impl {

// Samples (row, column) pairs uniformly at random that are NOT non-zero
// entries of `csr`.
//
// @param csr                The matrix whose non-zero entries are to be
//                           avoided.
// @param num_samples        Maximum number of pairs to return.
// @param num_trials         Maximum number of random draws per output slot;
//                           a slot that never finds a valid pair is dropped.
// @param exclude_self_loops If true, pairs with u == v are rejected as well.
// @param replace            If false, duplicate pairs are removed (the result
//                           is then sorted by row, then column).
// @param redundancy         Fraction of extra slots to draw so that dropped
//                           or duplicate slots can be compensated:
//                           num_samples * (1 + redundancy) slots are tried.
// @return A pair of (row ids, column ids), each a view of at most
//         num_samples elements.
template
std::pair CSRGlobalUniformNegativeSampling(
    const CSRMatrix& csr, int64_t num_samples, int num_trials,
    bool exclude_self_loops, bool replace, double redundancy) {
  const int64_t num_row = csr.num_rows;
  const int64_t num_col = csr.num_cols;
  const int64_t num_actual_samples =
      static_cast(num_samples * (1 + redundancy));
  // Both arrays start out filled with -1, which marks a slot as "no pair
  // found".
  IdArray row = Full(-1, num_actual_samples, csr.indptr->ctx);
  IdArray col = Full(-1, num_actual_samples, csr.indptr->ctx);
  IdType* row_data = row.Ptr();
  IdType* col_data = col.Ptr();

  // Every slot is filled independently: draw up to num_trials candidate
  // pairs and keep the first one that is acceptable.
  parallel_for(0, num_actual_samples, 1, [&](int64_t b, int64_t e) {
    for (int64_t i = b; i < e; ++i) {
      for (int trial = 0; trial < num_trials; ++trial) {
        IdType u = RandomEngine::ThreadLocal()->RandInt(num_row);
        IdType v = RandomEngine::ThreadLocal()->RandInt(num_col);
        if (!(exclude_self_loops && (u == v)) && !CSRIsNonZero(csr, u, v)) {
          row_data[i] = u;
          col_data[i] = v;
          break;
        }
      }
    }
  });

  // Compact the two arrays in lock-step, dropping the slots still marked -1.
  PairIterator begin(row_data, col_data);
  PairIterator end = std::remove_if(
      begin, begin + num_actual_samples,
      [](const std::pair& val) { return val.first == -1; });
  if (!replace) {
    // Sort lexicographically so that std::unique can drop duplicate pairs.
    std::sort(
        begin, end,
        [](const std::pair& a, const std::pair& b) {
          return a.first < b.first ||
                 (a.first == b.first && a.second < b.second);
        });
    end = std::unique(begin, end);
  }
  // Never return more than requested, even if the redundancy produced extra
  // valid pairs.
  int64_t num_sampled = std::min(static_cast(end - begin), num_samples);
  return {
      row.CreateView({num_sampled}, row->dtype),
      col.CreateView({num_sampled}, col->dtype)};
}

// Explicit instantiations for 32-bit and 64-bit ids on CPU.
template std::pair CSRGlobalUniformNegativeSampling<
    kDGLCPU, int32_t>(const CSRMatrix&, int64_t, int, bool, bool, double);
template std::pair CSRGlobalUniformNegativeSampling<
    kDGLCPU, int64_t>(const CSRMatrix&, int64_t, int, bool, bool, double);

};  // namespace impl
};  // namespace aten
};  // namespace dgl
================================================ FILE: src/array/cpu/rowwise_pick.h ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/rowwise_pick.h * @brief Template implementation for rowwise pick operators. */ #ifndef DGL_ARRAY_CPU_ROWWISE_PICK_H_ #define DGL_ARRAY_CPU_ROWWISE_PICK_H_ #include #include #include #include #include #include #include #include #include namespace dgl { namespace aten { namespace impl { // User-defined function for picking elements from one row. // // The column indices of the given row are stored in // [col + off, col + off + len) // // Similarly, the data indices are stored in // [data + off, data + off + len) // Data index pointer could be NULL, which means data[i] == i // // *ATTENTION*: This function will be invoked concurrently. Please make sure // it is thread-safe. // // @param rowid The row to pick from. // @param off Starting offset of this row. // @param len NNZ of the row. // @param num_picks Number of picks on the row. // @param col Pointer of the column indices. // @param data Pointer of the data indices. // @param out_idx Picked indices in [off, off + len). template using PickFn = std::function; // User-defined function for determining the number of elements to pick from one // row. // // The column indices of the given row are stored in // [col + off, col + off + len) // // Similarly, the data indices are stored in // [data + off, data + off + len) // Data index pointer could be NULL, which means data[i] == i // // *ATTENTION*: This function will be invoked concurrently. Please make sure // it is thread-safe. // // @param rowid The row to pick from. // @param off Starting offset of this row. // @param len NNZ of the row. // @param col Pointer of the column indices. // @param data Pointer of the data indices. template using NumPicksFn = std::function; // User-defined function for picking elements from a range within a row. 
// // The column indices of each element is in // off + et_idx[et_offset+i]), where i is in [et_offset, et_offset+et_len) // // Similarly, the data indices are stored in // data[off+et_idx[et_offset+i])] // Data index pointer could be NULL, which means data[i] == // off+et_idx[et_offset+i]) // // *ATTENTION*: This function will be invoked concurrently. Please make sure // it is thread-safe. // // @param off Starting offset of this row. // @param et_offset Starting offset of this range. // @param cur_et The edge type. // @param et_len Length of the range. // @param et_idx A map from local idx to column id. // @param et_eid Edge-type-specific id array. // @param eid Pointer of the homogenized edge id array. // @param out_idx Picked indices in [et_offset, et_offset + et_len). template using EtypeRangePickFn = std::function& et_idx, const std::vector& et_eid, const IdxType* eid, IdxType* out_idx)>; template std::pair CSRRowWisePickFused( CSRMatrix mat, IdArray rows, IdArray seed_mapping, std::vector* new_seed_nodes, int64_t num_picks, bool replace, PickFn pick_fn, NumPicksFn num_picks_fn) { using namespace aten; const IdxType* indptr = static_cast(mat.indptr->data); const IdxType* indices = static_cast(mat.indices->data); const IdxType* data = CSRHasData(mat) ? 
static_cast(mat.data->data) : nullptr; const IdxType* rows_data = static_cast(rows->data); const int64_t num_rows = rows->shape[0]; const auto& ctx = mat.indptr->ctx; const auto& idtype = mat.indptr->dtype; IdxType* seed_mapping_data = nullptr; if (map_seed_nodes) seed_mapping_data = seed_mapping.Ptr(); const int num_threads = runtime::compute_num_threads(0, num_rows, 1); std::vector global_prefix(num_threads + 1, 0); IdArray picked_col, picked_idx, picked_coo_rows; IdArray block_csr_indptr = IdArray::Empty({num_rows + 1}, idtype, ctx); IdxType* block_csr_indptr_data = block_csr_indptr.Ptr(); #pragma omp parallel num_threads(num_threads) { const int thread_id = omp_get_thread_num(); const int64_t start_i = thread_id * (num_rows / num_threads) + std::min(static_cast(thread_id), num_rows % num_threads); const int64_t end_i = (thread_id + 1) * (num_rows / num_threads) + std::min(static_cast(thread_id + 1), num_rows % num_threads); assert(thread_id + 1 < num_threads || end_i == num_rows); const int64_t num_local = end_i - start_i; std::unique_ptr local_prefix(new int64_t[num_local + 1]); local_prefix[0] = 0; for (int64_t i = start_i; i < end_i; ++i) { // build prefix-sum const int64_t local_i = i - start_i; const IdxType rid = rows_data[i]; if (map_seed_nodes) seed_mapping_data[rid] = i; IdxType len = num_picks_fn( rid, indptr[rid], indptr[rid + 1] - indptr[rid], indices, data); local_prefix[local_i + 1] = local_prefix[local_i] + len; } global_prefix[thread_id + 1] = local_prefix[num_local]; #pragma omp barrier #pragma omp master { for (int t = 0; t < num_threads; ++t) { global_prefix[t + 1] += global_prefix[t]; } picked_col = IdArray::Empty({global_prefix[num_threads]}, idtype, ctx); picked_idx = IdArray::Empty({global_prefix[num_threads]}, idtype, ctx); picked_coo_rows = IdArray::Empty({global_prefix[num_threads]}, idtype, ctx); } #pragma omp barrier IdxType* picked_cdata = picked_col.Ptr(); IdxType* picked_idata = picked_idx.Ptr(); IdxType* picked_rows = 
picked_coo_rows.Ptr(); const IdxType thread_offset = global_prefix[thread_id]; for (int64_t i = start_i; i < end_i; ++i) { const IdxType rid = rows_data[i]; const int64_t local_i = i - start_i; block_csr_indptr_data[i] = local_prefix[local_i] + thread_offset; const IdxType off = indptr[rid]; const IdxType len = indptr[rid + 1] - off; if (len == 0) continue; const int64_t row_offset = local_prefix[local_i] + thread_offset; const int64_t num_picks = local_prefix[local_i + 1] + thread_offset - row_offset; pick_fn( rid, off, len, num_picks, indices, data, picked_idata + row_offset); for (int64_t j = 0; j < num_picks; ++j) { const IdxType picked = picked_idata[row_offset + j]; picked_cdata[row_offset + j] = indices[picked]; picked_idata[row_offset + j] = data ? data[picked] : picked; picked_rows[row_offset + j] = i; } } } block_csr_indptr_data[num_rows] = global_prefix.back(); const IdxType num_cols = picked_col->shape[0]; if (map_seed_nodes) { (*new_seed_nodes).resize(num_rows); memcpy((*new_seed_nodes).data(), rows_data, sizeof(IdxType) * num_rows); } return std::make_pair( CSRMatrix(num_rows, num_cols, block_csr_indptr, picked_col, picked_idx), picked_coo_rows); } // Template for picking non-zero values row-wise. The implementation utilizes // OpenMP parallelization on rows because each row performs computation // independently. template COOMatrix CSRRowWisePick( CSRMatrix mat, IdArray rows, int64_t num_picks, bool replace, PickFn pick_fn, NumPicksFn num_picks_fn) { using namespace aten; const IdxType* indptr = static_cast(mat.indptr->data); const IdxType* indices = static_cast(mat.indices->data); const IdxType* data = CSRHasData(mat) ? static_cast(mat.data->data) : nullptr; const IdxType* rows_data = static_cast(rows->data); const int64_t num_rows = rows->shape[0]; const auto& ctx = mat.indptr->ctx; const auto& idtype = mat.indptr->dtype; // To leverage OMP parallelization, we create two arrays to store // picked src and dst indices. 
Each array is of length num_rows * num_picks. // For rows whose nnz < num_picks, the indices are padded with -1. // // We check whether all the given rows // have at least num_picks number of nnz when replace is false. // // If the check holds, remove -1 elements by remove_if operation, which simply // moves valid elements to the head of arrays and create a view of the // original array. The implementation consumes a little extra memory than the // actual requirement. // // Otherwise, directly use the row and col arrays to construct the result COO // matrix. // // [02/29/2020 update]: OMP is disabled for now since batch-wise parallelism // is more // significant. (minjie) // Do not use omp_get_max_threads() since that doesn't work for compiling // without OpenMP. const int num_threads = runtime::compute_num_threads(0, num_rows, 1); std::vector global_prefix(num_threads + 1, 0); // TODO(BarclayII) Using OMP parallel directly instead of using // runtime::parallel_for does not handle exceptions well (directly aborts when // an exception pops up). It runs faster though because there is less // scheduling. Need to handle exceptions better. 
IdArray picked_row, picked_col, picked_idx; #pragma omp parallel num_threads(num_threads) { const int thread_id = omp_get_thread_num(); const int64_t start_i = thread_id * (num_rows / num_threads) + std::min(static_cast(thread_id), num_rows % num_threads); const int64_t end_i = (thread_id + 1) * (num_rows / num_threads) + std::min(static_cast(thread_id + 1), num_rows % num_threads); assert(thread_id + 1 < num_threads || end_i == num_rows); const int64_t num_local = end_i - start_i; // make sure we don't have to pay initialization cost std::unique_ptr local_prefix(new int64_t[num_local + 1]); local_prefix[0] = 0; for (int64_t i = start_i; i < end_i; ++i) { // build prefix-sum const int64_t local_i = i - start_i; const IdxType rid = rows_data[i]; IdxType len = num_picks_fn( rid, indptr[rid], indptr[rid + 1] - indptr[rid], indices, data); local_prefix[local_i + 1] = local_prefix[local_i] + len; } global_prefix[thread_id + 1] = local_prefix[num_local]; #pragma omp barrier #pragma omp master { for (int t = 0; t < num_threads; ++t) { global_prefix[t + 1] += global_prefix[t]; } picked_row = IdArray::Empty({global_prefix[num_threads]}, idtype, ctx); picked_col = IdArray::Empty({global_prefix[num_threads]}, idtype, ctx); picked_idx = IdArray::Empty({global_prefix[num_threads]}, idtype, ctx); } #pragma omp barrier IdxType* picked_rdata = picked_row.Ptr(); IdxType* picked_cdata = picked_col.Ptr(); IdxType* picked_idata = picked_idx.Ptr(); const IdxType thread_offset = global_prefix[thread_id]; for (int64_t i = start_i; i < end_i; ++i) { const IdxType rid = rows_data[i]; const IdxType off = indptr[rid]; const IdxType len = indptr[rid + 1] - off; if (len == 0) continue; const int64_t local_i = i - start_i; const int64_t row_offset = thread_offset + local_prefix[local_i]; const int64_t num_picks = thread_offset + local_prefix[local_i + 1] - row_offset; pick_fn( rid, off, len, num_picks, indices, data, picked_idata + row_offset); for (int64_t j = 0; j < num_picks; ++j) { const 
// Template for picking non-zero values row-wise with a separate pick budget
// per edge type. Rows are processed through runtime::parallel_for; every
// iteration writes only slot i of the per-row result vectors, and the slots
// are concatenated once all rows are done.
//
// Parameters:
//   mat                  CSR matrix to pick from.
//   rows                 Row ids to process; one result slot per entry.
//   eid2etype_offset     Searched with std::upper_bound to turn a homogenized
//                        edge id into (edge type, id within that edge type).
//                        Presumably the sorted per-type start offsets --
//                        TODO confirm against the caller.
//   num_picks            Number of picks per edge type; -1 selects every edge
//                        of that type.
//   replace              Whether picking is done with replacement.
//   rowwise_etype_sorted When true the per-row sort by edge type is skipped
//                        and the ordering is only verified by a CHECK.
//   pick_fn              Invoked for an edge-type range that cannot simply be
//                        taken whole; the positions it writes are interpreted
//                        relative to the start of that range.
//   prob_or_mask         Per-edge-type probability/mask arrays. An empty
//                        vector, or a null array for a type, means no
//                        filtering; otherwise the select-all paths keep only
//                        entries whose value is > 0.
//
// Returns a COOMatrix with the shape of `mat` whose row/col/data arrays are the
// concatenation of the per-row picks. The data array holds homogenized edge
// ids.
template COOMatrix CSRRowWisePerEtypePick(
    CSRMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_picks, bool replace, bool rowwise_etype_sorted,
    EtypeRangePickFn pick_fn, const std::vector& prob_or_mask) {
  using namespace aten;
  const IdxType* indptr = mat.indptr.Ptr();
  const IdxType* indices = mat.indices.Ptr();
  // Explicit edge ids, or nullptr when the position in `indices` is the id.
  const IdxType* eid = CSRHasData(mat) ? mat.data.Ptr() : nullptr;
  const IdxType* rows_data = rows.Ptr();
  const int64_t num_rows = rows->shape[0];
  const auto& ctx = mat.indptr->ctx;
  const int64_t num_etypes = num_picks.size();
  const bool has_probs = (prob_or_mask.size() > 0);
  // One slot per requested row.
  std::vector picked_rows(rows->shape[0]);
  std::vector picked_cols(rows->shape[0]);
  std::vector picked_idxs(rows->shape[0]);

  // Check whether every edge type has the same number of picks.
  // If so, a row whose total number of neighbors does not exceed that number
  // can be taken whole when replace=False, without grouping by edge type.
  // NOTE(review): num_picks[0] is read without checking that num_picks is
  // non-empty -- confirm that callers never pass an empty vector.
  // NOTE(review): when the shared value is -1 the test `len <= num_pick_value`
  // below is never true, so such rows go through the per-edge-type path.
  bool same_num_pick = true;
  int64_t num_pick_value = num_picks[0];
  for (int64_t num_pick : num_picks) {
    if (num_pick_value != num_pick) {
      same_num_pick = false;
      break;
    }
  }

  runtime::parallel_for(0, num_rows, [&](size_t b, size_t e) {
    for (size_t i = b; i < e; ++i) {
      const IdxType rid = rows_data[i];
      CHECK_LT(rid, mat.num_rows);
      const IdxType off = indptr[rid];
      const IdxType len = indptr[rid + 1] - off;

      // Empty row: store empty arrays in this row's slots and move on.
      if (len == 0) {
        picked_rows[i] = NewIdArray(0, ctx, sizeof(IdxType) * 8);
        picked_cols[i] = NewIdArray(0, ctx, sizeof(IdxType) * 8);
        picked_idxs[i] = NewIdArray(0, ctx, sizeof(IdxType) * 8);
        continue;
      }

      // Fast path: the whole row fits within the shared budget, so every edge
      // is kept (subject to the probability/mask filter).
      if (same_num_pick && len <= num_pick_value && !replace) {
        // These locals shadow the `rows` parameter inside this branch.
        IdArray rows = Full(rid, len, sizeof(IdxType) * 8, ctx);
        IdArray cols = Full(-1, len, sizeof(IdxType) * 8, ctx);
        IdArray idx = Full(-1, len, sizeof(IdxType) * 8, ctx);
        IdxType* cdata = cols.Ptr();
        IdxType* idata = idx.Ptr();

        // k counts the entries actually kept; the arrays are trimmed to k
        // through CreateView below.
        int64_t k = 0;
        for (int64_t j = 0; j < len; ++j) {
          const IdxType homogenized_eid = eid ? eid[off + j] : off + j;
          // Locate the edge type whose offset range contains this edge id.
          auto it = std::upper_bound(
              eid2etype_offset.begin(), eid2etype_offset.end(),
              homogenized_eid);
          const IdxType heterogenized_etype =
              it - eid2etype_offset.begin() - 1;
          const IdxType heterogenized_eid =
              homogenized_eid - eid2etype_offset[heterogenized_etype];

          if (!has_probs || IsNullArray(prob_or_mask[heterogenized_etype])) {
            // No probability array, select all
            cdata[k] = indices[off + j];
            idata[k] = homogenized_eid;
            ++k;
          } else {
            // Select the entries with non-zero probability
            const NDArray& p = prob_or_mask[heterogenized_etype];
            const DType* pdata = p.Ptr();
            if (pdata[heterogenized_eid] > 0) {
              cdata[k] = indices[off + j];
              idata[k] = homogenized_eid;
              ++k;
            }
          }
        }

        picked_rows[i] = rows.CreateView({k}, rows->dtype);
        picked_cols[i] = cols.CreateView({k}, cols->dtype);
        picked_idxs[i] = idx.CreateView({k}, idx->dtype);
      } else {
        // Slow path: sample each edge type of this row separately.
        std::vector rows;
        std::vector cols;
        std::vector idx;

        // et[j]     : edge type of the j-th entry of the row
        // et_idx[j] : row-relative position, later ordered by edge type
        // et_eid[j] : edge id within its own edge type
        std::vector et(len);
        std::vector et_idx(len);
        std::vector et_eid(len);
        std::iota(et_idx.begin(), et_idx.end(), 0);
        for (int64_t j = 0; j < len; ++j) {
          const IdxType homogenized_eid = eid ? eid[off + j] : off + j;
          auto it = std::upper_bound(
              eid2etype_offset.begin(), eid2etype_offset.end(),
              homogenized_eid);
          const IdxType heterogenized_etype =
              it - eid2etype_offset.begin() - 1;
          const IdxType heterogenized_eid =
              homogenized_eid - eid2etype_offset[heterogenized_etype];
          et[j] = heterogenized_etype;
          et_eid[j] = heterogenized_eid;
        }
        // Sort the positions by edge type only when the caller did not
        // declare the row as already sorted.
        if (!rowwise_etype_sorted)
          std::sort(
              et_idx.begin(), et_idx.end(),
              [&et](IdxType i1, IdxType i2) { return et[i1] < et[i2]; });
        // The last position holds the largest edge type once ordered.
        CHECK_LT(et[et_idx[len - 1]], num_etypes)
            << "etype values exceed the number of fanouts";

        // Walk the ordered positions and handle one run of equal edge types
        // at a time: [et_offset, et_offset + et_len).
        IdxType cur_et = et[et_idx[0]];
        int64_t et_offset = 0;
        int64_t et_len = 1;
        for (int64_t j = 0; j < len; ++j) {
          CHECK((j + 1 == len) || (et[et_idx[j]] <= et[et_idx[j + 1]]))
              << "Edge type is not sorted. Please sort in advance or specify "
                 "'rowwise_etype_sorted' as false.";
          if ((j + 1 == len) || cur_et != et[et_idx[j + 1]]) {
            // Reached either
            // 1 end of the current etype, or
            // 2 end of the row,
            // so pick for the current etype now.
            if ((num_picks[cur_et] == -1) ||
                (et_len <= num_picks[cur_et] && !replace)) {
              // fast path, select all
              for (int64_t k = 0; k < et_len; ++k) {
                const IdxType eid_offset = off + et_idx[et_offset + k];
                const IdxType homogenized_eid =
                    eid ? eid[eid_offset] : eid_offset;
                auto it = std::upper_bound(
                    eid2etype_offset.begin(), eid2etype_offset.end(),
                    homogenized_eid);
                const IdxType heterogenized_etype =
                    it - eid2etype_offset.begin() - 1;
                const IdxType heterogenized_eid =
                    homogenized_eid - eid2etype_offset[heterogenized_etype];

                if (!has_probs ||
                    IsNullArray(prob_or_mask[heterogenized_etype])) {
                  // No probability, select all
                  rows.push_back(rid);
                  cols.push_back(indices[eid_offset]);
                  idx.push_back(homogenized_eid);
                } else {
                  // Select the entries with non-zero probability
                  const NDArray& p = prob_or_mask[heterogenized_etype];
                  const DType* pdata = p.Ptr();
                  if (pdata[heterogenized_eid] > 0) {
                    rows.push_back(rid);
                    cols.push_back(indices[eid_offset]);
                    idx.push_back(homogenized_eid);
                  }
                }
              }
            } else {
              // Output buffer pre-filled with -1; slots still holding -1
              // after pick_fn returns are skipped below.
              IdArray picked_idx =
                  Full(-1, num_picks[cur_et], sizeof(IdxType) * 8, ctx);
              IdxType* picked_idata = picked_idx.Ptr();

              // Delegate the random pick for this edge-type range.
              pick_fn(
                  off, et_offset, cur_et, et_len, et_idx, et_eid, eid,
                  picked_idata);
              for (int64_t k = 0; k < num_picks[cur_et]; ++k) {
                // `picked` is a position relative to et_offset.
                const IdxType picked = picked_idata[k];
                if (picked == -1) continue;
                rows.push_back(rid);
                cols.push_back(indices[off + et_idx[et_offset + picked]]);
                if (eid) {
                  idx.push_back(eid[off + et_idx[et_offset + picked]]);
                } else {
                  idx.push_back(off + et_idx[et_offset + picked]);
                }
              }
            }

            if (j + 1 == len) break;
            // Start the run of the next etype.
            cur_et = et[et_idx[j + 1]];
            et_offset = j + 1;
            et_len = 1;
          } else {
            et_len++;
          }
        }

        picked_rows[i] = VecToIdArray(rows, sizeof(IdxType) * 8, ctx);
        picked_cols[i] = VecToIdArray(cols, sizeof(IdxType) * 8, ctx);
        picked_idxs[i] = VecToIdArray(idx, sizeof(IdxType) * 8, ctx);
      }  // end processing one row

      // The three per-row arrays must stay aligned entry by entry.
      CHECK_EQ(picked_rows[i]->shape[0], picked_cols[i]->shape[0]);
      CHECK_EQ(picked_rows[i]->shape[0], picked_idxs[i]->shape[0]);
    }  // end processing all rows
  });

  IdArray picked_row = Concat(picked_rows);
  IdArray picked_col = Concat(picked_cols);
  IdArray picked_idx = Concat(picked_idxs);
  return COOMatrix(
      mat.num_rows, mat.num_cols, picked_row, picked_col, picked_idx);
}
// Gathers `len` values of `array` into a freshly allocated array.
// Equivalent to the numpy expression array[idx[off:off + len]] when idx_data
// is non-null, and to array[off:off + len] when idx_data is null.
//
// Parameters:
//   array     Source values; the result reuses its dtype and context.
//   idx_data  Optional indirection table, read at [off, off + len).
//   off       First position to read.
//   len       Number of values to gather.
//
// No bounds checking is done on either lookup.
template inline FloatArray DoubleSlice(
    FloatArray array, const IdxType* idx_data, IdxType off, IdxType len) {
  const FloatType* array_data = static_cast(array->data);
  FloatArray ret = FloatArray::Empty({len}, array->dtype, array->ctx);
  FloatType* ret_data = static_cast(ret->data);
  for (int64_t j = 0; j < len; ++j) {
    if (idx_data)
      // Indirect read through the index table.
      ret_data[j] = array_data[idx_data[off + j]];
    else
      // No table: positions index the source directly.
      ret_data[j] = array_data[off + j];
  }
  return ret;
}
// Builds the per-edge-type pick function used for weighted sampling.
//
// Parameters:
//   num_samples  Number of samples to draw, indexed by edge type.
//   prob         Probability arrays, indexed by edge type; a null array makes
//                every candidate of that type weigh 1.
//   replace      Whether to sample with replacement.
//
// Both vectors are captured by value, so the returned callable keeps its own
// copies. When invoked it gathers the weights of the et_len candidates of
// edge type cur_et and forwards them to RandomEngine's Choice, which fills
// out_idx. The `off` and `eid` arguments of the callable are not used.
template inline EtypeRangePickFn GetSamplingRangePickFn(
    const std::vector& num_samples, const std::vector& prob, bool replace) {
  EtypeRangePickFn pick_fn =
      [prob, num_samples, replace](
          IdxType off, IdxType et_offset, IdxType cur_et, IdxType et_len,
          const std::vector& et_idx, const std::vector& et_eid,
          const IdxType* eid, IdxType* out_idx) {
        const FloatArray& p = prob[cur_et];
        const FloatType* p_data = IsNullArray(p) ? nullptr : p.Ptr();
        // NOTE(review): the scratch array takes p's dtype even when p is a
        // null array -- confirm that dtype matches FloatType in that case.
        FloatArray probs = FloatArray::Empty({et_len}, p->dtype, p->ctx);
        FloatType* probs_data = probs.Ptr();
        for (int64_t j = 0; j < et_len; ++j) {
          // et_idx maps the j-th candidate of this range to its position in
          // the row; et_eid holds the edge id at that position.
          const IdxType cur_eid = et_eid[et_idx[et_offset + j]];
          probs_data[j] = p_data ? p_data[cur_eid] : static_cast(1.);
        }

        RandomEngine::ThreadLocal()->Choice(
            num_samples[cur_et], probs, out_idx, replace);
      };
  return pick_fn;
}
// Builds the pick function used for uniform row-wise sampling.
//
// Parameters:
//   num_samples  Captured by the callable but not read by it; the number of
//                picks is supplied per call through `num_picks`.
//   replace      Whether to sample with replacement.
//
// The returned callable asks RandomEngine's UniformChoice for num_picks
// values based on `len`, then adds `off` to each so that out_idx holds
// positions shifted by the row's offset. The rowid, col and data arguments
// are not used.
template inline PickFn GetSamplingUniformPickFn(
    int64_t num_samples, bool replace) {
  PickFn pick_fn = [num_samples, replace](
                       IdxType rowid, IdxType off, IdxType len,
                       IdxType num_picks, const IdxType* col,
                       const IdxType* data, IdxType* out_idx) {
    RandomEngine::ThreadLocal()->UniformChoice(
        num_picks, len, out_idx, replace);
    // Shift the row-relative results by the row's starting offset.
    for (int64_t j = 0; j < num_picks; ++j) {
      out_idx[j] += off;
    }
  };
  return pick_fn;
}
// Builds the pick function used for tag-biased row-wise sampling.
//
// Parameters:
//   num_samples  Captured by the callable but not read by it; the number of
//                picks is supplied per call through `num_picks`.
//   split        2-D array read one row per `rowid`; each row is handed to
//                BiasedChoice as the tag offsets of that row.
//   bias         Bias array forwarded unchanged to BiasedChoice.
//   replace      Whether to sample with replacement.
//
// The returned callable fills out_idx through RandomEngine's BiasedChoice and
// then adds `off` to every entry. The len, col and data arguments are not
// used.
template inline PickFn GetSamplingBiasedPickFn(
    int64_t num_samples, IdArray split, FloatArray bias, bool replace) {
  PickFn pick_fn = [num_samples, split, bias, replace](
                       IdxType rowid, IdxType off, IdxType len,
                       IdxType num_picks, const IdxType* col,
                       const IdxType* data, IdxType* out_idx) {
    // Start of this row's entries inside the flattened `split` array.
    const IdxType* tag_offset = split.Ptr() + rowid * split->shape[1];
    RandomEngine::ThreadLocal()->BiasedChoice(
        num_picks, tag_offset, bias, out_idx, replace);
    // Shift the row-relative results by the row's starting offset.
    for (int64_t j = 0; j < num_picks; ++j) {
      out_idx[j] += off;
    }
  };
  return pick_fn;
}
CSRRowWiseSamplingFused( CSRMatrix mat, IdArray rows, IdArray seed_mapping, std::vector* new_seed_nodes, int64_t num_samples, NDArray prob_or_mask, bool replace) { // If num_samples is -1, select all neighbors without replacement. replace = (replace && num_samples != -1); CHECK(prob_or_mask.defined()); auto num_picks_fn = GetSamplingNumPicksFn(num_samples, prob_or_mask, replace); auto pick_fn = GetSamplingPickFn(num_samples, prob_or_mask, replace); return CSRRowWisePickFused( mat, rows, seed_mapping, new_seed_nodes, num_samples, replace, pick_fn, num_picks_fn); } template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, std::vector*, int64_t, NDArray, bool); template std::pair CSRRowWiseSamplingFused( CSRMatrix, IdArray, IdArray, 
// Weighted per-edge-type row-wise sampling on a CSR matrix.
//
// Parameters:
//   mat                  CSR matrix to sample from.
//   rows                 Row ids to sample.
//   eid2etype_offset     Forwarded to CSRRowWisePerEtypePick for the edge id
//                        to edge type lookup.
//   num_samples          Number of samples per edge type.
//   prob_or_mask         One array per edge type; the count must equal
//                        num_samples.size() and every array must be defined.
//   replace              Whether to sample with replacement.
//   rowwise_etype_sorted Forwarded unchanged to CSRRowWisePerEtypePick.
//
// Fails a CHECK when the sizes differ or any array is undefined. Returns the
// COOMatrix produced by CSRRowWisePerEtypePick.
template COOMatrix CSRRowWisePerEtypeSampling(
    CSRMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_samples, const std::vector& prob_or_mask,
    bool replace, bool rowwise_etype_sorted) {
  CHECK(prob_or_mask.size() == num_samples.size())
      << "the number of probability tensors does not match the number of edge "
         "types.";
  for (auto& p : prob_or_mask) CHECK(p.defined());
  // The same arrays drive both the random pick and the select-all filter.
  auto pick_fn = GetSamplingRangePickFn(
      num_samples, prob_or_mask, replace);
  return CSRRowWisePerEtypePick(
      mat, rows, eid2etype_offset, num_samples, replace, rowwise_etype_sorted,
      pick_fn, prob_or_mask);
}
// Uniform row-wise sampling on a CSR matrix.
//
// Parameters:
//   mat          CSR matrix to sample from.
//   rows         Row ids to sample.
//   num_samples  Number of samples per row; -1 selects all neighbors.
//   replace      Whether to sample with replacement; forced to false when
//                num_samples is -1.
//
// Returns the COOMatrix produced by CSRRowWisePick with the uniform
// num-picks and pick functions.
template COOMatrix CSRRowWiseSamplingUniform(
    CSRMatrix mat, IdArray rows, int64_t num_samples, bool replace) {
  // If num_samples is -1, select all neighbors without replacement.
  replace = (replace && num_samples != -1);
  auto num_picks_fn = GetSamplingUniformNumPicksFn(num_samples, replace);
  auto pick_fn = GetSamplingUniformPickFn(num_samples, replace);
  return CSRRowWisePick(mat, rows, num_samples, replace, pick_fn, num_picks_fn);
}
// Tag-biased row-wise sampling on a CSR matrix.
//
// Parameters:
//   mat          CSR matrix to sample from.
//   rows         Row ids to sample.
//   num_samples  Number of samples per row; -1 selects all neighbors.
//   tag_offset   Passed as the `split` array of the biased num-picks and pick
//                functions.
//   bias         Bias array passed to the same two functions.
//   replace      Whether to sample with replacement; forced to false when
//                num_samples is -1.
//
// Returns the COOMatrix produced by CSRRowWisePick.
template COOMatrix CSRRowWiseSamplingBiased(
    CSRMatrix mat, IdArray rows, int64_t num_samples, NDArray tag_offset,
    FloatArray bias, bool replace) {
  // If num_samples is -1, select all neighbors without replacement.
  replace = (replace && num_samples != -1);
  auto num_picks_fn = GetSamplingBiasedNumPicksFn(
      num_samples, tag_offset, bias, replace);
  auto pick_fn = GetSamplingBiasedPickFn(
      num_samples, tag_offset, bias, replace);
  return CSRRowWisePick(mat, rows, num_samples, replace, pick_fn, num_picks_fn);
}
// Weighted per-edge-type row-wise sampling on a COO matrix.
//
// Parameters:
//   mat               COO matrix to sample from.
//   rows              Row ids to sample.
//   eid2etype_offset  Forwarded to COORowWisePerEtypePick for the edge id to
//                     edge type lookup.
//   num_samples       Number of samples per edge type.
//   prob_or_mask      One array per edge type; the count must equal
//                     num_samples.size() and every array must be defined.
//   replace           Whether to sample with replacement.
//
// Fails a CHECK when the sizes differ or any array is undefined. Returns the
// COOMatrix produced by COORowWisePerEtypePick.
template COOMatrix COORowWisePerEtypeSampling(
    COOMatrix mat, IdArray rows, const std::vector& eid2etype_offset,
    const std::vector& num_samples, const std::vector& prob_or_mask,
    bool replace) {
  CHECK(prob_or_mask.size() == num_samples.size())
      << "the number of probability tensors do not match the number of edge "
         "types.";
  for (auto& p : prob_or_mask) CHECK(p.defined());
  // The same arrays are handed to both the pick function and the picker.
  auto pick_fn = GetSamplingRangePickFn(
      num_samples, prob_or_mask, replace);
  return COORowWisePerEtypePick(
      mat, rows, eid2etype_offset, num_samples, replace, pick_fn, prob_or_mask);
}
// Uniform row-wise sampling on a COO matrix.
//
// Parameters:
//   mat          COO matrix to sample from.
//   rows         Row ids to sample.
//   num_samples  Number of samples per row; -1 selects all neighbors.
//   replace      Whether to sample with replacement; forced to false when
//                num_samples is -1.
//
// Returns the COOMatrix produced by COORowWisePick with the uniform
// num-picks and pick functions.
template COOMatrix COORowWiseSamplingUniform(
    COOMatrix mat, IdArray rows, int64_t num_samples, bool replace) {
  // If num_samples is -1, select all neighbors without replacement.
  replace = (replace && num_samples != -1);
  auto num_picks_fn = GetSamplingUniformNumPicksFn(num_samples, replace);
  auto pick_fn = GetSamplingUniformPickFn(num_samples, replace);
  return COORowWisePick(mat, rows, num_samples, replace, pick_fn, num_picks_fn);
}
// Builds the pick function used for row-wise top-k selection.
//
// Parameters:
//   weight     Array of weights; only its raw data pointer is captured.
//   ascending  When true the smallest weights come first, otherwise the
//              largest.
//
// The returned callable orders the positions [off, off + len) by weight and
// copies the first num_picks of them to out_idx. When `data` is non-null the
// weight of position i is looked up as wdata[data[i]], otherwise as wdata[i].
// The rowid and col arguments are not used.
//
// NOTE(review): the lambda captures the raw pointer but not `weight` itself,
// so the caller has to keep the array alive while the callable is in use.
template inline PickFn GetTopkPickFn(NDArray weight, bool ascending) {
  const DType* wdata = static_cast(weight->data);
  PickFn pick_fn = [ascending, wdata](
                       IdxType rowid, IdxType off, IdxType len,
                       IdxType num_picks, const IdxType* col,
                       const IdxType* data, IdxType* out_idx) {
    // Choose one of four comparators: direction x (indirect | direct) lookup.
    std::function compare_fn;
    if (ascending) {
      if (data) {
        compare_fn = [wdata, data](IdxType i, IdxType j) {
          return wdata[data[i]] < wdata[data[j]];
        };
      } else {
        compare_fn = [wdata](IdxType i, IdxType j) {
          return wdata[i] < wdata[j];
        };
      }
    } else {
      if (data) {
        compare_fn = [wdata, data](IdxType i, IdxType j) {
          return wdata[data[i]] > wdata[data[j]];
        };
      } else {
        compare_fn = [wdata](IdxType i, IdxType j) {
          return wdata[i] > wdata[j];
        };
      }
    }

    // Candidate positions off, off + 1, ..., off + len - 1.
    std::vector idx(len);
    std::iota(idx.begin(), idx.end(), off);
    // NOTE(review): the whole row is sorted although only the first num_picks
    // entries are read afterwards.
    std::sort(idx.begin(), idx.end(), compare_fn);
    for (int64_t j = 0; j < num_picks; ++j) {
      out_idx[j] = idx[j];
    }
  };
  return pick_fn;
}
// Row-wise top-k selection on a COO matrix.
//
// Parameters:
//   mat        COO matrix to select from.
//   rows       Row ids to process.
//   k          Number of entries to keep per row, forwarded to
//              GetTopkNumPicksFn and COORowWisePick.
//   weight     Weights used to rank the entries of each row.
//   ascending  Ranking direction passed to GetTopkPickFn.
//
// Returns the COOMatrix produced by COORowWisePick; `replace` is always
// passed as false.
template COOMatrix COORowWiseTopk(
    COOMatrix mat, IdArray rows, int64_t k, NDArray weight, bool ascending) {
  auto num_picks_fn = GetTopkNumPicksFn(k);
  auto pick_fn = GetTopkPickFn(weight, ascending);
  return COORowWisePick(mat, rows, k, false, pick_fn, num_picks_fn);
}
/**
 * @brief Generalized SDDMM on Csr format.
 *
 * Turns the runtime operator name and operand targets into compile-time
 * constants through SWITCH_OP and SWITCH_TARGET, then calls cpu::SDDMMCsr.
 *
 * @param op Operator name, dispatched by SWITCH_OP.
 * @param bcast Broadcast information forwarded to the kernel.
 * @param csr Sparse matrix in CSR format.
 * @param lhs Left-hand operand.
 * @param rhs Right-hand operand.
 * @param out Output array passed to the kernel.
 * @param lhs_target Target selector of the left operand; 0, 1 or 2.
 * @param rhs_target Target selector of the right operand; 0, 1 or 2.
 *
 * @note For a target outside {0, 1, 2} SWITCH_TARGET / SWITCH_RHS only emit a
 *       LOG(INFO) message and the kernel is not called.
 */
template void SDDMMCsr(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target) {
  SWITCH_OP(op, Op, {
    SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, {
      cpu::SDDMMCsr(
          bcast, csr, lhs, rhs, out);
    });
  });
}
/**
 * @brief Generalized SDDMM on Csr format with Heterograph support.
 *
 * Runs cpu::SDDMMCsr once per relation type. The number of relation types is
 * taken from lhs_nid.size().
 *
 * @param op Operator name, dispatched by SWITCH_OP.
 * @param bcast Broadcast information forwarded to every kernel call.
 * @param vec_csr One CSR matrix per relation type.
 * @param vec_lhs Left-hand operands, indexed by lhs_nid[etype].
 * @param vec_rhs Right-hand operands, indexed by rhs_nid[etype].
 * @param vec_out One output array per relation type (vector taken by value).
 * @param lhs_target Target selector of the left operand.
 * @param rhs_target Target selector of the right operand.
 * @param lhs_nid Per relation type, the index into vec_lhs.
 * @param rhs_nid Per relation type, the index into vec_rhs.
 */
template void SDDMMCsrHetero(
    const std::string& op, const BcastOff& bcast, const std::vector& vec_csr,
    const std::vector& vec_lhs, const std::vector& vec_rhs,
    std::vector vec_out, int lhs_target, int rhs_target,
    const std::vector& lhs_nid, const std::vector& rhs_nid) {
  SWITCH_OP(op, Op, {
    SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, {
      /* Call SDDMM for each relation type */
      for (dgl_type_t etype = 0; etype < lhs_nid.size(); ++etype) {
        CSRMatrix csr = vec_csr[etype];
        /* Operands are looked up through the per-relation index tables. */
        NDArray lhs = vec_lhs[lhs_nid[etype]];
        NDArray rhs = vec_rhs[rhs_nid[etype]];
        NDArray out = vec_out[etype];
        cpu::SDDMMCsr(
            bcast, csr, lhs, rhs, out);
      }
    });
  });
}
const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); /** @brief Generalized SDDMM on Coo format. */ template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target) { SWITCH_OP(op, Op, { SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, { cpu::SDDMMCoo( bcast, coo, lhs, rhs, out); }); }); } /** @brief Generalized SDDMM on Coo format with Heterograph support. 
*/
// For every relation type `etype` in [0, lhs_nid.size()) this runs the
// single-graph cpu::SDDMMCoo kernel on vec_coo[etype], reading the operands
// from vec_lhs[lhs_nid[etype]] / vec_rhs[rhs_nid[etype]] and writing to
// vec_out[etype].
// NOTE(review): template parameter lists, std::vector element types and the
// explicit template arguments of cpu::SDDMMCoo are missing from this copy of
// the file (they look stripped by the extraction) - confirm against upstream.
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& vec_lhs,
    const std::vector& vec_rhs, std::vector vec_out,
    int lhs_target, int rhs_target, const std::vector& lhs_nid,
    const std::vector& rhs_nid) {
  SWITCH_OP(op, Op, {
    SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, {
      /* Call SDDMM for each relation type */
      for (dgl_type_t etype = 0; etype < lhs_nid.size(); ++etype) {
        COOMatrix coo = vec_coo[etype];
        NDArray lhs = vec_lhs[lhs_nid[etype]];
        NDArray rhs = vec_rhs[rhs_nid[etype]];
        NDArray out = vec_out[etype];
        cpu::SDDMMCoo(
            bcast, coo, lhs, rhs, out);
      }
    });
  });
}

// Explicit instantiations of SDDMMCoo.
// NOTE(review): the six declarations below are textually identical here
// because their template argument lists are missing; presumably each one
// names a distinct (device, id type, value type) combination - verify.
template void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);
template void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);
template void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);
template void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);
template void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);
template void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);

// Explicit instantiations of SDDMMCooHetero (same caveat as above).
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& lhs,
    const std::vector& rhs, std::vector out,
    int lhs_target, int rhs_target, const std::vector& in_eid,
    const std::vector& out_eid);
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo,
    const std::vector& lhs, const std::vector& rhs,
    std::vector out, int lhs_target, int rhs_target,
    const std::vector& in_eid, const std::vector& out_eid);
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& lhs,
    const std::vector& rhs, std::vector out,
    int lhs_target, int rhs_target, const std::vector& in_eid,
    const std::vector& out_eid);
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& lhs,
    const std::vector& rhs, std::vector out,
    int lhs_target, int rhs_target, const std::vector& in_eid,
    const std::vector& out_eid);
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& lhs,
    const std::vector& rhs, std::vector out,
    int lhs_target, int rhs_target, const std::vector& in_eid,
    const std::vector& out_eid);
template void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& lhs,
    const std::vector& rhs, std::vector out,
    int lhs_target, int rhs_target, const std::vector& in_eid,
    const std::vector& out_eid);

}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cpu/sddmm.h
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file array/cpu/sddmm.h
 * @brief SDDMM CPU kernel function header.
 */
#ifndef DGL_ARRAY_CPU_SDDMM_H_
#define DGL_ARRAY_CPU_SDDMM_H_

// NOTE(review): the targets of the three bare #include directives below are
// missing from this copy of the file - restore from upstream.
#include
#include
#include

#include "../selector.h"

namespace dgl {
namespace aten {
namespace cpu {

/**
 * @brief CPU kernel of g-SDDMM on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
 * @param lhs The left hand side operand feature.
 * @param rhs The right hand side operand feature.
 * @param out The result feature on edges.
 * @note it uses node parallel strategy, different threads are responsible
 *       for the computation of different nodes.
*/
template <
    typename IdType, typename DType, typename Op, int LhsTarget = 0,
    int RhsTarget = 2>
void SDDMMCsr(
    const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs,
    NDArray out) {
  // The explicit template arguments of Ptr<>() and Selector<> are spelled out
  // here; they follow from the declared pointer types and from the
  // LhsTarget / RhsTarget template parameters.
  const bool has_idx = !IsNullArray(csr.data);
  const IdType* indptr = csr.indptr.Ptr<IdType>();
  const IdType* indices = csr.indices.Ptr<IdType>();
  const IdType* edges = csr.data.Ptr<IdType>();
  const DType* X = lhs.Ptr<DType>();
  const DType* Y = rhs.Ptr<DType>();
  const int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len,
                rhs_dim = bcast.rhs_len, reduce_size = bcast.reduce_size;
  DType* O = out.Ptr<DType>();
  runtime::parallel_for(0, csr.num_rows, [=](IdType b, IdType e) {
    for (auto rid = b; rid < e; ++rid) {
      const IdType row_start = indptr[rid], row_end = indptr[rid + 1];
      for (IdType j = row_start; j < row_end; ++j) {
        const IdType cid = indices[j];
        // Without an explicit data array the edge id is the CSR position.
        const IdType eid = has_idx ? edges[j] : j;
        DType* out_off = O + eid * dim;
        // The operand row selected for this edge does not depend on the
        // feature index k, so compute its base address once per edge.
        const DType* lhs_base =
            Op::use_lhs
                ? X + Selector<LhsTarget>::Call(rid, eid, cid) * lhs_dim
                : nullptr;
        const DType* rhs_base =
            Op::use_rhs
                ? Y + Selector<RhsTarget>::Call(rid, eid, cid) * rhs_dim
                : nullptr;
        for (int64_t k = 0; k < dim; ++k) {
          const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k;
          const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k;
          const DType* lhs_off =
              Op::use_lhs ? lhs_base + lhs_add * reduce_size : nullptr;
          const DType* rhs_off =
              Op::use_rhs ? rhs_base + rhs_add * reduce_size : nullptr;
          out_off[k] = Op::Call(lhs_off, rhs_off, reduce_size);
        }
      }
    }
  });
}

/**
 * @brief CPU kernel of g-SDDMM on Coo format.
 * @param bcast Broadcast information.
 * @param coo The COO matrix.
 * @param lhs The left hand side operand feature.
 * @param rhs The right hand side operand feature.
 * @param out The result feature on edges.
 * @note it uses edge parallel strategy, different threads are responsible
 *       for the computation of different edges.
*/ template < typename IdType, typename DType, typename Op, int LhsTarget = 0, int RhsTarget = 2> void SDDMMCoo( const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out) { const bool has_idx = !IsNullArray(coo.data); const IdType* row = coo.row.Ptr(); const IdType* col = coo.col.Ptr(); const IdType* edges = coo.data.Ptr(); const DType* X = lhs.Ptr(); const DType* Y = rhs.Ptr(); const int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len, reduce_size = bcast.reduce_size; DType* O = out.Ptr(); #pragma omp parallel for for (int64_t i = 0; i < coo.row->shape[0]; ++i) { const IdType rid = row[i]; const IdType cid = col[i]; const IdType eid = has_idx ? edges[i] : i; DType* out_off = O + eid * dim; for (int64_t k = 0; k < dim; ++k) { const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* lhs_off = Op::use_lhs ? X + Selector::Call(rid, eid, cid) * lhs_dim + lhs_add * reduce_size : nullptr; const DType* rhs_off = Op::use_rhs ? 
Y + Selector::Call(rid, eid, cid) * rhs_dim + rhs_add * reduce_size : nullptr; out_off[k] = Op::Call(lhs_off, rhs_off, bcast.reduce_size); } } } namespace op { ////////////////////////// binary operators on CPU ///////////////////////////// template struct Add { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call( const DType* lhs_off, const DType* rhs_off, int64_t len = 1) { return *lhs_off + *rhs_off; } }; template struct Sub { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call( const DType* lhs_off, const DType* rhs_off, int64_t len = 1) { return *lhs_off - *rhs_off; } }; template struct Mul { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call( const DType* lhs_off, const DType* rhs_off, int64_t len = 1) { return *lhs_off * *rhs_off; } }; template struct Div { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call( const DType* lhs_off, const DType* rhs_off, int64_t len = 1) { return *lhs_off / *rhs_off; } }; template struct CopyLhs { static constexpr bool use_lhs = true; static constexpr bool use_rhs = false; inline static DType Call( const DType* lhs_off, const DType*, int64_t len = 1) { return *lhs_off; } }; template struct CopyRhs { static constexpr bool use_lhs = false; static constexpr bool use_rhs = true; inline static DType Call( const DType*, const DType* rhs_off, int64_t len = 1) { return *rhs_off; } }; template struct Dot { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call( const DType* lhs_off, const DType* rhs_off, int64_t len = 1) { DType rst = 0; for (int64_t l = 0; l < len; ++l) { rst += lhs_off[l] * rhs_off[l]; } return rst; } }; #define SWITCH_OP(op, Op, ...) 
\ do { \ if ((op) == "add") { \ typedef dgl::aten::cpu::op::Add Op; \ { __VA_ARGS__ } \ } else if ((op) == "sub") { \ typedef dgl::aten::cpu::op::Sub Op; \ { __VA_ARGS__ } \ } else if ((op) == "mul") { \ typedef dgl::aten::cpu::op::Mul Op; \ { __VA_ARGS__ } \ } else if ((op) == "div") { \ typedef dgl::aten::cpu::op::Div Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_lhs") { \ typedef dgl::aten::cpu::op::CopyLhs Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_rhs") { \ typedef dgl::aten::cpu::op::CopyRhs Op; \ { __VA_ARGS__ } \ } else if ((op) == "dot") { \ typedef dgl::aten::cpu::op::Dot Op; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << "Unsupported SDDMM binary operator: " << op; \ } \ } while (0) } // namespace op } // namespace cpu } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_SDDMM_H_ ================================================ FILE: src/array/cpu/segment_reduce.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file kernel/cpu/segment_reduce.cc * @brief Segment reduce C APIs and definitions. */ #include "./segment_reduce.h" #include #include #include "./spmm_binary_ops.h" namespace dgl { namespace aten { /** @brief Segment Reduce operator. 
*/
// Dispatches on the reducer name: "sum" goes to cpu::SegmentSum, "max" and
// "min" go to cpu::SegmentCmp (which also fills `arg`); any other name is
// reported with LOG(FATAL).
// NOTE(review): throughout this file the template parameter lists and the
// explicit template arguments are missing (the stray `>` after
// cpu::SegmentCmp is a leftover of that) - they look stripped by the
// extraction; restore from upstream before compiling.
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg) {
  if (op == "sum") {
    cpu::SegmentSum(feat, offsets, out);
  } else if (op == "max" || op == "min") {
    if (op == "max") {
      cpu::SegmentCmp>(
          feat, offsets, out, arg);
    } else {
      cpu::SegmentCmp>(
          feat, offsets, out, arg);
    }
  } else {
    LOG(FATAL) << "Unsupported reduce function " << op;
  }
}

/** @brief Scatter Add.*/
// Thin forwarder to the cpu::ScatterAdd kernel.
template void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
  cpu::ScatterAdd(feat, idx, out);
}

/** @brief Update gradients for reduce operator max/min on heterogeneous
 * graph.*/
// Thin forwarder to the cpu::UpdateGradMinMax_hetero kernel.
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out) {
  cpu::UpdateGradMinMax_hetero(g, op, feat, idx, idx_etype, out);
}

/** @brief Backward function of segment cmp.*/
// Thin forwarder to the cpu::BackwardSegmentCmp kernel.
template void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) {
  cpu::BackwardSegmentCmp(feat, arg, out);
}

// Explicit instantiations of SegmentReduce (argument lists missing, see the
// note at the top of the file).
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);

// Explicit specializations that reject ScatterAdd for BF16 at runtime.
template <>
void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out) {
  LOG(FATAL) << "Unsupported CPU kernel for ScatterAdd for BF16.";
}
template <>
void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out) {
  LOG(FATAL) << "Unsupported CPU kernel for ScatterAdd for BF16.";
}
// Explicit instantiations of ScatterAdd for the remaining type combinations.
template void ScatterAdd(
    NDArray feat,
    NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray arg, NDArray out);

// Explicit specializations that reject UpdateGradMinMax_hetero for BF16 at
// runtime.
template <>
void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out) {
  LOG(FATAL) << "Unsupported CPU kernel for UpdateGradMinMax_hetero for BF16.";
}
template <>
void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out) {
  LOG(FATAL) << "Unsupported CPU kernel for UpdateGradMinMax_hetero for BF16.";
}
// Explicit instantiations of UpdateGradMinMax_hetero for the remaining type
// combinations.
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);

// Explicit instantiations of BackwardSegmentCmp.
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);

}  // namespace aten
}  // namespace dgl
================================================ FILE: src/array/cpu/segment_reduce.h ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/spmm.h * @brief Segment reduce kernel function header. */ #ifndef DGL_ARRAY_CPU_SEGMENT_REDUCE_H_ #define DGL_ARRAY_CPU_SEGMENT_REDUCE_H_ #include #include #include #include #include namespace dgl { namespace aten { namespace cpu { /** * @brief CPU kernel of segment sum. * @param feat The input tensor. * @param offsets The offset tensor storing the ranges of segments. * @param out The output tensor. */ template void SegmentSum(NDArray feat, NDArray offsets, NDArray out) { if (std::is_same::value) LOG(FATAL) << "Unsupported CPU kernel for SegmentSum for BF16."; int n = out->shape[0]; int dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const DType* feat_data = feat.Ptr(); const IdType* offsets_data = offsets.Ptr(); DType* out_data = out.Ptr(); runtime::parallel_for(0, n, [=](int b, int e) { for (auto i = b; i < e; ++i) { for (IdType j = offsets_data[i]; j < offsets_data[i + 1]; ++j) { for (int k = 0; k < dim; ++k) { out_data[i * dim + k] += feat_data[j * dim + k]; } } } }); } /** * @brief CPU kernel of segment min/max. * @param feat The input tensor. * @param offsets The offset tensor storing the ranges of segments. * @param out The output tensor. * @param arg An auxiliary tensor storing the argmin/max information * used in backward phase. 
*/ template void SegmentCmp(NDArray feat, NDArray offsets, NDArray out, NDArray arg) { int n = out->shape[0]; int dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const DType* feat_data = feat.Ptr(); const IdType* offsets_data = offsets.Ptr(); DType* out_data = out.Ptr(); IdType* arg_data = arg.Ptr(); std::fill(out_data, out_data + out.NumElements(), Cmp::zero); std::fill(arg_data, arg_data + arg.NumElements(), -1); runtime::parallel_for(0, n, [=](int b, int e) { for (auto i = b; i < e; ++i) { for (IdType j = offsets_data[i]; j < offsets_data[i + 1]; ++j) { for (int k = 0; k < dim; ++k) { const DType val = feat_data[j * dim + k]; if (Cmp::Call(out_data[i * dim + k], val)) { out_data[i * dim + k] = val; arg_data[i * dim + k] = j; } } } } }); } /** * @brief CPU kernel of Scatter Add (on first dimension) operator. * @note math equation: out[idx[i], *] += feat[i, *] * @param feat The input tensor. * @param idx The indices tensor. * @param out The output tensor. */ template void ScatterAdd(NDArray feat, NDArray idx, NDArray out) { int n = feat->shape[0]; int dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const DType* feat_data = feat.Ptr(); const IdType* idx_data = idx.Ptr(); DType* out_data = out.Ptr(); #pragma omp parallel for for (int i = 0; i < n; ++i) { const int write_row = idx_data[i]; for (int k = 0; k < dim; ++k) { #pragma omp atomic out_data[write_row * dim + k] += feat_data[i * dim + k]; } } } /** * @brief CPU kernel to update gradients for reduce op max/min * @param graph The input heterogeneous graph. * @param op The binary operator, could be `copy_u`, `copy_e'. * @param list_feat List of the input tensors. * @param list_idx List of the indices tensors. * @param list_idx_etype List of the node- or edge-type tensors. * @param list_out List of the output tensors. 
*/ template void UpdateGradMinMax_hetero( HeteroGraphPtr graph, const std::string& op, const std::vector& list_feat, const std::vector& list_idx, const std::vector& list_idx_types, std::vector* list_out) { if (op == "copy_lhs" || op == "copy_rhs") { std::vector> src_dst_ntypes( graph->NumVertexTypes(), std::vector()); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { auto pair = graph->meta_graph()->FindEdge(etype); const dgl_id_t dst_ntype = pair.first; // graph is reversed const dgl_id_t src_ntype = pair.second; auto same_src_dst_ntype = std::find( std::begin(src_dst_ntypes[dst_ntype]), std::end(src_dst_ntypes[dst_ntype]), src_ntype); // if op is "copy_lhs", relation type with same src and dst node type will // be updated once if (op == "copy_lhs" && same_src_dst_ntype != std::end(src_dst_ntypes[dst_ntype])) continue; src_dst_ntypes[dst_ntype].push_back(src_ntype); const DType* feat_data = list_feat[dst_ntype].Ptr(); const IdType* idx_data = list_idx[dst_ntype].Ptr(); const IdType* idx_type_data = list_idx_types[dst_ntype].Ptr(); int type = (op == "copy_lhs") ? src_ntype : etype; DType* out_data = (*list_out)[type].Ptr(); int dim = 1; for (int i = 1; i < (*list_out)[type]->ndim; ++i) dim *= (*list_out)[type]->shape[i]; int n = list_feat[dst_ntype]->shape[0]; #pragma omp parallel for for (int i = 0; i < n; ++i) { for (int k = 0; k < dim; ++k) { if (type == idx_type_data[i * dim + k]) { const int write_row = idx_data[i * dim + k]; #pragma omp atomic out_data[write_row * dim + k] += feat_data[i * dim + k]; // feat = dZ } } } } } else { LOG(FATAL) << "Unsupported binary operator: " << op; } } /** * @brief CPU kernel of backward phase of segment min/max. * @note math equation: out[arg[i, k], k] = feat[i, k] * @param feat The input tensor. * @param arg The argmin/argmax tensor. * @param out The output tensor. 
*/ template void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) { int n = feat->shape[0]; int dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const DType* feat_data = feat.Ptr(); const IdType* arg_data = arg.Ptr(); DType* out_data = out.Ptr(); runtime::parallel_for(0, n, [=](int b, int e) { for (auto i = b; i < e; ++i) { for (int k = 0; k < dim; ++k) { int write_row = arg_data[i * dim + k]; if (write_row >= 0) out_data[write_row * dim + k] = feat_data[i * dim + k]; } } }); } } // namespace cpu } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_SEGMENT_REDUCE_H_ ================================================ FILE: src/array/cpu/spmat_op_impl_coo.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/spmat_op_impl.cc * @brief CPU implementation of COO sparse matrix operators */ #include #include #include #include #include #include #include #include "array_utils.h" namespace dgl { using runtime::NDArray; using runtime::parallel_for; namespace aten { namespace impl { /** * TODO(BarclayII): * For row-major sorted COOs, we have faster implementation with binary search, * sorted search, etc. Later we should benchmark how much we can gain with * sorted COOs on hypersparse graphs. 
*/

// NOTE(review): throughout this file the template parameter lists, the target
// types of static_cast, the element types of std::vector / std::pair /
// std::unordered_set and the explicit template arguments of calls are missing
// (they look stripped by the extraction) - restore from upstream.

///////////////////////////// COOIsNonZero /////////////////////////////

// Returns true when the COO holds at least one entry at (row, col).
// Linear scan over all stored entries; both indices are range-checked first.
template bool COOIsNonZero(COOMatrix coo, int64_t row, int64_t col) {
  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
  CHECK(col >= 0 && col < coo.num_cols) << "Invalid col index: " << col;
  const IdType *coo_row_data = static_cast(coo.row->data);
  const IdType *coo_col_data = static_cast(coo.col->data);
  for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
    if (coo_row_data[i] == row && coo_col_data[i] == col) return true;
  }
  return false;
}

template bool COOIsNonZero(COOMatrix, int64_t, int64_t);
template bool COOIsNonZero(COOMatrix, int64_t, int64_t);

// Vectorised form: element k of the result is 1 when (row[k], col[k]) is
// present and 0 otherwise. A length-1 `row` or `col` is broadcast against the
// other array (its stride becomes 0).
// NOTE(review): unlike COOGetData / COOGetDataAndIndices there is no CHECK
// that the lengths are equal or that one of them is 1; with e.g. lengths 3
// and 2 the shorter array is indexed past its end - consider adding the check.
template NDArray COOIsNonZero(COOMatrix coo, NDArray row, NDArray col) {
  const auto rowlen = row->shape[0];
  const auto collen = col->shape[0];
  const auto rstlen = std::max(rowlen, collen);
  NDArray rst = NDArray::Empty({rstlen}, row->dtype, row->ctx);
  IdType *rst_data = static_cast(rst->data);
  const IdType *row_data = static_cast(row->data);
  const IdType *col_data = static_cast(col->data);
  const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
  const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;
  const int64_t kmax = std::max(rowlen, collen);
  parallel_for(0, kmax, [=](size_t b, size_t e) {
    for (auto k = b; k < e; ++k) {
      int64_t i = row_stride * k;
      int64_t j = col_stride * k;
      rst_data[k] = COOIsNonZero(coo, row_data[i], col_data[j]) ?
          1 : 0;
    }
  });
  return rst;
}

template NDArray COOIsNonZero(COOMatrix, NDArray, NDArray);
template NDArray COOIsNonZero(COOMatrix, NDArray, NDArray);

///////////////////////////// COOHasDuplicate /////////////////////////////

// Returns true as soon as a (row, col) pair is seen for the second time.
// Uses a hash set of the pairs encountered so far.
template bool COOHasDuplicate(COOMatrix coo) {
  std::unordered_set, PairHash> hashmap;
  const IdType *src_data = static_cast(coo.row->data);
  const IdType *dst_data = static_cast(coo.col->data);
  const auto nnz = coo.row->shape[0];
  for (IdType eid = 0; eid < nnz; ++eid) {
    const auto &p = std::make_pair(src_data[eid], dst_data[eid]);
    if (hashmap.count(p)) {
      return true;
    } else {
      hashmap.insert(p);
    }
  }
  return false;
}

template bool COOHasDuplicate(COOMatrix coo);
template bool COOHasDuplicate(COOMatrix coo);

///////////////////////////// COOGetRowNNZ /////////////////////////////

// Counts the stored entries whose row index equals `row` (linear scan).
template int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) {
  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;
  const IdType *coo_row_data = static_cast(coo.row->data);
  int64_t result = 0;
  for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
    if (coo_row_data[i] == row) ++result;
  }
  return result;
}

template int64_t COOGetRowNNZ(COOMatrix, int64_t);
template int64_t COOGetRowNNZ(COOMatrix, int64_t);

// Vectorised form: one scalar COOGetRowNNZ call (a full scan of the COO) per
// requested row, run in parallel over the requested rows.
template NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) {
  CHECK_SAME_DTYPE(coo.col, rows);
  const auto len = rows->shape[0];
  const IdType *vid_data = static_cast(rows->data);
  NDArray rst = NDArray::Empty({len}, rows->dtype, rows->ctx);
  IdType *rst_data = static_cast(rst->data);
#pragma omp parallel for
  for (int64_t i = 0; i < len; ++i) {
    rst_data[i] = COOGetRowNNZ(coo, vid_data[i]);
  }
  return rst;
}

template NDArray COOGetRowNNZ(COOMatrix, NDArray);
template NDArray COOGetRowNNZ(COOMatrix, NDArray);

////////////////////////// COOGetRowDataAndIndices /////////////////////////////

// Collects, for one row, the data values and the column indices of its
// entries. Returned pair is (data, indices). When the COO has no data array
// the position of the entry is used as its data value.
template std::pair COOGetRowDataAndIndices(
    COOMatrix coo, int64_t row) {
  CHECK(row >= 0 && row < coo.num_rows) << "Invalid row index: " << row;

  const IdType *coo_row_data =
      static_cast(coo.row->data);
  const IdType *coo_col_data = static_cast(coo.col->data);
  const IdType *coo_data =
      COOHasData(coo) ? static_cast(coo.data->data) : nullptr;

  std::vector indices;
  std::vector data;

  for (int64_t i = 0; i < coo.row->shape[0]; ++i) {
    if (coo_row_data[i] == row) {
      indices.push_back(coo_col_data[i]);
      data.push_back(coo_data ? coo_data[i] : i);
    }
  }

  return std::make_pair(
      NDArray::FromVector(data), NDArray::FromVector(indices));
}

template std::pair COOGetRowDataAndIndices(
    COOMatrix, int64_t);
template std::pair COOGetRowDataAndIndices(
    COOMatrix, int64_t);

///////////////////////////// COOGetData /////////////////////////////

// For each queried (row, col) pair, returns the data value of one matching
// entry, or -1 when the pair is absent (the result is pre-filled with -1).
// A length-1 `rows` or `cols` is broadcast against the other array.
template IdArray COOGetData(COOMatrix coo, IdArray rows, IdArray cols) {
  const int64_t rowlen = rows->shape[0];
  const int64_t collen = cols->shape[0];
  CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1))
      << "Invalid row and col Id array:" << rows << " " << cols;
  const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
  const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;
  const IdType *row_data = rows.Ptr();
  const IdType *col_data = cols.Ptr();

  const IdType *coo_row = coo.row.Ptr();
  const IdType *coo_col = coo.col.Ptr();
  const IdType *data = COOHasData(coo) ? coo.data.Ptr() : nullptr;
  const int64_t nnz = coo.row->shape[0];

  const int64_t retlen = std::max(rowlen, collen);
  IdArray ret = Full(-1, retlen, rows->dtype.bits, rows->ctx);
  IdType *ret_data = ret.Ptr();

  // TODO(minjie): We might need to consider sorting the COO beforehand
  // especially when the number of (row, col) pairs is large. Need more
  // benchmarks to justify the choice.
if (coo.row_sorted) {
    // Rows are sorted: binary-search the first entry of the queried row, then
    // walk that row's entries until the column matches.
    parallel_for(0, retlen, [&](size_t b, size_t e) {
      for (auto p = b; p < e; ++p) {
        const IdType row_id = row_data[p * row_stride],
                     col_id = col_data[p * col_stride];
        auto it = std::lower_bound(coo_row, coo_row + nnz, row_id);
        for (; it < coo_row + nnz && *it == row_id; ++it) {
          const auto idx = it - coo_row;
          if (coo_col[idx] == col_id) {
            // Without a data array the entry position is the data value.
            ret_data[p] = data ? data[idx] : idx;
            break;
          }
        }
      }
    });
  } else {
    // Unsorted: scan all entries and stop at the first match.
#pragma omp parallel for
    for (int64_t p = 0; p < retlen; ++p) {
      const IdType row_id = row_data[p * row_stride],
                   col_id = col_data[p * col_stride];
      for (int64_t idx = 0; idx < nnz; ++idx) {
        if (coo_row[idx] == row_id && coo_col[idx] == col_id) {
          ret_data[p] = data ? data[idx] : idx;
          break;
        }
      }
    }
  }

  return ret;
}

template IdArray COOGetData(COOMatrix, IdArray, IdArray);
template IdArray COOGetData(COOMatrix, IdArray, IdArray);

///////////////////////////// COOGetDataAndIndices /////////////////////////////

// For each queried (row, col) pair, emits one (row, col, data) triple per
// matching entry, so duplicated entries produce several triples and absent
// pairs produce none. Returns {rows, cols, data}.
// A length-1 `rows` or `cols` is broadcast against the other array.
// NOTE(review): template parameter list, static_cast target types and
// std::vector element types are missing from this copy of the file.
template std::vector COOGetDataAndIndices(
    COOMatrix coo, NDArray rows, NDArray cols) {
  CHECK_SAME_DTYPE(coo.col, rows);
  CHECK_SAME_DTYPE(coo.col, cols);
  const int64_t rowlen = rows->shape[0];
  const int64_t collen = cols->shape[0];
  const int64_t len = std::max(rowlen, collen);

  CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1))
      << "Invalid row and col id array.";

  const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
  const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;
  const IdType *row_data = static_cast(rows->data);
  const IdType *col_data = static_cast(cols->data);

  const IdType *coo_row_data = static_cast(coo.row->data);
  const IdType *coo_col_data = static_cast(coo.col->data);
  const IdType *data =
      COOHasData(coo) ? static_cast(coo.data->data) : nullptr;

  std::vector ret_rows, ret_cols;
  std::vector ret_data;
  // `len` is only a lower-bound guess: duplicates can yield more results.
  ret_rows.reserve(len);
  ret_cols.reserve(len);
  ret_data.reserve(len);

  // NOTE(BarclayII): With a small number of lookups, linear scan is faster.
// The threshold 200 comes from benchmarking both algorithms on a P3.8x // instance. I also tried sorting plus binary search. The speed gain is only // significant for medium-sized graphs and lookups, so I didn't include it. if (len >= 200) { // TODO(BarclayII) Ideally we would want to cache this object. However I'm // not sure what is the best way to do so since this object is valid for CPU // only. std::unordered_multimap, IdType, PairHash> pair_map; pair_map.reserve(coo.row->shape[0]); for (int64_t k = 0; k < coo.row->shape[0]; ++k) pair_map.emplace( std::make_pair(coo_row_data[k], coo_col_data[k]), data ? data[k] : k); for (int64_t i = 0, j = 0; i < rowlen && j < collen; i += row_stride, j += col_stride) { const IdType row_id = row_data[i], col_id = col_data[j]; CHECK(row_id >= 0 && row_id < coo.num_rows) << "Invalid row index: " << row_id; CHECK(col_id >= 0 && col_id < coo.num_cols) << "Invalid col index: " << col_id; auto range = pair_map.equal_range({row_id, col_id}); for (auto it = range.first; it != range.second; ++it) { ret_rows.push_back(row_id); ret_cols.push_back(col_id); ret_data.push_back(it->second); } } } else { for (int64_t i = 0, j = 0; i < rowlen && j < collen; i += row_stride, j += col_stride) { const IdType row_id = row_data[i], col_id = col_data[j]; CHECK(row_id >= 0 && row_id < coo.num_rows) << "Invalid row index: " << row_id; CHECK(col_id >= 0 && col_id < coo.num_cols) << "Invalid col index: " << col_id; for (int64_t k = 0; k < coo.row->shape[0]; ++k) { if (coo_row_data[k] == row_id && coo_col_data[k] == col_id) { ret_rows.push_back(row_id); ret_cols.push_back(col_id); ret_data.push_back(data ? 
data[k] : k); } } } } return { NDArray::FromVector(ret_rows), NDArray::FromVector(ret_cols), NDArray::FromVector(ret_data)}; } template std::vector COOGetDataAndIndices( COOMatrix coo, NDArray rows, NDArray cols); template std::vector COOGetDataAndIndices( COOMatrix coo, NDArray rows, NDArray cols); ///////////////////////////// COOTranspose ///////////////////////////// template COOMatrix COOTranspose(COOMatrix coo) { return COOMatrix{coo.num_cols, coo.num_rows, coo.col, coo.row, coo.data}; } template COOMatrix COOTranspose(COOMatrix coo); template COOMatrix COOTranspose(COOMatrix coo); ///////////////////////////// COOToCSR ///////////////////////////// namespace { template CSRMatrix SortedCOOToCSR(const COOMatrix &coo) { const int64_t N = coo.num_rows; const int64_t NNZ = coo.row->shape[0]; const IdType *const row_data = static_cast(coo.row->data); const IdType *const data = COOHasData(coo) ? static_cast(coo.data->data) : nullptr; NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx); NDArray ret_indices = coo.col; NDArray ret_data = data == nullptr ? NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx) : coo.data; // compute indptr IdType *const Bp = static_cast(ret_indptr->data); Bp[0] = 0; IdType *const fill_data = data ? nullptr : static_cast(ret_data->data); if (NNZ > 0) { auto num_threads = omp_get_max_threads(); parallel_for(0, num_threads, [&](int b, int e) { for (auto thread_id = b; thread_id < e; ++thread_id) { // We partition the set the of non-zeros among the threads const int64_t nz_chunk = (NNZ + num_threads - 1) / num_threads; const int64_t nz_start = thread_id * nz_chunk; const int64_t nz_end = std::min(NNZ, nz_start + nz_chunk); // Each thread searchs the row array for a change, and marks it's // location in Bp. Threads, other than the first, start at the last // index covered by the previous, in order to detect changes in the row // array between thread partitions. 
This means that each thread after // the first, searches the range [nz_start-1, nz_end). That is, // if we had 10 non-zeros, and 4 threads, the indexes searched by each // thread would be: // 0: [0, 1, 2] // 1: [2, 3, 4, 5] // 2: [5, 6, 7, 8] // 3: [8, 9] // // That way, if the row array were [0, 0, 1, 2, 2, 2, 4, 5, 5, 6], each // change in row would be captured by one thread: // // 0: [0, 0, 1] - row 0 // 1: [1, 2, 2, 2] - row 1 // 2: [2, 4, 5, 5] - rows 2, 3, and 4 // 3: [5, 6] - rows 5 and 6 // int64_t row = 0; if (nz_start < nz_end) { row = nz_start == 0 ? 0 : row_data[nz_start - 1]; for (int64_t i = nz_start; i < nz_end; ++i) { while (row != row_data[i]) { ++row; Bp[row] = i; } } // We will not detect the row change for the last row, nor any empty // rows at the end of the matrix, so the last active thread needs // mark all remaining rows in Bp with NNZ. if (nz_end == NNZ) { while (row < N) { ++row; Bp[row] = NNZ; } } if (fill_data) { // TODO(minjie): Many of our current implementation assumes that CSR // must have // a data array. This is a temporary workaround. Remove this // after: // - The old immutable graph implementation is deprecated. // - The old binary reduce kernel is deprecated. std::iota(fill_data + nz_start, fill_data + nz_end, nz_start); } } } }); } else { std::fill(Bp, Bp + N + 1, 0); } return CSRMatrix( coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data, coo.col_sorted); } template CSRMatrix UnSortedSparseCOOToCSR(const COOMatrix &coo) { // Unsigned version of the original integer index data type. // It avoids overflow in (N + num_threads) and (n_start + n_chunk) below. typedef typename std::make_unsigned::type UIdType; const UIdType N = coo.num_rows; const int64_t NNZ = coo.row->shape[0]; const IdType *const row_data = static_cast(coo.row->data); const IdType *const col_data = static_cast(coo.col->data); const IdType *const data = COOHasData(coo) ? 
static_cast(coo.data->data) : nullptr; NDArray ret_indptr = NDArray::Empty( {static_cast(N) + 1}, coo.row->dtype, coo.row->ctx); NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); NDArray ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); IdType *const Bp = static_cast(ret_indptr->data); Bp[N] = 0; IdType *const Bi = static_cast(ret_indices->data); IdType *const Bx = static_cast(ret_data->data); // store sorted data and original index. NDArray sorted_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); NDArray sorted_data_pos = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); IdType *const Sx = static_cast(sorted_data->data); IdType *const Si = static_cast(sorted_data_pos->data); // Lower number of threads if cost of parallelization is grater than gain // from making calculation parallel. const int64_t min_chunk_size = 1000; const int64_t num_threads_for_batch = 2 + (NNZ + N) / min_chunk_size; const int num_threads_required = std::min( static_cast(omp_get_max_threads()), num_threads_for_batch); // record row_idx in each thread. std::vector> p_sum( num_threads_required, std::vector(num_threads_required)); #pragma omp parallel num_threads(num_threads_required) { const int num_threads = omp_get_num_threads(); const int thread_id = omp_get_thread_num(); CHECK_LT(thread_id, num_threads); const int64_t nz_chunk = (NNZ + num_threads - 1) / num_threads; const int64_t nz_start = thread_id * nz_chunk; const int64_t nz_end = std::min(NNZ, nz_start + nz_chunk); const UIdType n_chunk = (N + num_threads - 1) / num_threads; const UIdType n_start = thread_id * n_chunk; const UIdType n_end = std::min(N, n_start + n_chunk); for (auto i = n_start; i < n_end; ++i) { Bp[i] = 0; } // iterate on NNZ data and count row_idx. 
for (auto i = nz_start; i < nz_end; ++i) { const IdType row_idx = row_data[i]; const IdType row_thread_id = row_idx / n_chunk; ++p_sum[thread_id][row_thread_id]; } #pragma omp barrier #pragma omp master // accumulate row_idx. { int64_t cum = 0; for (int j = 0; j < num_threads; ++j) { for (int i = 0; i < num_threads; ++i) { auto tmp = p_sum[i][j]; p_sum[i][j] = cum; cum += tmp; } } CHECK_EQ(cum, NNZ); } #pragma omp barrier const int64_t i_start = p_sum[0][thread_id]; const int64_t i_end = thread_id + 1 == num_threads ? NNZ : p_sum[0][thread_id + 1]; #pragma omp barrier // sort data by row_idx and place into Sx/Si. auto &data_pos = p_sum[thread_id]; for (auto i = nz_start; i < nz_end; ++i) { const IdType row_idx = row_data[i]; const IdType row_thread_id = row_idx / n_chunk; const int64_t pos = data_pos[row_thread_id]++; Sx[pos] = data == nullptr ? i : data[i]; Si[pos] = i; } #pragma omp barrier // Now we're able to do coo2csr on sorted data in each thread in parallel. // compute data number on each row_idx. for (auto i = i_start; i < i_end; ++i) { const UIdType row_idx = row_data[Si[i]]; ++Bp[row_idx + 1]; } // accumulate on each row IdType cumsum = i_start; for (auto i = n_start + 1; i <= n_end; ++i) { const auto tmp = Bp[i]; Bp[i] = cumsum; cumsum += tmp; } // update Bi/Bp/Bx for (auto i = i_start; i < i_end; ++i) { const UIdType row_idx = row_data[Si[i]]; const int64_t dest = (Bp[row_idx + 1]++); Bi[dest] = col_data[Si[i]]; Bx[dest] = Sx[i]; } } return CSRMatrix( coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data, coo.col_sorted); } template CSRMatrix UnSortedDenseCOOToCSR(const COOMatrix &coo) { // Unsigned version of the original integer index data type. // It avoids overflow in (N + num_threads) and (n_start + n_chunk) below. 
typedef typename std::make_unsigned::type UIdType; const UIdType N = coo.num_rows; const int64_t NNZ = coo.row->shape[0]; const IdType *const row_data = static_cast(coo.row->data); const IdType *const col_data = static_cast(coo.col->data); const IdType *const data = COOHasData(coo) ? static_cast(coo.data->data) : nullptr; NDArray ret_indptr = NDArray::Empty( {static_cast(N) + 1}, coo.row->dtype, coo.row->ctx); NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); NDArray ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); IdType *const Bp = static_cast(ret_indptr->data); Bp[0] = 0; IdType *const Bi = static_cast(ret_indices->data); IdType *const Bx = static_cast(ret_data->data); // the offset within each row, that each thread will write to std::vector> local_ptrs; std::vector thread_prefixsum; #pragma omp parallel { const int num_threads = omp_get_num_threads(); const int thread_id = omp_get_thread_num(); CHECK_LT(thread_id, num_threads); const int64_t nz_chunk = (NNZ + num_threads - 1) / num_threads; const int64_t nz_start = thread_id * nz_chunk; const int64_t nz_end = std::min(NNZ, nz_start + nz_chunk); const UIdType n_chunk = (N + num_threads - 1) / num_threads; const UIdType n_start = thread_id * n_chunk; const UIdType n_end = std::min(N, n_start + n_chunk); #pragma omp master { local_ptrs.resize(num_threads); thread_prefixsum.resize(num_threads + 1); } #pragma omp barrier local_ptrs[thread_id].resize(N, 0); for (int64_t i = nz_start; i < nz_end; ++i) { ++local_ptrs[thread_id][row_data[i]]; } #pragma omp barrier // compute prefixsum in parallel int64_t sum = 0; for (UIdType i = n_start; i < n_end; ++i) { IdType tmp = 0; for (int j = 0; j < num_threads; ++j) { auto previous = local_ptrs[j][i]; local_ptrs[j][i] = tmp; tmp += previous; } sum += tmp; Bp[i + 1] = sum; } thread_prefixsum[thread_id + 1] = sum; #pragma omp barrier #pragma omp master { for (int i = 0; i < num_threads; ++i) { thread_prefixsum[i + 1] += 
thread_prefixsum[i]; } CHECK_EQ(thread_prefixsum[num_threads], NNZ); } #pragma omp barrier sum = thread_prefixsum[thread_id]; for (UIdType i = n_start; i < n_end; ++i) { Bp[i + 1] += sum; } #pragma omp barrier for (int64_t i = nz_start; i < nz_end; ++i) { const IdType r = row_data[i]; const int64_t index = Bp[r] + local_ptrs[thread_id][r]++; Bi[index] = col_data[i]; Bx[index] = data ? data[i] : i; } } CHECK_EQ(Bp[N], NNZ); return CSRMatrix( coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data, coo.col_sorted); } // complexity: time O(NNZ), space O(1) template CSRMatrix UnSortedSmallCOOToCSR(COOMatrix coo) { const int64_t N = coo.num_rows; const int64_t NNZ = coo.row->shape[0]; const IdType *row_data = static_cast(coo.row->data); const IdType *col_data = static_cast(coo.col->data); const IdType *data = COOHasData(coo) ? static_cast(coo.data->data) : nullptr; NDArray ret_indptr = NDArray::Empty({N + 1}, coo.row->dtype, coo.row->ctx); NDArray ret_indices = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); NDArray ret_data = NDArray::Empty({NNZ}, coo.row->dtype, coo.row->ctx); IdType *Bp = static_cast(ret_indptr->data); IdType *Bi = static_cast(ret_indices->data); IdType *Bx = static_cast(ret_data->data); // Count elements in each row std::fill(Bp, Bp + N, 0); for (int64_t i = 0; i < NNZ; ++i) { Bp[row_data[i]]++; } // Convert to indexes for (IdType i = 0, cumsum = 0; i < N; ++i) { const IdType temp = Bp[i]; Bp[i] = cumsum; cumsum += temp; } for (int64_t i = 0; i < NNZ; ++i) { const IdType r = row_data[i]; Bi[Bp[r]] = col_data[i]; Bx[Bp[r]] = data ? 
data[i] : i; Bp[r]++; } // Restore the indptr for (int64_t i = N; i > 0; --i) { Bp[i] = Bp[i - 1]; } Bp[0] = 0; return CSRMatrix( coo.num_rows, coo.num_cols, ret_indptr, ret_indices, ret_data, coo.col_sorted); } enum class COOToCSRAlg { sorted = 0, unsortedSmall, unsortedSparse, unsortedDense }; /** * Chose COO to CSR format conversion algorithm for given COO matrix according * to heuristic based on measured performance. * * Implementation and complexity details. N: num_nodes, NNZ: num_edges, P: * num_threads. * 1. If row is sorted in COO, SortedCOOToCSR<> is applied. Time: O(NNZ/P), * space: O(1). * 2 If row is NOT sorted in COO and graph is small (small number of NNZ), * UnSortedSmallCOOToCSR<> is applied. Time: O(NNZ), space O(N). * 3 If row is NOT sorted in COO and graph is sparse (low average degree), * UnSortedSparseCOOToCSR<> is applied. Time: O(NNZ/P + N/P + P^2), * space O(NNZ + P^2). * 4. If row is NOT sorted in COO and graph is dense (medium/high average * degree), UnSortedDenseCOOToCSR<> is applied. Time: O(NNZ/P + N/P), * space O(NNZ + N*P). * * Note: * If you change this function, change also _TestCOOToCSRAlgs in * tests/cpp/test_spmat_coo.cc */ template inline COOToCSRAlg WhichCOOToCSR(const COOMatrix &coo) { if (coo.row_sorted) { return COOToCSRAlg::sorted; } else { #ifdef _WIN32 // On Windows omp_get_max_threads() gives larger value than later OMP can // spawn. int64_t num_threads; #pragma omp parallel #pragma master { num_threads = omp_get_num_threads(); } #else const int64_t num_threads = omp_get_max_threads(); #endif const int64_t N = coo.num_rows; const int64_t NNZ = coo.row->shape[0]; // Parameters below are heuristically chosen according to measured // performance. 
const int64_t type_scale = sizeof(IdType) >> 1; const int64_t small = 50 * num_threads * type_scale * type_scale; if (NNZ < small || num_threads == 1) { // For relatively small number of non zero elements cost of spread // algorithm between threads is bigger than improvements from using // many cores return COOToCSRAlg::unsortedSmall; } else if (type_scale * NNZ < num_threads * N) { // For relatively small number of non zero elements in matrix, sparse // parallel version of algorithm is more efficient than dense. return COOToCSRAlg::unsortedSparse; } return COOToCSRAlg::unsortedDense; } } } // namespace template CSRMatrix COOToCSR(COOMatrix coo) { CHECK_NO_OVERFLOW(coo.row->dtype, coo.row->shape[0]); switch (WhichCOOToCSR(coo)) { case COOToCSRAlg::sorted: return SortedCOOToCSR(coo); case COOToCSRAlg::unsortedSmall: default: return UnSortedSmallCOOToCSR(coo); case COOToCSRAlg::unsortedSparse: return UnSortedSparseCOOToCSR(coo); case COOToCSRAlg::unsortedDense: return UnSortedDenseCOOToCSR(coo); } } template CSRMatrix COOToCSR(COOMatrix coo); template CSRMatrix COOToCSR(COOMatrix coo); ///////////////////////////// COOSliceRows ///////////////////////////// template COOMatrix COOSliceRows(COOMatrix coo, int64_t start, int64_t end) { // TODO(minjie): use binary search when coo.row_sorted is true CHECK(start >= 0 && start < coo.num_rows) << "Invalid start row " << start; CHECK(end > 0 && end <= coo.num_rows) << "Invalid end row " << end; const IdType *coo_row_data = static_cast(coo.row->data); const IdType *coo_col_data = static_cast(coo.col->data); const IdType *coo_data = COOHasData(coo) ? static_cast(coo.data->data) : nullptr; std::vector ret_row, ret_col; std::vector ret_data; for (int64_t i = 0; i < coo.row->shape[0]; ++i) { const IdType row_id = coo_row_data[i]; const IdType col_id = coo_col_data[i]; if (row_id < end && row_id >= start) { ret_row.push_back(row_id - start); ret_col.push_back(col_id); ret_data.push_back(coo_data ? 
coo_data[i] : i); } } return COOMatrix( end - start, coo.num_cols, NDArray::FromVector(ret_row), NDArray::FromVector(ret_col), NDArray::FromVector(ret_data), coo.row_sorted, coo.col_sorted); } template COOMatrix COOSliceRows(COOMatrix, int64_t, int64_t); template COOMatrix COOSliceRows(COOMatrix, int64_t, int64_t); template COOMatrix COOSliceRows(COOMatrix coo, NDArray rows) { const IdType *coo_row_data = static_cast(coo.row->data); const IdType *coo_col_data = static_cast(coo.col->data); const IdType *coo_data = COOHasData(coo) ? static_cast(coo.data->data) : nullptr; std::vector ret_row, ret_col; std::vector ret_data; IdHashMap hashmap(rows); for (int64_t i = 0; i < coo.row->shape[0]; ++i) { const IdType row_id = coo_row_data[i]; const IdType col_id = coo_col_data[i]; const IdType mapped_row_id = hashmap.Map(row_id, -1); if (mapped_row_id != -1) { ret_row.push_back(mapped_row_id); ret_col.push_back(col_id); ret_data.push_back(coo_data ? coo_data[i] : i); } } return COOMatrix{ rows->shape[0], coo.num_cols, NDArray::FromVector(ret_row), NDArray::FromVector(ret_col), NDArray::FromVector(ret_data), coo.row_sorted, coo.col_sorted}; } template COOMatrix COOSliceRows(COOMatrix, NDArray); template COOMatrix COOSliceRows(COOMatrix, NDArray); ///////////////////////////// COOSliceMatrix ///////////////////////////// template COOMatrix COOSliceMatrix( COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols) { const IdType *coo_row_data = static_cast(coo.row->data); const IdType *coo_col_data = static_cast(coo.col->data); const IdType *coo_data = COOHasData(coo) ? 
static_cast(coo.data->data) : nullptr; IdHashMap row_map(rows), col_map(cols); std::vector ret_row, ret_col; std::vector ret_data; for (int64_t i = 0; i < coo.row->shape[0]; ++i) { const IdType row_id = coo_row_data[i]; const IdType col_id = coo_col_data[i]; const IdType mapped_row_id = row_map.Map(row_id, -1); if (mapped_row_id != -1) { const IdType mapped_col_id = col_map.Map(col_id, -1); if (mapped_col_id != -1) { ret_row.push_back(mapped_row_id); ret_col.push_back(mapped_col_id); ret_data.push_back(coo_data ? coo_data[i] : i); } } } return COOMatrix( rows->shape[0], cols->shape[0], NDArray::FromVector(ret_row), NDArray::FromVector(ret_col), NDArray::FromVector(ret_data), coo.row_sorted, coo.col_sorted); } template COOMatrix COOSliceMatrix( COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols); template COOMatrix COOSliceMatrix( COOMatrix coo, runtime::NDArray rows, runtime::NDArray cols); ///////////////////////////// COOReorder ///////////////////////////// template COOMatrix COOReorder( COOMatrix coo, runtime::NDArray new_row_id_arr, runtime::NDArray new_col_id_arr) { CHECK_SAME_DTYPE(coo.row, new_row_id_arr); CHECK_SAME_DTYPE(coo.col, new_col_id_arr); // Input COO const IdType *in_rows = static_cast(coo.row->data); const IdType *in_cols = static_cast(coo.col->data); int64_t num_rows = coo.num_rows; int64_t num_cols = coo.num_cols; int64_t nnz = coo.row->shape[0]; CHECK_EQ(num_rows, new_row_id_arr->shape[0]) << "The new row Id array needs to be the same as the number of rows of " "COO"; CHECK_EQ(num_cols, new_col_id_arr->shape[0]) << "The new col Id array needs to be the same as the number of cols of " "COO"; // New row/col Ids. 
const IdType *new_row_ids = static_cast(new_row_id_arr->data); const IdType *new_col_ids = static_cast(new_col_id_arr->data); // Output COO NDArray out_row_arr = NDArray::Empty({nnz}, coo.row->dtype, coo.row->ctx); NDArray out_col_arr = NDArray::Empty({nnz}, coo.col->dtype, coo.col->ctx); NDArray out_data_arr = COOHasData(coo) ? coo.data : NullArray(); IdType *out_row = static_cast(out_row_arr->data); IdType *out_col = static_cast(out_col_arr->data); parallel_for(0, nnz, [=](size_t b, size_t e) { for (auto i = b; i < e; ++i) { out_row[i] = new_row_ids[in_rows[i]]; out_col[i] = new_col_ids[in_cols[i]]; } }); return COOMatrix(num_rows, num_cols, out_row_arr, out_col_arr, out_data_arr); } template COOMatrix COOReorder( COOMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids); template COOMatrix COOReorder( COOMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/spmat_op_impl_csr.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/spmat_op_impl_csr.cc * @brief CSR matrix operator CPU implementation */ #include #include #include #include #include #include #include "array_utils.h" namespace dgl { using runtime::NDArray; using runtime::parallel_for; namespace aten { namespace impl { ///////////////////////////// CSRIsNonZero ///////////////////////////// template bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) { const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); if (csr.sorted) { const IdType* start = indices_data + indptr_data[row]; const IdType* end = indices_data + indptr_data[row + 1]; return std::binary_search(start, end, col); } else { for (IdType i = indptr_data[row]; i < indptr_data[row + 1]; ++i) { if (indices_data[i] == col) { return true; } } } return false; } 
template bool CSRIsNonZero(CSRMatrix, int64_t, int64_t); template bool CSRIsNonZero(CSRMatrix, int64_t, int64_t); template NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) { const auto rowlen = row->shape[0]; const auto collen = col->shape[0]; const auto rstlen = std::max(rowlen, collen); NDArray rst = NDArray::Empty({rstlen}, row->dtype, row->ctx); IdType* rst_data = static_cast(rst->data); const IdType* row_data = static_cast(row->data); const IdType* col_data = static_cast(col->data); const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1; const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1; runtime::parallel_for( 0, std::max(rowlen, collen), 1, [=](int64_t b, int64_t e) { int64_t i = (row_stride == 0) ? 0 : b; int64_t j = (col_stride == 0) ? 0 : b; for (int64_t k = b; i < e && j < e; i += row_stride, j += col_stride, ++k) rst_data[k] = CSRIsNonZero(csr, row_data[i], col_data[j]) ? 1 : 0; }); return rst; } template NDArray CSRIsNonZero(CSRMatrix, NDArray, NDArray); template NDArray CSRIsNonZero(CSRMatrix, NDArray, NDArray); ///////////////////////////// CSRHasDuplicate ///////////////////////////// template bool CSRHasDuplicate(CSRMatrix csr) { const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); for (IdType src = 0; src < csr.num_rows; ++src) { std::unordered_set hashmap; for (IdType eid = indptr_data[src]; eid < indptr_data[src + 1]; ++eid) { const IdType dst = indices_data[eid]; if (hashmap.count(dst)) { return true; } else { hashmap.insert(dst); } } } return false; } template bool CSRHasDuplicate(CSRMatrix csr); template bool CSRHasDuplicate(CSRMatrix csr); ///////////////////////////// CSRGetRowNNZ ///////////////////////////// template int64_t CSRGetRowNNZ(CSRMatrix csr, int64_t row) { const IdType* indptr_data = static_cast(csr.indptr->data); return indptr_data[row + 1] - indptr_data[row]; } template int64_t CSRGetRowNNZ(CSRMatrix, int64_t); template 
int64_t CSRGetRowNNZ(CSRMatrix, int64_t); template NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray rows) { CHECK_SAME_DTYPE(csr.indices, rows); const auto len = rows->shape[0]; const IdType* vid_data = static_cast(rows->data); const IdType* indptr_data = static_cast(csr.indptr->data); NDArray rst = NDArray::Empty({len}, rows->dtype, rows->ctx); IdType* rst_data = static_cast(rst->data); for (int64_t i = 0; i < len; ++i) { const auto vid = vid_data[i]; rst_data[i] = indptr_data[vid + 1] - indptr_data[vid]; } return rst; } template NDArray CSRGetRowNNZ(CSRMatrix, NDArray); template NDArray CSRGetRowNNZ(CSRMatrix, NDArray); /////////////////////////// CSRGetRowColumnIndices ///////////////////////////// template NDArray CSRGetRowColumnIndices(CSRMatrix csr, int64_t row) { const int64_t len = impl::CSRGetRowNNZ(csr, row); const IdType* indptr_data = static_cast(csr.indptr->data); const int64_t offset = indptr_data[row] * sizeof(IdType); return csr.indices.CreateView({len}, csr.indices->dtype, offset); } template NDArray CSRGetRowColumnIndices(CSRMatrix, int64_t); template NDArray CSRGetRowColumnIndices(CSRMatrix, int64_t); ///////////////////////////// CSRGetRowData ///////////////////////////// template NDArray CSRGetRowData(CSRMatrix csr, int64_t row) { const int64_t len = impl::CSRGetRowNNZ(csr, row); const IdType* indptr_data = static_cast(csr.indptr->data); const int64_t offset = indptr_data[row] * sizeof(IdType); if (CSRHasData(csr)) return csr.data.CreateView({len}, csr.data->dtype, offset); else return aten::Range( offset, offset + len, csr.indptr->dtype.bits, csr.indptr->ctx); } template NDArray CSRGetRowData(CSRMatrix, int64_t); template NDArray CSRGetRowData(CSRMatrix, int64_t); ///////////////////////////// CSRGetData ///////////////////////////// ///////////////////////////// CSRGetDataAndIndices ///////////////////////////// template void CollectDataIndicesFromSorted( const IdType* indices_data, const IdType* data, const IdType start, const IdType end, const 
IdType col, std::vector* col_vec, std::vector* ret_vec) { const IdType* start_ptr = indices_data + start; const IdType* end_ptr = indices_data + end; auto it = std::lower_bound(start_ptr, end_ptr, col); // This might be a multi-graph. We need to collect all of the matched // columns. for (; it != end_ptr; it++) { // If the col exist if (*it == col) { IdType idx = it - indices_data; col_vec->push_back(indices_data[idx]); ret_vec->push_back(data[idx]); } else { // If we find a column that is different, we can stop searching now. break; } } } template std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray rows, NDArray cols) { // TODO(minjie): more efficient implementation for matrix without duplicate // entries const int64_t rowlen = rows->shape[0]; const int64_t collen = cols->shape[0]; CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1)) << "Invalid row and col id array."; const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1; const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1; const IdType* row_data = static_cast(rows->data); const IdType* col_data = static_cast(cols->data); const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); const IdType* data = CSRHasData(csr) ? static_cast(csr.data->data) : nullptr; std::vector ret_rows, ret_cols; std::vector ret_data; for (int64_t i = 0, j = 0; i < rowlen && j < collen; i += row_stride, j += col_stride) { const IdType row_id = row_data[i], col_id = col_data[j]; CHECK(row_id >= 0 && row_id < csr.num_rows) << "Invalid row index: " << row_id; CHECK(col_id >= 0 && col_id < csr.num_cols) << "Invalid col index: " << col_id; if (csr.sorted) { // Here we collect col indices and data. CollectDataIndicesFromSorted( indices_data, data, indptr_data[row_id], indptr_data[row_id + 1], col_id, &ret_cols, &ret_data); // We need to add row Ids. 
while (ret_rows.size() < ret_data.size()) { ret_rows.push_back(row_id); } } else { for (IdType i = indptr_data[row_id]; i < indptr_data[row_id + 1]; ++i) { if (indices_data[i] == col_id) { ret_rows.push_back(row_id); ret_cols.push_back(col_id); ret_data.push_back(data ? data[i] : i); } } } } return { NDArray::FromVector(ret_rows, csr.indptr->ctx), NDArray::FromVector(ret_cols, csr.indptr->ctx), NDArray::FromVector(ret_data, csr.data->ctx)}; } template std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray rows, NDArray cols); template std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray rows, NDArray cols); ///////////////////////////// CSRTranspose ///////////////////////////// // for a matrix of shape (N, M) and NNZ // complexity: time O(NNZ + max(N, M)), space O(1) template CSRMatrix CSRTranspose(CSRMatrix csr) { const int64_t N = csr.num_rows; const int64_t M = csr.num_cols; const int64_t nnz = csr.indices->shape[0]; const IdType* Ap = static_cast(csr.indptr->data); const IdType* Aj = static_cast(csr.indices->data); const IdType* Ax = CSRHasData(csr) ? static_cast(csr.data->data) : nullptr; NDArray ret_indptr = NDArray::Empty({M + 1}, csr.indptr->dtype, csr.indptr->ctx); NDArray ret_indices = NDArray::Empty({nnz}, csr.indices->dtype, csr.indices->ctx); NDArray ret_data = NDArray::Empty({nnz}, csr.indptr->dtype, csr.indptr->ctx); IdType* Bp = static_cast(ret_indptr->data); IdType* Bi = static_cast(ret_indices->data); IdType* Bx = static_cast(ret_data->data); std::fill(Bp, Bp + M, 0); for (int64_t j = 0; j < nnz; ++j) { Bp[Aj[j]]++; } // cumsum for (int64_t i = 0, cumsum = 0; i < M; ++i) { const IdType temp = Bp[i]; Bp[i] = cumsum; cumsum += temp; } Bp[M] = nnz; for (int64_t i = 0; i < N; ++i) { for (IdType j = Ap[i]; j < Ap[i + 1]; ++j) { const IdType dst = Aj[j]; Bi[Bp[dst]] = i; Bx[Bp[dst]] = Ax ? 
Ax[j] : j; Bp[dst]++; } } // correct the indptr for (int64_t i = 0, last = 0; i <= M; ++i) { IdType temp = Bp[i]; Bp[i] = last; last = temp; } return CSRMatrix{ csr.num_cols, csr.num_rows, ret_indptr, ret_indices, ret_data}; } template CSRMatrix CSRTranspose(CSRMatrix csr); template CSRMatrix CSRTranspose(CSRMatrix csr); ///////////////////////////// CSRToCOO ///////////////////////////// template COOMatrix CSRToCOO(CSRMatrix csr) { const int64_t nnz = csr.indices->shape[0]; const IdType* indptr_data = static_cast(csr.indptr->data); NDArray ret_row = NDArray::Empty({nnz}, csr.indices->dtype, csr.indices->ctx); IdType* ret_row_data = static_cast(ret_row->data); parallel_for(0, csr.indptr->shape[0] - 1, 10000, [=](int64_t b, int64_t e) { for (auto i = b; i < e; ++i) { std::fill( ret_row_data + indptr_data[i], ret_row_data + indptr_data[i + 1], i); } }); return COOMatrix( csr.num_rows, csr.num_cols, ret_row, csr.indices, csr.data, true, csr.sorted); } template COOMatrix CSRToCOO(CSRMatrix csr); template COOMatrix CSRToCOO(CSRMatrix csr); // complexity: time O(NNZ), space O(1) template COOMatrix CSRToCOODataAsOrder(CSRMatrix csr) { const int64_t N = csr.num_rows; const int64_t M = csr.num_cols; const int64_t nnz = csr.indices->shape[0]; const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); // data array should have the same type as the indices arrays const IdType* data = CSRHasData(csr) ? 
static_cast(csr.data->data) : nullptr; NDArray ret_row = NDArray::Empty({nnz}, csr.indices->dtype, csr.indices->ctx); NDArray ret_col = NDArray::Empty({nnz}, csr.indices->dtype, csr.indices->ctx); IdType* ret_row_data = static_cast(ret_row->data); IdType* ret_col_data = static_cast(ret_col->data); // scatter using the indices in the data array parallel_for(0, N, 10000, [=](int64_t b, int64_t e) { for (auto row = b; row < e; ++row) { for (IdType j = indptr_data[row]; j < indptr_data[row + 1]; ++j) { const IdType col = indices_data[j]; ret_row_data[data ? data[j] : j] = row; ret_col_data[data ? data[j] : j] = col; } } }); return COOMatrix(N, M, ret_row, ret_col); } template COOMatrix CSRToCOODataAsOrder(CSRMatrix csr); template COOMatrix CSRToCOODataAsOrder(CSRMatrix csr); ///////////////////////////// CSRSliceRows ///////////////////////////// template CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) { const IdType* indptr = static_cast(csr.indptr->data); const int64_t num_rows = end - start; const int64_t nnz = indptr[end] - indptr[start]; IdArray ret_indptr = IdArray::Empty({num_rows + 1}, csr.indptr->dtype, csr.indices->ctx); IdType* r_indptr = static_cast(ret_indptr->data); for (int64_t i = start; i < end + 1; ++i) { r_indptr[i - start] = indptr[i] - indptr[start]; } // indices and data can be view arrays IdArray ret_indices = csr.indices.CreateView( {nnz}, csr.indices->dtype, indptr[start] * sizeof(IdType)); IdArray ret_data; if (CSRHasData(csr)) ret_data = csr.data.CreateView( {nnz}, csr.data->dtype, indptr[start] * sizeof(IdType)); else ret_data = aten::Range( indptr[start], indptr[end], csr.indptr->dtype.bits, csr.indptr->ctx); return CSRMatrix( num_rows, csr.num_cols, ret_indptr, ret_indices, ret_data, csr.sorted); } template CSRMatrix CSRSliceRows(CSRMatrix, int64_t, int64_t); template CSRMatrix CSRSliceRows(CSRMatrix, int64_t, int64_t); template CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) { CHECK_SAME_DTYPE(csr.indices, rows); 
const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); const IdType* data = CSRHasData(csr) ? static_cast(csr.data->data) : nullptr; const auto len = rows->shape[0]; const IdType* rows_data = static_cast(rows->data); int64_t nnz = 0; CSRMatrix ret; ret.num_rows = len; ret.num_cols = csr.num_cols; ret.indptr = NDArray::Empty({len + 1}, csr.indptr->dtype, csr.indices->ctx); IdType* ret_indptr_data = static_cast(ret.indptr->data); ret_indptr_data[0] = 0; std::vector sums; std::atomic_flag err_flag = ATOMIC_FLAG_INIT; bool err = false; std::stringstream err_msg_stream; // Perform two-round parallel prefix sum using OpenMP #pragma omp parallel { int64_t tid = omp_get_thread_num(); int64_t num_threads = omp_get_num_threads(); #pragma omp single { sums.resize(num_threads + 1); sums[0] = 0; } int64_t sum = 0; // First round of parallel prefix sum. All threads perform local prefix sums. #pragma omp for schedule(static) nowait for (int64_t i = 0; i < len; ++i) { int64_t rid = rows_data[i]; if (rid >= csr.num_rows) { if (!err_flag.test_and_set()) { err_msg_stream << "expect row ID " << rid << " to be less than number of rows " << csr.num_rows; err = true; } } else { sum += indptr_data[rid + 1] - indptr_data[rid]; ret_indptr_data[i + 1] = sum; } } sums[tid + 1] = sum; #pragma omp barrier #pragma omp single { for (int64_t i = 1; i < num_threads; ++i) sums[i] += sums[i - 1]; } int64_t offset = sums[tid]; // Second round of parallel prefix sum. Update the local prefix sums. 
#pragma omp for schedule(static) for (int64_t i = 0; i < len; ++i) ret_indptr_data[i + 1] += offset; } if (err) { LOG(FATAL) << err_msg_stream.str(); return ret; } // After the prefix sum, the last element of ret_indptr_data holds the // sum of all elements nnz = ret_indptr_data[len]; ret.indices = NDArray::Empty({nnz}, csr.indices->dtype, csr.indices->ctx); ret.data = NDArray::Empty({nnz}, csr.indptr->dtype, csr.indptr->ctx); ret.sorted = csr.sorted; IdType* ret_indices_data = static_cast(ret.indices->data); IdType* ret_data = static_cast(ret.data->data); parallel_for(0, len, [=](int64_t b, int64_t e) { for (auto i = b; i < e; ++i) { const IdType rid = rows_data[i]; // note: zero is allowed std::copy( indices_data + indptr_data[rid], indices_data + indptr_data[rid + 1], ret_indices_data + ret_indptr_data[i]); if (data) std::copy( data + indptr_data[rid], data + indptr_data[rid + 1], ret_data + ret_indptr_data[i]); else std::iota( ret_data + ret_indptr_data[i], ret_data + ret_indptr_data[i + 1], indptr_data[rid]); } }); return ret; } template CSRMatrix CSRSliceRows(CSRMatrix, NDArray); template CSRMatrix CSRSliceRows(CSRMatrix, NDArray); ///////////////////////////// CSRSliceMatrix ///////////////////////////// template CSRMatrix CSRSliceMatrix( CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols) { IdHashMap hashmap(cols); const int64_t new_nrows = rows->shape[0]; const int64_t new_ncols = cols->shape[0]; const IdType* rows_data = static_cast(rows->data); const bool has_data = CSRHasData(csr); const IdType* indptr_data = static_cast(csr.indptr->data); const IdType* indices_data = static_cast(csr.indices->data); const IdType* data = has_data ? 
static_cast(csr.data->data) : nullptr;

  std::vector sub_indptr, sub_indices;
  std::vector sub_data;
  sub_indptr.resize(new_nrows + 1, 0);
  // Sentinel returned by the hash map for columns that are not selected.
  const IdType kInvalidId = new_ncols + 1;
  for (int64_t i = 0; i < new_nrows; ++i) {
    // NOTE: newi == i
    const IdType oldi = rows_data[i];
    CHECK(oldi >= 0 && oldi < csr.num_rows) << "Invalid row index: " << oldi;
    for (IdType p = indptr_data[oldi]; p < indptr_data[oldi + 1]; ++p) {
      const IdType oldj = indices_data[p];
      const IdType newj = hashmap.Map(oldj, kInvalidId);
      if (newj != kInvalidId) {
        ++sub_indptr[i];
        sub_indices.push_back(newj);
        sub_data.push_back(has_data ? data[p] : p);
      }
    }
  }

  // cumsum sub_indptr
  for (int64_t i = 0, cumsum = 0; i < new_nrows; ++i) {
    const IdType temp = sub_indptr[i];
    sub_indptr[i] = cumsum;
    cumsum += temp;
  }
  sub_indptr[new_nrows] = sub_indices.size();

  const int64_t nnz = sub_data.size();
  NDArray sub_data_arr =
      NDArray::Empty({nnz}, csr.indptr->dtype, csr.indptr->ctx);
  IdType* ptr = static_cast(sub_data_arr->data);
  std::copy(sub_data.begin(), sub_data.end(), ptr);
  return CSRMatrix{
      new_nrows, new_ncols, NDArray::FromVector(sub_indptr, csr.indptr->ctx),
      NDArray::FromVector(sub_indices, csr.indptr->ctx), sub_data_arr};
}

template CSRMatrix CSRSliceMatrix(
    CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);
template CSRMatrix CSRSliceMatrix(
    CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols);

///////////////////////////// CSRReorder /////////////////////////////

/**
 * @brief Relabel the rows and columns of a CSR matrix.
 *
 * Old row i becomes row new_row_id_arr[i]; every column index c becomes
 * new_col_id_arr[c]. Entries keep their order within a row and carry their
 * data value along. If the input has no data array, the entry's position in
 * the input CSR is used as its data value.
 *
 * @param csr The input CSR matrix.
 * @param new_row_id_arr New ID of each row; length must equal csr.num_rows.
 * @param new_col_id_arr New ID of each column; length must equal
 *        csr.num_cols.
 * @return The reordered CSR matrix (column indices are not re-sorted).
 */
template CSRMatrix CSRReorder(
    CSRMatrix csr, runtime::NDArray new_row_id_arr,
    runtime::NDArray new_col_id_arr) {
  CHECK_SAME_DTYPE(csr.indices, new_row_id_arr);
  CHECK_SAME_DTYPE(csr.indices, new_col_id_arr);

  int64_t num_rows = csr.num_rows;
  int64_t num_cols = csr.num_cols;
  int64_t nnz = csr.indices->shape[0];

  // Input CSR
  const IdType* in_indptr = static_cast(csr.indptr->data);
  const IdType* in_indices = static_cast(csr.indices->data);
  // A CSR without a data array implicitly uses 0..nnz-1 as entry IDs;
  // materialize them so the copy loop below never reads a missing buffer.
  NDArray in_data_arr =
      CSRHasData(csr)
          ? csr.data
          : aten::Range(0, nnz, csr.indptr->dtype.bits, csr.indptr->ctx);
  const IdType* in_data = static_cast(in_data_arr->data);
  CHECK_EQ(nnz, in_indptr[num_rows]);
  CHECK_EQ(num_rows, new_row_id_arr->shape[0])
      << "The new row Id array needs to be the same as the number of rows of "
         "CSR";
  CHECK_EQ(num_cols, new_col_id_arr->shape[0])
      << "The new col Id array needs to be the same as the number of cols of "
         "CSR";

  // New row/col Ids.
  const IdType* new_row_ids = static_cast(new_row_id_arr->data);
  const IdType* new_col_ids = static_cast(new_col_id_arr->data);

  // Output CSR
  NDArray out_indptr_arr =
      NDArray::Empty({num_rows + 1}, csr.indptr->dtype, csr.indptr->ctx);
  NDArray out_indices_arr =
      NDArray::Empty({nnz}, csr.indices->dtype, csr.indices->ctx);
  NDArray out_data_arr =
      NDArray::Empty({nnz}, in_data_arr->dtype, in_data_arr->ctx);
  IdType* out_indptr = static_cast(out_indptr_arr->data);
  IdType* out_indices = static_cast(out_indices_arr->data);
  IdType* out_data = static_cast(out_data_arr->data);

  // Compute the length of rows for the new matrix.
  // -1 marks a new row ID that no old row maps to (checked below).
  std::vector new_row_lens(num_rows, -1);
  parallel_for(0, num_rows, [=, &new_row_lens](size_t b, size_t e) {
    for (auto i = b; i < e; ++i) {
      int64_t new_row_id = new_row_ids[i];
      new_row_lens[new_row_id] = in_indptr[i + 1] - in_indptr[i];
    }
  });
  // Compute the starting location of each row in the new matrix.
  out_indptr[0] = 0;
  // This is sequential. It should be pretty fast.
  for (int64_t i = 0; i < num_rows; i++) {
    CHECK_GE(new_row_lens[i], 0);
    out_indptr[i + 1] = out_indptr[i] + new_row_lens[i];
  }
  CHECK_EQ(out_indptr[num_rows], nnz);
  // Copy indices and data with the new order.
  // Here I iterate rows in the order of the old matrix.
  // new_row_lens is captured by reference to avoid copying the whole vector
  // into the closure.
  parallel_for(0, num_rows, [=, &new_row_lens](size_t b, size_t e) {
    for (auto i = b; i < e; ++i) {
      const IdType* in_row = in_indices + in_indptr[i];
      const IdType* in_row_data = in_data + in_indptr[i];

      int64_t new_row_id = new_row_ids[i];
      IdType* out_row = out_indices + out_indptr[new_row_id];
      IdType* out_row_data = out_data + out_indptr[new_row_id];

      int64_t row_len = new_row_lens[new_row_id];
      // Here I iterate col indices in a row in the order of the old matrix.
for (int64_t j = 0; j < row_len; j++) {
        out_row[j] = new_col_ids[in_row[j]];
        out_row_data[j] = in_row_data[j];
      }
      // TODO(zhengda) maybe we should sort the column indices.
    }
  });
  return CSRMatrix(
      num_rows, num_cols, out_indptr_arr, out_indices_arr, out_data_arr);
}

template CSRMatrix CSRReorder(
    CSRMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids);
template CSRMatrix CSRReorder(
    CSRMatrix csr, runtime::NDArray new_row_ids, runtime::NDArray new_col_ids);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cpu/spmm.cc
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file kernel/cpu/spmm.cc
 * @brief SPMM C APIs and definitions.
 */
#include "./spmm.h"

#include

namespace dgl {
namespace aten {

/**
 * @brief Generalized SpMM on Csr format.
 *
 * Dispatches on the reducer name:
 *  - "sum": calls cpu::SpMMSumCsr.
 *  - "max"/"min": first fills `out` (csr.num_rows * bcast.out_len elements)
 *    with the reducer's `zero` value, then calls cpu::SpMMCmpCsr with
 *    out_aux[0] and out_aux[1] as the arg-min/max outputs.
 *  - anything else: LOG(FATAL).
 *
 * @param op Name of the binary operator, resolved by SWITCH_OP.
 * @param reduce Name of the reducer ("sum", "max" or "min").
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
 * @param ufeat The feature on source nodes.
 * @param efeat The feature on edges.
 * @param out The result feature on destination nodes.
 * @param out_aux Auxiliary outputs; only indices 0 and 1 are read here, and
 *        only for the "max"/"min" reducers.
 */
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux) {
  const int64_t dim = bcast.out_len;
  if (reduce == "sum") {
    SWITCH_OP(op, Op, {
      cpu::SpMMSumCsr(bcast, csr, ufeat, efeat, out);
    });
  } else if (reduce == "max" || reduce == "min") {
    SWITCH_OP(op, Op, {
      DType* out_off = out.Ptr();
      if (reduce == "max") {
        // Initialize the output before the compare-and-update kernel runs.
        std::fill(
            out_off, out_off + csr.num_rows * dim, cpu::op::Max::zero);
        cpu::SpMMCmpCsr>(
            bcast, csr, ufeat, efeat, out, out_aux[0], out_aux[1]);
      } else {
        std::fill(
            out_off, out_off + csr.num_rows * dim, cpu::op::Min::zero);
        cpu::SpMMCmpCsr>(
            bcast, csr, ufeat, efeat, out, out_aux[0], out_aux[1]);
      }
    });
  } else {
    LOG(FATAL) << "Unsupported SpMM reducer: " << reduce;
  }
}

/** @brief Generalized SpMM on Csr format.
*/
// Heterogeneous variant: runs the SpMM kernel once per relation (edge type).
// For relation `etype`, the source features are vec_ufeat[ufeat_node_tids[etype]],
// the edge features are vec_efeat[etype], and the result is accumulated into
// (*vec_out)[out_node_tids[etype]]. An empty vec_ufeat / vec_efeat is treated
// as "no such feature" and a NullArray() is passed instead.
// For "max"/"min", (*out_aux)[0..3] are indexed by destination node type and
// passed to SpMMCmpCsrHetero as argu, arge, argu_ntype and arge_etype.
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& vec_csr, const std::vector& vec_ufeat,
    const std::vector& vec_efeat, std::vector* vec_out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids) {
  const int64_t dim = bcast.out_len;
  if (reduce == "sum") {
    SWITCH_OP(op, Op, {
      /* Call SpMM for each relation type */
      for (dgl_type_t etype = 0; etype < ufeat_node_tids.size(); ++etype) {
        const dgl_type_t src_id = ufeat_node_tids[etype];
        const dgl_type_t dst_id = out_node_tids[etype];
        CSRMatrix csr = vec_csr[etype];
        NDArray ufeat =
            (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id];
        NDArray efeat =
            (vec_efeat.size() == 0) ? NullArray() : vec_efeat[etype];
        NDArray out = (*vec_out)[dst_id];
        cpu::SpMMSumCsr(bcast, csr, ufeat, efeat, out);
      }
    });
  } else if (reduce == "max" || reduce == "min") {
    SWITCH_OP(op, Op, {
      // Tracks which destination node types already had their type-arg
      // arrays initialized to -1.
      std::vector updated((*vec_out).size(), false);
      // TODO(Israt): use vector updated to fill(out...) too
      // First pass: initialize every output buffer before any kernel runs.
      // The output fill is repeated for each relation that shares a
      // destination type (see the TODO above).
      for (dgl_type_t etype = 0; etype < ufeat_node_tids.size(); ++etype) {
        DType* out_off = (*vec_out)[out_node_tids[etype]].Ptr();
        if (reduce == "max")
          std::fill(
              out_off, out_off + vec_csr[etype].num_rows * dim,
              cpu::op::Max::zero);
        else
          std::fill(
              out_off, out_off + vec_csr[etype].num_rows * dim,
              cpu::op::Min::zero);
        const dgl_type_t dst_id = out_node_tids[etype];
        if (!updated[dst_id]) {
          updated[dst_id] = true;
          if (Op::use_lhs) {
            IdType* argu_ntype = (*out_aux)[2][dst_id].Ptr();
            std::fill(
                argu_ntype, argu_ntype + vec_csr[etype].num_rows * dim, -1);
          }
          if (Op::use_rhs) {
            IdType* arge_etype = (*out_aux)[3][dst_id].Ptr();
            std::fill(
                arge_etype, arge_etype + vec_csr[etype].num_rows * dim, -1);
          }
        }
      }
      /* Call SpMM for each relation type */
      for (dgl_type_t etype = 0; etype < ufeat_node_tids.size(); ++etype) {
        const dgl_type_t src_id = ufeat_node_tids[etype];
        const dgl_type_t dst_id = out_node_tids[etype];
        CSRMatrix csr = vec_csr[etype];
        NDArray ufeat =
            (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id];
        NDArray efeat =
            (vec_efeat.size() == 0) ? NullArray() : vec_efeat[etype];
        NDArray out = (*vec_out)[dst_id];
        if (reduce == "max") {
          cpu::SpMMCmpCsrHetero>(
              bcast, csr, ufeat, efeat, out, (*out_aux)[0][dst_id],
              (*out_aux)[1][dst_id], (*out_aux)[2][dst_id],
              (*out_aux)[3][dst_id], src_id, etype);
        } else {
          cpu::SpMMCmpCsrHetero>(
              bcast, csr, ufeat, efeat, out, (*out_aux)[0][dst_id],
              (*out_aux)[1][dst_id], (*out_aux)[2][dst_id],
              (*out_aux)[3][dst_id], src_id, etype);
        }
      }
    });
  } else {
    LOG(FATAL) << "Unsupported SpMM reducer: " << reduce;
  }
}

// Explicit instantiations of SpMMCsr.
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);

// Explicit instantiations of SpMMCsrHetero.
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids);
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const
std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids);
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids);
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids);
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids);
template void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_node_tids,
    const std::vector& out_node_tids);

/** @brief Edge_softmax_csr forward op on Csr format. */
// Thin dispatcher: resolves `op` with SWITCH_OP and forwards all arguments to
// the CPU kernel cpu::Edge_softmax_csr_forward.
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out) {
  SWITCH_OP(op, Op, {
    cpu::Edge_softmax_csr_forward(
        bcast, csr, ufeat, efeat, out);
  });
}

/** @brief Edge_softmax_csr backward op on Csr format.
*/
// Thin dispatcher: resolves `op` with SWITCH_OP and forwards all arguments to
// the CPU kernel cpu::Edge_softmax_csr_backward.
template void Edge_softmax_csr_backward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray out, NDArray sds, NDArray back_out) {
  SWITCH_OP(op, Op, {
    cpu::Edge_softmax_csr_backward(
        bcast, csr, out, sds, back_out);
  });
}

// Explicit instantiations of Edge_softmax_csr_forward.
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_forward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);

// Explicit instantiations of Edge_softmax_csr_backward. The parameter names
// here (ufeat, efeat, out) differ from the definition (out, sds, back_out);
// only the types matter for an explicit instantiation.
template void Edge_softmax_csr_backward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_backward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_backward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_backward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_backward(
    const std::string& op, const BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);
template void Edge_softmax_csr_backward(
    const std::string& op, const
    BcastOff& bcast, const CSRMatrix& csr,
    NDArray ufeat, NDArray efeat, NDArray out);

/**
 * @brief Generalized SpMM on Coo format.
 *
 * Dispatches on the reducer name: "sum" calls cpu::SpMMSumCoo; "max"/"min"
 * call cpu::SpMMCmpCoo with out_aux[0] and out_aux[1] as the arg-min/max
 * outputs; any other reducer is a fatal error. Unlike the CSR dispatcher in
 * this file, `out` is not pre-filled here.
 */
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux) {
  if (reduce == "sum") {
    SWITCH_OP(op, Op, {
      cpu::SpMMSumCoo(bcast, coo, ufeat, efeat, out);
    });
  } else if (reduce == "max" || reduce == "min") {
    SWITCH_OP(op, Op, {
      if (reduce == "max")
        cpu::SpMMCmpCoo>(
            bcast, coo, ufeat, efeat, out, out_aux[0], out_aux[1]);
      else
        cpu::SpMMCmpCoo>(
            bcast, coo, ufeat, efeat, out, out_aux[0], out_aux[1]);
    });
  } else {
    LOG(FATAL) << "Unsupported SpMM reducer: " << reduce;
  }
}

// Explicit instantiations of SpMMCoo.
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);
template void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);

}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cpu/spmm.h
================================================
/**
 * Copyright
(c) 2020 by Contributors
 * @file array/cpu/spmm.h
 * @brief SPMM CPU kernel function header.
 */
#ifndef DGL_ARRAY_CPU_SPMM_H_
#define DGL_ARRAY_CPU_SPMM_H_

#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "spmm_binary_ops.h"
#if !defined(_WIN32)
#ifdef USE_LIBXSMM
#include "spmm_blocking_libxsmm.h"
#endif  // USE_LIBXSMM
#endif  // _WIN32

namespace dgl {
namespace aten {
namespace cpu {

// Accumulator type selected with std::conditional: float for one specific
// element type, otherwise DType itself.
// NOTE(review): per the comment on the second SpMMSumCsrNaive overload the
// special-cased type is presumably bfloat16 -- confirm.
template using AccType = typename std::conditional<
    std::is_same::value, float, DType>::type;

/**
 * @brief Naive CPU kernel of SpMM on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
 * @param X The feature on source nodes.
 * @param W The feature on edges.
 * @param O The result feature on destination nodes.
 * @note it uses node parallel strategy, different threads are responsible
 *       for the computation of different nodes.
 * @note Results are accumulated into O with `+=`; O is not cleared here.
 */
template typename std::enable_if::value, void>::type
SpMMSumCsrNaive(
    const BcastOff& bcast, const CSRMatrix& csr, const DType* X,
    const DType* W, DType* O) {
  const bool has_idx = !IsNullArray(csr.data);
  const IdType* indptr = csr.indptr.Ptr();
  const IdType* indices = csr.indices.Ptr();
  const IdType* edges = csr.data.Ptr();
  int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len;
  runtime::parallel_for(0, csr.num_rows, [&](size_t b, size_t e) {
    for (auto rid = b; rid < e; ++rid) {
      const IdType row_start = indptr[rid], row_end = indptr[rid + 1];
      DType* out_off = O + rid * dim;
      for (IdType j = row_start; j < row_end; ++j) {
        const IdType cid = indices[j];
        // Without a data array the edge ID is the position in the CSR.
        const IdType eid = has_idx ? edges[j] : j;
        for (int64_t k = 0; k < dim; ++k) {
          const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k;
          const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k;
          const DType* lhs_off =
              Op::use_lhs ? X + cid * lhs_dim + lhs_add : nullptr;
          const DType* rhs_off =
              Op::use_rhs ? W + eid * rhs_dim + rhs_add : nullptr;
          out_off[k] += Op::Call(lhs_off, rhs_off);
        }
      }
    }
  });
}

// Naive implementation with additional accumulator, which prevents accuracy
// degradation in less precise data types, like bfloat16.
// Compared with the overload above, the loops are interchanged: the feature
// index k is the outer loop so one AccType accumulator covers a whole row.
template typename std::enable_if::value, void>::type
SpMMSumCsrNaive(
    const BcastOff& bcast, const CSRMatrix& csr, const DType* X,
    const DType* W, DType* O) {
  const bool has_idx = !IsNullArray(csr.data);
  const IdType* indptr = csr.indptr.Ptr();
  const IdType* indices = csr.indices.Ptr();
  const IdType* edges = csr.data.Ptr();
  int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len;
  runtime::parallel_for(0, csr.num_rows, [&](size_t b, size_t e) {
    for (auto rid = b; rid < e; ++rid) {
      const IdType row_start = indptr[rid], row_end = indptr[rid + 1];
      DType* out_off = O + rid * dim;
      for (int64_t k = 0; k < dim; ++k) {
        AccType acc = 0.;
        for (IdType j = row_start; j < row_end; ++j) {
          const IdType cid = indices[j];
          const IdType eid = has_idx ? edges[j] : j;
          const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k;
          const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k;
          const DType* lhs_off =
              Op::use_lhs ? X + cid * lhs_dim + lhs_add : nullptr;
          const DType* rhs_off =
              Op::use_rhs ? W + eid * rhs_dim + rhs_add : nullptr;
          acc += Op::Call(lhs_off, rhs_off);
        }
        out_off[k] += acc;
      }
    }
  });
}

/**
 * @brief CPU kernel of SpMM on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
 * @param ufeat The feature on source nodes.
 * @param efeat The feature on edges.
 * @param out The result feature on destination nodes.
 * @note it uses node parallel strategy, different threads are responsible
 *       for the computation of different nodes.
*/
// Validates the raw pointers it needs, then runs the LIBXSMM kernel when it
// is compiled in and applicable, and the naive kernel otherwise.
template void SpMMSumCsr(
    const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat,
    NDArray out) {
  const bool has_idx = !IsNullArray(csr.data);
  const IdType* indptr = csr.indptr.Ptr();
  const IdType* indices = csr.indices.Ptr();
  const IdType* edges = csr.data.Ptr();
  const DType* X = ufeat.Ptr();
  const DType* W = efeat.Ptr();
  DType* O = out.Ptr();
  CHECK_NOTNULL(indptr);
  CHECK_NOTNULL(O);
  // Only the operands the operator actually reads are required to be present.
  if (Op::use_lhs) {
    CHECK_NOTNULL(indices);
    CHECK_NOTNULL(X);
  }
  if (Op::use_rhs) {
    if (has_idx) CHECK_NOTNULL(edges);
    CHECK_NOTNULL(W);
  }
#if !defined(_WIN32)
#ifdef USE_LIBXSMM
  int cpu_id = libxsmm_cpuid_x86();
  // LIBXSMM is skipped for broadcasting, for certain element types (one of
  // them only below AVX512), or when disabled in the runtime config.
  const bool no_libxsmm =
      bcast.use_bcast || std::is_same::value ||
      (std::is_same::value && cpu_id < LIBXSMM_X86_AVX512) ||
      !dgl::runtime::Config::Global()->IsLibxsmmAvailable();
  if (!no_libxsmm) {
    SpMMSumCsrLibxsmm(bcast, csr, ufeat, efeat, out);
  } else {
#endif  // USE_LIBXSMM
#endif  // _WIN32
    SpMMSumCsrNaive(bcast, csr, X, W, O);
#if !defined(_WIN32)
#ifdef USE_LIBXSMM
  }
#endif  // USE_LIBXSMM
#endif  // _WIN32
}

/**
 * @brief CPU kernel of SpMM on Coo format.
 * @param bcast Broadcast information.
 * @param coo The Coo matrix.
 * @param ufeat The feature on source nodes.
 * @param efeat The feature on edges.
 * @param out The result feature on destination nodes.
 * @note it uses node parallel strategy, different threads are responsible
 *       for the computation of different nodes. To avoid possible data hazard,
 *       we use atomic operators in the reduction phase.
*/
// The loop below runs in parallel over the non-zero entries. Entry i reads
// the source feature of row[i] and accumulates into the output slot of
// col[i], using `omp atomic` because several entries can share a column.
template typename std::enable_if::value, void>::type
SpMMSumCoo(
    const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat,
    NDArray out) {
  const bool has_idx = !IsNullArray(coo.data);
  const IdType* row = coo.row.Ptr();
  const IdType* col = coo.col.Ptr();
  const IdType* edges = coo.data.Ptr();
  const DType* X = ufeat.Ptr();
  const DType* W = efeat.Ptr();
  int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len;
  DType* O = out.Ptr();
  const int64_t nnz = coo.row->shape[0];
  // fill zero elements
  memset(O, 0, out.GetSize());
  // spmm
#pragma omp parallel for
  for (IdType i = 0; i < nnz; ++i) {
    const IdType rid = row[i];
    const IdType cid = col[i];
    // Without a data array the edge ID is the position in the COO.
    const IdType eid = has_idx ? edges[i] : i;
    DType* out_off = O + cid * dim;
    for (int64_t k = 0; k < dim; ++k) {
      const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k;
      const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k;
      const DType* lhs_off =
          Op::use_lhs ? X + rid * lhs_dim + lhs_add : nullptr;
      const DType* rhs_off =
          Op::use_rhs ? W + eid * rhs_dim + rhs_add : nullptr;
      const DType val = Op::Call(lhs_off, rhs_off);
      // Adding zero changes nothing, so the atomic update is skipped.
      if (val != 0) {
#pragma omp atomic
        out_off[k] += val;
      }
    }
  }
}

// Overload selected by the opposite enable_if condition; it always fails with
// a fatal error because this kernel has no BF16 implementation.
template typename std::enable_if::value, void>::type
SpMMSumCoo(
    const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat,
    NDArray out) {
  LOG(FATAL) << "Unsupported CPU kernel for SpMMSumCoo for BF16.";
}

/**
 * @brief CPU kernel of SpMM-Min/Max on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
 * @param ufeat The feature on source nodes.
 * @param efeat The feature on edges.
 * @param out The result feature on destination nodes.
 * @param argu Arg-Min/Max on source nodes, which refers the source node indices
 *        correspond to the minimum/maximum values of reduction result on
 *        destination nodes. It's useful in computing gradients of Min/Max
 *        reducer.
 * @param arge Arg-Min/Max on edges.
which refers the source node indices correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @note It uses node parallel strategy, different threads are responsible for * the computation of different nodes. * @note The result will contain infinity for zero-degree nodes. */ template void SpMMCmpCsr( const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge) { const bool has_idx = !IsNullArray(csr.data); const IdType* indptr = static_cast(csr.indptr->data); const IdType* indices = static_cast(csr.indices->data); const IdType* edges = has_idx ? static_cast(csr.data->data) : nullptr; const DType* X = Op::use_lhs ? static_cast(ufeat->data) : nullptr; const DType* W = Op::use_rhs ? static_cast(efeat->data) : nullptr; const int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len; DType* O = static_cast(out->data); IdType* argX = Op::use_lhs ? static_cast(argu->data) : nullptr; IdType* argW = Op::use_rhs ? 
static_cast(arge->data) : nullptr; CHECK_NOTNULL(indptr); CHECK_NOTNULL(O); if (Op::use_lhs) { CHECK_NOTNULL(indices); CHECK_NOTNULL(X); CHECK_NOTNULL(argX); } if (Op::use_rhs) { if (has_idx) CHECK_NOTNULL(edges); CHECK_NOTNULL(W); CHECK_NOTNULL(argW); } #if !defined(_WIN32) #ifdef USE_LIBXSMM int cpu_id = libxsmm_cpuid_x86(); const bool no_libxsmm = bcast.use_bcast || std::is_same::value || cpu_id < LIBXSMM_X86_AVX512 || !dgl::runtime::Config::Global()->IsLibxsmmAvailable(); if (!no_libxsmm) { SpMMCmpCsrLibxsmm( bcast, csr, ufeat, efeat, out, argu, arge); } else { #endif // USE_LIBXSMM #endif // _WIN32 runtime::parallel_for(0, csr.num_rows, [&](size_t b, size_t e) { for (auto rid = b; rid < e; ++rid) { const IdType row_start = indptr[rid], row_end = indptr[rid + 1]; DType* out_off = O + rid * dim; IdType* argx_off = argX + rid * dim; IdType* argw_off = argW + rid * dim; for (IdType j = row_start; j < row_end; ++j) { const IdType cid = indices[j]; const IdType eid = has_idx ? edges[j] : j; for (int64_t k = 0; k < dim; ++k) { const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* lhs_off = Op::use_lhs ? X + cid * lhs_dim + lhs_add : nullptr; const DType* rhs_off = Op::use_rhs ? W + eid * rhs_dim + rhs_add : nullptr; const DType val = Op::Call(lhs_off, rhs_off); if (Cmp::Call(out_off[k], val)) { out_off[k] = val; if (Op::use_lhs) argx_off[k] = cid; if (Op::use_rhs) argw_off[k] = eid; } } } } }); #if !defined(_WIN32) #ifdef USE_LIBXSMM } #endif // USE_LIBXSMM #endif // _WIN32 } /** * @brief CPU kernel of SpMM-Min/Max on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. 
* @param argu Arg-Min/Max on source nodes, which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param arge Arg-Min/Max on edges. which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param argu_ntype Node type of the arg-Min/Max on source nodes, which refers * the source node types correspond to the minimum/maximum values of * reduction result on destination nodes. It's useful in computing * gradients of Min/Max reducer. * @param arge_etype Edge-type of the arg-Min/Max on edges. which refers the * source node indices correspond to the minimum/maximum values of * reduction result on destination nodes. It's useful in computing * gradients of Min/Max reducer. * @param src_type Node type of the source nodes of an etype * @param etype Edge type */ template void SpMMCmpCsrHetero( const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge, NDArray argu_ntype, NDArray arge_etype, const int ntype, const int etype) { const bool has_idx = !IsNullArray(csr.data); const IdType* indptr = static_cast(csr.indptr->data); const IdType* indices = static_cast(csr.indices->data); const IdType* edges = has_idx ? static_cast(csr.data->data) : nullptr; const DType* X = Op::use_lhs ? static_cast(ufeat->data) : nullptr; const DType* W = Op::use_rhs ? static_cast(efeat->data) : nullptr; const int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len; DType* O = static_cast(out->data); IdType* argX = Op::use_lhs ? static_cast(argu->data) : nullptr; IdType* argW = Op::use_rhs ? static_cast(arge->data) : nullptr; IdType* argX_ntype = Op::use_lhs ? static_cast(argu_ntype->data) : nullptr; IdType* argW_etype = Op::use_rhs ? 
static_cast(arge_etype->data) : nullptr; CHECK_NOTNULL(indptr); CHECK_NOTNULL(O); if (Op::use_lhs) { CHECK_NOTNULL(indices); CHECK_NOTNULL(X); CHECK_NOTNULL(argX); } if (Op::use_rhs) { if (has_idx) CHECK_NOTNULL(edges); CHECK_NOTNULL(W); CHECK_NOTNULL(argW); } // TODO(Israt): Use LIBXSMM. Homogeneous graph uses LIBXMM when enabled. runtime::parallel_for(0, csr.num_rows, [&](size_t b, size_t e) { for (auto rid = b; rid < e; ++rid) { const IdType row_start = indptr[rid], row_end = indptr[rid + 1]; DType* out_off = O + rid * dim; IdType* argx_off = argX + rid * dim; IdType* argw_off = argW + rid * dim; IdType* argx_ntype = argX_ntype + rid * dim; IdType* argw_etype = argW_etype + rid * dim; for (IdType j = row_start; j < row_end; ++j) { const IdType cid = indices[j]; const IdType eid = has_idx ? edges[j] : j; for (int64_t k = 0; k < dim; ++k) { const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* lhs_off = Op::use_lhs ? X + cid * lhs_dim + lhs_add : nullptr; const DType* rhs_off = Op::use_rhs ? W + eid * rhs_dim + rhs_add : nullptr; const DType val = Op::Call(lhs_off, rhs_off); if (Cmp::Call(out_off[k], val)) { out_off[k] = val; if (Op::use_lhs) { argx_off[k] = cid; argx_ntype[k] = ntype; } if (Op::use_rhs) { argw_off[k] = eid; argw_etype[k] = etype; } } } } } }); } /** * @brief CPU kernel of SpMM-Min/Max on Coo format. * @param bcast Broadcast information. * @param coo The Coo matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. * @param argu Arg-Min/Max on source nodes, which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param arge Arg-Min/Max on edges. 
which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @note it uses node parallel strategy, different threads are responsible for * the computation of different nodes. To avoid possible data hazard, we * use atomic operators in the reduction phase. * @note The result will contain infinity for zero-degree nodes. */ template void SpMMCmpCoo( const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge) { const bool has_idx = !IsNullArray(coo.data); const IdType* row = static_cast(coo.row->data); const IdType* col = static_cast(coo.col->data); const IdType* edges = has_idx ? static_cast(coo.data->data) : nullptr; const DType* X = Op::use_lhs ? static_cast(ufeat->data) : nullptr; const DType* W = Op::use_rhs ? static_cast(efeat->data) : nullptr; const int64_t dim = bcast.out_len, lhs_dim = bcast.lhs_len, rhs_dim = bcast.rhs_len; DType* O = static_cast(out->data); IdType* argX = Op::use_lhs ? static_cast(argu->data) : nullptr; IdType* argW = Op::use_rhs ? static_cast(arge->data) : nullptr; const int64_t nnz = coo.row->shape[0]; // fill zero elements std::fill(O, O + out.NumElements(), Cmp::zero); // spmm #pragma omp parallel for for (IdType i = 0; i < nnz; ++i) { const IdType rid = row[i]; const IdType cid = col[i]; const IdType eid = has_idx ? edges[i] : i; DType* out_off = O + cid * dim; IdType* argx_off = Op::use_lhs ? argX + cid * dim : nullptr; IdType* argw_off = Op::use_rhs ? argW + cid * dim : nullptr; for (int64_t k = 0; k < dim; ++k) { const int64_t lhs_add = bcast.use_bcast ? bcast.lhs_offset[k] : k; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* lhs_off = Op::use_lhs ? X + rid * lhs_dim + lhs_add : nullptr; const DType* rhs_off = Op::use_rhs ? 
W + eid * rhs_dim + rhs_add : nullptr; const DType val = Op::Call(lhs_off, rhs_off); #pragma omp critical if (Cmp::Call(out_off[k], val)) { out_off[k] = val; if (Op::use_lhs) argx_off[k] = rid; if (Op::use_rhs) argw_off[k] = eid; } } } } /** * @brief CPU kernel of Edge_softmax_csr_forward on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result of edge_softmax_forward. */ template void Edge_softmax_csr_forward( const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out) { const bool has_idx = !IsNullArray(csr.data); const IdType* indptr = static_cast(csr.indptr->data); const IdType* edges = has_idx ? static_cast(csr.data->data) : nullptr; const DType* W = Op::use_rhs ? static_cast(efeat->data) : nullptr; const int64_t dim = bcast.out_len, rhs_dim = bcast.rhs_len; runtime::parallel_for(0, csr.num_rows, [&](size_t b, size_t e) { for (auto rid = b; rid < e; ++rid) { const IdType row_start = indptr[rid], row_end = indptr[rid + 1]; std::vector> data_e(row_end - row_start, 0); std::vector num(row_end - row_start, 0); for (int64_t k = 0; k < dim; ++k) { DType max_v = -std::numeric_limits::infinity(); for (IdType j = row_start; j < row_end; ++j) { const IdType eid = has_idx ? edges[j] : j; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* rhs_off = Op::use_rhs ? W + eid * rhs_dim + rhs_add : nullptr; data_e[j - row_start] = *rhs_off; num[j - row_start] = eid * rhs_dim + rhs_add; max_v = std::max(max_v, (*rhs_off)); } DType exp_sum = 0; for (auto& element : data_e) { element -= max_v; element = std::exp(element); exp_sum += element; } for (int i = 0; i < row_end - row_start; i++) { out.Ptr()[num[i]] = data_e[i] / exp_sum; } } } }); } /** * @brief CPU kernel of Edge_softmax_csr_backward on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. 
* @param out The result of forward. * @param sds The result of gradiet * out. * @param back_out The result of edge_softmax_backward. */ template void Edge_softmax_csr_backward( const BcastOff& bcast, const CSRMatrix& csr, NDArray out, NDArray sds, NDArray back_out) { typedef typename std::conditional< std::is_same::value, float, DType>::type AccType; const bool has_idx = !IsNullArray(csr.data); const IdType* indptr = static_cast(csr.indptr->data); const IdType* edges = has_idx ? static_cast(csr.data->data) : nullptr; const DType* W_out = Op::use_rhs ? static_cast(out->data) : nullptr; const DType* W_sds = Op::use_rhs ? static_cast(sds->data) : nullptr; const int64_t dim = bcast.out_len, rhs_dim = bcast.rhs_len; runtime::parallel_for(0, csr.num_rows, [&](size_t b, size_t e) { for (auto rid = b; rid < e; ++rid) { const IdType row_start = indptr[rid], row_end = indptr[rid + 1]; for (int64_t k = 0; k < dim; ++k) { AccType sum_sds = 0; for (IdType j = row_start; j < row_end; ++j) { const IdType eid = has_idx ? edges[j] : j; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* rhs_off_sds = Op::use_rhs ? W_sds + eid * rhs_dim + rhs_add : nullptr; sum_sds += (*rhs_off_sds); } for (IdType j = row_start; j < row_end; ++j) { const IdType eid = has_idx ? edges[j] : j; const int64_t rhs_add = bcast.use_bcast ? bcast.rhs_offset[k] : k; const DType* rhs_off_out = Op::use_rhs ? W_out + eid * rhs_dim + rhs_add : nullptr; const DType* rhs_off_sds = Op::use_rhs ? W_sds + eid * rhs_dim + rhs_add : nullptr; back_out.Ptr()[eid * rhs_dim + rhs_add] = (*rhs_off_sds) - sum_sds * (*rhs_off_out); } } } }); } } // namespace cpu } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_SPMM_H_ ================================================ FILE: src/array/cpu/spmm_binary_ops.h ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/spmm_binary_ops.h * @brief SPMM CPU Binary ops. 
*/ #ifndef DGL_ARRAY_CPU_SPMM_BINARY_OPS_H_ #define DGL_ARRAY_CPU_SPMM_BINARY_OPS_H_ #include #include #include namespace dgl { namespace aten { namespace cpu { namespace op { //////////////////////////////// binary operators on CPU /////////////////////////////////// template struct Add { typedef DType type; static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call(const DType* lhs_off, const DType* rhs_off) { return *lhs_off + *rhs_off; } }; template constexpr bool Add::use_lhs; template constexpr bool Add::use_rhs; template struct Sub { typedef DType type; static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call(const DType* lhs_off, const DType* rhs_off) { return *lhs_off - *rhs_off; } }; template constexpr bool Sub::use_lhs; template constexpr bool Sub::use_rhs; template struct Mul { typedef DType type; static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call(const DType* lhs_off, const DType* rhs_off) { return *lhs_off * *rhs_off; } }; template constexpr bool Mul::use_lhs; template constexpr bool Mul::use_rhs; template struct Div { typedef DType type; static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; inline static DType Call(const DType* lhs_off, const DType* rhs_off) { return *lhs_off / *rhs_off; } }; template constexpr bool Div::use_lhs; template constexpr bool Div::use_rhs; template struct CopyLhs { typedef DType type; static constexpr bool use_lhs = true; static constexpr bool use_rhs = false; inline static DType Call(const DType* lhs_off, const DType*) { return *lhs_off; } }; template constexpr bool CopyLhs::use_lhs; template constexpr bool CopyLhs::use_rhs; template struct CopyRhs { typedef DType type; static constexpr bool use_lhs = false; static constexpr bool use_rhs = true; inline static DType Call(const DType*, const DType* rhs_off) { return *rhs_off; } }; template constexpr bool 
CopyRhs::use_lhs; template constexpr bool CopyRhs::use_rhs; //////////////////////////////// Reduce operators on CPU /////////////////////////////////// template constexpr DType MinDType() { if (std::is_same::value) return BFloat16::Min(); else return -std::numeric_limits::infinity(); } template struct Max { typedef DType type; static constexpr DType zero = MinDType(); // return true if accum should be replaced inline static DType Call(DType accum, DType val) { return accum < val; } }; template constexpr DType Max::zero; template constexpr DType MaxDType() { if (std::is_same::value) return BFloat16::Max(); else return std::numeric_limits::infinity(); } template struct Min { typedef DType type; static constexpr DType zero = MaxDType(); // return true if accum should be replaced inline static DType Call(DType accum, DType val) { return accum > val; } }; template constexpr DType Min::zero; #define SWITCH_OP(op, Op, ...) \ do { \ if ((op) == "add") { \ typedef dgl::aten::cpu::op::Add Op; \ { __VA_ARGS__ } \ } else if ((op) == "sub") { \ typedef dgl::aten::cpu::op::Sub Op; \ { __VA_ARGS__ } \ } else if ((op) == "mul") { \ typedef dgl::aten::cpu::op::Mul Op; \ { __VA_ARGS__ } \ } else if ((op) == "div") { \ typedef dgl::aten::cpu::op::Div Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_lhs") { \ typedef dgl::aten::cpu::op::CopyLhs Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_rhs") { \ typedef dgl::aten::cpu::op::CopyRhs Op; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << "Unsupported SpMM binary operator: " << op; \ } \ } while (0) } // namespace op } // namespace cpu } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_SPMM_BINARY_OPS_H_ ================================================ FILE: src/array/cpu/spmm_blocking_libxsmm.h ================================================ /** * Copyright (c) 2021 Intel Corporation * @file array/cpu/spmm.h * @brief SPMM CPU kernel function header. 
* @author Sanchit Misra , * Ramanarayan Mohanty , * Vasimuddin Md , * Sasikanth Avancha */ #ifndef DGL_ARRAY_CPU_SPMM_BLOCKING_LIBXSMM_H_ #define DGL_ARRAY_CPU_SPMM_BLOCKING_LIBXSMM_H_ #include #include #include #include #if !defined(_WIN32) #ifdef USE_LIBXSMM #include #include #ifdef DEBUG #include #endif // DEBUG #include #define NUM_BLOCKS_PER_THREAD 20 #define BLOCKING_HEURISTIC_PARAM 500 namespace dgl { namespace aten { namespace cpu { template struct CSRMatrixInternal { IdType num_rows; IdType num_cols; IdType *indptr; IdType *indices; DType *data; }; int32_t GetLLCSize() { #ifdef _SC_LEVEL3_CACHE_SIZE int32_t cache_size = sysconf(_SC_LEVEL3_CACHE_SIZE); if (cache_size < 0) cache_size = DGL_CPU_LLC_SIZE; #else int32_t cache_size = DGL_CPU_LLC_SIZE; #endif return cache_size; } /** * @brief Tile the CSR matrix to roughly make sure that the column tiles and * corresponding neighbor features fit into LLC and the row tiles * are assigned to OMP threads. * @param csr The Csr matrix. * @param block_csr_array The array containing csr matrices of all blocks. * @param num_M_blocks Number of blocks to create along the rows of adjacency * matrix. * @param num_K_blocks Number of blocks to create along the columns of adjacency * matrix. * @param M_block_size block size along the rows of adjacency matrix. * @param K_block_size block size along the columns of adjacency matrix. * @param use_lhs Whether to use lhs. * @param use_rhs Whether to use rhs. 
*/ template inline void SpMMCreateBlocks( const CSRMatrix &csr, CSRMatrixInternal *block_csr_array, IdType num_M_blocks, IdType num_K_blocks, IdType M_block_size, IdType K_block_size, bool use_lhs, bool use_rhs) { const IdType M = csr.num_rows; const IdType K = csr.num_cols; IdType *indptr = csr.indptr.Ptr(); IdType *indices = csr.indices.Ptr(); IdType *edges = csr.data.Ptr(); CHECK_NOTNULL(indptr); if (use_lhs) CHECK_NOTNULL(indices); if (use_rhs) CHECK_NOTNULL(edges); if (num_K_blocks > 1) { IdType *indptr_block_buf = reinterpret_cast(aligned_alloc( 64, (M_block_size + 1) * num_M_blocks * num_K_blocks * sizeof(IdType))); IdType *indices_block_buf = nullptr; if (use_lhs) { indices_block_buf = reinterpret_cast( aligned_alloc(64, indptr[M] * sizeof(IdType))); } IdType *edges_block_buf = nullptr; if (use_rhs) { edges_block_buf = reinterpret_cast( aligned_alloc(64, indptr[M] * sizeof(IdType))); } #pragma omp parallel { IdType *my_cur_col_id = reinterpret_cast( aligned_alloc(64, 2 * M_block_size * sizeof(IdType))); #pragma omp for for (IdType m = 0; m < num_M_blocks; m++) { const IdType M_start = m * M_block_size; const IdType M_end = std::min((m + 1) * M_block_size, M); const IdType nnz = indptr[M_end] - indptr[M_start]; IdType cur_indices_id = 0; IdType *my_indices_block_buf, *my_edges_block_buf; if (use_lhs) my_indices_block_buf = indices_block_buf + indptr[M_start]; if (use_rhs) my_edges_block_buf = edges_block_buf + indptr[M_start]; for (IdType i = M_start; i < M_end; i++) { my_cur_col_id[(i - M_start) * 2] = indptr[i]; my_cur_col_id[(i - M_start) * 2 + 1] = indptr[i + 1]; } for (IdType k = 0; k < num_K_blocks; k++) { const IdType K_start = k * K_block_size; const IdType K_end = std::min((k + 1) * K_block_size, K); CSRMatrixInternal cur_csr; cur_csr.num_rows = M_end - M_start; cur_csr.num_cols = K_end - K_start; // Create csr_ij IdType *cur_csr_indptr = indptr_block_buf + (m * num_K_blocks + k) * (M_block_size + 1); IdType *cur_csr_indices = nullptr, 
*cur_csr_edges = nullptr; if (use_lhs) cur_csr_indices = my_indices_block_buf + cur_indices_id; if (use_rhs) cur_csr_edges = my_edges_block_buf + cur_indices_id; IdType cur_nnz = 0; for (IdType i = M_start; i < M_end; i++) { const IdType row_start = my_cur_col_id[(i - M_start) * 2]; const IdType row_end = my_cur_col_id[(i - M_start) * 2 + 1]; cur_csr_indptr[i - M_start] = cur_nnz; IdType eid; for (eid = row_start; eid < row_end; eid++) { const IdType src = indices[eid]; const IdType edge = edges[eid]; if (src >= K_end) { break; } CHECK_LT(cur_indices_id + cur_nnz, nnz); if (use_lhs) cur_csr_indices[cur_nnz] = src; if (use_rhs) cur_csr_edges[cur_nnz] = edge; cur_nnz++; } my_cur_col_id[(i - M_start) * 2] = eid; } cur_csr_indptr[cur_csr.num_rows] = cur_nnz; cur_indices_id += cur_nnz; cur_csr.indptr = cur_csr_indptr; if (use_lhs) cur_csr.indices = cur_csr_indices; if (use_rhs) cur_csr.data = cur_csr_edges; block_csr_array[m * num_K_blocks + k] = cur_csr; } CHECK_EQ(nnz, cur_indices_id); } free(my_cur_col_id); } } else { for (IdType m = 0; m < num_M_blocks; m++) { const IdType M_start = m * M_block_size; const IdType M_end = std::min((m + 1) * M_block_size, M); CSRMatrixInternal cur_csr; cur_csr.num_rows = M_end - M_start; cur_csr.num_cols = K; cur_csr.indptr = indptr + M_start; cur_csr.indices = indices; cur_csr.data = edges; block_csr_array[m] = cur_csr; } } } /** * @brief Create libxsmm kernel. * @param has_idx For the edge features, are there indices available. * @param N Feature size. * @param redop_flag Flag specifying the reduction operation. * @param is_cmp Is the reduction operation a compare operation. * @note libxsmm_dispatch_meltw_opreduce_vecs_idx creates a JIT'ed kernel. * Given a node u, the kernel performs an elementwise "Op" on the * features of the neighbors and/or the edges incident on u. * Subsequently, it performs an elementwise "Redop" on all such * features created and stores into the feature of node u. 
* It uses a SIMD and a cache efficient design and also provides * support to enable software prefetching if needed. For IdType, * it supports INT32 and INT64. For DType, it supports BF16 and FP32. * It supports all the "Ops" and "Redops" supported by DGL. Once a * kernel is generated by libxsmm_dispatch_meltw_opreduce_vecs_idx, * it is cached for the entire duration of the execution of a program * so that subsequently if the kernel is needed again, it just returns * the cached copy. */ template inline libxsmm_meltwfunction_opreduce_vecs_idx SpMMCreateLibxsmmKernel( bool has_idx, IdType N, libxsmm_meltw_opreduce_vecs_flags redop_flag, bool is_cmp) { int _ld = N; libxsmm_meltw_opreduce_vecs_flags opredop_flags; // First, set the Op in the opredop_flags if (std::is_same>::value) { opredop_flags = LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OP_ADD; } else if (std::is_same>::value) { opredop_flags = LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OP_SUB; } else if (std::is_same>::value) { opredop_flags = LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OP_MUL; } else if (std::is_same>::value) { opredop_flags = LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OP_DIV; } else if (std::is_same>::value) { opredop_flags = LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OP_COPY; } else if (std::is_same>::value) { opredop_flags = LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OP_COPY; } // Second, set which of lhs or rhs is considered first and second operand. // This is needed since libxsmm assumes that the copy operation always copies // the first operand. So, if we need to copy rhs, we need to set that as the // first operand. For rhs, we also set whether to use implicit indices or // provided indices. // TODO(Steve): fix this long line in a separate PR. 
if (std::is_same>::value) { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OPORDER_VECIDX_VECIN); // NOLINT } else if (std::is_same>::value) { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OPORDER_VECIN_VECIDX); // NOLINT if (!has_idx) { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_IMPLICIT_INDEXED_VECIDX); // NOLINT } } else { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_OPORDER_VECIDX_VECIN); // NOLINT if (has_idx) { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_INDEXED_VEC); // NOLINT } else { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_IMPLICIT_INDEXED_VEC); // NOLINT } } // Third, we set the Redop in the opredop_flags opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | redop_flag); // Fourth, in case of Cmp Redop, set whether to record argmax/argmin for // lhs/rhs if (is_cmp) { if (Op::use_lhs) { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_RECORD_ARGOP_OFF_VEC_0); // NOLINT } if (Op::use_rhs) { opredop_flags = (libxsmm_meltw_opreduce_vecs_flags)(opredop_flags | LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_RECORD_ARGOP_OFF_VEC_1); // NOLINT } } libxsmm_meltwfunction_opreduce_vecs_idx kernel = nullptr; if (std::is_same::value) { kernel = libxsmm_dispatch_meltw_opreduce_vecs_idx( N, &_ld, &_ld, LIBXSMM_DATATYPE_F32, LIBXSMM_DATATYPE_F32, (sizeof(IdType) == 8) ? LIBXSMM_DATATYPE_I64 : LIBXSMM_DATATYPE_I32, opredop_flags, 0); } else { // assume bf16 kernel = libxsmm_dispatch_meltw_opreduce_vecs_idx( N, &_ld, &_ld, LIBXSMM_DATATYPE_BF16, LIBXSMM_DATATYPE_BF16, (sizeof(IdType) == 8) ? 
LIBXSMM_DATATYPE_I64 : LIBXSMM_DATATYPE_I32, opredop_flags, 0); } if (kernel == nullptr) { LOG(FATAL) << "Failed to generate libxsmm kernel for the SpMM operation." "To disable libxsmm, use dgl.use_libxsmm(false)."; } return kernel; } /** * @brief Use libxsmm to perform SpMM-Sum on all blocks. * @param block_csr_array The array containing csr matrices of all blocks. * @param B The feature on source nodes. * @param E The feature on edges. * @param C The result feature on destination nodes. * @param has_idx For the edge features, are there indices available. * @param N Feature size. * @param num_M_blocks Number of blocks to create along the rows of adjacency * matrix. * @param num_K_blocks Number of blocks to create along the columns of adjacency * matrix. * @param M_block_size block size along the rows of adjacency matrix. * @param kernel The libxsmm kernel. */ template inline void SpMMBlockwiseOpSum( CSRMatrixInternal *block_csr_array, const DType *B, const DType *E, DType *C, bool has_idx, IdType N, IdType num_M_blocks, IdType num_K_blocks, IdType M_block_size, libxsmm_meltwfunction_opreduce_vecs_idx kernel) { const DType *in_matrix1 = B; const DType *in_matrix2 = E; DType *output = C; #pragma omp parallel { for (IdType k = 0; k < num_K_blocks; k++) { #pragma omp for schedule(dynamic) for (IdType m = 0; m < num_M_blocks; m++) { CSRMatrixInternal cur_csr = block_csr_array[m * num_K_blocks + k]; const IdType M_start = m * M_block_size; for (IdType i = 0; i < cur_csr.num_rows; i++) { const IdType row_start = cur_csr.indptr[i]; const IdType row_end = cur_csr.indptr[i + 1]; const IdType dst = i + M_start; libxsmm_meltw_opreduce_vecs_idx_param params; params.n = row_end - row_start; params.indices = &cur_csr.indices[row_start]; params.in_matrix = in_matrix1; params.out_vec = &output[dst * N]; params.scale_vals = nullptr; if (has_idx) { params.in_matrix2 = in_matrix2; params.indices2 = &cur_csr.data[row_start]; } else { params.in_matrix2 = &in_matrix2[row_start * N]; } 
kernel(¶ms); } } } } } /** * @brief Use libxsmm to perform SpMM-Max/Min on all blocks. * @param block_csr_array The array containing csr matrices of all blocks. * @param B The feature on source nodes. * @param E The feature on edges. * @param C The result feature on destination nodes. * @param argB Arg-Min/Max on source nodes. * @param argE Arg-Min/Max on edges. * @param has_idx For the edge features, are there indices available. * @param N Feature size. * @param num_M_blocks Number of blocks to create along the rows of adjacency * matrix. * @param num_K_blocks Number of blocks to create along the columns of adjacency * matrix. * @param M_block_size block size along the rows of adjacency matrix. * @param kernel The libxsmm kernel. */ template inline void SpMMBlockwiseOpCmp( CSRMatrixInternal *block_csr_array, const DType *B, const DType *E, DType *C, IdType *argB, IdType *argE, bool has_idx, IdType N, IdType num_M_blocks, IdType num_K_blocks, IdType M_block_size, libxsmm_meltwfunction_opreduce_vecs_idx kernel) { const DType *in_matrix1 = B; const DType *in_matrix2 = E; DType *output = C; IdType *out_matrix1 = argB; IdType *out_matrix2 = argE; #pragma omp parallel { for (IdType k = 0; k < num_K_blocks; k++) { #pragma omp for schedule(dynamic) for (IdType m = 0; m < num_M_blocks; m++) { CSRMatrixInternal cur_csr = block_csr_array[m * num_K_blocks + k]; const IdType M_start = m * M_block_size; for (IdType i = 0; i < cur_csr.num_rows; i++) { const IdType row_start = cur_csr.indptr[i]; const IdType row_end = cur_csr.indptr[i + 1]; const IdType dst = i + M_start; libxsmm_meltw_opreduce_vecs_idx_param params; params.n = row_end - row_start; params.indices = &cur_csr.indices[row_start]; params.in_matrix = in_matrix1; params.out_vec = &output[dst * N]; params.argop_off_vec_0 = &out_matrix1[dst * N]; params.argop_off_vec_1 = &out_matrix2[dst * N]; params.scale_vals = nullptr; if (has_idx) { params.in_matrix2 = in_matrix2; params.indices2 = &cur_csr.data[row_start]; } else { 
params.in_matrix2 = &in_matrix2[row_start * N]; } kernel(¶ms); } } } } } /** * @brief Free the tiled CSR matrix data. * @param block_csr_array The array containing csr matrices of all blocks. * @param num_M_blocks Number of blocks to create along the rows of adjacency * matrix. * @param num_K_blocks Number of blocks to create along the columns of adjacency * matrix. * @param use_lhs Whether to use lhs. * @param use_rhs Whether to use rhs. */ template inline void SpMMFreeBlocks( CSRMatrixInternal *block_csr_array, IdType num_M_blocks, IdType num_K_blocks, bool use_lhs, bool use_rhs) { if (num_K_blocks > 1) { free(block_csr_array[0].indptr); if (use_lhs) free(block_csr_array[0].indices); if (use_rhs) free(block_csr_array[0].data); } free(block_csr_array); } /** * @brief Optimized CPU kernel of SpMM-Sum/Max/Min on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. * @param argu Arg-Min/Max on source nodes. * @param arge Arg-Min/Max on edges. * @note it uses libxsmm, blocking and dynamic thread scheduling. 
*/ template void SpMMRedopCsrOpt( const BcastOff &bcast, const CSRMatrix &csr, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge) { int32_t llc_size = GetLLCSize(); #ifdef DEBUG uint64_t startTick, endTick; startTick = __rdtsc(); #endif // DEBUG const bool has_idx = !IsNullArray(csr.data); DType *C = out.Ptr(); const DType *B = ufeat.Ptr(); const DType *E = efeat.Ptr(); IdType *argB, *argE; if (std::is_same>::value || std::is_same>::value) { argB = argu.Ptr(); argE = arge.Ptr(); } const int nthreads = omp_get_max_threads(); const IdType M = csr.num_rows; const IdType N = bcast.out_len; const IdType K = csr.num_cols; const IdType *indptr = csr.indptr.Ptr(); CHECK_NOTNULL(indptr); const IdType total_nnz = indptr[M]; if (M <= 0 || K <= 0 || N <= 0 || total_nnz <= 0) return; const double avg_degree = total_nnz * 1.0 / M; const double nnz_prob = avg_degree / K; IdType K_block_size = std::min( (int64_t)K, (int64_t)(llc_size / (N * sizeof(DType) * nnz_prob * BLOCKING_HEURISTIC_PARAM))); // NOLINT IdType M_block_size = M / (nthreads * NUM_BLOCKS_PER_THREAD); if (M_block_size == 0) M_block_size = 1; if (K_block_size == 0) K_block_size = 1; IdType num_M_blocks = (M + M_block_size - 1) / M_block_size; IdType num_K_blocks = (K + K_block_size - 1) / K_block_size; CSRMatrixInternal *block_csr_array = (CSRMatrixInternal *)aligned_alloc( 64, sizeof(CSRMatrixInternal) * num_M_blocks * num_K_blocks); #ifdef DEBUG endTick = __rdtsc(); if (std::is_same>::value) { LOG(INFO) << "Redop = Max"; } else if (std::is_same>::value) { LOG(INFO) << "Redop = Min"; } else if (std::is_same>::value) { LOG(INFO) << "Redop = Add"; } LOG(INFO) << "nthreads = " << nthreads << ", llc_size = " << llc_size; LOG(INFO) << "M = " << M << ", K = " << K << ", N = " << N; LOG(INFO) << "use_lhs = " << Op::use_lhs << ", use_rhs = " << Op::use_rhs; LOG(INFO) << "total_nnz = " << total_nnz << ", avg_degree = " << avg_degree; LOG(INFO) << "has_idx = " << has_idx; LOG(INFO) << "nnz_prob = " << 
nnz_prob; LOG(INFO) << "K_block_size = " << K_block_size << ", M_block_size = " << M_block_size; LOG(INFO) << "num_K_blocks = " << num_K_blocks << ", num_M_blocks = " << num_M_blocks; LOG(INFO) << "stage0 ticks = " << (endTick - startTick); startTick = __rdtsc(); #endif // DEBUG SpMMCreateBlocks( csr, block_csr_array, num_M_blocks, num_K_blocks, M_block_size, K_block_size, Op::use_lhs, Op::use_rhs); #ifdef DEBUG endTick = __rdtsc(); LOG(INFO) << "stage1 ticks = " << (endTick - startTick); startTick = __rdtsc(); #endif // DEBUG libxsmm_meltwfunction_opreduce_vecs_idx kernel = nullptr; if (std::is_same>::value) { kernel = SpMMCreateLibxsmmKernel( has_idx, N, LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_REDOP_MAX, true); } else if (std::is_same>::value) { kernel = SpMMCreateLibxsmmKernel( has_idx, N, LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_REDOP_MIN, true); } else if (std::is_same>::value) { kernel = SpMMCreateLibxsmmKernel( has_idx, N, LIBXSMM_MELTW_FLAG_OPREDUCE_VECS_REDOP_SUM, false); } #ifdef DEBUG endTick = __rdtsc(); LOG(INFO) << "stage2 ticks = " << (endTick - startTick); startTick = __rdtsc(); #endif // DEBUG if (std::is_same>::value || std::is_same>::value) { SpMMBlockwiseOpCmp( block_csr_array, B, E, C, argB, argE, has_idx, N, num_M_blocks, num_K_blocks, M_block_size, kernel); } else { SpMMBlockwiseOpSum( block_csr_array, B, E, C, has_idx, N, num_M_blocks, num_K_blocks, M_block_size, kernel); } #ifdef DEBUG endTick = __rdtsc(); LOG(INFO) << "stage3 ticks = " << (endTick - startTick); startTick = __rdtsc(); #endif // DEBUG SpMMFreeBlocks( block_csr_array, num_M_blocks, num_K_blocks, Op::use_lhs, Op::use_rhs); #ifdef DEBUG endTick = __rdtsc(); LOG(INFO) << "stage4 ticks = " << (endTick - startTick); #endif // DEBUG } /** * @brief Optimized CPU kernel of SpMM-Sum on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. 
* @note it uses libxsmm, blocking and dynamic thread scheduling. */ template void SpMMSumCsrLibxsmm( const BcastOff &bcast, const CSRMatrix &csr, NDArray ufeat, NDArray efeat, NDArray out) { NDArray dummy; SpMMRedopCsrOpt>( bcast, csr, ufeat, efeat, out, dummy, dummy); } /** * @brief Optimized CPU kernel of SpMM-Min/Max on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. * @param argu Arg-Min/Max on source nodes. * @param arge Arg-Min/Max on edges. * @note it uses libxsmm, blocking and dynamic thread scheduling. */ template void SpMMCmpCsrLibxsmm( const BcastOff &bcast, const CSRMatrix &csr, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge) { SpMMRedopCsrOpt( bcast, csr, ufeat, efeat, out, argu, arge); } } // namespace cpu } // namespace aten } // namespace dgl #endif // USE_LIBXSMM #endif // _WIN32 #endif // DGL_ARRAY_CPU_SPMM_BLOCKING_LIBXSMM_H_ ================================================ FILE: src/array/cpu/traversal.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/traversal.cc * @brief Graph traversal implementation */ #include "./traversal.h" #include #include #include namespace dgl { namespace aten { namespace impl { namespace { // A utility view class to wrap a vector into a queue. template struct VectorQueueWrapper { std::vector* vec; size_t head = 0; explicit VectorQueueWrapper(std::vector* vec) : vec(vec) {} void push(const DType& elem) { vec->push_back(elem); } DType top() const { return vec->operator[](head); } void pop() { ++head; } bool empty() const { return head == vec->size(); } size_t size() const { return vec->size() - head; } }; // Internal function to merge multiple traversal traces into one ndarray. // It is similar to zip the vectors together. 
template IdArray MergeMultipleTraversals(const std::vector>& traces) { int64_t max_len = 0, total_len = 0; for (size_t i = 0; i < traces.size(); ++i) { const int64_t tracelen = traces[i].size(); max_len = std::max(max_len, tracelen); total_len += traces[i].size(); } IdArray ret = IdArray::Empty( {total_len}, DGLDataType{kDGLInt, sizeof(DType) * 8, 1}, DGLContext{kDGLCPU, 0}); DType* ret_data = static_cast(ret->data); for (int64_t i = 0; i < max_len; ++i) { for (size_t j = 0; j < traces.size(); ++j) { const int64_t tracelen = traces[j].size(); if (i >= tracelen) { continue; } *(ret_data++) = traces[j][i]; } } return ret; } // Internal function to compute sections if multiple traversal traces // are merged into one ndarray. template IdArray ComputeMergedSections(const std::vector>& traces) { int64_t max_len = 0; for (size_t i = 0; i < traces.size(); ++i) { const int64_t tracelen = traces[i].size(); max_len = std::max(max_len, tracelen); } IdArray ret = IdArray::Empty( {max_len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* ret_data = static_cast(ret->data); for (int64_t i = 0; i < max_len; ++i) { int64_t sec_len = 0; for (size_t j = 0; j < traces.size(); ++j) { const int64_t tracelen = traces[j].size(); if (i < tracelen) { ++sec_len; } } *(ret_data++) = sec_len; } return ret; } } // namespace template Frontiers BFSNodesFrontiers(const CSRMatrix& csr, IdArray source) { std::vector ids; std::vector sections; VectorQueueWrapper queue(&ids); auto visit = [&](const int64_t v) {}; auto make_frontier = [&]() { if (!queue.empty()) { // do not push zero-length frontier sections.push_back(queue.size()); } }; BFSTraverseNodes(csr, source, &queue, visit, make_frontier); Frontiers front; front.ids = VecToIdArray(ids, sizeof(IdType) * 8); front.sections = VecToIdArray(sections, sizeof(int64_t) * 8); return front; } template Frontiers BFSNodesFrontiers( const CSRMatrix&, IdArray); template Frontiers BFSNodesFrontiers( const CSRMatrix&, IdArray); template Frontiers 
BFSEdgesFrontiers(const CSRMatrix& csr, IdArray source) { std::vector ids; std::vector sections; // NOTE: std::queue has no top() method. std::vector nodes; VectorQueueWrapper queue(&nodes); auto visit = [&](const IdType e) { ids.push_back(e); }; bool first_frontier = true; auto make_frontier = [&] { if (first_frontier) { first_frontier = false; // do not push the first section when doing edges } else if (!queue.empty()) { // do not push zero-length frontier sections.push_back(queue.size()); } }; BFSTraverseEdges(csr, source, &queue, visit, make_frontier); Frontiers front; front.ids = VecToIdArray(ids, sizeof(IdType) * 8); front.sections = VecToIdArray(sections, sizeof(int64_t) * 8); return front; } template Frontiers BFSEdgesFrontiers( const CSRMatrix&, IdArray); template Frontiers BFSEdgesFrontiers( const CSRMatrix&, IdArray); template Frontiers TopologicalNodesFrontiers(const CSRMatrix& csr) { std::vector ids; std::vector sections; VectorQueueWrapper queue(&ids); auto visit = [&](const uint64_t v) {}; auto make_frontier = [&]() { if (!queue.empty()) { // do not push zero-length frontier sections.push_back(queue.size()); } }; TopologicalNodes(csr, &queue, visit, make_frontier); Frontiers front; front.ids = VecToIdArray(ids, sizeof(IdType) * 8); front.sections = VecToIdArray(sections, sizeof(int64_t) * 8); return front; } template Frontiers TopologicalNodesFrontiers( const CSRMatrix&); template Frontiers TopologicalNodesFrontiers( const CSRMatrix&); template Frontiers DGLDFSEdges(const CSRMatrix& csr, IdArray source) { const int64_t len = source->shape[0]; const IdType* src_data = static_cast(source->data); std::vector> edges(len); for (int64_t i = 0; i < len; ++i) { auto visit = [&](IdType e, int tag) { edges[i].push_back(e); }; DFSLabeledEdges(csr, src_data[i], false, false, visit); } Frontiers front; front.ids = MergeMultipleTraversals(edges); front.sections = ComputeMergedSections(edges); return front; } template Frontiers DGLDFSEdges(const CSRMatrix&, 
IdArray); template Frontiers DGLDFSEdges(const CSRMatrix&, IdArray); template Frontiers DGLDFSLabeledEdges( const CSRMatrix& csr, IdArray source, const bool has_reverse_edge, const bool has_nontree_edge, const bool return_labels) { const int64_t len = source->shape[0]; const IdType* src_data = static_cast(source->data); std::vector> edges(len); std::vector> tags; if (return_labels) { tags.resize(len); } for (int64_t i = 0; i < len; ++i) { auto visit = [&](IdType e, int64_t tag) { edges[i].push_back(e); if (return_labels) { tags[i].push_back(tag); } }; DFSLabeledEdges( csr, src_data[i], has_reverse_edge, has_nontree_edge, visit); } Frontiers front; front.ids = MergeMultipleTraversals(edges); front.sections = ComputeMergedSections(edges); if (return_labels) { front.tags = MergeMultipleTraversals(tags); } return front; } template Frontiers DGLDFSLabeledEdges( const CSRMatrix&, IdArray, const bool, const bool, const bool); template Frontiers DGLDFSLabeledEdges( const CSRMatrix&, IdArray, const bool, const bool, const bool); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cpu/traversal.h ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/traversal.h * @brief Graph traversal routines. * * Traversal routines generate frontiers. Frontiers can be node frontiers or * edge frontiers depending on the traversal function. Each frontier is a list * of nodes/edges (specified by their ids). An optional tag can be specified for * each node/edge (represented by an int value). */ #ifndef DGL_ARRAY_CPU_TRAVERSAL_H_ #define DGL_ARRAY_CPU_TRAVERSAL_H_ #include #include #include #include namespace dgl { namespace aten { namespace impl { /** * @brief Traverse the graph in a breadth-first-search (BFS) order. 
*
 * The queue object must satisfy the following interface:
 *   Members:
 *     void push(IdType);  // push one node
 *     IdType top();       // get the first node
 *     void pop();         // pop one node
 *     bool empty();       // return true if the queue is empty
 *     size_t size();      // return the size of the queue
 * Note that std::queue does not satisfy this interface as-is, because it
 * provides front() rather than top().
 *
 * The visit function must be compatible with the following interface:
 *   void (*visit)(IdType);
 *
 * The frontier function must be compatible with the following interface:
 *   void (*make_frontier)(void);
 *
 * @param csr The graph in CSR format; the column indices stored for row `u`
 *        are the nodes that are reached from `u`.
 * @param source Source nodes. Each one is marked visited, passed to `visit`
 *        and pushed to the queue before the first frontier is made.
 * @param queue The queue used to do bfs.
 * @param visit The function to call when a node is visited.
 * @param make_frontier The function to indicate that a new frontier can be
 *        made; it is called once after the sources are pushed and once after
 *        each pass over the nodes that were queued at the start of the pass.
 * @note Node ids taken from `source` and from the CSR indices are used to
 *       index the visited flags without a range check.
 */
template <
    typename IdType, typename Queue, typename VisitFn, typename FrontierFn>
void BFSTraverseNodes(
    const CSRMatrix &csr, IdArray source, Queue *queue, VisitFn visit,
    FrontierFn make_frontier) {
  const int64_t len = source->shape[0];
  const IdType *src_data = static_cast(source->data);
  const IdType *indptr_data = static_cast(csr.indptr->data);
  const IdType *indices_data = static_cast(csr.indices->data);
  const int64_t num_nodes = csr.num_rows;
  std::vector visited(num_nodes);
  // Seed the traversal: every source is visited and enqueued up front.
  for (int64_t i = 0; i < len; ++i) {
    const IdType u = src_data[i];
    visited[u] = true;
    visit(u);
    queue->push(u);
  }
  make_frontier();
  while (!queue->empty()) {
    // Snapshot the queue size so that this pass only expands the nodes that
    // were already queued; nodes pushed below are handled by the next pass.
    const size_t size = queue->size();
    for (size_t i = 0; i < size; ++i) {
      const IdType u = queue->top();
      queue->pop();
      for (auto idx = indptr_data[u]; idx < indptr_data[u + 1]; ++idx) {
        auto v = indices_data[idx];
        if (!visited[v]) {
          visited[v] = true;
          visit(v);
          queue->push(v);
        }
      }
    }
    make_frontier();
  }
}

/**
 * @brief Traverse the graph in a breadth-first-search (BFS) order, returning
 * the edges of the BFS tree.
*
 * The queue object must satisfy the following interface:
 *   Members:
 *     void push(IdType);  // push one node
 *     IdType top();       // get the first node
 *     void pop();         // pop one node
 *     bool empty();       // return true if the queue is empty
 *     size_t size();      // return the size of the queue
 * Note that std::queue does not satisfy this interface as-is, because it
 * provides front() rather than top().
 *
 * The visit function must be compatible with the following interface:
 *   void (*visit)(IdType);
 *
 * The frontier function must be compatible with the following interface:
 *   void (*make_frontier)(void);
 *
 * @param csr The graph in CSR format; the column indices stored for row `u`
 *        are the nodes that are reached from `u`.
 * @param source Source nodes. They are marked visited and pushed to the
 *        queue; unlike the node traversal, they are not passed to `visit`.
 * @param queue The queue used to do bfs. It holds node ids.
 * @param visit The function to call for each edge through which a not yet
 *        visited node is reached. The argument is the edge ID: the entry of
 *        `csr.data` at the edge's CSR position when `csr.data` has a non-null
 *        data pointer, otherwise the CSR position itself.
 * @param make_frontier The function to indicate that a new frontier can be
 *        made; it is called once after the sources are pushed and once after
 *        each pass over the nodes that were queued at the start of the pass.
 * @note Node ids taken from `source` and from the CSR indices are used to
 *       index the visited flags without a range check.
 */
template <
    typename IdType, typename Queue, typename VisitFn, typename FrontierFn>
void BFSTraverseEdges(
    const CSRMatrix &csr, IdArray source, Queue *queue, VisitFn visit,
    FrontierFn make_frontier) {
  const int64_t len = source->shape[0];
  const IdType *src_data = static_cast(source->data);
  const IdType *indptr_data = static_cast(csr.indptr->data);
  const IdType *indices_data = static_cast(csr.indices->data);
  const IdType *eid_data = static_cast(csr.data->data);
  const int64_t num_nodes = csr.num_rows;
  std::vector visited(num_nodes);
  // Seed the traversal with the source nodes; no edge leads to them, so
  // nothing is reported through `visit` here.
  for (int64_t i = 0; i < len; ++i) {
    const IdType u = src_data[i];
    visited[u] = true;
    queue->push(u);
  }
  make_frontier();
  while (!queue->empty()) {
    // Snapshot the queue size so that this pass only expands the nodes that
    // were already queued; nodes pushed below are handled by the next pass.
    const size_t size = queue->size();
    for (size_t i = 0; i < size; ++i) {
      const IdType u = queue->top();
      queue->pop();
      for (auto idx = indptr_data[u]; idx < indptr_data[u + 1]; ++idx) {
        // Fall back to the CSR position when no explicit edge ids are stored.
        auto e = eid_data ? eid_data[idx] : idx;
        const IdType v = indices_data[idx];
        if (!visited[v]) {
          visited[v] = true;
          visit(e);
          queue->push(v);
        }
      }
    }
    make_frontier();
  }
}

/**
 * @brief Traverse the graph in topological order.
*
 * The queue object must satisfy the following interface:
 *   Members:
 *     void push(IdType);  // push one node
 *     IdType top();       // get the first node
 *     void pop();         // pop one node
 *     bool empty();       // return true if the queue is empty
 *     size_t size();      // return the size of the queue
 * Note that std::queue does not satisfy this interface as-is, because it
 * provides front() rather than top().
 *
 * The visit function must be compatible with the following interface:
 *   void (*visit)(IdType);
 *
 * The frontier function must be compatible with the following interface:
 *   void (*make_frontier)(void);
 *
 * @param csr The graph in CSR format; the column indices stored for row `u`
 *        are the nodes that are reached from `u`.
 * @param queue The queue used to hold the nodes that are ready to be expanded.
 * @param visit The function to call when a node is visited.
 * @param make_frontier The function to indicate that a new frontier can be
 *        made; it is called once after the initial nodes are pushed and once
 *        after each pass over the nodes that were queued at the start of the
 *        pass.
 * @note Raises a fatal log error if not every node has been visited when the
 *       queue runs empty, which is reported as a loop in the graph.
 */
template <
    typename IdType, typename Queue, typename VisitFn, typename FrontierFn>
void TopologicalNodes(
    const CSRMatrix &csr, Queue *queue, VisitFn visit,
    FrontierFn make_frontier) {
  int64_t num_visited_nodes = 0;
  const IdType *indptr_data = static_cast(csr.indptr->data);
  const IdType *indices_data = static_cast(csr.indices->data);
  const int64_t num_nodes = csr.num_rows;
  const int64_t num_edges = csr.indices->shape[0];
  // degrees[v] counts how many CSR entries point at v, i.e. how many edges
  // still have to be processed before v becomes ready.
  std::vector degrees(num_nodes, 0);
  for (int64_t eid = 0; eid < num_edges; ++eid) {
    degrees[indices_data[eid]]++;
  }
  // Nodes that no edge points at form the first frontier.
  for (int64_t vid = 0; vid < num_nodes; ++vid) {
    if (degrees[vid] == 0) {
      visit(vid);
      queue->push(static_cast(vid));
      ++num_visited_nodes;
    }
  }
  make_frontier();
  while (!queue->empty()) {
    // Snapshot the queue size so that this pass only expands the nodes that
    // were already queued; nodes pushed below are handled by the next pass.
    const size_t size = queue->size();
    for (size_t i = 0; i < size; ++i) {
      const IdType u = queue->top();
      queue->pop();
      for (auto idx = indptr_data[u]; idx < indptr_data[u + 1]; ++idx) {
        const IdType v = indices_data[idx];
        // v becomes ready once the last edge pointing at it is consumed.
        if (--(degrees[v]) == 0) {
          visit(v);
          queue->push(v);
          ++num_visited_nodes;
        }
      }
    }
    make_frontier();
  }
  // Nodes whose counter never reached zero were never visited.
  if (num_visited_nodes != num_nodes) {
    LOG(FATAL)
        << "Error in topological traversal: loop detected in the given graph.";
  }
}

/** @brief Tags for ``DFSEdges``.
*/ enum DFSEdgeTag { kForward = 0, kReverse, kNonTree, }; /** * @brief Traverse the graph in a depth-first-search (DFS) order. * * The traversal visit edges in its DFS order. Edges have three tags: * FORWARD(0), REVERSE(1), NONTREE(2) * * A FORWARD edge is one in which `u` has been visisted but `v` has not. * A REVERSE edge is one in which both `u` and `v` have been visisted and the * edge is in the DFS tree. A NONTREE edge is one in which both `u` and `v` have * been visisted but the edge is NOT in the DFS tree. * * @param source Source node. * @param reversed If true, DFS follows the in-edge direction * @param has_reverse_edge If true, REVERSE edges are included * @param has_nontree_edge If true, NONTREE edges are included * @param visit The function to call when an edge is visited; the edge id and * its tag will be given as the arguments. */ template void DFSLabeledEdges( const CSRMatrix &csr, IdType source, bool has_reverse_edge, bool has_nontree_edge, VisitFn visit) { const int64_t num_nodes = csr.num_rows; CHECK_GE(num_nodes, source) << "source " << source << " is out of range [0," << num_nodes << "]"; const IdType *indptr_data = static_cast(csr.indptr->data); const IdType *indices_data = static_cast(csr.indices->data); const IdType *eid_data = static_cast(csr.data->data); if (indptr_data[source + 1] - indptr_data[source] == 0) { // no out-going edges from the source node return; } typedef std::tuple StackEntry; std::stack stack; std::vector visited(num_nodes); visited[source] = true; stack.push(std::make_tuple(source, 0, false)); IdType u = 0; int64_t i = 0; bool on_tree = false; while (!stack.empty()) { std::tie(u, i, on_tree) = stack.top(); const IdType v = indices_data[indptr_data[u] + i]; const IdType uv = eid_data ? eid_data[indptr_data[u] + i] : indptr_data[u] + i; if (visited[v]) { if (!on_tree && has_nontree_edge) { visit(uv, kNonTree); } else if (on_tree && has_reverse_edge) { visit(uv, kReverse); } stack.pop(); // find next one. 
if (indptr_data[u] + i < indptr_data[u + 1] - 1) { stack.push(std::make_tuple(u, i + 1, false)); } } else { visited[v] = true; std::get<2>(stack.top()) = true; visit(uv, kForward); // expand if (indptr_data[v] < indptr_data[v + 1]) { stack.push(std::make_tuple(v, 0, false)); } } } } } // namespace impl } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CPU_TRAVERSAL_H_ ================================================ FILE: src/array/cuda/array_cumsum.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/array_cumsum.cu * @brief Array cumsum GPU implementation */ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template IdArray CumSum(IdArray array, bool prepend_zero) { const int64_t len = array.NumElements(); if (len == 0) return !prepend_zero ? array : aten::Full(0, 1, array->dtype.bits, array->ctx); auto device = runtime::DeviceAPI::Get(array->ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); const IdType* in_d = array.Ptr(); IdArray ret; IdType* out_d = nullptr; if (prepend_zero) { ret = aten::Full(0, len + 1, array->dtype.bits, array->ctx); out_d = ret.Ptr() + 1; } else { ret = aten::NewIdArray(len, array->ctx, array->dtype.bits); out_d = ret.Ptr(); } // Allocate workspace size_t workspace_size = 0; CUDA_CALL(cub::DeviceScan::InclusiveSum( nullptr, workspace_size, in_d, out_d, len, stream)); void* workspace = device->AllocWorkspace(array->ctx, workspace_size); // Compute cumsum CUDA_CALL(cub::DeviceScan::InclusiveSum( workspace, workspace_size, in_d, out_d, len, stream)); device->FreeWorkspace(array->ctx, workspace); return ret; } template IdArray CumSum(IdArray, bool); template IdArray CumSum(IdArray, bool); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/array_index_select.cu 
================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/array_index_select.cu * @brief Array index select GPU implementation */ #include #include "../../runtime/cuda/cuda_common.h" #include "./array_index_select.cuh" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template NDArray IndexSelect(NDArray array, IdArray index) { const int64_t arr_len = array->shape[0]; const int64_t len = index->shape[0]; int64_t num_feat = 1; std::vector shape{len}; for (int d = 1; d < array->ndim; ++d) { num_feat *= array->shape[d]; shape.emplace_back(array->shape[d]); } // use index->ctx for pinned array NDArray ret = NDArray::Empty(shape, array->dtype, index->ctx); if (len == 0 || arr_len * num_feat == 0) return ret; DType* ret_data = static_cast(ret->data); const DType* array_data = static_cast(cuda::GetDevicePointer(array)); const IdType* idx_data = static_cast(index->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); if (num_feat == 1) { const int nt = cuda::FindNumThreads(len); const int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( IndexSelectSingleKernel, nb, nt, 0, stream, array_data, idx_data, len, arr_len, ret_data); } else { dim3 block(256, 1); while (static_cast(block.x) >= 2 * num_feat) { block.x /= 2; block.y *= 2; } const dim3 grid((len + block.y - 1) / block.y); CUDA_KERNEL_CALL( IndexSelectMultiKernel, grid, block, 0, stream, array_data, num_feat, idx_data, len, arr_len, ret_data); } return ret; } template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); #if BF16_ENABLED template NDArray IndexSelect( NDArray, IdArray); template NDArray IndexSelect( NDArray, IdArray); #endif // BF16_ENABLED template NDArray IndexSelect(NDArray, 
IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template NDArray IndexSelect(NDArray, IdArray); template DType IndexSelect(NDArray array, int64_t index) { auto device = runtime::DeviceAPI::Get(array->ctx); DType ret = static_cast(0.0f); device->CopyDataFromTo( static_cast(array->data) + index, 0, &ret, 0, sizeof(DType), array->ctx, DGLContext{kDGLCPU, 0}, array->dtype); return ret; } template int32_t IndexSelect(NDArray array, int64_t index); template int64_t IndexSelect(NDArray array, int64_t index); template uint32_t IndexSelect(NDArray array, int64_t index); template uint64_t IndexSelect(NDArray array, int64_t index); template __half IndexSelect(NDArray array, int64_t index); #if BF16_ENABLED template __nv_bfloat16 IndexSelect( NDArray array, int64_t index); #endif // BF16_ENABLED template float IndexSelect(NDArray array, int64_t index); template double IndexSelect(NDArray array, int64_t index); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/array_index_select.cuh ================================================ /** * Copyright (c) 2021-2022 by Contributors * @file array/cuda/array_index_select.cuh * @brief Array index select GPU kernel implementation */ #ifndef DGL_ARRAY_CUDA_ARRAY_INDEX_SELECT_CUH_ #define DGL_ARRAY_CUDA_ARRAY_INDEX_SELECT_CUH_ namespace dgl { namespace aten { namespace impl { template __global__ void IndexSelectSingleKernel( const DType* array, const IdType* index, const int64_t length, const int64_t arr_len, DType* out, const int64_t* perm = nullptr) { int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { assert(index[tx] >= 0 && index[tx] < arr_len); const auto out_row = perm ? 
perm[tx] : tx; out[out_row] = array[index[tx]]; tx += stride_x; } } template __global__ void IndexSelectMultiKernel( const DType* const array, const int64_t num_feat, const IdType* const index, const int64_t length, const int64_t arr_len, DType* const out, const int64_t* perm = nullptr) { int64_t out_row_index = blockIdx.x * blockDim.y + threadIdx.y; const int64_t stride = blockDim.y * gridDim.x; while (out_row_index < length) { int64_t col = threadIdx.x; const int64_t in_row = index[out_row_index]; assert(in_row >= 0 && in_row < arr_len); const auto out_row = perm ? perm[out_row_index] : out_row_index; while (col < num_feat) { out[out_row * num_feat + col] = array[in_row * num_feat + col]; col += blockDim.x; } out_row_index += stride; } } template __global__ void IndexScatterSingleKernel( const DType* array, const IdType* index, const int64_t length, const int64_t arr_len, DType* out) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { assert(index[tx] >= 0 && index[tx] < arr_len); out[index[tx]] = array[tx]; tx += stride_x; } } template __global__ void IndexScatterMultiKernel( const DType* const array, const int64_t num_feat, const IdType* const index, const int64_t length, const int64_t arr_len, DType* const out) { int64_t in_row = blockIdx.x * blockDim.y + threadIdx.y; const int64_t stride = blockDim.y * gridDim.x; while (in_row < length) { int64_t col = threadIdx.x; const int64_t out_row = index[in_row]; assert(out_row >= 0 && out_row < arr_len); while (col < num_feat) { out[out_row * num_feat + col] = array[in_row * num_feat + col]; col += blockDim.x; } in_row += stride; } } } // namespace impl } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CUDA_ARRAY_INDEX_SELECT_CUH_ ================================================ FILE: src/array/cuda/array_nonzero.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/array_nonzero.cc * @brief 
Array nonzero CPU implementation */ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template struct IsNonZeroIndex { explicit IsNonZeroIndex(const IdType* array) : array_(array) {} __device__ bool operator()(const int64_t index) { return array_[index] != 0; } const IdType* array_; }; template IdArray NonZero(IdArray array) { const auto& ctx = array->ctx; auto device = runtime::DeviceAPI::Get(ctx); const int64_t len = array->shape[0]; IdArray ret = NewIdArray(len, ctx, 64); cudaStream_t stream = runtime::getCurrentCUDAStream(); const IdType* const in_data = static_cast(array->data); int64_t* const out_data = static_cast(ret->data); IsNonZeroIndex comp(in_data); cub::CountingInputIterator counter(0); // room for cub to output on GPU int64_t* d_num_nonzeros = static_cast(device->AllocWorkspace(ctx, sizeof(int64_t))); size_t temp_size = 0; CUDA_CALL(cub::DeviceSelect::If( nullptr, temp_size, counter, out_data, d_num_nonzeros, len, comp, stream)); void* temp = device->AllocWorkspace(ctx, temp_size); CUDA_CALL(cub::DeviceSelect::If( temp, temp_size, counter, out_data, d_num_nonzeros, len, comp, stream)); device->FreeWorkspace(ctx, temp); // copy number of selected elements from GPU to CPU int64_t num_nonzeros = cuda::GetCUDAScalar(device, ctx, d_num_nonzeros); device->FreeWorkspace(ctx, d_num_nonzeros); device->StreamSync(ctx, stream); // truncate array to size return ret.CreateView({num_nonzeros}, ret->dtype, 0); } template IdArray NonZero(IdArray); template IdArray NonZero(IdArray); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/array_op_impl.cu ================================================ /** * Copyright (c) 2020-2021 by Contributors * @file array/cuda/array_op_impl.cu * @brief Array operator GPU implementation */ #include #include "../../runtime/cuda/cuda_common.h" #include 
"../../runtime/cuda/cuda_hashtable.cuh" #include "../arith.h" #include "./utils.h" namespace dgl { using runtime::NDArray; using namespace runtime::cuda; namespace aten { namespace impl { ///////////////////////////// BinaryElewise ///////////////////////////// template __global__ void _BinaryElewiseKernel( const IdType* lhs, const IdType* rhs, IdType* out, int64_t length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = Op::Call(lhs[tx], rhs[tx]); tx += stride_x; } } template IdArray BinaryElewise(IdArray lhs, IdArray rhs) { const int64_t len = lhs->shape[0]; IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits); const IdType* lhs_data = static_cast(lhs->data); const IdType* rhs_data = static_cast(rhs->data); IdType* ret_data = static_cast(ret->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(len); int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( (_BinaryElewiseKernel), nb, nt, 0, stream, lhs_data, rhs_data, ret_data, len); return ret; } template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( 
IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template IdArray BinaryElewise( IdArray lhs, IdArray rhs); template __global__ void _BinaryElewiseKernel( const IdType* lhs, IdType rhs, IdType* out, int64_t length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = Op::Call(lhs[tx], rhs); tx += stride_x; } } template IdArray BinaryElewise(IdArray lhs, IdType rhs) { const int64_t len = lhs->shape[0]; IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits); const IdType* lhs_data = static_cast(lhs->data); IdType* ret_data = static_cast(ret->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(len); int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( (_BinaryElewiseKernel), nb, nt, 0, stream, lhs_data, rhs, ret_data, len); return ret; } template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int32_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray 
lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template IdArray BinaryElewise( IdArray lhs, int64_t rhs); template __global__ void _BinaryElewiseKernel( IdType lhs, const IdType* rhs, IdType* out, int64_t length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = Op::Call(lhs, rhs[tx]); tx += stride_x; } } template IdArray BinaryElewise(IdType lhs, IdArray rhs) { const int64_t len = rhs->shape[0]; IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits); const IdType* rhs_data = static_cast(rhs->data); IdType* ret_data = static_cast(ret->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(len); int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( (_BinaryElewiseKernel), nb, nt, 0, stream, lhs, rhs_data, ret_data, len); return ret; } template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int32_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, 
IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template IdArray BinaryElewise( int64_t lhs, IdArray rhs); template __global__ void _UnaryElewiseKernel( const IdType* lhs, IdType* out, int64_t length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = Op::Call(lhs[tx]); tx += stride_x; } } template IdArray UnaryElewise(IdArray lhs) { const int64_t len = lhs->shape[0]; IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits); const IdType* lhs_data = static_cast(lhs->data); IdType* ret_data = static_cast(ret->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(len); int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( (_UnaryElewiseKernel), nb, nt, 0, stream, lhs_data, ret_data, len); return ret; } template IdArray UnaryElewise(IdArray lhs); template IdArray UnaryElewise(IdArray lhs); ///////////////////////////// Full ///////////////////////////// template __global__ void _FullKernel(DType* out, int64_t length, DType val) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = val; tx += stride_x; } } template NDArray Full(DType val, int64_t length, DGLContext ctx) { NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits::dtype, ctx); DType* ret_data = static_cast(ret->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(length); int nb = (length + nt - 1) / nt; CUDA_KERNEL_CALL( (_FullKernel), nb, nt, 0, stream, ret_data, length, val); return ret; } template IdArray Full( 
int32_t val, int64_t length, DGLContext ctx); template IdArray Full( int64_t val, int64_t length, DGLContext ctx); template IdArray Full( __half val, int64_t length, DGLContext ctx); #if BF16_ENABLED template IdArray Full( __nv_bfloat16 val, int64_t length, DGLContext ctx); #endif // BF16_ENABLED template IdArray Full( float val, int64_t length, DGLContext ctx); template IdArray Full( double val, int64_t length, DGLContext ctx); ///////////////////////////// Range ///////////////////////////// template __global__ void _RangeKernel(IdType* out, IdType low, IdType length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = low + tx; tx += stride_x; } } template IdArray Range(IdType low, IdType high, DGLContext ctx) { CHECK(high >= low) << "high must be bigger than low"; const IdType length = high - low; IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8); if (length == 0) return ret; IdType* ret_data = static_cast(ret->data); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(length); int nb = (length + nt - 1) / nt; CUDA_KERNEL_CALL( (_RangeKernel), nb, nt, 0, stream, ret_data, low, length); return ret; } template IdArray Range(int32_t, int32_t, DGLContext); template IdArray Range(int64_t, int64_t, DGLContext); ///////////////////////////// Relabel_ ////////////////////////////// template __global__ void _RelabelKernel( IdType* out, int64_t length, DeviceOrderedHashTable table) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = table.Search(out[tx])->local; tx += stride_x; } } template IdArray Relabel_(const std::vector& arrays) { IdArray all_nodes = Concat(arrays); const int64_t total_length = all_nodes->shape[0]; if (total_length == 0) { return all_nodes; } const auto& ctx = arrays[0]->ctx; auto device = runtime::DeviceAPI::Get(ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); 
// build node maps and get the induced nodes OrderedHashTable node_map(total_length, ctx, stream); int64_t num_induced = 0; int64_t* num_induced_device = static_cast(device->AllocWorkspace(ctx, sizeof(int64_t))); IdArray induced_nodes = NewIdArray(total_length, ctx, sizeof(IdType) * 8); CUDA_CALL(cudaMemsetAsync( num_induced_device, 0, sizeof(*num_induced_device), stream)); node_map.FillWithDuplicates( all_nodes.Ptr(), all_nodes->shape[0], induced_nodes.Ptr(), num_induced_device, stream); // copy using the internal current stream device->CopyDataFromTo( num_induced_device, 0, &num_induced, 0, sizeof(num_induced), ctx, DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1}); device->StreamSync(ctx, stream); device->FreeWorkspace(ctx, num_induced_device); // resize the induced nodes induced_nodes->shape[0] = num_induced; // relabel const int nt = 128; for (IdArray arr : arrays) { const int64_t length = arr->shape[0]; int nb = (length + nt - 1) / nt; CUDA_KERNEL_CALL( (_RelabelKernel), nb, nt, 0, stream, arr.Ptr(), length, node_map.DeviceHandle()); } return induced_nodes; } template IdArray Relabel_( const std::vector& arrays); template IdArray Relabel_( const std::vector& arrays); ///////////////////////////// AsNumBits ///////////////////////////// template __global__ void _CastKernel(const InType* in, OutType* out, size_t length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[tx] = in[tx]; tx += stride_x; } } template IdArray AsNumBits(IdArray arr, uint8_t bits) { const std::vector shape(arr->shape, arr->shape + arr->ndim); IdArray ret = IdArray::Empty(shape, DGLDataType{kDGLInt, bits, 1}, arr->ctx); const int64_t length = ret.NumElements(); cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = cuda::FindNumThreads(length); int nb = (length + nt - 1) / nt; if (bits == 32) { CUDA_KERNEL_CALL( (_CastKernel), nb, nt, 0, stream, static_cast(arr->data), static_cast(ret->data), length); } else { 
CUDA_KERNEL_CALL( (_CastKernel), nb, nt, 0, stream, static_cast(arr->data), static_cast(ret->data), length); } return ret; } template IdArray AsNumBits(IdArray arr, uint8_t bits); template IdArray AsNumBits(IdArray arr, uint8_t bits); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/array_scatter.cu ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cuda/array_scatter.cu * @brief Array scatter GPU implementation */ #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template __global__ void _ScatterKernel( const IdType* index, const DType* value, int64_t length, DType* out) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { out[index[tx]] = value[tx]; tx += stride_x; } } template void Scatter_(IdArray index, NDArray value, NDArray out) { const int64_t len = index->shape[0]; const IdType* idx = index.Ptr(); const DType* val = value.Ptr(); DType* outd = out.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); const int nt = cuda::FindNumThreads(len); const int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL(_ScatterKernel, nb, nt, 0, stream, idx, val, len, outd); } template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); #if BF16_ENABLED template void Scatter_( IdArray, NDArray, NDArray); #endif // BF16_ENABLED template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); #if BF16_ENABLED template void Scatter_( IdArray, NDArray, NDArray); #endif // BF16_ENABLED template void Scatter_(IdArray, 
NDArray, NDArray); template void Scatter_(IdArray, NDArray, NDArray); }; // namespace impl }; // namespace aten }; // namespace dgl ================================================ FILE: src/array/cuda/array_sort.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/array_sort.cu * @brief Array sort GPU implementation */ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template std::pair Sort(IdArray array, int num_bits) { const auto& ctx = array->ctx; auto device = runtime::DeviceAPI::Get(ctx); const int64_t nitems = array->shape[0]; IdArray orig_idx = Range(0, nitems, 64, ctx); IdArray sorted_array = NewIdArray(nitems, ctx, array->dtype.bits); IdArray sorted_idx = NewIdArray(nitems, ctx, 64); const IdType* keys_in = array.Ptr(); const int64_t* values_in = orig_idx.Ptr(); IdType* keys_out = sorted_array.Ptr(); int64_t* values_out = sorted_idx.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); if (num_bits == 0) { num_bits = sizeof(IdType) * 8; } // Allocate workspace size_t workspace_size = 0; CUDA_CALL(cub::DeviceRadixSort::SortPairs( nullptr, workspace_size, keys_in, keys_out, values_in, values_out, nitems, 0, num_bits, stream)); void* workspace = device->AllocWorkspace(ctx, workspace_size); // Compute CUDA_CALL(cub::DeviceRadixSort::SortPairs( workspace, workspace_size, keys_in, keys_out, values_in, values_out, nitems, 0, num_bits, stream)); device->FreeWorkspace(ctx, workspace); return std::make_pair(sorted_array, sorted_idx); } template std::pair Sort( IdArray, int num_bits); template std::pair Sort( IdArray, int num_bits); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/atomic.cuh ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cuda/atomic.cuh * 
@brief Atomic functions */ #ifndef DGL_ARRAY_CUDA_ATOMIC_CUH_ #define DGL_ARRAY_CUDA_ATOMIC_CUH_ #include #include #include #include #include "bf16.cuh" #include "fp16.cuh" #if __CUDA_ARCH__ >= 600 #include #endif namespace dgl { namespace aten { namespace cuda { // Type trait for selecting code type template struct Code {}; template <> struct Code<2> { typedef unsigned short int Type; // NOLINT }; template <> struct Code<4> { typedef unsigned int Type; // NOLINT }; template <> struct Code<8> { typedef unsigned long long int Type; // NOLINT }; // Helper class for converting to/from atomicCAS compatible types. template struct Cast { typedef typename Code::Type Type; static __device__ __forceinline__ Type Encode(T val) { return static_cast(val); } static __device__ __forceinline__ T Decode(Type code) { return static_cast(code); } }; template <> struct Cast { typedef Code::Type Type; static __device__ __forceinline__ Type Encode(half val) { return __half_as_ushort(val); } static __device__ __forceinline__ half Decode(Type code) { return __ushort_as_half(code); } }; #if BF16_ENABLED template <> struct Cast<__nv_bfloat16> { typedef Code::Type Type; static __device__ __forceinline__ Type Encode(__nv_bfloat16 val) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 return __bfloat16_as_ushort(val); #else printf( "Atomic operations are not supported for bfloat16 (BF16) " "on GPUs with compute capability less than 8.0.\n"); __trap(); return static_cast(0); #endif } static __device__ __forceinline__ __nv_bfloat16 Decode(Type code) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 return __ushort_as_bfloat16(code); #else printf( "Atomic operations are not supported for bfloat16 (BF16) " "on GPUs with compute capability less than 8.0.\n"); __trap(); return static_cast<__nv_bfloat16>(0.0f); #endif } }; #endif // BF16_ENABLED template <> struct Cast { typedef Code::Type Type; static __device__ __forceinline__ Type Encode(float val) { return __float_as_uint(val); } static 
__device__ __forceinline__ float Decode(Type code) { return __uint_as_float(code); } }; template <> struct Cast { typedef Code::Type Type; static __device__ __forceinline__ Type Encode(double val) { return __double_as_longlong(val); } static __device__ __forceinline__ double Decode(Type code) { return __longlong_as_double(code); } }; static __device__ __forceinline__ unsigned short int atomicCASshort( // NOLINT unsigned short int* address, // NOLINT unsigned short int compare, // NOLINT unsigned short int val) { // NOLINT static_assert(CUDART_VERSION >= 10000, "Requires at least CUDA 10"); #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__) >= 700) return atomicCAS(address, compare, val); #else (void)address; (void)compare; (void)val; printf( "Atomic operations are not supported for half precision (FP16) " "on this GPU.\n"); __trap(); return val; #endif // (defined(__CUDA_ARCH__) && (__CUDA_ARCH__) >= 700) } #define DEFINE_ATOMIC(NAME) \ template \ __device__ __forceinline__ T Atomic##NAME(T* addr, T val) { \ typedef typename Cast::Type CT; \ CT* addr_as_ui = reinterpret_cast(addr); \ CT old = *addr_as_ui; \ CT assumed = old; \ do { \ assumed = old; \ old = atomicCAS( \ addr_as_ui, assumed, \ Cast::Encode(OP(val, Cast::Decode(old)))); \ } while (assumed != old); \ return Cast::Decode(old); \ } #define DEFINE_ATOMIC_16BIT(NAME, dtype) \ template <> \ __device__ __forceinline__ dtype Atomic##NAME( \ dtype * addr, dtype val) { \ typedef uint16_t CT; \ CT* addr_as_ui = reinterpret_cast(addr); \ CT old = *addr_as_ui; \ CT assumed = old; \ do { \ assumed = old; \ old = atomicCASshort( \ addr_as_ui, assumed, \ Cast::Encode(OP(val, Cast::Decode(old)))); \ } while (assumed != old); \ return Cast::Decode(old); \ } #define OP(a, b) max(a, b) DEFINE_ATOMIC(Max) DEFINE_ATOMIC_16BIT(Max, half) #if BF16_ENABLED DEFINE_ATOMIC_16BIT(Max, __nv_bfloat16) #endif // BF16_ENABLED #undef OP #define OP(a, b) min(a, b) DEFINE_ATOMIC(Min) DEFINE_ATOMIC_16BIT(Min, half) #if BF16_ENABLED 
DEFINE_ATOMIC_16BIT(Min, __nv_bfloat16) #endif // BF16_ENABLED #undef OP #define OP(a, b) a + b DEFINE_ATOMIC(Add) #undef OP /** * @brief Performs an atomic compare-and-swap on 64 bit integers. That is, * it the word `old` at the memory location `address`, computes * `(old == compare ? val : old)` , and stores the result back to memory at * the same address. * * @param address The address to perform the atomic operation on. * @param compare The value to compare to. * @param val The new value to conditionally store. * * @return The old value at the address. */ inline __device__ int64_t AtomicCAS(int64_t* const address, const int64_t compare, const int64_t val) { // match the type of "::atomicCAS", so ignore lint warning using Type = unsigned long long int; // NOLINT static_assert(sizeof(Type) == sizeof(*address), "Type width must match"); return atomicCAS( reinterpret_cast(address), static_cast(compare), static_cast(val)); } /** * @brief Performs an atomic compare-and-swap on 32 bit integers. That is, * it the word `old` at the memory location `address`, computes * `(old == compare ? val : old)` , and stores the result back to memory at * the same address. * * @param address The address to perform the atomic operation on. * @param compare The value to compare to. * @param val The new value to conditionally store. * * @return The old value at the address. 
*/ inline __device__ int32_t AtomicCAS(int32_t* const address, const int32_t compare, const int32_t val) { // match the type of "::atomicCAS", so ignore lint warning using Type = int; // NOLINT static_assert(sizeof(Type) == sizeof(*address), "Type width must match"); return atomicCAS( reinterpret_cast(address), static_cast(compare), static_cast(val)); } inline __device__ int64_t AtomicMax(int64_t* const address, const int64_t val) { // match the type of "::atomicCAS", so ignore lint warning using Type = unsigned long long int; // NOLINT static_assert(sizeof(Type) == sizeof(*address), "Type width must match"); return atomicMax(reinterpret_cast(address), static_cast(val)); } inline __device__ int32_t AtomicMax(int32_t* const address, const int32_t val) { // match the type of "::atomicCAS", so ignore lint warning using Type = int; // NOLINT static_assert(sizeof(Type) == sizeof(*address), "Type width must match"); return atomicMax(reinterpret_cast(address), static_cast(val)); } template <> __device__ __forceinline__ float AtomicAdd(float* addr, float val) { #if __CUDA_ARCH__ >= 200 return atomicAdd(addr, val); #else typedef float T; typedef typename Cast::Type CT; CT* addr_as_ui = reinterpret_cast(addr); CT old = *addr_as_ui; CT assumed = old; do { assumed = old; old = atomicCAS( addr_as_ui, assumed, Cast::Encode(Cast::Decode(old) + val)); } while (assumed != old); return Cast::Decode(old); #endif // __CUDA_ARCH__ } template <> __device__ __forceinline__ double AtomicAdd(double* addr, double val) { #if __CUDA_ARCH__ >= 600 return atomicAdd(addr, val); #else typedef double T; typedef typename Cast::Type CT; CT* addr_as_ui = reinterpret_cast(addr); CT old = *addr_as_ui; CT assumed = old; do { assumed = old; old = atomicCAS( addr_as_ui, assumed, Cast::Encode(Cast::Decode(old) + val)); } while (assumed != old); return Cast::Decode(old); #endif } #if defined(CUDART_VERSION) && CUDART_VERSION >= 10000 template <> __device__ __forceinline__ half AtomicAdd(half* addr, half 
val) { // make sure we have half support #if __CUDA_ARCH__ >= 700 return atomicAdd(addr, val); #else (void)addr; (void)val; printf( "Atomic operations are not supported for half precision (FP16) " "on this GPU.\n"); __trap(); return val; #endif // __CUDA_ARCH__ >= 700 } #endif // defined(CUDART_VERSION) && CUDART_VERSION >= 10000 #if BF16_ENABLED template <> __device__ __forceinline__ __nv_bfloat16 AtomicAdd<__nv_bfloat16>(__nv_bfloat16* addr, __nv_bfloat16 val) { // make sure we have bfloat16 support #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 return atomicAdd(addr, val); #else (void)addr; (void)val; printf( "Atomic operations are not supported for bfloat16 (BF16) " "on GPUs with compute capability less than 8.0.\n"); __trap(); return val; #endif // defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 } #endif // BF16_ENABLED } // namespace cuda } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CUDA_ATOMIC_CUH_ ================================================ FILE: src/array/cuda/bf16.cuh ================================================ /** * Copyright (c) 2022 by Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file array/cuda/bf16.cuh * @brief bfloat16 related functions. 
*/ #ifndef DGL_ARRAY_CUDA_BF16_CUH_ #define DGL_ARRAY_CUDA_BF16_CUH_ #if BF16_ENABLED #include #include static __device__ __forceinline__ __nv_bfloat16 max(__nv_bfloat16 a, __nv_bfloat16 b) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 return __hmax(a, b); #else return __nv_bfloat16(max(float(a), float(b))); // NOLINT #endif } static __device__ __forceinline__ __nv_bfloat16 min(__nv_bfloat16 a, __nv_bfloat16 b) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 800 return __hmin(a, b); #else return __nv_bfloat16(min(float(a), float(b))); // NOLINT #endif } #ifdef __CUDACC__ // Arithmetic BF16 operations for architecture >= 8.0 are already defined in // cuda_bf16.h #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800) // CUDA 12.2 adds "emulated" support for older architectures. #if defined(CUDART_VERSION) && (CUDART_VERSION < 12020) __device__ __forceinline__ __nv_bfloat16 operator+(const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return __nv_bfloat16(float(lh) + float(rh)); // NOLINT } __device__ __forceinline__ __nv_bfloat16 operator-(const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return __nv_bfloat16(float(lh) - float(rh)); // NOLINT } __device__ __forceinline__ __nv_bfloat16 operator*(const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return __nv_bfloat16(float(lh) * float(rh)); // NOLINT } __device__ __forceinline__ __nv_bfloat16 operator/(const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return __nv_bfloat16(float(lh) / float(rh)); // NOLINT } __device__ __forceinline__ __nv_bfloat16& operator+=( __nv_bfloat16& lh, const __nv_bfloat16& rh) { // NOLINT lh = __nv_bfloat16(float(lh) + float(rh)); // NOLINT return lh; } __device__ __forceinline__ __nv_bfloat16& operator-=( __nv_bfloat16& lh, const __nv_bfloat16& rh) { // NOLINT lh = __nv_bfloat16(float(lh) - float(rh)); // NOLINT return lh; } __device__ __forceinline__ __nv_bfloat16& operator*=( __nv_bfloat16& lh, const __nv_bfloat16& rh) { // NOLINT lh = __nv_bfloat16(float(lh) * float(rh)); // 
NOLINT return lh; } __device__ __forceinline__ __nv_bfloat16& operator/=( __nv_bfloat16& lh, const __nv_bfloat16& rh) { // NOLINT lh = __nv_bfloat16(float(lh) / float(rh)); // NOLINT return lh; } __device__ __forceinline__ __nv_bfloat16& operator++( __nv_bfloat16& h) { // NOLINT h = __nv_bfloat16(float(h) + 1.0f); // NOLINT return h; } __device__ __forceinline__ __nv_bfloat16& operator--( __nv_bfloat16& h) { // NOLINT h = __nv_bfloat16(float(h) - 1.0f); // NOLINT return h; } __device__ __forceinline__ __nv_bfloat16 operator++(__nv_bfloat16& h, int) { // NOLINT __nv_bfloat16 ret = h; h = __nv_bfloat16(float(h) + 1.0f); // NOLINT return ret; } __device__ __forceinline__ __nv_bfloat16 operator--(__nv_bfloat16& h, int) { // NOLINT __nv_bfloat16 ret = h; h = __nv_bfloat16(float(h) - 1.0f); // NOLINT return ret; } __device__ __forceinline__ __nv_bfloat16 operator+(const __nv_bfloat16& h) { return h; } __device__ __forceinline__ __nv_bfloat16 operator-(const __nv_bfloat16& h) { return __nv_bfloat16(-float(h)); // NOLINT } __device__ __forceinline__ bool operator==( const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return float(lh) == float(rh); // NOLINT } __device__ __forceinline__ bool operator!=( const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return float(lh) != float(rh); // NOLINT } __device__ __forceinline__ bool operator>( const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return float(lh) > float(rh); // NOLINT } __device__ __forceinline__ bool operator<( const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return float(lh) < float(rh); // NOLINT } __device__ __forceinline__ bool operator>=( const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return float(lh) >= float(rh); // NOLINT } __device__ __forceinline__ bool operator<=( const __nv_bfloat16& lh, const __nv_bfloat16& rh) { return float(lh) <= float(rh); // NOLINT } #endif // defined(CUDART_VERSION) && (CUDART_VERSION < 12020) #endif // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800) #endif // 
__CUDACC__ #endif // BF16_ENABLED #endif // DGL_ARRAY_CUDA_BF16_CUH_ ================================================ FILE: src/array/cuda/coo2csr.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/coo2csr.cc * @brief COO2CSR */ #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template CSRMatrix COOToCSR(COOMatrix coo) { LOG(FATAL) << "Unreachable code."; return {}; } template <> CSRMatrix COOToCSR(COOMatrix coo) { auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); bool row_sorted = coo.row_sorted; bool col_sorted = coo.col_sorted; if (!row_sorted) { // we only need to sort the rows to perform conversion coo = COOSort(coo, false); col_sorted = coo.col_sorted; } const int64_t nnz = coo.row->shape[0]; CHECK_NO_OVERFLOW(coo.row->dtype, nnz); // TODO(minjie): Many of our current implementation assumes that CSR must have // a data array. This is a temporary workaround. Remove this after: // - The old immutable graph implementation is deprecated. // - The old binary reduce kernel is deprecated. if (!COOHasData(coo)) coo.data = aten::Range(0, nnz, coo.row->dtype.bits, coo.row->ctx); NDArray indptr = aten::NewIdArray(coo.num_rows + 1, coo.row->ctx, coo.row->dtype.bits); int32_t* indptr_ptr = static_cast(indptr->data); CUSPARSE_CALL(cusparseXcoo2csr( thr_entry->cusparse_handle, coo.row.Ptr(), nnz, coo.num_rows, indptr_ptr, CUSPARSE_INDEX_BASE_ZERO)); return CSRMatrix( coo.num_rows, coo.num_cols, indptr, coo.col, coo.data, col_sorted); } /** * @brief Search for the insertion positions for needle in the hay. 
* * The hay is a list of sorted elements and the result is the insertion position * of each needle so that the insertion still gives sorted order. * * It essentially perform binary search to find upper bound for each needle * elements. * * For example: * hay = [0, 0, 1, 2, 2] * needle = [0, 1, 2, 3] * then, * out = [2, 3, 5, 5] */ template __global__ void _SortedSearchKernelUpperBound( const IdType* hay, int64_t hay_size, const IdType* needles, int64_t num_needles, IdType* pos) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < num_needles) { const IdType ele = needles[tx]; // binary search IdType lo = 0, hi = hay_size; while (lo < hi) { IdType mid = (lo + hi) >> 1; if (hay[mid] <= ele) { lo = mid + 1; } else { hi = mid; } } pos[tx] = lo; tx += stride_x; } } template <> CSRMatrix COOToCSR(COOMatrix coo) { const auto& ctx = coo.row->ctx; const auto nbits = coo.row->dtype.bits; cudaStream_t stream = runtime::getCurrentCUDAStream(); bool row_sorted = coo.row_sorted; bool col_sorted = coo.col_sorted; if (!row_sorted) { coo = COOSort(coo, false); col_sorted = coo.col_sorted; } const int64_t nnz = coo.row->shape[0]; // TODO(minjie): Many of our current implementation assumes that CSR must have // a data array. This is a temporary workaround. Remove this after: // - The old immutable graph implementation is deprecated. // - The old binary reduce kernel is deprecated. 
if (!COOHasData(coo)) coo.data = aten::Range(0, nnz, coo.row->dtype.bits, coo.row->ctx); IdArray rowids = Range(0, coo.num_rows, nbits, ctx); const int nt = cuda::FindNumThreads(coo.num_rows); const int nb = (coo.num_rows + nt - 1) / nt; IdArray indptr = Full(0, coo.num_rows + 1, nbits, ctx); CUDA_KERNEL_CALL( _SortedSearchKernelUpperBound, nb, nt, 0, stream, coo.row.Ptr(), nnz, rowids.Ptr(), coo.num_rows, indptr.Ptr() + 1); return CSRMatrix( coo.num_rows, coo.num_cols, indptr, coo.col, coo.data, col_sorted); } template CSRMatrix COOToCSR(COOMatrix coo); template CSRMatrix COOToCSR(COOMatrix coo); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/coo_sort.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/coo_sort.cc * @brief Sort COO index */ #include #include "../../c_api_common.h" #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { ///////////////////////////// COOSort_ ///////////////////////////// /** * @brief Encode row and column IDs into a single scalar per edge. * * @tparam IdType The type to encode as. * @param row The row (src) IDs per edge. * @param col The column (dst) IDs per edge. * @param nnz The number of edges. * @param col_bits The number of bits used to encode the destination. The row * information is packed into the remaining bits. * @param key The encoded edges (output). */ template __global__ void _COOEncodeEdgesKernel( const IdType* const row, const IdType* const col, const int64_t nnz, const int col_bits, IdType* const key) { int64_t tx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; if (tx < nnz) { key[tx] = row[tx] << col_bits | col[tx]; } } /** * @brief Decode row and column IDs from the encoded edges. * * @tparam IdType The type the edges are encoded as. * @param key The encoded edges. 
* @param nnz The number of edges. * @param col_bits The number of bits used to store the column/dst ID. * @param row The row (src) IDs per edge (output). * @param col The col (dst) IDs per edge (output). */ template __global__ void _COODecodeEdgesKernel( const IdType* const key, const int64_t nnz, const int col_bits, IdType* const row, IdType* const col) { int64_t tx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; if (tx < nnz) { const IdType k = key[tx]; row[tx] = k >> col_bits; col[tx] = k & ((1 << col_bits) - 1); } } template void COOSort_(COOMatrix* coo, bool sort_column) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const int row_bits = cuda::_NumberOfBits(coo->num_rows); const int64_t nnz = coo->row->shape[0]; if (sort_column) { const int col_bits = cuda::_NumberOfBits(coo->num_cols); const int num_bits = row_bits + col_bits; const int nt = 256; const int nb = (nnz + nt - 1) / nt; CHECK(static_cast(nb) * nt >= nnz); IdArray pos = aten::NewIdArray(nnz, coo->row->ctx, coo->row->dtype.bits); CUDA_KERNEL_CALL( _COOEncodeEdgesKernel, nb, nt, 0, stream, coo->row.Ptr(), coo->col.Ptr(), nnz, col_bits, pos.Ptr()); auto sorted = Sort(pos, num_bits); CUDA_KERNEL_CALL( _COODecodeEdgesKernel, nb, nt, 0, stream, sorted.first.Ptr(), nnz, col_bits, coo->row.Ptr(), coo->col.Ptr()); if (aten::COOHasData(*coo)) coo->data = IndexSelect(coo->data, sorted.second); else coo->data = AsNumBits(sorted.second, coo->row->dtype.bits); coo->row_sorted = coo->col_sorted = true; } else { const int num_bits = row_bits; auto sorted = Sort(coo->row, num_bits); coo->row = sorted.first; coo->col = IndexSelect(coo->col, sorted.second); if (aten::COOHasData(*coo)) coo->data = IndexSelect(coo->data, sorted.second); else coo->data = AsNumBits(sorted.second, coo->row->dtype.bits); coo->row_sorted = true; } } template void COOSort_(COOMatrix* coo, bool sort_column); template void COOSort_(COOMatrix* coo, bool sort_column); ///////////////////////////// COOIsSorted 
///////////////////////////// template __global__ void _COOIsSortedKernel( const IdType* row, const IdType* col, int64_t nnz, int8_t* row_sorted, int8_t* col_sorted) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < nnz) { if (tx == 0) { row_sorted[0] = 1; col_sorted[0] = 1; } else { row_sorted[tx] = static_cast(row[tx - 1] <= row[tx]); col_sorted[tx] = static_cast(row[tx - 1] < row[tx] || col[tx - 1] <= col[tx]); } tx += stride_x; } } template std::pair COOIsSorted(COOMatrix coo) { const int64_t nnz = coo.row->shape[0]; const auto& ctx = coo.row->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = runtime::DeviceAPI::Get(ctx); // We allocate a workspace of 2*nnz bytes. It wastes a little bit memory but // should be fine. int8_t* row_flags = static_cast(device->AllocWorkspace(ctx, nnz)); int8_t* col_flags = static_cast(device->AllocWorkspace(ctx, nnz)); const int nt = cuda::FindNumThreads(nnz); const int nb = (nnz + nt - 1) / nt; CUDA_KERNEL_CALL( _COOIsSortedKernel, nb, nt, 0, stream, coo.row.Ptr(), coo.col.Ptr(), nnz, row_flags, col_flags); const bool row_sorted = cuda::AllTrue(row_flags, nnz, ctx); const bool col_sorted = row_sorted ? 
cuda::AllTrue(col_flags, nnz, ctx) : false; device->FreeWorkspace(ctx, row_flags); device->FreeWorkspace(ctx, col_flags); return {row_sorted, col_sorted}; } template std::pair COOIsSorted(COOMatrix coo); template std::pair COOIsSorted(COOMatrix coo); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/csr2coo.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/csr2coo.cc * @brief CSR2COO */ #include #include #include #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template COOMatrix CSRToCOO(CSRMatrix csr) { LOG(FATAL) << "Unreachable codes"; return {}; } template <> COOMatrix CSRToCOO(CSRMatrix csr) { auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); NDArray indptr = csr.indptr, indices = csr.indices, data = csr.data; const int32_t* indptr_ptr = static_cast(indptr->data); NDArray row = aten::NewIdArray(indices->shape[0], indptr->ctx, indptr->dtype.bits); int32_t* row_ptr = static_cast(row->data); CUSPARSE_CALL(cusparseXcsr2coo( thr_entry->cusparse_handle, indptr_ptr, indices->shape[0], csr.num_rows, row_ptr, CUSPARSE_INDEX_BASE_ZERO)); return COOMatrix( csr.num_rows, csr.num_cols, row, indices, data, true, csr.sorted); } struct RepeatIndex { template __host__ __device__ auto operator()(IdType i) { return thrust::make_constant_iterator(i); } }; template struct OutputBufferIndexer { const IdType* indptr; IdType* buffer; __host__ __device__ auto operator()(IdType i) { return buffer + indptr[i]; } }; template struct AdjacentDifference { const IdType* indptr; 
__host__ __device__ auto operator()(IdType i) { return indptr[i + 1] - indptr[i]; }
};

/**
 * @brief Convert a CSR matrix to COO by materializing one row id per
 *        non-zero entry.
 *
 * For every row i, cub::DeviceCopy::Batched copies
 * (indptr[i + 1] - indptr[i]) elements from the i-th input range to the i-th
 * output range. The ranges come from RepeatIndex and OutputBufferIndexer,
 * which are defined outside this view -- presumably "row id i repeated" and
 * "slot of ret_row starting at indptr[i]" (TODO confirm).
 *
 * @param csr Input CSR matrix. Its indices and data arrays are reused as the
 *        column and data arrays of the result.
 * @return COO matrix with a newly allocated row array of length nnz.
 */
template <>
COOMatrix CSRToCOO(CSRMatrix csr) {
  const auto& ctx = csr.indptr->ctx;
  cudaStream_t stream = runtime::getCurrentCUDAStream();

  const int64_t nnz = csr.indices->shape[0];
  const auto nbits = csr.indptr->dtype.bits;
  IdArray ret_row = NewIdArray(nnz, ctx, nbits);

  runtime::CUDAWorkspaceAllocator allocator(csr.indptr->ctx);
  thrust::counting_iterator iota(0);

  auto input_buffer = thrust::make_transform_iterator(iota, RepeatIndex{});
  auto output_buffer = thrust::make_transform_iterator(
      iota, OutputBufferIndexer{ csr.indptr.Ptr(), ret_row.Ptr()});
  auto buffer_sizes = thrust::make_transform_iterator(
      iota, AdjacentDifference{csr.indptr.Ptr()});

  // Rows are handed to CUB in chunks of at most max_copy_at_once rows.
  constexpr int64_t max_copy_at_once = std::numeric_limits::max();
  for (int64_t i = 0; i < csr.num_rows; i += max_copy_at_once) {
    std::size_t temp_storage_bytes = 0;
    // First call (null storage) only fills in temp_storage_bytes.
    CUDA_CALL(cub::DeviceCopy::Batched(
        nullptr, temp_storage_bytes, input_buffer + i, output_buffer + i,
        buffer_sizes + i, std::min(csr.num_rows - i, max_copy_at_once),
        stream));

    auto temp = allocator.alloc_unique(temp_storage_bytes);

    // Second call performs the batched copy for this chunk of rows.
    CUDA_CALL(cub::DeviceCopy::Batched(
        temp.get(), temp_storage_bytes, input_buffer + i, output_buffer + i,
        buffer_sizes + i, std::min(csr.num_rows - i, max_copy_at_once),
        stream));
  }

  return COOMatrix(
      csr.num_rows, csr.num_cols, ret_row, csr.indices, csr.data, true,
      csr.sorted);
}

template COOMatrix CSRToCOO(CSRMatrix csr);
template COOMatrix CSRToCOO(CSRMatrix csr);

/**
 * @brief Generic fallback of CSRToCOODataAsOrder; logs a fatal error.
 */
template COOMatrix CSRToCOODataAsOrder(CSRMatrix csr) {
  LOG(FATAL) << "Unreachable codes";
  return {};
}

/**
 * @brief Convert CSR to COO with entries ordered by the CSR data array,
 *        using cuSPARSE coosort on 32-bit index buffers.
 *
 * If the matrix has no data array the plain CSRToCOO result is returned.
 * Otherwise the returned COO has a null data array and both sorted flags
 * cleared.
 *
 * NOTE(review): data_ptr is passed where cuSPARSE documents the row-index
 * array, row_ptr where it documents the column-index array, and col_ptr where
 * it documents the permutation array -- presumably a trick to order all three
 * arrays by data; confirm against the cuSPARSE coosort documentation.
 */
template <>
COOMatrix CSRToCOODataAsOrder(CSRMatrix csr) {
  COOMatrix coo = CSRToCOO(csr);
  if (aten::IsNullArray(coo.data)) return coo;

  auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
  auto device = runtime::DeviceAPI::Get(coo.row->ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  // allocate cusparse handle if needed
  if (!thr_entry->cusparse_handle) {
    CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
  }
  CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream));

  NDArray row = coo.row, col = coo.col, data = coo.data;
  int32_t* row_ptr = static_cast(row->data);
  int32_t* col_ptr = static_cast(col->data);
  int32_t* data_ptr = static_cast(data->data);

  // Query the sort workspace size, then sort in place.
  size_t workspace_size = 0;
  CUSPARSE_CALL(cusparseXcoosort_bufferSizeExt(
      thr_entry->cusparse_handle, coo.num_rows, coo.num_cols, row->shape[0],
      data_ptr, row_ptr, &workspace_size));
  void* workspace = device->AllocWorkspace(row->ctx, workspace_size);
  CUSPARSE_CALL(cusparseXcoosortByRow(
      thr_entry->cusparse_handle, coo.num_rows, coo.num_cols, row->shape[0],
      data_ptr, row_ptr, col_ptr, workspace));
  device->FreeWorkspace(row->ctx, workspace);

  // The row and column field have already been reordered according
  // to data, thus the data field will be deprecated.
  coo.data = aten::NullArray();
  coo.row_sorted = false;
  coo.col_sorted = false;
  return coo;
}

/**
 * @brief Convert CSR to COO with entries ordered by the CSR data array,
 *        using Sort() followed by IndexSelect().
 *
 * The second element of the pair returned by Sort(coo.data) is used as the
 * gather index for both the row and the column array. The returned COO has a
 * null data array and both sorted flags cleared.
 */
template <>
COOMatrix CSRToCOODataAsOrder(CSRMatrix csr) {
  COOMatrix coo = CSRToCOO(csr);
  if (aten::IsNullArray(coo.data)) return coo;
  const auto& sorted = Sort(coo.data);

  coo.row = IndexSelect(coo.row, sorted.second);
  coo.col = IndexSelect(coo.col, sorted.second);

  // The row and column field have already been reordered according
  // to data, thus the data field will be deprecated.
  coo.data = aten::NullArray();
  coo.row_sorted = false;
  coo.col_sorted = false;
  return coo;
}

template COOMatrix CSRToCOODataAsOrder(CSRMatrix csr);
template COOMatrix CSRToCOODataAsOrder(CSRMatrix csr);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cuda/csr_get_data.cu
================================================
/**
 * Copyright (c) 2021 by Contributors
 * @file array/cuda/csr_get_data.cu
 * @brief Retrieve entries of a CSR matrix
 */
#include
#include
#include
#include
#include "../../runtime/cuda/cuda_common.h"
#include "./utils.h"

namespace dgl {

using runtime::NDArray;

namespace aten {
namespace impl {

/**
 * @brief Look up the entries (rows[i], cols[i]) of a CSR matrix on the GPU.
 *
 * rows and cols must have the same length, or one of them must have length 1.
 * A length-1 array gets stride 0, so its single id is paired with every
 * element of the other array.
 *
 * @param csr The CSR matrix to search.
 * @param rows Row ids to look up.
 * @param cols Column ids to look up.
 * @param return_eids When true, a null weight pointer is handed to the kernel
 *        and DType is required to match the dtype of rows.
 * @param weights Array whose dtype determines the dtype of the result.
 * @param filler Forwarded to the search kernel; presumably the value written
 *        for pairs that are not found (TODO confirm against the kernel).
 * @return Array of length max(len(rows), len(cols)) on the context of rows.
 */
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, DType filler) {
  const int64_t rowlen = rows->shape[0];
  const int64_t collen = cols->shape[0];

  CHECK((rowlen == collen) || (rowlen == 1) || (collen == 1))
      << "Invalid row and col id array.";

  // Stride 0 broadcasts the single element of a length-1 array.
  const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1;
  const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1;

  const int64_t rstlen = std::max(rowlen, collen);
  IdArray rst = NDArray::Empty({rstlen}, weights->dtype, rows->ctx);
  // Nothing to search: return the empty result without launching a kernel.
  if (rstlen == 0) return rst;

  cudaStream_t stream = runtime::getCurrentCUDAStream();
  const int nt = cuda::FindNumThreads(rstlen);
  const int nb = (rstlen + nt - 1) / nt;
  if (return_eids)
    BUG_IF_FAIL(DGLDataTypeTraits::dtype == rows->dtype)
        << "DType does not match row's dtype.";

  const IdType* indptr_data =
      static_cast(cuda::GetDevicePointer(csr.indptr));
  const IdType* indices_data =
      static_cast(cuda::GetDevicePointer(csr.indices));
  // A CSR without a data array passes nullptr to the kernel.
  const IdType* data_data =
      CSRHasData(csr) ? static_cast(cuda::GetDevicePointer(csr.data))
                      : nullptr;

  // TODO(minjie): use binary search for sorted csr
  CUDA_KERNEL_CALL(
      cuda::_LinearSearchKernel, nb, nt, 0, stream, indptr_data, indices_data,
      data_data, rows.Ptr(), cols.Ptr(), row_stride, col_stride, rstlen,
      return_eids ? nullptr : weights.Ptr(), filler, rst.Ptr());
  return rst;
}

template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, __half filler);
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, __half filler);
#if BF16_ENABLED
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, __nv_bfloat16 filler);
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, __nv_bfloat16 filler);
#endif  // BF16_ENABLED
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, float filler);
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, float filler);
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, double filler);
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, double filler);

// For CSRGetData(CSRMatrix, NDArray, NDArray)
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, int32_t filler);
template NDArray CSRGetData(
    CSRMatrix csr, NDArray rows, NDArray cols, bool return_eids,
    NDArray weights, int64_t filler);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cuda/csr_mm.cu
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/csr_mm.cu
 * @brief SpSpMM/SpGEMM C APIs and definitions.
*/ #include #include #include #include "../../runtime/cuda/cuda_common.h" #include "./cusparse_dispatcher.cuh" #include "./functor.cuh" namespace dgl { using namespace dgl::runtime; namespace aten { namespace cusparse { #if CUDART_VERSION >= 12000 /** @brief Cusparse implementation of SpGEMM on Csr format for CUDA 12.0+ */ template std::pair CusparseSpgemm( const CSRMatrix& A, const NDArray A_weights_array, const CSRMatrix& B, const NDArray B_weights_array) { // We use Spgemm (SpSpMM) to perform following operation: // C = A x B, where A, B and C are sparse matrices in csr format. const int nnzA = A.indices->shape[0]; const int nnzB = B.indices->shape[0]; const DType alpha = 1.0; const DType beta = 0.0; auto transA = CUSPARSE_OPERATION_NON_TRANSPOSE; auto transB = CUSPARSE_OPERATION_NON_TRANSPOSE; // device auto ctx = A.indptr->ctx; auto device = runtime::DeviceAPI::Get(ctx); auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); const DType* A_weights = A_weights_array.Ptr(); const DType* B_weights = B_weights_array.Ptr(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); // all one data array cusparseSpMatDescr_t matA, matB, matC; IdArray dC_csrOffsets = IdArray::Empty({A.num_rows + 1}, A.indptr->dtype, A.indptr->ctx); IdType* dC_csrOffsets_data = dC_csrOffsets.Ptr(); constexpr auto idtype = cusparse_idtype::value; constexpr auto dtype = cuda_dtype::value; // Create sparse matrix A, B and C in CSR format CUSPARSE_CALL(cusparseCreateCsr( &matA, A.num_rows, A.num_cols, nnzA, A.indptr.Ptr(), A.indices.Ptr(), // cusparseCreateCsr only accepts non-const pointers. 
const_cast(A_weights), idtype, idtype, CUSPARSE_INDEX_BASE_ZERO, dtype)); CUSPARSE_CALL(cusparseCreateCsr( &matB, B.num_rows, B.num_cols, nnzB, B.indptr.Ptr(), B.indices.Ptr(), // cusparseCreateCsr only accepts non-const pointers. const_cast(B_weights), idtype, idtype, CUSPARSE_INDEX_BASE_ZERO, dtype)); CUSPARSE_CALL(cusparseCreateCsr( &matC, A.num_rows, B.num_cols, 0, dC_csrOffsets_data, nullptr, nullptr, idtype, idtype, CUSPARSE_INDEX_BASE_ZERO, dtype)); // SpGEMM Computation cusparseSpGEMMDescr_t spgemmDesc; cusparseSpGEMMAlg_t alg = CUSPARSE_SPGEMM_DEFAULT; CUSPARSE_CALL(cusparseSpGEMM_createDescr(&spgemmDesc)); size_t workspace_size1 = 0, workspace_size2 = 0, workspace_size3 = 0; // ask bufferSize1 bytes for external memory CUSPARSE_CALL(cusparseSpGEMM_workEstimation( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size1, NULL)); void* workspace1 = (device->AllocWorkspace(ctx, workspace_size1)); // inspect the matrices A and B to understand the memory requiremnent cusparseStatus_t e = cusparseSpGEMM_workEstimation( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size1, workspace1); // CUSPARSE_SPGEMM_DEFAULT not support getting num_prods > 2^31 -1 // and throws insufficient memory error within workEstimation call if (e == CUSPARSE_STATUS_INSUFFICIENT_RESOURCES) { // fall back to ALG2 to estimate num_prods alg = CUSPARSE_SPGEMM_ALG2; device->FreeWorkspace(ctx, workspace1); // rerun cusparseSpGEMM_workEstimation CUSPARSE_CALL(cusparseSpGEMM_workEstimation( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size1, NULL)); workspace1 = (device->AllocWorkspace(ctx, workspace_size1)); CUSPARSE_CALL(cusparseSpGEMM_workEstimation( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size1, workspace1)); } else { CHECK(e == 
CUSPARSE_STATUS_SUCCESS) << "CUSPARSE ERROR in SpGEMM: " << e; } // get the number of intermediate products required for SpGEMM compute // num_prods indicates device memory consumption for SpGEMM if using ALG2/3 int64_t num_prods; CUSPARSE_CALL(cusparseSpGEMM_getNumProducts(spgemmDesc, &num_prods)); // assume free GPU mem at least ~15G for below heuristics to work // user-defined medium problem size (below will use DEFAULT) int64_t MEDIUM_NUM_PRODUCTS = 400000000; // 400*1000*1000; // user-defined large problem size (above will use ALG3) int64_t LARGE_NUM_PRODUCTS = 800000000; // 800*1000*1000; // switch to ALG2/ALG3 for medium & large problem size if (alg == CUSPARSE_SPGEMM_DEFAULT && num_prods > MEDIUM_NUM_PRODUCTS) { // use ALG3 for very large problem alg = num_prods > LARGE_NUM_PRODUCTS ? CUSPARSE_SPGEMM_ALG3 : CUSPARSE_SPGEMM_ALG2; device->FreeWorkspace(ctx, workspace1); // rerun cusparseSpGEMM_workEstimation CUSPARSE_CALL(cusparseSpGEMM_workEstimation( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size1, NULL)); workspace1 = (device->AllocWorkspace(ctx, workspace_size1)); CUSPARSE_CALL(cusparseSpGEMM_workEstimation( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size1, workspace1)); } else if (alg == CUSPARSE_SPGEMM_ALG2 && num_prods > LARGE_NUM_PRODUCTS) { // no need to rerun cusparseSpGEMM_workEstimation between ALG2 and ALG3 alg = CUSPARSE_SPGEMM_ALG3; } if (alg == CUSPARSE_SPGEMM_ALG2 || alg == CUSPARSE_SPGEMM_ALG3) { // estimate memory for ALG2/ALG3; note chunk_fraction is only used by ALG3 // reduce chunk_fraction if crash due to mem., but it trades off speed float chunk_fraction = num_prods < 4 * LARGE_NUM_PRODUCTS ? 
0.15 : 0.05; CUSPARSE_CALL(cusparseSpGEMM_estimateMemory( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, chunk_fraction, &workspace_size3, NULL, NULL)); void* workspace3 = (device->AllocWorkspace(ctx, workspace_size3)); CUSPARSE_CALL(cusparseSpGEMM_estimateMemory( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, chunk_fraction, &workspace_size3, workspace3, &workspace_size2)); device->FreeWorkspace(ctx, workspace3); } else { CUSPARSE_CALL(cusparseSpGEMM_compute( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size2, NULL)); } // ask bufferSize2 bytes for external memory void* workspace2 = device->AllocWorkspace(ctx, workspace_size2); // compute the intermediate product of A * B CUSPARSE_CALL(cusparseSpGEMM_compute( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc, &workspace_size2, workspace2)); // get matrix C non-zero entries C_nnz1 int64_t C_num_rows1, C_num_cols1, C_nnz1; CUSPARSE_CALL( cusparseSpMatGetSize(matC, &C_num_rows1, &C_num_cols1, &C_nnz1)); IdArray dC_columns = IdArray::Empty({C_nnz1}, A.indptr->dtype, A.indptr->ctx); NDArray dC_weights = NDArray::Empty({C_nnz1}, A_weights_array->dtype, A.indptr->ctx); IdType* dC_columns_data = dC_columns.Ptr(); DType* dC_weights_data = dC_weights.Ptr(); // update matC with the new pointers CUSPARSE_CALL(cusparseCsrSetPointers( matC, dC_csrOffsets_data, dC_columns_data, dC_weights_data)); // copy the final products to the matrix C CUSPARSE_CALL(cusparseSpGEMM_copy( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, alg, spgemmDesc)); device->FreeWorkspace(ctx, workspace1); device->FreeWorkspace(ctx, workspace2); // destroy matrix/vector descriptors CUSPARSE_CALL(cusparseSpGEMM_destroyDescr(spgemmDesc)); CUSPARSE_CALL(cusparseDestroySpMat(matA)); 
CUSPARSE_CALL(cusparseDestroySpMat(matB)); CUSPARSE_CALL(cusparseDestroySpMat(matC)); return { CSRMatrix( A.num_rows, B.num_cols, dC_csrOffsets, dC_columns, NullArray(dC_csrOffsets->dtype, dC_csrOffsets->ctx)), dC_weights}; } #else // CUDART_VERSION < 12000 /** @brief Cusparse implementation of SpGEMM on Csr format for older CUDA * versions */ template std::pair CusparseSpgemm( const CSRMatrix& A, const NDArray A_weights_array, const CSRMatrix& B, const NDArray B_weights_array) { int nnzC; csrgemm2Info_t info = nullptr; size_t workspace_size; const DType alpha = 1.; const int nnzA = A.indices->shape[0]; const int nnzB = B.indices->shape[0]; const int m = A.num_rows; const int n = A.num_cols; const int k = B.num_cols; auto ctx = A.indptr->ctx; auto device = runtime::DeviceAPI::Get(ctx); auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); auto idtype = A.indptr->dtype; auto dtype = A_weights_array->dtype; const DType* A_weights = A_weights_array.Ptr(); const DType* B_weights = B_weights_array.Ptr(); if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); CUSPARSE_CALL(cusparseSetPointerMode( thr_entry->cusparse_handle, CUSPARSE_POINTER_MODE_HOST)); CUSPARSE_CALL(cusparseCreateCsrgemm2Info(&info)); cusparseMatDescr_t matA, matB, matC, matD; CUSPARSE_CALL(cusparseCreateMatDescr(&matA)); CUSPARSE_CALL(cusparseCreateMatDescr(&matB)); CUSPARSE_CALL(cusparseCreateMatDescr(&matC)); CUSPARSE_CALL(cusparseCreateMatDescr(&matD)); // needed even if D is null CUSPARSE_CALL(CSRGEMM::bufferSizeExt( thr_entry->cusparse_handle, m, n, k, &alpha, matA, nnzA, A.indptr.Ptr(), A.indices.Ptr(), matB, nnzB, B.indptr.Ptr(), B.indices.Ptr(), nullptr, matD, 0, nullptr, nullptr, info, &workspace_size)); void* workspace = device->AllocWorkspace(ctx, workspace_size); IdArray C_indptr = IdArray::Empty({m + 1}, idtype, ctx); 
CUSPARSE_CALL(CSRGEMM::nnz( thr_entry->cusparse_handle, m, n, k, matA, nnzA, A.indptr.Ptr(), A.indices.Ptr(), matB, nnzB, B.indptr.Ptr(), B.indices.Ptr(), matD, 0, nullptr, nullptr, matC, C_indptr.Ptr(), &nnzC, info, workspace)); IdArray C_indices = IdArray::Empty({nnzC}, idtype, ctx); NDArray C_weights = NDArray::Empty({nnzC}, dtype, ctx); CUSPARSE_CALL(CSRGEMM::compute( thr_entry->cusparse_handle, m, n, k, &alpha, matA, nnzA, A_weights, A.indptr.Ptr(), A.indices.Ptr(), matB, nnzB, B_weights, B.indptr.Ptr(), B.indices.Ptr(), nullptr, matD, 0, nullptr, nullptr, nullptr, matC, C_weights.Ptr(), C_indptr.Ptr(), C_indices.Ptr(), info, workspace)); device->FreeWorkspace(ctx, workspace); CUSPARSE_CALL(cusparseDestroyCsrgemm2Info(info)); CUSPARSE_CALL(cusparseDestroyMatDescr(matA)); CUSPARSE_CALL(cusparseDestroyMatDescr(matB)); CUSPARSE_CALL(cusparseDestroyMatDescr(matC)); CUSPARSE_CALL(cusparseDestroyMatDescr(matD)); return { CSRMatrix( m, k, C_indptr, C_indices, NullArray(C_indptr->dtype, C_indptr->ctx)), C_weights}; } #endif // CUDART_VERSION >= 12000 } // namespace cusparse template std::pair CSRMM( const CSRMatrix& A, NDArray A_weights, const CSRMatrix& B, NDArray B_weights) { auto ctx = A.indptr->ctx; auto device = runtime::DeviceAPI::Get(ctx); CSRMatrix newA, newB; bool cast = false; // Cast 64 bit indices to 32 bit. if (A.indptr->dtype.bits == 64) { newA = CSRMatrix( A.num_rows, A.num_cols, AsNumBits(A.indptr, 32), AsNumBits(A.indices, 32), AsNumBits(A.data, 32)); newB = CSRMatrix( B.num_rows, B.num_cols, AsNumBits(B.indptr, 32), AsNumBits(B.indices, 32), AsNumBits(B.data, 32)); cast = true; } // Reorder weights if A or B has edge IDs NDArray newA_weights, newB_weights; if (CSRHasData(A)) newA_weights = IndexSelect(A_weights, A.data); if (CSRHasData(B)) newB_weights = IndexSelect(B_weights, B.data); auto result = cusparse::CusparseSpgemm( cast ? newA : A, CSRHasData(A) ? newA_weights : A_weights, cast ? newB : B, CSRHasData(B) ? 
newB_weights : B_weights); // Cast 32 bit indices back to 64 bit if necessary if (cast) { CSRMatrix C = result.first; return { CSRMatrix( C.num_rows, C.num_cols, AsNumBits(C.indptr, 64), AsNumBits(C.indices, 64), AsNumBits(C.data, 64)), result.second}; } else { return result; } } template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); #if BF16_ENABLED template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); #endif // BF16_ENABLED template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); template std::pair CSRMM( const CSRMatrix&, NDArray, const CSRMatrix&, NDArray); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/csr_sort.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/csr_sort.cc * @brief Sort CSR index */ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { /** * @brief Check whether each row is sorted. 
*/ template __global__ void _SegmentIsSorted( const IdType* indptr, const IdType* indices, int64_t num_rows, int8_t* flags) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < num_rows) { bool f = true; for (IdType i = indptr[tx] + 1; f && i < indptr[tx + 1]; ++i) { f = (indices[i - 1] <= indices[i]); } flags[tx] = static_cast(f); tx += stride_x; } } template bool CSRIsSorted(CSRMatrix csr) { const auto& ctx = csr.indptr->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = runtime::DeviceAPI::Get(ctx); // We allocate a workspace of num_rows bytes. It wastes a little bit memory // but should be fine. int8_t* flags = static_cast(device->AllocWorkspace(ctx, csr.num_rows)); const int nt = cuda::FindNumThreads(csr.num_rows); const int nb = (csr.num_rows + nt - 1) / nt; CUDA_KERNEL_CALL( _SegmentIsSorted, nb, nt, 0, stream, csr.indptr.Ptr(), csr.indices.Ptr(), csr.num_rows, flags); bool ret = cuda::AllTrue(flags, csr.num_rows, ctx); device->FreeWorkspace(ctx, flags); return ret; } template bool CSRIsSorted(CSRMatrix csr); template bool CSRIsSorted(CSRMatrix csr); template void CSRSort_(CSRMatrix* csr) { LOG(FATAL) << "Unreachable codes"; } template <> void CSRSort_(CSRMatrix* csr) { auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); auto device = runtime::DeviceAPI::Get(csr->indptr->ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); NDArray indptr = csr->indptr; NDArray indices = csr->indices; const auto& ctx = indptr->ctx; const int64_t nnz = indices->shape[0]; if (!aten::CSRHasData(*csr)) csr->data = aten::Range(0, nnz, indices->dtype.bits, ctx); NDArray data = csr->data; size_t workspace_size = 0; CUSPARSE_CALL(cusparseXcsrsort_bufferSizeExt( thr_entry->cusparse_handle, 
csr->num_rows, csr->num_cols, nnz, indptr.Ptr(), indices.Ptr(), &workspace_size)); void* workspace = device->AllocWorkspace(ctx, workspace_size); cusparseMatDescr_t descr; CUSPARSE_CALL(cusparseCreateMatDescr(&descr)); CUSPARSE_CALL(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); CUSPARSE_CALL(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); CUSPARSE_CALL(cusparseXcsrsort( thr_entry->cusparse_handle, csr->num_rows, csr->num_cols, nnz, descr, indptr.Ptr(), indices.Ptr(), data.Ptr(), workspace)); csr->sorted = true; // free resources CUSPARSE_CALL(cusparseDestroyMatDescr(descr)); device->FreeWorkspace(ctx, workspace); } template <> void CSRSort_(CSRMatrix* csr) { cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = runtime::DeviceAPI::Get(csr->indptr->ctx); const auto& ctx = csr->indptr->ctx; const int64_t nnz = csr->indices->shape[0]; const auto nbits = csr->indptr->dtype.bits; if (!aten::CSRHasData(*csr)) csr->data = aten::Range(0, nnz, nbits, ctx); IdArray new_indices = csr->indices.Clone(); IdArray new_data = csr->data.Clone(); const int64_t* offsets = csr->indptr.Ptr(); const int64_t* key_in = csr->indices.Ptr(); int64_t* key_out = new_indices.Ptr(); const int64_t* value_in = csr->data.Ptr(); int64_t* value_out = new_data.Ptr(); // Allocate workspace size_t workspace_size = 0; CUDA_CALL(cub::DeviceSegmentedRadixSort::SortPairs( nullptr, workspace_size, key_in, key_out, value_in, value_out, nnz, csr->num_rows, offsets, offsets + 1, 0, sizeof(int64_t) * 8, stream)); void* workspace = device->AllocWorkspace(ctx, workspace_size); // Compute CUDA_CALL(cub::DeviceSegmentedRadixSort::SortPairs( workspace, workspace_size, key_in, key_out, value_in, value_out, nnz, csr->num_rows, offsets, offsets + 1, 0, sizeof(int64_t) * 8, stream)); csr->sorted = true; csr->indices = new_indices; csr->data = new_data; // free resources device->FreeWorkspace(ctx, workspace); } template void CSRSort_(CSRMatrix* csr); template void CSRSort_(CSRMatrix* 
csr); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/csr_sum.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/spmm.cu * @brief SpGEAM C APIs and definitions. */ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./cusparse_dispatcher.cuh" #include "./functor.cuh" namespace dgl { using namespace dgl::runtime; namespace aten { namespace cusparse { /** Cusparse implementation of SpSum on Csr format. */ template std::pair CusparseCsrgeam2( const CSRMatrix& A, const NDArray A_weights_array, const CSRMatrix& B, const NDArray B_weights_array) { const int m = A.num_rows; const int n = A.num_cols; const int nnzA = A.indices->shape[0]; const int nnzB = B.indices->shape[0]; int nnzC; const DType alpha = 1.0; const DType beta = 1.0; auto ctx = A.indptr->ctx; auto device = runtime::DeviceAPI::Get(ctx); auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); const DType* A_weights = A_weights_array.Ptr(); const DType* B_weights = B_weights_array.Ptr(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); cusparseMatDescr_t matA, matB, matC; CUSPARSE_CALL(cusparseCreateMatDescr(&matA)); CUSPARSE_CALL(cusparseCreateMatDescr(&matB)); CUSPARSE_CALL(cusparseCreateMatDescr(&matC)); cusparseSetPointerMode( thr_entry->cusparse_handle, CUSPARSE_POINTER_MODE_HOST); size_t workspace_size = 0; /* prepare output C */ IdArray dC_csrOffsets = IdArray::Empty({m + 1}, A.indptr->dtype, ctx); IdType* dC_csrOffsets_data = dC_csrOffsets.Ptr(); IdArray dC_columns; NDArray dC_weights; IdType* dC_columns_data = dC_columns.Ptr(); DType* dC_weights_data = dC_weights.Ptr(); /* prepare buffer */ CUSPARSE_CALL(CSRGEAM::bufferSizeExt( 
thr_entry->cusparse_handle, m, n, &alpha, matA, nnzA, A_weights, A.indptr.Ptr(), A.indices.Ptr(), &beta, matB, nnzB, B_weights, B.indptr.Ptr(), B.indices.Ptr(), matC, dC_weights_data, dC_csrOffsets_data, dC_columns_data, &workspace_size)); void* workspace = device->AllocWorkspace(ctx, workspace_size); CUSPARSE_CALL(CSRGEAM::nnz( thr_entry->cusparse_handle, m, n, matA, nnzA, A.indptr.Ptr(), A.indices.Ptr(), matB, nnzB, B.indptr.Ptr(), B.indices.Ptr(), matC, dC_csrOffsets_data, &nnzC, workspace)); dC_columns = IdArray::Empty({nnzC}, A.indptr->dtype, ctx); dC_weights = NDArray::Empty({nnzC}, A_weights_array->dtype, ctx); dC_columns_data = dC_columns.Ptr(); dC_weights_data = dC_weights.Ptr(); CUSPARSE_CALL(CSRGEAM::compute( thr_entry->cusparse_handle, m, n, &alpha, matA, nnzA, A_weights, A.indptr.Ptr(), A.indices.Ptr(), &beta, matB, nnzB, B_weights, B.indptr.Ptr(), B.indices.Ptr(), matC, dC_weights_data, dC_csrOffsets_data, dC_columns_data, workspace)); device->FreeWorkspace(ctx, workspace); // destroy matrix/vector descriptors CUSPARSE_CALL(cusparseDestroyMatDescr(matA)); CUSPARSE_CALL(cusparseDestroyMatDescr(matB)); CUSPARSE_CALL(cusparseDestroyMatDescr(matC)); return { CSRMatrix( A.num_rows, A.num_cols, dC_csrOffsets, dC_columns, NullArray(dC_csrOffsets->dtype, dC_csrOffsets->ctx), true), dC_weights}; } } // namespace cusparse template std::pair CSRSum( const std::vector& As, const std::vector& A_weights) { const int64_t M = As[0].num_rows; const int64_t N = As[0].num_cols; const int64_t n = As.size(); // Cast 64 bit indices to 32 bit std::vector newAs; newAs.reserve(n); bool cast = false; if (As[0].indptr->dtype.bits == 64) { for (int i = 0; i < n; ++i) newAs.emplace_back( As[i].num_rows, As[i].num_cols, AsNumBits(As[i].indptr, 32), AsNumBits(As[i].indices, 32), AsNumBits(As[i].data, 32)); cast = true; } else { for (int i = 0; i < n; ++i) newAs.push_back(As[i]); } // cuSPARSE csrgeam2 requires the CSR to be sorted. 
// TODO(BarclayII): ideally the sorted CSR should be cached but I'm not sure // how to do it. for (int i = 0; i < n; ++i) { if (!newAs[i].sorted) newAs[i] = CSRSort(newAs[i]); } // Reorder weights if A[i] has edge IDs std::vector A_weights_reordered(n); for (int i = 0; i < n; ++i) { if (CSRHasData(newAs[i])) A_weights_reordered[i] = IndexSelect(A_weights[i], newAs[i].data); else A_weights_reordered[i] = A_weights[i]; } // Loop and sum auto result = std::make_pair( CSRMatrix( newAs[0].num_rows, newAs[0].num_cols, newAs[0].indptr, newAs[0].indices, NullArray(newAs[0].indptr->dtype, newAs[0].indptr->ctx)), A_weights_reordered[0]); // Weights already reordered so we don't need // As[0].data for (int64_t i = 1; i < n; ++i) result = cusparse::CusparseCsrgeam2( result.first, result.second, newAs[i], A_weights_reordered[i]); // Cast 32 bit indices back to 64 bit if necessary if (cast) { CSRMatrix C = result.first; return { CSRMatrix( C.num_rows, C.num_cols, AsNumBits(C.indptr, 64), AsNumBits(C.indices, 64), AsNumBits(C.data, 64), true), result.second}; } else { return result; } } template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); #if BF16_ENABLED template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); #endif // BF16_ENABLED template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); template std::pair CSRSum( const std::vector&, const std::vector&); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/csr_transpose.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/csr_transpose.cc * @brief CSR transpose (convert to CSC) */ #include #include 
"../../runtime/cuda/cuda_common.h"

namespace dgl {

using runtime::NDArray;

namespace aten {
namespace impl {

/**
 * @brief Generic fallback of CSRTranspose; logs a fatal error.
 */
template CSRMatrix CSRTranspose(CSRMatrix csr) {
  LOG(FATAL) << "Unreachable codes";
  return {};
}

/**
 * @brief Transpose a CSR matrix whose index buffers are accessed as int32_t.
 *
 * When CUDART_VERSION < 12000 the transpose is done with cuSPARSE csr2csc
 * (cusparseCsr2cscEx2 from CUDART_VERSION 10010 on, cusparseScsr2csc before
 * that). Otherwise it goes through COO: CSRToCOO -> COOTranspose -> COOToCSR.
 *
 * If the input has no data array, one holding 0..nnz-1 is created so that the
 * transposed matrix carries the original entry positions as its data.
 *
 * @param csr Matrix to transpose.
 * @return Matrix of shape (csr.num_cols, csr.num_rows); the cuSPARSE path
 *         passes false as the last constructor argument.
 */
template <>
CSRMatrix CSRTranspose(CSRMatrix csr) {
#if CUDART_VERSION < 12000
  auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  // allocate cusparse handle if needed
  if (!thr_entry->cusparse_handle) {
    CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle)));
  }
  CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream));

  NDArray indptr = csr.indptr, indices = csr.indices, data = csr.data;
  const int64_t nnz = indices->shape[0];
  const auto& ctx = indptr->ctx;
  const auto bits = indptr->dtype.bits;
  if (aten::IsNullArray(data)) data = aten::Range(0, nnz, bits, ctx);
  const int32_t* indptr_ptr = static_cast(indptr->data);
  const int32_t* indices_ptr = static_cast(indices->data);
  const void* data_ptr = data->data;

  // (BarclayII) csr2csc doesn't seem to clear the content of cscColPtr if nnz
  // == 0. We need to do it ourselves.
  NDArray t_indptr = aten::Full(0, csr.num_cols + 1, bits, ctx);
  NDArray t_indices = aten::NewIdArray(nnz, ctx, bits);
  NDArray t_data = aten::NewIdArray(nnz, ctx, bits);
  int32_t* t_indptr_ptr = static_cast(t_indptr->data);
  int32_t* t_indices_ptr = static_cast(t_indices->data);
  void* t_data_ptr = t_data->data;

#if CUDART_VERSION >= 10010
  auto device = runtime::DeviceAPI::Get(csr.indptr->ctx);
  // workspace
  size_t workspace_size;
  // NOTE(review): CUDA_R_32F is given as the value type although the data
  // arrays are id arrays -- presumably the values are only moved, not
  // interpreted, so any 32-bit type works; confirm.
  CUSPARSE_CALL(cusparseCsr2cscEx2_bufferSize(
      thr_entry->cusparse_handle, csr.num_rows, csr.num_cols, nnz, data_ptr,
      indptr_ptr, indices_ptr, t_data_ptr, t_indptr_ptr, t_indices_ptr,
      CUDA_R_32F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO,
      CUSPARSE_CSR2CSC_ALG1,  // see cusparse doc for reference
      &workspace_size));
  void* workspace = device->AllocWorkspace(ctx, workspace_size);
  CUSPARSE_CALL(cusparseCsr2cscEx2(
      thr_entry->cusparse_handle, csr.num_rows, csr.num_cols, nnz, data_ptr,
      indptr_ptr, indices_ptr, t_data_ptr, t_indptr_ptr, t_indices_ptr,
      CUDA_R_32F, CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO,
      CUSPARSE_CSR2CSC_ALG1,  // see cusparse doc for reference
      workspace));
  device->FreeWorkspace(ctx, workspace);
#else
  // Note the argument order of this call: t_indices_ptr comes before
  // t_indptr_ptr, unlike the Ex2 calls above.
  CUSPARSE_CALL(cusparseScsr2csc(
      thr_entry->cusparse_handle, csr.num_rows, csr.num_cols, nnz,
      static_cast(data_ptr), indptr_ptr, indices_ptr,
      static_cast(t_data_ptr), t_indices_ptr, t_indptr_ptr,
      CUSPARSE_ACTION_NUMERIC, CUSPARSE_INDEX_BASE_ZERO));
#endif

  return CSRMatrix(
      csr.num_cols, csr.num_rows, t_indptr, t_indices, t_data, false);
#else
  return COOToCSR(COOTranspose(CSRToCOO(csr, false)));
#endif
}

/**
 * @brief Transpose a CSR matrix by converting to COO, transposing the COO
 *        and converting back to CSR.
 */
template <>
CSRMatrix CSRTranspose(CSRMatrix csr) {
  return COOToCSR(COOTranspose(CSRToCOO(csr, false)));
}

template CSRMatrix CSRTranspose(CSRMatrix csr);
template CSRMatrix CSRTranspose(CSRMatrix csr);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cuda/cuda_filter.cu
================================================
/**
 * Copyright (c) 2021 by
Contributors
 * @file array/cuda/cuda_filter.cu
 * @brief Object for selecting items in a set, or selecting items not in a set.
 */
#include
#include
#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/cuda/cuda_hashtable.cuh"
#include "../filter.h"

using namespace dgl::runtime::cuda;

namespace dgl {
namespace array {

namespace {

/**
 * @brief Mark every element of `array` according to its membership in
 *        `table`.
 *
 * mark[idx] is table.Contains(array[idx]) XOR (!include), i.e. with include
 * set the mark is 1 for contained elements, otherwise for the others.
 */
template __global__ void _IsInKernel(
    DeviceOrderedHashTable table, const IdType* const array,
    const int64_t size, IdType* const mark) {
  const int64_t idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx < size) {
    mark[idx] = table.Contains(array[idx]) ^ (!include);
  }
}

/**
 * @brief Scatter the positions of marked elements into `result`.
 *
 * `prefix` holds size + 1 exclusive prefix sums of the marks, so position idx
 * was marked exactly when prefix[idx] != prefix[idx + 1]; prefix[idx] is then
 * its slot in `result`.
 */
template __global__ void _InsertKernel(
    const IdType* const prefix, const int64_t size, IdType* const result) {
  const int64_t idx = threadIdx.x + blockDim.x * blockIdx.x;
  if (idx < size) {
    if (prefix[idx] != prefix[idx + 1]) {
      result[prefix[idx]] = idx;
    }
  }
}

/**
 * @brief Return the positions in `test` whose membership in `table` matches
 *        the filter mode.
 *
 * Steps: mark each element with _IsInKernel, turn the marks into an exclusive
 * prefix sum over size + 1 entries, copy the last entry (the number of marked
 * elements) to the host, then scatter the marked positions with
 * _InsertKernel.
 *
 * @param table Hash table holding the filter set.
 * @param test Ids to test; returned unchanged when it is empty.
 * @return View of the first num_unique entries of the result array.
 */
template IdArray _PerformFilter(const OrderedHashTable& table, IdArray test) {
  const auto& ctx = test->ctx;
  auto device = runtime::DeviceAPI::Get(ctx);
  const int64_t size = test->shape[0];
  cudaStream_t cudaStream = runtime::getCurrentCUDAStream();

  if (size == 0) {
    return test;
  }

  // we need two arrays: 1) to act as a prefixsum
  // for the number of entries that will be inserted, and
  // 2) to collect the included items.
  IdType* prefix = static_cast(
      device->AllocWorkspace(ctx, sizeof(IdType) * (size + 1)));

  // will resize down later
  IdArray result = aten::NewIdArray(size, ctx, sizeof(IdType) * 8);

  // mark each index based on its existence in the hashtable
  {
    const dim3 block(256);
    const dim3 grid((size + block.x - 1) / block.x);

    CUDA_KERNEL_CALL(
        (_IsInKernel), grid, block, 0, cudaStream, table.DeviceHandle(),
        static_cast(test->data), size, prefix);
  }

  // generate prefix-sum
  {
    size_t workspace_bytes;
    // First call (null workspace) only fills in workspace_bytes.
    CUDA_CALL(cub::DeviceScan::ExclusiveSum(
        nullptr, workspace_bytes, static_cast(nullptr),
        static_cast(nullptr), size + 1, cudaStream));
    void* workspace = device->AllocWorkspace(ctx, workspace_bytes);

    // In-place scan; afterwards prefix[size] is the number of marked items.
    CUDA_CALL(cub::DeviceScan::ExclusiveSum(
        workspace, workspace_bytes, prefix, prefix, size + 1, cudaStream));
    device->FreeWorkspace(ctx, workspace);
  }

  // copy number using the internal current stream;
  IdType num_unique;
  device->CopyDataFromTo(
      prefix + size, 0, &num_unique, 0, sizeof(num_unique), ctx,
      DGLContext{kDGLCPU, 0}, test->dtype);

  // insert items into set
  {
    const dim3 block(256);
    const dim3 grid((size + block.x - 1) / block.x);

    CUDA_KERNEL_CALL(
        _InsertKernel, grid, block, 0, cudaStream, prefix, size,
        static_cast(result->data));
  }
  device->FreeWorkspace(ctx, prefix);

  return result.CreateView({num_unique}, result->dtype);
}

/**
 * @brief Filter backed by a GPU hash table built from an id array.
 *
 * The table is filled once in the constructor with FillWithUnique; both query
 * methods delegate to _PerformFilter.
 */
template class CudaFilterSet : public Filter {
 public:
  explicit CudaFilterSet(IdArray array)
      : table_(array->shape[0], array->ctx, runtime::getCurrentCUDAStream()) {
    cudaStream_t cudaStream = runtime::getCurrentCUDAStream();
    table_.FillWithUnique(
        static_cast(array->data), array->shape[0], cudaStream);
  }

  IdArray find_included_indices(IdArray test) override {
    return _PerformFilter(table_, test);
  }

  IdArray find_excluded_indices(IdArray test) override {
    return _PerformFilter(table_, test);
  }

 private:
  // Hash table holding the ids the filter was created from.
  OrderedHashTable table_;
};

}  // namespace

/**
 * @brief Create a set filter on the GPU from the ids in `set`.
 */
template FilterRef CreateSetFilter(IdArray set) {
  return FilterRef(std::make_shared>(set));
}

template FilterRef CreateSetFilter(IdArray set);
template FilterRef CreateSetFilter(IdArray set);

}  // namespace array
}  // namespace dgl

================================================
FILE: src/array/cuda/cusparse_dispatcher.cuh
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/cusparse_dispatcher.cuh
 * @brief Templates to dispatch into different cuSPARSE routines based on the
 * type argument.
 */
#ifndef DGL_ARRAY_CUDA_CUSPARSE_DISPATCHER_CUH_
#define DGL_ARRAY_CUDA_CUSPARSE_DISPATCHER_CUH_

#include
#include

#include "bf16.cuh"
#include "fp16.cuh"

namespace dgl {
namespace aten {

/**
 * @brief cusparseXcsrgemm dispatcher
 *
 * The primary template only forwards nnz(); bufferSizeExt() and compute()
 * report a bug because they are expected to be reached through a
 * specialization.
 */
template struct CSRGEMM {
  template static inline cusparseStatus_t bufferSizeExt(Args... args) {
    BUG_IF_FAIL(false) << "This piece of code should not be reached.";
    return static_cast(0);
  }

  template static inline cusparseStatus_t nnz(Args... args) {
    return cusparseXcsrgemm2Nnz(args...);
  }

  template static inline cusparseStatus_t compute(Args... args) {
    BUG_IF_FAIL(false) << "This piece of code should not be reached.";
    return static_cast(0);
  }
};

/** @brief FP16 specialization: only nnz() is supported. */
template <>
struct CSRGEMM<__half> {
  template static inline cusparseStatus_t bufferSizeExt(Args... args) {
    // TODO(ndickson): There is no cusparseHcsrgemm2_bufferSizeExt, so a
    // different implementation would be required.
    LOG(FATAL) << "CSRGEMM::bufferSizeExt does not support dtype half (FP16).";
    return static_cast(0);
  }

  template static inline cusparseStatus_t nnz(Args... args) {
    return cusparseXcsrgemm2Nnz(args...);
  }

  template static inline cusparseStatus_t compute(Args... args) {
    // TODO(ndickson): There is no cusparseHcsrgemm2, so a different
    // implementation would be required.
    LOG(FATAL) << "CSRGEMM::compute does not support dtype half (FP16).";
    return static_cast(0);
  }
};

#if BF16_ENABLED
/** @brief BF16 specialization: only nnz() is supported. */
template <>
struct CSRGEMM<__nv_bfloat16> {
  template static inline cusparseStatus_t bufferSizeExt(Args... args) {
    // TODO(ndickson): There is no cusparseHcsrgemm2_bufferSizeExt, so a
    // different implementation would be required.
    LOG(FATAL) << "CSRGEMM::bufferSizeExt does not support dtype bfloat16 (BF16).";
    return static_cast(0);
  }

  template static inline cusparseStatus_t nnz(Args... args) {
    return cusparseXcsrgemm2Nnz(args...);
  }

  template static inline cusparseStatus_t compute(Args... args) {
    // TODO(ndickson): There is no cusparseHcsrgemm2, so a different
    // implementation would be required.
    LOG(FATAL) << "CSRGEMM::compute does not support dtype bfloat16 (BF16).";
    return static_cast(0);
  }
};
#endif  // BF16_ENABLED

/** @brief Specialization forwarding to the cusparseScsrgemm2 routines. */
template <>
struct CSRGEMM {
  template static inline cusparseStatus_t bufferSizeExt(Args... args) {
    return cusparseScsrgemm2_bufferSizeExt(args...);
  }

  template static inline cusparseStatus_t nnz(Args... args) {
    return cusparseXcsrgemm2Nnz(args...);
  }

  template static inline cusparseStatus_t compute(Args... args) {
    return cusparseScsrgemm2(args...);
  }
};

/** @brief Specialization forwarding to the cusparseDcsrgemm2 routines. */
template <>
struct CSRGEMM {
  template static inline cusparseStatus_t bufferSizeExt(Args... args) {
    return cusparseDcsrgemm2_bufferSizeExt(args...);
  }

  template static inline cusparseStatus_t nnz(Args... args) {
    return cusparseXcsrgemm2Nnz(args...);
  }

  template static inline cusparseStatus_t compute(Args... args) {
    return cusparseDcsrgemm2(args...);
  }
};

/**
 * @brief cusparseXcsrgeam dispatcher
 *
 * The primary template only forwards nnz(); bufferSizeExt() and compute()
 * report a bug because they are expected to be reached through a
 * specialization.
 */
template struct CSRGEAM {
  template static inline cusparseStatus_t bufferSizeExt(Args... args) {
    BUG_IF_FAIL(false) << "This piece of code should not be reached.";
    return static_cast(0);
  }

  template static inline cusparseStatus_t nnz(Args... args) {
    return cusparseXcsrgeam2Nnz(args...);
  }

  template static inline cusparseStatus_t compute(Args... args) {
    BUG_IF_FAIL(false) << "This piece of code should not be reached.";
    return static_cast(0);
  }
};

template <>
struct CSRGEAM<__half> {
  template static inline cusparseStatus_t bufferSizeExt(Args...
args) { // TODO(ndickson): There is no cusparseHcsrgeam2_bufferSizeExt, so a // different implementation would be required. LOG(FATAL) << "CSRGEAM::bufferSizeExt does not support dtype half (FP16)."; return static_cast(0); } template static inline cusparseStatus_t nnz(Args... args) { return cusparseXcsrgeam2Nnz(args...); } template static inline cusparseStatus_t compute(Args... args) { // TODO(ndickson): There is no cusparseHcsrgeam2, so a different // implementation would be required. LOG(FATAL) << "CSRGEAM::compute does not support dtype half (FP16)."; return static_cast(0); } }; #if BF16_ENABLED template <> struct CSRGEAM<__nv_bfloat16> { template static inline cusparseStatus_t bufferSizeExt(Args... args) { // TODO(ndickson): There is no cusparseHcsrgeam2_bufferSizeExt, so a // different implementation would be required. LOG(FATAL) << "CSRGEAM::bufferSizeExt does not support dtype bfloat16 (BF16)."; return static_cast(0); } template static inline cusparseStatus_t nnz(Args... args) { return cusparseXcsrgeam2Nnz(args...); } template static inline cusparseStatus_t compute(Args... args) { // TODO(ndickson): There is no cusparseHcsrgeam2, so a different // implementation would be required. LOG(FATAL) << "CSRGEAM::compute does not support dtype bfloat16 (BF16)."; return static_cast(0); } }; #endif // BF16_ENABLED template <> struct CSRGEAM { template static inline cusparseStatus_t bufferSizeExt(Args... args) { return cusparseScsrgeam2_bufferSizeExt(args...); } template static inline cusparseStatus_t nnz(Args... args) { return cusparseXcsrgeam2Nnz(args...); } template static inline cusparseStatus_t compute(Args... args) { return cusparseScsrgeam2(args...); } }; template <> struct CSRGEAM { template static inline cusparseStatus_t bufferSizeExt(Args... args) { return cusparseDcsrgeam2_bufferSizeExt(args...); } template static inline cusparseStatus_t nnz(Args... args) { return cusparseXcsrgeam2Nnz(args...); } template static inline cusparseStatus_t compute(Args... 
args) { return cusparseDcsrgeam2(args...); } }; }; // namespace aten }; // namespace dgl #endif // DGL_ARRAY_CUDA_CUSPARSE_DISPATCHER_CUH_ ================================================ FILE: src/array/cuda/disjoint_union.cu ================================================ /** * Copyright (c) 2022, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file array/gpu/disjoint_union.cu * @brief Disjoint union GPU implementation. */ #include #include #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template __global__ void _DisjointUnionKernel( IdType** arrs, IdType* prefix, IdType* offset, IdType* out, int64_t n_arrs, int n_elms) { IdType tx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < n_elms) { IdType i = dgl::cuda::_UpperBound(offset, n_arrs, tx) - 1; if (arrs[i] == NULL) { out[tx] = tx; } else { IdType j = tx - offset[i]; out[tx] = arrs[i][j] + prefix[i]; } tx += stride_x; } } template std::tuple _ComputePrefixSums( const std::vector& coos) { IdType n = coos.size(), nbits = coos[0].row->dtype.bits; IdArray n_rows = NewIdArray(n, CPU, nbits); IdArray n_cols = NewIdArray(n, CPU, nbits); IdArray n_elms = NewIdArray(n, CPU, nbits); IdType* n_rows_data = n_rows.Ptr(); IdType* n_cols_data = n_cols.Ptr(); IdType* n_elms_data = n_elms.Ptr(); dgl::runtime::parallel_for(0, coos.size(), [&](IdType 
b, IdType e) { for (IdType i = b; i < e; ++i) { n_rows_data[i] = coos[i].num_rows; n_cols_data[i] = coos[i].num_cols; n_elms_data[i] = coos[i].row->shape[0]; } }); return std::make_tuple( CumSum(n_rows.CopyTo(coos[0].row->ctx), true), CumSum(n_cols.CopyTo(coos[0].row->ctx), true), CumSum(n_elms.CopyTo(coos[0].row->ctx), true)); } template void _Merge( IdType** arrs, IdType* prefix, IdType* offset, IdType* out, int64_t n_arrs, int n_elms, DGLContext ctx, DGLDataType dtype, cudaStream_t stream) { auto device = runtime::DeviceAPI::Get(ctx); int nt = 256; int nb = (n_elms + nt - 1) / nt; IdType** arrs_dev = static_cast( device->AllocWorkspace(ctx, n_arrs * sizeof(IdType*))); device->CopyDataFromTo( arrs, 0, arrs_dev, 0, sizeof(IdType*) * n_arrs, DGLContext{kDGLCPU, 0}, ctx, dtype); CUDA_KERNEL_CALL( _DisjointUnionKernel, nb, nt, 0, stream, arrs_dev, prefix, offset, out, n_arrs, n_elms); device->FreeWorkspace(ctx, arrs_dev); } template COOMatrix DisjointUnionCoo(const std::vector& coos) { cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = runtime::DeviceAPI::Get(coos[0].row->ctx); uint64_t src_offset = 0, dst_offset = 0; bool has_data = false; bool row_sorted = true; bool col_sorted = true; // check if data index array for (size_t i = 0; i < coos.size(); ++i) { CHECK_SAME_DTYPE(coos[0].row, coos[i].row); CHECK_SAME_CONTEXT(coos[0].row, coos[i].row); has_data |= COOHasData(coos[i]); } auto prefixes = _ComputePrefixSums(coos); auto prefix_src = static_cast(std::get<0>(prefixes)->data); auto prefix_dst = static_cast(std::get<1>(prefixes)->data); auto prefix_elm = static_cast(std::get<2>(prefixes)->data); std::unique_ptr rows(new IdType*[coos.size()]); std::unique_ptr cols(new IdType*[coos.size()]); std::unique_ptr data(new IdType*[coos.size()]); for (size_t i = 0; i < coos.size(); i++) { row_sorted &= coos[i].row_sorted; col_sorted &= coos[i].col_sorted; rows[i] = coos[i].row.Ptr(); cols[i] = coos[i].col.Ptr(); data[i] = coos[i].data.Ptr(); } auto ctx = 
coos[0].row->ctx; auto dtype = coos[0].row->dtype; IdType n_elements = 0; device->CopyDataFromTo( &prefix_elm[coos.size()], 0, &n_elements, 0, sizeof(IdType), coos[0].row->ctx, DGLContext{kDGLCPU, 0}, coos[0].row->dtype); device->CopyDataFromTo( &prefix_src[coos.size()], 0, &src_offset, 0, sizeof(IdType), coos[0].row->ctx, DGLContext{kDGLCPU, 0}, coos[0].row->dtype); device->CopyDataFromTo( &prefix_dst[coos.size()], 0, &dst_offset, 0, sizeof(IdType), coos[0].row->ctx, DGLContext{kDGLCPU, 0}, coos[0].row->dtype); // Union src array IdArray result_src = NewIdArray(n_elements, coos[0].row->ctx, coos[0].row->dtype.bits); _Merge( rows.get(), prefix_src, prefix_elm, result_src.Ptr(), coos.size(), n_elements, ctx, dtype, stream); // Union dst array IdArray result_dst = NewIdArray(n_elements, coos[0].col->ctx, coos[0].col->dtype.bits); _Merge( cols.get(), prefix_dst, prefix_elm, result_dst.Ptr(), coos.size(), n_elements, ctx, dtype, stream); // Union data array if exists and fetch number of elements IdArray result_dat = NullArray(); if (has_data) { result_dat = NewIdArray(n_elements, coos[0].row->ctx, coos[0].row->dtype.bits); _Merge( data.get(), prefix_elm, prefix_elm, result_dat.Ptr(), coos.size(), n_elements, ctx, dtype, stream); } return COOMatrix( src_offset, dst_offset, result_src, result_dst, result_dat, row_sorted, col_sorted); } template COOMatrix DisjointUnionCoo( const std::vector& coos); template COOMatrix DisjointUnionCoo( const std::vector& coos); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/fp16.cuh ================================================ /** * Copyright (c) 2020-2022 by Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file array/cuda/fp16.cuh * @brief float16 related functions. * @note this file is modified from TVM project: * https://github.com/apache/tvm/blob/e561007f0c330e3d14c2bc8a3ef40fb741db9004/src/target/source/literal/cuda_half_t.h. */ #ifndef DGL_ARRAY_CUDA_FP16_CUH_ #define DGL_ARRAY_CUDA_FP16_CUH_ #include #include static __device__ __forceinline__ half max(half a, half b) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hgt(__half(a), __half(b)) ? a : b; #else return __half(max(float(a), float(b))); // NOLINT #endif } static __device__ __forceinline__ half min(half a, half b) { #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 530 return __hlt(__half(a), __half(b)) ? a : b; #else return __half(min(float(a), float(b))); // NOLINT #endif } #ifdef __CUDACC__ // Arithmetic FP16 operations for architecture >= 5.3 are already defined in // cuda_fp16.h #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530) // CUDA 12.2 adds "emulated" support for older architectures. 
#if defined(CUDART_VERSION) && (CUDART_VERSION < 12020)
// Software fallbacks for __half operators. Every operator widens its
// operand(s) to float, performs the operation in float and, for arithmetic
// results, narrows the value back to __half.

// ---- binary arithmetic -----------------------------------------------------
__device__ __forceinline__ __half operator+(const __half& a, const __half& b) {
  const float sum = float(a) + float(b);  // NOLINT
  return __half(sum);
}
__device__ __forceinline__ __half operator-(const __half& a, const __half& b) {
  const float difference = float(a) - float(b);  // NOLINT
  return __half(difference);
}
__device__ __forceinline__ __half operator*(const __half& a, const __half& b) {
  const float product = float(a) * float(b);  // NOLINT
  return __half(product);
}
__device__ __forceinline__ __half operator/(const __half& a, const __half& b) {
  const float quotient = float(a) / float(b);  // NOLINT
  return __half(quotient);
}

// ---- compound assignment (update the left operand, return it by reference) -
__device__ __forceinline__ __half& operator+=(
    __half& target, const __half& delta) {  // NOLINT
  const float updated = float(target) + float(delta);  // NOLINT
  target = __half(updated);
  return target;
}
__device__ __forceinline__ __half& operator-=(
    __half& target, const __half& delta) {  // NOLINT
  const float updated = float(target) - float(delta);  // NOLINT
  target = __half(updated);
  return target;
}
__device__ __forceinline__ __half& operator*=(
    __half& target, const __half& factor) {  // NOLINT
  const float updated = float(target) * float(factor);  // NOLINT
  target = __half(updated);
  return target;
}
__device__ __forceinline__ __half& operator/=(
    __half& target, const __half& divisor) {  // NOLINT
  const float updated = float(target) / float(divisor);  // NOLINT
  target = __half(updated);
  return target;
}

// ---- prefix increment / decrement (return the updated operand) -------------
__device__ __forceinline__ __half& operator++(__half& value) {  // NOLINT
  const float bumped = float(value) + 1.0f;  // NOLINT
  value = __half(bumped);
  return value;
}
__device__ __forceinline__ __half& operator--(__half& value) {  // NOLINT
  const float lowered = float(value) - 1.0f;  // NOLINT
  value = __half(lowered);
  return value;
}

// ---- postfix increment / decrement (return the value before the update) ----
__device__ __forceinline__ __half operator++(__half& value, int) {  // NOLINT
  const __half previous = value;
  value = __half(float(previous) + 1.0f);  // NOLINT
  return previous;
}
__device__ __forceinline__ __half operator--(__half& value, int) {  // NOLINT
  const __half previous = value;
  value = __half(float(previous) - 1.0f);  // NOLINT
  return previous;
}

// ---- unary sign ------------------------------------------------------------
__device__ __forceinline__ __half operator+(const __half& value) {
  return value;
}
__device__ __forceinline__ __half operator-(const __half& value) {
  const float negated = -float(value);  // NOLINT
  return __half(negated);
}

// ---- equality --------------------------------------------------------------
__device__ __forceinline__ bool operator==(const __half& a, const __half& b) {
  const float left = float(a);   // NOLINT
  const float right = float(b);  // NOLINT
  return left == right;
}
__device__ __forceinline__ bool operator!=(const __half& lh, const __half& rh) { return float(lh) != float(rh); // NOLINT } __device__ __forceinline__ bool operator>(const __half& lh, const __half& rh) { return float(lh) > float(rh); // NOLINT } __device__ __forceinline__ bool operator<(const __half& lh, const __half& rh) { return float(lh) < float(rh); // NOLINT } __device__ __forceinline__ bool operator>=(const __half& lh, const __half& rh) { return float(lh) >= float(rh); // NOLINT } __device__ __forceinline__ bool operator<=(const __half& lh, const __half& rh) { return float(lh) <= float(rh); // NOLINT } #endif // defined(CUDART_VERSION) && (CUDART_VERSION < 12020) #endif // defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 530) #endif // __CUDACC__ #endif // DGL_ARRAY_CUDA_FP16_CUH_ ================================================ FILE: src/array/cuda/functor.cuh ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/functor.cuh * @brief Functors for template on CUDA */ #ifndef DGL_ARRAY_CUDA_FUNCTOR_CUH_ #define DGL_ARRAY_CUDA_FUNCTOR_CUH_ #include #include #include "./atomic.cuh" #include "./fp16.cuh" #include "bf16.cuh" namespace dgl { namespace aten { namespace cuda { /////////////////////////// CUDA binary operators ////////////////////////////// namespace binary { template struct Add { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; static constexpr bool reduce_last_dim = false; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { return lhs[0] + rhs[0]; } }; template constexpr bool Add::use_lhs; template constexpr bool Add::use_rhs; template constexpr bool Add::reduce_last_dim; template struct Sub { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; static constexpr bool reduce_last_dim = false; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { return lhs[0] - 
rhs[0]; } }; template constexpr bool Sub::use_lhs; template constexpr bool Sub::use_rhs; template constexpr bool Sub::reduce_last_dim; template struct Mul { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; static constexpr bool reduce_last_dim = false; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { return lhs[0] * rhs[0]; } }; template constexpr bool Mul::use_lhs; template constexpr bool Mul::use_rhs; template constexpr bool Mul::reduce_last_dim; template struct Div { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; static constexpr bool reduce_last_dim = false; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { return lhs[0] / rhs[0]; } }; template constexpr bool Div::use_lhs; template constexpr bool Div::use_rhs; template constexpr bool Div::reduce_last_dim; template struct CopyLhs { static constexpr bool use_lhs = true; static constexpr bool use_rhs = false; static constexpr bool reduce_last_dim = false; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { return lhs[0]; } }; template constexpr bool CopyLhs::use_lhs; template constexpr bool CopyLhs::use_rhs; template constexpr bool CopyLhs::reduce_last_dim; template struct CopyRhs { static constexpr bool use_lhs = false; static constexpr bool use_rhs = true; static constexpr bool reduce_last_dim = false; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { return rhs[0]; } }; template constexpr bool CopyRhs::use_lhs; template constexpr bool CopyRhs::use_rhs; template constexpr bool CopyRhs::reduce_last_dim; template struct Dot { static constexpr bool use_lhs = true; static constexpr bool use_rhs = true; static constexpr bool reduce_last_dim = true; static __device__ __forceinline__ DType Call(const DType *lhs, const DType *rhs, int64_t len = 1) { DType rst = 
static_cast(0.0f); for (int64_t i = 0; i < len; ++i) { rst += lhs[i] * rhs[i]; } return rst; } }; template constexpr bool Dot::use_lhs; template constexpr bool Dot::use_rhs; template constexpr bool Dot::reduce_last_dim; } // end of namespace binary /////////////////////////// CUDA reduce operators ////////////////////////////// namespace reduce { template struct _Sum { static constexpr __host__ __device__ __forceinline__ DType zero() { return 0.; } static constexpr bool require_arg = false; static __device__ __forceinline__ void Call( DType *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, DType val, Idx uid, Idx eid) { if (!atomic) { *out_buf += val; } else { cuda::AtomicAdd(out_buf, val); } } static __device__ __forceinline__ void Call( DType *out_buf, Idx *arg_buf, DType val, Idx id) { if (!atomic) { *out_buf += val; } else { cuda::AtomicAdd(out_buf, val); } } static __device__ __forceinline__ void CallArg( Idx fid, Idx *arg_u_buf, Idx *arg_e_buf, DType val, DType val_ref, Idx uid, Idx eid) {} }; template struct Sum : _Sum {}; template struct Sum : _Sum { static constexpr __host__ __device__ __forceinline__ __half zero() { return __float2half_rn(0.); } static __device__ __forceinline__ void Call( __half *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __half val, Idx uid, Idx eid) { _Sum::Call( out_buf, arg_u_buf, arg_e_buf, val, uid, eid); } static __device__ __forceinline__ void Call( __half *out_buf, Idx *arg_buf, __half val, Idx id) { _Sum::Call(out_buf, arg_buf, val, id); } // sometimes we have to use float in reduction for better precision static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __half val, Idx uid, Idx eid) { _Sum::Call(out_buf, arg_u_buf, arg_e_buf, static_cast(val), uid, eid); } static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_buf, __half val, Idx id) { _Sum::Call(out_buf, arg_buf, static_cast(val), id); } }; #if BF16_ENABLED template struct Sum : _Sum { static constexpr __host__ __device__ 
__forceinline__ __nv_bfloat16 zero() { return __float2bfloat16_rn(0.); } static __device__ __forceinline__ void Call( __nv_bfloat16 *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __nv_bfloat16 val, Idx uid, Idx eid) { _Sum::Call( out_buf, arg_u_buf, arg_e_buf, val, uid, eid); } static __device__ __forceinline__ void Call( __nv_bfloat16 *out_buf, Idx *arg_buf, __nv_bfloat16 val, Idx id) { _Sum::Call(out_buf, arg_buf, val, id); } // sometimes we have to use float in reduction for better precision static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __nv_bfloat16 val, Idx uid, Idx eid) { _Sum::Call(out_buf, arg_u_buf, arg_e_buf, static_cast(val), uid, eid); } static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_buf, __nv_bfloat16 val, Idx id) { _Sum::Call(out_buf, arg_buf, static_cast(val), id); } }; #endif // BF16_ENABLED template struct _Max { static constexpr __host__ __device__ __forceinline__ DType zero() { return -std::numeric_limits::infinity(); } static constexpr bool require_arg = true; static __device__ __forceinline__ void Call( DType *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, DType val, Idx uid, Idx eid) { if (!atomic) { if (*out_buf < val) { *out_buf = val; *arg_u_buf = uid; *arg_e_buf = eid; } } else { cuda::AtomicMax(out_buf, val); } } static __device__ __forceinline__ void Call( DType *out_buf, Idx *arg_buf, DType val, Idx id) { if (!atomic) { if (*out_buf < val) { *out_buf = val; *arg_buf = id; } } else { cuda::AtomicMax(out_buf, val); } } static __device__ __forceinline__ void CallArg( Idx fid, Idx *arg_u_buf, Idx *arg_e_buf, DType val, DType val_ref, Idx uid, Idx eid) { if (atomic) { if (val == val_ref) { if (arg_u_buf) arg_u_buf[fid] = uid; if (arg_e_buf) arg_e_buf[fid] = eid; } } } }; template struct Max : _Max {}; template struct Max : _Max { static constexpr __host__ __device__ __forceinline__ __half zero() { return __float2half_rn(-6.550400e+04f); } static __device__ __forceinline__ void Call( 
__half *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __half val, Idx uid, Idx eid) { _Max::Call( out_buf, arg_u_buf, arg_e_buf, val, uid, eid); } static __device__ __forceinline__ void Call( __half *out_buf, Idx *arg_buf, __half val, Idx id) { _Max::Call(out_buf, arg_buf, val, id); } // sometimes we have to use float in reduction for better precision static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __half val, Idx uid, Idx eid) { _Max::Call(out_buf, arg_u_buf, arg_e_buf, static_cast(val), uid, eid); } static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_buf, __half val, Idx id) { _Max::Call(out_buf, arg_buf, static_cast(val), id); } }; #if BF16_ENABLED template struct Max : _Max { static constexpr __host__ __device__ __forceinline__ __nv_bfloat16 zero() { return __float2bfloat16_rn(-std::numeric_limits::infinity()); } static __device__ __forceinline__ void Call( __nv_bfloat16 *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __nv_bfloat16 val, Idx uid, Idx eid) { _Max::Call( out_buf, arg_u_buf, arg_e_buf, val, uid, eid); } static __device__ __forceinline__ void Call( __nv_bfloat16 *out_buf, Idx *arg_buf, __nv_bfloat16 val, Idx id) { _Max::Call(out_buf, arg_buf, val, id); } // sometimes we have to use float in reduction for better precision static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __nv_bfloat16 val, Idx uid, Idx eid) { _Max::Call(out_buf, arg_u_buf, arg_e_buf, static_cast(val), uid, eid); } static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_buf, __nv_bfloat16 val, Idx id) { _Max::Call(out_buf, arg_buf, static_cast(val), id); } }; #endif // BF16_ENABLED template struct _Min { static constexpr __host__ __device__ __forceinline__ DType zero() { return std::numeric_limits::infinity(); } static constexpr bool require_arg = true; static __device__ __forceinline__ void Call( DType *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, DType val, Idx uid, Idx eid) { if 
(!atomic) { if (*out_buf > val) { *out_buf = val; *arg_u_buf = uid; *arg_e_buf = eid; } } else { cuda::AtomicMin(out_buf, val); } } static __device__ __forceinline__ void Call( DType *out_buf, Idx *arg_buf, DType val, Idx id) { if (!atomic) { if (*out_buf > val) { *out_buf = val; *arg_buf = id; } } else { cuda::AtomicMin(out_buf, val); } } static __device__ __forceinline__ void CallArg( Idx fid, Idx *arg_u_buf, Idx *arg_e_buf, DType val, DType val_ref, Idx uid, Idx eid) { if (atomic) { if (val == val_ref) { if (arg_u_buf) arg_u_buf[fid] = uid; if (arg_e_buf) arg_e_buf[fid] = eid; } } } }; template struct Min : _Min {}; template struct Min : _Min { static constexpr __host__ __device__ __forceinline__ __half zero() { return __float2half_rn(6.550400e+04f); } static __device__ __forceinline__ void Call( __half *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __half val, Idx uid, Idx eid) { _Min::Call( out_buf, arg_u_buf, arg_e_buf, val, uid, eid); } static __device__ __forceinline__ void Call( __half *out_buf, Idx *arg_buf, __half val, Idx id) { _Min::Call(out_buf, arg_buf, val, id); } // sometimes we have to use float in reduction for better precision static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __half val, Idx uid, Idx eid) { _Min::Call(out_buf, arg_u_buf, arg_e_buf, static_cast(val), uid, eid); } static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_buf, __half val, Idx id) { _Min::Call(out_buf, arg_buf, static_cast(val), id); } }; #if BF16_ENABLED template struct Min : _Min { static constexpr __host__ __device__ __forceinline__ __nv_bfloat16 zero() { return __float2bfloat16_rn(std::numeric_limits::infinity()); } static __device__ __forceinline__ void Call( __nv_bfloat16 *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __nv_bfloat16 val, Idx uid, Idx eid) { _Min::Call( out_buf, arg_u_buf, arg_e_buf, val, uid, eid); } static __device__ __forceinline__ void Call( __nv_bfloat16 *out_buf, Idx *arg_buf, __nv_bfloat16 val, 
Idx id) { _Min::Call(out_buf, arg_buf, val, id); } // sometimes we have to use float in reduction for better precision static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_u_buf, Idx *arg_e_buf, __nv_bfloat16 val, Idx uid, Idx eid) { _Min::Call(out_buf, arg_u_buf, arg_e_buf, static_cast(val), uid, eid); } static __device__ __forceinline__ void Call( float *out_buf, Idx *arg_buf, __nv_bfloat16 val, Idx id) { _Min::Call(out_buf, arg_buf, static_cast(val), id); } }; #endif // BF16_ENABLED } // namespace reduce } // namespace cuda } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CUDA_FUNCTOR_CUH_ ================================================ FILE: src/array/cuda/gather_mm.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/gather_mm.cu * @brief GatherMM C APIs and definitions. */ #include #include // std::swap #include "./atomic.cuh" #include "./functor.cuh" #include "./utils.h" namespace dgl { using namespace cuda; namespace aten { namespace { /** @brief Call cuBLAS GEMM API for dense matmul operation for float and double. 
*/ template cublasStatus_t cublasGemm( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const DType* alpha, const DType* A, int lda, const DType* B, int ldb, const DType* beta, DType* C, int ldc) { LOG(INFO) << "Not supported dtype"; return CUBLAS_STATUS_EXECUTION_FAILED; } template <> cublasStatus_t cublasGemm<__half>( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __half* alpha, const __half* A, int lda, const __half* B, int ldb, const __half* beta, __half* C, int ldc) { return cublasHgemm( handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } #if BF16_ENABLED template <> cublasStatus_t cublasGemm<__nv_bfloat16>( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const __nv_bfloat16* alpha, const __nv_bfloat16* A, int lda, const __nv_bfloat16* B, int ldb, const __nv_bfloat16* beta, __nv_bfloat16* C, int ldc) { float alpha_float = __bfloat162float(*alpha); float beta_float = __bfloat162float(*beta); return cublasGemmEx( handle, transa, transb, m, n, k, &alpha_float, A, CUDA_R_16BF, lda, B, CUDA_R_16BF, ldb, &beta_float, C, CUDA_R_16BF, ldc, CUBLAS_COMPUTE_32F, CUBLAS_GEMM_DEFAULT_TENSOR_OP); } #endif // BF16_ENABLED template <> cublasStatus_t cublasGemm( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const float* alpha, const float* A, int lda, const float* B, int ldb, const float* beta, float* C, int ldc) { return cublasSgemm( handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> cublasStatus_t cublasGemm( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, int k, const double* alpha, const double* A, int lda, const double* B, int ldb, const double* beta, double* C, int ldc) { return cublasDgemm( handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } } // namespace 
namespace cuda { /** * @note Each row of A multiplies a segment of matrix of B of dimension in_len * * outlen. One warp is assigned to process one row of A. Each WARP sequentially * multiplies one element of A and a row of B to compute partial result of the * output. A is loaded in shared memory in a coalesced way. Output matrix is * loaded in registers. B should get benefit from L2 cache. */ template __global__ void GatherMMScatterKernel( const DType* __restrict__ A, const DType* __restrict__ B, DType* __restrict__ C, const Idx* __restrict__ idx_a, const Idx* __restrict__ idx_b, const Idx* __restrict__ idx_c, const int64_t num_rows, const int64_t in_len, const int64_t out_len) { unsigned int tId = threadIdx.x; unsigned int laneId = tId & 31; unsigned int gId = (blockIdx.x * blockDim.x + threadIdx.x); unsigned int warpId = gId >> 5; unsigned int row = warpId; if (row < num_rows) { const unsigned int local_row = row & 3; // hardcoded for TB size 128 (4 warps) const Idx cur_rowA = (idx_a) ? idx_a[row] : row; const Idx cur_rowB = (idx_b) ? idx_b[row] : row; const Idx cur_rowC = (idx_c) ? 
idx_c[row] : row; const Idx B_offset = cur_rowB * in_len * out_len; const int sh_a_tile = 64; __shared__ DType sh_A[4 * sh_a_tile]; int a_tile = sh_a_tile; for (unsigned int k_start = 0; k_start < in_len; k_start += 64) { if ((in_len - k_start) < a_tile) a_tile = in_len - k_start; // Load A in shared mem in a coalesced way for (unsigned int l = laneId; l < a_tile; l += 32) sh_A[local_row * sh_a_tile + l] = A[cur_rowA * in_len + (k_start + l)]; __syncwarp(); for (unsigned int outloop = 0; outloop < out_len; outloop += 32) { DType out_reg = static_cast(0.0f); // thread private const unsigned int l = laneId; if (l < out_len) { // iterate over elements of a row of A for (unsigned int i = 0; i < a_tile; i++) { const DType a_val = sh_A[local_row * sh_a_tile + i]; // iterate over elements of a row of B in parallel out_reg += a_val * B[B_offset + ((i + k_start) * out_len + (outloop + l))]; } if (idx_c) { AtomicAdd(C + cur_rowC * out_len + (outloop + l), out_reg); } else { C[cur_rowC * out_len + (outloop + l)] += out_reg; } } } } } } /** * @note Output matrix is accumulated via atomic operations. Rest of the * strategies are similar to GatherMMKernel. One warp is assigned to process one * row of A. Each WARP sequentially multiplies one element of A and a row of B * to compute partial result of the output. A is loaded in shared memory in a * coalesced way. B should get benefit from L2 cache. 
*/ template __global__ void GatherMMScatterKernel2( const DType* __restrict__ A, const DType* __restrict__ B, DType* __restrict__ C, const Idx* __restrict__ idx_a, const Idx* __restrict__ idx_b, const Idx* __restrict__ idx_c, const int64_t num_rows, const int64_t in_len, const int64_t out_len) { unsigned int tId = threadIdx.x; unsigned int laneId = tId & 31; unsigned int gId = (blockIdx.x * blockDim.x + threadIdx.x); unsigned int warpId = gId >> 5; unsigned int row = warpId; if (row < num_rows) { const unsigned int local_row = row & 3; // hardcoded for TB size 128 (4 warps) const Idx row_a = (idx_a) ? idx_a[row] : row; const Idx row_b = (idx_b) ? idx_b[row] : row; const Idx row_c = (idx_c) ? idx_c[row] : row; const Idx C_offset = row_c * in_len * out_len; const int sh_a_tile = 64; __shared__ DType sh_A[4 * sh_a_tile]; int a_tile = sh_a_tile; for (unsigned int k_start = 0; k_start < in_len; k_start += 64) { if ((in_len - k_start) < a_tile) a_tile = in_len - k_start; /* Load A in shared mem in a coalesced way */ for (unsigned int l = laneId; l < a_tile; l += 32) sh_A[local_row * sh_a_tile + l] = A[row_a * in_len + (k_start + l)]; __syncwarp(); for (unsigned int outloop = 0; outloop < out_len; outloop += 32) { DType out_reg = static_cast(0.0f); // thread private const unsigned int l = laneId; if (l < out_len) { const DType b_val = B[row_b * out_len + (outloop + l)]; /* iterate over elements of a row of A */ for (unsigned int i = 0; i < a_tile; i++) { const DType a_val = sh_A[local_row * sh_a_tile + i]; const Idx C_idx = C_offset + ((i + k_start) * out_len + (outloop + l)); AtomicAdd(C + C_idx, a_val * b_val); } } } } } } } // namespace cuda /** * @brief Implementation of Gather_mm operator. The input matrix A is * expected to be sorted according to relation type. * @param A The input dense matrix of dimension m x k * @param B The input dense matrix of dimension k x n * @param C The output dense matrix of dimension m x n * @param seglen_A The input vector of size R. 
Each element
 *        is the length of segments of input ``A``
 * @param a_trans Matrix A to be transposed (not read by this implementation)
 * @param b_trans Matrix B to be transposed
 */
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans) {
  auto device = runtime::DeviceAPI::Get(A->ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  const DType* A_data = A.Ptr();
  const DType* B_data = B.Ptr();
  // seglen_A is dereferenced by host code in the loop below.
  const IdType* seglen_A_data = seglen_A.Ptr();
  DType* C_data = C.Ptr();
  // Running element offsets of the current segment inside A, B and C.
  int64_t A_offset = 0, B_offset = 0, C_offset = 0;
  int64_t m, n, k;
  int64_t num_rel = seglen_A.NumElements();
  // beta == 0: every GEMM overwrites its slice of C.
  DType alpha = 1., beta = 0.;

  // Create the per-thread cuBLAS handle on first use and bind it to the
  // current stream.
  auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
  if (!thr_entry->cublas_handle)
    CUBLAS_CALL(cublasCreate(&(thr_entry->cublas_handle)));
  CUBLAS_CALL(cublasSetStream(thr_entry->cublas_handle, stream));

  // One GEMM per segment; segment etype uses the etype-th matrix of B.
  IdType m_offset = 0;
  for (IdType etype = 0; etype < num_rel; ++etype) {
    m = seglen_A_data[etype];  // rows of A
    CHECK_LE(m_offset + m, A->shape[0])
        << "Segment index out of bound of A->shape[0].";
    n = B->shape[2];  // cols of B
    k = B->shape[1];  // cols of A == rows of B
    int ldb = n, lda = k, ldc = n;
    cublasOperation_t transB = CUBLAS_OP_N;
    cublasOperation_t transA = CUBLAS_OP_N;
    if (b_trans) {
      // With B transposed the roles of n and k are exchanged; the leading
      // dimensions are set from the values before the swap.
      transB = CUBLAS_OP_T;
      ldb = n, lda = n, ldc = k;
      std::swap(n, k);
    }
    // The operands are passed in swapped order (B first, then A) with
    // dimensions (n, m, k) -- presumably the usual way of running a row-major
    // product on column-major cuBLAS; confirm against the cuBLAS docs.
    CUBLAS_CALL(cublasGemm(
        thr_entry->cublas_handle, transB, transA, n, m, k, &alpha,
        B_data + B_offset, ldb, A_data + A_offset, lda, &beta,
        C_data + C_offset, ldc));
    A_offset += m * k;
    B_offset += k * n;
    C_offset += m * n;
    m_offset += m;
  }
}

// Segmented GEMM producing dB: for each segment of seglen[etype] rows, one
// cublasGemm call combines the matching rows of dC and A (A flagged
// CUBLAS_OP_T) and overwrites the etype-th slice of dB (beta == 0).
// From the visible code, a slice of dB has dC->shape[1] * A->shape[1] elements.
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen) {
  auto device = runtime::DeviceAPI::Get(A->ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  const DType* A_data = A.Ptr();
  const DType* dC_data = dC.Ptr();
  // seglen is dereferenced by host code in the loop below.
  const IdType* seglen_data = seglen.Ptr();
  DType* dB_data = dB.Ptr();
  // Running element offsets of the current segment inside A, dC and dB.
  int64_t A_offset = 0, dC_offset = 0, dB_offset = 0;
  int64_t m, n, k;
  int64_t num_rel = seglen.NumElements();
  DType alpha = 1., beta = 0.;

  // Create the per-thread cuBLAS handle on first use and bind it to the
  // current stream.
  auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal();
  if (!thr_entry->cublas_handle)
    CUBLAS_CALL(cublasCreate(&(thr_entry->cublas_handle)));
  CUBLAS_CALL(cublasSetStream(thr_entry->cublas_handle, stream));

  IdType k_offset = 0;
  for (IdType etype = 0; etype < num_rel; ++etype) {
    m = dC->shape[1];
    n = A->shape[1];
    k = seglen_data[etype];  // number of rows in this segment
    CHECK_LE(k_offset + k, A->shape[0])
        << "Segement index out of bound of A->shape[0].";
    int lddC = m, ldA = n, lddB = m;
    cublasOperation_t trans_dC = CUBLAS_OP_N;
    cublasOperation_t trans_A = CUBLAS_OP_T;
    CUBLAS_CALL(cublasGemm(
        thr_entry->cublas_handle, trans_dC, trans_A, m, n, k, &alpha,
        dC_data + dC_offset, lddC, A_data + A_offset, ldA, &beta,
        dB_data + dB_offset, lddB));
    dC_offset += m * k;
    A_offset += n * k;
    dB_offset += m * n;
    k_offset += k;
  }
}

/**
 * @brief Implementation of Gather_mm operator. The input matrix A is
 *        expected to be sorted according to relation type.
 * @param A The input dense matrix of dimension m x k
 * @param B The input dense matrix of dimension k x n
 * @param C The output dense matrix of dimension m x n
 * @param idx_a The input vector to gather left hand operand on
 * @param idx_b The input vector to gather right hand operand on
 * @note Launches cuda::GatherMMScatterKernel with a null idx_c, i.e. row i of
 *       the result is accumulated into row i of C. B is read as a 3-D array
 *       (out_len is taken from B->shape[2]).
 */
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b) {
  auto device = runtime::DeviceAPI::Get(A->ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  int64_t out_len = B->shape[2];  // cols of B
  int64_t in_len = A->shape[1];   // cols of A
  const int64_t tot_num_rows = A->shape[0];
  // 128 threads per block and one 32-thread warp per row of A, so the grid
  // needs ceil(tot_num_rows * 32 / 128) blocks.
  const int ntx = 128;
  const int warp_size = 32;
  const int nbx = ((tot_num_rows * warp_size + ntx - 1) / ntx);
  const dim3 nblks(nbx);
  const dim3 nthrs(ntx);
  CUDA_KERNEL_CALL(
      (cuda::GatherMMScatterKernel), nblks, nthrs, 0, stream, A.Ptr(), B.Ptr(),
      C.Ptr(), idx_a.Ptr(), idx_b.Ptr(), nullptr, tot_num_rows, in_len,
      out_len);
}

/**
 * @brief Implementation of Gather_mm operator.
The input matrix A is
 *        expected to be sorted according to relation type.
 * @param A The input dense matrix of dimension m x k
 * @param B The input dense matrix of dimension k x n
 * @param C The output dense matrix of dimension m x n
 * @param idx_a The input vector to gather left hand operand on
 * @param idx_b The input vector to gather right hand operand on
 * @param idx_c The input vector selecting where in the output each result is
 *        accumulated
 * @note Dispatches on B->ndim: a 3-D B launches cuda::GatherMMScatterKernel,
 *       anything else launches cuda::GatherMMScatterKernel2.
 */
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c) {
  auto device = runtime::DeviceAPI::Get(A->ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  const IdType* idx_c_data = idx_c.Ptr();
  // The last dimension of B is the output width for both the 2-D and the 3-D
  // layout.
  int64_t out_len = (B->ndim == 2) ? B->shape[1] : B->shape[2];  // cols of B
  int64_t in_len = A->shape[1];                                  // cols of A
  int64_t tot_num_rows = A->shape[0];
  // 128 threads per block and one 32-thread warp per row of A, so the grid
  // needs ceil(tot_num_rows * 32 / 128) blocks.
  const int ntx = 128;
  const int warp_size = 32;
  const int nbx = ((tot_num_rows * warp_size + ntx - 1) / ntx);
  const dim3 nblks(nbx);
  const dim3 nthrs(ntx);
  if (B->ndim == 3) {
    CUDA_KERNEL_CALL(
        (cuda::GatherMMScatterKernel), nblks, nthrs, 0, stream, A.Ptr(),
        B.Ptr(), C.Ptr(), idx_a.Ptr(), idx_b.Ptr(), idx_c.Ptr(), tot_num_rows,
        in_len, out_len);
  } else {
    // Custom kernel for W_grad[idx_c[i]] = H^T[i] * C.grad[i]
    // This kernel accesses rows of A in a transposed way w/o explicitly
    // converting A
    CUDA_KERNEL_CALL(
        (cuda::GatherMMScatterKernel2), nblks, nthrs, 0, stream, A.Ptr(),
        B.Ptr(), C.Ptr(), idx_a.Ptr(), idx_b.Ptr(), idx_c.Ptr(), tot_num_rows,
        in_len, out_len);
  }
}

// Explicit instantiations of GatherMM.
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
#if BF16_ENABLED
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
#endif  // BF16_ENABLED
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);
template void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b);

// Explicit instantiations of GatherMMScatter.
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
#if BF16_ENABLED
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
#endif  // BF16_ENABLED
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);
template void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);

// Explicit instantiations of SegmentMM.
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
#if BF16_ENABLED
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
#endif  // BF16_ENABLED
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);
template void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool a_trans, bool b_trans);

// Explicit instantiations of SegmentMMBackwardB.
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
#if BF16_ENABLED
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
#endif  // BF16_ENABLED
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);
template void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);

}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cuda/ge_spmm.cuh
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file array/cuda/ge_spmm.cuh
 * @brief GE-SpMM CUDA kernel function header.
*/
#ifndef DGL_ARRAY_CUDA_GE_SPMM_CUH_
#define DGL_ARRAY_CUDA_GE_SPMM_CUH_

#include "../../runtime/cuda/cuda_common.h"
#include "./utils.h"
#include "atomic.cuh"
#include "macro.cuh"

namespace dgl {

using namespace cuda;

namespace aten {
namespace cuda {

/**
 * @brief CUDA kernel of GE-SpMM on Csr.
 * @note GE-SpMM: https://arxiv.org/pdf/2007.03179.pdf
 *       The grid dimension x and y are reordered for better performance.
 * @note Thread mapping: blockIdx.x / threadIdx.y select the row (rid),
 *       blockIdx.y / threadIdx.x select the feature column (fid). Every
 *       thread accumulates two output columns, fid and fid + 32, so one block
 *       in y covers 64 feature columns.
 * @note The stores to out sit inside the loop over neighbor chunks, so a row
 *       with no neighbors (low == high) is never written by this kernel.
 */
template __global__ void GESpMMKernel(
    const DType* __restrict__ ufeat, const DType* __restrict__ efeat,
    DType* __restrict__ out, const Idx* __restrict__ indptr,
    const Idx* __restrict__ indices, const int64_t num_rows,
    const int64_t num_cols, const int64_t feat_len) {
  const Idx rid =
      blockIdx.x * blockDim.y + threadIdx.y;  // over vertices dimension
  const Idx fid = (blockIdx.y * 64) + threadIdx.x;  // over feature dimension

  if (rid < num_rows && fid < feat_len) {
    // [low, high) is the range of this row's entries in indices / efeat.
    const Idx low = __ldg(indptr + rid), high = __ldg(indptr + rid + 1);
    DType accum_0 = 0., accum_1 = 0.;

    if (blockIdx.y != gridDim.y - 1) {  // fid + 32 < feat_len
      // Not the last block along y: both columns are in range, no guards.
      for (Idx left = low; left < high; left += 32) {
        if (left + 32 <= high) {
          // Full chunk of 32 neighbors: fixed trip count, unrolled.
#pragma unroll
          for (Idx i = 0; i < 32; ++i) {
            const Idx eid = left + i;
            const Idx cid = __ldg(indices + eid);
            const Idx offset = feat_len * cid + fid;
            if (BinaryOp::use_rhs) {
              accum_0 += BinaryOp::Call(ufeat + offset, efeat + eid);
              accum_1 += BinaryOp::Call(ufeat + offset + 32, efeat + eid);
            } else {
              accum_0 += ufeat[offset];
              accum_1 += ufeat[offset + 32];
            }
          }
        } else {
          // Trailing partial chunk.
          for (Idx i = 0; left + i < high; ++i) {
            const Idx eid = left + i;
            const Idx cid = __ldg(indices + eid);
            const Idx offset = feat_len * cid + fid;
            if (BinaryOp::use_rhs) {
              accum_0 += BinaryOp::Call(ufeat + offset, efeat + eid);
              accum_1 += BinaryOp::Call(ufeat + offset + 32, efeat + eid);
            } else {
              accum_0 += ufeat[offset];
              accum_1 += ufeat[offset + 32];
            }
          }
        }
        // Written after every chunk; the last iteration leaves the totals.
        out[feat_len * rid + fid] = accum_0;
        out[feat_len * rid + fid + 32] = accum_1;
      }
    } else {
      // Last block along y: fid + 32 may be out of range. Out-of-range
      // columns are redirected to column 0 so the loads stay inside the row;
      // the matching store below is guarded instead.
      const Idx fid_0 = fid < feat_len ? fid : 0,
                fid_1 = fid + 32 < feat_len ? fid + 32 : 0;
      for (int left = low; left < high; left += 32) {
        if (left + 32 <= high) {
#pragma unroll
          for (int i = 0; i < 32; ++i) {
            const Idx eid = left + i;
            const Idx cid = __ldg(indices + eid);
            const Idx offset = feat_len * cid;
            if (BinaryOp::use_rhs) {
              accum_0 += BinaryOp::Call(ufeat + offset + fid_0, efeat + eid);
              accum_1 += BinaryOp::Call(ufeat + offset + fid_1, efeat + eid);
            } else {
              accum_0 += ufeat[offset + fid_0];
              accum_1 += ufeat[offset + fid_1];
            }
          }
        } else {
          for (int i = 0; i + left < high; ++i) {
            const Idx eid = left + i;
            const Idx cid = __ldg(indices + eid);
            const Idx offset = feat_len * cid;
            if (BinaryOp::use_rhs) {
              accum_0 += BinaryOp::Call(ufeat + offset + fid_0, efeat + eid);
              accum_1 += BinaryOp::Call(ufeat + offset + fid_1, efeat + eid);
            } else {
              accum_0 += ufeat[offset + fid_0];
              accum_1 += ufeat[offset + fid_1];
            }
          }
        }
        out[feat_len * rid + fid] = accum_0;
        if (fid + 32 < feat_len) out[feat_len * rid + fid + 32] = accum_1;
      }
    }
  }
}

// Host launcher for GESpMMKernel on the current CUDA stream.
// Uses 32 x 32 thread blocks: ceil(num_rows / 32) blocks along x and
// ceil(feat_len / 64) blocks along y (each thread handles two columns).
template void GESpMMCsr(
    const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    int64_t feat_len) {
  const Idx* indptr = csr.indptr.Ptr();
  const Idx* indices = csr.indices.Ptr();
  const DType* ufeat_data = ufeat.Ptr();
  const DType* efeat_data = efeat.Ptr();
  DType* out_data = out.Ptr();

  cudaStream_t stream = runtime::getCurrentCUDAStream();

  const int ntx = 32;
  const int nty = 32;
  const int nby = (feat_len + (ntx * 2) - 1) / (ntx * 2);
  const int nbx = (csr.num_rows + nty - 1) / nty;
  const dim3 nblks(nbx, nby);
  const dim3 nthrs(ntx, nty);
  const int sh_mem_size = 0;

  CUDA_KERNEL_CALL(
      (GESpMMKernel), nblks, nthrs, sh_mem_size, stream, ufeat_data,
      efeat_data, out_data, indptr, indices, csr.num_rows, csr.num_cols,
      feat_len);
}

}  // namespace cuda
}  // namespace aten
}  // namespace dgl

#endif  // DGL_ARRAY_CUDA_GE_SPMM_CUH_

================================================
FILE: src/array/cuda/labor_sampling.cu
================================================
/*!
*   Copyright (c) 2022, NVIDIA Corporation
 *   Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 *   All rights reserved.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 * @file array/cuda/labor_sampling.cu
 * @brief labor sampling
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include  // NOLINT
#include
#include
#include
#include

#include "../../array/cuda/utils.h"
#include "../../random/continuous_seed.h"
#include "../../runtime/cuda/cuda_common.h"
#include "./functor.cuh"
#include "./spmm.cuh"

namespace dgl {
namespace aten {
namespace impl {

using dgl::random::continuous_seed;

constexpr int BLOCK_SIZE = 128;
constexpr int CTA_SIZE = 128;
// Relative tolerance used by the convergence tests in this file.
constexpr double eps = 0.0001;

namespace {

// Maps a flat position idx in the extracted one-hop neighborhood to the tuple
// (row, u, data): the row id, the neighbor id and the edge data value.
template struct TransformOp {
  const IdType* idx_coo;
  const IdType* rows;
  const IdType* indptr;
  const IdType* subindptr;
  const IdType* indices;
  const IdType* data_arr;
  bool is_pinned;
  __host__ __device__ auto operator()(IdType idx) {
    const auto in_row = idx_coo[idx];
    const auto row = rows[in_row];
    // Position of this edge in the original matrix: start of the row plus the
    // offset of idx inside the extracted row.
    const auto in_idx = indptr[in_row] + idx - subindptr[in_row];
    // When is_pinned is set, indices is addressed with the flat position.
    const auto u = indices[is_pinned ? idx : in_idx];
    // Without an explicit data array the edge position itself is the data.
    const auto data = data_arr ? data_arr[in_idx] : in_idx;
    return thrust::make_tuple(row, u, data);
  }
};

// Same mapping as TransformOp, but additionally returns in_row and a weight
// w / min(1, c * w2 * ps) computed from A, B, cs and probs.
template <
    typename IdType, typename FloatType, typename probs_t, typename A_t,
    typename B_t>
struct TransformOpImp {
  probs_t probs;
  A_t A;
  B_t B;
  const IdType* idx_coo;
  const IdType* rows;
  const FloatType* cs;
  const IdType* indptr;
  const IdType* subindptr;
  const IdType* indices;
  const IdType* data_arr;
  bool is_pinned;
  __host__ __device__ auto operator()(IdType idx) {
    const auto ps = probs[idx];
    const auto in_row = idx_coo[idx];
    const auto c = cs[in_row];
    const auto row = rows[in_row];
    const auto in_idx = indptr[in_row] + idx - subindptr[in_row];
    const auto u = indices[is_pinned ? idx : in_idx];
    const auto w = A[in_idx];
    const auto w2 = B[in_idx];
    const auto data = data_arr ? data_arr[in_idx] : in_idx;
    return thrust::make_tuple(
        in_row, row, u, data, w / min((FloatType)1, c * w2 * ps));
  }
};

// Predicate: true when rnd <= cs[in_row] * ps.
template struct StencilOp {
  const FloatType* cs;
  template __host__ __device__ auto operator()(
      IdType in_row, FloatType ps, FloatType rnd) {
    return rnd <= cs[in_row] * ps;
  }
};

// Predicate on a flat edge position that draws the random number itself
// (seed.uniform(t)) and compares it with cs[in_row] * A[in_idx] * ps.
template struct StencilOpFused {
  const continuous_seed seed;
  const IdType* idx_coo;
  const FloatType* cs;
  const ps_t probs;
  const A_t A;
  const IdType* subindptr;
  const IdType* indptr;
  const IdType* indices;
  const IdType* nids;
  bool is_pinned;
  __device__ auto operator()(IdType idx) {
    const auto in_row = idx_coo[idx];
    const auto ps = probs[idx];
    IdType rofs = idx - subindptr[in_row];
    const auto in_idx = indptr[in_row] + rofs;
    const auto u = indices[is_pinned ? idx : in_idx];
    const auto t = nids ? nids[u] : u;  // t in the paper
    // rolled random number r_t is a function of the random_seed and t
    const float rnd = seed.uniform(t);
    return rnd <= cs[in_row] * A[in_idx] * ps;
  }
};

// Returns ps * ds[idx] / ws[idx].
template struct TransformOpMean {
  const IdType* ds;
  const FloatType* ws;
  __host__ __device__ auto operator()(IdType idx, FloatType ps) {
    return ps * ds[idx] / ws[idx];
  }
};

// Clamps a value from above: min(1, x).
struct TransformOpMinWith1 {
  template __host__ __device__ auto operator()(FloatType x) {
    return min((FloatType)1, x);
  }
};

// Returns indptr[row], plus in_deg[row] when in_deg is non-null; i.e. the
// begin offset of a row (in_deg == nullptr) or its end offset.
template struct IndptrFunc {
  const IdType* indptr;
  const IdType* in_deg;
  __host__ __device__ auto operator()(IdType row) {
    return indptr[row] + (in_deg ? in_deg[row] : 0);
  }
};

// Maps x to the pair (x, x^2).
template struct SquareFunc {
  __host__ __device__ auto operator()(FloatType x) {
    return thrust::make_tuple(x, x * x);
  }
};

// Component-wise sum of two 2-tuples.
struct TupleSum {
  template __host__ __device__ T operator()(const T& a, const T& b) const {
    return thrust::make_tuple(
        thrust::get<0>(a) + thrust::get<0>(b),
        thrust::get<1>(a) + thrust::get<1>(b));
  }
};

// For the tIdx-th requested row stores its degree (in_deg), its start offset
// in the matrix (inrow_indptr) and the initial value num_picks / degree (cs).
template struct DegreeFunc {
  const IdType num_picks;
  const IdType* rows;
  const IdType* indptr;
  IdType* in_deg;
  IdType* inrow_indptr;
  FloatType* cs;
  __host__ __device__ auto operator()(IdType tIdx) {
    const auto out_row = rows[tIdx];
    const auto indptr_val = indptr[out_row];
    const auto d = indptr[out_row + 1] - indptr_val;
    in_deg[tIdx] = d;
    inrow_indptr[tIdx] = indptr_val;
    // NOTE(review): d == 0 makes this a division by zero in floating point --
    // confirm that the consumers never use cs for empty rows.
    cs[tIdx] = num_picks / (FloatType)d;
  }
};

// For every flat edge position tx < hop_size: copies the neighbor id into hop
// (skipped when indices and hop are the same buffer), copies the edge weight
// into A_l when A is non-null, and stores the random number of the neighbor
// in rands. The loop advances by the total number of launched threads.
template __global__ void _CSRRowWiseOneHopExtractorKernel(
    const continuous_seed seed, const IdType hop_size,
    const IdType* const indptr, const IdType* const subindptr,
    const IdType* const indices, const IdType* const idx_coo,
    const IdType* const nids, const FloatType* const A, FloatType* const rands,
    IdType* const hop, FloatType* const A_l) {
  IdType tx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x;
  const int stride_x = gridDim.x * blockDim.x;

  while (tx < hop_size) {
    IdType rpos = idx_coo[tx];
    IdType rofs = tx - subindptr[rpos];
    const auto in_idx = indptr[rpos] + rofs;
    // indices == hop means the neighborhood was already extracted into hop,
    // so it is addressed with the flat position and not copied again.
    const auto not_pinned = indices != hop;
    const auto u = indices[not_pinned ? in_idx : tx];
    if (not_pinned) hop[tx] = u;
    const auto t = nids ? nids[u] : u;
    if (A) A_l[tx] = A[in_idx];
    // rolled random number r_t is a function of the random_seed and t
    rands[tx] = (FloatType)seed.uniform(t);
    tx += stride_x;
  }
}

constexpr int CACHE_LINE_SIZE = 128;

// Returns the degree of a row padded by (elements per cache line - 1), so
// that every row has room to be shifted to a cache-line-aligned start.
template struct AlignmentFunc {
  static_assert(CACHE_LINE_SIZE % sizeof(IdType) == 0);
  const IdType* in_deg;
  const int64_t* perm;
  IdType num_rows;
  __host__ __device__ auto operator()(IdType row) {
    constexpr int num_elements = CACHE_LINE_SIZE / sizeof(IdType);
    return in_deg[perm ? perm[row % num_rows] : row] + num_elements - 1;
  }
};

// Copies the neighbor ids of the requested rows from indices into hop, using
// the padded layout described by subindptr_aligned so that thread tx reads an
// address whose distance to the start of indices + tx is a multiple of the
// cache line size (checked by the assert below).
template __global__ void _CSRRowWiseOneHopExtractorAlignedKernel(
    const IdType hop_size, const IdType num_rows, const IdType* const indptr,
    const IdType* const subindptr, const IdType* const subindptr_aligned,
    const IdType* const indices, IdType* const hop, const int64_t* const perm) {
  IdType tx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x;
  const int stride_x = gridDim.x * blockDim.x;

  while (tx < hop_size) {
    // Row (in the padded layout) that contains position tx.
    const IdType rpos_ =
        dgl::cuda::_UpperBound(subindptr_aligned, num_rows, tx) - 1;
    const IdType rpos = perm ? perm[rpos_] : rpos_;
    const auto out_row = subindptr[rpos];
    const auto d = subindptr[rpos + 1] - out_row;
    // Number of padding elements in front of the row data.
    const int offset =
        ((uint64_t)(indices + indptr[rpos] - subindptr_aligned[rpos_]) %
         CACHE_LINE_SIZE) /
        sizeof(IdType);
    const IdType rofs = tx - subindptr_aligned[rpos_] - offset;
    // Positions that fall into the padding do nothing.
    if (rofs >= 0 && rofs < d) {
      const auto in_idx = indptr[rpos] + rofs;
      assert((uint64_t)(indices + in_idx - tx) % CACHE_LINE_SIZE == 0);
      const auto u = indices[in_idx];
      hop[out_row + rofs] = u;
    }
    tx += stride_x;
  }
}

// Iteratively rescales cs[out_row] for every row until the sum var_1 computed
// from the current c matches var_target within the relative tolerance eps.
// Expects blockDim == (CTA_SIZE, BLOCK_CTAS); threadIdx.y selects the row and
// the threads along x stride over the neighbors of that row.
template __global__ void _CSRRowWiseLayerSampleDegreeKernel(
    const IdType num_picks, const IdType num_rows, FloatType* const cs,
    const FloatType* const ds, const FloatType* const d2s,
    const IdType* const indptr, const FloatType* const probs,
    const FloatType* const A, const IdType* const subindptr) {
  typedef cub::BlockReduce BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp_storage;
  __shared__ FloatType var_1_bcast[BLOCK_CTAS];

  // one group of CTA_SIZE threads (the x dimension) is assigned per row
  assert(blockDim.x == CTA_SIZE);
  assert(blockDim.y == BLOCK_CTAS);

  IdType out_row = blockIdx.x * TILE_SIZE + threadIdx.y;
  const auto last_row =
      min(static_cast(blockIdx.x + 1) * TILE_SIZE, num_rows);

  constexpr FloatType ONE = 1;

  while (out_row < last_row) {
    const auto in_row_start = indptr[out_row];
    const auto out_row_start = subindptr[out_row];

    const IdType degree = subindptr[out_row + 1] - out_row_start;

    if (degree > 0) {
      // stands for k in arXiv:2210.13339, i.e. fanout
      const auto k = min(num_picks, degree);
      // slightly better than NS
      const FloatType d_ = ds ? ds[out_row] : degree;
      // stands for right hand side of Equation (22) in arXiv:2210.13339
      FloatType var_target =
          d_ * d_ / k + (ds ? d2s[out_row] - d_ * d_ / degree : 0);

      auto c = cs[out_row];
      const int num_valid = min(degree, (IdType)CTA_SIZE);
      // stands for left hand side of Equation (22) in arXiv:2210.13339
      FloatType var_1;
      do {
        var_1 = 0;
        if (A) {
          // Weighted case: entries with a non-positive weight contribute 0.
          for (int idx = threadIdx.x; idx < degree; idx += CTA_SIZE) {
            const auto w = A[in_row_start + idx];
            const auto ps = probs ? probs[out_row_start + idx] : w;
            var_1 += w > 0 ? w * w / min(ONE, c * ps) : 0;
          }
        } else {
          for (int idx = threadIdx.x; idx < degree; idx += CTA_SIZE) {
            const auto ps = probs[out_row_start + idx];
            var_1 += 1 / min(ONE, c * ps);
          }
        }
        var_1 = BlockReduce(temp_storage).Sum(var_1, num_valid);
        // Publish the reduced value through shared memory so that every
        // thread continues with the same var_1.
        if (threadIdx.x == 0) var_1_bcast[threadIdx.y] = var_1;
        __syncthreads();
        var_1 = var_1_bcast[threadIdx.y];

        c *= var_1 / var_target;
      } while (min(var_1, var_target) / max(var_1, var_target) < 1 - eps);

      if (threadIdx.x == 0) cs[out_row] = c;
    }

    out_row += BLOCK_CTAS;
  }
}

}  // namespace

// Returns the smallest i such that (size - 1) >> i == 0, i.e. the number of
// bits needed to represent size - 1. Returns 0 for size <= 0 and the bit
// width of IdType when no such i is found.
template int log_size(const IdType size) {
  if (size <= 0) return 0;
  for (int i = 0; i < static_cast(sizeof(IdType)) * 8; i++)
    if (((size - 1) >> i) == 0) return i;
  return sizeof(IdType) * 8;
}

// Computes the per-edge values written to probs_found and refines cs_arr.
// Steps visible below: extract the one-hop neighborhood into hop_1 together
// with random numbers and weights, relabel the neighbors to a compact range,
// then repeat { max-reduce cs over the edges of every unique neighbor, gather
// the result back to the edges, re-solve cs per row } until the iteration
// budget importance_sampling is used up (a negative value means no limit) or
// the sum of min(1, prob) stops shrinking by more than eps.
template void compute_importance_sampling_probabilities(
    CSRMatrix mat, const IdType hop_size, cudaStream_t stream,
    const continuous_seed seed, const IdType num_rows, const IdType* indptr,
    const IdType* subindptr, const IdType* indices, IdArray idx_coo_arr,
    const IdType* nids,
    FloatArray cs_arr,  // holds the computed cs values, has size num_rows
    const bool weighted, const FloatType* A, const FloatType* ds,
    const FloatType* d2s, const IdType num_picks, DGLContext ctx,
    const runtime::CUDAWorkspaceAllocator& allocator,
    const exec_policy_t& exec_policy, const int importance_sampling,
    IdType* hop_1,  // holds the contiguous one-hop neighborhood, has size |E|
    FloatType* rands,  // holds the rolled random numbers r_t for each edge, has
                       // size |E|
    FloatType* probs_found) {  // holds the computed pi_t values for each edge,
                               // has size |E|
  auto device = runtime::DeviceAPI::Get(ctx);
  auto idx_coo = idx_coo_arr.Ptr();
  auto cs = cs_arr.Ptr();
  // Per-edge weights in extracted order; only allocated for weighted graphs.
  FloatArray A_l_arr = weighted
                           ? NewFloatArray(hop_size, ctx, sizeof(FloatType) * 8)
                           : NullArray();
  auto A_l = A_l_arr.Ptr();

  // Upper bound on the number of key bits the radix sort has to look at.
  const int max_log_num_vertices = log_size(mat.num_cols);

  {  // extracts the onehop neighborhood cols to a contiguous range into hop_1
    const dim3 block(BLOCK_SIZE);
    const dim3 grid((hop_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
    CUDA_KERNEL_CALL(
        (_CSRRowWiseOneHopExtractorKernel), grid, block, 0, stream, seed,
        hop_size, indptr, subindptr, indices, idx_coo, nids,
        weighted ? A : nullptr, rands, hop_1, A_l);
  }
  int64_t hop_uniq_size = 0;
  IdArray hop_new_arr = NewIdArray(hop_size, ctx, sizeof(IdType) * 8);
  auto hop_new = hop_new_arr.Ptr();
  auto hop_unique = allocator.alloc_unique(hop_size);
  // After this block, hop_unique holds the unique set of one-hop neighborhood
  // and hop_new holds the relabeled hop_1, idx_coo already holds relabeled
  // destination. hop_unique[hop_new] == hop_1 holds
  {
    auto hop_2 = allocator.alloc_unique(hop_size);
    auto hop_3 = allocator.alloc_unique(hop_size);

    // Sort a copy so that hop_1 keeps its original order.
    device->CopyDataFromTo(
        hop_1, 0, hop_2.get(), 0, sizeof(IdType) * hop_size, ctx, ctx,
        mat.indptr->dtype);

    cub::DoubleBuffer hop_b(hop_2.get(), hop_3.get());

    {
      // First call only queries the temporary storage size.
      std::size_t temp_storage_bytes = 0;
      CUDA_CALL(cub::DeviceRadixSort::SortKeys(
          nullptr, temp_storage_bytes, hop_b, hop_size, 0, max_log_num_vertices,
          stream));

      auto temp = allocator.alloc_unique(temp_storage_bytes);

      CUDA_CALL(cub::DeviceRadixSort::SortKeys(
          temp.get(), temp_storage_bytes, hop_b, hop_size, 0,
          max_log_num_vertices, stream));
    }

    auto hop_counts = allocator.alloc_unique(hop_size + 1);
    auto hop_unique_size = allocator.alloc_unique(1);

    {
      // Run-length encoding of the sorted ids yields the unique ids.
      std::size_t temp_storage_bytes = 0;
      CUDA_CALL(cub::DeviceRunLengthEncode::Encode(
          nullptr, temp_storage_bytes, hop_b.Current(), hop_unique.get(),
          hop_counts.get(), hop_unique_size.get(), hop_size, stream));

      auto temp = allocator.alloc_unique(temp_storage_bytes);

      CUDA_CALL(cub::DeviceRunLengthEncode::Encode(
          temp.get(), temp_storage_bytes, hop_b.Current(), hop_unique.get(),
          hop_counts.get(), hop_unique_size.get(), hop_size, stream));

      // Bring the number of unique ids back to the host.
      device->CopyDataFromTo(
          hop_unique_size.get(), 0, &hop_uniq_size, 0, sizeof(hop_uniq_size),
          ctx, DGLContext{kDGLCPU, 0}, mat.indptr->dtype);
    }

    // New label of a neighbor = its rank among the sorted unique ids.
    thrust::lower_bound(
        exec_policy, hop_unique.get(), hop_unique.get() + hop_uniq_size, hop_1,
        hop_1 + hop_size, hop_new);
  }

  // @todo Consider creating a CSC because the SpMV will be done multiple times.
  COOMatrix rmat(
      num_rows, hop_uniq_size, idx_coo_arr, hop_new_arr, NullArray(), true,
      mat.sorted);

  // Scalar features on both sides, no broadcasting.
  BcastOff bcast_off;
  bcast_off.use_bcast = false;
  bcast_off.out_len = 1;
  bcast_off.lhs_len = 1;
  bcast_off.rhs_len = 1;

  // probs (probs_arr_2) holds the running per-neighbor value; probs_1
  // (probs_arr) receives the factor computed in iterations after the first.
  FloatArray probs_arr =
      NewFloatArray(hop_uniq_size, ctx, sizeof(FloatType) * 8);
  auto probs_1 = probs_arr.Ptr();
  FloatArray probs_arr_2 =
      NewFloatArray(hop_uniq_size, ctx, sizeof(FloatType) * 8);
  auto probs = probs_arr_2.Ptr();
  auto arg_u = NewIdArray(hop_uniq_size, ctx, sizeof(IdType) * 8);
  auto arg_e = NewIdArray(hop_size, ctx, sizeof(IdType) * 8);

  double prev_ex_nodes = hop_uniq_size;

  for (int iters = 0; iters < importance_sampling || importance_sampling < 0;
       iters++) {
    if (weighted && iters == 0) {
      // First weighted iteration: reduce cs * A_l with Max.
      cuda::SpMMCoo<
          IdType, FloatType, cuda::binary::Mul, cuda::reduce::Max>(
          bcast_off, rmat, cs_arr, A_l_arr, probs_arr_2, arg_u, arg_e);
    } else {
      // Otherwise reduce cs alone; from the second iteration on the result
      // goes to probs_arr and is multiplied into probs below.
      cuda::SpMMCoo<
          IdType, FloatType, cuda::binary::CopyLhs, cuda::reduce::Max>(
          bcast_off, rmat, cs_arr, NullArray(), iters ? probs_arr : probs_arr_2,
          arg_u, arg_e);
    }

    if (iters)
      thrust::transform(
          exec_policy, probs_1, probs_1 + hop_uniq_size, probs, probs,
          thrust::multiplies{});

    // Expand the per-neighbor values back to one value per edge.
    thrust::gather(
        exec_policy, hop_new, hop_new + hop_size, probs, probs_found);

    {
      constexpr int BLOCK_CTAS = BLOCK_SIZE / CTA_SIZE;
      // the number of rows each thread block will cover
      constexpr int TILE_SIZE = BLOCK_CTAS;
      const dim3 block(CTA_SIZE, BLOCK_CTAS);
      const dim3 grid((num_rows + TILE_SIZE - 1) / TILE_SIZE);
      CUDA_KERNEL_CALL(
          (_CSRRowWiseLayerSampleDegreeKernel<
              IdType, FloatType, BLOCK_CTAS, TILE_SIZE>),
          grid, block, 0, stream, (IdType)num_picks, num_rows, cs,
          weighted ? ds : nullptr, weighted ? d2s : nullptr, indptr,
          probs_found, A, subindptr);
    }

    {
      // Stop once sum(min(1, probs)) no longer drops by more than eps
      // relative to the previous iteration.
      auto probs_min_1 =
          thrust::make_transform_iterator(probs, TransformOpMinWith1{});
      const double cur_ex_nodes = thrust::reduce(
          exec_policy, probs_min_1, probs_min_1 + hop_uniq_size, 0.0);
      if (cur_ex_nodes / prev_ex_nodes >= 1 - eps) break;
      prev_ex_nodes = cur_ex_nodes;
    }
  }
}

/////////////////////////////// CSR ///////////////////////////////

template std::pair CSRLaborSampling(
    CSRMatrix mat, IdArray rows_arr, const int64_t num_picks,
    FloatArray prob_arr, const int importance_sampling, IdArray random_seed_arr,
    float seed2_contribution, IdArray NIDs) {
  // A non-null prob_arr selects the weighted code paths.
  const bool weighted = !IsNullArray(prob_arr);

  const auto& ctx = rows_arr->ctx;

  runtime::CUDAWorkspaceAllocator allocator(ctx);

  const auto stream = runtime::getCurrentCUDAStream();
  const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream);

  auto device = runtime::DeviceAPI::Get(ctx);

  const IdType num_rows = rows_arr->shape[0];
  IdType* const rows = rows_arr.Ptr();
  IdType* const nids = IsNullArray(NIDs) ? nullptr : NIDs.Ptr();
  FloatType* const A = prob_arr.Ptr();

  IdType* const indptr_ = mat.indptr.Ptr();
  IdType* const indices_ = mat.indices.Ptr();
  IdType* const data = CSRHasData(mat) ? mat.data.Ptr() : nullptr;

  // Read indptr only once in case it is pinned and access is slow.
  auto indptr = allocator.alloc_unique(num_rows);
  // compute in-degrees
  auto in_deg = allocator.alloc_unique(num_rows + 1);
  // cs stands for c_s in arXiv:2210.13339
  FloatArray cs_arr = NewFloatArray(num_rows, ctx, sizeof(FloatType) * 8);
  auto cs = cs_arr.Ptr();
  // ds stands for A_{*s} in arXiv:2210.13339
  FloatArray ds_arr = weighted
                          ? NewFloatArray(num_rows, ctx, sizeof(FloatType) * 8)
                          : NullArray();
  auto ds = ds_arr.Ptr();
  // d2s stands for (A^2)_{*s} in arXiv:2210.13339, ^2 is elementwise.
  FloatArray d2s_arr = weighted
                           ? NewFloatArray(num_rows, ctx, sizeof(FloatType) * 8)
                           : NullArray();
  auto d2s = d2s_arr.Ptr();

  // Fill in_deg, indptr and the initial cs for every requested row.
  thrust::counting_iterator iota(0);
  thrust::for_each(
      exec_policy, iota, iota + num_rows,
      DegreeFunc{
          (IdType)num_picks, rows, indptr_, in_deg.get(), indptr.get(), cs});

  if (weighted) {
    // Per-row sums of the weights (ds) and of the squared weights (d2s),
    // computed in one segmented reduction over pairs (w, w^2).
    auto b_offsets = thrust::make_transform_iterator(
        iota, IndptrFunc{indptr.get(), nullptr});
    auto e_offsets = thrust::make_transform_iterator(
        iota, IndptrFunc{indptr.get(), in_deg.get()});

    auto A_A2 = thrust::make_transform_iterator(A, SquareFunc{});
    auto ds_d2s = thrust::make_zip_iterator(ds, d2s);

    // First call only queries the temporary storage size.
    size_t prefix_temp_size = 0;
    CUDA_CALL(cub::DeviceSegmentedReduce::Reduce(
        nullptr, prefix_temp_size, A_A2, ds_d2s, num_rows, b_offsets, e_offsets,
        TupleSum{}, thrust::make_tuple((FloatType)0, (FloatType)0), stream));
    auto temp = allocator.alloc_unique(prefix_temp_size);
    CUDA_CALL(cub::DeviceSegmentedReduce::Reduce(
        temp.get(), prefix_temp_size, A_A2, ds_d2s, num_rows, b_offsets,
        e_offsets, TupleSum{}, thrust::make_tuple((FloatType)0, (FloatType)0),
        stream));
  }

  // fill subindptr
  IdArray subindptr_arr = NewIdArray(num_rows + 1, ctx, sizeof(IdType) * 8);
  auto subindptr = subindptr_arr.Ptr();

  IdType hop_size;
  {
    // Exclusive prefix sum of the degrees; its last element is the total
    // number of extracted edges, copied to the host as hop_size.
    size_t prefix_temp_size = 0;
    CUDA_CALL(cub::DeviceScan::ExclusiveSum(
        nullptr, prefix_temp_size, in_deg.get(), subindptr, num_rows + 1,
        stream));
    auto temp = allocator.alloc_unique(prefix_temp_size);
    CUDA_CALL(cub::DeviceScan::ExclusiveSum(
        temp.get(), prefix_temp_size, in_deg.get(), subindptr, num_rows + 1,
        stream));

    device->CopyDataFromTo(
        subindptr, num_rows * sizeof(hop_size), &hop_size, 0, sizeof(hop_size),
        ctx, DGLContext{kDGLCPU, 0}, mat.indptr->dtype);
  }
  IdArray hop_arr = NewIdArray(hop_size, ctx, sizeof(IdType) * 8);
  CSRMatrix smat(
      num_rows, mat.num_cols, subindptr_arr, hop_arr, NullArray(), mat.sorted);
  // @todo Consider fusing CSRToCOO into StencilOpFused kernel
  auto smatcoo = CSRToCOO(smat, false);

  auto idx_coo_arr = smatcoo.row;
  auto idx_coo = idx_coo_arr.Ptr();

  auto hop_1 = hop_arr.Ptr();
  const bool is_pinned = mat.indices.IsPinned();
  if (is_pinned) {
    // Pinned indices: copy the needed neighbor ids into hop_1 once, with the
    // cache-line-aligned extractor kernel.
    const auto res = Sort(rows_arr, log_size(mat.num_rows));
    const int64_t* perm = static_cast(res.second->data);

    IdType hop_size;  // Shadows the original one as this is temporary
    auto subindptr_aligned = allocator.alloc_unique(num_rows + 1);
    {
      auto modified_in_deg = thrust::make_transform_iterator(
          iota, AlignmentFunc{in_deg.get(), perm, num_rows});

      size_t prefix_temp_size = 0;
      CUDA_CALL(cub::DeviceScan::ExclusiveSum(
          nullptr, prefix_temp_size, modified_in_deg, subindptr_aligned.get(),
          num_rows + 1, stream));
      auto temp = allocator.alloc_unique(prefix_temp_size);
      CUDA_CALL(cub::DeviceScan::ExclusiveSum(
          temp.get(), prefix_temp_size, modified_in_deg,
          subindptr_aligned.get(), num_rows + 1, stream));

      device->CopyDataFromTo(
          subindptr_aligned.get(), num_rows * sizeof(hop_size), &hop_size, 0,
          sizeof(hop_size), ctx, DGLContext{kDGLCPU, 0}, mat.indptr->dtype);
    }
    const dim3 block(BLOCK_SIZE);
    const dim3 grid((hop_size + BLOCK_SIZE - 1) / BLOCK_SIZE);
    CUDA_KERNEL_CALL(
        (_CSRRowWiseOneHopExtractorAlignedKernel), grid, block, 0, stream,
        hop_size, num_rows, indptr.get(), subindptr, subindptr_aligned.get(),
        indices_, hop_1, perm);
  }
  // From here on the neighbor ids are read from hop_1 when indices is pinned.
  const auto indices = is_pinned ? hop_1 : indices_;

  // Only sized per edge when importance sampling is requested.
  auto rands = allocator.alloc_unique(importance_sampling ? hop_size : 1);
  auto probs_found =
      allocator.alloc_unique(importance_sampling ? hop_size : 1);

  if (weighted) {
    // Recompute c for weighted graphs.
constexpr int BLOCK_CTAS = BLOCK_SIZE / CTA_SIZE; // the number of rows each thread block will cover constexpr int TILE_SIZE = BLOCK_CTAS; const dim3 block(CTA_SIZE, BLOCK_CTAS); const dim3 grid((num_rows + TILE_SIZE - 1) / TILE_SIZE); CUDA_KERNEL_CALL( (_CSRRowWiseLayerSampleDegreeKernel< IdType, FloatType, BLOCK_CTAS, TILE_SIZE>), grid, block, 0, stream, (IdType)num_picks, num_rows, cs, ds, d2s, indptr.get(), nullptr, A, subindptr); } const continuous_seed random_seed = IsNullArray(random_seed_arr) ? continuous_seed(RandomEngine::ThreadLocal()->RandInt(1000000000)) : continuous_seed(random_seed_arr, seed2_contribution); if (importance_sampling) compute_importance_sampling_probabilities< IdType, FloatType, decltype(exec_policy)>( mat, hop_size, stream, random_seed, num_rows, indptr.get(), subindptr, indices, idx_coo_arr, nids, cs_arr, weighted, A, ds, d2s, (IdType)num_picks, ctx, allocator, exec_policy, importance_sampling, hop_1, rands.get(), probs_found.get()); IdArray picked_row = NewIdArray(hop_size, ctx, sizeof(IdType) * 8); IdArray picked_col = NewIdArray(hop_size, ctx, sizeof(IdType) * 8); IdArray picked_idx = NewIdArray(hop_size, ctx, sizeof(IdType) * 8); FloatArray picked_imp = importance_sampling || weighted ? NewFloatArray(hop_size, ctx, sizeof(FloatType) * 8) : NullArray(); IdType* const picked_row_data = picked_row.Ptr(); IdType* const picked_col_data = picked_col.Ptr(); IdType* const picked_idx_data = picked_idx.Ptr(); FloatType* const picked_imp_data = picked_imp.Ptr(); auto picked_inrow = allocator.alloc_unique( importance_sampling || weighted ? 
hop_size : 1); // Sample edges here IdType num_edges; { thrust::constant_iterator one(1); if (importance_sampling) { auto output = thrust::make_zip_iterator( picked_inrow.get(), picked_row_data, picked_col_data, picked_idx_data, picked_imp_data); if (weighted) { auto transformed_output = thrust::make_transform_output_iterator( output, TransformOpImp< IdType, FloatType, FloatType*, FloatType*, decltype(one)>{ probs_found.get(), A, one, idx_coo, rows, cs, indptr.get(), subindptr, indices, data, is_pinned}); auto stencil = thrust::make_zip_iterator(idx_coo, probs_found.get(), rands.get()); num_edges = thrust::copy_if( exec_policy, iota, iota + hop_size, stencil, transformed_output, thrust::make_zip_function(StencilOp{cs})) - transformed_output; } else { auto transformed_output = thrust::make_transform_output_iterator( output, TransformOpImp< IdType, FloatType, FloatType*, decltype(one), decltype(one)>{ probs_found.get(), one, one, idx_coo, rows, cs, indptr.get(), subindptr, indices, data, is_pinned}); auto stencil = thrust::make_zip_iterator(idx_coo, probs_found.get(), rands.get()); num_edges = thrust::copy_if( exec_policy, iota, iota + hop_size, stencil, transformed_output, thrust::make_zip_function(StencilOp{cs})) - transformed_output; } } else { if (weighted) { auto output = thrust::make_zip_iterator( picked_inrow.get(), picked_row_data, picked_col_data, picked_idx_data, picked_imp_data); auto transformed_output = thrust::make_transform_output_iterator( output, TransformOpImp< IdType, FloatType, decltype(one), FloatType*, FloatType*>{ one, A, A, idx_coo, rows, cs, indptr.get(), subindptr, indices, data, is_pinned}); const auto pred = StencilOpFused{ random_seed, idx_coo, cs, one, A, subindptr, indptr.get(), indices, nids, is_pinned}; num_edges = thrust::copy_if( exec_policy, iota, iota + hop_size, iota, transformed_output, pred) - transformed_output; } else { auto output = thrust::make_zip_iterator( picked_row_data, picked_col_data, picked_idx_data); auto 
transformed_output = thrust::make_transform_output_iterator( output, TransformOp{ idx_coo, rows, indptr.get(), subindptr, indices, data, is_pinned}); const auto pred = StencilOpFused{ random_seed, idx_coo, cs, one, one, subindptr, indptr.get(), indices, nids, is_pinned}; num_edges = thrust::copy_if( exec_policy, iota, iota + hop_size, iota, transformed_output, pred) - transformed_output; } } } // Normalize edge weights here if (importance_sampling || weighted) { thrust::constant_iterator one(1); // contains degree information auto ds = allocator.alloc_unique(num_rows); // contains sum of edge weights auto ws = allocator.alloc_unique(num_rows); // contains degree information only for vertices with nonzero degree auto ds_2 = allocator.alloc_unique(num_rows); // contains sum of edge weights only for vertices with nonzero degree auto ws_2 = allocator.alloc_unique(num_rows); auto output_ = thrust::make_zip_iterator(ds.get(), ws.get()); // contains row ids only for vertices with nonzero degree auto keys = allocator.alloc_unique(num_rows); auto input = thrust::make_zip_iterator(one, picked_imp_data); auto new_end = thrust::reduce_by_key( exec_policy, picked_inrow.get(), picked_inrow.get() + num_edges, input, keys.get(), output_, thrust::equal_to{}, TupleSum{}); { thrust::constant_iterator zero_int(0); thrust::constant_iterator zero_float(0); auto input = thrust::make_zip_iterator(zero_int, zero_float); auto output = thrust::make_zip_iterator(ds_2.get(), ws_2.get()); thrust::copy(exec_policy, input, input + num_rows, output); { const auto num_rows_2 = new_end.first - keys.get(); thrust::scatter( exec_policy, output_, output_ + num_rows_2, keys.get(), output); } } { auto input = thrust::make_zip_iterator(picked_inrow.get(), picked_imp_data); auto transformed_input = thrust::make_transform_iterator( input, thrust::make_zip_function(TransformOpMean{ ds_2.get(), ws_2.get()})); thrust::copy( exec_policy, transformed_input, transformed_input + num_edges, picked_imp_data); } } 
picked_row = picked_row.CreateView({num_edges}, picked_row->dtype); picked_col = picked_col.CreateView({num_edges}, picked_col->dtype); picked_idx = picked_idx.CreateView({num_edges}, picked_idx->dtype); if (importance_sampling || weighted) picked_imp = picked_imp.CreateView({num_edges}, picked_imp->dtype); return std::make_pair( COOMatrix(mat.num_rows, mat.num_cols, picked_row, picked_col, picked_idx), picked_imp); } template std::pair CSRLaborSampling( CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray); template std::pair CSRLaborSampling( CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray); template std::pair CSRLaborSampling( CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray); template std::pair CSRLaborSampling( CSRMatrix, IdArray, int64_t, FloatArray, int, IdArray, float, IdArray); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/macro.cuh ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/macro.cuh * @brief Macro to call SPMM/SDDMM cuda kernels. */ #ifndef DGL_ARRAY_CUDA_MACRO_CUH_ #define DGL_ARRAY_CUDA_MACRO_CUH_ ///////////////////////// Dispatchers ////////////////////////// /* Macro used for switching between broadcasting and non-broadcasting kernels. * It also copies the auxiliary information for calculating broadcasting offsets * to GPU. */ #define BCAST_IDX_CTX_SWITCH(BCAST, EDGE_MAP, CTX, LHS_OFF, RHS_OFF, ...) 
\
  do { \
    const BcastOff &info = (BCAST); \
    if (!info.use_bcast) { \
      constexpr bool UseBcast = false; \
      if ((EDGE_MAP)) { \
        constexpr bool UseIdx = true; \
        { __VA_ARGS__ } \
      } else { \
        constexpr bool UseIdx = false; \
        { __VA_ARGS__ } \
      } \
    } else { \
      constexpr bool UseBcast = true; \
      const DGLContext ctx = (CTX); \
      const auto device = runtime::DeviceAPI::Get(ctx); \
      (LHS_OFF) = static_cast<int64_t *>(device->AllocWorkspace( \
          ctx, sizeof(int64_t) * info.lhs_offset.size())); \
      CUDA_CALL(cudaMemcpy( \
          (LHS_OFF), &info.lhs_offset[0], \
          sizeof(int64_t) * info.lhs_offset.size(), cudaMemcpyHostToDevice)); \
      (RHS_OFF) = static_cast<int64_t *>(device->AllocWorkspace( \
          ctx, sizeof(int64_t) * info.rhs_offset.size())); \
      CUDA_CALL(cudaMemcpy( \
          (RHS_OFF), &info.rhs_offset[0], \
          sizeof(int64_t) * info.rhs_offset.size(), cudaMemcpyHostToDevice)); \
      if ((EDGE_MAP)) { \
        constexpr bool UseIdx = true; \
        { __VA_ARGS__ } \
      } else { \
        constexpr bool UseIdx = false; \
        { __VA_ARGS__ } \
      } \
      device->FreeWorkspace(ctx, (LHS_OFF)); \
      device->FreeWorkspace(ctx, (RHS_OFF)); \
    } \
  } while (0)

#endif  // DGL_ARRAY_CUDA_MACRO_CUH_
================================================
FILE: src/array/cuda/negative_sampling.cu
================================================
/**
 * Copyright (c) 2021 by Contributors
 * @file array/cuda/negative_sampling.cu
 * @brief rowwise sampling
 */
// NOTE(review): the header names of the five directives on the next line are
// missing from this copy of the file; restore them before building.
#include #include #include #include #include
#include "../../runtime/cuda/cuda_common.h"
#include "./utils.h"

using namespace dgl::runtime;

namespace dgl {
namespace aten {
namespace impl {

namespace {

/**
 * @brief Draw candidate (row, col) pairs that are not edges of a CSR matrix.
 *
 * Each thread handles the output slots tx, tx + stride, ... below
 * \a num_samples. For a slot it makes up to \a num_trials attempts: it draws
 * a random (u, v), optionally rejects u == v, and rejects the pair if v is
 * found in row u of the CSR. The first accepted pair is written to
 * row[tx] / col[tx]; if every attempt is rejected the slot is left untouched.
 *
 * The lookup is a binary search, so it is only correct when the column
 * indices inside each row are sorted in ascending order.
 *
 * @tparam IdType The ID type used for matrices.
 * @param indptr The indptr array of the CSR.
 * @param indices The indices array of the CSR.
 * @param row The sampled row ids (output).
 * @param col The sampled column ids (output).
 * @param num_row The number of rows of the matrix.
 * @param num_col The number of columns of the matrix.
 * @param num_samples The number of output slots to fill.
 * @param num_trials The maximum number of attempts per slot.
 * @param exclude_self_loops Whether pairs with u == v are rejected.
 * @param random_seed The random seed to use.
 */
template <typename IdType>
__global__ void _GlobalUniformNegativeSamplingKernel(
    const IdType* __restrict__ indptr, const IdType* __restrict__ indices,
    IdType* __restrict__ row, IdType* __restrict__ col, int64_t num_row,
    int64_t num_col, int64_t num_samples, int num_trials,
    bool exclude_self_loops, int32_t random_seed) {
  int64_t tx = blockIdx.x * blockDim.x + threadIdx.x;
  const int stride_x = gridDim.x * blockDim.x;

  // this allows generating 4 32-bit ints at a time
  curandStatePhilox4_32_10_t rng;
  curand_init(random_seed * gridDim.x + blockIdx.x, threadIdx.x, 0, &rng);

  while (tx < num_samples) {
    for (int i = 0; i < num_trials; ++i) {
      uint4 result = curand4(&rng);
      // Turns out that result.x is always 0 with the above RNG.
      // Split result.y into two 16-bit halves and prepend each to a full
      // 32-bit word, giving two 48-bit random values.
      uint64_t y_hi = result.y >> 16;
      uint64_t y_lo = result.y & 0xFFFF;
      uint64_t z = static_cast<uint64_t>(result.z);
      uint64_t w = static_cast<uint64_t>(result.w);
      int64_t u = static_cast<int64_t>(((y_lo << 32L) | z) % num_row);
      int64_t v = static_cast<int64_t>(((y_hi << 32L) | w) % num_col);

      if (exclude_self_loops && (u == v)) continue;

      // binary search of v among indptr[u:u+1]
      int64_t b = indptr[u], e = indptr[u + 1] - 1;
      bool found = false;
      while (b <= e) {
        int64_t m = (b + e) / 2;
        if (indices[m] == v) {
          found = true;
          break;
        } else if (indices[m] < v) {
          b = m + 1;
        } else {
          e = m - 1;
        }
      }

      if (!found) {
        row[tx] = u;
        col[tx] = v;
        break;
      }
    }

    tx += stride_x;
  }
}

/** @brief Predicate that keeps the pairs whose first element is not -1. */
template <typename DType>
struct IsNotMinusOne {
  __device__ __forceinline__ bool operator()(
      const std::pair<DType, DType>& a) {
    return a.first != -1;
  }
};

/**
 * @brief Sort ordered pairs in ascending order, using \a tmp_major and \a
 * tmp_minor as temporary buffers, each with \a n elements.
 */
template <typename IdType>
void SortOrderedPairs(
    runtime::DeviceAPI* device, DGLContext ctx, IdType* major, IdType* minor,
    IdType* tmp_major, IdType* tmp_minor, int64_t n, cudaStream_t stream) {
  // Sort ordered pairs in lexicographical order by two radix sorts since
  // cub's radix sorts are stable.
  // We need a 2*n auxiliary storage to store the results from the first radix
  // sort.
  size_t s1 = 0, s2 = 0;
  void* tmp1 = nullptr;
  void* tmp2 = nullptr;

  // Radix sort by minor key first, reorder the major key in the process.
  // The first call (null workspace) only reports the required size in s1.
  CUDA_CALL(cub::DeviceRadixSort::SortPairs(
      tmp1, s1, minor, tmp_minor, major, tmp_major, n, 0, sizeof(IdType) * 8,
      stream));
  tmp1 = device->AllocWorkspace(ctx, s1);
  CUDA_CALL(cub::DeviceRadixSort::SortPairs(
      tmp1, s1, minor, tmp_minor, major, tmp_major, n, 0, sizeof(IdType) * 8,
      stream));

  // Radix sort by major key next.
  CUDA_CALL(cub::DeviceRadixSort::SortPairs(
      tmp2, s2, tmp_major, major, tmp_minor, minor, n, 0, sizeof(IdType) * 8,
      stream));
  tmp2 = (s2 > s1) ? device->AllocWorkspace(ctx, s2)
                   : tmp1;  // reuse buffer if s2 <= s1
  CUDA_CALL(cub::DeviceRadixSort::SortPairs(
      tmp2, s2, tmp_major, major, tmp_minor, minor, n, 0, sizeof(IdType) * 8,
      stream));

  // tmp2 aliases tmp1 when the buffer was reused; free it only once.
  if (tmp1 != tmp2) device->FreeWorkspace(ctx, tmp2);
  device->FreeWorkspace(ctx, tmp1);
}

};  // namespace

/**
 * @brief Sample up to \a num_samples (row, col) pairs that are not edges of
 * \a csr.
 *
 * The kernel fills num_samples * (1 + redundancy) candidate slots that were
 * pre-initialized to -1; slots whose trials all failed stay at -1 and are
 * compacted away. When \a replace is false the surviving pairs are sorted and
 * deduplicated. The result is truncated to at most \a num_samples pairs, so
 * fewer pairs may be returned.
 *
 * @param csr The CSR matrix to sample non-edges of.
 * @param num_samples The maximum number of pairs to return.
 * @param num_trials The maximum number of attempts per candidate slot.
 * @param exclude_self_loops Whether pairs with row == col are rejected.
 * @param replace Whether duplicate pairs may be returned.
 * @param redundancy Fraction of extra candidate slots to draw.
 * @return The row ids and column ids of the sampled pairs.
 */
template <DGLDeviceType XPU, typename IdType>
std::pair<IdArray, IdArray> CSRGlobalUniformNegativeSampling(
    const CSRMatrix& csr, int64_t num_samples, int num_trials,
    bool exclude_self_loops, bool replace, double redundancy) {
  auto ctx = csr.indptr->ctx;
  auto dtype = csr.indptr->dtype;
  const int64_t num_row = csr.num_rows;
  const int64_t num_col = csr.num_cols;
  const int64_t num_actual_samples =
      static_cast<int64_t>(num_samples * (1 + redundancy));
  // -1 marks a slot for which the kernel found no valid pair.
  IdArray row = Full<IdType>(-1, num_actual_samples, ctx);
  IdArray col = Full<IdType>(-1, num_actual_samples, ctx);
  IdArray out_row = IdArray::Empty({num_actual_samples}, dtype, ctx);
  IdArray out_col = IdArray::Empty({num_actual_samples}, dtype, ctx);
  IdType* row_data = row.Ptr<IdType>();
  IdType* col_data = col.Ptr<IdType>();
  IdType* out_row_data = out_row.Ptr<IdType>();
  IdType* out_col_data = out_col.Ptr<IdType>();
  auto device = runtime::DeviceAPI::Get(ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  const int nt = cuda::FindNumThreads(num_actual_samples);
  const int nb = (num_actual_samples + nt - 1) / nt;
  std::pair<IdArray, IdArray> result;
  int64_t num_out;

  CUDA_KERNEL_CALL(
      _GlobalUniformNegativeSamplingKernel, nb, nt, 0, stream,
      csr.indptr.Ptr<IdType>(), csr.indices.Ptr<IdType>(), row_data, col_data,
      num_row, num_col, num_actual_samples, num_trials, exclude_self_loops,
      RandomEngine::ThreadLocal()->RandInt32());

  // Compact the filled slots into out_row / out_col.
  size_t tmp_size = 0;
  int64_t* num_out_cuda =
      static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
  IsNotMinusOne<IdType> op;
  PairIterator<IdType> begin(row_data, col_data);
  PairIterator<IdType> out_begin(out_row_data, out_col_data);
  CUDA_CALL(cub::DeviceSelect::If(
      nullptr, tmp_size, begin, out_begin, num_out_cuda, num_actual_samples,
      op, stream));
  void* tmp = device->AllocWorkspace(ctx, tmp_size);
  CUDA_CALL(cub::DeviceSelect::If(
      tmp, tmp_size, begin, out_begin, num_out_cuda, num_actual_samples, op,
      stream));
  num_out = cuda::GetCUDAScalar(device, ctx, num_out_cuda);

  if (!replace) {
    IdArray unique_row = IdArray::Empty({num_out}, dtype, ctx);
    IdArray unique_col = IdArray::Empty({num_out}, dtype, ctx);
    IdType* unique_row_data = unique_row.Ptr<IdType>();
    IdType* unique_col_data = unique_col.Ptr<IdType>();
    PairIterator<IdType> unique_begin(unique_row_data, unique_col_data);

    // The unique_* arrays serve as scratch space for the sort; the sorted
    // pairs end up back in out_row / out_col.
    SortOrderedPairs(
        device, ctx, out_row_data, out_col_data, unique_row_data,
        unique_col_data, num_out, stream);

    size_t tmp_size_unique = 0;
    void* tmp_unique = nullptr;
    CUDA_CALL(cub::DeviceSelect::Unique(
        nullptr, tmp_size_unique, out_begin, unique_begin, num_out_cuda,
        num_out, stream));
    tmp_unique = (tmp_size_unique > tmp_size)
                     ? device->AllocWorkspace(ctx, tmp_size_unique)
                     : tmp;  // reuse buffer
    CUDA_CALL(cub::DeviceSelect::Unique(
        tmp_unique, tmp_size_unique, out_begin, unique_begin, num_out_cuda,
        num_out, stream));
    num_out = cuda::GetCUDAScalar(device, ctx, num_out_cuda);

    num_out = std::min(num_samples, num_out);
    result = {
        unique_row.CreateView({num_out}, dtype),
        unique_col.CreateView({num_out}, dtype)};

    // tmp_unique aliases tmp when the buffer was reused; tmp is freed below.
    if (tmp_unique != tmp) device->FreeWorkspace(ctx, tmp_unique);
  } else {
    num_out = std::min(num_samples, num_out);
    result = {
        out_row.CreateView({num_out}, dtype),
        out_col.CreateView({num_out}, dtype)};
  }

  device->FreeWorkspace(ctx, tmp);
  device->FreeWorkspace(ctx, num_out_cuda);
  return result;
}

template std::pair<IdArray, IdArray> CSRGlobalUniformNegativeSampling<
    kDGLCUDA, int32_t>(const CSRMatrix&, int64_t, int, bool, bool, double);
template std::pair<IdArray, IdArray> CSRGlobalUniformNegativeSampling<
    kDGLCUDA, int64_t>(const CSRMatrix&, int64_t, int, bool, bool, double);

};  // namespace impl
};  // namespace aten
};  // namespace dgl
================================================
FILE: src/array/cuda/rowwise_sampling.cu
================================================
/**
 * Copyright (c) 2021 by Contributors
 * @file array/cuda/rowwise_sampling.cu
 * @brief uniform rowwise sampling
 */
// NOTE(review): the header names of the six directives on the next line are
// missing from this copy of the file; restore them before building.
#include #include #include #include #include #include
#include "../../array/cuda/atomic.cuh"
#include "../../runtime/cuda/cuda_common.h"
#include "./utils.h"

using namespace dgl::cuda;
using namespace dgl::aten::cuda;
using TensorDispatcher = dgl::runtime::TensorDispatcher;

namespace dgl {
namespace aten {
namespace impl {

namespace {

constexpr int BLOCK_SIZE = 128;

/**
 * @brief Compute the size of each row in the sampled CSR, without replacement.
 *
 * @tparam IdType The type of node and edge indexes.
 * @param num_picks The number of non-zero entries to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The index where each row's edges start.
 * @param out_deg The size of each row in the sampled matrix, as indexed by
 * `in_rows` (output).
 */
template <typename IdType>
__global__ void _CSRRowWiseSampleDegreeKernel(
    const int64_t num_picks, const int64_t num_rows,
    const IdType* const in_rows, const IdType* const in_ptr,
    IdType* const out_deg) {
  // Row ids and the thread index are kept 64-bit: IdType may be int64_t, and
  // narrowing in_rows[tIdx] to int would index in_ptr with a truncated id.
  const int64_t tIdx =
      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;

  if (tIdx < num_rows) {
    const int64_t in_row = in_rows[tIdx];
    const int64_t out_row = tIdx;
    out_deg[out_row] = min(
        static_cast<IdType>(num_picks), in_ptr[in_row + 1] - in_ptr[in_row]);

    if (out_row == num_rows - 1) {
      // make the prefixsum work
      out_deg[num_rows] = 0;
    }
  }
}

/**
 * @brief Compute the size of each row in the sampled CSR, with replacement.
 *
 * @tparam IdType The type of node and edge indexes.
 * @param num_picks The number of non-zero entries to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The index where each row's edges start.
 * @param out_deg The size of each row in the sampled matrix, as indexed by
 * `in_rows` (output).
*/
template <typename IdType>
__global__ void _CSRRowWiseSampleDegreeReplaceKernel(
    const int64_t num_picks, const int64_t num_rows,
    const IdType* const in_rows, const IdType* const in_ptr,
    IdType* const out_deg) {
  // 64-bit thread index, matching the 64-bit row bookkeeping below and the
  // weighted variant of this kernel in rowwise_sampling_prob.cu.
  const int64_t tIdx =
      static_cast<int64_t>(blockIdx.x) * blockDim.x + threadIdx.x;

  if (tIdx < num_rows) {
    const int64_t in_row = in_rows[tIdx];
    const int64_t out_row = tIdx;
    const IdType deg = in_ptr[in_row + 1] - in_ptr[in_row];

    // With replacement every non-empty row yields exactly num_picks entries;
    // an empty row has nothing to draw from and yields none.
    out_deg[out_row] = deg == 0 ? 0 : static_cast<IdType>(num_picks);

    if (out_row == num_rows - 1) {
      // make the prefixsum work
      out_deg[num_rows] = 0;
    }
  }
}

/**
 * @brief Perform row-wise uniform sampling on a CSR matrix,
 * and generate a COO matrix, without replacement.
 *
 * @tparam IdType The ID type used for matrices.
 * @tparam TILE_SIZE The number of rows covered by each threadblock.
 * @param rand_seed The random seed to use.
 * @param num_picks The number of non-zeros to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The indptr array of the input CSR.
 * @param in_index The indices array of the input CSR.
 * @param data The data array of the input CSR.
 * @param out_ptr The offset to write each row to in the output COO.
 * @param out_rows The rows of the output COO (output).
 * @param out_cols The columns of the output COO (output).
 * @param out_idxs The data array of the output COO (output).
*/
template <typename IdType, int TILE_SIZE>
__global__ void _CSRRowWiseSampleUniformKernel(
    const uint64_t rand_seed, const int64_t num_picks, const int64_t num_rows,
    const IdType* const in_rows, const IdType* const in_ptr,
    const IdType* const in_index, const IdType* const data,
    const IdType* const out_ptr, IdType* const out_rows, IdType* const out_cols,
    IdType* const out_idxs) {
  // Each thread block walks its TILE_SIZE rows one after another; all
  // BLOCK_SIZE threads of the block cooperate on the current row.
  assert(blockDim.x == BLOCK_SIZE);

  int64_t out_row = blockIdx.x * TILE_SIZE;
  const int64_t last_row =
      min(static_cast<int64_t>(blockIdx.x + 1) * TILE_SIZE, num_rows);

  curandStatePhilox4_32_10_t rng;
  curand_init(rand_seed * gridDim.x + blockIdx.x, threadIdx.x, 0, &rng);

  while (out_row < last_row) {
    const int64_t row = in_rows[out_row];
    const int64_t in_row_start = in_ptr[row];
    const int64_t deg = in_ptr[row + 1] - in_row_start;
    const int64_t out_row_start = out_ptr[out_row];

    if (deg <= num_picks) {
      // just copy row when there is not enough nodes to sample.
      for (int idx = threadIdx.x; idx < deg; idx += BLOCK_SIZE) {
        const IdType in_idx = in_row_start + idx;
        out_rows[out_row_start + idx] = row;
        out_cols[out_row_start + idx] = in_index[in_idx];
        out_idxs[out_row_start + idx] = data ? data[in_idx] : in_idx;
      }
    } else {
      // generate permutation list via reservoir algorithm
      // out_idxs temporarily holds row-local edge positions.
      for (int idx = threadIdx.x; idx < num_picks; idx += BLOCK_SIZE) {
        out_idxs[out_row_start + idx] = idx;
      }
      // The reservoir must be fully initialized before any thread replaces
      // an entry of it.
      __syncthreads();

      for (int idx = num_picks + threadIdx.x; idx < deg; idx += BLOCK_SIZE) {
        const int num = curand(&rng) % (idx + 1);
        if (num < num_picks) {
          // use max so as to achieve the replacement order the serial
          // algorithm would have
          AtomicMax(out_idxs + out_row_start + num, idx);
        }
      }
      // All replacements must be visible before the positions are resolved.
      __syncthreads();

      // copy permutation over
      for (int idx = threadIdx.x; idx < num_picks; idx += BLOCK_SIZE) {
        const IdType perm_idx = out_idxs[out_row_start + idx] + in_row_start;
        out_rows[out_row_start + idx] = row;
        out_cols[out_row_start + idx] = in_index[perm_idx];
        out_idxs[out_row_start + idx] = data ? data[perm_idx] : perm_idx;
      }
    }
    out_row += 1;
  }
}

/**
 * @brief Perform row-wise uniform sampling on a CSR matrix,
 * and generate a COO matrix, with replacement.
 *
 * @tparam IdType The ID type used for matrices.
 * @tparam TILE_SIZE The number of rows covered by each threadblock.
 * @param rand_seed The random seed to use.
 * @param num_picks The number of non-zeros to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The indptr array of the input CSR.
 * @param in_index The indices array of the input CSR.
 * @param data The data array of the input CSR.
 * @param out_ptr The offset to write each row to in the output COO.
 * @param out_rows The rows of the output COO (output).
 * @param out_cols The columns of the output COO (output).
 * @param out_idxs The data array of the output COO (output).
 */
template <typename IdType, int TILE_SIZE>
__global__ void _CSRRowWiseSampleUniformReplaceKernel(
    const uint64_t rand_seed, const int64_t num_picks, const int64_t num_rows,
    const IdType* const in_rows, const IdType* const in_ptr,
    const IdType* const in_index, const IdType* const data,
    const IdType* const out_ptr, IdType* const out_rows, IdType* const out_cols,
    IdType* const out_idxs) {
  // Each thread block walks its TILE_SIZE rows one after another; all
  // BLOCK_SIZE threads of the block cooperate on the current row.
  assert(blockDim.x == BLOCK_SIZE);

  int64_t out_row = blockIdx.x * TILE_SIZE;
  const int64_t last_row =
      min(static_cast<int64_t>(blockIdx.x + 1) * TILE_SIZE, num_rows);

  curandStatePhilox4_32_10_t rng;
  curand_init(rand_seed * gridDim.x + blockIdx.x, threadIdx.x, 0, &rng);

  while (out_row < last_row) {
    const int64_t row = in_rows[out_row];
    const int64_t in_row_start = in_ptr[row];
    const int64_t out_row_start = out_ptr[out_row];
    const int64_t deg = in_ptr[row + 1] - in_row_start;

    if (deg > 0) {
      // each thread then blindly copies in rows only if deg > 0.
      for (int idx = threadIdx.x; idx < num_picks; idx += BLOCK_SIZE) {
        const int64_t edge = curand(&rng) % deg;
        const int64_t out_idx = out_row_start + idx;
        out_rows[out_idx] = row;
        out_cols[out_idx] = in_index[in_row_start + edge];
        out_idxs[out_idx] =
            data ? data[in_row_start + edge] : in_row_start + edge;
      }
    }
    out_row += 1;
  }
}

}  // namespace

///////////////////////////// CSR sampling //////////////////////////

/**
 * @brief Uniformly sample up to \a num_picks entries from each selected row
 * of a CSR matrix and return them as a COO matrix.
 *
 * The work is done in three steps on the current CUDA stream: compute the
 * per-row output sizes, prefix-sum them into output offsets, then run the
 * sampling kernel. The total output length is copied back to the host
 * asynchronously while the sampling kernel runs.
 *
 * @param mat The input CSR matrix.
 * @param rows The rows to sample from.
 * @param num_picks The number of entries to pick per row.
 * @param replace Whether to sample with replacement.
 * @return A COO matrix with the shape of \a mat holding the picked entries.
 */
template <DGLDeviceType XPU, typename IdType>
COOMatrix _CSRRowWiseSamplingUniform(
    CSRMatrix mat, IdArray rows, const int64_t num_picks, const bool replace) {
  const auto& ctx = rows->ctx;
  auto device = runtime::DeviceAPI::Get(ctx);
  cudaStream_t stream = runtime::getCurrentCUDAStream();

  const int64_t num_rows = rows->shape[0];
  const IdType* const slice_rows = static_cast<const IdType*>(rows->data);

  // Allocate for the worst case; the arrays are trimmed to the real length
  // before returning.
  IdArray picked_row =
      NewIdArray(num_rows * num_picks, ctx, sizeof(IdType) * 8);
  IdArray picked_col =
      NewIdArray(num_rows * num_picks, ctx, sizeof(IdType) * 8);
  IdArray picked_idx =
      NewIdArray(num_rows * num_picks, ctx, sizeof(IdType) * 8);
  IdType* const out_rows = static_cast<IdType*>(picked_row->data);
  IdType* const out_cols = static_cast<IdType*>(picked_col->data);
  IdType* const out_idxs = static_cast<IdType*>(picked_idx->data);

  const IdType* in_ptr =
      static_cast<const IdType*>(GetDevicePointer(mat.indptr));
  const IdType* in_cols =
      static_cast<const IdType*>(GetDevicePointer(mat.indices));
  const IdType* data =
      CSRHasData(mat) ? static_cast<const IdType*>(GetDevicePointer(mat.data))
                      : nullptr;

  // compute degree
  IdType* out_deg = static_cast<IdType*>(
      device->AllocWorkspace(ctx, (num_rows + 1) * sizeof(IdType)));
  if (replace) {
    const dim3 block(512);
    const dim3 grid((num_rows + block.x - 1) / block.x);
    CUDA_KERNEL_CALL(
        _CSRRowWiseSampleDegreeReplaceKernel, grid, block, 0, stream,
        num_picks, num_rows, slice_rows, in_ptr, out_deg);
  } else {
    const dim3 block(512);
    const dim3 grid((num_rows + block.x - 1) / block.x);
    CUDA_KERNEL_CALL(
        _CSRRowWiseSampleDegreeKernel, grid, block, 0, stream, num_picks,
        num_rows, slice_rows, in_ptr, out_deg);
  }

  // fill out_ptr
  IdType* out_ptr = static_cast<IdType*>(
      device->AllocWorkspace(ctx, (num_rows + 1) * sizeof(IdType)));
  size_t prefix_temp_size = 0;
  // The first call (null workspace) only reports the required scratch size.
  CUDA_CALL(cub::DeviceScan::ExclusiveSum(
      nullptr, prefix_temp_size, out_deg, out_ptr, num_rows + 1, stream));
  void* prefix_temp = device->AllocWorkspace(ctx, prefix_temp_size);
  CUDA_CALL(cub::DeviceScan::ExclusiveSum(
      prefix_temp, prefix_temp_size, out_deg, out_ptr, num_rows + 1, stream));
  device->FreeWorkspace(ctx, prefix_temp);
  device->FreeWorkspace(ctx, out_deg);

  cudaEvent_t copyEvent;
  CUDA_CALL(cudaEventCreate(&copyEvent));

  NDArray new_len_tensor;
  if (TensorDispatcher::Global()->IsAvailable()) {
    new_len_tensor = NDArray::PinnedEmpty(
        {1}, DGLDataTypeTraits<IdType>::dtype, DGLContext{kDGLCPU, 0});
  } else {
    // use pageable memory, it will unnecessarily block but be functional
    new_len_tensor = NDArray::Empty(
        {1}, DGLDataTypeTraits<IdType>::dtype, DGLContext{kDGLCPU, 0});
  }

  // copy using the internal current stream
  // out_ptr[num_rows] is the total number of picked entries.
  CUDA_CALL(cudaMemcpyAsync(
      new_len_tensor->data, out_ptr + num_rows, sizeof(IdType),
      cudaMemcpyDeviceToHost, stream));
  CUDA_CALL(cudaEventRecord(copyEvent, stream));

  const uint64_t random_seed = RandomEngine::ThreadLocal()->RandInt(1000000000);

  // select edges
  // the number of rows each thread block will cover
  constexpr int TILE_SIZE = 128 / BLOCK_SIZE;
  if (replace) {  // with replacement
    const dim3 block(BLOCK_SIZE);
    const dim3 grid((num_rows + TILE_SIZE - 1) / TILE_SIZE);
    CUDA_KERNEL_CALL(
        (_CSRRowWiseSampleUniformReplaceKernel<IdType, TILE_SIZE>), grid, block,
        0, stream, random_seed, num_picks, num_rows, slice_rows, in_ptr,
        in_cols, data, out_ptr, out_rows, out_cols, out_idxs);
  } else {  // without replacement
    const dim3 block(BLOCK_SIZE);
    const dim3 grid((num_rows + TILE_SIZE - 1) / TILE_SIZE);
    CUDA_KERNEL_CALL(
        (_CSRRowWiseSampleUniformKernel<IdType, TILE_SIZE>), grid, block, 0,
        stream, random_seed, num_picks, num_rows, slice_rows, in_ptr, in_cols,
        data, out_ptr, out_rows, out_cols, out_idxs);
  }
  device->FreeWorkspace(ctx, out_ptr);

  // wait for copying `new_len` to finish
  CUDA_CALL(cudaEventSynchronize(copyEvent));
  CUDA_CALL(cudaEventDestroy(copyEvent));

  const IdType new_len = static_cast<const IdType*>(new_len_tensor->data)[0];
  picked_row = picked_row.CreateView({new_len}, picked_row->dtype);
  picked_col = picked_col.CreateView({new_len}, picked_col->dtype);
  picked_idx = picked_idx.CreateView({new_len}, picked_idx->dtype);

  return COOMatrix(
      mat.num_rows, mat.num_cols, picked_row, picked_col, picked_idx);
}

/**
 * @brief Row-wise uniform sampling entry point.
 *
 * A \a num_picks of -1 selects every entry of the requested rows; any other
 * value is forwarded to the sampling implementation.
 *
 * @param mat The input CSR matrix.
 * @param rows The rows to sample from.
 * @param num_picks The number of entries to pick per row, or -1 for all.
 * @param replace Whether to sample with replacement.
 * @return A COO matrix with the shape of \a mat holding the picked entries.
 */
template <DGLDeviceType XPU, typename IdType>
COOMatrix CSRRowWiseSamplingUniform(
    CSRMatrix mat, IdArray rows, const int64_t num_picks, const bool replace) {
  if (num_picks == -1) {
    // Basically this is UnitGraph::InEdges().
    COOMatrix coo = CSRToCOO(CSRSliceRows(mat, rows), false);
    // Map the row positions of the slice back to the requested row ids.
    IdArray sliced_rows = IndexSelect(rows, coo.row);
    return COOMatrix(
        mat.num_rows, mat.num_cols, sliced_rows, coo.col, coo.data);
  } else {
    return _CSRRowWiseSamplingUniform<XPU, IdType>(
        mat, rows, num_picks, replace);
  }
}

template COOMatrix CSRRowWiseSamplingUniform<kDGLCUDA, int32_t>(
    CSRMatrix, IdArray, int64_t, bool);
template COOMatrix CSRRowWiseSamplingUniform<kDGLCUDA, int64_t>(
    CSRMatrix, IdArray, int64_t, bool);

}  // namespace impl
}  // namespace aten
}  // namespace dgl
================================================
FILE: src/array/cuda/rowwise_sampling_prob.cu
================================================
/**
 * Copyright (c) 2022 by Contributors
 * @file array/cuda/rowwise_sampling_prob.cu
 * @brief weighted rowwise sampling.
The degree computing kernels and
 * host-side functions are partially borrowed from the uniform rowwise
 * sampling code rowwise_sampling.cu.
 * @author pengqirong (OPPO), dlasalle and Xin from Nvidia.
 */
// NOTE(review): the header names of the five directives on the next line are
// missing from this copy of the file; restore them before building.
#include #include #include #include #include
#include "../../array/cuda/atomic.cuh"
#include "../../runtime/cuda/cuda_common.h"
#include "./utils.h"

// require CUB 1.17 to use DeviceSegmentedSort
static_assert(
    CUB_VERSION >= 101700, "Require CUB >= 1.17 to use DeviceSegmentedSort");

namespace dgl {
using namespace cuda;
using namespace aten::cuda;
namespace aten {
namespace impl {

namespace {

constexpr int BLOCK_SIZE = 128;

/**
 * @brief Compute the size of each row in the sampled CSR, without replacement.
 * temp_deg is calculated for rows with deg > num_picks.
 * For these rows, we will calculate their A-Res values and sort them to get
 * top-num_picks.
 *
 * @tparam IdType The type of node and edge indexes.
 * @param num_picks The number of non-zero entries to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The index where each row's edges start.
 * @param out_deg The size of each row in the sampled matrix, as indexed by
 * `in_rows` (output).
 * @param temp_deg The size of each row in the input matrix, as indexed by
 * `in_rows` (output).
 */
template <typename IdType>
__global__ void _CSRRowWiseSampleDegreeKernel(
    const int64_t num_picks, const int64_t num_rows,
    const IdType* const in_rows, const IdType* const in_ptr,
    IdType* const out_deg, IdType* const temp_deg) {
  const int64_t tIdx = threadIdx.x + blockIdx.x * blockDim.x;

  if (tIdx < num_rows) {
    const int64_t in_row = in_rows[tIdx];
    const int64_t out_row = tIdx;
    const IdType deg = in_ptr[in_row + 1] - in_ptr[in_row];
    // temp_deg is used to generate ares_ptr
    // Rows that fit within num_picks are copied whole and need no A-Res
    // storage, so they contribute 0.
    temp_deg[out_row] = deg > static_cast<IdType>(num_picks) ? deg : 0;
    out_deg[out_row] = min(static_cast<IdType>(num_picks), deg);

    if (out_row == num_rows - 1) {
      // make the prefixsum work
      out_deg[num_rows] = 0;
      temp_deg[num_rows] = 0;
    }
  }
}

/**
 * @brief Compute the size of each row in the sampled CSR, with replacement.
 * We need the actual in degree of each row to store CDF values.
 *
 * @tparam IdType The type of node and edge indexes.
 * @param num_picks The number of non-zero entries to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The index where each row's edges start.
 * @param out_deg The size of each row in the sampled matrix, as indexed by
 * `in_rows` (output).
 * @param temp_deg The size of each row in the input matrix, as indexed by
 * `in_rows` (output).
 */
template <typename IdType>
__global__ void _CSRRowWiseSampleDegreeReplaceKernel(
    const int64_t num_picks, const int64_t num_rows,
    const IdType* const in_rows, const IdType* const in_ptr,
    IdType* const out_deg, IdType* const temp_deg) {
  const int64_t tIdx = threadIdx.x + blockIdx.x * blockDim.x;

  if (tIdx < num_rows) {
    const int64_t in_row = in_rows[tIdx];
    const int64_t out_row = tIdx;
    const IdType deg = in_ptr[in_row + 1] - in_ptr[in_row];
    temp_deg[out_row] = deg;
    // An empty row has nothing to draw from, so it yields no entries.
    out_deg[out_row] = deg == 0 ? 0 : static_cast<IdType>(num_picks);

    if (out_row == num_rows - 1) {
      // make the prefixsum work
      out_deg[num_rows] = 0;
      temp_deg[num_rows] = 0;
    }
  }
}

/**
 * @brief Read one element of an array through an optional index mapping:
 * array[idx_data[offset + idx]], or array[offset + idx] when idx_data is
 * null.
 *
 * @tparam IdType The ID type used for indices.
 * @tparam FloatType The float type used for array values.
 * @param array The array to be selected.
 * @param idx_data The index mapping array, may be null.
 * @param idx The index of value to be selected, relative to \a offset.
 * @param offset The offset to start.
 * @param out The selected value (output).
*/
template <typename IdType, typename FloatType>
__device__ void _DoubleSlice(
    const FloatType* const array, const IdType* const idx_data,
    const IdType idx, const IdType offset, FloatType* const out) {
  if (idx_data) {
    // Indirect lookup: idx_data maps the CSR position to the value index.
    *out = array[idx_data[offset + idx]];
  } else {
    // No mapping array: the CSR position is the value index.
    *out = array[offset + idx];
  }
}

/**
 * @brief Compute A-Res value. A-Res value needs to be calculated only if deg
 * is greater than num_picks in weighted rowwise sampling without replacement.
 *
 * @tparam IdType The ID type used for matrices.
 * @tparam FloatType The Float type used for matrices.
 * @tparam TILE_SIZE The number of rows covered by each threadblock.
 * @param rand_seed The random seed to use.
 * @param num_picks The number of non-zeros to pick per row.
 * @param num_rows The number of rows to pick.
 * @param in_rows The set of rows to pick.
 * @param in_ptr The indptr array of the input CSR.
 * @param data The data array of the input CSR.
 * @param prob The probability array of the input CSR.
 * @param ares_ptr The offset to write each row to in the A-res array.
 * @param ares_idxs The A-Res value corresponding index array, the index of
 * input CSR (output).
 * @param ares The A-Res value array (output).
* @author pengqirong (OPPO) */ template __global__ void _CSRAResValueKernel( const uint64_t rand_seed, const int64_t num_picks, const int64_t num_rows, const IdType* const in_rows, const IdType* const in_ptr, const IdType* const data, const FloatType* const prob, const IdType* const ares_ptr, IdType* const ares_idxs, FloatType* const ares) { int64_t out_row = blockIdx.x * TILE_SIZE; const int64_t last_row = min(static_cast(blockIdx.x + 1) * TILE_SIZE, num_rows); curandStatePhilox4_32_10_t rng; curand_init(rand_seed * gridDim.x + blockIdx.x, threadIdx.x, 0, &rng); while (out_row < last_row) { const int64_t row = in_rows[out_row]; const int64_t in_row_start = in_ptr[row]; const int64_t deg = in_ptr[row + 1] - in_row_start; // A-Res value needs to be calculated only if deg is greater than num_picks // in weighted rowwise sampling without replacement if (deg > num_picks) { const int64_t ares_row_start = ares_ptr[out_row]; for (int64_t idx = threadIdx.x; idx < deg; idx += BLOCK_SIZE) { const int64_t in_idx = in_row_start + idx; const int64_t ares_idx = ares_row_start + idx; FloatType item_prob; _DoubleSlice( prob, data, idx, in_row_start, &item_prob); // compute A-Res value ares[ares_idx] = static_cast( __powf(curand_uniform(&rng), 1.0f / item_prob)); ares_idxs[ares_idx] = static_cast(in_idx); } } out_row += 1; } } /** * @brief Perform weighted row-wise sampling on a CSR matrix, and generate a COO * matrix, without replacement. After sorting, we select top-num_picks items. * * @tparam IdType The ID type used for matrices. * @tparam FloatType The Float type used for matrices. * @tparam TILE_SIZE The number of rows covered by each threadblock. * @param num_picks The number of non-zeros to pick per row. * @param num_rows The number of rows to pick. * @param in_rows The set of rows to pick. * @param in_ptr The indptr array of the input CSR. * @param in_cols The columns array of the input CSR. * @param data The data array of the input CSR. 
* @param out_ptr The offset to write each row to in the output COO. * @param ares_ptr The offset to write each row to in the ares array. * @param sort_ares_idxs The sorted A-Res value corresponding index array, the * index of input CSR. * @param out_rows The rows of the output COO (output). * @param out_cols The columns of the output COO (output). * @param out_idxs The data array of the output COO (output). * @author pengqirong (OPPO) */ template __global__ void _CSRRowWiseSampleKernel( const int64_t num_picks, const int64_t num_rows, const IdType* const in_rows, const IdType* const in_ptr, const IdType* const in_cols, const IdType* const data, const IdType* const out_ptr, const IdType* const ares_ptr, const IdType* const sort_ares_idxs, IdType* const out_rows, IdType* const out_cols, IdType* const out_idxs) { // we assign one warp per row assert(blockDim.x == BLOCK_SIZE); int64_t out_row = blockIdx.x * TILE_SIZE; const int64_t last_row = min(static_cast(blockIdx.x + 1) * TILE_SIZE, num_rows); while (out_row < last_row) { const int64_t row = in_rows[out_row]; const int64_t in_row_start = in_ptr[row]; const int64_t out_row_start = out_ptr[out_row]; const int64_t deg = in_ptr[row + 1] - in_row_start; if (deg > num_picks) { const int64_t ares_row_start = ares_ptr[out_row]; for (int64_t idx = threadIdx.x; idx < num_picks; idx += BLOCK_SIZE) { // get in and out index, the in_idx is one of top num_picks A-Res value // corresponding index in input CSR. const int64_t out_idx = out_row_start + idx; const int64_t ares_idx = ares_row_start + idx; const int64_t in_idx = sort_ares_idxs[ares_idx]; // copy permutation over out_rows[out_idx] = static_cast(row); out_cols[out_idx] = in_cols[in_idx]; out_idxs[out_idx] = static_cast(data ? 
data[in_idx] : in_idx); } } else { for (int64_t idx = threadIdx.x; idx < deg; idx += BLOCK_SIZE) { // get in and out index const int64_t out_idx = out_row_start + idx; const int64_t in_idx = in_row_start + idx; // copy permutation over out_rows[out_idx] = static_cast(row); out_cols[out_idx] = in_cols[in_idx]; out_idxs[out_idx] = static_cast(data ? data[in_idx] : in_idx); } } out_row += 1; } } // A stateful callback functor that maintains a running prefix to be applied // during consecutive scan operations. template struct BlockPrefixCallbackOp { // Running prefix FloatType running_total; // Constructor __device__ BlockPrefixCallbackOp(FloatType running_total) : running_total(running_total) {} // Callback operator to be entered by the first warp of threads in the block. // Thread-0 is responsible for returning a value for seeding the block-wide // scan. __device__ FloatType operator()(FloatType block_aggregate) { FloatType old_prefix = running_total; running_total += block_aggregate; return old_prefix; } }; /** * @brief Perform weighted row-wise sampling on a CSR matrix, and generate a COO * matrix, with replacement. We store the CDF (unnormalized) of all neighbors of * a row in global memory and use binary search to find inverse indices as * selected items. * * @tparam IdType The ID type used for matrices. * @tparam FloatType The Float type used for matrices. * @tparam TILE_SIZE The number of rows covered by each threadblock. * @param rand_seed The random seed to use. * @param num_picks The number of non-zeros to pick per row. * @param num_rows The number of rows to pick. * @param in_rows The set of rows to pick. * @param in_ptr The indptr array of the input CSR. * @param in_cols The columns array of the input CSR. * @param data The data array of the input CSR. * @param prob The probability array of the input CSR. * @param out_ptr The offset to write each row to in the output COO. * @param cdf_ptr The offset of each cdf segment. 
* @param cdf The global buffer to store cdf segments. * @param out_rows The rows of the output COO (output). * @param out_cols The columns of the output COO (output). * @param out_idxs The data array of the output COO (output). * @author pengqirong (OPPO) */ template __global__ void _CSRRowWiseSampleReplaceKernel( const uint64_t rand_seed, const int64_t num_picks, const int64_t num_rows, const IdType* const in_rows, const IdType* const in_ptr, const IdType* const in_cols, const IdType* const data, const FloatType* const prob, const IdType* const out_ptr, const IdType* const cdf_ptr, FloatType* const cdf, IdType* const out_rows, IdType* const out_cols, IdType* const out_idxs) { // we assign one warp per row assert(blockDim.x == BLOCK_SIZE); int64_t out_row = blockIdx.x * TILE_SIZE; const int64_t last_row = min(static_cast(blockIdx.x + 1) * TILE_SIZE, num_rows); curandStatePhilox4_32_10_t rng; curand_init(rand_seed * gridDim.x + blockIdx.x, threadIdx.x, 0, &rng); while (out_row < last_row) { const int64_t row = in_rows[out_row]; const int64_t in_row_start = in_ptr[row]; const int64_t out_row_start = out_ptr[out_row]; const int64_t cdf_row_start = cdf_ptr[out_row]; const int64_t deg = in_ptr[row + 1] - in_row_start; const FloatType MIN_THREAD_DATA = static_cast(0.0f); if (deg > 0) { // Specialize BlockScan for a 1D block of BLOCK_SIZE threads typedef cub::BlockScan BlockScan; // Allocate shared memory for BlockScan __shared__ typename BlockScan::TempStorage temp_storage; // Initialize running total BlockPrefixCallbackOp prefix_op(MIN_THREAD_DATA); int64_t max_iter = (1 + (deg - 1) / BLOCK_SIZE) * BLOCK_SIZE; // Have the block iterate over segments of items for (int64_t idx = threadIdx.x; idx < max_iter; idx += BLOCK_SIZE) { // Load a segment of consecutive items that are blocked across threads FloatType thread_data; if (idx < deg) _DoubleSlice( prob, data, idx, in_row_start, &thread_data); else thread_data = MIN_THREAD_DATA; thread_data = max(thread_data, 
MIN_THREAD_DATA); // Collectively compute the block-wide inclusive prefix sum BlockScan(temp_storage) .InclusiveSum(thread_data, thread_data, prefix_op); __syncthreads(); // Store scanned items to cdf array if (idx < deg) { cdf[cdf_row_start + idx] = thread_data; } } __syncthreads(); for (int64_t idx = threadIdx.x; idx < num_picks; idx += BLOCK_SIZE) { // get random value FloatType sum = cdf[cdf_row_start + deg - 1]; FloatType rand = static_cast(curand_uniform(&rng) * sum); // get the offset of the first value within cdf array which is greater // than random value. int64_t item = cub::UpperBound( &cdf[cdf_row_start], deg, rand); item = min(item, deg - 1); // get in and out index const int64_t in_idx = in_row_start + item; const int64_t out_idx = out_row_start + idx; // copy permutation over out_rows[out_idx] = static_cast(row); out_cols[out_idx] = in_cols[in_idx]; out_idxs[out_idx] = static_cast(data ? data[in_idx] : in_idx); } } out_row += 1; } } template __global__ void _GenerateFlagsKernel( int64_t n, const IdType* idx, const DType* values, DType criteria, BoolType* output) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < n) { output[tx] = (values[idx ? idx[tx] : tx] != criteria); tx += stride_x; } } template COOMatrix COOGeneralRemoveIf(const COOMatrix& coo, MaskGen maskgen) { using namespace dgl::cuda; const auto idtype = coo.row->dtype; const auto ctx = coo.row->ctx; const int64_t nnz = coo.row->shape[0]; const IdType* row = coo.row.Ptr(); const IdType* col = coo.col.Ptr(); const IdArray& eid = COOHasData(coo) ? 
coo.data : Range(0, nnz, sizeof(IdType) * 8, ctx); const IdType* data = coo.data.Ptr(); IdArray new_row = IdArray::Empty({nnz}, idtype, ctx); IdArray new_col = IdArray::Empty({nnz}, idtype, ctx); IdArray new_eid = IdArray::Empty({nnz}, idtype, ctx); IdType* new_row_data = new_row.Ptr(); IdType* new_col_data = new_col.Ptr(); IdType* new_eid_data = new_eid.Ptr(); auto stream = runtime::getCurrentCUDAStream(); auto device = runtime::DeviceAPI::Get(ctx); int8_t* flags = static_cast(device->AllocWorkspace(ctx, nnz)); int nt = dgl::cuda::FindNumThreads(nnz); int64_t nb = (nnz + nt - 1) / nt; maskgen(nb, nt, stream, nnz, data, flags); int64_t* rst = static_cast(device->AllocWorkspace(ctx, sizeof(int64_t))); MaskSelect(device, ctx, row, flags, new_row_data, nnz, rst, stream); MaskSelect(device, ctx, col, flags, new_col_data, nnz, rst, stream); MaskSelect(device, ctx, data, flags, new_eid_data, nnz, rst, stream); int64_t new_len = GetCUDAScalar(device, ctx, rst); device->FreeWorkspace(ctx, flags); device->FreeWorkspace(ctx, rst); return COOMatrix( coo.num_rows, coo.num_cols, new_row.CreateView({new_len}, idtype, 0), new_col.CreateView({new_len}, idtype, 0), new_eid.CreateView({new_len}, idtype, 0)); } template COOMatrix _COORemoveIf( const COOMatrix& coo, const NDArray& values, DType criteria) { const DType* val = values.Ptr(); auto maskgen = [val, criteria]( int nb, int nt, cudaStream_t stream, int64_t nnz, const IdType* data, int8_t* flags) { CUDA_KERNEL_CALL( (_GenerateFlagsKernel), nb, nt, 0, stream, nnz, data, val, criteria, flags); }; return COOGeneralRemoveIf( coo, maskgen); } } // namespace /////////////////////////////// CSR /////////////////////////////// /** * @brief Perform weighted row-wise sampling on a CSR matrix, and generate a COO * matrix. Use CDF sampling algorithm for with replacement: * 1) Calculate the CDF of all neighbor's prob. * 2) For each [0, num_picks), generate a rand ~ U(0, 1). 
Use binary search to * find its index in the CDF array as a chosen item. * Use A-Res sampling algorithm for without replacement: * 1) For rows with deg > num_picks, calculate A-Res values for all neighbors. * 2) Sort the A-Res array and select top-num_picks as chosen items. * * @tparam XPU The device type used for matrices. * @tparam IdType The ID type used for matrices. * @tparam FloatType The Float type used for matrices. * @param mat The CSR matrix. * @param rows The set of rows to pick. * @param num_picks The number of non-zeros to pick per row. * @param prob The probability array of the input CSR. * @param replace Is replacement sampling? * @author pengqirong (OPPO), dlasalle and Xin from Nvidia. */ template COOMatrix _CSRRowWiseSampling( const CSRMatrix& mat, const IdArray& rows, int64_t num_picks, const FloatArray& prob, bool replace) { const auto& ctx = rows->ctx; auto device = runtime::DeviceAPI::Get(ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); const int64_t num_rows = rows->shape[0]; const IdType* const slice_rows = static_cast(rows->data); IdArray picked_row = NewIdArray(num_rows * num_picks, ctx, sizeof(IdType) * 8); IdArray picked_col = NewIdArray(num_rows * num_picks, ctx, sizeof(IdType) * 8); IdArray picked_idx = NewIdArray(num_rows * num_picks, ctx, sizeof(IdType) * 8); IdType* const out_rows = static_cast(picked_row->data); IdType* const out_cols = static_cast(picked_col->data); IdType* const out_idxs = static_cast(picked_idx->data); const IdType* in_ptr = static_cast(GetDevicePointer(mat.indptr)); const IdType* in_cols = static_cast(GetDevicePointer(mat.indices)); const IdType* data = CSRHasData(mat) ? 
static_cast(GetDevicePointer(mat.data)) : nullptr; const FloatType* prob_data = static_cast(GetDevicePointer(prob)); // compute degree // out_deg: the size of each row in the sampled matrix // temp_deg: the size of each row we will manipulate in sampling // 1) for w/o replacement: in degree if it's greater than num_picks else 0 // 2) for w/ replacement: in degree IdType* out_deg = static_cast( device->AllocWorkspace(ctx, (num_rows + 1) * sizeof(IdType))); IdType* temp_deg = static_cast( device->AllocWorkspace(ctx, (num_rows + 1) * sizeof(IdType))); if (replace) { const dim3 block(512); const dim3 grid((num_rows + block.x - 1) / block.x); CUDA_KERNEL_CALL( _CSRRowWiseSampleDegreeReplaceKernel, grid, block, 0, stream, num_picks, num_rows, slice_rows, in_ptr, out_deg, temp_deg); } else { const dim3 block(512); const dim3 grid((num_rows + block.x - 1) / block.x); CUDA_KERNEL_CALL( _CSRRowWiseSampleDegreeKernel, grid, block, 0, stream, num_picks, num_rows, slice_rows, in_ptr, out_deg, temp_deg); } // fill temp_ptr IdType* temp_ptr = static_cast( device->AllocWorkspace(ctx, (num_rows + 1) * sizeof(IdType))); size_t prefix_temp_size = 0; CUDA_CALL(cub::DeviceScan::ExclusiveSum( nullptr, prefix_temp_size, temp_deg, temp_ptr, num_rows + 1, stream)); void* prefix_temp = device->AllocWorkspace(ctx, prefix_temp_size); CUDA_CALL(cub::DeviceScan::ExclusiveSum( prefix_temp, prefix_temp_size, temp_deg, temp_ptr, num_rows + 1, stream)); device->FreeWorkspace(ctx, prefix_temp); device->FreeWorkspace(ctx, temp_deg); // TODO(Xin): The copy here is too small, and the overhead of creating // cuda events cannot be ignored. Just use synchronized copy. IdType temp_len; // copy using the internal current stream. 
device->CopyDataFromTo( temp_ptr, num_rows * sizeof(temp_len), &temp_len, 0, sizeof(temp_len), ctx, DGLContext{kDGLCPU, 0}, mat.indptr->dtype); device->StreamSync(ctx, stream); // fill out_ptr IdType* out_ptr = static_cast( device->AllocWorkspace(ctx, (num_rows + 1) * sizeof(IdType))); prefix_temp_size = 0; CUDA_CALL(cub::DeviceScan::ExclusiveSum( nullptr, prefix_temp_size, out_deg, out_ptr, num_rows + 1, stream)); prefix_temp = device->AllocWorkspace(ctx, prefix_temp_size); CUDA_CALL(cub::DeviceScan::ExclusiveSum( prefix_temp, prefix_temp_size, out_deg, out_ptr, num_rows + 1, stream)); device->FreeWorkspace(ctx, prefix_temp); device->FreeWorkspace(ctx, out_deg); cudaEvent_t copyEvent; CUDA_CALL(cudaEventCreate(©Event)); // TODO(dlasalle): use pinned memory to overlap with the actual sampling, and // wait on a cudaevent IdType new_len; // copy using the internal current stream. device->CopyDataFromTo( out_ptr, num_rows * sizeof(new_len), &new_len, 0, sizeof(new_len), ctx, DGLContext{kDGLCPU, 0}, mat.indptr->dtype); CUDA_CALL(cudaEventRecord(copyEvent, stream)); // allocate workspace // 1) for w/ replacement, it's a global buffer to store cdf segments (one // segment for each row). // 2) for w/o replacement, it's used to store a-res segments (one segment for // each row with degree > num_picks) FloatType* temp = static_cast( device->AllocWorkspace(ctx, temp_len * sizeof(FloatType))); const uint64_t rand_seed = RandomEngine::ThreadLocal()->RandInt(1000000000); // select edges // the number of rows each thread block will cover constexpr int TILE_SIZE = 128 / BLOCK_SIZE; if (replace) { // with replacement. 
const dim3 block(BLOCK_SIZE); const dim3 grid((num_rows + TILE_SIZE - 1) / TILE_SIZE); CUDA_KERNEL_CALL( (_CSRRowWiseSampleReplaceKernel), grid, block, 0, stream, rand_seed, num_picks, num_rows, slice_rows, in_ptr, in_cols, data, prob_data, out_ptr, temp_ptr, temp, out_rows, out_cols, out_idxs); device->FreeWorkspace(ctx, temp); } else { // without replacement IdType* temp_idxs = static_cast( device->AllocWorkspace(ctx, (temp_len) * sizeof(IdType))); // Compute A-Res value. A-Res value needs to be calculated only if deg // is greater than num_picks in weighted rowwise sampling without // replacement. const dim3 block(BLOCK_SIZE); const dim3 grid((num_rows + TILE_SIZE - 1) / TILE_SIZE); CUDA_KERNEL_CALL( (_CSRAResValueKernel), grid, block, 0, stream, rand_seed, num_picks, num_rows, slice_rows, in_ptr, data, prob_data, temp_ptr, temp_idxs, temp); // sort A-Res value array. FloatType* sort_temp = static_cast( device->AllocWorkspace(ctx, temp_len * sizeof(FloatType))); IdType* sort_temp_idxs = static_cast( device->AllocWorkspace(ctx, temp_len * sizeof(IdType))); cub::DoubleBuffer sort_keys(temp, sort_temp); cub::DoubleBuffer sort_values(temp_idxs, sort_temp_idxs); void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; CUDA_CALL(cub::DeviceSegmentedSort::SortPairsDescending( d_temp_storage, temp_storage_bytes, sort_keys, sort_values, temp_len, num_rows, temp_ptr, temp_ptr + 1, stream)); d_temp_storage = device->AllocWorkspace(ctx, temp_storage_bytes); CUDA_CALL(cub::DeviceSegmentedSort::SortPairsDescending( d_temp_storage, temp_storage_bytes, sort_keys, sort_values, temp_len, num_rows, temp_ptr, temp_ptr + 1, stream)); device->FreeWorkspace(ctx, d_temp_storage); device->FreeWorkspace(ctx, temp); device->FreeWorkspace(ctx, temp_idxs); device->FreeWorkspace(ctx, sort_temp); device->FreeWorkspace(ctx, sort_temp_idxs); // select tok-num_picks as results CUDA_KERNEL_CALL( (_CSRRowWiseSampleKernel), grid, block, 0, stream, num_picks, num_rows, slice_rows, in_ptr, 
in_cols, data, out_ptr, temp_ptr, sort_values.Current(), out_rows, out_cols, out_idxs); } device->FreeWorkspace(ctx, temp_ptr); device->FreeWorkspace(ctx, out_ptr); // wait for copying `new_len` to finish CUDA_CALL(cudaEventSynchronize(copyEvent)); CUDA_CALL(cudaEventDestroy(copyEvent)); picked_row = picked_row.CreateView({new_len}, picked_row->dtype); picked_col = picked_col.CreateView({new_len}, picked_col->dtype); picked_idx = picked_idx.CreateView({new_len}, picked_idx->dtype); return COOMatrix( mat.num_rows, mat.num_cols, picked_row, picked_col, picked_idx); } template COOMatrix CSRRowWiseSampling( CSRMatrix mat, IdArray rows, int64_t num_picks, FloatArray prob, bool replace) { COOMatrix result; if (num_picks == -1) { // Basically this is UnitGraph::InEdges(). COOMatrix coo = CSRToCOO(CSRSliceRows(mat, rows), false); IdArray sliced_rows = IndexSelect(rows, coo.row); result = COOMatrix(mat.num_rows, mat.num_cols, sliced_rows, coo.col, coo.data); } else { result = _CSRRowWiseSampling( mat, rows, num_picks, prob, replace); } // NOTE(BarclayII): I'm removing the entries with zero probability after // sampling. Is there a better way? 
return _COORemoveIf(result, prob, static_cast(0)); } template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); // These are not being called, but we instantiate them anyway to prevent missing // symbols in Debug build template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); template COOMatrix CSRRowWiseSampling( CSRMatrix, IdArray, int64_t, FloatArray, bool); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/sddmm.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/sddmm.cu * @brief SDDMM C APIs and definitions. */ #include #include "./functor.cuh" #include "./sddmm.cuh" namespace dgl { namespace aten { /** * @brief CUDA implementation of g-SDDMM on Csr format. */ template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target) { SWITCH_OP(op, Op, { SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, { cuda::SDDMMCsr( bcast, csr, lhs, rhs, out); }); }); } /** * @brief CUDA implementation of g-SDDMM on Coo format. 
*/ template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target) { SWITCH_OP(op, Op, { SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, { cuda::SDDMMCoo( bcast, coo, lhs, rhs, out); }); }); } template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); #if BF16_ENABLED template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); #endif // BF16_ENABLED template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCsr( const std::string& op, const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); #if 
BF16_ENABLED template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); #endif // BF16_ENABLED template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); template void SDDMMCoo( const std::string& op, const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/sddmm.cuh ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/sddmm.cuh * @brief SDDMM CUDA kernel function header. */ #ifndef DGL_ARRAY_CUDA_SDDMM_CUH_ #define DGL_ARRAY_CUDA_SDDMM_CUH_ #include #include "../../runtime/cuda/cuda_common.h" #include "../selector.h" #include "./functor.cuh" #include "./utils.h" #include "atomic.cuh" #include "bf16.cuh" #include "fp16.cuh" #include "functor.cuh" #include "macro.cuh" namespace dgl { using namespace cuda; namespace aten { namespace cuda { #define SWITCH_OP(op, Op, ...) 
\ do { \ if ((op) == "add") { \ typedef cuda::binary::Add Op; \ { __VA_ARGS__ } \ } else if ((op) == "sub") { \ typedef cuda::binary::Sub Op; \ { __VA_ARGS__ } \ } else if ((op) == "mul") { \ typedef cuda::binary::Mul Op; \ { __VA_ARGS__ } \ } else if ((op) == "div") { \ typedef cuda::binary::Div Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_lhs") { \ typedef cuda::binary::CopyLhs Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_rhs") { \ typedef cuda::binary::CopyRhs Op; \ { __VA_ARGS__ } \ } else if ((op) == "dot") { \ typedef cuda::binary::Dot Op; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << "Unsupported SpMM/SDDMM binary operator: " << op; \ } \ } while (0) #define SWITCH_RHS(rhs_target, RhsTarget, ...) \ do { \ if ((rhs_target) == 0) { \ constexpr int RhsTarget = 0; \ { __VA_ARGS__ } \ } else if ((rhs_target) == 1) { \ constexpr int RhsTarget = 1; \ { __VA_ARGS__ } \ } else if ((rhs_target) == 2) { \ constexpr int RhsTarget = 2; \ { __VA_ARGS__ } \ } else { \ LOG(INFO) << "Invalid rhs target: " << (rhs_target); \ } \ } while (0) #define SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, ...) \ do { \ if ((lhs_target) == 0) { \ constexpr int LhsTarget = 0; \ SWITCH_RHS(rhs_target, RhsTarget, __VA_ARGS__); \ } else if ((lhs_target) == 1) { \ constexpr int LhsTarget = 1; \ SWITCH_RHS(rhs_target, RhsTarget, __VA_ARGS__); \ } else if ((lhs_target) == 2) { \ constexpr int LhsTarget = 2; \ SWITCH_RHS(rhs_target, RhsTarget, __VA_ARGS__); \ } else { \ LOG(INFO) << "Invalid lhs target: " << (lhs_target); \ } \ } while (0) constexpr unsigned int full_mask = 0xffffffff; /** * @brief CUDA kernel of g-SDDMM on Coo format. * @note it uses edge parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different edges. Threadblocks * on the x-axis are responsible for the computation on different * positions in feature dimension. 
*/ template < typename Idx, typename DType, typename BinaryOp, bool UseBcast = false, bool UseIdx = false, int LhsTarget = 0, int RhsTarget = 2> __global__ void SDDMMCooKernel( const DType* __restrict__ lhs, const DType* __restrict__ rhs, DType* __restrict__ out, const Idx* __restrict__ row, const Idx* __restrict__ col, const Idx* __restrict__ edge_map, int64_t N, int64_t M, int64_t E, int64_t reduce_size, const int64_t* __restrict__ lhs_off, const int64_t* __restrict__ rhs_off, int64_t lhs_len, int64_t rhs_len, int64_t out_len) { // SDDMM with COO. Idx ty = blockIdx.y * blockDim.y + threadIdx.y; const Idx stride_y = blockDim.y * gridDim.y; while (ty < E) { const Idx src = _ldg(row + ty); const Idx dst = _ldg(col + ty); const Idx eid = UseIdx ? _ldg(edge_map + ty) : ty; const DType* lhsoff = BinaryOp::use_lhs ? (lhs + Selector::Call(src, eid, dst) * lhs_len) : nullptr; const DType* rhsoff = BinaryOp::use_rhs ? (rhs + Selector::Call(src, eid, dst) * rhs_len) : nullptr; DType* outoff = out + eid * out_len; int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = blockDim.x * gridDim.x; while (tx < out_len) { const Idx lhs_add = UseBcast ? lhs_off[tx] : tx; const Idx rhs_add = UseBcast ? rhs_off[tx] : tx; DType val = BinaryOp::Call( lhsoff + lhs_add * reduce_size, rhsoff + rhs_add * reduce_size, reduce_size); outoff[tx] = val; tx += stride_x; } ty += stride_y; } } /** * @brief CUDA kernel of SDDMM-dot on Coo format, accelerated with tree * reduction. * @note it uses edge parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different edges. Threadblocks * on the x-axis are responsible for the computation on different * positions in feature dimension. 
*/ template < typename Idx, typename DType, bool UseBcast = false, bool UseIdx = false, int LhsTarget = 0, int RhsTarget = 2> __global__ void SDDMMCooTreeReduceKernel( const DType* __restrict__ lhs, const DType* __restrict__ rhs, DType* __restrict__ out, const Idx* __restrict__ row, const Idx* __restrict__ col, const Idx* __restrict__ edge_map, int64_t N, int64_t M, int64_t E, int64_t reduce_size, const int64_t* __restrict__ lhs_off, const int64_t* __restrict__ rhs_off, int64_t lhs_len, int64_t rhs_len, int64_t out_len) { Idx ty = blockIdx.x * blockDim.y + threadIdx.y; if (ty < E) { const Idx src = _ldg(row + ty); const Idx dst = _ldg(col + ty); const Idx eid = UseIdx ? _ldg(edge_map + ty) : ty; const DType* lhsoff = lhs + Selector::Call(src, eid, dst) * lhs_len; const DType* rhsoff = rhs + Selector::Call(src, eid, dst) * rhs_len; DType* outoff = out + eid * out_len; int tx = threadIdx.x; // tx < 32 for (int i = blockIdx.y; i < out_len; i += gridDim.y) { // over output feature dimension const Idx lhs_add = UseBcast ? __ldg(lhs_off + i) : i; const Idx rhs_add = UseBcast ? __ldg(rhs_off + i) : i; DType val = reduce::Sum::zero(); for (int j = tx; j < reduce_size; j += 64) { val += lhsoff[lhs_add * reduce_size + j] * rhsoff[rhs_add * reduce_size + j]; if (j + 32 < reduce_size) val += lhsoff[lhs_add * reduce_size + j + 32] * rhsoff[rhs_add * reduce_size + j + 32]; } #pragma unroll for (int offset = 16; offset > 0; offset /= 2) val += __shfl_down_sync(full_mask, val, offset); if (tx == 0) outoff[i] = val; } } } // Binary search the row_offsets to find the source node of the edge id. template __device__ __forceinline__ Idx BinarySearchSrc(const Idx* array, Idx length, Idx eid) { Idx lo = 0, hi = length - 1; while (lo < hi) { Idx mid = (lo + hi) >> 1; if (_ldg(array + mid) <= eid) { lo = mid + 1; } else { hi = mid; } } // INVARIANT: lo == hi if (_ldg(array + hi) == eid) { return hi; } else { return hi - 1; } } /** * @brief CUDA kernel of g-SDDMM on Csr format. 
* @note it uses edge parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different edges. Threadblocks * on the x-axis are responsible for the computation on different * positions in feature dimension. To efficiently find the source node idx and * destination node index of an given edge on Csr format, it uses binary search * (time complexity O(log N)). */ template < typename Idx, typename DType, typename BinaryOp, bool UseBcast = false, bool UseIdx = false, int LhsTarget = 0, int RhsTarget = 2> __global__ void SDDMMCsrKernel( const DType* __restrict__ lhs, const DType* __restrict__ rhs, DType* __restrict__ out, const Idx* __restrict__ indptr, const Idx* __restrict__ indices, const Idx* __restrict__ edge_map, int64_t N, int64_t M, int64_t E, int64_t reduce_size, const int64_t* __restrict__ lhs_off, const int64_t* __restrict__ rhs_off, int64_t lhs_len, int64_t rhs_len, int64_t out_len) { // SDDMM with Csr. Idx ty = blockIdx.y * blockDim.y + threadIdx.y; const Idx stride_y = blockDim.y * gridDim.y; while (ty < E) { const Idx src = BinarySearchSrc(indptr, N + 1, ty); const Idx dst = _ldg(indices + ty); const Idx eid = UseIdx ? _ldg(edge_map + ty) : ty; int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; const int64_t stride_x = blockDim.x * gridDim.x; const DType* lhsoff = BinaryOp::use_lhs ? (lhs + Selector::Call(src, eid, dst) * lhs_len) : nullptr; const DType* rhsoff = BinaryOp::use_rhs ? (rhs + Selector::Call(src, eid, dst) * rhs_len) : nullptr; DType* outoff = out + eid * out_len; while (tx < out_len) { const Idx lhs_add = UseBcast ? lhs_off[tx] : tx; const Idx rhs_add = UseBcast ? rhs_off[tx] : tx; DType val = BinaryOp::Call( lhsoff + lhs_add * reduce_size, rhsoff + rhs_add * reduce_size, reduce_size); outoff[tx] = val; tx += stride_x; } ty += stride_y; } } /** * @brief CUDA implementation of g-SDDMM on Coo format. * @param bcast Broadcast information. * @param coo The Coo matrix. 
* @param lhs The left hand side operand feature.
 * @param rhs The right hand side operand feature.
 * @param out The result feature on edges.
 */
template <
    typename Idx, typename DType, typename Op, int LhsTarget = 0,
    int RhsTarget = 2>
void SDDMMCoo(
    const BcastOff& bcast, const COOMatrix& coo, NDArray lhs, NDArray rhs,
    NDArray out) {
  const Idx* row = coo.row.Ptr();
  const Idx* col = coo.col.Ptr();
  const Idx* edge_map = coo.data.Ptr();
  const DType* lhs_data = lhs.Ptr();
  const DType* rhs_data = rhs.Ptr();
  DType* out_data = out.Ptr();
  cudaStream_t stream = runtime::getCurrentCUDAStream();

  // Offset tables are handed to BCAST_IDX_CTX_SWITCH below; that macro is
  // defined elsewhere and presumably fills them when broadcasting is needed.
  int64_t *lhs_off = nullptr, *rhs_off = nullptr;
  int64_t len = bcast.out_len, lhs_len = bcast.lhs_len, rhs_len = bcast.rhs_len;
  int64_t reduce_dim = bcast.reduce_size;

  const int64_t nnz = coo.row->shape[0];
  // A null data array means the edge id equals the position in the COO.
  const bool use_idx = !IsNullArray(coo.data);

  // Reductions of at least 32 elements that pass the type test below are
  // routed to the tree-reduce kernel; everything else uses the generic one.
  if (std::is_same >::value && reduce_dim >= 32) {
    const int ntx = 32;  // lanes cooperating on one reduction
    const int nty = 8;   // edges handled per thread block
    const int nbx = (nnz + nty - 1) / nty;
    const int nby = FindNumBlocks<'y'>(len);
    const dim3 nblks(nbx, nby);
    const dim3 nthrs(ntx, nty);
    BCAST_IDX_CTX_SWITCH(bcast, use_idx, out->ctx, lhs_off, rhs_off, {
      CUDA_KERNEL_CALL(
          (SDDMMCooTreeReduceKernel<
              Idx, DType, UseBcast, UseIdx, LhsTarget, RhsTarget>),
          nblks, nthrs, 0, stream, lhs_data, rhs_data, out_data, row, col,
          edge_map, coo.num_rows, coo.num_cols, nnz, reduce_dim, lhs_off,
          rhs_off, lhs_len, rhs_len, len);
    });
  } else {
    // x covers the output feature positions, y covers the edges.
    const int ntx = FindNumThreads(len);
    const int nty = CUDA_MAX_NUM_THREADS / ntx;
    const int nbx = (len + ntx - 1) / ntx;
    const int nby = FindNumBlocks<'y'>((nnz + nty - 1) / nty);
    const dim3 nblks(nbx, nby);
    const dim3 nthrs(ntx, nty);
    BCAST_IDX_CTX_SWITCH(bcast, use_idx, out->ctx, lhs_off, rhs_off, {
      CUDA_KERNEL_CALL(
          (SDDMMCooKernel<
              Idx, DType, Op, UseBcast, UseIdx, LhsTarget, RhsTarget>),
          nblks, nthrs, 0, stream, lhs_data, rhs_data, out_data, row, col,
          edge_map, coo.num_rows, coo.num_cols, nnz, reduce_dim, lhs_off,
          rhs_off, lhs_len, rhs_len, len);
    });
  }
}

/**
 * @brief CUDA implementation of g-SDDMM on Csr format.
 * @param bcast Broadcast information.
 * @param csr The Csr matrix.
 * @param lhs The left hand side operand feature.
 * @param rhs The right hand side operand feature.
 * @param out The result feature on edges.
 */
template <
    typename Idx, typename DType, typename Op, int LhsTarget = 0,
    int RhsTarget = 2>
void SDDMMCsr(
    const BcastOff& bcast, const CSRMatrix& csr, NDArray lhs, NDArray rhs,
    NDArray out) {
  const Idx* indptr = csr.indptr.Ptr();
  const Idx* indices = csr.indices.Ptr();
  const Idx* edge_map = csr.data.Ptr();
  const DType* lhs_data = lhs.Ptr();
  const DType* rhs_data = rhs.Ptr();
  DType* out_data = out.Ptr();
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  // N rows, M columns, E stored entries (edges).
  int64_t N = csr.num_rows, M = csr.num_cols, E = csr.indices->shape[0];

  int64_t *lhs_off = nullptr, *rhs_off = nullptr;
  int64_t len = bcast.out_len, lhs_len = bcast.lhs_len, rhs_len = bcast.rhs_len;
  int64_t reduce_dim = bcast.reduce_size;

  // x covers the output feature positions, y covers the edges.
  const int ntx = FindNumThreads(len);
  const int nty = CUDA_MAX_NUM_THREADS / ntx;
  const int nbx = (len + ntx - 1) / ntx;
  const int nby = FindNumBlocks<'y'>((E + nty - 1) / nty);
  const dim3 nblks(nbx, nby);
  const dim3 nthrs(ntx, nty);
  // A null data array means the edge id equals the position in the CSR.
  const bool use_idx = !IsNullArray(csr.data);

  BCAST_IDX_CTX_SWITCH(bcast, use_idx, out->ctx, lhs_off, rhs_off, {
    CUDA_KERNEL_CALL(
        (SDDMMCsrKernel<
            Idx, DType, Op, UseBcast, UseIdx, LhsTarget, RhsTarget>),
        nblks, nthrs, 0, stream, lhs_data, rhs_data, out_data, indptr, indices,
        edge_map, N, M, E, reduce_dim, lhs_off, rhs_off, lhs_len, rhs_len, len);
  });
}

}  // namespace cuda
}  // namespace aten
}  // namespace dgl

#endif  // DGL_ARRAY_CUDA_SDDMM_CUH_

================================================
FILE: src/array/cuda/sddmm_hetero_coo.cu
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/sddmm_hetero_coo.cu
 * @brief SDDMM C APIs and definitions.
*/ #include #include "./sddmm.cuh" namespace dgl { namespace aten { /** * @brief CUDA implementation of g-SDDMM on heterograph using Csr format. */ template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& vec_lhs, const std::vector& vec_rhs, std::vector vec_out, int lhs_target, int rhs_target, const std::vector& lhs_eid, const std::vector& rhs_eid) { SWITCH_OP(op, Op, { SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, { /* Call SDDMM CUDA kernel for each relation type sequentially */ for (dgl_type_t etype = 0; etype < lhs_eid.size(); ++etype) { COOMatrix coo = vec_coo[etype]; NDArray lhs = vec_lhs[lhs_eid[etype]]; NDArray rhs = vec_rhs[rhs_eid[etype]]; NDArray out = vec_out[etype]; cuda::SDDMMCoo( bcast, coo, lhs, rhs, out); } }); }); } template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); #if BF16_ENABLED template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); #endif // BF16_ENABLED template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, 
const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCooHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_coo, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/sddmm_hetero_csr.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/sddmm.cu * @brief SDDMM C APIs and definitions. */ #include #include "./sddmm.cuh" namespace dgl { namespace aten { /** * @brief CUDA implementation of g-SDDMM on heterograph using Csr format. 
*/ template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& vec_lhs, const std::vector& vec_rhs, std::vector vec_out, int lhs_target, int rhs_target, const std::vector& lhs_eid, const std::vector& rhs_eid) { SWITCH_OP(op, Op, { SWITCH_TARGET(lhs_target, rhs_target, LhsTarget, RhsTarget, { /* Call SDDMM CUDA kernel for each relation type sequentially */ for (dgl_type_t etype = 0; etype < lhs_eid.size(); ++etype) { CSRMatrix csr = vec_csr[etype]; NDArray lhs = vec_lhs[lhs_eid[etype]]; NDArray rhs = vec_rhs[rhs_eid[etype]]; NDArray out = vec_out[etype]; cuda::SDDMMCsr( bcast, csr, lhs, rhs, out); } }); }); } template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); #if BF16_ENABLED template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); #endif // BF16_ENABLED template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& 
out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); template void SDDMMCsrHetero( const std::string& op, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& lhs, const std::vector& rhs, std::vector out, int lhs_target, int rhs_target, const std::vector& in_eid, const std::vector& out_eid); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/segment_reduce.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/segment_reduce.cu * @brief Segment reduce C APIs and definitions. 
*/
#include #include #include "./functor.cuh"
#include "./segment_reduce.cuh"
#include "./utils.h"

namespace dgl {

using namespace cuda;

namespace aten {

/**
 * @brief Dispatch a segment reduction to the CUDA implementation selected by
 *        the reducer name.
 * @param op Reducer name; "sum", "max" and "min" are handled, any other
 *        value aborts through LOG(FATAL).
 * @param feat The input tensor.
 * @param offsets The offsets tensor.
 * @param out The output tensor.
 * @param arg Auxiliary tensor forwarded to the CUDA implementation.
 */
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg) {
  if (op == "sum") {
    cuda::SegmentReduce>(
        feat, offsets, out, arg);
  } else if (op == "max") {
    cuda::SegmentReduce>(
        feat, offsets, out, arg);
  } else if (op == "min") {
    cuda::SegmentReduce>(
        feat, offsets, out, arg);
  } else {
    LOG(FATAL) << "Not implemented";
  }
}

/** @brief Forward scatter-add to the CUDA implementation. */
template void ScatterAdd(NDArray feat, NDArray idx, NDArray out) {
  cuda::ScatterAdd(feat, idx, out);
}

/** @brief Forward the min/max gradient update to the CUDA implementation. */
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out) {
  cuda::UpdateGradMinMax_hetero(
      g, op, feat, idx, idx_etype, out);
}

/** @brief Forward the segment min/max backward pass to the CUDA
 *         implementation. */
template void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) {
  cuda::BackwardSegmentCmp(feat, arg, out);
}

// Explicit instantiations of SegmentReduce.
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
#if BF16_ENABLED
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
#endif  // BF16_ENABLED
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);
template void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);

// Explicit instantiations of ScatterAdd.
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
#if BF16_ENABLED
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
#endif  // BF16_ENABLED
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);
template void ScatterAdd(
    NDArray feat, NDArray idx, NDArray out);

// Explicit instantiations of UpdateGradMinMax_hetero.
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
#if BF16_ENABLED
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
#endif  // BF16_ENABLED
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);
template void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);

// Explicit instantiations of BackwardSegmentCmp.
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
#if BF16_ENABLED
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
#endif  // BF16_ENABLED
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);
template void BackwardSegmentCmp(
    NDArray feat, NDArray arg, NDArray out);

}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/cuda/segment_reduce.cuh
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file array/cuda/segment_reduce.cuh
 * @brief Segment reduce kernel function header.
 */
#ifndef DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_
#define DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_

#include #include #include "../../runtime/cuda/cuda_common.h"
#include "./atomic.cuh"
#include "./utils.h"

namespace dgl {

using namespace cuda;
using namespace runtime;

namespace aten {
namespace cuda {

/**
 * @brief CUDA kernel of segment reduce.
 * @note each blockthread is responsible for aggregation on a row
 *       in the result tensor.
*/
template __global__ void SegmentReduceKernel(
    const DType* feat, const IdType* offsets, DType* out, IdType* arg,
    int64_t n, int64_t dim) {
  // Blocks along x step over output rows; blocks along y together with the
  // threads of a block step over the columns of a row.
  for (int seg = blockIdx.x; seg < n; seg += gridDim.x) {
    for (int c = blockIdx.y * blockDim.x + threadIdx.x; c < dim;
         c += gridDim.y * blockDim.x) {
      typename accum_dtype::type local_accum = ReduceOp::zero();
      IdType local_arg = -1;
      // Fold every input row of the segment [offsets[seg], offsets[seg + 1])
      // into the accumulator for this column.
      for (IdType src = offsets[seg]; src < offsets[seg + 1]; ++src) {
        ReduceOp::Call(&local_accum, &local_arg, feat[src * dim + c], src);
      }
      out[seg * dim + c] = static_cast(local_accum);
      // The argument index is only stored for reducers that ask for it.
      if (ReduceOp::require_arg) {
        arg[seg * dim + c] = local_arg;
      }
    }
  }
}

/**
 * @brief CUDA kernel of scatter add.
 * @note Blocks along x step over the rows of the feature tensor; each row is
 *       atomically added to the output row selected by idx.
 */
template __global__ void ScatterAddKernel(
    const DType* feat, const IdType* idx, DType* out, int64_t n,
    int64_t dim) {
  for (int src = blockIdx.x; src < n; src += gridDim.x) {
    const int write_row = idx[src];
    for (int c = blockIdx.y * blockDim.x + threadIdx.x; c < dim;
         c += gridDim.y * blockDim.x) {
      // Several input rows may target the same output row, hence the atomic.
      cuda::AtomicAdd(out + write_row * dim + c, feat[src * dim + c]);
    }
  }
}

/**
 * @brief CUDA kernel to update gradients for reduce op max/min
 * @note each WARP (group of 32 threads) is responsible for adding a row in
 *       feature tensor to a target row in output tensor.
*/
template __global__ void UpdateGradMinMaxHeteroKernel(
    const DType* feat, const IdType* idx, const IdType* idx_type, DType* out,
    int64_t n, int64_t dim, int type) {
  unsigned int tId = threadIdx.x;
  unsigned int laneId = tId & 31;
  unsigned int gId = blockIdx.x * blockDim.x + threadIdx.x;
  unsigned int warpId = gId >> 5;
  unsigned int warp_size = 32;
  // Rows are handed out one per warp, so the loop must advance by the number
  // of warps in the grid. Advancing by the number of threads (as before)
  // skipped rows whenever the grid held fewer warps than there are rows.
  // Rounded up so the stride is never zero; assumes blockDim.x is a multiple
  // of 32, which holds for the 128-thread launch used by the host wrapper.
  const unsigned int num_warps =
      (blockDim.x * gridDim.x + warp_size - 1) / warp_size;
  unsigned int row = warpId;

  while (row < n) {
    // The 32 lanes of the warp sweep the columns of the row.
    for (unsigned int col = laneId; col < dim; col += warp_size) {
      // Only entries recorded for this node/edge type contribute.
      if (type == idx_type[row * dim + col]) {
        const int write_row = idx[row * dim + col];
        cuda::AtomicAdd(out + write_row * dim + col, feat[row * dim + col]);
      }
    }
    row += num_warps;
  }
}

/**
 * @brief CUDA kernel of backward phase in segment min/max.
 * @note each blockthread is responsible for writing a row in the
 *       result gradient tensor by lookup the ArgMin/Max for index information.
 */
template __global__ void BackwardSegmentCmpKernel(
    const DType* feat, const IdType* arg, DType* out, int64_t n,
    int64_t dim) {
  for (int row = blockIdx.x; row < n; row += gridDim.x) {
    int col = blockIdx.y * blockDim.x + threadIdx.x;
    while (col < dim) {
      int write_row = arg[row * dim + col];
      // A negative argument index marks an entry with no selected input row
      // (the forward kernel initialises the index to -1); nothing is written.
      if (write_row >= 0) {
        out[write_row * dim + col] = feat[row * dim + col];
      }
      col += gridDim.y * blockDim.x;
    }
  }
}

/**
 * @brief CUDA implementation of forward phase of Segment Reduce.
 * @param feat The input tensor.
 * @param offsets The offsets tensor.
 * @param out The output tensor.
* @param arg An auxiliary tensor storing ArgMax/Min information, */ template void SegmentReduce(NDArray feat, NDArray offsets, NDArray out, NDArray arg) { const DType* feat_data = feat.Ptr(); const IdType* offsets_data = offsets.Ptr(); DType* out_data = out.Ptr(); IdType* arg_data = arg.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); int64_t n = out->shape[0]; int64_t dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const int nbx = FindNumBlocks<'x'>(n); const int ntx = FindNumThreads(dim); const int nby = FindNumBlocks<'y'>((dim + ntx - 1) / ntx); const int nty = 1; const dim3 nblks(nbx, nby); const dim3 nthrs(ntx, nty); // TODO(zihao): try cub's DeviceSegmentedReduce and compare the performance. CUDA_KERNEL_CALL( (SegmentReduceKernel), nblks, nthrs, 0, stream, feat_data, offsets_data, out_data, arg_data, n, dim); } /** * @brief CUDA implementation of Scatter Add (on first dimension). * @note math equation: out[idx[i], *] += feat[i, *] * @param feat The input tensor. * @param idx The indices tensor. * @param out The output tensor. */ template void ScatterAdd(NDArray feat, NDArray idx, NDArray out) { const DType* feat_data = feat.Ptr(); const IdType* idx_data = idx.Ptr(); DType* out_data = out.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); int64_t n = feat->shape[0]; int64_t dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const int nbx = FindNumBlocks<'x'>(n); const int ntx = FindNumThreads(dim); const int nby = FindNumBlocks<'y'>((dim + ntx - 1) / ntx); const int nty = 1; const dim3 nblks(nbx, nby); const dim3 nthrs(ntx, nty); CUDA_KERNEL_CALL( (ScatterAddKernel), nblks, nthrs, 0, stream, feat_data, idx_data, out_data, n, dim); } /** * @brief CUDA implementation to update gradients for reduce op max/min * @param graph The input heterogeneous graph. * @param op The binary operator, could be `copy_u`, `copy_e'. * @param list_feat List of the input tensors. 
* @param list_idx List of the indices tensors. * @param list_idx_etype List of the node- or edge-type tensors. * @param list_out List of the output tensors. */ template void UpdateGradMinMax_hetero( const HeteroGraphPtr& graph, const std::string& op, const std::vector& list_feat, const std::vector& list_idx, const std::vector& list_idx_types, std::vector* list_out) { cudaStream_t stream = runtime::getCurrentCUDAStream(); if (op == "copy_lhs" || op == "copy_rhs") { std::vector> src_dst_ntypes( graph->NumVertexTypes(), std::vector()); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { auto pair = graph->meta_graph()->FindEdge(etype); const dgl_id_t dst_ntype = pair.first; // graph is reversed const dgl_id_t src_ntype = pair.second; auto same_src_dst_ntype = std::find( std::begin(src_dst_ntypes[dst_ntype]), std::end(src_dst_ntypes[dst_ntype]), src_ntype); // if op is "copy_lhs", relation type with same src and dst node type will // be updated once if (op == "copy_lhs" && same_src_dst_ntype != std::end(src_dst_ntypes[dst_ntype])) continue; src_dst_ntypes[dst_ntype].push_back(src_ntype); const DType* feat_data = list_feat[dst_ntype].Ptr(); const IdType* idx_data = list_idx[dst_ntype].Ptr(); const IdType* idx_type_data = list_idx_types[dst_ntype].Ptr(); int type = (op == "copy_lhs") ? src_ntype : etype; DType* out_data = (*list_out)[type].Ptr(); int dim = 1; for (int i = 1; i < (*list_out)[type]->ndim; ++i) dim *= (*list_out)[type]->shape[i]; int n = list_feat[dst_ntype]->shape[0]; const int th_per_row = 32; const int ntx = 128; const int nbx = FindNumBlocks<'x'>((n * th_per_row + ntx - 1) / ntx); const dim3 nblks(nbx); const dim3 nthrs(ntx); CUDA_KERNEL_CALL( (UpdateGradMinMaxHeteroKernel), nblks, nthrs, 0, stream, feat_data, idx_data, idx_type_data, out_data, n, dim, type); } } } /** * @brief CUDA implementation of backward phase of Segment Reduce with Min/Max * reducer. 
* @note math equation: out[arg[i, k], k] = feat[i, k] * @param feat The input * tensor. * @param arg The ArgMin/Max information, used for indexing. * @param out The output tensor. */ template void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out) { const DType* feat_data = feat.Ptr(); const IdType* arg_data = arg.Ptr(); DType* out_data = out.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); int64_t n = feat->shape[0]; int64_t dim = 1; for (int i = 1; i < out->ndim; ++i) dim *= out->shape[i]; const int nbx = FindNumBlocks<'x'>(n); const int ntx = FindNumThreads(dim); const int nby = FindNumBlocks<'y'>((dim + ntx - 1) / ntx); const int nty = 1; const dim3 nblks(nbx, nby); const dim3 nthrs(ntx, nty); CUDA_KERNEL_CALL( (BackwardSegmentCmpKernel), nblks, nthrs, 0, stream, feat_data, arg_data, out_data, n, dim); } } // namespace cuda } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CUDA_SEGMENT_REDUCE_CUH_ ================================================ FILE: src/array/cuda/spmat_op_impl_coo.cu ================================================ /** * Copyright (c) 2021 by contributors. 
* @file array/cuda/spmat_op_impl_coo.cu * @brief COO operator GPU implementation */ #include #include #include #include #include "../../runtime/cuda/cuda_common.h" #include "./atomic.cuh" #include "./utils.h" namespace dgl { using runtime::NDArray; using namespace cuda; namespace aten { namespace impl { template __device__ void _warpReduce(volatile IdType* sdata, IdType tid) { sdata[tid] += sdata[tid + 32]; sdata[tid] += sdata[tid + 16]; sdata[tid] += sdata[tid + 8]; sdata[tid] += sdata[tid + 4]; sdata[tid] += sdata[tid + 2]; sdata[tid] += sdata[tid + 1]; } template __global__ void _COOGetRowNNZKernel( const IdType* __restrict__ row_indices, IdType* __restrict__ glb_cnt, const int64_t row_query, IdType nnz) { __shared__ IdType local_cnt[1024]; IdType tx = threadIdx.x; IdType bx = blockIdx.x; local_cnt[tx] = 0; IdType start = bx * blockDim.x; while (start < nnz) { if (start + tx < nnz) local_cnt[tx] = (row_indices[start + tx] == row_query); __syncthreads(); if (tx < 512) { local_cnt[tx] += local_cnt[tx + 512]; __syncthreads(); } if (tx < 256) { local_cnt[tx] += local_cnt[tx + 256]; __syncthreads(); } if (tx < 128) { local_cnt[tx] += local_cnt[tx + 128]; __syncthreads(); } if (tx < 64) { local_cnt[tx] += local_cnt[tx + 64]; __syncthreads(); } if (tx < 32) { _warpReduce(local_cnt, tx); } if (tx == 0) { cuda::AtomicAdd(glb_cnt, local_cnt[tx]); } start += blockDim.x * gridDim.x; } } template int64_t COOGetRowNNZ(COOMatrix coo, int64_t row) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto& ctx = coo.row->ctx; IdType nnz = coo.row->shape[0]; IdType nt = 1024; IdType nb = dgl::cuda::FindNumBlocks<'x'>((nnz + nt - 1) / nt); NDArray rst = NDArray::Empty({1}, coo.row->dtype, coo.row->ctx); _Fill(rst.Ptr(), 1, IdType(0)); CUDA_KERNEL_CALL( _COOGetRowNNZKernel, nb, nt, 0, stream, coo.row.Ptr(), rst.Ptr(), row, nnz); rst = rst.CopyTo(DGLContext{kDGLCPU, 0}); return *rst.Ptr(); } template int64_t COOGetRowNNZ(COOMatrix, int64_t); template int64_t 
COOGetRowNNZ(COOMatrix, int64_t); template __global__ void _COOGetAllRowNNZKernel( const IdType* __restrict__ row_indices, IdType* __restrict__ glb_cnts, IdType nnz) { IdType eid = blockIdx.x * blockDim.x + threadIdx.x; while (eid < nnz) { IdType row = row_indices[eid]; cuda::AtomicAdd(glb_cnts + row, IdType(1)); eid += blockDim.x * gridDim.x; } } template NDArray COOGetRowNNZ(COOMatrix coo, NDArray rows) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto& ctx = coo.row->ctx; IdType nnz = coo.row->shape[0]; IdType num_rows = coo.num_rows; IdType num_queries = rows->shape[0]; if (num_queries == 1) { auto rows_cpu = rows.CopyTo(DGLContext{kDGLCPU, 0}); int64_t row = *rows_cpu.Ptr(); IdType nt = 1024; IdType nb = dgl::cuda::FindNumBlocks<'x'>((nnz + nt - 1) / nt); NDArray rst = NDArray::Empty({1}, coo.row->dtype, coo.row->ctx); _Fill(rst.Ptr(), 1, IdType(0)); CUDA_KERNEL_CALL( _COOGetRowNNZKernel, nb, nt, 0, stream, coo.row.Ptr(), rst.Ptr(), row, nnz); return rst; } else { IdType nt = 1024; IdType nb = dgl::cuda::FindNumBlocks<'x'>((nnz + nt - 1) / nt); NDArray in_degrees = NDArray::Empty({num_rows}, rows->dtype, rows->ctx); _Fill(in_degrees.Ptr(), num_rows, IdType(0)); CUDA_KERNEL_CALL( _COOGetAllRowNNZKernel, nb, nt, 0, stream, coo.row.Ptr(), in_degrees.Ptr(), nnz); return IndexSelect(in_degrees, rows); } } template NDArray COOGetRowNNZ(COOMatrix, NDArray); template NDArray COOGetRowNNZ(COOMatrix, NDArray); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/spmat_op_impl_csr.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/spmat_op_impl_csr.cu * @brief CSR operator CPU implementation */ #include #include #include #include #include #include #include #include "../../runtime/cuda/cuda_common.h" #include "./atomic.cuh" #include "./utils.h" namespace dgl { using runtime::NDArray; using namespace cuda; namespace aten 
{ namespace impl { ///////////////////////////// CSRIsNonZero ///////////////////////////// template bool CSRIsNonZero(CSRMatrix csr, int64_t row, int64_t col) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto& ctx = csr.indptr->ctx; IdArray rows = aten::VecToIdArray({row}, sizeof(IdType) * 8, ctx); IdArray cols = aten::VecToIdArray({col}, sizeof(IdType) * 8, ctx); rows = rows.CopyTo(ctx); cols = cols.CopyTo(ctx); IdArray out = aten::NewIdArray(1, ctx, sizeof(IdType) * 8); const IdType* data = nullptr; // TODO(minjie): use binary search for sorted csr CUDA_KERNEL_CALL( dgl::cuda::_LinearSearchKernel, 1, 1, 0, stream, csr.indptr.Ptr(), csr.indices.Ptr(), data, rows.Ptr(), cols.Ptr(), 1, 1, 1, static_cast(nullptr), static_cast(-1), out.Ptr()); out = out.CopyTo(DGLContext{kDGLCPU, 0}); return *out.Ptr() != -1; } template bool CSRIsNonZero(CSRMatrix, int64_t, int64_t); template bool CSRIsNonZero(CSRMatrix, int64_t, int64_t); template NDArray CSRIsNonZero(CSRMatrix csr, NDArray row, NDArray col) { const auto rowlen = row->shape[0]; const auto collen = col->shape[0]; const auto rstlen = std::max(rowlen, collen); NDArray rst = NDArray::Empty({rstlen}, row->dtype, row->ctx); if (rstlen == 0) return rst; const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1; const int64_t col_stride = (collen == 1 && rowlen != 1) ? 
0 : 1; cudaStream_t stream = runtime::getCurrentCUDAStream(); const int nt = dgl::cuda::FindNumThreads(rstlen); const int nb = (rstlen + nt - 1) / nt; const IdType* data = nullptr; const IdType* indptr_data = static_cast(GetDevicePointer(csr.indptr)); const IdType* indices_data = static_cast(GetDevicePointer(csr.indices)); // TODO(minjie): use binary search for sorted csr CUDA_KERNEL_CALL( dgl::cuda::_LinearSearchKernel, nb, nt, 0, stream, indptr_data, indices_data, data, row.Ptr(), col.Ptr(), row_stride, col_stride, rstlen, static_cast(nullptr), static_cast(-1), rst.Ptr()); return rst != -1; } template NDArray CSRIsNonZero(CSRMatrix, NDArray, NDArray); template NDArray CSRIsNonZero(CSRMatrix, NDArray, NDArray); ///////////////////////////// CSRHasDuplicate ///////////////////////////// /** * @brief Check whether each row does not have any duplicate entries. * Assume the CSR is sorted. */ template __global__ void _SegmentHasNoDuplicate( const IdType* indptr, const IdType* indices, int64_t num_rows, int8_t* flags) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < num_rows) { bool f = true; for (IdType i = indptr[tx] + 1; f && i < indptr[tx + 1]; ++i) { f = (indices[i - 1] != indices[i]); } flags[tx] = static_cast(f); tx += stride_x; } } template bool CSRHasDuplicate(CSRMatrix csr) { if (!csr.sorted) csr = CSRSort(csr); const auto& ctx = csr.indptr->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = runtime::DeviceAPI::Get(ctx); // We allocate a workspace of num_rows bytes. It wastes a little bit memory // but should be fine. 
int8_t* flags = static_cast(device->AllocWorkspace(ctx, csr.num_rows)); const int nt = dgl::cuda::FindNumThreads(csr.num_rows); const int nb = (csr.num_rows + nt - 1) / nt; CUDA_KERNEL_CALL( _SegmentHasNoDuplicate, nb, nt, 0, stream, csr.indptr.Ptr(), csr.indices.Ptr(), csr.num_rows, flags); bool ret = dgl::cuda::AllTrue(flags, csr.num_rows, ctx); device->FreeWorkspace(ctx, flags); return !ret; } template bool CSRHasDuplicate(CSRMatrix csr); template bool CSRHasDuplicate(CSRMatrix csr); ///////////////////////////// CSRGetRowNNZ ///////////////////////////// template int64_t CSRGetRowNNZ(CSRMatrix csr, int64_t row) { const IdType cur = aten::IndexSelect(csr.indptr, row); const IdType next = aten::IndexSelect(csr.indptr, row + 1); return next - cur; } template int64_t CSRGetRowNNZ(CSRMatrix, int64_t); template int64_t CSRGetRowNNZ(CSRMatrix, int64_t); template __global__ void _CSRGetRowNNZKernel( const IdType* vid, const IdType* indptr, IdType* out, int64_t length) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { const IdType vv = vid[tx]; out[tx] = indptr[vv + 1] - indptr[vv]; tx += stride_x; } } template NDArray CSRGetRowNNZ(CSRMatrix csr, NDArray rows) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto len = rows->shape[0]; const IdType* vid_data = rows.Ptr(); const IdType* indptr_data = static_cast(GetDevicePointer(csr.indptr)); NDArray rst = NDArray::Empty({len}, rows->dtype, rows->ctx); IdType* rst_data = static_cast(rst->data); const int nt = dgl::cuda::FindNumThreads(len); const int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( _CSRGetRowNNZKernel, nb, nt, 0, stream, vid_data, indptr_data, rst_data, len); return rst; } template NDArray CSRGetRowNNZ(CSRMatrix, NDArray); template NDArray CSRGetRowNNZ(CSRMatrix, NDArray); ////////////////////////// CSRGetRowColumnIndices ////////////////////////////// template NDArray CSRGetRowColumnIndices(CSRMatrix csr, int64_t row) { const int64_t 
len = impl::CSRGetRowNNZ(csr, row); const int64_t offset = aten::IndexSelect(csr.indptr, row) * sizeof(IdType); return csr.indices.CreateView({len}, csr.indices->dtype, offset); } template NDArray CSRGetRowColumnIndices(CSRMatrix, int64_t); template NDArray CSRGetRowColumnIndices(CSRMatrix, int64_t); ///////////////////////////// CSRGetRowData ///////////////////////////// template NDArray CSRGetRowData(CSRMatrix csr, int64_t row) { const int64_t len = impl::CSRGetRowNNZ(csr, row); const int64_t offset = aten::IndexSelect(csr.indptr, row) * sizeof(IdType); if (aten::CSRHasData(csr)) return csr.data.CreateView({len}, csr.data->dtype, offset); else return aten::Range( offset, offset + len, csr.indptr->dtype.bits, csr.indptr->ctx); } template NDArray CSRGetRowData(CSRMatrix, int64_t); template NDArray CSRGetRowData(CSRMatrix, int64_t); ///////////////////////////// CSRSliceRows ///////////////////////////// template CSRMatrix CSRSliceRows(CSRMatrix csr, int64_t start, int64_t end) { const int64_t num_rows = end - start; const IdType st_pos = aten::IndexSelect(csr.indptr, start); const IdType ed_pos = aten::IndexSelect(csr.indptr, end); const IdType nnz = ed_pos - st_pos; IdArray ret_indptr = aten::IndexSelect(csr.indptr, start, end + 1) - st_pos; // indices and data can be view arrays IdArray ret_indices = csr.indices.CreateView( {nnz}, csr.indices->dtype, st_pos * sizeof(IdType)); IdArray ret_data; if (CSRHasData(csr)) ret_data = csr.data.CreateView({nnz}, csr.data->dtype, st_pos * sizeof(IdType)); else ret_data = aten::Range(st_pos, ed_pos, csr.indptr->dtype.bits, csr.indptr->ctx); return CSRMatrix( num_rows, csr.num_cols, ret_indptr, ret_indices, ret_data, csr.sorted); } template CSRMatrix CSRSliceRows(CSRMatrix, int64_t, int64_t); template CSRMatrix CSRSliceRows(CSRMatrix, int64_t, int64_t); /** * @brief Copy data segment to output buffers * * For the i^th row r = row[i], copy the data from indptr[r] ~ indptr[r+1] * to the out_data from out_indptr[i] ~ 
out_indptr[i+1] * * If the provided `data` array is nullptr, write the read index to the * out_data. * */ template __global__ void _SegmentCopyKernel( const IdType* indptr, const DType* data, const IdType* row, int64_t length, int64_t n_row, const IdType* out_indptr, DType* out_data) { IdType tx = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < length) { IdType rpos = dgl::cuda::_UpperBound(out_indptr, n_row, tx) - 1; IdType rofs = tx - out_indptr[rpos]; const IdType u = row[rpos]; out_data[tx] = data ? data[indptr[u] + rofs] : indptr[u] + rofs; tx += stride_x; } } template CSRMatrix CSRSliceRows(CSRMatrix csr, NDArray rows) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const int64_t len = rows->shape[0]; IdArray ret_indptr = aten::CumSum(aten::CSRGetRowNNZ(csr, rows), true); const int64_t nnz = aten::IndexSelect(ret_indptr, len); const int nt = 256; // for better GPU usage of small invocations const int nb = (nnz + nt - 1) / nt; // Copy indices. IdArray ret_indices = NDArray::Empty({nnz}, csr.indptr->dtype, rows->ctx); const IdType* indptr_data = static_cast(GetDevicePointer(csr.indptr)); const IdType* indices_data = static_cast(GetDevicePointer(csr.indices)); const IdType* data_data = CSRHasData(csr) ? static_cast(GetDevicePointer(csr.data)) : nullptr; CUDA_KERNEL_CALL( _SegmentCopyKernel, nb, nt, 0, stream, indptr_data, indices_data, rows.Ptr(), nnz, len, ret_indptr.Ptr(), ret_indices.Ptr()); // Copy data. 
IdArray ret_data = NDArray::Empty({nnz}, csr.indptr->dtype, rows->ctx); CUDA_KERNEL_CALL( _SegmentCopyKernel, nb, nt, 0, stream, indptr_data, data_data, rows.Ptr(), nnz, len, ret_indptr.Ptr(), ret_data.Ptr()); return CSRMatrix( len, csr.num_cols, ret_indptr, ret_indices, ret_data, csr.sorted); } template CSRMatrix CSRSliceRows(CSRMatrix, NDArray); template CSRMatrix CSRSliceRows(CSRMatrix, NDArray); ///////////////////////////// CSRGetDataAndIndices ///////////////////////////// /** * @brief Generate a 0-1 mask for each index that hits the provided (row, col) * index. * * Examples: * Given a CSR matrix (with duplicate entries) as follows: * [[0, 1, 2, 0, 0], * [1, 0, 0, 0, 0], * [0, 0, 1, 1, 0], * [0, 0, 0, 0, 0]] * Given rows: [0, 1], cols: [0, 2, 3] * The result mask is: [0, 1, 1, 1, 0, 0] */ template __global__ void _SegmentMaskKernel( const IdType* indptr, const IdType* indices, const IdType* row, const IdType* col, int64_t row_stride, int64_t col_stride, int64_t length, IdType* mask) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < length) { int rpos = tx * row_stride, cpos = tx * col_stride; const IdType r = row[rpos], c = col[cpos]; for (IdType i = indptr[r]; i < indptr[r + 1]; ++i) { if (indices[i] == c) { mask[i] = 1; } } tx += stride_x; } } /** * @brief Search for the insertion positions for needle in the hay. * * The hay is a list of sorted elements and the result is the insertion position * of each needle so that the insertion still gives sorted order. * * It essentially perform binary search to find lower bound for each needle * elements. Require the largest elements in the hay is larger than the given * needle elements. Commonly used in searching for row IDs of a given set of * coordinates. 
*/ template __global__ void _SortedSearchKernel( const IdType* hay, int64_t hay_size, const IdType* needles, int64_t num_needles, IdType* pos) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < num_needles) { const IdType ele = needles[tx]; // binary search IdType lo = 0, hi = hay_size - 1; while (lo < hi) { IdType mid = (lo + hi) >> 1; if (hay[mid] <= ele) { lo = mid + 1; } else { hi = mid; } } pos[tx] = (hay[hi] == ele) ? hi : hi - 1; tx += stride_x; } } template std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray row, NDArray col) { const auto rowlen = row->shape[0]; const auto collen = col->shape[0]; const auto len = std::max(rowlen, collen); if (len == 0) return {NullArray(), NullArray(), NullArray()}; const auto& ctx = row->ctx; const auto nbits = row->dtype.bits; const int64_t nnz = csr.indices->shape[0]; const int64_t row_stride = (rowlen == 1 && collen != 1) ? 0 : 1; const int64_t col_stride = (collen == 1 && rowlen != 1) ? 0 : 1; cudaStream_t stream = runtime::getCurrentCUDAStream(); const IdType* indptr_data = static_cast(GetDevicePointer(csr.indptr)); const IdType* indices_data = static_cast(GetDevicePointer(csr.indices)); // Generate a 0-1 mask for matched (row, col) positions. IdArray mask = Full(0, nnz, nbits, ctx); const int nt = dgl::cuda::FindNumThreads(len); const int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( _SegmentMaskKernel, nb, nt, 0, stream, indptr_data, indices_data, row.Ptr(), col.Ptr(), row_stride, col_stride, len, mask.Ptr()); IdArray idx = AsNumBits(NonZero(mask), nbits); if (idx->shape[0] == 0) // No data. Return three empty arrays. 
return {idx, idx, idx}; // Search for row index IdArray ret_row = NewIdArray(idx->shape[0], ctx, nbits); const int nt2 = dgl::cuda::FindNumThreads(idx->shape[0]); const int nb2 = (idx->shape[0] + nt - 1) / nt; CUDA_KERNEL_CALL( _SortedSearchKernel, nb2, nt2, 0, stream, indptr_data, csr.num_rows, idx.Ptr(), idx->shape[0], ret_row.Ptr()); // Column & data can be obtained by index select. IdArray ret_col = IndexSelect(csr.indices, idx); IdArray ret_data = CSRHasData(csr) ? IndexSelect(csr.data, idx) : idx; return {ret_row, ret_col, ret_data}; } template std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray rows, NDArray cols); template std::vector CSRGetDataAndIndices( CSRMatrix csr, NDArray rows, NDArray cols); ///////////////////////////// CSRSliceMatrix ///////////////////////////// int64_t _UpPower(int64_t numel) { uint64_t ret = 1 << static_cast(std::log2(numel) + 1); return ret; } /** * @brief Thomas Wang's 32 bit Mix Function. * Source link: https://gist.github.com/badboy/6267743 */ __device__ inline uint32_t _Hash32Shift(uint32_t key) { key = ~key + (key << 15); key = key ^ (key >> 12); key = key + (key << 2); key = key ^ (key >> 4); key = key * 2057; key = key ^ (key >> 16); return key; } /** * @brief Thomas Wang's 64 bit Mix Function. * Source link: https://gist.github.com/badboy/6267743 */ __device__ inline uint64_t _Hash64Shift(uint64_t key) { key = (~key) + (key << 21); key = key ^ (key >> 24); key = (key + (key << 3)) + (key << 8); key = key ^ (key >> 14); key = (key + (key << 2)) + (key << 4); key = key ^ (key >> 28); key = key + (key << 31); return key; } /** * @brief A hashmap designed for CSRSliceMatrix, similar in function to set. For * performance, it can only be created and called in the cuda kernel. */ template struct NodeQueryHashmap { __device__ inline NodeQueryHashmap(IdType* Kptr, size_t numel) : kptr_(Kptr), capacity_(numel) {} /** * @brief Insert a key. It must be called by cuda threads. * * @param key The key to be inserted. 
*/ __device__ inline void Insert(IdType key) { uint32_t delta = 1; uint32_t pos = Hash(key); IdType prev = dgl::aten::cuda::AtomicCAS(&kptr_[pos], kEmptyKey_, key); while (prev != key && prev != kEmptyKey_) { pos = Hash(pos + delta); delta += 1; prev = dgl::aten::cuda::AtomicCAS(&kptr_[pos], kEmptyKey_, key); } } /** * @brief Check whether a key exists within the hashtable. It must be called * by cuda threads. * * @param key The key to check for. * @return True if the key exists in the hashtable. */ __device__ inline bool Query(IdType key) { uint32_t delta = 1; uint32_t pos = Hash(key); while (true) { if (kptr_[pos] == key) return true; if (kptr_[pos] == kEmptyKey_) return false; pos = Hash(pos + delta); delta += 1; } return false; } __device__ inline uint32_t Hash(int32_t key) { return _Hash32Shift(key) & (capacity_ - 1); } __device__ inline uint32_t Hash(uint32_t key) { return _Hash32Shift(key) & (capacity_ - 1); } __device__ inline uint32_t Hash(int64_t key) { return static_cast(_Hash64Shift(key)) & (capacity_ - 1); } __device__ inline uint32_t Hash(uint64_t key) { return static_cast(_Hash64Shift(key)) & (capacity_ - 1); } IdType kEmptyKey_{-1}; IdType* kptr_; uint32_t capacity_{0}; }; /** * @brief Generate a 0-1 mask for each index whose column is in the provided * hashmap. It also counts the number of masked values per row. * * @tparam IdType The ID type used for matrices. * @tparam WARP_SIZE The number of cuda threads in a cuda warp. * @tparam BLOCK_WARPS The number of warps in a cuda block. * @tparam TILE_SIZE The number of rows covered by each threadblock. 
*/ template __global__ void _SegmentMaskColKernel( const IdType* indptr, const IdType* indices, int64_t num_rows, IdType* hashmap_buffer, int64_t buffer_size, IdType* mask, IdType* count) { assert(blockDim.x == WARP_SIZE); assert(blockDim.y == BLOCK_WARPS); int warp_id = threadIdx.y; int laneid = threadIdx.x; IdType out_row = blockIdx.x * TILE_SIZE + threadIdx.y; IdType last_row = min(static_cast((blockIdx.x + 1) * TILE_SIZE), static_cast(num_rows)); NodeQueryHashmap hashmap(hashmap_buffer, buffer_size); typedef cub::WarpReduce WarpReduce; __shared__ typename WarpReduce::TempStorage temp_storage[BLOCK_WARPS]; while (out_row < last_row) { IdType local_count = 0; IdType in_row_start = indptr[out_row]; IdType in_row_end = indptr[out_row + 1]; for (int idx = in_row_start + laneid; idx < in_row_end; idx += WARP_SIZE) { bool is_in = hashmap.Query(indices[idx]); if (is_in) { local_count += 1; mask[idx] = 1; } } IdType reduce_count = WarpReduce(temp_storage[warp_id]).Sum(local_count); if (laneid == 0) { count[out_row] = reduce_count; } out_row += BLOCK_WARPS; } } template CSRMatrix CSRSliceMatrix( CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto& ctx = rows->ctx; const auto& dtype = rows->dtype; const auto nbits = dtype.bits; const int64_t new_nrows = rows->shape[0]; const int64_t new_ncols = cols->shape[0]; if (new_nrows == 0 || new_ncols == 0) return CSRMatrix( new_nrows, new_ncols, Full(0, new_nrows + 1, nbits, ctx), NullArray(dtype, ctx), NullArray(dtype, ctx)); // First slice rows csr = CSRSliceRows(csr, rows); if (csr.indices->shape[0] == 0) return CSRMatrix( new_nrows, new_ncols, Full(0, new_nrows + 1, nbits, ctx), NullArray(dtype, ctx), NullArray(dtype, ctx)); // Generate a 0-1 mask for matched (row, col) positions. IdArray mask = Full(0, csr.indices->shape[0], nbits, ctx); // A count for how many masked values per row. 
IdArray count = NewIdArray(csr.num_rows, ctx, nbits); CUDA_CALL( cudaMemset(count.Ptr(), 0, sizeof(IdType) * (csr.num_rows))); // Generate a NodeQueryHashmap buffer. The key of the hashmap is col. // For performance, the load factor of the hashmap is in (0.25, 0.5); // Because num_cols is usually less than 1 Million (on GPU), the // memory overhead is not significant (less than 31MB) at a low load factor. int64_t buffer_size = _UpPower(new_ncols) * 2; IdArray hashmap_buffer = Full(-1, buffer_size, nbits, ctx); using it = thrust::counting_iterator; runtime::CUDAWorkspaceAllocator allocator(ctx); const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream); thrust::for_each( exec_policy, it(0), it(new_ncols), [key = cols.Ptr(), buffer = hashmap_buffer.Ptr(), buffer_size] __device__(int64_t i) { NodeQueryHashmap hashmap(buffer, buffer_size); hashmap.Insert(key[i]); }); const IdType* indptr_data = static_cast(GetDevicePointer(csr.indptr)); const IdType* indices_data = static_cast(GetDevicePointer(csr.indices)); // Execute SegmentMaskColKernel const int64_t num_rows = csr.num_rows; constexpr int WARP_SIZE = 32; // With a simple fine-tuning, TILE_SIZE=16 gives a good performance. constexpr int TILE_SIZE = 16; constexpr int BLOCK_WARPS = CUDA_MAX_NUM_THREADS / WARP_SIZE; IdType nb = dgl::cuda::FindNumBlocks<'x'>((num_rows + TILE_SIZE - 1) / TILE_SIZE); const dim3 nthrs(WARP_SIZE, BLOCK_WARPS); const dim3 nblks(nb); CUDA_KERNEL_CALL( (_SegmentMaskColKernel), nblks, nthrs, 0, stream, indptr_data, indices_data, num_rows, hashmap_buffer.Ptr(), buffer_size, mask.Ptr(), count.Ptr()); IdArray idx = AsNumBits(NonZero(mask), nbits); if (idx->shape[0] == 0) return CSRMatrix( new_nrows, new_ncols, Full(0, new_nrows + 1, nbits, ctx), NullArray(dtype, ctx), NullArray(dtype, ctx)); // Indptr needs to be adjusted according to the new nnz per row. IdArray ret_indptr = CumSum(count, true); // Column & data can be obtained by index select. 
IdArray ret_col = IndexSelect(csr.indices, idx); IdArray ret_data = CSRHasData(csr) ? IndexSelect(csr.data, idx) : idx; // Relabel column IdArray col_hash = NewIdArray(csr.num_cols, ctx, nbits); Scatter_(cols, Range(0, cols->shape[0], nbits, ctx), col_hash); ret_col = IndexSelect(col_hash, ret_col); return CSRMatrix(new_nrows, new_ncols, ret_indptr, ret_col, ret_data); } template CSRMatrix CSRSliceMatrix( CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols); template CSRMatrix CSRSliceMatrix( CSRMatrix csr, runtime::NDArray rows, runtime::NDArray cols); } // namespace impl } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/spmm.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/spmm.cu * @brief SPMM C APIs and definitions. */ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./functor.cuh" #include "./ge_spmm.cuh" #include "./spmm.cuh" namespace dgl { using namespace cuda; namespace aten { /** * @brief CUDA implementation of g-SpMM on Csr format. * @note use cusparse if the reduce operator is `sum` and there is * no broadcast, use dgl's kernel in other cases. 
*/ template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux) { bool is_scalar_efeat = efeat.NumElements() == csr.indices->shape[0]; bool use_efeat = op != "copy_lhs"; bool use_deterministic_alg_only = false; if (NULL != std::getenv("USE_DETERMINISTIC_ALG")) use_deterministic_alg_only = true; if (reduce == "sum") { bool more_nnz = (csr.indices->shape[0] > csr.num_rows * csr.num_cols); if (op == "copy_lhs" && cusparse_available(more_nnz)) { // cusparse int64_t x_length = 1; for (int i = 1; i < ufeat->ndim; ++i) x_length *= ufeat->shape[i]; CusparseCsrmm2( ufeat->ctx, csr, static_cast(ufeat->data), nullptr, static_cast(out->data), x_length, use_deterministic_alg_only); } else if ( op == "mul" && is_scalar_efeat && cusparse_available(more_nnz)) { // cusparse int64_t x_length = 1; for (int i = 1; i < ufeat->ndim; ++i) x_length *= ufeat->shape[i]; if (!IsNullArray(csr.data)) { efeat = IndexSelect(efeat, csr.data); } CusparseCsrmm2( ufeat->ctx, csr, static_cast(ufeat->data), static_cast(efeat->data), static_cast(out->data), x_length, use_deterministic_alg_only); } else { // general kernel SWITCH_OP(op, Op, { cuda::SpMMCsr >( bcast, csr, ufeat, efeat, out, NullArray(), NullArray()); }); } } else if (reduce == "max") { SWITCH_OP(op, Op, { cuda::SpMMCsr >( bcast, csr, ufeat, efeat, out, out_aux[0], out_aux[1]); }); } else if (reduce == "min") { SWITCH_OP(op, Op, { cuda::SpMMCsr >( bcast, csr, ufeat, efeat, out, out_aux[0], out_aux[1]); }); } else { LOG(FATAL) << "Not implemented"; } } /** * @brief CUDA implementation of g-SpMM on Coo format. 
*/ template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux) { if (reduce == "sum") { SWITCH_OP(op, Op, { cuda::SpMMCoo >( bcast, coo, ufeat, efeat, out, NullArray(), NullArray()); }); } else if (reduce == "max") { SWITCH_OP(op, Op, { cuda::SpMMCoo >( bcast, coo, ufeat, efeat, out, out_aux[0], out_aux[1]); }); } else if (reduce == "min") { SWITCH_OP(op, Op, { cuda::SpMMCoo >( bcast, coo, ufeat, efeat, out, out_aux[0], out_aux[1]); }); } else { LOG(FATAL) << "Not implemented"; } } template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); #if BF16_ENABLED template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); #endif // BF16_ENABLED template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCsr( const std::string& op, const std::string& reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCsr( const std::string& op, const std::string& 
reduce, const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); #if BF16_ENABLED template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); #endif // BF16_ENABLED template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); template void SpMMCoo( const std::string& op, const std::string& reduce, const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/spmm.cuh ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/spmm.cuh * @brief SPMM CUDA kernel function header. 
*/ #ifndef DGL_ARRAY_CUDA_SPMM_CUH_ #define DGL_ARRAY_CUDA_SPMM_CUH_ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" #include "atomic.cuh" #include "bf16.cuh" #include "fp16.cuh" #include "macro.cuh" namespace dgl { using namespace cuda; namespace aten { /** * @brief Determine whether cusparse SpMM function is applicable. */ template inline bool cusparse_available(bool more_nnz_than_matrix_size) { #if CUDART_VERSION < 11000 if (std::is_same::value && (std::is_same::value || std::is_same::value)) return true; return false; #else if (std::is_same::value || std::is_same::value) return false; // cusparse's SpMM on fp16 is slow, temporally disabled. // If the CSR matrix has more NNZ than matrix size, we should not use // cuSPARSE 11.1. return !more_nnz_than_matrix_size; #endif } namespace { /** @brief Call cuBLAS geam API for transpose operation for float and double. */ template cublasStatus_t Xgeam( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const DType* alpha, const DType* A, int lda, const DType* beta, const DType* B, int ldb, DType* C, int ldc) { LOG(FATAL) << "Not supported dtype"; return CUBLAS_STATUS_EXECUTION_FAILED; } template <> cublasStatus_t Xgeam<__half>( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const __half* alpha, const __half* A, int lda, const __half* beta, const __half* B, int ldb, __half* C, int ldc) { // TODO(ndickson): There is no cublasHgeam, so a different // implementation would be required. 
LOG(FATAL) << "Xgeam does not support dtype half (FP16)"; return CUBLAS_STATUS_EXECUTION_FAILED; } #if BF16_ENABLED template <> cublasStatus_t Xgeam<__nv_bfloat16>( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const __nv_bfloat16* alpha, const __nv_bfloat16* A, int lda, const __nv_bfloat16* beta, const __nv_bfloat16* B, int ldb, __nv_bfloat16* C, int ldc) { // TODO(ndickson): There is no cublasHgeam, so a different // implementation would be required. LOG(FATAL) << "Xgeam does not support dtype bfloat16 (BF16)"; return CUBLAS_STATUS_EXECUTION_FAILED; } #endif // BF16_ENABLED template <> cublasStatus_t Xgeam( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const float* alpha, const float* A, int lda, const float* beta, const float* B, int ldb, float* C, int ldc) { return cublasSgeam( handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } template <> cublasStatus_t Xgeam( cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int m, int n, const double* alpha, const double* A, int lda, const double* beta, const double* B, int ldb, double* C, int ldc) { return cublasDgeam( handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); } /** * @brief Transpose operator kernel implementation. * @note not efficient but it's not a bottleneck, used for float16 dtype. */ template __global__ void _TransposeKernel( const DType* __restrict__ in, DType* __restrict__ out, int n, int m) { int i = blockIdx.x; for (int j = threadIdx.x; j < m; j += blockDim.x) out[i * m + j] = in[j * n + i]; } /** * @brief Tranpose the input matrix. * @param row number of rows of input matrix. * @param col number of columns of input matrix. 
*/ template void _Transpose(const DType* in, DType* out, int row, int col) { DType alpha = 1., beta = 0.; auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); if (!thr_entry->cublas_handle) CUBLAS_CALL(cublasCreate(&(thr_entry->cublas_handle))); CUBLAS_CALL(cublasSetStream(thr_entry->cublas_handle, stream)); CUBLAS_CALL(Xgeam( thr_entry->cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, row, col, &alpha, in, col, &beta, nullptr, row, out, row)); } /** * @brief Tranpose the input matrix for data type half. * @note cuBLAS has no geam API for half data type, fallback to our kernel. */ template <> void _Transpose<__half>(const __half* in, __half* out, int row, int col) { cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = FindNumThreads(row); int nb = col; CUDA_KERNEL_CALL(_TransposeKernel, nb, nt, 0, stream, in, out, col, row); } #if BF16_ENABLED /** * @brief Tranpose the input matrix for data type half. * @note cuBLAS has no geam API for bf16 data type, fallback to our kernel. 
*/ template <> void _Transpose<__nv_bfloat16>( const __nv_bfloat16* in, __nv_bfloat16* out, int row, int col) { cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = FindNumThreads(row); int nb = col; CUDA_KERNEL_CALL(_TransposeKernel, nb, nt, 0, stream, in, out, col, row); } #endif // BF16_ENABLED #if CUDART_VERSION < 11000 template cusparseStatus_t Xcsrmm2( cusparseHandle_t handle, cusparseOperation_t transA, cusparseOperation_t transB, int m, int n, int k, int nnz, const DType* alpha, const cusparseMatDescr_t descrA, const DType* csrValA, const int* csrRowPtrA, const int* csrColIndA, const DType* B, int ldb, const DType* beta, DType* C, int ldc) { LOG(INFO) << "Not supported dtype"; return CUSPARSE_STATUS_EXECUTION_FAILED; } template <> cusparseStatus_t Xcsrmm2( cusparseHandle_t handle, cusparseOperation_t transA, cusparseOperation_t transB, int m, int n, int k, int nnz, const float* alpha, const cusparseMatDescr_t descrA, const float* csrValA, const int* csrRowPtrA, const int* csrColIndA, const float* B, int ldb, const float* beta, float* C, int ldc) { return cusparseScsrmm2( handle, transA, transB, m, n, k, nnz, alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, beta, C, ldc); } template <> cusparseStatus_t Xcsrmm2( cusparseHandle_t handle, cusparseOperation_t transA, cusparseOperation_t transB, int m, int n, int k, int nnz, const double* alpha, const cusparseMatDescr_t descrA, const double* csrValA, const int* csrRowPtrA, const int* csrColIndA, const double* B, int ldb, const double* beta, double* C, int ldc) { return cusparseDcsrmm2( handle, transA, transB, m, n, k, nnz, alpha, descrA, csrValA, csrRowPtrA, csrColIndA, B, ldb, beta, C, ldc); } #endif /** Cusparse implementation of SpMM on Csr format. 
*/ template void CusparseCsrmm2( const DGLContext& ctx, const CSRMatrix& csr, const DType* B_data, const DType* A_data, DType* C_data, int x_length, bool use_deterministic_alg_only = false) { // We use csrmm2 to perform following operation: // C = A x B, where A is a sparse matrix in csr format, B is the dense matrix // for node feature tensor. However, since cusparse only supports // column-major, while our tensor is stored in row-major, the actual // computation is: C = trans(A x trans(B)). Currently, we use cublasXgeam to // implement transposition and allocate intermediate workspace memory for // this. const int m = csr.num_rows; const int n = x_length; const int k = csr.num_cols; const int nnz = csr.indices->shape[0]; const DType alpha = 1.0; const DType beta = 0.0; // device auto device = runtime::DeviceAPI::Get(ctx); auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); cudaStream_t stream = runtime::getCurrentCUDAStream(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, stream)); // all one data array DType* valptr = nullptr; if (!A_data) { valptr = static_cast(device->AllocWorkspace(ctx, nnz * sizeof(DType))); _Fill(valptr, nnz, static_cast(1.)); } #if CUDART_VERSION >= 11000 cusparseSpMatDescr_t matA; cusparseDnMatDescr_t matB, matC; constexpr auto dtype = cuda_dtype::value; constexpr auto idtype = cusparse_idtype::value; CUSPARSE_CALL(cusparseCreateCsr( &matA, m, k, nnz, static_cast(csr.indptr->data), static_cast(csr.indices->data), const_cast(valptr ? 
valptr : A_data), idtype, idtype, CUSPARSE_INDEX_BASE_ZERO, dtype)); CUSPARSE_CALL(cusparseCreateDnMat( &matB, k, n, n, const_cast(B_data), dtype, CUSPARSE_ORDER_ROW)); CUSPARSE_CALL( cusparseCreateDnMat(&matC, m, n, n, C_data, dtype, CUSPARSE_ORDER_ROW)); auto transA = CUSPARSE_OPERATION_NON_TRANSPOSE; auto transB = CUSPARSE_OPERATION_NON_TRANSPOSE; size_t workspace_size; cusparseSpMMAlg_t spmm_alg = use_deterministic_alg_only ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2; CUSPARSE_CALL(cusparseSpMM_bufferSize( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, spmm_alg, &workspace_size)); void* workspace = device->AllocWorkspace(ctx, workspace_size); CUSPARSE_CALL(cusparseSpMM( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, spmm_alg, workspace)); device->FreeWorkspace(ctx, workspace); CUSPARSE_CALL(cusparseDestroySpMat(matA)); CUSPARSE_CALL(cusparseDestroyDnMat(matB)); CUSPARSE_CALL(cusparseDestroyDnMat(matC)); #else // allocate matrix for temporary transposed output DType* trans_out = static_cast(device->AllocWorkspace(ctx, m * n * sizeof(DType))); cusparseMatDescr_t descr; CUSPARSE_CALL(cusparseCreateMatDescr(&descr)); CUSPARSE_CALL(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); CUSPARSE_CALL(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); CUSPARSE_CALL(Xcsrmm2( thr_entry->cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, m, n, k, nnz, &alpha, descr, (valptr) ? valptr : A_data, static_cast(csr.indptr->data), static_cast(csr.indices->data), B_data, n, &beta, trans_out, m)); CUSPARSE_CALL(cusparseDestroyMatDescr(descr)); // transpose the output matrix _Transpose(trans_out, C_data, n, m); device->FreeWorkspace(ctx, trans_out); #endif if (valptr) device->FreeWorkspace(ctx, valptr); } /** Cusparse implementation of SpMM on Csr format. 
*/ template void CusparseCsrmm2Hetero( const DGLContext& ctx, const CSRMatrix& csr, const DType* B_data, const DType* A_data, DType* C_data, int64_t x_length, cudaStream_t strm_id, bool use_deterministic_alg_only = false) { // We use csrmm2 to perform following operation: // C = A x B, where A is a sparse matrix in csr format, B is the dense matrix // for node feature tensor. However, since cusparse only supports // column-major, while our tensor is stored in row-major, the actual // computation is: C = trans(A x trans(B)). Currently, we use cublasXgeam to // implement transposition and allocate intermediate workspace memory for // this. int int_maxlimit = std::numeric_limits::max(); CHECK_GE(int_maxlimit, (csr.num_rows)); CHECK_GE(int_maxlimit, csr.num_cols); CHECK_GE(int_maxlimit, csr.indices->shape[0]); const int m = csr.num_rows; const int n = x_length; const int k = csr.num_cols; const int nnz = csr.indices->shape[0]; const DType alpha = 1.0; const DType beta = 1.0; // device auto device = runtime::DeviceAPI::Get(ctx); auto* thr_entry = runtime::CUDAThreadEntry::ThreadLocal(); // allocate cusparse handle if needed if (!thr_entry->cusparse_handle) { CUSPARSE_CALL(cusparseCreate(&(thr_entry->cusparse_handle))); } CUSPARSE_CALL(cusparseSetStream(thr_entry->cusparse_handle, strm_id)); // all one data array DType* valptr = nullptr; if (!A_data) { valptr = static_cast(device->AllocWorkspace(ctx, nnz * sizeof(DType))); _Fill(valptr, nnz, static_cast(1.)); } #if CUDART_VERSION >= 11000 cusparseSpMatDescr_t matA; cusparseDnMatDescr_t matB, matC; constexpr auto dtype = cuda_dtype::value; constexpr auto idtype = cusparse_idtype::value; CUSPARSE_CALL(cusparseCreateCsr( &matA, m, k, nnz, static_cast(csr.indptr->data), static_cast(csr.indices->data), const_cast(valptr ? 
valptr : A_data), idtype, idtype, CUSPARSE_INDEX_BASE_ZERO, dtype)); CUSPARSE_CALL(cusparseCreateDnMat( &matB, k, n, n, const_cast(B_data), dtype, CUSPARSE_ORDER_ROW)); CUSPARSE_CALL( cusparseCreateDnMat(&matC, m, n, n, C_data, dtype, CUSPARSE_ORDER_ROW)); auto transA = CUSPARSE_OPERATION_NON_TRANSPOSE; auto transB = CUSPARSE_OPERATION_NON_TRANSPOSE; size_t workspace_size; cusparseSpMMAlg_t spmm_alg = use_deterministic_alg_only ? CUSPARSE_SPMM_CSR_ALG3 : CUSPARSE_SPMM_CSR_ALG2; CUSPARSE_CALL(cusparseSpMM_bufferSize( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, spmm_alg, &workspace_size)); void* workspace = device->AllocWorkspace(ctx, workspace_size); CUSPARSE_CALL(cusparseSpMM( thr_entry->cusparse_handle, transA, transB, &alpha, matA, matB, &beta, matC, dtype, spmm_alg, workspace)); device->FreeWorkspace(ctx, workspace); CUSPARSE_CALL(cusparseDestroySpMat(matA)); CUSPARSE_CALL(cusparseDestroyDnMat(matB)); CUSPARSE_CALL(cusparseDestroyDnMat(matC)); #else cusparseMatDescr_t descr; CUSPARSE_CALL(cusparseCreateMatDescr(&descr)); CUSPARSE_CALL(cusparseSetMatType(descr, CUSPARSE_MATRIX_TYPE_GENERAL)); CUSPARSE_CALL(cusparseSetMatIndexBase(descr, CUSPARSE_INDEX_BASE_ZERO)); CHECK_EQ(sizeof(IdType), sizeof(int32_t)); CUSPARSE_CALL(Xcsrmm2( thr_entry->cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_TRANSPOSE, m, n, k, nnz, &alpha, descr, (valptr) ? valptr : A_data, static_cast(csr.indptr->data), static_cast(csr.indices->data), B_data, n, &beta, C_data, m)); CUSPARSE_CALL(cusparseDestroyMatDescr(descr)); #endif if (valptr) device->FreeWorkspace(ctx, valptr); } } // namespace #define SWITCH_OP(op, Op, ...) 
\ do { \ if ((op) == "add") { \ typedef cuda::binary::Add Op; \ { __VA_ARGS__ } \ } else if ((op) == "sub") { \ typedef cuda::binary::Sub Op; \ { __VA_ARGS__ } \ } else if ((op) == "mul") { \ typedef cuda::binary::Mul Op; \ { __VA_ARGS__ } \ } else if ((op) == "div") { \ typedef cuda::binary::Div Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_lhs") { \ typedef cuda::binary::CopyLhs Op; \ { __VA_ARGS__ } \ } else if ((op) == "copy_rhs") { \ typedef cuda::binary::CopyRhs Op; \ { __VA_ARGS__ } \ } else { \ LOG(FATAL) << "Unsupported SpMM binary operator: " << op; \ } \ } while (0) namespace cuda { /** * @brief CUDA kernel of g-SpMM on Coo format. * @note it uses edge parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different edges. Threadblocks * on the x-axis are responsible for the computation on different * positions in feature dimension. To avoid possible data hazards, it uses * atomic operators for reduction. */ template < typename Idx, typename DType, typename BinaryOp, typename ReduceOp, bool UseBcast = false, bool UseIdx = false> __global__ void SpMMCooKernel( const DType* __restrict__ ufeat, const DType* __restrict__ efeat, DType* __restrict__ out, Idx* __restrict__ arg_u, Idx* __restrict__ arg_e, const Idx* __restrict__ row, const Idx* __restrict__ col, const Idx* __restrict__ edge_map, int64_t N, int64_t M, int64_t E, const int64_t* __restrict__ ubcast_off, const int64_t* __restrict__ ebcast_off, int64_t ufeat_len, int64_t efeat_len, int64_t out_len) { // SPMM with COO. Idx ty = blockIdx.y * blockDim.y + threadIdx.y; const Idx stride_y = blockDim.y * gridDim.y; while (ty < E) { const Idx src = _ldg(row + ty); const Idx dst = _ldg(col + ty); const Idx eid = UseIdx ? _ldg(edge_map + ty) : ty; int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; const int64_t stride_x = blockDim.x * gridDim.x; const DType* uoff = BinaryOp::use_lhs ? (ufeat + src * ufeat_len) : nullptr; const DType* eoff = BinaryOp::use_rhs ? 
(efeat + eid * efeat_len) : nullptr; DType* outoff = out + dst * out_len; while (tx < out_len) { const int64_t lhs_add = UseBcast ? ubcast_off[tx] : tx; const int64_t rhs_add = UseBcast ? ebcast_off[tx] : tx; DType val = BinaryOp::Call(uoff + lhs_add, eoff + rhs_add); Idx* arguoff = nullptr; // arguoff is not used in SpMMCoo. Idx* argeoff = nullptr; // argeoff is not used in SpMMCoo. ReduceOp::Call(outoff + tx, arguoff, argeoff, val, src, eid); tx += stride_x; } ty += stride_y; } } /** * @brief CUDA kernel to compute argu and arge in g-SpMM on Coo format. * @note it uses edge parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different edges. Threadblocks * on the x-axis are responsible for the computation on different * positions in feature dimension. */ template < typename Idx, typename DType, typename BinaryOp, typename ReduceOp, bool UseBcast = false, bool UseIdx = false> __global__ void ArgSpMMCooKernel( const DType* __restrict__ ufeat, const DType* __restrict__ efeat, DType* __restrict__ out, Idx* __restrict__ arg_u, Idx* __restrict__ arg_e, const Idx* __restrict__ row, const Idx* __restrict__ col, const Idx* __restrict__ edge_map, int64_t N, int64_t M, int64_t E, const int64_t* __restrict__ ubcast_off, const int64_t* __restrict__ ebcast_off, int64_t ufeat_len, int64_t efeat_len, int64_t out_len) { // SPMM with COO arg max/min. Idx ty = blockIdx.y * blockDim.y + threadIdx.y; const Idx stride_y = blockDim.y * gridDim.y; while (ty < E) { const Idx src = _ldg(row + ty); const Idx dst = _ldg(col + ty); const Idx eid = UseIdx ? _ldg(edge_map + ty) : ty; int64_t tx = blockIdx.x * blockDim.x + threadIdx.x; const int64_t stride_x = blockDim.x * gridDim.x; const DType* uoff = BinaryOp::use_lhs ? (ufeat + src * ufeat_len) : nullptr; const DType* eoff = BinaryOp::use_rhs ? (efeat + eid * efeat_len) : nullptr; const DType* outoff = out + dst * out_len; Idx* arguoff = BinaryOp::use_lhs ? 
(arg_u + dst * out_len) : nullptr; Idx* argeoff = BinaryOp::use_rhs ? (arg_e + dst * out_len) : nullptr; while (tx < out_len) { int64_t lhs_add = UseBcast ? ubcast_off[tx] : tx; int64_t rhs_add = UseBcast ? ebcast_off[tx] : tx; DType val = BinaryOp::Call(uoff + lhs_add, eoff + rhs_add); ReduceOp::CallArg(tx, arguoff, argeoff, val, outoff[tx], src, eid); tx += stride_x; } ty += stride_y; } } /** * @brief CUDA kernel of g-SpMM on Csr format. * @note it uses node parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different destination nodes. * Threadblocks on the x-axis are responsible for the computation on * different positions in feature dimension. */ template < typename Idx, typename DType, typename BinaryOp, typename ReduceOp, bool UseBcast = false, bool UseIdx = false> __global__ void SpMMCsrKernel( const DType* __restrict__ ufeat, const DType* __restrict__ efeat, DType* __restrict__ out, Idx* __restrict__ arg_u, Idx* __restrict__ arg_e, const Idx* __restrict__ indptr, const Idx* __restrict__ indices, const Idx* __restrict__ edge_map, int64_t num_rows, int64_t num_cols, const int64_t* __restrict__ ubcast_off, const int64_t* __restrict__ ebcast_off, int64_t ufeat_len, int64_t efeat_len, int64_t out_len) { // SPMM with CSR. int ty = blockIdx.x * blockDim.y + threadIdx.y; const Idx stride_y = blockDim.y * gridDim.x; const int stride_x = blockDim.x * gridDim.y; while (ty < num_rows) { int tx = blockIdx.y * blockDim.x + threadIdx.x; while (tx < out_len) { typename accum_dtype::type local_accum = ReduceOp::zero(); Idx local_argu = 0, local_arge = 0; const int lhs_add = UseBcast ? ubcast_off[tx] : tx; const int rhs_add = UseBcast ? ebcast_off[tx] : tx; for (Idx i = indptr[ty]; i < indptr[ty + 1]; ++i) { const Idx eid = UseIdx ? _ldg(edge_map + i) : i; const Idx cid = _ldg(indices + i); const DType* uoff = BinaryOp::use_lhs ? (ufeat + cid * ufeat_len) : nullptr; const DType* eoff = BinaryOp::use_rhs ? 
(efeat + eid * efeat_len) : nullptr; DType out = BinaryOp::Call(uoff + lhs_add, eoff + rhs_add); ReduceOp::Call(&local_accum, &local_argu, &local_arge, out, cid, eid); } // The use of += is to compute cross-type reducing on heterogeneous graph // when reduce op is `sum`. // C = SpMM(SpA, B) + C // Separate kernel `SpMMCmpCsrHeteroKernel` is used for max- and // min-reducer. It does not affect the output on homogeneous graph as // `out` is initialized to zero. out[ty * out_len + tx] += static_cast(local_accum); if (ReduceOp::require_arg && BinaryOp::use_lhs) arg_u[ty * out_len + tx] = local_argu; if (ReduceOp::require_arg && BinaryOp::use_rhs) arg_e[ty * out_len + tx] = local_arge; tx += stride_x; } ty += stride_y; } } /** * @brief CUDA kernel of SpMM-Min/Max on Csr format. * @note it uses node parallel strategy, different threadblocks (on y-axis) * is responsible for the computation on different destination nodes. * Threadblocks on the x-axis are responsible for the computation on * different positions in feature dimension. */ template < typename Idx, typename DType, typename BinaryOp, typename ReduceOp, bool UseBcast = false, bool UseIdx = false> __global__ void SpMMCmpCsrHeteroKernel( const DType* __restrict__ ufeat, const DType* __restrict__ efeat, DType* __restrict__ out, Idx* __restrict__ arg_u, Idx* __restrict__ arg_e, Idx* __restrict__ arg_u_ntype, Idx* __restrict__ arg_e_etype, const Idx* __restrict__ indptr, const Idx* __restrict__ indices, const Idx* __restrict__ edge_map, int64_t num_rows, int64_t num_cols, const int64_t* __restrict__ ubcast_off, const int64_t* __restrict__ ebcast_off, int64_t ufeat_len, int64_t efeat_len, int64_t out_len, const int src_type, const int etype) { // SPMM with CSR. 
int ty = blockIdx.y * blockDim.y + threadIdx.y; const Idx stride_y = blockDim.y * gridDim.y; const int stride_x = blockDim.x * gridDim.x; while (ty < num_rows) { int tx = blockIdx.x * blockDim.x + threadIdx.x; while (tx < out_len) { using accum_type = typename accum_dtype::type; accum_type local_accum = static_cast(out[ty * out_len + tx]); // ReduceOp::zero(); Idx local_argu = 0, local_arge = 0; const int lhs_add = UseBcast ? ubcast_off[tx] : tx; const int rhs_add = UseBcast ? ebcast_off[tx] : tx; for (Idx i = indptr[ty]; i < indptr[ty + 1]; ++i) { const Idx eid = UseIdx ? _ldg(edge_map + i) : i; const Idx cid = _ldg(indices + i); const DType* uoff = BinaryOp::use_lhs ? (ufeat + cid * ufeat_len) : nullptr; const DType* eoff = BinaryOp::use_rhs ? (efeat + eid * efeat_len) : nullptr; DType tmp_out = BinaryOp::Call(uoff + lhs_add, eoff + rhs_add); ReduceOp::Call( &local_accum, &local_argu, &local_arge, tmp_out, cid, eid); } // Update output only when max/min values are different that original // output DType new_out = static_cast(local_accum); if (out[ty * out_len + tx] != new_out) { out[ty * out_len + tx] = new_out; if (ReduceOp::require_arg && BinaryOp::use_lhs) { arg_u[ty * out_len + tx] = local_argu; arg_u_ntype[ty * out_len + tx] = src_type; } if (ReduceOp::require_arg && BinaryOp::use_rhs) { arg_e[ty * out_len + tx] = local_arge; arg_e_etype[ty * out_len + tx] = etype; } } tx += stride_x; } ty += stride_y; } } /** * @brief CUDA implementation of g-SpMM on Coo format. * @param bcast Broadcast information. * @param coo The Coo matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. * @param argu Arg-Min/Max on source nodes, which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param arge Arg-Min/Max on edges. 
which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. */ template void SpMMCoo( const BcastOff& bcast, const COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge) { /** * TODO(Xin): Disable half precision for SpMMCoo due to the round-off error. * We should use fp32 for the accumulation but it's hard to modify the * current implementation. */ #if BF16_ENABLED if (std::is_same::value || std::is_same::value) #else if (std::is_same::value) #endif // BF16_ENABLED LOG(FATAL) << "SpMMCoo doesn't support half precision fow now. " << "Please use SpMMCsr instead by allowing the graph " << "materialize CSR/CSC formats."; const Idx *row = coo.row.Ptr(), *col = coo.col.Ptr(), *edge_map = coo.data.Ptr(); const DType *ufeat_data = ufeat.Ptr(), *efeat_data = efeat.Ptr(); DType* out_data = out.Ptr(); Idx *argu_data = argu.Ptr(), *arge_data = arge.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); const int64_t N = coo.num_rows, M = coo.num_cols, E = coo.row->shape[0]; int64_t *ubcast_off = nullptr, *ebcast_off = nullptr; int64_t len = bcast.out_len, lhs_len = bcast.lhs_len, rhs_len = bcast.rhs_len; int64_t out_size = out.NumElements(); const int nt = FindNumThreads(out_size); const int nb = (out_size + nt - 1) / nt; CUDA_KERNEL_CALL( _FillKernel, nb, nt, 0, stream, out_data, out_size, ReduceOp::zero()); const int ntx = FindNumThreads(len); const int nty = CUDA_MAX_NUM_THREADS / ntx; const int nbx = (len + ntx - 1) / ntx; const int nby = FindNumBlocks<'y'>((E + nty - 1) / nty); const dim3 nblks(nbx, nby); const dim3 nthrs(ntx, nty); const bool use_idx = !IsNullArray(coo.data); BCAST_IDX_CTX_SWITCH(bcast, use_idx, ufeat->ctx, ubcast_off, ebcast_off, { CUDA_KERNEL_CALL( (SpMMCooKernel), nblks, nthrs, 0, stream, ufeat_data, efeat_data, out_data, argu_data, arge_data, row, col, edge_map, N, M, E, ubcast_off, 
ebcast_off, lhs_len, rhs_len, len); if (ReduceOp::require_arg) { CUDA_KERNEL_CALL( (ArgSpMMCooKernel), nblks, nthrs, 0, stream, ufeat_data, efeat_data, out_data, argu_data, arge_data, row, col, edge_map, N, M, E, ubcast_off, ebcast_off, lhs_len, rhs_len, len); } }); } /** * @brief CUDA implementation of g-SpMM on Csr format. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. * @param argu Arg-Min/Max on source nodes, which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param arge Arg-Min/Max on edges. which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. */ template void SpMMCsr( const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge) { const Idx* indptr = csr.indptr.Ptr(); const Idx* indices = csr.indices.Ptr(); const Idx* edge_map = csr.data.Ptr(); const DType* ufeat_data = ufeat.Ptr(); const DType* efeat_data = efeat.Ptr(); DType* out_data = out.Ptr(); Idx* argu_data = argu.Ptr(); Idx* arge_data = arge.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); int64_t *ubcast_off = nullptr, *ebcast_off = nullptr; int64_t len = bcast.out_len, lhs_len = bcast.lhs_len, rhs_len = bcast.rhs_len; const int ntx = FindNumThreads(len); const int nty = CUDA_MAX_NUM_THREADS / ntx; const int nby = (len + ntx - 1) / ntx; const int nbx = FindNumBlocks<'x'>((csr.num_rows + nty - 1) / nty); const dim3 nblks(nbx, nby); const dim3 nthrs(ntx, nty); const bool use_idx = !IsNullArray(csr.data); BCAST_IDX_CTX_SWITCH( bcast, use_idx, ufeat->ctx, ubcast_off, ebcast_off, {CUDA_KERNEL_CALL( (SpMMCsrKernel), nblks, nthrs, 0, 
stream, ufeat_data, efeat_data, out_data, argu_data, arge_data, indptr, indices, edge_map, csr.num_rows, csr.num_cols, ubcast_off, ebcast_off, lhs_len, rhs_len, len)}); } /** * @brief CUDA kernel of SpMM-Min/Max on Csr format on heterogeneous graph. * @param bcast Broadcast information. * @param csr The Csr matrix. * @param ufeat The feature on source nodes. * @param efeat The feature on edges. * @param out The result feature on destination nodes. * @param argu Arg-Min/Max on source nodes, which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param arge Arg-Min/Max on edges. which refers the source node indices * correspond to the minimum/maximum values of reduction result on * destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param argu_ntype Node type of the arg-Min/Max on source nodes, which refers * the source node types correspond to the minimum/maximum values of reduction * result on destination nodes. It's useful in computing gradients of Min/Max * reducer. * @param arge_etype Edge-type of the arg-Min/Max on edges. which refers the * source node indices correspond to the minimum/maximum values of reduction * result on destination nodes. It's useful in computing gradients of Min/Max * reducer. 
* @param src_type Node type of the source nodes of an etype * @param etype Edge type */ template void SpMMCmpCsrHetero( const BcastOff& bcast, const CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out, NDArray argu, NDArray arge, NDArray argu_ntype, NDArray arge_etype, const int src_type, const int etype) { const Idx* indptr = csr.indptr.Ptr(); const Idx* indices = csr.indices.Ptr(); const Idx* edge_map = csr.data.Ptr(); const DType* ufeat_data = ufeat.Ptr(); const DType* efeat_data = efeat.Ptr(); DType* out_data = out.Ptr(); Idx* argu_data = argu.Ptr(); Idx* arge_data = arge.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); int64_t *ubcast_off = nullptr, *ebcast_off = nullptr; int64_t len = bcast.out_len, lhs_len = bcast.lhs_len, rhs_len = bcast.rhs_len; const int ntx = FindNumThreads(len); const int nty = CUDA_MAX_NUM_THREADS / ntx; const int nbx = (len + ntx - 1) / ntx; const int nby = FindNumBlocks<'y'>((csr.num_rows + nty - 1) / nty); const dim3 nblks(nbx, nby); const dim3 nthrs(ntx, nty); const bool use_idx = !IsNullArray(csr.data); BCAST_IDX_CTX_SWITCH( bcast, use_idx, ufeat->ctx, ubcast_off, ebcast_off, {CUDA_KERNEL_CALL( (SpMMCmpCsrHeteroKernel< Idx, DType, BinaryOp, ReduceOp, UseBcast, UseIdx>), nblks, nthrs, 0, stream, ufeat_data, efeat_data, out_data, argu_data, arge_data, static_cast(argu_ntype->data), static_cast(arge_etype->data), indptr, indices, edge_map, csr.num_rows, csr.num_cols, ubcast_off, ebcast_off, lhs_len, rhs_len, len, src_type, etype)}); } } // namespace cuda } // namespace aten } // namespace dgl #endif // DGL_ARRAY_CUDA_SPMM_CUH_ ================================================ FILE: src/array/cuda/spmm_hetero.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/spmm.cu * @brief SPMM C APIs and definitions. 
*/ #include #include #include "../../runtime/cuda/cuda_common.h" #include "./functor.cuh" #include "./ge_spmm.cuh" #include "./spmm.cuh" namespace dgl { using namespace cuda; namespace aten { /** * @brief CUDA implementation of g-SpMM on Csr format. * @note use cusparse if the reduce operator is `sum` and there is * no broadcast, use dgl's kernel in other cases. */ template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& vec_csr, const std::vector& vec_ufeat, const std::vector& vec_efeat, std::vector* vec_out, std::vector>* out_aux, const std::vector& ufeat_ntids, // ufeat node type id const std::vector& out_ntids) { // output node type id bool is_scalar_efeat = vec_efeat[0].NumElements() == vec_csr[0].indices->shape[0]; bool use_efeat = op != "copy_lhs"; auto device = runtime::DeviceAPI::Get(vec_csr[0].indptr->ctx); std::vector trans_out((*vec_out).size(), NULL); bool use_deterministic_alg_only = false; if (NULL != std::getenv("USE_DETERMINISTIC_ALG")) use_deterministic_alg_only = true; bool use_legacy_cusparsemm = (CUDART_VERSION < 11000) && (reduce == "sum") && // legacy cuSPARSE does not care about NNZ, hence the argument "false". 
((op == "copy_lhs" && cusparse_available(false)) || (op == "mul" && is_scalar_efeat && cusparse_available(false))); // Create temporary output buffer to store non-transposed output if (use_legacy_cusparsemm) { for (dgl_type_t ntype = 0; ntype < (*vec_out).size(); ++ntype) { const int m = (*vec_out)[ntype]->shape[0]; const int n = (*vec_out)[ntype]->shape[1]; if (m == 0) continue; DType* out = static_cast(device->AllocWorkspace( vec_csr[0].indptr->ctx, m * n * sizeof(DType))); CUDA_CALL(cudaMemset(out, 0, m * n * sizeof(DType))); trans_out[ntype] = out; } } // Check shape of ufeat for all relation type and compute feature size int64_t x_length = 1; for (dgl_type_t etype = 0; etype < (ufeat_ntids.size() - 1); ++etype) { NDArray ufeat = vec_ufeat[ufeat_ntids[etype]]; NDArray next_ufeat = vec_ufeat[ufeat_ntids[etype + 1]]; CHECK_EQ(ufeat->ndim, next_ufeat->ndim) << "Input features have different shapes"; for (int i = 1; i < ufeat->ndim; ++i) { if (ufeat->shape[i] != next_ufeat->shape[i]) { if (ufeat->shape[i] == 1 || next_ufeat->shape[i] == 1) LOG(FATAL) << "Homogenized message passing on heterogeneous graphs " "does not support " << "automatic broadcasting. Please manually broadcast it " "before calling " << "message passing functions."; else LOG(FATAL) << "Input features have different shapes."; return; } if (etype == 0) x_length *= ufeat->shape[i]; } } // TODO(Israt): Can python do the following initializations while creating the // tensors? 
if (reduce == "max" || reduce == "min") { const int64_t dim = bcast.out_len; std::vector updated((*vec_out).size(), false); for (dgl_type_t etype = 0; etype < ufeat_ntids.size(); ++etype) { DType* out_off = (*vec_out)[out_ntids[etype]].Ptr(); if (reduce == "max") _Fill( out_off, vec_csr[etype].num_rows * dim, cuda::reduce::Max::zero()); else // min _Fill( out_off, vec_csr[etype].num_rows * dim, cuda::reduce::Min::zero()); const dgl_type_t dst_id = out_ntids[etype]; if (!updated[dst_id]) { updated[dst_id] = true; if (op == "copy_lhs") { IdType* argu_ntype = (*out_aux)[2][dst_id].Ptr(); _Fill( argu_ntype, vec_csr[etype].num_rows * dim, static_cast(-1)); } if (op == "copy_rhs") { IdType* arge_etype = (*out_aux)[3][dst_id].Ptr(); _Fill( arge_etype, vec_csr[etype].num_rows * dim, static_cast(-1)); } } } } cudaStream_t stream = runtime::getCurrentCUDAStream(); for (dgl_type_t etype = 0; etype < ufeat_ntids.size(); ++etype) { const dgl_type_t src_id = ufeat_ntids[etype]; const dgl_type_t dst_id = out_ntids[etype]; CSRMatrix csr = vec_csr[etype]; if (reduce == "sum") { bool more_nnz = (csr.indices->shape[0] > csr.num_rows * csr.num_cols); /* Call SpMM for each relation type */ if (op == "copy_lhs" && cusparse_available(more_nnz)) { // cusparse /* If CUDA is less than 11.0, put the output in trans_out for later * transposition */ DType* out = (CUDART_VERSION < 11000) ? 
trans_out[dst_id] : static_cast((*vec_out)[dst_id]->data); CusparseCsrmm2Hetero( csr.indptr->ctx, csr, static_cast(vec_ufeat[src_id]->data), nullptr, out, x_length, stream, use_deterministic_alg_only); } else if ( op == "mul" && is_scalar_efeat && cusparse_available(more_nnz)) { // cusparse NDArray efeat = vec_efeat[etype]; if (!IsNullArray(csr.data)) efeat = IndexSelect(efeat, csr.data); CusparseCsrmm2Hetero( csr.indptr->ctx, csr, static_cast(vec_ufeat[src_id]->data), static_cast(efeat->data), // TODO(Israt): Change (*vec_out) to trans_out to support CUDA // version < 11 static_cast((*vec_out)[dst_id]->data), x_length, stream, use_deterministic_alg_only); } else { // general kernel NDArray ufeat = (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id]; NDArray efeat = (vec_efeat.size() == 0) ? NullArray() : vec_efeat[etype]; SWITCH_OP(op, Op, { cuda::SpMMCsr>( bcast, csr, ufeat, efeat, (*vec_out)[dst_id], NullArray(), NullArray()); }); } } else if (reduce == "max") { SWITCH_OP(op, Op, { NDArray ufeat = (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id]; NDArray efeat = (vec_efeat.size() == 0) ? NullArray() : vec_efeat[etype]; cuda::SpMMCmpCsrHetero< IdType, DType, Op, cuda::reduce::Max>( bcast, csr, ufeat, efeat, (*vec_out)[dst_id], (*out_aux)[0][dst_id], (*out_aux)[1][dst_id], (*out_aux)[2][dst_id], (*out_aux)[3][dst_id], src_id, etype); }); } else if (reduce == "min") { SWITCH_OP(op, Op, { NDArray ufeat = (vec_ufeat.size() == 0) ? NullArray() : vec_ufeat[src_id]; NDArray efeat = (vec_efeat.size() == 0) ? 
NullArray() : vec_efeat[etype]; cuda::SpMMCmpCsrHetero< IdType, DType, Op, cuda::reduce::Min>( bcast, csr, ufeat, efeat, (*vec_out)[dst_id], (*out_aux)[0][dst_id], (*out_aux)[1][dst_id], (*out_aux)[2][dst_id], (*out_aux)[3][dst_id], src_id, etype); }); } else { LOG(FATAL) << "Not implemented"; } } if (use_legacy_cusparsemm) { // transpose output for (dgl_type_t ntype = 0; ntype < (*vec_out).size(); ++ntype) { const int m = (*vec_out)[ntype]->shape[0]; const int n = (*vec_out)[ntype]->shape[1]; if (m == 0) continue; DType* C_data = static_cast((*vec_out)[ntype]->data); _Transpose(trans_out[ntype], C_data, n, m); device->FreeWorkspace(vec_csr[0].indptr->ctx, trans_out[ntype]); } } } template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); #if BF16_ENABLED template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); #endif // BF16_ENABLED template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const 
std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); template void SpMMCsrHetero( const std::string& op, const std::string& reduce, const BcastOff& bcast, const std::vector& csr, const std::vector& ufeat, const std::vector& efeat, std::vector* out, std::vector>* out_aux, const std::vector& ufeat_ntids, const std::vector& out_ntids); } // namespace aten } // namespace dgl ================================================ FILE: src/array/cuda/utils.cu ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/utils.cu * @brief Utilities for CUDA kernels. 
*/ #include #include "../../runtime/cuda/cuda_common.h" #include "./utils.h" namespace dgl { namespace cuda { bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx) { auto device = runtime::DeviceAPI::Get(ctx); int8_t* rst = static_cast(device->AllocWorkspace(ctx, 1)); // Call CUB's reduction size_t workspace_size = 0; cudaStream_t stream = runtime::getCurrentCUDAStream(); CUDA_CALL(cub::DeviceReduce::Min( nullptr, workspace_size, flags, rst, length, stream)); void* workspace = device->AllocWorkspace(ctx, workspace_size); CUDA_CALL(cub::DeviceReduce::Min( workspace, workspace_size, flags, rst, length, stream)); int8_t cpu_rst = GetCUDAScalar(device, ctx, rst); device->FreeWorkspace(ctx, workspace); device->FreeWorkspace(ctx, rst); return cpu_rst == 1; } } // namespace cuda } // namespace dgl ================================================ FILE: src/array/cuda/utils.h ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cuda/utils.h * @brief Utilities for CUDA kernels. */ #ifndef DGL_ARRAY_CUDA_UTILS_H_ #define DGL_ARRAY_CUDA_UTILS_H_ #include #include #include #include #include #include #include "../../runtime/cuda/cuda_common.h" namespace dgl { namespace cuda { #define CUDA_MAX_NUM_BLOCKS_X 0x7FFFFFFF #define CUDA_MAX_NUM_BLOCKS_Y 0xFFFF #define CUDA_MAX_NUM_BLOCKS_Z 0xFFFF // The max number of threads per block #define CUDA_MAX_NUM_THREADS 256 /** @brief Calculate the number of threads needed given the dimension length. * * It finds the biggest number that is smaller than min(dim, max_nthrs) * and is also power of two. 
*/ inline int FindNumThreads(int dim, int max_nthrs = CUDA_MAX_NUM_THREADS) { CHECK_GE(dim, 0); if (dim == 0) return 1; int ret = max_nthrs; while (ret > dim) { ret = ret >> 1; } return ret; } template int _NumberOfBits(const T& range) { if (range <= 1) { // ranges of 0 or 1 require no bits to store return 0; } int bits = 1; const auto urange = static_cast>(range); while (bits < static_cast(sizeof(T) * 8) && (1ull << bits) < urange) { ++bits; } if (bits < static_cast(sizeof(T) * 8)) { CHECK_EQ((range - 1) >> bits, 0); } CHECK_NE((range - 1) >> (bits - 1), 0); return bits; } /** * @brief Find number of blocks is smaller than nblks and max_nblks * on the given axis ('x', 'y' or 'z'). */ template inline int FindNumBlocks(int nblks, int max_nblks = -1) { int default_max_nblks = -1; switch (axis) { case 'x': default_max_nblks = CUDA_MAX_NUM_BLOCKS_X; break; case 'y': default_max_nblks = CUDA_MAX_NUM_BLOCKS_Y; break; case 'z': default_max_nblks = CUDA_MAX_NUM_BLOCKS_Z; break; default: LOG(FATAL) << "Axis " << axis << " not recognized"; break; } if (max_nblks == -1) max_nblks = default_max_nblks; CHECK_NE(nblks, 0); if (nblks < max_nblks) return nblks; return max_nblks; } template __device__ __forceinline__ T _ldg(T* addr) { #if __CUDA_ARCH__ >= 350 return __ldg(addr); #else return *addr; #endif } /** * @brief Return true if the given bool flag array is all true. * The input bool array is in int8_t type so it is aligned with byte address. * * @param flags The bool array. * @param length The length. * @param ctx Device context. * @return True if all the flags are true. */ bool AllTrue(int8_t* flags, int64_t length, const DGLContext& ctx); /** * @brief CUDA Kernel of filling the vector started from ptr of size length * with val. * @note internal use only. 
*/ template __global__ void _FillKernel(DType* ptr, size_t length, DType val) { int tx = blockIdx.x * blockDim.x + threadIdx.x; int stride_x = gridDim.x * blockDim.x; while (tx < length) { ptr[tx] = val; tx += stride_x; } } /** @brief Fill the vector started from ptr of size length with val */ template void _Fill(DType* ptr, size_t length, DType val) { cudaStream_t stream = runtime::getCurrentCUDAStream(); int nt = FindNumThreads(length); int nb = (length + nt - 1) / nt; // on x-axis, no need to worry about upperbound. CUDA_KERNEL_CALL(cuda::_FillKernel, nb, nt, 0, stream, ptr, length, val); } /** * @brief Search adjacency list linearly for each (row, col) pair and * write the data under the matched position in the indices array to the output. * * If there is no match, the value in \c filler is written. * If there are multiple matches, only the first match is written. * If the given data array is null, write the matched position to the output. */ template __global__ void _LinearSearchKernel( const IdType* indptr, const IdType* indices, const IdType* data, const IdType* row, const IdType* col, int64_t row_stride, int64_t col_stride, int64_t length, const DType* weights, DType filler, DType* out) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < length) { int rpos = tx * row_stride, cpos = tx * col_stride; IdType v = -1; const IdType r = row[rpos], c = col[cpos]; for (IdType i = indptr[r]; i < indptr[r + 1]; ++i) { if (indices[i] == c) { v = data ? data[i] : i; break; } } if (v == -1) { out[tx] = filler; } else { // The casts here are to be able to handle DType being __half. // GCC treats int64_t as a distinct type from long long, so // without the explcit cast to long long, it errors out saying // that the implicit cast results in an ambiguous choice of // constructor for __half. // The using statement is to avoid a linter error about using // long or long long. 
using LongLong = long long; // NOLINT out[tx] = weights ? weights[v] : DType(LongLong(v)); } tx += stride_x; } } #if BF16_ENABLED /** * @brief Specialization for bf16 because conversion from long long to bfloat16 * doesn't exist before SM80. */ template __global__ void _LinearSearchKernel( const IdType* indptr, const IdType* indices, const IdType* data, const IdType* row, const IdType* col, int64_t row_stride, int64_t col_stride, int64_t length, const __nv_bfloat16* weights, __nv_bfloat16 filler, __nv_bfloat16* out) { int tx = blockIdx.x * blockDim.x + threadIdx.x; const int stride_x = gridDim.x * blockDim.x; while (tx < length) { int rpos = tx * row_stride, cpos = tx * col_stride; IdType v = -1; const IdType r = row[rpos], c = col[cpos]; for (IdType i = indptr[r]; i < indptr[r + 1]; ++i) { if (indices[i] == c) { v = data ? data[i] : i; break; } } if (v == -1) { out[tx] = filler; } else { // If the result is saved in bf16, it should be fine to convert it to // float first out[tx] = weights ? weights[v] : __nv_bfloat16(static_cast(v)); } tx += stride_x; } } #endif // BF16_ENABLED template inline DType GetCUDAScalar( runtime::DeviceAPI* device_api, DGLContext ctx, const DType* cuda_ptr) { DType result; device_api->CopyDataFromTo( cuda_ptr, 0, &result, 0, sizeof(result), ctx, DGLContext{kDGLCPU, 0}, DGLDataTypeTraits::dtype); return result; } /** * @brief Given a sorted array and a value this function returns the index * of the first element which compares greater than value. * * This function assumes 0-based index * @param A: ascending sorted array * @param n: size of the A * @param x: value to search in A * @return index, i, of the first element st. A[i]>x. If x>=A[n-1] returns n. 
* if x __device__ IdType _UpperBound(const IdType* A, int64_t n, IdType x) { IdType l = 0, r = n, m = 0; while (l < r) { m = l + (r - l) / 2; if (x >= A[m]) { l = m + 1; } else { r = m; } } return l; } /** * @brief Given a sorted array and a value this function returns the index * of the element who is equal to val. If not exist returns n+1 * * This function assumes 0-based index * @param A: ascending sorted array * @param n: size of the A * @param x: value to search in A * @return index, i, st. A[i]==x. If such an index not exists returns 'n'. */ template __device__ IdType _BinarySearch(const IdType* A, int64_t n, IdType x) { IdType l = 0, r = n - 1, m = 0; while (l <= r) { m = l + (r - l) / 2; if (A[m] == x) { return m; } if (A[m] < x) { l = m + 1; } else { r = m - 1; } } return n; // not found } template void MaskSelect( runtime::DeviceAPI* device, const DGLContext& ctx, const DType* input, const BoolType* mask, DType* output, int64_t n, int64_t* rst, cudaStream_t stream) { size_t workspace_size = 0; CUDA_CALL(cub::DeviceSelect::Flagged( nullptr, workspace_size, input, mask, output, rst, n, stream)); void* workspace = device->AllocWorkspace(ctx, workspace_size); CUDA_CALL(cub::DeviceSelect::Flagged( workspace, workspace_size, input, mask, output, rst, n, stream)); device->FreeWorkspace(ctx, workspace); } inline void* GetDevicePointer(runtime::NDArray array) { void* ptr = array->data; if (array.IsPinned()) { CUDA_CALL(cudaHostGetDevicePointer(&ptr, ptr, 0)); } return ptr; } } // namespace cuda } // namespace dgl #endif // DGL_ARRAY_CUDA_UTILS_H_ ================================================ FILE: src/array/cuda/uvm/array_index_select_uvm.cu ================================================ /** * Copyright (c) 2019-2022 by Contributors * @file array/cuda/uvm/array_index_select_uvm.cu * @brief Array index select GPU implementation */ #include #include "../../../runtime/cuda/cuda_common.h" #include "../array_index_select.cuh" #include "../utils.h" #include 
"./array_index_select_uvm.cuh" namespace dgl { using runtime::NDArray; namespace aten { namespace impl { template NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const int64_t arr_len = array->shape[0]; const int64_t len = index->shape[0]; int64_t num_feat = 1; std::vector shape{len}; CHECK(array.IsPinned()); const DType* array_data = static_cast(cuda::GetDevicePointer(array)); CHECK_EQ(index->ctx.device_type, kDGLCUDA); for (int d = 1; d < array->ndim; ++d) { num_feat *= array->shape[d]; shape.emplace_back(array->shape[d]); } NDArray ret = NDArray::Empty(shape, array->dtype, index->ctx); if (len == 0 || arr_len * num_feat == 0) return ret; DType* ret_data = static_cast(ret->data); auto res = Sort(index, cuda::_NumberOfBits(arr_len)); const IdType* idx_data = static_cast(res.first->data); const int64_t* perm_data = static_cast(res.second->data); if (num_feat == 1) { const int nt = cuda::FindNumThreads(len); const int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( IndexSelectSingleKernel, nb, nt, 0, stream, array_data, idx_data, len, arr_len, ret_data, perm_data); } else { dim3 block(256, 1); while (static_cast(block.x) >= 2 * num_feat) { block.x /= 2; block.y *= 2; } const dim3 grid((len + block.y - 1) / block.y); if (num_feat * sizeof(DType) < 2 * CACHE_LINE_SIZE) { CUDA_KERNEL_CALL( IndexSelectMultiKernel, grid, block, 0, stream, array_data, num_feat, idx_data, len, arr_len, ret_data, perm_data); } else { CUDA_KERNEL_CALL( IndexSelectMultiKernelAligned, grid, block, 0, stream, array_data, num_feat, idx_data, len, arr_len, ret_data, perm_data); } } return ret; } // floating point types are treated as their equal width integer types template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray 
IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template NDArray IndexSelectCPUFromGPU(NDArray, IdArray); template void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const DType* source_data = static_cast(source->data); const IdType* idx_data = static_cast(index->data); const int64_t arr_len = dest->shape[0]; const int64_t len = index->shape[0]; int64_t num_feat = 1; std::vector shape{len}; CHECK(dest.IsPinned()); DType* dest_data = static_cast(cuda::GetDevicePointer(dest)); CHECK_EQ(index->ctx.device_type, kDGLCUDA); CHECK_EQ(source->ctx.device_type, kDGLCUDA); for (int d = 1; d < source->ndim; ++d) { num_feat *= source->shape[d]; } if (len == 0) return; if (num_feat == 1) { const int nt = cuda::FindNumThreads(len); const int nb = (len + nt - 1) / nt; CUDA_KERNEL_CALL( IndexScatterSingleKernel, nb, nt, 0, stream, source_data, idx_data, len, arr_len, dest_data); } else { dim3 block(256, 1); while (static_cast(block.x) >= 2 * num_feat) { block.x /= 2; block.y *= 2; } const dim3 grid((len + block.y - 1) / block.y); CUDA_KERNEL_CALL( IndexScatterMultiKernel, grid, block, 0, stream, source_data, num_feat, idx_data, len, arr_len, dest_data); } } // floating point types are treated as their equal width integer types template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); template void IndexScatterGPUToCPU(NDArray, IdArray, NDArray); } // namespace impl } // namespace aten } // namespace dgl 
================================================
FILE: src/array/cuda/uvm/array_index_select_uvm.cuh
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file array/cuda/uvm/array_index_select_uvm.cuh
 * @brief Array index select GPU kernel implementation
 */

#ifndef DGL_ARRAY_CUDA_UVM_ARRAY_INDEX_SELECT_UVM_CUH_
#define DGL_ARRAY_CUDA_UVM_ARRAY_INDEX_SELECT_UVM_CUH_

#define CACHE_LINE_SIZE 128

namespace dgl {
namespace aten {
namespace impl {

/**
 *  This is a cross-device access version of IndexSelectMultiKernel.
 *  Since the memory access over PCIe is more sensitive to the
 *  data access alignment (cacheline), we need a separate version here.
 *
 *  Each thread row (threadIdx.y) handles one output row at a time and the
 *  threads along x copy its features.
 *
 *  @param array Source rows, num_feat elements each.
 *  @param num_feat Number of elements per row.
 *  @param index Source row id for each output row.
 *  @param length Number of rows to copy.
 *  @param arr_len Number of rows in array; only used by the bounds assert.
 *  @param out Destination rows, num_feat elements each.
 *  @param perm Optional mapping from position in index to output row; when
 *         null, position i is written to output row i.
 */
template
__global__ void IndexSelectMultiKernelAligned(
    const DType* const array, const int64_t num_feat, const IdType* const index,
    const int64_t length, const int64_t arr_len, DType* const out,
    const int64_t* perm = nullptr) {
  int64_t out_row_index = blockIdx.x * blockDim.y + threadIdx.y;

  const int64_t stride = blockDim.y * gridDim.x;

  while (out_row_index < length) {
    int64_t col = threadIdx.x;
    const int64_t in_row = index[out_row_index];
    assert(in_row >= 0 && in_row < arr_len);
    // Number of elements by which the start of the source row lies past the
    // previous CACHE_LINE_SIZE-byte boundary.
    const int64_t idx_offset =
        ((uint64_t)(&array[in_row * num_feat]) % CACHE_LINE_SIZE) /
        sizeof(DType);
    // Shift the starting column back by that amount so thread 0 lines up
    // with the boundary; columns that become negative are skipped below.
    col = col - idx_offset;
    const auto out_row = perm ? perm[out_row_index] : out_row_index;
    while (col < num_feat) {
      if (col >= 0)
        out[out_row * num_feat + col] = array[in_row * num_feat + col];
      col += blockDim.x;
    }
    out_row_index += stride;
  }
}

}  // namespace impl
}  // namespace aten
}  // namespace dgl

#endif  // DGL_ARRAY_CUDA_UVM_ARRAY_INDEX_SELECT_UVM_CUH_

================================================
FILE: src/array/filter.cc
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file array/filter.cc
 * @brief Object for selecting items in a set, or selecting items not in a set.
*/

#include "./filter.h"

#include
#include
#include

namespace dgl {
namespace array {

using namespace dgl::runtime;

// Declared here and defined elsewhere; called below for CUDA arrays only.
template
FilterRef CreateSetFilter(IdArray set);

// Build a filter from the set of ids in args[0]. Only arrays on a CUDA
// device are accepted; everything else is a fatal error.
DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterCreateFromSet")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      IdArray array = args[0];
      auto ctx = array->ctx;
      // TODO(nv-dlasalle): Implement CPU version.
      if (ctx.device_type == kDGLCUDA) {
#ifdef DGL_USE_CUDA
        ATEN_ID_TYPE_SWITCH(array->dtype, IdType, {
          *rv = CreateSetFilter(array);
        });
#else
        LOG(FATAL) << "GPU support not compiled.";
#endif
      } else {
        LOG(FATAL) << "CPU support not yet implemented.";
      }
    });

// Return the indices of the items of args[1] that the filter in args[0]
// includes.
DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterFindIncludedIndices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      FilterRef filter = args[0];
      IdArray array = args[1];
      *rv = filter->find_included_indices(array);
    });

// Return the indices of the items of args[1] that the filter in args[0]
// excludes.
DGL_REGISTER_GLOBAL("utils.filter._CAPI_DGLFilterFindExcludedIndices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      FilterRef filter = args[0];
      IdArray array = args[1];
      *rv = filter->find_excluded_indices(array);
    });

}  // namespace array
}  // namespace dgl

================================================
FILE: src/array/filter.h
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file array/filter.h
 * @brief Object for selecting items in a set, or selecting items not in a set.
 */

#ifndef DGL_ARRAY_FILTER_H_
#define DGL_ARRAY_FILTER_H_

#include
#include

namespace dgl {
namespace array {

/**
 * @brief Abstract interface of a filter object; implementations provide the
 * two lookups below.
 */
class Filter : public runtime::Object {
 public:
  static constexpr const char* _type_key = "array.Filter";
  DGL_DECLARE_OBJECT_TYPE_INFO(Filter, Object);

  /**
   * @brief From the test set of items, get the indices of those which are
   * included by this filter.
   *
   * @param test The set of items to check for.
   *
   * @return The indices of the items from `test` that are selected by
   * this filter.
   */
  virtual IdArray find_included_indices(IdArray test) = 0;

  /**
   * @brief From the test set of items, get the indices of those which are
   * excluded by this filter.
   *
   * @param test The set of items to check for.
   *
   * @return The indices of the items from `test` that are not selected by this
   * filter.
   */
  virtual IdArray find_excluded_indices(IdArray test) = 0;
};

DGL_DEFINE_OBJECT_REF(FilterRef, Filter);

}  // namespace array
}  // namespace dgl

#endif  // DGL_ARRAY_FILTER_H_

================================================
FILE: src/array/kernel.cc
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file array/kernel.cc
 * @brief New kernels
 */
#include
#include

#include "../c_api_common.h"
#include "./check.h"
#include "kernel_decl.h"

using namespace dgl::runtime;

namespace dgl {
namespace aten {
namespace {}  // namespace

/**
 * @brief Generalized Sparse Matrix-Matrix Multiplication.
 *
 * Dispatches on device type, graph id type and the dtype of out, then runs
 * the CSC or COO implementation on edge type 0 according to the format the
 * graph selects (CSC preferred).
 *
 * @param op Binary operator name, forwarded to the implementation.
 * @param reduce Reduce operator name, forwarded to the implementation.
 * @param graph The input graph.
 * @param ufeat Node feature array.
 * @param efeat Edge feature array.
 * @param out Output array; its dtype selects the floating point dispatch.
 * @param out_aux Auxiliary output arrays, forwarded to the implementation.
 */
void SpMM(
    const std::string& op, const std::string& reduce, HeteroGraphPtr graph,
    NDArray ufeat, NDArray efeat, NDArray out, std::vector out_aux) {
  // TODO(zihao): format tuning
  SparseFormat format = graph->SelectFormat(0, CSC_CODE);
  const auto& bcast = CalcBcastOff(op, ufeat, efeat);

  ATEN_XPU_SWITCH_CUDA(graph->Context().device_type, XPU, "SpMM", {
    ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(out->dtype, Dtype, XPU, "Feature data", {
        if (format == SparseFormat::kCSC) {
          SpMMCsr(
              op, reduce, bcast, graph->GetCSCMatrix(0), ufeat, efeat, out,
              out_aux);
        } else if (format == SparseFormat::kCOO) {
          SpMMCoo(
              op, reduce, bcast, graph->GetCOOMatrix(0), ufeat, efeat, out,
              out_aux);
        } else {
          LOG(FATAL) << "SpMM only supports CSC and COO formats";
        }
      });
    });
  });
}

/** @brief Generalized segmented dense Matrix-Matrix Multiplication.
*/
// Validates the operands and dispatches on device type, the dtype of
// seglen_A and the dtype of A.
//   - A must be 2D and B must be 3D.
//   - A_trans is not supported and must be false.
//   - A.shape[1] must match B.shape[2] (B_trans) or B.shape[1] (otherwise).
//   - seglen_A must have B.shape[0] elements and live on the CPU.
//   - A and B must be in the same context.
void SegmentMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray seglen_A,
    bool A_trans, bool B_trans) {
  CHECK_EQ(A->ndim, 2) << "segment_mm expects a 2D tensor for the first input.";
  CHECK_EQ(B->ndim, 3)
      << "segment_mm expects a 3D tensor for the second input.";
  CHECK(!A_trans);
  if (B_trans) {
    CHECK_EQ(A->shape[1], B->shape[2])
        << "segment_mm expects A.shape[1] == B.shape[2] when B_trans=True";
  } else {
    CHECK_EQ(A->shape[1], B->shape[1])
        << "segment_mm expects A.shape[1] == B.shape[1]";
  }
  CHECK_EQ(B->shape[0], seglen_A.NumElements())
      << "segment_mm expects len(seglen_A) == B.shape[0]";
  CHECK_EQ(seglen_A->ctx.device_type, kDGLCPU)
      << "segment_mm expects seglen_A to be on CPU.";
  CHECK(A->ctx == B->ctx)
      << "segment_mm expects A and B to be of the same device";
  ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "SegmentMM", {
    ATEN_ID_TYPE_SWITCH(seglen_A->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(A->dtype, Dtype, XPU, "Feature data", {
        // NOTE(review): presumably resolves to the device/type-specific
        // implementation; explicit template arguments are not visible in
        // this copy of the source.
        SegmentMM(A, B, C, seglen_A, A_trans, B_trans);
      });
    });
  });
}

/**
 * @brief Dispatch function for the segment_mm gradient with respect to B.
 *
 * A and dC must be 2D and seglen must live on the CPU. Dispatches on device
 * type, the dtype of seglen and the dtype of A.
 */
void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen) {
  CHECK_EQ(A->ndim, 2) << "segment_mm_backward operator expects a 2D tensor "
                          "for the first input.";
  CHECK_EQ(dC->ndim, 2) << "segment_mm_backward operator expects a 2D tensor "
                           "for the second input.";
  CHECK_EQ(seglen->ctx.device_type, kDGLCPU)
      << "segment_mm expects seglen to be on CPU.";
  ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "SegmentMMBackwardB", {
    ATEN_ID_TYPE_SWITCH(seglen->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(A->dtype, Dtype, XPU, "Feature data", {
        SegmentMMBackwardB(A, dC, dB, seglen);
      });
    });
  });
}

/** @brief Generalized Dense Matrix-Matrix Multiplication according to relation
 * types.
*/
// Validates the operands and dispatches on device type, index dtype and the
// dtype of A.
//   - A must be 2D and B must be 3D, both in the same context.
//   - idx_a null:  len(idx_b) must equal A.shape[0].
//   - idx_b null:  len(idx_a) must equal B.shape[0].
//   - both given:  len(idx_a) must equal len(idx_b).
//   - Every index array that is checked must share A's context.
void GatherMM(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b) {
  CHECK_EQ(A->ndim, 2)
      << "gather_mm operator expects a 2D tensor for the first input.";
  CHECK_EQ(B->ndim, 3)
      << "gather_mm operator expects a 3D tensor for the second input.";
  CHECK(A->ctx == B->ctx)
      << "gather_mm expects all arguments to be on the same device.";
  if (aten::IsNullArray(idx_a)) {
    CHECK_EQ(A->shape[0], idx_b->shape[0])
        << "gather_mm expects len(idx_b) == A.shape[0] when idx_a is None.";
    CHECK(A->ctx == idx_b->ctx)
        << "gather_mm expects all arguments to be on the same device.";
  } else if (aten::IsNullArray(idx_b)) {
    CHECK_EQ(B->shape[0], idx_a->shape[0])
        << "gather_mm expects len(idx_a) == B.shape[0] when idx_b is None.";
    CHECK(A->ctx == idx_a->ctx)
        << "gather_mm expects all arguments to be on the same device.";
  } else {
    CHECK_EQ(idx_a->shape[0], idx_b->shape[0])
        << "gather_mm expects len(idx_a) == len(idx_b) when both idx_a and "
           "idx_b are given.";
    CHECK(A->ctx == idx_a->ctx && A->ctx == idx_b->ctx)
        << "gather_mm expects all arguments to be on the same device.";
  }
  // Take the id type from idx_a unless it is null, in which case idx_b
  // supplies it.
  const auto idtype = aten::IsNullArray(idx_a) ? idx_b->dtype : idx_a->dtype;
  ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "GatherMM", {
    ATEN_ID_TYPE_SWITCH(idtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(A->dtype, Dtype, XPU, "Feature data", {
        // NOTE(review): presumably resolves to the device/type-specific
        // implementation; explicit template arguments are not visible in
        // this copy of the source.
        GatherMM(A, B, C, idx_a, idx_b);
      });
    });
  });
}

/** @brief Generalized Dense Matrix-Matrix Multiplication according to relation
 * types.
*/
// Variant of GatherMM that additionally takes an index array for the output.
// Validates the operands and dispatches on device type, index dtype and the
// dtype of A.
//   - A must be 2D; A and B must be in the same context.
//   - Each of idx_a, idx_b, idx_c may be null; every non-null one that is
//     checked must share A's context.
//   - Only idx_b given:  len(idx_b) must equal A.shape[0].
//   - Only idx_a given:  len(idx_a) must equal B.shape[0].
//   - Both given:        len(idx_a) must equal len(idx_b).
void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray C, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c) {
  const bool has_a = !aten::IsNullArray(idx_a);
  const bool has_b = !aten::IsNullArray(idx_b);
  const bool has_c = !aten::IsNullArray(idx_c);

  CHECK_EQ(A->ndim, 2)
      << "gather_mm_scatter expects a 2D tensor for the first input.";
  CHECK(A->ctx == B->ctx)
      << "gather_mm_scatter expects all arguments to be on the same device.";
  if (has_c) {
    CHECK(A->ctx == idx_c->ctx)
        << "gather_mm_scatter expects all arguments to be on the same device.";
  }
  if (!has_a && has_b) {
    CHECK_EQ(A->shape[0], idx_b->shape[0])
        << "gather_mm_scatter expects len(idx_b) == A.shape[0] when idx_a is "
           "None.";
    CHECK(A->ctx == idx_b->ctx)
        << "gather_mm_scatter expects all arguments to be on the same device.";
  } else if (!has_b && has_a) {
    CHECK_EQ(B->shape[0], idx_a->shape[0])
        << "gather_mm_scatter expects len(idx_a) == B.shape[0] when idx_b is "
           "None.";
    CHECK(A->ctx == idx_a->ctx)
        << "gather_mm_scatter expects all arguments to be on the same device.";
  } else if (has_b && has_a) {
    CHECK_EQ(idx_a->shape[0], idx_b->shape[0])
        << "gather_mm_scatter expects len(idx_a) == len(idx_b) "
        << "when both idx_a and idx_b are given.";
    CHECK(A->ctx == idx_a->ctx && A->ctx == idx_b->ctx)
        << "gather_mm_scatter expects all arguments to be on the same device.";
  }
  // idx_c is allowed to be null (see the guarded check above), so its dtype
  // is not a reliable dispatch key. Use the first index array that is
  // actually present, as GatherMM does; when idx_c is given the dispatch is
  // the same as before, and when nothing is given the previous choice
  // (idx_c's dtype) is kept.
  const auto idtype = has_c   ? idx_c->dtype
                      : has_a ? idx_a->dtype
                      : has_b ? idx_b->dtype
                              : idx_c->dtype;
  ATEN_XPU_SWITCH_CUDA(A->ctx.device_type, XPU, "GatherMM", {
    ATEN_ID_TYPE_SWITCH(idtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(A->dtype, Dtype, XPU, "Feature data", {
        // NOTE(review): presumably resolves to the device/type-specific
        // implementation; explicit template arguments are not visible in
        // this copy of the source.
        GatherMMScatter(A, B, C, idx_a, idx_b, idx_c);
      });
    });
  });
}

/** @brief Generalized Sparse Matrix-Matrix Multiplication with hetero-graph
 * support.
*/ void SpMMHetero( const std::string& op, const std::string& reduce, HeteroGraphPtr graph, const std::vector& ufeat_vec, const std::vector& efeat_vec, std::vector* out, std::vector>* out_aux) { SparseFormat format = graph->SelectFormat(0, CSC_CODE); std::vector vec_graph; std::vector ufeat_eid; std::vector efeat_eid; std::vector out_eid; auto pair = graph->meta_graph()->FindEdge(0); // first etype NDArray ufeat_etype0 = (ufeat_vec.size() == 0) ? NullArray() : ufeat_vec[pair.first]; NDArray efeat_etype0 = (efeat_vec.size() == 0) ? NullArray() : efeat_vec[0]; for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { vec_graph.push_back(graph->GetCSCMatrix(etype)); auto pair = graph->meta_graph()->FindEdge(etype); ufeat_eid.push_back(pair.first); efeat_eid.push_back(etype); out_eid.push_back(pair.second); if (ufeat_etype0->shape[1] != ufeat_vec[pair.first]->shape[1]) LOG(FATAL) << "Column width of the input node features of all etypes " "must be same."; if (efeat_etype0->shape[1] != efeat_vec[etype]->shape[1]) LOG(FATAL) << "Column width of the input edge features of all etypes " "must be same."; } const auto& bcast = CalcBcastOff(op, ufeat_etype0, efeat_etype0); ATEN_XPU_SWITCH_CUDA(graph->Context().device_type, XPU, "SpMM", { ATEN_ID_TYPE_SWITCH( graph->DataType(), IdType, { ATEN_FLOAT_TYPE_SWITCH_16BITS( (*out)[out_eid[0]]->dtype, Dtype, XPU, "Feature data", { if (format == SparseFormat::kCSC) { SpMMCsrHetero( op, reduce, bcast, vec_graph, ufeat_vec, efeat_vec, out, out_aux, ufeat_eid, out_eid); } else { // TODO(Israt): Add support for COO format LOG(FATAL) << "SpMM only supports CSC format for graphs with number " << "of relation types > 1"; } }); }); }); } /** @brief Generalized Sampled Dense-Dense Matrix Multiplication. 
*/
// Dispatches on device type, graph id type and the dtype of out, then runs
// the CSR or COO implementation on edge type 0 according to the format the
// graph selects (COO preferred). lhs_target and rhs_target are forwarded
// unchanged to the implementation.
void SDDMM(
    const std::string& op, HeteroGraphPtr graph, NDArray lhs, NDArray rhs,
    NDArray out, int lhs_target, int rhs_target) {
  // TODO(zihao): format tuning
  SparseFormat format = graph->SelectFormat(0, COO_CODE);
  const auto& bcast = CalcBcastOff(op, lhs, rhs);

  ATEN_XPU_SWITCH_CUDA(graph->Context().device_type, XPU, "SDDMM", {
    ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(out->dtype, Dtype, XPU, "Feature data", {
        if (format == SparseFormat::kCSR) {
          SDDMMCsr(
              op, bcast, graph->GetCSRMatrix(0), lhs, rhs, out, lhs_target,
              rhs_target);
        } else if (format == SparseFormat::kCOO) {
          SDDMMCoo(
              op, bcast, graph->GetCOOMatrix(0), lhs, rhs, out, lhs_target,
              rhs_target);
        } else {
          LOG(FATAL) << "SDDMM only supports CSR and COO formats";
        }
      });
    });
  });
}

/**
 * @brief Find the src/dst/etype id based on the target 'u', 'v' or 'e'.
 *
 * The target is passed as an integer code: 0 selects the source node type of
 * the edge type, 2 selects its destination node type, and any other value
 * selects the edge type itself.
 *
 * @param graph The input graph.
 * @param target Integer code of the target of the lhs or rhs data of an
 *        etype (see above).
 * @param etype Relation type of the input graph.
 * @return The source node type id, destination node type id, or etype.
 */
int get_typeid_by_target(HeteroGraphPtr graph, int target, dgl_type_t etype) {
  auto pair = graph->meta_graph()->FindEdge(etype);
  if (target == 0) return pair.first;
  if (target == 2) return pair.second;
  return etype;
}

/** @brief Generalized Sampled Dense-Dense Matrix Multiplication.
*/
// Heterogeneous-graph version of SDDMM. For every edge type, the type ids
// that lhs_target and rhs_target resolve to are collected, then the CSR or
// COO matrices of all edge types are gathered and passed to the hetero
// implementation according to the format the graph selects (COO preferred).
void SDDMMHetero(
    const std::string& op, HeteroGraphPtr graph, std::vector lhs,
    std::vector rhs, std::vector out, int lhs_target, int rhs_target) {
  SparseFormat format = graph->SelectFormat(0, COO_CODE);

  std::vector lhs_eid;
  std::vector rhs_eid;
  for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
    lhs_eid.push_back(get_typeid_by_target(graph, lhs_target, etype));
    rhs_eid.push_back(get_typeid_by_target(graph, rhs_target, etype));
  }
  // Broadcast information is computed from the operands of the first etype.
  const auto& bcast = CalcBcastOff(op, lhs[lhs_eid[0]], rhs[rhs_eid[0]]);

  ATEN_XPU_SWITCH_CUDA(graph->Context().device_type, XPU, "SDDMM", {
    ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
      // NOTE(review): the dispatch dtype is read from out[rhs_eid[0]], i.e.
      // out is indexed with the rhs type id of the first etype - confirm
      // this is the intended element.
      ATEN_FLOAT_TYPE_SWITCH_16BITS(
          out[rhs_eid[0]]->dtype, Dtype, XPU, "Feature data", {
            if (format == SparseFormat::kCSR) {
              std::vector vec_csr;
              for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes();
                   ++etype) {
                vec_csr.push_back(graph->GetCSRMatrix(etype));
              }
              SDDMMCsrHetero(
                  op, bcast, vec_csr, lhs, rhs, out, lhs_target, rhs_target,
                  lhs_eid, rhs_eid);
            } else if (format == SparseFormat::kCOO) {
              std::vector vec_coo;
              for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes();
                   ++etype) {
                vec_coo.push_back(graph->GetCOOMatrix(etype));
              }
              SDDMMCooHetero(
                  op, bcast, vec_coo, lhs, rhs, out, lhs_target, rhs_target,
                  lhs_eid, rhs_eid);
            } else {
              LOG(FATAL) << "SDDMM only supports CSR and COO formats";
            }
          });
    });
  });
}

/**
 * @brief Generalized Edge_softmax op for forward.
 *
 * Runs on the CSC matrix of edge type 0. Uses ATEN_XPU_SWITCH rather than
 * the _CUDA variant used by the other dispatchers in this file (see the
 * TODO below).
 */
void Edge_softmax_forward(
    const std::string& op, HeteroGraphPtr graph, NDArray ufeat, NDArray efeat,
    NDArray out) {
  // TODO(zhejiang): add gpu op for edge_softmax
  const auto& bcast = CalcBcastOff(op, ufeat, efeat);

  ATEN_XPU_SWITCH(graph->Context().device_type, XPU, "edge_softmax", {
    ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(
          out->dtype, Dtype, XPU, "edge_softmax out data", {
            Edge_softmax_csr_forward(
                op, bcast, graph->GetCSCMatrix(0), ufeat, efeat, out);
          });
    });
  });
}

/**
 * @brief Generalized Edge_softmax op for backward.
 *
 * Runs on the CSC matrix of edge type 0. ufeat is only used to compute the
 * broadcast information; it is not passed on to the implementation.
 */
void Edge_softmax_backward(
    const std::string& op, HeteroGraphPtr graph, NDArray out, NDArray sds,
    NDArray back_out, NDArray ufeat) {
  // TODO(zhejiang): add gpu op for edge_softmax
  const auto& bcast = CalcBcastOff(op, ufeat, sds);

  ATEN_XPU_SWITCH(graph->Context().device_type, XPU, "edge_softmax_back", {
    ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(
          out->dtype, Dtype, XPU, "edge_softmax out data_back", {
            Edge_softmax_csr_backward(
                op, bcast, graph->GetCSCMatrix(0), out, sds, back_out);
          });
    });
  });
}

/**
 * @brief Return the data array of the CSC matrix of edge type 0 when the
 * graph selects the CSC format, and a null array otherwise.
 */
NDArray GetEdgeMapping(HeteroGraphRef graph) {
  SparseFormat format = graph->SelectFormat(0, CSC_CODE);
  if (format == SparseFormat::kCSC) {
    return graph.sptr()->GetCSCMatrix(0).data;
  } else {
    return NullArray();
  }
}

/**
 * @brief Segment reduce dispatch function.
 *
 * Dispatches on the device of feat, the dtype of offsets and the dtype of
 * feat.
 */
void SegmentReduceDispatch(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg) {
  ATEN_XPU_SWITCH_CUDA(feat->ctx.device_type, XPU, "SegmentReduce", {
    ATEN_ID_TYPE_SWITCH(offsets->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(feat->dtype, Dtype, XPU, "Feature data", {
        SegmentReduce(op, feat, offsets, out, arg);
      });
    });
  });
}

/**
 * @brief Scatter Add (on first dimension) dispatch function.
 *
 * Dispatches on the device of feat, the dtype of idx and the dtype of feat.
 */
void ScatterAddDispatch(NDArray feat, NDArray idx, NDArray out) {
  ATEN_XPU_SWITCH_CUDA(feat->ctx.device_type, XPU, "ScatterAdd", {
    ATEN_ID_TYPE_SWITCH(idx->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(feat->dtype, Dtype, XPU, "Feature data", {
        ScatterAdd(feat, idx, out);
      });
    });
  });
}

/** @brief Update gradients (reduce op max/min) dispatch function on
 * heterogeneous graph.
*/
// The device, id type and feature type used for dispatch are all taken from
// the entries at the source node type of the first edge type.
void UpdateGradMinMaxDispatchHetero(
    const HeteroGraphPtr& graph, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out) {
  auto pair = graph->meta_graph()->FindEdge(0);  // checking the first etype
  auto src_id = pair.first;
  // NOTE(review): the operator label passed here is "ScatterAdd", the same
  // one ScatterAddDispatch uses - confirm this is intended.
  ATEN_XPU_SWITCH_CUDA(feat[src_id]->ctx.device_type, XPU, "ScatterAdd", {
    ATEN_ID_TYPE_SWITCH(idx[src_id]->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(
          feat[src_id]->dtype, Dtype, XPU, "Feature data", {
            UpdateGradMinMax_hetero(
                graph, op, feat, idx, idx_etype, out);
          });
    });
  });
}

/**
 * @brief Backward segment cmp dispatch function.
 *
 * Dispatches on the device of feat, the dtype of arg and the dtype of feat.
 */
void BackwardSegmentCmpDispatch(NDArray feat, NDArray arg, NDArray out) {
  ATEN_XPU_SWITCH_CUDA(feat->ctx.device_type, XPU, "BackwardSegmentCmp", {
    ATEN_ID_TYPE_SWITCH(arg->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH_16BITS(feat->dtype, Dtype, XPU, "Feature data", {
        BackwardSegmentCmp(feat, arg, out);
      });
    });
  });
}

/**
 * @brief Validate the operands of a CSR matrix product and dispatch it.
 *
 * Requires A.num_cols == B.num_rows, both weight arrays in the context of
 * A.indptr, both matrices in the same context with the same id type, and
 * both weight arrays of the same dtype.
 *
 * @return The pair produced by the device/type-specific implementation.
 */
std::pair CSRMM(
    CSRMatrix A, NDArray A_weights, CSRMatrix B, NDArray B_weights) {
  CHECK_EQ(A.num_cols, B.num_rows)
      << "The number of nodes of destination node type of the first graph must "
         "be the "
         "same as the number of nodes of source node type of the second graph.";
  CheckCtx(
      A.indptr->ctx, {A_weights, B_weights},
      {"A's edge weights", "B's edge weights"});
  CHECK_EQ(A.indptr->ctx, B.indptr->ctx) << "Device of two graphs must match.";
  CHECK_EQ(A.indptr->dtype, B.indptr->dtype)
      << "ID types of two graphs must match.";
  CHECK_EQ(A_weights->dtype, B_weights->dtype)
      << "Data types of two edge weights must match.";

  std::pair ret;
  ATEN_XPU_SWITCH_CUDA(A.indptr->ctx.device_type, XPU, "CSRMM", {
    ATEN_ID_TYPE_SWITCH(A.indptr->dtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH(A_weights->dtype, DType, "Edge weights", {
        ret = CSRMM(A, A_weights, B, B_weights);
      });
    });
  });
  return ret;
}

/**
 * @brief Validate the operands of a sum of CSR matrices and dispatch it.
 *
 * The list must be non-empty and have one weight array per matrix. Every
 * matrix and weight array is compared against the first one: same context,
 * same id type, same weight dtype, same number of rows and columns, and one
 * weight per stored entry.
 *
 * @return The pair produced by the device/type-specific implementation.
 */
std::pair CSRSum(
    const std::vector& A, const std::vector& A_weights) {
  CHECK(A.size() > 0) << "The list of graphs must not be empty.";
  CHECK_EQ(A.size(), A_weights.size())
      << "The list of edge weights must have the same length as the list of "
         "graphs.";
  // Properties of the first operand serve as the reference for the rest.
  const auto ctx = A[0].indptr->ctx;
  const auto idtype = A[0].indptr->dtype;
  const auto dtype = A_weights[0]->dtype;
  const auto num_rows = A[0].num_rows;
  const auto num_cols = A[0].num_cols;
  for (size_t i = 0; i < A.size(); ++i) {
    CHECK_EQ(A[i].indptr->ctx, ctx)
        << "The devices of all graphs must be equal.";
    CHECK_EQ(A[i].indptr->dtype, idtype)
        << "The ID types of all graphs must be equal.";
    CHECK_EQ(A[i].indices->shape[0], A_weights[i]->shape[0])
        << "Shape of edge weights does not match the number of edges.";
    CHECK_EQ(A_weights[i]->ctx, ctx) << "The devices of edge weights must be "
                                        "the same as that of the graphs.";
    CHECK_EQ(A_weights[i]->dtype, dtype)
        << "The data types of all edge weights must be equal.";
    CHECK_EQ(A[i].num_rows, num_rows)
        << "Graphs must have the same number of nodes.";
    CHECK_EQ(A[i].num_cols, num_cols)
        << "Graphs must have the same number of nodes.";
  }

  std::pair ret;
  ATEN_XPU_SWITCH_CUDA(ctx.device_type, XPU, "CSRSum", {
    ATEN_ID_TYPE_SWITCH(idtype, IdType, {
      ATEN_FLOAT_TYPE_SWITCH(dtype, DType, "Edge weights", {
        ret = CSRSum(A, A_weights);
      });
    });
  });
  return ret;
}

// C API entry point for SpMM on a graph with exactly one edge type.
// Arguments: graph, op, reduce op, U, E, V (output), ArgU, ArgE.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSpMM")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef graph = args[0];
      const std::string op = args[1];
      const std::string reduce_op = args[2];
      NDArray U = args[3];
      NDArray E = args[4];
      NDArray V = args[5];
      NDArray ArgU = args[6];
      NDArray ArgE = args[7];
      CheckCtx(
          graph->Context(), {U, E, V, ArgU, ArgE},
          {"U_data", "E_data", "out", "Arg_U", "Arg_E"});
      CheckContiguous(
          {U, E, V, ArgU, ArgE}, {"U_data", "E_data", "out", "Arg_U", "Arg_E"});
      CHECK_EQ(graph->NumEdgeTypes(), 1);
      auto pair =
          graph->meta_graph()->FindEdge(0);  // only one etype in the graph.
      const dgl_type_t src_vtype = pair.first;
      const dgl_type_t dst_vtype = pair.second;
      // The second list gives, for each array, the position in the first
      // list it is checked against: U -> source nodes, E -> edges, and
      // V/ArgU/ArgE -> destination nodes.
      CheckShape(
          {graph->NumVertices(src_vtype), graph->NumEdges(0),
           graph->NumVertices(dst_vtype)},
          {0, 1, 2, 2, 2}, {U, E, V, ArgU, ArgE},
          {"U_data", "E_data", "out", "Arg_U", "Arg_E"});
      SpMM(op, reduce_op, graph.sptr(), U, E, V, {ArgU, ArgE});
    });

// C API entry point for GatherMM. Arguments: A, B, C, idx_a, idx_b.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelGATHERMM")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray A = args[0];
      NDArray B = args[1];
      NDArray C = args[2];
      NDArray idx_a = args[3];
      NDArray idx_b = args[4];
      GatherMM(A, B, C, idx_a, idx_b);
    });

// C API entry point for GatherMMScatter.
// Arguments: A, B, C, idx_a, idx_b, idx_c.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelGATHERMMSCATTER")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray A = args[0];
      NDArray B = args[1];
      NDArray C = args[2];
      NDArray idx_a = args[3];
      NDArray idx_b = args[4];
      NDArray idx_c = args[5];
      GatherMMScatter(A, B, C, idx_a, idx_b, idx_c);
    });

// C API entry point for SegmentMM.
// Arguments: A, B, C, seglen_A, A_trans, B_trans.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSEGMENTMM")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray A = args[0];
      NDArray B = args[1];
      NDArray C = args[2];
      NDArray seglen_A = args[3];
      bool A_trans = args[4];
      bool B_trans = args[5];
      SegmentMM(A, B, C, seglen_A, A_trans, B_trans);
    });

// C API entry point for SegmentMMBackwardB. Arguments: A, dC, dB, seglen.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSEGMENTMMBackwardB")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray A = args[0];
      NDArray dC = args[1];
      NDArray dB = args[2];
      NDArray seglen = args[3];
      SegmentMMBackwardB(A, dC, dB, seglen);
    });

// C API entry point for Edge_softmax_forward.
// Arguments: graph, op, U, E, V (output).
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelEdge_softmax_forward")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef graph = args[0];
      const std::string op = args[1];
      NDArray U = args[2];
      NDArray E = args[3];
      NDArray V = args[4];
      Edge_softmax_forward(op, graph.sptr(), U, E, V);
    });

// C API entry point for Edge_softmax_backward.
// Arguments: graph, op, out, sds, back_out, ufeat.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelEdge_softmax_backward")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef graph = args[0];
      const std::string op = args[1];
      NDArray out = args[2];
      NDArray sds = args[3];
      NDArray back_out = args[4];
      NDArray ufeat = args[5];
      Edge_softmax_backward(op, graph.sptr(), out, sds, back_out, ufeat);
    });

DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSpMMHetero")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef graph = args[0];
      const std::string op = args[1];
      const std::string reduce_op = args[2];
      List list_U = args[3];
      List list_E = args[4];
      List list_V = args[5];
      List list_ArgU = args[6];
      List list_ArgE = args[7];
      List list_ArgU_ntype = args[8];
      List list_ArgE_etype = args[9];
      std::vector> Arg_vec;  // ArgU + ArgE
      for (int i = 0; i < 4; ++i) {  // ArgU + ArgE + ArgU_ntype + ArgE_etype
        Arg_vec.push_back(std::vector());
      }
      std::vector U_vec = ListValueToVector(list_U);
      std::vector V_vec = ListValueToVector(list_V);
      std::vector E_vec = ListValueToVector(list_E);
      Arg_vec[0] = ListValueToVector(list_ArgU);
      Arg_vec[1] = ListValueToVector(list_ArgE);
      Arg_vec[2] = ListValueToVector(list_ArgU_ntype);
      Arg_vec[3] = ListValueToVector(list_ArgE_etype);
      for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
        auto pair = graph->meta_graph()->FindEdge(etype);
        const dgl_id_t src_id = pair.first;
        const dgl_id_t dst_id = pair.second;
        NDArray U = (U_vec.size() == 0) ? NullArray() : U_vec[src_id];
        NDArray E = (E_vec.size() == 0) ?
NullArray() : E_vec[etype]; CheckCtx( graph->Context(), {U, E, V_vec[dst_id], Arg_vec[0][dst_id], Arg_vec[1][dst_id]}, {"U_data", "E_data", "out", "Arg_U", "Arg_E"}); CheckContiguous( {U, E, V_vec[dst_id], Arg_vec[0][dst_id], Arg_vec[1][dst_id]}, {"U_data", "E_data", "out", "Arg_U", "Arg_E"}); } SpMMHetero(op, reduce_op, graph.sptr(), U_vec, E_vec, &V_vec, &Arg_vec); }); DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSDDMM") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef graph = args[0]; const std::string op = args[1]; NDArray lhs = args[2]; NDArray rhs = args[3]; NDArray out = args[4]; int lhs_target = args[5]; int rhs_target = args[6]; CheckCtx(graph->Context(), {lhs, rhs, out}, {"lhs", "rhs", "out"}); CheckContiguous({lhs, rhs, out}, {"lhs", "rhs", "out"}); CHECK_EQ(graph->NumEdgeTypes(), 1); auto pair = graph->meta_graph()->FindEdge(0); // only one etype in the graph. const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; CheckShape( {graph->NumVertices(src_vtype), graph->NumEdges(0), graph->NumVertices(dst_vtype)}, {lhs_target, rhs_target, 1}, {lhs, rhs, out}, {"U_data", "E_data", "V_data"}); SDDMM(op, graph.sptr(), lhs, rhs, out, lhs_target, rhs_target); }); DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSDDMMHetero") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef graph = args[0]; const std::string op = args[1]; List list_lhs = args[2]; List list_rhs = args[3]; List list_out = args[4]; int lhs_target = args[5]; int rhs_target = args[6]; std::vector vec_lhs; std::vector vec_rhs; std::vector vec_out; vec_lhs.reserve(list_lhs.size()); vec_rhs.reserve(list_rhs.size()); vec_out.reserve(list_out.size()); for (Value val : list_lhs) { vec_lhs.push_back(val->data); } for (Value val : list_rhs) { vec_rhs.push_back(val->data); } for (Value val : list_out) { vec_out.push_back(val->data); } SDDMMHetero( op, graph.sptr(), vec_lhs, vec_rhs, vec_out, lhs_target, rhs_target); }); 
// C API: segment reduce. Only feat, offsets and out are device/contiguity
// checked; arg is forwarded unchecked.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelSegmentReduce")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const std::string op = args[0];
      NDArray feat = args[1];
      NDArray offsets = args[2];
      NDArray out = args[3];
      NDArray arg = args[4];
      CheckCtx(feat->ctx, {feat, offsets, out}, {"feat", "offsets", "out"});
      CheckContiguous({feat, offsets, out}, {"feat", "offsets", "out"});
      SegmentReduceDispatch(op, feat, offsets, out, arg);
    });

// C API: scatter-add of feat into out using idx.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelScatterAdd")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray feat = args[0];
      NDArray idx = args[1];
      NDArray out = args[2];
      CheckCtx(feat->ctx, {feat, idx, out}, {"feat", "idx", "out"});
      CheckContiguous({feat, idx, out}, {"feat", "idx", "out"});
      ScatterAddDispatch(feat, idx, out);
    });

// C API: gradient update for min/max reducers on a heterograph. The four value
// lists are unpacked into vectors and forwarded.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelUpdateGradMinMaxHetero")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef graph = args[0];
      const std::string op = args[1];
      List list_feat = args[2];
      List list_idx = args[3];
      List list_idx_etype = args[4];
      List list_out = args[5];
      std::vector vec_feat = ListValueToVector(list_feat);
      std::vector vec_idx = ListValueToVector(list_idx);
      std::vector vec_idx_etype = ListValueToVector(list_idx_etype);
      std::vector vec_out = ListValueToVector(list_out);
      // NOTE(review): device/contiguity checks are disabled for this entry
      // point (the lines below are commented out in the original).
      // CheckCtx(feat->ctx, {feat, idx, out}, {"feat", "idx", "out"});
      // CheckContiguous({feat, idx, out}, {"feat", "idx", "out"});
      UpdateGradMinMaxDispatchHetero(
          graph.sptr(), op, vec_feat, vec_idx, vec_idx_etype, &vec_out);
    });

// C API: backward of segment cmp; checks device and contiguity of all three
// arrays against feat before dispatching.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelBwdSegmentCmp")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      NDArray feat = args[0];
      NDArray arg = args[1];
      NDArray out = args[2];
      CheckCtx(feat->ctx, {feat, arg, out}, {"feat", "arg", "out"});
      CheckContiguous({feat, arg, out}, {"feat", "arg", "out"});
      BackwardSegmentCmpDispatch(feat, arg, out);
    });

// C API: returns the result of GetEdgeMapping for the given graph.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLKernelGetEdgeMapping")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef graph = args[0];
      *rv = GetEdgeMapping(graph);
    });

/**
 * @brief Sparse matrix multiplication with graph interface.
 *
 * Both graphs must have exactly one edge type; their CSR matrices are
 * multiplied with CSRMM.
 *
 * @param A_ref The left operand.
 * @param A_weights The edge weights of graph A.
 * @param B_ref The right operand.
 * @param B_weights The edge weights of graph B.
 * @param num_vtypes The number of vertex types of the graph to be returned.
 * @return A pair consisting of the new graph as well as its edge weights.
 */
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRMM")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const HeteroGraphRef A_ref = args[0];
      NDArray A_weights = args[1];
      const HeteroGraphRef B_ref = args[2];
      NDArray B_weights = args[3];
      int num_vtypes = args[4];
      const HeteroGraphPtr A = A_ref.sptr();
      const HeteroGraphPtr B = B_ref.sptr();
      CHECK_EQ(A->NumEdgeTypes(), 1)
          << "The first graph must have only one edge type.";
      CHECK_EQ(B->NumEdgeTypes(), 1)
          << "The second graph must have only one edge type.";
      const auto A_csr = A->GetCSRMatrix(0);
      const auto B_csr = B->GetCSRMatrix(0);
      auto result = CSRMM(A_csr, A_weights, B_csr, B_weights);

      // Returned list: [graph built from the result CSR, result edge weights].
      List ret;
      ret.push_back(
          HeteroGraphRef(CreateFromCSR(num_vtypes, result.first, ALL_CODE)));
      ret.push_back(Value(MakeValue(result.second)));
      *rv = ret;
    });

// C API: sum of a list of single-edge-type graphs with per-graph edge weights.
// Returns [graph built from the result CSR, result edge weights].
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRSum")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      List A_refs = args[0];
      List A_weights = args[1];
      std::vector weights = ListValueToVector(A_weights);
      std::vector mats;
      mats.reserve(A_refs.size());
      int num_vtypes = 0;
      for (auto A_ref : A_refs) {
        const HeteroGraphPtr A = A_ref.sptr();
        CHECK_EQ(A->NumEdgeTypes(), 1)
            << "Graphs must have only one edge type.";
        mats.push_back(A->GetCSRMatrix(0));
        // The vertex-type count of the result is taken from the first graph
        // that reports a non-zero count.
        if (num_vtypes == 0) num_vtypes = A->NumVertexTypes();
      }
      auto result = CSRSum(mats, weights);

      List ret;
      ret.push_back(
          HeteroGraphRef(CreateFromCSR(num_vtypes, result.first, ALL_CODE)));
      ret.push_back(Value(MakeValue(result.second)));
      *rv = ret;
    });

// C API: looks up the weights of A at the (row, col) positions of B's edges.
// Both graphs must have one edge type and equal shapes; the last argument of
// CSRGetData (0.) is passed as the fill value for the lookup.
DGL_REGISTER_GLOBAL("sparse._CAPI_DGLCSRMask")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const HeteroGraphRef A_ref = args[0];
      NDArray A_weights = args[1];
      const HeteroGraphRef B_ref = args[2];
      const HeteroGraphPtr A = A_ref.sptr();
      const HeteroGraphPtr B = B_ref.sptr();
      CHECK_EQ(A->NumEdgeTypes(), 1)
          << "Both graphs must have only one edge type.";
      CHECK_EQ(B->NumEdgeTypes(), 1)
          << "Both graphs must have only one edge type.";
      const CSRMatrix& A_csr = A->GetCSRMatrix(0);
      const COOMatrix& B_coo = B->GetCOOMatrix(0);
      CHECK_EQ(A_csr.num_rows, B_coo.num_rows)
          << "Both graphs must have the same number of nodes.";
      CHECK_EQ(A_csr.num_cols, B_coo.num_cols)
          << "Both graphs must have the same number of nodes.";

      NDArray result;
      ATEN_FLOAT_TYPE_SWITCH(A_weights->dtype, DType, "Edge weights", {
        result = aten::CSRGetData(A_csr, B_coo.row, B_coo.col, A_weights, 0.);
      });
      *rv = result;
    });

}  // namespace aten
}  // namespace dgl

================================================
FILE: src/array/kernel_decl.h
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file array/kernel_decl.h
 * @brief Sparse matrix format-specific operator declarations.
 */
#ifndef DGL_ARRAY_KERNEL_DECL_H_
#define DGL_ARRAY_KERNEL_DECL_H_

#include
#include
#include
#include
#include
#include

namespace dgl {
namespace aten {

/**
 * @brief Generalized Sparse Matrix Dense Matrix Multiplication on Csr format.
 */
template
void SpMMCsr(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const aten::CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);

/**
 * @brief Generalized Sparse Matrix Dense Matrix Multiplication on Csr format
 * with heterograph support.
 *
 * Unlike SpMMCsr, the outputs are passed as pointers to vectors.
 */
template
void SpMMCsrHetero(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const std::vector& csr, const std::vector& ufeat,
    const std::vector& efeat, std::vector* out,
    std::vector>* out_aux, const std::vector& ufeat_eid,
    const std::vector& out_eid);

/**
 * @brief Generalized Sparse Matrix Dense Matrix Multiplication on Coo format.
*/
template
void SpMMCoo(
    const std::string& op, const std::string& reduce, const BcastOff& bcast,
    const aten::COOMatrix& coo, NDArray ufeat, NDArray efeat, NDArray out,
    std::vector out_aux);

/**
 * @brief Generalized Sampled Dense-Dense Matrix Multiplication on Csr format.
 */
template
void SDDMMCsr(
    const std::string& op, const BcastOff& bcast, const aten::CSRMatrix& csr,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);

/**
 * @brief Generalized Sampled Dense-Dense Matrix Multiplication on Csr format
 * with heterograph support.
 *
 * @note vec_out is taken by value here, whereas SpMMCsrHetero takes its
 * output vector by pointer.
 */
template
void SDDMMCsrHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_csr, const std::vector& vec_lhs,
    const std::vector& vec_rhs, std::vector vec_out, int lhs_target,
    int rhs_target, const std::vector& ufeat_eid,
    const std::vector& out_eid);

/**
 * @brief Generalized Sampled Dense-Dense Matrix Multiplication on Coo format.
 */
template
void SDDMMCoo(
    const std::string& op, const BcastOff& bcast, const aten::COOMatrix& coo,
    NDArray lhs, NDArray rhs, NDArray out, int lhs_target, int rhs_target);

/**
 * @brief Generalized Sampled Dense-Dense Matrix Multiplication on Coo format
 * with heterograph support.
 *
 * @note vec_out is taken by value, as in SDDMMCsrHetero.
 */
template
void SDDMMCooHetero(
    const std::string& op, const BcastOff& bcast,
    const std::vector& vec_coo, const std::vector& vec_lhs,
    const std::vector& vec_rhs, std::vector vec_out, int lhs_target,
    int rhs_target, const std::vector& lhs_eid,
    const std::vector& rhs_eid);

/**
 * @brief Generalized Dense Matrix-Matrix Multiplication according to relation
 * types.
 */
template
void GatherMM(
    const NDArray A, const NDArray B, NDArray out, const NDArray idx_a,
    const NDArray idx_b);

/**
 * @brief Generalized Dense Matrix-Matrix Multiplication according to relation
 * types.
 *
 * NOTE(review): same summary as GatherMM; this variant takes an additional
 * idx_c, presumably the index used to scatter rows into out -- confirm
 * against the implementation.
 */
template
void GatherMMScatter(
    const NDArray A, const NDArray B, NDArray out, const NDArray idx_a,
    const NDArray idx_b, const NDArray idx_c);

/**
 * @brief Generalized segmented dense Matrix-Matrix Multiplication.
 */
template
void SegmentMM(
    const NDArray A, const NDArray B, NDArray out, const NDArray seglen_A,
    bool a_trans, bool b_trans);

/**
 * NOTE(review): undocumented in the original. Judging by the name and the
 * (A, dC, dB, seglen) parameters this is presumably the backward pass of
 * SegmentMM with respect to B -- confirm against the implementation.
 */
template
void SegmentMMBackwardB(
    const NDArray A, const NDArray dC, NDArray dB, const NDArray seglen);

/**
 * @brief Segment reduce.
 */
template
void SegmentReduce(
    const std::string& op, NDArray feat, NDArray offsets, NDArray out,
    NDArray arg);

/**
 * @brief Scatter Add on first dimension.
 */
template
void ScatterAdd(NDArray feat, NDArray idx, NDArray out);

/**
 * @brief Update gradients for reduce operator max and min on first dimension.
 */
template
void UpdateGradMinMax_hetero(
    const HeteroGraphPtr& g, const std::string& op,
    const std::vector& feat, const std::vector& idx,
    const std::vector& idx_etype, std::vector* out);

/**
 * @brief Backward function of segment cmp.
 */
template
void BackwardSegmentCmp(NDArray feat, NDArray arg, NDArray out);

/**
 * @brief Sparse-sparse matrix multiplication
 *
 * @param A The left operand.
 * @param A_weights The weights of matrix as a 1D tensor.
 * @param B The right operand.
 * @param B_weights The weights of matrix as a 1D tensor.
 *
 * @note GPU implementation will cast the indices to 32 bit.
 * @note The zero entries in the result are not removed.
 * @note The CSR matrix should not have duplicate entries.
 */
template
std::pair CSRMM(
    const CSRMatrix& A, NDArray A_weights, const CSRMatrix& B,
    NDArray B_weights);

/**
 * @brief Sparse-sparse matrix summation.
 *
 * @param A The sparse matrices with the same size.
 * @param A_weights The weights of each sparse matrix as a 1D tensor.
 *
 * @note GPU implementation will cast the indices to 32 bit.
 * @note The zero entries in the result are not removed.
 * @note The CSR matrix should not have duplicate entries.
 */
template
std::pair CSRSum(
    const std::vector& A, const std::vector& A_weights);

/**
 * @brief Edge_softmax_csr forward function on Csr format.
*/ template void Edge_softmax_csr_forward( const std::string& op, const BcastOff& bcast, const aten::CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out); /** * @brief Edge_softmax_csr backward function on Csr format. */ template void Edge_softmax_csr_backward( const std::string& op, const BcastOff& bcast, const aten::CSRMatrix& csr, NDArray ufeat, NDArray efeat, NDArray out); } // namespace aten } // namespace dgl #endif // DGL_ARRAY_KERNEL_DECL_H_ ================================================ FILE: src/array/libra_partition.cc ================================================ /** * Copyright (c) 2021 Intel Corporation * * @file distgnn/partition/main_Libra.py * @brief Libra - Vertex-cut based graph partitioner for distirbuted training * @author Vasimuddin Md , * Guixiang Ma * Sanchit Misra , * Ramanarayan Mohanty , * Sasikanth Avancha * Nesreen K. Ahmed */ #include #include #include #include #include #include #include #include "../c_api_common.h" #include "./check.h" #include "kernel_decl.h" using namespace dgl::runtime; namespace dgl { namespace aten { template int32_t Ver2partition(IdType in_val, int64_t *node_map, int32_t num_parts) { int32_t pos = 0; for (int32_t p = 0; p < num_parts; p++) { if (in_val < node_map[p]) return pos; pos = pos + 1; } LOG(FATAL) << "Error: Unexpected output in Ver2partition!"; return -1; } /** * @brief Identifies the lead loaded partition/community for a given edge * assignment. */ int32_t LeastLoad(int64_t *community_edges, int32_t nc) { std::vector loc; int32_t min = 1e9; for (int32_t i = 0; i < nc; i++) { if (community_edges[i] < min) { min = community_edges[i]; } } for (int32_t i = 0; i < nc; i++) { if (community_edges[i] == min) { loc.push_back(i); } } int32_t r = RandomEngine::ThreadLocal()->RandInt(loc.size()); CHECK(loc[r] < nc); return loc[r]; } /** * @brief Libra - vertexcut based graph partitioning. 
* It takes list of edges from input DGL graph and distributed them among nc * partitions During edge distribution, Libra assign a given edge to a partition * based on the end vertices, in doing so, it tries to minimized the splitting * of the graph vertices. In case of conflict Libra assigns an edge to the least * loaded partition/community. * @param[in] nc Number of partitions/communities * @param[in] node_degree per node degree * @param[in] edgenum_unassigned node degree * @param[out] community_weights weight of the created partitions * @param[in] u src nodes * @param[in] v dst nodes * @param[out] w weight per edge * @param[out] out partition assignment of the edges * @param[in] N_n number of nodes in the input graph * @param[in] N_e number of edges in the input graph * @param[in] prefix output/partition storage location */ template void LibraVertexCut( int32_t nc, NDArray node_degree, NDArray edgenum_unassigned, NDArray community_weights, NDArray u, NDArray v, NDArray w, NDArray out, int64_t N_n, int64_t N_e, const std::string &prefix) { int32_t *out_ptr = out.Ptr(); IdType2 *node_degree_ptr = node_degree.Ptr(); IdType2 *edgenum_unassigned_ptr = edgenum_unassigned.Ptr(); IdType *u_ptr = u.Ptr(); IdType *v_ptr = v.Ptr(); int64_t *w_ptr = w.Ptr(); int64_t *community_weights_ptr = community_weights.Ptr(); std::vector > node_assignments(N_n); std::vector replication_list; // local allocations int64_t *community_edges = new int64_t[nc](); int64_t *cache = new int64_t[nc](); int64_t meter = static_cast(N_e / 100); for (int64_t i = 0; i < N_e; i++) { IdType u = u_ptr[i]; // edge end vertex 1 IdType v = v_ptr[i]; // edge end vertex 2 int64_t w = w_ptr[i]; // edge weight CHECK(u < N_n); CHECK(v < N_n); if (i % meter == 0) { fprintf(stderr, "."); fflush(0); } if (node_assignments[u].size() == 0 && node_assignments[v].size() == 0) { int32_t c = LeastLoad(community_edges, nc); out_ptr[i] = c; CHECK_LT(c, nc); community_edges[c]++; community_weights_ptr[c] = 
community_weights_ptr[c] + w; node_assignments[u].push_back(c); if (u != v) node_assignments[v].push_back(c); CHECK(node_assignments[u].size() <= static_cast(nc)) << "[bug] 1. generated splits (u) are greater than nc!"; CHECK(node_assignments[v].size() <= static_cast(nc)) << "[bug] 1. generated splits (v) are greater than nc!"; edgenum_unassigned_ptr[u]--; edgenum_unassigned_ptr[v]--; } else if ( node_assignments[u].size() != 0 && node_assignments[v].size() == 0) { for (uint32_t j = 0; j < node_assignments[u].size(); j++) { int32_t cind = node_assignments[u][j]; cache[j] = community_edges[cind]; } int32_t cindex = LeastLoad(cache, node_assignments[u].size()); int32_t c = node_assignments[u][cindex]; out_ptr[i] = c; community_edges[c]++; community_weights_ptr[c] = community_weights_ptr[c] + w; node_assignments[v].push_back(c); CHECK(node_assignments[v].size() <= static_cast(nc)) << "[bug] 2. generated splits (v) are greater than nc!"; edgenum_unassigned_ptr[u]--; edgenum_unassigned_ptr[v]--; } else if ( node_assignments[v].size() != 0 && node_assignments[u].size() == 0) { for (uint32_t j = 0; j < node_assignments[v].size(); j++) { int32_t cind = node_assignments[v][j]; cache[j] = community_edges[cind]; } int32_t cindex = LeastLoad(cache, node_assignments[v].size()); int32_t c = node_assignments[v][cindex]; CHECK(c < nc) << "[bug] 2. partition greater than nc !!"; out_ptr[i] = c; community_edges[c]++; community_weights_ptr[c] = community_weights_ptr[c] + w; node_assignments[u].push_back(c); CHECK(node_assignments[u].size() <= static_cast(nc)) << "[bug] 3. generated splits (u) are greater than nc!"; edgenum_unassigned_ptr[u]--; edgenum_unassigned_ptr[v]--; } else { std::vector setv(nc), intersetv; for (int32_t j = 0; j < nc; j++) setv[j] = 0; int32_t interset = 0; CHECK(node_assignments[u].size() <= static_cast(nc)) << "[bug] 4. generated splits (u) are greater than nc!"; CHECK(node_assignments[v].size() <= static_cast(nc)) << "[bug] 4. 
generated splits (v) are greater than nc!"; for (size_t j = 0; j < node_assignments[v].size(); j++) { CHECK(node_assignments[v][j] < nc) << "[bug] 4. Part assigned (v) greater than nc!"; setv[node_assignments[v][j]]++; } for (size_t j = 0; j < node_assignments[u].size(); j++) { CHECK(node_assignments[u][j] < nc) << "[bug] 4. Part assigned (u) greater than nc!"; setv[node_assignments[u][j]]++; } for (int32_t j = 0; j < nc; j++) { CHECK(setv[j] <= 2) << "[bug] 4. unexpected computed value !!!"; if (setv[j] == 2) { interset++; intersetv.push_back(j); } } if (interset) { for (size_t j = 0; j < intersetv.size(); j++) { int32_t cind = intersetv[j]; cache[j] = community_edges[cind]; } int32_t cindex = LeastLoad(cache, intersetv.size()); int32_t c = intersetv[cindex]; CHECK(c < nc) << "[bug] 4. partition greater than nc !!"; out_ptr[i] = c; community_edges[c]++; community_weights_ptr[c] = community_weights_ptr[c] + w; edgenum_unassigned_ptr[u]--; edgenum_unassigned_ptr[v]--; } else { if (node_degree_ptr[u] < node_degree_ptr[v]) { for (uint32_t j = 0; j < node_assignments[u].size(); j++) { int32_t cind = node_assignments[u][j]; cache[j] = community_edges[cind]; } int32_t cindex = LeastLoad(cache, node_assignments[u].size()); int32_t c = node_assignments[u][cindex]; CHECK(c < nc) << "[bug] 5. partition greater than nc !!"; out_ptr[i] = c; community_edges[c]++; community_weights_ptr[c] = community_weights_ptr[c] + w; for (uint32_t j = 0; j < node_assignments[v].size(); j++) { CHECK(node_assignments[v][j] != c) << "[bug] 5. duplicate partition (v) assignment !!"; } node_assignments[v].push_back(c); CHECK(node_assignments[v].size() <= static_cast(nc)) << "[bug] 5. 
generated splits (v) greater than nc!!"; replication_list.push_back(v); edgenum_unassigned_ptr[u]--; edgenum_unassigned_ptr[v]--; } else { for (uint32_t j = 0; j < node_assignments[v].size(); j++) { int32_t cind = node_assignments[v][j]; cache[j] = community_edges[cind]; } int32_t cindex = LeastLoad(cache, node_assignments[v].size()); int32_t c = node_assignments[v][cindex]; CHECK(c < nc) << "[bug] 6. partition greater than nc !!"; out_ptr[i] = c; community_edges[c]++; community_weights_ptr[c] = community_weights_ptr[c] + w; for (uint32_t j = 0; j < node_assignments[u].size(); j++) { CHECK(node_assignments[u][j] != c) << "[bug] 6. duplicate partition (u) assignment !!"; } if (u != v) node_assignments[u].push_back(c); CHECK(node_assignments[u].size() <= static_cast(nc)) << "[bug] 6. generated splits (u) greater than nc!!"; replication_list.push_back(u); edgenum_unassigned_ptr[u]--; edgenum_unassigned_ptr[v]--; } } } } delete cache; for (int64_t c = 0; c < nc; c++) { std::string path = prefix + "/community" + std::to_string(c) + ".txt"; FILE *fp = fopen(path.c_str(), "w"); CHECK_NE(fp, static_cast(NULL)) << "Error: can not open file: " << path.c_str(); for (int64_t i = 0; i < N_e; i++) { if (out_ptr[i] == c) fprintf( fp, "%ld,%ld,%ld\n", static_cast(u_ptr[i]), static_cast(v_ptr[i]), w_ptr[i]); } fclose(fp); } std::string path = prefix + "/replicationlist.csv"; FILE *fp = fopen(path.c_str(), "w"); CHECK_NE(fp, static_cast(NULL)) << "Error: can not open file: " << path.c_str(); fprintf(fp, "## The Indices of Nodes that are replicated :: Header"); printf("\nTotal replication: %ld\n", replication_list.size()); for (uint64_t i = 0; i < replication_list.size(); i++) fprintf(fp, "%ld\n", static_cast(replication_list[i])); printf("Community weights:\n"); for (int64_t c = 0; c < nc; c++) printf("%ld ", community_weights_ptr[c]); printf("\n"); printf("Community edges:\n"); for (int64_t c = 0; c < nc; c++) printf("%ld ", community_edges[c]); printf("\n"); delete[] 
community_edges; fclose(fp); } DGL_REGISTER_GLOBAL("sparse._CAPI_DGLLibraVertexCut") .set_body([](DGLArgs args, DGLRetValue *rv) { int32_t nc = args[0]; NDArray node_degree = args[1]; NDArray edgenum_unassigned = args[2]; NDArray community_weights = args[3]; NDArray u = args[4]; NDArray v = args[5]; NDArray w = args[6]; NDArray out = args[7]; int64_t N = args[8]; int64_t N_e = args[9]; std::string prefix = args[10]; ATEN_ID_TYPE_SWITCH(node_degree->dtype, IdType2, { ATEN_ID_TYPE_SWITCH(u->dtype, IdType, { LibraVertexCut( nc, node_degree, edgenum_unassigned, community_weights, u, v, w, out, N, N_e, prefix); }); }); }); /** * @brief * 1. Builds dictionary (ldt) for assigning local node IDs to nodes in the * partitions * 2. Builds dictionary (gdt) for storing copies (local ID) of split nodes * These dictionaries will be used in the subsequesnt stages to setup * tracking of split nodes copies across the partition, setting up partition * `ndata` dictionaries. * @param[out] a local src node ID of an edge in a partition * @param[out] b local dst node ID of an edge in a partition * @param[-] indices temporary memory, keeps track of global node ID to local * node ID in a partition * @param[out] ldt_key per partition dict for storing global and local node IDs * (consecutive) * @param[out] gdt_key global dict for storing number of local nodes (or split * nodes) for a given global node ID * @param[out] gdt_value global dict, stores local node IDs (due to split) * across partitions for a given global node ID * @param[out] node_map keeps track of range of local node IDs (consecutive) * given to the nodes in the partitions * @param[in, out] offset start of the range of local node IDs for this * partition * @param[in] nc number of partitions/communities * @param[in] c current partition number * @param[in] fsize size of pre-allocated * memory tensor * @param[in] prefix input Libra partition file location */ List Libra2dglBuildDict( NDArray a, NDArray b, NDArray indices, NDArray 
ldt_key, NDArray gdt_key, NDArray gdt_value, NDArray node_map, NDArray offset, int32_t nc, int32_t c, int64_t fsize, const std::string &prefix) { int64_t *indices_ptr = indices.Ptr(); // 1D temp array int64_t *ldt_key_ptr = ldt_key.Ptr(); // 1D local nodes <-> global nodes int64_t *gdt_key_ptr = gdt_key.Ptr(); // 1D #split copies per node int64_t *gdt_value_ptr = gdt_value.Ptr(); // 2D tensor int64_t *node_map_ptr = node_map.Ptr(); // 1D tensor int64_t *offset_ptr = offset.Ptr(); // 1D tensor int32_t width = nc; int64_t *a_ptr = a.Ptr(); // stores local src and dst node ID, int64_t *b_ptr = b.Ptr(); // to create the partition graph int64_t N_n = indices->shape[0]; int64_t num_nodes = ldt_key->shape[0]; for (int64_t i = 0; i < N_n; i++) { indices_ptr[i] = -100; } int64_t pos = 0; int64_t edge = 0; std::string path = prefix + "/community" + std::to_string(c) + ".txt"; FILE *fp = fopen(path.c_str(), "r"); CHECK_NE(fp, static_cast(NULL)) << "Error: can not open file: " << path.c_str(); while (!feof(fp) && edge < fsize) { int64_t u, v; float w; CHECK_EQ( fscanf(fp, "%ld,%ld,%f\n", &u, &v, &w), 3); // reading an edge - the src and dst global node IDs if (indices_ptr[u] == -100) { // if already not assigned a local node ID, local node ID is ldt_key_ptr[pos] = u; // already assigned for this global node ID CHECK(pos < num_nodes); // Sanity check indices_ptr[u] = pos++; // consecutive local node ID for a given global node ID } if (indices_ptr[v] == -100) { // if already not assigned a local node ID ldt_key_ptr[pos] = v; CHECK(pos < num_nodes); // Sanity check indices_ptr[v] = pos++; } a_ptr[edge] = indices_ptr[u]; // new local ID for an edge b_ptr[edge++] = indices_ptr[v]; // new local ID for an edge } CHECK(edge <= fsize) << "[Bug] memory allocated for #edges per partition is not enough."; fclose(fp); List ret; ret.push_back(Value( MakeValue(pos))); // returns total number of nodes in this partition ret.push_back(Value( MakeValue(edge))); // returns total number of edges 
in this partition for (int64_t i = 0; i < pos; i++) { int64_t u = ldt_key_ptr[i]; // global node ID // int64_t v = indices_ptr[u]; int64_t v = i; // local node ID int64_t *ind = &gdt_key_ptr[u]; // global dict, total number of local node IDs (an // offset) as of now for a given global node ID int64_t *ptr = gdt_value_ptr + u * width; ptr[*ind] = offset_ptr[0] + v; // stores a local node ID for the global node ID (*ind)++; CHECK_NE(v, -100); CHECK(*ind <= nc); } node_map_ptr[c] = offset_ptr[0] + pos; // since local node IDs for a partition are consecutive, // we maintain the range of local node IDs like this offset_ptr[0] += pos; return ret; } DGL_REGISTER_GLOBAL("sparse._CAPI_DGLLibra2dglBuildDict") .set_body([](DGLArgs args, DGLRetValue *rv) { NDArray a = args[0]; NDArray b = args[1]; NDArray indices = args[2]; NDArray ldt_key = args[3]; NDArray gdt_key = args[4]; NDArray gdt_value = args[5]; NDArray node_map = args[6]; NDArray offset = args[7]; int32_t nc = args[8]; int32_t c = args[9]; int64_t fsize = args[10]; std::string prefix = args[11]; List ret = Libra2dglBuildDict( a, b, indices, ldt_key, gdt_key, gdt_value, node_map, offset, nc, c, fsize, prefix); *rv = ret; }); /** * @brief sets up the 1-level tree among the clones of the split-nodes. 
* @param[in] gdt_key global dict for assigning consecutive node IDs to nodes * across all the partitions * @param[in] gdt_value global dict for assigning consecutive node IDs to nodes * across all the partition * @param[out] lrtensor keeps the root node ID of 1-level tree * @param[in] nc number of partitions/communities * @param[in] Nn number of nodes in the input graph */ void Libra2dglSetLR( NDArray gdt_key, NDArray gdt_value, NDArray lrtensor, int32_t nc, int64_t Nn) { int64_t *gdt_key_ptr = gdt_key.Ptr(); // 1D tensor int64_t *gdt_value_ptr = gdt_value.Ptr(); // 2D tensor int64_t *lrtensor_ptr = lrtensor.Ptr(); // 1D tensor int32_t width = nc; int64_t cnt = 0; int64_t avg_split_copy = 0, scnt = 0; for (int64_t i = 0; i < Nn; i++) { if (gdt_key_ptr[i] <= 0) { cnt++; } else { int32_t val = RandomEngine::ThreadLocal()->RandInt(gdt_key_ptr[i]); CHECK(val >= 0 && val < gdt_key_ptr[i]); CHECK(gdt_key_ptr[i] <= nc); int64_t *ptr = gdt_value_ptr + i * width; lrtensor_ptr[i] = ptr[val]; } if (gdt_key_ptr[i] > 1) { avg_split_copy += gdt_key_ptr[i]; scnt++; } } } DGL_REGISTER_GLOBAL("sparse._CAPI_DGLLibra2dglSetLR") .set_body([](DGLArgs args, DGLRetValue *rv) { NDArray gdt_key = args[0]; NDArray gdt_value = args[1]; NDArray lrtensor = args[2]; int32_t nc = args[3]; int64_t Nn = args[4]; Libra2dglSetLR(gdt_key, gdt_value, lrtensor, nc, Nn); }); /** * @brief For each node in a partition, it creates a list of remote clone IDs; * also, for each node in a partition, it gathers the data (feats, label, * trian, test) from input graph. * @param[out] feat node features in current partition c. * @param[in] gfeat input graph node features. * @param[out] adj list of node IDs of remote clones. * @param[out] inner_nodes marks whether a node is split or not. 
* @param[in] ldt_key per partition dict for tracking global to local node IDs
 * @param[out] gdt_key global dict for storing number of local nodes (or split
 *             nodes) for a given global node ID
 * @param[out] gdt_value global dict, stores local node IDs (due to split)
 *             across partitions for a given global node ID.
 * @param[in] node_map keeps track of range of local node IDs (consecutive)
 *            given to the nodes in the partitions.
 * @param[out] lr 1-level tree marking for local split nodes.
 * @param[in] lrtensor global (all the partitions) 1-level tree.
 * @param[in] num_nodes number of nodes in current partition.
 * @param[in] nc number of partitions/communities.
 * @param[in] c current partition/community.
 * @param[in] feat_size node feature vector size.
 * @param[out] labels local (for this partition) labels.
 * @param[out] trainm local (for this partition) training nodes.
 * @param[out] testm local (for this partition) testing nodes.
 * @param[out] valm local (for this partition) validation nodes.
 * @param[in] glabels global (input graph) labels.
 * @param[in] gtrainm global (input graph) training nodes.
 * @param[in] gtestm global (input graph) testing nodes.
 * @param[in] gvalm global (input graph) validation nodes.
 * @param[in] Nn number of nodes in the input graph.
*/ template void Libra2dglBuildAdjlist( NDArray feat, NDArray gfeat, NDArray adj, NDArray inner_node, NDArray ldt_key, NDArray gdt_key, NDArray gdt_value, NDArray node_map, NDArray lr, NDArray lrtensor, int64_t num_nodes, int32_t nc, int32_t c, int32_t feat_size, NDArray labels, NDArray trainm, NDArray testm, NDArray valm, NDArray glabels, NDArray gtrainm, NDArray gtestm, NDArray gvalm, int64_t Nn) { DType *feat_ptr = feat.Ptr(); // 2D tensor DType *gfeat_ptr = gfeat.Ptr(); // 2D tensor int64_t *adj_ptr = adj.Ptr(); // 2D tensor int32_t *inner_node_ptr = inner_node.Ptr(); int64_t *ldt_key_ptr = ldt_key.Ptr(); int64_t *gdt_key_ptr = gdt_key.Ptr(); int64_t *gdt_value_ptr = gdt_value.Ptr(); // 2D tensor int64_t *node_map_ptr = node_map.Ptr(); int64_t *lr_ptr = lr.Ptr(); int64_t *lrtensor_ptr = lrtensor.Ptr(); int32_t width = nc - 1; runtime::parallel_for(0, num_nodes, [&](int64_t s, int64_t e) { for (int64_t i = s; i < e; i++) { int64_t k = ldt_key_ptr[i]; int64_t v = i; int64_t ind = gdt_key_ptr[k]; int64_t *adj_ptr_ptr = adj_ptr + v * width; if (ind == 1) { for (int32_t j = 0; j < width; j++) adj_ptr_ptr[j] = -1; inner_node_ptr[i] = 1; lr_ptr[i] = -200; } else { lr_ptr[i] = lrtensor_ptr[k]; int64_t *ptr = gdt_value_ptr + k * nc; int64_t pos = 0; CHECK(ind <= nc); int32_t flg = 0; for (int64_t j = 0; j < ind; j++) { if (ptr[j] == lr_ptr[i]) flg = 1; if (c != Ver2partition(ptr[j], node_map_ptr, nc)) adj_ptr_ptr[pos++] = ptr[j]; } CHECK_EQ(flg, 1); CHECK(pos == ind - 1); for (; pos < width; pos++) adj_ptr_ptr[pos] = -1; inner_node_ptr[i] = 0; } } }); // gather runtime::parallel_for(0, num_nodes, [&](int64_t s, int64_t e) { for (int64_t i = s; i < e; i++) { int64_t k = ldt_key_ptr[i]; int64_t ind = i * feat_size; DType *optr = gfeat_ptr + ind; DType *iptr = feat_ptr + k * feat_size; for (int32_t j = 0; j < feat_size; j++) optr[j] = iptr[j]; } IdType *labels_ptr = labels.Ptr(); IdType *glabels_ptr = glabels.Ptr(); IdType2 *trainm_ptr = trainm.Ptr(); IdType2 *gtrainm_ptr 
= gtrainm.Ptr(); IdType2 *testm_ptr = testm.Ptr(); IdType2 *gtestm_ptr = gtestm.Ptr(); IdType2 *valm_ptr = valm.Ptr(); IdType2 *gvalm_ptr = gvalm.Ptr(); for (int64_t i = 0; i < num_nodes; i++) { int64_t k = ldt_key_ptr[i]; CHECK(k >= 0 && k < Nn); glabels_ptr[i] = labels_ptr[k]; gtrainm_ptr[i] = trainm_ptr[k]; gtestm_ptr[i] = testm_ptr[k]; gvalm_ptr[i] = valm_ptr[k]; } }); } DGL_REGISTER_GLOBAL("sparse._CAPI_DGLLibra2dglBuildAdjlist") .set_body([](DGLArgs args, DGLRetValue *rv) { NDArray feat = args[0]; NDArray gfeat = args[1]; NDArray adj = args[2]; NDArray inner_node = args[3]; NDArray ldt_key = args[4]; NDArray gdt_key = args[5]; NDArray gdt_value = args[6]; NDArray node_map = args[7]; NDArray lr = args[8]; NDArray lrtensor = args[9]; int64_t num_nodes = args[10]; int32_t nc = args[11]; int32_t c = args[12]; int32_t feat_size = args[13]; NDArray labels = args[14]; NDArray trainm = args[15]; NDArray testm = args[16]; NDArray valm = args[17]; NDArray glabels = args[18]; NDArray gtrainm = args[19]; NDArray gtestm = args[20]; NDArray gvalm = args[21]; int64_t Nn = args[22]; ATEN_FLOAT_TYPE_SWITCH(feat->dtype, DType, "Features", { ATEN_ID_TYPE_SWITCH(trainm->dtype, IdType2, { ATEN_ID_BITS_SWITCH((glabels->dtype).bits, IdType, { Libra2dglBuildAdjlist( feat, gfeat, adj, inner_node, ldt_key, gdt_key, gdt_value, node_map, lr, lrtensor, num_nodes, nc, c, feat_size, labels, trainm, testm, valm, glabels, gtrainm, gtestm, gvalm, Nn); }); }); }); }); } // namespace aten } // namespace dgl ================================================ FILE: src/array/selector.h ================================================ /** * Copyright (c) 2020 by Contributors * @file array/selector.h * @brief Selector functions to select among src/edge/dst attributes. 
*/ #ifndef DGL_ARRAY_SELECTOR_H_ #define DGL_ARRAY_SELECTOR_H_ #include namespace dgl { namespace { #ifdef __CUDACC__ #define DGLDEVICE __device__ #define DGLINLINE __forceinline__ #else #define DGLDEVICE #define DGLINLINE inline #endif // __CUDACC__ } // namespace /** * @brief Select among src/edge/dst feature/idx. * @note the integer argument target specifies which target * to choose, 0: src, 1: edge, 2: dst. */ template struct Selector { template static DGLDEVICE DGLINLINE T Call(T src, T edge, T dst) { LOG(INFO) << "Target " << target << " not recognized."; return src; } }; template <> template DGLDEVICE DGLINLINE T Selector<0>::Call(T src, T edge, T dst) { return src; } template <> template DGLDEVICE DGLINLINE T Selector<1>::Call(T src, T edge, T dst) { return edge; } template <> template DGLDEVICE DGLINLINE T Selector<2>::Call(T src, T edge, T dst) { return dst; } } // namespace dgl #endif // DGL_ARRAY_SELECTOR_H_ ================================================ FILE: src/array/union_partition.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file array/cpu/coo_union_partition.cc * @brief COO union and partition */ #include #include namespace dgl { namespace aten { ///////////////////////// COO Based Operations///////////////////////// std::vector DisjointPartitionCooBySizes( const COOMatrix &coo, const uint64_t batch_size, const std::vector &edge_cumsum, const std::vector &src_vertex_cumsum, const std::vector &dst_vertex_cumsum) { CHECK_EQ(edge_cumsum.size(), batch_size + 1); CHECK_EQ(src_vertex_cumsum.size(), batch_size + 1); CHECK_EQ(dst_vertex_cumsum.size(), batch_size + 1); std::vector ret; ret.resize(batch_size); for (size_t g = 0; g < batch_size; ++g) { IdArray result_src = IndexSelect(coo.row, edge_cumsum[g], edge_cumsum[g + 1]) - src_vertex_cumsum[g]; IdArray result_dst = IndexSelect(coo.col, edge_cumsum[g], edge_cumsum[g + 1]) - dst_vertex_cumsum[g]; IdArray result_data = NullArray(); // has data index 
array if (COOHasData(coo)) { result_data = IndexSelect(coo.data, edge_cumsum[g], edge_cumsum[g + 1]) - edge_cumsum[g]; } COOMatrix sub_coo = COOMatrix( src_vertex_cumsum[g + 1] - src_vertex_cumsum[g], dst_vertex_cumsum[g + 1] - dst_vertex_cumsum[g], result_src, result_dst, result_data, coo.row_sorted, coo.col_sorted); ret[g] = sub_coo; } return ret; } COOMatrix COOSliceContiguousChunk( const COOMatrix &coo, const std::vector &edge_range, const std::vector &src_vertex_range, const std::vector &dst_vertex_range) { IdArray result_src = NullArray(coo.row->dtype, coo.row->ctx); IdArray result_dst = NullArray(coo.row->dtype, coo.row->ctx); if (edge_range[1] != edge_range[0]) { // The chunk has edges result_src = IndexSelect(coo.row, edge_range[0], edge_range[1]) - src_vertex_range[0]; result_dst = IndexSelect(coo.col, edge_range[0], edge_range[1]) - dst_vertex_range[0]; } IdArray result_data = NullArray(); // has data index array if (COOHasData(coo)) { result_data = IndexSelect(coo.data, edge_range[0], edge_range[1]) - edge_range[0]; } COOMatrix sub_coo = COOMatrix( src_vertex_range[1] - src_vertex_range[0], dst_vertex_range[1] - dst_vertex_range[0], result_src, result_dst, result_data, coo.row_sorted, coo.col_sorted); return sub_coo; } ///////////////////////// CSR Based Operations///////////////////////// CSRMatrix DisjointUnionCsr(const std::vector &csrs) { uint64_t src_offset = 0, dst_offset = 0; int64_t indices_offset = 0; bool has_data = false; bool sorted = true; // check if data index array for (size_t i = 0; i < csrs.size(); ++i) { CHECK_SAME_DTYPE(csrs[0].indptr, csrs[i].indptr); CHECK_SAME_CONTEXT(csrs[0].indices, csrs[i].indices); has_data |= CSRHasData(csrs[i]); } std::vector res_indptr; std::vector res_indices; std::vector res_data; res_indptr.resize(csrs.size()); res_indices.resize(csrs.size()); for (size_t i = 0; i < csrs.size(); ++i) { const aten::CSRMatrix &csr = csrs[i]; sorted &= csr.sorted; IdArray indptr = csr.indptr + indices_offset; IdArray 
indices = csr.indices + dst_offset; if (i > 0) indptr = IndexSelect(indptr, 1, indptr->shape[0]); res_indptr[i] = indptr; res_indices[i] = indices; src_offset += csr.num_rows; dst_offset += csr.num_cols; // any one of input csr has data index array if (has_data) { IdArray edges_data; if (CSRHasData(csr) == false) { edges_data = Range( indices_offset, indices_offset + csr.indices->shape[0], csr.indices->dtype.bits, csr.indices->ctx); } else { edges_data = csr.data + indices_offset; } res_data.push_back(edges_data); indices_offset += csr.indices->shape[0]; } } IdArray result_indptr = Concat(res_indptr); IdArray result_indices = Concat(res_indices); IdArray result_data = has_data ? Concat(res_data) : NullArray(); return CSRMatrix( src_offset, dst_offset, result_indptr, result_indices, result_data, sorted); } std::vector DisjointPartitionCsrBySizes( const CSRMatrix &csr, const uint64_t batch_size, const std::vector &edge_cumsum, const std::vector &src_vertex_cumsum, const std::vector &dst_vertex_cumsum) { CHECK_EQ(edge_cumsum.size(), batch_size + 1); CHECK_EQ(src_vertex_cumsum.size(), batch_size + 1); CHECK_EQ(dst_vertex_cumsum.size(), batch_size + 1); std::vector ret; ret.resize(batch_size); for (size_t g = 0; g < batch_size; ++g) { uint64_t num_src = src_vertex_cumsum[g + 1] - src_vertex_cumsum[g]; IdArray result_indptr; if (g == 0) { result_indptr = IndexSelect(csr.indptr, 0, src_vertex_cumsum[1] + 1) - edge_cumsum[0]; } else { result_indptr = IndexSelect( csr.indptr, src_vertex_cumsum[g], src_vertex_cumsum[g + 1] + 1) - edge_cumsum[g]; } IdArray result_indices = IndexSelect(csr.indices, edge_cumsum[g], edge_cumsum[g + 1]) - dst_vertex_cumsum[g]; IdArray result_data = NullArray(); // has data index array if (CSRHasData(csr)) { result_data = IndexSelect(csr.data, edge_cumsum[g], edge_cumsum[g + 1]) - edge_cumsum[g]; } CSRMatrix sub_csr = CSRMatrix( num_src, dst_vertex_cumsum[g + 1] - dst_vertex_cumsum[g], result_indptr, result_indices, result_data, csr.sorted); 
ret[g] = sub_csr; } return ret; } CSRMatrix CSRSliceContiguousChunk( const CSRMatrix &csr, const std::vector &edge_range, const std::vector &src_vertex_range, const std::vector &dst_vertex_range) { int64_t indptr_len = src_vertex_range[1] - src_vertex_range[0] + 1; IdArray result_indptr = Full(0, indptr_len, csr.indptr->dtype.bits, csr.indptr->ctx); IdArray result_indices = NullArray(csr.indptr->dtype, csr.indptr->ctx); IdArray result_data = NullArray(); if (edge_range[1] != edge_range[0]) { // The chunk has edges result_indptr = IndexSelect(csr.indptr, src_vertex_range[0], src_vertex_range[1] + 1) - edge_range[0]; result_indices = IndexSelect(csr.indices, edge_range[0], edge_range[1]) - dst_vertex_range[0]; if (CSRHasData(csr)) { result_data = IndexSelect(csr.data, edge_range[0], edge_range[1]) - edge_range[0]; } } CSRMatrix sub_csr = CSRMatrix( src_vertex_range[1] - src_vertex_range[0], dst_vertex_range[1] - dst_vertex_range[0], result_indptr, result_indices, result_data, csr.sorted); return sub_csr; } } // namespace aten } // namespace dgl ================================================ FILE: src/array/uvm_array.cc ================================================ /** * Copyright (c) 2019-2022 by Contributors * @file array/uvm_array.cc * @brief DGL array utilities implementation */ #include #include #include "../c_api_common.h" #include "./uvm_array_op.h" using namespace dgl::runtime; namespace dgl { namespace aten { NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index) { #ifdef DGL_USE_CUDA CHECK(array.IsPinned()) << "Input array must be in pinned memory."; CHECK_EQ(index->ctx.device_type, kDGLCUDA) << "Index must be on the GPU."; CHECK_GE(array->ndim, 1) << "Input array must have at least 1 dimension."; CHECK_EQ(index->ndim, 1) << "Index must be a 1D array."; ATEN_DTYPE_BITS_ONLY_SWITCH(array->dtype, DType, "values", { ATEN_ID_TYPE_SWITCH(index->dtype, IdType, { return impl::IndexSelectCPUFromGPU(array, index); }); }); #endif LOG(FATAL) << 
"IndexSelectCPUFromGPU requires CUDA."; // Should be unreachable return NDArray{}; } void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source) { #ifdef DGL_USE_CUDA CHECK(dest.IsPinned()) << "Destination array must be in pinned memory."; CHECK_EQ(index->ctx.device_type, kDGLCUDA) << "Index must be on the GPU."; CHECK_EQ(source->ctx.device_type, kDGLCUDA) << "Source array must be on the GPU."; CHECK_EQ(dest->dtype, source->dtype) << "Destination array and source " "array must have the same dtype."; CHECK_GE(dest->ndim, 1) << "Destination array must have at least 1 dimension."; CHECK_EQ(index->ndim, 1) << "Index must be a 1D array."; ATEN_DTYPE_BITS_ONLY_SWITCH(source->dtype, DType, "values", { ATEN_ID_TYPE_SWITCH(index->dtype, IdType, { impl::IndexScatterGPUToCPU(dest, index, source); }); }); #else LOG(FATAL) << "IndexScatterGPUToCPU requires CUDA."; #endif } DGL_REGISTER_GLOBAL("ndarray.uvm._CAPI_DGLIndexSelectCPUFromGPU") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArray array = args[0]; IdArray index = args[1]; *rv = IndexSelectCPUFromGPU(array, index); }); DGL_REGISTER_GLOBAL("ndarray.uvm._CAPI_DGLIndexScatterGPUToCPU") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArray dest = args[0]; IdArray index = args[1]; NDArray source = args[2]; IndexScatterGPUToCPU(dest, index, source); }); } // namespace aten } // namespace dgl ================================================ FILE: src/array/uvm_array_op.h ================================================ /** * Copyright (c) 2019-2022 by Contributors * @file array/uvm_array_op.h * @brief Array operator templates */ #ifndef DGL_ARRAY_UVM_ARRAY_OP_H_ #define DGL_ARRAY_UVM_ARRAY_OP_H_ #include #include namespace dgl { namespace aten { namespace impl { // Take CPU array and GPU index, and then index with GPU. 
template
NDArray IndexSelectCPUFromGPU(NDArray array, IdArray index);

// Counterpart of the declaration above: writes `source` into `dest` at the
// positions given by `index`.
template
void IndexScatterGPUToCPU(NDArray dest, IdArray index, NDArray source);

}  // namespace impl
}  // namespace aten
}  // namespace dgl

#endif  // DGL_ARRAY_UVM_ARRAY_OP_H_

================================================
FILE: src/bcast.cc
================================================
/**
 * Copyright (c) 2020 by Contributors
 * @file kernel/bcast.h
 * @brief Broadcast related function implementations.
 */
#include
#include

#include

namespace dgl {

namespace {
/**
 * @brief Determine whether use broadcasting or not, given the operator
 *        type, lhs array and rhs array.
 *
 * Returns false for the copy operators; otherwise returns true when the two
 * operands differ in rank or in any dimension after the first.
 */
bool UseBcast(const std::string& op, NDArray lhs, NDArray rhs) {
  if (op == "copy_lhs" || op == "copy_rhs")
    return false;  // broadcasting is not required for copy_u/copy_e
  if (lhs->ndim != rhs->ndim) return true;
  // Dimension 0 is skipped on purpose; only the trailing dimensions are
  // compared.
  for (int i = 1; i < lhs->ndim; ++i) {
    if (lhs->shape[i] != rhs->shape[i]) return true;
  }
  return false;
}

}  // namespace

/**
 * @brief: Compute broadcast and auxiliary information given operator
 *         and operands for kernel computation.
 * @note: Expect lhs, rhs to have ndim >= 2 and the shape of lhs/rhs
 *        valid for the op computation.
 *
 * Fills lhs_len/rhs_len (product of all dimensions after the first),
 * use_bcast, reduce_size, out_len and, when broadcasting is used, the
 * lhs_offset/rhs_offset tables.
 */
BcastOff CalcBcastOff(const std::string& op, NDArray lhs, NDArray rhs) {
  BcastOff rst;
  rst.lhs_len = 1;
  rst.rhs_len = 1;
  for (int i = 1; i < lhs->ndim; ++i) rst.lhs_len *= lhs->shape[i];
  for (int i = 1; i < rhs->ndim; ++i) rst.rhs_len *= rhs->shape[i];
  rst.use_bcast = UseBcast(op, lhs, rhs);
  rst.reduce_size = 1;  // defaults to 1, except for the case op == 'dot'.
  if (rst.use_bcast) {
    // Number of trailing axes to walk (dimension 0 is excluded).
    const int max_ndim = std::max(lhs->ndim, rhs->ndim) - 1;
    int out_len = 1, j = 0;
    if (op == "dot") {
      rst.reduce_size = lhs->shape[lhs->ndim - 1];  // set reduce_size for dot.
      ++j;  // do not consider reduce axis in computing lhs_offset and
            // rhs_offset.
    }
    int stride_l = 1, stride_r = 1;
    rst.lhs_offset.push_back(0);  // lhs_offset[0] is always 0
    rst.rhs_offset.push_back(0);  // rhs_offset[0] is always 0
    for (; j < max_ndim; ++j) {   // iterate the axis from back to front.
      // dl refers to the size of lhs array in the current axis, likewise for
      // dr. An operand that has run out of axes contributes size 1.
      const int dl =
          (lhs->ndim - 1 - j < 1) ? 1 : lhs->shape[lhs->ndim - 1 - j];
      const int dr =
          (rhs->ndim - 1 - j < 1) ? 1 : rhs->shape[rhs->ndim - 1 - j];
      for (int i = 1; i < std::max(dl, dr); ++i) {
        for (int k = 0; k < out_len; ++k) {
          /* Explanation:
           * if current dimension is not broadcast dimension for lhs array
           *   lhs_offset[i * out_len + k] = lhs_offset[k] + i * stride_l
           * else
           *   lhs_offset[i * out_len + k] = lhs_offset[k]
           * likewise for rhs_offset.
           */
          rst.lhs_offset.push_back(rst.lhs_offset[k] + i * (i < dl) * stride_l);
          rst.rhs_offset.push_back(rst.rhs_offset[k] + i * (i < dr) * stride_r);
        }
      }
      out_len *= std::max(dl, dr);
      stride_l *= dl;
      stride_r *= dr;
    }
    rst.out_len = out_len;
  } else {
    rst.out_len = (op == "copy_rhs") ? rst.rhs_len : rst.lhs_len;
    if (op == "dot") {
      // set reduce_size for dot.
      rst.reduce_size = lhs->shape[lhs->ndim - 1];
      // out_len is divided by reduce_size in dot.
      rst.out_len /= rst.reduce_size;
    }
  }
  return rst;
}

}  // namespace dgl

================================================
FILE: src/c_api_common.cc
================================================
/**
 * Copyright (c) 2018 by Contributors
 * @file c_api_common.cc
 * @brief DGL C API common implementations
 */
#include "c_api_common.h"

#include

using dgl::runtime::DGLArgs;
using dgl::runtime::DGLArgValue;
using dgl::runtime::DGLRetValue;
using dgl::runtime::NDArray;
using dgl::runtime::PackedFunc;

namespace dgl {

// Wraps a copy of `vec` in a PackedFunc: calling it with an index returns the
// element at that index, and an out-of-range index fails with LOG(FATAL).
PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector& vec) {
  auto body = [vec](DGLArgs args, DGLRetValue* rv) {
    const uint64_t which = args[0];
    if (which >= vec.size()) {
      LOG(FATAL) << "invalid choice";
    } else {
      // NOTE(review): `vec` is captured by value in a non-mutable lambda, so
      // it is const here and std::move cannot steal from it; this is a copy.
      *rv = std::move(vec[which]);
    }
  };
  return PackedFunc(body);
}

// Wraps a copy of `ea` in a PackedFunc: index 0 returns src, 1 returns dst,
// 2 returns id, and anything else fails with LOG(FATAL).
PackedFunc ConvertEdgeArrayToPackedFunc(const EdgeArray& ea) {
  auto body = [ea](DGLArgs args, DGLRetValue* rv) {
    const int which = args[0];
    if (which == 0) {
      *rv = std::move(ea.src);
    } else if (which == 1) {
      *rv = std::move(ea.dst);
    } else if (which == 2) {
      *rv = std::move(ea.id);
    } else {
      LOG(FATAL) << "invalid choice";
    }
  };
  return PackedFunc(body);
}

}  // namespace dgl

================================================
FILE: src/c_api_common.h
================================================
/**
 * Copyright (c) 2018 by Contributors
 * @file c_api_common.h
 * @brief DGL C API common util functions
 */
#ifndef DGL_C_API_COMMON_H_
#define DGL_C_API_COMMON_H_

#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace dgl {

// Communicator handler type
typedef void* CommunicatorHandle;

// KVstore message handler type
typedef void* KVMsgHandle;

/**
 * @brief Convert a vector of NDArray to PackedFunc.
 */
dgl::runtime::PackedFunc ConvertNDArrayVectorToPackedFunc(
    const std::vector& vec);

/**
 * @brief Copy a vector to an NDArray.
 *
 * The data type of the NDArray will be IdType, which must be an integer type.
* The element type (DType) of the vector must be convertible to IdType. */ template dgl::runtime::NDArray CopyVectorToNDArray(const std::vector& vec) { using dgl::runtime::NDArray; const int64_t len = vec.size(); NDArray a = NDArray::Empty( {len}, DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, DGLContext{kDGLCPU, 0}); std::copy(vec.begin(), vec.end(), static_cast(a->data)); return a; } runtime::PackedFunc ConvertEdgeArrayToPackedFunc(const EdgeArray& ea); } // namespace dgl #endif // DGL_C_API_COMMON_H_ ================================================ FILE: src/geometry/cpu/geometry_op_impl.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file array/cpu/geometry_op_impl.cc * @brief Geometry operator CPU implementation */ #include #include #include #include #include "../geometry_op.h" namespace dgl { using runtime::NDArray; namespace geometry { namespace impl { /** @brief Knuth shuffle algorithm */ template void IndexShuffle(IdType *idxs, int64_t num_elems) { for (int64_t i = num_elems - 1; i > 0; --i) { int64_t j = dgl::RandomEngine::ThreadLocal()->RandInt(i); std::swap(idxs[i], idxs[j]); } } template void IndexShuffle(int32_t *idxs, int64_t num_elems); template void IndexShuffle(int64_t *idxs, int64_t num_elems); /** @brief Groupwise index shuffle algorithm. This function will perform shuffle * in subarrays indicated by group index. The group index is similar to indptr * in CSRMatrix. * * @param group_idxs group index array. * @param idxs index array for shuffle. 
* @param num_groups_idxs length of group_idxs
 * @param num_elems length of idxs
 */
template
void GroupIndexShuffle(
    const IdType *group_idxs, IdType *idxs, int64_t num_groups_idxs,
    int64_t num_elems) {
  if (num_groups_idxs < 2) return;  // empty idxs array
  // The last group boundary must not point past the end of idxs.
  CHECK_LE(group_idxs[num_groups_idxs - 1], num_elems)
      << "group_idxs out of range";
  // Shuffle each subarray [group_idxs[i], group_idxs[i + 1]) independently.
  for (int64_t i = 0; i < num_groups_idxs - 1; ++i) {
    auto subarray_len = group_idxs[i + 1] - group_idxs[i];
    IndexShuffle(idxs + group_idxs[i], subarray_len);
  }
}
template void GroupIndexShuffle(
    const int32_t *group_idxs, int32_t *idxs, int64_t num_groups_idxs,
    int64_t num_elems);
template void GroupIndexShuffle(
    const int64_t *group_idxs, int64_t *idxs, int64_t num_groups_idxs,
    int64_t num_elems);

// Returns a CPU IdArray of length num_nodes holding 0 .. num_nodes - 1 in the
// order produced by IndexShuffle.
template
IdArray RandomPerm(int64_t num_nodes) {
  IdArray perm =
      aten::NewIdArray(num_nodes, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
  IdType *perm_data = static_cast(perm->data);
  std::iota(perm_data, perm_data + num_nodes, 0);
  IndexShuffle(perm_data, num_nodes);
  return perm;
}

// Returns a CPU IdArray of length num_nodes holding 0 .. num_nodes - 1,
// shuffled only within the groups delimited by group_idxs (see
// GroupIndexShuffle).
template
IdArray GroupRandomPerm(
    const IdType *group_idxs, int64_t num_group_idxs, int64_t num_nodes) {
  IdArray perm =
      aten::NewIdArray(num_nodes, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8);
  IdType *perm_data = static_cast(perm->data);
  std::iota(perm_data, perm_data + num_nodes, 0);
  GroupIndexShuffle(group_idxs, perm_data, num_group_idxs, num_nodes);
  return perm;
}

/**
 * @brief Farthest Point Sampler without the need to compute all pairs of
 * distance.
 *
 * The input array has shape (N, d), where N is the number of points, and d is
 * the dimension. It consists of a (flatten) batch of point clouds.
 *
 * In each batch, the algorithm starts with the sample index specified by
 * ``start_idx``. Then for each point, we maintain the minimum to-sample
 * distance. Finally, we pick the point with the maximum such distance. This
 * process will be repeated for ``sample_points`` - 1 times.
*/ template void FarthestPointSampler( NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result) { const FloatType *array_data = static_cast(array->data); const int64_t point_in_batch = array->shape[0] / batch_size; const int64_t dim = array->shape[1]; // distance FloatType *dist_data = static_cast(dist->data); // sample for each cloud in the batch IdType *start_idx_data = static_cast(start_idx->data); // return value IdType *ret_data = static_cast(result->data); int64_t array_start = 0, ret_start = 0; // loop for each point cloud sample in this batch for (auto b = 0; b < batch_size; b++) { // random init start sample int64_t sample_idx = (int64_t)start_idx_data[b]; ret_data[ret_start] = (IdType)(sample_idx); // sample the rest `sample_points - 1` points for (auto i = 0; i < sample_points - 1; i++) { // re-init distance and the argmax int64_t dist_argmax = 0; FloatType dist_max = -1; // update the distance for (auto j = 0; j < point_in_batch; j++) { // compute the distance on dimensions FloatType one_dist = 0; for (auto d = 0; d < dim; d++) { FloatType tmp = array_data[(array_start + j) * dim + d] - array_data[(array_start + sample_idx) * dim + d]; one_dist += tmp * tmp; } // for each out-of-set point, keep its nearest to-the-set distance if (i == 0 || dist_data[j] > one_dist) { dist_data[j] = one_dist; } // look for the farthest sample if (dist_data[j] > dist_max) { dist_argmax = j; dist_max = dist_data[j]; } } // sample the `dist_argmax`-th point sample_idx = dist_argmax; ret_data[ret_start + i + 1] = (IdType)(sample_idx); } array_start += point_in_batch; ret_start += sample_points; } } template void FarthestPointSampler( NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result); template void FarthestPointSampler( NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result); template void FarthestPointSampler( NDArray 
array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result); template void FarthestPointSampler( NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result) { const int64_t num_nodes = result->shape[0]; const IdType *indptr_data = static_cast(csr.indptr->data); const IdType *indices_data = static_cast(csr.indices->data); IdType *result_data = static_cast(result->data); FloatType *weight_data = static_cast(weight->data); // build node visiting order IdArray vis_order = RandomPerm(num_nodes); IdType *vis_order_data = static_cast(vis_order->data); for (int64_t n = 0; n < num_nodes; ++n) { auto u = vis_order_data[n]; // if marked if (result_data[u] >= 0) continue; auto v_max = u; FloatType weight_max = 0; for (auto e = indptr_data[u]; e < indptr_data[u + 1]; ++e) { auto v = indices_data[e]; if (result_data[v] >= 0) continue; if (weight_data[e] >= weight_max) { v_max = v; weight_max = weight_data[e]; } } result_data[u] = std::min(u, v_max); result_data[v_max] = result_data[u]; } } template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) { const int64_t num_nodes = result->shape[0]; const IdType *indptr_data = static_cast(csr.indptr->data); const IdType *indices_data = static_cast(csr.indices->data); IdType *result_data = static_cast(result->data); // build vis order IdArray u_vis_order = RandomPerm(num_nodes); IdType *u_vis_order_data = 
static_cast(u_vis_order->data); IdArray v_vis_order = GroupRandomPerm( indptr_data, csr.indptr->shape[0], csr.indices->shape[0]); IdType *v_vis_order_data = static_cast(v_vis_order->data); for (int64_t n = 0; n < num_nodes; ++n) { auto u = u_vis_order_data[n]; // if marked if (result_data[u] >= 0) continue; result_data[u] = u; for (auto e = indptr_data[u]; e < indptr_data[u + 1]; ++e) { auto v = indices_data[v_vis_order_data[e]]; if (result_data[v] >= 0) continue; result_data[u] = std::min(u, v); result_data[v] = result_data[u]; break; } } } template void NeighborMatching( const aten::CSRMatrix &csr, IdArray result); template void NeighborMatching( const aten::CSRMatrix &csr, IdArray result); } // namespace impl } // namespace geometry } // namespace dgl ================================================ FILE: src/geometry/cuda/edge_coarsening_impl.cu ================================================ /** * Copyright (c) 2019 by Contributors * @file geometry/cuda/edge_coarsening_impl.cu * @brief Edge coarsening CUDA implementation */ #include #include #include #include #include #include "../../array/cuda/utils.h" #include "../../runtime/cuda/cuda_common.h" #include "../geometry_op.h" #define BLOCKS(N, T) (N + T - 1) / T namespace dgl { namespace geometry { namespace impl { constexpr float BLUE_P = 0.53406; constexpr int BLUE = -1; constexpr int RED = -2; constexpr int EMPTY_IDX = -1; __device__ bool done_d; __global__ void init_done_kernel() { done_d = true; } __global__ void generate_uniform_kernel( float *ret_values, size_t num, uint64_t seed) { size_t id = blockIdx.x * blockDim.x + threadIdx.x; if (id < num) { curandState state; curand_init(seed, id, 0, &state); ret_values[id] = curand_uniform(&state); } } template __global__ void colorize_kernel( const float *prop, int64_t num_elem, IdType *result) { const IdType idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num_elem) { if (result[idx] < 0) { // if unmatched result[idx] = (prop[idx] > BLUE_P) ? 
RED : BLUE; done_d = false; } } } template __global__ void weighted_propose_kernel( const IdType *indptr, const IdType *indices, const FloatType *weights, int64_t num_elem, IdType *proposal, IdType *result) { const IdType idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num_elem) { if (result[idx] != BLUE) return; bool has_unmatched_neighbor = false; FloatType weight_max = 0.; IdType v_max = EMPTY_IDX; for (IdType i = indptr[idx]; i < indptr[idx + 1]; ++i) { auto v = indices[i]; if (result[v] < 0) has_unmatched_neighbor = true; if (result[v] == RED && weights[i] >= weight_max) { v_max = v; weight_max = weights[i]; } } proposal[idx] = v_max; if (!has_unmatched_neighbor) result[idx] = idx; } } template __global__ void weighted_respond_kernel( const IdType *indptr, const IdType *indices, const FloatType *weights, int64_t num_elem, IdType *proposal, IdType *result) { const IdType idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < num_elem) { if (result[idx] != RED) return; bool has_unmatched_neighbors = false; IdType v_max = -1; FloatType weight_max = 0.; for (IdType i = indptr[idx]; i < indptr[idx + 1]; ++i) { auto v = indices[i]; if (result[v] < 0) { has_unmatched_neighbors = true; } if (result[v] == BLUE && proposal[v] == idx && weights[i] >= weight_max) { v_max = v; weight_max = weights[i]; } } if (v_max >= 0) { result[v_max] = min(idx, v_max); result[idx] = min(idx, v_max); } if (!has_unmatched_neighbors) result[idx] = idx; } } /** @brief The colorize procedure. This procedure randomly marks unmarked * nodes with BLUE(-1) and RED(-2) and checks whether the node matching * process has finished. 
*/
template
bool Colorize(IdType *result_data, int64_t num_nodes, float *const prop) {
  // initial done signal
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  CUDA_KERNEL_CALL(init_done_kernel, 1, 1, 0, stream);

  // generate color prop for each node
  // (a fresh seed is drawn on every call, so each pass gets new samples)
  uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX);
  auto num_threads = cuda::FindNumThreads(num_nodes);
  auto num_blocks = cuda::FindNumBlocks<'x'>(BLOCKS(num_nodes, num_threads));
  CUDA_KERNEL_CALL(
      generate_uniform_kernel, num_blocks, num_threads, 0, stream, prop,
      num_nodes, seed);

  // call kernel
  CUDA_KERNEL_CALL(
      colorize_kernel, num_blocks, num_threads, 0, stream, prop, num_nodes,
      result_data);
  // Read the device flag back: it is still true only if colorize_kernel found
  // no unmatched node.
  bool done_h = false;
  CUDA_CALL(cudaMemcpyFromSymbol(
      &done_h, done_d, sizeof(done_h), 0, cudaMemcpyDeviceToHost));
  return done_h;
}

/** @brief Weighted neighbor matching procedure (GPU version).
 * This implementation is from `A GPU Algorithm for Greedy Graph Matching
 * `__
 *
 * This algorithm has three parts: colorize, propose and respond.
 * In colorize procedure, each unmarked node will be marked as BLUE or
 * RED randomly. If all nodes are marked, finish and return.
 * In propose procedure, each BLUE node will propose to the RED
 * neighbor with the largest weight (or randomly choose one if without weight).
 * If all its neighbors are marked, mark this node with its id.
 * In respond procedure, each RED node will respond to the BLUE neighbor
 * that has proposed to it and has the largest weight. If all neighbors
 * are marked, mark this node with its id. Else match this (BLUE, RED) node
 * pair and mark them with the smaller id between them.
*/ template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto &ctx = result->ctx; auto device = runtime::DeviceAPI::Get(ctx); device->SetDevice(ctx); // create proposal tensor const int64_t num_nodes = result->shape[0]; IdArray proposal = aten::Full(-1, num_nodes, sizeof(IdType) * 8, ctx); // get data ptrs IdType *indptr_data = static_cast(csr.indptr->data); IdType *indices_data = static_cast(csr.indices->data); IdType *result_data = static_cast(result->data); IdType *proposal_data = static_cast(proposal->data); FloatType *weight_data = static_cast(weight->data); // allocate workspace for prop used in Colorize() float *prop = static_cast( device->AllocWorkspace(ctx, num_nodes * sizeof(float))); auto num_threads = cuda::FindNumThreads(num_nodes); auto num_blocks = cuda::FindNumBlocks<'x'>(BLOCKS(num_nodes, num_threads)); while (!Colorize(result_data, num_nodes, prop)) { CUDA_KERNEL_CALL( weighted_propose_kernel, num_blocks, num_threads, 0, stream, indptr_data, indices_data, weight_data, num_nodes, proposal_data, result_data); CUDA_KERNEL_CALL( weighted_respond_kernel, num_blocks, num_threads, 0, stream, indptr_data, indices_data, weight_data, num_nodes, proposal_data, result_data); } device->FreeWorkspace(ctx, prop); } template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); /** @brief Unweighted neighbor matching procedure (GPU version). * Instead of directly sample neighbors, we assign each neighbor * with a random weight. We use random weight for 2 reasons: * 1. 
Random sample for each node in GPU is expensive. Although
 * we can perform a global group-wise (neighborhood of each
 * node as a group) random permutation as in CPU version,
 * it still costs too much compared to directly using random weights.
 * 2. Graph is sparse, thus neighborhood of each node is small,
 * which is suitable for GPU implementation.
 */
template
void NeighborMatching(const aten::CSRMatrix &csr, IdArray result) {
  // One random weight is generated per stored edge (entry of csr.indices).
  const int64_t num_edges = csr.indices->shape[0];
  const auto &ctx = result->ctx;
  auto device = runtime::DeviceAPI::Get(ctx);
  device->SetDevice(ctx);

  // generate random weights: a float array of length num_edges on the same
  // device as `result`, filled by generate_uniform_kernel with a fresh seed
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  NDArray weight = NDArray::Empty(
      {num_edges}, DGLDataType{kDGLFloat, sizeof(float) * 8, 1}, ctx);
  float *weight_data = static_cast(weight->data);
  uint64_t seed = dgl::RandomEngine::ThreadLocal()->RandInt(UINT64_MAX);
  auto num_threads = cuda::FindNumThreads(num_edges);
  auto num_blocks = cuda::FindNumBlocks<'x'>(BLOCKS(num_edges, num_threads));
  CUDA_KERNEL_CALL(
      generate_uniform_kernel, num_blocks, num_threads, 0, stream, weight_data,
      num_edges, seed);

  // Reuse the weighted procedure with the random weights generated above.
  WeightedNeighborMatching(csr, weight, result);
}

// Explicit template instantiations of NeighborMatching.
template void NeighborMatching(
    const aten::CSRMatrix &csr, IdArray result);
template void NeighborMatching(
    const aten::CSRMatrix &csr, IdArray result);

}  // namespace impl
}  // namespace geometry
}  // namespace dgl

================================================
FILE: src/geometry/cuda/geometry_op_impl.cu
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file geometry/cuda/geometry_op_impl.cu
 * @brief Geometry operator CUDA implementation
 */
#include

#include "../../c_api_common.h"
#include "../../runtime/cuda/cuda_common.h"
#include "../geometry_op.h"

// Number of threads per block used to launch fps_kernel; it is also the size
// of the kernel's per-block shared-memory scratch arrays.
#define THREADS 1024

namespace dgl {
namespace geometry {
namespace impl {

/**
 * @brief Farthest Point Sampler without the need to compute all pairs of
 * distance.
*
 * The input array has shape (N, d), where N is the number of points, and d is
 * the dimension. It consists of a (flatten) batch of point clouds.
 *
 * In each batch, the algorithm starts with the sample index specified by
 * ``start_idx``. Then for each point, we maintain the minimum to-sample
 * distance. Finally, we pick the point with the maximum such distance. This
 * process will be repeated for ``sample_points`` - 1 times.
 *
 * Launch layout (see the host wrapper below): one block per batch element
 * (``blockIdx.x`` is the batch index) and ``THREADS`` threads per block.
 * ``dist_data`` is scratch storage indexed per point; it is fully overwritten
 * in the first iteration, so it does not need to be initialized by the caller.
 */
template
__global__ void fps_kernel(
    const FloatType* array_data, const int64_t batch_size,
    const int64_t sample_points, const int64_t point_in_batch,
    const int64_t dim, const IdType* start_idx, FloatType* dist_data,
    IdType* ret_data) {
  const int64_t thread_idx = threadIdx.x;
  const int64_t batch_idx = blockIdx.x;

  // Offsets of this batch element in the point array and in the output.
  const int64_t array_start = point_in_batch * batch_idx;
  const int64_t ret_start = sample_points * batch_idx;

  // Per-thread running maximum distance and the local index of the point that
  // attains it; combined by thread 0 at the end of every iteration.
  __shared__ FloatType dist_max_ht[THREADS];
  __shared__ int64_t dist_argmax_ht[THREADS];

  // start with random initialization
  if (thread_idx == 0) {
    ret_data[ret_start] = (IdType)(start_idx[batch_idx]);
  }

  // sample the rest `sample_points - 1` points
  for (auto i = 0; i < sample_points - 1; i++) {
    // Barrier: thread 0's write of ret_data[ret_start + i] (before the loop or
    // at the end of the previous iteration) must precede the read below.
    __syncthreads();

    // the last sampled point
    int64_t sample_idx = (int64_t)(ret_data[ret_start + i]);
    // -1 is below any squared distance, so a thread that owns no point never
    // beats a thread that owns one.
    dist_argmax_ht[thread_idx] = 0;
    dist_max_ht[thread_idx] = (FloatType)(-1.);

    // multi-thread distance calculation: thread t handles points
    // t, t + THREADS, t + 2 * THREADS, ... of this batch element
    for (auto j = thread_idx; j < point_in_batch; j += THREADS) {
      // squared Euclidean distance between point j and the last sample
      FloatType one_dist = (FloatType)(0.);
      for (auto d = 0; d < dim; d++) {
        FloatType tmp = array_data[(array_start + j) * dim + d] -
                        array_data[(array_start + sample_idx) * dim + d];
        one_dist += tmp * tmp;
      }

      // keep the minimum distance to any sample chosen so far; in the first
      // iteration (i == 0) the stored value is overwritten unconditionally
      if (i == 0 || dist_data[array_start + j] > one_dist) {
        dist_data[array_start + j] = one_dist;
      }

      // track this thread's farthest point
      if (dist_data[array_start + j] > dist_max_ht[thread_idx]) {
        dist_argmax_ht[thread_idx] = j;
        dist_max_ht[thread_idx] = dist_data[array_start + j];
      }
    }

    // Barrier: all per-thread maxima must be written before thread 0 scans
    // the shared arrays.
    __syncthreads();

    // Thread 0 scans all THREADS entries sequentially and records the overall
    // farthest point as the next sample.
    if (thread_idx == 0) {
      FloatType best = dist_max_ht[0];
      int64_t best_idx = dist_argmax_ht[0];
      for (auto j = 1; j < THREADS; j++) {
        if (dist_max_ht[j] > best) {
          best = dist_max_ht[j];
          best_idx = dist_argmax_ht[j];
        }
      }
      ret_data[ret_start + i + 1] = (IdType)(best_idx);
    }
  }
}

/**
 * @brief Host wrapper that launches fps_kernel on the current CUDA stream.
 *
 * The number of points per batch element is ``array->shape[0] / batch_size``
 * (integer division), and the point dimension is ``array->shape[1]``.
 * The sampled indices are written into ``result``.
 */
template
void FarthestPointSampler(
    NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist,
    IdArray start_idx, IdArray result) {
  cudaStream_t stream = runtime::getCurrentCUDAStream();

  const FloatType* array_data = static_cast(array->data);

  const int64_t point_in_batch = array->shape[0] / batch_size;
  const int64_t dim = array->shape[1];

  // return value
  IdType* ret_data = static_cast(result->data);

  // distance
  FloatType* dist_data = static_cast(dist->data);

  // sample for each cloud in the batch
  IdType* start_idx_data = static_cast(start_idx->data);

  CUDA_CALL(cudaSetDevice(array->ctx.device_id));

  // One block per batch element, THREADS threads per block.
  CUDA_KERNEL_CALL(
      fps_kernel, batch_size, THREADS, 0, stream, array_data, batch_size,
      sample_points, point_in_batch, dim, start_idx_data, dist_data, ret_data);
}

// Explicit template instantiations of FarthestPointSampler.
template void FarthestPointSampler(
    NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist,
    IdArray start_idx, IdArray result);
template void FarthestPointSampler(
    NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist,
    IdArray start_idx, IdArray result);
template void FarthestPointSampler(
    NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist,
    IdArray start_idx, IdArray result);
template void FarthestPointSampler(
    NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist,
    IdArray start_idx, IdArray result);

}  // namespace impl
}  // namespace geometry
}  // namespace dgl

================================================
FILE: src/geometry/geometry.cc
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file geometry/geometry.cc
 * @brief DGL geometry utilities implementation
 */

#include
#include
#include
#include

#include "../array/check.h"
#include "../c_api_common.h"
#include "./geometry_op.h"

using namespace dgl::runtime;

namespace dgl {
namespace geometry { void FarthestPointSampler( NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result) { CHECK_EQ(array->ctx, result->ctx) << "Array and the result should be on the same device."; CHECK_EQ(array->shape[0], dist->shape[0]) << "Shape of array and dist mismatch"; CHECK_EQ(start_idx->shape[0], batch_size) << "Shape of start_idx and batch_size mismatch"; CHECK_EQ(result->shape[0], batch_size * sample_points) << "Invalid shape of result"; ATEN_FLOAT_TYPE_SWITCH(array->dtype, FloatType, "values", { ATEN_ID_TYPE_SWITCH(result->dtype, IdType, { ATEN_XPU_SWITCH_CUDA( array->ctx.device_type, XPU, "FarthestPointSampler", { impl::FarthestPointSampler( array, batch_size, sample_points, dist, start_idx, result); }); }); }); } void NeighborMatching( HeteroGraphPtr graph, const NDArray weight, IdArray result) { if (!aten::IsNullArray(weight)) { ATEN_XPU_SWITCH_CUDA( graph->Context().device_type, XPU, "NeighborMatching", { ATEN_FLOAT_TYPE_SWITCH(weight->dtype, FloatType, "weight", { ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, { impl::WeightedNeighborMatching( graph->GetCSRMatrix(0), weight, result); }); }); }); } else { ATEN_XPU_SWITCH_CUDA( graph->Context().device_type, XPU, "NeighborMatching", { ATEN_ID_TYPE_SWITCH(graph->DataType(), IdType, { impl::NeighborMatching(graph->GetCSRMatrix(0), result); }); }); } } ///////////////////////// C APIs ///////////////////////// DGL_REGISTER_GLOBAL("geometry._CAPI_FarthestPointSampler") .set_body([](DGLArgs args, DGLRetValue* rv) { const NDArray data = args[0]; const int64_t batch_size = args[1]; const int64_t sample_points = args[2]; NDArray dist = args[3]; IdArray start_idx = args[4]; IdArray result = args[5]; FarthestPointSampler( data, batch_size, sample_points, dist, start_idx, result); }); DGL_REGISTER_GLOBAL("geometry._CAPI_NeighborMatching") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef graph = args[0]; const NDArray weight = args[1]; IdArray result 
= args[2]; // sanity check aten::CheckCtx( graph->Context(), {weight, result}, {"edge_weight, result"}); aten::CheckContiguous({weight, result}, {"edge_weight", "result"}); CHECK_EQ(graph->NumEdgeTypes(), 1) << "homogeneous graph has only one edge type"; CHECK_EQ(result->ndim, 1) << "result should be an 1D tensor."; auto pair = graph->meta_graph()->FindEdge(0); const dgl_type_t node_type = pair.first; CHECK_EQ(graph->NumVertices(node_type), result->shape[0]) << "The number of nodes should be the same as the length of result " "tensor."; if (!aten::IsNullArray(weight)) { CHECK_EQ(weight->ndim, 1) << "weight should be an 1D tensor."; CHECK_EQ(graph->NumEdges(0), weight->shape[0]) << "number of edges in graph should be the same " << "as the length of edge weight tensor."; } // call implementation NeighborMatching(graph.sptr(), weight, result); }); } // namespace geometry } // namespace dgl ================================================ FILE: src/geometry/geometry_op.h ================================================ /** * Copyright (c) 2019 by Contributors * @file geometry/geometry_op.h * @brief Geometry operator templates */ #ifndef DGL_GEOMETRY_GEOMETRY_OP_H_ #define DGL_GEOMETRY_GEOMETRY_OP_H_ #include namespace dgl { namespace geometry { namespace impl { template void FarthestPointSampler( NDArray array, int64_t batch_size, int64_t sample_points, NDArray dist, IdArray start_idx, IdArray result); /** @brief Implementation of weighted neighbor matching process of edge * coarsening used in Metis and Graclus for homogeneous graph coarsening. This * procedure keeps picking an unmarked vertex and matching it with one its * unmarked neighbors (that maximizes its edge weight) until no match can be * done. */ template void WeightedNeighborMatching( const aten::CSRMatrix &csr, const NDArray weight, IdArray result); /** @brief Implementation of neighbor matching process of edge coarsening used * in Metis and Graclus for homogeneous graph coarsening. 
This procedure keeps * picking an unmarked vertex and matching it with one its unmarked neighbors * (that maximizes its edge weight) until no match can be done. */ template void NeighborMatching(const aten::CSRMatrix &csr, IdArray result); } // namespace impl } // namespace geometry } // namespace dgl #endif // DGL_GEOMETRY_GEOMETRY_OP_H_ ================================================ FILE: src/graph/creators.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file graph/creators.cc * @brief Functions for constructing graphs. */ #include "./heterograph.h" using namespace dgl::runtime; namespace dgl { // creator implementation HeteroGraphPtr CreateHeteroGraph( GraphPtr meta_graph, const std::vector& rel_graphs, const std::vector& num_nodes_per_type) { return HeteroGraphPtr( new HeteroGraph(meta_graph, rel_graphs, num_nodes_per_type)); } HeteroGraphPtr CreateFromCOO( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray row, IdArray col, bool row_sorted, bool col_sorted, dgl_format_code_t formats) { auto unit_g = UnitGraph::CreateFromCOO( num_vtypes, num_src, num_dst, row, col, row_sorted, col_sorted, formats); return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); } HeteroGraphPtr CreateFromCOO( int64_t num_vtypes, const aten::COOMatrix& mat, dgl_format_code_t formats) { auto unit_g = UnitGraph::CreateFromCOO(num_vtypes, mat, formats); return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); } HeteroGraphPtr CreateFromCSR( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids, dgl_format_code_t formats) { auto unit_g = UnitGraph::CreateFromCSR( num_vtypes, num_src, num_dst, indptr, indices, edge_ids, formats); return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); } HeteroGraphPtr CreateFromCSR( int64_t num_vtypes, const aten::CSRMatrix& mat, dgl_format_code_t formats) { auto unit_g = 
UnitGraph::CreateFromCSR(num_vtypes, mat, formats); auto ret = HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); } HeteroGraphPtr CreateFromCSC( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids, dgl_format_code_t formats) { auto unit_g = UnitGraph::CreateFromCSC( num_vtypes, num_src, num_dst, indptr, indices, edge_ids, formats); return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); } HeteroGraphPtr CreateFromCSC( int64_t num_vtypes, const aten::CSRMatrix& mat, dgl_format_code_t formats) { auto unit_g = UnitGraph::CreateFromCSC(num_vtypes, mat, formats); return HeteroGraphPtr(new HeteroGraph(unit_g->meta_graph(), {unit_g})); } } // namespace dgl ================================================ FILE: src/graph/gk_ops.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file graph/gk_ops.cc * @brief Graph operation implemented in GKlib */ #if !defined(_WIN32) #include #endif // !defined(_WIN32) #include namespace dgl { #if !defined(_WIN32) /** * Convert DGL CSR to GKLib CSR. * GKLib CSR actually stores a CSR object and a CSC object of a graph. * @param mat the DGL CSR matrix. * @param is_row the input DGL matrix is CSR or CSC. * @return a GKLib CSR. */ gk_csr_t *Convert2GKCsr(const aten::CSRMatrix mat, bool is_row) { // TODO(zhengda) The conversion will be zero-copy in the future. 
CHECK_EQ(mat.indptr->dtype.bits, sizeof(dgl_id_t) * CHAR_BIT); CHECK_EQ(mat.indices->dtype.bits, sizeof(dgl_id_t) * CHAR_BIT); const dgl_id_t *indptr = static_cast(mat.indptr->data); const dgl_id_t *indices = static_cast(mat.indices->data); gk_csr_t *gk_csr = gk_csr_Create(); gk_csr->nrows = mat.num_rows; gk_csr->ncols = mat.num_cols; uint64_t nnz = mat.indices->shape[0]; auto gk_indptr = gk_csr->rowptr; auto gk_indices = gk_csr->rowind; size_t num_ptrs; if (is_row) { num_ptrs = gk_csr->nrows + 1; gk_indptr = gk_csr->rowptr = gk_zmalloc( gk_csr->nrows + 1, const_cast("gk_csr_ExtractPartition: rowptr")); gk_indices = gk_csr->rowind = gk_imalloc(nnz, const_cast("gk_csr_ExtractPartition: rowind")); } else { num_ptrs = gk_csr->ncols + 1; gk_indptr = gk_csr->colptr = gk_zmalloc( gk_csr->ncols + 1, const_cast("gk_csr_ExtractPartition: colptr")); gk_indices = gk_csr->colind = gk_imalloc(nnz, const_cast("gk_csr_ExtractPartition: colind")); } for (size_t i = 0; i < num_ptrs; i++) { gk_indptr[i] = indptr[i]; } for (size_t i = 0; i < nnz; i++) { gk_indices[i] = indices[i]; } return gk_csr; } /** * Convert GKLib CSR to DGL CSR. * GKLib CSR actually stores a CSR object and a CSC object of a graph. * @param gk_csr the GKLib CSR. * @param is_row specify whether to convert the CSR or CSC object of GKLib CSR. * @return a DGL CSR matrix. */ aten::CSRMatrix Convert2DGLCsr(gk_csr_t *gk_csr, bool is_row) { // TODO(zhengda) The conversion will be zero-copy in the future. 
size_t num_ptrs; size_t nnz; auto gk_indptr = gk_csr->rowptr; auto gk_indices = gk_csr->rowind; if (is_row) { num_ptrs = gk_csr->nrows + 1; nnz = gk_csr->rowptr[num_ptrs - 1]; gk_indptr = gk_csr->rowptr; gk_indices = gk_csr->rowind; } else { num_ptrs = gk_csr->ncols + 1; nnz = gk_csr->colptr[num_ptrs - 1]; gk_indptr = gk_csr->colptr; gk_indices = gk_csr->colind; } IdArray indptr_arr = aten::NewIdArray(num_ptrs); IdArray indices_arr = aten::NewIdArray(nnz); IdArray eids_arr = aten::NewIdArray(nnz); dgl_id_t *indptr = static_cast(indptr_arr->data); dgl_id_t *indices = static_cast(indices_arr->data); dgl_id_t *eids = static_cast(eids_arr->data); for (size_t i = 0; i < num_ptrs; i++) { indptr[i] = gk_indptr[i]; } for (size_t i = 0; i < nnz; i++) { indices[i] = gk_indices[i]; eids[i] = i; } return aten::CSRMatrix( gk_csr->nrows, gk_csr->ncols, indptr_arr, indices_arr, eids_arr); } #endif // !defined(_WIN32) GraphPtr GraphOp::ToBidirectedSimpleImmutableGraph(ImmutableGraphPtr ig) { #if !defined(_WIN32) // TODO(zhengda) should we get whatever CSR exists in the graph. CSRPtr csr = ig->GetInCSR(); gk_csr_t *gk_csr = Convert2GKCsr(csr->ToCSRMatrix(), true); gk_csr_t *sym_gk_csr = gk_csr_MakeSymmetric(gk_csr, GK_CSR_SYM_SUM); auto mat = Convert2DGLCsr(sym_gk_csr, true); gk_csr_Free(&gk_csr); gk_csr_Free(&sym_gk_csr); // This is a symmetric graph now. The in-csr and out-csr are the same. 
csr = CSRPtr(new CSR(mat.indptr, mat.indices, mat.data)); return GraphPtr(new ImmutableGraph(csr, csr)); #else return GraphPtr(); #endif // !defined(_WIN32) } } // namespace dgl ================================================ FILE: src/graph/graph.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/graph.cc * @brief DGL graph index implementation */ #include #include #include #include #include #include #include #include "../c_api_common.h" namespace dgl { Graph::Graph(IdArray src_ids, IdArray dst_ids, size_t num_nodes) { CHECK(aten::IsValidIdArray(src_ids)); CHECK(aten::IsValidIdArray(dst_ids)); this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; const dgl_id_t* src_data = static_cast(src_ids->data); const dgl_id_t* dst_data = static_cast(dst_ids->data); all_edges_src_.reserve(num_edges_); all_edges_dst_.reserve(num_edges_); for (uint64_t i = 0; i < num_edges_; i++) { auto src = src_data[i]; auto dst = dst_data[i]; CHECK(HasVertex(src) && HasVertex(dst)) << "Invalid vertices: src=" << src << " dst=" << dst; adjlist_[src].succ.push_back(dst); adjlist_[src].edge_id.push_back(i); reverse_adjlist_[dst].succ.push_back(src); reverse_adjlist_[dst].edge_id.push_back(i); all_edges_src_.push_back(src); all_edges_dst_.push_back(dst); } } bool Graph::IsMultigraph() const { if (num_edges_ <= 1) { return false; } typedef std::pair Pair; std::vector pairs; pairs.reserve(num_edges_); for (uint64_t eid = 0; eid < num_edges_; ++eid) { pairs.emplace_back(all_edges_src_[eid], all_edges_dst_[eid]); } // sort according to src and dst ids std::sort(pairs.begin(), pairs.end(), [](const Pair& t1, const Pair& t2) { return std::get<0>(t1) < std::get<0>(t2) || (std::get<0>(t1) == std::get<0>(t2) && std::get<1>(t1) < std::get<1>(t2)); }); for (uint64_t eid = 0; eid < num_edges_ - 1; ++eid) { // As src and dst are all sorted, we 
only need to compare i and i+1 if (std::get<0>(pairs[eid]) == std::get<0>(pairs[eid + 1]) && std::get<1>(pairs[eid]) == std::get<1>(pairs[eid + 1])) return true; } return false; } void Graph::AddVertices(uint64_t num_vertices) { CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed."; adjlist_.resize(adjlist_.size() + num_vertices); reverse_adjlist_.resize(reverse_adjlist_.size() + num_vertices); } void Graph::AddEdge(dgl_id_t src, dgl_id_t dst) { CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed."; CHECK(HasVertex(src) && HasVertex(dst)) << "Invalid vertices: src=" << src << " dst=" << dst; dgl_id_t eid = num_edges_++; adjlist_[src].succ.push_back(dst); adjlist_[src].edge_id.push_back(eid); reverse_adjlist_[dst].succ.push_back(src); reverse_adjlist_[dst].edge_id.push_back(eid); all_edges_src_.push_back(src); all_edges_dst_.push_back(dst); } void Graph::AddEdges(IdArray src_ids, IdArray dst_ids) { CHECK(!read_only_) << "Graph is read-only. Mutations are not allowed."; CHECK(aten::IsValidIdArray(src_ids)) << "Invalid src id array."; CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid dst id array."; const auto srclen = src_ids->shape[0]; const auto dstlen = dst_ids->shape[0]; const int64_t* src_data = static_cast(src_ids->data); const int64_t* dst_data = static_cast(dst_ids->data); if (srclen == 1) { // one-many for (int64_t i = 0; i < dstlen; ++i) { AddEdge(src_data[0], dst_data[i]); } } else if (dstlen == 1) { // many-one for (int64_t i = 0; i < srclen; ++i) { AddEdge(src_data[i], dst_data[0]); } } else { // many-many CHECK(srclen == dstlen) << "Invalid src and dst id array."; for (int64_t i = 0; i < srclen; ++i) { AddEdge(src_data[i], dst_data[i]); } } } BoolArray Graph::HasVertices(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; BoolArray rst = BoolArray::Empty({len}, vids->dtype, vids->ctx); const int64_t* vid_data = static_cast(vids->data); int64_t* rst_data 
= static_cast(rst->data); const int64_t nverts = NumVertices(); for (int64_t i = 0; i < len; ++i) { rst_data[i] = (vid_data[i] < nverts && vid_data[i] >= 0) ? 1 : 0; } return rst; } // O(E) bool Graph::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const { if (!HasVertex(src) || !HasVertex(dst)) return false; const auto& succ = adjlist_[src].succ; return std::find(succ.begin(), succ.end(), dst) != succ.end(); } // O(E*k) pretty slow BoolArray Graph::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const { CHECK(aten::IsValidIdArray(src_ids)) << "Invalid src id array."; CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid dst id array."; const auto srclen = src_ids->shape[0]; const auto dstlen = dst_ids->shape[0]; const auto rstlen = std::max(srclen, dstlen); BoolArray rst = BoolArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); int64_t* rst_data = static_cast(rst->data); const int64_t* src_data = static_cast(src_ids->data); const int64_t* dst_data = static_cast(dst_ids->data); if (srclen == 1) { // one-many for (int64_t i = 0; i < dstlen; ++i) { rst_data[i] = HasEdgeBetween(src_data[0], dst_data[i]) ? 1 : 0; } } else if (dstlen == 1) { // many-one for (int64_t i = 0; i < srclen; ++i) { rst_data[i] = HasEdgeBetween(src_data[i], dst_data[0]) ? 1 : 0; } } else { // many-many CHECK(srclen == dstlen) << "Invalid src and dst id array."; for (int64_t i = 0; i < srclen; ++i) { rst_data[i] = HasEdgeBetween(src_data[i], dst_data[i]) ? 1 : 0; } } return rst; } // The data is copy-out; support zero-copy? 
IdArray Graph::Predecessors(dgl_id_t vid, uint64_t radius) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; CHECK(radius >= 1) << "invalid radius: " << radius; std::set vset; for (auto& it : reverse_adjlist_[vid].succ) vset.insert(it); const int64_t len = vset.size(); IdArray rst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* rst_data = static_cast(rst->data); std::copy(vset.begin(), vset.end(), rst_data); return rst; } // The data is copy-out; support zero-copy? IdArray Graph::Successors(dgl_id_t vid, uint64_t radius) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; CHECK(radius >= 1) << "invalid radius: " << radius; std::set vset; for (auto& it : adjlist_[vid].succ) vset.insert(it); const int64_t len = vset.size(); IdArray rst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* rst_data = static_cast(rst->data); std::copy(vset.begin(), vset.end(), rst_data); return rst; } // O(E) IdArray Graph::EdgeId(dgl_id_t src, dgl_id_t dst) const { CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst; const auto& succ = adjlist_[src].succ; std::vector edgelist; for (size_t i = 0; i < succ.size(); ++i) { if (succ[i] == dst) edgelist.push_back(adjlist_[src].edge_id[i]); } // FIXME: signed? Also it seems that we are using int64_t everywhere... const int64_t len = edgelist.size(); IdArray rst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); // FIXME: signed? 
int64_t* rst_data = static_cast(rst->data); std::copy(edgelist.begin(), edgelist.end(), rst_data); return rst; } // O(E*k) pretty slow EdgeArray Graph::EdgeIds(IdArray src_ids, IdArray dst_ids) const { CHECK(aten::IsValidIdArray(src_ids)) << "Invalid src id array."; CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid dst id array."; const auto srclen = src_ids->shape[0]; const auto dstlen = dst_ids->shape[0]; int64_t i, j; CHECK((srclen == dstlen) || (srclen == 1) || (dstlen == 1)) << "Invalid src and dst id array."; const int64_t src_stride = (srclen == 1 && dstlen != 1) ? 0 : 1; const int64_t dst_stride = (dstlen == 1 && srclen != 1) ? 0 : 1; const int64_t* src_data = static_cast(src_ids->data); const int64_t* dst_data = static_cast(dst_ids->data); std::vector src, dst, eid; for (i = 0, j = 0; i < srclen && j < dstlen; i += src_stride, j += dst_stride) { const dgl_id_t src_id = src_data[i], dst_id = dst_data[j]; CHECK(HasVertex(src_id) && HasVertex(dst_id)) << "invalid edge: " << src_id << " -> " << dst_id; const auto& succ = adjlist_[src_id].succ; for (size_t k = 0; k < succ.size(); ++k) { if (succ[k] == dst_id) { src.push_back(src_id); dst.push_back(dst_id); eid.push_back(adjlist_[src_id].edge_id[k]); } } } int64_t rstlen = src.size(); IdArray rst_src = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); IdArray rst_dst = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); IdArray rst_eid = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); int64_t* rst_src_data = static_cast(rst_src->data); int64_t* rst_dst_data = static_cast(rst_dst->data); int64_t* rst_eid_data = static_cast(rst_eid->data); std::copy(src.begin(), src.end(), rst_src_data); std::copy(dst.begin(), dst.end(), rst_dst_data); std::copy(eid.begin(), eid.end(), rst_eid_data); return EdgeArray{rst_src, rst_dst, rst_eid}; } EdgeArray Graph::FindEdges(IdArray eids) const { CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array"; int64_t len = eids->shape[0]; IdArray rst_src = 
IdArray::Empty({len}, eids->dtype, eids->ctx); IdArray rst_dst = IdArray::Empty({len}, eids->dtype, eids->ctx); IdArray rst_eid = IdArray::Empty({len}, eids->dtype, eids->ctx); int64_t* eid_data = static_cast(eids->data); int64_t* rst_src_data = static_cast(rst_src->data); int64_t* rst_dst_data = static_cast(rst_dst->data); int64_t* rst_eid_data = static_cast(rst_eid->data); for (uint64_t i = 0; i < (uint64_t)len; ++i) { dgl_id_t eid = eid_data[i]; if (eid >= num_edges_) LOG(FATAL) << "invalid edge id:" << eid; rst_src_data[i] = all_edges_src_[eid]; rst_dst_data[i] = all_edges_dst_[eid]; rst_eid_data[i] = eid; } return EdgeArray{rst_src, rst_dst, rst_eid}; } // O(E) EdgeArray Graph::InEdges(dgl_id_t vid) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; const int64_t len = reverse_adjlist_[vid].succ.size(); IdArray src = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); IdArray dst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); IdArray eid = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* src_data = static_cast(src->data); int64_t* dst_data = static_cast(dst->data); int64_t* eid_data = static_cast(eid->data); for (int64_t i = 0; i < len; ++i) { src_data[i] = reverse_adjlist_[vid].succ[i]; eid_data[i] = reverse_adjlist_[vid].edge_id[i]; } std::fill(dst_data, dst_data + len, vid); return EdgeArray{src, dst, eid}; } // O(E) EdgeArray Graph::InEdges(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; const int64_t* vid_data = static_cast(vids->data); int64_t rstlen = 0; for (int64_t i = 0; i < len; ++i) { CHECK(HasVertex(vid_data[i])) << "Invalid vertex: " << vid_data[i]; rstlen += reverse_adjlist_[vid_data[i]].succ.size(); } IdArray src = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); IdArray dst = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); IdArray eid = IdArray::Empty({rstlen}, 
vids->dtype, vids->ctx); int64_t* src_ptr = static_cast(src->data); int64_t* dst_ptr = static_cast(dst->data); int64_t* eid_ptr = static_cast(eid->data); for (int64_t i = 0; i < len; ++i) { const auto& pred = reverse_adjlist_[vid_data[i]].succ; const auto& eids = reverse_adjlist_[vid_data[i]].edge_id; for (size_t j = 0; j < pred.size(); ++j) { *(src_ptr++) = pred[j]; *(dst_ptr++) = vid_data[i]; *(eid_ptr++) = eids[j]; } } return EdgeArray{src, dst, eid}; } // O(E) EdgeArray Graph::OutEdges(dgl_id_t vid) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; const int64_t len = adjlist_[vid].succ.size(); IdArray src = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); IdArray dst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); IdArray eid = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* src_data = static_cast(src->data); int64_t* dst_data = static_cast(dst->data); int64_t* eid_data = static_cast(eid->data); for (int64_t i = 0; i < len; ++i) { dst_data[i] = adjlist_[vid].succ[i]; eid_data[i] = adjlist_[vid].edge_id[i]; } std::fill(src_data, src_data + len, vid); return EdgeArray{src, dst, eid}; } // O(E) EdgeArray Graph::OutEdges(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; const int64_t* vid_data = static_cast(vids->data); int64_t rstlen = 0; for (int64_t i = 0; i < len; ++i) { CHECK(HasVertex(vid_data[i])) << "Invalid vertex: " << vid_data[i]; rstlen += adjlist_[vid_data[i]].succ.size(); } IdArray src = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); IdArray dst = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); IdArray eid = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); int64_t* src_ptr = static_cast(src->data); int64_t* dst_ptr = static_cast(dst->data); int64_t* eid_ptr = static_cast(eid->data); for (int64_t i = 0; i < len; ++i) { const auto& succ = adjlist_[vid_data[i]].succ; const 
auto& eids = adjlist_[vid_data[i]].edge_id; for (size_t j = 0; j < succ.size(); ++j) { *(src_ptr++) = vid_data[i]; *(dst_ptr++) = succ[j]; *(eid_ptr++) = eids[j]; } } return EdgeArray{src, dst, eid}; } // O(E*log(E)) if sort is required; otherwise, O(E) EdgeArray Graph::Edges(const std::string& order) const { const int64_t len = num_edges_; IdArray src = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); IdArray dst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); IdArray eid = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); if (order == "srcdst") { typedef std::tuple Tuple; std::vector tuples; tuples.reserve(len); for (uint64_t eid = 0; eid < num_edges_; ++eid) { tuples.emplace_back(all_edges_src_[eid], all_edges_dst_[eid], eid); } // sort according to src and dst ids std::sort( tuples.begin(), tuples.end(), [](const Tuple& t1, const Tuple& t2) { return std::get<0>(t1) < std::get<0>(t2) || (std::get<0>(t1) == std::get<0>(t2) && std::get<1>(t1) < std::get<1>(t2)); }); // make return arrays int64_t* src_ptr = static_cast(src->data); int64_t* dst_ptr = static_cast(dst->data); int64_t* eid_ptr = static_cast(eid->data); for (size_t i = 0; i < tuples.size(); ++i) { src_ptr[i] = std::get<0>(tuples[i]); dst_ptr[i] = std::get<1>(tuples[i]); eid_ptr[i] = std::get<2>(tuples[i]); } } else { int64_t* src_ptr = static_cast(src->data); int64_t* dst_ptr = static_cast(dst->data); int64_t* eid_ptr = static_cast(eid->data); std::copy(all_edges_src_.begin(), all_edges_src_.end(), src_ptr); std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), dst_ptr); for (uint64_t eid = 0; eid < num_edges_; ++eid) { eid_ptr[eid] = eid; } } return EdgeArray{src, dst, eid}; } // O(V) DegreeArray Graph::InDegrees(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; const int64_t* vid_data = static_cast(vids->data); DegreeArray rst = 
DegreeArray::Empty({len}, vids->dtype, vids->ctx); int64_t* rst_data = static_cast(rst->data); for (int64_t i = 0; i < len; ++i) { const auto vid = vid_data[i]; CHECK(HasVertex(vid)) << "Invalid vertex: " << vid; rst_data[i] = reverse_adjlist_[vid].succ.size(); } return rst; } // O(V) DegreeArray Graph::OutDegrees(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; const int64_t* vid_data = static_cast(vids->data); DegreeArray rst = DegreeArray::Empty({len}, vids->dtype, vids->ctx); int64_t* rst_data = static_cast(rst->data); for (int64_t i = 0; i < len; ++i) { const auto vid = vid_data[i]; CHECK(HasVertex(vid)) << "Invalid vertex: " << vid; rst_data[i] = adjlist_[vid].succ.size(); } return rst; } Subgraph Graph::VertexSubgraph(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; std::unordered_map oldv2newv; std::vector edges; const int64_t* vid_data = static_cast(vids->data); for (int64_t i = 0; i < len; ++i) { oldv2newv[vid_data[i]] = i; } Subgraph rst; rst.graph = std::make_shared(); rst.induced_vertices = vids; rst.graph->AddVertices(len); for (int64_t i = 0; i < len; ++i) { const dgl_id_t oldvid = vid_data[i]; const dgl_id_t newvid = i; for (size_t j = 0; j < adjlist_[oldvid].succ.size(); ++j) { const dgl_id_t oldsucc = adjlist_[oldvid].succ[j]; if (oldv2newv.count(oldsucc)) { const dgl_id_t newsucc = oldv2newv[oldsucc]; edges.push_back(adjlist_[oldvid].edge_id[j]); rst.graph->AddEdge(newvid, newsucc); } } } rst.induced_edges = IdArray::Empty( {static_cast(edges.size())}, vids->dtype, vids->ctx); std::copy( edges.begin(), edges.end(), static_cast(rst.induced_edges->data)); return rst; } Subgraph Graph::EdgeSubgraph(IdArray eids, bool preserve_nodes) const { CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array."; const auto len = eids->shape[0]; std::vector nodes; const int64_t* eid_data = static_cast(eids->data); 
Subgraph rst; if (!preserve_nodes) { std::unordered_map oldv2newv; for (int64_t i = 0; i < len; ++i) { const dgl_id_t src_id = all_edges_src_[eid_data[i]]; const dgl_id_t dst_id = all_edges_dst_[eid_data[i]]; if (oldv2newv.insert(std::make_pair(src_id, oldv2newv.size())).second) nodes.push_back(src_id); if (oldv2newv.insert(std::make_pair(dst_id, oldv2newv.size())).second) nodes.push_back(dst_id); } rst.graph = std::make_shared(); rst.induced_edges = eids; rst.graph->AddVertices(nodes.size()); for (int64_t i = 0; i < len; ++i) { const dgl_id_t src_id = all_edges_src_[eid_data[i]]; const dgl_id_t dst_id = all_edges_dst_[eid_data[i]]; rst.graph->AddEdge(oldv2newv[src_id], oldv2newv[dst_id]); } rst.induced_vertices = IdArray::Empty( {static_cast(nodes.size())}, eids->dtype, eids->ctx); std::copy( nodes.begin(), nodes.end(), static_cast(rst.induced_vertices->data)); } else { rst.graph = std::make_shared(); rst.induced_edges = eids; rst.graph->AddVertices(NumVertices()); for (int64_t i = 0; i < len; ++i) { dgl_id_t src_id = all_edges_src_[eid_data[i]]; dgl_id_t dst_id = all_edges_dst_[eid_data[i]]; rst.graph->AddEdge(src_id, dst_id); } for (uint64_t i = 0; i < NumVertices(); ++i) nodes.push_back(i); rst.induced_vertices = IdArray::Empty( {static_cast(nodes.size())}, eids->dtype, eids->ctx); std::copy( nodes.begin(), nodes.end(), static_cast(rst.induced_vertices->data)); } return rst; } std::vector Graph::GetAdj( bool transpose, const std::string& fmt) const { uint64_t num_edges = NumEdges(); uint64_t num_nodes = NumVertices(); if (fmt == "coo") { IdArray idx = IdArray::Empty( {2 * static_cast(num_edges)}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* idx_data = static_cast(idx->data); if (transpose) { std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); std::copy( all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges); } else { std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); std::copy( 
all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges);
    }
    // Edge ids in COO form are simply 0 .. num_edges - 1.
    // NOTE: the loop variable `eid` shadows the IdArray `eid` inside the loop.
    IdArray eid = IdArray::Empty(
        {static_cast(num_edges)}, DGLDataType{kDGLInt, 64, 1},
        DGLContext{kDGLCPU, 0});
    int64_t* eid_data = static_cast(eid->data);
    for (uint64_t eid = 0; eid < num_edges; ++eid) {
      eid_data[eid] = eid;
    }
    return std::vector{idx, eid};
  } else if (fmt == "csr") {
    IdArray indptr = IdArray::Empty(
        {static_cast(num_nodes) + 1}, DGLDataType{kDGLInt, 64, 1},
        DGLContext{kDGLCPU, 0});
    IdArray indices = IdArray::Empty(
        {static_cast(num_edges)}, DGLDataType{kDGLInt, 64, 1},
        DGLContext{kDGLCPU, 0});
    IdArray eid = IdArray::Empty(
        {static_cast(num_edges)}, DGLDataType{kDGLInt, 64, 1},
        DGLContext{kDGLCPU, 0});
    int64_t* indptr_data = static_cast(indptr->data);
    int64_t* indices_data = static_cast(indices->data);
    int64_t* eid_data = static_cast(eid->data);
    const AdjacencyList* adjlist;
    if (transpose) {
      // Out-edges.
      adjlist = &adjlist_;
    } else {
      // In-edges.
      adjlist = &reverse_adjlist_;
    }
    // Row i occupies [indptr[i], indptr[i + 1]) in indices and eid.
    indptr_data[0] = 0;
    for (size_t i = 0; i < adjlist->size(); i++) {
      indptr_data[i + 1] = indptr_data[i] + adjlist->at(i).succ.size();
      std::copy(
          adjlist->at(i).succ.begin(), adjlist->at(i).succ.end(),
          indices_data + indptr_data[i]);
      std::copy(
          adjlist->at(i).edge_id.begin(), adjlist->at(i).edge_id.end(),
          eid_data + indptr_data[i]);
    }
    return std::vector{indptr, indices, eid};
  } else {
    LOG(FATAL) << "unsupported format";
    return std::vector();
  }
}

}  // namespace dgl

================================================
FILE: src/graph/graph_apis.cc
================================================
/**
 *  Copyright (c) 2018 by Contributors
 * @file graph/graph_apis.cc
 * @brief DGL graph index APIs
 */
#include
#include
#include
#include
#include
#include

#include "../c_api_common.h"

using dgl::runtime::DGLArgs;
using dgl::runtime::DGLArgValue;
using dgl::runtime::DGLRetValue;
using dgl::runtime::NDArray;
using dgl::runtime::PackedFunc;

namespace dgl {

///////////////////////////// Graph API ///////////////////////////////////
// Each registration below exposes one graph operation under a global name.
// The lambda unpacks positional arguments from `args` and, when the operation
// produces a value, stores it in `*rv`.

// No arguments. Returns a new graph created by Graph::Create().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateMutable")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      *rv = GraphRef(Graph::Create());
    });

// args: [src_ids, dst_ids, num_nodes, readonly]. Builds a graph from COO
// arrays; `readonly` selects ImmutableGraph over Graph.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const IdArray src_ids = args[0];
      const IdArray dst_ids = args[1];
      const int64_t num_nodes = args[2];
      const bool readonly = args[3];
      if (readonly) {
        *rv = GraphRef(
            ImmutableGraph::CreateFromCOO(num_nodes, src_ids, dst_ids));
      } else {
        *rv = GraphRef(Graph::CreateFromCOO(num_nodes, src_ids, dst_ids));
      }
    });

// args: [indptr, indices, edge_dir]. Builds an immutable graph from CSR
// arrays; edge ids are generated as 0 .. len(indices) - 1.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreate")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const IdArray indptr = args[0];
      const IdArray indices = args[1];
      const std::string edge_dir = args[2];

      IdArray edge_ids = IdArray::Empty(
          {indices->shape[0]}, DGLDataType{kDGLInt, 64, 1},
          DGLContext{kDGLCPU, 0});
      int64_t* edge_data = static_cast(edge_ids->data);
      for (int64_t i = 0; i < edge_ids->shape[0]; i++) edge_data[i] = i;
      *rv = GraphRef(
          ImmutableGraph::CreateFromCSR(indptr, indices, edge_ids, edge_dir));
    });

// args: [shared_mem_name]. Builds an immutable graph through the
// CreateFromCSR overload that takes only a name.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCSRCreateMMap")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const std::string shared_mem_name = args[0];
      *rv = GraphRef(ImmutableGraph::CreateFromCSR(shared_mem_name));
    });

// args: [graph, num_vertices]. No return value.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      uint64_t num_vertices = args[1];
      g->AddVertices(num_vertices);
    });

// args: [graph, src, dst]. Adds a single edge. No return value.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t src = args[1];
      const dgl_id_t dst = args[2];
      g->AddEdge(src, dst);
    });

// args: [graph, src array, dst array]. Adds a batch of edges. No return value.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray src = args[1];
      const IdArray dst = args[2];
      g->AddEdges(src, dst);
    });

// args: [graph]. Calls Clear() on the graph. No return value.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphClear")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      g->Clear();
    });

// args: [graph]. Returns g->IsMultigraph().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsMultigraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = g->IsMultigraph();
    });

// args: [graph]. Returns g->IsReadonly().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsReadonly")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = g->IsReadonly();
    });

// args: [graph]. Returns the vertex count.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = static_cast(g->NumVertices());
    });

// args: [graph]. Returns the edge count.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = static_cast(g->NumEdges());
    });

// args: [graph, vid]. Returns g->HasVertex(vid).
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      *rv = g->HasVertex(vid);
    });

// args: [graph, vids]. Batched form of HasVertex.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray vids = args[1];
      *rv = g->HasVertices(vids);
    });

// args: [graph, src, dst]. Returns g->HasEdgeBetween(src, dst).
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t src = args[1];
      const dgl_id_t dst = args[2];
      *rv = g->HasEdgeBetween(src, dst);
    });

// args: [graph, src array, dst array]. Batched form of HasEdgeBetween.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray src = args[1];
      const IdArray dst = args[2];
      *rv = g->HasEdgesBetween(src, dst);
    });

// args: [graph, vid, radius]. Returns g->Predecessors(vid, radius).
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      const uint64_t radius = args[2];
      *rv = g->Predecessors(vid, radius);
    });

// args: [graph, vid, radius]. Returns g->Successors(vid, radius).
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      const uint64_t radius = args[2];
      *rv = g->Successors(vid, radius);
    });

// args: [graph, src, dst]. Returns g->EdgeId(src, dst).
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t src = args[1];
      const dgl_id_t dst = args[2];
      *rv = g->EdgeId(src, dst);
    });

// args: [graph, src array, dst array]. Returns the EdgeArray from EdgeIds()
// wrapped as a PackedFunc.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray src = args[1];
      const IdArray dst = args[2];
      *rv = ConvertEdgeArrayToPackedFunc(g->EdgeIds(src, dst));
    });

// args: [graph, eid]. Returns a PackedFunc over the pair from FindEdge():
// calling it with 0 yields pair.first, any other value yields pair.second.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdge")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t eid = args[1];
      const auto& pair = g->FindEdge(eid);
      // The pair is captured by value so the accessor owns its own copy.
      *rv = PackedFunc([pair](DGLArgs args, DGLRetValue* rv) {
        const int choice = args[0];
        const int64_t ret = (choice == 0 ? pair.first : pair.second);
        *rv = ret;
      });
    });

// args: [graph, eids]. Batched form of FindEdge, returned as a PackedFunc.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray eids = args[1];
      *rv = ConvertEdgeArrayToPackedFunc(g->FindEdges(eids));
    });

// args: [graph, vid]. In-edges of a single vertex.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      *rv = ConvertEdgeArrayToPackedFunc(g->InEdges(vid));
    });

// args: [graph, vids]. In-edges of several vertices.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray vids = args[1];
      *rv = ConvertEdgeArrayToPackedFunc(g->InEdges(vids));
    });

// args: [graph, vid]. Out-edges of a single vertex.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      *rv = ConvertEdgeArrayToPackedFunc(g->OutEdges(vid));
    });

// args: [graph, vids]. Out-edges of several vertices.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray vids = args[1];
      *rv = ConvertEdgeArrayToPackedFunc(g->OutEdges(vids));
    });

// args: [graph, order]. All edges; `order` is forwarded to Edges().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      std::string order = args[1];
      *rv = ConvertEdgeArrayToPackedFunc(g->Edges(order));
    });

// args: [graph, vid]. In-degree of a single vertex.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      *rv = static_cast(g->InDegree(vid));
    });

// args: [graph, vids]. In-degrees of several vertices.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray vids = args[1];
      *rv = g->InDegrees(vids);
    });

// args: [graph, vid]. Out-degree of a single vertex.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const dgl_id_t vid = args[1];
      *rv = static_cast(g->OutDegree(vid));
    });

// args: [graph, vids]. Out-degrees of several vertices.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray vids = args[1];
      *rv = g->OutDegrees(vids);
    });

// args: [graph, vids]. Returns the vertex-induced subgraph, copied into a
// heap-allocated Subgraph.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray vids = args[1];
      std::shared_ptr subg(new Subgraph(g->VertexSubgraph(vids)));
      *rv = SubgraphRef(subg);
    });

// args: [graph, eids, preserve_nodes]. Returns the edge-induced subgraph.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray eids = args[1];
      bool preserve_nodes = args[2];
      std::shared_ptr subg(
          new Subgraph(g->EdgeSubgraph(eids, preserve_nodes)));
      *rv = SubgraphRef(subg);
    });

// args: [graph, transpose, format]. Returns the arrays from GetAdj() wrapped
// as a PackedFunc.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      bool transpose = args[1];
      std::string format = args[2];
      auto res = g->GetAdj(transpose, format);
      *rv = ConvertNDArrayVectorToPackedFunc(res);
    });

// args: [graph]. Returns g->Context().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphContext")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = g->Context();
    });

// args: [graph]. Returns g->NumBits().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumBits")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv =
g->NumBits(); }); // Subgraph C APIs DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLSubgraphGetGraph") .set_body([](DGLArgs args, DGLRetValue* rv) { SubgraphRef subg = args[0]; *rv = GraphRef(subg->graph); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLSubgraphGetInducedVertices") .set_body([](DGLArgs args, DGLRetValue* rv) { SubgraphRef subg = args[0]; *rv = subg->induced_vertices; }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLSubgraphGetInducedEdges") .set_body([](DGLArgs args, DGLRetValue* rv) { SubgraphRef subg = args[0]; *rv = subg->induced_edges; }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLSortAdj") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef g = args[0]; g->SortCSR(); }); } // namespace dgl ================================================ FILE: src/graph/graph_op.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/graph.cc * @brief Graph operation implementation */ #include #include #include #include #include #include #include #include "../c_api_common.h" using namespace dgl::runtime; namespace dgl { namespace { // generate consecutive dgl ids class RangeIter : public std::iterator { public: explicit RangeIter(dgl_id_t from) : cur_(from) {} RangeIter& operator++() { ++cur_; return *this; } RangeIter operator++(int) { RangeIter retval = *this; ++cur_; return retval; } bool operator==(RangeIter other) const { return cur_ == other.cur_; } bool operator!=(RangeIter other) const { return cur_ != other.cur_; } dgl_id_t operator*() const { return cur_; } private: dgl_id_t cur_; }; bool IsMutable(GraphPtr g) { MutableGraphPtr mg = std::dynamic_pointer_cast(g); return mg != nullptr; } } // namespace GraphPtr GraphOp::Reverse(GraphPtr g) { ImmutableGraphPtr ig = std::dynamic_pointer_cast(g); CHECK(ig) << "Reverse is only supported on immutable graph"; return ig->Reverse(); } GraphPtr GraphOp::LineGraph(GraphPtr g, bool backtracking) { MutableGraphPtr mg = std::dynamic_pointer_cast(g); CHECK(mg) << "Line 
graph transformation is only supported on mutable graph"; MutableGraphPtr lg = Graph::Create(); lg->AddVertices(g->NumEdges()); for (size_t i = 0; i < mg->all_edges_src_.size(); ++i) { const auto u = mg->all_edges_src_[i]; const auto v = mg->all_edges_dst_[i]; for (size_t j = 0; j < mg->adjlist_[v].succ.size(); ++j) { if (backtracking || (!backtracking && mg->adjlist_[v].succ[j] != u)) { lg->AddEdge(i, mg->adjlist_[v].edge_id[j]); } } } return lg; } GraphPtr GraphOp::DisjointUnion(std::vector graphs) { CHECK_GT(graphs.size(), 0) << "Input graph list is empty"; if (IsMutable(graphs[0])) { // Disjointly union of a list of mutable graph inputs. The result is // also a mutable graph. MutableGraphPtr rst = Graph::Create(); uint64_t cumsum = 0; for (GraphPtr gr : graphs) { MutableGraphPtr mg = std::dynamic_pointer_cast(gr); CHECK(mg) << "All the input graphs should be mutable graphs."; rst->AddVertices(gr->NumVertices()); for (uint64_t i = 0; i < gr->NumEdges(); ++i) { // TODO(minjie): quite ugly to expose internal members rst->AddEdge( mg->all_edges_src_[i] + cumsum, mg->all_edges_dst_[i] + cumsum); } cumsum += gr->NumVertices(); } return rst; } else { // Disjointly union of a list of immutable graph inputs. The result is // also an immutable graph. int64_t num_nodes = 0; int64_t num_edges = 0; for (auto gr : graphs) { num_nodes += gr->NumVertices(); num_edges += gr->NumEdges(); } IdArray indptr_arr = aten::NewIdArray(num_nodes + 1); IdArray indices_arr = aten::NewIdArray(num_edges); IdArray edge_ids_arr = aten::NewIdArray(num_edges); dgl_id_t* indptr = static_cast(indptr_arr->data); dgl_id_t* indices = static_cast(indices_arr->data); dgl_id_t* edge_ids = static_cast(edge_ids_arr->data); indptr[0] = 0; dgl_id_t cum_num_nodes = 0; dgl_id_t cum_num_edges = 0; for (auto g : graphs) { ImmutableGraphPtr gr = std::dynamic_pointer_cast(g); CHECK(gr) << "All the input graphs should be immutable graphs."; // TODO(minjie): why in csr? 
const CSRPtr g_csrptr = gr->GetInCSR(); const uint64_t g_num_nodes = g_csrptr->NumVertices(); const uint64_t g_num_edges = g_csrptr->NumEdges(); dgl_id_t* g_indptr = static_cast(g_csrptr->indptr()->data); dgl_id_t* g_indices = static_cast(g_csrptr->indices()->data); dgl_id_t* g_edge_ids = static_cast(g_csrptr->edge_ids()->data); for (dgl_id_t i = 1; i < g_num_nodes + 1; ++i) { indptr[cum_num_nodes + i] = g_indptr[i] + cum_num_edges; } for (dgl_id_t i = 0; i < g_num_edges; ++i) { indices[cum_num_edges + i] = g_indices[i] + cum_num_nodes; } for (dgl_id_t i = 0; i < g_num_edges; ++i) { edge_ids[cum_num_edges + i] = g_edge_ids[i] + cum_num_edges; } cum_num_nodes += g_num_nodes; cum_num_edges += g_num_edges; } return ImmutableGraph::CreateFromCSR( indptr_arr, indices_arr, edge_ids_arr, "in"); } } std::vector GraphOp::DisjointPartitionByNum( GraphPtr graph, int64_t num) { CHECK(num != 0 && graph->NumVertices() % num == 0) << "Number of partitions must evenly divide the number of nodes."; IdArray sizes = IdArray::Empty( {num}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* sizes_data = static_cast(sizes->data); std::fill(sizes_data, sizes_data + num, graph->NumVertices() / num); return DisjointPartitionBySizes(graph, sizes); } std::vector GraphOp::DisjointPartitionBySizes( GraphPtr batched_graph, IdArray sizes) { const int64_t len = sizes->shape[0]; const int64_t* sizes_data = static_cast(sizes->data); std::vector cumsum; cumsum.push_back(0); for (int64_t i = 0; i < len; ++i) { cumsum.push_back(cumsum[i] + sizes_data[i]); } CHECK_EQ(cumsum[len], batched_graph->NumVertices()) << "Sum of the given sizes must equal to the number of nodes."; std::vector rst; if (IsMutable(batched_graph)) { // Input is a mutable graph. Partition it into several mutable graphs. 
MutableGraphPtr graph = std::dynamic_pointer_cast(batched_graph); dgl_id_t node_offset = 0, edge_offset = 0; for (int64_t i = 0; i < len; ++i) { MutableGraphPtr mg = Graph::Create(); // TODO(minjie): quite ugly to expose internal members // copy adj mg->adjlist_.insert( mg->adjlist_.end(), graph->adjlist_.begin() + node_offset, graph->adjlist_.begin() + node_offset + sizes_data[i]); mg->reverse_adjlist_.insert( mg->reverse_adjlist_.end(), graph->reverse_adjlist_.begin() + node_offset, graph->reverse_adjlist_.begin() + node_offset + sizes_data[i]); // relabel adjs size_t num_edges = 0; for (auto& elist : mg->adjlist_) { for (size_t j = 0; j < elist.succ.size(); ++j) { elist.succ[j] -= node_offset; elist.edge_id[j] -= edge_offset; } num_edges += elist.succ.size(); } for (auto& elist : mg->reverse_adjlist_) { for (size_t j = 0; j < elist.succ.size(); ++j) { elist.succ[j] -= node_offset; elist.edge_id[j] -= edge_offset; } } // copy edges mg->all_edges_src_.reserve(num_edges); mg->all_edges_dst_.reserve(num_edges); mg->num_edges_ = num_edges; for (size_t j = edge_offset; j < edge_offset + num_edges; ++j) { mg->all_edges_src_.push_back(graph->all_edges_src_[j] - node_offset); mg->all_edges_dst_.push_back(graph->all_edges_dst_[j] - node_offset); } // push to rst rst.push_back(mg); // update offset CHECK_EQ(rst[i]->NumVertices(), sizes_data[i]); CHECK_EQ(rst[i]->NumEdges(), num_edges); node_offset += sizes_data[i]; edge_offset += num_edges; } } else { // Input is an immutable graph. Partition it into several multiple graphs. ImmutableGraphPtr graph = std::dynamic_pointer_cast(batched_graph); // TODO(minjie): why in csr? 
CSRPtr in_csr_ptr = graph->GetInCSR(); const dgl_id_t* indptr = static_cast(in_csr_ptr->indptr()->data); const dgl_id_t* indices = static_cast(in_csr_ptr->indices()->data); const dgl_id_t* edge_ids = static_cast(in_csr_ptr->edge_ids()->data); dgl_id_t cum_sum_edges = 0; for (int64_t i = 0; i < len; ++i) { const int64_t start_pos = cumsum[i]; const int64_t end_pos = cumsum[i + 1]; const int64_t g_num_nodes = sizes_data[i]; const int64_t g_num_edges = indptr[end_pos] - indptr[start_pos]; IdArray indptr_arr = aten::NewIdArray(g_num_nodes + 1); IdArray indices_arr = aten::NewIdArray(g_num_edges); IdArray edge_ids_arr = aten::NewIdArray(g_num_edges); dgl_id_t* g_indptr = static_cast(indptr_arr->data); dgl_id_t* g_indices = static_cast(indices_arr->data); dgl_id_t* g_edge_ids = static_cast(edge_ids_arr->data); const dgl_id_t idoff = indptr[start_pos]; g_indptr[0] = 0; for (int l = start_pos + 1; l < end_pos + 1; ++l) { g_indptr[l - start_pos] = indptr[l] - indptr[start_pos]; } for (dgl_id_t j = indptr[start_pos]; j < indptr[end_pos]; ++j) { g_indices[j - idoff] = indices[j] - cumsum[i]; } for (dgl_id_t k = indptr[start_pos]; k < indptr[end_pos]; ++k) { g_edge_ids[k - idoff] = edge_ids[k] - cum_sum_edges; } cum_sum_edges += g_num_edges; rst.push_back(ImmutableGraph::CreateFromCSR( indptr_arr, indices_arr, edge_ids_arr, "in")); } } return rst; } IdArray GraphOp::MapParentIdToSubgraphId(IdArray parent_vids, IdArray query) { CHECK(aten::IsValidIdArray(parent_vids)) << "Invalid parent id array."; CHECK(aten::IsValidIdArray(query)) << "Invalid query id array."; const auto parent_len = parent_vids->shape[0]; const auto query_len = query->shape[0]; const dgl_id_t* parent_data = static_cast(parent_vids->data); const dgl_id_t* query_data = static_cast(query->data); IdArray rst = IdArray::Empty( {query_len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); dgl_id_t* rst_data = static_cast(rst->data); const bool is_sorted = std::is_sorted(parent_data, parent_data + parent_len); 
if (is_sorted) { runtime::parallel_for(0, query_len, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { const dgl_id_t id = query_data[i]; const auto it = std::find(parent_data, parent_data + parent_len, id); // If the vertex Id doesn't exist, the vid in the subgraph is -1. if (it != parent_data + parent_len) { rst_data[i] = it - parent_data; } else { rst_data[i] = -1; } } }); } else { std::unordered_map parent_map; for (int64_t i = 0; i < parent_len; i++) { const dgl_id_t id = parent_data[i]; parent_map[id] = i; } runtime::parallel_for(0, query_len, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { const dgl_id_t id = query_data[i]; auto it = parent_map.find(id); // If the vertex Id doesn't exist, the vid in the subgraph is -1. if (it != parent_map.end()) { rst_data[i] = it->second; } else { rst_data[i] = -1; } } }); } return rst; } IdArray GraphOp::ExpandIds(IdArray ids, IdArray offset) { const auto id_len = ids->shape[0]; const auto off_len = offset->shape[0]; CHECK_EQ(id_len + 1, off_len); const dgl_id_t* id_data = static_cast(ids->data); const dgl_id_t* off_data = static_cast(offset->data); const int64_t len = off_data[off_len - 1]; IdArray rst = IdArray::Empty( {len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); dgl_id_t* rst_data = static_cast(rst->data); for (int64_t i = 0; i < id_len; i++) { const int64_t local_len = off_data[i + 1] - off_data[i]; for (int64_t j = 0; j < local_len; j++) { rst_data[off_data[i] + j] = id_data[i]; } } return rst; } GraphPtr GraphOp::ToSimpleGraph(GraphPtr graph) { std::vector indptr(graph->NumVertices() + 1), indices; indptr[0] = 0; for (dgl_id_t src = 0; src < graph->NumVertices(); ++src) { std::unordered_set hashmap; for (const dgl_id_t dst : graph->SuccVec(src)) { if (!hashmap.count(dst)) { indices.push_back(dst); hashmap.insert(dst); } } indptr[src + 1] = indices.size(); } CSRPtr csr(new CSR( graph->NumVertices(), indices.size(), indptr.begin(), indices.begin(), RangeIter(0))); return 
std::make_shared(csr); } GraphPtr GraphOp::ToBidirectedMutableGraph(GraphPtr g) { std::unordered_map> n_e; for (dgl_id_t u = 0; u < g->NumVertices(); ++u) { for (const dgl_id_t v : g->SuccVec(u)) { n_e[u][v]++; } } GraphPtr bg = Graph::Create(); bg->AddVertices(g->NumVertices()); for (dgl_id_t u = 0; u < g->NumVertices(); ++u) { for (dgl_id_t v = u; v < g->NumVertices(); ++v) { const auto new_n_e = std::max(n_e[u][v], n_e[v][u]); if (new_n_e > 0) { IdArray us = aten::NewIdArray(new_n_e); dgl_id_t* us_data = static_cast(us->data); std::fill(us_data, us_data + new_n_e, u); if (u == v) { bg->AddEdges(us, us); } else { IdArray vs = aten::NewIdArray(new_n_e); dgl_id_t* vs_data = static_cast(vs->data); std::fill(vs_data, vs_data + new_n_e, v); bg->AddEdges(us, vs); bg->AddEdges(vs, us); } } } } return bg; } GraphPtr GraphOp::ToBidirectedImmutableGraph(GraphPtr g) { std::unordered_map> n_e; for (dgl_id_t u = 0; u < g->NumVertices(); ++u) { for (const dgl_id_t v : g->SuccVec(u)) { n_e[u][v]++; } } std::vector srcs, dsts; for (dgl_id_t u = 0; u < g->NumVertices(); ++u) { std::unordered_set hashmap; std::vector nbrs; for (const dgl_id_t v : g->PredVec(u)) { if (!hashmap.count(v)) { nbrs.push_back(v); hashmap.insert(v); } } for (const dgl_id_t v : g->SuccVec(u)) { if (!hashmap.count(v)) { nbrs.push_back(v); hashmap.insert(v); } } for (const dgl_id_t v : nbrs) { const auto new_n_e = std::max(n_e[u][v], n_e[v][u]); for (int i = 0; i < new_n_e; ++i) { srcs.push_back(v); dsts.push_back(u); } } } IdArray srcs_array = aten::VecToIdArray(srcs); IdArray dsts_array = aten::VecToIdArray(dsts); return ImmutableGraph::CreateFromCOO( g->NumVertices(), srcs_array, dsts_array); } HaloSubgraph GraphOp::GetSubgraphWithHalo( GraphPtr g, IdArray nodes, int num_hops) { const dgl_id_t* nid = static_cast(nodes->data); const auto id_len = nodes->shape[0]; // A map contains all nodes in the subgraph. // The key is the old node Ids, the value indicates whether a node is a inner // node. 
std::unordered_map all_nodes;
  // The old Ids of all nodes. We want to preserve the order of the nodes in the
  // vector. The first few nodes are the inner nodes in the subgraph.
  std::vector old_node_ids(nid, nid + id_len);
  // outer_nodes[k] collects the halo nodes first discovered at hop k + 1.
  std::vector> outer_nodes(num_hops);
  for (int64_t i = 0; i < id_len; i++) all_nodes[nid[i]] = true;
  // Snapshot of the inner nodes only, taken before halo nodes are added.
  auto orig_nodes = all_nodes;

  std::vector edge_src, edge_dst, edge_eid;

  // When we deal with in-edges, we need to do two things:
  // * find the edges inside the partition and the edges between partitions.
  // * find the nodes outside the partition that connect the partition.
  EdgeArray in_edges = g->InEdges(nodes);
  auto src = in_edges.src;
  auto dst = in_edges.dst;
  auto eid = in_edges.id;
  auto num_edges = eid->shape[0];
  const dgl_id_t* src_data = static_cast(src->data);
  const dgl_id_t* dst_data = static_cast(dst->data);
  const dgl_id_t* eid_data = static_cast(eid->data);
  for (int64_t i = 0; i < num_edges; i++) {
    // We check if the source node is in the original node.
    // With num_hops > 0 every in-edge is kept; with num_hops == 0 only edges
    // whose source is itself an inner node are kept.
    auto it1 = orig_nodes.find(src_data[i]);
    if (it1 != orig_nodes.end() || num_hops > 0) {
      edge_src.push_back(src_data[i]);
      edge_dst.push_back(dst_data[i]);
      edge_eid.push_back(eid_data[i]);
    }
    // We need to expand only if the node hasn't been seen before.
    auto it = all_nodes.find(src_data[i]);
    if (it == all_nodes.end() && num_hops > 0) {
      all_nodes[src_data[i]] = false;
      old_node_ids.push_back(src_data[i]);
      outer_nodes[0].push_back(src_data[i]);
    }
  }

  // Now we need to traverse the graph with the in-edges to access nodes
  // and edges more hops away.
  for (int k = 1; k < num_hops; k++) {
    // Expand from the halo nodes discovered in the previous hop.
    const std::vector& nodes = outer_nodes[k - 1];
    EdgeArray in_edges = g->InEdges(aten::VecToIdArray(nodes));
    auto src = in_edges.src;
    auto dst = in_edges.dst;
    auto eid = in_edges.id;
    auto num_edges = eid->shape[0];
    const dgl_id_t* src_data = static_cast(src->data);
    const dgl_id_t* dst_data = static_cast(dst->data);
    const dgl_id_t* eid_data = static_cast(eid->data);
    for (int64_t i = 0; i < num_edges; i++) {
      edge_src.push_back(src_data[i]);
      edge_dst.push_back(dst_data[i]);
      edge_eid.push_back(eid_data[i]);
      // If we haven't seen this node.
      auto it = all_nodes.find(src_data[i]);
      if (it == all_nodes.end()) {
        all_nodes[src_data[i]] = false;
        old_node_ids.push_back(src_data[i]);
        outer_nodes[k].push_back(src_data[i]);
      }
    }
  }

  // We assign new Ids to the nodes in the subgraph. We ensure that the HALO
  // nodes are behind the input nodes.
  std::unordered_map old2new;
  for (size_t i = 0; i < old_node_ids.size(); i++) {
    old2new[old_node_ids[i]] = i;
  }

  // Relabel the collected edge endpoints with the new ids.
  num_edges = edge_src.size();
  IdArray new_src = IdArray::Empty(
      {num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
  IdArray new_dst = IdArray::Empty(
      {num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
  dgl_id_t* new_src_data = static_cast(new_src->data);
  dgl_id_t* new_dst_data = static_cast(new_dst->data);
  for (size_t i = 0; i < edge_src.size(); i++) {
    new_src_data[i] = old2new[edge_src[i]];
    new_dst_data[i] = old2new[edge_dst[i]];
  }

  // inner_nodes[i] is the inner-node flag of new vertex i.
  std::vector inner_nodes(old_node_ids.size());
  for (size_t i = 0; i < old_node_ids.size(); i++) {
    dgl_id_t old_nid = old_node_ids[i];
    inner_nodes[i] = all_nodes[old_nid];
  }

  GraphPtr subg =
      ImmutableGraph::CreateFromCOO(old_node_ids.size(), new_src, new_dst);
  HaloSubgraph halo_subg;
  halo_subg.graph = subg;
  halo_subg.induced_vertices = aten::VecToIdArray(old_node_ids);
  halo_subg.induced_edges = aten::VecToIdArray(edge_eid);
  // TODO(zhengda) we need to switch to 8 bytes afterwards.
  halo_subg.inner_nodes = aten::VecToIdArray(inner_nodes, 32);
  return halo_subg;
}

/**
 * @brief Reorder the vertices of an immutable graph.
 *
 * Only one stored representation is reordered: the in-CSR when present,
 * otherwise the out-CSR, otherwise the COO. new_order is passed as both the
 * row and the column order of the reorder routine.
 *
 * @param ig The graph to reorder.
 * @param new_order Order array forwarded to aten::CSRReorder / COOReorder.
 * @return A new immutable graph built from the reordered representation.
 */
GraphPtr GraphOp::ReorderImmutableGraph(
    ImmutableGraphPtr ig, IdArray new_order) {
  CSRPtr in_csr, out_csr;
  COOPtr coo;
  // We only need to reorder one of the graph structure.
  if (ig->HasInCSR()) {
    in_csr = ig->GetInCSR();
    auto csrmat = in_csr->ToCSRMatrix();
    auto new_csrmat = aten::CSRReorder(csrmat, new_order, new_order);
    in_csr =
        CSRPtr(new CSR(new_csrmat.indptr, new_csrmat.indices, new_csrmat.data));
  } else if (ig->HasOutCSR()) {
    out_csr = ig->GetOutCSR();
    auto csrmat = out_csr->ToCSRMatrix();
    auto new_csrmat = aten::CSRReorder(csrmat, new_order, new_order);
    out_csr =
        CSRPtr(new CSR(new_csrmat.indptr, new_csrmat.indices, new_csrmat.data));
  } else {
    coo = ig->GetCOO();
    auto coomat = coo->ToCOOMatrix();
    auto new_coomat = aten::COOReorder(coomat, new_order, new_order);
    coo = COOPtr(new COO(ig->NumVertices(), new_coomat.row, new_coomat.col));
  }
  if (in_csr || out_csr)
    return GraphPtr(new ImmutableGraph(in_csr, out_csr));
  else
    return GraphPtr(new ImmutableGraph(coo));
}

// args: [graph, node_parts, num_hops]. node_parts[i] is the partition id of
// vertex i. Returns a list indexed by partition id holding the halo subgraph
// of each partition; slots for ids that own no vertex are left
// default-constructed.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLPartitionWithHalo")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef graph = args[0];
      IdArray node_parts = args[1];
      int num_hops = args[2];

      // Group vertex ids by partition id.
      const dgl_id_t* part_data = static_cast(node_parts->data);
      int64_t num_nodes = node_parts->shape[0];
      std::unordered_map> part_map;
      for (int64_t i = 0; i < num_nodes; i++) {
        dgl_id_t part_id = part_data[i];
        auto it = part_map.find(part_id);
        if (it == part_map.end()) {
          std::vector vec;
          vec.push_back(i);
          part_map[part_id] = vec;
        } else {
          it->second.push_back(i);
        }
      }
      // Flatten the map so partitions can be processed by index.
      std::vector part_ids;
      std::vector> part_nodes;
      int max_part_id = 0;
      for (auto it = part_map.begin(); it != part_map.end(); it++) {
        max_part_id = std::max(it->first, max_part_id);
        part_ids.push_back(it->first);
        part_nodes.push_back(it->second);
      }
      auto graph_ptr = std::dynamic_pointer_cast(graph.sptr());
      CHECK(graph_ptr) << "The input graph has to be an immutable graph";
      // When we construct subgraphs, we only access in-edges.
      // We need to make sure the in-CSR exists. Otherwise, we'll
      // try to construct in-CSR in openmp for loop, which will lead
      // to some unexpected results.
      graph_ptr->GetInCSR();
      std::vector> subgs(max_part_id + 1);
      int num_partitions = part_nodes.size();
      runtime::parallel_for(0, num_partitions, [&](size_t b, size_t e) {
        for (auto i = b; i < e; ++i) {
          auto nodes = aten::VecToIdArray(part_nodes[i]);
          HaloSubgraph subg =
              GraphOp::GetSubgraphWithHalo(graph_ptr, nodes, num_hops);
          std::shared_ptr subg_ptr(new HaloSubgraph(subg));
          // Each iteration writes a distinct slot, keyed by partition id.
          int part_id = part_ids[i];
          subgs[part_id] = subg_ptr;
        }
      });
      List ret_list;
      for (size_t i = 0; i < subgs.size(); i++) {
        ret_list.push_back(SubgraphRef(subgs[i]));
      }
      *rv = ret_list;
    });

// args: [graph, nodes, num_hops]. Returns the halo subgraph around `nodes`.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGetSubgraphWithHalo")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef graph = args[0];
      IdArray nodes = args[1];
      int num_hops = args[2];
      HaloSubgraph subg =
          GraphOp::GetSubgraphWithHalo(graph.sptr(), nodes, num_hops);
      std::shared_ptr subg_ptr(new HaloSubgraph(subg));
      *rv = SubgraphRef(subg_ptr);
    });

// args: [subgraph]. Returns the inner_nodes array of the subgraph; the
// dynamic cast must succeed or the call aborts through CHECK.
DGL_REGISTER_GLOBAL("graph_index._CAPI_GetHaloSubgraphInnerNodes")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      SubgraphRef g = args[0];
      auto gptr = std::dynamic_pointer_cast(g.sptr());
      CHECK(gptr) << "The input graph has to be immutable graph";
      *rv = gptr->inner_nodes;
    });

// args: [list of graphs]. Returns GraphOp::DisjointUnion() of the list.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      List graphs = args[0];
      std::vector ptrs(graphs.size());
      for (size_t i = 0; i < graphs.size(); ++i) {
        ptrs[i] = graphs[i].sptr();
      }
      *rv = GraphOp::DisjointUnion(ptrs);
    });

// args: [graph, num]. Returns the list of graphs produced by
// GraphOp::DisjointPartitionByNum().
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      int64_t num = args[1];
      const auto& ret = GraphOp::DisjointPartitionByNum(g.sptr(), num);
      List ret_list;
      for (GraphPtr gp : ret) {
        ret_list.push_back(GraphRef(gp));
      }
      *rv = ret_list;
    });
// C API: args = (graph, sizes). Returns the graphs produced by
// GraphOp::DisjointPartitionBySizes as a list of graph references.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray sizes = args[1];
      const auto& ret = GraphOp::DisjointPartitionBySizes(g.sptr(), sizes);
      List ret_list;
      for (GraphPtr gp : ret) {
        ret_list.push_back(GraphRef(gp));
      }
      *rv = ret_list;
    });

// C API: args = (graph, backtracking). Returns GraphOp::LineGraph of the graph.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      bool backtracking = args[1];
      *rv = GraphOp::LineGraph(g.sptr(), backtracking);
    });

// C API: args = (graph). Returns ImmutableGraph::ToImmutable of the graph.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLToImmutable")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = ImmutableGraph::ToImmutable(g.sptr());
    });

// C API: args = (graph). Returns GraphOp::ToSimpleGraph of the graph.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToSimpleGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = GraphOp::ToSimpleGraph(g.sptr());
    });

// C API: args = (graph). Returns GraphOp::ToBidirectedMutableGraph of the
// graph.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBidirectedMutableGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      *rv = GraphOp::ToBidirectedMutableGraph(g.sptr());
    });

// C API: args = (graph, new_order). Forwards to
// GraphOp::ReorderImmutableGraph; the CHECK fails if the dynamic cast of the
// input graph does not succeed.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLReorderGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      const IdArray new_order = args[1];
      auto gptr = std::dynamic_pointer_cast(g.sptr());
      CHECK(gptr) << "The input graph has to be immutable graph";
      *rv = GraphOp::ReorderImmutableGraph(gptr, new_order);
    });

// C API: args = (graph, is_incsr). Picks the in-CSR or out-CSR of the graph,
// returns a copy of the edge IDs it currently stores, and then overwrites the
// stored IDs with 0, 1, ..., num_edges - 1 in storage order.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLReassignEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef graph = args[0];
      bool is_incsr = args[1];
      auto gptr = std::dynamic_pointer_cast(graph.sptr());
      CHECK(gptr) << "We can only reassign edge Ids on immutable graphs";
      CSRPtr csr = is_incsr ? gptr->GetInCSR() : gptr->GetOutCSR();
      auto csrmat = csr->ToCSRMatrix();
      int64_t num_edges = csrmat.data->shape[0];
      IdArray new_data =
          IdArray::Empty({num_edges}, csrmat.data->dtype, csrmat.data->ctx);
      // Return the original edge Ids.
*rv = new_data;
      // TODO(zhengda) I need to invalidate out-CSR and COO.

      // Generate new edge Ids.
      // TODO(zhengda) after assignment, we actually don't need to store them
      // physically.
      ATEN_ID_TYPE_SWITCH(new_data->dtype, IdType, {
        IdType* typed_new_data = static_cast(new_data->data);
        IdType* typed_data = static_cast(csrmat.data->data);
        for (int64_t i = 0; i < num_edges; i++) {
          // Save the old ID into the returned array, then renumber in place.
          typed_new_data[i] = typed_data[i];
          typed_data[i] = i;
        }
      });
    });

// C API: args = (graph). Tries GraphOp::ToBidirectedSimpleImmutableGraph when
// the input casts successfully; if that yields a null result (or the cast
// failed) it falls back to GraphOp::ToBidirectedImmutableGraph.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToBidirectedImmutableGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef g = args[0];
      auto gptr = g.sptr();
      auto immutable_g = std::dynamic_pointer_cast(gptr);
      GraphPtr ret;
      // For immutable graphs, we can try a faster version.
      if (immutable_g) {
        ret = GraphOp::ToBidirectedSimpleImmutableGraph(immutable_g);
      }
      // If the above option doesn't work, we call a general implementation.
      if (!ret) {
        ret = GraphOp::ToBidirectedImmutableGraph(gptr);
      }
      *rv = ret;
    });

// C API: args = (parent_vids, query). Forwards to
// GraphOp::MapParentIdToSubgraphId.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const IdArray parent_vids = args[0];
      const IdArray query = args[1];
      *rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query);
    });

/**
 * @brief Map each ID to a (type ID, per-type ID) pair using ID ranges.
 *
 * For every input ID the function locates, with std::lower_bound over
 * range_ends, the first range whose end value is not less than the ID
 * (this assumes range_ends is sorted in ascending order). With range_id being
 * the index of that range:
 *   - type_id = range_id % num_types
 *   - part_id = range_id / num_types
 *   - per-type ID = id - range_starts[range_id], plus
 *     typed_map[num_parts * type_id + part_id - 1] when part_id > 0.
 *
 * @param ids IDs to map.
 * @param range_starts Start value of every range.
 * @param range_ends End value of every range; same length as range_starts.
 * @param typed_map Per-(type, partition) offsets added for part_id > 0.
 * @param num_parts Number of partitions.
 * @param num_types Number of types.
 * @return An array of length 2 * len(ids): the first half holds the type IDs,
 *         the second half holds the per-type IDs.
 */
template
IdArray MapIds(
    IdArray ids, IdArray range_starts, IdArray range_ends, IdArray typed_map,
    int num_parts, int num_types) {
  int64_t num_ids = ids->shape[0];
  int64_t num_ranges = range_starts->shape[0];
  IdArray ret = IdArray::Empty({num_ids * 2}, ids->dtype, ids->ctx);

  const IdType* range_start_data = static_cast(range_starts->data);
  const IdType* range_end_data = static_cast(range_ends->data);
  const IdType* ids_data = static_cast(ids->data);
  const IdType* typed_map_data = static_cast(typed_map->data);
  // The two output halves live in the same buffer.
  IdType* types_data = static_cast(ret->data);
  IdType* per_type_ids_data = static_cast(ret->data) + num_ids;
  runtime::parallel_for(0, ids->shape[0], [&](size_t b, size_t e) {
    for (auto i = b; i < e; ++i) {
      IdType id = ids_data[i];
      auto it =
          std::lower_bound(range_end_data, range_end_data + num_ranges, id);
      // The range must exist.
      BUG_IF_FAIL(it != range_end_data + num_ranges);
      size_t range_id = it - range_end_data;
      int type_id = range_id % num_types;
      types_data[i] = type_id;
      int part_id = range_id / num_types;
      BUG_IF_FAIL(part_id < num_parts);
      if (part_id == 0) {
        per_type_ids_data[i] = id - range_start_data[range_id];
      } else {
        per_type_ids_data[i] =
            id - range_start_data[range_id] +
            typed_map_data[num_parts * type_id + part_id - 1];
      }
    }
  });
  return ret;
}

// C API: args = (ids, range_starts, range_ends, typed_map, num_parts,
// num_types). Checks that all arrays share the bit width of ids and that there
// are num_parts * num_types ranges, then dispatches to MapIds on the ID type.
DGL_REGISTER_GLOBAL("distributed.id_map._CAPI_DGLHeteroMapIds")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const IdArray ids = args[0];
      const IdArray range_starts = args[1];
      const IdArray range_ends = args[2];
      const IdArray typed_map = args[3];
      int num_parts = args[4];
      int num_types = args[5];
      int num_ranges = range_starts->shape[0];

      CHECK_EQ(range_starts->dtype.bits, ids->dtype.bits);
      CHECK_EQ(range_ends->dtype.bits, ids->dtype.bits);
      CHECK_EQ(typed_map->dtype.bits, ids->dtype.bits);
      CHECK_EQ(num_ranges, num_parts * num_types);
      CHECK_EQ(num_ranges, range_ends->shape[0]);

      IdArray ret;
      ATEN_ID_TYPE_SWITCH(ids->dtype, IdType, {
        ret = MapIds(
            ids, range_starts, range_ends, typed_map, num_parts, num_types);
      });
      *rv = ret;
    });

}  // namespace dgl

================================================
FILE: src/graph/graph_traversal.cc
================================================
/**
 *  Copyright (c) 2018 by Contributors
 * @file graph/traversal.cc
 * @brief Graph traversal implementation
 */
#include
#include

#include "../c_api_common.h"

using namespace dgl::runtime;

namespace dgl {
namespace traverse {

// All registrations below read the adjacency of edge type 0 of the input
// heterograph: the CSC matrix when `reversed` is true, the CSR matrix
// otherwise. The frontiers computed by the aten routine are returned as a
// packed function over the result arrays.

// C API: args = (graph, src, reversed). Node frontiers of a BFS from src.
DGL_REGISTER_GLOBAL("traversal._CAPI_DGLBFSNodes_v2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef g = args[0];
      const IdArray src = args[1];
      bool reversed = args[2];
      aten::CSRMatrix csr;
      if (reversed) {
        csr = g.sptr()->GetCSCMatrix(0);
      } else {
        csr = g.sptr()->GetCSRMatrix(0);
      }
      const auto& front = aten::BFSNodesFrontiers(csr, src);
      *rv = ConvertNDArrayVectorToPackedFunc({front.ids, front.sections});
    });

// C API: args = (graph, src, reversed). Edge frontiers of a BFS from src.
DGL_REGISTER_GLOBAL("traversal._CAPI_DGLBFSEdges_v2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef g = args[0];
      const IdArray src = args[1];
      bool reversed = args[2];
      aten::CSRMatrix csr;
      if (reversed) {
        csr = g.sptr()->GetCSCMatrix(0);
      } else {
        csr = g.sptr()->GetCSRMatrix(0);
      }
      const auto& front = aten::BFSEdgesFrontiers(csr, src);
      *rv = ConvertNDArrayVectorToPackedFunc({front.ids, front.sections});
    });

// C API: args = (graph, reversed). Node frontiers of a topological traversal.
DGL_REGISTER_GLOBAL("traversal._CAPI_DGLTopologicalNodes_v2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef g = args[0];
      bool reversed = args[1];
      aten::CSRMatrix csr;
      if (reversed) {
        csr = g.sptr()->GetCSCMatrix(0);
      } else {
        csr = g.sptr()->GetCSRMatrix(0);
      }
      const auto& front = aten::TopologicalNodesFrontiers(csr);
      *rv = ConvertNDArrayVectorToPackedFunc({front.ids, front.sections});
    });

// C API: args = (graph, source, reversed). DFS edges from the source nodes;
// the source array is validated with aten::IsValidIdArray first.
DGL_REGISTER_GLOBAL("traversal._CAPI_DGLDFSEdges_v2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef g = args[0];
      const IdArray source = args[1];
      const bool reversed = args[2];
      CHECK(aten::IsValidIdArray(source)) << "Invalid source node id array.";
      aten::CSRMatrix csr;
      if (reversed) {
        csr = g.sptr()->GetCSCMatrix(0);
      } else {
        csr = g.sptr()->GetCSRMatrix(0);
      }
      const auto& front = aten::DGLDFSEdges(csr, source);
      *rv = ConvertNDArrayVectorToPackedFunc({front.ids, front.sections});
    });

// C API: args = (graph, source, reversed, has_reverse_edge, has_nontree_edge,
// return_labels). Labeled DFS edges; the tags array is included in the result
// only when return_labels is true.
DGL_REGISTER_GLOBAL("traversal._CAPI_DGLDFSLabeledEdges_v2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef g = args[0];
      const IdArray source = args[1];
      const bool reversed = args[2];
      const bool has_reverse_edge = args[3];
      const bool has_nontree_edge = args[4];
      const bool return_labels = args[5];

      aten::CSRMatrix csr;
      if (reversed) {
        csr = g.sptr()->GetCSCMatrix(0);
      } else {
        csr = g.sptr()->GetCSRMatrix(0);
      }

      const auto& front = aten::DGLDFSLabeledEdges(
          csr, source, has_reverse_edge, has_nontree_edge, return_labels);

      if (return_labels) {
        *rv = ConvertNDArrayVectorToPackedFunc(
            {front.ids, front.tags, front.sections});
      }
else {
        *rv = ConvertNDArrayVectorToPackedFunc({front.ids, front.sections});
      }
    });

}  // namespace traverse
}  // namespace dgl

================================================
FILE: src/graph/heterograph.cc
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file graph/heterograph.cc
 * @brief Heterograph implementation
 */
#include "./heterograph.h"

#include
#include
#include
#include
#include
#include
#include
#include

using namespace dgl::runtime;

namespace dgl {
namespace {

using dgl::ImmutableGraph;

/**
 * @brief Edge subgraph that keeps every node of the parent graph.
 *
 * Each relation graph is sliced independently with
 * EdgeSubgraph({eids[etype]}, true); the result reuses the parent's meta graph
 * and its number of vertices per type.
 *
 * @param hg The parent heterograph.
 * @param eids One edge ID array per edge type (size is CHECKed).
 * @return The subgraph together with its induced vertices and edges.
 */
HeteroSubgraph EdgeSubgraphPreserveNodes(
    const HeteroGraph* hg, const std::vector& eids) {
  CHECK_EQ(eids.size(), hg->NumEdgeTypes())
      << "Invalid input: the input list size must be the same as the number of "
         "edge type.";
  HeteroSubgraph ret;
  ret.induced_vertices.resize(hg->NumVertexTypes());
  ret.induced_edges = eids;

  // When preserve_nodes is true, simply compute EdgeSubgraph for each bipartite
  std::vector subrels(hg->NumEdgeTypes());
  for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) {
    auto pair = hg->meta_graph()->FindEdge(etype);
    const dgl_type_t src_vtype = pair.first;
    const dgl_type_t dst_vtype = pair.second;
    const auto& rel_vsg =
        hg->GetRelationGraph(etype)->EdgeSubgraph({eids[etype]}, true);
    subrels[etype] = rel_vsg.graph;
    // A vertex type touched by several edge types is overwritten by the last
    // relation visited.
    ret.induced_vertices[src_vtype] = rel_vsg.induced_vertices[0];
    ret.induced_vertices[dst_vtype] = rel_vsg.induced_vertices[1];
  }
  ret.graph = HeteroGraphPtr(
      new HeteroGraph(hg->meta_graph(), subrels, hg->NumVerticesPerType()));
  return ret;
}

/**
 * @brief Edge subgraph that drops nodes not incident to any selected edge and
 *        relabels the remaining nodes per vertex type.
 *
 * @param hg The parent heterograph.
 * @param eids One edge ID array per edge type (size is CHECKed).
 * @return The subgraph together with its induced vertices and edges.
 */
HeteroSubgraph EdgeSubgraphNoPreserveNodes(
    const HeteroGraph* hg, const std::vector& eids) {
  // TODO(minjie): In general, all relabeling should be separated with subgraph
  //   operations.
  CHECK_EQ(eids.size(), hg->NumEdgeTypes())
      << "Invalid input: the input list size must be the same as the number of "
         "edge type.";
  HeteroSubgraph ret;
  ret.induced_vertices.resize(hg->NumVertexTypes());
  ret.induced_edges = eids;
  // NOTE(minjie): EdgeSubgraph when preserve_nodes is false is quite
  // complicated in heterograph. This is because we need to make sure bipartite
  // graphs that incident on the same vertex type must have the same ID space.
  // For example, suppose we have following heterograph:
  //
  // Meta graph: A -> B -> C
  // UnitGraph graphs:
  // * A -> B: (0, 0), (0, 1)
  // * B -> C: (1, 0), (1, 1)
  //
  // Suppose for A->B, we only keep edge (0, 0), while for B->C we only keep (1,
  // 0). We need to make sure that in the result subgraph, node type B still has
  // two nodes. This means we cannot simply compute EdgeSubgraph for B->C which
  // will relabel node#1 of type B to be node #0.
  //
  // One implementation is as follows:
  // (1) For each bipartite graph, slice out the edges using the given eids.
  // (2) Make a dictionary map<vtype, vector<IdArray>>, where the key is the
  //     vertex type and the value is the incident nodes from the bipartite
  //     graphs that has the vertex type as either srctype or dsttype.
  // (3) Then for each vertex type, use aten::Relabel_ on its vector of arrays.
  //     aten::Relabel_ computes the union of the vertex sets and relabel
  //     the unique elements from zero. The returned mapping array is the final
  //     induced vertex set for that vertex type.
  // (4) Use the relabeled edges to construct the bipartite graph.
// step (1) & (2) std::vector subedges(hg->NumEdgeTypes()); std::vector> vtype2incnodes(hg->NumVertexTypes()); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; auto earray = hg->GetRelationGraph(etype)->FindEdges(0, eids[etype]); vtype2incnodes[src_vtype].push_back(earray.src); vtype2incnodes[dst_vtype].push_back(earray.dst); subedges[etype] = earray; } // step (3) std::vector num_vertices_per_type(hg->NumVertexTypes()); for (dgl_type_t vtype = 0; vtype < hg->NumVertexTypes(); ++vtype) { ret.induced_vertices[vtype] = aten::Relabel_(vtype2incnodes[vtype]); num_vertices_per_type[vtype] = ret.induced_vertices[vtype]->shape[0]; } // step (4) std::vector subrels(hg->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; subrels[etype] = UnitGraph::CreateFromCOO( (src_vtype == dst_vtype) ? 
1 : 2, ret.induced_vertices[src_vtype]->shape[0], ret.induced_vertices[dst_vtype]->shape[0], subedges[etype].src, subedges[etype].dst); } ret.graph = HeteroGraphPtr(new HeteroGraph( hg->meta_graph(), subrels, std::move(num_vertices_per_type))); return ret; } void HeteroGraphSanityCheck( GraphPtr meta_graph, const std::vector& rel_graphs) { // Sanity check CHECK_EQ(meta_graph->NumEdges(), rel_graphs.size()); CHECK(!rel_graphs.empty()) << "Empty heterograph is not allowed."; // all relation graphs must have only one edge type for (const auto& rg : rel_graphs) { CHECK_EQ(rg->NumEdgeTypes(), 1) << "Each relation graph must have only one edge type."; } auto ctx = rel_graphs[0]->Context(); for (const auto& rg : rel_graphs) { CHECK_EQ(rg->Context(), ctx) << "Each relation graph must have the same context."; } } std::vector InferNumVerticesPerType( GraphPtr meta_graph, const std::vector& rel_graphs) { // create num verts per type std::vector num_verts_per_type(meta_graph->NumVertices(), -1); EdgeArray etype_array = meta_graph->Edges(); dgl_type_t* srctypes = static_cast(etype_array.src->data); dgl_type_t* dsttypes = static_cast(etype_array.dst->data); dgl_type_t* etypes = static_cast(etype_array.id->data); for (size_t i = 0; i < meta_graph->NumEdges(); ++i) { dgl_type_t srctype = srctypes[i]; dgl_type_t dsttype = dsttypes[i]; dgl_type_t etype = etypes[i]; const auto& rg = rel_graphs[etype]; const auto sty = 0; const auto dty = rg->NumVertexTypes() == 1 ? 
0 : 1; size_t nv; // # nodes of source type nv = rg->NumVertices(sty); if (num_verts_per_type[srctype] < 0) num_verts_per_type[srctype] = nv; else CHECK_EQ(num_verts_per_type[srctype], nv) << "Mismatch number of vertices for vertex type " << srctype; // # nodes of destination type nv = rg->NumVertices(dty); if (num_verts_per_type[dsttype] < 0) num_verts_per_type[dsttype] = nv; else CHECK_EQ(num_verts_per_type[dsttype], nv) << "Mismatch number of vertices for vertex type " << dsttype; } return num_verts_per_type; } std::vector CastToUnitGraphs( const std::vector& rel_graphs) { std::vector relation_graphs(rel_graphs.size()); for (size_t i = 0; i < rel_graphs.size(); ++i) { HeteroGraphPtr relg = rel_graphs[i]; if (std::dynamic_pointer_cast(relg)) { relation_graphs[i] = std::dynamic_pointer_cast(relg); } else { relation_graphs[i] = CHECK_NOTNULL( std::dynamic_pointer_cast(relg->GetRelationGraph(0))); } } return relation_graphs; } } // namespace HeteroGraph::HeteroGraph( GraphPtr meta_graph, const std::vector& rel_graphs, const std::vector& num_nodes_per_type) : BaseHeteroGraph(meta_graph) { if (num_nodes_per_type.size() == 0) num_verts_per_type_ = InferNumVerticesPerType(meta_graph, rel_graphs); else num_verts_per_type_ = num_nodes_per_type; HeteroGraphSanityCheck(meta_graph, rel_graphs); relation_graphs_ = CastToUnitGraphs(rel_graphs); } bool HeteroGraph::IsMultigraph() const { for (const auto& hg : relation_graphs_) { if (hg->IsMultigraph()) { return true; } } return false; } BoolArray HeteroGraph::HasVertices(dgl_type_t vtype, IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid id array input"; return aten::LT(vids, NumVertices(vtype)); } HeteroSubgraph HeteroGraph::VertexSubgraph( const std::vector& vids) const { CHECK_EQ(vids.size(), NumVertexTypes()) << "Invalid input: the input list size must be the same as the number of " "vertex types."; HeteroSubgraph ret; ret.induced_vertices = vids; std::vector num_vertices_per_type(NumVertexTypes()); for 
(dgl_type_t vtype = 0; vtype < NumVertexTypes(); ++vtype)
    num_vertices_per_type[vtype] = vids[vtype]->shape[0];
  ret.induced_edges.resize(NumEdgeTypes());
  std::vector subrels(NumEdgeTypes());
  for (dgl_type_t etype = 0; etype < NumEdgeTypes(); ++etype) {
    auto pair = meta_graph_->FindEdge(etype);
    const dgl_type_t src_vtype = pair.first;
    const dgl_type_t dst_vtype = pair.second;
    // A relation whose endpoints share one vertex type takes a single ID
    // array; otherwise it takes the source and the destination array.
    const std::vector rel_vids =
        (src_vtype == dst_vtype)
            ? std::vector({vids[src_vtype]})
            : std::vector({vids[src_vtype], vids[dst_vtype]});
    const auto& rel_vsg = GetRelationGraph(etype)->VertexSubgraph(rel_vids);
    subrels[etype] = rel_vsg.graph;
    ret.induced_edges[etype] = rel_vsg.induced_edges[0];
  }
  ret.graph = HeteroGraphPtr(
      new HeteroGraph(meta_graph_, subrels, std::move(num_vertices_per_type)));
  return ret;
}

/**
 * @brief Edge-induced subgraph; dispatches on preserve_nodes to
 *        EdgeSubgraphPreserveNodes or EdgeSubgraphNoPreserveNodes.
 */
HeteroSubgraph HeteroGraph::EdgeSubgraph(
    const std::vector& eids, bool preserve_nodes) const {
  if (preserve_nodes) {
    return EdgeSubgraphPreserveNodes(this, eids);
  } else {
    return EdgeSubgraphNoPreserveNodes(this, eids);
  }
}

/**
 * @brief Return a new heterograph whose relation graphs are the results of
 *        UnitGraph::AsNumBits(rel, bits); the meta graph and the vertex counts
 *        are reused.
 */
HeteroGraphPtr HeteroGraph::AsNumBits(HeteroGraphPtr g, uint8_t bits) {
  auto hgindex = std::dynamic_pointer_cast(g);
  CHECK_NOTNULL(hgindex);
  std::vector rel_graphs;
  // NOTE: the loop variable shadows the parameter g.
  for (auto g : hgindex->relation_graphs_) {
    rel_graphs.push_back(UnitGraph::AsNumBits(g, bits));
  }
  return HeteroGraphPtr(new HeteroGraph(
      hgindex->meta_graph_, rel_graphs, hgindex->num_verts_per_type_));
}

/**
 * @brief Copy the graph to ctx. Returns g itself when it already reports that
 *        context; otherwise every relation graph is copied with
 *        UnitGraph::CopyTo.
 */
HeteroGraphPtr HeteroGraph::CopyTo(HeteroGraphPtr g, const DGLContext& ctx) {
  if (ctx == g->Context()) {
    return g;
  }
  auto hgindex = std::dynamic_pointer_cast(g);
  CHECK_NOTNULL(hgindex);
  std::vector rel_graphs;
  // NOTE: the loop variable shadows the parameter g.
  for (auto g : hgindex->relation_graphs_) {
    rel_graphs.push_back(UnitGraph::CopyTo(g, ctx));
  }
  return HeteroGraphPtr(new HeteroGraph(
      hgindex->meta_graph_, rel_graphs, hgindex->num_verts_per_type_));
}

/**
 * @brief Return a heterograph whose relation graphs are all pinned.
 *
 * Returns g itself when every relation graph is already pinned. Otherwise a
 * new heterograph is built that reuses the pinned relation graphs and replaces
 * the others with the result of their PinMemory().
 */
HeteroGraphPtr HeteroGraph::PinMemory(HeteroGraphPtr g) {
  auto casted_ptr = std::dynamic_pointer_cast(g);
  CHECK_NOTNULL(casted_ptr);
  auto relation_graphs = casted_ptr->relation_graphs_;

  auto it = std::find_if_not(
      relation_graphs.begin(), relation_graphs.end(),
      [](auto& underlying_g) { return underlying_g->IsPinned(); });
  // All underlying relation graphs are pinned, return the input hetero-graph
  // directly.
  if (it == relation_graphs.end()) return g;

  std::vector pinned_relation_graphs(relation_graphs.size());
  for (size_t i = 0; i < pinned_relation_graphs.size(); ++i) {
    if (!relation_graphs[i]->IsPinned()) {
      pinned_relation_graphs[i] = relation_graphs[i]->PinMemory();
    } else {
      pinned_relation_graphs[i] = relation_graphs[i];
    }
  }
  return HeteroGraphPtr(new HeteroGraph(
      casted_ptr->meta_graph_, pinned_relation_graphs,
      casted_ptr->num_verts_per_type_));
}

/** @brief Call PinMemory_() on every relation graph. */
void HeteroGraph::PinMemory_() {
  for (auto g : relation_graphs_) g->PinMemory_();
}

/** @brief Call UnpinMemory_() on every relation graph. */
void HeteroGraph::UnpinMemory_() {
  for (auto g : relation_graphs_) g->UnpinMemory_();
}

/** @brief Call RecordStream(stream) on every relation graph. */
void HeteroGraph::RecordStream(DGLStreamHandle stream) {
  for (auto g : relation_graphs_) g->RecordStream(stream);
}

/**
 * @brief Name of the shared memory object, or an empty string when
 *        shared_mem_ is not set.
 */
std::string HeteroGraph::SharedMemName() const {
  return shared_mem_ ?
shared_mem_->GetName() : ""; } HeteroGraphPtr HeteroGraph::CopyToSharedMem( HeteroGraphPtr g, const std::string& name, const std::vector& ntypes, const std::vector& etypes, const std::set& fmts) { // TODO(JJ): Raise error when calling shared_memory if graph index is on gpu auto hg = std::dynamic_pointer_cast(g); CHECK_NOTNULL(hg); if (hg->SharedMemName() == name) return g; // Copy buffer to share memory auto mem = std::make_shared(name); auto mem_buf = mem->CreateNew(SHARED_MEM_METAINFO_SIZE_MAX); dmlc::MemoryFixedSizeStream strm(mem_buf, SHARED_MEM_METAINFO_SIZE_MAX); SharedMemManager shm(name, &strm); bool has_coo = fmts.find("coo") != fmts.end(); bool has_csr = fmts.find("csr") != fmts.end(); bool has_csc = fmts.find("csc") != fmts.end(); shm.Write(g->NumBits()); shm.Write(has_coo); shm.Write(has_csr); shm.Write(has_csc); shm.Write(ImmutableGraph::ToImmutable(hg->meta_graph_)); shm.Write(hg->num_verts_per_type_); std::vector relgraphs(g->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < g->NumEdgeTypes(); ++etype) { auto src_dst_type = g->GetEndpointTypes(etype); int num_vtypes = (src_dst_type.first == src_dst_type.second ? 
1 : 2); aten::COOMatrix coo; aten::CSRMatrix csr, csc; std::string prefix = name + "_" + std::to_string(etype); if (has_coo) { coo = shm.CopyToSharedMem(hg->GetCOOMatrix(etype), prefix + "_coo"); } if (has_csr) { csr = shm.CopyToSharedMem(hg->GetCSRMatrix(etype), prefix + "_csr"); } if (has_csc) { csc = shm.CopyToSharedMem(hg->GetCSCMatrix(etype), prefix + "_csc"); } relgraphs[etype] = UnitGraph::CreateUnitGraphFrom( num_vtypes, csc, csr, coo, has_csc, has_csr, has_coo); } auto ret = std::shared_ptr( new HeteroGraph(hg->meta_graph_, relgraphs, hg->num_verts_per_type_)); ret->shared_mem_ = mem; shm.Write(ntypes); shm.Write(etypes); return ret; } std::tuple, std::vector> HeteroGraph::CreateFromSharedMem(const std::string& name) { bool exist = SharedMemory::Exist(name); if (!exist) { return std::make_tuple( nullptr, std::vector(), std::vector()); } auto mem = std::make_shared(name); auto mem_buf = mem->Open(SHARED_MEM_METAINFO_SIZE_MAX); dmlc::MemoryFixedSizeStream strm(mem_buf, SHARED_MEM_METAINFO_SIZE_MAX); SharedMemManager shm(name, &strm); uint8_t nbits; CHECK(shm.Read(&nbits)) << "invalid nbits (unit8_t)"; bool has_coo, has_csr, has_csc; CHECK(shm.Read(&has_coo)) << "invalid nbits (unit8_t)"; CHECK(shm.Read(&has_csr)) << "invalid csr (unit8_t)"; CHECK(shm.Read(&has_csc)) << "invalid csc (unit8_t)"; auto meta_imgraph = Serializer::make_shared(); CHECK(shm.Read(&meta_imgraph)) << "Invalid meta graph"; GraphPtr metagraph = meta_imgraph; std::vector num_verts_per_type; CHECK(shm.Read(&num_verts_per_type)) << "Invalid number of vertices per type"; std::vector relgraphs(metagraph->NumEdges()); for (dgl_type_t etype = 0; etype < metagraph->NumEdges(); ++etype) { auto src_dst = metagraph->FindEdge(etype); int num_vtypes = (src_dst.first == src_dst.second) ? 
1 : 2; aten::COOMatrix coo; aten::CSRMatrix csr, csc; std::string prefix = name + "_" + std::to_string(etype); if (has_coo) { shm.CreateFromSharedMem(&coo, prefix + "_coo"); } if (has_csr) { shm.CreateFromSharedMem(&csr, prefix + "_csr"); } if (has_csc) { shm.CreateFromSharedMem(&csc, prefix + "_csc"); } relgraphs[etype] = UnitGraph::CreateUnitGraphFrom( num_vtypes, csc, csr, coo, has_csc, has_csr, has_coo); } auto ret = std::make_shared(metagraph, relgraphs, num_verts_per_type); ret->shared_mem_ = mem; std::vector ntypes; std::vector etypes; CHECK(shm.Read(&ntypes)) << "invalid ntypes"; CHECK(shm.Read(&etypes)) << "invalid etypes"; return std::make_tuple(ret, ntypes, etypes); } HeteroGraphPtr HeteroGraph::GetGraphInFormat(dgl_format_code_t formats) const { std::vector format_rels(NumEdgeTypes()); for (dgl_type_t etype = 0; etype < NumEdgeTypes(); ++etype) { auto relgraph = std::dynamic_pointer_cast(GetRelationGraph(etype)); format_rels[etype] = relgraph->GetGraphInFormat(formats); } return HeteroGraphPtr( new HeteroGraph(meta_graph_, format_rels, NumVerticesPerType())); } FlattenedHeteroGraphPtr HeteroGraph::Flatten( const std::vector& etypes) const { const int64_t bits = NumBits(); if (bits == 32) { return FlattenImpl(etypes); } else { return FlattenImpl(etypes); } } template FlattenedHeteroGraphPtr HeteroGraph::FlattenImpl( const std::vector& etypes) const { std::unordered_map srctype_offsets, dsttype_offsets; size_t src_nodes = 0, dst_nodes = 0; std::vector induced_srctype, induced_dsttype; std::vector induced_srcid, induced_dstid; std::vector srctype_set, dsttype_set; // XXXtype_offsets contain the mapping from node type and number of nodes // after this loop. 
for (dgl_type_t etype : etypes) { auto src_dsttype = meta_graph_->FindEdge(etype); dgl_type_t srctype = src_dsttype.first; dgl_type_t dsttype = src_dsttype.second; size_t num_srctype_nodes = NumVertices(srctype); size_t num_dsttype_nodes = NumVertices(dsttype); if (srctype_offsets.count(srctype) == 0) { srctype_offsets[srctype] = num_srctype_nodes; srctype_set.push_back(srctype); } if (dsttype_offsets.count(dsttype) == 0) { dsttype_offsets[dsttype] = num_dsttype_nodes; dsttype_set.push_back(dsttype); } } // Sort the node types so that we can compare the sets and decide whether a // homogeneous graph should be returned. std::sort(srctype_set.begin(), srctype_set.end()); std::sort(dsttype_set.begin(), dsttype_set.end()); bool homograph = (srctype_set.size() == dsttype_set.size()) && std::equal(srctype_set.begin(), srctype_set.end(), dsttype_set.begin()); // XXXtype_offsets contain the mapping from node type to node ID offsets after // these two loops. for (size_t i = 0; i < srctype_set.size(); ++i) { dgl_type_t ntype = srctype_set[i]; size_t num_nodes = srctype_offsets[ntype]; srctype_offsets[ntype] = src_nodes; src_nodes += num_nodes; for (size_t j = 0; j < num_nodes; ++j) { induced_srctype.push_back(ntype); induced_srcid.push_back(j); } } for (size_t i = 0; i < dsttype_set.size(); ++i) { dgl_type_t ntype = dsttype_set[i]; size_t num_nodes = dsttype_offsets[ntype]; dsttype_offsets[ntype] = dst_nodes; dst_nodes += num_nodes; for (size_t j = 0; j < num_nodes; ++j) { induced_dsttype.push_back(ntype); induced_dstid.push_back(j); } } // TODO(minjie): Using concat operations cause many fragmented memory. // Need to optimize it in the future. 
std::vector src_arrs, dst_arrs, eid_arrs, induced_etypes; src_arrs.reserve(etypes.size()); dst_arrs.reserve(etypes.size()); eid_arrs.reserve(etypes.size()); induced_etypes.reserve(etypes.size()); for (dgl_type_t etype : etypes) { auto src_dsttype = meta_graph_->FindEdge(etype); dgl_type_t srctype = src_dsttype.first; dgl_type_t dsttype = src_dsttype.second; size_t srctype_offset = srctype_offsets[srctype]; size_t dsttype_offset = dsttype_offsets[dsttype]; EdgeArray edges = Edges(etype); size_t num_edges = NumEdges(etype); src_arrs.push_back(edges.src + srctype_offset); dst_arrs.push_back(edges.dst + dsttype_offset); eid_arrs.push_back(edges.id); induced_etypes.push_back( aten::Full(etype, num_edges, NumBits(), Context())); } HeteroGraphPtr gptr = UnitGraph::CreateFromCOO( homograph ? 1 : 2, src_nodes, dst_nodes, aten::Concat(src_arrs), aten::Concat(dst_arrs)); // Sanity check CHECK_EQ(gptr->Context(), Context()); CHECK_EQ(gptr->NumBits(), NumBits()); FlattenedHeteroGraph* result = new FlattenedHeteroGraph; result->graph = HeteroGraphRef( HeteroGraphPtr(new HeteroGraph(gptr->meta_graph(), {gptr}))); result->induced_srctype = aten::VecToIdArray(induced_srctype).CopyTo(Context()); result->induced_srctype_set = aten::VecToIdArray(srctype_set).CopyTo(Context()); result->induced_srcid = aten::VecToIdArray(induced_srcid).CopyTo(Context()); result->induced_etype = aten::Concat(induced_etypes); result->induced_etype_set = aten::VecToIdArray(etypes).CopyTo(Context()); result->induced_eid = aten::Concat(eid_arrs); result->induced_dsttype = aten::VecToIdArray(induced_dsttype).CopyTo(Context()); result->induced_dsttype_set = aten::VecToIdArray(dsttype_set).CopyTo(Context()); result->induced_dstid = aten::VecToIdArray(induced_dstid).CopyTo(Context()); return FlattenedHeteroGraphPtr(result); } constexpr uint64_t kDGLSerialize_HeteroGraph = 0xDD589FBE35224ABF; bool HeteroGraph::Load(dmlc::Stream* fs) { uint64_t magicNum; CHECK(fs->Read(&magicNum)) << "Invalid Magic Number"; 
CHECK_EQ(magicNum, kDGLSerialize_HeteroGraph) << "Invalid HeteroGraph Data";
  // The fields are read in the order HeteroGraph::Save writes them: meta
  // graph, relation graphs, number of vertices per type.
  auto meta_imgraph = Serializer::make_shared();
  CHECK(fs->Read(&meta_imgraph)) << "Invalid meta graph";
  meta_graph_ = meta_imgraph;
  CHECK(fs->Read(&relation_graphs_)) << "Invalid relation_graphs_";
  CHECK(fs->Read(&num_verts_per_type_)) << "Invalid num_verts_per_type_";
  return true;
}

/**
 * @brief Save the heterograph to a stream: the magic number, the meta graph
 *        (converted with ImmutableGraph::ToImmutable), the relation graphs and
 *        the number of vertices per type.
 * @param fs The output stream.
 */
void HeteroGraph::Save(dmlc::Stream* fs) const {
  fs->Write(kDGLSerialize_HeteroGraph);
  auto meta_graph_ptr = ImmutableGraph::ToImmutable(meta_graph());
  fs->Write(meta_graph_ptr);
  fs->Write(relation_graphs_);
  fs->Write(num_verts_per_type_);
}

/**
 * @brief Convert to an immutable graph. Only valid for graphs with exactly one
 *        vertex type and one edge type (both CHECKed); the work is delegated
 *        to relation graph 0.
 */
GraphPtr HeteroGraph::AsImmutableGraph() const {
  CHECK(NumVertexTypes() == 1) << "graph has more than one node types";
  CHECK(NumEdgeTypes() == 1) << "graph has more than one edge types";
  auto unit_graph =
      CHECK_NOTNULL(std::dynamic_pointer_cast(GetRelationGraph(0)));
  return unit_graph->AsImmutableGraph();
}

/**
 * @brief Create the line graph of this graph.
 *
 * Only graphs with a single vertex type and a single edge type are supported
 * (CHECKed). The line graph is computed by the single relation graph and
 * wrapped in a heterograph that reuses this graph's meta graph.
 *
 * @param backtracking Forwarded to the relation graph's LineGraph.
 * @return The line graph.
 */
HeteroGraphPtr HeteroGraph::LineGraph(bool backtracking) const {
  CHECK_EQ(1, meta_graph_->NumEdges())
      << "Only support Homogeneous graph now (one edge type)";
  CHECK_EQ(1, meta_graph_->NumVertices())
      << "Only support Homogeneous graph now (one node type)";
  CHECK_EQ(1, relation_graphs_.size()) << "Only support Homogeneous graph now";
  UnitGraphPtr ug = relation_graphs_[0];

  const auto& ulg = ug->LineGraph(backtracking);
  std::vector rel_graph = {ulg};
  std::vector num_nodes_per_type = {
      static_cast(ulg->NumVertices(0))};
  return HeteroGraphPtr(
      new HeteroGraph(meta_graph_, rel_graph, std::move(num_nodes_per_type)));
}

}  // namespace dgl

================================================
FILE: src/graph/heterograph.h
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file graph/heterograph.h
 * @brief Heterograph
 */

#ifndef DGL_GRAPH_HETEROGRAPH_H_
#define DGL_GRAPH_HETEROGRAPH_H_

#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "./unit_graph.h"
#include "shared_mem_manager.h"

namespace dgl {

/**
@brief Heterograph
 *
 * A heterograph stores one relation graph per edge type of its meta graph.
 * Most per-edge-type queries below simply forward to the relation graph of
 * that edge type, addressing it with the relation-local edge type 0.
 */
class HeteroGraph : public BaseHeteroGraph {
 public:
  /**
   * @brief Construct a heterograph.
   * @param meta_graph The meta graph.
   * @param rel_graphs The relation graphs, indexed by edge type.
   * @param num_nodes_per_type Number of vertices per vertex type; when empty
   *        the numbers are inferred from the relation graphs.
   */
  HeteroGraph(
      GraphPtr meta_graph, const std::vector& rel_graphs,
      const std::vector& num_nodes_per_type = {});

  /** @brief Return the relation graph of the given edge type (CHECKed). */
  HeteroGraphPtr GetRelationGraph(dgl_type_t etype) const override {
    CHECK_LT(etype, meta_graph_->NumEdges()) << "Invalid edge type: " << etype;
    return relation_graphs_[etype];
  }

  // The graph is read-only: every mutator below fails with LOG(FATAL).
  void AddVertices(dgl_type_t vtype, uint64_t num_vertices) override {
    LOG(FATAL) << "Bipartite graph is not mutable.";
  }

  void AddEdge(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) override {
    LOG(FATAL) << "Bipartite graph is not mutable.";
  }

  void AddEdges(dgl_type_t etype, IdArray src_ids, IdArray dst_ids) override {
    LOG(FATAL) << "Bipartite graph is not mutable.";
  }

  void Clear() override { LOG(FATAL) << "Bipartite graph is not mutable."; }

  // The four properties below are read from relation graph 0 only.
  DGLDataType DataType() const override {
    return relation_graphs_[0]->DataType();
  }

  DGLContext Context() const override {
    return relation_graphs_[0]->Context();
  }

  bool IsPinned() const override { return relation_graphs_[0]->IsPinned(); }

  uint8_t NumBits() const override { return relation_graphs_[0]->NumBits(); }

  bool IsMultigraph() const override;

  bool IsReadonly() const override { return true; }

  /** @brief Number of vertices of the given type (type is CHECKed). */
  uint64_t NumVertices(dgl_type_t vtype) const override {
    CHECK(meta_graph_->HasVertex(vtype)) << "Invalid vertex type: " << vtype;
    return num_verts_per_type_[vtype];
  }

  inline std::vector NumVerticesPerType() const override {
    return num_verts_per_type_;
  }

  uint64_t NumEdges(dgl_type_t etype) const override {
    return GetRelationGraph(etype)->NumEdges(0);
  }

  bool HasVertex(dgl_type_t vtype, dgl_id_t vid) const override {
    return vid < NumVertices(vtype);
  }

  BoolArray HasVertices(dgl_type_t vtype, IdArray vids) const override;

  bool HasEdgeBetween(
      dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override {
    return GetRelationGraph(etype)->HasEdgeBetween(0, src, dst);
  }

  BoolArray HasEdgesBetween(
      dgl_type_t etype, IdArray src_ids, IdArray dst_ids) const override {
    return GetRelationGraph(etype)->HasEdgesBetween(0, src_ids, dst_ids);
  }

  IdArray Predecessors(dgl_type_t etype, dgl_id_t dst) const override {
    return GetRelationGraph(etype)->Predecessors(0, dst);
  }

  IdArray Successors(dgl_type_t etype, dgl_id_t src) const override {
    return GetRelationGraph(etype)->Successors(0, src);
  }

  IdArray EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override {
    return GetRelationGraph(etype)->EdgeId(0, src, dst);
  }

  EdgeArray EdgeIdsAll(
      dgl_type_t etype, IdArray src, IdArray dst) const override {
    return GetRelationGraph(etype)->EdgeIdsAll(0, src, dst);
  }

  IdArray EdgeIdsOne(
      dgl_type_t etype, IdArray src, IdArray dst) const override {
    return GetRelationGraph(etype)->EdgeIdsOne(0, src, dst);
  }

  std::pair FindEdge(
      dgl_type_t etype, dgl_id_t eid) const override {
    return GetRelationGraph(etype)->FindEdge(0, eid);
  }

  EdgeArray FindEdges(dgl_type_t etype, IdArray eids) const override {
    return GetRelationGraph(etype)->FindEdges(0, eids);
  }

  EdgeArray InEdges(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->InEdges(0, vid);
  }

  EdgeArray InEdges(dgl_type_t etype, IdArray vids) const override {
    return GetRelationGraph(etype)->InEdges(0, vids);
  }

  EdgeArray OutEdges(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->OutEdges(0, vid);
  }

  EdgeArray OutEdges(dgl_type_t etype, IdArray vids) const override {
    return GetRelationGraph(etype)->OutEdges(0, vids);
  }

  EdgeArray Edges(
      dgl_type_t etype, const std::string& order = "") const override {
    return GetRelationGraph(etype)->Edges(0, order);
  }

  uint64_t InDegree(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->InDegree(0, vid);
  }

  DegreeArray InDegrees(dgl_type_t etype, IdArray vids) const override {
    return GetRelationGraph(etype)->InDegrees(0, vids);
  }

  uint64_t OutDegree(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->OutDegree(0, vid);
  }

  DegreeArray OutDegrees(dgl_type_t etype, IdArray vids) const override {
    return GetRelationGraph(etype)->OutDegrees(0, vids);
  }

  DGLIdIters SuccVec(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->SuccVec(0, vid);
  }

  DGLIdIters OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->OutEdgeVec(0, vid);
  }

  DGLIdIters PredVec(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->PredVec(0, vid);
  }

  DGLIdIters InEdgeVec(dgl_type_t etype, dgl_id_t vid) const override {
    return GetRelationGraph(etype)->InEdgeVec(0, vid);
  }

  std::vector GetAdj(
      dgl_type_t etype, bool transpose, const std::string& fmt) const override {
    return GetRelationGraph(etype)->GetAdj(0, transpose, fmt);
  }

  aten::COOMatrix GetCOOMatrix(dgl_type_t etype) const override {
    return GetRelationGraph(etype)->GetCOOMatrix(0);
  }

  aten::CSRMatrix GetCSCMatrix(dgl_type_t etype) const override {
    return GetRelationGraph(etype)->GetCSCMatrix(0);
  }

  aten::CSRMatrix GetCSRMatrix(dgl_type_t etype) const override {
    return GetRelationGraph(etype)->GetCSRMatrix(0);
  }

  SparseFormat SelectFormat(
      dgl_type_t etype, dgl_format_code_t preferred_formats) const override {
    return GetRelationGraph(etype)->SelectFormat(0, preferred_formats);
  }

  // The two format queries below are answered by relation graph 0 only.
  dgl_format_code_t GetAllowedFormats() const override {
    return GetRelationGraph(0)->GetAllowedFormats();
  }

  dgl_format_code_t GetCreatedFormats() const override {
    return GetRelationGraph(0)->GetCreatedFormats();
  }

  HeteroSubgraph VertexSubgraph(
      const std::vector& vids) const override;

  HeteroSubgraph EdgeSubgraph(
      const std::vector& eids,
      bool preserve_nodes = false) const override;

  HeteroGraphPtr GetGraphInFormat(dgl_format_code_t formats) const override;

  FlattenedHeteroGraphPtr Flatten(
      const std::vector& etypes) const override;

  GraphPtr AsImmutableGraph() const override;

  /**
   * @brief Load HeteroGraph from stream, using CSRMatrix
   * @return true; failures are reported through CHECK
   */
  bool Load(dmlc::Stream* fs);

  /** @brief Save HeteroGraph to stream, using CSRMatrix */
  void Save(dmlc::Stream* fs) const;

  /** @brief Convert the graph to use the given number of bits for storage */
  static HeteroGraphPtr AsNumBits(HeteroGraphPtr g, uint8_t bits);

  /** @brief Copy the data to another context */
  static HeteroGraphPtr CopyTo(HeteroGraphPtr g, const DGLContext& ctx);

  /**
   * @brief Pin all relation graphs of the current graph.
   * @note The graph will be pinned inplace. Behavior depends on the current
   *   context, kDGLCPU: will be pinned; IsPinned: directly return; kDGLCUDA:
   *   invalid, will throw an error. The context check is deferred to pinning
   *   the NDArray.
   */
  void PinMemory_() override;

  /**
   * @brief Unpin all relation graphs of the current graph.
   * @note The graph will be unpinned inplace. Behavior depends on the current
   *   context, IsPinned: will be unpinned; others: directly return. The context
   *   check is deferred to unpinning the NDArray.
   */
  void UnpinMemory_();

  /**
   * @brief Copy the current graph to pinned memory managed by
   *    PyTorch CachingHostAllocator for each relation graph.
   * @note If any of the underlying relation graphs are already pinned, the
   *    function will utilize their existing copies. If all of them are
   *    pinned, the function will return the original input hetero-graph
   *    directly.
   */
  static HeteroGraphPtr PinMemory(HeteroGraphPtr g);

  /**
   * @brief Record stream for this graph.
   * @param stream The stream that is using the graph
   */
  void RecordStream(DGLStreamHandle stream) override;

  /**
   * @brief Copy the data to shared memory.
   *
   * Also save names of node types and edge types of the HeteroGraph object to
   * shared memory
   */
  static HeteroGraphPtr CopyToSharedMem(
      HeteroGraphPtr g, const std::string& name,
      const std::vector& ntypes, const std::vector& etypes,
      const std::set& fmts);

  /**
   * @brief Create a heterograph from shared memory
   *
   * @param name Name of the shared memory object
   * @return the HeteroGraphPtr, names of node types, names of edge types
   */
  static std::tuple<
      HeteroGraphPtr, std::vector, std::vector>
  CreateFromSharedMem(const std::string& name);

  /** @brief Create a LineGraph of self */
  HeteroGraphPtr LineGraph(bool backtracking) const;

  /** @brief Read-only access to the relation graphs, indexed by edge type */
  const std::vector& relation_graphs() const {
    return relation_graphs_;
  }

 private:
  // To create empty class
  friend class Serializer;

  // Empty Constructor, only for serializer
  HeteroGraph() : BaseHeteroGraph() {}

  /** @brief A map from edge type to unit graph */
  std::vector relation_graphs_;

  /** @brief A map from vert type to the number of verts in the type */
  std::vector num_verts_per_type_;

  /** @brief The shared memory object for meta info */
  std::shared_ptr shared_mem_;

  /**
   * @brief The name of the shared memory. Return empty string if it is not in
   * shared memory.
   */
  std::string SharedMemName() const;

  /**
   * @brief Template implementation of the Flatten operation
   *
   * @tparam IdType Graph's index data type, can be int32_t or int64_t
   * @param etypes vector of etypes to be flattened
   * @return pointer of FlattenedHeteroGraph
   */
  template
  FlattenedHeteroGraphPtr FlattenImpl(
      const std::vector& etypes) const;
};

}  // namespace dgl

namespace dmlc {
DMLC_DECLARE_TRAITS(has_saveload, dgl::HeteroGraph, true);
}  // namespace dmlc

#endif  // DGL_GRAPH_HETEROGRAPH_H_

================================================
FILE: src/graph/heterograph_capi.cc
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file graph/heterograph_capi.cc
 * @brief Heterograph CAPI bindings.
*/
#include
#include
#include
#include
#include
#include
#include
#include "../c_api_common.h"
#include "./heterograph.h"
#include "unit_graph.h"

using namespace dgl::runtime;

namespace dgl {

///////////////////////// Unitgraph functions /////////////////////////

// XXX(minjie): Ideally, Unitgraph should be invisible to python side

// Build a unit graph from COO arrays.
// Packed args: (nvtypes, num_src, num_dst, row, col, formats, row_sorted,
// col_sorted). `formats` is a list of format names; each one is parsed with
// ParseSparseFormat and the set is folded into a single format code.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCOO")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      int64_t nvtypes = args[0];
      int64_t num_src = args[1];
      int64_t num_dst = args[2];
      IdArray row = args[3];
      IdArray col = args[4];
      List formats = args[5];
      bool row_sorted = args[6];
      bool col_sorted = args[7];
      std::vector formats_vec;
      for (Value val : formats) {
        std::string fmt = val->data;
        formats_vec.push_back(ParseSparseFormat(fmt));
      }
      const auto code = SparseFormatsToCode(formats_vec);
      auto hgptr = CreateFromCOO(
          nvtypes, num_src, num_dst, row, col, row_sorted, col_sorted, code);
      *rv = HeteroGraphRef(hgptr);
    });

// Build a unit graph from compressed arrays.
// Packed args: (nvtypes, num_src, num_dst, indptr, indices, edge_ids, formats,
// transpose). When `transpose` is false the arrays are handed to
// CreateFromCSR, otherwise to CreateFromCSC.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      int64_t nvtypes = args[0];
      int64_t num_src = args[1];
      int64_t num_dst = args[2];
      IdArray indptr = args[3];
      IdArray indices = args[4];
      IdArray edge_ids = args[5];
      List formats = args[6];
      bool transpose = args[7];
      std::vector formats_vec;
      for (Value val : formats) {
        std::string fmt = val->data;
        formats_vec.push_back(ParseSparseFormat(fmt));
      }
      const auto code = SparseFormatsToCode(formats_vec);
      if (!transpose) {
        auto hgptr = CreateFromCSR(
            nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
        *rv = HeteroGraphRef(hgptr);
      } else {
        auto hgptr = CreateFromCSC(
            nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
        *rv = HeteroGraphRef(hgptr);
      }
    });

// Assemble a heterograph from a metagraph and a list of relation graphs.
// Packed args: (meta_graph, rel_graphs).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateHeteroGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      GraphRef meta_graph = args[0];
      List rel_graphs = args[1];
      std::vector rel_ptrs;
rel_ptrs.reserve(rel_graphs.size()); for (const auto& ref : rel_graphs) { rel_ptrs.push_back(ref.sptr()); } auto hgptr = CreateHeteroGraph(meta_graph.sptr(), rel_ptrs); *rv = HeteroGraphRef(hgptr); }); DGL_REGISTER_GLOBAL( "heterograph_index._CAPI_DGLHeteroCreateHeteroGraphWithNumNodes") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef meta_graph = args[0]; List rel_graphs = args[1]; IdArray num_nodes_per_type = args[2]; std::vector rel_ptrs; rel_ptrs.reserve(rel_graphs.size()); for (const auto& ref : rel_graphs) { rel_ptrs.push_back(ref.sptr()); } auto hgptr = CreateHeteroGraph( meta_graph.sptr(), rel_ptrs, num_nodes_per_type.ToVector()); *rv = HeteroGraphRef(hgptr); }); ///////////////////////// HeteroGraph member functions ///////////////////////// DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetMetaGraph") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; *rv = hg->meta_graph(); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroIsMetaGraphUniBipartite") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; GraphPtr mg = hg->meta_graph(); *rv = mg->IsUniBipartite(); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetRelationGraph") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; dgl_type_t etype = args[1]; CHECK_LE(etype, hg->NumEdgeTypes()) << "invalid edge type " << etype; auto unit_graph = hg->GetRelationGraph(etype); auto meta_graph = unit_graph->meta_graph(); auto hgptr = CreateHeteroGraph( meta_graph, {unit_graph}, unit_graph->NumVerticesPerType()); *rv = HeteroGraphRef(hgptr); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetFlattenedGraph") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; List etypes = args[1]; std::vector etypes_vec; for (Value val : etypes) { // (gq) have to decompose it into two statements because of a weird MSVC // internal error dgl_id_t id = val->data; etypes_vec.push_back(id); } *rv = 
FlattenedHeteroGraphRef(hg->Flatten(etypes_vec));
    });

// ---- Mutation bindings: each forwards to the method of the same name. ----

// Packed args: (hg, vtype, num).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroAddVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t vtype = args[1];
      int64_t num = args[2];
      hg->AddVertices(vtype, num);
    });

// Packed args: (hg, etype, src, dst) with scalar endpoints.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroAddEdge")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t src = args[2];
      dgl_id_t dst = args[3];
      hg->AddEdge(etype, src, dst);
    });

// Packed args: (hg, etype, src, dst) with array endpoints.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroAddEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray src = args[2];
      IdArray dst = args[3];
      hg->AddEdges(etype, src, dst);
    });

// Packed args: (hg).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroClear")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      hg->Clear();
    });

// ---- Whole-graph property bindings; each takes (hg) only. ----

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroDataType")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = hg->DataType();
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroContext")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = hg->Context();
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroIsPinned")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = hg->IsPinned();
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroNumBits")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = hg->NumBits();
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroIsMultigraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = hg->IsMultigraph();
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroIsReadonly")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = hg->IsReadonly();
    });

// ---- Per-type size and membership queries. ----

// Packed args: (hg, vtype). The count is cast before being returned.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroNumVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t vtype = args[1];
      *rv = static_cast(hg->NumVertices(vtype));
    });

// Packed args: (hg, etype). The count is cast before being returned.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroNumEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      *rv = static_cast(hg->NumEdges(etype));
    });

// Packed args: (hg, vtype, vid).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroHasVertex")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t vtype = args[1];
      dgl_id_t vid = args[2];
      *rv = hg->HasVertex(vtype, vid);
    });

// Packed args: (hg, vtype, vids).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroHasVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t vtype = args[1];
      IdArray vids = args[2];
      *rv = hg->HasVertices(vtype, vids);
    });

// Packed args: (hg, etype, src, dst) with scalar endpoints.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroHasEdgeBetween")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t src = args[2];
      dgl_id_t dst = args[3];
      *rv = hg->HasEdgeBetween(etype, src, dst);
    });

// Packed args: (hg, etype, src, dst) with array endpoints.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroHasEdgesBetween")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray src = args[2];
      IdArray dst = args[3];
      *rv = hg->HasEdgesBetween(etype, src, dst);
    });

// ---- Neighborhood and edge lookups. ----

// Packed args: (hg, etype, dst).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPredecessors")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t dst = args[2];
      *rv = hg->Predecessors(etype, dst);
    });

// Packed args: (hg, etype, src).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroSuccessors")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t src = args[2];
      *rv = hg->Successors(etype, src);
    });

// Packed args: (hg, etype, src, dst) with scalar endpoints.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeId")
    .set_body([](DGLArgs args,
                 DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t src = args[2];
      dgl_id_t dst = args[3];
      *rv = hg->EdgeId(etype, src, dst);
    });

// Packed args: (hg, etype, src, dst). The resulting edge array is returned
// through ConvertEdgeArrayToPackedFunc.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeIdsAll")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray src = args[2];
      IdArray dst = args[3];
      const auto& ret = hg->EdgeIdsAll(etype, src, dst);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

// Packed args: (hg, etype, src, dst). Returns the result of EdgeIdsOne as is.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeIdsOne")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray src = args[2];
      IdArray dst = args[3];
      *rv = hg->EdgeIdsOne(etype, src, dst);
    });

// Packed args: (hg, etype, eids).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroFindEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray eids = args[2];
      const auto& ret = hg->FindEdges(etype, eids);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

// The *_1 variants take a single vertex id, the *_2 variants an id array.

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroInEdges_1")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t vid = args[2];
      const auto& ret = hg->InEdges(etype, vid);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroInEdges_2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray vids = args[2];
      const auto& ret = hg->InEdges(etype, vids);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroOutEdges_1")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t vid = args[2];
      const auto& ret = hg->OutEdges(etype, vid);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroOutEdges_2")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg =
          args[0];
      dgl_type_t etype = args[1];
      IdArray vids = args[2];
      const auto& ret = hg->OutEdges(etype, vids);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

// Packed args: (hg, etype, order); `order` is passed through to Edges().
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      std::string order = args[2];
      const auto& ret = hg->Edges(etype, order);
      *rv = ConvertEdgeArrayToPackedFunc(ret);
    });

// ---- Degree queries: scalar variants cast the result, array variants
// return the degree array directly. ----

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroInDegree")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t vid = args[2];
      *rv = static_cast(hg->InDegree(etype, vid));
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroInDegrees")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray vids = args[2];
      *rv = hg->InDegrees(etype, vids);
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroOutDegree")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      dgl_id_t vid = args[2];
      *rv = static_cast(hg->OutDegree(etype, vid));
    });

DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroOutDegrees")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      IdArray vids = args[2];
      *rv = hg->OutDegrees(etype, vids);
    });

// Packed args: (hg, etype, transpose, fmt). The arrays returned by GetAdj are
// handed back through ConvertNDArrayVectorToPackedFunc.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetAdj")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_type_t etype = args[1];
      bool transpose = args[2];
      std::string fmt = args[3];
      *rv = ConvertNDArrayVectorToPackedFunc(hg->GetAdj(etype, transpose, fmt));
    });

// Packed args: (hg, vids) where `vids` is a list with one entry per element
// consumed by VertexSubgraph. The result is heap-allocated and returned as a
// HeteroSubgraphRef.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroVertexSubgraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      List vids = args[1];
      std::vector vid_vec;
      vid_vec.reserve(vids.size());
      for (Value val : vids) {
        vid_vec.push_back(val->data);
      }
      std::shared_ptr subg(
          new
          HeteroSubgraph(hg->VertexSubgraph(vid_vec)));
      *rv = HeteroSubgraphRef(subg);
    });

// Packed args: (hg, eids, preserve_nodes).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroEdgeSubgraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      List eids = args[1];
      bool preserve_nodes = args[2];
      std::vector eid_vec;
      eid_vec.reserve(eids.size());
      for (Value val : eids) {
        eid_vec.push_back(val->data);
      }
      std::shared_ptr subg(
          new HeteroSubgraph(hg->EdgeSubgraph(eid_vec, preserve_nodes)));
      *rv = HeteroSubgraphRef(subg);
    });

///////////////////////// HeteroSubgraph members /////////////////////////

// Return the graph stored in the subgraph object. Packed args: (subg).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroSubgraphGetGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroSubgraphRef subg = args[0];
      *rv = HeteroGraphRef(subg->graph);
    });

// Return the subgraph's `induced_vertices` arrays as a list of values.
DGL_REGISTER_GLOBAL(
    "heterograph_index._CAPI_DGLHeteroSubgraphGetInducedVertices")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroSubgraphRef subg = args[0];
      List induced_verts;
      for (IdArray arr : subg->induced_vertices) {
        induced_verts.push_back(Value(MakeValue(arr)));
      }
      *rv = induced_verts;
    });

// Return the subgraph's `induced_edges` arrays as a list of values.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroSubgraphGetInducedEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroSubgraphRef subg = args[0];
      List induced_edges;
      for (IdArray arr : subg->induced_edges) {
        induced_edges.push_back(Value(MakeValue(arr)));
      }
      *rv = induced_edges;
    });

///////////////////////// Global functions and algorithms
////////////////////////////

// Convert the index width of a graph. Packed args: (hg, bits).
// If the dynamic cast of the handle succeeds, HeteroGraph::AsNumBits is used;
// otherwise the original pointer is passed to UnitGraph::AsNumBits.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroAsNumBits")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      int bits = args[1];
      HeteroGraphPtr bhg_ptr = hg.sptr();
      auto hg_ptr = std::dynamic_pointer_cast(bhg_ptr);
      HeteroGraphPtr hg_new;
      if (hg_ptr) {
        hg_new = HeteroGraph::AsNumBits(hg_ptr, bits);
      } else {
        hg_new = UnitGraph::AsNumBits(bhg_ptr, bits);
      }
      *rv = HeteroGraphRef(hg_new);
    });

// Copy a graph to another device. Packed args: (hg, device_type, device_id).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCopyTo")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
HeteroGraphRef hg = args[0]; int device_type = args[1]; int device_id = args[2]; DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; HeteroGraphPtr hg_new = HeteroGraph::CopyTo(hg.sptr(), ctx); *rv = HeteroGraphRef(hg_new); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPinMemory") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; HeteroGraphPtr hg_new = HeteroGraph::PinMemory(hg.sptr()); *rv = HeteroGraphRef(hg_new); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPinMemory_") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; auto hgindex = std::dynamic_pointer_cast(hg.sptr()); hgindex->PinMemory_(); *rv = hg; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroUnpinMemory_") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; auto hgindex = std::dynamic_pointer_cast(hg.sptr()); hgindex->UnpinMemory_(); *rv = hg; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroRecordStream") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; DGLStreamHandle stream = args[1]; auto hgindex = std::dynamic_pointer_cast(hg.sptr()); hgindex->RecordStream(stream); *rv = hg; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCopyToSharedMem") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; std::string name = args[1]; List ntypes = args[2]; List etypes = args[3]; List fmts = args[4]; auto ntypes_vec = ListValueToVector(ntypes); auto etypes_vec = ListValueToVector(etypes); std::set fmts_set; for (const auto& fmt : fmts) { std::string fmt_data = fmt->data; fmts_set.insert(fmt_data); } auto hg_share = HeteroGraph::CopyToSharedMem( hg.sptr(), name, ntypes_vec, etypes_vec, fmts_set); *rv = HeteroGraphRef(hg_share); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateFromSharedMem") .set_body([](DGLArgs args, DGLRetValue* rv) { std::string name = args[0]; HeteroGraphPtr hg; 
std::vector ntypes; std::vector etypes; std::tie(hg, ntypes, etypes) = HeteroGraph::CreateFromSharedMem(name); List ntypes_list; List etypes_list; for (const auto& ntype : ntypes) ntypes_list.push_back(Value(MakeValue(ntype))); for (const auto& etype : etypes) etypes_list.push_back(Value(MakeValue(etype))); List ret; ret.push_back(HeteroGraphRef(hg)); ret.push_back(ntypes_list); ret.push_back(etypes_list); *rv = ret; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroJointUnion") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef meta_graph = args[0]; List component_graphs = args[1]; CHECK(component_graphs.size() > 1) << "Expect graph list to have at least two graphs"; std::vector component_ptrs; component_ptrs.reserve(component_graphs.size()); const int64_t bits = component_graphs[0]->NumBits(); const DGLContext ctx = component_graphs[0]->Context(); for (const auto& component : component_graphs) { component_ptrs.push_back(component.sptr()); CHECK_EQ(component->NumBits(), bits) << "Expect graphs to joint union have the same index dtype(int" << bits << "), but got int" << component->NumBits(); CHECK_EQ(component->Context(), ctx) << "Expect graphs to joint union have the same context" << ctx << "), but got " << component->Context(); } auto hgptr = JointUnionHeteroGraph(meta_graph.sptr(), component_ptrs); *rv = HeteroGraphRef(hgptr); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroDisjointUnion_v2") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef meta_graph = args[0]; List component_graphs = args[1]; CHECK(component_graphs.size() > 0) << "Expect graph list has at least one graph"; std::vector component_ptrs; component_ptrs.reserve(component_graphs.size()); const int64_t bits = component_graphs[0]->NumBits(); const DGLContext ctx = component_graphs[0]->Context(); for (const auto& component : component_graphs) { component_ptrs.push_back(component.sptr()); CHECK_EQ(component->NumBits(), bits) << "Expect graphs to batch have the same index 
dtype(int" << bits << "), but got int" << component->NumBits(); CHECK_EQ(component->Context(), ctx) << "Expect graphs to batch have the same context" << ctx << "), but got " << component->Context(); } auto hgptr = DisjointUnionHeteroGraph2(meta_graph.sptr(), component_ptrs); *rv = HeteroGraphRef(hgptr); }); DGL_REGISTER_GLOBAL( "heterograph_index._CAPI_DGLHeteroDisjointPartitionBySizes_v2") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const IdArray vertex_sizes = args[1]; const IdArray edge_sizes = args[2]; std::vector ret; ret = DisjointPartitionHeteroBySizes2( hg->meta_graph(), hg.sptr(), vertex_sizes, edge_sizes); List ret_list; for (HeteroGraphPtr hgptr : ret) { ret_list.push_back(HeteroGraphRef(hgptr)); } *rv = ret_list; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroDisjointPartitionBySizes") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const IdArray vertex_sizes = args[1]; const IdArray edge_sizes = args[2]; const int64_t bits = hg->NumBits(); std::vector ret; ATEN_ID_BITS_SWITCH(bits, IdType, { ret = DisjointPartitionHeteroBySizes( hg->meta_graph(), hg.sptr(), vertex_sizes, edge_sizes); }); List ret_list; for (HeteroGraphPtr hgptr : ret) { ret_list.push_back(HeteroGraphRef(hgptr)); } *rv = ret_list; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroSlice") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const IdArray num_nodes_per_type = args[1]; const IdArray start_nid_per_type = args[2]; const IdArray num_edges_per_type = args[3]; const IdArray start_eid_per_type = args[4]; auto hgptr = SliceHeteroGraph( hg->meta_graph(), hg.sptr(), num_nodes_per_type, start_nid_per_type, num_edges_per_type, start_eid_per_type); *rv = HeteroGraphRef(hgptr); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetCreatedFormats") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; List format_list; dgl_format_code_t code = 
hg->GetRelationGraph(0)->GetCreatedFormats();
      for (auto format : CodeToSparseFormats(code)) {
        format_list.push_back(Value(MakeValue(ToStringSparseFormat(format))));
      }
      *rv = format_list;
    });

// Return the names of the allowed formats, read from relation graph 0.
// Packed args: (hg).
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetAllowedFormats")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      List format_list;
      dgl_format_code_t code = hg->GetRelationGraph(0)->GetAllowedFormats();
      for (auto format : CodeToSparseFormats(code)) {
        format_list.push_back(Value(MakeValue(ToStringSparseFormat(format))));
      }
      *rv = format_list;
    });

// Materialize every allowed format on every relation graph.
// Packed args: (hg). The allowed-format code is taken from relation graph 0
// and applied to all edge types.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateFormat")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      dgl_format_code_t code = hg->GetRelationGraph(0)->GetAllowedFormats();
      // Worker over the half-open edge-type range [etype_b, etype_e).
      auto get_format_f = [&](size_t etype_b, size_t etype_e) {
        for (auto etype = etype_b; etype < etype_e; ++etype) {
          auto bg =
              std::dynamic_pointer_cast(hg->GetRelationGraph(etype));
          for (auto format : CodeToSparseFormats(code)) bg->GetFormat(format);
        }
      };

      // Edge types are processed in parallel only when DGL_USE_CUDA is not
      // defined; otherwise the worker runs once over the whole range.
#if !(defined(DGL_USE_CUDA))
      runtime::parallel_for(0, hg->NumEdgeTypes(), get_format_f);
#else
      get_format_f(0, hg->NumEdgeTypes());
#endif
    });

// Return the graph restricted to the given formats.
// Packed args: (hg, formats) where `formats` is a list of format names.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroGetFormatGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      List formats = args[1];
      std::vector formats_vec;
      for (Value val : formats) {
        std::string fmt = val->data;
        formats_vec.push_back(ParseSparseFormat(fmt));
      }
      auto hgptr = hg->GetGraphInFormat(SparseFormatsToCode(formats_vec));
      *rv = HeteroGraphRef(hgptr);
    });

// Subgraph built by InEdgeGraph for the given nodes.
// Packed args: (hg, nodes, relabel_nodes).
DGL_REGISTER_GLOBAL("subgraph._CAPI_DGLInSubgraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      const auto& nodes = ListValueToVector(args[1]);
      bool relabel_nodes = args[2];
      std::shared_ptr ret(new HeteroSubgraph);
      *ret = InEdgeGraph(hg.sptr(), nodes, relabel_nodes);
      *rv = HeteroGraphRef(ret);
    });

// Subgraph built by OutEdgeGraph for the given nodes.
// Packed args: (hg, nodes, relabel_nodes).
DGL_REGISTER_GLOBAL("subgraph._CAPI_DGLOutSubgraph")
    .set_body([](DGLArgs args,
                 DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      const auto& nodes = ListValueToVector(args[1]);
      bool relabel_nodes = args[2];
      std::shared_ptr ret(new HeteroSubgraph);
      *ret = OutEdgeGraph(hg.sptr(), nodes, relabel_nodes);
      *rv = HeteroGraphRef(ret);
    });

// Packed args: (hg). Returns the result of AsImmutableGraph as a GraphRef.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLAsImmutableGraph")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      *rv = GraphRef(hg->AsImmutableGraph());
    });

// Sort the CSR of edge type 0 by tag and build a new graph from the result.
// Packed args: (hg, tag, num_tag). CPU only: both the graph and `tag` must be
// on kDGLCPU. Returns a list: [sorted graph, tag_pos].
DGL_REGISTER_GLOBAL("transform._CAPI_DGLHeteroSortOutEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      NDArray tag = args[1];
      int64_t num_tag = args[2];

      CHECK_EQ(hg->Context().device_type, kDGLCPU)
          << "Only support sorting by tag on cpu";
      CHECK(aten::IsValidIdArray(tag));
      CHECK_EQ(tag->ctx.device_type, kDGLCPU)
          << "Only support sorting by tag on cpu";

      const auto csr = hg->GetCSRMatrix(0);

      NDArray tag_pos = aten::NullArray();
      aten::CSRMatrix output;
      std::tie(output, tag_pos) = aten::CSRSortByTag(csr, tag, num_tag);
      HeteroGraphPtr output_hg =
          CreateFromCSR(hg->NumVertexTypes(), output, ALL_CODE);
      List ret;
      ret.push_back(HeteroGraphRef(output_hg));
      ret.push_back(Value(MakeValue(tag_pos)));
      *rv = ret;
    });

// Counterpart of _CAPI_DGLHeteroSortOutEdges that sorts the CSC of edge type 0
// and rebuilds the graph with CreateFromCSC.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLHeteroSortInEdges")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      HeteroGraphRef hg = args[0];
      NDArray tag = args[1];
      int64_t num_tag = args[2];

      CHECK_EQ(hg->Context().device_type, kDGLCPU)
          << "Only support sorting by tag on cpu";
      CHECK(aten::IsValidIdArray(tag));
      CHECK_EQ(tag->ctx.device_type, kDGLCPU)
          << "Only support sorting by tag on cpu";

      const auto csc = hg->GetCSCMatrix(0);

      NDArray tag_pos = aten::NullArray();
      aten::CSRMatrix output;
      std::tie(output, tag_pos) = aten::CSRSortByTag(csc, tag, num_tag);

      HeteroGraphPtr output_hg =
          CreateFromCSC(hg->NumVertexTypes(), output, ALL_CODE);
      List ret;
      ret.push_back(HeteroGraphRef(output_hg));
      ret.push_back(Value(MakeValue(tag_pos)));
      *rv = ret;
    });

// Split the node types of a metagraph into source types and destination types.
// Packed args: (metagraph).
DGL_REGISTER_GLOBAL("heterograph._CAPI_DGLFindSrcDstNtypes")
    .set_body([](DGLArgs
args, DGLRetValue* rv) { GraphRef metagraph = args[0]; std::unordered_set dst_set; std::unordered_set src_set; for (uint64_t eid = 0; eid < metagraph->NumEdges(); ++eid) { auto edge = metagraph->FindEdge(eid); auto src = edge.first; auto dst = edge.second; dst_set.insert(dst); src_set.insert(src); } List srclist, dstlist; List> ret_list; for (uint64_t nid = 0; nid < metagraph->NumVertices(); ++nid) { auto is_dst = dst_set.count(nid); auto is_src = src_set.count(nid); if (is_dst && is_src) return; else if (is_dst) dstlist.push_back(Value(MakeValue(static_cast(nid)))); else // If a node type is isolated, put it in srctype as defined in the // Python docstring. srclist.push_back(Value(MakeValue(static_cast(nid)))); } ret_list.push_back(srclist); ret_list.push_back(dstlist); *rv = ret_list; }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroReverse") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; CHECK_GT(hg->NumEdgeTypes(), 0); auto g = std::dynamic_pointer_cast(hg.sptr()); std::vector rev_ugs; const auto& ugs = g->relation_graphs(); rev_ugs.resize(ugs.size()); for (size_t i = 0; i < ugs.size(); ++i) { const auto& rev_ug = ugs[i]->Reverse(); rev_ugs[i] = rev_ug; } // node types are not changed const auto& num_nodes = g->NumVerticesPerType(); const auto& meta_edges = hg->meta_graph()->Edges("eid"); // reverse the metagraph const auto& rev_meta = ImmutableGraph::CreateFromCOO( hg->meta_graph()->NumVertices(), meta_edges.dst, meta_edges.src); *rv = CreateHeteroGraph(rev_meta, rev_ugs, num_nodes); }); } // namespace dgl ================================================ FILE: src/graph/immutable_graph.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/immutable_graph.cc * @brief DGL immutable graph index implementation */ #include #include #include #include #include #include #include #include #include #include #include "../c_api_common.h" #include "heterograph.h" #include 
"unit_graph.h" using namespace dgl::runtime; namespace dgl { namespace { inline std::string GetSharedMemName( const std::string &name, const std::string &edge_dir) { return name + "_" + edge_dir; } /** * The metadata of a graph index that are needed for shared-memory graph. */ struct GraphIndexMetadata { int64_t num_nodes; int64_t num_edges; bool has_in_csr; bool has_out_csr; bool has_coo; }; /** * Serialize the metadata of a graph index and place it in a shared-memory * tensor. In this way, another process can reconstruct a GraphIndex from a * shared-memory tensor. */ NDArray SerializeMetadata(ImmutableGraphPtr gidx, const std::string &name) { #ifndef _WIN32 GraphIndexMetadata meta; meta.num_nodes = gidx->NumVertices(); meta.num_edges = gidx->NumEdges(); meta.has_in_csr = gidx->HasInCSR(); meta.has_out_csr = gidx->HasOutCSR(); meta.has_coo = false; NDArray meta_arr = NDArray::EmptyShared( name, {sizeof(meta)}, DGLDataType{kDGLInt, 8, 1}, DGLContext{kDGLCPU, 0}, true); memcpy(meta_arr->data, &meta, sizeof(meta)); return meta_arr; #else LOG(FATAL) << "CSR graph doesn't support shared memory in Windows yet"; return NDArray(); #endif // _WIN32 } /** * Deserialize the metadata of a graph index. */ GraphIndexMetadata DeserializeMetadata(const std::string &name) { GraphIndexMetadata meta; #ifndef _WIN32 NDArray meta_arr = NDArray::EmptyShared( name, {sizeof(meta)}, DGLDataType{kDGLInt, 8, 1}, DGLContext{kDGLCPU, 0}, false); memcpy(&meta, meta_arr->data, sizeof(meta)); #else LOG(FATAL) << "CSR graph doesn't support shared memory in Windows yet"; #endif // _WIN32 return meta; } std::tuple MapFromSharedMemory( const std::string &shared_mem_name, int64_t num_verts, int64_t num_edges, bool is_create) { #ifndef _WIN32 const int64_t file_size = (num_verts + 1 + num_edges * 2) * sizeof(dgl_id_t); IdArray sm_array = IdArray::EmptyShared( shared_mem_name, {file_size}, DGLDataType{kDGLInt, 8, 1}, DGLContext{kDGLCPU, 0}, is_create); // Create views from the shared memory array. 
Note that we don't need to save // the sm_array because the refcount is maintained by the view arrays. IdArray indptr = sm_array.CreateView({num_verts + 1}, DGLDataType{kDGLInt, 64, 1}); IdArray indices = sm_array.CreateView( {num_edges}, DGLDataType{kDGLInt, 64, 1}, (num_verts + 1) * sizeof(dgl_id_t)); IdArray edge_ids = sm_array.CreateView( {num_edges}, DGLDataType{kDGLInt, 64, 1}, (num_verts + 1 + num_edges) * sizeof(dgl_id_t)); return std::make_tuple(indptr, indices, edge_ids); #else LOG(FATAL) << "CSR graph doesn't support shared memory in Windows yet"; return {}; #endif // _WIN32 } } // namespace ////////////////////////////////////////////////////////// // // CSR graph implementation // ////////////////////////////////////////////////////////// CSR::CSR(int64_t num_vertices, int64_t num_edges) { CHECK(!(num_vertices == 0 && num_edges != 0)); adj_ = aten::CSRMatrix{ num_vertices, num_vertices, aten::NewIdArray(num_vertices + 1), aten::NewIdArray(num_edges), aten::NewIdArray(num_edges)}; adj_.sorted = false; } CSR::CSR(IdArray indptr, IdArray indices, IdArray edge_ids) { CHECK(aten::IsValidIdArray(indptr)); CHECK(aten::IsValidIdArray(indices)); CHECK(aten::IsValidIdArray(edge_ids)); CHECK_EQ(indices->shape[0], edge_ids->shape[0]); const int64_t N = indptr->shape[0] - 1; adj_ = aten::CSRMatrix{N, N, indptr, indices, edge_ids}; adj_.sorted = false; } CSR::CSR( IdArray indptr, IdArray indices, IdArray edge_ids, const std::string &shared_mem_name) : shared_mem_name_(shared_mem_name) { CHECK(aten::IsValidIdArray(indptr)); CHECK(aten::IsValidIdArray(indices)); CHECK(aten::IsValidIdArray(edge_ids)); CHECK_EQ(indices->shape[0], edge_ids->shape[0]); const int64_t num_verts = indptr->shape[0] - 1; const int64_t num_edges = indices->shape[0]; adj_.num_rows = num_verts; adj_.num_cols = num_verts; std::tie(adj_.indptr, adj_.indices, adj_.data) = MapFromSharedMemory(shared_mem_name, num_verts, num_edges, true); // copy the given data into the shared memory arrays 
adj_.indptr.CopyFrom(indptr); adj_.indices.CopyFrom(indices); adj_.data.CopyFrom(edge_ids); adj_.sorted = false; } CSR::CSR( const std::string &shared_mem_name, int64_t num_verts, int64_t num_edges) : shared_mem_name_(shared_mem_name) { CHECK(!(num_verts == 0 && num_edges != 0)); adj_.num_rows = num_verts; adj_.num_cols = num_verts; std::tie(adj_.indptr, adj_.indices, adj_.data) = MapFromSharedMemory(shared_mem_name, num_verts, num_edges, false); adj_.sorted = false; } bool CSR::IsMultigraph() const { return aten::CSRHasDuplicate(adj_); } EdgeArray CSR::OutEdges(dgl_id_t vid) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; IdArray ret_dst = aten::CSRGetRowColumnIndices(adj_, vid); IdArray ret_eid = aten::CSRGetRowData(adj_, vid); IdArray ret_src = aten::Full(vid, ret_dst->shape[0], NumBits(), ret_dst->ctx); return EdgeArray{ret_src, ret_dst, ret_eid}; } EdgeArray CSR::OutEdges(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; auto csrsubmat = aten::CSRSliceRows(adj_, vids); auto coosubmat = aten::CSRToCOO(csrsubmat, false); // Note that the row id in the csr submat is relabled, so // we need to recover it using an index select. 
auto row = aten::IndexSelect(vids, coosubmat.row);
  return EdgeArray{row, coosubmat.col, coosubmat.data};
}

/** @brief Delegate to aten::CSRGetRowNNZ for the rows listed in `vids`. */
DegreeArray CSR::OutDegrees(IdArray vids) const {
  CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
  return aten::CSRGetRowNNZ(adj_, vids);
}

/** @brief Check whether entry (src, dst) is non-zero; both ids must exist. */
bool CSR::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const {
  CHECK(HasVertex(src)) << "Invalid vertex id: " << src;
  CHECK(HasVertex(dst)) << "Invalid vertex id: " << dst;
  return aten::CSRIsNonZero(adj_, src, dst);
}

/** @brief Array version of HasEdgeBetween. */
BoolArray CSR::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const {
  CHECK(aten::IsValidIdArray(src_ids)) << "Invalid vertex id array.";
  CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid vertex id array.";
  return aten::CSRIsNonZero(adj_, src_ids, dst_ids);
}

/**
 * @brief Return the column indices of row `vid`.
 *
 * Only radius == 1 is accepted; any other value fails the check.
 */
IdArray CSR::Successors(dgl_id_t vid, uint64_t radius) const {
  CHECK(HasVertex(vid)) << "invalid vertex: " << vid;
  CHECK(radius == 1) << "invalid radius: " << radius;
  return aten::CSRGetRowColumnIndices(adj_, vid);
}

/** @brief Return all data entries stored at (src, dst). */
IdArray CSR::EdgeId(dgl_id_t src, dgl_id_t dst) const {
  CHECK(HasVertex(src)) << "invalid vertex: " << src;
  CHECK(HasVertex(dst)) << "invalid vertex: " << dst;
  return aten::CSRGetAllData(adj_, src, dst);
}

/**
 * @brief Array version of EdgeId.
 *
 * The three arrays returned by aten::CSRGetDataAndIndices are packed, in
 * order, into the src/dst/id fields of the EdgeArray.
 */
EdgeArray CSR::EdgeIds(IdArray src_ids, IdArray dst_ids) const {
  const auto &arrs = aten::CSRGetDataAndIndices(adj_, src_ids, dst_ids);
  return EdgeArray{arrs[0], arrs[1], arrs[2]};
}

/**
 * @brief Return all edges via a CSR-to-COO conversion.
 *
 * Only an empty order string or "srcdst" is accepted.
 */
EdgeArray CSR::Edges(const std::string &order) const {
  CHECK(order.empty() || order == std::string("srcdst"))
      << "CSR only support Edges of order \"srcdst\","
      << " but got \"" << order << "\".";
  const auto &coo = aten::CSRToCOO(adj_, false);
  return EdgeArray{coo.row, coo.col, coo.data};
}

/**
 * @brief Build the subgraph induced by `vids`.
 *
 * The sub-CSR gets fresh edge ids 0..nnz-1; the data array of the sliced
 * matrix is reported as the induced edges.
 */
Subgraph CSR::VertexSubgraph(IdArray vids) const {
  CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
  const auto &submat = aten::CSRSliceMatrix(adj_, vids, vids);
  IdArray sub_eids =
      aten::Range(0, submat.data->shape[0], NumBits(), Context());
  CSRPtr subcsr(new CSR(submat.indptr, submat.indices, sub_eids));
  // The constructor resets `sorted` to false; carry over this graph's flag.
  subcsr->adj_.sorted = this->adj_.sorted;
Subgraph subg;
  subg.graph = subcsr;
  subg.induced_vertices = vids;
  subg.induced_edges = submat.data;
  return subg;
}

/** @brief Return a new CSR built from aten::CSRTranspose of the adjacency. */
CSRPtr CSR::Transpose() const {
  const auto &trans = aten::CSRTranspose(adj_);
  return CSRPtr(new CSR(trans.indptr, trans.indices, trans.data));
}

/**
 * @brief Convert to a COO graph with the same number of vertices.
 *
 * Only the row and column arrays of the converted matrix are passed on.
 */
COOPtr CSR::ToCOO() const {
  const auto &coo = aten::CSRToCOO(adj_, true);
  return COOPtr(new COO(NumVertices(), coo.row, coo.col));
}

/**
 * @brief Return a copy of this CSR on the given context.
 *
 * Returns a copy of *this unchanged when already on `ctx`. Otherwise the new
 * CSR is built through the three-array constructor, which sets `sorted` to
 * false.
 */
CSR CSR::CopyTo(const DGLContext &ctx) const {
  if (Context() == ctx) {
    return *this;
  } else {
    CSR ret(
        adj_.indptr.CopyTo(ctx), adj_.indices.CopyTo(ctx),
        adj_.data.CopyTo(ctx));
    return ret;
  }
}

/**
 * @brief Return a CSR stored in the shared memory called `name`.
 *
 * If this CSR is already in shared memory, `name` must match its current
 * name and *this is returned as is.
 */
CSR CSR::CopyToSharedMem(const std::string &name) const {
  if (IsSharedMem()) {
    CHECK(name == shared_mem_name_);
    return *this;
  } else {
    // TODO(zhengda) we need to set sorted_ properly.
    return CSR(adj_.indptr, adj_.indices, adj_.data, name);
  }
}

/** @brief Return a CSR whose id arrays use the requested bit width. */
CSR CSR::AsNumBits(uint8_t bits) const {
  if (NumBits() == bits) {
    return *this;
  } else {
    CSR ret(
        aten::AsNumBits(adj_.indptr, bits), aten::AsNumBits(adj_.indices, bits),
        aten::AsNumBits(adj_.data, bits));
    return ret;
  }
}

/**
 * @brief Return iterators over the column indices of row `vid`.
 *
 * No bounds check is performed on `vid`, and the raw array buffers are
 * dereferenced directly on the host.
 */
DGLIdIters CSR::SuccVec(dgl_id_t vid) const {
  // TODO(minjie): This still assumes the data type and device context
  //   of this graph. Should fix later.
  const dgl_id_t *indptr_data = static_cast(adj_.indptr->data);
  const dgl_id_t *indices_data = static_cast(adj_.indices->data);
  const dgl_id_t start = indptr_data[vid];
  const dgl_id_t end = indptr_data[vid + 1];
  return DGLIdIters(indices_data + start, indices_data + end);
}

/**
 * @brief Return iterators over the edge ids of row `vid`.
 *
 * Same caveats as SuccVec: no bounds check, raw host pointers.
 */
DGLIdIters CSR::OutEdgeVec(dgl_id_t vid) const {
  // TODO(minjie): This still assumes the data type and device context
  //   of this graph. Should fix later.
const dgl_id_t *indptr_data = static_cast(adj_.indptr->data); const dgl_id_t *eid_data = static_cast(adj_.data->data); const dgl_id_t start = indptr_data[vid]; const dgl_id_t end = indptr_data[vid + 1]; return DGLIdIters(eid_data + start, eid_data + end); } bool CSR::Load(dmlc::Stream *fs) { fs->Read(const_cast(&adj_)); return true; } void CSR::Save(dmlc::Stream *fs) const { fs->Write(adj_); } ////////////////////////////////////////////////////////// // // COO graph implementation // ////////////////////////////////////////////////////////// COO::COO( int64_t num_vertices, IdArray src, IdArray dst, bool row_sorted, bool col_sorted) { CHECK(aten::IsValidIdArray(src)); CHECK(aten::IsValidIdArray(dst)); CHECK_EQ(src->shape[0], dst->shape[0]); adj_ = aten::COOMatrix{num_vertices, num_vertices, src, dst, aten::NullArray(), row_sorted, col_sorted}; } bool COO::IsMultigraph() const { return aten::COOHasDuplicate(adj_); } std::pair COO::FindEdge(dgl_id_t eid) const { CHECK(eid < NumEdges()) << "Invalid edge id: " << eid; const dgl_id_t src = aten::IndexSelect(adj_.row, eid); const dgl_id_t dst = aten::IndexSelect(adj_.col, eid); return std::pair(src, dst); } EdgeArray COO::FindEdges(IdArray eids) const { CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array"; BUG_IF_FAIL(aten::IsNullArray(adj_.data)) << "FindEdges requires the internal COO matrix not having EIDs."; return EdgeArray{ aten::IndexSelect(adj_.row, eids), aten::IndexSelect(adj_.col, eids), eids}; } EdgeArray COO::Edges(const std::string &order) const { CHECK(order.empty() || order == std::string("eid")) << "COO only support Edges of order \"eid\", but got \"" << order << "\"."; IdArray rst_eid = aten::Range(0, NumEdges(), NumBits(), Context()); return EdgeArray{adj_.row, adj_.col, rst_eid}; } Subgraph COO::EdgeSubgraph(IdArray eids, bool preserve_nodes) const { CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array."; COOPtr subcoo; IdArray induced_nodes; if (!preserve_nodes) { IdArray new_src = 
aten::IndexSelect(adj_.row, eids); IdArray new_dst = aten::IndexSelect(adj_.col, eids); induced_nodes = aten::Relabel_({new_src, new_dst}); const auto new_nnodes = induced_nodes->shape[0]; subcoo = COOPtr(new COO(new_nnodes, new_src, new_dst)); } else { IdArray new_src = aten::IndexSelect(adj_.row, eids); IdArray new_dst = aten::IndexSelect(adj_.col, eids); induced_nodes = aten::Range(0, NumVertices(), NumBits(), Context()); subcoo = COOPtr(new COO(NumVertices(), new_src, new_dst)); } Subgraph subg; subg.graph = subcoo; subg.induced_vertices = induced_nodes; subg.induced_edges = eids; return subg; } CSRPtr COO::ToCSR() const { const auto &csr = aten::COOToCSR(adj_); return CSRPtr(new CSR(csr.indptr, csr.indices, csr.data)); } COO COO::CopyTo(const DGLContext &ctx) const { if (Context() == ctx) { return *this; } else { COO ret(NumVertices(), adj_.row.CopyTo(ctx), adj_.col.CopyTo(ctx)); return ret; } } COO COO::CopyToSharedMem(const std::string &name) const { LOG(FATAL) << "COO doesn't supprt shared memory yet"; return COO(); } COO COO::AsNumBits(uint8_t bits) const { if (NumBits() == bits) { return *this; } else { COO ret( NumVertices(), aten::AsNumBits(adj_.row, bits), aten::AsNumBits(adj_.col, bits)); return ret; } } ////////////////////////////////////////////////////////// // // immutable graph implementation // ////////////////////////////////////////////////////////// BoolArray ImmutableGraph::HasVertices(IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid id array input"; return aten::LT(vids, NumVertices()); } CSRPtr ImmutableGraph::GetInCSR() const { if (!in_csr_) { if (out_csr_) { const_cast(this)->in_csr_ = out_csr_->Transpose(); if (out_csr_->IsSharedMem()) LOG(WARNING) << "We just construct an in-CSR from a shared-memory out CSR. " << "It may dramatically increase memory consumption."; } else { CHECK(coo_) << "None of CSR, COO exist"; const_cast(this)->in_csr_ = coo_->Transpose()->ToCSR(); } } return in_csr_; } /** @brief Return out csr. 
If not exist, transpose the other one.*/
CSRPtr ImmutableGraph::GetOutCSR() const {
  // The result is cached in out_csr_ (written through a const_cast because
  // this accessor is const).
  if (!out_csr_) {
    if (in_csr_) {
      const_cast(this)->out_csr_ = in_csr_->Transpose();
      if (in_csr_->IsSharedMem())
        LOG(WARNING)
            << "We just construct an out-CSR from a shared-memory in CSR. "
            << "It may dramatically increase memory consumption.";
    } else {
      CHECK(coo_) << "None of CSR, COO exist";
      const_cast(this)->out_csr_ = coo_->ToCSR();
    }
  }
  return out_csr_;
}

/** @brief Return coo. If not exist, create from csr.*/
COOPtr ImmutableGraph::GetCOO() const {
  if (!coo_) {
    if (in_csr_) {
      // The in-CSR stores reversed edges, so transpose after conversion.
      const_cast(this)->coo_ = in_csr_->ToCOO()->Transpose();
    } else {
      CHECK(out_csr_) << "Both CSR are missing.";
      const_cast(this)->coo_ = out_csr_->ToCOO();
    }
  }
  return coo_;
}

/**
 * @brief Return all edges in the requested order.
 *
 * An empty order means "any order"; "srcdst" is served by the out-CSR and
 * "eid" by the COO. Any other string is a fatal error.
 */
EdgeArray ImmutableGraph::Edges(const std::string &order) const {
  if (order.empty()) {
    // arbitrary order
    if (in_csr_) {
      // transpose
      const auto &edges = in_csr_->Edges(order);
      return EdgeArray{edges.dst, edges.src, edges.id};
    } else {
      return AnyGraph()->Edges(order);
    }
  } else if (order == std::string("srcdst")) {
    // TODO(minjie): CSR only guarantees "src" to be sorted.
    //   Maybe we should relax this requirement?
    return GetOutCSR()->Edges(order);
  } else if (order == std::string("eid")) {
    return GetCOO()->Edges(order);
  } else {
    LOG(FATAL) << "Unsupported order request: " << order;
  }
  return {};
}

/** @brief Build the vertex-induced subgraph and wrap it as ImmutableGraph. */
Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const {
  // We prefer to generate a subgraph from out-csr.
auto sg = GetOutCSR()->VertexSubgraph(vids);
  CSRPtr subcsr = std::dynamic_pointer_cast(sg.graph);
  sg.graph = GraphPtr(new ImmutableGraph(subcsr));
  return sg;
}

/** @brief Build the edge-induced subgraph from the COO and wrap it. */
Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids, bool preserve_nodes) const {
  auto sg = GetCOO()->EdgeSubgraph(eids, preserve_nodes);
  COOPtr subcoo = std::dynamic_pointer_cast(sg.graph);
  sg.graph = GraphPtr(new ImmutableGraph(subcoo));
  return sg;
}

/**
 * @brief Return the adjacency arrays in "csr" or "coo" format.
 *
 * For "csr", transpose selects the out-CSR and non-transpose the in-CSR.
 * For "coo", the flag is inverted before delegating to the COO graph.
 * Any other format string is a fatal error.
 */
std::vector ImmutableGraph::GetAdj(
    bool transpose, const std::string &fmt) const {
  // TODO(minjie): Our current semantics of adjacency matrix is row for dst
  //   nodes and col for src nodes. Therefore, we need to flip the transpose
  //   flag. For example, transpose=False is equal to in edge CSR. We have this
  //   behavior because previously we use framework's SPMM and we don't cache
  //   reverse adj. This is not intuitive and also not consistent with
  //   networkx's to_scipy_sparse_matrix. With the upcoming custom kernel
  //   change, we should change the behavior and make row for src and col for
  //   dst.
  if (fmt == std::string("csr")) {
    return transpose ? GetOutCSR()->GetAdj(false, "csr")
                     : GetInCSR()->GetAdj(false, "csr");
  } else if (fmt == std::string("coo")) {
    return GetCOO()->GetAdj(!transpose, fmt);
  } else {
    LOG(FATAL) << "unsupported adjacency matrix format: " << fmt;
    return {};
  }
}

/**
 * @brief Create an immutable graph from CSR arrays.
 *
 * `edge_dir` must be "in" or "out" and decides whether the CSR is installed
 * as the in-CSR or the out-CSR; any other value is a fatal error.
 */
ImmutableGraphPtr ImmutableGraph::CreateFromCSR(
    IdArray indptr, IdArray indices, IdArray edge_ids,
    const std::string &edge_dir) {
  CSRPtr csr(new CSR(indptr, indices, edge_ids));
  if (edge_dir == "in") {
    return ImmutableGraphPtr(new ImmutableGraph(csr, nullptr));
  } else if (edge_dir == "out") {
    return ImmutableGraphPtr(new ImmutableGraph(nullptr, csr));
  } else {
    LOG(FATAL) << "Unknown edge direction: " << edge_dir;
    return ImmutableGraphPtr();
  }
}

/** @brief Attach to a graph previously stored in shared memory. */
ImmutableGraphPtr ImmutableGraph::CreateFromCSR(const std::string &name) {
  // If the shared memory graph index doesn't exist, we return null directly.
#ifndef _WIN32 if (!SharedMemory::Exist(GetSharedMemName(name, "meta"))) { return nullptr; } #endif // _WIN32 GraphIndexMetadata meta = DeserializeMetadata(GetSharedMemName(name, "meta")); CSRPtr in_csr, out_csr; if (meta.has_in_csr) { in_csr = CSRPtr( new CSR(GetSharedMemName(name, "in"), meta.num_nodes, meta.num_edges)); } if (meta.has_out_csr) { out_csr = CSRPtr( new CSR(GetSharedMemName(name, "out"), meta.num_nodes, meta.num_edges)); } return ImmutableGraphPtr(new ImmutableGraph(in_csr, out_csr, name)); } ImmutableGraphPtr ImmutableGraph::CreateFromCOO( int64_t num_vertices, IdArray src, IdArray dst, bool row_sorted, bool col_sorted) { COOPtr coo(new COO(num_vertices, src, dst, row_sorted, col_sorted)); return std::make_shared(coo); } ImmutableGraphPtr ImmutableGraph::ToImmutable(GraphPtr graph) { ImmutableGraphPtr ig = std::dynamic_pointer_cast(graph); if (ig) { return ig; } else { const auto &adj = graph->GetAdj(true, "csr"); CSRPtr csr(new CSR(adj[0], adj[1], adj[2])); return ImmutableGraph::CreateFromCSR(adj[0], adj[1], adj[2], "out"); } } ImmutableGraphPtr ImmutableGraph::CopyTo( ImmutableGraphPtr g, const DGLContext &ctx) { if (ctx == g->Context()) { return g; } // TODO(minjie): since we don't have GPU implementation of COO<->CSR, // we make sure that this graph (on CPU) has materialized CSR, // and then copy them to other context (usually GPU). This should // be fixed later. 
CSRPtr new_incsr = CSRPtr(new CSR(g->GetInCSR()->CopyTo(ctx)));
  CSRPtr new_outcsr = CSRPtr(new CSR(g->GetOutCSR()->CopyTo(ctx)));
  return ImmutableGraphPtr(new ImmutableGraph(new_incsr, new_outcsr));
}

/**
 * @brief Copy both CSRs of the graph into shared memory.
 *
 * The in-CSR and out-CSR are stored under names derived from `name` with
 * the suffixes "in" and "out"; the serialized metadata is stored under the
 * "meta" suffix and kept in the returned graph.
 */
ImmutableGraphPtr ImmutableGraph::CopyToSharedMem(
    ImmutableGraphPtr g, const std::string &name) {
  CSRPtr new_incsr, new_outcsr;
  std::string shared_mem_name = GetSharedMemName(name, "in");
  new_incsr = CSRPtr(new CSR(g->GetInCSR()->CopyToSharedMem(shared_mem_name)));

  shared_mem_name = GetSharedMemName(name, "out");
  new_outcsr =
      CSRPtr(new CSR(g->GetOutCSR()->CopyToSharedMem(shared_mem_name)));

  auto new_g =
      ImmutableGraphPtr(new ImmutableGraph(new_incsr, new_outcsr, name));
  new_g->serialized_shared_meta_ =
      SerializeMetadata(new_g, GetSharedMemName(name, "meta"));
  return new_g;
}

/**
 * @brief Convert the graph to the requested id bit width.
 *
 * Returns `g` itself when it already uses `bits`.
 */
ImmutableGraphPtr ImmutableGraph::AsNumBits(ImmutableGraphPtr g, uint8_t bits) {
  if (g->NumBits() == bits) {
    return g;
  } else {
    // TODO(minjie): since we don't have int32 operations,
    //   we make sure that this graph (on CPU) has materialized CSR,
    //   and then convert them to the requested number of bits. This should
    //   be fixed later.
CSRPtr new_incsr = CSRPtr(new CSR(g->GetInCSR()->AsNumBits(bits))); CSRPtr new_outcsr = CSRPtr(new CSR(g->GetOutCSR()->AsNumBits(bits))); return ImmutableGraphPtr(new ImmutableGraph(new_incsr, new_outcsr)); } } ImmutableGraphPtr ImmutableGraph::Reverse() const { if (coo_) { return ImmutableGraphPtr( new ImmutableGraph(out_csr_, in_csr_, coo_->Transpose())); } else { return ImmutableGraphPtr(new ImmutableGraph(out_csr_, in_csr_)); } } constexpr uint64_t kDGLSerialize_ImGraph = 0xDD3c5FFE20046ABF; /** @return Load HeteroGraph from stream, using OutCSR Matrix*/ bool ImmutableGraph::Load(dmlc::Stream *fs) { uint64_t magicNum; aten::CSRMatrix out_csr_matrix; CHECK(fs->Read(&magicNum)) << "Invalid Magic Number"; CHECK_EQ(magicNum, kDGLSerialize_ImGraph) << "Invalid ImmutableGraph Magic Number"; CHECK(fs->Read(&out_csr_)) << "Invalid csr matrix"; return true; } /** @return Save HeteroGraph to stream, using OutCSR Matrix */ void ImmutableGraph::Save(dmlc::Stream *fs) const { fs->Write(kDGLSerialize_ImGraph); fs->Write(GetOutCSR()); } HeteroGraphPtr ImmutableGraph::AsHeteroGraph() const { aten::CSRMatrix in_csr, out_csr; aten::COOMatrix coo; if (in_csr_) in_csr = GetInCSR()->ToCSRMatrix(); if (out_csr_) out_csr = GetOutCSR()->ToCSRMatrix(); if (coo_) coo = GetCOO()->ToCOOMatrix(); auto g = UnitGraph::CreateUnitGraphFrom( 1, in_csr, out_csr, coo, in_csr_ != nullptr, out_csr_ != nullptr, coo_ != nullptr); return HeteroGraphPtr(new HeteroGraph(g->meta_graph(), {g})); } DGL_REGISTER_GLOBAL("transform._CAPI_DGLAsHeteroGraph") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphRef g = args[0]; ImmutableGraphPtr ig = std::dynamic_pointer_cast(g.sptr()); CHECK(ig) << "graph is not readonly"; *rv = HeteroGraphRef(ig->AsHeteroGraph()); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLImmutableGraphCopyTo") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphRef g = args[0]; const int device_type = args[1]; const int device_id = args[2]; DGLContext ctx; ctx.device_type = 
static_cast(device_type);
      ctx.device_id = device_id;
      ImmutableGraphPtr ig =
          CHECK_NOTNULL(std::dynamic_pointer_cast(g.sptr()));
      *rv = ImmutableGraph::CopyTo(ig, ctx);
    });

// C API: copy an immutable graph into the shared memory called `name`.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLImmutableGraphCopyToSharedMem")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      GraphRef g = args[0];
      std::string name = args[1];
      ImmutableGraphPtr ig =
          CHECK_NOTNULL(std::dynamic_pointer_cast(g.sptr()));
      *rv = ImmutableGraph::CopyToSharedMem(ig, name);
    });

// C API: convert an immutable graph to the requested id bit width.
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLImmutableGraphAsNumBits")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      GraphRef g = args[0];
      int bits = args[1];
      ImmutableGraphPtr ig =
          CHECK_NOTNULL(std::dynamic_pointer_cast(g.sptr()));
      *rv = ImmutableGraph::AsNumBits(ig, bits);
    });

}  // namespace dgl

================================================
FILE: src/graph/metis_partition.cc
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file graph/metis_partition.cc
 * @brief Call Metis partitioning
 */

#include
#include
#include

#include "../c_api_common.h"

using namespace dgl::runtime;

namespace dgl {

#if !defined(_WIN32)

/**
 * @brief Partition a graph into `k` parts with METIS.
 *
 * @param g The input graph; it must be an ImmutableGraph.
 * @param k The number of partitions.
 * @param vwgt_arr Vertex weights; may be empty, otherwise its length must be
 *        a multiple of the number of vertices.
 * @param obj_cut If true minimize the edge cut, otherwise the communication
 *        volume.
 * @return An id array with one partition id per vertex.
 */
IdArray MetisPartition(GraphPtr g, int k, NDArray vwgt_arr, bool obj_cut) {
  // The index type of Metis needs to be compatible with DGL index type.
  CHECK_EQ(sizeof(idx_t), sizeof(dgl_id_t));
  ImmutableGraphPtr ig = std::dynamic_pointer_cast(g);
  CHECK(ig) << "The input graph must be an immutable graph.";
  // This is a symmetric graph, so in-csr and out-csr are the same.
  const auto mat = ig->GetInCSR()->ToCSRMatrix();

  idx_t nvtxs = g->NumVertices();
  idx_t ncon = 1;  // # balancing constraints.
idx_t *xadj = static_cast(mat.indptr->data); idx_t *adjncy = static_cast(mat.indices->data); idx_t nparts = k; IdArray part_arr = aten::NewIdArray(nvtxs); idx_t objval = 0; idx_t *part = static_cast(part_arr->data); int64_t vwgt_len = vwgt_arr->shape[0]; CHECK_EQ(sizeof(idx_t), vwgt_arr->dtype.bits / 8) << "The vertex weight array doesn't have right type"; CHECK(vwgt_len % g->NumVertices() == 0) << "The vertex weight array doesn't have right number of elements"; idx_t *vwgt = NULL; if (vwgt_len > 0) { ncon = vwgt_len / g->NumVertices(); vwgt = static_cast(vwgt_arr->data); } idx_t options[METIS_NOPTIONS]; METIS_SetDefaultOptions(options); options[METIS_OPTION_ONDISK] = 1; options[METIS_OPTION_NITER] = 1; options[METIS_OPTION_NIPARTS] = 1; options[METIS_OPTION_DROPEDGES] = 1; if (obj_cut) { options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; } else { options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_VOL; } int ret = METIS_PartGraphKway( &nvtxs, // The number of vertices &ncon, // The number of balancing constraints. xadj, // indptr adjncy, // indices vwgt, // the weights of the vertices NULL, // The size of the vertices for computing // the total communication volume NULL, // The weights of the edges &nparts, // The number of partitions. 
NULL,     // the desired weight for each partition and constraint
      NULL,     // the allowed load imbalance tolerance
      options,  // the array of options
      &objval,  // the edge-cut or the total communication volume of
                // the partitioning solution
      part);

  // Report the objective value; its meaning depends on the objective type.
  if (obj_cut) {
    LOG(INFO) << "Partition a graph with " << g->NumVertices() << " nodes and "
              << g->NumEdges() << " edges into " << k << " parts and "
              << "get " << objval << " edge cuts";
  } else {
    LOG(INFO) << "Partition a graph with " << g->NumVertices() << " nodes and "
              << g->NumEdges() << " edges into " << k << " parts and "
              << "the communication volume is " << objval;
  }

  // Every non-OK status is reported through LOG(FATAL).
  switch (ret) {
    case METIS_OK:
      return part_arr;
    case METIS_ERROR_INPUT:
      LOG(FATAL) << "Error in Metis partitioning: input error";
    case METIS_ERROR_MEMORY:
      LOG(FATAL) << "Error in Metis partitioning: cannot allocate memory";
    default:
      LOG(FATAL) << "Error in Metis partitioning: other errors";
  }
  // return an array of 0 elements to indicate the error.
  return aten::NullArray();
}

#endif  // !defined(_WIN32)

// C API: METIS partitioning; raises a fatal error on Windows builds.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLMetisPartition")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      GraphRef g = args[0];
      int k = args[1];
      NDArray vwgt = args[2];
      bool obj_cut = args[3];
#if !defined(_WIN32)
      *rv = MetisPartition(g.sptr(), k, vwgt, obj_cut);
#else
      LOG(FATAL) << "Metis partition does not support Windows.";
#endif  // !defined(_WIN32)
    });

}  // namespace dgl

================================================
FILE: src/graph/nodeflow.cc
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file graph/nodeflow.cc
 * @brief DGL NodeFlow related functions.
*/ #include #include #include #include #include "../c_api_common.h" using dgl::runtime::DGLArgs; using dgl::runtime::DGLArgValue; using dgl::runtime::DGLRetValue; using dgl::runtime::PackedFunc; namespace dgl { std::vector GetNodeFlowSlice( const ImmutableGraph &graph, const std::string &fmt, size_t layer0_size, size_t layer1_start, size_t layer1_end, bool remap) { CHECK_GE(layer1_start, layer0_size); if (fmt == std::string("csr")) { dgl_id_t first_vid = layer1_start - layer0_size; auto csr = aten::CSRSliceRows( graph.GetInCSR()->ToCSRMatrix(), layer1_start, layer1_end); if (remap) { dgl_id_t *eid_data = static_cast(csr.data->data); const dgl_id_t first_eid = eid_data[0]; IdArray new_indices = aten::Sub(csr.indices, first_vid); IdArray new_data = aten::Sub(csr.data, first_eid); return {csr.indptr, new_indices, new_data}; } else { return {csr.indptr, csr.indices, csr.data}; } } else if (fmt == std::string("coo")) { auto csr = graph.GetInCSR()->ToCSRMatrix(); const dgl_id_t *indptr = static_cast(csr.indptr->data); const dgl_id_t *indices = static_cast(csr.indices->data); const dgl_id_t *edge_ids = static_cast(csr.data->data); int64_t nnz = indptr[layer1_end] - indptr[layer1_start]; IdArray idx = aten::NewIdArray(2 * nnz); IdArray eid = aten::NewIdArray(nnz); int64_t *idx_data = static_cast(idx->data); dgl_id_t *eid_data = static_cast(eid->data); size_t num_edges = 0; for (size_t i = layer1_start; i < layer1_end; i++) { for (dgl_id_t j = indptr[i]; j < indptr[i + 1]; j++) { // These nodes are all in a layer. We need to remap them to the node id // local to the layer. idx_data[num_edges] = remap ? 
i - layer1_start : i; num_edges++; } } CHECK_EQ(num_edges, nnz); if (remap) { size_t edge_start = indptr[layer1_start]; dgl_id_t first_eid = edge_ids[edge_start]; dgl_id_t first_vid = layer1_start - layer0_size; for (int64_t i = 0; i < nnz; i++) { CHECK_GE(indices[edge_start + i], first_vid); idx_data[nnz + i] = indices[edge_start + i] - first_vid; eid_data[i] = edge_ids[edge_start + i] - first_eid; } } else { std::copy( indices + indptr[layer1_start], indices + indptr[layer1_end], idx_data + nnz); std::copy( edge_ids + indptr[layer1_start], edge_ids + indptr[layer1_end], eid_data); } return std::vector{idx, eid}; } else { LOG(FATAL) << "unsupported adjacency matrix format"; return {}; } } DGL_REGISTER_GLOBAL("_deprecate.nodeflow._CAPI_NodeFlowGetBlockAdj") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphRef g = args[0]; std::string format = args[1]; int64_t layer0_size = args[2]; int64_t start = args[3]; int64_t end = args[4]; const bool remap = args[5]; auto ig = CHECK_NOTNULL(std::dynamic_pointer_cast(g.sptr())); auto res = GetNodeFlowSlice(*ig, format, layer0_size, start, end, remap); *rv = ConvertNDArrayVectorToPackedFunc(res); }); } // namespace dgl ================================================ FILE: src/graph/pickle.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file graph/pickle.cc * @brief Functions for pickle and unpickle a graph */ #include #include #include #include #include #include "../c_api_common.h" #include "./heterograph.h" #include "unit_graph.h" using namespace dgl::runtime; namespace dgl { HeteroPickleStates HeteroPickle(HeteroGraphPtr graph) { HeteroPickleStates states; states.version = 2; dmlc::MemoryStringStream ofs(&states.meta); dmlc::Stream *strm = &ofs; strm->Write(ImmutableGraph::ToImmutable(graph->meta_graph())); strm->Write(graph->NumVerticesPerType()); strm->Write(graph->IsPinned()); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { SparseFormat fmt = 
graph->SelectFormat(etype, ALL_CODE);
    switch (fmt) {
      case SparseFormat::kCOO: {
        strm->Write(SparseFormat::kCOO);
        const auto &coo = graph->GetCOOMatrix(etype);
        strm->Write(coo.row_sorted);
        strm->Write(coo.col_sorted);
        states.arrays.push_back(coo.row);
        states.arrays.push_back(coo.col);
        break;
      }
      case SparseFormat::kCSR:
      case SparseFormat::kCSC: {
        // Both CSR and CSC selections are stored as the CSR matrix and
        // tagged kCSR in the stream.
        strm->Write(SparseFormat::kCSR);
        const auto &csr = graph->GetCSRMatrix(etype);
        strm->Write(csr.sorted);
        states.arrays.push_back(csr.indptr);
        states.arrays.push_back(csr.indices);
        states.arrays.push_back(csr.data);
        break;
      }
      default:
        LOG(FATAL) << "Unsupported sparse format.";
    }
  }
  return states;
}

/**
 * @brief Serialize a heterograph, keeping every already-created format.
 *
 * For each edge type the created/allowed format codes are written, followed
 * by the sorted flag(s) of each created format in the order COO, CSR, CSC.
 * The matching index arrays are appended to states.arrays in the same order.
 */
HeteroPickleStates HeteroForkingPickle(HeteroGraphPtr graph) {
  HeteroPickleStates states;
  states.version = 2;
  dmlc::MemoryStringStream ofs(&states.meta);
  dmlc::Stream *strm = &ofs;
  strm->Write(ImmutableGraph::ToImmutable(graph->meta_graph()));
  strm->Write(graph->NumVerticesPerType());
  strm->Write(graph->IsPinned());
  for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) {
    auto created_formats = graph->GetCreatedFormats();
    auto allowed_formats = graph->GetAllowedFormats();
    strm->Write(created_formats);
    strm->Write(allowed_formats);
    if (created_formats & COO_CODE) {
      const auto &coo = graph->GetCOOMatrix(etype);
      strm->Write(coo.row_sorted);
      strm->Write(coo.col_sorted);
      states.arrays.push_back(coo.row);
      states.arrays.push_back(coo.col);
    }
    if (created_formats & CSR_CODE) {
      const auto &csr = graph->GetCSRMatrix(etype);
      strm->Write(csr.sorted);
      states.arrays.push_back(csr.indptr);
      states.arrays.push_back(csr.indices);
      states.arrays.push_back(csr.data);
    }
    if (created_formats & CSC_CODE) {
      const auto &csc = graph->GetCSCMatrix(etype);
      strm->Write(csc.sorted);
      states.arrays.push_back(csc.indptr);
      states.arrays.push_back(csc.indices);
      states.arrays.push_back(csc.data);
    }
  }
  return states;
}

/**
 * @brief Rebuild a heterograph from pickle states of version 1 or 2.
 *
 * The stream is read in the order HeteroPickle writes it; the pinned flag is
 * only present for versions above 1.
 */
HeteroGraphPtr HeteroUnpickle(const HeteroPickleStates &states) {
  char *buf = const_cast(states.meta.c_str());  // a readonly stream?
dmlc::MemoryFixedSizeStream ifs(buf, states.meta.size());
  dmlc::Stream *strm = &ifs;
  auto meta_imgraph = Serializer::make_shared();
  CHECK(strm->Read(&meta_imgraph)) << "Invalid meta graph";
  GraphPtr metagraph = meta_imgraph;
  // One relation graph per edge of the metagraph.
  std::vector relgraphs(metagraph->NumEdges());
  std::vector num_nodes_per_type;
  CHECK(strm->Read(&num_nodes_per_type)) << "Invalid num_nodes_per_type";
  bool is_pinned = false;
  if (states.version > 1) {
    CHECK(strm->Read(&is_pinned)) << "Invalid flag 'is_pinned'";
  }

  // The arrays are consumed sequentially, in the order they were pickled.
  auto array_itr = states.arrays.begin();
  for (dgl_type_t etype = 0; etype < metagraph->NumEdges(); ++etype) {
    const auto &pair = metagraph->FindEdge(etype);
    const dgl_type_t srctype = pair.first;
    const dgl_type_t dsttype = pair.second;
    const int64_t num_vtypes = (srctype == dsttype) ? 1 : 2;
    int64_t num_src = num_nodes_per_type[srctype];
    int64_t num_dst = num_nodes_per_type[dsttype];
    SparseFormat fmt;
    CHECK(strm->Read(&fmt)) << "Invalid SparseFormat";
    HeteroGraphPtr relgraph;
    switch (fmt) {
      case SparseFormat::kCOO: {
        // Make sure two arrays (row, col) are left before consuming them.
        CHECK_GE(states.arrays.end() - array_itr, 2);
        const auto &row = *(array_itr++);
        const auto &col = *(array_itr++);
        bool rsorted;
        bool csorted;
        CHECK(strm->Read(&rsorted)) << "Invalid flag 'rsorted'";
        CHECK(strm->Read(&csorted)) << "Invalid flag 'csorted'";
        auto coo = aten::COOMatrix(
            num_src, num_dst, row, col, aten::NullArray(), rsorted, csorted);
        // TODO(zihao) fix
        relgraph = CreateFromCOO(num_vtypes, coo, ALL_CODE);
        break;
      }
      case SparseFormat::kCSR: {
        // Make sure three arrays (indptr, indices, edge ids) are left.
        CHECK_GE(states.arrays.end() - array_itr, 3);
        const auto &indptr = *(array_itr++);
        const auto &indices = *(array_itr++);
        const auto &edge_id = *(array_itr++);
        bool sorted;
        CHECK(strm->Read(&sorted)) << "Invalid flag 'sorted'";
        auto csr =
            aten::CSRMatrix(num_src, num_dst, indptr, indices, edge_id, sorted);
        // TODO(zihao) fix
        relgraph = CreateFromCSR(num_vtypes, csr, ALL_CODE);
        break;
      }
      case SparseFormat::kCSC:
      default:
        LOG(FATAL) << "Unsupported sparse format.";
    }
    relgraphs[etype] = relgraph;
  }
  auto graph =
CreateHeteroGraph(metagraph, relgraphs, num_nodes_per_type);
  // Restore the pinned state recorded at pickling time.
  if (is_pinned) {
    graph->PinMemory_();
  }
  return graph;
}

// For backward compatibility
/**
 * @brief Rebuild a heterograph from version-0 pickle states.
 *
 * Version-0 states carry the metagraph, the node counts and one adjacency
 * object per edge type directly instead of a serialized meta stream.
 */
HeteroGraphPtr HeteroUnpickleOld(const HeteroPickleStates &states) {
  const auto metagraph = states.metagraph;
  const auto &num_nodes_per_type = states.num_nodes_per_type;
  CHECK_EQ(states.adjs.size(), metagraph->NumEdges());
  std::vector relgraphs(metagraph->NumEdges());
  for (dgl_type_t etype = 0; etype < metagraph->NumEdges(); ++etype) {
    const auto &pair = metagraph->FindEdge(etype);
    const dgl_type_t srctype = pair.first;
    const dgl_type_t dsttype = pair.second;
    const int64_t num_vtypes = (srctype == dsttype) ? 1 : 2;
    const SparseFormat fmt = static_cast(states.adjs[etype]->format);
    switch (fmt) {
      case SparseFormat::kCOO:
        relgraphs[etype] = UnitGraph::CreateFromCOO(
            num_vtypes, aten::COOMatrix(*states.adjs[etype]));
        break;
      case SparseFormat::kCSR:
        relgraphs[etype] = UnitGraph::CreateFromCSR(
            num_vtypes, aten::CSRMatrix(*states.adjs[etype]));
        break;
      case SparseFormat::kCSC:
      default:
        LOG(FATAL) << "Unsupported sparse format.";
    }
  }
  return CreateHeteroGraph(metagraph, relgraphs, num_nodes_per_type);
}

/**
 * @brief Rebuild a heterograph from states produced by HeteroForkingPickle.
 */
HeteroGraphPtr HeteroForkingUnpickle(const HeteroPickleStates &states) {
  char *buf = const_cast(states.meta.c_str());  // a readonly stream?
dmlc::MemoryFixedSizeStream ifs(buf, states.meta.size());
  dmlc::Stream *strm = &ifs;
  auto meta_imgraph = Serializer::make_shared();
  CHECK(strm->Read(&meta_imgraph)) << "Invalid meta graph";
  GraphPtr metagraph = meta_imgraph;
  // One relation graph per edge of the metagraph.
  std::vector relgraphs(metagraph->NumEdges());
  std::vector num_nodes_per_type;
  CHECK(strm->Read(&num_nodes_per_type)) << "Invalid num_nodes_per_type";
  bool is_pinned = false;
  if (states.version > 1) {
    CHECK(strm->Read(&is_pinned)) << "Invalid flag 'is_pinned'";
  }

  // The arrays are consumed sequentially, in the order they were pickled.
  auto array_itr = states.arrays.begin();
  for (dgl_type_t etype = 0; etype < metagraph->NumEdges(); ++etype) {
    const auto &pair = metagraph->FindEdge(etype);
    const dgl_type_t srctype = pair.first;
    const dgl_type_t dsttype = pair.second;
    const int64_t num_vtypes = (srctype == dsttype) ? 1 : 2;
    int64_t num_src = num_nodes_per_type[srctype];
    int64_t num_dst = num_nodes_per_type[dsttype];

    dgl_format_code_t created_formats, allowed_formats;
    CHECK(strm->Read(&created_formats)) << "Invalid code for created formats";
    CHECK(strm->Read(&allowed_formats)) << "Invalid code for allowed formats";
    HeteroGraphPtr relgraph;

    // Matrices whose format bit is not set stay default-constructed; the
    // has_* flags tell which ones are valid.
    aten::COOMatrix coo;
    aten::CSRMatrix csr;
    aten::CSRMatrix csc;
    bool has_coo = (created_formats & COO_CODE);
    bool has_csr = (created_formats & CSR_CODE);
    bool has_csc = (created_formats & CSC_CODE);

    if (created_formats & COO_CODE) {
      CHECK_GE(states.arrays.end() - array_itr, 2);
      const auto &row = *(array_itr++);
      const auto &col = *(array_itr++);
      bool rsorted;
      bool csorted;
      CHECK(strm->Read(&rsorted)) << "Invalid flag 'rsorted'";
      CHECK(strm->Read(&csorted)) << "Invalid flag 'csorted'";
      coo = aten::COOMatrix(
          num_src, num_dst, row, col, aten::NullArray(), rsorted, csorted);
    }
    if (created_formats & CSR_CODE) {
      CHECK_GE(states.arrays.end() - array_itr, 3);
      const auto &indptr = *(array_itr++);
      const auto &indices = *(array_itr++);
      const auto &edge_id = *(array_itr++);
      bool sorted;
      CHECK(strm->Read(&sorted)) << "Invalid flag 'sorted'";
      csr = aten::CSRMatrix(num_src, num_dst, indptr, indices, edge_id, sorted);
    }
    if
(created_formats & CSC_CODE) {
      CHECK_GE(states.arrays.end() - array_itr, 3);
      const auto &indptr = *(array_itr++);
      const auto &indices = *(array_itr++);
      const auto &edge_id = *(array_itr++);
      bool sorted;
      CHECK(strm->Read(&sorted)) << "Invalid flag 'sorted'";
      // Note the swapped dimensions: the CSC is stored as a
      // num_dst x num_src CSR matrix.
      csc = aten::CSRMatrix(num_dst, num_src, indptr, indices, edge_id, sorted);
    }
    relgraphs[etype] = UnitGraph::CreateUnitGraphFrom(
        num_vtypes, csc, csr, coo, has_csc, has_csr, has_coo, allowed_formats);
  }
  auto graph = CreateHeteroGraph(metagraph, relgraphs, num_nodes_per_type);
  // Restore the pinned state recorded at pickling time.
  if (is_pinned) {
    graph->PinMemory_();
  }
  return graph;
}

// C API: return the version of the pickle states.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPickleStatesGetVersion")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroPickleStatesRef st = args[0];
      *rv = st->version;
    });

// C API: return the serialized meta buffer. The byte array points into the
// string owned by the states object.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPickleStatesGetMeta")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroPickleStatesRef st = args[0];
      DGLByteArray buf;
      buf.data = st->meta.c_str();
      buf.size = st->meta.size();
      *rv = buf;
    });

// C API: return the index arrays of the pickle states.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPickleStatesGetArrays")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroPickleStatesRef st = args[0];
      *rv = ConvertNDArrayVectorToPackedFunc(st->arrays);
    });

// C API: return the number of index arrays of the pickle states.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPickleStatesGetArraysNum")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroPickleStatesRef st = args[0];
      *rv = static_cast(st->arrays.size());
    });

// C API: create pickle states from a version, a meta buffer and arrays.
// A version of 0 is stored as 1.
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLCreateHeteroPickleStates")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const int version = args[0];
      std::string meta = args[1];
      const List arrays = args[2];
      std::shared_ptr st(new HeteroPickleStates);
      st->version = version == 0 ?
1 : version; st->meta = meta; st->arrays.reserve(arrays.size()); for (const auto &ref : arrays) { st->arrays.push_back(ref->data); } *rv = HeteroPickleStatesRef(st); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroPickle") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef ref = args[0]; std::shared_ptr st(new HeteroPickleStates); *st = HeteroPickle(ref.sptr()); *rv = HeteroPickleStatesRef(st); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroForkingPickle") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef ref = args[0]; std::shared_ptr st(new HeteroPickleStates); *st = HeteroForkingPickle(ref.sptr()); *rv = HeteroPickleStatesRef(st); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroUnpickle") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroPickleStatesRef ref = args[0]; HeteroGraphPtr graph; switch (ref->version) { case 0: graph = HeteroUnpickleOld(*ref.sptr()); break; case 1: case 2: graph = HeteroUnpickle(*ref.sptr()); break; default: LOG(FATAL) << "Version can only be 0 or 1 or 2."; } *rv = HeteroGraphRef(graph); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroForkingUnpickle") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroPickleStatesRef ref = args[0]; HeteroGraphPtr graph = HeteroForkingUnpickle(*ref.sptr()); *rv = HeteroGraphRef(graph); }); DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLCreateHeteroPickleStatesOld") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphRef metagraph = args[0]; IdArray num_nodes_per_type = args[1]; List adjs = args[2]; std::shared_ptr st(new HeteroPickleStates); st->version = 0; st->metagraph = metagraph.sptr(); st->num_nodes_per_type = num_nodes_per_type.ToVector(); st->adjs.reserve(adjs.size()); for (const auto &ref : adjs) st->adjs.push_back(ref.sptr()); *rv = HeteroPickleStatesRef(st); }); } // namespace dgl ================================================ FILE: src/graph/sampler.cc ================================================ /** * Copyright (c) 2018 
by Contributors * @file graph/sampler.cc * @brief DGL sampler implementation */ #include #include #include #include #include #include #include #include #include #include #include #include #include "../c_api_common.h" using namespace dgl::runtime; namespace dgl { namespace { /** * ArrayHeap is used to sample elements from vector */ template class ArrayHeap { public: explicit ArrayHeap(const std::vector &prob) { vec_size_ = prob.size(); bit_len_ = ceil(log2(vec_size_)); limit_ = 1UL << bit_len_; // allocate twice the size heap_.resize(limit_ << 1, 0); // allocate the leaves for (size_t i = limit_; i < vec_size_ + limit_; ++i) { heap_[i] = prob[i - limit_]; } // iterate up the tree (this is O(m)) for (int i = bit_len_ - 1; i >= 0; --i) { for (size_t j = (1UL << i); j < (1UL << (i + 1)); ++j) { heap_[j] = heap_[j << 1] + heap_[(j << 1) + 1]; } } } ~ArrayHeap() {} /** * Remove term from index (this costs O(log m) steps) */ void Delete(size_t index) { size_t i = index + limit_; heap_[i] = 0; i /= 2; for (int j = bit_len_ - 1; j >= 0; --j) { // Using heap_[i] = heap_[i] - w will loss some precision in float. // Using addition to re-calculate the weight layer by layer. 
heap_[i] = heap_[i << 1] + heap_[(i << 1) + 1]; i /= 2; } } /** * Add value w to index (this costs O(log m) steps) */ void Add(size_t index, ValueType w) { size_t i = index + limit_; for (int j = bit_len_; j >= 0; --j) { heap_[i] += w; i = i >> 1; } } /** * Sample from arrayHeap */ size_t Sample() { // heap_ is empty ValueType xi = heap_[1] * RandomEngine::ThreadLocal()->Uniform(); size_t i = 1; while (i < limit_) { i = i << 1; if (xi >= heap_[i]) { xi -= heap_[i]; i += 1; } } return i - limit_; } /** * Sample a vector by given the size n */ size_t SampleWithoutReplacement(size_t n, std::vector *samples) { // sample n elements size_t i = 0; for (; i < n; ++i) { // heap is empty if (heap_[1] == 0) { break; } samples->at(i) = this->Sample(); this->Delete(samples->at(i)); } return i; } private: size_t vec_size_; // sample size int bit_len_; // bit size size_t limit_; std::vector heap_; }; ///////////////////////// Samplers ////////////////////////// class EdgeSamplerObject : public Object { public: EdgeSamplerObject( const GraphPtr gptr, IdArray seed_edges, const int64_t batch_size, const int64_t num_workers, const bool replacement, const bool reset, const std::string neg_mode, const int64_t neg_sample_size, const int64_t chunk_size, const bool exclude_positive, const bool check_false_neg, IdArray relations) { gptr_ = gptr; seed_edges_ = seed_edges; relations_ = relations; batch_size_ = batch_size; num_workers_ = num_workers; replacement_ = replacement; reset_ = reset; neg_mode_ = neg_mode; neg_sample_size_ = neg_sample_size; exclude_positive_ = exclude_positive; check_false_neg_ = check_false_neg; chunk_size_ = chunk_size; } ~EdgeSamplerObject() {} virtual void Fetch(DGLRetValue *rv) = 0; virtual void Reset() = 0; protected: virtual void randomSample( size_t set_size, size_t num, std::vector *out) = 0; virtual void randomSample( size_t set_size, size_t num, const std::vector &exclude, std::vector *out) = 0; NegSubgraph genNegEdgeSubgraph( const Subgraph &pos_subg, 
const std::string &neg_mode, int64_t neg_sample_size, bool exclude_positive, bool check_false_neg); NegSubgraph genChunkedNegEdgeSubgraph( const Subgraph &pos_subg, const std::string &neg_mode, int64_t neg_sample_size, bool exclude_positive, bool check_false_neg); GraphPtr gptr_; IdArray seed_edges_; IdArray relations_; int64_t batch_size_; int64_t num_workers_; bool replacement_; int64_t reset_; std::string neg_mode_; int64_t neg_sample_size_; bool exclude_positive_; bool check_false_neg_; int64_t chunk_size_; }; /** * Uniformly sample integers from [0, set_size) without replacement. */ void RandomSample(size_t set_size, size_t num, std::vector *out) { if (num < set_size) { std::unordered_set sampled_idxs; while (sampled_idxs.size() < num) { sampled_idxs.insert(RandomEngine::ThreadLocal()->RandInt(set_size)); } out->insert(out->end(), sampled_idxs.begin(), sampled_idxs.end()); } else { // If we need to sample all elements in the set, we don't need to // generate random numbers. for (size_t i = 0; i < set_size; i++) out->push_back(i); } } void RandomSample( size_t set_size, size_t num, const std::vector &exclude, std::vector *out) { std::unordered_map sampled_idxs; for (auto v : exclude) { sampled_idxs.insert(std::pair(v, 0)); } if (num + exclude.size() < set_size) { while (sampled_idxs.size() < num + exclude.size()) { size_t rand = RandomEngine::ThreadLocal()->RandInt(set_size); sampled_idxs.insert(std::pair(rand, 1)); } for (auto it = sampled_idxs.begin(); it != sampled_idxs.end(); it++) { if (it->second) { out->push_back(it->first); } } } else { // If we need to sample all elements in the set, we don't need to // generate random numbers. for (size_t i = 0; i < set_size; i++) { // If the element doesn't exist in exclude. if (sampled_idxs.find(i) == sampled_idxs.end()) { out->push_back(i); } } } } /** * For a sparse array whose non-zeros are represented by nz_idxs, * negate the sparse array and outputs the non-zeros in the negated array. 
*/ void NegateArray( const std::vector &nz_idxs, size_t arr_size, std::vector *out) { // nz_idxs must have been sorted. auto it = nz_idxs.begin(); size_t i = 0; CHECK_GT(arr_size, nz_idxs.back()); for (; i < arr_size && it != nz_idxs.end(); i++) { if (*it == i) { it++; continue; } out->push_back(i); } for (; i < arr_size; i++) { out->push_back(i); } } /** * Uniform sample vertices from a list of vertices. */ void GetUniformSample( const dgl_id_t *edge_id_list, const dgl_id_t *vid_list, const size_t ver_len, const size_t max_num_neighbor, std::vector *out_ver, std::vector *out_edge) { // Copy vid_list to output if (ver_len <= max_num_neighbor) { out_ver->insert(out_ver->end(), vid_list, vid_list + ver_len); out_edge->insert(out_edge->end(), edge_id_list, edge_id_list + ver_len); return; } // If we just sample a small number of elements from a large neighbor list. std::vector sorted_idxs; if (ver_len > max_num_neighbor * 2) { sorted_idxs.reserve(max_num_neighbor); RandomSample(ver_len, max_num_neighbor, &sorted_idxs); std::sort(sorted_idxs.begin(), sorted_idxs.end()); } else { std::vector negate; negate.reserve(ver_len - max_num_neighbor); RandomSample(ver_len, ver_len - max_num_neighbor, &negate); std::sort(negate.begin(), negate.end()); NegateArray(negate, ver_len, &sorted_idxs); } // verify the result. 
CHECK_EQ(sorted_idxs.size(), max_num_neighbor); for (size_t i = 1; i < sorted_idxs.size(); i++) { CHECK_GT(sorted_idxs[i], sorted_idxs[i - 1]); } for (auto idx : sorted_idxs) { out_ver->push_back(vid_list[idx]); out_edge->push_back(edge_id_list[idx]); } } /** * Non-uniform sample via ArrayHeap * * @param probability Transition probability on the entire graph, indexed by * edge ID */ template void GetNonUniformSample( const ValueType *probability, const dgl_id_t *edge_id_list, const dgl_id_t *vid_list, const size_t ver_len, const size_t max_num_neighbor, std::vector *out_ver, std::vector *out_edge) { // Copy vid_list to output if (ver_len <= max_num_neighbor) { out_ver->insert(out_ver->end(), vid_list, vid_list + ver_len); out_edge->insert(out_edge->end(), edge_id_list, edge_id_list + ver_len); return; } // Make sample std::vector sp_index(max_num_neighbor); std::vector sp_prob(ver_len); for (size_t i = 0; i < ver_len; ++i) { sp_prob[i] = probability[edge_id_list[i]]; } ArrayHeap arrayHeap(sp_prob); arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index); out_ver->resize(max_num_neighbor); out_edge->resize(max_num_neighbor); for (size_t i = 0; i < max_num_neighbor; ++i) { size_t idx = sp_index[i]; out_ver->at(i) = vid_list[idx]; out_edge->at(i) = edge_id_list[idx]; } sort(out_ver->begin(), out_ver->end()); sort(out_edge->begin(), out_edge->end()); } /** * Used for subgraph sampling */ struct neigh_list { std::vector neighs; std::vector edges; neigh_list( const std::vector &_neighs, const std::vector &_edges) : neighs(_neighs), edges(_edges) {} }; struct neighbor_info { dgl_id_t id; size_t pos; size_t num_edges; neighbor_info(dgl_id_t id, size_t pos, size_t num_edges) { this->id = id; this->pos = pos; this->num_edges = num_edges; } }; NodeFlow ConstructNodeFlow( std::vector neighbor_list, std::vector edge_list, std::vector layer_offsets, std::vector> *sub_vers, std::vector *neigh_pos, const std::string &edge_type, int64_t num_edges, int num_hops) { NodeFlow 
nf = NodeFlow::Create(); uint64_t num_vertices = sub_vers->size(); nf->node_mapping = aten::NewIdArray(num_vertices); nf->edge_mapping = aten::NewIdArray(num_edges); nf->layer_offsets = aten::NewIdArray(num_hops + 1); nf->flow_offsets = aten::NewIdArray(num_hops); dgl_id_t *node_map_data = static_cast(nf->node_mapping->data); dgl_id_t *layer_off_data = static_cast(nf->layer_offsets->data); dgl_id_t *flow_off_data = static_cast(nf->flow_offsets->data); dgl_id_t *edge_map_data = static_cast(nf->edge_mapping->data); // Construct sub_csr_graph, we treat nodeflow as multigraph by default auto subg_csr = CSRPtr(new CSR(num_vertices, num_edges)); dgl_id_t *indptr_out = static_cast(subg_csr->indptr()->data); dgl_id_t *col_list_out = static_cast(subg_csr->indices()->data); dgl_id_t *eid_out = static_cast(subg_csr->edge_ids()->data); size_t collected_nedges = 0; // The data from the previous steps: // * node data: sub_vers (vid, layer), neigh_pos, // * edge data: neighbor_list, edge_list, probability. // * layer_offsets: the offset in sub_vers. dgl_id_t ver_id = 0; std::vector> layer_ver_maps; layer_ver_maps.resize(num_hops); size_t out_node_idx = 0; for (int layer_id = num_hops - 1; layer_id >= 0; layer_id--) { // We sort the vertices in a layer so that we don't need to sort the // neighbor Ids after remap to a subgraph. However, we don't need to sort // the first layer because we want the order of the nodes in the first layer // is the same as the input seed nodes. if (layer_id > 0) { std::sort( sub_vers->begin() + layer_offsets[layer_id], sub_vers->begin() + layer_offsets[layer_id + 1], [](const std::pair &a1, const std::pair &a2) { return a1.first < a2.first; }); } // Save the sampled vertices and its layer Id. 
for (size_t i = layer_offsets[layer_id]; i < layer_offsets[layer_id + 1]; i++) { node_map_data[out_node_idx++] = sub_vers->at(i).first; layer_ver_maps[layer_id].insert( std::pair(sub_vers->at(i).first, ver_id++)); CHECK_EQ(sub_vers->at(i).second, layer_id); } } CHECK(out_node_idx == num_vertices); // sampling algorithms have to start from the seed nodes, so the seed nodes // are in the first layer and the input nodes are in the last layer. When we // expose the sampled graph to a Python user, we say the input nodes are in // the first layer and the seed nodes are in the last layer. Thus, when we // copy sampled results to a CSR, we need to reverse the order of layers. std::fill(indptr_out, indptr_out + num_vertices + 1, 0); size_t row_idx = layer_offsets[num_hops] - layer_offsets[num_hops - 1]; layer_off_data[0] = 0; layer_off_data[1] = layer_offsets[num_hops] - layer_offsets[num_hops - 1]; int out_layer_idx = 1; for (int layer_id = num_hops - 2; layer_id >= 0; layer_id--) { // Because we don't sort the vertices in the first layer above, we can't // sort the neighbor positions of the vertices in the first layer either. if (layer_id > 0) { std::sort( neigh_pos->begin() + layer_offsets[layer_id], neigh_pos->begin() + layer_offsets[layer_id + 1], [](const neighbor_info &a1, const neighbor_info &a2) { return a1.id < a2.id; }); } for (size_t i = layer_offsets[layer_id]; i < layer_offsets[layer_id + 1]; i++) { dgl_id_t dst_id = sub_vers->at(i).first; CHECK_EQ(dst_id, neigh_pos->at(i).id); size_t pos = neigh_pos->at(i).pos; CHECK_LE(pos, neighbor_list.size()); const size_t nedges = neigh_pos->at(i).num_edges; if (neighbor_list.empty()) CHECK_EQ(nedges, 0); // We need to map the Ids of the neighbors to the subgraph. 
auto neigh_it = neighbor_list.begin() + pos; for (size_t i = 0; i < nedges; i++) { dgl_id_t neigh = *(neigh_it + i); CHECK( layer_ver_maps[layer_id + 1].find(neigh) != layer_ver_maps[layer_id + 1].end()); col_list_out[collected_nedges + i] = layer_ver_maps[layer_id + 1][neigh]; } // We can simply copy the edge Ids. std::copy_n( edge_list.begin() + pos, nedges, edge_map_data + collected_nedges); collected_nedges += nedges; indptr_out[row_idx + 1] = indptr_out[row_idx] + nedges; row_idx++; } layer_off_data[out_layer_idx + 1] = layer_off_data[out_layer_idx] + layer_offsets[layer_id + 1] - layer_offsets[layer_id]; out_layer_idx++; } CHECK_EQ(row_idx, num_vertices); CHECK_EQ(indptr_out[row_idx], num_edges); CHECK_EQ(out_layer_idx, num_hops); CHECK_EQ(layer_off_data[out_layer_idx], num_vertices); // Copy flow offsets. flow_off_data[0] = 0; int out_flow_idx = 0; for (size_t i = 0; i < layer_offsets.size() - 2; i++) { size_t num_edges = indptr_out[layer_off_data[i + 2]] - indptr_out[layer_off_data[i + 1]]; flow_off_data[out_flow_idx + 1] = flow_off_data[out_flow_idx] + num_edges; out_flow_idx++; } CHECK(out_flow_idx == num_hops - 1); CHECK(flow_off_data[num_hops - 1] == static_cast(num_edges)); std::iota(eid_out, eid_out + num_edges, 0); if (edge_type == std::string("in")) { nf->graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr)); } else { nf->graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr)); } return nf; } template NodeFlow SampleSubgraph( const ImmutableGraph *graph, const std::vector &seeds, const ValueType *probability, const std::string &edge_type, int num_hops, size_t num_neighbor, const bool add_self_loop) { CHECK_EQ(graph->NumBits(), 64) << "32 bit graph is not supported yet"; const size_t num_seeds = seeds.size(); auto orig_csr = edge_type == "in" ? 
graph->GetInCSR() : graph->GetOutCSR(); const dgl_id_t *val_list = static_cast(orig_csr->edge_ids()->data); const dgl_id_t *col_list = static_cast(orig_csr->indices()->data); const dgl_id_t *indptr = static_cast(orig_csr->indptr()->data); std::unordered_set sub_ver_map; // The vertex Ids in a layer. std::vector> sub_vers; sub_vers.reserve(num_seeds * 10); // add seed vertices for (size_t i = 0; i < num_seeds; ++i) { auto ret = sub_ver_map.insert(seeds[i]); // If the vertex is inserted successfully. if (ret.second) { sub_vers.emplace_back(seeds[i], 0); } } std::vector tmp_sampled_src_list; std::vector tmp_sampled_edge_list; // ver_id, position std::vector neigh_pos; neigh_pos.reserve(num_seeds); std::vector neighbor_list; std::vector edge_list; std::vector layer_offsets(num_hops + 1); int64_t num_edges = 0; layer_offsets[0] = 0; layer_offsets[1] = sub_vers.size(); for (int layer_id = 1; layer_id < num_hops; layer_id++) { // We need to avoid resampling the same node in a layer, but we allow a node // to be resampled in multiple layers. We use `sub_ver_map` to keep track of // sampled nodes in a layer, and clear it when entering a new layer. sub_ver_map.clear(); // Previous iteration collects all nodes in sub_vers, which are collected // in the previous layer. sub_vers is used both as a node collection and a // queue. 
for (size_t idx = layer_offsets[layer_id - 1]; idx < layer_offsets[layer_id]; idx++) { dgl_id_t dst_id = sub_vers[idx].first; const int cur_node_level = sub_vers[idx].second; tmp_sampled_src_list.clear(); tmp_sampled_edge_list.clear(); dgl_id_t ver_len = *(indptr + dst_id + 1) - *(indptr + dst_id); if (probability == nullptr) { // uniform-sample GetUniformSample( val_list + *(indptr + dst_id), col_list + *(indptr + dst_id), ver_len, num_neighbor, &tmp_sampled_src_list, &tmp_sampled_edge_list); } else { // non-uniform-sample GetNonUniformSample( probability, val_list + *(indptr + dst_id), col_list + *(indptr + dst_id), ver_len, num_neighbor, &tmp_sampled_src_list, &tmp_sampled_edge_list); } // If we need to add self loop and it doesn't exist in the sampled // neighbor list. if (add_self_loop && std::find( tmp_sampled_src_list.begin(), tmp_sampled_src_list.end(), dst_id) == tmp_sampled_src_list.end()) { tmp_sampled_src_list.push_back(dst_id); const dgl_id_t *src_list = col_list + *(indptr + dst_id); const dgl_id_t *eid_list = val_list + *(indptr + dst_id); // TODO(zhengda) this operation has O(N) complexity. It can be pretty // slow. const dgl_id_t *src = std::find(src_list, src_list + ver_len, dst_id); // If there doesn't exist a self loop in the graph. // we have to add -1 as the edge id for the self-loop edge. 
if (src == src_list + ver_len) tmp_sampled_edge_list.push_back(-1); else tmp_sampled_edge_list.push_back(eid_list[src - src_list]); } CHECK_EQ(tmp_sampled_src_list.size(), tmp_sampled_edge_list.size()); neigh_pos.emplace_back( dst_id, neighbor_list.size(), tmp_sampled_src_list.size()); // Then push the vertices for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) { neighbor_list.push_back(tmp_sampled_src_list[i]); } // Finally we push the edge list for (size_t i = 0; i < tmp_sampled_edge_list.size(); ++i) { edge_list.push_back(tmp_sampled_edge_list[i]); } num_edges += tmp_sampled_src_list.size(); for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) { // We need to add the neighbor in the hashtable here. This ensures that // the vertex in the queue is unique. If we see a vertex before, we // don't need to add it to the queue again. auto ret = sub_ver_map.insert(tmp_sampled_src_list[i]); // If the sampled neighbor is inserted to the map successfully. if (ret.second) { sub_vers.emplace_back(tmp_sampled_src_list[i], cur_node_level + 1); } } } layer_offsets[layer_id + 1] = layer_offsets[layer_id] + sub_ver_map.size(); CHECK_EQ(layer_offsets[layer_id + 1], sub_vers.size()); } return ConstructNodeFlow( neighbor_list, edge_list, layer_offsets, &sub_vers, &neigh_pos, edge_type, num_edges, num_hops); } } // namespace DGL_REGISTER_GLOBAL("_deprecate.nodeflow._CAPI_NodeFlowGetGraph") .set_body([](DGLArgs args, DGLRetValue *rv) { NodeFlow nflow = args[0]; *rv = nflow->graph; }); DGL_REGISTER_GLOBAL("_deprecate.nodeflow._CAPI_NodeFlowGetNodeMapping") .set_body([](DGLArgs args, DGLRetValue *rv) { NodeFlow nflow = args[0]; *rv = nflow->node_mapping; }); DGL_REGISTER_GLOBAL("_deprecate.nodeflow._CAPI_NodeFlowGetEdgeMapping") .set_body([](DGLArgs args, DGLRetValue *rv) { NodeFlow nflow = args[0]; *rv = nflow->edge_mapping; }); DGL_REGISTER_GLOBAL("_deprecate.nodeflow._CAPI_NodeFlowGetLayerOffsets") .set_body([](DGLArgs args, DGLRetValue *rv) { NodeFlow nflow = args[0]; *rv = 
nflow->layer_offsets; }); DGL_REGISTER_GLOBAL("_deprecate.nodeflow._CAPI_NodeFlowGetBlockOffsets") .set_body([](DGLArgs args, DGLRetValue *rv) { NodeFlow nflow = args[0]; *rv = nflow->flow_offsets; }); template NodeFlow SamplerOp::NeighborSample( const ImmutableGraph *graph, const std::vector &seeds, const std::string &edge_type, int num_hops, int expand_factor, const bool add_self_loop, const ValueType *probability) { return SampleSubgraph( graph, seeds, probability, edge_type, num_hops + 1, expand_factor, add_self_loop); } namespace { void ConstructLayers( const dgl_id_t *indptr, const dgl_id_t *indices, const std::vector &seed_array, IdArray layer_sizes, std::vector *layer_offsets, std::vector *node_mapping, std::vector *actl_layer_sizes, std::vector *probabilities) { /** * Given a graph and a collection of seed nodes, this function constructs * NodeFlow layers via uniform layer-wise sampling, and return the resultant * layers and their corresponding probabilities. */ std::copy( seed_array.begin(), seed_array.end(), std::back_inserter(*node_mapping)); actl_layer_sizes->push_back(node_mapping->size()); probabilities->insert(probabilities->end(), node_mapping->size(), 1); const int64_t *layer_sizes_data = static_cast(layer_sizes->data); const int64_t num_layers = layer_sizes->shape[0]; size_t curr = 0; size_t next = node_mapping->size(); for (int64_t i = num_layers - 1; i >= 0; --i) { const int64_t layer_size = layer_sizes_data[i]; std::unordered_set candidate_set; for (auto j = curr; j != next; ++j) { auto src = (*node_mapping)[j]; candidate_set.insert(indices + indptr[src], indices + indptr[src + 1]); } std::vector candidate_vector; std::copy( candidate_set.begin(), candidate_set.end(), std::back_inserter(candidate_vector)); std::unordered_map n_occurrences; auto n_candidates = candidate_vector.size(); for (int64_t j = 0; j != layer_size; ++j) { auto dst = candidate_vector[RandomEngine::ThreadLocal()->RandInt(n_candidates)]; if 
(!n_occurrences.insert(std::make_pair(dst, 1)).second) { ++n_occurrences[dst]; } } for (auto const &pair : n_occurrences) { node_mapping->push_back(pair.first); float p = pair.second * n_candidates / static_cast(layer_size); probabilities->push_back(p); } actl_layer_sizes->push_back(node_mapping->size() - next); curr = next; next = node_mapping->size(); } std::reverse(node_mapping->begin(), node_mapping->end()); std::reverse(actl_layer_sizes->begin(), actl_layer_sizes->end()); layer_offsets->push_back(0); for (const auto &size : *actl_layer_sizes) { layer_offsets->push_back(size + layer_offsets->back()); } } void ConstructFlows( const dgl_id_t *indptr, const dgl_id_t *indices, const dgl_id_t *eids, const std::vector &node_mapping, const std::vector &actl_layer_sizes, std::vector *sub_indptr, std::vector *sub_indices, std::vector *sub_eids, std::vector *flow_offsets, std::vector *edge_mapping) { /** * Given a graph and a sequence of NodeFlow layers, this function constructs * dense subgraphs (flows) between consecutive layers. 
*/
  // One flow sits between every pair of consecutive layers.
  auto n_flows = actl_layer_sizes.size() - 1;
  // Rows of the first layer get no edges: one zero indptr entry per node of
  // that layer, plus the leading zero.
  for (int64_t i = 0; i < actl_layer_sizes.front() + 1; i++)
    sub_indptr->push_back(0);
  flow_offsets->push_back(0);
  // Offset in node_mapping of the first node of the current source layer.
  int64_t first = 0;
  for (size_t i = 0; i < n_flows; ++i) {
    // Map each node ID of the source layer to its position in node_mapping.
    auto src_size = actl_layer_sizes[i];
    std::unordered_map source_map;
    for (int64_t j = 0; j < src_size; ++j) {
      source_map.insert(std::make_pair(node_mapping[first + j], first + j));
    }
    // For every node of the next layer, keep the neighbors that were sampled
    // into the source layer.
    auto dst_size = actl_layer_sizes[i + 1];
    for (int64_t j = 0; j < dst_size; ++j) {
      auto dst = node_mapping[first + src_size + j];
      // (position of the neighbor in node_mapping, original edge ID)
      typedef std::pair id_pair;
      std::vector neighbor_indices;
      for (dgl_id_t k = indptr[dst]; k < indptr[dst + 1]; ++k) {
        // TODO(gaiyu): accelerate hash table lookup
        auto ret = source_map.find(indices[k]);
        if (ret != source_map.end()) {
          neighbor_indices.push_back(std::make_pair(ret->second, eids[k]));
        }
      }
      // Order the row by neighbor position before emitting it.
      auto cmp = [](const id_pair p, const id_pair q) -> bool {
        return p.first < q.first;
      };
      std::sort(neighbor_indices.begin(), neighbor_indices.end(), cmp);
      for (const auto &pair : neighbor_indices) {
        sub_indices->push_back(pair.first);
        edge_mapping->push_back(pair.second);
      }
      // Close the row of this destination node.
      sub_indptr->push_back(sub_indices->size());
    }
    // Running edge count at the end of this flow.
    flow_offsets->push_back(sub_indices->size());
    first += src_size;
  }
  // Edges of the subgraph are numbered consecutively from 0.
  sub_eids->resize(sub_indices->size());
  std::iota(sub_eids->begin(), sub_eids->end(), 0);
}

}  // namespace

/**
 * Layer-wise uniform sampling: builds the layers with ConstructLayers, the
 * edges between consecutive layers with ConstructFlows, and wraps the result
 * in a NodeFlow.
 */
NodeFlow SamplerOp::LayerUniformSample(
    const ImmutableGraph *graph, const std::vector &seeds,
    const std::string &neighbor_type, IdArray layer_sizes) {
  const auto g_csr = neighbor_type == "in" ?
graph->GetInCSR() : graph->GetOutCSR(); const dgl_id_t *indptr = static_cast(g_csr->indptr()->data); const dgl_id_t *indices = static_cast(g_csr->indices()->data); const dgl_id_t *eids = static_cast(g_csr->edge_ids()->data); std::vector layer_offsets; std::vector node_mapping; std::vector actl_layer_sizes; std::vector probabilities; ConstructLayers( indptr, indices, seeds, layer_sizes, &layer_offsets, &node_mapping, &actl_layer_sizes, &probabilities); std::vector sub_indptr, sub_indices, sub_edge_ids; std::vector flow_offsets; std::vector edge_mapping; ConstructFlows( indptr, indices, eids, node_mapping, actl_layer_sizes, &sub_indptr, &sub_indices, &sub_edge_ids, &flow_offsets, &edge_mapping); // sanity check CHECK_GT(sub_indptr.size(), 0); CHECK_EQ(sub_indptr[0], 0); CHECK_EQ(sub_indptr.back(), sub_indices.size()); CHECK_EQ(sub_indices.size(), sub_edge_ids.size()); NodeFlow nf = NodeFlow::Create(); auto sub_csr = CSRPtr(new CSR( aten::VecToIdArray(sub_indptr), aten::VecToIdArray(sub_indices), aten::VecToIdArray(sub_edge_ids))); if (neighbor_type == std::string("in")) { nf->graph = GraphPtr(new ImmutableGraph(sub_csr, nullptr)); } else { nf->graph = GraphPtr(new ImmutableGraph(nullptr, sub_csr)); } nf->node_mapping = aten::VecToIdArray(node_mapping); nf->edge_mapping = aten::VecToIdArray(edge_mapping); nf->layer_offsets = aten::VecToIdArray(layer_offsets); nf->flow_offsets = aten::VecToIdArray(flow_offsets); return nf; } void BuildCsr(const ImmutableGraph &g, const std::string neigh_type) { if (neigh_type == "in") { auto csr = g.GetInCSR(); assert(csr); } else if (neigh_type == "out") { auto csr = g.GetOutCSR(); assert(csr); } else { LOG(FATAL) << "We don't support sample from neighbor type " << neigh_type; } } template std::vector NeighborSamplingImpl( const ImmutableGraphPtr gptr, const IdArray seed_nodes, const int64_t batch_start_id, const int64_t batch_size, const int64_t max_num_workers, const int64_t expand_factor, const int64_t num_hops, const std::string 
neigh_type, const bool add_self_loop, const ValueType *probability) { // process args CHECK(aten::IsValidIdArray(seed_nodes)); const dgl_id_t *seed_nodes_data = static_cast(seed_nodes->data); const int64_t num_seeds = seed_nodes->shape[0]; const int64_t num_workers = std::min( max_num_workers, (num_seeds + batch_size - 1) / batch_size - batch_start_id); // We need to make sure we have the right CSR before we enter parallel // sampling. BuildCsr(*gptr, neigh_type); // generate node flows std::vector nflows(num_workers); runtime::parallel_for(0, num_workers, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { // create per-worker seed nodes. const int64_t start = (batch_start_id + i) * batch_size; const int64_t end = std::min(start + batch_size, num_seeds); // TODO(minjie): the vector allocation/copy is unnecessary std::vector worker_seeds(end - start); std::copy( seed_nodes_data + start, seed_nodes_data + end, worker_seeds.begin()); nflows[i] = SamplerOp::NeighborSample( gptr.get(), worker_seeds, neigh_type, num_hops, expand_factor, add_self_loop, probability); } }); return nflows; } DGL_REGISTER_GLOBAL("sampling._CAPI_UniformSampling") .set_body([](DGLArgs args, DGLRetValue *rv) { // arguments const GraphRef g = args[0]; const IdArray seed_nodes = args[1]; const int64_t batch_start_id = args[2]; const int64_t batch_size = args[3]; const int64_t max_num_workers = args[4]; const int64_t expand_factor = args[5]; const int64_t num_hops = args[6]; const std::string neigh_type = args[7]; const bool add_self_loop = args[8]; auto gptr = std::dynamic_pointer_cast(g.sptr()); CHECK(gptr) << "sampling isn't implemented in mutable graph"; CHECK(aten::IsValidIdArray(seed_nodes)); CHECK_EQ(seed_nodes->ctx.device_type, kDGLCPU) << "UniformSampler only support CPU sampling"; std::vector nflows = NeighborSamplingImpl( gptr, seed_nodes, batch_start_id, batch_size, max_num_workers, expand_factor, num_hops, neigh_type, add_self_loop, nullptr); *rv = List(nflows); }); 
DGL_REGISTER_GLOBAL("sampling._CAPI_NeighborSampling") .set_body([](DGLArgs args, DGLRetValue *rv) { // arguments const GraphRef g = args[0]; const IdArray seed_nodes = args[1]; const int64_t batch_start_id = args[2]; const int64_t batch_size = args[3]; const int64_t max_num_workers = args[4]; const int64_t expand_factor = args[5]; const int64_t num_hops = args[6]; const std::string neigh_type = args[7]; const bool add_self_loop = args[8]; const NDArray probability = args[9]; auto gptr = std::dynamic_pointer_cast(g.sptr()); CHECK(gptr) << "sampling isn't implemented in mutable graph"; CHECK(aten::IsValidIdArray(seed_nodes)); CHECK_EQ(seed_nodes->ctx.device_type, kDGLCPU) << "NeighborSampler only support CPU sampling"; std::vector nflows; CHECK(probability->dtype.code == kDGLFloat) << "transition probability must be float"; CHECK(probability->ndim == 1) << "transition probability must be a 1-dimensional vector"; CHECK_EQ(probability->ctx.device_type, kDGLCPU) << "NeighborSampling only support CPU sampling"; ATEN_FLOAT_TYPE_SWITCH( probability->dtype, FloatType, "transition probability", { const FloatType *prob; if (aten::IsNullArray(probability)) { prob = nullptr; } else { CHECK( probability->shape[0] == static_cast(gptr->NumEdges())) << "transition probability must have same number of elements " "as edges"; CHECK(probability.IsContiguous()) << "transition probability must be contiguous tensor"; prob = static_cast(probability->data); } nflows = NeighborSamplingImpl( gptr, seed_nodes, batch_start_id, batch_size, max_num_workers, expand_factor, num_hops, neigh_type, add_self_loop, prob); }); *rv = List(nflows); }); DGL_REGISTER_GLOBAL("sampling._CAPI_LayerSampling") .set_body([](DGLArgs args, DGLRetValue *rv) { // arguments GraphRef g = args[0]; const IdArray seed_nodes = args[1]; const int64_t batch_start_id = args[2]; const int64_t batch_size = args[3]; const int64_t max_num_workers = args[4]; const IdArray layer_sizes = args[5]; const std::string neigh_type = 
args[6]; // process args auto gptr = std::dynamic_pointer_cast(g.sptr()); CHECK(gptr) << "sampling isn't implemented in mutable graph"; CHECK(aten::IsValidIdArray(seed_nodes)); CHECK_EQ(seed_nodes->ctx.device_type, kDGLCPU) << "LayerSampler only support CPU sampling"; CHECK(aten::IsValidIdArray(layer_sizes)); CHECK_EQ(layer_sizes->ctx.device_type, kDGLCPU) << "LayerSampler only support CPU sampling"; const dgl_id_t *seed_nodes_data = static_cast(seed_nodes->data); const int64_t num_seeds = seed_nodes->shape[0]; const int64_t num_workers = std::min( max_num_workers, (num_seeds + batch_size - 1) / batch_size - batch_start_id); // We need to make sure we have the right CSR before we enter parallel // sampling. BuildCsr(*gptr, neigh_type); // generate node flows std::vector nflows(num_workers); runtime::parallel_for(0, num_workers, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { // create per-worker seed nodes. const int64_t start = (batch_start_id + i) * batch_size; const int64_t end = std::min(start + batch_size, num_seeds); // TODO(minjie): the vector allocation/copy is unnecessary std::vector worker_seeds(end - start); std::copy( seed_nodes_data + start, seed_nodes_data + end, worker_seeds.begin()); nflows[i] = SamplerOp::LayerUniformSample( gptr.get(), worker_seeds, neigh_type, layer_sizes); } }); *rv = List(nflows); }); namespace { void BuildCoo(const ImmutableGraph &g) { auto coo = g.GetCOO(); assert(coo); } dgl_id_t global2local_map( dgl_id_t global_id, std::unordered_map *map) { auto it = map->find(global_id); if (it == map->end()) { dgl_id_t local_id = map->size(); map->insert(std::pair(global_id, local_id)); return local_id; } else { return it->second; } } inline bool IsNegativeHeadMode(const std::string &mode) { return mode == "head"; } IdArray GetGlobalVid(IdArray induced_nid, IdArray subg_nid) { IdArray gnid = IdArray::Empty({subg_nid->shape[0]}, subg_nid->dtype, subg_nid->ctx); const dgl_id_t *induced_nid_data = static_cast(induced_nid->data); 
const dgl_id_t *subg_nid_data = static_cast(subg_nid->data); dgl_id_t *gnid_data = static_cast(gnid->data); for (int64_t i = 0; i < subg_nid->shape[0]; i++) { gnid_data[i] = induced_nid_data[subg_nid_data[i]]; } return gnid; } IdArray CheckExistence( GraphPtr gptr, IdArray neg_src, IdArray neg_dst, IdArray induced_nid) { return gptr->HasEdgesBetween( GetGlobalVid(induced_nid, neg_src), GetGlobalVid(induced_nid, neg_dst)); } IdArray CheckExistence( GraphPtr gptr, IdArray relations, IdArray neg_src, IdArray neg_dst, IdArray induced_nid, IdArray neg_eid) { neg_src = GetGlobalVid(induced_nid, neg_src); neg_dst = GetGlobalVid(induced_nid, neg_dst); BoolArray exist = gptr->HasEdgesBetween(neg_src, neg_dst); dgl_id_t *neg_dst_data = static_cast(neg_dst->data); dgl_id_t *neg_src_data = static_cast(neg_src->data); dgl_id_t *neg_eid_data = static_cast(neg_eid->data); dgl_id_t *relation_data = static_cast(relations->data); // TODO(zhengda) is this right? dgl_id_t *exist_data = static_cast(exist->data); int64_t num_neg_edges = neg_src->shape[0]; for (int64_t i = 0; i < num_neg_edges; i++) { // If the edge doesn't exist, we don't need to do anything. if (!exist_data[i]) continue; // If the edge exists, we need to double check if the relations match. // If they match, this negative edge isn't really a negative edge. 
dgl_id_t eid1 = neg_eid_data[i]; dgl_id_t orig_neg_rel1 = relation_data[eid1]; IdArray eids = gptr->EdgeId(neg_src_data[i], neg_dst_data[i]); dgl_id_t *eid_data = static_cast(eids->data); int64_t num_edges_between = eids->shape[0]; bool same_rel = false; for (int64_t j = 0; j < num_edges_between; j++) { dgl_id_t neg_rel1 = relation_data[eid_data[j]]; if (neg_rel1 == orig_neg_rel1) { same_rel = true; break; } } exist_data[i] = same_rel; } return exist; } std::vector Global2Local( const std::vector &ids, const std::unordered_map &map) { std::vector local_ids(ids.size()); for (size_t i = 0; i < ids.size(); i++) { auto it = map.find(ids[i]); assert(it != map.end()); local_ids[i] = it->second; } return local_ids; } NegSubgraph EdgeSamplerObject::genNegEdgeSubgraph( const Subgraph &pos_subg, const std::string &neg_mode, int64_t neg_sample_size, bool exclude_positive, bool check_false_neg) { int64_t num_tot_nodes = gptr_->NumVertices(); if (neg_sample_size > num_tot_nodes) neg_sample_size = num_tot_nodes; std::vector adj = pos_subg.graph->GetAdj(false, "coo"); IdArray coo = adj[0]; int64_t num_pos_edges = coo->shape[0] / 2; int64_t num_neg_edges = num_pos_edges * neg_sample_size; IdArray neg_dst = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx); IdArray neg_src = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx); IdArray induced_neg_eid = IdArray::Empty({num_neg_edges}, coo->dtype, coo->ctx); // These are vids in the positive subgraph. 
const dgl_id_t *dst_data = static_cast(coo->data); const dgl_id_t *src_data = static_cast(coo->data) + num_pos_edges; const dgl_id_t *induced_vid_data = static_cast(pos_subg.induced_vertices->data); const dgl_id_t *induced_eid_data = static_cast(pos_subg.induced_edges->data); size_t num_pos_nodes = pos_subg.graph->NumVertices(); std::vector pos_nodes( induced_vid_data, induced_vid_data + num_pos_nodes); dgl_id_t *neg_dst_data = static_cast(neg_dst->data); dgl_id_t *neg_src_data = static_cast(neg_src->data); dgl_id_t *induced_neg_eid_data = static_cast(induced_neg_eid->data); const dgl_id_t *unchanged; dgl_id_t *neg_unchanged; dgl_id_t *neg_changed; if (IsNegativeHeadMode(neg_mode)) { unchanged = dst_data; neg_unchanged = neg_dst_data; neg_changed = neg_src_data; } else { unchanged = src_data; neg_unchanged = neg_src_data; neg_changed = neg_dst_data; } std::unordered_map neg_map; std::vector local_pos_vids; local_pos_vids.reserve(num_pos_edges); std::vector neg_vids; neg_vids.reserve(neg_sample_size); // If we don't exclude positive edges, we are actually sampling more than // the total number of nodes in the graph. if (!exclude_positive && neg_sample_size >= num_tot_nodes) { // We add all nodes as negative nodes. for (int64_t i = 0; i < num_tot_nodes; i++) { neg_vids.push_back(i); neg_map[i] = i; } // Get all nodes in the positive side. for (int64_t i = 0; i < num_pos_edges; i++) { dgl_id_t vid = induced_vid_data[unchanged[i]]; local_pos_vids.push_back(neg_map[vid]); } // There is no guarantee that the nodes in the vector are unique. std::sort(local_pos_vids.begin(), local_pos_vids.end()); auto it = std::unique(local_pos_vids.begin(), local_pos_vids.end()); local_pos_vids.resize(it - local_pos_vids.begin()); } else { // Collect nodes in the positive side. 
dgl_id_t local_vid = 0; for (int64_t i = 0; i < num_pos_edges; i++) { dgl_id_t vid = induced_vid_data[unchanged[i]]; auto it = neg_map.find(vid); if (it == neg_map.end()) { local_pos_vids.push_back(local_vid); neg_map.insert(std::pair(vid, local_vid++)); } } } int64_t prev_neg_offset = 0; for (int64_t i = 0; i < num_pos_edges; i++) { size_t neg_idx = i * neg_sample_size; std::vector neighbors; DGLIdIters neigh_it; if (IsNegativeHeadMode(neg_mode)) { neigh_it = gptr_->PredVec(induced_vid_data[unchanged[i]]); } else { neigh_it = gptr_->SuccVec(induced_vid_data[unchanged[i]]); } // If the number of negative nodes is smaller than the number of total nodes // in the graph. if (exclude_positive && neg_sample_size < num_tot_nodes) { std::vector exclude; for (auto it = neigh_it.begin(); it != neigh_it.end(); it++) { dgl_id_t global_vid = *it; exclude.push_back(global_vid); } prev_neg_offset = neg_vids.size(); randomSample(num_tot_nodes, neg_sample_size, exclude, &neg_vids); assert( static_cast(prev_neg_offset + neg_sample_size) == neg_vids.size()); } else if (neg_sample_size < num_tot_nodes) { prev_neg_offset = neg_vids.size(); randomSample(num_tot_nodes, neg_sample_size, &neg_vids); assert( static_cast(prev_neg_offset + neg_sample_size) == neg_vids.size()); } else if (exclude_positive) { LOG(FATAL) << "We can't exclude positive edges" "when sampling negative edges with all nodes."; } else { // We don't need to do anything here. // In this case, every edge has the same negative edges. That is, // neg_vids contains all nodes of the graph. They have been generated // before the for loop. 
} dgl_id_t global_unchanged = induced_vid_data[unchanged[i]]; dgl_id_t local_unchanged = global2local_map(global_unchanged, &neg_map); for (int64_t j = 0; j < neg_sample_size; j++) { neg_unchanged[neg_idx + j] = local_unchanged; dgl_id_t local_changed = global2local_map(neg_vids[j + prev_neg_offset], &neg_map); neg_changed[neg_idx + j] = local_changed; // induced negative eid references to the positive one. induced_neg_eid_data[neg_idx + j] = induced_eid_data[i]; } } // Now we know the number of vertices in the negative graph. int64_t num_neg_nodes = neg_map.size(); IdArray induced_neg_vid = IdArray::Empty({num_neg_nodes}, coo->dtype, coo->ctx); dgl_id_t *induced_neg_vid_data = static_cast(induced_neg_vid->data); for (auto it = neg_map.begin(); it != neg_map.end(); it++) { induced_neg_vid_data[it->second] = it->first; } NegSubgraph neg_subg; // We sample negative vertices without replacement. // There shouldn't be duplicated edges. COOPtr neg_coo(new COO(num_neg_nodes, neg_src, neg_dst)); neg_subg.graph = GraphPtr(new ImmutableGraph(neg_coo)); neg_subg.induced_vertices = induced_neg_vid; neg_subg.induced_edges = induced_neg_eid; if (IsNegativeHeadMode(neg_mode)) { neg_subg.head_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map)); neg_subg.tail_nid = aten::VecToIdArray(local_pos_vids); } else { neg_subg.head_nid = aten::VecToIdArray(local_pos_vids); neg_subg.tail_nid = aten::VecToIdArray(Global2Local(neg_vids, neg_map)); } // TODO(zhengda) we should provide an array of 1s if exclude_positive if (check_false_neg) { if (aten::IsNullArray(relations_)) { neg_subg.exist = CheckExistence(gptr_, neg_src, neg_dst, induced_neg_vid); } else { neg_subg.exist = CheckExistence( gptr_, relations_, neg_src, neg_dst, induced_neg_vid, induced_neg_eid); } } return neg_subg; } NegSubgraph EdgeSamplerObject::genChunkedNegEdgeSubgraph( const Subgraph &pos_subg, const std::string &neg_mode, int64_t neg_sample_size, bool exclude_positive, bool check_false_neg) { int64_t 
num_tot_nodes = gptr_->NumVertices(); std::vector adj = pos_subg.graph->GetAdj(false, "coo"); IdArray coo = adj[0]; int64_t num_pos_edges = coo->shape[0] / 2; if (neg_sample_size > num_tot_nodes) neg_sample_size = num_tot_nodes; int64_t chunk_size = chunk_size_; CHECK_GT(chunk_size, 0) << "chunk size has to be positive"; // If num_pos_edges isn't divisible by chunk_size, the actual number of chunks // is num_chunks + 1 and the last chunk size is last_chunk_size. // Otherwise, the actual number of chunks is num_chunks, the last chunk size // is 0. int64_t num_chunks = num_pos_edges / chunk_size; int64_t last_chunk_size = num_pos_edges - num_chunks * chunk_size; // The number of negative edges. int64_t num_neg_edges = neg_sample_size * chunk_size * num_chunks; int64_t num_neg_edges_last_chunk = neg_sample_size * last_chunk_size; int64_t num_all_neg_edges = num_neg_edges + num_neg_edges_last_chunk; // We should include the last chunk. if (last_chunk_size > 0) num_chunks++; IdArray neg_dst = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx); IdArray neg_src = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx); IdArray induced_neg_eid = IdArray::Empty({num_all_neg_edges}, coo->dtype, coo->ctx); // These are vids in the positive subgraph. 
const dgl_id_t *dst_data = static_cast(coo->data); const dgl_id_t *src_data = static_cast(coo->data) + num_pos_edges; const dgl_id_t *induced_vid_data = static_cast(pos_subg.induced_vertices->data); const dgl_id_t *induced_eid_data = static_cast(pos_subg.induced_edges->data); int64_t num_pos_nodes = pos_subg.graph->NumVertices(); std::vector pos_nodes( induced_vid_data, induced_vid_data + num_pos_nodes); dgl_id_t *neg_dst_data = static_cast(neg_dst->data); dgl_id_t *neg_src_data = static_cast(neg_src->data); dgl_id_t *induced_neg_eid_data = static_cast(induced_neg_eid->data); const dgl_id_t *unchanged; dgl_id_t *neg_unchanged; dgl_id_t *neg_changed; if (IsNegativeHeadMode(neg_mode)) { unchanged = dst_data; neg_unchanged = neg_dst_data; neg_changed = neg_src_data; } else { unchanged = src_data; neg_unchanged = neg_src_data; neg_changed = neg_dst_data; } // We first sample all negative edges. std::vector global_neg_vids; std::vector local_neg_vids; randomSample(num_tot_nodes, num_chunks * neg_sample_size, &global_neg_vids); CHECK_EQ(num_chunks * neg_sample_size, global_neg_vids.size()); std::unordered_map neg_map; dgl_id_t local_vid = 0; // Collect nodes in the positive side. std::vector local_pos_vids; local_pos_vids.reserve(num_pos_edges); for (int64_t i = 0; i < num_pos_edges; i++) { dgl_id_t vid = induced_vid_data[unchanged[i]]; auto it = neg_map.find(vid); if (it == neg_map.end()) { local_pos_vids.push_back(local_vid); neg_map.insert(std::pair(vid, local_vid++)); } } // We should map the global negative nodes to local Ids in advance // to reduce computation overhead. local_neg_vids.resize(global_neg_vids.size()); for (size_t i = 0; i < global_neg_vids.size(); i++) { local_neg_vids[i] = global2local_map(global_neg_vids[i], &neg_map); } for (int64_t i_chunk = 0; i_chunk < num_chunks; i_chunk++) { // for each chunk. 
int64_t neg_idx = neg_sample_size * chunk_size * i_chunk; int64_t pos_edge_idx = chunk_size * i_chunk; int64_t neg_node_idx = neg_sample_size * i_chunk; // The actual chunk size. It'll be different for the last chunk. int64_t chunk_size1; if (i_chunk == num_chunks - 1 && last_chunk_size > 0) chunk_size1 = last_chunk_size; else chunk_size1 = chunk_size; for (int64_t in_chunk = 0; in_chunk != chunk_size1; ++in_chunk) { // For each positive node in a chunk. dgl_id_t global_unchanged = induced_vid_data[unchanged[pos_edge_idx + in_chunk]]; dgl_id_t local_unchanged = global2local_map(global_unchanged, &neg_map); for (int64_t j = 0; j < neg_sample_size; ++j) { neg_unchanged[neg_idx] = local_unchanged; neg_changed[neg_idx] = local_neg_vids[neg_node_idx + j]; induced_neg_eid_data[neg_idx] = induced_eid_data[pos_edge_idx + in_chunk]; neg_idx++; } } } // Now we know the number of vertices in the negative graph. int64_t num_neg_nodes = neg_map.size(); IdArray induced_neg_vid = IdArray::Empty({num_neg_nodes}, coo->dtype, coo->ctx); dgl_id_t *induced_neg_vid_data = static_cast(induced_neg_vid->data); for (auto it = neg_map.begin(); it != neg_map.end(); it++) { induced_neg_vid_data[it->second] = it->first; } NegSubgraph neg_subg; // We sample negative vertices without replacement. // There shouldn't be duplicated edges. 
COOPtr neg_coo(new COO(num_neg_nodes, neg_src, neg_dst)); neg_subg.graph = GraphPtr(new ImmutableGraph(neg_coo)); neg_subg.induced_vertices = induced_neg_vid; neg_subg.induced_edges = induced_neg_eid; if (IsNegativeHeadMode(neg_mode)) { neg_subg.head_nid = aten::VecToIdArray(Global2Local(global_neg_vids, neg_map)); neg_subg.tail_nid = aten::VecToIdArray(local_pos_vids); } else { neg_subg.head_nid = aten::VecToIdArray(local_pos_vids); neg_subg.tail_nid = aten::VecToIdArray(Global2Local(global_neg_vids, neg_map)); } if (check_false_neg) { if (aten::IsNullArray(relations_)) { neg_subg.exist = CheckExistence(gptr_, neg_src, neg_dst, induced_neg_vid); } else { neg_subg.exist = CheckExistence( gptr_, relations_, neg_src, neg_dst, induced_neg_vid, induced_neg_eid); } } return neg_subg; } inline SubgraphRef ConvertRef(const Subgraph &subg) { return SubgraphRef(std::shared_ptr(new Subgraph(subg))); } inline SubgraphRef ConvertRef(const NegSubgraph &subg) { return SubgraphRef(std::shared_ptr(new NegSubgraph(subg))); } } // namespace DGL_REGISTER_GLOBAL("sampling._CAPI_GetNegEdgeExistence") .set_body([](DGLArgs args, DGLRetValue *rv) { SubgraphRef g = args[0]; auto gptr = std::dynamic_pointer_cast(g.sptr()); *rv = gptr->exist; }); DGL_REGISTER_GLOBAL("sampling._CAPI_GetEdgeSubgraphHead") .set_body([](DGLArgs args, DGLRetValue *rv) { SubgraphRef g = args[0]; auto gptr = std::dynamic_pointer_cast(g.sptr()); *rv = gptr->head_nid; }); DGL_REGISTER_GLOBAL("sampling._CAPI_GetEdgeSubgraphTail") .set_body([](DGLArgs args, DGLRetValue *rv) { SubgraphRef g = args[0]; auto gptr = std::dynamic_pointer_cast(g.sptr()); *rv = gptr->tail_nid; }); class UniformEdgeSamplerObject : public EdgeSamplerObject { public: explicit UniformEdgeSamplerObject( const GraphPtr gptr, IdArray seed_edges, const int64_t batch_size, const int64_t num_workers, const bool replacement, const bool reset, const std::string neg_mode, const int64_t neg_sample_size, const int64_t chunk_size, const bool 
exclude_positive, const bool check_false_neg, IdArray relations) : EdgeSamplerObject( gptr, seed_edges, batch_size, num_workers, replacement, reset, neg_mode, neg_sample_size, chunk_size, exclude_positive, check_false_neg, relations) { batch_curr_id_ = 0; num_seeds_ = seed_edges->shape[0]; max_batch_id_ = (num_seeds_ + batch_size - 1) / batch_size; // TODO(song): Tricky thing here to make sure gptr_ has coo cache gptr_->FindEdge(0); } ~UniformEdgeSamplerObject() {} void Fetch(DGLRetValue *rv) { const int64_t num_workers = std::min(num_workers_, max_batch_id_ - batch_curr_id_); // generate subgraphs. std::vector positive_subgs(num_workers); std::vector negative_subgs(num_workers); runtime::parallel_for(0, num_workers, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { const int64_t start = (batch_curr_id_ + i) * batch_size_; const int64_t end = std::min(start + batch_size_, num_seeds_); const int64_t num_edges = end - start; IdArray worker_seeds; if (replacement_ == false) { worker_seeds = seed_edges_.CreateView( {num_edges}, DGLDataType{kDGLInt, 64, 1}, sizeof(dgl_id_t) * start); } else { std::vector seeds; const dgl_id_t *seed_edge_ids = static_cast(seed_edges_->data); // sampling of each edge is a standalone event for (int64_t i = 0; i < num_edges; ++i) { int64_t seed = static_cast( RandomEngine::ThreadLocal()->RandInt(num_seeds_)); seeds.push_back(seed_edge_ids[seed]); } worker_seeds = aten::VecToIdArray(seeds, seed_edges_->dtype.bits); } EdgeArray arr = gptr_->FindEdges(worker_seeds); const dgl_id_t *src_ids = static_cast(arr.src->data); const dgl_id_t *dst_ids = static_cast(arr.dst->data); std::vector src_vec(src_ids, src_ids + num_edges); std::vector dst_vec(dst_ids, dst_ids + num_edges); // TODO(zhengda) what if there are duplicates in the src and dst // vectors. 
Subgraph subg = gptr_->EdgeSubgraph(worker_seeds, false); positive_subgs[i] = ConvertRef(subg); // For chunked negative sampling, we accept "chunk-head" for corrupting // head nodes and "chunk-tail" for corrupting tail nodes. if (neg_mode_.substr(0, 5) == "chunk") { NegSubgraph neg_subg = genChunkedNegEdgeSubgraph( subg, neg_mode_.substr(6), neg_sample_size_, exclude_positive_, check_false_neg_); negative_subgs[i] = ConvertRef(neg_subg); } else if (neg_mode_ == "head" || neg_mode_ == "tail") { NegSubgraph neg_subg = genNegEdgeSubgraph( subg, neg_mode_, neg_sample_size_, exclude_positive_, check_false_neg_); negative_subgs[i] = ConvertRef(neg_subg); } } }); if (neg_mode_.size() > 0) { positive_subgs.insert( positive_subgs.end(), negative_subgs.begin(), negative_subgs.end()); } batch_curr_id_ += num_workers; if (batch_curr_id_ >= max_batch_id_ && reset_ == true) { Reset(); } *rv = List(positive_subgs); } void Reset() { batch_curr_id_ = 0; if (replacement_ == false) { // Now we should shuffle the data and reset the sampler. 
dgl_id_t *seed_ids = static_cast(seed_edges_->data); std::shuffle( seed_ids, seed_ids + seed_edges_->shape[0], std::default_random_engine()); } } DGL_DECLARE_OBJECT_TYPE_INFO(UniformEdgeSamplerObject, Object); private: void randomSample(size_t set_size, size_t num, std::vector *out) { RandomSample(set_size, num, out); } void randomSample( size_t set_size, size_t num, const std::vector &exclude, std::vector *out) { RandomSample(set_size, num, exclude, out); } int64_t batch_curr_id_; int64_t max_batch_id_; int64_t num_seeds_; }; class UniformEdgeSampler : public ObjectRef { public: UniformEdgeSampler() {} explicit UniformEdgeSampler(std::shared_ptr obj) : ObjectRef(obj) {} UniformEdgeSamplerObject *operator->() const { return static_cast(obj_.get()); } std::shared_ptr sptr() const { return CHECK_NOTNULL( std::dynamic_pointer_cast(obj_)); } operator bool() const { return this->defined(); } using ContainerType = UniformEdgeSamplerObject; }; DGL_REGISTER_GLOBAL("sampling._CAPI_CreateUniformEdgeSampler") .set_body([](DGLArgs args, DGLRetValue *rv) { // arguments GraphRef g = args[0]; IdArray seed_edges = args[1]; const int64_t batch_size = args[2]; const int64_t max_num_workers = args[3]; const bool replacement = args[4]; const bool reset = args[5]; const std::string neg_mode = args[6]; const int neg_sample_size = args[7]; const bool exclude_positive = args[8]; const bool check_false_neg = args[9]; IdArray relations = args[10]; const int64_t chunk_size = args[11]; // process args auto gptr = std::dynamic_pointer_cast(g.sptr()); CHECK(gptr) << "sampling isn't implemented in mutable graph"; CHECK(aten::IsValidIdArray(seed_edges)); CHECK_EQ(seed_edges->ctx.device_type, kDGLCPU) << "UniformEdgeSampler only support CPU sampling"; if (relations->shape[0] > 0) { CHECK(aten::IsValidIdArray(relations)); CHECK_EQ(relations->ctx.device_type, kDGLCPU) << "WeightedEdgeSampler only support CPU sampling"; } BuildCoo(*gptr); auto o = std::make_shared( gptr, seed_edges, batch_size, 
max_num_workers, replacement, reset, neg_mode, neg_sample_size, chunk_size, exclude_positive, check_false_neg, relations); *rv = o; }); DGL_REGISTER_GLOBAL("sampling._CAPI_FetchUniformEdgeSample") .set_body([](DGLArgs args, DGLRetValue *rv) { UniformEdgeSampler sampler = args[0]; sampler->Fetch(rv); }); DGL_REGISTER_GLOBAL("sampling._CAPI_ResetUniformEdgeSample") .set_body([](DGLArgs args, DGLRetValue *rv) { UniformEdgeSampler sampler = args[0]; sampler->Reset(); }); template class WeightedEdgeSamplerObject : public EdgeSamplerObject { public: explicit WeightedEdgeSamplerObject( const GraphPtr gptr, IdArray seed_edges, NDArray edge_weight, NDArray node_weight, const int64_t batch_size, const int64_t num_workers, const bool replacement, const bool reset, const std::string neg_mode, const int64_t neg_sample_size, const int64_t chunk_size, const bool exclude_positive, const bool check_false_neg, IdArray relations) : EdgeSamplerObject( gptr, seed_edges, batch_size, num_workers, replacement, reset, neg_mode, neg_sample_size, chunk_size, exclude_positive, check_false_neg, relations) { const int64_t num_edges = edge_weight->shape[0]; const ValueType *edge_prob = static_cast(edge_weight->data); std::vector eprob(num_edges); for (int64_t i = 0; i < num_edges; ++i) { eprob[i] = edge_prob[i]; } edge_selector_ = std::make_shared>(eprob); edge_weight_ = edge_weight; const size_t num_nodes = node_weight->shape[0]; if (num_nodes == 0) { node_selector_ = nullptr; } else { const ValueType *node_prob = static_cast(node_weight->data); std::vector nprob(num_nodes); for (size_t i = 0; i < num_nodes; ++i) { nprob[i] = node_prob[i]; } node_selector_ = std::make_shared>(nprob); } curr_batch_id_ = 0; // handle int64 overflow here max_batch_id_ = (num_edges + batch_size - 1) / batch_size; // TODO(song): Tricky thing here to make sure gptr_ has coo cache gptr_->FindEdge(0); } ~WeightedEdgeSamplerObject() {} void Fetch(DGLRetValue *rv) { const int64_t num_workers = std::min(num_workers_, 
max_batch_id_ - curr_batch_id_); // generate subgraphs. std::vector positive_subgs(num_workers); std::vector negative_subgs(num_workers); #pragma omp parallel for for (int i = 0; i < num_workers; i++) { const dgl_id_t *seed_edge_ids = static_cast(seed_edges_->data); std::vector edge_ids(batch_size_); if (replacement_ == false) { size_t n = batch_size_; size_t num_ids = 0; #pragma omp critical { num_ids = edge_selector_->SampleWithoutReplacement(n, &edge_ids); } edge_ids.resize(num_ids); for (size_t i = 0; i < num_ids; ++i) { edge_ids[i] = seed_edge_ids[edge_ids[i]]; } } else { // sampling of each edge is a standalone event for (int i = 0; i < batch_size_; ++i) { size_t edge_id = edge_selector_->Sample(); edge_ids[i] = seed_edge_ids[edge_id]; } } auto worker_seeds = aten::VecToIdArray(edge_ids, seed_edges_->dtype.bits); EdgeArray arr = gptr_->FindEdges(worker_seeds); const dgl_id_t *src_ids = static_cast(arr.src->data); const dgl_id_t *dst_ids = static_cast(arr.dst->data); std::vector src_vec(src_ids, src_ids + batch_size_); std::vector dst_vec(dst_ids, dst_ids + batch_size_); // TODO(zhengda) what if there are duplicates in the src and dst vectors. Subgraph subg = gptr_->EdgeSubgraph(worker_seeds, false); positive_subgs[i] = ConvertRef(subg); // For chunked negative sampling, we accept "chunk-head" for corrupting // head nodes and "chunk-tail" for corrupting tail nodes. 
if (neg_mode_.substr(0, 5) == "chunk") { NegSubgraph neg_subg = genChunkedNegEdgeSubgraph( subg, neg_mode_.substr(6), neg_sample_size_, exclude_positive_, check_false_neg_); negative_subgs[i] = ConvertRef(neg_subg); } else if (neg_mode_ == "head" || neg_mode_ == "tail") { NegSubgraph neg_subg = genNegEdgeSubgraph( subg, neg_mode_, neg_sample_size_, exclude_positive_, check_false_neg_); negative_subgs[i] = ConvertRef(neg_subg); } } curr_batch_id_ += num_workers; if (curr_batch_id_ >= max_batch_id_ && reset_ == true) { Reset(); } if (neg_mode_.size() > 0) { positive_subgs.insert( positive_subgs.end(), negative_subgs.begin(), negative_subgs.end()); } *rv = List(positive_subgs); } void Reset() { curr_batch_id_ = 0; if (replacement_ == false) { const int64_t num_edges = edge_weight_->shape[0]; const ValueType *edge_prob = static_cast(edge_weight_->data); std::vector eprob(num_edges); for (int64_t i = 0; i < num_edges; ++i) { eprob[i] = edge_prob[i]; } // rebuild the edge_selector_ edge_selector_ = std::make_shared>(eprob); } } DGL_DECLARE_OBJECT_TYPE_INFO(WeightedEdgeSamplerObject, Object); private: void randomSample(size_t set_size, size_t num, std::vector *out) { if (num < set_size) { std::unordered_set sampled_idxs; while (sampled_idxs.size() < num) { if (node_selector_ == nullptr) { sampled_idxs.insert(RandomEngine::ThreadLocal()->RandInt(set_size)); } else { size_t id = node_selector_->Sample(); sampled_idxs.insert(id); } } out->insert(out->end(), sampled_idxs.begin(), sampled_idxs.end()); } else { // If we need to sample all elements in the set, we don't need to // generate random numbers. 
for (size_t i = 0; i < set_size; i++) out->push_back(i); } } void randomSample( size_t set_size, size_t num, const std::vector &exclude, std::vector *out) { std::unordered_map sampled_idxs; for (auto v : exclude) { sampled_idxs.insert(std::pair(v, 0)); } if (num + exclude.size() < set_size) { while (sampled_idxs.size() < num + exclude.size()) { size_t rand; if (node_selector_ == nullptr) { rand = RandomEngine::ThreadLocal()->RandInt(set_size); } else { rand = node_selector_->Sample(); } sampled_idxs.insert(std::pair(rand, 1)); } for (auto it = sampled_idxs.begin(); it != sampled_idxs.end(); it++) { if (it->second) { out->push_back(it->first); } } } else { // If we need to sample all elements in the set, we don't need to // generate random numbers. for (size_t i = 0; i < set_size; i++) { // If the element doesn't exist in exclude. if (sampled_idxs.find(i) == sampled_idxs.end()) { out->push_back(i); } } } } private: std::shared_ptr> edge_selector_; std::shared_ptr> node_selector_; NDArray edge_weight_; int64_t curr_batch_id_; int64_t max_batch_id_; }; template class WeightedEdgeSamplerObject; class FloatWeightedEdgeSampler : public ObjectRef { public: FloatWeightedEdgeSampler() {} explicit FloatWeightedEdgeSampler(std::shared_ptr obj) : ObjectRef(obj) {} WeightedEdgeSamplerObject *operator->() const { return static_cast *>(obj_.get()); } std::shared_ptr> sptr() const { return CHECK_NOTNULL( std::dynamic_pointer_cast>(obj_)); } operator bool() const { return this->defined(); } using ContainerType = WeightedEdgeSamplerObject; }; DGL_REGISTER_GLOBAL("sampling._CAPI_CreateWeightedEdgeSampler") .set_body([](DGLArgs args, DGLRetValue *rv) { // arguments GraphRef g = args[0]; IdArray seed_edges = args[1]; NDArray edge_weight = args[2]; NDArray node_weight = args[3]; const int64_t batch_size = args[4]; const int64_t max_num_workers = args[5]; const bool replacement = args[6]; const bool reset = args[7]; const std::string neg_mode = args[8]; const int64_t neg_sample_size = 
args[9]; const bool exclude_positive = args[10]; const bool check_false_neg = args[11]; IdArray relations = args[12]; const int64_t chunk_size = args[13]; auto gptr = std::dynamic_pointer_cast(g.sptr()); CHECK(gptr) << "sampling isn't implemented in mutable graph"; CHECK(aten::IsValidIdArray(seed_edges)); CHECK_EQ(seed_edges->ctx.device_type, kDGLCPU) << "WeightedEdgeSampler only support CPU sampling"; CHECK(edge_weight->dtype.code == kDGLFloat) << "edge_weight should be FloatType"; CHECK(edge_weight->dtype.bits == 32) << "WeightedEdgeSampler only support float weight"; CHECK_EQ(edge_weight->ctx.device_type, kDGLCPU) << "WeightedEdgeSampler only support CPU sampling"; if (node_weight->shape[0] > 0) { CHECK(node_weight->dtype.code == kDGLFloat) << "node_weight should be FloatType"; CHECK(node_weight->dtype.bits == 32) << "WeightedEdgeSampler only support float weight"; CHECK_EQ(node_weight->ctx.device_type, kDGLCPU) << "WeightedEdgeSampler only support CPU sampling"; } if (relations->shape[0] > 0) { CHECK(aten::IsValidIdArray(relations)); CHECK_EQ(relations->ctx.device_type, kDGLCPU) << "WeightedEdgeSampler only support CPU sampling"; } BuildCoo(*gptr); const int64_t num_seeds = seed_edges->shape[0]; const int64_t num_workers = std::min(max_num_workers, (num_seeds + batch_size - 1) / batch_size); auto o = std::make_shared>( gptr, seed_edges, edge_weight, node_weight, batch_size, num_workers, replacement, reset, neg_mode, neg_sample_size, chunk_size, exclude_positive, check_false_neg, relations); *rv = o; }); DGL_REGISTER_GLOBAL("sampling._CAPI_FetchWeightedEdgeSample") .set_body([](DGLArgs args, DGLRetValue *rv) { FloatWeightedEdgeSampler sampler = args[0]; sampler->Fetch(rv); }); DGL_REGISTER_GLOBAL("sampling._CAPI_ResetWeightedEdgeSample") .set_body([](DGLArgs args, DGLRetValue *rv) { FloatWeightedEdgeSampler sampler = args[0]; sampler->Reset(); }); } // namespace dgl ================================================ FILE: 
src/graph/sampling/negative/global_uniform.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/negative/global_uniform.cc * @brief Global uniform negative sampling. */ #include #include #include #include #include #include #include "../../../c_api_common.h" using namespace dgl::runtime; using namespace dgl::aten; namespace dgl { namespace sampling { std::pair GlobalUniformNegativeSampling( HeteroGraphPtr hg, dgl_type_t etype, int64_t num_samples, int num_trials, bool exclude_self_loops, bool replace, double redundancy) { auto format = hg->SelectFormat(etype, CSC_CODE | CSR_CODE); if (format == SparseFormat::kCSC) { CSRMatrix csc = hg->GetCSCMatrix(etype); CSRSort_(&csc); std::pair result = CSRGlobalUniformNegativeSampling( csc, num_samples, num_trials, exclude_self_loops, replace, redundancy); // reverse the pair since it is CSC return {result.second, result.first}; } else if (format == SparseFormat::kCSR) { CSRMatrix csr = hg->GetCSRMatrix(etype); CSRSort_(&csr); return CSRGlobalUniformNegativeSampling( csr, num_samples, num_trials, exclude_self_loops, replace, redundancy); } else { LOG(FATAL) << "COO format is not supported in global uniform negative sampling"; return {IdArray(), IdArray()}; } } DGL_REGISTER_GLOBAL("sampling.negative._CAPI_DGLGlobalUniformNegativeSampling") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; dgl_type_t etype = args[1]; CHECK_LE(etype, hg->NumEdgeTypes()) << "invalid edge type " << etype; int64_t num_samples = args[2]; int num_trials = args[3]; bool exclude_self_loops = args[4]; bool replace = args[5]; double redundancy = args[6]; List result; std::pair ret = GlobalUniformNegativeSampling( hg.sptr(), etype, num_samples, num_trials, exclude_self_loops, replace, redundancy); result.push_back(Value(MakeValue(ret.first))); result.push_back(Value(MakeValue(ret.second))); *rv = result; }); }; // namespace sampling }; // namespace dgl 
================================================ FILE: src/graph/sampling/neighbor/neighbor.cc ================================================ /** * Copyright (c) 2020-2022 by Contributors * @file graph/sampling/neighbor.cc * @brief Definition of neighborhood-based sampler APIs. */ #include #include #include #include #include #include #include #include #include #include "../../../array/cpu/concurrent_id_hash_map.h" #include "../../../c_api_common.h" #include "../../unit_graph.h" using namespace dgl::runtime; using namespace dgl::aten; namespace dgl { namespace sampling { template void ExcludeCertainEdgesFused( std::vector* sampled_graphs, std::vector* induced_edges, std::vector* sampled_coo_rows, const std::vector& exclude_edges, std::vector* weights = nullptr) { int etypes = (*sampled_graphs).size(); std::vector remain_induced_edges(etypes); std::vector remain_indptrs(etypes); std::vector remain_indices(etypes); std::vector remain_coo_rows(etypes); std::vector remain_weights(etypes); for (int etype = 0; etype < etypes; ++etype) { if (exclude_edges[etype].GetSize() == 0 || (*sampled_graphs)[etype].num_rows == 0) { remain_induced_edges[etype] = (*induced_edges)[etype]; if (weights) remain_weights[etype] = (*weights)[etype]; continue; } const auto dtype = weights && (*weights)[etype]->shape[0] ? (*weights)[etype]->dtype : DGLDataType{kDGLFloat, 8 * sizeof(float), 1}; ATEN_FLOAT_TYPE_SWITCH(dtype, FloatType, "weights", { IdType* indptr = (*sampled_graphs)[etype].indptr.Ptr(); IdType* indices = (*sampled_graphs)[etype].indices.Ptr(); IdType* coo_rows = (*sampled_coo_rows)[etype].Ptr(); IdType* induced_edges_data = (*induced_edges)[etype].Ptr(); FloatType* weights_data = weights && (*weights)[etype]->shape[0] ? 
(*weights)[etype].Ptr() : nullptr; const IdType exclude_edges_len = exclude_edges[etype]->shape[0]; std::sort( exclude_edges[etype].Ptr(), exclude_edges[etype].Ptr() + exclude_edges_len); const IdType* exclude_edges_data = exclude_edges[etype].Ptr(); IdType outIndices = 0; for (IdType row = 0; row < (*sampled_graphs)[etype].indptr->shape[0] - 1; ++row) { auto tmp_row = indptr[row]; if (outIndices != indptr[row]) indptr[row] = outIndices; for (IdType col = tmp_row; col < indptr[row + 1]; ++col) { if (!std::binary_search( exclude_edges_data, exclude_edges_data + exclude_edges_len, induced_edges_data[col])) { indices[outIndices] = indices[col]; induced_edges_data[outIndices] = induced_edges_data[col]; coo_rows[outIndices] = coo_rows[col]; if (weights_data) weights_data[outIndices] = weights_data[col]; ++outIndices; } } } indptr[(*sampled_graphs)[etype].indptr->shape[0] - 1] = outIndices; remain_induced_edges[etype] = aten::IndexSelect((*induced_edges)[etype], 0, outIndices); remain_weights[etype] = weights_data ? 
aten::IndexSelect((*weights)[etype], 0, outIndices) : NullArray(); remain_indices[etype] = aten::IndexSelect((*sampled_graphs)[etype].indices, 0, outIndices); (*sampled_coo_rows)[etype] = aten::IndexSelect((*sampled_coo_rows)[etype], 0, outIndices); (*sampled_graphs)[etype] = CSRMatrix( (*sampled_graphs)[etype].num_rows, outIndices, (*sampled_graphs)[etype].indptr, remain_indices[etype], remain_induced_edges[etype]); }); } } std::pair> ExcludeCertainEdges( const HeteroSubgraph& sg, const std::vector& exclude_edges, const std::vector* weights = nullptr) { HeteroGraphPtr hg_view = HeteroGraphRef(sg.graph).sptr(); std::vector remain_induced_edges(hg_view->NumEdgeTypes()); std::vector remain_edges(hg_view->NumEdgeTypes()); std::vector remain_weights(hg_view->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < hg_view->NumEdgeTypes(); ++etype) { IdArray edge_ids = Range( 0, sg.induced_edges[etype]->shape[0], sg.induced_edges[etype]->dtype.bits, sg.induced_edges[etype]->ctx); if (exclude_edges[etype].GetSize() == 0 || edge_ids.GetSize() == 0) { remain_edges[etype] = edge_ids; remain_induced_edges[etype] = sg.induced_edges[etype]; if (weights) remain_weights[etype] = (*weights)[etype]; continue; } ATEN_ID_TYPE_SWITCH(hg_view->DataType(), IdType, { const auto dtype = weights && (*weights)[etype]->shape[0] ? (*weights)[etype]->dtype : DGLDataType{kDGLFloat, 8 * sizeof(float), 1}; ATEN_FLOAT_TYPE_SWITCH(dtype, FloatType, "weights", { IdType* idx_data = edge_ids.Ptr(); IdType* induced_edges_data = sg.induced_edges[etype].Ptr(); FloatType* weights_data = weights && (*weights)[etype]->shape[0] ? 
(*weights)[etype].Ptr() : nullptr; const IdType exclude_edges_len = exclude_edges[etype]->shape[0]; std::sort( exclude_edges[etype].Ptr(), exclude_edges[etype].Ptr() + exclude_edges_len); const IdType* exclude_edges_data = exclude_edges[etype].Ptr(); IdType outId = 0; for (IdType i = 0; i != sg.induced_edges[etype]->shape[0]; ++i) { // the following binary search is the bottleneck, excluding weights // together with edges should almost be free. if (!std::binary_search( exclude_edges_data, exclude_edges_data + exclude_edges_len, induced_edges_data[i])) { induced_edges_data[outId] = induced_edges_data[i]; idx_data[outId] = idx_data[i]; if (weights_data) weights_data[outId] = weights_data[i]; ++outId; } } remain_edges[etype] = aten::IndexSelect(edge_ids, 0, outId); remain_induced_edges[etype] = aten::IndexSelect(sg.induced_edges[etype], 0, outId); remain_weights[etype] = weights_data ? aten::IndexSelect((*weights)[etype], 0, outId) : NullArray(); }); }); } HeteroSubgraph subg = hg_view->EdgeSubgraph(remain_edges, true); subg.induced_edges = std::move(remain_induced_edges); return std::make_pair(subg, remain_weights); } std::pair> SampleLabors( const HeteroGraphPtr hg, const std::vector& nodes, const std::vector& fanouts, EdgeDir dir, const std::vector& prob, const std::vector& exclude_edges, const int importance_sampling, const IdArray random_seed, const float seed2_contribution, const std::vector& NIDs) { // sanity check CHECK_EQ(nodes.size(), hg->NumVertexTypes()) << "Number of node ID tensors must match the number of node types."; CHECK_EQ(fanouts.size(), hg->NumEdgeTypes()) << "Number of fanout values must match the number of edge types."; DGLContext ctx = aten::GetContextOf(nodes); std::vector subrels(hg->NumEdgeTypes()); std::vector subimportances(hg->NumEdgeTypes()); std::vector induced_edges(hg->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = 
pair.first; const dgl_type_t dst_vtype = pair.second; const IdArray nodes_ntype = nodes[(dir == EdgeDir::kOut) ? src_vtype : dst_vtype]; const IdArray NIDs_ntype = NIDs[(dir == EdgeDir::kIn) ? src_vtype : dst_vtype]; const int64_t num_nodes = nodes_ntype->shape[0]; if (num_nodes == 0 || fanouts[etype] == 0) { // Nothing to sample for this etype, create a placeholder relation graph subrels[etype] = UnitGraph::Empty( hg->GetRelationGraph(etype)->NumVertexTypes(), hg->NumVertices(src_vtype), hg->NumVertices(dst_vtype), hg->DataType(), ctx); induced_edges[etype] = aten::NullArray(hg->DataType(), ctx); subimportances[etype] = NullArray(); } else { // sample from one relation graph auto req_fmt = (dir == EdgeDir::kOut) ? CSR_CODE : CSC_CODE; auto avail_fmt = hg->SelectFormat(etype, req_fmt); COOMatrix sampled_coo; FloatArray importances; const int64_t fanout = fanouts[etype] >= 0 ? fanouts[etype] : std::max( hg->NumVertices(dst_vtype), hg->NumVertices(src_vtype)); switch (avail_fmt) { case SparseFormat::kCOO: if (dir == EdgeDir::kIn) { auto fs = aten::COOLaborSampling( aten::COOTranspose(hg->GetCOOMatrix(etype)), nodes_ntype, fanout, prob[etype], importance_sampling, random_seed, seed2_contribution, NIDs_ntype); sampled_coo = aten::COOTranspose(fs.first); importances = fs.second; } else { std::tie(sampled_coo, importances) = aten::COOLaborSampling( hg->GetCOOMatrix(etype), nodes_ntype, fanout, prob[etype], importance_sampling, random_seed, seed2_contribution, NIDs_ntype); } break; case SparseFormat::kCSR: CHECK(dir == EdgeDir::kOut) << "Cannot sample out edges on CSC matrix."; std::tie(sampled_coo, importances) = aten::CSRLaborSampling( hg->GetCSRMatrix(etype), nodes_ntype, fanout, prob[etype], importance_sampling, random_seed, seed2_contribution, NIDs_ntype); break; case SparseFormat::kCSC: CHECK(dir == EdgeDir::kIn) << "Cannot sample in edges on CSR matrix."; std::tie(sampled_coo, importances) = aten::CSRLaborSampling( hg->GetCSCMatrix(etype), nodes_ntype, fanout, 
prob[etype], importance_sampling, random_seed, seed2_contribution, NIDs_ntype); sampled_coo = aten::COOTranspose(sampled_coo); break; default: LOG(FATAL) << "Unsupported sparse format."; } subrels[etype] = UnitGraph::CreateFromCOO( hg->GetRelationGraph(etype)->NumVertexTypes(), sampled_coo.num_rows, sampled_coo.num_cols, sampled_coo.row, sampled_coo.col); subimportances[etype] = importances; induced_edges[etype] = sampled_coo.data; } } HeteroSubgraph ret; ret.graph = CreateHeteroGraph(hg->meta_graph(), subrels, hg->NumVerticesPerType()); ret.induced_vertices.resize(hg->NumVertexTypes()); ret.induced_edges = std::move(induced_edges); if (!exclude_edges.empty()) return ExcludeCertainEdges(ret, exclude_edges, &subimportances); return std::make_pair(ret, std::move(subimportances)); } HeteroSubgraph SampleNeighbors( const HeteroGraphPtr hg, const std::vector& nodes, const std::vector& fanouts, EdgeDir dir, const std::vector& prob_or_mask, const std::vector& exclude_edges, bool replace) { // sanity check CHECK_EQ(nodes.size(), hg->NumVertexTypes()) << "Number of node ID tensors must match the number of node types."; CHECK_EQ(fanouts.size(), hg->NumEdgeTypes()) << "Number of fanout values must match the number of edge types."; CHECK_EQ(prob_or_mask.size(), hg->NumEdgeTypes()) << "Number of probability tensors must match the number of edge types."; DGLContext ctx = aten::GetContextOf(nodes); std::vector subrels(hg->NumEdgeTypes()); std::vector induced_edges(hg->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; const IdArray nodes_ntype = nodes[(dir == EdgeDir::kOut) ? 
src_vtype : dst_vtype]; const int64_t num_nodes = nodes_ntype->shape[0]; if (num_nodes == 0 || fanouts[etype] == 0) { // Nothing to sample for this etype, create a placeholder relation graph subrels[etype] = UnitGraph::Empty( hg->GetRelationGraph(etype)->NumVertexTypes(), hg->NumVertices(src_vtype), hg->NumVertices(dst_vtype), hg->DataType(), ctx); induced_edges[etype] = aten::NullArray(hg->DataType(), ctx); } else { COOMatrix sampled_coo; // sample from one relation graph auto req_fmt = (dir == EdgeDir::kOut) ? CSR_CODE : CSC_CODE; auto avail_fmt = hg->SelectFormat(etype, req_fmt); switch (avail_fmt) { case SparseFormat::kCOO: if (dir == EdgeDir::kIn) { sampled_coo = aten::COOTranspose(aten::COORowWiseSampling( aten::COOTranspose(hg->GetCOOMatrix(etype)), nodes_ntype, fanouts[etype], prob_or_mask[etype], replace)); } else { sampled_coo = aten::COORowWiseSampling( hg->GetCOOMatrix(etype), nodes_ntype, fanouts[etype], prob_or_mask[etype], replace); } break; case SparseFormat::kCSR: CHECK(dir == EdgeDir::kOut) << "Cannot sample out edges on CSC matrix."; sampled_coo = aten::CSRRowWiseSampling( hg->GetCSRMatrix(etype), nodes_ntype, fanouts[etype], prob_or_mask[etype], replace); break; case SparseFormat::kCSC: CHECK(dir == EdgeDir::kIn) << "Cannot sample in edges on CSR matrix."; sampled_coo = aten::CSRRowWiseSampling( hg->GetCSCMatrix(etype), nodes_ntype, fanouts[etype], prob_or_mask[etype], replace); sampled_coo = aten::COOTranspose(sampled_coo); break; default: LOG(FATAL) << "Unsupported sparse format."; } subrels[etype] = UnitGraph::CreateFromCOO( hg->GetRelationGraph(etype)->NumVertexTypes(), sampled_coo.num_rows, sampled_coo.num_cols, sampled_coo.row, sampled_coo.col); induced_edges[etype] = sampled_coo.data; } } HeteroSubgraph ret; ret.graph = CreateHeteroGraph(hg->meta_graph(), subrels, hg->NumVerticesPerType()); ret.induced_vertices.resize(hg->NumVertexTypes()); ret.induced_edges = std::move(induced_edges); if (!exclude_edges.empty()) { return 
ExcludeCertainEdges(ret, exclude_edges).first; } return ret; } template std::tuple, std::vector> SampleNeighborsFused( const HeteroGraphPtr hg, const std::vector& nodes, const std::vector& mapping, const std::vector& fanouts, EdgeDir dir, const std::vector& prob_or_mask, const std::vector& exclude_edges, bool replace) { CHECK_EQ(nodes.size(), hg->NumVertexTypes()) << "Number of node ID tensors must match the number of node types."; CHECK_EQ(fanouts.size(), hg->NumEdgeTypes()) << "Number of fanout values must match the number of edge types."; CHECK_EQ(prob_or_mask.size(), hg->NumEdgeTypes()) << "Number of probability tensors must match the number of edge types."; DGLContext ctx = aten::GetContextOf(nodes); std::vector sampled_graphs; std::vector sampled_coo_rows; std::vector induced_edges; std::vector induced_vertices; std::vector num_nodes_per_type; std::vector> new_nodes_vec(hg->NumVertexTypes()); std::vector seed_nodes_mapped(hg->NumVertexTypes(), 0); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; const dgl_type_t rhs_node_type = (dir == EdgeDir::kOut) ? src_vtype : dst_vtype; const IdArray nodes_ntype = nodes[rhs_node_type]; const int64_t num_nodes = nodes_ntype->shape[0]; if (num_nodes == 0 || fanouts[etype] == 0) { // Nothing to sample for this etype, create a placeholder sampled_graphs.push_back(CSRMatrix()); sampled_coo_rows.push_back(IdArray()); induced_edges.push_back(aten::NullArray(hg->DataType(), ctx)); } else { bool map_seed_nodes = !seed_nodes_mapped[rhs_node_type]; // sample from one relation graph std::pair sampled_graph; auto sampling_fn = map_seed_nodes ? aten::CSRRowWiseSamplingFused : aten::CSRRowWiseSamplingFused; auto req_fmt = (dir == EdgeDir::kOut) ? 
CSR_CODE : CSC_CODE; auto avail_fmt = hg->SelectFormat(etype, req_fmt); switch (avail_fmt) { case SparseFormat::kCSR: CHECK(dir == EdgeDir::kOut) << "Cannot sample out edges on CSC matrix."; // In heterographs nodes of two diffrent types can be connected // therefore two diffrent mappings and node vectors are needed sampled_graph = sampling_fn( hg->GetCSRMatrix(etype), nodes_ntype, mapping[src_vtype], &new_nodes_vec[src_vtype], fanouts[etype], prob_or_mask[etype], replace); break; case SparseFormat::kCSC: CHECK(dir == EdgeDir::kIn) << "Cannot sample in edges on CSR matrix."; sampled_graph = sampling_fn( hg->GetCSCMatrix(etype), nodes_ntype, mapping[dst_vtype], &new_nodes_vec[dst_vtype], fanouts[etype], prob_or_mask[etype], replace); break; default: LOG(FATAL) << "Unsupported sparse format."; } seed_nodes_mapped[rhs_node_type]++; sampled_graphs.push_back(sampled_graph.first); if (sampled_graph.first.data.defined()) induced_edges.push_back(sampled_graph.first.data); else induced_edges.push_back( aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx)); sampled_coo_rows.push_back(sampled_graph.second); } } if (!exclude_edges.empty()) { ExcludeCertainEdgesFused( &sampled_graphs, &induced_edges, &sampled_coo_rows, exclude_edges); for (size_t i = 0; i < hg->NumEdgeTypes(); i++) { if (sampled_graphs[i].data.defined()) induced_edges[i] = std::move(sampled_graphs[i].data); else induced_edges[i] = aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx); } } // map indices for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; const dgl_type_t lhs_node_type = (dir == EdgeDir::kIn) ? 
src_vtype : dst_vtype; if (sampled_graphs[etype].num_cols != 0) { auto num_cols = sampled_graphs[etype].num_cols; int num_threads_col = runtime::compute_num_threads(0, num_cols, 1); std::vector global_prefix_col(num_threads_col + 1, 0); std::vector> src_nodes_local(num_threads_col); IdType* mapping_data_dst = mapping[lhs_node_type].Ptr(); IdType* cdata = sampled_graphs[etype].indices.Ptr(); #pragma omp parallel num_threads(num_threads_col) { const int thread_id = omp_get_thread_num(); num_threads_col = omp_get_num_threads(); const int64_t start_i = thread_id * (num_cols / num_threads_col) + std::min( static_cast(thread_id), num_cols % num_threads_col); const int64_t end_i = (thread_id + 1) * (num_cols / num_threads_col) + std::min( static_cast(thread_id + 1), num_cols % num_threads_col); assert(thread_id + 1 < num_threads_col || end_i == num_cols); for (int64_t i = start_i; i < end_i; ++i) { int64_t picked_idx = cdata[i]; bool spot_claimed = BoolCompareAndSwap(&mapping_data_dst[picked_idx]); if (spot_claimed) src_nodes_local[thread_id].push_back(picked_idx); } global_prefix_col[thread_id + 1] = src_nodes_local[thread_id].size(); #pragma omp barrier #pragma omp master { global_prefix_col[0] = new_nodes_vec[lhs_node_type].size(); for (int t = 0; t < num_threads_col; ++t) { global_prefix_col[t + 1] += global_prefix_col[t]; } } #pragma omp barrier int64_t mapping_shift = global_prefix_col[thread_id]; for (size_t i = 0; i < src_nodes_local[thread_id].size(); ++i) mapping_data_dst[src_nodes_local[thread_id][i]] = mapping_shift + i; #pragma omp barrier for (int64_t i = start_i; i < end_i; ++i) { IdType picked_idx = cdata[i]; IdType mapped_idx = mapping_data_dst[picked_idx]; cdata[i] = mapped_idx; } } IdType offset = new_nodes_vec[lhs_node_type].size(); new_nodes_vec[lhs_node_type].resize(global_prefix_col.back()); for (int thread_id = 0; thread_id < num_threads_col; ++thread_id) { memcpy( new_nodes_vec[lhs_node_type].data() + offset, &src_nodes_local[thread_id][0], 
src_nodes_local[thread_id].size() * sizeof(IdType)); offset += src_nodes_local[thread_id].size(); } } } // counting how many nodes of each ntype were sampled num_nodes_per_type.resize(2 * hg->NumVertexTypes()); for (size_t i = 0; i < hg->NumVertexTypes(); i++) { num_nodes_per_type[i] = new_nodes_vec[i].size(); num_nodes_per_type[hg->NumVertexTypes() + i] = nodes[i]->shape[0]; induced_vertices.push_back( VecToIdArray(new_nodes_vec[i], sizeof(IdType) * 8)); } std::vector subrels(hg->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; if (sampled_graphs[etype].num_rows == 0) { subrels[etype] = UnitGraph::Empty( 2, new_nodes_vec[src_vtype].size(), nodes[dst_vtype]->shape[0], hg->DataType(), ctx); } else { CSRMatrix graph = sampled_graphs[etype]; if (dir == EdgeDir::kOut) { subrels[etype] = UnitGraph::CreateFromCSRAndCOO( 2, CSRMatrix( nodes[src_vtype]->shape[0], new_nodes_vec[dst_vtype].size(), graph.indptr, graph.indices, Range( 0, graph.indices->shape[0], graph.indices->dtype.bits, ctx)), COOMatrix( nodes[src_vtype]->shape[0], new_nodes_vec[dst_vtype].size(), sampled_coo_rows[etype], graph.indices), ALL_CODE); } else { subrels[etype] = UnitGraph::CreateFromCSCAndCOO( 2, CSRMatrix( nodes[dst_vtype]->shape[0], new_nodes_vec[src_vtype].size(), graph.indptr, graph.indices, Range( 0, graph.indices->shape[0], graph.indices->dtype.bits, ctx)), COOMatrix( new_nodes_vec[src_vtype].size(), nodes[dst_vtype]->shape[0], graph.indices, sampled_coo_rows[etype]), ALL_CODE); } } } HeteroSubgraph ret; const auto meta_graph = hg->meta_graph(); const EdgeArray etypes = meta_graph->Edges("eid"); const IdArray new_dst = Add(etypes.dst, hg->NumVertexTypes()); const auto new_meta_graph = ImmutableGraph::CreateFromCOO( hg->NumVertexTypes() * 2, etypes.src, new_dst); HeteroGraphPtr new_graph = CreateHeteroGraph(new_meta_graph, 
subrels, num_nodes_per_type); return std::make_tuple(new_graph, induced_edges, induced_vertices); } template std::tuple, std::vector> SampleNeighborsFused( const HeteroGraphPtr, const std::vector&, const std::vector&, const std::vector&, EdgeDir, const std::vector&, const std::vector&, bool); template std::tuple, std::vector> SampleNeighborsFused( const HeteroGraphPtr, const std::vector&, const std::vector&, const std::vector&, EdgeDir, const std::vector&, const std::vector&, bool); HeteroSubgraph SampleNeighborsEType( const HeteroGraphPtr hg, const IdArray nodes, const std::vector& eid2etype_offset, const std::vector& fanouts, EdgeDir dir, const std::vector& prob, bool replace, bool rowwise_etype_sorted) { CHECK_EQ(1, hg->NumVertexTypes()) << "SampleNeighborsEType only work with homogeneous graph"; CHECK_EQ(1, hg->NumEdgeTypes()) << "SampleNeighborsEType only work with homogeneous graph"; std::vector subrels(1); std::vector induced_edges(1); const int64_t num_nodes = nodes->shape[0]; dgl_type_t etype = 0; const dgl_type_t src_vtype = 0; const dgl_type_t dst_vtype = 0; bool same_fanout = true; int64_t fanout_value = fanouts[0]; for (auto fanout : fanouts) { if (fanout != fanout_value) { same_fanout = false; break; } } if (num_nodes == 0 || (same_fanout && fanout_value == 0)) { subrels[etype] = UnitGraph::Empty( 1, hg->NumVertices(src_vtype), hg->NumVertices(dst_vtype), hg->DataType(), hg->Context()); induced_edges[etype] = aten::NullArray(); } else { COOMatrix sampled_coo; // sample from graph // the edge type is stored in etypes auto req_fmt = (dir == EdgeDir::kOut) ? 
CSR_CODE : CSC_CODE; auto avail_fmt = hg->SelectFormat(etype, req_fmt); switch (avail_fmt) { case SparseFormat::kCOO: if (dir == EdgeDir::kIn) { sampled_coo = aten::COOTranspose(aten::COORowWisePerEtypeSampling( aten::COOTranspose(hg->GetCOOMatrix(etype)), nodes, eid2etype_offset, fanouts, prob, replace)); } else { sampled_coo = aten::COORowWisePerEtypeSampling( hg->GetCOOMatrix(etype), nodes, eid2etype_offset, fanouts, prob, replace); } break; case SparseFormat::kCSR: CHECK(dir == EdgeDir::kOut) << "Cannot sample out edges on CSC matrix."; sampled_coo = aten::CSRRowWisePerEtypeSampling( hg->GetCSRMatrix(etype), nodes, eid2etype_offset, fanouts, prob, replace, rowwise_etype_sorted); break; case SparseFormat::kCSC: CHECK(dir == EdgeDir::kIn) << "Cannot sample in edges on CSR matrix."; sampled_coo = aten::CSRRowWisePerEtypeSampling( hg->GetCSCMatrix(etype), nodes, eid2etype_offset, fanouts, prob, replace, rowwise_etype_sorted); sampled_coo = aten::COOTranspose(sampled_coo); break; default: LOG(FATAL) << "Unsupported sparse format."; } subrels[etype] = UnitGraph::CreateFromCOO( 1, sampled_coo.num_rows, sampled_coo.num_cols, sampled_coo.row, sampled_coo.col); induced_edges[etype] = sampled_coo.data; } HeteroSubgraph ret; ret.graph = CreateHeteroGraph(hg->meta_graph(), subrels, hg->NumVerticesPerType()); ret.induced_vertices.resize(hg->NumVertexTypes()); ret.induced_edges = std::move(induced_edges); return ret; } HeteroSubgraph SampleNeighborsTopk( const HeteroGraphPtr hg, const std::vector& nodes, const std::vector& k, EdgeDir dir, const std::vector& weight, bool ascending) { // sanity check CHECK_EQ(nodes.size(), hg->NumVertexTypes()) << "Number of node ID tensors must match the number of node types."; CHECK_EQ(k.size(), hg->NumEdgeTypes()) << "Number of k values must match the number of edge types."; CHECK_EQ(weight.size(), hg->NumEdgeTypes()) << "Number of weight tensors must match the number of edge types."; std::vector subrels(hg->NumEdgeTypes()); std::vector 
induced_edges(hg->NumEdgeTypes()); for (dgl_type_t etype = 0; etype < hg->NumEdgeTypes(); ++etype) { auto pair = hg->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; const IdArray nodes_ntype = nodes[(dir == EdgeDir::kOut) ? src_vtype : dst_vtype]; const int64_t num_nodes = nodes_ntype->shape[0]; if (num_nodes == 0 || k[etype] == 0) { // Nothing to sample for this etype, create a placeholder relation graph subrels[etype] = UnitGraph::Empty( hg->GetRelationGraph(etype)->NumVertexTypes(), hg->NumVertices(src_vtype), hg->NumVertices(dst_vtype), hg->DataType(), hg->Context()); induced_edges[etype] = aten::NullArray(); } else { // sample from one relation graph auto req_fmt = (dir == EdgeDir::kOut) ? CSR_CODE : CSC_CODE; auto avail_fmt = hg->SelectFormat(etype, req_fmt); COOMatrix sampled_coo; switch (avail_fmt) { case SparseFormat::kCOO: if (dir == EdgeDir::kIn) { sampled_coo = aten::COOTranspose(aten::COORowWiseTopk( aten::COOTranspose(hg->GetCOOMatrix(etype)), nodes_ntype, k[etype], weight[etype], ascending)); } else { sampled_coo = aten::COORowWiseTopk( hg->GetCOOMatrix(etype), nodes_ntype, k[etype], weight[etype], ascending); } break; case SparseFormat::kCSR: CHECK(dir == EdgeDir::kOut) << "Cannot sample out edges on CSC matrix."; sampled_coo = aten::CSRRowWiseTopk( hg->GetCSRMatrix(etype), nodes_ntype, k[etype], weight[etype], ascending); break; case SparseFormat::kCSC: CHECK(dir == EdgeDir::kIn) << "Cannot sample in edges on CSR matrix."; sampled_coo = aten::CSRRowWiseTopk( hg->GetCSCMatrix(etype), nodes_ntype, k[etype], weight[etype], ascending); sampled_coo = aten::COOTranspose(sampled_coo); break; default: LOG(FATAL) << "Unsupported sparse format."; } subrels[etype] = UnitGraph::CreateFromCOO( hg->GetRelationGraph(etype)->NumVertexTypes(), sampled_coo.num_rows, sampled_coo.num_cols, sampled_coo.row, sampled_coo.col); induced_edges[etype] = sampled_coo.data; } } HeteroSubgraph ret; ret.graph = 
CreateHeteroGraph(hg->meta_graph(), subrels, hg->NumVerticesPerType()); ret.induced_vertices.resize(hg->NumVertexTypes()); ret.induced_edges = std::move(induced_edges); return ret; } HeteroSubgraph SampleNeighborsBiased( const HeteroGraphPtr hg, const IdArray& nodes, const int64_t fanout, const NDArray& bias, const NDArray& tag_offset, const EdgeDir dir, const bool replace) { CHECK_EQ(hg->NumEdgeTypes(), 1) << "Only homogeneous or bipartite graphs are supported"; auto pair = hg->meta_graph()->FindEdge(0); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; const dgl_type_t nodes_ntype = (dir == EdgeDir::kOut) ? src_vtype : dst_vtype; // sanity check CHECK_EQ(tag_offset->ndim, 2) << "The shape of tag_offset should be [num_nodes, num_tags + 1]"; CHECK_EQ(tag_offset->shape[0], hg->NumVertices(nodes_ntype)) << "The shape of tag_offset should be [num_nodes, num_tags + 1]"; CHECK_EQ(tag_offset->shape[1], bias->shape[0] + 1) << "The sizes of tag_offset and bias are inconsistent"; const int64_t num_nodes = nodes->shape[0]; HeteroGraphPtr subrel; IdArray induced_edges; const dgl_type_t etype = 0; if (num_nodes == 0 || fanout == 0) { // Nothing to sample for this etype, create a placeholder relation graph subrel = UnitGraph::Empty( hg->GetRelationGraph(etype)->NumVertexTypes(), hg->NumVertices(src_vtype), hg->NumVertices(dst_vtype), hg->DataType(), hg->Context()); induced_edges = aten::NullArray(); } else { // sample from one relation graph const auto req_fmt = (dir == EdgeDir::kOut) ? 
CSR_CODE : CSC_CODE; const auto created_fmt = hg->GetCreatedFormats(); COOMatrix sampled_coo; switch (req_fmt) { case CSR_CODE: CHECK(created_fmt & CSR_CODE) << "A sorted CSR Matrix is required."; sampled_coo = aten::CSRRowWiseSamplingBiased( hg->GetCSRMatrix(etype), nodes, fanout, tag_offset, bias, replace); break; case CSC_CODE: CHECK(created_fmt & CSC_CODE) << "A sorted CSC Matrix is required."; sampled_coo = aten::CSRRowWiseSamplingBiased( hg->GetCSCMatrix(etype), nodes, fanout, tag_offset, bias, replace); sampled_coo = aten::COOTranspose(sampled_coo); break; default: LOG(FATAL) << "Unsupported sparse format."; } subrel = UnitGraph::CreateFromCOO( hg->GetRelationGraph(etype)->NumVertexTypes(), sampled_coo.num_rows, sampled_coo.num_cols, sampled_coo.row, sampled_coo.col); induced_edges = sampled_coo.data; } HeteroSubgraph ret; ret.graph = CreateHeteroGraph(hg->meta_graph(), {subrel}, hg->NumVerticesPerType()); ret.induced_vertices.resize(hg->NumVertexTypes()); ret.induced_edges = {induced_edges}; return ret; } DGL_REGISTER_GLOBAL("sampling.neighbor._CAPI_DGLSampleNeighborsEType") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; IdArray nodes = args[1]; const std::vector& eid2etype_offset = ListValueToVector(args[2]); IdArray fanout = args[3]; const std::string dir_str = args[4]; const auto& prob = ListValueToVector(args[5]); const bool replace = args[6]; const bool rowwise_etype_sorted = args[7]; CHECK(dir_str == "in" || dir_str == "out") << "Invalid edge direction. Must be \"in\" or \"out\"."; EdgeDir dir = (dir_str == "in") ? 
EdgeDir::kIn : EdgeDir::kOut; CHECK_INT64(fanout, "fanout"); std::vector fanout_vec = fanout.ToVector(); std::shared_ptr subg(new HeteroSubgraph); *subg = sampling::SampleNeighborsEType( hg.sptr(), nodes, eid2etype_offset, fanout_vec, dir, prob, replace, rowwise_etype_sorted); *rv = HeteroSubgraphRef(subg); }); DGL_REGISTER_GLOBAL("sampling.labor._CAPI_DGLSampleLabors") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const auto& nodes = ListValueToVector(args[1]); IdArray fanouts_array = args[2]; const auto& fanouts = fanouts_array.ToVector(); const std::string dir_str = args[3]; const auto& prob = ListValueToVector(args[4]); const auto& exclude_edges = ListValueToVector(args[5]); const int importance_sampling = args[6]; const IdArray random_seed = args[7]; const double seed2_contribution = args[8]; const auto& NIDs = ListValueToVector(args[9]); CHECK(dir_str == "in" || dir_str == "out") << "Invalid edge direction. Must be \"in\" or \"out\"."; EdgeDir dir = (dir_str == "in") ? EdgeDir::kIn : EdgeDir::kOut; std::shared_ptr subg_ptr(new HeteroSubgraph); auto&& subg_importances = sampling::SampleLabors( hg.sptr(), nodes, fanouts, dir, prob, exclude_edges, importance_sampling, random_seed, seed2_contribution, NIDs); *subg_ptr = subg_importances.first; List ret_val; ret_val.push_back(Value(subg_ptr)); for (auto& imp : subg_importances.second) ret_val.push_back(Value(MakeValue(imp))); *rv = ret_val; }); DGL_REGISTER_GLOBAL("sampling.neighbor._CAPI_DGLSampleNeighbors") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const auto& nodes = ListValueToVector(args[1]); IdArray fanouts_array = args[2]; const auto& fanouts = fanouts_array.ToVector(); const std::string dir_str = args[3]; const auto& prob_or_mask = ListValueToVector(args[4]); const auto& exclude_edges = ListValueToVector(args[5]); const bool replace = args[6]; CHECK(dir_str == "in" || dir_str == "out") << "Invalid edge direction. 
Must be \"in\" or \"out\"."; EdgeDir dir = (dir_str == "in") ? EdgeDir::kIn : EdgeDir::kOut; std::shared_ptr subg(new HeteroSubgraph); *subg = sampling::SampleNeighbors( hg.sptr(), nodes, fanouts, dir, prob_or_mask, exclude_edges, replace); *rv = HeteroSubgraphRef(subg); }); DGL_REGISTER_GLOBAL("sampling.neighbor._CAPI_DGLSampleNeighborsFused") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const auto& nodes = ListValueToVector(args[1]); auto mapping = ListValueToVector(args[2]); IdArray fanouts_array = args[3]; const auto& fanouts = fanouts_array.ToVector(); const std::string dir_str = args[4]; const auto& prob_or_mask = ListValueToVector(args[5]); const auto& exclude_edges = ListValueToVector(args[6]); const bool replace = args[7]; CHECK(dir_str == "in" || dir_str == "out") << "Invalid edge direction. Must be \"in\" or \"out\"."; EdgeDir dir = (dir_str == "in") ? EdgeDir::kIn : EdgeDir::kOut; HeteroGraphPtr new_graph; std::vector induced_edges; std::vector induced_vertices; ATEN_ID_TYPE_SWITCH(hg->DataType(), IdType, { std::tie(new_graph, induced_edges, induced_vertices) = SampleNeighborsFused( hg.sptr(), nodes, mapping, fanouts, dir, prob_or_mask, exclude_edges, replace); }); List lhs_nodes_ref; for (IdArray& array : induced_vertices) lhs_nodes_ref.push_back(Value(MakeValue(array))); List induced_edges_ref; for (IdArray& array : induced_edges) induced_edges_ref.push_back(Value(MakeValue(array))); List ret; ret.push_back(HeteroGraphRef(new_graph)); ret.push_back(lhs_nodes_ref); ret.push_back(induced_edges_ref); *rv = ret; }); DGL_REGISTER_GLOBAL("sampling.neighbor._CAPI_DGLSampleNeighborsTopk") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const auto& nodes = ListValueToVector(args[1]); IdArray k_array = args[2]; const auto& k = k_array.ToVector(); const std::string dir_str = args[3]; const auto& weight = ListValueToVector(args[4]); const bool ascending = args[5]; CHECK(dir_str == "in" || dir_str == "out") 
<< "Invalid edge direction. Must be \"in\" or \"out\"."; EdgeDir dir = (dir_str == "in") ? EdgeDir::kIn : EdgeDir::kOut; std::shared_ptr subg(new HeteroSubgraph); *subg = sampling::SampleNeighborsTopk( hg.sptr(), nodes, k, dir, weight, ascending); *rv = HeteroGraphRef(subg); }); DGL_REGISTER_GLOBAL("sampling.neighbor._CAPI_DGLSampleNeighborsBiased") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; const IdArray nodes = args[1]; const int64_t fanout = args[2]; const NDArray bias = args[3]; const NDArray tag_offset = args[4]; const std::string dir_str = args[5]; const bool replace = args[6]; CHECK(dir_str == "in" || dir_str == "out") << "Invalid edge direction. Must be \"in\" or \"out\"."; EdgeDir dir = (dir_str == "in") ? EdgeDir::kIn : EdgeDir::kOut; std::shared_ptr subg(new HeteroSubgraph); *subg = sampling::SampleNeighborsBiased( hg.sptr(), nodes, fanout, bias, tag_offset, dir, replace); *rv = HeteroGraphRef(subg); }); } // namespace sampling } // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/frequency_hashmap.cu ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/frequency_hashmap.cu * @brief frequency hashmap - used to select top-k frequency edges of each node */ #include #include // NOLINT #include #include #include "../../../array/cuda/atomic.cuh" #include "../../../runtime/cuda/cuda_common.h" #include "frequency_hashmap.cuh" namespace dgl { namespace sampling { namespace impl { namespace { int64_t _table_size(const int64_t num, const int64_t scale) { /** * Calculate the number of buckets in the hashtable. To guarantee we can * fill the hashtable in the worst case, we must use a number of buckets which * is a power of two. 
* https://en.wikipedia.org/wiki/Quadratic_probing#Limitations
   */
  // Round `num` down to a power of two (never below 1), then scale it up by
  // 2^scale. This is done with 64-bit integer shifts instead of a
  // floating-point log2 for two reasons:
  //   * for num < 2, log2(num >> 1) is log2(0), which is not finite, and
  //     converting that to an integer is undefined;
  //   * shifting the plain literal `1` is a 32-bit int shift, which overflows
  //     as soon as the result reaches 2^31.
  // For every input where the floating-point form was well defined, the
  // value returned here is the same.
  int64_t pow2 = 1;
  while (pow2 <= (num >> 1)) {
    pow2 <<= 1;
  }
  return pow2 << scale;
}

/**
 * @brief Reset every slot of the edge table to the "empty" state.
 *
 * Each slot gets src = -1 and cnt = 0. Every thread starts at
 * blockIdx.x * TILE_SIZE + threadIdx.x and advances by BLOCK_SIZE until it
 * has covered its share of the TILE_SIZE slots owned by the block.
 *
 * @param edge_hashmap Start of the slot array (passed as an untyped pointer).
 * @param edges_len Total number of slots; indices at or past it are skipped.
 */
template __global__ void _init_edge_table(void *edge_hashmap, int64_t edges_len) {
  using EdgeItem = typename DeviceEdgeHashmap::EdgeItem;
  auto edge_hashmap_t = static_cast(edge_hashmap);
  int64_t start_idx = (blockIdx.x * TILE_SIZE) + threadIdx.x;
  int64_t last_idx = start_idx + TILE_SIZE;
#pragma unroll(4)
  for (int64_t idx = start_idx; idx < last_idx; idx += BLOCK_SIZE) {
    if (idx < edges_len) {
      EdgeItem *edge = (edge_hashmap_t + idx);
      edge->src = static_cast(-1);
      edge->cnt = static_cast(0);
    }
  }
}

/**
 * @brief Insert every edge into the hashmap and count first occurrences.
 *
 * The edge at position idx belongs to destination slot
 * idx / num_edges_per_node. Entries whose src is -1 are skipped. When
 * InsertEdge reports a previous count of 0 the position is flagged in
 * is_first_position and counted. The per-thread counts are summed over the
 * block and thread 0 stores the block total in edge_blocks_prefix[blockIdx.x].
 * Block 0 additionally zeroes the one extra trailing entry
 * edge_blocks_prefix[gridDim.x].
 */
template __global__ void _count_frequency(
    const IdxType *src_data, const int64_t num_edges,
    const int64_t num_edges_per_node, IdxType *edge_blocks_prefix,
    bool *is_first_position, DeviceEdgeHashmap device_edge_hashmap) {
  int64_t start_idx = (blockIdx.x * TILE_SIZE) + threadIdx.x;
  int64_t last_idx = start_idx + TILE_SIZE;

  IdxType count = 0;
  for (int64_t idx = start_idx; idx < last_idx; idx += BLOCK_SIZE) {
    if (idx < num_edges) {
      IdxType src = src_data[idx];
      if (src == static_cast(-1)) {
        continue;
      }
      IdxType dst_idx = (idx / num_edges_per_node);
      // InsertEdge returns the count before this insertion; 0 means this
      // position is the first time (src, dst_idx) has been seen.
      if (device_edge_hashmap.InsertEdge(src, dst_idx) == 0) {
        is_first_position[idx] = true;
        ++count;
      }
    }
  }

  // The reduction sits outside every conditional, so all threads of the
  // block take part in it.
  using BlockReduce = typename cub::BlockReduce;
  __shared__ typename BlockReduce::TempStorage temp_space;

  count = BlockReduce(temp_space).Sum(count);
  if (threadIdx.x == 0) {
    edge_blocks_prefix[blockIdx.x] = count;
    if (blockIdx.x == 0) {
      edge_blocks_prefix[gridDim.x] = 0;
    }
  }
}

/**
 * This structure is used with cub's block-level prefixscan in order to
 * keep a running sum as items are iteratively processed.
*/
template struct BlockPrefixCallbackOp {
  // Sum of all block aggregates handed to operator() so far.
  T _running_total;

  __device__ BlockPrefixCallbackOp(const T running_total)
      : _running_total(running_total) {}

  // Returns the total accumulated before this call, then adds
  // block_aggregate to it.
  __device__ T operator()(const T block_aggregate) {
    const T old_prefix = _running_total;
    _running_total += block_aggregate;
    return old_prefix;
  }
};

/**
 * @brief Write each first-occurrence edge and its frequency key to a dense
 *        output position.
 *
 * A position flagged in is_first_position is written to
 * edge_blocks_prefix[blockIdx.x] plus its rank among the flagged positions
 * this block has processed so far; the rank comes from a block-wide
 * exclusive scan of the flags. The first position of every destination slot
 * also copies that slot's unique-edge count into num_unique_each_node.
 *
 * When IdxType is 4 bytes and narrower than Idx64Type, the stored key is
 * ((num_edges / num_edges_per_node - dst_idx) << 32) | count; otherwise it
 * is the count alone.
 */
template __global__ void _compact_frequency(
    const IdxType *src_data, const IdxType *dst_data, const int64_t num_edges,
    const int64_t num_edges_per_node, const IdxType *edge_blocks_prefix,
    const bool *is_first_position, IdxType *num_unique_each_node,
    IdxType *unique_src_edges, Idx64Type *unique_frequency,
    DeviceEdgeHashmap device_edge_hashmap) {
  int64_t start_idx = (blockIdx.x * TILE_SIZE) + threadIdx.x;
  int64_t last_idx = start_idx + TILE_SIZE;
  const IdxType block_offset = edge_blocks_prefix[blockIdx.x];

  using BlockScan = typename cub::BlockScan;
  __shared__ typename BlockScan::TempStorage temp_space;
  BlockPrefixCallbackOp prefix_op(0);

  for (int64_t idx = start_idx; idx < last_idx; idx += BLOCK_SIZE) {
    const bool in_range = (idx < num_edges);
    // Read the flag once; out-of-range threads never touch the array.
    const bool is_first = in_range && (is_first_position[idx] == true);

    IdxType src = 0;
    IdxType dst_idx = 0;
    if (in_range) {
      src = src_data[idx];
      dst_idx = (idx / num_edges_per_node);
      if (idx % num_edges_per_node == 0) {
        num_unique_each_node[dst_idx] =
            device_edge_hashmap.GetDstCount(dst_idx);
      }
    }

    // The scan and the barrier are block-wide collectives, so every thread
    // of the block must reach them on every iteration. All threads run the
    // same number of iterations (the loop bounds depend only on TILE_SIZE
    // and BLOCK_SIZE), and threads past num_edges simply contribute 0
    // instead of skipping the call, which leaves the computed offsets
    // unchanged.
    IdxType flag = is_first ? 1 : 0;
    BlockScan(temp_space).ExclusiveSum(flag, flag, prefix_op);
    __syncthreads();

    if (is_first) {
      const IdxType pos = (block_offset + flag);
      unique_src_edges[pos] = src;
      if (sizeof(IdxType) != sizeof(Idx64Type) && sizeof(IdxType) == 4) {
        // IdxType is 32-bit: pack the reversed destination index into the
        // high half of the key and the count into the low half.
        // NOTE(review): presumably this lets one descending sort over the
        // whole key array keep destinations grouped in ascending order;
        // confirm against the sort performed in Topk.
        unique_frequency[pos] =
            ((static_cast(num_edges / num_edges_per_node - dst_idx) << 32) |
             device_edge_hashmap.GetEdgeCount(src, dst_idx));
      } else {
        unique_frequency[pos] = device_edge_hashmap.GetEdgeCount(src, dst_idx);
      }
    }
  }
}

/**
 * @brief Clamp every per-destination unique-edge count to at most num_pick.
 */
template __global__ void _get_pick_num(
    IdxType *num_unique_each_node, const int64_t num_pick,
    const int64_t num_dst_nodes) {
  int64_t start_idx = (blockIdx.x * TILE_SIZE) + threadIdx.x;
  int64_t
last_idx = start_idx + TILE_SIZE;
#pragma unroll(4)
  for (int64_t idx = start_idx; idx < last_idx; idx += BLOCK_SIZE) {
    if (idx < num_dst_nodes) {
      // Updated in place through the reference.
      IdxType &num_unique = num_unique_each_node[idx];
      num_unique = min(num_unique, static_cast(num_pick));
    }
  }
}

/**
 * @brief Copy the selected entries of every destination slot to the outputs.
 *
 * For destination slot idx, entries are read starting at
 * unique_input_offsets[idx] and written to the output range
 * [unique_output_offsets[idx], unique_output_offsets[idx + 1]). The
 * destination ID written with them is dst_data[idx * num_edges_per_node],
 * i.e. the dst of the slot's first edge. The frequency key is converted to
 * the output element type on the way out.
 */
template __global__ void _pick_data(
    const Idx64Type *unique_frequency, const IdxType *unique_src_edges,
    const IdxType *unique_input_offsets, const IdxType *dst_data,
    const int64_t num_edges_per_node, const int64_t num_dst_nodes,
    const int64_t num_edges, const IdxType *unique_output_offsets,
    IdxType *output_src, IdxType *output_dst, IdxType *output_frequency) {
  int64_t start_idx = (blockIdx.x * TILE_SIZE) + threadIdx.x;
  int64_t last_idx = start_idx + TILE_SIZE;

  for (int64_t idx = start_idx; idx < last_idx; idx += BLOCK_SIZE) {
    if (idx < num_dst_nodes) {
      const int64_t dst_pos = (idx * num_edges_per_node);
      assert(dst_pos < num_edges);
      const IdxType dst = dst_data[dst_pos];
      const IdxType last_output_offset = unique_output_offsets[idx + 1];
      // A slot can never output more entries than it has as input.
      assert(
          (last_output_offset - unique_output_offsets[idx]) <=
          (unique_input_offsets[idx + 1] - unique_input_offsets[idx]));
      for (IdxType output_idx = unique_output_offsets[idx],
                   input_idx = unique_input_offsets[idx];
           output_idx < last_output_offset; ++output_idx, ++input_idx) {
        output_src[output_idx] = unique_src_edges[input_idx];
        output_dst[output_idx] = dst;
        output_frequency[output_idx] =
            static_cast(unique_frequency[input_idx]);
      }
    }
  }
}

}  // namespace

// Insert (src, dst_idx) into the sub-table of dst_idx, or bump its counter
// when it is already there. Returns the count the edge had BEFORE this call
// (0 on first insertion). On a collision the probe offset grows by one per
// miss. The first insertion of a src also increments the unique-edge counter
// of dst_idx.
template inline __device__ IdxType DeviceEdgeHashmap::InsertEdge(
    const IdxType &src, const IdxType &dst_idx) {
  IdxType start_off = dst_idx * _num_items_each_dst;
  IdxType pos = EdgeHash(src);
  IdxType delta = 1;
  IdxType old_cnt = static_cast(-1);
  while (true) {
    // Claim the slot if it is empty (src == -1); otherwise learn who owns it.
    IdxType old_src = dgl::aten::cuda::AtomicCAS(
        &_edge_hashmap[start_off + pos].src, static_cast(-1), src);
    if (old_src == static_cast(-1) || old_src == src) {
      // The slot was just claimed for src, or it already held src.
      old_cnt = dgl::aten::cuda::AtomicAdd(
          &_edge_hashmap[start_off + pos].cnt, static_cast(1));
      if (old_src == static_cast(-1)) {
        assert(dst_idx < _num_dst);
        dgl::aten::cuda::AtomicAdd(
            &_dst_unique_edges[dst_idx], static_cast(1));
      }
      break;
    }
    pos = EdgeHash(pos + delta);
    delta += 1;
  }
  return old_cnt;
}

// Number of distinct src values inserted so far for dst_idx.
template inline __device__ IdxType DeviceEdgeHashmap::GetDstCount(const IdxType &dst_idx) {
  return _dst_unique_edges[dst_idx];
}

// Occurrence count stored for (src, dst_idx). Uses the same probe sequence as
// InsertEdge; the loop only exits on a matching slot, so the edge must have
// been inserted beforehand.
template inline __device__ IdxType DeviceEdgeHashmap::GetEdgeCount(
    const IdxType &src, const IdxType &dst_idx) {
  IdxType start_off = dst_idx * _num_items_each_dst;
  IdxType pos = EdgeHash(src);
  IdxType delta = 1;
  while (_edge_hashmap[start_off + pos].src != src) {
    pos = EdgeHash(pos + delta);
    delta += 1;
  }
  return _edge_hashmap[start_off + pos].cnt;
}

// Allocate and initialise the device-side tables: one unique-edge counter per
// destination, plus num_dst sub-tables whose size is num_items_each_dst
// rounded by _table_size(). All initialisation work is enqueued on `stream`.
template FrequencyHashmap::FrequencyHashmap(
    int64_t num_dst, int64_t num_items_each_dst, DGLContext ctx,
    cudaStream_t stream, int64_t edge_table_scale) {
  _ctx = ctx;
  _stream = stream;
  num_items_each_dst = _table_size(num_items_each_dst, edge_table_scale);
  auto device = dgl::runtime::DeviceAPI::Get(_ctx);
  auto dst_unique_edges = static_cast(
      device->AllocWorkspace(_ctx, (num_dst) * sizeof(IdxType)));
  auto edge_hashmap = static_cast(device->AllocWorkspace(
      _ctx, (num_dst * num_items_each_dst) * sizeof(EdgeItem)));
  constexpr int BLOCK_SIZE = 256;
  constexpr int TILE_SIZE = BLOCK_SIZE * 8;
  dim3 block(BLOCK_SIZE);
  dim3 grid((num_dst * num_items_each_dst + TILE_SIZE - 1) / TILE_SIZE);
  // Zero the counters on the same stream as the kernels that later update
  // them. A plain cudaMemset is issued on the default stream, which is not
  // ordered against work on a non-blocking user stream.
  CUDA_CALL(cudaMemsetAsync(
      dst_unique_edges, 0, (num_dst) * sizeof(IdxType), _stream));
  CUDA_KERNEL_CALL(
      (_init_edge_table), grid, block, 0, _stream, edge_hashmap,
      (num_dst * num_items_each_dst));
  _device_edge_hashmap = new DeviceEdgeHashmap(
      num_dst, num_items_each_dst, dst_unique_edges, edge_hashmap);
  _dst_unique_edges = dst_unique_edges;
  _edge_hashmap = edge_hashmap;
}

// Release the host-side descriptor and both device workspaces.
template FrequencyHashmap::~FrequencyHashmap() {
  auto device = dgl::runtime::DeviceAPI::Get(_ctx);
  delete _device_edge_hashmap;
  _device_edge_hashmap = nullptr;
  device->FreeWorkspace(_ctx, _dst_unique_edges);
  _dst_unique_edges = nullptr;
  device->FreeWorkspace(_ctx, _edge_hashmap);
  _edge_hashmap = nullptr;
}

// Select, for every destination, up to num_pick of its most frequent source
// nodes. The input holds num_edges edges laid out as consecutive groups of
// num_edges_per_node edges per destination. Returns the picked sources,
// their destinations and their frequencies.
template std::tuple FrequencyHashmap::Topk(
    const IdxType *src_data, const IdxType *dst_data, DGLDataType dtype,
    const int64_t num_edges, const int64_t num_edges_per_node,
    const int64_t num_pick) {
  using Idx64Type = int64_t;
  const int64_t num_dst_nodes = (num_edges / num_edges_per_node);
  constexpr int BLOCK_SIZE = 256;
  // XXX: an empirically chosen value, best performance in GV100
  constexpr int TILE_SIZE = BLOCK_SIZE * 32;
  const dim3 block(BLOCK_SIZE);
  const dim3 edges_grid((num_edges + TILE_SIZE - 1) / TILE_SIZE);
  auto device = dgl::runtime::DeviceAPI::Get(_ctx);
  const IdxType num_edge_blocks = static_cast(edges_grid.x);
  IdxType num_unique_edges = 0;

  // to mark if this position of edges is the first inserting position for
  // _edge_hashmap
  bool *is_first_position = static_cast(
      device->AllocWorkspace(_ctx, sizeof(bool) * (num_edges)));
  // Cleared on _stream so the zeroing is ordered before the kernel below,
  // which sets flags in this buffer on the same stream.
  CUDA_CALL(cudaMemsetAsync(
      is_first_position, 0, sizeof(bool) * (num_edges), _stream));
  // double space to use ExclusiveSum
  auto edge_blocks_prefix_data = static_cast(device->AllocWorkspace(
      _ctx, 2 * sizeof(IdxType) * (num_edge_blocks + 1)));
  IdxType *edge_blocks_prefix = edge_blocks_prefix_data;
  IdxType *edge_blocks_prefix_alternate =
      (edge_blocks_prefix_data + (num_edge_blocks + 1));
  // triple space to use ExclusiveSum and unique_output_offsets
  auto num_unique_each_node_data = static_cast(
      device->AllocWorkspace(_ctx, 3 * sizeof(IdxType) * (num_dst_nodes + 1)));
  IdxType *num_unique_each_node = num_unique_each_node_data;
  IdxType *num_unique_each_node_alternate =
      (num_unique_each_node_data + (num_dst_nodes + 1));
  IdxType *unique_output_offsets =
      (num_unique_each_node_data + 2 * (num_dst_nodes + 1));

  // 1. Scan all the edges, counting the unique edges overall and the unique
  // edges of each dst node
  CUDA_KERNEL_CALL(
      (_count_frequency), edges_grid, block, 0, _stream, src_data, num_edges,
      num_edges_per_node, edge_blocks_prefix, is_first_position,
      *_device_edge_hashmap);

  // 2.
Compact the unique edges frequency // 2.1 ExclusiveSum the edge_blocks_prefix void *d_temp_storage = nullptr; size_t temp_storage_bytes = 0; CUDA_CALL(cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, edge_blocks_prefix, edge_blocks_prefix_alternate, num_edge_blocks + 1, _stream)); d_temp_storage = device->AllocWorkspace(_ctx, temp_storage_bytes); CUDA_CALL(cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, edge_blocks_prefix, edge_blocks_prefix_alternate, num_edge_blocks + 1, _stream)); device->FreeWorkspace(_ctx, d_temp_storage); std::swap(edge_blocks_prefix, edge_blocks_prefix_alternate); device->CopyDataFromTo( &edge_blocks_prefix[num_edge_blocks], 0, &num_unique_edges, 0, sizeof(num_unique_edges), _ctx, DGLContext{kDGLCPU, 0}, dtype); device->StreamSync(_ctx, _stream); // 2.2 Allocate the data of unique edges and frequency // double space to use SegmentedRadixSort auto unique_src_edges_data = static_cast( device->AllocWorkspace(_ctx, 2 * sizeof(IdxType) * (num_unique_edges))); IdxType *unique_src_edges = unique_src_edges_data; IdxType *unique_src_edges_alternate = unique_src_edges_data + num_unique_edges; // double space to use SegmentedRadixSort auto unique_frequency_data = static_cast( device->AllocWorkspace(_ctx, 2 * sizeof(Idx64Type) * (num_unique_edges))); Idx64Type *unique_frequency = unique_frequency_data; Idx64Type *unique_frequency_alternate = unique_frequency_data + num_unique_edges; // 2.3 Compact the unique edges and their frequency CUDA_KERNEL_CALL( (_compact_frequency), edges_grid, block, 0, _stream, src_data, dst_data, num_edges, num_edges_per_node, edge_blocks_prefix, is_first_position, num_unique_each_node, unique_src_edges, unique_frequency, *_device_edge_hashmap); // 3. 
SegmentedRadixSort the unique edges and unique_frequency // 3.1 ExclusiveSum the num_unique_each_node d_temp_storage = nullptr; temp_storage_bytes = 0; CUDA_CALL(cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, num_unique_each_node, num_unique_each_node_alternate, num_dst_nodes + 1, _stream)); d_temp_storage = device->AllocWorkspace(_ctx, temp_storage_bytes); CUDA_CALL(cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, num_unique_each_node, num_unique_each_node_alternate, num_dst_nodes + 1, _stream)); device->FreeWorkspace(_ctx, d_temp_storage); // 3.2 SegmentedRadixSort the unique_src_edges and unique_frequency // Create a set of DoubleBuffers to wrap pairs of device pointers cub::DoubleBuffer d_unique_frequency( unique_frequency, unique_frequency_alternate); cub::DoubleBuffer d_unique_src_edges( unique_src_edges, unique_src_edges_alternate); // Determine temporary device storage requirements d_temp_storage = nullptr; temp_storage_bytes = 0; // the DeviceRadixSort is faster than DeviceSegmentedRadixSort, // especially when num_dst_nodes is large (about ~10000) if (dtype.bits == 32) { CUDA_CALL(cub::DeviceRadixSort::SortPairsDescending( d_temp_storage, temp_storage_bytes, d_unique_frequency, d_unique_src_edges, num_unique_edges, 0, sizeof(Idx64Type) * 8, _stream)); } else { CUDA_CALL(cub::DeviceSegmentedRadixSort::SortPairsDescending( d_temp_storage, temp_storage_bytes, d_unique_frequency, d_unique_src_edges, num_unique_edges, num_dst_nodes, num_unique_each_node_alternate, num_unique_each_node_alternate + 1, 0, sizeof(Idx64Type) * 8, _stream)); } d_temp_storage = device->AllocWorkspace(_ctx, temp_storage_bytes); if (dtype.bits == 32) { CUDA_CALL(cub::DeviceRadixSort::SortPairsDescending( d_temp_storage, temp_storage_bytes, d_unique_frequency, d_unique_src_edges, num_unique_edges, 0, sizeof(Idx64Type) * 8, _stream)); } else { CUDA_CALL(cub::DeviceSegmentedRadixSort::SortPairsDescending( d_temp_storage, temp_storage_bytes, 
d_unique_frequency, d_unique_src_edges, num_unique_edges, num_dst_nodes, num_unique_each_node_alternate, num_unique_each_node_alternate + 1, 0, sizeof(Idx64Type) * 8, _stream)); } device->FreeWorkspace(_ctx, d_temp_storage); // 4. Get the final pick number for each dst node // 4.1 Reset the min(num_pick, num_unique_each_node) to num_unique_each_node constexpr int NODE_TILE_SIZE = BLOCK_SIZE * 2; const dim3 nodes_grid((num_dst_nodes + NODE_TILE_SIZE - 1) / NODE_TILE_SIZE); CUDA_KERNEL_CALL( (_get_pick_num), nodes_grid, block, 0, _stream, num_unique_each_node, num_pick, num_dst_nodes); // 4.2 ExclusiveSum the new num_unique_each_node as unique_output_offsets // use unique_output_offsets; d_temp_storage = nullptr; temp_storage_bytes = 0; CUDA_CALL(cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, num_unique_each_node, unique_output_offsets, num_dst_nodes + 1, _stream)); d_temp_storage = device->AllocWorkspace(_ctx, temp_storage_bytes); CUDA_CALL(cub::DeviceScan::ExclusiveSum( d_temp_storage, temp_storage_bytes, num_unique_each_node, unique_output_offsets, num_dst_nodes + 1, _stream)); device->FreeWorkspace(_ctx, d_temp_storage); // 5. 
Pick the data to result IdxType num_output = 0; device->CopyDataFromTo( &unique_output_offsets[num_dst_nodes], 0, &num_output, 0, sizeof(num_output), _ctx, DGLContext{kDGLCPU, 0}, dtype); device->StreamSync(_ctx, _stream); IdArray res_src = IdArray::Empty({static_cast(num_output)}, dtype, _ctx); IdArray res_dst = IdArray::Empty({static_cast(num_output)}, dtype, _ctx); IdArray res_cnt = IdArray::Empty({static_cast(num_output)}, dtype, _ctx); CUDA_KERNEL_CALL( (_pick_data), nodes_grid, block, 0, _stream, d_unique_frequency.Current(), d_unique_src_edges.Current(), num_unique_each_node_alternate, dst_data, num_edges_per_node, num_dst_nodes, num_edges, unique_output_offsets, res_src.Ptr(), res_dst.Ptr(), res_cnt.Ptr()); device->FreeWorkspace(_ctx, is_first_position); device->FreeWorkspace(_ctx, edge_blocks_prefix_data); device->FreeWorkspace(_ctx, num_unique_each_node_data); device->FreeWorkspace(_ctx, unique_src_edges_data); device->FreeWorkspace(_ctx, unique_frequency_data); return std::make_tuple(res_src, res_dst, res_cnt); } template class FrequencyHashmap; template class FrequencyHashmap; }; // namespace impl }; // namespace sampling }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/frequency_hashmap.cuh ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/frequency_hashmap.cuh * @brief frequency hashmap - used to select top-k frequency edges of each node */ #ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_FREQUENCY_HASHMAP_CUH_ #define DGL_GRAPH_SAMPLING_RANDOMWALKS_FREQUENCY_HASHMAP_CUH_ #include #include #include namespace dgl { namespace sampling { namespace impl { template class DeviceEdgeHashmap { public: struct EdgeItem { IdxType src; IdxType cnt; }; DeviceEdgeHashmap() = delete; DeviceEdgeHashmap( int64_t num_dst, int64_t num_items_each_dst, IdxType *dst_unique_edges, EdgeItem *edge_hashmap) : _num_dst(num_dst), _num_items_each_dst(num_items_each_dst), 
_dst_unique_edges(dst_unique_edges), _edge_hashmap(edge_hashmap) {} // return the old cnt of this edge inline __device__ IdxType InsertEdge(const IdxType &src, const IdxType &dst_idx); inline __device__ IdxType GetDstCount(const IdxType &dst_idx); inline __device__ IdxType GetEdgeCount(const IdxType &src, const IdxType &dst_idx); private: int64_t _num_dst; int64_t _num_items_each_dst; IdxType *_dst_unique_edges; EdgeItem *_edge_hashmap; inline __device__ IdxType EdgeHash(const IdxType &id) const { return id % _num_items_each_dst; } }; template class FrequencyHashmap { public: static constexpr int64_t kDefaultEdgeTableScale = 3; FrequencyHashmap() = delete; FrequencyHashmap( int64_t num_dst, int64_t num_items_each_dst, DGLContext ctx, cudaStream_t stream, int64_t edge_table_scale = kDefaultEdgeTableScale); ~FrequencyHashmap(); using EdgeItem = typename DeviceEdgeHashmap::EdgeItem; std::tuple Topk( const IdxType *src_data, const IdxType *dst_data, DGLDataType dtype, const int64_t num_edges, const int64_t num_edges_per_node, const int64_t num_pick); private: DGLContext _ctx; cudaStream_t _stream; DeviceEdgeHashmap *_device_edge_hashmap; IdxType *_dst_unique_edges; EdgeItem *_edge_hashmap; }; }; // namespace impl }; // namespace sampling }; // namespace dgl #endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_FREQUENCY_HASHMAP_CUH_ ================================================ FILE: src/graph/sampling/randomwalks/get_node_types_cpu.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/sampling/get_node_types_cpu.cc * @brief DGL sampler - CPU implementation of random walks with OpenMP */ #include #include #include #include "randomwalks_impl.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath) { uint64_t num_etypes = metapath->shape[0]; TypeArray result = TypeArray::Empty( 
{metapath->shape[0] + 1}, metapath->dtype, metapath->ctx); const IdxType *metapath_data = static_cast(metapath->data); IdxType *result_data = static_cast(result->data); dgl_type_t curr_type = hg->GetEndpointTypes(metapath_data[0]).first; result_data[0] = curr_type; for (uint64_t i = 0; i < num_etypes; ++i) { auto src_dst_type = hg->GetEndpointTypes(metapath_data[i]); dgl_type_t srctype = src_dst_type.first; dgl_type_t dsttype = src_dst_type.second; if (srctype != curr_type) { LOG(FATAL) << "source of edge type #" << i << " does not match destination of edge type #" << i - 1; return result; } curr_type = dsttype; result_data[i + 1] = dsttype; } return result; } template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath); template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath); }; // namespace impl }; // namespace sampling }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/get_node_types_gpu.cu ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/get_node_types_gpu.cu * @brief DGL sampler */ #include #include #include #include #include #include "randomwalks_impl.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath) { uint64_t num_etypes = metapath->shape[0]; auto cpu_ctx = DGLContext{kDGLCPU, 0}; auto metapath_ctx = metapath->ctx; auto stream = DeviceAPI::Get(metapath_ctx)->GetStream(); TypeArray h_result = TypeArray::Empty({metapath->shape[0] + 1}, metapath->dtype, cpu_ctx); auto h_result_data = h_result.Ptr(); auto h_metapath = metapath.CopyTo(cpu_ctx); DeviceAPI::Get(metapath_ctx)->StreamSync(metapath_ctx, stream); const IdxType *h_metapath_data = h_metapath.Ptr(); dgl_type_t curr_type = 
hg->GetEndpointTypes(h_metapath_data[0]).first; h_result_data[0] = curr_type; for (uint64_t i = 0; i < num_etypes; ++i) { auto src_dst_type = hg->GetEndpointTypes(h_metapath_data[i]); dgl_type_t srctype = src_dst_type.first; dgl_type_t dsttype = src_dst_type.second; if (srctype != curr_type) { LOG(FATAL) << "source of edge type #" << i << " does not match destination of edge type #" << i - 1; } curr_type = dsttype; h_result_data[i + 1] = dsttype; } auto result = h_result.CopyTo(metapath->ctx); DeviceAPI::Get(metapath_ctx)->StreamSync(metapath_ctx, stream); return result; } template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath); template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath); }; // namespace impl }; // namespace sampling }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/metapath_randomwalk.h ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/sampler/generic_randomwalk_cpu.h * @brief DGL sampler - templated implementation definition of random walks on * CPU. */ #ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_ #define DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_ #include #include #include #include #include #include #include "randomwalks_cpu.h" #include "randomwalks_impl.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { namespace { template using TerminatePredicate = std::function; /** * @brief Select one successor of metapath-based random walk, given the path * generated so far. * * @param data The path generated so far, of type \c IdxType. * @param curr The last node ID generated. * @param len The number of nodes generated so far. Note that the seed node is * always included as \c data[0], and the successors start from \c data[1]. 
* * @param edges_by_type Vector of results from \c GetAdj() by edge type. * @param metapath_data Edge types of given metapath. * @param prob Transition probability per edge type. * @param terminate Predicate for terminating the current random walk path. * * @return A tuple of ID of next successor (-1 if not exist), the last traversed * edge ID, as well as whether to terminate. */ template std::tuple MetapathRandomWalkStep( IdxType *data, dgl_id_t curr, int64_t len, const std::vector &edges_by_type, const std::vector &csr_has_data, const IdxType *metapath_data, const std::vector &prob, TerminatePredicate terminate) { dgl_type_t etype = metapath_data[len]; // Note that since the selection of successors is very lightweight (especially // in the uniform case), we want to reduce the overheads (even from object // copies or object construction) as much as possible. Using Successors() // slows down by 2x. Using OutEdges() slows down by 10x. const CSRMatrix &csr = edges_by_type[etype]; const IdxType *offsets = csr.indptr.Ptr(); const IdxType *all_succ = csr.indices.Ptr(); const IdxType *all_eids = csr_has_data[etype] ? csr.data.Ptr() : nullptr; const IdxType *succ = all_succ + offsets[curr]; const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr; const int64_t size = offsets[curr + 1] - offsets[curr]; if (size == 0) return std::make_tuple(-1, -1, true); // Use a reference to the original array instead of copying. This avoids // updating the ref counts atomically from different threads and avoids cache // ping-ponging in the tight loop. 
const FloatArray &prob_etype = prob[etype]; IdxType idx = 0; if (IsNullArray(prob_etype)) { // empty probability array; assume uniform idx = RandomEngine::ThreadLocal()->RandInt(size); } else { ATEN_FLOAT_TYPE_SWITCH(prob_etype->dtype, DType, "probability", { FloatArray prob_selected = FloatArray::Empty({size}, prob_etype->dtype, prob_etype->ctx); DType *prob_selected_data = prob_selected.Ptr(); const DType *prob_etype_data = prob_etype.Ptr(); for (int64_t j = 0; j < size; ++j) prob_selected_data[j] = prob_etype_data[eids ? eids[j] : j + offsets[curr]]; idx = RandomEngine::ThreadLocal()->Choice(prob_selected); }); } dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]); return std::make_tuple(succ[idx], eid, terminate(data, curr, len)); } /** * @brief Select one successor of metapath-based random walk, given the path * generated so far specifically for the uniform probability distribution. * * @param data The path generated so far, of type \c IdxType. * @param curr The last node ID generated. * @param len The number of nodes generated so far. Note that the seed node is * always included as \c data[0], and the successors start from \c data[1]. * * @param edges_by_type Vector of results from \c GetAdj() by edge type. * @param metapath_data Edge types of given metapath. * @param prob Transition probability per edge type, for this special case this * will be a NullArray. * @param terminate Predicate for terminating the current random walk path. * * @return A pair of ID of next successor (-1 if not exist), as well as whether * to terminate. \note This function is called only if all the probability * arrays are null. 
*/
template std::tuple MetapathRandomWalkStepUniform(
    IdxType *data, dgl_id_t curr, int64_t len,
    const std::vector &edges_by_type, const std::vector &csr_has_data,
    const IdxType *metapath_data, const std::vector &prob,
    TerminatePredicate terminate) {
  // `prob` is accepted for signature parity with MetapathRandomWalkStep but
  // is not read here. The returned tuple has three elements: next node ID,
  // traversed edge ID, and the terminate flag.

  // The edge type to follow at this step is the len-th entry of the metapath.
  dgl_type_t etype = metapath_data[len];

  // Note that since the selection of successors is very lightweight (especially
  // in the uniform case), we want to reduce the overheads (even from object
  // copies or object construction) as much as possible. Using Successors()
  // slows down by 2x. Using OutEdges() slows down by 10x.
  const CSRMatrix &csr = edges_by_type[etype];
  const IdxType *offsets = csr.indptr.Ptr();
  const IdxType *all_succ = csr.indices.Ptr();
  // When the CSR carries no explicit edge-ID array, all_eids stays null and
  // the position inside `indices` is used as the edge ID further down.
  const IdxType *all_eids = csr_has_data[etype] ? csr.data.Ptr() : nullptr;
  const IdxType *succ = all_succ + offsets[curr];
  const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr;

  // Number of entries in curr's row for this edge type.
  const int64_t size = offsets[curr + 1] - offsets[curr];
  // No successor: report -1 for both node and edge and stop the walk.
  if (size == 0) return std::make_tuple(-1, -1, true);

  IdxType idx = 0;
  // Guaranteed uniform distribution
  idx = RandomEngine::ThreadLocal()->RandInt(size);
  dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]);

  return std::make_tuple(succ[idx], eid, terminate(data, curr, len));
}

/**
 * @brief Metapath-based random walk.
 * @param hg The heterograph.
 * @param seeds A 1D array of seed nodes, with the type the source type of the
 *        first edge type in the metapath.
 * @param metapath A 1D array of edge types representing the metapath.
 * @param prob A vector of 1D float arrays, indicating the transition
 *        probability of each edge by edge type. An empty float array assumes
 *        uniform transition.
 * @param terminate Predicate for terminating a random walk path.
 * @return A 2D array of shape (len(seeds), len(metapath) + 1) with node IDs,
 *         and A 2D array of shape (len(seeds), len(metapath)) with edge IDs.
 */
template std::pair MetapathBasedRandomWalk(
    const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath,
    const std::vector &prob, TerminatePredicate terminate) {
  // One step is taken per metapath entry.
  int64_t max_num_steps = metapath->shape[0];
  const IdxType *metapath_data = static_cast(metapath->data);
  // The walk starts on the source node type of the first edge type.
  // NOTE(review): metapath_data[0] is read unconditionally, so this assumes
  // a non-empty metapath - confirm callers guarantee it.
  const int64_t begin_ntype =
      hg->meta_graph()->FindEdge(metapath_data[0]).first;
  const int64_t max_nodes = hg->NumVertices(begin_ntype);

  // Prefetch all edges.
  // This forces the heterograph to materialize all OutCSR's before the OpenMP
  // loop; otherwise data races will happen.
  // TODO(BarclayII): should we later on materialize COO/CSR/CSC anyway unless
  // told otherwise?
  int64_t num_etypes = hg->NumEdgeTypes();
  std::vector edges_by_type(num_etypes);
  std::vector csr_has_data(num_etypes);
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const CSRMatrix &csr = hg->GetCSRMatrix(etype);
    edges_by_type[etype] = csr;
    // Cached per edge type so the step functions do not re-check it.
    csr_has_data[etype] = CSRHasData(csr);
  }

  // Hoist the check for Uniform vs Non uniform edge distribution
  // to avoid putting it on the hot path
  bool isUniform = true;
  for (const auto &etype_prob : prob) {
    if (!IsNullArray(etype_prob)) {
      isUniform = false;
      break;
    }
  }

  // Both branches build the same kind of step callback. The lambdas capture
  // the prefetched vectors and `prob` by reference, and metapath_data and
  // `terminate` by value; they are only used inside the GenericRandomWalk
  // call made in the same branch.
  if (!isUniform) {
    // At least one edge type has a probability array: use the weighted step.
    StepFunc step =
        [&edges_by_type, &csr_has_data, metapath_data, &prob, terminate](
            IdxType *data, dgl_id_t curr, int64_t len) {
          return MetapathRandomWalkStep(
              data, curr, len, edges_by_type, csr_has_data, metapath_data,
              prob, terminate);
        };
    return GenericRandomWalk(
        seeds, max_num_steps, step, max_nodes);
  } else {
    // Every probability array is null: use the uniform-only step.
    StepFunc step =
        [&edges_by_type, &csr_has_data, metapath_data, &prob, terminate](
            IdxType *data, dgl_id_t curr, int64_t len) {
          return MetapathRandomWalkStepUniform(
              data, curr, len, edges_by_type, csr_has_data, metapath_data,
              prob, terminate);
        };
    return GenericRandomWalk(
        seeds, max_num_steps, step, max_nodes);
  }
}

};  // namespace

};  // namespace impl

};  // namespace sampling

};  // namespace dgl

#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_METAPATH_RANDOMWALK_H_
================================================ FILE: src/graph/sampling/randomwalks/node2vec.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/node2vec.cc * @brief Dispatcher of DGL node2vec random walks */ #include #include #include #include "../../../c_api_common.h" #include "node2vec_impl.h" using namespace dgl::runtime; using namespace dgl::aten; namespace dgl { namespace sampling { namespace { void CheckNode2vecInputs( const HeteroGraphPtr hg, const IdArray seeds, const double p, const double q, const int64_t walk_length, const FloatArray &prob) { CHECK_INT(seeds, "seeds"); CHECK_NDIM(seeds, 1, "seeds"); CHECK_FLOAT(prob, "probability"); CHECK_NDIM(prob, 1, "probability"); } std::pair Node2vec( const HeteroGraphPtr hg, const IdArray seeds, const double p, const double q, const int64_t walk_length, const FloatArray &prob) { CheckNode2vecInputs(hg, seeds, p, q, walk_length, prob); std::pair result; ATEN_XPU_SWITCH(hg->Context().device_type, XPU, "Node2vec", { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { result = impl::Node2vec(hg, seeds, p, q, walk_length, prob); }); }); return result; } DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingNode2vec") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef hg = args[0]; IdArray seeds = args[1]; double p = args[2]; double q = args[3]; int64_t walk_length = args[4]; FloatArray prob = args[5]; auto result = sampling::Node2vec(hg.sptr(), seeds, p, q, walk_length, prob); List ret; ret.push_back(Value(MakeValue(result.first))); ret.push_back(Value(MakeValue(result.second))); *rv = ret; }); } // namespace } // namespace sampling } // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/node2vec_cpu.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/node2vec_cpu.cc * @brief DGL sampler - CPU implementation of node2vec random walk 
with OpenMP */ #include #include #include #include "node2vec_randomwalk.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { template std::pair Node2vec( const HeteroGraphPtr hg, const IdArray seeds, const double p, const double q, const int64_t walk_length, const FloatArray &prob) { TerminatePredicate terminate = [](IdxType *data, dgl_id_t curr, int64_t len) { return false; }; return Node2vecRandomWalk( hg, seeds, p, q, walk_length, prob, terminate); } template std::pair Node2vec( const HeteroGraphPtr hg, const IdArray seeds, const double p, const double q, const int64_t walk_length, const FloatArray &prob); template std::pair Node2vec( const HeteroGraphPtr hg, const IdArray seeds, const double p, const double q, const int64_t walk_length, const FloatArray &prob); }; // namespace impl }; // namespace sampling }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/node2vec_impl.h ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/node2vec_impl.h * @brief DGL sampler - templated implementation definition of node2vec random * walks */ #ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_ #define DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_ #include #include #include #include #include #include namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { /** * @brief Node2vec random walk. * @param hg The heterograph. * @param seeds A 1D array of seed nodes, with the type the source type of the * first edge type in the metapath. * @param p Float, indicating likelihood of immediately revisiting a node in the * walk. * @param q Float, control parameter to interpolate between breadth-first * strategy and depth-first strategy. * @param walk_length Int, length of walk. 
* @param prob A vector of 1D float arrays, indicating the transition * probability of each edge by edge type. An empty float array assumes * uniform transition. * @return A 2D array of shape (len(seeds), len(walk_length) * + 1) with node IDs. The paths that terminated early are padded with -1. */ template std::pair Node2vec( const HeteroGraphPtr hg, const IdArray seeds, const double p, const double q, const int64_t walk_length, const FloatArray &prob); }; // namespace impl }; // namespace sampling }; // namespace dgl #endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_IMPL_H_ ================================================ FILE: src/graph/sampling/randomwalks/node2vec_randomwalk.h ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/sampling/node2vec_randomwalk.cc * @brief DGL sampler - CPU implementation of node2vec random walk. */ #ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_ #define DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_ #include #include #include #include #include #include #include #include #include #include "metapath_randomwalk.h" // for TerminatePredicate #include "node2vec_impl.h" #include "randomwalks_cpu.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { namespace { template bool has_edge_between(const CSRMatrix &csr, dgl_id_t u, dgl_id_t v) { const IdxType *offsets = csr.indptr.Ptr(); const IdxType *all_succ = csr.indices.Ptr(); const IdxType *u_succ = all_succ + offsets[u]; const int64_t size = offsets[u + 1] - offsets[u]; if (csr.sorted) return std::binary_search(u_succ, u_succ + size, v); else return std::find(u_succ, u_succ + size, v) != u_succ + size; } /** * @brief Node2vec random walk step function * @param data The path generated so far, of type \c IdxType. * @param curr The last node ID generated. 
* @param pre The last last node ID generated * @param p Float, indicating likelihood of immediately revisiting a node in the * walk. * @param q Float, control parameter to interpolate between breadth-first * strategy and depth-first strategy. * @param len The number of nodes generated so far. Note that the seed node is * always included as \c data[0], and the successors start from \c data[1]. * @param csr The CSR matrix * @param prob Transition probability * @param terminate Predicate for terminating the current random walk path. * @return A tuple of ID of next successor (-1 if not exist), the edge ID * traversed, as well as whether to terminate. */ template std::tuple Node2vecRandomWalkStep( IdxType *data, dgl_id_t curr, dgl_id_t pre, const double p, const double q, int64_t len, const CSRMatrix &csr, bool csr_has_data, const FloatArray &probs, TerminatePredicate terminate) { const IdxType *offsets = csr.indptr.Ptr(); const IdxType *all_succ = csr.indices.Ptr(); const IdxType *all_eids = csr_has_data ? csr.data.Ptr() : nullptr; const IdxType *succ = all_succ + offsets[curr]; const IdxType *eids = all_eids ? (all_eids + offsets[curr]) : nullptr; const int64_t size = offsets[curr + 1] - offsets[curr]; // Isolated node if (size == 0) return std::make_tuple(-1, -1, true); IdxType idx = 0; // Normalize the weights to compute rejection probabilities double max_prob = std::max({1 / p, 1.0, 1 / q}); // rejection prob for back to the previous node double prob0 = 1 / p / max_prob; // rejection prob for visiting the node with the distance of 1 between the // previous node double prob1 = 1 / max_prob; // rejection prob for visiting the node with the distance of 2 between the // previous node double prob2 = 1 / q / max_prob; dgl_id_t next_node; double r; // rejection probability. 
if (IsNullArray(probs)) { if (len == 0) { idx = RandomEngine::ThreadLocal()->RandInt(size); next_node = succ[idx]; } else { while (true) { idx = RandomEngine::ThreadLocal()->RandInt(size); r = RandomEngine::ThreadLocal()->Uniform(0., 1.); next_node = succ[idx]; if (next_node == pre) { if (r < prob0) break; } else if (has_edge_between(csr, next_node, pre)) { if (r < prob1) break; } else if (r < prob2) { break; } } } } else { FloatArray prob_selected; ATEN_FLOAT_TYPE_SWITCH(probs->dtype, DType, "probability", { prob_selected = FloatArray::Empty({size}, probs->dtype, probs->ctx); DType *prob_selected_data = prob_selected.Ptr(); const DType *prob_etype_data = probs.Ptr(); for (int64_t j = 0; j < size; ++j) prob_selected_data[j] = prob_etype_data[eids ? eids[j] : j + offsets[curr]]; }); if (len == 0) { idx = RandomEngine::ThreadLocal()->Choice(prob_selected); next_node = succ[idx]; } else { while (true) { idx = RandomEngine::ThreadLocal()->Choice(prob_selected); r = RandomEngine::ThreadLocal()->Uniform(0., 1.); next_node = succ[idx]; if (next_node == pre) { if (r < prob0) break; } else if (has_edge_between(csr, next_node, pre)) { if (r < prob1) break; } else if (r < prob2) { break; } } } } dgl_id_t eid = eids ? eids[idx] : (idx + offsets[curr]); return std::make_tuple(next_node, eid, terminate(data, next_node, len)); } template std::pair Node2vecRandomWalk( const HeteroGraphPtr g, const IdArray seeds, const double p, const double q, const int64_t max_num_steps, const FloatArray &prob, TerminatePredicate terminate) { const CSRMatrix &edges = g->GetCSRMatrix(0); // homogeneous graph. bool csr_has_data = CSRHasData(edges); StepFunc step = [&edges, csr_has_data, &prob, p, q, terminate]( IdxType *data, dgl_id_t curr, int64_t len) { dgl_id_t pre = (len != 0) ? 
data[len - 1] : curr; return Node2vecRandomWalkStep( data, curr, pre, p, q, len, edges, csr_has_data, prob, terminate); }; return GenericRandomWalk( seeds, max_num_steps, step, g->NumVertices(0)); } }; // namespace }; // namespace impl }; // namespace sampling }; // namespace dgl #endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_NODE2VEC_RANDOMWALK_H_ ================================================ FILE: src/graph/sampling/randomwalks/randomwalk_cpu.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/sampling/randomwalk_cpu.cc * @brief DGL sampler - CPU implementation of metapath-based random walk with * OpenMP */ #include #include #include #include #include #include #include "metapath_randomwalk.h" #include "randomwalks_cpu.h" #include "randomwalks_impl.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob) { TerminatePredicate terminate = [](IdxType *data, dgl_id_t curr, int64_t len) { return false; }; return MetapathBasedRandomWalk( hg, seeds, metapath, prob, terminate); } template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k) { CHECK(src->ctx.device_type == kDGLCPU) << "IdArray needs be on CPU!"; int64_t len = src->shape[0] / num_samples_per_node; IdxType *src_data = src.Ptr(); const IdxType *dst_data = dst.Ptr(); std::vector res_src_vec, res_dst_vec, res_cnt_vec; for (int64_t i = 0; i < len; ++i) { int64_t start_idx = (i * num_samples_per_node); int64_t end_idx = (start_idx + num_samples_per_node); IdxType dst_node = dst_data[start_idx]; std::sort(src_data + start_idx, src_data + end_idx); int64_t cnt = 0; std::vector> vec; for (int64_t j = start_idx; j < end_idx; ++j) { if ((j != start_idx) && (src_data[j] != src_data[j - 1])) { if (src_data[j - 1] != 
-1) { vec.emplace_back(std::make_pair(cnt, src_data[j - 1])); } cnt = 0; } ++cnt; } // add last count if (src_data[end_idx - 1] != -1) { vec.emplace_back(std::make_pair(cnt, src_data[end_idx - 1])); } std::sort( vec.begin(), vec.end(), std::greater>()); int64_t len = std::min(vec.size(), static_cast(k)); for (int64_t j = 0; j < len; ++j) { auto pair_item = vec[j]; res_src_vec.emplace_back(pair_item.second); res_dst_vec.emplace_back(dst_node); res_cnt_vec.emplace_back(pair_item.first); } } IdArray res_src = IdArray::Empty( {static_cast(res_src_vec.size())}, src->dtype, src->ctx); IdArray res_dst = IdArray::Empty( {static_cast(res_dst_vec.size())}, dst->dtype, dst->ctx); IdArray res_cnt = IdArray::Empty( {static_cast(res_cnt_vec.size())}, src->dtype, src->ctx); // copy data from vector to NDArray auto device = runtime::DeviceAPI::Get(src->ctx); device->CopyDataFromTo( static_cast(res_src_vec.data()), 0, res_src.Ptr(), 0, sizeof(IdxType) * res_src_vec.size(), DGLContext{kDGLCPU, 0}, res_src->ctx, res_src->dtype); device->CopyDataFromTo( static_cast(res_dst_vec.data()), 0, res_dst.Ptr(), 0, sizeof(IdxType) * res_dst_vec.size(), DGLContext{kDGLCPU, 0}, res_dst->ctx, res_dst->dtype); device->CopyDataFromTo( static_cast(res_cnt_vec.data()), 0, res_cnt.Ptr(), 0, sizeof(IdxType) * res_cnt_vec.size(), DGLContext{kDGLCPU, 0}, res_cnt->ctx, res_cnt->dtype); return std::make_tuple(res_src, res_dst, res_cnt); } template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob); template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob); template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k); template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k); }; // namespace impl }; // namespace sampling }; 
// namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/randomwalk_gpu.cu ================================================ /** * Copyright (c) 2021-2022 by Contributors * @file graph/sampling/randomwalk_gpu.cu * @brief CUDA random walk sampleing */ #include #include #include #include #include #include #include #include #include #include "../../../runtime/cuda/cuda_common.h" #include "frequency_hashmap.cuh" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { namespace { template struct GraphKernelData { const IdType *in_ptr; const IdType *in_cols; const IdType *data; }; template __global__ void _RandomWalkKernel( const uint64_t rand_seed, const IdType *seed_data, const int64_t num_seeds, const IdType *metapath_data, const uint64_t max_num_steps, const GraphKernelData *graphs, const FloatType *restart_prob_data, const int64_t restart_prob_size, const int64_t max_nodes, IdType *out_traces_data, IdType *out_eids_data) { assert(BLOCK_SIZE == blockDim.x); int64_t idx = blockIdx.x * TILE_SIZE + threadIdx.x; int64_t last_idx = min(static_cast(blockIdx.x + 1) * TILE_SIZE, num_seeds); int64_t trace_length = (max_num_steps + 1); curandState rng; // reference: // https://docs.nvidia.com/cuda/curand/device-api-overview.html#performance-notes curand_init(rand_seed + idx, 0, 0, &rng); while (idx < last_idx) { IdType curr = seed_data[idx]; assert(curr < max_nodes); IdType *traces_data_ptr = &out_traces_data[idx * trace_length]; IdType *eids_data_ptr = &out_eids_data[idx * max_num_steps]; *(traces_data_ptr++) = curr; int64_t step_idx; for (step_idx = 0; step_idx < max_num_steps; ++step_idx) { IdType metapath_id = metapath_data[step_idx]; const GraphKernelData &graph = graphs[metapath_id]; const int64_t in_row_start = graph.in_ptr[curr]; const int64_t deg = graph.in_ptr[curr + 1] - graph.in_ptr[curr]; if (deg == 0) { // the degree is zero break; } const int64_t num = 
curand(&rng) % deg; IdType pick = graph.in_cols[in_row_start + num]; IdType eid = (graph.data ? graph.data[in_row_start + num] : in_row_start + num); *traces_data_ptr = pick; *eids_data_ptr = eid; if ((restart_prob_size > 1) && (curand_uniform(&rng) < restart_prob_data[step_idx])) { break; } else if ( (restart_prob_size == 1) && (curand_uniform(&rng) < restart_prob_data[0])) { break; } ++traces_data_ptr; ++eids_data_ptr; curr = pick; } for (; step_idx < max_num_steps; ++step_idx) { *(traces_data_ptr++) = -1; *(eids_data_ptr++) = -1; } idx += BLOCK_SIZE; } } template __global__ void _RandomWalkBiasedKernel( const uint64_t rand_seed, const IdType *seed_data, const int64_t num_seeds, const IdType *metapath_data, const uint64_t max_num_steps, const GraphKernelData *graphs, const FloatType **probs, const FloatType **prob_sums, const FloatType *restart_prob_data, const int64_t restart_prob_size, const int64_t max_nodes, IdType *out_traces_data, IdType *out_eids_data) { assert(BLOCK_SIZE == blockDim.x); int64_t idx = blockIdx.x * TILE_SIZE + threadIdx.x; int64_t last_idx = min(static_cast(blockIdx.x + 1) * TILE_SIZE, num_seeds); int64_t trace_length = (max_num_steps + 1); curandState rng; // reference: // https://docs.nvidia.com/cuda/curand/device-api-overview.html#performance-notes curand_init(rand_seed + idx, 0, 0, &rng); while (idx < last_idx) { IdType curr = seed_data[idx]; assert(curr < max_nodes); IdType *traces_data_ptr = &out_traces_data[idx * trace_length]; IdType *eids_data_ptr = &out_eids_data[idx * max_num_steps]; *(traces_data_ptr++) = curr; int64_t step_idx; for (step_idx = 0; step_idx < max_num_steps; ++step_idx) { IdType metapath_id = metapath_data[step_idx]; const GraphKernelData &graph = graphs[metapath_id]; const int64_t in_row_start = graph.in_ptr[curr]; const int64_t deg = graph.in_ptr[curr + 1] - graph.in_ptr[curr]; if (deg == 0) { // the degree is zero break; } // randomly select by weight const FloatType *prob_sum = prob_sums[metapath_id]; const 
FloatType *prob = probs[metapath_id]; int64_t num; if (prob == nullptr) { num = curand(&rng) % deg; } else { auto rnd_sum_w = prob_sum[curr] * curand_uniform(&rng); FloatType sum_w{0.}; for (num = 0; num < deg; ++num) { sum_w += prob[in_row_start + num]; if (sum_w >= rnd_sum_w) break; } } IdType pick = graph.in_cols[in_row_start + num]; IdType eid = (graph.data ? graph.data[in_row_start + num] : in_row_start + num); *traces_data_ptr = pick; *eids_data_ptr = eid; if ((restart_prob_size > 1) && (curand_uniform(&rng) < restart_prob_data[step_idx])) { break; } else if ( (restart_prob_size == 1) && (curand_uniform(&rng) < restart_prob_data[0])) { break; } ++traces_data_ptr; ++eids_data_ptr; curr = pick; } for (; step_idx < max_num_steps; ++step_idx) { *(traces_data_ptr++) = -1; *(eids_data_ptr++) = -1; } idx += BLOCK_SIZE; } } } // namespace // random walk for uniform choice template std::pair RandomWalkUniform( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, FloatArray restart_prob) { const int64_t max_num_steps = metapath->shape[0]; const IdType *metapath_data = static_cast(metapath->data); const int64_t begin_ntype = hg->meta_graph()->FindEdge(metapath_data[0]).first; const int64_t max_nodes = hg->NumVertices(begin_ntype); int64_t num_etypes = hg->NumEdgeTypes(); auto ctx = seeds->ctx; const IdType *seed_data = static_cast(seeds->data); CHECK(seeds->ndim == 1) << "seeds shape is not one dimension."; const int64_t num_seeds = seeds->shape[0]; int64_t trace_length = max_num_steps + 1; IdArray traces = IdArray::Empty({num_seeds, trace_length}, seeds->dtype, ctx); IdArray eids = IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, ctx); IdType *traces_data = traces.Ptr(); IdType *eids_data = eids.Ptr(); std::vector> h_graphs(num_etypes); for (int64_t etype = 0; etype < num_etypes; ++etype) { const CSRMatrix &csr = hg->GetCSRMatrix(etype); h_graphs[etype].in_ptr = static_cast(csr.indptr->data); h_graphs[etype].in_cols = 
static_cast(csr.indices->data); h_graphs[etype].data = (CSRHasData(csr) ? static_cast(csr.data->data) : nullptr); } // use cuda stream from local thread cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = DeviceAPI::Get(ctx); auto d_graphs = static_cast *>(device->AllocWorkspace( ctx, (num_etypes) * sizeof(GraphKernelData))); // copy graph metadata pointers to GPU device->CopyDataFromTo( h_graphs.data(), 0, d_graphs, 0, (num_etypes) * sizeof(GraphKernelData), DGLContext{kDGLCPU, 0}, ctx, hg->GetCSRMatrix(0).indptr->dtype); // copy metapath to GPU auto d_metapath = metapath.CopyTo(ctx); const IdType *d_metapath_data = static_cast(d_metapath->data); constexpr int BLOCK_SIZE = 256; constexpr int TILE_SIZE = BLOCK_SIZE * 4; dim3 block(256); dim3 grid((num_seeds + TILE_SIZE - 1) / TILE_SIZE); const uint64_t random_seed = RandomEngine::ThreadLocal()->RandInt(1000000000); ATEN_FLOAT_TYPE_SWITCH( restart_prob->dtype, FloatType, "random walk GPU kernel", { CHECK(restart_prob->ctx.device_type == kDGLCUDA) << "restart prob should be in GPU."; CHECK(restart_prob->ndim == 1) << "restart prob dimension should be 1."; const FloatType *restart_prob_data = restart_prob.Ptr(); const int64_t restart_prob_size = restart_prob->shape[0]; CUDA_KERNEL_CALL( (_RandomWalkKernel), grid, block, 0, stream, random_seed, seed_data, num_seeds, d_metapath_data, max_num_steps, d_graphs, restart_prob_data, restart_prob_size, max_nodes, traces_data, eids_data); }); device->FreeWorkspace(ctx, d_graphs); return std::make_pair(traces, eids); } /** * @brief Random walk for biased choice. We use inverse transform sampling to * choose the next step. 
*/ template std::pair RandomWalkBiased( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob) { const int64_t max_num_steps = metapath->shape[0]; const IdType *metapath_data = static_cast(metapath->data); const int64_t begin_ntype = hg->meta_graph()->FindEdge(metapath_data[0]).first; const int64_t max_nodes = hg->NumVertices(begin_ntype); int64_t num_etypes = hg->NumEdgeTypes(); auto ctx = seeds->ctx; const IdType *seed_data = static_cast(seeds->data); CHECK(seeds->ndim == 1) << "seeds shape is not one dimension."; const int64_t num_seeds = seeds->shape[0]; int64_t trace_length = max_num_steps + 1; IdArray traces = IdArray::Empty({num_seeds, trace_length}, seeds->dtype, ctx); IdArray eids = IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, ctx); IdType *traces_data = traces.Ptr(); IdType *eids_data = eids.Ptr(); cudaStream_t stream = runtime::getCurrentCUDAStream(); auto device = DeviceAPI::Get(ctx); // new probs and prob sums pointers assert(num_etypes == static_cast(prob.size())); std::unique_ptr probs(new FloatType *[prob.size()]); std::unique_ptr prob_sums(new FloatType *[prob.size()]); std::vector prob_sums_arr; prob_sums_arr.reserve(prob.size()); // graphs std::vector> h_graphs(num_etypes); for (int64_t etype = 0; etype < num_etypes; ++etype) { const CSRMatrix &csr = hg->GetCSRMatrix(etype); h_graphs[etype].in_ptr = static_cast(csr.indptr->data); h_graphs[etype].in_cols = static_cast(csr.indices->data); h_graphs[etype].data = (CSRHasData(csr) ? 
static_cast(csr.data->data) : nullptr); int64_t num_segments = csr.indptr->shape[0] - 1; // will handle empty probs in the kernel if (IsNullArray(prob[etype])) { probs[etype] = nullptr; prob_sums[etype] = nullptr; continue; } probs[etype] = prob[etype].Ptr(); prob_sums_arr.push_back( FloatArray::Empty({num_segments}, prob[etype]->dtype, ctx)); prob_sums[etype] = prob_sums_arr[etype].Ptr(); // calculate the sum of the neighbor weights const IdType *d_offsets = static_cast(csr.indptr->data); size_t temp_storage_size = 0; CUDA_CALL(cub::DeviceSegmentedReduce::Sum( nullptr, temp_storage_size, probs[etype], prob_sums[etype], num_segments, d_offsets, d_offsets + 1, stream)); void *temp_storage = device->AllocWorkspace(ctx, temp_storage_size); CUDA_CALL(cub::DeviceSegmentedReduce::Sum( temp_storage, temp_storage_size, probs[etype], prob_sums[etype], num_segments, d_offsets, d_offsets + 1, stream)); device->FreeWorkspace(ctx, temp_storage); } // copy graph metadata pointers to GPU auto d_graphs = static_cast *>(device->AllocWorkspace( ctx, (num_etypes) * sizeof(GraphKernelData))); device->CopyDataFromTo( h_graphs.data(), 0, d_graphs, 0, (num_etypes) * sizeof(GraphKernelData), DGLContext{kDGLCPU, 0}, ctx, hg->GetCSRMatrix(0).indptr->dtype); // copy probs pointers to GPU const FloatType **probs_dev = static_cast( device->AllocWorkspace(ctx, num_etypes * sizeof(FloatType *))); device->CopyDataFromTo( probs.get(), 0, probs_dev, 0, (num_etypes) * sizeof(FloatType *), DGLContext{kDGLCPU, 0}, ctx, prob[0]->dtype); // copy probs_sum pointers to GPU const FloatType **prob_sums_dev = static_cast( device->AllocWorkspace(ctx, num_etypes * sizeof(FloatType *))); device->CopyDataFromTo( prob_sums.get(), 0, prob_sums_dev, 0, (num_etypes) * sizeof(FloatType *), DGLContext{kDGLCPU, 0}, ctx, prob[0]->dtype); // copy metapath to GPU auto d_metapath = metapath.CopyTo(ctx); const IdType *d_metapath_data = static_cast(d_metapath->data); constexpr int BLOCK_SIZE = 256; constexpr int TILE_SIZE = 
BLOCK_SIZE * 4; dim3 block(256); dim3 grid((num_seeds + TILE_SIZE - 1) / TILE_SIZE); const uint64_t random_seed = RandomEngine::ThreadLocal()->RandInt(1000000000); CHECK(restart_prob->ctx.device_type == kDGLCUDA) << "restart prob should be in GPU."; CHECK(restart_prob->ndim == 1) << "restart prob dimension should be 1."; const FloatType *restart_prob_data = restart_prob.Ptr(); const int64_t restart_prob_size = restart_prob->shape[0]; CUDA_KERNEL_CALL( (_RandomWalkBiasedKernel), grid, block, 0, stream, random_seed, seed_data, num_seeds, d_metapath_data, max_num_steps, d_graphs, probs_dev, prob_sums_dev, restart_prob_data, restart_prob_size, max_nodes, traces_data, eids_data); device->FreeWorkspace(ctx, d_graphs); device->FreeWorkspace(ctx, probs_dev); device->FreeWorkspace(ctx, prob_sums_dev); return std::make_pair(traces, eids); } template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob) { bool isUniform = true; for (const auto &etype_prob : prob) { if (!IsNullArray(etype_prob)) { isUniform = false; break; } } auto restart_prob = NDArray::Empty({0}, DGLDataType{kDGLFloat, 32, 1}, DGLContext{XPU, 0}); if (!isUniform) { std::pair ret; ATEN_FLOAT_TYPE_SWITCH(prob[0]->dtype, FloatType, "probability", { ret = RandomWalkBiased( hg, seeds, metapath, prob, restart_prob); }); return ret; } else { return RandomWalkUniform(hg, seeds, metapath, restart_prob); } } template std::pair RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob) { bool isUniform = true; for (const auto &etype_prob : prob) { if (!IsNullArray(etype_prob)) { isUniform = false; break; } } auto device_ctx = seeds->ctx; auto restart_prob_array = NDArray::Empty({1}, DGLDataType{kDGLFloat, 64, 1}, device_ctx); auto device = dgl::runtime::DeviceAPI::Get(device_ctx); // use cuda stream from local thread cudaStream_t stream = runtime::getCurrentCUDAStream(); 
device->CopyDataFromTo( &restart_prob, 0, restart_prob_array.Ptr(), 0, sizeof(double), DGLContext{kDGLCPU, 0}, device_ctx, restart_prob_array->dtype); device->StreamSync(device_ctx, stream); if (!isUniform) { std::pair ret; ATEN_FLOAT_TYPE_SWITCH(prob[0]->dtype, FloatType, "probability", { ret = RandomWalkBiased( hg, seeds, metapath, prob, restart_prob_array); }); return ret; } else { return RandomWalkUniform( hg, seeds, metapath, restart_prob_array); } } template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob) { bool isUniform = true; for (const auto &etype_prob : prob) { if (!IsNullArray(etype_prob)) { isUniform = false; break; } } if (!isUniform) { std::pair ret; ATEN_FLOAT_TYPE_SWITCH(prob[0]->dtype, FloatType, "probability", { ret = RandomWalkBiased( hg, seeds, metapath, prob, restart_prob); }); return ret; } else { return RandomWalkUniform(hg, seeds, metapath, restart_prob); } } template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k) { CHECK(src->ctx.device_type == kDGLCUDA) << "IdArray needs be on GPU!"; const IdxType *src_data = src.Ptr(); const IdxType *dst_data = dst.Ptr(); const int64_t num_dst_nodes = (dst->shape[0] / num_samples_per_node); auto ctx = src->ctx; // use cuda stream from local thread cudaStream_t stream = runtime::getCurrentCUDAStream(); auto frequency_hashmap = FrequencyHashmap( num_dst_nodes, num_samples_per_node, ctx, stream); auto ret = frequency_hashmap.Topk( src_data, dst_data, src->dtype, src->shape[0], num_samples_per_node, k); return ret; } template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob); template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob); template std::pair RandomWalkWithRestart( const 
HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob); template std::pair RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob); template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob); template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob); template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k); template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k); }; // namespace impl }; // namespace sampling }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/randomwalk_with_restart_cpu.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/sampling/randomwalk_with_restart_cpu.cc * @brief DGL sampler - CPU implementation of metapath-based random walk with * restart with OpenMP */ #include #include #include #include #include #include "metapath_randomwalk.h" #include "randomwalks_cpu.h" #include "randomwalks_impl.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { template std::pair RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob) { TerminatePredicate terminate = [restart_prob](IdxType *data, dgl_id_t curr, int64_t len) { return RandomEngine::ThreadLocal()->Uniform() < restart_prob; }; return MetapathBasedRandomWalk( hg, seeds, metapath, prob, terminate); } template std::pair 
RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob); template std::pair RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob); template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob) { std::pair result; ATEN_FLOAT_TYPE_SWITCH(restart_prob->dtype, DType, "restart probability", { DType *restart_prob_data = static_cast(restart_prob->data); TerminatePredicate terminate = [restart_prob_data](IdxType *data, dgl_id_t curr, int64_t len) { return RandomEngine::ThreadLocal()->Uniform() < restart_prob_data[len]; }; result = MetapathBasedRandomWalk( hg, seeds, metapath, prob, terminate); }); return result; } template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob); template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob); }; // namespace impl }; // namespace sampling }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/randomwalks.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/sampling/randomwalks.cc * @brief Dispatcher of different DGL random walks by device type */ #include #include #include #include #include #include #include #include "../../../c_api_common.h" #include "randomwalks_impl.h" using namespace dgl::runtime; using namespace dgl::aten; namespace dgl { namespace sampling { namespace { void CheckRandomWalkInputs( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob) { CHECK_INT(seeds, "seeds"); 
CHECK_INT(metapath, "metapath"); CHECK_NDIM(seeds, 1, "seeds"); CHECK_NDIM(metapath, 1, "metapath"); // (Xin): metapath is copied to GPU in CUDA random walk code // CHECK_SAME_CONTEXT(seeds, metapath); if (hg->IsPinned()) { CHECK_EQ(seeds->ctx.device_type, kDGLCUDA) << "Expected seeds (" << seeds->ctx << ")" << " to be on the GPU when the graph is pinned."; } else if (hg->Context() != seeds->ctx) { LOG(FATAL) << "Expected seeds (" << seeds->ctx << ")" << " to have the same " << "context as graph (" << hg->Context() << ")."; } for (uint64_t i = 0; i < prob.size(); ++i) { FloatArray p = prob[i]; CHECK_EQ(hg->Context(), p->ctx) << "Expected prob (" << p->ctx << ")" << " to have the same " << "context as graph (" << hg->Context() << ")."; CHECK_FLOAT(p, "probability"); if (p.GetSize() != 0) { CHECK_EQ(hg->IsPinned(), p.IsPinned()) << "The prob array should have the same pinning status as the graph"; CHECK_NDIM(p, 1, "probability"); } } } }; // namespace std::tuple RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob) { CheckRandomWalkInputs(hg, seeds, metapath, prob); TypeArray vtypes; std::pair result; ATEN_XPU_SWITCH_CUDA(seeds->ctx.device_type, XPU, "RandomWalk", { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { vtypes = impl::GetNodeTypesFromMetapath(hg, metapath); result = impl::RandomWalk(hg, seeds, metapath, prob); }); }); return std::make_tuple(result.first, result.second, vtypes); } std::tuple RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob) { CheckRandomWalkInputs(hg, seeds, metapath, prob); CHECK(restart_prob >= 0 && restart_prob < 1) << "restart probability must belong to [0, 1)"; TypeArray vtypes; std::pair result; ATEN_XPU_SWITCH_CUDA(seeds->ctx.device_type, XPU, "RandomWalkWithRestart", { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { vtypes = impl::GetNodeTypesFromMetapath(hg, metapath); result = 
impl::RandomWalkWithRestart( hg, seeds, metapath, prob, restart_prob); }); }); return std::make_tuple(result.first, result.second, vtypes); } std::tuple RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob) { CheckRandomWalkInputs(hg, seeds, metapath, prob); // TODO(BarclayII): check the elements of restart probability TypeArray vtypes; std::pair result; ATEN_XPU_SWITCH_CUDA( seeds->ctx.device_type, XPU, "RandomWalkWithStepwiseRestart", { ATEN_ID_TYPE_SWITCH(seeds->dtype, IdxType, { vtypes = impl::GetNodeTypesFromMetapath(hg, metapath); result = impl::RandomWalkWithStepwiseRestart( hg, seeds, metapath, prob, restart_prob); }); }); return std::make_tuple(result.first, result.second, vtypes); } std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k) { assert( (src->ndim == 1) && (dst->ndim == 1) && (src->shape[0] % num_samples_per_node == 0) && (src->shape[0] == dst->shape[0])); std::tuple result; ATEN_XPU_SWITCH_CUDA((src->ctx).device_type, XPU, "SelectPinSageNeighbors", { ATEN_ID_TYPE_SWITCH(src->dtype, IdxType, { result = impl::SelectPinSageNeighbors( src, dst, num_samples_per_node, k); }); }); return result; } }; // namespace sampling DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingRandomWalk") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef hg = args[0]; IdArray seeds = args[1]; TypeArray metapath = args[2]; List prob = args[3]; const auto &prob_vec = ListValueToVector(prob); auto result = sampling::RandomWalk(hg.sptr(), seeds, metapath, prob_vec); List ret; ret.push_back(Value(MakeValue(std::get<0>(result)))); ret.push_back(Value(MakeValue(std::get<1>(result)))); ret.push_back(Value(MakeValue(std::get<2>(result)))); *rv = ret; }); DGL_REGISTER_GLOBAL("sampling.pinsage._CAPI_DGLSamplingSelectPinSageNeighbors") .set_body([](DGLArgs args, DGLRetValue *rv) { IdArray src = 
args[0]; IdArray dst = args[1]; int64_t num_travelsals = static_cast(args[2]); int64_t k = static_cast(args[3]); auto result = sampling::SelectPinSageNeighbors(src, dst, num_travelsals, k); List ret; ret.push_back(Value(MakeValue(std::get<0>(result)))); ret.push_back(Value(MakeValue(std::get<1>(result)))); ret.push_back(Value(MakeValue(std::get<2>(result)))); *rv = ret; }); DGL_REGISTER_GLOBAL( "sampling.randomwalks._CAPI_DGLSamplingRandomWalkWithRestart") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef hg = args[0]; IdArray seeds = args[1]; TypeArray metapath = args[2]; List prob = args[3]; double restart_prob = args[4]; const auto &prob_vec = ListValueToVector(prob); auto result = sampling::RandomWalkWithRestart( hg.sptr(), seeds, metapath, prob_vec, restart_prob); List ret; ret.push_back(Value(MakeValue(std::get<0>(result)))); ret.push_back(Value(MakeValue(std::get<1>(result)))); ret.push_back(Value(MakeValue(std::get<2>(result)))); *rv = ret; }); DGL_REGISTER_GLOBAL( "sampling.randomwalks._CAPI_DGLSamplingRandomWalkWithStepwiseRestart") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef hg = args[0]; IdArray seeds = args[1]; TypeArray metapath = args[2]; List prob = args[3]; FloatArray restart_prob = args[4]; const auto &prob_vec = ListValueToVector(prob); auto result = sampling::RandomWalkWithStepwiseRestart( hg.sptr(), seeds, metapath, prob_vec, restart_prob); List ret; ret.push_back(Value(MakeValue(std::get<0>(result)))); ret.push_back(Value(MakeValue(std::get<1>(result)))); ret.push_back(Value(MakeValue(std::get<2>(result)))); *rv = ret; }); DGL_REGISTER_GLOBAL("sampling.randomwalks._CAPI_DGLSamplingPackTraces") .set_body([](DGLArgs args, DGLRetValue *rv) { IdArray vids = args[0]; TypeArray vtypes = args[1]; IdArray concat_vids, concat_vtypes, lengths, offsets; std::tie(concat_vids, lengths, offsets) = Pack(vids, -1); std::tie(concat_vtypes, std::ignore) = ConcatSlices(vtypes, lengths); List ret; 
ret.push_back(Value(MakeValue(concat_vids))); ret.push_back(Value(MakeValue(concat_vtypes))); ret.push_back(Value(MakeValue(lengths))); ret.push_back(Value(MakeValue(offsets))); *rv = ret; }); }; // namespace dgl ================================================ FILE: src/graph/sampling/randomwalks/randomwalks_cpu.h ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/sampler/generic_randomwalk_cpu.h * @brief DGL sampler - templated implementation definition of random walks on * CPU */ #ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_ #define DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_ #include #include #include #include #include #include "randomwalks_impl.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace sampling { namespace impl { namespace { /** * @brief Generic Random Walk. * @param seeds A 1D array of seed nodes, with the type the source type of the * first edge type in the metapath. * @param max_num_steps The maximum number of steps of a random walk path. * @param step The random walk step function with type \c StepFunc. * @param max_nodes Throws an error if one of the values in \c seeds exceeds * this argument. * @return A 2D array of shape (len(seeds), max_num_steps + 1) with node IDs. * @note The graph itself should be bounded in the closure of \c step. 
*/
template std::pair GenericRandomWalk(
    const IdArray seeds, int64_t max_num_steps, StepFunc step,
    int64_t max_nodes) {
  // NOTE(review): the template parameter list and the explicit template
  // arguments (e.g. on Ptr()) are not visible in this copy of the file;
  // IdxType is used below but its declaration is not shown here.
  int64_t num_seeds = seeds->shape[0];
  // One slot for the seed itself plus one slot per step.
  int64_t trace_length = max_num_steps + 1;
  // Both outputs reuse the dtype and context of the seed array.
  IdArray traces =
      IdArray::Empty({num_seeds, trace_length}, seeds->dtype, seeds->ctx);
  IdArray eids =
      IdArray::Empty({num_seeds, max_num_steps}, seeds->dtype, seeds->ctx);

  const IdxType *seed_data = seeds.Ptr();
  IdxType *traces_data = traces.Ptr();
  IdxType *eids_data = eids.Ptr();

  // Each seed writes only to its own row of traces/eids, so the rows handed to
  // different invocations of the lambda do not overlap.
  runtime::parallel_for(0, num_seeds, [&](size_t seed_begin, size_t seed_end) {
    for (auto seed_id = seed_begin; seed_id < seed_end; seed_id++) {
      // Declared outside the step loop because the padding loop below resumes
      // from the index at which the walk stopped.
      int64_t i;
      dgl_id_t curr = seed_data[seed_id];
      // The first entry of every row is the seed node.
      traces_data[seed_id * trace_length] = curr;

      CHECK_LT(curr, max_nodes)
          << "Seed node ID exceeds the maximum number of nodes.";

      for (i = 0; i < max_num_steps; ++i) {
        // step receives the start of this seed's row, the current node and
        // the step index; it yields (next node, edge ID, terminate flag).
        const auto &succ = step(traces_data + seed_id * trace_length, curr, i);
        traces_data[seed_id * trace_length + i + 1] = curr = std::get<0>(succ);
        eids_data[seed_id * max_num_steps + i] = std::get<1>(succ);
        // break skips the ++i, so i still points at the terminating step.
        if (std::get<2>(succ)) break;
      }

      // Pad the remainder of the row with -1. After an early break this
      // starts at the terminating step itself, so the values written for that
      // step above are overwritten with -1 as well.
      for (; i < max_num_steps; ++i) {
        traces_data[seed_id * trace_length + i + 1] = -1;
        eids_data[seed_id * max_num_steps + i] = -1;
      }
    }
  });

  return std::make_pair(traces, eids);
}

};  // namespace

};  // namespace impl

};  // namespace sampling

};  // namespace dgl

#endif  // DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_CPU_H_
================================================
FILE: src/graph/sampling/randomwalks/randomwalks_impl.h
================================================
/**
 * Copyright (c) 2018 by Contributors
 * @file graph/sampling/randomwalks_impl.h
 * @brief DGL sampler - templated implementation definition of random walks
 */

#ifndef DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_
#define DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_

#include #include #include #include #include #include

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace sampling {

namespace impl {

/** * @brief
Random walk step function */ template using StepFunc = std::function< // ID Edge ID terminate? std::tuple( IdxType *, // node IDs generated so far dgl_id_t, // last node ID int64_t)>; // # of steps /** * @brief Get the node types traversed by the metapath. * @return A 1D array of shape (len(metapath) + 1,) with node type IDs. */ template TypeArray GetNodeTypesFromMetapath( const HeteroGraphPtr hg, const TypeArray metapath); /** * @brief Metapath-based random walk. * @param hg The heterograph. * @param seeds A 1D array of seed nodes, with the type the source type of the * first edge type in the metapath. * @param metapath A 1D array of edge types * representing the metapath. * @param prob A vector of 1D float arrays, * indicating the transition probability of each edge by edge type. An empty * float array assumes uniform transition. * @return A 2D array of shape * (len(seeds), len(metapath) + 1) with node IDs. The paths that terminated * early are padded with -1. A 2D array of shape (len(seeds), len(metapath)) * with edge IDs. The paths that terminated early are padded with -1. \note * This function should be called together with GetNodeTypesFromMetapath to * determine the node type of each node in the random walk traces. */ template std::pair RandomWalk( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob); /** * @brief Metapath-based random walk with restart probability. * @param hg The heterograph. * @param seeds A 1D array of seed nodes, with the type the source type of the * first edge type in the metapath. * @param metapath A 1D array of edge types * representing the metapath. * @param prob A vector of 1D float arrays, * indicating the transition probability of each edge by edge type. An empty * float array assumes uniform transition. * @param restart_prob Restart * probability * @return A 2D array of shape (len(seeds), len(metapath) + 1) with * node IDs. The paths that terminated early are padded with -1. 
A 2D array of * shape (len(seeds), len(metapath)) with edge IDs. The paths that terminated * early are padded with -1. \note This function should be called together with * GetNodeTypesFromMetapath to determine the node type of each node in the * random walk traces. */ template std::pair RandomWalkWithRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, double restart_prob); /** * @brief Metapath-based random walk with stepwise restart probability. Useful * for PinSAGE-like models. * @param hg The heterograph. * @param seeds A 1D array of seed nodes, with the type the source type of the * first edge type in the metapath. * @param metapath A 1D array of edge types * representing the metapath. * @param prob A vector of 1D float arrays, * indicating the transition probability of each edge by edge type. An empty * float array assumes uniform transition. * @param restart_prob Restart * probability array which has the same number of elements as \c metapath, * indicating the probability to terminate after transition. * @return A 2D array * of shape (len(seeds), len(metapath) + 1) with node IDs. The paths that * terminated early are padded with -1. A 2D array of shape (len(seeds), * len(metapath)) with edge IDs. The paths that terminated early are padded * with -1. \note This function should be called together with * GetNodeTypesFromMetapath to determine the node type of each node in the * random walk traces. 
*/ template std::pair RandomWalkWithStepwiseRestart( const HeteroGraphPtr hg, const IdArray seeds, const TypeArray metapath, const std::vector &prob, FloatArray restart_prob); template std::tuple SelectPinSageNeighbors( const IdArray src, const IdArray dst, const int64_t num_samples_per_node, const int64_t k); }; // namespace impl }; // namespace sampling }; // namespace dgl #endif // DGL_GRAPH_SAMPLING_RANDOMWALKS_RANDOMWALKS_IMPL_H_ ================================================ FILE: src/graph/serialize/dglgraph_data.h ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/serialize/dglgraph_data.h * @brief Graph serialization header */ #ifndef DGL_GRAPH_SERIALIZE_DGLGRAPH_DATA_H_ #define DGL_GRAPH_SERIALIZE_DGLGRAPH_DATA_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../../c_api_common.h" using dgl::ImmutableGraph; using dgl::runtime::NDArray; using namespace dgl::runtime; namespace dgl { namespace serialize { typedef std::pair NamedTensor; class GraphDataObject : public runtime::Object { public: ImmutableGraphPtr gptr; std::vector node_tensors; std::vector edge_tensors; static constexpr const char *_type_key = "graph_serialize.GraphData"; void SetData( ImmutableGraphPtr gptr, Map node_tensors, Map edge_tensors); void Save(dmlc::Stream *fs) const; bool Load(dmlc::Stream *fs); DGL_DECLARE_OBJECT_TYPE_INFO(GraphDataObject, runtime::Object); }; class GraphData : public runtime::ObjectRef { public: DGL_DEFINE_OBJECT_REF_METHODS(GraphData, runtime::ObjectRef, GraphDataObject); /** @brief create a new GraphData reference */ static GraphData Create() { return GraphData(std::make_shared()); } }; ImmutableGraphPtr ToImmutableGraph(GraphPtr g); } // namespace serialize } // namespace dgl #endif // DGL_GRAPH_SERIALIZE_DGLGRAPH_DATA_H_ ================================================ FILE: 
src/graph/serialize/dglgraph_serialize.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/serialize/graph_serialize.cc * @brief Graph serialization implementation * * The storage structure is * { * // MetaData Section * uint64_t kDGLSerializeMagic * uint64_t kVersion * uint64_t GraphType * ** Reserved Area till 4kB ** * * dgl_id_t num_graphs * vector graph_indices (start address of each graph) * vector nodes_num_list (list of number of nodes for each graph) * vector edges_num_list (list of number of edges for each graph) * * vector graph_datas; * * } * * Storage of GraphData is * { * // Everything uses in csr * NDArray indptr * NDArray indices * NDArray edge_ids * vector> node_tensors; * vector> edge_tensors; * } * */ #include #include #include #include #include #include #include #include #include #include #include #include #include "graph_serialize.h" using namespace dgl::runtime; using dgl::COO; using dgl::COOPtr; using dgl::ImmutableGraph; using dgl::runtime::NDArray; using dgl::serialize::GraphData; using dgl::serialize::GraphDataObject; using dmlc::SeekStream; using std::vector; namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, GraphDataObject, true); } namespace dgl { namespace serialize { bool SaveDGLGraphs( std::string filename, List graph_data, std::vector labels_list) { auto fs = std::unique_ptr(dynamic_cast( SeekStream::Create(filename.c_str(), "w", true))); CHECK(fs) << "File name " << filename << " is not a valid local file name"; // Write DGL MetaData const uint64_t kVersion = 1; fs->Write(kDGLSerializeMagic); fs->Write(kVersion); fs->Write(GraphType::kImmutableGraph); fs->Seek(4096); // Write Graph Meta Data dgl_id_t num_graph = graph_data.size(); std::vector graph_indices(num_graph); std::vector nodes_num_list(num_graph); std::vector edges_num_list(num_graph); for (uint64_t i = 0; i < num_graph; ++i) { nodes_num_list[i] = graph_data[i]->gptr->NumVertices(); edges_num_list[i] = 
graph_data[i]->gptr->NumEdges(); } // Reserve spaces for graph indices fs->Write(num_graph); dgl_id_t indices_start_ptr = fs->Tell(); fs->Write(graph_indices); fs->Write(nodes_num_list); fs->Write(edges_num_list); fs->Write(labels_list); // Write GraphData for (uint64_t i = 0; i < num_graph; ++i) { graph_indices[i] = fs->Tell(); GraphDataObject gdata = *graph_data[i].as(); fs->Write(gdata); } fs->Seek(indices_start_ptr); fs->Write(graph_indices); return true; } StorageMetaData LoadDGLGraphs( const std::string &filename, std::vector idx_list, bool onlyMeta) { auto fs = std::unique_ptr( SeekStream::CreateForRead(filename.c_str(), true)); CHECK(fs) << "Filename is invalid"; // Read DGL MetaData uint64_t magicNum, graphType, version; fs->Read(&magicNum); fs->Read(&version); fs->Read(&graphType); fs->Seek(4096); CHECK_EQ(magicNum, kDGLSerializeMagic) << "Invalid DGL files"; CHECK_EQ(version, 1) << "Invalid DGL files"; StorageMetaData metadata = StorageMetaData::Create(); // Read Graph MetaData dgl_id_t num_graph; CHECK(fs->Read(&num_graph)) << "Invalid num of graph"; std::vector graph_indices; std::vector nodes_num_list; std::vector edges_num_list; std::vector labels_list; CHECK(fs->Read(&graph_indices)) << "Invalid graph indices"; CHECK(fs->Read(&nodes_num_list)) << "Invalid node num list"; CHECK(fs->Read(&edges_num_list)) << "Invalid edge num list"; CHECK(fs->Read(&labels_list)) << "Invalid label list"; metadata->SetMetaData(num_graph, nodes_num_list, edges_num_list, labels_list); std::vector gdata_refs; // Early Return if (onlyMeta) { return metadata; } if (idx_list.empty()) { // Read All Graphs gdata_refs.reserve(num_graph); for (uint64_t i = 0; i < num_graph; ++i) { GraphData gdata = GraphData::Create(); GraphDataObject *gdata_ptr = const_cast(gdata.as()); fs->Read(gdata_ptr); gdata_refs.push_back(gdata); } } else { // Read Selected Graphss gdata_refs.reserve(idx_list.size()); // Would be better if idx_list is sorted. 
However the returned the graphs // should be the same order as the idx_list for (uint64_t i = 0; i < idx_list.size(); ++i) { auto gid = idx_list[i]; CHECK((gid < graph_indices.size()) && (gid >= 0)) << "ID " << gid << " in idx_list is out of bound. Please check your idx_list."; fs->Seek(graph_indices[gid]); GraphData gdata = GraphData::Create(); GraphDataObject *gdata_ptr = const_cast(gdata.as()); fs->Read(gdata_ptr); gdata_refs.push_back(gdata); } } metadata->SetGraphData(gdata_refs); return metadata; } void GraphDataObject::SetData( ImmutableGraphPtr gptr, Map node_tensors, Map edge_tensors) { this->gptr = gptr; for (auto kv : node_tensors) { std::string name = kv.first; Value v = kv.second; NDArray ndarray = static_cast(v->data); this->node_tensors.emplace_back(name, ndarray); } for (auto kv : edge_tensors) { std::string &name = kv.first; Value v = kv.second; const NDArray &ndarray = static_cast(v->data); this->edge_tensors.emplace_back(name, ndarray); } } void GraphDataObject::Save(dmlc::Stream *fs) const { // Using in csr for storage const CSRPtr g_csr = this->gptr->GetInCSR(); fs->Write(g_csr->indptr()); fs->Write(g_csr->indices()); fs->Write(g_csr->edge_ids()); fs->Write(node_tensors); fs->Write(edge_tensors); } bool GraphDataObject::Load(dmlc::Stream *fs) { NDArray indptr, indices, edge_ids; fs->Read(&indptr); fs->Read(&indices); fs->Read(&edge_ids); this->gptr = ImmutableGraph::CreateFromCSR(indptr, indices, edge_ids, "in"); fs->Read(&this->node_tensors); fs->Read(&this->edge_tensors); return true; } ImmutableGraphPtr BatchLoadedGraphs(std::vector gdata_list) { std::vector gptrs; gptrs.reserve(gdata_list.size()); for (auto gdata : gdata_list) { gptrs.push_back(static_cast(gdata->gptr)); } ImmutableGraphPtr imGPtr = std::dynamic_pointer_cast(GraphOp::DisjointUnion(gptrs)); return imGPtr; } ImmutableGraphPtr ToImmutableGraph(GraphPtr g) { ImmutableGraphPtr imgr = std::dynamic_pointer_cast(g); if (imgr) { return imgr; } else { MutableGraphPtr mgr = 
std::dynamic_pointer_cast(g); CHECK(mgr) << "Invalid Graph Pointer"; EdgeArray earray = mgr->Edges("eid"); IdArray srcs_array = earray.src; IdArray dsts_array = earray.dst; bool row_sorted, col_sorted; std::tie(row_sorted, col_sorted) = COOIsSorted(aten::COOMatrix( mgr->NumVertices(), mgr->NumVertices(), srcs_array, dsts_array)); ImmutableGraphPtr imgptr = ImmutableGraph::CreateFromCOO( mgr->NumVertices(), srcs_array, dsts_array, row_sorted, col_sorted); return imgptr; } } void StorageMetaDataObject::SetMetaData( dgl_id_t num_graph, std::vector nodes_num_list, std::vector edges_num_list, std::vector labels_list) { this->num_graph = num_graph; this->nodes_num_list = Value(MakeValue(aten::VecToIdArray(nodes_num_list))); this->edges_num_list = Value(MakeValue(aten::VecToIdArray(edges_num_list))); for (auto kv : labels_list) { this->labels_list.Set(kv.first, Value(MakeValue(kv.second))); } } void StorageMetaDataObject::SetGraphData(std::vector gdata) { this->graph_data = List(gdata); } } // namespace serialize } // namespace dgl ================================================ FILE: src/graph/serialize/dglstream.h ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/serialize/dglstream.h * @brief Graph serialization header */ #ifndef DGL_GRAPH_SERIALIZE_DGLSTREAM_H_ #define DGL_GRAPH_SERIALIZE_DGLSTREAM_H_ #include #include #include #include namespace dgl { namespace serialize { /** * @brief DGLStream counts the bytes that already written into the * underlying stream. 
*/
class DGLStream : public dmlc::Stream {
 public:
  /**
   * @brief create a new DGLStream instance
   *
   * The instance is allocated with \c new and returned as a raw pointer, so
   * the caller is responsible for deleting it. \c uri, \c flag and
   * \c allow_null are forwarded to dmlc::Stream::Create; \c formats is stored
   * and reported by FormatsToSave().
   */
  static DGLStream *Create(
      const char *uri, const char *const flag, bool allow_null,
      dgl_format_code_t formats) {
    return new DGLStream(uri, flag, allow_null, formats);
  }

  /** @brief Forward the read to the wrapped stream; not counted. */
  size_t Read(void *ptr, size_t size) override {
    return strm_->Read(ptr, size);
  }

  /** @brief Add \c size to the byte counter, then forward the write. */
  void Write(const void *ptr, size_t size) override {
    count_ += size;
    strm_->Write(ptr, size);
  }

  // Keep the other Read/Write overloads of the base class visible; the
  // overrides above would otherwise hide them.
  using dmlc::Stream::Read;
  using dmlc::Stream::Write;

  /**
   * @brief Whether a wrapped stream exists. Read() and Write() dereference
   *        it without checking, so test this first when the stream was
   *        created with allow_null set.
   */
  bool IsValid() { return strm_.get(); }
  /** @brief Total number of bytes passed to Write() so far. */
  uint64_t Count() const { return count_; }
  /** @brief The format code given at construction. */
  uint64_t FormatsToSave() const { return formats_to_save_; }

 private:
  // Private: instances are obtained through Create().
  DGLStream(
      const char *uri, const char *const flag, bool allow_null,
      dgl_format_code_t formats)
      : strm_(dmlc::Stream::Create(uri, flag, allow_null)),
        formats_to_save_(formats) {}
  // stream for serialization
  std::unique_ptr strm_;
  // number of bytes already written to the stream
  uint64_t count_ = 0;
  // formats to use when saving graph; the ANY_CODE default is replaced by the
  // constructor's initializer
  const dgl_format_code_t formats_to_save_ = ANY_CODE;
};

}  // namespace serialize
}  // namespace dgl

#endif  // DGL_GRAPH_SERIALIZE_DGLSTREAM_H_
================================================
FILE: src/graph/serialize/graph_serialize.cc
================================================
/**
 * Copyright (c) 2019 by Contributors
 * @file graph/serialize/graph_serialize.cc
 * @brief Graph serialization implementation
 *
 * The storage structure is
 * {
 *   // MetaData Section
 *   uint64_t kDGLSerializeMagic
 *   uint64_t kVersion
 *   uint64_t GraphType
 *   ** Reserved Area till 4kB **
 *
 *   dgl_id_t num_graphs
 *   vector graph_indices (start address of each graph)
 *   vector nodes_num_list (list of number of nodes for each graph)
 *   vector edges_num_list (list of number of edges for each graph)
 *
 *   vector graph_datas;
 *
 * }
 *
 * Storage of GraphData is
 * {
 *   // Everything uses in csr
 *   NDArray indptr
 *   NDArray indices
 *   NDArray edge_ids
 *   vector> node_tensors;
 *   vector> edge_tensors;
 * }
 *
 */
#include "graph_serialize.h"

#include #include
#include #include #include #include #include #include #include #include #include #include using namespace dgl::runtime; using dgl::COO; using dgl::COOPtr; using dgl::ImmutableGraph; using dgl::runtime::NDArray; using dgl::serialize::GraphData; using dgl::serialize::GraphDataObject; using dmlc::SeekStream; using dmlc::Stream; using std::vector; namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, GraphDataObject, true); } namespace dgl { namespace serialize { DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_MakeGraphData") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphRef gptr = args[0]; ImmutableGraphPtr imGPtr = ToImmutableGraph(gptr.sptr()); Map node_tensors = args[1]; Map edge_tensors = args[2]; GraphData gd = GraphData::Create(); gd->SetData(imGPtr, node_tensors, edge_tensors); *rv = gd; }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_SaveDGLGraphs_V0") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; List graph_data = args[1]; Map labels = args[2]; std::vector labels_list; for (auto kv : labels) { std::string name = kv.first; Value v = kv.second; NDArray ndarray = static_cast(v->data); labels_list.emplace_back(name, ndarray); } SaveDGLGraphs(filename, graph_data, labels_list); }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GDataGraphHandle") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphData gdata = args[0]; *rv = gdata->gptr; }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GDataNodeTensors") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphData gdata = args[0]; Map rvmap; for (auto kv : gdata->node_tensors) { rvmap.Set(kv.first, Value(MakeValue(kv.second))); } *rv = rvmap; }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GDataEdgeTensors") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphData gdata = args[0]; Map rvmap; for (auto kv : gdata->edge_tensors) { rvmap.Set(kv.first, Value(MakeValue(kv.second))); } *rv = rvmap; }); uint64_t GetFileVersion(const std::string &filename) { auto fs = 
std::unique_ptr( SeekStream::CreateForRead(filename.c_str(), false)); CHECK(fs) << "File " << filename << " not found"; uint64_t magicNum, version; fs->Read(&magicNum); fs->Read(&version); CHECK_EQ(magicNum, kDGLSerializeMagic) << "Invalid DGL files"; return version; } DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_GetFileVersion") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; *rv = static_cast(GetFileVersion(filename)); }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_LoadGraphFiles_V1") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; List idxs = args[1]; bool onlyMeta = args[2]; auto idx_list = ListValueToVector(idxs); *rv = LoadDGLGraphs(filename, idx_list, onlyMeta); }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_DGLAsHeteroGraph") .set_body([](DGLArgs args, DGLRetValue *rv) { GraphRef g = args[0]; ImmutableGraphPtr ig = std::dynamic_pointer_cast(g.sptr()); CHECK(ig) << "graph is not readonly"; *rv = HeteroGraphRef(ig->AsHeteroGraph()); }); DGL_REGISTER_GLOBAL("data.graph_serialize._CAPI_LoadGraphFiles_V2") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; List idxs = args[1]; auto idx_list = ListValueToVector(idxs); *rv = List(LoadHeteroGraphs(filename, idx_list)); }); } // namespace serialize } // namespace dgl ================================================ FILE: src/graph/serialize/graph_serialize.h ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/serialize/graph_serialize.h * @brief Graph serialization header */ #ifndef DGL_GRAPH_SERIALIZE_GRAPH_SERIALIZE_H_ #define DGL_GRAPH_SERIALIZE_GRAPH_SERIALIZE_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../../c_api_common.h" #include "dglgraph_data.h" #include "heterograph_data.h" using dgl::ImmutableGraph; using dgl::runtime::NDArray; using namespace 
dgl::runtime;

namespace dgl {
namespace serialize {

/** @brief Kind of graph stored in a file, as a 64-bit code. */
enum GraphType : uint64_t {
  kMutableGraph = 0ull,
  kImmutableGraph = 1ull,
  kHeteroGraph = 2ull
};

/** @brief Magic number used to recognise a DGL graph file. */
constexpr uint64_t kDGLSerializeMagic = 0xDD2E4FF046B4A13F;

/**
 * @brief Metadata of a graph file together with the graphs loaded from it.
 */
class StorageMetaDataObject : public runtime::Object {
 public:
  // For saving DGLGraph
  dgl_id_t num_graph;
  Value nodes_num_list;
  Value edges_num_list;
  Map labels_list;
  List graph_data;

  static constexpr const char *_type_key = "graph_serialize.StorageMetaData";

  /** @brief Set the graph count, the per-graph sizes and the labels. */
  void SetMetaData(
      dgl_id_t num_graph, std::vector nodes_num_list,
      std::vector edges_num_list, std::vector labels_list);

  /** @brief Set the list of loaded graphs. */
  void SetGraphData(std::vector gdata);

  /**
   * @brief Register the members with the attribute visitor. Note that
   *        \c labels_list is registered under the name "labels".
   */
  void VisitAttrs(AttrVisitor *v) final {
    v->Visit("num_graph", &num_graph);
    v->Visit("nodes_num_list", &nodes_num_list);
    v->Visit("edges_num_list", &edges_num_list);
    v->Visit("labels", &labels_list);
    v->Visit("graph_data", &graph_data);
  }

  DGL_DECLARE_OBJECT_TYPE_INFO(StorageMetaDataObject, runtime::Object);
};

/** @brief Reference type for StorageMetaDataObject. */
class StorageMetaData : public runtime::ObjectRef {
 public:
  DGL_DEFINE_OBJECT_REF_METHODS(
      StorageMetaData, runtime::ObjectRef, StorageMetaDataObject);

  /** @brief create a new StorageMetaData reference */
  static StorageMetaData Create() {
    return StorageMetaData(std::make_shared());
  }
};

// NOTE(review): no definition of LoadDGLGraphFiles is visible in this part of
// the sources; confirm it is still needed.
StorageMetaData LoadDGLGraphFiles(
    const std::string &filename, std::vector idx_list, bool onlyMeta);

/**
 * @brief Load metadata and, unless \c onlyMeta is set, the graphs selected by
 *        \c idx_list from \c filename.
 */
StorageMetaData LoadDGLGraphs(
    const std::string &filename, std::vector idx_list, bool onlyMeta);

/** @brief Save the given graphs and labels to \c filename. */
bool SaveDGLGraphs(
    std::string filename, List graph_data, std::vector labels_list);

/** @brief Load the heterographs selected by \c idx_list from \c filename. */
std::vector LoadHeteroGraphs(
    const std::string &filename, std::vector idx_list);

/** @brief Return \c g as an immutable graph. */
ImmutableGraphPtr ToImmutableGraph(GraphPtr g);

}  // namespace serialize
}  // namespace dgl

#endif  // DGL_GRAPH_SERIALIZE_GRAPH_SERIALIZE_H_
================================================
FILE: src/graph/serialize/heterograph_data.h
================================================
/**
 * Copyright (c) 2019 by Contributors
 * @file graph/serialize/heterograph_data.h
 * @brief Graph serialization header
 */
#ifndef DGL_GRAPH_SERIALIZE_HETEROGRAPH_DATA_H_ #define DGL_GRAPH_SERIALIZE_HETEROGRAPH_DATA_H_ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../../c_api_common.h" #include "../heterograph.h" using dgl::runtime::NDArray; using namespace dgl::runtime; namespace dgl { namespace serialize { typedef std::pair NamedTensor; class HeteroGraphDataObject : public runtime::Object { public: std::shared_ptr gptr; std::vector> node_tensors; std::vector> edge_tensors; std::vector etype_names; std::vector ntype_names; static constexpr const char *_type_key = "heterograph_serialize.HeteroGraphData"; HeteroGraphDataObject() {} HeteroGraphDataObject( HeteroGraphPtr gptr, List> ndata, List> edata, List ntype_names, List etype_names) { this->gptr = std::dynamic_pointer_cast(gptr); CHECK_NOTNULL(this->gptr); for (auto nd_dict : ndata) { node_tensors.emplace_back(); for (auto kv : nd_dict) { auto last = &node_tensors.back(); NDArray ndarray = kv.second->data; last->emplace_back(kv.first, ndarray); } } for (auto nd_dict : edata) { edge_tensors.emplace_back(); for (auto kv : nd_dict) { auto last = &edge_tensors.back(); NDArray ndarray = kv.second->data; last->emplace_back(kv.first, ndarray); } } this->ntype_names = ListValueToVector(ntype_names); this->etype_names = ListValueToVector(etype_names); } void Save(dmlc::Stream *fs) const { fs->Write(gptr); fs->Write(node_tensors); fs->Write(edge_tensors); fs->Write(ntype_names); fs->Write(etype_names); } bool Load(dmlc::Stream *fs) { fs->Read(&gptr); fs->Read(&node_tensors); fs->Read(&edge_tensors); fs->Read(&ntype_names); fs->Read(&etype_names); return true; } DGL_DECLARE_OBJECT_TYPE_INFO(HeteroGraphDataObject, runtime::Object); }; class HeteroGraphData : public runtime::ObjectRef { public: DGL_DEFINE_OBJECT_REF_METHODS( HeteroGraphData, runtime::ObjectRef, HeteroGraphDataObject); /** @brief create a new GraphData reference */ static HeteroGraphData 
Create( HeteroGraphPtr gptr, List> node_tensors, List> edge_tensors, List ntype_names, List etype_names) { return HeteroGraphData(std::make_shared( gptr, node_tensors, edge_tensors, ntype_names, etype_names)); } /** @brief create an empty GraphData reference */ static HeteroGraphData Create() { return HeteroGraphData(std::make_shared()); } }; } // namespace serialize } // namespace dgl namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, dgl::serialize::HeteroGraphDataObject, true); } #endif // DGL_GRAPH_SERIALIZE_HETEROGRAPH_DATA_H_ ================================================ FILE: src/graph/serialize/heterograph_serialize.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/serialize/heterograph_serialize.cc * @brief DGLHeteroGraph serialization implementation * * The storage structure is * { * // MetaData Section * uint64_t kDGLSerializeMagic * uint64_t kVersion = 2 * uint64_t GraphType = kDGLHeteroGraph * dgl_id_t num_graphs * ** Reserved Area till 4kB ** * * uint64_t gdata_start_pos (This stores the start position of graph_data, * which is used to skip label dict part if unnecessary) * vector> label_dict (To store the dict[str, NDArray]) * * vector graph_datas; * vector graph_indices (start address of each graph) * uint64_t size_of_graph_indices_vector (Used to seek to graph_indices * vector) * * } * * Storage of HeteroGraphData is * { * HeteroGraphPtr ptr; * vector>> node_tensors; * vector>> edge_tensors; * vector ntype_name; * vector etype_name; * } * */ #include #include #include #include #include #include #include #include #include #include #include #include #include "../heterograph.h" #include "./dglstream.h" #include "./graph_serialize.h" #include "dmlc/memory_io.h" namespace dgl { namespace serialize { using namespace dgl::runtime; using dmlc::SeekStream; using dmlc::Stream; using dmlc::io::FileSystem; using dmlc::io::URI; bool SaveHeteroGraphs( std::string filename, List hdata, const std::vector 
&nd_list, dgl_format_code_t formats) { auto fs = std::unique_ptr( DGLStream::Create(filename.c_str(), "w", false, formats)); CHECK(fs->IsValid()) << "File name " << filename << " is not a valid name"; // Write DGL MetaData const uint64_t kVersion = 2; std::array meta_buffer; // Write metadata into char buffer with size 4096 dmlc::MemoryFixedSizeStream meta_fs_(meta_buffer.data(), 4096); auto meta_fs = static_cast(&meta_fs_); meta_fs->Write(kDGLSerializeMagic); meta_fs->Write(kVersion); meta_fs->Write(GraphType::kHeteroGraph); uint64_t num_graph = hdata.size(); meta_fs->Write(num_graph); // Write metadata into files fs->Write(meta_buffer.data(), 4096); // Calculate label dict binary size std::string labels_blob; dmlc::MemoryStringStream label_fs_(&labels_blob); auto label_fs = static_cast(&label_fs_); label_fs->Write(nd_list); uint64_t gdata_start_pos = fs->Count() + sizeof(uint64_t) + labels_blob.size(); // Write start position of gdata, which can be skipped when only reading gdata // And label dict fs->Write(gdata_start_pos); fs->Write(labels_blob.c_str(), labels_blob.size()); std::vector graph_indices(num_graph); // Write HeteroGraphData for (uint64_t i = 0; i < num_graph; ++i) { graph_indices[i] = fs->Count(); auto gdata = hdata[i].sptr(); fs->Write(gdata); } // Write indptr into string to count size std::string indptr_blob; dmlc::MemoryStringStream indptr_fs_(&indptr_blob); auto indptr_fs = static_cast(&indptr_fs_); indptr_fs->Write(graph_indices); uint64_t indptr_buffer_size = indptr_blob.size(); fs->Write(indptr_blob); fs->Write(indptr_buffer_size); return true; } std::vector LoadHeteroGraphs( const std::string &filename, std::vector idx_list) { auto fs = std::unique_ptr( SeekStream::CreateForRead(filename.c_str(), false)); CHECK(fs) << "File name " << filename << " is not a valid name"; // Read DGL MetaData uint64_t magicNum, graphType, version, num_graph; fs->Read(&magicNum); fs->Read(&version); fs->Read(&graphType); CHECK(fs->Read(&num_graph)) << "Invalid 
num of graph"; fs->Seek(4096); CHECK_EQ(magicNum, kDGLSerializeMagic) << "Invalid DGL files"; CHECK_EQ(version, 2) << "Invalid GraphType"; CHECK_EQ(graphType, GraphType::kHeteroGraph) << "Invalid GraphType"; uint64_t gdata_start_pos; fs->Read(&gdata_start_pos); // Skip labels part fs->Seek(gdata_start_pos); std::vector gdata_refs; if (idx_list.empty()) { // Read All Graphs gdata_refs.reserve(num_graph); for (uint64_t i = 0; i < num_graph; ++i) { HeteroGraphData gdata = HeteroGraphData::Create(); auto hetero_data = gdata.sptr(); fs->Read(&hetero_data); gdata_refs.push_back(gdata); } } else { uint64_t gdata_start_pos = fs->Tell(); // Read Selected Graphss gdata_refs.reserve(idx_list.size()); URI uri(filename.c_str()); uint64_t filesize = FileSystem::GetInstance(uri)->GetPathInfo(uri).size; fs->Seek(filesize - sizeof(uint64_t)); uint64_t indptr_buffer_size; fs->Read(&indptr_buffer_size); std::vector graph_indices(num_graph); fs->Seek(filesize - sizeof(uint64_t) - indptr_buffer_size); fs->Read(&graph_indices); fs->Seek(gdata_start_pos); // Would be better if idx_list is sorted. However the returned the graphs // should be the same order as the idx_list for (uint64_t i = 0; i < idx_list.size(); ++i) { auto gid = idx_list[i]; CHECK((gid < graph_indices.size()) && (gid >= 0)) << "ID " << gid << " in idx_list is out of bound. 
Please check your idx_list."; fs->Seek(graph_indices[gid]); HeteroGraphData gdata = HeteroGraphData::Create(); auto hetero_data = gdata.sptr(); fs->Read(&hetero_data); gdata_refs.push_back(gdata); } } return gdata_refs; } std::vector LoadLabels_V2(const std::string &filename) { auto fs = std::unique_ptr( SeekStream::CreateForRead(filename.c_str(), false)); CHECK(fs) << "File name " << filename << " is not a valid name"; // Read DGL MetaData uint64_t magicNum, graphType, version, num_graph; fs->Read(&magicNum); fs->Read(&version); fs->Read(&graphType); CHECK(fs->Read(&num_graph)) << "Invalid num of graph"; fs->Seek(4096); uint64_t gdata_start_pos; fs->Read(&gdata_start_pos); std::vector labels_list; fs->Read(&labels_list); return labels_list; } DGL_REGISTER_GLOBAL("data.heterograph_serialize._CAPI_MakeHeteroGraphData") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef hg = args[0]; List> ndata = args[1]; List> edata = args[2]; List ntype_names = args[3]; List etype_names = args[4]; *rv = HeteroGraphData::Create( hg.sptr(), ndata, edata, ntype_names, etype_names); }); DGL_REGISTER_GLOBAL("data.heterograph_serialize._CAPI_SaveHeteroGraphData") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; List hgdata = args[1]; Map nd_map = args[2]; List formats = args[3]; std::vector formats_vec; for (const auto &val : formats) { formats_vec.push_back(ParseSparseFormat(val->data)); } const auto formats_code = SparseFormatsToCode(formats_vec); std::vector nd_list; for (auto kv : nd_map) { NDArray ndarray = static_cast(kv.second->data); nd_list.emplace_back(kv.first, ndarray); } *rv = dgl::serialize::SaveHeteroGraphs( filename, hgdata, nd_list, formats_code); }); DGL_REGISTER_GLOBAL( "data.heterograph_serialize._CAPI_GetGindexFromHeteroGraphData") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphData hdata = args[0]; *rv = HeteroGraphRef(hdata->gptr); }); DGL_REGISTER_GLOBAL( 
// Return the node tensors of a HeteroGraphData as a list with one entry per
// inner vector of node_tensors. Each entry is a flat list that alternates
// name, tensor, name, tensor, ...
DGL_REGISTER_GLOBAL(
    "data.heterograph_serialize._CAPI_GetNDataFromHeteroGraphData")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroGraphData hdata = args[0];
      List> ntensors;
      // Iterate by const reference: taking each inner vector by value copied
      // every (name, tensor) pair once per outer iteration.
      for (const auto &tensor_list : hdata->node_tensors) {
        List nlist;
        for (const auto &kv : tensor_list) {
          nlist.push_back(Value(MakeValue(kv.first)));
          nlist.push_back(Value(MakeValue(kv.second)));
        }
        ntensors.push_back(nlist);
      }
      *rv = ntensors;
    });
serialization implementation */ #include #include #include #include #include #include "../../c_api_common.h" using namespace dgl::runtime; using dmlc::SeekStream; namespace dgl { namespace serialize { typedef std::pair NamedTensor; constexpr uint64_t kDGLSerialize_Tensors = 0xDD5A9FBE3FA2443F; DGL_REGISTER_GLOBAL("data.tensor_serialize._CAPI_SaveNDArrayDict") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; auto fs = std::unique_ptr( dmlc::Stream::Create(filename.c_str(), "w")); CHECK(fs) << "Filename is invalid"; fs->Write(kDGLSerialize_Tensors); bool empty_dict = args[2]; Map nd_dict; if (!empty_dict) { nd_dict = args[1]; } std::vector namedTensors; fs->Write(static_cast(nd_dict.size())); for (auto kv : nd_dict) { NDArray ndarray = static_cast(kv.second->data); namedTensors.emplace_back(kv.first, ndarray); } fs->Write(namedTensors); *rv = true; }); DGL_REGISTER_GLOBAL("data.tensor_serialize._CAPI_LoadNDArrayDict") .set_body([](DGLArgs args, DGLRetValue *rv) { std::string filename = args[0]; auto fs = std::unique_ptr( dmlc::Stream::Create(filename.c_str(), "r")); CHECK(fs) << "Filename is invalid or file doesn't exists"; uint64_t magincNum, num_elements; CHECK(fs->Read(&magincNum)) << "Invalid file"; CHECK_EQ(magincNum, kDGLSerialize_Tensors) << "Invalid DGL tensor file"; CHECK(fs->Read(&num_elements)) << "Invalid num of elements"; Map nd_dict; std::vector namedTensors; fs->Read(&namedTensors); for (auto kv : namedTensors) { Value ndarray = Value(MakeValue(kv.second)); nd_dict.Set(kv.first, ndarray); } *rv = nd_dict; }); } // namespace serialize } // namespace dgl ================================================ FILE: src/graph/serialize/zerocopy_serializer.cc ================================================ /** * Copyright (c) 2020-2022 by Contributors * @file graph/serailize/zerocopy_serializer.cc * @brief serializer implementation. 
/**
 * @brief Wrap a raw data pointer in an NDArray with the given shape, dtype and
 *        context by forwarding to NDArray::CreateFromRaw.
 *
 * @param shape Shape of the resulting array.
 * @param dtype Element type of the resulting array.
 * @param ctx Device context of the resulting array.
 * @param raw Pointer to the data; PopNDArray in this file passes nullptr for
 *        null arrays.
 * @return The NDArray produced by NDArray::CreateFromRaw.
 */
NDArray CreateNDArrayFromRawData(
    std::vector shape, DGLDataType dtype, DGLContext ctx, void* raw) {
  // NOTE(review): the trailing `true` is an NDArray::CreateFromRaw flag whose
  // meaning is not visible from this file -- confirm against its declaration.
  return NDArray::CreateFromRaw(shape, dtype, ctx, raw, true);
}
this->Write(true); this->Write(mem->GetName()); } #else LOG(FATAL) << "StreamWithBuffer is not supported on windows"; #endif // _WIN32 return; } NDArray StreamWithBuffer::PopNDArray() { #ifndef _WIN32 int ndim; DGLDataType dtype; CHECK(this->Read(&ndim)) << "Invalid DGLArray file format"; CHECK(this->Read(&dtype)) << "Invalid DGLArray file format"; std::vector shape(ndim); if (ndim != 0) { CHECK(this->ReadArray(&shape[0], ndim)) << "Invalid DGLArray file format"; } DGLContext cpu_ctx; cpu_ctx.device_type = kDGLCPU; cpu_ctx.device_id = 0; bool is_shared_mem; CHECK(this->Read(&is_shared_mem)) << "Invalid stream read"; std::string sharedmem_name; if (is_shared_mem) { CHECK(!send_to_remote_) << "Invalid attempt to deserialize from shared " "memory with send_to_remote=true"; CHECK(this->Read(&sharedmem_name)) << "Invalid stream read"; return NDArray::EmptyShared(sharedmem_name, shape, dtype, cpu_ctx, false); } else { CHECK(send_to_remote_) << "Invalid attempt to deserialize from raw data " "pointer with send_to_remote=false"; NDArray ret; if (ndim == 0 || shape[0] == 0) { // Mean this is a null ndarray ret = CreateNDArrayFromRawData(shape, dtype, cpu_ctx, nullptr); } else { ret = CreateNDArrayFromRawData( shape, dtype, cpu_ctx, buffer_list_.front().data); buffer_list_.pop_front(); } return ret; } #else LOG(FATAL) << "StreamWithBuffer is not supported on windows"; return NDArray(); #endif // _WIN32 } } // namespace dgl ================================================ FILE: src/graph/shared_mem_manager.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/shared_mem_manager.cc * @brief DGL sampler implementation */ #include "shared_mem_manager.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "../c_api_common.h" #include "heterograph.h" using namespace dgl::runtime; using namespace dgl::aten; namespace dgl { template <> 
/**
 * @brief Copy a CSR matrix into shared memory and record its metadata.
 *
 * The three arrays are copied first, under "<name>_indptr", "<name>_indices"
 * and "<name>_data". The scalar fields num_rows, num_cols and sorted are then
 * appended to the metadata stream. This order must match what
 * CreateFromSharedMem reads back.
 *
 * @return A CSR matrix with the same scalar fields, backed by the copied
 *         arrays.
 */
template <>
CSRMatrix SharedMemManager::CopyToSharedMem(
    const CSRMatrix &csr, std::string name) {
  const std::string indptr_name = name + "_indptr";
  const std::string indices_name = name + "_indices";
  const std::string data_name = name + "_data";

  // Array copies come first, in indptr / indices / data order.
  auto shared_indptr = CopyToSharedMem(csr.indptr, indptr_name);
  auto shared_indices = CopyToSharedMem(csr.indices, indices_name);
  auto shared_data = CopyToSharedMem(csr.data, data_name);

  // Scalar fields follow the arrays in the metadata stream.
  strm_->Write(csr.num_rows);
  strm_->Write(csr.num_cols);
  strm_->Write(csr.sorted);

  CSRMatrix shared_csr(
      csr.num_rows, csr.num_cols, shared_indptr, shared_indices, shared_data,
      csr.sorted);
  return shared_csr;
}

/**
 * @brief Copy a COO matrix into shared memory and record its metadata.
 *
 * The three arrays are copied first, under "<name>_row", "<name>_col" and
 * "<name>_data". The scalar fields num_rows, num_cols, row_sorted and
 * col_sorted are then appended to the metadata stream.
 *
 * @return A COO matrix with the same scalar fields, backed by the copied
 *         arrays.
 */
template <>
COOMatrix SharedMemManager::CopyToSharedMem(
    const COOMatrix &coo, std::string name) {
  const std::string row_name = name + "_row";
  const std::string col_name = name + "_col";
  const std::string data_name = name + "_data";

  // Array copies come first, in row / col / data order.
  auto shared_row = CopyToSharedMem(coo.row, row_name);
  auto shared_col = CopyToSharedMem(coo.col, col_name);
  auto shared_data = CopyToSharedMem(coo.data, data_name);

  // Scalar fields follow the arrays in the metadata stream.
  strm_->Write(coo.num_rows);
  strm_->Write(coo.num_cols);
  strm_->Write(coo.row_sorted);
  strm_->Write(coo.col_sorted);

  COOMatrix shared_coo(
      coo.num_rows, coo.num_cols, shared_row, shared_col, shared_data,
      coo.row_sorted, coo.col_sorted);
  return shared_coo;
}
NDArray::Empty(shape, dtype, ctx); } else { *nd = NDArray::EmptyShared(graph_name_ + name, shape, dtype, ctx, false); } return true; } template <> bool SharedMemManager::CreateFromSharedMem( COOMatrix *coo, std::string name) { CreateFromSharedMem(&coo->row, name + "_row"); CreateFromSharedMem(&coo->col, name + "_col"); CreateFromSharedMem(&coo->data, name + "_data"); strm_->Read(&coo->num_rows); strm_->Read(&coo->num_cols); strm_->Read(&coo->row_sorted); strm_->Read(&coo->col_sorted); return true; } template <> bool SharedMemManager::CreateFromSharedMem( CSRMatrix *csr, std::string name) { CreateFromSharedMem(&csr->indptr, name + "_indptr"); CreateFromSharedMem(&csr->indices, name + "_indices"); CreateFromSharedMem(&csr->data, name + "_data"); strm_->Read(&csr->num_rows); strm_->Read(&csr->num_cols); strm_->Read(&csr->sorted); return true; } } // namespace dgl ================================================ FILE: src/graph/shared_mem_manager.h ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/shared_mem_manager.cc * @brief DGL shared mem manager APIs */ #ifndef DGL_GRAPH_SHARED_MEM_MANAGER_H_ #define DGL_GRAPH_SHARED_MEM_MANAGER_H_ #include #include #include #include #include #include #include #include #include #include namespace dgl { using dgl::runtime::SharedMemory; const size_t SHARED_MEM_METAINFO_SIZE_MAX = 1024 * 32; // Utility class to copy objects to shared memory and record metadatas class SharedMemManager : public dmlc::Stream { public: explicit SharedMemManager(std::string graph_name, dmlc::Stream* strm) : graph_name_(graph_name), strm_(strm) {} template T CopyToSharedMem(const T& data, std::string name); template bool CreateFromSharedMem(T* out_data, std::string name); // delegate methods to strm_ virtual size_t Read(void* ptr, size_t size) { return strm_->Read(ptr, size); } virtual void Write(const void* ptr, size_t size) { strm_->Write(ptr, size); } using dmlc::Stream::Read; using 
dmlc::Stream::Write; private: std::string graph_name_; dmlc::Stream* strm_; }; } // namespace dgl #endif // DGL_GRAPH_SHARED_MEM_MANAGER_H_ ================================================ FILE: src/graph/subgraph.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file graph/subgraph.cc * @brief Functions for extracting subgraphs. */ #include "./heterograph.h" using namespace dgl::runtime; namespace dgl { HeteroSubgraph InEdgeGraphRelabelNodes( const HeteroGraphPtr graph, const std::vector& vids) { CHECK_EQ(vids.size(), graph->NumVertexTypes()) << "Invalid input: the input list size must be the same as the number of " "vertex types."; std::vector eids(graph->NumEdgeTypes()); DGLContext ctx = aten::GetContextOf(vids); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { auto pair = graph->meta_graph()->FindEdge(etype); const dgl_type_t dst_vtype = pair.second; if (aten::IsNullArray(vids[dst_vtype])) { eids[etype] = IdArray::Empty({0}, graph->DataType(), ctx); } else { const auto& earr = graph->InEdges(etype, {vids[dst_vtype]}); eids[etype] = earr.id; } } return graph->EdgeSubgraph(eids, false); } HeteroSubgraph InEdgeGraphNoRelabelNodes( const HeteroGraphPtr graph, const std::vector& vids) { // TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR // graphs CHECK_EQ(vids.size(), graph->NumVertexTypes()) << "Invalid input: the input list size must be the same as the number of " "vertex types."; std::vector subrels(graph->NumEdgeTypes()); std::vector induced_edges(graph->NumEdgeTypes()); DGLContext ctx = aten::GetContextOf(vids); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { auto pair = graph->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; auto relgraph = graph->GetRelationGraph(etype); if (aten::IsNullArray(vids[dst_vtype])) { // create a placeholder graph subrels[etype] = UnitGraph::Empty( 
relgraph->NumVertexTypes(), graph->NumVertices(src_vtype), graph->NumVertices(dst_vtype), graph->DataType(), ctx); induced_edges[etype] = IdArray::Empty({0}, graph->DataType(), graph->Context()); } else { const auto& earr = graph->InEdges(etype, {vids[dst_vtype]}); subrels[etype] = UnitGraph::CreateFromCOO( relgraph->NumVertexTypes(), graph->NumVertices(src_vtype), graph->NumVertices(dst_vtype), earr.src, earr.dst); induced_edges[etype] = earr.id; } } HeteroSubgraph ret; ret.graph = CreateHeteroGraph( graph->meta_graph(), subrels, graph->NumVerticesPerType()); ret.induced_edges = std::move(induced_edges); return ret; } HeteroSubgraph InEdgeGraph( const HeteroGraphPtr graph, const std::vector& vids, bool relabel_nodes) { if (relabel_nodes) { return InEdgeGraphRelabelNodes(graph, vids); } else { return InEdgeGraphNoRelabelNodes(graph, vids); } } HeteroSubgraph OutEdgeGraphRelabelNodes( const HeteroGraphPtr graph, const std::vector& vids) { CHECK_EQ(vids.size(), graph->NumVertexTypes()) << "Invalid input: the input list size must be the same as the number of " "vertex types."; std::vector eids(graph->NumEdgeTypes()); DGLContext ctx = aten::GetContextOf(vids); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { auto pair = graph->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; if (aten::IsNullArray(vids[src_vtype])) { eids[etype] = IdArray::Empty({0}, graph->DataType(), ctx); } else { const auto& earr = graph->OutEdges(etype, {vids[src_vtype]}); eids[etype] = earr.id; } } return graph->EdgeSubgraph(eids, false); } HeteroSubgraph OutEdgeGraphNoRelabelNodes( const HeteroGraphPtr graph, const std::vector& vids) { // TODO(mufei): This should also use EdgeSubgraph once it is supported for CSR // graphs CHECK_EQ(vids.size(), graph->NumVertexTypes()) << "Invalid input: the input list size must be the same as the number of " "vertex types."; std::vector subrels(graph->NumEdgeTypes()); std::vector induced_edges(graph->NumEdgeTypes()); 
DGLContext ctx = aten::GetContextOf(vids); for (dgl_type_t etype = 0; etype < graph->NumEdgeTypes(); ++etype) { auto pair = graph->meta_graph()->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; auto relgraph = graph->GetRelationGraph(etype); if (aten::IsNullArray(vids[src_vtype])) { // create a placeholder graph subrels[etype] = UnitGraph::Empty( relgraph->NumVertexTypes(), graph->NumVertices(src_vtype), graph->NumVertices(dst_vtype), graph->DataType(), ctx); induced_edges[etype] = IdArray::Empty({0}, graph->DataType(), graph->Context()); } else { const auto& earr = graph->OutEdges(etype, {vids[src_vtype]}); subrels[etype] = UnitGraph::CreateFromCOO( relgraph->NumVertexTypes(), graph->NumVertices(src_vtype), graph->NumVertices(dst_vtype), earr.src, earr.dst); induced_edges[etype] = earr.id; } } HeteroSubgraph ret; ret.graph = CreateHeteroGraph( graph->meta_graph(), subrels, graph->NumVerticesPerType()); ret.induced_edges = std::move(induced_edges); return ret; } HeteroSubgraph OutEdgeGraph( const HeteroGraphPtr graph, const std::vector& vids, bool relabel_nodes) { if (relabel_nodes) { return OutEdgeGraphRelabelNodes(graph, vids); } else { return OutEdgeGraphNoRelabelNodes(graph, vids); } } } // namespace dgl ================================================ FILE: src/graph/transform/compact.cc ================================================ /** * Copyright 2019-2021 Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* * @file graph/transform/compact.cc * @brief Compact graph implementation */ #include "compact.h" #include #include #include #include #include #include #include #include #include "../../c_api_common.h" #include "../unit_graph.h" // TODO(BarclayII): currently CompactGraphs depend on IdHashMap implementation // which only works on CPU. Should fix later to make it device agnostic. #include "../../array/cpu/array_utils.h" namespace dgl { using namespace dgl::runtime; using namespace dgl::aten; namespace transform { namespace { template std::pair, std::vector> CompactGraphsCPU( const std::vector &graphs, const std::vector &always_preserve) { // TODO(BarclayII): check whether the node space and metagraph of each graph // is the same. Step 1: Collect the nodes that has connections for each type. const int64_t num_ntypes = graphs[0]->NumVertexTypes(); std::vector> hashmaps(num_ntypes); std::vector> all_edges( graphs.size()); // all_edges[i][etype] std::vector max_vertex_cnt(num_ntypes, 0); for (size_t i = 0; i < graphs.size(); ++i) { const HeteroGraphPtr curr_graph = graphs[i]; const int64_t num_etypes = curr_graph->NumEdgeTypes(); for (IdType etype = 0; etype < num_etypes; ++etype) { IdType srctype, dsttype; std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype); const int64_t n_edges = curr_graph->NumEdges(etype); max_vertex_cnt[srctype] += n_edges; max_vertex_cnt[dsttype] += n_edges; } } // Reserve the space for hash maps before ahead to aoivd rehashing for (size_t i = 0; i < static_cast(num_ntypes); ++i) { if (i < always_preserve.size()) hashmaps[i].Reserve(always_preserve[i]->shape[0] + max_vertex_cnt[i]); else hashmaps[i].Reserve(max_vertex_cnt[i]); } for (size_t i = 0; i < always_preserve.size(); ++i) { hashmaps[i].Update(always_preserve[i]); } for (size_t i = 0; i < graphs.size(); ++i) { const HeteroGraphPtr curr_graph = graphs[i]; const int64_t num_etypes = curr_graph->NumEdgeTypes(); all_edges[i].reserve(num_etypes); for (IdType etype = 0; etype < 
num_etypes; ++etype) { IdType srctype, dsttype; std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype); const EdgeArray edges = curr_graph->Edges(etype, "eid"); hashmaps[srctype].Update(edges.src); hashmaps[dsttype].Update(edges.dst); all_edges[i].push_back(edges); } } // Step 2: Relabel the nodes for each type to a smaller ID space and save the // mapping. std::vector induced_nodes(num_ntypes); std::vector num_induced_nodes(num_ntypes); for (int64_t i = 0; i < num_ntypes; ++i) { induced_nodes[i] = hashmaps[i].Values(); num_induced_nodes[i] = hashmaps[i].Size(); } // Step 3: Remap the edges of each graph. std::vector new_graphs; for (size_t i = 0; i < graphs.size(); ++i) { std::vector rel_graphs; const HeteroGraphPtr curr_graph = graphs[i]; const auto meta_graph = curr_graph->meta_graph(); const int64_t num_etypes = curr_graph->NumEdgeTypes(); for (IdType etype = 0; etype < num_etypes; ++etype) { IdType srctype, dsttype; std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype); const EdgeArray &edges = all_edges[i][etype]; const IdArray mapped_rows = hashmaps[srctype].Map(edges.src, -1); const IdArray mapped_cols = hashmaps[dsttype].Map(edges.dst, -1); rel_graphs.push_back(UnitGraph::CreateFromCOO( srctype == dsttype ? 
1 : 2, induced_nodes[srctype]->shape[0], induced_nodes[dsttype]->shape[0], mapped_rows, mapped_cols)); } new_graphs.push_back( CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes)); } return std::make_pair(new_graphs, induced_nodes); } }; // namespace template <> std::pair, std::vector> CompactGraphs( const std::vector &graphs, const std::vector &always_preserve) { return CompactGraphsCPU(graphs, always_preserve); } template <> std::pair, std::vector> CompactGraphs( const std::vector &graphs, const std::vector &always_preserve) { return CompactGraphsCPU(graphs, always_preserve); } DGL_REGISTER_GLOBAL("transform._CAPI_DGLCompactGraphs") .set_body([](DGLArgs args, DGLRetValue *rv) { List graph_refs = args[0]; List always_preserve_refs = args[1]; std::vector graphs; std::vector always_preserve; for (HeteroGraphRef gref : graph_refs) graphs.push_back(gref.sptr()); for (Value array : always_preserve_refs) always_preserve.push_back(array->data); // TODO(BarclayII): check for all IdArrays CHECK(graphs[0]->DataType() == always_preserve[0]->dtype) << "data type mismatch."; std::pair, std::vector> result_pair; ATEN_XPU_SWITCH_CUDA( graphs[0]->Context().device_type, XPU, "CompactGraphs", { ATEN_ID_TYPE_SWITCH(graphs[0]->DataType(), IdType, { result_pair = CompactGraphs(graphs, always_preserve); }); }); List compacted_graph_refs; List induced_nodes; for (const HeteroGraphPtr &g : result_pair.first) compacted_graph_refs.push_back(HeteroGraphRef(g)); for (const IdArray &ids : result_pair.second) induced_nodes.push_back(Value(MakeValue(ids))); List result; result.push_back(compacted_graph_refs); result.push_back(induced_nodes); *rv = result; }); }; // namespace transform }; // namespace dgl ================================================ FILE: src/graph/transform/compact.h ================================================ /** * Copyright 2021 Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance 
with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file graph/transform/compact.h * @brief Functions to find and eliminate the common isolated nodes across * all given graphs with the same set of nodes. */ #ifndef DGL_GRAPH_TRANSFORM_COMPACT_H_ #define DGL_GRAPH_TRANSFORM_COMPACT_H_ #include #include #include #include namespace dgl { namespace transform { /** * @brief Given a list of graphs with the same set of nodes, find and eliminate * the common isolated nodes across all graphs. * * @tparam XPU The type of device to operate on. * @tparam IdType The type to use as an index. * @param graphs The list of graphs to be compacted. * @param always_preserve The vector of nodes to be preserved. * * @return The vector of compacted graphs and the vector of induced nodes. 
*/ template std::pair, std::vector> CompactGraphs( const std::vector &graphs, const std::vector &always_preserve); } // namespace transform } // namespace dgl #endif // DGL_GRAPH_TRANSFORM_COMPACT_H_ ================================================ FILE: src/graph/transform/cpu/kdtree_ndarray_adapter.h ================================================ /** * Copyright (c) 2021 by Contributors * @file graph/transform/cpu/kdtree_ndarray_adapter.h * @brief NDArray adapter for nanoflann, without * duplicating the storage */ #ifndef DGL_GRAPH_TRANSFORM_CPU_KDTREE_NDARRAY_ADAPTER_H_ #define DGL_GRAPH_TRANSFORM_CPU_KDTREE_NDARRAY_ADAPTER_H_ #include #include #include #include "../../../c_api_common.h" namespace dgl { namespace transform { namespace knn_utils { /** * @brief A simple 2D NDArray adapter for nanoflann, without duplicating the * storage. * * @tparam FloatType: The type of the point coordinates (typically, double or * float). * @tparam IdType: The type for indices in the KD-tree index (typically, * size_t of int) * @tparam FeatureDim: If set to > 0, it specifies a compile-time fixed * dimensionality for the points in the data set, allowing more compiler * optimizations. * @tparam Dist: The distance metric to use: nanoflann::metric_L1, nanoflann::metric_L2, nanoflann::metric_L2_Simple, etc. 
* @note The spelling of dgl's adapter ("adapter") is different from naneflann * ("adaptor") */ template < typename FloatType, typename IdType, int FeatureDim = -1, typename Dist = nanoflann::metric_L2> class KDTreeNDArrayAdapter { public: using self_type = KDTreeNDArrayAdapter; using metric_type = typename Dist::template traits::distance_t; using index_type = nanoflann::KDTreeSingleIndexAdaptor< metric_type, self_type, FeatureDim, IdType>; KDTreeNDArrayAdapter( const size_t /* dims */, const NDArray data_points, const int leaf_max_size = 10) : data_(data_points) { CHECK(data_points->shape[0] != 0 && data_points->shape[1] != 0) << "Tensor containing input data point set must be 2D."; const size_t dims = data_points->shape[1]; CHECK(!(FeatureDim > 0 && static_cast(dims) != FeatureDim)) << "Data set feature dimension does not match the 'FeatureDim' " << "template argument."; index_ = new index_type( static_cast(dims), *this, nanoflann::KDTreeSingleIndexAdaptorParams(leaf_max_size)); index_->buildIndex(); } ~KDTreeNDArrayAdapter() { delete index_; } index_type* GetIndex() { return index_; } /** * @brief Query for the \a num_closest points to a given point * Note that this is a short-cut method for GetIndex()->findNeighbors(). 
*/ void query( const FloatType* query_pt, const size_t num_closest, IdType* out_idxs, FloatType* out_dists) const { nanoflann::KNNResultSet resultSet(num_closest); resultSet.init(out_idxs, out_dists); index_->findNeighbors(resultSet, query_pt, nanoflann::SearchParams()); } /** @brief Interface expected by KDTreeSingleIndexAdaptor */ const self_type& derived() const { return *this; } /** @brief Interface expected by KDTreeSingleIndexAdaptor */ self_type& derived() { return *this; } /** * @brief Interface expected by KDTreeSingleIndexAdaptor, * return the number of data points */ size_t kdtree_get_point_count() const { return data_->shape[0]; } /** * @brief Interface expected by KDTreeSingleIndexAdaptor, * return the dim'th component of the idx'th point */ FloatType kdtree_get_pt(const size_t idx, const size_t dim) const { return data_.Ptr()[idx * data_->shape[1] + dim]; } /** * @brief Interface expected by KDTreeSingleIndexAdaptor. * Optional bounding-box computation: return false to * default to a standard bbox computation loop. 
* */ template bool kdtree_get_bbox(BBOX& /* bb */) const { return false; } private: index_type* index_; // The kd tree index const NDArray data_; // data points }; } // namespace knn_utils } // namespace transform } // namespace dgl #endif // DGL_GRAPH_TRANSFORM_CPU_KDTREE_NDARRAY_ADAPTER_H_ ================================================ FILE: src/graph/transform/cpu/knn.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/transform/cpu/knn.cc * @brief k-nearest-neighbor (KNN) implementation */ #include "../knn.h" #include #include #include #include #include #include #include #include #include "kdtree_ndarray_adapter.h" using namespace dgl::runtime; using namespace dgl::transform::knn_utils; namespace dgl { namespace transform { namespace impl { // This value is directly from pynndescent static constexpr int NN_DESCENT_BLOCK_SIZE = 16384; /** * @brief Compute Euclidean distance between two vectors, return positive * infinite value if the intermediate distance is greater than the worst * distance. 
/**
 * @brief Squared Euclidean distance with early termination.
 *
 * Accumulates the squared differences of the first \a dim components. As soon
 * as the running sum exceeds \a worst_dist the function gives up and returns
 * the largest finite FloatType value instead of the true distance.
 */
template <typename FloatType, typename IdType>
FloatType EuclideanDistWithCheck(
    const FloatType* vec1, const FloatType* vec2, int64_t dim,
    FloatType worst_dist = std::numeric_limits<FloatType>::max()) {
  FloatType acc = 0;
  for (IdType d = 0; d < dim; ++d) {
    acc += (vec1[d] - vec2[d]) * (vec1[d] - vec2[d]);
    if (acc > worst_dist) {
      return std::numeric_limits<FloatType>::max();
    }
  }
  return acc;
}

/** @brief Squared Euclidean distance between two vectors of length dim. */
template <typename FloatType, typename IdType>
FloatType EuclideanDist(
    const FloatType* vec1, const FloatType* vec2, int64_t dim) {
  FloatType acc = 0;
  for (IdType d = 0; d < dim; ++d) {
    acc += (vec1[d] - vec2[d]) * (vec1[d] - vec2[d]);
  }
  return acc;
}

/**
 * @brief Replace the root of a size-k max-heap (keyed on dist) with a new
 * element and restore the heap order.
 *
 * Nothing happens when new_dist is larger than the current root, or when
 * check_repeat is set and new_id is already stored in out[0..k).
 */
template <typename FloatType, typename IdType>
void HeapInsert(
    IdType* out, FloatType* dist, IdType new_id, FloatType new_dist, int k,
    bool check_repeat = false) {
  if (new_dist > dist[0]) return;

  // Reject ids that are already present.
  if (check_repeat) {
    for (IdType pos = 0; pos < k; ++pos) {
      if (out[pos] == new_id) return;
    }
  }

  // Overwrite the root, then sift it down.
  dist[0] = new_dist;
  out[0] = new_id;

  IdType parent = 0;
  for (;;) {
    const IdType left = 2 * parent + 1;
    const IdType right = left + 1;
    IdType largest = parent;
    if (left < k && dist[left] > dist[largest]) largest = left;
    if (right < k && dist[right] > dist[largest]) largest = right;
    if (largest == parent) break;

    std::swap(dist[parent], dist[largest]);
    std::swap(out[parent], out[largest]);
    parent = largest;
  }
}

/**
 * @brief Same as HeapInsert, but also carries a per-slot flag along with each
 * element.
 *
 * @return 1 if the element was inserted, 0 if it was rejected.
 */
template <typename FloatType, typename IdType>
int FlaggedHeapInsert(
    IdType* out, FloatType* dist, bool* flag, IdType new_id,
    FloatType new_dist, bool new_flag, int k, bool check_repeat = false) {
  if (new_dist > dist[0]) return 0;

  if (check_repeat) {
    for (IdType pos = 0; pos < k; ++pos) {
      if (out[pos] == new_id) return 0;
    }
  }

  dist[0] = new_dist;
  out[0] = new_id;
  flag[0] = new_flag;

  IdType parent = 0;
  for (;;) {
    const IdType left = 2 * parent + 1;
    const IdType right = left + 1;
    IdType largest = parent;
    if (left < k && dist[left] > dist[largest]) largest = left;
    if (right < k && dist[right] > dist[largest]) largest = right;
    if (largest == parent) break;

    std::swap(dist[parent], dist[largest]);
    std::swap(out[parent], out[largest]);
    std::swap(flag[parent], flag[largest]);
    parent = largest;
  }
  return 1;
}

/**
 * @brief Arrange (index, dist) pairs into a max-heap keyed on dist.
 * Used by NN-descent.
 */
template <typename FloatType, typename IdType>
void BuildHeap(IdType* index, FloatType* dist, int k) {
  // Sift down every internal node, from the last one up to the root.
  for (int start = k / 2 - 1; start >= 0; --start) {
    IdType node = start;
    for (;;) {
      const IdType left = node * 2 + 1;
      const IdType right = left + 1;
      IdType largest = node;
      if (left < k && dist[left] > dist[largest]) largest = left;
      if (right < k && dist[right] > dist[largest]) largest = right;
      if (largest == node) break;

      std::swap(index[largest], index[node]);
      std::swap(dist[largest], dist[node]);
      node = largest;
    }
  }
}
*/ template int UpdateNeighbors( IdType* neighbors, FloatType* dists, const FloatType* points, bool* flags, IdType c1, IdType c2, IdType point_start, int64_t feature_size, int k) { IdType c1_local = c1 - point_start, c2_local = c2 - point_start; FloatType worst_c1_dist = dists[c1_local * k]; FloatType worst_c2_dist = dists[c2_local * k]; FloatType new_dist = EuclideanDistWithCheck( points + c1 * feature_size, points + c2 * feature_size, feature_size, std::max(worst_c1_dist, worst_c2_dist)); int num_updates = 0; if (new_dist < worst_c1_dist) { ++num_updates; #pragma omp critical { FlaggedHeapInsert( neighbors + c1 * k, dists + c1_local * k, flags + c1_local * k, c2, new_dist, true, k, true); } } if (new_dist < worst_c2_dist) { ++num_updates; #pragma omp critical { FlaggedHeapInsert( neighbors + c2 * k, dists + c2_local * k, flags + c2_local * k, c1, new_dist, true, k, true); } } return num_updates; } /** @brief The kd-tree implementation of K-Nearest Neighbors */ template void KdTreeKNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result) { const int64_t batch_size = data_offsets->shape[0] - 1; const int64_t feature_size = data_points->shape[1]; const IdType* data_offsets_data = data_offsets.Ptr(); const IdType* query_offsets_data = query_offsets.Ptr(); const FloatType* query_points_data = query_points.Ptr(); IdType* query_out = result.Ptr(); IdType* data_out = query_out + k * query_points->shape[0]; for (int64_t b = 0; b < batch_size; ++b) { auto d_offset = data_offsets_data[b]; auto d_length = data_offsets_data[b + 1] - d_offset; auto q_offset = query_offsets_data[b]; auto q_length = query_offsets_data[b + 1] - q_offset; auto out_offset = k * q_offset; // create view for each segment const NDArray current_data_points = const_cast(&data_points) ->CreateView( {d_length, feature_size}, data_points->dtype, d_offset * feature_size * sizeof(FloatType)); const FloatType* 
current_query_pts_data = query_points_data + q_offset * feature_size; KDTreeNDArrayAdapter kdtree( feature_size, current_data_points); // query parallel_for(0, q_length, [&](IdType b, IdType e) { for (auto q = b; q < e; ++q) { std::vector out_buffer(k); std::vector out_dist_buffer(k); auto curr_out_offset = k * q + out_offset; const FloatType* q_point = current_query_pts_data + q * feature_size; size_t num_matches = kdtree.GetIndex()->knnSearch( q_point, k, out_buffer.data(), out_dist_buffer.data()); for (size_t i = 0; i < num_matches; ++i) { query_out[curr_out_offset] = q + q_offset; data_out[curr_out_offset] = out_buffer[i] + d_offset; curr_out_offset++; } } }); } } template void BruteForceKNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result) { const int64_t batch_size = data_offsets->shape[0] - 1; const int64_t feature_size = data_points->shape[1]; const IdType* data_offsets_data = data_offsets.Ptr(); const IdType* query_offsets_data = query_offsets.Ptr(); const FloatType* data_points_data = data_points.Ptr(); const FloatType* query_points_data = query_points.Ptr(); IdType* query_out = result.Ptr(); IdType* data_out = query_out + k * query_points->shape[0]; for (int64_t b = 0; b < batch_size; ++b) { IdType d_start = data_offsets_data[b], d_end = data_offsets_data[b + 1]; IdType q_start = query_offsets_data[b], q_end = query_offsets_data[b + 1]; std::vector dist_buffer(k); parallel_for(q_start, q_end, [&](IdType b, IdType e) { for (auto q_idx = b; q_idx < e; ++q_idx) { std::vector dist_buffer(k); for (IdType k_idx = 0; k_idx < k; ++k_idx) { query_out[q_idx * k + k_idx] = q_idx; dist_buffer[k_idx] = std::numeric_limits::max(); } FloatType worst_dist = std::numeric_limits::max(); for (IdType d_idx = d_start; d_idx < d_end; ++d_idx) { FloatType tmp_dist = EuclideanDistWithCheck( query_points_data + q_idx * feature_size, data_points_data + d_idx * feature_size, 
feature_size, worst_dist); if (tmp_dist == std::numeric_limits::max()) { continue; } IdType out_offset = q_idx * k; HeapInsert( data_out + out_offset, dist_buffer.data(), d_idx, tmp_dist, k); worst_dist = dist_buffer[0]; } } }); } } } // namespace impl template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm) { if (algorithm == std::string("kd-tree")) { impl::KdTreeKNN( data_points, data_offsets, query_points, query_offsets, k, result); } else if (algorithm == std::string("bruteforce")) { impl::BruteForceKNN( data_points, data_offsets, query_points, query_offsets, k, result); } else { LOG(FATAL) << "Algorithm " << algorithm << " is not supported on CPU"; } } template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta) { using nnd_updates_t = std::vector>>; const auto& ctx = points->ctx; auto device = runtime::DeviceAPI::Get(ctx); const int64_t num_nodes = points->shape[0]; const int64_t batch_size = offsets->shape[0] - 1; const int64_t feature_size = points->shape[1]; const IdType* offsets_data = offsets.Ptr(); const FloatType* points_data = points.Ptr(); IdType* central_nodes = result.Ptr(); IdType* neighbors = central_nodes + k * num_nodes; int64_t max_segment_size = 0; // find max segment for (IdType b = 0; b < batch_size; ++b) { if (max_segment_size < offsets_data[b + 1] - offsets_data[b]) max_segment_size = offsets_data[b + 1] - offsets_data[b]; } // allocate memory for candidate, sampling pool, distance and flag IdType* new_candidates = static_cast(device->AllocWorkspace( ctx, max_segment_size * num_candidates * sizeof(IdType))); IdType* old_candidates = static_cast(device->AllocWorkspace( ctx, max_segment_size * num_candidates * sizeof(IdType))); FloatType* new_candidates_dists = static_cast(device->AllocWorkspace( ctx, 
max_segment_size * num_candidates * sizeof(FloatType))); FloatType* old_candidates_dists = static_cast(device->AllocWorkspace( ctx, max_segment_size * num_candidates * sizeof(FloatType))); FloatType* neighbors_dists = static_cast( device->AllocWorkspace(ctx, max_segment_size * k * sizeof(FloatType))); bool* flags = static_cast( device->AllocWorkspace(ctx, max_segment_size * k * sizeof(bool))); for (IdType b = 0; b < batch_size; ++b) { IdType point_idx_start = offsets_data[b], point_idx_end = offsets_data[b + 1]; IdType segment_size = point_idx_end - point_idx_start; // random initialization runtime::parallel_for( point_idx_start, point_idx_end, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { IdType local_idx = i - point_idx_start; dgl::RandomEngine::ThreadLocal()->UniformChoice( k, segment_size, neighbors + i * k, false); for (IdType n = 0; n < k; ++n) { central_nodes[i * k + n] = i; neighbors[i * k + n] += point_idx_start; flags[local_idx * k + n] = true; neighbors_dists[local_idx * k + n] = impl::EuclideanDist( points_data + i * feature_size, points_data + neighbors[i * k + n] * feature_size, feature_size); } impl::BuildHeap( neighbors + i * k, neighbors_dists + local_idx * k, k); } }); size_t num_updates = 0; for (int iter = 0; iter < num_iters; ++iter) { num_updates = 0; // initialize candidates array as empty value runtime::parallel_for( point_idx_start, point_idx_end, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { IdType local_idx = i - point_idx_start; for (IdType c = 0; c < num_candidates; ++c) { new_candidates[local_idx * num_candidates + c] = num_nodes; old_candidates[local_idx * num_candidates + c] = num_nodes; new_candidates_dists[local_idx * num_candidates + c] = std::numeric_limits::max(); old_candidates_dists[local_idx * num_candidates + c] = std::numeric_limits::max(); } } }); // randomly select neighbors as candidates int num_threads = omp_get_max_threads(); runtime::parallel_for(0, num_threads, [&](IdType b, IdType e) { for 
(auto tid = b; tid < e; ++tid) { for (IdType i = point_idx_start; i < point_idx_end; ++i) { IdType local_idx = i - point_idx_start; for (IdType n = 0; n < k; ++n) { IdType neighbor_idx = neighbors[i * k + n]; bool is_new = flags[local_idx * k + n]; IdType local_neighbor_idx = neighbor_idx - point_idx_start; FloatType random_dist = dgl::RandomEngine::ThreadLocal()->Uniform(); if (is_new) { if (local_idx % num_threads == tid) { impl::HeapInsert( new_candidates + local_idx * num_candidates, new_candidates_dists + local_idx * num_candidates, neighbor_idx, random_dist, num_candidates, true); } if (local_neighbor_idx % num_threads == tid) { impl::HeapInsert( new_candidates + local_neighbor_idx * num_candidates, new_candidates_dists + local_neighbor_idx * num_candidates, i, random_dist, num_candidates, true); } } else { if (local_idx % num_threads == tid) { impl::HeapInsert( old_candidates + local_idx * num_candidates, old_candidates_dists + local_idx * num_candidates, neighbor_idx, random_dist, num_candidates, true); } if (local_neighbor_idx % num_threads == tid) { impl::HeapInsert( old_candidates + local_neighbor_idx * num_candidates, old_candidates_dists + local_neighbor_idx * num_candidates, i, random_dist, num_candidates, true); } } } } } }); // mark all elements in new_candidates as false runtime::parallel_for( point_idx_start, point_idx_end, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { IdType local_idx = i - point_idx_start; for (IdType n = 0; n < k; ++n) { IdType n_idx = neighbors[i * k + n]; for (IdType c = 0; c < num_candidates; ++c) { if (new_candidates[local_idx * num_candidates + c] == n_idx) { flags[local_idx * k + n] = false; break; } } } } }); // update neighbors block by block for (IdType block_start = point_idx_start; block_start < point_idx_end; block_start += impl::NN_DESCENT_BLOCK_SIZE) { IdType block_end = std::min(point_idx_end, block_start + impl::NN_DESCENT_BLOCK_SIZE); IdType block_size = block_end - block_start; nnd_updates_t 
updates(block_size); // generate updates runtime::parallel_for(block_start, block_end, [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { IdType local_idx = i - point_idx_start; for (IdType c1 = 0; c1 < num_candidates; ++c1) { IdType new_c1 = new_candidates[local_idx * num_candidates + c1]; if (new_c1 == num_nodes) continue; IdType c1_local = new_c1 - point_idx_start; // new-new for (IdType c2 = c1; c2 < num_candidates; ++c2) { IdType new_c2 = new_candidates[local_idx * num_candidates + c2]; if (new_c2 == num_nodes) continue; IdType c2_local = new_c2 - point_idx_start; FloatType worst_c1_dist = neighbors_dists[c1_local * k]; FloatType worst_c2_dist = neighbors_dists[c2_local * k]; FloatType new_dist = impl::EuclideanDistWithCheck( points_data + new_c1 * feature_size, points_data + new_c2 * feature_size, feature_size, std::max(worst_c1_dist, worst_c2_dist)); if (new_dist < worst_c1_dist || new_dist < worst_c2_dist) { updates[i - block_start].push_back( std::make_tuple(new_c1, new_c2, new_dist)); } } // new-old for (IdType c2 = 0; c2 < num_candidates; ++c2) { IdType old_c2 = old_candidates[local_idx * num_candidates + c2]; if (old_c2 == num_nodes) continue; IdType c2_local = old_c2 - point_idx_start; FloatType worst_c1_dist = neighbors_dists[c1_local * k]; FloatType worst_c2_dist = neighbors_dists[c2_local * k]; FloatType new_dist = impl::EuclideanDistWithCheck( points_data + new_c1 * feature_size, points_data + old_c2 * feature_size, feature_size, std::max(worst_c1_dist, worst_c2_dist)); if (new_dist < worst_c1_dist || new_dist < worst_c2_dist) { updates[i - block_start].push_back( std::make_tuple(new_c1, old_c2, new_dist)); } } } } }); int tid; #pragma omp parallel private(tid, num_threads) reduction(+ : num_updates) { tid = omp_get_thread_num(); num_threads = omp_get_num_threads(); for (IdType i = 0; i < block_size; ++i) { for (const auto& u : updates[i]) { IdType p1, p2; FloatType d; std::tie(p1, p2, d) = u; IdType p1_local = p1 - point_idx_start; IdType 
p2_local = p2 - point_idx_start; if (p1 % num_threads == tid) { num_updates += impl::FlaggedHeapInsert( neighbors + p1 * k, neighbors_dists + p1_local * k, flags + p1_local * k, p2, d, true, k, true); } if (p2 % num_threads == tid) { num_updates += impl::FlaggedHeapInsert( neighbors + p2 * k, neighbors_dists + p2_local * k, flags + p2_local * k, p1, d, true, k, true); } } } } } // early abort if (num_updates <= static_cast(delta * k * segment_size)) { break; } } } device->FreeWorkspace(ctx, new_candidates); device->FreeWorkspace(ctx, old_candidates); device->FreeWorkspace(ctx, new_candidates_dists); device->FreeWorkspace(ctx, old_candidates_dists); device->FreeWorkspace(ctx, neighbors_dists); device->FreeWorkspace(ctx, flags); } template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int 
num_candidates, const double delta); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta); } // namespace transform } // namespace dgl ================================================ FILE: src/graph/transform/cuda/cuda_compact_graph.cu ================================================ /** * Copyright 2021 Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file graph/transform/cuda/cuda_compact_graph.cu * @brief Functions to find and eliminate the common isolated nodes across * all given graphs with the same set of nodes. */ #include #include #include #include #include #include #include "../../../runtime/cuda/cuda_common.h" #include "../../heterograph.h" #include "../compact.h" #include "cuda_map_edges.cuh" using namespace dgl::aten; using namespace dgl::runtime::cuda; using namespace dgl::transform::cuda; namespace dgl { namespace transform { namespace { /** * @brief This function builds node maps for each node type, preserving the * order of the input nodes. Here it is assumed the nodes are not unique, * and thus a unique list is generated. * * @param input_nodes The set of input nodes. * @param node_maps The node maps to be constructed. * @param count_unique_device The number of unique nodes (on the GPU). * @param unique_nodes_device The unique nodes (on the GPU). * @param stream The stream to operate on. 
*/ template void BuildNodeMaps( const std::vector &input_nodes, DeviceNodeMap *const node_maps, int64_t *const count_unique_device, std::vector *const unique_nodes_device, cudaStream_t stream) { const int64_t num_ntypes = static_cast(input_nodes.size()); CUDA_CALL(cudaMemsetAsync( count_unique_device, 0, num_ntypes * sizeof(*count_unique_device), stream)); // possibly duplicated nodes for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) { const IdArray &nodes = input_nodes[ntype]; if (nodes->shape[0] > 0) { CHECK_EQ(nodes->ctx.device_type, kDGLCUDA); node_maps->LhsHashTable(ntype).FillWithDuplicates( nodes.Ptr(), nodes->shape[0], (*unique_nodes_device)[ntype].Ptr(), count_unique_device + ntype, stream); } } } template std::pair, std::vector> CompactGraphsGPU( const std::vector &graphs, const std::vector &always_preserve) { const auto &ctx = graphs[0]->Context(); auto device = runtime::DeviceAPI::Get(ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); CHECK_EQ(ctx.device_type, kDGLCUDA); // Step 1: Collect the nodes that has connections for each type. 
const uint64_t num_ntypes = graphs[0]->NumVertexTypes(); std::vector> all_edges( graphs.size()); // all_edges[i][etype] // count the number of nodes per type std::vector max_vertex_cnt(num_ntypes, 0); for (size_t i = 0; i < graphs.size(); ++i) { const HeteroGraphPtr curr_graph = graphs[i]; const int64_t num_etypes = curr_graph->NumEdgeTypes(); for (IdType etype = 0; etype < num_etypes; ++etype) { IdType srctype, dsttype; std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype); const int64_t n_edges = curr_graph->NumEdges(etype); max_vertex_cnt[srctype] += n_edges; max_vertex_cnt[dsttype] += n_edges; } } for (size_t i = 0; i < always_preserve.size(); ++i) { max_vertex_cnt[i] += always_preserve[i]->shape[0]; } // gather all nodes std::vector all_nodes(num_ntypes); std::vector node_offsets(num_ntypes, 0); for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) { all_nodes[ntype] = NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8); // copy the nodes in always_preserve if (ntype < always_preserve.size() && always_preserve[ntype]->shape[0] > 0) { device->CopyDataFromTo( always_preserve[ntype].Ptr(), 0, all_nodes[ntype].Ptr(), node_offsets[ntype], sizeof(IdType) * always_preserve[ntype]->shape[0], always_preserve[ntype]->ctx, all_nodes[ntype]->ctx, always_preserve[ntype]->dtype); node_offsets[ntype] += sizeof(IdType) * always_preserve[ntype]->shape[0]; } } for (size_t i = 0; i < graphs.size(); ++i) { const HeteroGraphPtr curr_graph = graphs[i]; const int64_t num_etypes = curr_graph->NumEdgeTypes(); all_edges[i].reserve(num_etypes); for (int64_t etype = 0; etype < num_etypes; ++etype) { dgl_type_t srctype, dsttype; std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype); const EdgeArray edges = curr_graph->Edges(etype, "eid"); if (edges.src.defined()) { device->CopyDataFromTo( edges.src.Ptr(), 0, all_nodes[srctype].Ptr(), node_offsets[srctype], sizeof(IdType) * edges.src->shape[0], edges.src->ctx, all_nodes[srctype]->ctx, edges.src->dtype); 
node_offsets[srctype] += sizeof(IdType) * edges.src->shape[0]; } if (edges.dst.defined()) { device->CopyDataFromTo( edges.dst.Ptr(), 0, all_nodes[dsttype].Ptr(), node_offsets[dsttype], sizeof(IdType) * edges.dst->shape[0], edges.dst->ctx, all_nodes[dsttype]->ctx, edges.dst->dtype); node_offsets[dsttype] += sizeof(IdType) * edges.dst->shape[0]; } all_edges[i].push_back(edges); } } // Step 2: Relabel the nodes for each type to a smaller ID space // using BuildNodeMaps // allocate space for map creation // the hashmap on GPU DeviceNodeMap node_maps(max_vertex_cnt, 0, ctx, stream); // number of unique nodes per type on CPU std::vector num_induced_nodes(num_ntypes); // number of unique nodes per type on GPU int64_t *count_unique_device = static_cast( device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes)); // the set of unique nodes per type std::vector induced_nodes(num_ntypes); for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) { induced_nodes[ntype] = NewIdArray(max_vertex_cnt[ntype], ctx, sizeof(IdType) * 8); } BuildNodeMaps( all_nodes, &node_maps, count_unique_device, &induced_nodes, stream); device->CopyDataFromTo( count_unique_device, 0, num_induced_nodes.data(), 0, sizeof(*num_induced_nodes.data()) * num_ntypes, ctx, DGLContext{kDGLCPU, 0}, DGLDataType{kDGLInt, 64, 1}); device->StreamSync(ctx, stream); // wait for the node counts to finish transferring device->FreeWorkspace(ctx, count_unique_device); // resize induced nodes for (uint64_t ntype = 0; ntype < num_ntypes; ++ntype) { induced_nodes[ntype]->shape[0] = num_induced_nodes[ntype]; } // Step 3: Remap the edges of each graph using MapEdges std::vector new_graphs; for (size_t i = 0; i < graphs.size(); ++i) { const HeteroGraphPtr curr_graph = graphs[i]; const auto meta_graph = curr_graph->meta_graph(); const int64_t num_etypes = curr_graph->NumEdgeTypes(); std::vector rel_graphs; rel_graphs.reserve(num_etypes); std::vector new_src; std::vector new_dst; std::tie(new_src, new_dst) = MapEdges(curr_graph, 
all_edges[i], node_maps, stream); for (IdType etype = 0; etype < num_etypes; ++etype) { IdType srctype, dsttype; std::tie(srctype, dsttype) = curr_graph->GetEndpointTypes(etype); rel_graphs.push_back(UnitGraph::CreateFromCOO( srctype == dsttype ? 1 : 2, induced_nodes[srctype]->shape[0], induced_nodes[dsttype]->shape[0], new_src[etype], new_dst[etype])); } new_graphs.push_back( CreateHeteroGraph(meta_graph, rel_graphs, num_induced_nodes)); } return std::make_pair(new_graphs, induced_nodes); } } // namespace template <> std::pair, std::vector> CompactGraphs( const std::vector &graphs, const std::vector &always_preserve) { return CompactGraphsGPU(graphs, always_preserve); } template <> std::pair, std::vector> CompactGraphs( const std::vector &graphs, const std::vector &always_preserve) { return CompactGraphsGPU(graphs, always_preserve); } } // namespace transform } // namespace dgl ================================================ FILE: src/graph/transform/cuda/cuda_map_edges.cuh ================================================ /** * Copyright 2020-2022 Contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file graph/transform/cuda/cuda_map_edges.cuh * @brief Device level functions for mapping edges. 
*/ #ifndef DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_ #define DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_ #include #include #include #include #include #include #include #include #include #include "../../../runtime/cuda/cuda_common.h" #include "../../../runtime/cuda/cuda_hashtable.cuh" using namespace dgl::aten; using namespace dgl::runtime::cuda; namespace dgl { namespace transform { namespace cuda { template __device__ void map_vertex_ids( const IdType* const global, IdType* const new_global, const IdType num_vertices, const DeviceOrderedHashTable& table) { assert(BLOCK_SIZE == blockDim.x); using Mapping = typename OrderedHashTable::Mapping; const IdType tile_start = TILE_SIZE * blockIdx.x; const IdType tile_end = min(TILE_SIZE * (blockIdx.x + 1), num_vertices); for (IdType idx = threadIdx.x + tile_start; idx < tile_end; idx += BLOCK_SIZE) { const Mapping& mapping = *table.Search(global[idx]); new_global[idx] = mapping.local; } } /** * @brief Generate mapped edge endpoint ids. * * @tparam IdType The type of id. * @tparam BLOCK_SIZE The size of each thread block. * @tparam TILE_SIZE The number of edges to process per thread block. * @param global_srcs_device The source ids to map. * @param new_global_srcs_device The mapped source ids (output). * @param global_dsts_device The destination ids to map. * @param new_global_dsts_device The mapped destination ids (output). * @param num_edges The number of edges to map. * @param src_mapping The mapping of sources ids. * @param src_hash_size The the size of source id hash table/mapping. * @param dst_mapping The mapping of destination ids. * @param dst_hash_size The the size of destination id hash table/mapping. 
*/
template __global__ void map_edge_ids(
    const IdType* const global_srcs_device,
    IdType* const new_global_srcs_device,
    const IdType* const global_dsts_device,
    IdType* const new_global_dsts_device, const IdType num_edges,
    DeviceOrderedHashTable src_mapping, DeviceOrderedHashTable dst_mapping) {
  // The launch must use BLOCK_SIZE threads per block and exactly two grid
  // rows (gridDim.y == 2).
  assert(BLOCK_SIZE == blockDim.x);
  assert(2 == gridDim.y);

  // Grid row 0 translates the source ids, the other row the destination ids.
  if (blockIdx.y == 0) {
    map_vertex_ids(
        global_srcs_device, new_global_srcs_device, num_edges, src_mapping);
  } else {
    map_vertex_ids(
        global_dsts_device, new_global_dsts_device, num_edges, dst_mapping);
  }
}

/**
 * @brief Device level node maps for each node type.
 *
 * One OrderedHashTable is created per entry of num_nodes. The Lhs accessors
 * index the tables directly; the Rhs accessors add the stored offset first.
 *
 * @param num_nodes Number of nodes per type.
 * @param offset When offset is set to 0, LhsHashTable is identical to
 *        RhsHashTable. Or set to num_nodes.size()/2 to use separated
 *        LhsHashTable and RhsHashTable.
 * @param ctx The DGL context.
 * @param stream The stream to operate on.
 */
template class DeviceNodeMap {
 public:
  using Mapping = typename OrderedHashTable::Mapping;

  DeviceNodeMap(
      const std::vector& num_nodes, const int64_t offset, DGLContext ctx,
      cudaStream_t stream)
      : num_types_(num_nodes.size()),
        rhs_offset_(offset),
        hash_tables_(),
        ctx_(ctx) {
    // NOTE(review): `device` is not referenced again in this constructor.
    auto device = runtime::DeviceAPI::Get(ctx);

    // One hash table per node type, sized by that type's node count.
    hash_tables_.reserve(num_types_);
    for (int64_t i = 0; i < num_types_; ++i) {
      hash_tables_.emplace_back(
          new OrderedHashTable(num_nodes[i], ctx_, stream));
    }
  }

  /** @brief Hash table at position `index`. */
  OrderedHashTable& LhsHashTable(const size_t index) {
    return HashData(index);
  }

  /** @brief Hash table at position `index + rhs_offset_`. */
  OrderedHashTable& RhsHashTable(const size_t index) {
    return HashData(index + rhs_offset_);
  }

  const OrderedHashTable& LhsHashTable(const size_t index) const {
    return HashData(index);
  }

  const OrderedHashTable& RhsHashTable(const size_t index) const {
    return HashData(index + rhs_offset_);
  }

  /** @brief size() of the lhs hash table at `index`. */
  IdType LhsHashSize(const size_t index) const { return HashSize(index); }

  /** @brief size() of the rhs hash table at `index`. */
  IdType RhsHashSize(const size_t index) const {
    return HashSize(rhs_offset_ + index);
  }

  /** @brief Total number of hash tables held (lhs and rhs together). */
  size_t Size() const { return hash_tables_.size(); }

 private:
  int64_t num_types_;
  size_t rhs_offset_;
  std::vector>> hash_tables_;
  DGLContext ctx_;

  // Bounds-checked access to the underlying table.
  inline OrderedHashTable& HashData(const size_t index) {
    CHECK_LT(index, hash_tables_.size());
    return *hash_tables_[index];
  }

  inline const OrderedHashTable& HashData(const size_t index) const {
    CHECK_LT(index, hash_tables_.size());
    return *hash_tables_[index];
  }

  inline IdType HashSize(const size_t index) const {
    return HashData(index).size();
  }
};

/** @brief Integer division of `num` by `divisor`, rounded up. */
template inline size_t RoundUpDiv(const IdType num, const size_t divisor) {
  return static_cast(num / divisor) + (num % divisor == 0 ? 0 : 1);
}

/** @brief Round `num` up to the next multiple of `unit`. */
template inline IdType RoundUp(const IdType num, const size_t unit) {
  return RoundUpDiv(num, unit) * unit;
}

/**
 * @brief Translate the endpoints of every edge set through the node maps.
 *
 * For each edge type with a defined id array and at least one edge, two new
 * id arrays are allocated and filled by a map_edge_ids launch. Every other
 * edge type gets a pair of null arrays, so both returned vectors have one
 * entry per edge set.
 *
 * @param graph Graph used to look up the context and the endpoint types.
 * @param edge_sets The edges of each edge type.
 * @param node_map Hash tables used to translate source (lhs) and destination
 *        (rhs) ids.
 * @param stream The stream the kernels are launched on.
 *
 * @return Tuple of (new lhs ids per edge type, new rhs ids per edge type).
 */
template std::tuple, std::vector> MapEdges(
    HeteroGraphPtr graph, const std::vector& edge_sets,
    const DeviceNodeMap& node_map, cudaStream_t stream) {
  constexpr const int BLOCK_SIZE = 128;
  constexpr const size_t TILE_SIZE = 1024;

  const auto& ctx = graph->Context();

  std::vector new_lhs;
  new_lhs.reserve(edge_sets.size());
  std::vector new_rhs;
  new_rhs.reserve(edge_sets.size());

  // The next performance optimization here, is to perform mapping of all edge
  // types in a single kernel launch.
  const int64_t num_edge_sets = static_cast(edge_sets.size());
  for (int64_t etype = 0; etype < num_edge_sets; ++etype) {
    const EdgeArray& edges = edge_sets[etype];
    if (edges.id.defined() && edges.src->shape[0] > 0) {
      const int64_t num_edges = edges.src->shape[0];

      new_lhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType) * 8));
      new_rhs.emplace_back(NewIdArray(num_edges, ctx, sizeof(IdType) * 8));

      const auto src_dst_types = graph->GetEndpointTypes(etype);
      const int src_type = src_dst_types.first;
      const int dst_type = src_dst_types.second;

      // One grid column per tile of TILE_SIZE edges; two grid rows so a
      // single launch covers both endpoints.
      const dim3 grid(RoundUpDiv(num_edges, TILE_SIZE), 2);
      const dim3 block(BLOCK_SIZE);

      // map the srcs (grid row 0) and the dsts (grid row 1)
      CUDA_KERNEL_CALL(
          (map_edge_ids), grid, block, 0, stream, edges.src.Ptr(),
          new_lhs.back().Ptr(), edges.dst.Ptr(), new_rhs.back().Ptr(),
          num_edges, node_map.LhsHashTable(src_type).DeviceHandle(),
          node_map.RhsHashTable(dst_type).DeviceHandle());
    } else {
      // Keep the output vectors aligned with the edge types.
      new_lhs.emplace_back(
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
      new_rhs.emplace_back(
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
    }
  }

  return std::tuple, std::vector>(
      std::move(new_lhs), std::move(new_rhs));
}

}  // namespace cuda
}  // namespace transform
}  // namespace dgl

#endif  // DGL_GRAPH_TRANSFORM_CUDA_CUDA_MAP_EDGES_CUH_

================================================
FILE: src/graph/transform/cuda/cuda_to_block.cu
================================================
/**
 *  Copyright 2020-2021 Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. * * @file graph/transform/cuda/cuda_to_block.cu * @brief Functions to convert a set of edges into a graph block with local * ids. * * Tested via python wrapper: python/dgl/path/to/to_block.py */ #include #include #include #include #include #include #include #include "../../../runtime/cuda/cuda_common.h" #include "../../heterograph.h" #include "../to_block.h" #include "cuda_map_edges.cuh" using namespace dgl::aten; using namespace dgl::runtime::cuda; using namespace dgl::transform::cuda; using TensorDispatcher = dgl::runtime::TensorDispatcher; namespace dgl { namespace transform { namespace { template class DeviceNodeMapMaker { public: explicit DeviceNodeMapMaker(const std::vector& maxNodesPerType) : max_num_nodes_(0) { max_num_nodes_ = *std::max_element(maxNodesPerType.begin(), maxNodesPerType.end()); } /** * @brief This function builds node maps for each node type, preserving the * order of the input nodes. Here it is assumed the lhs_nodes are not unique, * and thus a unique list is generated. * * @param lhs_nodes The set of source input nodes. * @param rhs_nodes The set of destination input nodes. * @param node_maps The node maps to be constructed. * @param count_lhs_device The number of unique source nodes (on the GPU). * @param lhs_device The unique source nodes (on the GPU). * @param stream The stream to operate on. 
*/ void Make( const std::vector& lhs_nodes, const std::vector& rhs_nodes, DeviceNodeMap* const node_maps, int64_t* const count_lhs_device, std::vector* const lhs_device, cudaStream_t stream) { const int64_t num_ntypes = lhs_nodes.size() + rhs_nodes.size(); CUDA_CALL(cudaMemsetAsync( count_lhs_device, 0, num_ntypes * sizeof(*count_lhs_device), stream)); // possibly dublicate lhs nodes const int64_t lhs_num_ntypes = static_cast(lhs_nodes.size()); for (int64_t ntype = 0; ntype < lhs_num_ntypes; ++ntype) { const IdArray& nodes = lhs_nodes[ntype]; if (nodes->shape[0] > 0) { CHECK_EQ(nodes->ctx.device_type, kDGLCUDA); node_maps->LhsHashTable(ntype).FillWithDuplicates( nodes.Ptr(), nodes->shape[0], (*lhs_device)[ntype].Ptr(), count_lhs_device + ntype, stream); } } // unique rhs nodes const int64_t rhs_num_ntypes = static_cast(rhs_nodes.size()); for (int64_t ntype = 0; ntype < rhs_num_ntypes; ++ntype) { const IdArray& nodes = rhs_nodes[ntype]; if (nodes->shape[0] > 0) { node_maps->RhsHashTable(ntype).FillWithUnique( nodes.Ptr(), nodes->shape[0], stream); } } } /** * @brief This function builds node maps for each node type, preserving the * order of the input nodes. Here it is assumed both lhs_nodes and rhs_nodes * are unique. * * @param lhs_nodes The set of source input nodes. * @param rhs_nodes The set of destination input nodes. * @param node_maps The node maps to be constructed. * @param stream The stream to operate on. 
*/ void Make( const std::vector& lhs_nodes, const std::vector& rhs_nodes, DeviceNodeMap* const node_maps, cudaStream_t stream) { const int64_t num_ntypes = lhs_nodes.size() + rhs_nodes.size(); // unique lhs nodes const int64_t lhs_num_ntypes = static_cast(lhs_nodes.size()); for (int64_t ntype = 0; ntype < lhs_num_ntypes; ++ntype) { const IdArray& nodes = lhs_nodes[ntype]; if (nodes->shape[0] > 0) { CHECK_EQ(nodes->ctx.device_type, kDGLCUDA); node_maps->LhsHashTable(ntype).FillWithUnique( nodes.Ptr(), nodes->shape[0], stream); } } // unique rhs nodes const int64_t rhs_num_ntypes = static_cast(rhs_nodes.size()); for (int64_t ntype = 0; ntype < rhs_num_ntypes; ++ntype) { const IdArray& nodes = rhs_nodes[ntype]; if (nodes->shape[0] > 0) { node_maps->RhsHashTable(ntype).FillWithUnique( nodes.Ptr(), nodes->shape[0], stream); } } } private: IdType max_num_nodes_; }; template struct CUDAIdsMapper { std::tuple, std::vector> operator()( const HeteroGraphPtr& graph, bool include_rhs_in_lhs, int64_t num_ntypes, const DGLContext& ctx, const std::vector& maxNodesPerType, const std::vector& edge_arrays, const std::vector& src_nodes, const std::vector& rhs_nodes, std::vector* const lhs_nodes_ptr, std::vector* const num_nodes_per_type_ptr) { std::vector& lhs_nodes = *lhs_nodes_ptr; std::vector& num_nodes_per_type = *num_nodes_per_type_ptr; const bool generate_lhs_nodes = lhs_nodes.empty(); auto device = runtime::DeviceAPI::Get(ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); // Allocate space for map creation process. DeviceNodeMapMaker maker(maxNodesPerType); DeviceNodeMap node_maps(maxNodesPerType, num_ntypes, ctx, stream); if (generate_lhs_nodes) { lhs_nodes.reserve(num_ntypes); for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) { lhs_nodes.emplace_back( NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8)); } } cudaEvent_t copyEvent; NDArray new_len_tensor; // Populate the mappings. 
if (generate_lhs_nodes) { int64_t* count_lhs_device = static_cast( device->AllocWorkspace(ctx, sizeof(int64_t) * num_ntypes * 2)); maker.Make( src_nodes, rhs_nodes, &node_maps, count_lhs_device, &lhs_nodes, stream); CUDA_CALL(cudaEventCreate(©Event)); if (TensorDispatcher::Global()->IsAvailable()) { new_len_tensor = NDArray::PinnedEmpty( {num_ntypes}, DGLDataTypeTraits::dtype, DGLContext{kDGLCPU, 0}); } else { // use pageable memory, it will unecessarily block but be functional new_len_tensor = NDArray::Empty( {num_ntypes}, DGLDataTypeTraits::dtype, DGLContext{kDGLCPU, 0}); } CUDA_CALL(cudaMemcpyAsync( new_len_tensor->data, count_lhs_device, sizeof(*num_nodes_per_type.data()) * num_ntypes, cudaMemcpyDeviceToHost, stream)); CUDA_CALL(cudaEventRecord(copyEvent, stream)); device->FreeWorkspace(ctx, count_lhs_device); } else { maker.Make(lhs_nodes, rhs_nodes, &node_maps, stream); for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) { num_nodes_per_type[ntype] = lhs_nodes[ntype]->shape[0]; } } // Map node numberings from global to local, and build pointer for CSR. auto ret = MapEdges(graph, edge_arrays, node_maps, stream); if (generate_lhs_nodes) { // wait for the previous copy CUDA_CALL(cudaEventSynchronize(copyEvent)); CUDA_CALL(cudaEventDestroy(copyEvent)); // Resize lhs nodes. for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) { num_nodes_per_type[ntype] = static_cast(new_len_tensor->data)[ntype]; lhs_nodes[ntype]->shape[0] = num_nodes_per_type[ntype]; } } return ret; } }; template std::tuple> ToBlockGPU( HeteroGraphPtr graph, const std::vector& rhs_nodes, bool include_rhs_in_lhs, std::vector* const lhs_nodes_ptr) { return dgl::transform::ProcessToBlock( graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes_ptr, CUDAIdsMapper()); } } // namespace // Use explicit names to get around MSVC's broken mangling that thinks the // following two functions are the same. Using template<> fails to export the // symbols. 
std::tuple>
// ToBlock
ToBlockGPU32(
    HeteroGraphPtr graph, const std::vector& rhs_nodes,
    bool include_rhs_in_lhs, std::vector* const lhs_nodes) {
  // Thin forwarding wrapper around ToBlockGPU.
  return ToBlockGPU(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

std::tuple>
// ToBlock
ToBlockGPU64(
    HeteroGraphPtr graph, const std::vector& rhs_nodes,
    bool include_rhs_in_lhs, std::vector* const lhs_nodes) {
  // Thin forwarding wrapper around ToBlockGPU.
  return ToBlockGPU(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

}  // namespace transform
}  // namespace dgl

================================================
FILE: src/graph/transform/cuda/knn.cu
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file graph/transform/cuda/knn.cu
 * @brief k-nearest-neighbor (KNN) implementation (cuda)
 */

#include
#include
#include
#include
#include
#include  // NOLINT
#include
#include
#include
#include
#include "../../../array/cuda/utils.h"
#include "../../../runtime/cuda/cuda_common.h"
#include "../knn.h"

namespace dgl {
namespace transform {
namespace impl {
/**
 * @brief Given input `size`, find the smallest value
 * greater or equal to `size` that is a multiple of `align`.
 *
 * The result is computed with a bit mask, `((size - 1) | (align - 1)) + 1`,
 * which only yields a multiple of `align` when `align` is a power of two.
 * `size` is returned unchanged when `align <= 1` or `size <= 0`.
 *
 * e.g. Pow2Align(17, 4) = 20, Pow2Align(17, 8) = 24
 */
template static __host__ __device__ std::enable_if_t::value, Type>
Pow2Align(Type size, Type align) {
  if (align <= 1 || size <= 0) return size;
  return ((size - 1) | (align - 1)) + 1;
}

/**
 * @brief Utility class used to avoid linker errors with extern
 * unsized shared memory arrays with templated type
 *
 * Converting an instance to a pointer yields the start of the dynamically
 * sized `extern __shared__` array.
 */
template struct SharedMemory {
  __device__ inline operator Type*() {
    extern __shared__ int __smem[];
    return reinterpret_cast(__smem);
  }
  __device__ inline operator const Type*() const {
    extern __shared__ int __smem[];
    return reinterpret_cast(__smem);
  }
};

// specialize for double to avoid unaligned memory
// access compile errors
template <> struct SharedMemory {
  __device__ inline operator double*() {
    extern __shared__ double __smem_d[];
    return reinterpret_cast(__smem_d);
  }
  __device__ inline operator const double*() const {
    extern __shared__ double __smem_d[];
    return reinterpret_cast(__smem_d);
  }
};

/**
 * @brief Compute Euclidean distance between two vectors in a cuda kernel
 *
 * Note that the returned value is the sum of squared differences; no square
 * root is taken.
 *
 * @param vec1 First vector, `dim` elements.
 * @param vec2 Second vector, `dim` elements.
 * @param dim Number of elements in each vector.
 */
template __device__ FloatType
EuclideanDist(const FloatType* vec1, const FloatType* vec2, const int64_t dim) {
  FloatType dist = 0;
  IdType idx = 0;
  // Main loop, manually unrolled four elements at a time.
  for (; idx < dim - 3; idx += 4) {
    FloatType diff0 = vec1[idx] - vec2[idx];
    FloatType diff1 = vec1[idx + 1] - vec2[idx + 1];
    FloatType diff2 = vec1[idx + 2] - vec2[idx + 2];
    FloatType diff3 = vec1[idx + 3] - vec2[idx + 3];

    dist += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
  }

  // Remaining (dim % 4) elements.
  for (; idx < dim; ++idx) {
    FloatType diff = vec1[idx] - vec2[idx];
    dist += diff * diff;
  }

  return dist;
}

/**
 * @brief Compute Euclidean distance between two vectors in a cuda kernel,
 * return the largest representable value (`std::numeric_limits` max, used in
 * place of infinity) if the intermediate distance is greater
 * than the worst distance.
*/
template __device__ FloatType EuclideanDistWithCheck(
    const FloatType* vec1, const FloatType* vec2, const int64_t dim,
    const FloatType worst_dist) {
  FloatType dist = 0;
  IdType idx = 0;
  bool early_stop = false;

  // Unrolled by four; the running sum is compared with worst_dist after
  // every group so hopeless candidates are abandoned early.
  for (; idx < dim - 3; idx += 4) {
    FloatType diff0 = vec1[idx] - vec2[idx];
    FloatType diff1 = vec1[idx + 1] - vec2[idx + 1];
    FloatType diff2 = vec1[idx + 2] - vec2[idx + 2];
    FloatType diff3 = vec1[idx + 3] - vec2[idx + 3];

    dist += diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;
    if (dist > worst_dist) {
      early_stop = true;
      // Setting idx to dim makes the tail loop below a no-op.
      idx = dim;
      break;
    }
  }

  // Remaining (dim % 4) elements, with the same early exit.
  for (; idx < dim; ++idx) {
    FloatType diff = vec1[idx] - vec2[idx];
    dist += diff * diff;
    if (dist > worst_dist) {
      early_stop = true;
      break;
    }
  }

  if (early_stop) {
    return std::numeric_limits::max();
  } else {
    return dist;
  }
}

/**
 * @brief Arrange `dists` (and `indices` alongside it) into a max-heap, i.e.
 * the largest distance ends up at position 0.
 *
 * @param indices Ids that are permuted together with their distances.
 * @param dists Distances to heapify in place.
 * @param size Number of elements in both arrays.
 */
template __device__ void BuildHeap(
    IdType* indices, FloatType* dists, int size) {
  // Sift down every internal node, starting from the last one.
  for (int i = size / 2 - 1; i >= 0; --i) {
    IdType idx = i;
    while (true) {
      IdType largest = idx;
      IdType left = idx * 2 + 1;
      IdType right = left + 1;
      if (left < size && dists[left] > dists[largest]) {
        largest = left;
      }
      if (right < size && dists[right] > dists[largest]) {
        largest = right;
      }
      if (largest != idx) {
        IdType tmp_idx = indices[largest];
        indices[largest] = indices[idx];
        indices[idx] = tmp_idx;

        FloatType tmp_dist = dists[largest];
        dists[largest] = dists[idx];
        dists[idx] = tmp_dist;
        idx = largest;
      } else {
        break;
      }
    }
  }
}

/**
 * @brief Offer a new (id, distance) pair to a max-heap of fixed size.
 *
 * Nothing happens when `new_dist` is greater than the current root distance.
 * Otherwise the root is overwritten with the new pair and sifted down.
 *
 * @param indices Heap of ids, permuted together with `dist`.
 * @param dist Heap of distances; `dist[0]` is the largest.
 * @param new_idx Id of the candidate.
 * @param new_dist Distance of the candidate.
 * @param size Number of elements in the heap.
 * @param check_repeat When true, the candidate is dropped if `new_idx` is
 *        already stored in `indices`.
 */
template __device__ void HeapInsert(
    IdType* indices, FloatType* dist, IdType new_idx, FloatType new_dist,
    int size, bool check_repeat = false) {
  if (new_dist > dist[0]) return;

  // check if we have it
  if (check_repeat) {
    for (IdType i = 0; i < size; ++i) {
      if (indices[i] == new_idx) return;
    }
  }

  // Replace the root, then restore the heap property by sifting down.
  IdType left = 0, right = 0, idx = 0, largest = 0;
  dist[0] = new_dist;
  indices[0] = new_idx;
  while (true) {
    left = idx * 2 + 1;
    right = left + 1;
    if (left < size && dist[left] > dist[largest]) {
      largest = left;
    }
    if (right < size && dist[right] > dist[largest]) {
      largest = right;
    }
    if (largest != idx) {
      IdType tmp_idx = indices[idx];
      indices[idx] = indices[largest];
      indices[largest] = tmp_idx;

      FloatType tmp_dist = dist[idx];
      dist[idx] = dist[largest];
      dist[largest] = tmp_dist;

      idx = largest;
    } else {
      break;
    }
  }
}

/**
 * @brief Same as HeapInsert, but every heap entry also carries a boolean
 * flag that is moved together with its id and distance.
 *
 * @param indices Heap of ids.
 * @param dist Heap of distances; `dist[0]` is the largest.
 * @param flags Per-entry flags, permuted with the heap.
 * @param new_idx Id of the candidate.
 * @param new_dist Distance of the candidate.
 * @param new_flag Flag stored with the candidate.
 * @param size Number of elements in the heap.
 * @param check_repeat When true, the candidate is dropped if `new_idx` is
 *        already stored in `indices`.
 *
 * @return true if the candidate was inserted, false if it was rejected.
 */
template __device__ bool FlaggedHeapInsert(
    IdType* indices, FloatType* dist, bool* flags, IdType new_idx,
    FloatType new_dist, bool new_flag, int size, bool check_repeat = false) {
  if (new_dist > dist[0]) return false;

  // check if we have it
  if (check_repeat) {
    for (IdType i = 0; i < size; ++i) {
      if (indices[i] == new_idx) return false;
    }
  }

  // Replace the root, then restore the heap property by sifting down.
  IdType left = 0, right = 0, idx = 0, largest = 0;
  dist[0] = new_dist;
  indices[0] = new_idx;
  flags[0] = new_flag;
  while (true) {
    left = idx * 2 + 1;
    right = left + 1;
    if (left < size && dist[left] > dist[largest]) {
      largest = left;
    }
    if (right < size && dist[right] > dist[largest]) {
      largest = right;
    }
    if (largest != idx) {
      IdType tmp_idx = indices[idx];
      indices[idx] = indices[largest];
      indices[largest] = tmp_idx;

      FloatType tmp_dist = dist[idx];
      dist[idx] = dist[largest];
      dist[largest] = tmp_dist;

      bool tmp_flag = flags[idx];
      flags[idx] = flags[largest];
      flags[largest] = tmp_flag;

      idx = largest;
    } else {
      break;
    }
  }
  return true;
}

/**
 * @brief Brute force kNN kernel. Compute distance for each pair of input points
 * and get the result directly (without a distance matrix).
 *
 * One thread handles one query point. Its k results are kept as a max-heap
 * in the output arrays at offset `q_idx * k`.
 */
template __global__ void BruteforceKnnKernel(
    const FloatType* data_points, const IdType* data_offsets,
    const FloatType* query_points, const IdType* query_offsets, const int k,
    FloatType* dists, IdType* query_out, IdType* data_out,
    const int64_t num_batches, const int64_t feature_size) {
  const IdType q_idx = blockIdx.x * blockDim.x + threadIdx.x;
  // query_offsets[num_batches] is the total number of query points.
  if (q_idx >= query_offsets[num_batches]) return;

  // Linear search for the batch (segment) this query point belongs to.
  IdType batch_idx = 0;
  for (IdType b = 0; b < num_batches + 1; ++b) {
    if (query_offsets[b] > q_idx) {
      batch_idx = b - 1;
      break;
    }
  }
  const IdType data_start = data_offsets[batch_idx],
               data_end = data_offsets[batch_idx + 1];

  // Start with k placeholder entries at the maximum distance. Only query_out
  // and dists are initialized here; data_out is written by HeapInsert.
  for (IdType k_idx = 0; k_idx < k; ++k_idx) {
    query_out[q_idx * k + k_idx] = q_idx;
    dists[q_idx * k + k_idx] = std::numeric_limits::max();
  }
  FloatType worst_dist = std::numeric_limits::max();

  // Scan every data point of the same batch.
  for (IdType d_idx = data_start; d_idx < data_end; ++d_idx) {
    FloatType tmp_dist = EuclideanDistWithCheck(
        query_points + q_idx * feature_size, data_points + d_idx * feature_size,
        feature_size, worst_dist);

    IdType out_offset = q_idx * k;
    HeapInsert(
        data_out + out_offset, dists + out_offset, d_idx, tmp_dist, k);
    // The heap root is the largest distance currently kept.
    worst_dist = dists[q_idx * k];
  }
}

/**
 * @brief Same as BruteforceKnnKernel, but use shared memory as buffer.
 * This kernel divides query points and data points into blocks. For each
 * query block, it will make a loop over all data blocks and compute distances.
 * This kernel is faster when the dimension of input points is not large.
 *
 * `block_batch_id` and `local_block_id` give, for every CUDA block, the batch
 * it works on and its position among the blocks of that batch.
 */
template __global__ void BruteforceKnnShareKernel(
    const FloatType* data_points, const IdType* data_offsets,
    const FloatType* query_points, const IdType* query_offsets,
    const IdType* block_batch_id, const IdType* local_block_id, const int k,
    FloatType* dists, IdType* query_out, IdType* data_out,
    const int64_t num_batches, const int64_t feature_size) {
  const IdType block_idx = static_cast(blockIdx.x);
  const IdType block_size = static_cast(blockDim.x);
  const IdType batch_idx = block_batch_id[block_idx];
  const IdType local_bid = local_block_id[block_idx];
  // Range of query points served by this block, clamped to the batch end.
  const IdType query_start = query_offsets[batch_idx] + block_size * local_bid;
  const IdType query_end =
      min(query_start + block_size, query_offsets[batch_idx + 1]);
  // The condition is the same for every thread of the block, so the whole
  // block leaves together before any __syncthreads().
  if (query_start >= query_end) return;
  const IdType query_idx = query_start + threadIdx.x;
  const IdType data_start = data_offsets[batch_idx];
  const IdType data_end = data_offsets[batch_idx + 1];

  // shared memory: points in block + distance buffer + result buffer
  FloatType* data_buff = SharedMemory();
  FloatType* query_buff = data_buff + block_size * feature_size;
  FloatType* dist_buff = query_buff + block_size * feature_size;
  // The id buffer follows the distance buffer, aligned to sizeof(IdType).
  IdType* res_buff = reinterpret_cast(Pow2Align(
      reinterpret_cast(dist_buff + block_size * k), sizeof(IdType)));
  FloatType worst_dist = std::numeric_limits::max();

  // initialize dist buff with inf value
  for (auto i = 0; i < k; ++i) {
    dist_buff[threadIdx.x + i * block_size] = std::numeric_limits::max();
  }

  // load query data to shared memory
  // TODO(tianqi): could be better here to exploit coalesce global memory
  // access.
  if (query_idx < query_end) {
    for (auto i = 0; i < feature_size; ++i) {
      // to avoid bank conflict, we use transpose here
      query_buff[threadIdx.x + i * block_size] =
          query_points[query_idx * feature_size + i];
    }
  }

  // perform computation on each tile
  for (auto tile_start = data_start; tile_start < data_end;
       tile_start += block_size) {
    // each thread load one data point into the shared memory
    IdType load_idx = tile_start + threadIdx.x;
    if (load_idx < data_end) {
      for (auto i = 0; i < feature_size; ++i) {
        data_buff[threadIdx.x * feature_size + i] =
            data_points[load_idx * feature_size + i];
      }
    }
    // Make the freshly loaded tile visible to every thread of the block.
    __syncthreads();

    // compute distance for one tile
    IdType true_block_size = min(data_end - tile_start, block_size);
    if (query_idx < query_end) {
      for (IdType d_idx = 0; d_idx < true_block_size; ++d_idx) {
        FloatType tmp_dist = 0;
        bool early_stop = false;
        IdType dim_idx = 0;

        // Unrolled by four, abandoning the candidate as soon as the partial
        // sum exceeds the worst distance currently kept.
        for (; dim_idx < feature_size - 3; dim_idx += 4) {
          FloatType diff0 = query_buff[threadIdx.x + block_size * (dim_idx)] -
                            data_buff[d_idx * feature_size + dim_idx];
          FloatType diff1 =
              query_buff[threadIdx.x + block_size * (dim_idx + 1)] -
              data_buff[d_idx * feature_size + dim_idx + 1];
          FloatType diff2 =
              query_buff[threadIdx.x + block_size * (dim_idx + 2)] -
              data_buff[d_idx * feature_size + dim_idx + 2];
          FloatType diff3 =
              query_buff[threadIdx.x + block_size * (dim_idx + 3)] -
              data_buff[d_idx * feature_size + dim_idx + 3];

          tmp_dist +=
              diff0 * diff0 + diff1 * diff1 + diff2 * diff2 + diff3 * diff3;

          if (tmp_dist > worst_dist) {
            early_stop = true;
            dim_idx = feature_size;
            break;
          }
        }

        // Remaining (feature_size % 4) dimensions.
        for (; dim_idx < feature_size; ++dim_idx) {
          const FloatType diff =
              query_buff[threadIdx.x + dim_idx * block_size] -
              data_buff[d_idx * feature_size + dim_idx];
          tmp_dist += diff * diff;

          if (tmp_dist > worst_dist) {
            early_stop = true;
            break;
          }
        }

        if (early_stop) continue;

        // Each thread keeps its own k-entry heap at offset threadIdx.x * k.
        HeapInsert(
            res_buff + threadIdx.x * k, dist_buff + threadIdx.x * k,
            d_idx + tile_start, tmp_dist, k);
        worst_dist = dist_buff[threadIdx.x * k];
      }
    }
    // Do not overwrite the tile while other threads may still be reading it.
    __syncthreads();
  }

  // copy result to global memory
  if (query_idx < query_end) {
    for (auto i = 0; i < k; ++i) {
      dists[query_idx * k + i] = dist_buff[threadIdx.x * k + i];
      data_out[query_idx * k + i] = res_buff[threadIdx.x * k + i];
      query_out[query_idx * k + i] = query_idx;
    }
  }
}

/**
 * @brief determine the number of blocks for each segment
 *
 * Writes ceil(segment_length / block_size) for a non-empty segment. With
 * truncating integer division an empty segment yields 1.
 */
template __global__ void GetNumBlockPerSegment(
    const IdType* offsets, IdType* out, const int64_t batch_size,
    const int64_t block_size) {
  const IdType idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < batch_size) {
    out[idx] = (offsets[idx + 1] - offsets[idx] - 1) / block_size + 1;
  }
}

/**
 * @brief Get the batch index and local index in segment for each block
 *
 * @param num_block_prefixsum Exclusive prefix sum of the per-segment block
 *        counts (`batch_size` entries).
 * @param block_batch_id Out: segment index of each block.
 * @param local_block_id Out: index of each block within its segment.
 * @param batch_size Number of segments.
 * @param num_blocks Total number of blocks.
 */
template __global__ void GetBlockInfo(
    const IdType* num_block_prefixsum, IdType* block_batch_id,
    IdType* local_block_id, size_t batch_size, size_t num_blocks) {
  const IdType idx = blockIdx.x * blockDim.x + threadIdx.x;
  IdType i = 0;

  if (idx < num_blocks) {
    // Find the first segment that starts after this block, then step back
    // one to get the segment that contains it.
    for (; i < batch_size; ++i) {
      if (num_block_prefixsum[i] > idx) break;
    }
    i--;
    block_batch_id[idx] = i;
    local_block_id[idx] = idx - num_block_prefixsum[i];
  }
}

/**
 * @brief Brute force kNN. Compute distance for each pair of input points and
 * get the result directly (without a distance matrix).
 *
 * @tparam FloatType The type of input points.
 * @tparam IdType The type of id.
 * @param data_points NDArray of dataset points.
 * @param data_offsets offsets of point index in data points.
 * @param query_points NDArray of query points
 * @param query_offsets offsets of point index in query points.
* @param k the number of nearest points
 * @param result output array; the first `k * num_queries` entries receive the
 *        query ids and the following `k * num_queries` entries the matching
 *        data point ids.
 */
template void BruteForceKNNCuda(
    const NDArray& data_points, const IdArray& data_offsets,
    const NDArray& query_points, const IdArray& query_offsets, const int k,
    IdArray result) {
  cudaStream_t stream = runtime::getCurrentCUDAStream();
  const auto& ctx = data_points->ctx;
  auto device = runtime::DeviceAPI::Get(ctx);
  // The offsets array has one more entry than there are batches.
  const int64_t batch_size = data_offsets->shape[0] - 1;
  const int64_t feature_size = data_points->shape[1];
  const IdType* data_offsets_data = data_offsets.Ptr();
  const IdType* query_offsets_data = query_offsets.Ptr();
  const FloatType* data_points_data = data_points.Ptr();
  const FloatType* query_points_data = query_points.Ptr();
  // `result` holds the query ids followed by the data ids.
  IdType* query_out = result.Ptr();
  IdType* data_out = query_out + k * query_points->shape[0];

  // Scratch buffer for the k best distances of every query point.
  FloatType* dists = static_cast(device->AllocWorkspace(
      ctx, k * query_points->shape[0] * sizeof(FloatType)));

  // One thread per query point.
  const int64_t block_size = cuda::FindNumThreads(query_points->shape[0]);
  const int64_t num_blocks = (query_points->shape[0] - 1) / block_size + 1;
  CUDA_KERNEL_CALL(
      BruteforceKnnKernel, num_blocks, block_size, 0, stream, data_points_data,
      data_offsets_data, query_points_data, query_offsets_data, k, dists,
      query_out, data_out, batch_size, feature_size);

  device->FreeWorkspace(ctx, dists);
}

/**
 * @brief Brute force kNN with shared memory.
 * This function divides query points and data points into blocks. For each
 * query block, it will make a loop over all data blocks and compute distances.
 * It will be faster when the dimension of input points is not large.
 *
 * @tparam FloatType The type of input points.
 * @tparam IdType The type of id.
 * @param data_points NDArray of dataset points.
 * @param data_offsets offsets of point index in data points.
 * @param query_points NDArray of query points
 * @param query_offsets offsets of point index in query points.
* @param k the number of nearest points * @param result output array */ template void BruteForceKNNSharedCuda( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto& ctx = data_points->ctx; auto device = runtime::DeviceAPI::Get(ctx); const int64_t batch_size = data_offsets->shape[0] - 1; const int64_t feature_size = data_points->shape[1]; const IdType* data_offsets_data = data_offsets.Ptr(); const IdType* query_offsets_data = query_offsets.Ptr(); const FloatType* data_points_data = data_points.Ptr(); const FloatType* query_points_data = query_points.Ptr(); IdType* query_out = result.Ptr(); IdType* data_out = query_out + k * query_points->shape[0]; constexpr size_t smem_align = std::max(sizeof(IdType), sizeof(FloatType)); // get max shared memory per block in bytes // determine block size according to this value int max_sharedmem_per_block = 0; CUDA_CALL(cudaDeviceGetAttribute( &max_sharedmem_per_block, cudaDevAttrMaxSharedMemoryPerBlock, ctx.device_id)); const int64_t single_shared_mem = static_cast(Pow2Align( (k + 2 * feature_size) * sizeof(FloatType) + k * sizeof(IdType), smem_align)); const int64_t block_size = cuda::FindNumThreads(max_sharedmem_per_block / single_shared_mem); // Determine the number of blocks. We first get the number of blocks for each // segment. Then we get the block id offset via prefix sum. 
IdType* num_block_per_segment = static_cast( device->AllocWorkspace(ctx, batch_size * sizeof(IdType))); IdType* num_block_prefixsum = static_cast( device->AllocWorkspace(ctx, batch_size * sizeof(IdType))); // block size for GetNumBlockPerSegment computation int64_t temp_block_size = cuda::FindNumThreads(batch_size); int64_t temp_num_blocks = (batch_size - 1) / temp_block_size + 1; CUDA_KERNEL_CALL( GetNumBlockPerSegment, temp_num_blocks, temp_block_size, 0, stream, query_offsets_data, num_block_per_segment, batch_size, block_size); size_t prefix_temp_size = 0; CUDA_CALL(cub::DeviceScan::ExclusiveSum( nullptr, prefix_temp_size, num_block_per_segment, num_block_prefixsum, batch_size, stream)); void* prefix_temp = device->AllocWorkspace(ctx, prefix_temp_size); CUDA_CALL(cub::DeviceScan::ExclusiveSum( prefix_temp, prefix_temp_size, num_block_per_segment, num_block_prefixsum, batch_size, stream)); device->FreeWorkspace(ctx, prefix_temp); // wait for results CUDA_CALL(cudaStreamSynchronize(stream)); int64_t num_blocks = 0, final_elem = 0, copyoffset = (batch_size - 1) * sizeof(IdType); device->CopyDataFromTo( num_block_prefixsum, copyoffset, &num_blocks, 0, sizeof(IdType), ctx, DGLContext{kDGLCPU, 0}, query_offsets->dtype); device->CopyDataFromTo( num_block_per_segment, copyoffset, &final_elem, 0, sizeof(IdType), ctx, DGLContext{kDGLCPU, 0}, query_offsets->dtype); num_blocks += final_elem; device->FreeWorkspace(ctx, num_block_per_segment); // get batch id and local id in segment temp_block_size = cuda::FindNumThreads(num_blocks); temp_num_blocks = (num_blocks - 1) / temp_block_size + 1; IdType* block_batch_id = static_cast( device->AllocWorkspace(ctx, num_blocks * sizeof(IdType))); IdType* local_block_id = static_cast( device->AllocWorkspace(ctx, num_blocks * sizeof(IdType))); CUDA_KERNEL_CALL( GetBlockInfo, temp_num_blocks, temp_block_size, 0, stream, num_block_prefixsum, block_batch_id, local_block_id, batch_size, num_blocks); FloatType* dists = 
static_cast(device->AllocWorkspace(
      ctx, k * query_points->shape[0] * sizeof(FloatType)));

  CUDA_KERNEL_CALL(
      BruteforceKnnShareKernel, num_blocks, block_size,
      single_shared_mem * block_size, stream, data_points_data,
      data_offsets_data, query_points_data, query_offsets_data, block_batch_id,
      local_block_id, k, dists, query_out, data_out, batch_size, feature_size);

  device->FreeWorkspace(ctx, num_block_prefixsum);
  device->FreeWorkspace(ctx, dists);
  device->FreeWorkspace(ctx, local_block_id);
  device->FreeWorkspace(ctx, block_batch_id);
}

/**
 * @brief Setup rng state for nn-descent
 *
 * Every thread whose global index is smaller than \a n initializes exactly one
 * state; the thread index is passed to curand_init as the sequence number, so
 * all states share the seed but use different sequences.
 *
 * @param states Array of rng states; entry `id` is written by thread `id`.
 * @param seed Seed handed to curand_init for every state.
 * @param n Number of states to initialize.
 */
__global__ void SetupRngKernel(
    curandState* states, const uint64_t seed, const size_t n) {
  size_t id = blockIdx.x * blockDim.x + threadIdx.x;
  if (id < n) {
    curand_init(seed, id, 0, states + id);
  }
}

/**
 * @brief Randomly initialize neighbors (sampling without replacement)
 * for each nodes
 *
 * One thread handles one point: it seeds a private curandState, finds the
 * segment that contains the point by scanning \a offsets, reservoir-samples
 * \a k indices out of that segment, computes the distance to each sampled
 * index, flags every entry as new and finally calls BuildHeap on the point's
 * neighbor/distance rows.
 *
 * @param points Point features; point i starts at points + i * feature_size.
 * @param offsets Segment offsets; offsets[batch_size] is used as the total
 *        number of points.
 * @param central_nodes Output; the k entries of each point are set to the
 *        point's own index.
 * @param neighbors Output; k sampled indices per point.
 * @param dists Output; distance from the point to each sampled index.
 * @param flags Output; all k entries of each point are set to true.
 * @param k Number of neighbors kept per point.
 * @param feature_size Number of features per point.
 * @param batch_size Number of segments described by \a offsets.
 * @param seed Seed for the per-thread rng state.
 */
template __global__ void RandomInitNeighborsKernel(
    const FloatType* points, const IdType* offsets, IdType* central_nodes,
    IdType* neighbors, FloatType* dists, bool* flags, const int k,
    const int64_t feature_size, const int64_t batch_size,
    const uint64_t seed) {
  const IdType point_idx = blockIdx.x * blockDim.x + threadIdx.x;
  IdType batch_idx = 0;
  // threads past the last point have nothing to do
  if (point_idx >= offsets[batch_size]) return;

  curandState state;
  curand_init(seed, point_idx, 0, &state);

  // find the segment location in the input batch
  // (linear scan: the first offset greater than point_idx ends the segment)
  for (IdType b = 0; b < batch_size + 1; ++b) {
    if (offsets[b] > point_idx) {
      batch_idx = b - 1;
      break;
    }
  }

  const IdType segment_size = offsets[batch_idx + 1] - offsets[batch_idx];
  IdType* current_neighbors = neighbors + point_idx * k;
  IdType* current_central_nodes = central_nodes + point_idx * k;
  bool* current_flags = flags + point_idx * k;
  FloatType* current_dists = dists + point_idx * k;
  IdType segment_start = offsets[batch_idx];

  // reservoir sampling
  // NOTE(review): the first loop writes k entries regardless of segment_size,
  // and the sample may contain point_idx itself -- presumably callers
  // guarantee segment_size >= k; confirm.
  for (IdType i = 0; i < k; ++i) {
    current_neighbors[i] = i + segment_start;
    current_central_nodes[i] = point_idx;
  }
  for (IdType i = k; i < segment_size; ++i) {
    const IdType j = static_cast(curand(&state) % (i + 1));
    if (j < k) current_neighbors[j] = i + segment_start;
  }

  // compute distances and set flags
  for (IdType i = 0; i < k; ++i) {
    current_flags[i] = true;
    current_dists[i] = EuclideanDist(
        points + point_idx * feature_size,
        points + current_neighbors[i] * feature_size, feature_size);
  }

  // build heap
  BuildHeap(neighbors + point_idx * k, current_dists, k);
}

/**
 * @brief Randomly select candidates from current knn and reverse-knn graph for
 * nn-descent.
 *
 * One thread handles one point. Each point owns a row of
 * (num_candidates + 1) entries in \a new_candidates and in \a old_candidates:
 * entry 0 holds the number of stored candidates, entries 1..num_candidates
 * hold the candidates themselves. A neighbor goes to the "new" row when its
 * flag is true and to the "old" row otherwise; once a row is full, further
 * candidates replace stored ones by reservoir sampling.
 *
 * @param offsets Segment offsets; offsets[batch_size] is used as the total
 *        number of points.
 * @param new_candidates Output rows of candidates whose flag was true.
 * @param old_candidates Output rows of candidates whose flag was false.
 * @param neighbors Current knn graph, k entries per point.
 * @param flags Per-neighbor flags, k entries per point; entries whose
 *        neighbor ended up in the point's "new" row are cleared.
 * @param seed Seed for the per-thread rng state.
 * @param batch_size Number of segments described by \a offsets.
 * @param num_candidates Capacity of each candidate row.
 * @param k Number of neighbors kept per point.
 */
template __global__ void FindCandidatesKernel(
    const IdType* offsets, IdType* new_candidates, IdType* old_candidates,
    IdType* neighbors, bool* flags, const uint64_t seed,
    const int64_t batch_size, const int num_candidates, const int k) {
  const IdType point_idx = blockIdx.x * blockDim.x + threadIdx.x;
  IdType batch_idx = 0;
  // threads past the last point have nothing to do
  if (point_idx >= offsets[batch_size]) return;

  curandState state;
  curand_init(seed, point_idx, 0, &state);

  // find the segment location in the input batch
  for (IdType b = 0; b < batch_size + 1; ++b) {
    if (offsets[b] > point_idx) {
      batch_idx = b - 1;
      break;
    }
  }

  IdType segment_start = offsets[batch_idx],
         segment_end = offsets[batch_idx + 1];
  IdType* current_neighbors = neighbors + point_idx * k;
  bool* current_flags = flags + point_idx * k;

  // reset candidates
  IdType* new_candidates_ptr =
      new_candidates + point_idx * (num_candidates + 1);
  IdType* old_candidates_ptr =
      old_candidates + point_idx * (num_candidates + 1);
  new_candidates_ptr[0] = 0;
  old_candidates_ptr[0] = 0;

  // select candidates from current knn graph
  // here we use candidate[0] for reservoir sampling temporarily
  // (it counts every candidate seen so far, even beyond the row capacity)
  for (IdType i = 0; i < k; ++i) {
    IdType candidate = current_neighbors[i];
    IdType* candidate_array =
        current_flags[i] ? new_candidates_ptr : old_candidates_ptr;
    IdType curr_num = candidate_array[0];
    IdType* candidate_data = candidate_array + 1;

    // reservoir sampling
    if (curr_num < num_candidates) {
      candidate_data[curr_num] = candidate;
    } else {
      IdType pos = static_cast(curand(&state) % (curr_num + 1));
      if (pos < num_candidates) candidate_data[pos] = candidate;
    }
    ++candidate_array[0];
  }

  // select candidates from current reverse knn graph
  // here we use candidate[0] for reservoir sampling temporarily
  // (scans every neighbor slot of the segment for entries equal to point_idx)
  IdType index_start = segment_start * k, index_end = segment_end * k;
  for (IdType i = index_start; i < index_end; ++i) {
    if (neighbors[i] == point_idx) {
      // slot i belongs to the point that lists point_idx as a neighbor
      IdType reverse_candidate = (i - index_start) / k + segment_start;
      IdType* candidate_array =
          flags[i] ? new_candidates_ptr : old_candidates_ptr;
      IdType curr_num = candidate_array[0];
      IdType* candidate_data = candidate_array + 1;

      // reservoir sampling
      if (curr_num < num_candidates) {
        candidate_data[curr_num] = reverse_candidate;
      } else {
        IdType pos = static_cast(curand(&state) % (curr_num + 1));
        if (pos < num_candidates) candidate_data[pos] = reverse_candidate;
      }
      ++candidate_array[0];
    }
  }

  // set candidate[0] back to length
  if (new_candidates_ptr[0] > num_candidates)
    new_candidates_ptr[0] = num_candidates;
  if (old_candidates_ptr[0] > num_candidates)
    old_candidates_ptr[0] = num_candidates;

  // mark new_candidates as old
  // NOTE(review): this thread clears its own flags while other threads may
  // still be reading `flags[i]` in their reverse-knn scan above; there is no
  // synchronization between the two in this kernel -- confirm this is
  // acceptable for the algorithm.
  IdType num_new_candidates = new_candidates_ptr[0];
  for (IdType i = 0; i < k; ++i) {
    IdType neighbor_idx = current_neighbors[i];

    if (current_flags[i]) {
      for (IdType j = 1; j < num_new_candidates + 1; ++j) {
        if (new_candidates_ptr[j] == neighbor_idx) {
          current_flags[i] = false;
          break;
        }
      }
    }
  }
}

/**
 * @brief Update knn graph according to selected candidates for nn-descent
 *
 * One thread handles one point. For every candidate of the point, the
 * candidate's own candidate rows are walked ("two-hop" candidates) and each
 * of them is offered to the point's neighbor heap through FlaggedHeapInsert.
 * The pairs visited are new-new, new-old and old-new; old-old is not visited.
 * The number of insertions reported as successful is written to
 * num_updates[point_idx].
 *
 * @param points Point features; point i starts at points + i * feature_size.
 * @param offsets Segment offsets; offsets[batch_size] is used as the total
 *        number of points.
 * @param neighbors Current knn graph, k entries per point (updated in place).
 * @param new_candidates Candidate rows, (num_candidates + 1) entries per
 *        point with the length stored in entry 0.
 * @param old_candidates Same layout as \a new_candidates.
 * @param distances Distances matching \a neighbors (updated in place).
 * @param flags Per-neighbor flags matching \a neighbors (updated in place).
 * @param num_updates Output; one update count per point.
 * @param batch_size Number of segments described by \a offsets.
 * @param num_candidates Capacity of each candidate row.
 * @param k Number of neighbors kept per point.
 * @param feature_size Number of features per point.
 */
template __global__ void UpdateNeighborsKernel(
    const FloatType* points, const IdType* offsets, IdType* neighbors,
    IdType* new_candidates, IdType* old_candidates, FloatType* distances,
    bool* flags, IdType* num_updates, const int64_t batch_size,
    const int num_candidates, const int k, const int64_t feature_size) {
  const IdType point_idx = blockIdx.x * blockDim.x + threadIdx.x;
  // threads past the last point have nothing to do
  if (point_idx >= offsets[batch_size]) return;

  IdType* current_neighbors = neighbors + point_idx * k;
  bool* current_flags = flags + point_idx * k;
  FloatType* current_dists = distances + point_idx * k;
  IdType* new_candidates_ptr =
      new_candidates + point_idx * (num_candidates + 1);
  IdType* old_candidates_ptr =
      old_candidates + point_idx * (num_candidates + 1);
  IdType num_new_candidates = new_candidates_ptr[0];
  IdType num_old_candidates = old_candidates_ptr[0];
  IdType current_num_updates = 0;

  // process new candidates
  for (IdType i = 1; i <= num_new_candidates; ++i) {
    IdType new_c = new_candidates_ptr[i];

    // new/old candidates of the current new candidate
    IdType* twohop_new_ptr = new_candidates + new_c * (num_candidates + 1);
    IdType* twohop_old_ptr = old_candidates + new_c * (num_candidates + 1);
    IdType num_twohop_new = twohop_new_ptr[0];
    IdType num_twohop_old = twohop_old_ptr[0];
    // element 0 of the distance row is used as the current pruning bound
    FloatType worst_dist = current_dists[0];

    // new - new
    for (IdType j = 1; j <= num_twohop_new; ++j) {
      IdType twohop_new_c = twohop_new_ptr[j];
      FloatType new_dist = EuclideanDistWithCheck(
          points + point_idx * feature_size,
          points + twohop_new_c * feature_size, feature_size, worst_dist);

      if (FlaggedHeapInsert(
              current_neighbors, current_dists, current_flags, twohop_new_c,
              new_dist, true, k, true)) {
        ++current_num_updates;
        // the row changed, so refresh the pruning bound
        worst_dist = current_dists[0];
      }
    }

    // new - old
    for (IdType j = 1; j <= num_twohop_old; ++j) {
      IdType twohop_old_c = twohop_old_ptr[j];
      FloatType new_dist = EuclideanDistWithCheck(
          points + point_idx * feature_size,
          points + twohop_old_c * feature_size, feature_size, worst_dist);

      if (FlaggedHeapInsert(
              current_neighbors, current_dists, current_flags, twohop_old_c,
              new_dist, true, k, true)) {
        ++current_num_updates;
        worst_dist = current_dists[0];
      }
    }
  }

  // process old candidates
  for (IdType i = 1; i <= num_old_candidates; ++i) {
    IdType old_c =
old_candidates_ptr[i]; // new candidates of the current old candidate IdType* twohop_new_ptr = new_candidates + old_c * (num_candidates + 1); IdType num_twohop_new = twohop_new_ptr[0]; FloatType worst_dist = current_dists[0]; // old - new for (IdType j = 1; j <= num_twohop_new; ++j) { IdType twohop_new_c = twohop_new_ptr[j]; FloatType new_dist = EuclideanDistWithCheck( points + point_idx * feature_size, points + twohop_new_c * feature_size, feature_size, worst_dist); if (FlaggedHeapInsert( current_neighbors, current_dists, current_flags, twohop_new_c, new_dist, true, k, true)) { ++current_num_updates; worst_dist = current_dists[0]; } } } num_updates[point_idx] = current_num_updates; } } // namespace impl template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm) { if (algorithm == std::string("bruteforce")) { impl::BruteForceKNNCuda( data_points, data_offsets, query_points, query_offsets, k, result); } else if (algorithm == std::string("bruteforce-sharemem")) { impl::BruteForceKNNSharedCuda( data_points, data_offsets, query_points, query_offsets, k, result); } else { LOG(FATAL) << "Algorithm " << algorithm << " is not supported on CUDA."; } } template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta) { cudaStream_t stream = runtime::getCurrentCUDAStream(); const auto& ctx = points->ctx; auto device = runtime::DeviceAPI::Get(ctx); const int64_t num_nodes = points->shape[0]; const int64_t feature_size = points->shape[1]; const int64_t batch_size = offsets->shape[0] - 1; const IdType* offsets_data = offsets.Ptr(); const FloatType* points_data = points.Ptr(); IdType* central_nodes = result.Ptr(); IdType* neighbors = central_nodes + k * num_nodes; uint64_t seed; int warp_size = 0; CUDA_CALL( cudaDeviceGetAttribute(&warp_size, 
cudaDevAttrWarpSize, ctx.device_id)); // We don't need large block sizes, since there's not much inter-thread // communication int64_t block_size = warp_size; int64_t num_blocks = (num_nodes - 1) / block_size + 1; // allocate space for candidates, distances and flags // we use the first element in candidate array to represent length IdType* new_candidates = static_cast(device->AllocWorkspace( ctx, num_nodes * (num_candidates + 1) * sizeof(IdType))); IdType* old_candidates = static_cast(device->AllocWorkspace( ctx, num_nodes * (num_candidates + 1) * sizeof(IdType))); IdType* num_updates = static_cast( device->AllocWorkspace(ctx, num_nodes * sizeof(IdType))); FloatType* distances = static_cast( device->AllocWorkspace(ctx, num_nodes * k * sizeof(IdType))); bool* flags = static_cast( device->AllocWorkspace(ctx, num_nodes * k * sizeof(IdType))); size_t sum_temp_size = 0; IdType total_num_updates = 0; IdType* total_num_updates_d = static_cast(device->AllocWorkspace(ctx, sizeof(IdType))); CUDA_CALL(cub::DeviceReduce::Sum( nullptr, sum_temp_size, num_updates, total_num_updates_d, num_nodes, stream)); IdType* sum_temp_storage = static_cast(device->AllocWorkspace(ctx, sum_temp_size)); // random initialize neighbors seed = RandomEngine::ThreadLocal()->RandInt( std::numeric_limits::max()); CUDA_KERNEL_CALL( impl::RandomInitNeighborsKernel, num_blocks, block_size, 0, stream, points_data, offsets_data, central_nodes, neighbors, distances, flags, k, feature_size, batch_size, seed); for (int i = 0; i < num_iters; ++i) { // select candidates seed = RandomEngine::ThreadLocal()->RandInt( std::numeric_limits::max()); CUDA_KERNEL_CALL( impl::FindCandidatesKernel, num_blocks, block_size, 0, stream, offsets_data, new_candidates, old_candidates, neighbors, flags, seed, batch_size, num_candidates, k); // update CUDA_KERNEL_CALL( impl::UpdateNeighborsKernel, num_blocks, block_size, 0, stream, points_data, offsets_data, neighbors, new_candidates, old_candidates, distances, flags, 
num_updates, batch_size, num_candidates, k, feature_size); total_num_updates = 0; CUDA_CALL(cub::DeviceReduce::Sum( sum_temp_storage, sum_temp_size, num_updates, total_num_updates_d, num_nodes, stream)); device->CopyDataFromTo( total_num_updates_d, 0, &total_num_updates, 0, sizeof(IdType), ctx, DGLContext{kDGLCPU, 0}, offsets->dtype); if (total_num_updates <= static_cast(delta * k * num_nodes)) { break; } } device->FreeWorkspace(ctx, new_candidates); device->FreeWorkspace(ctx, old_candidates); device->FreeWorkspace(ctx, num_updates); device->FreeWorkspace(ctx, distances); device->FreeWorkspace(ctx, flags); device->FreeWorkspace(ctx, total_num_updates_d); device->FreeWorkspace(ctx, sum_temp_storage); } template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void KNN( const NDArray& data_points, const IdArray& data_offsets, const NDArray& query_points, const IdArray& query_offsets, const int k, IdArray result, const std::string& algorithm); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta); template void NNDescent( const NDArray& points, const IdArray& offsets, IdArray result, const int k, const int num_iters, const int num_candidates, const double delta); 
template void NNDescent(
    const NDArray& points, const IdArray& offsets, IdArray result, const int k,
    const int num_iters, const int num_candidates, const double delta);

}  // namespace transform
}  // namespace dgl

================================================
FILE: src/graph/transform/knn.cc
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file graph/transform/knn.cc
 * @brief k-nearest-neighbor (KNN) interface
 */

#include "knn.h"

#include
#include

#include "../../array/check.h"

using namespace dgl::runtime;
namespace dgl {
namespace transform {

// FFI entry point for KNN. Arguments, in order: data_points, data_offsets,
// query_points, query_offsets, k, result, algorithm. All arrays must be
// contiguous and live on the context of data_points; the call is then
// dispatched on device type, float type of data_points and id type of result.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLKNN")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const NDArray data_points = args[0];
      const IdArray data_offsets = args[1];
      const NDArray query_points = args[2];
      const IdArray query_offsets = args[3];
      const int k = args[4];
      IdArray result = args[5];
      const std::string algorithm = args[6];

      aten::CheckContiguous(
          {data_points, data_offsets, query_points, query_offsets, result},
          {"data_points", "data_offsets", "query_points", "query_offsets",
           "result"});
      aten::CheckCtx(
          data_points->ctx, {data_offsets, query_points, query_offsets, result},
          {"data_offsets", "query_points", "query_offsets", "result"});

      ATEN_XPU_SWITCH_CUDA(data_points->ctx.device_type, XPU, "KNN", {
        ATEN_FLOAT_TYPE_SWITCH(data_points->dtype, FloatType, "data_points", {
          ATEN_ID_TYPE_SWITCH(result->dtype, IdType, {
            KNN(
                data_points, data_offsets, query_points, query_offsets, k,
                result, algorithm);
          });
        });
      });
    });

// FFI entry point for NN-descent. Arguments, in order: points, offsets,
// result, k, num_iters, num_candidates, delta. All arrays must be contiguous
// and live on the context of points; the call is then dispatched on device
// type, float type of points and id type of result.
DGL_REGISTER_GLOBAL("transform._CAPI_DGLNNDescent")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      const NDArray points = args[0];
      const IdArray offsets = args[1];
      const IdArray result = args[2];
      const int k = args[3];
      const int num_iters = args[4];
      const int num_candidates = args[5];
      const double delta = args[6];

      aten::CheckContiguous(
          {points, offsets, result}, {"points", "offsets", "result"});
      aten::CheckCtx(
          points->ctx, {points, offsets, result},
          {"points", "offsets", "result"});

      ATEN_XPU_SWITCH_CUDA(points->ctx.device_type, XPU, "NNDescent", {
        ATEN_FLOAT_TYPE_SWITCH(points->dtype, FloatType, "points", {
          ATEN_ID_TYPE_SWITCH(result->dtype, IdType, {
            NNDescent(
                points, offsets, result, k, num_iters, num_candidates, delta);
          });
        });
      });
    });

}  // namespace transform
}  // namespace dgl

================================================
FILE: src/graph/transform/knn.h
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file graph/transform/knn.h
 * @brief k-nearest-neighbor (KNN) implementation
 */

#ifndef DGL_GRAPH_TRANSFORM_KNN_H_
#define DGL_GRAPH_TRANSFORM_KNN_H_

#include
#include

namespace dgl {
namespace transform {

/**
 * @brief For each point in each segment in \a query_points, find \a k nearest
 *        points in the same segment in \a data_points. \a data_offsets and \a
 *        query_offsets determine the start index of each segment in \a
 *        data_points and \a query_points.
 *
 * @param data_points dataset points.
 * @param data_offsets offsets of point index in \a data_points.
 * @param query_points query points.
 * @param query_offsets offsets of point index in \a query_points.
 * @param k the number of nearest points.
 * @param result output array. A 2D tensor indicating the index relation
 *        between \a query_points and \a data_points.
 * @param algorithm algorithm used to compute the k-nearest neighbors.
 */
template void KNN(
    const NDArray& data_points, const IdArray& data_offsets,
    const NDArray& query_points, const IdArray& query_offsets, const int k,
    IdArray result, const std::string& algorithm);

/**
 * @brief For each input point, find \a k approximate nearest points in the same
 *        segment using NN-descent algorithm.
 *
 * @param points input points.
 * @param offsets offsets of point index.
 * @param result output array. A 2D tensor indicating the index relation between
 *        points.
 * @param k the number of nearest points.
 * @param num_iters The maximum number of NN-descent iterations to perform.
 * @param num_candidates The maximum number of candidates to be considered
 *        during one iteration.
 * @param delta Early-abort threshold. The CUDA implementation stops iterating
 *        once the number of neighbor updates made in one iteration is at most
 *        delta * k * (number of points).
 */
template void NNDescent(
    const NDArray& points, const IdArray& offsets, IdArray result, const int k,
    const int num_iters, const int num_candidates, const double delta);

}  // namespace transform
}  // namespace dgl

#endif  // DGL_GRAPH_TRANSFORM_KNN_H_

================================================
FILE: src/graph/transform/line_graph.cc
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file graph/transform/line_graph.cc
 * @brief Line graph implementation
 */

#include
#include
#include
#include
#include
#include

#include "../../c_api_common.h"
#include "../heterograph.h"

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace transform {

/**
 * @brief Create Line Graph.
 * @param hg Graph.
 * @param backtracking whether the pair of (v, u) (u, v) edges are treated as
 *        linked.
 * @return The Line Graph.
*/ HeteroGraphPtr CreateLineGraph(HeteroGraphPtr hg, bool backtracking) { const auto hgp = std::dynamic_pointer_cast(hg); return hgp->LineGraph(backtracking); } DGL_REGISTER_GLOBAL("transform._CAPI_DGLHeteroLineGraph") .set_body([](DGLArgs args, DGLRetValue* rv) { HeteroGraphRef hg = args[0]; bool backtracking = args[1]; auto hgptr = CreateLineGraph(hg.sptr(), backtracking); *rv = HeteroGraphRef(hgptr); }); }; // namespace transform }; // namespace dgl ================================================ FILE: src/graph/transform/metis_partition_hetero.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file graph/metis_partition.cc * @brief Call Metis partitioning */ #include #include #include #include "../heterograph.h" #include "../unit_graph.h" using namespace dgl::runtime; namespace dgl { namespace transform { #if !defined(_WIN32) IdArray MetisPartition( UnitGraphPtr g, int k, NDArray vwgt_arr, const std::string &mode, bool obj_cut) { // Mode can only be "k-way" or "recursive" CHECK(mode == "k-way" || mode == "recursive") << "mode can only be \"k-way\" or \"recursive\""; // The index type of Metis needs to be compatible with DGL index type. CHECK_EQ(sizeof(idx_t), sizeof(int64_t)) << "Metis only supports int64 graph for now"; // This is a symmetric graph, so in-csr and out-csr are the same. const auto mat = g->GetCSCMatrix(0); // const auto mat = g->GetInCSR()->ToCSRMatrix(); idx_t nvtxs = g->NumVertices(0); idx_t ncon = 1; // # balacing constraints. 
idx_t *xadj = static_cast(mat.indptr->data); idx_t *adjncy = static_cast(mat.indices->data); idx_t nparts = k; IdArray part_arr = aten::NewIdArray(nvtxs); idx_t objval = 0; idx_t *part = static_cast(part_arr->data); int64_t vwgt_len = vwgt_arr->shape[0]; CHECK_EQ(sizeof(idx_t), vwgt_arr->dtype.bits / 8) << "The vertex weight array doesn't have right type"; CHECK(vwgt_len % g->NumVertices(0) == 0) << "The vertex weight array doesn't have right number of elements"; idx_t *vwgt = NULL; if (vwgt_len > 0) { ncon = vwgt_len / g->NumVertices(0); vwgt = static_cast(vwgt_arr->data); } auto partition_func = (mode == "k-way") ? METIS_PartGraphKway : METIS_PartGraphRecursive; idx_t options[METIS_NOPTIONS]; METIS_SetDefaultOptions(options); options[METIS_OPTION_ONDISK] = 1; options[METIS_OPTION_NITER] = 1; options[METIS_OPTION_NIPARTS] = 1; options[METIS_OPTION_DROPEDGES] = 1; if (obj_cut) { options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT; } else { options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_VOL; } int ret = partition_func( &nvtxs, // The number of vertices &ncon, // The number of balancing constraints. xadj, // indptr adjncy, // indices vwgt, // the weights of the vertices NULL, // The size of the vertices for computing // the total communication volume NULL, // The weights of the edges &nparts, // The number of partitions. 
NULL, // the desired weight for each partition and constraint NULL, // the allowed load imbalance tolerance options, // the array of options &objval, // the edge-cut or the total communication volume of // the partitioning solution part); if (obj_cut) { LOG(INFO) << "Partition a graph with " << g->NumVertices(0) << " nodes and " << g->NumEdges(0) << " edges into " << k << " parts and " << "get " << objval << " edge cuts"; } else { LOG(INFO) << "Partition a graph with " << g->NumVertices(0) << " nodes and " << g->NumEdges(0) << " edges into " << k << " parts and " << "the communication volume is " << objval; } switch (ret) { case METIS_OK: return part_arr; case METIS_ERROR_INPUT: LOG(FATAL) << "Error in Metis partitioning: input error"; case METIS_ERROR_MEMORY: LOG(FATAL) << "Error in Metis partitioning: cannot allocate memory"; default: LOG(FATAL) << "Error in Metis partitioning: other errors"; } // return an array of 0 elements to indicate the error. return aten::NullArray(); } #endif // !defined(_WIN32) DGL_REGISTER_GLOBAL("partition._CAPI_DGLMetisPartition_Hetero") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef g = args[0]; auto hgptr = std::dynamic_pointer_cast(g.sptr()); CHECK(hgptr) << "Invalid HeteroGraph object"; CHECK_EQ(hgptr->relation_graphs().size(), 1) << "Metis partition only supports HomoGraph"; auto ugptr = hgptr->relation_graphs()[0]; int k = args[1]; NDArray vwgt = args[2]; std::string mode = args[3]; bool obj_cut = args[4]; #if !defined(_WIN32) *rv = MetisPartition(ugptr, k, vwgt, mode, obj_cut); #else LOG(FATAL) << "Metis partition does not support Windows."; #endif // !defined(_WIN32) }); } // namespace transform } // namespace dgl ================================================ FILE: src/graph/transform/partition_hetero.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file graph/metis_partition.cc * @brief Call Metis partitioning */ #include #include #include #include 
"../heterograph.h" #include "../unit_graph.h" #if !defined(_WIN32) #include #endif // !defined(_WIN32) using namespace dgl::runtime; namespace dgl { #if !defined(_WIN32) gk_csr_t *Convert2GKCsr(const aten::CSRMatrix mat, bool is_row); aten::CSRMatrix Convert2DGLCsr(gk_csr_t *gk_csr, bool is_row); #endif // !defined(_WIN32) namespace transform { class HaloHeteroSubgraph : public HeteroSubgraph { public: std::vector inner_nodes; }; HeteroGraphPtr ReorderUnitGraph(UnitGraphPtr ug, IdArray new_order) { auto format = ug->GetCreatedFormats(); // We only need to reorder one of the graph structure. if (format & CSC_CODE) { auto cscmat = ug->GetCSCMatrix(0); auto new_cscmat = aten::CSRReorder(cscmat, new_order, new_order); return UnitGraph::CreateFromCSC( ug->NumVertexTypes(), new_cscmat, ug->GetAllowedFormats()); } else if (format & CSR_CODE) { auto csrmat = ug->GetCSRMatrix(0); auto new_csrmat = aten::CSRReorder(csrmat, new_order, new_order); return UnitGraph::CreateFromCSR( ug->NumVertexTypes(), new_csrmat, ug->GetAllowedFormats()); } else { auto coomat = ug->GetCOOMatrix(0); auto new_coomat = aten::COOReorder(coomat, new_order, new_order); return UnitGraph::CreateFromCOO( ug->NumVertexTypes(), new_coomat, ug->GetAllowedFormats()); } } HaloHeteroSubgraph GetSubgraphWithHalo( std::shared_ptr hg, IdArray nodes, int num_hops) { CHECK_EQ(hg->NumBits(), 64) << "halo subgraph only supports 64bits graph"; CHECK_EQ(hg->relation_graphs().size(), 1) << "halo subgraph only supports homogeneous graph"; CHECK_EQ(nodes->dtype.bits, 64) << "halo subgraph only supports 64bits nodes tensor"; const dgl_id_t *nid = static_cast(nodes->data); const auto id_len = nodes->shape[0]; // A map contains all nodes in the subgraph. // The key is the old node Ids, the value indicates whether a node is a inner // node. std::unordered_map all_nodes; // The old Ids of all nodes. We want to preserve the order of the nodes in the // vector. The first few nodes are the inner nodes in the subgraph. 
std::vector old_node_ids(nid, nid + id_len);
  // outer_nodes[h] collects the nodes first reached at hop h + 1.
  std::vector> outer_nodes(num_hops);
  for (int64_t i = 0; i < id_len; i++) all_nodes[nid[i]] = true;
  // Snapshot of the inner nodes only; all_nodes keeps growing below.
  auto orig_nodes = all_nodes;

  std::vector edge_src, edge_dst, edge_eid;

  // When we deal with in-edges, we need to do two things:
  // * find the edges inside the partition and the edges between partitions.
  // * find the nodes outside the partition that connect the partition.
  EdgeArray in_edges = hg->InEdges(0, nodes);
  auto src = in_edges.src;
  auto dst = in_edges.dst;
  auto eid = in_edges.id;
  auto num_edges = eid->shape[0];
  const dgl_id_t *src_data = static_cast(src->data);
  const dgl_id_t *dst_data = static_cast(dst->data);
  const dgl_id_t *eid_data = static_cast(eid->data);
  for (int64_t i = 0; i < num_edges; i++) {
    // We check if the source node is in the original node.
    // The edge is kept when its source is an inner node, or unconditionally
    // when a halo was requested (num_hops > 0).
    auto it1 = orig_nodes.find(src_data[i]);
    if (it1 != orig_nodes.end() || num_hops > 0) {
      edge_src.push_back(src_data[i]);
      edge_dst.push_back(dst_data[i]);
      edge_eid.push_back(eid_data[i]);
    }
    // We need to expand only if the node hasn't been seen before.
    auto it = all_nodes.find(src_data[i]);
    if (it == all_nodes.end() && num_hops > 0) {
      all_nodes[src_data[i]] = false;
      old_node_ids.push_back(src_data[i]);
      outer_nodes[0].push_back(src_data[i]);
    }
  }

  // Now we need to traverse the graph with the in-edges to access nodes
  // and edges more hops away.
  for (int k = 1; k < num_hops; k++) {
    // These locals intentionally shadow the ones of the first pass above.
    const std::vector &nodes = outer_nodes[k - 1];
    EdgeArray in_edges = hg->InEdges(0, aten::VecToIdArray(nodes));
    auto src = in_edges.src;
    auto dst = in_edges.dst;
    auto eid = in_edges.id;
    auto num_edges = eid->shape[0];
    const dgl_id_t *src_data = static_cast(src->data);
    const dgl_id_t *dst_data = static_cast(dst->data);
    const dgl_id_t *eid_data = static_cast(eid->data);
    for (int64_t i = 0; i < num_edges; i++) {
      auto it1 = orig_nodes.find(src_data[i]);
      // If the source node is in the partition, the edge leaves the partition
      // and is collected by the pass over the partition's out-edges further
      // below, so it is skipped here.
      if (it1 == orig_nodes.end()) {
        edge_src.push_back(src_data[i]);
        edge_dst.push_back(dst_data[i]);
        edge_eid.push_back(eid_data[i]);
      }
      // If we haven't seen this node.
      auto it = all_nodes.find(src_data[i]);
      if (it == all_nodes.end()) {
        all_nodes[src_data[i]] = false;
        old_node_ids.push_back(src_data[i]);
        outer_nodes[k].push_back(src_data[i]);
      }
    }
  }

  if (num_hops > 0) {
    // Out-edges of the inner nodes: only the ones that leave the partition
    // are added, the others were already collected as in-edges.
    EdgeArray out_edges = hg->OutEdges(0, nodes);
    auto src = out_edges.src;
    auto dst = out_edges.dst;
    auto eid = out_edges.id;
    auto num_edges = eid->shape[0];
    const dgl_id_t *src_data = static_cast(src->data);
    const dgl_id_t *dst_data = static_cast(dst->data);
    const dgl_id_t *eid_data = static_cast(eid->data);
    for (int64_t i = 0; i < num_edges; i++) {
      // If the outer edge isn't in the partition.
      auto it1 = orig_nodes.find(dst_data[i]);
      if (it1 == orig_nodes.end()) {
        edge_src.push_back(src_data[i]);
        edge_dst.push_back(dst_data[i]);
        edge_eid.push_back(eid_data[i]);
      }
      // We don't expand along the out-edges.
      // The destination is still recorded as a (non-inner) subgraph node.
      auto it = all_nodes.find(dst_data[i]);
      if (it == all_nodes.end()) {
        all_nodes[dst_data[i]] = false;
        old_node_ids.push_back(dst_data[i]);
      }
    }
  }

  // We assign new Ids to the nodes in the subgraph. We ensure that the HALO
  // nodes are behind the input nodes.
  // (new id == position in old_node_ids, which starts with the input nodes)
  std::unordered_map old2new;
  for (size_t i = 0; i < old_node_ids.size(); i++) {
    old2new[old_node_ids[i]] = i;
  }

  num_edges = edge_src.size();
  IdArray new_src = IdArray::Empty(
      {num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
  IdArray new_dst = IdArray::Empty(
      {num_edges}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0});
  dgl_id_t *new_src_data = static_cast(new_src->data);
  dgl_id_t *new_dst_data = static_cast(new_dst->data);
  // Translate every collected edge to the new node ids.
  for (size_t i = 0; i < edge_src.size(); i++) {
    new_src_data[i] = old2new[edge_src[i]];
    new_dst_data[i] = old2new[edge_dst[i]];
  }

  // inner_nodes[i] tells whether subgraph node i is one of the input nodes.
  std::vector inner_nodes(old_node_ids.size());
  for (size_t i = 0; i < old_node_ids.size(); i++) {
    dgl_id_t old_nid = old_node_ids[i];
    inner_nodes[i] = all_nodes[old_nid];
  }

  aten::COOMatrix coo(
      old_node_ids.size(), old_node_ids.size(), new_src, new_dst);
  HeteroGraphPtr ugptr = UnitGraph::CreateFromCOO(1, coo);
  HeteroGraphPtr subg = CreateHeteroGraph(hg->meta_graph(), {ugptr});
  HaloHeteroSubgraph halo_subg;
  halo_subg.graph = subg;
  // Original ids of the subgraph's nodes and edges, in subgraph order.
  halo_subg.induced_vertices = {aten::VecToIdArray(old_node_ids)};
  halo_subg.induced_edges = {aten::VecToIdArray(edge_eid)};
  // TODO(zhengda) we need to switch to 8 bytes afterwards.
halo_subg.inner_nodes = {aten::VecToIdArray(inner_nodes, 32)};
  return halo_subg;
}

// FFI entry point: args are (graph, new_order). Reorders the single relation
// graph of a homogeneous graph and returns a new graph that reuses the meta
// graph and the per-type vertex counts of the input.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLReorderGraph_Hetero")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroGraphRef g = args[0];
      auto hgptr = std::dynamic_pointer_cast(g.sptr());
      CHECK(hgptr) << "Invalid HeteroGraph object";
      CHECK_EQ(hgptr->relation_graphs().size(), 1)
          << "Reorder only supports HomoGraph";
      auto ugptr = hgptr->relation_graphs()[0];
      const IdArray new_order = args[1];
      auto reorder_ugptr = ReorderUnitGraph(ugptr, new_order);
      std::vector rel_graphs = {reorder_ugptr};
      *rv = HeteroGraphRef(std::make_shared(
          hgptr->meta_graph(), rel_graphs, hgptr->NumVerticesPerType()));
    });

// FFI entry point: args are (graph, node_parts, num_hops). node_parts holds
// one 64-bit partition id per node. Returns a list indexed by partition id
// whose entries are the halo subgraphs of the partitions.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLPartitionWithHalo_Hetero")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroGraphRef g = args[0];
      auto hgptr = std::dynamic_pointer_cast(g.sptr());
      CHECK(hgptr) << "Invalid HeteroGraph object";
      CHECK_EQ(hgptr->relation_graphs().size(), 1)
          << "Metis partition only supports HomoGraph";
      auto ugptr = hgptr->relation_graphs()[0];

      IdArray node_parts = args[1];
      int num_hops = args[2];

      CHECK_EQ(node_parts->dtype.bits, 64)
          << "Only supports 64bits tensor for now";

      const int64_t *part_data = static_cast(node_parts->data);
      int64_t num_nodes = node_parts->shape[0];
      // Group the node ids by partition id.
      std::unordered_map> part_map;
      for (int64_t i = 0; i < num_nodes; i++) {
        dgl_id_t part_id = part_data[i];
        auto it = part_map.find(part_id);
        if (it == part_map.end()) {
          std::vector vec;
          vec.push_back(i);
          part_map[part_id] = vec;
        } else {
          it->second.push_back(i);
        }
      }
      // Flatten the map into parallel vectors so that the partitions can be
      // addressed by position in the parallel loop below.
      std::vector part_ids;
      std::vector> part_nodes;
      int max_part_id = 0;
      for (auto it = part_map.begin(); it != part_map.end(); it++) {
        max_part_id = std::max(it->first, max_part_id);
        part_ids.push_back(it->first);
        part_nodes.push_back(it->second);
      }
      // When we construct subgraphs, we need to access both in-edges and
      // out-edges. We need to make sure the in-CSR and out-CSR exist.
      // Otherwise, we'll try to construct in-CSR and out-CSR in openmp for
      // loop, which will lead to some unexpected results.
      ugptr->GetInCSR();
      ugptr->GetOutCSR();
      // NOTE(review): the result is indexed by partition id, so ids in
      // [0, max_part_id] that never occur in node_parts keep a
      // default-constructed (empty) entry -- presumably callers pass
      // contiguous partition ids; confirm.
      std::vector> subgs(max_part_id + 1);
      int num_partitions = part_nodes.size();
      runtime::parallel_for(0, num_partitions, [&](int b, int e) {
        for (auto i = b; i < e; i++) {
          auto nodes = aten::VecToIdArray(part_nodes[i]);
          HaloHeteroSubgraph subg = GetSubgraphWithHalo(hgptr, nodes, num_hops);
          std::shared_ptr subg_ptr(
              new HaloHeteroSubgraph(subg));
          int part_id = part_ids[i];
          subgs[part_id] = subg_ptr;
        }
      });
      List ret_list;
      for (size_t i = 0; i < subgs.size(); i++) {
        ret_list.push_back(HeteroSubgraphRef(subgs[i]));
      }
      *rv = ret_list;
    });

// Per-edge record used when reassigning edge ids: the edge id, the position
// it was read from, and the partition id it was tagged with.
template struct EdgeProperty {
  IdType eid;
  int64_t idx;
  int part_id;
};

// Reassign edge IDs so that all edges in a partition have contiguous edge IDs.
// The original edge IDs are returned.
DGL_REGISTER_GLOBAL("partition._CAPI_DGLReassignEdges_Hetero")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      HeteroGraphRef g = args[0];
      auto hgptr = std::dynamic_pointer_cast(g.sptr());
      CHECK(hgptr) << "Invalid HeteroGraph object";
      CHECK_EQ(hgptr->relation_graphs().size(), 1)
          << "Reorder only supports HomoGraph";
      auto ugptr = hgptr->relation_graphs()[0];
      IdArray etype = args[1];
      IdArray part_id = args[2];
      bool is_incsr = args[3];
      auto csrmat = is_incsr ? ugptr->GetCSCMatrix(0) : ugptr->GetCSRMatrix(0);
      int64_t num_edges = csrmat.data->shape[0];
      int64_t num_rows = csrmat.indptr->shape[0] - 1;
      IdArray new_data =
          IdArray::Empty({num_edges}, csrmat.data->dtype, csrmat.data->ctx);
      // Return the original edge Ids.
      *rv = new_data;
      // Generate new edge Ids.
ATEN_ID_TYPE_SWITCH(new_data->dtype, IdType, { CHECK(etype->dtype.bits == sizeof(IdType) * 8); CHECK(part_id->dtype.bits == sizeof(IdType) * 8); const IdType *part_id_data = static_cast(part_id->data); const IdType *etype_data = static_cast(etype->data); const IdType *indptr_data = static_cast(csrmat.indptr->data); IdType *typed_data = static_cast(csrmat.data->data); IdType *typed_new_data = static_cast(new_data->data); std::vector> indexed_eids(num_edges); for (int64_t i = 0; i < num_rows; i++) { for (int64_t j = indptr_data[i]; j < indptr_data[i + 1]; j++) { indexed_eids[j].eid = typed_data[j]; indexed_eids[j].idx = j; indexed_eids[j].part_id = part_id_data[i]; } } auto comp = [etype_data]( const EdgeProperty &a, const EdgeProperty &b) { if (a.part_id == b.part_id) { return etype_data[a.eid] < etype_data[b.eid]; } else { return a.part_id < b.part_id; } }; // We only need to sort the edges if the input graph has multiple // relations. If it's a homogeneous grap, we'll just assign edge Ids // based on its previous order. 
if (etype->shape[0] > 0) { std::sort(indexed_eids.begin(), indexed_eids.end(), comp); } for (int64_t new_eid = 0; new_eid < num_edges; new_eid++) { int64_t orig_idx = indexed_eids[new_eid].idx; typed_new_data[new_eid] = typed_data[orig_idx]; typed_data[orig_idx] = new_eid; } }); ugptr->InvalidateCSR(); ugptr->InvalidateCOO(); }); DGL_REGISTER_GLOBAL("partition._CAPI_GetHaloSubgraphInnerNodes_Hetero") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroSubgraphRef g = args[0]; auto gptr = std::dynamic_pointer_cast(g.sptr()); CHECK(gptr) << "The input graph has to be HaloHeteroSubgraph"; *rv = gptr->inner_nodes[0]; }); DGL_REGISTER_GLOBAL("partition._CAPI_DGLMakeSymmetric_Hetero") .set_body([](DGLArgs args, DGLRetValue *rv) { HeteroGraphRef g = args[0]; auto hgptr = std::dynamic_pointer_cast(g.sptr()); CHECK(hgptr) << "Invalid HeteroGraph object"; CHECK_EQ(hgptr->relation_graphs().size(), 1) << "Metis partition only supports homogeneous graph"; auto ugptr = hgptr->relation_graphs()[0]; #if !defined(_WIN32) // TODO(zhengda) should we get whatever CSR exists in the graph. gk_csr_t *gk_csr = Convert2GKCsr(ugptr->GetCSCMatrix(0), true); gk_csr_t *sym_gk_csr = gk_csr_MakeSymmetric(gk_csr, GK_CSR_SYM_SUM); auto mat = Convert2DGLCsr(sym_gk_csr, true); gk_csr_Free(&gk_csr); gk_csr_Free(&sym_gk_csr); auto new_ugptr = UnitGraph::CreateFromCSC( ugptr->NumVertexTypes(), mat, ugptr->GetAllowedFormats()); std::vector rel_graphs = {new_ugptr}; *rv = HeteroGraphRef(std::make_shared( hgptr->meta_graph(), rel_graphs, hgptr->NumVerticesPerType())); #else LOG(FATAL) << "The fast version of making symmetric graph is not " "supported in Windows."; #endif // !defined(_WIN32) }); } // namespace transform } // namespace dgl ================================================ FILE: src/graph/transform/remove_edges.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/transform/remove_edges.cc * @brief Remove edges. 
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace transform {

/**
 * @brief Build a new heterograph with the given edges removed.
 *
 * For every edge type the sparse format is chosen with
 * SelectFormat(etype, COO_CODE) and the matching COORemove()/CSRRemove() is
 * applied to that format's matrix.
 *
 * @param graph The input graph.
 * @param eids One array of edge ids to remove per edge type (indexed by
 *        edge type).
 *
 * @return A pair of the new graph and, per edge type, the `data` array of the
 *         removal result (pushed as the "induced" edge ids).
 */
std::pair> RemoveEdges(
    const HeteroGraphPtr graph, const std::vector &eids) {
  std::vector induced_eids;
  std::vector rel_graphs;
  const int64_t num_etypes = graph->NumEdgeTypes();

  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const SparseFormat fmt = graph->SelectFormat(etype, COO_CODE);
    const auto src_dst_types = graph->GetEndpointTypes(etype);
    const dgl_type_t srctype = src_dst_types.first;
    const dgl_type_t dsttype = src_dst_types.second;
    // A relation whose endpoints share a node type has one node type,
    // otherwise two.
    const int num_ntypes_rel = (srctype == dsttype) ? 1 : 2;

    // Both stay default-constructed if `fmt` matches none of the branches.
    HeteroGraphPtr new_rel_graph;
    IdArray induced_eids_rel;

    if (fmt == SparseFormat::kCOO) {
      const COOMatrix &coo = graph->GetCOOMatrix(etype);
      const COOMatrix &result = COORemove(coo, eids[etype]);
      new_rel_graph = CreateFromCOO(
          num_ntypes_rel, result.num_rows, result.num_cols, result.row,
          result.col);
      induced_eids_rel = result.data;
    } else if (fmt == SparseFormat::kCSR) {
      const CSRMatrix &csr = graph->GetCSRMatrix(etype);
      const CSRMatrix &result = CSRRemove(csr, eids[etype]);
      // The new graph gets consecutive edge ids 0..nnz-1; the ids carried by
      // `result.data` are reported separately as the induced ids.
      new_rel_graph = CreateFromCSR(
          num_ntypes_rel, result.num_rows, result.num_cols, result.indptr,
          result.indices,
          // TODO(BarclayII): make CSR support null eid array
          Range(
              0, result.indices->shape[0], result.indices->dtype.bits,
              result.indices->ctx));
      induced_eids_rel = result.data;
    } else if (fmt == SparseFormat::kCSC) {
      const CSRMatrix &csc = graph->GetCSCMatrix(etype);
      const CSRMatrix &result = CSRRemove(csc, eids[etype]);
      // Same treatment as the CSR branch, applied to the CSC matrix.
      new_rel_graph = CreateFromCSC(
          num_ntypes_rel, result.num_rows, result.num_cols, result.indptr,
          result.indices,
          // TODO(BarclayII): make CSR support null eid array
          Range(
              0, result.indices->shape[0], result.indices->dtype.bits,
              result.indices->ctx));
      induced_eids_rel = result.data;
    }

    rel_graphs.push_back(new_rel_graph);
    induced_eids.push_back(induced_eids_rel);
  }

  // Node counts are unchanged: only edges were removed.
  const HeteroGraphPtr new_graph =
      CreateHeteroGraph(
          graph->meta_graph(), rel_graphs, graph->NumVerticesPerType());
  return std::make_pair(new_graph, induced_eids);
}

/**
 * @brief C API wrapper around RemoveEdges().
 *
 * args[0] is the graph and args[1] a list of edge-id arrays. Returns a list
 * of two items: the new graph, and a list with the induced edge-id array of
 * each edge type.
 */
DGL_REGISTER_GLOBAL("transform._CAPI_DGLRemoveEdges")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const HeteroGraphRef graph_ref = args[0];
      const std::vector &eids = ListValueToVector(args[1]);

      HeteroGraphPtr new_graph;
      std::vector induced_eids;
      std::tie(new_graph, induced_eids) = RemoveEdges(graph_ref.sptr(), eids);

      // Box every array so it can be stored in the returned list.
      List induced_eids_ref;
      for (IdArray &array : induced_eids)
        induced_eids_ref.push_back(Value(MakeValue(array)));

      List ret;
      ret.push_back(HeteroGraphRef(new_graph));
      ret.push_back(induced_eids_ref);

      *rv = ret;
    });

};  // namespace transform

};  // namespace dgl

================================================
FILE: src/graph/transform/to_block.cc
================================================
/**
 *  Copyright 2019-2021 Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * @file graph/transform/to_block.cc
 * @brief Convert a graph to a bipartite-structured graph.
*
 * Tested via python wrapper: python/dgl/path/to/to_block.py
 */

#include "to_block.h"

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "../../array/cpu/concurrent_id_hash_map.h"

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace transform {

namespace {

/**
 * @brief CPU id mapper handed to ProcessToBlock().
 *
 * Builds one id map per node type from `src_nodes` (and, when the rhs nodes
 * are not part of the lhs set, a second one from `rhs_nodes`) and uses them to
 * translate the endpoints of every edge array.
 *
 * Parameters of operator():
 *  - graph: only used to look up the endpoint node types of each edge type.
 *  - include_rhs_in_lhs: when true, `rhs_nodes[ntype]->shape[0]` is passed to
 *    the lhs map's Init() as the seed count and destination ids are mapped
 *    through the lhs map; no rhs map is built.
 *  - num_ntypes: number of node types.
 *  - ctx: context used for the null arrays created here.
 *  - max_nodes_per_type: not read by this mapper.
 *  - edge_arrays: one edge array per edge type.
 *  - src_nodes / rhs_nodes: per-node-type id arrays.
 *  - lhs_nodes_ptr: if empty on entry it is filled, per node type, with the
 *    array returned by Init(), and `num_nodes_per_type[ntype]` is set to its
 *    length; otherwise neither is modified.
 *  - num_nodes_per_type_ptr: see lhs_nodes_ptr.
 *
 * Returns the mapped source and destination id arrays, one pair per edge
 * type; edge types whose `id` is undefined or whose `src` is a null array get
 * null arrays.
 */
template struct CPUIdsMapper {
  std::tuple, std::vector> operator()(
      const HeteroGraphPtr &graph, bool include_rhs_in_lhs, int64_t num_ntypes,
      const DGLContext &ctx, const std::vector &max_nodes_per_type,
      const std::vector &edge_arrays,
      const std::vector &src_nodes,
      const std::vector &rhs_nodes,
      std::vector *const lhs_nodes_ptr,
      std::vector *const num_nodes_per_type_ptr) {
    std::vector &lhs_nodes = *lhs_nodes_ptr;
    std::vector &num_nodes_per_type = *num_nodes_per_type_ptr;

    const bool generate_lhs_nodes = lhs_nodes.empty();
    if (generate_lhs_nodes) {
      lhs_nodes.reserve(num_ntypes);
    }

    // One map per node type for each side.
    // NOTE(review): presumably ConcurrentIdHashMap from the header included
    // above; the element type is not visible here.
    std::vector> lhs_nodes_map(num_ntypes);
    std::vector> rhs_nodes_map(num_ntypes);
    for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
      IdArray unique_ids = aten::NullArray(DGLDataTypeTraits::dtype, ctx);
      if (!aten::IsNullArray(src_nodes[ntype])) {
        auto num_seeds = include_rhs_in_lhs ? rhs_nodes[ntype]->shape[0] : 0;
        unique_ids = lhs_nodes_map[ntype].Init(src_nodes[ntype], num_seeds);
      }
      if (generate_lhs_nodes) {
        num_nodes_per_type[ntype] = unique_ids->shape[0];
        lhs_nodes.emplace_back(unique_ids);
      }
    }

    // Skip rhs mapping construction to save efforts when rhs is already
    // contained in lhs.
    if (!include_rhs_in_lhs) {
      for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
        if (!aten::IsNullArray(rhs_nodes[ntype])) {
          rhs_nodes_map[ntype].Init(
              rhs_nodes[ntype], rhs_nodes[ntype]->shape[0]);
        }
      }
    }

    // Map node numberings from global to local, and build pointer for CSR.
    std::vector new_lhs;
    std::vector new_rhs;
    new_lhs.reserve(edge_arrays.size());
    new_rhs.reserve(edge_arrays.size());
    const int64_t num_etypes = static_cast(edge_arrays.size());
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      const EdgeArray &edges = edge_arrays[etype];
      if (edges.id.defined() && !aten::IsNullArray(edges.src)) {
        const auto src_dst_types = graph->GetEndpointTypes(etype);
        const int src_type = src_dst_types.first;
        const int dst_type = src_dst_types.second;
        new_lhs.emplace_back(lhs_nodes_map[src_type].MapIds(edges.src));
        if (include_rhs_in_lhs) {
          new_rhs.emplace_back(lhs_nodes_map[dst_type].MapIds(edges.dst));
        } else {
          new_rhs.emplace_back(rhs_nodes_map[dst_type].MapIds(edges.dst));
        }
      } else {
        // Keep the outputs aligned with the edge types.
        new_lhs.emplace_back(
            aten::NullArray(DGLDataTypeTraits::dtype, ctx));
        new_rhs.emplace_back(
            aten::NullArray(DGLDataTypeTraits::dtype, ctx));
      }
    }
    return std::tuple, std::vector>(
        std::move(new_lhs), std::move(new_rhs));
  }
};

// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDGLCPU.
// It forwards all arguments to ProcessToBlock() with a CPUIdsMapper.
template std::tuple> ToBlockCPU(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes_ptr) {
  return dgl::transform::ProcessToBlock(
      graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes_ptr,
      CPUIdsMapper());
}

}  // namespace

/**
 * @brief Device-independent part of ToBlock: collects the edges and candidate
 *        source nodes, delegates id mapping to `ids_mapper`, and assembles the
 *        resulting block.
 *
 * @param graph The graph from which to extract the block.
 * @param rhs_nodes Destination nodes, one array per node type; its size must
 *        equal the number of node types.
 * @param include_rhs_in_lhs Whether the rhs nodes are also placed (first) in
 *        the generated source-node arrays.
 * @param lhs_nodes_ptr [in/out] Source nodes per node type. When empty on
 *        entry, candidate source nodes are gathered here and passed to the
 *        mapper; otherwise the given arrays are used as they are.
 * @param ids_mapper Callable that maps edge endpoints to local ids.
 *
 * @return A tuple of the new graph and the per-edge-type induced edge ids.
 */
template std::tuple> ProcessToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes_ptr,
    IdsMapper &&ids_mapper) {
  std::vector &lhs_nodes = *lhs_nodes_ptr;
  const bool generate_lhs_nodes = lhs_nodes.empty();

  const auto &ctx = graph->Context();
  auto device = runtime::DeviceAPI::Get(ctx);

  // Since DST nodes are included in SRC nodes, a common requirement is to fetch
  // the DST node features from the SRC nodes features. To avoid expensive
  // sparse lookup, the function assures that the DST nodes in both SRC and DST
  // sets have the same ids. As a result, given the node feature tensor ``X`` of
  // type ``utype``, the following code finds the corresponding DST node
  // features of type ``vtype``:
  // NOTE(review): the example this sentence announces is not present here.

  const int64_t num_etypes = graph->NumEdgeTypes();
  const int64_t num_ntypes = graph->NumVertexTypes();

  CHECK(rhs_nodes.size() == static_cast(num_ntypes))
      << "rhs_nodes not given for every node type";

  // Fetch the edges of every edge type whose destination type has a non-null
  // rhs array; the other entries stay default-constructed.
  std::vector edge_arrays(num_etypes);
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const auto src_dst_types = graph->GetEndpointTypes(etype);
    const dgl_type_t dsttype = src_dst_types.second;
    if (!aten::IsNullArray(rhs_nodes[dsttype])) {
      edge_arrays[etype] = graph->Edges(etype);
    }
  }

  // Count lhs and rhs nodes.
  // Layout: [0, num_ntypes) holds lhs counts, [num_ntypes, 2 * num_ntypes)
  // holds rhs counts.
  std::vector maxNodesPerType(num_ntypes * 2, 0);
  for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
    maxNodesPerType[ntype + num_ntypes] += rhs_nodes[ntype]->shape[0];

    if (generate_lhs_nodes) {
      if (include_rhs_in_lhs) {
        maxNodesPerType[ntype] += rhs_nodes[ntype]->shape[0];
      }
    } else {
      maxNodesPerType[ntype] += lhs_nodes[ntype]->shape[0];
    }
  }
  if (generate_lhs_nodes) {
    // We don't have lhs_nodes, so we need to count inbound edges to get an
    // upper bound.
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      const auto src_dst_types = graph->GetEndpointTypes(etype);
      const dgl_type_t srctype = src_dst_types.first;
      if (edge_arrays[etype].src.defined()) {
        maxNodesPerType[srctype] += edge_arrays[etype].src->shape[0];
      }
    }
  }

  // Gather lhs_nodes.
  std::vector src_nodes(num_ntypes);
  if (generate_lhs_nodes) {
    // Running write position, in bytes, into each src_nodes array.
    std::vector src_node_offsets(num_ntypes, 0);
    for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
      src_nodes[ntype] =
          NewIdArray(maxNodesPerType[ntype], ctx, sizeof(IdType) * 8);
      if (include_rhs_in_lhs) {
        // Place rhs nodes first.
        device->CopyDataFromTo(
            rhs_nodes[ntype].Ptr(), 0, src_nodes[ntype].Ptr(),
            src_node_offsets[ntype],
            sizeof(IdType) * rhs_nodes[ntype]->shape[0], rhs_nodes[ntype]->ctx,
            src_nodes[ntype]->ctx, rhs_nodes[ntype]->dtype);
        src_node_offsets[ntype] += sizeof(IdType) * rhs_nodes[ntype]->shape[0];
      }
    }
    // Append the (not yet de-duplicated) source endpoints of every edge type
    // to the array of its source node type.
    for (int64_t etype = 0; etype < num_etypes; ++etype) {
      const auto src_dst_types = graph->GetEndpointTypes(etype);
      const dgl_type_t srctype = src_dst_types.first;
      if (edge_arrays[etype].src.defined()) {
        // The source context and dtype passed here are read from
        // rhs_nodes[srctype], not from the edge array being copied.
        device->CopyDataFromTo(
            edge_arrays[etype].src.Ptr(), 0, src_nodes[srctype].Ptr(),
            src_node_offsets[srctype],
            sizeof(IdType) * edge_arrays[etype].src->shape[0],
            rhs_nodes[srctype]->ctx, src_nodes[srctype]->ctx,
            rhs_nodes[srctype]->dtype);

        src_node_offsets[srctype] +=
            sizeof(IdType) * edge_arrays[etype].src->shape[0];
      }
    }
  } else {
    for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
      src_nodes[ntype] = lhs_nodes[ntype];
    }
  }

  // Same layout as maxNodesPerType: lhs counts first, then rhs counts.
  // NOTE(review): this function only writes the rhs half; the lhs half is
  // left to ids_mapper, and the CPU mapper in this file fills it only when it
  // generates lhs_nodes. Confirm the caller-supplied lhs_nodes case.
  std::vector num_nodes_per_type(num_ntypes * 2);
  // Populate RHS nodes from what we already know.
  for (int64_t ntype = 0; ntype < num_ntypes; ++ntype) {
    num_nodes_per_type[num_ntypes + ntype] = rhs_nodes[ntype]->shape[0];
  }

  std::vector new_lhs;
  std::vector new_rhs;
  std::tie(new_lhs, new_rhs) = ids_mapper(
      graph, include_rhs_in_lhs, num_ntypes, ctx, maxNodesPerType, edge_arrays,
      src_nodes, rhs_nodes, lhs_nodes_ptr, &num_nodes_per_type);

  // Induced edges are the original edge ids of each fetched edge type, or a
  // null array for edge types that were not fetched.
  std::vector induced_edges;
  induced_edges.reserve(num_etypes);
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    if (edge_arrays[etype].id.defined()) {
      induced_edges.push_back(edge_arrays[etype].id);
    } else {
      induced_edges.push_back(
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx));
    }
  }

  // Build metagraph.
  // Destination node types are shifted by num_ntypes so the new metagraph has
  // 2 * num_ntypes nodes.
  const auto meta_graph = graph->meta_graph();
  const EdgeArray etypes = meta_graph->Edges("eid");
  const IdArray new_dst = Add(etypes.dst, num_ntypes);
  const auto new_meta_graph =
      ImmutableGraph::CreateFromCOO(num_ntypes * 2, etypes.src, new_dst);

  // Allocate vector for graph relations while GPU is busy.
  std::vector rel_graphs;
  rel_graphs.reserve(num_etypes);

  // Build the heterograph.
  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const auto src_dst_types = graph->GetEndpointTypes(etype);
    const dgl_type_t srctype = src_dst_types.first;
    const dgl_type_t dsttype = src_dst_types.second;

    if (rhs_nodes[dsttype]->shape[0] == 0) {
      // No rhs nodes are given for this edge type. Create an empty graph.
      rel_graphs.push_back(CreateFromCOO(
          2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx),
          aten::NullArray(DGLDataType{kDGLInt, sizeof(IdType) * 8, 1}, ctx)));
    } else {
      rel_graphs.push_back(CreateFromCOO(
          2, lhs_nodes[srctype]->shape[0], rhs_nodes[dsttype]->shape[0],
          new_lhs[etype], new_rhs[etype]));
    }
  }

  HeteroGraphPtr new_graph =
      CreateHeteroGraph(new_meta_graph, rel_graphs, num_nodes_per_type);

  // Return the new graph, the new src nodes, and new edges.
  // (The source nodes are handed back through lhs_nodes_ptr; the tuple holds
  // the graph and the induced edges.)
  return std::make_tuple(new_graph, induced_edges);
}

// Explicit instantiations of ProcessToBlock.
template std::tuple> ProcessToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes_ptr,
    IdsMapper &&get_maping_ids);

template std::tuple> ProcessToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes_ptr,
    IdsMapper &&get_maping_ids);

// ToBlock specializations that forward to ToBlockCPU.
template <>
std::tuple> ToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes) {
  return ToBlockCPU(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

template <>
std::tuple> ToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes) {
  return ToBlockCPU(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

#ifdef DGL_USE_CUDA

// Forward declaration of GPU ToBlock implementations - actual implementation is
// in
// ./cuda/cuda_to_block.cu
// This is to get around the broken name mangling in VS2019 CL 16.5.5 +
// CUDA 11.3 which complains that the two template specializations have the same
// signature.
std::tuple> ToBlockGPU32(
    HeteroGraphPtr, const std::vector &, bool,
    std::vector *const);
std::tuple> ToBlockGPU64(
    HeteroGraphPtr, const std::vector &, bool,
    std::vector *const);

// ToBlock specializations that forward to the GPU implementations.
template <>
std::tuple> ToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes) {
  return ToBlockGPU32(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

template <>
std::tuple> ToBlock(
    HeteroGraphPtr graph, const std::vector &rhs_nodes,
    bool include_rhs_in_lhs, std::vector *const lhs_nodes) {
  return ToBlockGPU64(graph, rhs_nodes, include_rhs_in_lhs, lhs_nodes);
}

#endif  // DGL_USE_CUDA

/**
 * @brief C API wrapper around ToBlock().
 *
 * args[0] is the graph, args[1] the list of rhs node arrays, args[2] the
 * include_rhs_in_lhs flag and args[3] the list of lhs node arrays. ToBlock is
 * dispatched on the graph's device type and id type. Returns a list of three
 * items: the new graph, the lhs node arrays, and the induced edge arrays.
 */
DGL_REGISTER_GLOBAL("capi._CAPI_DGLToBlock")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const HeteroGraphRef graph_ref = args[0];
      const std::vector &rhs_nodes =
          ListValueToVector(args[1]);
      const bool include_rhs_in_lhs = args[2];
      // Taken by value: ToBlock writes to it through the pointer below.
      std::vector lhs_nodes = ListValueToVector(args[3]);

      HeteroGraphPtr new_graph;
      std::vector induced_edges;

      ATEN_XPU_SWITCH_CUDA(graph_ref->Context().device_type, XPU, "ToBlock", {
        ATEN_ID_TYPE_SWITCH(graph_ref->DataType(), IdType, {
          std::tie(new_graph, induced_edges) = ToBlock(
              graph_ref.sptr(), rhs_nodes, include_rhs_in_lhs, &lhs_nodes);
        });
      });

      List lhs_nodes_ref;
      for (IdArray &array : lhs_nodes)
        lhs_nodes_ref.push_back(Value(MakeValue(array)));
      List induced_edges_ref;
      for (IdArray &array : induced_edges)
        induced_edges_ref.push_back(Value(MakeValue(array)));

      List ret;
      ret.push_back(HeteroGraphRef(new_graph));
      ret.push_back(lhs_nodes_ref);
      ret.push_back(induced_edges_ref);

      *rv = ret;
    });

};  // namespace transform

};  // namespace dgl

================================================
FILE: src/graph/transform/to_block.h
================================================
/**
 *  Copyright 2021 Contributors
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
*  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 * @file graph/transform/to_block.h
 * @brief Functions to convert a set of edges into a graph block with local
 * ids.
 */

#ifndef DGL_GRAPH_TRANSFORM_TO_BLOCK_H_
#define DGL_GRAPH_TRANSFORM_TO_BLOCK_H_

#include
#include
#include
#include
#include

namespace dgl {
namespace transform {

/** @brief Mapper used in block generation which maps left and right Id arrays
 * in the original MFG to new arrays with continuous numbers.
 *
 * The callable takes ten arguments and returns a tuple of two vectors.
 * NOTE(review): the argument order presumably matches the ids_mapper call in
 * ProcessToBlock (to_block.cc): graph, include_rhs_in_lhs, num_ntypes, ctx,
 * max nodes per type, edge arrays, src nodes, rhs nodes, lhs nodes (out),
 * num nodes per type (out). Confirm against that call.
 */
using IdsMapper = std::function, std::vector>(
    const HeteroGraphPtr&, bool, int64_t, const DGLContext&,
    const std::vector&, const std::vector&,
    const std::vector&, const std::vector&,
    std::vector* const, std::vector* const)>;

/**
 * @brief Create a graph block from the set of
 * src and dst nodes (lhs and rhs respectively).
 *
 * @tparam XPU The type of device to operate on.
 * @tparam IdType The type to use as an index.
 * @param graph The graph from which to extract the block.
 * @param rhs_nodes The destination nodes of the block.
 * @param include_rhs_in_lhs Whether or not to include the
 * destination nodes of the block in the sources nodes.
 * @param [in/out] lhs_nodes The source nodes of the block.
 *
 * @return The block and the induced edges.
 */
template std::tuple> ToBlock(
    HeteroGraphPtr graph, const std::vector& rhs_nodes,
    bool include_rhs_in_lhs, std::vector* lhs_nodes);

/**
 * @brief A wrapper function shared by CPU and GPU ```ToBlock```
 * which deals with their common preprocessing and postprocessing work.
 *
 * @tparam IdType The type to use as an index.
 * @param graph The graph from which to extract the block.
* @param rhs_nodes The destination nodes of the block.
 * @param include_rhs_in_lhs Whether or not to include the
 * destination nodes of the block in the sources nodes.
 * @param [in/out] lhs_nodes_ptr The source nodes of the block.
 * @param get_maping_ids The function to get mapped ids from original ids.
 *
 * @return The block and the induced edges.
 */
template std::tuple> ProcessToBlock(
    HeteroGraphPtr graph, const std::vector& rhs_nodes,
    bool include_rhs_in_lhs, std::vector* const lhs_nodes_ptr,
    IdsMapper&& get_maping_ids);

}  // namespace transform
}  // namespace dgl

#endif  // DGL_GRAPH_TRANSFORM_TO_BLOCK_H_

================================================
FILE: src/graph/transform/to_simple.cc
================================================
/**
 *  Copyright (c) 2019 by Contributors
 * @file graph/transform/to_simple.cc
 * @brief Convert multigraphs to simple graphs
 */

#include
#include
#include
#include
#include
#include

#include "../../c_api_common.h"
#include "../heterograph.h"
#include "../unit_graph.h"

namespace dgl {

using namespace dgl::runtime;
using namespace dgl::aten;

namespace transform {

/**
 * @brief Apply ToSimple() to every relation graph of a heterograph.
 *
 * @param graph The input graph.
 *
 * @return A tuple of (new graph, counts, edge_maps), where counts[etype] and
 *         edge_maps[etype] are the second and third results of
 *         ugs[etype]->ToSimple(). The new graph keeps the input's metagraph
 *         and per-type vertex counts.
 */
std::tuple, std::vector>
ToSimpleGraph(const HeteroGraphPtr graph) {
  const int64_t num_etypes = graph->NumEdgeTypes();
  const auto metagraph = graph->meta_graph();
  const auto &ugs =
      std::dynamic_pointer_cast(graph)->relation_graphs();

  std::vector counts(num_etypes), edge_maps(num_etypes);
  std::vector rel_graphs(num_etypes);

  for (int64_t etype = 0; etype < num_etypes; ++etype) {
    const auto result = ugs[etype]->ToSimple();
    std::tie(rel_graphs[etype], counts[etype], edge_maps[etype]) = result;
  }

  const HeteroGraphPtr result =
      CreateHeteroGraph(metagraph, rel_graphs, graph->NumVerticesPerType());

  return std::make_tuple(result, counts, edge_maps);
}

/**
 * @brief C API wrapper around ToSimpleGraph().
 *
 * args[0] is the graph. Returns a list of three items: the new graph, the
 * list of count arrays, and the list of edge-map arrays.
 */
DGL_REGISTER_GLOBAL("transform._CAPI_DGLToSimpleHetero")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const HeteroGraphRef graph_ref = args[0];

      const auto result = ToSimpleGraph(graph_ref.sptr());

      List counts, edge_maps;
      for (const
               IdArray &count : std::get<1>(result))
        counts.push_back(Value(MakeValue(count)));
      for (const IdArray &edge_map : std::get<2>(result))
        edge_maps.push_back(Value(MakeValue(edge_map)));

      List ret;
      ret.push_back(HeteroGraphRef(std::get<0>(result)));
      ret.push_back(counts);
      ret.push_back(edge_maps);

      *rv = ret;
    });

};  // namespace transform

};  // namespace dgl

================================================
FILE: src/graph/transform/union_partition.cc
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file graph/transform/union_partition.cc
 * @brief Functions for partition, union multiple graphs.
 */
#include "../heterograph.h"
using namespace dgl::runtime;

namespace dgl {

/**
 * @brief Union the edges of several graphs that share the same node sets.
 *
 * For every edge type all components must report the same number of source
 * and destination vertices and the same allowed formats as component 0. The
 * matrices are merged with aten::UnionCoo()/aten::UnionCsr(), using the first
 * available format in the order COO, CSR, CSC.
 *
 * @param meta_graph Metagraph of the result.
 * @param component_graphs Graphs to union; must not be empty.
 *
 * @return The union graph, with node counts taken from component 0.
 */
HeteroGraphPtr JointUnionHeteroGraph(
    GraphPtr meta_graph, const std::vector& component_graphs) {
  // NOTE(review): the check only requires a non-empty list, while the message
  // speaks of "at least two graphs" - confirm which is intended.
  CHECK_GT(component_graphs.size(), 0)
      << "Input graph list has at least two graphs";
  std::vector rel_graphs(meta_graph->NumEdges());
  std::vector num_nodes_per_type(meta_graph->NumVertices(), 0);

  // Loop over all canonical etypes
  for (dgl_type_t etype = 0; etype < meta_graph->NumEdges(); ++etype) {
    auto pair = meta_graph->FindEdge(etype);
    const dgl_type_t src_vtype = pair.first;
    const dgl_type_t dst_vtype = pair.second;
    uint64_t num_src_v = component_graphs[0]->NumVertices(src_vtype);
    uint64_t num_dst_v = component_graphs[0]->NumVertices(dst_vtype);
    HeteroGraphPtr rgptr = nullptr;

    // ALL = CSC | CSR | COO
    const dgl_format_code_t code =
        component_graphs[0]->GetRelationGraph(etype)->GetAllowedFormats();
    // get common format
    for (size_t i = 0; i < component_graphs.size(); ++i) {
      const auto& cg = component_graphs[i];
      CHECK_EQ(num_src_v, component_graphs[i]->NumVertices(src_vtype))
          << "Input graph[" << i
          << "] should have same number of src vertices as input graph[0]";
      CHECK_EQ(num_dst_v, component_graphs[i]->NumVertices(dst_vtype))
          << "Input graph[" << i
          << "] should have same number of dst vertices as input graph[0]";

      const dgl_format_code_t curr_code =
          cg->GetRelationGraph(etype)->GetAllowedFormats();
      if (curr_code != code)
        LOG(FATAL) << "All components should have the same formats";
    }

    // prefer COO
    if (FORMAT_HAS_COO(code)) {
      std::vector coos;
      for (size_t i = 0; i < component_graphs.size(); ++i) {
        const auto& cg = component_graphs[i];
        aten::COOMatrix coo = cg->GetCOOMatrix(etype);
        coos.push_back(coo);
      }

      aten::COOMatrix res = aten::UnionCoo(coos);
      rgptr =
          UnitGraph::CreateFromCOO((src_vtype == dst_vtype) ? 1 : 2, res, code);
    } else if (FORMAT_HAS_CSR(code)) {
      std::vector csrs;
      for (size_t i = 0; i < component_graphs.size(); ++i) {
        const auto& cg = component_graphs[i];
        aten::CSRMatrix csr = cg->GetCSRMatrix(etype);
        csrs.push_back(csr);
      }

      aten::CSRMatrix res = aten::UnionCsr(csrs);
      rgptr =
          UnitGraph::CreateFromCSR((src_vtype == dst_vtype) ? 1 : 2, res, code);
    } else if (FORMAT_HAS_CSC(code)) {
      // CSR and CSC have the same storage format, i.e. CSRMatrix
      std::vector cscs;
      for (size_t i = 0; i < component_graphs.size(); ++i) {
        const auto& cg = component_graphs[i];
        aten::CSRMatrix csc = cg->GetCSCMatrix(etype);
        cscs.push_back(csc);
      }

      aten::CSRMatrix res = aten::UnionCsr(cscs);
      rgptr =
          UnitGraph::CreateFromCSC((src_vtype == dst_vtype) ?
                                   1 : 2, res, code);
    }
    rel_graphs[etype] = rgptr;
    // Node counts are only recorded for node types that appear as an endpoint
    // of some edge type; the others keep their initial 0.
    num_nodes_per_type[src_vtype] = num_src_v;
    num_nodes_per_type[dst_vtype] = num_dst_v;
  }

  return CreateHeteroGraph(
      meta_graph, rel_graphs, std::move(num_nodes_per_type));
}

/**
 * @brief Batch several graphs into one by disjoint union.
 *
 * The node count of each type is the sum over all components. Per edge type
 * the matrices are merged with aten::DisjointUnionCoo()/DisjointUnionCsr(),
 * using the first available format in the order COO, CSR, CSC. All components
 * must report the same allowed formats as component 0.
 *
 * @param meta_graph Metagraph of the result.
 * @param component_graphs Graphs to batch; must not be empty.
 *
 * @return The batched graph.
 */
HeteroGraphPtr DisjointUnionHeteroGraph2(
    GraphPtr meta_graph, const std::vector& component_graphs) {
  CHECK_GT(component_graphs.size(), 0) << "Input graph list is empty";
  std::vector rel_graphs(meta_graph->NumEdges());
  std::vector num_nodes_per_type(meta_graph->NumVertices(), 0);

  // Loop over all ntypes
  for (dgl_type_t vtype = 0; vtype < meta_graph->NumVertices(); ++vtype) {
    uint64_t offset = 0;
    for (const auto& cg : component_graphs) offset += cg->NumVertices(vtype);
    num_nodes_per_type[vtype] = offset;
  }

  // Loop over all canonical etypes
  for (dgl_type_t etype = 0; etype < meta_graph->NumEdges(); ++etype) {
    auto pair = meta_graph->FindEdge(etype);
    const dgl_type_t src_vtype = pair.first;
    const dgl_type_t dst_vtype = pair.second;
    HeteroGraphPtr rgptr = nullptr;

    const dgl_format_code_t code =
        component_graphs[0]->GetRelationGraph(etype)->GetAllowedFormats();
    // do some preprocess
    for (const auto& cg : component_graphs) {
      const dgl_format_code_t cur_code =
          cg->GetRelationGraph(etype)->GetAllowedFormats();
      if (cur_code != code)
        LOG(FATAL) << "All components should have the same formats";
    }

    // prefer COO
    if (FORMAT_HAS_COO(code)) {
      std::vector coos;
      for (const auto& cg : component_graphs) {
        aten::COOMatrix coo = cg->GetCOOMatrix(etype);
        coos.push_back(coo);
      }

      aten::COOMatrix res = aten::DisjointUnionCoo(coos);

      rgptr =
          UnitGraph::CreateFromCOO((src_vtype == dst_vtype) ? 1 : 2, res, code);
    } else if (FORMAT_HAS_CSR(code)) {
      std::vector csrs;
      for (const auto& cg : component_graphs) {
        aten::CSRMatrix csr = cg->GetCSRMatrix(etype);
        csrs.push_back(csr);
      }

      aten::CSRMatrix res = aten::DisjointUnionCsr(csrs);

      rgptr =
          UnitGraph::CreateFromCSR((src_vtype == dst_vtype) ?
                                   1 : 2, res, code);
    } else if (FORMAT_HAS_CSC(code)) {
      // CSR and CSC have the same storage format, i.e. CSRMatrix
      std::vector cscs;
      for (const auto& cg : component_graphs) {
        aten::CSRMatrix csc = cg->GetCSCMatrix(etype);
        cscs.push_back(csc);
      }

      aten::CSRMatrix res = aten::DisjointUnionCsr(cscs);
      rgptr =
          UnitGraph::CreateFromCSC((src_vtype == dst_vtype) ? 1 : 2, res, code);
    }
    rel_graphs[etype] = rgptr;
  }

  return CreateHeteroGraph(
      meta_graph, rel_graphs, std::move(num_nodes_per_type));
}

/**
 * @brief Split a batched graph back into its components.
 *
 * @param meta_graph Metagraph shared by all components.
 * @param batched_graph The batched graph.
 * @param vertex_sizes 64-bit array, flattened as [vtype * batch_size + g],
 *        holding the node count of each type in each component.
 * @param edge_sizes 64-bit array, flattened as [etype * batch_size + g],
 *        holding the edge count of each type in each component.
 *
 * @return One graph per component. The batch size is
 *         len(vertex_sizes) / number of vertex types.
 *
 * The per-type sums of both arrays are checked against the batched graph.
 * The sparse format is chosen from the allowed formats of relation graph 0
 * and used for every edge type.
 */
std::vector DisjointPartitionHeteroBySizes2(
    GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray vertex_sizes,
    IdArray edge_sizes) {
  // Sanity check for vertex sizes
  CHECK_EQ(vertex_sizes->dtype.bits, 64)
      << "dtype of vertex_sizes should be int64";
  CHECK_EQ(edge_sizes->dtype.bits, 64) << "dtype of edge_sizes should be int64";
  const uint64_t len_vertex_sizes = vertex_sizes->shape[0];
  const uint64_t* vertex_sizes_data =
      static_cast(vertex_sizes->data);
  const uint64_t num_vertex_types = meta_graph->NumVertices();
  const uint64_t batch_size = len_vertex_sizes / num_vertex_types;

  // Map vertex type to the corresponding node cum sum
  std::vector> vertex_cumsum;
  vertex_cumsum.resize(num_vertex_types);
  // Loop over all vertex types
  for (uint64_t vtype = 0; vtype < num_vertex_types; ++vtype) {
    vertex_cumsum[vtype].push_back(0);
    for (uint64_t g = 0; g < batch_size; ++g) {
      // We've flattened the number of vertices in the batch for all types
      vertex_cumsum[vtype].push_back(
          vertex_cumsum[vtype][g] + vertex_sizes_data[vtype * batch_size + g]);
    }
    CHECK_EQ(
        vertex_cumsum[vtype][batch_size], batched_graph->NumVertices(vtype))
        << "Sum of the given sizes must equal to the number of nodes for type "
        << vtype;
  }

  // Sanity check for edge sizes
  const uint64_t* edge_sizes_data =
      static_cast(edge_sizes->data);
  const uint64_t num_edge_types = meta_graph->NumEdges();
  // Map edge type to the corresponding edge cum sum
  std::vector> edge_cumsum;
  edge_cumsum.resize(num_edge_types);
  // Loop over all edge types
  for (uint64_t etype = 0; etype < num_edge_types; ++etype) {
    edge_cumsum[etype].push_back(0);
    for (uint64_t g = 0; g < batch_size; ++g) {
      // We've flattened the number of edges in the batch for all types
      edge_cumsum[etype].push_back(
          edge_cumsum[etype][g] + edge_sizes_data[etype * batch_size + g]);
    }
    CHECK_EQ(edge_cumsum[etype][batch_size], batched_graph->NumEdges(etype))
        << "Sum of the given sizes must equal to the number of edges for type "
        << etype;
  }

  // Construct relation graphs for unbatched graphs
  // rel_graphs[g] collects the relation graphs of component g in edge-type
  // order.
  std::vector> rel_graphs;
  rel_graphs.resize(batch_size);
  // Loop over all edge types
  auto code = batched_graph->GetRelationGraph(0)->GetAllowedFormats();
  if (FORMAT_HAS_COO(code)) {
    for (uint64_t etype = 0; etype < num_edge_types; ++etype) {
      auto pair = meta_graph->FindEdge(etype);
      const dgl_type_t src_vtype = pair.first;
      const dgl_type_t dst_vtype = pair.second;
      aten::COOMatrix coo = batched_graph->GetCOOMatrix(etype);
      auto res = aten::DisjointPartitionCooBySizes(
          coo, batch_size, edge_cumsum[etype], vertex_cumsum[src_vtype],
          vertex_cumsum[dst_vtype]);
      for (uint64_t g = 0; g < batch_size; ++g) {
        HeteroGraphPtr rgptr = UnitGraph::CreateFromCOO(
            (src_vtype == dst_vtype) ? 1 : 2, res[g], code);
        rel_graphs[g].push_back(rgptr);
      }
    }
  } else if (FORMAT_HAS_CSR(code)) {
    for (uint64_t etype = 0; etype < num_edge_types; ++etype) {
      auto pair = meta_graph->FindEdge(etype);
      const dgl_type_t src_vtype = pair.first;
      const dgl_type_t dst_vtype = pair.second;
      aten::CSRMatrix csr = batched_graph->GetCSRMatrix(etype);
      auto res = aten::DisjointPartitionCsrBySizes(
          csr, batch_size, edge_cumsum[etype], vertex_cumsum[src_vtype],
          vertex_cumsum[dst_vtype]);
      for (uint64_t g = 0; g < batch_size; ++g) {
        HeteroGraphPtr rgptr = UnitGraph::CreateFromCSR(
            (src_vtype == dst_vtype) ?
                1 : 2, res[g], code);
        rel_graphs[g].push_back(rgptr);
      }
    }
  } else if (FORMAT_HAS_CSC(code)) {
    for (uint64_t etype = 0; etype < num_edge_types; ++etype) {
      auto pair = meta_graph->FindEdge(etype);
      const dgl_type_t src_vtype = pair.first;
      const dgl_type_t dst_vtype = pair.second;
      // CSR and CSC have the same storage format, i.e. CSRMatrix
      aten::CSRMatrix csc = batched_graph->GetCSCMatrix(etype);
      // Note the swapped order: the destination cumsum is passed before the
      // source cumsum for the CSC matrix.
      auto res = aten::DisjointPartitionCsrBySizes(
          csc, batch_size, edge_cumsum[etype], vertex_cumsum[dst_vtype],
          vertex_cumsum[src_vtype]);
      for (uint64_t g = 0; g < batch_size; ++g) {
        HeteroGraphPtr rgptr = UnitGraph::CreateFromCSC(
            (src_vtype == dst_vtype) ? 1 : 2, res[g], code);
        rel_graphs[g].push_back(rgptr);
      }
    }
  }

  // Assemble each component from its relation graphs and its own node counts.
  std::vector rst;
  std::vector num_nodes_per_type(num_vertex_types);
  for (uint64_t g = 0; g < batch_size; ++g) {
    for (uint64_t i = 0; i < num_vertex_types; ++i)
      num_nodes_per_type[i] = vertex_sizes_data[i * batch_size + g];
    rst.push_back(
        CreateHeteroGraph(meta_graph, rel_graphs[g], num_nodes_per_type));
  }
  return rst;
}

HeteroGraphPtr SliceHeteroGraph(
    GraphPtr meta_graph, HeteroGraphPtr batched_graph,
    IdArray num_nodes_per_type, IdArray start_nid_per_type,
    IdArray num_edges_per_type, IdArray start_eid_per_type) {
  std::vector rel_graphs(meta_graph->NumEdges());

  const uint64_t* start_nid_per_type_data =
      static_cast(start_nid_per_type->data);
  const uint64_t* num_nodes_per_type_data =
      static_cast(num_nodes_per_type->data);

  const uint64_t* start_eid_per_type_data =
      static_cast(start_eid_per_type->data);
  const uint64_t* num_edges_per_type_data =
      static_cast(num_edges_per_type->data);

  // Map vertex type to the corresponding node range
  const uint64_t num_vertex_types = meta_graph->NumVertices();
  std::vector> vertex_range;
  vertex_range.resize(num_vertex_types);
  // Loop over all vertex types
  for (uint64_t vtype = 0; vtype < num_vertex_types; ++vtype) {
    vertex_range[vtype].push_back(start_nid_per_type_data[vtype]);
    vertex_range[vtype].push_back(
        start_nid_per_type_data[vtype] +
num_nodes_per_type_data[vtype]); } // Loop over all canonical etypes for (dgl_type_t etype = 0; etype < meta_graph->NumEdges(); ++etype) { auto pair = meta_graph->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; HeteroGraphPtr rgptr = nullptr; const dgl_format_code_t code = batched_graph->GetRelationGraph(etype)->GetAllowedFormats(); // handle graph without edges std::vector edge_range; edge_range.push_back(start_eid_per_type_data[etype]); edge_range.push_back( start_eid_per_type_data[etype] + num_edges_per_type_data[etype]); // prefer COO if (FORMAT_HAS_COO(code)) { aten::COOMatrix coo = batched_graph->GetCOOMatrix(etype); aten::COOMatrix res = aten::COOSliceContiguousChunk( coo, edge_range, vertex_range[src_vtype], vertex_range[dst_vtype]); rgptr = UnitGraph::CreateFromCOO((src_vtype == dst_vtype) ? 1 : 2, res, code); } else if (FORMAT_HAS_CSR(code)) { aten::CSRMatrix csr = batched_graph->GetCSRMatrix(etype); aten::CSRMatrix res = aten::CSRSliceContiguousChunk( csr, edge_range, vertex_range[src_vtype], vertex_range[dst_vtype]); rgptr = UnitGraph::CreateFromCSR((src_vtype == dst_vtype) ? 1 : 2, res, code); } else if (FORMAT_HAS_CSC(code)) { // CSR and CSC have the same storage format, i.e. CSRMatrix aten::CSRMatrix csc = batched_graph->GetCSCMatrix(etype); aten::CSRMatrix res = aten::CSRSliceContiguousChunk( csc, edge_range, vertex_range[dst_vtype], vertex_range[src_vtype]); rgptr = UnitGraph::CreateFromCSC((src_vtype == dst_vtype) ? 
1 : 2, res, code); } rel_graphs[etype] = rgptr; } return CreateHeteroGraph( meta_graph, rel_graphs, num_nodes_per_type.ToVector()); } template std::vector DisjointPartitionHeteroBySizes( GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray vertex_sizes, IdArray edge_sizes) { // Sanity check for vertex sizes const uint64_t len_vertex_sizes = vertex_sizes->shape[0]; const uint64_t* vertex_sizes_data = static_cast(vertex_sizes->data); const uint64_t num_vertex_types = meta_graph->NumVertices(); const uint64_t batch_size = len_vertex_sizes / num_vertex_types; // Map vertex type to the corresponding node cum sum std::vector> vertex_cumsum; vertex_cumsum.resize(num_vertex_types); // Loop over all vertex types for (uint64_t vtype = 0; vtype < num_vertex_types; ++vtype) { vertex_cumsum[vtype].push_back(0); for (uint64_t g = 0; g < batch_size; ++g) { // We've flattened the number of vertices in the batch for all types vertex_cumsum[vtype].push_back( vertex_cumsum[vtype][g] + vertex_sizes_data[vtype * batch_size + g]); } CHECK_EQ( vertex_cumsum[vtype][batch_size], batched_graph->NumVertices(vtype)) << "Sum of the given sizes must equal to the number of nodes for type " << vtype; } // Sanity check for edge sizes const uint64_t* edge_sizes_data = static_cast(edge_sizes->data); const uint64_t num_edge_types = meta_graph->NumEdges(); // Map edge type to the corresponding edge cum sum std::vector> edge_cumsum; edge_cumsum.resize(num_edge_types); // Loop over all edge types for (uint64_t etype = 0; etype < num_edge_types; ++etype) { edge_cumsum[etype].push_back(0); for (uint64_t g = 0; g < batch_size; ++g) { // We've flattened the number of edges in the batch for all types edge_cumsum[etype].push_back( edge_cumsum[etype][g] + edge_sizes_data[etype * batch_size + g]); } CHECK_EQ(edge_cumsum[etype][batch_size], batched_graph->NumEdges(etype)) << "Sum of the given sizes must equal to the number of edges for type " << etype; } // Construct relation graphs for unbatched graphs 
std::vector> rel_graphs; rel_graphs.resize(batch_size); // Loop over all edge types for (uint64_t etype = 0; etype < num_edge_types; ++etype) { auto pair = meta_graph->FindEdge(etype); const dgl_type_t src_vtype = pair.first; const dgl_type_t dst_vtype = pair.second; EdgeArray edges = batched_graph->Edges(etype); const IdType* edges_src_data = static_cast(edges.src->data); const IdType* edges_dst_data = static_cast(edges.dst->data); // Loop over all graphs to be unbatched for (uint64_t g = 0; g < batch_size; ++g) { std::vector result_src, result_dst; // Loop over the chunk of edges for the specified graph and edge type for (uint64_t e = edge_cumsum[etype][g]; e < edge_cumsum[etype][g + 1]; ++e) { // TODO(mufei): Should use array operations to implement this. result_src.push_back(edges_src_data[e] - vertex_cumsum[src_vtype][g]); result_dst.push_back(edges_dst_data[e] - vertex_cumsum[dst_vtype][g]); } HeteroGraphPtr rgptr = UnitGraph::CreateFromCOO( (src_vtype == dst_vtype) ? 1 : 2, vertex_sizes_data[src_vtype * batch_size + g], vertex_sizes_data[dst_vtype * batch_size + g], aten::VecToIdArray(result_src, sizeof(IdType) * 8), aten::VecToIdArray(result_dst, sizeof(IdType) * 8)); rel_graphs[g].push_back(rgptr); } } std::vector rst; std::vector num_nodes_per_type(num_vertex_types); for (uint64_t g = 0; g < batch_size; ++g) { for (uint64_t i = 0; i < num_vertex_types; ++i) num_nodes_per_type[i] = vertex_sizes_data[i * batch_size + g]; rst.push_back( CreateHeteroGraph(meta_graph, rel_graphs[g], num_nodes_per_type)); } return rst; } template std::vector DisjointPartitionHeteroBySizes( GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray vertex_sizes, IdArray edge_sizes); template std::vector DisjointPartitionHeteroBySizes( GraphPtr meta_graph, HeteroGraphPtr batched_graph, IdArray vertex_sizes, IdArray edge_sizes); } // namespace dgl ================================================ FILE: src/graph/traversal.cc ================================================ /** * 
Copyright (c) 2018 by Contributors * @file graph/traversal.cc * @brief Graph traversal implementation */ #include "./traversal.h" #include #include #include #include "../c_api_common.h" using namespace dgl::runtime; namespace dgl { namespace traverse { namespace { // A utility view class to wrap a vector into a queue. template struct VectorQueueWrapper { std::vector* vec; size_t head = 0; explicit VectorQueueWrapper(std::vector* vec) : vec(vec) {} void push(const DType& elem) { vec->push_back(elem); } DType top() const { return vec->operator[](head); } void pop() { ++head; } bool empty() const { return head == vec->size(); } size_t size() const { return vec->size() - head; } }; // Internal function to merge multiple traversal traces into one ndarray. // It is similar to zip the vectors together. template IdArray MergeMultipleTraversals(const std::vector>& traces) { int64_t max_len = 0, total_len = 0; for (size_t i = 0; i < traces.size(); ++i) { const int64_t tracelen = traces[i].size(); max_len = std::max(max_len, tracelen); total_len += traces[i].size(); } IdArray ret = IdArray::Empty( {total_len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* ret_data = static_cast(ret->data); for (int64_t i = 0; i < max_len; ++i) { for (size_t j = 0; j < traces.size(); ++j) { const int64_t tracelen = traces[j].size(); if (i >= tracelen) { continue; } *(ret_data++) = traces[j][i]; } } return ret; } // Internal function to compute sections if multiple traversal traces // are merged into one ndarray. 
template IdArray ComputeMergedSections(const std::vector>& traces) { int64_t max_len = 0; for (size_t i = 0; i < traces.size(); ++i) { const int64_t tracelen = traces[i].size(); max_len = std::max(max_len, tracelen); } IdArray ret = IdArray::Empty( {max_len}, DGLDataType{kDGLInt, 64, 1}, DGLContext{kDGLCPU, 0}); int64_t* ret_data = static_cast(ret->data); for (int64_t i = 0; i < max_len; ++i) { int64_t sec_len = 0; for (size_t j = 0; j < traces.size(); ++j) { const int64_t tracelen = traces[j].size(); if (i < tracelen) { ++sec_len; } } *(ret_data++) = sec_len; } return ret; } } // namespace /** * @brief Class for representing frontiers. * * Each frontier is a list of nodes/edges (specified by their ids). * An optional tag can be specified on each node/edge (represented by an int * value). */ struct Frontiers { /** @brief a vector store for the nodes/edges in all the frontiers */ std::vector ids; /** * @brief a vector store for node/edge tags. Empty if no tags are requested */ std::vector tags; /** @brief a section vector to indicate each frontier */ std::vector sections; }; Frontiers BFSNodesFrontiers( const GraphInterface& graph, IdArray source, bool reversed) { Frontiers front; VectorQueueWrapper queue(&front.ids); auto visit = [&](const dgl_id_t v) {}; auto make_frontier = [&]() { if (!queue.empty()) { // do not push zero-length frontier front.sections.push_back(queue.size()); } }; BFSNodes(graph, source, reversed, &queue, visit, make_frontier); return front; } DGL_REGISTER_GLOBAL("traversal._CAPI_DGLBFSNodes") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef g = args[0]; const IdArray src = args[1]; bool reversed = args[2]; const auto& front = BFSNodesFrontiers(*(g.sptr()), src, reversed); IdArray node_ids = CopyVectorToNDArray(front.ids); IdArray sections = CopyVectorToNDArray(front.sections); *rv = ConvertNDArrayVectorToPackedFunc({node_ids, sections}); }); Frontiers BFSEdgesFrontiers( const GraphInterface& graph, IdArray source, bool reversed) { 
Frontiers front; // NOTE: std::queue has no top() method. std::vector nodes; VectorQueueWrapper queue(&nodes); auto visit = [&](const dgl_id_t e) { front.ids.push_back(e); }; bool first_frontier = true; auto make_frontier = [&] { if (first_frontier) { first_frontier = false; // do not push the first section when doing edges } else if (!queue.empty()) { // do not push zero-length frontier front.sections.push_back(queue.size()); } }; BFSEdges(graph, source, reversed, &queue, visit, make_frontier); return front; } DGL_REGISTER_GLOBAL("traversal._CAPI_DGLBFSEdges") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef g = args[0]; const IdArray src = args[1]; bool reversed = args[2]; const auto& front = BFSEdgesFrontiers(*(g.sptr()), src, reversed); IdArray edge_ids = CopyVectorToNDArray(front.ids); IdArray sections = CopyVectorToNDArray(front.sections); *rv = ConvertNDArrayVectorToPackedFunc({edge_ids, sections}); }); Frontiers TopologicalNodesFrontiers( const GraphInterface& graph, bool reversed) { Frontiers front; VectorQueueWrapper queue(&front.ids); auto visit = [&](const dgl_id_t v) {}; auto make_frontier = [&]() { if (!queue.empty()) { // do not push zero-length frontier front.sections.push_back(queue.size()); } }; TopologicalNodes(graph, reversed, &queue, visit, make_frontier); return front; } DGL_REGISTER_GLOBAL("traversal._CAPI_DGLTopologicalNodes") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef g = args[0]; bool reversed = args[1]; const auto& front = TopologicalNodesFrontiers(*g.sptr(), reversed); IdArray node_ids = CopyVectorToNDArray(front.ids); IdArray sections = CopyVectorToNDArray(front.sections); *rv = ConvertNDArrayVectorToPackedFunc({node_ids, sections}); }); DGL_REGISTER_GLOBAL("traversal._CAPI_DGLDFSEdges") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef g = args[0]; const IdArray source = args[1]; const bool reversed = args[2]; CHECK(aten::IsValidIdArray(source)) << "Invalid source node id array."; const int64_t len = 
source->shape[0]; const int64_t* src_data = static_cast(source->data); std::vector> edges(len); for (int64_t i = 0; i < len; ++i) { auto visit = [&](dgl_id_t e, int tag) { edges[i].push_back(e); }; DFSLabeledEdges(*g.sptr(), src_data[i], reversed, false, false, visit); } IdArray ids = MergeMultipleTraversals(edges); IdArray sections = ComputeMergedSections(edges); *rv = ConvertNDArrayVectorToPackedFunc({ids, sections}); }); DGL_REGISTER_GLOBAL("traversal._CAPI_DGLDFSLabeledEdges") .set_body([](DGLArgs args, DGLRetValue* rv) { GraphRef g = args[0]; const IdArray source = args[1]; const bool reversed = args[2]; const bool has_reverse_edge = args[3]; const bool has_nontree_edge = args[4]; const bool return_labels = args[5]; CHECK(aten::IsValidIdArray(source)) << "Invalid source node id array."; const int64_t len = source->shape[0]; const int64_t* src_data = static_cast(source->data); std::vector> edges(len); std::vector> tags; if (return_labels) { tags.resize(len); } for (int64_t i = 0; i < len; ++i) { auto visit = [&](dgl_id_t e, int tag) { edges[i].push_back(e); if (return_labels) { tags[i].push_back(tag); } }; DFSLabeledEdges( *g.sptr(), src_data[i], reversed, has_reverse_edge, has_nontree_edge, visit); } IdArray ids = MergeMultipleTraversals(edges); IdArray sections = ComputeMergedSections(edges); if (return_labels) { IdArray labels = MergeMultipleTraversals(tags); *rv = ConvertNDArrayVectorToPackedFunc({ids, labels, sections}); } else { *rv = ConvertNDArrayVectorToPackedFunc({ids, sections}); } }); } // namespace traverse } // namespace dgl ================================================ FILE: src/graph/traversal.h ================================================ /** * Copyright (c) 2018 by Contributors * @file graph/traversal.h * @brief Graph traversal routines. * * Traversal routines generate frontiers. Frontiers can be node frontiers or * edge frontiers depending on the traversal function. Each frontier is a list * of nodes/edges (specified by their ids). 
An optional tag can be specified for * each node/edge (represented by an int value). */ #ifndef DGL_GRAPH_TRAVERSAL_H_ #define DGL_GRAPH_TRAVERSAL_H_ #include #include #include #include namespace dgl { namespace traverse { /** * @brief Traverse the graph in a breadth-first-search (BFS) order. * * The queue object must suffice following interface: * Members: * void push(dgl_id_t); // push one node * dgl_id_t top(); // get the first node * void pop(); // pop one node * bool empty(); // return true if the queue is empty * size_t size(); // return the size of the queue * For example, std::queue is a valid queue type. * * The visit function must be compatible with following interface: * void (*visit)(dgl_id_t ); * * The frontier function must be compatible with following interface: * void (*make_frontier)(void); * * @param graph The graph. * @param sources Source nodes. * @param reversed If true, BFS follows the in-edge direction. * @param queue The queue used to do bfs. * @param visit The function to call when a node is visited. * @param make_frontier The function to indicate that a new froniter can be * made. */ template void BFSNodes( const GraphInterface& graph, IdArray source, bool reversed, Queue* queue, VisitFn visit, FrontierFn make_frontier) { const int64_t len = source->shape[0]; const int64_t* src_data = static_cast(source->data); std::vector visited(graph.NumVertices()); for (int64_t i = 0; i < len; ++i) { const dgl_id_t u = src_data[i]; visited[u] = true; visit(u); queue->push(u); } make_frontier(); const auto neighbor_iter = reversed ? 
&GraphInterface::PredVec : &GraphInterface::SuccVec; while (!queue->empty()) { const size_t size = queue->size(); for (size_t i = 0; i < size; ++i) { const dgl_id_t u = queue->top(); queue->pop(); for (auto v : (graph.*neighbor_iter)(u)) { if (!visited[v]) { visited[v] = true; visit(v); queue->push(v); } } } make_frontier(); } } /** * @brief Traverse the graph in a breadth-first-search (BFS) order, returning * the edges of the BFS tree. * * The queue object must suffice following interface: * Members: * void push(dgl_id_t); // push one node * dgl_id_t top(); // get the first node * void pop(); // pop one node * bool empty(); // return true if the queue is empty * size_t size(); // return the size of the queue * For example, std::queue is a valid queue type. * * The visit function must be compatible with following interface: * void (*visit)(dgl_id_t ); * * The frontier function must be compatible with following interface: * void (*make_frontier)(void); * * @param graph The graph. * @param sources Source nodes. * @param reversed If true, BFS follows the in-edge direction. * @param queue The queue used to do bfs. * @param visit The function to call when a node is visited. * The argument would be edge ID. * @param make_frontier The function to indicate that a new frontier can be * made. */ template void BFSEdges( const GraphInterface& graph, IdArray source, bool reversed, Queue* queue, VisitFn visit, FrontierFn make_frontier) { const int64_t len = source->shape[0]; const int64_t* src_data = static_cast(source->data); std::vector visited(graph.NumVertices()); for (int64_t i = 0; i < len; ++i) { const dgl_id_t u = src_data[i]; visited[u] = true; queue->push(u); } make_frontier(); const auto neighbor_iter = reversed ? 
&GraphInterface::InEdgeVec : &GraphInterface::OutEdgeVec; while (!queue->empty()) { const size_t size = queue->size(); for (size_t i = 0; i < size; ++i) { const dgl_id_t u = queue->top(); queue->pop(); for (auto e : (graph.*neighbor_iter)(u)) { const auto uv = graph.FindEdge(e); const dgl_id_t v = (reversed ? uv.first : uv.second); if (!visited[v]) { visited[v] = true; visit(e); queue->push(v); } } } make_frontier(); } } /** * @brief Traverse the graph in topological order. * * The queue object must suffice following interface: * Members: * void push(dgl_id_t); // push one node * dgl_id_t top(); // get the first node * void pop(); // pop one node * bool empty(); // return true if the queue is empty * size_t size(); // return the size of the queue * For example, std::queue is a valid queue type. * * The visit function must be compatible with following interface: * void (*visit)(dgl_id_t ); * * The frontier function must be compatible with following interface: * void (*make_frontier)(void); * * @param graph The graph. * @param reversed If true, follows the in-edge direction. * @param queue The queue used to do bfs. * @param visit The function to call when a node is visited. * @param make_frontier The function to indicate that a new froniter can be * made. */ template void TopologicalNodes( const GraphInterface& graph, bool reversed, Queue* queue, VisitFn visit, FrontierFn make_frontier) { const auto get_degree = reversed ? &GraphInterface::OutDegree : &GraphInterface::InDegree; const auto neighbor_iter = reversed ? 
&GraphInterface::PredVec : &GraphInterface::SuccVec; uint64_t num_visited_nodes = 0; std::vector degrees(graph.NumVertices(), 0); for (dgl_id_t vid = 0; vid < graph.NumVertices(); ++vid) { degrees[vid] = (graph.*get_degree)(vid); if (degrees[vid] == 0) { visit(vid); queue->push(vid); ++num_visited_nodes; } } make_frontier(); while (!queue->empty()) { const size_t size = queue->size(); for (size_t i = 0; i < size; ++i) { const dgl_id_t u = queue->top(); queue->pop(); for (auto v : (graph.*neighbor_iter)(u)) { if (--(degrees[v]) == 0) { visit(v); queue->push(v); ++num_visited_nodes; } } } make_frontier(); } if (num_visited_nodes != graph.NumVertices()) { LOG(FATAL) << "Error in topological traversal: loop detected in the given graph."; } } /** @brief Tags for ``DFSEdges``. */ enum DFSEdgeTag { kForward = 0, kReverse, kNonTree, }; /** * @brief Traverse the graph in a depth-first-search (DFS) order. * * The traversal visit edges in its DFS order. Edges have three tags: * FORWARD(0), REVERSE(1), NONTREE(2). * * A FORWARD edge is one in which `u` has been visisted but `v` has not. * A REVERSE edge is one in which both `u` and `v` have been visisted and the * edge is in the DFS tree. A NONTREE edge is one in which both `u` and `v` have * been visisted but the edge is NOT in the DFS tree. * * @param source Source node. * @param reversed If true, DFS follows the in-edge direction. * @param has_reverse_edge If true, REVERSE edges are included. * @param has_nontree_edge If true, NONTREE edges are included. * @param visit The function to call when an edge is visited; the edge id and * its tag will be given as the arguments. */ template void DFSLabeledEdges( const GraphInterface& graph, dgl_id_t source, bool reversed, bool has_reverse_edge, bool has_nontree_edge, VisitFn visit) { const auto succ = reversed ? &GraphInterface::PredVec : &GraphInterface::SuccVec; const auto out_edge = reversed ? 
// Metagraph for a unit graph with a single node type: one node carrying a
// self-loop edge 0->0.
inline GraphPtr CreateUnitGraphMetaGraph1() {
  const std::vector<int64_t> src_ids{0};
  const std::vector<int64_t> dst_ids{0};
  return ImmutableGraph::CreateFromCOO(
      1, aten::VecToIdArray(src_ids), aten::VecToIdArray(dst_ids));
}

// Metagraph for a unit graph with two node types: two nodes joined by a
// single edge 0->1.
inline GraphPtr CreateUnitGraphMetaGraph2() {
  const std::vector<int64_t> src_ids{0};
  const std::vector<int64_t> dst_ids{1};
  return ImmutableGraph::CreateFromCOO(
      2, aten::VecToIdArray(src_ids), aten::VecToIdArray(dst_ids));
}

// Returns the shared metagraph for the given number of vertex types. Both
// metagraphs are built once, on the first call, and reused afterwards.
// Any value other than 1 or 2 is a fatal error.
inline GraphPtr CreateUnitGraphMetaGraph(int num_vtypes) {
  static GraphPtr mg1 = CreateUnitGraphMetaGraph1();
  static GraphPtr mg2 = CreateUnitGraphMetaGraph2();
  switch (num_vtypes) {
    case 1:
      return mg1;
    case 2:
      return mg2;
    default:
      LOG(FATAL) << "Invalid number of vertex types. Must be 1 or 2.";
      return {};
  }
}
" << "The relation graph is simply this graph itself."; return {}; } void AddVertices(dgl_type_t vtype, uint64_t num_vertices) override { LOG(FATAL) << "UnitGraph graph is not mutable."; } void AddEdge(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) override { LOG(FATAL) << "UnitGraph graph is not mutable."; } void AddEdges(dgl_type_t etype, IdArray src_ids, IdArray dst_ids) override { LOG(FATAL) << "UnitGraph graph is not mutable."; } void Clear() override { LOG(FATAL) << "UnitGraph graph is not mutable."; } DGLDataType DataType() const override { return adj_.row->dtype; } DGLContext Context() const override { return adj_.row->ctx; } bool IsPinned() const override { return adj_.is_pinned; } uint8_t NumBits() const override { return adj_.row->dtype.bits; } COO AsNumBits(uint8_t bits) const { if (NumBits() == bits) return *this; COO ret( meta_graph_, adj_.num_rows, adj_.num_cols, aten::AsNumBits(adj_.row, bits), aten::AsNumBits(adj_.col, bits)); return ret; } COO CopyTo(const DGLContext& ctx) const { if (Context() == ctx) return *this; return COO(meta_graph_, adj_.CopyTo(ctx)); } /** * @brief Copy the adj_ to pinned memory. * @return COOMatrix of the COO graph. */ COO PinMemory() { if (adj_.is_pinned) return *this; return COO(meta_graph_, adj_.PinMemory()); } /** @brief Pin the adj_: COOMatrix of the COO graph. */ void PinMemory_() { adj_.PinMemory_(); } /** @brief Unpin the adj_: COOMatrix of the COO graph. */ void UnpinMemory_() { adj_.UnpinMemory_(); } /** @brief Record stream for the adj_: COOMatrix of the COO graph. 
*/ void RecordStream(DGLStreamHandle stream) override { adj_.RecordStream(stream); } bool IsMultigraph() const override { return aten::COOHasDuplicate(adj_); } bool IsReadonly() const override { return true; } uint64_t NumVertices(dgl_type_t vtype) const override { if (vtype == SrcType()) { return adj_.num_rows; } else if (vtype == DstType()) { return adj_.num_cols; } else { LOG(FATAL) << "Invalid vertex type: " << vtype; return 0; } } uint64_t NumEdges(dgl_type_t etype) const override { return adj_.row->shape[0]; } bool HasVertex(dgl_type_t vtype, dgl_id_t vid) const override { return vid < NumVertices(vtype); } BoolArray HasVertices(dgl_type_t vtype, IdArray vids) const override { LOG(FATAL) << "Not enabled for COO graph"; return {}; } bool HasEdgeBetween( dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override { CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src; CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst; return aten::COOIsNonZero(adj_, src, dst); } BoolArray HasEdgesBetween( dgl_type_t etype, IdArray src_ids, IdArray dst_ids) const override { CHECK(aten::IsValidIdArray(src_ids)) << "Invalid vertex id array."; CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid vertex id array."; return aten::COOIsNonZero(adj_, src_ids, dst_ids); } IdArray Predecessors(dgl_type_t etype, dgl_id_t dst) const override { CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst; return aten::COOGetRowDataAndIndices(aten::COOTranspose(adj_), dst).second; } IdArray Successors(dgl_type_t etype, dgl_id_t src) const override { CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src; return aten::COOGetRowDataAndIndices(adj_, src).second; } IdArray EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override { CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src; CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst; return aten::COOGetAllData(adj_, src, dst); } EdgeArray 
EdgeIdsAll( dgl_type_t etype, IdArray src, IdArray dst) const override { CHECK(aten::IsValidIdArray(src)) << "Invalid vertex id array."; CHECK(aten::IsValidIdArray(dst)) << "Invalid vertex id array."; const auto& arrs = aten::COOGetDataAndIndices(adj_, src, dst); return EdgeArray{arrs[0], arrs[1], arrs[2]}; } IdArray EdgeIdsOne( dgl_type_t etype, IdArray src, IdArray dst) const override { return aten::COOGetData(adj_, src, dst); } std::pair FindEdge( dgl_type_t etype, dgl_id_t eid) const override { CHECK(eid < NumEdges(etype)) << "Invalid edge id: " << eid; const dgl_id_t src = aten::IndexSelect(adj_.row, eid); const dgl_id_t dst = aten::IndexSelect(adj_.col, eid); return std::pair(src, dst); } EdgeArray FindEdges(dgl_type_t etype, IdArray eids) const override { CHECK(aten::IsValidIdArray(eids)) << "Invalid edge id array"; BUG_IF_FAIL(aten::IsNullArray(adj_.data)) << "FindEdges requires the internal COO matrix not having EIDs."; return EdgeArray{ aten::IndexSelect(adj_.row, eids), aten::IndexSelect(adj_.col, eids), eids}; } EdgeArray InEdges(dgl_type_t etype, dgl_id_t vid) const override { IdArray ret_src, ret_eid; std::tie(ret_eid, ret_src) = aten::COOGetRowDataAndIndices(aten::COOTranspose(adj_), vid); IdArray ret_dst = aten::Full(vid, ret_src->shape[0], NumBits(), ret_src->ctx); return EdgeArray{ret_src, ret_dst, ret_eid}; } EdgeArray InEdges(dgl_type_t etype, IdArray vids) const override { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; auto coosubmat = aten::COOSliceRows(aten::COOTranspose(adj_), vids); auto row = aten::IndexSelect(vids, coosubmat.row); return EdgeArray{coosubmat.col, row, coosubmat.data}; } EdgeArray OutEdges(dgl_type_t etype, dgl_id_t vid) const override { IdArray ret_dst, ret_eid; std::tie(ret_eid, ret_dst) = aten::COOGetRowDataAndIndices(adj_, vid); IdArray ret_src = aten::Full(vid, ret_dst->shape[0], NumBits(), ret_dst->ctx); return EdgeArray{ret_src, ret_dst, ret_eid}; } EdgeArray OutEdges(dgl_type_t etype, IdArray vids) 
const override { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; auto coosubmat = aten::COOSliceRows(adj_, vids); auto row = aten::IndexSelect(vids, coosubmat.row); return EdgeArray{row, coosubmat.col, coosubmat.data}; } EdgeArray Edges( dgl_type_t etype, const std::string& order = "") const override { CHECK(order.empty() || order == std::string("eid")) << "COO only support Edges of order \"eid\", but got \"" << order << "\"."; IdArray rst_eid = aten::Range(0, NumEdges(etype), NumBits(), Context()); return EdgeArray{adj_.row, adj_.col, rst_eid}; } uint64_t InDegree(dgl_type_t etype, dgl_id_t vid) const override { CHECK(HasVertex(DstType(), vid)) << "Invalid dst vertex id: " << vid; return aten::COOGetRowNNZ(aten::COOTranspose(adj_), vid); } DegreeArray InDegrees(dgl_type_t etype, IdArray vids) const override { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; return aten::COOGetRowNNZ(aten::COOTranspose(adj_), vids); } uint64_t OutDegree(dgl_type_t etype, dgl_id_t vid) const override { CHECK(HasVertex(SrcType(), vid)) << "Invalid src vertex id: " << vid; return aten::COOGetRowNNZ(adj_, vid); } DegreeArray OutDegrees(dgl_type_t etype, IdArray vids) const override { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; return aten::COOGetRowNNZ(adj_, vids); } DGLIdIters SuccVec(dgl_type_t etype, dgl_id_t vid) const override { LOG(INFO) << "Not enabled for COO graph."; return {}; } DGLIdIters OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const override { LOG(INFO) << "Not enabled for COO graph."; return {}; } DGLIdIters PredVec(dgl_type_t etype, dgl_id_t vid) const override { LOG(INFO) << "Not enabled for COO graph."; return {}; } DGLIdIters InEdgeVec(dgl_type_t etype, dgl_id_t vid) const override { LOG(INFO) << "Not enabled for COO graph."; return {}; } std::vector GetAdj( dgl_type_t etype, bool transpose, const std::string& fmt) const override { CHECK(fmt == "coo") << "Not valid adj format request."; if (transpose) { return 
{aten::HStack(adj_.col, adj_.row)};
    } else {
      return {aten::HStack(adj_.row, adj_.col)};
    }
  }

  /** @brief Return the stored COO adjacency matrix (adj_). */
  aten::COOMatrix GetCOOMatrix(dgl_type_t etype) const override { return adj_; }

  /** @brief Not supported by this backend: reports LOG(FATAL). */
  aten::CSRMatrix GetCSCMatrix(dgl_type_t etype) const override {
    LOG(FATAL) << "Not enabled for COO graph";
    return aten::CSRMatrix();
  }

  /** @brief Not supported by this backend: reports LOG(FATAL). */
  aten::CSRMatrix GetCSRMatrix(dgl_type_t etype) const override {
    LOG(FATAL) << "Not enabled for COO graph";
    return aten::CSRMatrix();
  }

  /** @brief Not supported by this backend: reports LOG(FATAL). */
  SparseFormat SelectFormat(
      dgl_type_t etype, dgl_format_code_t preferred_formats) const override {
    LOG(FATAL) << "Not enabled for COO graph";
    return SparseFormat::kCOO;
  }

  /** @brief Not supported by this backend: reports LOG(FATAL). */
  dgl_format_code_t GetAllowedFormats() const override {
    LOG(FATAL) << "Not enabled for COO graph";
    return 0;
  }

  /** @brief Not supported by this backend: reports LOG(FATAL). */
  dgl_format_code_t GetCreatedFormats() const override {
    LOG(FATAL) << "Not enabled for COO graph";
    return 0;
  }

  /**
   * @brief Build the subgraph induced by the given vertex ids.
   * @param vids One id array per vertex type (size is checked against
   *        NumVertexTypes()); entries SrcType() and DstType() are used.
   * @return Subgraph built from aten::COOSliceMatrix(adj_, src, dst).
   *         induced_vertices is `vids` itself and induced_edges receives the
   *         data array of the sliced matrix.
   */
  // NOTE(review): template arguments (std::vector / std::make_shared) are
  // missing in this copy of the file; they are left exactly as found.
  HeteroSubgraph VertexSubgraph(
      const std::vector& vids) const override {
    CHECK_EQ(vids.size(), NumVertexTypes())
        << "Number of vertex types mismatch";
    auto srcvids = vids[SrcType()], dstvids = vids[DstType()];
    CHECK(aten::IsValidIdArray(srcvids)) << "Invalid vertex id array.";
    CHECK(aten::IsValidIdArray(dstvids)) << "Invalid vertex id array.";
    HeteroSubgraph subg;
    const auto& submat = aten::COOSliceMatrix(adj_, srcvids, dstvids);
    // NOTE(review): sub_eids is never read below, and ctx is only used to
    // build it. Looks like a leftover; confirm aten::GetContextOf has no
    // validation side effect that callers rely on before removing either.
    DGLContext ctx = aten::GetContextOf(vids);
    IdArray sub_eids = aten::Range(0, submat.data->shape[0], NumBits(), ctx);
    subg.graph = std::make_shared(
        meta_graph(), submat.num_rows, submat.num_cols, submat.row, submat.col);
    subg.induced_vertices = vids;
    subg.induced_edges.emplace_back(submat.data);
    return subg;
  }

  /**
   * @brief Build the subgraph made of the edges listed in eids[0].
   * @param eids Must hold exactly one id array (checked).
   * @param preserve_nodes When false, the selected endpoints go through
   *        aten::Relabel_ and the results become induced_vertices; when true,
   *        the original vertex counts are kept and induced_vertices holds
   *        null arrays.
   */
  HeteroSubgraph EdgeSubgraph(
      const std::vector& eids, bool preserve_nodes = false) const override {
    CHECK_EQ(eids.size(), 1) << "Edge type number mismatch.";
    HeteroSubgraph subg;
    if (!preserve_nodes) {
      IdArray new_src = aten::IndexSelect(adj_.row, eids[0]);
      IdArray new_dst = aten::IndexSelect(adj_.col, eids[0]);
      subg.induced_vertices.emplace_back(aten::Relabel_({new_src}));
subg.induced_vertices.emplace_back(aten::Relabel_({new_dst})); const auto new_nsrc = subg.induced_vertices[0]->shape[0]; const auto new_ndst = subg.induced_vertices[1]->shape[0]; subg.graph = std::make_shared( meta_graph(), new_nsrc, new_ndst, new_src, new_dst); subg.induced_edges = eids; } else { IdArray new_src = aten::IndexSelect(adj_.row, eids[0]); IdArray new_dst = aten::IndexSelect(adj_.col, eids[0]); subg.induced_vertices.emplace_back( aten::NullArray(DGLDataType{kDGLInt, NumBits(), 1}, Context())); subg.induced_vertices.emplace_back( aten::NullArray(DGLDataType{kDGLInt, NumBits(), 1}, Context())); subg.graph = std::make_shared( meta_graph(), NumVertices(SrcType()), NumVertices(DstType()), new_src, new_dst); subg.induced_edges = eids; } return subg; } HeteroGraphPtr GetGraphInFormat(dgl_format_code_t formats) const override { LOG(FATAL) << "Not enabled for COO graph."; return nullptr; } aten::COOMatrix adj() const { return adj_; } /** * @brief Determines whether the graph is "hypersparse", i.e. having * significantly more nodes than edges. */ bool IsHypersparse() const { return (NumVertices(SrcType()) / 8 > NumEdges(EdgeType())) && (NumVertices(SrcType()) > 1000000); } bool Load(dmlc::Stream* fs) { auto meta_imgraph = Serializer::make_shared(); CHECK(fs->Read(&meta_imgraph)) << "Invalid meta graph"; meta_graph_ = meta_imgraph; CHECK(fs->Read(&adj_)) << "Invalid adj matrix"; return true; } void Save(dmlc::Stream* fs) const { auto meta_graph_ptr = ImmutableGraph::ToImmutable(meta_graph()); fs->Write(meta_graph_ptr); fs->Write(adj_); } private: friend class Serializer; /** @brief internal adjacency matrix. 
Data array is empty */ aten::COOMatrix adj_; }; ////////////////////////////////////////////////////////// // // CSR graph implementation // ////////////////////////////////////////////////////////// /** @brief CSR graph */ class UnitGraph::CSR : public BaseHeteroGraph { public: CSR(GraphPtr metagraph, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids) : BaseHeteroGraph(metagraph) { CHECK(aten::IsValidIdArray(indptr)); CHECK(aten::IsValidIdArray(indices)); if (aten::IsValidIdArray(edge_ids)) CHECK( (indices->shape[0] == edge_ids->shape[0]) || aten::IsNullArray(edge_ids)) << "edge id arrays should have the same length as indices if not " "empty"; CHECK_EQ(num_src, indptr->shape[0] - 1) << "number of nodes do not match the length of indptr minus 1."; adj_ = aten::CSRMatrix{num_src, num_dst, indptr, indices, edge_ids}; } CSR(GraphPtr metagraph, const aten::CSRMatrix& csr) : BaseHeteroGraph(metagraph), adj_(csr) {} CSR() { // set magic num_rows/num_cols to mark it as undefined // adj_.num_rows == 0 and adj_.num_cols == 0 means empty UnitGraph which is // supported adj_.num_rows = -1; adj_.num_cols = -1; }; bool defined() const { return (adj_.num_rows >= 0) || (adj_.num_cols >= 0); } inline dgl_type_t SrcType() const { return 0; } inline dgl_type_t DstType() const { return NumVertexTypes() == 1 ? 0 : 1; } inline dgl_type_t EdgeType() const { return 0; } HeteroGraphPtr GetRelationGraph(dgl_type_t etype) const override { LOG(FATAL) << "The method shouldn't be called for UnitGraph graph. 
" << "The relation graph is simply this graph itself."; return {}; } void AddVertices(dgl_type_t vtype, uint64_t num_vertices) override { LOG(FATAL) << "UnitGraph graph is not mutable."; } void AddEdge(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) override { LOG(FATAL) << "UnitGraph graph is not mutable."; } void AddEdges(dgl_type_t etype, IdArray src_ids, IdArray dst_ids) override { LOG(FATAL) << "UnitGraph graph is not mutable."; } void Clear() override { LOG(FATAL) << "UnitGraph graph is not mutable."; } DGLDataType DataType() const override { return adj_.indices->dtype; } DGLContext Context() const override { return adj_.indices->ctx; } bool IsPinned() const override { return adj_.is_pinned; } uint8_t NumBits() const override { return adj_.indices->dtype.bits; } CSR AsNumBits(uint8_t bits) const { if (NumBits() == bits) { return *this; } else { CSR ret( meta_graph_, adj_.num_rows, adj_.num_cols, aten::AsNumBits(adj_.indptr, bits), aten::AsNumBits(adj_.indices, bits), aten::AsNumBits(adj_.data, bits)); return ret; } } CSR CopyTo(const DGLContext& ctx) const { if (Context() == ctx) { return *this; } else { return CSR(meta_graph_, adj_.CopyTo(ctx)); } } /** * @brief Copy the adj_ to pinned memory. * @return CSRMatrix of the CSR graph. */ CSR PinMemory() { if (adj_.is_pinned) return *this; return CSR(meta_graph_, adj_.PinMemory()); } /** @brief Pin the adj_: CSRMatrix of the CSR graph. */ void PinMemory_() { adj_.PinMemory_(); } /** @brief Unpin the adj_: CSRMatrix of the CSR graph. */ void UnpinMemory_() { adj_.UnpinMemory_(); } /** @brief Record stream for the adj_: CSRMatrix of the CSR graph. 
*/ void RecordStream(DGLStreamHandle stream) override { adj_.RecordStream(stream); } bool IsMultigraph() const override { return aten::CSRHasDuplicate(adj_); } bool IsReadonly() const override { return true; } uint64_t NumVertices(dgl_type_t vtype) const override { if (vtype == SrcType()) { return adj_.num_rows; } else if (vtype == DstType()) { return adj_.num_cols; } else { LOG(FATAL) << "Invalid vertex type: " << vtype; return 0; } } uint64_t NumEdges(dgl_type_t etype) const override { return adj_.indices->shape[0]; } bool HasVertex(dgl_type_t vtype, dgl_id_t vid) const override { return vid < NumVertices(vtype); } BoolArray HasVertices(dgl_type_t vtype, IdArray vids) const override { LOG(FATAL) << "Not enabled for COO graph"; return {}; } bool HasEdgeBetween( dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override { CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src; CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst; return aten::CSRIsNonZero(adj_, src, dst); } BoolArray HasEdgesBetween( dgl_type_t etype, IdArray src_ids, IdArray dst_ids) const override { CHECK(aten::IsValidIdArray(src_ids)) << "Invalid vertex id array."; CHECK(aten::IsValidIdArray(dst_ids)) << "Invalid vertex id array."; return aten::CSRIsNonZero(adj_, src_ids, dst_ids); } IdArray Predecessors(dgl_type_t etype, dgl_id_t dst) const override { LOG(INFO) << "Not enabled for CSR graph."; return {}; } IdArray Successors(dgl_type_t etype, dgl_id_t src) const override { CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src; return aten::CSRGetRowColumnIndices(adj_, src); } IdArray EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override { CHECK(HasVertex(SrcType(), src)) << "Invalid src vertex id: " << src; CHECK(HasVertex(DstType(), dst)) << "Invalid dst vertex id: " << dst; return aten::CSRGetAllData(adj_, src, dst); } EdgeArray EdgeIdsAll( dgl_type_t etype, IdArray src, IdArray dst) const override { CHECK(aten::IsValidIdArray(src)) 
<< "Invalid vertex id array.";
    CHECK(aten::IsValidIdArray(dst)) << "Invalid vertex id array.";
    const auto& arrs = aten::CSRGetDataAndIndices(adj_, src, dst);
    return EdgeArray{arrs[0], arrs[1], arrs[2]};
  }

  /** @brief Forwards to aten::CSRGetData(adj_, src, dst). */
  IdArray EdgeIdsOne(
      dgl_type_t etype, IdArray src, IdArray dst) const override {
    return aten::CSRGetData(adj_, src, dst);
  }

  /** @brief Not supported by the CSR backend: reports LOG(FATAL). */
  // NOTE(review): the std::pair template arguments are missing in this copy
  // of the file; the declaration is left exactly as found.
  std::pair FindEdge(
      dgl_type_t etype, dgl_id_t eid) const override {
    LOG(FATAL) << "Not enabled for CSR graph.";
    return {};
  }

  /** @brief Not supported by the CSR backend: reports LOG(FATAL). */
  EdgeArray FindEdges(dgl_type_t etype, IdArray eids) const override {
    LOG(FATAL) << "Not enabled for CSR graph.";
    return {};
  }

  /** @brief Not supported by the CSR backend: reports LOG(FATAL). */
  EdgeArray InEdges(dgl_type_t etype, dgl_id_t vid) const override {
    LOG(FATAL) << "Not enabled for CSR graph.";
    return {};
  }

  /** @brief Not supported by the CSR backend: reports LOG(FATAL). */
  EdgeArray InEdges(dgl_type_t etype, IdArray vids) const override {
    LOG(FATAL) << "Not enabled for CSR graph.";
    return {};
  }

  /**
   * @brief Out-edges of a single vertex.
   * @param vid Source vertex id, validated with HasVertex.
   * @return EdgeArray{src, dst, eid}: dst comes from the row's column
   *         indices, eid from the row's data, and src is an array of the same
   *         length filled with vid.
   */
  EdgeArray OutEdges(dgl_type_t etype, dgl_id_t vid) const override {
    CHECK(HasVertex(SrcType(), vid)) << "Invalid src vertex id: " << vid;
    IdArray ret_dst = aten::CSRGetRowColumnIndices(adj_, vid);
    IdArray ret_eid = aten::CSRGetRowData(adj_, vid);
    IdArray ret_src =
        aten::Full(vid, ret_dst->shape[0], NumBits(), ret_dst->ctx);
    return EdgeArray{ret_src, ret_dst, ret_eid};
  }

  /**
   * @brief Out-edges of several vertices.
   * Slices the requested rows out of adj_ and converts the slice to COO.
   */
  EdgeArray OutEdges(dgl_type_t etype, IdArray vids) const override {
    CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array.";
    auto csrsubmat = aten::CSRSliceRows(adj_, vids);
    auto coosubmat = aten::CSRToCOO(csrsubmat, false);
    // Note that the row id in the csr submat is relabeled, so
    // we need to recover it using an index select.
auto row = aten::IndexSelect(vids, coosubmat.row); return EdgeArray{row, coosubmat.col, coosubmat.data}; } EdgeArray Edges( dgl_type_t etype, const std::string& order = "") const override { CHECK(order.empty() || order == std::string("srcdst")) << "CSR only support Edges of order \"srcdst\"," << " but got \"" << order << "\"."; auto coo = aten::CSRToCOO(adj_, false); if (order == std::string("srcdst")) { // make sure the coo is sorted if an order is requested coo = aten::COOSort(coo, true); } return EdgeArray{coo.row, coo.col, coo.data}; } uint64_t InDegree(dgl_type_t etype, dgl_id_t vid) const override { LOG(FATAL) << "Not enabled for CSR graph."; return {}; } DegreeArray InDegrees(dgl_type_t etype, IdArray vids) const override { LOG(FATAL) << "Not enabled for CSR graph."; return {}; } uint64_t OutDegree(dgl_type_t etype, dgl_id_t vid) const override { CHECK(HasVertex(SrcType(), vid)) << "Invalid src vertex id: " << vid; return aten::CSRGetRowNNZ(adj_, vid); } DegreeArray OutDegrees(dgl_type_t etype, IdArray vids) const override { CHECK(aten::IsValidIdArray(vids)) << "Invalid vertex id array."; return aten::CSRGetRowNNZ(adj_, vids); } DGLIdIters SuccVec(dgl_type_t etype, dgl_id_t vid) const override { // TODO(minjie): This still assumes the data type and device context // of this graph. Should fix later. CHECK_EQ(NumBits(), 64); const dgl_id_t* indptr_data = static_cast(adj_.indptr->data); const dgl_id_t* indices_data = static_cast(adj_.indices->data); const dgl_id_t start = indptr_data[vid]; const dgl_id_t end = indptr_data[vid + 1]; return DGLIdIters(indices_data + start, indices_data + end); } DGLIdIters32 SuccVec32(dgl_type_t etype, dgl_id_t vid) { // TODO(minjie): This still assumes the data type and device context // of this graph. Should fix later. 
const int32_t* indptr_data = static_cast(adj_.indptr->data); const int32_t* indices_data = static_cast(adj_.indices->data); const int32_t start = indptr_data[vid]; const int32_t end = indptr_data[vid + 1]; return DGLIdIters32(indices_data + start, indices_data + end); } DGLIdIters OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const override { // TODO(minjie): This still assumes the data type and device context // of this graph. Should fix later. CHECK_EQ(NumBits(), 64); const dgl_id_t* indptr_data = static_cast(adj_.indptr->data); const dgl_id_t* eid_data = static_cast(adj_.data->data); const dgl_id_t start = indptr_data[vid]; const dgl_id_t end = indptr_data[vid + 1]; return DGLIdIters(eid_data + start, eid_data + end); } DGLIdIters PredVec(dgl_type_t etype, dgl_id_t vid) const override { LOG(FATAL) << "Not enabled for CSR graph."; return {}; } DGLIdIters InEdgeVec(dgl_type_t etype, dgl_id_t vid) const override { LOG(FATAL) << "Not enabled for CSR graph."; return {}; } std::vector GetAdj( dgl_type_t etype, bool transpose, const std::string& fmt) const override { CHECK(!transpose && fmt == "csr") << "Not valid adj format request."; return {adj_.indptr, adj_.indices, adj_.data}; } aten::COOMatrix GetCOOMatrix(dgl_type_t etype) const override { LOG(FATAL) << "Not enabled for CSR graph"; return aten::COOMatrix(); } aten::CSRMatrix GetCSCMatrix(dgl_type_t etype) const override { LOG(FATAL) << "Not enabled for CSR graph"; return aten::CSRMatrix(); } aten::CSRMatrix GetCSRMatrix(dgl_type_t etype) const override { return adj_; } SparseFormat SelectFormat( dgl_type_t etype, dgl_format_code_t preferred_formats) const override { LOG(FATAL) << "Not enabled for CSR graph"; return SparseFormat::kCSR; } dgl_format_code_t GetAllowedFormats() const override { LOG(FATAL) << "Not enabled for COO graph"; return 0; } dgl_format_code_t GetCreatedFormats() const override { LOG(FATAL) << "Not enabled for CSR graph"; return 0; } HeteroSubgraph VertexSubgraph( const std::vector& vids) const 
override { CHECK_EQ(vids.size(), NumVertexTypes()) << "Number of vertex types mismatch"; auto srcvids = vids[SrcType()], dstvids = vids[DstType()]; CHECK(aten::IsValidIdArray(srcvids)) << "Invalid vertex id array."; CHECK(aten::IsValidIdArray(dstvids)) << "Invalid vertex id array."; HeteroSubgraph subg; const auto& submat = aten::CSRSliceMatrix(adj_, srcvids, dstvids); DGLContext ctx = aten::GetContextOf(vids); IdArray sub_eids = aten::Range(0, submat.data->shape[0], NumBits(), ctx); subg.graph = std::make_shared( meta_graph(), submat.num_rows, submat.num_cols, submat.indptr, submat.indices, sub_eids); subg.induced_vertices = vids; subg.induced_edges.emplace_back(submat.data); return subg; } HeteroSubgraph EdgeSubgraph( const std::vector& eids, bool preserve_nodes = false) const override { LOG(FATAL) << "Not enabled for CSR graph."; return {}; } HeteroGraphPtr GetGraphInFormat(dgl_format_code_t formats) const override { LOG(FATAL) << "Not enabled for CSR graph."; return nullptr; } aten::CSRMatrix adj() const { return adj_; } bool Load(dmlc::Stream* fs) { auto meta_imgraph = Serializer::make_shared(); CHECK(fs->Read(&meta_imgraph)) << "Invalid meta graph"; meta_graph_ = meta_imgraph; CHECK(fs->Read(&adj_)) << "Invalid adj matrix"; return true; } void Save(dmlc::Stream* fs) const { auto meta_graph_ptr = ImmutableGraph::ToImmutable(meta_graph()); fs->Write(meta_graph_ptr); fs->Write(adj_); } private: friend class Serializer; /** @brief internal adjacency matrix. 
Data array stores edge ids */ aten::CSRMatrix adj_; }; ////////////////////////////////////////////////////////// // // unit graph implementation // ////////////////////////////////////////////////////////// DGLDataType UnitGraph::DataType() const { return GetAny()->DataType(); } DGLContext UnitGraph::Context() const { return GetAny()->Context(); } bool UnitGraph::IsPinned() const { return GetAny()->IsPinned(); } uint8_t UnitGraph::NumBits() const { return GetAny()->NumBits(); } bool UnitGraph::IsMultigraph() const { const SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); return ptr->IsMultigraph(); } uint64_t UnitGraph::NumVertices(dgl_type_t vtype) const { const SparseFormat fmt = SelectFormat(ALL_CODE); const auto ptr = GetFormat(fmt); // TODO(BarclayII): we have a lot of special handling for CSC. // Need to have a UnitGraph::CSC backend instead. if (fmt == SparseFormat::kCSC) vtype = (vtype == SrcType()) ? DstType() : SrcType(); return ptr->NumVertices(vtype); } uint64_t UnitGraph::NumEdges(dgl_type_t etype) const { return GetAny()->NumEdges(etype); } bool UnitGraph::HasVertex(dgl_type_t vtype, dgl_id_t vid) const { const SparseFormat fmt = SelectFormat(ALL_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) vtype = (vtype == SrcType()) ? 
DstType() : SrcType(); return ptr->HasVertex(vtype, vid); } BoolArray UnitGraph::HasVertices(dgl_type_t vtype, IdArray vids) const { CHECK(aten::IsValidIdArray(vids)) << "Invalid id array input"; return aten::LT(vids, NumVertices(vtype)); } bool UnitGraph::HasEdgeBetween( dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const { const SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) return ptr->HasEdgeBetween(etype, dst, src); else return ptr->HasEdgeBetween(etype, src, dst); } BoolArray UnitGraph::HasEdgesBetween( dgl_type_t etype, IdArray src, IdArray dst) const { const SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) return ptr->HasEdgesBetween(etype, dst, src); else return ptr->HasEdgesBetween(etype, src, dst); } IdArray UnitGraph::Predecessors(dgl_type_t etype, dgl_id_t dst) const { const SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) return ptr->Successors(etype, dst); else return ptr->Predecessors(etype, dst); } IdArray UnitGraph::Successors(dgl_type_t etype, dgl_id_t src) const { const SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); return ptr->Successors(etype, src); } IdArray UnitGraph::EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const { const SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) return ptr->EdgeId(etype, dst, src); else return ptr->EdgeId(etype, src, dst); } EdgeArray UnitGraph::EdgeIdsAll( dgl_type_t etype, IdArray src, IdArray dst) const { const SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) { EdgeArray edges = ptr->EdgeIdsAll(etype, dst, src); return EdgeArray{edges.dst, edges.src, edges.id}; } else { return ptr->EdgeIdsAll(etype, src, dst); } } IdArray UnitGraph::EdgeIdsOne( dgl_type_t etype, IdArray src, IdArray dst) 
const { const SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) { return ptr->EdgeIdsOne(etype, dst, src); } else { return ptr->EdgeIdsOne(etype, src, dst); } } std::pair UnitGraph::FindEdge( dgl_type_t etype, dgl_id_t eid) const { const SparseFormat fmt = SelectFormat(COO_CODE); const auto ptr = GetFormat(fmt); return ptr->FindEdge(etype, eid); } EdgeArray UnitGraph::FindEdges(dgl_type_t etype, IdArray eids) const { const SparseFormat fmt = SelectFormat(COO_CODE); const auto ptr = GetFormat(fmt); return ptr->FindEdges(etype, eids); } EdgeArray UnitGraph::InEdges(dgl_type_t etype, dgl_id_t vid) const { const SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) { const EdgeArray& ret = ptr->OutEdges(etype, vid); return {ret.dst, ret.src, ret.id}; } else { return ptr->InEdges(etype, vid); } } EdgeArray UnitGraph::InEdges(dgl_type_t etype, IdArray vids) const { const SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) { const EdgeArray& ret = ptr->OutEdges(etype, vids); return {ret.dst, ret.src, ret.id}; } else { return ptr->InEdges(etype, vids); } } EdgeArray UnitGraph::OutEdges(dgl_type_t etype, dgl_id_t vid) const { const SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); return ptr->OutEdges(etype, vid); } EdgeArray UnitGraph::OutEdges(dgl_type_t etype, IdArray vids) const { const SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); return ptr->OutEdges(etype, vids); } EdgeArray UnitGraph::Edges(dgl_type_t etype, const std::string& order) const { SparseFormat fmt; if (order == std::string("eid")) { fmt = SelectFormat(COO_CODE); } else if (order.empty()) { // arbitrary order fmt = SelectFormat(ALL_CODE); } else if (order == std::string("srcdst")) { fmt = SelectFormat(CSR_CODE); } else { LOG(FATAL) << "Unsupported order request: " << order; return {}; } 
const auto& edges = GetFormat(fmt)->Edges(etype, order); if (fmt == SparseFormat::kCSC) return EdgeArray{edges.dst, edges.src, edges.id}; else return edges; } uint64_t UnitGraph::InDegree(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); CHECK(fmt == SparseFormat::kCSC || fmt == SparseFormat::kCOO) << "In degree cannot be computed as neither CSC nor COO format is " "allowed for this graph. Please enable one of them at least."; return fmt == SparseFormat::kCSC ? ptr->OutDegree(etype, vid) : ptr->InDegree(etype, vid); } DegreeArray UnitGraph::InDegrees(dgl_type_t etype, IdArray vids) const { SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); CHECK(fmt == SparseFormat::kCSC || fmt == SparseFormat::kCOO) << "In degree cannot be computed as neither CSC nor COO format is " "allowed for this graph. Please enable one of them at least."; return fmt == SparseFormat::kCSC ? ptr->OutDegrees(etype, vids) : ptr->InDegrees(etype, vids); } uint64_t UnitGraph::OutDegree(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); CHECK(fmt == SparseFormat::kCSR || fmt == SparseFormat::kCOO) << "Out degree cannot be computed as neither CSR nor COO format is " "allowed for this graph. Please enable one of them at least."; return ptr->OutDegree(etype, vid); } DegreeArray UnitGraph::OutDegrees(dgl_type_t etype, IdArray vids) const { SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); CHECK(fmt == SparseFormat::kCSR || fmt == SparseFormat::kCOO) << "Out degree cannot be computed as neither CSR nor COO format is " "allowed for this graph. 
Please enable one of them at least."; return ptr->OutDegrees(etype, vids); } DGLIdIters UnitGraph::SuccVec(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); return ptr->SuccVec(etype, vid); } DGLIdIters32 UnitGraph::SuccVec32(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = std::dynamic_pointer_cast(GetFormat(fmt)); CHECK_NOTNULL(ptr); return ptr->SuccVec32(etype, vid); } DGLIdIters UnitGraph::OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSR_CODE); const auto ptr = GetFormat(fmt); return ptr->OutEdgeVec(etype, vid); } DGLIdIters UnitGraph::PredVec(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) return ptr->SuccVec(etype, vid); else return ptr->PredVec(etype, vid); } DGLIdIters UnitGraph::InEdgeVec(dgl_type_t etype, dgl_id_t vid) const { SparseFormat fmt = SelectFormat(CSC_CODE); const auto ptr = GetFormat(fmt); if (fmt == SparseFormat::kCSC) return ptr->OutEdgeVec(etype, vid); else return ptr->InEdgeVec(etype, vid); } std::vector UnitGraph::GetAdj( dgl_type_t etype, bool transpose, const std::string& fmt) const { // TODO(minjie): Our current semantics of adjacency matrix is row for dst // nodes and col for src nodes. Therefore, we need to flip the transpose flag. // For example, // transpose=False is equal to in edge CSR. We have this behavior because // previously we use framework's SPMM and we don't cache reverse adj. This // is not intuitive and also not consistent with networkx's // to_scipy_sparse_matrix. With the upcoming custom kernel change, we should // change the behavior and make row for src and col for dst. if (fmt == std::string("csr")) { return !transpose ? 
GetOutCSR()->GetAdj(etype, false, "csr") : GetInCSR()->GetAdj(etype, false, "csr"); } else if (fmt == std::string("coo")) { return GetCOO()->GetAdj(etype, transpose, fmt); } else { LOG(FATAL) << "unsupported adjacency matrix format: " << fmt; return {}; } } HeteroSubgraph UnitGraph::VertexSubgraph( const std::vector& vids) const { // We prefer to generate a subgraph from out-csr. SparseFormat fmt = SelectFormat(CSR_CODE); HeteroSubgraph sg = GetFormat(fmt)->VertexSubgraph(vids); HeteroSubgraph ret; CSRPtr subcsr = nullptr; CSRPtr subcsc = nullptr; COOPtr subcoo = nullptr; switch (fmt) { case SparseFormat::kCSR: subcsr = std::dynamic_pointer_cast(sg.graph); break; case SparseFormat::kCSC: subcsc = std::dynamic_pointer_cast(sg.graph); break; case SparseFormat::kCOO: subcoo = std::dynamic_pointer_cast(sg.graph); break; default: LOG(FATAL) << "[BUG] unsupported format " << static_cast(fmt); return ret; } ret.graph = HeteroGraphPtr(new UnitGraph(meta_graph(), subcsc, subcsr, subcoo)); ret.induced_vertices = std::move(sg.induced_vertices); ret.induced_edges = std::move(sg.induced_edges); return ret; } HeteroSubgraph UnitGraph::EdgeSubgraph( const std::vector& eids, bool preserve_nodes) const { SparseFormat fmt = SelectFormat(COO_CODE); auto sg = GetFormat(fmt)->EdgeSubgraph(eids, preserve_nodes); HeteroSubgraph ret; CSRPtr subcsr = nullptr; CSRPtr subcsc = nullptr; COOPtr subcoo = nullptr; switch (fmt) { case SparseFormat::kCSR: subcsr = std::dynamic_pointer_cast(sg.graph); break; case SparseFormat::kCSC: subcsc = std::dynamic_pointer_cast(sg.graph); break; case SparseFormat::kCOO: subcoo = std::dynamic_pointer_cast(sg.graph); break; default: LOG(FATAL) << "[BUG] unsupported format " << static_cast(fmt); return ret; } ret.graph = HeteroGraphPtr(new UnitGraph(meta_graph(), subcsc, subcsr, subcoo)); ret.induced_vertices = std::move(sg.induced_vertices); ret.induced_edges = std::move(sg.induced_edges); return ret; } HeteroGraphPtr UnitGraph::CreateFromCOO( int64_t 
num_vtypes, int64_t num_src, int64_t num_dst, IdArray row, IdArray col, bool row_sorted, bool col_sorted, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); if (num_vtypes == 1) CHECK_EQ(num_src, num_dst); auto mg = CreateUnitGraphMetaGraph(num_vtypes); COOPtr coo(new COO(mg, num_src, num_dst, row, col, row_sorted, col_sorted)); return HeteroGraphPtr(new UnitGraph(mg, nullptr, nullptr, coo, formats)); } HeteroGraphPtr UnitGraph::CreateFromCOO( int64_t num_vtypes, const aten::COOMatrix& mat, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); if (num_vtypes == 1) CHECK_EQ(mat.num_rows, mat.num_cols); auto mg = CreateUnitGraphMetaGraph(num_vtypes); COOPtr coo(new COO(mg, mat)); return HeteroGraphPtr(new UnitGraph(mg, nullptr, nullptr, coo, formats)); } HeteroGraphPtr UnitGraph::CreateFromCSR( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); if (num_vtypes == 1) CHECK_EQ(num_src, num_dst); auto mg = CreateUnitGraphMetaGraph(num_vtypes); CSRPtr csr(new CSR(mg, num_src, num_dst, indptr, indices, edge_ids)); return HeteroGraphPtr(new UnitGraph(mg, nullptr, csr, nullptr, formats)); } HeteroGraphPtr UnitGraph::CreateFromCSR( int64_t num_vtypes, const aten::CSRMatrix& mat, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); if (num_vtypes == 1) CHECK_EQ(mat.num_rows, mat.num_cols); auto mg = CreateUnitGraphMetaGraph(num_vtypes); CSRPtr csr(new CSR(mg, mat)); return HeteroGraphPtr(new UnitGraph(mg, nullptr, csr, nullptr, formats)); } HeteroGraphPtr UnitGraph::CreateFromCSRAndCOO( int64_t num_vtypes, const aten::CSRMatrix& csr, const aten::COOMatrix& coo, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); CHECK_EQ(coo.num_rows, csr.num_rows); CHECK_EQ(coo.num_cols, csr.num_cols); if (num_vtypes == 1) { CHECK_EQ(csr.num_rows, csr.num_cols); } auto mg = 
CreateUnitGraphMetaGraph(num_vtypes); CSRPtr csrPtr(new CSR(mg, csr)); COOPtr cooPtr(new COO(mg, coo)); return HeteroGraphPtr(new UnitGraph(mg, nullptr, csrPtr, cooPtr, formats)); } HeteroGraphPtr UnitGraph::CreateFromCSC( int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr, IdArray indices, IdArray edge_ids, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); if (num_vtypes == 1) CHECK_EQ(num_src, num_dst); auto mg = CreateUnitGraphMetaGraph(num_vtypes); CSRPtr csc(new CSR(mg, num_dst, num_src, indptr, indices, edge_ids)); return HeteroGraphPtr(new UnitGraph(mg, csc, nullptr, nullptr, formats)); } HeteroGraphPtr UnitGraph::CreateFromCSC( int64_t num_vtypes, const aten::CSRMatrix& mat, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); if (num_vtypes == 1) CHECK_EQ(mat.num_rows, mat.num_cols); auto mg = CreateUnitGraphMetaGraph(num_vtypes); CSRPtr csc(new CSR(mg, mat)); return HeteroGraphPtr(new UnitGraph(mg, csc, nullptr, nullptr, formats)); } HeteroGraphPtr UnitGraph::CreateFromCSCAndCOO( int64_t num_vtypes, const aten::CSRMatrix& csc, const aten::COOMatrix& coo, dgl_format_code_t formats) { CHECK(num_vtypes == 1 || num_vtypes == 2); CHECK_EQ(coo.num_rows, csc.num_cols); CHECK_EQ(coo.num_cols, csc.num_rows); if (num_vtypes == 1) { CHECK_EQ(csc.num_rows, csc.num_cols); } auto mg = CreateUnitGraphMetaGraph(num_vtypes); CSRPtr cscPtr(new CSR(mg, csc)); COOPtr cooPtr(new COO(mg, coo)); return HeteroGraphPtr(new UnitGraph(mg, cscPtr, nullptr, cooPtr, formats)); } HeteroGraphPtr UnitGraph::AsNumBits(HeteroGraphPtr g, uint8_t bits) { if (g->NumBits() == bits) { return g; } else { auto bg = std::dynamic_pointer_cast(g); CHECK_NOTNULL(bg); CSRPtr new_incsr = (bg->in_csr_->defined()) ? CSRPtr(new CSR(bg->in_csr_->AsNumBits(bits))) : nullptr; CSRPtr new_outcsr = (bg->out_csr_->defined()) ? CSRPtr(new CSR(bg->out_csr_->AsNumBits(bits))) : nullptr; COOPtr new_coo = (bg->coo_->defined()) ? 
COOPtr(new COO(bg->coo_->AsNumBits(bits))) : nullptr; return HeteroGraphPtr(new UnitGraph( g->meta_graph(), new_incsr, new_outcsr, new_coo, bg->formats_)); } } HeteroGraphPtr UnitGraph::CopyTo(HeteroGraphPtr g, const DGLContext& ctx) { if (ctx == g->Context()) { return g; } else { auto bg = std::dynamic_pointer_cast(g); CHECK_NOTNULL(bg); CSRPtr new_incsr = (bg->in_csr_->defined()) ? CSRPtr(new CSR(bg->in_csr_->CopyTo(ctx))) : nullptr; CSRPtr new_outcsr = (bg->out_csr_->defined()) ? CSRPtr(new CSR(bg->out_csr_->CopyTo(ctx))) : nullptr; COOPtr new_coo = (bg->coo_->defined()) ? COOPtr(new COO(bg->coo_->CopyTo(ctx))) : nullptr; return HeteroGraphPtr(new UnitGraph( g->meta_graph(), new_incsr, new_outcsr, new_coo, bg->formats_)); } } HeteroGraphPtr UnitGraph::PinMemory() { CSRPtr pinned_in_csr, pinned_out_csr; COOPtr pinned_coo; if (this->in_csr_->defined() && this->in_csr_->IsPinned()) { pinned_in_csr = this->in_csr_; } else if (this->in_csr_->defined()) { pinned_in_csr = CSRPtr(new CSR(this->in_csr_->PinMemory())); } else { pinned_in_csr = nullptr; } if (this->out_csr_->defined() && this->out_csr_->IsPinned()) { pinned_out_csr = this->out_csr_; } else if (this->out_csr_->defined()) { pinned_out_csr = CSRPtr(new CSR(this->out_csr_->PinMemory())); } else { pinned_out_csr = nullptr; } if (this->coo_->defined() && this->coo_->IsPinned()) { pinned_coo = this->coo_; } else if (this->coo_->defined()) { pinned_coo = COOPtr(new COO(this->coo_->PinMemory())); } else { pinned_coo = nullptr; } return HeteroGraphPtr(new UnitGraph( meta_graph(), pinned_in_csr, pinned_out_csr, pinned_coo, this->formats_)); } void UnitGraph::PinMemory_() { if (this->in_csr_->defined()) this->in_csr_->PinMemory_(); if (this->out_csr_->defined()) this->out_csr_->PinMemory_(); if (this->coo_->defined()) this->coo_->PinMemory_(); } void UnitGraph::UnpinMemory_() { if (this->in_csr_->defined()) this->in_csr_->UnpinMemory_(); if (this->out_csr_->defined()) this->out_csr_->UnpinMemory_(); if 
(this->coo_->defined()) this->coo_->UnpinMemory_(); } void UnitGraph::RecordStream(DGLStreamHandle stream) { if (this->in_csr_->defined()) this->in_csr_->RecordStream(stream); if (this->out_csr_->defined()) this->out_csr_->RecordStream(stream); if (this->coo_->defined()) this->coo_->RecordStream(stream); this->recorded_streams.push_back(stream); } void UnitGraph::InvalidateCSR() { this->out_csr_ = CSRPtr(new CSR()); } void UnitGraph::InvalidateCSC() { this->in_csr_ = CSRPtr(new CSR()); } void UnitGraph::InvalidateCOO() { this->coo_ = COOPtr(new COO()); } UnitGraph::UnitGraph( GraphPtr metagraph, CSRPtr in_csr, CSRPtr out_csr, COOPtr coo, dgl_format_code_t formats) : BaseHeteroGraph(metagraph), in_csr_(in_csr), out_csr_(out_csr), coo_(coo) { if (!in_csr_) { in_csr_ = CSRPtr(new CSR()); } if (!out_csr_) { out_csr_ = CSRPtr(new CSR()); } if (!coo_) { coo_ = COOPtr(new COO()); } formats_ = formats; dgl_format_code_t created = GetCreatedFormats(); if ((formats | created) != formats) LOG(FATAL) << "Graph created from formats: " << CodeToStr(created) << ", which is not compatible with available formats: " << CodeToStr(formats); CHECK(GetAny()) << "At least one graph structure should exist."; } HeteroGraphPtr UnitGraph::CreateUnitGraphFrom( int num_vtypes, const aten::CSRMatrix& in_csr, const aten::CSRMatrix& out_csr, const aten::COOMatrix& coo, bool has_in_csr, bool has_out_csr, bool has_coo, dgl_format_code_t formats) { auto mg = CreateUnitGraphMetaGraph(num_vtypes); CSRPtr in_csr_ptr = nullptr; CSRPtr out_csr_ptr = nullptr; COOPtr coo_ptr = nullptr; if (has_in_csr) in_csr_ptr = CSRPtr(new CSR(mg, in_csr)); else in_csr_ptr = CSRPtr(new CSR()); if (has_out_csr) out_csr_ptr = CSRPtr(new CSR(mg, out_csr)); else out_csr_ptr = CSRPtr(new CSR()); if (has_coo) coo_ptr = COOPtr(new COO(mg, coo)); else coo_ptr = COOPtr(new COO()); return HeteroGraphPtr( new UnitGraph(mg, in_csr_ptr, out_csr_ptr, coo_ptr, formats)); } UnitGraph::CSRPtr UnitGraph::GetInCSR(bool inplace) const { if 
(inplace) if (!(formats_ & CSC_CODE)) LOG(FATAL) << "The graph have restricted sparse format " << CodeToStr(formats_) << ", cannot create CSC matrix."; CSRPtr ret = in_csr_; // Prefers converting from COO since it is parallelized. // TODO(BarclayII): need benchmarking. if (!in_csr_->defined()) { if (coo_->defined()) { const auto& newadj = aten::COOToCSR(aten::COOTranspose(coo_->adj())); if (inplace) *(const_cast(this)->in_csr_) = CSR(meta_graph(), newadj); else ret = std::make_shared(meta_graph(), newadj); } else { CHECK(out_csr_->defined()) << "None of CSR, COO exist"; const auto& newadj = aten::CSRTranspose(out_csr_->adj()); if (inplace) *(const_cast(this)->in_csr_) = CSR(meta_graph(), newadj); else ret = std::make_shared(meta_graph(), newadj); } if (inplace) { if (IsPinned()) in_csr_->PinMemory_(); for (auto stream : recorded_streams) in_csr_->RecordStream(stream); } } return ret; } /** @brief Return out csr. If not exist, transpose the other one.*/ UnitGraph::CSRPtr UnitGraph::GetOutCSR(bool inplace) const { if (inplace) if (!(formats_ & CSR_CODE)) LOG(FATAL) << "The graph have restricted sparse format " << CodeToStr(formats_) << ", cannot create CSR matrix."; CSRPtr ret = out_csr_; // Prefers converting from COO since it is parallelized. // TODO(BarclayII): need benchmarking. if (!out_csr_->defined()) { if (coo_->defined()) { const auto& newadj = aten::COOToCSR(coo_->adj()); if (inplace) *(const_cast(this)->out_csr_) = CSR(meta_graph(), newadj); else ret = std::make_shared(meta_graph(), newadj); } else { CHECK(in_csr_->defined()) << "None of CSR, COO exist"; const auto& newadj = aten::CSRTranspose(in_csr_->adj()); if (inplace) *(const_cast(this)->out_csr_) = CSR(meta_graph(), newadj); else ret = std::make_shared(meta_graph(), newadj); } if (inplace) { if (IsPinned()) out_csr_->PinMemory_(); for (auto stream : recorded_streams) out_csr_->RecordStream(stream); } } return ret; } /** @brief Return coo. 
If not exist, create from csr.*/ UnitGraph::COOPtr UnitGraph::GetCOO(bool inplace) const { if (inplace) if (!(formats_ & COO_CODE)) LOG(FATAL) << "The graph have restricted sparse format " << CodeToStr(formats_) << ", cannot create COO matrix."; COOPtr ret = coo_; if (!coo_->defined()) { if (in_csr_->defined()) { const auto& newadj = aten::COOTranspose(aten::CSRToCOO(in_csr_->adj(), true)); if (inplace) *(const_cast(this)->coo_) = COO(meta_graph(), newadj); else ret = std::make_shared(meta_graph(), newadj); } else { CHECK(out_csr_->defined()) << "Both CSR are missing."; const auto& newadj = aten::CSRToCOO(out_csr_->adj(), true); if (inplace) *(const_cast(this)->coo_) = COO(meta_graph(), newadj); else ret = std::make_shared(meta_graph(), newadj); } if (inplace) { if (IsPinned()) coo_->PinMemory_(); for (auto stream : recorded_streams) coo_->RecordStream(stream); } } return ret; } aten::CSRMatrix UnitGraph::GetCSCMatrix(dgl_type_t etype) const { return GetInCSR()->adj(); } aten::CSRMatrix UnitGraph::GetCSRMatrix(dgl_type_t etype) const { return GetOutCSR()->adj(); } aten::COOMatrix UnitGraph::GetCOOMatrix(dgl_type_t etype) const { return GetCOO()->adj(); } HeteroGraphPtr UnitGraph::GetAny() const { if (in_csr_->defined()) { return in_csr_; } else if (out_csr_->defined()) { return out_csr_; } else { return coo_; } } dgl_format_code_t UnitGraph::GetCreatedFormats() const { dgl_format_code_t ret = 0; if (in_csr_->defined()) ret |= CSC_CODE; if (out_csr_->defined()) ret |= CSR_CODE; if (coo_->defined()) ret |= COO_CODE; return ret; } dgl_format_code_t UnitGraph::GetAllowedFormats() const { return formats_; } HeteroGraphPtr UnitGraph::GetFormat(SparseFormat format) const { switch (format) { case SparseFormat::kCSR: return GetOutCSR(); case SparseFormat::kCSC: return GetInCSR(); default: return GetCOO(); } } HeteroGraphPtr UnitGraph::GetGraphInFormat(dgl_format_code_t formats) const { // Get the created formats. 
auto created_formats = GetCreatedFormats(); // Get the intersection of formats and created_formats. auto intersection = formats & created_formats; // If the intersection of formats and created_formats is not empty. // The format(s) in the intersection will be retained. if (intersection != 0) { COOPtr coo_ptr = COO_CODE & intersection ? GetCOO(false) : nullptr; CSRPtr in_csr_ptr = CSC_CODE & intersection ? GetInCSR(false) : nullptr; CSRPtr out_csr_ptr = CSR_CODE & intersection ? GetOutCSR(false) : nullptr; return HeteroGraphPtr( new UnitGraph(meta_graph_, in_csr_ptr, out_csr_ptr, coo_ptr, formats)); } // If the intersection of formats and created_formats is empty. // Create a format in the order of COO -> CSR -> CSC. int64_t num_vtypes = NumVertexTypes(); if (COO_CODE & formats) return CreateFromCOO(num_vtypes, GetCOO(false)->adj(), formats); if (CSR_CODE & formats) return CreateFromCSR(num_vtypes, GetOutCSR(false)->adj(), formats); return CreateFromCSC(num_vtypes, GetInCSR(false)->adj(), formats); } SparseFormat UnitGraph::SelectFormat( dgl_format_code_t preferred_formats) const { dgl_format_code_t common = preferred_formats & formats_; dgl_format_code_t created = GetCreatedFormats(); if (common & created) return DecodeFormat(common & created); // NOTE(zihao): hypersparse is currently disabled since many CUDA operators on // COO have not been implmented yet. if (coo_->defined() && // coo_->IsHypersparse()) // only allow coo for hypersparse graph. 
// return SparseFormat::kCOO; if (common) return DecodeFormat(common); return DecodeFormat(created); } GraphPtr UnitGraph::AsImmutableGraph() const { CHECK(NumVertexTypes() == 1) << "not a homogeneous graph"; dgl::CSRPtr in_csr_ptr = nullptr, out_csr_ptr = nullptr; dgl::COOPtr coo_ptr = nullptr; if (in_csr_->defined()) { aten::CSRMatrix csc = GetCSCMatrix(0); in_csr_ptr = dgl::CSRPtr(new dgl::CSR(csc.indptr, csc.indices, csc.data)); } if (out_csr_->defined()) { aten::CSRMatrix csr = GetCSRMatrix(0); out_csr_ptr = dgl::CSRPtr(new dgl::CSR(csr.indptr, csr.indices, csr.data)); } if (coo_->defined()) { aten::COOMatrix coo = GetCOOMatrix(0); if (!COOHasData(coo)) { coo_ptr = dgl::COOPtr(new dgl::COO(NumVertices(0), coo.row, coo.col)); } else { IdArray new_src = Scatter(coo.row, coo.data); IdArray new_dst = Scatter(coo.col, coo.data); coo_ptr = dgl::COOPtr(new dgl::COO(NumVertices(0), new_src, new_dst)); } } return GraphPtr(new dgl::ImmutableGraph(in_csr_ptr, out_csr_ptr, coo_ptr)); } HeteroGraphPtr UnitGraph::LineGraph(bool backtracking) const { // TODO(xiangsx) currently we only support homogeneous graph auto fmt = SelectFormat(ALL_CODE); switch (fmt) { case SparseFormat::kCOO: { return CreateFromCOO(1, aten::COOLineGraph(coo_->adj(), backtracking)); } case SparseFormat::kCSR: { const aten::CSRMatrix csr = GetCSRMatrix(0); const aten::COOMatrix coo = aten::COOLineGraph(aten::CSRToCOO(csr, true), backtracking); return CreateFromCOO(1, coo); } case SparseFormat::kCSC: { const aten::CSRMatrix csc = GetCSCMatrix(0); const aten::CSRMatrix csr = aten::CSRTranspose(csc); const aten::COOMatrix coo = aten::COOLineGraph(aten::CSRToCOO(csr, true), backtracking); return CreateFromCOO(1, coo); } default: LOG(FATAL) << "None of CSC, CSR, COO exist"; break; } return nullptr; } constexpr uint64_t kDGLSerialize_UnitGraphMagic = 0xDD2E60F0F6B4A127; bool UnitGraph::Load(dmlc::Stream* fs) { uint64_t magicNum; CHECK(fs->Read(&magicNum)) << "Invalid Magic Number"; CHECK_EQ(magicNum, 
kDGLSerialize_UnitGraphMagic) << "Invalid UnitGraph Data"; int64_t save_format_code, formats_code; CHECK(fs->Read(&save_format_code)) << "Invalid format"; CHECK(fs->Read(&formats_code)) << "Invalid format"; dgl_format_code_t save_formats = ANY_CODE; if (save_format_code >> 32) { save_formats = static_cast(0xffffffff & save_format_code); } else { save_formats = SparseFormatsToCode({static_cast(save_format_code)}); } if (formats_code >> 32) { formats_ = static_cast(0xffffffff & formats_code); } else { // NOTE(zihao): to be compatible with old formats. switch (formats_code & 0xffffffff) { case 0: formats_ = ALL_CODE; break; case 1: formats_ = COO_CODE; break; case 2: formats_ = CSR_CODE; break; case 3: formats_ = CSC_CODE; break; default: LOG(FATAL) << "Load graph failed, formats code " << formats_code << "not recognized."; } } if (save_formats & COO_CODE) { fs->Read(&coo_); } if (save_formats & CSR_CODE) { fs->Read(&out_csr_); } if (save_formats & CSC_CODE) { fs->Read(&in_csr_); } if (!coo_ && !out_csr_ && !in_csr_) { LOG(FATAL) << "unsupported format code"; } if (!in_csr_) { in_csr_ = CSRPtr(new CSR()); } if (!out_csr_) { out_csr_ = CSRPtr(new CSR()); } if (!coo_) { coo_ = COOPtr(new COO()); } meta_graph_ = GetAny()->meta_graph(); return true; } void UnitGraph::Save(dmlc::Stream* fs) const { fs->Write(kDGLSerialize_UnitGraphMagic); // Didn't write UnitGraph::meta_graph_, since it's included in the underlying // sparse matrix auto save_formats = SparseFormatsToCode({SelectFormat(ALL_CODE)}); auto fstream = dynamic_cast(fs); if (fstream) { auto formats = fstream->FormatsToSave(); save_formats = formats == ANY_CODE ? 
SparseFormatsToCode({SelectFormat(ALL_CODE)}) : formats; } fs->Write(static_cast(save_formats | 0x100000000)); fs->Write(static_cast(formats_ | 0x100000000)); if (save_formats & COO_CODE) { fs->Write(GetCOO()); } if (save_formats & CSR_CODE) { fs->Write(GetOutCSR()); } if (save_formats & CSC_CODE) { fs->Write(GetInCSR()); } } UnitGraphPtr UnitGraph::Reverse() const { CSRPtr new_incsr = out_csr_, new_outcsr = in_csr_; COOPtr new_coo = nullptr; if (coo_->defined()) { new_coo = COOPtr(new COO(coo_->meta_graph(), aten::COOTranspose(coo_->adj()))); } return UnitGraphPtr( new UnitGraph(meta_graph(), new_incsr, new_outcsr, new_coo)); } std::tuple UnitGraph::ToSimple() const { CSRPtr new_incsr = nullptr, new_outcsr = nullptr; COOPtr new_coo = nullptr; IdArray count; IdArray edge_map; auto avail_fmt = SelectFormat(ALL_CODE); switch (avail_fmt) { case SparseFormat::kCOO: { auto ret = aten::COOToSimple(GetCOO()->adj()); count = std::get<1>(ret); edge_map = std::get<2>(ret); new_coo = COOPtr(new COO(meta_graph(), std::get<0>(ret))); break; } case SparseFormat::kCSR: { auto ret = aten::CSRToSimple(GetOutCSR()->adj()); count = std::get<1>(ret); edge_map = std::get<2>(ret); new_outcsr = CSRPtr(new CSR(meta_graph(), std::get<0>(ret))); break; } case SparseFormat::kCSC: { auto ret = aten::CSRToSimple(GetInCSR()->adj()); count = std::get<1>(ret); edge_map = std::get<2>(ret); new_incsr = CSRPtr(new CSR(meta_graph(), std::get<0>(ret))); break; } default: LOG(FATAL) << "At lease one of COO, CSR or CSC adj should exist."; break; } return std::make_tuple( UnitGraphPtr(new UnitGraph(meta_graph(), new_incsr, new_outcsr, new_coo)), count, edge_map); } } // namespace dgl ================================================ FILE: src/graph/unit_graph.h ================================================ /** * Copyright (c) 2019 by Contributors * @file graph/unit_graph.h * @brief UnitGraph graph */ #ifndef DGL_GRAPH_UNIT_GRAPH_H_ #define DGL_GRAPH_UNIT_GRAPH_H_ #include #include #include #include 
#include
#include
#include
#include
#include
#include
#include "../c_api_common.h"

namespace dgl {

class HeteroGraph;
class UnitGraph;
typedef std::shared_ptr UnitGraphPtr;

/**
 * @brief UnitGraph graph
 *
 * A UnitGraph is a special type of heterograph that takes one of two forms:
 * (1) It has two types of nodes: "Src" and "Dst". All the edges are
 *     from "Src" type nodes to "Dst" type nodes, so there is no edge among
 *     nodes of the same type. Thus, its metagraph has two nodes and one edge
 *     between them.
 * (2) It has only one type of nodes and edges. Thus, its metagraph has one
 *     node and one self-loop edge.
 *
 * The adjacency may be held in up to three sparse formats at once (in-edge
 * CSR a.k.a. CSC, out-edge CSR, and COO); see the data members at the bottom
 * of the class.
 */
class UnitGraph : public BaseHeteroGraph {
 public:
  // internal data structure
  class COO;
  class CSR;
  typedef std::shared_ptr COOPtr;
  typedef std::shared_ptr CSRPtr;

  /** @return The source vertex type, which is always 0. */
  inline dgl_type_t SrcType() const { return 0; }
  /**
   * @return The destination vertex type: 0 when the graph has a single
   * vertex type, 1 otherwise.
   */
  inline dgl_type_t DstType() const { return NumVertexTypes() == 1 ? 0 : 1; }
  /** @return The edge type, which is always 0. */
  inline dgl_type_t EdgeType() const { return 0; }

  /** @brief Not supported on a UnitGraph; always raises a fatal error. */
  HeteroGraphPtr GetRelationGraph(dgl_type_t etype) const override {
    LOG(FATAL) << "The method shouldn't be called for UnitGraph graph. "
               << "The relation graph is simply this graph itself.";
    return {};
  }

  // The mutation API below is rejected unconditionally: every call raises a
  // fatal error because a UnitGraph is immutable.
  void AddVertices(dgl_type_t vtype, uint64_t num_vertices) override {
    LOG(FATAL) << "UnitGraph graph is not mutable.";
  }

  void AddEdge(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) override {
    LOG(FATAL) << "UnitGraph graph is not mutable.";
  }

  void AddEdges(dgl_type_t etype, IdArray src_ids, IdArray dst_ids) override {
    LOG(FATAL) << "UnitGraph graph is not mutable.";
  }

  void Clear() override { LOG(FATAL) << "UnitGraph graph is not mutable."; }

  DGLDataType DataType() const override;

  DGLContext Context() const override;

  bool IsPinned() const override;

  uint8_t NumBits() const override;

  bool IsMultigraph() const override;

  /** @return Always true. */
  bool IsReadonly() const override { return true; }

  uint64_t NumVertices(dgl_type_t vtype) const override;

  /**
   * @return The result of NumVertices() for each vertex type, in increasing
   * vertex-type order.
   */
  inline std::vector NumVerticesPerType() const override {
    std::vector num_nodes_per_type;
    for (dgl_type_t vtype = 0; vtype < NumVertexTypes(); ++vtype)
      num_nodes_per_type.push_back(NumVertices(vtype));
    return num_nodes_per_type;
  }

  uint64_t NumEdges(dgl_type_t etype) const override;

  bool HasVertex(dgl_type_t vtype, dgl_id_t vid) const override;

  BoolArray HasVertices(dgl_type_t vtype, IdArray vids) const override;

  bool HasEdgeBetween(
      dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override;

  BoolArray HasEdgesBetween(
      dgl_type_t etype, IdArray src_ids, IdArray dst_ids) const override;

  IdArray Predecessors(dgl_type_t etype, dgl_id_t dst) const override;

  IdArray Successors(dgl_type_t etype, dgl_id_t src) const override;

  IdArray EdgeId(dgl_type_t etype, dgl_id_t src, dgl_id_t dst) const override;

  EdgeArray EdgeIdsAll(
      dgl_type_t etype, IdArray src, IdArray dst) const override;

  IdArray EdgeIdsOne(dgl_type_t etype, IdArray src, IdArray dst) const override;

  std::pair FindEdge(
      dgl_type_t etype, dgl_id_t eid) const override;

  EdgeArray FindEdges(dgl_type_t etype, IdArray eids) const override;

  EdgeArray InEdges(dgl_type_t etype, dgl_id_t vid) const override;

  EdgeArray InEdges(dgl_type_t etype, IdArray vids) const override;

  EdgeArray OutEdges(dgl_type_t etype, dgl_id_t vid) const override;

  EdgeArray OutEdges(dgl_type_t etype, IdArray vids) const override;

  EdgeArray Edges(
      dgl_type_t etype, const std::string& order = "") const override;

  uint64_t InDegree(dgl_type_t etype, dgl_id_t vid) const override;

  DegreeArray InDegrees(dgl_type_t etype, IdArray vids) const override;

  uint64_t OutDegree(dgl_type_t etype, dgl_id_t vid) const override;

  DegreeArray OutDegrees(dgl_type_t etype, IdArray vids) const override;

  DGLIdIters SuccVec(dgl_type_t etype, dgl_id_t vid) const override;

  // 32bit version functions, patch for SuccVec
  DGLIdIters32 SuccVec32(dgl_type_t etype, dgl_id_t vid) const;

  DGLIdIters OutEdgeVec(dgl_type_t etype, dgl_id_t vid) const override;

  DGLIdIters PredVec(dgl_type_t etype, dgl_id_t vid) const override;

  DGLIdIters InEdgeVec(dgl_type_t etype, dgl_id_t vid) const override;

  std::vector GetAdj(
      dgl_type_t etype, bool transpose, const std::string& fmt) const override;

  HeteroSubgraph VertexSubgraph(
      const std::vector& vids) const override;

  HeteroSubgraph EdgeSubgraph(
      const std::vector& eids,
      bool preserve_nodes = false) const override;

  // creators
  //
  // Every CreateFrom* factory requires num_vtypes to be 1 or 2. With a single
  // vertex type the source and destination node counts must be equal.

  /**
   * @brief Create a graph with no edges
   *
   * Builds zero-length row/col arrays of the given dtype on the given context
   * and forwards them to CreateFromCOO().
   */
  static HeteroGraphPtr Empty(
      int64_t num_vtypes, int64_t num_src, int64_t num_dst, DGLDataType dtype,
      DGLContext ctx) {
    IdArray row = IdArray::Empty({0}, dtype, ctx);
    IdArray col = IdArray::Empty({0}, dtype, ctx);
    return CreateFromCOO(num_vtypes, num_src, num_dst, row, col);
  }

  /** @brief Create a graph from COO arrays */
  static HeteroGraphPtr CreateFromCOO(
      int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray row,
      IdArray col, bool row_sorted = false, bool col_sorted = false,
      dgl_format_code_t formats = ALL_CODE);

  /** @brief Create a graph from a COO matrix */
  static HeteroGraphPtr CreateFromCOO(
      int64_t num_vtypes, const aten::COOMatrix& mat,
      dgl_format_code_t formats = ALL_CODE);

  /** @brief Create a graph from (out) CSR arrays */
  static HeteroGraphPtr CreateFromCSR(
      int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr,
      IdArray indices, IdArray edge_ids, dgl_format_code_t formats = ALL_CODE);

  /** @brief Create a graph from an (out) CSR matrix */
  static HeteroGraphPtr CreateFromCSR(
      int64_t num_vtypes, const aten::CSRMatrix& mat,
      dgl_format_code_t formats = ALL_CODE);

  /**
   * @brief Create a graph from (out) CSR and COO arrays, both representing
   * the same graph
   * @note The two matrices must have identical numbers of rows and columns.
   */
  static HeteroGraphPtr CreateFromCSRAndCOO(
      int64_t num_vtypes, const aten::CSRMatrix& csr,
      const aten::COOMatrix& coo, dgl_format_code_t formats = ALL_CODE);

  /** @brief Create a graph from (in) CSC arrays */
  static HeteroGraphPtr CreateFromCSC(
      int64_t num_vtypes, int64_t num_src, int64_t num_dst, IdArray indptr,
      IdArray indices, IdArray edge_ids, dgl_format_code_t formats = ALL_CODE);

  /** @brief Create a graph from an (in) CSC matrix */
  static HeteroGraphPtr CreateFromCSC(
      int64_t num_vtypes, const aten::CSRMatrix& mat,
      dgl_format_code_t formats = ALL_CODE);

  /**
   * @brief Create a graph from (in) CSC and COO arrays, both representing the
   * same graph
   * @note The CSC matrix is the transpose of the COO matrix, so coo.num_rows
   * must equal csc.num_cols and coo.num_cols must equal csc.num_rows.
   */
  static HeteroGraphPtr CreateFromCSCAndCOO(
      int64_t num_vtypes, const aten::CSRMatrix& csc,
      const aten::COOMatrix& coo, dgl_format_code_t formats = ALL_CODE);

  /**
   * @brief Convert the graph to use the given number of bits for storage
   * @note The input graph is returned as-is when it already uses that width.
   */
  static HeteroGraphPtr AsNumBits(HeteroGraphPtr g, uint8_t bits);

  /**
   * @brief Copy the data to another context
   * @note The input graph is returned as-is when it already lives on ctx.
   */
  static HeteroGraphPtr CopyTo(HeteroGraphPtr g, const DGLContext& ctx);

  /**
   * @brief Pin the in_csr_, out_csr_ and coo_ of the current graph.
   * @note The graph will be pinned inplace. Behavior depends on the current
   * context, kDGLCPU: will be pinned; IsPinned: directly return; kDGLCUDA:
   * invalid, will throw an error. The context check is deferred to pinning the
   * NDArray.
   */
  void PinMemory_() override;

  /**
   * @brief Unpin the in_csr_, out_csr_ and coo_ of the current graph.
   * @note The graph will be unpinned inplace. Behavior depends on the current
   * context, IsPinned: will be unpinned; others: directly return. The context
   * check is deferred to unpinning the NDArray.
   */
  void UnpinMemory_();

  /**
   * @brief Create a copy of the current graph in pinned memory.
   * @note The graph will be pinned outplace through PyTorch
   * CachingHostAllocator, if available. Otherwise, an error will be thrown.
   * If any of the underlying structures (incsr, outcsr, coo) are already
   * pinned, the function will simply use its original copy.
   */
  HeteroGraphPtr PinMemory();

  /**
   * @brief Record stream for this graph.
   * @param stream The stream that is using the graph
   * @note The stream is also remembered in recorded_streams so that formats
   * materialized later in place can be recorded on it as well.
   */
  void RecordStream(DGLStreamHandle stream) override;

  /**
   * @brief Create in-edge CSR format of the unit graph.
   * @param inplace if true and the in-edge CSR format does not exist, the
   * created format will be cached in this object. If true and CSC is not
   * among the allowed formats, a fatal error is raised instead.
   * @return Return the in-edge CSR format. Create from other format if not
   * exist.
   */
  CSRPtr GetInCSR(bool inplace = true) const;

  /**
   * @brief Create out-edge CSR format of the unit graph.
   * @param inplace if true and the out-edge CSR format does not exist, the
   * created format will be cached in this object. If true and CSR is not
   * among the allowed formats, a fatal error is raised instead.
   * @return Return the out-edge CSR format. Create from other format if not
   * exist.
   */
  CSRPtr GetOutCSR(bool inplace = true) const;

  /**
   * @brief Create COO format of the unit graph.
   * @param inplace if true and the COO format does not exist, the created
   * format will be cached in this object. If true and COO is not among the
   * allowed formats, a fatal error is raised instead.
   * @return Return the COO format. Create from other format if not exist.
   */
  COOPtr GetCOO(bool inplace = true) const;

  /** @return Return the COO matrix form */
  aten::COOMatrix GetCOOMatrix(dgl_type_t etype) const override;

  /** @return Return the in-edge CSC in the matrix form */
  aten::CSRMatrix GetCSCMatrix(dgl_type_t etype) const override;

  /** @return Return the out-edge CSR in the matrix form */
  aten::CSRMatrix GetCSRMatrix(dgl_type_t etype) const override;

  /**
   * @brief Edge-type-aware overload; forwards to the private
   * SelectFormat(preferred_formats) and does not use etype.
   */
  SparseFormat SelectFormat(
      dgl_type_t etype, dgl_format_code_t preferred_formats) const override {
    return SelectFormat(preferred_formats);
  }

  /**
   * @brief Return the graph in the given format. Perform format conversion if
   * the requested format does not exist.
   *
   * @return A graph in the requested format.
   */
  HeteroGraphPtr GetFormat(SparseFormat format) const;

  /** @return Bitmask of the formats that are currently materialized. */
  dgl_format_code_t GetCreatedFormats() const override;

  /** @return Bitmask of the formats this graph is allowed to use. */
  dgl_format_code_t GetAllowedFormats() const override;

  HeteroGraphPtr GetGraphInFormat(dgl_format_code_t formats) const override;

  /**
   * @brief Load UnitGraph from stream.
   * @return true on success; malformed input raises a fatal error.
   */
  bool Load(dmlc::Stream* fs);

  /** @brief Save UnitGraph to stream. */
  void Save(dmlc::Stream* fs) const;

  /** @brief Create a LineGraph of self */
  HeteroGraphPtr LineGraph(bool backtracking) const;

  /** @return the reversed graph */
  UnitGraphPtr Reverse() const;

  /**
   * @return A tuple of: the simple (no multi-edge) graph; the count recording
   * the number of duplicated edges from the original graph; and the edge
   * mapping from the edge IDs of the original graph to those of the returned
   * graph.
   */
  std::tuple ToSimple() const;

  /** @brief Drop the out-edge CSR by replacing it with an empty one. */
  void InvalidateCSR();

  /** @brief Drop the in-edge CSR (CSC) by replacing it with an empty one. */
  void InvalidateCSC();

  /** @brief Drop the COO by replacing it with an empty one. */
  void InvalidateCOO();

 private:
  friend class Serializer;
  friend class HeteroGraph;
  friend class ImmutableGraph;
  friend HeteroGraphPtr HeteroForkingUnpickle(const HeteroPickleStates& states);

  // private empty constructor
  UnitGraph() {}

  /**
   * @brief constructor
   * @param metagraph metagraph
   * @param in_csr in edge csr
   * @param out_csr out edge csr
   * @param coo coo
   * @param formats allowed sparse formats; a fatal error is raised if any
   * format that is already created is not among them
   * @note Null pointers are replaced by empty placeholder structures, and at
   * least one of the three structures must exist.
   */
  UnitGraph(
      GraphPtr metagraph, CSRPtr in_csr, CSRPtr out_csr, COOPtr coo,
      dgl_format_code_t formats = ALL_CODE);

  /**
   * @brief Create a UnitGraph from sparse matrices, using only those flagged
   * as present.
   * @param num_vtypes number of vertex types (1 or 2)
   * @param in_csr in edge csr
   * @param out_csr out edge csr
   * @param coo coo
   * @param has_in_csr whether in_csr is valid
   * @param has_out_csr whether out_csr is valid
   * @param has_coo whether coo is valid
   * @param formats allowed sparse formats
   */
  static HeteroGraphPtr CreateUnitGraphFrom(
      int num_vtypes, const aten::CSRMatrix& in_csr,
      const aten::CSRMatrix& out_csr, const aten::COOMatrix& coo,
      bool has_in_csr, bool has_out_csr, bool has_coo,
      dgl_format_code_t formats = ALL_CODE);

  /**
   * @return Return any existing format. The in-edge CSR is preferred, then
   * the out-edge CSR, then the COO.
   */
  HeteroGraphPtr GetAny() const;

  /**
   * @brief Determine which format to use with a preference.
   *
   * If the storage of unit graph is "locked", i.e. no conversion is allowed,
   * then it will return the locked format.
   *
   * Otherwise, it will return whatever DGL thinks is the most appropriate given
   * the arguments.
   */
  SparseFormat SelectFormat(dgl_format_code_t preferred_formats) const;

  /** @return Whether the graph is hypersparse */
  bool IsHypersparse() const;

  GraphPtr AsImmutableGraph() const override;

  // Graph stored in different formats. We use an on-demand strategy: a format
  // is only materialized when an operation that is suitable for it is invoked.

  /** @brief CSR graph that stores reverse edges */
  CSRPtr in_csr_;
  /** @brief CSR representation */
  CSRPtr out_csr_;
  /** @brief COO representation */
  COOPtr coo_;
  /**
   * @brief Storage format restriction.
   */
  dgl_format_code_t formats_;
  /** @brief which streams have recorded the graph */
  std::vector recorded_streams;
};

};  // namespace dgl

namespace dmlc {
DMLC_DECLARE_TRAITS(has_saveload, dgl::UnitGraph, true);
DMLC_DECLARE_TRAITS(has_saveload, dgl::UnitGraph::CSR, true);
DMLC_DECLARE_TRAITS(has_saveload, dgl::UnitGraph::COO, true);
}  // namespace dmlc

#endif  // DGL_GRAPH_UNIT_GRAPH_H_

================================================
FILE: src/partition/cuda/partition_op.cu
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file partition/cuda/partition_op.cu
 * @brief Operations on partition implemented in CUDA.
 */

#include
#include

#include "../../runtime/cuda/cuda_common.h"
#include "../../runtime/workspace.h"
#include "../partition_op.h"

using namespace dgl::runtime;

namespace dgl {
namespace partition {
namespace impl {

namespace {

/**
 * @brief Kernel to map global element IDs to partition IDs by remainder.
 *
 * Each thread handles at most one element, so the launch must provide at
 * least num_elements threads.
 *
 * @tparam IdType The type of ID.
 * @param global The global element IDs.
 * @param num_elements The number of element IDs.
 * @param num_parts The number of partitions.
 * @param part_id The mapped partition ID (output).
 */
template
__global__ void _MapProcByRemainderKernel(
    const IdType* const global, const int64_t num_elements,
    const int64_t num_parts, IdType* const part_id) {
  assert(num_elements <= gridDim.x * blockDim.x);
  const int64_t idx = blockDim.x * static_cast(blockIdx.x) + threadIdx.x;

  if (idx < num_elements) {
    part_id[idx] = global[idx] % num_parts;
  }
}

/**
 * @brief Kernel to map global element IDs to partition IDs, using a bit-mask.
 * The number of partitions must be a power of two.
 *
 * @tparam IdType The type of ID.
 * @param global The global element IDs.
 * @param num_elements The number of element IDs.
* @param mask The bit-mask with 1's for each bit to keep from the element ID to * extract the partition ID (e.g., an 8 partition mask would be 0x07). * @param part_id The mapped partition ID (outupt). */ template __global__ void _MapProcByMaskRemainderKernel( const IdType* const global, const int64_t num_elements, const IdType mask, IdType* const part_id) { assert(num_elements <= gridDim.x * blockDim.x); const int64_t idx = blockDim.x * static_cast(blockIdx.x) + threadIdx.x; if (idx < num_elements) { part_id[idx] = global[idx] & mask; } } /** * @brief Kernel to map global element IDs to local element IDs. * * @tparam IdType The type of ID. * @param global The global element IDs. * @param num_elements The number of IDs. * @param num_parts The number of partitions. * @param local The local element IDs (output). */ template __global__ void _MapLocalIndexByRemainderKernel( const IdType* const global, const int64_t num_elements, const int num_parts, IdType* const local) { assert(num_elements <= gridDim.x * blockDim.x); const int64_t idx = threadIdx.x + blockDim.x * blockIdx.x; if (idx < num_elements) { local[idx] = global[idx] / num_parts; } } /** * @brief Kernel to map local element IDs within a partition to their global * IDs, using the remainder over the number of partitions. * * @tparam IdType The type of ID. * @param local The local element IDs. * @param part_id The partition to map local elements from. * @param num_elements The number of elements to map. * @param num_parts The number of partitions. * @param global The global element IDs (output). 
*/ template __global__ void _MapGlobalIndexByRemainderKernel( const IdType* const local, const int part_id, const int64_t num_elements, const int num_parts, IdType* const global) { assert(num_elements <= gridDim.x * blockDim.x); const int64_t idx = threadIdx.x + blockDim.x * blockIdx.x; assert(part_id < num_parts); if (idx < num_elements) { global[idx] = (local[idx] * num_parts) + part_id; } } /** * @brief Device function to perform a binary search to find to which partition * a given ID belongs. * * @tparam RangeType The type of range. * @param range The prefix-sum of IDs assigned to partitions. * @param num_parts The number of partitions. * @param target The element ID to find the partition of. * * @return The partition. */ template __device__ RangeType _SearchRange( const RangeType* const range, const int num_parts, const RangeType target) { int start = 0; int end = num_parts; int cur = (end + start) / 2; assert(range[0] == 0); assert(target < range[num_parts]); while (start + 1 < end) { if (target < range[cur]) { end = cur; } else { start = cur; } cur = (start + end) / 2; } return cur; } /** * @brief Kernel to map element IDs to partition IDs. * * @tparam IdType The type of element ID. * @tparam RangeType The type of of the range. * @param range The prefix-sum of IDs assigned to partitions. * @param global The global element IDs. * @param num_elements The number of element IDs. * @param num_parts The number of partitions. * @param part_id The partition ID assigned to each element (output). 
*/ template __global__ void _MapProcByRangeKernel( const RangeType* const range, const IdType* const global, const int64_t num_elements, const int64_t num_parts, IdType* const part_id) { assert(num_elements <= gridDim.x * blockDim.x); const int64_t idx = blockDim.x * static_cast(blockIdx.x) + threadIdx.x; // rely on caching to load the range into L1 cache if (idx < num_elements) { part_id[idx] = static_cast(_SearchRange( range, static_cast(num_parts), static_cast(global[idx]))); } } /** * @brief Kernel to map global element IDs to their ID within their respective * partition. * * @tparam IdType The type of element ID. * @tparam RangeType The type of the range. * @param range The prefix-sum of IDs assigned to partitions. * @param global The global element IDs. * @param num_elements The number of elements. * @param num_parts The number of partitions. * @param local The local element IDs (output). */ template __global__ void _MapLocalIndexByRangeKernel( const RangeType* const range, const IdType* const global, const int64_t num_elements, const int num_parts, IdType* const local) { assert(num_elements <= gridDim.x * blockDim.x); const int64_t idx = threadIdx.x + blockDim.x * blockIdx.x; // rely on caching to load the range into L1 cache if (idx < num_elements) { const int proc = _SearchRange( range, static_cast(num_parts), static_cast(global[idx])); local[idx] = global[idx] - range[proc]; } } /** * @brief Kernel to map local element IDs within a partition to their global * IDs. * * @tparam IdType The type of ID. * @tparam RangeType The type of the range. * @param range The prefix-sum of IDs assigend to partitions. * @param local The local element IDs. * @param part_id The partition to map local elements from. * @param num_elements The number of elements to map. * @param num_parts The number of partitions. * @param global The global element IDs (output). 
*/ template __global__ void _MapGlobalIndexByRangeKernel( const RangeType* const range, const IdType* const local, const int part_id, const int64_t num_elements, const int num_parts, IdType* const global) { assert(num_elements <= gridDim.x * blockDim.x); const int64_t idx = threadIdx.x + blockDim.x * blockIdx.x; assert(part_id < num_parts); // rely on caching to load the range into L1 cache if (idx < num_elements) { global[idx] = local[idx] + range[part_id]; } } } // namespace // Remainder Based Partition Operations template std::pair GeneratePermutationFromRemainder( int64_t array_size, int num_parts, IdArray in_idx) { std::pair result; const auto& ctx = in_idx->ctx; auto device = DeviceAPI::Get(ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); const int64_t num_in = in_idx->shape[0]; CHECK_GE(num_parts, 1) << "The number of partitions (" << num_parts << ") must be at least 1."; if (num_parts == 1) { // no permutation result.first = aten::Range(0, num_in, sizeof(IdType) * 8, ctx); result.second = aten::Full(num_in, num_parts, sizeof(int64_t) * 8, ctx); return result; } result.first = aten::NewIdArray(num_in, ctx, sizeof(IdType) * 8); result.second = aten::Full(0, num_parts, sizeof(int64_t) * 8, ctx); int64_t* out_counts = static_cast(result.second->data); if (num_in == 0) { // now that we've zero'd out_counts, nothing left to do for an empty // mapping return result; } const int64_t part_bits = static_cast(std::ceil(std::log2(num_parts))); // First, generate a mapping of indexes to processors Workspace proc_id_in(device, ctx, num_in); { const dim3 block(256); const dim3 grid((num_in + block.x - 1) / block.x); if (num_parts < (1 << part_bits)) { // num_parts is not a power of 2 CUDA_KERNEL_CALL( _MapProcByRemainderKernel, grid, block, 0, stream, static_cast(in_idx->data), num_in, num_parts, proc_id_in.get()); } else { // num_parts is a power of 2 CUDA_KERNEL_CALL( _MapProcByMaskRemainderKernel, grid, block, 0, stream, static_cast(in_idx->data), num_in, 
static_cast(num_parts - 1), // bit mask proc_id_in.get()); } } // then create a permutation array that groups processors together by // performing a radix sort Workspace proc_id_out(device, ctx, num_in); IdType* perm_out = static_cast(result.first->data); { IdArray perm_in = aten::Range(0, num_in, sizeof(IdType) * 8, ctx); size_t sort_workspace_size; CUDA_CALL(cub::DeviceRadixSort::SortPairs( nullptr, sort_workspace_size, proc_id_in.get(), proc_id_out.get(), static_cast(perm_in->data), perm_out, num_in, 0, part_bits, stream)); Workspace sort_workspace(device, ctx, sort_workspace_size); CUDA_CALL(cub::DeviceRadixSort::SortPairs( sort_workspace.get(), sort_workspace_size, proc_id_in.get(), proc_id_out.get(), static_cast(perm_in->data), perm_out, num_in, 0, part_bits, stream)); } // explicitly free so workspace can be re-used proc_id_in.free(); // perform a histogram and then prefixsum on the sorted proc_id vector // Count the number of values to be sent to each processor { using AtomicCount = unsigned long long; // NOLINT static_assert( sizeof(AtomicCount) == sizeof(*out_counts), "AtomicCount must be the same width as int64_t for atomicAdd " "in cub::DeviceHistogram::HistogramEven() to work"); // TODO(dlasalle): Once https://github.com/NVIDIA/cub/pull/287 is merged, // add a compile time check against the cub version to allow // num_in > (2 << 31). 
CHECK(num_in < static_cast(std::numeric_limits::max())) << "number of values to insert into histogram must be less than max " "value of int."; size_t hist_workspace_size; CUDA_CALL(cub::DeviceHistogram::HistogramEven( nullptr, hist_workspace_size, proc_id_out.get(), reinterpret_cast(out_counts), num_parts + 1, static_cast(0), static_cast(num_parts), static_cast(num_in), stream)); Workspace hist_workspace(device, ctx, hist_workspace_size); CUDA_CALL(cub::DeviceHistogram::HistogramEven( hist_workspace.get(), hist_workspace_size, proc_id_out.get(), reinterpret_cast(out_counts), num_parts + 1, static_cast(0), static_cast(num_parts), static_cast(num_in), stream)); } return result; } template std::pair GeneratePermutationFromRemainder< kDGLCUDA, int32_t>(int64_t array_size, int num_parts, IdArray in_idx); template std::pair GeneratePermutationFromRemainder< kDGLCUDA, int64_t>(int64_t array_size, int num_parts, IdArray in_idx); template IdArray MapToLocalFromRemainder(const int num_parts, IdArray global_idx) { const auto& ctx = global_idx->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); if (num_parts > 1) { IdArray local_idx = aten::NewIdArray(global_idx->shape[0], ctx, sizeof(IdType) * 8); const dim3 block(128); const dim3 grid((global_idx->shape[0] + block.x - 1) / block.x); CUDA_KERNEL_CALL( _MapLocalIndexByRemainderKernel, grid, block, 0, stream, static_cast(global_idx->data), global_idx->shape[0], num_parts, static_cast(local_idx->data)); return local_idx; } else { // no mapping to be done return global_idx; } } template IdArray MapToLocalFromRemainder( int num_parts, IdArray in_idx); template IdArray MapToLocalFromRemainder( int num_parts, IdArray in_idx); template IdArray MapToGlobalFromRemainder( const int num_parts, IdArray local_idx, const int part_id) { CHECK_LT(part_id, num_parts) << "Invalid partition id " << part_id << "/" << num_parts; CHECK_GE(part_id, 0) << "Invalid partition id " << part_id << "/" << num_parts; const auto& ctx = 
local_idx->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); if (num_parts > 1) { IdArray global_idx = aten::NewIdArray(local_idx->shape[0], ctx, sizeof(IdType) * 8); const dim3 block(128); const dim3 grid((local_idx->shape[0] + block.x - 1) / block.x); CUDA_KERNEL_CALL( _MapGlobalIndexByRemainderKernel, grid, block, 0, stream, static_cast(local_idx->data), part_id, global_idx->shape[0], num_parts, static_cast(global_idx->data)); return global_idx; } else { // no mapping to be done return local_idx; } } template IdArray MapToGlobalFromRemainder( int num_parts, IdArray in_idx, int part_id); template IdArray MapToGlobalFromRemainder( int num_parts, IdArray in_idx, int part_id); // Range Based Partition Operations template std::pair GeneratePermutationFromRange( int64_t array_size, int num_parts, IdArray range, IdArray in_idx) { std::pair result; const auto& ctx = in_idx->ctx; auto device = DeviceAPI::Get(ctx); cudaStream_t stream = runtime::getCurrentCUDAStream(); const int64_t num_in = in_idx->shape[0]; CHECK_GE(num_parts, 1) << "The number of partitions (" << num_parts << ") must be at least 1."; if (num_parts == 1) { // no permutation result.first = aten::Range(0, num_in, sizeof(IdType) * 8, ctx); result.second = aten::Full(num_in, num_parts, sizeof(int64_t) * 8, ctx); return result; } result.first = aten::NewIdArray(num_in, ctx, sizeof(IdType) * 8); result.second = aten::Full(0, num_parts, sizeof(int64_t) * 8, ctx); int64_t* out_counts = static_cast(result.second->data); if (num_in == 0) { // now that we've zero'd out_counts, nothing left to do for an empty // mapping return result; } const int64_t part_bits = static_cast(std::ceil(std::log2(num_parts))); // First, generate a mapping of indexes to processors Workspace proc_id_in(device, ctx, num_in); { const dim3 block(256); const dim3 grid((num_in + block.x - 1) / block.x); CUDA_KERNEL_CALL( _MapProcByRangeKernel, grid, block, 0, stream, static_cast(range->data), static_cast(in_idx->data), num_in, 
num_parts, proc_id_in.get()); } // then create a permutation array that groups processors together by // performing a radix sort Workspace proc_id_out(device, ctx, num_in); IdType* perm_out = static_cast(result.first->data); { IdArray perm_in = aten::Range(0, num_in, sizeof(IdType) * 8, ctx); size_t sort_workspace_size; CUDA_CALL(cub::DeviceRadixSort::SortPairs( nullptr, sort_workspace_size, proc_id_in.get(), proc_id_out.get(), static_cast(perm_in->data), perm_out, num_in, 0, part_bits, stream)); Workspace sort_workspace(device, ctx, sort_workspace_size); CUDA_CALL(cub::DeviceRadixSort::SortPairs( sort_workspace.get(), sort_workspace_size, proc_id_in.get(), proc_id_out.get(), static_cast(perm_in->data), perm_out, num_in, 0, part_bits, stream)); } // explicitly free so workspace can be re-used proc_id_in.free(); // perform a histogram and then prefixsum on the sorted proc_id vector // Count the number of values to be sent to each processor { using AtomicCount = unsigned long long; // NOLINT static_assert( sizeof(AtomicCount) == sizeof(*out_counts), "AtomicCount must be the same width as int64_t for atomicAdd " "in cub::DeviceHistogram::HistogramEven() to work"); // TODO(dlasalle): Once https://github.com/NVIDIA/cub/pull/287 is merged, // add a compile time check against the cub version to allow // num_in > (2 << 31). 
CHECK(num_in < static_cast(std::numeric_limits::max())) << "number of values to insert into histogram must be less than max " "value of int."; size_t hist_workspace_size; CUDA_CALL(cub::DeviceHistogram::HistogramEven( nullptr, hist_workspace_size, proc_id_out.get(), reinterpret_cast(out_counts), num_parts + 1, static_cast(0), static_cast(num_parts), static_cast(num_in), stream)); Workspace hist_workspace(device, ctx, hist_workspace_size); CUDA_CALL(cub::DeviceHistogram::HistogramEven( hist_workspace.get(), hist_workspace_size, proc_id_out.get(), reinterpret_cast(out_counts), num_parts + 1, static_cast(0), static_cast(num_parts), static_cast(num_in), stream)); } return result; } template std::pair GeneratePermutationFromRange( int64_t array_size, int num_parts, IdArray range, IdArray in_idx); template std::pair GeneratePermutationFromRange( int64_t array_size, int num_parts, IdArray range, IdArray in_idx); template std::pair GeneratePermutationFromRange( int64_t array_size, int num_parts, IdArray range, IdArray in_idx); template std::pair GeneratePermutationFromRange( int64_t array_size, int num_parts, IdArray range, IdArray in_idx); template IdArray MapToLocalFromRange( const int num_parts, IdArray range, IdArray global_idx) { const auto& ctx = global_idx->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); if (num_parts > 1 && global_idx->shape[0] > 0) { IdArray local_idx = aten::NewIdArray(global_idx->shape[0], ctx, sizeof(IdType) * 8); const dim3 block(128); const dim3 grid((global_idx->shape[0] + block.x - 1) / block.x); CUDA_KERNEL_CALL( _MapLocalIndexByRangeKernel, grid, block, 0, stream, static_cast(range->data), static_cast(global_idx->data), global_idx->shape[0], num_parts, static_cast(local_idx->data)); return local_idx; } else { // no mapping to be done return global_idx; } } template IdArray MapToLocalFromRange( int num_parts, IdArray range, IdArray in_idx); template IdArray MapToLocalFromRange( int num_parts, IdArray range, IdArray in_idx); 
template IdArray MapToLocalFromRange( int num_parts, IdArray range, IdArray in_idx); template IdArray MapToLocalFromRange( int num_parts, IdArray range, IdArray in_idx); template IdArray MapToGlobalFromRange( const int num_parts, IdArray range, IdArray local_idx, const int part_id) { CHECK_LT(part_id, num_parts) << "Invalid partition id " << part_id << "/" << num_parts; CHECK_GE(part_id, 0) << "Invalid partition id " << part_id << "/" << num_parts; const auto& ctx = local_idx->ctx; cudaStream_t stream = runtime::getCurrentCUDAStream(); if (num_parts > 1 && local_idx->shape[0] > 0) { IdArray global_idx = aten::NewIdArray(local_idx->shape[0], ctx, sizeof(IdType) * 8); const dim3 block(128); const dim3 grid((local_idx->shape[0] + block.x - 1) / block.x); CUDA_KERNEL_CALL( _MapGlobalIndexByRangeKernel, grid, block, 0, stream, static_cast(range->data), static_cast(local_idx->data), part_id, global_idx->shape[0], num_parts, static_cast(global_idx->data)); return global_idx; } else { // no mapping to be done return local_idx; } } template IdArray MapToGlobalFromRange( int num_parts, IdArray range, IdArray in_idx, int part_id); template IdArray MapToGlobalFromRange( int num_parts, IdArray range, IdArray in_idx, int part_id); template IdArray MapToGlobalFromRange( int num_parts, IdArray range, IdArray in_idx, int part_id); template IdArray MapToGlobalFromRange( int num_parts, IdArray range, IdArray in_idx, int part_id); } // namespace impl } // namespace partition } // namespace dgl ================================================ FILE: src/partition/ndarray_partition.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file ndarray_partition.cc * @brief DGL utilities for working with the partitioned NDArrays */ #include "ndarray_partition.h" #include #include #include #include #include "../c_api_common.h" #include "partition_op.h" using namespace dgl::runtime; namespace dgl { namespace partition { 
NDArrayPartition::NDArrayPartition( const int64_t array_size, const int num_parts) : array_size_(array_size), num_parts_(num_parts) {} int64_t NDArrayPartition::ArraySize() const { return array_size_; } int NDArrayPartition::NumParts() const { return num_parts_; } class RemainderPartition : public NDArrayPartition { public: RemainderPartition(const int64_t array_size, const int num_parts) : NDArrayPartition(array_size, num_parts) { // do nothing } std::pair GeneratePermutation( IdArray in_idx) const override { #ifdef DGL_USE_CUDA auto ctx = in_idx->ctx; if (ctx.device_type == kDGLCUDA) { ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, { return impl::GeneratePermutationFromRemainder( ArraySize(), NumParts(), in_idx); }); } #endif LOG(FATAL) << "Remainder based partitioning for the CPU is not yet " "implemented."; // should be unreachable return std::pair{}; } IdArray MapToLocal(IdArray in_idx) const override { #ifdef DGL_USE_CUDA auto ctx = in_idx->ctx; if (ctx.device_type == kDGLCUDA) { ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, { return impl::MapToLocalFromRemainder( NumParts(), in_idx); }); } #endif LOG(FATAL) << "Remainder based partitioning for the CPU is not yet " "implemented."; // should be unreachable return IdArray{}; } IdArray MapToGlobal(IdArray in_idx, const int part_id) const override { #ifdef DGL_USE_CUDA auto ctx = in_idx->ctx; if (ctx.device_type == kDGLCUDA) { ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, { return impl::MapToGlobalFromRemainder( NumParts(), in_idx, part_id); }); } #endif LOG(FATAL) << "Remainder based partitioning for the CPU is not yet " "implemented."; // should be unreachable return IdArray{}; } int64_t PartSize(const int part_id) const override { CHECK_LT(part_id, NumParts()) << "Invalid part ID (" << part_id << ") for " "partition of size " << NumParts() << "."; return ArraySize() / NumParts() + (part_id < ArraySize() % NumParts()); } }; class RangePartition : public NDArrayPartition { public: RangePartition(const int64_t array_size, 
const int num_parts, IdArray range) : NDArrayPartition(array_size, num_parts), range_(range), // We also need a copy of the range on the CPU, to compute partition // sizes. We require the input range on the GPU, as if we have multiple // GPUs, we can't know which is the proper one to copy the array to, but // we have only one CPU context, and can safely copy the array to that. range_cpu_(range.CopyTo(DGLContext{kDGLCPU, 0})) { auto ctx = range->ctx; if (ctx.device_type != kDGLCUDA) { LOG(FATAL) << "The range for an NDArrayPartition is only supported " " on GPUs. Transfer the range to the target device before " "creating the partition."; } } std::pair GeneratePermutation( IdArray in_idx) const override { #ifdef DGL_USE_CUDA auto ctx = in_idx->ctx; if (ctx.device_type == kDGLCUDA) { if (ctx.device_type != range_->ctx.device_type || ctx.device_id != range_->ctx.device_id) { LOG(FATAL) << "The range for the NDArrayPartition and the input " "array must be on the same device: " << ctx << " vs. 
" << range_->ctx; } ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, { ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, { return impl::GeneratePermutationFromRange< kDGLCUDA, IdType, RangeType>( ArraySize(), NumParts(), range_, in_idx); }); }); } #endif LOG(FATAL) << "Remainder based partitioning for the CPU is not yet " "implemented."; // should be unreachable return std::pair{}; } IdArray MapToLocal(IdArray in_idx) const override { #ifdef DGL_USE_CUDA auto ctx = in_idx->ctx; if (ctx.device_type == kDGLCUDA) { ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, { ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, { return impl::MapToLocalFromRange( NumParts(), range_, in_idx); }); }); } #endif LOG(FATAL) << "Remainder based partitioning for the CPU is not yet " "implemented."; // should be unreachable return IdArray{}; } IdArray MapToGlobal(IdArray in_idx, const int part_id) const override { #ifdef DGL_USE_CUDA auto ctx = in_idx->ctx; if (ctx.device_type == kDGLCUDA) { ATEN_ID_TYPE_SWITCH(in_idx->dtype, IdType, { ATEN_ID_TYPE_SWITCH(range_->dtype, RangeType, { return impl::MapToGlobalFromRange( NumParts(), range_, in_idx, part_id); }); }); } #endif LOG(FATAL) << "Remainder based partitioning for the CPU is not yet " "implemented."; // should be unreachable return IdArray{}; } int64_t PartSize(const int part_id) const override { CHECK_LT(part_id, NumParts()) << "Invalid part ID (" << part_id << ") for " "partition of size " << NumParts() << "."; int64_t part_size = -1; ATEN_ID_TYPE_SWITCH(range_cpu_->dtype, RangeType, { const RangeType* const ptr = static_cast(range_cpu_->data); part_size = ptr[part_id + 1] - ptr[part_id]; }); return part_size; } private: IdArray range_; IdArray range_cpu_; }; NDArrayPartitionRef CreatePartitionRemainderBased( const int64_t array_size, const int num_parts) { return NDArrayPartitionRef( std::make_shared(array_size, num_parts)); } NDArrayPartitionRef CreatePartitionRangeBased( const int64_t array_size, const int num_parts, IdArray range) { return 
NDArrayPartitionRef( std::make_shared(array_size, num_parts, range)); } DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionCreateRemainderBased") .set_body([](DGLArgs args, DGLRetValue* rv) { int64_t array_size = args[0]; int num_parts = args[1]; *rv = CreatePartitionRemainderBased(array_size, num_parts); }); DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionCreateRangeBased") .set_body([](DGLArgs args, DGLRetValue* rv) { const int64_t array_size = args[0]; const int num_parts = args[1]; IdArray range = args[2]; *rv = CreatePartitionRangeBased(array_size, num_parts, range); }); DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionGetPartSize") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArrayPartitionRef part = args[0]; int part_id = args[1]; *rv = part->PartSize(part_id); }); DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionMapToLocal") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArrayPartitionRef part = args[0]; IdArray idxs = args[1]; *rv = part->MapToLocal(idxs); }); DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionMapToGlobal") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArrayPartitionRef part = args[0]; IdArray idxs = args[1]; const int part_id = args[2]; *rv = part->MapToGlobal(idxs, part_id); }); DGL_REGISTER_GLOBAL("partition._CAPI_DGLNDArrayPartitionGeneratePermutation") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArrayPartitionRef part = args[0]; IdArray idxs = args[1]; std::pair part_perm = part->GeneratePermutation(idxs); *rv = ConvertNDArrayVectorToPackedFunc({part_perm.first, part_perm.second}); }); } // namespace partition } // namespace dgl ================================================ FILE: src/partition/ndarray_partition.h ================================================ /** * Copyright (c) 2021 by Contributors * @file ndarray_partition.h * @brief DGL utilities for working with the partitioned NDArrays */ #ifndef DGL_PARTITION_NDARRAY_PARTITION_H_ #define DGL_PARTITION_NDARRAY_PARTITION_H_ #include 
#include #include #include namespace dgl { namespace partition { /** * @brief The top-level partition class. Specific types of partitions should be * sub-classes of this. */ class NDArrayPartition : public runtime::Object { public: /** * @brief Create a new partition. * * @param array_size The first dimension of the partitioned array. * @param num_parts The number parts to the array is split into. */ NDArrayPartition(int64_t array_size, int num_parts); virtual ~NDArrayPartition() = default; static constexpr const char* _type_key = "partition.NDArrayPartition"; DGL_DECLARE_OBJECT_TYPE_INFO(NDArrayPartition, Object); /** * @brief Create a mapping for the given indices to different partitions, * and a count of the number of indices per part. * * A prefix-sum of the counts, can be used to select the continuous sets of * indices destined for each part. * * @param in_idx The input indices to map. * * @return A pair containing 0) the permutation to re-order the indices by * partition, 1) the number of indices per partition (int64_t). */ virtual std::pair GeneratePermutation( IdArray in_idx) const = 0; /** * @brief Generate the local indices (the numbering within each processor) * from a set of global indices. * * @param in_idx The global indices. * * @return The local indices. */ virtual IdArray MapToLocal(IdArray in_idx) const = 0; /** * @brief Generate the global indices (the numbering unique across all * processors) from a set of local indices. * * @param in_idx The local indices. * @param part_id The part id. * * @return The global indices. */ virtual IdArray MapToGlobal(IdArray in_idx, int part_id) const = 0; /** * @brief Get the number of rows/items assigned to the given part. * * @param part_id The part id. * * @return The size. */ virtual int64_t PartSize(int part_id) const = 0; /** * @brief Get the first dimension of the partitioned array. * * @return The size. */ int64_t ArraySize() const; /** * @brief Get the number of parts in this partition. 
* * @return The number of parts. */ int NumParts() const; private: int64_t array_size_; int num_parts_; }; DGL_DEFINE_OBJECT_REF(NDArrayPartitionRef, NDArrayPartition); /** * @brief Create a new partition object, using the remainder of the row id * divided by the number of parts, to assign rows to parts. * * @param array_size The first dimension of the array. * @param num_parts The number of parts. * * @return The partition object. */ NDArrayPartitionRef CreatePartitionRemainderBased( int64_t array_size, int num_parts); /** * @brief Create a new partition object, using the range (exclusive prefix-sum) * provided to identify which rows belong to which partitions. * * @param array_size The size of the partitioned array. * @param num_parts The number of parts the array is partitioned into. * @param range The exclusive prefix-sum of the number of rows owned by each * partition. The first value must be zero, and the last value must be the * total number of rows. It should be of length `num_parts+1`. * * @return The partition object. */ NDArrayPartitionRef CreatePartitionRangeBased( int64_t array_size, int num_parts, IdArray range); } // namespace partition } // namespace dgl #endif // DGL_PARTITION_NDARRAY_PARTITION_H_ ================================================ FILE: src/partition/partition_op.h ================================================ /** * Copyright (c) 2021 by Contributors * @file ndarray_partition.h * @brief DGL utilities for working with the partitioned NDArrays */ #ifndef DGL_PARTITION_PARTITION_OP_H_ #define DGL_PARTITION_PARTITION_OP_H_ #include #include namespace dgl { namespace partition { namespace impl { /** * @brief Create a permutation that groups indices by the part id when used for * slicing, via the remainder. That is, for the input indices A, find I * such that A[I] is grouped by part ID. * * For example, if we have the set of indices [3, 9, 2, 4, 1, 7] and two * partitions, the permutation vector would be [2, 3, 0, 1, 4, 5]. 
* * @tparam XPU The type of device to run on. * @tparam IdType The type of the index. * @param array_size The total size of the partitioned array. * @param num_parts The number parts the array id divided into. * @param in_idx The array of indices to group by part id. * * @return The permutation to group the indices by part id, and the number of * indices in each part. */ template std::pair GeneratePermutationFromRemainder( int64_t array_size, int num_parts, IdArray in_idx); /** * @brief Generate the set of local indices from the global indices, using * remainder. That is, for each index `i` in `global_idx`, the local index * is computed as `global_idx[i] / num_parts`. * * @tparam XPU The type of device to run on. * @tparam IdType The type of the index. * @param num_parts The number parts the array id divided into. * @param global_idx The array of global indices to map. * * @return The array of local indices. */ template IdArray MapToLocalFromRemainder(int num_parts, IdArray global_idx); /** * @brief Generate the set of global indices from the local indices, using * remainder. That is, for each index `i` in `local_idx`, the global index * is computed as `local_idx[i] * num_parts + part_id`. * * @tparam XPU The type of device to run on. * @tparam IdType The type of the index. * @param num_parts The number parts the array id divided into. * @param local_idx The array of local indices to map. * @param part_id The id of the current part. * * @return The array of global indices. */ template IdArray MapToGlobalFromRemainder(int num_parts, IdArray local_idx, int part_id); /** * @brief Create a permutation that groups indices by the part id when used for * slicing. That is, for the input indices A, find I such that A[I] is grouped * by part ID. * * For example, if we have a range of [0, 5, 10] and the set of indices * [3, 9, 2, 4, 1, 7], the permutation vector would be [0, 2, 3, 4, 1, 5]. * * @tparam XPU The type of device to run on. * @tparam IdType The type of the index. 
* @tparam RangeType THe type of the range. * @param array_size The total size of the partitioned array. * @param num_parts The number parts the array id divided into. * @param range The exclusive prefix-sum, representing the range of rows * assigned to each partition. Must be on the same context as `in_idx`. * @param in_idx The array of indices to group by part id. * * @return The permutation to group the indices by part id, and the number of * indices in each part. */ template std::pair GeneratePermutationFromRange( int64_t array_size, int num_parts, IdArray range, IdArray in_idx); /** * @brief Generate the set of local indices from the global indices, using * remainder. That is, for each index `i` in `global_idx`, the local index * is computed as `global_idx[i] / num_parts`. * * @tparam XPU The type of device to run on. * @tparam IdType The type of the index. * @tparam RangeType THe type of the range. * @param num_parts The number parts the array id divided into. * @param range The exclusive prefix-sum, representing the range of rows * assigned to each partition. Must be on the same context as `global_idx`. * @param global_idx The array of global indices to map. * * @return The array of local indices. */ template IdArray MapToLocalFromRange(int num_parts, IdArray range, IdArray global_idx); /** * @brief Generate the set of global indices from the local indices, using * remainder. That is, for each index `i` in `local_idx`, the global index * is computed as `local_idx[i] * num_parts + part_id`. * * @tparam XPU The type of device to run on. * @tparam IdType The type of the index. * @tparam RangeType THe type of the range. * @param num_parts The number parts the array id divided into. * @param range The exclusive prefix-sum, representing the range of rows * assigned to each partition. Must be on the same context as `local_idx`. * @param local_idx The array of local indices to map. * @param part_id The id of the current part. * * @return The array of global indices. 
*/ template IdArray MapToGlobalFromRange( int num_parts, IdArray range, IdArray local_idx, int part_id); } // namespace impl } // namespace partition } // namespace dgl #endif // DGL_PARTITION_PARTITION_OP_H_ ================================================ FILE: src/random/continuous_seed.h ================================================ /*! * Copyright (c) 2023, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek) * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * @file dgl/continuous_seed.h * @brief CPU and CUDA implementation for continuous random seeds */ #ifndef DGL_RANDOM_CONTINUOUS_SEED_H_ #define DGL_RANDOM_CONTINUOUS_SEED_H_ #include #include #ifdef __NVCC__ #include #else #include #include "pcg_random.hpp" #endif // __CUDA_ARCH__ #ifndef M_SQRT1_2 #define M_SQRT1_2 0.707106781186547524401 #endif // M_SQRT1_2 namespace dgl { namespace random { class continuous_seed { uint64_t s[2]; float c[2]; public: /* implicit */ continuous_seed(const int64_t seed) { // NOLINT s[0] = s[1] = seed; c[0] = c[1] = 0; } continuous_seed(IdArray seed_arr, float r) { auto seed = seed_arr.Ptr(); s[0] = seed[0]; s[1] = seed[seed_arr->shape[0] - 1]; const auto pi = std::acos(-1.0); c[0] = std::cos(pi * r / 2); c[1] = std::sin(pi * r / 2); } #ifdef __CUDA_ARCH__ __device__ inline float uniform(const uint64_t t) const { const uint64_t kCurandSeed = 999961; // Could be any random number. 
curandStatePhilox4_32_10_t rng; curand_init(kCurandSeed, s[0], t, &rng); float rnd; if (s[0] != s[1]) { rnd = c[0] * curand_normal(&rng); curand_init(kCurandSeed, s[1], t, &rng); rnd += c[1] * curand_normal(&rng); rnd = normcdff(rnd); } else { rnd = curand_uniform(&rng); } return rnd; } #else inline float uniform(const uint64_t t) const { pcg32 ng0(s[0], t); float rnd; if (s[0] != s[1]) { std::normal_distribution norm; rnd = c[0] * norm(ng0); pcg32 ng1(s[1], t); norm.reset(); rnd += c[1] * norm(ng1); rnd = std::erfc(-rnd * static_cast(M_SQRT1_2)) / 2.0f; } else { std::uniform_real_distribution uni; rnd = uni(ng0); } return rnd; } #endif // __CUDA_ARCH__ }; } // namespace random } // namespace dgl #endif // DGL_RANDOM_CONTINUOUS_SEED_H_ ================================================ FILE: src/random/cpu/choice.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file random/choice.cc * @brief Non-uniform discrete sampling implementation */ #include #include #include #include #include "sample_utils.h" namespace dgl { template IdxType RandomEngine::Choice(FloatArray prob) { IdxType ret = 0; ATEN_FLOAT_TYPE_SWITCH(prob->dtype, ValueType, "probability", { // TODO(minjie): allow choosing different sampling algorithms utils::TreeSampler sampler(this, prob); ret = sampler.Draw(); }); return ret; } template int32_t RandomEngine::Choice(FloatArray); template int64_t RandomEngine::Choice(FloatArray); template void RandomEngine::Choice( IdxType num, FloatArray prob, IdxType* out, bool replace) { const IdxType N = prob->shape[0]; if (!replace) CHECK_LE(num, N) << "Cannot take more sample than population when 'replace=false'"; if (num == N && !replace) std::iota(out, out + num, 0); utils::BaseSampler* sampler = nullptr; if (replace) { sampler = new utils::TreeSampler(this, prob); } else { sampler = new utils::TreeSampler(this, prob); } for (IdxType i = 0; i < num; ++i) out[i] = sampler->Draw(); delete sampler; } template void 
RandomEngine::Choice( int32_t num, FloatArray prob, int32_t* out, bool replace); template void RandomEngine::Choice( int64_t num, FloatArray prob, int64_t* out, bool replace); template void RandomEngine::Choice( int32_t num, FloatArray prob, int32_t* out, bool replace); template void RandomEngine::Choice( int64_t num, FloatArray prob, int64_t* out, bool replace); template void RandomEngine::Choice( int32_t num, FloatArray prob, int32_t* out, bool replace); template void RandomEngine::Choice( int64_t num, FloatArray prob, int64_t* out, bool replace); template void RandomEngine::Choice( int32_t num, FloatArray prob, int32_t* out, bool replace); template void RandomEngine::Choice( int64_t num, FloatArray prob, int64_t* out, bool replace); template void RandomEngine::UniformChoice( IdxType num, IdxType population, IdxType* out, bool replace) { CHECK_GE(num, 0) << "The numbers to sample should be non-negative."; CHECK_GE(population, 0) << "The population size should be non-negative."; if (!replace) CHECK_LE(num, population) << "Cannot take more sample than population when 'replace=false'"; if (replace) { for (IdxType i = 0; i < num; ++i) out[i] = RandInt(population); } else { if (num < population / 10) { // TODO(minjie): may need a better threshold here // if set of numbers is small (up to 128) use linear search to verify // uniqueness this operation is cheaper for CPU. if (num && num < 64) { *out = RandInt(population); auto b = out + 1; auto e = b + num - 1; while (b != e) { // put the new value at the end *b = RandInt(population); // Check if a new value doesn't exist in current range(out,b) // otherwise get a new value until we haven't unique range of // elements. auto it = std::find(out, b, *b); if (it != b) continue; ++b; } } else { // use hash set // In the best scenario, time complexity is O(num), i.e., no conflict. 
// // Let k be num / population, the expected number of extra sampling // steps is roughly k^2 / (1-k) * population, which means in the worst // case scenario, the time complexity is O(population^2). In practice, // we use 1/10 since std::unordered_set is pretty slow. std::unordered_set selected; while (static_cast(selected.size()) < num) { selected.insert(RandInt(population)); } std::copy(selected.begin(), selected.end(), out); } } else { // In this case, `num >= population / 10`. To reduce the computation // overhead, we should reduce the number of random number generations. // Even though reservior algorithm is more memory effficient (it has // O(num) memory complexity), it generates O(population) random numbers, // which is computationally expensive. This algorithm has memory // complexity of O(population) but generates much fewer random numbers // O(num). In the case of `num >= population/10`, we don't need to worry // about memory complexity because `num` is usually small. So is // `population`. Allocating a small piece of memory is very efficient. std::vector seq(population); for (size_t i = 0; i < seq.size(); i++) seq[i] = i; for (IdxType i = 0; i < num; i++) { IdxType j = RandInt(i, population); std::swap(seq[i], seq[j]); } // Save the randomly sampled numbers. 
for (IdxType i = 0; i < num; i++) { out[i] = seq[i]; } } } } template void RandomEngine::UniformChoice( int32_t num, int32_t population, int32_t* out, bool replace); template void RandomEngine::UniformChoice( int64_t num, int64_t population, int64_t* out, bool replace); template void RandomEngine::BiasedChoice( IdxType num, const IdxType* split, FloatArray bias, IdxType* out, bool replace) { const int64_t num_tags = bias->shape[0]; const FloatType* bias_data = static_cast(bias->data); IdxType total_node_num = 0; FloatArray prob = NDArray::Empty({num_tags}, bias->dtype, bias->ctx); FloatType* prob_data = static_cast(prob->data); for (int64_t tag = 0; tag < num_tags; ++tag) { int64_t tag_num_nodes = split[tag + 1] - split[tag]; total_node_num += tag_num_nodes; FloatType tag_bias = bias_data[tag]; prob_data[tag] = tag_num_nodes * tag_bias; } if (replace) { auto sampler = utils::TreeSampler(this, prob); for (IdxType i = 0; i < num; ++i) { const int64_t tag = sampler.Draw(); const IdxType tag_num_nodes = split[tag + 1] - split[tag]; out[i] = RandInt(tag_num_nodes) + split[tag]; } } else { utils::TreeSampler sampler( this, prob, bias_data); CHECK_GE(total_node_num, num) << "Cannot take more sample than population when 'replace=false'"; // we use hash set here. 
Maybe in the future we should support reservoir // algorithm std::vector> selected(num_tags); for (IdxType i = 0; i < num; ++i) { const int64_t tag = sampler.Draw(); bool inserted = false; const IdxType tag_num_nodes = split[tag + 1] - split[tag]; IdxType selected_node; while (!inserted) { CHECK_LT(selected[tag].size(), tag_num_nodes) << "Cannot take more sample than population when 'replace=false'"; selected_node = RandInt(tag_num_nodes); inserted = selected[tag].insert(selected_node).second; } out[i] = selected_node + split[tag]; } } } template void RandomEngine::BiasedChoice( int32_t, const int32_t*, FloatArray, int32_t*, bool); template void RandomEngine::BiasedChoice( int32_t, const int32_t*, FloatArray, int32_t*, bool); template void RandomEngine::BiasedChoice( int64_t, const int64_t*, FloatArray, int64_t*, bool); template void RandomEngine::BiasedChoice( int64_t, const int64_t*, FloatArray, int64_t*, bool); }; // namespace dgl ================================================ FILE: src/random/cpu/sample_utils.h ================================================ /** * Copyright (c) 2019 by Contributors * @file dgl/sample_utils.h * @brief Sampling utilities */ #ifndef DGL_RANDOM_CPU_SAMPLE_UTILS_H_ #define DGL_RANDOM_CPU_SAMPLE_UTILS_H_ #include #include #include #include #include #include #include #include #include #include namespace dgl { namespace utils { /** @brief Base sampler class */ template class BaseSampler { public: virtual ~BaseSampler() = default; /** @brief Draw one integer sample */ virtual Idx Draw() { LOG(INFO) << "Not implemented yet."; return 0; } }; // (BarclayII 2022.9.20) Changing the internal data type of probabilities to // double since we are using non-uniform sampling to sample on boolean masks, // where False represents probability 0. DType could be uint8 in this case, // which will give incorrect arithmetic results due to overflowing and/or // integer division. 
/**
 * AliasSampler is used to sample elements from a given discrete categorical
 * distribution.
 *
 * Algorithm: Alias Method (https://en.wikipedia.org/wiki/Alias_method)
 * Sampler building complexity: O(n)
 * Sample w/ replacement complexity: O(1)
 * Sample w/o replacement complexity: O(log n)
 *
 * When `replace` is false, every drawn index is marked in `used` and is never
 * returned again; the alias table is rebuilt over the remaining entries once
 * the probability mass taken since the last rebuild reaches half of the mass
 * that was available at that rebuild.
 */
template class AliasSampler : public BaseSampler {
 private:
  RandomEngine *re;
  Idx N;  // number of entries covered by the current alias table
  // accum: total likelihood of the entries counted by the last Reconstruct().
  // taken: likelihood drawn (without replacement) since the last
  //        Reconstruct().
  double accum, taken;
  std::vector K;     // alias table
  std::vector U;     // probability table
  FloatArray _prob;  // category distribution
  std::vector used;  // indicate availability, activated when replace=false;
  std::vector id_mapping;  // index mapping, activated when replace=false;

  /**
   * @brief Map consecutive table indices to unused elements.
   * @param x index into the alias table
   * @return x itself when sampling with replacement, otherwise the original
   *         category index stored in id_mapping[x].
   */
  inline Idx Map(Idx x) const {  // Map consecutive indices to unused elements
    if (replace)
      return x;
    else
      return id_mapping[x];
  }

  /**
   * @brief Rebuild the alias table over all entries not marked in `used`.
   * @param prob category distribution to read likelihoods from
   *
   * Resets N, accum and taken, refills id_mapping (replace=false only) and
   * recomputes K and U. Logs a fatal error when no unused entry is left.
   */
  void Reconstruct(FloatArray prob) {  // Reconstruct alias table
    const int64_t prob_size = prob->shape[0];
    const DType *prob_data = prob.Ptr();
    N = 0;
    accum = 0.;
    taken = 0.;
    if (!replace) id_mapping.clear();
    // Count the still-available entries and sum their likelihood.
    for (Idx i = 0; i < prob_size; ++i)
      if (!used[i]) {
        N++;
        accum += prob_data[i];
        if (!replace) id_mapping.push_back(i);
      }
    if (N == 0)
      LOG(FATAL)
          << "Cannot take more sample than population when 'replace=false'";
    K.resize(N);
    U.resize(N);
    double avg = accum / static_cast(N);
    std::fill(U.begin(), U.end(), avg);  // initialize U
    // Entries whose likelihood is at most `avg` go to `under`, the others to
    // `over`; each queue element is (table index, remaining likelihood).
    std::queue > under, over;
    for (Idx i = 0; i < N; ++i) {
      double p = prob_data[Map(i)];
      if (p > avg)
        over.push(std::make_pair(i, p));
      else
        under.push(std::make_pair(i, p));
      K[i] = i;  // initialize K
    }
    // Pair one "under" entry with one "over" entry at a time: the under slot
    // keeps its own likelihood in U and aliases to the over entry in K. The
    // over entry is re-queued with what is left after topping the slot up to
    // `avg`; when exactly `avg` would be left it is not re-queued and keeps
    // its initial K[i] = i, U[i] = avg.
    while (!under.empty() && !over.empty()) {
      auto u_pair = under.front(), o_pair = over.front();
      Idx i_u = u_pair.first, i_o = o_pair.first;
      double p_u = u_pair.second, p_o = o_pair.second;
      K[i_u] = i_o;
      U[i_u] = p_u;
      if (p_o + p_u > 2 * avg)
        over.push(std::make_pair(i_o, p_o + p_u - avg));
      else if (p_o + p_u < 2 * avg)
        under.push(std::make_pair(i_o, p_o + p_u - avg));
      under.pop();
      over.pop();
    }
  }

 public:
  /**
   * @brief Mark every entry as available and rebuild the alias table.
   * @param prob category distribution; also stored in _prob when
   *        replace=false so that Draw() can rebuild from it later.
   */
  void ResetState(FloatArray prob) {
    used.resize(prob->shape[0]);
    if (!replace) _prob = prob;
    std::fill(used.begin(), used.end(), false);
    Reconstruct(prob);
  }

  /**
   * @brief Build a sampler over `prob` using random engine `re`.
   */
  explicit AliasSampler(RandomEngine *re, FloatArray prob) : re(re) {
    ResetState(prob);
  }

  ~AliasSampler() {}

  /**
   * @brief Draw one category index.
   * @return the sampled index, or -1 when the accumulated likelihood is not
   *         positive.
   */
  Idx Draw() {
    if (!replace) {
      const DType *_prob_data = _prob.Ptr();
      // Rebuild once at least half of the available mass has been taken, so
      // the rejection loop below does not spin on mostly-used tables.
      if (2 * taken >= accum) Reconstruct(_prob);
      if (accum <= 0) return -1;
      // accum changes after Reconstruct(), so avg should be computed after
      // that.
      double avg = accum / N;
      // Rejection loop: repeat until an entry that is not yet used comes up.
      while (true) {
        // Integer part of dice selects the table slot, the fractional part
        // (scaled by avg) decides between the slot itself and its alias.
        double dice = re->Uniform(0, N);
        Idx i = static_cast(dice), rst;
        double p = (dice - i) * avg;
        if (p <= U[i]) {
          rst = Map(i);
        } else {
          rst = Map(K[i]);
        }
        double cap = _prob_data[rst];
        if (!used[rst]) {
          used[rst] = true;
          taken += cap;
          return rst;
        }
      }
    }
    // Sampling with replacement: a single table lookup.
    if (accum <= 0) return -1;
    double avg = accum / N;
    double dice = re->Uniform(0, N);
    Idx i = static_cast(dice);
    double p = (dice - i) * avg;
    if (p <= U[i])
      return Map(i);
    else
      return Map(K[i]);
  }
};

/**
 * CDFSampler is used to sample elements from a given discrete categorical
 * distribution. Algorithm: create a cumulative distribution function and
 * conduct binary search for sampling.
Reference:
 * https://github.com/numpy/numpy/blob/d37908/numpy/random/mtrand.pyx#L804
 * Sampler building complexity: O(n)
 * Sample w/ and w/o replacement complexity: O(log n)
 */
template class CDFSampler : public BaseSampler {
 private:
  RandomEngine *re;
  Idx N;  // number of entries covered by the current CDF
  // accum: total likelihood of the entries counted by the last Reconstruct().
  // taken: likelihood drawn (without replacement) since the last
  //        Reconstruct().
  double accum, taken;
  FloatArray _prob;  // categorical distribution
  std::vector cdf;   // cumulative distribution function
  std::vector used;  // indicate availability, activated when replace=false;
  std::vector
      id_mapping;  // indicate index mapping, activated when replace=false;

  /**
   * @brief Map consecutive CDF indices to unused elements.
   * @param x index into the CDF
   * @return x itself when sampling with replacement, otherwise the original
   *         category index stored in id_mapping[x].
   */
  inline Idx Map(Idx x) const {  // Map consecutive indices to unused elements
    if (replace)
      return x;
    else
      return id_mapping[x];
  }

  /**
   * @brief Rebuild the CDF over all entries not marked in `used`.
   * @param prob categorical distribution to read likelihoods from
   *
   * After the call cdf[0] == 0 and cdf[k] is the running sum of the first k
   * unused likelihoods, so cdf has N + 1 elements. Logs a fatal error when no
   * unused entry is left.
   */
  void Reconstruct(FloatArray prob) {  // Reconstruct CDF
    int64_t prob_size = prob->shape[0];
    const DType *prob_data = prob.Ptr();
    N = 0;
    accum = 0.;
    taken = 0.;
    if (!replace) id_mapping.clear();
    cdf.clear();
    cdf.push_back(0);
    for (Idx i = 0; i < prob_size; ++i)
      if (!used[i]) {
        N++;
        accum += prob_data[i];
        if (!replace) id_mapping.push_back(i);
        cdf.push_back(accum);
      }
    if (N == 0)
      LOG(FATAL)
          << "Cannot take more sample than population when 'replace=false'";
  }

 public:
  /**
   * @brief Mark every entry as available and rebuild the CDF.
   * @param prob categorical distribution; also stored in _prob when
   *        replace=false so that Draw() can rebuild from it later.
   */
  void ResetState(FloatArray prob) {
    used.resize(prob->shape[0]);
    if (!replace) _prob = prob;
    std::fill(used.begin(), used.end(), false);
    Reconstruct(prob);
  }

  /**
   * @brief Build a sampler over `prob` using random engine `re`.
   */
  explicit CDFSampler(RandomEngine *re, FloatArray prob) : re(re) {
    ResetState(prob);
  }

  ~CDFSampler() {}

  /**
   * @brief Draw one category index by binary search in the CDF.
   * @return the sampled index, or -1 when the accumulated likelihood is not
   *         positive.
   */
  Idx Draw() {
    // Lower clamp for the random value: with p >= eps > 0 == cdf[0],
    // lower_bound can never return the leading cdf[0] slot, so the "- 1"
    // below stays non-negative.
    double eps = std::numeric_limits::min();
    if (!replace) {
      const DType *_prob_data = _prob.Ptr();
      // Rebuild once at least half of the available mass has been taken, so
      // the rejection loop below does not spin on mostly-used entries.
      if (2 * taken >= accum) Reconstruct(_prob);
      if (accum <= 0) return -1;
      // Rejection loop: repeat until an entry that is not yet used comes up.
      while (true) {
        double p = std::max(re->Uniform(0., accum), eps);
        Idx rst = Map(
            std::lower_bound(cdf.begin(), cdf.end(), p) - cdf.begin() - 1);
        double cap = static_cast(_prob_data[rst]);
        if (!used[rst]) {
          used[rst] = true;
          taken += cap;
          return rst;
        }
      }
    }
    // Sampling with replacement: a single binary search.
    if (accum <= 0) return -1;
    double p = std::max(re->Uniform(0., accum), eps);
    return Map(std::lower_bound(cdf.begin(), cdf.end(), p) - cdf.begin() - 1);
  }
};

/**
 * TreeSampler
is used to sample elements from a given discrete categorical
 * distribution. Algorithm: create a heap that stores accumulated likelihood of
 * its leaf descendants. Reference: https://blog.smola.org/post/1016514759
 * Sampler building complexity: O(n)
 * Sample w/ and w/o replacement complexity: O(log n)
 *
 * Layout of `weight`: a 1-indexed implicit binary tree. Node i has children
 * 2 * i and 2 * i + 1, the root is weight[1], and category k is stored at
 * leaf weight[num_leafs + k]. Every internal node holds the sum of its two
 * children.
 */
template class TreeSampler : public BaseSampler {
 private:
  RandomEngine *re;
  std::vector weight;  // accumulated likelihood of subtrees.
  int64_t N;           // size of `weight` (2 * num_leafs)
  int64_t num_leafs;   // smallest power of two >= number of categories
  // Optional per-category amount subtracted from a leaf each time it is
  // drawn without replacement; nullptr means "set the leaf to 0".
  const DType *decrease;

 public:
  /**
   * @brief Reload the leaves from `prob` and recompute all subtree sums.
   * @param prob categorical distribution; leaves beyond its length stay 0.
   */
  void ResetState(FloatArray prob) {
    int64_t prob_size = prob->shape[0];
    const DType *prob_data = prob.Ptr();
    std::fill(weight.begin(), weight.end(), 0);
    for (int64_t i = 0; i < prob_size; ++i)
      weight[num_leafs + i] = prob_data[i];
    // Fill internal nodes bottom-up so children are final before the parent.
    for (int64_t i = num_leafs - 1; i >= 1; --i)
      weight[i] = weight[i * 2] + weight[i * 2 + 1];
  }

  /**
   * @brief Build a sampler over `prob` using random engine `re`.
   * @param re random engine used by Draw()
   * @param prob categorical distribution
   * @param decrease see Draw(); may be nullptr
   */
  explicit TreeSampler(
      RandomEngine *re, FloatArray prob, const DType *decrease = nullptr)
      : re(re), decrease(decrease) {
    num_leafs = 1;
    while (num_leafs < prob->shape[0]) num_leafs *= 2;
    N = num_leafs * 2;
    weight.resize(N);
    ResetState(prob);
  }

  /* Pick an element from the given distribution and update the tree.
   *
   * The parameter decrease is an array of which the length is the number of
   * categories. Every time an element in the category x is picked, the weight
   * of this category is subtracted by decrease[x]. It is used to support the
   * case where a category might contains multiple candidates and decrease[x] is
   * the weight of one candidate of the category x.
   *
   * When decrease == nullptr, it means there is only one candidate in each
   * category and will directly set the weight of the chosen category as 0.
   *
   * Returns -1 when the total weight at the root is not positive.
   */
  Idx Draw() {
    if (weight[1] <= 0) return -1;
    int64_t cur = 1;
    double p = re->Uniform(0, weight[cur]);
    // `accum` is the total weight lying to the left of the subtree at `cur`.
    double accum = 0.;
    // Walk from the root down to a leaf.
    while (cur < num_leafs) {
      double w_l = weight[cur * 2], w_r = weight[cur * 2 + 1];
      double pivot = accum + w_l;
      // w_r > 0 can suppress some numerical problems.
      Idx shift = static_cast(p > pivot && w_r > 0);
      cur = cur * 2 + shift;
      if (shift == 1) accum = pivot;
    }
    Idx rst = cur - num_leafs;
    if (!replace) {
      // Lower the chosen leaf, then refresh the sums of all its ancestors up
      // to the root.
      while (cur >= 1) {
        if (cur >= num_leafs)
          weight[cur] = this->decrease
                            ? weight[cur] - static_cast(this->decrease[rst])
                            : 0.;
        else
          weight[cur] = weight[cur * 2] + weight[cur * 2 + 1];
        cur /= 2;
      }
    }
    return rst;
  }
};

};  // namespace utils

};  // namespace dgl

#endif  // DGL_RANDOM_CPU_SAMPLE_UTILS_H_

================================================
FILE: src/random/random.cc
================================================
/**
 * Copyright (c) 2017 by Contributors
 * @file random.cc
 * @brief Random number generator interfaces
 */

#include
#include
#include
#include
#include
#include

using namespace dgl::runtime;

namespace dgl {

// Registers "rng._CAPI_SetSeed". args[0] is the integer seed. SetSeed(seed) is
// called on the thread-local RandomEngine once per index in
// [0, omp_get_max_threads()), from inside runtime::parallel_for.
// NOTE(review): presumably this is meant to reach every worker thread's
// engine; that depends on how parallel_for assigns indices to threads.
DGL_REGISTER_GLOBAL("rng._CAPI_SetSeed")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const int seed = args[0];
      runtime::parallel_for(0, omp_get_max_threads(), [&](size_t b, size_t e) {
        for (auto i = b; i < e; ++i) {
          RandomEngine::ThreadLocal()->SetSeed(seed);
        }
      });
    });

// Registers "rng._CAPI_Choice".
// Arguments: args[0] num, args[1] population, args[2] prob, args[3] replace,
// args[4] bits (must be 32 or 64, checked below).
// A null `prob` array selects UniformChoice(num, population, replace);
// otherwise Choice(num, prob, replace) is called inside a switch on the
// floating-point dtype of `prob`. The result is stored in *rv.
DGL_REGISTER_GLOBAL("rng._CAPI_Choice")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const int64_t num = args[0];
      const int64_t population = args[1];
      const NDArray prob = args[2];
      const bool replace = args[3];
      const int bits = args[4];
      CHECK(bits == 32 || bits == 64)
          << "Supported bit widths are 32 and 64, but got " << bits << ".";
      if (aten::IsNullArray(prob)) {
        if (bits == 32) {
          *rv = RandomEngine::ThreadLocal()->UniformChoice(
              num, population, replace);
        } else {
          *rv = RandomEngine::ThreadLocal()->UniformChoice(
              num, population, replace);
        }
      } else {
        if (bits == 32) {
          ATEN_FLOAT_TYPE_SWITCH(prob->dtype, FloatType, "probability", {
            *rv = RandomEngine::ThreadLocal()->Choice(
                num, prob, replace);
          });
        } else {
          ATEN_FLOAT_TYPE_SWITCH(prob->dtype, FloatType, "probability", {
            *rv = RandomEngine::ThreadLocal()->Choice(
                num, prob, replace);
          });
        }
      }
    });

};  // namespace dgl

================================================
FILE: 
src/rpc/network/common.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file common.cc * @brief This file provide basic facilities for string * to make programming convenient. */ #include "common.h" #include #include using std::string; namespace dgl { namespace network { // In most cases, delim contains only one character. In this case, we // use CalculateReserveForVector to count the number of elements should // be reserved in result vector, and thus optimize SplitStringUsing. static int CalculateReserveForVector( const std::string& full, const char* delim) { int count = 0; if (delim[0] != '\0' && delim[1] == '\0') { // Optimize the common case where delim is a single character. char c = delim[0]; const char* p = full.data(); const char* end = p + full.size(); while (p != end) { if (*p == c) { // This could be optimized with hasless(v,1) trick. ++p; } else { while (++p != end && *p != c) { // Skip to the next occurence of the delimiter. } ++count; } } } return count; } void SplitStringUsing( const std::string& full, const char* delim, std::vector* result) { CHECK(delim != NULL); CHECK(result != NULL); result->reserve(CalculateReserveForVector(full, delim)); back_insert_iterator > it(*result); SplitStringToIteratorUsing(full, delim, &it); } void SplitStringToSetUsing( const std::string& full, const char* delim, std::set* result) { CHECK(delim != NULL); CHECK(result != NULL); simple_insert_iterator > it(result); SplitStringToIteratorUsing(full, delim, &it); } static void StringAppendV(string* dst, const char* format, va_list ap) { // First try with a small fixed size buffer char space[1024]; // It's possible for methods that use a va_list to invalidate // the data in it upon use. The fix is to make a copy // of the structure before using it and use that copy instead. 
va_list backup_ap; va_copy(backup_ap, ap); int result = vsnprintf(space, sizeof(space), format, backup_ap); va_end(backup_ap); if ((result >= 0) && (result < static_cast(sizeof(space)))) { // It fit dst->append(space, result); return; } // Repeatedly increase buffer size until it fits int length = sizeof(space); while (true) { if (result < 0) { // Older behavior: just try doubling the buffer size length *= 2; } else { // We need exactly "result+1" characters length = result + 1; } char* buf = new char[length]; // Restore the va_list before we use it again va_copy(backup_ap, ap); result = vsnprintf(buf, length, format, backup_ap); va_end(backup_ap); if ((result >= 0) && (result < length)) { // It fit dst->append(buf, result); delete[] buf; return; } delete[] buf; } } string StringPrintf(const char* format, ...) { va_list ap; va_start(ap, format); string result; StringAppendV(&result, format, ap); va_end(ap); return result; } void SStringPrintf(string* dst, const char* format, ...) { va_list ap; va_start(ap, format); dst->clear(); StringAppendV(dst, format, ap); va_end(ap); } void StringAppendF(string* dst, const char* format, ...) { va_list ap; va_start(ap, format); StringAppendV(dst, format, ap); va_end(ap); } } // namespace network } // namespace dgl ================================================ FILE: src/rpc/network/common.h ================================================ /** * Copyright (c) 2019 by Contributors * @file common.h * @brief This file provide basic facilities for string * to make programming convenient. */ #ifndef DGL_RPC_NETWORK_COMMON_H_ #define DGL_RPC_NETWORK_COMMON_H_ #include #include #include #include namespace dgl { namespace network { //------------------------------------------------------------------------------ // Subdivide string |full| into substrings according to delimitors // given in |delim|. |delim| should pointing to a string including // one or more characters. Each character is considerred a possible // delimitor. 
For example: // // vector substrings; // SplitStringUsing("apple orange\tbanana", "\t ", &substrings); // // results in three substrings: // // substrings.size() == 3 // substrings[0] == "apple" // substrings[1] == "orange" // substrings[2] == "banana" //------------------------------------------------------------------------------ void SplitStringUsing( const std::string& full, const char* delim, std::vector* result); // This function has the same semnatic as SplitStringUsing. Results // are saved in an STL set container. void SplitStringToSetUsing( const std::string& full, const char* delim, std::set* result); template struct simple_insert_iterator { explicit simple_insert_iterator(T* t) : t_(t) {} simple_insert_iterator& operator=(const typename T::value_type& value) { t_->insert(value); return *this; } simple_insert_iterator& operator*() { return *this; } simple_insert_iterator& operator++() { return *this; } simple_insert_iterator& operator++(int placeholder) { return *this; } T* t_; }; template struct back_insert_iterator { explicit back_insert_iterator(T& t) : t_(t) {} back_insert_iterator& operator=(const typename T::value_type& value) { t_.push_back(value); return *this; } back_insert_iterator& operator*() { return *this; } back_insert_iterator& operator++() { return *this; } back_insert_iterator operator++(int placeholder) { return *this; } T& t_; }; template static inline void SplitStringToIteratorUsing( const StringType& full, const char* delim, ITR* result) { CHECK_NOTNULL(delim); // Optimize the common case where delim is a single character. if (delim[0] != '\0' && delim[1] == '\0') { char c = delim[0]; const char* p = full.data(); const char* end = p + full.size(); while (p != end) { if (*p == c) { ++p; } else { const char* start = p; while (++p != end && *p != c) { // Skip to the next occurence of the delimiter. 
} *(*result)++ = StringType(start, p - start); } } return; } std::string::size_type begin_index, end_index; begin_index = full.find_first_not_of(delim); while (begin_index != std::string::npos) { end_index = full.find_first_of(delim, begin_index); if (end_index == std::string::npos) { *(*result)++ = full.substr(begin_index); return; } *(*result)++ = full.substr(begin_index, (end_index - begin_index)); begin_index = full.find_first_not_of(delim, end_index); } } //------------------------------------------------------------------------------ // StringPrintf: // // For example: // // std::string str = StringPrintf("%d", 1); /* str = "1" */ // SStringPrintf(&str, "%d", 2); /* str = "2" */ // StringAppendF(&str, "%d", 3); /* str = "23" */ //------------------------------------------------------------------------------ std::string StringPrintf(const char* format, ...); void SStringPrintf(std::string* dst, const char* format, ...); void StringAppendF(std::string* dst, const char* format, ...); } // namespace network } // namespace dgl #endif // DGL_RPC_NETWORK_COMMON_H_ ================================================ FILE: src/rpc/network/communicator.h ================================================ /** * Copyright (c) 2019 by Contributors * @file communicator.h * @brief Communicator for DGL distributed training. */ #ifndef DGL_RPC_NETWORK_COMMUNICATOR_H_ #define DGL_RPC_NETWORK_COMMUNICATOR_H_ #include #include #include "msg_queue.h" namespace dgl { namespace network { /** * @brief Network Sender for DGL distributed training. * * Sender is an abstract class that defines a set of APIs for sending binary * data message over network. It can be implemented by different underlying * networking libraries such TCP socket and MPI. One Sender can connect to * multiple receivers and it can send data to specified receiver via receiver's * ID. */ class Sender { public: /** * @brief Sender constructor * @param queue_size size (bytes) of message queue. 
* @param max_thread_count size of thread pool. 0 for no limit * Note that, the queue_size parameter is optional. */ explicit Sender(int64_t queue_size = 0, int max_thread_count = 0) { CHECK_GE(queue_size, 0); CHECK_GE(max_thread_count, 0); queue_size_ = queue_size; max_thread_count_ = max_thread_count; } virtual ~Sender() {} /** * @brief Send data to specified Receiver. * @param msg data message * @param recv_id receiver's ID * @return Status code * * (1) The send is non-blocking. There is no guarantee that the message has * been physically sent out when the function returns. (2) The communicator * will assume the responsibility of the given message. (3) The API is * multi-thread safe. (4) Messages sent to the same receiver are guaranteed to * be received in the same order. There is no guarantee for messages sent to * different receivers. */ virtual STATUS Send(Message msg, int recv_id) = 0; protected: /** * @brief Size of message queue */ int64_t queue_size_; /** * @brief Size of thread pool. 0 for no limit */ int max_thread_count_; }; /** * @brief Network Receiver for DGL distributed training. * * Receiver is an abstract class that defines a set of APIs for receiving binary * data message over network. It can be implemented by different underlying * networking libraries such as TCP socket and MPI. One Receiver can connect * with multiple Senders and it can receive data from multiple Senders * concurrently. */ class Receiver { public: /** * @brief Receiver constructor * @param queue_size size of message queue. * @param max_thread_count size of thread pool. 0 for no limit * Note that, the queue_size parameter is optional. 
*/ explicit Receiver(int64_t queue_size = 0, int max_thread_count = 0) { if (queue_size < 0) { LOG(FATAL) << "queue_size cannot be a negative number."; } CHECK_GE(max_thread_count, 0); queue_size_ = queue_size; max_thread_count_ = max_thread_count; } virtual ~Receiver() {} /** * @brief Recv data from Sender * @param msg pointer of data message * @param send_id which sender current msg comes from * @param timeout The timeout value in milliseconds. If zero, wait * indefinitely. * @return Status code * * (1) The Recv() API is thread-safe. * (2) Memory allocated by communicator but will not own it after the function * returns. */ virtual STATUS Recv(Message* msg, int* send_id, int timeout = 0) = 0; /** * @brief Recv data from a specified Sender * @param msg pointer of data message * @param send_id sender's ID * @param timeout The timeout value in milliseconds. If zero, wait * indefinitely. * @return Status code * * (1) The RecvFrom() API is thread-safe. * (2) Memory allocated by communicator but will not own it after the function * returns. */ virtual STATUS RecvFrom(Message* msg, int send_id, int timeout = 0) = 0; protected: /** * @brief Size of message queue */ int64_t queue_size_; /** * @brief Size of thread pool. 0 for no limit */ int max_thread_count_; }; } // namespace network } // namespace dgl #endif // DGL_RPC_NETWORK_COMMUNICATOR_H_ ================================================ FILE: src/rpc/network/msg_queue.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file msg_queue.cc * @brief Message queue for DGL distributed training. 
*/ #include "msg_queue.h" #include #include namespace dgl { namespace network { using std::string; MessageQueue::MessageQueue(int64_t queue_size, int num_producers) { CHECK_GE(queue_size, 0); CHECK_GE(num_producers, 0); queue_size_ = queue_size; free_size_ = queue_size; num_producers_ = num_producers; } STATUS MessageQueue::Add(Message msg, bool is_blocking) { // check if message is too long to fit into the queue if (msg.size > queue_size_) { LOG(WARNING) << "Message is larger than the queue."; return MSG_GT_SIZE; } if (msg.size <= 0) { LOG(WARNING) << "Message size (" << msg.size << ") is negative or zero."; return MSG_LE_ZERO; } std::unique_lock lock(mutex_); if (finished_producers_.size() >= num_producers_) { return QUEUE_CLOSE; } if (msg.size > free_size_ && !is_blocking) { return QUEUE_FULL; } cond_not_full_.wait(lock, [&]() { return msg.size <= free_size_; }); // Add data pointer to queue queue_.push(msg); free_size_ -= msg.size; // not empty signal cond_not_empty_.notify_one(); return ADD_SUCCESS; } STATUS MessageQueue::Remove(Message* msg, bool is_blocking) { std::unique_lock lock(mutex_); if (queue_.empty()) { if (!is_blocking) { return QUEUE_EMPTY; } if (finished_producers_.size() >= num_producers_) { return QUEUE_CLOSE; } } cond_not_empty_.wait( lock, [this] { return !queue_.empty() || exit_flag_.load(); }); if (finished_producers_.size() >= num_producers_ && queue_.empty()) { return QUEUE_CLOSE; } Message old_msg = queue_.front(); queue_.pop(); msg->data = old_msg.data; msg->size = old_msg.size; msg->receiver_id = old_msg.receiver_id; msg->deallocator = old_msg.deallocator; free_size_ += old_msg.size; cond_not_full_.notify_one(); return REMOVE_SUCCESS; } void MessageQueue::SignalFinished(int producer_id) { std::lock_guard lock(mutex_); finished_producers_.insert(producer_id); // if all producers have finished, consumers should be // waken up to get this signal if (finished_producers_.size() >= num_producers_) { exit_flag_.store(true); 
cond_not_empty_.notify_all(); } } bool MessageQueue::Empty() const { std::lock_guard lock(mutex_); return queue_.size() == 0; } bool MessageQueue::EmptyAndNoMoreAdd() const { std::lock_guard lock(mutex_); return queue_.size() == 0 && finished_producers_.size() >= num_producers_; } } // namespace network } // namespace dgl ================================================ FILE: src/rpc/network/msg_queue.h ================================================ /** * Copyright (c) 2019 by Contributors * @file msg_queue.h * @brief Message queue for DGL distributed training. */ #ifndef DGL_RPC_NETWORK_MSG_QUEUE_H_ #define DGL_RPC_NETWORK_MSG_QUEUE_H_ #include #include #include #include #include #include #include #include #include // for pair namespace dgl { namespace network { typedef int STATUS; /** * @brief Status code of message queue */ #define ADD_SUCCESS 3400 // Add message successfully #define MSG_GT_SIZE 3401 // Message size beyond queue size #define MSG_LE_ZERO 3402 // Message size is not a positive number #define QUEUE_CLOSE 3403 // Cannot add message when queue is closed #define QUEUE_FULL 3404 // Cannot add message when queue is full #define REMOVE_SUCCESS 3405 // Remove message successfully #define QUEUE_EMPTY 3406 // Cannot remove when queue is empty /** * @brief Message used by network communicator and message queue. */ struct Message { /** * @brief Constructor */ Message() {} /** * @brief Constructor */ Message(char* data_ptr, int64_t data_size) : data(data_ptr), size(data_size) {} /** * @brief message data */ char* data; /** * @brief message size in bytes */ int64_t size; /** * @brief message receiver id */ int receiver_id = -1; /** * @brief user-defined deallocator, which can be nullptr */ std::function deallocator = nullptr; }; /** * @brief Free memory buffer of message */ inline void DefaultMessageDeleter(Message* msg) { delete[] msg->data; } /** * @brief Message Queue for network communication. 
* * MessageQueue is FIFO queue that adopts producer/consumer model for data * message. It supports one or more producer threads and one or more consumer * threads. Producers invokes Add() to push data message into the queue, and * consumers invokes Remove() to pop data message from queue. Add() and Remove() * use two condition variables to synchronize producer threads and consumer * threads. Each producer invokes SignalFinished(producer_id) to claim that it * is about to finish, where producer_id is an integer uniquely identify a * producer thread. This signaling mechanism prevents consumers from waiting * after all producers have finished their jobs. * * MessageQueue is thread-safe. * */ class MessageQueue { public: /** * @brief MessageQueue constructor * @param queue_size size (bytes) of message queue * @param num_producers number of producers, use 1 by default */ explicit MessageQueue( int64_t queue_size /* in bytes */, int num_producers = 1); /** * @brief MessageQueue deconstructor */ ~MessageQueue() {} /** * @brief Add message to the queue * @param msg data message * @param is_blocking Blocking if cannot add, else return * @return Status code */ STATUS Add(Message msg, bool is_blocking = true); /** * @brief Remove message from the queue * @param msg pointer of data msg * @param is_blocking Blocking if cannot remove, else return * @return Status code */ STATUS Remove(Message* msg, bool is_blocking = true); /** * @brief Signal that producer producer_id will no longer produce anything * @param producer_id An integer uniquely to identify a producer thread */ void SignalFinished(int producer_id); /** * @return true if queue is empty. */ bool Empty() const; /** * @return true if queue is empty and all num_producers have signaled. 
*/ bool EmptyAndNoMoreAdd() const; protected: /** * @brief message queue */ std::queue queue_; /** * @brief Size of the queue in bytes */ int64_t queue_size_; /** * @brief Free size of the queue */ int64_t free_size_; /** * @brief Used to check all producers will no longer produce anything */ size_t num_producers_; /** * @brief Store finished producer id */ std::set finished_producers_; /** * @brief Condition when consumer should wait */ std::condition_variable cond_not_full_; /** * @brief Condition when producer should wait */ std::condition_variable cond_not_empty_; /** * @brief Signal for exit wait */ std::atomic exit_flag_{false}; /** * @brief Protect all above data and conditions */ mutable std::mutex mutex_; }; } // namespace network } // namespace dgl #endif // DGL_RPC_NETWORK_MSG_QUEUE_H_ ================================================ FILE: src/rpc/network/socket_communicator.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file communicator.cc * @brief SocketCommunicator for DGL distributed training. */ #include "socket_communicator.h" #include #include #include #include #include #include "../../c_api_common.h" #include "socket_pool.h" #ifdef _WIN32 #include #else // !_WIN32 #include #endif // _WIN32 namespace dgl { namespace network { /////////////////////////////////////// SocketSender ////////////////////////////////////////////// bool SocketSender::ConnectReceiver(const std::string& addr, int recv_id) { if (recv_id < 0) { LOG(FATAL) << "recv_id cannot be a negative number."; } std::vector substring; std::vector ip_and_port; SplitStringUsing(addr, "//", &substring); // Check address format if (substring[0] != "tcp:" || substring.size() != 2) { LOG(FATAL) << "Incorrect address format:" << addr << " Please provide right address format, " << "e.g, 'tcp://127.0.0.1:50051'. 
"; } // Get IP and port SplitStringUsing(substring[1], ":", &ip_and_port); if (ip_and_port.size() != 2) { LOG(FATAL) << "Incorrect address format:" << addr << " Please provide right address format, " << "e.g, 'tcp://127.0.0.1:50051'. "; } IPAddr address; address.ip = ip_and_port[0]; address.port = std::stoi(ip_and_port[1]); receiver_addrs_[recv_id] = address; return true; } bool SocketSender::ConnectReceiverFinalize(const int max_try_times) { // Create N sockets for Receiver int receiver_count = static_cast(receiver_addrs_.size()); if (max_thread_count_ == 0 || max_thread_count_ > receiver_count) { max_thread_count_ = receiver_count; } sockets_.resize(max_thread_count_); for (const auto& r : receiver_addrs_) { int receiver_id = r.first; int thread_id = receiver_id % max_thread_count_; sockets_[thread_id][receiver_id] = std::make_shared(); TCPSocket* client_socket = sockets_[thread_id][receiver_id].get(); bool bo = false; int try_count = 0; const char* ip = r.second.ip.c_str(); int port = r.second.port; while (bo == false && try_count < max_try_times) { if (client_socket->Connect(ip, port)) { bo = true; } else { if (try_count % 200 == 0 && try_count != 0) { // every 600 seconds show this message LOG(INFO) << "Trying to connect receiver: " << ip << ":" << port; } try_count++; std::this_thread::sleep_for(std::chrono::seconds(3)); } } if (bo == false) { return bo; } } for (int thread_id = 0; thread_id < max_thread_count_; ++thread_id) { msg_queue_.push_back(std::make_shared(queue_size_)); // Create a new thread for this socket connection threads_.push_back(std::make_shared( SendLoop, sockets_[thread_id], msg_queue_[thread_id])); } return true; } void SocketSender::Send(const rpc::RPCMessage& msg, int recv_id) { std::shared_ptr zerocopy_blob(new std::string()); StreamWithBuffer zc_write_strm(zerocopy_blob.get(), true); zc_write_strm.Write(msg); int32_t nonempty_ndarray_count = zc_write_strm.buffer_list().size(); zerocopy_blob->append( 
reinterpret_cast(&nonempty_ndarray_count), sizeof(int32_t)); Message rpc_meta_msg; rpc_meta_msg.data = const_cast(zerocopy_blob->data()); rpc_meta_msg.size = zerocopy_blob->size(); rpc_meta_msg.deallocator = [zerocopy_blob](Message*) {}; CHECK_EQ(Send(rpc_meta_msg, recv_id), ADD_SUCCESS); // send real ndarray data for (auto ptr : zc_write_strm.buffer_list()) { Message ndarray_data_msg; ndarray_data_msg.data = reinterpret_cast(ptr.data); if (ptr.size == 0) { LOG(FATAL) << "Cannot send a empty NDArray."; } ndarray_data_msg.size = ptr.size; NDArray tensor = ptr.tensor; ndarray_data_msg.deallocator = [tensor](Message*) {}; CHECK_EQ(Send(ndarray_data_msg, recv_id), ADD_SUCCESS); } } STATUS SocketSender::Send(Message msg, int recv_id) { CHECK_NOTNULL(msg.data); CHECK_GT(msg.size, 0); CHECK_GE(recv_id, 0); msg.receiver_id = recv_id; // Add data message to message queue STATUS code = msg_queue_[recv_id % max_thread_count_]->Add(msg); return code; } void SocketSender::Finalize() { // Send a signal to tell the msg_queue to finish its job for (int i = 0; i < max_thread_count_; ++i) { // wait until queue is empty auto& mq = msg_queue_[i]; while (mq->Empty() == false) { std::this_thread::sleep_for(std::chrono::seconds(1)); } // All queues have only one producer, which is main thread, so // the producerID argument here should be zero. 
mq->SignalFinished(0); } // Block main thread until all socket-threads finish their jobs for (auto& thread : threads_) { thread->join(); } // Clear all sockets for (auto& group_sockets_ : sockets_) { for (auto& socket : group_sockets_) { socket.second->Close(); } } } void SendCore(Message msg, TCPSocket* socket) { // First send the size // If exit == true, we will send zero size to reciever int64_t sent_bytes = 0; while (static_cast(sent_bytes) < sizeof(int64_t)) { int64_t max_len = sizeof(int64_t) - sent_bytes; int64_t tmp = socket->Send(reinterpret_cast(&msg.size) + sent_bytes, max_len); CHECK_NE(tmp, -1); sent_bytes += tmp; } // Then send the data sent_bytes = 0; while (sent_bytes < msg.size) { int64_t max_len = msg.size - sent_bytes; int64_t tmp = socket->Send(msg.data + sent_bytes, max_len); CHECK_NE(tmp, -1); sent_bytes += tmp; } // delete msg if (msg.deallocator != nullptr) { msg.deallocator(&msg); } } void SocketSender::SendLoop( std::unordered_map> sockets, std::shared_ptr queue) { for (;;) { Message msg; STATUS code = queue->Remove(&msg); if (code == QUEUE_CLOSE) { msg.size = 0; // send an end-signal to receiver for (auto& socket : sockets) { SendCore(msg, socket.second.get()); } break; } SendCore(msg, sockets[msg.receiver_id].get()); } } /////////////////////////////////////// SocketReceiver ////////////////////////////////////////////// bool SocketReceiver::Wait(const std::string& addr, int num_sender) { CHECK_GT(num_sender, 0); std::vector substring; std::vector ip_and_port; SplitStringUsing(addr, "//", &substring); // Check address format if (substring[0] != "tcp:" || substring.size() != 2) { LOG(FATAL) << "Incorrect address format:" << addr << " Please provide right address format, " << "e.g, 'tcp://127.0.0.1:50051'. "; } // Get IP and port SplitStringUsing(substring[1], ":", &ip_and_port); if (ip_and_port.size() != 2) { LOG(FATAL) << "Incorrect address format:" << addr << " Please provide right address format, " << "e.g, 'tcp://127.0.0.1:50051'. 
"; } std::string ip = ip_and_port[0]; int port = stoi(ip_and_port[1]); // Initialize message queue for each connection num_sender_ = num_sender; #ifdef USE_EPOLL if (max_thread_count_ == 0 || max_thread_count_ > num_sender_) { max_thread_count_ = num_sender_; } #else max_thread_count_ = num_sender_; #endif // Initialize socket and socket-thread server_socket_ = new TCPSocket(); // Bind socket if (server_socket_->Bind(ip.c_str(), port) == false) { LOG(FATAL) << "Cannot bind to " << ip << ":" << port; } // Listen if (server_socket_->Listen(kMaxConnection) == false) { LOG(FATAL) << "Cannot listen on " << ip << ":" << port; } // Accept all sender sockets std::string accept_ip; int accept_port; sockets_.resize(max_thread_count_); for (int i = 0; i < num_sender_; ++i) { int thread_id = i % max_thread_count_; auto socket = std::make_shared(); sockets_[thread_id][i] = socket; msg_queue_[i] = std::make_shared(queue_size_); if (server_socket_->Accept(socket.get(), &accept_ip, &accept_port) == false) { LOG(WARNING) << "Error on accept socket."; return false; } } mq_iter_ = msg_queue_.begin(); for (int thread_id = 0; thread_id < max_thread_count_; ++thread_id) { // create new thread for each socket threads_.push_back(std::make_shared( RecvLoop, sockets_[thread_id], msg_queue_, &queue_sem_)); } return true; } rpc::RPCStatus SocketReceiver::Recv(rpc::RPCMessage* msg, int timeout) { Message rpc_meta_msg; int send_id; auto status = Recv(&rpc_meta_msg, &send_id, timeout); if (status == QUEUE_EMPTY) { DLOG(WARNING) << "Timed out when trying to receive rpc meta data after " << timeout << " milliseconds."; return rpc::kRPCTimeOut; } CHECK_EQ(status, REMOVE_SUCCESS); char* count_ptr = rpc_meta_msg.data + rpc_meta_msg.size - sizeof(int32_t); int32_t nonempty_ndarray_count = *(reinterpret_cast(count_ptr)); // Recv real ndarray data std::vector buffer_list(nonempty_ndarray_count); for (int i = 0; i < nonempty_ndarray_count; ++i) { Message ndarray_data_msg; // As meta message has been 
received, data message is always expected unless // connection is closed. STATUS status; do { status = RecvFrom(&ndarray_data_msg, send_id, timeout); if (status == QUEUE_EMPTY) { DLOG(WARNING) << "Timed out when trying to receive rpc ndarray data after " << timeout << " milliseconds."; } } while (status == QUEUE_EMPTY); CHECK_EQ(status, REMOVE_SUCCESS); buffer_list[i] = ndarray_data_msg.data; } StreamWithBuffer zc_read_strm( rpc_meta_msg.data, rpc_meta_msg.size - sizeof(int32_t), buffer_list); zc_read_strm.Read(msg); rpc_meta_msg.deallocator(&rpc_meta_msg); return rpc::kRPCSuccess; } STATUS SocketReceiver::Recv(Message* msg, int* send_id, int timeout) { // queue_sem_ is a semaphore indicating how many elements in multiple // message queues. // When calling queue_sem_.Wait(), this Recv will be suspended until // queue_sem_ > 0 or specified timeout expires, decrease queue_sem_ by 1, // then start to fetch a message. if (!queue_sem_.TimedWait(timeout)) { return QUEUE_EMPTY; } for (;;) { for (; mq_iter_ != msg_queue_.end(); ++mq_iter_) { STATUS code = mq_iter_->second->Remove(msg, false); if (code == QUEUE_EMPTY) { continue; // jump to the next queue } else { *send_id = mq_iter_->first; ++mq_iter_; return code; } } mq_iter_ = msg_queue_.begin(); } LOG(ERROR) << "Failed to remove message from queue due to unexpected queue status."; return QUEUE_CLOSE; } STATUS SocketReceiver::RecvFrom(Message* msg, int send_id, int timeout) { // Get message from specified message queue if (!queue_sem_.TimedWait(timeout)) { return QUEUE_EMPTY; } STATUS code = msg_queue_[send_id]->Remove(msg); return code; } void SocketReceiver::Finalize() { // Send a signal to tell the message queue to finish its job for (auto& mq : msg_queue_) { // wait until queue is empty while (mq.second->Empty() == false) { std::this_thread::sleep_for(std::chrono::seconds(1)); } mq.second->SignalFinished(mq.first); } // Block main thread until all socket-threads finish their jobs for (auto& thread : threads_) { 
thread->join(); } // Clear all sockets for (auto& group_sockets : sockets_) { for (auto& socket : group_sockets) { socket.second->Close(); } } server_socket_->Close(); delete server_socket_; } int64_t RecvDataSize(TCPSocket* socket) { int64_t received_bytes = 0; int64_t data_size = 0; while (static_cast(received_bytes) < sizeof(int64_t)) { int64_t max_len = sizeof(int64_t) - received_bytes; int64_t tmp = socket->Receive( reinterpret_cast(&data_size) + received_bytes, max_len); if (tmp == -1) { if (received_bytes > 0) { // We want to finish reading full data_size continue; } return -1; } received_bytes += tmp; } return data_size; } void RecvData( TCPSocket* socket, char* buffer, const int64_t& data_size, int64_t* received_bytes) { while (*received_bytes < data_size) { int64_t max_len = data_size - *received_bytes; int64_t tmp = socket->Receive(buffer + *received_bytes, max_len); if (tmp == -1) { // Socket not ready, no more data to read return; } *received_bytes += tmp; } } void SocketReceiver::RecvLoop( std::unordered_map< int /* Sender (virtual) ID */, std::shared_ptr> sockets, std::unordered_map< int /* Sender (virtual) ID */, std::shared_ptr> queues, runtime::Semaphore* queue_sem) { std::unordered_map> recv_contexts; SocketPool socket_pool; for (auto& socket : sockets) { auto& sender_id = socket.first; socket_pool.AddSocket(socket.second, sender_id); recv_contexts[sender_id] = std::unique_ptr(new RecvContext()); } // Main loop to receive messages for (;;) { int sender_id; // Get active socket using epoll std::shared_ptr socket = socket_pool.GetActiveSocket(&sender_id); if (queues[sender_id]->EmptyAndNoMoreAdd()) { // This sender has already stopped if (socket_pool.RemoveSocket(socket) == 0) { return; } continue; } // Nonblocking socket might be interrupted at any point. 
So we need to // store the partially received data std::unique_ptr& ctx = recv_contexts[sender_id]; int64_t& data_size = ctx->data_size; int64_t& received_bytes = ctx->received_bytes; char*& buffer = ctx->buffer; if (data_size == -1) { // This is a new message, so receive the data size first data_size = RecvDataSize(socket.get()); if (data_size > 0) { try { buffer = new char[data_size]; } catch (const std::bad_alloc&) { LOG(FATAL) << "Cannot allocate enough memory for message, " << "(message size: " << data_size << ")"; } received_bytes = 0; } else if (data_size == 0) { // Received stop signal if (socket_pool.RemoveSocket(socket) == 0) { return; } } } RecvData(socket.get(), buffer, data_size, &received_bytes); if (received_bytes >= data_size) { // Full data received, create Message and push to queue Message msg; msg.data = buffer; msg.size = data_size; msg.deallocator = DefaultMessageDeleter; queues[sender_id]->Add(msg); // Reset recv context data_size = -1; // Signal queue semaphore queue_sem->Post(); } } } } // namespace network } // namespace dgl ================================================ FILE: src/rpc/network/socket_communicator.h ================================================ /** * Copyright (c) 2019 by Contributors * @file communicator.h * @brief SocketCommunicator for DGL distributed training. */ #ifndef DGL_RPC_NETWORK_SOCKET_COMMUNICATOR_H_ #define DGL_RPC_NETWORK_SOCKET_COMMUNICATOR_H_ #include #include #include #include #include #include "../../runtime/semaphore_wrapper.h" #include "../rpc_msg.h" #include "common.h" #include "communicator.h" #include "msg_queue.h" #include "tcp_socket.h" namespace dgl { namespace network { static constexpr int kTimeOut = 10 * 60; // 10 minutes (in seconds) for socket timeout static constexpr int kMaxConnection = 1024; // maximal connection: 1024 /** * @breif Networking address */ struct IPAddr { std::string ip; int port; }; /** * @brief SocketSender for DGL distributed training. 
* * SocketSender is the communicator implemented by tcp socket. */ class SocketSender : public Sender { public: /** * @brief Sender constructor * @param queue_size size of message queue * @param max_thread_count size of thread pool. 0 for no limit */ SocketSender(int64_t queue_size, int max_thread_count) : Sender(queue_size, max_thread_count) {} /** * @brief Connect to a receiver. * * When there are multiple receivers to be connected, application will call * `ConnectReceiver` for each and then call `ConnectReceiverFinalize` to make * sure that either all the connections are successfully established or some * of them fail. * * @param addr Networking address, e.g., 'tcp://127.0.0.1:50091' * @param recv_id receiver's ID * @return True for success and False for fail * * The function is *not* thread-safe; only one thread can invoke this API. */ bool ConnectReceiver(const std::string& addr, int recv_id); /** * @brief Finalize the action to connect to receivers. Make sure that either * all connections are successfully established or connection fails. * @return True for success and False for fail * * The function is *not* thread-safe; only one thread can invoke this API. */ bool ConnectReceiverFinalize(const int max_try_times); /** * @brief Send RPCMessage to specified Receiver. * @param msg data message * @param recv_id receiver's ID */ void Send(const rpc::RPCMessage& msg, int recv_id); /** * @brief Finalize TPSender */ void Finalize(); /** * @brief Send data to specified Receiver. Actually pushing message to message * queue. * @param msg data message. * @param recv_id receiver's ID. * @return Status code. * * (1) The send is non-blocking. There is no guarantee that the message has * been physically sent out when the function returns. (2) The communicator * will assume the responsibility of the given message. (3) The API is * multi-thread safe. (4) Messages sent to the same receiver are guaranteed to * be received in the same order. 
There is no guarantee for messages sent to * different receivers. */ STATUS Send(Message msg, int recv_id) override; private: /** * @brief socket for each connection of receiver */ std::vector< std::unordered_map>> sockets_; /** * @brief receivers' address */ std::unordered_map receiver_addrs_; /** * @brief message queue for each thread */ std::vector> msg_queue_; /** * @brief Independent thread */ std::vector> threads_; /** * @brief Send-loop for each thread * @param sockets TCPSockets for current thread * @param queue message_queue for current thread * * Note that, the SendLoop will finish its loop-job and exit thread * when the main thread invokes Signal() API on the message queue. */ static void SendLoop( std::unordered_map< int /* Receiver (virtual) ID */, std::shared_ptr> sockets, std::shared_ptr queue); }; /** * @brief SocketReceiver for DGL distributed training. * * SocketReceiver is the communicator implemented by tcp socket. */ class SocketReceiver : public Receiver { public: /** * @brief Receiver constructor * @param queue_size size of message queue. * @param max_thread_count size of thread pool. 0 for no limit */ SocketReceiver(int64_t queue_size, int max_thread_count) : Receiver(queue_size, max_thread_count) {} /** * @brief Wait for all the Senders to connect * @param addr Networking address, e.g., 'tcp://127.0.0.1:50051', 'mpi://0' * @param num_sender total number of Senders * @return True for success and False for fail * * Wait() is not thread-safe and only one thread can invoke this API. */ bool Wait(const std::string& addr, int num_sender); /** * @brief Recv RPCMessage from Sender. Actually removing data from queue. * @param msg pointer of RPCmessage * @param timeout The timeout value in milliseconds. If zero, wait * indefinitely. * @return RPCStatus: kRPCSuccess or kRPCTimeOut. */ rpc::RPCStatus Recv(rpc::RPCMessage* msg, int timeout); /** * @brief Recv data from Sender. Actually removing data from msg_queue. 
* @param msg pointer of data message * @param send_id which sender current msg comes from * @param timeout The timeout value in milliseconds. If zero, wait * indefinitely. * @return Status code * * (1) The Recv() API is thread-safe. * (2) Memory allocated by communicator but will not own it after the function * returns. */ STATUS Recv(Message* msg, int* send_id, int timeout = 0) override; /** * @brief Recv data from a specified Sender. Actually removing data from * msg_queue. * @param msg pointer of data message. * @param send_id sender's ID * @param timeout The timeout value in milliseconds. If zero, wait * indefinitely. * @return Status code * * (1) The RecvFrom() API is thread-safe. * (2) Memory allocated by communicator but will not own it after the function * returns. */ STATUS RecvFrom(Message* msg, int send_id, int timeout = 0) override; /** * @brief Finalize SocketReceiver * * Finalize() is not thread-safe and only one thread can invoke this API. */ void Finalize(); private: struct RecvContext { int64_t data_size = -1; int64_t received_bytes = 0; char* buffer = nullptr; }; /** * @brief number of sender */ int num_sender_; /** * @brief server socket for listening connections */ TCPSocket* server_socket_; /** * @brief socket for each client connections */ std::vector>> sockets_; /** * @brief Message queue for each socket connection */ std::unordered_map< int /* Sender (virtual) ID */, std::shared_ptr> msg_queue_; std::unordered_map>::iterator mq_iter_; /** * @brief Independent thead */ std::vector> threads_; /** * @brief queue_sem_ semphore to indicate number of messages in multiple * message queues to prevent busy wait of Recv */ runtime::Semaphore queue_sem_; /** * @brief Recv-loop for each thread * @param sockets client sockets of current thread * @param queue message queues of current thread * * Note that, the RecvLoop will finish its loop-job and exit thread * when the main thread invokes Signal() API on the message queue. 
*/ static void RecvLoop( std::unordered_map< int /* Sender (virtual) ID */, std::shared_ptr> sockets, std::unordered_map< int /* Sender (virtual) ID */, std::shared_ptr> queues, runtime::Semaphore* queue_sem); }; } // namespace network } // namespace dgl #endif // DGL_RPC_NETWORK_SOCKET_COMMUNICATOR_H_ ================================================ FILE: src/rpc/network/socket_pool.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file socket_pool.cc * @brief Socket pool of nonblocking sockets for DGL distributed training. */ #include "socket_pool.h" #include #include "tcp_socket.h" #ifdef USE_EPOLL #include #endif namespace dgl { namespace network { SocketPool::SocketPool() { #ifdef USE_EPOLL epfd_ = epoll_create1(0); if (epfd_ < 0) { LOG(FATAL) << "SocketPool cannot create epfd"; } #endif } void SocketPool::AddSocket( std::shared_ptr socket, int socket_id, int events) { int fd = socket->Socket(); tcp_sockets_[fd] = socket; socket_ids_[fd] = socket_id; #ifdef USE_EPOLL epoll_event e; e.data.fd = fd; if (events == READ) { e.events = EPOLLIN; } else if (events == WRITE) { e.events = EPOLLOUT; } else if (events == READ + WRITE) { e.events = EPOLLIN | EPOLLOUT; } if (epoll_ctl(epfd_, EPOLL_CTL_ADD, fd, &e) < 0) { LOG(FATAL) << "SocketPool cannot add socket"; } socket->SetNonBlocking(true); #else if (tcp_sockets_.size() > 1) { LOG(FATAL) << "SocketPool supports only one socket if not use epoll." 
"Please turn on USE_EPOLL on building"; } #endif } size_t SocketPool::RemoveSocket(std::shared_ptr socket) { int fd = socket->Socket(); socket_ids_.erase(fd); tcp_sockets_.erase(fd); #ifdef USE_EPOLL epoll_ctl(epfd_, EPOLL_CTL_DEL, fd, NULL); #endif return socket_ids_.size(); } SocketPool::~SocketPool() { #ifdef USE_EPOLL for (auto& id : socket_ids_) { int fd = id.first; epoll_ctl(epfd_, EPOLL_CTL_DEL, fd, NULL); } #endif } std::shared_ptr SocketPool::GetActiveSocket(int* socket_id) { if (socket_ids_.empty()) { return nullptr; } for (;;) { while (pending_fds_.empty()) { Wait(); } int fd = pending_fds_.front(); pending_fds_.pop(); // Check if this socket is not removed if (socket_ids_.find(fd) != socket_ids_.end()) { *socket_id = socket_ids_[fd]; return tcp_sockets_[fd]; } } return nullptr; } void SocketPool::Wait() { #ifdef USE_EPOLL static const int MAX_EVENTS = 10; epoll_event events[MAX_EVENTS]; int nfd = epoll_wait(epfd_, events, MAX_EVENTS, -1 /*Timeout*/); for (int i = 0; i < nfd; ++i) { pending_fds_.push(events[i].data.fd); } #else pending_fds_.push(tcp_sockets_.begin()->second->Socket()); #endif } } // namespace network } // namespace dgl ================================================ FILE: src/rpc/network/socket_pool.h ================================================ /** * Copyright (c) 2021 by Contributors * @file socket_pool.h * @brief Socket pool of nonblocking sockets for DGL distributed training. */ #ifndef DGL_RPC_NETWORK_SOCKET_POOL_H_ #define DGL_RPC_NETWORK_SOCKET_POOL_H_ #include #include #include namespace dgl { namespace network { class TCPSocket; /** * @brief SocketPool maintains a group of nonblocking sockets, and can provide * active sockets. * Currently SocketPool is based on epoll, a scalable I/O event notification * mechanism in Linux operating system. 
*/ class SocketPool { public: /** * @brief socket mode read/receive */ static const int READ = 1; /** * @brief socket mode write/send */ static const int WRITE = 2; /** * @brief SocketPool constructor */ SocketPool(); /** * @brief Add a socket to SocketPool * @param socket tcp socket to add * @param socket_id receiver/sender id of the socket * @param events READ, WRITE or READ + WRITE */ void AddSocket( std::shared_ptr socket, int socket_id, int events = READ); /** * @brief Remove socket from SocketPool * @param socket tcp socket to remove * @return number of remaing sockets in the pool */ size_t RemoveSocket(std::shared_ptr socket); /** * @brief SocketPool destructor */ ~SocketPool(); /** * @brief Get current active socket. This is a blocking method * @param socket_id output parameter of the socket_id of active socket * @return active TCPSocket */ std::shared_ptr GetActiveSocket(int* socket_id); private: /** * @brief Wait for event notification */ void Wait(); /** * @brief map from fd to TCPSocket */ std::unordered_map> tcp_sockets_; /** * @brief map from fd to socket_id */ std::unordered_map socket_ids_; /** * @brief fd for epoll base */ int epfd_; /** * @brief queue for current active fds */ std::queue pending_fds_; }; } // namespace network } // namespace dgl #endif // DGL_RPC_NETWORK_SOCKET_POOL_H_ ================================================ FILE: src/rpc/network/tcp_socket.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file tcp_socket.cc * @brief TCP socket for DGL distributed training. */ #include "tcp_socket.h" #include #ifndef _WIN32 #include #include #include #include #include #include #endif // !_WIN32 #include #include namespace dgl { namespace network { typedef struct sockaddr_in SAI; typedef struct sockaddr SA; TCPSocket::TCPSocket() { // init socket socket_ = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); if (socket_ < 0) { LOG(FATAL) << "Can't create new socket. 
Error: " << strerror(errno); } #ifndef _WIN32 // This is to make sure the same port can be reused right after the socket is // closed. int enable = 1; if (setsockopt(socket_, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(int)) < 0) { LOG(WARNING) << "cannot make the socket reusable. Error: " << strerror(errno); } #endif // _WIN32 } TCPSocket::~TCPSocket() { Close(); } bool TCPSocket::Connect(const char *ip, int port) { SAI sa_server; sa_server.sin_family = AF_INET; sa_server.sin_port = htons(port); int retval = 0; do { // retry if EINTR failure appears if (0 < inet_pton(AF_INET, ip, &sa_server.sin_addr) && 0 <= (retval = connect( socket_, reinterpret_cast(&sa_server), sizeof(sa_server)))) { return true; } } while (retval == -1 && errno == EINTR); return false; } bool TCPSocket::Bind(const char *ip, int port) { SAI sa_server; sa_server.sin_family = AF_INET; sa_server.sin_port = htons(port); int ret = 0; ret = inet_pton(AF_INET, ip, &sa_server.sin_addr); if (ret == 0) { LOG(ERROR) << "Invalid IP: " << ip; return false; } else if (ret < 0) { LOG(ERROR) << "Failed to convert [" << ip << "] to binary form, error: " << strerror(errno); return false; } do { // retry if EINTR failure appears if (0 <= (ret = bind( socket_, reinterpret_cast(&sa_server), sizeof(sa_server)))) { return true; } } while (ret == -1 && errno == EINTR); LOG(ERROR) << "Failed bind on " << ip << ":" << port << " , error: " << strerror(errno); return false; } bool TCPSocket::Listen(int max_connection) { int retval; do { // retry if EINTR failure appears if (0 <= (retval = listen(socket_, max_connection))) { return true; } } while (retval == -1 && errno == EINTR); LOG(ERROR) << "Failed listen on socket fd: " << socket_ << " , error: " << strerror(errno); return false; } bool TCPSocket::Accept(TCPSocket *socket, std::string *ip, int *port) { int sock_client; SAI sa_client; socklen_t len = sizeof(sa_client); do { // retry if EINTR failure appears sock_client = accept(socket_, reinterpret_cast(&sa_client), 
&len); } while (sock_client == -1 && errno == EINTR); if (sock_client < 0) { LOG(ERROR) << "Failed accept connection on " << *ip << ":" << *port << ", error: " << strerror(errno) << (errno == EAGAIN ? " SO_RCVTIMEO timeout reached" : ""); return false; } char tmp[INET_ADDRSTRLEN]; const char *ip_client = inet_ntop(AF_INET, &sa_client.sin_addr, tmp, sizeof(tmp)); CHECK(ip_client != nullptr); ip->assign(ip_client); *port = ntohs(sa_client.sin_port); socket->socket_ = sock_client; return true; } #ifdef _WIN32 bool TCPSocket::SetNonBlocking(bool flag) { int result; u_long argp = flag ? 1 : 0; // XXX Non-blocking Windows Sockets apparently has tons of issues: // http://www.sockets.com/winsock.htm#Overview_BlockingNonBlocking // Since SetBlocking() is not used at all, I'm leaving a default // implementation here. But be warned that this is not fully tested. if ((result = ioctlsocket(socket_, FIONBIO, &argp)) != NO_ERROR) { LOG(ERROR) << "Failed to set socket status."; return false; } return true; } #else // !_WIN32 bool TCPSocket::SetNonBlocking(bool flag) { int opts; if ((opts = fcntl(socket_, F_GETFL)) < 0) { LOG(ERROR) << "Failed to get socket status."; return false; } if (flag) { opts |= O_NONBLOCK; } else { opts &= ~O_NONBLOCK; } if (fcntl(socket_, F_SETFL, opts) < 0) { LOG(ERROR) << "Failed to set socket status."; return false; } return true; } #endif // _WIN32 void TCPSocket::SetTimeout(int timeout) { #ifdef _WIN32 timeout = timeout * 1000; // WIN API accepts millsec setsockopt( socket_, SOL_SOCKET, SO_RCVTIMEO, reinterpret_cast(&timeout), sizeof(timeout)); #else // !_WIN32 struct timeval tv; tv.tv_sec = timeout; tv.tv_usec = 0; setsockopt(socket_, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); #endif // _WIN32 } bool TCPSocket::ShutDown(int ways) { return 0 == shutdown(socket_, ways); } void TCPSocket::Close() { if (socket_ >= 0) { #ifdef _WIN32 CHECK_EQ(0, closesocket(socket_)); #else // !_WIN32 CHECK_EQ(0, close(socket_)); #endif // _WIN32 socket_ = -1; } } int64_t 
TCPSocket::Send(const char *data, int64_t len_data) { int64_t number_send; do { // retry if EINTR failure appears number_send = send(socket_, data, len_data, 0); } while (number_send == -1 && errno == EINTR); if (number_send == -1) { LOG(ERROR) << "send error: " << strerror(errno); } return number_send; } int64_t TCPSocket::Receive(char *buffer, int64_t size_buffer) { int64_t number_recv; do { // retry if EINTR failure appears number_recv = recv(socket_, buffer, size_buffer, 0); } while (number_recv == -1 && errno == EINTR); if (number_recv == -1 && errno != EAGAIN && errno != EWOULDBLOCK) { LOG(ERROR) << "recv error: " << strerror(errno); } return number_recv; } int TCPSocket::Socket() const { return socket_; } } // namespace network } // namespace dgl ================================================ FILE: src/rpc/network/tcp_socket.h ================================================ /** * Copyright (c) 2019 by Contributors * @file tcp_socket.h * @brief TCP socket for DGL distributed training. */ #ifndef DGL_RPC_NETWORK_TCP_SOCKET_H_ #define DGL_RPC_NETWORK_TCP_SOCKET_H_ #ifdef _WIN32 #include #include #pragma comment(lib, "Ws2_32.lib") #else // !_WIN32 #include #endif // _WIN32 #include namespace dgl { namespace network { /** * @brief TCPSocket is a simple wrapper around a socket. * It supports only TCP connections. 
*/ class TCPSocket { public: /** * @brief TCPSocket constructor */ TCPSocket(); /** * @brief TCPSocket deconstructor */ ~TCPSocket(); /** * @brief Connect to a given server address * @param ip ip address * @param port end port * @return true for success and false for failure */ bool Connect(const char* ip, int port); /** * @brief Bind on the given IP and PORT * @param ip ip address * @param port end port * @return true for success and false for failure */ bool Bind(const char* ip, int port); /** * @brief listen for remote connection * @param max_connection maximal connection * @return true for success and false for failure */ bool Listen(int max_connection); /** * @brief wait doe a new connection * @param socket new SOCKET will be stored to socket * @param ip_client new IP will be stored to ip_client * @param port_client new PORT will be stored to port_client * @return true for success and false for failure */ bool Accept(TCPSocket* socket, std::string* ip_client, int* port_client); /** * @brief SetNonBlocking() is needed refering to this example of epoll: * http://www.kernel.org/doc/man-pages/online/pages/man4/epoll.4.html * @param flag true for nonblocking, false for blocking * @return true for success and false for failure */ bool SetNonBlocking(bool flag); /** * @brief Set timeout for socket * @param timeout seconds timeout */ void SetTimeout(int timeout); /** * @brief Shut down one or both halves of the connection. * @param ways ways for shutdown * If ways is SHUT_RD, further receives are disallowed. * If ways is SHUT_WR, further sends are disallowed. * If ways is SHUT_RDWR, further sends and receives are disallowed. * @return true for success and false for failure */ bool ShutDown(int ways); /** * @brief close socket. */ void Close(); /** * @brief Send data. * @param data data for sending * @param len_data length of data * @return return number of bytes sent if OK, -1 on error */ int64_t Send(const char* data, int64_t len_data); /** * @brief Receive data. 
* @param buffer buffer for receving * @param size_buffer size of buffer * @return return number of bytes received if OK, -1 on error */ int64_t Receive(char* buffer, int64_t size_buffer); /** * @brief Get socket's file descriptor * @return socket's file descriptor */ int Socket() const; private: /** * @brief socket's file descriptor */ int socket_; }; } // namespace network } // namespace dgl #endif // DGL_RPC_NETWORK_TCP_SOCKET_H_ ================================================ FILE: src/rpc/rpc.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file rpc/rpc.cc * @brief Implementation of RPC utilities used by both server and client sides. */ #if defined(__linux__) #include "./rpc.h" #include #include #include #include #include #include #include #include #include #include "../c_api_common.h" #include "../runtime/resource_manager.h" using dgl::network::StringPrintf; using namespace dgl::runtime; namespace dgl { namespace rpc { // Borrow from PyTorch const char kSocketIfnameEnvVar[] = "TP_SOCKET_IFNAME"; const char kDefaultUvAddress[] = "127.0.0.1"; RPCStatus SendRPCMessage(const RPCMessage& msg, const int32_t target_id) { RPCContext::getInstance()->sender->Send(msg, target_id); return kRPCSuccess; } RPCStatus RecvRPCMessage(RPCMessage* msg, int32_t timeout) { static constexpr int32_t retry_timeout = 5 * 1000; // milliseconds RPCStatus status; const int32_t real_timeout = timeout == 0 ? retry_timeout : timeout; do { status = RPCContext::getInstance()->receiver->Recv(msg, real_timeout); if (status == kRPCTimeOut) { static const std::string log_str = [real_timeout, timeout]() { std::ostringstream oss; oss << "Recv RPCMessage timeout in " << real_timeout << " ms." << (timeout == 0 ? " Retrying ..." 
: ""); return oss.str(); }(); DLOG(WARNING) << log_str; } } while (timeout == 0 && status == kRPCTimeOut); return status; } //////////////////////////// C APIs //////////////////////////// DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCReset") .set_body([](DGLArgs args, DGLRetValue* rv) { RPCContext::Reset(); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCCreateSender") .set_body([](DGLArgs args, DGLRetValue* rv) { int64_t msg_queue_size = args[0]; int max_thread_count = args[1]; RPCContext::getInstance()->sender.reset( new network::SocketSender(msg_queue_size, max_thread_count)); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCCreateReceiver") .set_body([](DGLArgs args, DGLRetValue* rv) { int64_t msg_queue_size = args[0]; int max_thread_count = args[1]; RPCContext::getInstance()->receiver.reset( new network::SocketReceiver(msg_queue_size, max_thread_count)); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCFinalizeSender") .set_body([](DGLArgs args, DGLRetValue* rv) { RPCContext::getInstance()->sender->Finalize(); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCFinalizeReceiver") .set_body([](DGLArgs args, DGLRetValue* rv) { RPCContext::getInstance()->receiver->Finalize(); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCWaitForSenders") .set_body([](DGLArgs args, DGLRetValue* rv) { std::string ip = args[0]; int port = args[1]; int num_sender = args[2]; std::string addr; addr = StringPrintf("tcp://%s:%d", ip.c_str(), port); if (RPCContext::getInstance()->receiver->Wait(addr, num_sender) == false) { LOG(FATAL) << "Wait sender socket failed."; } }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCConnectReceiver") .set_body([](DGLArgs args, DGLRetValue* rv) { std::string ip = args[0]; int port = args[1]; int recv_id = args[2]; std::string addr; addr = StringPrintf("tcp://%s:%d", ip.c_str(), port); *rv = RPCContext::getInstance()->sender->ConnectReceiver(addr, recv_id); }); 
DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCConnectReceiverFinalize") .set_body([](DGLArgs args, DGLRetValue* rv) { const int max_try_times = args[0]; *rv = RPCContext::getInstance()->sender->ConnectReceiverFinalize( max_try_times); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetRank") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t rank = args[0]; RPCContext::getInstance()->rank = rank; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetRank") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->rank; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetNumServer") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t num_servers = args[0]; *rv = RPCContext::getInstance()->num_servers = num_servers; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetNumServer") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->num_servers; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetNumClient") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t num_clients = args[0]; *rv = RPCContext::getInstance()->num_clients = num_clients; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetNumClient") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->num_clients; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetNumServerPerMachine") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t num_servers = args[0]; *rv = RPCContext::getInstance()->num_servers_per_machine = num_servers; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetNumServerPerMachine") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->num_servers_per_machine; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCIncrMsgSeq") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = (RPCContext::getInstance()->msg_seq)++; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetMsgSeq") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = 
RPCContext::getInstance()->msg_seq; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetMsgSeq") .set_body([](DGLArgs args, DGLRetValue* rv) { const int64_t msg_seq = args[0]; RPCContext::getInstance()->msg_seq = msg_seq; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetBarrierCount") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t group_id = args[0]; auto&& cnt = RPCContext::getInstance()->barrier_count; if (cnt.find(group_id) == cnt.end()) { cnt.emplace(group_id, 0x0); } *rv = cnt[group_id]; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetBarrierCount") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t count = args[0]; const int32_t group_id = args[1]; RPCContext::getInstance()->barrier_count[group_id] = count; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetMachineID") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->machine_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetMachineID") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t machine_id = args[0]; RPCContext::getInstance()->machine_id = machine_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetNumMachines") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->num_machines; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetNumMachines") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t num_machines = args[0]; RPCContext::getInstance()->num_machines = num_machines; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSendRPCMessage") .set_body([](DGLArgs args, DGLRetValue* rv) { RPCMessageRef msg = args[0]; const int32_t target_id = args[1]; *rv = SendRPCMessage(*(msg.sptr()), target_id); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCRecvRPCMessage") .set_body([](DGLArgs args, DGLRetValue* rv) { int32_t timeout = args[0]; RPCMessageRef msg = args[1]; *rv = RecvRPCMessage(msg.sptr().get(), timeout); }); //////////////////////////// RPCMessage 
//////////////////////////// DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCCreateEmptyRPCMessage") .set_body([](DGLArgs args, DGLRetValue* rv) { std::shared_ptr rst(new RPCMessage); *rv = rst; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCCreateRPCMessage") .set_body([](DGLArgs args, DGLRetValue* rv) { std::shared_ptr rst(new RPCMessage); rst->service_id = args[0]; rst->msg_seq = args[1]; rst->client_id = args[2]; rst->server_id = args[3]; const std::string data = args[4]; // directly assigning string value raises errors :( rst->data = data; rst->tensors = ListValueToVector(args[5]); rst->group_id = args[6]; *rv = rst; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetServiceId") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; *rv = msg->service_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetMsgSeq") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; *rv = msg->msg_seq; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetClientId") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; *rv = msg->client_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetServerId") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; *rv = msg->server_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetData") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; DGLByteArray barr{msg->data.c_str(), msg->data.size()}; *rv = barr; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetTensors") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; List ret; for (size_t i = 0; i < msg->tensors.size(); ++i) { ret.push_back(Value(MakeValue(msg->tensors[i]))); } *rv = ret; }); #if defined(__linux__) /** * @brief The signal handler. 
* @param s signal */ void SigHandler(int s) { LOG(INFO) << "\nUser pressed Ctrl+C, Exiting"; CleanupResources(); exit(1); } DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCHandleSignal") .set_body([](DGLArgs args, DGLRetValue* rv) { // Ctrl+C handler struct sigaction sigHandler; sigHandler.sa_handler = SigHandler; sigemptyset(&sigHandler.sa_mask); sigHandler.sa_flags = 0; sigaction(SIGINT, &sigHandler, nullptr); sigaction(SIGTERM, &sigHandler, nullptr); }); #endif //////////////////////////// ServerState //////////////////////////// DGL_REGISTER_GLOBAL("distributed.server_state._CAPI_DGLRPCGetServerState") .set_body([](DGLArgs args, DGLRetValue* rv) { auto st = RPCContext::getInstance()->server_state; if (st.get() == nullptr) { RPCContext::getInstance()->server_state = std::make_shared(); } *rv = st; }); //////////////////////////// KVStore //////////////////////////// DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetGlobalIDFromLocalPartition") .set_body([](DGLArgs args, DGLRetValue* rv) { NDArray ID = args[0]; NDArray part_id = args[1]; int local_machine_id = args[2]; int64_t* ID_data = static_cast(ID->data); int64_t* part_id_data = static_cast(part_id->data); int64_t ID_size = ID.GetSize() / sizeof(int64_t); std::vector global_id; for (int64_t i = 0; i < ID_size; ++i) { if (part_id_data[i] == local_machine_id) { global_id.push_back(ID_data[i]); } } NDArray res_tensor = dgl::aten::VecToIdArray(global_id); *rv = res_tensor; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCFastPull") .set_body([](DGLArgs args, DGLRetValue* rv) { // Input std::string name = args[0]; int local_machine_id = args[1]; int machine_count = args[2]; int group_count = args[3]; int client_id = args[4]; int service_id = args[5]; int64_t msg_seq = args[6]; std::string pickle_data = args[7]; NDArray ID = args[8]; NDArray part_id = args[9]; NDArray local_id = args[10]; NDArray local_data = args[11]; // Data dgl_id_t ID_size = ID.GetSize() / sizeof(dgl_id_t); dgl_id_t* ID_data = 
static_cast(ID->data); dgl_id_t* part_id_data = static_cast(part_id->data); dgl_id_t* local_id_data = static_cast(local_id->data); char* local_data_char = static_cast(local_data->data); std::vector local_ids; std::vector local_ids_orginal; std::vector local_data_shape; std::vector> remote_ids(machine_count); std::vector> remote_ids_original(machine_count); // Get row size (in bytes) int row_size = 1; for (int i = 0; i < local_data->ndim; ++i) { local_data_shape.push_back(local_data->shape[i]); if (i != 0) { row_size *= local_data->shape[i]; } } row_size *= (local_data->dtype.bits / 8); size_t data_size = local_data.GetSize(); CHECK_GT(local_data_shape.size(), 0); CHECK_EQ(row_size * local_data_shape[0], data_size); // Get local id (used in local machine) and // remote id (send to remote machine) dgl_id_t idx = 0; for (dgl_id_t i = 0; i < ID_size; ++i) { dgl_id_t p_id = part_id_data[i]; if (static_cast(p_id) == local_machine_id) { dgl_id_t l_id = local_id_data[idx++]; CHECK_LT(l_id, local_data_shape[0]); CHECK_GE(l_id, 0); local_ids.push_back(l_id); local_ids_orginal.push_back(i); } else { CHECK_LT(p_id, machine_count) << "Invalid partition ID."; dgl_id_t id = ID_data[i]; remote_ids[p_id].push_back(id); remote_ids_original[p_id].push_back(i); } } // Send remote id int msg_count = 0; for (size_t i = 0; i < remote_ids.size(); ++i) { if (remote_ids[i].size() != 0) { RPCMessage msg; msg.service_id = service_id; msg.msg_seq = msg_seq; msg.client_id = client_id; int lower = i * group_count; int upper = (i + 1) * group_count; msg.server_id = dgl::RandomEngine::ThreadLocal()->RandInt(lower, upper); msg.data = pickle_data; NDArray tensor = dgl::aten::VecToIdArray(remote_ids[i]); msg.tensors.push_back(tensor); msg.group_id = RPCContext::getInstance()->group_id; SendRPCMessage(msg, msg.server_id); msg_count++; } } local_data_shape[0] = ID_size; NDArray res_tensor = NDArray::Empty( local_data_shape, local_data->dtype, DGLContext{kDGLCPU, 0}); char* return_data = 
static_cast(res_tensor->data); // Copy local data parallel_for(0, local_ids.size(), [&](size_t b, size_t e) { for (auto i = b; i < e; ++i) { CHECK_GE( ID_size * row_size, local_ids_orginal[i] * row_size + row_size); CHECK_GE(data_size, local_ids[i] * row_size + row_size); CHECK_GE(local_ids[i], 0); memcpy( return_data + local_ids_orginal[i] * row_size, local_data_char + local_ids[i] * row_size, row_size); } }); // Recv remote message int recv_cnt = 0; while (recv_cnt < msg_count) { RPCMessage msg; auto status = RecvRPCMessage(&msg, 0); CHECK_EQ(status, kRPCSuccess); ++recv_cnt; int part_id = msg.server_id / group_count; char* data_char = static_cast(msg.tensors[0]->data); dgl_id_t id_size = remote_ids[part_id].size(); for (size_t n = 0; n < id_size; ++n) { memcpy( return_data + remote_ids_original[part_id][n] * row_size, data_char + n * row_size, row_size); } } *rv = res_tensor; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetGroupID") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = RPCContext::getInstance()->group_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCSetGroupID") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t group_id = args[0]; RPCContext::getInstance()->group_id = group_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCMessageGetGroupId") .set_body([](DGLArgs args, DGLRetValue* rv) { const RPCMessageRef msg = args[0]; *rv = msg->group_id; }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCRegisterClient") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t client_id = args[0]; const int32_t group_id = args[1]; *rv = RPCContext::getInstance()->RegisterClient(client_id, group_id); }); DGL_REGISTER_GLOBAL("distributed.rpc._CAPI_DGLRPCGetClient") .set_body([](DGLArgs args, DGLRetValue* rv) { const int32_t client_id = args[0]; const int32_t group_id = args[1]; *rv = RPCContext::getInstance()->GetClient(client_id, group_id); }); } // namespace rpc } // namespace dgl #endif 
================================================ FILE: src/rpc/rpc.h ================================================ /** * Copyright (c) 2020 by Contributors * @file rpc/rpc.h * @brief Common headers for remote process call (RPC). */ #ifndef DGL_RPC_RPC_H_ #define DGL_RPC_RPC_H_ #include #include #include #include #include #include #include #include #include #include #include #include "./network/common.h" #include "./rpc_msg.h" #include "./server_state.h" #include "network/socket_communicator.h" namespace dgl { namespace rpc { struct RPCContext; // Communicator handler type typedef void* CommunicatorHandle; /** @brief Context information for RPC communication */ struct RPCContext { /** * @brief Rank of this process. * * If the process is a client, this is equal to client ID. Otherwise, the * process is a server and this is equal to server ID. */ int32_t rank = -1; /** * @brief Cuurent machine ID */ int32_t machine_id = -1; /** * @brief Total number of machines. */ int32_t num_machines = 0; /** * @brief Message sequence number. */ std::atomic msg_seq{0}; /** * @brief Total number of server. */ int32_t num_servers = 0; /** * @brief Total number of client. */ int32_t num_clients = 0; /** * @brief Current barrier count */ std::unordered_map barrier_count; /** * @brief Total number of server per machine. */ int32_t num_servers_per_machine = 0; /** * @brief Sender communicator. */ std::shared_ptr sender; /** * @brief Receiver communicator. */ std::shared_ptr receiver; /** * @brief Server state data. * * If the process is a server, this stores necessary * server-side data. Otherwise, the process is a client and it stores a cache * of the server co-located with the client (if available). When the client * invokes a RPC to the co-located server, it can thus perform computation * locally without an actual remote call. 
*/ std::shared_ptr server_state; /** * @brief Cuurent group ID */ int32_t group_id = -1; int32_t curr_client_id = -1; std::unordered_map> clients_; /** @brief Get the RPC context singleton */ static RPCContext* getInstance() { static RPCContext ctx; return &ctx; } /** @brief Reset the RPC context */ static void Reset() { auto* t = getInstance(); t->rank = -1; t->machine_id = -1; t->num_machines = 0; t->msg_seq = 0; t->num_servers = 0; t->num_clients = 0; t->barrier_count.clear(); t->num_servers_per_machine = 0; t->sender.reset(); t->receiver.reset(); t->server_state.reset(); t->group_id = -1; t->curr_client_id = -1; t->clients_.clear(); } int32_t RegisterClient(int32_t client_id, int32_t group_id) { auto&& m = clients_[group_id]; if (m.find(client_id) != m.end()) { return -1; } m[client_id] = ++curr_client_id; return curr_client_id; } int32_t GetClient(int32_t client_id, int32_t group_id) const { if (clients_.find(group_id) == clients_.end()) { return -1; } const auto& m = clients_.at(group_id); if (m.find(client_id) == m.end()) { return -1; } return m.at(client_id); } }; /** * @brief Send out one RPC message. * * The operation is non-blocking -- it does not guarantee the payloads have * reached the target or even have left the sender process. However, * all the payloads (i.e., data and arrays) can be safely freed after this * function returns. * * The data buffer in the requst will be copied to internal buffer for actual * transmission, while no memory copy for tensor payloads (a.k.a. zero-copy). * The underlying sending threads will hold references to the tensors until * the contents have been transmitted. * * @param msg RPC message to send * @return status flag */ RPCStatus SendRPCMessage(const RPCMessage& msg); /** * @brief Receive one RPC message. * * The operation is blocking -- it returns when it receives any message * * @param msg The received message * @param timeout The timeout value in milliseconds. If zero, wait indefinitely. 
* @return status flag */ RPCStatus RecvRPCMessage(RPCMessage* msg, int32_t timeout = 0); } // namespace rpc } // namespace dgl #endif // DGL_RPC_RPC_H_ ================================================ FILE: src/rpc/rpc_msg.h ================================================ /** * Copyright (c) 2020 by Contributors * @file rpc/rpc_msg.h * @brief Common headers for remote process call (RPC). */ #ifndef DGL_RPC_RPC_MSG_H_ #define DGL_RPC_RPC_MSG_H_ #include #include #include #include #include namespace dgl { namespace rpc { /** @brief RPC message data structure * * This structure is exposed to Python and can be used as argument or return * value in C API. */ struct RPCMessage : public runtime::Object { /** @brief Service ID */ int32_t service_id; /** @brief Sequence number of this message. */ int64_t msg_seq; /** @brief Client ID. */ int32_t client_id; /** @brief Server ID. */ int32_t server_id; /** @brief Payload buffer carried by this request.*/ std::string data; /** @brief Extra payloads in the form of tensors.*/ std::vector tensors; /** @brief Group ID. 
*/ int32_t group_id{0}; bool Load(dmlc::Stream* stream) { stream->Read(&service_id); stream->Read(&msg_seq); stream->Read(&client_id); stream->Read(&server_id); stream->Read(&data); stream->Read(&tensors); stream->Read(&group_id); return true; } void Save(dmlc::Stream* stream) const { stream->Write(service_id); stream->Write(msg_seq); stream->Write(client_id); stream->Write(server_id); stream->Write(data); stream->Write(tensors); stream->Write(group_id); } static constexpr const char* _type_key = "rpc.RPCMessage"; DGL_DECLARE_OBJECT_TYPE_INFO(RPCMessage, runtime::Object); }; DGL_DEFINE_OBJECT_REF(RPCMessageRef, RPCMessage); /** @brief RPC status flag */ enum RPCStatus { kRPCSuccess = 0, kRPCTimeOut, }; } // namespace rpc } // namespace dgl namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, dgl::rpc::RPCMessage, true); } // namespace dmlc #endif // DGL_RPC_RPC_MSG_H_ ================================================ FILE: src/rpc/server_state.h ================================================ /** * Copyright (c) 2020 by Contributors * @file rpc/server_state.h * @brief Implementation of RPC utilities used by both server and client sides. */ #ifndef DGL_RPC_SERVER_STATE_H_ #define DGL_RPC_SERVER_STATE_H_ #include #include #include #include #include namespace dgl { namespace rpc { /** * @brief Data stored in one DGL server. * * In a distributed setting, DGL partitions all data associated with the graph * (e.g., node and edge features, graph structure, etc.) to multiple partitions, * each handled by one DGL server. Hence, the ServerState class includes all * the data associated with a graph partition. * * Under some setup, users may want to deploy servers in a heterogeneous way * -- servers are further divided into special groups for fetching/updating * node/edge data and for sampling/querying on graph structure respectively. * In this case, the ServerState can be configured to include only node/edge * data or graph structure. 
* * Each machine can have multiple server and client processes, but only one * server is the *master* server while all the others are backup servers. All * clients and backup servers share the state of the master server via shared * memory, which means the ServerState class must be serializable and large * bulk data (e.g., node/edge features) must be stored in NDArray to leverage * shared memory. */ struct ServerState : public runtime::Object { /** @brief Key value store for NDArray data */ std::unordered_map kv_store; /** @brief Graph structure of one partition */ HeteroGraphPtr graph; /** @brief Total number of nodes */ int64_t total_num_nodes = 0; /** @brief Total number of edges */ int64_t total_num_edges = 0; static constexpr const char* _type_key = "server_state.ServerState"; DGL_DECLARE_OBJECT_TYPE_INFO(ServerState, runtime::Object); }; DGL_DEFINE_OBJECT_REF(ServerStateRef, ServerState); } // namespace rpc } // namespace dgl #endif // DGL_RPC_SERVER_STATE_H_ ================================================ FILE: src/runtime/c_object_api.cc ================================================ /** * Copyright (c) 2016 by Contributors * Implementation of C API (reference: tvm/src/api/c_api.cc) * @file c_api.cc */ #include #include #include #include #include #include #include #include #include #include #include "runtime_base.h" /** @brief entry to to easily hold returning information */ struct DGLAPIThreadLocalEntry { /** @brief result holder for returning strings */ std::vector ret_vec_str; /** @brief result holder for returning string pointers */ std::vector ret_vec_charp; /** @brief result holder for retruning string */ std::string ret_str; }; using namespace dgl::runtime; /** @brief Thread local store that can be used to hold return values. 
*/ typedef dmlc::ThreadLocalStore DGLAPIThreadLocalStore; using DGLAPIObject = std::shared_ptr; struct APIAttrGetter : public AttrVisitor { std::string skey; DGLRetValue* ret; bool found_object_ref{false}; void Visit(const char* key, double* value) final { if (skey == key) *ret = value[0]; } void Visit(const char* key, int64_t* value) final { if (skey == key) *ret = value[0]; } void Visit(const char* key, uint64_t* value) final { CHECK_LE( value[0], static_cast(std::numeric_limits::max())) << "cannot return too big constant"; if (skey == key) *ret = static_cast(value[0]); } void Visit(const char* key, int* value) final { if (skey == key) *ret = static_cast(value[0]); } void Visit(const char* key, bool* value) final { if (skey == key) *ret = static_cast(value[0]); } void Visit(const char* key, std::string* value) final { if (skey == key) *ret = value[0]; } void Visit(const char* key, ObjectRef* value) final { if (skey == key) { *ret = value[0]; found_object_ref = true; } } void Visit(const char* key, NDArray* value) final { if (skey == key) *ret = value[0]; } }; struct APIAttrDir : public AttrVisitor { std::vector* names; void Visit(const char* key, double* value) final { names->push_back(key); } void Visit(const char* key, int64_t* value) final { names->push_back(key); } void Visit(const char* key, uint64_t* value) final { names->push_back(key); } void Visit(const char* key, bool* value) final { names->push_back(key); } void Visit(const char* key, int* value) final { names->push_back(key); } void Visit(const char* key, std::string* value) final { names->push_back(key); } void Visit(const char* key, ObjectRef* value) final { names->push_back(key); } void Visit(const char* key, NDArray* value) final { names->push_back(key); } }; int DGLObjectFree(ObjectHandle handle) { API_BEGIN(); delete static_cast(handle); API_END(); } int DGLObjectTypeKey2Index(const char* type_key, int* out_index) { API_BEGIN(); *out_index = static_cast(Object::TypeKey2Index(type_key)); 
API_END(); } int DGLObjectGetTypeIndex(ObjectHandle handle, int* out_index) { API_BEGIN(); *out_index = static_cast((*static_cast(handle))->type_index()); API_END(); } int DGLObjectGetAttr( ObjectHandle handle, const char* key, DGLValue* ret_val, int* ret_type_code, int* ret_success) { API_BEGIN(); DGLRetValue rv; APIAttrGetter getter; getter.skey = key; getter.ret = &rv; DGLAPIObject* tobject = static_cast(handle); if (getter.skey == "type_key") { ret_val->v_str = (*tobject)->type_key(); *ret_type_code = kStr; *ret_success = 1; } else { (*tobject)->VisitAttrs(&getter); *ret_success = getter.found_object_ref || rv.type_code() != kNull; if (rv.type_code() == kStr || rv.type_code() == kDGLDataType) { DGLAPIThreadLocalEntry* e = DGLAPIThreadLocalStore::Get(); e->ret_str = rv.operator std::string(); *ret_type_code = kStr; ret_val->v_str = e->ret_str.c_str(); } else { rv.MoveToCHost(ret_val, ret_type_code); } } API_END(); } int DGLObjectListAttrNames( ObjectHandle handle, int* out_size, const char*** out_array) { DGLAPIThreadLocalEntry* ret = DGLAPIThreadLocalStore::Get(); API_BEGIN(); ret->ret_vec_str.clear(); DGLAPIObject* tobject = static_cast(handle); APIAttrDir dir; dir.names = &(ret->ret_vec_str); (*tobject)->VisitAttrs(&dir); ret->ret_vec_charp.clear(); for (size_t i = 0; i < ret->ret_vec_str.size(); ++i) { ret->ret_vec_charp.push_back(ret->ret_vec_str[i].c_str()); } *out_array = dmlc::BeginPtr(ret->ret_vec_charp); *out_size = static_cast(ret->ret_vec_str.size()); API_END(); } ================================================ FILE: src/runtime/c_runtime_api.cc ================================================ /** * Copyright (c) 2016-2022 by Contributors * @file c_runtime_api.cc * @brief Runtime API implementation */ #include #include #include #include #include #include #include #include #include #include #include #include #include "runtime_base.h" namespace dgl { namespace runtime { /** * @brief The name of Device API factory. * @param type The device type. 
*/ inline std::string DeviceName(int type) { switch (type) { case kDGLCPU: return "cpu"; case kDGLCUDA: return "cuda"; // add more device here once supported default: LOG(FATAL) << "unknown type =" << type; return "Unknown"; } } class DeviceAPIManager { public: static const int kMaxDeviceAPI = 32; // Get API static DeviceAPI* Get(const DGLContext& ctx) { return Get(ctx.device_type); } static DeviceAPI* Get(int dev_type, bool allow_missing = false) { return Global()->GetAPI(dev_type, allow_missing); } private: std::array api_; DeviceAPI* rpc_api_{nullptr}; std::mutex mutex_; // constructor DeviceAPIManager() { std::fill(api_.begin(), api_.end(), nullptr); } // Global static variable. static DeviceAPIManager* Global() { static DeviceAPIManager inst; return &inst; } // Get or initialize API. DeviceAPI* GetAPI(int type, bool allow_missing) { if (type < kRPCSessMask) { if (api_[type] != nullptr) return api_[type]; std::lock_guard lock(mutex_); if (api_[type] != nullptr) return api_[type]; api_[type] = GetAPI(DeviceName(type), allow_missing); return api_[type]; } else { if (rpc_api_ != nullptr) return rpc_api_; std::lock_guard lock(mutex_); if (rpc_api_ != nullptr) return rpc_api_; rpc_api_ = GetAPI("rpc", allow_missing); return rpc_api_; } } DeviceAPI* GetAPI(const std::string name, bool allow_missing) { std::string factory = "device_api." + name; auto* f = Registry::Get(factory); if (f == nullptr) { CHECK(allow_missing) << "Device API " << name << " is not enabled. 
Please install the cuda version of dgl."; return nullptr; } void* ptr = (*f)(); return static_cast(ptr); } }; DeviceAPI* DeviceAPI::Get(DGLContext ctx, bool allow_missing) { return DeviceAPIManager::Get( static_cast(ctx.device_type), allow_missing); } DeviceAPI* DeviceAPI::Get(DGLDeviceType dev_type, bool allow_missing) { return DeviceAPIManager::Get(static_cast(dev_type), allow_missing); } void* DeviceAPI::AllocWorkspace( DGLContext ctx, size_t size, DGLDataType type_hint) { return AllocDataSpace(ctx, size, kTempAllocaAlignment, type_hint); } void DeviceAPI::FreeWorkspace(DGLContext ctx, void* ptr) { FreeDataSpace(ctx, ptr); } DGLStreamHandle DeviceAPI::CreateStream(DGLContext ctx) { LOG(FATAL) << "Device does not support stream api."; return 0; } void DeviceAPI::FreeStream(DGLContext ctx, DGLStreamHandle stream) { LOG(FATAL) << "Device does not support stream api."; } void DeviceAPI::SyncStreamFromTo( DGLContext ctx, DGLStreamHandle event_src, DGLStreamHandle event_dst) { LOG(FATAL) << "Device does not support stream api."; } bool DeviceAPI::PinData(void* ptr, size_t nbytes) { LOG(FATAL) << "Device does not support cudaHostRegister api."; return false; } void* DeviceAPI::AllocPinnedDataSpace( size_t nbytes, void** ctx, void** deleter) { LOG(FATAL) << "Device does not support cudaHostAlloc api."; return nullptr; } void DeviceAPI::FreePinnedDataSpace(void** deleter) { LOG(FATAL) << "Device does not support cudaHostFree api."; } void DeviceAPI::UnpinData(void* ptr) { LOG(FATAL) << "Device does not support cudaHostUnregister api."; } } // namespace runtime } // namespace dgl using namespace dgl::runtime; struct DGLRuntimeEntry { std::string ret_str; std::string last_error; DGLByteArray ret_bytes; }; typedef dmlc::ThreadLocalStore DGLAPIRuntimeStore; const char* DGLGetLastError() { return DGLAPIRuntimeStore::Get()->last_error.c_str(); } void DGLAPISetLastError(const char* msg) { #ifndef _LIBCPP_SGX_CONFIG DGLAPIRuntimeStore::Get()->last_error = msg; #else 
sgx::OCallPackedFunc("__sgx_set_last_error__", msg); #endif } int DGLModLoadFromFile( const char* file_name, const char* format, DGLModuleHandle* out) { API_BEGIN(); Module m = Module::LoadFromFile(file_name, format); *out = new Module(m); API_END(); } int DGLModImport(DGLModuleHandle mod, DGLModuleHandle dep) { API_BEGIN(); static_cast(mod)->Import(*static_cast(dep)); API_END(); } int DGLModGetFunction( DGLModuleHandle mod, const char* func_name, int query_imports, DGLFunctionHandle* func) { API_BEGIN(); PackedFunc pf = static_cast(mod)->GetFunction(func_name, query_imports != 0); if (pf != nullptr) { *func = new PackedFunc(pf); } else { *func = nullptr; } API_END(); } int DGLModFree(DGLModuleHandle mod) { API_BEGIN(); delete static_cast(mod); API_END(); } int DGLBackendGetFuncFromEnv( void* mod_node, const char* func_name, DGLFunctionHandle* func) { API_BEGIN(); *func = (DGLFunctionHandle)(static_cast(mod_node)->GetFuncFromEnv( func_name)); API_END(); } void* DGLBackendAllocWorkspace( int device_type, int device_id, uint64_t size, int dtype_code_hint, int dtype_bits_hint) { DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; DGLDataType type_hint; type_hint.code = static_cast(dtype_code_hint); type_hint.bits = static_cast(dtype_bits_hint); type_hint.lanes = 1; return DeviceAPIManager::Get(ctx)->AllocWorkspace( ctx, static_cast(size), type_hint); } int DGLBackendFreeWorkspace(int device_type, int device_id, void* ptr) { DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; DeviceAPIManager::Get(ctx)->FreeWorkspace(ctx, ptr); return 0; } int DGLBackendRunOnce(void** handle, int (*f)(void*), void* cdata, int nbytes) { if (*handle == nullptr) { *handle = reinterpret_cast(1); return (*f)(cdata); } return 0; } int DGLFuncFree(DGLFunctionHandle func) { API_BEGIN(); delete static_cast(func); API_END(); } int DGLFuncCall( DGLFunctionHandle func, DGLValue* args, int* arg_type_codes, int num_args, DGLValue* 
ret_val, int* ret_type_code) { API_BEGIN(); DGLRetValue rv; (*static_cast(func)) .CallPacked(DGLArgs(args, arg_type_codes, num_args), &rv); // handle return string. if (rv.type_code() == kStr || rv.type_code() == kDGLDataType || rv.type_code() == kBytes) { DGLRuntimeEntry* e = DGLAPIRuntimeStore::Get(); if (rv.type_code() != kDGLDataType) { e->ret_str = *rv.ptr(); } else { e->ret_str = rv.operator std::string(); } if (rv.type_code() == kBytes) { e->ret_bytes.data = e->ret_str.c_str(); e->ret_bytes.size = e->ret_str.length(); *ret_type_code = kBytes; ret_val->v_handle = &(e->ret_bytes); } else { *ret_type_code = kStr; ret_val->v_str = e->ret_str.c_str(); } } else { rv.MoveToCHost(ret_val, ret_type_code); } API_END(); } int DGLCFuncSetReturn( DGLRetValueHandle ret, DGLValue* value, int* type_code, int num_ret) { API_BEGIN(); CHECK_EQ(num_ret, 1); DGLRetValue* rv = static_cast(ret); *rv = DGLArgValue(value[0], type_code[0]); API_END(); } int DGLFuncCreateFromCFunc( DGLPackedCFunc func, void* resource_handle, DGLPackedCFuncFinalizer fin, DGLFunctionHandle* out) { API_BEGIN(); if (fin == nullptr) { *out = new PackedFunc([func, resource_handle](DGLArgs args, DGLRetValue* rv) { int ret = func( (DGLValue*)args.values, (int*)args.type_codes, // NOLINT(*) args.num_args, rv, resource_handle); if (ret != 0) { std::string err = "DGLCall CFunc Error:\n"; err += DGLGetLastError(); throw dmlc::Error(err); } }); } else { // wrap it in a shared_ptr, with fin as deleter. // so fin will be called when the lambda went out of scope. 
std::shared_ptr rpack(resource_handle, fin); *out = new PackedFunc([func, rpack](DGLArgs args, DGLRetValue* rv) { int ret = func( (DGLValue*)args.values, (int*)args.type_codes, // NOLINT(*) args.num_args, rv, rpack.get()); if (ret != 0) { std::string err = "DGLCall CFunc Error:\n"; err += DGLGetLastError(); throw dmlc::Error(err); } }); } API_END(); } int DGLStreamCreate(int device_type, int device_id, DGLStreamHandle* out) { API_BEGIN(); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; *out = DeviceAPIManager::Get(ctx)->CreateStream(ctx); API_END(); } int DGLStreamFree(int device_type, int device_id, DGLStreamHandle stream) { API_BEGIN(); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; DeviceAPIManager::Get(ctx)->FreeStream(ctx, stream); API_END(); } int DGLSetStream(int device_type, int device_id, DGLStreamHandle stream) { API_BEGIN(); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; DeviceAPIManager::Get(ctx)->SetStream(ctx, stream); API_END(); } int DGLGetStream(int device_type, int device_id, DGLStreamHandle* stream) { API_BEGIN(); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; *stream = DeviceAPIManager::Get(ctx)->GetStream(); API_END(); } int DGLSynchronize(int device_type, int device_id, DGLStreamHandle stream) { API_BEGIN(); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; DeviceAPIManager::Get(ctx)->StreamSync(ctx, stream); API_END(); } int DGLStreamStreamSynchronize( int device_type, int device_id, DGLStreamHandle src, DGLStreamHandle dst) { API_BEGIN(); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; DeviceAPIManager::Get(ctx)->SyncStreamFromTo(ctx, src, dst); API_END(); } int DGLCbArgToReturn(DGLValue* value, int code) { API_BEGIN(); dgl::runtime::DGLRetValue rv; rv = dgl::runtime::DGLArgValue(*value, code); int tcode; 
rv.MoveToCHost(value, &tcode); CHECK_EQ(tcode, code); API_END(); } int DGLLoadTensorAdapter(const char* path) { return TensorDispatcher::Global()->Load(path) ? 0 : -1; } // set device api DGL_REGISTER_GLOBAL(dgl::runtime::symbol::dgl_set_device) .set_body([](DGLArgs args, DGLRetValue* ret) { DGLContext ctx; ctx.device_type = static_cast(args[0].operator int()); ctx.device_id = args[1]; DeviceAPIManager::Get(ctx)->SetDevice(ctx); }); // set device api DGL_REGISTER_GLOBAL("_GetDeviceAttr") .set_body([](DGLArgs args, DGLRetValue* ret) { DGLContext ctx; ctx.device_type = static_cast(args[0].operator int()); ctx.device_id = args[1]; DeviceAttrKind kind = static_cast(args[2].operator int()); if (kind == kExist) { DeviceAPI* api = DeviceAPIManager::Get(ctx.device_type, true); if (api != nullptr) { api->GetAttr(ctx, kind, ret); } else { *ret = 0; } } else { DeviceAPIManager::Get(ctx)->GetAttr(ctx, kind, ret); } }); ================================================ FILE: src/runtime/config.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file runtime/config.cc * @brief DGL runtime config */ #include #include #if !defined(_WIN32) && defined(USE_LIBXSMM) #include #endif using namespace dgl::runtime; namespace dgl { namespace runtime { Config::Config() { #if !defined(_WIN32) && defined(USE_LIBXSMM) int cpu_id = libxsmm_cpuid_x86(); // Enable libxsmm on AVX machines by default libxsmm_ = LIBXSMM_X86_AVX2 <= cpu_id && cpu_id <= LIBXSMM_X86_ALLFEAT; #else libxsmm_ = false; #endif } void Config::EnableLibxsmm(bool b) { libxsmm_ = b; } bool Config::IsLibxsmmAvailable() const { return libxsmm_; } DGL_REGISTER_GLOBAL("global_config._CAPI_DGLConfigSetLibxsmm") .set_body([](DGLArgs args, DGLRetValue* rv) { bool use_libxsmm = args[0]; dgl::runtime::Config::Global()->EnableLibxsmm(use_libxsmm); }); DGL_REGISTER_GLOBAL("global_config._CAPI_DGLConfigGetLibxsmm") .set_body([](DGLArgs args, DGLRetValue* rv) { *rv = 
dgl::runtime::Config::Global()->IsLibxsmmAvailable(); }); } // namespace runtime } // namespace dgl ================================================ FILE: src/runtime/cpu_device_api.cc ================================================ /** * Copyright (c) 2016-2022 by Contributors * @file cpu_device_api.cc */ #include #include #include #include #include #include #include #include "workspace_pool.h" namespace dgl { namespace runtime { class CPUDeviceAPI final : public DeviceAPI { public: void SetDevice(DGLContext ctx) final {} void GetAttr(DGLContext ctx, DeviceAttrKind kind, DGLRetValue* rv) final { if (kind == kExist) { *rv = 1; } } void* AllocDataSpace( DGLContext ctx, size_t nbytes, size_t alignment, DGLDataType type_hint) final { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) return tensor_dispatcher->CPUAllocWorkspace(nbytes); void* ptr; #if _MSC_VER || defined(__MINGW32__) ptr = _aligned_malloc(nbytes, alignment); if (ptr == nullptr) throw std::bad_alloc(); #elif defined(_LIBCPP_SGX_CONFIG) ptr = memalign(alignment, nbytes); if (ptr == nullptr) throw std::bad_alloc(); #else int ret = posix_memalign(&ptr, alignment, nbytes); if (ret != 0) throw std::bad_alloc(); #endif return ptr; } void FreeDataSpace(DGLContext ctx, void* ptr) final { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) return tensor_dispatcher->CPUFreeWorkspace(ptr); #if _MSC_VER || defined(__MINGW32__) _aligned_free(ptr); #else free(ptr); #endif } void CopyDataFromTo( const void* from, size_t from_offset, void* to, size_t to_offset, size_t size, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint) final { memcpy( static_cast(to) + to_offset, static_cast(from) + from_offset, size); } void RecordedCopyDataFromTo( void* from, size_t from_offset, void* to, size_t to_offset, size_t size, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint, void* pytorch_ctx) final { 
BUG_IF_FAIL(false) << "This piece of code should not be reached."; } DGLStreamHandle CreateStream(DGLContext) final { return nullptr; } void StreamSync(DGLContext ctx, DGLStreamHandle stream) final {} void* AllocWorkspace( DGLContext ctx, size_t size, DGLDataType type_hint) final; void FreeWorkspace(DGLContext ctx, void* data) final; static const std::shared_ptr& Global() { static std::shared_ptr inst = std::make_shared(); return inst; } }; struct CPUWorkspacePool : public WorkspacePool { CPUWorkspacePool() : WorkspacePool(kDGLCPU, CPUDeviceAPI::Global()) {} }; void* CPUDeviceAPI::AllocWorkspace( DGLContext ctx, size_t size, DGLDataType type_hint) { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) { return tensor_dispatcher->CPUAllocWorkspace(size); } return dmlc::ThreadLocalStore::Get()->AllocWorkspace( ctx, size); } void CPUDeviceAPI::FreeWorkspace(DGLContext ctx, void* data) { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) { return tensor_dispatcher->CPUFreeWorkspace(data); } dmlc::ThreadLocalStore::Get()->FreeWorkspace(ctx, data); } DGL_REGISTER_GLOBAL("device_api.cpu") .set_body([](DGLArgs args, DGLRetValue* rv) { DeviceAPI* ptr = CPUDeviceAPI::Global().get(); *rv = static_cast(ptr); }); } // namespace runtime } // namespace dgl ================================================ FILE: src/runtime/cuda/cuda_common.h ================================================ /** * Copyright (c) 2017 by Contributors * @file cuda_common.h * @brief Common utilities for CUDA */ #ifndef DGL_RUNTIME_CUDA_CUDA_COMMON_H_ #define DGL_RUNTIME_CUDA_CUDA_COMMON_H_ #include #include #include #include #include #include #include #include "../workspace_pool.h" namespace dgl { namespace runtime { /* How to use this class to get a nonblocking thrust execution policy that uses DGL's memory pool and the current cuda stream runtime::CUDAWorkspaceAllocator allocator(ctx); const auto 
stream = runtime::getCurrentCUDAStream(); const auto exec_policy = thrust::cuda::par_nosync(allocator).on(stream); now, one can pass exec_policy to thrust functions to get an integer array of size 1000 whose lifetime is managed by unique_ptr, use: auto int_array = allocator.alloc_unique(1000); int_array.get() gives the raw pointer. */ class CUDAWorkspaceAllocator { DGLContext ctx; public: typedef char value_type; void operator()(void* ptr) const { runtime::DeviceAPI::Get(ctx)->FreeWorkspace(ctx, ptr); } explicit CUDAWorkspaceAllocator(DGLContext ctx) : ctx(ctx) {} CUDAWorkspaceAllocator& operator=(const CUDAWorkspaceAllocator&) = default; template std::unique_ptr alloc_unique( std::size_t size) const { return std::unique_ptr( reinterpret_cast(runtime::DeviceAPI::Get(ctx)->AllocWorkspace( ctx, sizeof(T) * size)), *this); } char* allocate(std::ptrdiff_t size) const { return reinterpret_cast( runtime::DeviceAPI::Get(ctx)->AllocWorkspace(ctx, size)); } void deallocate(char* ptr, std::size_t) const { runtime::DeviceAPI::Get(ctx)->FreeWorkspace(ctx, ptr); } }; template inline bool is_zero(T size) { return size == 0; } template <> inline bool is_zero(dim3 size) { return size.x == 0 || size.y == 0 || size.z == 0; } #define CUDA_DRIVER_CALL(x) \ { \ CUresult result = x; \ if (result != CUDA_SUCCESS && result != CUDA_ERROR_DEINITIALIZED) { \ const char* msg; \ cuGetErrorName(result, &msg); \ LOG(FATAL) << "CUDAError: " #x " failed with error: " << msg; \ } \ } #define CUDA_CALL(func) \ { \ cudaError_t e = (func); \ CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \ << "CUDA: " << cudaGetErrorString(e); \ } #define CUDA_KERNEL_CALL(kernel, nblks, nthrs, shmem, stream, ...) 
\ { \ if (!dgl::runtime::is_zero((nblks)) && !dgl::runtime::is_zero((nthrs))) { \ (kernel)<<<(nblks), (nthrs), (shmem), (stream)>>>(__VA_ARGS__); \ cudaError_t e = cudaGetLastError(); \ CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \ << "CUDA kernel launch error: " << cudaGetErrorString(e); \ } \ } #define CUSPARSE_CALL(func) \ { \ cusparseStatus_t e = (func); \ CHECK(e == CUSPARSE_STATUS_SUCCESS) << "CUSPARSE ERROR: " << e; \ } #define CUBLAS_CALL(func) \ { \ cublasStatus_t e = (func); \ CHECK(e == CUBLAS_STATUS_SUCCESS) << "CUBLAS ERROR: " << e; \ } #define CURAND_CALL(func) \ { \ curandStatus_t e = (func); \ CHECK(e == CURAND_STATUS_SUCCESS) \ << "CURAND Error: " << dgl::runtime::curandGetErrorString(e) << " at " \ << __FILE__ << ":" << __LINE__; \ } inline const char* curandGetErrorString(curandStatus_t error) { switch (error) { case CURAND_STATUS_SUCCESS: return "CURAND_STATUS_SUCCESS"; case CURAND_STATUS_VERSION_MISMATCH: return "CURAND_STATUS_VERSION_MISMATCH"; case CURAND_STATUS_NOT_INITIALIZED: return "CURAND_STATUS_NOT_INITIALIZED"; case CURAND_STATUS_ALLOCATION_FAILED: return "CURAND_STATUS_ALLOCATION_FAILED"; case CURAND_STATUS_TYPE_ERROR: return "CURAND_STATUS_TYPE_ERROR"; case CURAND_STATUS_OUT_OF_RANGE: return "CURAND_STATUS_OUT_OF_RANGE"; case CURAND_STATUS_LENGTH_NOT_MULTIPLE: return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; case CURAND_STATUS_LAUNCH_FAILURE: return "CURAND_STATUS_LAUNCH_FAILURE"; case CURAND_STATUS_PREEXISTING_FAILURE: return "CURAND_STATUS_PREEXISTING_FAILURE"; case CURAND_STATUS_INITIALIZATION_FAILED: return "CURAND_STATUS_INITIALIZATION_FAILED"; case CURAND_STATUS_ARCH_MISMATCH: return "CURAND_STATUS_ARCH_MISMATCH"; case CURAND_STATUS_INTERNAL_ERROR: return "CURAND_STATUS_INTERNAL_ERROR"; } // To suppress compiler warning. return "Unrecognized curand error string"; } /** * @brief Cast data type to cudaDataType_t. 
/**
 * @brief Per-thread CUDA state: library handles and a workspace pool.
 *
 * Instances are obtained through ThreadLocal(). Both library handles start
 * out as nullptr; this declaration does not show where they are created.
 */
class CUDAThreadEntry {
 public:
  /** @brief The cuSPARSE handle; nullptr until set by other code. */
  cusparseHandle_t cusparse_handle{nullptr};
  /** @brief The cuBLAS handle; nullptr until set by other code. */
  cublasHandle_t cublas_handle{nullptr};
  /** @brief Workspace pool owned by this thread's entry. */
  WorkspacePool pool;
  /** @brief Constructor (defined out of line). */
  CUDAThreadEntry();
  /** @brief Get the entry belonging to the calling thread. */
  static CUDAThreadEntry* ThreadLocal();
};
(c) 2017-2022 by Contributors * @file cuda_device_api.cc * @brief GPU specific API */ #include #include #include #include #include #include "cuda_common.h" namespace dgl { namespace runtime { class CUDADeviceAPI final : public DeviceAPI { public: CUDADeviceAPI() { int count; auto err = cudaGetDeviceCount(&count); switch (err) { case cudaSuccess: break; default: count = 0; cudaGetLastError(); } is_available_ = count > 0; } bool IsAvailable() final { return is_available_; } void SetDevice(DGLContext ctx) final { CUDA_CALL(cudaSetDevice(ctx.device_id)); } void GetAttr(DGLContext ctx, DeviceAttrKind kind, DGLRetValue* rv) final { int value = 0; switch (kind) { case kExist: value = (cudaDeviceGetAttribute( &value, cudaDevAttrMaxThreadsPerBlock, ctx.device_id) == cudaSuccess); break; case kMaxThreadsPerBlock: { CUDA_CALL(cudaDeviceGetAttribute( &value, cudaDevAttrMaxThreadsPerBlock, ctx.device_id)); break; } case kWarpSize: { CUDA_CALL( cudaDeviceGetAttribute(&value, cudaDevAttrWarpSize, ctx.device_id)); break; } case kMaxSharedMemoryPerBlock: { CUDA_CALL(cudaDeviceGetAttribute( &value, cudaDevAttrMaxSharedMemoryPerBlock, ctx.device_id)); break; } case kComputeVersion: { std::ostringstream os; CUDA_CALL(cudaDeviceGetAttribute( &value, cudaDevAttrComputeCapabilityMajor, ctx.device_id)); os << value << "."; CUDA_CALL(cudaDeviceGetAttribute( &value, cudaDevAttrComputeCapabilityMinor, ctx.device_id)); os << value; *rv = os.str(); return; } case kDeviceName: { cudaDeviceProp props; CUDA_CALL(cudaGetDeviceProperties(&props, ctx.device_id)); *rv = std::string(props.name); return; } case kMaxClockRate: { CUDA_CALL(cudaDeviceGetAttribute( &value, cudaDevAttrClockRate, ctx.device_id)); break; } case kMultiProcessorCount: { CUDA_CALL(cudaDeviceGetAttribute( &value, cudaDevAttrMultiProcessorCount, ctx.device_id)); break; } case kMaxThreadDimensions: { int dims[3]; CUDA_CALL(cudaDeviceGetAttribute( &dims[0], cudaDevAttrMaxBlockDimX, ctx.device_id)); CUDA_CALL(cudaDeviceGetAttribute( 
&dims[1], cudaDevAttrMaxBlockDimY, ctx.device_id)); CUDA_CALL(cudaDeviceGetAttribute( &dims[2], cudaDevAttrMaxBlockDimZ, ctx.device_id)); std::stringstream ss; // use json string to return multiple int values; ss << "[" << dims[0] << ", " << dims[1] << ", " << dims[2] << "]"; *rv = ss.str(); return; } } *rv = value; } void* AllocDataSpace( DGLContext ctx, size_t nbytes, size_t alignment, DGLDataType type_hint) final { SetDevice(ctx); // Redirect to PyTorch's allocator when available. TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) { return tensor_dispatcher->CUDAAllocWorkspace( nbytes, getCurrentCUDAStream()); } CHECK_EQ(256 % alignment, 0U) << "CUDA space is aligned at 256 bytes"; void* ret; CUDA_CALL(cudaMalloc(&ret, nbytes)); return ret; } void FreeDataSpace(DGLContext ctx, void* ptr) final { SetDevice(ctx); TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) { return tensor_dispatcher->CUDAFreeWorkspace(ptr); } CUDA_CALL(cudaFree(ptr)); } void CopyDataFromTo( const void* from, size_t from_offset, void* to, size_t to_offset, size_t size, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint, DGLStreamHandle stream) { cudaStream_t cu_stream = static_cast(stream); from = static_cast(from) + from_offset; to = static_cast(to) + to_offset; if (ctx_from.device_type == kDGLCUDA && ctx_to.device_type == kDGLCUDA) { CUDA_CALL(cudaSetDevice(ctx_from.device_id)); if (ctx_from.device_id == ctx_to.device_id) { GPUCopy(from, to, size, cudaMemcpyDeviceToDevice, cu_stream); } else { CUDA_CALL(cudaMemcpyPeerAsync( to, ctx_to.device_id, from, ctx_from.device_id, size, cu_stream)); } } else if ( ctx_from.device_type == kDGLCUDA && ctx_to.device_type == kDGLCPU) { CUDA_CALL(cudaSetDevice(ctx_from.device_id)); GPUCopy(from, to, size, cudaMemcpyDeviceToHost, cu_stream); } else if ( ctx_from.device_type == kDGLCPU && ctx_to.device_type == kDGLCUDA) { 
CUDA_CALL(cudaSetDevice(ctx_to.device_id)); GPUCopy(from, to, size, cudaMemcpyHostToDevice, cu_stream); } else { LOG(FATAL) << "expect copy from/to GPU or between GPU"; } } void CopyDataFromTo( const void* from, size_t from_offset, void* to, size_t to_offset, size_t size, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint) final { auto stream = GetStream(); CopyDataFromTo( from, from_offset, to, to_offset, size, ctx_from, ctx_to, type_hint, stream); } // To ensure correct behavior, `record_event` must be invoked anytime a // pointer from PyTorch CachingHostAllocator is used in a cudaMemcpyAsync // call. It provides a way to re-use freed pinned (page-locked) memory // allocations and avoid device sync due to cudaFreeHost calls. void RecordedCopyDataFromTo( void* from, size_t from_offset, void* to, size_t to_offset, size_t size, DGLContext ctx_from, DGLContext ctx_to, DGLDataType type_hint, void* pytorch_ctx) final { auto stream = GetStream(); CopyDataFromTo( from, from_offset, to, to_offset, size, ctx_from, ctx_to, type_hint, stream); auto tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) { auto custream = static_cast(stream); void* ptr = ctx_to.device_type == kDGLCPU ? to : from; int id = ctx_to.device_type == kDGLCPU ? 
ctx_from.device_id : ctx_to.device_id; tensor_dispatcher->CUDARecordHostAlloc(ptr, pytorch_ctx, custream, id); } } DGLStreamHandle CreateStream(DGLContext ctx) { CUDA_CALL(cudaSetDevice(ctx.device_id)); cudaStream_t retval; // make sure the legacy default stream won't block on this stream CUDA_CALL(cudaStreamCreateWithFlags(&retval, cudaStreamNonBlocking)); return static_cast(retval); } void FreeStream(DGLContext ctx, DGLStreamHandle stream) { CUDA_CALL(cudaSetDevice(ctx.device_id)); cudaStream_t cu_stream = static_cast(stream); CUDA_CALL(cudaStreamDestroy(cu_stream)); } void SyncStreamFromTo( DGLContext ctx, DGLStreamHandle event_src, DGLStreamHandle event_dst) { CUDA_CALL(cudaSetDevice(ctx.device_id)); cudaStream_t src_stream = static_cast(event_src); cudaStream_t dst_stream = static_cast(event_dst); cudaEvent_t evt; CUDA_CALL(cudaEventCreate(&evt)); CUDA_CALL(cudaEventRecord(evt, src_stream)); CUDA_CALL(cudaStreamWaitEvent(dst_stream, evt, 0)); CUDA_CALL(cudaEventDestroy(evt)); } void StreamSync(DGLContext ctx, DGLStreamHandle stream) final { CUDA_CALL(cudaSetDevice(ctx.device_id)); CUDA_CALL(cudaStreamSynchronize(static_cast(stream))); } /** NOTE: If the backend is PyTorch, we will use PyTorch's stream management, * so just avoid calling our SetStream/CreateStream unless * you really need advanced stream control. * TODO(Xin): Redirect this to PyTorch or remove it. * PyTorch allows external CUDA streams to be set as current since v1.11. */ void SetStream(DGLContext ctx, DGLStreamHandle stream) final {} DGLStreamHandle GetStream() const final { return static_cast(getCurrentCUDAStream()); } /** NOTE: cudaHostRegister can be called from an arbitrary GPU device, * so we don't need to specify a ctx. 
* The pinned memory can be seen by all CUDA contexts, * not just the one that performed the allocation */ bool PinData(void* ptr, size_t nbytes) override { // prevent users from pinning empty tensors or graphs if (ptr == nullptr || nbytes == 0) return false; TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); // Minimize the pinned memory pool allocated by backend (via tensoradapter) // to preserve enough memory for DGL inherited in-place pin-memory operation if (tensor_dispatcher->IsAvailable()) { tensor_dispatcher->CUDAHostAllocatorEmptyCache(); } CUDA_CALL(cudaHostRegister(ptr, nbytes, cudaHostRegisterDefault)); return true; } void UnpinData(void* ptr) { if (ptr == nullptr) return; CUDA_CALL(cudaHostUnregister(ptr)); } void* AllocPinnedDataSpace( size_t nbytes, void** ctx, void** deleter) override { // prevent pinning empty tensors or graphs if (nbytes == 0) return nullptr; TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); CHECK(tensor_dispatcher->IsAvailable()) << "CachingHostAllocator is not available in the current backend " "PyTorch. Please update the PyTorch version to 1.11+"; return tensor_dispatcher->CUDAAllocHostWorkspace(nbytes, ctx, deleter); } void FreePinnedDataSpace(void** deleter) override { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); CHECK(tensor_dispatcher->IsAvailable()) << "CachingHostAllocator is not available in the current backend " "PyTorch. Please update the PyTorch version to 1.11+"; tensor_dispatcher->CUDAFreeHostWorkspace(deleter); } bool IsPinned(const void* ptr) override { // can't be a pinned tensor if CUDA context is unavailable. 
if (!is_available_) return false; cudaPointerAttributes attr; cudaError_t status = cudaPointerGetAttributes(&attr, ptr); bool result = false; switch (status) { case cudaErrorInvalidValue: // might be a normal CPU tensor in CUDA 10.2- cudaGetLastError(); // clear error break; case cudaSuccess: result = (attr.type == cudaMemoryTypeHost); break; case cudaErrorInitializationError: case cudaErrorNoDevice: case cudaErrorInsufficientDriver: case cudaErrorInvalidDevice: // We don't want to fail in these particular cases since this function // can be called when users only want to run on CPU even if CUDA API is // enabled, or in a forked subprocess where CUDA context cannot be // initialized. So we just mark the CUDA context to unavailable and // return. is_available_ = false; cudaGetLastError(); // clear error break; default: LOG(FATAL) << "error while determining memory status: " << cudaGetErrorString(status); break; } return result; } void* AllocWorkspace( DGLContext ctx, size_t size, DGLDataType type_hint) final { SetDevice(ctx); // Redirect to PyTorch's allocator when available. 
TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) return tensor_dispatcher->CUDAAllocWorkspace( size, getCurrentCUDAStream()); return CUDAThreadEntry::ThreadLocal()->pool.AllocWorkspace(ctx, size); } void FreeWorkspace(DGLContext ctx, void* data) final { SetDevice(ctx); TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) return tensor_dispatcher->CUDAFreeWorkspace(data); CUDAThreadEntry::ThreadLocal()->pool.FreeWorkspace(ctx, data); } static const std::shared_ptr& Global() { static std::shared_ptr inst = std::make_shared(); return inst; } private: static void GPUCopy( const void* from, void* to, size_t size, cudaMemcpyKind kind, cudaStream_t stream) { CUDA_CALL(cudaMemcpyAsync(to, from, size, kind, stream)); if (stream == 0 && kind == cudaMemcpyDeviceToHost) { // only wait for the copy, when it's on the default stream, and it's to // host memory CUDA_CALL(cudaStreamSynchronize(stream)); } } bool is_available_ = true; }; typedef dmlc::ThreadLocalStore CUDAThreadStore; CUDAThreadEntry::CUDAThreadEntry() : pool(kDGLCUDA, CUDADeviceAPI::Global()) {} CUDAThreadEntry* CUDAThreadEntry::ThreadLocal() { return CUDAThreadStore::Get(); } cudaStream_t getCurrentCUDAStream() { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); if (tensor_dispatcher->IsAvailable()) return tensor_dispatcher->CUDAGetCurrentStream(); else // return the default stream when TA is not available return nullptr; } DGL_REGISTER_GLOBAL("device_api.cuda") .set_body([](DGLArgs args, DGLRetValue* rv) { DeviceAPI* ptr = CUDADeviceAPI::Global().get(); *rv = static_cast(ptr); }); } // namespace runtime } // namespace dgl ================================================ FILE: src/runtime/cuda/cuda_hashtable.cu ================================================ /** * Copyright (c) 2021 by Contributors * @file runtime/cuda/cuda_device_common.cuh * @brief Device level functions for within cuda 
kernels. */ #include #include // NOLINT #include "../../array/cuda/atomic.cuh" #include "cuda_common.h" #include "cuda_hashtable.cuh" using namespace dgl::aten::cuda; namespace dgl { namespace runtime { namespace cuda { namespace { constexpr static const int BLOCK_SIZE = 256; constexpr static const size_t TILE_SIZE = 1024; /** * @brief This is the mutable version of the DeviceOrderedHashTable, for use in * inserting elements into the hashtable. * * @tparam IdType The type of ID to store in the hashtable. */ template class MutableDeviceOrderedHashTable : public DeviceOrderedHashTable { public: typedef typename DeviceOrderedHashTable::Mapping* Iterator; static constexpr IdType kEmptyKey = DeviceOrderedHashTable::kEmptyKey; /** * @brief Create a new mutable hashtable for use on the device. * * @param hostTable The original hash table on the host. */ explicit MutableDeviceOrderedHashTable( OrderedHashTable* const hostTable) : DeviceOrderedHashTable(hostTable->DeviceHandle()) {} /** * @brief Find the mutable mapping of a given key within the hash table. * * WARNING: The key must exist within the hashtable. Searching for a key not * in the hashtable is undefined behavior. * * @param id The key to search for. * * @return The mapping. */ inline __device__ Iterator Search(const IdType id) { const IdType pos = SearchForPosition(id); return GetMutable(pos); } /** * @brief Attempt to insert into the hash table at a specific location. * * @param pos The position to insert at. * @param id The ID to insert into the hash table. * @param index The original index of the item being inserted. * * @return True, if the insertion was successful. */ inline __device__ bool AttemptInsertAt( const size_t pos, const IdType id, const size_t index) { const IdType key = AtomicCAS(&GetMutable(pos)->key, kEmptyKey, id); if (key == kEmptyKey || key == id) { // we either set a match key, or found a matching key, so then place the // minimum index in position. 
/**
 * @brief Calculate the number of buckets in the hashtable. To guarantee we
 * can fill the hashtable in the worst case, we must use a number of buckets
 * which is a power of two.
 * https://en.wikipedia.org/wiki/Quadratic_probing#Limitations
 *
 * The base size is the largest power of two not exceeding `num` (1 when
 * `num` is 0 or 1). It is then multiplied by 2^scale, so for any scale >= 1
 * the result is strictly greater than `num`.
 *
 * Everything is computed in size_t integer arithmetic. There is no
 * floating-point logarithm (undefined for `num` < 2) and no `int` shift
 * (which overflows once the exponent reaches 31).
 *
 * @param num The number of items to insert (should be an upper bound on the
 * number of unique keys).
 * @param scale The power of two larger the number of buckets should be than
 * the unique keys.
 *
 * @return The number of buckets the table should contain.
 */
size_t TableSize(const size_t num, const int scale) {
  size_t base = 1;
  // Double once for every bit of `num` above the lowest one; this yields
  // 2^floor(log2(num)) for num >= 2 and 1 otherwise.
  for (size_t rest = num >> 1; rest != 0; rest >>= 1) {
    base <<= 1;
  }
  return base << scale;
}
*/ template struct BlockPrefixCallbackOp { IdType running_total_; __device__ BlockPrefixCallbackOp(const IdType running_total) : running_total_(running_total) {} __device__ IdType operator()(const IdType block_aggregate) { const IdType old_prefix = running_total_; running_total_ += block_aggregate; return old_prefix; } }; } // namespace /** * @brief This generates a hash map where the keys are the global item numbers, * and the values are indexes, and inputs may have duplciates. * * @tparam IdType The type of of id. * @tparam BLOCK_SIZE The size of the thread block. * @tparam TILE_SIZE The number of entries each thread block will process. * @param items The items to insert. * @param num_items The number of items to insert. * @param table The hash table. */ template __global__ void generate_hashmap_duplicates( const IdType* const items, const int64_t num_items, MutableDeviceOrderedHashTable table) { assert(BLOCK_SIZE == blockDim.x); const size_t block_start = TILE_SIZE * blockIdx.x; const size_t block_end = TILE_SIZE * (blockIdx.x + 1); #pragma unroll for (size_t index = threadIdx.x + block_start; index < block_end; index += BLOCK_SIZE) { if (index < num_items) { table.Insert(items[index], index); } } } /** * @brief This generates a hash map where the keys are the global item numbers, * and the values are indexes, and all inputs are unique. * * @tparam IdType The type of of id. * @tparam BLOCK_SIZE The size of the thread block. * @tparam TILE_SIZE The number of entries each thread block will process. * @param items The unique items to insert. * @param num_items The number of items to insert. * @param table The hash table. 
*/ template __global__ void generate_hashmap_unique( const IdType* const items, const int64_t num_items, MutableDeviceOrderedHashTable table) { assert(BLOCK_SIZE == blockDim.x); using Iterator = typename MutableDeviceOrderedHashTable::Iterator; const size_t block_start = TILE_SIZE * blockIdx.x; const size_t block_end = TILE_SIZE * (blockIdx.x + 1); #pragma unroll for (size_t index = threadIdx.x + block_start; index < block_end; index += BLOCK_SIZE) { if (index < num_items) { const Iterator pos = table.Insert(items[index], index); // since we are only inserting unique items, we know their local id // will be equal to their index pos->local = static_cast(index); } } } /** * @brief This counts the number of nodes inserted per thread block. * * @tparam IdType The type of of id. * @tparam BLOCK_SIZE The size of the thread block. * @tparam TILE_SIZE The number of entries each thread block will process. * @param input The nodes to insert. * @param num_input The number of nodes to insert. * @param table The hash table. * @param num_unique The number of nodes inserted into the hash table per thread * block. 
*/ template __global__ void count_hashmap( const IdType* items, const size_t num_items, DeviceOrderedHashTable table, IdType* const num_unique) { assert(BLOCK_SIZE == blockDim.x); using BlockReduce = typename cub::BlockReduce; using Mapping = typename DeviceOrderedHashTable::Mapping; const size_t block_start = TILE_SIZE * blockIdx.x; const size_t block_end = TILE_SIZE * (blockIdx.x + 1); IdType count = 0; #pragma unroll for (size_t index = threadIdx.x + block_start; index < block_end; index += BLOCK_SIZE) { if (index < num_items) { const Mapping& mapping = *table.Search(items[index]); if (mapping.index == index) { ++count; } } } __shared__ typename BlockReduce::TempStorage temp_space; count = BlockReduce(temp_space).Sum(count); if (threadIdx.x == 0) { num_unique[blockIdx.x] = count; if (blockIdx.x == 0) { num_unique[gridDim.x] = 0; } } } /** * @brief Update the local numbering of elements in the hashmap. * * @tparam IdType The type of id. * @tparam BLOCK_SIZE The size of the thread blocks. * @tparam TILE_SIZE The number of elements each thread block works on. * @param items The set of non-unique items to update from. * @param num_items The number of non-unique items. * @param table The hash table. * @param num_items_prefix The number of unique items preceding each thread * block. * @param unique_items The set of unique items (output). * @param num_unique_items The number of unique items (output). 
*/
template __global__ void compact_hashmap(
    const IdType* const items, const size_t num_items,
    MutableDeviceOrderedHashTable table,
    const IdType* const num_items_prefix, IdType* const unique_items,
    int64_t* const num_unique_items) {
  assert(BLOCK_SIZE == blockDim.x);

  using FlagType = uint16_t;
  using BlockScan = typename cub::BlockScan;
  using Mapping = typename DeviceOrderedHashTable::Mapping;

  // Number of loop iterations each thread needs to cover one tile.
  constexpr const int32_t VALS_PER_THREAD = TILE_SIZE / BLOCK_SIZE;

  __shared__ typename BlockScan::TempStorage temp_space;

  // Number of unique items contributed by all preceding thread blocks.
  const IdType offset = num_items_prefix[blockIdx.x];

  // NOTE(review): BlockPrefixCallbackOp is defined outside this view; it
  // presumably carries the running total from one scan to the next -- confirm.
  BlockPrefixCallbackOp prefix_op(0);

  // count successful placements
  for (int32_t i = 0; i < VALS_PER_THREAD; ++i) {
    const IdType index = threadIdx.x + i * BLOCK_SIZE + blockIdx.x * TILE_SIZE;

    // flag is 1 only for the occurrence whose position matches the index
    // stored in the item's table entry; kv is kept only for such occurrences.
    FlagType flag;
    Mapping* kv;
    if (index < num_items) {
      kv = table.Search(items[index]);
      flag = kv->index == index;
    } else {
      flag = 0;
    }

    if (!flag) {
      kv = nullptr;
    }

    // The scan overwrites flag with its exclusive prefix sum, which is used
    // below as the item's slot relative to this block's offset.
    BlockScan(temp_space).ExclusiveSum(flag, flag, prefix_op);
    // temp_space is used again by the scan in the next iteration.
    __syncthreads();

    if (kv) {
      const IdType pos = offset + flag;
      kv->local = pos;
      unique_items[pos] = items[index];
    }
  }

  // A single thread publishes the total, taken from the extra trailing entry
  // of the prefix array.
  if (threadIdx.x == 0 && blockIdx.x == 0) {
    *num_unique_items = num_items_prefix[gridDim.x];
  }
}

// DeviceOrderedHashTable implementation

// Wraps an existing table of `size` entries; no memory is allocated or copied.
template DeviceOrderedHashTable::DeviceOrderedHashTable(
    const Mapping* const table, const size_t size)
    : table_(table), size_(size) {}

// Returns a device-side handle that refers to this table's storage.
template DeviceOrderedHashTable OrderedHashTable::DeviceHandle()
    const {
  return DeviceOrderedHashTable(table_, size_);
}

// OrderedHashTable implementation

// Allocates the table as workspace memory on `ctx` and asynchronously fills it
// with the empty-key byte pattern on `stream`.
template OrderedHashTable::OrderedHashTable(
    const size_t size, DGLContext ctx, cudaStream_t stream, const int scale)
    : table_(nullptr), size_(TableSize(size, scale)), ctx_(ctx) {
  // make sure we will have at least as many buckets as items.
  CHECK_GT(scale, 0);

  auto device = runtime::DeviceAPI::Get(ctx_);
  table_ = static_cast(
      device->AllocWorkspace(ctx_, sizeof(Mapping) * size_));

  // cudaMemsetAsync writes a single byte value, so kEmptyKey must consist of
  // identical bytes.
  CUDA_CALL(cudaMemsetAsync(
      table_, DeviceOrderedHashTable::kEmptyKey,
      sizeof(Mapping) * size_, stream));
}

// Returns the table's workspace memory to the device API.
template OrderedHashTable::~OrderedHashTable() {
  auto device = runtime::DeviceAPI::Get(ctx_);
  device->FreeWorkspace(ctx_, table_);
}

// Inserts `input` (duplicates allowed) and writes the unique items and their
// count to `unique` / `num_unique`. All work is enqueued on `stream`.
template void OrderedHashTable::FillWithDuplicates(
    const IdType* const input, const size_t num_input, IdType* const unique,
    int64_t* const num_unique, cudaStream_t stream) {
  auto device = runtime::DeviceAPI::Get(ctx_);

  // One thread block per tile of TILE_SIZE input items.
  const int64_t num_tiles = (num_input + TILE_SIZE - 1) / TILE_SIZE;

  const dim3 grid(num_tiles);
  const dim3 block(BLOCK_SIZE);

  auto device_table = MutableDeviceOrderedHashTable(this);

  // Step 1: insert every item into the table.
  CUDA_KERNEL_CALL(
      (generate_hashmap_duplicates), grid, block, 0, stream, input, num_input,
      device_table);

  // Step 2: count the unique items per block. Only grid.x + 1 entries of
  // item_prefix are written and scanned below.
  IdType* item_prefix = static_cast(
      device->AllocWorkspace(ctx_, sizeof(IdType) * (num_input + 1)));

  CUDA_KERNEL_CALL(
      (count_hashmap), grid, block, 0, stream, input, num_input, device_table,
      item_prefix);

  // Step 3: turn the per-block counts into per-block offsets, in place. The
  // first call passes a null workspace and only reports the temporary storage
  // size in workspace_bytes.
  size_t workspace_bytes;
  CUDA_CALL(cub::DeviceScan::ExclusiveSum(
      nullptr, workspace_bytes, static_cast(nullptr),
      static_cast(nullptr), grid.x + 1, stream));
  void* workspace = device->AllocWorkspace(ctx_, workspace_bytes);

  CUDA_CALL(cub::DeviceScan::ExclusiveSum(
      workspace, workspace_bytes, item_prefix, item_prefix, grid.x + 1,
      stream));
  device->FreeWorkspace(ctx_, workspace);

  // Step 4: assign local ids and write out the unique items.
  CUDA_KERNEL_CALL(
      (compact_hashmap), grid, block, 0, stream, input, num_input,
      device_table, item_prefix, unique, num_unique);
  device->FreeWorkspace(ctx_, item_prefix);
}

// Inserts `input`, whose items the caller guarantees to be unique; a single
// kernel launch on `stream` is enough in that case.
template void OrderedHashTable::FillWithUnique(
    const IdType* const input, const size_t num_input, cudaStream_t stream) {
  const int64_t num_tiles = (num_input + TILE_SIZE - 1) / TILE_SIZE;

  const dim3 grid(num_tiles);
  const dim3 block(BLOCK_SIZE);

  auto device_table = MutableDeviceOrderedHashTable(this);

  CUDA_KERNEL_CALL(
      (generate_hashmap_unique), grid, block, 0, stream, input, num_input,
      device_table);
}

// Explicit instantiations of the host-side table.
template class OrderedHashTable;
template class OrderedHashTable;

}  // namespace cuda
}  // namespace runtime
}  // namespace dgl

================================================
FILE: src/runtime/cuda/cuda_hashtable.cuh
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file runtime/cuda/cuda_hashtable.cuh
 * @brief Host-side and device-side handles for an ordered GPU hash table.
 */

#ifndef DGL_RUNTIME_CUDA_CUDA_HASHTABLE_CUH_
#define DGL_RUNTIME_CUDA_CUDA_HASHTABLE_CUH_

#include

#include "cuda_common.h"
#include "cuda_runtime.h"

namespace dgl {
namespace runtime {
namespace cuda {

template class OrderedHashTable;

/**
 * @brief A device-side handle for a GPU hashtable for mapping items to the
 * first index at which they appear in the provided data array.
 *
 * For any ID array A, one can view it as a mapping from the index `i`
 * (continuous integer range from zero) to its element `A[i]`. This hashtable
 * serves as a reverse mapping, i.e., from element `A[i]` to its index `i`.
 * Quadratic probing is used for collision resolution. See
 * DeviceOrderedHashTable's documentation for how the Mapping structure is
 * used.
 *
 * The hash table should be used in two phases, with the first being populating
 * the hash table with the OrderedHashTable object, and then generating this
 * handle from it. This object can then be used to search the hash table,
 * to find mappings, from within CUDA code.
*
 * If a device-side handle is created from a hash table with the following
 * entries:
 * [
 *   {key: 0, local: 0, index: 0},
 *   {key: 3, local: 1, index: 1},
 *   {key: 2, local: 2, index: 2},
 *   {key: 8, local: 3, index: 4},
 *   {key: 4, local: 4, index: 5},
 *   {key: 1, local: 5, index: 8}
 * ]
 * The array [0, 3, 2, 0, 8, 4, 3, 2, 1, 8] could have `Search()` called on
 * each id, to be mapped via:
 * ```
 * __global__ void map(int32_t * array,
 *                     size_t size,
 *                     DeviceOrderedHashTable<int32_t> table) {
 *   int idx = threadIdx.x + blockIdx.x*blockDim.x;
 *   if (idx < size) {
 *     array[idx] = table.Search(array[idx])->local;
 *   }
 * }
 * ```
 * to get the remapped array:
 * [0, 1, 2, 0, 3, 4, 1, 2, 5, 3]
 *
 * @tparam IdType The type of the IDs.
 */
template class DeviceOrderedHashTable {
 public:
  /**
   * @brief An entry in the hashtable.
   */
  struct Mapping {
    /**
     * @brief The ID of the item inserted.
     */
    IdType key;
    /**
     * @brief The index of the item in the unique list.
     */
    IdType local;
    /**
     * @brief The index of the item when inserted into the hashtable (e.g.,
     * the index within the array passed into FillWithDuplicates()).
     */
    int64_t index;
  };

  typedef const Mapping* ConstIterator;

  DeviceOrderedHashTable(const DeviceOrderedHashTable& other) = default;
  DeviceOrderedHashTable& operator=(const DeviceOrderedHashTable& other) =
      default;

  /**
   * @brief Find the non-mutable mapping of a given key within the hash table.
   *
   * WARNING: The key must exist within the hashtable. Searching for a key not
   * in the hashtable is undefined behavior.
   *
   * @param id The key to search for.
   *
   * @return An iterator to the mapping.
   */
  inline __device__ ConstIterator Search(const IdType id) const {
    const IdType pos = SearchForPosition(id);

    return &table_[pos];
  }

  /**
   * @brief Check whether a key exists within the hashtable.
   *
   * Probes from the key's hash position with a step that grows by one on
   * every miss, and stops at the first empty slot.
   *
   * @param id The key to check for.
   *
   * @return True if the key exists in the hashtable.
   */
  inline __device__ bool Contains(const IdType id) const {
    IdType pos = Hash(id);

    IdType delta = 1;
    // An empty slot ends the probe sequence: the key was never inserted.
    while (table_[pos].key != kEmptyKey) {
      if (table_[pos].key == id) {
        return true;
      }
      pos = Hash(pos + delta);
      delta += 1;
    }
    return false;
  }

 protected:
  // Must be uniform bytes for memset to work
  static constexpr IdType kEmptyKey = static_cast(-1);

  // The table storage, held by pointer only; this handle never frees it.
  const Mapping* table_;
  // The number of entries in table_.
  size_t size_;

  /**
   * @brief Create a new device-side handle to the hash table.
   *
   * @param table The table stored in GPU memory.
   * @param size The size of the table.
   */
  explicit DeviceOrderedHashTable(const Mapping* table, size_t size);

  /**
   * @brief Search for an item in the hash table which is known to exist.
   *
   * WARNING: If the ID searched for does not exist within the hashtable, this
   * function will never return.
   *
   * @param id The ID of the item to search for.
   *
   * @return The position of the item in the hashtable.
   */
  inline __device__ IdType SearchForPosition(const IdType id) const {
    IdType pos = Hash(id);

    // probe for the matching entry; the step grows by one after every miss
    IdType delta = 1;
    while (table_[pos].key != id) {
      // Hitting an empty slot means the key is absent, which violates the
      // precondition of this function.
      assert(table_[pos].key != kEmptyKey);
      pos = Hash(pos + delta);
      delta += 1;
    }
    assert(pos < size_);

    return pos;
  }

  /**
   * @brief Hash an ID to a position in the hash table.
   *
   * @param id The ID to hash.
   *
   * @return The hash, i.e. `id` modulo the table size.
   */
  inline __device__ size_t Hash(const IdType id) const { return id % size_; }

  friend class OrderedHashTable;
};

/**
 * @brief A host-side handle for a GPU hashtable for mapping items to the
 * first index at which they appear in the provided data array. This host-side
 * handle is responsible for allocating and freeing the GPU memory of the
 * hashtable.
 *
 * For any ID array A, one can view it as a mapping from the index `i`
 * (continuous integer range from zero) to its element `A[i]`. This hashtable
 * serves as a reverse mapping, i.e., from element `A[i]` to its index `i`.
 * Quadratic probing is used for collision resolution.
*
 * The hash table should be used in two phases, the first is filling the hash
 * table via 'FillWithDuplicates()' or 'FillWithUnique()'. Then, the
 * 'DeviceHandle()' method can be called, to get a version suitable for
 * searching from device and kernel functions.
 *
 * If 'FillWithDuplicates()' was called with an array of:
 * [0, 3, 2, 0, 8, 4, 3, 2, 1, 8]
 *
 * The resulting entries in the hash-table would be:
 * [
 *   {key: 0, local: 0, index: 0},
 *   {key: 3, local: 1, index: 1},
 *   {key: 2, local: 2, index: 2},
 *   {key: 8, local: 3, index: 4},
 *   {key: 4, local: 4, index: 5},
 *   {key: 1, local: 5, index: 8}
 * ]
 *
 * @tparam IdType The type of the IDs.
 */
template class OrderedHashTable {
 public:
  // Scale used by the constructor when the caller does not pass one.
  static constexpr int kDefaultScale = 3;

  using Mapping = typename DeviceOrderedHashTable::Mapping;

  /**
   * @brief Create a new ordered hash table. The amount of GPU memory
   * consumed by the resulting hashtable is O(`size` * 2^`scale`).
   *
   * @param size The number of items to insert into the hashtable.
   * @param ctx The device context to store the hashtable on.
   * @param stream The stream to use for initializing the hashtable.
   * @param scale The power of two times larger the number of buckets should
   * be than the number of items.
   */
  OrderedHashTable(
      const size_t size, DGLContext ctx, cudaStream_t stream,
      const int scale = kDefaultScale);

  /**
   * @brief Cleanup after the hashtable.
   */
  ~OrderedHashTable();

  // Disable copying
  OrderedHashTable(const OrderedHashTable& other) = delete;
  OrderedHashTable& operator=(const OrderedHashTable& other) = delete;

  /**
   * @brief Fill the hashtable with the array containing possibly duplicate
   * IDs.
   *
   * @param input The array of IDs to insert.
   * @param num_input The number of IDs to insert.
   * @param unique The list of unique IDs inserted (output).
   * @param num_unique The number of unique IDs inserted (output).
   * @param stream The stream to perform operations on.
   */
  void FillWithDuplicates(
      const IdType* const input, const size_t num_input, IdType* const unique,
      int64_t* const num_unique, cudaStream_t stream);

  /**
   * @brief Fill the hashtable with an array of unique keys.
   *
   * @param input The array of unique IDs.
   * @param num_input The number of keys.
   * @param stream The stream to perform operations on.
   */
  void FillWithUnique(
      const IdType* const input, const size_t num_input, cudaStream_t stream);

  /**
   * @brief Get a version of the hashtable usable from device functions.
   *
   * @return This hashtable.
   */
  DeviceOrderedHashTable DeviceHandle() const;

 private:
  // Table storage, allocated in the constructor and freed in the destructor.
  Mapping* table_;
  // The number of entries in table_.
  size_t size_;
  // The device context the table was allocated on.
  DGLContext ctx_;
};

}  // namespace cuda
}  // namespace runtime
}  // namespace dgl

#endif  // DGL_RUNTIME_CUDA_CUDA_HASHTABLE_CUH_

================================================
FILE: src/runtime/cuda/gpu_cache.cu
================================================
/*!
 *  Copyright (c) 2022 by Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * \file gpu_cache.cu
 * \brief Implementation of wrapper HugeCTR gpu_cache routines.
*/

#ifndef DGL_RUNTIME_CUDA_GPU_CACHE_H_
#define DGL_RUNTIME_CUDA_GPU_CACHE_H_

#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "../../runtime/cuda/cuda_common.h"

namespace dgl {
namespace runtime {
namespace cuda {

/*!
 * \brief Runtime object wrapping a HugeCTR gpu_cache that stores rows of
 *        num_feats float values addressed by keys of type key_t.
 *
 * The cache is bound to the CUDA device that is current when the object is
 * constructed; Query() and Replace() reject arrays living on another device.
 */
template class GpuCache : public runtime::Object {
  constexpr static int set_associativity = 2;
  constexpr static int WARP_SIZE = 32;
  // Number of slots per bucket; used to size the cache in the constructor.
  constexpr static int bucket_size = WARP_SIZE * set_associativity;
  using gpu_cache_t = gpu_cache::gpu_cache<
      key_t, uint64_t, std::numeric_limits::max(), set_associativity,
      WARP_SIZE>;

 public:
  // Type key depends on the key width: 4-byte keys vs. everything else.
  static constexpr const char *_type_key =
      sizeof(key_t) == 4 ? "cuda.GpuCache32" : "cuda.GpuCache64";
  DGL_DECLARE_OBJECT_TYPE_INFO(GpuCache, Object);

  /*!
   * \brief Create a cache with room for at least num_items rows.
   * \param num_items Requested capacity; rounded up to whole buckets.
   * \param num_feats Number of values stored per key.
   */
  GpuCache(size_t num_items, size_t num_feats)
      : num_feats(num_feats),
        cache(std::make_unique(
            (num_items + bucket_size - 1) / bucket_size, num_feats)) {
    // Remember the device the cache was created on for later validation.
    CUDA_CALL(cudaGetDevice(&cuda_device));
  }

  /*!
   * \brief Look up keys in the cache.
   * \param keys 1-D array of keys located on this cache's CUDA device.
   * \return A tuple (values, missing_index, missing_keys). values has shape
   *         (keys->shape[0], num_feats) and float32 dtype; the two missing_*
   *         arrays are views truncated to the number of misses reported by
   *         the underlying cache.
   */
  std::tuple Query(IdArray keys) {
    const auto &ctx = keys->ctx;
    cudaStream_t stream = dgl::runtime::getCurrentCUDAStream();
    auto device = dgl::runtime::DeviceAPI::Get(ctx);
    CHECK_EQ(ctx.device_type, kDGLCUDA)
        << "The keys should be on a CUDA device";
    CHECK_EQ(ctx.device_id, cuda_device)
        << "The keys should be on the correct CUDA device";
    CHECK_EQ(keys->ndim, 1)
        << "The tensor of requested indices must be of dimension one.";
    NDArray values = NDArray::Empty(
        {keys->shape[0], (int64_t)num_feats}, DGLDataType{kDGLFloat, 32, 1},
        ctx);
    // Both outputs are sized for the worst case in which every key misses.
    IdArray missing_index = aten::NewIdArray(keys->shape[0], ctx, 64);
    IdArray missing_keys =
        aten::NewIdArray(keys->shape[0], ctx, sizeof(key_t) * 8);
    // Device-side counter that receives the number of misses.
    size_t *missing_len =
        static_cast(device->AllocWorkspace(ctx, sizeof(size_t)));
    cache->Query(
        static_cast(keys->data), keys->shape[0],
        static_cast(values->data),
        static_cast(missing_index->data),
        static_cast(missing_keys->data), missing_len, stream);
    // Bring the miss count to the host so the outputs can be trimmed.
    size_t missing_len_host;
    device->CopyDataFromTo(
        missing_len, 0, &missing_len_host, 0, sizeof(missing_len_host), ctx,
        DGLContext{kDGLCPU, 0}, keys->dtype);
    device->FreeWorkspace(ctx, missing_len);
    missing_index = missing_index.CreateView(
        {(int64_t)missing_len_host}, missing_index->dtype);
    missing_keys =
        missing_keys.CreateView({(int64_t)missing_len_host}, keys->dtype);
    return std::make_tuple(values, missing_index, missing_keys);
  }

  /*!
   * \brief Insert or overwrite cache rows.
   * \param keys Keys located on this cache's CUDA device.
   * \param values Rows located on the same device; the first dimension must
   *        match keys and the second must equal num_feats.
   */
  void Replace(IdArray keys, NDArray values) {
    cudaStream_t stream = dgl::runtime::getCurrentCUDAStream();
    CHECK_EQ(keys->ctx.device_type, kDGLCUDA)
        << "The keys should be on a CUDA device";
    CHECK_EQ(keys->ctx.device_id, cuda_device)
        << "The keys should be on the correct CUDA device";
    CHECK_EQ(values->ctx.device_type, kDGLCUDA)
        << "The values should be on a CUDA device";
    CHECK_EQ(values->ctx.device_id, cuda_device)
        << "The values should be on the correct CUDA device";
    CHECK_EQ(keys->shape[0], values->shape[0])
        << "First dimensions of keys and values must match";
    // NOTE(review): values->shape[1] is read without first checking that
    // values->ndim is 2 -- confirm callers always pass a 2-D array.
    CHECK_EQ(values->shape[1], num_feats) << "Embedding dimension must match";
    cache->Replace(
        static_cast(keys->data), keys->shape[0],
        static_cast(values->data), stream);
  }

 private:
  // Number of values stored per key.
  size_t num_feats;
  // The wrapped HugeCTR cache.
  std::unique_ptr cache;
  // CUDA device that was current when this object was constructed.
  int cuda_device;
};

static_assert(sizeof(unsigned int) == 4);
DGL_DEFINE_OBJECT_REF(GpuCacheRef32, GpuCache);
// The cu file in HugeCTR gpu cache uses unsigned int and long long.
// Changing to int64_t results in a mismatch of template arguments.
static_assert(sizeof(long long) == 8);  // NOLINT
DGL_DEFINE_OBJECT_REF(GpuCacheRef64, GpuCache);  // NOLINT

/* CAPI **********************************************************************/

using namespace dgl::runtime;

// Arguments: (num_items, num_feats, num_bits). A 32-bit-key cache is created
// when num_bits is 32; any other value yields a 64-bit-key cache.
DGL_REGISTER_GLOBAL("cuda._CAPI_DGLGpuCacheCreate")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      const size_t num_items = args[0];
      const size_t num_feats = args[1];
      const int num_bits = args[2];

      if (num_bits == 32)
        *rv = GpuCacheRef32(
            std::make_shared>(num_items, num_feats));
      else
        *rv = GpuCacheRef64(std::make_shared>(  // NOLINT
            num_items, num_feats));
    });

// Arguments: (cache, keys). The cache flavour is chosen from the bit width of
// the keys' dtype. Returns a list [values, missing_index, missing_keys].
DGL_REGISTER_GLOBAL("cuda._CAPI_DGLGpuCacheQuery")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      IdArray keys = args[1];

      List ret;
      if (keys->dtype.bits == 32) {
        GpuCacheRef32 cache = args[0];
        auto result = cache->Query(keys);

        ret.push_back(Value(MakeValue(std::get<0>(result))));
        ret.push_back(Value(MakeValue(std::get<1>(result))));
        ret.push_back(Value(MakeValue(std::get<2>(result))));
      } else {
        GpuCacheRef64 cache = args[0];
        auto result = cache->Query(keys);

        ret.push_back(Value(MakeValue(std::get<0>(result))));
        ret.push_back(Value(MakeValue(std::get<1>(result))));
        ret.push_back(Value(MakeValue(std::get<2>(result))));
      }

      *rv = ret;
    });

// Arguments: (cache, keys, values). The cache flavour is chosen from the bit
// width of the keys' dtype. Returns an empty list.
DGL_REGISTER_GLOBAL("cuda._CAPI_DGLGpuCacheReplace")
    .set_body([](DGLArgs args, DGLRetValue *rv) {
      IdArray keys = args[1];
      NDArray values = args[2];

      if (keys->dtype.bits == 32) {
        GpuCacheRef32 cache = args[0];
        cache->Replace(keys, values);
      } else {
        GpuCacheRef64 cache = args[0];
        cache->Replace(keys, values);
      }

      *rv = List{};
    });

}  // namespace cuda
}  // namespace runtime
}  // namespace dgl

#endif

================================================
FILE: src/runtime/dlpack_convert.cc
================================================
/**
 *  Copyright (c) 2022 by Contributors
 * @file src/runtime/dlpack_convert.cc
 * @brief Conversion between NDArray and DLPack.
*/
#include
#include
#include
#include
#include
#include

#include "runtime_base.h"

// deleter for arrays used by DLPack exporter
extern "C" void NDArrayDLPackDeleter(DLManagedTensor* tensor);

namespace dgl {
namespace runtime {

// Deleter installed on exported tensors: drops the reference taken in
// ContainerToDLPack() and frees the DLManagedTensor wrapper itself.
void NDArrayDLPackDeleter(DLManagedTensor* tensor) {
  static_cast(tensor->manager_ctx)->DecRef();
  delete tensor;
}

// Field-by-field conversion from a DLPack device to a DGL context.
inline DGLContext ToDGLContext(const DLDevice& device) {
  DGLContext ctx;
  ctx.device_type = static_cast(device.device_type);
  ctx.device_id = device.device_id;
  return ctx;
}

// Field-by-field conversion from a DLPack data type to a DGL data type.
inline DGLDataType ToDGLDataType(const DLDataType& src) {
  DGLDataType ret;
  ret.code = src.code;
  ret.bits = src.bits;
  ret.lanes = src.lanes;
  return ret;
}

// Field-by-field conversion from a DGL context to a DLPack device.
inline DLDevice ToDLDevice(const DGLContext& ctx) {
  DLDevice device;
  device.device_type = static_cast(ctx.device_type);
  device.device_id = ctx.device_id;
  return device;
}

// Field-by-field conversion from a DGL data type to a DLPack data type.
inline DLDataType ToDLDataType(const DGLDataType& src) {
  DLDataType ret;
  ret.code = src.code;
  ret.bits = src.bits;
  ret.lanes = src.lanes;
  return ret;
}

// Wraps a DLPack tensor in an NDArray without copying: the new container
// aliases the tensor's data, shape and strides, and keeps the tensor as its
// manager_ctx so DLPackDeleter() can release it later.
NDArray DLPackConvert::FromDLPack(DLManagedTensor* tensor) {
  NDArray::Container* data = new NDArray::Container();
  data->deleter = DLPackConvert::DLPackDeleter;
  data->manager_ctx = tensor;
  data->dl_tensor.data = tensor->dl_tensor.data;
  data->dl_tensor.ctx = ToDGLContext(tensor->dl_tensor.device);
  data->dl_tensor.ndim = tensor->dl_tensor.ndim;
  data->dl_tensor.dtype = ToDGLDataType(tensor->dl_tensor.dtype);
  data->dl_tensor.shape = tensor->dl_tensor.shape;
  data->dl_tensor.strides = tensor->dl_tensor.strides;
  data->dl_tensor.byte_offset = tensor->dl_tensor.byte_offset;

  return NDArray(data);
}

// Container deleter for arrays created by FromDLPack(): invokes the source
// tensor's own deleter (when it has one) and then frees the container.
void DLPackConvert::DLPackDeleter(NDArray::Container* ptr) {
  // if the array is pinned by dgl, unpin it before freeing
  if (ptr->pinned_by_dgl_) NDArray::UnpinContainer(ptr);
  DLManagedTensor* tensor = static_cast(ptr->manager_ctx);
  if (tensor->deleter != nullptr) {
    (*tensor->deleter)(tensor);
  }
  delete ptr;
}

// Exports a container as a newly allocated DLManagedTensor that aliases the
// container's data. The container's reference count is incremented here and
// decremented again by NDArrayDLPackDeleter().
DLManagedTensor* ContainerToDLPack(NDArray::Container* from) {
  CHECK(from != nullptr);
  DLManagedTensor* ret = new DLManagedTensor();
  ret->dl_tensor.data = from->dl_tensor.data;
  ret->dl_tensor.device = ToDLDevice(from->dl_tensor.ctx);
  ret->dl_tensor.ndim = from->dl_tensor.ndim;
  ret->dl_tensor.dtype = ToDLDataType(from->dl_tensor.dtype);
  ret->dl_tensor.shape = from->dl_tensor.shape;
  ret->dl_tensor.strides = from->dl_tensor.strides;
  ret->dl_tensor.byte_offset = from->dl_tensor.byte_offset;

  ret->manager_ctx = from;
  from->IncRef();
  ret->deleter = NDArrayDLPackDeleter;
  return ret;
}

// Exports an NDArray through its underlying container.
DLManagedTensor* DLPackConvert::ToDLPack(const NDArray& from) {
  return ContainerToDLPack(from.data_);
}

}  // namespace runtime
}  // namespace dgl

using namespace dgl::runtime;

// Invokes the deleter stored in the tensor. The deleter is called
// unconditionally, so it must not be null.
void DGLDLManagedTensorCallDeleter(DLManagedTensor* dltensor) {
  (*(dltensor->deleter))(dltensor);
}

// Returns true when the address is a multiple of `alignment`. `alignment`
// must be non-zero because it is used as a divisor.
inline bool IsAligned(const void* ptr, std::uintptr_t alignment) noexcept {
  auto iptr = reinterpret_cast(ptr);
  return !(iptr % alignment);
}

// C API: wrap a DLPack tensor as a DGL array handle.
int DGLArrayFromDLPack(DLManagedTensor* from, DGLArrayHandle* out) {
  API_BEGIN();
  *out = NDArray::Internal::MoveAsDGLArray(DLPackConvert::FromDLPack(from));
  API_END();
}

// C API: export a DGL array as a DLPack tensor. An alignment of 0 disables
// the alignment check; otherwise a misaligned array is copied into a freshly
// allocated one before it is exported.
int DGLArrayToDLPack(
    DGLArrayHandle from, DLManagedTensor** out, int alignment) {
  API_BEGIN();
  auto* nd_container = reinterpret_cast(from);
  DGLArray* nd = &(nd_container->dl_tensor);
  // If the source DGLArray is not aligned, we should create a new aligned one
  // NOTE(review): only nd->data is tested; byte_offset is not taken into
  // account -- confirm that this is intended.
  if (alignment != 0 && !IsAligned(nd->data, alignment)) {
    std::vector shape_vec(nd->shape, nd->shape + nd->ndim);
    NDArray copy_ndarray = NDArray::Empty(shape_vec, nd->dtype, nd->ctx);
    copy_ndarray.CopyFrom(nd);
    *out = DLPackConvert::ToDLPack(copy_ndarray);
  } else {
    *out = ContainerToDLPack(nd_container);
  }
  API_END();
}

================================================
FILE: src/runtime/dso_module.cc
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file dso_module.cc
 * @brief Module to load from dynamic shared library.
*/
#include
#include
#include

#include "module_util.h"

#if defined(_WIN32)
#include
#else
#include
#endif

namespace dgl {
namespace runtime {

// Module to load from dynamic shared library.
// This is the default module DGL used for host-side AOT
class DSOModuleNode final : public ModuleNode {
 public:
  // Unloads the library if one was successfully loaded.
  ~DSOModuleNode() {
    if (lib_handle_) Unload();
  }

  const char* type_key() const final { return "dso"; }

  // Looks up `name` in the library and wraps it as a PackedFunc. Returns an
  // empty PackedFunc when the symbol does not exist.
  PackedFunc GetFunction(
      const std::string& name,
      const std::shared_ptr& sptr_to_self) final {
    BackendPackedCFunc faddr;
    if (name == runtime::symbol::dgl_module_main) {
      // The main symbol does not hold the function itself but the name of the
      // entry function, which is resolved with a second lookup.
      const char* entry_name = reinterpret_cast(
          GetSymbol(runtime::symbol::dgl_module_main));
      CHECK(entry_name != nullptr)
          << "Symbol " << runtime::symbol::dgl_module_main
          << " is not presented";
      faddr = reinterpret_cast(GetSymbol(entry_name));
    } else {
      faddr = reinterpret_cast(GetSymbol(name.c_str()));
    }
    if (faddr == nullptr) return PackedFunc();
    return WrapPackedFunc(faddr, sptr_to_self);
  }

  // Loads the library `name` and wires it up: stores this module in the
  // library's context slot (when present), initialises the context functions
  // and imports any modules embedded in the device blob symbol.
  void Init(const std::string& name) {
    Load(name);
    if (auto* ctx_addr = reinterpret_cast(
            GetSymbol(runtime::symbol::dgl_module_ctx))) {
      *ctx_addr = this;
    }
    InitContextFunctions(
        [this](const char* fname) { return GetSymbol(fname); });
    // Load the imported modules
    const char* dev_mblob = reinterpret_cast(
        GetSymbol(runtime::symbol::dgl_dev_mblob));
    if (dev_mblob != nullptr) {
      ImportModuleBlob(dev_mblob, &imports_);
    }
  }

 private:
  // Platform dependent handling.
#if defined(_WIN32)
  // library handle
  HMODULE lib_handle_{nullptr};
  // Load the library
  void Load(const std::string& name) {
    // use wstring version that is needed by LLVM.
    std::wstring wname(name.begin(), name.end());
    lib_handle_ = LoadLibraryW(wname.c_str());
    CHECK(lib_handle_ != nullptr)
        << "Failed to load dynamic shared library " << name;
  }
  // Resolve a symbol; the result is null when the lookup fails.
  void* GetSymbol(const char* name) {
    return reinterpret_cast(
        GetProcAddress(lib_handle_, (LPCSTR)name));  // NOLINT(*)
  }
  void Unload() { FreeLibrary(lib_handle_); }
#else
  // Library handle
  void* lib_handle_{nullptr};
  // load the library
  void Load(const std::string& name) {
    lib_handle_ = dlopen(name.c_str(), RTLD_LAZY | RTLD_LOCAL);
    CHECK(lib_handle_ != nullptr)
        << "Failed to load dynamic shared library " << name << " "
        << dlerror();
  }
  // Resolve a symbol; the result is null when the lookup fails.
  void* GetSymbol(const char* name) { return dlsym(lib_handle_, name); }
  void Unload() { dlclose(lib_handle_); }
#endif
};

// Loader for shared libraries: args[0] is the library path. The result is a
// Module backed by a DSOModuleNode.
DGL_REGISTER_GLOBAL("module.loadfile_so")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      std::shared_ptr n = std::make_shared();
      n->Init(args[0]);
      *rv = runtime::Module(n);
    });
}  // namespace runtime
}  // namespace dgl

================================================
FILE: src/runtime/file_util.cc
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file file_util.cc
 */
#include "file_util.h"

#include
#include
#include
#include
#include
#include

namespace dgl {
namespace runtime {

// Writes the function info as a JSON object; argument types are stored in
// their string form.
void FunctionInfo::Save(dmlc::JSONWriter* writer) const {
  std::vector sarg_types(arg_types.size());
  for (size_t i = 0; i < arg_types.size(); ++i) {
    sarg_types[i] = DGLDataType2String(arg_types[i]);
  }
  writer->BeginObject();
  writer->WriteObjectKeyValue("name", name);
  writer->WriteObjectKeyValue("arg_types", sarg_types);
  writer->WriteObjectKeyValue("thread_axis_tags", thread_axis_tags);
  writer->EndObject();
}

// Reads the JSON object written by Save(dmlc::JSONWriter*) and converts the
// argument type strings back into data types.
void FunctionInfo::Load(dmlc::JSONReader* reader) {
  dmlc::JSONObjectReadHelper helper;
  std::vector sarg_types;
  helper.DeclareField("name", &name);
  helper.DeclareField("arg_types", &sarg_types);
  helper.DeclareField("thread_axis_tags", &thread_axis_tags);
  helper.ReadAllFields(reader);
  arg_types.resize(sarg_types.size());
  for (size_t i = 0; i < arg_types.size(); ++i) {
    arg_types[i] = String2DGLDataType(sarg_types[i]);
  }
}

// Binary serialisation: name, arg_types and thread_axis_tags, in that order.
void FunctionInfo::Save(dmlc::Stream* writer) const {
  writer->Write(name);
  writer->Write(arg_types);
  writer->Write(thread_axis_tags);
}

// Binary deserialisation in the order used by Save(dmlc::Stream*). Returns
// false as soon as one of the reads fails.
bool FunctionInfo::Load(dmlc::Stream* reader) {
  if (!reader->Read(&name)) return false;
  if (!reader->Read(&arg_types)) return false;
  if (!reader->Read(&thread_axis_tags)) return false;
  return true;
}

// An explicit, non-empty `format` is returned unchanged. Otherwise the format
// is deduced from the file name: "sgx" when the name contains ".signed.so",
// else the text after the last '.', else the empty string.
std::string GetFileFormat(
    const std::string& file_name, const std::string& format) {
  std::string fmt = format;
  if (fmt.length() == 0) {
    if (file_name.find(".signed.so") != std::string::npos) return "sgx";
    size_t pos = file_name.find_last_of(".");
    if (pos != std::string::npos) {
      return file_name.substr(pos + 1, file_name.length() - pos - 1);
    } else {
      return "";
    }
  } else {
    return format;
  }
}

// Resolution order: $DGL_CACHE_DIR, then $XDG_CACHE_HOME/dgl, then
// $HOME/.cache/dgl, and finally the current directory.
std::string GetCacheDir() {
  char* env_cache_dir;
  if ((env_cache_dir = getenv("DGL_CACHE_DIR"))) return env_cache_dir;
  if ((env_cache_dir = getenv("XDG_CACHE_HOME"))) {
    return std::string(env_cache_dir) + "/dgl";
  }
  if ((env_cache_dir = getenv("HOME"))) {
    return std::string(env_cache_dir) + "/.cache/dgl";
  }
  return ".";
}

// Returns the part of the name after the last '/', or the whole name when it
// contains no '/'. Only '/' is treated as a separator.
std::string GetFileBasename(const std::string& file_name) {
  size_t last_slash = file_name.find_last_of("/");
  if (last_slash == std::string::npos) return file_name;
  return file_name.substr(last_slash + 1);
}

// Replaces everything from the last '.' onwards with ".dgl_meta.json", or
// appends that suffix when the name contains no '.'.
std::string GetMetaFilePath(const std::string& file_name) {
  size_t pos = file_name.find_last_of(".");
  if (pos != std::string::npos) {
    return file_name.substr(0, pos) + ".dgl_meta.json";
  } else {
    return file_name + ".dgl_meta.json";
  }
}

// Reads the whole file into *data; a file that cannot be opened is fatal.
void LoadBinaryFromFile(const std::string& file_name, std::string* data) {
  std::ifstream fs(file_name, std::ios::in | std::ios::binary);
  CHECK(!fs.fail()) << "Cannot open " << file_name;
  // get its size:
  // NOTE(review): the result of tellg() and the outcome of read() are not
  // checked for failure.
  fs.seekg(0, std::ios::end);
  size_t size = static_cast(fs.tellg());
  fs.seekg(0, std::ios::beg);
  data->resize(size);
  fs.read(&(*data)[0], size);
}

// Writes `data` to the file; a file that cannot be opened is fatal.
void SaveBinaryToFile(const std::string& file_name, const std::string& data) {
  std::ofstream fs(file_name, std::ios::out | std::ios::binary);
  CHECK(!fs.fail()) << "Cannot open " << file_name;
  fs.write(&data[0], data.length());
}

// Writes a JSON object with the keys "dgl_version" (fixed to "0.1.0") and
// "func_info" (the function map).
void SaveMetaDataToFile(
    const std::string& file_name,
    const std::unordered_map& fmap) {
  std::string version = "0.1.0";
  std::ofstream fs(file_name.c_str());
  CHECK(!fs.fail()) << "Cannot open file " << file_name;
  dmlc::JSONWriter writer(&fs);
  writer.BeginObject();
  writer.WriteObjectKeyValue("dgl_version", version);
  writer.WriteObjectKeyValue("func_info", fmap);
  writer.EndObject();
  fs.close();
}

// Reads the JSON object written by SaveMetaDataToFile(). The version string
// is parsed but not used afterwards.
void LoadMetaDataFromFile(
    const std::string& file_name,
    std::unordered_map* fmap) {
  std::ifstream fs(file_name.c_str());
  CHECK(!fs.fail()) << "Cannot open file " << file_name;
  std::string version;
  dmlc::JSONReader reader(&fs);
  dmlc::JSONObjectReadHelper helper;
  helper.DeclareField("dgl_version", &version);
  helper.DeclareField("func_info", fmap);
  helper.ReadAllFields(&reader);
  fs.close();
}

}  // namespace runtime
}  // namespace dgl

================================================
FILE: src/runtime/file_util.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file file_util.h
 * @brief Minimum file manipulation util for runtime.
 */
#ifndef DGL_RUNTIME_FILE_UTIL_H_
#define DGL_RUNTIME_FILE_UTIL_H_

#include
#include

#include "meta_data.h"

namespace dgl {
namespace runtime {
/**
 * @brief Get file format from given file name or format argument.
 * @param file_name The name of the file.
 * @param format The format of the file.
 */
std::string GetFileFormat(
    const std::string& file_name, const std::string& format);

/**
 * @return the directory in which DGL stores cached files.
 *         May be set using DGL_CACHE_DIR; defaults to system locations.
 */
std::string GetCacheDir();

/**
 * @brief Get meta file path given file name.
 * @param file_name The name of the file.
 */
std::string GetMetaFilePath(const std::string& file_name);

/**
 * @brief Get file basename (i.e.
without leading directories)
 * @param file_name The name of the file.
 * @return the base name
 */
std::string GetFileBasename(const std::string& file_name);

/**
 * @brief Load binary file into an in-memory buffer.
 * @param file_name The name of the file.
 * @param data The data to be loaded.
 */
void LoadBinaryFromFile(const std::string& file_name, std::string* data);

/**
 * @brief Save an in-memory buffer to a binary file.
 * @param file_name The name of the file.
 * @param data The binary data to be saved.
 */
void SaveBinaryToFile(const std::string& file_name, const std::string& data);

/**
 * @brief Save meta data to file.
 * @param file_name The name of the file.
 * @param fmap The function info map.
 */
void SaveMetaDataToFile(
    const std::string& file_name,
    const std::unordered_map& fmap);

/**
 * @brief Load meta data from file.
 * @param file_name The name of the file.
 * @param fmap The function info map (output).
 */
void LoadMetaDataFromFile(
    const std::string& file_name,
    std::unordered_map* fmap);
}  // namespace runtime
}  // namespace dgl
#endif  // DGL_RUNTIME_FILE_UTIL_H_

================================================
FILE: src/runtime/meta_data.h
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file meta_data.h
 * @brief Meta data related utilities
 */
#ifndef DGL_RUNTIME_META_DATA_H_
#define DGL_RUNTIME_META_DATA_H_

#include
#include
#include
#include
#include

#include "runtime_base.h"

namespace dgl {
namespace runtime {

/** @brief function information needed by device */
struct FunctionInfo {
  // Name of the function.
  std::string name;
  // Types of the function's arguments.
  std::vector arg_types;
  // Thread axis tags associated with the function.
  std::vector thread_axis_tags;

  // JSON (de)serialisation.
  void Save(dmlc::JSONWriter *writer) const;
  void Load(dmlc::JSONReader *reader);
  // Binary stream (de)serialisation.
  void Save(dmlc::Stream *writer) const;
  bool Load(dmlc::Stream *reader);
};
}  // namespace runtime
}  // namespace dgl

namespace dmlc {
// Declares that FunctionInfo provides its own Save/Load for dmlc streams.
DMLC_DECLARE_TRAITS(has_saveload, ::dgl::runtime::FunctionInfo, true);
}  // namespace dmlc
#endif  // DGL_RUNTIME_META_DATA_H_
================================================
FILE: src/runtime/module.cc
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file module.cc
 * @brief DGL module system
 */
#include
#include
#include
#include
#include
#ifndef _LIBCPP_SGX_CONFIG
#include "file_util.h"
#endif

namespace dgl {
namespace runtime {

// Adds `other` to this module's imports. RPC modules delegate to the
// registered "rpc._ImportRemoteModule" function; for every other module the
// import is rejected when it would create a dependency cycle.
void Module::Import(Module other) {
  // specially handle rpc
  if (!std::strcmp((*this)->type_key(), "rpc")) {
    // The registry lookup is done once and cached in a function-local static.
    static const PackedFunc* fimport_ = nullptr;
    if (fimport_ == nullptr) {
      fimport_ = runtime::Registry::Get("rpc._ImportRemoteModule");
      CHECK(fimport_ != nullptr);
    }
    (*fimport_)(*this, other);
    return;
  }
  // cyclic detection.
  // Walk everything reachable from `other` through imports; finding this
  // module among the visited nodes means the new edge would close a cycle.
  std::unordered_set visited{other.node_.get()};
  std::vector stack{other.node_.get()};
  while (!stack.empty()) {
    const ModuleNode* n = stack.back();
    stack.pop_back();
    for (const Module& m : n->imports_) {
      const ModuleNode* next = m.node_.get();
      if (visited.count(next)) continue;
      visited.insert(next);
      stack.push_back(next);
    }
  }
  CHECK(!visited.count(node_.get()))
      << "Cyclic dependency detected during import";
  node_->imports_.emplace_back(std::move(other));
}

// Loads a module through the loader registered as "module.loadfile_<fmt>",
// where <fmt> is `format` or, when that is empty, deduced from the file name.
// "dll", "dylib" and "dso" all map to the "so" loader.
Module Module::LoadFromFile(
    const std::string& file_name, const std::string& format) {
#ifndef _LIBCPP_SGX_CONFIG
  std::string fmt = GetFileFormat(file_name, format);
  CHECK(fmt.length() != 0) << "Cannot deduce format of file " << file_name;
  if (fmt == "dll" || fmt == "dylib" || fmt == "dso") {
    fmt = "so";
  }
  std::string load_f_name = "module.loadfile_" + fmt;
  const PackedFunc* f = Registry::Get(load_f_name);
  // NOTE(review): the message prints the caller-supplied `format`, which is
  // empty when the format was deduced from the file name; the resolved format
  // is still visible through load_f_name.
  CHECK(f != nullptr) << "Loader of " << format << "(" << load_f_name
                      << ") is not presented.";
  Module m = (*f)(file_name, format);
  return m;
#else
  LOG(FATAL) << "SGX does not support LoadFromFile";
#endif
}

// Default implementation: saving to a file is not supported.
void ModuleNode::SaveToFile(
    const std::string& file_name, const std::string& format) {
  LOG(FATAL) << "Module[" << type_key() << "] does not support SaveToFile";
}

// Default implementation: saving to a binary stream is not supported.
void ModuleNode::SaveToBinary(dmlc::Stream* stream) {
  LOG(FATAL) << "Module[" << type_key() << "] does not support SaveToBinary";
}

// Default implementation: source retrieval is not supported.
std::string ModuleNode::GetSource(const std::string& format) {
  LOG(FATAL) << "Module[" << type_key() << "] does not support GetSource";
  return "";
}

// Resolves `name` for use by this module: first the per-module cache, then
// the imported modules in order, and finally the global registry. Only
// functions found in an imported module are added to the cache.
const PackedFunc* ModuleNode::GetFuncFromEnv(const std::string& name) {
  auto it = import_cache_.find(name);
  if (it != import_cache_.end()) return it->second.get();
  PackedFunc pf;
  for (Module& m : this->imports_) {
    pf = m.GetFunction(name, false);
    if (pf != nullptr) break;
  }
  if (pf == nullptr) {
    const PackedFunc* f = Registry::Get(name);
    CHECK(f != nullptr) << "Cannot find function " << name
                        << " in the imported modules or global registry";
    return f;
  } else {
    // The cache owns a copy of the function so the returned pointer stays
    // valid after this call returns.
    std::unique_ptr f(new PackedFunc(pf));
    import_cache_[name] = std::move(f);
    return import_cache_.at(name).get();
  }
}

// Reports whether the runtime for `target` is available. "cpu" is always
// enabled, llvm targets are delegated to "codegen.llvm_target_enabled", and
// every other known target is enabled exactly when its function is present in
// the global registry. Unknown targets are fatal.
bool RuntimeEnabled(const std::string& target) {
  std::string f_name;
  if (target == "cpu") {
    return true;
  } else if (target == "cuda" || target == "gpu") {
    f_name = "device_api.cuda";
  } else if (target == "cl" || target == "opencl" || target == "sdaccel") {
    f_name = "device_api.opencl";
  } else if (target == "gl" || target == "opengl") {
    f_name = "device_api.opengl";
  } else if (target == "mtl" || target == "metal") {
    f_name = "device_api.metal";
  } else if (target == "vulkan") {
    f_name = "device_api.vulkan";
  } else if (target == "stackvm") {
    f_name = "codegen.build_stackvm";
  } else if (target == "rpc") {
    f_name = "device_api.rpc";
  } else if (target == "vpi" || target == "verilog") {
    f_name = "device_api.vpi";
  } else if (target.length() >= 5 && target.substr(0, 5) == "nvptx") {
    f_name = "device_api.cuda";
  } else if (target.length() >= 4 && target.substr(0, 4) == "rocm") {
    f_name = "device_api.rocm";
  } else if (target.length() >= 4 && target.substr(0, 4) == "llvm") {
    const PackedFunc* pf =
        runtime::Registry::Get("codegen.llvm_target_enabled");
    if (pf == nullptr) return false;
    return (*pf)(target);
  } else {
    LOG(FATAL) << "Unknown optional runtime " << target;
  }
  return runtime::Registry::Get(f_name) != nullptr;
}

// args: (target) -> whether the runtime for the target is enabled.
DGL_REGISTER_GLOBAL("module._Enabled")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      *ret = RuntimeEnabled(args[0]);
    });

// args: (module, format) -> source of the module.
DGL_REGISTER_GLOBAL("module._GetSource")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      *ret = args[0].operator Module()->GetSource(args[1]);
    });

// args: (module) -> number of imported modules.
DGL_REGISTER_GLOBAL("module._ImportsSize")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      *ret = static_cast(args[0].operator Module()->imports().size());
    });

// args: (module, index) -> imported module at `index` (bounds-checked by
// at()).
DGL_REGISTER_GLOBAL("module._GetImport")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      *ret = args[0].operator Module()->imports().at(args[1].operator int());
    });

// args: (module) -> type key of the module.
DGL_REGISTER_GLOBAL("module._GetTypeKey")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      *ret = std::string(args[0].operator Module()->type_key());
    });

// args: (file_name, format) -> loaded module.
DGL_REGISTER_GLOBAL("module._LoadFromFile")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      *ret = Module::LoadFromFile(args[0], args[1]);
    });

// args: (module, file_name, format); no return value.
DGL_REGISTER_GLOBAL("module._SaveToFile")
    .set_body([](DGLArgs args, DGLRetValue* ret) {
      args[0].operator Module()->SaveToFile(args[1], args[2]);
    });
}  // namespace runtime
}  // namespace dgl

================================================
FILE: src/runtime/module_util.cc
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file module_util.cc
 * @brief Utilities for module.
 */
#ifndef _LIBCPP_SGX_CONFIG
#include
#endif
#include
#include
#include
#include
#include "module_util.h"

namespace dgl {
namespace runtime {

/**
 * @brief Deserialize the modules stored in a binary blob.
 *
 * Layout read by this function: sizeof(uint64_t) bytes holding the payload
 * length (assembled least-significant byte first), followed by the payload.
 * The payload starts with a uint64_t module count; each module is a type key
 * string followed by whatever the loader "module.loadbinary_<type key>"
 * consumes from the stream.
 *
 * @param mblob The module blob; must not be null.
 * @param mlist The list the loaded modules are appended to.
 */
void ImportModuleBlob(const char* mblob, std::vector* mlist) {
#ifndef _LIBCPP_SGX_CONFIG
  CHECK(mblob != nullptr);
  uint64_t nbytes = 0;
  for (size_t i = 0; i < sizeof(nbytes); ++i) {
    uint64_t c = mblob[i];
    // Keep only the low 8 bits of the converted char before shifting it into
    // byte position i.
    nbytes |= (c & 0xffUL) << (i * 8);
  }
  dmlc::MemoryFixedSizeStream fs(
      const_cast(mblob + sizeof(nbytes)), static_cast(nbytes));
  dmlc::Stream* stream = &fs;
  uint64_t size;
  CHECK(stream->Read(&size));
  for (uint64_t i = 0; i < size; ++i) {
    std::string tkey;
    CHECK(stream->Read(&tkey));
    std::string fkey = "module.loadbinary_" + tkey;
    const PackedFunc* f = Registry::Get(fkey);
    CHECK(f != nullptr) << "Loader of " << tkey << "(" << fkey
                        << ") is not presented.";
    // The loader is handed the stream itself and reads its own payload.
    Module m = (*f)(static_cast(stream));
    mlist->push_back(m);
  }
#else
  LOG(FATAL) << "SGX does not support ImportModuleBlob";
#endif
}

/**
 * @brief Wrap a BackendPackedCFunc into a PackedFunc.
 *
 * The returned closure calls `faddr` with the raw argument values, type codes
 * and argument count, and fails a CHECK carrying DGLGetLastError() when the
 * call returns non-zero. `sptr_to_self` is captured by value by the closure
 * and is not otherwise used; the return-value slot `rv` is not written here.
 *
 * @param faddr The function address.
 * @param sptr_to_self The shared pointer captured by the closure.
 * @return The wrapped packed function.
 */
PackedFunc WrapPackedFunc(
    BackendPackedCFunc faddr, const std::shared_ptr& sptr_to_self) {
  return PackedFunc([faddr, sptr_to_self](DGLArgs args, DGLRetValue* rv) {
    int ret = (*faddr)(
        const_cast(args.values), const_cast(args.type_codes), args.num_args);
    CHECK_EQ(ret, 0) << DGLGetLastError();
  });
}

}  // namespace runtime
}  // namespace dgl

================================================ FILE: src/runtime/module_util.h ================================================
/**
 * Copyright (c) 2017 by Contributors
 * @file module_util.h
 * @brief Helper utilities for module building
 */
#ifndef DGL_RUNTIME_MODULE_UTIL_H_
#define DGL_RUNTIME_MODULE_UTIL_H_

#include
#include
#include
#include
#include

extern "C" {
// Function signature for generated packed function in shared library
typedef int (*BackendPackedCFunc)(void* args, int* type_codes, int num_args);
}  // extern "C"

namespace dgl {
namespace runtime {
/**
 * @brief Wrap a BackendPackedCFunc to packed function.
 * @param faddr The function address
 * @param mptr The module pointer node.
 */
PackedFunc WrapPackedFunc(
    BackendPackedCFunc faddr, const std::shared_ptr& mptr);
/**
 * @brief Load and append module blob to module list
 * @param mblob The module blob.
 * @param module_list The module list to append to
 */
void ImportModuleBlob(const char* mblob, std::vector* module_list);
/**
 * @brief Utility to initialize context function symbols during startup
 *
 * For every runtime function listed in the body, the symbol named
 * "__<FuncName>" is looked up through `flookup`; when the lookup returns a
 * non-null pointer, the address of the runtime function is stored through it.
 *
 * @param flookup A symbol lookup function.
 * @tparam FLookup a function of signature string->void*
 */
template
void InitContextFunctions(FLookup flookup) {
// Helper macro: look up "__<FuncName>" and, if present, point it at FuncName.
#define DGL_INIT_CONTEXT_FUNC(FuncName)                    \
  if (auto* fp =                                           \
          reinterpret_cast(flookup("__" #FuncName))) {     \
    *fp = FuncName;                                        \
  }
  // Initialize the functions
  DGL_INIT_CONTEXT_FUNC(DGLFuncCall);
  DGL_INIT_CONTEXT_FUNC(DGLAPISetLastError);
  DGL_INIT_CONTEXT_FUNC(DGLBackendGetFuncFromEnv);
  DGL_INIT_CONTEXT_FUNC(DGLBackendAllocWorkspace);
  DGL_INIT_CONTEXT_FUNC(DGLBackendFreeWorkspace);
  DGL_INIT_CONTEXT_FUNC(DGLBackendParallelLaunch);
  DGL_INIT_CONTEXT_FUNC(DGLBackendParallelBarrier);

#undef DGL_INIT_CONTEXT_FUNC
}
}  // namespace runtime
}  // namespace dgl
#endif  // DGL_RUNTIME_MODULE_UTIL_H_

================================================ FILE: src/runtime/ndarray.cc ================================================
/**
 * Copyright (c) 2017-2022 by Contributors
 * @file ndarray.cc
 * @brief NDArray container infrastructure.
*/ #include #include #include #include #include #include #include #include #include "runtime_base.h" namespace dgl { constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; #ifdef DGL_USE_CUDA constexpr DGLDataType DGLDataTypeTraits<__half>::dtype; #if BF16_ENABLED constexpr DGLDataType DGLDataTypeTraits<__nv_bfloat16>::dtype; #endif // BF16_ENABLED #endif // DGL_USE_CUDA constexpr DGLDataType DGLDataTypeTraits::dtype; constexpr DGLDataType DGLDataTypeTraits::dtype; namespace runtime { inline void VerifyDataType(DGLDataType dtype) { CHECK_GE(dtype.lanes, 1); if (dtype.code == kDGLFloat) { CHECK_EQ(dtype.bits % 8, 0); } else { CHECK_EQ(dtype.bits % 8, 0); } CHECK_EQ(dtype.bits & (dtype.bits - 1), 0); } inline size_t GetDataSize(const DGLArray& arr) { size_t size = 1; for (dgl_index_t i = 0; i < arr.ndim; ++i) { size *= arr.shape[i]; } size *= (arr.dtype.bits * arr.dtype.lanes + 7) / 8; return size; } inline size_t GetDataAlignment(const DGLArray& arr) { size_t align = (arr.dtype.bits / 8) * arr.dtype.lanes; if (align < kAllocAlignment) return kAllocAlignment; return align; } void NDArray::Internal::DefaultDeleter(NDArray::Container* ptr) { using dgl::runtime::NDArray; if (ptr->manager_ctx != nullptr) { static_cast(ptr->manager_ctx)->DecRef(); } else if (ptr->mem) { ptr->mem = nullptr; } else if (ptr->dl_tensor.data != nullptr) { // if the array is still pinned before freeing, unpin it. 
if (ptr->pinned_by_dgl_) UnpinContainer(ptr); if (ptr->pinned_by_pytorch_) { DeviceAPI::Get(kDGLCUDA)->FreePinnedDataSpace( &(ptr->pytorch_raw_deleter_)); CHECK(ptr->pytorch_raw_deleter_ == nullptr); ptr->pinned_by_pytorch_ = false; ptr->pytorch_ctx_ = nullptr; } else { dgl::runtime::DeviceAPI::Get(ptr->dl_tensor.ctx) ->FreeDataSpace(ptr->dl_tensor.ctx, ptr->dl_tensor.data); } } delete ptr; } NDArray NDArray::Internal::Create( std::vector shape, DGLDataType dtype, DGLContext ctx) { VerifyDataType(dtype); // critical zone NDArray::Container* data = new NDArray::Container(); data->deleter = DefaultDeleter; NDArray ret(data); ret.data_ = data; // RAII now in effect // setup shape data->shape_ = std::move(shape); data->dl_tensor.shape = dmlc::BeginPtr(data->shape_); data->dl_tensor.ndim = static_cast(data->shape_.size()); // setup stride (this should be optional, but some framework // does not support NULL stride and thus will crash the program). data->stride_.resize(data->dl_tensor.ndim, 1); for (int i = data->dl_tensor.ndim - 2; i >= 0; --i) { data->stride_[i] = data->shape_[i + 1] * data->stride_[i + 1]; } data->dl_tensor.strides = dmlc::BeginPtr(data->stride_); // setup dtype data->dl_tensor.dtype = dtype; // setup ctx data->dl_tensor.ctx = ctx; return ret; } DGLArray* NDArray::Internal::MoveAsDGLArray(NDArray arr) { DGLArray* tensor = reinterpret_cast(arr.data_); CHECK(tensor == const_cast(arr.operator->())); arr.data_ = nullptr; return tensor; } size_t NDArray::GetSize() const { return GetDataSize(data_->dl_tensor); } int64_t NDArray::NumElements() const { if (data_->dl_tensor.ndim == 0) return 0; int64_t size = 1; for (int i = 0; i < data_->dl_tensor.ndim; ++i) { size *= data_->dl_tensor.shape[i]; } return size; } bool NDArray::IsContiguous() const { CHECK(data_ != nullptr); if (data_->dl_tensor.strides == nullptr) return true; // See https://github.com/dmlc/dgl/issues/2118 and PyTorch's // compute_contiguous() implementation int64_t z = 1; for (int64_t i = 
data_->dl_tensor.ndim - 1; i >= 0; --i) { if (data_->dl_tensor.shape[i] != 1) { if (data_->dl_tensor.strides[i] == z) z *= data_->dl_tensor.shape[i]; else return false; } } return true; } NDArray NDArray::CreateView( std::vector shape, DGLDataType dtype, int64_t offset) { CHECK(data_ != nullptr); CHECK(IsContiguous()) << "Can only create view for compact tensor"; NDArray ret = Internal::Create(shape, dtype, data_->dl_tensor.ctx); ret.data_->dl_tensor.byte_offset = this->data_->dl_tensor.byte_offset; size_t curr_size = GetDataSize(this->data_->dl_tensor); size_t view_size = GetDataSize(ret.data_->dl_tensor); CHECK_LE(view_size, curr_size) << "Tries to create a view that has bigger memory than current one"; // increase ref count this->data_->IncRef(); ret.data_->manager_ctx = this->data_; ret.data_->dl_tensor.data = static_cast(this->data_->dl_tensor.data) + offset; return ret; } NDArray NDArray::EmptyShared( const std::string& name, std::vector shape, DGLDataType dtype, DGLContext ctx, bool is_create) { NDArray ret = Internal::Create(shape, dtype, ctx); size_t size = GetDataSize(ret.data_->dl_tensor); auto mem = std::make_shared(name); if (is_create) { ret.data_->dl_tensor.data = mem->CreateNew(size); } else { ret.data_->dl_tensor.data = mem->Open(size); } ret.data_->mem = mem; return ret; } NDArray NDArray::Empty( std::vector shape, DGLDataType dtype, DGLContext ctx) { NDArray ret = Internal::Create(shape, dtype, ctx); size_t size = GetDataSize(ret.data_->dl_tensor); size_t alignment = GetDataAlignment(ret.data_->dl_tensor); if (size > 0) ret.data_->dl_tensor.data = DeviceAPI::Get(ret->ctx)->AllocDataSpace( ret->ctx, size, alignment, ret->dtype); return ret; } void NDArray::CopyFromTo(DGLArray* from, DGLArray* to) { size_t from_size = GetDataSize(*from); size_t to_size = GetDataSize(*to); CHECK_EQ(from_size, to_size) << "DGLArrayCopyFromTo: The size must exactly match"; CHECK( from->ctx.device_type == to->ctx.device_type || from->ctx.device_type == kDGLCPU || 
to->ctx.device_type == kDGLCPU) << "Can not copy across different ctx types directly"; // Use the context that is *not* a cpu context to get the correct device // api manager. DGLContext ctx = from->ctx.device_type != kDGLCPU ? from->ctx : to->ctx; // default: local current cuda stream DeviceAPI::Get(ctx)->CopyDataFromTo( from->data, static_cast(from->byte_offset), to->data, static_cast(to->byte_offset), from_size, from->ctx, to->ctx, from->dtype); } void NDArray::RecordedCopyFromTo( DGLArray* from, DGLArray* to, void* pytorch_ctx) { size_t from_size = GetDataSize(*from); size_t to_size = GetDataSize(*to); CHECK_EQ(from_size, to_size) << "DGLArrayCopyFromTo: The size must exactly match."; CHECK(from->ctx.device_type != to->ctx.device_type) << "Recoding event is only called for the copy between CPU and GPU."; CHECK(from->ctx.device_type == kDGLCUDA || to->ctx.device_type == kDGLCUDA) << "At least one CUDA ctx needs to be involved."; DeviceAPI::Get(kDGLCUDA)->RecordedCopyDataFromTo( from->data, static_cast(from->byte_offset), to->data, static_cast(to->byte_offset), from_size, from->ctx, to->ctx, from->dtype, pytorch_ctx); } NDArray NDArray::PinnedEmpty( std::vector shape, DGLDataType dtype, DGLContext ctx) { CHECK_EQ(ctx.device_type, kDGLCPU) << "Only NDArray on CPU can be pinned"; NDArray ret = Internal::Create(shape, dtype, ctx); size_t size = GetDataSize(ret.data_->dl_tensor); if (size > 0) { ret.data_->dl_tensor.data = DeviceAPI::Get(kDGLCUDA)->AllocPinnedDataSpace( size, &(ret.data_->pytorch_ctx_), &(ret.data_->pytorch_raw_deleter_)); CHECK( ret.data_->pytorch_ctx_ != nullptr && ret.data_->pytorch_raw_deleter_ != nullptr) << "The allocation failed in PyTorch's CachingHostAllocator. 
" << "The returned context pointer is " << ret.data_->pytorch_ctx_ << " and the function deleter is " << ret.data_->pytorch_raw_deleter_; ret.data_->pinned_by_pytorch_ = true; } return ret; } void NDArray::PinContainer(NDArray::Container* ptr) { if (IsContainerPinned(ptr)) return; auto* tensor = &(ptr->dl_tensor); CHECK_EQ(tensor->ctx.device_type, kDGLCPU) << "Only NDArray on CPU can be pinned"; ptr->pinned_by_dgl_ = DeviceAPI::Get(kDGLCUDA)->PinData(tensor->data, GetDataSize(*tensor)); } void NDArray::UnpinContainer(NDArray::Container* ptr) { auto container_is_pinned = IsContainerPinned(ptr); // The tensor may be pinned outside of DGL via a different CUDA API, // so we cannot unpin it with cudaHostUnregister. CHECK(ptr->pinned_by_dgl_ || !container_is_pinned) << "Cannot unpin a tensor that is pinned outside of DGL."; // 1. not pinned, do nothing if (!container_is_pinned) return; // 2. pinned by DGL, unpin it DeviceAPI::Get(kDGLCUDA)->UnpinData(ptr->dl_tensor.data); ptr->pinned_by_dgl_ = false; } void NDArray::RecordStream(DGLArray* tensor, DGLStreamHandle stream) { TensorDispatcher* tensor_dispatcher = TensorDispatcher::Global(); CHECK(tensor_dispatcher->IsAvailable()) << "RecordStream only works when TensorAdapter is available."; CHECK_EQ(tensor->ctx.device_type, kDGLCUDA) << "RecordStream only works with GPU tensors."; tensor_dispatcher->RecordStream(tensor->data, stream, tensor->ctx.device_id); } template NDArray NDArray::FromVector(const std::vector& vec, DGLContext ctx) { const DGLDataType dtype = DGLDataTypeTraits::dtype; int64_t size = static_cast(vec.size()); NDArray ret = NDArray::Empty({size}, dtype, ctx); DeviceAPI::Get(ctx)->CopyDataFromTo( vec.data(), 0, static_cast(ret->data), 0, size * sizeof(T), DGLContext{kDGLCPU, 0}, ctx, dtype); return ret; } NDArray NDArray::CreateFromRaw( const std::vector& shape, DGLDataType dtype, DGLContext ctx, void* raw, bool auto_free) { NDArray ret = Internal::Create(shape, dtype, ctx); ret.data_->dl_tensor.data = raw; 
if (!auto_free) ret.data_->deleter = nullptr; return ret; } // export specializations template NDArray NDArray::FromVector( const std::vector&, DGLContext); template NDArray NDArray::FromVector( const std::vector&, DGLContext); template NDArray NDArray::FromVector( const std::vector&, DGLContext); template NDArray NDArray::FromVector( const std::vector&, DGLContext); template NDArray NDArray::FromVector( const std::vector&, DGLContext); template NDArray NDArray::FromVector( const std::vector&, DGLContext); template std::vector NDArray::ToVector() const { const DGLDataType dtype = DGLDataTypeTraits::dtype; CHECK(data_->dl_tensor.ndim == 1) << "ToVector() only supported for 1D arrays"; CHECK(data_->dl_tensor.dtype == dtype) << "dtype mismatch"; int64_t size = data_->dl_tensor.shape[0]; std::vector vec(size); const DGLContext& ctx = data_->dl_tensor.ctx; DeviceAPI::Get(ctx)->CopyDataFromTo( static_cast(data_->dl_tensor.data), 0, vec.data(), 0, size * sizeof(T), ctx, DGLContext{kDGLCPU, 0}, dtype); return vec; } template std::vector NDArray::ToVector() const; template std::vector NDArray::ToVector() const; template std::vector NDArray::ToVector() const; template std::vector NDArray::ToVector() const; template std::vector NDArray::ToVector() const; template std::vector NDArray::ToVector() const; std::shared_ptr NDArray::GetSharedMem() const { return this->data_->mem; } bool NDArray::IsContainerPinned(NDArray::Container* ptr) { if (ptr->pinned_by_dgl_ || ptr->pinned_by_pytorch_) return true; auto* tensor = &(ptr->dl_tensor); // Can only be pinned if on CPU... if (tensor->ctx.device_type != kDGLCPU) return false; // ... and CUDA device API is enabled, and the tensor is indeed in pinned // memory. 
auto device = DeviceAPI::Get(kDGLCUDA, true); return device && device->IsPinned(tensor->data); } void NDArray::Save(dmlc::Stream* strm) const { auto zc_strm = dynamic_cast(strm); if (zc_strm) { zc_strm->PushNDArray(*this); return; } SaveDGLArray(strm, const_cast(operator->())); } bool NDArray::Load(dmlc::Stream* strm) { auto zc_strm = dynamic_cast(strm); if (zc_strm) { *this = zc_strm->PopNDArray(); return true; } uint64_t header, reserved; CHECK(strm->Read(&header)) << "Invalid DGLArray file format"; CHECK(strm->Read(&reserved)) << "Invalid DGLArray file format"; CHECK(header == kDGLNDArrayMagic) << "Invalid DGLArray file format"; DGLContext ctx; int ndim; DGLDataType dtype; CHECK(strm->Read(&ctx)) << "Invalid DGLArray file format"; CHECK(strm->Read(&ndim)) << "Invalid DGLArray file format"; CHECK(strm->Read(&dtype)) << "Invalid DGLArray file format"; CHECK_EQ(ctx.device_type, kDGLCPU) << "Invalid DGLArray context: can only save as CPU tensor"; std::vector shape(ndim); if (ndim != 0) { CHECK(strm->ReadArray(&shape[0], ndim)) << "Invalid DGLArray file format"; } NDArray ret = NDArray::Empty(shape, dtype, ctx); int64_t num_elems = 1; int elem_bytes = (ret->dtype.bits + 7) / 8; for (int i = 0; i < ret->ndim; ++i) { num_elems *= ret->shape[i]; } int64_t data_byte_size; CHECK(strm->Read(&data_byte_size)) << "Invalid DGLArray file format"; CHECK(data_byte_size == num_elems * elem_bytes) << "Invalid DGLArray file format"; if (data_byte_size != 0) { // strm->Read will return the total number of elements successfully read. // Therefore if data_byte_size is zero, the CHECK below would fail. 
CHECK(strm->Read(ret->data, data_byte_size)) << "Invalid DGLArray file format"; } if (!DMLC_IO_NO_ENDIAN_SWAP) { dmlc::ByteSwap(ret->data, elem_bytes, num_elems); } *this = ret; return true; } } // namespace runtime } // namespace dgl using namespace dgl::runtime; int DGLArrayAlloc( const dgl_index_t* shape, int ndim, int dtype_code, int dtype_bits, int dtype_lanes, int device_type, int device_id, DGLArrayHandle* out) { API_BEGIN(); DGLDataType dtype; dtype.code = static_cast(dtype_code); dtype.bits = static_cast(dtype_bits); dtype.lanes = static_cast(dtype_lanes); DGLContext ctx; ctx.device_type = static_cast(device_type); ctx.device_id = device_id; *out = NDArray::Internal::MoveAsDGLArray( NDArray::Empty(std::vector(shape, shape + ndim), dtype, ctx)); API_END(); } int DGLArrayAllocSharedMem( const char* mem_name, const dgl_index_t* shape, int ndim, int dtype_code, int dtype_bits, int dtype_lanes, bool is_create, DGLArrayHandle* out) { API_BEGIN(); DGLDataType dtype; dtype.code = static_cast(dtype_code); dtype.bits = static_cast(dtype_bits); dtype.lanes = static_cast(dtype_lanes); std::vector shape_vec(shape, shape + ndim); NDArray arr = NDArray::EmptyShared( mem_name, shape_vec, dtype, DGLContext{kDGLCPU, 0}, is_create); *out = NDArray::Internal::MoveAsDGLArray(arr); API_END(); } int DGLArrayFree(DGLArrayHandle handle) { API_BEGIN(); reinterpret_cast(handle)->DecRef(); API_END(); } int DGLArrayCopyFromTo(DGLArrayHandle from, DGLArrayHandle to) { API_BEGIN(); NDArray::CopyFromTo(from, to); API_END(); } int DGLArrayCopyFromBytes(DGLArrayHandle handle, void* data, size_t nbytes) { API_BEGIN(); DGLContext cpu_ctx; cpu_ctx.device_type = kDGLCPU; cpu_ctx.device_id = 0; size_t arr_size = GetDataSize(*handle); CHECK_EQ(arr_size, nbytes) << "DGLArrayCopyFromBytes: size mismatch"; DeviceAPI::Get(handle->ctx) ->CopyDataFromTo( data, 0, handle->data, static_cast(handle->byte_offset), nbytes, cpu_ctx, handle->ctx, handle->dtype); API_END(); } int 
DGLArrayCopyToBytes(DGLArrayHandle handle, void* data, size_t nbytes) { API_BEGIN(); DGLContext cpu_ctx; cpu_ctx.device_type = kDGLCPU; cpu_ctx.device_id = 0; size_t arr_size = GetDataSize(*handle); CHECK_EQ(arr_size, nbytes) << "DGLArrayCopyToBytes: size mismatch"; DeviceAPI::Get(handle->ctx) ->CopyDataFromTo( handle->data, static_cast(handle->byte_offset), data, 0, nbytes, handle->ctx, cpu_ctx, handle->dtype); API_END(); } int DGLArrayPinData(DGLArrayHandle handle, DGLContext ctx) { API_BEGIN(); auto* nd_container = reinterpret_cast(handle); NDArray::PinContainer(nd_container); API_END(); } int DGLArrayUnpinData(DGLArrayHandle handle, DGLContext ctx) { API_BEGIN(); auto* nd_container = reinterpret_cast(handle); NDArray::UnpinContainer(nd_container); API_END(); } int DGLArrayRecordStream(DGLArrayHandle handle, DGLStreamHandle stream) { API_BEGIN(); NDArray::RecordStream(handle, stream); API_END(); } ================================================ FILE: src/runtime/object.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file runtime/object.cc * @brief Implementation of runtime object APIs. */ #include #include #include #include #include namespace dgl { namespace runtime { namespace { // single manager of operator information. struct TypeManager { // mutex to avoid registration from multiple threads. // recursive is needed for trigger(which calls UpdateAttrMap) std::mutex mutex; std::atomic type_counter{0}; std::unordered_map key2index; std::vector index2key; // get singleton of the static TypeManager* Global() { static TypeManager inst; return &inst; } }; } // namespace bool Object::_DerivedFrom(uint32_t tid) const { static uint32_t tindex = TypeKey2Index(Object::_type_key); return tid == tindex; } // this is slow, usually caller always hold the result in a static variable. 
/**
 * @brief Map a type key to its type index, registering the key on first use.
 *
 * The lookup and the registration happen under the TypeManager mutex. A new
 * key receives the next value of the type counter.
 *
 * @param key The type key.
 * @return The type index of the key.
 */
uint32_t Object::TypeKey2Index(const char* key) {
  TypeManager* t = TypeManager::Global();
  std::lock_guard lock(t->mutex);
  std::string skey = key;
  auto it = t->key2index.find(skey);
  if (it != t->key2index.end()) {
    return it->second;
  }
  uint32_t tid = ++(t->type_counter);
  t->key2index[skey] = tid;
  t->index2key.push_back(skey);
  return tid;
}

/**
 * @brief Map a type index back to its type key.
 *
 * Index 0 is rejected; index i refers to element i - 1 of the key table,
 * accessed with bounds checking.
 *
 * @param index The type index.
 * @return The type key as a C string owned by the type manager.
 */
const char* Object::TypeIndex2Key(uint32_t index) {
  TypeManager* t = TypeManager::Global();
  std::lock_guard lock(t->mutex);
  CHECK_NE(index, 0);
  // NOTE(review): the pointer refers to a string stored in `index2key`;
  // verify that it stays valid when later registrations grow that container.
  return t->index2key.at(index - 1).c_str();
}

}  // namespace runtime
}  // namespace dgl

================================================ FILE: src/runtime/pack_args.h ================================================
/**
 * Copyright (c) 2017 by Contributors
 * @file pack_args.h
 * @brief Utility to pack DGLArgs to other type-erased function calling
 * convention.
 *
 * Two type erased function signatures are supported.
 *  - cuda_style(void** args, int num_args);
 *     - Pack everything by address
 *  - metal_style(void** buffers, int num_buffers,
 *                union_32bit args[N], int num_args);
 *     - Pack buffer by address, pack rest parameter into 32bit union buffer.
 */
#ifndef DGL_RUNTIME_PACK_ARGS_H_
#define DGL_RUNTIME_PACK_ARGS_H_

#include
#include
#include
#include

namespace dgl {
namespace runtime {
/**
 * @brief argument union type of 32bit.
 * Choose 32 bit because most GPU API do not work well with 64 bit.
 */
union ArgUnion {
  int32_t v_int32;
  uint32_t v_uint32;
  float v_float32;
};
/**
 * @brief Create a packed function from void addr types.
 *
 * @param f with signature (DGLArgs args, DGLRetValue* rv, void* void_args)
 * @param arg_types The arguments type information.
 * @tparam F the function type
 *
 * @return The wrapped packed function.
 */
template
inline PackedFunc PackFuncVoidAddr(
    F f, const std::vector& arg_types);
/**
 * @brief Create a packed function that from function only packs buffer
 * arguments.
 *
 * @param f with signature (DGLArgs args, DGLRetValue* rv, ArgUnion* pack_args)
 * @param arg_types The arguments type information.
 * @tparam F the function type
 *
 * @return The wrapped packed function.
 */
template
inline PackedFunc PackFuncNonBufferArg(
    F f, const std::vector& arg_types);
/**
 * @brief Create a packed function that from function that takes a packed
 * arguments.
 *
 * @param f with signature (DGLArgs args, DGLRetValue* rv, void* pack_args,
 * size_t nbytes)
 * @param arg_types The arguments that wish to get from
 * @tparam F the function type
 *
 * @return The wrapped packed function.
 */
template
inline PackedFunc PackFuncPackedArg(
    F f, const std::vector& arg_types);
/**
 * @brief Extract number of buffer argument from the argument types.
 * @param arg_types The argument types.
 * @return number of buffer arguments
 */
inline size_t NumBufferArgs(const std::vector& arg_types);

// implementations details
namespace detail {
/**
 * @brief Scratch array with a fixed capacity of kSize elements stored inside
 *        the object. The `size` constructor argument is ignored.
 */
template
class TempArray {
 public:
  explicit TempArray(int size) {}
  T* data() { return data_; }

 private:
  T data_[kSize];
};
/**
 * @brief Variant of TempArray whose storage is a std::vector sized by the
 *        `size` constructor argument.
 */
template
class TempArray {
 public:
  explicit TempArray(int size) : data_(size) {}
  T* data() { return data_.data(); }

 private:
  std::vector data_;
};

/** @brief conversion code used in void arg.
 */
enum ArgConvertCode {
  INT64_TO_INT64,
  INT64_TO_INT32,
  INT64_TO_UINT32,
  FLOAT64_TO_FLOAT32,
  FLOAT64_TO_FLOAT64,
  HANDLE_TO_HANDLE
};

/**
 * @brief Select the conversion applied to an argument of the given type.
 *
 * Supported: int64/int32, uint32, float64/float32 and handles, all with a
 * single lane. Anything else is a fatal error.
 *
 * @param t The declared type of the device function argument.
 * @return The conversion code.
 */
inline ArgConvertCode GetArgConvertCode(DGLDataType t) {
  CHECK_EQ(t.lanes, 1U)
      << "Cannot pass vector type argument to devic function for now";
  if (t.code == kDGLInt) {
    if (t.bits == 64U) return INT64_TO_INT64;
    if (t.bits == 32U) return INT64_TO_INT32;
  } else if (t.code == kDGLUInt) {
    if (t.bits == 32U) return INT64_TO_UINT32;
  } else if (t.code == kDGLFloat) {
    if (t.bits == 64U) return FLOAT64_TO_FLOAT64;
    if (t.bits == 32U) return FLOAT64_TO_FLOAT32;
  } else if (t.code == kHandle) {
    return HANDLE_TO_HANDLE;
  }
  LOG(FATAL) << "Cannot handle " << t << " as device function argument";
  return HANDLE_TO_HANDLE;
}

/**
 * @brief Implementation of PackFuncVoidAddr for one TempArray capacity.
 *
 * The returned function builds an array with one address per argument.
 * Arguments that need no conversion are passed by the address of their slot
 * in `args.values`; arguments narrowed to 32 bits are first converted into a
 * temporary ArgUnion and passed by the address of that temporary.
 *
 * @param f The wrapped function, called as f(args, ret, addr).
 * @param codes The conversion code of every argument.
 */
template
inline PackedFunc PackFuncVoidAddr_(
    F f, const std::vector& codes) {
  int num_args = static_cast(codes.size());
  auto ret = [f, codes, num_args](DGLArgs args, DGLRetValue* ret) {
    TempArray addr_(num_args);
    TempArray holder_(num_args);
    void** addr = addr_.data();
    ArgUnion* holder = holder_.data();
    for (int i = 0; i < num_args; ++i) {
      switch (codes[i]) {
        case INT64_TO_INT64:
        case FLOAT64_TO_FLOAT64:
        case HANDLE_TO_HANDLE: {
          addr[i] = (void*)&(args.values[i]);  // NOLINT(*)
          break;
        }
        case INT64_TO_INT32: {
          holder[i].v_int32 = static_cast(args.values[i].v_int64);
          addr[i] = &(holder[i]);
          break;
        }
        case INT64_TO_UINT32: {
          holder[i].v_uint32 = static_cast(args.values[i].v_int64);
          addr[i] = &(holder[i]);
          break;
        }
        case FLOAT64_TO_FLOAT32: {
          holder[i].v_float32 = static_cast(args.values[i].v_float64);
          addr[i] = &(holder[i]);
          break;
        }
      }
    }
    f(args, ret, addr);
  };
  return PackedFunc(ret);
}

/**
 * @brief Implementation of PackFuncNonBufferArg for one TempArray capacity.
 *
 * The returned function converts the arguments starting at position `base`
 * into an array of 32-bit ArgUnion values. 64-bit and handle arguments are
 * not accepted in that range.
 *
 * @param f The wrapped function, called as f(args, ret, holder).
 * @param base Index of the first non-buffer argument in `args`.
 * @param codes The conversion codes of the arguments from `base` on.
 */
template
inline PackedFunc PackFuncNonBufferArg_(
    F f, int base, const std::vector& codes) {
  int num_args = static_cast(codes.size());
  auto ret = [f, codes, base, num_args](DGLArgs args, DGLRetValue* ret) {
    TempArray holder_(num_args);
    ArgUnion* holder = holder_.data();
    for (int i = 0; i < num_args; ++i) {
      switch (codes[i]) {
        case INT64_TO_INT64:
        case FLOAT64_TO_FLOAT64: {
          LOG(FATAL) << "Donot support 64bit argument to device function";
          break;
        }
        case INT64_TO_INT32: {
          holder[i].v_int32 = static_cast(args.values[base + i].v_int64);
          break;
        }
        case INT64_TO_UINT32: {
          holder[i].v_uint32 = static_cast(args.values[base + i].v_int64);
          break;
        }
        case FLOAT64_TO_FLOAT32: {
          holder[i].v_float32 = static_cast(args.values[base + i].v_float64);
          break;
        }
        case HANDLE_TO_HANDLE: {
          LOG(FATAL) << "not reached";
          break;
        }
      }
    }
    f(args, ret, holder);
  };
  return PackedFunc(ret);
}

/**
 * @brief Implementation of PackFuncPackedArg for one TempArray capacity.
 *
 * The returned function serializes all arguments back to back into one
 * buffer, advancing in units of int32_t: a handle takes sizeof(void*) bytes,
 * a 64-bit value takes 8 bytes and a 32-bit value takes 4 bytes.
 *
 * @param f The wrapped function, called as f(args, ret, pack, nbytes) where
 *        nbytes is the number of bytes written to the buffer.
 * @param codes The conversion code of every argument.
 */
template
inline PackedFunc PackFuncPackedArg_(
    F f, const std::vector& codes) {
  int num_args = static_cast(codes.size());
  auto ret = [f, codes, num_args](DGLArgs args, DGLRetValue* ret) {
    TempArray pack_(num_args);
    int32_t* pack = reinterpret_cast(pack_.data());
    int32_t* ptr = pack;
    static_assert(sizeof(DGLValue) == 8, "invariant");
    static_assert(sizeof(void*) % sizeof(int32_t) == 0, "invariant");
    for (int i = 0; i < num_args; ++i) {
      switch (codes[i]) {
        case HANDLE_TO_HANDLE: {
          std::memcpy(ptr, &(args.values[i].v_handle), sizeof(void*));
          ptr += sizeof(void*) / sizeof(int32_t);
          break;
        }
        case INT64_TO_INT64:
        case FLOAT64_TO_FLOAT64: {
          std::memcpy(ptr, &args.values[i], sizeof(DGLValue));
          // sizeof(DGLValue) is 8 (asserted above), i.e. two int32_t slots.
          ptr += 2;
          break;
        }
        case INT64_TO_INT32: {
          *ptr = static_cast(args.values[i].v_int64);
          ++ptr;
          break;
        }
        case INT64_TO_UINT32: {
          *reinterpret_cast(ptr) = static_cast(args.values[i].v_int64);
          ++ptr;
          break;
        }
        case FLOAT64_TO_FLOAT32: {
          *reinterpret_cast(ptr) = static_cast(args.values[i].v_float64);
          ++ptr;
          break;
        }
        default: {
          LOG(FATAL) << "not reached";
          break;
        }
      }
    }
    f(args, ret, pack, (ptr - pack) * sizeof(int32_t));
  };
  return PackedFunc(ret);
}
}  // namespace detail

// Converts the argument types to conversion codes and picks the
// implementation by argument count: capacity 4, capacity 8, or the <0>
// variant for more than 8 arguments.
template
inline PackedFunc PackFuncVoidAddr(
    F f, const std::vector& arg_types) {
  std::vector codes(arg_types.size());
  for (size_t i = 0; i < arg_types.size(); ++i) {
    codes[i] = detail::GetArgConvertCode(arg_types[i]);
  }
  size_t num_void_args = arg_types.size();
  // specialization
  if (num_void_args <= 4) {
    return detail::PackFuncVoidAddr_<4>(f, codes);
  } else if (num_void_args <= 8) {
    return detail::PackFuncVoidAddr_<8>(f, codes);
  } else {
    return detail::PackFuncVoidAddr_<0>(f, codes);
  }
}

// Returns the index of the first non-handle argument and checks that no
// handle argument appears after it, i.e. all buffers must come first.
inline size_t NumBufferArgs(const std::vector& arg_types) {
  size_t base = arg_types.size();
  for (size_t i = 0; i < arg_types.size(); ++i) {
    if (arg_types[i].code != kHandle) {
      base = i;
      break;
    }
  }
  for (size_t i = base; i < arg_types.size(); ++i) {
    CHECK(arg_types[i].code != kHandle)
        << "Device function need to be organized";
  }
  return base;
}

// Only the arguments after the leading buffer arguments are converted; the
// number of buffers becomes the `base` offset of the implementation.
template
inline PackedFunc PackFuncNonBufferArg(
    F f, const std::vector& arg_types) {
  size_t num_buffer = NumBufferArgs(arg_types);
  std::vector codes;
  for (size_t i = num_buffer; i < arg_types.size(); ++i) {
    codes.push_back(detail::GetArgConvertCode(arg_types[i]));
  }
  int base = static_cast(num_buffer);
  size_t nargs = codes.size();
  // specialization
  if (nargs <= 4) {
    return detail::PackFuncNonBufferArg_<4>(f, base, codes);
  } else {
    return detail::PackFuncNonBufferArg_<0>(f, base, codes);
  }
}

// Converts the argument types to conversion codes and picks the
// implementation by argument count: capacity 4, or the <0> variant.
template
inline PackedFunc PackFuncPackedArg(
    F f, const std::vector& arg_types) {
  std::vector codes;
  for (size_t i = 0; i < arg_types.size(); ++i) {
    codes.push_back(detail::GetArgConvertCode(arg_types[i]));
  }
  size_t nargs = codes.size();
  // specialization
  if (nargs <= 4) {
    return detail::PackFuncPackedArg_<4>(f, codes);
  } else {
    return detail::PackFuncPackedArg_<0>(f, codes);
  }
}
}  // namespace runtime
}  // namespace dgl
#endif  // DGL_RUNTIME_PACK_ARGS_H_

================================================ FILE: src/runtime/parallel_for.cpp ================================================
/**
 * Copyright (c) 2016 by Contributors
 * Definition of the global default grain size object.
 * @file parallel_for.cpp
 */
namespace dgl {
namespace runtime {
DefaultGrainSizeT default_grain_size;
}  // namespace runtime
}  // namespace dgl

================================================ FILE: src/runtime/registry.cc ================================================
/**
 * Copyright (c)
2017 by Contributors
 * @file registry.cc
 * @brief The global registry of packed function.
 */
#include
#include
#include
#include
#include
#include
#include
#include "runtime_base.h"

namespace dgl {
namespace runtime {

struct Registry::Manager {
  // map storing the functions.
  // We deliberately used raw pointer
  // This is because PackedFunc can contain callbacks into the host
  // language(python) and the resource can become invalid because of
  // indeterministic order of destruction. The resources will only be recycled
  // during program exit.
  std::unordered_map fmap;
  // vtable for extension type
  std::array ext_vtable;
  // mutex
  std::mutex mutex;

  // Marks every extension type slot as unregistered (null `destroy`).
  Manager() {
    for (auto& x : ext_vtable) {
      x.destroy = nullptr;
    }
  }

  // Returns the process-wide manager instance.
  static Manager* Global() {
    static Manager inst;
    return &inst;
  }
};

/**
 * @brief Set the function stored in this registry entry.
 * @param f The function.
 * @return Reference to this entry.
 */
Registry& Registry::set_body(PackedFunc f) {  // NOLINT(*)
  func_ = f;
  return *this;
}

/**
 * @brief Get or create the registry entry for a name.
 * @param name The name of the function.
 * @param override Whether an existing entry may be returned; when false and
 *        the name is already registered a CHECK fails.
 * @return Reference to the entry.
 */
Registry& Registry::Register(
    const std::string& name, bool override) {  // NOLINT(*)
  Manager* m = Manager::Global();
  std::lock_guard lock(m->mutex);
  auto it = m->fmap.find(name);
  if (it == m->fmap.end()) {
    Registry* r = new Registry();
    r->name_ = name;
    m->fmap[name] = r;
    return *r;
  } else {
    CHECK(override) << "Global PackedFunc " << name << " is already registered";
    return *it->second;
  }
}

/**
 * @brief Remove a name from the registry.
 * @param name The name of the function.
 * @return Whether the name was registered.
 */
bool Registry::Remove(const std::string& name) {
  Manager* m = Manager::Global();
  std::lock_guard lock(m->mutex);
  auto it = m->fmap.find(name);
  if (it == m->fmap.end()) return false;
  // Only the map entry is erased; the Registry object it pointed to is not
  // deleted here.
  m->fmap.erase(it);
  return true;
}

/**
 * @brief Look up a registered function.
 * @param name The name of the function.
 * @return Pointer to the function stored in the entry, or nullptr when the
 *         name is not registered.
 */
const PackedFunc* Registry::Get(const std::string& name) {
  Manager* m = Manager::Global();
  std::lock_guard lock(m->mutex);
  auto it = m->fmap.find(name);
  if (it == m->fmap.end()) return nullptr;
  return &(it->second->func_);
}

/** @brief Return the names of all registered functions. */
std::vector Registry::ListNames() {
  Manager* m = Manager::Global();
  std::lock_guard lock(m->mutex);
  std::vector keys;
  keys.reserve(m->fmap.size());
  for (const auto& kv : m->fmap) {
    keys.push_back(kv.first);
  }
  return keys;
}

/**
 * @brief Get the vtable of a registered extension type.
 * @param type_code The type code; must lie strictly between kExtBegin and
 *        kExtEnd and must have been registered.
 * @return The vtable. The manager mutex is not taken by this lookup.
 */
ExtTypeVTable* ExtTypeVTable::Get(int type_code) {
  CHECK(type_code > kExtBegin && type_code < kExtEnd);
  Registry::Manager* m = Registry::Manager::Global();
  ExtTypeVTable* vt = &(m->ext_vtable[type_code]);
  CHECK(vt->destroy != nullptr) << "Extension type not registered";
  return vt;
}

/**
 * @brief Store the vtable of an extension type, replacing any previous one.
 * @param type_code The type code; must lie strictly between kExtBegin and
 *        kExtEnd.
 * @param vt The vtable to copy into the slot.
 * @return Pointer to the stored vtable.
 */
ExtTypeVTable* ExtTypeVTable::RegisterInternal(
    int type_code, const ExtTypeVTable& vt) {
  CHECK(type_code > kExtBegin && type_code < kExtEnd);
  Registry::Manager* m = Registry::Manager::Global();
  std::lock_guard lock(m->mutex);
  ExtTypeVTable* pvt = &(m->ext_vtable[type_code]);
  pvt[0] = vt;
  return pvt;
}
}  // namespace runtime
}  // namespace dgl

/** @brief entry to easily hold returning information */
struct DGLFuncThreadLocalEntry {
  /** @brief result holder for returning strings */
  std::vector ret_vec_str;
  /** @brief result holder for returning string pointers */
  std::vector ret_vec_charp;
};

/** @brief Thread local store that can be used to hold return values. */
typedef dmlc::ThreadLocalStore DGLFuncThreadLocalStore;

/** @brief C API: destroy an extension object through its type's vtable. */
int DGLExtTypeFree(void* handle, int type_code) {
  API_BEGIN();
  dgl::runtime::ExtTypeVTable::Get(type_code)->destroy(handle);
  API_END();
}

/**
 * @brief C API: register a function under a global name.
 * @param name The name of the function.
 * @param f Handle of the function; a copy of it is stored.
 * @param override Non-zero to allow replacing an existing registration.
 */
int DGLFuncRegisterGlobal(const char* name, DGLFunctionHandle f, int override) {
  API_BEGIN();
  dgl::runtime::Registry::Register(name, override != 0)
      .set_body(*static_cast(f));
  API_END();
}

/**
 * @brief C API: get a global function by name.
 * @param name The name of the function.
 * @param out Receives a newly allocated copy of the function, or nullptr
 *        when the name is not registered.
 */
int DGLFuncGetGlobal(const char* name, DGLFunctionHandle* out) {
  API_BEGIN();
  const dgl::runtime::PackedFunc* fp = dgl::runtime::Registry::Get(name);
  if (fp != nullptr) {
    *out = new dgl::runtime::PackedFunc(*fp);  // NOLINT(*)
  } else {
    *out = nullptr;
  }
  API_END();
}

/**
 * @brief C API: list the names of all global functions.
 *
 * The returned pointers refer to strings kept in the thread-local entry, so
 * they are overwritten by the next call made from the same thread.
 *
 * @param out_size Receives the number of names.
 * @param out_array Receives the array of names.
 */
int DGLFuncListGlobalNames(int* out_size, const char*** out_array) {
  API_BEGIN();
  DGLFuncThreadLocalEntry* ret = DGLFuncThreadLocalStore::Get();
  ret->ret_vec_str = dgl::runtime::Registry::ListNames();
  ret->ret_vec_charp.clear();
  for (size_t i = 0; i < ret->ret_vec_str.size(); ++i) {
    ret->ret_vec_charp.push_back(ret->ret_vec_str[i].c_str());
  }
  *out_array = dmlc::BeginPtr(ret->ret_vec_charp);
  *out_size = static_cast(ret->ret_vec_str.size());
  API_END();
}
================================================ FILE: src/runtime/resource_manager.cc ================================================ /** * Copyright (c) 2020 by Contributors * @file resource_manager.cc * @brief Manage the resources. */ #include "resource_manager.h" #include #include namespace dgl { namespace runtime { /** * The runtime allocates resources during the computation. Some of the resources * cannot be destroyed after the process exits especially when the process * doesn't exits normally. We need to keep track of the resources in the system * and clean them up properly. */ class ResourceManager { std::unordered_map> resources; public: void Add(const std::string &key, std::shared_ptr resource) { auto it = resources.find(key); CHECK(it == resources.end()) << key << " already exists"; resources.insert( std::pair>(key, resource)); } void Erase(const std::string &key) { resources.erase(key); } void Cleanup() { for (auto it = resources.begin(); it != resources.end(); it++) { it->second->Destroy(); } resources.clear(); } }; static ResourceManager manager; void AddResource(const std::string &key, std::shared_ptr resource) { manager.Add(key, resource); } void DeleteResource(const std::string &key) { manager.Erase(key); } void CleanupResources() { manager.Cleanup(); } } // namespace runtime } // namespace dgl ================================================ FILE: src/runtime/resource_manager.h ================================================ /** * Copyright (c) 2020 by Contributors * @file resource_manager.h * @brief Manage the resources in the runtime system. */ #ifndef DGL_RUNTIME_RESOURCE_MANAGER_H_ #define DGL_RUNTIME_RESOURCE_MANAGER_H_ #include #include #include namespace dgl { namespace runtime { /** * A class that provides the interface to describe a resource that can be * managed by a resource manager. Some of the resources cannot be free'd * automatically when the process exits, especially when the process doesn't * exit normally. 
One example is shared memory. We can keep track of this kind
 * of resources and manage them properly.
 */
class Resource {
 public:
  virtual ~Resource() {}

  /** @brief Release the underlying resource; implemented by subclasses. */
  virtual void Destroy() = 0;
};

// Add resource.
// Starts tracking `resource` under `key`. The implementation in
// resource_manager.cc CHECK-fails if `key` is already tracked.
void AddResource(const std::string &key, std::shared_ptr resource);

// Delete resource.
// Stops tracking `key`; the implementation does not call Destroy().
void DeleteResource(const std::string &key);

// Clean up all resources.
// Calls Destroy() on every tracked resource and then forgets all of them.
void CleanupResources();

}  // namespace runtime
}  // namespace dgl

#endif  // DGL_RUNTIME_RESOURCE_MANAGER_H_

================================================
FILE: src/runtime/runtime_base.h
================================================
/**
 * Copyright (c) 2016 by Contributors
 * @file runtime_base.h
 * @brief Base of all C APIs
 */
#ifndef DGL_RUNTIME_RUNTIME_BASE_H_
#define DGL_RUNTIME_RUNTIME_BASE_H_

#include
#include

/**
 * @brief macro to guard beginning and end section of all functions.
 *  Opens a try block that must be closed by API_END() or
 *  API_END_HANDLE_ERROR.
 */
#define API_BEGIN() try {
/**
 * @brief every function starts with API_BEGIN();
 *  and finishes with API_END() or API_END_HANDLE_ERROR.
 *  Closes the try block; a std::runtime_error is passed to
 *  DGLAPIHandleException and its result is returned, otherwise the
 *  function returns 0.
 */
#define API_END()                           \
  }                                         \
  catch (std::runtime_error & _except_) {   \
    return DGLAPIHandleException(_except_); \
  }                                         \
  return 0;  // NOLINT(*)

/**
 * @brief every function starts with API_BEGIN();
 *   and finishes with API_END() or API_END_HANDLE_ERROR
 *   The finally clause contains procedure to cleanup states when an error
 *   happens.
*/ #define API_END_HANDLE_ERROR(Finalize) \ } \ catch (std::runtime_error & _except_) { \ Finalize; \ return DGLAPIHandleException(_except_); \ } \ return 0; // NOLINT(*) /** * @brief handle exception throwed out * @param e the exception * @return the return value of API after exception is handled */ inline int DGLAPIHandleException(const std::runtime_error &e) { DGLAPISetLastError(e.what()); return -1; } #endif // DGL_RUNTIME_RUNTIME_BASE_H_ ================================================ FILE: src/runtime/semaphore_wrapper.cc ================================================ /** * Copyright (c) 2021 by Contributors * @file semaphore_wrapper.cc * @brief A simple corss platform semaphore wrapper */ #include "semaphore_wrapper.h" #include #ifndef _WIN32 #include #include #include #endif namespace dgl { namespace runtime { #ifdef _WIN32 Semaphore::Semaphore() { sem_ = CreateSemaphore(nullptr, 0, INT_MAX, nullptr); if (!sem_) { LOG(FATAL) << "Cannot create semaphore"; } } void Semaphore::Wait() { WaitForSingleObject(sem_, INFINITE); } bool Semaphore::TimedWait(int) { // Timed wait is not supported on WIN32. Wait(); return true; } void Semaphore::Post() { ReleaseSemaphore(sem_, 1, nullptr); } #else Semaphore::Semaphore() { sem_init(&sem_, 0, 0); } void Semaphore::Wait() { sem_wait(&sem_); } bool Semaphore::TimedWait(int timeout) { // sem_timedwait does not exist in Mac OS. #ifdef __APPLE__ DLOG(WARNING) << "Timeout is not supported in semaphore's wait on Mac OS."; Wait(); #else // zero timeout means wait infinitely if (timeout == 0) { DLOG(WARNING) << "Will wait infinitely on semaphore until posted."; Wait(); return true; } timespec ts; if (clock_gettime(CLOCK_REALTIME, &ts) != 0) { LOG(ERROR) << "Failed to get current time via clock_gettime. 
Errno: " << errno; return false; } ts.tv_sec += timeout / MILLISECONDS_PER_SECOND; ts.tv_nsec += (timeout % MILLISECONDS_PER_SECOND) * NANOSECONDS_PER_MILLISECOND; if (ts.tv_nsec >= NANOSECONDS_PER_SECOND) { ts.tv_nsec -= NANOSECONDS_PER_SECOND; ++ts.tv_sec; } int ret = 0; while ((ret = sem_timedwait(&sem_, &ts) != 0) && errno == EINTR) { continue; } if (ret != 0) { if (errno == ETIMEDOUT) { DLOG(WARNING) << "sem_timedwait timed out after " << timeout << " milliseconds."; } else { LOG(ERROR) << "sem_timedwait returns unexpectedly. Errno: " << errno; } return false; } #endif return true; } void Semaphore::Post() { sem_post(&sem_); } #endif } // namespace runtime } // namespace dgl ================================================ FILE: src/runtime/semaphore_wrapper.h ================================================ /** * Copyright (c) 2021 by Contributors * @file semaphore_wrapper.h * @brief A simple corss platform semaphore wrapper */ #ifndef DGL_RUNTIME_SEMAPHORE_WRAPPER_H_ #define DGL_RUNTIME_SEMAPHORE_WRAPPER_H_ #ifdef _WIN32 #include #else #include #endif namespace dgl { namespace runtime { /** * @brief A simple crossplatform Semaphore wrapper */ class Semaphore { public: /** * @brief Semaphore constructor */ Semaphore(); /** * @brief blocking wait, decrease semaphore by 1 */ void Wait(); /** * @brief timed wait, decrease semaphore by 1 or returns if times out * @param timeout The timeout value in milliseconds. If zero, wait * indefinitely. 
*/
  bool TimedWait(int timeout);
  /**
   * @brief increase semaphore by 1
   */
  void Post();

 private:
  // Native semaphore object: a Win32 handle or a POSIX sem_t.
#ifdef _WIN32
  HANDLE sem_;
#else
  sem_t sem_;
#endif
  // Unit conversion factors used when turning a millisecond timeout into a
  // timespec.
  enum {
    MILLISECONDS_PER_SECOND = 1000,
    NANOSECONDS_PER_MILLISECOND = 1000 * 1000,
    NANOSECONDS_PER_SECOND = 1000 * 1000 * 1000
  };
};

}  // namespace runtime
}  // namespace dgl

#endif  // DGL_RUNTIME_SEMAPHORE_WRAPPER_H_

================================================
FILE: src/runtime/shared_mem.cc
================================================
/**
 * Copyright (c) 2019 by Contributors
 * @file shared_mem.cc
 * @brief Shared memory management.
 */
#ifndef _WIN32
#include
#include
#include
#endif
#include
#include
#include
#include

#include "resource_manager.h"

namespace dgl {
namespace runtime {

/**
 * Shared memory is a resource that cannot be cleaned up if the process doesn't
 * exit normally. We'll manage the resource with ResourceManager.
 */
class SharedMemoryResource : public Resource {
  // Name of the shared-memory object to unlink on Destroy().
  std::string name;

 public:
  explicit SharedMemoryResource(const std::string &name) { this->name = name; }

  /** @brief Unlink the shared-memory object (no-op on Windows). */
  void Destroy() {
    // LOG(INFO) << "remove " << name << " for shared memory";
#ifndef _WIN32
    shm_unlink(name.c_str());
#else  // _WIN32
    // NOTHING; Windows automatically removes the shared memory object once all
    // handles are unmapped.
#endif
  }
};

/**
 * @brief Record the name only; nothing is opened or mapped until CreateNew()
 *  or Open() is called.
 */
SharedMemory::SharedMemory(const std::string &name) {
  this->name = name;
  this->own_ = false;
#ifndef _WIN32
  this->fd_ = -1;
#else
  this->handle_ = nullptr;
#endif
  this->ptr_ = nullptr;
  this->size_ = 0;
}

/**
 * @brief Unmap the memory and close the descriptor/handle. On POSIX, an
 *  object created through CreateNew() is additionally unlinked and removed
 *  from the resource manager.
 */
SharedMemory::~SharedMemory() {
#ifndef _WIN32
  if (ptr_ && size_ != 0) CHECK(munmap(ptr_, size_) != -1) << strerror(errno);
  if (fd_ != -1) close(fd_);
  if (own_) {
    // LOG(INFO) << "remove " << name << " for shared memory";
    if (name != "") {
      shm_unlink(name.c_str());
      // The resource has been deleted. We don't need to keep track of it any
      // more.
      DeleteResource(name);
    }
  }
#else
  if (ptr_) CHECK(UnmapViewOfFile(ptr_)) << "Win32 Error: " << GetLastError();
  if (handle_) CloseHandle(handle_);
  // Windows do not need a separate shm_unlink step.
#endif  // _WIN32
}

/**
 * @brief Create the named shared-memory object, size it to sz bytes and map
 *  it read/write.
 *
 * On POSIX the object is opened with O_CREAT but without O_EXCL, so an
 * already existing object of the same name is opened rather than rejected
 * (see the TODO below). The name is also registered with the resource
 * manager.
 *
 * @param sz Size of the mapping in bytes.
 * @return Pointer to the mapped memory.
 */
void *SharedMemory::CreateNew(size_t sz) {
#ifndef _WIN32
  this->own_ = true;

  // We need to create a shared-memory file.
  // TODO(zhengda) we need to report error if the shared-memory file exists.
  int flag = O_RDWR | O_CREAT;
  fd_ = shm_open(name.c_str(), flag, S_IRUSR | S_IWUSR);
  CHECK_NE(fd_, -1) << "fail to open " << name << ": " << strerror(errno);
  // Shared memory cannot be deleted if the process exits abnormally in Linux.
  AddResource(name, std::shared_ptr(new SharedMemoryResource(name)));
  auto res = ftruncate(fd_, sz);
  CHECK_NE(res, -1) << "Failed to truncate the file. " << strerror(errno);
  ptr_ = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
  CHECK_NE(ptr_, MAP_FAILED)
      << "Failed to map shared memory. mmap failed with error "
      << strerror(errno);
  this->size_ = sz;
  return ptr_;
#else
  // The 64-bit size is passed as separate high and low 32-bit halves.
  handle_ = CreateFileMapping(
      INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE,
      static_cast(sz >> 32), static_cast(sz & 0xFFFFFFFF), name.c_str());
  CHECK(handle_ != nullptr)
      << "fail to open " << name << ", Win32 error: " << GetLastError();
  ptr_ = MapViewOfFile(handle_, FILE_MAP_ALL_ACCESS, 0, 0, sz);
  if (ptr_ == nullptr) {
    LOG(FATAL) << "Memory mapping failed, Win32 error: " << GetLastError();
    CloseHandle(handle_);
    return nullptr;
  }
  this->size_ = sz;
  return ptr_;
#endif  // _WIN32
}

/**
 * @brief Open an existing named shared-memory object and map sz bytes of it
 *  read/write.
 *
 * @param sz Size of the mapping in bytes.
 * @return Pointer to the mapped memory.
 */
void *SharedMemory::Open(size_t sz) {
#ifndef _WIN32
  int flag = O_RDWR;
  fd_ = shm_open(name.c_str(), flag, S_IRUSR | S_IWUSR);
  CHECK_NE(fd_, -1) << "fail to open " << name << ": " << strerror(errno);
  ptr_ = mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_SHARED, fd_, 0);
  CHECK_NE(ptr_, MAP_FAILED)
      << "Failed to map shared memory. mmap failed with error "
      << strerror(errno);
  this->size_ = sz;
  return ptr_;
#else
  handle_ = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, name.c_str());
  CHECK(handle_ != nullptr)
      << "fail to open " << name << ", Win32 Error: " << GetLastError();
  ptr_ = MapViewOfFile(handle_, FILE_MAP_ALL_ACCESS, 0, 0, sz);
  if (ptr_ == nullptr) {
    LOG(FATAL) << "Memory mapping failed, Win32 error: " << GetLastError();
    CloseHandle(handle_);
    return nullptr;
  }
  this->size_ = sz;
  return ptr_;
#endif  // _WIN32
}

/**
 * @brief Test whether a shared-memory object with the given name can be
 *  opened.
 *
 * @param name Name of the shared-memory object.
 * @return true if opening succeeded (the probe descriptor/handle is closed
 *         again), false otherwise.
 */
bool SharedMemory::Exist(const std::string &name) {
#ifndef _WIN32
  int fd = shm_open(name.c_str(), O_RDONLY, S_IRUSR | S_IWUSR);
  if (fd >= 0) {
    close(fd);
    return true;
  } else {
    return false;
  }
#else
  HANDLE handle = OpenFileMapping(FILE_MAP_ALL_ACCESS, FALSE, name.c_str());
  if (handle != nullptr) {
    CloseHandle(handle);
    return true;
  } else {
    return false;
  }
#endif  // _WIN32
}

}  // namespace runtime
}  // namespace dgl

================================================
FILE: src/runtime/system_lib_module.cc
================================================
/**
 * Copyright (c) 2017 by Contributors
 * @file system_lib_module.cc
 * @brief SystemLib module.
 */
#include
#include
#include

#include "module_util.h"

namespace dgl {
namespace runtime {

/**
 * @brief Module backed by a table of symbols registered at run time through
 *  RegisterSymbol().
 */
class SystemLibModuleNode : public ModuleNode {
 public:
  SystemLibModuleNode() = default;

  const char* type_key() const final { return "system_lib"; }

  /**
   * @brief Look up a registered symbol by name.
   *
   * The first call after a module blob has been recorded also imports that
   * blob.
   *
   * @param name The symbol name.
   * @param sptr_to_self Forwarded to WrapPackedFunc together with the symbol.
   * @return The wrapped function, or a default-constructed PackedFunc when
   *         the name is not in the table.
   */
  PackedFunc GetFunction(
      const std::string& name,
      const std::shared_ptr& sptr_to_self) final {
    std::lock_guard lock(mutex_);

    if (module_blob_ != nullptr) {
      // If we previously recorded submodules, load them now.
      ImportModuleBlob(reinterpret_cast(module_blob_), &imports_);
      // Reset the pointer so that the blob is imported only once.
      module_blob_ = nullptr;
    }

    auto it = tbl_.find(name);
    if (it != tbl_.end()) {
      return WrapPackedFunc(reinterpret_cast(it->second), sptr_to_self);
    } else {
      return PackedFunc();
    }
  }

  /**
   * @brief Register a symbol address under name.
   *
   * Two names are handled specially: symbol::dgl_module_ctx makes ptr
   * receive the address of this module, and symbol::dgl_dev_mblob records
   * ptr as the module blob to import later. Every other name is stored in
   * the symbol table, overwriting any previous address.
   */
  void RegisterSymbol(const std::string& name, void* ptr) {
    std::lock_guard lock(mutex_);
    if (name == symbol::dgl_module_ctx) {
      void** ctx_addr = reinterpret_cast(ptr);
      *ctx_addr = this;
    } else if (name == symbol::dgl_dev_mblob) {
      // Record pointer to content of submodules to be loaded.
      // We defer loading submodules to the first call to GetFunction().
      // The reason is that RegisterSymbol() gets called when initializing the
      // syslib (i.e. library loading time), and the registries aren't ready
      // yet. Therefore, we might not have the functionality to load submodules
      // now.
      CHECK(module_blob_ == nullptr) << "Resetting mobule blob?";
      module_blob_ = ptr;
    } else {
      auto it = tbl_.find(name);
      if (it != tbl_.end() && ptr != it->second) {
        // The message prints the new address first, then the previously
        // registered one.
        LOG(WARNING) << "SystemLib symbol " << name
                     << " get overriden to a different address " << ptr << "->"
                     << it->second;
      }
      tbl_[name] = ptr;
    }
  }

  /** @brief The single shared SystemLibModuleNode, created on first use. */
  static const std::shared_ptr& Global() {
    static std::shared_ptr inst = std::make_shared();
    return inst;
  }

 private:
  // Internal mutex
  std::mutex mutex_;
  // Internal symbol table
  std::unordered_map tbl_;
  // Module blob to be imported
  void* module_blob_{nullptr};
};

DGL_REGISTER_GLOBAL("module._GetSystemLib")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      *rv = runtime::Module(SystemLibModuleNode::Global());
    });
}  // namespace runtime
}  // namespace dgl

/**
 * @brief C entry point that registers a symbol with the global system
 *  library module. Always returns 0.
 */
int DGLBackendRegisterSystemLibSymbol(const char* name, void* ptr) {
  dgl::runtime::SystemLibModuleNode::Global()->RegisterSymbol(name, ptr);
  return 0;
}

================================================
FILE: src/runtime/tensordispatch.cc
================================================
/**
 * Copyright (c) 2019 by Contributors
 * @file runtime/tensordispatch.cc
 * @brief Adapter library caller
 */
#include
#include
#include #if defined(WIN32) || defined(_WIN32) #include #else // !WIN32 #include #endif // WIN32 #include namespace dgl { namespace runtime { constexpr const char *TensorDispatcher::names_[]; bool TensorDispatcher::Load(const char *path) { CHECK(!available_) << "The tensor adapter can only load once."; if (path == nullptr || strlen(path) == 0) // does not have dispatcher library; all operators fall back to DGL's // implementation return false; #if defined(WIN32) || defined(_WIN32) handle_ = LoadLibrary(path); if (!handle_) return false; for (int i = 0; i < num_entries_; ++i) { entrypoints_[i] = reinterpret_cast(GetProcAddress(handle_, names_[i])); CHECK(entrypoints_[i]) << "cannot locate symbol " << names_[i]; } #else // !WIN32 handle_ = dlopen(path, RTLD_LAZY); if (!handle_) { DLOG(WARNING) << "Could not open file: " << dlerror() << ". This does not affect DGL's but might impact its performance."; return false; } for (int i = 0; i < num_entries_; ++i) { entrypoints_[i] = dlsym(handle_, names_[i]); CHECK(entrypoints_[i]) << "cannot locate symbol " << names_[i]; } #endif // WIN32 available_ = true; return true; } TensorDispatcher::~TensorDispatcher() { if (handle_) { #if defined(WIN32) || defined(_WIN32) FreeLibrary(handle_); #else // !WIN32 dlclose(handle_); #endif // WIN32 } } }; // namespace runtime }; // namespace dgl ================================================ FILE: src/runtime/thread_pool.cc ================================================ /** * Copyright (c) 2017 by Contributors * @file thread_pool.cc * @brief Threadpool for multi-threading runtime. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const constexpr int kL1CacheBytes = 64; namespace dgl { namespace runtime { // stride in the page, fit to cache line. constexpr int kSyncStride = 64 / sizeof(std::atomic); /** * @brief Thread local master environment. 
*/ class ParallelLauncher { public: // Reset the the task request. void Init( FDGLParallelLambda flambda, void* cdata, int num_task, bool need_sync) { num_pending_.store(num_task); this->cdata = cdata; this->flambda = flambda; this->env.num_task = num_task; has_error_.store(false); // reshape if (static_cast(num_task) > par_errors_.size()) { par_errors_.resize(num_task + 1); if (need_sync) { delete[] sync_counter_; sync_counter_ = new std::atomic[num_task * kSyncStride]; } } if (need_sync) { for (int i = 0; i < num_task; ++i) { sync_counter_[i * kSyncStride].store(0, std::memory_order_relaxed); } this->env.sync_handle = sync_counter_; } else { this->env.sync_handle = nullptr; } } ~ParallelLauncher() { delete[] sync_counter_; } // Wait n jobs to finish int WaitForJobs() { while (num_pending_.load() != 0) { dgl::runtime::threading::YieldThread(); } if (!has_error_.load()) return 0; // the following is intended to use string due to // security issue raised in SGX backend std::string err(""); for (size_t i = 0; i < par_errors_.size(); ++i) { if (par_errors_[i].length() != 0) { err += "Task " + std::to_string(i) + " error: " + par_errors_[i] + '\n'; par_errors_[i].clear(); } } DGLAPISetLastError(err.c_str()); return -1; } // Signal that one job has finished. void SignalJobError(int task_id) { num_pending_.fetch_sub(1); par_errors_[task_id] = DGLGetLastError(); has_error_.store(true); } // Signal that one job has finished. void SignalJobFinish() { num_pending_.fetch_sub(1); } // Get thread local version of the store. static ParallelLauncher* ThreadLocal() { return dmlc::ThreadLocalStore::Get(); } // The parallel lambda FDGLParallelLambda flambda; // The closure data void* cdata; // Local env DGLParallelGroupEnv env; // Whether this thread is worker of the pool. // used to prevent recursive launch. bool is_worker{false}; private: // The pending jobs. std::atomic num_pending_; // Whether error has been countered. std::atomic has_error_; // The counter page. 
std::atomic* sync_counter_{nullptr}; // The error message std::vector par_errors_; }; /** @brief Lock-free single-producer-single-consumer queue for each thread */ class SpscTaskQueue { public: /** @brief The task entry */ struct Task { ParallelLauncher* launcher; int32_t task_id; }; SpscTaskQueue() : buffer_(new Task[kRingSize]), head_(0), tail_(0) {} ~SpscTaskQueue() { delete[] buffer_; } /** * @brief Push a task into the queue and notify the comsumer if it is on wait. * @param input The task to be dequeued. */ void Push(const Task& input) { while (!Enqueue(input)) { dgl::runtime::threading::YieldThread(); } if (pending_.fetch_add(1) == -1) { std::unique_lock lock(mutex_); cv_.notify_one(); } } /** * @brief Pop a task out of the queue and condition wait if no tasks. * @param output The pointer to the task to be dequeued. * @param spin_count The number of iterations to spin before sleep. * @return Whether pop is successful (true) or we need to exit now (false). */ bool Pop(Task* output, uint32_t spin_count = 300000) { // Busy wait a bit when the queue is empty. // If a new task comes to the queue quickly, this wait avoid the worker from // sleeping. The default spin count is set by following the typical omp // convention for (uint32_t i = 0; i < spin_count && pending_.load() == 0; ++i) { dgl::runtime::threading::YieldThread(); } if (pending_.fetch_sub(1) == 0) { std::unique_lock lock(mutex_); cv_.wait( lock, [this] { return pending_.load() >= 0 || exit_now_.load(); }); } if (exit_now_.load(std::memory_order_relaxed)) { return false; } const uint32_t head = head_.load(std::memory_order_relaxed); // sanity check if the queue is empty CHECK(tail_.load(std::memory_order_acquire) != head); *output = buffer_[head]; head_.store((head + 1) % kRingSize, std::memory_order_release); return true; } /** * @brief Signal to terminate the worker. 
*/ void SignalForKill() { std::lock_guard lock(mutex_); exit_now_.store(true); cv_.notify_all(); } protected: /** * @brief Lock-free enqueue. * @param input The task to be enqueued. * @return Whether the task is enqueued. */ bool Enqueue(const Task& input) { if (exit_now_.load(std::memory_order_relaxed)) return false; const uint32_t tail = tail_.load(std::memory_order_relaxed); if ((tail + 1) % kRingSize != (head_.load(std::memory_order_acquire))) { buffer_[tail] = input; tail_.store((tail + 1) % kRingSize, std::memory_order_release); return true; } return false; } // the cache line paddings are used for avoid false sharing between atomic // variables typedef char cache_line_pad_t[kL1CacheBytes]; cache_line_pad_t pad0_; // size of the queue, the queue can host size_ - 1 items at most // define it as a constant for better compiler optimization static constexpr const int kRingSize = 2; // pointer to access the item Task* const buffer_; cache_line_pad_t pad1_; // queue head, where one gets a task from the queue std::atomic head_; cache_line_pad_t pad2_; // queue tail, when one puts a task to the queue std::atomic tail_; cache_line_pad_t pad3_; // pending tasks in the queue std::atomic pending_{0}; cache_line_pad_t pad4_; // signal for exit now std::atomic exit_now_{false}; // internal mutex std::mutex mutex_; // cv for consumer std::condition_variable cv_; }; // The thread pool class ThreadPool { public: ThreadPool() : num_workers_(dgl::runtime::threading::MaxConcurrency()) { for (int i = 0; i < num_workers_; ++i) { // The SpscTaskQueue only hosts ONE item at a time queues_.emplace_back(std::unique_ptr(new SpscTaskQueue())); } threads_ = std::unique_ptr( new dgl::runtime::threading::ThreadGroup( num_workers_, [this](int worker_id) { this->RunWorker(worker_id); }, exclude_worker0_ /* include_main_thread */)); num_workers_used_ = threads_->Configure(threading::ThreadGroup::kBig, 0, exclude_worker0_); } ~ThreadPool() { for (std::unique_ptr& q : queues_) { 
q->SignalForKill(); } threads_.reset(); } int Launch( FDGLParallelLambda flambda, void* cdata, int num_task, int need_sync) { ParallelLauncher* launcher = ParallelLauncher::ThreadLocal(); CHECK(!launcher->is_worker) << "Cannot launch parallel job inside worker, " "consider fuse then parallel"; if (num_task == 0) { num_task = num_workers_used_; } if (need_sync != 0) { CHECK_LE(num_task, num_workers_used_) << "Request parallel sync task larger than number of threads used " << " workers=" << num_workers_used_ << " request=" << num_task; } launcher->Init(flambda, cdata, num_task, need_sync != 0); SpscTaskQueue::Task tsk; tsk.launcher = launcher; // if worker0 is taken by the master, queues_[0] is abandoned for (int i = exclude_worker0_; i < num_task; ++i) { tsk.task_id = i; queues_[i]->Push(tsk); } // use the master thread to run task 0 if (exclude_worker0_) { DGLParallelGroupEnv* penv = &(tsk.launcher->env); if ((*tsk.launcher->flambda)(0, penv, cdata) == 0) { tsk.launcher->SignalJobFinish(); } else { tsk.launcher->SignalJobError(tsk.task_id); } } int res = launcher->WaitForJobs(); return res; } static ThreadPool* ThreadLocal() { return dmlc::ThreadLocalStore::Get(); } void UpdateWorkerConfiguration( threading::ThreadGroup::AffinityMode mode, int nthreads) { // this will also reset the affinity of the ThreadGroup // may use less than the MaxConcurrency number of workers num_workers_used_ = threads_->Configure(mode, nthreads, exclude_worker0_); // if MaxConcurrency restricted the number of workers (e.g., due to // hyperthreading), respect the restriction num_workers_used_ = std::min(num_workers_, num_workers_used_); } private: // Internal worker function. 
void RunWorker(int worker_id) { SpscTaskQueue* queue = queues_[worker_id].get(); SpscTaskQueue::Task task; ParallelLauncher::ThreadLocal()->is_worker = true; while (queue->Pop(&task)) { CHECK(task.launcher != nullptr); DGLParallelGroupEnv* penv = &(task.launcher->env); void* cdata = task.launcher->cdata; if ((*task.launcher->flambda)(task.task_id, penv, cdata) == 0) { task.launcher->SignalJobFinish(); } else { task.launcher->SignalJobError(task.task_id); } } } int num_workers_; // number of workers used (can be restricted with affinity pref) int num_workers_used_; // if excluding worker 0 and using master to run task 0 #ifndef _LIBCPP_SGX_CONFIG bool exclude_worker0_{true}; #else bool exclude_worker0_{false}; #endif std::vector > queues_; std::unique_ptr threads_; }; DGL_REGISTER_GLOBAL("runtime.config_threadpool") .set_body([](DGLArgs args, DGLRetValue* rv) { threading::ThreadGroup::AffinityMode mode = static_cast( static_cast(args[0])); int nthreads = args[1]; ThreadPool::ThreadLocal()->UpdateWorkerConfiguration(mode, nthreads); }); } // namespace runtime } // namespace dgl int DGLBackendParallelLaunch( FDGLParallelLambda flambda, void* cdata, int num_task) { int res = dgl::runtime::ThreadPool::ThreadLocal()->Launch( flambda, cdata, num_task, 1); return res; } int DGLBackendParallelBarrier(int task_id, DGLParallelGroupEnv* penv) { using dgl::runtime::kSyncStride; int num_task = penv->num_task; std::atomic* sync_counter = reinterpret_cast*>(penv->sync_handle); int old_counter = sync_counter[task_id * kSyncStride].fetch_add( 1, std::memory_order_release); for (int i = 0; i < num_task; ++i) { if (i != task_id) { while (sync_counter[i * kSyncStride].load(std::memory_order_relaxed) <= old_counter) { dgl::runtime::threading::YieldThread(); } } } std::atomic_thread_fence(std::memory_order_acquire); return 0; } ================================================ FILE: src/runtime/thread_storage_scope.h ================================================ /** * Copyright (c) 
2017 by Contributors * @file thread_storage_scope.h * @brief Extract thread axis configuration from DGLArgs. */ #ifndef DGL_RUNTIME_THREAD_STORAGE_SCOPE_H_ #define DGL_RUNTIME_THREAD_STORAGE_SCOPE_H_ #include #include #include namespace dgl { namespace runtime { /** * @brief Memory hierachy rank in the storage system * @note The global rank and shared rank have one to one * correspondence to the thread rank. */ enum class StorageRank { /** @brief global memory */ kGlobal = 0, /** @brief shared memory among thread group */ kShared = 1, /** * @brief reserved for warp memory. * This is only used by programming model. * There is no such memory usually in GPU. * Instead, we can simulate it by registers and shuffle. */ kWarp = 2, /** @brief thread local memory */ kLocal = 3 }; /** * @param thread_scope_rank The thread scope rank * @return default storage rank given the thread scope */ inline StorageRank DefaultStorageRank(int thread_scope_rank) { switch (thread_scope_rank) { case -1: return StorageRank::kGlobal; case 0: return StorageRank::kShared; case 1: return StorageRank::kLocal; default: { LOG(FATAL) << "unknown rank"; return StorageRank::kGlobal; } } } /** @brief class to represent storage scope */ struct StorageScope { /** @brief The rank of the storage */ StorageRank rank{StorageRank::kGlobal}; /** @brief tag for special purpose memory. */ std::string tag; // comparator inline bool operator==(const StorageScope& other) const { return rank == other.rank && tag == other.tag; } inline bool operator!=(const StorageScope& other) const { return !(*this == other); } inline std::string to_string() const { std::string ret; switch (rank) { case StorageRank::kGlobal: return "global" + tag; case StorageRank::kShared: return "shared" + tag; case StorageRank::kWarp: return "warp" + tag; case StorageRank::kLocal: return "local" + tag; default: LOG(FATAL) << "unknown storage scope"; return ""; } } /** * @brief make storage scope from string * @param s The string to be parsed. 
* @return The storage scope. */ static StorageScope make(const std::string& s) { StorageScope r; if (s.compare(0, 6, "global") == 0) { r.rank = StorageRank::kGlobal; r.tag = s.substr(6, std::string::npos); } else if (s.compare(0, 6, "shared") == 0) { r.rank = StorageRank::kShared; r.tag = s.substr(6, std::string::npos); } else if (s.compare(0, 4, "warp") == 0) { r.rank = StorageRank::kWarp; r.tag = s.substr(4, std::string::npos); } else if (s.compare(0, 5, "local") == 0) { r.rank = StorageRank::kLocal; r.tag = s.substr(5, std::string::npos); } else { LOG(FATAL) << "unknown storage scope " << s; } return r; } }; /** @brief class to represent thread scope */ struct ThreadScope { /** @brief The rank of thread scope */ int rank{0}; /** @brief the dimension index under the rank */ int dim_index{0}; /** * @brief make storage scope from string * @param s The string to be parsed. * @return The storage scope. */ static ThreadScope make(const std::string& s) { ThreadScope r; if (s == "vthread" || s == "cthread") { // virtual thread at the same level as local r.rank = 1; r.dim_index = -1; } else if (s.compare(0, 9, "blockIdx.") == 0) { r.rank = 0; r.dim_index = static_cast(s[9] - 'x'); } else if (s.compare(0, 10, "threadIdx.") == 0) { r.rank = 1; r.dim_index = static_cast(s[10] - 'x'); } else { LOG(FATAL) << "Unknown threadscope " << s; } return r; } }; /** @brief workload speccification */ struct ThreadWorkLoad { // array, first three are thread configuration. size_t work_size[6]; /** * @param i The block dimension. * @return i-th block dim */ inline size_t block_dim(size_t i) const { return work_size[i + 3]; } /** * @param i The grid dimension. 
* @return i-th grid dim */ inline size_t grid_dim(size_t i) const { return work_size[i]; } }; /** @brief Thread axis configuration */ class ThreadAxisConfig { public: void Init(size_t base, const std::vector& thread_axis_tags) { base_ = base; std::vector filled(6, false); for (size_t i = 0; i < thread_axis_tags.size(); ++i) { const std::string& tag = thread_axis_tags[i]; ThreadScope ts = ThreadScope::make(tag); arg_index_map_.push_back(ts.rank * 3 + ts.dim_index); filled[ts.rank * 3 + ts.dim_index] = true; } work_dim_ = 1; for (int i = 0; i < 3; ++i) { if (filled[i] || filled[i + 3]) { work_dim_ = i + 1; } } } // extract workload from arguments. ThreadWorkLoad Extract(DGLArgs x) const { ThreadWorkLoad w; std::fill(w.work_size, w.work_size + 6, 1); for (size_t i = 0; i < arg_index_map_.size(); ++i) { w.work_size[arg_index_map_[i]] = static_cast(x.values[base_ + i].v_int64); } return w; } // return the work dim size_t work_dim() const { return work_dim_; } private: /** @brief base axis */ size_t base_; /** @brief The worker dimension */ size_t work_dim_; /** @brief The index mapping. 
*/ std::vector arg_index_map_; }; } // namespace runtime } // namespace dgl namespace std { template <> struct hash<::dgl::runtime::StorageScope> { std::size_t operator()(const ::dgl::runtime::StorageScope& k) const { return static_cast(k.rank); } }; } // namespace std #endif // DGL_RUNTIME_THREAD_STORAGE_SCOPE_H_ ================================================ FILE: src/runtime/threading_backend.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file threading_backend.cc * @brief Native threading backend */ #include #include #include #include #if defined(__linux__) || defined(__ANDROID__) #include #else #endif #if defined(__linux__) #include #endif namespace dgl { namespace runtime { namespace threading { class ThreadGroup::Impl { public: Impl( int num_workers, std::function worker_callback, bool exclude_worker0) : num_workers_(num_workers) { CHECK_GE(num_workers, 1) << "Requested a non-positive number of worker threads."; for (int i = exclude_worker0; i < num_workers_; ++i) { threads_.emplace_back([worker_callback, i] { worker_callback(i); }); } InitSortedOrder(); } ~Impl() { Join(); } void Join() { for (auto &t : threads_) { if (t.joinable()) t.join(); } } int Configure(AffinityMode mode, int nthreads, bool exclude_worker0) { int num_workers_used = 0; if (mode == kLittle) { num_workers_used = little_count_; } else if (mode == kBig) { num_workers_used = big_count_; } else { // use default num_workers_used = threading::MaxConcurrency(); } // if a specific number was given, use that if (nthreads) { num_workers_used = nthreads; } // if MaxConcurrency restricted the number of workers (e.g., due to // hyperthreading), respect the restriction. On CPUs with N logical cores // and N/2 physical cores this will set affinity to the first N/2 logical // ones. 
num_workers_used = std::min(num_workers_, num_workers_used); const char *val = getenv("DGL_BIND_THREADS"); if (val == nullptr || atoi(val) == 1) { // Do not set affinity if there are more workers than found cores if (sorted_order_.size() >= static_cast(num_workers_)) { SetAffinity(exclude_worker0, mode == kLittle); } else { LOG(WARNING) << "The thread affinity cannot be set when the number of workers" << "is larger than the number of available cores in the system."; } } return num_workers_used; } private: // bind worker threads to disjoint cores // if worker 0 is offloaded to master, i.e. exclude_worker0 is true, // the master thread is bound to core 0. void SetAffinity(bool exclude_worker0, bool reverse = false) { #if defined(__ANDROID__) #ifndef CPU_SET #define CPU_SETSIZE 1024 #define __NCPUBITS (8 * sizeof(uint64_t)) typedef struct { uint64_t __bits[CPU_SETSIZE / __NCPUBITS]; } cpu_set_t; #define CPU_SET(cpu, cpusetp) \ ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (1UL << ((cpu) % __NCPUBITS))) #define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t)) #endif #endif #if defined(__linux__) || defined(__ANDROID__) CHECK_GE(sorted_order_.size(), num_workers_); for (unsigned i = 0; i < threads_.size(); ++i) { unsigned core_id; if (reverse) { core_id = sorted_order_[sorted_order_.size() - (i + exclude_worker0) - 1]; } else { core_id = sorted_order_[i + exclude_worker0]; } cpu_set_t cpuset; CPU_ZERO(&cpuset); CPU_SET(core_id, &cpuset); #if defined(__ANDROID__) sched_setaffinity( threads_[i].native_handle(), sizeof(cpu_set_t), &cpuset); #else pthread_setaffinity_np( threads_[i].native_handle(), sizeof(cpu_set_t), &cpuset); #endif } if (exclude_worker0) { // bind the master thread to core 0 cpu_set_t cpuset; CPU_ZERO(&cpuset); if (reverse) { CPU_SET(sorted_order_[sorted_order_.size() - 1], &cpuset); } else { CPU_SET(sorted_order_[0], &cpuset); } #if defined(__ANDROID__) sched_setaffinity(pthread_self(), sizeof(cpu_set_t), &cpuset); #else 
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); #endif } #endif } void InitSortedOrder() { unsigned int threads = std::thread::hardware_concurrency(); std::vector > max_freqs; for (unsigned int i = 0; i < threads; ++i) { int64_t cur_freq = 0; #if defined(__linux__) || defined(__ANDROID__) std::ostringstream filepath; filepath << "/sys/devices/system/cpu/cpu" << i << "/cpufreq/cpuinfo_max_freq"; std::ifstream ifs(filepath.str()); if (!ifs.fail()) { if (!(ifs >> cur_freq)) { cur_freq = -1; } ifs.close(); } #endif max_freqs.push_back(std::make_pair(i, cur_freq)); } auto fcmpbyfreq = [](const std::pair &a, const std::pair &b) { return a.second == b.second ? a.first < b.first : a.second > b.second; }; std::sort(max_freqs.begin(), max_freqs.end(), fcmpbyfreq); int64_t big_freq = max_freqs.begin()->second; int64_t little_freq = max_freqs.rbegin()->second; for (auto it = max_freqs.begin(); it != max_freqs.end(); it++) { sorted_order_.push_back(it->first); if (big_freq == it->second) { big_count_++; } if (big_freq != little_freq && little_freq == it->second) { little_count_++; } } if (big_count_ + little_count_ != static_cast(sorted_order_.size())) { LOG(WARNING) << "more than two frequencies detected!"; } } int num_workers_; std::vector threads_; std::vector sorted_order_; int big_count_ = 0; int little_count_ = 0; }; ThreadGroup::ThreadGroup( int num_workers, std::function worker_callback, bool exclude_worker0) : impl_(new ThreadGroup::Impl( num_workers, worker_callback, exclude_worker0)) {} ThreadGroup::~ThreadGroup() { delete impl_; } void ThreadGroup::Join() { impl_->Join(); } int ThreadGroup::Configure( AffinityMode mode, int nthreads, bool exclude_worker0) { return impl_->Configure(mode, nthreads, exclude_worker0); } void YieldThread() { std::this_thread::yield(); } int MaxConcurrency() { int max_concurrency = 1; const char *val = getenv("DGL_NUM_THREADS"); if (val == nullptr) { val = getenv("OMP_NUM_THREADS"); } if (val != nullptr) { 
max_concurrency = atoi(val);
  } else {
    max_concurrency = std::thread::hardware_concurrency();
#if defined(_M_X64) || defined(__x86_64__)
    max_concurrency /= 2;  // ignore hyper-threading
#endif
  }
  // Clamp so that an unparsable or non-positive setting still yields 1.
  return std::max(max_concurrency, 1);
}

}  // namespace threading
}  // namespace runtime
}  // namespace dgl

================================================
FILE: src/runtime/utils.cc
================================================
/**
 *  Copyright (c) 2020 by Contributors
 * @file utils.cc
 * @brief DGL util functions
 */

#include
#include
#include
#include

#include "../array/array_op.h"
#include "../c_api_common.h"

using namespace dgl::runtime;
using namespace dgl::aten::impl;

namespace dgl {

// Set the number of OpenMP threads to args[0].
DGL_REGISTER_GLOBAL("utils.internal._CAPI_DGLSetOMPThreads")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      int num_threads = args[0];
      omp_set_num_threads(num_threads);
    });

// Return omp_get_max_threads().
DGL_REGISTER_GLOBAL("utils.internal._CAPI_DGLGetOMPThreads")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      *rv = omp_get_max_threads();
    });

// Report how the COO matrix built from (num_src, num_dst, src, dst) is sorted.
DGL_REGISTER_GLOBAL("utils.checks._CAPI_DGLCOOIsSorted")
    .set_body([](DGLArgs args, DGLRetValue* rv) {
      IdArray src = args[0];
      IdArray dst = args[1];
      int64_t num_src = args[2];
      int64_t num_dst = args[3];

      bool row_sorted, col_sorted;
      std::tie(row_sorted, col_sorted) =
          COOIsSorted(aten::COOMatrix(num_src, num_dst, src, dst));

      // make sure col_sorted is only true when row_sorted is true
      assert(!(!row_sorted && col_sorted));

      // 0 for unsorted, 1 for row sorted, 2 for row and col sorted
      int64_t sorted_status = row_sorted + col_sorted;
      *rv = sorted_status;
    });

}  // namespace dgl

================================================
FILE: src/runtime/workspace.h
================================================
/**
 *  Copyright (c) 2021 by Contributors
 * @file workspace.h
 * @brief Wrapper class that owns a device workspace allocation.
*/

#ifndef DGL_RUNTIME_WORKSPACE_H_
#define DGL_RUNTIME_WORKSPACE_H_

#include
#include

namespace dgl {
namespace runtime {

/**
 * @brief Owns a device workspace holding `size` elements of T.
 *
 * The constructor requests size * sizeof(T) bytes through
 * DeviceAPI::AllocWorkspace; the destructor returns the memory through
 * DeviceAPI::FreeWorkspace unless free() was already called.
 *
 * NOTE(review): no copy constructor or copy assignment is declared here, so
 * a copy would share ptr_ and both destructors would free it -- confirm that
 * instances are never copied.
 */
template
class Workspace {
 public:
  Workspace(DeviceAPI* device, DGLContext ctx, const size_t size)
      : device_(device),
        ctx_(ctx),
        size_(size * sizeof(T)),
        ptr_(static_cast(device_->AllocWorkspace(ctx_, size_))) {}

  ~Workspace() {
    if (*this) {
      free();
    }
  }

  /** @brief True while the workspace still holds a non-null pointer. */
  operator bool() const { return ptr_ != nullptr; }

  T* get() {
    assert(size_ == 0 || *this);
    return ptr_;
  }

  T const* get() const {
    assert(size_ == 0 || *this);
    return ptr_;
  }

  /** @brief Return the memory to the device and reset the pointer. */
  void free() {
    assert(size_ == 0 || *this);
    device_->FreeWorkspace(ctx_, ptr_);
    ptr_ = nullptr;
  }

 private:
  DeviceAPI* device_;
  DGLContext ctx_;
  size_t size_;  // size of the allocation in bytes
  T* ptr_;
};

/**
 * @brief Untyped variant: `size` is taken as a byte count and get() returns
 * void pointers.
 */
template <>
class Workspace {
 public:
  Workspace(DeviceAPI* device, DGLContext ctx, const size_t size)
      : device_(device),
        ctx_(ctx),
        size_(size),
        ptr_(static_cast(device_->AllocWorkspace(ctx_, size_))) {}

  ~Workspace() {
    if (*this) {
      free();
    }
  }

  operator bool() const { return ptr_ != nullptr; }

  void* get() {
    assert(size_ == 0 || *this);
    return ptr_;
  }

  void const* get() const {
    assert(size_ == 0 || *this);
    return ptr_;
  }

  void free() {
    assert(size_ == 0 || *this);
    device_->FreeWorkspace(ctx_, ptr_);
    ptr_ = nullptr;
  }

 private:
  DeviceAPI* device_;
  DGLContext ctx_;
  size_t size_;
  void* ptr_;
};

}  // namespace runtime
}  // namespace dgl

#endif  // DGL_RUNTIME_WORKSPACE_H_

================================================
FILE: src/runtime/workspace_pool.cc
================================================
/**
 *  Copyright (c) 2017 by Contributors
 * @file workspace_pool.cc
 * @brief Workspace pool utility.
 */
#include "workspace_pool.h"

#include

namespace dgl {
namespace runtime {

// page size (4 << 10 = 4096 bytes).
constexpr size_t kWorkspacePageSize = 4 << 10;

class WorkspacePool::Pool {
 public:
  // constructor
  Pool() {
    // safe guard header on each list.
Entry e;
    // The guard entry has size 0, so it compares smaller than every real
    // entry and stops the backward scans in Alloc() and Free() at index 0.
    e.data = nullptr;
    e.size = 0;
    free_list_.push_back(e);
    allocated_.push_back(e);
  }
  // allocate from pool
  //
  // Returns a buffer of at least `nbytes` bytes, rounded up to a whole number
  // of pages. A free entry is reused when one is large enough; otherwise the
  // largest free entry is released and reallocated at the requested size, or
  // a fresh buffer is allocated when nothing is free.
  void* Alloc(DGLContext ctx, DeviceAPI* device, size_t nbytes) {
    // Allocate align to page.
    nbytes = (nbytes + (kWorkspacePageSize - 1)) / kWorkspacePageSize *
             kWorkspacePageSize;
    // A zero-byte request still gets one page.
    if (nbytes == 0) nbytes = kWorkspacePageSize;
    Entry e;
    // Element type passed to AllocDataSpace: unsigned 8-bit, single lane.
    DGLDataType type;
    type.code = kDGLUInt;
    type.bits = 8;
    type.lanes = 1;
    if (free_list_.size() == 2) {
      // Exactly one real free entry (index 0 is the guard).
      e = free_list_.back();
      free_list_.pop_back();
      if (e.size < nbytes) {
        // resize the page
        device->FreeDataSpace(ctx, e.data);
        e.data =
            device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
        e.size = nbytes;
      }
    } else if (free_list_.size() == 1) {
      // Only the guard is present: nothing to reuse.
      e.data = device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
      e.size = nbytes;
    } else {
      if (free_list_.back().size >= nbytes) {
        // find smallest fit
        // Walk backwards from the second-largest entry while entries are
        // still big enough; the entry just after the stopping point is the
        // smallest one that fits.
        auto it = free_list_.end() - 2;
        for (; it->size >= nbytes; --it) {
        }
        e = *(it + 1);
        free_list_.erase(it + 1);
      } else {
        // resize the page
        e = free_list_.back();
        free_list_.pop_back();
        device->FreeDataSpace(ctx, e.data);
        e.data =
            device->AllocDataSpace(ctx, nbytes, kTempAllocaAlignment, type);
        e.size = nbytes;
      }
    }
    allocated_.push_back(e);
    return e.data;
  }
  // free resource back to pool
  void Free(void* data) {
    Entry e;
    if (allocated_.back().data == data) {
      // quick path, last allocated.
e = allocated_.back(); allocated_.pop_back(); } else { int index = static_cast(allocated_.size()) - 2; for (; index > 0 && allocated_[index].data != data; --index) { } CHECK_GT(index, 0) << "trying to free things that has not been allocated"; e = allocated_[index]; allocated_.erase(allocated_.begin() + index); } if (free_list_.back().size < e.size) { free_list_.push_back(e); } else if (free_list_.size() == 2) { free_list_.push_back(free_list_.back()); free_list_[1] = e; } else { size_t i = free_list_.size() - 1; free_list_.resize(free_list_.size() + 1); for (; e.size < free_list_[i].size; --i) { free_list_[i + 1] = free_list_[i]; } free_list_[i + 1] = e; } } // Release all resources void Release(DGLContext ctx, DeviceAPI* device) { CHECK_EQ(allocated_.size(), 1); for (size_t i = 1; i < free_list_.size(); ++i) { device->FreeDataSpace(ctx, free_list_[i].data); } free_list_.clear(); } private: /** @brief a single entry in the pool */ struct Entry { void* data; size_t size; }; /** @brief List of free items, sorted from small to big size */ std::vector free_list_; /** @brief List of allocated items */ std::vector allocated_; }; WorkspacePool::WorkspacePool( DGLDeviceType device_type, std::shared_ptr device) : device_type_(device_type), device_(device) {} WorkspacePool::~WorkspacePool() { /** * Note that the following code will cause Segmentation fault with MXNet. * Since we're phasing out MXNet, it's acceptable to keep it as it is. * Commenting out the following code will cause memory leak. 
*/ for (size_t i = 0; i < array_.size(); ++i) { if (array_[i] != nullptr) { DGLContext ctx; ctx.device_type = device_type_; ctx.device_id = static_cast(i); array_[i]->Release(ctx, device_.get()); delete array_[i]; } } } void* WorkspacePool::AllocWorkspace(DGLContext ctx, size_t size) { if (static_cast(ctx.device_id) >= array_.size()) { array_.resize(ctx.device_id + 1, nullptr); } if (array_[ctx.device_id] == nullptr) { array_[ctx.device_id] = new Pool(); } return array_[ctx.device_id]->Alloc(ctx, device_.get(), size); } void WorkspacePool::FreeWorkspace(DGLContext ctx, void* ptr) { CHECK( static_cast(ctx.device_id) < array_.size() && array_[ctx.device_id] != nullptr); array_[ctx.device_id]->Free(ptr); } } // namespace runtime } // namespace dgl ================================================ FILE: src/runtime/workspace_pool.h ================================================ /** * Copyright (c) 2017 by Contributors * @file workspace_pool.h * @brief Workspace pool utility. */ #ifndef DGL_RUNTIME_WORKSPACE_POOL_H_ #define DGL_RUNTIME_WORKSPACE_POOL_H_ #include #include #include namespace dgl { namespace runtime { /** * @brief A workspace pool to manage * * \note We have the following assumption about backend temporal * workspace allocation, and will optimize for such assumption, * some of these assumptions can be enforced by the compiler. * * - Only a few allocation will happen, and space will be released after use. * - The release order is usually in reverse order of allocate * - Repeative pattern of same allocations over different runs. */ class WorkspacePool { public: /** * @brief Create pool with specific device type and device. * @param device_type The device type. * @param device The device API. */ WorkspacePool(DGLDeviceType device_type, std::shared_ptr device); /** @brief destructor */ ~WorkspacePool(); /** * @brief Allocate temporal workspace. * @param ctx The context of allocation. * @param size The size to be allocated. 
*/ void* AllocWorkspace(DGLContext ctx, size_t size); /** * @brief Free temporal workspace in backend execution. * * @param ctx The context of allocation. * @param ptr The pointer to be freed. */ void FreeWorkspace(DGLContext ctx, void* ptr); private: class Pool; /** @brief pool of device local array */ std::vector array_; /** @brief device type this pool support */ DGLDeviceType device_type_; /** @brief The device API */ std::shared_ptr device_; }; } // namespace runtime } // namespace dgl #endif // DGL_RUNTIME_WORKSPACE_POOL_H_ ================================================ FILE: src/scheduler/scheduler.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file scheduler/scheduler.cc * @brief DGL Scheduler implementation */ #include #include #include namespace dgl { namespace sched { template std::vector DegreeBucketing( const IdArray& msg_ids, const IdArray& vids, const IdArray& recv_ids) { auto n_msgs = msg_ids->shape[0]; const IdType* vid_data = static_cast(vids->data); const IdType* msg_id_data = static_cast(msg_ids->data); const IdType* recv_id_data = static_cast(recv_ids->data); // in edge: dst->msgs std::unordered_map> in_edges; for (IdType i = 0; i < n_msgs; ++i) { in_edges[vid_data[i]].push_back(msg_id_data[i]); } // bkt: deg->dsts std::unordered_map> bkt; for (const auto& it : in_edges) { bkt[it.second.size()].push_back(it.first); } std::unordered_set zero_deg_nodes; for (IdType i = 0; i < recv_ids->shape[0]; ++i) { if (in_edges.find(recv_id_data[i]) == in_edges.end()) { zero_deg_nodes.insert(recv_id_data[i]); } } auto n_zero_deg = zero_deg_nodes.size(); // calc output size IdType n_deg = bkt.size(); IdType n_dst = in_edges.size(); IdType n_mid_sec = bkt.size(); // zero deg won't affect message size if (n_zero_deg > 0) { n_deg += 1; n_dst += n_zero_deg; } // initialize output IdArray degs = IdArray::Empty({n_deg}, vids->dtype, vids->ctx); IdArray nids = IdArray::Empty({n_dst}, vids->dtype, vids->ctx); IdArray 
nid_section = IdArray::Empty({n_deg}, vids->dtype, vids->ctx); IdArray mids = IdArray::Empty({n_msgs}, vids->dtype, vids->ctx); IdArray mid_section = IdArray::Empty({n_mid_sec}, vids->dtype, vids->ctx); IdType* deg_ptr = static_cast(degs->data); IdType* nid_ptr = static_cast(nids->data); IdType* nsec_ptr = static_cast(nid_section->data); IdType* mid_ptr = static_cast(mids->data); IdType* msec_ptr = static_cast(mid_section->data); // fill in bucketing ordering for (const auto& it : bkt) { // for each bucket const IdType deg = it.first; const IdType bucket_size = it.second.size(); *deg_ptr++ = deg; *nsec_ptr++ = bucket_size; *msec_ptr++ = deg * bucket_size; for (const auto dst : it.second) { // for each dst in this bucket *nid_ptr++ = dst; for (const auto mid : in_edges[dst]) { // for each in edge of dst *mid_ptr++ = mid; } } } if (n_zero_deg > 0) { *deg_ptr = 0; *nsec_ptr = n_zero_deg; for (const auto dst : zero_deg_nodes) { *nid_ptr++ = dst; } } std::vector ret; ret.push_back(std::move(degs)); ret.push_back(std::move(nids)); ret.push_back(std::move(nid_section)); ret.push_back(std::move(mids)); ret.push_back(std::move(mid_section)); return ret; } template std::vector DegreeBucketing( const IdArray& msg_ids, const IdArray& vids, const IdArray& recv_ids); template std::vector DegreeBucketing( const IdArray& msg_ids, const IdArray& vids, const IdArray& recv_ids); template std::vector GroupEdgeByNodeDegree( const IdArray& uids, const IdArray& vids, const IdArray& eids) { auto n_edge = eids->shape[0]; const IdType* eid_data = static_cast(eids->data); const IdType* uid_data = static_cast(uids->data); const IdType* vid_data = static_cast(vids->data); // node2edge: group_by nodes uid -> (eid, the other end vid) std::unordered_map>> node2edge; for (IdType i = 0; i < n_edge; ++i) { node2edge[uid_data[i]].emplace_back(eid_data[i], vid_data[i]); } // bkt: deg -> group_by node uid std::unordered_map> bkt; for (const auto& it : node2edge) { 
bkt[it.second.size()].push_back(it.first); } // number of unique degree IdType n_deg = bkt.size(); // initialize output IdArray degs = IdArray::Empty({n_deg}, eids->dtype, eids->ctx); IdArray new_uids = IdArray::Empty({n_edge}, uids->dtype, uids->ctx); IdArray new_vids = IdArray::Empty({n_edge}, vids->dtype, vids->ctx); IdArray new_eids = IdArray::Empty({n_edge}, eids->dtype, eids->ctx); IdArray sections = IdArray::Empty({n_deg}, eids->dtype, eids->ctx); IdType* deg_ptr = static_cast(degs->data); IdType* uid_ptr = static_cast(new_uids->data); IdType* vid_ptr = static_cast(new_vids->data); IdType* eid_ptr = static_cast(new_eids->data); IdType* sec_ptr = static_cast(sections->data); // fill in bucketing ordering for (const auto& it : bkt) { // for each bucket // degree of this bucket const IdType deg = it.first; // number of edges in this bucket const IdType bucket_size = it.second.size(); *deg_ptr++ = deg; *sec_ptr++ = deg * bucket_size; for (const auto u : it.second) { // for uid in this bucket for (const auto& pair : node2edge[u]) { // for each edge of uid *uid_ptr++ = u; *vid_ptr++ = pair.second; *eid_ptr++ = pair.first; } } } std::vector ret; ret.push_back(std::move(degs)); ret.push_back(std::move(new_uids)); ret.push_back(std::move(new_vids)); ret.push_back(std::move(new_eids)); ret.push_back(std::move(sections)); return ret; } template std::vector GroupEdgeByNodeDegree( const IdArray& uids, const IdArray& vids, const IdArray& eids); template std::vector GroupEdgeByNodeDegree( const IdArray& uids, const IdArray& vids, const IdArray& eids); } // namespace sched } // namespace dgl ================================================ FILE: src/scheduler/scheduler_apis.cc ================================================ /** * Copyright (c) 2018 by Contributors * @file scheduler/scheduler_apis.cc * @brief DGL scheduler APIs */ #include #include #include #include "../array/cpu/array_utils.h" #include "../c_api_common.h" using dgl::runtime::DGLArgs; using 
dgl::runtime::DGLRetValue; using dgl::runtime::NDArray; namespace dgl { DGL_REGISTER_GLOBAL( "_deprecate.runtime.degree_bucketing._CAPI_DGLDegreeBucketing") .set_body([](DGLArgs args, DGLRetValue* rv) { const IdArray msg_ids = args[0]; const IdArray vids = args[1]; const IdArray nids = args[2]; CHECK_SAME_DTYPE(msg_ids, vids); CHECK_SAME_DTYPE(msg_ids, nids); ATEN_ID_TYPE_SWITCH(msg_ids->dtype, IdType, { *rv = ConvertNDArrayVectorToPackedFunc( sched::DegreeBucketing(msg_ids, vids, nids)); }); }); DGL_REGISTER_GLOBAL( "_deprecate.runtime.degree_bucketing._CAPI_DGLGroupEdgeByNodeDegree") .set_body([](DGLArgs args, DGLRetValue* rv) { const IdArray uids = args[0]; const IdArray vids = args[1]; const IdArray eids = args[2]; CHECK_SAME_DTYPE(uids, vids); CHECK_SAME_DTYPE(uids, eids); ATEN_ID_TYPE_SWITCH(uids->dtype, IdType, { *rv = ConvertNDArrayVectorToPackedFunc( sched::GroupEdgeByNodeDegree(uids, vids, eids)); }); }); } // namespace dgl ================================================ FILE: tensoradapter/include/tensoradapter.h ================================================ /** * Copyright (c) 2020-2022 by Contributors * @file tensoradapter.h * @brief Header file for functions exposed by the adapter library. * * Functions in this library must be exported with extern "C" so that DGL can * locate them with dlsym(3) (or GetProcAddress on Windows). */ #ifndef TENSORADAPTER_H_ #define TENSORADAPTER_H_ #ifdef DGL_USE_CUDA #include #endif // DGL_USE_CUDA namespace tensoradapter { extern "C" { /** * @brief Allocate a piece of CPU memory via * PyTorch's CPUAllocator * * @param nbytes The size to be allocated. * @return Pointer to the allocated memory. */ void* CPURawAlloc(size_t nbytes); /** * @brief Free the CPU memory. * * @param ptr Pointer to the memory to be freed. */ void CPURawDelete(void* ptr); #ifdef DGL_USE_CUDA /** * @brief Allocate a piece of GPU memory via * PyTorch's THCCachingAllocator. * * @param nbytes The size to be allocated. 
* @param stream The stream to be allocated on.
 * @return Pointer to the allocated memory.
 */
void* CUDARawAlloc(size_t nbytes, cudaStream_t stream);

/**
 * @brief Free the GPU memory.
 *
 * @param ptr Pointer to the memory to be freed.
 */
void CUDARawDelete(void* ptr);

/**
 * @brief Get the current CUDA stream.
 *
 * @return The current CUDA stream.
 */
cudaStream_t CUDACurrentStream();

/**
 * @brief Let the caching allocator know which streams are using this tensor.
 *
 * @param ptr Pointer of the tensor to be recorded.
 * @param stream The stream that is using this tensor.
 * @param device_id Device of the tensor.
 */
void RecordStream(void* ptr, cudaStream_t stream, int device_id);

/**
 * @brief Allocate a piece of pinned CPU memory via
 *     PyTorch's CachingHostAllocator.
 *
 * @param nbytes The size to be allocated.
 * @param ctx Pointer to the PyTorch storage ctx ptr returned from the
 *     allocator.
 * @param raw_deleter Pointer to the delete function ptr returned from the
 *     allocator.
 * @return Raw pointer to the allocated memory.
 */
void* CUDARawHostAlloc(size_t nbytes, void** ctx, void** raw_deleter);

/**
 * @brief 'Free' the pinned CPU memory via
 *     inserting the memory block back to the free list.
 *
 * @param raw_deleter Pointer to the delete function ptr returned from the
 *     allocator.
 */
void CUDARawHostDelete(void** raw_deleter);

/**
 * @brief 'Record' a CUDA stream (usually from a copy kernel) for the pinned
 *     memory via PyTorch's CachingHostAllocator.
 *
 * @param data Pointer of the tensor to be recorded.
 * @param ctx PyTorch storage ctx ptr returned from the allocator.
 * @param stream The stream that currently consumes this tensor.
 * @param device_id Device of the tensor.
 */
void CUDARecordHostAlloc(
    void* data, void* ctx, cudaStream_t stream, int device_id);

/**
 * @brief Release cached pinned memory allocations via cudaHostFree.
*/ void CUDAHostAllocatorEmptyCache(); #endif // DGL_USE_CUDA } }; // namespace tensoradapter #endif // TENSORADAPTER_H_ ================================================ FILE: tensoradapter/include/tensoradapter_exports.h ================================================ /** * Copyright (c) 2020 by Contributors * @file tensoradapter_exports.h * @brief Header file for functions exposed by the adapter library. */ #ifndef TENSORADAPTER_EXPORTS_H_ #define TENSORADAPTER_EXPORTS_H_ #if defined(WIN32) || defined(_WIN32) #define TA_EXPORTS __declspec(dllexport) #else #define TA_EXPORTS #endif #endif // TENSORADAPTER_EXPORTS_H_ ================================================ FILE: tensoradapter/pytorch/CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.5) project(tensoradapter_pytorch C CXX) # Find PyTorch cmake files and PyTorch versions with the python interpreter $PYTHON_INTERP # ("python3" or "python" if empty) if(NOT PYTHON_INTERP) find_program(PYTHON_INTERP NAMES python3 python) endif() message(STATUS "Using Python interpreter: ${PYTHON_INTERP}") file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/find_cmake.py FIND_CMAKE_PY) execute_process( COMMAND ${PYTHON_INTERP} ${FIND_CMAKE_PY} OUTPUT_VARIABLE TORCH_PREFIX_VER OUTPUT_STRIP_TRAILING_WHITESPACE) message(STATUS "find_cmake.py output: ${TORCH_PREFIX_VER}") list(GET TORCH_PREFIX_VER 0 TORCH_PREFIX) list(GET TORCH_PREFIX_VER 1 TORCH_VER) message(STATUS "Configuring for PyTorch ${TORCH_VER}") if(USE_CUDA) add_definitions(-DDGL_USE_CUDA) endif() set(Torch_DIR "${TORCH_PREFIX}/Torch") message(STATUS "Setting directory to ${Torch_DIR}") find_package(Torch REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TORCH_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb") set(TORCH_TARGET_NAME "tensoradapter_pytorch_${TORCH_VER}") file(GLOB TA_TORCH_SRC *.cpp) add_library(${TORCH_TARGET_NAME} SHARED 
"${TA_TORCH_SRC}") # use the library name rather than the path set(TENSORADAPTER_TORCH_LIBS torch) message(STATUS "tensoradapter found PyTorch includes: ${TORCH_INCLUDE_DIRS}") message(STATUS "tensoradapter found PyTorch lib: ${TENSORADAPTER_TORCH_LIBS}") target_include_directories( ${TORCH_TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../include") target_include_directories( ${TORCH_TARGET_NAME} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/../../third_party/dlpack/include") target_include_directories( ${TORCH_TARGET_NAME} PRIVATE "${TORCH_INCLUDE_DIRS}") target_link_libraries(${TORCH_TARGET_NAME} PRIVATE "${TENSORADAPTER_TORCH_LIBS}") set_property(TARGET ${TORCH_TARGET_NAME} PROPERTY CXX_STANDARD 17) message(STATUS "Configured target ${TORCH_TARGET_NAME}") ================================================ FILE: tensoradapter/pytorch/build.bat ================================================ REM Helper script to build tensor adapter libraries for PyTorch @ECHO OFF SETLOCAL EnableDelayedExpansion MD "%BINDIR%\tensoradapter\pytorch" DEL /S /Q build MD build PUSHD build IF x%1x == xx GOTO single FOR %%X IN (%*) DO ( DEL /S /Q * "%CMAKE_COMMAND%" -DCMAKE_CONFIGURATION_TYPES=Release -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DTORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST% -DUSE_CUDA=%USE_CUDA% -DPYTHON_INTERP=%%X .. -G "Visual Studio 16 2019" || EXIT /B 1 msbuild tensoradapter_pytorch.sln /m /nr:false || EXIT /B 1 COPY /Y Release\*.dll "%BINDIR%\tensoradapter\pytorch" || EXIT /B 1 ) GOTO end :single DEL /S /Q * "%CMAKE_COMMAND%" -DCMAKE_CONFIGURATION_TYPES=Release -DCUDA_TOOLKIT_ROOT_DIR="%CUDA_TOOLKIT_ROOT_DIR%" -DTORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST% -DUSE_CUDA=%USE_CUDA% .. 
-G "Visual Studio 16 2019" || EXIT /B 1 msbuild tensoradapter_pytorch.sln /m /nr:false || EXIT /B 1 COPY /Y Release\*.dll "%BINDIR%\tensoradapter\pytorch" || EXIT /B 1 :end POPD ENDLOCAL ================================================ FILE: tensoradapter/pytorch/build.sh ================================================ #!/bin/bash # Helper script to build tensor adapter libraries for PyTorch set -e mkdir -p build mkdir -p $BINDIR/tensoradapter/pytorch cd build if [ $(uname) = 'Darwin' ]; then CPSOURCE=*.dylib else CPSOURCE=*.so fi CMAKE_FLAGS="-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_TOOLKIT_ROOT_DIR -DTORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST -DUSE_CUDA=$USE_CUDA" if [ $# -eq 0 ]; then $CMAKE_COMMAND $CMAKE_FLAGS .. make -j cp -v $CPSOURCE $BINDIR/tensoradapter/pytorch else for PYTHON_INTERP in $@; do TORCH_VER=$($PYTHON_INTERP -c 'import torch; print(torch.__version__.split("+")[0])') mkdir -p $TORCH_VER cd $TORCH_VER $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../.. make -j cp -v $CPSOURCE $BINDIR/tensoradapter/pytorch cd .. done fi ================================================ FILE: tensoradapter/pytorch/find_cmake.py ================================================ import os import torch cmake_prefix_path = getattr( torch.utils, "cmake_prefix_path", os.path.join(os.path.dirname(torch.__file__), "share", "cmake"), ) version = torch.__version__.split("+")[0] print(";".join([cmake_prefix_path, version])) ================================================ FILE: tensoradapter/pytorch/torch.cpp ================================================ /** * Copyright (c) 2020-2022 by Contributors * @file torch/torch.cpp * @brief Implementation of PyTorch adapter library. 
*/ #include #include #ifdef DGL_USE_CUDA #include #include #include #include #include #endif // DGL_USE_CUDA namespace tensoradapter { extern "C" { TA_EXPORTS void* CPURawAlloc(size_t nbytes) { return c10::GetCPUAllocator()->raw_allocate(nbytes); } TA_EXPORTS void CPURawDelete(void* ptr) { c10::GetCPUAllocator()->raw_deallocate(ptr); } #ifdef DGL_USE_CUDA TA_EXPORTS void* CUDARawAlloc(size_t nbytes, cudaStream_t stream) { at::globalContext().lazyInitDevice(at::kCUDA); return c10::cuda::CUDACachingAllocator::raw_alloc_with_stream(nbytes, stream); } TA_EXPORTS void CUDARawDelete(void* ptr) { c10::cuda::CUDACachingAllocator::raw_delete(ptr); } TA_EXPORTS cudaStream_t CUDACurrentStream() { return at::cuda::getCurrentCUDAStream(); } TA_EXPORTS void RecordStream(void* ptr, cudaStream_t stream, int device_id) { c10::DataPtr data_ptr{ ptr, ptr, c10::cuda::CUDACachingAllocator::get()->raw_deleter(), c10::Device(c10::DeviceType::CUDA, device_id)}; c10::cuda::CUDACachingAllocator::recordStream( data_ptr, // getStreamFromExternal doesn't exist before PyTorch 1.10, just copy it // here c10::cuda::CUDAStream( c10::cuda::CUDAStream::UNCHECKED, c10::Stream( c10::Stream::UNSAFE, c10::Device(c10::DeviceType::CUDA, device_id), reinterpret_cast(stream)))); data_ptr.release_context(); } class CUDAHostDeleter { public: explicit CUDAHostDeleter(std::unique_ptr ptr) : ptr_(std::move(ptr)) {} private: std::unique_ptr ptr_; }; TA_EXPORTS void* CUDARawHostAlloc( size_t nbytes, void** ctx, void** raw_deleter) { auto data_ptr = at::cuda::getCachingHostAllocator()->allocate(nbytes); auto raw = data_ptr.get(); // Return the raw ctx ptr for recording event. *ctx = data_ptr.get_context(); // Transfer ownership to raw_deleter. auto* data_deleter = new CUDAHostDeleter(data_ptr.move_context()); *raw_deleter = static_cast(data_deleter); return raw; } // Designated CUDAHostDeleter for CUDARawHostAlloc. 
TA_EXPORTS void CUDARawHostDelete(void** raw_deleter) { delete static_cast(*raw_deleter); *raw_deleter = nullptr; } TA_EXPORTS void CUDARecordHostAlloc( void* ptr, void* ctx, cudaStream_t stream, int device_id) { at::cuda::CachingHostAllocator_recordEvent( ptr, ctx, c10::cuda::CUDAStream( c10::cuda::CUDAStream::UNCHECKED, c10::Stream( c10::Stream::UNSAFE, c10::Device(c10::DeviceType::CUDA, device_id), reinterpret_cast(stream)))); } TA_EXPORTS void CUDAHostAllocatorEmptyCache() { at::cuda::CachingHostAllocator_emptyCache(); } #endif // DGL_USE_CUDA }; }; // namespace tensoradapter
================================================
FILE: tests/README.md
================================================
Unit test
===

## Python Unittest

The code organization goes as follows:

* `backend`: Additional unified tensor interface for supported frameworks.
  The functions there are only used in unit tests, not in DGL itself. Note that
  the code there does not contain unit tests itself.
* `compute`: All framework-agnostic computation-related unit tests go there.
* `${DGLBACKEND}` (e.g. `pytorch` and `mxnet`): All framework-specific
  computation-related unit tests go there.
* `graph_index`: All unit tests for the C++ graph structure implementation go
  there. The Python API being tested in this directory, if any, should be
  as minimal as possible (usually simple wrappers of corresponding C++
  functions).
* `lint`: Pylint-related files.
* `scripts`: Automated test scripts for CI.

## C++ Unittest

Compile with unit tests enabled and run them by executing the commands below:

```
# Assume current directory is the root directory of dgl, and googletest submodule is initialized
bash script/build_dgl.sh -c -r
./runUnitTests
```

================================================
FILE: tests/backend/__init__.py
================================================
import importlib import os import sys import numpy as np from dgl.backend import * from dgl.nn import * from .
import backend_unittest

# Import the backend-specific test helpers as a submodule of this package
# (the module name is ".<backend_name>").
# NOTE(review): ``backend_name`` is not defined in this file; presumably it is
# provided by one of the star imports -- confirm.
mod = importlib.import_module(".%s" % backend_name, __name__)
thismod = sys.modules[__name__]

# Re-export into this module every name declared in ``backend_unittest`` (dunder
# names excluded) whose backend counterpart is callable.  ``mod.__dict__[api]``
# raises ``KeyError`` when the backend module does not define the name.
for api in backend_unittest.__dict__.keys():
    if api.startswith("__"):
        continue
    elif callable(mod.__dict__[api]):
        # Tensor APIs used in unit tests MUST be supported across all backends
        globals()[api] = mod.__dict__[api]

# Tensor creation with default dtype and context

# Keep references to the current functions under private names *before* the
# wrappers defined below rebind the public names.
_zeros = zeros
_ones = ones
_randn = randn
_tensor = tensor
_arange = arange
_full = full
_full_1d = full_1d
_softmax = softmax

# The test device is chosen with the DGLTESTDEV environment variable and
# defaults to "cpu".
_default_context_str = os.getenv("DGLTESTDEV", "cpu")

# Both ``cpu()`` and ``cuda()`` are called here at import time, whichever
# device was selected.
_context_dict = {
    "cpu": cpu(),
    "gpu": cuda(),
}
# Any DGLTESTDEV value other than "cpu" or "gpu" raises ``KeyError`` here.
_default_context = _context_dict[_default_context_str]


def ctx():
    """Return the default context used by the unit tests."""
    return _default_context


def gpu_ctx():
    """Return True when DGLTESTDEV selected the "gpu" context."""
    return _default_context_str == "gpu"


def zeros(shape, dtype=float32, ctx=_default_context):
    """Call the captured ``zeros``; dtype defaults to float32 and ctx to the
    default test context."""
    return _zeros(shape, dtype, ctx)


def ones(shape, dtype=float32, ctx=_default_context):
    """Call the captured ``ones``; dtype defaults to float32 and ctx to the
    default test context."""
    return _ones(shape, dtype, ctx)


def randn(shape):
    """Call the captured ``randn`` and copy the result to the default test
    context."""
    return copy_to(_randn(shape), _default_context)


def tensor(data, dtype=None):
    """Call the captured ``tensor`` and copy the result to the default test
    context."""
    return copy_to(_tensor(data, dtype), _default_context)


def arange(start, stop, dtype=int64, ctx=None):
    """Call the captured ``arange``; dtype defaults to int64 and a ``ctx`` of
    None is replaced by the default test context."""
    return _arange(
        start, stop, dtype, ctx if ctx is not None else _default_context
    )


def full(shape, fill_value, dtype, ctx=_default_context):
    """Call the captured ``full``; ctx defaults to the default test context."""
    return _full(shape, fill_value, dtype, ctx)


def full_1d(length, fill_value, dtype, ctx=_default_context):
    """Call the captured ``full_1d``; ctx defaults to the default test
    context."""
    return _full_1d(length, fill_value, dtype, ctx)


def softmax(x, dim):
    """Forward unchanged to the captured ``softmax``."""
    return _softmax(x, dim)

================================================ FILE: tests/backend/backend_unittest.py ================================================
"""This file defines the unified tensor framework interface required by DGL unit testing, other than the ones used in the framework itself.
"""

###############################################################################
# Tensor, data type and context interfaces

# Every function below is an interface stub: its body is only ``pass``.  The
# per-backend modules under tests/backend/ supply the implementations.


def cuda():
    """Context object for CUDA."""
    pass


def is_cuda_available():
    """Check whether CUDA is available."""
    pass


###############################################################################
# Tensor functions on feature data
# --------------------------------
# These functions are performance critical, so it's better to have efficient
# implementation in each framework.


def array_equal(a, b):
    """Check whether the two tensors are *exactly* equal."""
    pass


def allclose(a, b, rtol=1e-4, atol=1e-4):
    """Check whether the two tensors are numerically close to each other."""
    pass


def randn(shape):
    """Generate a tensor with elements from standard normal distribution."""
    pass


def full(shape, fill_value, dtype, ctx):
    """Create a tensor of the given shape and dtype on context ``ctx`` with
    every element set to ``fill_value``."""
    pass


def narrow_row_set(x, start, stop, new):
    """Set a slice of the given tensor to a new value."""
    pass


def sparse_to_numpy(x):
    """Convert a sparse tensor to a numpy array."""
    pass


def clone(x):
    """Return a copy of the tensor."""
    pass


def reduce_sum(x):
    """Sums all the elements into a single scalar."""
    pass


def softmax(x, dim):
    """Softmax Operation on Tensors"""
    pass


def spmm(x, y):
    """Sparse dense matrix multiply"""
    pass


def add(a, b):
    """Compute a + b"""
    pass


def sub(a, b):
    """Compute a - b"""
    pass


def mul(a, b):
    """Compute a * b"""
    pass


def div(a, b):
    """Compute a / b"""
    pass


def sum(x, dim, keepdims=False):
    """Computes the sum of array elements over given axes"""
    pass


def max(x, dim):
    """Computes the max of array elements over given axes"""
    pass


def min(x, dim):
    """Computes the min of array elements over given axes"""
    pass


def prod(x, dim):
    """Computes the prod of array elements over given axes"""
    pass


def matmul(a, b):
    """Compute Matrix Multiplication between a and b"""
    pass


def dot(a, b):
    """Compute Dot between a and b"""
    pass


def abs(a):
    """Compute the absolute value of a"""
    pass


def seed(a):
    """Set the seed of the random number generator."""
    pass
############################################################################### # Tensor functions used *only* on index tensor # ---------------- # These operators are light-weighted, so it is acceptable to fallback to # numpy operators if currently missing in the framework. Ideally in the future, # DGL should contain all the operations on index, so this set of operators # should be gradually removed. ############################################################################### # Other interfaces # ---------------- # These are not related to tensors. Some of them are temporary workarounds that # should be included in DGL in the future. ================================================ FILE: tests/backend/mxnet/__init__.py ================================================ from __future__ import absolute_import import mxnet as mx import mxnet.ndarray as nd import numpy as np def cuda(): return mx.gpu() def is_cuda_available(): # TODO: Does MXNet have a convenient function to test GPU availability/compilation? 
try: a = nd.array([1, 2, 3], ctx=mx.gpu()) return True except mx.MXNetError: return False def array_equal(a, b): return nd.equal(a, b).asnumpy().all() def allclose(a, b, rtol=1e-4, atol=1e-4): return np.allclose(a.asnumpy(), b.asnumpy(), rtol=rtol, atol=atol) def randn(shape): return nd.random.randn(*shape) def full(shape, fill_value, dtype, ctx): return nd.full(shape, fill_value, dtype=dtype, ctx=ctx) def narrow_row_set(x, start, stop, new): x[start:stop] = new def sparse_to_numpy(x): return x.asscipy().todense().A def clone(x): return x.copy() def reduce_sum(x): return x.sum() def softmax(x, dim): return nd.softmax(x, axis=dim) def spmm(x, y): return nd.dot(x, y) def add(a, b): return a + b def sub(a, b): return a - b def mul(a, b): return a * b def div(a, b): return a / b def sum(x, dim, keepdims=False): return x.sum(dim, keepdims=keepdims) def max(x, dim): return x.max(dim) def min(x, dim): return x.min(dim) def prod(x, dim): return x.prod(dim) def matmul(a, b): return nd.dot(a, b) def dot(a, b): return nd.sum(mul(a, b), axis=-1) def abs(a): return nd.abs(a) def seed(a): return mx.random.seed(a) ================================================ FILE: tests/backend/pytorch/__init__.py ================================================ from __future__ import absolute_import import torch as th def cuda(): return th.device("cuda:0") def is_cuda_available(): return th.cuda.is_available() def array_equal(a, b): return th.equal(a.cpu(), b.cpu()) def allclose(a, b, rtol=1e-4, atol=1e-4): return th.allclose(a.float().cpu(), b.float().cpu(), rtol=rtol, atol=atol) def randn(shape): return th.randn(*shape) def full(shape, fill_value, dtype, ctx): return th.full(shape, fill_value, dtype=dtype, device=ctx) def narrow_row_set(x, start, stop, new): x[start:stop] = new def sparse_to_numpy(x): return x.to_dense().numpy() def clone(x): return x.clone() def reduce_sum(x): return x.sum() def softmax(x, dim): return th.softmax(x, dim) def spmm(x, y): return th.spmm(x, y) def add(a, b): 
return a + b def sub(a, b): return a - b def mul(a, b): return a * b def div(a, b): return a / b def sum(x, dim, keepdims=False): return x.sum(dim, keepdims=keepdims) def max(x, dim): return x.max(dim)[0] def min(x, dim): return x.min(dim)[0] def prod(x, dim): return x.prod(dim) def matmul(a, b): return a @ b def dot(a, b): return sum(mul(a, b), dim=-1) def abs(a): return a.abs() def seed(a): return th.manual_seed(a) ================================================ FILE: tests/backend/tensorflow/__init__.py ================================================ from __future__ import absolute_import import numpy as np import tensorflow as tf from scipy.sparse import coo_matrix def cuda(): return "/gpu:0" def is_cuda_available(): return tf.test.is_gpu_available(cuda_only=True) def array_equal(a, b): return np.array_equal(a.numpy(), b.numpy()) def allclose(a, b, rtol=1e-4, atol=1e-4): return np.allclose( tf.convert_to_tensor(a).numpy(), tf.convert_to_tensor(b).numpy(), rtol=rtol, atol=atol, ) def randn(shape): return tf.random.normal(shape) def full(shape, fill_value, dtype, ctx): with tf.device(ctx): t = tf.constant(fill_value, shape=shape, dtype=dtype) return t def narrow_row_set(x, start, stop, new): # x[start:stop] = new raise NotImplementedError("TF doesn't support inplace update") def sparse_to_numpy(x): # tf.sparse.to_dense assume sorted indices, need to turn off validate_indices in our cases return tf.sparse.to_dense(x, validate_indices=False).numpy() def clone(x): return tf.identity(x) def reduce_sum(x): return tf.reduce_sum(x) def softmax(x, dim): return tf.math.softmax(x, axis=dim) def spmm(x, y): return tf.sparse.sparse_dense_matmul(x, y) def add(a, b): return a + b def sub(a, b): return a - b def mul(a, b): return a * b def div(a, b): return a / b def sum(x, dim, keepdims=False): return tf.reduce_sum(x, axis=dim, keepdims=keepdims) def max(x, dim): return tf.reduce_max(x, axis=dim) def min(x, dim): return tf.reduce_min(x, axis=dim) def prod(x, dim): return 
tf.reduce_prod(x, axis=dim) def matmul(a, b): return tf.linalg.matmul(a, b) def dot(a, b): return sum(mul(a, b), dim=-1) def abs(a): return tf.abs(a) def seed(a): return tf.random.set_seed(a) ================================================ FILE: tests/cpp/common.h ================================================ #ifndef TEST_COMMON_H_ #define TEST_COMMON_H_ #include static constexpr DGLContext CTX = DGLContext{kDGLCPU, 0}; static constexpr DGLContext CPU = DGLContext{kDGLCPU, 0}; #ifdef DGL_USE_CUDA static constexpr DGLContext GPU = DGLContext{kDGLCUDA, 0}; #endif template inline T* Ptr(dgl::runtime::NDArray nd) { return static_cast(nd->data); } inline int64_t* PI64(dgl::runtime::NDArray nd) { return static_cast(nd->data); } inline int32_t* PI32(dgl::runtime::NDArray nd) { return static_cast(nd->data); } inline int64_t Len(dgl::runtime::NDArray nd) { return nd->shape[0]; } template inline bool ArrayEQ(dgl::runtime::NDArray a1, dgl::runtime::NDArray a2) { if (a1->ndim != a2->ndim) return false; if (a1->dtype != a2->dtype) return false; if (a1->ctx != a2->ctx) return false; if (a1.NumElements() != a2.NumElements()) return false; if (a1.NumElements() == 0) return true; int64_t num = 1; for (int i = 0; i < a1->ndim; ++i) { if (a1->shape[i] != a2->shape[i]) return false; num *= a1->shape[i]; } a1 = a1.CopyTo(CPU); a2 = a2.CopyTo(CPU); for (int64_t i = 0; i < num; ++i) if (static_cast(a1->data)[i] != static_cast(a2->data)[i]) return false; return true; } template inline bool IsInArray(dgl::runtime::NDArray a, T x) { if (!a.defined() || a->shape[0] == 0) return false; for (int64_t i = 0; i < a->shape[0]; ++i) { if (x == static_cast(a->data)[i]) return true; } return false; } #endif // TEST_COMMON_H_ ================================================ FILE: tests/cpp/graph_index_test.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file graph_index_test.cc * @brief Test GraphIndex */ #include #include TEST(GraphTest, 
TestNumVertices) { dgl::Graph g; g.AddVertices(10); ASSERT_EQ(g.NumVertices(), 10); }; ================================================ FILE: tests/cpp/message_queue_test.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file msg_queue.cc * @brief Message queue for DGL distributed training. */ #include #include #include #include #include "../src/rpc/network/msg_queue.h" using dgl::network::Message; using dgl::network::MessageQueue; using std::string; TEST(MessageQueueTest, AddRemove) { MessageQueue queue(5, 1); // size:5, num_of_producer:1 // msg 1 std::string str_1("111"); Message msg_1 = {const_cast(str_1.data()), 3}; EXPECT_EQ(queue.Add(msg_1), ADD_SUCCESS); // msg 2 std::string str_2("22"); Message msg_2 = {const_cast(str_2.data()), 2}; EXPECT_EQ(queue.Add(msg_2), ADD_SUCCESS); // msg 3 std::string str_3("xxxx"); Message msg_3 = {const_cast(str_3.data()), 4}; EXPECT_EQ(queue.Add(msg_3, false), QUEUE_FULL); // msg 4 Message msg_4; EXPECT_EQ(queue.Remove(&msg_4), REMOVE_SUCCESS); EXPECT_EQ(string(msg_4.data, msg_4.size), string("111")); // msg 5 Message msg_5; EXPECT_EQ(queue.Remove(&msg_5), REMOVE_SUCCESS); EXPECT_EQ(string(msg_5.data, msg_5.size), string("22")); // msg 6 std::string str_6("33333"); Message msg_6 = {const_cast(str_6.data()), 5}; EXPECT_EQ(queue.Add(msg_6), ADD_SUCCESS); // msg 7 Message msg_7; EXPECT_EQ(queue.Remove(&msg_7), REMOVE_SUCCESS); EXPECT_EQ(string(msg_7.data, msg_7.size), string("33333")); // msg 8 Message msg_8; EXPECT_EQ(queue.Remove(&msg_8, false), QUEUE_EMPTY); // non-blocking remove // msg 9 std::string str_9("666666"); Message msg_9 = {const_cast(str_9.data()), 6}; EXPECT_EQ(queue.Add(msg_9), MSG_GT_SIZE); // exceed queue size // msg 10 std::string str_10("55555"); Message msg_10 = {const_cast(str_10.data()), 5}; EXPECT_EQ(queue.Add(msg_10), ADD_SUCCESS); // msg 11 Message msg_11; EXPECT_EQ(queue.Remove(&msg_11), REMOVE_SUCCESS); } TEST(MessageQueueTest, EmptyAndNoMoreAdd) { 
MessageQueue queue(5, 2); // size:5, num_of_producer:2 EXPECT_EQ(queue.EmptyAndNoMoreAdd(), false); EXPECT_EQ(queue.Empty(), true); queue.SignalFinished(1); queue.SignalFinished(1); EXPECT_EQ(queue.EmptyAndNoMoreAdd(), false); queue.SignalFinished(2); EXPECT_EQ(queue.EmptyAndNoMoreAdd(), true); } const int kNumOfProducer = 100; const int kNumOfMessage = 100; std::string str_apple("apple"); void start_add(MessageQueue* queue, int id) { for (int i = 0; i < kNumOfMessage; ++i) { Message msg = {const_cast(str_apple.data()), 5}; EXPECT_EQ(queue->Add(msg), ADD_SUCCESS); } queue->SignalFinished(id); } TEST(MessageQueueTest, MultiThread) { MessageQueue queue(100000, kNumOfProducer); EXPECT_EQ(queue.EmptyAndNoMoreAdd(), false); EXPECT_EQ(queue.Empty(), true); std::vector thread_pool(kNumOfProducer); for (int i = 0; i < kNumOfProducer; ++i) { thread_pool[i] = std::thread(start_add, &queue, i); } for (int i = 0; i < kNumOfProducer * kNumOfMessage; ++i) { Message msg; EXPECT_EQ(queue.Remove(&msg), REMOVE_SUCCESS); EXPECT_EQ(string(msg.data, msg.size), string("apple")); } for (int i = 0; i < kNumOfProducer; ++i) { thread_pool[i].join(); } EXPECT_EQ(queue.EmptyAndNoMoreAdd(), true); } ================================================ FILE: tests/cpp/socket_communicator_test.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file socket_communicator_test.cc * @brief Test SocketCommunicator */ #include "../src/rpc/network/socket_communicator.h" #include #include #include #include #include #include #include #include #include #include #include "../src/rpc/network/msg_queue.h" using std::string; using dgl::network::DefaultMessageDeleter; using dgl::network::Message; using dgl::network::SocketReceiver; using dgl::network::SocketSender; const int64_t kQueueSize = 500 * 1024; const int kThreadNum = 2; const int kMaxTryTimes = 1024; #ifndef WIN32 const int kNumSender = 3; const int kNumReceiver = 3; const int kNumMessage = 10; const char* 
ip_addr[] = { "tcp://127.0.0.1:50091", "tcp://127.0.0.1:50092", "tcp://127.0.0.1:50093"}; static void start_client(); static void start_server(int id); TEST(SocketCommunicatorTest, SendAndRecv) { // start 10 client std::vector client_thread(kNumSender); for (int i = 0; i < kNumSender; ++i) { client_thread[i] = std::thread(start_client); } // start 10 server std::vector server_thread(kNumReceiver); for (int i = 0; i < kNumReceiver; ++i) { server_thread[i] = std::thread(start_server, i); } for (int i = 0; i < kNumSender; ++i) { client_thread[i].join(); } for (int i = 0; i < kNumReceiver; ++i) { server_thread[i].join(); } } TEST(SocketCommunicatorTest, SendAndRecvTimeout) { std::atomic_bool stop{false}; // start 1 client, connect to 1 server, send 2 messsage auto client = std::thread([&stop]() { SocketSender sender(kQueueSize, kThreadNum); sender.ConnectReceiver(ip_addr[0], 0); sender.ConnectReceiverFinalize(kMaxTryTimes); for (int i = 0; i < 2; ++i) { char* str_data = new char[9]; memcpy(str_data, "123456789", 9); Message msg = {str_data, 9}; msg.deallocator = DefaultMessageDeleter; EXPECT_EQ(sender.Send(msg, 0), ADD_SUCCESS); } while (!stop) { } sender.Finalize(); }); // start 1 server, accept 1 client, receive 2 message auto server = std::thread([&stop]() { SocketReceiver receiver(kQueueSize, kThreadNum); receiver.Wait(ip_addr[0], 1); Message msg; int recv_id; // receive 1st message EXPECT_EQ(receiver.RecvFrom(&msg, 0, 0), REMOVE_SUCCESS); EXPECT_EQ(string(msg.data, msg.size), string("123456789")); msg.deallocator(&msg); // receive 2nd message EXPECT_EQ(receiver.Recv(&msg, &recv_id, 0), REMOVE_SUCCESS); EXPECT_EQ(string(msg.data, msg.size), string("123456789")); msg.deallocator(&msg); // timed out EXPECT_EQ(receiver.RecvFrom(&msg, 0, 1000), QUEUE_EMPTY); EXPECT_EQ(receiver.Recv(&msg, &recv_id, 1000), QUEUE_EMPTY); stop = true; receiver.Finalize(); }); // join client.join(); server.join(); } void start_client() { SocketSender sender(kQueueSize, kThreadNum); for (int 
i = 0; i < kNumReceiver; ++i) { sender.ConnectReceiver(ip_addr[i], i); } sender.ConnectReceiverFinalize(kMaxTryTimes); for (int i = 0; i < kNumMessage; ++i) { for (int n = 0; n < kNumReceiver; ++n) { char* str_data = new char[9]; memcpy(str_data, "123456789", 9); Message msg = {str_data, 9}; msg.deallocator = DefaultMessageDeleter; EXPECT_EQ(sender.Send(msg, n), ADD_SUCCESS); } } for (int i = 0; i < kNumMessage; ++i) { for (int n = 0; n < kNumReceiver; ++n) { char* str_data = new char[9]; memcpy(str_data, "123456789", 9); Message msg = {str_data, 9}; msg.deallocator = DefaultMessageDeleter; EXPECT_EQ(sender.Send(msg, n), ADD_SUCCESS); } } sender.Finalize(); } void start_server(int id) { sleep(5); SocketReceiver receiver(kQueueSize, kThreadNum); receiver.Wait(ip_addr[id], kNumSender); for (int i = 0; i < kNumMessage; ++i) { for (int n = 0; n < kNumSender; ++n) { Message msg; EXPECT_EQ(receiver.RecvFrom(&msg, n), REMOVE_SUCCESS); EXPECT_EQ(string(msg.data, msg.size), string("123456789")); msg.deallocator(&msg); } } for (int n = 0; n < kNumSender * kNumMessage; ++n) { Message msg; int recv_id; EXPECT_EQ(receiver.Recv(&msg, &recv_id), REMOVE_SUCCESS); EXPECT_EQ(string(msg.data, msg.size), string("123456789")); msg.deallocator(&msg); } receiver.Finalize(); } TEST(SocketCommunicatorTest, TCPSocketBind) { dgl::network::TCPSocket socket; testing::internal::CaptureStderr(); EXPECT_EQ(socket.Bind("127.0.0", 50001), false); const std::string stderr = testing::internal::GetCapturedStderr(); EXPECT_NE(stderr.find("Invalid IP: 127.0.0"), std::string::npos); } #else #include #include #pragma comment(lib, "ws2_32.lib") void sleep(int seconds) { Sleep(seconds * 1000); } static void start_client(); static bool start_server(); DWORD WINAPI _ClientThreadFunc(LPVOID param) { start_client(); return 0; } DWORD WINAPI _ServerThreadFunc(LPVOID param) { return start_server() ? 
1 : 0; } TEST(SocketCommunicatorTest, SendAndRecv) { HANDLE hThreads[2]; WSADATA wsaData; DWORD retcode, exitcode; srand((unsigned)time(NULL)); int port = (rand() % (5000 - 3000 + 1)) + 3000; std::string ip_addr = "tcp://127.0.0.1:" + std::to_string(port); std::ofstream out("addr.txt"); out << ip_addr; out.close(); ASSERT_EQ(::WSAStartup(MAKEWORD(2, 2), &wsaData), 0); hThreads[0] = ::CreateThread(NULL, 0, _ClientThreadFunc, NULL, 0, NULL); // client ASSERT_TRUE(hThreads[0] != NULL); hThreads[1] = ::CreateThread(NULL, 0, _ServerThreadFunc, NULL, 0, NULL); // server ASSERT_TRUE(hThreads[1] != NULL); retcode = ::WaitForMultipleObjects(2, hThreads, TRUE, INFINITE); EXPECT_TRUE((retcode <= WAIT_OBJECT_0 + 1) && (retcode >= WAIT_OBJECT_0)); EXPECT_EQ(::GetExitCodeThread(hThreads[1], &exitcode), TRUE); EXPECT_EQ(exitcode, 1); EXPECT_EQ(::CloseHandle(hThreads[0]), TRUE); EXPECT_EQ(::CloseHandle(hThreads[1]), TRUE); ::WSACleanup(); } static void start_client() { std::ifstream t("addr.txt"); std::string ip_addr( (std::istreambuf_iterator(t)), std::istreambuf_iterator()); t.close(); SocketSender sender(kQueueSize, kThreadNum); sender.ConnectReceiver(ip_addr.c_str(), 0); sender.ConnectReceiverFinalize(kMaxTryTimes); char* str_data = new char[9]; memcpy(str_data, "123456789", 9); Message msg = {str_data, 9}; msg.deallocator = DefaultMessageDeleter; sender.Send(msg, 0); sender.Finalize(); } static bool start_server() { sleep(5); std::ifstream t("addr.txt"); std::string ip_addr( (std::istreambuf_iterator(t)), std::istreambuf_iterator()); t.close(); SocketReceiver receiver(kQueueSize, kThreadNum); receiver.Wait(ip_addr.c_str(), 1); Message msg; EXPECT_EQ(receiver.RecvFrom(&msg, 0), REMOVE_SUCCESS); receiver.Finalize(); return string("123456789") == string(msg.data, msg.size); } #endif ================================================ FILE: tests/cpp/string_test.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file string_test.cc * 
@brief Test String Common */ #include #include #include #include "../src/rpc/network/common.h" using dgl::network::SplitStringUsing; using dgl::network::SStringPrintf; using dgl::network::StringAppendF; using dgl::network::StringPrintf; TEST(SplitStringTest, SplitStringUsingCompoundDelim) { std::string full(" apple \torange "); std::vector subs; SplitStringUsing(full, " \t", &subs); EXPECT_EQ(subs.size(), 2); EXPECT_EQ(subs[0], std::string("apple")); EXPECT_EQ(subs[1], std::string("orange")); } TEST(SplitStringTest, testSplitStringUsingSingleDelim) { std::string full(" apple orange "); std::vector subs; SplitStringUsing(full, " ", &subs); EXPECT_EQ(subs.size(), 2); EXPECT_EQ(subs[0], std::string("apple")); EXPECT_EQ(subs[1], std::string("orange")); } TEST(SplitStringTest, testSplitingNoDelimString) { std::string full("apple"); std::vector subs; SplitStringUsing(full, " ", &subs); EXPECT_EQ(subs.size(), 1); EXPECT_EQ(subs[0], std::string("apple")); } TEST(StringPrintf, normal) { using std::string; EXPECT_EQ(StringPrintf("%d", 1), string("1")); string target; SStringPrintf(&target, "%d", 1); EXPECT_EQ(target, string("1")); StringAppendF(&target, "%d", 2); EXPECT_EQ(target, string("12")); } ================================================ FILE: tests/cpp/test_aten.cc ================================================ #include #include #include "./common.h" using namespace dgl; using namespace dgl::runtime; TEST(ArrayTest, TestCreate) { IdArray a = aten::NewIdArray(100, CTX, 32); ASSERT_EQ(a->dtype.bits, 32); ASSERT_EQ(a->shape[0], 100); a = aten::NewIdArray(0); ASSERT_EQ(a->shape[0], 0); std::vector vec = {2, 94, 232, 30}; a = aten::VecToIdArray(vec, 32); ASSERT_EQ(Len(a), vec.size()); ASSERT_EQ(a->dtype.bits, 32); for (int i = 0; i < Len(a); ++i) { ASSERT_EQ(Ptr(a)[i], vec[i]); } a = aten::VecToIdArray(std::vector()); ASSERT_EQ(Len(a), 0); }; void _TestRange(DGLContext ctx) { IdArray a = aten::Range(10, 10, 64, ctx); ASSERT_EQ(Len(a), 0); a = aten::Range(10, 20, 32, 
ctx); ASSERT_EQ(Len(a), 10); ASSERT_EQ(a->dtype.bits, 32); a = a.CopyTo(CPU); for (int i = 0; i < 10; ++i) ASSERT_EQ(Ptr(a)[i], i + 10); } TEST(ArrayTest, TestRange) { _TestRange(CPU); #ifdef DGL_USE_CUDA _TestRange(GPU); #endif }; TEST(ArrayTest, TestFull) { IdArray a = aten::Full(-100, 0, 32, CTX); ASSERT_EQ(Len(a), 0); a = aten::Full(-100, 13, 64, CTX); ASSERT_EQ(Len(a), 13); ASSERT_EQ(a->dtype.bits, 64); for (int i = 0; i < 13; ++i) ASSERT_EQ(Ptr(a)[i], -100); }; TEST(ArrayTest, TestClone) { IdArray a = aten::NewIdArray(0); IdArray b = aten::Clone(a); ASSERT_EQ(Len(b), 0); a = aten::Range(0, 10, 32, CTX); b = aten::Clone(a); for (int i = 0; i < 10; ++i) { ASSERT_EQ(PI32(b)[i], i); } PI32(b)[0] = -1; for (int i = 0; i < 10; ++i) { ASSERT_EQ(PI32(a)[i], i); } }; void _TestNumBits(DGLContext ctx) { IdArray a = aten::Range(0, 10, 32, ctx); a = aten::AsNumBits(a, 64); ASSERT_EQ(a->dtype.bits, 64); a = a.CopyTo(CPU); for (int i = 0; i < 10; ++i) ASSERT_EQ(PI64(a)[i], i); } TEST(ArrayTest, TestAsNumBits) { _TestNumBits(CPU); #ifdef DGL_USE_CUDA _TestNumBits(GPU); #endif }; template void _TestArith(DGLContext ctx) { const int N = 100; IdArray a = aten::Full(-10, N, sizeof(IDX) * 8, ctx); IdArray b = aten::Full(7, N, sizeof(IDX) * 8, ctx); IdArray c = a + b; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -3); c = a - b; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -17); c = a * b; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -70); c = a / b; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -1); c = -a; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 10); c = (-a) % b; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 3); const int val = -3; c = aten::Add(a, val); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -13); c = aten::Sub(a, val); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -7); c = aten::Mul(a, 
val); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 30); c = aten::Div(a, val); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 3); c = b % 3; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 1); c = aten::Add(val, b); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 4); c = aten::Sub(val, b); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -10); c = aten::Mul(val, b); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], -21); c = aten::Div(val, b); c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 0); c = 3 % b; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], 3); a = aten::Range(0, N, sizeof(IDX) * 8, ctx); c = a < 50; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], (int)(i < 50)); c = a > 50; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], (int)(i > 50)); c = a >= 50; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], (int)(i >= 50)); c = a <= 50; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], (int)(i <= 50)); c = a == 50; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], (int)(i == 50)); c = a != 50; c = c.CopyTo(CPU); for (int i = 0; i < N; ++i) ASSERT_EQ(Ptr(c)[i], (int)(i != 50)); } TEST(ArrayTest, Arith) { _TestArith(CPU); _TestArith(CPU); #ifdef DGL_USE_CUDA _TestArith(GPU); _TestArith(GPU); #endif }; template void _TestHStack(DGLContext ctx) { IdArray a = aten::Range(0, 100, sizeof(IDX) * 8, ctx); IdArray b = aten::Range(100, 200, sizeof(IDX) * 8, ctx); IdArray c = aten::HStack(a, b).CopyTo(aten::CPU); ASSERT_EQ(c->ndim, 1); ASSERT_EQ(c->shape[0], 200); for (int i = 0; i < 200; ++i) ASSERT_EQ(Ptr(c)[i], i); } TEST(ArrayTest, HStack) { _TestHStack(CPU); _TestHStack(CPU); #ifdef DGL_USE_CUDA _TestHStack(GPU); _TestHStack(GPU); #endif } template void _TestIndexSelect(DGLContext ctx) { IdArray a = aten::Range(0, 
100, sizeof(IDX) * 8, ctx); ASSERT_EQ(aten::IndexSelect(a, 50), 50); ASSERT_TRUE(ArrayEQ( aten::IndexSelect(a, 10, 20), aten::Range(10, 20, sizeof(IDX) * 8, ctx))); IdArray b = aten::VecToIdArray(std::vector({0, 20, 10}), sizeof(IDX) * 8, ctx); IdArray c = aten::IndexSelect(a, b); ASSERT_TRUE(ArrayEQ(b, c)); } TEST(ArrayTest, TestIndexSelect) { _TestIndexSelect(CPU); _TestIndexSelect(CPU); #ifdef DGL_USE_CUDA _TestIndexSelect(GPU); _TestIndexSelect(GPU); #endif } template void _TestRelabel_(DGLContext ctx) { IdArray a = aten::VecToIdArray(std::vector({0, 20, 10}), sizeof(IDX) * 8, ctx); IdArray b = aten::VecToIdArray(std::vector({20, 5, 6}), sizeof(IDX) * 8, ctx); IdArray c = aten::Relabel_({a, b}); IdArray ta = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); IdArray tb = aten::VecToIdArray(std::vector({1, 3, 4}), sizeof(IDX) * 8, ctx); IdArray tc = aten::VecToIdArray( std::vector({0, 20, 10, 5, 6}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(a, ta)); ASSERT_TRUE(ArrayEQ(b, tb)); ASSERT_TRUE(ArrayEQ(c, tc)); } TEST(ArrayTest, TestRelabel_) { _TestRelabel_(CPU); _TestRelabel_(CPU); #ifdef DGL_USE_CUDA _TestRelabel_(GPU); _TestRelabel_(GPU); #endif } template void _TestConcat(DGLContext ctx) { IdArray a = aten::VecToIdArray(std::vector({1, 2, 3}), sizeof(IDX) * 8, CTX); IdArray b = aten::VecToIdArray(std::vector({4, 5, 6}), sizeof(IDX) * 8, CTX); IdArray tc = aten::VecToIdArray( std::vector({1, 2, 3, 4, 5, 6}), sizeof(IDX) * 8, CTX); IdArray c = aten::Concat(std::vector{a, b}); ASSERT_TRUE(ArrayEQ(c, tc)); IdArray d = aten::Concat(std::vector{a, b, c}); IdArray td = aten::VecToIdArray( std::vector({1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6}), sizeof(IDX) * 8, CTX); ASSERT_TRUE(ArrayEQ(d, td)); } template void _TestToSimpleCsr(DGLContext ctx) { /** * A = [[0, 0, 0, 0], * [1, 0, 0, 1], * [1, 1, 1, 1], * [3, 2, 2, 3], * [2, 0, 0, 2]] * * B = CSRToSimple(A) * B = [[0, 0, 0, 0], * [1, 0, 0, 1], * [1, 1, 1, 1], * [1, 1, 1, 1], * [1, 0, 0, 1]] */ IdArray a_indptr 
= aten::VecToIdArray( std::vector({0, 0, 2, 6, 16, 20}), sizeof(IdType) * 8, CTX); IdArray a_indices = aten::VecToIdArray( std::vector( {0, 3, 0, 1, 2, 3, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 0, 0, 3, 3}), sizeof(IdType) * 8, CTX); IdArray b_indptr = aten::VecToIdArray( std::vector({0, 0, 2, 6, 10, 12}), sizeof(IdType) * 8, CTX); IdArray b_indices = aten::VecToIdArray( std::vector({0, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 3}), sizeof(IdType) * 8, CTX); IdArray cnt = aten::VecToIdArray( std::vector({1, 1, 1, 1, 1, 1, 3, 2, 2, 3, 2, 2}), sizeof(IdType) * 8, CTX); IdArray map = aten::VecToIdArray( std::vector( {0, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11, 11}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a = aten::CSRMatrix(5, 4, a_indptr, a_indices, aten::NullArray(), true); auto ret = CSRToSimple(csr_a); aten::CSRMatrix csr_b = std::get<0>(ret); IdArray ecnt = std::get<1>(ret); IdArray emap = std::get<2>(ret); ASSERT_EQ(csr_b.num_rows, 5); ASSERT_EQ(csr_b.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_b.indptr, b_indptr)); ASSERT_TRUE(ArrayEQ(csr_b.indices, b_indices)); ASSERT_TRUE(ArrayEQ(ecnt, cnt)); ASSERT_TRUE(ArrayEQ(emap, map)); ASSERT_TRUE(csr_b.sorted); // a not sorted a_indices = aten::VecToIdArray( std::vector( {0, 3, 0, 1, 2, 3, 3, 0, 0, 1, 1, 2, 2, 3, 3, 0, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); map = aten::VecToIdArray( std::vector( {0, 1, 2, 3, 4, 5, 9, 6, 6, 7, 7, 8, 8, 9, 9, 6, 10, 11, 10, 11}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a2 = aten::CSRMatrix(5, 4, a_indptr, a_indices, aten::NullArray(), false); ret = CSRToSimple(csr_a2); csr_b = std::get<0>(ret); ecnt = std::get<1>(ret); emap = std::get<2>(ret); ASSERT_EQ(csr_b.num_rows, 5); ASSERT_EQ(csr_b.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_b.indptr, b_indptr)); ASSERT_TRUE(ArrayEQ(csr_b.indices, b_indices)); ASSERT_TRUE(ArrayEQ(ecnt, cnt)); ASSERT_TRUE(ArrayEQ(emap, map)); ASSERT_TRUE(csr_b.sorted); } TEST(MatrixTest, TestToSimpleCsr) { _TestToSimpleCsr(CPU); _TestToSimpleCsr(CPU); } 
template void _TestToSimpleCoo(DGLContext ctx) { /** * Checks that COOToSimple merges duplicate (row, col) entries and returns, * next to the simplified matrix, the multiplicity of every kept entry (cnt) * and the index each original entry maps to in the result (map). * * A = [[0, 0, 0, 0], * [1, 0, 0, 1], * [1, 1, 1, 1], * [3, 2, 2, 3], * [2, 0, 0, 2]] * * B = COOToSimple(A) * B = [[0, 0, 0, 0], * [1, 0, 0, 1], * [1, 1, 1, 1], * [1, 1, 1, 1], * [1, 0, 0, 1]] */ IdArray a_row = aten::VecToIdArray( std::vector( {1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4}), sizeof(IdType) * 8, CTX); IdArray a_col = aten::VecToIdArray( std::vector( {0, 3, 0, 1, 2, 3, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 0, 0, 3, 3}), sizeof(IdType) * 8, CTX); IdArray b_row = aten::VecToIdArray( std::vector({1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4}), sizeof(IdType) * 8, CTX); IdArray b_col = aten::VecToIdArray( std::vector({0, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 3}), sizeof(IdType) * 8, CTX); IdArray cnt = aten::VecToIdArray( std::vector({1, 1, 1, 1, 1, 1, 3, 2, 2, 3, 2, 2}), sizeof(IdType) * 8, CTX); IdArray map = aten::VecToIdArray( std::vector( {0, 1, 2, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 9, 10, 10, 11, 11}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_a = aten::COOMatrix(5, 4, a_row, a_col, aten::NullArray(), true, true); auto ret = COOToSimple(coo_a); aten::COOMatrix coo_b = std::get<0>(ret); IdArray ecnt = std::get<1>(ret); IdArray emap = std::get<2>(ret); ASSERT_EQ(coo_b.num_rows, 5); ASSERT_EQ(coo_b.num_cols, 4); ASSERT_TRUE(ArrayEQ(coo_b.row, b_row)); ASSERT_TRUE(ArrayEQ(coo_b.col, b_col)); ASSERT_TRUE(ArrayEQ(ecnt, cnt)); ASSERT_TRUE(ArrayEQ(emap, map)); ASSERT_FALSE(COOHasData(coo_b)); ASSERT_TRUE(coo_b.row_sorted); ASSERT_TRUE(coo_b.col_sorted); // a not sorted a_row = aten::VecToIdArray( std::vector( {1, 2, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4}), sizeof(IdType) * 8, CTX); a_col = aten::VecToIdArray( std::vector( {0, 0, 3, 1, 2, 3, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); map = aten::VecToIdArray( std::vector( {0, 2, 1, 3, 4, 5, 6, 6, 6, 7, 7, 8, 8, 9, 9, 9, 10, 11, 10, 11}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_a2 = aten::COOMatrix(5, 
4, a_row, a_col, aten::NullArray(), false, false); ret = COOToSimple(coo_a2); coo_b = std::get<0>(ret); ecnt = std::get<1>(ret); emap = std::get<2>(ret); ASSERT_EQ(coo_b.num_rows, 5); ASSERT_EQ(coo_b.num_cols, 4); ASSERT_TRUE(ArrayEQ(coo_b.row, b_row)); ASSERT_TRUE(ArrayEQ(coo_b.col, b_col)); ASSERT_TRUE(ArrayEQ(ecnt, cnt)); ASSERT_TRUE(ArrayEQ(emap, map)); ASSERT_FALSE(COOHasData(coo_b)); ASSERT_TRUE(coo_b.row_sorted); ASSERT_TRUE(coo_b.col_sorted); } TEST(MatrixTest, TestToSimpleCoo) { _TestToSimpleCoo(CPU); _TestToSimpleCoo(CPU); } template void _TestDisjointUnionPartitionCoo(DGLContext ctx) { /** * A = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0]] * * B = [[1, 1, 0], * [0, 1, 0]] * * C = [[1]] * * AB = [[0, 0, 1, 0, 0, 0], * [1, 0, 1, 0, 0, 0], * [0, 1, 0, 0, 0, 0], * [0, 0, 0, 1, 1, 0], * [0, 0, 0, 0, 1, 0]] * * ABC = [[0, 0, 1, 0, 0, 0, 0], * [1, 0, 1, 0, 0, 0, 0], * [0, 1, 0, 0, 0, 0, 0], * [0, 0, 0, 1, 1, 0, 0], * [0, 0, 0, 0, 1, 0, 0], * [0, 0, 0, 0, 0, 0, 1]] */ IdArray a_row = aten::VecToIdArray( std::vector({0, 1, 1, 2}), sizeof(IdType) * 8, CTX); IdArray a_col = aten::VecToIdArray( std::vector({2, 0, 2, 1}), sizeof(IdType) * 8, CTX); IdArray b_row = aten::VecToIdArray( std::vector({0, 0, 1}), sizeof(IdType) * 8, CTX); IdArray b_col = aten::VecToIdArray( std::vector({0, 1, 1}), sizeof(IdType) * 8, CTX); IdArray b_data = aten::VecToIdArray( std::vector({2, 0, 1}), sizeof(IdType) * 8, CTX); IdArray c_row = aten::VecToIdArray(std::vector({0}), sizeof(IdType) * 8, CTX); IdArray c_col = aten::VecToIdArray(std::vector({0}), sizeof(IdType) * 8, CTX); IdArray ab_row = aten::VecToIdArray( std::vector({0, 1, 1, 2, 3, 3, 4}), sizeof(IdType) * 8, CTX); IdArray ab_col = aten::VecToIdArray( std::vector({2, 0, 2, 1, 3, 4, 4}), sizeof(IdType) * 8, CTX); IdArray ab_data = aten::VecToIdArray( std::vector({0, 1, 2, 3, 6, 4, 5}), sizeof(IdType) * 8, CTX); IdArray abc_row = aten::VecToIdArray( std::vector({0, 1, 1, 2, 3, 3, 4, 5}), sizeof(IdType) * 8, CTX); IdArray abc_col = 
aten::VecToIdArray( std::vector({2, 0, 2, 1, 3, 4, 4, 6}), sizeof(IdType) * 8, CTX); IdArray abc_data = aten::VecToIdArray( std::vector({0, 1, 2, 3, 6, 4, 5, 7}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_a = aten::COOMatrix(3, 3, a_row, a_col, aten::NullArray(), true, false); const aten::COOMatrix &coo_b = aten::COOMatrix(2, 3, b_row, b_col, b_data, true, true); const aten::COOMatrix &coo_c = aten::COOMatrix(1, 1, c_row, c_col, aten::NullArray(), true, true); const std::vector coos_ab({coo_a, coo_b}); const aten::COOMatrix &coo_ab = aten::DisjointUnionCoo(coos_ab); ASSERT_EQ(coo_ab.num_rows, 5); ASSERT_EQ(coo_ab.num_cols, 6); ASSERT_TRUE(ArrayEQ(coo_ab.row, ab_row)); ASSERT_TRUE(ArrayEQ(coo_ab.col, ab_col)); ASSERT_TRUE(ArrayEQ(coo_ab.data, ab_data)); ASSERT_TRUE(coo_ab.row_sorted); ASSERT_FALSE(coo_ab.col_sorted); const std::vector edge_cumsum({0, 4, 7}); const std::vector src_vertex_cumsum({0, 3, 5}); const std::vector dst_vertex_cumsum({0, 3, 6}); const std::vector &p_coos = aten::DisjointPartitionCooBySizes( coo_ab, 2, edge_cumsum, src_vertex_cumsum, dst_vertex_cumsum); ASSERT_EQ(p_coos[0].num_rows, coo_a.num_rows); ASSERT_EQ(p_coos[0].num_cols, coo_a.num_cols); ASSERT_EQ(p_coos[1].num_rows, coo_b.num_rows); ASSERT_EQ(p_coos[1].num_cols, coo_b.num_cols); ASSERT_TRUE(ArrayEQ(p_coos[0].row, coo_a.row)); ASSERT_TRUE(ArrayEQ(p_coos[0].col, coo_a.col)); ASSERT_TRUE(ArrayEQ(p_coos[1].row, coo_b.row)); ASSERT_TRUE(ArrayEQ(p_coos[1].col, coo_b.col)); ASSERT_TRUE(ArrayEQ(p_coos[1].data, coo_b.data)); ASSERT_TRUE(p_coos[0].row_sorted); ASSERT_FALSE(p_coos[0].col_sorted); ASSERT_TRUE(p_coos[1].row_sorted); ASSERT_FALSE(p_coos[1].col_sorted); const std::vector coos_abc({coo_a, coo_b, coo_c}); const aten::COOMatrix &coo_abc = aten::DisjointUnionCoo(coos_abc); ASSERT_EQ(coo_abc.num_rows, 6); ASSERT_EQ(coo_abc.num_cols, 7); ASSERT_TRUE(ArrayEQ(coo_abc.row, abc_row)); ASSERT_TRUE(ArrayEQ(coo_abc.col, abc_col)); ASSERT_TRUE(ArrayEQ(coo_abc.data, abc_data)); 
ASSERT_TRUE(coo_abc.row_sorted); ASSERT_FALSE(coo_abc.col_sorted); const std::vector edge_cumsum_abc({0, 4, 7, 8}); const std::vector src_vertex_cumsum_abc({0, 3, 5, 6}); const std::vector dst_vertex_cumsum_abc({0, 3, 6, 7}); const std::vector &p_coos_abc = aten::DisjointPartitionCooBySizes( coo_abc, 3, edge_cumsum_abc, src_vertex_cumsum_abc, dst_vertex_cumsum_abc); ASSERT_EQ(p_coos_abc[0].num_rows, coo_a.num_rows); ASSERT_EQ(p_coos_abc[0].num_cols, coo_a.num_cols); ASSERT_EQ(p_coos_abc[1].num_rows, coo_b.num_rows); ASSERT_EQ(p_coos_abc[1].num_cols, coo_b.num_cols); ASSERT_EQ(p_coos_abc[2].num_rows, coo_c.num_rows); ASSERT_EQ(p_coos_abc[2].num_cols, coo_c.num_cols); ASSERT_TRUE(ArrayEQ(p_coos_abc[0].row, coo_a.row)); ASSERT_TRUE(ArrayEQ(p_coos_abc[0].col, coo_a.col)); ASSERT_TRUE(ArrayEQ(p_coos_abc[1].row, coo_b.row)); ASSERT_TRUE(ArrayEQ(p_coos_abc[1].col, coo_b.col)); ASSERT_TRUE(ArrayEQ(p_coos_abc[1].data, coo_b.data)); ASSERT_TRUE(ArrayEQ(p_coos_abc[2].row, coo_c.row)); ASSERT_TRUE(ArrayEQ(p_coos_abc[2].col, coo_c.col)); ASSERT_TRUE(p_coos_abc[0].row_sorted); ASSERT_FALSE(p_coos_abc[0].col_sorted); ASSERT_TRUE(p_coos_abc[1].row_sorted); ASSERT_FALSE(p_coos_abc[1].col_sorted); ASSERT_TRUE(p_coos_abc[2].row_sorted); ASSERT_FALSE(p_coos_abc[2].col_sorted); } TEST(DisjointUnionTest, TestDisjointUnionPartitionCoo) { _TestDisjointUnionPartitionCoo(CPU); _TestDisjointUnionPartitionCoo(CPU); #ifdef DGL_USE_CUDA _TestDisjointUnionPartitionCoo(GPU); _TestDisjointUnionPartitionCoo(GPU); #endif } template void _TestDisjointUnionPartitionCsr(DGLContext ctx) { /** * A = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0]] * * B = [[1, 1, 0], * [0, 1, 0]] * * C = [[1]] * * BC = [[1, 1, 0, 0], * [0, 1, 0, 0], * [0, 0, 0, 1]], * * ABC = [[0, 0, 1, 0, 0, 0, 0], * [1, 0, 1, 0, 0, 0, 0], * [0, 1, 0, 0, 0, 0, 0], * [0, 0, 0, 1, 1, 0, 0], * [0, 0, 0, 0, 1, 0, 0], * [0, 0, 0, 0, 0, 0, 1]] */ IdArray a_indptr = aten::VecToIdArray( std::vector({0, 1, 3, 4}), sizeof(IdType) * 8, CTX); IdArray 
a_indices = aten::VecToIdArray( std::vector({2, 0, 2, 1}), sizeof(IdType) * 8, CTX); IdArray b_indptr = aten::VecToIdArray( std::vector({0, 2, 3}), sizeof(IdType) * 8, CTX); IdArray b_indices = aten::VecToIdArray( std::vector({0, 1, 1}), sizeof(IdType) * 8, CTX); IdArray b_data = aten::VecToIdArray( std::vector({2, 0, 1}), sizeof(IdType) * 8, CTX); IdArray c_indptr = aten::VecToIdArray(std::vector({0, 1}), sizeof(IdType) * 8, CTX); IdArray c_indices = aten::VecToIdArray(std::vector({0}), sizeof(IdType) * 8, CTX); IdArray bc_indptr = aten::VecToIdArray( std::vector({0, 2, 3, 4}), sizeof(IdType) * 8, CTX); IdArray bc_indices = aten::VecToIdArray( std::vector({0, 1, 1, 3}), sizeof(IdType) * 8, CTX); IdArray bc_data = aten::VecToIdArray( std::vector({2, 0, 1, 3}), sizeof(IdType) * 8, CTX); IdArray abc_indptr = aten::VecToIdArray( std::vector({0, 1, 3, 4, 6, 7, 8}), sizeof(IdType) * 8, CTX); IdArray abc_indices = aten::VecToIdArray( std::vector({2, 0, 2, 1, 3, 4, 4, 6}), sizeof(IdType) * 8, CTX); IdArray abc_data = aten::VecToIdArray( std::vector({0, 1, 2, 3, 6, 4, 5, 7}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a = aten::CSRMatrix(3, 3, a_indptr, a_indices, aten::NullArray(), false); const aten::CSRMatrix &csr_b = aten::CSRMatrix(2, 3, b_indptr, b_indices, b_data, true); const aten::CSRMatrix &csr_c = aten::CSRMatrix(1, 1, c_indptr, c_indices, aten::NullArray(), true); const std::vector csrs_bc({csr_b, csr_c}); const aten::CSRMatrix &csr_bc = aten::DisjointUnionCsr(csrs_bc); ASSERT_EQ(csr_bc.num_rows, 3); ASSERT_EQ(csr_bc.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_bc.indptr, bc_indptr)); ASSERT_TRUE(ArrayEQ(csr_bc.indices, bc_indices)); ASSERT_TRUE(ArrayEQ(csr_bc.data, bc_data)); ASSERT_TRUE(csr_bc.sorted); const std::vector edge_cumsum({0, 3, 4}); const std::vector src_vertex_cumsum({0, 2, 3}); const std::vector dst_vertex_cumsum({0, 3, 4}); const std::vector &p_csrs = aten::DisjointPartitionCsrBySizes( csr_bc, 2, edge_cumsum, src_vertex_cumsum, 
dst_vertex_cumsum); ASSERT_EQ(p_csrs[0].num_rows, csr_b.num_rows); ASSERT_EQ(p_csrs[0].num_cols, csr_b.num_cols); ASSERT_EQ(p_csrs[1].num_rows, csr_c.num_rows); ASSERT_EQ(p_csrs[1].num_cols, csr_c.num_cols); ASSERT_TRUE(ArrayEQ(p_csrs[0].indptr, csr_b.indptr)); ASSERT_TRUE(ArrayEQ(p_csrs[0].indices, csr_b.indices)); ASSERT_TRUE(ArrayEQ(p_csrs[0].data, csr_b.data)); ASSERT_TRUE(ArrayEQ(p_csrs[1].indptr, csr_c.indptr)); ASSERT_TRUE(ArrayEQ(p_csrs[1].indices, csr_c.indices)); ASSERT_TRUE(p_csrs[0].sorted); ASSERT_TRUE(p_csrs[1].sorted); const std::vector csrs_abc({csr_a, csr_b, csr_c}); const aten::CSRMatrix &csr_abc = aten::DisjointUnionCsr(csrs_abc); ASSERT_EQ(csr_abc.num_rows, 6); ASSERT_EQ(csr_abc.num_cols, 7); ASSERT_TRUE(ArrayEQ(csr_abc.indptr, abc_indptr)); ASSERT_TRUE(ArrayEQ(csr_abc.indices, abc_indices)); ASSERT_TRUE(ArrayEQ(csr_abc.data, abc_data)); ASSERT_FALSE(csr_abc.sorted); const std::vector edge_cumsum_abc({0, 4, 7, 8}); const std::vector src_vertex_cumsum_abc({0, 3, 5, 6}); const std::vector dst_vertex_cumsum_abc({0, 3, 6, 7}); const std::vector &p_csrs_abc = aten::DisjointPartitionCsrBySizes( csr_abc, 3, edge_cumsum_abc, src_vertex_cumsum_abc, dst_vertex_cumsum_abc); ASSERT_EQ(p_csrs_abc[0].num_rows, csr_a.num_rows); ASSERT_EQ(p_csrs_abc[0].num_cols, csr_a.num_cols); ASSERT_EQ(p_csrs_abc[1].num_rows, csr_b.num_rows); ASSERT_EQ(p_csrs_abc[1].num_cols, csr_b.num_cols); ASSERT_EQ(p_csrs_abc[2].num_rows, csr_c.num_rows); ASSERT_EQ(p_csrs_abc[2].num_cols, csr_c.num_cols); ASSERT_TRUE(ArrayEQ(p_csrs_abc[0].indptr, csr_a.indptr)); ASSERT_TRUE(ArrayEQ(p_csrs_abc[0].indices, csr_a.indices)); ASSERT_TRUE(ArrayEQ(p_csrs_abc[1].indptr, csr_b.indptr)); ASSERT_TRUE(ArrayEQ(p_csrs_abc[1].indices, csr_b.indices)); ASSERT_TRUE(ArrayEQ(p_csrs_abc[1].data, csr_b.data)); ASSERT_TRUE(ArrayEQ(p_csrs_abc[2].indptr, csr_c.indptr)); ASSERT_TRUE(ArrayEQ(p_csrs_abc[2].indices, csr_c.indices)); ASSERT_FALSE(p_csrs_abc[0].sorted); ASSERT_FALSE(p_csrs_abc[1].sorted); 
ASSERT_FALSE(p_csrs_abc[2].sorted); } TEST(DisjointUnionTest, TestDisjointUnionPartitionCsr) { _TestDisjointUnionPartitionCsr(CPU); _TestDisjointUnionPartitionCsr(CPU); #ifdef DGL_USE_CUDA _TestDisjointUnionPartitionCsr(GPU); _TestDisjointUnionPartitionCsr(GPU); #endif } template void _TestSliceContiguousChunkCoo(DGLContext ctx) { /** * A = [[1, 0, 0, 0], * [0, 0, 1, 0], * [0, 0, 0, 0]] * * B = [[1, 0, 0], * [0, 0, 1]] * * C = [[0]] * */ IdArray a_row = aten::VecToIdArray(std::vector({0, 1}), sizeof(IdType) * 8, CTX); IdArray a_col = aten::VecToIdArray(std::vector({0, 2}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_a = aten::COOMatrix(3, 4, a_row, a_col, aten::NullArray(), true, false); IdArray b_row = aten::VecToIdArray(std::vector({0, 1}), sizeof(IdType) * 8, CTX); IdArray b_col = aten::VecToIdArray(std::vector({0, 2}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_b_raw = aten::COOMatrix(2, 3, b_row, b_col, aten::NullArray(), true, false); const std::vector edge_range_b({0, 2}); const std::vector src_vertex_range_b({0, 2}); const std::vector dst_vertex_range_b({0, 3}); const aten::COOMatrix &coo_b = aten::COOSliceContiguousChunk( coo_a, edge_range_b, src_vertex_range_b, dst_vertex_range_b); ASSERT_EQ(coo_b_raw.num_rows, coo_b.num_rows); ASSERT_EQ(coo_b_raw.num_cols, coo_b.num_cols); ASSERT_TRUE(ArrayEQ(coo_b_raw.row, coo_b.row)); ASSERT_TRUE(ArrayEQ(coo_b_raw.col, coo_b.col)); ASSERT_TRUE(coo_b.row_sorted); ASSERT_FALSE(coo_b.col_sorted); IdArray c_row = aten::VecToIdArray(std::vector({}), sizeof(IdType) * 8, CTX); IdArray c_col = aten::VecToIdArray(std::vector({}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_c_raw = aten::COOMatrix(1, 1, c_row, c_col, aten::NullArray(), true, false); const std::vector edge_range_c({2, 2}); const std::vector src_vertex_range_c({2, 3}); const std::vector dst_vertex_range_c({3, 4}); const aten::COOMatrix &coo_c = aten::COOSliceContiguousChunk( coo_a, edge_range_c, src_vertex_range_c, dst_vertex_range_c); 
ASSERT_EQ(coo_c_raw.num_rows, coo_c.num_rows); ASSERT_EQ(coo_c_raw.num_cols, coo_c.num_cols); ASSERT_TRUE(ArrayEQ(coo_c.row, c_row)); ASSERT_TRUE(ArrayEQ(coo_c.col, c_col)); ASSERT_TRUE(coo_c.row_sorted); ASSERT_FALSE(coo_c.col_sorted); } TEST(SliceContiguousChunk, TestSliceContiguousChunkCoo) { _TestSliceContiguousChunkCoo(CPU); _TestSliceContiguousChunkCoo(CPU); #ifdef DGL_USE_CUDA _TestSliceContiguousChunkCoo(GPU); _TestSliceContiguousChunkCoo(GPU); #endif } template void _TestSliceContiguousChunkCsr(DGLContext ctx) { /** * A = [[1, 0, 0, 0], * [0, 0, 1, 0], * [0, 0, 0, 0]] * * B = [[1, 0, 0], * [0, 0, 1]] * * C = [[0]] * */ IdArray a_indptr = aten::VecToIdArray( std::vector({0, 1, 2, 2}), sizeof(IdType) * 8, CTX); IdArray a_indices = aten::VecToIdArray(std::vector({0, 2}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a = aten::CSRMatrix(3, 4, a_indptr, a_indices, aten::NullArray(), false); IdArray b_indptr = aten::VecToIdArray( std::vector({0, 1, 2}), sizeof(IdType) * 8, CTX); IdArray b_indices = aten::VecToIdArray(std::vector({0, 2}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_b_raw = aten::CSRMatrix(2, 3, b_indptr, b_indices, aten::NullArray(), false); const std::vector edge_range_b({0, 2}); const std::vector src_vertex_range_b({0, 2}); const std::vector dst_vertex_range_b({0, 3}); const aten::CSRMatrix &csr_b = aten::CSRSliceContiguousChunk( csr_a, edge_range_b, src_vertex_range_b, dst_vertex_range_b); ASSERT_EQ(csr_b.num_rows, csr_b_raw.num_rows); ASSERT_EQ(csr_b.num_cols, csr_b_raw.num_cols); ASSERT_TRUE(ArrayEQ(csr_b.indptr, csr_b_raw.indptr)); ASSERT_TRUE(ArrayEQ(csr_b.indices, csr_b_raw.indices)); ASSERT_FALSE(csr_b.sorted); const std::vector edge_range_c({2, 2}); const std::vector src_vertex_range_c({2, 3}); const std::vector dst_vertex_range_c({3, 4}); const aten::CSRMatrix &csr_c = aten::CSRSliceContiguousChunk( csr_a, edge_range_c, src_vertex_range_c, dst_vertex_range_c); int64_t indptr_len = src_vertex_range_c[1] - 
src_vertex_range_c[0] + 1; IdArray c_indptr = aten::Full(0, indptr_len, sizeof(IdType) * 8, CTX); IdArray c_indices = aten::VecToIdArray(std::vector({}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_c_raw = aten::CSRMatrix(1, 1, c_indptr, c_indices, aten::NullArray(), false); ASSERT_EQ(csr_c.num_rows, csr_c_raw.num_rows); ASSERT_EQ(csr_c.num_cols, csr_c_raw.num_cols); ASSERT_TRUE(ArrayEQ(csr_c.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_c.indices, c_indices)); ASSERT_FALSE(csr_c.sorted); } TEST(SliceContiguousChunk, TestSliceContiguousChunkCsr) { _TestSliceContiguousChunkCsr(CPU); _TestSliceContiguousChunkCsr(CPU); #ifdef DGL_USE_CUDA _TestSliceContiguousChunkCsr(GPU); _TestSliceContiguousChunkCsr(GPU); #endif } template void _TestMatrixUnionCsr(DGLContext ctx) { /** * Checks UnionCsr on 6x4 inputs. The matrices below list how many entries * each (row, col) position holds; every C is the element-wise sum of its * inputs. * * A = [[0, 0, 0, 0], * [0, 0, 0, 0], * [0, 1, 0, 0], * [1, 1, 1, 1], * [0, 1, 1, 0], * [1, 0, 0, 1]] * * B = [[0, 0, 0, 0], * [1, 0, 0, 1], * [0, 0, 1, 0], * [1, 0, 0, 1], * [1, 0, 0, 1], * [1, 0, 0, 1]] * * C = UnionCsr({A, B}) * * C = [[0, 0, 0, 0], * [1, 0, 0, 1], * [0, 1, 1, 0], * [2, 1, 1, 2], * [1, 1, 1, 1], * [2, 0, 0, 2]] * * D = [[1, 0, 0, 0], * [0, 0, 0, 0], * [0, 0, 0, 0], * [0, 0, 0, 0], * [0, 0, 0, 0], * [1, 0, 0, 1]] * * C = UnionCsr({A, B, D}) * * C = [[1, 0, 0, 0], * [1, 0, 0, 1], * [0, 1, 1, 0], * [2, 1, 1, 2], * [1, 1, 1, 1], * [3, 0, 0, 3]] */ IdArray a_indptr = aten::VecToIdArray( std::vector({0, 0, 0, 1, 5, 7, 9}), sizeof(IdType) * 8, CTX); IdArray a_indices = aten::VecToIdArray( std::vector({1, 0, 1, 2, 3, 1, 2, 0, 3}), sizeof(IdType) * 8, CTX); IdArray b_indptr = aten::VecToIdArray( std::vector({0, 0, 2, 3, 5, 7, 9}), sizeof(IdType) * 8, CTX); IdArray b_indices = aten::VecToIdArray( std::vector({0, 3, 2, 0, 3, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); IdArray c_indptr = aten::VecToIdArray( std::vector({0, 0, 2, 4, 10, 14, 18}), sizeof(IdType) * 8, CTX); IdArray c_indices = aten::VecToIdArray( std::vector( {0, 3, 1, 2, 0, 0, 1, 2, 3, 3, 0, 1, 2, 3, 0, 0, 3, 3}), 
sizeof(IdType) * 8, CTX); IdArray c_data = aten::VecToIdArray( std::vector( {9, 10, 0, 11, 1, 12, 2, 3, 4, 13, 14, 5, 6, 15, 7, 16, 8, 17}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a = aten::CSRMatrix(6, 4, a_indptr, a_indices, aten::NullArray(), true); const aten::CSRMatrix &csr_b = aten::CSRMatrix(6, 4, b_indptr, b_indices, aten::NullArray(), true); const aten::CSRMatrix &csr_aUb = aten::UnionCsr({csr_a, csr_b}); ASSERT_EQ(csr_aUb.num_rows, 6); ASSERT_EQ(csr_aUb.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUb.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_aUb.indices, c_indices)); ASSERT_TRUE(ArrayEQ(csr_aUb.data, c_data)); ASSERT_TRUE(csr_aUb.sorted); IdArray a_data = aten::VecToIdArray( std::vector({8, 7, 6, 5, 4, 3, 2, 1, 0}), sizeof(IdType) * 8, CTX); c_data = aten::VecToIdArray( std::vector( {9, 10, 8, 11, 7, 12, 6, 5, 4, 13, 14, 3, 2, 15, 1, 16, 0, 17}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_ad = aten::CSRMatrix(6, 4, a_indptr, a_indices, a_data, true); const aten::CSRMatrix &csr_adUb = aten::UnionCsr({csr_ad, csr_b}); ASSERT_EQ(csr_adUb.num_rows, 6); ASSERT_EQ(csr_adUb.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_adUb.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_adUb.indices, c_indices)); ASSERT_TRUE(ArrayEQ(csr_adUb.data, c_data)); ASSERT_TRUE(csr_adUb.sorted); IdArray b_indices2 = aten::VecToIdArray( std::vector({0, 3, 2, 0, 3, 3, 0, 0, 3}), sizeof(IdType) * 8, CTX); c_indices = aten::VecToIdArray( std::vector( {0, 3, 1, 2, 0, 1, 2, 3, 0, 3, 1, 2, 3, 0, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); c_data = aten::VecToIdArray( std::vector( {9, 10, 0, 11, 1, 2, 3, 4, 12, 13, 5, 6, 14, 15, 7, 8, 16, 17}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_b2 = aten::CSRMatrix(6, 4, b_indptr, b_indices2, aten::NullArray(), false); const aten::CSRMatrix &csr_aUb2 = aten::UnionCsr({csr_a, csr_b2}); ASSERT_EQ(csr_aUb2.num_rows, 6); ASSERT_EQ(csr_aUb2.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUb2.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_aUb2.indices, 
c_indices)); ASSERT_TRUE(ArrayEQ(csr_aUb2.data, c_data)); ASSERT_FALSE(csr_aUb2.sorted); IdArray a_indices2 = aten::VecToIdArray( std::vector({1, 3, 2, 1, 0, 1, 2, 0, 3}), sizeof(IdType) * 8, CTX); c_indices = aten::VecToIdArray( std::vector( {0, 3, 1, 2, 3, 2, 1, 0, 0, 3, 1, 2, 0, 3, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a2 = aten::CSRMatrix(6, 4, a_indptr, a_indices2, aten::NullArray(), false); const aten::CSRMatrix &csr_aUb3 = aten::UnionCsr({csr_a2, csr_b}); ASSERT_EQ(csr_aUb3.num_rows, 6); ASSERT_EQ(csr_aUb3.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUb3.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_aUb3.indices, c_indices)); ASSERT_TRUE(ArrayEQ(csr_aUb3.data, c_data)); ASSERT_FALSE(csr_aUb3.sorted); c_indices = aten::VecToIdArray( std::vector( {0, 3, 1, 2, 3, 2, 1, 0, 0, 3, 1, 2, 3, 0, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_aUb4 = aten::UnionCsr({csr_a2, csr_b2}); ASSERT_EQ(csr_aUb4.num_rows, 6); ASSERT_EQ(csr_aUb4.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUb4.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_aUb4.indices, c_indices)); ASSERT_TRUE(ArrayEQ(csr_aUb4.data, c_data)); ASSERT_FALSE(csr_aUb4.sorted); IdArray d_indptr = aten::VecToIdArray( std::vector({0, 1, 1, 1, 1, 1, 3}), sizeof(IdType) * 8, CTX); IdArray d_indices = aten::VecToIdArray( std::vector({0, 0, 3}), sizeof(IdType) * 8, CTX); c_indptr = aten::VecToIdArray( std::vector({0, 1, 3, 5, 11, 15, 21}), sizeof(IdType) * 8, CTX); c_indices = aten::VecToIdArray( std::vector( {0, 0, 3, 1, 2, 0, 0, 1, 2, 3, 3, 0, 1, 2, 3, 0, 0, 0, 3, 3, 3}), sizeof(IdType) * 8, CTX); c_data = aten::VecToIdArray( std::vector({18, 9, 10, 8, 11, 7, 12, 6, 5, 4, 13, 14, 3, 2, 15, 1, 16, 19, 0, 17, 20}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_d = aten::CSRMatrix(6, 4, d_indptr, d_indices, aten::NullArray(), true); const aten::CSRMatrix &csr_aUbUd = aten::UnionCsr({csr_ad, csr_b, csr_d}); ASSERT_EQ(csr_aUbUd.num_rows, 6); ASSERT_EQ(csr_aUbUd.num_cols, 4); 
ASSERT_TRUE(ArrayEQ(csr_aUbUd.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_aUbUd.indices, c_indices)); ASSERT_TRUE(ArrayEQ(csr_aUbUd.data, c_data)); ASSERT_TRUE(csr_aUbUd.sorted); c_indices = aten::VecToIdArray( std::vector( {0, 0, 3, 1, 2, 3, 2, 1, 0, 0, 3, 1, 2, 3, 0, 0, 3, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); c_data = aten::VecToIdArray( std::vector({18, 9, 10, 0, 11, 1, 2, 3, 4, 12, 13, 5, 6, 14, 15, 7, 8, 16, 17, 19, 20}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_aUbUd2 = aten::UnionCsr({csr_a2, csr_b2, csr_d}); ASSERT_EQ(csr_aUbUd2.num_rows, 6); ASSERT_EQ(csr_aUbUd2.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUbUd2.indptr, c_indptr)); ASSERT_TRUE(ArrayEQ(csr_aUbUd2.indices, c_indices)); ASSERT_TRUE(ArrayEQ(csr_aUbUd2.data, c_data)); ASSERT_FALSE(csr_aUbUd2.sorted); } TEST(MatrixUnionTest, TestMatrixUnionCsr) { _TestMatrixUnionCsr(CPU); _TestMatrixUnionCsr(CPU); } template void _TestMatrixUnionCoo(DGLContext ctx) { /** * Checks UnionCoo on 6x4 inputs. The matrices below list how many entries * each (row, col) position holds; every C is the element-wise sum of its * inputs. * * A = [[0, 0, 0, 0], * [0, 0, 0, 0], * [0, 1, 0, 0], * [1, 1, 1, 1], * [0, 1, 1, 0], * [1, 0, 0, 1]] * * B = [[0, 0, 0, 0], * [1, 0, 0, 1], * [0, 0, 1, 0], * [1, 0, 0, 1], * [1, 0, 0, 1], * [1, 0, 0, 1]] * * C = UnionCoo({A, B}) * * C = [[0, 0, 0, 0], * [1, 0, 0, 1], * [0, 1, 1, 0], * [2, 1, 1, 2], * [1, 1, 1, 1], * [2, 0, 0, 2]] * * D = [[1, 0, 0, 0], * [0, 0, 0, 0], * [0, 0, 0, 0], * [0, 0, 0, 0], * [0, 0, 0, 0], * [1, 0, 0, 1]] * * C = UnionCoo({A, B, D}) * * C = [[1, 0, 0, 0], * [1, 0, 0, 1], * [0, 1, 1, 0], * [2, 1, 1, 2], * [1, 1, 1, 1], * [3, 0, 0, 3]] */ IdArray a_row = aten::VecToIdArray( std::vector({2, 3, 3, 3, 3, 4, 4, 5, 5}), sizeof(IdType) * 8, CTX); IdArray a_col = aten::VecToIdArray( std::vector({1, 0, 1, 2, 3, 1, 2, 0, 3}), sizeof(IdType) * 8, CTX); IdArray b_row = aten::VecToIdArray( std::vector({1, 1, 2, 3, 3, 4, 4, 5, 5}), sizeof(IdType) * 8, CTX); IdArray b_col = aten::VecToIdArray( std::vector({0, 3, 2, 0, 3, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); IdArray c_row = aten::VecToIdArray( std::vector( {2, 3, 3, 3, 3, 4, 
4, 5, 5, 1, 1, 2, 3, 3, 4, 4, 5, 5}), sizeof(IdType) * 8, CTX); IdArray c_col = aten::VecToIdArray( std::vector( {1, 0, 1, 2, 3, 1, 2, 0, 3, 0, 3, 2, 0, 3, 0, 3, 0, 3}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_a = aten::COOMatrix(6, 4, a_row, a_col, aten::NullArray(), true, true); const aten::COOMatrix &coo_b = aten::COOMatrix(6, 4, b_row, b_col, aten::NullArray(), true, true); const std::vector coos_ab({coo_a, coo_b}); const aten::COOMatrix &coo_ab = aten::UnionCoo(coos_ab); ASSERT_EQ(coo_ab.num_rows, 6); ASSERT_EQ(coo_ab.num_cols, 4); ASSERT_TRUE(ArrayEQ(coo_ab.row, c_row)); ASSERT_TRUE(ArrayEQ(coo_ab.col, c_col)); ASSERT_FALSE(COOHasData(coo_ab)); ASSERT_FALSE(coo_ab.row_sorted); ASSERT_FALSE(coo_ab.col_sorted); IdArray a_data = aten::VecToIdArray( std::vector({2, 1, 0, 3, 4, 5, 6, 7, 8}), sizeof(IdType) * 8, CTX); IdArray c_data = aten::VecToIdArray( std::vector( {2, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_a2 = aten::COOMatrix(6, 4, a_row, a_col, a_data, true, true); const std::vector coos_ab2({coo_a2, coo_b}); const aten::COOMatrix &coo_ab2 = aten::UnionCoo(coos_ab2); ASSERT_EQ(coo_ab2.num_rows, 6); ASSERT_EQ(coo_ab2.num_cols, 4); ASSERT_TRUE(ArrayEQ(coo_ab2.row, c_row)); ASSERT_TRUE(ArrayEQ(coo_ab2.col, c_col)); ASSERT_TRUE(COOHasData(coo_ab2)); ASSERT_TRUE(ArrayEQ(coo_ab2.data, c_data)); ASSERT_FALSE(coo_ab2.row_sorted); ASSERT_FALSE(coo_ab2.col_sorted); IdArray b_data = aten::VecToIdArray( std::vector({0, 1, 2, 3, 4, 5, 6, 8, 7}), sizeof(IdType) * 8, CTX); c_data = aten::VecToIdArray( std::vector( {2, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 16}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_b2 = aten::COOMatrix(6, 4, b_row, b_col, b_data, true, true); const std::vector coos_ab3({coo_a2, coo_b2}); const aten::COOMatrix &coo_ab3 = aten::UnionCoo(coos_ab3); ASSERT_EQ(coo_ab3.num_rows, 6); ASSERT_EQ(coo_ab3.num_cols, 4); ASSERT_TRUE(ArrayEQ(coo_ab3.row, 
c_row)); ASSERT_TRUE(ArrayEQ(coo_ab3.col, c_col)); ASSERT_TRUE(COOHasData(coo_ab3)); ASSERT_TRUE(ArrayEQ(coo_ab3.data, c_data)); ASSERT_FALSE(coo_ab3.row_sorted); ASSERT_FALSE(coo_ab3.col_sorted); c_data = aten::VecToIdArray( std::vector( {2, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 16}), sizeof(IdType) * 8, CTX); const std::vector coos_ab4({coo_a2, coo_b2}); const aten::COOMatrix &coo_ab4 = aten::UnionCoo(coos_ab4); ASSERT_EQ(coo_ab4.num_rows, 6); ASSERT_EQ(coo_ab4.num_cols, 4); ASSERT_TRUE(ArrayEQ(coo_ab4.row, c_row)); ASSERT_TRUE(ArrayEQ(coo_ab4.col, c_col)); ASSERT_TRUE(COOHasData(coo_ab4)); ASSERT_TRUE(ArrayEQ(coo_ab4.data, c_data)); ASSERT_FALSE(coo_ab4.row_sorted); ASSERT_FALSE(coo_ab4.col_sorted); IdArray d_row = aten::VecToIdArray( std::vector({0, 5, 5}), sizeof(IdType) * 8, CTX); IdArray d_col = aten::VecToIdArray( std::vector({0, 0, 3}), sizeof(IdType) * 8, CTX); c_row = aten::VecToIdArray( std::vector( {2, 3, 3, 3, 3, 4, 4, 5, 5, 1, 1, 2, 3, 3, 4, 4, 5, 5, 0, 5, 5}), sizeof(IdType) * 8, CTX); c_col = aten::VecToIdArray( std::vector( {1, 0, 1, 2, 3, 1, 2, 0, 3, 0, 3, 2, 0, 3, 0, 3, 0, 3, 0, 0, 3}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo_d = aten::COOMatrix(6, 4, d_row, d_col, aten::NullArray(), true, true); const aten::COOMatrix &csr_aUbUd = aten::UnionCoo({coo_a, coo_b, coo_d}); ASSERT_EQ(csr_aUbUd.num_rows, 6); ASSERT_EQ(csr_aUbUd.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUbUd.row, c_row)); ASSERT_TRUE(ArrayEQ(csr_aUbUd.col, c_col)); ASSERT_FALSE(COOHasData(csr_aUbUd)); ASSERT_FALSE(csr_aUbUd.row_sorted); ASSERT_FALSE(csr_aUbUd.col_sorted); c_data = aten::VecToIdArray( std::vector({2, 1, 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 16, 18, 19, 20}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &csr_aUbUd2 = aten::UnionCoo({coo_a2, coo_b2, coo_d}); ASSERT_EQ(csr_aUbUd2.num_rows, 6); ASSERT_EQ(csr_aUbUd2.num_cols, 4); ASSERT_TRUE(ArrayEQ(csr_aUbUd2.row, c_row)); ASSERT_TRUE(ArrayEQ(csr_aUbUd2.col, c_col)); 
ASSERT_TRUE(COOHasData(csr_aUbUd2)); ASSERT_TRUE(ArrayEQ(csr_aUbUd2.data, c_data)); ASSERT_FALSE(csr_aUbUd2.row_sorted); ASSERT_FALSE(csr_aUbUd2.col_sorted); } TEST(MatrixUnionTest, TestMatrixUnionCoo) { _TestMatrixUnionCoo(CPU); _TestMatrixUnionCoo(CPU); } template void _TestCumSum(DGLContext ctx) { IdArray a = aten::VecToIdArray( std::vector({8, 6, 7, 5, 3, 0, 9}), sizeof(IDX) * 8, ctx); { IdArray tb = aten::VecToIdArray( std::vector({8, 14, 21, 26, 29, 29, 38}), sizeof(IDX) * 8, ctx); IdArray b = aten::CumSum(a); ASSERT_TRUE(ArrayEQ(b, tb)); } { IdArray tb = aten::VecToIdArray( std::vector({0, 8, 14, 21, 26, 29, 29, 38}), sizeof(IDX) * 8, ctx); IdArray b = aten::CumSum(a, true); ASSERT_TRUE(ArrayEQ(b, tb)); } a = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); { IdArray tb = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); IdArray b = aten::CumSum(a); ASSERT_TRUE(ArrayEQ(b, tb)); } { IdArray tb = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); IdArray b = aten::CumSum(a); ASSERT_TRUE(ArrayEQ(b, tb)); } } TEST(ArrayTest, CumSum) { _TestCumSum(CPU); _TestCumSum(CPU); #ifdef DGL_USE_CUDA _TestCumSum(GPU); _TestCumSum(GPU); #endif } template void _TestScatter_(DGLContext ctx) { IdArray out = aten::Full(1, 10, 8 * sizeof(IDX), ctx); IdArray idx = aten::VecToIdArray(std::vector({2, 3, 9}), sizeof(IDX) * 8, ctx); IdArray val = aten::VecToIdArray(std::vector({-20, 30, 90}), sizeof(IDX) * 8, ctx); aten::Scatter_(idx, val, out); IdArray tout = aten::VecToIdArray( std::vector({1, 1, -20, 30, 1, 1, 1, 1, 1, 90}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(out, tout)); } TEST(ArrayTest, Scatter_) { _TestScatter_(CPU); _TestScatter_(CPU); _TestScatter_(CPU); _TestScatter_(CPU); #ifdef DGL_USE_CUDA _TestScatter_(GPU); _TestScatter_(GPU); _TestScatter_(GPU); _TestScatter_(GPU); #endif } template void _TestNonZero(DGLContext ctx) { auto val = aten::VecToIdArray( std::vector({0, 1, 2, 0, -10, 0, 0, 23}), sizeof(IDX) * 8, ctx); auto idx = 
aten::NonZero(val); auto tidx = aten::VecToIdArray(std::vector({1, 2, 4, 7}), 64, ctx); ASSERT_TRUE(ArrayEQ(idx, tidx)); val = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); idx = aten::NonZero(val); tidx = aten::VecToIdArray(std::vector({}), 64, ctx); ASSERT_TRUE(ArrayEQ(idx, tidx)); val = aten::VecToIdArray(std::vector({0, 0, 0, 0}), sizeof(IDX) * 8, ctx); idx = aten::NonZero(val); tidx = aten::VecToIdArray(std::vector({}), 64, ctx); ASSERT_TRUE(ArrayEQ(idx, tidx)); val = aten::Full(1, 3, sizeof(IDX) * 8, ctx); idx = aten::NonZero(val); tidx = aten::VecToIdArray(std::vector({0, 1, 2}), 64, ctx); ASSERT_TRUE(ArrayEQ(idx, tidx)); } TEST(ArrayTest, NonZero) { _TestNonZero(CPU); _TestNonZero(CPU); #ifdef DGL_USE_CUDA _TestNonZero(GPU); _TestNonZero(GPU); #endif } template void _TestLineGraphCOO(DGLContext ctx) { /** * A = [[0, 0, 1, 0], * [1, 0, 1, 0], * [1, 1, 0, 0], * [0, 0, 0, 1]] * row: 0 1 1 2 2 3 * col: 2 0 2 0 1 3 * ID: 0 1 2 3 4 5 * * B = COOLineGraph(A, backtracking=False) * * B = [[0, 0, 0, 0, 1, 0], * [1, 0, 0, 0, 0, 0], * [0, 0, 0, 1, 0, 0], * [0, 0, 0, 0, 0, 0], * [0, 1, 0, 0, 0, 0], * [0, 0, 0, 0, 0, 0]] * * C = COOLineGraph(A, backtracking=True) * * C = [[0, 0, 0, 1, 1, 0], * [1, 0, 0, 0, 0, 0], * [0, 0, 0, 1, 1, 0], * [1, 0, 0, 0, 0, 0], * [0, 1, 1, 0, 0, 0], * [0, 0, 0, 0, 0, 0]] */ IdArray a_row = aten::VecToIdArray( std::vector({0, 1, 1, 2, 2, 3}), sizeof(IdType) * 8, ctx); IdArray a_col = aten::VecToIdArray( std::vector({2, 0, 2, 0, 1, 3}), sizeof(IdType) * 8, ctx); IdArray b_row = aten::VecToIdArray( std::vector({0, 1, 2, 4}), sizeof(IdType) * 8, ctx); IdArray b_col = aten::VecToIdArray( std::vector({4, 0, 3, 1}), sizeof(IdType) * 8, ctx); IdArray c_row = aten::VecToIdArray( std::vector({0, 0, 1, 2, 2, 3, 4, 4}), sizeof(IdType) * 8, ctx); IdArray c_col = aten::VecToIdArray( std::vector({3, 4, 0, 3, 4, 0, 1, 2}), sizeof(IdType) * 8, ctx); const aten::COOMatrix &coo_a = aten::COOMatrix(4, 4, a_row, a_col, aten::NullArray(), true, 
false); const aten::COOMatrix &l_coo = COOLineGraph(coo_a, false); ASSERT_EQ(l_coo.num_rows, 6); ASSERT_EQ(l_coo.num_cols, 6); ASSERT_TRUE(ArrayEQ(l_coo.row, b_row)); ASSERT_TRUE(ArrayEQ(l_coo.col, b_col)); ASSERT_FALSE(l_coo.row_sorted); ASSERT_FALSE(l_coo.col_sorted); const aten::COOMatrix &l_coo2 = COOLineGraph(coo_a, true); ASSERT_EQ(l_coo2.num_rows, 6); ASSERT_EQ(l_coo2.num_cols, 6); ASSERT_TRUE(ArrayEQ(l_coo2.row, c_row)); ASSERT_TRUE(ArrayEQ(l_coo2.col, c_col)); ASSERT_FALSE(l_coo2.row_sorted); ASSERT_FALSE(l_coo2.col_sorted); IdArray a_data = aten::VecToIdArray( std::vector({4, 5, 0, 1, 2, 3}), sizeof(IdType) * 8, ctx); b_row = aten::VecToIdArray( std::vector({4, 5, 0, 2}), sizeof(IdType) * 8, ctx); b_col = aten::VecToIdArray( std::vector({2, 4, 1, 5}), sizeof(IdType) * 8, ctx); c_row = aten::VecToIdArray( std::vector({4, 4, 5, 0, 0, 1, 2, 2}), sizeof(IdType) * 8, ctx); c_col = aten::VecToIdArray( std::vector({1, 2, 4, 1, 2, 4, 5, 0}), sizeof(IdType) * 8, ctx); const aten::COOMatrix &coo_ad = aten::COOMatrix(4, 4, a_row, a_col, a_data, true, false); const aten::COOMatrix &ld_coo = COOLineGraph(coo_ad, false); ASSERT_EQ(ld_coo.num_rows, 6); ASSERT_EQ(ld_coo.num_cols, 6); ASSERT_TRUE(ArrayEQ(ld_coo.row, b_row)); ASSERT_TRUE(ArrayEQ(ld_coo.col, b_col)); ASSERT_FALSE(ld_coo.row_sorted); ASSERT_FALSE(ld_coo.col_sorted); const aten::COOMatrix &ld_coo2 = COOLineGraph(coo_ad, true); ASSERT_EQ(ld_coo2.num_rows, 6); ASSERT_EQ(ld_coo2.num_cols, 6); ASSERT_TRUE(ArrayEQ(ld_coo2.row, c_row)); ASSERT_TRUE(ArrayEQ(ld_coo2.col, c_col)); ASSERT_FALSE(ld_coo2.row_sorted); ASSERT_FALSE(ld_coo2.col_sorted); } TEST(LineGraphTest, LineGraphCOO) { _TestLineGraphCOO(CPU); _TestLineGraphCOO(CPU); } template void _TestSort(DGLContext ctx) { // case 1 IdArray a = aten::VecToIdArray( std::vector({8, 6, 7, 5, 3, 0, 9}), sizeof(IDX) * 8, ctx); IdArray sorted_a = aten::VecToIdArray( std::vector({0, 3, 5, 6, 7, 8, 9}), sizeof(IDX) * 8, ctx); IdArray sorted_idx = 
aten::VecToIdArray(std::vector({5, 4, 3, 1, 2, 0, 6}), 64, ctx); IdArray sorted, idx; std::tie(sorted, idx) = aten::Sort(a); ASSERT_TRUE(ArrayEQ(sorted, sorted_a)); ASSERT_TRUE(ArrayEQ(idx, sorted_idx)); // case 2: empty array a = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); sorted_a = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); sorted_idx = aten::VecToIdArray(std::vector({}), 64, ctx); std::tie(sorted, idx) = aten::Sort(a); ASSERT_TRUE(ArrayEQ(sorted, sorted_a)); ASSERT_TRUE(ArrayEQ(idx, sorted_idx)); // case 3: array with one element a = aten::VecToIdArray(std::vector({2}), sizeof(IDX) * 8, ctx); sorted_a = aten::VecToIdArray(std::vector({2}), sizeof(IDX) * 8, ctx); sorted_idx = aten::VecToIdArray(std::vector({0}), 64, ctx); std::tie(sorted, idx) = aten::Sort(a); ASSERT_TRUE(ArrayEQ(sorted, sorted_a)); ASSERT_TRUE(ArrayEQ(idx, sorted_idx)); } /* Runs _TestSort with the CPU context and, when DGL_USE_CUDA is defined, with the GPU context. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(ArrayTest, Sort) { _TestSort(CPU); _TestSort(CPU); #ifdef DGL_USE_CUDA _TestSort(GPU); _TestSort(GPU); #endif } /* Converts every integer in [-100, 100) float -> BFloat16 -> float and expects the round trip to return the original value. */ TEST(ArrayTest, BFloatCast) { for (int i = -100; i < 100; ++i) { float a = i; BFloat16 b = a; float a_casted = b; ASSERT_FLOAT_EQ(a, a_casted); } } ================================================ FILE: tests/cpp/test_concurrent_id_hash_map.cc ================================================ #include #include #include #include #include #include "../../src/array/cpu/concurrent_id_hash_map.h" #include "./common.h" using namespace dgl; using namespace dgl::runtime; using namespace dgl::aten; namespace { /* Resizes id_vec to `size` and fills it with std::rand() % range after seeding std::rand from the current time, so the contents differ between runs. The first size / 5 + 1 entries are then sorted and passed through std::unique; the return value is the number of entries that std::unique kept at the front. id_vec itself is not shortened. */ template size_t ConstructRandomSet( size_t size, IdType range, std::vector& id_vec) { id_vec.resize(size); std::srand(std::time(nullptr)); for (size_t i = 0; i < size; i++) { id_vec[i] = static_cast(std::rand() % range); } size_t num_seeds = size / 5 + 1; std::sort(id_vec.begin(), id_vec.begin() + num_seeds); return std::unique(id_vec.begin(), id_vec.begin() + num_seeds) - id_vec.begin(); } /* Builds a random id vector with ConstructRandomSet, passes it to ConcurrentIdHashMap::Init together with num_seeds, and checks the returned unique ids and the results of MapIds. */ template void _TestIdMap() { std::vector id_vec; auto num_seeds = ConstructRandomSet(size, 
range, id_vec); std::set id_set(id_vec.begin(), id_vec.end()); IdArray ids = VecToIdArray(id_vec, sizeof(IdType) * 8, CTX); ConcurrentIdHashMap id_map; IdArray unique_ids = id_map.Init(ids, num_seeds); auto unique_num = static_cast(unique_ids->shape[0]); IdType* unique_id_data = unique_ids.Ptr(); /* the map must report exactly as many unique ids as the std::set built from the same vector */ EXPECT_EQ(id_set.size(), unique_num); /* the first num_seeds unique ids must equal the first num_seeds input ids, position by position */ parallel_for(0, num_seeds, 64, [&](int64_t s, int64_t e) { for (int64_t i = s; i < e; i++) { EXPECT_EQ(id_vec[i], unique_id_data[i]); } }); /* every remaining unique id must at least be a member of the input set */ parallel_for(num_seeds, unique_num, 128, [&](int64_t s, int64_t e) { for (int64_t i = s; i < e; i++) { EXPECT_TRUE(id_set.find(unique_id_data[i]) != id_set.end()); } }); IdArray new_ids = id_map.MapIds(unique_ids); EXPECT_TRUE(new_ids.IsContiguous()); /* overwrite the recorded length of ids so that only its first num_seeds entries are mapped next */ ids->shape[0] = num_seeds; IdArray new_seed_ids = id_map.MapIds(ids); EXPECT_TRUE(new_seed_ids.IsContiguous()); EXPECT_EQ(new_seed_ids.Ptr()[0], static_cast(0)); } /* Runs _TestIdMap eight times. NOTE(review): the calls look identical; presumably each one has different template arguments that are not visible here - confirm. */ TEST(ConcurrentIdHashMapTest, TestConcurrentIdHashMap) { _TestIdMap(); _TestIdMap(); _TestIdMap(); _TestIdMap(); _TestIdMap(); _TestIdMap(); _TestIdMap(); _TestIdMap(); } }; // namespace ================================================ FILE: tests/cpp/test_csrmm.cc ================================================ #include #include #include #include "../../src/array/cpu/array_utils.h" // PairHash #include "./common.h" using namespace dgl; using namespace dgl::runtime; namespace { // Unit tests: // CSRMM(A, B) == A_mm_B // CSRSum({A, C}) == A_plus_C // CSRMask(A, C) = A_mask_C /* Builds a hash map from (row, col) to weight for every entry of coo. The weight index is coo.data[i] when aten::COOHasData(coo) is true and the entry position i otherwise. insert() is used, so a repeated (row, col) keeps the first weight seen. */ template std::unordered_map, DType, aten::PairHash> COOToMap( aten::COOMatrix coo, NDArray weights) { std::unordered_map, DType, aten::PairHash> map; for (int64_t i = 0; i < coo.row->shape[0]; ++i) { IdType irow = aten::IndexSelect(coo.row, i); IdType icol = aten::IndexSelect(coo.col, i); IdType ieid = aten::COOHasData(coo) ? 
aten::IndexSelect(coo.data, i) : i; DType idata = aten::IndexSelect(weights, ieid); map.insert({{irow, icol}, idata}); } return map; } /* Returns true when A and B, converted to (row, col) -> weight maps, have the same number of entries, every position of A also exists in B, and each pair of weights a, b satisfies |a - b| < rtol * |a| + atol. */ template bool CSRIsClose( aten::CSRMatrix A, aten::CSRMatrix B, NDArray A_weights, NDArray B_weights, DType rtol, DType atol) { auto Amap = COOToMap(CSRToCOO(A, false), A_weights); auto Bmap = COOToMap(CSRToCOO(B, false), B_weights); if (Amap.size() != Bmap.size()) return false; for (auto itA : Amap) { auto itB = Bmap.find(itA.first); if (itB == Bmap.end()) return false; if (fabs(itA.second - itB->second) >= rtol * fabs(itA.second) + atol) return false; } return true; } /* Fixture: returns the 4x5 matrix drawn below as a CSR (built with an explicit third index array) together with its weight array. */ template std::pair CSR_A(DGLContext ctx = CTX) { // matrix([[0. , 0. , 1. , 0.7, 0. ], // [0. , 0. , 0.5, 0.+, 0. ], // [0.4, 0.7, 0. , 0.2, 0. ], // [0. , 0. , 0. , 0. , 0.2]]) // (0.+ indicates that the entry exists but the value is 0.) auto csr = aten::CSRMatrix( 4, 5, NDArray::FromVector(std::vector({0, 2, 4, 7, 8}), ctx), NDArray::FromVector(std::vector({2, 3, 2, 3, 0, 1, 3, 4}), ctx), NDArray::FromVector(std::vector({1, 0, 2, 3, 4, 5, 6, 7}), ctx)); auto weights = NDArray::FromVector( std::vector({0.7, 1.0, 0.5, 0.0, 0.4, 0.7, 0.2, 0.2}), ctx); return {csr, weights}; } /* Fixture: returns the 5x6 matrix drawn below as a CSR (no third index array) together with its weight array. */ template std::pair CSR_B(DGLContext ctx = CTX) { // matrix([[0. , 0.9, 0. , 0.6, 0. , 0.3], // [0. , 0. , 0. , 0. , 0. , 0.4], // [0.+, 0. , 0. , 0. , 0. , 0.9], // [0.8, 0.2, 0.3, 0.2, 0. , 0. ], // [0.2, 0.4, 0. , 0. , 0. , 0. ]]) // (0.+ indicates that the entry exists but the value is 0.) auto csr = aten::CSRMatrix( 5, 6, NDArray::FromVector(std::vector({0, 3, 4, 6, 10, 12}), ctx), NDArray::FromVector( std::vector({1, 3, 5, 5, 0, 5, 0, 1, 2, 3, 0, 1}), ctx)); auto weights = NDArray::FromVector( std::vector( {0.9, 0.6, 0.3, 0.4, 0.0, 0.9, 0.8, 0.2, 0.3, 0.2, 0.2, 0.4}), ctx); return {csr, weights}; } /* Fixture: returns the 4x5 matrix drawn below as a CSR (no third index array) together with its weight array. */ template std::pair CSR_C(DGLContext ctx = CTX) { // matrix([[0. , 0. , 0. , 0.2, 0. ], // [0. , 0. , 0. , 0.5, 0.4], // [0. , 0.2, 0. , 0.9, 0.2], // [0. , 1. , 0. , 0.7, 0. 
]]) auto csr = aten::CSRMatrix( 4, 5, NDArray::FromVector(std::vector({0, 1, 3, 6, 8}), ctx), NDArray::FromVector(std::vector({3, 3, 4, 1, 3, 4, 1, 3}), ctx)); auto weights = NDArray::FromVector( std::vector({0.2, 0.5, 0.4, 0.2, 0.9, 0.2, 1., 0.7}), ctx); return {csr, weights}; } /* Fixture: the 4x6 matrix drawn below, which _TestCsrmm uses as the expected result of aten::CSRMM on the CSR_A and CSR_B fixtures. */ template std::pair CSR_A_mm_B(DGLContext ctx = CTX) { // matrix([[0.56, 0.14, 0.21, 0.14, 0. , 0.9 ], // [0.+ , 0.+ , 0.+ , 0.+ , 0. , 0.45], // [0.16, 0.4 , 0.06, 0.28, 0. , 0.4 ], // [0.04, 0.08, 0. , 0. , 0. , 0. ]]) // (0.+ indicates that the entry exists but the value is 0.) auto csr = aten::CSRMatrix( 4, 6, NDArray::FromVector(std::vector({0, 5, 10, 15, 17}), ctx), NDArray::FromVector( std::vector( {0, 1, 2, 3, 5, 0, 1, 2, 3, 5, 0, 1, 2, 3, 5, 0, 1}), ctx)); auto weights = NDArray::FromVector( std::vector( {0.56, 0.14, 0.21, 0.14, 0.9, 0., 0., 0., 0., 0.45, 0.16, 0.4, 0.06, 0.28, 0.4, 0.04, 0.08}), ctx); return {csr, weights}; } /* Fixture: a 4x5 CSR and weights that _TestCsrsum uses as the expected result of aten::CSRSum on the CSR_A and CSR_C fixtures. */ template std::pair CSR_A_plus_C(DGLContext ctx = CTX) { auto csr = aten::CSRMatrix( 4, 5, NDArray::FromVector(std::vector({0, 2, 5, 9, 12}), ctx), NDArray::FromVector( std::vector({2, 3, 2, 3, 4, 0, 1, 3, 4, 1, 3, 4}), ctx)); auto weights = NDArray::FromVector( std::vector( {1., 0.9, 0.5, 0.5, 0.4, 0.4, 0.9, 1.1, 0.2, 1., 0.7, 0.2}), ctx); return {csr, weights}; } /* Fixture: the eight values that _TestCsrmask expects from its aten::CSRGetData call. */ template NDArray CSR_A_mask_C(DGLContext ctx = CTX) { return NDArray::FromVector( std::vector({0.7, 0.0, 0.0, 0.7, 0.2, 0.0, 0.0, 0.0}), ctx); } /* Multiplies the CSR_A and CSR_B fixtures with aten::CSRMM and requires the result to match CSR_A_mm_B under CSRIsClose with rtol = atol = 1e-4. */ template void _TestCsrmm(DGLContext ctx = CTX) { auto A = CSR_A(ctx); auto B = CSR_B(ctx); auto A_mm_B = aten::CSRMM(A.first, A.second, B.first, B.second); auto A_mm_B2 = CSR_A_mm_B(ctx); bool result = CSRIsClose( A_mm_B.first, A_mm_B2.first, A_mm_B.second, A_mm_B2.second, 1e-4, 1e-4); ASSERT_TRUE(result); } /* Adds the CSR_A and CSR_C fixtures with aten::CSRSum and requires the result to match CSR_A_plus_C under CSRIsClose with rtol = atol = 1e-4. */ template void _TestCsrsum(DGLContext ctx = CTX) { auto A = CSR_A(ctx); auto C = CSR_C(ctx); auto A_plus_C = aten::CSRSum({A.first, C.first}, {A.second, C.second}); auto A_plus_C2 = CSR_A_plus_C(ctx); bool result = CSRIsClose( A_plus_C.first, 
A_plus_C2.first, A_plus_C.second, A_plus_C2.second, 1e-4, 1e-4); ASSERT_TRUE(result); } /* Converts the CSR_C fixture to COO, calls aten::CSRGetData on the CSR_A fixture with C's row/col arrays, A's weights and a trailing 0 (presumably the value returned where A has no entry - confirm), and requires exact equality with CSR_A_mask_C. */ template void _TestCsrmask(DGLContext ctx = CTX) { auto A = CSR_A(ctx); auto C = CSR_C(ctx); auto C_coo = CSRToCOO(C.first, false); auto A_mask_C = aten::CSRGetData(A.first, C_coo.row, C_coo.col, A.second, 0); auto A_mask_C2 = CSR_A_mask_C(ctx); ASSERT_TRUE(ArrayEQ(A_mask_C, A_mask_C2)); } /* The three TESTs below run the matching helper four times with the CPU context and, when DGL_USE_CUDA is defined, four times with the GPU context. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(CsrmmTest, TestCsrmm) { _TestCsrmm(CPU); _TestCsrmm(CPU); _TestCsrmm(CPU); _TestCsrmm(CPU); #ifdef DGL_USE_CUDA _TestCsrmm(GPU); _TestCsrmm(GPU); _TestCsrmm(GPU); _TestCsrmm(GPU); #endif } TEST(CsrmmTest, TestCsrsum) { _TestCsrsum(CPU); _TestCsrsum(CPU); _TestCsrsum(CPU); _TestCsrsum(CPU); #ifdef DGL_USE_CUDA _TestCsrsum(GPU); _TestCsrsum(GPU); _TestCsrsum(GPU); _TestCsrsum(GPU); #endif } TEST(CsrmmTest, TestCsrmask) { _TestCsrmask(CPU); _TestCsrmask(CPU); _TestCsrmask(CPU); _TestCsrmask(CPU); #ifdef DGL_USE_CUDA _TestCsrmask(GPU); _TestCsrmask(GPU); _TestCsrmask(GPU); _TestCsrmask(GPU); #endif } }; // namespace ================================================ FILE: tests/cpp/test_partition.cc ================================================ #include #include "../../src/partition/ndarray_partition.h" #include "./common.h" using namespace dgl; using namespace dgl::partition; /* Creates a remainder-based partition of 160000 ids into 7 parts, asks it for a permutation of the ids 0 .. size / 10 - 1, and checks that the permutation and counts arrays are non-null and correctly sized, that the counts sum to the number of ids, and that every id placed in part p satisfies id % num_parts == p. */ template void _TestRemainder_GeneratePermutation() { const int64_t size = 160000; const int num_parts = 7; NDArrayPartitionRef part = CreatePartitionRemainderBased(size, num_parts); IdArray idxs = aten::Range(0, size / 10, sizeof(IdType) * 8, DGLContext{XPU, 0}); std::pair result = part->GeneratePermutation(idxs); // first part of result should be the permutation IdArray perm = result.first.CopyTo(DGLContext{kDGLCPU, 0}); ASSERT_TRUE(perm.Ptr() != nullptr); ASSERT_EQ(perm->shape[0], idxs->shape[0]); const IdType* const perm_cpu = static_cast(perm->data); // second part of result should be the counts IdArray counts = result.second.CopyTo(DGLContext{kDGLCPU, 0}); ASSERT_TRUE(counts.Ptr() != nullptr); 
ASSERT_EQ(counts->shape[0], num_parts); const int64_t* const counts_cpu = static_cast(counts->data); /* prefix[p] .. prefix[p + 1] is the slice of the permutation that belongs to part p */ std::vector prefix(num_parts + 1, 0); for (int p = 0; p < num_parts; ++p) { prefix[p + 1] = prefix[p] + counts_cpu[p]; } ASSERT_EQ(prefix.back(), idxs->shape[0]); // copy original indexes to cpu idxs = idxs.CopyTo(DGLContext{kDGLCPU, 0}); const IdType* const idxs_cpu = static_cast(idxs->data); for (int p = 0; p < num_parts; ++p) { for (int64_t i = prefix[p]; i < prefix[p + 1]; ++i) { EXPECT_EQ(idxs_cpu[perm_cpu[i]] % num_parts, p); } } } /* For each of the 7 parts of a remainder-based partition of 160000 ids: maps the local indices 0 .. PartSize(part_id) - 1 to global indices and back, and checks that every global index has remainder part_id modulo num_parts and that the round trip returns the original local indices. */ template void _TestRemainder_MapToX() { const int64_t size = 160000; const int num_parts = 7; NDArrayPartitionRef part = CreatePartitionRemainderBased(size, num_parts); for (int part_id = 0; part_id < num_parts; ++part_id) { IdArray local = aten::Range( 0, part->PartSize(part_id), sizeof(IdType) * 8, DGLContext{XPU, 0}); IdArray global = part->MapToGlobal(local, part_id); IdArray act_local = part->MapToLocal(global).CopyTo(CPU); // every global index should have the same remainder as the part id ASSERT_EQ(global->shape[0], local->shape[0]); global = global.CopyTo(CPU); for (int64_t i = 0; i < global->shape[0]; ++i) { EXPECT_EQ(Ptr(global)[i] % num_parts, part_id) << "i=" << i << ", num_parts=" << num_parts << ", part_id=" << part_id; } // the remapped local indices should match the original local = local.CopyTo(CPU); ASSERT_EQ(local->shape[0], act_local->shape[0]); for (int64_t i = 0; i < act_local->shape[0]; ++i) { EXPECT_EQ(Ptr(local)[i], Ptr(act_local)[i]); } } } /* Runs the remainder-partition checks only when DGL_USE_CUDA is defined; without it the test body is empty. */ TEST(PartitionTest, TestRemainderPartition) { #ifdef DGL_USE_CUDA _TestRemainder_GeneratePermutation(); _TestRemainder_GeneratePermutation(); _TestRemainder_MapToX(); _TestRemainder_MapToX(); #endif // CPU is not implemented } /* Returns the smallest i in [0, num_parts) with range[i + 1] > idx, or -1 when there is none. Reads range[1] .. range[num_parts]. */ template int _FindPart(const INDEX idx, const RANGE* const range, const int num_parts) { for (int i = 0; i < num_parts; ++i) { if (range[i + 1] > idx) { return i; } } return -1; } /* Range-based counterpart of the remainder permutation test: builds a boundary array, creates a range-based partition from it, and checks that every id placed in part p is located in part p by _FindPart. */ template void _TestRange_GeneratePermutation() { const int64_t 
size = 160000; const int num_parts = 7; IdArray range = aten::NewIdArray( num_parts + 1, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8); /* boundary i is (size / num_parts) * i; the last boundary is set to size itself, so the final part takes whatever the integer division leaves over */ for (int i = 0; i < num_parts; ++i) { range.Ptr()[i] = (size / num_parts) * i; } range.Ptr()[num_parts] = size; NDArrayPartitionRef part = CreatePartitionRangeBased( size, num_parts, range.CopyTo(DGLContext{XPU, 0})); IdArray idxs = aten::Range(0, size / 10, sizeof(IdType) * 8, DGLContext{XPU, 0}); std::pair result = part->GeneratePermutation(idxs); // first part of result should be the permutation IdArray perm = result.first.CopyTo(DGLContext{kDGLCPU, 0}); ASSERT_TRUE(perm.Ptr() != nullptr); ASSERT_EQ(perm->shape[0], idxs->shape[0]); const IdType* const perm_cpu = static_cast(perm->data); // second part of result should be the counts IdArray counts = result.second.CopyTo(DGLContext{kDGLCPU, 0}); ASSERT_TRUE(counts.Ptr() != nullptr); ASSERT_EQ(counts->shape[0], num_parts); const int64_t* const counts_cpu = static_cast(counts->data); /* prefix[p] .. prefix[p + 1] is the slice of the permutation that belongs to part p */ std::vector prefix(num_parts + 1, 0); for (int p = 0; p < num_parts; ++p) { prefix[p + 1] = prefix[p] + counts_cpu[p]; } ASSERT_EQ(prefix.back(), idxs->shape[0]); // copy original indexes to cpu idxs = idxs.CopyTo(DGLContext{kDGLCPU, 0}); const IdType* const idxs_cpu = static_cast(idxs->data); for (int p = 0; p < num_parts; ++p) { for (int64_t i = prefix[p]; i < prefix[p + 1]; ++i) { EXPECT_EQ( _FindPart(idxs_cpu[perm_cpu[i]], range.Ptr(), num_parts), p); } } } /* For each of the 7 parts of a range-based partition of 160000 ids: maps the local indices 0 .. PartSize(part_id) - 1 to global indices and back, and checks that _FindPart places every global index in part_id and that the round trip returns the original local indices. */ template void _TestRange_MapToX() { const int64_t size = 160000; const int num_parts = 7; IdArray range = aten::NewIdArray( num_parts + 1, DGLContext{kDGLCPU, 0}, sizeof(IdType) * 8); for (int i = 0; i < num_parts; ++i) { Ptr(range)[i] = (size / num_parts) * i; } range.Ptr()[num_parts] = size; NDArrayPartitionRef part = CreatePartitionRangeBased( size, num_parts, range.CopyTo(DGLContext{XPU, 0})); for (int part_id = 0; part_id < num_parts; ++part_id) { IdArray local = aten::Range( 0, part->PartSize(part_id), sizeof(IdType) * 8, DGLContext{XPU, 0}); 
IdArray global = part->MapToGlobal(local, part_id); IdArray act_local = part->MapToLocal(global).CopyTo(CPU); ASSERT_EQ(global->shape[0], local->shape[0]); global = global.CopyTo(CPU); for (int64_t i = 0; i < global->shape[0]; ++i) { EXPECT_EQ( _FindPart(Ptr(global)[i], Ptr(range), num_parts), part_id) << "i=" << i << ", num_parts=" << num_parts << ", part_id=" << part_id << ", shape=" << global->shape[0]; } // the remapped local indices should match the original local = local.CopyTo(CPU); ASSERT_EQ(local->shape[0], act_local->shape[0]); for (int64_t i = 0; i < act_local->shape[0]; ++i) { EXPECT_EQ(Ptr(local)[i], Ptr(act_local)[i]); } } } /* Runs the range-partition checks only when DGL_USE_CUDA is defined; without it the test body is empty. */ TEST(PartitionTest, TestRangePartition) { #ifdef DGL_USE_CUDA _TestRange_GeneratePermutation(); _TestRange_GeneratePermutation(); _TestRange_MapToX(); _TestRange_MapToX(); #endif // CPU is not implemented } ================================================ FILE: tests/cpp/test_rowwise.cc ================================================ #include #include #include #include #include "./common.h" using namespace dgl; using namespace dgl::runtime; using namespace dgl::aten; /* Tuple alias used for edges in this file; the helpers below fill it with three values per edge (used as row, column, edge id by ToEdgeSet). */ template using ETuple = std::tuple; /* Ground truth for the CSR()/COO() fixtures: all five edges as 3-tuples. With has_data the third element follows the fixtures' data array {2, 3, 0, 1, 4}; without it the third element is the edge position 0..4. */ template std::set> AllEdgeSet(bool has_data) { if (has_data) { std::set> eset; eset.insert(ETuple{0, 0, 2}); eset.insert(ETuple{0, 1, 3}); eset.insert(ETuple{1, 1, 0}); eset.insert(ETuple{3, 2, 1}); eset.insert(ETuple{3, 3, 4}); return eset; } else { std::set> eset; eset.insert(ETuple{0, 0, 0}); eset.insert(ETuple{0, 1, 1}); eset.insert(ETuple{1, 1, 2}); eset.insert(ETuple{3, 2, 3}); eset.insert(ETuple{3, 3, 4}); return eset; } } /* Ground truth for the CSREtypes()/COOEtypes() fixtures, restricted to rows 0 and 3: six 3-tuples, whose third element depends on has_data in the same way as in AllEdgeSet. */ template std::set> AllEdgePerEtypeSet(bool has_data) { if (has_data) { std::set> eset; eset.insert(ETuple{0, 0, 0}); eset.insert(ETuple{0, 1, 1}); eset.insert(ETuple{0, 2, 4}); eset.insert(ETuple{0, 3, 6}); eset.insert(ETuple{3, 2, 5}); eset.insert(ETuple{3, 3, 3}); return eset; } else { std::set> eset; eset.insert(ETuple{0, 0, 0}); eset.insert(ETuple{0, 1, 1}); eset.insert(ETuple{0, 2, 2}); 
eset.insert(ETuple{0, 3, 3}); eset.insert(ETuple{3, 3, 5}); eset.insert(ETuple{3, 2, 6}); return eset; } } /* Collects every (row[i], col[i], data[i]) of mat into a std::set, so duplicate edges collapse to one element. Reads mat.data->data directly, so it assumes the matrix carries a data array. */ template std::set> ToEdgeSet(COOMatrix mat) { std::set> eset; Idx* row = static_cast(mat.row->data); Idx* col = static_cast(mat.col->data); Idx* data = static_cast(mat.data->data); for (int64_t i = 0; i < mat.row->shape[0]; ++i) { // std::cout << row[i] << " " << col[i] << " " << data[i] << std::endl; eset.emplace(row[i], col[i], data[i]); } return eset; } /* Asserts that mat is 4x4, that every (row, col, data) entry appears in AllEdgeSet(has_data), and that every row id is contained in rows. */ template void CheckSampledResult(COOMatrix mat, IdArray rows, bool has_data) { ASSERT_EQ(mat.num_rows, 4); ASSERT_EQ(mat.num_cols, 4); Idx* row = static_cast(mat.row->data); Idx* col = static_cast(mat.col->data); Idx* data = static_cast(mat.data->data); const auto& gt = AllEdgeSet(has_data); for (int64_t i = 0; i < mat.row->shape[0]; ++i) { ASSERT_TRUE(gt.count(std::make_tuple(row[i], col[i], data[i]))); ASSERT_TRUE(IsInArray(rows, row[i])); } } /* Same check as CheckSampledResult, but against AllEdgePerEtypeSet(has_data). */ template void CheckSampledPerEtypeResult(COOMatrix mat, IdArray rows, bool has_data) { ASSERT_EQ(mat.num_rows, 4); ASSERT_EQ(mat.num_cols, 4); Idx* row = static_cast(mat.row->data); Idx* col = static_cast(mat.col->data); Idx* data = static_cast(mat.data->data); const auto& gt = AllEdgePerEtypeSet(has_data); for (int64_t i = 0; i < mat.row->shape[0]; ++i) { int64_t count = gt.count(std::make_tuple(row[i], col[i], data[i])); ASSERT_TRUE(count); ASSERT_TRUE(IsInArray(rows, row[i])); } } /* Fixture: 4x4 CSR with five entries (row 2 is empty); the data array {2, 3, 0, 1, 4} is attached only when has_data is true. */ template CSRMatrix CSR(bool has_data) { IdArray indptr = NDArray::FromVector(std::vector({0, 2, 3, 3, 5})); IdArray indices = NDArray::FromVector(std::vector({0, 1, 1, 2, 3})); IdArray data = NDArray::FromVector(std::vector({2, 3, 0, 1, 4})); if (has_data) return CSRMatrix(4, 4, indptr, indices, data); else return CSRMatrix(4, 4, indptr, indices); } /* Fixture: the same five entries as CSR(), given as COO row/col arrays; the data array is attached only when has_data is true. */ template COOMatrix COO(bool has_data) { IdArray row = NDArray::FromVector(std::vector({0, 0, 1, 3, 3})); IdArray col = NDArray::FromVector(std::vector({0, 1, 1, 2, 3})); IdArray data = NDArray::FromVector(std::vector({2, 3, 0, 1, 4})); if 
(has_data) return COOMatrix(4, 4, row, col, data); else return COOMatrix(4, 4, row, col); } /* Fixture: 4x4 CSR with seven entries paired with the offsets vector {0, 4, 5, 6, 7} (named eid2etype_offsets; presumably edge-id boundaries of four edge types - confirm). The data array is attached only when has_data is true. */ template std::pair> CSREtypes(bool has_data) { IdArray indptr = NDArray::FromVector(std::vector({0, 4, 5, 5, 7})); IdArray indices = NDArray::FromVector(std::vector({0, 1, 2, 3, 1, 3, 2})); IdArray data = NDArray::FromVector(std::vector({0, 1, 4, 6, 2, 3, 5})); auto eid2etype_offsets = std::vector({0, 4, 5, 6, 7}); if (has_data) return {CSRMatrix(4, 4, indptr, indices, data), eid2etype_offsets}; else return {CSRMatrix(4, 4, indptr, indices), eid2etype_offsets}; } /* Fixture: the same seven entries and offsets as CSREtypes(), given as COO row/col arrays. */ template std::pair> COOEtypes(bool has_data) { IdArray row = NDArray::FromVector(std::vector({0, 0, 0, 0, 1, 3, 3})); IdArray col = NDArray::FromVector(std::vector({0, 1, 2, 3, 1, 3, 2})); IdArray data = NDArray::FromVector(std::vector({0, 1, 4, 6, 2, 3, 5})); auto eid2etype_offsets = std::vector({0, 4, 5, 6, 7}); if (has_data) return {COOMatrix(4, 4, row, col, data), eid2etype_offsets}; else return {COOMatrix(4, 4, row, col), eid2etype_offsets}; } /* Exercises CSRRowWiseSampling on rows {0, 3} of the CSR() fixture, requesting 2 per row, in three phases: (1) equal weights with the final flag true - only validity of the result is checked; (2) equal weights with the final flag false - the result must be exactly the four edges of rows 0 and 3; (3) weights with zeros, final flag true - edges whose weight is zero must never be returned. NOTE(review): the final flag is presumably `replace` - confirm. */ template void _TestCSRSampling(bool has_data) { auto mat = CSR(has_data); FloatArray prob = NDArray::FromVector(std::vector({.5, .5, .5, .5, .5})); IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = CSRRowWiseSampling(mat, rows, 2, prob, true); CheckSampledResult(rst, rows, has_data); } for (int k = 0; k < 10; ++k) { auto rst = CSRRowWiseSampling(mat, rows, 2, prob, false); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); ASSERT_EQ(eset.size(), 4); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } prob = 
NDArray::FromVector(std::vector({.0, .5, .5, .0, .5})); for (int k = 0; k < 100; ++k) { auto rst = CSRRowWiseSampling(mat, rows, 2, prob, true); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_FALSE(eset.count(std::make_tuple(0, 1, 3))); } else { ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_FALSE(eset.count(std::make_tuple(3, 2, 3))); } } } /* Runs _TestCSRSampling four times with has_data = true and four times with has_data = false. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(RowwiseTest, TestCSRSampling) { _TestCSRSampling(true); _TestCSRSampling(true); _TestCSRSampling(true); _TestCSRSampling(true); _TestCSRSampling(false); _TestCSRSampling(false); _TestCSRSampling(false); _TestCSRSampling(false); } /* Like _TestCSRSampling but passes aten::NullArray() as prob and has no zero-weight phase. With the final flag false the four edges of rows 0 and 3 must all be present; unlike _TestCSRSampling, the size of the edge set is not asserted. */ template void _TestCSRSamplingUniform(bool has_data) { auto mat = CSR(has_data); FloatArray prob = aten::NullArray(); IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = CSRRowWiseSampling(mat, rows, 2, prob, true); CheckSampledResult(rst, rows, has_data); } for (int k = 0; k < 10; ++k) { auto rst = CSRRowWiseSampling(mat, rows, 2, prob, false); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } } /* Runs _TestCSRSamplingUniform four times with has_data = true and four times with has_data = false. */ TEST(RowwiseTest, TestCSRSamplingUniform) { _TestCSRSamplingUniform(true); _TestCSRSamplingUniform(true); _TestCSRSamplingUniform(true); _TestCSRSamplingUniform(true); _TestCSRSamplingUniform(false); _TestCSRSamplingUniform(false); _TestCSRSamplingUniform(false); _TestCSRSamplingUniform(false); } /* Exercises CSRRowWisePerEtypeSampling on rows {0, 3} of the CSREtypes() fixture with the list {2, 2, 2, 2} and one weight array per entry of that list: first with equal weights (final flag true, then false with exact per-group counts), then with weights containing zeros. */ template void _TestCSRPerEtypeSampling(bool has_data) { auto pair = CSREtypes(has_data); auto mat = pair.first; auto eid2etype_offset = pair.second; std::vector 
prob = { NDArray::FromVector(std::vector({.5, .5, .5, .5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5}))}; IdArray rows = NDArray::FromVector(std::vector({0, 3})); /* final flag true: only check that every returned edge is a known edge of the requested rows */ for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true); CheckSampledPerEtypeResult(rst, rows, has_data); } /* final flag false (presumably `replace` - confirm): additionally count how many edges of each group are present in the result */ for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, false); CheckSampledPerEtypeResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(0, 2, 4)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(0, 3, 6)); ASSERT_EQ(counts, 1); counts = 0; /* row 1 was not requested, so its edge must be absent */ counts += eset.count(std::make_tuple(1, 1, 2)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 2, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 3, 3)); ASSERT_EQ(counts, 1); } else { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); counts += eset.count(std::make_tuple(0, 2, 2)); counts += eset.count(std::make_tuple(0, 3, 3)); ASSERT_EQ(counts, 2); counts = 0; /* row 1 was not requested, so its edge must be absent */ counts += eset.count(std::make_tuple(1, 1, 4)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 3, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 2, 6)); ASSERT_EQ(counts, 1); } } /* new weights: three of the four entries of the first array are zero; the loop that follows asserts that certain edges are then never returned */ prob = { NDArray::FromVector(std::vector({.0, .5, .0, .0})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5}))}; for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true); 
CheckSampledPerEtypeResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); } else { ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_FALSE(eset.count(std::make_tuple(0, 2, 2))); ASSERT_FALSE(eset.count(std::make_tuple(0, 3, 3))); } } } /* Same scenario as _TestCSRPerEtypeSampling for the has_data == true fixture, but every call to CSRRowWisePerEtypeSampling passes one extra trailing `true` argument (presumably the rowwise_etype_sorted flag named in a comment elsewhere in this file - confirm). */ template void _TestCSRPerEtypeSamplingSorted() { auto pair = CSREtypes(true); auto mat = pair.first; auto eid2etype_offset = pair.second; std::vector prob = { NDArray::FromVector(std::vector({.5, .5, .5, .5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5}))}; IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true, true); CheckSampledPerEtypeResult(rst, rows, true); } for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, false, true); CheckSampledPerEtypeResult(rst, rows, true); auto eset = ToEdgeSet(rst); int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(0, 2, 4)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(0, 3, 6)); ASSERT_EQ(counts, 1); counts = 0; /* row 1 was not requested, so its edge must be absent */ counts += eset.count(std::make_tuple(1, 1, 2)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 2, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 3, 3)); ASSERT_EQ(counts, 1); } /* weights with zeros: edge (0, 0, 0) must never be returned */ prob = { NDArray::FromVector(std::vector({.0, .5, .0, .0})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5}))}; for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true, true); CheckSampledPerEtypeResult(rst, rows, true); auto 
eset = ToEdgeSet(rst); ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); } } /* Runs _TestCSRPerEtypeSampling four times with has_data = true and four times with false, then _TestCSRPerEtypeSamplingSorted four times. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(RowwiseTest, TestCSRPerEtypeSampling) { _TestCSRPerEtypeSampling(true); _TestCSRPerEtypeSampling(true); _TestCSRPerEtypeSampling(true); _TestCSRPerEtypeSampling(true); _TestCSRPerEtypeSampling(false); _TestCSRPerEtypeSampling(false); _TestCSRPerEtypeSampling(false); _TestCSRPerEtypeSampling(false); _TestCSRPerEtypeSamplingSorted(); _TestCSRPerEtypeSamplingSorted(); _TestCSRPerEtypeSamplingSorted(); _TestCSRPerEtypeSamplingSorted(); } /* Like _TestCSRPerEtypeSampling, but every entry of prob is aten::NullArray() and there is no phase with zero weights. */ template void _TestCSRPerEtypeSamplingUniform(bool has_data) { auto pair = CSREtypes(has_data); auto mat = pair.first; auto eid2etype_offset = pair.second; std::vector prob = { aten::NullArray(), aten::NullArray(), aten::NullArray(), aten::NullArray()}; IdArray rows = NDArray::FromVector(std::vector({0, 3})); /* final flag true: only check that every returned edge is a known edge of the requested rows */ for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true); CheckSampledPerEtypeResult(rst, rows, has_data); } /* final flag false: additionally count how many edges of each group are present in the result */ for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, false); CheckSampledPerEtypeResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(0, 2, 4)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(0, 3, 6)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(1, 1, 2)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 2, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 3, 3)); ASSERT_EQ(counts, 1); } else { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); counts += eset.count(std::make_tuple(0, 2, 2)); counts += eset.count(std::make_tuple(0, 3, 3)); 
ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(1, 1, 4)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 3, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 2, 6)); ASSERT_EQ(counts, 1); } } } /* Same scenario as _TestCSRPerEtypeSamplingUniform for the has_data == true fixture, but every call to CSRRowWisePerEtypeSampling passes one extra trailing `true` argument (presumably the rowwise_etype_sorted flag named in a comment elsewhere in this file - confirm). */ template void _TestCSRPerEtypeSamplingUniformSorted() { auto pair = CSREtypes(true); auto mat = pair.first; auto eid2etype_offset = pair.second; std::vector prob = { aten::NullArray(), aten::NullArray(), aten::NullArray(), aten::NullArray()}; IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true, true); CheckSampledPerEtypeResult(rst, rows, true); } for (int k = 0; k < 10; ++k) { auto rst = CSRRowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, false, true); CheckSampledPerEtypeResult(rst, rows, true); auto eset = ToEdgeSet(rst); int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(0, 2, 4)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(0, 3, 6)); ASSERT_EQ(counts, 1); counts = 0; /* row 1 was not requested, so its edge must be absent */ counts += eset.count(std::make_tuple(1, 1, 2)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 2, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 3, 3)); ASSERT_EQ(counts, 1); } } /* Runs _TestCSRPerEtypeSamplingUniform four times with has_data = true and four times with false, then the sorted variant four times. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(RowwiseTest, TestCSRPerEtypeSamplingUniform) { _TestCSRPerEtypeSamplingUniform(true); _TestCSRPerEtypeSamplingUniform(true); _TestCSRPerEtypeSamplingUniform(true); _TestCSRPerEtypeSamplingUniform(true); _TestCSRPerEtypeSamplingUniform(false); _TestCSRPerEtypeSamplingUniform(false); _TestCSRPerEtypeSamplingUniform(false); _TestCSRPerEtypeSamplingUniform(false); _TestCSRPerEtypeSamplingUniformSorted(); _TestCSRPerEtypeSamplingUniformSorted(); _TestCSRPerEtypeSamplingUniformSorted(); 
_TestCSRPerEtypeSamplingUniformSorted(); } /* Exercises COORowWiseSampling on rows {0, 3} of the COO() fixture, requesting 2 per row, in three phases: (1) equal weights with the final flag true - only validity of the result is checked; (2) equal weights with the final flag false - the result must be exactly the four edges of rows 0 and 3; (3) weights with zeros, final flag true - edges whose weight is zero must never be returned. NOTE(review): the final flag is presumably `replace` - confirm. */ template void _TestCOOSampling(bool has_data) { auto mat = COO(has_data); FloatArray prob = NDArray::FromVector(std::vector({.5, .5, .5, .5, .5})); IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = COORowWiseSampling(mat, rows, 2, prob, true); CheckSampledResult(rst, rows, has_data); } for (int k = 0; k < 10; ++k) { auto rst = COORowWiseSampling(mat, rows, 2, prob, false); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); ASSERT_EQ(eset.size(), 4); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } prob = NDArray::FromVector(std::vector({.0, .5, .5, .0, .5})); for (int k = 0; k < 100; ++k) { auto rst = COORowWiseSampling(mat, rows, 2, prob, true); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_FALSE(eset.count(std::make_tuple(0, 1, 3))); } else { ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_FALSE(eset.count(std::make_tuple(3, 2, 3))); } } } /* Runs _TestCOOSampling four times with has_data = true and four times with has_data = false. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(RowwiseTest, TestCOOSampling) { _TestCOOSampling(true); _TestCOOSampling(true); _TestCOOSampling(true); _TestCOOSampling(true); _TestCOOSampling(false); _TestCOOSampling(false); _TestCOOSampling(false); _TestCOOSampling(false); } /* Like _TestCOOSampling but passes aten::NullArray() as prob and has no zero-weight phase. With the final flag false the four edges of rows 0 and 3 must all be present; the size of the edge set is not asserted. */ template void _TestCOOSamplingUniform(bool has_data) { auto mat = COO(has_data); FloatArray prob = aten::NullArray(); IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = COORowWiseSampling(mat, rows, 2, prob, true); CheckSampledResult(rst, rows, has_data); } for (int k = 0; k < 10; ++k) { auto rst = 
COORowWiseSampling(mat, rows, 2, prob, false); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } } /* Runs _TestCOOSamplingUniform four times with has_data = true and four times with has_data = false. NOTE(review): the repeated calls presumably differ in template arguments not visible here - confirm. */ TEST(RowwiseTest, TestCOOSamplingUniform) { _TestCOOSamplingUniform(true); _TestCOOSamplingUniform(true); _TestCOOSamplingUniform(true); _TestCOOSamplingUniform(true); _TestCOOSamplingUniform(false); _TestCOOSamplingUniform(false); _TestCOOSamplingUniform(false); _TestCOOSamplingUniform(false); } // COOPerEtypeSampling with rowwise_etype_sorted == true is not meaningful as // it's never used in practice. /* Exercises COORowWisePerEtypeSampling on rows {0, 3} of the COOEtypes() fixture with the list {2, 2, 2, 2} and one weight array per entry of that list: first with equal weights (final flag true, then false with exact per-group counts), then with weights containing zeros. */ template void _TestCOOPerEtypeSampling(bool has_data) { auto pair = COOEtypes(has_data); auto mat = pair.first; auto eid2etype_offset = pair.second; std::vector prob = { NDArray::FromVector(std::vector({.5, .5, .5, .5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5}))}; IdArray rows = NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = COORowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true); CheckSampledPerEtypeResult(rst, rows, has_data); } for (int k = 0; k < 10; ++k) { auto rst = COORowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, false); CheckSampledPerEtypeResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(0, 2, 4)); ASSERT_EQ(counts, 1); counts = 0; counts += 
eset.count(std::make_tuple(0, 3, 6)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(1, 1, 2)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 2, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 3, 3)); ASSERT_EQ(counts, 1); } else { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); counts += eset.count(std::make_tuple(0, 2, 2)); counts += eset.count(std::make_tuple(0, 3, 3)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(1, 1, 4)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 3, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 2, 6)); ASSERT_EQ(counts, 1); } } prob = { NDArray::FromVector(std::vector({.0, .5, .0, .0})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5})), NDArray::FromVector(std::vector({.5}))}; for (int k = 0; k < 10; ++k) { auto rst = COORowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true); CheckSampledPerEtypeResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); } else { ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_FALSE(eset.count(std::make_tuple(0, 2, 2))); ASSERT_FALSE(eset.count(std::make_tuple(0, 3, 3))); } } } TEST(RowwiseTest, TestCOOPerEtypeSampling) { _TestCOOPerEtypeSampling(true); _TestCOOPerEtypeSampling(true); _TestCOOPerEtypeSampling(true); _TestCOOPerEtypeSampling(true); _TestCOOPerEtypeSampling(false); _TestCOOPerEtypeSampling(false); _TestCOOPerEtypeSampling(false); _TestCOOPerEtypeSampling(false); } template void _TestCOOPerEtypeSamplingUniform(bool has_data) { auto pair = COOEtypes(has_data); auto mat = pair.first; auto eid2etype_offset = pair.second; std::vector prob = { aten::NullArray(), aten::NullArray(), aten::NullArray(), aten::NullArray()}; IdArray rows = 
NDArray::FromVector(std::vector({0, 3})); for (int k = 0; k < 10; ++k) { auto rst = COORowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, true); CheckSampledPerEtypeResult(rst, rows, has_data); } for (int k = 0; k < 10; ++k) { auto rst = COORowWisePerEtypeSampling( mat, rows, eid2etype_offset, {2, 2, 2, 2}, prob, false); CheckSampledPerEtypeResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(0, 2, 4)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(0, 3, 6)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(1, 1, 2)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 2, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 3, 3)); ASSERT_EQ(counts, 1); } else { int counts = 0; counts += eset.count(std::make_tuple(0, 0, 0)); counts += eset.count(std::make_tuple(0, 1, 1)); counts += eset.count(std::make_tuple(0, 2, 2)); counts += eset.count(std::make_tuple(0, 3, 3)); ASSERT_EQ(counts, 2); counts = 0; counts += eset.count(std::make_tuple(1, 1, 4)); ASSERT_EQ(counts, 0); counts = 0; counts += eset.count(std::make_tuple(3, 3, 5)); ASSERT_EQ(counts, 1); counts = 0; counts += eset.count(std::make_tuple(3, 2, 6)); ASSERT_EQ(counts, 1); } } } TEST(RowwiseTest, TestCOOPerEtypeSamplingUniform) { _TestCOOPerEtypeSamplingUniform(true); _TestCOOPerEtypeSamplingUniform(true); _TestCOOPerEtypeSamplingUniform(true); _TestCOOPerEtypeSamplingUniform(true); _TestCOOPerEtypeSamplingUniform(false); _TestCOOPerEtypeSamplingUniform(false); _TestCOOPerEtypeSamplingUniform(false); _TestCOOPerEtypeSamplingUniform(false); } template void _TestCSRTopk(bool has_data) { auto mat = CSR(has_data); FloatArray weight = NDArray::FromVector(std::vector({.1f, .0f, -.1f, .2f, .5f})); // 
-.1, .2, .1, .0, .5 IdArray rows = NDArray::FromVector(std::vector({0, 3})); { auto rst = CSRRowWiseTopk(mat, rows, 1, weight, true); auto eset = ToEdgeSet(rst); ASSERT_EQ(eset.size(), 2); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 1))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 3))); } } { auto rst = CSRRowWiseTopk(mat, rows, 1, weight, false); auto eset = ToEdgeSet(rst); ASSERT_EQ(eset.size(), 2); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } } TEST(RowwiseTest, TestCSRTopk) { _TestCSRTopk(true); _TestCSRTopk(true); _TestCSRTopk(true); _TestCSRTopk(true); _TestCSRTopk(false); _TestCSRTopk(false); _TestCSRTopk(false); _TestCSRTopk(false); } template void _TestCOOTopk(bool has_data) { auto mat = COO(has_data); FloatArray weight = NDArray::FromVector(std::vector({.1f, .0f, -.1f, .2f, .5f})); // -.1, .2, .1, .0, .5 IdArray rows = NDArray::FromVector(std::vector({0, 3})); { auto rst = COORowWiseTopk(mat, rows, 1, weight, true); auto eset = ToEdgeSet(rst); ASSERT_EQ(eset.size(), 2); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 1))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(3, 2, 3))); } } { auto rst = COORowWiseTopk(mat, rows, 1, weight, false); auto eset = ToEdgeSet(rst); ASSERT_EQ(eset.size(), 2); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } } TEST(RowwiseTest, TestCOOTopk) { _TestCOOTopk(true); _TestCOOTopk(true); _TestCOOTopk(true); 
_TestCOOTopk(true); _TestCOOTopk(false); _TestCOOTopk(false); _TestCOOTopk(false); _TestCOOTopk(false); } template void _TestCSRSamplingBiased(bool has_data) { auto mat = CSR(has_data); // 0 - 0,1 // 1 - 1 // 3 - 2,3 NDArray tag_offset = NDArray::FromVector( std::vector({0, 1, 2, 0, 0, 1, 0, 0, 0, 0, 1, 2})); tag_offset = tag_offset.CreateView({4, 3}, tag_offset->dtype); IdArray rows = NDArray::FromVector(std::vector({0, 1, 3})); FloatArray bias = NDArray::FromVector(std::vector({0, 0.5})); for (int k = 0; k < 10; ++k) { auto rst = CSRRowWiseSamplingBiased(mat, rows, 1, tag_offset, bias, false); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(1, 1, 0))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(1, 1, 2))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); } } for (int k = 0; k < 10; ++k) { auto rst = CSRRowWiseSamplingBiased(mat, rows, 3, tag_offset, bias, true); CheckSampledResult(rst, rows, has_data); auto eset = ToEdgeSet(rst); if (has_data) { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 3))); ASSERT_TRUE(eset.count(std::make_tuple(1, 1, 0))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 2))); ASSERT_FALSE(eset.count(std::make_tuple(3, 2, 1))); } else { ASSERT_TRUE(eset.count(std::make_tuple(0, 1, 1))); ASSERT_TRUE(eset.count(std::make_tuple(1, 1, 2))); ASSERT_TRUE(eset.count(std::make_tuple(3, 3, 4))); ASSERT_FALSE(eset.count(std::make_tuple(0, 0, 0))); ASSERT_FALSE(eset.count(std::make_tuple(3, 2, 3))); } } } TEST(RowwiseTest, TestCSRSamplingBiased) { _TestCSRSamplingBiased(true); _TestCSRSamplingBiased(false); _TestCSRSamplingBiased(true); _TestCSRSamplingBiased(false); _TestCSRSamplingBiased(true); _TestCSRSamplingBiased(false); _TestCSRSamplingBiased(true); 
_TestCSRSamplingBiased(false); } ================================================ FILE: tests/cpp/test_sampler.cc ================================================ #include #include #include #include #include "../../src/random/cpu/sample_utils.h" #include "./common.h" using namespace dgl; using namespace dgl::aten; // TODO: adapt this to Random::Choice template void _TestWithReplacement(RandomEngine* re) { Idx n_categories = 100; Idx n_rolls = 1000000; std::vector _prob; DType accum = 0.; for (Idx i = 0; i < n_categories; ++i) { _prob.push_back(re->Uniform()); accum += _prob.back(); } for (Idx i = 0; i < n_categories; ++i) _prob[i] /= accum; FloatArray prob = NDArray::FromVector(_prob); auto _check_given_sampler = [n_categories, n_rolls, &_prob](utils::BaseSampler* s) { std::vector counter(n_categories, 0); for (Idx i = 0; i < n_rolls; ++i) { Idx dice = s->Draw(); counter[dice]++; } for (Idx i = 0; i < n_categories; ++i) ASSERT_NEAR(static_cast(counter[i]) / n_rolls, _prob[i], 1e-2); }; auto _check_random_choice = [n_categories, n_rolls, &_prob, prob]() { std::vector counter(n_categories, 0); for (Idx i = 0; i < n_rolls; ++i) { Idx dice = RandomEngine::ThreadLocal()->Choice(prob); counter[dice]++; } for (Idx i = 0; i < n_categories; ++i) ASSERT_NEAR(static_cast(counter[i]) / n_rolls, _prob[i], 1e-2); }; utils::AliasSampler as(re, prob); utils::CDFSampler cs(re, prob); utils::TreeSampler ts(re, prob); _check_given_sampler(&as); _check_given_sampler(&cs); _check_given_sampler(&ts); _check_random_choice(); } TEST(SampleUtilsTest, TestWithReplacement) { RandomEngine* re = RandomEngine::ThreadLocal(); re->SetSeed(42); _TestWithReplacement(re); re->SetSeed(42); _TestWithReplacement(re); re->SetSeed(42); _TestWithReplacement(re); re->SetSeed(42); _TestWithReplacement(re); }; template void _TestWithoutReplacementOrder(RandomEngine* re) { // TODO(BarclayII): is there a reliable way to do this test? 
std::vector _prob = {1e6f, 1e-6f, 1e-2f, 1e2f}; FloatArray prob = NDArray::FromVector(_prob); std::vector ground_truth = {0, 3, 2, 1}; auto _check_given_sampler = [&ground_truth](utils::BaseSampler* s) { for (size_t i = 0; i < ground_truth.size(); ++i) { Idx dice = s->Draw(); ASSERT_EQ(dice, ground_truth[i]); } }; utils::AliasSampler as(re, prob); utils::CDFSampler cs(re, prob); utils::TreeSampler ts(re, prob); _check_given_sampler(&as); _check_given_sampler(&cs); _check_given_sampler(&ts); } TEST(SampleUtilsTest, TestWithoutReplacementOrder) { RandomEngine* re = RandomEngine::ThreadLocal(); re->SetSeed(42); _TestWithoutReplacementOrder(re); re->SetSeed(42); _TestWithoutReplacementOrder(re); re->SetSeed(42); _TestWithoutReplacementOrder(re); re->SetSeed(42); _TestWithoutReplacementOrder(re); }; template void _TestWithoutReplacementUnique(RandomEngine* re) { Idx N = 1000000; std::vector _likelihood; for (Idx i = 0; i < N; ++i) _likelihood.push_back(re->Uniform()); FloatArray likelihood = NDArray::FromVector(_likelihood); auto _check_given_sampler = [N](utils::BaseSampler* s) { std::vector cnt(N, 0); for (Idx i = 0; i < N; ++i) { Idx dice = s->Draw(); cnt[dice]++; } for (Idx i = 0; i < N; ++i) ASSERT_EQ(cnt[i], 1); }; utils::AliasSampler as(re, likelihood); utils::CDFSampler cs(re, likelihood); utils::TreeSampler ts(re, likelihood); _check_given_sampler(&as); _check_given_sampler(&cs); _check_given_sampler(&ts); } TEST(SampleUtilsTest, TestWithoutReplacementUnique) { RandomEngine* re = RandomEngine::ThreadLocal(); re->SetSeed(42); _TestWithoutReplacementUnique(re); re->SetSeed(42); _TestWithoutReplacementUnique(re); re->SetSeed(42); _TestWithoutReplacementUnique(re); re->SetSeed(42); _TestWithoutReplacementUnique(re); }; template void _TestChoice(RandomEngine* re) { re->SetSeed(42); std::vector prob_vec = {1., 0., 0., 0., 2., 2., 0., 0.}; FloatArray prob = FloatArray::FromVector(prob_vec); { for (int k = 0; k < 1000; ++k) { Idx x = re->Choice(prob); ASSERT_TRUE(x == 
0 || x == 4 || x == 5); } } // num = 0 { IdArray rst = re->Choice(0, prob, true); ASSERT_EQ(rst->shape[0], 0); } // w/ replacement { IdArray rst = re->Choice(1000, prob, true); ASSERT_EQ(rst->shape[0], 1000); for (int64_t i = 0; i < 1000; ++i) { Idx x = static_cast(rst->data)[i]; ASSERT_TRUE(x == 0 || x == 4 || x == 5); } } // w/o replacement { IdArray rst = re->Choice(3, prob, false); ASSERT_EQ(rst->shape[0], 3); std::set idxset; for (int64_t i = 0; i < 3; ++i) { Idx x = static_cast(rst->data)[i]; idxset.insert(x); } ASSERT_EQ(idxset.size(), 3); ASSERT_EQ(idxset.count(0), 1); ASSERT_EQ(idxset.count(4), 1); ASSERT_EQ(idxset.count(5), 1); } } TEST(RandomTest, TestChoice) { RandomEngine* re = RandomEngine::ThreadLocal(); _TestChoice(re); _TestChoice(re); _TestChoice(re); _TestChoice(re); } template void _TestUniformChoice(RandomEngine* re) { re->SetSeed(42); // num == 0 { IdArray rst = re->UniformChoice(0, 100, true); ASSERT_EQ(rst->shape[0], 0); } // w/ replacement { IdArray rst = re->UniformChoice(1000, 100, true); ASSERT_EQ(rst->shape[0], 1000); for (int64_t i = 0; i < 1000; ++i) { Idx x = static_cast(rst->data)[i]; ASSERT_TRUE(x >= 0 && x < 100); } } // w/o replacement { IdArray rst = re->UniformChoice(99, 100, false); ASSERT_EQ(rst->shape[0], 99); std::set idxset; for (int64_t i = 0; i < 99; ++i) { Idx x = static_cast(rst->data)[i]; ASSERT_TRUE(x >= 0 && x < 100); idxset.insert(x); } ASSERT_EQ(idxset.size(), 99); } } TEST(RandomTest, TestUniformChoice) { RandomEngine* re = RandomEngine::ThreadLocal(); _TestUniformChoice(re); _TestUniformChoice(re); _TestUniformChoice(re); _TestUniformChoice(re); } template void _TestBiasedChoice(RandomEngine* re) { re->SetSeed(42); // num == 0 { Idx split[] = {0, 1, 2}; FloatArray bias = NDArray::FromVector(std::vector({1, 3})); IdArray rst = re->BiasedChoice(0, split, bias, true); ASSERT_EQ(rst->shape[0], 0); } // basic test { Idx sample_num = 100000; Idx population = 1000000; Idx split[] = {0, population / 2, population}; 
FloatArray bias = NDArray::FromVector(std::vector({1, 3})); IdArray rst = re->BiasedChoice(sample_num, split, bias, true); auto rst_data = static_cast(rst->data); Idx larger = 0; for (Idx i = 0; i < sample_num; ++i) if (rst_data[i] >= population / 2) larger++; ASSERT_LE(fabs((double)larger / sample_num - 0.75), 1e-2); } // without replacement { Idx sample_num = 500; Idx population = 1000; Idx split[] = {0, sample_num, population}; FloatArray bias = NDArray::FromVector(std::vector({1, 0})); IdArray rst = re->BiasedChoice(sample_num, split, bias, false); auto rst_data = static_cast(rst->data); std::set idxset; for (int64_t i = 0; i < sample_num; ++i) { Idx x = rst_data[i]; ASSERT_LT(x, sample_num); idxset.insert(x); } ASSERT_EQ(idxset.size(), sample_num); } } TEST(RandomTest, TestBiasedChoice) { RandomEngine* re = RandomEngine::ThreadLocal(); _TestBiasedChoice(re); _TestBiasedChoice(re); _TestBiasedChoice(re); _TestBiasedChoice(re); } ================================================ FILE: tests/cpp/test_serialize.cc ================================================ #include #include #include #include #include #include #include #include #include "../../src/graph/heterograph.h" #include "../../src/graph/unit_graph.h" #include "./common.h" using namespace dgl; using namespace dgl::aten; using namespace dmlc; TEST(Serialize, UnitGraph_COO) { aten::CSRMatrix csr_matrix; auto src = VecToIdArray({1, 2, 5, 3}); auto dst = VecToIdArray({1, 6, 2, 6}); auto mg = std::dynamic_pointer_cast( dgl::UnitGraph::CreateFromCOO(2, 9, 8, src, dst, COO_CODE)); std::string blob; dmlc::MemoryStringStream ifs(&blob); static_cast(&ifs)->Write(mg); dmlc::MemoryStringStream ofs(&blob); auto ug2 = Serializer::make_shared(); static_cast(&ofs)->Read(&ug2); EXPECT_EQ(ug2->NumVertices(0), 9); EXPECT_EQ(ug2->NumVertices(1), 8); EXPECT_EQ(ug2->NumEdges(0), 4); EXPECT_EQ(ug2->FindEdge(0, 1).first, 2); EXPECT_EQ(ug2->FindEdge(0, 1).second, 6); } TEST(Serialize, UnitGraph_CSR) { aten::CSRMatrix csr_matrix; 
auto src = VecToIdArray({1, 2, 5, 3}); auto dst = VecToIdArray({1, 6, 2, 6}); auto coo_g = std::dynamic_pointer_cast( dgl::UnitGraph::CreateFromCOO(2, 9, 8, src, dst)); auto csr_g = std::dynamic_pointer_cast(coo_g->GetGraphInFormat(CSR_CODE)); std::string blob; dmlc::MemoryStringStream ifs(&blob); static_cast(&ifs)->Write(csr_g); dmlc::MemoryStringStream ofs(&blob); auto ug2 = Serializer::make_shared(); static_cast(&ofs)->Read(&ug2); // Query operation is not supported on CSR, how to check it? } TEST(Serialize, ImmutableGraph) { auto src = VecToIdArray({1, 2, 5, 3}); auto dst = VecToIdArray({1, 6, 2, 6}); auto gptr = ImmutableGraph::CreateFromCOO(10, src, dst); std::string blob; dmlc::MemoryStringStream ifs(&blob); static_cast(&ifs)->Write(gptr); dmlc::MemoryStringStream ofs(&blob); auto rptr_read = dgl::Serializer::make_shared(); static_cast(&ofs)->Read(&rptr_read); EXPECT_EQ(rptr_read->NumEdges(), 4); EXPECT_EQ(rptr_read->NumVertices(), 10); EXPECT_EQ(rptr_read->FindEdge(2).first, 5); EXPECT_EQ(rptr_read->FindEdge(2).second, 2); } TEST(Serialize, HeteroGraph) { auto src = VecToIdArray({1, 2, 5, 3}); auto dst = VecToIdArray({1, 6, 2, 6}); auto mg1 = dgl::UnitGraph::CreateFromCOO(2, 9, 8, src, dst); src = VecToIdArray({6, 2, 5, 1, 8}); dst = VecToIdArray({5, 2, 4, 8, 0}); auto mg2 = dgl::UnitGraph::CreateFromCOO(1, 9, 9, src, dst); std::vector relgraphs; relgraphs.push_back(mg1); relgraphs.push_back(mg2); src = VecToIdArray({0, 0}); dst = VecToIdArray({1, 0}); auto meta_gptr = ImmutableGraph::CreateFromCOO(3, src, dst); auto hrptr = std::make_shared(meta_gptr, relgraphs); std::string blob; dmlc::MemoryStringStream ifs(&blob); static_cast(&ifs)->Write(hrptr); dmlc::MemoryStringStream ofs(&blob); auto gptr = dgl::Serializer::make_shared(); static_cast(&ofs)->Read(&gptr); EXPECT_EQ(gptr->NumVertices(0), 9); EXPECT_EQ(gptr->NumVertices(1), 8); } ================================================ FILE: tests/cpp/test_smart_ptr_serialize.cc 
================================================ #include #include #include #include #include #include #include #include #include #include #include using namespace std; class MyClass { public: MyClass() {} MyClass(std::string data) : data_(data) {} inline void Save(dmlc::Stream *strm) const { strm->Write(this->data_); } inline bool Load(dmlc::Stream *strm) { return strm->Read(&data_); } inline bool operator==(const MyClass &other) const { return data_ == other.data_; } public: std::string data_; }; // need to declare the traits property of my class to dmlc namespace dmlc { DMLC_DECLARE_TRAITS(has_saveload, MyClass, true); } template class SmartPtrTest : public ::testing::Test { public: typedef T SmartPtr; }; using SmartPtrTypes = ::testing::Types, std::unique_ptr>; TYPED_TEST_SUITE(SmartPtrTest, SmartPtrTypes); TYPED_TEST(SmartPtrTest, Obj_Test) { std::string blob; dmlc::MemoryStringStream fs(&blob); using SmartPtr = typename TestFixture::SmartPtr; auto myc = SmartPtr(new MyClass("1111")); { static_cast(&fs)->Write(myc); } fs.Seek(0); auto copy_data = SmartPtr(new MyClass()); CHECK(static_cast(&fs)->Read(©_data)); EXPECT_EQ(myc->data_, copy_data->data_); } TYPED_TEST(SmartPtrTest, Vector_Test1) { std::string blob; dmlc::MemoryStringStream fs(&blob); using SmartPtr = typename TestFixture::SmartPtr; typedef std::pair Pair; std::vector myclasses; myclasses.emplace_back("a", SmartPtr(new MyClass("@A@B"))); myclasses.emplace_back("b", SmartPtr(new MyClass("2222"))); static_cast(&fs)->Write>(myclasses); dmlc::MemoryStringStream ofs(&blob); std::vector copy_myclasses; static_cast(&ofs)->Read>(©_myclasses); EXPECT_TRUE(std::equal( myclasses.begin(), myclasses.end(), copy_myclasses.begin(), [](const Pair &left, const Pair &right) { return (left.second->data_ == right.second->data_) && (left.first == right.first); })); } TYPED_TEST(SmartPtrTest, Vector_Test2) { std::string blob; dmlc::MemoryStringStream fs(&blob); using SmartPtr = typename TestFixture::SmartPtr; std::vector 
myclasses; myclasses.emplace_back(new MyClass("@A@")); myclasses.emplace_back(new MyClass("2222")); static_cast(&fs)->Write>(myclasses); dmlc::MemoryStringStream ofs(&blob); std::vector copy_myclasses; static_cast(&ofs)->Read>( ©_myclasses); EXPECT_TRUE(std::equal( myclasses.begin(), myclasses.end(), copy_myclasses.begin(), [](const SmartPtr &left, const SmartPtr &right) { return left->data_ == right->data_; })); } ================================================ FILE: tests/cpp/test_spmat_coo.cc ================================================ #include #include #include #include #include #include "./common.h" using namespace dgl; using namespace dgl::runtime; namespace { template aten::CSRMatrix CSR1(DGLContext ctx = CTX) { // [[0, 1, 1, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 3, 1, 4] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 3, 5, 5}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 2, 3, 4, 1}), sizeof(IDX) * 8, ctx), false); } template aten::CSRMatrix CSR2(DGLContext ctx = CTX) { // has duplicate entries // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 5, 3, 1, 4] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 2, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx), false); } template aten::COOMatrix COO1(DGLContext ctx = CTX) { // [[0, 1, 1, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 3, 1, 4] // row : [0, 2, 0, 1, 2] // col : [1, 2, 2, 0, 3] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 0, 1, 2}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 3, 1, 2, 4}), 
sizeof(IDX) * 8, ctx)); } template aten::COOMatrix COO2(DGLContext ctx = CTX) { // has duplicate entries // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 5, 3, 1, 4] // row : [0, 2, 0, 1, 2, 0] // col : [1, 2, 2, 0, 3, 2] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 0, 1, 2, 0}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 3, 2}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 1, 2, 3, 4, 5}), sizeof(IDX) * 8, ctx)); } template aten::CSRMatrix SR_CSR3(DGLContext ctx) { // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({2, 1, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 2, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx), false); } template aten::CSRMatrix SRC_CSR3(DGLContext ctx) { // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({2, 0, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx), false); } template aten::COOMatrix COO3(DGLContext ctx) { // has duplicate entries // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // row : [0, 2, 0, 1, 2, 0] // col : [2, 2, 1, 0, 3, 2] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 0, 1, 2, 0}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({2, 2, 1, 0, 3, 2}), sizeof(IDX) * 8, ctx)); } template aten::COOMatrix COORandomized(IDX rows_and_cols, int64_t nnz, int seed) { std::vector vec_rows(nnz); std::vector vec_cols(nnz); std::vector vec_data(nnz); #pragma omp parallel { const int64_t num_threads = omp_get_num_threads(); const int64_t thread_id = 
omp_get_thread_num(); const int64_t chunk = nnz / num_threads; const int64_t size = (thread_id == num_threads - 1) ? nnz - chunk * (num_threads - 1) : chunk; auto rows = vec_rows.data() + thread_id * chunk; auto cols = vec_cols.data() + thread_id * chunk; auto data = vec_data.data() + thread_id * chunk; std::mt19937_64 gen64(seed + thread_id); std::mt19937 gen32(seed + thread_id); for (int64_t i = 0; i < size; ++i) { rows[i] = gen64() % rows_and_cols; cols[i] = gen64() % rows_and_cols; data[i] = gen32() % 90 + 1; } } return aten::COOMatrix( rows_and_cols, rows_and_cols, aten::VecToIdArray(vec_rows, sizeof(IDX) * 8, CTX), aten::VecToIdArray(vec_cols, sizeof(IDX) * 8, CTX), aten::VecToIdArray(vec_data, sizeof(IDX) * 8, CTX), false, false); } struct SparseCOOCSR { static constexpr uint64_t NUM_ROWS = 100; static constexpr uint64_t NUM_COLS = 150; static constexpr uint64_t NUM_NZ = 5; template static aten::COOMatrix COOSparse(const DGLContext &ctx = CTX) { return aten::COOMatrix( NUM_ROWS, NUM_COLS, aten::VecToIdArray( std::vector({0, 1, 2, 3, 4}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 3, 4, 5}), sizeof(IDX) * 8, ctx)); } template static aten::CSRMatrix CSRSparse(const DGLContext &ctx = CTX) { auto &&indptr = std::vector(NUM_ROWS + 1, NUM_NZ); for (size_t i = 0; i < NUM_NZ; ++i) { indptr[i + 1] = static_cast(i + 1); } indptr[0] = 0; return aten::CSRMatrix( NUM_ROWS, NUM_COLS, aten::VecToIdArray(indptr, sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 3, 4, 5}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 1, 1, 1, 1}), sizeof(IDX) * 8, ctx), false); } }; template aten::COOMatrix RowSorted_NullData_COO(DGLContext ctx = CTX) { // [[0, 1, 1, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // row : [0, 0, 1, 2, 2] // col : [1, 2, 0, 2, 3] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 0, 1, 2, 2}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 0, 2, 3}), sizeof(IDX) * 
8, ctx), aten::NullArray(), true, false); } template aten::CSRMatrix RowSorted_NullData_CSR(DGLContext ctx = CTX) { // [[0, 1, 1, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 1, 2, 3, 4] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 3, 5, 5}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 1, 2, 3, 4}), sizeof(IDX) * 8, ctx), false); } } // namespace template void _TestCOOToCSR(DGLContext ctx) { auto coo = COO1(ctx); auto csr = CSR1(ctx); auto tcsr = aten::COOToCSR(coo); ASSERT_FALSE(coo.row_sorted); ASSERT_EQ(csr.num_rows, tcsr.num_rows); ASSERT_EQ(csr.num_cols, tcsr.num_cols); ASSERT_TRUE(ArrayEQ(csr.indptr, tcsr.indptr)); ASSERT_TRUE(ArrayEQ(csr.indices, tcsr.indices)); coo = COO2(ctx); csr = CSR2(ctx); tcsr = aten::COOToCSR(coo); ASSERT_EQ(coo.num_rows, csr.num_rows); ASSERT_EQ(coo.num_cols, csr.num_cols); ASSERT_TRUE(ArrayEQ(csr.indptr, tcsr.indptr)); // Convert from row sorted coo coo = COO1(ctx); auto rs_coo = aten::COOSort(coo, false); auto rs_csr = CSR1(ctx); auto rs_tcsr = aten::COOToCSR(rs_coo); ASSERT_TRUE(rs_coo.row_sorted); ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows); ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols); ASSERT_TRUE(ArrayEQ(rs_csr.indptr, rs_tcsr.indptr)); ASSERT_TRUE(ArrayEQ(rs_tcsr.indices, rs_coo.col)); ASSERT_TRUE(ArrayEQ(rs_tcsr.data, rs_coo.data)); coo = COO3(ctx); rs_coo = aten::COOSort(coo, false); rs_csr = SR_CSR3(ctx); rs_tcsr = aten::COOToCSR(rs_coo); ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows); ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols); ASSERT_TRUE(ArrayEQ(rs_csr.indptr, rs_tcsr.indptr)); ASSERT_TRUE(ArrayEQ(rs_tcsr.indices, rs_coo.col)); ASSERT_TRUE(ArrayEQ(rs_tcsr.data, rs_coo.data)); rs_coo = RowSorted_NullData_COO(ctx); ASSERT_TRUE(rs_coo.row_sorted); rs_csr = RowSorted_NullData_CSR(ctx); rs_tcsr = aten::COOToCSR(rs_coo); ASSERT_EQ(coo.num_rows, rs_tcsr.num_rows); ASSERT_EQ(rs_csr.num_rows, 
rs_tcsr.num_rows); ASSERT_EQ(coo.num_cols, rs_tcsr.num_cols); ASSERT_EQ(rs_csr.num_cols, rs_tcsr.num_cols); ASSERT_TRUE(ArrayEQ(rs_csr.indptr, rs_tcsr.indptr)); ASSERT_TRUE(ArrayEQ(rs_csr.indices, rs_tcsr.indices)); ASSERT_TRUE(ArrayEQ(rs_csr.data, rs_tcsr.data)); ASSERT_TRUE(ArrayEQ(rs_coo.col, rs_tcsr.indices)); ASSERT_FALSE(ArrayEQ(rs_coo.data, rs_tcsr.data)); // Convert from col sorted coo coo = COO1(ctx); auto src_coo = aten::COOSort(coo, true); auto src_csr = CSR1(ctx); auto src_tcsr = aten::COOToCSR(src_coo); ASSERT_EQ(coo.num_rows, src_tcsr.num_rows); ASSERT_EQ(coo.num_cols, src_tcsr.num_cols); ASSERT_TRUE(src_tcsr.sorted); ASSERT_TRUE(ArrayEQ(src_tcsr.indptr, src_csr.indptr)); ASSERT_TRUE(ArrayEQ(src_tcsr.indices, src_coo.col)); ASSERT_TRUE(ArrayEQ(src_tcsr.data, src_coo.data)); coo = COO3(ctx); src_coo = aten::COOSort(coo, true); src_csr = SRC_CSR3(ctx); src_tcsr = aten::COOToCSR(src_coo); ASSERT_EQ(coo.num_rows, src_tcsr.num_rows); ASSERT_EQ(coo.num_cols, src_tcsr.num_cols); ASSERT_TRUE(src_tcsr.sorted); ASSERT_TRUE(ArrayEQ(src_tcsr.indptr, src_csr.indptr)); ASSERT_TRUE(ArrayEQ(src_tcsr.indices, src_coo.col)); ASSERT_TRUE(ArrayEQ(src_tcsr.data, src_coo.data)); coo = SparseCOOCSR::COOSparse(ctx); csr = SparseCOOCSR::CSRSparse(ctx); tcsr = aten::COOToCSR(coo); ASSERT_FALSE(coo.row_sorted); ASSERT_EQ(csr.num_rows, tcsr.num_rows); ASSERT_EQ(csr.num_cols, tcsr.num_cols); ASSERT_TRUE(ArrayEQ(csr.indptr, tcsr.indptr)); ASSERT_TRUE(ArrayEQ(csr.indices, tcsr.indices)); } TEST(SpmatTest, COOToCSR) { _TestCOOToCSR(CPU); _TestCOOToCSR(CPU); #ifdef DGL_USE_CUDA _TestCOOToCSR(GPU); _TestCOOToCSR(GPU); #endif } template void _TestCOOHasDuplicate() { auto coo = COO1(); ASSERT_FALSE(aten::COOHasDuplicate(coo)); coo = COO2(); ASSERT_TRUE(aten::COOHasDuplicate(coo)); } TEST(SpmatTest, TestCOOHasDuplicate) { _TestCOOHasDuplicate(); _TestCOOHasDuplicate(); } template void _TestCOOSort(DGLContext ctx) { auto coo = COO3(ctx); auto sr_coo = COOSort(coo, false); 
ASSERT_EQ(coo.num_rows, sr_coo.num_rows); ASSERT_EQ(coo.num_cols, sr_coo.num_cols); ASSERT_TRUE(sr_coo.row_sorted); auto flags = COOIsSorted(sr_coo); ASSERT_TRUE(flags.first); flags = COOIsSorted(coo); // original coo should stay the same ASSERT_FALSE(flags.first); ASSERT_FALSE(flags.second); auto src_coo = COOSort(coo, true); ASSERT_EQ(coo.num_rows, src_coo.num_rows); ASSERT_EQ(coo.num_cols, src_coo.num_cols); ASSERT_TRUE(src_coo.row_sorted); ASSERT_TRUE(src_coo.col_sorted); flags = COOIsSorted(src_coo); ASSERT_TRUE(flags.first); ASSERT_TRUE(flags.second); // sort inplace COOSort_(&coo); ASSERT_TRUE(coo.row_sorted); flags = COOIsSorted(coo); ASSERT_TRUE(flags.first); COOSort_(&coo, true); ASSERT_TRUE(coo.row_sorted); ASSERT_TRUE(coo.col_sorted); flags = COOIsSorted(coo); ASSERT_TRUE(flags.first); ASSERT_TRUE(flags.second); // COO3 // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 1, 2, 3, 4, 5] // row : [0, 2, 0, 1, 2, 0] // col : [2, 2, 1, 0, 3, 2] // Row Sorted // data: [0, 2, 5, 3, 1, 4] // row : [0, 0, 0, 1, 2, 2] // col : [2, 1, 2, 0, 2, 3] // Row Col Sorted // data: [2, 0, 5, 3, 1, 4] // row : [0, 0, 0, 1, 2, 2] // col : [1, 2, 2, 0, 2, 3] auto sort_row = aten::VecToIdArray( std::vector({0, 0, 0, 1, 2, 2}), sizeof(IDX) * 8, ctx); auto sort_col = aten::VecToIdArray( std::vector({1, 2, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx); auto sort_col_data = aten::VecToIdArray( std::vector({2, 0, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(sr_coo.row, sort_row)); ASSERT_TRUE(ArrayEQ(src_coo.row, sort_row)); ASSERT_TRUE(ArrayEQ(src_coo.col, sort_col)); ASSERT_TRUE(ArrayEQ(src_coo.data, sort_col_data)); } TEST(SpmatTest, COOSort) { _TestCOOSort(CPU); _TestCOOSort(CPU); #ifdef DGL_USE_CUDA _TestCOOSort(GPU); _TestCOOSort(GPU); #endif } template void _TestCOOReorder() { auto coo = COO2(); auto new_row = aten::VecToIdArray(std::vector({2, 0, 3, 1}), sizeof(IDX) * 8, CTX); auto new_col = aten::VecToIdArray( std::vector({2, 0, 
4, 3, 1}), sizeof(IDX) * 8, CTX); auto new_coo = COOReorder(coo, new_row, new_col); ASSERT_EQ(new_coo.num_rows, coo.num_rows); ASSERT_EQ(new_coo.num_cols, coo.num_cols); } TEST(SpmatTest, TestCOOReorder) { _TestCOOReorder(); _TestCOOReorder(); } template void _TestCOOGetData(DGLContext ctx) { auto coo = COO2(ctx); // test get all data auto x = aten::COOGetAllData(coo, 0, 0); auto tx = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); x = aten::COOGetAllData(coo, 0, 2); tx = aten::VecToIdArray(std::vector({2, 5}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); // test get data auto r = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); x = aten::COOGetData(coo, r, c); tx = aten::VecToIdArray(std::vector({-1, 0, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); // test get data on sorted coo = aten::COOSort(coo); r = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); x = aten::COOGetData(coo, r, c); tx = aten::VecToIdArray(std::vector({-1, 0, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); // test get data w/ broadcasting r = aten::VecToIdArray(std::vector({0}), sizeof(IDX) * 8, ctx); c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); x = aten::COOGetData(coo, r, c); tx = aten::VecToIdArray(std::vector({-1, 0, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); } TEST(SpmatTest, COOGetData) { _TestCOOGetData(CPU); _TestCOOGetData(CPU); // #ifdef DGL_USE_CUDA //_TestCOOGetData(GPU); //_TestCOOGetData(GPU); // #endif } template void _TestCOOGetDataAndIndices() { auto coo = COO2(); auto r = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, CTX); auto c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, CTX); auto x = aten::COOGetDataAndIndices(coo, r, c); auto tr = 
aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, CTX); auto tc = aten::VecToIdArray(std::vector({1, 2, 2}), sizeof(IDX) * 8, CTX); auto td = aten::VecToIdArray(std::vector({0, 2, 5}), sizeof(IDX) * 8, CTX); ASSERT_TRUE(ArrayEQ(x[0], tr)); ASSERT_TRUE(ArrayEQ(x[1], tc)); ASSERT_TRUE(ArrayEQ(x[2], td)); } TEST(SpmatTest, COOGetDataAndIndices) { _TestCOOGetDataAndIndices(); _TestCOOGetDataAndIndices(); } template void _TestCOOToCSRAlgs() { // Compare results between different CPU COOToCSR implementations. // NNZ is chosen to be bigger than the limit for the "small" matrix algorithm. // N is set to lay on border between "sparse" and "dense" algorithm choice. const int64_t num_threads = std::min(256, omp_get_max_threads()); const int64_t min_num_threads = 3; if (num_threads < min_num_threads) { std::cerr << "[ ] [ INFO ]" << "This test requires at least 3 OMP threads to work properly" << std::endl; GTEST_SKIP(); return; } // Select N and NNZ for COO matrix in a way than depending on number of // threads different algorithm will be used. // See WhichCOOToCSR in src/array/cpu/spmat_op_impl_coo.cc for details const int64_t type_scale = sizeof(IDX) >> 1; const int64_t small = 50 * num_threads * type_scale * type_scale; // NNZ should be bigger than limit for small matrix algorithm const int64_t nnz = small + 1234; // N is chosen to lay on sparse/dense border const int64_t n = type_scale * nnz / num_threads; const IDX rows_nad_cols = n + 1; // should be bigger than sparse/dense border // Note that it will be better to set the seed to a random value when gtest // allows to use --gtest_random_seed without --gtest_shuffle and report this // value for reproduction. This way we can find unforeseen situations and // potential bugs. 
const auto seed = 123321; auto coo = COORandomized(rows_nad_cols, nnz, seed); omp_set_num_threads(1); // UnSortedSmallCOOToCSR will be used auto tcsr_small = aten::COOToCSR(coo); ASSERT_EQ(coo.num_rows, tcsr_small.num_rows); ASSERT_EQ(coo.num_cols, tcsr_small.num_cols); omp_set_num_threads(num_threads - 1); // UnSortedDenseCOOToCSR will be used auto tcsr_dense = aten::COOToCSR(coo); ASSERT_EQ(tcsr_small.num_rows, tcsr_dense.num_rows); ASSERT_EQ(tcsr_small.num_cols, tcsr_dense.num_cols); ASSERT_TRUE(ArrayEQ(tcsr_small.indptr, tcsr_dense.indptr)); ASSERT_TRUE(ArrayEQ(tcsr_small.indices, tcsr_dense.indices)); ASSERT_TRUE(ArrayEQ(tcsr_small.data, tcsr_dense.data)); omp_set_num_threads(num_threads); // UnSortedSparseCOOToCSR will be used auto tcsr_sparse = aten::COOToCSR(coo); ASSERT_EQ(tcsr_small.num_rows, tcsr_sparse.num_rows); ASSERT_EQ(tcsr_small.num_cols, tcsr_sparse.num_cols); ASSERT_TRUE(ArrayEQ(tcsr_small.indptr, tcsr_sparse.indptr)); ASSERT_TRUE(ArrayEQ(tcsr_small.indices, tcsr_sparse.indices)); ASSERT_TRUE(ArrayEQ(tcsr_small.data, tcsr_sparse.data)); return; } TEST(SpmatTest, COOToCSRAlgs) { _TestCOOToCSRAlgs(); _TestCOOToCSRAlgs(); } ================================================ FILE: tests/cpp/test_spmat_csr.cc ================================================ #include #include #include "./common.h" using namespace dgl; using namespace dgl::runtime; namespace { template aten::CSRMatrix CSR1(DGLContext ctx = CTX) { // [[0, 1, 1, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 3, 1, 4] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 3, 5, 5}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 0, 3, 2}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 2, 3, 4, 1}), sizeof(IDX) * 8, ctx), false); } template aten::CSRMatrix CSR2(DGLContext ctx = CTX) { // has duplicate entries // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 5, 3, 1, 4] 
return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 2, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx), false); } template aten::CSRMatrix CSR3(DGLContext ctx = CTX) { // has duplicate entries and the columns are not sorted // [[0, 1, 1, 1, 0, 0], // [1, 0, 0, 0, 0, 0], // [0, 0, 1, 1, 0, 0], // [0, 0, 0, 0, 0, 0], // [1, 1, 1, 0, 0, 0], // [0, 0, 0, 1, 0, 0], // [0, 0, 0, 0, 0, 0], // [1, 2, 1, 1, 0, 0], // [0, 1, 0, 0, 0, 1]], // data: [5, 2, 0, 3, 1, 4, 8, 7, 6, 9, 12, 13, 11, 10, 14, 15, 16] return aten::CSRMatrix( 9, 6, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6, 9, 10, 10, 15, 17}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({3, 2, 1, 0, 2, 3, 1, 2, 0, 3, 1, 2, 1, 3, 0, 5, 1}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector( {0, 2, 5, 3, 1, 4, 6, 8, 7, 9, 13, 10, 11, 14, 12, 16, 15}), sizeof(IDX) * 8, ctx), false); } template aten::COOMatrix COO1(DGLContext ctx = CTX) { // [[0, 1, 1, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 3, 1, 4] // row : [0, 2, 0, 1, 2] // col : [1, 2, 2, 0, 3] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 0, 1, 2}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 3, 1, 2, 4}), sizeof(IDX) * 8, ctx)); } template aten::COOMatrix COO2(DGLContext ctx = CTX) { // has duplicate entries // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 5, 3, 1, 4] // row : [0, 2, 0, 1, 2, 0] // col : [1, 2, 2, 0, 3, 2] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 0, 1, 2, 0}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 3, 2}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 1, 2, 3, 4, 5}), sizeof(IDX) * 8, ctx)); } template 
aten::CSRMatrix SR_CSR3(DGLContext ctx) { // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({2, 1, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({0, 2, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx), false); } template aten::CSRMatrix SRC_CSR3(DGLContext ctx) { // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] return aten::CSRMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 3, 4, 6, 6}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({1, 2, 2, 0, 2, 3}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({2, 0, 5, 3, 1, 4}), sizeof(IDX) * 8, ctx), false); } template aten::COOMatrix COO3(DGLContext ctx) { // has duplicate entries // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // row : [0, 2, 0, 1, 2, 0] // col : [2, 2, 1, 0, 3, 2] return aten::COOMatrix( 4, 5, aten::VecToIdArray( std::vector({0, 2, 0, 1, 2, 0}), sizeof(IDX) * 8, ctx), aten::VecToIdArray( std::vector({2, 2, 1, 0, 3, 2}), sizeof(IDX) * 8, ctx)); } } // namespace template void _TestCSRIsNonZero1(DGLContext ctx) { auto csr = CSR1(ctx); ASSERT_TRUE(aten::CSRIsNonZero(csr, 0, 1)); ASSERT_FALSE(aten::CSRIsNonZero(csr, 0, 0)); IdArray r = aten::VecToIdArray(std::vector({2, 2, 0, 0}), sizeof(IDX) * 8, ctx); IdArray c = aten::VecToIdArray(std::vector({1, 1, 1, 3}), sizeof(IDX) * 8, ctx); IdArray x = aten::CSRIsNonZero(csr, r, c); IdArray tx = aten::VecToIdArray(std::vector({0, 0, 1, 0}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); } template void _TestCSRIsNonZero2(DGLContext ctx) { auto csr = CSR3(ctx); ASSERT_TRUE(aten::CSRIsNonZero(csr, 0, 1)); ASSERT_FALSE(aten::CSRIsNonZero(csr, 0, 0)); IdArray r = aten::VecToIdArray( std::vector({ 0, 0, 0, 0, 0, }), sizeof(IDX) * 8, ctx); IdArray c = aten::VecToIdArray( std::vector({ 0, 1, 2, 3, 4, }), 
sizeof(IDX) * 8, ctx); IdArray x = aten::CSRIsNonZero(csr, r, c); IdArray tx = aten::VecToIdArray( std::vector({0, 1, 1, 1, 0}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)) << " x = " << x << ", tx = " << tx; } TEST(SpmatTest, TestCSRIsNonZero) { _TestCSRIsNonZero1(CPU); _TestCSRIsNonZero1(CPU); _TestCSRIsNonZero2(CPU); _TestCSRIsNonZero2(CPU); #ifdef DGL_USE_CUDA _TestCSRIsNonZero1(GPU); _TestCSRIsNonZero1(GPU); _TestCSRIsNonZero2(GPU); _TestCSRIsNonZero2(GPU); #endif } template void _TestCSRGetRowNNZ(DGLContext ctx) { auto csr = CSR2(ctx); ASSERT_EQ(aten::CSRGetRowNNZ(csr, 0), 3); ASSERT_EQ(aten::CSRGetRowNNZ(csr, 3), 0); IdArray r = aten::VecToIdArray(std::vector({0, 3}), sizeof(IDX) * 8, ctx); IdArray x = aten::CSRGetRowNNZ(csr, r); IdArray tx = aten::VecToIdArray(std::vector({3, 0}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); } TEST(SpmatTest, TestCSRGetRowNNZ) { _TestCSRGetRowNNZ(CPU); _TestCSRGetRowNNZ(CPU); #ifdef DGL_USE_CUDA _TestCSRGetRowNNZ(GPU); _TestCSRGetRowNNZ(GPU); #endif } template void _TestCSRGetRowColumnIndices(DGLContext ctx) { auto csr = CSR2(ctx); auto x = aten::CSRGetRowColumnIndices(csr, 0); auto tx = aten::VecToIdArray(std::vector({1, 2, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); x = aten::CSRGetRowColumnIndices(csr, 1); tx = aten::VecToIdArray(std::vector({0}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); x = aten::CSRGetRowColumnIndices(csr, 3); tx = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); } TEST(SpmatTest, TestCSRGetRowColumnIndices) { _TestCSRGetRowColumnIndices(CPU); _TestCSRGetRowColumnIndices(CPU); #ifdef DGL_USE_CUDA _TestCSRGetRowColumnIndices(GPU); _TestCSRGetRowColumnIndices(GPU); #endif } template void _TestCSRGetRowData(DGLContext ctx) { auto csr = CSR2(ctx); auto x = aten::CSRGetRowData(csr, 0); auto tx = aten::VecToIdArray(std::vector({0, 2, 5}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); x = aten::CSRGetRowData(csr, 1); tx = 
aten::VecToIdArray(std::vector({3}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); x = aten::CSRGetRowData(csr, 3); tx = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); } TEST(SpmatTest, TestCSRGetRowData) { _TestCSRGetRowData(CPU); _TestCSRGetRowData(CPU); #ifdef DGL_USE_CUDA _TestCSRGetRowData(GPU); _TestCSRGetRowData(GPU); #endif } template void _TestCSRGetData(DGLContext ctx) { auto csr = CSR2(ctx); // test get all data auto x = aten::CSRGetAllData(csr, 0, 0); auto tx = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); x = aten::CSRGetAllData(csr, 0, 2); tx = aten::VecToIdArray(std::vector({2, 5}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); // test get data auto r = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); x = aten::CSRGetData(csr, r, c); tx = aten::VecToIdArray(std::vector({-1, 0, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); // test get data on sorted csr = aten::CSRSort(csr); r = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); x = aten::CSRGetData(csr, r, c); tx = aten::VecToIdArray(std::vector({-1, 0, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); // test get data w/ broadcasting r = aten::VecToIdArray(std::vector({0}), sizeof(IDX) * 8, ctx); c = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); x = aten::CSRGetData(csr, r, c); tx = aten::VecToIdArray(std::vector({-1, 0, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x, tx)); } TEST(SpmatTest, CSRGetData) { _TestCSRGetData(CPU); _TestCSRGetData(CPU); #ifdef DGL_USE_CUDA _TestCSRGetData(GPU); _TestCSRGetData(GPU); #endif } template void _TestCSRGetDataAndIndices(DGLContext ctx) { auto csr = CSR2(ctx); auto r = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); auto c = 
aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); auto x = aten::CSRGetDataAndIndices(csr, r, c); auto tr = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); auto tc = aten::VecToIdArray(std::vector({1, 2, 2}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({0, 2, 5}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x[0], tr)); ASSERT_TRUE(ArrayEQ(x[1], tc)); ASSERT_TRUE(ArrayEQ(x[2], td)); } TEST(SpmatTest, CSRGetDataAndIndices) { _TestCSRGetDataAndIndices(CPU); _TestCSRGetDataAndIndices(CPU); #ifdef DGL_USE_CUDA _TestCSRGetDataAndIndices(GPU); _TestCSRGetDataAndIndices(GPU); #endif } template void _TestCSRTranspose(DGLContext ctx) { auto csr = CSR2(ctx); auto csr_t = aten::CSRTranspose(csr); // [[0, 1, 0, 0], // [1, 0, 0, 0], // [2, 0, 1, 0], // [0, 0, 1, 0], // [0, 0, 0, 0]] // data: [3, 0, 2, 5, 1, 4] ASSERT_EQ(csr_t.num_rows, 5); ASSERT_EQ(csr_t.num_cols, 4); auto tp = aten::VecToIdArray( std::vector({0, 1, 2, 5, 6, 6}), sizeof(IDX) * 8, ctx); auto ti = aten::VecToIdArray( std::vector({1, 0, 0, 0, 2, 2}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray( std::vector({3, 0, 2, 5, 1, 4}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(csr_t.indptr, tp)); ASSERT_TRUE(ArrayEQ(csr_t.indices, ti)); ASSERT_TRUE(ArrayEQ(csr_t.data, td)); } TEST(SpmatTest, CSRTranspose) { _TestCSRTranspose(CPU); _TestCSRTranspose(CPU); #ifdef DGL_USE_CUDA _TestCSRTranspose(GPU); _TestCSRTranspose(GPU); #endif } template void _TestCSRToCOO(DGLContext ctx) { auto csr = CSR2(ctx); { auto coo = CSRToCOO(csr, false); ASSERT_EQ(coo.num_rows, 4); ASSERT_EQ(coo.num_cols, 5); ASSERT_TRUE(coo.row_sorted); auto tr = aten::VecToIdArray( std::vector({0, 0, 0, 1, 2, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(coo.row, tr)); ASSERT_TRUE(ArrayEQ(coo.col, csr.indices)); ASSERT_TRUE(ArrayEQ(coo.data, csr.data)); // convert from sorted csr auto s_csr = CSRSort(csr); coo = CSRToCOO(s_csr, false); ASSERT_EQ(coo.num_rows, 4); ASSERT_EQ(coo.num_cols, 5); 
ASSERT_TRUE(coo.row_sorted); ASSERT_TRUE(coo.col_sorted); tr = aten::VecToIdArray( std::vector({0, 0, 0, 1, 2, 2}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(coo.row, tr)); ASSERT_TRUE(ArrayEQ(coo.col, s_csr.indices)); ASSERT_TRUE(ArrayEQ(coo.data, s_csr.data)); } { auto coo = CSRToCOO(csr, true); ASSERT_EQ(coo.num_rows, 4); ASSERT_EQ(coo.num_cols, 5); auto tcoo = COO2(ctx); ASSERT_TRUE(ArrayEQ(coo.row, tcoo.row)); ASSERT_TRUE(ArrayEQ(coo.col, tcoo.col)); } } TEST(SpmatTest, CSRToCOO) { _TestCSRToCOO(CPU); _TestCSRToCOO(CPU); #if DGL_USE_CUDA _TestCSRToCOO(GPU); _TestCSRToCOO(GPU); #endif } template void _TestCSRSliceRows(DGLContext ctx) { auto csr = CSR2(ctx); auto x = aten::CSRSliceRows(csr, 1, 4); // [1, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [0, 0, 0, 0, 0]] // data: [3, 1, 4] ASSERT_EQ(x.num_rows, 3); ASSERT_EQ(x.num_cols, 5); auto tp = aten::VecToIdArray(std::vector({0, 1, 3, 3}), sizeof(IDX) * 8, ctx); auto ti = aten::VecToIdArray(std::vector({0, 2, 3}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({3, 1, 4}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); auto r = aten::VecToIdArray(std::vector({0, 1, 3}), sizeof(IDX) * 8, ctx); x = aten::CSRSliceRows(csr, r); // [[0, 1, 2, 0, 0], // [1, 0, 0, 0, 0], // [0, 0, 0, 0, 0]] // data: [0, 2, 5, 3] tp = aten::VecToIdArray(std::vector({0, 3, 4, 4}), sizeof(IDX) * 8, ctx); ti = aten::VecToIdArray(std::vector({1, 2, 2, 0}), sizeof(IDX) * 8, ctx); td = aten::VecToIdArray(std::vector({0, 2, 5, 3}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); // Testing non-increasing row id based slicing r = aten::VecToIdArray(std::vector({3, 2, 1}), sizeof(IDX) * 8, ctx); x = aten::CSRSliceRows(csr, r); // [[0, 0, 0, 0, 0], // [0, 0, 1, 1, 0], // [1, 0, 0, 0, 0]] // data: [1, 4, 3] tp = aten::VecToIdArray(std::vector({0, 0, 2, 3}), sizeof(IDX) * 8, 
ctx); ti = aten::VecToIdArray(std::vector({2, 3, 0}), sizeof(IDX) * 8, ctx); td = aten::VecToIdArray(std::vector({1, 4, 3}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); // Testing zero-degree row slicing with different rows r = aten::VecToIdArray( std::vector({1, 3, 0, 3, 2}), sizeof(IDX) * 8, ctx); x = aten::CSRSliceRows(csr, r); // [[1, 0, 0, 0, 0], // [0, 0, 0, 0, 0], // [0, 1, 2, 0, 0], // [0, 0, 0, 0, 0], // [0, 0, 1, 1, 0]] // data: [3, 0, 2, 5, 1, 4] tp = aten::VecToIdArray( std::vector({0, 1, 1, 4, 4, 6}), sizeof(IDX) * 8, ctx); ti = aten::VecToIdArray( std::vector({0, 1, 2, 2, 2, 3}), sizeof(IDX) * 8, ctx); td = aten::VecToIdArray( std::vector({3, 0, 2, 5, 1, 4}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); // Testing empty output (i.e. sliced rows will be zero-degree) r = aten::VecToIdArray(std::vector({3, 3, 3}), sizeof(IDX) * 8, ctx); x = aten::CSRSliceRows(csr, r); // [[0, 0, 0, 0, 0], // [0, 0, 0, 0, 0], // [0, 0, 0, 0, 0]] // data: [] tp = aten::VecToIdArray(std::vector({0, 0, 0, 0}), sizeof(IDX) * 8, ctx); ti = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); td = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); // Testing constant output: we pick last row with at least one nnz r = aten::VecToIdArray(std::vector({2, 2, 2}), sizeof(IDX) * 8, ctx); x = aten::CSRSliceRows(csr, r); // [[0, 0, 1, 1, 0], // [0, 0, 1, 1, 0], // [0, 0, 1, 1, 0]] // data: [1, 4, 1, 4, 1, 4] tp = aten::VecToIdArray(std::vector({0, 2, 4, 6}), sizeof(IDX) * 8, ctx); ti = aten::VecToIdArray( std::vector({2, 3, 2, 3, 2, 3}), sizeof(IDX) * 8, ctx); td = aten::VecToIdArray( std::vector({1, 4, 1, 4, 1, 4}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); 
ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } TEST(SpmatTest, TestCSRSliceRows) { _TestCSRSliceRows(CPU); _TestCSRSliceRows(CPU); #ifdef DGL_USE_CUDA _TestCSRSliceRows(GPU); _TestCSRSliceRows(GPU); #endif } template void _TestCSRSliceMatrix1(DGLContext ctx) { auto csr = CSR2(ctx); { // square auto r = aten::VecToIdArray(std::vector({0, 1, 3}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({1, 2, 3}), sizeof(IDX) * 8, ctx); auto x = aten::CSRSliceMatrix(csr, r, c); // [[1, 2, 0], // [0, 0, 0], // [0, 0, 0]] // data: [0, 2, 5] ASSERT_EQ(x.num_rows, 3); ASSERT_EQ(x.num_cols, 3); auto tp = aten::VecToIdArray( std::vector({0, 3, 3, 3}), sizeof(IDX) * 8, ctx); auto ti = aten::VecToIdArray(std::vector({0, 1, 1}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({0, 2, 5}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } { // non-square auto r = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({0, 1}), sizeof(IDX) * 8, ctx); auto x = aten::CSRSliceMatrix(csr, r, c); // [[0, 1], // [1, 0], // [0, 0]] // data: [0, 3] ASSERT_EQ(x.num_rows, 3); ASSERT_EQ(x.num_cols, 2); auto tp = aten::VecToIdArray( std::vector({0, 1, 2, 2}), sizeof(IDX) * 8, ctx); auto ti = aten::VecToIdArray(std::vector({1, 0}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({0, 3}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } { // empty slice auto r = aten::VecToIdArray(std::vector({2, 3}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({0, 1}), sizeof(IDX) * 8, ctx); auto x = aten::CSRSliceMatrix(csr, r, c); // [[0, 0], // [0, 0]] // data: [] ASSERT_EQ(x.num_rows, 2); ASSERT_EQ(x.num_cols, 2); auto tp = aten::VecToIdArray(std::vector({0, 0, 0}), sizeof(IDX) * 8, ctx); auto ti = 
aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } } template void _TestCSRSliceMatrix2(DGLContext ctx) { auto csr = CSR3(ctx); { // square auto r = aten::VecToIdArray(std::vector({0, 1, 3}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({1, 2, 3}), sizeof(IDX) * 8, ctx); auto x = aten::CSRSliceMatrix(csr, r, c); // [[1, 1, 1], // [0, 0, 0], // [0, 0, 0]] // data: [5, 2, 0] ASSERT_EQ(x.num_rows, 3); ASSERT_EQ(x.num_cols, 3); auto tp = aten::VecToIdArray( std::vector({0, 3, 3, 3}), sizeof(IDX) * 8, ctx); // indexes are in reverse order in CSR3 auto ti = aten::VecToIdArray(std::vector({2, 1, 0}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({0, 2, 5}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } { // non-square auto r = aten::VecToIdArray(std::vector({0, 1, 2}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({0, 1}), sizeof(IDX) * 8, ctx); auto x = aten::CSRSliceMatrix(csr, r, c); // [[0, 1], // [1, 0], // [0, 0]] // data: [0, 3] ASSERT_EQ(x.num_rows, 3); ASSERT_EQ(x.num_cols, 2); auto tp = aten::VecToIdArray( std::vector({0, 1, 2, 2}), sizeof(IDX) * 8, ctx); auto ti = aten::VecToIdArray(std::vector({1, 0}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({5, 3}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } { // empty slice auto r = aten::VecToIdArray(std::vector({2, 3}), sizeof(IDX) * 8, ctx); auto c = aten::VecToIdArray(std::vector({0, 1}), sizeof(IDX) * 8, ctx); auto x = aten::CSRSliceMatrix(csr, r, c); // [[0, 0], // [0, 0]] // data: [] ASSERT_EQ(x.num_rows, 2); ASSERT_EQ(x.num_cols, 2); auto tp = aten::VecToIdArray(std::vector({0, 0, 0}), 
sizeof(IDX) * 8, ctx); auto ti = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); auto td = aten::VecToIdArray(std::vector({}), sizeof(IDX) * 8, ctx); ASSERT_TRUE(ArrayEQ(x.indptr, tp)); ASSERT_TRUE(ArrayEQ(x.indices, ti)); ASSERT_TRUE(ArrayEQ(x.data, td)); } } TEST(SpmatTest, CSRSliceMatrix) { _TestCSRSliceMatrix1(CPU); _TestCSRSliceMatrix1(CPU); _TestCSRSliceMatrix2(CPU); _TestCSRSliceMatrix2(CPU); #ifdef DGL_USE_CUDA _TestCSRSliceMatrix1(GPU); _TestCSRSliceMatrix1(GPU); _TestCSRSliceMatrix2(GPU); _TestCSRSliceMatrix2(GPU); #endif } template void _TestCSRHasDuplicate(DGLContext ctx) { auto csr = CSR1(ctx); ASSERT_FALSE(aten::CSRHasDuplicate(csr)); csr = CSR2(ctx); ASSERT_TRUE(aten::CSRHasDuplicate(csr)); } TEST(SpmatTest, CSRHasDuplicate) { _TestCSRHasDuplicate(CPU); _TestCSRHasDuplicate(CPU); #ifdef DGL_USE_CUDA _TestCSRHasDuplicate(GPU); _TestCSRHasDuplicate(GPU); #endif } template void _TestCSRSort(DGLContext ctx) { auto csr = CSR1(ctx); ASSERT_FALSE(aten::CSRIsSorted(csr)); auto csr1 = aten::CSRSort(csr); ASSERT_FALSE(aten::CSRIsSorted(csr)); ASSERT_TRUE(aten::CSRIsSorted(csr1)); ASSERT_TRUE(csr1.sorted); aten::CSRSort_(&csr); ASSERT_TRUE(aten::CSRIsSorted(csr)); ASSERT_TRUE(csr.sorted); csr = CSR2(ctx); ASSERT_TRUE(aten::CSRIsSorted(csr)); } TEST(SpmatTest, CSRSort) { _TestCSRSort(CPU); _TestCSRSort(CPU); #ifdef DGL_USE_CUDA _TestCSRSort(GPU); _TestCSRSort(GPU); #endif } template void _TestCSRReorder() { auto csr = CSR2(); auto new_row = aten::VecToIdArray(std::vector({2, 0, 3, 1}), sizeof(IDX) * 8, CTX); auto new_col = aten::VecToIdArray( std::vector({2, 0, 4, 3, 1}), sizeof(IDX) * 8, CTX); auto new_csr = CSRReorder(csr, new_row, new_col); ASSERT_EQ(new_csr.num_rows, csr.num_rows); ASSERT_EQ(new_csr.num_cols, csr.num_cols); } TEST(SpmatTest, TestCSRReorder) { _TestCSRReorder(); _TestCSRReorder(); } ================================================ FILE: tests/cpp/test_spmm.cc ================================================ #if !defined(_WIN32) 
#include <../src/array/cpu/spmm.h> #include #include #include #include #include "./common.h" using namespace dgl; using namespace dgl::runtime; int sizes[] = {1, 7, 8, 9, 31, 32, 33, 54, 63, 64, 65, 256, 257}; namespace ns_op = dgl::aten::cpu::op; namespace { template void GenerateData(T* data, int dim, T mul) { for (int i = 0; i < dim; i++) { data[i] = (i + 1) * mul; } } template void GenerateRandomData(T* data, int dim) { std::mt19937 rng(std::random_device{}()); std::uniform_int_distribution<> dist(0, 10000); for (int i = 0; i < dim; i++) { data[i] = (dist(rng) / 100); } } template void GenerateZeroData(T* data, int dim) { for (int i = 0; i < dim; i++) { data[i] = 0; } } template void Copy(T* exp, T* out, T* hs, int dim) { for (int i = 0; i < dim; i++) { exp[i] = out[i] + hs[i]; } } template void Add(T* exp, T* out, T* lhs, T* rhs, int dim) { for (int i = 0; i < dim; i++) { exp[i] = out[i] + lhs[i] + rhs[i]; } } template void Sub(T* exp, T* out, T* lhs, T* rhs, int dim) { for (int i = 0; i < dim; i++) { exp[i] = out[i] + lhs[i] - rhs[i]; } } template void Mul(T* exp, T* out, T* lhs, T* rhs, int dim) { for (int i = 0; i < dim; i++) { exp[i] = (out[i] + (lhs[i] * rhs[i])); } } template void Div(T* exp, T* out, T* lhs, T* rhs, int dim) { for (int i = 0; i < dim; i++) { exp[i] = (out[i] + (lhs[i] / rhs[i])); } } template void CheckResult(T* exp, T* out, int dim) { for (int i = 0; i < dim; i++) { ASSERT_TRUE(exp[i] == out[i]); } } } // namespace template void _TestSpmmCopyLhs() { for (size_t i = 0; i < sizeof(sizes) / sizeof(int); i++) { int dim = sizes[i]; IDX out[dim], exp[dim], lhs[dim]; GenerateZeroData(out, dim); GenerateRandomData(lhs, dim); // Calculation of expected output - 'exp' Copy(exp, out, lhs, dim); // Calculation of output using legacy path - 'out' for (int k = 0; k < dim; k++) { out[k] += ns_op::CopyLhs::Call(lhs + k, nullptr); } CheckResult(exp, out, dim); } } TEST(SpmmTest, TestSpmmCopyLhs) { _TestSpmmCopyLhs(); _TestSpmmCopyLhs(); 
_TestSpmmCopyLhs(); } template void _TestSpmmCopyRhs() { for (size_t i = 0; i < sizeof(sizes) / sizeof(int); i++) { int dim = sizes[i]; IDX out[dim], exp[dim], rhs[dim]; GenerateZeroData(out, dim); GenerateRandomData(rhs, dim); // Calculation of expected output - 'exp' Copy(exp, out, rhs, dim); // Calculation of output using legacy path - 'out' for (int k = 0; k < dim; k++) { out[k] += ns_op::CopyRhs::Call(nullptr, rhs + k); } CheckResult(exp, out, dim); } } TEST(SpmmTest, TestSpmmCopyRhs) { _TestSpmmCopyRhs(); _TestSpmmCopyRhs(); _TestSpmmCopyRhs(); } template void _TestSpmmAdd() { for (size_t i = 0; i < sizeof(sizes) / sizeof(int); i++) { int dim = sizes[i]; IDX out[dim], exp[dim], lhs[dim], rhs[dim]; GenerateZeroData(out, dim); GenerateRandomData(lhs, dim); GenerateRandomData(rhs, dim); // Calculation of expected output - 'exp' Add(exp, out, lhs, rhs, dim); // Calculation of output using legacy path - 'out' for (int k = 0; k < dim; k++) { out[k] += ns_op::Add::Call(lhs + k, rhs + k); } CheckResult(exp, out, dim); } } TEST(SpmmTest, TestSpmmAdd) { _TestSpmmAdd(); _TestSpmmAdd(); _TestSpmmAdd(); } template void _TestSpmmSub() { for (size_t i = 0; i < sizeof(sizes) / sizeof(int); i++) { int dim = sizes[i]; IDX out[dim], exp[dim], lhs[dim], rhs[dim]; GenerateZeroData(out, dim); GenerateRandomData(lhs, dim); GenerateRandomData(rhs, dim); // Calculation of expected output - 'exp' Sub(exp, out, lhs, rhs, dim); // Calculation of output using legacy path - 'out' for (int k = 0; k < dim; k++) { out[k] += ns_op::Sub::Call(lhs + k, rhs + k); } CheckResult(exp, out, dim); } } TEST(SpmmTest, TestSpmmSub) { _TestSpmmSub(); _TestSpmmSub(); _TestSpmmSub(); } template void _TestSpmmMul() { for (size_t i = 0; i < sizeof(sizes) / sizeof(int); i++) { int dim = sizes[i]; IDX out[dim], exp[dim], lhs[dim], rhs[dim]; GenerateZeroData(out, dim); GenerateRandomData(lhs, dim); GenerateRandomData(rhs, dim); // Calculation of expected output - 'exp' Mul(exp, out, lhs, rhs, dim); // 
Calculation of output using legacy path - 'out' for (int k = 0; k < dim; k++) { out[k] += ns_op::Mul::Call(lhs + k, rhs + k); } CheckResult(exp, out, dim); } } TEST(SpmmTest, TestSpmmMul) { _TestSpmmMul(); _TestSpmmMul(); _TestSpmmMul(); } template void _TestSpmmDiv() { for (size_t i = 0; i < sizeof(sizes) / sizeof(int); i++) { int dim = sizes[i]; IDX out[dim], exp[dim], lhs[dim], rhs[dim]; GenerateZeroData(out, dim); GenerateData(lhs, dim, (IDX)15); GenerateData(rhs, dim, (IDX)1); // Calculation of expected output - 'exp' Div(exp, out, lhs, rhs, dim); // Calculation of output using legacy path - 'out' for (int k = 0; k < dim; k++) { out[k] += ns_op::Div::Call(lhs + k, rhs + k); } CheckResult(exp, out, dim); } } TEST(SpmmTest, TestSpmmDiv) { _TestSpmmDiv(); _TestSpmmDiv(); _TestSpmmDiv(); } #endif // _WIN32 ================================================ FILE: tests/cpp/test_unit_graph.cc ================================================ /** * Copyright (c) 2019 by Contributors * @file test_unit_graph.cc * @brief Test UnitGraph */ #include #include #include #include #include #include #include "../../src/graph/unit_graph.h" #include "./../src/graph/heterograph.h" #include "./common.h" using namespace dgl; using namespace dgl::runtime; template aten::CSRMatrix CSR1(DGLContext ctx) { /** * G = [[0, 0, 1], * [1, 0, 1], * [0, 1, 0], * [1, 0, 1]] */ IdArray g_indptr = aten::VecToIdArray( std::vector({0, 1, 3, 4, 6}), sizeof(IdType) * 8, CTX); IdArray g_indices = aten::VecToIdArray( std::vector({2, 0, 2, 1, 0, 2}), sizeof(IdType) * 8, CTX); const aten::CSRMatrix &csr_a = aten::CSRMatrix(4, 3, g_indptr, g_indices, aten::NullArray(), false); return csr_a; } template aten::CSRMatrix CSR1(DGLContext ctx); template aten::CSRMatrix CSR1(DGLContext ctx); template aten::COOMatrix COO1(DGLContext ctx) { /** * G = [[1, 1, 0], * [0, 1, 0]] */ IdArray g_row = aten::VecToIdArray( std::vector({0, 0, 1}), sizeof(IdType) * 8, CTX); IdArray g_col = aten::VecToIdArray( std::vector({0, 1, 
1}), sizeof(IdType) * 8, CTX); const aten::COOMatrix &coo = aten::COOMatrix(2, 3, g_row, g_col, aten::NullArray(), true, true); return coo; } template aten::COOMatrix COO1(DGLContext ctx); template aten::COOMatrix COO1(DGLContext ctx); template void _TestUnitGraph_InOutDegrees(DGLContext ctx) { /** InDegree(s) is available only if COO or CSC formats permitted. OutDegree(s) is available only if COO or CSR formats permitted. */ // COO { const aten::COOMatrix &coo = COO1(ctx); auto &&g = CreateFromCOO(2, coo, COO_CODE); ASSERT_EQ(g->InDegree(0, 0), 1); auto &&nids = aten::Range(0, g->NumVertices(0), g->NumBits(), g->Context()); ASSERT_TRUE(ArrayEQ( g->InDegrees(0, nids), aten::VecToIdArray({1, 2}, g->NumBits(), g->Context()))); ASSERT_EQ(g->OutDegree(0, 0), 2); ASSERT_TRUE(ArrayEQ( g->OutDegrees(0, nids), aten::VecToIdArray({2, 1}, g->NumBits(), g->Context()))); } // CSC { const aten::CSRMatrix &csr = CSR1(ctx); auto &&g = CreateFromCSC(2, csr, CSC_CODE); ASSERT_EQ(g->InDegree(0, 0), 1); auto &&nids = aten::Range(0, g->NumVertices(0), g->NumBits(), g->Context()); ASSERT_TRUE(ArrayEQ( g->InDegrees(0, nids), aten::VecToIdArray({1, 2, 1}, g->NumBits(), g->Context()))); EXPECT_ANY_THROW(g->OutDegree(0, 0)); EXPECT_ANY_THROW(g->OutDegrees(0, nids)); } // CSR { const aten::CSRMatrix &csr = CSR1(ctx); auto &&g = CreateFromCSR(2, csr, CSR_CODE); ASSERT_EQ(g->OutDegree(0, 0), 1); auto &&nids = aten::Range(0, g->NumVertices(0), g->NumBits(), g->Context()); ASSERT_TRUE(ArrayEQ( g->OutDegrees(0, nids), aten::VecToIdArray({1, 2, 1, 2}, g->NumBits(), g->Context()))); EXPECT_ANY_THROW(g->InDegree(0, 0)); EXPECT_ANY_THROW(g->InDegrees(0, nids)); } } template void _TestUnitGraph(DGLContext ctx) { const aten::CSRMatrix &csr = CSR1(ctx); const aten::COOMatrix &coo = COO1(ctx); auto g = CreateFromCSC(2, csr); ASSERT_EQ(g->GetCreatedFormats(), 4); g = CreateFromCSR(2, csr); ASSERT_EQ(g->GetCreatedFormats(), 2); g = CreateFromCOO(2, coo); ASSERT_EQ(g->GetCreatedFormats(), 1); auto src = 
aten::VecToIdArray({1, 2, 5, 3}); auto dst = aten::VecToIdArray({1, 6, 2, 6}); auto mg = dgl::UnitGraph::CreateFromCOO(2, 9, 8, src, dst, COO_CODE); ASSERT_EQ(mg->GetCreatedFormats(), 1); auto hmg = dgl::UnitGraph::CreateFromCOO(1, 8, 8, src, dst, COO_CODE); auto img = std::dynamic_pointer_cast(hmg->AsImmutableGraph()); ASSERT_TRUE(img != nullptr); mg = dgl::UnitGraph::CreateFromCOO(2, 9, 8, src, dst, CSR_CODE | COO_CODE); ASSERT_EQ(mg->GetCreatedFormats(), 1); hmg = dgl::UnitGraph::CreateFromCOO(1, 8, 8, src, dst, CSR_CODE | COO_CODE); img = std::dynamic_pointer_cast(hmg->AsImmutableGraph()); ASSERT_TRUE(img != nullptr); mg = dgl::UnitGraph::CreateFromCOO(2, 9, 8, src, dst, CSC_CODE | COO_CODE); ASSERT_EQ(mg->GetCreatedFormats(), 1); hmg = dgl::UnitGraph::CreateFromCOO(1, 8, 8, src, dst, CSC_CODE | COO_CODE); img = std::dynamic_pointer_cast(hmg->AsImmutableGraph()); ASSERT_TRUE(img != nullptr); g = CreateFromCSC(2, csr); ASSERT_EQ(g->GetCreatedFormats(), 4); g = CreateFromCSR(2, csr); ASSERT_EQ(g->GetCreatedFormats(), 2); g = CreateFromCOO(2, coo); ASSERT_EQ(g->GetCreatedFormats(), 1); } template void _TestUnitGraph_GetInCSR(DGLContext ctx) { const aten::CSRMatrix &csr = CSR1(ctx); const aten::COOMatrix &coo = COO1(ctx); auto g = CreateFromCSC(2, csr); auto in_csr_matrix = g->GetCSCMatrix(0); ASSERT_EQ(in_csr_matrix.num_rows, csr.num_rows); ASSERT_EQ(in_csr_matrix.num_cols, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 4); // test out csr g = CreateFromCSR(2, csr); auto g_ptr = g->GetGraphInFormat(CSC_CODE); in_csr_matrix = g_ptr->GetCSCMatrix(0); ASSERT_EQ(in_csr_matrix.num_cols, csr.num_rows); ASSERT_EQ(in_csr_matrix.num_rows, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 2); in_csr_matrix = g->GetCSCMatrix(0); ASSERT_EQ(in_csr_matrix.num_cols, csr.num_rows); ASSERT_EQ(in_csr_matrix.num_rows, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 6); // test out coo g = CreateFromCOO(2, coo); g_ptr = g->GetGraphInFormat(CSC_CODE); in_csr_matrix = 
g_ptr->GetCSCMatrix(0); ASSERT_EQ(in_csr_matrix.num_cols, coo.num_rows); ASSERT_EQ(in_csr_matrix.num_rows, coo.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 1); in_csr_matrix = g->GetCSCMatrix(0); ASSERT_EQ(in_csr_matrix.num_cols, coo.num_rows); ASSERT_EQ(in_csr_matrix.num_rows, coo.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 5); } template void _TestUnitGraph_GetOutCSR(DGLContext ctx) { const aten::CSRMatrix &csr = CSR1(ctx); const aten::COOMatrix &coo = COO1(ctx); auto g = CreateFromCSC(2, csr); auto g_ptr = g->GetGraphInFormat(CSR_CODE); auto out_csr_matrix = g_ptr->GetCSRMatrix(0); ASSERT_EQ(out_csr_matrix.num_cols, csr.num_rows); ASSERT_EQ(out_csr_matrix.num_rows, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 4); out_csr_matrix = g->GetCSRMatrix(0); ASSERT_EQ(out_csr_matrix.num_cols, csr.num_rows); ASSERT_EQ(out_csr_matrix.num_rows, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 6); // test out csr g = CreateFromCSR(2, csr); out_csr_matrix = g->GetCSRMatrix(0); ASSERT_EQ(out_csr_matrix.num_rows, csr.num_rows); ASSERT_EQ(out_csr_matrix.num_cols, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 2); // test out coo g = CreateFromCOO(2, coo); g_ptr = g->GetGraphInFormat(CSR_CODE); out_csr_matrix = g_ptr->GetCSRMatrix(0); ASSERT_EQ(out_csr_matrix.num_rows, coo.num_rows); ASSERT_EQ(out_csr_matrix.num_cols, coo.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 1); out_csr_matrix = g->GetCSRMatrix(0); ASSERT_EQ(out_csr_matrix.num_rows, coo.num_rows); ASSERT_EQ(out_csr_matrix.num_cols, coo.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 3); } template void _TestUnitGraph_GetCOO(DGLContext ctx) { const aten::CSRMatrix &csr = CSR1(ctx); const aten::COOMatrix &coo = COO1(ctx); auto g = CreateFromCSC(2, csr); auto g_ptr = g->GetGraphInFormat(COO_CODE); auto out_coo_matrix = g_ptr->GetCOOMatrix(0); ASSERT_EQ(out_coo_matrix.num_cols, csr.num_rows); ASSERT_EQ(out_coo_matrix.num_rows, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 4); out_coo_matrix = g->GetCOOMatrix(0); 
ASSERT_EQ(out_coo_matrix.num_cols, csr.num_rows); ASSERT_EQ(out_coo_matrix.num_rows, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 5); // test out csr g = CreateFromCSR(2, csr); g_ptr = g->GetGraphInFormat(COO_CODE); out_coo_matrix = g_ptr->GetCOOMatrix(0); ASSERT_EQ(out_coo_matrix.num_rows, csr.num_rows); ASSERT_EQ(out_coo_matrix.num_cols, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 2); out_coo_matrix = g->GetCOOMatrix(0); ASSERT_EQ(out_coo_matrix.num_rows, csr.num_rows); ASSERT_EQ(out_coo_matrix.num_cols, csr.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 3); // test out coo g = CreateFromCOO(2, coo); out_coo_matrix = g->GetCOOMatrix(0); ASSERT_EQ(out_coo_matrix.num_rows, coo.num_rows); ASSERT_EQ(out_coo_matrix.num_cols, coo.num_cols); ASSERT_EQ(g->GetCreatedFormats(), 1); } template void _TestUnitGraph_Reserve(DGLContext ctx) { const aten::CSRMatrix &csr = CSR1(ctx); const aten::COOMatrix &coo = COO1(ctx); auto g = CreateFromCSC(2, csr); ASSERT_EQ(g->GetCreatedFormats(), 4); auto r_g = std::dynamic_pointer_cast(g->GetRelationGraph(0))->Reverse(); ASSERT_EQ(r_g->GetCreatedFormats(), 2); aten::CSRMatrix g_in_csr = g->GetCSCMatrix(0); aten::CSRMatrix r_g_out_csr = r_g->GetCSRMatrix(0); ASSERT_TRUE(g_in_csr.indptr->data == r_g_out_csr.indptr->data); ASSERT_TRUE(g_in_csr.indices->data == r_g_out_csr.indices->data); aten::CSRMatrix g_out_csr = g->GetCSRMatrix(0); ASSERT_EQ(g->GetCreatedFormats(), 6); ASSERT_EQ(r_g->GetCreatedFormats(), 6); aten::CSRMatrix r_g_in_csr = r_g->GetCSCMatrix(0); ASSERT_TRUE(g_out_csr.indptr->data == r_g_in_csr.indptr->data); ASSERT_TRUE(g_out_csr.indices->data == r_g_in_csr.indices->data); aten::COOMatrix g_coo = g->GetCOOMatrix(0); ASSERT_EQ(g->GetCreatedFormats(), 7); ASSERT_EQ(r_g->GetCreatedFormats(), 6); aten::COOMatrix r_g_coo = r_g->GetCOOMatrix(0); ASSERT_EQ(r_g->GetCreatedFormats(), 7); ASSERT_EQ(g_coo.num_rows, r_g_coo.num_cols); ASSERT_EQ(g_coo.num_cols, r_g_coo.num_rows); ASSERT_TRUE(ArrayEQ(g_coo.row, r_g_coo.col)); 
ASSERT_TRUE(ArrayEQ(g_coo.col, r_g_coo.row)); // test out csr g = CreateFromCSR(2, csr); ASSERT_EQ(g->GetCreatedFormats(), 2); r_g = std::dynamic_pointer_cast(g->GetRelationGraph(0))->Reverse(); ASSERT_EQ(r_g->GetCreatedFormats(), 4); g_out_csr = g->GetCSRMatrix(0); r_g_in_csr = r_g->GetCSCMatrix(0); ASSERT_TRUE(g_out_csr.indptr->data == r_g_in_csr.indptr->data); ASSERT_TRUE(g_out_csr.indices->data == r_g_in_csr.indices->data); g_in_csr = g->GetCSCMatrix(0); ASSERT_EQ(g->GetCreatedFormats(), 6); ASSERT_EQ(r_g->GetCreatedFormats(), 6); r_g_out_csr = r_g->GetCSRMatrix(0); ASSERT_TRUE(g_in_csr.indptr->data == r_g_out_csr.indptr->data); ASSERT_TRUE(g_in_csr.indices->data == r_g_out_csr.indices->data); g_coo = g->GetCOOMatrix(0); ASSERT_EQ(g->GetCreatedFormats(), 7); ASSERT_EQ(r_g->GetCreatedFormats(), 6); r_g_coo = r_g->GetCOOMatrix(0); ASSERT_EQ(r_g->GetCreatedFormats(), 7); ASSERT_EQ(g_coo.num_rows, r_g_coo.num_cols); ASSERT_EQ(g_coo.num_cols, r_g_coo.num_rows); ASSERT_TRUE(ArrayEQ(g_coo.row, r_g_coo.col)); ASSERT_TRUE(ArrayEQ(g_coo.col, r_g_coo.row)); // test out coo g = CreateFromCOO(2, coo); ASSERT_EQ(g->GetCreatedFormats(), 1); r_g = std::dynamic_pointer_cast(g->GetRelationGraph(0))->Reverse(); ASSERT_EQ(r_g->GetCreatedFormats(), 1); g_coo = g->GetCOOMatrix(0); r_g_coo = r_g->GetCOOMatrix(0); ASSERT_EQ(g_coo.num_rows, r_g_coo.num_cols); ASSERT_EQ(g_coo.num_cols, r_g_coo.num_rows); ASSERT_TRUE(g_coo.row->data == r_g_coo.col->data); ASSERT_TRUE(g_coo.col->data == r_g_coo.row->data); g_in_csr = g->GetCSCMatrix(0); ASSERT_EQ(g->GetCreatedFormats(), 5); ASSERT_EQ(r_g->GetCreatedFormats(), 3); r_g_out_csr = r_g->GetCSRMatrix(0); ASSERT_TRUE(g_in_csr.indptr->data == r_g_out_csr.indptr->data); ASSERT_TRUE(g_in_csr.indices->data == r_g_out_csr.indices->data); g_out_csr = g->GetCSRMatrix(0); ASSERT_EQ(g->GetCreatedFormats(), 7); ASSERT_EQ(r_g->GetCreatedFormats(), 7); r_g_in_csr = r_g->GetCSCMatrix(0); ASSERT_TRUE(g_out_csr.indptr->data == r_g_in_csr.indptr->data); 
// Checks that UnitGraph::CopyTo() from `src_ctx` to `dst_ctx` preserves the
// set of created formats for graphs built from CSC (4), CSR (2) and COO (1).
//
// NOTE(review): the template parameter list and explicit template arguments
// appear to be missing from this copy of the file; tokens are left as found.
template void _TestUnitGraph_CopyTo(
    const DGLContext &src_ctx, const DGLContext &dst_ctx) {
  const aten::CSRMatrix &csr = CSR1(src_ctx);
  const aten::COOMatrix &coo = COO1(src_ctx);
  auto device = dgl::runtime::DeviceAPI::Get(dst_ctx);
  // We don't allow SetStream in DGL for now.
  // A null stream is passed to StreamSync() below.
  auto stream = nullptr;

  // CSC-created graph.
  auto g = dgl::UnitGraph::CreateFromCSC(2, csr);
  ASSERT_EQ(g->GetCreatedFormats(), 4);
  auto cg = dgl::UnitGraph::CopyTo(g, dst_ctx);
  // Synchronize on the destination device before inspecting the copy.
  device->StreamSync(dst_ctx, stream);
  ASSERT_EQ(cg->GetCreatedFormats(), 4);

  // CSR-created graph.
  g = dgl::UnitGraph::CreateFromCSR(2, csr);
  ASSERT_EQ(g->GetCreatedFormats(), 2);
  cg = dgl::UnitGraph::CopyTo(g, dst_ctx);
  device->StreamSync(dst_ctx, stream);
  ASSERT_EQ(cg->GetCreatedFormats(), 2);

  // COO-created graph.
  g = dgl::UnitGraph::CreateFromCOO(2, coo);
  ASSERT_EQ(g->GetCreatedFormats(), 1);
  cg = dgl::UnitGraph::CopyTo(g, dst_ctx);
  device->StreamSync(dst_ctx, stream);
  ASSERT_EQ(cg->GetCreatedFormats(), 1);
}
// Function to convert an idarray to string
//
// The array is first copied to the CPU context, then rendered as
// "(<len>)[<v0> <v1> ... ]" where <len> is shape[0]; every element is
// followed by a single space, including the last one.
//
// NOTE(review): the target types of the two static_cast expressions appear
// to be missing from this copy of the file; tokens are left as found.
std::string IdArrayToStr(IdArray arr) {
  arr = arr.CopyTo(DGLContext{kDGLCPU, 0});
  int64_t len = arr->shape[0];
  std::ostringstream oss;
  oss << "(" << len << ")[";
  // Elements are read as int32_t when dtype.bits == 32, otherwise as int64_t.
  if (arr->dtype.bits == 32) {
    int32_t *data = static_cast(arr->data);
    for (int64_t i = 0; i < len; ++i) {
      oss << data[i] << " ";
    }
  } else {
    int64_t *data = static_cast(arr->data);
    for (int64_t i = 0; i < len; ++i) {
      oss << data[i] << " ";
    }
  }
  oss << "]";
  return oss.str();
}
// Round-trips a vector of three id arrays, the middle one empty, through a
// StreamWithBuffer and checks that the empty array is read back as a
// 1-dimensional array of length 0.
//
// NOTE(review): the element types of the std::vector declarations appear to
// be missing from this copy of the file; tokens are left as found.
TEST(ZeroCopySerialize, ZeroShapeNDArray) {
  auto tensor1 = VecToIdArray({6, 6, 5, 7});
  auto tensor2 = VecToIdArray({});  // the zero-length array under test
  auto tensor3 = VecToIdArray({6, 6, 2, 7});
  std::vector ndvec;
  ndvec.push_back(tensor1);
  ndvec.push_back(tensor2);
  ndvec.push_back(tensor3);

  // Write the whole vector into a blob plus a list of side buffers.
  std::string zerocopy_blob;
  StreamWithBuffer zc_write_strm(&zerocopy_blob, true);
  zc_write_strm.Write(ndvec);

  std::vector new_ptr_list;
  // Use memcpy to mimic remote machine reconstruction
  // NOTE(review): the malloc'd copies are not freed in this test; presumably
  // the reading side takes ownership of them -- confirm.
  for (auto ptr : zc_write_strm.buffer_list()) {
    auto new_ptr = malloc(ptr.size);
    memcpy(new_ptr, ptr.data, ptr.size);
    new_ptr_list.emplace_back(new_ptr);
  }

  // Read the vector back from the blob and the copied buffers.
  std::vector ndvec_read;
  StreamWithBuffer zc_read_strm(&zerocopy_blob, new_ptr_list);
  zc_read_strm.Read(&ndvec_read);
  EXPECT_EQ(ndvec_read[1]->ndim, 1);
  EXPECT_EQ(ndvec_read[1]->shape[0], 0);
}
@pytest.mark.parametrize(",".join(options.keys()), product(*options.values()))
def test_gatconv_equality(idtype_int, max_in_degree, num_heads, to_block):
    """Compare CuGraphGATConv against GATConv on the same graph and weights.

    The reference layer's attention and projection weights are copied into
    the cuGraph layer; forward outputs and the gradients of both weight sets
    must then agree within ``atol=1e-6``.
    """
    device = "cuda:0"
    in_feat, out_feat = 10, 2

    graph = generate_graph().to(device)
    if idtype_int:
        graph = graph.int()
    if to_block:
        graph = dgl.to_block(graph)
    feat = torch.rand(graph.num_src_nodes(), in_feat).to(device)

    # Reference layer.
    torch.manual_seed(0)
    ref_conv = GATConv(
        in_feat, out_feat, num_heads, bias=False, allow_zero_in_degree=True
    ).to(device)
    ref_out = ref_conv(graph, feat)

    # cuGraph layer, re-seeded and then overwritten with the reference weights.
    torch.manual_seed(0)
    cu_conv = CuGraphGATConv(in_feat, out_feat, num_heads, bias=False).to(device)
    split = num_heads * out_feat
    with torch.no_grad():
        # attn_weights holds the flattened attn_l followed by attn_r.
        cu_conv.attn_weights.data[:split] = ref_conv.attn_l.data.flatten()
        cu_conv.attn_weights.data[split:] = ref_conv.attn_r.data.flatten()
        cu_conv.fc.weight.data[:] = ref_conv.fc.weight.data
    cu_out = cu_conv(graph, feat, max_in_degree=max_in_degree)

    assert torch.allclose(ref_out, cu_out, atol=1e-6)

    # Back-propagate the same upstream gradient through both layers.
    ref_grad_out = torch.rand_like(ref_out)
    cu_grad_out = ref_grad_out.clone().detach()
    ref_out.backward(ref_grad_out)
    cu_out.backward(cu_grad_out)

    assert torch.allclose(
        ref_conv.fc.weight.grad, cu_conv.fc.weight.grad, atol=1e-6
    )
    ref_attn_grad = torch.cat(
        (ref_conv.attn_l.grad, ref_conv.attn_r.grad), dim=0
    )
    cu_attn_grad = cu_conv.attn_weights.grad.view(2, num_heads, out_feat)
    assert torch.allclose(ref_attn_grad, cu_attn_grad, atol=1e-6)
@pytest.mark.parametrize(",".join(options.keys()), product(*options.values()))
def test_relgraphconv_equality(
    idtype_int, max_in_degree, num_bases, regularizer, self_loop, to_block
):
    """Compare CuGraphRelGraphConv against RelGraphConv.

    Both layers are built after seeding the RNG with 0; forward outputs and
    weight gradients (relation weights, self-loop weight, basis coefficients)
    must agree within ``atol=1e-6``.
    """
    device = "cuda:0"
    in_feat, out_feat, num_rels = 10, 2, 3
    layer_kwargs = {
        "num_bases": num_bases,
        "regularizer": regularizer,
        "bias": False,
        "self_loop": self_loop,
    }

    graph = generate_graph().to(device)
    graph.edata[dgl.ETYPE] = torch.randint(
        num_rels, (graph.num_edges(),)
    ).to(device)
    if idtype_int:
        graph = graph.int()
    if to_block:
        graph = dgl.to_block(graph)
    feat = torch.rand(graph.num_src_nodes(), in_feat).to(device)

    torch.manual_seed(0)
    ref_conv = RelGraphConv(in_feat, out_feat, num_rels, **layer_kwargs).to(
        device
    )

    torch.manual_seed(0)
    # Only the cuGraph layer receives apply_norm.
    layer_kwargs["apply_norm"] = False
    cu_conv = CuGraphRelGraphConv(
        in_feat, out_feat, num_rels, **layer_kwargs
    ).to(device)

    etypes = graph.edata[dgl.ETYPE]
    ref_out = ref_conv(graph, feat, etypes)
    cu_out = cu_conv(
        graph, feat, graph.edata[dgl.ETYPE], max_in_degree=max_in_degree
    )
    assert torch.allclose(ref_out, cu_out, atol=1e-06)

    upstream = torch.rand_like(ref_out)
    ref_out.backward(upstream)
    cu_out.backward(upstream)

    # With a self loop, the last slice of the cuGraph weight is the loop
    # weight, so it is excluded from the relation-weight comparison.
    if self_loop:
        rel_end = -1
    else:
        rel_end = None
    assert torch.allclose(
        ref_conv.linear_r.W.grad, cu_conv.W.grad[:rel_end], atol=1e-6
    )
    if self_loop:
        assert torch.allclose(
            ref_conv.loop_weight.grad, cu_conv.W.grad[-1], atol=1e-6
        )
    if regularizer is not None:
        assert torch.allclose(
            ref_conv.linear_r.coeff.grad, cu_conv.coeff.grad, atol=1e-6
        )
@pytest.mark.parametrize(",".join(options.keys()), product(*options.values()))
def test_SAGEConv_equality(idtype_int, max_in_degree, to_block):
    """Compare CuGraphSAGEConv against SAGEConv with the "mean" aggregator.

    The reference layer's neighbor/self weights and self bias are copied into
    the cuGraph layer's single linear layer; outputs and gradients must agree
    within ``atol=1e-6``.
    """
    device = "cuda:0"
    in_feat, out_feat = 5, 2

    graph = generate_graph().to(device)
    if idtype_int:
        graph = graph.int()
    if to_block:
        graph = dgl.to_block(graph)
    feat = torch.rand(graph.num_src_nodes(), in_feat).to(device)

    torch.manual_seed(0)
    ref_conv = SAGEConv(in_feat, out_feat, aggregator_type="mean").to(device)

    torch.manual_seed(0)
    cu_conv = CuGraphSAGEConv(in_feat, out_feat, aggregator_type="mean").to(
        device
    )

    # The cuGraph weight is [neighbor weights | self weights] along dim 1.
    with torch.no_grad():
        cu_conv.linear.weight.data[:, :in_feat] = ref_conv.fc_neigh.weight.data
        cu_conv.linear.weight.data[:, in_feat:] = ref_conv.fc_self.weight.data
        cu_conv.linear.bias.data[:] = ref_conv.fc_self.bias.data

    ref_out = ref_conv(graph, feat)
    cu_out = cu_conv(graph, feat, max_in_degree=max_in_degree)
    assert torch.allclose(ref_out, cu_out, atol=1e-06)

    upstream = torch.rand_like(ref_out)
    ref_out.backward(upstream)
    cu_out.backward(upstream)

    cu_weight_grad = cu_conv.linear.weight.grad
    assert torch.allclose(
        ref_conv.fc_neigh.weight.grad, cu_weight_grad[:, :in_feat], atol=1e-6
    )
    assert torch.allclose(
        ref_conv.fc_self.weight.grad, cu_weight_grad[:, in_feat:], atol=1e-6
    )
    assert torch.allclose(
        ref_conv.fc_self.bias.grad, cu_conv.linear.bias.grad, atol=1e-6
    )
def test_from_cugraph_conversion():
    """Convert directed and undirected cugraph graphs with dgl.from_cugraph.

    The directed case must keep edge direction (no reverse edges); the
    undirected case must expose both directions of each edge.
    """
    # cudf is a dependency of cugraph
    import cudf

    def convert(directed):
        # Build the cugraph graph from a fresh edge list, then convert it.
        cu_graph = cugraph.Graph(directed=directed)
        edges = cudf.DataFrame(
            {"source": [0, 1, 2, 3], "destination": [1, 2, 3, 2]}
        )
        cu_graph.from_cudf_edgelist(edges)
        return cu_graph, dgl.from_cugraph(cu_graph)

    # directed graph conversion test
    cugraph_g, g = convert(True)
    assert g.device.type == "cuda"
    assert g.num_nodes() == cugraph_g.number_of_nodes()
    assert g.num_edges() == cugraph_g.number_of_edges()
    # assert reverse edges are not present
    directed_expectations = (
        (0, 1, True),
        (1, 0, False),
        (1, 2, True),
        (2, 1, False),
        (2, 3, True),
    )
    for src, dst, present in directed_expectations:
        assert bool(g.has_edges_between(src, dst)) is present

    # undirected graph conversion test
    cugraph_g, g = convert(False)
    assert g.device.type == "cuda"
    assert g.num_nodes() == cugraph_g.number_of_nodes()
    # assert reverse edges are present
    for src, dst in ((0, 1), (1, 0), (1, 2), (2, 1), (2, 3)):
        assert g.has_edges_between(src, dst)
class HelloRequest(dgl.distributed.Request):
    """Request carrying a string, an integer, a tensor and a callable.

    When processed, the string and integer are checked against the module
    constants ``STR`` and ``INTEGER``, the callable is applied to the tensor,
    and the result is returned in a ``HelloResponse``.
    """

    def __init__(self, hello_str, integer, tensor, func):
        self.hello_str, self.integer = hello_str, integer
        self.tensor, self.func = tensor, func

    def __getstate__(self):
        # Flat 4-tuple; __setstate__ unpacks it in the same order.
        state = (self.hello_str, self.integer, self.tensor, self.func)
        return state

    def __setstate__(self, state):
        hello_str, integer, tensor, func = state
        self.hello_str = hello_str
        self.integer = integer
        self.tensor = tensor
        self.func = func

    def process_request(self, server_state):
        """Validate the payload and build the response.

        ``server_state`` is accepted but not used by this request.
        """
        assert self.hello_str == STR
        assert self.integer == INTEGER
        transformed = self.func(self.tensor)
        return HelloResponse(self.hello_str, self.integer, transformed)
def main():
    """Start an RPC test server or client, as selected by the environment.

    Reads ``DIST_DGL_TEST_IP_CONFIG`` and ``DIST_DGL_TEST_NUM_SERVERS`` in
    both roles. ``DIST_DGL_TEST_ROLE`` (default ``"server"``) picks the role:
    a server additionally reads ``DIST_DGL_TEST_SERVER_ID``,
    ``DIST_DGL_TEST_NUM_CLIENTS`` and treats the mere presence of
    ``DIST_DGL_TEST_KEEP_ALIVE`` as true; any other role runs the client with
    ``DIST_DGL_TEST_GROUP_ID`` (default ``"0"``).
    """
    env = os.environ
    ip_config = env.get("DIST_DGL_TEST_IP_CONFIG")
    num_servers = int(env.get("DIST_DGL_TEST_NUM_SERVERS"))

    role = env.get("DIST_DGL_TEST_ROLE", "server")
    if role != "server":
        group_id = int(env.get("DIST_DGL_TEST_GROUP_ID", "0"))
        start_client(ip_config, num_servers, group_id)
        return

    server_id = int(env.get("DIST_DGL_TEST_SERVER_ID"))
    num_clients = int(env.get("DIST_DGL_TEST_NUM_CLIENTS"))
    keep_alive = "DIST_DGL_TEST_KEEP_ALIVE" in env
    start_server(server_id, ip_config, num_servers, num_clients, keep_alive)
def node_split_test(g, force_even, ntype="_N"):
    """Check node_split() on a randomly initialized selection mask.

    A uint8 DistTensor with one entry per node of ``ntype`` is filled by
    ``rand_init``. Each client zeroes the entries of the nodes that
    ``node_split`` returned to it; afterwards rank 0 asserts that the whole
    tensor sums to zero.
    """
    partition_book = g.get_partition_book()
    total_nodes = g.num_nodes(ntype)
    mask = dgl.distributed.DistTensor(
        [total_nodes], F.uint8, init_func=rand_init
    )
    local_nodes = node_split(
        mask, partition_book, ntype=ntype, force_even=force_even
    )
    g.barrier()

    # Clear the entries of the nodes assigned to this client.
    cleared = F.astype(F.zeros_like(local_nodes), mask.dtype)
    mask[local_nodes] = cleared
    g.barrier()

    # Only rank 0 verifies, after every client has written its share.
    if g.rank() == 0:
        batched_assert_zero(mask, g.num_nodes(ntype))
    g.barrier()
def find_edges_test(g, orig_nid_map):
    """Check find_edges() against the endpoints stored on the edges.

    For every canonical edge type, 100 random edge IDs are drawn and their
    endpoints looked up. After mapping through ``orig_nid_map``, they must
    match the ``edge_u`` / ``edge_v`` edge data.

    Returns a dict keyed by edge type name with ``(eids, cat([u, v]))``.
    """
    results = {}
    for src_type, etype, dst_type in g.canonical_etypes:
        edge_data = g.edges[etype].data
        expected_u = edge_data["edge_u"]
        expected_v = edge_data["edge_v"]

        sampled_eids = F.tensor(
            np.random.randint(g.num_edges(etype), size=100)
        )
        found_u, found_v = g.find_edges(sampled_eids, etype=etype)

        assert F.allclose(
            orig_nid_map[src_type][found_u], expected_u[sampled_eids]
        )
        assert F.allclose(
            orig_nid_map[dst_type][found_v], expected_v[sampled_eids]
        )
        results[etype] = (sampled_eids, F.cat([found_u, found_v], dim=0))
    return results
def test_dist_graph_services(g):
    """Exercise the query services of a distributed graph.

    Runs, in order: degree queries (single-edge-type graphs only),
    ``find_edges_test``, ``edge_subgraph_test`` and ``sample_neighbors_test``.

    The degree comparisons are asserted; previously their ``F.allclose``
    results were computed and discarded, so a mismatch could not fail the
    test.
    """
    # in_degrees and out_degrees do not support heterographs, so the degree
    # checks only run when the graph has a single edge type.
    if len(g.etypes) == 1:
        nids = F.arange(0, 128)

        # Test in_degrees against the degrees stored as node data.
        orig_in_degrees = g.ndata["in_degrees"]
        local_in_degrees = g.in_degrees(nids)
        assert F.allclose(
            local_in_degrees, orig_in_degrees[nids]
        ), "in_degrees() disagrees with the stored 'in_degrees' node data"

        # Test out_degrees against the degrees stored as node data.
        orig_out_degrees = g.ndata["out_degrees"]
        local_out_degrees = g.out_degrees(nids)
        assert F.allclose(
            local_out_degrees, orig_out_degrees[nids]
        ), "out_degrees() disagrees with the stored 'out_degrees' node data"

    # Load the saved per-node-type ID arrays, cast to the dtype of the stored
    # edge endpoints so the two can be compared directly.
    # presumably these map partitioned node IDs back to original IDs -- see
    # how find_edges_test indexes them.
    dtype = g.edges[g.etypes[0]].data["edge_u"].dtype
    orig_nid_map = {
        ntype: F.tensor(
            np.load(graph_path + f"/orig_nid_array_{ntype}.npy"), dtype
        )
        for ntype in g.ntypes
    }

    etype_eids_uv_map = find_edges_test(g, orig_nid_map)
    edge_subgraph_test(g, etype_eids_uv_map)
    sample_neighbors_test(g)
def dist_embedding_check_existing(num_nodes):
    """Check that an embedding name cannot be reused with another dimension.

    Creates a ``DistEmbedding`` named ``"UniqueEmb"`` with dimension 1, then
    tries to create a second one under the same name with dimension 2 and
    requires that attempt to raise.

    Previously the "did not raise" sentinel was raised inside the ``try`` and
    swallowed by ``except BaseException``, so the check could never fail.

    Raises:
        AssertionError: if the conflicting creation succeeds.
    """
    dist_emb_name = "UniqueEmb"
    # Kept in a local so the first embedding stays alive while the second one
    # is attempted (presumably releasing it would free the name -- confirm).
    emb = dgl.distributed.DistEmbedding(  # noqa: F841
        num_nodes, 1, name=dist_emb_name, init_func=zeros_init
    )
    try:
        dgl.distributed.DistEmbedding(
            num_nodes, 2, name=dist_emb_name, init_func=zeros_init
        )
    except Exception:
        # Expected: the name is already taken by an embedding of another shape.
        return
    raise AssertionError(
        "creating a DistEmbedding with an existing name but a different "
        "embedding dimension should have failed"
    )
class NeighborSampler(object):
    """Collate helper that expands seed nodes into a list of blocks.

    Parameters
    ----------
    g : graph object handed unchanged to ``sample_neighbors``
    fanouts : iterable
        One fanout per layer; layers are sampled in this order.
    sample_neighbors : callable
        Called as ``sample_neighbors(g, seeds, fanout, replace=True)``.
    """

    def __init__(self, g, fanouts, sample_neighbors):
        self.g, self.fanouts, self.sample_neighbors = (
            g,
            fanouts,
            sample_neighbors,
        )

    def sample_blocks(self, seeds):
        """Return the sampled blocks, ordered from the last-sampled layer to the first."""
        import torch as th

        frontier_seeds = th.LongTensor(np.asarray(seeds))
        layers = []
        for num_neighbors in self.fanouts:
            # Sample ``num_neighbors`` neighbors for every current seed.
            sampled = self.sample_neighbors(
                self.g, frontier_seeds, num_neighbors, replace=True
            )
            # Compact the sampled frontier into a bipartite block.
            layer = dgl.to_block(sampled, frontier_seeds)
            # The block's source nodes seed the next layer.
            frontier_seeds = layer.srcdata[dgl.NID]
            layer.edata["original_eids"] = sampled.edata[dgl.EID]
            layers.append(layer)
        # Equivalent to inserting every block at the front of the list.
        layers.reverse()
        return layers
def distedgedataloader_test(
    g,
    batch_size,
    drop_last,
    shuffle,
    num_workers,
    orig_nid_map,
    orig_uv_map,
    num_negs,
):
    """Iterate a DistEdgeDataLoader twice and sanity-check every batch.

    Parameters
    ----------
    g : distributed graph to sample from
    batch_size, drop_last, shuffle, num_workers
        Forwarded to ``DistEdgeDataLoader``.
    orig_nid_map : dict
        Node type -> tensor indexed by the node IDs stored in the blocks.
    orig_uv_map : dict
        Edge type -> ``(edge_u, edge_v)`` data indexed by edge ID.
    num_negs : int
        Negatives per positive edge; a uniform negative sampler is used only
        when this is positive.
    """
    last_etype = g.etypes[-1]
    # We sample only a subset to minimize the test runtime
    num_edges_to_sample = int(g.num_edges(last_etype) * 0.05)
    # To make sure that drop_last is tested
    if num_edges_to_sample % batch_size == 0:
        num_edges_to_sample -= 1

    if len(g.etypes) == 1:
        train_eid = F.arange(0, num_edges_to_sample)
    else:
        train_eid = {last_etype: F.arange(0, num_edges_to_sample)}

    sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10])
    negative_sampler = None
    if num_negs > 0:
        negative_sampler = dgl.dataloading.negative_sampler.Uniform(num_negs)
    dataloader = dgl.dataloading.DistEdgeDataLoader(
        g,
        train_eid,
        sampler,
        batch_size=batch_size,
        negative_sampler=negative_sampler,
        shuffle=shuffle,
        drop_last=drop_last,
        num_workers=num_workers,
    )

    # The blocks sit at index 3 of a batch when negatives are sampled and at
    # index 2 otherwise.
    blocks_pos = 3 if num_negs > 0 else 2
    for _ in range(2):
        batch_starts = range(0, num_edges_to_sample, batch_size)
        for _, sampled_data in zip(batch_starts, dataloader):
            block = sampled_data[blocks_pos][-1]
            for src_type, etype, dst_type in block.canonical_etypes:
                orig_u, orig_v = orig_uv_map[etype]
                local_src, local_dst = block.edges(etype=etype)
                src_nids = block.srcnodes[src_type].data[dgl.NID][local_src]
                dst_nids = block.dstnodes[dst_type].data[dgl.NID][local_dst]
                src_orig = orig_nid_map[src_type][src_nids]
                dst_orig = orig_nid_map[dst_type][dst_nids]
                eids = block.edges[etype].data[dgl.EID]
                # NOTE(review): the results of these comparisons are
                # discarded, so they do not fail the test on a mismatch.
                F.allclose(src_orig, orig_u[eids])
                F.allclose(dst_orig, orig_v[eids])

                block_dst_nids = F.asnumpy(
                    block.dstnodes[dst_type].data[dgl.NID]
                )
                if num_negs == 0:
                    pos_pair_graph = sampled_data[1]
                    pair_nids = F.asnumpy(
                        pos_pair_graph.nodes[dst_type].data[dgl.NID]
                    )
                    assert np.all(block_dst_nids == pair_nids)
                else:
                    pos_graph, neg_graph = sampled_data[1:3]
                    pos_nids = F.asnumpy(
                        pos_graph.nodes[dst_type].data[dgl.NID]
                    )
                    assert np.all(block_dst_nids == pos_nids)
                    neg_nids = F.asnumpy(
                        neg_graph.nodes[dst_type].data[dgl.NID]
                    )
                    assert np.all(block_dst_nids == neg_nids)
                    assert (
                        pos_graph.num_edges() * num_negs
                        == neg_graph.num_edges()
                    )
    del dataloader
def test_dist_dataloader(g):
    """Run every distributed dataloader check against ``g``.

    The original-ID lookup tables are built once, then the node and edge
    dataloader checks run for every combination of ``drop_last``, ``shuffle``
    and ``num_workers``.  The multi-dataloader checks run once at the end.
    """
    dtype = g.edges[g.etypes[0]].data["edge_u"].dtype
    orig_nid_map = {
        ntype: F.tensor(
            np.load(graph_path + f"/orig_nid_array_{ntype}.npy"), dtype
        )
        for ntype in g.ntypes
    }
    orig_uv_map = {
        etype: (
            g.edges[etype].data["edge_u"],
            g.edges[etype].data["edge_v"],
        )
        for etype in g.etypes
    }

    combos = product([64], [False, True], [False, True], [0, 4])
    for batch_size, drop_last, shuffle, num_workers in combos:
        shared_args = (
            g,
            batch_size,
            drop_last,
            shuffle,
            num_workers,
            orig_nid_map,
            orig_uv_map,
        )
        # The plain DistDataLoader check only runs on single-node-type graphs
        # and without worker processes.
        if num_workers == 0 and len(g.ntypes) == 1:
            distdataloader_test(g, batch_size, drop_last, shuffle)
        distnodedataloader_test(*shared_args)
        # First without negative sampling, then with 15 negatives per edge.
        for num_negs in (0, 15):
            distedgedataloader_test(*shared_args, num_negs=num_negs)

    for loader_cls in (
        dgl.dataloading.DistNodeDataLoader,
        dgl.dataloading.DistEdgeDataLoader,
    ):
        multi_distdataloader_test(g, loader_cls)
def create_graph(num_part, dist_graph_path, hetero):
    """Build a random test graph, partition it and save the original-ID maps.

    Parameters
    ----------
    num_part : int
        Number of partitions passed to ``partition_graph``.
    dist_graph_path : str
        Output directory for the partitions and the ``orig_*_array_*.npy``
        mapping files.
    hetero : bool
        Build the three-node-type heterograph when true, otherwise a
        homogeneous random graph.
    """
    if hetero:
        from scipy import sparse as spsp

        num_nodes = {"n1": 10000, "n2": 10010, "n3": 10020}
        etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
        edges = {}
        for canonical in etypes:
            src_ntype, dst_ntype = canonical[0], canonical[2]
            adj = spsp.random(
                num_nodes[src_ntype],
                num_nodes[dst_ntype],
                density=0.001,
                format="coo",
                random_state=100,
            )
            edges[canonical] = (adj.row, adj.col)
        g = dgl.heterograph(edges, num_nodes)
        g.nodes["n1"].data["feat"] = F.unsqueeze(
            F.arange(0, g.num_nodes("n1")), 1
        )
        g.edges["r1"].data["feat"] = F.unsqueeze(
            F.arange(0, g.num_edges("r1")), 1
        )
        # Store every edge's endpoints as edge data, per relation.
        for canonical in etypes:
            rel = canonical[1]
            all_eids = F.arange(0, g.num_edges(rel))
            edge_u, edge_v = g.find_edges(all_eids, etype=rel)
            g.edges[rel].data["edge_u"] = edge_u
            g.edges[rel].data["edge_v"] = edge_v
        nid_maps, eid_maps = partition_graph(
            g, graph_name, num_part, dist_graph_path, return_mapping=True
        )
        for n_type, tensor in nid_maps.items():
            np.save(
                os.path.join(dist_graph_path, f"orig_nid_array_{n_type}.npy"),
                tensor.numpy(),
            )
        for e_type, tensor in eid_maps.items():
            np.save(
                os.path.join(dist_graph_path, f"orig_eid_array_{e_type}.npy"),
                tensor.numpy(),
            )
        return

    g = dgl.rand_graph(10000, 42000)
    g.ndata["feat"] = F.unsqueeze(F.arange(0, g.num_nodes()), 1)
    g.edata["feat"] = F.unsqueeze(F.arange(0, g.num_edges()), 1)
    g.ndata["in_degrees"] = g.in_degrees()
    g.ndata["out_degrees"] = g.out_degrees()
    etype = g.etypes[0]
    ntype = g.ntypes[0]
    # Store every edge's endpoints as edge data.
    edge_u, edge_v = g.find_edges(F.arange(0, g.num_edges(etype)))
    g.edges[etype].data["edge_u"] = edge_u
    g.edges[etype].data["edge_v"] = edge_v
    nid_map, eid_map = partition_graph(
        g, graph_name, num_part, dist_graph_path, return_mapping=True
    )
    np.save(
        os.path.join(dist_graph_path, f"orig_nid_array_{ntype}.npy"),
        nid_map.numpy(),
    )
    np.save(
        os.path.join(dist_graph_path, f"orig_eid_array_{etype}.npy"),
        eid_map.numpy(),
    )
def teardown():
    """Remove the partitioned graph directories from the shared workspace."""
    for name in ("dist_graph", "hetero_dist_graph"):
        path = os.path.join(shared_workspace, name)
        if not os.path.exists(path):
            continue
        print(f"Removing {path}...")
        shutil.rmtree(path)
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_rpc():
    """Launch ``rpc_basic.py`` as servers and clients on every configured host.

    One server and one client are started per IP in the ip_config file.  The
    test passes when every launched process exits with code 0.
    """
    num_clients = 1
    num_servers = 1
    ip_config = os.environ.get("DIST_DGL_TEST_IP_CONFIG", "ip_config.txt")
    ips = utils.get_ips(ip_config)
    num_machines = len(ips)
    bin_dir = os.environ.get("DIST_DGL_TEST_PY_BIN_DIR", ".")
    test_bin = os.path.join(bin_dir, "rpc_basic.py")
    base_envs = (
        dgl_envs
        + f" DGL_DIST_MODE=distributed DIST_DGL_TEST_IP_CONFIG={ip_config} DIST_DGL_TEST_NUM_SERVERS={num_servers} "
    )
    launch_suffix = " python3 " + test_bin

    procs = []
    # start server processes; server ids follow launch order across hosts
    server_hosts = [ip for ip in ips for _ in range(num_servers)]
    for server_id, ip in enumerate(server_hosts):
        server_envs = (
            base_envs
            + f" DIST_DGL_TEST_ROLE=server DIST_DGL_TEST_SERVER_ID={server_id} DIST_DGL_TEST_NUM_CLIENTS={num_clients * num_machines} "
        )
        procs.append(utils.execute_remote(server_envs + launch_suffix, ip))

    # start client processes
    client_envs = (
        base_envs + " DIST_DGL_TEST_ROLE=client DIST_DGL_TEST_GROUP_ID=0 "
    )
    client_hosts = [ip for ip in ips for _ in range(num_clients)]
    for ip in client_hosts:
        procs.append(utils.execute_remote(client_envs + launch_suffix, ip))

    for proc in procs:
        proc.join()
        assert proc.exitcode == 0
def get_ips(ip_config):
    """Read the host addresses listed in an ip_config file.

    Every non-blank line must contain exactly one whitespace-delimited token,
    which is taken as a host address.  Blank or whitespace-only lines (for
    example an extra trailing newline) are skipped.

    Args:
        ip_config: Path of the ip_config file.

    Returns:
        list of str: The addresses in file order.

    Raises:
        RuntimeError: If a non-blank line holds more than one token.
    """
    ips = []
    with open(ip_config) as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines instead of rejecting the whole file.
                continue
            if len(fields) != 1:
                raise RuntimeError(
                    "Invalid format of ip_config:{}".format(ip_config)
                )
            ips.append(fields[0])
    return ips
def run_server(
    graph_name,
    server_id,
    server_count,
    num_clients,
    shared_mem,
    use_graphbolt=False,
):
    """Create, verify and start a ``DistGraphServer`` for ``graph_name``.

    Parameters
    ----------
    graph_name : str
        Name used to locate ``/tmp/dist_graph/<graph_name>.json``.
    server_id, server_count, num_clients : int
        Forwarded to ``DistGraphServer``.
    shared_mem : bool
        Shared memory is disabled on the server when this is false.
    use_graphbolt : bool
        Forwarded to the server; also selects which verification helper runs.
    """
    part_config = "/tmp/dist_graph/{}.json".format(graph_name)
    server = DistGraphServer(
        server_id,
        "kv_ip_config.txt",
        server_count,
        num_clients,
        part_config,
        disable_shared_mem=not shared_mem,
        graph_format=["csc", "coo"],
        use_graphbolt=use_graphbolt,
    )
    print(f"Starting server[{server_id}] with use_graphbolt={use_graphbolt}")
    # Check the server-side graph before serving any request.
    if use_graphbolt:
        _verify_dist_graph_server_graphbolt(server)
    else:
        _verify_dist_graph_server_dgl(server)
    server.start()
def run_client_empty(
    graph_name,
    part_id,
    server_count,
    num_clients,
    num_nodes,
    num_edges,
    use_graphbolt=False,
):
    """Client-process entry point for the ``check_dist_graph_empty`` test.

    Initializes the distributed runtime, loads the partition book of
    partition ``part_id``, builds a ``DistGraph`` and runs
    ``check_dist_graph_empty`` on it.  ``use_graphbolt`` is accepted but not
    used in this function.
    """
    part_config = "/tmp/dist_graph/{}.json".format(graph_name)
    os.environ["DGL_NUM_SERVER"] = str(server_count)
    dgl.distributed.initialize("kv_ip_config.txt")
    partition_book, registered_name, _, _ = load_partition_book(
        part_config, part_id
    )
    dist_graph = DistGraph(registered_name, gpb=partition_book)
    check_dist_graph_empty(dist_graph, num_clients, num_nodes, num_edges)
def run_client(
    graph_name,
    part_id,
    server_count,
    num_clients,
    num_nodes,
    num_edges,
    group_id,
    use_graphbolt=False,
):
    """Client-process entry point for the ``check_dist_graph`` test.

    Sets ``DGL_NUM_SERVER`` and ``DGL_GROUP_ID`` in the environment,
    initializes the distributed runtime, builds a ``DistGraph`` from the
    partition book of ``part_id`` and runs ``check_dist_graph`` on it.
    """
    part_config = "/tmp/dist_graph/{}.json".format(graph_name)
    os.environ["DGL_NUM_SERVER"] = str(server_count)
    os.environ["DGL_GROUP_ID"] = str(group_id)
    dgl.distributed.initialize("kv_ip_config.txt")
    partition_book, registered_name, _, _ = load_partition_book(
        part_config, part_id
    )
    dist_graph = DistGraph(registered_name, gpb=partition_book)
    check_dist_graph(
        dist_graph,
        num_clients,
        num_nodes,
        num_edges,
        use_graphbolt=use_graphbolt,
    )
def run_client_hierarchy(
    graph_name,
    part_id,
    server_count,
    node_mask,
    edge_mask,
    return_dict,
    use_graphbolt=False,
):
    """Client-process entry point that records this trainer's node/edge split.

    The masks are converted to backend tensors and split with ``node_split``
    and ``edge_split`` using the graph's ``trainer_id`` node and edge data.
    The result is stored in ``return_dict`` under the client's rank as a
    ``(nodes, edges)`` tuple.  ``use_graphbolt`` is accepted but not used in
    this function.
    """
    part_config = "/tmp/dist_graph/{}.json".format(graph_name)
    os.environ["DGL_NUM_SERVER"] = str(server_count)
    dgl.distributed.initialize("kv_ip_config.txt")
    partition_book, registered_name, _, _ = load_partition_book(
        part_config, part_id
    )
    dist_graph = DistGraph(registered_name, gpb=partition_book)

    node_mask = F.tensor(node_mask)
    edge_mask = F.tensor(edge_mask)
    my_nodes = node_split(
        node_mask,
        dist_graph.get_partition_book(),
        node_trainer_ids=dist_graph.ndata["trainer_id"],
    )
    my_edges = edge_split(
        edge_mask,
        dist_graph.get_partition_book(),
        edge_trainer_ids=dist_graph.edata["trainer_id"],
    )
    return_dict[dist_graph.rank()] = (my_nodes, my_edges)
def check_dist_graph(g, num_clients, num_nodes, num_edges, use_graphbolt=False):
    """Exercise the client-side DistGraph and DistTensor API on ``g``.

    The graph is expected to carry ``"features"`` node and edge data whose
    row ``i`` holds the value ``i`` (the read checks compare features to IDs).

    Parameters
    ----------
    g : DistGraph
        Graph under test.
    num_clients : int
        Accepted for signature parity; not used in this function.
    num_nodes, num_edges : int
        Expected node and edge counts of ``g``.
    use_graphbolt : bool
        Accepted for signature parity; not used in this function.

    Raises
    ------
    AssertionError
        If any check fails, including when a persistent tensor's name can be
        reused with a different shape.
    """
    # Test API
    assert g.num_nodes() == num_nodes
    assert g.num_edges() == num_edges

    # Test reading node data
    nids = F.arange(0, int(g.num_nodes() / 2))
    feats1 = g.ndata["features"][nids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == nids))

    # Test reading edge data
    eids = F.arange(0, int(g.num_edges() / 2))
    feats1 = g.edata["features"][eids]
    feats = F.squeeze(feats1, 1)
    assert np.all(F.asnumpy(feats == eids))

    # Test edge_subgraph
    sg = g.edge_subgraph(eids)
    assert sg.num_edges() == len(eids)
    assert F.array_equal(sg.edata[dgl.EID], eids)

    # Test init node data
    new_shape = (g.num_nodes(), 2)
    test1 = dgl.distributed.DistTensor(new_shape, F.int32)
    g.ndata["test1"] = test1
    feats = g.ndata["test1"][nids]
    assert np.all(F.asnumpy(feats) == 0)
    assert test1.count_nonzero() == 0

    # Reference a tensor that already exists: both handles must read the
    # same values.
    test2 = dgl.distributed.DistTensor(
        new_shape, F.float32, "test2", init_func=rand_init
    )
    test3 = dgl.distributed.DistTensor(new_shape, F.float32, "test2")
    assert np.all(F.asnumpy(test2[nids]) == F.asnumpy(test3[nids]))

    # Create a tensor, destroy it and create it again (with another shape).
    test3 = dgl.distributed.DistTensor(
        new_shape, F.float32, "test3", init_func=rand_init
    )
    test3_name = test3.kvstore_key
    assert test3_name in g._client.data_name_list()
    assert test3_name in g._client.gdata_name_list()
    del test3
    assert test3_name not in g._client.data_name_list()
    assert test3_name not in g._client.gdata_name_list()
    test3 = dgl.distributed.DistTensor((g.num_nodes(), 3), F.float32, "test3")
    del test3

    # Tests for anonymous distributed tensors: a tensor created after an
    # anonymous one was deleted must not expose the deleted tensor's data.
    test3 = dgl.distributed.DistTensor(
        new_shape, F.float32, init_func=rand_init
    )
    data = test3[0:10]
    test4 = dgl.distributed.DistTensor(
        new_shape, F.float32, init_func=rand_init
    )
    del test3
    test5 = dgl.distributed.DistTensor(
        new_shape, F.float32, init_func=rand_init
    )
    assert np.sum(F.asnumpy(test5[0:10] != data)) > 0

    # Test a persistent tensor: it must survive deletion of its handle, so
    # re-creating it under the same name with a different shape has to fail.
    test4 = dgl.distributed.DistTensor(
        new_shape, F.float32, "test4", init_func=rand_init, persistent=True
    )
    del test4
    try:
        dgl.distributed.DistTensor((g.num_nodes(), 3), F.float32, "test4")
    except Exception:
        # Expected: the name is still taken by a tensor of another shape.
        pass
    else:
        # Raised outside the ``try`` body so it cannot be swallowed.
        raise AssertionError(
            "Persistent DistTensor 'test4' was re-created with a different "
            "shape without raising"
        )

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.ndata["test1"][nids] = new_feats
    feats = g.ndata["test1"][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.ndata["features"]) == g.num_nodes()
    assert g.ndata["features"].shape == (g.num_nodes(), 1)
    assert g.ndata["features"].dtype == F.int64
    assert g.node_attr_schemes()["features"].dtype == F.int64
    assert g.node_attr_schemes()["test1"].dtype == F.int32
    assert g.node_attr_schemes()["features"].shape == (1,)

    selected_nodes = np.random.randint(0, 100, size=g.num_nodes()) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book())
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes
    # in the graph.
    local_nids = np.arange(g.num_nodes())
    for n in nodes:
        assert n in local_nids

    print("end")
serv_ps = [] ctx = mp.get_context("spawn") for serv_id in range(num_servers): p = ctx.Process( target=run_server, args=( graph_name, serv_id, num_servers, num_clients, shared_mem, ), ) serv_ps.append(p) p.start() cli_ps = [] for cli_id in range(num_clients): for group_id in range(num_groups): print("start client[{}] for group[{}]".format(cli_id, group_id)) p = ctx.Process( target=run_emb_client, args=( graph_name, 0, num_servers, num_clients, g.num_nodes(), g.num_edges(), group_id, ), ) p.start() time.sleep(1) # avoid race condition when instantiating DistGraph cli_ps.append(p) for p in cli_ps: p.join() assert p.exitcode == 0 for p in serv_ps: p.join() assert p.exitcode == 0 print("clients have terminated") def check_server_client( shared_mem, num_servers, num_clients, num_groups=1, use_graphbolt=False ): prepare_dist(num_servers) g = create_random_graph(10000) # Partition the graph num_parts = 1 graph_name = f"check_server_client_{shared_mem}_{num_servers}_{num_clients}_{num_groups}" g.ndata["features"] = F.unsqueeze(F.arange(0, g.num_nodes()), 1) g.edata["features"] = F.unsqueeze(F.arange(0, g.num_edges()), 1) partition_graph( g, graph_name, num_parts, "/tmp/dist_graph", use_graphbolt=use_graphbolt ) # let's just test on one partition for now. # We cannot run multiple servers and clients on the same machine. 
def check_server_client_hierarchy(
    shared_mem, num_servers, num_clients, use_graphbolt=False
):
    """Check that several trainers on one machine jointly cover a node/edge mask.

    A random graph is partitioned into one part with
    ``num_trainers_per_machine=num_clients``. Each client runs
    ``run_client_hierarchy`` with the same node and edge masks and stores a
    ``(nodes, edges)`` pair in a shared manager dict. The concatenation of all
    pairs, once sorted, must equal the sorted selected IDs.

    Parameters
    ----------
    shared_mem :
        Forwarded unchanged to ``run_server``.
    num_servers : int
        Number of server processes to spawn.
    num_clients : int
        Number of client processes; the check is a no-op when this is 1.
    use_graphbolt : bool, optional
        Forwarded to ``partition_graph``, ``run_server`` and the clients.
    """
    if num_clients == 1:
        # skip this test if there is only one client.
        return
    prepare_dist(num_servers)
    g = create_random_graph(10000)

    # Partition the graph
    num_parts = 1
    graph_name = "dist_graph_test_2"
    g.ndata["features"] = F.unsqueeze(F.arange(0, g.num_nodes()), 1)
    g.edata["features"] = F.unsqueeze(F.arange(0, g.num_edges()), 1)
    partition_graph(
        g,
        graph_name,
        num_parts,
        "/tmp/dist_graph",
        num_trainers_per_machine=num_clients,
        use_graphbolt=use_graphbolt,
    )

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context("spawn")
    for serv_id in range(num_servers):
        p = ctx.Process(
            target=run_server,
            args=(
                graph_name,
                serv_id,
                num_servers,
                num_clients,
                shared_mem,
                use_graphbolt,
            ),
        )
        serv_ps.append(p)
        p.start()

    # Select a random 10% of the nodes and edges; the sorted IDs are the
    # reference the clients' combined output is compared against.
    node_mask = np.zeros((g.num_nodes(),), np.int32)
    edge_mask = np.zeros((g.num_edges(),), np.int32)
    nodes = np.random.choice(g.num_nodes(), g.num_nodes() // 10, replace=False)
    edges = np.random.choice(g.num_edges(), g.num_edges() // 10, replace=False)
    node_mask[nodes] = 1
    edge_mask[edges] = 1
    nodes = np.sort(nodes)
    edges = np.sort(edges)

    # The manager runs its own helper process. Using it as a context manager
    # guarantees that process is shut down, even when an assertion fails.
    with mp.Manager() as manager:
        return_dict = manager.dict()
        cli_ps = []
        for cli_id in range(num_clients):
            print("start client", cli_id)
            p = ctx.Process(
                target=run_client_hierarchy,
                args=(
                    graph_name,
                    0,
                    num_servers,
                    node_mask,
                    edge_mask,
                    return_dict,
                    use_graphbolt,
                ),
            )
            p.start()
            cli_ps.append(p)

        for p in cli_ps:
            p.join()
            assert p.exitcode == 0
        for p in serv_ps:
            p.join()
            assert p.exitcode == 0

        # Collect the results while the manager is still alive.
        nodes1 = []
        edges1 = []
        for n, e in return_dict.values():
            nodes1.append(n)
            edges1.append(e)
        nodes1, _ = F.sort_1d(F.cat(nodes1, 0))
        edges1, _ = F.sort_1d(F.cat(edges1, 0))

    assert np.all(F.asnumpy(nodes1) == nodes)
    assert np.all(F.asnumpy(edges1) == edges)

    print("clients have terminated")
def check_dist_graph_hetero(
    g, num_clients, num_nodes, num_edges, use_graphbolt=False
):
    """Run the heterogeneous DistGraph API checks against ``g``.

    Parameters
    ----------
    g :
        The distributed heterograph under test (``run_client_hetero`` passes
        a ``DistGraph``).
    num_clients :
        Accepted for signature parity with the callers; not used here.
    num_nodes : dict
        Expected node count per node type name.
    num_edges : dict
        Expected edge count per edge type name.
    use_graphbolt : bool, optional
        Accepted for signature parity with the callers; not used here.
    """
    # Test API
    for ntype in num_nodes:
        assert ntype in g.ntypes
        assert num_nodes[ntype] == g.num_nodes(ntype)
    for etype in num_edges:
        assert etype in g.etypes
        assert num_edges[etype] == g.num_edges(etype)
    etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n2", "r3", "n3")]
    for i, etype in enumerate(g.canonical_etypes):
        assert etype[0] == etypes[i][0]
        assert etype[1] == etypes[i][1]
        assert etype[2] == etypes[i][2]
    assert g.num_nodes() == sum(num_nodes.values())
    assert g.num_edges() == sum(num_edges.values())

    # Test reading node data
    ntype = "n1"
    nids = F.arange(0, g.num_nodes(ntype) // 2)
    for name in ["feat", ntype]:
        data = g.nodes[ntype].data[name][nids]
        data = F.squeeze(data, 1)
        assert np.all(F.asnumpy(data == nids))
    assert len(g.nodes["n2"].data) == 0
    # Accessing an unknown node type must raise DGLError.
    try:
        g.nodes["xxx"].data["x"]
    except dgl.DGLError:
        pass
    else:
        raise AssertionError("unknown node type did not raise DGLError")

    # Test reading edge data
    etype = "r1"
    eids = F.arange(0, g.num_edges(etype) // 2)
    for name in ["feat", etype]:
        # access via etype
        data = g.edges[etype].data[name][eids]
        data = F.squeeze(data, 1)
        assert np.all(F.asnumpy(data == eids))
        # access via canonical etype
        c_etype = g.to_canonical_etype(etype)
        data = g.edges[c_etype].data[name][eids]
        data = F.squeeze(data, 1)
        assert np.all(F.asnumpy(data == eids))
    assert len(g.edges["r2"].data) == 0
    # Accessing an unknown edge type must raise DGLError.
    try:
        g.edges["xxx"].data["x"]
    except dgl.DGLError:
        pass
    else:
        raise AssertionError("unknown edge type did not raise DGLError")

    # Test edge_subgraph
    sg = g.edge_subgraph({"r1": eids})
    assert sg.num_edges() == len(eids)
    assert F.array_equal(sg.edata[dgl.EID], eids)
    sg = g.edge_subgraph({("n1", "r1", "n2"): eids})
    assert sg.num_edges() == len(eids)
    assert F.array_equal(sg.edata[dgl.EID], eids)

    # Test init node data
    new_shape = (g.num_nodes("n1"), 2)
    g.nodes["n1"].data["test1"] = dgl.distributed.DistTensor(new_shape, F.int32)
    feats = g.nodes["n1"].data["test1"][nids]
    assert np.all(F.asnumpy(feats) == 0)

    # create a tensor and destroy a tensor and create it again.
    test3 = dgl.distributed.DistTensor(
        new_shape, F.float32, "test3", init_func=rand_init
    )
    del test3
    test3 = dgl.distributed.DistTensor(
        (g.num_nodes("n1"), 3), F.float32, "test3"
    )
    del test3

    # add tests for anonymous distributed tensor.
    test3 = dgl.distributed.DistTensor(
        new_shape, F.float32, init_func=rand_init
    )
    data = test3[0:10]
    test4 = dgl.distributed.DistTensor(
        new_shape, F.float32, init_func=rand_init
    )
    del test3
    test5 = dgl.distributed.DistTensor(
        new_shape, F.float32, init_func=rand_init
    )
    assert np.sum(F.asnumpy(test5[0:10] != data)) > 0

    # test a persistent tensor
    test4 = dgl.distributed.DistTensor(
        new_shape, F.float32, "test4", init_func=rand_init, persistent=True
    )
    del test4
    # Re-creating "test4" with a different shape is expected to fail. The
    # failure signal lives in the ``else`` branch so the handler cannot
    # swallow it (the previous bare ``except`` did).
    try:
        test4 = dgl.distributed.DistTensor(
            (g.num_nodes("n1"), 3), F.float32, "test4"
        )
    except Exception:
        pass
    else:
        raise AssertionError(
            "re-creating persistent tensor 'test4' with a new shape "
            "did not raise"
        )

    # Test write data
    new_feats = F.ones((len(nids), 2), F.int32, F.cpu())
    g.nodes["n1"].data["test1"][nids] = new_feats
    feats = g.nodes["n1"].data["test1"][nids]
    assert np.all(F.asnumpy(feats) == 1)

    # Test metadata operations.
    assert len(g.nodes["n1"].data["feat"]) == g.num_nodes("n1")
    assert g.nodes["n1"].data["feat"].shape == (g.num_nodes("n1"), 1)
    assert g.nodes["n1"].data["feat"].dtype == F.int64

    selected_nodes = np.random.randint(0, 100, size=g.num_nodes("n1")) > 30
    # Test node split
    nodes = node_split(selected_nodes, g.get_partition_book(), ntype="n1")
    nodes = F.asnumpy(nodes)
    # We only have one partition, so the local nodes are basically all nodes in the graph.
    local_nids = np.arange(g.num_nodes("n1"))
    for n in nodes:
        assert n in local_nids

    print("end")
@unittest.skip(reason="Skip due to glitch in CI")
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support distributed DistEmbedding",
)
@unittest.skipIf(
    dgl.backend.backend_name == "mxnet",
    reason="Mxnet doesn't support distributed DistEmbedding",
)
def test_dist_emb_server_client():
    """Run the server/client embedding check with one server and one client.

    The check is run twice, first with ``shared_mem=True`` and then with
    ``shared_mem=False``.
    """
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "distributed"
    for shared_mem in (True, False):
        check_dist_emb_server_client(shared_mem, 1, 1)
    # [TODO][Rhett] Tests for multiple groups may fail sometimes and
    # root cause is unknown. Let's disable them for now.
    # Disabled argument sets for check_dist_emb_server_client:
    #   (True, 2, 2), (True, 1, 1, 2), (False, 1, 1, 2), (True, 2, 2, 2)
@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support some of operations in DistGraph",
)
@unittest.skipIf(
    dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support"
)
def test_standalone():
    """Run the DistGraph checks in standalone mode, without server processes.

    A random graph is partitioned into a single part, loaded through
    ``DistGraph`` with an explicit ``part_config`` and handed to
    ``check_dist_graph``.
    """
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "standalone"

    g = create_random_graph(10000)
    # Partition the graph
    num_parts = 1
    graph_name = "dist_graph_test_3"
    g.ndata["features"] = F.unsqueeze(F.arange(0, g.num_nodes()), 1)
    g.edata["features"] = F.unsqueeze(F.arange(0, g.num_edges()), 1)
    partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")

    dgl.distributed.initialize("kv_ip_config.txt")
    try:
        dist_g = DistGraph(
            graph_name,
            part_config="/tmp/dist_graph/{}.json".format(graph_name),
        )
        check_dist_graph(dist_g, 1, g.num_nodes(), g.num_edges())
    finally:
        # Several tests share one process, so the client must be torn down
        # even when a check above fails; otherwise the next test inherits an
        # already-initialised client.
        dgl.distributed.exit_client()
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("hetero", [True, False])
@pytest.mark.parametrize("empty_mask", [True, False])
def test_split(hetero, empty_mask):
    """Check ``node_split``/``edge_split`` with ``force_even=False``.

    The graph is split into four METIS partitions. For each partition, the
    IDs returned for rank ``i`` must equal the selected IDs that are local to
    that partition. With twice as many clients, ranks ``2i`` and ``2i + 1``
    together must return the same set.

    Parameters
    ----------
    hetero : bool
        Use the heterogeneous random graph (types ``"n1"``/``"r1"``) instead
        of the homogeneous one (types ``"_N"``/``"_E"``).
    empty_mask : bool
        When true the threshold is 100, which no value drawn from
        ``randint(0, 100)`` exceeds, so nothing is selected.
    """
    if hetero:
        g, ntype, etype = create_random_hetero(), "n1", "r1"
    else:
        g, ntype, etype = create_random_graph(10000), "_N", "_E"

    num_parts = 4
    part_config = "/tmp/dist_graph/dist_graph_test.json"
    partition_graph(
        g,
        "dist_graph_test",
        num_parts,
        "/tmp/dist_graph",
        num_hops=2,
        part_method="metis",
    )

    threshold = 100 if empty_mask else 30
    node_mask = np.random.randint(0, 100, size=g.num_nodes(ntype)) > threshold
    edge_mask = np.random.randint(0, 100, size=g.num_edges(etype)) > threshold
    selected_nodes = np.nonzero(node_mask)[0]
    selected_edges = np.nonzero(edge_mask)[0]

    def set_roles(num_clients):
        # The split functions consult the roles collected from all client
        # processes; fill them in by hand to simulate ``num_clients`` clients.
        role = dgl.distributed.role
        role.CUR_ROLE = "default"
        role.GLOBAL_RANK = dict(zip(range(num_clients), range(num_clients)))
        role.PER_ROLE_RANK["default"] = dict(
            zip(range(num_clients), range(num_clients))
        )

    for part_id in range(num_parts):
        set_roles(num_parts)
        part_g, _node_feats, _edge_feats, gpb, _, _, _ = load_partition(
            part_config, part_id
        )

        # ---- nodes ----
        inner_rows = F.nonzero_1d(part_g.ndata["inner_node"])
        local_nids = F.gather_row(part_g.ndata[dgl.NID], inner_rows)
        if hetero:
            # Keep only the per-type IDs whose type ID is 0.
            ntype_ids, per_type_nids = gpb.map_to_per_ntype(local_nids)
            local_nids = F.asnumpy(per_type_nids)[F.asnumpy(ntype_ids) == 0]
        else:
            local_nids = F.asnumpy(local_nids)
        expected_nodes = np.sort(np.intersect1d(selected_nodes, local_nids))

        single = F.asnumpy(
            node_split(
                node_mask, gpb, ntype=ntype, rank=part_id, force_even=False
            )
        )
        assert np.all(expected_nodes == np.sort(single))
        assert all(n in local_nids for n in single)

        set_roles(num_parts * 2)
        halves = [
            node_split(
                node_mask, gpb, ntype=ntype, rank=rank, force_even=False
            )
            for rank in (part_id * 2, part_id * 2 + 1)
        ]
        combined = F.asnumpy(F.cat(halves, 0))
        assert np.all(expected_nodes == np.sort(combined))

        # ---- edges ----
        set_roles(num_parts)
        inner_rows = F.nonzero_1d(part_g.edata["inner_edge"])
        local_eids = F.gather_row(part_g.edata[dgl.EID], inner_rows)
        if hetero:
            etype_ids, per_type_eids = gpb.map_to_per_etype(local_eids)
            local_eids = F.asnumpy(per_type_eids)[F.asnumpy(etype_ids) == 0]
        else:
            local_eids = F.asnumpy(local_eids)
        expected_edges = np.sort(np.intersect1d(selected_edges, local_eids))

        single = F.asnumpy(
            edge_split(
                edge_mask, gpb, etype=etype, rank=part_id, force_even=False
            )
        )
        assert np.all(expected_edges == np.sort(single))
        assert all(e in local_eids for e in single)

        set_roles(num_parts * 2)
        halves = [
            edge_split(
                edge_mask, gpb, etype=etype, rank=rank, force_even=False
            )
            for rank in (part_id * 2, part_id * 2 + 1)
        ]
        combined = F.asnumpy(F.cat(halves, 0))
        assert np.all(expected_edges == np.sort(combined))
if __name__ == "__main__":
    # Script entry point of the dist-graph test module: run every test once
    # without pytest.
    os.makedirs("/tmp/dist_graph", exist_ok=True)

    def _run(test_fn, *args):
        """Call one test directly; report a skip instead of aborting the run."""
        try:
            test_fn(*args)
        except unittest.SkipTest as skipped:
            # Tests decorated with ``unittest.skip`` raise SkipTest when
            # called outside a test runner.
            print("skipped {}: {}".format(test_fn.__name__, skipped))

    # The parametrized tests have required arguments that pytest normally
    # supplies; enumerate the same values as their ``parametrize`` marks.
    _run(test_dist_emb_server_client)
    for use_graphbolt in [True, False]:
        for num_clients in [1, 4]:
            _run(test_server_client, True, 1, num_clients, use_graphbolt)
    for hetero in [True, False]:
        for empty_mask in [True, False]:
            _run(test_split, hetero, empty_mask)
    for empty_mask in [True, False]:
        _run(test_split_even, empty_mask)
    _run(test_standalone)
    _run(test_standalone_node_emb)
def check_binary_op(key1, key2, key3, op):
    """Verify that ``edata[key3]`` equals ``op(edata[key1], edata[key2])``.

    The comparison walks the edge data of the module-level ``dist_g`` in
    windows of 1000 rows. Afterwards, indexing ``edata[key3]`` with an int32
    tensor must raise ``InconsistentDtypeException``, while the same indices
    as int64 must be accepted.

    Parameters
    ----------
    key1, key2 : str
        Names of the two operand edge tensors.
    key3 : str
        Name of the edge tensor holding the stored result.
    op : callable
        Binary operator applied to the two operand slices.
    """
    window = 1000
    for begin in range(0, dist_g.num_edges(), window):
        rows = slice(begin, min(begin + window, dist_g.num_edges()))
        stored = dist_g.edata[key3][rows]
        recomputed = op(dist_g.edata[key1][rows], dist_g.edata[key2][rows])
        assert F.array_equal(stored, recomputed)

    # Test with different index dtypes. int32 is not supported.
    probe = [100, 20, 10]
    with pytest.raises(
        dgl.utils.internal.InconsistentDtypeException,
        match="DGL now requires the input tensor to have",
    ):
        _ = dist_g.edata[key3][F.tensor(probe, F.int32)]
    _ = dist_g.edata[key3][F.tensor(probe, F.int64)]
def start_server(
    rank,
    tmpdir,
    disable_shared_mem,
    graph_name,
    graph_format=None,
    use_graphbolt=False,
):
    """Create a ``DistGraphServer`` for one partition and start it.

    The server reads ``rpc_ip_config.txt`` and is created with the two
    positional count arguments after it both set to 1.

    Parameters
    ----------
    rank : int
        Passed as the first argument of ``DistGraphServer``.
    tmpdir : pathlib.Path
        Directory holding ``<graph_name>.json``; joined with the ``/``
        operator.
    disable_shared_mem : bool
        Forwarded to ``DistGraphServer``.
    graph_name : str
        Base name of the partition configuration file.
    graph_format : list of str, optional
        Forwarded to ``DistGraphServer``. ``None`` (the default) means
        ``["csc", "coo"]``; a fresh list is built on every call so no list
        object is shared between calls.
    use_graphbolt : bool, optional
        Forwarded to ``DistGraphServer``.
    """
    if graph_format is None:
        graph_format = ["csc", "coo"]
    g = DistGraphServer(
        rank,
        "rpc_ip_config.txt",
        1,
        1,
        tmpdir / (graph_name + ".json"),
        disable_shared_mem=disable_shared_mem,
        graph_format=graph_format,
        use_graphbolt=use_graphbolt,
    )
    g.start()
def start_find_edges_client(rank, tmpdir, disable_shared_mem, eids, etype=None):
    """Connect as a client to ``test_find_edges`` and look up edge endpoints.

    Parameters
    ----------
    rank : int
        Partition whose partition book is loaded when shared memory is
        disabled.
    tmpdir : pathlib.Path
        Directory containing ``test_find_edges.json``.
    disable_shared_mem : bool
        When true, the partition book is loaded from disk and handed to
        ``DistGraph``; otherwise ``gpb=None`` is passed.
    eids :
        Edge IDs forwarded to ``DistGraph.find_edges``.
    etype : optional
        Edge type forwarded to ``DistGraph.find_edges``.

    Returns
    -------
    tuple
        The two values returned by ``find_edges``, or ``(None, None)`` when
        the lookup raised; the traceback is printed in that case.
    """
    if disable_shared_mem:
        # load_partition returns seven values; only the partition book
        # (fourth) is needed here.
        _g, _nfeat, _efeat, gpb, _name, _ntypes, _etypes = load_partition(
            tmpdir / "test_find_edges.json", rank
        )
    else:
        gpb = None

    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph("test_find_edges", gpb=gpb)
    try:
        src, dst = dist_graph.find_edges(eids, etype=etype)
    except Exception:
        print(traceback.format_exc())
        src = dst = None
    dgl.distributed.exit_client()
    return src, dst
def check_rpc_find_edges_shuffle(tmpdir, num_server):
    """Compare distributed ``find_edges`` with the local graph after shuffling.

    Cora is partitioned with ``return_mapping=True`` so IDs seen by the
    distributed graph can be mapped back through ``orig_nid``/``orig_eid``.
    One hundred random edge IDs are resolved both locally and through a
    client; the endpoints must match.

    Parameters
    ----------
    tmpdir : pathlib.Path
        Directory the partition files are written to.
    num_server : int
        Number of servers; also used as the number of partitions.
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = CitationGraphDataset("cora")[0]
    num_parts = num_server

    orig_nid, orig_eid = partition_graph(
        g,
        "test_find_edges",
        num_parts,
        tmpdir,
        num_hops=1,
        part_method="metis",
        return_mapping=True,
    )

    pserver_list = []
    ctx = mp.get_context("spawn")
    for i in range(num_server):
        p = ctx.Process(
            target=start_server,
            args=(i, tmpdir, num_server > 1, "test_find_edges", ["csr", "coo"]),
        )
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    eids = F.tensor(np.random.randint(g.num_edges(), size=100))
    u, v = g.find_edges(orig_eid[eids])
    du, dv = start_find_edges_client(0, tmpdir, num_server > 1, eids)

    # The client has called exit_client(), so reap the servers here, as
    # check_rpc_get_degree_shuffle does. Otherwise they outlive this check
    # and the next one reuses the same rpc_ip_config.txt.
    for p in pserver_list:
        p.join()
        assert p.exitcode == 0

    du = orig_nid[du]
    dv = orig_nid[dv]
    assert F.array_equal(u, du)
    assert F.array_equal(v, dv)
def check_rpc_hetero_find_edges_shuffle(tmpdir, num_server):
    """Compare distributed ``find_edges`` on a heterograph with the local graph.

    A random heterograph is partitioned with ``return_mapping=True``. For 100
    random edges of type ``"r12"`` the check asserts that a malformed
    two-element edge type is rejected, that the short and canonical edge type
    forms agree locally, and that a client's result, mapped back through
    ``orig_nid``, matches the local endpoints.

    Parameters
    ----------
    tmpdir : pathlib.Path
        Directory the partition files are written to.
    num_server : int
        Number of servers; also used as the number of partitions.
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = create_random_hetero()
    num_parts = num_server

    orig_nid, orig_eid = partition_graph(
        g,
        "test_find_edges",
        num_parts,
        tmpdir,
        num_hops=1,
        part_method="metis",
        return_mapping=True,
    )

    pserver_list = []
    ctx = mp.get_context("spawn")
    for i in range(num_server):
        p = ctx.Process(
            target=start_server,
            args=(i, tmpdir, num_server > 1, "test_find_edges", ["csr", "coo"]),
        )
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    test_etype = g.to_canonical_etype("r12")
    eids = F.tensor(np.random.randint(g.num_edges(test_etype), size=100))
    # A two-element edge type is malformed and must be rejected. Catching
    # ``Exception`` rather than everything keeps KeyboardInterrupt and
    # SystemExit from being counted as the expected failure.
    with pytest.raises(Exception):
        _, _ = g.find_edges(orig_eid[test_etype][eids], etype=("n1", "r12"))
    u, v = g.find_edges(orig_eid[test_etype][eids], etype="r12")
    u1, v1 = g.find_edges(orig_eid[test_etype][eids], etype=("n1", "r12", "n2"))
    assert F.array_equal(u, u1)
    assert F.array_equal(v, v1)
    du, dv = start_find_edges_client(
        0, tmpdir, num_server > 1, eids, etype="r12"
    )

    # The client has called exit_client(); reap the servers so they do not
    # outlive this check and collide with the next one.
    for p in pserver_list:
        p.join()
        assert p.exitcode == 0

    du = orig_nid["n1"][du]
    dv = orig_nid["n2"][dv]
    assert F.array_equal(u, du)
    assert F.array_equal(v, dv)
def check_rpc_get_degree_shuffle(tmpdir, num_server):
    """Check distributed in/out degree queries on a partitioned Cora graph.

    Cora is split into ``num_server`` METIS partitions under ``tmpdir`` and
    one server process is spawned per partition.  Degrees for 100 random
    node IDs and for all nodes are fetched through
    ``start_get_degrees_client``; after every server has exited with code 0
    they are compared with the degrees of the original graph, indexed
    through the node mapping returned by ``partition_graph``.
    """
    graph_name = "test_get_degrees"
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = CitationGraphDataset("cora")[0]
    orig_nid, _ = partition_graph(
        g,
        graph_name,
        num_server,
        tmpdir,
        num_hops=1,
        part_method="metis",
        return_mapping=True,
    )

    spawn_ctx = mp.get_context("spawn")
    servers = []
    for server_id in range(num_server):
        server = spawn_ctx.Process(
            target=start_server,
            args=(server_id, tmpdir, num_server > 1, graph_name),
        )
        server.start()
        time.sleep(1)
        servers.append(server)

    nids = F.tensor(np.random.randint(g.num_nodes(), size=100))
    degrees = start_get_degrees_client(0, tmpdir, num_server > 1, nids)
    in_degs, out_degs, all_in_degs, all_out_degs = degrees

    print("Done get_degree")
    for server in servers:
        server.join()
        assert server.exitcode == 0

    print("check results")
    queried_orig_nids = orig_nid[nids]
    assert F.array_equal(g.in_degrees(queried_orig_nids), in_degs)
    assert F.array_equal(g.in_degrees(orig_nid), all_in_degs)
    assert F.array_equal(g.out_degrees(queried_orig_nids), out_degs)
    assert F.array_equal(g.out_degrees(orig_nid), all_out_degs)
"distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_get_degree_shuffle(Path(tmpdirname), num_server) # @unittest.skipIf(os.name == 'nt', reason='Do not support windows yet') # @unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now') @unittest.skip("Only support partition with shuffle") def test_rpc_sampling(): reset_envs() import tempfile os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_sampling(Path(tmpdirname), 1) def check_rpc_sampling_shuffle( tmpdir, num_server, num_groups=1, use_graphbolt=False, return_eids=False, node_id_dtype=None, replace=False, ): generate_ip_config("rpc_ip_config.txt", num_server, num_server) g = CitationGraphDataset("cora")[0] num_parts = num_server num_hops = 1 orig_nids, orig_eids = partition_graph( g, "test_sampling", num_parts, tmpdir, num_hops=num_hops, part_method="metis", return_mapping=True, use_graphbolt=use_graphbolt, store_eids=return_eids, ) pserver_list = [] ctx = mp.get_context("spawn") for i in range(num_server): p = ctx.Process( target=start_server, args=( i, tmpdir, num_server > 1, "test_sampling", ["csc", "coo"], use_graphbolt, ), ) p.start() time.sleep(1) pserver_list.append(p) pclient_list = [] num_clients = 1 for client_id in range(num_clients): for group_id in range(num_groups): p = ctx.Process( target=start_sample_client_shuffle, args=( client_id, tmpdir, num_server > 1, g, num_server, group_id, orig_nids, orig_eids, use_graphbolt, return_eids, node_id_dtype, replace, ), ) p.start() time.sleep(1) # avoid race condition when instantiating DistGraph pclient_list.append(p) for p in pclient_list: p.join() assert p.exitcode == 0 for p in pserver_list: p.join() assert p.exitcode == 0 def start_hetero_sample_client( rank, tmpdir, disable_shared_mem, nodes, use_graphbolt=False, return_eids=False, replace=False, ): gpb = None if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition( tmpdir / 
"test_sampling.json", rank ) dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph("test_sampling", gpb=gpb) assert "feat" in dist_graph.nodes["n1"].data assert "feat" not in dist_graph.nodes["n2"].data assert "feat" not in dist_graph.nodes["n3"].data nodes = { k: v.type(dist_graph.idtype).clone().detach() for k, v in nodes.items() } if gpb is None: gpb = dist_graph.get_partition_book() try: # Enable santity check in distributed sampling. os.environ["DGL_DIST_DEBUG"] = "1" sampled_graph = sample_neighbors( dist_graph, nodes, 3, replace=replace, use_graphbolt=use_graphbolt ) block = dgl.to_block(sampled_graph, nodes) if not use_graphbolt or return_eids: block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] except Exception as e: print(traceback.format_exc()) block = None dgl.distributed.exit_client() return block, gpb def start_hetero_etype_sample_client( rank, tmpdir, disable_shared_mem, fanout=3, nodes=None, etype_sorted=False, use_graphbolt=False, return_eids=False, ): gpb = None if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition( tmpdir / "test_sampling.json", rank ) dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph("test_sampling", gpb=gpb) assert "feat" in dist_graph.nodes["n1"].data assert "feat" not in dist_graph.nodes["n2"].data assert "feat" not in dist_graph.nodes["n3"].data nodes = { k: v.type(dist_graph.idtype).clone().detach() for k, v in nodes.items() } if (not use_graphbolt) and dist_graph.local_partition is not None: # Check whether etypes are sorted in dist_graph local_g = dist_graph.local_partition local_nids = np.arange(local_g.num_nodes()) for lnid in local_nids: leids = local_g.in_edges(lnid, form="eid") letids = F.asnumpy(local_g.edata[dgl.ETYPE][leids]) _, idices = np.unique(letids, return_index=True) assert np.all(idices[:-1] <= idices[1:]) if gpb is None: gpb = dist_graph.get_partition_book() try: # Enable santity check in distributed sampling. 
os.environ["DGL_DIST_DEBUG"] = "1" sampled_graph = sample_etype_neighbors( dist_graph, nodes, fanout, etype_sorted=etype_sorted, use_graphbolt=use_graphbolt, ) block = dgl.to_block(sampled_graph, nodes) if sampled_graph.num_edges() > 0: if not use_graphbolt or return_eids: block.edata[dgl.EID] = sampled_graph.edata[dgl.EID] except Exception as e: print(traceback.format_exc()) block = None dgl.distributed.exit_client() return block, gpb def check_rpc_hetero_sampling_shuffle( tmpdir, num_server, use_graphbolt=False, return_eids=False, replace=False ): generate_ip_config("rpc_ip_config.txt", num_server, num_server) g = create_random_hetero() num_parts = num_server num_hops = 1 orig_nid_map, orig_eid_map = partition_graph( g, "test_sampling", num_parts, tmpdir, num_hops=num_hops, part_method="metis", return_mapping=True, use_graphbolt=use_graphbolt, store_eids=return_eids, ) pserver_list = [] ctx = mp.get_context("spawn") for i in range(num_server): p = ctx.Process( target=start_server, args=( i, tmpdir, num_server > 1, "test_sampling", ["csc", "coo"], use_graphbolt, ), ) p.start() time.sleep(1) pserver_list.append(p) nodes = {"n3": torch.tensor([0, 10, 99, 66, 124, 208], dtype=g.idtype)} block, gpb = start_hetero_sample_client( 0, tmpdir, num_server > 1, nodes=nodes, use_graphbolt=use_graphbolt, return_eids=return_eids, replace=replace, ) for p in pserver_list: p.join() assert p.exitcode == 0 for c_etype in block.canonical_etypes: src_type, etype, dst_type = c_etype src, dst = block.edges(etype=etype) # These are global Ids after shuffling. 
def get_degrees(g, nids, ntype):
    """Sum the degrees of ``nids`` over every edge type touching ``ntype``.

    For each canonical edge type of ``g``: if its source type is ``ntype``
    the out-degrees are added; otherwise, if its destination type is
    ``ntype``, the in-degrees are added.  An edge type whose source and
    destination are both ``ntype`` therefore contributes out-degrees only.

    Returns an int64 tensor with one entry per element of ``nids``.
    """
    total = F.zeros((len(nids),), dtype=F.int64)
    for src_ntype, rel, dst_ntype in g.canonical_etypes:
        if src_ntype == ntype:
            contribution = g.out_degrees(u=nids, etype=rel)
        elif dst_ntype == ntype:
            contribution = g.in_degrees(v=nids, etype=rel)
        else:
            # This edge type does not touch ``ntype``.
            continue
        total += contribution
    return total
def check_rpc_hetero_etype_sampling_shuffle(
    tmpdir,
    num_server,
    graph_formats=None,
    use_graphbolt=False,
    return_eids=False,
):
    """Check per-edge-type sampling on a dense random heterogeneous graph.

    The graph from ``create_random_hetero(dense=True)`` is split into
    ``num_server`` METIS partitions under ``tmpdir`` and one server process
    is spawned per partition.  Six "n3" seed nodes are sampled with a fanout
    of 3 per edge type through ``start_hetero_etype_sample_client``.  After
    every server has exited with code 0 the helper checks that

    * the client returned a block (it returns ``None`` when sampling fails),
    * both edge types ending in "n3" yield ``len(seeds) * fanout`` edges,
    * every sampled edge exists in the original graph, and
    * unless GraphBolt is used without ``return_eids``, the sampled edge IDs
      map back to the same endpoints.

    ``graph_formats`` is forwarded to ``partition_graph``; sampling is told
    the edge types are sorted when it contains "csc" or "csr".
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = create_random_hetero(dense=True)
    num_parts = num_server
    num_hops = 1

    orig_nid_map, orig_eid_map = partition_graph(
        g,
        "test_sampling",
        num_parts,
        tmpdir,
        num_hops=num_hops,
        part_method="metis",
        return_mapping=True,
        graph_formats=graph_formats,
        use_graphbolt=use_graphbolt,
        store_eids=return_eids,
    )

    pserver_list = []
    ctx = mp.get_context("spawn")
    for i in range(num_server):
        p = ctx.Process(
            target=start_server,
            args=(
                i,
                tmpdir,
                num_server > 1,
                "test_sampling",
                ["csc", "coo"],
                use_graphbolt,
            ),
        )
        p.start()
        time.sleep(1)
        pserver_list.append(p)

    per_etype_fanout = 3
    fanout = {etype: per_etype_fanout for etype in g.canonical_etypes}
    etype_sorted = graph_formats is not None and (
        "csc" in graph_formats or "csr" in graph_formats
    )
    seeds = torch.tensor([0, 10, 99, 66, 124, 208], dtype=g.idtype)
    nodes = {"n3": seeds}
    block, gpb = start_hetero_etype_sample_client(
        0,
        tmpdir,
        num_server > 1,
        fanout,
        nodes=nodes,
        etype_sorted=etype_sorted,
        use_graphbolt=use_graphbolt,
        return_eids=return_eids,
    )
    print("Done sampling")
    for p in pserver_list:
        p.join()
        assert p.exitcode == 0

    # Fail with a clear message rather than an AttributeError on None.
    assert block is not None, "sampling client failed; see traceback above"

    # Each seed should receive exactly ``per_etype_fanout`` edges per
    # incoming edge type (6 seeds * 3 = 18).
    expected_num_edges = len(seeds) * per_etype_fanout
    for incoming_etype in (("n1", "r13", "n3"), ("n2", "r23", "n3")):
        src, dst = block.edges(etype=incoming_etype)
        assert len(src) == expected_num_edges

    for c_etype in block.canonical_etypes:
        src_type, etype, dst_type = c_etype
        src, dst = block.edges(etype=etype)
        # These are global Ids after shuffling.
        shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src)
        shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst)
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))

        assert np.all(
            F.asnumpy(g.has_edges_between(orig_src, orig_dst, etype=etype))
        )

        # The client attaches edge IDs only when GraphBolt is off or
        # ``return_eids`` was requested.
        if use_graphbolt and not return_eids:
            continue

        # Check the node Ids and edge Ids.
        shuffled_eid = block.edges[etype].data[dgl.EID]
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[c_etype], shuffled_eid))
        orig_src1, orig_dst1 = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(orig_src1) == orig_src)
        assert np.all(F.asnumpy(orig_dst1) == orig_dst)
def start_bipartite_sample_client(
    rank,
    tmpdir,
    disable_shared_mem,
    nodes,
    use_graphbolt=False,
    return_eids=False,
):
    """Sample neighbors from the bipartite "test_sampling" DistGraph.

    Connects as a client, casts the seed tensors in ``nodes`` to the graph's
    ID type, calls ``sample_neighbors`` with a fanout of 3 and converts the
    result to a block with ``dgl.to_block``.  Edge IDs are copied onto the
    block when edges were sampled and either GraphBolt is off or
    ``return_eids`` is set.

    Returns ``(block, gpb)``, where ``gpb`` is the partition book loaded
    from ``tmpdir / "test_sampling.json"`` when ``disable_shared_mem`` is
    true, otherwise the one reported by the DistGraph.

    Any failure after ``dgl.distributed.initialize`` is re-raised, but
    ``dgl.distributed.exit_client()`` is always called first so a failed
    assertion or sampling error does not skip the client shutdown.
    """
    gpb = None
    if disable_shared_mem:
        _, _, _, gpb, _, _, _ = load_partition(
            tmpdir / "test_sampling.json", rank
        )
    dgl.distributed.initialize("rpc_ip_config.txt")
    try:
        dist_graph = DistGraph("test_sampling", gpb=gpb)
        assert "feat" in dist_graph.nodes["user"].data
        assert "feat" in dist_graph.nodes["game"].data

        nodes = {
            k: v.type(dist_graph.idtype).clone().detach()
            for k, v in nodes.items()
        }

        if gpb is None:
            gpb = dist_graph.get_partition_book()
        # Enable sanity check in distributed sampling.
        os.environ["DGL_DIST_DEBUG"] = "1"
        sampled_graph = sample_neighbors(
            dist_graph, nodes, 3, use_graphbolt=use_graphbolt
        )
        block = dgl.to_block(sampled_graph, nodes)
        if sampled_graph.num_edges() > 0:
            if not use_graphbolt or return_eids:
                block.edata[dgl.EID] = sampled_graph.edata[dgl.EID]
    finally:
        # Always shut the client down, even when the checks above fail.
        dgl.distributed.exit_client()
    return block, gpb
def check_rpc_bipartite_sampling_shuffle(
    tmpdir, num_server, use_graphbolt=False, return_eids=False
):
    """Check ``sample_neighbors()`` on a bipartite graph, non-empty case.

    The graph from ``create_random_bipartite()`` is split into
    ``num_server`` METIS partitions under ``tmpdir`` and one server process
    is spawned per partition.  Seeds are the positions where the degrees of
    the mapped "game" nodes are positive, plus "user" node 0.  After every
    server has exited with code 0, each sampled edge is mapped back to
    original IDs and must exist in the original graph; unless GraphBolt is
    used without ``return_eids``, the sampled edge IDs must also resolve to
    the same endpoints.
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = create_random_bipartite()
    orig_nid_map, orig_eid_map = partition_graph(
        g,
        "test_sampling",
        num_server,
        tmpdir,
        num_hops=1,
        part_method="metis",
        return_mapping=True,
        use_graphbolt=use_graphbolt,
        store_eids=return_eids,
    )

    spawn_ctx = mp.get_context("spawn")
    servers = []
    for server_id in range(num_server):
        server_args = (
            server_id,
            tmpdir,
            num_server > 1,
            "test_sampling",
            ["csc", "coo"],
            use_graphbolt,
        )
        server = spawn_ctx.Process(target=start_server, args=server_args)
        server.start()
        time.sleep(1)
        servers.append(server)

    game_degrees = get_degrees(g, orig_nid_map["game"], "game")
    connected_games = F.nonzero_1d(game_degrees > 0)
    seeds = {
        "game": connected_games,
        "user": torch.tensor([0], dtype=g.idtype),
    }
    block, gpb = start_bipartite_sample_client(
        0,
        tmpdir,
        num_server > 1,
        nodes=seeds,
        use_graphbolt=use_graphbolt,
        return_eids=return_eids,
    )
    print("Done sampling")
    for server in servers:
        server.join()
        assert server.exitcode == 0

    # Edge IDs are only compared when the client attached them to the block.
    verify_eids = not use_graphbolt or return_eids

    for c_etype in block.canonical_etypes:
        src_type, etype, dst_type = c_etype
        local_src, local_dst = block.edges(etype=etype)

        # These are global Ids after shuffling.
        src_nids = block.srcnodes[src_type].data[dgl.NID]
        dst_nids = block.dstnodes[dst_type].data[dgl.NID]
        shuffled_src = F.gather_row(src_nids, local_src)
        shuffled_dst = F.gather_row(dst_nids, local_dst)

        # Translate them back to the IDs of the original graph.
        orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src))
        orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst))

        edge_exists = g.has_edges_between(orig_src, orig_dst, etype=etype)
        assert np.all(F.asnumpy(edge_exists))

        if not verify_eids:
            continue

        shuffled_eid = block.edges[etype].data[dgl.EID]
        orig_eid = F.asnumpy(F.gather_row(orig_eid_map[c_etype], shuffled_eid))

        # Check the node Ids and edge Ids.
        src_from_eid, dst_from_eid = g.find_edges(orig_eid, etype=etype)
        assert np.all(F.asnumpy(src_from_eid) == orig_src)
        assert np.all(F.asnumpy(dst_from_eid) == orig_dst)
range(num_server): p = ctx.Process( target=start_server, args=( i, tmpdir, num_server > 1, "test_sampling", ["csc", "coo"], use_graphbolt, ), ) p.start() time.sleep(1) pserver_list.append(p) fanout = 3 deg = get_degrees(g, orig_nid_map["game"], "game") nids = F.nonzero_1d(deg > 0) nodes = {"game": nids, "user": torch.tensor([0], dtype=g.idtype)} block, gpb = start_bipartite_etype_sample_client( 0, tmpdir, num_server > 1, fanout, nodes=nodes, use_graphbolt=use_graphbolt, return_eids=return_eids, ) print("Done sampling") for p in pserver_list: p.join() assert p.exitcode == 0 for c_etype in block.canonical_etypes: src_type, etype, dst_type = c_etype src, dst = block.edges(etype=etype) # These are global Ids after shuffling. shuffled_src = F.gather_row(block.srcnodes[src_type].data[dgl.NID], src) shuffled_dst = F.gather_row(block.dstnodes[dst_type].data[dgl.NID], dst) orig_src = F.asnumpy(F.gather_row(orig_nid_map[src_type], shuffled_src)) orig_dst = F.asnumpy(F.gather_row(orig_nid_map[dst_type], shuffled_dst)) assert np.all( F.asnumpy(g.has_edges_between(orig_src, orig_dst, etype=etype)) ) if use_graphbolt and not return_eids: continue # Check the node Ids and edge Ids. 
@pytest.mark.parametrize("num_server", [1])
@pytest.mark.parametrize("use_graphbolt", [False, True])
@pytest.mark.parametrize("return_eids", [False, True])
@pytest.mark.parametrize("replace", [False, True])
def test_rpc_hetero_sampling_shuffle(
    num_server, use_graphbolt, return_eids, replace
):
    """Run ``check_rpc_hetero_sampling_shuffle`` in distributed mode.

    Covers every combination of GraphBolt on/off, edge IDs requested or not,
    and sampling with or without replacement, each in a fresh temporary
    directory.
    """
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "distributed"
    with tempfile.TemporaryDirectory() as tmpdirname:
        check_rpc_hetero_sampling_shuffle(
            Path(tmpdirname),
            num_server,
            use_graphbolt=use_graphbolt,
            return_eids=return_eids,
            replace=replace,
        )
"graph_formats", [None, ["csc"], ["csr"], ["csc", "coo"]] ) def test_rpc_hetero_etype_sampling_shuffle_dgl(num_server, graph_formats): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_hetero_etype_sampling_shuffle( Path(tmpdirname), num_server, graph_formats=graph_formats ) @pytest.mark.parametrize("num_server", [1]) @pytest.mark.parametrize("return_eids", [False, True]) def test_rpc_hetero_etype_sampling_shuffle_graphbolt(num_server, return_eids): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_hetero_etype_sampling_shuffle( Path(tmpdirname), num_server, use_graphbolt=True, return_eids=return_eids, ) @pytest.mark.parametrize("num_server", [1]) @pytest.mark.parametrize("use_graphbolt", [False, True]) @pytest.mark.parametrize("return_eids", [False, True]) def test_rpc_hetero_etype_sampling_empty_shuffle( num_server, use_graphbolt, return_eids ): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_hetero_etype_sampling_empty_shuffle( Path(tmpdirname), num_server, use_graphbolt=use_graphbolt, return_eids=return_eids, ) @pytest.mark.parametrize("num_server", [1]) @pytest.mark.parametrize("use_graphbolt", [False, True]) @pytest.mark.parametrize("return_eids", [False, True]) def test_rpc_bipartite_sampling_empty_shuffle( num_server, use_graphbolt, return_eids ): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_bipartite_sampling_empty( Path(tmpdirname), num_server, use_graphbolt, return_eids ) @pytest.mark.parametrize("num_server", [1]) @pytest.mark.parametrize("use_graphbolt", [False, True]) @pytest.mark.parametrize("return_eids", [False, True]) def test_rpc_bipartite_sampling_shuffle(num_server, use_graphbolt, return_eids): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with 
@pytest.mark.parametrize("num_server", [1])
@pytest.mark.parametrize("use_graphbolt", [False, True])
@pytest.mark.parametrize("return_eids", [False, True])
def test_rpc_bipartite_etype_sampling_shuffle(
    num_server, use_graphbolt, return_eids
):
    """Run the bipartite per-edge-type sampling check (non-empty results).

    Executes ``check_rpc_bipartite_etype_sampling_shuffle`` in distributed
    mode inside a fresh temporary directory for every combination of
    GraphBolt on/off and edge IDs requested or not.
    """
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "distributed"
    with tempfile.TemporaryDirectory() as workdir:
        partition_dir = Path(workdir)
        check_rpc_bipartite_etype_sampling_shuffle(
            partition_dir,
            num_server,
            use_graphbolt=use_graphbolt,
            return_eids=return_eids,
        )
def check_standalone_etype_sampling(tmpdir):
    """Check ``sample_etype_neighbors`` on Cora in standalone mode.

    Cora receives a random non-negative "prob" edge feature and the matching
    boolean "mask" (``prob > 0``), is written as a single METIS partition to
    ``tmpdir`` and is opened as a standalone DistGraph.  Three samplings
    with fanout 3 over the same five seed nodes are verified:

    * unweighted: sampled edges exist in Cora and carry Cora's edge IDs,
    * ``prob="mask"``: every sampled edge has a true mask,
    * ``prob="prob"``: every sampled edge has positive probability.
    """
    hg = CitationGraphDataset("cora")[0]
    prob = np.maximum(np.random.randn(hg.num_edges()), 0)
    mask = prob > 0
    hg.edata["prob"] = F.tensor(prob)
    hg.edata["mask"] = F.tensor(mask)

    partition_graph(
        hg,
        "test_sampling",
        1,
        tmpdir,
        num_hops=1,
        part_method="metis",
    )

    os.environ["DGL_DIST_MODE"] = "standalone"
    dgl.distributed.initialize("rpc_ip_config.txt")
    dist_graph = DistGraph(
        "test_sampling", part_config=tmpdir / "test_sampling.json"
    )

    def _sample(**sampler_kwargs):
        # A fresh seed tensor is built for every call, as in each sampling.
        seeds = torch.tensor([0, 10, 99, 66, 1023], dtype=dist_graph.idtype)
        return sample_etype_neighbors(dist_graph, seeds, 3, **sampler_kwargs)

    # Unweighted sampling.
    sampled_graph = _sample()
    src, dst = sampled_graph.edges()
    assert sampled_graph.num_nodes() == hg.num_nodes()
    assert np.all(F.asnumpy(hg.has_edges_between(src, dst)))
    expected_eids = hg.edge_ids(src, dst)
    assert np.array_equal(
        F.asnumpy(sampled_graph.edata[dgl.EID]), F.asnumpy(expected_eids)
    )

    # Sampling restricted by the boolean mask.
    masked_graph = _sample(prob="mask")
    masked_eids = F.asnumpy(masked_graph.edata[dgl.EID])
    assert mask[masked_eids].all()

    # Sampling weighted by probability.
    weighted_graph = _sample(prob="prob")
    weighted_eids = F.asnumpy(weighted_graph.edata[dgl.EID])
    assert (prob[weighted_eids] > 0).all()

    dgl.distributed.exit_client()
"cite-by", "paper"): (dst, src), }, {"paper": hg.num_nodes()}, ) partition_graph( new_hg, "test_hetero_sampling", num_parts, tmpdir, num_hops=num_hops, part_method="metis", ) os.environ["DGL_DIST_MODE"] = "standalone" dgl.distributed.initialize("rpc_ip_config.txt") dist_graph = DistGraph( "test_hetero_sampling", part_config=tmpdir / "test_hetero_sampling.json" ) sampled_graph = sample_etype_neighbors( dist_graph, torch.tensor( [0, 1, 2, 10, 99, 66, 1023, 1024, 2700, 2701], dtype=dist_graph.idtype, ), 1, ) src, dst = sampled_graph.edges(etype=("paper", "cite", "paper")) assert len(src) == 10 src, dst = sampled_graph.edges(etype=("paper", "cite-by", "paper")) assert len(src) == 10 assert sampled_graph.num_nodes() == new_hg.num_nodes() dgl.distributed.exit_client() @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf( dgl.backend.backend_name == "tensorflow", reason="Not support tensorflow for now", ) def test_standalone_sampling(): reset_envs() import tempfile os.environ["DGL_DIST_MODE"] = "standalone" with tempfile.TemporaryDirectory() as tmpdirname: check_standalone_sampling(Path(tmpdirname)) def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes): gpb = None dgl.distributed.initialize("rpc_ip_config.txt") if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition( tmpdir / "test_in_subgraph.json", rank ) dist_graph = DistGraph("test_in_subgraph", gpb=gpb) try: sampled_graph = dgl.distributed.in_subgraph(dist_graph, nodes) except Exception as e: print(traceback.format_exc()) sampled_graph = None dgl.distributed.exit_client() return sampled_graph def check_rpc_in_subgraph_shuffle(tmpdir, num_server): generate_ip_config("rpc_ip_config.txt", num_server, num_server) g = CitationGraphDataset("cora")[0] num_parts = num_server orig_nid, orig_eid = partition_graph( g, "test_in_subgraph", num_parts, tmpdir, num_hops=1, part_method="metis", return_mapping=True, ) pserver_list = [] ctx = mp.get_context("spawn") for i in 
range(num_server): p = ctx.Process( target=start_server, args=(i, tmpdir, num_server > 1, "test_in_subgraph"), ) p.start() time.sleep(1) pserver_list.append(p) nodes = torch.tensor([0, 10, 99, 66, 1024, 2008], dtype=g.idtype) sampled_graph = start_in_subgraph_client(0, tmpdir, num_server > 1, nodes) for p in pserver_list: p.join() assert p.exitcode == 0 src, dst = sampled_graph.edges() src = orig_nid[src] dst = orig_nid[dst] assert sampled_graph.num_nodes() == g.num_nodes() assert np.all(F.asnumpy(g.has_edges_between(src, dst))) subg1 = dgl.in_subgraph(g, orig_nid[nodes]) src1, dst1 = subg1.edges() assert np.all(np.sort(F.asnumpy(src)) == np.sort(F.asnumpy(src1))) assert np.all(np.sort(F.asnumpy(dst)) == np.sort(F.asnumpy(dst1))) eids = g.edge_ids(src, dst) eids1 = orig_eid[sampled_graph.edata[dgl.EID]] assert np.array_equal(F.asnumpy(eids1), F.asnumpy(eids)) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf( dgl.backend.backend_name == "tensorflow", reason="Not support tensorflow for now", ) def test_rpc_in_subgraph(): reset_envs() import tempfile os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_rpc_in_subgraph_shuffle(Path(tmpdirname), 1) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf( dgl.backend.backend_name == "tensorflow", reason="Not support tensorflow for now", ) @unittest.skipIf( dgl.backend.backend_name == "mxnet", reason="Turn off Mxnet support" ) def test_standalone_etype_sampling(): reset_envs() import tempfile with tempfile.TemporaryDirectory() as tmpdirname: os.environ["DGL_DIST_MODE"] = "standalone" check_standalone_etype_sampling_heterograph(Path(tmpdirname)) with tempfile.TemporaryDirectory() as tmpdirname: os.environ["DGL_DIST_MODE"] = "standalone" check_standalone_etype_sampling(Path(tmpdirname)) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("use_graphbolt", [False]) 
@pytest.mark.parametrize("num_parts", [1, 4])
@pytest.mark.parametrize("use_graphbolt", [False])
@pytest.mark.parametrize("prob_or_mask", ["prob", "mask"])
def test_local_sampling_homograph(num_parts, use_graphbolt, prob_or_mask):
    """Check that local neighbor sampling never returns a zero-weight edge.

    Cora is given a random ``prob`` edge feature (half of it zeroed) and a
    boolean ``mask`` edge feature, then partitioned.  For every partition the
    stored feature is sampled with ``_sample_neighbors`` and the sampled
    edges, mapped back through ``orig_eids``, must all have a positive
    ``prob``/``mask`` value in the original graph.
    """
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "distributed"
    graph_name = "test_local_sampling"

    with tempfile.TemporaryDirectory() as test_dir:
        g = CitationGraphDataset("cora")[0]

        edge_prob = torch.rand(g.num_edges())
        # The mask is taken from the probabilities *before* half are zeroed.
        edge_mask = edge_prob > 0.2
        num_zeroed = int(len(edge_prob) * 0.5)
        edge_prob[torch.randperm(len(edge_prob))[:num_zeroed]] = 0.0
        g.edata["prob"] = edge_prob
        g.edata["mask"] = edge_mask

        _, orig_eids = partition_graph(
            g,
            graph_name,
            num_parts,
            test_dir,
            num_hops=1,
            part_method="metis",
            return_mapping=True,
            use_graphbolt=use_graphbolt,
            store_eids=True,
            store_inner_node=True,
            store_inner_edge=True,
        )
        part_config = os.path.join(test_dir, f"{graph_name}.json")

        for part_id in range(num_parts):
            local_g, _, edge_feats, gpb, _, _, _ = load_partition(
                part_config,
                part_id,
                load_feats=True,
                use_graphbolt=use_graphbolt,
            )
            owned_nids = gpb.partid2nids(part_id)
            owned_eids = gpb.partid2eids(part_id)

            # The inner-node/edge flags live under different attributes
            # depending on the partition format.
            if use_graphbolt:
                inner_node_flags = local_g.node_attributes["inner_node"]
                inner_edge_flags = local_g.edge_attributes["inner_edge"]
            else:
                inner_node_flags = local_g.ndata["inner_node"]
                inner_edge_flags = local_g.edata["inner_edge"]
            assert len(owned_nids) == inner_node_flags.sum()
            assert len(owned_eids) == inner_edge_flags.sum()

            feat_key = (
                _etype_tuple_to_str(gpb.canonical_etypes[0])
                + "/"
                + prob_or_mask
            )
            local_weights = edge_feats[feat_key]
            assert len(local_weights) == len(owned_eids)
            assert len(local_weights) <= inner_edge_flags.shape[0]

            sampled_g = dgl.distributed.graph_services._sample_neighbors(
                use_graphbolt,
                local_g,
                gpb,
                owned_nids,
                5,
                prob=[local_weights],
            )
            sampled_orig_eids = orig_eids[sampled_g.global_eids]
            assert torch.all(g.edata[prob_or_mask][sampled_orig_eids] > 0)
test_local_sampling_heterograph(num_parts, use_graphbolt, prob_or_mask): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as test_dir: g = create_random_hetero() for c_etype in g.canonical_etypes: prob = torch.rand(g.num_edges(c_etype)) mask = prob > 0.2 prob[torch.randperm(len(prob))[: int(len(prob) * 0.5)]] = 0.0 g.edges[c_etype].data["prob"] = prob g.edges[c_etype].data["mask"] = mask graph_name = "test_local_sampling" _, orig_eids = partition_graph( g, graph_name, num_parts, test_dir, num_hops=1, part_method="metis", return_mapping=True, use_graphbolt=use_graphbolt, store_eids=True, store_inner_node=True, store_inner_edge=True, ) part_config = os.path.join(test_dir, f"{graph_name}.json") for part_id in range(num_parts): local_g, _, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=use_graphbolt, ) inner_global_nids = [ gpb.map_to_homo_nid(gpb.partid2nids(part_id, ntype), ntype) for ntype in gpb.ntypes ] inner_global_nids = torch.cat(inner_global_nids) inner_global_eids = { c_etype: gpb.partid2eids(part_id, c_etype) for c_etype in gpb.canonical_etypes } inner_node_data = ( local_g.node_attributes["inner_node"] if use_graphbolt else local_g.ndata["inner_node"] ) inner_edge_data = ( local_g.edge_attributes["inner_edge"] if use_graphbolt else local_g.edata["inner_edge"] ) assert len(inner_global_nids) == inner_node_data.sum() num_inner_global_eids = sum( [len(eids) for eids in inner_global_eids.values()] ) assert num_inner_global_eids == inner_edge_data.sum() _prob = [] for i, c_etype in enumerate(gpb.canonical_etypes): prob = edge_feats[ _etype_tuple_to_str(c_etype) + "/" + prob_or_mask ] assert len(prob) == len(inner_global_eids[c_etype]) assert ( len(prob) == gpb.local_etype_offset[i + 1] - gpb.local_etype_offset[i] ) assert len(prob) <= inner_edge_data.shape[0] _prob.append(prob) sampled_g = dgl.distributed.graph_services._sample_etype_neighbors( use_graphbolt, local_g, 
def check_hetero_dist_edge_dataloader_gb(
    tmpdir, num_server, use_graphbolt=True
):
    """Build a ``DistEdgeDataLoader`` over a partitioned random heterograph
    and check that the first sampled block contains nodes and edges.

    Parameters
    ----------
    tmpdir
        Directory that receives the partition files; it is combined with
        ``/`` below, so a ``pathlib.Path``-like object is expected.
    num_server
        Number of server processes to spawn; also used as the number of
        partitions.
    use_graphbolt
        Forwarded to ``partition_graph`` only.  The server arguments and the
        ``initialize`` call below hard-code ``True``.
    """
    generate_ip_config("rpc_ip_config.txt", num_server, num_server)

    g = create_random_hetero()
    # Ten random edge IDs of relation "r23" are used as seed edges.
    eids = torch.randperm(g.num_edges("r23"))[:10]
    # NOTE(review): this mask is built but never stored on ``g`` inside this
    # function, while the sampler below is given ``mask="mask"`` -- confirm
    # whether the feature is expected to come from ``create_random_hetero``.
    mask = torch.zeros(g.num_edges("r23"), dtype=torch.bool)
    mask[eids] = True

    num_parts = num_server

    # The returned ID mappings are not used in this check.
    orig_nid_map, orig_eid_map = partition_graph(
        g,
        "test_sampling",
        num_parts,
        tmpdir,
        num_hops=1,
        part_method="metis",
        return_mapping=True,
        use_graphbolt=use_graphbolt,
        store_eids=True,
    )

    part_config = tmpdir / "test_sampling.json"

    pserver_list = []
    ctx = mp.get_context("spawn")
    for i in range(num_server):
        p = ctx.Process(
            target=start_server,
            args=(
                i,
                tmpdir,
                num_server > 1,
                "test_sampling",
                ["csc", "coo"],
                True,
            ),
        )
        p.start()
        # Short pause between server launches.
        time.sleep(1)
        pserver_list.append(p)

    dgl.distributed.initialize("rpc_ip_config.txt", use_graphbolt=True)
    dist_graph = DistGraph("test_sampling", part_config=part_config)

    os.environ["DGL_DIST_DEBUG"] = "1"

    edges = {("n2", "r23", "n3"): eids}
    sampler = dgl.dataloading.MultiLayerNeighborSampler([10, 10], mask="mask")
    loader = dgl.dataloading.DistEdgeDataLoader(
        dist_graph, edges, sampler, batch_size=64
    )
    # NOTE(review): the client exits and the servers are joined *before* the
    # loader is iterated below -- confirm that this ordering is intended.
    dgl.distributed.exit_client()
    for p in pserver_list:
        p.join()
        assert p.exitcode == 0

    # Element [2] of the first minibatch is indexed with [0] to obtain one
    # block; it must contain "n1" source nodes and edges of all relations.
    block = next(iter(loader))[2][0]
    assert block.num_src_nodes("n1") > 0
    assert block.num_edges("r12") > 0
    assert block.num_edges("r13") > 0
    assert block.num_edges("r23") > 0
test_hetero_dist_edge_dataloader_gb( num_server=1, ): reset_envs() os.environ["DGL_DIST_MODE"] = "distributed" with tempfile.TemporaryDirectory() as tmpdirname: check_hetero_dist_edge_dataloader_gb(Path(tmpdirname), num_server) if __name__ == "__main__": import tempfile with tempfile.TemporaryDirectory() as tmpdirname: os.environ["DGL_DIST_MODE"] = "standalone" check_standalone_etype_sampling_heterograph(Path(tmpdirname)) with tempfile.TemporaryDirectory() as tmpdirname: os.environ["DGL_DIST_MODE"] = "standalone" check_standalone_etype_sampling(Path(tmpdirname)) check_standalone_sampling(Path(tmpdirname)) os.environ["DGL_DIST_MODE"] = "distributed" check_rpc_sampling(Path(tmpdirname), 2) check_rpc_sampling(Path(tmpdirname), 1) check_rpc_get_degree_shuffle(Path(tmpdirname), 1) check_rpc_get_degree_shuffle(Path(tmpdirname), 2) check_rpc_find_edges_shuffle(Path(tmpdirname), 2) check_rpc_find_edges_shuffle(Path(tmpdirname), 1) check_rpc_hetero_find_edges_shuffle(Path(tmpdirname), 1) check_rpc_hetero_find_edges_shuffle(Path(tmpdirname), 2) check_rpc_in_subgraph_shuffle(Path(tmpdirname), 2) check_rpc_sampling_shuffle(Path(tmpdirname), 1) check_rpc_hetero_sampling_shuffle(Path(tmpdirname), 1) check_rpc_hetero_sampling_shuffle(Path(tmpdirname), 2) check_rpc_hetero_sampling_empty_shuffle(Path(tmpdirname), 1) check_rpc_hetero_etype_sampling_shuffle(Path(tmpdirname), 1) check_rpc_hetero_etype_sampling_shuffle(Path(tmpdirname), 2) check_rpc_hetero_etype_sampling_empty_shuffle(Path(tmpdirname), 1) ================================================ FILE: tests/distributed/test_mp_dataloader.py ================================================ import multiprocessing as mp import os import tempfile import time import unittest import uuid import backend as F import dgl import numpy as np import pytest import torch as th from dgl.data import CitationGraphDataset from dgl.distributed import ( DistDataLoader, DistGraph, DistGraphServer, load_partition, partition_graph, ) from scipy 
def _unique_rand_graph(num_nodes=1000, num_edges=10 * 1000):
    """Create a random graph in which every edge has exactly one reverse edge.

    ``num_edges`` distinct unordered node pairs ``{u, v}`` with ``u != v`` are
    drawn by rejection sampling.  The returned graph stores each pair in both
    directions: edges ``0 .. E-1`` are the sampled ``(u, v)`` edges and edges
    ``E .. 2E-1`` are the matching ``(v, u)`` edges.

    Parameters
    ----------
    num_nodes : int
        Number of nodes in the graph; node IDs are drawn from
        ``[0, num_nodes)``.
    num_edges : int
        Number of distinct unordered pairs to sample (the graph ends up with
        ``2 * num_edges`` edges).

    Returns
    -------
    g : DGLGraph
        The graph with ``num_nodes`` nodes.
    reverse_eids : torch.Tensor
        ``reverse_eids[i]`` is the ID of the reverse of edge ``i``.

    Raises
    ------
    ValueError
        If more pairs are requested than exist for ``num_nodes`` nodes
        (rejection sampling would otherwise never terminate).
    """
    max_pairs = num_nodes * (num_nodes - 1) // 2
    if num_edges > max_pairs:
        raise ValueError(
            f"Cannot sample {num_edges} unique node pairs from "
            f"{num_nodes} nodes (at most {max_pairs} exist)."
        )

    edges_set = set()
    while len(edges_set) < num_edges:
        # ``np.random.randint`` excludes the upper bound, so ``num_nodes``
        # (not ``num_nodes - 1``) is needed for the last node to be drawn.
        src = np.random.randint(0, num_nodes)
        dst = np.random.randint(0, num_nodes)
        if (
            src != dst
            and (src, dst) not in edges_set
            and (dst, src) not in edges_set
        ):
            edges_set.add((src, dst))

    # ``zip(*empty)`` yields nothing, so handle the zero-edge case explicitly.
    src_list, dst_list = zip(*edges_set) if edges_set else ((), ())
    src = th.tensor(src_list, dtype=th.long)
    dst = th.tensor(dst_list, dtype=th.long)
    # Pass ``num_nodes`` so the node count does not depend on which IDs
    # happened to be sampled.
    g = dgl.graph(
        (th.cat([src, dst]), th.cat([dst, src])), num_nodes=num_nodes
    )
    E = len(src)
    reverse_eids = th.cat([th.arange(E, 2 * E), th.arange(0, E)])
    return g, reverse_eids
def start_server(
    rank,
    ip_config,
    part_config,
    disable_shared_mem,
    num_clients,
    use_graphbolt=False,
):
    """Construct a ``DistGraphServer`` for partition ``rank`` and start it.

    The server is created with a fixed third positional argument of ``1``
    and the graph formats ``["csc", "coo"]``; the remaining arguments are
    forwarded unchanged.  The call to ``start()`` is the last statement.
    """
    print(f"server: #clients={num_clients}")
    server_options = {
        "disable_shared_mem": disable_shared_mem,
        "graph_format": ["csc", "coo"],
        "use_graphbolt": use_graphbolt,
    }
    server = DistGraphServer(
        rank, ip_config, 1, num_clients, part_config, **server_options
    )
    server.start()
@unittest.skip(reason="Skip due to glitch in CI")
def test_standalone():
    """Run the ``DistDataLoader`` check against Cora in standalone mode.

    Cora is partitioned into a single part inside a temporary directory and
    ``start_dist_dataloader`` is run in-process with ``drop_last=True``.
    """
    reset_envs()
    with tempfile.TemporaryDirectory() as test_dir:
        ip_config = os.path.join(test_dir, "ip_config.txt")
        generate_ip_config(ip_config, 1, 1)

        g = CitationGraphDataset("cora")[0]
        num_parts = 1
        num_hops = 1
        graph_name = f"graph_{uuid.uuid4()}"
        orig_nid, orig_eid = partition_graph(
            g,
            graph_name,
            num_parts,
            test_dir,
            num_hops=num_hops,
            part_method="metis",
            return_mapping=True,
        )
        part_config = os.path.join(test_dir, f"{graph_name}.json")
        os.environ["DGL_DIST_MODE"] = "standalone"
        # Let any exception propagate.  Catching ``Exception`` here would
        # also swallow the ``AssertionError``s raised by the checks inside
        # ``start_dist_dataloader`` and make this test impossible to fail.
        start_dist_dataloader(
            0, ip_config, part_config, 1, True, orig_nid, orig_eid
        )
num_workers, orig_nid, groundtruth_g, ): import dgl import torch as th dgl.distributed.initialize(ip_config) gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: _, _, _, gpb, _, _, _ = load_partition(part_config, rank) num_edges_to_sample = 202 batch_size = 32 graph_name = os.path.splitext(os.path.basename(part_config))[0] dist_graph = DistGraph(graph_name, gpb=gpb, part_config=part_config) assert len(dist_graph.ntypes) == len(groundtruth_g.ntypes) assert len(dist_graph.etypes) == len(groundtruth_g.etypes) if len(dist_graph.etypes) == 1: train_eid = th.arange(num_edges_to_sample) else: train_eid = {dist_graph.etypes[0]: th.arange(num_edges_to_sample)} for i in range(num_server): part, _, _, _, _, _, _ = load_partition(part_config, i) num_negs = 5 sampler = dgl.dataloading.MultiLayerNeighborSampler([5, 10]) negative_sampler = dgl.dataloading.negative_sampler.Uniform(num_negs) dataloader = dgl.distributed.DistEdgeDataLoader( dist_graph, train_eid, sampler, batch_size=batch_size, negative_sampler=negative_sampler, shuffle=True, drop_last=False, num_workers=num_workers, ) for _ in range(2): for _, (_, pos_graph, neg_graph, blocks) in zip( range(0, num_edges_to_sample, batch_size), dataloader ): block = blocks[-1] for src_type, etype, dst_type in block.canonical_etypes: o_src, o_dst = block.edges(etype=etype) src_nodes_id = block.srcnodes[src_type].data[dgl.NID][o_src] dst_nodes_id = block.dstnodes[dst_type].data[dgl.NID][o_dst] src_nodes_id = orig_nid[src_type][src_nodes_id] dst_nodes_id = orig_nid[dst_type][dst_nodes_id] has_edges = groundtruth_g.has_edges_between( src_nodes_id, dst_nodes_id, etype=etype ) assert np.all(F.asnumpy(has_edges)) assert np.all( F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) == F.asnumpy(pos_graph.nodes[dst_type].data[dgl.NID]) ) assert np.all( F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) == F.asnumpy(neg_graph.nodes[dst_type].data[dgl.NID]) ) assert pos_graph.num_edges() * num_negs == neg_graph.num_edges() del 
def check_neg_dataloader(g, num_server, num_workers):
    """Partition ``g``, start the servers and run the negative-sampling
    dataloader check in a separate trainer process.

    Parameters
    ----------
    g : DGLGraph
        Ground-truth graph; it is partitioned into ``num_server`` parts and
        also passed to the trainer for verification.
    num_server : int
        Number of server processes (and partitions).
    num_workers : int
        Number of sampler workers; each server is told to expect
        ``num_workers + 1`` clients.

    Every spawned process must exit with code 0.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        # Keep the IP config next to the partition files so the test does
        # not write into (or collide in) the current working directory.
        ip_config = os.path.join(test_dir, "ip_config.txt")
        generate_ip_config(ip_config, num_server, num_server)

        num_parts = num_server
        num_hops = 1
        graph_name = f"graph_{uuid.uuid4()}"
        # Only the node-ID mapping is needed by the trainer.
        orig_nid, _ = partition_graph(
            g,
            graph_name,
            num_parts,
            test_dir,
            num_hops=num_hops,
            part_method="metis",
            return_mapping=True,
        )
        part_config = os.path.join(test_dir, f"{graph_name}.json")
        # The trainer indexes the mapping by node type, so wrap the
        # non-dict mapping under the graph's first node type.
        if not isinstance(orig_nid, dict):
            orig_nid = {g.ntypes[0]: orig_nid}

        pserver_list = []
        ctx = mp.get_context("spawn")
        for i in range(num_server):
            p = ctx.Process(
                target=start_server,
                args=(
                    i,
                    ip_config,
                    part_config,
                    num_server > 1,
                    num_workers + 1,
                ),
            )
            p.start()
            time.sleep(1)
            pserver_list.append(p)

        # Set after the servers are launched and before the trainer is.
        os.environ["DGL_DIST_MODE"] = "distributed"
        os.environ["DGL_NUM_SAMPLER"] = str(num_workers)

        ptrainer_list = []
        p = ctx.Process(
            target=start_dist_neg_dataloader,
            args=(
                0,
                ip_config,
                part_config,
                num_server,
                num_workers,
                orig_nid,
                g,
            ),
        )
        p.start()
        ptrainer_list.append(p)

        for p in pserver_list:
            p.join()
            assert p.exitcode == 0
        for p in ptrainer_list:
            p.join()
            assert p.exitcode == 0
def start_node_dataloader(
    rank,
    ip_config,
    part_config,
    num_server,
    num_workers,
    orig_nid,
    orig_eid,
    groundtruth_g,
    use_graphbolt=False,
    return_eids=False,
    prob_or_mask=None,
    use_deprecated_dataloader=False,
):
    """Trainer-side check of the distributed node dataloader.

    Connects to the servers, builds a ``DistGraph`` and iterates a node
    dataloader, verifying every sampled edge of the last block against
    ``groundtruth_g``.

    Parameters
    ----------
    rank
        Partition to load the partition book from when shared memory is
        disabled (``num_server > 1``).
    ip_config, part_config
        Paths passed to ``dgl.distributed.initialize`` / ``DistGraph``; the
        graph name is the base name of ``part_config`` without extension.
    num_server, num_workers
        Number of servers and of dataloader workers.
    orig_nid, orig_eid
        Mappings indexed first by node type / canonical edge type and then
        by the ID seen in the sampled block, yielding IDs that are valid in
        ``groundtruth_g``.
    groundtruth_g
        The graph the sampled edges are checked against.
    use_graphbolt, return_eids
        When ``use_graphbolt`` is true and ``return_eids`` is false, blocks
        must not carry ``dgl.EID`` and the edge-ID checks are skipped.
    prob_or_mask
        ``None``, ``"prob"`` or ``"mask"``; selects the sampler's edge
        feature.  Any other value raises ``ValueError``.
    use_deprecated_dataloader
        Use ``dgl.dataloading.DistNodeDataLoader`` instead of
        ``dgl.distributed.DistNodeDataLoader``.
    """
    dgl.distributed.initialize(ip_config, use_graphbolt=use_graphbolt)
    gpb = None
    disable_shared_mem = num_server > 1
    if disable_shared_mem:
        # Only the partition book (fourth element) is needed.
        _, _, _, gpb, _, _, _ = load_partition(part_config, rank)
    num_nodes_to_sample = 202
    batch_size = 32
    graph_name = os.path.splitext(os.path.basename(part_config))[0]
    dist_graph = DistGraph(
        graph_name,
        gpb=gpb,
        part_config=part_config,
    )
    assert len(dist_graph.ntypes) == len(groundtruth_g.ntypes)
    assert len(dist_graph.etypes) == len(groundtruth_g.etypes)
    # A single edge type takes a plain ID tensor; otherwise the seeds are
    # given per node type, here only for "n3".
    if len(dist_graph.etypes) == 1:
        train_nid = th.arange(num_nodes_to_sample, dtype=dist_graph.idtype)
    else:
        train_nid = {
            "n3": th.arange(num_nodes_to_sample, dtype=dist_graph.idtype)
        }

    # NOTE(review): every partition is loaded and the result discarded --
    # presumably a smoke check that all partitions are loadable; confirm.
    for i in range(num_server):
        part, _, _, _, _, _, _ = load_partition(part_config, i)

    # Create sampler
    _prob = None
    _mask = None
    if prob_or_mask is None:
        pass
    elif prob_or_mask == "prob":
        _prob = "prob"
    elif prob_or_mask == "mask":
        _mask = "mask"
    else:
        raise ValueError(f"Unsupported prob type: {prob_or_mask}")
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [
            (
                # test dict for hetero
                {etype: 5 for etype in dist_graph.etypes}
                if len(dist_graph.etypes) > 1
                else 5
            ),
            10,
        ],
        prob=_prob,
        mask=_mask,
    )  # test int for hetero

    # Enable sanity check in distributed sampling.
    os.environ["DGL_DIST_DEBUG"] = "1"

    # We need to test creating DistDataLoader multiple times.
    for i in range(2):
        # Create DataLoader for constructing blocks
        dataloader_cls = (
            dgl.dataloading.DistNodeDataLoader
            if use_deprecated_dataloader
            else dgl.distributed.DistNodeDataLoader
        )
        dataloader = dataloader_cls(
            dist_graph,
            train_nid,
            sampler,
            batch_size=batch_size,
            shuffle=True,
            drop_last=False,
            num_workers=num_workers,
        )

        # Iterate the same dataloader twice.
        for _ in range(2):
            # ``zip`` with the range stops after at most
            # ceil(num_nodes_to_sample / batch_size) minibatches.
            for idx, (_, _, blocks) in zip(
                range(0, num_nodes_to_sample, batch_size), dataloader
            ):
                # Only the last block of each minibatch is verified.
                block = blocks[-1]
                for c_etype in block.canonical_etypes:
                    src_type, _, dst_type = c_etype
                    o_src, o_dst = block.edges(etype=c_etype)
                    # Block-local endpoints -> IDs stored in dgl.NID ->
                    # IDs of the ground-truth graph.
                    src_nodes_id = block.srcnodes[src_type].data[dgl.NID][o_src]
                    dst_nodes_id = block.dstnodes[dst_type].data[dgl.NID][o_dst]
                    src_nodes_id = orig_nid[src_type][src_nodes_id]
                    dst_nodes_id = orig_nid[dst_type][dst_nodes_id]
                    has_edges = groundtruth_g.has_edges_between(
                        src_nodes_id, dst_nodes_id, etype=c_etype
                    )
                    assert np.all(F.asnumpy(has_edges))

                    if use_graphbolt and not return_eids:
                        # No edge IDs are expected in this mode, so there
                        # is nothing further to compare.
                        assert dgl.EID not in block.edges[c_etype].data
                        continue
                    eids = orig_eid[c_etype][block.edges[c_etype].data[dgl.EID]]
                    expected_eids = groundtruth_g.edge_ids(
                        src_nodes_id, dst_nodes_id, etype=c_etype
                    )
                    assert th.equal(
                        eids, expected_eids
                    ), f"{eids} != {expected_eids}"
                    # Verify the prob/mask functionality.
                    if prob_or_mask is not None:
                        prob_data = groundtruth_g.edges[c_etype].data[
                            prob_or_mask
                        ][eids]
                        assert th.all(prob_data > 0)
        del dataloader
    # this is needed since there's two test here in one process
    dgl.distributed.exit_client()
for i in range(2): # Create DataLoader for constructing blocks dataloader_cls = ( dgl.dataloading.DistEdgeDataLoader if use_deprecated_dataloader else dgl.distributed.DistEdgeDataLoader ) dataloader = dataloader_cls( dist_graph, train_eid, sampler, batch_size=batch_size, shuffle=True, drop_last=False, num_workers=num_workers, exclude=exclude, reverse_eids=reverse_eids, reverse_etypes=reverse_etypes, negative_sampler=negative_sampler, ) for _ in range(2): for _, minibatch in zip( range(0, num_edges_to_sample, batch_size), dataloader ): if negative: _, pos_pair_graph, neg_pair_graph, blocks = minibatch else: _, pos_pair_graph, blocks = minibatch block = blocks[-1] for src_type, etype, dst_type in block.canonical_etypes: o_src, o_dst = block.edges(etype=etype) src_nodes_id = block.srcnodes[src_type].data[dgl.NID][o_src] dst_nodes_id = block.dstnodes[dst_type].data[dgl.NID][o_dst] src_nodes_id = orig_nid[src_type][src_nodes_id] dst_nodes_id = orig_nid[dst_type][dst_nodes_id] has_edges = groundtruth_g.has_edges_between( src_nodes_id, dst_nodes_id, etype=etype ) assert np.all(F.asnumpy(has_edges)) assert np.all( F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) == F.asnumpy( pos_pair_graph.nodes[dst_type].data[dgl.NID] ) ) if negative: assert np.all( F.asnumpy(block.dstnodes[dst_type].data[dgl.NID]) == F.asnumpy( neg_pair_graph.nodes[dst_type].data[dgl.NID] ) ) if ( dgl.EID not in block.edges[(src_type, etype, dst_type)].data ): continue sampled_eids = block.edges[ (src_type, etype, dst_type) ].data[dgl.EID] sampled_orig_eids = orig_eid[(src_type, etype, dst_type)][ sampled_eids ] raw_src, raw_dst = groundtruth_g.find_edges( sampled_orig_eids, etype=(src_type, etype, dst_type) ) sampled_src, sampled_dst = block.edges( etype=(src_type, etype, dst_type) ) sampled_orig_src = block.nodes[src_type].data[dgl.NID][ sampled_src ] sampled_orig_dst = block.nodes[dst_type].data[dgl.NID][ sampled_dst ] assert th.equal( raw_src, orig_nid[src_type][sampled_orig_src] ) assert th.equal( 
def check_dataloader(
    g,
    num_server,
    num_workers,
    dataloader_type,
    use_graphbolt=False,
    return_eids=False,
    exclude=None,
    reverse_eids=None,
    reverse_etypes=None,
    negative=False,
    prob_or_mask=None,
    use_deprecated_dataloader=False,
):
    """Partition ``g``, start the servers and run one dataloader trainer.

    Parameters
    ----------
    g : DGLGraph
        Ground-truth graph; partitioned into ``num_server`` parts and passed
        to the trainer for verification.
    num_server : int
        Number of server processes (and partitions).
    num_workers : int
        Number of sampler workers; each server is told to expect
        ``num_workers + 1`` clients.
    dataloader_type : str
        ``"node"`` runs ``start_node_dataloader``; ``"edge"`` runs
        ``start_edge_dataloader``.
    use_graphbolt, return_eids
        Forwarded to ``partition_graph`` (``return_eids`` as ``store_eids``),
        to the servers and to the trainer.
    exclude, reverse_eids, reverse_etypes, negative
        Forwarded to the edge trainer only.
    prob_or_mask, use_deprecated_dataloader
        Forwarded to the trainer.

    Raises
    ------
    ValueError
        If ``dataloader_type`` is neither ``"node"`` nor ``"edge"``.

    Every spawned process must exit with code 0.
    """
    # Validate before any process is started: with an unknown type no
    # trainer would be launched, yet the servers are still joined below.
    if dataloader_type not in ("node", "edge"):
        raise ValueError(f"Unsupported dataloader type: {dataloader_type}")

    with tempfile.TemporaryDirectory() as test_dir:
        # Keep the IP config next to the partition files so the test does
        # not write into (or collide in) the current working directory.
        ip_config = os.path.join(test_dir, "ip_config.txt")
        generate_ip_config(ip_config, num_server, num_server)

        num_parts = num_server
        num_hops = 1
        graph_name = f"graph_{uuid.uuid4()}"
        orig_nid, orig_eid = partition_graph(
            g,
            graph_name,
            num_parts,
            test_dir,
            num_hops=num_hops,
            part_method="metis",
            return_mapping=True,
            use_graphbolt=use_graphbolt,
            store_eids=return_eids,
        )
        part_config = os.path.join(test_dir, f"{graph_name}.json")
        # The trainers index the mappings by node type / canonical edge
        # type, so wrap non-dict mappings under the graph's first type.
        if not isinstance(orig_nid, dict):
            orig_nid = {g.ntypes[0]: orig_nid}
        if not isinstance(orig_eid, dict):
            orig_eid = {g.canonical_etypes[0]: orig_eid}

        pserver_list = []
        ctx = mp.get_context("spawn")
        for i in range(num_server):
            p = ctx.Process(
                target=start_server,
                args=(
                    i,
                    ip_config,
                    part_config,
                    num_server > 1,
                    num_workers + 1,
                    use_graphbolt,
                ),
            )
            p.start()
            time.sleep(1)
            pserver_list.append(p)

        # Set after the servers are launched and before the trainer is.
        os.environ["DGL_DIST_MODE"] = "distributed"
        os.environ["DGL_NUM_SAMPLER"] = str(num_workers)

        # Arguments shared by both trainer entry points.
        common_args = (
            0,
            ip_config,
            part_config,
            num_server,
            num_workers,
            orig_nid,
            orig_eid,
            g,
            use_graphbolt,
        )
        if dataloader_type == "node":
            target = start_node_dataloader
            extra_args = (
                return_eids,
                prob_or_mask,
                use_deprecated_dataloader,
            )
        else:
            target = start_edge_dataloader
            extra_args = (
                exclude,
                reverse_eids,
                reverse_etypes,
                negative,
                prob_or_mask,
                use_deprecated_dataloader,
            )

        ptrainer_list = []
        p = ctx.Process(target=target, args=common_args + extra_args)
        p.start()
        ptrainer_list.append(p)

        for p in pserver_list:
            p.join()
            assert p.exitcode == 0
        for p in ptrainer_list:
            p.join()
            assert p.exitcode == 0
@pytest.mark.parametrize("num_server", [1])
@pytest.mark.parametrize("num_workers", [0, 1])
@pytest.mark.parametrize("dataloader_type", ["node", "edge"])
@pytest.mark.parametrize("use_graphbolt", [False, True])
@pytest.mark.parametrize("return_eids", [False, True])
def test_dataloader_homograph(
    num_server, num_workers, dataloader_type, use_graphbolt, return_eids
):
    """Run the node/edge dataloader check on the Cora graph."""
    eids_without_graphbolt = return_eids and not use_graphbolt
    if eids_without_graphbolt:
        # return_eids is not supported in non-GraphBolt mode.
        return

    reset_envs()
    cora = CitationGraphDataset("cora")[0]
    options = {"use_graphbolt": use_graphbolt, "return_eids": return_eids}
    check_dataloader(
        cora, num_server, num_workers, dataloader_type, **options
    )
@pytest.mark.parametrize("num_workers", [0])
@pytest.mark.parametrize("use_graphbolt", [False, True])
@pytest.mark.parametrize("exclude", [None, "self", "reverse_types"])
@pytest.mark.parametrize("negative", [False, True])
def test_edge_dataloader_heterograph(
    num_workers, use_graphbolt, exclude, negative
):
    """Run the edge dataloader check on a random heterograph.

    A single server is used, edge IDs are always returned, and ``r21`` is
    declared as the reverse relation of ``r1``.
    """
    reset_envs()
    hetero_g = create_random_hetero()
    options = {
        "use_graphbolt": use_graphbolt,
        "return_eids": True,
        "exclude": exclude,
        "reverse_etypes": {("n1", "r1", "n2"): ("n2", "r21", "n1")},
        "negative": negative,
    }
    # Positional arguments: graph, one server, worker count, loader type.
    check_dataloader(hetero_g, 1, num_workers, "edge", **options)
def start_multiple_dataloaders(
    ip_config,
    part_config,
    graph_name,
    orig_g,
    num_dataloaders,
    dataloader_type,
    use_graphbolt,
):
    """Create several distributed dataloaders and drain them in random order.

    Parameters
    ----------
    ip_config, part_config, graph_name
        Used to initialize the client and to open the ``DistGraph``.
    orig_g : DGLGraph
        Source of the node/edge counts used for the seed IDs; the batch size
        is one hundredth of that count.
    num_dataloaders : int
        Number of dataloaders that are alive and iterated concurrently.
    dataloader_type : str
        ``"node"`` selects ``DistNodeDataLoader``; any other value selects
        ``DistEdgeDataLoader``.
    use_graphbolt : bool
        Forwarded to ``dgl.distributed.initialize``, matching what the other
        trainer entry points in this file do.
    """
    # Forward ``use_graphbolt`` so the client is initialized in the same
    # mode the caller requested instead of silently ignoring the argument.
    dgl.distributed.initialize(ip_config, use_graphbolt=use_graphbolt)
    dist_g = dgl.distributed.DistGraph(graph_name, part_config=part_config)

    if dataloader_type == "node":
        dataloader_cls = dgl.distributed.DistNodeDataLoader
        train_ids = th.arange(orig_g.num_nodes(), dtype=dist_g.idtype)
        batch_size = orig_g.num_nodes() // 100
    else:
        dataloader_cls = dgl.distributed.DistEdgeDataLoader
        train_ids = th.arange(orig_g.num_edges())
        batch_size = orig_g.num_edges() // 100

    sampler = dgl.dataloading.NeighborSampler([-1])
    dataloaders = []
    dl_iters = []
    for _ in range(num_dataloaders):
        dataloader = dataloader_cls(
            dist_g, train_ids, sampler, batch_size=batch_size
        )
        dataloaders.append(dataloader)
        dl_iters.append(iter(dataloader))

    # iterate on multiple dataloaders randomly
    while len(dl_iters) > 0:
        next_dl = np.random.choice(len(dl_iters), 1)[0]
        try:
            _ = next(dl_iters[next_dl])
        except StopIteration:
            # Drop the exhausted iterator together with its dataloader so
            # the two lists stay index-aligned.
            dl_iters.pop(next_dl)
            del dataloaders[next_dl]

    dgl.distributed.exit_client()
os.environ["DGL_DIST_MODE"] = "distributed" os.environ["DGL_NUM_SAMPLER"] = str(num_workers) num_parts = 1 num_servers = 1 with tempfile.TemporaryDirectory() as test_dir: ip_config = os.path.join(test_dir, "ip_config.txt") generate_ip_config(ip_config, num_parts, num_servers) orig_g = dgl.rand_graph(1000, 10000) graph_name = f"graph_{uuid.uuid4()}" partition_graph( orig_g, graph_name, num_parts, test_dir, use_graphbolt=use_graphbolt, ) part_config = os.path.join(test_dir, f"{graph_name}.json") p_servers = [] ctx = mp.get_context("spawn") for i in range(num_servers): p = ctx.Process( target=start_server, args=( i, ip_config, part_config, num_servers > 1, num_workers + 1, use_graphbolt, ), ) p.start() time.sleep(1) p_servers.append(p) p_client = ctx.Process( target=start_multiple_dataloaders, args=( ip_config, part_config, graph_name, orig_g, num_dataloaders, dataloader_type, use_graphbolt, ), ) p_client.start() p_client.join() assert p_client.exitcode == 0 for p in p_servers: p.join() assert p.exitcode == 0 reset_envs() @pytest.mark.parametrize("dataloader_type", ["node", "edge"]) def test_deprecated_dataloader(dataloader_type): reset_envs() g = CitationGraphDataset("cora")[0] check_dataloader( g, 1, 0, dataloader_type, use_deprecated_dataloader=True, ) ================================================ FILE: tests/distributed/test_new_kvstore.py ================================================ import multiprocessing as mp import os import time import unittest import backend as F import dgl from numpy.testing import assert_array_equal from utils import generate_ip_config, reset_envs # Create an one-part Graph node_map = {"_N": F.tensor([[0, 6]], F.int64)} edge_map = {("_N", "_E", "_N"): F.tensor([[0, 7]], F.int64)} global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64) global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64) g = dgl.graph([]) g.add_nodes(6) g.add_edges(0, 1) # 0 g.add_edges(0, 2) # 1 g.add_edges(0, 3) # 2 g.add_edges(2, 3) # 3 g.add_edges(1, 1) # 4 
g.add_edges(0, 4)  # 5
g.add_edges(2, 5)  # 6
g.ndata[dgl.NID] = global_nid
g.edata[dgl.EID] = global_eid

# Single-partition book: every node and edge of ``g`` belongs to partition 0.
gpb = dgl.distributed.graph_partition_book.RangePartitionBook(
    part_id=0,
    num_parts=1,
    node_map=node_map,
    edge_map=edge_map,
    ntypes={ntype: i for i, ntype in enumerate(g.ntypes)},
    etypes={etype: i for i, etype in enumerate(g.canonical_etypes)},
)

node_policy = dgl.distributed.PartitionPolicy(
    policy_str="node~_N", partition_book=gpb
)

edge_policy = dgl.distributed.PartitionPolicy(
    policy_str="edge~_N:_E:_N", partition_book=gpb
)

# Node-sized payloads (6 rows) in several dtypes/shapes.
data_0 = F.tensor(
    [[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]],
    F.float32,
)
data_0_1 = F.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], F.float32)
data_0_2 = F.tensor([1, 2, 3, 4, 5, 6], F.int32)
data_0_3 = F.tensor([1, 2, 3, 4, 5, 6], F.int64)

# Edge-sized payload (7 rows).
data_1 = F.tensor(
    [
        [2.0, 2.0],
        [2.0, 2.0],
        [2.0, 2.0],
        [2.0, 2.0],
        [2.0, 2.0],
        [2.0, 2.0],
        [2.0, 2.0],
    ],
    F.float32,
)

# Node-sized payload (6 rows) of zeros.
data_2 = F.tensor(
    [[0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [0.0, 0.0]],
    F.float32,
)


def init_zero_func(shape, dtype):
    """Return a CPU tensor of zeros with the given shape and dtype."""
    return F.zeros(shape, dtype, F.cpu())


def udf_push(target, name, id_tensor, data_tensor):
    """Push handler that stores the element-wise square of the pushed data."""
    target[name][id_tensor] = data_tensor * data_tensor


def add_push(target, name, id_tensor, data_tensor):
    """Push handler that accumulates the pushed data into the stored rows."""
    target[name][id_tensor] += data_tensor


@unittest.skipIf(
    os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow",
    reason="Do not support windows and TF yet",
)
def test_partition_policy():
    """Check ID mapping of the node/edge policies on the one-part graph.

    With a single partition, global IDs map to themselves as local IDs, every
    ID maps to partition 0, and the partition size equals the number of IDs.
    """
    # Pin the dtype explicitly, as the rest of this module does.
    all_nids = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
    all_eids = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)

    assert node_policy.part_id == 0
    assert edge_policy.part_id == 0

    local_nid = node_policy.to_local(all_nids)
    local_eid = edge_policy.to_local(all_eids)
    assert_array_equal(F.asnumpy(local_nid), F.asnumpy(all_nids))
    assert_array_equal(F.asnumpy(local_eid), F.asnumpy(all_eids))

    nid_partid = node_policy.to_partid(all_nids)
    eid_partid = edge_policy.to_partid(all_eids)
    assert_array_equal(
        F.asnumpy(nid_partid), F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0], F.int64))
    )
    assert_array_equal(
        F.asnumpy(eid_partid),
        F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)),
    )

    assert node_policy.get_part_size() == len(local_nid)
    assert edge_policy.get_part_size() == len(local_eid)


def _run_kv_server(
    server_id, ip_config, num_clients, num_servers, policies, node_data
):
    """Create a KVServer, register policies and node data, then serve.

    Parameters
    ----------
    server_id : int
        ID passed to ``KVServer`` and ``start_server``.
    ip_config : str
        Path of the IP configuration file shared with the clients.
    num_clients : int
        Number of clients passed to ``KVServer`` and ``start_server``.
    num_servers : int
        Number of servers passed to ``KVServer`` and ``start_server``.
    policies : iterable
        Partition policies to register, in order.
    node_data : dict
        Maps a data name to its tensor; every entry is registered under the
        ``"node~_N"`` policy, in insertion order.
    """
    kvserver = dgl.distributed.KVServer(
        server_id=server_id,
        ip_config=ip_config,
        num_servers=num_servers,
        num_clients=num_clients,
    )
    for policy in policies:
        kvserver.add_part_policy(policy)

    # Backup servers register the name only; the others also supply the
    # tensor.
    is_backup = kvserver.is_backup_server()
    for name, tensor in node_data.items():
        if is_backup:
            kvserver.init_data(name, "node~_N")
        else:
            kvserver.init_data(name, "node~_N", tensor)

    # start server
    server_state = dgl.distributed.ServerState(
        kv_store=kvserver, local_g=None, partition_book=None
    )
    dgl.distributed.start_server(
        server_id=server_id,
        ip_config=ip_config,
        num_servers=num_servers,
        num_clients=num_clients,
        server_state=server_state,
    )


def start_server(server_id, num_clients, num_servers):
    """Process target: serve ``data_0*`` using ``kv_ip_config.txt``.

    Startup is delayed by five seconds so that clients which are launched
    right away have to retry their connection.
    """
    # Init kvserver
    print("Sleep 5 seconds to test client re-connect.")
    time.sleep(5)
    _run_kv_server(
        server_id,
        "kv_ip_config.txt",
        num_clients,
        num_servers,
        policies=(node_policy, edge_policy),
        node_data={
            "data_0": data_0,
            "data_0_1": data_0_1,
            "data_0_2": data_0_2,
            "data_0_3": data_0_3,
        },
    )


def start_server_mul_role(server_id, num_clients, num_servers):
    """Process target: serve ``data_0`` using ``kv_ip_mul_config.txt``."""
    _run_kv_server(
        server_id,
        "kv_ip_mul_config.txt",
        num_clients,
        num_servers,
        policies=(node_policy,),
        node_data={"data_0": data_0},
    )
def start_client(num_clients, num_servers): os.environ["DGL_DIST_MODE"] = "distributed" # Note: connect to server first ! dgl.distributed.initialize(ip_config="kv_ip_config.txt") # Init kvclient kvclient = dgl.distributed.KVClient( ip_config="kv_ip_config.txt", num_servers=num_servers ) kvclient.map_shared_data(partition_book=gpb) assert dgl.distributed.get_num_client() == num_clients kvclient.init_data( name="data_1", shape=F.shape(data_1), dtype=F.dtype(data_1), part_policy=edge_policy, init_func=init_zero_func, ) kvclient.init_data( name="data_2", shape=F.shape(data_2), dtype=F.dtype(data_2), part_policy=node_policy, init_func=init_zero_func, ) # Test data_name_list name_list = kvclient.data_name_list() print(name_list) assert "data_0" in name_list assert "data_0_1" in name_list assert "data_0_2" in name_list assert "data_0_3" in name_list assert "data_1" in name_list assert "data_2" in name_list # Test get_meta_data meta = kvclient.get_data_meta("data_0") dtype, shape, policy = meta assert dtype == F.dtype(data_0) assert shape == F.shape(data_0) assert policy.policy_str == "node~_N" meta = kvclient.get_data_meta("data_0_1") dtype, shape, policy = meta assert dtype == F.dtype(data_0_1) assert shape == F.shape(data_0_1) assert policy.policy_str == "node~_N" meta = kvclient.get_data_meta("data_0_2") dtype, shape, policy = meta assert dtype == F.dtype(data_0_2) assert shape == F.shape(data_0_2) assert policy.policy_str == "node~_N" meta = kvclient.get_data_meta("data_0_3") dtype, shape, policy = meta assert dtype == F.dtype(data_0_3) assert shape == F.shape(data_0_3) assert policy.policy_str == "node~_N" meta = kvclient.get_data_meta("data_1") dtype, shape, policy = meta assert dtype == F.dtype(data_1) assert shape == F.shape(data_1) assert policy.policy_str == "edge~_N:_E:_N" meta = kvclient.get_data_meta("data_2") dtype, shape, policy = meta assert dtype == F.dtype(data_2) assert shape == F.shape(data_2) assert policy.policy_str == "node~_N" # Test push and pull 
id_tensor = F.tensor([0, 2, 4], F.int64) data_tensor = F.tensor([[6.0, 6.0], [6.0, 6.0], [6.0, 6.0]], F.float32) kvclient.push(name="data_0", id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name="data_1", id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name="data_2", id_tensor=id_tensor, data_tensor=data_tensor) res = kvclient.pull(name="data_0", id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name="data_1", id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name="data_2", id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) # Register new push handler kvclient.register_push_handler("data_0", udf_push) kvclient.register_push_handler("data_1", udf_push) kvclient.register_push_handler("data_2", udf_push) # Test push and pull kvclient.push(name="data_0", id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name="data_1", id_tensor=id_tensor, data_tensor=data_tensor) kvclient.push(name="data_2", id_tensor=id_tensor, data_tensor=data_tensor) kvclient.barrier() data_tensor = data_tensor * data_tensor res = kvclient.pull(name="data_0", id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name="data_1", id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) res = kvclient.pull(name="data_2", id_tensor=id_tensor) assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) # Test delete data kvclient.delete_data("data_0") kvclient.delete_data("data_1") kvclient.delete_data("data_2") # Register new push handler kvclient.init_data( name="data_3", shape=F.shape(data_2), dtype=F.dtype(data_2), part_policy=node_policy, init_func=init_zero_func, ) kvclient.register_push_handler("data_3", add_push) data_tensor = F.tensor([[6.0, 6.0], [6.0, 6.0], [6.0, 6.0]], F.float32) kvclient.barrier() time.sleep(kvclient.client_id + 1) print("add...") 
kvclient.push(name="data_3", id_tensor=id_tensor, data_tensor=data_tensor) kvclient.barrier() res = kvclient.pull(name="data_3", id_tensor=id_tensor) data_tensor = data_tensor * num_clients assert_array_equal(F.asnumpy(res), F.asnumpy(data_tensor)) def start_client_mul_role(i): os.environ["DGL_DIST_MODE"] = "distributed" # Initialize creates kvstore ! dgl.distributed.initialize(ip_config="kv_ip_mul_config.txt") if i == 0: # block one trainer time.sleep(5) kvclient = dgl.distributed.kvstore.get_kvstore() kvclient.barrier() print("i: %d role: %s" % (i, kvclient.role)) assert dgl.distributed.role.get_num_trainers() == 2 assert dgl.distributed.role.get_trainer_rank() < 2 print( "trainer rank: %d, global rank: %d" % ( dgl.distributed.role.get_trainer_rank(), dgl.distributed.role.get_global_rank(), ) ) dgl.distributed.exit_client() @unittest.skipIf( os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow", reason="Do not support windows and TF yet", ) def test_kv_store(): reset_envs() num_servers = 2 num_clients = 2 generate_ip_config("kv_ip_config.txt", 1, num_servers) ctx = mp.get_context("spawn") pserver_list = [] pclient_list = [] os.environ["DGL_NUM_SERVER"] = str(num_servers) for i in range(num_servers): pserver = ctx.Process( target=start_server, args=(i, num_clients, num_servers) ) pserver.start() pserver_list.append(pserver) for i in range(num_clients): pclient = ctx.Process( target=start_client, args=(num_clients, num_servers) ) pclient.start() pclient_list.append(pclient) for i in range(num_clients): pclient_list[i].join() for i in range(num_servers): pserver_list[i].join() @unittest.skipIf( os.name == "nt" or os.getenv("DGLBACKEND") == "tensorflow", reason="Do not support windows and TF yet", ) def test_kv_multi_role(): reset_envs() num_servers = 2 num_trainers = 2 num_samplers = 2 generate_ip_config("kv_ip_mul_config.txt", 1, num_servers) # There are two trainer processes and each trainer process has two sampler processes. 
num_clients = num_trainers * (1 + num_samplers) ctx = mp.get_context("spawn") pserver_list = [] pclient_list = [] os.environ["DGL_NUM_SAMPLER"] = str(num_samplers) os.environ["DGL_NUM_SERVER"] = str(num_servers) for i in range(num_servers): pserver = ctx.Process( target=start_server_mul_role, args=(i, num_clients, num_servers) ) pserver.start() pserver_list.append(pserver) for i in range(num_trainers): pclient = ctx.Process(target=start_client_mul_role, args=(i,)) pclient.start() pclient_list.append(pclient) for i in range(num_trainers): pclient_list[i].join() for i in range(num_servers): pserver_list[i].join() if __name__ == "__main__": test_partition_policy() test_kv_store() test_kv_multi_role() ================================================ FILE: tests/distributed/test_partition.py ================================================ import json import os import tempfile import dgl import dgl.backend as F import dgl.graphbolt as gb import numpy as np import pytest import torch as th from dgl import function as fn from dgl.distributed import ( dgl_partition_to_graphbolt, load_partition, load_partition_book, load_partition_feats, partition_graph, ) from dgl.distributed.graph_partition_book import ( _etype_str_to_tuple, _etype_tuple_to_str, DEFAULT_ETYPE, DEFAULT_NTYPE, EdgePartitionPolicy, HeteroDataName, NodePartitionPolicy, RangePartitionBook, ) from dgl.distributed.partition import ( _get_inner_edge_mask, _get_inner_node_mask, RESERVED_FIELD_DTYPE, ) from scipy import sparse as spsp from utils import reset_envs def _verify_partition_data_types(part_g): """ check list: make sure nodes and edges have correct type. 
""" ndata = ( part_g.node_attributes if isinstance(part_g, gb.FusedCSCSamplingGraph) else part_g.ndata ) edata = ( part_g.edge_attributes if isinstance(part_g, gb.FusedCSCSamplingGraph) else part_g.edata ) for k, dtype in RESERVED_FIELD_DTYPE.items(): if k in ndata: assert ndata[k].dtype == dtype if k in edata: assert edata[k].dtype == dtype def _verify_partition_formats(part_g, formats): # verify saved graph formats if formats is None: assert "coo" in part_g.formats()["created"] else: for format in formats: assert format in part_g.formats()["created"] def create_random_graph(n): arr = ( spsp.random(n, n, density=0.001, format="coo", random_state=100) != 0 ).astype(np.int64) return dgl.from_scipy(arr) def create_random_hetero(): num_nodes = {"n1": 1000, "n2": 1010, "n3": 1020} etypes = [ ("n1", "r1", "n2"), ("n2", "r1", "n1"), ("n1", "r2", "n3"), ("n2", "r3", "n3"), ] edges = {} for etype in etypes: src_ntype, _, dst_ntype = etype arr = spsp.random( num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format="coo", random_state=100, ) edges[etype] = (arr.row, arr.col) return dgl.heterograph(edges, num_nodes) def _verify_graphbolt_attributes( parts, store_inner_node, store_inner_edge, store_eids ): """ check list: make sure arguments work. """ for part in parts: assert store_inner_edge == ("inner_edge" in part.edge_attributes) assert store_inner_node == ("inner_node" in part.node_attributes) assert store_eids == (dgl.EID in part.edge_attributes) def _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge, debug_mode, ): """ check list: make sure edge type are correct. make sure the number of nodes in each node type are correct. make sure the number of nodes in each node type are correct. 
""" num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} for part in parts: edata = ( part.edge_attributes if isinstance(part, gb.FusedCSCSamplingGraph) else part.edata ) if dgl.ETYPE in edata: # edata may not contain all edge types. assert len(g.canonical_etypes) >= len(F.unique(edata[dgl.ETYPE])) if debug_mode or isinstance(part, dgl.DGLGraph): for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask(part, ntype_id) num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) num_nodes[ntype] += num_inner_nodes if store_inner_edge or isinstance(part, dgl.DGLGraph): for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(part, etype_id) num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) num_edges[etype] += num_inner_edges # Verify the number of nodes are correct. if debug_mode or isinstance(part, dgl.DGLGraph): for ntype in g.ntypes: print( "node {}: {}, {}".format( ntype, g.num_nodes(ntype), num_nodes[ntype] ) ) assert g.num_nodes(ntype) == num_nodes[ntype] # Verify the number of edges are correct. if store_inner_edge or isinstance(part, dgl.DGLGraph): for etype in g.canonical_etypes: print( "edge {}: {}, {}".format( etype, g.num_edges(etype), num_edges[etype] ) ) assert g.num_edges(etype) == num_edges[etype] def _verify_edge_id_range_hetero( g, part, eids, ): """ check list: make sure inner_eids fall into a range. make sure all edges are included. 
""" edata = ( part.edge_attributes if isinstance(part, gb.FusedCSCSamplingGraph) else part.edata ) etype = ( part.type_per_edge if isinstance(part, gb.FusedCSCSamplingGraph) else edata[dgl.ETYPE] ) eid = th.arange(len(edata[dgl.EID])) etype_arr = F.gather_row(etype, eid) eid_arr = F.gather_row(edata[dgl.EID], eid) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) eids[etype].append(F.boolean_mask(eid_arr, etype_arr == etype_id)) # Make sure edge Ids fall into a range. inner_edge_mask = _get_inner_edge_mask(part, etype_id) inner_eids = np.sort( F.asnumpy(F.boolean_mask(edata[dgl.EID], inner_edge_mask)) ) assert np.all( inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) ) return eids def _verify_node_id_range_hetero(g, part, nids): """ check list: make sure inner nodes have Ids fall into a range. """ for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) # Make sure inner nodes have Ids fall into a range. inner_node_mask = _get_inner_node_mask(part, ntype_id) inner_nids = F.boolean_mask( part.node_attributes[dgl.NID], inner_node_mask ) assert np.all( F.asnumpy( inner_nids == F.arange( F.as_scalar(inner_nids[0]), F.as_scalar(inner_nids[-1]) + 1, ) ) ) nids[ntype].append(inner_nids) return nids def _verify_graph_attributes_hetero( g, parts, store_inner_edge, store_inner_node, ): """ check list: make sure edge ids fall into a range. make sure inner nodes have Ids fall into a range. make sure all nodes is included. make sure all edges is included. """ nids = {ntype: [] for ntype in g.ntypes} eids = {etype: [] for etype in g.canonical_etypes} # check edge id. if store_inner_edge or isinstance(parts[0], dgl.DGLGraph): for part in parts: # collect eids eids = _verify_edge_id_range_hetero(g, part, eids) for etype in eids: eids_type = F.cat(eids[etype], 0) uniq_ids = F.unique(eids_type) # We should get all nodes. assert len(uniq_ids) == g.num_edges(etype) # check node id. 
if store_inner_node or isinstance(parts[0], dgl.DGLGraph): for part in parts: nids = _verify_node_id_range_hetero(g, part, nids) for ntype in nids: nids_type = F.cat(nids[ntype], 0) uniq_ids = F.unique(nids_type) # We should get all nodes. assert len(uniq_ids) == g.num_nodes(ntype) def _verify_hetero_graph( g, parts, store_eids=False, store_inner_edge=False, store_inner_node=False, debug_mode=False, ): _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge=store_inner_edge, debug_mode=debug_mode, ) if store_eids: _verify_graph_attributes_hetero( g, parts, store_inner_edge=store_inner_edge, store_inner_node=store_inner_node, ) def _verify_node_feats(g, part, gpb, orig_nids, node_feats, is_homo=False): for ntype in g.ntypes: ndata = ( part.node_attributes if isinstance(part, gb.FusedCSCSamplingGraph) else part.ndata ) ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask( part, ntype_id, (gpb if isinstance(part, gb.FusedCSCSamplingGraph) else None), ) inner_nids = F.boolean_mask(ndata[dgl.NID], inner_node_mask) ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) partid = gpb.nid2partid(inner_type_nids, ntype) if is_homo: assert np.all(F.asnumpy(ntype_ids) == ntype_id) assert np.all(F.asnumpy(partid) == gpb.partid) if is_homo: orig_id = orig_nids[inner_type_nids] else: orig_id = orig_nids[ntype][inner_type_nids] local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) for name in g.nodes[ntype].data: if name in [dgl.NID, "inner_node"]: continue true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) assert np.all(F.asnumpy(ndata == true_feats)) def _verify_edge_feats(g, part, gpb, orig_eids, edge_feats, is_homo=False): for etype in g.canonical_etypes: edata = ( part.edge_attributes if isinstance(part, gb.FusedCSCSamplingGraph) else part.edata ) etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(part, etype_id) inner_eids = 
F.boolean_mask(edata[dgl.EID], inner_edge_mask) etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) partid = gpb.eid2partid(inner_type_eids, etype) assert np.all(F.asnumpy(etype_ids) == etype_id) assert np.all(F.asnumpy(partid) == gpb.partid) if is_homo: orig_id = orig_eids[inner_type_eids] else: orig_id = orig_eids[etype][inner_type_eids] local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) for name in g.edges[etype].data: if name in [dgl.EID, "inner_edge"]: continue true_feats = F.gather_row(g.edges[etype].data[name], orig_id) edata = F.gather_row( edge_feats[_etype_tuple_to_str(etype) + "/" + name], local_eids, ) assert np.all(F.asnumpy(edata == true_feats)) def verify_graph_feats_hetero_dgl( g, gpb, part, node_feats, edge_feats, orig_nids, orig_eids, ): """ check list: make sure the feats of nodes and edges are correct """ _verify_node_feats(g, part, gpb, orig_nids, node_feats) _verify_edge_feats(g, part, gpb, orig_eids, edge_feats) def verify_graph_feats_gb( g, gpbs, parts, tot_node_feats, tot_edge_feats, orig_nids, orig_eids, shuffled_labels, shuffled_edata, test_ntype, test_etype, store_inner_node=False, store_inner_edge=False, store_eids=False, is_homo=False, ): """ check list: make sure the feats of nodes and edges are correct """ for part_id in range(len(parts)): part = parts[part_id] gpb = gpbs[part_id] node_feats = tot_node_feats[part_id] edge_feats = tot_edge_feats[part_id] if store_inner_node: _verify_node_feats( g, part, gpb, orig_nids, node_feats, is_homo=is_homo, ) if store_inner_edge and store_eids: _verify_edge_feats( g, part, gpb, orig_eids, edge_feats, is_homo=is_homo, ) _verify_shuffled_labels_gb( g, shuffled_labels, shuffled_edata, orig_nids, orig_eids, test_ntype, test_etype, ) def check_hetero_partition( hg, part_method, num_parts=4, num_trainers_per_machine=1, load_feats=True, graph_formats=None, ): test_ntype = "n1" test_etype = ("n1", "r1", "n2") hg.nodes[test_ntype].data["labels"] = F.arange(0, 
hg.num_nodes(test_ntype)) hg.nodes[test_ntype].data["feats"] = F.tensor( np.random.randn(hg.num_nodes(test_ntype), 10), F.float32 ) hg.edges[test_etype].data["feats"] = F.tensor( np.random.randn(hg.num_edges(test_etype), 10), F.float32 ) hg.edges[test_etype].data["labels"] = F.arange(0, hg.num_edges(test_etype)) num_hops = 1 orig_nids, orig_eids = partition_graph( hg, "test", num_parts, "/tmp/partition", num_hops=num_hops, part_method=part_method, return_mapping=True, num_trainers_per_machine=num_trainers_per_machine, graph_formats=graph_formats, ) assert len(orig_nids) == len(hg.ntypes) assert len(orig_eids) == len(hg.canonical_etypes) for ntype in hg.ntypes: assert len(orig_nids[ntype]) == hg.num_nodes(ntype) for etype in hg.canonical_etypes: assert len(orig_eids[etype]) == hg.num_edges(etype) parts = [] shuffled_labels = [] shuffled_elabels = [] for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( "/tmp/partition/test.json", i, load_feats=load_feats ) _verify_partition_data_types(part_g) _verify_partition_formats(part_g, graph_formats) if not load_feats: assert not node_feats assert not edge_feats node_feats, edge_feats = load_partition_feats( "/tmp/partition/test.json", i ) if num_trainers_per_machine > 1: for ntype in hg.ntypes: name = ntype + "/trainer_id" assert name in node_feats part_ids = F.floor_div( node_feats[name], num_trainers_per_machine ) assert np.all(F.asnumpy(part_ids) == i) for etype in hg.canonical_etypes: name = _etype_tuple_to_str(etype) + "/trainer_id" assert name in edge_feats part_ids = F.floor_div( edge_feats[name], num_trainers_per_machine ) assert np.all(F.asnumpy(part_ids) == i) # Verify the mapping between the reshuffled IDs and the original IDs. # These are partition-local IDs. part_src_ids, part_dst_ids = part_g.edges() # These are reshuffled global homogeneous IDs. 
part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids) part_eids = part_g.edata[dgl.EID] # These are reshuffled per-type IDs. src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) etype_ids, part_eids = gpb.map_to_per_etype(part_eids) # `IdMap` is in int64 by default. assert src_ntype_ids.dtype == F.int64 assert dst_ntype_ids.dtype == F.int64 assert etype_ids.dtype == F.int64 with pytest.raises(dgl.utils.internal.InconsistentDtypeException): gpb.map_to_per_ntype(F.tensor([0], F.int32)) with pytest.raises(dgl.utils.internal.InconsistentDtypeException): gpb.map_to_per_etype(F.tensor([0], F.int32)) # These are original per-type IDs. for etype_id, etype in enumerate(hg.canonical_etypes): if F.sum((etype_ids == etype_id), 0) == 0: continue part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) src_ntype_ids1 = F.boolean_mask( src_ntype_ids, etype_ids == etype_id ) part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) dst_ntype_ids1 = F.boolean_mask( dst_ntype_ids, etype_ids == etype_id ) part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype) assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) parts.append(part_g) verify_graph_feats_hetero_dgl( hg, gpb, part_g, node_feats, edge_feats, orig_nids, orig_eids ) shuffled_labels.append(node_feats[test_ntype + 
"/labels"]) shuffled_elabels.append( edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) _verify_hetero_graph(hg, parts) shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) orig_elabels = np.zeros( shuffled_elabels.shape, dtype=shuffled_elabels.dtype ) orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels assert np.all(orig_labels == F.asnumpy(hg.nodes[test_ntype].data["labels"])) assert np.all( orig_elabels == F.asnumpy(hg.edges[test_etype].data["labels"]) ) def check_partition( g, part_method, num_parts=4, num_trainers_per_machine=1, load_feats=True, graph_formats=None, ): g.ndata["labels"] = F.arange(0, g.num_nodes()) g.ndata["feats"] = F.tensor(np.random.randn(g.num_nodes(), 10), F.float32) g.edata["feats"] = F.tensor(np.random.randn(g.num_edges(), 10), F.float32) g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) num_hops = 2 orig_nids, orig_eids = partition_graph( g, "test", num_parts, "/tmp/partition", num_hops=num_hops, part_method=part_method, return_mapping=True, num_trainers_per_machine=num_trainers_per_machine, graph_formats=graph_formats, ) part_sizes = [] shuffled_labels = [] shuffled_edata = [] for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( "/tmp/partition/test.json", i, load_feats=load_feats ) _verify_partition_data_types(part_g) _verify_partition_formats(part_g, graph_formats) if not load_feats: assert not node_feats assert not edge_feats node_feats, edge_feats = load_partition_feats( "/tmp/partition/test.json", i ) if num_trainers_per_machine > 1: for ntype in g.ntypes: name = ntype + "/trainer_id" assert name in node_feats part_ids = F.floor_div( node_feats[name], num_trainers_per_machine ) assert 
np.all(F.asnumpy(part_ids) == i) for etype in g.canonical_etypes: name = _etype_tuple_to_str(etype) + "/trainer_id" assert name in edge_feats part_ids = F.floor_div( edge_feats[name], num_trainers_per_machine ) assert np.all(F.asnumpy(part_ids) == i) # Check the metadata assert gpb._num_nodes() == g.num_nodes() assert gpb._num_edges() == g.num_edges() assert gpb.num_partitions() == num_parts gpb_meta = gpb.metadata() assert len(gpb_meta) == num_parts assert len(gpb.partid2nids(i)) == gpb_meta[i]["num_nodes"] assert len(gpb.partid2eids(i)) == gpb_meta[i]["num_edges"] part_sizes.append((gpb_meta[i]["num_nodes"], gpb_meta[i]["num_edges"])) nid = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata["inner_node"]) local_nid = gpb.nid2localnid(nid, i) assert F.dtype(local_nid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) eid = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata["inner_edge"]) local_eid = gpb.eid2localeid(eid, i) assert F.dtype(local_eid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid))) # Check the node map. local_nodes = F.boolean_mask( part_g.ndata[dgl.NID], part_g.ndata["inner_node"] ) llocal_nodes = F.nonzero_1d(part_g.ndata["inner_node"]) local_nodes1 = gpb.partid2nids(i) assert F.dtype(local_nodes1) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(local_nodes1)) ) assert np.all(F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes))) # Check the edge map. local_edges = F.boolean_mask( part_g.edata[dgl.EID], part_g.edata["inner_edge"] ) llocal_edges = F.nonzero_1d(part_g.edata["inner_edge"]) local_edges1 = gpb.partid2eids(i) assert F.dtype(local_edges1) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(local_edges1)) ) assert np.all(F.asnumpy(llocal_edges) == np.arange(len(llocal_edges))) # Verify the mapping between the reshuffled IDs and the original IDs. 
part_src_ids, part_dst_ids = part_g.edges() part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids) part_eids = part_g.edata[dgl.EID] orig_src_ids = F.gather_row(orig_nids, part_src_ids) orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) orig_eids1 = F.gather_row(orig_eids, part_eids) orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) local_orig_nids = orig_nids[part_g.ndata[dgl.NID]] local_orig_eids = orig_eids[part_g.edata[dgl.EID]] part_g.ndata["feats"] = F.gather_row(g.ndata["feats"], local_orig_nids) part_g.edata["feats"] = F.gather_row(g.edata["feats"], local_orig_eids) local_nodes = orig_nids[local_nodes] local_edges = orig_eids[local_edges] part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) assert F.allclose( F.gather_row(g.ndata["h"], local_nodes), F.gather_row(part_g.ndata["h"], llocal_nodes), ) assert F.allclose( F.gather_row(g.ndata["eh"], local_nodes), F.gather_row(part_g.ndata["eh"], llocal_nodes), ) for name in ["labels", "feats"]: assert "_N/" + name in node_feats assert node_feats["_N/" + name].shape[0] == len(local_nodes) true_feats = F.gather_row(g.ndata[name], local_nodes) ndata = F.gather_row(node_feats["_N/" + name], local_nid) assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) for name in ["feats"]: efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name assert efeat_name in edge_feats assert edge_feats[efeat_name].shape[0] == len(local_edges) true_feats = F.gather_row(g.edata[name], local_edges) edata = F.gather_row(edge_feats[efeat_name], local_eid) assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) # This only works if node/edge IDs are shuffled. 
shuffled_labels.append(node_feats["_N/labels"]) shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) # Verify that we can reconstruct node/edge data for original IDs. shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) orig_labels[F.asnumpy(orig_nids)] = shuffled_labels orig_edata[F.asnumpy(orig_eids)] = shuffled_edata assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) node_map = [] edge_map = [] for i, (num_nodes, num_edges) in enumerate(part_sizes): node_map.append(np.ones(num_nodes) * i) edge_map.append(np.ones(num_edges) * i) node_map = np.concatenate(node_map) edge_map = np.concatenate(edge_map) nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) assert F.dtype(nid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(nid2pid) == node_map) eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) assert F.dtype(eid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(eid2pid) == edge_map) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("num_trainers_per_machine", [1]) @pytest.mark.parametrize("load_feats", [True, False]) @pytest.mark.parametrize( "graph_formats", [None, ["csc"], ["coo", "csc"], ["coo", "csc", "csr"]] ) def test_partition( part_method, num_parts, num_trainers_per_machine, load_feats, graph_formats, ): os.environ["DGL_DIST_DEBUG"] = "1" if part_method == "random" and num_parts > 1: num_trainers_per_machine = 1 g = create_random_graph(1000) check_partition( g, part_method, num_parts, num_trainers_per_machine, load_feats, graph_formats, ) hg = create_random_hetero() check_hetero_partition( hg, part_method, num_parts, num_trainers_per_machine, load_feats, graph_formats, ) reset_envs() 
@pytest.mark.parametrize("node_map_dtype", [F.int32, F.int64]) @pytest.mark.parametrize("edge_map_dtype", [F.int32, F.int64]) def test_RangePartitionBook(node_map_dtype, edge_map_dtype): part_id = 1 num_parts = 2 # homogeneous node_map = { DEFAULT_NTYPE: F.tensor([[0, 1000], [1000, 2000]], dtype=node_map_dtype) } edge_map = { DEFAULT_ETYPE: F.tensor( [[0, 5000], [5000, 10000]], dtype=edge_map_dtype ) } ntypes = {DEFAULT_NTYPE: 0} etypes = {DEFAULT_ETYPE: 0} gpb = RangePartitionBook( part_id, num_parts, node_map, edge_map, ntypes, etypes ) assert gpb.etypes == [DEFAULT_ETYPE[1]] assert gpb.canonical_etypes == [DEFAULT_ETYPE] assert gpb.to_canonical_etype(DEFAULT_ETYPE[1]) == DEFAULT_ETYPE ntype_ids, per_ntype_ids = gpb.map_to_per_ntype( F.tensor([0, 1000], dtype=node_map_dtype) ) assert ntype_ids.dtype == node_map_dtype assert per_ntype_ids.dtype == node_map_dtype assert np.all(F.asnumpy(ntype_ids) == 0) assert np.all(F.asnumpy(per_ntype_ids) == [0, 1000]) etype_ids, per_etype_ids = gpb.map_to_per_etype( F.tensor([0, 5000], dtype=edge_map_dtype) ) assert etype_ids.dtype == edge_map_dtype assert per_etype_ids.dtype == edge_map_dtype assert np.all(F.asnumpy(etype_ids) == 0) assert np.all(F.asnumpy(per_etype_ids) == [0, 5000]) node_policy = NodePartitionPolicy(gpb, DEFAULT_NTYPE) assert node_policy.type_name == DEFAULT_NTYPE edge_policy = EdgePartitionPolicy(gpb, DEFAULT_ETYPE) assert edge_policy.type_name == DEFAULT_ETYPE # Init via etype is not supported node_map = { "node1": F.tensor([[0, 1000], [1000, 2000]], dtype=node_map_dtype), "node2": F.tensor([[0, 1000], [1000, 2000]], dtype=node_map_dtype), } edge_map = { "edge1": F.tensor([[0, 5000], [5000, 10000]], dtype=edge_map_dtype) } ntypes = {"node1": 0, "node2": 1} etypes = {"edge1": 0} expect_except = False try: RangePartitionBook( part_id, num_parts, node_map, edge_map, ntypes, etypes ) except AssertionError: expect_except = True assert expect_except expect_except = False try: EdgePartitionPolicy(gpb, "edge1") 
except AssertionError: expect_except = True assert expect_except # heterogeneous, init via canonical etype node_map = { "node1": F.tensor([[0, 1000], [1000, 2000]], dtype=node_map_dtype), "node2": F.tensor([[0, 1000], [1000, 2000]], dtype=node_map_dtype), } edge_map = { ("node1", "edge1", "node2"): F.tensor( [[0, 5000], [5000, 10000]], dtype=edge_map_dtype ) } ntypes = {"node1": 0, "node2": 1} etypes = {("node1", "edge1", "node2"): 0} c_etype = list(etypes.keys())[0] gpb = RangePartitionBook( part_id, num_parts, node_map, edge_map, ntypes, etypes ) assert gpb.etypes == ["edge1"] assert gpb.canonical_etypes == [c_etype] assert gpb.to_canonical_etype("edge1") == c_etype assert gpb.to_canonical_etype(c_etype) == c_etype ntype_ids, per_ntype_ids = gpb.map_to_per_ntype( F.tensor([0, 1000], dtype=node_map_dtype) ) assert ntype_ids.dtype == node_map_dtype assert per_ntype_ids.dtype == node_map_dtype assert np.all(F.asnumpy(ntype_ids) == 0) assert np.all(F.asnumpy(per_ntype_ids) == [0, 1000]) etype_ids, per_etype_ids = gpb.map_to_per_etype( F.tensor([0, 5000], dtype=edge_map_dtype) ) assert etype_ids.dtype == edge_map_dtype assert per_etype_ids.dtype == edge_map_dtype assert np.all(F.asnumpy(etype_ids) == 0) assert np.all(F.asnumpy(per_etype_ids) == [0, 5000]) expect_except = False try: gpb.to_canonical_etype(("node1", "edge2", "node2")) except BaseException: expect_except = True assert expect_except expect_except = False try: gpb.to_canonical_etype("edge2") except BaseException: expect_except = True assert expect_except # NodePartitionPolicy node_policy = NodePartitionPolicy(gpb, "node1") assert node_policy.type_name == "node1" assert node_policy.policy_str == "node~node1" assert node_policy.part_id == part_id assert node_policy.is_node assert node_policy.get_data_name("x").is_node() local_ids = th.arange(0, 1000) global_ids = local_ids + 1000 assert th.equal(node_policy.to_local(global_ids), local_ids) assert th.all(node_policy.to_partid(global_ids) == part_id) assert 
node_policy.get_part_size() == 1000 assert node_policy.get_size() == 2000 # EdgePartitionPolicy edge_policy = EdgePartitionPolicy(gpb, c_etype) assert edge_policy.type_name == c_etype assert edge_policy.policy_str == "edge~node1:edge1:node2" assert edge_policy.part_id == part_id assert not edge_policy.is_node assert not edge_policy.get_data_name("x").is_node() local_ids = th.arange(0, 5000) global_ids = local_ids + 5000 assert th.equal(edge_policy.to_local(global_ids), local_ids) assert th.all(edge_policy.to_partid(global_ids) == part_id) assert edge_policy.get_part_size() == 5000 assert edge_policy.get_size() == 10000 expect_except = False try: HeteroDataName(False, "edge1", "feat") except BaseException: expect_except = True assert expect_except data_name = HeteroDataName(False, c_etype, "feat") assert data_name.get_type() == c_etype def test_UnknownPartitionBook(): node_map = {"_N": {0: 0, 1: 1, 2: 2}} edge_map = {"_N:_E:_N": {0: 0, 1: 1, 2: 2}} part_metadata = { "num_parts": 1, "num_nodes": len(node_map), "num_edges": len(edge_map), "node_map": node_map, "edge_map": edge_map, "graph_name": "test_graph", } with tempfile.TemporaryDirectory() as test_dir: part_config = os.path.join(test_dir, "test_graph.json") with open(part_config, "w") as file: json.dump(part_metadata, file, indent=4) try: load_partition_book(part_config, 0) except Exception as e: if not isinstance(e, TypeError): raise e @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("store_eids", [True, False]) @pytest.mark.parametrize("store_inner_node", [True, False]) @pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_dgl_partition_to_graphbolt_homo( part_method, num_parts, store_eids, store_inner_node, store_inner_edge, debug_mode, ): reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: g = 
create_random_graph(1000) graph_name = "test" partition_graph( g, graph_name, num_parts, test_dir, part_method=part_method ) part_config = os.path.join(test_dir, f"{graph_name}.json") dgl_partition_to_graphbolt( part_config, store_eids=store_eids, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, ) for part_id in range(num_parts): orig_g = dgl.load_graphs( os.path.join(test_dir, f"part{part_id}/graph.dgl") )[0][0] os.remove(os.path.join(test_dir, f"part{part_id}/graph.dgl")) new_g = load_partition( part_config, part_id, load_feats=False, use_graphbolt=True )[0] orig_indptr, orig_indices, orig_eids = orig_g.adj().csc() # The original graph is in int64 while the partitioned graph is in # int32 as dtype formatting is applied when converting to graphbolt # format. assert orig_indptr.dtype == th.int64 assert orig_indices.dtype == th.int64 assert new_g.csc_indptr.dtype == th.int32 assert new_g.indices.dtype == th.int32 assert th.equal(orig_indptr, new_g.csc_indptr) assert th.equal(orig_indices, new_g.indices) assert new_g.node_type_offset is None assert orig_g.ndata[dgl.NID].dtype == th.int64 assert new_g.node_attributes[dgl.NID].dtype == th.int64 assert th.equal( orig_g.ndata[dgl.NID], new_g.node_attributes[dgl.NID] ) if store_inner_node or debug_mode: assert th.equal( orig_g.ndata["inner_node"], new_g.node_attributes["inner_node"], ) if store_eids or debug_mode: assert orig_g.edata[dgl.EID].dtype == th.int64 assert new_g.edge_attributes[dgl.EID].dtype == th.int64 assert th.equal( orig_g.edata[dgl.EID][orig_eids], new_g.edge_attributes[dgl.EID], ) if store_inner_edge or debug_mode: assert orig_g.edata["inner_edge"].dtype == th.uint8 assert new_g.edge_attributes["inner_edge"].dtype == th.uint8 assert th.equal( orig_g.edata["inner_edge"][orig_eids], new_g.edge_attributes["inner_edge"], ) assert new_g.type_per_edge is None assert new_g.node_type_to_id is None assert new_g.edge_type_to_id is None @pytest.mark.parametrize("part_method", ["metis", 
"random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("store_eids", [True, False]) @pytest.mark.parametrize("store_inner_node", [True, False]) @pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_dgl_partition_to_graphbolt_hetero( part_method, num_parts, store_eids, store_inner_node, store_inner_edge, debug_mode, ): reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: g = create_random_hetero() graph_name = "test" partition_graph( g, graph_name, num_parts, test_dir, part_method=part_method ) part_config = os.path.join(test_dir, f"{graph_name}.json") dgl_partition_to_graphbolt( part_config, store_eids=store_eids, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, ) for part_id in range(num_parts): orig_g = dgl.load_graphs( os.path.join(test_dir, f"part{part_id}/graph.dgl") )[0][0] os.remove(os.path.join(test_dir, f"part{part_id}/graph.dgl")) new_g = load_partition( part_config, part_id, load_feats=False, use_graphbolt=True )[0] orig_indptr, orig_indices, orig_eids = orig_g.adj().csc() # Edges should be sorted in etype for the same dst node. if debug_mode: num_inner_edges = orig_g.edata["inner_edge"].sum().item() assert ( num_inner_edges == orig_g.edata["inner_edge"][th.arange(num_inner_edges)] .sum() .item() ) assert ( num_inner_edges == new_g.edge_attributes["inner_edge"][:num_inner_edges] .sum() .item() ) num_inner_nodes = orig_g.ndata["inner_node"].sum().item() assert ( num_inner_nodes == orig_g.ndata["inner_node"][th.arange(num_inner_nodes)] .sum() .item() ) assert ( num_inner_nodes == new_g.node_attributes["inner_node"][:num_inner_nodes] .sum() .item() ) for i in range(orig_g.num_nodes()): if orig_g.in_degrees(i) == 0: continue # Verify DGLGraph partitions. 
eids = orig_g.in_edges(i, form="eid") etypes = orig_g.edata[dgl.ETYPE][eids] assert th.equal(etypes, etypes.sort()[0]) # Verify GraphBolt partitions. eids_start = new_g.csc_indptr[i] eids_end = new_g.csc_indptr[i + 1] etypes = new_g.edge_attributes[dgl.ETYPE][ eids_start:eids_end ] assert th.equal(etypes, etypes.sort()[0]) # The original graph is in int64 while the partitioned graph is in # int32 as dtype formatting is applied when converting to graphbolt # format. assert orig_indptr.dtype == th.int64 assert orig_indices.dtype == th.int64 assert new_g.csc_indptr.dtype == th.int32 assert new_g.indices.dtype == th.int32 assert th.equal(orig_indptr, new_g.csc_indptr) assert th.equal(orig_indices, new_g.indices) assert orig_g.ndata[dgl.NID].dtype == th.int64 assert new_g.node_attributes[dgl.NID].dtype == th.int64 assert th.equal( orig_g.ndata[dgl.NID], new_g.node_attributes[dgl.NID] ) if store_inner_node or debug_mode: assert th.equal( orig_g.ndata["inner_node"], new_g.node_attributes["inner_node"], ) if debug_mode: assert orig_g.ndata[dgl.NTYPE].dtype == th.int32 assert new_g.node_attributes[dgl.NTYPE].dtype == th.int8 assert th.equal( orig_g.ndata[dgl.NTYPE], new_g.node_attributes[dgl.NTYPE] ) if store_eids or debug_mode: assert orig_g.edata[dgl.EID].dtype == th.int64 assert new_g.edge_attributes[dgl.EID].dtype == th.int64 assert th.equal( orig_g.edata[dgl.EID][orig_eids], new_g.edge_attributes[dgl.EID], ) if store_inner_edge or debug_mode: assert orig_g.edata["inner_edge"].dtype == th.uint8 assert new_g.edge_attributes["inner_edge"].dtype == th.uint8 assert th.equal( orig_g.edata["inner_edge"], new_g.edge_attributes["inner_edge"], ) if debug_mode: assert orig_g.edata[dgl.ETYPE].dtype == th.int32 assert new_g.edge_attributes[dgl.ETYPE].dtype == th.int8 assert th.equal( orig_g.edata[dgl.ETYPE][orig_eids], new_g.edge_attributes[dgl.ETYPE], ) assert th.equal( orig_g.edata[dgl.ETYPE][orig_eids], new_g.type_per_edge ) for node_type, type_id in 
new_g.node_type_to_id.items(): assert g.get_ntype_id(node_type) == type_id for edge_type, type_id in new_g.edge_type_to_id.items(): assert g.get_etype_id(_etype_str_to_tuple(edge_type)) == type_id assert new_g.node_type_offset is None def test_not_sorted_node_edge_map(): # Partition configure file which includes not sorted node/edge map. part_config_str = """ { "edge_map": { "item:likes-rev:user": [ [ 0, 100 ], [ 1000, 1500 ] ], "user:follows-rev:user": [ [ 300, 600 ], [ 2100, 2800 ] ], "user:follows:user": [ [ 100, 300 ], [ 1500, 2100 ] ], "user:likes:item": [ [ 600, 1000 ], [ 2800, 3600 ] ] }, "etypes": { "item:likes-rev:user": 0, "user:follows-rev:user": 2, "user:follows:user": 1, "user:likes:item": 3 }, "graph_name": "test_graph", "halo_hops": 1, "node_map": { "user": [ [ 100, 300 ], [ 600, 1000 ] ], "item": [ [ 0, 100 ], [ 300, 600 ] ] }, "ntypes": { "user": 1, "item": 0 }, "num_edges": 3600, "num_nodes": 1000, "num_parts": 2, "part-0": { "edge_feats": "part0/edge_feat.dgl", "node_feats": "part0/node_feat.dgl", "part_graph": "part0/graph.dgl" }, "part-1": { "edge_feats": "part1/edge_feat.dgl", "node_feats": "part1/node_feat.dgl", "part_graph": "part1/graph.dgl" }, "part_method": "metis" } """ with tempfile.TemporaryDirectory() as test_dir: part_config = os.path.join(test_dir, "test_graph.json") with open(part_config, "w") as file: file.write(part_config_str) # Part 0. gpb, _, _, _ = load_partition_book(part_config, 0) assert gpb.local_ntype_offset == [0, 100, 300] assert gpb.local_etype_offset == [0, 100, 300, 600, 1000] # Patr 1. gpb, _, _, _ = load_partition_book(part_config, 1) assert gpb.local_ntype_offset == [0, 300, 700] assert gpb.local_etype_offset == [0, 500, 1100, 1800, 2600] def _get_part_IDs(part_g): # These are partition-local IDs. num_columns = part_g.csc_indptr.diff() part_src_ids = part_g.indices part_dst_ids = th.arange(part_g.total_num_nodes).repeat_interleave( num_columns ) # These are reshuffled global homogeneous IDs. 
part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) return part_src_ids, part_dst_ids def _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, part_eids, part_src_ids, part_dst_ids, src_ntype=None, dst_ntype=None, etype=None, ): """ check list: make sure orig edge id are correct after """ if src_ntype is not None and dst_ntype is not None: orig_src_nid = orig_nids[src_ntype] orig_dst_nid = orig_nids[dst_ntype] else: orig_src_nid = orig_nids orig_dst_nid = orig_nids orig_src_ids = F.gather_row(orig_src_nid, part_src_ids) orig_dst_ids = F.gather_row(orig_dst_nid, part_dst_ids) if etype is not None: orig_eids = orig_eids[etype] orig_eids1 = F.gather_row(orig_eids, part_eids) orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids, etype=etype) assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) def _verify_metadata_gb(gpb, g, num_parts, part_id, part_sizes): """ check list: make sure the number of nodes and edges is correct. make sure the number of parts is correct. make sure the number of nodes and edges in each parts os corrcet. """ assert gpb._num_nodes() == g.num_nodes() assert gpb._num_edges() == g.num_edges() assert gpb.num_partitions() == num_parts gpb_meta = gpb.metadata() assert len(gpb_meta) == num_parts assert len(gpb.partid2nids(part_id)) == gpb_meta[part_id]["num_nodes"] assert len(gpb.partid2eids(part_id)) == gpb_meta[part_id]["num_edges"] part_sizes.append( (gpb_meta[part_id]["num_nodes"], gpb_meta[part_id]["num_edges"]) ) def _verify_local_id_gb(part_g, part_id, gpb): """ check list: make sure the type of local id is correct. make sure local id have a right order. 
""" nid = F.boolean_mask( part_g.node_attributes[dgl.NID], part_g.node_attributes["inner_node"], ) local_nid = gpb.nid2localnid(nid, part_id) assert F.dtype(local_nid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) eid = F.boolean_mask( part_g.edge_attributes[dgl.EID], part_g.edge_attributes["inner_edge"], ) local_eid = gpb.eid2localeid(eid, part_id) assert F.dtype(local_eid) in (F.int64, F.int32) assert np.all(np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid))) return local_nid, local_eid def _verify_map_gb( part_g, part_id, gpb, ): """ check list: make sure the map node and its data type is correct. """ # Check the node map. local_nodes = F.boolean_mask( part_g.node_attributes[dgl.NID], part_g.node_attributes["inner_node"], ) inner_node_index = F.nonzero_1d(part_g.node_attributes["inner_node"]) mapping_nodes = gpb.partid2nids(part_id) assert F.dtype(mapping_nodes) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(mapping_nodes)) ) assert np.all( F.asnumpy(inner_node_index) == np.arange(len(inner_node_index)) ) # Check the edge map. local_edges = F.boolean_mask( part_g.edge_attributes[dgl.EID], part_g.edge_attributes["inner_edge"], ) inner_edge_index = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) mapping_edges = gpb.partid2eids(part_id) assert F.dtype(mapping_edges) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(mapping_edges)) ) assert np.all( F.asnumpy(inner_edge_index) == np.arange(len(inner_edge_index)) ) return local_nodes, local_edges def _verify_local_and_map_id_gb( part_g, part_id, gpb, store_inner_node, store_inner_edge, store_eids, ): """ check list: make sure local id are correct. make sure mapping id are correct. 
""" if store_inner_node and store_inner_edge and store_eids: _verify_local_id_gb(part_g, part_id, gpb) _verify_map_gb(part_g, part_id, gpb) def _verify_orig_IDs_gb( part_g, gpb, g, is_homo=False, part_src_ids=None, part_dst_ids=None, src_ntype_ids=None, dst_ntype_ids=None, orig_nids=None, orig_eids=None, ): """ check list: make sure orig edge id are correct. make sure hetero ntype id are correct. """ part_eids = part_g.edge_attributes[dgl.EID] if is_homo: _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, part_eids, part_src_ids, part_dst_ids ) local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] part_g.node_attributes["feats"] = F.gather_row( g.ndata["feats"], local_orig_nids ) part_g.edge_attributes["feats"] = F.gather_row( g.edata["feats"], local_orig_eids ) else: etype_ids, part_eids = gpb.map_to_per_etype(part_eids) # `IdMap` is in int64 by default. assert etype_ids.dtype == F.int64 # These are original per-type IDs. 
for etype_id, etype in enumerate(g.canonical_etypes): part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) src_ntype_ids1 = F.boolean_mask( src_ntype_ids, etype_ids == etype_id ) part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) dst_ntype_ids1 = F.boolean_mask( dst_ntype_ids, etype_ids == etype_id ) part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, part_eids1, part_src_ids1, part_dst_ids1, src_ntype, dst_ntype, etype, ) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("store_eids", [True, False]) @pytest.mark.parametrize("store_inner_node", [True, False]) @pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_homo( part_method, num_parts, store_eids, store_inner_node, store_inner_edge, debug_mode, ): reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: g = create_random_graph(1000) graph_name = "test" g.ndata["labels"] = F.arange(0, g.num_nodes()) g.ndata["feats"] = F.tensor( np.random.randn(g.num_nodes(), 10), F.float32 ) g.edata["feats"] = F.tensor( np.random.randn(g.num_edges(), 10), F.float32 ) orig_nids, orig_eids = partition_graph( g, graph_name, num_parts, test_dir, part_method=part_method, use_graphbolt=True, store_eids=store_eids, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, return_mapping=True, ) if debug_mode: store_eids = store_inner_node = store_inner_edge = True _verify_graphbolt_part( g, test_dir, orig_nids, orig_eids, graph_name, num_parts, store_inner_node, 
store_inner_edge, store_eids, is_homo=True, ) def _verify_constructed_id_gb(part_sizes, gpb): """ verify the part id of each node by constructed nids. check list: make sure each node' part id and its type are corect """ node_map = [] edge_map = [] for part_i, (num_nodes, num_edges) in enumerate(part_sizes): node_map.append(np.ones(num_nodes) * part_i) edge_map.append(np.ones(num_edges) * part_i) node_map = np.concatenate(node_map) edge_map = np.concatenate(edge_map) nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) assert F.dtype(nid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(nid2pid) == node_map) eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) assert F.dtype(eid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(eid2pid) == edge_map) def _verify_shuffled_labels_gb( g, shuffled_labels, shuffled_edata, orig_nids, orig_eids, test_ntype=None, test_etype=None, ): """ check list: make sure node data are correct. make sure edge data are correct. """ shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) orig_nid = orig_nids if test_ntype is None else orig_nids[test_ntype] orig_eid = orig_eids if test_etype is None else orig_eids[test_etype] nlabel = ( g.ndata["labels"] if test_ntype is None else g.nodes[test_ntype].data["labels"] ) edata = ( g.edata["feats"] if test_etype is None else g.edges[test_etype].data["labels"] ) orig_labels[F.asnumpy(orig_nid)] = shuffled_labels orig_edata[F.asnumpy(orig_eid)] = shuffled_edata assert np.all(orig_labels == F.asnumpy(nlabel)) assert np.all(orig_edata == F.asnumpy(edata)) def _verify_node_type_ID_gb(part_g, gpb): """ check list: make sure ntype id have correct data type """ part_src_ids, part_dst_ids = _get_part_IDs(part_g) # These are reshuffled per-type IDs. 
src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) # `IdMap` is in int64 by default. assert src_ntype_ids.dtype == F.int64 assert dst_ntype_ids.dtype == F.int64 with pytest.raises(dgl.utils.internal.InconsistentDtypeException): gpb.map_to_per_ntype(F.tensor([0], F.int32)) with pytest.raises(dgl.utils.internal.InconsistentDtypeException): gpb.map_to_per_etype(F.tensor([0], F.int32)) return ( part_src_ids, part_dst_ids, src_ntype_ids, part_src_ids, dst_ntype_ids, ) def _verify_IDs_gb( g, part_g, part_id, gpb, part_sizes, orig_nids, orig_eids, store_inner_node, store_inner_edge, store_eids, is_homo, ): # verify local id and mapping id _verify_local_and_map_id_gb( part_g, part_id, gpb, store_inner_node, store_inner_edge, store_eids, ) # Verify the mapping between the reshuffled IDs and the original IDs. ( part_src_ids, part_dst_ids, src_ntype_ids, part_src_ids, dst_ntype_ids, ) = _verify_node_type_ID_gb(part_g, gpb) if store_eids: _verify_orig_IDs_gb( part_g, gpb, g, part_src_ids=part_src_ids, part_dst_ids=part_dst_ids, src_ntype_ids=src_ntype_ids, dst_ntype_ids=dst_ntype_ids, orig_nids=orig_nids, orig_eids=orig_eids, is_homo=is_homo, ) _verify_constructed_id_gb(part_sizes, gpb) def _collect_data_gb( parts, part_g, gpbs, gpb, tot_node_feats, node_feats, tot_edge_feats, edge_feats, shuffled_labels, shuffled_edata, test_ntype, test_etype, ): if test_ntype != None: shuffled_labels.append(node_feats[test_ntype + "/labels"]) shuffled_edata.append( edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) else: shuffled_labels.append(node_feats["_N/labels"]) shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) parts.append(part_g) gpbs.append(gpb) tot_node_feats.append(node_feats) tot_edge_feats.append(edge_feats) def _verify_graphbolt_part( g, test_dir, orig_nids, orig_eids, graph_name, num_parts, store_inner_node, store_inner_edge, store_eids, test_ntype=None, test_etype=None, is_homo=False, ): 
""" check list: _verify_metadata_gb: data type, ID's order and ID's number of edges and nodes _verify_IDs_gb: local id, mapping id,node type id, orig edge, hetero ntype id verify_graph_feats_gb: nodes and edges' feats _verify_graphbolt_attributes: arguments """ parts = [] tot_node_feats = [] tot_edge_feats = [] shuffled_labels = [] shuffled_edata = [] part_sizes = [] gpbs = [] part_config = os.path.join(test_dir, f"{graph_name}.json") # test each part for part_id in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=True ) # verify metadata _verify_metadata_gb( gpb, g, num_parts, part_id, part_sizes, ) # verify eid and nid _verify_IDs_gb( g, part_g, part_id, gpb, part_sizes, orig_nids, orig_eids, store_inner_node, store_inner_edge, store_eids, is_homo, ) # collect shuffled data and parts _collect_data_gb( parts, part_g, gpbs, gpb, tot_node_feats, node_feats, tot_edge_feats, edge_feats, shuffled_labels, shuffled_edata, test_ntype, test_etype, ) # verify graph feats verify_graph_feats_gb( g, gpbs, parts, tot_node_feats, tot_edge_feats, orig_nids, orig_eids, shuffled_labels=shuffled_labels, shuffled_edata=shuffled_edata, test_ntype=test_ntype, test_etype=test_etype, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, store_eids=store_eids, is_homo=is_homo, ) _verify_graphbolt_attributes( parts, store_inner_node, store_inner_edge, store_eids ) return parts def _verify_original_IDs_type_hetero(hg, orig_nids, orig_eids): """ check list: make sure type of nodes and edges' ids are correct. make sure nodes and edges' number in each type is correct. 
""" assert len(orig_nids) == len(hg.ntypes) assert len(orig_eids) == len(hg.canonical_etypes) for ntype in hg.ntypes: assert len(orig_nids[ntype]) == hg.num_nodes(ntype) assert F.dtype(orig_nids[ntype]) in (F.int64, F.int32) for etype in hg.canonical_etypes: assert len(orig_eids[etype]) == hg.num_edges(etype) assert F.dtype(orig_eids[etype]) in (F.int64, F.int32) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("store_eids", [True, False]) @pytest.mark.parametrize("store_inner_node", [True, False]) @pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_hetero( part_method, num_parts, store_eids, store_inner_node, store_inner_edge, debug_mode, n_jobs=1, ): test_ntype = "n1" test_etype = ("n1", "r1", "n2") reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: hg = create_random_hetero() graph_name = "test" hg.nodes[test_ntype].data["labels"] = F.arange( 0, hg.num_nodes(test_ntype) ) hg.nodes[test_ntype].data["feats"] = F.tensor( np.random.randn(hg.num_nodes(test_ntype), 10), F.float32 ) hg.edges[test_etype].data["feats"] = F.tensor( np.random.randn(hg.num_edges(test_etype), 10), F.float32 ) hg.edges[test_etype].data["labels"] = F.arange( 0, hg.num_edges(test_etype) ) orig_nids, orig_eids = partition_graph( hg, graph_name, num_parts, test_dir, part_method=part_method, return_mapping=True, num_trainers_per_machine=1, use_graphbolt=True, store_eids=store_eids, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, n_jobs=n_jobs, ) _verify_original_IDs_type_hetero(hg, orig_nids, orig_eids) if debug_mode: store_eids = store_inner_node = store_inner_edge = True parts = _verify_graphbolt_part( hg, test_dir, orig_nids, orig_eids, graph_name, num_parts, store_inner_node, store_inner_edge, store_eids, test_ntype, test_etype, 
is_homo=False, ) _verify_hetero_graph( hg, parts, store_eids=store_eids, store_inner_edge=store_inner_edge, debug_mode=debug_mode, ) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("graph_formats", [["csc"], ["coo"], ["coo", "csc"]]) def test_partition_graph_graphbolt_homo_find_edges( part_method, num_parts, graph_formats, n_jobs=1, ): reset_envs() os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: g = create_random_graph(1000) g.ndata["feat"] = th.rand(g.num_nodes(), 5) graph_name = "test" orig_nids, orig_eids = partition_graph( g, graph_name, num_parts, test_dir, part_method=part_method, graph_formats=graph_formats, return_mapping=True, use_graphbolt=True, store_eids=True, store_inner_node=True, store_inner_edge=True, n_jobs=n_jobs, ) part_config = os.path.join(test_dir, f"{graph_name}.json") for part_id in range(num_parts): local_g, _, _, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=False, use_graphbolt=True ) inner_local_eids = th.nonzero( local_g.edge_attributes["inner_edge"], as_tuple=False ).squeeze() inner_global_eids = local_g.edge_attributes[dgl.EID][ inner_local_eids ] if "coo" not in graph_formats: with pytest.raises( ValueError, match="The edge attributes DGL2GB_EID and GB_DST_ID are " "not found. Please make sure `coo` format is available" " when generating partitions in GraphBolt format.", ): dgl.distributed.graph_services._find_edges( local_g, gpb, inner_global_eids ) continue global_src, global_dst = dgl.distributed.graph_services._find_edges( local_g, gpb, inner_global_eids ) orig_global_src = orig_nids[global_src] orig_global_dst = orig_nids[global_dst] assert th.all(g.has_edges_between(orig_global_src, orig_global_dst)) # dtype check. 
@pytest.mark.parametrize("part_method", ["metis", "random"])
@pytest.mark.parametrize("num_parts", [1, 4])
@pytest.mark.parametrize("graph_formats", [["csc"], ["coo"], ["coo", "csc"]])
def test_partition_graph_graphbolt_hetero_find_edges(
    part_method,
    num_parts,
    graph_formats,
    n_jobs=1,
):
    """Check `_find_edges` on GraphBolt partitions of a random heterograph.

    The graph is partitioned with ``use_graphbolt=True`` and, for every
    partition, the endpoints returned by
    ``dgl.distributed.graph_services._find_edges`` for the partition's inner
    edges are compared with the original graph. When ``"coo"`` is not among
    ``graph_formats`` the call is expected to raise ``ValueError`` instead.

    ``n_jobs`` is forwarded to ``partition_graph``; it is not parametrized
    here, so pytest always uses the default and other tests in this module
    call this function directly with a different value.
    """
    reset_envs()
    os.environ["DGL_DIST_DEBUG"] = "1"
    with tempfile.TemporaryDirectory() as test_dir:
        hg = create_random_hetero()
        graph_name = "test"
        # return_mapping=True yields the per-type ID mappings that are used
        # below to translate partitioned IDs back to IDs of `hg`.
        orig_nids, orig_eids = partition_graph(
            hg,
            graph_name,
            num_parts,
            test_dir,
            part_method=part_method,
            graph_formats=graph_formats,
            return_mapping=True,
            use_graphbolt=True,
            store_eids=True,
            store_inner_node=True,
            store_inner_edge=True,
            n_jobs=n_jobs,
        )
        part_config = os.path.join(test_dir, f"{graph_name}.json")
        for part_id in range(num_parts):
            local_g, _, _, gpb, _, _, _ = load_partition(
                part_config, part_id, load_feats=False, use_graphbolt=True
            )
            # Local positions of the edges flagged "inner_edge" and the
            # dgl.EID values stored for them.
            inner_local_eids = th.nonzero(
                local_g.edge_attributes["inner_edge"], as_tuple=False
            ).squeeze()
            inner_global_eids = local_g.edge_attributes[dgl.EID][
                inner_local_eids
            ]
            # Without the coo format the lookup must fail with the message
            # below; nothing else can be checked for this partition.
            if "coo" not in graph_formats:
                with pytest.raises(
                    ValueError,
                    match="The edge attributes DGL2GB_EID and GB_DST_ID are "
                    "not found. Please make sure `coo` format is available"
                    " when generating partitions in GraphBolt format.",
                ):
                    dgl.distributed.graph_services._find_edges(
                        local_g, gpb, inner_global_eids
                    )
                continue
            global_src, global_dst = dgl.distributed.graph_services._find_edges(
                local_g, gpb, inner_global_eids
            )
            # Split the returned IDs into (type ID, per-type ID) pairs.
            ntype_ids_src, per_ntype_nids_src = gpb.map_to_per_ntype(global_src)
            ntype_ids_dst, per_ntype_nids_dst = gpb.map_to_per_ntype(global_dst)
            etype_ids, per_etype_eids = gpb.map_to_per_etype(inner_global_eids)
            for src_ntype, etype, dst_ntype in hg.canonical_etypes:
                etype_id = hg.get_etype_id((src_ntype, etype, dst_ntype))
                # Positions of the inner edges that belong to this edge type.
                current_etype_indices = th.nonzero(
                    etype_ids == etype_id, as_tuple=False
                ).squeeze()
                # Endpoints must carry the node types of the canonical etype.
                assert th.all(
                    ntype_ids_src[current_etype_indices]
                    == gpb.ntypes.index(src_ntype)
                )
                assert th.all(
                    ntype_ids_dst[current_etype_indices]
                    == gpb.ntypes.index(dst_ntype)
                )
                current_per_ntype_nids_src = per_ntype_nids_src[
                    current_etype_indices
                ]
                current_per_ntype_nids_dst = per_ntype_nids_dst[
                    current_etype_indices
                ]
                # Map per-type node IDs back to IDs of the original graph.
                current_orig_global_src = orig_nids[src_ntype][
                    current_per_ntype_nids_src
                ]
                current_orig_global_dst = orig_nids[dst_ntype][
                    current_per_ntype_nids_dst
                ]
                assert th.all(
                    hg.has_edges_between(
                        current_orig_global_src,
                        current_orig_global_dst,
                        etype=(src_ntype, etype, dst_ntype),
                    )
                )
                # Stronger check: the very same edge IDs in the original
                # graph must have exactly these endpoints.
                current_orig_global_eids = orig_eids[
                    (src_ntype, etype, dst_ntype)
                ][per_etype_eids[current_etype_indices]]
                orig_src_ids, orig_dst_ids = hg.find_edges(
                    current_orig_global_eids,
                    etype=(src_ntype, etype, dst_ntype),
                )
                assert th.equal(current_orig_global_src, orig_src_ids)
                assert th.equal(current_orig_global_dst, orig_dst_ids)

            # dtype check.
            assert (
                local_g.edge_attributes[dgl.distributed.DGL2GB_EID].dtype
                == th.int32
            )
            assert (
                local_g.edge_attributes[dgl.distributed.GB_DST_ID].dtype
                == th.int32
            )

            # No need to map local node IDs.
            inner_local_nids = th.nonzero(
                local_g.node_attributes["inner_node"], as_tuple=False
            ).squeeze()
            inner_global_nids = local_g.node_attributes[dgl.NID][
                inner_local_nids
            ]
            assert th.equal(
                inner_local_nids, gpb.nid2localnid(inner_global_nids, part_id)
            )

            # Need to map local edge IDs.
            # The partition book's local edge IDs are translated through the
            # DGL2GB_EID attribute before being compared with the positions
            # found via "inner_edge".
            DGL_inner_local_eids = gpb.eid2localeid(inner_global_eids, part_id)
            GB_inner_local_eids = local_g.edge_attributes[
                dgl.distributed.DGL2GB_EID
            ][DGL_inner_local_eids]
            assert th.equal(inner_local_eids, GB_inner_local_eids)
@pytest.mark.parametrize("part_method", ["metis", "random"])
@pytest.mark.parametrize("num_parts", [4])
@pytest.mark.parametrize("num_trainers_per_machine", [1])
@pytest.mark.parametrize("graph_formats", [None])
def test_partition_hetero_few_nodes(
    part_method,
    num_parts,
    num_trainers_per_machine,
    graph_formats,
):
    """Partition a heterograph in which some node types have only 2 nodes.

    The partition check is expected to raise for ``part_method == "metis"``
    and to succeed otherwise.
    """
    os.environ["DGL_DIST_DEBUG"] = "1"
    # Always restore the environment, even when an assertion below fails;
    # otherwise DGL_DIST_DEBUG leaks into the tests that run afterwards.
    try:
        if part_method == "random" and num_parts > 1:
            num_trainers_per_machine = 1

        # Create a heterograph with 2 nodes for one node type.
        hg = create_random_hetero()
        edges_coo = {
            c_etype: hg.edges(etype=c_etype) for c_etype in hg.canonical_etypes
        }
        edges_coo[("n1", "r_few", "n_few")] = (
            th.tensor([0, 1]),
            th.tensor([1, 0]),
        )
        edges_coo[("a0", "a01", "n_1")] = (
            th.tensor([0, 1]),
            th.tensor([1, 0]),
        )
        hg = dgl.heterograph(edges_coo)

        expected_exception = False
        try:
            check_hetero_partition(
                hg,
                part_method,
                num_parts,
                num_trainers_per_machine,
                load_feats=False,
                graph_formats=graph_formats,
            )
        except Exception:
            # Deliberately broad: any failure of the check (including an
            # AssertionError raised inside it) counts as "raised".
            expected_exception = True
        assert expected_exception == (part_method == "metis")
    finally:
        reset_envs()
class HelloRequest(dgl.distributed.Request):
    """Request carrying a string, an integer, a tensor and a callable.

    The server applies the callable to the tensor and echoes the string and
    integer back in a ``HelloResponse``.
    """

    def __init__(self, hello_str, integer, tensor, func):
        self.hello_str = hello_str
        self.integer = integer
        self.tensor = tensor
        self.func = func

    def __getstate__(self):
        # Field order here must mirror the unpacking in __setstate__.
        return (self.hello_str, self.integer, self.tensor, self.func)

    def __setstate__(self, state):
        hello_str, integer, tensor, func = state
        self.hello_str = hello_str
        self.integer = integer
        self.tensor = tensor
        self.func = func

    def process_request(self, server_state):
        # The payload must match the module-level constants sent by clients.
        assert self.hello_str == STR
        assert self.integer == INTEGER
        return HelloResponse(
            self.hello_str, self.integer, self.func(self.tensor)
        )
def start_server(
    num_clients,
    ip_config,
    server_id=0,
    num_servers=1,
):
    """Register the test services and run a DGL RPC server.

    The call into ``dgl.distributed.start_server`` is preceded by a one
    second sleep so that clients started at the same time have to retry
    their connection.
    """
    print("Sleep 1 seconds to test client re-connect.")
    time.sleep(1)
    state = dgl.distributed.ServerState(
        None, local_g=None, partition_book=None
    )
    # (service id, request class, response class) for every test service.
    services = (
        (HELLO_SERVICE_ID, HelloRequest, HelloResponse),
        (TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse),
    )
    for service_id, request_cls, response_cls in services:
        dgl.distributed.register_service(service_id, request_cls, response_cls)
    print("Start server {}".format(server_id))
    dgl.distributed.start_server(
        server_id=server_id,
        ip_config=ip_config,
        num_servers=num_servers,
        num_clients=num_clients,
        server_state=state,
    )
def start_client_timeout(ip_config, group_id=0, num_servers=1):
    """Client side of the RPC timeout test.

    Exercises ``recv_response`` and the ``remote_call*`` helpers with a
    timeout that is half of the delay the server applies to each request.
    """
    dgl.distributed.register_service(
        TIMEOUT_SERVICE_ID, TimeoutRequest, TimeoutResponse
    )
    dgl.distributed.connect_to_server(
        ip_config=ip_config,
        num_servers=num_servers,
        group_id=group_id,
    )
    timeout = 1 * 1000  # milliseconds
    short_wait = int(timeout / 2)

    def check_delayed_reply(send):
        # The reply is not there after the short wait, but does arrive when
        # waiting without a timeout.
        send(0, TimeoutRequest(TIMEOUT_META, timeout))
        early = dgl.distributed.recv_response(timeout=short_wait)
        assert early is None
        late = dgl.distributed.recv_response()
        assert late.meta == TIMEOUT_META

    def check_call_times_out(call):
        # The server never answers these requests, so the call must raise.
        silent_req = TimeoutRequest(TIMEOUT_META, timeout, response=False)
        batch = [(0, silent_req) for _ in range(3)]
        timed_out = False
        try:
            call(batch, timeout=short_wait)
        except dgl.DGLError:
            timed_out = True
        assert timed_out

    # test send and recv
    check_delayed_reply(dgl.distributed.send_request)
    # test remote_call
    check_call_times_out(dgl.distributed.remote_call)
    # test send_request_to_machine
    check_delayed_reply(dgl.distributed.send_request_to_machine)
    # test remote_call_to_machine
    check_call_times_out(dgl.distributed.remote_call_to_machine)
def test_serialize():
    """Round-trip a request and a response through the RPC payload codec."""
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "distributed"
    # Imported only after the environment has been prepared, as before.
    from dgl.distributed.rpc import (
        deserialize_from_payload,
        serialize_to_payload,
    )

    SERVICE_ID = 12345
    dgl.distributed.register_service(SERVICE_ID, MyRequest, MyResponse)

    sent_req = MyRequest()
    payload, tensor_list = serialize_to_payload(sent_req)
    got_req = deserialize_from_payload(MyRequest, payload, tensor_list)
    # The function attribute must survive the round trip and stay callable.
    got_req.foo(got_req.x, got_req.y)
    assert sent_req.x == got_req.x
    assert sent_req.y == got_req.y
    assert F.array_equal(sent_req.z, got_req.z)

    sent_res = MyResponse()
    payload, tensor_list = serialize_to_payload(sent_res)
    got_res = deserialize_from_payload(MyResponse, payload, tensor_list)
    assert sent_res.x == got_res.x
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_multi_thread_rpc():
    """Send requests to two servers from the main thread and a sub-thread."""
    reset_envs()
    os.environ["DGL_DIST_MODE"] = "distributed"
    num_servers = 2
    ip_config = "rpc_ip_config_multithread.txt"
    generate_ip_config(ip_config, num_servers, num_servers)
    ctx = mp.get_context("spawn")
    pserver_list = []
    for i in range(num_servers):
        pserver = ctx.Process(target=start_server, args=(1, ip_config, i, 1))
        pserver.start()
        pserver_list.append(pserver)

    def start_client_multithread(ip_config):
        import threading

        dgl.distributed.connect_to_server(
            ip_config=ip_config,
            num_servers=1,
        )
        dgl.distributed.register_service(
            HELLO_SERVICE_ID, HelloRequest, HelloResponse
        )

        req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
        dgl.distributed.send_request(0, req)

        def subthread_call(server_id):
            req = HelloRequest(STR, INTEGER, TENSOR, simple_func)
            dgl.distributed.send_request(server_id, req)

        subthread = threading.Thread(target=subthread_call, args=(1,))
        subthread.start()
        subthread.join()

        res0 = dgl.distributed.recv_response()
        res1 = dgl.distributed.recv_response()
        # Order is not guaranteed
        assert_array_equal(F.asnumpy(res0.tensor), F.asnumpy(TENSOR))
        assert_array_equal(F.asnumpy(res1.tensor), F.asnumpy(TENSOR))
        dgl.distributed.exit_client()

    start_client_multithread(ip_config)
    # Wait for every server process, not only the one created last, so no
    # server outlives the test.
    for pserver in pserver_list:
        pserver.join()
def generate_ip_config(file_name, num_machines, num_servers):
    """Get local IP and available ports, writes to file.

    A run of ``num_machines * num_servers`` consecutive ports that nothing
    is listening on is searched for, starting at a random port in
    ``[10000, 30000]``. One line ``"<ip> <port>"`` is written per machine,
    where the port is the first of that machine's ``num_servers`` ports.

    Raises
    ------
    RuntimeError
        If not enough consecutive free ports are found below 65535.
    """
    num_ports = num_machines * num_servers

    # get available IP in localhost
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        try:
            # doesn't even have to be reachable
            sock.connect(("10.255.255.255", 1))
            ip = sock.getsockname()[0]
        except OSError:
            # Socket failures (e.g. no route to the network) are OSError,
            # not ValueError; fall back to loopback in that case.
            ip = "127.0.0.1"

    # scan available PORT
    ports = []
    start = random.randint(10000, 30000)
    for port in range(start, 65535):
        # A fresh socket per probe: once a socket has connected, further
        # connect() calls on it fail and would misreport ports as free.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                probe.connect((ip, port))
            except OSError:
                # Nothing is listening: the port is free.
                ports.append(port)
                if len(ports) == num_ports:
                    break
            else:
                # Port in use: the run of consecutive ports starts over.
                ports = []

    if len(ports) < num_ports:
        raise RuntimeError(
            "Failed to get available IP/PORT with required numbers."
        )
    with open(file_name, "w") as f:
        for i in range(num_machines):
            f.write("{} {}\n".format(ip, ports[i * num_servers]))
def test_gcnii():
    """Run the sparse GCNII example and check the accuracy it reports."""
    script = os.path.join(EXAMPLE_ROOT, "gcnii.py")
    # Use the interpreter that is running the tests; a bare "python" may
    # resolve to a different installation (or none at all) on PATH.
    out = subprocess.run(
        [sys.executable, str(script)], capture_output=True
    )
    assert (
        out.returncode == 0
    ), f"stdout: {out.stdout.decode('utf-8')}\nstderr: {out.stderr.decode('utf-8')}"
    stdout = out.stdout.decode("utf-8")
    # The score is parsed from the last five characters of the output.
    assert float(stdout[-5:]) > 0.75
def _test_flaky(test_fn, max_num_success=8, num_tries=10):
    """Run a flaky test repeatedly until its success rate is decided.

    ``test_fn`` is called up to ``num_tries`` times; an ``AssertionError``
    counts as a failed attempt. The function returns as soon as the share
    of successful attempts so far reaches ``max_num_success / num_tries``
    and raises ``AssertionError`` once more than
    ``num_tries - max_num_success`` attempts have failed.
    """
    allowed_failures = num_tries - max_num_success
    passed = 0
    for attempt in range(1, num_tries + 1):
        try:
            test_fn()
        except AssertionError:
            pass
        else:
            passed += 1
        # If it succeeds max_num_success / num_tries of the time.
        if num_tries * passed >= max_num_success * attempt:
            return
        # Early failure if required success rate is impossible now.
        assert attempt - passed <= allowed_failures
@pytest.mark.parametrize("g", get_cases(["block-bipartite"]))
def test_gcn_block(g):
    """Smoke-test ``GCN.forward_block`` with and without edge weights."""
    data_info = {"in_size": 10, "out_size": 7}
    src_feat = torch.randn(g.num_src_nodes(), data_info["in_size"])
    edge_weight = torch.randn(g.num_edges()).abs()
    block_list = [g]

    # not use_edge_weight
    unweighted = GCN(data_info, use_edge_weight=False)
    unweighted.forward_block(block_list, src_feat)

    # use_edge_weight
    weighted = GCN(data_info, use_edge_weight=True)
    weighted.forward_block(block_list, src_feat, edge_weight)
def test_bilinear():
    """Smoke-test ``BilinearPredictor`` on random source/destination pairs."""
    data_info = {"in_size": 10, "out_size": 1}
    predictor = BilinearPredictor(data_info)
    pair_shape = (10, data_info["in_size"])
    src_repr = torch.randn(*pair_shape)
    dst_repr = torch.randn(*pair_shape)
    predictor(src_repr, dst_repr)
def test_pna():
    """Smoke-test ``PNA`` with an ogbg-mol config and a generic config."""
    n_nodes, n_edges = 5, 15

    # Test for ogbg-mol datasets
    mol_info = {"name": "ogbg-molhiv", "delta": 1, "out_size": 1}
    mol_model = PNA(mol_info, embed_size=10, num_layers=2)
    g = dgl.rand_graph(n_nodes, n_edges)
    int_feat = torch.zeros(n_nodes, 9, dtype=torch.long)
    mol_model(g, int_feat)

    # Test for non-ogbg-mol datasets
    generic_info = {
        "name": "a_dataset",
        "node_feat_size": 15,
        "delta": 1,
        "out_size": 1,
    }
    generic_model = PNA(generic_info, embed_size=10, num_layers=2)
    float_feat = torch.randn(n_nodes, generic_info["node_feat_size"])
    generic_model(g, float_feat)
_LINKPRED_CONFIGURE = "dgl configure linkpred --data {} --node-model {} --edge-model {} --neg-sampler {} --cfg {}"


def _run_and_check(command, expected_path):
    """Run ``command`` through the shell, then assert ``expected_path`` exists."""
    os.system(command)
    assert os.path.exists(expected_path)


def _check_linkpred(data, node_model, edge_model, neg_sampler):
    """Configure and export one linkpred pipeline, checking both output files."""
    stem = "_".join([data, node_model, edge_model, neg_sampler])
    cfg_path = stem + ".yaml"
    _run_and_check(
        _LINKPRED_CONFIGURE.format(
            data, node_model, edge_model, neg_sampler, cfg_path
        ),
        cfg_path,
    )
    script_path = stem + ".py"
    _run_and_check(
        "dgl export --cfg {} --output {}".format(cfg_path, script_path),
        script_path,
    )


@pytest.mark.parametrize("model", ["gcn", "gat", "sage", "sgc", "gin"])
def test_nodepred_model(model):
    """Configure (default and custom cfg) and export nodepred on cora per model."""
    _run_and_check(
        f"dgl configure nodepred --data cora --model {model}",
        f"nodepred_cora_{model}.yaml",
    )
    cfg_path = f"custom_cora_{model}.yaml"
    _run_and_check(
        f"dgl configure nodepred --data cora --model {model} --cfg {cfg_path}",
        cfg_path,
    )
    script_path = f"cora_{model}.py"
    _run_and_check(
        f"dgl export --cfg {cfg_path} --output {script_path}", script_path
    )


@pytest.mark.parametrize(
    "data",
    [
        "cora",
        "citeseer",
        "pubmed",
        "csv",
        "reddit",
        "co-buy-computer",
        "ogbn-arxiv",
        "ogbn-products",
    ],
)
def test_nodepred_ns_data(data):
    """Configure and export the nodepred-ns pipeline with gcn per dataset."""
    _run_and_check(
        f"dgl configure nodepred-ns --data {data} --model gcn",
        f"nodepred-ns_{data}_gcn.yaml",
    )
    cfg_path = f"ns-custom_{data}_gcn.yaml"
    _run_and_check(
        f"dgl configure nodepred-ns --data {data} --model gcn --cfg {cfg_path}",
        cfg_path,
    )
    script_path = f"ns-{data}_gcn.py"
    _run_and_check(
        f"dgl export --cfg {cfg_path} --output {script_path}", script_path
    )


@pytest.mark.parametrize("model", ["gcn", "gat", "sage"])
def test_nodepred_ns_model(model):
    """Configure and export the nodepred-ns pipeline on cora per model."""
    _run_and_check(
        f"dgl configure nodepred-ns --data cora --model {model}",
        f"nodepred-ns_cora_{model}.yaml",
    )
    cfg_path = f"ns-custom_cora_{model}.yaml"
    _run_and_check(
        f"dgl configure nodepred-ns --data cora --model {model} --cfg {cfg_path}",
        cfg_path,
    )
    script_path = f"ns-cora_{model}.py"
    _run_and_check(
        f"dgl export --cfg {cfg_path} --output {script_path}", script_path
    )


@pytest.mark.parametrize(
    "data",
    [
        "cora",
        "citeseer",
        "pubmed",
        "csv",
        "reddit",
        "co-buy-computer",
        "ogbn-arxiv",
        "ogbn-products",
        "ogbl-collab",
        "ogbl-citation2",
    ],
)
def test_linkpred_data(data):
    """Configure and export linkpred (gcn / ele / global) per dataset."""
    _check_linkpred(data, "gcn", "ele", "global")


@pytest.mark.parametrize("node_model", ["gcn", "gat", "sage", "sgc", "gin"])
def test_linkpred_node_model(node_model):
    """Configure and export linkpred on cora per node model."""
    _check_linkpred("cora", node_model, "ele", "global")


@pytest.mark.parametrize("edge_model", ["ele", "bilinear"])
def test_linkpred_edge_model(edge_model):
    """Configure and export linkpred on cora per edge model."""
    _check_linkpred("cora", "gcn", edge_model, "global")


@pytest.mark.parametrize("neg_sampler", ["global", "persource", ""])
def test_linkpred_neg_sampler(neg_sampler):
    """Configure and export linkpred on cora per negative sampler.

    An empty ``neg_sampler`` leaves the ``--neg-sampler`` flag off the command.
    """
    data = "cora"
    node_model = "gcn"
    edge_model = "ele"
    custom_cfg = f"{data}_{node_model}_{edge_model}_{neg_sampler}.yaml"
    if neg_sampler != "":
        command = _LINKPRED_CONFIGURE.format(
            data, node_model, edge_model, neg_sampler, custom_cfg
        )
    else:
        command = "dgl configure linkpred --data {} --node-model {} --edge-model {} --cfg {}".format(
            data, node_model, edge_model, custom_cfg
        )
    _run_and_check(command, custom_cfg)
    custom_script = f"{data}_{node_model}_{edge_model}_{neg_sampler}.py"
    _run_and_check(
        "dgl export --cfg {} --output {}".format(custom_cfg, custom_script),
        custom_script,
    )


@pytest.mark.parametrize("data", ["csv", "ogbg-molhiv", "ogbg-molpcba"])
@pytest.mark.parametrize("model", ["gin", "pna"])
def test_graphpred(data, model):
    """Configure and export graphpred for each data/model combination."""
    _run_and_check(
        "dgl configure graphpred --data {} --model {}".format(data, model),
        "graphpred_{}_{}.yaml".format(data, model),
    )
    cfg_path = "custom_{}_{}.yaml".format(data, model)
    _run_and_check(
        "dgl configure graphpred --data {} --model {} --cfg {}".format(
            data, model, cfg_path
        ),
        cfg_path,
    )
    script_path = "_".join([data, model]) + ".py"
    _run_and_check(
        "dgl export --cfg {} --output {}".format(cfg_path, script_path),
        script_path,
    )


@pytest.mark.parametrize(
    "recipe",
    [
        "graphpred_hiv_gin.yaml",
        "graphpred_hiv_pna.yaml",
        "graphpred_pcba_gin.yaml",
        "linkpred_cora_sage.yaml",
        "linkpred_citation2_sage.yaml",
        "linkpred_collab_sage.yaml",
        "nodepred_citeseer_gat.yaml",
        "nodepred_citeseer_gcn.yaml",
        "nodepred_citeseer_sage.yaml",
        "nodepred_cora_gat.yaml",
        "nodepred_cora_gcn.yaml",
        "nodepred_cora_sage.yaml",
        "nodepred_pubmed_gat.yaml",
        "nodepred_pubmed_gcn.yaml",
        "nodepred_pubmed_sage.yaml",
        "nodepred-ns_arxiv_gcn.yaml",
        "nodepred-ns_product_sage.yaml",
    ],
)
def test_recipe(recipe):
    """Fetch one recipe with ``dgl recipe get`` and check the file appears."""
    # Remove all generated yaml files so a leftover cannot satisfy the check.
    for entry in os.listdir("./"):
        if entry.endswith(".yaml"):
            os.remove(entry)
    _run_and_check("dgl recipe get {}".format(recipe), recipe)
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_reddit():
    """Check RedditDataset's size, edge ordering and the AddSelfLoop transform."""
    # RedditDataset
    plain = data.RedditDataset()[0]
    assert plain.num_nodes() == 232965
    assert plain.num_edges() == 114615892

    # Destination ids must already be in sorted order.
    dst_ids = F.asnumpy(plain.edges()[1])
    assert np.array_equal(dst_ids, np.sort(dst_ids))

    # The transform is expected to add exactly one edge per node.
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    looped = data.RedditDataset(transform=self_loop)[0]
    assert looped.num_edges() - plain.num_edges() == plain.num_nodes()
def _assert_one_self_loop_per_node(plain, looped):
    """Assert ``looped`` has exactly ``plain.num_nodes()`` more edges than ``plain``."""
    assert looped.num_edges() - plain.num_edges() == plain.num_nodes()


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_peptides_structural():
    """AddSelfLoop on PeptidesStructuralDataset adds one edge per node."""
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    plain_ds = data.PeptidesStructuralDataset()
    plain = plain_ds[0][0]
    looped_ds = data.PeptidesStructuralDataset(transform=self_loop)
    looped = looped_ds[0][0]
    _assert_one_self_loop_per_node(plain, looped)


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_peptides_functional():
    """AddSelfLoop on PeptidesFunctionalDataset; label width equals num_classes."""
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    plain_ds = data.PeptidesFunctionalDataset()
    plain, label = plain_ds[0]
    looped_ds = data.PeptidesFunctionalDataset(transform=self_loop)
    looped, _ = looped_ds[0]
    _assert_one_self_loop_per_node(plain, looped)
    assert plain_ds.num_classes == label.shape[0]


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_VOC_superpixels():
    """AddSelfLoop on VOCSuperpixelsDataset adds one edge per node."""
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    plain_ds = data.VOCSuperpixelsDataset()
    plain = plain_ds[0]
    looped_ds = data.VOCSuperpixelsDataset(transform=self_loop)
    looped = looped_ds[0]
    _assert_one_self_loop_per_node(plain, looped)


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_COCO_superpixels():
    """AddSelfLoop on COCOSuperpixelsDataset adds one edge per node."""
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    plain_ds = data.COCOSuperpixelsDataset()
    plain = plain_ds[0]
    looped_ds = data.COCOSuperpixelsDataset(transform=self_loop)
    looped = looped_ds[0]
    _assert_one_self_loop_per_node(plain, looped)


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_MNIST_SuperPixel():
    """AddSelfLoop on MNISTSuperPixelDataset adds one edge per node."""
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    plain_ds = data.MNISTSuperPixelDataset()
    plain, _ = plain_ds[0]
    looped_ds = data.MNISTSuperPixelDataset(transform=self_loop)
    looped, _ = looped_ds[0]
    _assert_one_self_loop_per_node(plain, looped)


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_CIFAR10_SuperPixel():
    """AddSelfLoop on CIFAR10SuperPixelDataset adds one edge per node."""
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    plain_ds = data.CIFAR10SuperPixelDataset()
    plain, _ = plain_ds[0]
    looped_ds = data.CIFAR10SuperPixelDataset(transform=self_loop)
    looped, _ = looped_ds[0]
    _assert_one_self_loop_per_node(plain, looped)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="ogb only supports pytorch"
)
def test_as_linkpred_ogb():
    """Wrap ogbl-collab in AsLinkPredDataset with the stock and a forced split."""
    # Imported lazily so the module still loads when ogb is not installed.
    from ogb.linkproppred import DglLinkPropPredDataset

    # original dataset has 46329 test edges
    stock_split = data.AsLinkPredDataset(
        DglLinkPropPredDataset("ogbl-collab"), split_ratio=None, verbose=True
    )
    assert stock_split.test_edges[0][0].shape[0] == 46329

    # force generate new split
    forced_split = data.AsLinkPredDataset(
        DglLinkPropPredDataset("ogbl-collab"),
        split_ratio=[0.7, 0.2, 0.1],
        verbose=True,
    )
    assert forced_split.test_edges[0][0].shape[0] == 235812
IS_WINDOWS: bool = os.name == "nt"


def eprint(*args: Any, **kwargs: Any) -> None:
    """Print ``args`` to standard error and flush immediately."""
    print(*args, file=sys.stderr, flush=True, **kwargs)


class LintSeverity(str, Enum):
    """Severity labels attached to a :class:`LintMessage`."""

    ERROR = "error"
    WARNING = "warning"
    ADVICE = "advice"
    DISABLED = "disabled"


class LintMessage(NamedTuple):
    """One lint finding produced by :func:`check_file`."""

    path: Optional[str]
    line: Optional[int]
    char: Optional[int]
    code: str
    severity: LintSeverity
    name: str
    original: Optional[str]
    replacement: Optional[str]
    description: Optional[str]


def as_posix(name: str) -> str:
    """Return ``name`` with backslashes turned into slashes on Windows only."""
    if IS_WINDOWS:
        return name.replace("\\", "/")
    return name


def _run_command(
    args: List[str],
    *,
    timeout: int,
) -> "subprocess.CompletedProcess[bytes]":
    """Run ``args`` once, capturing stdout/stderr, and log how long it took.

    Raises ``subprocess.CalledProcessError`` on a non-zero exit status and
    ``subprocess.TimeoutExpired`` when ``timeout`` seconds elapse.
    """
    logging.debug("$ %s", " ".join(args))
    started = time.monotonic()
    try:
        return subprocess.run(
            args,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=IS_WINDOWS,  # So batch scripts are found.
            timeout=timeout,
            check=True,
        )
    finally:
        elapsed_ms = (time.monotonic() - started) * 1000
        logging.debug("took %dms", elapsed_ms)


def run_command(
    args: List[str],
    *,
    retries: int,
    timeout: int,
) -> "subprocess.CompletedProcess[bytes]":
    """Run ``args``, retrying only when the command times out.

    After ``retries`` timed-out retries the ``TimeoutExpired`` is re-raised.
    Any other exception propagates immediately.
    """
    attempts_used = 0
    while True:
        try:
            return _run_command(args, timeout=timeout)
        except subprocess.TimeoutExpired as err:
            if attempts_used == retries:
                raise
            attempts_used += 1
            logging.warning(
                "(%s/%s) Retrying because command failed with: %r",
                attempts_used,
                retries,
                err,
            )
            time.sleep(1)


def check_file(
    filename: str,
    binary: str,
    retries: int,
    timeout: int,
) -> List[LintMessage]:
    """Run ``binary`` on ``filename`` and report how its output differs.

    Returns an empty list when the command's stdout equals the file's bytes.
    Otherwise returns a single message: ``timeout`` (ERROR) when the command
    timed out, ``command-failed`` (ADVICE) when it could not run or exited
    non-zero, or ``format`` (WARNING) carrying the original and replacement
    text.
    """
    # Fields that are the same for every message this function emits.
    shared = {"path": filename, "char": None, "code": "CLANGFORMAT"}

    try:
        with open(filename, "rb") as source:
            original = source.read()
        proc = run_command(
            [binary, filename],
            retries=retries,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        timeout_message = LintMessage(
            line=None,
            severity=LintSeverity.ERROR,
            name="timeout",
            original=None,
            replacement=None,
            description=(
                "clang-format timed out while trying to process a file. "
                "Please report an issue in pytorch/pytorch with the "
                "label 'module: lint'"
            ),
            **shared,
        )
        return [timeout_message]
    except (OSError, subprocess.CalledProcessError) as err:
        if isinstance(err, subprocess.CalledProcessError):
            description = (
                "COMMAND (exit code {returncode})\n"
                "{command}\n\n"
                "STDERR\n{stderr}\n\n"
                "STDOUT\n{stdout}"
            ).format(
                returncode=err.returncode,
                command=" ".join(as_posix(x) for x in err.cmd),
                stderr=err.stderr.decode("utf-8").strip() or "(empty)",
                stdout=err.stdout.decode("utf-8").strip() or "(empty)",
            )
        else:
            description = f"Failed due to {err.__class__.__name__}:\n{err}"
        failure_message = LintMessage(
            line=None,
            severity=LintSeverity.ADVICE,
            name="command-failed",
            original=None,
            replacement=None,
            description=description,
            **shared,
        )
        return [failure_message]

    formatted = proc.stdout
    if formatted == original:
        return []

    before = original.decode("utf-8")
    after = formatted.decode("utf-8")
    old_lines = before.split("\n")
    new_lines = after.split("\n")
    # Zero-based index of the first differing line. When the common prefix
    # never differs, fall back to the last index both texts share.
    mismatch = next(
        (
            index
            for index, (old, new) in enumerate(zip(old_lines, new_lines))
            if old != new
        ),
        min(len(old_lines), len(new_lines)) - 1,
    )
    return [
        LintMessage(
            line=mismatch,
            severity=LintSeverity.WARNING,
            name="format",
            original=before,
            replacement=after,
            description="See https://clang.llvm.org/docs/ClangFormat.html.\nRun `lintrunner -a` to apply this patch.",
            **shared,
        )
    ]
CXX_SUFFIX = {"cc", "c", "cpp", "h", "cu", "hpp", "cuh"}
PYTHON_SUFFIX = {"py"}


def filepath_enumerate(paths):
    """Return the normalised paths of every file named by, or under, ``paths``.

    An entry that is an existing file is returned as-is (normalised); any
    other entry is walked recursively with ``os.walk``. A path that does not
    exist therefore contributes nothing.

    Explicit file entries are normalised like walked ones, because callers
    compare the result against ``os.path.normpath`` output: without this an
    entry such as ``./a.py`` would never match ``a.py``.
    """
    out = []
    for path in paths:
        if os.path.isfile(path):
            out.append(os.path.normpath(path))
            continue
        for root, _dirs, files in os.walk(path):
            out.extend(
                os.path.normpath(os.path.join(root, name)) for name in files
            )
    return out


# pylint: disable=useless-object-inheritance
class LintHelper(object):
    """Run the linters and record a per-file summary of the results."""

    @staticmethod
    def _print_summary_map(strm, result_map, ftype):
        """Write the summary of one result map to ``strm``.

        ``result_map`` maps a file name to a ``{category: count}`` dict; an
        empty dict means the file passed. Returns the number of files that
        did not pass (0 for an empty map, in which case nothing is written).
        """
        if not result_map:
            return 0
        npass = sum(1 for emap in result_map.values() if not emap)
        strm.write(
            f"====={npass}/{len(result_map)} {ftype} files passed check=====\n"
        )
        for fname, emap in result_map.items():
            if not emap:
                continue
            strm.write(
                f"{fname}: {sum(emap.values())} Errors of {len(emap)} Categories map={str(emap)}\n"
            )
        return len(result_map) - npass

    def __init__(self):
        """Set up empty result maps and configure pylint and cpplint."""
        self.project_name = None
        self.cpp_header_map = {}
        self.cpp_src_map = {}
        self.python_map = {}

        # setup pylint
        pylint_disable = [
            "superfluous-parens",
            "too-many-instance-attributes",
            "too-few-public-methods",
        ]
        self.pylint_opts = [
            "--extension-pkg-whitelist=numpy",
            "--disable=" + ",".join(pylint_disable),
        ]
        self.pylint_cats = {"error", "warning", "convention", "refactor"}

        # setup cpp lint
        cpplint_args = [
            "--quiet",
            "--extensions=" + (",".join(CXX_SUFFIX)),
            ".",
        ]
        _ = cpplint.ParseArguments(cpplint_args)
        # The "-build/include" entry used to carry a stray trailing comma
        # inside the literal, which put an empty item in the joined list.
        cpplint._SetFilters(
            ",".join(
                [
                    "-build/c++11",
                    "-build/namespaces",
                    "-build/include",
                    "+build/include_what_you_use",
                    "+build/include_order",
                ]
            )
        )
        cpplint._SetCountingStyle("toplevel")
        cpplint._line_length = 80

    def process_cpp(self, path, suffix):
        """Lint one C/C++ file with cpplint and store its error counts.

        Files whose ``suffix`` is ``"h"`` go to the header map, all others to
        the source map.
        """
        _cpplint_state.ResetErrorCounts()
        cpplint.ProcessFile(str(path), _cpplint_state.verbose_level)
        _cpplint_state.PrintErrorCounts()
        errors = _cpplint_state.errors_by_category.copy()
        target = self.cpp_header_map if suffix == "h" else self.cpp_src_map
        target[str(path)] = errors

    def process_python(self, path):
        """Lint one Python file with pylint and store per-category counts."""
        # NOTE(review): pylint.epylint appears to have been removed in
        # pylint 3.0 - confirm the pinned pylint version still provides it.
        (pylint_stdout, pylint_stderr) = epylint.py_run(
            " ".join([str(path)] + self.pylint_opts), return_std=True
        )
        emap = {}
        err = pylint_stderr.read()
        if err:
            print(err)
        for line in pylint_stdout:
            sys.stderr.write(line)
            # The category is the text after the last ':' and before '('.
            key = line.split(":")[-1].split("(")[0].strip()
            if key not in self.pylint_cats:
                continue
            emap[key] = emap.get(key, 0) + 1
        self.python_map[str(path)] = emap

    def print_summary(self, strm):
        """Write the lint summary to ``strm``; return the failed-file count."""
        nerr = 0
        nerr += LintHelper._print_summary_map(
            strm, self.cpp_header_map, "cpp-header"
        )
        nerr += LintHelper._print_summary_map(
            strm, self.cpp_src_map, "cpp-source"
        )
        nerr += LintHelper._print_summary_map(strm, self.python_map, "python")
        if nerr == 0:
            strm.write("All passed!\n")
        else:
            strm.write(f"{nerr} files failed lint\n")
        return nerr
# singleton helper for lint check
_HELPER = LintHelper()


def get_header_guard_dmlc(filename):
    """Get Header Guard Convention for DMLC Projects.

    For headers in include, directly use the path
    For headers in src, use project name plus path

    Examples: with project-name = dmlc
        include/dmlc/timer.h -> DMLC_TIMER_H_
        src/io/libsvm_parser.h -> DMLC_IO_LIBSVM_PARSER_H_
    """
    fileinfo = cpplint.FileInfo(filename)
    file_path_from_root = fileinfo.RepositoryName()
    inc_list = ["include", "api", "wrapper", "contrib"]
    if os.name == "nt":
        inc_list.append("mshadow")

    src_idx = file_path_from_root.find("src/")
    if src_idx != -1 and _HELPER.project_name is not None:
        # Skip only "src" (3 chars) so the "/" stays and later becomes the
        # "_" that separates the project name from the rest of the path.
        file_path_from_root = (
            _HELPER.project_name + file_path_from_root[src_idx + 3 :]
        )
    else:
        inc_idx = file_path_from_root.find("include/")
        if inc_idx != -1:
            # Drop everything up to and including "include/" (8 chars).
            file_path_from_root = file_path_from_root[inc_idx + 8 :]
        for spath in inc_list:
            prefix = spath + "/"
            if file_path_from_root.startswith(prefix):
                file_path_from_root = re.sub(
                    "^" + prefix, "", file_path_from_root
                )
                break
    return re.sub(r"[-./\s]", "_", file_path_from_root).upper() + "_"


# Make cpplint derive header guards with the DMLC convention above.
cpplint.GetHeaderGuardCPPVariable = get_header_guard_dmlc


def process(fname, allow_type):
    """Lint ``fname`` when its suffix is in ``allow_type``.

    Names containing ``#`` are skipped. The suffix is the text after the
    last ``.`` (the whole name when there is no dot).
    """
    fname = str(fname)
    suffix = fname.rsplit(".", 1)[-1]
    if "#" in fname or suffix not in allow_type:
        return
    if suffix in CXX_SUFFIX:
        _HELPER.process_cpp(fname, suffix)
    if suffix in PYTHON_SUFFIX:
        _HELPER.process_python(fname)


def main():
    """Main entry function.

    Parses the command line, lints every requested file that is not
    excluded, prints the summary to stderr and exits with status 1 when any
    file failed (0 otherwise).
    """
    parser = argparse.ArgumentParser(description="lint source codes")
    parser.add_argument("project", help="project name")
    parser.add_argument(
        "filetype", choices=["python", "cpp", "all"], help="source code type"
    )
    parser.add_argument("path", nargs="+", help="path to traverse")
    parser.add_argument(
        "--exclude_path",
        nargs="+",
        default=[],
        help="exclude this path, and all subfolders if path is a folder",
    )
    parser.add_argument(
        "--quiet", action="store_true", help="run cpplint in quiet mode"
    )
    parser.add_argument("--pylint-rc", default=None, help="pylint rc file")
    args = parser.parse_args()

    _HELPER.project_name = args.project
    if args.pylint_rc is not None:
        # An rc file replaces the built-in pylint options entirely.
        _HELPER.pylint_opts = [
            "--rcfile=" + args.pylint_rc,
        ]

    file_type = args.filetype
    allow_type = set()
    if file_type in ("python", "all"):
        allow_type |= PYTHON_SUFFIX
    if file_type in ("cpp", "all"):
        allow_type |= CXX_SUFFIX

    if sys.version_info.major == 2 and os.name != "nt":
        sys.stderr = codecs.StreamReaderWriter(
            sys.stderr,
            codecs.getreader("utf8"),
            codecs.getwriter("utf8"),
            "replace",
        )

    # get excluded files
    # Candidates below are compared in normalised form, so normalise the
    # exclusions too; otherwise "--exclude_path ./a.py" never matches "a.py".
    # A set also keeps each membership test O(1).
    excluded_paths = {
        os.path.normpath(excluded)
        for excluded in filepath_enumerate(args.exclude_path)
    }
    for path in args.path:
        if os.path.isfile(path):
            if os.path.normpath(path) not in excluded_paths:
                process(path, allow_type)
            continue
        for root, _dirs, files in os.walk(path):
            for name in files:
                file_path = os.path.normpath(os.path.join(root, name))
                if file_path not in excluded_paths:
                    process(file_path, allow_type)

    nerr = _HELPER.print_summary(sys.stderr)
    sys.exit(nerr > 0)


if __name__ == "__main__":
    main()
def run_command(args: List[str]) -> "subprocess.CompletedProcess[bytes]":
    """Run ``args`` as a subprocess and log how long it took.

    Output is not captured. Raises ``subprocess.CalledProcessError`` when the
    command exits with a non-zero status.
    """
    logging.debug("$ %s", " ".join(args))
    start_time = time.monotonic()
    try:
        return subprocess.run(args, check=True)
    finally:
        end_time = time.monotonic()
        logging.debug("took %dms", (end_time - start_time) * 1000)


def build_pip_args(packages, *, no_black_binary=False, environ=None):
    """Build the ``pip3 install`` argument list for ``packages``.

    Args:
        packages: Requirement strings; each must contain ``=`` followed by a
            version (for example ``"black==22.10.0"``).
        no_black_binary: When true, append ``--no-binary=<name>`` for every
            package whose name contains ``"black"``.
        environ: Mapping consulted for ``CONDA_PREFIX`` / ``VIRTUAL_ENV``;
            defaults to ``os.environ``.

    Returns:
        The full argument list, starting with ``["pip3", "install"]``.

    Raises:
        RuntimeError: If a package has no version after the first ``=``.
    """
    if environ is None:
        environ = os.environ

    pip_args = ["pip3", "install"]

    # If we are in a global install, use `--user` to install so that you do not
    # need root access in order to initialize linters.
    #
    # However, `pip install --user` interacts poorly with virtualenvs (see:
    # https://bit.ly/3vD4kvl) and conda (see: https://bit.ly/3KG7ZfU). So in
    # these cases perform a regular installation.
    in_conda = environ.get("CONDA_PREFIX") is not None
    in_virtualenv = environ.get("VIRTUAL_ENV") is not None
    if not in_conda and not in_virtualenv:
        pip_args.append("--user")

    pip_args.extend(packages)

    for package in packages:
        package_name, _, version = package.partition("=")
        if version == "":
            # This message used to lack the f-prefix and printed the literal
            # "{package_name}" instead of the offending package.
            raise RuntimeError(
                f"Package {package_name} did not have a version specified. "
                "Please specify a version to produce a consistent linting experience."
            )
        if no_black_binary and "black" in package_name:
            pip_args.append(f"--no-binary={package_name}")
    return pip_args


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="pip initializer")
    parser.add_argument(
        "packages",
        nargs="+",
        help="pip packages to install",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="verbose logging",
    )
    parser.add_argument(
        "--dry-run",
        help="do not install anything, just print what would be done.",
    )
    parser.add_argument(
        "--no-black-binary",
        help="do not use pre-compiled binaries from pip for black.",
        action="store_true",
    )

    args = parser.parse_args()

    logging.basicConfig(
        format="<%(threadName)s:%(levelname)s> %(message)s",
        level=logging.NOTSET if args.verbose else logging.DEBUG,
        stream=sys.stderr,
    )

    pip_args = build_pip_args(
        args.packages, no_black_binary=args.no_black_binary
    )

    # --dry-run takes a value; only the literal "1" enables it.
    dry_run = args.dry_run == "1"
    if dry_run:
        print(f"Would have run: {pip_args}")
        sys.exit(0)

    run_command(pip_args)
confidence= # Disable the message, report, category or checker with the given id(s). You # can either give multiple identifiers separated by comma (,) or put this # option multiple times (only on the command line, not in the configuration # file where it should appear only once). You can also use "--disable=all" to # disable everything first and then reenable specific checks. For example, if # you want to run only the similarities checker, you can use "--disable=all # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use "--disable=all --enable=classes # --disable=W". disable=design, similarities, no-self-use, attribute-defined-outside-init, locally-disabled, star-args, pointless-except, bad-option-value, global-statement, fixme, suppressed-message, useless-suppression, locally-enabled, import-error, unsubscriptable-object, unbalanced-tuple-unpacking, protected-access, useless-object-inheritance, no-else-return, len-as-condition, cyclic-import, # disabled due to the inevitable dgl.graph -> dgl.subgraph loop undefined-variable, # disabled due to C extension (should enable) raise-missing-from, # meh import-outside-toplevel, # due to inevitable imports within blocks using-constant-test, # due to in-place object modification in C super-with-arguments, # 2.3.0->2.6.0, should enable but there are too many... not-callable, # due to optional callables that can be None # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifiers separated by comma (,) or put this option # multiple times (only on the command line, not in the configuration file where # it should appear only once). See also the "--disable" option for examples. enable=c-extension-no-member [REPORTS] # Python expression which should return a note less than 10 (10 is the highest # note).
You have access to the variables errors warning, statement which # respectively contain the number of errors / warnings messages and the total # number of statements analyzed. This is used by the global evaluation report # (RP0004). evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) # Template used to display messages. This is a python new-style format string # used to format the message information. See doc for all details. #msg-template= # Set the output format. Available formats are text, parseable, colorized, json # and msvs (visual studio). You can also give a reporter class, e.g. # mypackage.mymodule.MyReporterClass. output-format=text # Tells whether to display a full report or only the messages. reports=no # Activate the evaluation score. score=yes [REFACTORING] # Maximum number of nested blocks for function / method body max-nested-blocks=5 # Complete name of functions that never returns. When checking for # inconsistent-return-statements if a never returning function is called then # it will be considered as an explicit return statement and no message will be # printed. never-returning-functions=sys.exit [MISCELLANEOUS] # List of note tags to take in consideration, separated by a comma. notes=FIXME, XXX, TODO [BASIC] # Naming style matching correct argument names. argument-naming-style=snake_case # Regular expression matching correct argument names. Overrides argument- # naming-style. #argument-rgx= # Naming style matching correct attribute names. attr-naming-style=snake_case # Regular expression matching correct attribute names. Overrides attr-naming- # style. #attr-rgx= # Bad variable names which should always be refused, separated by a comma. bad-names=foo, bar, baz, toto, tutu, tata # Naming style matching correct class attribute names. class-attribute-naming-style=any # Regular expression matching correct class attribute names. Overrides class- # attribute-naming-style. 
#class-attribute-rgx= # Naming style matching correct class names. class-naming-style=PascalCase # Regular expression matching correct class names. Overrides class-naming- # style. #class-rgx= # Naming style matching correct constant names. const-naming-style=UPPER_CASE # Regular expression matching correct constant names. Overrides const-naming- # style. #const-rgx= # Minimum line length for functions/classes that require docstrings, shorter # ones are exempt. docstring-min-length=-1 # Naming style matching correct function names. function-naming-style=snake_case # Regular expression matching correct function names. Overrides function- # naming-style. #function-rgx= # Good variable names which should always be accepted, separated by a comma. # f - files # i, j, k - loop variables # u, v, e - nodes and edges # s, d - source and destination # t - time # r - relation type # n, m - general integers representing quantity # w, x, y, z - general math variables # g, G - graphs # hg - heterogeneous graphs # sg - subgraphs # fn - functions # us, vs, es, gs - plural form of u, v, g, e # op - operators # ty - type # A, B, C, W - for tensor operators like matmul # dp - DataPipes (see https://pytorch.org/data/0.7/torchdata.datapipes.iter.html) # it - iterators good-names=f,i,j,k,u,v,e,n,m,w,x,y,z,s,d,t,r,g,G,hg,sg,fn,ex,Run,_,us,vs,gs,es,op,ty,A,B,C,W,a,b,N,D1,D2,R,dp,it # Include a hint for the correct naming format with invalid-name. include-naming-hint=no # Naming style matching correct inline iteration names. inlinevar-naming-style=any # Regular expression matching correct inline iteration names. Overrides # inlinevar-naming-style. #inlinevar-rgx= # Naming style matching correct method names. method-naming-style=snake_case # Regular expression matching correct method names. Overrides method-naming- # style. #method-rgx= # Naming style matching correct module names. module-naming-style=snake_case # Regular expression matching correct module names. 
Overrides module-naming- # style. #module-rgx= # Colon-delimited sets of names that determine each other's naming style when # the name regexes allow several styles. name-group= # Regular expression which should only match function or class names that do # not require a docstring. no-docstring-rgx=^_ # List of decorators that produce properties, such as abc.abstractproperty. Add # to this list to register other decorators that produce valid properties. # These decorators are taken into consideration only for invalid-name. property-classes=abc.abstractproperty # Naming style matching correct variable names. variable-naming-style=snake_case # Regular expression matching correct variable names. Overrides variable- # naming-style. #variable-rgx= [VARIABLES] # List of additional names supposed to be defined in builtins. Remember that # you should avoid defining new builtins when possible. additional-builtins= # Tells whether unused global variables should be treated as a violation. allow-global-unused-variables=yes # List of strings which can identify a callback function by name. A callback # name must start or end with one of those strings. callbacks=cb_, _cb # A regular expression matching the name of dummy variables (i.e. expected to # not be used). dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ # Argument names that match this expression will be ignored. Defaults to names # with a leading underscore. ignored-argument-names=_.*|^ignored_|^unused_ # Tells whether we should check for unused import in __init__ files. init-import=no # List of qualified module names which can have objects that can redefine # builtins. redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io [SPELLING] # Limits count of emitted suggestions for spelling mistakes. max-spelling-suggestions=4 # Spelling dictionary name. Available dictionaries: none. To make it work, # install the python-enchant package.
spelling-dict= # List of comma separated words that should not be checked. spelling-ignore-words= # A path to a file that contains private dictionary; one word per line. spelling-private-dict-file= # Tells whether to store unknown words to indicated private dictionary in # --spelling-private-dict-file option instead of raising a message. spelling-store-unknown-words=no [LOGGING] # Format style used to check logging format string. `old` means using % # formatting, while `new` is for `{}` formatting. logging-format-style=old # Logging modules to check that the string format arguments are in logging # function parameter format. logging-modules=logging [FORMAT] # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. expected-line-ending-format= # Regexp for a line that is allowed to be longer than the limit. ignore-long-lines=^\s*(# )??$ # Number of spaces of indent required inside a hanging or continued line. indent-after-paren=4 # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 # tab). indent-string=' ' # Maximum number of characters on a single line. max-line-length=100 # Maximum number of lines in a module. max-module-lines=4000 # List of optional constructs for which whitespace checking is disabled. `dict- # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. # `trailing-comma` allows a space between comma and closing bracket: (a, ). # `empty-line` allows space-only lines. no-space-check=trailing-comma, dict-separator # Allow the body of a class to be on the same line as the declaration if body # contains single statement. single-line-class-stmt=no # Allow the body of an if to be on the same line as the test if there is no # else. single-line-if-stmt=no [SIMILARITIES] # Ignore comments when computing similarities. ignore-comments=yes # Ignore docstrings when computing similarities. ignore-docstrings=yes # Ignore imports when computing similarities. 
ignore-imports=no # Minimum lines number of a similarity. min-similarity-lines=4 [TYPECHECK] # List of decorators that produce context managers, such as # contextlib.contextmanager. Add to this list to register other decorators that # produce valid context managers. contextmanager-decorators=contextlib.contextmanager # List of members which are set dynamically and missed by pylint inference # system, and so shouldn't trigger E1101 when accessed. Python regular # expressions are accepted. generated-members= # Tells whether missing members accessed in mixin class should be ignored. A # mixin class is detected if its name ends with "mixin" (case insensitive). ignore-mixin-members=yes # Tells whether to warn about missing members when the owner of the attribute # is inferred to be None. ignore-none=yes # This flag controls whether pylint should warn about no-member and similar # checks whenever an opaque object is returned when inferring. The inference # can return multiple potential results while evaluating a Python object, but # some branches might not be evaluated, which results in partial inference. In # that case, it might be useful to still emit no-member and other checks for # the rest of the inferred objects. ignore-on-opaque-inference=yes # List of class names for which member attributes should not be checked (useful # for classes with dynamically set attributes). This supports the use of # qualified names. ignored-classes=optparse.Values,thread._local,_thread._local # List of module names for which member attributes should not be checked # (useful for modules/projects where namespaces are manipulated during runtime # and thus existing member attributes cannot be deduced by static analysis. It # supports qualified module names, as well as Unix pattern matching. ignored-modules=dgl.backend,dgl._api_internal,dgl._deprecate # Show a hint with possible names when a member name was not found. The aspect # of finding the hint is based on edit distance. 
missing-member-hint=yes # The minimum edit distance a name should have in order to be considered a # similar match for a missing member name. missing-member-hint-distance=1 # The total number of similar names that should be taken in consideration when # showing a hint for a missing member. missing-member-max-choices=1 [IMPORTS] # Allow wildcard imports from modules that define __all__. allow-wildcard-with-all=yes # Analyse import fallback blocks. This can be used to support both Python 2 and # 3 compatible code, which means that the block might have code that exists # only in one or another interpreter, leading to false positives when analysed. analyse-fallback-blocks=no # Deprecated modules which should not be used, separated by a comma. deprecated-modules=optparse,tkinter.tix # Create a graph of external dependencies in the given file (report RP0402 must # not be disabled). ext-import-graph= # Create a graph of every (i.e. internal and external) dependencies in the # given file (report RP0402 must not be disabled). import-graph= # Create a graph of internal dependencies in the given file (report RP0402 must # not be disabled). int-import-graph= # Force import order to recognize a module as part of the standard # compatibility libraries. known-standard-library= # Force import order to recognize a module as part of a third party library. known-third-party=enchant [DESIGN] # Maximum number of arguments for function / method. max-args=5 # Maximum number of attributes for a class (see R0902). max-attributes=7 # Maximum number of boolean expressions in an if statement. max-bool-expr=5 # Maximum number of branch for function / method body. max-branches=12 # Maximum number of locals for function / method body. max-locals=15 # Maximum number of parents for a class (see R0901). max-parents=7 # Maximum number of public methods for a class (see R0904). max-public-methods=20 # Maximum number of return / yield for function / method body. 
def as_posix(name: str) -> str:
    """Return ``name`` with backslashes replaced by forward slashes on Windows.

    When ``IS_WINDOWS`` is false the string is returned unchanged.
    """
    if not IS_WINDOWS:
        return name
    return name.replace("\\", "/")
def check_file(
    filename: str,
) -> List[LintMessage]:
    """Check whether ``filename`` is already formatted by ufmt (usort + black).

    Args:
        filename: Path of the file to check. It is read as UTF-8.

    Returns:
        An empty list when the formatter leaves the content unchanged.
        Otherwise a one-element list holding either a ``format`` warning
        (carrying the original and the reformatted content) or, when the
        formatter raised, the ``command-failed`` message built by
        ``format_error_message``.

    Raises:
        OSError, UnicodeDecodeError: Reading and decoding the file happens
            outside the ``try`` block, so these propagate to the caller.
    """
    with open(filename, "rb") as f:
        original = f.read().decode("utf-8")

    try:
        path = Path(filename)

        usort_config = UsortConfig.find(path)
        black_config = make_black_config(path)

        # Use UFMT API to call both usort and black
        replacement = ufmt_string(
            path=path,
            content=original,
            usort_config=usort_config,
            black_config=black_config,
        )

        if original == replacement:
            return []

        original_lines = original.split("\n")
        replacement_lines = replacement.split("\n")
        # Index of the first line that differs. When every paired line is
        # equal, one text is a strict prefix of the other (e.g. only a
        # trailing newline was added or removed); the first difference is
        # then the line right after the shorter text, not its last line.
        # NOTE(review): the index is 0-based, as before — confirm whether the
        # consumer of these messages expects 1-based line numbers.
        line = next(
            (
                index
                for index, (old, new) in enumerate(
                    zip(original_lines, replacement_lines)
                )
                if old != new
            ),
            min(len(original_lines), len(replacement_lines)),
        )

        return [
            LintMessage(
                path=filename,
                line=line,
                char=None,
                code="UFMT",
                severity=LintSeverity.WARNING,
                name="format",
                original=original,
                replacement=replacement,
                description="Run `lintrunner -a` to apply this patch.",
            )
        ]
    except Exception as err:
        # Best effort: a formatter failure becomes a lint message rather
        # than aborting the whole run.
        return [format_error_message(filename, err)]
@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support inplace update",
)
def test_dlpack():
    """Exchange tensors between ``dgl.ndarray`` and the backend via DLPack.

    Three scenarios run in order: NDArray to backend tensor, backend tensor
    to NDArray (both expect an in-place write through one handle to be
    visible through the other), and a strided column slice of a backend
    tensor converted to an NDArray.
    """
    # Expected content after the first row of a zeroed 3x4 buffer is set to 1.
    first_row_ones = np.array(
        [[1.0, 1.0, 1.0, 1.0], [0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 0.0]]
    )

    def ndarray_to_backend():
        source = nd.array(np.zeros((3, 4), dtype=np.float32))
        capsule = source.to_dlpack()
        alias = F.zerocopy_from_dlpack(capsule)
        # Write through the backend tensor; the NDArray must see it.
        alias[0] = 1
        print(source)
        print(alias)
        assert np.allclose(source.asnumpy(), first_row_ones)

    def backend_to_ndarray():
        source = F.zeros((3, 4))
        capsule = F.zerocopy_to_dlpack(source)
        alias = nd.from_dlpack(capsule)
        # Write through the backend tensor; the NDArray must see it.
        source[0] = 1
        print(source)
        print(alias)
        assert np.allclose(alias.asnumpy(), first_row_ones)

    def backend_slice_to_ndarray():
        matrix = F.astype(F.tensor([[0, 1], [2, 3]]), F.int64)
        expected_column = np.array([0, 2])
        column = matrix[:2, 0]
        # Uncomment this line and comment the one below to observe error
        # capsule = dlpack.to_dlpack(column)
        capsule = F.zerocopy_to_dlpack(column)
        converted = nd.from_dlpack(capsule)
        print(matrix)
        print(converted)
        assert np.allclose(converted.asnumpy(), expected_column)

    ndarray_to_backend()
    backend_to_ndarray()
    backend_slice_to_ndarray()
def generate_graph(idtype, grad=False, add_data=True):
    """Build the 10-node, 17-edge fixture graph used by the GPU cache test.

    Node 0 has an edge to each of nodes 1..8, each of those has an edge to
    node 9, and a single edge goes back from 9 to 0.

    Parameters
    ----------
    idtype
        Index dtype passed to ``.to(F.ctx(), dtype=idtype)``.
    grad : bool
        When true, the random features are passed through ``F.attach_grad``.
    add_data : bool
        When true, random ``(10, D)`` node data is stored under ``"h"`` and
        random ``(17, D)`` edge data under ``"l"``.

    Returns
    -------
    The constructed graph.
    """
    g = dgl.graph([]).to(F.ctx(), dtype=idtype)
    g.add_nodes(10)

    # Edge order is interleaved per middle node: (0 -> mid), (mid -> 9).
    src, dst = [], []
    for mid in range(1, 9):
        src.extend((0, mid))
        dst.extend((mid, 9))
    # add a back flow from 9 to 0
    src.append(9)
    dst.append(0)
    g.add_edges(src, dst)

    if not add_data:
        return g

    node_feat = F.randn((10, D))
    edge_feat = F.randn((17, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)
    g.ndata["h"] = node_feat
    g.edata["l"] = edge_feat
    return g
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch",
    reason="Only supports PyTorch backend.",
)
def test_roman_empire():
    """Check the Roman Empire dataset's size and its ``transform`` hook.

    The dataset is loaded twice with ``force_reload=True``: once as-is to pin
    the node and edge counts, and once with ``AddSelfLoop`` to confirm the
    transform adds exactly one edge per node.
    """
    add_self_loop = dgl.AddSelfLoop(allow_duplicate=True)

    plain = dgl.data.RomanEmpireDataset(force_reload=True)[0]
    assert plain.num_nodes() == 22662
    assert plain.num_edges() == 65854

    looped = dgl.data.RomanEmpireDataset(
        force_reload=True, transform=add_self_loop
    )[0]
    added_edges = looped.num_edges() - plain.num_edges()
    assert added_edges == plain.num_nodes()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_actor():
    """Check the Actor dataset's size and that ``transform`` is applied.

    The second load passes ``AddSelfLoop``; the resulting graph must have
    exactly one more edge per node than the untransformed one.
    """
    add_self_loop = dgl.AddSelfLoop(allow_duplicate=True)

    plain = dgl.data.ActorDataset(force_reload=True)[0]
    assert plain.num_nodes() == 7600
    assert plain.num_edges() == 33391

    looped = dgl.data.ActorDataset(
        force_reload=True, transform=add_self_loop
    )[0]
    added_edges = looped.num_edges() - plain.num_edges()
    assert added_edges == plain.num_nodes()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_gin():
    """Check every GIN dataset's length and that ``transform`` is applied.

    Each named dataset is built without and then with ``AddSelfLoop``; the
    first graph of the transformed dataset must gain one edge per node, and
    ``num_classes`` must equal ``gclasses``.
    """
    # (dataset name, expected number of graphs), checked in this order.
    expected_sizes = (
        ("MUTAG", 188),
        ("IMDBBINARY", 1000),
        ("IMDBMULTI", 1500),
        ("PROTEINS", 1113),
        ("PTC", 344),
    )
    add_self_loop = dgl.AddSelfLoop(allow_duplicate=True)

    for name, n_graphs in expected_sizes:
        plain_ds = data.GINDataset(
            name, self_loop=False, degree_as_nlabel=False
        )
        assert len(plain_ds) == n_graphs, (len(plain_ds), name)
        plain = plain_ds[0][0]

        looped_ds = data.GINDataset(
            name,
            self_loop=False,
            degree_as_nlabel=False,
            transform=add_self_loop,
        )
        looped = looped_ds[0][0]
        assert looped.num_edges() - plain.num_edges() == plain.num_nodes()
        assert looped_ds.num_classes == looped_ds.gclasses
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_data_hash():
    """Check that a dataset's ``hash`` depends on the ``hash_key`` it was given.

    Equal keys must give equal hashes; keys differing in one nested element
    must give different hashes.
    """

    class HashTestDataset(data.DGLDataset):
        """Minimal dataset that forwards ``hash_key`` and loads nothing."""

        def __init__(self, hash_key=()):
            super().__init__("hashtest", hash_key=hash_key)

        def _load(self):
            pass

    base_key = (True, 0, "1", (1, 2, 3))
    changed_key = (True, 0, "1", (1, 2, 4))

    first = HashTestDataset(base_key)
    same_key = HashTestDataset((True, 0, "1", (1, 2, 3)))
    other_key = HashTestDataset(changed_key)

    assert first.hash == same_key.hash
    assert first.hash != other_key.hash
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_gnn_benchmark():
    """Check the GNN benchmark datasets: size, edge order and ``transform``.

    For each dataset the graph must have the pinned node and edge counts,
    its destination IDs must already be sorted, and loading it with
    ``AddSelfLoop`` must add exactly one edge per node.
    """
    add_self_loop = dgl.AddSelfLoop(allow_duplicate=True)

    # (class name in ``dgl.data``, expected nodes, expected edges).
    # Classes are resolved lazily inside the loop, one dataset at a time.
    expectations = (
        ("AmazonCoBuyComputerDataset", 13752, 491722),
        ("AmazonCoBuyPhotoDataset", 7650, 238163),
        ("CoauthorPhysicsDataset", 34493, 495924),
        ("CoauthorCSDataset", 18333, 163788),
        ("CoraFullDataset", 19793, 126842),
    )

    for class_name, num_nodes, num_edges in expectations:
        plain = getattr(data, class_name)()[0]
        assert plain.num_nodes() == num_nodes
        assert plain.num_edges() == num_edges

        destinations = F.asnumpy(plain.edges()[1])
        assert np.array_equal(destinations, np.sort(destinations))

        looped = getattr(data, class_name)(transform=add_self_loop)[0]
        assert looped.num_edges() - plain.num_edges() == plain.num_nodes()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
def test_wiki_cs():
    """Check the Wiki-CS dataset: size, edge order and ``transform``.

    The graph must have the pinned node and edge counts with destination IDs
    already sorted, and loading it with ``AddSelfLoop`` must add exactly one
    edge per node.
    """
    plain = data.WikiCSDataset()[0]
    assert plain.num_nodes() == 11701
    assert plain.num_edges() == 431726

    destinations = F.asnumpy(plain.edges()[1])
    assert np.array_equal(destinations, np.sort(destinations))

    add_self_loop = dgl.AddSelfLoop(allow_duplicate=True)
    looped = data.WikiCSDataset(transform=add_self_loop)[0]
    added_edges = looped.num_edges() - plain.num_edges()
    assert added_edges == plain.num_nodes()
n_graphs in mode_n_graphs.items(): ds = data.CLUSTERDataset(mode=mode) assert len(ds) == n_graphs, (len(ds), mode) g1 = ds[0] ds = data.CLUSTERDataset(mode=mode, transform=transform) g2 = ds[0] assert g2.num_edges() - g1.num_edges() == g1.num_nodes() assert ds.num_classes == 6 @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_zinc(): mode_n_graphs = { "train": 10000, "valid": 1000, "test": 1000, } transform = dgl.AddSelfLoop(allow_duplicate=True) for mode, n_graphs in mode_n_graphs.items(): dataset1 = data.ZINCDataset(mode=mode) g1, label = dataset1[0] dataset2 = data.ZINCDataset(mode=mode, transform=transform) g2, _ = dataset2[0] assert g2.num_edges() - g1.num_edges() == g1.num_nodes() # return a scalar tensor assert not label.shape @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_extract_archive(): # gzip with tempfile.TemporaryDirectory() as src_dir: gz_file = "gz_archive" gz_path = os.path.join(src_dir, gz_file + ".gz") content = b"test extract archive gzip" with gzip.open(gz_path, "wb") as f: f.write(content) with tempfile.TemporaryDirectory() as dst_dir: data.utils.extract_archive(gz_path, dst_dir, overwrite=True) assert os.path.exists(os.path.join(dst_dir, gz_file)) # tar with tempfile.TemporaryDirectory() as src_dir: tar_file = "tar_archive" tar_path = os.path.join(src_dir, tar_file + ".tar") # default encode to utf8 content = "test extract archive tar\n".encode() info = tarfile.TarInfo(name="tar_archive") info.size = len(content) with tarfile.open(tar_path, "w") as f: f.addfile(info, io.BytesIO(content)) with tempfile.TemporaryDirectory() as dst_dir: data.utils.extract_archive(tar_path, dst_dir, overwrite=True) assert os.path.exists(os.path.join(dst_dir, tar_file)) def 
_test_construct_graphs_node_ids(): from dgl.data.csv_dataset_base import ( DGLGraphConstructor, EdgeData, NodeData, ) num_nodes = 100 num_edges = 1000 # node IDs are required to be unique node_ids = np.random.choice(np.arange(num_nodes / 2), num_nodes) src_ids = np.random.choice(node_ids, size=num_edges) dst_ids = np.random.choice(node_ids, size=num_edges) node_data = NodeData(node_ids, {}) edge_data = EdgeData(src_ids, dst_ids, {}) expect_except = False try: _, _ = DGLGraphConstructor.construct_graphs(node_data, edge_data) except: expect_except = True assert expect_except # node IDs are already labelled from 0~num_nodes-1 node_ids = np.arange(num_nodes) np.random.shuffle(node_ids) _, idx = np.unique(node_ids, return_index=True) src_ids = np.random.choice(node_ids, size=num_edges) dst_ids = np.random.choice(node_ids, size=num_edges) node_feat = np.random.rand(num_nodes, 3) node_data = NodeData(node_ids, {"feat": node_feat}) edge_data = EdgeData(src_ids, dst_ids, {}) graphs, data_dict = DGLGraphConstructor.construct_graphs( node_data, edge_data ) assert len(graphs) == 1 assert len(data_dict) == 0 g = graphs[0] assert g.is_homogeneous assert g.num_nodes() == len(node_ids) assert g.num_edges() == len(src_ids) assert F.array_equal( F.tensor(node_feat[idx], dtype=F.float32), g.ndata["feat"] ) # node IDs are mixed with numeric and non-numeric values # homogeneous graph node_ids = [1, 2, 3, "a"] src_ids = [1, 2, 3] dst_ids = ["a", 1, 2] node_data = NodeData(node_ids, {}) edge_data = EdgeData(src_ids, dst_ids, {}) graphs, data_dict = DGLGraphConstructor.construct_graphs( node_data, edge_data ) assert len(graphs) == 1 assert len(data_dict) == 0 g = graphs[0] assert g.is_homogeneous assert g.num_nodes() == len(node_ids) assert g.num_edges() == len(src_ids) # heterogeneous graph node_ids_user = [1, 2, 3] node_ids_item = ["a", "b", "c"] src_ids = node_ids_user dst_ids = node_ids_item node_data_user = NodeData(node_ids_user, {}, type="user") node_data_item = 
def _test_construct_graphs_homo():
    """Build one homogeneous graph from unsorted, string-valued node IDs.

    Verifies the graph count, the node/edge counts and that node and edge
    data come back with matching values and a non-float64 dtype.
    """
    from dgl.data.csv_dataset_base import (
        DGLGraphConstructor,
        EdgeData,
        NodeData,
    )

    n_nodes, n_edges, feat_dim = 100, 1000, 3

    # node_id could be non-sorted, non-numeric.
    raw_ids = np.random.choice(
        np.arange(n_nodes * 2), size=n_nodes, replace=False
    )
    assert len(raw_ids) == n_nodes
    # to be non-sorted
    np.random.shuffle(raw_ids)
    # to be non-numeric
    node_ids = ["id_{}".format(raw) for raw in raw_ids]

    raw_ndata = {
        "feat": np.random.rand(n_nodes, feat_dim),
        "label": np.random.randint(2, size=n_nodes),
    }
    # np.unique reports, for the sorted unique IDs, the position of each ID
    # in ``node_ids``; the expected node data is reordered accordingly.
    _, first_pos = np.unique(node_ids, return_index=True)
    expected_ndata = {
        "feat": raw_ndata["feat"][first_pos],
        "label": raw_ndata["label"][first_pos],
    }
    node_data = NodeData(node_ids, raw_ndata)

    src_ids = np.random.choice(node_ids, size=n_edges)
    dst_ids = np.random.choice(node_ids, size=n_edges)
    expected_edata = {
        "feat": np.random.rand(n_edges, feat_dim),
        "label": np.random.randint(2, size=n_edges),
    }
    edge_data = EdgeData(src_ids, dst_ids, expected_edata)

    graphs, data_dict = DGLGraphConstructor.construct_graphs(
        node_data, edge_data
    )
    assert len(graphs) == 1
    assert len(data_dict) == 0

    graph = graphs[0]
    assert graph.is_homogeneous
    assert graph.num_nodes() == n_nodes
    assert graph.num_edges() == n_edges

    def check_fields(expected, actual):
        # Every expected field must be present, not stored as float64, and
        # equal to the expected values once cast to the stored dtype.
        for name, values in expected.items():
            assert name in actual
            stored = actual[name]
            stored_dtype = F.dtype(stored)
            assert stored_dtype != F.float64
            assert F.array_equal(F.tensor(values, dtype=stored_dtype), stored)

    check_fields(expected_ndata, graph.ndata)
    check_fields(expected_edata, graph.edata)
DGLGraphConstructor, EdgeData, NodeData, ) # node_id/src_id/dst_id could be non-sorted, duplicated, non-numeric. num_nodes = 100 num_edges = 1000 num_dims = 3 ntypes = ["user", "item"] node_data = [] node_ids_dict = {} ndata_dict = {} for ntype in ntypes: node_ids = np.random.choice( np.arange(num_nodes * 2), size=num_nodes, replace=False ) assert len(node_ids) == num_nodes # to be non-sorted np.random.shuffle(node_ids) # to be non-numeric node_ids = ["id_{}".format(id) for id in node_ids] t_ndata = { "feat": np.random.rand(num_nodes, num_dims), "label": np.random.randint(2, size=num_nodes), } _, u_indices = np.unique(node_ids, return_index=True) ndata = { "feat": t_ndata["feat"][u_indices], "label": t_ndata["label"][u_indices], } node_data.append(NodeData(node_ids, t_ndata, type=ntype)) node_ids_dict[ntype] = node_ids ndata_dict[ntype] = ndata etypes = [("user", "follow", "user"), ("user", "like", "item")] edge_data = [] edata_dict = {} for src_type, e_type, dst_type in etypes: src_ids = np.random.choice(node_ids_dict[src_type], size=num_edges) dst_ids = np.random.choice(node_ids_dict[dst_type], size=num_edges) edata = { "feat": np.random.rand(num_edges, num_dims), "label": np.random.randint(2, size=num_edges), } edge_data.append( EdgeData(src_ids, dst_ids, edata, type=(src_type, e_type, dst_type)) ) edata_dict[(src_type, e_type, dst_type)] = edata graphs, data_dict = DGLGraphConstructor.construct_graphs( node_data, edge_data ) assert len(graphs) == 1 assert len(data_dict) == 0 g = graphs[0] assert not g.is_homogeneous assert g.num_nodes() == num_nodes * len(ntypes) assert g.num_edges() == num_edges * len(etypes) def assert_data(lhs, rhs): for key, value in lhs.items(): assert key in rhs assert F.dtype(rhs[key]) != F.float64 assert F.array_equal( F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key] ) for ntype in g.ntypes: assert g.num_nodes(ntype) == num_nodes assert_data(ndata_dict[ntype], g.nodes[ntype].data) for etype in g.canonical_etypes: assert 
def _test_construct_graphs_multiple():
    """Check ``construct_graphs`` when the tables describe several graphs.

    Ten graphs are described by one concatenated node table, one edge table
    and one graph table, tied together by string graph IDs. The test checks
    the number of graphs, the graph-level data, each graph's size and its
    node/edge data, and finally that graph IDs missing from the graph table
    make ``construct_graphs`` raise.
    """
    from dgl.data.csv_dataset_base import (
        DGLGraphConstructor,
        EdgeData,
        GraphData,
        NodeData,
    )

    num_nodes = 100
    num_edges = 1000
    num_graphs = 10
    num_dims = 3
    node_ids = np.array([], dtype=int)
    src_ids = np.array([], dtype=int)
    dst_ids = np.array([], dtype=int)
    ngraph_ids = np.array([], dtype=int)
    egraph_ids = np.array([], dtype=int)
    u_indices = np.array([], dtype=int)
    for i in range(num_graphs):
        l_node_ids = np.random.choice(
            np.arange(num_nodes * 2), size=num_nodes, replace=False
        )
        node_ids = np.append(node_ids, l_node_ids)
        # Positions of this graph's node IDs in sorted-ID order; used later
        # to reorder the expected node data of graph ``i``.
        _, l_u_indices = np.unique(l_node_ids, return_index=True)
        u_indices = np.append(u_indices, l_u_indices)
        ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i))
        src_ids = np.append(
            src_ids, np.random.choice(l_node_ids, size=num_edges)
        )
        dst_ids = np.append(
            dst_ids, np.random.choice(l_node_ids, size=num_edges)
        )
        egraph_ids = np.append(egraph_ids, np.full(num_edges, i))
    ndata = {
        "feat": np.random.rand(num_nodes * num_graphs, num_dims),
        "label": np.random.randint(2, size=num_nodes * num_graphs),
    }
    # Graph IDs are turned into strings ("graph_0", "graph_1", ...).
    ngraph_ids = ["graph_{}".format(gid) for gid in ngraph_ids]
    node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids)
    egraph_ids = ["graph_{}".format(gid) for gid in egraph_ids]
    edata = {
        "feat": np.random.rand(num_edges * num_graphs, num_dims),
        "label": np.random.randint(2, size=num_edges * num_graphs),
    }
    edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids)
    gdata = {
        "feat": np.random.rand(num_graphs, num_dims),
        "label": np.random.randint(2, size=num_graphs),
    }
    graph_ids = ["graph_{}".format(gid) for gid in np.arange(num_graphs)]
    graph_data = GraphData(graph_ids, gdata)
    graphs, data_dict = DGLGraphConstructor.construct_graphs(
        node_data, edge_data, graph_data
    )
    assert len(graphs) == num_graphs
    assert len(data_dict) == len(gdata)
    for k, v in data_dict.items():
        assert F.dtype(v) != F.float64
        assert F.array_equal(
            F.reshape(F.tensor(gdata[k], dtype=F.dtype(v)), (len(graphs), -1)),
            v,
        )

    def assert_data(expected, actual, graph_idx, size, node=False):
        # Compare the rows of ``expected`` that belong to graph ``graph_idx``
        # with ``actual``. Defined once and given the graph index explicitly
        # instead of being redefined per iteration around the loop variable.
        rows = slice(graph_idx * size, (graph_idx + 1) * size)
        for key, value in expected.items():
            assert key in actual
            value = value[rows]
            if node:
                # Node data is expected in sorted-ID order within the graph.
                value = value[u_indices[rows]]
            assert F.dtype(actual[key]) != F.float64
            assert F.array_equal(
                F.tensor(value, dtype=F.dtype(actual[key])), actual[key]
            )

    for i, g in enumerate(graphs):
        assert g.is_homogeneous
        assert g.num_nodes() == num_nodes
        assert g.num_edges() == num_edges
        assert_data(ndata, g.ndata, i, num_nodes, node=True)
        assert_data(edata, g.edata, i, num_edges)

    # Graph IDs found in node/edge CSV but not in graph CSV
    graph_data = GraphData(np.arange(num_graphs - 2), {})
    expect_except = False
    try:
        _, _ = DGLGraphConstructor.construct_graphs(
            node_data, edge_data, graph_data
        )
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not mistaken for the expected
        # failure.
        expect_except = True
    assert expect_except
def _test_DefaultDataParser():
    """Exercise the default parser through ``_get_data_table``.

    Covers a regular table with an ID, a label and a list-valued feature
    column, a column of non-numeric strings (expected to raise), and a CSV
    saved together with its unnamed index column.
    """
    # common csv
    num_nodes = 5
    num_labels = 3
    num_dims = 2
    node_id = np.arange(num_nodes)
    label = np.random.randint(num_labels, size=num_nodes)
    feat = np.random.rand(num_nodes, num_dims)
    df = pd.DataFrame(
        {
            "node_id": node_id,
            "label": label,
            "feat": [line.tolist() for line in feat],
        }
    )
    dt = _get_data_table(df)
    assert np.array_equal(node_id, dt["node_id"])
    assert np.array_equal(label, dt["label"])
    assert np.array_equal(feat, dt["feat"])

    # string consists of non-numeric values
    df = pd.DataFrame({"label": ["a", "b", "c"]})
    expect_except = False
    try:
        _get_data_table(df)
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not counted as the expected error.
        expect_except = True
    assert expect_except

    # csv has index column which is ignored as it's unnamed
    df = pd.DataFrame({"label": [1, 2, 3]})
    dt = _get_data_table(df, save_index=True)
    assert len(dt) == 1
"node_id" for edata in meta.edge_data: assert edata.file_name == "edges.csv" assert edata.etype == ["_V", "_E", "_V"] assert edata.graph_id_field == "graph_id" assert edata.src_id_field == "src_id" assert edata.dst_id_field == "dst_id" # optional fields are specified yaml_data = { "version": "1.0.0", "dataset_name": "default", "separator": "|", "node_data": [ { "file_name": "nodes.csv", "ntype": "user", "graph_id_field": "xxx", "node_id_field": "xxx", } ], "edge_data": [ { "file_name": "edges.csv", "etype": ["user", "follow", "user"], "graph_id_field": "xxx", "src_id_field": "xxx", "dst_id_field": "xxx", } ], "graph_data": {"file_name": "graph.csv", "graph_id_field": "xxx"}, } with open(yaml_path, "w") as f: yaml.dump(yaml_data, f, sort_keys=False) meta = load_yaml_with_sanity_check(yaml_path) assert len(meta.node_data) == 1 ndata = meta.node_data[0] assert ndata.ntype == "user" assert ndata.graph_id_field == "xxx" assert ndata.node_id_field == "xxx" assert len(meta.edge_data) == 1 edata = meta.edge_data[0] assert edata.etype == ["user", "follow", "user"] assert edata.graph_id_field == "xxx" assert edata.src_id_field == "xxx" assert edata.dst_id_field == "xxx" assert meta.graph_data is not None assert meta.graph_data.file_name == "graph.csv" assert meta.graph_data.graph_id_field == "xxx" # some required fields are missing yaml_data = { "dataset_name": "default", "node_data": [], "edge_data": [], } for field in yaml_data.keys(): ydata = {k: v for k, v in yaml_data.items()} ydata.pop(field) with open(yaml_path, "w") as f: yaml.dump(ydata, f, sort_keys=False) expect_except = False try: meta = load_yaml_with_sanity_check(yaml_path) except: expect_except = True assert expect_except # inapplicable version yaml_data = { "version": "0.0.0", "dataset_name": "default", "node_data": [{"file_name": "nodes_0.csv"}], "edge_data": [{"file_name": "edges_0.csv"}], } with open(yaml_path, "w") as f: yaml.dump(yaml_data, f, sort_keys=False) expect_except = False try: meta = 
def _test_load_node_data_from_csv():
    """Check ``NodeData.load_from_csv`` on increasingly rich node tables.

    Each case overwrites the same ``nodes.csv`` in a temporary directory and
    loads it with the default parser. The last case omits the ``node_id``
    column and expects the load to raise.
    """
    from dgl.data.csv_dataset_base import DefaultDataParser, MetaNode, NodeData

    with tempfile.TemporaryDirectory() as test_dir:
        num_nodes = 100
        csv_path = os.path.join(test_dir, "nodes.csv")

        def write_nodes_csv(df):
            # Overwrite nodes.csv with ``df`` and return metadata for it.
            df.to_csv(csv_path, index=False)
            return MetaNode(file_name=csv_path)

        # minimum
        df = pd.DataFrame({"node_id": np.arange(num_nodes)})
        meta_node = write_nodes_csv(df)
        node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
        assert np.array_equal(df["node_id"], node_data.id)
        assert len(node_data.data) == 0

        # common case
        df = pd.DataFrame(
            {
                "node_id": np.arange(num_nodes),
                "label": np.random.randint(3, size=num_nodes),
            }
        )
        meta_node = write_nodes_csv(df)
        node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
        assert np.array_equal(df["node_id"], node_data.id)
        assert len(node_data.data) == 1
        assert np.array_equal(df["label"], node_data.data["label"])
        # Without a graph_id column every node is expected in graph 0.
        assert np.array_equal(np.full(num_nodes, 0), node_data.graph_id)
        assert node_data.type == "_V"

        # add more fields into nodes.csv
        df = pd.DataFrame(
            {
                "node_id": np.arange(num_nodes),
                "label": np.random.randint(3, size=num_nodes),
                "graph_id": np.full(num_nodes, 1),
            }
        )
        meta_node = write_nodes_csv(df)
        node_data = NodeData.load_from_csv(meta_node, DefaultDataParser())
        assert np.array_equal(df["node_id"], node_data.id)
        # graph_id is not counted among the data fields.
        assert len(node_data.data) == 1
        assert np.array_equal(df["label"], node_data.data["label"])
        assert np.array_equal(df["graph_id"], node_data.graph_id)
        assert node_data.type == "_V"

        # required header is missing
        df = pd.DataFrame({"label": np.random.randint(3, size=num_nodes)})
        meta_node = write_nodes_csv(df)
        expect_except = False
        try:
            NodeData.load_from_csv(meta_node, DefaultDataParser())
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt/SystemExit are not counted as the expected
            # failure.
            expect_except = True
        assert expect_except
def _test_load_graph_data_from_csv():
    """Check ``GraphData.load_from_csv`` with zero, one and two data columns.

    Every case overwrites the same ``graph.csv`` in a temporary directory.
    A table without the ``graph_id`` column must raise ``DGLError``.
    """
    from dgl.data.csv_dataset_base import (
        DefaultDataParser,
        GraphData,
        MetaGraph,
    )

    with tempfile.TemporaryDirectory() as test_dir:
        num_graphs = 100
        csv_path = os.path.join(test_dir, "graph.csv")

        def dump(frame):
            # Overwrite graph.csv with ``frame`` and return metadata for it.
            frame.to_csv(csv_path, index=False)
            return MetaGraph(file_name=csv_path)

        # minimum, common case, and more fields added into graph.csv
        for extra_fields in ([], ["label"], ["feat", "label"]):
            columns = {"graph_id": np.arange(num_graphs)}
            for field in extra_fields:
                columns[field] = np.random.randint(3, size=num_graphs)
            frame = pd.DataFrame(columns)
            loaded = GraphData.load_from_csv(dump(frame), DefaultDataParser())
            assert np.array_equal(frame["graph_id"], loaded.graph_id)
            assert len(loaded.data) == len(extra_fields)
            for field in extra_fields:
                assert np.array_equal(frame[field], loaded.data[field])

        # required header is missing
        frame = pd.DataFrame({"label": np.random.randint(3, size=num_graphs)})
        meta_graph = dump(frame)
        raised = False
        try:
            GraphData.load_from_csv(meta_graph, DefaultDataParser())
        except DGLError:
            raised = True
        assert raised
edges_csv_path_0 = os.path.join(test_dir, "test_edges_0.csv") edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv") nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv") nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv") meta_yaml_data = { "version": "1.0.0", "dataset_name": "default_name", "node_data": [ { "file_name": os.path.basename(nodes_csv_path_0), "ntype": "user", }, { "file_name": os.path.basename(nodes_csv_path_1), "ntype": "item", }, ], "edge_data": [ { "file_name": os.path.basename(edges_csv_path_0), "etype": ["user", "follow", "user"], }, { "file_name": os.path.basename(edges_csv_path_1), "etype": ["user", "like", "item"], }, ], } with open(meta_yaml_path, "w") as f: yaml.dump(meta_yaml_data, f, sort_keys=False) num_nodes = 100 num_edges = 500 num_dims = 3 feat_ndata = np.random.rand(num_nodes, num_dims) label_ndata = np.random.randint(2, size=num_nodes) df = pd.DataFrame( { "node_id": np.arange(num_nodes), "label": label_ndata, "feat": [line.tolist() for line in feat_ndata], } ) df.to_csv(nodes_csv_path_0, index=False) df.to_csv(nodes_csv_path_1, index=False) feat_edata = np.random.rand(num_edges, num_dims) label_edata = np.random.randint(2, size=num_edges) df = pd.DataFrame( { "src_id": np.random.randint(num_nodes, size=num_edges), "dst_id": np.random.randint(num_nodes, size=num_edges), "label": label_edata, "feat": [line.tolist() for line in feat_edata], } ) df.to_csv(edges_csv_path_0, index=False) df.to_csv(edges_csv_path_1, index=False) # load CSVDataset for force_reload in [True, False]: if not force_reload: # remove original node data file to verify reload from cached files os.remove(nodes_csv_path_0) assert not os.path.exists(nodes_csv_path_0) csv_dataset = data.CSVDataset(test_dir, force_reload=force_reload) assert len(csv_dataset) == 1 g = csv_dataset[0] assert not g.is_homogeneous assert csv_dataset.has_cache() for ntype in g.ntypes: assert g.num_nodes(ntype) == num_nodes assert F.array_equal( F.tensor(feat_ndata, 
def _test_CSVDataset_multiple():
    """Round-trip a multi-graph, two-node-type dataset through ``CSVDataset``.

    Writes ``meta.yaml`` plus node, edge and graph CSV files into a
    temporary directory, then loads the dataset twice: once with
    ``force_reload=True`` and once with ``force_reload=False`` after one
    node CSV has been deleted.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        # generate YAML/CSVs
        nodes_name_0, nodes_name_1 = "test_nodes_0.csv", "test_nodes_1.csv"
        edges_name_0, edges_name_1 = "test_edges_0.csv", "test_edges_1.csv"
        graph_name = "test_graph.csv"

        meta_yaml_path = os.path.join(test_dir, "meta.yaml")
        edges_csv_path_0 = os.path.join(test_dir, edges_name_0)
        edges_csv_path_1 = os.path.join(test_dir, edges_name_1)
        nodes_csv_path_0 = os.path.join(test_dir, nodes_name_0)
        nodes_csv_path_1 = os.path.join(test_dir, nodes_name_1)
        graph_csv_path = os.path.join(test_dir, graph_name)

        # The YAML refers to the CSV files by bare file name.
        meta_yaml_data = {
            "version": "1.0.0",
            "dataset_name": "default_name",
            "node_data": [
                {"file_name": nodes_name_0, "ntype": "user"},
                {"file_name": nodes_name_1, "ntype": "item"},
            ],
            "edge_data": [
                {
                    "file_name": edges_name_0,
                    "etype": ["user", "follow", "user"],
                },
                {
                    "file_name": edges_name_1,
                    "etype": ["user", "like", "item"],
                },
            ],
            "graph_data": {"file_name": graph_name},
        }
        with open(meta_yaml_path, "w") as f:
            yaml.dump(meta_yaml_data, f, sort_keys=False)

        num_nodes, num_edges, num_graphs, num_dims = 100, 500, 10, 3
        total_nodes = num_nodes * num_graphs
        total_edges = num_edges * num_graphs

        # Node table: the same node IDs 0..num_nodes-1 repeated per graph.
        feat_ndata = np.random.rand(total_nodes, num_dims)
        label_ndata = np.random.randint(2, size=total_nodes)
        nodes_df = pd.DataFrame(
            {
                "node_id": np.tile(np.arange(num_nodes), num_graphs),
                "label": label_ndata,
                "feat": feat_ndata.tolist(),
                "graph_id": np.repeat(np.arange(num_graphs), num_nodes),
            }
        )
        # Both node types share the same table.
        for path in (nodes_csv_path_0, nodes_csv_path_1):
            nodes_df.to_csv(path, index=False)

        # Edge table: endpoints are drawn graph by graph, all sources first.
        feat_edata = np.random.rand(total_edges, num_dims)
        label_edata = np.random.randint(2, size=total_edges)
        src_ids = np.hstack(
            [
                np.random.randint(num_nodes, size=num_edges)
                for _ in range(num_graphs)
            ]
        )
        dst_ids = np.hstack(
            [
                np.random.randint(num_nodes, size=num_edges)
                for _ in range(num_graphs)
            ]
        )
        edges_df = pd.DataFrame(
            {
                "src_id": src_ids,
                "dst_id": dst_ids,
                "label": label_edata,
                "feat": feat_edata.tolist(),
                "graph_id": np.repeat(np.arange(num_graphs), num_edges),
            }
        )
        # Both edge types share the same table.
        for path in (edges_csv_path_0, edges_csv_path_1):
            edges_df.to_csv(path, index=False)

        # Graph table: one row per graph.
        feat_gdata = np.random.rand(num_graphs, num_dims)
        label_gdata = np.random.randint(2, size=num_graphs)
        graph_df = pd.DataFrame(
            {
                "label": label_gdata,
                "feat": feat_gdata.tolist(),
                "graph_id": np.arange(num_graphs),
            }
        )
        graph_df.to_csv(graph_csv_path, index=False)

        # load CSVDataset with default node/edge/gdata_parser
        for force_reload in (True, False):
            if not force_reload:
                # remove original node data file to verify reload from cached files
                os.remove(nodes_csv_path_0)
                assert not os.path.exists(nodes_csv_path_0)
            csv_dataset = data.CSVDataset(test_dir, force_reload=force_reload)
            assert len(csv_dataset) == num_graphs
            assert csv_dataset.has_cache()
            assert len(csv_dataset.data) == 2
            assert "feat" in csv_dataset.data
            assert "label" in csv_dataset.data
            assert F.array_equal(
                F.tensor(feat_gdata, dtype=F.float32), csv_dataset.data["feat"]
            )
            for i, (g, g_data) in enumerate(csv_dataset):
                # Rows of the node/edge tables that belong to graph ``i``.
                node_rows = slice(i * num_nodes, (i + 1) * num_nodes)
                edge_rows = slice(i * num_edges, (i + 1) * num_edges)

                assert not g.is_homogeneous
                assert F.asnumpy(g_data["label"]) == label_gdata[i]
                assert F.array_equal(
                    g_data["feat"], F.tensor(feat_gdata[i], dtype=F.float32)
                )
                for ntype in g.ntypes:
                    assert g.num_nodes(ntype) == num_nodes
                    expected_feat = F.tensor(
                        feat_ndata[node_rows], dtype=F.float32
                    )
                    assert F.array_equal(
                        expected_feat, g.nodes[ntype].data["feat"]
                    )
                    assert np.array_equal(
                        label_ndata[node_rows],
                        F.asnumpy(g.nodes[ntype].data["label"]),
                    )
                for etype in g.etypes:
                    assert g.num_edges(etype) == num_edges
                    expected_feat = F.tensor(
                        feat_edata[edge_rows], dtype=F.float32
                    )
                    assert F.array_equal(
                        expected_feat, g.edges[etype].data["feat"]
                    )
                    assert np.array_equal(
                        label_edata[edge_rows],
                        F.asnumpy(g.edges[etype].data["label"]),
                    )
df.to_csv(edges_csv_path_0, index=False) df.to_csv(edges_csv_path_1, index=False) label_gdata = np.random.randint(2, size=num_graphs) df = pd.DataFrame( {"label": label_gdata, "graph_id": np.arange(num_graphs)} ) df.to_csv(graph_csv_path, index=False) class CustDataParser: def __call__(self, df): data = {} for header in df: dt = df[header].to_numpy().squeeze() if header == "label": dt += 2 data[header] = dt return data # load CSVDataset with customized node/edge/gdata_parser # specify via dict[ntype/etype, callable] csv_dataset = data.CSVDataset( test_dir, force_reload=True, ndata_parser={"user": CustDataParser()}, edata_parser={("user", "like", "item"): CustDataParser()}, gdata_parser=CustDataParser(), ) assert len(csv_dataset) == num_graphs assert len(csv_dataset.data) == 1 assert "label" in csv_dataset.data for i, (g, g_data) in enumerate(csv_dataset): assert not g.is_homogeneous assert F.asnumpy(g_data) == label_gdata[i] + 2 for ntype in g.ntypes: assert g.num_nodes(ntype) == num_nodes offset = 2 if ntype == "user" else 0 assert np.array_equal( label_ndata[i * num_nodes : (i + 1) * num_nodes] + offset, F.asnumpy(g.nodes[ntype].data["label"]), ) for etype in g.etypes: assert g.num_edges(etype) == num_edges offset = 2 if etype == "like" else 0 assert np.array_equal( label_edata[i * num_edges : (i + 1) * num_edges] + offset, F.asnumpy(g.edges[etype].data["label"]), ) # specify via callable csv_dataset = data.CSVDataset( test_dir, force_reload=True, ndata_parser=CustDataParser(), edata_parser=CustDataParser(), gdata_parser=CustDataParser(), ) assert len(csv_dataset) == num_graphs assert len(csv_dataset.data) == 1 assert "label" in csv_dataset.data for i, (g, g_data) in enumerate(csv_dataset): assert not g.is_homogeneous assert F.asnumpy(g_data) == label_gdata[i] + 2 for ntype in g.ntypes: assert g.num_nodes(ntype) == num_nodes offset = 2 assert np.array_equal( label_ndata[i * num_nodes : (i + 1) * num_nodes] + offset, F.asnumpy(g.nodes[ntype].data["label"]), ) for 
def _test_NodeEdgeGraphData():
    """Check construction of ``NodeData``, ``EdgeData`` and ``GraphData``.

    For each container the test covers the default construction, a
    construction with explicit type/graph IDs/data, and (for node and edge
    data) arguments of inconsistent lengths, which are expected to raise.
    """
    from dgl.data.csv_dataset_base import EdgeData, GraphData, NodeData

    # NodeData basics
    num_nodes = 100
    node_ids = np.arange(num_nodes, dtype=float)
    ndata = NodeData(node_ids, {})
    assert np.array_equal(ndata.id, node_ids)
    assert len(ndata.data) == 0
    assert ndata.type == "_V"
    assert np.array_equal(ndata.graph_id, np.full(num_nodes, 0))
    # NodeData more
    # Named ``feat_data`` so the local does not shadow the module-level
    # ``data`` name used by the other tests in this file.
    feat_data = {"feat": np.random.rand(num_nodes, 3)}
    graph_id = np.arange(num_nodes)
    ndata = NodeData(node_ids, feat_data, type="user", graph_id=graph_id)
    assert ndata.type == "user"
    assert np.array_equal(ndata.graph_id, graph_id)
    assert len(ndata.data) == len(feat_data)
    for k, v in feat_data.items():
        assert k in ndata.data
        assert np.array_equal(ndata.data[k], v)
    # NodeData except: IDs, data and graph IDs all differ in length
    expect_except = False
    try:
        NodeData(
            np.arange(num_nodes),
            {"feat": np.random.rand(num_nodes + 1, 3)},
            graph_id=np.arange(num_nodes - 1),
        )
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not counted as the expected error.
        expect_except = True
    assert expect_except

    # EdgeData basics
    num_nodes = 100
    num_edges = 1000
    src_ids = np.random.randint(num_nodes, size=num_edges)
    dst_ids = np.random.randint(num_nodes, size=num_edges)
    edata = EdgeData(src_ids, dst_ids, {})
    assert np.array_equal(edata.src, src_ids)
    assert np.array_equal(edata.dst, dst_ids)
    assert edata.type == ("_V", "_E", "_V")
    assert len(edata.data) == 0
    assert np.array_equal(edata.graph_id, np.full(num_edges, 0))
    # EdgeData more
    src_ids = np.random.randint(num_nodes, size=num_edges).astype(float)
    dst_ids = np.random.randint(num_nodes, size=num_edges).astype(float)
    feat_data = {"feat": np.random.rand(num_edges, 3)}
    etype = ("user", "like", "item")
    graph_ids = np.arange(num_edges)
    edata = EdgeData(
        src_ids, dst_ids, feat_data, type=etype, graph_id=graph_ids
    )
    assert np.array_equal(edata.src, src_ids)
    assert np.array_equal(edata.dst, dst_ids)
    assert edata.type == etype
    assert len(edata.data) == len(feat_data)
    for k, v in feat_data.items():
        assert k in edata.data
        assert np.array_equal(edata.data[k], v)
    assert np.array_equal(edata.graph_id, graph_ids)
    # EdgeData except: sources, destinations, data and graph IDs all differ
    # in length
    expect_except = False
    try:
        EdgeData(
            np.arange(num_edges),
            np.arange(num_edges + 1),
            {"feat": np.random.rand(num_edges - 1, 3)},
            graph_id=np.arange(num_edges + 2),
        )
    except Exception:
        expect_except = True
    assert expect_except

    # GraphData basics
    num_graphs = 10
    graph_ids = np.arange(num_graphs)
    gdata = GraphData(graph_ids, {})
    assert np.array_equal(gdata.graph_id, graph_ids)
    assert len(gdata.data) == 0
    # GraphData more
    graph_ids = np.arange(num_graphs).astype(float)
    feat_data = {"feat": np.random.rand(num_graphs, 3)}
    gdata = GraphData(graph_ids, feat_data)
    assert np.array_equal(gdata.graph_id, graph_ids)
    assert len(gdata.data) == len(feat_data)
    for k, v in feat_data.items():
        assert k in gdata.data
        assert np.array_equal(gdata.data[k], v)
== ds[0].num_nodes() assert new_ds[0].num_edges() == ds[0].num_edges() assert "train_mask" in new_ds[0].ndata assert F.array_equal( new_ds.train_idx, F.nonzero_1d(new_ds[0].ndata["train_mask"]) ) assert F.array_equal( new_ds.val_idx, F.nonzero_1d(new_ds[0].ndata["val_mask"]) ) assert F.array_equal( new_ds.test_idx, F.nonzero_1d(new_ds[0].ndata["test_mask"]) ) ds = data.AIFBDataset() print("train_mask" in ds[0].nodes["Personen"].data) new_ds = data.AsNodePredDataset( ds, [0.8, 0.1, 0.1], "Personen", verbose=True ) assert len(new_ds) == 1 assert new_ds[0].ntypes == ds[0].ntypes assert new_ds[0].canonical_etypes == ds[0].canonical_etypes assert "train_mask" in new_ds[0].nodes["Personen"].data assert F.array_equal( new_ds.train_idx, F.nonzero_1d(new_ds[0].nodes["Personen"].data["train_mask"]), ) assert F.array_equal( new_ds.val_idx, F.nonzero_1d(new_ds[0].nodes["Personen"].data["val_mask"]), ) assert F.array_equal( new_ds.test_idx, F.nonzero_1d(new_ds[0].nodes["Personen"].data["test_mask"]), ) @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_as_nodepred2(): # test proper reprocessing # create ds = data.AsNodePredDataset( data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1] ) assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int( ds[0].num_nodes() * 0.8 ) assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.8) # read from cache ds = data.AsNodePredDataset( data.AmazonCoBuyComputerDataset(), [0.8, 0.1, 0.1] ) assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int( ds[0].num_nodes() * 0.8 ) assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.8) # invalid cache, re-read ds = data.AsNodePredDataset( data.AmazonCoBuyComputerDataset(), [0.1, 0.1, 0.8] ) assert F.sum(F.astype(ds[0].ndata["train_mask"], F.int32), 0) == int( ds[0].num_nodes() * 0.1 ) assert len(ds.train_idx) == int(ds[0].num_nodes() * 0.1) # 
create ds = data.AsNodePredDataset( data.AIFBDataset(), [0.8, 0.1, 0.1], "Personen", verbose=True ) assert F.sum( F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0 ) == int(ds[0].num_nodes("Personen") * 0.8) assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.8) # read from cache ds = data.AsNodePredDataset( data.AIFBDataset(), [0.8, 0.1, 0.1], "Personen", verbose=True ) assert F.sum( F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0 ) == int(ds[0].num_nodes("Personen") * 0.8) assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.8) # invalid cache, re-read ds = data.AsNodePredDataset( data.AIFBDataset(), [0.1, 0.1, 0.8], "Personen", verbose=True ) assert F.sum( F.astype(ds[0].nodes["Personen"].data["train_mask"], F.int32), 0 ) == int(ds[0].num_nodes("Personen") * 0.1) assert len(ds.train_idx) == int(ds[0].num_nodes("Personen") * 0.1) @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_as_linkpred(): # create ds = data.AsLinkPredDataset( data.CoraGraphDataset(), split_ratio=[0.8, 0.1, 0.1], neg_ratio=1, verbose=True, ) # Cora has 10556 edges, 10% test edges can be 1057 assert ds.test_edges[0][0].shape[0] == 1057 # negative samples, not guaranteed, so the assert is in a relaxed range assert 1000 <= ds.test_edges[1][0].shape[0] <= 1057 # read from cache ds = data.AsLinkPredDataset( data.CoraGraphDataset(), split_ratio=[0.7, 0.1, 0.2], neg_ratio=2, verbose=True, ) assert ds.test_edges[0][0].shape[0] == 2112 # negative samples, not guaranteed to be ratio 2, so the assert is in a relaxed range assert 4000 < ds.test_edges[1][0].shape[0] <= 4224 @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf( dgl.backend.backend_name == "tensorflow", 
reason="Skip Tensorflow" ) def test_as_nodepred_csvdataset(): with tempfile.TemporaryDirectory() as test_dir: # generate YAML/CSVs meta_yaml_path = os.path.join(test_dir, "meta.yaml") edges_csv_path = os.path.join(test_dir, "test_edges.csv") nodes_csv_path = os.path.join(test_dir, "test_nodes.csv") meta_yaml_data = { "version": "1.0.0", "dataset_name": "default_name", "node_data": [{"file_name": os.path.basename(nodes_csv_path)}], "edge_data": [{"file_name": os.path.basename(edges_csv_path)}], } with open(meta_yaml_path, "w") as f: yaml.dump(meta_yaml_data, f, sort_keys=False) num_nodes = 100 num_edges = 500 num_dims = 3 num_classes = num_nodes feat_ndata = np.random.rand(num_nodes, num_dims) label_ndata = np.arange(num_classes) df = pd.DataFrame( { "node_id": np.arange(num_nodes), "label": label_ndata, "feat": [line.tolist() for line in feat_ndata], } ) df.to_csv(nodes_csv_path, index=False) df = pd.DataFrame( { "src_id": np.random.randint(num_nodes, size=num_edges), "dst_id": np.random.randint(num_nodes, size=num_edges), } ) df.to_csv(edges_csv_path, index=False) ds = data.CSVDataset(test_dir, force_reload=True) assert "feat" in ds[0].ndata assert "label" in ds[0].ndata assert "train_mask" not in ds[0].ndata assert not hasattr(ds[0], "num_classes") new_ds = data.AsNodePredDataset( ds, split_ratio=[0.8, 0.1, 0.1], force_reload=True ) assert new_ds.num_classes == num_classes assert "feat" in new_ds[0].ndata assert "label" in new_ds[0].ndata assert "train_mask" in new_ds[0].ndata @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_as_graphpred_reprocess(): ds = data.AsGraphPredDataset( data.GINDataset(name="MUTAG", self_loop=True), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset( data.GINDataset(name="MUTAG", self_loop=True), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) 
== int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset( data.GINDataset(name="MUTAG", self_loop=True), [0.1, 0.1, 0.8] ) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = data.AsGraphPredDataset( data.FakeNewsDataset("politifact", "profile"), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset( data.FakeNewsDataset("politifact", "profile"), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset( data.FakeNewsDataset("politifact", "profile"), [0.1, 0.1, 0.8] ) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset(data.QM7bDataset(), [0.1, 0.1, 0.8]) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = data.AsGraphPredDataset( data.QM9Dataset(label_keys=["mu", "gap"]), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset( data.QM9Dataset(label_keys=["mu", "gap"]), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset( data.QM9Dataset(label_keys=["mu", "gap"]), [0.1, 0.1, 0.8] ) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = data.AsGraphPredDataset( data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset( data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.8, 0.1, 0.1] ) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset( data.QM9EdgeDataset(label_keys=["mu", "alpha"]), [0.1, 0.1, 0.8] ) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = 
data.AsGraphPredDataset(data.TUDataset("DD"), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset(data.TUDataset("DD"), [0.1, 0.1, 0.8]) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset(data.LegacyTUDataset("DD"), [0.1, 0.1, 0.8]) assert len(ds.train_idx) == int(len(ds) * 0.1) ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # read from cache ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.8, 0.1, 0.1]) assert len(ds.train_idx) == int(len(ds) * 0.8) # invalid cache, re-read ds = data.AsGraphPredDataset(data.BA2MotifDataset(), [0.1, 0.1, 0.8]) assert len(ds.train_idx) == int(len(ds) * 0.1) if __name__ == "__main__": test_minigc() test_gin() test_data_hash() test_tudataset_regression() test_fraud() test_fakenews() test_csvdataset() test_as_nodepred1() test_as_nodepred2() test_as_nodepred_csvdataset() ================================================ FILE: tests/python/common/data/test_geom_gcn.py ================================================ import unittest import backend as F import dgl @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_chameleon(): transform = dgl.AddSelfLoop(allow_duplicate=True) g = dgl.data.ChameleonDataset(force_reload=True)[0] assert g.num_nodes() == 2277 assert g.num_edges() == 36101 g2 = 
dgl.data.ChameleonDataset(force_reload=True, transform=transform)[0] assert g2.num_edges() - g.num_edges() == g.num_nodes() @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_squirrel(): transform = dgl.AddSelfLoop(allow_duplicate=True) g = dgl.data.SquirrelDataset(force_reload=True)[0] assert g.num_nodes() == 5201 assert g.num_edges() == 217073 g2 = dgl.data.SquirrelDataset(force_reload=True, transform=transform)[0] assert g2.num_edges() - g.num_edges() == g.num_nodes() @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_cornell(): transform = dgl.AddSelfLoop(allow_duplicate=True) g = dgl.data.CornellDataset(force_reload=True)[0] assert g.num_nodes() == 183 assert g.num_edges() == 298 g2 = dgl.data.CornellDataset(force_reload=True, transform=transform)[0] assert g2.num_edges() - g.num_edges() == g.num_nodes() @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_texas(): transform = dgl.AddSelfLoop(allow_duplicate=True) g = dgl.data.TexasDataset(force_reload=True)[0] assert g.num_nodes() == 183 assert g.num_edges() == 325 g2 = dgl.data.TexasDataset(force_reload=True, transform=transform)[0] assert g2.num_edges() - g.num_edges() == g.num_nodes() @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_wisconsin(): transform = dgl.AddSelfLoop(allow_duplicate=True) g = dgl.data.WisconsinDataset(force_reload=True)[0] assert g.num_nodes() == 251 assert g.num_edges() == 515 g2 = 
dgl.data.WisconsinDataset(force_reload=True, transform=transform)[0] assert g2.num_edges() - g.num_edges() == g.num_nodes() ================================================ FILE: tests/python/common/data/test_movielens.py ================================================ import unittest import backend as F import dgl from dgl.data.movielens import MovieLensDataset @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="only supports pytorch" ) def test_movielens(): transform = dgl.AddSelfLoop(new_etypes=True) movielens = MovieLensDataset(name="ml-100k", valid_ratio=0.2, verbose=True) g = movielens[0] assert g.num_edges("user-movie") == g.num_edges("movie-user") == 100000 assert ( g.nodes["user"].data["feat"].shape[1] == g.nodes["user"].data["feat"].shape[1] == g.nodes["user"].data["feat"].shape[1] == 23 ) assert ( g.nodes["movie"].data["feat"].shape[1] == g.nodes["movie"].data["feat"].shape[1] == g.nodes["movie"].data["feat"].shape[1] == 320 ) movielens = MovieLensDataset( name="ml-100k", valid_ratio=0.2, transform=transform, verbose=True ) g1 = movielens[0] assert g1.num_edges() - g.num_edges() == g.num_nodes() assert g1.num_edges() - g.num_edges() == g.num_nodes() assert g1.num_edges() - g.num_edges() == g.num_nodes() movielens = MovieLensDataset( name="ml-1m", valid_ratio=0.2, test_ratio=0.1, verbose=True ) g = movielens[0] assert g.num_edges("user-movie") == g.num_edges("movie-user") == 1000209 movielens = MovieLensDataset( name="ml-10m", valid_ratio=0.2, test_ratio=0.1, verbose=True ) g = movielens[0] assert g.num_edges("user-movie") == g.num_edges("movie-user") == 10000054 ================================================ FILE: tests/python/common/data/test_serialize.py ================================================ import os import tempfile import time import unittest import warnings import backend as F import dgl import dgl.ndarray as nd import 
numpy as np import pytest import scipy as sp from dgl.data.utils import load_labels, load_tensors, save_tensors np.random.seed(44) def generate_rand_graph(n): arr = (sp.sparse.random(n, n, density=0.1, format="coo") != 0).astype( np.int64 ) return dgl.from_scipy(arr) def construct_graph(n): g_list = [] for _ in range(n): g = generate_rand_graph(30) g.edata["e1"] = F.randn((g.num_edges(), 32)) g.edata["e2"] = F.ones((g.num_edges(), 32)) g.ndata["n1"] = F.randn((g.num_nodes(), 64)) g_list.append(g) return g_list @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_graph_serialize_with_feature(): num_graphs = 100 t0 = time.time() g_list = construct_graph(num_graphs) t1 = time.time() # create a temporary file and immediately release it so DGL can open it. f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() dgl.save_graphs(path, g_list) t2 = time.time() idx_list = np.random.permutation(np.arange(num_graphs)).tolist() loadg_list, _ = dgl.load_graphs(path, idx_list) t3 = time.time() idx = idx_list[0] load_g = loadg_list[0] print("Save time: {} s".format(t2 - t1)) print("Load time: {} s".format(t3 - t2)) print("Graph Construction time: {} s".format(t1 - t0)) assert F.allclose(load_g.nodes(), g_list[idx].nodes()) load_edges = load_g.all_edges("uv", "eid") g_edges = g_list[idx].all_edges("uv", "eid") assert F.allclose(load_edges[0], g_edges[0]) assert F.allclose(load_edges[1], g_edges[1]) assert F.allclose(load_g.edata["e1"], g_list[idx].edata["e1"]) assert F.allclose(load_g.edata["e2"], g_list[idx].edata["e2"]) assert F.allclose(load_g.ndata["n1"], g_list[idx].ndata["n1"]) os.unlink(path) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_graph_serialize_without_feature(): num_graphs = 100 g_list = [generate_rand_graph(30) for _ in range(num_graphs)] # create a temporary file and immediately release it so DGL can open it. 
f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() dgl.save_graphs(path, g_list) idx_list = np.random.permutation(np.arange(num_graphs)).tolist() loadg_list, _ = dgl.load_graphs(path, idx_list) idx = idx_list[0] load_g = loadg_list[0] assert F.allclose(load_g.nodes(), g_list[idx].nodes()) load_edges = load_g.all_edges("uv", "eid") g_edges = g_list[idx].all_edges("uv", "eid") assert F.allclose(load_edges[0], g_edges[0]) assert F.allclose(load_edges[1], g_edges[1]) os.unlink(path) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_graph_serialize_with_labels(): num_graphs = 100 g_list = [generate_rand_graph(30) for _ in range(num_graphs)] labels = {"label": F.zeros((num_graphs, 1))} # create a temporary file and immediately release it so DGL can open it. f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() dgl.save_graphs(path, g_list, labels) idx_list = np.random.permutation(np.arange(num_graphs)).tolist() loadg_list, l_labels0 = dgl.load_graphs(path, idx_list) l_labels = load_labels(path) assert F.allclose(l_labels["label"], labels["label"]) assert F.allclose(l_labels0["label"], labels["label"]) idx = idx_list[0] load_g = loadg_list[0] assert F.allclose(load_g.nodes(), g_list[idx].nodes()) load_edges = load_g.all_edges("uv", "eid") g_edges = g_list[idx].all_edges("uv", "eid") assert F.allclose(load_edges[0], g_edges[0]) assert F.allclose(load_edges[1], g_edges[1]) os.unlink(path) def test_serialize_tensors(): # create a temporary file and immediately release it so DGL can open it. 
f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() tensor_dict = { "a": F.tensor([1, 3, -1, 0], dtype=F.int64), "1@1": F.tensor([1.5, 2], dtype=F.float32), } save_tensors(path, tensor_dict) load_tensor_dict = load_tensors(path) for key in tensor_dict: assert key in load_tensor_dict assert np.array_equal( F.asnumpy(load_tensor_dict[key]), F.asnumpy(tensor_dict[key]) ) load_nd_dict = load_tensors(path, return_dgl_ndarray=True) for key in tensor_dict: assert key in load_nd_dict assert isinstance(load_nd_dict[key], nd.NDArray) assert np.array_equal( load_nd_dict[key].asnumpy(), F.asnumpy(tensor_dict[key]) ) os.unlink(path) def test_serialize_empty_dict(): # create a temporary file and immediately release it so DGL can open it. f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() tensor_dict = {} save_tensors(path, tensor_dict) load_tensor_dict = load_tensors(path) assert isinstance(load_tensor_dict, dict) assert len(load_tensor_dict) == 0 os.unlink(path) def load_old_files(files): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) return dgl.load_graphs(os.path.join(os.path.dirname(__file__), files)) def test_load_old_files1(): loadg_list, _ = load_old_files("data/1.bin") idx, num_nodes, edge0, edge1, edata_e1, edata_e2, ndata_n1 = np.load( os.path.join(os.path.dirname(__file__), "data/1.npy"), allow_pickle=True ) load_g = loadg_list[idx] load_edges = load_g.all_edges("uv", "eid") assert np.allclose(F.asnumpy(load_edges[0]), edge0) assert np.allclose(F.asnumpy(load_edges[1]), edge1) assert np.allclose(F.asnumpy(load_g.edata["e1"]), edata_e1) assert np.allclose(F.asnumpy(load_g.edata["e2"]), edata_e2) assert np.allclose(F.asnumpy(load_g.ndata["n1"]), ndata_n1) def test_load_old_files2(): loadg_list, labels0 = load_old_files("data/2.bin") labels1 = load_labels(os.path.join(os.path.dirname(__file__), "data/2.bin")) idx, edges0, edges1, np_labels = np.load( os.path.join(os.path.dirname(__file__), 
"data/2.npy"), allow_pickle=True ) assert np.allclose(F.asnumpy(labels0["label"]), np_labels) assert np.allclose(F.asnumpy(labels1["label"]), np_labels) load_g = loadg_list[idx] print(load_g) load_edges = load_g.all_edges("uv", "eid") assert np.allclose(F.asnumpy(load_edges[0]), edges0) assert np.allclose(F.asnumpy(load_edges[1]), edges1) def create_heterographs(idtype): g_x = dgl.heterograph( {("user", "follows", "user"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype ) g_y = dgl.heterograph( {("user", "knows", "user"): ([0, 2], [2, 3])}, idtype=idtype ).formats("csr") g_x.ndata["h"] = F.randn((4, 3)) g_x.edata["w"] = F.randn((3, 2)) g_y.ndata["hh"] = F.ones((4, 5)) g_y.edata["ww"] = F.randn((2, 10)) g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]), ("user", "knows", "user"): ([0, 2], [2, 3]), }, idtype=idtype, ) g.nodes["user"].data["h"] = g_x.ndata["h"] g.nodes["user"].data["hh"] = g_y.ndata["hh"] g.edges["follows"].data["w"] = g_x.edata["w"] g.edges["knows"].data["ww"] = g_y.edata["ww"] return [g, g_x, g_y] def create_heterographs2(idtype): g_x = dgl.heterograph( {("user", "follows", "user"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype ) g_y = dgl.heterograph( {("user", "knows", "user"): ([0, 2], [2, 3])}, idtype=idtype ).formats("csr") g_z = dgl.heterograph( {("user", "knows", "knowledge"): ([0, 1, 3], [2, 3, 4])}, idtype=idtype ) g_x.ndata["h"] = F.randn((4, 3)) g_x.edata["w"] = F.randn((3, 2)) g_y.ndata["hh"] = F.ones((4, 5)) g_y.edata["ww"] = F.randn((2, 10)) g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]), ("user", "knows", "user"): ([0, 2], [2, 3]), ("user", "knows", "knowledge"): ([0, 1, 3], [2, 3, 4]), }, idtype=idtype, ) g.nodes["user"].data["h"] = g_x.ndata["h"] g.edges["follows"].data["w"] = g_x.edata["w"] g.nodes["user"].data["hh"] = g_y.ndata["hh"] g.edges[("user", "knows", "user")].data["ww"] = g_y.edata["ww"] return [g, g_x, g_y, g_z] def test_deserialize_old_heterograph_file(): path = 
os.path.join(os.path.dirname(__file__), "data/hetero1.bin") g_list, label_dict = dgl.load_graphs(path) assert g_list[0].idtype == F.int64 assert g_list[3].idtype == F.int32 assert np.allclose( F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5)) ) assert np.allclose( F.asnumpy(g_list[5].nodes["user"].data["hh"]), np.ones((4, 5)) ) edges = g_list[0]["follows"].edges() assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2])) assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3])) assert F.allclose(label_dict["graph_label"], F.ones(54)) def create_old_heterograph_files(): path = os.path.join(os.path.dirname(__file__), "data/hetero1.bin") g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32) labels_dict = {"graph_label": F.ones(54)} dgl.save_graphs(path, g_list0, labels_dict) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_serialize_heterograph(): f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() g_list0 = create_heterographs2(F.int64) + create_heterographs2(F.int32) dgl.save_graphs(path, g_list0) g_list, _ = dgl.load_graphs(path) assert g_list[0].idtype == F.int64 assert len(g_list[0].canonical_etypes) == 3 for i in range(len(g_list0)): for j, etypes in enumerate(g_list0[i].canonical_etypes): assert g_list[i].canonical_etypes[j] == etypes # assert g_list[1].restrict_format() == 'any' # assert g_list[2].restrict_format() == 'csr' assert g_list[4].idtype == F.int32 assert np.allclose( F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5)) ) assert np.allclose( F.asnumpy(g_list[6].nodes["user"].data["hh"]), np.ones((4, 5)) ) edges = g_list[0]["follows"].edges() assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2])) assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3])) for i in range(len(g_list)): assert g_list[i].ntypes == g_list0[i].ntypes assert g_list[i].etypes == g_list0[i].etypes # test set feature after load_graph g_list[3].nodes["user"].data["test"] 
= F.tensor([0, 1, 2, 4]) g_list[3].edata["test"] = F.tensor([0, 1, 2]) os.unlink(path) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") @pytest.mark.skip(reason="lack of permission on CI") def test_serialize_heterograph_s3(): path = "s3://dglci-data-test/graph2.bin" g_list0 = create_heterographs(F.int64) + create_heterographs(F.int32) dgl.save_graphs(path, g_list0) g_list = dgl.load_graphs(path, [0, 2, 5]) assert g_list[0].idtype == F.int64 # assert g_list[1].restrict_format() == 'csr' assert np.allclose( F.asnumpy(g_list[1].nodes["user"].data["hh"]), np.ones((4, 5)) ) assert np.allclose( F.asnumpy(g_list[2].nodes["user"].data["hh"]), np.ones((4, 5)) ) edges = g_list[0]["follows"].edges() assert np.allclose(F.asnumpy(edges[0]), np.array([0, 1, 2])) assert np.allclose(F.asnumpy(edges[1]), np.array([1, 2, 3])) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") @pytest.mark.parametrize( "formats", [ "coo", "csr", "csc", ["coo", "csc"], ["coo", "csr"], ["csc", "csr"], ["coo", "csr", "csc"], ], ) def test_graph_serialize_with_formats(formats): num_graphs = 100 g_list = [generate_rand_graph(30) for _ in range(num_graphs)] # create a temporary file and immediately release it so DGL can open it. 
f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() dgl.save_graphs(path, g_list, formats=formats) idx_list = np.random.permutation(np.arange(num_graphs)).tolist() loadg_list, _ = dgl.load_graphs(path, idx_list) idx = idx_list[0] load_g = loadg_list[0] g_formats = load_g.formats() # verify formats if not isinstance(formats, list): formats = [formats] for fmt in formats: assert fmt in g_formats["created"] assert F.allclose(load_g.nodes(), g_list[idx].nodes()) load_edges = load_g.all_edges("uv", "eid") g_edges = g_list[idx].all_edges("uv", "eid") assert F.allclose(load_edges[0], g_edges[0]) assert F.allclose(load_edges[1], g_edges[1]) os.unlink(path) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_graph_serialize_with_restricted_formats(): g = dgl.rand_graph(100, 200) g = g.formats(["coo"]) g_list = [g] # create a temporary file and immediately release it so DGL can open it. f = tempfile.NamedTemporaryFile(delete=False) path = f.name f.close() expect_except = False try: dgl.save_graphs(path, g_list, formats=["csr"]) except: expect_except = True assert expect_except os.unlink(path) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_deserialize_old_graph(): num_nodes = 100 num_edges = 200 path = os.path.join(os.path.dirname(__file__), "data/graph_0.9a220622.dgl") g_list, _ = dgl.load_graphs(path) g = g_list[0] assert "coo" in g.formats()["created"] assert "csr" in g.formats()["not created"] assert "csc" in g.formats()["not created"] assert num_nodes == g.num_nodes() assert num_edges == g.num_edges() ================================================ FILE: tests/python/common/data/test_utils.py ================================================ import gzip import io import os import tarfile import tempfile import unittest import backend as F import dgl import dgl.data as data import numpy as np import pandas as pd import pytest import yaml from dgl import DGLError 
@unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_add_nodepred_split(): dataset = data.AmazonCoBuyComputerDataset() print("train_mask" in dataset[0].ndata) data.utils.add_nodepred_split(dataset, [0.8, 0.1, 0.1]) assert "train_mask" in dataset[0].ndata dataset = data.AIFBDataset() print("train_mask" in dataset[0].nodes["Publikationen"].data) data.utils.add_nodepred_split( dataset, [0.8, 0.1, 0.1], ntype="Publikationen" ) assert "train_mask" in dataset[0].nodes["Publikationen"].data @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_extract_archive(): # gzip with tempfile.TemporaryDirectory() as src_dir: gz_file = "gz_archive" gz_path = os.path.join(src_dir, gz_file + ".gz") content = b"test extract archive gzip" with gzip.open(gz_path, "wb") as f: f.write(content) with tempfile.TemporaryDirectory() as dst_dir: data.utils.extract_archive(gz_path, dst_dir, overwrite=True) assert os.path.exists(os.path.join(dst_dir, gz_file)) # tar with tempfile.TemporaryDirectory() as src_dir: tar_file = "tar_archive" tar_path = os.path.join(src_dir, tar_file + ".tar") # default encode to utf8 content = "test extract archive tar\n".encode() info = tarfile.TarInfo(name="tar_archive") info.size = len(content) with tarfile.open(tar_path, "w") as f: f.addfile(info, io.BytesIO(content)) with tempfile.TemporaryDirectory() as dst_dir: data.utils.extract_archive(tar_path, dst_dir, overwrite=True) assert os.path.exists(os.path.join(dst_dir, tar_file)) @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_mask_nodes_by_property(): num_nodes = 1000 property_values = 
np.random.uniform(size=num_nodes) part_ratios = [0.3, 0.1, 0.1, 0.3, 0.2] split_masks = data.utils.mask_nodes_by_property( property_values, part_ratios ) assert "in_valid_mask" in split_masks @unittest.skipIf( F._default_context_str == "gpu", reason="Datasets don't need to be tested on GPU.", ) @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") def test_add_node_property_split(): dataset = data.AmazonCoBuyComputerDataset() part_ratios = [0.3, 0.1, 0.1, 0.3, 0.2] for property_name in ["popularity", "locality", "density"]: data.utils.add_node_property_split(dataset, part_ratios, property_name) assert "in_valid_mask" in dataset[0].ndata if __name__ == "__main__": test_extract_archive() test_add_nodepred_split() test_mask_nodes_by_property() test_add_node_property_split() ================================================ FILE: tests/python/common/dataloading/test_dataloader.py ================================================ import unittest import backend as F import dgl from dgl.dataloading import ( as_edge_prediction_sampler, negative_sampler, NeighborSampler, ) from utils import parametrize_idtype def create_test_graph(idtype): # test heterograph from the docstring, plus a user -- wishes -- game relation # 3 users, 2 games, 2 developers # metagraph: # ('user', 'follows', 'user'), # ('user', 'plays', 'game'), # ('user', 'wishes', 'game'), # ('developer', 'develops', 'game')]) g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([0, 1], [0, 1]), }, idtype=idtype, device=F.ctx(), ) assert g.idtype == idtype assert g.device == F.ctx() return g @parametrize_idtype def test_edge_prediction_sampler(idtype): g = create_test_graph(idtype) sampler = NeighborSampler([10, 10]) sampler = as_edge_prediction_sampler( sampler, negative_sampler=negative_sampler.Uniform(1) ) seeds = F.copy_to(F.arange(0, 2, 
dtype=idtype), ctx=F.ctx()) # just a smoke test to make sure we don't fail internal assertions result = sampler.sample(g, {"follows": seeds}) if __name__ == "__main__": test_edge_prediction_sampler() ================================================ FILE: tests/python/common/function/test_basics.py ================================================ import warnings from collections import defaultdict as ddict import backend as F import dgl import networkx as nx import numpy as np from utils import parametrize_idtype D = 5 reduce_msg_shapes = set() def message_func(edges): assert F.ndim(edges.src["h"]) == 2 assert F.shape(edges.src["h"])[1] == D return {"m": edges.src["h"]} def reduce_func(nodes): msgs = nodes.mailbox["m"] reduce_msg_shapes.add(tuple(msgs.shape)) assert F.ndim(msgs) == 3 assert F.shape(msgs)[2] == D return {"accum": F.sum(msgs, 1)} def apply_node_func(nodes): return {"h": nodes.data["h"] + nodes.data["accum"]} def generate_graph_old(grad=False): g = dgl.graph([]) g.add_nodes(10) # 10 nodes # create a graph where 0 is the source and 9 is the sink # 17 edges for i in range(1, 9): g.add_edges(0, i) g.add_edges(i, 9) # add a back flow from 9 to 0 g.add_edges(9, 0) g = g.to(F.ctx()) ncol = F.randn((10, D)) ecol = F.randn((17, D)) if grad: ncol = F.attach_grad(ncol) ecol = F.attach_grad(ecol) g.ndata["h"] = ncol g.edata["w"] = ecol g.set_n_initializer(dgl.init.zero_initializer) g.set_e_initializer(dgl.init.zero_initializer) return g def generate_graph(idtype, grad=False): """ s, d, eid 0, 1, 0 1, 9, 1 0, 2, 2 2, 9, 3 0, 3, 4 3, 9, 5 0, 4, 6 4, 9, 7 0, 5, 8 5, 9, 9 0, 6, 10 6, 9, 11 0, 7, 12 7, 9, 13 0, 8, 14 8, 9, 15 9, 0, 16 """ u = F.tensor([0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 9]) v = F.tensor([1, 9, 2, 9, 3, 9, 4, 9, 5, 9, 6, 9, 7, 9, 8, 9, 0]) g = dgl.graph((u, v), idtype=idtype) assert g.device == F.ctx() ncol = F.randn((10, D)) ecol = F.randn((17, D)) if grad: ncol = F.attach_grad(ncol) ecol = F.attach_grad(ecol) g.ndata["h"] = ncol 
g.edata["w"] = ecol g.set_n_initializer(dgl.init.zero_initializer) g.set_e_initializer(dgl.init.zero_initializer) return g def test_compatible(): g = generate_graph_old() @parametrize_idtype def test_batch_setter_getter(idtype): def _pfc(x): return list(F.zerocopy_to_numpy(x)[:, 0]) g = generate_graph(idtype) # set all nodes g.ndata["h"] = F.zeros((10, D)) assert F.allclose(g.ndata["h"], F.zeros((10, D))) # pop nodes old_len = len(g.ndata) g.ndata.pop("h") assert len(g.ndata) == old_len - 1 g.ndata["h"] = F.zeros((10, D)) # set partial nodes u = F.tensor([1, 3, 5], g.idtype) g.nodes[u].data["h"] = F.ones((3, D)) assert _pfc(g.ndata["h"]) == [ 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ] # get partial nodes u = F.tensor([1, 2, 3], g.idtype) assert _pfc(g.nodes[u].data["h"]) == [1.0, 0.0, 1.0] """ s, d, eid 0, 1, 0 1, 9, 1 0, 2, 2 2, 9, 3 0, 3, 4 3, 9, 5 0, 4, 6 4, 9, 7 0, 5, 8 5, 9, 9 0, 6, 10 6, 9, 11 0, 7, 12 7, 9, 13 0, 8, 14 8, 9, 15 9, 0, 16 """ # set all edges g.edata["l"] = F.zeros((17, D)) assert _pfc(g.edata["l"]) == [0.0] * 17 # pop edges old_len = len(g.edata) g.edata.pop("l") assert len(g.edata) == old_len - 1 g.edata["l"] = F.zeros((17, D)) # set partial edges (many-many) u = F.tensor([0, 0, 2, 5, 9], g.idtype) v = F.tensor([1, 3, 9, 9, 0], g.idtype) g.edges[u, v].data["l"] = F.ones((5, D)) truth = [0.0] * 17 truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1.0 assert _pfc(g.edata["l"]) == truth u = F.tensor([3, 4, 6], g.idtype) v = F.tensor([9, 9, 9], g.idtype) g.edges[u, v].data["l"] = F.ones((3, D)) truth[5] = truth[7] = truth[11] = 1.0 assert _pfc(g.edata["l"]) == truth u = F.tensor([0, 0, 0], g.idtype) v = F.tensor([4, 5, 6], g.idtype) g.edges[u, v].data["l"] = F.ones((3, D)) truth[6] = truth[8] = truth[10] = 1.0 assert _pfc(g.edata["l"]) == truth u = F.tensor([0, 6, 0], g.idtype) v = F.tensor([6, 9, 7], g.idtype) assert _pfc(g.edges[u, v].data["l"]) == [1.0, 1.0, 0.0] @parametrize_idtype def test_batch_setter_autograd(idtype): g = 
generate_graph(idtype, grad=True) h1 = g.ndata["h"] # partial set v = F.tensor([1, 2, 8], g.idtype) hh = F.attach_grad(F.zeros((len(v), D))) with F.record_grad(): g.nodes[v].data["h"] = hh h2 = g.ndata["h"] F.backward(h2, F.ones((10, D)) * 2) assert F.array_equal( F.grad(h1)[:, 0], F.tensor([2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0]), ) assert F.array_equal(F.grad(hh)[:, 0], F.tensor([2.0, 2.0, 2.0])) def _test_nx_conversion(): # check conversion between networkx and DGLGraph def _check_nx_feature(nxg, nf, ef): # check node and edge feature of nxg # this is used to check to_networkx num_nodes = len(nxg) num_edges = nxg.size() if num_nodes > 0: node_feat = ddict(list) for nid, attr in nxg.nodes(data=True): assert len(attr) == len(nf) for k in nxg.nodes[nid]: node_feat[k].append(F.unsqueeze(attr[k], 0)) for k in node_feat: feat = F.cat(node_feat[k], 0) assert F.allclose(feat, nf[k]) else: assert len(nf) == 0 if num_edges > 0: edge_feat = ddict(lambda: [0] * num_edges) for u, v, attr in nxg.edges(data=True): assert len(attr) == len(ef) + 1 # extra id eid = attr["id"] for k in ef: edge_feat[k][eid] = F.unsqueeze(attr[k], 0) for k in edge_feat: feat = F.cat(edge_feat[k], 0) assert F.allclose(feat, ef[k]) else: assert len(ef) == 0 n1 = F.randn((5, 3)) n2 = F.randn((5, 10)) n3 = F.randn((5, 4)) e1 = F.randn((4, 5)) e2 = F.randn((4, 7)) g = dgl.graph(([0, 1, 3, 4], [2, 4, 0, 3])) g.ndata.update({"n1": n1, "n2": n2, "n3": n3}) g.edata.update({"e1": e1, "e2": e2}) # convert to networkx nxg = g.to_networkx(node_attrs=["n1", "n3"], edge_attrs=["e1", "e2"]) assert len(nxg) == 5 assert nxg.size() == 4 _check_nx_feature(nxg, {"n1": n1, "n3": n3}, {"e1": e1, "e2": e2}) # convert to DGLGraph, nx graph has id in edge feature # use id feature to test non-tensor copy g = dgl.from_networkx(nxg, node_attrs=["n1"], edge_attrs=["e1", "id"]) # check graph size assert g.num_nodes() == 5 assert g.num_edges() == 4 # check number of features # test with existing dglgraph (so existing 
features should be cleared) assert len(g.ndata) == 1 assert len(g.edata) == 2 # check feature values assert F.allclose(g.ndata["n1"], n1) # with id in nx edge feature, e1 should follow original order assert F.allclose(g.edata["e1"], e1) assert F.array_equal( F.astype(g.edata["id"], F.int64), F.copy_to(F.arange(0, 4), F.cpu()) ) # test conversion after modifying DGLGraph g.edata.pop("id") # pop id so we don't need to provide id when adding edges new_n = F.randn((2, 3)) new_e = F.randn((3, 5)) g.add_nodes(2, data={"n1": new_n}) # add three edges, one is a multi-edge g.add_edges([3, 6, 0], [4, 5, 2], data={"e1": new_e}) n1 = F.cat((n1, new_n), 0) e1 = F.cat((e1, new_e), 0) # convert to networkx again nxg = g.to_networkx(node_attrs=["n1"], edge_attrs=["e1"]) assert len(nxg) == 7 assert nxg.size() == 7 _check_nx_feature(nxg, {"n1": n1}, {"e1": e1}) # now test convert from networkx without id in edge feature # first pop id in edge feature for _, _, attr in nxg.edges(data=True): attr.pop("id") # test with a new graph g = dgl.from_networkx(nxg, node_attrs=["n1"], edge_attrs=["e1"]) # check graph size assert g.num_nodes() == 7 assert g.num_edges() == 7 # check number of features assert len(g.ndata) == 1 assert len(g.edata) == 1 # check feature values assert F.allclose(g.ndata["n1"], n1) # edge feature order follows nxg.edges() edge_feat = [] for _, _, attr in nxg.edges(data=True): edge_feat.append(F.unsqueeze(attr["e1"], 0)) edge_feat = F.cat(edge_feat, 0) assert F.allclose(g.edata["e1"], edge_feat) # Test converting from a networkx graph whose nodes are # not labeled with consecutive-integers. 
nxg = nx.cycle_graph(5) nxg.remove_nodes_from([0, 4]) for u in nxg.nodes(): nxg.nodes[u]["h"] = F.tensor([u]) for u, v, d in nxg.edges(data=True): d["h"] = F.tensor([u, v]) g = dgl.from_networkx(nxg, node_attrs=["h"], edge_attrs=["h"]) assert g.num_nodes() == 3 assert g.num_edges() == 4 assert g.has_edge_between(0, 1) assert g.has_edge_between(1, 2) assert F.allclose(g.ndata["h"], F.tensor([[1.0], [2.0], [3.0]])) assert F.allclose( g.edata["h"], F.tensor([[1.0, 2.0], [1.0, 2.0], [2.0, 3.0], [2.0, 3.0]]) ) @parametrize_idtype def test_apply_nodes(idtype): def _upd(nodes): return {"h": nodes.data["h"] * 2} g = generate_graph(idtype) old = g.ndata["h"] g.apply_nodes(_upd) assert F.allclose(old * 2, g.ndata["h"]) u = F.tensor([0, 3, 4, 6], g.idtype) g.apply_nodes(lambda nodes: {"h": nodes.data["h"] * 0.0}, u) assert F.allclose(F.gather_row(g.ndata["h"], u), F.zeros((4, D))) @parametrize_idtype def test_apply_edges(idtype): def _upd(edges): return {"w": edges.data["w"] * 2} g = generate_graph(idtype) old = g.edata["w"] g.apply_edges(_upd) assert F.allclose(old * 2, g.edata["w"]) u = F.tensor([0, 0, 0, 4, 5, 6], g.idtype) v = F.tensor([1, 2, 3, 9, 9, 9], g.idtype) g.apply_edges(lambda edges: {"w": edges.data["w"] * 0.0}, (u, v)) eid = F.tensor(g.edge_ids(u, v)) assert F.allclose(F.gather_row(g.edata["w"], eid), F.zeros((6, D))) @parametrize_idtype def test_update_routines(idtype): g = generate_graph(idtype) # send_and_recv reduce_msg_shapes.clear() u = [0, 0, 0, 4, 5, 6] v = [1, 2, 3, 9, 9, 9] g.send_and_recv((u, v), message_func, reduce_func, apply_node_func) assert reduce_msg_shapes == {(1, 3, D), (3, 1, D)} reduce_msg_shapes.clear() try: g.send_and_recv([u, v]) assert False except: pass # pull v = F.tensor([1, 2, 3, 9], g.idtype) reduce_msg_shapes.clear() g.pull(v, message_func, reduce_func, apply_node_func) assert reduce_msg_shapes == {(1, 8, D), (3, 1, D)} reduce_msg_shapes.clear() # push v = F.tensor([0, 1, 2, 3], g.idtype) reduce_msg_shapes.clear() g.push(v, 
message_func, reduce_func, apply_node_func) assert reduce_msg_shapes == {(1, 3, D), (8, 1, D)} reduce_msg_shapes.clear() # update_all reduce_msg_shapes.clear() g.update_all(message_func, reduce_func, apply_node_func) assert reduce_msg_shapes == {(1, 8, D), (9, 1, D)} reduce_msg_shapes.clear() @parametrize_idtype def test_update_all_0deg(idtype): # test#1 g = dgl.graph(([1, 2, 3, 4], [0, 0, 0, 0]), idtype=idtype, device=F.ctx()) def _message(edges): return {"m": edges.src["h"]} def _reduce(nodes): return {"x": nodes.data["h"] + F.sum(nodes.mailbox["m"], 1)} def _apply(nodes): return {"x": nodes.data["x"] * 2} def _init2(shape, dtype, ctx, ids): return 2 + F.zeros(shape, dtype, ctx) g.set_n_initializer(_init2, "x") old_repr = F.randn((5, 5)) g.ndata["h"] = old_repr g.update_all(_message, _reduce, _apply) new_repr = g.ndata["x"] # the first row of the new_repr should be the sum of all the node # features; while the 0-deg nodes should be initialized by the # initializer and applied with UDF. assert F.allclose(new_repr[1:], 2 * (2 + F.zeros((4, 5)))) assert F.allclose(new_repr[0], 2 * F.sum(old_repr, 0)) # test#2: graph with no edge g = dgl.graph(([], []), num_nodes=5, idtype=idtype, device=F.ctx()) g.ndata["h"] = old_repr # Intercepting the warning: The input graph for the user-defined edge # function does not contain valid edges. 
with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) g.update_all( _message, _reduce, lambda nodes: {"h": nodes.data["h"] * 2} ) new_repr = g.ndata["h"] # should fallback to apply assert F.allclose(new_repr, 2 * old_repr) @parametrize_idtype def test_pull_0deg(idtype): g = dgl.graph(([0], [1]), idtype=idtype, device=F.ctx()) def _message(edges): return {"m": edges.src["h"]} def _reduce(nodes): return {"x": nodes.data["h"] + F.sum(nodes.mailbox["m"], 1)} def _apply(nodes): return {"x": nodes.data["x"] * 2} def _init2(shape, dtype, ctx, ids): return 2 + F.zeros(shape, dtype, ctx) g.set_n_initializer(_init2, "x") # test#1: pull both 0deg and non-0deg nodes old = F.randn((2, 5)) g.ndata["h"] = old g.pull([0, 1], _message, _reduce, _apply) new = g.ndata["x"] # 0deg check: initialized with the func and got applied assert F.allclose(new[0], F.full_1d(5, 4, dtype=F.float32)) # non-0deg check assert F.allclose(new[1], F.sum(old, 0) * 2) # test#2: pull only 0deg node old = F.randn((2, 5)) g.ndata["h"] = old # Intercepting the warning: The input graph for the user-defined edge # function does not contain valid edges with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) g.pull(0, _message, _reduce, lambda nodes: {"h": nodes.data["h"] * 2}) new = g.ndata["h"] # 0deg check: fallback to apply assert F.allclose(new[0], 2 * old[0]) # non-0deg check: not touched assert F.allclose(new[1], old[1]) def test_dynamic_addition(): N = 3 D = 1 g = dgl.graph([]).to(F.ctx()) # Test node addition g.add_nodes(N) g.ndata.update({"h1": F.randn((N, D)), "h2": F.randn((N, D))}) g.add_nodes(3) assert g.ndata["h1"].shape[0] == g.ndata["h2"].shape[0] == N + 3 # Test edge addition g.add_edges(0, 1) g.add_edges(1, 0) g.edata.update({"h1": F.randn((2, D)), "h2": F.randn((2, D))}) assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 2 g.add_edges([0, 2], [2, 0]) g.edata["h1"] = F.randn((4, D)) assert g.edata["h1"].shape[0] == 
g.edata["h2"].shape[0] == 4 g.add_edges(1, 2) g.edges[4].data["h1"] = F.randn((1, D)) assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 5 # test add edge with part of the features g.add_edges(2, 1, {"h1": F.randn((1, D))}) assert len(g.edata["h1"]) == len(g.edata["h2"]) @parametrize_idtype def test_repr(idtype): g = dgl.graph( ([0, 0, 1], [1, 2, 2]), num_nodes=10, idtype=idtype, device=F.ctx() ) repr_string = g.__repr__() print(repr_string) g.ndata["x"] = F.zeros((10, 5)) g.edata["y"] = F.zeros((3, 4)) repr_string = g.__repr__() print(repr_string) @parametrize_idtype def test_local_var(idtype): g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), idtype=idtype, device=F.ctx()) g.ndata["h"] = F.zeros((g.num_nodes(), 3)) g.edata["w"] = F.zeros((g.num_edges(), 4)) # test override def foo(g): g = g.local_var() g.ndata["h"] = F.ones((g.num_nodes(), 3)) g.edata["w"] = F.ones((g.num_edges(), 4)) foo(g) assert F.allclose(g.ndata["h"], F.zeros((g.num_nodes(), 3))) assert F.allclose(g.edata["w"], F.zeros((g.num_edges(), 4))) # test out-place update def foo(g): g = g.local_var() g.nodes[[2, 3]].data["h"] = F.ones((2, 3)) g.edges[[2, 3]].data["w"] = F.ones((2, 4)) foo(g) assert F.allclose(g.ndata["h"], F.zeros((g.num_nodes(), 3))) assert F.allclose(g.edata["w"], F.zeros((g.num_edges(), 4))) # test out-place update 2 def foo(g): g = g.local_var() g.apply_nodes(lambda nodes: {"h": nodes.data["h"] + 10}, [2, 3]) g.apply_edges(lambda edges: {"w": edges.data["w"] + 10}, [2, 3]) foo(g) assert F.allclose(g.ndata["h"], F.zeros((g.num_nodes(), 3))) assert F.allclose(g.edata["w"], F.zeros((g.num_edges(), 4))) # test auto-pop def foo(g): g = g.local_var() g.ndata["hh"] = F.ones((g.num_nodes(), 3)) g.edata["ww"] = F.ones((g.num_edges(), 4)) foo(g) assert "hh" not in g.ndata assert "ww" not in g.edata # test initializer1 g = dgl.graph(([0, 1], [1, 1]), idtype=idtype, device=F.ctx()) g.set_n_initializer(dgl.init.zero_initializer) def foo(g): g = g.local_var() g.nodes[0].data["h"] = 
F.ones((1, 1)) assert F.allclose(g.ndata["h"], F.tensor([[1.0], [0.0]])) foo(g) # test initializer2 def foo_e_initializer(shape, dtype, ctx, id_range): return F.ones(shape) g.set_e_initializer(foo_e_initializer, field="h") def foo(g): g = g.local_var() g.edges[0, 1].data["h"] = F.ones((1, 1)) assert F.allclose(g.edata["h"], F.ones((2, 1))) g.edges[0, 1].data["w"] = F.ones((1, 1)) assert F.allclose(g.edata["w"], F.tensor([[1.0], [0.0]])) foo(g) @parametrize_idtype def test_local_scope(idtype): g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), idtype=idtype, device=F.ctx()) g.ndata["h"] = F.zeros((g.num_nodes(), 3)) g.edata["w"] = F.zeros((g.num_edges(), 4)) # test override def foo(g): with g.local_scope(): g.ndata["h"] = F.ones((g.num_nodes(), 3)) g.edata["w"] = F.ones((g.num_edges(), 4)) foo(g) assert F.allclose(g.ndata["h"], F.zeros((g.num_nodes(), 3))) assert F.allclose(g.edata["w"], F.zeros((g.num_edges(), 4))) # test out-place update def foo(g): with g.local_scope(): g.nodes[[2, 3]].data["h"] = F.ones((2, 3)) g.edges[[2, 3]].data["w"] = F.ones((2, 4)) foo(g) assert F.allclose(g.ndata["h"], F.zeros((g.num_nodes(), 3))) assert F.allclose(g.edata["w"], F.zeros((g.num_edges(), 4))) # test out-place update 2 def foo(g): with g.local_scope(): g.apply_nodes(lambda nodes: {"h": nodes.data["h"] + 10}, [2, 3]) g.apply_edges(lambda edges: {"w": edges.data["w"] + 10}, [2, 3]) foo(g) assert F.allclose(g.ndata["h"], F.zeros((g.num_nodes(), 3))) assert F.allclose(g.edata["w"], F.zeros((g.num_edges(), 4))) # test auto-pop def foo(g): with g.local_scope(): g.ndata["hh"] = F.ones((g.num_nodes(), 3)) g.edata["ww"] = F.ones((g.num_edges(), 4)) foo(g) assert "hh" not in g.ndata assert "ww" not in g.edata # test nested scope def foo(g): with g.local_scope(): g.ndata["hh"] = F.ones((g.num_nodes(), 3)) g.edata["ww"] = F.ones((g.num_edges(), 4)) with g.local_scope(): g.ndata["hhh"] = F.ones((g.num_nodes(), 3)) g.edata["www"] = F.ones((g.num_edges(), 4)) assert "hhh" not in g.ndata assert 
"www" not in g.edata foo(g) assert "hh" not in g.ndata assert "ww" not in g.edata # test initializer1 g = dgl.graph(([0, 1], [1, 1]), idtype=idtype, device=F.ctx()) g.set_n_initializer(dgl.init.zero_initializer) def foo(g): with g.local_scope(): g.nodes[0].data["h"] = F.ones((1, 1)) assert F.allclose(g.ndata["h"], F.tensor([[1.0], [0.0]])) foo(g) # test initializer2 def foo_e_initializer(shape, dtype, ctx, id_range): return F.ones(shape) g.set_e_initializer(foo_e_initializer, field="h") def foo(g): with g.local_scope(): g.edges[0, 1].data["h"] = F.ones((1, 1)) assert F.allclose(g.edata["h"], F.ones((2, 1))) g.edges[0, 1].data["w"] = F.ones((1, 1)) assert F.allclose(g.edata["w"], F.tensor([[1.0], [0.0]])) foo(g) # test exception handling def foo(g): try: with g.local_scope(): g.ndata["hh"] = F.ones((g.num_nodes(), 1)) # throw TypeError 1 + "1" except TypeError: pass assert "hh" not in g.ndata foo(g) @parametrize_idtype def test_isolated_nodes(idtype): g = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=F.ctx()) assert g.num_nodes() == 5 g = dgl.heterograph( {("user", "plays", "game"): ([0, 0, 1], [2, 3, 2])}, {"user": 5, "game": 7}, idtype=idtype, device=F.ctx(), ) assert g.idtype == idtype assert g.num_nodes("user") == 5 assert g.num_nodes("game") == 7 # Test backward compatibility g = dgl.heterograph( {("user", "plays", "game"): ([0, 0, 1], [2, 3, 2])}, {"user": 5, "game": 7}, idtype=idtype, device=F.ctx(), ) assert g.idtype == idtype assert g.num_nodes("user") == 5 assert g.num_nodes("game") == 7 @parametrize_idtype def test_send_multigraph(idtype): g = dgl.graph(([0, 0, 0, 2], [1, 1, 1, 1]), idtype=idtype, device=F.ctx()) def _message_a(edges): return {"a": edges.data["a"]} def _message_b(edges): return {"a": edges.data["a"] * 3} def _reduce(nodes): return {"a": F.max(nodes.mailbox["a"], 1)} def answer(*args): return F.max(F.stack(args, 0), 0) assert g.is_multigraph # send by eid old_repr = F.randn((4, 5)) # send_and_recv_on g.ndata["a"] = 
F.zeros((3, 5)) g.edata["a"] = old_repr g.send_and_recv([0, 2, 3], message_func=_message_a, reduce_func=_reduce) new_repr = g.ndata["a"] assert F.allclose( new_repr[1], answer(old_repr[0], old_repr[2], old_repr[3]) ) assert F.allclose(new_repr[[0, 2]], F.zeros((2, 5))) @parametrize_idtype def test_issue_1088(idtype): # This test ensures that message passing on a heterograph with one edge type # would not crash (GitHub issue #1088). import dgl.function as fn g = dgl.heterograph( {("U", "E", "V"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype, device=F.ctx() ) g.nodes["U"].data["x"] = F.randn((3, 3)) g.update_all(fn.copy_u("x", "m"), fn.sum("m", "y")) @parametrize_idtype def test_degree_bucket_edge_ordering(idtype): import dgl.function as fn g = dgl.graph( ([1, 3, 5, 0, 4, 2, 3, 3, 4, 5], [1, 1, 0, 0, 1, 2, 2, 0, 3, 3]), idtype=idtype, device=F.ctx(), ) g.edata["eid"] = F.copy_to(F.arange(0, 10), F.ctx()) def reducer(nodes): eid = F.asnumpy(F.copy_to(nodes.mailbox["eid"], F.cpu())) assert np.array_equal(eid, np.sort(eid, 1)) return {"n": F.sum(nodes.mailbox["eid"], 1)} g.update_all(fn.copy_e("eid", "eid"), reducer) @parametrize_idtype def test_issue_2484(idtype): import dgl.function as fn g = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx()) x = F.copy_to(F.randn((4,)), F.ctx()) g.ndata["x"] = x g.pull([2, 1], fn.u_add_v("x", "x", "m"), fn.sum("m", "x")) y1 = g.ndata["x"] g.ndata["x"] = x g.pull([1, 2], fn.u_add_v("x", "x", "m"), fn.sum("m", "x")) y2 = g.ndata["x"] assert F.allclose(y1, y2) ================================================ FILE: tests/python/common/ops/test_edge_softmax.py ================================================ import itertools import math import unittest from collections import Counter import backend as F import dgl import dgl.function as fn import networkx as nx import numpy as np import pytest import scipy.sparse as ssp from dgl import DGLError from dgl.ops import edge_softmax from scipy.sparse import rand from utils import 
get_cases, parametrize_idtype edge_softmax_shapes = [(1,), (1, 3), (3, 4, 5)] rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean} fill_value = {"sum": 0, "max": float("-inf")} feat_size = 2 @pytest.mark.parametrize("g", get_cases(["clique"])) @pytest.mark.parametrize("norm_by", ["src", "dst"]) @pytest.mark.parametrize("shp", edge_softmax_shapes) @parametrize_idtype def test_edge_softmax(g, norm_by, shp, idtype): g = g.astype(idtype).to(F.ctx()) edata = F.tensor(np.random.rand(g.num_edges(), *shp)) e1 = F.attach_grad(F.clone(edata)) with F.record_grad(): score1 = edge_softmax(g, e1, norm_by=norm_by) F.backward(F.reduce_sum(score1)) grad_edata = F.grad(e1) with F.record_grad(): e2 = F.attach_grad(F.clone(edata)) e2_2d = F.reshape( e2, (g.number_of_src_nodes(), g.number_of_dst_nodes(), *e2.shape[1:]), ) if norm_by == "src": score2 = F.softmax(e2_2d, 1) score2 = F.reshape(score2, (-1, *e2.shape[1:])) if norm_by == "dst": score2 = F.softmax(e2_2d, 0) score2 = F.reshape(score2, (-1, *e2.shape[1:])) assert F.allclose(score1, score2) print("forward passed") F.backward(F.reduce_sum(score2)) assert F.allclose(F.grad(e2), grad_edata) print("backward passed") def create_test_heterograph(idtype): # test heterograph from the docstring, plus a user -- wishes -- game relation # 3 users, 2 games, 2 developers # metagraph: # ('user', 'follows', 'user'), # ('user', 'plays', 'game'), # ('user', 'wishes', 'game'), # ('developer', 'develops', 'game')]) g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1, 2, 1, 1], [0, 0, 1, 1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 1, 1], [0, 0, 1]), ("developer", "develops", "game"): ([0, 1, 0], [0, 1, 1]), }, idtype=idtype, device=F.ctx(), ) assert g.idtype == idtype assert g.device == F.ctx() return g @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) def test_edge_softmax_unidirectional(): g = dgl.heterograph( { ("A", 
"AB", "B"): ( [1, 2, 3, 1, 2, 3, 1, 2, 3], [0, 0, 0, 1, 1, 1, 2, 2, 2], ), ("B", "BB", "B"): ( [0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2], ), } ) g = g.to(F.ctx()) g.edges["AB"].data["x"] = F.ones(9) * 2 g.edges["BB"].data["x"] = F.ones(9) result = dgl.ops.edge_softmax( g, {"AB": g.edges["AB"].data["x"], "BB": g.edges["BB"].data["x"]} ) ab = result["A", "AB", "B"] bb = result["B", "BB", "B"] e2 = F.zeros_like(ab) + math.exp(2) / ((math.exp(2) + math.exp(1)) * 3) e1 = F.zeros_like(bb) + math.exp(1) / ((math.exp(2) + math.exp(1)) * 3) assert F.allclose(ab, e2) assert F.allclose(bb, e1) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @pytest.mark.parametrize("g", get_cases(["clique"])) @pytest.mark.parametrize("norm_by", ["src", "dst"]) # @pytest.mark.parametrize('shp', edge_softmax_shapes) @parametrize_idtype def test_edge_softmax(g, norm_by, idtype): print("params", norm_by, idtype) g = create_test_heterograph(idtype) x1 = F.randn((g.num_edges("plays"), feat_size)) x2 = F.randn((g.num_edges("follows"), feat_size)) x3 = F.randn((g.num_edges("develops"), feat_size)) x4 = F.randn((g.num_edges("wishes"), feat_size)) F.attach_grad(F.clone(x1)) F.attach_grad(F.clone(x2)) F.attach_grad(F.clone(x3)) F.attach_grad(F.clone(x4)) g["plays"].edata["eid"] = x1 g["follows"].edata["eid"] = x2 g["develops"].edata["eid"] = x3 g["wishes"].edata["eid"] = x4 ################################################################# # edge_softmax() on homogeneous graph ################################################################# with F.record_grad(): hm_g = dgl.to_homogeneous(g) hm_x = F.cat((x3, x2, x1, x4), 0) hm_e = F.attach_grad(F.clone(hm_x)) score_hm = edge_softmax(hm_g, hm_e, norm_by=norm_by) hm_g.edata["score"] = score_hm ht_g = dgl.to_heterogeneous(hm_g, g.ntypes, g.etypes) r1 = ht_g.edata["score"][("user", "plays", "game")] r2 = ht_g.edata["score"][("user", "follows", "user")] r3 = ht_g.edata["score"][("developer", 
"develops", "game")] r4 = ht_g.edata["score"][("user", "wishes", "game")] F.backward(F.reduce_sum(r1) + F.reduce_sum(r2)) grad_edata_hm = F.grad(hm_e) ################################################################# # edge_softmax() on heterogeneous graph ################################################################# e1 = F.attach_grad(F.clone(x1)) e2 = F.attach_grad(F.clone(x2)) e3 = F.attach_grad(F.clone(x3)) e4 = F.attach_grad(F.clone(x4)) e = { ("user", "follows", "user"): e2, ("user", "plays", "game"): e1, ("user", "wishes", "game"): e4, ("developer", "develops", "game"): e3, } with F.record_grad(): score = edge_softmax(g, e, norm_by=norm_by) r5 = score[("user", "plays", "game")] r6 = score[("user", "follows", "user")] r7 = score[("developer", "develops", "game")] r8 = score[("user", "wishes", "game")] F.backward(F.reduce_sum(r5) + F.reduce_sum(r6)) grad_edata_ht = F.cat( (F.grad(e3), F.grad(e2), F.grad(e1), F.grad(e4)), 0 ) # correctness check assert F.allclose(r1, r5) assert F.allclose(r2, r6) assert F.allclose(r3, r7) assert F.allclose(r4, r8) assert F.allclose(grad_edata_hm, grad_edata_ht) if __name__ == "__main__": test_edge_softmax_unidirectional() ================================================ FILE: tests/python/common/ops/test_ops.py ================================================ import random import unittest import backend as F import dgl import numpy as np import pytest import torch from dgl.ops import gather_mm, gsddmm, gspmm, segment_reduce from utils import parametrize_idtype from utils.graph_cases import get_cases # Set seeds to make tests fully reproducible. 
SEED = 12345 # random.randint(1, 99999) random.seed(SEED) np.random.seed(SEED) dgl.seed(SEED) F.seed(SEED) udf_msg = { "add": lambda edges: {"m": edges.src["x"] + edges.data["w"]}, "sub": lambda edges: {"m": edges.src["x"] - edges.data["w"]}, "mul": lambda edges: {"m": edges.src["x"] * edges.data["w"]}, "div": lambda edges: {"m": edges.src["x"] / edges.data["w"]}, "copy_lhs": lambda edges: {"m": edges.src["x"]}, "copy_rhs": lambda edges: {"m": edges.data["w"]}, } def select(target, src, edge, dst): if target == "u": return src elif target == "v": return dst elif target == "e": return edge def binary_op(msg, x, y): if msg == "add": return x + y elif msg == "sub": return x - y elif msg == "mul": return x * y elif msg == "div": return x / y elif msg == "dot": return F.sum(x * y, -1, keepdims=True) elif msg == "copy_lhs": return x elif msg == "copy_rhs": return y def edge_func(lhs_target, rhs_target, msg): def foo(edges): return { "m": binary_op( msg, select(lhs_target, edges.src, edges.data, edges.dst)["x"], select(rhs_target, edges.src, edges.data, edges.dst)["y"], ) } return foo udf_apply_edges = { lhs_target + "_" + msg + "_" + rhs_target: edge_func(lhs_target, rhs_target, msg) for lhs_target in ["u", "v", "e"] for rhs_target in ["u", "v", "e"] for msg in ["add", "sub", "mul", "div", "dot", "copy_lhs", "copy_rhs"] } udf_reduce = { "sum": lambda nodes: {"v": F.sum(nodes.mailbox["m"], 1)}, "min": lambda nodes: {"v": F.min(nodes.mailbox["m"], 1)}, "max": lambda nodes: {"v": F.max(nodes.mailbox["m"], 1)}, } graphs = [ # dgl.rand_graph(30, 0), dgl.rand_graph(30, 100), dgl.rand_bipartite("_U", "_E", "_V", 30, 40, 300), ] spmm_shapes = [ ((1, 2, 1, 3, 1), (4, 1, 3, 1, 1)), ((3, 3), (1, 3)), ((1,), (3,)), ((3,), (1,)), ((1,), (1,)), ((), ()), ] sddmm_shapes = [ ((1, 2, 1, 3, 1), (4, 1, 3, 1, 1)), ((5, 3, 1, 7), (1, 3, 7, 7)), ((1, 3, 3), (4, 1, 3)), ((3,), (3,)), ((1,), (1,)), ] @pytest.mark.parametrize("g", graphs) @pytest.mark.parametrize("shp", spmm_shapes) 
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch",
    reason="Only support PyTorch for now.",
)
@parametrize_idtype
@pytest.mark.parametrize(
    "dtype, rtol, atol",
    [(torch.float16, 1e-3, 0.5), (torch.bfloat16, 4e-3, 2.0)],
)
def test_half_spmm(idtype, dtype, rtol, atol):
    """Check half-precision ``copy_u_sum`` against the float32 result."""
    if F._default_context_str == "cpu" and dtype == torch.float16:
        pytest.skip("float16 is not supported on CPU.")
    if (
        F._default_context_str == "gpu"
        and dtype == torch.bfloat16
        and not torch.cuda.is_bf16_supported()
    ):
        pytest.skip("BF16 is not supported.")

    # make sure the spmm result is < 512 to match the rtol/atol we set.
    g = dgl.graph(
        (torch.arange(900), torch.tensor([0] * 900)),
        idtype=idtype,
        device=F.ctx(),
    )
    feat_fp32 = torch.rand((g.num_src_nodes(), 32)).to(F.ctx())
    feat_half = feat_fp32.to(dtype)

    # test SpMMCSR
    g = g.formats(["csc"])
    res_fp32 = dgl.ops.copy_u_sum(g, feat_fp32)[0]
    res_half = dgl.ops.copy_u_sum(g, feat_half)[0].float()
    assert torch.allclose(res_fp32, res_half, rtol=rtol, atol=atol)

    # test SpMMCOO
    # TODO(Xin): half-precision SpMMCoo is temporarily disabled.
    # g = g.formats(['coo'])
    # res_fp32 = dgl.ops.copy_u_sum(g, feat_fp32)[0]
    # res_half = dgl.ops.copy_u_sum(g, feat_half)[0].float()
    # assert torch.allclose(res_fp32, res_half, rtol=rtol, atol=atol)
@pytest.mark.parametrize("g", graphs)
@pytest.mark.parametrize("shp", sddmm_shapes)
@pytest.mark.parametrize("lhs_target", ["u", "v", "e"])
@pytest.mark.parametrize("rhs_target", ["u", "v", "e"])
@pytest.mark.parametrize(
    "msg", ["add", "sub", "mul", "div", "dot", "copy_lhs", "copy_rhs"]
)
@parametrize_idtype
def test_sddmm(g, shp, lhs_target, rhs_target, msg, idtype):
    """Compare ``gsddmm`` (forward and gradients) with a UDF ``apply_edges``."""
    # Cases where both operands come from the same target are not exercised.
    if lhs_target == rhs_target:
        return
    g = g.astype(idtype).to(F.ctx())
    if dgl.backend.backend_name == "mxnet" and g.num_edges() == 0:
        pytest.skip()  # mxnet does not support zero-shape tensors
    print(g)
    print(g.idtype)

    # `select` is reused here to pick the leading dimension for each operand.
    len_lhs = select(
        lhs_target,
        g.number_of_src_nodes(),
        g.num_edges(),
        g.number_of_dst_nodes(),
    )
    lhs_shp = (len_lhs,) + shp[0]
    len_rhs = select(
        rhs_target,
        g.number_of_src_nodes(),
        g.num_edges(),
        g.number_of_dst_nodes(),
    )
    rhs_shp = (len_rhs,) + shp[1]
    feat_lhs = F.tensor(np.random.rand(*lhs_shp) + 1)
    feat_rhs = F.tensor(np.random.rand(*rhs_shp) + 1)
    print(
        "lhs shape: {}, rhs shape: {}".format(
            F.shape(feat_lhs), F.shape(feat_rhs)
        )
    )

    lhs_frame = select(lhs_target, g.srcdata, g.edata, g.dstdata)
    rhs_frame = select(rhs_target, g.srcdata, g.edata, g.dstdata)
    lhs_frame["x"] = F.attach_grad(F.clone(feat_lhs))
    rhs_frame["y"] = F.attach_grad(F.clone(feat_rhs))
    msg_func = lhs_target + "_" + msg + "_" + rhs_target
    print("SDDMM(message func: {})".format(msg_func))

    lhs = F.attach_grad(F.clone(feat_lhs))
    rhs = F.attach_grad(F.clone(feat_rhs))
    with F.record_grad():
        e = gsddmm(
            g, msg, lhs, rhs, lhs_target=lhs_target, rhs_target=rhs_target
        )
        F.backward(F.reduce_sum(e))
        grad_lhs = F.grad(lhs)
        grad_rhs = F.grad(rhs)

    with F.record_grad():
        g.apply_edges(udf_apply_edges[msg_func])
        if g.num_edges() > 0:
            e1 = g.edata["m"]
            assert F.allclose(e, e1)
            print("forward passed")

            F.backward(F.reduce_sum(e1))
            if msg != "copy_rhs":
                assert F.allclose(F.grad(lhs_frame["x"]), grad_lhs)
            if msg != "copy_lhs":
                assert F.allclose(F.grad(rhs_frame["y"]), grad_rhs)
            print("backward passed")

    # Clean up: the graph objects are shared across parametrized cases.
    lhs_frame.pop("x")
    rhs_frame.pop("y")
    if "m" in g.edata:
        g.edata.pop("m")
@pytest.mark.parametrize("reducer", ["sum", "max", "min", "mean"])
def test_segment_reduce(reducer):
    """Compare ``segment_reduce`` with ``gspmm`` "copy_lhs" on a bipartite graph."""
    ctx = F.ctx()
    value = F.tensor(np.random.rand(10, 5))
    v1 = F.attach_grad(F.clone(value))
    v2 = F.attach_grad(F.clone(value))
    seglen = F.tensor([2, 3, 0, 4, 1, 0, 0])
    # Row i of `value` becomes source node i; it is connected to the
    # destination node whose index is the segment the row belongs to.
    u = F.copy_to(F.arange(0, F.shape(value)[0], F.int32), ctx)
    v = F.repeat(
        F.copy_to(F.arange(0, len(seglen), F.int32), ctx), seglen, dim=0
    )

    num_nodes = {"_U": len(u), "_V": len(seglen)}
    g = dgl.convert.heterograph(
        {("_U", "_E", "_V"): (u, v)}, num_nodes_dict=num_nodes
    )
    with F.record_grad():
        rst1 = gspmm(g, "copy_lhs", reducer, v1, None)
        if reducer in ["max", "min"]:
            rst1 = F.replace_inf_with_zero(rst1)
        F.backward(F.reduce_sum(rst1))
        grad1 = F.grad(v1)

    with F.record_grad():
        rst2 = segment_reduce(seglen, v2, reducer=reducer)
        F.backward(F.reduce_sum(rst2))
        assert F.allclose(rst1, rst2)
        print("forward passed")

        grad2 = F.grad(v2)
        assert F.allclose(grad1, grad2)
        print("backward passed")
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
@pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
@pytest.mark.parametrize(
    "dtype, tol",
    [
        (torch.float16, 1e-2),
        (torch.bfloat16, 1e-2),
        (torch.float32, 3e-3),
        (torch.float64, 1e-4),
    ],
)
def test_segment_mm(idtype, feat_size, dtype, tol):
    """Compare ``dgl.ops.segment_mm`` with a per-segment matmul loop."""
    if F._default_context_str == "cpu" and dtype == torch.float16:
        pytest.skip("float16 is not supported on CPU.")
    if (
        F._default_context_str == "gpu"
        and dtype == torch.bfloat16
        and not torch.cuda.is_bf16_supported()
    ):
        pytest.skip("BF16 is not supported.")
    dev = F.ctx()
    # input
    a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype)
    a.requires_grad_()
    b = (
        torch.tensor(np.random.rand(10, feat_size, feat_size + 1))
        .to(dev)
        .to(dtype)
    )
    b.requires_grad_()
    # Segment lengths sum to 100 (the row count of `a`) and include empty
    # segments.
    seglen_a = torch.tensor([10, 15, 8, 0, 1, 9, 18, 24, 15, 0]).to(idtype)
    dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev).to(dtype)
    # compute
    c = dgl.ops.segment_mm(a, b, seglen_a)
    c.backward(dc)
    da = a.grad.clone()
    db = b.grad.clone()
    # ground truth
    c_t = []
    off = 0
    for i, l in enumerate(seglen_a):
        c_t.append(a[off : off + l] @ b[i])
        off += l
    c_t = torch.cat(c_t).to(dtype)
    # Reset accumulated gradients before the reference backward pass.
    a.grad.zero_()
    b.grad.zero_()
    c_t.backward(dc)
    da_t = a.grad
    db_t = b.grad

    assert torch.allclose(c, c_t, atol=tol, rtol=tol)
    assert torch.allclose(da, da_t, atol=tol, rtol=tol)
    assert torch.allclose(db, db_t, atol=tol, rtol=tol)


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
@pytest.mark.parametrize(
    "dtype, tol",
    [
        (torch.float16, 1e-2),
        (torch.bfloat16, 2e-2),
        (torch.float32, 3e-3),
        (torch.float64, 1e-4),
    ],
)
def test_gather_mm_idx_b(feat_size, dtype, tol):
    """Compare ``gather_mm(a, b, idx_b=idx)`` with ``torch.bmm`` on ``b[idx]``."""
    if F._default_context_str == "cpu" and dtype == torch.float16:
        pytest.skip("float16 is not supported on CPU.")

    if F._default_context_str == "gpu":
        if dtype == torch.bfloat16 and not torch.cuda.is_bf16_supported():
            pytest.skip("BF16 is not supported.")

        if (
            dtype == torch.float16
            and torch.cuda.get_device_capability() < (7, 0)
        ) or (
            dtype == torch.bfloat16
            and torch.cuda.get_device_capability() < (8, 0)
        ):
            pytest.skip(
                f"{dtype} is not supported for atomic operations on GPU with "
                f"cuda capability ({torch.cuda.get_device_capability()})."
            )

    dev = F.ctx()
    # input
    a = torch.tensor(np.random.rand(100, feat_size)).to(dev).to(dtype)
    a.requires_grad_()
    b = (
        torch.tensor(np.random.rand(10, feat_size, feat_size + 1))
        .to(dev)
        .to(dtype)
    )
    b.requires_grad_()
    idx = torch.tensor(np.random.randint(0, 10, 100)).to(dev).long()
    dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev).to(dtype)
    # compute
    c = gather_mm(a, b, idx_b=idx)
    c.backward(dc)
    da = a.grad.clone()
    db = b.grad.clone()
    # ground truth
    c_t = torch.bmm(a.unsqueeze(1), b[idx]).squeeze(1)
    # Reset accumulated gradients before the reference backward pass.
    a.grad.zero_()
    b.grad.zero_()
    c_t.backward(dc)
    da_t = a.grad
    db_t = b.grad

    assert torch.allclose(c, c_t, atol=tol, rtol=tol)
    assert torch.allclose(da, da_t, atol=tol, rtol=tol)
    assert torch.allclose(db, db_t, atol=tol, rtol=tol)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
@pytest.mark.parametrize("feat_size", [1, 8, 16, 64, 256])
def _test_gather_mm_idx_a(idtype, feat_size):
    """Compare ``gather_mm(a, b, idx_a=idx)`` with ``torch.bmm`` on ``a[idx]``.

    The leading underscore keeps pytest from collecting this test.
    """
    # TODO(minjie): currently disabled due to bugs in the CUDA kernel. Need to fix it later.
    import torch

    dev = F.ctx()
    # input
    a = torch.tensor(np.random.rand(10, feat_size)).to(dev)
    a.requires_grad_()
    b = torch.tensor(np.random.rand(100, feat_size, feat_size + 1)).to(dev)
    b.requires_grad_()
    idx = torch.tensor(np.random.randint(0, 10, 100)).to(dev)
    dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev)
    # compute
    c = gather_mm(a, b, idx_a=idx)
    c.backward(dc)
    da = a.grad.clone()
    db = b.grad.clone()
    # ground truth
    c_t = torch.bmm(a[idx].unsqueeze(1), b).squeeze(1)
    # Reset accumulated gradients before the reference backward pass.
    a.grad.zero_()
    b.grad.zero_()
    c_t.backward(dc)
    da_t = a.grad
    db_t = b.grad

    assert torch.allclose(c, c_t, atol=1e-4, rtol=1e-4)
    assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4)
    assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4)
import torch dev = F.ctx() # input a = torch.tensor(np.random.rand(10, feat_size)).to(dev) a.requires_grad_() b = torch.tensor(np.random.rand(100, feat_size, feat_size + 1)).to(dev) b.requires_grad_() idx = torch.tensor(np.random.randint(0, 10, 100)).to(dev) dc = torch.tensor(np.random.rand(100, feat_size + 1)).to(dev) # compute c = gather_mm(a, b, idx_a=idx) c.backward(dc) da = a.grad.clone() db = b.grad.clone() # ground truth c_t = torch.bmm(a[idx].unsqueeze(1), b).squeeze(1) a.grad.zero_() b.grad.zero_() c_t.backward(dc) da_t = a.grad db_t = b.grad assert torch.allclose(c, c_t, atol=1e-4, rtol=1e-4) assert torch.allclose(da, da_t, atol=1e-4, rtol=1e-4) assert torch.allclose(db, db_t, atol=1e-4, rtol=1e-4) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @unittest.skipIf( F._default_context_str == "gpu", reason="Libxsmm only fit in CPU." ) def test_use_libxsmm_switch(): import torch g = dgl.graph(([0, 0, 0, 1, 1, 2], [0, 1, 2, 1, 2, 2])) x = torch.ones(3, 2, requires_grad=True) y = torch.arange(1, 13).float().view(6, 2).requires_grad_() dgl.use_libxsmm(False) assert ~dgl.is_libxsmm_enabled() dgl.ops.u_mul_e_sum(g, x, y) dgl.use_libxsmm(True) assert dgl.is_libxsmm_enabled() dgl.ops.u_mul_e_sum(g, x, y) ================================================ FILE: tests/python/common/sampling/test_sampling.py ================================================ import unittest import warnings import backend as F import dgl import numpy as np import pytest sample_neighbors_fusing_mode = { True: dgl.sampling.sample_neighbors_fused, False: dgl.sampling.sample_neighbors, } def check_random_walk(g, metapath, traces, ntypes, prob=None, trace_eids=None): traces = F.asnumpy(traces) ntypes = F.asnumpy(ntypes) for j in range(traces.shape[1] - 1): assert ntypes[j] == g.get_ntype_id(g.to_canonical_etype(metapath[j])[0]) assert ntypes[j + 1] == g.get_ntype_id( g.to_canonical_etype(metapath[j])[2] ) for i in range(traces.shape[0]): for j in 
@pytest.mark.parametrize("use_uva", [True, False])
def test_non_uniform_random_walk(use_uva):
    """Exercise ``random_walk`` with an edge-probability feature."""
    if use_uva:
        if F.ctx() == F.cpu():
            pytest.skip("UVA biased random walk requires a GPU.")
        if dgl.backend.backend_name != "pytorch":
            pytest.skip(
                "UVA biased random walk is only supported with PyTorch."
            )
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g4 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
            ("user", "view", "item"): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
            ("item", "viewed-by", "user"): (
                [0, 1, 1, 2, 2, 1],
                [0, 0, 1, 2, 3, 3],
            ),
        }
    )

    g2.edata["p"] = F.copy_to(
        F.tensor([3, 0, 3, 3, 3], dtype=F.float32), F.cpu()
    )
    # "p2" has a trailing dimension; random_walk is expected to reject it.
    g2.edata["p2"] = F.copy_to(
        F.tensor([[3], [0], [3], [3], [3]], dtype=F.float32), F.cpu()
    )
    g4.edges["follow"].data["p"] = F.copy_to(
        F.tensor([3, 0, 3, 3, 3], dtype=F.float32), F.cpu()
    )
    g4.edges["viewed-by"].data["p"] = F.copy_to(
        F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32), F.cpu()
    )

    if use_uva:
        for g in (g2, g4):
            g.create_formats_()
            g.pin_memory_()
    elif F._default_context_str == "gpu":
        g2 = g2.to(F.ctx())
        g4 = g4.to(F.ctx())

    try:
        traces, eids, ntypes = dgl.sampling.random_walk(
            g2,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype),
            length=4,
            prob="p",
            return_eids=True,
        )
        check_random_walk(
            g2, ["follow"] * 4, traces, ntypes, "p", trace_eids=eids
        )

        with pytest.raises(dgl.DGLError):
            traces, ntypes = dgl.sampling.random_walk(
                g2,
                F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype),
                length=4,
                prob="p2",
            )

        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath,
            prob="p",
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath,
            prob="p",
            restart_prob=0.0,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath,
            prob="p",
            restart_prob=F.zeros((6,), F.float32, F.ctx()),
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # Restart probability 1 on the extra 7th step: the test expects the
        # last trace column to be -1.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath + ["follow"],
            prob="p",
            restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32),
            return_eids=True,
        )
        check_random_walk(
            g4, metapath, traces[:, :7], ntypes[:7], "p", trace_eids=eids
        )
        assert (F.asnumpy(traces[:, 7]) == -1).all()
    finally:
        # make sure to unpin the graphs even if some test fails; only unpin
        # graphs that were pinned, as the other tests in this file do.
        for g in (g2, g4):
            if g.is_pinned():
                g.unpin_memory_()
@pytest.mark.parametrize("use_uva", [True, False])
def test_uniform_random_walk(use_uva):
    """Exercise ``random_walk`` without edge probabilities."""
    if use_uva and F.ctx() == F.cpu():
        pytest.skip("UVA random walk requires a GPU.")
    g1 = dgl.heterograph({("user", "follow", "user"): ([0, 1, 2], [1, 2, 0])})
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g3 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 2], [1, 2, 0]),
            ("user", "view", "item"): ([0, 1, 2], [0, 1, 2]),
            ("item", "viewed-by", "user"): ([0, 1, 2], [0, 1, 2]),
        }
    )
    g4 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
            ("user", "view", "item"): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
            ("item", "viewed-by", "user"): (
                [0, 1, 1, 2, 2, 1],
                [0, 0, 1, 2, 3, 3],
            ),
        }
    )

    if use_uva:
        for g in (g1, g2, g3, g4):
            g.create_formats_()
            g.pin_memory_()
    elif F._default_context_str == "gpu":
        g1 = g1.to(F.ctx())
        g2 = g2.to(F.ctx())
        g3 = g3.to(F.ctx())
        g4 = g4.to(F.ctx())

    try:
        traces, eids, ntypes = dgl.sampling.random_walk(
            g1,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g1.idtype),
            length=4,
            return_eids=True,
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Seed node 10 does not exist in g1; the error is only checked on CPU.
        if F._default_context_str == "cpu":
            with pytest.raises(dgl.DGLError):
                dgl.sampling.random_walk(
                    g1,
                    F.tensor([0, 1, 2, 10], dtype=g1.idtype),
                    length=4,
                    return_eids=True,
                )
        traces, eids, ntypes = dgl.sampling.random_walk(
            g1,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g1.idtype),
            length=4,
            restart_prob=0.0,
            return_eids=True,
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        traces, ntypes = dgl.sampling.random_walk(
            g1,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g1.idtype),
            length=4,
            restart_prob=F.zeros((4,), F.float32),
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes)
        # Restart probability 1 on the 5th step: the test expects the last
        # trace column to be -1.
        traces, ntypes = dgl.sampling.random_walk(
            g1,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g1.idtype),
            length=5,
            restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32),
        )
        check_random_walk(
            g1,
            ["follow"] * 4,
            F.slice_axis(traces, 1, 0, 5),
            F.slice_axis(ntypes, 0, 0, 5),
        )
        assert (F.asnumpy(traces)[:, 5] == -1).all()

        traces, eids, ntypes = dgl.sampling.random_walk(
            g2,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype),
            length=4,
            return_eids=True,
        )
        check_random_walk(g2, ["follow"] * 4, traces, ntypes, trace_eids=eids)

        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g3,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g3.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids)

        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g4.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
    finally:  # make sure to unpin the graphs even if some test fails
        for g in (g1, g2, g3, g4):
            if g.is_pinned():
                g.unpin_memory_()
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU random walk not implemented"
)
def test_node2vec():
    """Exercise ``node2vec_random_walk`` with and without edge probabilities."""
    g1 = dgl.heterograph({("user", "follow", "user"): ([0, 1, 2], [1, 2, 0])})
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g2.edata["p"] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)

    # node2vec_random_walk returns no node types here, so an all-zero type
    # vector is handed to check_random_walk instead.
    ntypes = F.zeros((5,), dtype=F.int64)

    traces, eids = dgl.sampling.node2vec_random_walk(
        g1, [0, 1, 2, 0, 1, 2], 1, 1, 4, return_eids=True
    )
    check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)

    traces, eids = dgl.sampling.node2vec_random_walk(
        g2, [0, 1, 2, 3, 0, 1, 2, 3], 1, 1, 4, prob="p", return_eids=True
    )
    check_random_walk(g2, ["follow"] * 4, traces, ntypes, "p", trace_eids=eids)


@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU pack traces not implemented"
)
def test_pack_traces():
    """Check ``pack_traces`` on two traces, one padded with -1."""
    traces, types = (
        np.array(
            [[0, 1, -1, -1, -1, -1, -1], [0, 1, 1, 3, 0, 0, 0]], dtype="int64"
        ),
        np.array([0, 0, 1, 0, 0, 1, 0], dtype="int64"),
    )
    traces = F.zerocopy_from_numpy(traces)
    types = F.zerocopy_from_numpy(types)
    result = dgl.sampling.pack_traces(traces, types)
    # The four expected tensors below: concatenated nodes with the -1 padding
    # dropped, their types, per-trace lengths, and per-trace start offsets.
    assert F.array_equal(
        result[0], F.tensor([0, 1, 0, 1, 1, 3, 0, 0, 0], dtype=F.int64)
    )
    assert F.array_equal(
        result[1], F.tensor([0, 0, 0, 0, 1, 0, 0, 1, 0], dtype=F.int64)
    )
    assert F.array_equal(result[2], F.tensor([2, 7], dtype=F.int64))
    assert F.array_equal(result[3], F.tensor([0, 2], dtype=F.int64))


@pytest.mark.parametrize("use_uva", [True, False])
def test_pinsage_sampling(use_uva):
    """Exercise ``PinSAGESampler`` and ``RandomWalkNeighborSampler``."""
    if use_uva and F.ctx() == F.cpu():
        pytest.skip("UVA sampling requires a GPU.")

    def _test_sampler(g, sampler, ntype):
        # Sample around seeds 0 and 2 and check that each seed received an
        # edge from one of the two accepted neighbors.
        seeds = F.copy_to(F.tensor([0, 2], dtype=g.idtype), F.ctx())
        neighbor_g = sampler(seeds)
        assert neighbor_g.ntypes == [ntype]
        u, v = neighbor_g.all_edges(form="uv", order="eid")
        uv = list(zip(F.asnumpy(u).tolist(), F.asnumpy(v).tolist()))
        assert (1, 0) in uv or (0, 0) in uv
        assert (2, 2) in uv or (3, 2) in uv

    g = dgl.heterograph(
        {
            ("item", "bought-by", "user"): (
                [0, 0, 1, 1, 2, 2, 3, 3],
                [0, 1, 0, 1, 2, 3, 2, 3],
            ),
            ("user", "bought", "item"): (
                [0, 1, 0, 1, 2, 3, 2, 3],
                [0, 0, 1, 1, 2, 2, 3, 3],
            ),
        }
    )
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.PinSAGESampler(g, "item", "user", 4, 0.5, 3, 2)
        _test_sampler(g, sampler, "item")
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g, 4, 0.5, 3, 2, ["bought-by", "bought"]
        )
        _test_sampler(g, sampler, "item")
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g,
            4,
            0.5,
            3,
            2,
            [("item", "bought-by", "user"), ("user", "bought", "item")],
        )
        _test_sampler(g, sampler, "item")
    finally:
        if g.is_pinned():
            g.unpin_memory_()

    # Homogeneous graph: no metapath argument is passed.
    g = dgl.graph(([0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 2, 3, 2, 3]))
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2)
        _test_sampler(g, sampler, g.ntypes[0])
    finally:
        if g.is_pinned():
            g.unpin_memory_()

    # Three-step metapath A -> B -> C -> A.
    g = dgl.heterograph(
        {
            ("A", "AB", "B"): ([0, 2], [1, 3]),
            ("B", "BC", "C"): ([1, 3], [2, 1]),
            ("C", "CA", "A"): ([2, 1], [0, 2]),
        }
    )
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g, 4, 0.5, 3, 2, ["AB", "BC", "CA"]
        )
        _test_sampler(g, sampler, "A")
    finally:
        if g.is_pinned():
            g.unpin_memory_()
def _gen_neighbor_sampling_test_graph(hypersparse, reverse):
    """Build the homogeneous/heterogeneous graph pair used by sampling tests.

    When ``hypersparse`` is set, each node type is declared with ``1 << 50``
    nodes. When ``reverse`` is set, every edge direction (and the relation's
    source/destination types) is flipped. Returns ``(g, hg)`` on ``F.ctx()``
    with "prob" and "mask" edge features attached.
    """
    if hypersparse:
        # should crash if allocated a CSR
        card = 1 << 50
        num_nodes_dict = {"user": card, "game": card, "coin": card}
    else:
        card = None
        num_nodes_dict = None

    if reverse:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                )
            },
            {"user": card if card is not None else 4},
        )
        g = g.to(F.ctx())
        g.edata["prob"] = F.tensor(
            [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
        )
        g.edata["mask"] = F.tensor([True, True, False, True, True, False, True])
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                ),
                ("game", "play", "user"): ([0, 1, 2, 2], [0, 0, 1, 3]),
                ("user", "liked-by", "game"): (
                    [0, 1, 2, 0, 3, 0],
                    [2, 2, 2, 1, 1, 0],
                ),
                ("coin", "flips", "user"): ([0, 0, 0, 0], [0, 1, 2, 3]),
            },
            num_nodes_dict,
        )
        hg = hg.to(F.ctx())
    else:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                )
            },
            {"user": card if card is not None else 4},
        )
        g = g.to(F.ctx())
        g.edata["prob"] = F.tensor(
            [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
        )
        g.edata["mask"] = F.tensor([True, True, False, True, True, False, True])
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                ),
                ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
                ("game", "liked-by", "user"): (
                    [2, 2, 2, 1, 1, 0],
                    [0, 1, 2, 0, 3, 0],
                ),
                ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
            },
            num_nodes_dict,
        )
        hg = hg.to(F.ctx())
    # Edge features shared by both orientations.
    hg.edges["follow"].data["prob"] = F.tensor(
        [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
    )
    hg.edges["follow"].data["mask"] = F.tensor(
        [True, True, False, True, True, False, True]
    )
    hg.edges["play"].data["prob"] = F.tensor(
        [0.8, 0.5, 0.5, 0.5], dtype=F.float32
    )
    # Leave out the mask of play and liked-by since all of them are True anyway.
    hg.edges["liked-by"].data["prob"] = F.tensor(
        [0.3, 0.5, 0.2, 0.5, 0.1, 0.1], dtype=F.float32
    )

    return g, hg
def _gen_neighbor_topk_test_graph(hypersparse, reverse):
    """Build the graph pair with "weight" edge features for top-k tests.

    ``hypersparse`` is accepted for signature parity with
    ``_gen_neighbor_sampling_test_graph`` but does not affect the graphs built
    here (the value derived from it was never used). When ``reverse`` is set,
    every edge direction is flipped. Returns ``(g, hg)``.
    """
    if reverse:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                )
            }
        )
        g.edata["weight"] = F.tensor(
            [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
        )
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                ),
                ("game", "play", "user"): ([0, 1, 2, 2], [0, 0, 1, 3]),
                ("user", "liked-by", "game"): (
                    [0, 1, 2, 0, 3, 0],
                    [2, 2, 2, 1, 1, 0],
                ),
                ("coin", "flips", "user"): ([0, 0, 0, 0], [0, 1, 2, 3]),
            }
        )
    else:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                )
            }
        )
        g.edata["weight"] = F.tensor(
            [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
        )
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                ),
                ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
                ("game", "liked-by", "user"): (
                    [2, 2, 2, 1, 1, 0],
                    [0, 1, 2, 0, 3, 0],
                ),
                ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
            }
        )
    hg.edges["follow"].data["weight"] = F.tensor(
        [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
    )
    hg.edges["play"].data["weight"] = F.tensor(
        [0.8, 0.5, 0.4, 0.5], dtype=F.float32
    )
    hg.edges["liked-by"].data["weight"] = F.tensor(
        [0.3, 0.5, 0.2, 0.5, 0.1, 0.1], dtype=F.float32
    )
    hg.edges["flips"].data["weight"] = F.tensor(
        [10, 2, 13, -1], dtype=F.float32
    )
    return g, hg


def _test_sample_neighbors(hypersparse, prob, fused):
    """Check in-edge neighbor sampling (fused or not) on the test graphs.

    ``prob`` is the edge feature name passed as ``prob=`` to the sampler, or
    ``None`` for uniform sampling.
    """
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, False)

    def _test1(p, replace):
        # Fanout -1: all in-edges of the seeds (minus zero-probability ones).
        subg = sample_neighbors_fusing_mode[fused](
            g, [0, 1], -1, prob=p, replace=replace
        )
        if not fused:
            assert subg.num_nodes() == g.num_nodes()
        u, v = subg.edges()
        if fused:
            # Map the fused result's local IDs back to the original IDs.
            u, v = subg.srcdata[dgl.NID][u], subg.dstdata[dgl.NID][v]
        u_ans, v_ans, e_ans = g.in_edges([0, 1], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
        uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
        assert uv == uv_ans

        for i in range(10):
            subg = sample_neighbors_fusing_mode[fused](
                g, [0, 1], 2, prob=p, replace=replace
            )
            if not fused:
                assert subg.num_nodes() == g.num_nodes()
            assert subg.num_edges() == 4
            u, v = subg.edges()
            if fused:
                u, v = subg.srcdata[dgl.NID][u], subg.dstdata[dgl.NID][v]
            assert set(F.asnumpy(F.unique(v))) == {0, 1}
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((4,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
                # check no duplication
                assert len(edge_set) == 4
            if p is not None:
                assert not (3, 0) in edge_set
                assert not (3, 1) in edge_set

    _test1(prob, True)  # w/ replacement
    _test1(prob, False)  # w/o replacement

    def _test2(p, replace):  # fanout > #neighbors
        subg = sample_neighbors_fusing_mode[fused](
            g, [0, 2], -1, prob=p, replace=replace
        )
        if not fused:
            assert subg.num_nodes() == g.num_nodes()
        u, v = subg.edges()
        if fused:
            u, v = subg.srcdata[dgl.NID][u], subg.dstdata[dgl.NID][v]
        u_ans, v_ans, e_ans = g.in_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
        uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
        assert uv == uv_ans

        for i in range(10):
            subg = sample_neighbors_fusing_mode[fused](
                g, [0, 2], 2, prob=p, replace=replace
            )
            if not fused:
                assert subg.num_nodes() == g.num_nodes()
            num_edges = 4 if replace else 3
            assert subg.num_edges() == num_edges
            u, v = subg.edges()
            if fused:
                u, v = subg.srcdata[dgl.NID][u], subg.dstdata[dgl.NID][v]
            assert set(F.asnumpy(F.unique(v))) == {0, 2}
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((num_edges,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
                # check no duplication
                assert len(edge_set) == num_edges
            if p is not None:
                assert not (3, 0) in edge_set

    _test2(prob, True)  # w/ replacement
    _test2(prob, False)  # w/o replacement

    def _test3(p, replace):
        subg = sample_neighbors_fusing_mode[fused](
            hg, {"user": [0, 1], "game": 0}, -1, prob=p, replace=replace
        )
        if not fused:
            assert len(subg.ntypes) == 3
        assert len(subg.srctypes) == 3
        assert len(subg.dsttypes) == 3
        assert len(subg.etypes) == 4
        # Parenthesised: `assert a == 6 if c else 4` parses as
        # `assert (a == 6) if c else 4`, whose else arm is always truthy.
        assert subg["follow"].num_edges() == (6 if p is None else 4)
        assert subg["play"].num_edges() == 1
        assert subg["liked-by"].num_edges() == 4
        assert subg["flips"].num_edges() == 0

        for i in range(10):
            subg = sample_neighbors_fusing_mode[fused](
                hg, {"user": [0, 1], "game": 0}, 2, prob=p, replace=replace
            )
            if not fused:
                assert len(subg.ntypes) == 3
            assert len(subg.srctypes) == 3
            assert len(subg.dsttypes) == 3
            assert len(subg.etypes) == 4
            assert subg["follow"].num_edges() == 4
            assert subg["play"].num_edges() == (2 if replace else 1)
            assert subg["liked-by"].num_edges() == (4 if replace else 3)
            assert subg["flips"].num_edges() == 0

    _test3(prob, True)  # w/ replacement
    _test3(prob, False)  # w/o replacement

    # test different fanouts for different relations
    for i in range(10):
        subg = sample_neighbors_fusing_mode[fused](
            hg,
            {"user": [0, 1], "game": 0, "coin": 0},
            {"follow": 1, "play": 2, "liked-by": 0, "flips": -1},
            replace=True,
        )
        if not fused:
            assert len(subg.ntypes) == 3
        assert len(subg.srctypes) == 3
        assert len(subg.dsttypes) == 3
        assert len(subg.etypes) == 4
        assert subg["follow"].num_edges() == 2
        assert subg["play"].num_edges() == 2
        assert subg["liked-by"].num_edges() == 0
        assert subg["flips"].num_edges() == 4
def _test_sample_labors(hypersparse, prob):
    """Check ``dgl.sampling.sample_labors`` on the test graphs.

    ``prob`` is the edge feature name passed as ``prob=`` to the sampler, or
    ``None``. With a finite fanout the number of sampled edges is not fixed,
    so those checks only verify that the result is a duplicate-free subset of
    the seeds' in-edges.
    """
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, False)

    # test with seed nodes [0, 1]
    def _test1(p):
        subg = dgl.sampling.sample_labors(g, [0, 1], -1, prob=p)[0]
        assert subg.num_nodes() == g.num_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.in_edges([0, 1], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
        uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
        assert uv == uv_ans

        for i in range(10):
            subg = dgl.sampling.sample_labors(g, [0, 1], 2, prob=p)[0]
            assert subg.num_nodes() == g.num_nodes()
            assert subg.num_edges() >= 0
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(v))).issubset({0, 1})
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((subg.num_edges(),), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            # check no duplication
            assert len(edge_set) == subg.num_edges()
            if p is not None:
                assert not (3, 0) in edge_set
                assert not (3, 1) in edge_set

    _test1(prob)

    # test with seed nodes [0, 2]
    def _test2(p):
        subg = dgl.sampling.sample_labors(g, [0, 2], -1, prob=p)[0]
        assert subg.num_nodes() == g.num_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.in_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
        uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
        assert uv == uv_ans

        for i in range(10):
            subg = dgl.sampling.sample_labors(g, [0, 2], 2, prob=p)[0]
            assert subg.num_nodes() == g.num_nodes()
            assert subg.num_edges() >= 0
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(v))).issubset({0, 2})
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((subg.num_edges(),), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            # check no duplication
            assert len(edge_set) == subg.num_edges()
            if p is not None:
                assert not (3, 0) in edge_set

    _test2(prob)

    # test with heterogenous seed nodes
    def _test3(p):
        subg = dgl.sampling.sample_labors(
            hg, {"user": [0, 1], "game": 0}, -1, prob=p
        )[0]
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        # Parenthesised: `assert a == 6 if c else 4` parses as
        # `assert (a == 6) if c else 4`, whose else arm is always truthy.
        assert subg["follow"].num_edges() == (6 if p is None else 4)
        assert subg["play"].num_edges() == 1
        assert subg["liked-by"].num_edges() == 4
        assert subg["flips"].num_edges() == 0

        for i in range(10):
            subg = dgl.sampling.sample_labors(
                hg, {"user": [0, 1], "game": 0}, 2, prob=p
            )[0]
            assert len(subg.ntypes) == 3
            assert len(subg.etypes) == 4
            assert subg["follow"].num_edges() >= 0
            assert subg["play"].num_edges() >= 0
            assert subg["liked-by"].num_edges() >= 0
            assert subg["flips"].num_edges() == 0

    _test3(prob)

    # test different fanouts for different relations
    for i in range(10):
        subg = dgl.sampling.sample_labors(
            hg,
            {"user": [0, 1], "game": 0, "coin": 0},
            {"follow": 1, "play": 2, "liked-by": 0, "flips": g.num_nodes()},
        )[0]
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg["follow"].num_edges() >= 0
        assert subg["play"].num_edges() >= 0
        assert subg["liked-by"].num_edges() == 0
        assert subg["flips"].num_edges() == 4
def _test_sample_neighbors_outedge(hypersparse, fused):
    """Check out-edge neighbor sampling (``edge_dir="out"``) on reversed graphs.

    Each scenario runs uniformly and with the "prob" edge feature, with and
    without replacement.
    """
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, True)

    def _test1(p, replace):
        # Fanout -1: all out-edges of the seeds (minus zero-probability ones).
        subg = sample_neighbors_fusing_mode[fused](
            g, [0, 1], -1, prob=p, replace=replace, edge_dir="out"
        )
        if not fused:
            assert subg.num_nodes() == g.num_nodes()
        u, v = subg.edges()
        if fused:
            # Map the fused result's local IDs back to the original IDs.
            u, v = subg.dstdata[dgl.NID][u], subg.srcdata[dgl.NID][v]
        u_ans, v_ans, e_ans = g.out_edges([0, 1], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
        uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
        assert uv == uv_ans

        for i in range(10):
            subg = sample_neighbors_fusing_mode[fused](
                g, [0, 1], 2, prob=p, replace=replace, edge_dir="out"
            )
            if not fused:
                assert subg.num_nodes() == g.num_nodes()
            assert subg.num_edges() == 4
            u, v = subg.edges()
            if fused:
                u, v = subg.dstdata[dgl.NID][u], subg.srcdata[dgl.NID][v]
            assert set(F.asnumpy(F.unique(u))) == {0, 1}
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((4,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
                # check no duplication
                assert len(edge_set) == 4
            if p is not None:
                assert not (0, 3) in edge_set
                assert not (1, 3) in edge_set

    _test1(None, True)  # w/ replacement, uniform
    _test1(None, False)  # w/o replacement, uniform
    _test1("prob", True)  # w/ replacement
    _test1("prob", False)  # w/o replacement

    def _test2(p, replace):  # fanout > #neighbors
        subg = sample_neighbors_fusing_mode[fused](
            g, [0, 2], -1, prob=p, replace=replace, edge_dir="out"
        )
        if not fused:
            assert subg.num_nodes() == g.num_nodes()
        u, v = subg.edges()
        if fused:
            u, v = subg.dstdata[dgl.NID][u], subg.srcdata[dgl.NID][v]
        u_ans, v_ans, e_ans = g.out_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
        uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
        assert uv == uv_ans

        for i in range(10):
            subg = sample_neighbors_fusing_mode[fused](
                g, [0, 2], 2, prob=p, replace=replace, edge_dir="out"
            )
            if not fused:
                assert subg.num_nodes() == g.num_nodes()
            num_edges = 4 if replace else 3
            assert subg.num_edges() == num_edges
            u, v = subg.edges()
            if fused:
                u, v = subg.dstdata[dgl.NID][u], subg.srcdata[dgl.NID][v]
            assert set(F.asnumpy(F.unique(u))) == {0, 2}
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((num_edges,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
                # check no duplication
                assert len(edge_set) == num_edges
            if p is not None:
                assert not (0, 3) in edge_set

    _test2(None, True)  # w/ replacement, uniform
    _test2(None, False)  # w/o replacement, uniform
    _test2("prob", True)  # w/ replacement
    _test2("prob", False)  # w/o replacement

    def _test3(p, replace):
        subg = sample_neighbors_fusing_mode[fused](
            hg,
            {"user": [0, 1], "game": 0},
            -1,
            prob=p,
            replace=replace,
            edge_dir="out",
        )
        if not fused:
            assert len(subg.ntypes) == 3
        assert len(subg.srctypes) == 3
        assert len(subg.dsttypes) == 3
        assert len(subg.etypes) == 4
        # Parenthesised: `assert a == 6 if c else 4` parses as
        # `assert (a == 6) if c else 4`, whose else arm is always truthy.
        assert subg["follow"].num_edges() == (6 if p is None else 4)
        assert subg["play"].num_edges() == 1
        assert subg["liked-by"].num_edges() == 4
        assert subg["flips"].num_edges() == 0

        for i in range(10):
            subg = sample_neighbors_fusing_mode[fused](
                hg,
                {"user": [0, 1], "game": 0},
                2,
                prob=p,
                replace=replace,
                edge_dir="out",
            )
            if not fused:
                assert len(subg.ntypes) == 3
            assert len(subg.srctypes) == 3
            assert len(subg.dsttypes) == 3
            assert len(subg.etypes) == 4
            assert subg["follow"].num_edges() == 4
            assert subg["play"].num_edges() == (2 if replace else 1)
            assert subg["liked-by"].num_edges() == (4 if replace else 3)
            assert subg["flips"].num_edges() == 0

    _test3(None, True)  # w/ replacement, uniform
    _test3(None, False)  # w/o replacement, uniform
    _test3("prob", True)  # w/ replacement
    _test3("prob", False)  # w/o replacement
subg.dstdata[dgl.NID][u], subg.srcdata[dgl.NID][v] assert set(F.asnumpy(F.unique(u))) == {0, 2} assert F.array_equal( F.astype(g.has_edges_between(u, v), F.int64), F.ones((num_edges,), dtype=F.int64), ) assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID]) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) if not replace: # check no duplication assert len(edge_set) == num_edges if p is not None: assert not (0, 3) in edge_set _test2(None, True) # w/ replacement, uniform _test2(None, False) # w/o replacement, uniform _test2("prob", True) # w/ replacement _test2("prob", False) # w/o replacement def _test3(p, replace): subg = sample_neighbors_fusing_mode[fused]( hg, {"user": [0, 1], "game": 0}, -1, prob=p, replace=replace, edge_dir="out", ) if not fused: assert len(subg.ntypes) == 3 assert len(subg.srctypes) == 3 assert len(subg.dsttypes) == 3 assert len(subg.etypes) == 4 assert subg["follow"].num_edges() == 6 if p is None else 4 assert subg["play"].num_edges() == 1 assert subg["liked-by"].num_edges() == 4 assert subg["flips"].num_edges() == 0 for i in range(10): subg = sample_neighbors_fusing_mode[fused]( hg, {"user": [0, 1], "game": 0}, 2, prob=p, replace=replace, edge_dir="out", ) if not fused: assert len(subg.ntypes) == 3 assert len(subg.srctypes) == 3 assert len(subg.dsttypes) == 3 assert len(subg.etypes) == 4 assert subg["follow"].num_edges() == 4 assert subg["play"].num_edges() == 2 if replace else 1 assert subg["liked-by"].num_edges() == 4 if replace else 3 assert subg["flips"].num_edges() == 0 _test3(None, True) # w/ replacement, uniform _test3(None, False) # w/o replacement, uniform _test3("prob", True) # w/ replacement _test3("prob", False) # w/o replacement def _test_sample_neighbors_topk(hypersparse): g, hg = _gen_neighbor_topk_test_graph(hypersparse, False) def _test1(): subg = dgl.sampling.select_topk(g, -1, "weight", [0, 1]) assert subg.num_nodes() == g.num_nodes() u, v = subg.edges() u_ans, v_ans = subg.in_edges([0, 1]) uv = 
set(zip(F.asnumpy(u), F.asnumpy(v))) uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans))) assert uv == uv_ans subg = dgl.sampling.select_topk(g, 2, "weight", [0, 1]) assert subg.num_nodes() == g.num_nodes() assert subg.num_edges() == 4 u, v = subg.edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID]) assert edge_set == {(2, 0), (1, 0), (2, 1), (3, 1)} _test1() def _test2(): # k > #neighbors subg = dgl.sampling.select_topk(g, -1, "weight", [0, 2]) assert subg.num_nodes() == g.num_nodes() u, v = subg.edges() u_ans, v_ans = subg.in_edges([0, 2]) uv = set(zip(F.asnumpy(u), F.asnumpy(v))) uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans))) assert uv == uv_ans subg = dgl.sampling.select_topk(g, 2, "weight", [0, 2]) assert subg.num_nodes() == g.num_nodes() assert subg.num_edges() == 3 u, v = subg.edges() assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID]) edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(2, 0), (1, 0), (0, 2)} _test2() def _test3(): subg = dgl.sampling.select_topk( hg, 2, "weight", {"user": [0, 1], "game": 0} ) assert len(subg.ntypes) == 3 assert len(subg.etypes) == 4 u, v = subg["follow"].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal( hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID] ) assert edge_set == {(2, 0), (1, 0), (2, 1), (3, 1)} u, v = subg["play"].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal( hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID] ) assert edge_set == {(0, 0)} u, v = subg["liked-by"].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal( hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID] ) assert edge_set == {(2, 0), (2, 1), (1, 0)} assert subg["flips"].num_edges() == 0 _test3() # test different k for different relations subg = dgl.sampling.select_topk( hg, {"follow": 1, 
"play": 2, "liked-by": 0, "flips": -1}, "weight", {"user": [0, 1], "game": 0, "coin": 0}, ) assert len(subg.ntypes) == 3 assert len(subg.etypes) == 4 assert subg["follow"].num_edges() == 2 assert subg["play"].num_edges() == 1 assert subg["liked-by"].num_edges() == 0 assert subg["flips"].num_edges() == 4 def _test_sample_neighbors_topk_outedge(hypersparse): g, hg = _gen_neighbor_topk_test_graph(hypersparse, True) def _test1(): subg = dgl.sampling.select_topk(g, -1, "weight", [0, 1], edge_dir="out") assert subg.num_nodes() == g.num_nodes() u, v = subg.edges() u_ans, v_ans = subg.out_edges([0, 1]) uv = set(zip(F.asnumpy(u), F.asnumpy(v))) uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans))) assert uv == uv_ans subg = dgl.sampling.select_topk(g, 2, "weight", [0, 1], edge_dir="out") assert subg.num_nodes() == g.num_nodes() assert subg.num_edges() == 4 u, v = subg.edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID]) assert edge_set == {(0, 2), (0, 1), (1, 2), (1, 3)} _test1() def _test2(): # k > #neighbors subg = dgl.sampling.select_topk(g, -1, "weight", [0, 2], edge_dir="out") assert subg.num_nodes() == g.num_nodes() u, v = subg.edges() u_ans, v_ans = subg.out_edges([0, 2]) uv = set(zip(F.asnumpy(u), F.asnumpy(v))) uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans))) assert uv == uv_ans subg = dgl.sampling.select_topk(g, 2, "weight", [0, 2], edge_dir="out") assert subg.num_nodes() == g.num_nodes() assert subg.num_edges() == 3 u, v = subg.edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID]) assert edge_set == {(0, 2), (0, 1), (2, 0)} _test2() def _test3(): subg = dgl.sampling.select_topk( hg, 2, "weight", {"user": [0, 1], "game": 0}, edge_dir="out" ) assert len(subg.ntypes) == 3 assert len(subg.etypes) == 4 u, v = subg["follow"].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert F.array_equal( 
@unittest.skipIf(
    F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="GPU sample neighbors with mask not implemented",
)
def test_sample_neighbors_mask():
    """Run the shared neighbor-sampling checks with the ``"mask"`` feature.

    The checks always run once with ``False`` as the last argument, and a
    second time with ``True`` when the default context is not GPU and the
    backend is PyTorch.
    """
    last_arg_values = [False]
    if F._default_context_str != "gpu" and F.backend_name == "pytorch":
        last_arg_values.append(True)
    for value in last_arg_values:
        _test_sample_neighbors(False, "mask", value)
@pytest.mark.parametrize("fused", [False, True])
def test_sample_neighbors_with_0deg(fused):
    """Sampling from a graph without edges must yield no edges.

    A 5-node, edge-free graph is sampled from seeds ``[1, 2]`` with fanout 2
    for both edge directions, with and without replacement.
    """
    fused_unsupported = (
        F._default_context_str == "gpu" or F.backend_name != "pytorch"
    )
    if fused and fused_unsupported:
        pytest.skip("Fused sampling support CPU with backend PyTorch.")

    g = dgl.graph(([], []), num_nodes=5).to(F.ctx())
    sampler = sample_neighbors_fusing_mode[fused]
    # Same order as before: in/False, in/True, out/False, out/True.
    for direction in ("in", "out"):
        for with_replacement in (False, True):
            seeds = F.tensor([1, 2], dtype=F.int64)
            sg = sampler(
                g, seeds, 2, edge_dir=direction, replace=with_replacement
            )
            assert sg.num_edges() == 0
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_biased_homogeneous():
    """Biased neighbor sampling on a homogeneous graph follows the tag bias.

    Nodes get a random tag in ``{0, 1, 2, 3}`` and the bias vector is
    ``[0, 0.1, 10, 10]``.  For each direction (in-edges on the CSC-sorted
    graph, out-edges on the CSR-sorted graph) and each replacement mode, five
    samples are drawn and their neighbor tag counts are checked.  Samples
    drawn without replacement must additionally contain no duplicate edge.
    """
    g = create_test_graph(100, 30)

    def check_num(nodes, tag):
        picked_tags = F.asnumpy(tag)[F.asnumpy(nodes)]
        counts = [int(np.sum(picked_tags == t)) for t in range(4)]
        # No tag 0
        assert counts[0] == 0

        # very rare tag 1
        assert counts[2] > 2 * counts[1]
        assert counts[3] > 2 * counts[1]

    tag = F.tensor(np.random.choice(4, 100))
    bias = F.tensor([0, 0.1, 10, 10], dtype=F.float32)

    # (sorting routine, extra sampler kwargs, index of the neighbor endpoint
    # in ``subg.edges()``)
    directions = (
        (dgl.sort_csc_by_tag, {}, 0),  # inedge
        (dgl.sort_csr_by_tag, {"edge_dir": "out"}, 1),  # outedge
    )
    for sort_by_tag, dir_kwargs, neighbor_side in directions:
        g_sorted = sort_by_tag(g, tag)
        # without replacement first, then with replacement
        for with_replacement in (False, True):
            for _ in range(5):
                subg = dgl.sampling.sample_neighbors_biased(
                    g_sorted,
                    g.nodes(),
                    5,
                    bias,
                    replace=with_replacement,
                    **dir_kwargs,
                )
                check_num(subg.edges()[neighbor_side], tag)
                if with_replacement:
                    continue
                u, v = subg.edges()
                unique_edges = set(
                    zip(list(F.asnumpy(u)), list(F.asnumpy(v)))
                )
                assert len(unique_edges) == subg.num_edges()
@pytest.mark.parametrize("format_", ["coo", "csr", "csc"]) @pytest.mark.parametrize("direction", ["in", "out"]) @pytest.mark.parametrize("replace", [False, True]) def test_sample_neighbors_etype_homogeneous(format_, direction, replace): num_nodes = 100 rare_cnt = 4 g = create_etype_test_graph(100, 30, rare_cnt) h_g = dgl.to_homogeneous(g, edata=["p", "mask"]) h_g_etype = F.asnumpy(h_g.edata[dgl.ETYPE]) h_g_offset = np.cumsum(np.insert(np.bincount(h_g_etype), 0, 0)).tolist() sg = g.edge_subgraph(g.edata["mask"], relabel_nodes=False) h_sg = h_g.edge_subgraph(h_g.edata["mask"], relabel_nodes=False) h_sg_etype = F.asnumpy(h_sg.edata[dgl.ETYPE]) h_sg_offset = np.cumsum(np.insert(np.bincount(h_sg_etype), 0, 0)).tolist() seed_ntype = g.get_ntype_id("u") seeds = F.nonzero_1d(h_g.ndata[dgl.NTYPE] == seed_ntype) fanouts = F.tensor([6, 5, 4, 3, 2], dtype=F.int64) def check_num(h_g, all_src, all_dst, subg, replace, fanouts, direction): src, dst = subg.edges() all_etype_array = F.asnumpy(h_g.edata[dgl.ETYPE]) num_etypes = all_etype_array.max() + 1 etype_array = F.asnumpy(subg.edata[dgl.ETYPE]) src = F.asnumpy(src) dst = F.asnumpy(dst) fanouts = F.asnumpy(fanouts) all_src = F.asnumpy(all_src) all_dst = F.asnumpy(all_dst) src_per_etype = [] dst_per_etype = [] all_src_per_etype = [] all_dst_per_etype = [] for etype in range(num_etypes): src_per_etype.append(src[etype_array == etype]) dst_per_etype.append(dst[etype_array == etype]) all_src_per_etype.append(all_src[all_etype_array == etype]) all_dst_per_etype.append(all_dst[all_etype_array == etype]) if replace: if direction == "in": in_degree_per_etype = [np.bincount(d) for d in dst_per_etype] for etype in range(len(fanouts)): in_degree = in_degree_per_etype[etype] fanout = fanouts[etype] ans = np.zeros_like(in_degree) if len(in_degree) > 0: ans[all_dst_per_etype[etype]] = fanout assert np.all(in_degree == ans) else: out_degree_per_etype = [np.bincount(s) for s in src_per_etype] for etype in range(len(fanouts)): out_degree = 
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="GPU sample neighbors not implemented",
)
@unittest.skipIf(
    F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@pytest.mark.parametrize("format_", ["csr", "csc"])
@pytest.mark.parametrize("direction", ["in", "out"])
def test_sample_neighbors_etype_sorted_homogeneous(format_, direction):
    """Smoke-test ``sample_etype_neighbors`` with ``etype_sorted=True``.

    The heterogeneous test graph is converted to a homogeneous one, restricted
    to ``format_``, sorted by edge type for the requested ``direction`` and
    then sampled.  Nothing is asserted about the result; the test only
    requires the call to complete.
    """
    g = create_etype_test_graph(100, 30, 4)
    h_g = dgl.to_homogeneous(g)

    # Seeds are all homogeneous nodes that originate from node type "u".
    u_type_id = g.get_ntype_id("u")
    seeds = F.nonzero_1d(h_g.ndata[dgl.NTYPE] == u_type_id)
    fanouts = F.tensor([6, 5, -1, 3, 2], dtype=F.int64)

    h_g = h_g.formats(format_)
    mismatched = (direction == "in" and format_ == "csr") or (
        direction == "out" and format_ == "csc"
    )
    if mismatched:
        h_g = h_g.formats(["csc", "csr", "coo"])

    sort_by_tag = (
        dgl.sort_csc_by_tag if direction == "in" else dgl.sort_csr_by_tag
    )
    h_g = sort_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type="edge")

    # Offsets delimiting each edge type: [0, n_0, n_0 + n_1, ...].
    etype_counts = np.bincount(F.asnumpy(h_g.edata[dgl.ETYPE]))
    h_g_offset = np.cumsum(np.insert(etype_counts, 0, 0)).tolist()
    dgl.sampling.sample_etype_neighbors(
        h_g, seeds, h_g_offset, fanouts, edge_dir=direction, etype_sorted=True
    )
d_t_d_v_nodes), } ).to(F.ctx()) (U, V, EID) = (0, 1, 2) nd_b_idx = np.random.randint(low=1, high=24, dtype=dtype) nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype) did_b_idx = np.random.randint(low=1, high=24, dtype=dtype) did_e_idx = np.random.randint(low=25, high=49, dtype=dtype) sampled_amount = np.random.randint(low=1, high=10, dtype=dtype) drug_i_drug_edges = g.all_edges( form="all", etype=("drug", "interacts", "drug") ) excluded_d_i_d_edges = drug_i_drug_edges[EID][did_b_idx:did_e_idx] sampled_drug_node = drug_i_drug_edges[V][nd_b_idx:nd_e_idx] did_excluded_nodes_U = drug_i_drug_edges[U][did_b_idx:did_e_idx] did_excluded_nodes_V = drug_i_drug_edges[V][did_b_idx:did_e_idx] nd_b_idx = np.random.randint(low=1, high=24, dtype=dtype) nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype) dig_b_idx = np.random.randint(low=1, high=24, dtype=dtype) dig_e_idx = np.random.randint(low=25, high=49, dtype=dtype) drug_i_gene_edges = g.all_edges( form="all", etype=("drug", "interacts", "gene") ) excluded_d_i_g_edges = drug_i_gene_edges[EID][dig_b_idx:dig_e_idx] dig_excluded_nodes_U = drug_i_gene_edges[U][dig_b_idx:dig_e_idx] dig_excluded_nodes_V = drug_i_gene_edges[V][dig_b_idx:dig_e_idx] sampled_gene_node = drug_i_gene_edges[V][nd_b_idx:nd_e_idx] nd_b_idx = np.random.randint(low=1, high=24, dtype=dtype) nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype) dtd_b_idx = np.random.randint(low=1, high=24, dtype=dtype) dtd_e_idx = np.random.randint(low=25, high=49, dtype=dtype) drug_t_dis_edges = g.all_edges( form="all", etype=("drug", "treats", "disease") ) excluded_d_t_d_edges = drug_t_dis_edges[EID][dtd_b_idx:dtd_e_idx] dtd_excluded_nodes_U = drug_t_dis_edges[U][dtd_b_idx:dtd_e_idx] dtd_excluded_nodes_V = drug_t_dis_edges[V][dtd_b_idx:dtd_e_idx] sampled_disease_node = drug_t_dis_edges[V][nd_b_idx:nd_e_idx] excluded_edges = { ("drug", "interacts", "drug"): excluded_d_i_d_edges, ("drug", "interacts", "gene"): excluded_d_i_g_edges, ("drug", "treats", 
"disease"): excluded_d_t_d_edges, } sg = sample_neighbors_fusing_mode[fused]( g, { "drug": sampled_drug_node, "gene": sampled_gene_node, "disease": sampled_disease_node, }, sampled_amount, exclude_edges=excluded_edges, ) if fused: def contain_edge(g, sg, etype, u, v): # set of subgraph graph edges deduced from original graph org_edges = set( map( tuple, np.stack( g.find_edges(sg.edges[etype].data[dgl.EID], etype), axis=1, ), ) ) # set of excluded edges excluded_edges = set(map(tuple, np.stack((u, v), axis=1))) diff_set = org_edges - excluded_edges return len(diff_set) != len(org_edges) assert not contain_edge( g, sg, ("drug", "interacts", "drug"), did_excluded_nodes_U, did_excluded_nodes_V, ) assert not contain_edge( g, sg, ("drug", "interacts", "gene"), dig_excluded_nodes_U, dig_excluded_nodes_V, ) assert not contain_edge( g, sg, ("drug", "treats", "disease"), dtd_excluded_nodes_U, dtd_excluded_nodes_V, ) else: assert not np.any( F.asnumpy( sg.has_edges_between( did_excluded_nodes_U, did_excluded_nodes_V, etype=("drug", "interacts", "drug"), ) ) ) assert not np.any( F.asnumpy( sg.has_edges_between( dig_excluded_nodes_U, dig_excluded_nodes_V, etype=("drug", "interacts", "gene"), ) ) ) assert not np.any( F.asnumpy( sg.has_edges_between( dtd_excluded_nodes_U, dtd_excluded_nodes_V, etype=("drug", "treats", "disease"), ) ) ) @pytest.mark.parametrize("dtype", ["int32", "int64"]) @pytest.mark.parametrize("fused", [False, True]) def test_sample_neighbors_exclude_edges_homoG(dtype, fused): if fused and ( F._default_context_str == "gpu" or F.backend_name != "pytorch" ): pytest.skip("Fused sampling support CPU with backend PyTorch.") u_nodes = F.zerocopy_from_numpy( np.unique(np.random.randint(300, size=100, dtype=dtype)) ) v_nodes = F.zerocopy_from_numpy( np.random.randint(25, size=u_nodes.shape, dtype=dtype) ) g = dgl.graph((u_nodes, v_nodes)).to(F.ctx()) (U, V, EID) = (0, 1, 2) nd_b_idx = np.random.randint(low=1, high=24, dtype=dtype) nd_e_idx = np.random.randint(low=25, 
high=49, dtype=dtype) b_idx = np.random.randint(low=1, high=24, dtype=dtype) e_idx = np.random.randint(low=25, high=49, dtype=dtype) sampled_amount = np.random.randint(low=1, high=10, dtype=dtype) g_edges = g.all_edges(form="all") excluded_edges = g_edges[EID][b_idx:e_idx] sampled_node = g_edges[V][nd_b_idx:nd_e_idx] excluded_nodes_U = g_edges[U][b_idx:e_idx] excluded_nodes_V = g_edges[V][b_idx:e_idx] sg = sample_neighbors_fusing_mode[fused]( g, sampled_node, sampled_amount, exclude_edges=excluded_edges ) if fused: def contain_edge(g, sg, u, v): # set of subgraph graph edges deduced from original graph org_edges = set( map( tuple, np.stack( g.find_edges(sg.edges["_E"].data[dgl.EID]), axis=1 ), ) ) # set of excluded edges excluded_edges = set(map(tuple, np.stack((u, v), axis=1))) diff_set = org_edges - excluded_edges return len(diff_set) != len(org_edges) assert not contain_edge(g, sg, excluded_nodes_U, excluded_nodes_V) else: assert not np.any( F.asnumpy(sg.has_edges_between(excluded_nodes_U, excluded_nodes_V)) ) @pytest.mark.parametrize("dtype", ["int32", "int64"]) def test_global_uniform_negative_sampling(dtype): warnings.simplefilter("ignore", np.exceptions.ComplexWarning) g = dgl.graph(([], []), num_nodes=1000).to(F.ctx()) src, dst = dgl.sampling.global_uniform_negative_sampling( g, 2000, False, True ) assert len(src) == 2000 assert len(dst) == 2000 g = dgl.graph( (np.random.randint(0, 20, (300,)), np.random.randint(0, 20, (300,))) ).to(F.ctx()) src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, True) assert not F.asnumpy(g.has_edges_between(src, dst)).any() src, dst = dgl.sampling.global_uniform_negative_sampling( g, 20, False, False ) assert not F.asnumpy(g.has_edges_between(src, dst)).any() src = F.asnumpy(src) dst = F.asnumpy(dst) s = set(zip(src.tolist(), dst.tolist())) assert len(s) == len(src) g = dgl.graph(([0], [1])).to(F.ctx()) src, dst = dgl.sampling.global_uniform_negative_sampling( g, 20, True, False, redundancy=10 ) src = 
F.asnumpy(src) dst = F.asnumpy(dst) # should have either no element or (1, 0) assert len(src) < 2 assert len(dst) < 2 if len(src) == 1: assert src[0] == 1 assert dst[0] == 0 g = dgl.heterograph( { ("A", "AB", "B"): ( np.random.randint(0, 20, (300,)), np.random.randint(0, 40, (300,)), ), ("B", "BA", "A"): ( np.random.randint(0, 40, (200,)), np.random.randint(0, 20, (200,)), ), } ).to(F.ctx()) src, dst = dgl.sampling.global_uniform_negative_sampling( g, 20, False, etype="AB" ) assert not F.asnumpy(g.has_edges_between(src, dst, etype="AB")).any() if __name__ == "__main__": from itertools import product test_sample_neighbors_noprob() test_sample_labors_noprob() test_sample_neighbors_prob() test_sample_labors_prob() test_sample_neighbors_mask() for args in product(["coo", "csr", "csc"], ["in", "out"], [False, True]): test_sample_neighbors_etype_homogeneous(*args) for args in product(["csr", "csc"], ["in", "out"]): test_sample_neighbors_etype_sorted_homogeneous(*args) test_non_uniform_random_walk(False) test_uniform_random_walk(False) test_pack_traces() test_pinsage_sampling(False) test_sample_neighbors_outedge() test_sample_neighbors_topk() test_sample_neighbors_topk_outedge() test_sample_neighbors_with_0deg() test_sample_neighbors_biased_homogeneous() test_sample_neighbors_biased_bipartite() test_sample_neighbors_exclude_edges_heteroG("int32") test_sample_neighbors_exclude_edges_homoG("int32") test_global_uniform_negative_sampling("int32") test_global_uniform_negative_sampling("int64") ================================================ FILE: tests/python/common/test_batch-graph.py ================================================ import unittest import backend as F import dgl import numpy as np from utils import parametrize_idtype def tree1(idtype): """Generate a tree 0 / \ 1 2 / \ 3 4 Edges are from leaves to root. 
""" g = dgl.graph(([], [])).astype(idtype).to(F.ctx()) g.add_nodes(5) g.add_edges(3, 1) g.add_edges(4, 1) g.add_edges(1, 0) g.add_edges(2, 0) g.ndata["h"] = F.tensor([0, 1, 2, 3, 4]) g.edata["h"] = F.randn((4, 10)) return g def tree2(idtype): """Generate a tree 1 / \ 4 3 / \ 2 0 Edges are from leaves to root. """ g = dgl.graph(([], [])).astype(idtype).to(F.ctx()) g.add_nodes(5) g.add_edges(2, 4) g.add_edges(0, 4) g.add_edges(4, 1) g.add_edges(3, 1) g.ndata["h"] = F.tensor([0, 1, 2, 3, 4]) g.edata["h"] = F.randn((4, 10)) return g @parametrize_idtype def test_batch_unbatch(idtype): t1 = tree1(idtype) t2 = tree2(idtype) bg = dgl.batch([t1, t2]) assert bg.num_nodes() == 10 assert bg.num_edges() == 8 assert bg.batch_size == 2 assert F.allclose(bg.batch_num_nodes(), F.tensor([5, 5])) assert F.allclose(bg.batch_num_edges(), F.tensor([4, 4])) tt1, tt2 = dgl.unbatch(bg) assert F.allclose(t1.ndata["h"], tt1.ndata["h"]) assert F.allclose(t1.edata["h"], tt1.edata["h"]) assert F.allclose(t2.ndata["h"], tt2.ndata["h"]) assert F.allclose(t2.edata["h"], tt2.edata["h"]) @parametrize_idtype def test_batch_unbatch1(idtype): t1 = tree1(idtype) t2 = tree2(idtype) b1 = dgl.batch([t1, t2]) b2 = dgl.batch([t2, b1]) assert b2.num_nodes() == 15 assert b2.num_edges() == 12 assert b2.batch_size == 3 assert F.allclose(b2.batch_num_nodes(), F.tensor([5, 5, 5])) assert F.allclose(b2.batch_num_edges(), F.tensor([4, 4, 4])) s1, s2, s3 = dgl.unbatch(b2) assert F.allclose(t2.ndata["h"], s1.ndata["h"]) assert F.allclose(t2.edata["h"], s1.edata["h"]) assert F.allclose(t1.ndata["h"], s2.ndata["h"]) assert F.allclose(t1.edata["h"], s2.edata["h"]) assert F.allclose(t2.ndata["h"], s3.ndata["h"]) assert F.allclose(t2.edata["h"], s3.edata["h"]) @unittest.skipIf( dgl.backend.backend_name == "tensorflow", reason="TF doesn't support inplace update", ) @parametrize_idtype def test_batch_unbatch_frame(idtype): """Test module of node/edge frames of batched/unbatched DGLGraphs. 
@parametrize_idtype
def test_batch_send_and_recv(idtype):
    """Message passing on a batched graph reaches both member trees.

    The source feature ``h`` is sent along the leaf edges of both trees and
    summed at the destinations; the results are checked after unbatching.
    """
    batched = dgl.batch([tree1(idtype), tree2(idtype)])

    def copy_src_h(edges):
        return {"m": edges.src["h"]}

    def sum_mailbox(nodes):
        return {"h": F.sum(nodes.mailbox["m"], 1)}

    # Nodes of the second tree are addressed with an offset of 5
    # (tree1 contributes five nodes).
    shift = 5
    src = [3, 4] + [node + shift for node in (2, 0)]
    dst = [1, 1] + [node + shift for node in (4, 4)]

    batched.send_and_recv((src, dst), copy_src_h, sum_mailbox)

    first, second = dgl.unbatch(batched)
    assert F.asnumpy(first.ndata["h"][1]) == 7
    assert F.asnumpy(second.ndata["h"][4]) == 2
@parametrize_idtype
def test_batch_propagate(idtype):
    """Propagate messages leaf-to-root over a batch of two trees with ``prop_edges``."""
    t1 = tree1(idtype)
    t2 = tree2(idtype)

    bg = dgl.batch([t1, t2])
    _mfunc = lambda edges: {"m": edges.src["h"]}
    _rfunc = lambda nodes: {"h": F.sum(nodes.mailbox["m"], 1)}
    # get leaves.

    order = []

    # step 1
    # Node IDs of the second tree are shifted by 5 (the node count of the first tree).
    u = [3, 4, 2 + 5, 0 + 5]
    v = [1, 1, 4 + 5, 4 + 5]
    order.append((u, v))

    # step 2
    u = [1, 2, 4 + 5, 3 + 5]
    v = [0, 0, 1 + 5, 1 + 5]
    order.append((u, v))

    bg.prop_edges(order, _mfunc, _rfunc)
    t1, t2 = dgl.unbatch(bg)

    assert F.asnumpy(t1.ndata["h"][0]) == 9
    assert F.asnumpy(t2.ndata["h"][1]) == 5


@parametrize_idtype
def test_batched_edge_ordering(idtype):
    """Check that the edge (4, 5) of the first graph keeps its feature after batching."""
    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g1.add_nodes(6)
    g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
    e1 = F.randn((5, 10))
    g1.edata["h"] = e1
    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g2.add_nodes(6)
    g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
    e2 = F.randn((6, 10))
    g2.edata["h"] = e2
    g = dgl.batch([g1, g2])
    r1 = g.edata["h"][g.edge_ids(4, 5)]
    r2 = g1.edata["h"][g1.edge_ids(4, 5)]
    assert F.array_equal(r1, r2)


@parametrize_idtype
def test_batch_no_edge(idtype):
    """Batch three graphs where the middle one has a single node and no edges."""
    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g1.add_nodes(6)
    g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g2.add_nodes(6)
    g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
    g3 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g3.add_nodes(1)  # no edges
    g = dgl.batch([g1, g3, g2])  # should not throw an error


@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
    """Feature keys set on empty graphs must still be present on the batched graph."""
    g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g1.ndata["nh"] = F.tensor([])
    g1.edata["eh"] = F.tensor([])
    g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
    g2.ndata["nh"] = F.tensor([])
    g2.edata["eh"] = F.tensor([])
    g = dgl.batch([g1, g2])
    assert "nh" in g.ndata
    assert "eh" in g.edata


def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
    """Internal function to compute batch information for subgraphs.

    Parameters
    ----------
    keys : List[str]
        The node/edge type keys.
    induced_indices_arr : List[Tensor]
        The induced node/edge index tensor for all node/edge types.
    batch_num_objs : Tensor
        Number of nodes/edges for each graph in the original batch.

    Returns
    -------
    Mapping[str, Tensor]
        A dictionary mapping all node/edge type keys to the ``batch_num_objs``
        array of corresponding graph.
    """
    # Exclusive upper bound of the ID range owned by each graph in the batch.
    bucket_offset = np.expand_dims(
        np.cumsum(F.asnumpy(batch_num_objs), 0), -1
    )  # (num_bkts, 1)
    ret = {}
    for key, induced_indices in zip(keys, induced_indices_arr):
        # NOTE(Zihao): this implementation is not efficient and we can replace it with
        # binary search in the future.
        induced_indices = np.expand_dims(
            F.asnumpy(induced_indices), 0
        )  # (1, num_nodes)
        # For every bucket, count the induced IDs that fall below its upper bound.
        new_offset = np.sum((induced_indices < bucket_offset), 1)  # (num_bkts,)
        # start_offset = [0] + [new_offset[i-1] for i in range(1, n_bkts)]
        start_offset = np.concatenate([np.zeros((1,)), new_offset[:-1]], 0)
        # Differences of consecutive cumulative counts give the per-graph counts.
        new_batch_num_objs = new_offset - start_offset
        ret[key] = F.tensor(new_batch_num_objs, dtype=F.dtype(batch_num_objs))
    return ret


@parametrize_idtype
def test_set_batch_info(idtype):
    """Set batch information on node/edge subgraphs of a batch and unbatch them."""
    ctx = F.ctx()

    g1 = dgl.rand_graph(30, 100).astype(idtype).to(F.ctx())
    g2 = dgl.rand_graph(40, 200).astype(idtype).to(F.ctx())
    bg = dgl.batch([g1, g2])
    batch_num_nodes = F.astype(bg.batch_num_nodes(), idtype)
    batch_num_edges = F.astype(bg.batch_num_edges(), idtype)

    # test homogeneous node subgraph
    sg_n = dgl.node_subgraph(bg, list(range(10, 20)) + list(range(50, 60)))
    induced_nodes = sg_n.ndata["_ID"]
    induced_edges = sg_n.edata["_ID"]
    new_batch_num_nodes = _get_subgraph_batch_info(
        bg.ntypes, [induced_nodes], batch_num_nodes
    )
    new_batch_num_edges = _get_subgraph_batch_info(
        bg.canonical_etypes, [induced_edges], batch_num_edges
    )
    sg_n.set_batch_num_nodes(new_batch_num_nodes)
    sg_n.set_batch_num_edges(new_batch_num_edges)
    subg_n1, subg_n2 = dgl.unbatch(sg_n)
    # Nodes 50..59 of the batch are nodes 20..29 of g2 (g1 contributes 30 nodes).
    subg1 = dgl.node_subgraph(g1, list(range(10, 20)))
    subg2 = dgl.node_subgraph(g2, list(range(20, 30)))
    assert subg_n1.num_edges() == subg1.num_edges()
    assert subg_n2.num_edges() == subg2.num_edges()

    # test homogeneous edge subgraph
    sg_e = dgl.edge_subgraph(
        bg, list(range(40, 70)) + list(range(150, 200)), relabel_nodes=False
    )
    induced_nodes = F.arange(0, bg.num_nodes(), idtype)
    induced_edges = sg_e.edata["_ID"]
    new_batch_num_nodes = _get_subgraph_batch_info(
        bg.ntypes, [induced_nodes], batch_num_nodes
    )
    new_batch_num_edges = _get_subgraph_batch_info(
        bg.canonical_etypes, [induced_edges], batch_num_edges
    )
    sg_e.set_batch_num_nodes(new_batch_num_nodes)
    sg_e.set_batch_num_edges(new_batch_num_edges)
    subg_e1, subg_e2 = dgl.unbatch(sg_e)
    # Edges 150..199 of the batch are edges 50..99 of g2 (g1 contributes 100 edges).
    subg1 = dgl.edge_subgraph(g1, list(range(40, 70)), relabel_nodes=False)
    subg2 = dgl.edge_subgraph(g2, list(range(50, 100)), relabel_nodes=False)
    assert subg_e1.num_nodes() == subg1.num_nodes()
    assert subg_e2.num_nodes() == subg2.num_nodes()


if __name__ == "__main__":
    # test_batch_unbatch()
    # test_batch_unbatch1()
    # test_batch_unbatch_frame()
    # test_batch_unbatch2()
    # test_batched_edge_ordering()
    # test_batch_send_then_recv()
    # test_batch_send_and_recv()
    # test_batch_propagate()
    # test_batch_no_edge()
    test_set_batch_info(F.int32)

================================================
FILE: tests/python/common/test_batch-heterograph.py
================================================
import unittest

import backend as F

import dgl
import pytest
from dgl.base import ALL
from utils import check_graph_equal, get_cases, parametrize_idtype


def check_equivalence_between_heterographs(
    g1, g2, node_attrs=None, edge_attrs=None
):
    """Assert that two heterographs have the same schema, sizes, edges and features.

    Parameters
    ----------
    g1, g2
        The two graphs to compare.
    node_attrs : optional
        Mapping from node type to the feature names to compare. Node types
        with no nodes in ``g1`` are skipped.
    edge_attrs : optional
        Mapping from edge type to the feature names to compare. Edge types
        with no edges in ``g1`` are skipped.
    """
    assert g1.ntypes == g2.ntypes
    assert g1.etypes == g2.etypes
    assert g1.canonical_etypes == g2.canonical_etypes

    for nty in g1.ntypes:
        assert g1.num_nodes(nty) == g2.num_nodes(nty)

    for ety in g1.etypes:
        # NOTE(review): presumably an empty entry marks an ambiguous edge type
        # name that cannot be queried by name alone -- confirm.
        if len(g1._etype2canonical[ety]) > 0:
            assert g1.num_edges(ety) == g2.num_edges(ety)

    for ety in g1.canonical_etypes:
        assert g1.num_edges(ety) == g2.num_edges(ety)
        src1, dst1, eid1 = g1.edges(etype=ety, form="all")
        src2, dst2, eid2 = g2.edges(etype=ety, form="all")
        assert F.allclose(src1, src2)
        assert F.allclose(dst1, dst2)
        assert F.allclose(eid1, eid2)

    if node_attrs is not None:
        for nty in node_attrs.keys():
            if g1.num_nodes(nty) == 0:
                continue
            for feat_name in node_attrs[nty]:
                assert F.allclose(
                    g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name]
                )

    if edge_attrs is not None:
        for ety in edge_attrs.keys():
            if g1.num_edges(ety) == 0:
                continue
            for feat_name in edge_attrs[ety]:
                assert F.allclose(
                    g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name]
                )


@pytest.mark.parametrize("gs", get_cases(["two_hetero_batch"]))
@parametrize_idtype
def test_topology(gs, idtype):
    """Test batching two DGLGraphs where some nodes are isolated in some relations"""
    g1, g2 = gs
    g1 = g1.astype(idtype).to(F.ctx())
    g2 = g2.astype(idtype).to(F.ctx())
    bg = dgl.batch([g1, g2])

    assert bg.idtype == idtype
    assert bg.device == F.ctx()
    assert bg.ntypes == g2.ntypes
    assert bg.etypes == g2.etypes
    assert bg.canonical_etypes == g2.canonical_etypes
    assert bg.batch_size == 2

    # Test number of nodes
    for ntype in bg.ntypes:
        print(ntype)
        assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
            g1.num_nodes(ntype),
            g2.num_nodes(ntype),
        ]
        assert bg.num_nodes(ntype) == (
            g1.num_nodes(ntype) + g2.num_nodes(ntype)
        )

    # Test number of edges
    for etype in bg.canonical_etypes:
        assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
            g1.num_edges(etype),
            g2.num_edges(etype),
        ]
        assert bg.num_edges(etype) == (
            g1.num_edges(etype) + g2.num_edges(etype)
        )

    # Test relabeled nodes
    for ntype in bg.ntypes:
        assert list(F.asnumpy(bg.nodes(ntype))) == list(
            range(bg.num_nodes(ntype))
        )

    # Test relabeled edges
    src, dst = bg.edges(etype=("user", "follows", "user"))
    assert list(F.asnumpy(src)) == [0, 1, 4, 5]
    assert list(F.asnumpy(dst)) == [1, 2, 5, 6]
    src, dst = bg.edges(etype=("user", "follows", "developer"))
    assert list(F.asnumpy(src)) == [0, 1, 4, 5]
    assert list(F.asnumpy(dst)) == [1, 2, 4, 5]
    src, dst, eid = bg.edges(etype="plays", form="all")
    assert list(F.asnumpy(src)) == [0, 1, 2, 3, 4, 5, 6]
    assert list(F.asnumpy(dst)) == [0, 0, 1, 1, 2, 2, 3]
    assert list(F.asnumpy(eid)) == [0, 1, 2, 3, 4, 5, 6]

    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(g1, g3)
    check_equivalence_between_heterographs(g2, g4)

    # Test dtype cast
    if idtype == "int32":
        bg_cast = bg.long()
    else:
        bg_cast = bg.int()
    assert bg.batch_size == bg_cast.batch_size

    # Test local var
    bg_local = bg.local_var()
    assert bg.batch_size == bg_local.batch_size


@parametrize_idtype
def test_batching_batched(idtype):
    """Test batching a DGLGraph and a batched DGLGraph."""
    g1 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g2 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    bg1 = dgl.batch([g1, g2])
    g3 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0], [1]),
            ("user", "plays", "game"): ([1], [0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    bg2 = dgl.batch([bg1, g3])
    assert bg2.idtype == idtype
    assert bg2.device == F.ctx()
    assert bg2.ntypes == g3.ntypes
    assert bg2.etypes == g3.etypes
    assert bg2.canonical_etypes == g3.canonical_etypes
    assert bg2.batch_size == 3

    # Test number of nodes
    for ntype in bg2.ntypes:
        assert F.asnumpy(bg2.batch_num_nodes(ntype)).tolist() == [
            g1.num_nodes(ntype),
            g2.num_nodes(ntype),
            g3.num_nodes(ntype),
        ]
        assert bg2.num_nodes(ntype) == (
            g1.num_nodes(ntype) + g2.num_nodes(ntype) + g3.num_nodes(ntype)
        )

    # Test number of edges
    for etype in bg2.canonical_etypes:
        assert F.asnumpy(bg2.batch_num_edges(etype)).tolist() == [
            g1.num_edges(etype),
            g2.num_edges(etype),
            g3.num_edges(etype),
        ]
        assert bg2.num_edges(etype) == (
            g1.num_edges(etype) + g2.num_edges(etype) + g3.num_edges(etype)
        )

    # Test relabeled nodes
    for ntype in bg2.ntypes:
        assert list(F.asnumpy(bg2.nodes(ntype))) == list(
            range(bg2.num_nodes(ntype))
        )

    # Test relabeled edges
    src, dst = bg2.edges(etype="follows")
    assert list(F.asnumpy(src)) == [0, 1, 3, 4, 6]
    assert list(F.asnumpy(dst)) == [1, 2, 4, 5, 7]
    src, dst = bg2.edges(etype="plays")
    assert list(F.asnumpy(src)) == [0, 1, 3, 4, 7]
    assert list(F.asnumpy(dst)) == [0, 0, 1, 1, 2]

    # Test unbatching graphs
    g4, g5, g6 = dgl.unbatch(bg2)
    check_equivalence_between_heterographs(g1, g4)
    check_equivalence_between_heterographs(g2, g5)
    check_equivalence_between_heterographs(g3, g6)


@parametrize_idtype
def test_features(idtype):
    """Test the features of batched DGLGraphs"""
    g1 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g1.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
    g1.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
    g1.nodes["game"].data["h1"] = F.tensor([[0.0]])
    g1.nodes["game"].data["h2"] = F.tensor([[1.0]])
    g1.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
    g1.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
    g1.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])

    g2 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g2.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
    g2.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
    g2.nodes["game"].data["h1"] = F.tensor([[0.0]])
    g2.nodes["game"].data["h2"] = F.tensor([[1.0]])
    g2.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
    g2.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
    g2.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])

    # test default setting
    bg = dgl.batch([g1, g2])
    assert F.allclose(
        bg.nodes["user"].data["h1"],
        F.cat(
            [g1.nodes["user"].data["h1"], g2.nodes["user"].data["h1"]], dim=0
        ),
    )
    assert F.allclose(
        bg.nodes["user"].data["h2"],
        F.cat(
            [g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
        ),
    )
    assert F.allclose(
        bg.nodes["game"].data["h1"],
        F.cat(
            [g1.nodes["game"].data["h1"], g2.nodes["game"].data["h1"]], dim=0
        ),
    )
    assert F.allclose(
        bg.nodes["game"].data["h2"],
        F.cat(
            [g1.nodes["game"].data["h2"], g2.nodes["game"].data["h2"]], dim=0
        ),
    )
    assert F.allclose(
        bg.edges["follows"].data["h1"],
        F.cat(
            [g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
            dim=0,
        ),
    )
    assert F.allclose(
        bg.edges["follows"].data["h2"],
        F.cat(
            [g1.edges["follows"].data["h2"], g2.edges["follows"].data["h2"]],
            dim=0,
        ),
    )
    assert F.allclose(
        bg.edges["plays"].data["h1"],
        F.cat(
            [g1.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]], dim=0
        ),
    )

    # test specifying ndata/edata
    bg = dgl.batch([g1, g2], ndata=["h2"], edata=["h1"])
    assert F.allclose(
        bg.nodes["user"].data["h2"],
        F.cat(
            [g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
        ),
    )
    assert F.allclose(
        bg.nodes["game"].data["h2"],
        F.cat(
            [g1.nodes["game"].data["h2"], g2.nodes["game"].data["h2"]], dim=0
        ),
    )
    assert F.allclose(
        bg.edges["follows"].data["h1"],
        F.cat(
            [g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
            dim=0,
        ),
    )
    assert F.allclose(
        bg.edges["plays"].data["h1"],
        F.cat(
            [g1.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]], dim=0
        ),
    )
    # Features that were not requested must not be carried into the batch.
    assert "h1" not in bg.nodes["user"].data
    assert "h1" not in bg.nodes["game"].data
    assert "h2" not in bg.edges["follows"].data

    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(
        g1,
        g3,
        node_attrs={"user": ["h2"], "game": ["h2"]},
        edge_attrs={("user", "follows", "user"): ["h1"]},
    )
    check_equivalence_between_heterographs(
        g2,
        g4,
        node_attrs={"user": ["h2"], "game": ["h2"]},
        edge_attrs={("user", "follows", "user"): ["h1"]},
    )


@unittest.skipIf(
    F.backend_name == "mxnet",
    reason="MXNet does not support split array with zero-length segment.",
)
@parametrize_idtype
def test_empty_relation(idtype):
    """Test batching and unbatching when one input graph has a relation with no edges"""
    g1 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([], []),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g1.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
    g1.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
    g1.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
    g1.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])

    g2 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g2.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
    g2.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
    g2.nodes["game"].data["h1"] = F.tensor([[0.0]])
    g2.nodes["game"].data["h2"] = F.tensor([[1.0]])
    g2.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
    g2.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
    g2.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])

    bg = dgl.batch([g1, g2])

    # Test number of nodes
    for ntype in bg.ntypes:
        assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
            g1.num_nodes(ntype),
            g2.num_nodes(ntype),
        ]

    # Test number of edges
    for etype in bg.canonical_etypes:
        assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
            g1.num_edges(etype),
            g2.num_edges(etype),
        ]

    # Test features
    assert F.allclose(
        bg.nodes["user"].data["h1"],
        F.cat(
            [g1.nodes["user"].data["h1"], g2.nodes["user"].data["h1"]], dim=0
        ),
    )
    assert F.allclose(
        bg.nodes["user"].data["h2"],
        F.cat(
            [g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
        ),
    )
    # g1 sets no "game" or "plays" features, so the batched values are compared
    # against g2 alone.
    assert F.allclose(bg.nodes["game"].data["h1"], g2.nodes["game"].data["h1"])
    assert F.allclose(bg.nodes["game"].data["h2"], g2.nodes["game"].data["h2"])
    assert F.allclose(
        bg.edges["follows"].data["h1"],
        F.cat(
            [g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
            dim=0,
        ),
    )
    assert F.allclose(
        bg.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]
    )

    # Test unbatching graphs
    g3, g4 = dgl.unbatch(bg)
    check_equivalence_between_heterographs(
        g1,
        g3,
        node_attrs={"user": ["h1", "h2"], "game": ["h1", "h2"]},
        edge_attrs={("user", "follows", "user"): ["h1"]},
    )
    check_equivalence_between_heterographs(
        g2,
        g4,
        node_attrs={"user": ["h1", "h2"], "game": ["h1", "h2"]},
        edge_attrs={("user", "follows", "user"): ["h1"]},
    )

    # Test graphs without edges
    g1 = dgl.heterograph({("u", "r", "v"): ([], [])}, {"u": 0, "v": 4})
    g2 = dgl.heterograph({("u", "r", "v"): ([], [])}, {"u": 1, "v": 5})
    dgl.batch([g1, g2])


@parametrize_idtype
def test_unbatch2(idtype):
    """Unbatch with explicit node/edge splits that differ from the batch layout."""
    # batch 3 graphs but unbatch to 2
    g1 = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
    g2 = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
    g3 = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
    bg = dgl.batch([g1, g2, g3])
    bnn = F.tensor([8, 4])
    bne = F.tensor([6, 3])
    f1, f2 = dgl.unbatch(bg, node_split=bnn, edge_split=bne)
    u, v = f1.edges(order="eid")
    assert F.allclose(u, F.tensor([0, 1, 2, 4, 5, 6]))
    assert F.allclose(v, F.tensor([1, 2, 3, 5, 6, 7]))
    u, v = f2.edges(order="eid")
    assert F.allclose(u, F.tensor([0, 1, 2]))
    assert F.allclose(v, F.tensor([1, 2, 3]))

    # batch 2 but unbatch to 3
    bg = dgl.batch([f1, f2])
    gg1, gg2, gg3 = dgl.unbatch(bg, F.tensor([4, 4, 4]), F.tensor([3, 3, 3]))
    check_graph_equal(g1, gg1)
    check_graph_equal(g2, gg2)
    check_graph_equal(g3, gg3)


@parametrize_idtype
def test_slice_batch(idtype):
    """Slice each graph out of a batch, in every sparse format, and compare to the input."""
    g1 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([], []),
            ("user", "follows", "game"): ([0, 0], [1, 4]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g2 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1], [0, 0]),
            ("user", "follows", "game"): ([0, 1], [1, 4]),
        },
        num_nodes_dict={"user": 4, "game": 6},
        idtype=idtype,
        device=F.ctx(),
    )
    g3 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0], [2]),
            ("user", "plays", "game"): ([1, 2], [3, 4]),
            ("user", "follows", "game"): ([], []),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g_list = [g1, g2, g3]
    bg = dgl.batch(g_list)
    bg.nodes["user"].data["h1"] = F.randn((bg.num_nodes("user"), 2))
    bg.nodes["user"].data["h2"] = F.randn((bg.num_nodes("user"), 5))
    bg.edges[("user", "follows", "user")].data["h1"] = F.randn(
        (bg.num_edges(("user", "follows", "user")), 2)
    )
    for fmat in ["coo", "csr", "csc"]:
        bg = bg.formats(fmat)
        for i in range(len(g_list)):
            g_i = g_list[i]
            g_slice = dgl.slice_batch(bg, i)
            assert g_i.ntypes == g_slice.ntypes
            assert g_i.canonical_etypes == g_slice.canonical_etypes
            assert g_i.idtype == g_slice.idtype
            assert g_i.device == g_slice.device
            for nty in g_i.ntypes:
                assert g_i.num_nodes(nty) == g_slice.num_nodes(nty)
                # Iterates the features present on the input graph g_i.
                for feat in g_i.nodes[nty].data:
                    assert F.allclose(
                        g_i.nodes[nty].data[feat], g_slice.nodes[nty].data[feat]
                    )
            for ety in g_i.canonical_etypes:
                assert g_i.num_edges(ety) == g_slice.num_edges(ety)
                for feat in g_i.edges[ety].data:
                    assert F.allclose(
                        g_i.edges[ety].data[feat], g_slice.edges[ety].data[feat]
                    )


@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
    """Feature keys set on empty heterographs must still be present after batching."""
    g1 = (
        dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
    )
    g1.nodes["a"].data["nh"] = F.tensor([])
    g1.edges[("a", "to", "a")].data["eh"] = F.tensor([])
    g2 = (
        dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
    )
    g2.nodes["a"].data["nh"] = F.tensor([])
    g2.edges[("a", "to", "a")].data["eh"] = F.tensor([])
    g = dgl.batch([g1, g2])
    assert "nh" in g.nodes["a"].data
    assert "eh" in g.edges[("a", "to", "a")].data


def test_batch_netypes():
    """Batch bipartite graphs built from NetworkX; only checks that batching does not raise."""
    # Test for https://github.com/dmlc/dgl/issues/2808
    import networkx as nx

    B = nx.DiGraph()
    B.add_nodes_from(
        [1, 2, 3, 4],
        bipartite=0,
        some_attr=F.tensor([1, 2, 3, 4], dtype=F.float32),
    )
    B.add_nodes_from(["a", "b", "c"], bipartite=1)
    B.add_edges_from(
        [(1, "a"), (1, "b"), (2, "b"), (2, "c"), (3, "c"), (4, "a")]
    )

    g_dict = {
        0: dgl.bipartite_from_networkx(B, "A", "e", "B"),
        1: dgl.bipartite_from_networkx(B, "B", "e", "A"),
        2: dgl.bipartite_from_networkx(B, "A", "e", "B", u_attrs=["some_attr"]),
        3: dgl.bipartite_from_networkx(B, "B", "e", "A", u_attrs=["some_attr"]),
    }
    for _, g in g_dict.items():
        dgl.batch((g, g, g))


if __name__ == "__main__":
    # test_topology('int32')
    # test_batching_batched('int32')
    # test_batched_features('int32')
    # test_empty_relation('int64')
    # test_to_device('int32')
    pass

================================================
FILE: tests/python/common/test_convert.py
================================================
import unittest

import backend as F

import dgl
from utils import parametrize_idtype


def get_nodes_by_ntype(nodes, ntype):
    """Return the entries of ``nodes`` whose ``"ntype"`` attribute equals ``ntype``."""
    return dict((k, v) for k, v in nodes.items() if v["ntype"] == ntype)


def edge_attrs(edge):
    """Return the attribute dict of a NetworkX edge tuple."""
    # Edges in Networkx are in the format (src, dst, attrs)
    return edge[2]


def get_edges_by_etype(edges, etype):
    """Return the edges whose ``"etype"`` attribute equals ``etype``."""
    return [e for e in edges if edge_attrs(e)["etype"] == etype]


def check_attrs_for_nodes(nodes, attrs):
    """Return True if every node's attribute keys equal ``attrs``."""
    return all(v.keys() == attrs for v in nodes.values())


def check_attr_values_for_nodes(nodes, attr_name, values):
    """Return whether the stacked ``attr_name`` values of all nodes are close to ``values``."""
    return F.allclose(
        F.stack([v[attr_name] for v in nodes.values()], 0), values
    )


def check_attrs_for_edges(edges, attrs):
    """Return True if every edge's attribute keys equal ``attrs``."""
    return all(edge_attrs(e).keys() == attrs for e in edges)


def check_attr_values_for_edges(edges, attr_name, values):
    """Return whether the stacked ``attr_name`` values of all edges are close to ``values``."""
    return F.allclose(
        F.stack([edge_attrs(e)[attr_name] for e in edges], 0), values
    )


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="`to_networkx` does not support graphs on GPU",
)
@parametrize_idtype
def test_to_networkx(idtype):
    """Convert a heterograph to NetworkX and check node/edge types and attributes."""
    # TODO: adapt and move code from the _test_nx_conversion function in
    # tests/python/common/function/test_basics.py to here
    # (pending resolution of https://github.com/dmlc/dgl/issues/5735).
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "follows", "topic"): ([1, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 3], [3, 4]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    n1 = F.randn((5, 3))
    n2 = F.randn((4, 2))
    e1 = F.randn((2, 3))
    e2 = F.randn((2, 2))

    g.nodes["game"].data["n"] = F.copy_to(n1, ctx=F.ctx())
    g.nodes["user"].data["n"] = F.copy_to(n2, ctx=F.ctx())
    g.edges[("user", "follows", "user")].data["e"] = F.copy_to(e1, ctx=F.ctx())
    g.edges["plays"].data["e"] = F.copy_to(e2, ctx=F.ctx())

    nxg = dgl.to_networkx(
        g,
        node_attrs=["n"],
        edge_attrs=["e"],
    )

    # Test nodes
    nxg_nodes = dict(nxg.nodes(data=True))
    assert len(nxg_nodes) == g.num_nodes()
    assert {v["ntype"] for v in nxg_nodes.values()} == set(g.ntypes)

    nxg_nodes_by_ntype = {}
    for ntype in g.ntypes:
        nxg_nodes_by_ntype[ntype] = get_nodes_by_ntype(nxg_nodes, ntype)
        assert g.num_nodes(ntype) == len(nxg_nodes_by_ntype[ntype])

    assert check_attrs_for_nodes(nxg_nodes_by_ntype["game"], {"ntype", "n"})
    assert check_attr_values_for_nodes(nxg_nodes_by_ntype["game"], "n", n1)
    assert check_attrs_for_nodes(nxg_nodes_by_ntype["user"], {"ntype", "n"})
    assert check_attr_values_for_nodes(nxg_nodes_by_ntype["user"], "n", n2)
    # Nodes without node attributes
    assert check_attrs_for_nodes(nxg_nodes_by_ntype["topic"], {"ntype"})

    # Test edges
    nxg_edges = list(nxg.edges(data=True))
    assert len(nxg_edges) == g.num_edges()
    assert {edge_attrs(e)["etype"] for e in nxg_edges} == set(
        g.canonical_etypes
    )

    nxg_edges_by_etype = {}
    for etype in g.canonical_etypes:
        nxg_edges_by_etype[etype] = get_edges_by_etype(nxg_edges, etype)
        assert g.num_edges(etype) == len(nxg_edges_by_etype[etype])

    assert check_attrs_for_edges(
        nxg_edges_by_etype[("user", "follows", "user")],
        {"id", "etype", "e"},
    )
    assert check_attr_values_for_edges(
        nxg_edges_by_etype[("user", "follows", "user")], "e", e1
    )
    assert check_attrs_for_edges(
        nxg_edges_by_etype[("user", "plays", "game")], {"id", "etype", "e"}
    )
    assert check_attr_values_for_edges(
        nxg_edges_by_etype[("user", "plays", "game")], "e", e2
    )
    # Edges without edge attributes
    assert check_attrs_for_edges(
        nxg_edges_by_etype[("user", "follows", "topic")], {"id", "etype"}
    )

================================================
FILE: tests/python/common/test_ffi.py
================================================
import os
import unittest

import backend as F

import dgl
import numpy as np
import pytest


@unittest.skipIf(os.name == "nt", reason="Cython only works on linux")
def test_cython():
    """Check that the Cython FFI core module can be imported."""
    import dgl._ffi._cy3.core


@pytest.mark.parametrize("arg", [1, 2.3])
def test_callback(arg):
    """Round-trip a scalar through ``_TestPythonCallback`` with a Python callback."""

    def cb(x):
        return x + 1

    ret = dgl._api_internal._TestPythonCallback(cb, arg)
    assert ret == arg + 1


# The leading underscore keeps pytest from collecting this function.
@pytest.mark.parametrize("dtype", [F.float32, F.float64, F.int32, F.int64])
def _test_callback_array(dtype):
    """Round-trip an array through ``_TestPythonCallback`` with a Python callback."""

    def cb(x):
        return F.to_dgl_nd(F.from_dgl_nd(x) + 1)

    arg = F.copy_to(F.tensor([1, 2, 3], dtype=dtype), F.ctx())
    ret = F.from_dgl_nd(
        dgl._api_internal._TestPythonCallback(cb, F.to_dgl_nd(arg))
    )
    assert np.allclose(F.asnumpy(ret), F.asnumpy(arg) + 1)


@pytest.mark.parametrize("arg", [1, 2.3])
def test_callback_thread(arg):
    """Round-trip a scalar through ``_TestPythonCallbackThread`` with a Python callback."""

    def cb(x):
        return x + 1

    ret = dgl._api_internal._TestPythonCallbackThread(cb, arg)
    assert ret == arg + 1


# The leading underscore keeps pytest from collecting this function.
@pytest.mark.parametrize("dtype", [F.float32, F.float64, F.int32, F.int64])
def _test_callback_array_thread(dtype):
    """Round-trip an array through ``_TestPythonCallbackThread`` with a Python callback."""

    def cb(x):
        return F.to_dgl_nd(F.from_dgl_nd(x) + 1)

    arg = F.copy_to(F.tensor([1, 2, 3], dtype=dtype), F.ctx())
    ret = F.from_dgl_nd(
        dgl._api_internal._TestPythonCallbackThread(cb, F.to_dgl_nd(arg))
    )
    assert np.allclose(F.asnumpy(ret), F.asnumpy(arg) + 1)

================================================
FILE: tests/python/common/test_frame.py
================================================
import pickle
import unittest

import backend as F

import dgl
import dgl.ndarray as nd
import numpy as np
from dgl.frame import Column
from utils import parametrize_idtype


def test_column_subcolumn():
    """Chain three ``subcolumn`` calls and compare each with the composed row gather."""
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)

    # subcolumn from cpu context
    i1 = F.tensor([0, 2, 1, 3], dtype=F.int64)
    l1 = original.subcolumn(i1)

    assert len(l1) == i1.shape[0]
    assert F.array_equal(l1.data, F.gather_row(data, i1))

    # next subcolumn from target context
    i2 = F.copy_to(F.tensor([0, 2], dtype=F.int64), F.ctx())
    l2 = l1.subcolumn(i2)

    assert len(l2) == i2.shape[0]
    # Compose the two index tensors on i1's device, then move the result to F.ctx().
    i1i2 = F.copy_to(F.gather_row(i1, F.copy_to(i2, F.context(i1))), F.ctx())
    assert F.array_equal(l2.data, F.gather_row(data, i1i2))

    # next subcolumn also from target context
    i3 = F.copy_to(F.tensor([1], dtype=F.int64), F.ctx())
    l3 = l2.subcolumn(i3)

    assert len(l3) == i3.shape[0]
    i1i2i3 = F.copy_to(
        F.gather_row(i1i2, F.copy_to(i3, F.context(i1i2))), F.ctx()
    )
    assert F.array_equal(l3.data, F.gather_row(data, i1i2i3))


def test_serialize_deserialize_plain():
    """Pickle round-trip of a plain ``Column`` preserves its data."""
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)

    serial = pickle.dumps(original)
    new = pickle.loads(serial)
    print("new = {}".format(new))

    assert F.array_equal(new.data, original.data)


def test_serialize_deserialize_subcolumn():
    """Pickle round-trip of a ``Column`` produced by ``subcolumn`` preserves its data."""
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)

    # subcolumn from cpu context
    i1 = F.tensor([0, 2, 1, 3], dtype=F.int64)
    l1 = original.subcolumn(i1)

    serial = pickle.dumps(l1)
    new = pickle.loads(serial)

    assert F.array_equal(new.data, l1.data)


def test_serialize_deserialize_dtype():
    """Pickle round-trip of a ``Column`` converted with ``astype`` preserves the dtype."""
    data = F.copy_to(
        F.tensor(
            [
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 9.0, 0.0],
                [3.0, 2.0, 1.0, 0.0],
                [1.0, 1.0, 1.0, 1.0],
                [0.0, 2.0, 4.0, 0.0],
            ]
        ),
        F.ctx(),
    )
    original = Column(data)
    original = original.astype(F.int64)

    serial = pickle.dumps(original)
    new = pickle.loads(serial)

    assert new.dtype == F.int64
================================================ FILE: tests/python/common/test_generators.py ================================================ import unittest import backend as F import dgl import numpy as np @unittest.skipIf( F._default_context_str == "gpu", reason="GPU random choice not implemented" ) def test_rand_graph(): g = dgl.rand_graph(10000, 100000) assert g.num_nodes() == 10000 assert g.num_edges() == 100000 # test random seed dgl.random.seed(42) g1 = dgl.rand_graph(100, 30) dgl.random.seed(42) g2 = dgl.rand_graph(100, 30) u1, v1 = g1.edges() u2, v2 = g2.edges() assert F.array_equal(u1, u2) assert F.array_equal(v1, v2) if __name__ == "__main__": test_rand_graph() ================================================ FILE: tests/python/common/test_heterograph-apply-edges.py ================================================ import itertools import unittest from collections import Counter from itertools import product import backend as F import dgl import dgl.function as fn import networkx as nx import numpy as np import pytest import scipy.sparse as spsp import torch from dgl import DGLError from scipy.sparse import rand from utils import get_cases, parametrize_idtype rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean} fill_value = {"sum": 0, "max": float("-inf")} feat_size = 2 @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) def create_test_heterograph(idtype): # test heterograph from the docstring, plus a user -- wishes -- game relation # 3 users, 2 games, 2 developers # metagraph: # ('user', 'follows', 'user'), # ('user', 'plays', 'game'), # ('user', 'wishes', 'game'), # ('developer', 'develops', 'game')]) g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 1, 1], [0, 0, 1]), ("developer", "develops", "game"): ([0, 1, 0], [0, 1, 1]), }, idtype=idtype, device=F.ctx(), ) 
assert g.idtype == idtype assert g.device == F.ctx() return g def create_random_hetero_with_single_source_node_type(idtype): num_nodes = {"n1": 5, "n2": 10, "n3": 15} etypes = [("n1", "r1", "n2"), ("n1", "r2", "n3"), ("n1", "r3", "n2")] edges = {} for etype in etypes: src_ntype, _, dst_ntype = etype arr = spsp.random( num_nodes[src_ntype], num_nodes[dst_ntype], density=1, format="coo", random_state=100, ) edges[etype] = (arr.row, arr.col) return dgl.heterograph(edges, idtype=idtype, device=F.ctx()) @parametrize_idtype def test_unary_copy_u(idtype): def _test(mfunc): g = create_test_heterograph(idtype) x1 = F.randn((g.num_nodes("user"), feat_size)) x2 = F.randn((g.num_nodes("developer"), feat_size)) F.attach_grad(x1) F.attach_grad(x2) g.nodes["user"].data["h"] = x1 g.nodes["developer"].data["h"] = x2 ################################################################# # apply_edges() is called on each relation type separately ################################################################# with F.record_grad(): [ g.apply_edges(fn.copy_u("h", "m"), etype=rel) for rel in g.canonical_etypes ] r1 = g["plays"].edata["m"] F.backward(r1, F.ones(r1.shape)) n_grad1 = F.grad(g.ndata["h"]["user"]) # TODO (Israt): clear not working g.edata["m"].clear() ################################################################# # apply_edges() is called on all relation types ################################################################# g.apply_edges(fn.copy_u("h", "m")) r2 = g["plays"].edata["m"] F.backward(r2, F.ones(r2.shape)) n_grad2 = F.grad(g.nodes["user"].data["h"]) # correctness check def _print_error(a, b): for i, (x, y) in enumerate( zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten()) ): if not np.allclose(x, y): print("@{} {} v.s. 
{}".format(i, x, y)) if not F.allclose(r1, r2): _print_error(r1, r2) assert F.allclose(r1, r2) if not F.allclose(n_grad1, n_grad2): print("node grad") _print_error(n_grad1, n_grad2) assert F.allclose(n_grad1, n_grad2) _test(fn.copy_u) @parametrize_idtype def test_unary_copy_e(idtype): def _test(mfunc): g = create_test_heterograph(idtype) feat_size = 2 x1 = F.randn((4, feat_size)) x2 = F.randn((4, feat_size)) x3 = F.randn((3, feat_size)) x4 = F.randn((3, feat_size)) F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) F.attach_grad(x4) g["plays"].edata["eid"] = x1 g["follows"].edata["eid"] = x2 g["develops"].edata["eid"] = x3 g["wishes"].edata["eid"] = x4 ################################################################# # apply_edges() is called on each relation type separately ################################################################# with F.record_grad(): [ g.apply_edges(fn.copy_e("eid", "m"), etype=rel) for rel in g.canonical_etypes ] r1 = g["develops"].edata["m"] F.backward(r1, F.ones(r1.shape)) e_grad1 = F.grad(g["develops"].edata["eid"]) ################################################################# # apply_edges() is called on all relation types ################################################################# g.apply_edges(fn.copy_e("eid", "m")) r2 = g["develops"].edata["m"] F.backward(r2, F.ones(r2.shape)) e_grad2 = F.grad(g["develops"].edata["eid"]) # # correctness check def _print_error(a, b): for i, (x, y) in enumerate( zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten()) ): if not np.allclose(x, y): print("@{} {} v.s. 
{}".format(i, x, y)) if not F.allclose(r1, r2): _print_error(r1, r2) assert F.allclose(r1, r2) if not F.allclose(e_grad1, e_grad2): print("edge grad") _print_error(e_grad1, e_grad2) assert F.allclose(e_grad1, e_grad2) _test(fn.copy_e) @parametrize_idtype def test_binary_op(idtype): def _test(lhs, rhs, binary_op): g = create_test_heterograph(idtype) n1 = F.randn((g.num_nodes("user"), feat_size)) n2 = F.randn((g.num_nodes("developer"), feat_size)) n3 = F.randn((g.num_nodes("game"), feat_size)) x1 = F.randn((g.num_edges("plays"), feat_size)) x2 = F.randn((g.num_edges("follows"), feat_size)) x3 = F.randn((g.num_edges("develops"), feat_size)) x4 = F.randn((g.num_edges("wishes"), feat_size)) builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs) builtin_msg = getattr(fn, builtin_msg_name) ################################################################# # apply_edges() is called on each relation type separately ################################################################# F.attach_grad(n1) F.attach_grad(n2) F.attach_grad(n3) g.nodes["user"].data["h"] = n1 g.nodes["developer"].data["h"] = n2 g.nodes["game"].data["h"] = n3 F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) F.attach_grad(x4) g["plays"].edata["h"] = x1 g["follows"].edata["h"] = x2 g["develops"].edata["h"] = x3 g["wishes"].edata["h"] = x4 with F.record_grad(): [ g.apply_edges(builtin_msg("h", "h", "m"), etype=rel) for rel in g.canonical_etypes ] r1 = g["plays"].edata["m"] loss = F.sum(r1.view(-1), 0) F.backward(loss) n_grad1 = F.grad(g.nodes["game"].data["h"]) ################################################################# # apply_edges() is called on all relation types ################################################################# F.attach_grad(n1) F.attach_grad(n2) F.attach_grad(n3) g.nodes["user"].data["h"] = n1 g.nodes["developer"].data["h"] = n2 g.nodes["game"].data["h"] = n3 F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) F.attach_grad(x4) g["plays"].edata["h"] = x1 
g["follows"].edata["h"] = x2 g["develops"].edata["h"] = x3 g["wishes"].edata["h"] = x4 with F.record_grad(): g.apply_edges(builtin_msg("h", "h", "m")) r2 = g["plays"].edata["m"] loss = F.sum(r2.view(-1), 0) F.backward(loss) n_grad2 = F.grad(g.nodes["game"].data["h"]) # correctness check def _print_error(a, b): for i, (x, y) in enumerate( zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten()) ): if not np.allclose(x, y): print("@{} {} v.s. {}".format(i, x, y)) if not F.allclose(r1, r2): _print_error(r1, r2) assert F.allclose(r1, r2) if n_grad1 is not None or n_grad2 is not None: if not F.allclose(n_grad1, n_grad2): print("node grad") _print_error(n_grad1, n_grad2) assert F.allclose(n_grad1, n_grad2) target = ["u", "v", "e"] for lhs, rhs in product(target, target): if lhs == rhs: continue for binary_op in ["add", "sub", "mul", "div", "dot"]: print(lhs, rhs, binary_op) _test(lhs, rhs, binary_op) # Here we test heterograph with only single source node type because the format # of node feature is a tensor. 
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_heterograph_with_single_source_node_type_apply_edges(idtype):
    """Run ``u_add_v`` on a heterograph that has a single source node type.

    With only one source node type, ``hg.srcdata["h"]`` is expected to be a
    plain tensor; the test asserts that before calling ``apply_edges``
    without an explicit edge type.
    """
    hg = create_random_hetero_with_single_source_node_type(idtype)
    for ntype in ("n1", "n2", "n3"):
        hg.nodes[ntype].data["h"] = F.randn((hg.num_nodes(ntype), 1))

    # isinstance() is the idiomatic type check and also accepts Tensor
    # subclasses, which an exact ``type(...) ==`` comparison would reject.
    assert isinstance(hg.srcdata["h"], torch.Tensor)
    hg.apply_edges(fn.u_add_v("h", "h", "x"))
def generate_feature(g, broadcast="none", binary_op="none"):
    """Create random source, edge and destination features for ``g``.

    The NumPy RNG is re-seeded with 31 on every call, so the generated
    values are reproducible.

    Parameters
    ----------
    g : graph
        Only ``g.num_nodes()`` and ``g.num_edges()`` are used.
    broadcast : str
        One of 'u', 'e', 'v', 'none'.  The named operand gets the smaller,
        broadcastable trailing shape; any other value gives every operand
        the full shape.
    binary_op : str
        When equal to "dot", an extra trailing dimension of size ``D4`` is
        appended to every feature.

    Returns
    -------
    tuple
        ``(u, v, e)`` features, each cast to ``F.float32``.
    """
    np.random.seed(31)
    nv = g.num_nodes()
    ne = g.num_edges()

    if binary_op == "dot":
        full_shape = (D1, D2, D3, D4)
        reduced_shape = (D2, 1, D4)
    else:
        full_shape = (D1, D2, D3)
        reduced_shape = (D2, 1)

    def _draw(num_rows, target):
        trailing = reduced_shape if broadcast == target else full_shape
        return F.tensor(np.random.uniform(-1, 1, (num_rows,) + trailing))

    # The draw order (u, then e, then v) determines the random values and
    # must not be changed.
    u = _draw(nv, "u")
    e = _draw(ne, "e")
    v = _draw(nv, "v")

    return (
        F.astype(u, F.float32),
        F.astype(v, F.float32),
        F.astype(e, F.float32),
    )
def test_copy_edge_reduce():
    """Compare built-in ``copy_e`` + reducer against the UDF equivalents.

    For each reducer ("sum", "max", "mean"), with both ``update_all`` and a
    partial ``pull`` on every second node, the reduced node output and the
    gradient of the edge feature must match between the two implementations.
    """

    def _test(red, partial):
        g = dgl.from_networkx(nx.erdos_renyi_graph(100, 0.1))
        # NOTE(zihao): add self-loop to avoid zero-degree nodes.
        g.add_edges(g.nodes(), g.nodes())
        g = g.to(F.ctx())
        hu, hv, he = generate_feature(g, "none", "none")
        if partial:
            nid = F.tensor(list(range(0, 100, 2)), g.idtype)

        g.ndata["u"] = F.attach_grad(F.clone(hu))
        g.ndata["v"] = F.attach_grad(F.clone(hv))
        g.edata["e"] = F.attach_grad(F.clone(he))

        # Built-in message and reduce functions.
        with F.record_grad():
            if partial:
                g.pull(
                    nid,
                    fn.copy_e(e="e", out="m"),
                    builtin[red](msg="m", out="r1"),
                )
            else:
                g.update_all(
                    fn.copy_e(e="e", out="m"), builtin[red](msg="m", out="r1")
                )
            r1 = g.ndata["r1"]
            F.backward(F.reduce_sum(r1))
            e_grad1 = F.grad(g.edata["e"])

        # reset grad
        g.ndata["u"] = F.attach_grad(F.clone(hu))
        g.ndata["v"] = F.attach_grad(F.clone(hv))
        g.edata["e"] = F.attach_grad(F.clone(he))

        # User-defined message and reduce functions.
        with F.record_grad():
            if partial:
                g.pull(nid, udf_copy_edge, udf_reduce[red])
            else:
                g.update_all(udf_copy_edge, udf_reduce[red])
            r2 = g.ndata["r2"]
            F.backward(F.reduce_sum(r2))
            e_grad2 = F.grad(g.edata["e"])

        def _print_error(a, b):
            # Print every mismatching element.  A stray ``return`` after the
            # header used to make this loop unreachable.
            print("ERROR: Test copy_edge_{} partial: {}".format(red, partial))
            for i, (x, y) in enumerate(
                zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
            ):
                if not np.allclose(x, y):
                    print("@{} {} v.s. {}".format(i, x, y))

        if not F.allclose(r1, r2):
            _print_error(r1, r2)
        assert F.allclose(r1, r2)
        if not F.allclose(e_grad1, e_grad2):
            print("edge gradient")
            _print_error(e_grad1, e_grad2)
        assert F.allclose(e_grad1, e_grad2)

    _test("sum", False)
    _test("max", False)
    _test("mean", False)
    _test("sum", True)
    _test("max", True)
    _test("mean", True)
mfunc(edges): op = getattr(F, binary_op) lhs_data = target_switch(edges, lhs)[lhs] rhs_data = target_switch(edges, rhs)[rhs] # NOTE(zihao): we need to do batched broadcast # e.g. (68, 3, 1) op (68, 5, 3, 4) while F.ndim(lhs_data) < F.ndim(rhs_data): lhs_data = F.unsqueeze(lhs_data, 1) while F.ndim(rhs_data) < F.ndim(lhs_data): rhs_data = F.unsqueeze(rhs_data, 1) return {"m": op(lhs_data, rhs_data)} def rfunc(nodes): op = getattr(F, reducer) return {"r2": op(nodes.mailbox["m"], 1)} with F.record_grad(): if partial: g.pull(nid, mfunc, rfunc) else: g.update_all(mfunc, rfunc) r2 = g.ndata.pop("r2") F.backward(F.reduce_sum(r2), F.tensor([1.0])) lhs_grad_2 = F.grad(target_feature_switch(g, lhs)) rhs_grad_2 = F.grad(target_feature_switch(g, rhs)) rtol = 1e-4 atol = 1e-4 def _print_error(a, b): print( "ERROR: Test {}_{}_{}_{} broadcast: {} partial: {}".format( lhs, binary_op, rhs, reducer, broadcast, partial ) ) return if lhs == "u": lhs_data = hu elif lhs == "v": lhs_data = hv elif lhs == "e": lhs_data = he if rhs == "u": rhs_data = hu elif rhs == "v": rhs_data = hv elif rhs == "e": rhs_data = he print("lhs", F.asnumpy(lhs_data).tolist()) print("rhs", F.asnumpy(rhs_data).tolist()) for i, (x, y) in enumerate( zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten()) ): if not np.allclose(x, y, rtol, atol): print("@{} {} v.s. {}".format(i, x, y)) if not F.allclose(r1, r2, rtol, atol): _print_error(r1, r2) assert F.allclose(r1, r2, rtol, atol) if not F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol): print("left grad") _print_error(lhs_grad_1, lhs_grad_2) assert F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol) if not F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol): print("right grad") _print_error(rhs_grad_1, rhs_grad_2) assert F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol) g = dgl.graph([]) g.add_nodes(20) # NOTE(zihao): add self-loop to avoid zero-degree nodes. 
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo-zero-degree"]))
def test_mean_zero_degree(g, idtype):
    """Check that ``fn.mean`` yields zeros for nodes with no incoming edges."""
    feat_dim = 3
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.ones((g.num_nodes(), feat_dim))
    g.update_all(fn.copy_u("h", "m"), fn.mean("m", "x"))

    # Indices of the nodes whose in-degree is zero.
    in_deg = F.asnumpy(g.in_degrees())
    (isolated,) = np.where(in_deg == 0)
    v = F.tensor(isolated)

    reduced = F.gather_row(g.ndata["x"], v)
    expected = F.zeros((len(v), feat_dim))
    assert F.allclose(reduced, expected)
def edge_pair_input(sort=False):
    """Return the ``(src, dst)`` edge lists of the fixed 10-node, 20-edge graph.

    The graph has self loops and no multi-edges.  With ``sort`` truthy the
    same edges are returned ordered by ``(src, dst)``; otherwise they are
    returned in their original insertion order.  Fresh lists are built on
    every call.
    """
    if not sort:
        return (
            [0, 0, 4, 5, 0, 4, 7, 4, 4, 3, 2, 7, 7, 5, 3, 2, 1, 9, 6, 1],
            [9, 6, 3, 9, 4, 4, 9, 9, 1, 8, 3, 2, 8, 1, 5, 7, 3, 2, 6, 5],
        )
    return (
        [0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 6, 7, 7, 7, 9],
        [4, 6, 9, 3, 5, 3, 7, 5, 8, 1, 3, 4, 9, 1, 9, 6, 2, 8, 9, 2],
    )
9, 3), (7, 9, 6), (4, 9, 7)]) src, dst, eid = g.in_edges( [9, 0, 8], form="all" ) # test node#0 has no in edges tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set( [(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7), (3, 8, 9), (7, 8, 12)] ) src, dst, eid = g.out_edges(0, form="all") tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4)]) src, dst, eid = g.out_edges( [0, 4, 8], form="all" ) # test node#8 has no out edges tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set( [ (0, 9, 0), (0, 6, 1), (0, 4, 4), (4, 3, 2), (4, 4, 5), (4, 9, 7), (4, 1, 8), ] ) src, dst, eid = g.edges("all", "eid") t_src, t_dst = edge_pair_input() t_tup = list(zip(t_src, t_dst, list(range(20)))) tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set(t_tup) assert list(F.asnumpy(eid)) == list(range(20)) src, dst, eid = g.edges("all", "srcdst") t_src, t_dst = edge_pair_input() t_tup = list(zip(t_src, t_dst, list(range(20)))) tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set(t_tup) assert list(F.asnumpy(src)) == sorted(list(F.asnumpy(src))) assert g.in_degrees(0) == 0 assert g.in_degrees(9) == 4 assert F.allclose(g.in_degrees([0, 9]), F.tensor([0, 4])) assert g.out_degrees(8) == 0 assert g.out_degrees(9) == 1 assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1])) assert np.array_equal( F.sparse_to_numpy(g.adj_external(transpose=True)), scipy_coo_input().toarray().T, ) assert np.array_equal( F.sparse_to_numpy(g.adj_external(transpose=False)), scipy_coo_input().toarray(), ) def _test(g): # test twice to see whether the cached format works or not _test_one(g) _test_one(g) def _test_csr_one(g): assert g.num_nodes() == 10 assert g.num_edges() == 20 for i in range(10): assert g.has_nodes(i) assert not g.has_nodes(11) assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0])) src, dst = 
edge_pair_input(sort=True) for u, v in zip(src, dst): assert g.has_edges_between(u, v) assert not g.has_edges_between(0, 0) assert F.allclose( g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1]) ) assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4]) assert set(F.asnumpy(g.successors(2))) == set([7, 3]) # src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9] # dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2] # eid = [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9] assert g.edge_ids(4, 4) == 11 assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([11, 2])) src, dst = g.find_edges([3, 6, 5]) assert F.allclose(src, F.tensor([1, 2, 2])) assert F.allclose(dst, F.tensor([3, 7, 3])) src, dst, eid = g.in_edges(9, form="all") tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 2), (5, 9, 14), (7, 9, 18), (4, 9, 12)]) src, dst, eid = g.in_edges( [9, 0, 8], form="all" ) # test node#0 has no in edges tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set( [ (0, 9, 2), (5, 9, 14), (7, 9, 18), (4, 9, 12), (3, 8, 8), (7, 8, 17), ] ) src, dst, eid = g.out_edges(0, form="all") tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set([(0, 9, 2), (0, 6, 1), (0, 4, 0)]) src, dst, eid = g.out_edges( [0, 4, 8], form="all" ) # test node#8 has no out edges tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set( [ (0, 9, 2), (0, 6, 1), (0, 4, 0), (4, 3, 10), (4, 4, 11), (4, 9, 12), (4, 1, 9), ] ) src, dst, eid = g.edges("all", "eid") t_src, t_dst = edge_pair_input(sort=True) t_tup = list(zip(t_src, t_dst, list(range(20)))) tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid))) assert set(tup) == set(t_tup) assert list(F.asnumpy(eid)) == list(range(20)) src, dst, eid = g.edges("all", "srcdst") t_src, t_dst = edge_pair_input(sort=True) t_tup = list(zip(t_src, t_dst, list(range(20)))) tup = list(zip(F.asnumpy(src), F.asnumpy(dst), 
def test_scipy_adjmat():
    """Check that ``adj_external`` gives the same matrix in CSR and COO form.

    The comparison is made once with the default ``transpose`` and once
    with ``transpose=False`` passed explicitly.
    """
    g = dgl.graph([])
    g.add_nodes(10)
    g.add_edges(range(9), range(1, 10))

    adj_0 = g.adj_external(scipy_fmt="csr")
    adj_1 = g.adj_external(scipy_fmt="coo")
    assert np.array_equal(adj_0.toarray(), adj_1.toarray())

    adj_t0 = g.adj_external(transpose=False, scipy_fmt="csr")
    adj_t_1 = g.adj_external(transpose=False, scipy_fmt="coo")
    # Compare the matrices built just above.  This assertion used to repeat
    # the adj_0/adj_1 check, leaving adj_t0 and adj_t_1 unverified.
    assert np.array_equal(adj_t0.toarray(), adj_t_1.toarray())
def test_formats():
    """Check degree queries per sparse format and format-restriction rules."""
    g = dgl.rand_graph(10, 20)

    # in_degrees works if coo or csc available
    # out_degrees works if coo or csr available
    # These are called directly: any exception, DGLError included, fails the
    # test with its own traceback instead of being hidden behind a flag.
    g.in_degrees()
    g.out_degrees()
    g.formats("coo").in_degrees()
    g.formats("coo").out_degrees()
    g.formats("csc").in_degrees()
    g.formats("csr").out_degrees()

    # out_degrees does NOT work if only csc is available
    with pytest.raises(DGLError):
        g.formats("csc").out_degrees()

    # in_degrees does NOT work if only csr is available
    with pytest.raises(DGLError):
        g.formats("csr").in_degrees()

    # If the intersection of created formats and allowed formats is
    # not empty, then retain the intersection.
    # Case1: intersection is not empty and intersected is equal to
    # created formats.
    g = g.formats(["coo", "csr"])
    g.create_formats_()
    g = g.formats(["coo", "csr", "csc"])
    assert sorted(g.formats()["created"]) == sorted(["coo", "csr"])
    assert sorted(g.formats()["not created"]) == sorted(["csc"])

    # Case2: intersection is not empty and intersected is not equal
    # to created formats.
    g = g.formats(["coo", "csr"])
    g.create_formats_()
    g = g.formats(["coo", "csc"])
    assert sorted(g.formats()["created"]) == sorted(["coo"])
    assert sorted(g.formats()["not created"]) == sorted(["csc"])

    # If the intersection of created formats and allowed formats is
    # empty, then create a format in the order of `coo` -> `csr` ->
    # `csc`.
    # Case1: intersection is empty and just one format is allowed.
    g = g.formats(["coo", "csr"])
    g.create_formats_()
    g = g.formats(["csc"])
    assert sorted(g.formats()["created"]) == sorted(["csc"])
    assert sorted(g.formats()["not created"]) == sorted([])

    # Case2: intersection is empty and more than one format is allowed.
    g = g.formats("csc")
    g.create_formats_()
    g = g.formats(["csr", "coo"])
    assert sorted(g.formats()["created"]) == sorted(["coo"])
    assert sorted(g.formats()["not created"]) == sorted(["csr"])
bg1.batch_num_nodes(ntype) == bg2.batch_num_nodes(ntype) for canonical_etype in bg1.canonical_etypes: assert bg1.batch_num_edges(canonical_etype) == bg2.batch_num_edges( canonical_etype ) def _assert_is_identical_index(i1, i2): assert i1.slice_data() == i2.slice_data() assert F.array_equal(i1.tousertensor(), i2.tousertensor()) def _reconstruct_pickle(obj): f = io.BytesIO() pickle.dump(obj, f) f.seek(0) obj = pickle.load(f) f.close() return obj def test_pickling_index(): # normal index i = toindex([1, 2, 3]) i.tousertensor() i.todgltensor() # construct a dgl tensor which is unpicklable i2 = _reconstruct_pickle(i) _assert_is_identical_index(i, i2) # slice index i = toindex(slice(5, 10)) i2 = _reconstruct_pickle(i) _assert_is_identical_index(i, i2) def test_pickling_graph_index(): gi = create_graph_index(None, False) gi.add_nodes(3) src_idx = toindex([0, 0]) dst_idx = toindex([1, 2]) gi.add_edges(src_idx, dst_idx) gi2 = _reconstruct_pickle(gi) assert gi2.num_nodes() == gi.num_nodes() src_idx2, dst_idx2, _ = gi2.edges() assert F.array_equal(src_idx.tousertensor(), src_idx2.tousertensor()) assert F.array_equal(dst_idx.tousertensor(), dst_idx2.tousertensor()) def _global_message_func(nodes): return {"x": nodes.data["x"]} @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(exclude=["dglgraph", "two_hetero_batch"]) ) def test_pickling_graph(g, idtype): g = g.astype(idtype) new_g = _reconstruct_pickle(g) check_graph_equal(g, new_g, check_feature=True) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") def test_pickling_batched_heterograph(): # copied from test_heterograph.create_test_heterograph() g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([0, 1], [0, 1]), } ) g2 = dgl.heterograph( { ("user", 
"follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([0, 1], [0, 1]), } ) g.nodes["user"].data["u_h"] = F.randn((3, 4)) g.nodes["game"].data["g_h"] = F.randn((2, 5)) g.edges["plays"].data["p_h"] = F.randn((4, 6)) g2.nodes["user"].data["u_h"] = F.randn((3, 4)) g2.nodes["game"].data["g_h"] = F.randn((2, 5)) g2.edges["plays"].data["p_h"] = F.randn((4, 6)) bg = dgl.batch([g, g2]) new_bg = _reconstruct_pickle(bg) check_graph_equal(bg, new_bg) @unittest.skipIf( F._default_context_str == "gpu", reason="GPU edge_subgraph w/ relabeling not implemented", ) def test_pickling_subgraph(): f1 = io.BytesIO() f2 = io.BytesIO() g = dgl.rand_graph(10000, 100000) g.ndata["x"] = F.randn((10000, 4)) g.edata["x"] = F.randn((100000, 5)) pickle.dump(g, f1) sg = g.subgraph([0, 1]) sgx = sg.ndata["x"] # materialize pickle.dump(sg, f2) # TODO(BarclayII): How should I test that the size of the subgraph pickle file should not # be as large as the size of the original pickle file? 
if __name__ == "__main__":
    # Hand the module to pytest instead of calling the tests by name.  The
    # previous list named functions that are not defined in this file
    # (NameError) and called parametrized tests without their arguments;
    # pytest collects what exists and supplies parameters and skips itself.
    raise SystemExit(pytest.main([__file__]))
@parametrize_idtype
def test_multigraph_node_removal(idtype):
    """Track node/edge counts while removing nodes that carry parallel edges.

    Every node gets two identical self-loops, so each removed node is
    expected to take exactly two edges with it.
    """
    graph = create_graph(idtype, 5)

    def _add_double_self_loop(node):
        for _ in range(2):
            graph.add_edges(node, node)

    def _expect(num_nodes, num_edges):
        assert graph.num_nodes() == num_nodes
        assert graph.num_edges() == num_edges

    for node in range(5):
        _add_double_self_loop(node)
    _expect(5, 10)

    # Drop two nodes.
    graph.remove_nodes([2, 3])
    _expect(3, 6)

    # Grow by one node and add two more self-loops on node 1.
    graph.add_nodes(1)
    _add_double_self_loop(1)
    _expect(4, 8)

    # Drop one more node.
    graph.remove_nodes([0])
    _expect(3, 6)
@parametrize_idtype
def test_node_and_edge_removal(idtype):
    """Interleave node/edge removals and additions and track both counts."""
    graph = create_graph(idtype, 10)

    def _expect(num_nodes, num_edges):
        assert graph.num_nodes() == num_nodes
        assert graph.num_edges() == num_edges

    # One edge for every ordered (src, dst) pair, self-loops included.
    for src, dst in ((u, v) for u in range(10) for v in range(10)):
        graph.add_edges(src, dst)
    graph.edata["id"] = F.arange(0, 100)
    _expect(10, 100)

    # Removing 2 of 10 nodes leaves the 8 x 8 pairs among the survivors.
    graph.remove_nodes([2, 4])
    _expect(8, 64)

    # Remove ten edges by ID.
    graph.remove_edges(range(10, 20))
    _expect(8, 54)

    # New nodes arrive without edges.
    graph.add_nodes(2)
    _expect(10, 54)

    # Connect the two new nodes (IDs 8 and 9) to each other and themselves.
    for src, dst in ((u, v) for u in range(8, 10) for v in range(8, 10)):
        graph.add_edges(src, dst)
    _expect(10, 58)

    # Remove another ten edges by ID.
    graph.remove_edges(range(10, 20))
    _expect(10, 48)
if __name__ == "__main__":
    # Every test in this module takes an ``idtype`` argument (the
    # ``parametrize_idtype`` decorator presumably supplies it under pytest),
    # so a direct run has to pass one explicitly.  The previous version
    # called the tests without arguments and also referenced a
    # ``test_frame_size`` function that this module does not define.
    for _idtype in (F.int32, F.int64):
        test_node_removal(_idtype)
        test_edge_removal(_idtype)
        test_multigraph_node_removal(_idtype)
        test_multigraph_edge_removal(_idtype)
        test_node_and_edge_removal(_idtype)
        test_node_frame(_idtype)
        test_edge_frame(_idtype)
        test_issue1287(_idtype)
@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="Not support tensorflow for now",
)
@parametrize_idtype
def test_multi_process(idtype):
    """Share a heterograph by name and re-open it from a child process.

    The actual comparison runs in ``sub_proc`` inside the child, so the
    child's exit status is the only channel through which a failed assertion
    can reach this test.
    """
    hg = create_test_graph(idtype=idtype)
    # NOTE(review): the returned handle is never read but is kept bound;
    # presumably it keeps the shared-memory copy alive while the child
    # runs -- confirm before removing.
    hg_share = hg.shared_memory("hg1")
    p = mp.Process(target=sub_proc, args=(hg, "hg1"))
    p.start()
    p.join()
    # An uncaught exception in the child (e.g. an AssertionError raised by
    # ``sub_proc``) surfaces here only as a non-zero exit code.  Without this
    # check the test could never fail.
    assert p.exitcode == 0
@parametrize_idtype
def test_v2v_update_all(idtype):
    """``update_all`` with built-in functions must match the UDF versions."""

    def _check_field(fld):
        def copy_src_udf(edges):
            return {"m": edges.src[fld]}

        def weighted_src_udf(edges):
            feat = edges.src[fld]
            # 1-D features multiply the 1-D weights "e1"; anything else uses
            # "e2", the same weights with an extra trailing axis.
            weight_key = "e1" if len(feat.shape) == 1 else "e2"
            return {"m": feat * edges.data[weight_key]}

        def sum_udf(nodes):
            return {fld: F.sum(nodes.mailbox["m"], 1)}

        def double_udf(nodes):
            return {fld: 2 * nodes.data[fld]}

        graph = generate_graph(idtype)

        def _assert_builtin_matches(builtin_msg, udf_msg):
            start = graph.ndata[fld]
            graph.update_all(
                builtin_msg, fn.sum(msg="m", out=fld), double_udf
            )
            from_builtin = graph.ndata[fld]
            # Rewind the feature so the UDF run starts from the same input.
            graph.ndata.update({fld: start})
            graph.update_all(udf_msg, sum_udf, double_udf)
            assert F.allclose(from_builtin, graph.ndata[fld])

        # Plain copy of the source feature.
        _assert_builtin_matches(fn.copy_u(u=fld, out="m"), copy_src_udf)
        # Source feature scaled by the edge weight.
        _assert_builtin_matches(fn.u_mul_e(fld, "e1", "m"), weighted_src_udf)

    # "f1" holds 1-D node features, "f2" holds 2-D node features.
    for field in ("f1", "f2"):
        _check_field(field)
@parametrize_idtype
def test_v2v_pull(idtype):
    """``pull`` with built-in functions must match the UDF versions."""
    pull_targets = F.tensor([1, 2, 3, 9], idtype)

    def _check_field(fld):
        def copy_src_udf(edges):
            return {"m": edges.src[fld]}

        def weighted_src_udf(edges):
            feat = edges.src[fld]
            # 1-D features multiply "e1"; higher-rank features use "e2".
            weight_key = "e1" if len(feat.shape) == 1 else "e2"
            return {"m": feat * edges.data[weight_key]}

        def sum_udf(nodes):
            return {fld: F.sum(nodes.mailbox["m"], 1)}

        def double_udf(nodes):
            return {fld: 2 * nodes.data[fld]}

        graph = generate_graph(idtype)

        def _assert_builtin_matches(builtin_msg, udf_msg):
            start = graph.ndata[fld]
            graph.pull(
                pull_targets,
                builtin_msg,
                fn.sum(msg="m", out=fld),
                double_udf,
            )
            from_builtin = graph.ndata[fld]
            # Rewind the feature so the UDF pull starts from the same input.
            graph.ndata[fld] = start
            graph.pull(pull_targets, udf_msg, sum_udf, double_udf)
            assert F.allclose(from_builtin, graph.ndata[fld])

        # Pull a plain copy of the source feature.
        _assert_builtin_matches(fn.copy_u(u=fld, out="m"), copy_src_udf)
        # Pull the source feature scaled by the edge weight.
        _assert_builtin_matches(fn.u_mul_e(fld, "e1", "m"), weighted_src_udf)

    # "f1" holds 1-D node features, "f2" holds 2-D node features.
    for field in ("f1", "f2"):
        _check_field(field)
F.unsqueeze(edges.data["w1"], 1)} def _mfunc_hxw2(edges): return {"m2": edges.src["h"] * edges.data["w2"]} def _rfunc_m1(nodes): return {"o1": F.sum(nodes.mailbox["m1"], 1)} def _rfunc_m2(nodes): return {"o2": F.sum(nodes.mailbox["m2"], 1)} def _rfunc_m1max(nodes): return {"o3": F.max(nodes.mailbox["m1"], 1)} def _afunc(nodes): ret = {} for k, v in nodes.data.items(): if k.startswith("o"): ret[k] = 2 * v return ret # compute ground truth g.update_all(_mfunc_hxw1, _rfunc_m1, _afunc) o1 = g.ndata.pop("o1") g.update_all(_mfunc_hxw2, _rfunc_m2, _afunc) o2 = g.ndata.pop("o2") g.update_all(_mfunc_hxw1, _rfunc_m1max, _afunc) o3 = g.ndata.pop("o3") # v2v spmv g.update_all( fn.u_mul_e("h", "w1", "m1"), fn.sum(msg="m1", out="o1"), _afunc ) assert F.allclose(o1, g.ndata.pop("o1")) # v2v fallback to e2v g.update_all( fn.u_mul_e("h", "w2", "m2"), fn.sum(msg="m2", out="o2"), _afunc ) assert F.allclose(o2, g.ndata.pop("o2")) @parametrize_idtype def test_pull_multi_fallback(idtype): # create a graph with zero in degree nodes g = dgl.graph([]) g = g.astype(idtype).to(F.ctx()) g.add_nodes(10) for i in range(1, 9): g.add_edges(0, i) g.add_edges(i, 9) g.ndata["h"] = F.randn((10, D)) g.edata["w1"] = F.randn((16,)) g.edata["w2"] = F.randn((16, D)) def _mfunc_hxw1(edges): return {"m1": edges.src["h"] * F.unsqueeze(edges.data["w1"], 1)} def _mfunc_hxw2(edges): return {"m2": edges.src["h"] * edges.data["w2"]} def _rfunc_m1(nodes): return {"o1": F.sum(nodes.mailbox["m1"], 1)} def _rfunc_m2(nodes): return {"o2": F.sum(nodes.mailbox["m2"], 1)} def _rfunc_m1max(nodes): return {"o3": F.max(nodes.mailbox["m1"], 1)} def _afunc(nodes): ret = {} for k, v in nodes.data.items(): if k.startswith("o"): ret[k] = 2 * v return ret # nodes to pull def _pull_nodes(nodes): # compute ground truth g.pull(nodes, _mfunc_hxw1, _rfunc_m1, _afunc) o1 = g.ndata.pop("o1") g.pull(nodes, _mfunc_hxw2, _rfunc_m2, _afunc) o2 = g.ndata.pop("o2") g.pull(nodes, _mfunc_hxw1, _rfunc_m1max, _afunc) o3 = g.ndata.pop("o3") # v2v 
if __name__ == "__main__":
    # Only tests that this module actually defines are listed.  The previous
    # version also called ``test_v2v_update_all_multi_fn``,
    # ``test_v2v_snr_multi_fn``, ``test_e2v_update_all_multi_fn``,
    # ``test_e2v_snr_multi_fn`` and ``test_e2v_recv_multi_fn``, none of which
    # exist here.  Every test takes ``idtype``, so it is passed explicitly
    # for a direct (non-pytest) run.
    for _idtype in (F.int32, F.int64):
        test_v2v_update_all(_idtype)
        test_v2v_snr(_idtype)
        test_v2v_pull(_idtype)
        test_update_all_multi_fallback(_idtype)
        test_pull_multi_fallback(_idtype)
        test_spmv_3d_feat(_idtype)
def create_test_heterograph_large(idtype):
    """Return a random heterograph with 2500 edges in each of four relations.

    One pair of random endpoint arrays (IDs drawn from ``[0, 50)``) is shared
    by every relation.  The graph is created on ``F.ctx()`` with the
    requested ``idtype``; both properties are asserted before returning.
    """
    num_edges = 2500
    src = np.random.randint(0, 50, num_edges)
    dst = np.random.randint(0, 50, num_edges)

    relations = [
        ("user", "follows", "user"),
        ("user", "plays", "game"),
        ("user", "wishes", "game"),
        ("developer", "develops", "game"),
    ]
    graph = dgl.heterograph(
        {relation: (src, dst) for relation in relations},
        idtype=idtype,
        device=F.ctx(),
    )

    assert graph.idtype == idtype
    assert graph.device == F.ctx()
    return graph
"m"), rfunc("m", "y")) r4 = g.nodes["game"].data["y"] r5 = g.nodes["user"].data["y"] r6 = g.nodes["player"].data["y"] loss = r4.sum() + r5.sum() + r6.sum() F.backward(loss) n_grad3 = F.grad(g.nodes["user"].data["h"]) n_grad4 = F.grad(g.nodes["developer"].data["h"]) assert F.allclose(r1, r4) assert F.allclose(r2, r5) assert F.allclose(r3, r6) assert F.allclose(n_grad1, n_grad3) assert F.allclose(n_grad2, n_grad4) _test(fn.copy_u, fn.sum) _test(fn.copy_u, fn.max) _test(fn.copy_u, fn.min) # _test('copy_u', 'mean') @parametrize_idtype def test_unary_copy_e(idtype): def _test(mfunc, rfunc): g = create_test_heterograph_large(idtype) g0 = create_test_heterograph_2(idtype) g1 = create_test_heterograph(idtype) cross_reducer = rfunc.__name__ x1 = F.randn((g.num_edges("plays"), feat_size)) x2 = F.randn((g.num_edges("follows"), feat_size)) x3 = F.randn((g.num_edges("develops"), feat_size)) x4 = F.randn((g.num_edges("wishes"), feat_size)) F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) F.attach_grad(x4) g["plays"].edata["eid"] = x1 g["follows"].edata["eid"] = x2 g["develops"].edata["eid"] = x3 g["wishes"].edata["eid"] = x4 ################################################################# # multi_update_all(): call msg_passing separately for each etype ################################################################# with F.record_grad(): g.multi_update_all( { "plays": (mfunc("eid", "m"), rfunc("m", "y")), "follows": (mfunc("eid", "m"), rfunc("m", "y")), "develops": (mfunc("eid", "m"), rfunc("m", "y")), "wishes": (mfunc("eid", "m"), rfunc("m", "y")), }, cross_reducer, ) r1 = g.nodes["game"].data["y"].clone() r2 = g.nodes["user"].data["y"].clone() loss = r1.sum() + r2.sum() F.backward(loss) e_grad1 = F.grad(g["develops"].edata["eid"]).clone() e_grad2 = F.grad(g["plays"].edata["eid"]).clone() e_grad3 = F.grad(g["wishes"].edata["eid"]).clone() e_grad4 = F.grad(g["follows"].edata["eid"]).clone() {etype: (g[etype].edata.clear()) for _, etype, _ in g.canonical_etypes}, 
################################################################# # update_all(): call msg_passing for all etypes ################################################################# # TODO(Israt): output type can be None in multi_update and empty F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) F.attach_grad(x4) g["plays"].edata["eid"] = x1 g["follows"].edata["eid"] = x2 g["develops"].edata["eid"] = x3 g["wishes"].edata["eid"] = x4 with F.record_grad(): g.update_all(mfunc("eid", "m"), rfunc("m", "y")) r3 = g.nodes["game"].data["y"] r4 = g.nodes["user"].data["y"] loss = r3.sum() + r4.sum() F.backward(loss) e_grad5 = F.grad(g["develops"].edata["eid"]) e_grad6 = F.grad(g["plays"].edata["eid"]) e_grad7 = F.grad(g["wishes"].edata["eid"]) e_grad8 = F.grad(g["follows"].edata["eid"]) # # correctness check def _print_error(a, b): for i, (x, y) in enumerate( zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten()) ): if not np.allclose(x, y): print("@{} {} v.s. {}".format(i, x, y)) assert F.allclose(r1, r3) assert F.allclose(r2, r4) assert F.allclose(e_grad1, e_grad5) assert F.allclose(e_grad2, e_grad6) assert F.allclose(e_grad3, e_grad7) assert F.allclose(e_grad4, e_grad8) _test(fn.copy_e, fn.sum) _test(fn.copy_e, fn.max) _test(fn.copy_e, fn.min) # _test('copy_e', 'mean') @parametrize_idtype def test_binary_op(idtype): def _test(lhs, rhs, binary_op, reducer): g = create_test_heterograph(idtype) x1 = F.randn((g.num_nodes("user"), feat_size)) x2 = F.randn((g.num_nodes("developer"), feat_size)) x3 = F.randn((g.num_nodes("game"), feat_size)) F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) g.nodes["user"].data["h"] = x1 g.nodes["developer"].data["h"] = x2 g.nodes["game"].data["h"] = x3 x1 = F.randn((4, feat_size)) x2 = F.randn((4, feat_size)) x3 = F.randn((3, feat_size)) x4 = F.randn((3, feat_size)) F.attach_grad(x1) F.attach_grad(x2) F.attach_grad(x3) F.attach_grad(x4) g["plays"].edata["h"] = x1 g["follows"].edata["h"] = x2 g["develops"].edata["h"] = x3 
g["wishes"].edata["h"] = x4 builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs) builtin_msg = getattr(fn, builtin_msg_name) builtin_red = getattr(fn, reducer) ################################################################# # multi_update_all(): call msg_passing separately for each etype ################################################################# with F.record_grad(): g.multi_update_all( { etype: (builtin_msg("h", "h", "m"), builtin_red("m", "y")) for etype in g.canonical_etypes }, "sum", ) r1 = g.nodes["game"].data["y"] F.backward(r1, F.ones(r1.shape)) n_grad1 = F.grad(r1) ################################################################# # update_all(): call msg_passing for all etypes ################################################################# g.update_all(builtin_msg("h", "h", "m"), builtin_red("m", "y")) r2 = g.nodes["game"].data["y"] F.backward(r2, F.ones(r2.shape)) n_grad2 = F.grad(r2) # correctness check def _print_error(a, b): for i, (x, y) in enumerate( zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten()) ): if not np.allclose(x, y): print("@{} {} v.s. 
if __name__ == "__main__":
    # The three parametrized tests take ``idtype``; calling them without it
    # (as the previous version did) fails immediately on a direct run.
    for _idtype in (F.int32, F.int64):
        test_unary_copy_u(_idtype)
        test_unary_copy_e(_idtype)
        test_binary_op(_idtype)
    # Regression test for issue #5873; it takes no arguments and was missing
    # from the direct-run list.
    test_multi_update_all_minmax_reduce_with_isolated_nodes()
def create_test_heterograph1(idtype):
    """Build the user/game/developer test graph via ``dgl.to_heterogeneous``.

    A homogeneous graph is created first, tagged with per-node and per-edge
    type IDs, and then converted using the given type-name lists.
    """
    # (src, dst) pairs in homogeneous node IDs, grouped by relation.
    per_relation = (
        [(0, 1), (1, 2)],  # follows
        [(0, 3), (1, 3), (2, 4), (1, 4)],  # plays
        [(0, 4), (2, 3)],  # wishes
        [(5, 3), (6, 4)],  # develops
    )
    pairs = [pair for group in per_relation for pair in group]
    # Transpose the pair list into (all sources, all destinations).
    src_dst = tuple(zip(*pairs))

    node_type_ids = F.tensor([0, 0, 0, 1, 1, 2, 2])
    edge_type_ids = F.tensor([0, 0, 1, 1, 1, 1, 2, 2, 3, 3])

    homo = dgl.graph(src_dst, idtype=idtype, device=F.ctx())
    homo.ndata[dgl.NTYPE] = node_type_ids
    homo.edata[dgl.ETYPE] = edge_type_ids

    node_type_names = ["user", "game", "developer"]
    edge_type_names = ["follows", "plays", "wishes", "develops"]
    return dgl.to_heterogeneous(homo, node_type_names, edge_type_names)
def create_test_heterograph5(idtype):
    """Return a two-relation user/game heterograph with ``"h"`` features.

    Node features are set on "user" and "game"; edge features are set on
    "follows" and "plays".  All feature tensors use ``idtype`` and are copied
    to ``F.ctx()``.
    """

    def _ids(values):
        return F.tensor(values, dtype=idtype)

    def _on_ctx(values):
        return F.copy_to(F.tensor(values, dtype=idtype), ctx=F.ctx())

    graph = dgl.heterograph(
        {
            ("user", "follows", "user"): (_ids([1, 2]), _ids([0, 1])),
            ("user", "plays", "game"): (_ids([0, 1]), _ids([0, 1])),
        },
        idtype=idtype,
        device=F.ctx(),
    )

    # Node features.
    graph.nodes["user"].data["h"] = _on_ctx([1, 1, 1])
    graph.nodes["game"].data["h"] = _on_ctx([2, 2])
    # Edge features.
    graph.edges["follows"].data["h"] = _on_ctx([1, 2])
    graph.edges["plays"].data["h"] = _on_ctx([1, 2])
    return graph
set(g0.canonical_etypes) == set(g1.canonical_etypes) == set(g2.canonical_etypes) ) # Create a bipartite graph from a SciPy matrix src_ids = np.array([2, 3, 4]) dst_ids = np.array([1, 2, 3]) eweight = np.array([0.2, 0.3, 0.5]) sp_mat = ssp.coo_matrix((eweight, (src_ids, dst_ids))) g = dgl.bipartite_from_scipy( sp_mat, utype="user", etype="plays", vtype="game", idtype=idtype, device=device, ) assert g.idtype == idtype assert g.device == device assert g.num_src_nodes() == 5 assert g.num_dst_nodes() == 4 assert g.num_edges() == 3 src, dst = g.edges() assert F.allclose(src, F.tensor([2, 3, 4], dtype=idtype)) assert F.allclose(dst, F.tensor([1, 2, 3], dtype=idtype)) g = dgl.bipartite_from_scipy( sp_mat, utype="_U", etype="_E", vtype="_V", eweight_name="w", idtype=idtype, device=device, ) assert F.allclose(g.edata["w"], F.tensor(eweight)) # Create a bipartite graph from a NetworkX graph nx_g = nx.DiGraph() nx_g.add_nodes_from( [1, 3], bipartite=0, feat1=np.zeros((2)), feat2=np.ones((2)) ) nx_g.add_nodes_from([2, 4, 5], bipartite=1, feat3=np.zeros((3))) nx_g.add_edge(1, 4, weight=np.ones((1)), eid=np.array([1])) nx_g.add_edge(3, 5, weight=np.ones((1)), eid=np.array([0])) g = dgl.bipartite_from_networkx( nx_g, utype="user", etype="plays", vtype="game", idtype=idtype, device=device, ) assert g.idtype == idtype assert g.device == device assert g.num_src_nodes() == 2 assert g.num_dst_nodes() == 3 assert g.num_edges() == 2 src, dst = g.edges() assert F.allclose(src, F.tensor([0, 1], dtype=idtype)) assert F.allclose(dst, F.tensor([1, 2], dtype=idtype)) g = dgl.bipartite_from_networkx( nx_g, utype="_U", etype="_E", vtype="V", u_attrs=["feat1", "feat2"], e_attrs=["weight"], v_attrs=["feat3"], ) assert F.allclose(g.srcdata["feat1"], F.tensor(np.zeros((2, 2)))) assert F.allclose(g.srcdata["feat2"], F.tensor(np.ones((2, 2)))) assert F.allclose(g.dstdata["feat3"], F.tensor(np.zeros((3, 3)))) assert F.allclose(g.edata["weight"], F.tensor(np.ones((2, 1)))) g = 
dgl.bipartite_from_networkx( nx_g, utype="_U", etype="_E", vtype="V", edge_id_attr_name="eid", idtype=idtype, device=device, ) src, dst = g.edges() assert F.allclose(src, F.tensor([1, 0], dtype=idtype)) assert F.allclose(dst, F.tensor([2, 1], dtype=idtype)) # create from scipy spmat = ssp.coo_matrix(([1, 1, 1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4)) g = dgl.from_scipy(spmat, idtype=idtype, device=device) assert g.num_nodes() == 4 assert g.num_edges() == 3 assert g.idtype == idtype assert g.device == device # test inferring number of nodes for heterograph g = dgl.heterograph( { ("l0", "e0", "l1"): ([0, 0], [1, 2]), ("l0", "e1", "l2"): ([2], [2]), ("l2", "e2", "l2"): ([1, 3], [1, 3]), }, idtype=idtype, device=device, ) assert g.num_nodes("l0") == 3 assert g.num_nodes("l1") == 3 assert g.num_nodes("l2") == 4 assert g.idtype == idtype assert g.device == device # test if validate flag works # homo graph with pytest.raises(DGLError): g = dgl.graph( ([0, 0, 0, 1, 1, 2], [0, 1, 2, 0, 1, 2]), num_nodes=2, idtype=idtype, device=device, ) # bipartite graph def _test_validate_bipartite(card): with pytest.raises(DGLError): g = dgl.heterograph( {("_U", "_E", "_V"): ([0, 0, 1, 1, 2], [1, 1, 2, 2, 3])}, {"_U": card[0], "_V": card[1]}, idtype=idtype, device=device, ) _test_validate_bipartite((3, 3)) _test_validate_bipartite((2, 4)) # test from_scipy num_nodes = 10 density = 0.25 for fmt in ["csr", "coo", "csc"]: adj = rand(num_nodes, num_nodes, density=density, format=fmt) g = dgl.from_scipy(adj, eweight_name="w", idtype=idtype) assert g.idtype == idtype assert g.device == F.cpu() assert F.array_equal( g.edata["w"], F.copy_to(F.tensor(adj.data), F.cpu()) ) def test_create2(): mat = ssp.random(20, 30, 0.1) # coo mat = mat.tocoo() row = F.tensor(mat.row, dtype=F.int64) col = F.tensor(mat.col, dtype=F.int64) g = dgl.heterograph( {("A", "AB", "B"): ("coo", (row, col))}, num_nodes_dict={"A": 20, "B": 30}, ) # csr mat = mat.tocsr() indptr = F.tensor(mat.indptr, dtype=F.int64) indices = 
F.tensor(mat.indices, dtype=F.int64) data = F.tensor([], dtype=F.int64) g = dgl.heterograph( {("A", "AB", "B"): ("csr", (indptr, indices, data))}, num_nodes_dict={"A": 20, "B": 30}, ) # csc mat = mat.tocsc() indptr = F.tensor(mat.indptr, dtype=F.int64) indices = F.tensor(mat.indices, dtype=F.int64) data = F.tensor([], dtype=F.int64) g = dgl.heterograph( {("A", "AB", "B"): ("csc", (indptr, indices, data))}, num_nodes_dict={"A": 20, "B": 30}, ) @parametrize_idtype def test_query(idtype): g = create_test_heterograph(idtype) ntypes = ["user", "game", "developer"] canonical_etypes = [ ("user", "follows", "user"), ("user", "plays", "game"), ("user", "wishes", "game"), ("developer", "develops", "game"), ] etypes = ["follows", "plays", "wishes", "develops"] # node & edge types assert set(ntypes) == set(g.ntypes) assert set(etypes) == set(g.etypes) assert set(canonical_etypes) == set(g.canonical_etypes) # metagraph mg = g.metagraph() assert set(g.ntypes) == set(mg.nodes) etype_triplets = [(u, v, e) for u, v, e in mg.edges(keys=True)] assert set( [ ("user", "user", "follows"), ("user", "game", "plays"), ("user", "game", "wishes"), ("developer", "game", "develops"), ] ) == set(etype_triplets) for i in range(len(etypes)): assert g.to_canonical_etype(etypes[i]) == canonical_etypes[i] def _test(g): # number of nodes assert [g.num_nodes(ntype) for ntype in ntypes] == [3, 2, 2] # number of edges assert [g.num_edges(etype) for etype in etypes] == [2, 4, 2, 2] # has_nodes for ntype in ntypes: n = g.num_nodes(ntype) for i in range(n): assert g.has_nodes(i, ntype) assert not g.has_nodes(n, ntype) assert np.array_equal( F.asnumpy(g.has_nodes([0, n], ntype)).astype("int32"), [1, 0] ) assert not g.is_multigraph for etype in etypes: srcs, dsts = edges[etype] for src, dst in zip(srcs, dsts): assert g.has_edges_between(src, dst, etype) assert F.asnumpy(g.has_edges_between(srcs, dsts, etype)).all() srcs, dsts = negative_edges[etype] for src, dst in zip(srcs, dsts): assert not 
g.has_edges_between(src, dst, etype) assert not F.asnumpy(g.has_edges_between(srcs, dsts, etype)).any() srcs, dsts = edges[etype] n_edges = len(srcs) # predecessors & in_edges & in_degree pred = [s for s, d in zip(srcs, dsts) if d == 0] assert set(F.asnumpy(g.predecessors(0, etype)).tolist()) == set( pred ) u, v = g.in_edges([0], etype=etype) assert F.asnumpy(v).tolist() == [0] * len(pred) assert set(F.asnumpy(u).tolist()) == set(pred) assert g.in_degrees(0, etype) == len(pred) # successors & out_edges & out_degree succ = [d for s, d in zip(srcs, dsts) if s == 0] assert set(F.asnumpy(g.successors(0, etype)).tolist()) == set(succ) u, v = g.out_edges([0], etype=etype) assert F.asnumpy(u).tolist() == [0] * len(succ) assert set(F.asnumpy(v).tolist()) == set(succ) assert g.out_degrees(0, etype) == len(succ) # edge_ids for i, (src, dst) in enumerate(zip(srcs, dsts)): assert g.edge_ids(src, dst, etype=etype) == i _, _, eid = g.edge_ids(src, dst, etype=etype, return_uv=True) assert eid == i assert F.asnumpy( g.edge_ids(srcs, dsts, etype=etype) ).tolist() == list(range(n_edges)) u, v, e = g.edge_ids(srcs, dsts, etype=etype, return_uv=True) u, v, e = F.asnumpy(u), F.asnumpy(v), F.asnumpy(e) assert u[e].tolist() == srcs assert v[e].tolist() == dsts # find_edges for eid in [ list(range(n_edges)), np.arange(n_edges), F.astype(F.arange(0, n_edges), g.idtype), ]: u, v = g.find_edges(eid, etype) assert F.asnumpy(u).tolist() == srcs assert F.asnumpy(v).tolist() == dsts # all_edges. 
for order in ["eid"]: u, v, e = g.edges("all", order, etype) assert F.asnumpy(u).tolist() == srcs assert F.asnumpy(v).tolist() == dsts assert F.asnumpy(e).tolist() == list(range(n_edges)) # in_degrees & out_degrees in_degrees = F.asnumpy(g.in_degrees(etype=etype)) out_degrees = F.asnumpy(g.out_degrees(etype=etype)) src_count = Counter(srcs) dst_count = Counter(dsts) utype, _, vtype = g.to_canonical_etype(etype) for i in range(g.num_nodes(utype)): assert out_degrees[i] == src_count[i] for i in range(g.num_nodes(vtype)): assert in_degrees[i] == dst_count[i] edges = { "follows": ([0, 1], [1, 2]), "plays": ([0, 1, 2, 1], [0, 0, 1, 1]), "wishes": ([0, 2], [1, 0]), "develops": ([0, 1], [0, 1]), } # edges that does not exist in the graph negative_edges = { "follows": ([0, 1], [0, 1]), "plays": ([0, 2], [1, 0]), "wishes": ([0, 1], [0, 1]), "develops": ([0, 1], [1, 0]), } g = create_test_heterograph(idtype) _test(g) g = create_test_heterograph1(idtype) _test(g) if F._default_context_str != "gpu": # XXX: CUDA COO operators have not been live yet. g = create_test_heterograph2(idtype) _test(g) etypes = canonical_etypes edges = { ("user", "follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([0, 1], [0, 1]), } # edges that does not exist in the graph negative_edges = { ("user", "follows", "user"): ([0, 1], [0, 1]), ("user", "plays", "game"): ([0, 2], [1, 0]), ("user", "wishes", "game"): ([0, 1], [0, 1]), ("developer", "develops", "game"): ([0, 1], [1, 0]), } g = create_test_heterograph(idtype) _test(g) g = create_test_heterograph1(idtype) _test(g) if F._default_context_str != "gpu": # XXX: CUDA COO operators have not been live yet. 
g = create_test_heterograph2(idtype) _test(g) # test repr print(g) @parametrize_idtype def test_empty_query(idtype): g = dgl.graph(([1, 2, 3], [0, 4, 5]), idtype=idtype, device=F.ctx()) g.add_nodes(0) g.add_edges([], []) g.remove_edges([]) g.remove_nodes([]) assert F.shape(g.has_nodes([])) == (0,) assert F.shape(g.has_edges_between([], [])) == (0,) g.edge_ids([], []) g.edge_ids([], [], return_uv=True) g.find_edges([]) assert F.shape(g.in_edges([], form="eid")) == (0,) u, v = g.in_edges([], form="uv") assert F.shape(u) == (0,) assert F.shape(v) == (0,) u, v, e = g.in_edges([], form="all") assert F.shape(u) == (0,) assert F.shape(v) == (0,) assert F.shape(e) == (0,) assert F.shape(g.out_edges([], form="eid")) == (0,) u, v = g.out_edges([], form="uv") assert F.shape(u) == (0,) assert F.shape(v) == (0,) u, v, e = g.out_edges([], form="all") assert F.shape(u) == (0,) assert F.shape(v) == (0,) assert F.shape(e) == (0,) assert F.shape(g.in_degrees([])) == (0,) assert F.shape(g.out_degrees([])) == (0,) g = dgl.graph(([], []), idtype=idtype, device=F.ctx()) error_thrown = True try: g.in_degrees([0]) fail = False except: pass assert error_thrown error_thrown = True try: g.out_degrees([0]) fail = False except: pass assert error_thrown @unittest.skipIf( F._default_context_str == "gpu", reason="GPU does not have COO impl." 
) def _test_hypersparse(): N1 = 1 << 50 # should crash if allocated a CSR N2 = 1 << 48 g = dgl.heterograph( { ("user", "follows", "user"): ( F.tensor([0], F.int64), F.tensor([1], F.int64), ), ("user", "plays", "game"): ( F.tensor([0], F.int64), F.tensor([N2], F.int64), ), }, {"user": N1, "game": N1}, device=F.ctx(), ) assert g.num_nodes("user") == N1 assert g.num_nodes("game") == N1 assert g.num_edges("follows") == 1 assert g.num_edges("plays") == 1 assert g.has_edges_between(0, 1, "follows") assert not g.has_edges_between(0, 0, "follows") mask = F.asnumpy(g.has_edges_between([0, 0], [0, 1], "follows")).tolist() assert mask == [0, 1] assert g.has_edges_between(0, N2, "plays") assert not g.has_edges_between(0, 0, "plays") mask = F.asnumpy(g.has_edges_between([0, 0], [0, N2], "plays")).tolist() assert mask == [0, 1] assert F.asnumpy(g.predecessors(0, "follows")).tolist() == [] assert F.asnumpy(g.successors(0, "follows")).tolist() == [1] assert F.asnumpy(g.predecessors(1, "follows")).tolist() == [0] assert F.asnumpy(g.successors(1, "follows")).tolist() == [] assert F.asnumpy(g.predecessors(0, "plays")).tolist() == [] assert F.asnumpy(g.successors(0, "plays")).tolist() == [N2] assert F.asnumpy(g.predecessors(N2, "plays")).tolist() == [0] assert F.asnumpy(g.successors(N2, "plays")).tolist() == [] assert g.edge_ids(0, 1, etype="follows") == 0 assert g.edge_ids(0, N2, etype="plays") == 0 u, v = g.find_edges([0], "follows") assert F.asnumpy(u).tolist() == [0] assert F.asnumpy(v).tolist() == [1] u, v = g.find_edges([0], "plays") assert F.asnumpy(u).tolist() == [0] assert F.asnumpy(v).tolist() == [N2] u, v, e = g.all_edges("all", "eid", "follows") assert F.asnumpy(u).tolist() == [0] assert F.asnumpy(v).tolist() == [1] assert F.asnumpy(e).tolist() == [0] u, v, e = g.all_edges("all", "eid", "plays") assert F.asnumpy(u).tolist() == [0] assert F.asnumpy(v).tolist() == [N2] assert F.asnumpy(e).tolist() == [0] assert g.in_degrees(0, "follows") == 0 assert g.in_degrees(1, 
"follows") == 1 assert F.asnumpy(g.in_degrees([0, 1], "follows")).tolist() == [0, 1] assert g.in_degrees(0, "plays") == 0 assert g.in_degrees(N2, "plays") == 1 assert F.asnumpy(g.in_degrees([0, N2], "plays")).tolist() == [0, 1] assert g.out_degrees(0, "follows") == 1 assert g.out_degrees(1, "follows") == 0 assert F.asnumpy(g.out_degrees([0, 1], "follows")).tolist() == [1, 0] assert g.out_degrees(0, "plays") == 1 assert g.out_degrees(N2, "plays") == 0 assert F.asnumpy(g.out_degrees([0, N2], "plays")).tolist() == [1, 0] def _test_edge_ids(): N1 = 1 << 50 # should crash if allocated a CSR N2 = 1 << 48 g = dgl.heterograph( { ("user", "follows", "user"): ( F.tensor([0], F.int64), F.tensor([1], F.int64), ), ("user", "plays", "game"): ( F.tensor([0], F.int64), F.tensor([N2], F.int64), ), }, {"user": N1, "game": N1}, ) with pytest.raises(DGLError): eid = g.edge_ids(0, 0, etype="follows") g2 = dgl.heterograph( { ("user", "follows", "user"): ( F.tensor([0, 0], F.int64), F.tensor([1, 1], F.int64), ), ("user", "plays", "game"): ( F.tensor([0], F.int64), F.tensor([N2], F.int64), ), }, {"user": N1, "game": N1}, device=F.cpu(), ) eid = g2.edge_ids(0, 1, etype="follows") assert eid == 0 @pytest.mark.skipif( F.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_adj(idtype): g = create_test_heterograph(idtype) adj = g.adj("follows") assert F.asnumpy(adj.indices()).tolist() == [[0, 1], [1, 2]] assert np.allclose(F.asnumpy(adj.val), np.array([1, 1])) g.edata["h"] = {("user", "plays", "game"): F.tensor([1, 2, 3, 4])} print(g.edata["h"]) adj = g.adj("plays", "h") assert F.asnumpy(adj.indices()).tolist() == [[0, 1, 2, 1], [0, 0, 1, 1]] assert np.allclose(F.asnumpy(adj.val), np.array([1, 2, 3, 4])) @parametrize_idtype def test_adj_external(idtype): g = create_test_heterograph(idtype) adj = F.sparse_to_numpy(g.adj_external(transpose=True, etype="follows")) assert np.allclose( adj, np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]) ) 
adj = F.sparse_to_numpy(g.adj_external(transpose=False, etype="follows")) assert np.allclose( adj, np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0], [0.0, 0.0, 0.0]]) ) adj = F.sparse_to_numpy(g.adj_external(transpose=True, etype="plays")) assert np.allclose(adj, np.array([[1.0, 1.0, 0.0], [0.0, 1.0, 1.0]])) adj = F.sparse_to_numpy(g.adj_external(transpose=False, etype="plays")) assert np.allclose(adj, np.array([[1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])) adj = g.adj_external(transpose=True, scipy_fmt="csr", etype="follows") assert np.allclose( adj.todense(), np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]), ) adj = g.adj_external(transpose=True, scipy_fmt="coo", etype="follows") assert np.allclose( adj.todense(), np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]), ) adj = g.adj_external(transpose=True, scipy_fmt="csr", etype="plays") assert np.allclose( adj.todense(), np.array([[1.0, 1.0, 0.0], [0.0, 1.0, 1.0]]) ) adj = g.adj_external(transpose=True, scipy_fmt="coo", etype="plays") assert np.allclose( adj.todense(), np.array([[1.0, 1.0, 0.0], [0.0, 1.0, 1.0]]) ) adj = F.sparse_to_numpy(g["follows"].adj_external(transpose=True)) assert np.allclose( adj, np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]]) ) @parametrize_idtype def test_inc(idtype): g = create_test_heterograph(idtype) adj = F.sparse_to_numpy(g["follows"].inc("in")) assert np.allclose(adj, np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])) adj = F.sparse_to_numpy(g["follows"].inc("out")) assert np.allclose(adj, np.array([[1.0, 0.0], [0.0, 1.0], [0.0, 0.0]])) adj = F.sparse_to_numpy(g["follows"].inc("both")) assert np.allclose(adj, np.array([[-1.0, 0.0], [1.0, -1.0], [0.0, 1.0]])) adj = F.sparse_to_numpy(g.inc("in", etype="plays")) assert np.allclose( adj, np.array([[1.0, 1.0, 0.0, 0.0], [0.0, 0.0, 1.0, 1.0]]) ) adj = F.sparse_to_numpy(g.inc("out", etype="plays")) assert np.allclose( adj, np.array( [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 1.0], [0.0, 0.0, 1.0, 0.0]] ), ) adj = 
F.sparse_to_numpy(g.inc("both", etype="follows")) assert np.allclose(adj, np.array([[-1.0, 0.0], [1.0, -1.0], [0.0, 1.0]])) @parametrize_idtype def test_view(idtype): # test single node type g = dgl.heterograph( {("user", "follows", "user"): ([0, 1], [1, 2])}, idtype=idtype, device=F.ctx(), ) f1 = F.randn((3, 6)) g.ndata["h"] = f1 f2 = g.nodes["user"].data["h"] assert F.array_equal(f1, f2) fail = False try: g.ndata["h"] = {"user": f1} except Exception: fail = True assert fail # test single edge type f3 = F.randn((2, 4)) g.edata["h"] = f3 f4 = g.edges["follows"].data["h"] assert F.array_equal(f3, f4) fail = False try: g.edata["h"] = {"follows": f3} except Exception: fail = True assert fail # test data view g = create_test_heterograph(idtype) f1 = F.randn((3, 6)) g.nodes["user"].data["h"] = f1 # ok f2 = g.nodes["user"].data["h"] assert F.array_equal(f1, f2) assert F.array_equal(g.nodes("user"), F.arange(0, 3, idtype)) g.nodes["user"].data.pop("h") # multi type ndata f1 = F.randn((3, 6)) f2 = F.randn((2, 6)) fail = False try: g.ndata["h"] = f1 except Exception: fail = True assert fail f3 = F.randn((2, 4)) g.edges["user", "follows", "user"].data["h"] = f3 f4 = g.edges["user", "follows", "user"].data["h"] f5 = g.edges["follows"].data["h"] assert F.array_equal(f3, f4) assert F.array_equal(f3, f5) assert F.array_equal( g.edges(etype="follows", form="eid"), F.arange(0, 2, idtype) ) g.edges["follows"].data.pop("h") f3 = F.randn((2, 4)) fail = False try: g.edata["h"] = f3 except Exception: fail = True assert fail # test srcdata f1 = F.randn((3, 6)) g.srcnodes["user"].data["h"] = f1 # ok f2 = g.srcnodes["user"].data["h"] assert F.array_equal(f1, f2) assert F.array_equal(g.srcnodes("user"), F.arange(0, 3, idtype)) g.srcnodes["user"].data.pop("h") # test dstdata f1 = F.randn((3, 6)) g.dstnodes["user"].data["h"] = f1 # ok f2 = g.dstnodes["user"].data["h"] assert F.array_equal(f1, f2) assert F.array_equal(g.dstnodes("user"), F.arange(0, 3, idtype)) 
g.dstnodes["user"].data.pop("h") @parametrize_idtype def test_view1(idtype): # test relation view HG = create_test_heterograph(idtype) ntypes = ["user", "game", "developer"] canonical_etypes = [ ("user", "follows", "user"), ("user", "plays", "game"), ("user", "wishes", "game"), ("developer", "develops", "game"), ] etypes = ["follows", "plays", "wishes", "develops"] def _test_query(): for etype in etypes: utype, _, vtype = HG.to_canonical_etype(etype) g = HG[etype] srcs, dsts = edges[etype] for src, dst in zip(srcs, dsts): assert g.has_edges_between(src, dst) assert F.asnumpy(g.has_edges_between(srcs, dsts)).all() srcs, dsts = negative_edges[etype] for src, dst in zip(srcs, dsts): assert not g.has_edges_between(src, dst) assert not F.asnumpy(g.has_edges_between(srcs, dsts)).any() srcs, dsts = edges[etype] n_edges = len(srcs) # predecessors & in_edges & in_degree pred = [s for s, d in zip(srcs, dsts) if d == 0] assert set(F.asnumpy(g.predecessors(0)).tolist()) == set(pred) u, v = g.in_edges([0]) assert F.asnumpy(v).tolist() == [0] * len(pred) assert set(F.asnumpy(u).tolist()) == set(pred) assert g.in_degrees(0) == len(pred) # successors & out_edges & out_degree succ = [d for s, d in zip(srcs, dsts) if s == 0] assert set(F.asnumpy(g.successors(0)).tolist()) == set(succ) u, v = g.out_edges([0]) assert F.asnumpy(u).tolist() == [0] * len(succ) assert set(F.asnumpy(v).tolist()) == set(succ) assert g.out_degrees(0) == len(succ) # edge_ids for i, (src, dst) in enumerate(zip(srcs, dsts)): assert g.edge_ids(src, dst, etype=etype) == i _, _, eid = g.edge_ids(src, dst, etype=etype, return_uv=True) assert eid == i assert F.asnumpy(g.edge_ids(srcs, dsts)).tolist() == list( range(n_edges) ) u, v, e = g.edge_ids(srcs, dsts, return_uv=True) u, v, e = F.asnumpy(u), F.asnumpy(v), F.asnumpy(e) assert u[e].tolist() == srcs assert v[e].tolist() == dsts # find_edges u, v = g.find_edges(list(range(n_edges))) assert F.asnumpy(u).tolist() == srcs assert F.asnumpy(v).tolist() == dsts # 
all_edges. for order in ["eid"]: u, v, e = g.all_edges(form="all", order=order) assert F.asnumpy(u).tolist() == srcs assert F.asnumpy(v).tolist() == dsts assert F.asnumpy(e).tolist() == list(range(n_edges)) # in_degrees & out_degrees in_degrees = F.asnumpy(g.in_degrees()) out_degrees = F.asnumpy(g.out_degrees()) src_count = Counter(srcs) dst_count = Counter(dsts) for i in range(g.num_nodes(utype)): assert out_degrees[i] == src_count[i] for i in range(g.num_nodes(vtype)): assert in_degrees[i] == dst_count[i] edges = { "follows": ([0, 1], [1, 2]), "plays": ([0, 1, 2, 1], [0, 0, 1, 1]), "wishes": ([0, 2], [1, 0]), "develops": ([0, 1], [0, 1]), } # edges that does not exist in the graph negative_edges = { "follows": ([0, 1], [0, 1]), "plays": ([0, 2], [1, 0]), "wishes": ([0, 1], [0, 1]), "develops": ([0, 1], [1, 0]), } _test_query() etypes = canonical_etypes edges = { ("user", "follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([0, 1], [0, 1]), } # edges that does not exist in the graph negative_edges = { ("user", "follows", "user"): ([0, 1], [0, 1]), ("user", "plays", "game"): ([0, 2], [1, 0]), ("user", "wishes", "game"): ([0, 1], [0, 1]), ("developer", "develops", "game"): ([0, 1], [1, 0]), } _test_query() # test features HG.nodes["user"].data["h"] = F.ones((HG.num_nodes("user"), 5)) HG.nodes["game"].data["m"] = F.ones((HG.num_nodes("game"), 3)) * 2 # test only one node type g = HG["follows"] assert g.num_nodes() == 3 # test ndata and edata f1 = F.randn((3, 6)) g.ndata["h"] = f1 # ok f2 = HG.nodes["user"].data["h"] assert F.array_equal(f1, f2) assert F.array_equal(g.nodes(), F.arange(0, 3, g.idtype)) f3 = F.randn((2, 4)) g.edata["h"] = f3 f4 = HG.edges["follows"].data["h"] assert F.array_equal(f3, f4) assert F.array_equal(g.edges(form="eid"), F.arange(0, 2, g.idtype)) @parametrize_idtype def test_flatten(idtype): def check_mapping(g, fg): if 
len(fg.ntypes) == 1: SRC = DST = fg.ntypes[0] else: SRC = fg.ntypes[0] DST = fg.ntypes[1] etypes = F.asnumpy(fg.edata[dgl.ETYPE]).tolist() eids = F.asnumpy(fg.edata[dgl.EID]).tolist() for i, (etype, eid) in enumerate(zip(etypes, eids)): src_g, dst_g = g.find_edges([eid], g.canonical_etypes[etype]) src_fg, dst_fg = fg.find_edges([i]) # TODO(gq): I feel this code is quite redundant; can we just add new members (like # "induced_srcid") to returned heterograph object and not store them as features? assert F.asnumpy(src_g) == F.asnumpy( F.gather_row(fg.nodes[SRC].data[dgl.NID], src_fg)[0] ) tid = F.asnumpy( F.gather_row(fg.nodes[SRC].data[dgl.NTYPE], src_fg) ).item() assert g.canonical_etypes[etype][0] == g.ntypes[tid] assert F.asnumpy(dst_g) == F.asnumpy( F.gather_row(fg.nodes[DST].data[dgl.NID], dst_fg)[0] ) tid = F.asnumpy( F.gather_row(fg.nodes[DST].data[dgl.NTYPE], dst_fg) ).item() assert g.canonical_etypes[etype][2] == g.ntypes[tid] # check for wildcard slices g = create_test_heterograph(idtype) g.nodes["user"].data["h"] = F.ones((3, 5)) g.nodes["game"].data["i"] = F.ones((2, 5)) g.edges["plays"].data["e"] = F.ones((4, 4)) g.edges["wishes"].data["e"] = F.ones((2, 4)) g.edges["wishes"].data["f"] = F.ones((2, 4)) fg = g["user", :, "game"] # user--plays->game and user--wishes->game assert len(fg.ntypes) == 2 assert fg.ntypes == ["user", "game"] assert fg.etypes == ["plays+wishes"] assert fg.idtype == g.idtype assert fg.device == g.device etype = fg.etypes[0] assert fg[etype] is not None # Issue #2166 assert F.array_equal(fg.nodes["user"].data["h"], F.ones((3, 5))) assert F.array_equal(fg.nodes["game"].data["i"], F.ones((2, 5))) assert F.array_equal(fg.edata["e"], F.ones((6, 4))) assert "f" not in fg.edata etypes = F.asnumpy(fg.edata[dgl.ETYPE]).tolist() eids = F.asnumpy(fg.edata[dgl.EID]).tolist() assert set(zip(etypes, eids)) == set( [(3, 0), (3, 1), (2, 1), (2, 0), (2, 3), (2, 2)] ) check_mapping(g, fg) fg = g["user", :, "user"] assert fg.idtype == g.idtype assert 
fg.device == g.device # NOTE(gq): The node/edge types from the parent graph is returned if there is only one # node/edge type. This differs from the behavior above. assert fg.ntypes == ["user"] assert fg.etypes == ["follows"] u1, v1 = g.edges(etype="follows", order="eid") u2, v2 = fg.edges(etype="follows", order="eid") assert F.array_equal(u1, u2) assert F.array_equal(v1, v2) fg = g["developer", :, "game"] assert fg.idtype == g.idtype assert fg.device == g.device assert fg.ntypes == ["developer", "game"] assert fg.etypes == ["develops"] u1, v1 = g.edges(etype="develops", order="eid") u2, v2 = fg.edges(etype="develops", order="eid") assert F.array_equal(u1, u2) assert F.array_equal(v1, v2) fg = g[:, :, :] assert fg.idtype == g.idtype assert fg.device == g.device assert fg.ntypes == ["developer+user", "game+user"] assert fg.etypes == ["develops+follows+plays+wishes"] check_mapping(g, fg) # Test another heterograph g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1, 2], [1, 2, 3]), ("user", "knows", "user"): ([0, 2], [2, 3]), }, idtype=idtype, device=F.ctx(), ) g.nodes["user"].data["h"] = F.randn((4, 3)) g.edges["follows"].data["w"] = F.randn((3, 2)) g.nodes["user"].data["hh"] = F.randn((4, 5)) g.edges["knows"].data["ww"] = F.randn((2, 10)) fg = g["user", :, "user"] assert fg.idtype == g.idtype assert fg.device == g.device assert fg.ntypes == ["user"] assert fg.etypes == ["follows+knows"] check_mapping(g, fg) fg = g["user", :, :] assert fg.idtype == g.idtype assert fg.device == g.device assert fg.ntypes == ["user"] assert fg.etypes == ["follows+knows"] check_mapping(g, fg) @unittest.skipIf( F._default_context_str == "cpu", reason="Need gpu for this test" ) @parametrize_idtype def test_to_device(idtype): # TODO: rewrite this test case to accept different graphs so we # can test reverse graph and batched graph g = create_test_heterograph(idtype) g.nodes["user"].data["h"] = F.ones((3, 5)) g.nodes["game"].data["i"] = F.ones((2, 5)) g.edges["plays"].data["e"] = 
F.ones((4, 4)) assert g.device == F.ctx() g = g.to(F.cpu()) assert g.device == F.cpu() assert F.context(g.nodes["user"].data["h"]) == F.cpu() assert F.context(g.nodes["game"].data["i"]) == F.cpu() assert F.context(g.edges["plays"].data["e"]) == F.cpu() for ntype in g.ntypes: assert F.context(g.batch_num_nodes(ntype)) == F.cpu() for etype in g.canonical_etypes: assert F.context(g.batch_num_edges(etype)) == F.cpu() if F.is_cuda_available(): g1 = g.to(F.cuda()) assert g1.device == F.cuda() assert F.context(g1.nodes["user"].data["h"]) == F.cuda() assert F.context(g1.nodes["game"].data["i"]) == F.cuda() assert F.context(g1.edges["plays"].data["e"]) == F.cuda() for ntype in g1.ntypes: assert F.context(g1.batch_num_nodes(ntype)) == F.cuda() for etype in g1.canonical_etypes: assert F.context(g1.batch_num_edges(etype)) == F.cuda() assert F.context(g.nodes["user"].data["h"]) == F.cpu() assert F.context(g.nodes["game"].data["i"]) == F.cpu() assert F.context(g.edges["plays"].data["e"]) == F.cpu() for ntype in g.ntypes: assert F.context(g.batch_num_nodes(ntype)) == F.cpu() for etype in g.canonical_etypes: assert F.context(g.batch_num_edges(etype)) == F.cpu() with pytest.raises(DGLError): g1.nodes["user"].data["h"] = F.copy_to(F.ones((3, 5)), F.cpu()) with pytest.raises(DGLError): g1.edges["plays"].data["e"] = F.copy_to(F.ones((4, 4)), F.cpu()) @unittest.skipIf( F._default_context_str == "cpu", reason="Need gpu for this test" ) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["block"])) def test_to_device2(g, idtype): g = g.astype(idtype) g = g.to(F.cpu()) assert g.device == F.cpu() if F.is_cuda_available(): g1 = g.to(F.cuda()) assert g1.device == F.cuda() assert g1.ntypes == g.ntypes assert g1.etypes == g.etypes assert g1.canonical_etypes == g.canonical_etypes @unittest.skipIf( F._default_context_str == "cpu", reason="Need gpu for this test" ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Pinning graph inplace only supported for PyTorch", ) 
# NOTE: the two @unittest.skipIf decorators that also apply to this test sit at
# the end of the preceding line.
@parametrize_idtype
def test_pin_memory_(idtype):
    """Exercise in-place pin_memory_() / unpin_memory_() on CPU and GPU graphs."""
    # TODO: rewrite this test case to accept different graphs so we
    #  can test reverse graph and batched graph
    g = create_test_heterograph(idtype)
    g.nodes["user"].data["h"] = F.ones((3, 5))
    g.nodes["game"].data["i"] = F.ones((2, 5))
    g.edges["plays"].data["e"] = F.ones((4, 4))
    g = g.to(F.cpu())
    assert not g.is_pinned()

    # unpin an unpinned CPU graph, directly return
    g.unpin_memory_()
    assert not g.is_pinned()
    assert g.device == F.cpu()

    # pin a CPU graph
    g.pin_memory_()
    assert g.is_pinned()
    assert g.device == F.cpu()
    assert g.nodes["user"].data["h"].is_pinned()
    assert g.nodes["game"].data["i"].is_pinned()
    assert g.edges["plays"].data["e"].is_pinned()
    assert F.context(g.nodes["user"].data["h"]) == F.cpu()
    assert F.context(g.nodes["game"].data["i"]) == F.cpu()
    assert F.context(g.edges["plays"].data["e"]) == F.cpu()
    for ntype in g.ntypes:
        assert F.context(g.batch_num_nodes(ntype)) == F.cpu()
    for etype in g.canonical_etypes:
        assert F.context(g.batch_num_edges(etype)) == F.cpu()

    # it's fine to clone with new formats, but new graphs are not pinned
    # >>> g.formats()
    # {'created': ['coo'], 'not created': ['csr', 'csc']}
    assert not g.formats("csc").is_pinned()
    assert not g.formats("csr").is_pinned()
    # 'coo' formats is already created and thus not cloned
    assert g.formats("coo").is_pinned()

    # pin a pinned graph, directly return
    g.pin_memory_()
    assert g.is_pinned()
    assert g.device == F.cpu()

    # unpin a pinned graph
    g.unpin_memory_()
    assert not g.is_pinned()
    assert g.device == F.cpu()

    g1 = g.to(F.cuda())

    # unpin an unpinned GPU graph, directly return
    g1.unpin_memory_()
    assert not g1.is_pinned()
    assert g1.device == F.cuda()

    # error pinning a GPU graph
    with pytest.raises(DGLError):
        g1.pin_memory_()

    # test pin empty homograph
    g2 = dgl.graph(([], []))
    assert not g2.is_pinned()
    g2.pin_memory_()
    assert g2.is_pinned()
    g2.unpin_memory_()
    assert not g2.is_pinned()

    # test pin heterograph with 0 edge of one relation type
    g3 = dgl.heterograph(
{("a", "b", "c"): ([0, 1], [1, 2]), ("c", "d", "c"): ([], [])} ).astype(idtype) g3.pin_memory_() assert g3.is_pinned() g3.unpin_memory_() assert not g3.is_pinned() @parametrize_idtype def test_convert_bound(idtype): def _test_bipartite_bound(data, card): with pytest.raises(DGLError): dgl.heterograph( {("_U", "_E", "_V"): data}, {"_U": card[0], "_V": card[1]}, idtype=idtype, device=F.ctx(), ) def _test_graph_bound(data, card): with pytest.raises(DGLError): dgl.graph(data, num_nodes=card, idtype=idtype, device=F.ctx()) _test_bipartite_bound(([1, 2], [1, 2]), (2, 3)) _test_bipartite_bound(([0, 1], [1, 4]), (2, 3)) _test_graph_bound(([1, 3], [1, 2]), 3) _test_graph_bound(([0, 1], [1, 3]), 3) @parametrize_idtype def test_convert(idtype): hg = create_test_heterograph(idtype) hs = [] for ntype in hg.ntypes: h = F.randn((hg.num_nodes(ntype), 5)) hg.nodes[ntype].data["h"] = h hs.append(h) hg.nodes["user"].data["x"] = F.randn((3, 3)) ws = [] for etype in hg.canonical_etypes: w = F.randn((hg.num_edges(etype), 5)) hg.edges[etype].data["w"] = w ws.append(w) hg.edges["plays"].data["x"] = F.randn((4, 3)) g = dgl.to_homogeneous(hg, ndata=["h"], edata=["w"]) assert g.idtype == idtype assert g.device == hg.device assert F.array_equal(F.cat(hs, dim=0), g.ndata["h"]) assert "x" not in g.ndata assert F.array_equal(F.cat(ws, dim=0), g.edata["w"]) assert "x" not in g.edata src, dst = g.all_edges(order="eid") src = F.asnumpy(src) dst = F.asnumpy(dst) etype_id, eid = F.asnumpy(g.edata[dgl.ETYPE]), F.asnumpy(g.edata[dgl.EID]) ntype_id, nid = F.asnumpy(g.ndata[dgl.NTYPE]), F.asnumpy(g.ndata[dgl.NID]) for i in range(g.num_edges()): srctype = hg.ntypes[ntype_id[src[i]]] dsttype = hg.ntypes[ntype_id[dst[i]]] etype = hg.etypes[etype_id[i]] src_i, dst_i = hg.find_edges([eid[i]], (srctype, etype, dsttype)) assert F.asnumpy(src_i).item() == nid[src[i]] assert F.asnumpy(dst_i).item() == nid[dst[i]] mg = nx.MultiDiGraph( [ ("user", "user", "follows"), ("user", "game", "plays"), ("user", "game", 
"wishes"), ("developer", "game", "develops"), ] ) for _mg in [None, mg]: hg2 = dgl.to_heterogeneous( g, hg.ntypes, hg.etypes, ntype_field=dgl.NTYPE, etype_field=dgl.ETYPE, metagraph=_mg, ) assert hg2.idtype == hg.idtype assert hg2.device == hg.device assert set(hg.ntypes) == set(hg2.ntypes) assert set(hg.canonical_etypes) == set(hg2.canonical_etypes) for ntype in hg.ntypes: assert hg.num_nodes(ntype) == hg2.num_nodes(ntype) assert F.array_equal( hg.nodes[ntype].data["h"], hg2.nodes[ntype].data["h"] ) for canonical_etype in hg.canonical_etypes: src, dst = hg.all_edges(etype=canonical_etype, order="eid") src2, dst2 = hg2.all_edges(etype=canonical_etype, order="eid") assert F.array_equal(src, src2) assert F.array_equal(dst, dst2) assert F.array_equal( hg.edges[canonical_etype].data["w"], hg2.edges[canonical_etype].data["w"], ) # hetero_from_homo test case 2 g = dgl.graph(([0, 1, 2, 0], [2, 2, 3, 3]), idtype=idtype, device=F.ctx()) g.ndata[dgl.NTYPE] = F.tensor([0, 0, 1, 2]) g.edata[dgl.ETYPE] = F.tensor([0, 0, 1, 2]) hg = dgl.to_heterogeneous(g, ["l0", "l1", "l2"], ["e0", "e1", "e2"]) assert hg.idtype == idtype assert hg.device == g.device assert set(hg.canonical_etypes) == set( [("l0", "e0", "l1"), ("l1", "e1", "l2"), ("l0", "e2", "l2")] ) assert hg.num_nodes("l0") == 2 assert hg.num_nodes("l1") == 1 assert hg.num_nodes("l2") == 1 assert hg.num_edges("e0") == 2 assert hg.num_edges("e1") == 1 assert hg.num_edges("e2") == 1 assert F.array_equal(hg.ndata[dgl.NID]["l0"], F.tensor([0, 1], F.int64)) assert F.array_equal(hg.ndata[dgl.NID]["l1"], F.tensor([2], F.int64)) assert F.array_equal(hg.ndata[dgl.NID]["l2"], F.tensor([3], F.int64)) assert F.array_equal( hg.edata[dgl.EID][("l0", "e0", "l1")], F.tensor([0, 1], F.int64) ) assert F.array_equal( hg.edata[dgl.EID][("l0", "e2", "l2")], F.tensor([3], F.int64) ) assert F.array_equal( hg.edata[dgl.EID][("l1", "e1", "l2")], F.tensor([2], F.int64) ) # hetero_from_homo test case 3 mg = nx.MultiDiGraph( [("user", "movie", 
"watches"), ("user", "TV", "watches")] ) g = dgl.graph(((0, 0), (1, 2)), idtype=idtype, device=F.ctx()) g.ndata[dgl.NTYPE] = F.tensor([0, 1, 2]) g.edata[dgl.ETYPE] = F.tensor([0, 0]) for _mg in [None, mg]: hg = dgl.to_heterogeneous( g, ["user", "TV", "movie"], ["watches"], metagraph=_mg ) assert hg.idtype == g.idtype assert hg.device == g.device assert set(hg.canonical_etypes) == set( [("user", "watches", "movie"), ("user", "watches", "TV")] ) assert hg.num_nodes("user") == 1 assert hg.num_nodes("TV") == 1 assert hg.num_nodes("movie") == 1 assert hg.num_edges(("user", "watches", "TV")) == 1 assert hg.num_edges(("user", "watches", "movie")) == 1 assert len(hg.etypes) == 2 # hetero_to_homo test case 2 hg = dgl.heterograph( {("_U", "_E", "_V"): ([0, 1], [0, 1])}, {"_U": 2, "_V": 3}, idtype=idtype, device=F.ctx(), ) g = dgl.to_homogeneous(hg) assert hg.idtype == g.idtype assert hg.device == g.device assert g.num_nodes() == 5 # hetero_to_subgraph_to_homo hg = dgl.heterograph( { ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]), ("user", "follows", "user"): ([0, 1, 1], [1, 2, 2]), }, idtype=idtype, device=F.ctx(), ) hg.nodes["user"].data["h"] = F.copy_to( F.tensor([[1, 0], [0, 1], [1, 1]], dtype=idtype), ctx=F.ctx() ) sg = dgl.node_subgraph(hg, {"user": [1, 2]}) assert len(sg.ntypes) == 2 assert len(sg.etypes) == 2 assert sg.num_nodes("user") == 2 assert sg.num_nodes("game") == 0 g = dgl.to_homogeneous(sg, ndata=["h"]) assert "h" in g.ndata.keys() assert g.num_nodes() == 2 @unittest.skipIf( F._default_context_str == "gpu", reason="Test on cpu is enough" ) @parametrize_idtype def test_to_homo_zero_nodes(idtype): # Fix gihub issue #2870 g = dgl.heterograph( { ("A", "AB", "B"): ( np.random.randint(0, 200, (1000,)), np.random.randint(0, 200, (1000,)), ), ("B", "BA", "A"): ( np.random.randint(0, 200, (1000,)), np.random.randint(0, 200, (1000,)), ), }, num_nodes_dict={"A": 200, "B": 200, "C": 0}, idtype=idtype, ) g.nodes["A"].data["x"] = F.randn((200, 3)) 
g.nodes["B"].data["x"] = F.randn((200, 3)) gg = dgl.to_homogeneous(g, ["x"]) assert "x" in gg.ndata @parametrize_idtype def test_to_homo2(idtype): # test the result homogeneous graph has nodes and edges sorted by their types hg = create_test_heterograph(idtype) g = dgl.to_homogeneous(hg) ntypes = F.asnumpy(g.ndata[dgl.NTYPE]) etypes = F.asnumpy(g.edata[dgl.ETYPE]) p = 0 for tid, ntype in enumerate(hg.ntypes): num_nodes = hg.num_nodes(ntype) for i in range(p, p + num_nodes): assert ntypes[i] == tid p += num_nodes p = 0 for tid, etype in enumerate(hg.canonical_etypes): num_edges = hg.num_edges(etype) for i in range(p, p + num_edges): assert etypes[i] == tid p += num_edges # test store_type=False g = dgl.to_homogeneous(hg, store_type=False) assert dgl.NTYPE not in g.ndata assert dgl.ETYPE not in g.edata # test return_count=True g, ntype_count, etype_count = dgl.to_homogeneous(hg, return_count=True) for i, count in enumerate(ntype_count): assert count == hg.num_nodes(hg.ntypes[i]) for i, count in enumerate(etype_count): assert count == hg.num_edges(hg.canonical_etypes[i]) @parametrize_idtype def test_invertible_conversion(idtype): # Test whether to_homogeneous and to_heterogeneous are invertible hg = create_test_heterograph(idtype) g = dgl.to_homogeneous(hg) hg2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes) assert_is_identical_hetero(hg, hg2, True) @parametrize_idtype def test_metagraph_reachable(idtype): g = create_test_heterograph(idtype) x = F.randn((3, 5)) g.nodes["user"].data["h"] = x new_g = dgl.metapath_reachable_graph(g, ["follows", "plays"]) assert new_g.idtype == idtype assert new_g.ntypes == ["game", "user"] assert new_g.num_edges() == 3 assert F.asnumpy(new_g.has_edges_between([0, 0, 1], [0, 1, 1])).all() new_g = dgl.metapath_reachable_graph(g, ["follows"]) assert new_g.idtype == idtype assert new_g.ntypes == ["user"] assert new_g.num_edges() == 2 assert F.asnumpy(new_g.has_edges_between([0, 1], [1, 2])).all() @unittest.skipIf( dgl.backend.backend_name == 
"mxnet", reason="MXNet doesn't support bool tensor", ) @parametrize_idtype def test_subgraph_mask(idtype): g = create_test_heterograph(idtype) g_graph = g["follows"] g_bipartite = g["plays"] x = F.randn((3, 5)) y = F.randn((2, 4)) g.nodes["user"].data["h"] = x g.edges["follows"].data["h"] = y def _check_subgraph(g, sg): assert sg.idtype == g.idtype assert sg.device == g.device assert sg.ntypes == g.ntypes assert sg.etypes == g.etypes assert sg.canonical_etypes == g.canonical_etypes assert F.array_equal( F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], idtype) ) assert F.array_equal( F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], idtype) ) assert F.array_equal( F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], idtype) ) assert F.array_equal( F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], idtype) ) assert F.array_equal( F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], idtype) ) assert sg.num_nodes("developer") == 0 assert sg.num_edges("develops") == 0 assert F.array_equal( sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3] ) assert F.array_equal( sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2] ) sg1 = g.subgraph( { "user": F.tensor([False, True, True], dtype=F.bool), "game": F.tensor([True, False, False, False], dtype=F.bool), } ) _check_subgraph(g, sg1) if F._default_context_str != "gpu": # TODO(minjie): enable this later sg2 = g.edge_subgraph( { "follows": F.tensor([False, True], dtype=F.bool), "plays": F.tensor([False, True, False, False], dtype=F.bool), "wishes": F.tensor([False, True], dtype=F.bool), } ) _check_subgraph(g, sg2) @parametrize_idtype def test_subgraph(idtype): g = create_test_heterograph(idtype) g_graph = g["follows"] g_bipartite = g["plays"] x = F.randn((3, 5)) y = F.randn((2, 4)) g.nodes["user"].data["h"] = x g.edges["follows"].data["h"] = y def _check_subgraph(g, sg): assert sg.idtype == g.idtype assert sg.device == g.device assert sg.ntypes == g.ntypes assert sg.etypes == 
# NOTE: this span opens mid-statement -- the first fragment is the right-hand
# side of the `assert sg.etypes ==` left open at the end of the preceding line,
# inside the _check_subgraph helper of test_subgraph.
        g.etypes
        assert sg.canonical_etypes == g.canonical_etypes
        # Induced node/edge IDs refer back to the parent graph.
        assert F.array_equal(
            F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], g.idtype)
        )
        assert F.array_equal(
            F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], g.idtype)
        )
        assert F.array_equal(
            F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype)
        )
        assert F.array_equal(
            F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], g.idtype)
        )
        assert F.array_equal(
            F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], g.idtype)
        )
        assert sg.num_nodes("developer") == 0
        assert sg.num_edges("develops") == 0
        # Features are sliced along with the selected nodes/edges.
        assert F.array_equal(
            sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
        )
        assert F.array_equal(
            sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
        )

    # Python list input
    sg1 = g.subgraph({"user": [1, 2], "game": [0]})
    _check_subgraph(g, sg1)
    if F._default_context_str != "gpu":
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({"follows": [1], "plays": [1], "wishes": [1]})
        _check_subgraph(g, sg2)

    # backend tensor input
    sg1 = g.subgraph(
        {
            "user": F.tensor([1, 2], dtype=idtype),
            "game": F.tensor([0], dtype=idtype),
        }
    )
    _check_subgraph(g, sg1)
    if F._default_context_str != "gpu":
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph(
            {
                "follows": F.tensor([1], dtype=idtype),
                "plays": F.tensor([1], dtype=idtype),
                "wishes": F.tensor([1], dtype=idtype),
            }
        )
        _check_subgraph(g, sg2)

    # numpy input
    sg1 = g.subgraph({"user": np.array([1, 2]), "game": np.array([0])})
    _check_subgraph(g, sg1)
    if F._default_context_str != "gpu":
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph(
            {
                "follows": np.array([1]),
                "plays": np.array([1]),
                "wishes": np.array([1]),
            }
        )
        _check_subgraph(g, sg2)

    def _check_subgraph_single_ntype(g, sg, preserve_nodes=False):
        # Checks for a subgraph of the single-relation "follows" graph;
        # preserve_nodes=True means no node was dropped or relabelled.
        assert sg.idtype == g.idtype
        assert sg.device == g.device
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes

        if not preserve_nodes:
            assert F.array_equal(
# NOTE: this span opens mid-statement -- the first fragment holds the arguments
# of the `assert F.array_equal(` left open at the end of the preceding line,
# inside the _check_subgraph_single_ntype helper of test_subgraph.
                F.tensor(sg.nodes["user"].data[dgl.NID]),
                F.tensor([1, 2], g.idtype),
            )
        else:
            # Nodes were preserved, so every type keeps its node count.
            for ntype in sg.ntypes:
                assert g.num_nodes(ntype) == sg.num_nodes(ntype)

        assert F.array_equal(
            F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype)
        )

        if not preserve_nodes:
            assert F.array_equal(
                sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
            )
        assert F.array_equal(
            sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
        )

    def _check_subgraph_single_etype(g, sg, preserve_nodes=False):
        # Checks for a subgraph of the single-relation "plays" graph.
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes

        if not preserve_nodes:
            assert F.array_equal(
                F.tensor(sg.nodes["user"].data[dgl.NID]),
                F.tensor([0, 1], g.idtype),
            )
            assert F.array_equal(
                F.tensor(sg.nodes["game"].data[dgl.NID]),
                F.tensor([0], g.idtype),
            )
        else:
            for ntype in sg.ntypes:
                assert g.num_nodes(ntype) == sg.num_nodes(ntype)

        assert F.array_equal(
            F.tensor(sg.edges["plays"].data[dgl.EID]),
            F.tensor([0, 1], g.idtype),
        )

    sg1_graph = g_graph.subgraph([1, 2])
    _check_subgraph_single_ntype(g_graph, sg1_graph)
    if F._default_context_str != "gpu":
        # TODO(minjie): enable this later
        sg1_graph = g_graph.edge_subgraph([1])
        _check_subgraph_single_ntype(g_graph, sg1_graph)
        sg1_graph = g_graph.edge_subgraph([1], relabel_nodes=False)
        _check_subgraph_single_ntype(g_graph, sg1_graph, True)
        sg2_bipartite = g_bipartite.edge_subgraph([0, 1])
        _check_subgraph_single_etype(g_bipartite, sg2_bipartite)
        sg2_bipartite = g_bipartite.edge_subgraph([0, 1], relabel_nodes=False)
        _check_subgraph_single_etype(g_bipartite, sg2_bipartite, True)

    def _check_typed_subgraph1(g, sg):
        # Type subgraph holding "user"/"game" and their three relations: it
        # must keep every node and edge of those types.
        assert g.idtype == sg.idtype
        assert g.device == sg.device
        assert set(sg.ntypes) == {"user", "game"}
        assert set(sg.etypes) == {"follows", "plays", "wishes"}
        for ntype in sg.ntypes:
            assert sg.num_nodes(ntype) == g.num_nodes(ntype)
        for etype in sg.etypes:
            src_sg, dst_sg = sg.all_edges(etype=etype, order="eid")
            src_g, dst_g = g.all_edges(etype=etype, order="eid")
            assert
# NOTE: this span opens mid-statement -- the first fragment completes the
# `assert` left open at the end of the preceding line, inside the
# _check_typed_subgraph1 helper of test_subgraph.
                F.array_equal(src_sg, src_g)
            assert F.array_equal(dst_sg, dst_g)
        assert F.array_equal(
            sg.nodes["user"].data["h"], g.nodes["user"].data["h"]
        )
        assert F.array_equal(
            sg.edges["follows"].data["h"], g.edges["follows"].data["h"]
        )
        # Overwrite one row of each parent feature, then compare again.
        g.nodes["user"].data["h"] = F.scatter_row(
            g.nodes["user"].data["h"], F.tensor([2]), F.randn((1, 5))
        )
        g.edges["follows"].data["h"] = F.scatter_row(
            g.edges["follows"].data["h"], F.tensor([1]), F.randn((1, 4))
        )
        assert F.array_equal(
            sg.nodes["user"].data["h"], g.nodes["user"].data["h"]
        )
        assert F.array_equal(
            sg.edges["follows"].data["h"], g.edges["follows"].data["h"]
        )

    def _check_typed_subgraph2(g, sg):
        # Type subgraph holding only the "develops" relation.
        assert set(sg.ntypes) == {"developer", "game"}
        assert set(sg.etypes) == {"develops"}
        for ntype in sg.ntypes:
            assert sg.num_nodes(ntype) == g.num_nodes(ntype)
        for etype in sg.etypes:
            src_sg, dst_sg = sg.all_edges(etype=etype, order="eid")
            src_g, dst_g = g.all_edges(etype=etype, order="eid")
            assert F.array_equal(src_sg, src_g)
            assert F.array_equal(dst_sg, dst_g)

    sg3 = g.node_type_subgraph(["user", "game"])
    _check_typed_subgraph1(g, sg3)
    sg4 = g.edge_type_subgraph(["develops"])
    _check_typed_subgraph2(g, sg4)
    sg5 = g.edge_type_subgraph(["follows", "plays", "wishes"])
    _check_typed_subgraph1(g, sg5)


@parametrize_idtype
def test_apply(idtype):
    """apply_nodes / apply_edges with UDFs on typed and single-type graphs."""

    def node_udf(nodes):
        return {"h": nodes.data["h"] * 2}

    def node_udf2(nodes):
        # Collapses the feature dimension to width 1.
        return {"h": F.sum(nodes.data["h"], dim=1, keepdims=True)}

    def edge_udf(edges):
        return {"h": edges.data["h"] * 2 + edges.src["h"]}

    g = create_test_heterograph(idtype)
    g.nodes["user"].data["h"] = F.ones((3, 5))
    g.apply_nodes(node_udf, ntype="user")
    assert F.array_equal(g.nodes["user"].data["h"], F.ones((3, 5)) * 2)

    g["plays"].edata["h"] = F.ones((4, 5))
    g.apply_edges(edge_udf, etype=("user", "plays", "game"))
    # 1 * 2 (edge feature) + 2 (source feature) == 4
    assert F.array_equal(g["plays"].edata["h"], F.ones((4, 5)) * 4)

    # test apply on graph with only one type
    g["follows"].apply_nodes(node_udf)
    assert F.array_equal(g.nodes["user"].data["h"], F.ones((3, 5)) * 4)
# NOTE: this span opens inside test_apply, whose header and first checks are
# on the preceding line.
    g["plays"].apply_edges(edge_udf)
    # 4 * 2 (edge feature) + 4 (source feature) == 12
    assert F.array_equal(g["plays"].edata["h"], F.ones((4, 5)) * 12)

    # Test the case that feature size changes
    g.nodes["user"].data["h"] = F.ones((3, 5))
    g.apply_nodes(node_udf2, ntype="user")
    assert F.array_equal(g.nodes["user"].data["h"], F.ones((3, 1)) * 5)

    # test fail case
    # fail due to multiple types
    with pytest.raises(DGLError):
        g.apply_nodes(node_udf)

    with pytest.raises(DGLError):
        g.apply_edges(edge_udf)


@parametrize_idtype
def test_level2(idtype):
    """send_and_recv / pull / update_all / multi_update_all with UDFs."""
    # edges = {
    #     'follows': ([0, 1], [1, 2]),
    #     'plays': ([0, 1, 2, 1], [0, 0, 1, 1]),
    #     'wishes': ([0, 2], [1, 0]),
    #     'develops': ([0, 1], [0, 1]),
    # }
    g = create_test_heterograph(idtype)

    def rfunc(nodes):
        return {"y": F.sum(nodes.mailbox["m"], 1)}

    def rfunc2(nodes):
        return {"y": F.max(nodes.mailbox["m"], 1)}

    def mfunc(edges):
        return {"m": edges.src["h"]}

    def afunc(nodes):
        return {"y": nodes.data["y"] + 1}

    #############################################################
    #  send_and_recv
    #############################################################

    g.nodes["user"].data["h"] = F.ones((3, 2))
    g.send_and_recv([2, 3], mfunc, rfunc, etype="plays")
    y = g.nodes["game"].data["y"]
    assert F.array_equal(y, F.tensor([[0.0, 0.0], [2.0, 2.0]]))

    # only one type
    g["plays"].send_and_recv([2, 3], mfunc, rfunc)
    y = g.nodes["game"].data["y"]
    assert F.array_equal(y, F.tensor([[0.0, 0.0], [2.0, 2.0]]))

    # test fail case
    # fail due to multiple types
    with pytest.raises(DGLError):
        g.send_and_recv([2, 3], mfunc, rfunc)

    g.nodes["game"].data.clear()

    #############################################################
    #  pull
    #############################################################

    g.nodes["user"].data["h"] = F.ones((3, 2))
    g.pull(1, mfunc, rfunc, etype="plays")
    y = g.nodes["game"].data["y"]
    assert F.array_equal(y, F.tensor([[0.0, 0.0], [2.0, 2.0]]))

    # only one type
    g["plays"].pull(1, mfunc, rfunc)
    y = g.nodes["game"].data["y"]
    assert F.array_equal(y, F.tensor([[0.0, 0.0], [2.0, 2.0]]))

    # test fail case
    with
# NOTE: this span opens mid-statement -- the first fragment completes the
# `with` left open at the end of the preceding line, inside test_level2.
    pytest.raises(DGLError):
        g.pull(1, mfunc, rfunc)

    g.nodes["game"].data.clear()

    #############################################################
    #  update_all
    #############################################################

    g.nodes["user"].data["h"] = F.ones((3, 2))
    g.update_all(mfunc, rfunc, etype="plays")
    y = g.nodes["game"].data["y"]
    assert F.array_equal(y, F.tensor([[2.0, 2.0], [2.0, 2.0]]))

    # only one type
    g["plays"].update_all(mfunc, rfunc)
    y = g.nodes["game"].data["y"]
    assert F.array_equal(y, F.tensor([[2.0, 2.0], [2.0, 2.0]]))

    # test fail case
    # fail due to multiple types
    with pytest.raises(DGLError):
        g.update_all(mfunc, rfunc)

    # test multi
    g.multi_update_all(
        {"plays": (mfunc, rfunc), ("user", "wishes", "game"): (mfunc, rfunc2)},
        "sum",
    )
    assert F.array_equal(
        g.nodes["game"].data["y"], F.tensor([[3.0, 3.0], [3.0, 3.0]])
    )

    # test multi
    # ("plays" carries a per-relation apply function, and a final apply
    # function is passed as the last argument.)
    g.multi_update_all(
        {
            "plays": (mfunc, rfunc, afunc),
            ("user", "wishes", "game"): (mfunc, rfunc2),
        },
        "sum",
        afunc,
    )
    assert F.array_equal(
        g.nodes["game"].data["y"], F.tensor([[5.0, 5.0], [5.0, 5.0]])
    )

    # test cross reducer
    g.nodes["user"].data["h"] = F.randn((3, 2))
    for cred in ["sum", "max", "min", "mean", "stack"]:
        g.multi_update_all(
            {"plays": (mfunc, rfunc, afunc), "wishes": (mfunc, rfunc2)},
            cred,
            afunc,
        )
        y = g.nodes["game"].data["y"]
        # Recompute each relation on its own to build the reference value.
        g["plays"].update_all(mfunc, rfunc, afunc)
        y1 = g.nodes["game"].data["y"]
        g["wishes"].update_all(mfunc, rfunc2)
        y2 = g.nodes["game"].data["y"]
        if cred == "stack":
            # stack has an internal order by edge type id
            yy = F.stack([y1, y2], 1)
            yy = yy + 1  # final afunc
            assert F.array_equal(y, yy)
        else:
            yy = get_redfn(cred)(F.stack([y1, y2], 0), 0)
            yy = yy + 1  # final afunc
            assert F.array_equal(y, yy)

    # test fail case
    # fail because cannot infer ntype
    # NOTE(review): a dict of per-relation functions is the calling convention
    # used with multi_update_all above; presumably update_all only raises here
    # because no etype is given on a multi-relation graph -- confirm that this
    # is the failure mode the test means to cover.
    with pytest.raises(DGLError):
        g.update_all(
            {"plays": (mfunc, rfunc), "follows": (mfunc, rfunc2)}, "sum"
        )

    g.nodes["game"].data.clear()


@parametrize_idtype
@unittest.skipIf(
    F._default_context_str == "cpu", reason="Need gpu for this test"
)
def
test_more_nnz(idtype): g = dgl.graph( ([0, 0, 0, 0, 0], [1, 1, 1, 1, 1]), idtype=idtype, device=F.ctx() ) g.ndata["x"] = F.copy_to(F.ones((2, 5)), ctx=F.ctx()) g.update_all(fn.copy_u("x", "m"), fn.sum("m", "y")) y = g.ndata["y"] ans = np.zeros((2, 5)) ans[1] = 5 ans = F.copy_to(F.tensor(ans, dtype=F.dtype(y)), ctx=F.ctx()) assert F.array_equal(y, ans) @parametrize_idtype def test_updates(idtype): def msg_func(edges): return {"m": edges.src["h"]} def reduce_func(nodes): return {"y": F.sum(nodes.mailbox["m"], 1)} def apply_func(nodes): return {"y": nodes.data["y"] * 2} g = create_test_heterograph(idtype) x = F.randn((3, 5)) g.nodes["user"].data["h"] = x for msg, red, apply in itertools.product( [fn.copy_u("h", "m"), msg_func], [fn.sum("m", "y"), reduce_func], [None, apply_func], ): multiplier = 1 if apply is None else 2 g["user", "plays", "game"].update_all(msg, red, apply) y = g.nodes["game"].data["y"] assert F.array_equal(y[0], (x[0] + x[1]) * multiplier) assert F.array_equal(y[1], (x[1] + x[2]) * multiplier) del g.nodes["game"].data["y"] g["user", "plays", "game"].send_and_recv( ([0, 1, 2], [0, 1, 1]), msg, red, apply ) y = g.nodes["game"].data["y"] assert F.array_equal(y[0], x[0] * multiplier) assert F.array_equal(y[1], (x[1] + x[2]) * multiplier) del g.nodes["game"].data["y"] # pulls from destination (game) node 0 g["user", "plays", "game"].pull(0, msg, red, apply) y = g.nodes["game"].data["y"] assert F.array_equal(y[0], (x[0] + x[1]) * multiplier) del g.nodes["game"].data["y"] # pushes from source (user) node 0 g["user", "plays", "game"].push(0, msg, red, apply) y = g.nodes["game"].data["y"] assert F.array_equal(y[0], x[0] * multiplier) del g.nodes["game"].data["y"] @parametrize_idtype def test_backward(idtype): g = create_test_heterograph(idtype) x = F.randn((3, 5)) F.attach_grad(x) g.nodes["user"].data["h"] = x with F.record_grad(): g.multi_update_all( { "plays": (fn.copy_u("h", "m"), fn.sum("m", "y")), "wishes": (fn.copy_u("h", "m"), fn.sum("m", "y")), }, 
"sum", ) y = g.nodes["game"].data["y"] F.backward(y, F.ones(y.shape)) print(F.grad(x)) assert F.array_equal( F.grad(x), F.tensor( [ [2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0], [2.0, 2.0, 2.0, 2.0, 2.0], ] ), ) @parametrize_idtype def test_empty_heterograph(idtype): def assert_empty(g): assert g.num_nodes("user") == 0 assert g.num_edges("plays") == 0 assert g.num_nodes("game") == 0 # empty src-dst pair assert_empty(dgl.heterograph({("user", "plays", "game"): ([], [])})) g = dgl.heterograph( {("user", "follows", "user"): ([], [])}, idtype=idtype, device=F.ctx() ) assert g.idtype == idtype assert g.device == F.ctx() assert g.num_nodes("user") == 0 assert g.num_edges("follows") == 0 # empty relation graph with others g = dgl.heterograph( { ("user", "plays", "game"): ([], []), ("developer", "develops", "game"): ([0, 1], [0, 1]), }, idtype=idtype, device=F.ctx(), ) assert g.idtype == idtype assert g.device == F.ctx() assert g.num_nodes("user") == 0 assert g.num_edges("plays") == 0 assert g.num_nodes("game") == 2 assert g.num_edges("develops") == 2 assert g.num_nodes("developer") == 2 @parametrize_idtype def test_types_in_function(idtype): def mfunc1(edges): assert edges.canonical_etype == ("user", "follow", "user") return {} def rfunc1(nodes): assert nodes.ntype == "user" return {} def filter_nodes1(nodes): assert nodes.ntype == "user" return F.zeros((3,)) def filter_edges1(edges): assert edges.canonical_etype == ("user", "follow", "user") return F.zeros((2,)) def mfunc2(edges): assert edges.canonical_etype == ("user", "plays", "game") return {} def rfunc2(nodes): assert nodes.ntype == "game" return {} def filter_nodes2(nodes): assert nodes.ntype == "game" return F.zeros((3,)) def filter_edges2(edges): assert edges.canonical_etype == ("user", "plays", "game") return F.zeros((2,)) g = dgl.heterograph( {("user", "follow", "user"): ((0, 1), (1, 2))}, idtype=idtype, device=F.ctx(), ) g.apply_nodes(rfunc1) g.apply_edges(mfunc1) g.update_all(mfunc1, rfunc1) 
g.send_and_recv([0, 1], mfunc1, rfunc1) g.push([0], mfunc1, rfunc1) g.pull([1], mfunc1, rfunc1) g.filter_nodes(filter_nodes1) g.filter_edges(filter_edges1) g = dgl.heterograph( {("user", "plays", "game"): ([0, 1], [1, 2])}, idtype=idtype, device=F.ctx(), ) g.apply_nodes(rfunc2, ntype="game") g.apply_edges(mfunc2) g.update_all(mfunc2, rfunc2) g.send_and_recv([0, 1], mfunc2, rfunc2) g.push([0], mfunc2, rfunc2) g.pull([1], mfunc2, rfunc2) g.filter_nodes(filter_nodes2, ntype="game") g.filter_edges(filter_edges2) @parametrize_idtype def test_stack_reduce(idtype): # edges = { # 'follows': ([0, 1], [1, 2]), # 'plays': ([0, 1, 2, 1], [0, 0, 1, 1]), # 'wishes': ([0, 2], [1, 0]), # 'develops': ([0, 1], [0, 1]), # } g = create_test_heterograph(idtype) g.nodes["user"].data["h"] = F.randn((3, 200)) def rfunc(nodes): return {"y": F.sum(nodes.mailbox["m"], 1)} def rfunc2(nodes): return {"y": F.max(nodes.mailbox["m"], 1)} def mfunc(edges): return {"m": edges.src["h"]} g.multi_update_all( {"plays": (mfunc, rfunc), "wishes": (mfunc, rfunc2)}, "stack" ) assert g.nodes["game"].data["y"].shape == ( g.num_nodes("game"), 2, 200, ) # only one type-wise update_all, stack still adds one dimension g.multi_update_all({"plays": (mfunc, rfunc)}, "stack") assert g.nodes["game"].data["y"].shape == ( g.num_nodes("game"), 1, 200, ) @parametrize_idtype def test_isolated_ntype(idtype): g = dgl.heterograph( {("A", "AB", "B"): ([0, 1, 2], [1, 2, 3])}, num_nodes_dict={"A": 3, "B": 4, "C": 4}, idtype=idtype, device=F.ctx(), ) assert g.num_nodes("A") == 3 assert g.num_nodes("B") == 4 assert g.num_nodes("C") == 4 g = dgl.heterograph( {("A", "AC", "C"): ([0, 1, 2], [1, 2, 3])}, num_nodes_dict={"A": 3, "B": 4, "C": 4}, idtype=idtype, device=F.ctx(), ) assert g.num_nodes("A") == 3 assert g.num_nodes("B") == 4 assert g.num_nodes("C") == 4 G = dgl.graph( ([0, 1, 2], [4, 5, 6]), num_nodes=11, idtype=idtype, device=F.ctx() ) G.ndata[dgl.NTYPE] = F.tensor( [0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2], dtype=F.int64 ) 
G.edata[dgl.ETYPE] = F.tensor([0, 0, 0], dtype=F.int64) g = dgl.to_heterogeneous(G, ["A", "B", "C"], ["AB"]) assert g.num_nodes("A") == 3 assert g.num_nodes("B") == 4 assert g.num_nodes("C") == 4 @parametrize_idtype def test_ismultigraph(idtype): g1 = dgl.heterograph( {("A", "AB", "B"): ([0, 0, 1, 2], [1, 2, 5, 5])}, {"A": 6, "B": 6}, idtype=idtype, device=F.ctx(), ) assert g1.is_multigraph == False g2 = dgl.heterograph( {("A", "AC", "C"): ([0, 0, 0, 1], [1, 1, 2, 5])}, {"A": 6, "C": 6}, idtype=idtype, device=F.ctx(), ) assert g2.is_multigraph == True g3 = dgl.graph(((0, 1), (1, 2)), num_nodes=6, idtype=idtype, device=F.ctx()) assert g3.is_multigraph == False g4 = dgl.graph( ([0, 0, 1], [1, 1, 2]), num_nodes=6, idtype=idtype, device=F.ctx() ) assert g4.is_multigraph == True g = dgl.heterograph( { ("A", "AB", "B"): ([0, 0, 1, 2], [1, 2, 5, 5]), ("A", "AA", "A"): ([0, 1], [1, 2]), }, {"A": 6, "B": 6}, idtype=idtype, device=F.ctx(), ) assert g.is_multigraph == False g = dgl.heterograph( { ("A", "AB", "B"): ([0, 0, 1, 2], [1, 2, 5, 5]), ("A", "AC", "C"): ([0, 0, 0, 1], [1, 1, 2, 5]), }, {"A": 6, "B": 6, "C": 6}, idtype=idtype, device=F.ctx(), ) assert g.is_multigraph == True g = dgl.heterograph( { ("A", "AB", "B"): ([0, 0, 1, 2], [1, 2, 5, 5]), ("A", "AA", "A"): ([0, 0, 1], [1, 1, 2]), }, {"A": 6, "B": 6}, idtype=idtype, device=F.ctx(), ) assert g.is_multigraph == True g = dgl.heterograph( { ("A", "AC", "C"): ([0, 0, 0, 1], [1, 1, 2, 5]), ("A", "AA", "A"): ([0, 1], [1, 2]), }, {"A": 6, "C": 6}, idtype=idtype, device=F.ctx(), ) assert g.is_multigraph == True @parametrize_idtype def test_graph_index_is_unibipartite(idtype): g1 = dgl.heterograph( {("A", "AB", "B"): ([0, 0, 1], [1, 2, 5])}, idtype=idtype, device=F.ctx(), ) assert g1._graph.is_metagraph_unibipartite() # more complicated bipartite g2 = dgl.heterograph( { ("A", "AB", "B"): ([0, 0, 1], [1, 2, 5]), ("A", "AC", "C"): ([1, 0], [0, 0]), }, idtype=idtype, device=F.ctx(), ) assert 
# NOTE: this span opens mid-statement -- the first fragment completes the
# `assert` left open at the end of the preceding line, inside
# test_graph_index_is_unibipartite.
    g2._graph.is_metagraph_unibipartite()

    # "A" is both a source and a destination type here.
    g3 = dgl.heterograph(
        {
            ("A", "AB", "B"): ([0, 0, 1], [1, 2, 5]),
            ("A", "AC", "C"): ([1, 0], [0, 0]),
            ("A", "AA", "A"): ([0, 1], [0, 1]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    assert not g3._graph.is_metagraph_unibipartite()

    g4 = dgl.heterograph(
        {
            ("A", "AB", "B"): ([0, 0, 1], [1, 2, 5]),
            ("C", "CA", "A"): ([1, 0], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    assert not g4._graph.is_metagraph_unibipartite()


@parametrize_idtype
def test_bipartite(idtype):
    """Source/destination accessors on uni-bipartite heterographs."""
    g1 = dgl.heterograph(
        {("A", "AB", "B"): ([0, 0, 1], [1, 2, 5])},
        idtype=idtype,
        device=F.ctx(),
    )
    assert g1.is_unibipartite
    assert len(g1.ntypes) == 2
    assert g1.etypes == ["AB"]
    assert g1.srctypes == ["A"]
    assert g1.dsttypes == ["B"]
    assert g1.num_nodes("A") == 2
    assert g1.num_nodes("B") == 6
    assert g1.number_of_src_nodes("A") == 2
    assert g1.number_of_src_nodes() == 2
    assert g1.number_of_dst_nodes("B") == 6
    assert g1.number_of_dst_nodes() == 6
    assert g1.num_edges() == 3
    # The same feature must be visible through srcdata, srcnodes[...], the
    # plain type name and the "SRC/"-prefixed type name.
    g1.srcdata["h"] = F.randn((2, 5))
    assert F.array_equal(g1.srcnodes["A"].data["h"], g1.srcdata["h"])
    assert F.array_equal(g1.nodes["A"].data["h"], g1.srcdata["h"])
    assert F.array_equal(g1.nodes["SRC/A"].data["h"], g1.srcdata["h"])
    g1.dstdata["h"] = F.randn((6, 3))
    assert F.array_equal(g1.dstnodes["B"].data["h"], g1.dstdata["h"])
    assert F.array_equal(g1.nodes["B"].data["h"], g1.dstdata["h"])
    assert F.array_equal(g1.nodes["DST/B"].data["h"], g1.dstdata["h"])

    # more complicated bipartite
    g2 = dgl.heterograph(
        {
            ("A", "AB", "B"): ([0, 0, 1], [1, 2, 5]),
            ("A", "AC", "C"): ([1, 0], [0, 0]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    assert g2.is_unibipartite
    assert g2.srctypes == ["A"]
    assert set(g2.dsttypes) == {"B", "C"}
    assert g2.num_nodes("A") == 2
    assert g2.num_nodes("B") == 6
    assert g2.num_nodes("C") == 1
    assert g2.number_of_src_nodes("A") == 2
    assert g2.number_of_src_nodes() == 2
    assert g2.number_of_dst_nodes("B") == 6
    assert g2.number_of_dst_nodes("C") == 1
    g2.srcdata["h"] = F.randn((2, 5))
    assert
F.array_equal(g2.srcnodes["A"].data["h"], g2.srcdata["h"]) assert F.array_equal(g2.nodes["A"].data["h"], g2.srcdata["h"]) assert F.array_equal(g2.nodes["SRC/A"].data["h"], g2.srcdata["h"]) g3 = dgl.heterograph( { ("A", "AB", "B"): ([0, 0, 1], [1, 2, 5]), ("A", "AC", "C"): ([1, 0], [0, 0]), ("A", "AA", "A"): ([0, 1], [0, 1]), }, idtype=idtype, device=F.ctx(), ) assert not g3.is_unibipartite g4 = dgl.heterograph( { ("A", "AB", "B"): ([0, 0, 1], [1, 2, 5]), ("C", "CA", "A"): ([1, 0], [0, 0]), }, idtype=idtype, device=F.ctx(), ) assert not g4.is_unibipartite @parametrize_idtype def test_dtype_cast(idtype): g = dgl.graph(([0, 1, 0, 2], [0, 1, 1, 0]), idtype=idtype, device=F.ctx()) assert g.idtype == idtype g.ndata["feat"] = F.tensor([3, 4, 5]) g.edata["h"] = F.tensor([3, 4, 5, 6]) if idtype == "int32": g_cast = g.long() assert g_cast.idtype == F.int64 else: g_cast = g.int() assert g_cast.idtype == F.int32 check_graph_equal(g, g_cast, check_idtype=False) def test_float_cast(): for t in [F.bfloat16, F.float16, F.float32, F.float64]: idtype = F.int32 g = dgl.heterograph( { ("user", "follows", "user"): ( F.tensor([0, 1, 1, 2, 2, 3], dtype=idtype), F.tensor([0, 0, 1, 1, 2, 2], dtype=idtype), ), ("user", "plays", "game"): ( F.tensor([0, 1, 1], dtype=idtype), F.tensor([0, 0, 1], dtype=idtype), ), }, idtype=idtype, device=F.ctx(), ) uvalues = [1, 2, 3, 4] gvalues = [5, 6] fvalues = [7, 8, 9, 10, 11, 12] pvalues = [13, 14, 15] dataNamesTypes = [ ("a", F.float16), ("b", F.float32), ("c", F.float64), ("d", F.int32), ("e", F.int64), ("f", F.bfloat16), ] for name, type in dataNamesTypes: g.nodes["user"].data[name] = F.copy_to( F.tensor(uvalues, dtype=type), ctx=F.ctx() ) for name, type in dataNamesTypes: g.nodes["game"].data[name] = F.copy_to( F.tensor(gvalues, dtype=type), ctx=F.ctx() ) for name, type in dataNamesTypes: g.edges["follows"].data[name] = F.copy_to( F.tensor(fvalues, dtype=type), ctx=F.ctx() ) for name, type in dataNamesTypes: g.edges["plays"].data[name] = F.copy_to( 
@parametrize_idtype
def test_format(idtype):
    """Exercise ``formats()`` and ``create_formats_()`` on several graphs."""
    every_format = ["coo", "csr", "csc"]

    # single relation
    homo = dgl.graph(
        ([0, 1, 0, 2], [0, 1, 1, 0]), idtype=idtype, device=F.ctx()
    )
    assert homo.formats()["created"] == ["coo"]
    widened = homo.formats(every_format)
    status = widened.formats()
    assert len(status["created"]) + len(status["not created"]) == 3
    widened.create_formats_()
    assert len(widened.formats()["created"]) == 3
    # The graph that ``formats(...)`` was called on still reports only COO.
    assert homo.formats()["created"] == ["coo"]

    # multiple relation
    hetero = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 1, 1]),
            ("developer", "develops", "game"): ([0, 1], [0, 1]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    num_users = hetero["follows"].number_of_src_nodes()
    user_feat = F.randn((num_users, 5))
    hetero["follows"].srcdata["h"] = user_feat
    csc_only = hetero.formats("csc")
    # test frame
    assert F.array_equal(csc_only["follows"].srcdata["h"], user_feat)
    # test each relation graph
    csc_status = csc_only.formats()
    assert csc_status["created"] == ["csc"]
    assert len(csc_status["not created"]) == 0

    # in_degrees: per-node and bulk results must agree before and after the
    # graph is restricted to COO.
    rand = dgl.rand_graph(100, 2340).to(F.ctx())
    per_node_before = [rand.in_degrees(vid) for vid in range(0, 100)]
    bulk_before = rand.in_degrees()
    rand = rand.formats("coo")
    for vid, expected in enumerate(per_node_before):
        assert rand.in_degrees(vid) == expected
    assert F.array_equal(bulk_before, rand.in_degrees())
@parametrize_idtype
def test_edges_order(idtype):
    """Check the edge order returned by ``all_edges(order="srcdst")``."""
    # Edges in insertion order: (0, 2), (1, 2), (0, 1), (0, 1), (2, 1)
    src_ids = np.array([0, 1, 0, 0, 2])
    dst_ids = np.array([2, 2, 1, 1, 1])
    g = dgl.graph((src_ids, dst_ids), idtype=idtype, device=F.ctx())

    print(g.formats())
    src, dst = g.all_edges(order="srcdst")
    expected_src = F.tensor([0, 0, 0, 1, 2], dtype=idtype)
    expected_dst = F.tensor([1, 1, 2, 2, 1], dtype=idtype)
    assert F.array_equal(src, expected_src)
    assert F.array_equal(dst, expected_dst)


@parametrize_idtype
def test_reverse(idtype):
    """Check ``reverse()`` on the graph index of a homo- and a heterograph.

    For each graph the reversed index must have the same node and edge
    counts and every edge must have its endpoints swapped. The check is
    repeated after restarting the index from CSR and from CSC, where the
    reversed index is expected to hold the opposite compressed format.
    """

    def check_mirrored(fwd, rev, num_ntypes, num_etypes):
        # Same counts per type, then endpoints swapped per edge type.
        for ntype in range(num_ntypes):
            assert fwd.num_nodes(ntype) == rev.num_nodes(ntype)
        for etype in range(num_etypes):
            assert fwd.num_edges(etype) == rev.num_edges(etype)
        for etype in range(num_etypes):
            src, dst, _ = fwd.edges(etype)
            rev_src, rev_dst, _ = rev.edges(etype)
            assert F.array_equal(src, rev_dst)
            assert F.array_equal(dst, rev_src)

    def restart_from(index, fmt):
        # Restrict to one format, then allow all three again.
        only = index.formats(fmt)
        return only.formats(["coo", "csr", "csc"])

    def check_restarts(index, num_ntypes, num_etypes):
        # CSR first, then CSC starting from the CSR-restarted index.
        for fmt, mirrored_fmt in (("csr", "csc"), ("csc", "csr")):
            index = restart_from(index, fmt)
            rev = index.reverse()
            assert fmt in index.formats()["created"]
            assert mirrored_fmt in rev.formats()["created"]
            check_mirrored(index, rev, num_ntypes, num_etypes)

    # one node type, one edge type
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): (
                [0, 1, 2, 4, 3, 1, 3],
                [1, 2, 3, 2, 0, 0, 1],
            )
        },
        idtype=idtype,
        device=F.ctx(),
    )
    gidx = g._graph
    check_mirrored(gidx, gidx.reverse(), 1, 1)
    check_restarts(gidx, 1, 1)

    # three node types and three edge types
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): (
                [0, 1, 2, 4, 3, 1, 3],
                [1, 2, 3, 2, 0, 0, 1],
            ),
            ("user", "plays", "game"): (
                [0, 0, 2, 3, 3, 4, 1],
                [1, 0, 1, 0, 1, 0, 0],
            ),
            ("developer", "develops", "game"): ([0, 1, 1, 2], [0, 0, 1, 1]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    gidx = g._graph
    r_gidx = gidx.reverse()

    # metagraph: each meta-edge must be flipped as well
    mg = gidx.metagraph
    r_mg = r_gidx.metagraph
    for etype in range(3):
        assert mg.find_edge(etype) == r_mg.find_edge(etype)[::-1]

    check_mirrored(gidx, r_gidx, 3, 3)
    check_restarts(gidx, 3, 3)
@parametrize_idtype
def test_clone(idtype):
    """Check that ``clone()`` copies a graph and that the copy is independent.

    Covers a homogeneous graph with features, an empty graph, and a
    heterograph. After cloning, feature or structure changes made to one
    graph must not show up in the other.
    """

    def feat(values):
        # Feature tensor of the parametrized dtype on the test device.
        return F.copy_to(F.tensor(values, dtype=idtype), ctx=F.ctx())

    g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = feat([1, 1, 1])
    g.edata["h"] = feat([1, 1])

    new_g = g.clone()
    assert g.num_nodes() == new_g.num_nodes()
    assert g.num_edges() == new_g.num_edges()
    assert g.device == new_g.device
    assert g.idtype == new_g.idtype
    assert F.array_equal(g.ndata["h"], new_g.ndata["h"])
    assert F.array_equal(g.edata["h"], new_g.edata["h"])
    # data change: overwrite a feature on one side only
    new_g.ndata["h"] = feat([2, 2, 2])
    assert not F.array_equal(g.ndata["h"], new_g.ndata["h"])
    g.edata["h"] = feat([2, 2])
    assert not F.array_equal(g.edata["h"], new_g.edata["h"])
    # graph structure change
    g.add_nodes(1)
    assert g.num_nodes() != new_g.num_nodes()
    new_g.add_edges(1, 1)
    assert g.num_edges() != new_g.num_edges()

    # zero data graph
    g = dgl.graph(([], []), num_nodes=0, idtype=idtype, device=F.ctx())
    new_g = g.clone()
    assert g.num_nodes() == new_g.num_nodes()
    assert g.num_edges() == new_g.num_edges()

    # heterograph
    g = create_test_heterograph3(idtype)
    g.edges["plays"].data["h"] = feat([1, 2, 3, 4])
    new_g = g.clone()
    for ntype in ("user", "game", "developer"):
        assert g.num_nodes(ntype) == new_g.num_nodes(ntype)
    for etype in ("plays", "develops"):
        assert g.num_edges(etype) == new_g.num_edges(etype)
    assert F.array_equal(
        g.nodes["user"].data["h"], new_g.nodes["user"].data["h"]
    )
    assert F.array_equal(
        g.nodes["game"].data["h"], new_g.nodes["game"].data["h"]
    )
    assert F.array_equal(
        g.edges["plays"].data["h"], new_g.edges["plays"].data["h"]
    )
    assert g.device == new_g.device
    assert g.idtype == new_g.idtype
    u, v = g.edges(form="uv", order="eid", etype="plays")
    nu, nv = new_g.edges(form="uv", order="eid", etype="plays")
    assert F.array_equal(u, nu)
    assert F.array_equal(v, nv)
    # graph structure change: the new edges reference node IDs beyond the
    # current ones, so the feature lengths compared below differ too.
    u = F.tensor([0, 4], dtype=idtype)
    v = F.tensor([2, 6], dtype=idtype)
    g.add_edges(u, v, etype="plays")
    u, v = g.edges(form="uv", order="eid", etype="plays")
    assert u.shape[0] != nu.shape[0]
    assert v.shape[0] != nv.shape[0]
    assert (
        g.nodes["user"].data["h"].shape[0]
        != new_g.nodes["user"].data["h"].shape[0]
    )
    assert (
        g.nodes["game"].data["h"].shape[0]
        != new_g.nodes["game"].data["h"].shape[0]
    )
    assert (
        g.edges["plays"].data["h"].shape[0]
        != new_g.edges["plays"].data["h"].shape[0]
    )
@parametrize_idtype
def test_add_edges(idtype):
    """Check ``add_edges`` on homogeneous, bipartite and heterogeneous graphs.

    Covers scalar, list and tensor endpoints, endpoints beyond the current
    node range, edge features passed with the new edges, and an initially
    empty graph. The expected feature tensors show zeros for nodes and
    edges that had no value for a feature.
    """

    def ids(values):
        # Plain tensor of the parametrized dtype (not moved to the device).
        return F.tensor(values, dtype=idtype)

    def feat(values):
        # Feature tensor of the parametrized dtype on the test device.
        return F.copy_to(F.tensor(values, dtype=idtype), ctx=F.ctx())

    def chain_graph():
        return dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())

    def plays_graph():
        return dgl.heterograph(
            {("user", "plays", "game"): ([0, 1], [1, 2])},
            idtype=idtype,
            device=F.ctx(),
        )

    def two_edge_feats():
        return {"h": feat([2, 2]), "hh": feat([2, 2])}

    # homogeneous graph: int, list and tensor endpoints in turn
    g = chain_graph()
    endpoint_forms = ((0, 1), ([0], [1]), (ids([0]), ids([1])))
    for expected_edges, (u, v) in enumerate(endpoint_forms, start=3):
        g.add_edges(u, v)
        assert g.device == F.ctx()
        assert g.num_nodes() == 3
        assert g.num_edges() == expected_edges
    u, v = g.edges(form="uv", order="eid")
    assert F.array_equal(u, ids([0, 1, 0, 0, 0]))
    assert F.array_equal(v, ids([1, 2, 1, 1, 1]))

    # node id larger than current max node id
    g = chain_graph()
    g.add_edges(ids([0, 1]), ids([2, 3]))
    assert g.num_nodes() == 4
    assert g.num_edges() == 4
    u, v = g.edges(form="uv", order="eid")
    assert F.array_equal(u, ids([0, 1, 0, 1]))
    assert F.array_equal(v, ids([1, 2, 2, 3]))

    # has data
    g = chain_graph()
    g.ndata["h"] = feat([1, 1, 1])
    g.edata["h"] = feat([1, 1])
    g.add_edges(ids([0, 1]), ids([2, 3]), two_edge_feats())
    assert g.num_nodes() == 4
    assert g.num_edges() == 4
    u, v = g.edges(form="uv", order="eid")
    assert F.array_equal(u, ids([0, 1, 0, 1]))
    assert F.array_equal(v, ids([1, 2, 2, 3]))
    assert F.array_equal(g.ndata["h"], ids([1, 1, 1, 0]))
    assert F.array_equal(g.edata["h"], ids([1, 1, 2, 2]))
    assert F.array_equal(g.edata["hh"], ids([0, 0, 2, 2]))

    # zero data graph
    g = dgl.graph(([], []), num_nodes=0, idtype=idtype, device=F.ctx())
    g.add_edges(ids([0, 1]), ids([2, 2]), two_edge_feats())
    assert g.num_nodes() == 3
    assert g.num_edges() == 2
    u, v = g.edges(form="uv", order="eid")
    assert F.array_equal(u, ids([0, 1]))
    assert F.array_equal(v, ids([2, 2]))
    assert F.array_equal(g.edata["h"], ids([2, 2]))
    assert F.array_equal(g.edata["hh"], ids([2, 2]))

    # bipartite graph: int, list and tensor endpoints in turn
    g = plays_graph()
    endpoint_forms = ((0, 1), ([0], [1]), (ids([0]), ids([1])))
    for expected_edges, (u, v) in enumerate(endpoint_forms, start=3):
        g.add_edges(u, v)
        assert g.device == F.ctx()
        assert g.num_nodes("user") == 2
        assert g.num_nodes("game") == 3
        assert g.num_edges() == expected_edges
    u, v = g.edges(form="uv")
    assert F.array_equal(u, ids([0, 1, 0, 0, 0]))
    assert F.array_equal(v, ids([1, 2, 1, 1, 1]))

    # node id larger than current max node id
    g = plays_graph()
    g.add_edges(ids([0, 2]), ids([2, 3]))
    assert g.device == F.ctx()
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_edges() == 4
    u, v = g.edges(form="uv", order="eid")
    assert F.array_equal(u, ids([0, 1, 0, 2]))
    assert F.array_equal(v, ids([1, 2, 2, 3]))

    # has data
    g = plays_graph()
    g.nodes["user"].data["h"] = feat([1, 1])
    g.nodes["game"].data["h"] = feat([2, 2, 2])
    g.edata["h"] = feat([1, 1])
    g.add_edges(ids([0, 2]), ids([2, 3]), two_edge_feats())
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_edges() == 4
    u, v = g.edges(form="uv", order="eid")
    assert F.array_equal(u, ids([0, 1, 0, 2]))
    assert F.array_equal(v, ids([1, 2, 2, 3]))
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 0]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 2, 0]))
    assert F.array_equal(g.edata["h"], ids([1, 1, 2, 2]))
    assert F.array_equal(g.edata["hh"], ids([0, 0, 2, 2]))

    # heterogeneous graph
    g = create_test_heterograph3(idtype)
    g.add_edges(ids([0, 2]), ids([2, 3]), etype="plays")
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_nodes("developer") == 2
    assert g.num_edges("plays") == 6
    assert g.num_edges("develops") == 2
    u, v = g.edges(form="uv", order="eid", etype="plays")
    assert F.array_equal(u, ids([0, 1, 1, 2, 0, 2]))
    assert F.array_equal(v, ids([0, 0, 1, 1, 2, 3]))
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 0, 0]))
    assert F.array_equal(
        g.edges["plays"].data["h"], ids([1, 1, 1, 1, 0, 0])
    )

    # add with feature
    e_feat = {"h": feat([2, 2])}
    u = ids([0, 2])
    v = ids([2, 3])
    g.nodes["game"].data["h"] = feat([2, 2, 1, 1])
    g.add_edges(u, v, data=e_feat, etype="develops")
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_nodes("developer") == 3
    assert g.num_edges("plays") == 6
    assert g.num_edges("develops") == 4
    u, v = g.edges(form="uv", order="eid", etype="develops")
    assert F.array_equal(u, ids([0, 1, 0, 2]))
    assert F.array_equal(v, ids([0, 1, 2, 3]))
    assert F.array_equal(g.nodes["developer"].data["h"], ids([3, 3, 0]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 1, 1]))
    assert F.array_equal(g.edges["develops"].data["h"], ids([0, 0, 2, 2]))
@parametrize_idtype
def test_add_nodes(idtype):
    """Check ``add_nodes`` with and without features for the new nodes.

    The expected tensors show zeros for added nodes that were given no
    value for an existing feature, and for existing nodes when a feature
    is first introduced together with the added nodes.
    """

    def ids(values):
        # Plain tensor of the parametrized dtype (not moved to the device).
        return F.tensor(values, dtype=idtype)

    def feat(values):
        # Feature tensor of the parametrized dtype on the test device.
        return F.copy_to(F.tensor(values, dtype=idtype), ctx=F.ctx())

    # homogeneous Graphs
    g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = feat([1, 1, 1])
    g.add_nodes(1)
    assert g.num_nodes() == 4
    assert F.array_equal(g.ndata["h"], ids([1, 1, 1, 0]))

    # graph with three nodes and no edges
    g = dgl.graph(([], []), num_nodes=3, idtype=idtype, device=F.ctx())
    g.ndata["h"] = feat([1, 1, 1])
    g.add_nodes(1, data={"h": feat([2])})
    assert g.num_nodes() == 4
    assert F.array_equal(g.ndata["h"], ids([1, 1, 1, 2]))

    # bipartite graph
    g = dgl.heterograph(
        {("user", "plays", "game"): ([0, 1], [1, 2])},
        idtype=idtype,
        device=F.ctx(),
    )
    g.add_nodes(2, data={"h": feat([2, 2])}, ntype="user")
    assert g.num_nodes("user") == 4
    assert F.array_equal(g.nodes["user"].data["h"], ids([0, 0, 2, 2]))
    g.add_nodes(2, ntype="game")
    assert g.num_nodes("game") == 5

    # heterogeneous graph
    g = create_test_heterograph3(idtype)
    g.add_nodes(1, ntype="user")
    g.add_nodes(2, data={"h": feat([2, 2])}, ntype="game")
    g.add_nodes(0, ntype="developer")
    assert g.num_nodes("user") == 4
    assert g.num_nodes("game") == 4
    assert g.num_nodes("developer") == 2
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1, 0]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 2, 2]))


@unittest.skipIf(
    dgl.backend.backend_name == "mxnet",
    reason="MXNet has error with (0,) shape tensor.",
)
@parametrize_idtype
def test_remove_edges(idtype):
    """Check ``remove_edges`` on homogeneous, bipartite and heterographs.

    Covers int, list and tensor edge IDs, preservation of node features,
    filtering of edge features, and rejection of an out-of-range edge ID.
    """

    def ids(values):
        # Plain tensor of the parametrized dtype (not moved to the device).
        return F.tensor(values, dtype=idtype)

    def feat(values):
        # Feature tensor of the parametrized dtype on the test device.
        return F.copy_to(F.tensor(values, dtype=idtype), ctx=F.ctx())

    def chain_graph():
        return dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())

    def plays_graph():
        return dgl.heterograph(
            {("user", "plays", "game"): ([0, 1], [1, 2])},
            idtype=idtype,
            device=F.ctx(),
        )

    def check_only_second_edge_left(graph):
        assert graph.num_edges() == 1
        u, v = graph.edges(form="uv", order="eid")
        assert F.array_equal(u, ids([1]))
        assert F.array_equal(v, ids([2]))

    # homogeneous Graphs
    g = chain_graph()
    g.remove_edges(0)
    check_only_second_edge_left(g)
    g = chain_graph()
    g.remove_edges([0])
    check_only_second_edge_left(g)
    g.remove_edges(ids([0]))
    assert g.num_edges() == 0

    # has node data
    g = chain_graph()
    g.ndata["h"] = feat([1, 2, 3])
    g.remove_edges(1)
    assert g.num_edges() == 1
    assert F.array_equal(g.ndata["h"], ids([1, 2, 3]))

    # has edge data
    g = chain_graph()
    g.edata["h"] = feat([1, 2])
    g.remove_edges(0)
    assert g.num_edges() == 1
    assert F.array_equal(g.edata["h"], ids([2]))

    # invalid eid: only edge 0 is left, so ID 1 must be rejected.
    # pytest.raises(Exception) keeps the "any error" contract of the test
    # without also swallowing KeyboardInterrupt/SystemExit the way a bare
    # ``except:`` does.
    with pytest.raises(Exception):
        g.remove_edges(1)

    # bipartite graph
    g = plays_graph()
    g.remove_edges(0)
    check_only_second_edge_left(g)
    g = plays_graph()
    g.remove_edges([0])
    check_only_second_edge_left(g)
    g.remove_edges(ids([0]))
    assert g.num_edges() == 0

    # has data
    g = plays_graph()
    g.nodes["user"].data["h"] = feat([1, 1])
    g.nodes["game"].data["h"] = feat([2, 2, 2])
    g.edata["h"] = feat([1, 2])
    g.remove_edges(1)
    assert g.num_edges() == 1
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 2]))
    assert F.array_equal(g.edata["h"], ids([1]))

    # heterogeneous graph
    g = create_test_heterograph3(idtype)
    g.edges["plays"].data["h"] = feat([1, 2, 3, 4])
    g.remove_edges(1, etype="plays")
    assert g.num_edges("plays") == 3
    u, v = g.edges(form="uv", order="eid", etype="plays")
    assert F.array_equal(u, ids([0, 1, 2]))
    assert F.array_equal(v, ids([0, 1, 1]))
    assert F.array_equal(g.edges["plays"].data["h"], ids([1, 3, 4]))
    # remove all edges of 'develops'
    g.remove_edges([0, 1], etype="develops")
    assert g.num_edges("develops") == 0
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2]))
    assert F.array_equal(g.nodes["developer"].data["h"], ids([3, 3]))
@parametrize_idtype
def test_remove_nodes(idtype):
    """Check ``remove_nodes`` on homogeneous, bipartite and heterographs.

    Covers int, list and tensor node IDs, rejection of an out-of-range
    node ID, and the node/edge features that remain after removal. The
    expected edge lists show remaining nodes relabelled to consecutive IDs
    and edges incident to a removed node dropped.
    """

    def ids(values):
        # Plain tensor of the parametrized dtype (not moved to the device).
        return F.tensor(values, dtype=idtype)

    def feat(values):
        # Feature tensor of the parametrized dtype on the test device.
        return F.copy_to(F.tensor(values, dtype=idtype), ctx=F.ctx())

    def chain_graph():
        return dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())

    def plays_graph():
        return dgl.heterograph(
            {("user", "plays", "game"): ([0, 1], [1, 2])},
            idtype=idtype,
            device=F.ctx(),
        )

    def check_edges(graph, expected_src, expected_dst, **edge_kwargs):
        u, v = graph.edges(form="uv", order="eid", **edge_kwargs)
        assert F.array_equal(u, ids(expected_src))
        assert F.array_equal(v, ids(expected_dst))

    # homogeneous Graphs
    g = chain_graph()
    g.remove_nodes(0)
    assert g.num_nodes() == 2
    assert g.num_edges() == 1
    check_edges(g, [0], [1])
    g = chain_graph()
    g.remove_nodes([1])
    assert g.num_nodes() == 2
    assert g.num_edges() == 0
    g = chain_graph()
    g.remove_nodes(ids([2]))
    assert g.num_nodes() == 2
    assert g.num_edges() == 1
    check_edges(g, [0], [1])

    # invalid nid: the graph has two nodes left, so ID 3 must be rejected.
    # pytest.raises(Exception) keeps the "any error" contract of the test
    # without also swallowing KeyboardInterrupt/SystemExit the way a bare
    # ``except:`` does.
    with pytest.raises(Exception):
        g.remove_nodes(3)

    # has node and edge data
    g = dgl.graph(([0, 0, 2], [0, 1, 2]), idtype=idtype, device=F.ctx())
    g.ndata["hv"] = feat([1, 2, 3])
    g.edata["he"] = feat([1, 2, 3])
    g.remove_nodes(ids([0]))
    assert g.num_nodes() == 2
    assert g.num_edges() == 1
    check_edges(g, [1], [1])
    assert F.array_equal(g.ndata["hv"], ids([2, 3]))
    assert F.array_equal(g.edata["he"], ids([3]))

    # bipartite graph
    g = plays_graph()
    g.remove_nodes(0, ntype="user")
    assert g.num_nodes("user") == 1
    assert g.num_nodes("game") == 3
    assert g.num_edges() == 1
    check_edges(g, [0], [2])
    g = plays_graph()
    g.remove_nodes([1], ntype="user")
    assert g.num_nodes("user") == 1
    assert g.num_nodes("game") == 3
    assert g.num_edges() == 1
    check_edges(g, [0], [1])
    g = plays_graph()
    g.remove_nodes(ids([0]), ntype="game")
    assert g.num_nodes("user") == 2
    assert g.num_nodes("game") == 2
    assert g.num_edges() == 2
    check_edges(g, [0, 1], [0, 1])

    # heterogeneous graph
    g = create_test_heterograph3(idtype)
    g.edges["plays"].data["h"] = feat([1, 2, 3, 4])
    g.remove_nodes(0, ntype="game")
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 1
    assert g.num_nodes("developer") == 2
    assert g.num_edges("plays") == 2
    assert g.num_edges("develops") == 1
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2]))
    assert F.array_equal(g.nodes["developer"].data["h"], ids([3, 3]))
    check_edges(g, [1, 2], [0, 0], etype="plays")
    assert F.array_equal(g.edges["plays"].data["h"], ids([3, 4]))
    check_edges(g, [1], [0], etype="develops")
@parametrize_idtype
def test_frame(idtype):
    """Check lazy updating of feature storage after structural transforms.

    The raw column storage of a derived graph is inspected before and
    after its features are read through ``ndata`` / ``edata``. Statement
    order matters: each read is what the following storage assertions
    depend on.
    """

    def node_storage(graph):
        return graph._node_frames[0]._columns["h"].storage

    def edge_storage(graph):
        return graph._edge_frames[0]._columns["h"].storage

    def ids(values):
        return F.tensor(values, dtype=idtype)

    g = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.copy_to(ids([0, 1, 2, 3]), ctx=F.ctx())
    g.edata["h"] = F.copy_to(ids([0, 1, 2]), ctx=F.ctx())

    # remove nodes
    sg = dgl.remove_nodes(g, [3])
    # check for lazy update: storage still equals the parent's full columns
    assert F.array_equal(node_storage(sg), g.ndata["h"])
    assert F.array_equal(edge_storage(sg), g.edata["h"])
    assert sg.ndata["h"].shape[0] == 3
    assert sg.edata["h"].shape[0] == 2
    # update after read
    assert F.array_equal(node_storage(sg), ids([0, 1, 2]))
    assert F.array_equal(edge_storage(sg), ids([0, 1]))

    ng = dgl.add_nodes(sg, 1)
    assert ng.ndata["h"].shape[0] == 4
    assert F.array_equal(node_storage(ng), ids([0, 1, 2, 0]))
    ng = dgl.add_edges(ng, [3], [1])
    assert ng.edata["h"].shape[0] == 3
    assert F.array_equal(edge_storage(ng), ids([0, 1, 0]))

    # multi level lazy update
    sg = dgl.remove_nodes(g, [3])
    assert F.array_equal(node_storage(sg), g.ndata["h"])
    assert F.array_equal(edge_storage(sg), g.edata["h"])
    ssg = dgl.remove_nodes(sg, [1])
    assert F.array_equal(node_storage(ssg), g.ndata["h"])
    assert F.array_equal(edge_storage(ssg), g.edata["h"])
    # ssg is changed
    assert ssg.ndata["h"].shape[0] == 2
    assert ssg.edata["h"].shape[0] == 0
    assert F.array_equal(node_storage(ssg), ids([0, 2]))
    # sg is still in lazy mode
    assert F.array_equal(node_storage(sg), g.ndata["h"])
    assert F.array_equal(edge_storage(sg), g.edata["h"])
@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TensorFlow always create a new tensor",
)
@unittest.skipIf(
    F._default_context_str == "cpu",
    reason="cpu do not have context change problem",
)
@parametrize_idtype
def test_frame_device(idtype):
    """Check that feature columns change device only when they are read.

    The context of the raw column storage is inspected around ``to()``,
    ``node_subgraph``, in-place writes and ``add_nodes``. The ``print``
    calls are the reads that trigger the copies, so statement order must
    be kept.
    """

    def node_ctx(graph, key):
        return F.context(graph._node_frames[0]._columns[key].storage)

    def edge_ctx(graph, key):
        return F.context(graph._edge_frames[0]._columns[key].storage)

    g = dgl.graph(([0, 1, 2], [2, 3, 1]))
    g.ndata["h"] = F.copy_to(F.tensor([1, 1, 1, 2], dtype=idtype), ctx=F.cpu())
    g.ndata["hh"] = F.copy_to(F.ones((4, 3), dtype=idtype), ctx=F.cpu())
    g.edata["h"] = F.copy_to(F.tensor([1, 2, 3], dtype=idtype), ctx=F.cpu())

    g = g.to(F.ctx())
    # lazy device copy
    assert node_ctx(g, "h") == F.cpu()
    assert node_ctx(g, "hh") == F.cpu()
    print(g.ndata["h"])
    assert node_ctx(g, "h") == F.ctx()
    assert node_ctx(g, "hh") == F.cpu()
    assert edge_ctx(g, "h") == F.cpu()

    # lazy device copy in subgraph
    sg = dgl.node_subgraph(g, [0, 1, 2])
    assert node_ctx(sg, "h") == F.ctx()
    assert node_ctx(sg, "hh") == F.cpu()
    assert edge_ctx(sg, "h") == F.cpu()
    print(sg.ndata["hh"])
    assert node_ctx(sg, "hh") == F.ctx()
    assert edge_ctx(sg, "h") == F.cpu()

    # back to cpu
    sg = sg.to(F.cpu())
    assert node_ctx(sg, "h") == F.ctx()
    assert node_ctx(sg, "hh") == F.ctx()
    assert edge_ctx(sg, "h") == F.cpu()
    print(sg.ndata["h"])
    print(sg.ndata["hh"])
    print(sg.edata["h"])
    assert node_ctx(sg, "h") == F.cpu()
    assert node_ctx(sg, "hh") == F.cpu()
    assert edge_ctx(sg, "h") == F.cpu()

    # set some field
    sg = sg.to(F.ctx())
    assert node_ctx(sg, "h") == F.cpu()
    sg.ndata["h"][0] = 5
    assert node_ctx(sg, "h") == F.ctx()
    assert node_ctx(sg, "hh") == F.cpu()
    assert edge_ctx(sg, "h") == F.cpu()

    # add nodes
    ng = dgl.add_nodes(sg, 3)
    assert node_ctx(ng, "h") == F.ctx()
    assert node_ctx(ng, "hh") == F.ctx()
    assert edge_ctx(ng, "h") == F.cpu()
F.context(g._node_frames[0]._columns["hh"].storage) == F.cpu() assert F.context(g._edge_frames[0]._columns["h"].storage) == F.cpu() # lazy device copy in subgraph sg = dgl.node_subgraph(g, [0, 1, 2]) assert F.context(sg._node_frames[0]._columns["h"].storage) == F.ctx() assert F.context(sg._node_frames[0]._columns["hh"].storage) == F.cpu() assert F.context(sg._edge_frames[0]._columns["h"].storage) == F.cpu() print(sg.ndata["hh"]) assert F.context(sg._node_frames[0]._columns["hh"].storage) == F.ctx() assert F.context(sg._edge_frames[0]._columns["h"].storage) == F.cpu() # back to cpu sg = sg.to(F.cpu()) assert F.context(sg._node_frames[0]._columns["h"].storage) == F.ctx() assert F.context(sg._node_frames[0]._columns["hh"].storage) == F.ctx() assert F.context(sg._edge_frames[0]._columns["h"].storage) == F.cpu() print(sg.ndata["h"]) print(sg.ndata["hh"]) print(sg.edata["h"]) assert F.context(sg._node_frames[0]._columns["h"].storage) == F.cpu() assert F.context(sg._node_frames[0]._columns["hh"].storage) == F.cpu() assert F.context(sg._edge_frames[0]._columns["h"].storage) == F.cpu() # set some field sg = sg.to(F.ctx()) assert F.context(sg._node_frames[0]._columns["h"].storage) == F.cpu() sg.ndata["h"][0] = 5 assert F.context(sg._node_frames[0]._columns["h"].storage) == F.ctx() assert F.context(sg._node_frames[0]._columns["hh"].storage) == F.cpu() assert F.context(sg._edge_frames[0]._columns["h"].storage) == F.cpu() # add nodes ng = dgl.add_nodes(sg, 3) assert F.context(ng._node_frames[0]._columns["h"].storage) == F.ctx() assert F.context(ng._node_frames[0]._columns["hh"].storage) == F.ctx() assert F.context(ng._edge_frames[0]._columns["h"].storage) == F.cpu() @parametrize_idtype def test_create_block(idtype): block = dgl.create_block( ([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx() ) assert block.num_src_nodes() == 3 assert block.num_dst_nodes() == 4 assert block.num_edges() == 3 block = dgl.create_block(([], []), idtype=idtype, device=F.ctx()) assert 
block.num_src_nodes() == 0 assert block.num_dst_nodes() == 0 assert block.num_edges() == 0 block = dgl.create_block(([], []), 3, 4, idtype=idtype, device=F.ctx()) assert block.num_src_nodes() == 3 assert block.num_dst_nodes() == 4 assert block.num_edges() == 0 block = dgl.create_block( ([0, 1, 2], [1, 2, 3]), 4, 5, idtype=idtype, device=F.ctx() ) assert block.num_src_nodes() == 4 assert block.num_dst_nodes() == 5 assert block.num_edges() == 3 sx = F.randn((4, 5)) dx = F.randn((5, 6)) ex = F.randn((3, 4)) block.srcdata["x"] = sx block.dstdata["x"] = dx block.edata["x"] = ex g = dgl.block_to_graph(block) assert g.num_src_nodes() == 4 assert g.num_dst_nodes() == 5 assert g.num_edges() == 3 assert g.srcdata["x"] is sx assert g.dstdata["x"] is dx assert g.edata["x"] is ex block = dgl.create_block( { ("A", "AB", "B"): ([1, 2, 3], [2, 1, 0]), ("B", "BA", "A"): ([2, 3], [3, 4]), }, idtype=idtype, device=F.ctx(), ) assert block.num_src_nodes("A") == 4 assert block.num_src_nodes("B") == 4 assert block.num_dst_nodes("B") == 3 assert block.num_dst_nodes("A") == 5 assert block.num_edges("AB") == 3 assert block.num_edges("BA") == 2 block = dgl.create_block( {("A", "AB", "B"): ([], []), ("B", "BA", "A"): ([], [])}, idtype=idtype, device=F.ctx(), ) assert block.num_src_nodes("A") == 0 assert block.num_src_nodes("B") == 0 assert block.num_dst_nodes("B") == 0 assert block.num_dst_nodes("A") == 0 assert block.num_edges("AB") == 0 assert block.num_edges("BA") == 0 block = dgl.create_block( {("A", "AB", "B"): ([], []), ("B", "BA", "A"): ([], [])}, num_src_nodes={"A": 5, "B": 5}, num_dst_nodes={"A": 6, "B": 4}, idtype=idtype, device=F.ctx(), ) assert block.num_src_nodes("A") == 5 assert block.num_src_nodes("B") == 5 assert block.num_dst_nodes("B") == 4 assert block.num_dst_nodes("A") == 6 assert block.num_edges("AB") == 0 assert block.num_edges("BA") == 0 block = dgl.create_block( { ("A", "AB", "B"): ([1, 2, 3], [2, 1, 0]), ("B", "BA", "A"): ([2, 3], [3, 4]), }, num_src_nodes={"A": 5, 
@parametrize_idtype
@pytest.mark.parametrize("fmt", ["coo", "csr", "csc"])
def test_adj_tensors(idtype, fmt):
    """Build a graph from one sparse layout and read it back in all three.

    A random 10x10 SciPy matrix with 20 stored entries provides the
    structure. Its ``data`` is overwritten with ``0..19`` in the layout the
    graph is built from, and is used further down as the edge ID of each
    entry. For the compressed layouts a malformed input (``indptr`` one
    element short) must be rejected with ``DGLError``; each branch tests
    the layout it builds, so the ``csc`` case checks malformed CSC input.
    """
    if fmt == "coo":
        A = ssp.random(10, 10, 0.2).tocoo()
        A.data = np.arange(20)
        row = F.tensor(A.row, idtype)
        col = F.tensor(A.col, idtype)
        g = dgl.graph((row, col))
    else:
        A = ssp.random(10, 10, 0.2)
        A = A.tocsr() if fmt == "csr" else A.tocsc()
        A.data = np.arange(20)
        indptr = F.tensor(A.indptr, idtype)
        indices = F.tensor(A.indices, idtype)
        g = dgl.graph((fmt, (indptr, indices, [])))
        with pytest.raises(DGLError):
            dgl.graph((fmt, (indptr[:-1], indices, [])), num_nodes=10)

    A_coo = A.tocoo()
    A_csr = A.tocsr()
    A_csc = A.tocsc()

    row, col = g.adj_tensors("coo")
    assert np.array_equal(F.asnumpy(row), A_coo.row)
    assert np.array_equal(F.asnumpy(col), A_coo.col)

    def check_compressed(layout, reference):
        indptr, indices, eids = g.adj_tensors(layout)
        assert np.array_equal(F.asnumpy(indptr), reference.indptr)
        if fmt == layout:
            # Built from this layout: no edge-ID array, indices unchanged.
            assert len(eids) == 0
            assert np.array_equal(F.asnumpy(indices), reference.indices)
        else:
            # Scatter both sides into edge-ID order before comparing;
            # ``reference.data`` holds the edge ID of every stored entry.
            indices_sorted = F.zeros(len(indices), idtype)
            indices_sorted = F.scatter_row(indices_sorted, eids, indices)
            indices_sorted_np = np.zeros(
                len(indices), dtype=reference.indices.dtype
            )
            indices_sorted_np[reference.data] = reference.indices
            assert np.array_equal(
                F.asnumpy(indices_sorted), indices_sorted_np
            )

    check_compressed("csr", A_csr)
    check_compressed("csc", A_csc)
(indptr[:-1], indices, [])), num_nodes=10) A_coo = A.tocoo() A_csr = A.tocsr() A_csc = A.tocsc() row, col = g.adj_tensors("coo") assert np.array_equal(F.asnumpy(row), A_coo.row) assert np.array_equal(F.asnumpy(col), A_coo.col) indptr, indices, eids = g.adj_tensors("csr") assert np.array_equal(F.asnumpy(indptr), A_csr.indptr) if fmt == "csr": assert len(eids) == 0 assert np.array_equal(F.asnumpy(indices), A_csr.indices) else: indices_sorted = F.zeros(len(indices), idtype) indices_sorted = F.scatter_row(indices_sorted, eids, indices) indices_sorted_np = np.zeros(len(indices), dtype=A_csr.indices.dtype) indices_sorted_np[A_csr.data] = A_csr.indices assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np) indptr, indices, eids = g.adj_tensors("csc") assert np.array_equal(F.asnumpy(indptr), A_csc.indptr) if fmt == "csc": assert len(eids) == 0 assert np.array_equal(F.asnumpy(indices), A_csc.indices) else: indices_sorted = F.zeros(len(indices), idtype) indices_sorted = F.scatter_row(indices_sorted, eids, indices) indices_sorted_np = np.zeros(len(indices), dtype=A_csc.indices.dtype) indices_sorted_np[A_csc.data] = A_csc.indices assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np) def _test_forking_pickler_entry(g, q): q.put(g.formats()) @unittest.skipIf( dgl.backend.backend_name == "mxnet", reason="MXNet doesn't support spawning" ) def test_forking_pickler(): ctx = mp.get_context("spawn") g = dgl.graph(([0, 1, 2], [1, 2, 3])) g.create_formats_() q = ctx.Queue(1) proc = ctx.Process(target=_test_forking_pickler_entry, args=(g, q)) proc.start() fmt = q.get()["created"] proc.join() assert "coo" in fmt assert "csr" in fmt assert "csc" in fmt if __name__ == "__main__": # test_create() # test_query() # test_hypersparse() # test_adj("int32") # test_inc() # test_view("int32") # test_view1("int32") # test_flatten(F.int32) # test_convert_bound() # test_convert() # test_to_device("int32") # test_transform("int32") # test_subgraph("int32") # 
test_subgraph_mask("int32") # test_apply() # test_level1() # test_level2() # test_updates() # test_backward() # test_empty_heterograph('int32') # test_types_in_function() # test_stack_reduce() # test_isolated_ntype() # test_bipartite() # test_dtype_cast() # test_float_cast() # test_reverse("int32") # test_format() # test_add_edges(F.int32) # test_add_nodes(F.int32) # test_remove_edges(F.int32) # test_remove_nodes(F.int32) # test_clone(F.int32) # test_frame(F.int32) # test_frame_device(F.int32) # test_empty_query(F.int32) # test_create_block(F.int32) pass ================================================ FILE: tests/python/common/test_homophily.py ================================================ import math import unittest import backend as F import dgl from utils import parametrize_idtype @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_node_homophily(idtype): # IfChangeThenChange: python/dgl/homophily.py # Update the docstring example. device = F.ctx() graph = dgl.graph( ([1, 2, 0, 4], [0, 1, 2, 3]), idtype=idtype, device=device ) y = F.tensor([0, 0, 0, 0, 1]) assert math.isclose(dgl.node_homophily(graph, y), 0.6000000238418579) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_edge_homophily(idtype): # IfChangeThenChange: python/dgl/homophily.py # Update the docstring example. device = F.ctx() graph = dgl.graph( ([1, 2, 0, 4], [0, 1, 2, 3]), idtype=idtype, device=device ) y = F.tensor([0, 0, 0, 0, 1]) assert math.isclose(dgl.edge_homophily(graph, y), 0.75) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_linkx_homophily(idtype): # IfChangeThenChange: python/dgl/homophily.py # Update the docstring example. 
device = F.ctx() graph = dgl.graph(([0, 1, 2, 3], [1, 2, 0, 4]), device=device) y = F.tensor([0, 0, 0, 0, 1]) assert math.isclose(dgl.linkx_homophily(graph, y), 0.19999998807907104) y = F.tensor([0, 1, 2, 3, 4]) assert math.isclose(dgl.linkx_homophily(graph, y), 0.0000000000000000) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_adjusted_homophily(idtype): # IfChangeThenChange: python/dgl/homophily.py # Update the docstring example. device = F.ctx() graph = dgl.graph( ([1, 2, 0, 4], [0, 1, 2, 3]), idtype=idtype, device=device ) y = F.tensor([0, 0, 0, 0, 1]) assert math.isclose(dgl.adjusted_homophily(graph, y), -0.1428571492433548) ================================================ FILE: tests/python/common/test_label_informativeness.py ================================================ import math import unittest import backend as F import dgl from utils import parametrize_idtype @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_edge_label_informativeness(idtype): # IfChangeThenChange: python/dgl/label_informativeness.py # Update the docstring example. device = F.ctx() graph = dgl.graph( ([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]), idtype=idtype, device=device ) y = F.tensor([0, 0, 0, 0, 1, 1]) assert math.isclose( dgl.edge_label_informativeness(graph, y), 0.25177597999572754, abs_tol=1e-6, ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_node_label_informativeness(idtype): # IfChangeThenChange: python/dgl/label_informativeness.py # Update the docstring example. 
# Body of test_node_label_informativeness: fixed 6-node example graph.
    device = F.ctx()
    graph = dgl.graph(
        ([0, 1, 2, 2, 3, 4], [1, 2, 0, 3, 4, 5]), idtype=idtype, device=device
    )
    y = F.tensor([0, 0, 0, 0, 1, 1])
    assert math.isclose(
        dgl.node_label_informativeness(graph, y),
        0.3381872773170471,
        abs_tol=1e-6,
    )

================================================
FILE: tests/python/common/test_merge.py
================================================
import backend as F

import dgl
from utils import parametrize_idtype


@parametrize_idtype
def test_heterograph_merge(idtype):
    """Merge two heterographs and check edges, node counts and features.

    ``g2`` is merged after ``g1``; the assertions expect g2's edges to follow
    g1's and g2's feature rows to appear in the merged graph.
    """
    g1 = (
        dgl.heterograph({("a", "to", "b"): ([0, 1], [1, 0])})
        .astype(idtype)
        .to(F.ctx())
    )
    g1_n_edges = g1.num_edges(etype="to")
    g1.nodes["a"].data["nh"] = F.randn((2, 3))
    g1.nodes["b"].data["nh"] = F.randn((2, 3))
    g1.edges["to"].data["eh"] = F.randn((2, 3))

    g2 = (
        dgl.heterograph({("a", "to", "b"): ([1, 2, 3], [2, 3, 5])})
        .astype(idtype)
        .to(F.ctx())
    )
    g2.nodes["a"].data["nh"] = F.randn((4, 3))
    g2.nodes["b"].data["nh"] = F.randn((6, 3))
    g2.edges["to"].data["eh"] = F.randn((3, 3))
    g2.add_nodes(3, ntype="a")
    g2.add_nodes(3, ntype="b")

    m = dgl.merge([g1, g2])

    # Check g2's edges and nodes were added to g1's in m.
    m_us = F.asnumpy(m.edges()[0][g1_n_edges:])
    g2_us = F.asnumpy(g2.edges()[0])
    assert all(m_us == g2_us)
    m_vs = F.asnumpy(m.edges()[1][g1_n_edges:])
    g2_vs = F.asnumpy(g2.edges()[1])
    assert all(m_vs == g2_vs)
    for ntype in m.ntypes:
        assert m.num_nodes(ntype=ntype) == max(
            g1.num_nodes(ntype=ntype), g2.num_nodes(ntype=ntype)
        )

        # Check g1's node data was updated with g2's in m.
        # NOTE(review): nesting of this inner loop is inferred from its use of
        # `ntype`; the dump lost the original indentation.
        for key in m.nodes[ntype].data:
            g2_n_nodes = g2.num_nodes(ntype=ntype)
            updated_g1_ndata = F.asnumpy(m.nodes[ntype].data[key][:g2_n_nodes])
            g2_ndata = F.asnumpy(g2.nodes[ntype].data[key])
            assert all((updated_g1_ndata == g2_ndata).flatten())

    # Check g1's edge data was updated with g2's in m.
# Final check of test_heterograph_merge: edge features past g1's edges must
# equal g2's edge features.
    for key in m.edges["to"].data:
        updated_g1_edata = F.asnumpy(m.edges["to"].data[key][g1_n_edges:])
        g2_edata = F.asnumpy(g2.edges["to"].data[key])
        assert all((updated_g1_edata == g2_edata).flatten())

================================================
FILE: tests/python/common/test_partition.py
================================================
import unittest

import backend as F

from dgl.distributed import graph_partition_book as gpb
from dgl.partition import NDArrayPartition
from utils import parametrize_idtype


@unittest.skipIf(
    F._default_context_str == "cpu",
    reason="NDArrayPartition only works on GPU.",
)
@parametrize_idtype
def test_get_node_partition_from_book(idtype):
    """Derive a node partition from a 3-part RangePartitionBook and check
    its ID mappings and permutation generation."""
    # Each row is one part's [start, end] pair; the asserts below treat the
    # node ranges as covering IDs 0..10 (array_size == 11).
    node_map = {"_N": F.tensor([[0, 3], [4, 5], [6, 10]], dtype=idtype)}
    edge_map = {
        ("_N", "_E", "_N"): F.tensor([[0, 9], [10, 15], [16, 25]], dtype=idtype)
    }
    ntypes = {ntype: i for i, ntype in enumerate(node_map)}
    etypes = {etype: i for i, etype in enumerate(edge_map)}
    book = gpb.RangePartitionBook(0, 3, node_map, edge_map, ntypes, etypes)
    partition = gpb.get_node_partition_from_book(book, F.ctx())
    assert partition.num_parts() == 3
    assert partition.array_size() == 11

    # Test map_to_local
    test_ids = F.copy_to(F.tensor([0, 2, 6, 7, 10], dtype=idtype), F.ctx())
    act_ids = partition.map_to_local(test_ids)
    exp_ids = F.copy_to(F.tensor([0, 2, 0, 1, 4], dtype=idtype), F.ctx())
    assert F.array_equal(act_ids, exp_ids)

    # Test map_to_global
    test_ids = F.copy_to(F.tensor([0, 2], dtype=idtype), F.ctx())
    act_ids = partition.map_to_global(test_ids, 0)
    exp_ids = F.copy_to(F.tensor([0, 2], dtype=idtype), F.ctx())
    assert F.array_equal(act_ids, exp_ids)

    test_ids = F.copy_to(F.tensor([0, 1], dtype=idtype), F.ctx())
    act_ids = partition.map_to_global(test_ids, 1)
    exp_ids = F.copy_to(F.tensor([4, 5], dtype=idtype), F.ctx())
    assert F.array_equal(act_ids, exp_ids)

    test_ids = F.copy_to(F.tensor([0, 1, 4], dtype=idtype), F.ctx())
    act_ids = partition.map_to_global(test_ids, 2)
    exp_ids = F.copy_to(
        F.tensor([6, 7, 10], dtype=idtype),
# Closes the F.copy_to(...) call opened on the previous line (part-2 global IDs).
        F.ctx())
    assert F.array_equal(act_ids, exp_ids)

    # Test generate_permutation
    test_ids = F.copy_to(F.tensor([6, 0, 7, 2, 10], dtype=idtype), F.ctx())
    perm, split_sum = partition.generate_permutation(test_ids)
    exp_perm = F.copy_to(F.tensor([1, 3, 0, 2, 4], dtype=idtype), F.ctx())
    exp_sum = F.copy_to(F.tensor([2, 0, 3]), F.ctx())
    assert F.array_equal(perm, exp_perm)
    assert F.array_equal(split_sum, exp_sum)

================================================
FILE: tests/python/common/test_propagate.py
================================================
import unittest

import backend as F

import dgl
import networkx as nx
from utils import check_fail, parametrize_idtype


def create_graph(idtype):
    """Return a DGL graph converted from a 5-node NetworkX path graph."""
    g = dgl.from_networkx(nx.path_graph(5), idtype=idtype, device=F.ctx())
    return g


def mfunc(edges):
    """Message function: send the source node's ``x`` feature as ``m``."""
    return {"m": edges.src["x"]}


def rfunc(nodes):
    """Reduce function: add the summed mailbox messages to the node's ``x``."""
    msg = F.sum(nodes.mailbox["m"], 1)
    return {"x": nodes.data["x"] + msg}


@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@parametrize_idtype
def test_prop_nodes_bfs(idtype):
    """Propagate along BFS frontiers from node 0 and check the node features."""
    g = create_graph(idtype)
    g.ndata["x"] = F.ones((5, 2))
    dgl.prop_nodes_bfs(
        g, 0, message_func=mfunc, reduce_func=rfunc, apply_node_func=None
    )
    # pull nodes using bfs order will result in a cumsum[i] + data[i] + data[i+1]
    assert F.allclose(
        g.ndata["x"],
        F.tensor([[2.0, 2.0], [4.0, 4.0], [6.0, 6.0], [8.0, 8.0], [9.0, 9.0]]),
    )


@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@parametrize_idtype
def test_prop_edges_dfs(idtype):
    """Propagate along DFS edge orders (plain, with reverse edges, with
    non-tree edges) and check the resulting node features."""
    g = create_graph(idtype)
    g.ndata["x"] = F.ones((5, 2))
    dgl.prop_edges_dfs(
        g, 0, message_func=mfunc, reduce_func=rfunc, apply_node_func=None
    )
    # snr using dfs results in a cumsum
    assert F.allclose(
        g.ndata["x"],
        F.tensor([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0], [5.0, 5.0]]),
    )

    g.ndata["x"] = F.ones((5, 2))
    dgl.prop_edges_dfs(
        g,
        0,
        has_reverse_edge=True,
        message_func=mfunc,
        reduce_func=rfunc,
        apply_node_func=None,
    )
    # result is cumsum[i] + cumsum[i-1]
    assert F.allclose(
        g.ndata["x"],
F.tensor([[1.0, 1.0], [3.0, 3.0], [5.0, 5.0], [7.0, 7.0], [9.0, 9.0]]), ) g.ndata["x"] = F.ones((5, 2)) dgl.prop_edges_dfs( g, 0, has_nontree_edge=True, message_func=mfunc, reduce_func=rfunc, apply_node_func=None, ) # result is cumsum[i] + cumsum[i+1] assert F.allclose( g.ndata["x"], F.tensor([[3.0, 3.0], [5.0, 5.0], [7.0, 7.0], [9.0, 9.0], [5.0, 5.0]]), ) @unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented") @parametrize_idtype def test_prop_nodes_topo(idtype): # bi-directional chain g = create_graph(idtype) assert check_fail(dgl.prop_nodes_topo, g) # has loop # tree tree = dgl.graph([]) tree.add_nodes(5) tree.add_edges(1, 0) tree.add_edges(2, 0) tree.add_edges(3, 2) tree.add_edges(4, 2) tree = dgl.graph(tree.edges()) # init node feature data tree.ndata["x"] = F.zeros((5, 2)) # set all leaf nodes to be ones tree.nodes[[1, 3, 4]].data["x"] = F.ones((3, 2)) # Filtering DGLWarning: # The input graph for the user-defined edge # function does not contain valid edges import warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) dgl.prop_nodes_topo( tree, message_func=mfunc, reduce_func=rfunc, apply_node_func=None ) # root node get the sum assert F.allclose(tree.nodes[0].data["x"], F.tensor([[3.0, 3.0]])) if __name__ == "__main__": test_prop_nodes_bfs() test_prop_edges_dfs() test_prop_nodes_topo() ================================================ FILE: tests/python/common/test_random.py ================================================ import unittest import backend as F import dgl import numpy as np @unittest.skipIf( F._default_context_str == "gpu", reason="GPU random choice not implemented" ) def test_random_choice(): # test 1 a = F.arange(0, 100) x = dgl.random.choice(a, 10, replace=True, prob=None) assert len(x) == 10 for i in range(len(x)): assert F.asnumpy(x[i]) >= 0 and F.asnumpy(x[i]) < 100 # test 2, replace=False, small num a = F.arange(0, 100) x = dgl.random.choice(a, 10, replace=False, prob=None) 
# Checks for "test 2" of test_random_choice (10 draws without replacement).
    assert len(x) == 10
    for i in range(len(x)):
        assert F.asnumpy(x[i]) >= 0 and F.asnumpy(x[i]) < 100
    # test 3, replace=False, large num
    a = F.arange(0, 100)
    x = dgl.random.choice(a, 100, replace=False, prob=None)
    assert len(x) == 100
    # Drawing all 100 items without replacement must yield a permutation.
    assert np.array_equal(np.sort(F.asnumpy(x)), F.asnumpy(a))
    # test 4, first arg is integer
    x = dgl.random.choice(100, 100, replace=False, prob=None)
    assert len(x) == 100
    assert np.array_equal(np.sort(F.asnumpy(x)), F.asnumpy(a))
    # test 5, with prob
    prob = np.ones((100,))
    prob[37:40] = 0.0
    prob -= prob.min()
    prob /= prob.sum()
    prob = F.tensor(prob)
    x = dgl.random.choice(100, 97, replace=False, prob=prob)
    assert len(x) == 97
    # Items 37..39 have zero probability and must never be drawn.
    for i in range(len(x)):
        assert F.asnumpy(x[i]) < 37 or F.asnumpy(x[i]) >= 40


if __name__ == "__main__":
    test_random_choice()

================================================
FILE: tests/python/common/test_readout.py
================================================
import unittest

import backend as F

import dgl
import networkx as nx
import numpy as np
import pytest
from utils import parametrize_idtype
from utils.graph_cases import get_cases


@parametrize_idtype
def test_sum_case1(idtype):
    """Check ``dgl.sum_nodes`` on a single graph and on a batch of two,
    with and without a weight field."""
    # NOTE: If you want to update this test case, remember to update the docstring
    #  example too!!!
# Body of test_sum_case1: two small graphs, batched, summed per graph.
    g1 = dgl.graph(([0, 1], [1, 0]), idtype=idtype, device=F.ctx())
    g1.ndata["h"] = F.tensor([1.0, 2.0])
    g2 = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g2.ndata["h"] = F.tensor([1.0, 2.0, 3.0])
    bg = dgl.batch([g1, g2])
    bg.ndata["w"] = F.tensor([0.1, 0.2, 0.1, 0.5, 0.2])
    assert F.allclose(F.tensor([3.0]), dgl.sum_nodes(g1, "h"))
    assert F.allclose(F.tensor([3.0, 6.0]), dgl.sum_nodes(bg, "h"))
    assert F.allclose(F.tensor([0.5, 1.7]), dgl.sum_nodes(bg, "h", "w"))


@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("reducer", ["sum", "max", "mean"])
def test_reduce_readout(g, idtype, reducer):
    """A readout over a batched graph must equal the concatenation of the
    readouts over its unbatched component graphs.

    Both the generic ``readout_nodes``/``readout_edges`` entry points and
    the ``{reducer}_nodes``/``{reducer}_edges`` shortcuts are checked.
    """
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 3))
    g.edata["h"] = F.randn((g.num_edges(), 2))

    # Test.1: node readout
    x = dgl.readout_nodes(g, "h", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_nodes(sg, "h", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    x = getattr(dgl, "{}_nodes".format(reducer))(g, "h")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_nodes".format(reducer))(sg, "h")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.readout_edges(g, "h", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_edges(sg, "h", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    x = getattr(dgl, "{}_edges".format(reducer))(g, "h")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_edges".format(reducer))(sg, "h")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))


@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("reducer", ["sum", "max", "mean"])
def test_weighted_reduce_readout(g, idtype, reducer):
    """Same batched-versus-unbatched comparison as ``test_reduce_readout``,
    with the additional ``"w"`` weight field passed to every readout."""
    g = g.astype(idtype).to(F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 3))
    g.ndata["w"] = F.randn((g.num_nodes(), 1))
    g.edata["h"] = F.randn((g.num_edges(), 2))
    g.edata["w"] = F.randn((g.num_edges(), 1))

    # Test.1: node readout
    x = dgl.readout_nodes(g, "h", "w", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_nodes(sg, "h", "w", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    x = getattr(dgl, "{}_nodes".format(reducer))(g, "h", "w")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_nodes".format(reducer))(sg, "h", "w")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.readout_edges(g, "h", "w", op=reducer)
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = dgl.readout_edges(sg, "h", "w", op=reducer)
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))

    x = getattr(dgl, "{}_edges".format(reducer))(g, "h", "w")
    # check correctness
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        sx = getattr(dgl, "{}_edges".format(reducer))(sg, "h", "w")
        subx.append(sx)
    assert F.allclose(x, F.cat(subx, dim=0))


@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("descending", [True, False])
def test_topk(g, idtype, descending):
    """Compare ``topk_nodes``/``topk_edges`` on a batched graph with a NumPy
    argsort computed per unbatched graph."""
    g = g.astype(idtype).to(F.ctx())
    g.ndata["x"] = F.randn((g.num_nodes(), 3))

    # Test.1: to test the case where k > number of nodes.
dgl.topk_nodes(g, "x", 100, sortby=-1) # Test.2: test correctness min_nnodes = F.asnumpy(g.batch_num_nodes()).min() if min_nnodes <= 1: return k = min_nnodes - 1 val, indices = dgl.topk_nodes(g, "x", k, descending=descending, sortby=-1) print(k) print(g.ndata["x"]) print("val", val) print("indices", indices) subg = dgl.unbatch(g) subval, subidx = [], [] for sg in subg: subx = F.asnumpy(sg.ndata["x"]) ai = np.argsort(subx[:, -1:].flatten()) if descending: ai = np.ascontiguousarray(ai[::-1]) subx = np.expand_dims(subx[ai[:k]], 0) subval.append(F.tensor(subx)) subidx.append(F.tensor(np.expand_dims(ai[:k], 0))) print(F.cat(subval, dim=0)) assert F.allclose(val, F.cat(subval, dim=0)) assert F.allclose(indices, F.cat(subidx, dim=0)) # Test.3: sorby=None dgl.topk_nodes(g, "x", k, sortby=None) g.edata["x"] = F.randn((g.num_edges(), 3)) # Test.4: topk edges where k > number of edges. dgl.topk_edges(g, "x", 100, sortby=-1) # Test.5: topk edges test correctness min_nedges = F.asnumpy(g.batch_num_edges()).min() if min_nedges <= 1: return k = min_nedges - 1 val, indices = dgl.topk_edges(g, "x", k, descending=descending, sortby=-1) print(k) print(g.edata["x"]) print("val", val) print("indices", indices) subg = dgl.unbatch(g) subval, subidx = [], [] for sg in subg: subx = F.asnumpy(sg.edata["x"]) ai = np.argsort(subx[:, -1:].flatten()) if descending: ai = np.ascontiguousarray(ai[::-1]) subx = np.expand_dims(subx[ai[:k]], 0) subval.append(F.tensor(subx)) subidx.append(F.tensor(np.expand_dims(ai[:k], 0))) print(F.cat(subval, dim=0)) assert F.allclose(val, F.cat(subval, dim=0)) assert F.allclose(indices, F.cat(subidx, dim=0)) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"])) def test_softmax(g, idtype): g = g.astype(idtype).to(F.ctx()) g.ndata["h"] = F.randn((g.num_nodes(), 3)) g.edata["h"] = F.randn((g.num_edges(), 2)) # Test.1: node readout x = dgl.softmax_nodes(g, "h") subg = dgl.unbatch(g) subx = [] for sg in subg: 
# Loop body and remaining checks of test_softmax.
        subx.append(F.softmax(sg.ndata["h"], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))

    # Test.2: edge readout
    x = dgl.softmax_edges(g, "h")
    subg = dgl.unbatch(g)
    subx = []
    for sg in subg:
        subx.append(F.softmax(sg.edata["h"], dim=0))
    assert F.allclose(x, F.cat(subx, dim=0))


@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
def test_broadcast(idtype, g):
    """Broadcast one feature row per batched graph to its nodes and edges and
    check every unbatched graph received its own row, repeated."""
    g = g.astype(idtype).to(F.ctx())
    gfeat = F.randn((g.batch_size, 3))

    # Test.0: broadcast_nodes
    g.ndata["h"] = dgl.broadcast_nodes(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.ndata["h"],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.num_nodes(), dim=0),
        )

    # Test.1: broadcast_edges
    g.edata["h"] = dgl.broadcast_edges(g, gfeat)
    subg = dgl.unbatch(g)
    for i, sg in enumerate(subg):
        assert F.allclose(
            sg.edata["h"],
            F.repeat(F.reshape(gfeat[i], (1, 3)), sg.num_edges(), dim=0),
        )

================================================
FILE: tests/python/common/test_sparse_ops-csr.py
================================================
import backend as F

import dgl
import numpy as np
import pytest
import scipy.sparse as ssp
from utils import parametrize_idtype

if F.backend_name == "pytorch":
    import torch

    # Turn TF32 matmul off for the dense reference computations.
    torch.backends.cuda.matmul.allow_tf32 = False


def _random_simple_graph(
    idtype, dtype, ctx, M, N, max_nnz, srctype, dsttype, etype
):
    """Build a random M-by-N sparse matrix and the matching DGL graph.

    Up to ``max_nnz`` random entries are drawn; duplicate coordinates are
    merged with ``sum_duplicates`` and the surviving entries are shuffled so
    that edge order differs from CSR order.

    Returns
    -------
    (a, A)
        ``a`` is the SciPy CSR matrix; ``A`` is a heterograph with the single
        relation ``(srctype, etype, dsttype)`` whose edges follow the shuffled
        order and whose ``edata["w"]`` holds the matching values.
    """
    src = np.random.randint(0, M, (max_nnz,))
    dst = np.random.randint(0, N, (max_nnz,))
    val = np.random.randn(max_nnz)
    a = ssp.csr_matrix((val, (src, dst)), shape=(M, N))
    a.sum_duplicates()
    a = a.tocoo()
    # shuffle edges
    perm = np.random.permutation(a.nnz)
    row = a.row[perm]
    col = a.col[perm]
    val = a.data[perm]
    a = ssp.csr_matrix((val, (row, col)), shape=(M, N))

    A = dgl.heterograph(
        {
            (srctype, etype, dsttype): (
                F.copy_to(F.tensor(row, dtype=idtype), ctx),
                F.copy_to(F.tensor(col, dtype=idtype), ctx),
            )
        },
        num_nodes_dict={srctype: a.shape[0], dsttype: a.shape[1]},
    )
    A.edata["w"] = F.copy_to(F.tensor(val, dtype=dtype), ctx)
    return a, A
@parametrize_idtype
@pytest.mark.parametrize("dtype", [F.float32, F.float64])
@pytest.mark.parametrize("return_edge_ids", [True, False])
def test_csrmm(idtype, dtype, return_edge_ids):
    """Compare ``_csrmm`` on two random graphs with the SciPy product."""
    a, A = _random_simple_graph(
        idtype, dtype, F.ctx(), 500, 600, 9000, "A", "B", "AB"
    )
    b, B = _random_simple_graph(
        idtype, dtype, F.ctx(), 600, 700, 9000, "B", "C", "BC"
    )
    C, C_weights = dgl._sparse_ops._csrmm(
        A._graph, A.edata["w"], B._graph, B.edata["w"], 2
    )
    C_adj = C.adjacency_matrix_scipy(0, False, "csr", return_edge_ids)
    C_adj.data = F.asnumpy(C_weights)
    C_adj = F.tensor(C_adj.todense(), dtype=dtype)
    c = F.tensor((a * b).todense(), dtype=dtype)
    assert F.allclose(C_adj, c)


@parametrize_idtype
@pytest.mark.parametrize("dtype", [F.float32, F.float64])
@pytest.mark.parametrize("num_vtypes", [1, 2])
def test_csrmm_backward(idtype, dtype, num_vtypes):
    """Compare ``adj_product_graph`` values and gradients with a dense matmul.

    ``num_vtypes`` selects whether B's destination type is "A" again (one
    node type in the product) or a new type "C" (two node types).
    """
    a, A = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, "A", "B", "AB")
    b, B = _random_simple_graph(
        idtype,
        dtype,
        F.ctx(),
        4,
        3,
        6,
        "B",
        "A" if num_vtypes == 1 else "C",
        "BA",
    )
    A_row, A_col = A.edges(order="eid")
    B_row, B_col = B.edges(order="eid")
    A_row = F.asnumpy(A_row)
    A_col = F.asnumpy(A_col)
    B_row = F.asnumpy(B_row)
    B_col = F.asnumpy(B_col)
    a_dense = F.attach_grad(F.tensor(a.todense(), dtype=dtype))
    b_dense = F.attach_grad(F.tensor(b.todense(), dtype=dtype))

    A.edata["w"] = F.attach_grad(A.edata["w"])
    B.edata["w"] = F.attach_grad(B.edata["w"])

    # NOTE(review): the extent of this `with` block is inferred (the dump lost
    # indentation); it mirrors test_csrsum_backward, where the if/elif
    # structure forces the whole check to sit inside record_grad().
    with F.record_grad():
        C = dgl.adj_product_graph(A, B, "w")
        assert len(C.ntypes) == num_vtypes
        assert len(C.etypes) == 1
        C_dense = np.zeros((3, 3))
        C_row, C_col = C.edges(order="eid")
        C_row = F.asnumpy(C_row)
        C_col = F.asnumpy(C_col)
        C_dense[C_row, C_col] = F.asnumpy(C.edata["w"])
        c_dense = F.matmul(a_dense, b_dense)
        assert np.allclose(C_dense, F.asnumpy(c_dense), rtol=1e-4, atol=1e-4)

        F.backward(F.reduce_sum(C.edata["w"]) + F.reduce_sum(c_dense))
        # Dense gradients are sampled at the edge coordinates so they line up
        # with the per-edge sparse gradients.
        a_dense_grad = F.asnumpy(F.grad(a_dense))[A_row, A_col]
        b_dense_grad = F.asnumpy(F.grad(b_dense))[B_row, B_col]
        A_spspmm_grad = F.asnumpy(F.grad(A.edata["w"]))
        B_spspmm_grad = F.asnumpy(F.grad(B.edata["w"]))
        assert np.allclose(a_dense_grad, A_spspmm_grad, rtol=1e-4, atol=1e-4)
        assert np.allclose(b_dense_grad, B_spspmm_grad, rtol=1e-4, atol=1e-4)


@parametrize_idtype
@pytest.mark.parametrize("dtype", [F.float32, F.float64])
@pytest.mark.parametrize("return_edge_ids", [True, False])
def test_csrsum(idtype, dtype, return_edge_ids):
    """Compare ``_csrsum`` on two random graphs with the SciPy sum."""
    a, A = _random_simple_graph(
        idtype, dtype, F.ctx(), 500, 600, 9000, "A", "B", "AB"
    )
    b, B = _random_simple_graph(
        idtype, dtype, F.ctx(), 500, 600, 9000, "A", "B", "AB"
    )
    C, C_weights = dgl._sparse_ops._csrsum(
        [A._graph, B._graph], [A.edata["w"], B.edata["w"]]
    )
    C_adj = C.adjacency_matrix_scipy(0, False, "csr", return_edge_ids)
    C_adj.data = F.asnumpy(C_weights)
    C_adj = F.tensor(C_adj.todense(), dtype=dtype)
    c = F.tensor((a + b).todense(), dtype=dtype)
    assert F.allclose(C_adj, c)


@parametrize_idtype
@pytest.mark.parametrize("dtype", [F.float32, F.float64])
@pytest.mark.parametrize("nelems", [1, 2])
def test_csrsum_backward(idtype, dtype, nelems):
    """Compare ``adj_sum_graph`` values and gradients with a dense sum, for
    one and for two input graphs."""
    a, A = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, "A", "B", "AB")
    b, B = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, "A", "B", "AB")
    A_row, A_col = A.edges(order="eid")
    B_row, B_col = B.edges(order="eid")
    A_row = F.asnumpy(A_row)
    A_col = F.asnumpy(A_col)
    B_row = F.asnumpy(B_row)
    B_col = F.asnumpy(B_col)
    a_dense = F.attach_grad(F.tensor(a.todense(), dtype=dtype))
    b_dense = F.attach_grad(F.tensor(b.todense(), dtype=dtype))

    A.edata["w"] = F.attach_grad(A.edata["w"])
    B.edata["w"] = F.attach_grad(B.edata["w"])

    with F.record_grad():
        if nelems == 2:
            # Test for two element case
            C = dgl.adj_sum_graph([A, B], "w")
            assert C.canonical_etypes == A.canonical_etypes
            C_dense = np.zeros((3, 4))
            C_row, C_col = C.edges(order="eid")
            C_row = F.asnumpy(C_row)
            C_col = F.asnumpy(C_col)
            C_dense[C_row, C_col] = F.asnumpy(C.edata["w"])
            c_dense = a_dense + b_dense
            assert np.allclose(
                C_dense,
                F.asnumpy(c_dense),
                rtol=1e-4,
atol=1e-4 ) F.backward(F.reduce_sum(C.edata["w"]) + F.reduce_sum(c_dense)) a_dense_grad = F.asnumpy(F.grad(a_dense))[A_row, A_col] b_dense_grad = F.asnumpy(F.grad(b_dense))[B_row, B_col] A_spspmm_grad = F.asnumpy(F.grad(A.edata["w"])) B_spspmm_grad = F.asnumpy(F.grad(B.edata["w"])) assert np.allclose( a_dense_grad, A_spspmm_grad, rtol=1e-4, atol=1e-4 ) assert np.allclose( b_dense_grad, B_spspmm_grad, rtol=1e-4, atol=1e-4 ) elif nelems == 1: # Test for single element case C = dgl.adj_sum_graph([A], "w") assert C.canonical_etypes == A.canonical_etypes C_dense = np.zeros((3, 4)) C_row, C_col = C.edges(order="eid") C_row = F.asnumpy(C_row) C_col = F.asnumpy(C_col) C_dense[C_row, C_col] = F.asnumpy(C.edata["w"]) c_dense = a_dense assert np.allclose( C_dense, F.asnumpy(c_dense), rtol=1e-4, atol=1e-4 ) F.backward(F.reduce_sum(C.edata["w"]) + F.reduce_sum(c_dense)) a_dense_grad = F.asnumpy(F.grad(a_dense))[A_row, A_col] A_spspmm_grad = F.asnumpy(F.grad(A.edata["w"])) assert np.allclose( a_dense_grad, A_spspmm_grad, rtol=1e-4, atol=1e-4 ) @parametrize_idtype @pytest.mark.parametrize("dtype", [F.float32, F.float64]) @pytest.mark.parametrize("A_nnz", [9000, 0]) @pytest.mark.parametrize("B_nnz", [9000, 0]) def test_csrmask(idtype, dtype, A_nnz, B_nnz): a, A = _random_simple_graph( idtype, dtype, F.ctx(), 500, 600, A_nnz, "A", "B", "AB" ) b, B = _random_simple_graph( idtype, dtype, F.ctx(), 500, 600, B_nnz, "A", "B", "AB" ) C = dgl._sparse_ops._csrmask(A._graph, A.edata["w"], B._graph) B_row, B_col = B.edges(order="eid") B_row = F.asnumpy(B_row) B_col = F.asnumpy(B_col) c = F.tensor(a.todense()[B_row, B_col], dtype) assert F.allclose(C, c) @parametrize_idtype @pytest.mark.parametrize("dtype", [F.float32, F.float64]) def test_csrmask_backward(idtype, dtype): a, A = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, "A", "B", "AB") b, B = _random_simple_graph(idtype, dtype, F.ctx(), 3, 4, 6, "A", "B", "AB") A_row, A_col = A.edges(order="eid") B_row, B_col = 
B.edges(order="eid") A_row = F.asnumpy(A_row) A_col = F.asnumpy(A_col) B_row = F.asnumpy(B_row) B_col = F.asnumpy(B_col) a_dense = F.attach_grad(F.tensor(a.todense(), dtype=dtype)) A.edata["w"] = F.attach_grad(A.edata["w"]) with F.record_grad(): # Test for two element case C1 = F.csrmask(A._graph, A.edata["w"], B._graph) if dgl.backend.backend_name == "tensorflow": import tensorflow as tf C2 = tf.gather_nd(a_dense, tf.stack([B_row, B_col], 1)) else: C2 = a_dense[B_row, B_col] assert F.allclose(C1, C2, rtol=1e-4, atol=1e-4) F.backward(F.reduce_sum(C1) + F.reduce_sum(C2)) a_dense_grad = F.asnumpy(F.grad(a_dense))[A_row, A_col] A_spspmm_grad = F.asnumpy(F.grad(A.edata["w"])) assert np.allclose(a_dense_grad, A_spspmm_grad, rtol=1e-4, atol=1e-4) if __name__ == "__main__": test_csrmm(F.int32, F.float32) test_csrmm(F.int64, F.float32) test_csrsum(F.int32, F.float32) test_csrsum(F.int64, F.float32) test_csrmask(F.int32, F.float32, 9000, 9000) test_csrmask(F.int64, F.float32, 9000, 0) test_csrmask(F.int32, F.float32, 0, 9000) test_csrmask(F.int64, F.float32, 0, 0) test_csrmm_backward(F.int32, F.float32, 1) test_csrmm_backward(F.int64, F.float32, 1) test_csrmm_backward(F.int32, F.float32, 2) test_csrmm_backward(F.int64, F.float32, 2) test_csrsum_backward(F.int32, F.float32, 1) test_csrsum_backward(F.int64, F.float32, 1) test_csrsum_backward(F.int32, F.float32, 2) test_csrsum_backward(F.int64, F.float32, 2) test_csrmask_backward(F.int32, F.float32) test_csrmask_backward(F.int64, F.float32) ================================================ FILE: tests/python/common/test_subgraph.py ================================================ import unittest import backend as F import dgl import networkx as nx import numpy as np import pytest import scipy.sparse as ssp from utils import parametrize_idtype D = 5 def generate_graph(grad=False, add_data=True): g = dgl.graph([]).to(F.ctx()) g.add_nodes(10) # create a graph where 0 is the source and 9 is the sink for i in range(1, 9): 
# Loop body and remainder of generate_graph: edges 0 -> i -> 9 for i in 1..8.
        g.add_edges(0, i)
        g.add_edges(i, 9)
    # add a back flow from 9 to 0
    g.add_edges(9, 0)
    if add_data:
        # 10 nodes and 17 edges (2 * 8 + the back edge), D features each.
        ncol = F.randn((10, D))
        ecol = F.randn((17, D))
        if grad:
            ncol = F.attach_grad(ncol)
            ecol = F.attach_grad(ecol)
        g.ndata["h"] = ncol
        g.edata["l"] = ecol
    return g


def test_edge_subgraph():
    """Edge subgraphs of a feature-less graph must record the parent node and
    edge IDs and accept new features, with and without node relabeling."""
    # Test when the graph has no node data and edge data.
    g = generate_graph(add_data=False)
    eid = [0, 2, 3, 6, 7, 9]

    # relabel=True
    sg = g.edge_subgraph(eid)
    assert F.array_equal(
        sg.ndata[dgl.NID], F.tensor([0, 2, 4, 5, 1, 9], g.idtype)
    )
    assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype))
    sg.ndata["h"] = F.arange(0, sg.num_nodes())
    sg.edata["h"] = F.arange(0, sg.num_edges())

    # relabel=False
    sg = g.edge_subgraph(eid, relabel_nodes=False)
    assert g.num_nodes() == sg.num_nodes()
    assert F.array_equal(sg.edata[dgl.EID], F.tensor(eid, g.idtype))
    sg.ndata["h"] = F.arange(0, sg.num_nodes())
    sg.edata["h"] = F.arange(0, sg.num_edges())


@pytest.mark.parametrize("relabel_nodes", [True, False])
def test_subgraph_relabel_nodes(relabel_nodes):
    """Node subgraphs must carry the parent's features and edge IDs, and
    writing features on the subgraph must not change the parent graph."""
    g = generate_graph()
    h = g.ndata["h"]
    l = g.edata["l"]
    nid = [0, 2, 3, 6, 7, 9]
    sg = g.subgraph(nid, relabel_nodes=relabel_nodes)
    eid = {2, 3, 4, 5, 10, 11, 12, 13, 16}
    assert set(F.asnumpy(sg.edata[dgl.EID])) == eid
    eid = sg.edata[dgl.EID]
    # the subgraph is empty initially except for EID field
    # the subgraph is empty initially except for NID field if relabel_nodes
    # NOTE(review): which of the two asserts below sits under the `if` is
    # inferred from the two comments above; the dump lost indentation.
    if relabel_nodes:
        assert len(sg.ndata) == 2
    assert len(sg.edata) == 2
    sh = sg.ndata["h"]
    # The node number is not reduced if relabel_node=False.
    # The subgraph keeps the same node information as the original graph.
    if relabel_nodes:
        assert F.allclose(F.gather_row(h, F.tensor(nid)), sh)
    else:
        assert F.allclose(
            F.gather_row(h, F.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])), sh
        )
    # The s,d,eid means the source node, destination node and edge id of the subgraph.
    # The edges labeled 1 are those selected by the subgraph.
""" s, d, eid 0, 1, 0 1, 9, 1 0, 2, 2 1 2, 9, 3 1 0, 3, 4 1 3, 9, 5 1 0, 4, 6 4, 9, 7 0, 5, 8 5, 9, 9 3 0, 6, 10 1 6, 9, 11 1 3 0, 7, 12 1 7, 9, 13 1 3 0, 8, 14 8, 9, 15 3 9, 0, 16 1 """ assert F.allclose(F.gather_row(l, eid), sg.edata["l"]) # update the node/edge features on the subgraph should NOT # reflect to the parent graph. if relabel_nodes: sg.ndata["h"] = F.zeros((6, D)) else: sg.ndata["h"] = F.zeros((10, D)) assert F.allclose(h, g.ndata["h"]) def _test_map_to_subgraph(): g = dgl.graph([]) g.add_nodes(10) g.add_edges(F.arange(0, 9), F.arange(1, 10)) h = g.subgraph([0, 1, 2, 5, 8]) v = h.map_to_subgraph_nid([0, 8, 2]) assert np.array_equal(F.asnumpy(v), np.array([0, 4, 2])) def create_test_heterograph(idtype): # test heterograph from the docstring, plus a user -- wishes -- game relation # 3 users, 2 games, 2 developers # metagraph: # ('user', 'follows', 'user'), # ('user', 'plays', 'game'), # ('user', 'wishes', 'game'), # ('developer', 'develops', 'game')]) g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([0, 1], [0, 1]), }, idtype=idtype, device=F.ctx(), ) for etype in g.etypes: g.edges[etype].data["weight"] = F.randn((g.num_edges(etype),)) assert g.idtype == idtype assert g.device == F.ctx() return g def create_test_heterograph2(idtype): """test heterograph from the docstring, with an empty relation""" # 3 users, 2 games, 2 developers # metagraph: # ('user', 'follows', 'user'), # ('user', 'plays', 'game'), # ('user', 'wishes', 'game'), # ('developer', 'develops', 'game') g = dgl.heterograph( { ("user", "follows", "user"): ([0, 1], [1, 2]), ("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]), ("user", "wishes", "game"): ([0, 2], [1, 0]), ("developer", "develops", "game"): ([], []), }, idtype=idtype, device=F.ctx(), ) for etype in g.etypes: g.edges[etype].data["weight"] = 
@unittest.skipIf(
    dgl.backend.backend_name == "mxnet",
    reason="MXNet doesn't support bool tensor",
)
@parametrize_idtype
def test_subgraph_mask(idtype):
    """Check ``subgraph`` / ``edge_subgraph`` when selections are bool masks.

    The graph comes from ``create_test_heterograph`` (3 users, 2 games,
    2 developers).  A node-mask selection and an edge-mask selection are
    each validated with the same ``_check_subgraph`` helper.

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    """
    g = create_test_heterograph(idtype)
    g.nodes["user"].data["h"] = F.randn((3, 5))
    g.edges["follows"].data["h"] = F.randn((2, 4))

    def _check_subgraph(g, sg):
        """Assert ``sg`` keeps users 1-2, game 0 and edge 1 of each relation."""
        assert sg.idtype == g.idtype
        assert sg.device == g.device
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes
        assert F.array_equal(
            F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], idtype)
        )
        assert F.array_equal(
            F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], idtype)
        )
        assert F.array_equal(
            F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], idtype)
        )
        assert F.array_equal(
            F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], idtype)
        )
        assert F.array_equal(
            F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], idtype)
        )
        assert sg.num_nodes("developer") == 0
        assert sg.num_edges("develops") == 0
        # Features must be sliced together with the structure.
        assert F.array_equal(
            sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3]
        )
        assert F.array_equal(
            sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2]
        )

    # Every mask has exactly one entry per node of its type: 3 users and
    # 2 games.  An over-long mask would still select the same IDs, but it
    # would not describe a valid selection on this graph.
    sg1 = g.subgraph(
        {
            "user": F.tensor([False, True, True], dtype=F.bool),
            "game": F.tensor([True, False], dtype=F.bool),
        }
    )
    _check_subgraph(g, sg1)

    # Edge masks: 2 "follows", 4 "plays" and 2 "wishes" edges.
    sg2 = g.edge_subgraph(
        {
            "follows": F.tensor([False, True], dtype=F.bool),
            "plays": F.tensor([False, True, False, False], dtype=F.bool),
            "wishes": F.tensor([False, True], dtype=F.bool),
        }
    )
    _check_subgraph(g, sg2)
sg.device == g.device assert sg.ntypes == g.ntypes assert sg.etypes == g.etypes assert sg.canonical_etypes == g.canonical_etypes assert F.array_equal( F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], g.idtype) ) assert F.array_equal( F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], g.idtype) ) assert F.array_equal( F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype) ) assert F.array_equal( F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([1], g.idtype) ) assert F.array_equal( F.tensor(sg.edges["wishes"].data[dgl.EID]), F.tensor([1], g.idtype) ) assert sg.num_nodes("developer") == 0 assert sg.num_edges("develops") == 0 assert F.array_equal( sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3] ) assert F.array_equal( sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2] ) sg1 = g.subgraph({"user": [1, 2], "game": [0]}) _check_subgraph(g, sg1) sg2 = g.edge_subgraph({"follows": [1], "plays": [1], "wishes": [1]}) _check_subgraph(g, sg2) # backend tensor input sg1 = g.subgraph( { "user": F.tensor([1, 2], dtype=idtype), "game": F.tensor([0], dtype=idtype), } ) _check_subgraph(g, sg1) sg2 = g.edge_subgraph( { "follows": F.tensor([1], dtype=idtype), "plays": F.tensor([1], dtype=idtype), "wishes": F.tensor([1], dtype=idtype), } ) _check_subgraph(g, sg2) # numpy input sg1 = g.subgraph({"user": np.array([1, 2]), "game": np.array([0])}) _check_subgraph(g, sg1) sg2 = g.edge_subgraph( { "follows": np.array([1]), "plays": np.array([1]), "wishes": np.array([1]), } ) _check_subgraph(g, sg2) def _check_subgraph_single_ntype(g, sg, preserve_nodes=False): assert sg.idtype == g.idtype assert sg.device == g.device assert sg.ntypes == g.ntypes assert sg.etypes == g.etypes assert sg.canonical_etypes == g.canonical_etypes if not preserve_nodes: assert F.array_equal( F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([1, 2], g.idtype), ) else: for ntype in sg.ntypes: assert g.num_nodes(ntype) == sg.num_nodes(ntype) assert F.array_equal( 
F.tensor(sg.edges["follows"].data[dgl.EID]), F.tensor([1], g.idtype) ) if not preserve_nodes: assert F.array_equal( sg.nodes["user"].data["h"], g.nodes["user"].data["h"][1:3] ) assert F.array_equal( sg.edges["follows"].data["h"], g.edges["follows"].data["h"][1:2] ) def _check_subgraph_single_etype(g, sg, preserve_nodes=False): assert sg.ntypes == g.ntypes assert sg.etypes == g.etypes assert sg.canonical_etypes == g.canonical_etypes if not preserve_nodes: assert F.array_equal( F.tensor(sg.nodes["user"].data[dgl.NID]), F.tensor([0, 1], g.idtype), ) assert F.array_equal( F.tensor(sg.nodes["game"].data[dgl.NID]), F.tensor([0], g.idtype), ) else: for ntype in sg.ntypes: assert g.num_nodes(ntype) == sg.num_nodes(ntype) assert F.array_equal( F.tensor(sg.edges["plays"].data[dgl.EID]), F.tensor([0, 1], g.idtype), ) sg1_graph = g_graph.subgraph([1, 2]) _check_subgraph_single_ntype(g_graph, sg1_graph) sg1_graph = g_graph.edge_subgraph([1]) _check_subgraph_single_ntype(g_graph, sg1_graph) sg1_graph = g_graph.edge_subgraph([1], relabel_nodes=False) _check_subgraph_single_ntype(g_graph, sg1_graph, True) sg2_bipartite = g_bipartite.edge_subgraph([0, 1]) _check_subgraph_single_etype(g_bipartite, sg2_bipartite) sg2_bipartite = g_bipartite.edge_subgraph([0, 1], relabel_nodes=False) _check_subgraph_single_etype(g_bipartite, sg2_bipartite, True) def _check_typed_subgraph1(g, sg): assert g.idtype == sg.idtype assert g.device == sg.device assert set(sg.ntypes) == {"user", "game"} assert set(sg.etypes) == {"follows", "plays", "wishes"} for ntype in sg.ntypes: assert sg.num_nodes(ntype) == g.num_nodes(ntype) for etype in sg.etypes: src_sg, dst_sg = sg.all_edges(etype=etype, order="eid") src_g, dst_g = g.all_edges(etype=etype, order="eid") assert F.array_equal(src_sg, src_g) assert F.array_equal(dst_sg, dst_g) assert F.array_equal( sg.nodes["user"].data["h"], g.nodes["user"].data["h"] ) assert F.array_equal( sg.edges["follows"].data["h"], g.edges["follows"].data["h"] ) 
@parametrize_idtype
def test_in_subgraph(idtype):
    """Check ``dgl.in_subgraph`` on a four-relation heterograph.

    Three call variants are exercised with the seeds ``user: [0, 1]`` and
    ``game: 0``: the default call, ``store_ids=False`` and
    ``relabel_nodes=True``.

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    """

    def pair_set(src, dst):
        # Fold two ID tensors into a set of (src, dst) tuples.
        return set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))

    hg = dgl.heterograph(
        {
            ("user", "follow", "user"): (
                [1, 2, 3, 0, 2, 3, 0],
                [0, 0, 0, 1, 1, 1, 2],
            ),
            ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
            ("game", "liked-by", "user"): (
                [2, 2, 2, 1, 1, 0],
                [0, 1, 2, 0, 3, 0],
            ),
            ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
        },
        idtype=idtype,
        num_nodes_dict={"user": 5, "game": 10, "coin": 8},
    ).to(F.ctx())

    # (src type, relation, dst type) -> expected edges in parent node IDs.
    expected_edges = [
        (
            ("user", "follow", "user"),
            {(1, 0), (2, 0), (3, 0), (0, 1), (2, 1), (3, 1)},
        ),
        (("user", "play", "game"), {(0, 0)}),
        (("game", "liked-by", "user"), {(2, 0), (2, 1), (1, 0), (0, 0)}),
    ]

    subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    for (_src_type, rel, _dst_type), wanted in expected_edges:
        src, dst = subg[rel].edges()
        assert F.array_equal(
            hg[rel].edge_ids(src, dst), subg[rel].edata[dgl.EID]
        )
        assert pair_set(src, dst) == wanted
    assert subg["flips"].num_edges() == 0
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # Test store_ids
    subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0}, store_ids=False)
    for (_src_type, rel, _dst_type), _wanted in expected_edges:
        assert dgl.EID not in subg.edges[rel].data
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # Test relabel nodes
    subg = dgl.in_subgraph(hg, {"user": [0, 1], "game": 0}, relabel_nodes=True)
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    for (src_type, rel, dst_type), wanted in expected_edges:
        src, dst = subg[rel].edges()
        # Translate subgraph-local endpoints back to parent node IDs.
        parent_src = F.gather_row(subg.nodes[src_type].data[dgl.NID], src)
        parent_dst = F.gather_row(subg.nodes[dst_type].data[dgl.NID], dst)
        assert F.array_equal(
            hg[rel].edge_ids(parent_src, parent_dst), subg[rel].edata[dgl.EID]
        )
        assert pair_set(parent_src, parent_dst) == wanted

    assert subg.num_nodes("user") == 4
    assert subg.num_nodes("game") == 3
    assert subg.num_nodes("coin") == 0
    assert subg.num_edges("flips") == 0
def test_subgraph_message_passing():
    """Run ``update_all`` on a subgraph moved to the test context.

    Unit test for PR #2055: the parent graph and its ``"x"`` feature are
    placed on CPU, the node subgraph is moved to ``F.ctx()``, and one round
    of message passing is executed on it.
    """

    def copy_src_feature(edges):
        # Message: forward the source node's "x".
        return {"x": edges.src["x"]}

    def sum_mailbox(nodes):
        # Reduce: sum the received messages along axis 1.
        return {"y": F.sum(nodes.mailbox["x"], 1)}

    cpu_graph = dgl.graph(([0, 1, 2], [2, 3, 4])).to(F.cpu())
    cpu_graph.ndata["x"] = F.copy_to(F.randn((5, 6)), F.cpu())
    moved_subgraph = cpu_graph.subgraph([1, 2, 3]).to(F.ctx())
    moved_subgraph.update_all(copy_src_feature, sum_mailbox)
test_khop_in_subgraph(idtype): g = dgl.graph( ([1, 1, 2, 3, 4], [0, 2, 0, 4, 2]), idtype=idtype, device=F.ctx() ) g.edata["w"] = F.tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]) sg, inv = dgl.khop_in_subgraph(g, 0, k=2) assert sg.idtype == g.idtype u, v = sg.edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(1, 0), (1, 2), (2, 0), (3, 2)} assert F.array_equal( sg.edata[dgl.EID], F.tensor([0, 1, 2, 4], dtype=idtype) ) assert F.array_equal( sg.edata["w"], F.tensor([[0, 1], [2, 3], [4, 5], [8, 9]]) ) assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype)) # Test multiple nodes sg, inv = dgl.khop_in_subgraph(g, [0, 2], k=1) assert sg.num_edges() == 4 sg, inv = dgl.khop_in_subgraph(g, F.tensor([0, 2], idtype), k=1) assert sg.num_edges() == 4 # Test isolated node sg, inv = dgl.khop_in_subgraph(g, 1, k=2) assert sg.idtype == g.idtype assert sg.num_nodes() == 1 assert sg.num_edges() == 0 assert F.array_equal(F.astype(inv, idtype), F.tensor([0], idtype)) g = dgl.heterograph( { ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]), ("user", "follows", "user"): ([0, 1, 1], [1, 2, 2]), }, idtype=idtype, device=F.ctx(), ) sg, inv = dgl.khop_in_subgraph(g, {"game": 0}, k=2) assert sg.idtype == idtype assert sg.num_nodes("game") == 1 assert sg.num_nodes("user") == 2 assert len(sg.ntypes) == 2 assert len(sg.etypes) == 2 u, v = sg["follows"].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(0, 1)} u, v = sg["plays"].edges() edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v)))) assert edge_set == {(0, 0), (1, 0)} assert F.array_equal(F.astype(inv["game"], idtype), F.tensor([0], idtype)) # Test isolated node sg, inv = dgl.khop_in_subgraph(g, {"user": 0}, k=2) assert sg.idtype == idtype assert sg.num_nodes("game") == 0 assert sg.num_nodes("user") == 1 assert sg.num_edges("follows") == 0 assert sg.num_edges("plays") == 0 assert F.array_equal(F.astype(inv["user"], idtype), F.tensor([0], 
@parametrize_idtype
def test_khop_out_subgraph(idtype):
    """Check ``dgl.khop_out_subgraph`` on a homogeneous and a hetero graph.

    Each graph is tested with a single seed, several seeds, and a seed that
    yields an empty edge set.

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    """

    def pair_set(src, dst):
        # Fold two ID tensors into a set of (src, dst) tuples.
        return set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))

    zero_index = F.tensor([0], idtype)

    # ---- homogeneous graph ----
    homo = dgl.graph(
        ([0, 2, 0, 4, 2], [1, 1, 2, 3, 4]), idtype=idtype, device=F.ctx()
    )
    homo.edata["w"] = F.tensor([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]])

    sub, inverse = dgl.khop_out_subgraph(homo, 0, k=2)
    assert sub.idtype == homo.idtype
    src, dst = sub.edges()
    assert pair_set(src, dst) == {(0, 1), (2, 1), (0, 2), (2, 3)}
    assert F.array_equal(
        sub.edata[dgl.EID], F.tensor([0, 2, 1, 4], dtype=idtype)
    )
    assert F.array_equal(
        sub.edata["w"], F.tensor([[0, 1], [4, 5], [2, 3], [8, 9]])
    )
    assert F.array_equal(F.astype(inverse, idtype), zero_index)

    # Test multiple nodes
    sub, inverse = dgl.khop_out_subgraph(homo, [0, 2], k=1)
    assert sub.num_edges() == 4

    sub, inverse = dgl.khop_out_subgraph(homo, F.tensor([0, 2], idtype), k=1)
    assert sub.num_edges() == 4

    # Test isolated node
    sub, inverse = dgl.khop_out_subgraph(homo, 1, k=2)
    assert sub.idtype == homo.idtype
    assert sub.num_nodes() == 1
    assert sub.num_edges() == 0
    assert F.array_equal(F.astype(inverse, idtype), zero_index)

    # ---- heterogeneous graph ----
    hetero = dgl.heterograph(
        {
            ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 2, 1]),
            ("user", "follows", "user"): ([0, 1], [1, 3]),
        },
        idtype=idtype,
        device=F.ctx(),
    )

    sub, inverse = dgl.khop_out_subgraph(hetero, {"user": 0}, k=2)
    assert sub.idtype == idtype
    assert sub.num_nodes("game") == 2
    assert sub.num_nodes("user") == 3
    assert len(sub.ntypes) == 2
    assert len(sub.etypes) == 2
    src, dst = sub["follows"].edges()
    assert pair_set(src, dst) == {(0, 1), (1, 2)}
    src, dst = sub["plays"].edges()
    assert pair_set(src, dst) == {(0, 0), (1, 0), (1, 1)}
    assert F.array_equal(F.astype(inverse["user"], idtype), zero_index)

    # Test isolated node
    sub, inverse = dgl.khop_out_subgraph(hetero, {"user": 3}, k=2)
    assert sub.idtype == idtype
    assert sub.num_nodes("game") == 0
    assert sub.num_nodes("user") == 1
    assert sub.num_edges("follows") == 0
    assert sub.num_edges("plays") == 0
    assert F.array_equal(F.astype(inverse["user"], idtype), zero_index)

    # Test multiple nodes
    sub, inverse = dgl.khop_out_subgraph(
        hetero, {"user": F.tensor([2], idtype), "game": 0}, k=1
    )
    assert sub.num_edges("follows") == 0
    src, dst = sub["plays"].edges()
    assert pair_set(src, dst) == {(0, 1)}
    assert F.array_equal(F.astype(inverse["user"], idtype), zero_index)
    assert F.array_equal(F.astype(inverse["game"], idtype), zero_index)
@unittest.skipIf(
    F._default_context_str != "gpu", reason="UVA only available on GPU"
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch",
    reason="UVA only supported for PyTorch",
)
@pytest.mark.parametrize("device", [F.cpu(), F.cuda()])
@parametrize_idtype
def test_uva_subgraph(idtype, device):
    """Check that subgraph APIs on a pinned graph follow the index device.

    The heterograph from ``create_test_heterograph2`` is moved to CPU, its
    formats are materialised and it is pinned.  Every extraction/sampling
    call below is then expected to return a graph whose ``device`` equals
    the device the index tensors were copied to.

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    device
        Device on which the seed node/edge index tensors are placed.
    """
    g = create_test_heterograph2(idtype)
    g = g.to(F.cpu())
    g.create_formats_()
    g.pin_memory_()
    # Unpin in ``finally`` so a failing assertion cannot leave the graph
    # pinned for the remainder of the test session.
    try:
        indices = {"user": F.copy_to(F.tensor([0], idtype), device)}
        edge_indices = {"follows": F.copy_to(F.tensor([0], idtype), device)}
        assert g.subgraph(indices).device == device
        assert g.edge_subgraph(edge_indices).device == device
        assert g.in_subgraph(indices).device == device
        assert g.out_subgraph(indices).device == device
        assert g.khop_in_subgraph(indices, 1)[0].device == device
        assert g.khop_out_subgraph(indices, 1)[0].device == device
        assert g.sample_neighbors(indices, 1).device == device
    finally:
        g.unpin_memory_()
@parametrize_idtype
def test_topological_nodes(idtype, n=100):
    """Compare ``dgl.topological_nodes_generator`` with an SpMM reference.

    A random sparse matrix is reduced to its strictly lower triangle and
    turned into a graph.  The frontiers produced by DGL must match, layer
    by layer, those produced by the tensor-based traversal defined below.

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    n : int
        Side length of the random sparse matrix.
    """
    random_mat = sp.random(n, n, 3 / n, data_rvs=lambda size: np.ones(size))
    lower_tri = sp.tril(random_mat, -1).tocoo()
    g = dgl.from_scipy(lower_tri).astype(idtype)

    dgl_layers = dgl.topological_nodes_generator(g)

    adj_t = g.adj_external(transpose=True)

    def spmm_layers():
        # ``remaining`` marks nodes not yet emitted; ``degree`` is the
        # SpMM count over those nodes and is reduced after every frontier.
        node_count = g.num_nodes()
        remaining = F.copy_to(F.ones((node_count, 1)), F.cpu())
        degree = F.spmm(adj_t, remaining)
        while F.reduce_sum(remaining) != 0.0:
            ready = F.astype((degree == 0.0), F.float32) * remaining
            remaining = remaining - ready
            yield F.copy_to(F.nonzero_1d(F.squeeze(ready, 1)), F.cpu())
            degree -= F.spmm(adj_t, ready)

    reference_layers = list(spmm_layers())

    assert len(dgl_layers) == len(reference_layers)
    assert all(
        toset(got) == toset(want)
        for got, want in zip(dgl_layers, reference_layers)
    )
def check_sort(spm, tag_arr=None, tag_pos=None):
    """Return True when every row of ``spm`` lists its columns in tag order.

    Parameters
    ----------
    spm
        Sparse matrix providing ``shape``, ``getrow`` and ``nonzero``.
    tag_arr
        Optional backend tensor of tags indexed by column ID.  When omitted,
        ``np.arange(spm.shape[0])`` is used as the tag array.
    tag_pos
        Optional backend tensor with one row of tag offsets per matrix row.
        When given, tag boundaries inside each row are also validated
        against it.

    Returns
    -------
    bool
        False as soon as a row violates the ordering (or the offsets),
        True otherwise.
    """
    row_count = spm.shape[0]
    tags = np.arange(row_count) if tag_arr is None else F.asnumpy(tag_arr)
    offsets = None if tag_pos is None else F.asnumpy(tag_pos)
    verify_offsets = offsets is not None

    for row_id in range(row_count):
        cols = spm.getrow(row_id).nonzero()[1]
        if verify_offsets:
            row_offsets = offsets[row_id]
            expected_tag = tags[cols[0]] if len(cols) > 0 else 0
        for pos in range(len(cols) - 1):
            current_tag = tags[cols[pos]]
            following_tag = tags[cols[pos + 1]]
            if verify_offsets and current_tag != expected_tag:
                # `expected_tag` is the tag this position must carry.
                return False
            if current_tag > following_tag:
                # The tag should be in ascending order after sorting
                return False
            if verify_offsets and current_tag < following_tag:
                if pos + 1 != int(row_offsets[expected_tag + 1]):
                    # The boundary of tag should be consistent with `tag_pos`
                    return False
                expected_tag = following_tag
    return True
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
@parametrize_idtype
def test_sort_with_tag_bipartite(idtype):
    """Check tag-based CSR/CSC sorting on a ``_U -> _V`` bipartite graph.

    Random tags are drawn for both node types.  After sorting, the new
    adjacency must pass ``check_sort`` together with the ``_TAG_OFFSET``
    node data, while the adjacency of the input graph must still fail it.

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    """
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    homo = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
    bipartite = dgl.heterograph({("_U", "_E", "_V"): homo.edges()})
    # Draw order matters for the seeded RNG: source tags first.
    src_tags = F.tensor(np.random.choice(num_tags, bipartite.num_nodes("_U")))
    dst_tags = F.tensor(np.random.choice(num_tags, bipartite.num_nodes("_V")))

    # CSR: each source row is sorted by destination tag.
    csr_sorted = dgl.sort_csr_by_tag(bipartite, dst_tags)
    csr_before = bipartite.adj_external(scipy_fmt="csr")
    csr_after = csr_sorted.adj_external(scipy_fmt="csr")
    csr_offsets = csr_sorted.nodes["_U"].data["_TAG_OFFSET"]
    assert check_sort(csr_after, dst_tags, csr_offsets)
    assert not check_sort(csr_before, dst_tags)

    # CSC: each destination column is sorted by source tag.
    csc_sorted = dgl.sort_csc_by_tag(bipartite, src_tags)
    csc_before = bipartite.adj_external(transpose=True, scipy_fmt="csr")
    csc_after = csc_sorted.adj_external(transpose=True, scipy_fmt="csr")
    csc_offsets = csc_sorted.nodes["_V"].data["_TAG_OFFSET"]
    assert check_sort(csc_after, src_tags, csc_offsets)
    assert not check_sort(csc_before, src_tags)
@parametrize_idtype
def test_to_block(idtype):
    """Check ``dgl.to_block`` on homogeneous and heterogeneous graphs.

    Blocks are built with and without explicit ``dst_nodes``, with and
    without ``include_dst_in_src``, and with ``src_nodes`` taken from a
    previously built block.  Each block is validated for ID mapping
    (``check`` / ``checkall``) and feature propagation (``check_features``).

    Parameters
    ----------
    idtype
        Index dtype supplied by ``parametrize_idtype``.
    """

    def check(g, bg, ntype, etype, dst_nodes, include_dst_in_src=True):
        """Validate one relation of block ``bg`` against parent graph ``g``.

        When ``dst_nodes`` is given, the block's destination NIDs of
        ``ntype`` must equal it.  When ``include_dst_in_src`` is true, the
        leading source NIDs must equal the destination NIDs.  Finally the
        block's edges, mapped through NID, must equal the parent's edges
        looked up through the block's EID.
        """
        if dst_nodes is not None:
            assert F.array_equal(bg.dstnodes[ntype].data[dgl.NID], dst_nodes)
        n_dst_nodes = bg.num_nodes("DST/" + ntype)
        if include_dst_in_src:
            assert F.array_equal(
                bg.srcnodes[ntype].data[dgl.NID][:n_dst_nodes],
                bg.dstnodes[ntype].data[dgl.NID],
            )

        # Restrict both graphs to the relation under test.
        g = g[etype]
        bg = bg[etype]
        induced_src = bg.srcdata[dgl.NID]
        induced_dst = bg.dstdata[dgl.NID]
        induced_eid = bg.edata[dgl.EID]
        bg_src, bg_dst = bg.all_edges(order="eid")
        src_ans, dst_ans = g.all_edges(order="eid")

        # Block endpoints expressed in parent node IDs ...
        induced_src_bg = F.gather_row(induced_src, bg_src)
        induced_dst_bg = F.gather_row(induced_dst, bg_dst)
        # ... versus the parent's endpoints of the edges the block kept.
        induced_src_ans = F.gather_row(src_ans, induced_eid)
        induced_dst_ans = F.gather_row(dst_ans, induced_eid)

        assert F.array_equal(induced_src_bg, induced_src_ans)
        assert F.array_equal(induced_dst_bg, induced_dst_ans)

    def checkall(g, bg, dst_nodes, include_dst_in_src=True):
        """Run ``check`` for every edge type of ``g``.

        ``dst_nodes`` is a dict keyed by node type (or None); relations
        whose destination type is missing from it are checked without an
        expected destination list.
        """
        for etype in g.etypes:
            ntype = g.to_canonical_etype(etype)[2]
            if dst_nodes is not None and ntype in dst_nodes:
                check(g, bg, ntype, etype, dst_nodes[ntype], include_dst_in_src)
            else:
                check(g, bg, ntype, etype, None, include_dst_in_src)

    # homogeneous graph
    g = dgl.graph(
        (F.tensor([1, 2], dtype=idtype), F.tensor([2, 3], dtype=idtype))
    )
    dst_nodes = F.tensor([3, 2], dtype=idtype)
    bg = dgl.to_block(g, dst_nodes=dst_nodes)
    check(g, bg, "_N", "_E", dst_nodes)

    # Rebuild the block, feeding back the source nodes it induced.
    src_nodes = bg.srcnodes["_N"].data[dgl.NID]
    bg = dgl.to_block(g, dst_nodes=dst_nodes, src_nodes=src_nodes)
    check(g, bg, "_N", "_E", dst_nodes)

    # heterogeneous graph
    g = dgl.heterograph(
        {
            ("A", "AA", "A"): ([0, 2, 1, 3], [1, 3, 2, 4]),
            ("A", "AB", "B"): ([0, 1, 3, 1], [1, 3, 5, 6]),
            ("B", "BA", "A"): ([2, 3], [3, 2]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g.nodes["A"].data["x"] = F.randn((5, 10))
    g.nodes["B"].data["x"] = F.randn((7, 5))
    g.edges["AA"].data["x"] = F.randn((4, 3))
    g.edges["AB"].data["x"] = F.randn((4, 3))
    g.edges["BA"].data["x"] = F.randn((2, 3))
    g_a = g["AA"]

    def check_features(g, bg):
        """Assert that block features equal parent features gathered by ID.

        Covers source-node, destination-node and edge features for every
        type present in ``bg``.
        """
        for ntype in bg.srctypes:
            for key in g.nodes[ntype].data:
                assert F.array_equal(
                    bg.srcnodes[ntype].data[key],
                    F.gather_row(
                        g.nodes[ntype].data[key],
                        bg.srcnodes[ntype].data[dgl.NID],
                    ),
                )
        for ntype in bg.dsttypes:
            for key in g.nodes[ntype].data:
                assert F.array_equal(
                    bg.dstnodes[ntype].data[key],
                    F.gather_row(
                        g.nodes[ntype].data[key],
                        bg.dstnodes[ntype].data[dgl.NID],
                    ),
                )
        for etype in bg.canonical_etypes:
            for key in g.edges[etype].data:
                assert F.array_equal(
                    bg.edges[etype].data[key],
                    F.gather_row(
                        g.edges[etype].data[key], bg.edges[etype].data[dgl.EID]
                    ),
                )

    # Single-relation slice "AA", destination nodes inferred by to_block.
    bg = dgl.to_block(g_a)
    check(g_a, bg, "A", "AA", None)
    check_features(g_a, bg)
    assert bg.number_of_src_nodes() == 5
    assert bg.number_of_dst_nodes() == 4

    # Same slice, but destination nodes are not prepended to the sources.
    bg = dgl.to_block(g_a, include_dst_in_src=False)
    check(g_a, bg, "A", "AA", None, False)
    check_features(g_a, bg)
    assert bg.number_of_src_nodes() == 4
    assert bg.number_of_dst_nodes() == 4

    # Explicit destination nodes passed positionally.
    dst_nodes = F.tensor([4, 3, 2, 1], dtype=idtype)
    bg = dgl.to_block(g_a, dst_nodes)
    check(g_a, bg, "A", "AA", dst_nodes)
    check_features(g_a, bg)

    g_ab = g["AB"]

    # Bipartite slice "AB": only type "B" appears on the destination side.
    bg = dgl.to_block(g_ab)
    assert bg.idtype == idtype
    assert bg.num_nodes("SRC/B") == 4
    assert F.array_equal(
        bg.srcnodes["B"].data[dgl.NID], bg.dstnodes["B"].data[dgl.NID]
    )
    assert bg.num_nodes("DST/A") == 0
    checkall(g_ab, bg, None)
    check_features(g_ab, bg)

    # Full heterograph with destination nodes given for type "B" only.
    dst_nodes = {"B": F.tensor([5, 6, 3, 1], dtype=idtype)}
    bg = dgl.to_block(g, dst_nodes)
    assert bg.num_nodes("SRC/B") == 4
    assert F.array_equal(
        bg.srcnodes["B"].data[dgl.NID], bg.dstnodes["B"].data[dgl.NID]
    )
    assert bg.num_nodes("DST/A") == 0
    checkall(g, bg, dst_nodes)
    check_features(g, bg)

    # Destination nodes given for both node types.
    dst_nodes = {
        "A": F.tensor([4, 3, 2, 1], dtype=idtype),
        "B": F.tensor([3, 5, 6, 1], dtype=idtype),
    }
    bg = dgl.to_block(g, dst_nodes=dst_nodes)
    checkall(g, bg, dst_nodes)
    check_features(g, bg)

    # test specifying lhs_nodes with include_dst_in_src
    src_nodes = {}
    for ntype in dst_nodes.keys():
        # use the previous run to get the list of source nodes
        src_nodes[ntype] = bg.srcnodes[ntype].data[dgl.NID]
    bg = dgl.to_block(g, dst_nodes=dst_nodes, src_nodes=src_nodes)
    checkall(g, bg, dst_nodes)
    check_features(g, bg)

    # test without include_dst_in_src
    dst_nodes = {
        "A": F.tensor([4, 3, 2, 1], dtype=idtype),
        "B": F.tensor([3, 5, 6, 1], dtype=idtype),
    }
    bg = dgl.to_block(g, dst_nodes=dst_nodes, include_dst_in_src=False)
    checkall(g, bg, dst_nodes, False)
    check_features(g, bg)

    # test specifying lhs_nodes without include_dst_in_src
    src_nodes = {}
    for ntype in dst_nodes.keys():
        # use the previous run to get the list of source nodes
        src_nodes[ntype] = bg.srcnodes[ntype].data[dgl.NID]
    bg = dgl.to_block(
        g, dst_nodes=dst_nodes, include_dst_in_src=False, src_nodes=src_nodes
    )
    checkall(g, bg, dst_nodes, False)
    check_features(g, bg)
def create_test_heterograph3(idtype):
    """Return a user/developer/game heterograph with integer ``h`` features.

    The graph has the relations ``user-plays->game`` (4 edges) and
    ``developer-develops->game`` (2 edges).  Every node type and the ``plays``
    edges get a constant ``h`` feature of dtype ``idtype`` on ``F.ctx()``.
    """
    relations = {
        ("user", "plays", "game"): (
            F.tensor([0, 1, 1, 2], dtype=idtype),
            F.tensor([0, 0, 1, 1], dtype=idtype),
        ),
        ("developer", "develops", "game"): (
            F.tensor([0, 1], dtype=idtype),
            F.tensor([0, 1], dtype=idtype),
        ),
    }
    graph = dgl.heterograph(relations, idtype=idtype, device=F.ctx())

    node_values = (
        ("user", [1, 1, 1]),
        ("game", [2, 2]),
        ("developer", [3, 3]),
    )
    for ntype, values in node_values:
        graph.nodes[ntype].data["h"] = F.copy_to(
            F.tensor(values, dtype=idtype), ctx=F.ctx()
        )
    graph.edges["plays"].data["h"] = F.copy_to(
        F.tensor([1, 1, 1, 1], dtype=idtype), ctx=F.ctx()
    )
    return graph


def create_test_heterograph4(idtype):
    """Return a user/game heterograph with ``follows`` and ``plays`` edges.

    ``user-follows->user`` has 6 edges (including self-loops and repeated
    pairs) and ``user-plays->game`` has 2 edges.  Both node types and both
    edge types carry an integer ``h`` feature of dtype ``idtype`` on
    ``F.ctx()``.
    """
    relations = {
        ("user", "follows", "user"): (
            F.tensor([0, 1, 1, 2, 2, 2], dtype=idtype),
            F.tensor([0, 0, 1, 1, 2, 2], dtype=idtype),
        ),
        ("user", "plays", "game"): (
            F.tensor([0, 1], dtype=idtype),
            F.tensor([0, 1], dtype=idtype),
        ),
    }
    graph = dgl.heterograph(relations, idtype=idtype, device=F.ctx())

    node_values = (("user", [1, 1, 1]), ("game", [2, 2]))
    for ntype, values in node_values:
        graph.nodes[ntype].data["h"] = F.copy_to(
            F.tensor(values, dtype=idtype), ctx=F.ctx()
        )

    edge_values = (("follows", [1, 2, 3, 4, 5, 6]), ("plays", [1, 2]))
    for etype, values in edge_values:
        graph.edges[etype].data["h"] = F.copy_to(
            F.tensor(values, dtype=idtype), ctx=F.ctx()
        )
    return graph
# line graph related
def test_line_graph1():
    """Check that a shared line graph exposes the parent's edge features.

    Each edge of the star graph becomes a node of the line graph, so the line
    graph's ``h`` node feature must equal the star graph's ``h`` edge feature.
    """
    num_leaves = 5
    star = dgl.from_networkx(nx.star_graph(num_leaves)).to(F.ctx())
    # from_networkx yields one directed edge per direction of each spoke
    num_directed_edges = 2 * num_leaves
    star.edata["h"] = F.randn((num_directed_edges, D))

    line = star.line_graph(shared=True)

    assert line.num_nodes() == num_directed_edges
    assert F.allclose(line.ndata["h"], star.edata["h"])
    assert star.device == F.ctx()
def test_no_backtracking():
    """Check that ``line_graph(backtracking=False)`` drops u->v->u edges.

    For every spoke of a star graph, the line-graph nodes of the edge
    ``0 -> leaf`` and of its reverse ``leaf -> 0`` must not be connected in
    either direction.
    """
    N = 5
    G = dgl.from_networkx(nx.star_graph(N))
    L = G.line_graph(backtracking=False)
    assert L.num_nodes() == 2 * N
    # star_graph(N) has centre 0 and leaves 1..N (hence the 2 * N directed
    # edges asserted above), so the upper bound must be N + 1 to cover the
    # last leaf as well.
    for leaf in range(1, N + 1):
        outward = G.edge_ids(0, leaf)
        inward = G.edge_ids(leaf, 0)
        assert not L.has_edges_between(outward, inward)
        assert not L.has_edges_between(inward, outward)
g.num_nodes() == g_r.num_nodes() assert g.num_edges() == g_r.num_edges() assert F.array_equal(g.ndata["h"], g_r.ndata["h"]) assert F.array_equal(g.edata["h"], g_r.edata["h"]) # add new node feature to g_r g_r.ndata["hh"] = F.tensor([0, 1, 2]) assert ("hh" in g.ndata) is False assert ("hh" in g_r.ndata) is True # add new edge feature to g_r g_r.edata["hh"] = F.tensor([0, 1, 2]) assert ("hh" in g.edata) is False assert ("hh" in g_r.edata) is True # test heterogeneous graph g = dgl.heterograph( { ("user", "follows", "user"): ( [0, 1, 2, 4, 3, 1, 3], [1, 2, 3, 2, 0, 0, 1], ), ("user", "plays", "game"): ( [0, 0, 2, 3, 3, 4, 1], [1, 0, 1, 0, 1, 0, 0], ), ("developer", "develops", "game"): ([0, 1, 1, 2], [0, 0, 1, 1]), }, idtype=idtype, device=F.ctx(), ) g.nodes["user"].data["h"] = F.tensor([0, 1, 2, 3, 4]) g.nodes["user"].data["hh"] = F.tensor([1, 1, 1, 1, 1]) g.nodes["game"].data["h"] = F.tensor([0, 1]) g.edges["follows"].data["h"] = F.tensor([0, 1, 2, 4, 3, 1, 3]) g.edges["follows"].data["hh"] = F.tensor([1, 2, 3, 2, 0, 0, 1]) g_r = dgl.reverse(g) for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes): assert etype_g[0] == etype_gr[2] assert etype_g[1] == etype_gr[1] assert etype_g[2] == etype_gr[0] assert g.num_edges(etype_g) == g_r.num_edges(etype_gr) for ntype in g.ntypes: assert g.num_nodes(ntype) == g_r.num_nodes(ntype) assert F.array_equal(g.nodes["user"].data["h"], g_r.nodes["user"].data["h"]) assert F.array_equal( g.nodes["user"].data["hh"], g_r.nodes["user"].data["hh"] ) assert F.array_equal(g.nodes["game"].data["h"], g_r.nodes["game"].data["h"]) assert len(g_r.edges["follows"].data) == 0 u_g, v_g, eids_g = g.all_edges( form="all", etype=("user", "follows", "user") ) u_rg, v_rg, eids_rg = g_r.all_edges( form="all", etype=("user", "follows", "user") ) assert F.array_equal(u_g, v_rg) assert F.array_equal(v_g, u_rg) assert F.array_equal(eids_g, eids_rg) u_g, v_g, eids_g = g.all_edges(form="all", etype=("user", "plays", "game")) u_rg, v_rg, eids_rg 
= g_r.all_edges( form="all", etype=("game", "plays", "user") ) assert F.array_equal(u_g, v_rg) assert F.array_equal(v_g, u_rg) assert F.array_equal(eids_g, eids_rg) u_g, v_g, eids_g = g.all_edges( form="all", etype=("developer", "develops", "game") ) u_rg, v_rg, eids_rg = g_r.all_edges( form="all", etype=("game", "develops", "developer") ) assert F.array_equal(u_g, v_rg) assert F.array_equal(v_g, u_rg) assert F.array_equal(eids_g, eids_rg) # withour share ndata g_r = dgl.reverse(g, copy_ndata=False) for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes): assert etype_g[0] == etype_gr[2] assert etype_g[1] == etype_gr[1] assert etype_g[2] == etype_gr[0] assert g.num_edges(etype_g) == g_r.num_edges(etype_gr) for ntype in g.ntypes: assert g.num_nodes(ntype) == g_r.num_nodes(ntype) assert len(g_r.nodes["user"].data) == 0 assert len(g_r.nodes["game"].data) == 0 g_r = dgl.reverse(g, copy_ndata=True, copy_edata=True) print(g_r) for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes): assert etype_g[0] == etype_gr[2] assert etype_g[1] == etype_gr[1] assert etype_g[2] == etype_gr[0] assert g.num_edges(etype_g) == g_r.num_edges(etype_gr) assert F.array_equal( g.edges["follows"].data["h"], g_r.edges["follows"].data["h"] ) assert F.array_equal( g.edges["follows"].data["hh"], g_r.edges["follows"].data["hh"] ) # add new node feature to g_r g_r.nodes["user"].data["hhh"] = F.tensor([0, 1, 2, 3, 4]) assert ("hhh" in g.nodes["user"].data) is False assert ("hhh" in g_r.nodes["user"].data) is True # add new edge feature to g_r g_r.edges["follows"].data["hhh"] = F.tensor([1, 2, 3, 2, 0, 0, 1]) assert ("hhh" in g.edges["follows"].data) is False assert ("hhh" in g_r.edges["follows"].data) is True @parametrize_idtype def test_reverse_shared_frames(idtype): g = dgl.graph([]) g = g.astype(idtype).to(F.ctx()) g.add_nodes(3) g.add_edges([0, 1, 2], [1, 2, 1]) g.ndata["h"] = F.tensor([[0.0], [1.0], [2.0]]) g.edata["h"] = F.tensor([[3.0], [4.0], [5.0]]) rg = 
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_to_bidirected():
    """Check ``dgl.to_bidirected`` on a homogeneous and a heterogeneous graph.

    The result must contain every original edge plus the missing reverse
    edges, without duplicating pairs that were already bidirected, and must
    copy node features when ``copy_ndata=True``.
    """

    def edge_set(graph, **edge_kwargs):
        # Collect the (src, dst) pairs of ``graph`` as a set of tuples.
        src, dst = graph.edges(**edge_kwargs)
        return set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))

    # homogeneous graph
    directed = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 1), (2, 2)]
    g = dgl.graph(tuple(zip(*directed)))
    # (2, 1) is the only edge without a reverse, so exactly (1, 2) is added
    expected = {*directed, (1, 2)}
    big = dgl.to_bidirected(g)
    assert big.num_edges() == 7
    assert edge_set(big) == expected

    # heterogeneous graph
    wins = [(0, 0), (0, 1), (1, 0), (1, 1), (2, 1), (2, 2)]
    follows = [(0, 0), (0, 1)]
    g = dgl.heterograph(
        {
            ("user", "wins", "user"): tuple(zip(*wins)),
            ("user", "follows", "user"): tuple(zip(*follows)),
        }
    )
    g.nodes["user"].data["h"] = F.ones((3, 1))
    expected_wins = {*wins, (1, 2)}
    expected_follows = {*follows, (1, 0)}

    big = dgl.to_bidirected(g)
    assert big.num_edges("wins") == 7
    assert big.num_edges("follows") == 3
    assert edge_set(big, etype="wins") == expected_wins
    assert edge_set(big, etype="follows") == expected_follows

    # node features are carried over on request
    big = dgl.to_bidirected(g, copy_ndata=True)
    assert F.array_equal(g.nodes["user"].data["h"], big.nodes["user"].data["h"])
F.cat([g.edata["h"], g.edata["h"]], dim=0), bg.edata["h"] ) bg.ndata["hh"] = F.tensor([[0.0], [1.0], [2.0], [1.0]]) assert ("hh" in g.ndata) is False bg.edata["hh"] = F.tensor( [[0.0], [1.0], [2.0], [1.0], [0.0], [1.0], [2.0], [1.0]] ) assert ("hh" in g.edata) is False # donot share ndata and edata bg = dgl.add_reverse_edges(g, copy_ndata=False, copy_edata=False) ub, vb = bg.edges() assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) assert ("h" in bg.ndata) is False assert ("h" in bg.edata) is False # zero edge graph g = dgl.graph(([], [])) bg = dgl.add_reverse_edges( g, copy_ndata=True, copy_edata=True, exclude_self=False ) # heterogeneous graph g = dgl.heterograph( { ("user", "wins", "user"): ( F.tensor([0, 2, 0, 2, 2]), F.tensor([1, 1, 2, 1, 0]), ), ("user", "plays", "game"): ( F.tensor([1, 2, 1]), F.tensor([2, 1, 1]), ), ("user", "follows", "user"): ( F.tensor([1, 2, 1]), F.tensor([0, 0, 0]), ), } ) g.nodes["game"].data["hv"] = F.ones((3, 1)) g.nodes["user"].data["hv"] = F.ones((3, 1)) g.edges["wins"].data["h"] = F.tensor([0, 1, 2, 3, 4]) bg = dgl.add_reverse_edges( g, copy_ndata=True, copy_edata=True, ignore_bipartite=True ) assert F.array_equal( g.nodes["game"].data["hv"], bg.nodes["game"].data["hv"] ) assert F.array_equal( g.nodes["user"].data["hv"], bg.nodes["user"].data["hv"] ) u, v = g.all_edges(order="eid", etype=("user", "wins", "user")) ub, vb = bg.all_edges(order="eid", etype=("user", "wins", "user")) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) assert F.array_equal( F.cat([g.edges["wins"].data["h"], g.edges["wins"].data["h"]], dim=0), bg.edges["wins"].data["h"], ) u, v = g.all_edges(order="eid", etype=("user", "follows", "user")) ub, vb = bg.all_edges(order="eid", etype=("user", "follows", "user")) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) u, v = g.all_edges(order="eid", etype=("user", "plays", "game")) ub, 
vb = bg.all_edges(order="eid", etype=("user", "plays", "game")) assert F.array_equal(u, ub) assert F.array_equal(v, vb) assert set(bg.edges["plays"].data.keys()) == {dgl.EID} assert set(bg.edges["follows"].data.keys()) == {dgl.EID} # donot share ndata and edata bg = dgl.add_reverse_edges( g, copy_ndata=False, copy_edata=False, ignore_bipartite=True ) assert len(bg.edges["wins"].data) == 0 assert len(bg.edges["plays"].data) == 0 assert len(bg.edges["follows"].data) == 0 assert len(bg.nodes["game"].data) == 0 assert len(bg.nodes["user"].data) == 0 u, v = g.all_edges(order="eid", etype=("user", "wins", "user")) ub, vb = bg.all_edges(order="eid", etype=("user", "wins", "user")) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) u, v = g.all_edges(order="eid", etype=("user", "follows", "user")) ub, vb = bg.all_edges(order="eid", etype=("user", "follows", "user")) assert F.array_equal(F.cat([u, v], dim=0), ub) assert F.array_equal(F.cat([v, u], dim=0), vb) u, v = g.all_edges(order="eid", etype=("user", "plays", "game")) ub, vb = bg.all_edges(order="eid", etype=("user", "plays", "game")) assert F.array_equal(u, ub) assert F.array_equal(v, vb) # test the case when some nodes have zero degree # homogeneous graph g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])), num_nodes=6) g.ndata["h"] = F.tensor([[0.0], [1.0], [2.0], [1.0], [1.0], [1.0]]) g.edata["h"] = F.tensor([[3.0], [4.0], [5.0], [6.0]]) bg = dgl.add_reverse_edges(g, copy_ndata=True, copy_edata=True) assert g.num_nodes() == bg.num_nodes() assert F.array_equal(g.ndata["h"], bg.ndata["h"]) assert F.array_equal( F.cat([g.edata["h"], g.edata["h"]], dim=0), bg.edata["h"] ) # heterogeneous graph g = dgl.heterograph( { ("user", "wins", "user"): ( F.tensor([0, 2, 0, 2, 2]), F.tensor([1, 1, 2, 1, 0]), ), ("user", "plays", "game"): ( F.tensor([1, 2, 1]), F.tensor([2, 1, 1]), ), ("user", "follows", "user"): ( F.tensor([1, 2, 1]), F.tensor([0, 0, 0]), ), }, 
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_simple_graph():
    """Check that ``dgl.to_simple`` collapses duplicate edges.

    The input lists the edge (0, 1) twice; the simplified graph must keep the
    three distinct edges and no longer report itself as a multigraph.
    """
    multi_edges = [(0, 1), (0, 2), (1, 2), (0, 1)]
    multigraph = dgl.graph(multi_edges)
    assert multigraph.is_multigraph

    simple = dgl.to_simple(multigraph)
    assert not simple.is_multigraph
    assert simple.num_edges() == 3

    src_ids, dst_ids = (list(F.asnumpy(ids)) for ids in simple.edges())
    assert set(zip(src_ids, dst_ids)) == set(multi_edges)
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_khop_graph():
    """Check ``dgl.khop_graph`` against repeated one-hop message passing.

    Summing neighbour features ``k`` times on the original graph must give
    the same result as summing once on the ``k``-hop graph, for ``k`` in
    0..3, on a random undirected and a random directed graph.
    """
    num_nodes = 20
    feat = F.randn((num_nodes, 5))

    def propagate(graph, steps):
        # Sum-aggregate ``feat`` over in-edges ``steps`` times; the temporary
        # "h" feature is removed from the graph again before returning.
        graph.ndata["h"] = feat
        for _ in range(steps):
            graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
        return graph.ndata.pop("h")

    def compare(graph):
        for k in range(4):
            khop = dgl.khop_graph(graph, k)
            # k rounds on the original graph ...
            expected = propagate(graph, k)
            # ... versus a single round on the k-hop graph.
            actual = propagate(khop, 1)
            assert F.allclose(expected, actual, rtol=1e-3, atol=1e-3)

    # First a random undirected graph, then a random directed one.
    for nx_kwargs in ({}, {"directed": True}):
        compare(
            dgl.from_networkx(nx.erdos_renyi_graph(num_nodes, 0.3, **nx_kwargs))
        )
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_laplacian_lambda_max():
    """Check that the largest Laplacian eigenvalue of a random graph is < 2."""
    N = 20
    eps = 1e-6
    # test DGLGraph
    g = dgl.from_networkx(nx.erdos_renyi_graph(N, 0.3))
    l_max = dgl.laplacian_lambda_max(g)
    assert l_max[0] < 2 + eps
    # test batched DGLGraph (disabled in the original test, kept for reference)
    # N_arr = [20, 30, 10, 12]
    # bg = dgl.batch([
    #     dgl.from_networkx(nx.erdos_renyi_graph(N, 0.3))
    #     for N in N_arr
    # ])
    # l_max_arr = dgl.laplacian_lambda_max(bg)
    # assert len(l_max_arr) == len(N_arr)
    # for l_max in l_max_arr:
    #     assert l_max < 2 + eps


def create_large_graph(num_nodes, idtype=F.int64):
    """Return a random graph with ``num_nodes`` nodes and no duplicate edges.

    ``10 * num_nodes`` (src, dst) pairs are drawn uniformly at random and
    duplicates are merged, so the graph has at most that many edges.
    """
    row = np.random.choice(num_nodes, num_nodes * 10)
    col = np.random.choice(num_nodes, num_nodes * 10)
    # Pin the shape explicitly: without it SciPy infers the shape from the
    # largest sampled index, which gives a smaller (or even non-square) matrix
    # whenever the highest node id happens not to be drawn.
    spm = spsp.coo_matrix(
        (np.ones(len(row)), (row, col)), shape=(num_nodes, num_nodes)
    )
    spm.sum_duplicates()
    return dgl.from_scipy(spm, idtype=idtype)


# NOTE(review): the original comment here reads "Disabled since everything
# will be on heterogeneous graphs", but the test below is not disabled.
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_partition_with_halo():
    """Check ``partition_graph_with_halo`` on a random 4-way node assignment.

    For each partition, the inner nodes must be exactly the nodes assigned to
    it, and their in/out degrees must match those in the full graph.
    """
    g = create_large_graph(1000)
    node_part = np.random.choice(4, g.num_nodes())
    subgs, _, _ = dgl.transforms.partition_graph_with_halo(
        g, node_part, 2, reshuffle=True
    )
    for part_id, subg in subgs.items():
        node_ids = np.nonzero(node_part == part_id)[0]
        lnode_ids = np.nonzero(F.asnumpy(subg.ndata["inner_node"]))[0]
        orig_nids = F.asnumpy(subg.ndata["orig_id"])[lnode_ids]
        assert np.all(np.sort(orig_nids) == node_ids)
        assert np.all(
            F.asnumpy(subg.in_degrees(lnode_ids))
            == F.asnumpy(g.in_degrees(orig_nids))
        )
        assert np.all(
            F.asnumpy(subg.out_degrees(lnode_ids))
            == F.asnumpy(g.out_degrees(orig_nids))
        )


@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(
    F._default_context_str == "gpu", reason="METIS doesn't support GPU"
)
@parametrize_idtype
def test_metis_partition(idtype):
    """Run the METIS partition checks for int64 and expect failure otherwise."""
    # TODO(zhengda) Metis fails to partition a small graph.
    g = create_large_graph(1000, idtype=idtype)
    if idtype == F.int64:
        check_metis_partition(g, 0)
        check_metis_partition(g, 1)
        check_metis_partition(g, 2)
        check_metis_partition_with_constraint(g)
    else:
        # Any other idtype is expected to fail.  Unlike the former bare
        # ``except:``, this does not swallow KeyboardInterrupt / SystemExit.
        with pytest.raises(Exception):
            check_metis_partition(g, 1)


def check_metis_partition_with_constraint(g):
    """Partition ``g`` with balanced node types and print per-type counts.

    The first quarter of the nodes is labelled type 1, the last quarter
    type 2 and the rest type 0.  The partitioning is run once without and once
    with ``balance_edges=True``; nothing is asserted, the counts per
    partition are only printed.
    """
    ntypes = np.zeros((g.num_nodes(),), dtype=np.int32)
    ntypes[0 : int(g.num_nodes() / 4)] = 1
    ntypes[int(g.num_nodes() * 3 / 4) :] = 2

    def report(subgs):
        # Print how many nodes of each type ended up in each partition.
        if subgs is None:
            return
        for i in subgs:
            parent_nids = F.asnumpy(subgs[i].ndata[dgl.NID])
            sub_ntypes = ntypes[parent_nids]
            for type_id in range(3):
                print(f"type{type_id}:", np.sum(sub_ntypes == type_id))

    for extra_kwargs in ({}, {"balance_edges": True}):
        report(
            dgl.transforms.metis_partition(
                g, 4, extra_cached_hops=1, balance_ntypes=ntypes, **extra_kwargs
            )
        )
not None: for part_id, subg in subgs.items(): lnode_ids = np.nonzero(F.asnumpy(subg.ndata["inner_node"]))[0] ledge_ids = np.nonzero(F.asnumpy(subg.edata["inner_edge"]))[0] num_inner_nodes += len(lnode_ids) num_inner_edges += len(ledge_ids) assert np.sum(F.asnumpy(subg.ndata["part_id"]) == part_id) == len( lnode_ids ) nids = F.asnumpy(subg.ndata[dgl.NID]) # ensure the local node Ids are contiguous. parent_ids = F.asnumpy(subg.ndata[dgl.NID]) parent_ids = parent_ids[: len(lnode_ids)] assert np.all( parent_ids == np.arange(parent_ids[0], parent_ids[-1] + 1) ) # count the local edges. parent_ids = F.asnumpy(subg.edata[dgl.EID])[ledge_ids] edge_cnts[parent_ids] += 1 orig_ids = subg.ndata["orig_id"] inner_node = F.asnumpy(subg.ndata["inner_node"]) for nid in range(subg.num_nodes()): neighs = subg.predecessors(nid) old_neighs1 = F.gather_row(orig_ids, neighs) old_nid = F.asnumpy(orig_ids[nid]) old_neighs2 = g.predecessors(old_nid) # If this is an inner node, it should have the full neighborhood. if inner_node[nid]: assert np.all( np.sort(F.asnumpy(old_neighs1)) == np.sort(F.asnumpy(old_neighs2)) ) # Normally, local edges are only counted once. assert np.all(edge_cnts == 1) assert num_inner_nodes == g.num_nodes() print(g.num_edges() - num_inner_edges) @unittest.skipIf( F._default_context_str == "gpu", reason="It doesn't support GPU" ) def test_reorder_nodes(): g = create_large_graph(1000) new_nids = np.random.permutation(g.num_nodes()) # TODO(zhengda) we need to test both CSR and COO. 
@parametrize_idtype
def test_compact(idtype):
    """Check ``dgl.compact_graphs`` for single graphs and lists of graphs.

    Covers the default call and ``always_preserve`` given as a dict or as a
    tensor.  Each case checks the set of retained node IDs per node type, the
    idtype of the result, and that every edge maps back to its original
    endpoints.
    """
    g1 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([1, 3], [3, 5]),
            ("user", "plays", "game"): ([2, 3, 2], [4, 4, 5]),
            ("game", "wished-by", "user"): ([6, 5], [7, 7]),
        },
        {"user": 20, "game": 10},
        idtype=idtype,
        device=F.ctx(),
    )

    g2 = dgl.heterograph(
        {
            ("game", "clicked-by", "user"): ([3], [1]),
            ("user", "likes", "user"): ([1, 8], [8, 9]),
        },
        {"user": 20, "game": 10},
        idtype=idtype,
        device=F.ctx(),
    )

    g3 = dgl.heterograph(
        {("user", "_E", "user"): ((0, 1), (1, 2))},
        {"user": 10},
        idtype=idtype,
        device=F.ctx(),
    )
    g4 = dgl.heterograph(
        {("user", "_E", "user"): ((1, 3), (3, 5))},
        {"user": 10},
        idtype=idtype,
        device=F.ctx(),
    )

    def _induced(new_g):
        # Original node IDs retained in ``new_g``, per node type, as numpy.
        return {
            ntype: F.asnumpy(new_g.nodes[ntype].data[dgl.NID])
            for ntype in new_g.ntypes
        }

    def _check(g, new_g, induced_nodes):
        # The compacted graph keeps the schema of the original graph ...
        assert g.ntypes == new_g.ntypes
        assert g.canonical_etypes == new_g.canonical_etypes

        for ntype in g.ntypes:
            assert -1 not in induced_nodes[ntype]

        # ... and every edge, mapped back through the induced node IDs, has
        # the same endpoints as the edge with the same ID in the original.
        for etype in g.canonical_etypes:
            g_src, g_dst = g.all_edges(order="eid", etype=etype)
            g_src = F.asnumpy(g_src)
            g_dst = F.asnumpy(g_dst)
            new_g_src, new_g_dst = new_g.all_edges(order="eid", etype=etype)
            new_g_src_mapped = induced_nodes[etype[0]][F.asnumpy(new_g_src)]
            new_g_dst_mapped = induced_nodes[etype[2]][F.asnumpy(new_g_dst)]
            assert (g_src == new_g_src_mapped).all()
            assert (g_dst == new_g_dst_mapped).all()

    # Test default
    new_g1 = dgl.compact_graphs(g1)
    induced_nodes = _induced(new_g1)
    assert new_g1.idtype == idtype
    assert set(induced_nodes["user"]) == {1, 3, 5, 2, 7}
    assert set(induced_nodes["game"]) == {4, 5, 6}
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a dict
    new_g1 = dgl.compact_graphs(
        g1, always_preserve={"game": F.tensor([4, 7], dtype=idtype)}
    )
    assert new_g1.idtype == idtype
    induced_nodes = _induced(new_g1)
    assert set(induced_nodes["user"]) == {1, 3, 5, 2, 7}
    assert set(induced_nodes["game"]) == {4, 5, 6, 7}
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a tensor
    new_g3 = dgl.compact_graphs(
        g3, always_preserve=F.tensor([1, 7], dtype=idtype)
    )
    induced_nodes = _induced(new_g3)
    assert new_g3.idtype == idtype
    assert set(induced_nodes["user"]) == {0, 1, 2, 7}
    _check(g3, new_g3, induced_nodes)

    # Test multiple graphs
    new_g1, new_g2 = dgl.compact_graphs([g1, g2])
    induced_nodes = _induced(new_g1)
    assert new_g1.idtype == idtype
    assert new_g2.idtype == idtype
    assert set(induced_nodes["user"]) == {1, 3, 5, 2, 7, 8, 9}
    assert set(induced_nodes["game"]) == {3, 4, 5, 6}
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a dict
    new_g1, new_g2 = dgl.compact_graphs(
        [g1, g2], always_preserve={"game": F.tensor([4, 7], dtype=idtype)}
    )
    induced_nodes = _induced(new_g1)
    assert new_g1.idtype == idtype
    assert new_g2.idtype == idtype
    assert set(induced_nodes["user"]) == {1, 3, 5, 2, 7, 8, 9}
    assert set(induced_nodes["game"]) == {3, 4, 5, 6, 7}
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a tensor
    new_g3, new_g4 = dgl.compact_graphs(
        [g3, g4], always_preserve=F.tensor([1, 7], dtype=idtype)
    )
    induced_nodes = _induced(new_g3)
    assert new_g3.idtype == idtype
    assert new_g4.idtype == idtype
    assert set(induced_nodes["user"]) == {0, 1, 2, 3, 5, 7}
    _check(g3, new_g3, induced_nodes)
    _check(g4, new_g4, induced_nodes)
eid_map[i] == suv.index(e) # shared ndata assert F.array_equal(sg.ndata["h"], g.ndata["h"]) assert "h" not in sg.edata # new ndata to sg sg.ndata["hh"] = F.tensor([[0.0], [1.0], [2.0]]) assert "hh" not in g.ndata sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False) assert "h" not in sg.ndata assert "h" not in sg.edata # test coalesce edge feature sg = dgl.to_simple(g, copy_edata=True, aggregator="arbitrary") assert F.allclose(sg.edata["h"][1], F.tensor([4.0])) sg = dgl.to_simple(g, copy_edata=True, aggregator="sum") assert F.allclose(sg.edata["h"][1], F.tensor([10.0])) sg = dgl.to_simple(g, copy_edata=True, aggregator="mean") assert F.allclose(sg.edata["h"][1], F.tensor([5.0])) # heterogeneous graph g = dgl.heterograph( { ("user", "follow", "user"): ( [0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4], ), ("user", "plays", "game"): ( [3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3], ), }, idtype=idtype, device=F.ctx(), ) g.nodes["user"].data["h"] = F.tensor([0, 1, 2, 3, 4]) g.nodes["user"].data["hh"] = F.tensor([0, 1, 2, 3, 4]) g.edges["follow"].data["h"] = F.tensor([0, 1, 2, 3, 4, 5]) sg, wb = dgl.to_simple( g, return_counts="weights", writeback_mapping=True, copy_edata=True ) g.nodes["game"].data["h"] = F.tensor([0, 1, 2, 3, 4, 5]) for etype in g.canonical_etypes: u, v = g.all_edges(form="uv", order="eid", etype=etype) u = F.asnumpy(u).tolist() v = F.asnumpy(v).tolist() uv = list(zip(u, v)) eid_map = F.asnumpy(wb[etype]) su, sv = sg.all_edges(form="uv", order="eid", etype=etype) su = F.asnumpy(su).tolist() sv = F.asnumpy(sv).tolist() suv = list(zip(su, sv)) sw = F.asnumpy(sg.edges[etype].data["weights"]) assert set(uv) == set(suv) for i, e in enumerate(suv): assert sw[i] == sum(e == _e for _e in uv) for i, e in enumerate(uv): assert eid_map[i] == suv.index(e) # shared ndata assert F.array_equal(sg.nodes["user"].data["h"], g.nodes["user"].data["h"]) assert F.array_equal( sg.nodes["user"].data["hh"], g.nodes["user"].data["hh"] ) assert "h" not in sg.nodes["game"].data 
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@parametrize_idtype
def test_remove_edges(idtype):
    """Check ``dgl.remove_edges`` across sparse formats and edge types.

    NOTE(review): a later function in this module is also named
    ``test_remove_edges``; that later ``def`` rebinds the name, so this
    version is presumably never collected by the test runner. Before renaming
    it to re-enable it, confirm that ``dgl.remove_edges`` stores ``dgl.EID``
    on the result by default, because ``check`` below reads it.
    """

    def ids(values):
        return F.tensor(values, idtype)

    def check(g1, etype, g, edges_removed):
        # Every surviving (src, dst, original-eid) triple of ``g1`` must be
        # an edge of ``g``, and none of the removed IDs may survive.
        src, dst, eid = (
            F.asnumpy(x) for x in g.edges(etype=etype, form="all")
        )
        src1, dst1 = (
            F.asnumpy(x) for x in g1.edges(etype=etype, order="eid")
        )
        edge_frame = g1.edata if etype is None else g1.edges[etype].data
        eid1 = F.asnumpy(edge_frame[dgl.EID])
        sde_set = set(zip(src, dst, eid))

        assert all(triple in sde_set for triple in zip(src1, dst1, eid1))
        assert not np.isin(edges_removed, eid1).any()
        assert g1.idtype == g.idtype

    for fmt in ("coo", "csr", "csc"):
        for edges_to_remove in ([2], [2, 2], [3, 2], [1, 3, 1, 2]):
            # graph built from an edge list
            g = dgl.graph(([0, 2, 1, 3], [1, 3, 2, 4]), idtype=idtype)
            g = g.formats(fmt)
            g1 = dgl.remove_edges(g, ids(edges_to_remove))
            check(g1, None, g, edges_to_remove)

            # same graph built from a SciPy CSR matrix
            adjacency = spsp.csr_matrix(
                ([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5)
            )
            g = dgl.from_scipy(adjacency, idtype=idtype).formats(fmt)
            g1 = dgl.remove_edges(g, ids(edges_to_remove))
            check(g1, None, g, edges_to_remove)

    g = dgl.heterograph(
        {
            ("A", "AA", "A"): ([0, 2, 1, 3], [1, 3, 2, 4]),
            ("A", "AB", "B"): ([0, 1, 3, 1], [1, 3, 5, 6]),
            ("B", "BA", "A"): ([2, 3], [3, 2]),
        },
        idtype=idtype,
    )

    # one edge removed from every edge type
    g2 = dgl.remove_edges(
        g, {"AA": ids([2]), "AB": ids([3]), "BA": ids([1])}
    )
    check(g2, "AA", g, [2])
    check(g2, "AB", g, [3])
    check(g2, "BA", g, [1])

    # empty removal list for one edge type
    g3 = dgl.remove_edges(
        g, {"AA": ids([]), "AB": ids([3]), "BA": ids([1])}
    )
    check(g3, "AA", g, [])
    check(g3, "AB", g, [3])
    check(g3, "BA", g, [1])

    # all edges of a single edge type removed, the others not mentioned
    g4 = dgl.remove_edges(g, {"AB": ids([3, 1, 2, 0])})
    check(g4, "AA", g, [])
    check(g4, "AB", g, [3, 1, 2, 0])
    check(g4, "BA", g, [])
@parametrize_idtype
def test_add_edges(idtype):
    """Check ``dgl.add_edges`` on homogeneous, bipartite and hetero graphs.

    Covers endpoints given as int, list and tensor, empty additions, node
    creation when an endpoint exceeds the current node count, and the
    zero-filling of features that a call does not supply.
    """
    ctx = F.ctx()

    def ids(values):
        # Tensor of ``values`` in the ID dtype under test.
        return F.tensor(values, dtype=idtype)

    def feat(values):
        # Like ``ids`` but copied to the test device.
        return F.copy_to(ids(values), ctx=ctx)

    def new_homograph():
        return dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=ctx)

    def new_bipartite():
        return dgl.heterograph(
            {("user", "plays", "game"): ([0, 1], [1, 2])},
            idtype=idtype,
            device=ctx,
        )

    def assert_endpoints(graph, src, dst, **query):
        got_src, got_dst = graph.edges(form="uv", **query)
        assert F.array_equal(got_src, ids(src))
        assert F.array_equal(got_dst, ids(dst))

    # ---- homogeneous graph: int, list, then tensor endpoints ----
    g = new_homograph()
    endpoints = ((0, 1), ([0], [1]), (ids([0]), ids([1])))
    for (u, v), expected_edges in zip(endpoints, (3, 4, 5)):
        g = dgl.add_edges(g, u, v)
        assert g.device == ctx
        assert g.num_nodes() == 3
        assert g.num_edges() == expected_edges
    assert_endpoints(g, [0, 1, 0, 0, 0], [1, 2, 1, 1, 1], order="eid")

    # adding nothing leaves the graph unchanged
    for u, v in (([], []), (0, []), ([], 0)):
        g = dgl.add_edges(g, u, v)
    assert g.device == ctx
    assert g.num_nodes() == 3
    assert g.num_edges() == 5
    assert_endpoints(g, [0, 1, 0, 0, 0], [1, 2, 1, 1, 1], order="eid")

    # node id larger than current max node id
    g = new_homograph()
    g = dgl.add_edges(g, ids([0, 1]), ids([2, 3]))
    assert g.num_nodes() == 4
    assert g.num_edges() == 4
    assert_endpoints(g, [0, 1, 0, 1], [1, 2, 2, 3], order="eid")

    # has data
    g = new_homograph()
    g.ndata["h"] = feat([1, 1, 1])
    g.edata["h"] = feat([1, 1])
    e_feat = {"h": feat([2, 2]), "hh": feat([2, 2])}
    g = dgl.add_edges(g, ids([0, 1]), ids([2, 3]), e_feat)
    assert g.num_nodes() == 4
    assert g.num_edges() == 4
    assert_endpoints(g, [0, 1, 0, 1], [1, 2, 2, 3], order="eid")
    assert F.array_equal(g.ndata["h"], ids([1, 1, 1, 0]))
    assert F.array_equal(g.edata["h"], ids([1, 1, 2, 2]))
    assert F.array_equal(g.edata["hh"], ids([0, 0, 2, 2]))

    # zero data graph
    g = dgl.graph(([], []), num_nodes=0, idtype=idtype, device=ctx)
    e_feat = {"h": feat([2, 2]), "hh": feat([2, 2])}
    g = dgl.add_edges(g, ids([0, 1]), ids([2, 2]), e_feat)
    assert g.num_nodes() == 3
    assert g.num_edges() == 2
    assert_endpoints(g, [0, 1], [2, 2], order="eid")
    assert F.array_equal(g.edata["h"], ids([2, 2]))
    assert F.array_equal(g.edata["hh"], ids([2, 2]))

    # ---- bipartite graph: int, list, then tensor endpoints ----
    g = new_bipartite()
    endpoints = ((0, 1), ([0], [1]), (ids([0]), ids([1])))
    for (u, v), expected_edges in zip(endpoints, (3, 4, 5)):
        g = dgl.add_edges(g, u, v)
        assert g.device == ctx
        assert g.num_nodes("user") == 2
        assert g.num_nodes("game") == 3
        assert g.num_edges() == expected_edges
    assert_endpoints(g, [0, 1, 0, 0, 0], [1, 2, 1, 1, 1])

    # node id larger than current max node id
    g = new_bipartite()
    g = dgl.add_edges(g, ids([0, 2]), ids([2, 3]))
    assert g.device == ctx
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_edges() == 4
    assert_endpoints(g, [0, 1, 0, 2], [1, 2, 2, 3], order="eid")

    # has data
    g = new_bipartite()
    g.nodes["user"].data["h"] = feat([1, 1])
    g.nodes["game"].data["h"] = feat([2, 2, 2])
    g.edata["h"] = feat([1, 1])
    e_feat = {"h": feat([2, 2]), "hh": feat([2, 2])}
    g = dgl.add_edges(g, ids([0, 2]), ids([2, 3]), e_feat)
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_edges() == 4
    assert_endpoints(g, [0, 1, 0, 2], [1, 2, 2, 3], order="eid")
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 0]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 2, 0]))
    assert F.array_equal(g.edata["h"], ids([1, 1, 2, 2]))
    assert F.array_equal(g.edata["hh"], ids([0, 0, 2, 2]))

    # ---- heterogeneous graph ----
    g = create_test_heterograph3(idtype)
    g = dgl.add_edges(g, ids([0, 2]), ids([2, 3]), etype="plays")
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_nodes("developer") == 2
    assert g.num_edges("plays") == 6
    assert g.num_edges("develops") == 2
    assert_endpoints(
        g,
        [0, 1, 1, 2, 0, 2],
        [0, 0, 1, 1, 2, 3],
        order="eid",
        etype="plays",
    )
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 0, 0]))
    assert F.array_equal(
        g.edges["plays"].data["h"], ids([1, 1, 1, 1, 0, 0])
    )

    # add with feature
    e_feat = {"h": feat([2, 2])}
    g.nodes["game"].data["h"] = feat([2, 2, 1, 1])
    g = dgl.add_edges(
        g, ids([0, 2]), ids([2, 3]), data=e_feat, etype="develops"
    )
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 4
    assert g.num_nodes("developer") == 3
    assert g.num_edges("plays") == 6
    assert g.num_edges("develops") == 4
    assert_endpoints(
        g, [0, 1, 0, 2], [0, 1, 2, 3], order="eid", etype="develops"
    )
    assert F.array_equal(g.nodes["developer"].data["h"], ids([3, 3, 0]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 1, 1]))
    assert F.array_equal(g.edges["develops"].data["h"], ids([0, 0, 2, 2]))
@parametrize_idtype
def test_add_nodes(idtype):
    """Check ``dgl.add_nodes`` with and without node data.

    New nodes must extend existing features: supplied data is appended, and
    features not supplied are zero-filled.
    """
    ctx = F.ctx()

    def ids(values):
        return F.tensor(values, dtype=idtype)

    def feat(values):
        return F.copy_to(ids(values), ctx=ctx)

    # homogeneous graph: the input graph itself must stay untouched
    g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=ctx)
    g.ndata["h"] = feat([1, 1, 1])
    new_g = dgl.add_nodes(g, 1)
    assert g.num_nodes() == 3
    assert new_g.num_nodes() == 4
    assert F.array_equal(new_g.ndata["h"], ids([1, 1, 1, 0]))

    # graph with three nodes but no edges
    g = dgl.graph(([], []), num_nodes=3, idtype=idtype, device=ctx)
    g.ndata["h"] = feat([1, 1, 1])
    g = dgl.add_nodes(g, 1, data={"h": feat([2])})
    assert g.num_nodes() == 4
    assert F.array_equal(g.ndata["h"], ids([1, 1, 1, 2]))

    # bipartite graph
    g = dgl.heterograph(
        {("user", "plays", "game"): ([0, 1], [1, 2])},
        idtype=idtype,
        device=ctx,
    )
    g = dgl.add_nodes(g, 2, data={"h": feat([2, 2])}, ntype="user")
    assert g.num_nodes("user") == 4
    assert g.num_nodes("game") == 3
    # "h" did not exist before, so the two old users are zero-filled
    assert F.array_equal(g.nodes["user"].data["h"], ids([0, 0, 2, 2]))
    g = dgl.add_nodes(g, 2, ntype="game")
    assert g.num_nodes("user") == 4
    assert g.num_nodes("game") == 5

    # heterogeneous graph
    g = create_test_heterograph3(idtype)
    g = dgl.add_nodes(g, 1, ntype="user")
    g = dgl.add_nodes(g, 2, data={"h": feat([2, 2])}, ntype="game")
    assert g.num_nodes("user") == 4
    assert g.num_nodes("game") == 4
    assert g.num_nodes("developer") == 2
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1, 0]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 2, 2]))


@parametrize_idtype
def test_remove_edges(idtype):
    """Check ``dgl.remove_edges`` on plain, typed and batched graphs.

    Covers edge IDs given as int, list and tensor, preservation of node and
    edge features, rejection of an invalid edge ID, and the per-graph batch
    bookkeeping after removal.

    NOTE(review): an earlier function in this module is also named
    ``test_remove_edges``; this definition rebinds the name, so only this one
    is visible to the test runner. Renaming one of them would be needed to
    run both.
    """
    ctx = F.ctx()

    def ids(values):
        return F.tensor(values, dtype=idtype)

    def feat(values):
        return F.copy_to(ids(values), ctx=ctx)

    def new_homograph():
        return dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=ctx)

    def new_bipartite():
        return dgl.heterograph(
            {("user", "plays", "game"): ([0, 1], [1, 2])},
            idtype=idtype,
            device=ctx,
        )

    def assert_endpoints(graph, src, dst, **query):
        got_src, got_dst = graph.edges(form="uv", order="eid", **query)
        assert F.array_equal(got_src, ids(src))
        assert F.array_equal(got_dst, ids(dst))

    # ---- homogeneous graph: edge ID as int, then as list ----
    for eid in (0, [0]):
        g = new_homograph()
        g = dgl.remove_edges(g, eid)
        assert g.num_edges() == 1
        assert_endpoints(g, [1], [2])
    # ... and the remaining edge through a tensor
    g = dgl.remove_edges(g, ids([0]))
    assert g.num_edges() == 0

    # has node data
    g = new_homograph()
    g.ndata["h"] = feat([1, 2, 3])
    g = dgl.remove_edges(g, 1)
    assert g.num_edges() == 1
    assert F.array_equal(g.ndata["h"], ids([1, 2, 3]))

    # has edge data
    g = new_homograph()
    g.edata["h"] = feat([1, 2])
    g = dgl.remove_edges(g, 0)
    assert g.num_edges() == 1
    assert F.array_equal(g.edata["h"], ids([2]))

    # invalid eid: only one edge (ID 0) is left at this point
    assert_fail = False
    try:
        dgl.remove_edges(g, 1)
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not mistaken for the expected
        # failure.
        assert_fail = True
    assert assert_fail

    # ---- bipartite graph: edge ID as int, then as list ----
    for eid in (0, [0]):
        g = new_bipartite()
        g = dgl.remove_edges(g, eid)
        assert g.num_edges() == 1
        assert_endpoints(g, [1], [2])
    g = dgl.remove_edges(g, ids([0]))
    assert g.num_edges() == 0

    # has data
    g = new_bipartite()
    g.nodes["user"].data["h"] = feat([1, 1])
    g.nodes["game"].data["h"] = feat([2, 2, 2])
    g.edata["h"] = feat([1, 2])
    g = dgl.remove_edges(g, 1)
    assert g.num_edges() == 1
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2, 2]))
    assert F.array_equal(g.edata["h"], ids([1]))

    # ---- heterogeneous graph ----
    g = create_test_heterograph3(idtype)
    g.edges["plays"].data["h"] = feat([1, 2, 3, 4])
    g = dgl.remove_edges(g, 1, etype="plays")
    assert g.num_edges("plays") == 3
    assert_endpoints(g, [0, 1, 2], [0, 1, 1], etype="plays")
    assert F.array_equal(g.edges["plays"].data["h"], ids([1, 3, 4]))
    # remove all edges of 'develops'
    g = dgl.remove_edges(g, [0, 1], etype="develops")
    assert g.num_edges("develops") == 0
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2, 2]))
    assert F.array_equal(g.nodes["developer"].data["h"], ids([3, 3]))

    # ---- batched graph ----
    g1 = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=ctx)
    g2 = dgl.graph(([], []), idtype=idtype, device=ctx)
    g3 = dgl.graph(([2, 3, 4], [3, 2, 1]), idtype=idtype, device=ctx)
    bg = dgl.batch([g1, g2, g3])
    # (edge IDs to remove, expected number of edges left per batched graph)
    homogeneous_cases = (
        (2, [2, 0, 2]),
        ([0, 2], [1, 0, 2]),
        (ids([0, 2]), [1, 0, 2]),
    )
    for eids, edges_left in homogeneous_cases:
        bg_r = dgl.remove_edges(bg, eids)
        assert bg.batch_size == bg_r.batch_size
        assert F.array_equal(bg.batch_num_nodes(), bg_r.batch_num_nodes())
        assert F.array_equal(bg_r.batch_num_edges(), ids(edges_left))

    # ---- batched heterogeneous graph ----
    g1 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([1, 3], [0, 1]),
        },
        num_nodes_dict={"user": 4, "game": 3},
        idtype=idtype,
        device=ctx,
    )
    g2 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 2], [3, 4]),
            ("user", "plays", "game"): ([], []),
        },
        num_nodes_dict={"user": 6, "game": 2},
        idtype=idtype,
        device=ctx,
    )
    g3 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([], []),
            ("user", "plays", "game"): ([1, 2], [1, 2]),
        },
        idtype=idtype,
        device=ctx,
    )
    bg = dgl.batch([g1, g2, g3])
    ntypes = bg.ntypes
    # (edge IDs, edge type they belong to, expected edges left of that type)
    hetero_cases = (
        (1, "follows", [1, 2, 0]),
        (2, "plays", [2, 0, 1]),
        ([0, 1, 3], "follows", [0, 1, 0]),
        ([1, 2], "plays", [1, 0, 1]),
        (ids([0, 1, 3]), "follows", [0, 1, 0]),
        (ids([1, 2]), "plays", [1, 0, 1]),
    )
    for eids, etype, edges_left in hetero_cases:
        bg_r = dgl.remove_edges(bg, eids, etype=etype)
        assert bg.batch_size == bg_r.batch_size
        for nty in ntypes:
            assert F.array_equal(
                bg.batch_num_nodes(nty), bg_r.batch_num_nodes(nty)
            )
        assert F.array_equal(bg_r.batch_num_edges(etype), ids(edges_left))
        # the edge type that was not touched keeps its batch layout
        other = "plays" if etype == "follows" else "follows"
        assert F.array_equal(
            bg.batch_num_edges(other), bg_r.batch_num_edges(other)
        )
@parametrize_idtype
def test_remove_nodes(idtype):
    """Check ``dgl.remove_nodes`` on plain, typed and batched graphs.

    Covers node IDs given as int, list and tensor, removal of incident edges,
    relabelling of the remaining nodes, feature preservation, rejection of an
    invalid node ID, and the per-graph batch bookkeeping after removal.
    """
    ctx = F.ctx()

    def ids(values):
        return F.tensor(values, dtype=idtype)

    def feat(values):
        return F.copy_to(ids(values), ctx=ctx)

    def new_homograph():
        return dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=ctx)

    def new_bipartite():
        return dgl.heterograph(
            {("user", "plays", "game"): ([0, 1], [1, 2])},
            idtype=idtype,
            device=ctx,
        )

    def assert_endpoints(graph, src, dst, **query):
        got_src, got_dst = graph.edges(form="uv", order="eid", **query)
        assert F.array_equal(got_src, ids(src))
        assert F.array_equal(got_dst, ids(dst))

    # ---- homogeneous graph ----
    g = dgl.remove_nodes(new_homograph(), 0)
    assert g.num_nodes() == 2
    assert g.num_edges() == 1
    assert_endpoints(g, [0], [1])

    # the middle node touches both edges
    g = dgl.remove_nodes(new_homograph(), [1])
    assert g.num_nodes() == 2
    assert g.num_edges() == 0

    g = dgl.remove_nodes(new_homograph(), ids([2]))
    assert g.num_nodes() == 2
    assert g.num_edges() == 1
    assert_endpoints(g, [0], [1])

    # invalid nid: ``g`` has only two nodes left
    assert_fail = False
    try:
        g.remove_nodes(3)
    except Exception:
        # ``Exception`` rather than a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not mistaken for the expected
        # failure.
        assert_fail = True
    assert assert_fail

    # has node and edge data
    g = dgl.graph(([0, 0, 2], [0, 1, 2]), idtype=idtype, device=ctx)
    g.ndata["hv"] = feat([1, 2, 3])
    g.edata["he"] = feat([1, 2, 3])
    g = dgl.remove_nodes(g, ids([0]))
    assert g.num_nodes() == 2
    assert g.num_edges() == 1
    assert_endpoints(g, [1], [1])
    assert F.array_equal(g.ndata["hv"], ids([2, 3]))
    assert F.array_equal(g.edata["he"], ids([3]))

    # ---- bipartite graph ----
    g = dgl.remove_nodes(new_bipartite(), 0, ntype="user")
    assert g.num_nodes("user") == 1
    assert g.num_nodes("game") == 3
    assert g.num_edges() == 1
    assert_endpoints(g, [0], [2])

    g = dgl.remove_nodes(new_bipartite(), [1], ntype="user")
    assert g.num_nodes("user") == 1
    assert g.num_nodes("game") == 3
    assert g.num_edges() == 1
    assert_endpoints(g, [0], [1])

    # game 0 has no incident edge, so both edges survive (relabelled)
    g = dgl.remove_nodes(new_bipartite(), ids([0]), ntype="game")
    assert g.num_nodes("user") == 2
    assert g.num_nodes("game") == 2
    assert g.num_edges() == 2
    assert_endpoints(g, [0, 1], [0, 1])

    # ---- heterogeneous graph ----
    g = create_test_heterograph3(idtype)
    g.edges["plays"].data["h"] = feat([1, 2, 3, 4])
    g = dgl.remove_nodes(g, 0, ntype="game")
    assert g.num_nodes("user") == 3
    assert g.num_nodes("game") == 1
    assert g.num_nodes("developer") == 2
    assert g.num_edges("plays") == 2
    assert g.num_edges("develops") == 1
    assert F.array_equal(g.nodes["user"].data["h"], ids([1, 1, 1]))
    assert F.array_equal(g.nodes["game"].data["h"], ids([2]))
    assert F.array_equal(g.nodes["developer"].data["h"], ids([3, 3]))
    assert_endpoints(g, [1, 2], [0, 0], etype="plays")
    assert F.array_equal(g.edges["plays"].data["h"], ids([3, 4]))
    assert_endpoints(g, [1], [0], etype="develops")

    # ---- batched graph ----
    g1 = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=ctx)
    g2 = dgl.graph(([], []), idtype=idtype, device=ctx)
    g3 = dgl.graph(([2, 3, 4], [3, 2, 1]), idtype=idtype, device=ctx)
    bg = dgl.batch([g1, g2, g3])
    # (node IDs to remove, expected nodes left, expected edges left)
    homogeneous_cases = (
        (1, [4, 0, 5], [0, 0, 3]),
        ([1, 7], [4, 0, 4], [0, 0, 1]),
        (ids([1, 7]), [4, 0, 4], [0, 0, 1]),
    )
    for nids, nodes_left, edges_left in homogeneous_cases:
        bg_r = dgl.remove_nodes(bg, nids)
        assert bg_r.batch_size == bg.batch_size
        assert F.array_equal(bg_r.batch_num_nodes(), ids(nodes_left))
        assert F.array_equal(bg_r.batch_num_edges(), ids(edges_left))

    # ---- batched heterogeneous graph ----
    g1 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([1, 3], [0, 1]),
        },
        num_nodes_dict={"user": 4, "game": 3},
        idtype=idtype,
        device=ctx,
    )
    g2 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 2], [3, 4]),
            ("user", "plays", "game"): ([], []),
        },
        num_nodes_dict={"user": 6, "game": 2},
        idtype=idtype,
        device=ctx,
    )
    g3 = dgl.heterograph(
        {
            ("user", "follows", "user"): ([], []),
            ("user", "plays", "game"): ([1, 2], [1, 2]),
        },
        idtype=idtype,
        device=ctx,
    )
    bg = dgl.batch([g1, g2, g3])
    # (node IDs, their node type, expected nodes left of that type,
    #  expected "follows" edges left or None when they must be unchanged,
    #  expected "plays" edges left)
    hetero_cases = (
        (1, "user", [3, 6, 3], [0, 2, 0], [1, 0, 2]),
        (6, "game", [3, 2, 2], None, [2, 0, 1]),
        ([1, 5, 6, 11], "user", [3, 4, 2], [0, 1, 0], [1, 0, 1]),
        ([0, 3, 4, 7], "game", [2, 0, 2], None, [1, 0, 1]),
        (ids([1, 5, 6, 11]), "user", [3, 4, 2], [0, 1, 0], [1, 0, 1]),
        (ids([0, 3, 4, 7]), "game", [2, 0, 2], None, [1, 0, 1]),
    )
    for nids, ntype, nodes_left, follows_left, plays_left in hetero_cases:
        bg_r = dgl.remove_nodes(bg, nids, ntype=ntype)
        assert bg_r.batch_size == bg.batch_size
        assert F.array_equal(bg_r.batch_num_nodes(ntype), ids(nodes_left))
        # the node type that was not touched keeps its batch layout
        other = "game" if ntype == "user" else "user"
        assert F.array_equal(
            bg.batch_num_nodes(other), bg_r.batch_num_nodes(other)
        )
        if follows_left is None:
            assert F.array_equal(
                bg.batch_num_edges("follows"),
                bg_r.batch_num_edges("follows"),
            )
        else:
            assert F.array_equal(
                bg_r.batch_num_edges("follows"), ids(follows_left)
            )
        assert F.array_equal(bg_r.batch_num_edges("plays"), ids(plays_left))
@parametrize_idtype
def test_add_selfloop(idtype):
    """Check ``dgl.add_self_loop`` and how it fills the new edges' features.

    Three fill modes are exercised on a homogeneous graph and on the
    ``follows`` relation of a heterogeneous graph: the default fill, an
    explicit integer ``fill_data``, and an aggregator name (``"sum"`` /
    ``"mean"``). Relations between two different node types must be rejected.
    """
    ctx = F.ctx()

    def ids(values):
        return F.tensor(values, dtype=idtype)

    def feat(values):
        # integer feature in the ID dtype, on the test device
        return F.copy_to(ids(values), ctx=ctx)

    def floats(values):
        # feature in the backend's default dtype, on the test device
        return F.copy_to(F.tensor(values), ctx=ctx)

    def raises(func, *args, **kwargs):
        # True when the call fails with an ordinary exception. ``Exception``
        # (not a bare ``except:``) keeps KeyboardInterrupt / SystemExit from
        # being counted as the expected failure.
        try:
            func(*args, **kwargs)
        except Exception:
            return True
        return False

    def assert_endpoints(graph, src, dst, **query):
        got_src, got_dst = graph.edges(form="uv", order="eid", **query)
        assert F.array_equal(got_src, ids(src))
        assert F.array_equal(got_dst, ids(dst))

    def run_homogeneous(he, he1, expected_he, expected_he1, **options):
        # Edges 0->2, 0->1, 2->0; one self-loop per node is appended.
        g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=ctx)
        g.edata["he"] = he
        g.edata["he1"] = he1
        g.ndata["hn"] = feat([1, 2, 3])
        g = dgl.add_self_loop(g, **options)
        assert g.num_nodes() == 3
        assert g.num_edges() == 6
        assert_endpoints(g, [0, 0, 2, 0, 1, 2], [2, 1, 0, 0, 1, 2])
        assert F.array_equal(g.edata["he"], expected_he)
        assert F.array_equal(g.edata["he1"], expected_he1)

    # ---- homogeneous graph ----
    # default fill_data: new edges get ones
    run_homogeneous(
        feat([1, 2, 3]),
        floats([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]),
        ids([1, 2, 3, 1, 1, 1]),
        F.tensor(
            [
                [0.0, 1.0],
                [2.0, 3.0],
                [4.0, 5.0],
                [1.0, 1.0],
                [1.0, 1.0],
                [1.0, 1.0],
            ]
        ),
    )
    # fill_data is int
    run_homogeneous(
        feat([1, 2, 3]),
        feat([[0, 1], [2, 3], [4, 5]]),
        ids([1, 2, 3, 1, 1, 1]),
        ids([[0, 1], [2, 3], [4, 5], [1, 1], [1, 1], [1, 1]]),
        fill_data=1,
    )
    # fill_data is str: each self-loop gets the sum over the node's
    # incoming edges
    run_homogeneous(
        floats([1.0, 2.0, 3.0]),
        floats([[0.0, 1.0], [2.0, 3.0], [4.0, 5.0]]),
        F.tensor([1.0, 2.0, 3.0, 3.0, 2.0, 1.0]),
        F.tensor(
            [
                [0.0, 1.0],
                [2.0, 3.0],
                [4.0, 5.0],
                [4.0, 5.0],
                [2.0, 3.0],
                [0.0, 1.0],
            ]
        ),
        fill_data="sum",
    )

    # ---- bipartite graph ----
    g = dgl.heterograph(
        {("user", "plays", "game"): ([0, 1, 2], [1, 2, 2])},
        idtype=idtype,
        device=ctx,
    )
    # an error is expected here, presumably because source and destination
    # node types differ
    assert raises(dgl.add_self_loop, g)

    # ---- heterogeneous graph, relation "follows" ----
    def check_follows(g, expected_h, expected_h1, expected_plays_h):
        assert g.num_nodes("user") == 3
        assert g.num_nodes("game") == 2
        assert g.num_edges("follows") == 5
        assert g.num_edges("plays") == 2
        assert_endpoints(
            g, [1, 2, 0, 1, 2], [0, 1, 0, 1, 2], etype="follows"
        )
        assert F.array_equal(g.edges["follows"].data["h"], expected_h)
        assert F.array_equal(g.edges["follows"].data["h1"], expected_h1)
        assert F.array_equal(g.edges["plays"].data["h"], expected_plays_h)

    # default fill_data
    g = create_test_heterograph5(idtype)
    g.edges["follows"].data["h1"] = floats([[0.0, 1.0], [1.0, 2.0]])
    g = dgl.add_self_loop(g, etype="follows")
    check_follows(
        g,
        ids([1, 2, 1, 1, 1]),
        F.tensor(
            [[0.0, 1.0], [1.0, 2.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]
        ),
        ids([1, 2]),
    )

    # fill_data is int
    g = create_test_heterograph5(idtype)
    g.edges["follows"].data["h1"] = feat([[0, 1], [1, 2]])
    g = dgl.add_self_loop(g, fill_data=1, etype="follows")
    check_follows(
        g,
        ids([1, 2, 1, 1, 1]),
        ids([[0, 1], [1, 2], [1, 1], [1, 1], [1, 1]]),
        ids([1, 2]),
    )

    # fill_data is str
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): (ids([1, 2]), ids([0, 1])),
            ("user", "plays", "game"): (ids([0, 1]), ids([0, 1])),
        },
        idtype=idtype,
        device=ctx,
    )
    g.nodes["user"].data["h"] = feat([1, 1, 1])
    g.nodes["game"].data["h"] = feat([2, 2])
    g.edges["follows"].data["h"] = floats([1.0, 2.0])
    g.edges["follows"].data["h1"] = floats([[0.0, 1.0], [1.0, 2.0]])
    g.edges["plays"].data["h"] = floats([1.0, 2.0])
    g = dgl.add_self_loop(g, fill_data="mean", etype="follows")
    # user 2 has no incoming "follows" edge, hence the zero fill
    check_follows(
        g,
        F.tensor([1.0, 2.0, 1.0, 2.0, 0.0]),
        F.tensor(
            [[0.0, 1.0], [1.0, 2.0], [0.0, 1.0], [1.0, 2.0], [0.0, 0.0]]
        ),
        F.tensor([1.0, 2.0]),
    )

    # "plays" connects users to games, so an error is expected
    assert raises(dgl.add_self_loop, g, etype="plays")
F.copy_to(F.tensor([1.0, 2.0]), ctx=F.ctx()) g.edges["follows"].data["h1"] = F.copy_to( F.tensor([[0.0, 1.0], [1.0, 2.0]]), ctx=F.ctx() ) g.edges["plays"].data["h"] = F.copy_to(F.tensor([1.0, 2.0]), ctx=F.ctx()) g = dgl.add_self_loop(g, fill_data="mean", etype="follows") assert g.num_nodes("user") == 3 assert g.num_nodes("game") == 2 assert g.num_edges("follows") == 5 assert g.num_edges("plays") == 2 u, v = g.edges(form="uv", order="eid", etype="follows") assert F.array_equal(u, F.tensor([1, 2, 0, 1, 2], dtype=idtype)) assert F.array_equal(v, F.tensor([0, 1, 0, 1, 2], dtype=idtype)) assert F.array_equal( g.edges["follows"].data["h"], F.tensor([1.0, 2.0, 1.0, 2.0, 0.0]) ) assert F.array_equal( g.edges["follows"].data["h1"], F.tensor([[0.0, 1.0], [1.0, 2.0], [0.0, 1.0], [1.0, 2.0], [0.0, 0.0]]), ) assert F.array_equal(g.edges["plays"].data["h"], F.tensor([1.0, 2.0])) raise_error = False try: g = dgl.add_self_loop(g, etype="plays") except: raise_error = True assert raise_error @parametrize_idtype def test_remove_selfloop(idtype): # homogeneous graph g = dgl.graph(([0, 0, 0, 1], [1, 0, 0, 2]), idtype=idtype, device=F.ctx()) g.edata["he"] = F.copy_to(F.tensor([1, 2, 3, 4], dtype=idtype), ctx=F.ctx()) g = dgl.remove_self_loop(g) assert g.num_nodes() == 3 assert g.num_edges() == 2 assert F.array_equal(g.edata["he"], F.tensor([1, 4], dtype=idtype)) # bipartite graph g = dgl.heterograph( {("user", "plays", "game"): ([0, 1, 2], [1, 2, 2])}, idtype=idtype, device=F.ctx(), ) # nothing will happend raise_error = False try: g = dgl.remove_self_loop(g, etype="plays") except: raise_error = True assert raise_error g = create_test_heterograph4(idtype) g = dgl.remove_self_loop(g, etype="follows") assert g.num_nodes("user") == 3 assert g.num_nodes("game") == 2 assert g.num_edges("follows") == 2 assert g.num_edges("plays") == 2 u, v = g.edges(form="uv", order="eid", etype="follows") assert F.array_equal(u, F.tensor([1, 2], dtype=idtype)) assert F.array_equal(v, F.tensor([0, 1], 
dtype=idtype)) assert F.array_equal( g.edges["follows"].data["h"], F.tensor([2, 4], dtype=idtype) ) assert F.array_equal( g.edges["plays"].data["h"], F.tensor([1, 2], dtype=idtype) ) raise_error = False try: g = dgl.remove_self_loop(g, etype="plays") except: raise_error = True assert raise_error # batch information g = dgl.graph( ([0, 0, 0, 1, 3, 3, 4], [1, 0, 0, 2, 3, 4, 4]), idtype=idtype, device=F.ctx(), ) g.set_batch_num_nodes([3, 2]) g.set_batch_num_edges([4, 3]) g = dgl.remove_self_loop(g) assert g.num_nodes() == 5 assert g.num_edges() == 3 assert F.array_equal(g.batch_num_nodes(), F.tensor([3, 2], dtype=idtype)) assert F.array_equal(g.batch_num_edges(), F.tensor([2, 1], dtype=idtype)) @parametrize_idtype def test_reorder_graph(idtype): g = dgl.graph( ([0, 1, 2, 3, 4], [2, 2, 3, 2, 3]), idtype=idtype, device=F.ctx() ) g.ndata["h"] = F.copy_to(F.randn((g.num_nodes(), 3)), ctx=F.ctx()) g.edata["w"] = F.copy_to(F.randn((g.num_edges(), 2)), ctx=F.ctx()) # call with default: node_permute_algo=None, edge_permute_algo='src' rg = dgl.reorder_graph(g) assert dgl.EID in rg.edata.keys() src = F.asnumpy(rg.edges()[0]) assert np.array_equal(src, np.sort(src)) # call with 'rcmk' node_permute_algo rg = dgl.reorder_graph(g, node_permute_algo="rcmk") assert dgl.NID in rg.ndata.keys() assert dgl.EID in rg.edata.keys() src = F.asnumpy(rg.edges()[0]) assert np.array_equal(src, np.sort(src)) # call with 'dst' edge_permute_algo rg = dgl.reorder_graph(g, edge_permute_algo="dst") dst = F.asnumpy(rg.edges()[1]) assert np.array_equal(dst, np.sort(dst)) # call with unknown edge_permute_algo raise_error = False try: dgl.reorder_graph(g, edge_permute_algo="none") except: raise_error = True assert raise_error # reorder back to original according to stored ids rg = dgl.reorder_graph(g, node_permute_algo="rcmk") rg2 = dgl.reorder_graph( rg, "custom", permute_config={"nodes_perm": np.argsort(F.asnumpy(rg.ndata[dgl.NID]))}, ) assert F.array_equal(g.ndata["h"], rg2.ndata["h"]) assert 
F.array_equal(g.edata["w"], rg2.edata["w"]) # do not store ids rg = dgl.reorder_graph(g, store_ids=False) assert not dgl.NID in rg.ndata.keys() assert not dgl.EID in rg.edata.keys() # metis does not work on windows. if os.name == "nt": pass else: # metis_partition may fail for small graph. mg = create_large_graph(1000).to(F.ctx()) # call with metis strategy, but k is not specified raise_error = False try: dgl.reorder_graph(mg, node_permute_algo="metis") except: raise_error = True assert raise_error # call with metis strategy, k is specified raise_error = False try: dgl.reorder_graph( mg, node_permute_algo="metis", permute_config={"k": 2} ) except: raise_error = True assert not raise_error # call with qualified nodes_perm specified nodes_perm = np.random.permutation(g.num_nodes()) raise_error = False try: dgl.reorder_graph( g, node_permute_algo="custom", permute_config={"nodes_perm": nodes_perm}, ) except: raise_error = True assert not raise_error # call with unqualified nodes_perm specified raise_error = False try: dgl.reorder_graph( g, node_permute_algo="custom", permute_config={"nodes_perm": nodes_perm[: g.num_nodes() - 1]}, ) except: raise_error = True assert raise_error # call with unsupported strategy raise_error = False try: dgl.reorder_graph(g, node_permute_algo="cmk") except: raise_error = True assert raise_error # heterograph: not supported raise_error = False try: hg = dgl.heterogrpah( {("user", "follow", "user"): ([0, 1], [1, 2])}, idtype=idtype, device=F.ctx(), ) dgl.reorder_graph(hg) except: raise_error = True assert raise_error # TODO: shall we fix them? 
# add 'csc' format if needed
    # fg = g.formats('csr')
    # assert 'csc' not in sum(fg.formats().values(), [])
    # rfg = dgl.reorder_graph(fg)
    # assert 'csc' in sum(rfg.formats().values(), [])


@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support a slicing operation",
)
@parametrize_idtype
def test_norm_by_dst(idtype):
    """Check the edge weights returned by ``dgl.norm_by_dst`` on a
    homogeneous graph and on one edge type of a heterogeneous graph."""
    # Case1: A homogeneous graph
    g = dgl.graph(([0, 1, 1], [1, 1, 2]), idtype=idtype, device=F.ctx())
    eweight = dgl.norm_by_dst(g)
    assert F.allclose(eweight, F.tensor([0.5, 0.5, 1.0]))

    # Case2: A heterogeneous graph
    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1], [1, 2]),
            ("user", "plays", "game"): ([0, 1, 1], [1, 1, 2]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    eweight = dgl.norm_by_dst(g, etype=("user", "plays", "game"))
    assert F.allclose(eweight, F.tensor([0.5, 0.5, 1.0]))


@parametrize_idtype
def test_module_add_self_loop(idtype):
    """Check the ``dgl.AddSelfLoop`` transform on homogeneous and
    heterogeneous graphs, including ``allow_duplicate`` and ``new_etypes``."""
    g = dgl.graph(([1, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 2))
    g.edata["w"] = F.randn((g.num_edges(), 3))

    # Case1: add self-loops with the default setting
    transform = dgl.AddSelfLoop()
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_nodes() == g.num_nodes()
    assert new_g.num_edges() == 4
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 0), (1, 1), (1, 2), (2, 2)}
    assert "h" in new_g.ndata
    assert "w" in new_g.edata

    # Case2: allow_duplicate=True; the edge set is the same as in Case1 but
    # the expected edge count rises from 4 to 5, i.e. one self-loop appears
    # twice
    transform = dgl.AddSelfLoop(allow_duplicate=True)
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_nodes() == g.num_nodes()
    assert new_g.num_edges() == 5
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 0), (1, 1), (1, 2), (2, 2)}
    assert "h" in new_g.ndata
    assert "w" in new_g.edata

    # Case3: add self-loops for a homogeneous graph (the example in doc)
    transform = dgl.AddSelfLoop(fill_data="sum")
    g = dgl.graph(([0, 0, 2], [2, 1, 0]), idtype=idtype, device=F.ctx())
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_nodes() == g.num_nodes()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 2), (0, 1), (2, 0), (0, 0), (1, 1), (2, 2)}

    # Create a heterogeneous graph
    g = dgl.heterograph(
        {
            ("user", "plays", "game"): ([0], [1]),
            ("user", "follows", "user"): ([1], [3]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g.nodes["user"].data["h1"] = F.randn((4, 2))
    g.edges["plays"].data["w1"] = F.randn((1, 3))
    g.nodes["game"].data["h2"] = F.randn((2, 4))
    g.edges["follows"].data["w2"] = F.randn((1, 5))

    # Case4: add self-loops for a heterogeneous graph
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.ntypes == g.ntypes
    assert new_g.canonical_etypes == g.canonical_etypes
    for nty in new_g.ntypes:
        assert new_g.num_nodes(nty) == g.num_nodes(nty)
    assert new_g.num_edges("plays") == 1
    assert new_g.num_edges("follows") == 5
    assert "h1" in new_g.nodes["user"].data
    assert "h2" in new_g.nodes["game"].data
    assert "w1" in new_g.edges["plays"].data
    assert "w2" in new_g.edges["follows"].data

    # Case5: add self-etypes for a heterogeneous graph
    transform = dgl.AddSelfLoop(new_etypes=True)
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.ntypes == g.ntypes
    assert set(new_g.canonical_etypes) == {
        ("user", "plays", "game"),
        ("user", "follows", "user"),
        ("user", "self", "user"),
        ("game", "self", "game"),
    }
    for nty in new_g.ntypes:
        assert new_g.num_nodes(nty) == g.num_nodes(nty)
    assert new_g.num_edges("plays") == 1
    assert new_g.num_edges("follows") == 5
    assert new_g.num_edges(("user", "self", "user")) == 4
    assert new_g.num_edges(("game", "self", "game")) == 2
    assert "h1" in new_g.nodes["user"].data
    assert "h2" in new_g.nodes["game"].data
    assert "w1" in new_g.edges["plays"].data
    assert "w2" in new_g.edges["follows"].data


@parametrize_idtype
def test_module_remove_self_loop(idtype):
    """Check the ``dgl.RemoveSelfLoop`` transform on a homogeneous and a
    heterogeneous graph."""
    transform = dgl.RemoveSelfLoop()

    # Case1: homogeneous graph
    g = dgl.graph(([1, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 2))
    g.edata["w"] = F.randn((g.num_edges(), 3))
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_nodes() == g.num_nodes()
    assert new_g.num_edges() == 1
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(1, 2)}
    assert "h" in new_g.ndata
    assert "w" in new_g.edata

    # Case2: heterogeneous graph
    g = dgl.heterograph(
        {
            ("user", "plays", "game"): ([0, 1], [1, 1]),
            ("user", "follows", "user"): ([1, 2], [2, 2]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g.nodes["user"].data["h1"] = F.randn((3, 2))
    g.edges["plays"].data["w1"] = F.randn((2, 3))
    g.nodes["game"].data["h2"] = F.randn((2, 4))
    g.edges["follows"].data["w2"] = F.randn((2, 5))

    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.ntypes == g.ntypes
    assert new_g.canonical_etypes == g.canonical_etypes
    for nty in new_g.ntypes:
        assert new_g.num_nodes(nty) == g.num_nodes(nty)
    # only the same-node-type relation "follows" loses its self-loop (2, 2)
    assert new_g.num_edges("plays") == 2
    assert new_g.num_edges("follows") == 1
    assert "h1" in new_g.nodes["user"].data
    assert "h2" in new_g.nodes["game"].data
    assert "w1" in new_g.edges["plays"].data
    assert "w2" in new_g.edges["follows"].data


@parametrize_idtype
def test_module_add_reverse(idtype):
    """Check the ``dgl.AddReverse`` transform, with and without
    ``copy_edata`` and ``sym_new_etype``."""
    transform = dgl.AddReverse()

    # Case1: Add reverse edges for a homogeneous graph
    g = dgl.graph(([0], [1]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 3))
    g.edata["w"] = F.randn((g.num_edges(), 2))
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert g.num_nodes() == new_g.num_nodes()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
assert eset == {(0, 1), (1, 0)}
    assert F.allclose(g.ndata["h"], new_g.ndata["h"])
    assert F.allclose(g.edata["w"], F.narrow_row(new_g.edata["w"], 0, 1))
    # without copy_edata the feature row of the reverse edge is expected to
    # be all zeros
    assert F.allclose(
        F.narrow_row(new_g.edata["w"], 1, 2),
        F.zeros((1, 2), F.float32, F.ctx()),
    )

    # Case2: Add reverse edges for a homogeneous graph and copy edata
    transform = dgl.AddReverse(copy_edata=True)
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert g.num_nodes() == new_g.num_nodes()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 0)}
    assert F.allclose(g.ndata["h"], new_g.ndata["h"])
    assert F.allclose(g.edata["w"], F.narrow_row(new_g.edata["w"], 0, 1))
    assert F.allclose(g.edata["w"], F.narrow_row(new_g.edata["w"], 1, 2))

    # Case3: Add reverse edges for a heterogeneous graph
    g = dgl.heterograph(
        {
            ("user", "plays", "game"): ([0, 1], [1, 1]),
            ("user", "follows", "user"): ([1, 2], [2, 2]),
        },
        device=F.ctx(),
    )
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert g.ntypes == new_g.ntypes
    assert set(new_g.canonical_etypes) == {
        ("user", "plays", "game"),
        ("user", "follows", "user"),
        ("game", "rev_plays", "user"),
    }
    for nty in g.ntypes:
        assert g.num_nodes(nty) == new_g.num_nodes(nty)

    src, dst = new_g.edges(etype="plays")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 1)}

    src, dst = new_g.edges(etype="follows")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(1, 2), (2, 2), (2, 1)}

    src, dst = new_g.edges(etype="rev_plays")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(1, 1), (1, 0)}

    # Case4: Enforce reverse edge types for symmetric canonical edge types
    transform = dgl.AddReverse(sym_new_etype=True)
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert g.ntypes == new_g.ntypes
    assert set(new_g.canonical_etypes) == {
        ("user", "plays", "game"),
        ("user", "follows", "user"),
        ("game", "rev_plays", "user"),
        ("user", "rev_follows", "user"),
    }
    for nty in g.ntypes:
        assert g.num_nodes(nty) == new_g.num_nodes(nty)

    src, dst = new_g.edges(etype="plays")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 1)}

    src, dst = new_g.edges(etype="follows")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(1, 2), (2, 2)}

    src, dst = new_g.edges(etype="rev_plays")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(1, 1), (1, 0)}

    src, dst = new_g.edges(etype="rev_follows")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(2, 1), (2, 2)}


@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU not supported for to_simple"
)
@parametrize_idtype
def test_module_to_simple(idtype):
    """Check that ``dgl.ToSimple`` collapses parallel edges on a homogeneous
    and a heterogeneous graph."""
    transform = dgl.ToSimple()
    g = dgl.graph(([0, 1, 1], [1, 2, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 2))
    g.edata["w"] = F.tensor([[0.1], [0.2], [0.3]])
    sg = transform(g)
    assert sg.device == g.device
    assert sg.idtype == g.idtype
    assert sg.num_nodes() == g.num_nodes()
    assert sg.num_edges() == 2
    src, dst = sg.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 2)}
    # the duplicated edge (1, 2) is expected to carry a count of 2
    assert F.allclose(sg.edata["count"], F.tensor([1, 2]))
    assert F.allclose(sg.ndata["h"], g.ndata["h"])

    g = dgl.heterograph(
        {
            ("user", "follows", "user"): ([0, 1, 1], [1, 2, 2]),
            ("user", "plays", "game"): ([0, 1, 0], [1, 1, 1]),
        }
    )
    sg = transform(g)
    assert sg.device == g.device
    assert sg.idtype == g.idtype
    assert sg.ntypes == g.ntypes
    assert sg.canonical_etypes == g.canonical_etypes
    for nty in sg.ntypes:
        assert sg.num_nodes(nty) == g.num_nodes(nty)
    for ety in sg.canonical_etypes:
        assert sg.num_edges(ety) == 2

    src, dst = sg.edges(etype="follows")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 2)}

    src, dst = sg.edges(etype="plays")
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 1)}


@parametrize_idtype
def test_module_line_graph(idtype):
    """Check ``dgl.LineGraph`` with the default setting and with
    ``backtracking=False``."""
    transform = dgl.LineGraph()
    g = dgl.graph(([0, 1, 1], [1, 0, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.tensor([[0.0], [1.0], [2.0]])
    g.edata["w"] = F.tensor([[0.0], [0.1], [0.2]])
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    # every edge of g becomes a node of the line graph
    assert new_g.num_nodes() == g.num_edges()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (0, 2), (1, 0)}

    transform = dgl.LineGraph(backtracking=False)
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_nodes() == g.num_edges()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 2)}


@parametrize_idtype
def test_module_khop_graph(idtype):
    """Check ``dgl.KHopGraph(2)`` on a three-node path graph."""
    transform = dgl.KHopGraph(2)
    g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 2))
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_nodes() == g.num_nodes()
    assert F.allclose(g.ndata["h"], new_g.ndata["h"])
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 2)}


@parametrize_idtype
def test_module_add_metapaths(idtype):
    """Check ``dgl.AddMetaPaths`` with and without ``keep_orig_edges``."""
    g = dgl.heterograph(
        {
            ("person", "author", "paper"): ([0, 0, 1], [1, 2, 2]),
            ("paper", "accepted", "venue"): ([1], [0]),
            ("paper", "rejected", "venue"): ([2], [1]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g.nodes["venue"].data["h"] = F.randn((g.num_nodes("venue"), 2))
    g.edges["author"].data["h"] = F.randn((g.num_edges("author"), 3))

    # Case1: keep_orig_edges is True
    metapaths = {
        "accepted": [
            ("person", "author", "paper"),
            ("paper", "accepted", "venue"),
        ],
        "rejected": [
            ("person", "author", "paper"),
            ("paper", "rejected", "venue"),
        ],
    }
    transform = dgl.AddMetaPaths(metapaths)
    new_g =
transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.ntypes == g.ntypes
    assert set(new_g.canonical_etypes) == {
        ("person", "author", "paper"),
        ("paper", "accepted", "venue"),
        ("paper", "rejected", "venue"),
        ("person", "accepted", "venue"),
        ("person", "rejected", "venue"),
    }
    for nty in new_g.ntypes:
        assert new_g.num_nodes(nty) == g.num_nodes(nty)
    for ety in g.canonical_etypes:
        assert new_g.num_edges(ety) == g.num_edges(ety)
    assert F.allclose(
        g.nodes["venue"].data["h"], new_g.nodes["venue"].data["h"]
    )
    assert F.allclose(
        g.edges["author"].data["h"], new_g.edges["author"].data["h"]
    )

    src, dst = new_g.edges(etype=("person", "accepted", "venue"))
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 0)}

    src, dst = new_g.edges(etype=("person", "rejected", "venue"))
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 1)}

    # Case2: keep_orig_edges is False
    transform = dgl.AddMetaPaths(metapaths, keep_orig_edges=False)
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.ntypes == g.ntypes
    # only the two metapath edge types are expected to remain
    assert len(new_g.canonical_etypes) == 2
    for nty in new_g.ntypes:
        assert new_g.num_nodes(nty) == g.num_nodes(nty)
    assert F.allclose(
        g.nodes["venue"].data["h"], new_g.nodes["venue"].data["h"]
    )

    src, dst = new_g.edges(etype=("person", "accepted", "venue"))
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 0)}

    src, dst = new_g.edges(etype=("person", "rejected", "venue"))
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 1)}


@parametrize_idtype
def test_module_compose(idtype):
    """Check that ``dgl.Compose`` chains ``AddReverse`` and ``AddSelfLoop``."""
    g = dgl.graph(([0, 1], [1, 2]), idtype=idtype, device=F.ctx())
    transform = dgl.Compose([dgl.AddReverse(), dgl.AddSelfLoop()])
    new_g = transform(g)
    assert new_g.device == g.device
    assert new_g.idtype == g.idtype
    assert new_g.num_edges() == 7
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 1), (1, 2), (1, 0), (2, 1), (0, 0), (1, 1), (2, 2)}


@parametrize_idtype
def test_module_gcnnorm(idtype):
    """Check the edge weights written by ``dgl.GCNNorm`` on a heterogeneous
    graph."""
    g = dgl.heterograph(
        {
            ("A", "r1", "A"): ([0, 1, 2], [0, 0, 1]),
            ("A", "r2", "B"): ([0, 0], [1, 1]),
            ("B", "r3", "B"): ([0, 1, 2], [0, 0, 1]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    g.edges["r3"].data["w"] = F.tensor([0.1, 0.2, 0.3])
    transform = dgl.GCNNorm()
    new_g = transform(g)
    # the relation between different node types is expected to get no weight
    assert "w" not in new_g.edges[("A", "r2", "B")].data
    assert F.allclose(
        new_g.edges[("A", "r1", "A")].data["w"],
        F.tensor([1.0 / 2, 1.0 / math.sqrt(2), 0.0]),
    )
    assert F.allclose(
        new_g.edges[("B", "r3", "B")].data["w"],
        F.tensor([1.0 / 3, 2.0 / 3, 0.0]),
    )


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_module_ppr(idtype):
    """Check the edge set produced by ``dgl.PPR(avg_degree=2)``, without and
    with prior edge weights."""
    g = dgl.graph(
        ([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]), idtype=idtype, device=F.ctx()
    )
    g.ndata["h"] = F.randn((6, 2))
    transform = dgl.PPR(avg_degree=2)
    new_g = transform(g)
    assert new_g.idtype == g.idtype
    assert new_g.device == g.device
    assert new_g.num_nodes() == g.num_nodes()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {
        (0, 0),
        (0, 2),
        (0, 4),
        (1, 1),
        (1, 3),
        (1, 5),
        (2, 2),
        (2, 3),
        (2, 4),
        (3, 3),
        (3, 5),
        (4, 3),
        (4, 4),
        (4, 5),
        (5, 5),
    }
    assert F.allclose(g.ndata["h"], new_g.ndata["h"])
    assert "w" in new_g.edata

    # Prior edge weights
    g.edata["w"] = F.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
    new_g = transform(g)
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {
        (0, 0),
        (1, 1),
        (1, 3),
        (2, 2),
        (2, 3),
        (2, 4),
        (3, 3),
        (3, 5),
        (4, 3),
        (4, 4),
        (4, 5),
        (5, 5),
    }


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_module_heat_kernel(idtype):
    """Check ``dgl.HeatKernel(avg_degree=1)`` on a directed graph and on a
    weighted graph with edges in both directions."""
    # Case1: directed graph
    g = dgl.graph(
        ([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]), idtype=idtype, device=F.ctx()
    )
    g.ndata["h"] = F.randn((6, 2))
    transform = dgl.HeatKernel(avg_degree=1)
    new_g = transform(g)
    assert new_g.idtype == g.idtype
    assert new_g.device == g.device
    assert new_g.num_nodes() == g.num_nodes()
    assert F.allclose(g.ndata["h"], new_g.ndata["h"])
    assert "w" in new_g.edata

    # Case2: weighted undirected graph
    g = dgl.graph(([0, 1, 2, 3], [1, 0, 3, 2]), idtype=idtype, device=F.ctx())
    g.edata["w"] = F.tensor([0.1, 0.2, 0.3, 0.4])
    new_g = transform(g)
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 0), (1, 1), (2, 2), (3, 3)}


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_module_gdc(idtype):
    """Check the edge set produced by ``dgl.GDC``, without and with prior
    edge weights."""
    transform = dgl.GDC([0.1, 0.2, 0.1], avg_degree=1)
    g = dgl.graph(
        ([0, 1, 2, 3, 4], [2, 3, 4, 5, 3]), idtype=idtype, device=F.ctx()
    )
    g.ndata["h"] = F.randn((6, 2))
    new_g = transform(g)
    assert new_g.idtype == g.idtype
    assert new_g.device == g.device
    assert new_g.num_nodes() == g.num_nodes()
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {
        (0, 0),
        (0, 2),
        (0, 4),
        (1, 1),
        (1, 3),
        (1, 5),
        (2, 2),
        (2, 3),
        (2, 4),
        (3, 3),
        (3, 5),
        (4, 3),
        (4, 4),
        (4, 5),
        (5, 5),
    }
    assert F.allclose(g.ndata["h"], new_g.ndata["h"])
    assert "w" in new_g.edata

    # Prior edge weights
    g.edata["w"] = F.tensor([0.1, 0.2, 0.3, 0.4, 0.5])
    new_g = transform(g)
    src, dst = new_g.edges()
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == {(0, 0), (1, 1), (2, 2), (3, 3), (4, 3), (4, 4), (5, 5)}


@unittest.skipIf(
    dgl.backend.backend_name == "tensorflow",
    reason="TF doesn't support a slicing operation",
)
@parametrize_idtype
def test_module_node_shuffle(idtype):
    """Run ``dgl.NodeShuffle`` and compare the input graph's "B" features
    read before and after the call."""
    transform = dgl.NodeShuffle()
    g = dgl.heterograph(
        {("A", "r", "B"): ([0, 1], [1, 2])}, idtype=idtype, device=F.ctx()
    )
    g.nodes["B"].data["h"] = F.randn((g.num_nodes("B"), 2))
    old_nfeat = g.nodes["B"].data["h"]
    new_g = transform(g)
    # NOTE(review): this reads from the input graph ``g`` rather than from
    # ``new_g``; presumably the intent is to check that the input is left
    # untouched -- confirm.
    new_nfeat = g.nodes["B"].data["h"]
    assert F.allclose(old_nfeat,
new_nfeat)


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_module_drop_node(idtype):
    """Check that ``dgl.DropNode`` keeps graph metadata and leaves the node
    count of the input graph unchanged."""
    transform = dgl.DropNode()
    g = dgl.heterograph(
        {("A", "r", "B"): ([0, 1], [1, 2])}, idtype=idtype, device=F.ctx()
    )
    num_nodes_old = g.num_nodes()
    new_g = transform(g)
    assert new_g.idtype == g.idtype
    assert new_g.device == g.device
    assert new_g.ntypes == g.ntypes
    assert new_g.canonical_etypes == g.canonical_etypes
    num_nodes_new = g.num_nodes()
    # Ensure that the original graph is not corrupted
    assert num_nodes_old == num_nodes_new


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_module_drop_edge(idtype):
    """Check that ``dgl.DropEdge`` keeps graph metadata and leaves the edge
    count of the input graph unchanged."""
    transform = dgl.DropEdge()
    g = dgl.heterograph(
        {
            ("A", "r1", "B"): ([0, 1], [1, 2]),
            ("C", "r2", "C"): ([3, 4, 5], [6, 7, 8]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    num_edges_old = g.num_edges()
    new_g = transform(g)
    assert new_g.idtype == g.idtype
    assert new_g.device == g.device
    assert new_g.ntypes == g.ntypes
    assert new_g.canonical_etypes == g.canonical_etypes
    num_edges_new = g.num_edges()
    # Ensure that the original graph is not corrupted
    assert num_edges_old == num_edges_new


@parametrize_idtype
def test_module_add_edge(idtype):
    """Check that ``dgl.AddEdge`` adds one edge to each 5-edge relation and
    leaves the input graph unchanged."""
    transform = dgl.AddEdge()
    g = dgl.heterograph(
        {
            ("A", "r1", "B"): ([0, 1, 2, 3, 4], [1, 2, 3, 4, 5]),
            ("C", "r2", "C"): ([0, 1, 2, 3, 4], [1, 2, 3, 4, 5]),
        },
        idtype=idtype,
        device=F.ctx(),
    )
    num_edges_old = g.num_edges()
    new_g = transform(g)
    assert new_g.num_edges(("A", "r1", "B")) == 6
    assert new_g.num_edges(("C", "r2", "C")) == 6
    assert new_g.idtype == g.idtype
    assert new_g.device == g.device
    assert new_g.ntypes == g.ntypes
    assert new_g.canonical_etypes == g.canonical_etypes
    num_edges_new = g.num_edges()
    # Ensure that the original graph is not corrupted
    assert num_edges_old == num_edges_new


@parametrize_idtype
def test_module_random_walk_pe(idtype):
    """Check the ``rwpe`` node feature written by ``dgl.RandomWalkPE(2)``."""
    transform = dgl.RandomWalkPE(2, "rwpe")
    g = dgl.graph(([0, 1, 1], [1, 1, 0]), idtype=idtype, device=F.ctx())
    new_g = transform(g)
    tgt = F.copy_to(F.tensor([[0.0, 0.5], [0.5, 0.75]]), g.device)
    assert F.allclose(new_g.ndata["rwpe"], tgt)


@parametrize_idtype
def test_module_lap_pe(idtype):
    """Check the positional encodings and eigenvalues written by
    ``dgl.LapPE`` when ``padding=True``."""
    g = dgl.graph(
        ([2, 1, 0, 3, 1, 1], [3, 1, 1, 2, 1, 0]), idtype=idtype, device=F.ctx()
    )
    # the last two eigenvalue entries are NaN; only the first three columns
    # are compared below
    tgt_eigval = F.copy_to(
        F.repeat(
            F.tensor([[1.1534e-17, 1.3333e00, 2.0, np.nan, np.nan]]),
            g.num_nodes(),
            dim=0,
        ),
        g.device,
    )
    tgt_pe = F.copy_to(
        F.tensor(
            [
                [0.5, 0.86602539, 0.0, 0.0, 0.0],
                [0.86602539, 0.5, 0.0, 0.0, 0.0],
                [0.0, 0.0, 0.70710677, 0.0, 0.0],
                [0.0, 0.0, 0.70710677, 0.0, 0.0],
            ]
        ),
        g.device,
    )

    # k=5 is requested for a graph whose node ids only go up to 3, and
    # padding=True is passed
    transform = dgl.LapPE(5, feat_name="lappe", padding=True)
    new_g = transform(g)
    # tensorflow has no abs() api
    if dgl.backend.backend_name == "tensorflow":
        assert F.allclose(new_g.ndata["lappe"].__abs__(), tgt_pe)
    # pytorch & mxnet
    else:
        assert F.allclose(new_g.ndata["lappe"].abs(), tgt_pe)

    # with eigenvalues
    transform = dgl.LapPE(
        5, feat_name="lappe", eigval_name="eigval", padding=True
    )
    new_g = transform(g)
    # tensorflow has no abs() api
    if dgl.backend.backend_name == "tensorflow":
        assert F.allclose(new_g.ndata["eigval"][:, :3], tgt_eigval[:, :3])
        assert F.allclose(new_g.ndata["lappe"].__abs__(), tgt_pe)
    # pytorch & mxnet
    else:
        assert F.allclose(new_g.ndata["eigval"][:, :3], tgt_eigval[:, :3])
        assert F.allclose(new_g.ndata["lappe"].abs(), tgt_pe)


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@pytest.mark.parametrize("g", get_cases(["has_scalar_e_feature"]))
def test_module_sign(g):
    """Compare ``dgl.SIGNDiffusion`` output with dense-matrix references for
    each diffusion operator, with and without edge weights."""
    import torch

    atol = 1e-06
    ctx = F.ctx()
    g = g.to(ctx)
    # dense adjacency used to build the reference results
    adj = g.adj_external(transpose=True, scipy_fmt="coo").todense()
    adj = torch.tensor(adj).float().to(ctx)

    weight_adj = (
        g.adj_external(transpose=True, scipy_fmt="coo").astype(float).todense()
    )
    weight_adj = torch.tensor(weight_adj).float().to(ctx)
    src, dst = g.edges()
    src, dst = src.long(), dst.long()
    weight_adj[dst, src] =
g.edata["scalar_w"]

    # raw
    transform = dgl.SIGNDiffusion(k=1, in_feat_name="h", diffuse_op="raw")
    g = transform(g)
    target = torch.matmul(adj, g.ndata["h"])
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    transform = dgl.SIGNDiffusion(
        k=1, in_feat_name="h", eweight_name="scalar_w", diffuse_op="raw"
    )
    g = transform(g)
    target = torch.matmul(weight_adj, g.ndata["h"])
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    # rw
    # reference: adjacency with every row divided by its row sum
    adj_rw = torch.matmul(torch.diag(1 / adj.sum(dim=1)), adj)
    transform = dgl.SIGNDiffusion(k=1, in_feat_name="h", diffuse_op="rw")
    g = transform(g)
    target = torch.matmul(adj_rw, g.ndata["h"])
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    weight_adj_rw = torch.matmul(
        torch.diag(1 / weight_adj.sum(dim=1)), weight_adj
    )
    transform = dgl.SIGNDiffusion(
        k=1, in_feat_name="h", eweight_name="scalar_w", diffuse_op="rw"
    )
    g = transform(g)
    target = torch.matmul(weight_adj_rw, g.ndata["h"])
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    # gcn
    # keep the raw weights: GCNNorm("scalar_w") below overwrites them and
    # they are restored afterwards
    raw_eweight = g.edata["scalar_w"]
    gcn_norm = dgl.GCNNorm()
    g = gcn_norm(g)
    adj_gcn = adj.clone()
    # pop() removes the "w" feature that GCNNorm() wrote with its default name
    adj_gcn[dst, src] = g.edata.pop("w")
    transform = dgl.SIGNDiffusion(k=1, in_feat_name="h", diffuse_op="gcn")
    g = transform(g)
    target = torch.matmul(adj_gcn, g.ndata["h"])
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    gcn_norm = dgl.GCNNorm("scalar_w")
    g = gcn_norm(g)
    weight_adj_gcn = weight_adj.clone()
    weight_adj_gcn[dst, src] = g.edata["scalar_w"]
    g.edata["scalar_w"] = raw_eweight
    transform = dgl.SIGNDiffusion(
        k=1, in_feat_name="h", eweight_name="scalar_w", diffuse_op="gcn"
    )
    g = transform(g)
    target = torch.matmul(weight_adj_gcn, g.ndata["h"])
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    # ppr
    alpha = 0.2
    transform = dgl.SIGNDiffusion(
        k=1, in_feat_name="h", diffuse_op="ppr", alpha=alpha
    )
    g = transform(g)
    # reference reuses the GCN-normalised adjacency computed above
    target = (1 - alpha) * torch.matmul(
        adj_gcn, g.ndata["h"]
    ) + alpha * g.ndata["h"]
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)

    transform = dgl.SIGNDiffusion(
        k=1,
        in_feat_name="h",
        eweight_name="scalar_w",
        diffuse_op="ppr",
        alpha=alpha,
    )
    g = transform(g)
    target = (1 - alpha) * torch.matmul(
        weight_adj_gcn, g.ndata["h"]
    ) + alpha * g.ndata["h"]
    assert torch.allclose(g.ndata["out_feat_1"], target, atol=atol)


@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now"
)
@parametrize_idtype
def test_module_row_feat_normalizer(idtype):
    """Check that ``dgl.RowFeatNormalizer`` keeps feature shapes and makes
    every feature row sum to 1."""
    # Case1: Normalize features of a homogeneous graph.
    transform = dgl.RowFeatNormalizer(
        subtract_min=True, node_feat_names=["h"], edge_feat_names=["w"]
    )
    g = dgl.rand_graph(5, 5, idtype=idtype, device=F.ctx())
    g.ndata["h"] = F.randn((g.num_nodes(), 128))
    g.edata["w"] = F.randn((g.num_edges(), 128))
    g = transform(g)
    assert g.ndata["h"].shape == (g.num_nodes(), 128)
    assert g.edata["w"].shape == (g.num_edges(), 128)
    assert F.allclose(g.ndata["h"].sum(1), F.tensor([1.0, 1.0, 1.0, 1.0, 1.0]))
    assert F.allclose(g.edata["w"].sum(1), F.tensor([1.0, 1.0, 1.0, 1.0, 1.0]))

    # Case2: Normalize features of a heterogeneous graph.
transform = dgl.RowFeatNormalizer( subtract_min=True, node_feat_names=["h", "h2"], edge_feat_names=["w"] ) g = dgl.heterograph( { ("user", "follows", "user"): (F.tensor([1, 2]), F.tensor([3, 4])), ("player", "plays", "game"): (F.tensor([2, 2]), F.tensor([1, 1])), }, idtype=idtype, device=F.ctx(), ) g.ndata["h"] = {"game": F.randn((2, 128)), "player": F.randn((3, 128))} g.ndata["h2"] = {"user": F.randn((5, 128))} g.edata["w"] = { ("user", "follows", "user"): F.randn((2, 128)), ("player", "plays", "game"): F.randn((2, 128)), } g = transform(g) assert g.ndata["h"]["game"].shape == (2, 128) assert g.ndata["h"]["player"].shape == (3, 128) assert g.ndata["h2"]["user"].shape == (5, 128) assert g.edata["w"][("user", "follows", "user")].shape == (2, 128) assert g.edata["w"][("player", "plays", "game")].shape == (2, 128) assert F.allclose(g.ndata["h"]["game"].sum(1), F.tensor([1.0, 1.0])) assert F.allclose(g.ndata["h"]["player"].sum(1), F.tensor([1.0, 1.0, 1.0])) assert F.allclose( g.ndata["h2"]["user"].sum(1), F.tensor([1.0, 1.0, 1.0, 1.0, 1.0]) ) assert F.allclose( g.edata["w"][("user", "follows", "user")].sum(1), F.tensor([1.0, 1.0]) ) assert F.allclose( g.edata["w"][("player", "plays", "game")].sum(1), F.tensor([1.0, 1.0]) ) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Only support PyTorch for now" ) @parametrize_idtype def test_module_feat_mask(idtype): # Case1: Mask node and edge feature tensors of a homogeneous graph. transform = dgl.FeatMask(node_feat_names=["h"], edge_feat_names=["w"]) g = dgl.rand_graph(5, 20, idtype=idtype, device=F.ctx()) g.ndata["h"] = F.ones((g.num_nodes(), 10)) g.edata["w"] = F.ones((g.num_edges(), 20)) g = transform(g) assert g.device == g.device assert g.idtype == g.idtype assert g.ndata["h"].shape == (g.num_nodes(), 10) assert g.edata["w"].shape == (g.num_edges(), 20) # Case2: Mask node and edge feature tensors of a heterogeneous graph. 
g = dgl.heterograph( { ("user", "follows", "user"): (F.tensor([1, 2]), F.tensor([3, 4])), ("player", "plays", "game"): (F.tensor([2, 2]), F.tensor([1, 1])), }, idtype=idtype, device=F.ctx(), ) g.ndata["h"] = {"game": F.randn((2, 5)), "player": F.randn((3, 5))} g.edata["w"] = { ("user", "follows", "user"): F.randn((2, 5)), ("player", "plays", "game"): F.randn((2, 5)), } g = transform(g) assert g.device == g.device assert g.idtype == g.idtype assert g.ndata["h"]["game"].shape == (2, 5) assert g.ndata["h"]["player"].shape == (3, 5) assert g.edata["w"][("user", "follows", "user")].shape == (2, 5) assert g.edata["w"][("player", "plays", "game")].shape == (2, 5) @parametrize_idtype def test_shortest_dist(idtype): g = dgl.graph(([0, 1, 1, 2], [2, 0, 3, 3]), idtype=idtype, device=F.ctx()) # case 1: directed single source dist = dgl.shortest_dist(g, root=0) tgt = F.copy_to(F.tensor([0, -1, 1, 2], dtype=F.int64), g.device) assert F.array_equal(dist, tgt) # case 2: undirected all pairs dist, paths = dgl.shortest_dist(g, root=None, return_paths=True) tgt_dist = F.copy_to( F.tensor( [[0, -1, 1, 2], [1, 0, 2, 1], [-1, -1, 0, 1], [-1, -1, -1, 0]], dtype=F.int64, ), g.device, ) tgt_paths = F.copy_to( F.tensor( [ [[-1, -1], [-1, -1], [0, -1], [0, 3]], [[1, -1], [-1, -1], [1, 0], [2, -1]], [[-1, -1], [-1, -1], [-1, -1], [3, -1]], [[-1, -1], [-1, -1], [-1, -1], [-1, -1]], ], dtype=F.int64, ), g.device, ) assert F.array_equal(dist, tgt_dist) assert F.array_equal(paths, tgt_paths) @parametrize_idtype def test_module_to_levi(idtype): transform = dgl.ToLevi() g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 0]), idtype=idtype, device=F.ctx()) g.ndata["h"] = F.randn((g.num_nodes(), 2)) g.edata["w"] = F.randn((g.num_edges(), 2)) lg = transform(g) assert lg.device == g.device assert lg.idtype == g.idtype assert lg.ntypes == ["edge", "node"] assert lg.canonical_etypes == [ ("edge", "e2n", "node"), ("node", "n2e", "edge"), ] assert lg.num_nodes("node") == g.num_nodes() assert lg.num_nodes("edge") == 
g.num_edges() assert lg.num_edges("n2e") == g.num_edges() assert lg.num_edges("e2n") == g.num_edges() src, dst = lg.edges(etype="n2e") eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst)))) assert eset == {(0, 0), (1, 1), (2, 2), (3, 3)} src, dst = lg.edges(etype="e2n") eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst)))) assert eset == {(0, 1), (1, 2), (2, 3), (3, 0)} assert F.allclose(lg.nodes["node"].data["h"], g.ndata["h"]) assert F.allclose(lg.nodes["edge"].data["w"], g.edata["w"]) @parametrize_idtype def test_module_svd_pe(idtype): g = dgl.graph( ( [0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4], [2, 3, 0, 2, 0, 2, 3, 4, 3, 4, 0, 1], ), idtype=idtype, device=F.ctx(), ) # without padding tgt_pe = F.copy_to( F.tensor( [ [0.6669, 0.3068, 0.7979, 0.8477], [0.6311, 0.6101, 0.1248, 0.5137], [1.1993, 0.0665, 0.9183, 0.1455], [0.5682, 0.6766, 0.8952, 0.6449], [0.3393, 0.8363, 0.6500, 0.4564], ] ), g.device, ) transform_1 = dgl.SVDPE(k=2, feat_name="svd_pe") g1 = transform_1(g) if dgl.backend.backend_name == "tensorflow": assert F.allclose(g1.ndata["svd_pe"].__abs__(), tgt_pe) else: assert F.allclose(g1.ndata["svd_pe"].abs(), tgt_pe) # with padding transform_2 = dgl.SVDPE(k=6, feat_name="svd_pe", padding=True) g2 = transform_2(g) assert F.shape(g2.ndata["svd_pe"]) == (5, 12) if __name__ == "__main__": test_partition_with_halo() test_module_heat_kernel(F.int32) ================================================ FILE: tests/python/common/utils/test_filter.py ================================================ import unittest import backend as F import dgl import numpy as np from dgl.utils import Filter from utils import parametrize_idtype def test_graph_filter(): g = dgl.graph([]).to(F.ctx()) g.add_nodes(4) g.add_edges([0, 1, 2, 3], [1, 2, 3, 0]) n_repr = np.zeros((4, 5)) e_repr = np.zeros((4, 5)) n_repr[[1, 3]] = 1 e_repr[[1, 3]] = 1 n_repr = F.copy_to(F.zerocopy_from_numpy(n_repr), F.ctx()) e_repr = F.copy_to(F.zerocopy_from_numpy(e_repr), F.ctx()) g.ndata["a"] = n_repr 
g.edata["a"] = e_repr def predicate(r): return F.max(r.data["a"], 1) > 0 # full node filter n_idx = g.filter_nodes(predicate) assert set(F.zerocopy_to_numpy(n_idx)) == {1, 3} # partial node filter n_idx = g.filter_nodes(predicate, [0, 1]) assert set(F.zerocopy_to_numpy(n_idx)) == {1} # full edge filter e_idx = g.filter_edges(predicate) assert set(F.zerocopy_to_numpy(e_idx)) == {1, 3} # partial edge filter e_idx = g.filter_edges(predicate, [0, 1]) assert set(F.zerocopy_to_numpy(e_idx)) == {1} @unittest.skipIf( F._default_context_str == "cpu", reason="CPU not yet supported" ) @parametrize_idtype def test_array_filter(idtype): f = Filter( F.copy_to(F.tensor([0, 1, 9, 4, 6, 5, 7], dtype=idtype), F.ctx()) ) x = F.copy_to(F.tensor([0, 3, 9, 11], dtype=idtype), F.ctx()) y = F.copy_to( F.tensor([0, 19, 0, 28, 3, 9, 11, 4, 5], dtype=idtype), F.ctx() ) xi_act = f.find_included_indices(x) xi_exp = F.copy_to(F.tensor([0, 2], dtype=idtype), F.ctx()) assert F.array_equal(xi_act, xi_exp) xe_act = f.find_excluded_indices(x) xe_exp = F.copy_to(F.tensor([1, 3], dtype=idtype), F.ctx()) assert F.array_equal(xe_act, xe_exp) yi_act = f.find_included_indices(y) yi_exp = F.copy_to(F.tensor([0, 2, 5, 7, 8], dtype=idtype), F.ctx()) assert F.array_equal(yi_act, yi_exp) ye_act = f.find_excluded_indices(y) ye_exp = F.copy_to(F.tensor([1, 3, 4, 6], dtype=idtype), F.ctx()) assert F.array_equal(ye_act, ye_exp) @unittest.skipIf( dgl.backend.backend_name != "pytorch", reason="Multiple streams are only supported by pytorch backend", ) @unittest.skipIf( F._default_context_str == "cpu", reason="CPU not yet supported" ) @parametrize_idtype def test_filter_multistream(idtype): # this is a smoke test to ensure we do not trip any internal assertions import torch s = torch.cuda.Stream(device=F.ctx()) with torch.cuda.stream(s): # we must do multiple runs such that the stream is busy as we launch # work for i in range(10): f = Filter(F.arange(1000, 4000, dtype=idtype, ctx=F.ctx())) x = F.randint([30000], 
dtype=idtype, ctx=F.ctx(), low=0, high=50000) xi = f.find_included_indices(x) if __name__ == "__main__": test_graph_filter() test_array_filter() ================================================ FILE: tests/python/common/utils/test_pin_memory.py ================================================ import backend as F import dgl import pytest @pytest.mark.skipif( F._default_context_str == "cpu", reason="Need gpu for this test" ) def test_pin_unpin(): t = F.arange(0, 100, dtype=F.int64, ctx=F.cpu()) assert not F.is_pinned(t) if F.backend_name == "pytorch": nd = dgl.utils.pin_memory_inplace(t) assert F.is_pinned(t) nd.unpin_memory_() assert not F.is_pinned(t) del nd # tensor will be unpinned immediately if the returned ndarray is not saved dgl.utils.pin_memory_inplace(t) assert not F.is_pinned(t) t_pin = t.pin_memory() # cannot unpin a tensor that is pinned outside of DGL with pytest.raises(dgl.DGLError): F.to_dgl_nd(t_pin).unpin_memory_() else: with pytest.raises(dgl.DGLError): # tensorflow and mxnet should throw an error dgl.utils.pin_memory_inplace(t) if __name__ == "__main__": test_pin_unpin() ================================================ FILE: tests/python/mxnet/ip_config.txt ================================================ 0 127.0.0.1 50050 1 127.0.0.1 50051 2 127.0.0.1 50052 3 127.0.0.1 50053 ================================================ FILE: tests/python/mxnet/test_geometry.py ================================================ import backend as F import mxnet as mx import numpy as np from dgl.geometry import farthest_point_sampler def test_fps(): N = 1000 batch_size = 5 sample_points = 10 x = mx.nd.array( np.random.uniform(size=(batch_size, int(N / batch_size), 3)) ) ctx = F.ctx() if F.gpu_ctx(): x = x.as_in_context(ctx) res = farthest_point_sampler(x, sample_points) assert res.shape[0] == batch_size assert res.shape[1] == sample_points assert res.sum() > 0 if __name__ == "__main__": test_fps() ================================================ FILE: 
tests/python/mxnet/test_nn.py ================================================ import backend as F import dgl import dgl.function as fn import dgl.nn.mxnet as nn import mxnet as mx import networkx as nx import numpy as np import pytest import scipy as sp from mxnet import autograd, gluon, nd from utils import parametrize_idtype from utils.graph_cases import ( get_cases, random_bipartite, random_dglgraph, random_graph, ) def check_close(a, b): assert np.allclose(a.asnumpy(), b.asnumpy(), rtol=1e-4, atol=1e-4) def _AXWb(A, X, W, b): X = mx.nd.dot(X, W.data(X.context)) Y = mx.nd.dot(A, X.reshape(X.shape[0], -1)).reshape(X.shape) return Y + b.data(X.context) @parametrize_idtype @pytest.mark.parametrize("out_dim", [1, 2]) def test_graph_conv(idtype, out_dim): g = dgl.from_networkx(nx.path_graph(3)) g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() adj = g.adj_external(transpose=True, ctx=ctx) conv = nn.GraphConv(5, out_dim, norm="none", bias=True) conv.initialize(ctx=ctx) # test#1: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 check_close(h1, _AXWb(adj, h0, conv.weight, conv.bias)) # test#2: more-dim h0 = F.ones((3, 5, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 check_close(h1, _AXWb(adj, h0, conv.weight, conv.bias)) conv = nn.GraphConv(5, out_dim) conv.initialize(ctx=ctx) # test#3: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 # test#4: basic h0 = F.ones((3, 5, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 conv = nn.GraphConv(5, out_dim) conv.initialize(ctx=ctx) with autograd.train_mode(): # test#3: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 # test#4: basic h0 = F.ones((3, 5, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 # test not override features g.ndata["h"] = 2 * F.ones((3, 1)) h1 = conv(g, h0) assert len(g.ndata) == 1 assert len(g.edata) == 0 assert 
"h" in g.ndata check_close(g.ndata["h"], 2 * F.ones((3, 1))) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree", "dglgraph"]), ) @pytest.mark.parametrize("norm", ["none", "both", "right", "left"]) @pytest.mark.parametrize("weight", [True, False]) @pytest.mark.parametrize("bias", [False]) @pytest.mark.parametrize("out_dim", [1, 2]) def test_graph_conv2(idtype, g, norm, weight, bias, out_dim): g = g.astype(idtype).to(F.ctx()) conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias) conv.initialize(ctx=F.ctx()) ext_w = F.randn((5, out_dim)).as_in_context(F.ctx()) nsrc = g.number_of_src_nodes() ndst = g.number_of_dst_nodes() h = F.randn((nsrc, 5)).as_in_context(F.ctx()) if weight: h_out = conv(g, h) else: h_out = conv(g, h, ext_w) assert h_out.shape == (ndst, out_dim) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["bipartite"], exclude=["zero-degree", "dglgraph"]) ) @pytest.mark.parametrize("norm", ["none", "both", "right"]) @pytest.mark.parametrize("weight", [True, False]) @pytest.mark.parametrize("bias", [False]) @pytest.mark.parametrize("out_dim", [1, 2]) def test_graph_conv2_bi(idtype, g, norm, weight, bias, out_dim): g = g.astype(idtype).to(F.ctx()) conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias) conv.initialize(ctx=F.ctx()) ext_w = F.randn((5, out_dim)).as_in_context(F.ctx()) nsrc = g.number_of_src_nodes() ndst = g.number_of_dst_nodes() h = F.randn((nsrc, 5)).as_in_context(F.ctx()) h_dst = F.randn((ndst, out_dim)).as_in_context(F.ctx()) if weight: h_out = conv(g, (h, h_dst)) else: h_out = conv(g, (h, h_dst), ext_w) assert h_out.shape == (ndst, out_dim) def _S2AXWb(A, N, X, W, b): X1 = X * N X1 = mx.nd.dot(A, X1.reshape(X1.shape[0], -1)) X1 = X1 * N X2 = X1 * N X2 = mx.nd.dot(A, X2.reshape(X2.shape[0], -1)) X2 = X2 * N X = mx.nd.concat(X, X1, X2, dim=-1) Y = mx.nd.dot(X, W) return Y + b @pytest.mark.parametrize("out_dim", [1, 2]) def test_tagconv(out_dim): 
g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx()) ctx = F.ctx() adj = g.adj_external(transpose=True, ctx=ctx) norm = mx.nd.power(g.in_degrees().astype("float32"), -0.5) conv = nn.TAGConv(5, out_dim, bias=True) conv.initialize(ctx=ctx) print(conv) # test#1: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 shp = norm.shape + (1,) * (h0.ndim - 1) norm = norm.reshape(shp).as_in_context(h0.context) assert F.allclose( h1, _S2AXWb(adj, norm, h0, conv.lin.data(ctx), conv.h_bias.data(ctx)) ) conv = nn.TAGConv(5, out_dim) conv.initialize(ctx=ctx) # test#2: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert h1.shape[-1] == out_dim @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"]) ) @pytest.mark.parametrize("out_dim", [1, 20]) @pytest.mark.parametrize("num_heads", [1, 5]) def test_gat_conv(g, idtype, out_dim, num_heads): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() gat = nn.GATConv(10, out_dim, num_heads) # n_heads = 5 gat.initialize(ctx=ctx) print(gat) feat = F.randn((g.number_of_src_nodes(), 10)) h = gat(g, feat) assert h.shape == (g.number_of_dst_nodes(), num_heads, out_dim) _, a = gat(g, feat, True) assert a.shape == (g.num_edges(), num_heads, 1) # test residual connection gat = nn.GATConv(10, out_dim, num_heads, residual=True) gat.initialize(ctx=ctx) h = gat(g, feat) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"])) @pytest.mark.parametrize("out_dim", [1, 2]) @pytest.mark.parametrize("num_heads", [1, 4]) def test_gat_conv_bi(g, idtype, out_dim, num_heads): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() gat = nn.GATConv(5, out_dim, num_heads) gat.initialize(ctx=ctx) feat = ( F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)), ) h = gat(g, feat) assert h.shape == (g.number_of_dst_nodes(), num_heads, out_dim) _, a = gat(g, feat, True) assert a.shape == (g.num_edges(), num_heads, 1) 
@parametrize_idtype @pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"])) @pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"]) @pytest.mark.parametrize("out_dim", [1, 10]) def test_sage_conv(idtype, g, aggre_type, out_dim): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() sage = nn.SAGEConv(5, out_dim, aggre_type) feat = F.randn((g.number_of_src_nodes(), 5)) sage.initialize(ctx=ctx) h = sage(g, feat) assert h.shape[-1] == out_dim @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"])) @pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"]) @pytest.mark.parametrize("out_dim", [1, 2]) def test_sage_conv_bi(idtype, g, aggre_type, out_dim): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() dst_dim = 5 if aggre_type != "gcn" else 10 sage = nn.SAGEConv((10, dst_dim), out_dim, aggre_type) feat = ( F.randn((g.number_of_src_nodes(), 10)), F.randn((g.number_of_dst_nodes(), dst_dim)), ) sage.initialize(ctx=ctx) h = sage(g, feat) assert h.shape[-1] == out_dim assert h.shape[0] == g.number_of_dst_nodes() @parametrize_idtype @pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"]) @pytest.mark.parametrize("out_dim", [1, 2]) def test_sage_conv_bi2(idtype, aggre_type, out_dim): # Test the case for graphs without edges g = dgl.heterograph({("_U", "_E", "_V"): ([], [])}, {"_U": 5, "_V": 3}) g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() sage = nn.SAGEConv((3, 3), out_dim, "gcn") feat = (F.randn((5, 3)), F.randn((3, 3))) sage.initialize(ctx=ctx) h = sage(g, feat) assert h.shape[-1] == out_dim assert h.shape[0] == 3 for aggre_type in ["mean", "pool"]: sage = nn.SAGEConv((3, 1), out_dim, aggre_type) feat = (F.randn((5, 3)), F.randn((3, 1))) sage.initialize(ctx=ctx) h = sage(g, feat) assert h.shape[-1] == out_dim assert h.shape[0] == 3 def test_gg_conv(): g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx()) ctx = F.ctx() gg_conv = nn.GatedGraphConv(10, 20, 3, 4) # n_step = 3, n_etypes = 4 gg_conv.initialize(ctx=ctx) 
print(gg_conv) # test#1: basic h0 = F.randn((20, 10)) etypes = nd.random.randint(0, 4, g.num_edges()).as_in_context(ctx) h1 = gg_conv(g, h0, etypes) assert h1.shape == (20, 20) @pytest.mark.parametrize("out_dim", [1, 20]) def test_cheb_conv(out_dim): g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx()) ctx = F.ctx() cheb = nn.ChebConv(10, out_dim, 3) # k = 3 cheb.initialize(ctx=ctx) print(cheb) # test#1: basic h0 = F.randn((20, 10)) h1 = cheb(g, h0) assert h1.shape == (20, out_dim) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"]) ) def test_agnn_conv(g, idtype): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() agnn_conv = nn.AGNNConv(0.1, True) agnn_conv.initialize(ctx=ctx) print(agnn_conv) feat = F.randn((g.number_of_src_nodes(), 10)) h = agnn_conv(g, feat) assert h.shape == (g.number_of_dst_nodes(), 10) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"])) def test_agnn_conv_bi(g, idtype): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() agnn_conv = nn.AGNNConv(0.1, True) agnn_conv.initialize(ctx=ctx) print(agnn_conv) feat = ( F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)), ) h = agnn_conv(g, feat) assert h.shape == (g.number_of_dst_nodes(), 5) def test_appnp_conv(): g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx()) ctx = F.ctx() appnp_conv = nn.APPNPConv(3, 0.1, 0) appnp_conv.initialize(ctx=ctx) print(appnp_conv) # test#1: basic h0 = F.randn((20, 10)) h1 = appnp_conv(g, h0) assert h1.shape == (20, 10) @pytest.mark.parametrize("out_dim", [1, 2]) def test_dense_cheb_conv(out_dim): for k in range(1, 4): ctx = F.ctx() g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx()) adj = g.adj_external(transpose=True, ctx=ctx).tostype("default") cheb = nn.ChebConv(5, out_dim, k) dense_cheb = nn.DenseChebConv(5, out_dim, k) cheb.initialize(ctx=ctx) dense_cheb.initialize(ctx=ctx) for i in 
range(len(cheb.fc)): dense_cheb.fc[i].weight.set_data(cheb.fc[i].weight.data()) if cheb.bias is not None: dense_cheb.bias.set_data(cheb.bias.data()) feat = F.randn((100, 5)) out_cheb = cheb(g, feat, [2.0]) out_dense_cheb = dense_cheb(adj, feat, 2.0) assert F.allclose(out_cheb, out_dense_cheb) @parametrize_idtype @pytest.mark.parametrize("norm_type", ["both", "right", "none"]) @pytest.mark.parametrize( "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"]) ) @pytest.mark.parametrize("out_dim", [1, 2]) def test_dense_graph_conv(idtype, g, norm_type, out_dim): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() adj = g.adj_external(transpose=True, ctx=ctx).tostype("default") conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True) dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True) conv.initialize(ctx=ctx) dense_conv.initialize(ctx=ctx) dense_conv.weight.set_data(conv.weight.data()) dense_conv.bias.set_data(conv.bias.data()) feat = F.randn((g.number_of_src_nodes(), 5)) out_conv = conv(g, feat) out_dense_conv = dense_conv(adj, feat) assert F.allclose(out_conv, out_dense_conv) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["homo", "bipartite", "block-bipartite"]) ) @pytest.mark.parametrize("out_dim", [1, 2]) def test_dense_sage_conv(idtype, g, out_dim): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() adj = g.adj_external(transpose=True, ctx=ctx).tostype("default") sage = nn.SAGEConv(5, out_dim, "gcn") dense_sage = nn.DenseSAGEConv(5, out_dim) sage.initialize(ctx=ctx) dense_sage.initialize(ctx=ctx) dense_sage.fc.weight.set_data(sage.fc_neigh.weight.data()) dense_sage.fc.bias.set_data(sage.fc_neigh.bias.data()) if len(g.ntypes) == 2: feat = ( F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)), ) else: feat = F.randn((g.num_nodes(), 5)) out_sage = sage(g, feat) out_dense_sage = dense_sage(adj, feat) assert F.allclose(out_sage, out_dense_sage) @parametrize_idtype @pytest.mark.parametrize( "g", 
get_cases(["homo", "block-bipartite"], exclude=["zero-degree"]) ) @pytest.mark.parametrize("out_dim", [1, 2]) def test_edge_conv(g, idtype, out_dim): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() edge_conv = nn.EdgeConv(5, out_dim) edge_conv.initialize(ctx=ctx) print(edge_conv) # test #1: basic h0 = F.randn((g.number_of_src_nodes(), 5)) h1 = edge_conv(g, h0) assert h1.shape == (g.number_of_dst_nodes(), out_dim) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"])) @pytest.mark.parametrize("out_dim", [1, 2]) def test_edge_conv_bi(g, idtype, out_dim): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() edge_conv = nn.EdgeConv(5, out_dim) edge_conv.initialize(ctx=ctx) print(edge_conv) # test #1: basic h0 = F.randn((g.number_of_src_nodes(), 5)) x0 = F.randn((g.number_of_dst_nodes(), 5)) h1 = edge_conv(g, (h0, x0)) assert h1.shape == (g.number_of_dst_nodes(), out_dim) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"])) @pytest.mark.parametrize("aggregator_type", ["mean", "max", "sum"]) def test_gin_conv(g, idtype, aggregator_type): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() gin_conv = nn.GINConv(lambda x: x, aggregator_type, 0.1) gin_conv.initialize(ctx=ctx) print(gin_conv) # test #1: basic feat = F.randn((g.number_of_src_nodes(), 5)) h = gin_conv(g, feat) assert h.shape == (g.number_of_dst_nodes(), 5) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"])) @pytest.mark.parametrize("aggregator_type", ["mean", "max", "sum"]) def test_gin_conv_bi(g, idtype, aggregator_type): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() gin_conv = nn.GINConv(lambda x: x, aggregator_type, 0.1) gin_conv.initialize(ctx=ctx) print(gin_conv) # test #2: bipartite feat = ( F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)), ) h = gin_conv(g, feat) return h.shape == (g.number_of_dst_nodes(), 5) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["homo", 
"block-bipartite"], exclude=["zero-degree"]) ) def test_gmm_conv(g, idtype): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() gmm_conv = nn.GMMConv(5, 2, 5, 3, "max") gmm_conv.initialize(ctx=ctx) h0 = F.randn((g.number_of_src_nodes(), 5)) pseudo = F.randn((g.num_edges(), 5)) h1 = gmm_conv(g, h0, pseudo) assert h1.shape == (g.number_of_dst_nodes(), 2) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"])) def test_gmm_conv_bi(g, idtype): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() gmm_conv = nn.GMMConv((5, 4), 2, 5, 3, "max") gmm_conv.initialize(ctx=ctx) # test #1: basic h0 = F.randn((g.number_of_src_nodes(), 5)) hd = F.randn((g.number_of_dst_nodes(), 4)) pseudo = F.randn((g.num_edges(), 5)) h1 = gmm_conv(g, (h0, hd), pseudo) assert h1.shape == (g.number_of_dst_nodes(), 2) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"])) def test_nn_conv(g, idtype): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() nn_conv = nn.NNConv(5, 2, gluon.nn.Embedding(3, 5 * 2), "max") nn_conv.initialize(ctx=ctx) # test #1: basic h0 = F.randn((g.number_of_src_nodes(), 5)) etypes = nd.random.randint(0, 4, g.num_edges()).as_in_context(ctx) h1 = nn_conv(g, h0, etypes) assert h1.shape == (g.number_of_dst_nodes(), 2) @parametrize_idtype @pytest.mark.parametrize("g", get_cases(["bipartite"])) def test_nn_conv_bi(g, idtype): g = g.astype(idtype).to(F.ctx()) ctx = F.ctx() nn_conv = nn.NNConv((5, 4), 2, gluon.nn.Embedding(3, 5 * 2), "max") nn_conv.initialize(ctx=ctx) # test #1: basic h0 = F.randn((g.number_of_src_nodes(), 5)) hd = F.randn((g.number_of_dst_nodes(), 4)) etypes = nd.random.randint(0, 4, g.num_edges()).as_in_context(ctx) h1 = nn_conv(g, (h0, hd), etypes) assert h1.shape == (g.number_of_dst_nodes(), 2) @pytest.mark.parametrize("out_dim", [1, 2]) def test_sg_conv(out_dim): g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx()) g = dgl.add_self_loop(g) ctx = F.ctx() sgc = nn.SGConv(5, out_dim, 2) 
sgc.initialize(ctx=ctx) print(sgc) # test #1: basic h0 = F.randn((g.num_nodes(), 5)) h1 = sgc(g, h0) assert h1.shape == (g.num_nodes(), out_dim) def test_set2set(): g = dgl.from_networkx(nx.path_graph(10)).to(F.ctx()) ctx = F.ctx() s2s = nn.Set2Set(5, 3, 3) # hidden size 5, 3 iters, 3 layers s2s.initialize(ctx=ctx) print(s2s) # test#1: basic h0 = F.randn((g.num_nodes(), 5)) h1 = s2s(g, h0) assert h1.shape[0] == 1 and h1.shape[1] == 10 and h1.ndim == 2 # test#2: batched graph bg = dgl.batch([g, g, g]) h0 = F.randn((bg.num_nodes(), 5)) h1 = s2s(bg, h0) assert h1.shape[0] == 3 and h1.shape[1] == 10 and h1.ndim == 2 def test_glob_att_pool(): g = dgl.from_networkx(nx.path_graph(10)).to(F.ctx()) ctx = F.ctx() gap = nn.GlobalAttentionPooling(gluon.nn.Dense(1), gluon.nn.Dense(10)) gap.initialize(ctx=ctx) print(gap) # test#1: basic h0 = F.randn((g.num_nodes(), 5)) h1 = gap(g, h0) assert h1.shape[0] == 1 and h1.shape[1] == 10 and h1.ndim == 2 # test#2: batched graph bg = dgl.batch([g, g, g, g]) h0 = F.randn((bg.num_nodes(), 5)) h1 = gap(bg, h0) assert h1.shape[0] == 4 and h1.shape[1] == 10 and h1.ndim == 2 def test_simple_pool(): g = dgl.from_networkx(nx.path_graph(15)).to(F.ctx()) sum_pool = nn.SumPooling() avg_pool = nn.AvgPooling() max_pool = nn.MaxPooling() sort_pool = nn.SortPooling(10) # k = 10 print(sum_pool, avg_pool, max_pool, sort_pool) # test#1: basic h0 = F.randn((g.num_nodes(), 5)) h1 = sum_pool(g, h0) check_close(F.squeeze(h1, 0), F.sum(h0, 0)) h1 = avg_pool(g, h0) check_close(F.squeeze(h1, 0), F.mean(h0, 0)) h1 = max_pool(g, h0) check_close(F.squeeze(h1, 0), F.max(h0, 0)) h1 = sort_pool(g, h0) assert h1.shape[0] == 1 and h1.shape[1] == 10 * 5 and h1.ndim == 2 # test#2: batched graph g_ = dgl.from_networkx(nx.path_graph(5)).to(F.ctx()) bg = dgl.batch([g, g_, g, g_, g]) h0 = F.randn((bg.num_nodes(), 5)) h1 = sum_pool(bg, h0) truth = mx.nd.stack( F.sum(h0[:15], 0), F.sum(h0[15:20], 0), F.sum(h0[20:35], 0), F.sum(h0[35:40], 0), F.sum(h0[40:55], 0), axis=0, ) 
check_close(h1, truth) h1 = avg_pool(bg, h0) truth = mx.nd.stack( F.mean(h0[:15], 0), F.mean(h0[15:20], 0), F.mean(h0[20:35], 0), F.mean(h0[35:40], 0), F.mean(h0[40:55], 0), axis=0, ) check_close(h1, truth) h1 = max_pool(bg, h0) truth = mx.nd.stack( F.max(h0[:15], 0), F.max(h0[15:20], 0), F.max(h0[20:35], 0), F.max(h0[35:40], 0), F.max(h0[40:55], 0), axis=0, ) check_close(h1, truth) h1 = sort_pool(bg, h0) assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2 @pytest.mark.parametrize("O", [1, 2, 8]) def test_rgcn(O): ctx = F.ctx() etype = [] g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.1)).to(F.ctx()) # 5 etypes R = 5 for i in range(g.num_edges()): etype.append(i % 5) B = 2 I = 10 rgc_basis = nn.RelGraphConv(I, O, R, "basis", B) rgc_basis.initialize(ctx=ctx) h = nd.random.randn(100, I, ctx=ctx) r = nd.array(etype, ctx=ctx) h_new = rgc_basis(g, h, r) assert list(h_new.shape) == [100, O] if O % B == 0: rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B) rgc_bdd.initialize(ctx=ctx) h = nd.random.randn(100, I, ctx=ctx) r = nd.array(etype, ctx=ctx) h_new = rgc_bdd(g, h, r) assert list(h_new.shape) == [100, O] # with norm norm = nd.zeros((g.num_edges(), 1), ctx=ctx) rgc_basis = nn.RelGraphConv(I, O, R, "basis", B) rgc_basis.initialize(ctx=ctx) h = nd.random.randn(100, I, ctx=ctx) r = nd.array(etype, ctx=ctx) h_new = rgc_basis(g, h, r, norm) assert list(h_new.shape) == [100, O] if O % B == 0: rgc_bdd = nn.RelGraphConv(I, O, R, "bdd", B) rgc_bdd.initialize(ctx=ctx) h = nd.random.randn(100, I, ctx=ctx) r = nd.array(etype, ctx=ctx) h_new = rgc_bdd(g, h, r, norm) assert list(h_new.shape) == [100, O] # id input rgc_basis = nn.RelGraphConv(I, O, R, "basis", B) rgc_basis.initialize(ctx=ctx) h = nd.random.randint(0, I, (100,), ctx=ctx) r = nd.array(etype, ctx=ctx) h_new = rgc_basis(g, h, r) assert list(h_new.shape) == [100, O] def test_sequential(): ctx = F.ctx() # test single graph class ExampleLayer(gluon.nn.Block): def __init__(self, **kwargs): 
super().__init__(**kwargs) def forward(self, graph, n_feat, e_feat): graph = graph.local_var() graph.ndata["h"] = n_feat graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) n_feat += graph.ndata["h"] graph.apply_edges(fn.u_add_v("h", "h", "e")) e_feat += graph.edata["e"] return n_feat, e_feat g = dgl.graph(([], [])).to(F.ctx()) g.add_nodes(3) g.add_edges([0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2]) net = nn.Sequential() net.add(ExampleLayer()) net.add(ExampleLayer()) net.add(ExampleLayer()) net.initialize(ctx=ctx) n_feat = F.randn((3, 4)) e_feat = F.randn((9, 4)) n_feat, e_feat = net(g, n_feat, e_feat) assert n_feat.shape == (3, 4) assert e_feat.shape == (9, 4) # test multiple graphs class ExampleLayer(gluon.nn.Block): def __init__(self, **kwargs): super().__init__(**kwargs) def forward(self, graph, n_feat): graph = graph.local_var() graph.ndata["h"] = n_feat graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h")) n_feat += graph.ndata["h"] return n_feat.reshape(graph.num_nodes() // 2, 2, -1).sum(1) g1 = dgl.from_networkx(nx.erdos_renyi_graph(32, 0.05)).to(F.ctx()) g2 = dgl.from_networkx(nx.erdos_renyi_graph(16, 0.2)).to(F.ctx()) g3 = dgl.from_networkx(nx.erdos_renyi_graph(8, 0.8)).to(F.ctx()) net = nn.Sequential() net.add(ExampleLayer()) net.add(ExampleLayer()) net.add(ExampleLayer()) net.initialize(ctx=ctx) n_feat = F.randn((32, 4)) n_feat = net([g1, g2, g3], n_feat) assert n_feat.shape == (4, 4) def myagg(alist, dsttype): rst = alist[0] for i in range(1, len(alist)): rst = rst + (i + 1) * alist[i] return rst @parametrize_idtype @pytest.mark.parametrize("agg", ["sum", "max", "min", "mean", "stack", myagg]) def test_hetero_conv(agg, idtype): g = dgl.heterograph( { ("user", "follows", "user"): ([0, 0, 2, 1], [1, 2, 1, 3]), ("user", "plays", "game"): ([0, 0, 0, 1, 2], [0, 2, 3, 0, 2]), ("store", "sells", "game"): ([0, 0, 1, 1], [0, 3, 1, 2]), }, idtype=idtype, device=F.ctx(), ) conv = nn.HeteroGraphConv( { "follows": nn.GraphConv(2, 3, 
allow_zero_in_degree=True), "plays": nn.GraphConv(2, 4, allow_zero_in_degree=True), "sells": nn.GraphConv(3, 4, allow_zero_in_degree=True), }, agg, ) conv.initialize(ctx=F.ctx()) print(conv) uf = F.randn((4, 2)) gf = F.randn((4, 4)) sf = F.randn((2, 3)) h = conv(g, {"user": uf, "store": sf, "game": gf}) assert set(h.keys()) == {"user", "game"} if agg != "stack": assert h["user"].shape == (4, 3) assert h["game"].shape == (4, 4) else: assert h["user"].shape == (4, 1, 3) assert h["game"].shape == (4, 2, 4) block = dgl.to_block( g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []} ).to(F.ctx()) h = conv( block, ( {"user": uf, "game": gf, "store": sf}, {"user": uf, "game": gf, "store": sf[0:0]}, ), ) assert set(h.keys()) == {"user", "game"} if agg != "stack": assert h["user"].shape == (4, 3) assert h["game"].shape == (4, 4) else: assert h["user"].shape == (4, 1, 3) assert h["game"].shape == (4, 2, 4) h = conv(block, {"user": uf, "game": gf, "store": sf}) assert set(h.keys()) == {"user", "game"} if agg != "stack": assert h["user"].shape == (4, 3) assert h["game"].shape == (4, 4) else: assert h["user"].shape == (4, 1, 3) assert h["game"].shape == (4, 2, 4) # test with mod args class MyMod(mx.gluon.nn.Block): def __init__(self, s1, s2): super(MyMod, self).__init__() self.carg1 = 0 self.s1 = s1 self.s2 = s2 def forward(self, g, h, arg1=None): # mxnet does not support kwargs if arg1 is not None: self.carg1 += 1 return F.zeros((g.number_of_dst_nodes(), self.s2)) mod1 = MyMod(2, 3) mod2 = MyMod(2, 4) mod3 = MyMod(3, 4) conv = nn.HeteroGraphConv( {"follows": mod1, "plays": mod2, "sells": mod3}, agg ) conv.initialize(ctx=F.ctx()) mod_args = {"follows": (1,), "plays": (1,)} h = conv(g, {"user": uf, "store": sf, "game": gf}, mod_args) assert mod1.carg1 == 1 assert mod2.carg1 == 1 assert mod3.carg1 == 0 # conv on graph without any edges for etype in g.etypes: g = dgl.remove_edges(g, g.edges(form="eid", etype=etype), etype=etype) assert g.num_edges() == 0 h = 
conv(g, {"user": uf, "game": gf, "store": sf}) assert set(h.keys()) == {"user", "game"} block = dgl.to_block( g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []} ).to(F.ctx()) h = conv( block, ( {"user": uf, "game": gf, "store": sf}, {"user": uf, "game": gf, "store": sf[0:0]}, ), ) assert set(h.keys()) == {"user", "game"} if __name__ == "__main__": test_graph_conv() test_gat_conv() test_sage_conv() test_gg_conv() test_cheb_conv() test_agnn_conv() test_appnp_conv() test_dense_cheb_conv() test_dense_graph_conv() test_dense_sage_conv() test_edge_conv() test_gin_conv() test_gmm_conv() test_nn_conv() test_sg_conv() test_set2set() test_glob_att_pool() test_simple_pool() test_rgcn() test_sequential() test_hetero_conv() ================================================ FILE: tests/python/pytorch/cuda/test_nccl.py ================================================ import unittest import backend as F import torch import torch.distributed as dist from dgl.cuda import nccl from dgl.partition import NDArrayPartition @unittest.skipIf( F._default_context_str == "cpu", reason="NCCL only runs on GPU." ) def test_nccl_sparse_push_single_remainder(): torch.cuda.set_device("cuda:0") dist.init_process_group( backend="nccl", init_method="tcp://127.0.0.1:12345", world_size=1, rank=0, ) index = F.randint([10000], F.int32, F.ctx(), 0, 10000) value = F.uniform([10000, 100], F.float32, F.ctx(), -1.0, 1.0) part = NDArrayPartition(10000, 1, "remainder") ri, rv = nccl.sparse_all_to_all_push(index, value, part) assert F.array_equal(ri, index) assert F.array_equal(rv, value) dist.destroy_process_group() @unittest.skipIf( F._default_context_str == "cpu", reason="NCCL only runs on GPU." 
@unittest.skipIf(
    F._default_context_str == "cpu", reason="NCCL only runs on GPU."
)
def test_nccl_sparse_push_single_range():
    """Sparse push over a one-rank NCCL group using a "range" partition.

    The single range ``[0, value.shape[0]]`` is passed as ``part_ranges``;
    the test asserts that the pushed indices and values come back unchanged.
    """
    torch.cuda.set_device("cuda:0")
    dist.init_process_group(
        backend="nccl",
        init_method="tcp://127.0.0.1:12345",
        world_size=1,
        rank=0,
    )
    # Always tear the group down, even when an assertion fails; a leftover
    # group would make the next init_process_group() in this process fail.
    try:
        index = F.randint([10000], F.int32, F.ctx(), 0, 10000)
        value = F.uniform([10000, 100], F.float32, F.ctx(), -1.0, 1.0)

        part_ranges = F.copy_to(
            F.tensor([0, value.shape[0]], dtype=F.int64), F.ctx()
        )
        part = NDArrayPartition(10000, 1, "range", part_ranges=part_ranges)

        ri, rv = nccl.sparse_all_to_all_push(index, value, part)
        assert F.array_equal(ri, index)
        assert F.array_equal(rv, value)
    finally:
        dist.destroy_process_group()
@pytest.mark.parametrize("batch_size", [None, 16])
def test_graph_dataloader(batch_size):
    """Iterate a ``GraphDataLoader`` over ``MiniGCDataset`` and check outputs.

    The dataset is sized to hold exactly two batches (or two single samples
    when ``batch_size`` is ``None``).
    """
    batched = batch_size is not None
    samples_per_batch = batch_size if batched else 1
    dataset = dgl.data.MiniGCDataset(2 * samples_per_batch, 10, 20)
    loader = dgl.dataloading.GraphDataLoader(
        dataset, batch_size=batch_size, shuffle=True
    )
    assert isinstance(iter(loader), Iterator)

    for graph, label in loader:
        assert isinstance(graph, dgl.DGLGraph)
        label_np = F.asnumpy(label)
        if not batched:
            # Without batching the label is a single scalar, following
            # PyTorch's practice.
            assert label_np.ndim == 0
            continue
        assert label_np.shape[0] == batch_size
sampling require a GPU.") if mode != "cpu" and use_mask: pytest.skip("Masked sampling only works on CPU.") if use_ddp: if os.name == "nt": pytest.skip("PyTorch 1.13.0+ has problems in Windows DDP...") dist.init_process_group( "gloo" if F.ctx() == F.cpu() else "nccl", "tcp://127.0.0.1:12347", world_size=1, rank=0, ) g = dgl.graph(([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1])).astype( idtype ) g.edata["p"] = torch.FloatTensor([1, 1, 0, 0, 1, 1, 0, 0]) g.edata["mask"] = g.edata["p"] != 0 if mode in ("cpu", "uva_cpu_indices"): indices = F.copy_to(F.tensor([0, 1], idtype), F.cpu()) else: indices = F.copy_to(F.tensor([0, 1], idtype), F.cuda()) if mode == "pure_gpu": g = g.to(F.cuda()) use_uva = mode.startswith("uva") if use_mask: prob, mask = None, "mask" else: prob, mask = "p", None sampler = dgl.dataloading.MultiLayerNeighborSampler( [2], prob=prob, mask=mask ) for num_workers in [0, 1, 2] if mode == "cpu" else [0]: dataloader = dgl.dataloading.DataLoader( g, indices, sampler, batch_size=1, device=F.ctx(), num_workers=num_workers, use_uva=use_uva, use_ddp=use_ddp, ) for input_nodes, output_nodes, blocks in dataloader: seed = output_nodes.item() neighbors = set(input_nodes[1:].cpu().numpy()) if seed == 1: assert neighbors == {5, 6} elif seed == 0: assert neighbors == {1, 2} g = dgl.heterograph( { ("B", "BA", "A"): ( [1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1], ), ("C", "CA", "A"): ( [1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1], ), } ).astype(idtype) g.edges["BA"].data["p"] = torch.FloatTensor([1, 1, 0, 0, 1, 1, 0, 0]) g.edges["BA"].data["mask"] = g.edges["BA"].data["p"] != 0 g.edges["CA"].data["p"] = torch.FloatTensor([0, 0, 1, 1, 0, 0, 1, 1]) g.edges["CA"].data["mask"] = g.edges["CA"].data["p"] != 0 if mode == "pure_gpu": g = g.to(F.cuda()) for num_workers in [0, 1, 2] if mode == "cpu" else [0]: dataloader = dgl.dataloading.DataLoader( g, {"A": indices}, sampler, batch_size=1, device=F.ctx(), num_workers=num_workers, use_uva=use_uva, 
use_ddp=use_ddp, ) for input_nodes, output_nodes, blocks in dataloader: seed = output_nodes["A"].item() # Seed and neighbors are of different node types so slicing is not necessary here. neighbors = set(input_nodes["B"].cpu().numpy()) if seed == 1: assert neighbors == {5, 6} elif seed == 0: assert neighbors == {1, 2} neighbors = set(input_nodes["C"].cpu().numpy()) if seed == 1: assert neighbors == {7, 8} elif seed == 0: assert neighbors == {3, 4} if use_ddp: dist.destroy_process_group() def _check_dtype(data, dtype, attr_name): if isinstance(data, dict): for k, v in data.items(): assert getattr(v, attr_name) == dtype elif isinstance(data, list): for v in data: assert getattr(v, attr_name) == dtype else: assert getattr(data, attr_name) == dtype def _check_device(data): if isinstance(data, dict): for k, v in data.items(): assert v.device == F.ctx() elif isinstance(data, list): for v in data: assert v.device == F.ctx() else: assert data.device == F.ctx() @pytest.mark.parametrize("sampler_name", ["full", "neighbor"]) @pytest.mark.parametrize( "mode", ["cpu", "uva_cuda_indices", "uva_cpu_indices", "pure_gpu"] ) @pytest.mark.parametrize("nprocs", [1, 4]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ddp_dataloader_decompose_dataset( sampler_name, mode, nprocs, drop_last ): if torch.cuda.device_count() < nprocs and mode != "cpu": pytest.skip( "DDP dataloader needs sufficient GPUs for UVA and GPU sampling." 
def _ddp_runner(proc_id, nprocs, g, data, args):
    """Per-process body spawned by ``test_ddp_dataloader_decompose_dataset``.

    Parameters
    ----------
    proc_id : int
        Rank of this process; also used as the CUDA device index when
        ``mode`` is not "cpu".
    nprocs : int
        World size of the process group.
    g : DGLGraph
        Graph to sample from.
    data : tuple
        ``(indices, sampler)``.
    args : tuple
        ``(mode, drop_last)``.
    """
    mode, drop_last = args
    indices, sampler = data
    if mode == "cpu":
        device = torch.device("cpu")
    else:
        device = torch.device(proc_id)
        torch.cuda.set_device(device)
    if mode == "pure_gpu":
        g = g.to(F.cuda())
    if mode in ("cpu", "uva_cpu_indices"):
        indices = indices.cpu()
    else:
        indices = indices.cuda()

    dist.init_process_group(
        "nccl" if mode != "cpu" else "gloo",
        "tcp://127.0.0.1:12347",
        world_size=nprocs,
        rank=proc_id,
    )
    use_uva = mode.startswith("uva")
    batch_size = g.num_nodes()
    shuffle = False
    for num_workers in [1, 4] if mode == "cpu" else [0]:
        dataloader = dgl.dataloading.DataLoader(
            g,
            indices,
            sampler,
            device=device,
            batch_size=batch_size,
            num_workers=num_workers,
            use_uva=use_uva,
            use_ddp=True,
            drop_last=drop_last,
            shuffle=shuffle,
        )
        # Seeded with 0 so that np.max() below is defined even when this rank
        # receives no batch at all.
        max_nid = [0]
        for _, _, blocks in dataloader:
            block = blocks[-1]
            _, o_dst = block.edges()
            dst_nodes_id = block.dstdata[dgl.NID][o_dst]
            max_nid.append(np.max(dst_nodes_id.cpu().numpy()))

        # Reduce the largest destination node ID seen by any rank onto rank 0.
        local_max = torch.tensor(np.max(max_nid))
        if torch.distributed.get_backend() == "nccl":
            local_max = local_max.cuda()
        dist.reduce(local_max, 0, op=dist.ReduceOp.MAX)
        if proc_id == 0:
            if drop_last and not shuffle and local_max > 0:
                # NOTE(review): presumably the last node ID that survives the
                # per-rank split plus the dropped tail -- confirm against the
                # DataLoader's DDP partitioning.
                assert (
                    local_max.item()
                    == len(indices)
                    - len(indices) % nprocs
                    - 1
                    - (len(indices) // nprocs) % batch_size
                )
            elif not drop_last:
                assert local_max == len(indices) - 1
    dist.destroy_process_group()
("user", "follow", "user"): ( [0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0], ), ("user", "followed-by", "user"): ( [1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2], ), ("user", "play", "game"): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]), ("game", "played-by", "user"): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5]), } ).astype(idtype) for ntype in g2.ntypes: g2.nodes[ntype].data["feat"] = F.copy_to( F.randn((g2.num_nodes(ntype), 8)), F.cpu() ) if mode in ("cpu", "uva_cpu_indices"): indices = {nty: F.copy_to(g2.nodes(nty), F.cpu()) for nty in g2.ntypes} else: indices = {nty: F.copy_to(g2.nodes(nty), F.cuda()) for nty in g2.ntypes} if mode == "pure_gpu": g2 = g2.to(F.cuda()) batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes) sampler = { "full": dgl.dataloading.MultiLayerFullNeighborSampler(2), "neighbor": dgl.dataloading.MultiLayerNeighborSampler( [{etype: 3 for etype in g2.etypes}] * 2 ), "neighbor2": dgl.dataloading.MultiLayerNeighborSampler([3, 3]), "labor": dgl.dataloading.LaborSampler([3, 3]), }[sampler_name] for num_workers in [0, 1, 2] if mode == "cpu" else [0]: dataloader = dgl.dataloading.DataLoader( g2, indices, sampler, device=F.ctx(), batch_size=batch_size, num_workers=num_workers, use_uva=use_uva, use_ddp=use_ddp, ) assert isinstance(iter(dataloader), Iterator) for input_nodes, output_nodes, blocks in dataloader: _check_device(input_nodes) _check_device(output_nodes) _check_device(blocks) _check_dtype(input_nodes, idtype, "dtype") _check_dtype(output_nodes, idtype, "dtype") _check_dtype(blocks, idtype, "idtype") if use_ddp: dist.destroy_process_group() @parametrize_idtype @pytest.mark.parametrize("sampler_name", ["full", "neighbor"]) @pytest.mark.parametrize( "neg_sampler", [ dgl.dataloading.negative_sampler.Uniform(2), dgl.dataloading.negative_sampler.GlobalUniform(15, False, 3), dgl.dataloading.negative_sampler.GlobalUniform(15, True, 3), ], ) @pytest.mark.parametrize("mode", ["cpu", "uva", "pure_gpu"]) @pytest.mark.parametrize("use_ddp", [False, True]) def 
test_edge_dataloader(idtype, sampler_name, neg_sampler, mode, use_ddp): if mode != "cpu" and F.ctx() == F.cpu(): pytest.skip("UVA and GPU sampling require a GPU.") if mode == "uva" and isinstance( neg_sampler, dgl.dataloading.negative_sampler.GlobalUniform ): pytest.skip("GlobalUniform don't support UVA yet.") if use_ddp: if os.name == "nt": pytest.skip("PyTorch 1.13.0+ has problems in Windows DDP...") dist.init_process_group( "gloo" if F.ctx() == F.cpu() else "nccl", "tcp://127.0.0.1:12347", world_size=1, rank=0, ) g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4])).astype(idtype) g1.ndata["feat"] = F.copy_to(F.randn((5, 8)), F.cpu()) if mode == "pure_gpu": g1 = g1.to(F.cuda()) sampler = { "full": dgl.dataloading.MultiLayerFullNeighborSampler(2), "neighbor": dgl.dataloading.MultiLayerNeighborSampler([3, 3]), }[sampler_name] # no negative sampler edge_sampler = dgl.dataloading.as_edge_prediction_sampler(sampler) dataloader = dgl.dataloading.DataLoader( g1, g1.edges(form="eid"), edge_sampler, device=F.ctx(), batch_size=g1.num_edges(), use_uva=(mode == "uva"), use_ddp=use_ddp, ) for input_nodes, pos_pair_graph, blocks in dataloader: _check_device(input_nodes) _check_device(pos_pair_graph) _check_device(blocks) # negative sampler edge_sampler = dgl.dataloading.as_edge_prediction_sampler( sampler, negative_sampler=neg_sampler ) dataloader = dgl.dataloading.DataLoader( g1, g1.edges(form="eid"), edge_sampler, device=F.ctx(), batch_size=g1.num_edges(), use_uva=(mode == "uva"), use_ddp=use_ddp, ) for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader: _check_device(input_nodes) _check_device(pos_pair_graph) _check_device(neg_pair_graph) _check_device(blocks) g2 = dgl.heterograph( { ("user", "follow", "user"): ( [0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0], ), ("user", "followed-by", "user"): ( [1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2], ), ("user", "play", "game"): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]), ("game", "played-by", "user"): ([0, 1, 2, 0, 2], [0, 1, 
def _create_homogeneous():
    """Build a random 200-node homogeneous graph together with test fixtures.

    Edges ``0..999`` are random ``s -> d`` pairs and edge ``i + 1000`` is the
    reverse ``d -> s`` of edge ``i``, which is exactly the mapping encoded in
    ``reverse_eids``.

    Returns
    -------
    tuple
        ``(g, reverse_eids, always_exclude, seed_edges)``.
    """
    s = torch.randint(0, 200, (1000,), device=F.ctx())
    d = torch.randint(0, 200, (1000,), device=F.ctx())
    src = torch.cat([s, d])
    dst = torch.cat([d, s])
    # Build the graph from the doubled edge list: with only (s, d) the edge
    # IDs 1000..1999 referenced by ``reverse_eids`` would not exist.
    g = dgl.graph((src, dst), num_nodes=200)
    reverse_eids = torch.cat(
        [torch.arange(1000, 2000), torch.arange(0, 1000)]
    ).to(F.ctx())
    always_exclude = torch.randint(0, 1000, (50,), device=F.ctx())
    seed_edges = torch.arange(0, 1000, device=F.ctx())
    return g, reverse_eids, always_exclude, seed_edges
vtype in [("A", "AA", "A"), ("A", "AB", "B")]: s = torch.randint(0, 200, (1000,), device=F.ctx()) d = torch.randint(0, 200, (1000,), device=F.ctx()) edges[utype, etype, vtype] = (s, d) edges[vtype, "rev-" + etype, utype] = (d, s) g = dgl.heterograph(edges, num_nodes_dict={"A": 200, "B": 200}) reverse_etypes = { "AA": "rev-AA", "AB": "rev-AB", "rev-AA": "AA", "rev-AB": "AB", } always_exclude = { "AA": torch.randint(0, 1000, (50,), device=F.ctx()), "AB": torch.randint(0, 1000, (50,), device=F.ctx()), } seed_edges = { "AA": torch.arange(0, 1000, device=F.ctx()), "AB": torch.arange(0, 1000, device=F.ctx()), } return g, reverse_etypes, always_exclude, seed_edges def _remove_duplicates(s, d): s, d = list(zip(*list(set(zip(s.tolist(), d.tolist()))))) return torch.tensor(s, device=F.ctx()), torch.tensor(d, device=F.ctx()) def _find_edges_to_exclude(g, exclude, always_exclude, pair_eids): if exclude == None: return always_exclude elif exclude == "self": return ( torch.cat([pair_eids, always_exclude]) if always_exclude is not None else pair_eids ) elif exclude == "reverse_id": pair_eids = torch.cat([pair_eids, pair_eids + 1000]) return ( torch.cat([pair_eids, always_exclude]) if always_exclude is not None else pair_eids ) elif exclude == "reverse_types": pair_eids = {g.to_canonical_etype(k): v for k, v in pair_eids.items()} if ("A", "AA", "A") in pair_eids: pair_eids[("A", "rev-AA", "A")] = pair_eids[("A", "AA", "A")] if ("A", "AB", "B") in pair_eids: pair_eids[("B", "rev-AB", "A")] = pair_eids[("A", "AB", "B")] if always_exclude is not None: always_exclude = { g.to_canonical_etype(k): v for k, v in always_exclude.items() } for k in always_exclude.keys(): if k in pair_eids: pair_eids[k] = torch.cat([pair_eids[k], always_exclude[k]]) else: pair_eids[k] = always_exclude[k] return pair_eids @pytest.mark.parametrize("always_exclude_flag", [False, True]) @pytest.mark.parametrize( "exclude", [None, "self", "reverse_id", "reverse_types"] ) @pytest.mark.parametrize( "sampler", [ 
def test_edge_dataloader_exclusion_with_reverse_seed_nodes():
    """Seed edges of both directions must be absent from the last MFG.

    Builds a bipartite graph with "AB" edges and their "BA" reverses, seeds
    the loader with every edge of both types, and asserts that no positive
    edge of a minibatch shows up in ``mfgs[-1]`` for the same edge type.
    """

    def _edge_pairs(graph, etype):
        # Set of (src, dst) ID pairs of ``etype`` edges in ``graph``.
        u, v = graph[etype].edges()
        return set(zip(u.tolist(), v.tolist()))

    s = torch.randint(0, 20, (500,), device=F.ctx())
    d = torch.randint(0, 20, (500,), device=F.ctx())
    s, d = _remove_duplicates(s, d)
    g = dgl.heterograph({("A", "AB", "B"): (s, d), ("B", "BA", "A"): (d, s)})
    sampler = dgl.dataloading.as_edge_prediction_sampler(
        dgl.dataloading.NeighborSampler(fanouts=[2, 2, 2]),
        exclude="reverse_types",
        reverse_etypes={"AB": "BA", "BA": "AB"},
    )
    seed_edges = {
        "AB": torch.arange(g.num_edges("AB"), device=F.ctx()),
        "BA": torch.arange(g.num_edges("BA"), device=F.ctx()),
    }
    dataloader = dgl.dataloading.DataLoader(
        g,
        seed_edges,
        sampler,
        batch_size=2,
        device=F.ctx(),
        shuffle=True,
        drop_last=False,
    )
    for _, pos_graph, mfgs in dataloader:
        for etype in ("AB", "BA"):
            assert _edge_pairs(pos_graph, etype).isdisjoint(
                _edge_pairs(mfgs[-1], etype)
            )
def test_distributed_dataloaders():
    """Check that the distributed dataloader classes can be imported.

    The same five names must be importable from both ``dgl.dataloading`` and
    ``dgl.distributed``.
    """
    try:
        from dgl.dataloading import (
            DistDataLoader,
            DistEdgeDataLoader,
            DistNodeDataLoader,
            EdgeCollator,
            NodeCollator,
        )
    except ImportError:
        pytest.fail("Distributed DataLoader from dataloading import failed")
    try:
        from dgl.distributed import (
            DistDataLoader,
            DistEdgeDataLoader,
            DistNodeDataLoader,
            EdgeCollator,
            NodeCollator,
        )
    except ImportError:
        # Name the package that actually failed (this message used to be a
        # copy of the one above).
        pytest.fail("Distributed DataLoader from distributed import failed")
dgl.dataloading.MultiLayerFullNeighborSampler(1) low_degree_excluder = dgl.dataloading.SpotTarget( g, exclude="reverse_id", degree_threshold=degree_threshold, reverse_eids=reverse_eids, ) sampler = dgl.dataloading.as_edge_prediction_sampler( sampler, exclude=low_degree_excluder, negative_sampler=dgl.dataloading.negative_sampler.Uniform(1), ) dataloader = dgl.dataloading.DataLoader( g, seed_edges, sampler, batch_size=batch_size ) for i, (input_nodes, pair_graph, neg_pair_graph, blocks) in enumerate( dataloader ): if isinstance(blocks, list): subg = blocks[0] else: subg = blocks pair_eids = pair_graph.edata[dgl.EID] block_eids = subg.edata[dgl.EID] edges_to_exclude = _find_edges_to_exclude( g, pair_eids, degree_threshold ) if edges_to_exclude is None: continue edges_to_exclude = dgl.utils.recursive_apply( edges_to_exclude, lambda x: x.cpu().numpy() ) block_eids = dgl.utils.recursive_apply( block_eids, lambda x: x.cpu().numpy() ) if isinstance(edges_to_exclude, Mapping): for k in edges_to_exclude.keys(): assert not np.isin(edges_to_exclude[k], block_eids[k]).any() else: assert not np.isin(edges_to_exclude, block_eids).any() if i == 10: break if __name__ == "__main__": test_spot_target_excludes(degree_threshold=2, batch_size=10) ================================================ FILE: tests/python/pytorch/distributed/optim/test_dist_optim.py ================================================ import os os.environ["OMP_NUM_THREADS"] = "1" import multiprocessing as mp import pickle import random import socket import sys import time import unittest import backend as F import dgl import numpy as np import torch as th from dgl import function as fn from dgl.distributed import ( DistEmbedding, DistGraph, DistGraphServer, load_partition_book, partition_graph, ) from dgl.distributed.optim import SparseAdagrad, SparseAdam from scipy import sparse as spsp # Set seeds to make tests fully reproducible. 
def get_local_usable_addr():
    """Get local usable IP and port

    Returns
    -------
    str
        IP address and port separated by a single space,
        e.g., '192.168.8.12 50051'
    """
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        try:
            # doesn't even have to be reachable
            sock.connect(("10.255.255.255", 1))
            ip_addr = sock.getsockname()[0]
        except (OSError, ValueError):
            # connect() reports failures (e.g. no route to the probe address)
            # as OSError; fall back to loopback in that case.
            ip_addr = "127.0.0.1"

    # Let the OS pick a free port by binding to port 0, then release it.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(("", 0))
        sock.listen(1)
        port = sock.getsockname()[1]

    return ip_addr + " " + str(port)
def check_sparse_adam(num_trainer=1, shared_mem=True):
    """Run the sparse-Adam comparison with spawned server and client processes.

    Partitions a random 2000-node graph into a single part, starts
    ``num_trainer`` servers and ``num_trainer`` clients, and fails if any
    client process exits with a non-zero code.

    Parameters
    ----------
    num_trainer : int
        Number of server processes and of client processes to start.
    shared_mem : bool
        Forwarded to ``run_server``.
    """
    prepare_dist()
    g = create_random_graph(2000)
    num_servers = num_trainer
    num_clients = num_trainer
    num_parts = 1

    graph_name = "dist_graph_test"
    partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")

    # let's just test on one partition for now.
    # We cannot run multiple servers and clients on the same machine.
    serv_ps = []
    ctx = mp.get_context("spawn")
    for serv_id in range(num_servers):
        p = ctx.Process(
            target=run_server,
            args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
        )
        serv_ps.append(p)
        p.start()

    cli_ps = []
    for cli_id in range(num_clients):
        print("start client", cli_id)
        p = ctx.Process(
            target=run_client, args=(graph_name, cli_id, 0, num_servers)
        )
        p.start()
        cli_ps.append(p)

    for p in cli_ps:
        p.join()
    # Assertions inside run_client only surface as a non-zero exit code of
    # the child process, so they have to be checked explicitly here.
    failed_codes = [p.exitcode for p in cli_ps if p.exitcode != 0]
    if failed_codes:
        # NOTE(review): a crashed client presumably never tells the servers
        # to shut down, so stop them instead of blocking in join() -- confirm.
        for p in serv_ps:
            p.terminate()
    for p in serv_ps:
        p.join()
    assert not failed_codes, "client process(es) exited with {}".format(
        failed_codes
    )
F.sqrt(F.sum(x * x, dim=1, keepdims=True))) d = 1 - F.matmul(tmp_x, tmp_x.T).to(F.cpu()) def check_knn(g, x, start, end, k, exclude_self, check_indices=True): assert g.device == x.device g = g.to(F.cpu()) for v in range(start, end): src, _ = g.in_edges(v) src = set(src.numpy()) assert len(src) == k if check_indices: i = v - start src_ans = set( th.topk( d[start:end, start:end][i], k + (1 if exclude_self else 0), largest=False, )[1].numpy() + start ) if exclude_self: # remove self src_ans.remove(v) assert src == src_ans def check_batch(g, k, expected_batch_info): assert F.array_equal(g.batch_num_nodes(), F.tensor(expected_batch_info)) assert F.array_equal( g.batch_num_edges(), k * F.tensor(expected_batch_info) ) # check knn with 2d input g = kg(x, algorithm, dist, exclude_self) check_knn(g, x, 0, 8, 3, exclude_self) check_batch(g, 3, [8]) # check knn with 3d input g = kg(x.view(2, 4, 3), algorithm, dist, exclude_self) check_knn(g, x, 0, 4, 3, exclude_self) check_knn(g, x, 4, 8, 3, exclude_self) check_batch(g, 3, [4, 4]) # check segmented knn # there are only 2 edges per node possible when exclude_self with 3 nodes in the segment # and this test case isn't supposed to warn, so limit it when exclude_self is True adjusted_k = 3 - (1 if exclude_self else 0) kg = dgl.nn.SegmentedKNNGraph(adjusted_k) g = kg(x, [3, 5], algorithm, dist, exclude_self) check_knn(g, x, 0, 3, adjusted_k, exclude_self) check_knn(g, x, 3, 8, adjusted_k, exclude_self) check_batch(g, adjusted_k, [3, 5]) # check k > num_points kg = dgl.nn.KNNGraph(10) with pytest.warns(DGLWarning): g = kg(x, algorithm, dist, exclude_self) # there are only 7 edges per node possible when exclude_self with 8 nodes total adjusted_k = 8 - (1 if exclude_self else 0) check_knn(g, x, 0, 8, adjusted_k, exclude_self) check_batch(g, adjusted_k, [8]) with pytest.warns(DGLWarning): g = kg(x.view(2, 4, 3), algorithm, dist, exclude_self) # there are only 3 edges per node possible when exclude_self with 4 nodes per segment 
adjusted_k = 4 - (1 if exclude_self else 0) check_knn(g, x, 0, 4, adjusted_k, exclude_self) check_knn(g, x, 4, 8, adjusted_k, exclude_self) check_batch(g, adjusted_k, [4, 4]) kg = dgl.nn.SegmentedKNNGraph(5) with pytest.warns(DGLWarning): g = kg(x, [3, 5], algorithm, dist, exclude_self) # there are only 2 edges per node possible when exclude_self in the segment with # only 3 nodes, and the current implementation reduces k for all segments # in that case adjusted_k = 3 - (1 if exclude_self else 0) check_knn(g, x, 0, 3, adjusted_k, exclude_self) check_knn(g, x, 3, 8, adjusted_k, exclude_self) check_batch(g, adjusted_k, [3, 5]) # check k == 0 # that's valid for exclude_self, but -1 is not, so check -1 instead for exclude_self adjusted_k = 0 - (1 if exclude_self else 0) kg = dgl.nn.KNNGraph(adjusted_k) with pytest.raises(DGLError): g = kg(x, algorithm, dist, exclude_self) kg = dgl.nn.SegmentedKNNGraph(adjusted_k) with pytest.raises(DGLError): g = kg(x, [3, 5], algorithm, dist, exclude_self) # check empty x_empty = th.tensor([]) kg = dgl.nn.KNNGraph(3) with pytest.raises(DGLError): g = kg(x_empty, algorithm, dist, exclude_self) kg = dgl.nn.SegmentedKNNGraph(3) with pytest.raises(DGLError): g = kg(x_empty, [3, 5], algorithm, dist, exclude_self) # check all coincident points x = th.zeros((20, 3)).to(device) kg = dgl.nn.KNNGraph(3) g = kg(x, algorithm, dist, exclude_self) # different algorithms may break the tie differently, so don't check the indices check_knn(g, x, 0, 20, 3, exclude_self, False) check_batch(g, 3, [20]) # check all coincident points kg = dgl.nn.SegmentedKNNGraph(3) g = kg(x, [4, 7, 5, 4], algorithm, dist, exclude_self) # different algorithms may break the tie differently, so don't check the indices check_knn(g, x, 0, 4, 3, exclude_self, False) check_knn(g, x, 4, 11, 3, exclude_self, False) check_knn(g, x, 11, 16, 3, exclude_self, False) check_knn(g, x, 16, 20, 3, exclude_self, False) check_batch(g, 3, [4, 7, 5, 4]) @pytest.mark.parametrize( "algorithm", 
@pytest.mark.parametrize("num_points", [8, 64, 256, 1024])
def test_knn_sharedmem_large(num_points):
    """Compare the ``bruteforce-sharemem`` kNN with a dense ``topk`` reference.

    ``num_points`` random 5-dimensional points are generated for both the
    data set ``x`` and the query set ``y`` on the CUDA device, and the
    neighbor indices returned by ``dgl.functional.knn`` are compared, after
    sorting each row, with those derived from the full distance matrix.

    The test is reported as skipped when CUDA is unavailable. Previously it
    returned early, which pytest counted as a pass although nothing was
    checked.
    """
    if not th.cuda.is_available():
        pytest.skip("bruteforce-sharemem kNN requires a CUDA device.")
    x = th.randn(num_points, 5, device="cuda")
    y = th.randn(num_points, 5, device="cuda")
    k = 4

    def ground_truth(x, y, k):
        """Return, per row of ``y``, the sorted indices of its ``k`` nearest rows of ``x``."""
        # Squared Euclidean distance expanded as |x|^2 + |y|^2 - 2 * y.x;
        # entry [i, j] relates y[i] to x[j].
        dist = (
            th.sum(x * x, dim=1)
            + th.sum(y * y, dim=1).unsqueeze(-1)
            - 2 * th.mm(y, x.T)
        )
        ret = th.topk(dist, k, dim=-1, largest=False)[1]
        # Sort so the comparison ignores the order among the k neighbors.
        return th.sort(ret, dim=-1)[0]

    gt = ground_truth(x, y, k)
    # Element [1] of the knn result is reshaped to one row of k indices per
    # query and sorted the same way as the reference.
    actual = th.sort(
        dgl.functional.knn(
            k, x, [num_points], y, [num_points], algorithm="bruteforce-sharemem"
        )[1].reshape(-1, k),
        -1,
    )[0]
    assert th.all(actual == gt).item()
if __name__ == "__main__":
    # Run this module through pytest instead of calling the tests by hand.
    # The former call list referenced ``test_knn``, which does not exist
    # (only ``test_knn_cpu`` / ``test_knn_cuda`` are defined), and called the
    # parametrized ``test_knn_sharedmem_large`` without its ``num_points``
    # argument, so running the file as a script always crashed.
    import sys

    sys.exit(pytest.main([__file__]))
def random_hetero_graph(num_nodes, num_edges, num_ntypes, num_etypes):
    """Build a random heterogeneous graph and return its graphbolt components.

    Node counts per type and the canonical edge types come from
    ``get_ntypes_and_etypes``. Every edge type gets
    ``num_edges // num_etypes`` random edges, and the first edge type also
    receives the remainder. Edge types whose source or destination node type
    has no nodes are left out.

    Returns
    -------
    tuple
        ``(csc_indptr, indices, node_type_offset, type_per_edge,
        node_type_to_id, edge_type_to_id)`` read from the graph produced by
        ``gb.from_dglgraph``.
    """
    node_counts, canonical_etypes = get_ntypes_and_etypes(
        num_nodes, num_ntypes, num_etypes
    )
    edge_dict = {}
    for position, canonical_etype in enumerate(canonical_etypes):
        src_type, _, dst_type = canonical_etype
        # Only the first edge type absorbs the remainder of the division.
        edge_count = num_edges // num_etypes
        if position == 0:
            edge_count += num_edges % num_etypes
        # Skip edge types that have an endpoint node type without nodes.
        if node_counts[src_type] == 0 or node_counts[dst_type] == 0:
            continue
        # Sources are sampled before destinations, as in the RNG stream the
        # callers have always seen.
        src_ids = torch.randint(0, node_counts[src_type], (edge_count,))
        dst_ids = torch.randint(0, node_counts[dst_type], (edge_count,))
        edge_dict[canonical_etype] = (src_ids, dst_ids)

    graph = gb.from_dglgraph(dgl.heterograph(edge_dict, node_counts))
    return (
        graph.csc_indptr,
        graph.indices,
        graph.node_type_offset,
        graph.type_per_edge,
        graph.node_type_to_id,
        graph.edge_type_to_id,
    )
edge_feats = np.random.rand(num_edges, num_classes) os.makedirs(os.path.join(test_dir, "data"), exist_ok=True) edge_feat_path = os.path.join("data", "edge-feat.npy") np.save(os.path.join(test_dir, edge_feat_path), edge_feats) # Generate random node-feats. if num_classes == 1: node_feats = np.random.rand(num_nodes) else: node_feats = np.random.rand(num_nodes, num_classes) node_feat_path = os.path.join("data", "node-feat.npy") np.save(os.path.join(test_dir, node_feat_path), node_feats) # Generate train/test/valid set. assert num_nodes % 4 == 0, "num_nodes must be divisible by 4" each_set_size = num_nodes // 4 os.makedirs(os.path.join(test_dir, "set"), exist_ok=True) train_pairs = ( np.arange(each_set_size), np.arange(each_set_size, 2 * each_set_size), ) train_data = np.vstack(train_pairs).T.astype(edges.dtype) train_path = os.path.join("set", "train.npy") np.save(os.path.join(test_dir, train_path), train_data) validation_pairs = ( np.arange(each_set_size, 2 * each_set_size), np.arange(2 * each_set_size, 3 * each_set_size), ) validation_data = np.vstack(validation_pairs).T.astype(edges.dtype) validation_path = os.path.join("set", "validation.npy") np.save(os.path.join(test_dir, validation_path), validation_data) test_pairs = ( np.arange(2 * each_set_size, 3 * each_set_size), np.arange(3 * each_set_size, 4 * each_set_size), ) test_data = np.vstack(test_pairs).T.astype(edges.dtype) test_path = os.path.join("set", "test.npy") np.save(os.path.join(test_dir, test_path), test_data) yaml_content = f""" dataset_name: {dataset_name} graph: # Graph structure and required attributes. 
nodes: - num: {num_nodes} edges: - format: {edge_fmt} path: {edge_path} feature_data: - domain: node type: null name: feat format: numpy in_memory: true path: {node_feat_path} - domain: edge type: null name: feat format: numpy in_memory: true path: {edge_feat_path} feature_data: - domain: node type: null name: feat format: numpy in_memory: true path: {node_feat_path} - domain: edge type: null name: feat format: numpy path: {edge_feat_path} tasks: - name: link_prediction num_classes: {num_classes} train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_path} validation_set: - type: null data: - name: seeds format: numpy in_memory: true path: {validation_path} test_set: - type: null data: - name: seeds format: numpy in_memory: true path: {test_path} """ return yaml_content def generate_raw_data_for_hetero_dataset( test_dir, dataset_name, num_nodes, num_edges, num_classes, edge_fmt="csv" ): # Generate edges. edges_path = {} for etype, num_edge in num_edges.items(): src_ntype, etype_str, dst_ntype = etype src = torch.randint(0, num_nodes[src_ntype], (num_edge,)) dst = torch.randint(0, num_nodes[dst_ntype], (num_edge,)) os.makedirs(os.path.join(test_dir, "edges"), exist_ok=True) assert edge_fmt in [ "numpy", "csv", ], "Only numpy and csv are supported for edges." if edge_fmt == "csv": # Write into edges/edge.csv edges = pd.DataFrame( np.stack([src, dst], axis=1), columns=["src", "dst"] ) edge_path = os.path.join("edges", f"{etype_str}.csv") edges.to_csv( os.path.join(test_dir, edge_path), index=False, header=False, ) else: edges = np.stack([src, dst], axis=1).T edge_path = os.path.join("edges", f"{etype_str}.npy") np.save(os.path.join(test_dir, edge_path), edges) edges_path[etype_str] = edge_path # Generate node features. 
node_feats_path = {} os.makedirs(os.path.join(test_dir, "data"), exist_ok=True) for ntype, num_node in num_nodes.items(): node_feat_path = os.path.join("data", f"{ntype}-feat.npy") node_feats = np.random.rand(num_node, num_classes) np.save(os.path.join(test_dir, node_feat_path), node_feats) node_feats_path[ntype] = node_feat_path # Generate edge features. edge_feats_path = {} os.makedirs(os.path.join(test_dir, "data"), exist_ok=True) for etype, num_edge in num_edges.items(): src_ntype, etype_str, dst_ntype = etype edge_feat_path = os.path.join("data", f"{etype_str}-feat.npy") edge_feats = np.random.rand(num_edge, num_classes) np.save(os.path.join(test_dir, edge_feat_path), edge_feats) edge_feats_path[etype_str] = edge_feat_path # Generate train/test/valid set. os.makedirs(os.path.join(test_dir, "set"), exist_ok=True) user_ids = torch.arange(num_nodes["user"]) np.random.shuffle(user_ids.numpy()) num_train = int(num_nodes["user"] * 0.6) num_validation = int(num_nodes["user"] * 0.2) num_test = num_nodes["user"] - num_train - num_validation train_path = os.path.join("set", "train.npy") np.save(os.path.join(test_dir, train_path), user_ids[:num_train]) validation_path = os.path.join("set", "validation.npy") np.save( os.path.join(test_dir, validation_path), user_ids[num_train : num_train + num_validation], ) test_path = os.path.join("set", "test.npy") np.save( os.path.join(test_dir, test_path), user_ids[num_train + num_validation :], ) yaml_content = f""" dataset_name: {dataset_name} graph: # Graph structure and required attributes. 
nodes: - type: user num: {num_nodes["user"]} - type: item num: {num_nodes["item"]} edges: - type: "user:follow:user" format: {edge_fmt} path: {edges_path["follow"]} - type: "user:click:item" format: {edge_fmt} path: {edges_path["click"]} feature_data: - domain: node type: user name: feat format: numpy in_memory: true path: {node_feats_path["user"]} - domain: node type: item name: feat format: numpy in_memory: true path: {node_feats_path["item"]} - domain: edge type: "user:follow:user" name: feat format: numpy in_memory: true path: {edge_feats_path["follow"]} - domain: edge type: "user:click:item" name: feat format: numpy in_memory: true path: {edge_feats_path["click"]} feature_data: - domain: node type: user name: feat format: numpy in_memory: true path: {node_feats_path["user"]} - domain: node type: item name: feat format: numpy in_memory: true path: {node_feats_path["item"]} tasks: - name: node_classification num_classes: {num_classes} train_set: - type: user data: - name: seeds format: numpy in_memory: true path: {train_path} validation_set: - type: user data: - name: seeds format: numpy in_memory: true path: {validation_path} test_set: - type: user data: - name: seeds format: numpy in_memory: true path: {test_path} """ yaml_file = os.path.join(test_dir, "metadata.yaml") with open(yaml_file, "w") as f: f.write(yaml_content) ================================================ FILE: tests/python/pytorch/graphbolt/impl/__init__.py ================================================ """ DGL graphbolt/impl tests""" ================================================ FILE: tests/python/pytorch/graphbolt/impl/test_basic_feature_store.py ================================================ import pytest import torch from dgl import graphbolt as gb def test_basic_feature_store_homo(): a = torch.tensor([[1, 2, 4], [2, 5, 3]]) b = torch.tensor([[[1, 2], [3, 4]], [[2, 5], [4, 3]]]) metadata = {"max_value": 3} features = {} features[("node", None, "a")] = gb.TorchBasedFeature(a, 
metadata=metadata) features[("node", None, "b")] = gb.TorchBasedFeature(b) feature_store = gb.BasicFeatureStore(features) # Test __getitem__ to access the stored Feature. feature = feature_store[("node", None, "a")] assert isinstance(feature, gb.Feature) assert torch.equal( feature.read(), torch.tensor([[1, 2, 4], [2, 5, 3]]), ) # Test read the entire feature. assert torch.equal( feature_store.read("node", None, "a"), torch.tensor([[1, 2, 4], [2, 5, 3]]), ) assert torch.equal( feature_store.read("node", None, "b"), torch.tensor([[[1, 2], [3, 4]], [[2, 5], [4, 3]]]), ) # Test read with ids. assert torch.equal( feature_store.read("node", None, "a", torch.tensor([0])), torch.tensor([[1, 2, 4]]), ) assert torch.equal( feature_store.read("node", None, "b", torch.tensor([0])), torch.tensor([[[1, 2], [3, 4]]]), ) # Test get the size and count of the entire feature. assert feature_store.size("node", None, "a") == torch.Size([3]) assert feature_store.size("node", None, "b") == torch.Size([2, 2]) assert feature_store.count("node", None, "a") == a.size(0) assert feature_store.count("node", None, "b") == b.size(0) # Test get metadata of the feature. assert feature_store.metadata("node", None, "a") == metadata assert feature_store.metadata("node", None, "b") == {} # Test __setitem__ and __contains__ of FeatureStore. assert ("node", None, "c") not in feature_store feature_store[("node", None, "c")] = feature_store[("node", None, "a")] assert ("node", None, "c") in feature_store # Test get keys of the features. 
def test_basic_feature_store_errors():
    """Check the error paths of ``TorchBasedFeature`` and ``BasicFeatureStore``.

    Covers a feature tensor with too few dimensions, a read of a key that
    was never stored, and a read with an out-of-bound id.
    """
    one_dim = torch.tensor([3, 2, 1])
    two_dim = torch.tensor([[1, 2, 4], [2, 5, 3]])

    # Test error when dimension of the value is illegal.
    expected_message = (
        rf"dimension of torch_feature in TorchBasedFeature must be "
        rf"greater than 1, but got {one_dim.dim()} dimension."
    )
    features = {}
    with pytest.raises(AssertionError, match=expected_message):
        features[("node", "paper", "a")] = gb.TorchBasedFeature(one_dim)

    # Only the valid feature ends up in the store.
    features[("node", "author", "b")] = gb.TorchBasedFeature(two_dim)
    store = gb.BasicFeatureStore(features)

    # Test error when key does not exist.
    with pytest.raises(KeyError):
        store.read("node", "paper", "b")

    # Test error when at least one id is out of bound.
    out_of_bound_ids = torch.tensor([0, 3])
    with pytest.raises(IndexError):
        store.read("node", "author", "b", out_of_bound_ids)
def to_on_disk_numpy(test_dir, name, t):
    """Save tensor ``t`` as ``<test_dir>/<name>.npy`` and return that path."""
    file_name = name + ".npy"
    destination = os.path.join(test_dir, file_name)
    # ``np.save`` receives the NumPy view of the tensor.
    as_array = t.numpy()
    np.save(destination, as_array)
    return destination
# Test read the entire feature. assert torch.equal(feat_store_a.read(), a) assert torch.equal(feat_store_b.read(), b) # Test read with ids. assert torch.equal( # Test read when ids are on a different device. feat_store_a.read(torch.tensor([0], device=F.ctx())), torch.tensor([[1, 2, 3]], dtype=dtype, device=F.ctx()), ) assert torch.equal( feat_store_b.read(torch.tensor([1, 1])), torch.tensor([[[4, 5], [6, 7]], [[4, 5], [6, 7]]], dtype=dtype), ) assert torch.equal( feat_store_a.read(torch.tensor([1, 1])), torch.tensor([[4, 5, 6], [4, 5, 6]], dtype=dtype), ) assert torch.equal( feat_store_b.read(torch.tensor([0])), torch.tensor([[[1, 2], [3, 4]]], dtype=dtype), ) # The cache should be full now for the large cache sizes, %100 hit expected. total_miss = feat_store_a._feature.total_miss feat_store_a.read(torch.tensor([0, 1])) assert total_miss == feat_store_a._feature.total_miss total_miss = feat_store_b._feature.total_miss feat_store_b.read(torch.tensor([0, 1])) assert total_miss == feat_store_b._feature.total_miss assert feat_store_a._feature.miss_rate == feat_store_a.miss_rate # Test get the size and count of the entire feature. assert feat_store_a.size() == torch.Size([3]) assert feat_store_b.size() == torch.Size([2, 2]) assert feat_store_a.count() == a.size(0) assert feat_store_b.count() == b.size(0) # Test update the entire feature. feat_store_a.update(torch.tensor([[0, 1, 2], [3, 5, 2]], dtype=dtype)) assert torch.equal( feat_store_a.read(), torch.tensor([[0, 1, 2], [3, 5, 2]], dtype=dtype), ) # Test update with ids. 
@pytest.mark.parametrize(
    "dtype",
    [
        torch.bool,
        torch.uint8,
        torch.int8,
        torch.int16,
        torch.int32,
        torch.int64,
        torch.float16,
        torch.bfloat16,
        torch.float32,
        torch.float64,
    ],
)
def test_cpu_cached_feature_read_async(dtype):
    """Check that ``read_async`` on a CPU-cached feature returns the stored rows."""
    source = torch.randint(0, 2, [1000, 13], dtype=dtype)

    # Cache capacity equal to 256 rows, expressed in bytes.
    row_bytes = source[:1].nbytes
    cached = gb.cpu_cached_feature(
        gb.TorchBasedFeature(source), 256 * row_bytes
    )

    # Test read with ids. The second batch repeats ids 71 and 101 from the
    # first one.
    id_batches = (
        torch.tensor([0, 15, 71, 101]),
        torch.tensor([71, 101, 202, 303]),
    )
    for ids in id_batches:
        stages = cached.read_async(ids)
        num_stages = cached.read_async_num_stages(ids.device)
        # Step through every stage; the object yielded last carries the
        # result that ``wait()`` returns.
        for _ in range(num_stages):
            values = next(stages)
        assert torch.equal(values.wait(), source[ids])
@unittest.skipIf(
    not torch.ops.graphbolt.detect_io_uring(),
    reason="DiskBasedFeature is not available on this system.",
)
def test_disk_based_feature():
    """Exercise ``DiskBasedFeature`` reads, metadata access and error handling.

    Three tensors are saved as ``.npy`` files inside a temporary directory
    and read back through ``DiskBasedFeature``: whole-tensor reads, reads by
    id (including index tensors larger than the feature), conversion with
    ``read_into_memory()``, size/count/metadata queries, an out-of-bound read
    and the rejection of a Fortran-contiguous array.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        a = torch.tensor([[1, 2, 3], [4, 5, 6]])
        b = torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]])
        c = torch.randn([4111, 47])
        metadata = {"max_value": 3}
        path_a = to_on_disk_numpy(test_dir, "a", a)
        path_b = to_on_disk_numpy(test_dir, "b", b)
        path_c = to_on_disk_numpy(test_dir, "c", c)
        feature_a = gb.DiskBasedFeature(path=path_a, metadata=metadata)
        feature_b = gb.DiskBasedFeature(path=path_b)
        feature_c = gb.DiskBasedFeature(path=path_c)

        # Read the entire feature.
        assert_equal(feature_a.read(), torch.tensor([[1, 2, 3], [4, 5, 6]]))
        assert_equal(
            feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]])
        )

        # Test read the feature with ids.
        assert_equal(
            feature_a.read(torch.tensor([0])),
            torch.tensor([[1, 2, 3]]),
        )
        assert_equal(
            feature_b.read(torch.tensor([1])),
            torch.tensor([[[4, 5], [6, 7]]]),
        )

        # Test reading into pin_memory
        if F._default_context_str == "gpu":
            res = feature_a.read(torch.tensor([0], pin_memory=True))
            assert res.is_pinned()

        # Test when the index tensor is large.
        torch_based_feature_a = gb.TorchBasedFeature(a)
        ind_a = torch.randint(low=0, high=a.size(0), size=(4111,))
        assert_equal(
            feature_a.read(ind_a),
            torch_based_feature_a.read(ind_a),
        )

        # Test converting to torch_based_feature with read_into_memory()
        torch_based_feature_b = feature_b.read_into_memory()
        ind_b = torch.randint(low=0, high=b.size(0), size=(4111,))
        assert_equal(
            feature_b.read(ind_b),
            torch_based_feature_b.read(ind_b),
        )

        # Test with larger stored feature tensor
        ind_c = torch.randint(low=0, high=c.size(0), size=(4111,))
        assert_equal(feature_c.read(ind_c), c[ind_c])

        # Test get the size and count of the entire feature.
        assert feature_a.size() == torch.Size([3])
        assert feature_b.size() == torch.Size([2, 2])
        assert feature_a.count() == a.size(0)
        assert feature_b.count() == b.size(0)

        # Test get metadata of the feature.
        assert feature_a.metadata() == metadata
        assert feature_b.metadata() == {}

        # Index 2 and 3 are out of bound for a feature with two rows.
        with pytest.raises(IndexError):
            feature_a.read(torch.tensor([0, 1, 2, 3]))

        # Test loading a Fortran contiguous ndarray.
        a_T = np.asfortranarray(a)
        # Join with the directory: plain string concatenation produced a
        # sibling path such as "<tmpdir>a_T.npy" outside the temporary
        # directory, which was never removed.
        path_a_T = os.path.join(test_dir, "a_T.npy")
        np.save(path_a_T, a_T)
        with pytest.raises(
            AssertionError,
            match="DiskBasedFeature only supports C_CONTIGUOUS array.",
        ):
            gb.DiskBasedFeature(path=path_a_T, metadata=metadata)

        # For windows, the file is locked by the numpy.load. We need to delete
        # it before closing the temporary directory.
        a = b = c = None
        feature_a = feature_b = feature_c = None
assert_equal(feature.read(idx), test_tensor[idx.long()]) @unittest.skipIf( not torch.ops.graphbolt.detect_io_uring(), reason="DiskBasedFeature is not available on this system.", ) def test_disk_based_feature_repr(): with tempfile.TemporaryDirectory() as test_dir: a = torch.tensor([[1, 2, 3], [4, 5, 6]]) b = torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]]) metadata = {"max_value": 3} path_a = to_on_disk_numpy(test_dir, "a", a) path_b = to_on_disk_numpy(test_dir, "b", b) feature_a = gb.DiskBasedFeature(path=path_a, metadata=metadata) feature_b = gb.DiskBasedFeature(path=path_b) expected_str_feature_a = str( "DiskBasedFeature(\n" " feature=tensor([[1, 2, 3],\n" " [4, 5, 6]]),\n" " metadata={'max_value': 3},\n" ")" ) expected_str_feature_b = str( "DiskBasedFeature(\n" " feature=tensor([[[1, 2],\n" " [3, 4]],\n" "\n" " [[4, 5],\n" " [6, 7]]]),\n" " metadata={},\n" ")" ) assert str(feature_a) == expected_str_feature_a assert str(feature_b) == expected_str_feature_b a = b = metadata = None feature_a = feature_b = None expected_str_feature_a = expected_str_feature_b = None ================================================ FILE: tests/python/pytorch/graphbolt/impl/test_feature_cache.py ================================================ import backend as F import pytest import torch from dgl import graphbolt as gb def _test_query_and_replace(policy1, policy2, keys, offset): # Testing query_and_replace equivalence to query and then replace. 
@pytest.mark.parametrize("offsets", [False, True])
@pytest.mark.parametrize(
    "dtype",
    [
        torch.bool,
        torch.uint8,
        torch.int8,
        torch.int16,
        torch.int32,
        torch.int64,
        torch.float16,
        torch.bfloat16,
        torch.float32,
        torch.float64,
    ],
)
@pytest.mark.parametrize("feature_size", [2, 16])
@pytest.mark.parametrize("num_parts", [1, 2, None])
@pytest.mark.parametrize("policy", ["s3-fifo", "sieve", "lru", "clock"])
@pytest.mark.parametrize("offset", [0, 1111111])
def test_feature_cache(offsets, dtype, feature_size, num_parts, policy, offset):
    """Exercise ``CPUFeatureCache`` through four query/replace rounds.

    Each round checks three things against the source tensor ``a``:
    ``query`` followed by ``replace`` on one cache, ``query_and_replace`` on a
    second cache, and the policy-level equivalence helper on two bare
    policies. Afterwards the two caches must report the same miss rate, and
    the raw ``torch.ops.graphbolt.feature_cache`` op is checked on its own.
    """
    parts = torch.get_num_threads() if num_parts is None else num_parts
    cache_size = 32 * parts
    a = torch.randint(0, 2, [1024, feature_size], dtype=dtype)
    cache_shape = (cache_size,) + a.shape[1:]

    def new_cache():
        return gb.impl.CPUFeatureCache(cache_shape, a.dtype, policy, num_parts)

    # Construction order matches the original: two caches, then two policies.
    cache = new_cache()
    cache2 = new_cache()
    policy1 = new_cache()._policy
    policy2 = new_cache()._policy

    def reader_fn(keys):
        return a[keys]

    def by_partitioning(missing):
        # With a single partition the missing keys are compared in reversed
        # order; otherwise they are compared after sorting.
        if num_parts == 1:
            return missing.flip([0])
        return missing.sort()[0]

    def reversed_only(missing):
        return missing.flip([0])

    def run_round(keys, expected_missing, normalize, expect_pinned=False):
        values, missing_index, missing_keys, missing_offsets = cache.query(
            keys, offset
        )
        if not offsets:
            missing_offsets = None
        assert torch.equal(normalize(missing_keys), expected_missing)
        if expect_pinned:
            assert values.is_pinned()

        missing_values = a[missing_keys]
        cache.replace(missing_keys, missing_values, missing_offsets, offset)
        values[missing_index] = missing_values
        assert torch.equal(values, a[keys])
        assert torch.equal(
            cache2.query_and_replace(keys, reader_fn, offset), a[keys]
        )

        _test_query_and_replace(policy1, policy2, keys, offset)

    # Round 1: empty cache, so both keys are reported missing.
    first_keys = torch.tensor([0, 1])
    run_round(first_keys, first_keys, by_partitioning)

    pin_memory = F._default_context_str == "gpu"

    # Round 2: key 1 is already cached, keys 2..32 are missing.
    keys = torch.arange(1, 33, pin_memory=pin_memory)
    run_round(
        keys,
        torch.arange(2, 33),
        by_partitioning,
        expect_pinned=pin_memory,
    )

    # Rounds 3 and 4: the same keys again, nothing may be missing.
    for _ in range(2):
        run_round(keys, torch.tensor([]), reversed_only)

    assert cache.miss_rate == cache2.miss_rate

    raw_feature_cache = torch.ops.graphbolt.feature_cache(
        cache_shape, a.dtype, pin_memory
    )
    idx = torch.tensor([0, 1, 2])
    raw_feature_cache.replace(idx, a[idx])
    assert torch.equal(raw_feature_cache.index_select(idx), a[idx])
    if pin_memory:
        device_idx = idx.to(F.ctx())
        assert torch.equal(
            raw_feature_cache.index_select(device_idx), a[idx].to(F.ctx())
        )
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "ntypes", [{"n1": 1, "n2": 1}, {5: 1, "n2": 2}, {"n1": 1.5, "n2": 2.0}]
)
def test_type_to_id_with_ntype_exception(ntypes):
    """Each parametrized ``node_type_to_id`` must make graph construction
    raise ``AssertionError``."""
    type_maps = {"node_type_to_id": ntypes, "edge_type_to_id": {"e1": 1}}
    with pytest.raises(AssertionError):
        gb.fused_csc_sampling_graph(None, None, **type_maps)


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "etypes",
    [
        {("n1", 5, "n12"): 1},
        {"e1": 1},
        {("n1", "e1"): 1},
        {("n1", "e1", 10): 1},
        {"n1:e1:n2": 1, ("n1", "e2", "n3"): 1},
        {("n1", "e1", "n10"): 1},
        {"n1:e1:n2": 1.5},
    ],
)
def test_type_to_id_with_etype_exception(etypes):
    """Each parametrized ``edge_type_to_id`` must make graph construction
    raise some exception."""
    type_maps = {
        "node_type_to_id": {"n1": 0, "n2": 1, "n3": 2},
        "edge_type_to_id": etypes,
    }
    with pytest.raises(Exception):
        gb.fused_csc_sampling_graph(None, None, **type_maps)


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "total_num_nodes, total_num_edges",
    [(1, 1), (100, 1), (10, 50), (1000, 50000)],
)
def test_homo_graph(total_num_nodes, total_num_edges):
    """Build a homogeneous graph with node/edge attributes and verify that
    every accessor returns what was passed in."""
    csc_indptr, indices = gbt.random_homo_graph(
        total_num_nodes, total_num_edges
    )
    attr_names = ("A1", "A2")
    node_attributes = {
        name: torch.arange(total_num_nodes) for name in attr_names
    }
    edge_attributes = {
        name: torch.randn(total_num_edges) for name in attr_names
    }
    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
    )

    assert graph.total_num_nodes == total_num_nodes
    assert graph.total_num_edges == total_num_edges

    assert torch.equal(csc_indptr, graph.csc_indptr)
    assert torch.equal(indices, graph.indices)

    assert graph.node_attributes == node_attributes
    assert graph.edge_attributes == edge_attributes

    # No type information was supplied, so all type-related fields are unset.
    for type_field in (
        "node_type_offset",
        "type_per_edge",
        "node_type_to_id",
        "edge_type_to_id",
    ):
        assert getattr(graph, type_field) is None
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "total_num_nodes, total_num_edges",
    [(1, 1), (100, 1), (10, 50), (1000, 50000)],
)
def test_num_nodes_edges_homo(total_num_nodes, total_num_edges):
    """``num_nodes`` / ``num_edges`` of a homogeneous graph are plain totals."""
    csc_indptr, indices = gbt.random_homo_graph(
        total_num_nodes, total_num_edges
    )
    edge_attributes = {
        name: torch.randn(total_num_edges) for name in ("A1", "A2")
    }
    graph = gb.fused_csc_sampling_graph(
        csc_indptr, indices, edge_attributes=edge_attributes
    )

    assert graph.num_nodes == total_num_nodes
    assert graph.num_edges == total_num_edges


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
def test_num_nodes_hetero():
    """Original graph in COO:
    1   0   1   0   1
    1   0   1   1   0
    0   1   0   1   0
    0   1   0   0   1
    1   0   0   0   1

    node_type_0: [0, 1]
    node_type_1: [2, 3, 4]
    edge_type_0: node_type_0 -> node_type_0
    edge_type_1: node_type_0 -> node_type_1
    edge_type_2: node_type_1 -> node_type_0
    edge_type_3: node_type_1 -> node_type_1
    """
    # Initialize data.
    total_num_nodes = 5
    total_num_edges = 12
    ntypes = {"N0": 0, "N1": 1}
    # "N1:R4:N0" is registered but no edge carries type id 4.
    etypes = {
        name: type_id
        for type_id, name in enumerate(
            ["N0:R0:N0", "N0:R1:N1", "N1:R2:N0", "N1:R3:N1", "N1:R4:N0"]
        )
    }
    indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    node_type_offset = torch.LongTensor([0, 2, 5])
    type_per_edge = torch.LongTensor([0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3])

    # Sanity-check the hand-written fixture before using it.
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)
    assert node_type_offset[-1] == total_num_nodes
    assert (type_per_edge < len(etypes)).all()

    # Construct FusedCSCSamplingGraph.
    graph = gb.fused_csc_sampling_graph(
        indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=ntypes,
        edge_type_to_id=etypes,
    )

    # Verify nodes number per node types.
    expected_num_nodes = {"N0": 2, "N1": 3}
    assert graph.num_nodes == expected_num_nodes
    assert sum(graph.num_nodes.values()) == total_num_nodes

    # Verify edges number per edge types.
    expected_num_edges = {
        "N0:R0:N0": 2,
        "N0:R1:N1": 4,
        "N1:R2:N0": 3,
        "N1:R3:N1": 3,
        "N1:R4:N0": 0,
    }
    assert graph.num_edges == expected_num_edges
    assert sum(graph.num_edges.values()) == total_num_edges


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "node_type_offset",
    [
        torch.tensor([0, 1]),
        torch.tensor([0, 1, 5, 6, 10]),
        torch.tensor([0, 1, 10]),
    ],
)
def test_node_type_offset_wrong_legnth(node_type_offset):
    """Graph construction must raise for each parametrized
    ``node_type_offset``; none of them has ``num_ntypes + 1`` entries."""
    num_ntypes = 3
    generated = gbt.random_hetero_graph(10, 50, num_ntypes, 5)
    # The generated node_type_offset (third item) is discarded in favour of
    # the parametrized one.
    csc_indptr, indices, _, type_per_edge = generated[:4]
    node_type_to_id, edge_type_to_id = generated[4:]
    with pytest.raises(Exception):
        gb.fused_csc_sampling_graph(
            csc_indptr,
            indices,
            node_type_offset=node_type_offset,
            type_per_edge=type_per_edge,
            node_type_to_id=node_type_to_id,
            edge_type_to_id=edge_type_to_id,
        )
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "total_num_nodes, total_num_edges",
    [(1, 1), (100, 1), (10, 50), (1000, 50000)],
)
@pytest.mark.parametrize("num_ntypes, num_etypes", [(1, 1), (3, 5), (100, 1)])
@pytest.mark.parametrize("has_node_attrs", [True, False])
@pytest.mark.parametrize("has_edge_attrs", [True, False])
def test_load_save_hetero_graph(
    total_num_nodes,
    total_num_edges,
    num_ntypes,
    num_etypes,
    has_node_attrs,
    has_edge_attrs,
):
    """Round-trip a heterogeneous graph through ``torch.save`` /
    ``torch.load`` and compare every field of the restored graph."""
    (
        csc_indptr,
        indices,
        node_type_offset,
        type_per_edge,
        node_type_to_id,
        edge_type_to_id,
    ) = gbt.random_hetero_graph(
        total_num_nodes, total_num_edges, num_ntypes, num_etypes
    )

    node_attributes = (
        {
            "A": torch.arange(total_num_nodes),
            "B": torch.arange(total_num_nodes),
        }
        if has_node_attrs
        else None
    )
    edge_attributes = (
        {
            "A": torch.arange(total_num_edges),
            "B": torch.arange(total_num_edges),
        }
        if has_edge_attrs
        else None
    )

    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
    )

    def check_attributes(saved, restored, expected_present):
        # Absent attributes must stay None; present ones must match per key.
        if not expected_present:
            assert saved is None and restored is None
            return
        assert saved.keys() == restored.keys()
        for key in saved.keys():
            assert torch.equal(saved[key], restored[key])

    with tempfile.TemporaryDirectory() as test_dir:
        filename = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
        torch.save(graph, filename)
        graph2 = torch.load(filename, weights_only=False)

        assert graph.total_num_nodes == graph2.total_num_nodes
        assert graph.total_num_edges == graph2.total_num_edges

        assert torch.equal(graph.csc_indptr, graph2.csc_indptr)
        assert torch.equal(graph.indices, graph2.indices)
        assert torch.equal(graph.node_type_offset, graph2.node_type_offset)
        assert torch.equal(graph.type_per_edge, graph2.type_per_edge)
        assert graph.node_type_to_id == graph2.node_type_to_id
        assert graph.edge_type_to_id == graph2.edge_type_to_id

        check_attributes(
            graph.node_attributes, graph2.node_attributes, has_node_attrs
        )
        check_attributes(
            graph.edge_attributes, graph2.edge_attributes, has_edge_attrs
        )
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "total_num_nodes, total_num_edges",
    [(1, 1), (100, 1), (10, 50), (1000, 50000)],
)
@pytest.mark.parametrize("num_ntypes, num_etypes", [(1, 1), (3, 5), (100, 1)])
@pytest.mark.parametrize("has_node_attrs", [True, False])
@pytest.mark.parametrize("has_edge_attrs", [True, False])
def test_pickle_hetero_graph(
    total_num_nodes,
    total_num_edges,
    num_ntypes,
    num_etypes,
    has_node_attrs,
    has_edge_attrs,
):
    """Round-trip a heterogeneous graph through ``pickle`` and compare every
    field of the restored graph with the original."""
    (
        csc_indptr,
        indices,
        node_type_offset,
        type_per_edge,
        node_type_to_id,
        edge_type_to_id,
    ) = gbt.random_hetero_graph(
        total_num_nodes, total_num_edges, num_ntypes, num_etypes
    )

    node_attributes = (
        {
            "A": torch.arange(total_num_nodes),
            "B": torch.arange(total_num_nodes),
        }
        if has_node_attrs
        else None
    )
    edge_attributes = (
        {
            "A": torch.arange(total_num_edges),
            "B": torch.arange(total_num_edges),
        }
        if has_edge_attrs
        else None
    )

    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
    )

    graph2 = pickle.loads(pickle.dumps(graph))

    assert graph.total_num_nodes == graph2.total_num_nodes
    assert graph.total_num_edges == graph2.total_num_edges

    assert torch.equal(graph.csc_indptr, graph2.csc_indptr)
    assert torch.equal(graph.indices, graph2.indices)
    assert torch.equal(graph.node_type_offset, graph2.node_type_offset)
    assert torch.equal(graph.type_per_edge, graph2.type_per_edge)

    # The type maps are compared key by key.
    for original_map, restored_map in (
        (graph.node_type_to_id, graph2.node_type_to_id),
        (graph.edge_type_to_id, graph2.edge_type_to_id),
    ):
        assert original_map.keys() == restored_map.keys()
        for type_name, type_id in original_map.items():
            assert type_id == restored_map[type_name]

    def check_attributes(original, restored, expected_present):
        # Absent attributes must stay None; present ones must match per key.
        if not expected_present:
            assert original is None and restored is None
            return
        assert original.keys() == restored.keys()
        for key in original.keys():
            assert torch.equal(original[key], restored[key])

    check_attributes(
        graph.node_attributes, graph2.node_attributes, has_node_attrs
    )
    check_attributes(
        graph.edge_attributes, graph2.edge_attributes, has_edge_attrs
    )


def process_csc_sampling_graph_multiprocessing(graph):
    """Child-process entry point: touch the graph by reading its node count."""
    return graph.total_num_nodes


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
def test_multiprocessing():
    """Pass a heterogeneous graph to a child process and require that the
    child exits cleanly."""
    total_num_nodes = 5
    total_num_edges = 10
    num_ntypes = 2
    num_etypes = 3
    (
        csc_indptr,
        indices,
        node_type_offset,
        type_per_edge,
        node_type_to_id,
        edge_type_to_id,
    ) = gbt.random_hetero_graph(
        total_num_nodes, total_num_edges, num_ntypes, num_etypes
    )
    edge_attributes = {
        "a": torch.randn((total_num_edges,)),
    }
    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
        edge_attributes=edge_attributes,
    )

    p = mp.Process(
        target=process_csc_sampling_graph_multiprocessing, args=(graph,)
    )
    p.start()
    p.join()
    # Without this check a child that crashes (non-zero exit code) would go
    # unnoticed and the test would still pass.
    assert p.exitcode == 0


def test_in_subgraph_homo():
    """Original graph in COO:
    1   0   1   0   1
    1   0   1   1   0
    0   1   0   1   0
    0   1   0   0   1
    1   0   0   0   1
    """
    # Initialize data.
    total_num_nodes = 5
    total_num_edges = 12
    indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    # Sanity-check the hand-written fixture before using it.
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)
    assert len(indptr) == total_num_nodes + 1

    # Construct FusedCSCSamplingGraph.
    graph = gb.fused_csc_sampling_graph(indptr, indices).to(F.ctx())

    # Extract in subgraph.
    nodes = torch.tensor([4, 1, 3], device=F.ctx())
    in_subgraph = graph.in_subgraph(nodes)

    # Verify in subgraph: the in-edges of nodes 4, 1 and 3, in that order.
    assert torch.equal(
        in_subgraph.sampled_csc.indices,
        torch.tensor([0, 3, 4, 2, 3, 1, 2], device=F.ctx()),
    )
    assert torch.equal(
        in_subgraph.sampled_csc.indptr,
        torch.tensor([0, 3, 5, 7], device=F.ctx()),
    )
    assert in_subgraph.original_column_node_ids is None
    assert in_subgraph.original_row_node_ids is None
    assert torch.equal(
        in_subgraph.original_edge_ids,
        torch.tensor([9, 10, 11, 3, 4, 7, 8], device=F.ctx()),
    )
@pytest.mark.parametrize("indptr_dtype", [torch.int32, torch.int64])
@pytest.mark.parametrize("indices_dtype", [torch.int32, torch.int64])
@pytest.mark.parametrize("replace", [False, True])
@pytest.mark.parametrize("labor", [False, True])
@pytest.mark.parametrize("use_node_timestamp", [False, True])
@pytest.mark.parametrize("use_edge_timestamp", [False, True])
def test_temporal_sample_neighbors_homo(
    indptr_dtype,
    indices_dtype,
    replace,
    labor,
    use_node_timestamp,
    use_edge_timestamp,
):
    """Temporal neighbor sampling on a homogeneous graph.

    Original graph in COO:
    1   0   1   0   1
    1   0   1   1   0
    0   1   0   1   0
    0   1   0   0   1
    1   0   0   0   1

    The test first checks that seeds whose dtype differs from the graph's
    ``indices`` dtype are rejected. It then samples with matching seeds and
    verifies the per-seed sample counts and that every sampled neighbor is
    one the timestamps allow.
    """
    if replace and F._default_context_str == "gpu":
        pytest.skip("Sampling with replacement not yet implemented on the GPU.")

    # Initialize data.
    total_num_nodes = 5
    total_num_edges = 12
    indptr = torch.tensor([0, 3, 5, 7, 9, 12], dtype=indptr_dtype)
    indices = torch.tensor(
        [0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4], dtype=indices_dtype
    )
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)
    assert len(indptr) == total_num_nodes + 1

    # Construct FusedCSCSamplingGraph.
    graph = gb.fused_csc_sampling_graph(indptr, indices).to(F.ctx())

    # Generate subgraph via sample neighbors.
    fanouts = torch.LongTensor([2])
    fanout = int(fanouts[0])
    sampler = (
        graph.temporal_sample_layer_neighbors
        if labor
        else graph.temporal_sample_neighbors
    )

    seed_list = [1, 3, 4]
    seed_timestamp = torch.randint(
        0, 100, (len(seed_list),), dtype=torch.int64, device=F.ctx()
    )
    if use_node_timestamp:
        node_timestamp = torch.randint(
            0, 100, (total_num_nodes,), dtype=torch.int64, device=F.ctx()
        )
        graph.node_attributes = {"timestamp": node_timestamp}
    if use_edge_timestamp:
        edge_timestamp = torch.randint(
            0, 100, (total_num_edges,), dtype=torch.int64, device=F.ctx()
        )
        graph.edge_attributes = {"timestamp": edge_timestamp}

    # Both sampler calls below share the same keyword arguments.
    sampler_kwargs = {
        "replace": replace,
        "node_timestamp_attr_name": (
            "timestamp" if use_node_timestamp else None
        ),
        "edge_timestamp_attr_name": (
            "timestamp" if use_edge_timestamp else None
        ),
    }

    # Sample with nodes in mismatched dtype with graph's indices.
    mismatched_dtype = (
        torch.int64 if indices_dtype == torch.int32 else torch.int32
    )
    nodes = torch.tensor(seed_list, dtype=mismatched_dtype)
    with pytest.raises(
        AssertionError,
        match=re.escape(
            "Data type of nodes must be consistent with indices.dtype"
        ),
    ):
        _ = sampler(nodes, seed_timestamp, fanouts, **sampler_kwargs)

    def _get_available_neighbors():
        """Per seed, list the in-neighbors that survive the timestamp filter.

        A neighbor is dropped when an enabled node or edge timestamp is not
        strictly smaller than the seed's timestamp.
        """
        available_neighbors = []
        for position, seed in enumerate(seed_list):
            seed_time = seed_timestamp[position]
            first_edge = indptr[seed].item()
            last_edge = indptr[seed + 1].item()
            neighbors = []
            for edge_id in range(first_edge, last_edge):
                neighbor = indices[edge_id].item()
                # The timestamp tensors only exist when the matching flag is
                # set, so the flag must be tested first.
                if (
                    use_node_timestamp
                    and (node_timestamp[neighbor] >= seed_time).item()
                ):
                    continue
                if (
                    use_edge_timestamp
                    and (edge_timestamp[edge_id] >= seed_time).item()
                ):
                    continue
                neighbors.append(neighbor)
            available_neighbors.append(neighbors)
        return available_neighbors

    nodes = torch.tensor(seed_list, dtype=indices_dtype, device=F.ctx())
    subgraph = sampler(nodes, seed_timestamp, fanouts, **sampler_kwargs)
    sampled_count = torch.diff(subgraph.sampled_csc.indptr).tolist()
    available_neighbors = _get_available_neighbors()
    assert len(available_neighbors) == len(sampled_count)

    for count, candidates in zip(sampled_count, available_neighbors):
        if replace:
            # With replacement the full fanout is drawn whenever at least one
            # candidate exists.
            expect_count = fanout if len(candidates) > 0 else 0
        else:
            expect_count = min(fanout, len(candidates))
        assert count == expect_count

    sampled_neighbors = torch.split(subgraph.sampled_csc.indices, sampled_count)
    for neighbors, candidates in zip(sampled_neighbors, available_neighbors):
        assert set(neighbors.tolist()).issubset(set(candidates))
@pytest.mark.parametrize("labor", [False, True]) @pytest.mark.parametrize("use_node_timestamp", [False, True]) @pytest.mark.parametrize("use_edge_timestamp", [False, True]) def test_temporal_sample_neighbors_hetero( indptr_dtype, indices_dtype, replace, labor, use_node_timestamp, use_edge_timestamp, ): if replace and F._default_context_str == "gpu": pytest.skip("Sampling with replacement not yet implemented on the GPU.") """Original graph in COO: "n1:e1:n2":[0, 0, 1, 1, 1], [0, 2, 0, 1, 2] "n2:e2:n1":[0, 0, 1, 2], [0, 1, 1 ,0] 0 0 1 0 1 0 0 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 0 0 0 """ # Initialize data. ntypes = {"n1": 0, "n2": 1} etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1} ntypes_to_offset = {"n1": 0, "n2": 2} total_num_nodes = 5 total_num_edges = 9 indptr = torch.tensor([0, 2, 4, 6, 7, 9], dtype=indptr_dtype) indices = torch.tensor([2, 4, 2, 3, 0, 1, 1, 0, 1], dtype=indices_dtype) type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0]) node_type_offset = torch.LongTensor([0, 2, 5]) assert indptr[-1] == total_num_edges assert indptr[-1] == len(indices) # Construct FusedCSCSamplingGraph. graph = gb.fused_csc_sampling_graph( indptr, indices, node_type_offset=node_type_offset, type_per_edge=type_per_edge, node_type_to_id=ntypes, edge_type_to_id=etypes, ).to(F.ctx()) # Generate subgraph via sample neighbors. 
fanouts = torch.LongTensor([-1, -1]) sampler = ( graph.temporal_sample_layer_neighbors if labor else graph.temporal_sample_neighbors ) seeds = { "n1": torch.tensor([0], dtype=indices_dtype, device=F.ctx()), "n2": torch.tensor([0], dtype=indices_dtype, device=F.ctx()), } per_etype_destination_nodes = { "n1:e1:n2": torch.tensor([1], dtype=indices_dtype), "n2:e2:n1": torch.tensor([0], dtype=indices_dtype), } seed_timestamp = { "n1": torch.randint(0, 100, (1,), dtype=torch.int64, device=F.ctx()), "n2": torch.randint(0, 100, (1,), dtype=torch.int64, device=F.ctx()), } if use_node_timestamp: node_timestamp = torch.randint( 0, 100, (total_num_nodes,), dtype=torch.int64, device=F.ctx() ) graph.node_attributes = {"timestamp": node_timestamp} if use_edge_timestamp: edge_timestamp = torch.randint( 0, 100, (total_num_edges,), dtype=torch.int64, device=F.ctx() ) graph.edge_attributes = {"timestamp": edge_timestamp} subgraph = sampler( seeds, seed_timestamp, fanouts, replace=replace, node_timestamp_attr_name="timestamp" if use_node_timestamp else None, edge_timestamp_attr_name="timestamp" if use_edge_timestamp else None, ) def _to_homo(): ret_seeds, ret_timestamps = [], [] for ntype, nodes in seeds.items(): ntype_id = ntypes[ntype] offset = node_type_offset[ntype_id] ret_seeds.append(nodes + offset) ret_timestamps.append(seed_timestamp[ntype]) return torch.cat(ret_seeds), torch.cat(ret_timestamps) homo_seeds, homo_seed_timestamp = _to_homo() def _get_available_neighbors(): available_neighbors = [] for i, seed in enumerate(homo_seeds): neighbors = [] start = indptr[seed].item() end = indptr[seed + 1].item() for j in range(start, end): neighbor = indices[j].item() if ( use_node_timestamp and ( node_timestamp[neighbor] >= homo_seed_timestamp[i] ).item() ): continue if ( use_edge_timestamp and (edge_timestamp[j] >= homo_seed_timestamp[i]).item() ): continue neighbors.append(neighbor) available_neighbors.append(neighbors) return available_neighbors available_neighbors = 
def check_tensors_on_the_same_shared_memory(t1: torch.Tensor, t2: torch.Tensor):
    """Check if two tensors are on the same shared memory.

    Random values are written into `t1`, and `t2` is then expected to hold
    exactly the same values even though it starts at a different address.
    Equality after the write shows that the two tensors are separate objects
    whose values come from the same underlying memory.

    `t1` is always restored to its previous content, including when the
    equality assertion fails, so a failure here does not leave random data
    behind for later checks.

    Raises
    ------
    AssertionError
        If both tensors report the same `data_ptr()`, or if `t2` does not
        reflect the values written into `t1`.
    """
    assert t1.data_ptr() != t2.data_ptr()
    old_t1 = t1.clone()
    try:
        t1[:] = torch.randint_like(t1, 100)
        assert torch.equal(t1, t2)
    finally:
        # Undo the probe write no matter how the comparison went.
        t1[:] = old_t1


def check_node_edge_attributes(graph1, graph2, attributes, attr_name):
    """Check one attribute dictionary of two graphs against `attributes`.

    `attr_name` selects which attribute dictionary is read from the graphs
    through `getattr` (the callers in this file pass "node_attributes" or
    "edge_attributes"). For every entry of `attributes`, the entry must exist
    on both graphs, the tensor on `graph1` must equal the expected tensor, and
    the tensors of `graph1` and `graph2` must pass
    `check_tensors_on_the_same_shared_memory`.
    """
    for name, attr in attributes.items():
        attrs_1 = getattr(graph1, attr_name)
        attrs_2 = getattr(graph2, attr_name)
        assert name in attrs_1
        assert name in attrs_2
        assert torch.equal(attrs_1[name], attr)
        check_tensors_on_the_same_shared_memory(attrs_1[name], attrs_2[name])
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="FusedCSCSamplingGraph is only supported on CPU.",
)
@pytest.mark.parametrize(
    "total_num_nodes, total_num_edges",
    [(1, 1), (100, 1), (10, 50), (1000, 50 * 1000), (10 * 1000, 100 * 1000)],
)
@pytest.mark.parametrize(
    "num_ntypes, num_etypes", [(1, 1), (3, 5), (100, 1), (1000, 1000)]
)
@pytest.mark.parametrize("test_node_attrs", [True, False])
@pytest.mark.parametrize("test_edge_attrs", [True, False])
def test_hetero_graph_on_shared_memory(
    total_num_nodes,
    total_num_edges,
    num_ntypes,
    num_etypes,
    test_node_attrs,
    test_edge_attrs,
):
    """Copy a random heterogeneous graph to shared memory and load it back.

    `graph1` is the object returned by `copy_to_shared_memory` and `graph2`
    the one returned by `load_from_shared_memory` under the same name. Both
    must carry the original sizes, tensors and type mappings, and their
    tensors must pass `check_tensors_on_the_same_shared_memory`.
    """
    (
        csc_indptr,
        indices,
        node_type_offset,
        type_per_edge,
        node_type_to_id,
        edge_type_to_id,
    ) = gbt.random_hetero_graph(
        total_num_nodes, total_num_edges, num_ntypes, num_etypes
    )

    node_attributes = None
    if test_node_attrs:
        node_attributes = {
            "A1": torch.arange(total_num_nodes),
            "A2": torch.arange(total_num_nodes),
        }

    edge_attributes = None
    if test_edge_attrs:
        edge_attributes = {
            "A1": torch.randn(total_num_edges),
            "A2": torch.randn(total_num_edges),
        }

    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
        node_attributes=node_attributes,
        edge_attributes=edge_attributes,
    )

    shm_name = "test_hetero_g"
    graph1 = graph.copy_to_shared_memory(shm_name)
    graph2 = gb.load_from_shared_memory(shm_name)

    # Node and edge counts must be reported correctly by BOTH graphs.
    assert graph1.total_num_nodes == total_num_nodes
    assert graph2.total_num_nodes == total_num_nodes
    assert graph1.total_num_edges == total_num_edges
    assert graph2.total_num_edges == total_num_edges

    # Test the value of graph1 is correct
    assert torch.equal(graph1.csc_indptr, csc_indptr)
    assert torch.equal(graph1.indices, indices)
    assert torch.equal(graph1.node_type_offset, node_type_offset)
    assert torch.equal(graph1.type_per_edge, type_per_edge)

    # Test the value of graph2 is correct
    assert torch.equal(graph2.csc_indptr, csc_indptr)
    assert torch.equal(graph2.indices, indices)
    assert torch.equal(graph2.node_type_offset, node_type_offset)
    assert torch.equal(graph2.type_per_edge, type_per_edge)

    # Test the memory of graph1 and graph2 is on shared memory
    check_tensors_on_the_same_shared_memory(
        graph1.csc_indptr, graph2.csc_indptr
    )
    check_tensors_on_the_same_shared_memory(graph1.indices, graph2.indices)
    check_tensors_on_the_same_shared_memory(
        graph1.node_type_offset, graph2.node_type_offset
    )
    check_tensors_on_the_same_shared_memory(
        graph1.type_per_edge, graph2.type_per_edge
    )

    if test_node_attrs:
        check_node_edge_attributes(
            graph1, graph2, node_attributes, "node_attributes"
        )
    if test_edge_attrs:
        check_node_edge_attributes(
            graph1, graph2, edge_attributes, "edge_attributes"
        )

    # The type-name mappings are plain Python objects; compare them directly.
    assert node_type_to_id == graph1.node_type_to_id
    assert edge_type_to_id == graph1.edge_type_to_id
    assert node_type_to_id == graph2.node_type_to_id
    assert edge_type_to_id == graph2.edge_type_to_id
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
def test_multiprocessing_with_shared_memory():
    """Test if two CSCSamplingGraphs are on the same shared memory after
    spawning.

    The graph tensors are moved to shared memory with `share_memory_()` and
    the graph is handed to a spawned process running
    `process_csc_sampling_graph_on_shared_memory`. Each tensor received from
    that process through `data_queue` is compared with the corresponding
    tensor of the graph held here.

    For now this code only works when the sharing strategy of
    torch.multiprocessing is set to `file_system` at the beginning.
    The cause is still yet to be found.
    """
    total_num_nodes = 5
    total_num_edges = 10
    num_ntypes = 2
    num_etypes = 3
    (
        csc_indptr,
        indices,
        node_type_offset,
        type_per_edge,
        node_type_to_id,
        edge_type_to_id,
    ) = gbt.random_hetero_graph(
        total_num_nodes, total_num_edges, num_ntypes, num_etypes
    )

    csc_indptr.share_memory_()
    indices.share_memory_()
    node_type_offset.share_memory_()
    type_per_edge.share_memory_()

    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=node_type_to_id,
        edge_type_to_id=edge_type_to_id,
        edge_attributes=None,
    )

    ctx = mp.get_context("spawn")  # Use spawn method.

    data_queue = ctx.Queue()  # Carries the tensors written by the sub-process.
    flag_queue = ctx.Queue()  # Used for sending finish signal.

    p = ctx.Process(
        target=process_csc_sampling_graph_on_shared_memory,
        args=(graph, data_queue, flag_queue),
    )
    p.start()
    try:
        # Get data from the other process. Then check if the tensors here have
        # the same data.
        csc_indptr2 = data_queue.get()
        assert torch.equal(graph.csc_indptr, csc_indptr2)
        indices2 = data_queue.get()
        assert torch.equal(graph.indices, indices2)
        node_type_offset2 = data_queue.get()
        assert torch.equal(graph.node_type_offset, node_type_offset2)
        type_per_edge2 = data_queue.get()
        assert torch.equal(graph.type_per_edge, type_per_edge2)
    finally:
        # Send a finish signal to end sub-process. This runs on failure too,
        # so the sub-process waiting on `flag_queue` is always released
        # before it is joined.
        flag_queue.put(None)
        p.join()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph on GPU is not supported yet.",
)
def test_from_dglgraph_heterogeneous():
    """Convert a small heterogeneous DGLGraph and verify the result.

    The conversion is checked in two ways: every edge of the converted graph
    is traced back to the DGL edge it came from via `gb.ORIGINAL_EDGE_ID`,
    and the resulting offsets, per-edge types and type-name mappings are
    compared with hard-coded expectations.
    """
    graph_data = {
        ("author", "writes", "paper"): (
            [1, 2, 3, 4, 5, 2],
            [1, 2, 3, 4, 5, 4],
        ),
        ("author", "affiliated_with", "institution"): (
            [1, 2, 3, 4, 5],
            [1, 2, 3, 4, 5],
        ),
        ("paper", "has_topic", "field"): ([1, 2, 3, 4, 5], [1, 2, 3, 4, 5]),
        ("paper", "cites", "paper"): (
            [2, 3, 4, 5, 6, 1],
            [1, 2, 3, 4, 5, 4],
        ),
    }
    dgl_g = dgl.heterograph(graph_data)

    # Without `include_original_edge_id`, the original edge IDs must not be
    # present among the edge attributes.
    gb_g = gb.from_dglgraph(
        dgl_g, is_homogeneous=False, include_original_edge_id=False
    )
    no_edge_attrs = gb_g.edge_attributes is None
    assert no_edge_attrs or gb.ORIGINAL_EDGE_ID not in gb_g.edge_attributes

    gb_g = gb.from_dglgraph(
        dgl_g, is_homogeneous=False, include_original_edge_id=True
    )

    # Map every node ID of the FusedCSCSamplingGraph back to the per-type
    # node ID used by the heterogeneous DGLGraph.
    nodes_per_type = gb_g.node_type_offset.diff()
    per_type_ids = [torch.arange(count) for count in nodes_per_type]
    reverse_node_id = torch.cat(per_type_ids)

    # Expand the CSC structure into COO (rows, columns).
    edges_per_column = gb_g.csc_indptr.diff()
    column_of_edge = torch.arange(gb_g.total_num_nodes).repeat_interleave(
        edges_per_column
    )
    rows = reverse_node_id[gb_g.indices]
    columns = reverse_node_id[column_of_edge]

    # The etype keys look like "srctype:etype:dsttype"; the middle parts must
    # appear in the same order as the DGLGraph's etypes.
    middle_parts = [key.split(":")[1] for key in gb_g.edge_type_to_id]
    assert middle_parts == dgl_g.etypes

    # Use ORIGINAL_EDGE_ID to check if the edge mapping is correct.
    original_edge_ids = gb_g.edge_attributes[gb.ORIGINAL_EDGE_ID]
    for edge_idx in range(gb_g.total_num_edges):
        etype_id = gb_g.type_per_edge[edge_idx]
        original_edge_id = original_edge_ids[edge_idx]
        dgl_edge_pairs = dgl_g.edges(etype=dgl_g.etypes[etype_id])
        assert dgl_edge_pairs[0][original_edge_id] == rows[edge_idx]
        assert dgl_edge_pairs[1][original_edge_id] == columns[edge_idx]

    assert gb_g.total_num_nodes == dgl_g.num_nodes()
    assert gb_g.total_num_edges == dgl_g.num_edges()

    expected_node_type_offset = torch.tensor([0, 6, 12, 18, 25])
    assert torch.equal(gb_g.node_type_offset, expected_node_type_offset)

    expected_type_per_edge = torch.tensor(
        [3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 1, 2, 1, 2, 1, 2, 1, 1, 2, 2, 1, 2]
    )
    assert torch.equal(gb_g.type_per_edge, expected_type_per_edge)

    expected_node_type_to_id = {
        "author": 0,
        "field": 1,
        "institution": 2,
        "paper": 3,
    }
    assert gb_g.node_type_to_id == expected_node_type_to_id

    expected_edge_type_to_id = {
        "author:affiliated_with:institution": 0,
        "author:writes:paper": 1,
        "paper:cites:paper": 2,
        "paper:has_topic:field": 3,
    }
    assert gb_g.edge_type_to_id == expected_edge_type_to_id
def is_graph_on_device_type(graph, device_type):
    """Assert that every tensor of `graph` reports `device_type`.

    Checked tensors are `csc_indptr`, `indices`, `node_type_offset`,
    `type_per_edge` and every value in `edge_attributes`. `device_type` is
    compared with `tensor.device.type` (e.g. "cpu" or "cuda").

    Raises
    ------
    AssertionError
        If any of the checked tensors reports a different device type.
    """
    structure_tensors = (
        graph.csc_indptr,
        graph.indices,
        graph.node_type_offset,
        graph.type_per_edge,
    )
    for tensor in structure_tensors:
        assert tensor.device.type == device_type
    for attr in graph.edge_attributes.values():
        assert attr.device.type == device_type


def is_graph_pinned(graph):
    """Assert that every tensor of `graph` reports `is_pinned()`.

    Checked tensors are `csc_indptr`, `indices`, `node_type_offset`,
    `type_per_edge` and every value in `edge_attributes`.

    Raises
    ------
    AssertionError
        If any of the checked tensors is not pinned.
    """
    structure_tensors = (
        graph.csc_indptr,
        graph.indices,
        graph.node_type_offset,
        graph.type_per_edge,
    )
    for tensor in structure_tensors:
        assert tensor.is_pinned()
    for attr in graph.edge_attributes.values():
        assert attr.is_pinned()
@unittest.skipIf(
    F._default_context_str == "cpu",
    reason="Tests for pinned memory are only meaningful on GPU.",
)
@unittest.skipIf(
    gb.is_wsl(), reason="In place pinning is not supported on WSL."
)
def test_csc_sampling_graph_to_pinned_memory():
    """Pin a FusedCSCSamplingGraph in place and verify the outcome.

    After `pin_memory_()` the `csc_indptr` tensor must still start at the
    address it had before, and the graph must pass both the CPU device check
    and the pinned check.
    """
    graph = create_fused_csc_sampling_graph()

    # Remember where `csc_indptr` lives so an in-place pin can be told apart
    # from a copy.
    address_before = graph.csc_indptr.data_ptr()
    graph.pin_memory_()
    address_after = graph.csc_indptr.data_ptr()
    assert address_after == address_before

    is_graph_on_device_type(graph, "cpu")
    is_graph_pinned(graph)
@pytest.mark.parametrize("labor", [False, True])
def test_sample_neighbors_hetero_single_fanout(labor):
    """Sample a two-etype heterogeneous graph with a single fanout value.

    Only checks that repeated sampling runs without raising; no sampled
    output is inspected.
    """
    u_ids = torch.randint(20, size=(1000,))
    i_ids = torch.randint(10, size=(1000,))
    graph = dgl.heterograph(
        {("u", "w", "i"): (u_ids, i_ids), ("i", "b", "u"): (i_ids, u_ids)}
    )
    graph = gb.from_dglgraph(graph).to(F.ctx())

    sampler = graph.sample_layer_neighbors if labor else graph.sample_neighbors

    # The loop counter is unused; `_` keeps it from shadowing the ID tensors.
    for _ in range(11):
        nodes = {"u": torch.randint(10, (100,), device=F.ctx())}
        sampler(nodes, fanouts=torch.tensor([-1]))
    # Should reach here without crashing.
ntypes = {"n1": 0, "n2": 1} etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1} total_num_edges = 9 indptr = torch.tensor([0, 2, 4, 6, 7, 9], dtype=indptr_dtype) indices = torch.tensor([2, 4, 2, 3, 0, 1, 1, 0, 1], dtype=indices_dtype) type_per_edge = torch.tensor( [1, 1, 1, 1, 0, 0, 0, 0, 0], dtype=indices_dtype ) node_type_offset = torch.tensor([0, 2, 5], dtype=indices_dtype) assert indptr[-1] == total_num_edges assert indptr[-1] == len(indices) # Construct FusedCSCSamplingGraph. graph = gb.fused_csc_sampling_graph( indptr, indices, node_type_offset=node_type_offset, type_per_edge=type_per_edge, node_type_to_id=ntypes, edge_type_to_id=etypes, ).to(F.ctx()) # Sample on both node types. nodes = { "n1": torch.tensor([0], dtype=indices_dtype, device=F.ctx()), "n2": torch.tensor([0], dtype=indices_dtype, device=F.ctx()), } fanouts = torch.tensor([-1, -1]) sampler = graph.sample_layer_neighbors if labor else graph.sample_neighbors subgraph = sampler(nodes, fanouts) # Verify in subgraph. expected_sampled_csc = { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2], device=F.ctx()), indices=torch.tensor([0, 1], device=F.ctx()), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2], device=F.ctx()), indices=torch.tensor([0, 2], device=F.ctx()), ), } assert len(subgraph.sampled_csc) == 2 for etype, pairs in expected_sampled_csc.items(): assert torch.equal(subgraph.sampled_csc[etype].indptr, pairs.indptr) assert torch.equal( subgraph.sampled_csc[etype].indices.sort()[0], pairs.indices ) assert len(pairs.indices) == len(subgraph.original_edge_ids[etype]) assert subgraph.original_column_node_ids is None assert subgraph.original_row_node_ids is None # Sample on single node type. nodes = {"n1": torch.tensor([0], dtype=indices_dtype, device=F.ctx())} fanouts = torch.tensor([-1, -1]) sampler = graph.sample_layer_neighbors if labor else graph.sample_neighbors subgraph = sampler(nodes, fanouts) # Verify in subgraph. 
@pytest.mark.parametrize(
    "fanouts, expected_sampled_num1, expected_sampled_num2",
    [
        ([0], 0, 0),
        ([1], 1, 1),
        ([2], 2, 2),
        ([4], 2, 2),
        ([-1], 2, 2),
        ([0, 0], 0, 0),
        ([1, 0], 1, 0),
        ([0, 1], 0, 1),
        ([1, 1], 1, 1),
        ([2, 1], 2, 1),
        ([-1, -1], 2, 2),
    ],
)
@pytest.mark.parametrize("labor", [False, True])
def test_sample_neighbors_fanouts(
    fanouts, expected_sampled_num1, expected_sampled_num2, labor
):
    """Check the number of sampled edges per edge type for several fanouts.

    Original graph in COO:
    "n1:e1:n2":[0, 0, 1, 1, 1], [0, 2, 0, 1, 2]
    "n2:e2:n1":[0, 0, 1, 2], [0, 1, 1 ,0]
    0   0   1   0   1
    0   0   1   1   1
    1   1   0   0   0
    0   1   0   0   0
    1   0   0   0   0
    """
    # Graph structure.
    total_num_edges = 9
    indptr = torch.LongTensor([0, 2, 4, 6, 7, 9])
    indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 1])
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)

    # Type information.
    hetero_info = {
        "node_type_offset": torch.LongTensor([0, 2, 5]),
        "type_per_edge": torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0]),
        "node_type_to_id": {"n1": 0, "n2": 1},
        "edge_type_to_id": {"n1:e1:n2": 0, "n2:e2:n1": 1},
    }
    graph = gb.fused_csc_sampling_graph(indptr, indices, **hetero_info)
    graph = graph.to(F.ctx())

    # One seed (local ID 0) per node type.
    nodes = {
        ntype: torch.tensor([0], device=F.ctx()) for ntype in ("n1", "n2")
    }
    fanouts = torch.LongTensor(fanouts)
    if labor:
        sampler = graph.sample_layer_neighbors
    else:
        sampler = graph.sample_neighbors
    subgraph = sampler(nodes, fanouts)

    # The edge count is only verified when a non-zero count is expected; the
    # indptr length is verified in every case.
    expected_counts = {
        "n1:e1:n2": expected_sampled_num1,
        "n2:e2:n1": expected_sampled_num2,
    }
    for etype, expected_num in expected_counts.items():
        if expected_num != 0:
            assert subgraph.sampled_csc[etype].indices.numel() == expected_num
        assert subgraph.sampled_csc[etype].indptr.size(0) == 2
@pytest.mark.parametrize("labor", [False, True])
@pytest.mark.parametrize("is_pinned", [False, True])
def test_sample_neighbors_return_eids_homo(labor, is_pinned):
    """Check the original edge IDs returned for a homogeneous graph.

    Original graph in COO:
    1   0   1   0   1
    1   0   1   1   0
    0   1   0   1   0
    0   1   0   0   1
    1   0   0   0   1
    """
    pinning_without_gpu = F._default_context_str == "cpu" and is_pinned
    if pinning_without_gpu:
        pytest.skip("Pinning is not meaningful without a GPU.")

    # Graph structure.
    total_num_edges = 12
    indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)

    # A random permutation serves as the CSC-position -> original-edge-ID map.
    original_ids = torch.randperm(total_num_edges)
    edge_attributes = {gb.ORIGINAL_EDGE_ID: original_ids}

    target_device = "pinned" if is_pinned else F.ctx()
    graph = gb.fused_csc_sampling_graph(
        indptr, indices, edge_attributes=edge_attributes
    ).to(target_device)

    # Sample every neighbor (fanout -1) of seeds 1, 3 and 4.
    nodes = torch.LongTensor([1, 3, 4]).to(F.ctx())
    if labor:
        sampler = graph.sample_layer_neighbors
    else:
        sampler = graph.sample_neighbors
    subgraph = sampler(nodes, fanouts=torch.LongTensor([-1]))

    # CSC positions 3-4, 7-8 and 9-11 are the `indptr` ranges of seeds 1, 3
    # and 4, so their mapped IDs are exactly the ones expected back.
    seed_edge_positions = torch.tensor([3, 4, 7, 8, 9, 10, 11])
    expected_reverse_edge_ids = original_ids[seed_edge_positions].to(F.ctx())
    sorted_expected = expected_reverse_edge_ids.sort()[0]
    sorted_actual = subgraph.original_edge_ids.sort()[0]
    assert torch.equal(sorted_expected, sorted_actual)

    assert subgraph.original_column_node_ids is None
    assert subgraph.original_row_node_ids is None
@pytest.mark.parametrize("replace", [True, False])
@pytest.mark.parametrize("labor", [False, True])
@pytest.mark.parametrize("probs_name", ["weight", "mask"])
def test_sample_neighbors_probs(replace, labor, probs_name):
    """Check sampled edge counts when sampling with probabilities or a mask.

    Original graph in COO:
    1   0   1   0   1
    1   0   1   1   0
    0   1   0   1   0
    0   1   0   0   1
    1   0   0   0   1
    """
    if F._default_context_str == "gpu" and replace:
        pytest.skip("Sampling with replacement not yet supported on GPU.")

    # Initialize data.
    total_num_edges = 12
    indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)
    edge_attributes = {
        "weight": torch.FloatTensor(
            [2.5, 0, 8.4, 0, 0.4, 1.2, 2.5, 0, 8.4, 0.5, 0.4, 1.2]
        ),
        "mask": torch.BoolTensor([1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1]),
    }

    # Construct FusedCSCSamplingGraph.
    graph = gb.fused_csc_sampling_graph(
        indptr, indices, edge_attributes=edge_attributes
    )

    # Generate subgraph via sample neighbors.
    nodes = torch.LongTensor([1, 3, 4])
    sampler = graph.sample_layer_neighbors if labor else graph.sample_neighbors
    subgraph = sampler(
        nodes,
        fanouts=torch.tensor([2]),
        replace=replace,
        probs_name=probs_name,
    )

    # Verify in subgraph.
    sampled_num = subgraph.sampled_csc.indices.size(0)
    assert subgraph.sampled_csc.indptr.size(0) == 4
    # In both attributes, seeds 1 and 3 each have one in-edge with a non-zero
    # entry and seed 4 has three. The expected totals are consistent with
    # 1 + 1 + 2 picks without replacement and 2 + 2 + 2 with replacement.
    expected_num = 6 if replace else 4
    assert sampled_num == expected_num
@pytest.mark.parametrize("replace", [False, True])
@pytest.mark.parametrize("labor", [False, True])
@pytest.mark.parametrize(
    "fanouts, probs_name",
    [
        ([2], "mask"),
        ([3], "mask"),
        ([4], "mask"),
        ([-1], "mask"),
        ([7], "mask"),
        ([3], "all"),
        ([-1], "all"),
        ([7], "all"),
        ([3], "zero"),
        ([-1], "zero"),
        ([3], "none"),
        ([-1], "none"),
    ],
)
def test_sample_neighbors_homo_pick_number(fanouts, replace, labor, probs_name):
    """Check the number of sampled edges for various fanouts and masks.

    ``indptr`` assigns all six edges of the graph to node 0; nodes 1-5 own no
    edges. ``probs_name`` selects which boolean edge mask restricts the
    candidates ("none" means no mask is passed to the sampler).
    """
    if F._default_context_str == "gpu" and replace:
        pytest.skip("Sampling with replacement not yet supported on GPU.")

    # Initialize data.
    total_num_edges = 6
    indptr = torch.LongTensor([0, 6, 6, 6, 6, 6, 6])
    indices = torch.LongTensor([0, 1, 2, 3, 4, 5])
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)
    edge_attributes = {
        "mask": torch.BoolTensor([1, 0, 0, 1, 0, 1]),
        "all": torch.BoolTensor([1, 1, 1, 1, 1, 1]),
        "zero": torch.BoolTensor([0, 0, 0, 0, 0, 0]),
    }

    # Construct FusedCSCSamplingGraph.
    graph = gb.fused_csc_sampling_graph(
        indptr, indices, edge_attributes=edge_attributes
    )

    # Generate subgraph via sample neighbors.
    nodes = torch.LongTensor([0, 1])
    sampler = graph.sample_layer_neighbors if labor else graph.sample_neighbors

    # Make sure no exception will be thrown.
    subgraph = sampler(
        nodes,
        fanouts=torch.LongTensor(fanouts),
        replace=replace,
        probs_name=probs_name if probs_name != "none" else None,
    )
    sampled_num = subgraph.sampled_csc.indices.size(0)
    # One indptr entry per seed plus the leading zero.
    assert subgraph.sampled_csc.indptr.size(0) == 3

    # Verify in subgraph.
    # Number of edges of node 0 that the selected mask leaves eligible:
    # "mask" keeps 3 of them, "zero" keeps none, "all"/"none" keep all 6.
    num_valid = {"mask": 3, "zero": 0}.get(probs_name, 6)
    fanout = fanouts[0]
    if probs_name == "zero" or fanout == -1:
        # Nothing can be picked from an all-zero mask; a fanout of -1 is
        # expected to return every eligible edge.
        expected_num = num_valid
    elif replace:
        # With replacement the fanout is always met.
        expected_num = fanout
    else:
        expected_num = min(fanout, num_valid)
    assert sampled_num == expected_num
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
def test_graph_attributes():
    """Exercise the node/edge attribute getters, setters and adders.

    Covers four cases: attributes default to ``None``, whole-dict assignment,
    adding/overwriting single attributes, and adding an attribute after the
    dicts were reset to ``None``.
    """
    num_nodes = 1000
    num_edges = 10 * 1000
    csc_indptr, indices = gbt.random_homo_graph(num_nodes, num_edges)
    graph = gb.fused_csc_sampling_graph(
        csc_indptr,
        indices,
        node_attributes=None,
        edge_attributes=None,
    )

    # Case 1: default is None.
    assert graph.node_attributes is None
    assert graph.edge_attributes is None

    # Case 2: Assign the whole node/edge attributes.
    node_attributes = {
        "A": torch.rand(num_nodes, 2),
        "B": torch.rand(num_nodes, 2),
    }
    # Edge attributes carry one row per edge, matching what Case 3 adds.
    edge_attributes = {
        "A": torch.rand(num_edges, 2),
        "B": torch.rand(num_edges, 2),
    }
    graph.node_attributes = node_attributes
    graph.edge_attributes = edge_attributes
    for k, v in node_attributes.items():
        assert torch.equal(v, graph.node_attributes[k])
        assert torch.equal(v, graph.node_attribute(k))
    for k, v in edge_attributes.items():
        assert torch.equal(v, graph.edge_attributes[k])
        assert torch.equal(v, graph.edge_attribute(k))
    assert "C" not in graph.node_attributes
    assert "C" not in graph.edge_attributes
    with pytest.raises(RuntimeError, match="Node attribute C does not exist."):
        graph.node_attribute("C")
    with pytest.raises(RuntimeError, match="Edge attribute C does not exist."):
        graph.edge_attribute("C")

    # Case 3: Assign/overwrite more node/edge attributes into existing ones.
    # "B" overwrites an existing entry, "C" is a brand-new one.
    for key in ["B", "C"]:
        node_attributes[key] = torch.rand(num_nodes, 2)
        edge_attributes[key] = torch.rand(num_edges, 2)
        graph.add_node_attribute(key, node_attributes[key])
        graph.add_edge_attribute(key, edge_attributes[key])
    for k, v in node_attributes.items():
        assert torch.equal(v, graph.node_attributes[k])
        assert torch.equal(v, graph.node_attribute(k))
    for k, v in edge_attributes.items():
        assert torch.equal(v, graph.edge_attributes[k])
        assert torch.equal(v, graph.edge_attribute(k))

    # Case 4: Assign more node/edge attributes which were None previously.
    graph.node_attributes = None
    graph.edge_attributes = None
    graph.add_node_attribute("C", node_attributes["C"])
    graph.add_edge_attribute("C", edge_attributes["C"])
    assert torch.equal(node_attributes["C"], graph.node_attribute("C"))
    assert torch.equal(node_attributes["C"], graph.node_attributes["C"])
    assert torch.equal(edge_attributes["C"], graph.edge_attribute("C"))
    assert torch.equal(edge_attributes["C"], graph.edge_attributes["C"])
@unittest.skipIf(
    _skip_condition_cached_feature(),
    reason=_reason_to_skip_cached_feature(),
)
@pytest.mark.parametrize(
    "dtype",
    [
        torch.bool,
        torch.uint8,
        torch.int8,
        torch.int16,
        torch.int32,
        torch.int64,
        torch.float16,
        torch.bfloat16,
        torch.float32,
        torch.float64,
    ],
)
@pytest.mark.parametrize("cache_size_a", [1, 1024])
@pytest.mark.parametrize("cache_size_b", [1, 1024])
def test_gpu_cached_feature(dtype, cache_size_a, cache_size_b):
    """Read, inspect and update two GPU-cached features of different rank.

    The order of the ``read`` calls is significant: the miss-counter check
    further down relies on what the earlier reads left in the cache.
    """

    def typed_cuda(data):
        # Literal data as a CUDA tensor of the parametrized dtype.
        return torch.tensor(data, dtype=dtype).to("cuda")

    def cuda_ids(ids):
        # Row ids as a CUDA index tensor.
        return torch.tensor(ids).to("cuda")

    a = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype, pin_memory=True)
    b = torch.tensor(
        [[[1, 2], [3, 4]], [[4, 5], [6, 7]]], dtype=dtype, pin_memory=True
    )

    # The parametrized sizes are scaled by the byte size of a single row.
    row_a = a[:1]
    row_b = b[:1]
    cache_size_a = cache_size_a * (row_a.element_size() * row_a.numel())
    cache_size_b = cache_size_b * (row_b.element_size() * row_b.numel())

    feat_store_a = gb.gpu_cached_feature(gb.TorchBasedFeature(a), cache_size_a)
    feat_store_b = gb.gpu_cached_feature(gb.TorchBasedFeature(b), cache_size_b)

    # Test read the entire feature.
    assert torch.equal(feat_store_a.read(), a.to("cuda"))
    assert torch.equal(feat_store_b.read(), b.to("cuda"))

    # Test read with ids.
    values = feat_store_a.read(cuda_ids([0]))
    assert torch.equal(values, typed_cuda([[1, 2, 3]]))
    values = feat_store_b.read(cuda_ids([1, 1]))
    assert torch.equal(
        values, typed_cuda([[[4, 5], [6, 7]], [[4, 5], [6, 7]]])
    )
    values = feat_store_a.read(cuda_ids([1, 1]))
    assert torch.equal(values, typed_cuda([[4, 5, 6], [4, 5, 6]]))
    values = feat_store_b.read(cuda_ids([0]))
    assert torch.equal(values, typed_cuda([[[1, 2], [3, 4]]]))

    # The cache should be full now for the large cache sizes, 100% hit
    # expected: re-reading both rows must leave the miss counter untouched.
    if cache_size_a >= 1024:
        misses_before = feat_store_a._feature.total_miss
        feat_store_a.read(cuda_ids([0, 1]))
        assert misses_before == feat_store_a._feature.total_miss
    if cache_size_b >= 1024:
        misses_before = feat_store_b._feature.total_miss
        feat_store_b.read(cuda_ids([0, 1]))
        assert misses_before == feat_store_b._feature.total_miss
    assert feat_store_a._feature.miss_rate == feat_store_a.miss_rate

    # Test get the size and count of the entire feature.
    assert feat_store_a.size() == torch.Size([3])
    assert feat_store_b.size() == torch.Size([2, 2])
    assert feat_store_a.count() == a.size(0)
    assert feat_store_b.count() == b.size(0)

    # Test update the entire feature.
    feat_store_a.update(typed_cuda([[0, 1, 2], [3, 5, 2]]))
    assert torch.equal(
        feat_store_a.read(), typed_cuda([[0, 1, 2], [3, 5, 2]])
    )

    # Test update with ids.
    feat_store_a.update(typed_cuda([[2, 0, 1]]), cuda_ids([0]))
    assert torch.equal(
        feat_store_a.read(), typed_cuda([[2, 0, 1], [3, 5, 2]])
    )

    # Test with different dimensionality
    feat_store_a.update(b)
    assert torch.equal(feat_store_a.read(), b.to("cuda"))
@unittest.skipIf(
    _skip_condition_cached_feature(),
    reason=_reason_to_skip_cached_feature(),
)
@unittest.skipIf(
    not torch.ops.graphbolt.detect_io_uring(),
    reason="DiskBasedFeature is not available on this system.",
)
@pytest.mark.parametrize(
    "dtype",
    [
        torch.bool,
        torch.uint8,
        torch.int8,
        torch.int16,
        torch.int32,
        torch.int64,
        torch.float16,
        torch.float32,
        torch.float64,
    ],
)
def test_gpu_cached_nested_feature_async(dtype):
    """Asynchronously read a disk feature through stacked GPU/CPU caches."""
    a = torch.randint(0, 2, [1000, 13], dtype=dtype, device=F.ctx())
    # Room for 256 rows of the feature.
    cache_size = 256 * a[:1].nbytes

    ids1 = torch.tensor([0, 15, 71, 101], device=F.ctx())
    ids2 = torch.tensor([71, 101, 202, 303], device=F.ctx())

    with tempfile.TemporaryDirectory() as test_dir:
        disk_store = gb.DiskBasedFeature(
            path=to_on_disk_numpy(test_dir, "tensor", a)
        )

        # GPU cache directly on top of the disk feature.
        feat_store1 = gb.gpu_cached_feature(disk_store, cache_size)
        # GPU cache on top of a CPU cache.
        cpu_cached = gb.cpu_cached_feature(disk_store, cache_size * 2)
        feat_store2 = gb.gpu_cached_feature(cpu_cached, cache_size)
        # GPU cache on top of a CPU cache created with pin_memory=True.
        cpu_cached = gb.cpu_cached_feature(
            disk_store, cache_size * 2, pin_memory=True
        )
        feat_store3 = gb.gpu_cached_feature(cpu_cached, cache_size)
        cpu_cached = None

        # Test read feature.
        for feat_store in [feat_store1, feat_store2, feat_store3]:
            for ids in [ids1, ids2]:
                reader = feat_store.read_async(ids)
                num_stages = feat_store.read_async_num_stages(ids.device)
                # The object yielded by the final stage carries the result.
                for _ in range(num_stages):
                    values = next(reader)
                assert torch.equal(values.wait(), a[ids])

        # Clear these names while the temporary directory still exists.
        feat_store1 = None
        feat_store2 = None
        feat_store3 = None
        disk_store = None
@pytest.mark.parametrize(
    "cached_feature_type", [gb.cpu_cached_feature, gb.gpu_cached_feature]
)
def test_hetero_cached_feature(cached_feature_type):
    """Compare reads through a cached feature dict against the raw features."""
    on_gpu = cached_feature_type == gb.gpu_cached_feature
    if on_gpu:
        if F._default_context_str != "gpu":
            pytest.skip(
                "GPUCachedFeature tests are available only when testing the GPU backend."
            )
        if torch.cuda.get_device_capability()[0] < 7:
            pytest.skip(
                "GPUCachedFeature requires a Volta or later generation NVIDIA GPU."
            )

    device = F.ctx() if on_gpu else None
    pin_memory = on_gpu

    # 75 features keyed ("node", "<i>", "feat"); feature i has (i + 1) * 10
    # rows of width 5.
    a = {}
    for i in range(75):
        num_rows = (i + 1) * 10
        a[("node", str(i), "feat")] = gb.TorchBasedFeature(
            torch.randn([num_rows, 5], pin_memory=pin_memory)
        )
    cached_a = cached_feature_type(a, 2**18)

    for step in range(1024):
        # Cycle through the features round-robin.
        etype = step % len(a)
        upper = (etype + 1) * 10 - 1
        num_ids = (etype + 1) * 4
        ids = torch.randint(0, upper, (num_ids,), device=device)
        feature_key = ("node", str(etype), "feat")
        ref = a[feature_key].read(ids)
        val = cached_a[feature_key].read(ids)
        # Cached reads must be bit-exact.
        torch.testing.assert_close(ref, val, rtol=0, atol=0)

    # Checked for the feature visited last in the loop above.
    assert cached_a[feature_key].miss_rate < 0.69
def test_InSubgraphSampler_homo():
    """Check seeds and sampled CSC of InSubgraphSampler on a homogeneous graph.

    Original graph in COO:
    1   0   1   0   1   0
    1   0   0   1   0   1
    0   1   0   1   0   0
    0   1   0   0   1   0
    1   0   0   0   1   0
    0   1   0   0   1   0
    """
    indptr = torch.LongTensor([0, 3, 5, 7, 9, 12, 14])
    indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 5, 1, 2, 0, 3, 5, 1, 4])
    graph = gb.fused_csc_sampling_graph(indptr, indices).to(F.ctx())

    # One seed per minibatch.
    batch_size = 1
    item_set = gb.ItemSet(torch.LongTensor([0, 5, 3]), names="seeds")
    item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to(
        F.ctx()
    )
    in_subgraph_sampler = gb.InSubgraphSampler(item_sampler, graph)

    def original_indices(minibatch):
        # Map the compacted indices back through original_row_node_ids.
        subgraph = minibatch.sampled_subgraphs[0]
        return subgraph.original_row_node_ids[subgraph.sampled_csc.indices]

    # (seed, expected indptr, expected original indices or None to skip).
    expectations = [
        ([0], [0, 3], None),
        ([5], [0, 2], [1, 4]),
        ([3], [0, 2], [1, 2]),
    ]
    batches = iter(in_subgraph_sampler)
    for seed, indptr_ref, indices_ref in expectations:
        mn = next(batches)
        assert torch.equal(mn.seeds, torch.LongTensor(seed).to(F.ctx()))
        assert torch.equal(
            mn.sampled_subgraphs[0].sampled_csc.indptr,
            torch.tensor(indptr_ref).to(F.ctx()),
        )
        if indices_ref is not None:
            assert torch.equal(
                original_indices(mn), torch.tensor(indices_ref).to(F.ctx())
            )
indptr=torch.LongTensor([0, 1, 3]), indices=torch.LongTensor([2, 1, 0]), ), "N0:R1:N1": gb.CSCFormatBase( indptr=torch.LongTensor([0]), indices=torch.LongTensor([]) ), "N1:R2:N0": gb.CSCFormatBase( indptr=torch.LongTensor([0, 1, 2]), indices=torch.LongTensor([0, 1]) ), "N1:R3:N1": gb.CSCFormatBase( indptr=torch.LongTensor([0]), indices=torch.LongTensor([]) ), } for etype, pairs in mn.sampled_subgraphs[0].sampled_csc.items(): assert torch.equal( pairs.indices, expected_sampled_csc[etype].indices.to(F.ctx()) ) assert torch.equal( pairs.indptr, expected_sampled_csc[etype].indptr.to(F.ctx()) ) mn = next(it) assert mn.seeds == { "N0": torch.LongTensor([2]).to(F.ctx()), "N1": torch.LongTensor([0]).to(F.ctx()), } expected_sampled_csc = { "N0:R0:N0": gb.CSCFormatBase( indptr=torch.LongTensor([0, 1]), indices=torch.LongTensor([1]) ), "N0:R1:N1": gb.CSCFormatBase( indptr=torch.LongTensor([0, 2]), indices=torch.LongTensor([2, 0]) ), "N1:R2:N0": gb.CSCFormatBase( indptr=torch.LongTensor([0, 1]), indices=torch.LongTensor([1]) ), "N1:R3:N1": gb.CSCFormatBase( indptr=torch.LongTensor([0, 0]), indices=torch.LongTensor([]) ), } for etype, pairs in mn.sampled_subgraphs[0].sampled_csc.items(): assert torch.equal( pairs.indices, expected_sampled_csc[etype].indices.to(F.ctx()) ) assert torch.equal( pairs.indptr, expected_sampled_csc[etype].indptr.to(F.ctx()) ) mn = next(it) assert torch.equal(mn.seeds["N1"], torch.LongTensor([2, 1]).to(F.ctx())) expected_sampled_csc = { "N0:R0:N0": gb.CSCFormatBase( indptr=torch.LongTensor([0]), indices=torch.LongTensor([]) ), "N0:R1:N1": gb.CSCFormatBase( indptr=torch.LongTensor([0, 1, 2]), indices=torch.LongTensor([0, 1]) ), "N1:R2:N0": gb.CSCFormatBase( indptr=torch.LongTensor([0]), indices=torch.LongTensor([]) ), "N1:R3:N1": gb.CSCFormatBase( indptr=torch.LongTensor([0, 1, 3]), indices=torch.LongTensor([1, 2, 0]), ), } if graph.csc_indptr.is_cuda and torch.cuda.get_device_capability()[0] < 7: expected_sampled_csc["N0:R1:N1"] = gb.CSCFormatBase( 
def test_LegacyDataset_homo_node_pred():
    """Wrap Cora (with self loops) in LegacyDataset and check its contents."""
    dataset = gb.LegacyDataset(CoraGraphDataset(transform=AddSelfLoop()))

    # Check tasks.
    assert len(dataset.tasks) == 1
    task = dataset.tasks[0]
    expected_names = ("seeds", "labels")
    for split, expected_len in (
        (task.train_set, 140),
        (task.validation_set, 500),
        (task.test_set, 1000),
    ):
        assert split.names == expected_names
        assert len(split) == expected_len
    assert task.metadata["num_classes"] == 7

    num_nodes = 2708
    assert dataset.graph.num_nodes == num_nodes
    assert len(dataset.all_nodes_set) == num_nodes

    feature = dataset.feature
    assert feature.size("node", None, "feat") == torch.Size([1433])
    # Reading the last valid node id returns exactly one row.
    last_row = feature.read(
        "node", None, "feat", torch.tensor([num_nodes - 1])
    )
    assert last_row.size(dim=0) == 1
    # Out of bound indexing results in segmentation fault instead of exception
    # in CI. This may be related to docker env. Skip it for now.
    # with pytest.raises(IndexError):
    #     dataset.feature.read("node", None, "feat", torch.Tensor([num_nodes]))
@pytest.mark.parametrize("negative_ratio", [1, 5, 10, 20])
def test_Uniform_NegativeSampler(negative_ratio):
    """Validate seeds, labels and indexes produced by UniformNegativeSampler."""
    # Construct FusedCSCSamplingGraph.
    graph = gb_test_utils.rand_csc_graph(100, 0.05, bidirection_edge=True).to(
        F.ctx()
    )
    num_seeds = 30
    batch_size = 10
    seed_pairs = torch.arange(0, num_seeds * 2).reshape(-1, 2)
    item_set = gb.ItemSet(seed_pairs, names="seeds")
    item_sampler = gb.ItemSampler(item_set, batch_size=batch_size).copy_to(
        F.ctx()
    )

    # Construct NegativeSampler.
    negative_sampler = gb.UniformNegativeSampler(
        item_sampler,
        graph,
        negative_ratio,
    )

    # Perform Negative sampling.
    num_negatives = batch_size * negative_ratio
    for data in negative_sampler:
        # Every batch holds the positives followed by the negatives.
        seeds_len = batch_size + num_negatives
        for field in (data.seeds, data.labels, data.indexes):
            assert field.size(0) == seeds_len

        # Check negative seeds value: each negative reuses the first column
        # of its positive pair.
        pos_src = data.seeds[:batch_size, 0]
        neg_src = data.seeds[batch_size:, 0]
        assert torch.equal(pos_src.repeat_interleave(negative_ratio), neg_src)

        # Check labels: ones for positives, zeros for negatives.
        pos_labels = data.labels[:batch_size]
        neg_labels = data.labels[batch_size:]
        assert torch.equal(pos_labels, torch.ones(batch_size).to(F.ctx()))
        assert torch.equal(
            neg_labels,
            torch.zeros(batch_size * negative_ratio).to(F.ctx()),
        )

        # Check indexes: negatives carry the index of their positive pair.
        pos_indexes = torch.arange(0, batch_size).to(F.ctx())
        neg_indexes = pos_indexes.repeat_interleave(negative_ratio)
        assert torch.equal(
            data.indexes, torch.cat((pos_indexes, neg_indexes))
        )
def get_hetero_graph():
    """Build a small heterogeneous FusedCSCSamplingGraph for the tests below.

    The graph has 5 nodes (2 of type "n1" followed by 3 of type "n2") and
    10 edges split over the edge types "n1:e1:n2" and "n2:e2:n1".
    """
    # Per-edge view of the CSC arrays (one column per edge):
    # indptr expanded: [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
    # indices:         [2, 4, 2, 3, 0, 1, 1, 0, 0, 1]
    # type_per_edge:   [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]
    # num_nodes = 5, num_n1 = 2, num_n2 = 3
    ntypes = {"n1": 0, "n2": 1}
    etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
    indptr = torch.LongTensor([0, 2, 4, 6, 8, 10])
    indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 0, 1])
    type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    # Node ids [0, 2) belong to "n1" and [2, 5) to "n2".
    node_type_offset = torch.LongTensor([0, 2, 5])
    return gb.fused_csc_sampling_graph(
        indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=ntypes,
        edge_type_to_id=etypes,
    )
for etype, seeds_data in data.seeds.items(): neg_src = seeds_data[batch_size:, 0] neg_dst = seeds_data[batch_size:, 1] assert torch.equal(expected_neg_src[i][etype].to(F.ctx()), neg_src) assert (neg_dst < 3).all(), neg_dst
================================================ FILE: tests/python/pytorch/graphbolt/impl/test_neighbor_sampler.py ================================================
import unittest
from functools import partial

import backend as F
import dgl.graphbolt as gb
import pytest
import torch


def get_hetero_graph(include_original_edge_ids):
    """Return a 6-node, 10-edge heterogeneous graph with edge attributes.

    The graph carries ``weight`` and ``mask`` edge attributes. When
    ``include_original_edge_ids`` is truthy, a ``gb.ORIGINAL_EDGE_ID``
    attribute holding the edge positions in reverse order (9 down to 0) is
    added as well.
    """
    # NOTE: the "COO graph" comment below describes the 5-node, two-type
    # layout. In this function every node ID is shifted up by one: ``indptr``
    # gains a leading empty entry for node 0 and ``indices`` are one larger,
    # so there are 6 nodes split as 1 x n1, 2 x n2, 3 x n3 (see
    # ``node_type_offset``).
    # COO graph:
    # [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
    # [2, 4, 2, 3, 0, 1, 1, 0, 0, 1]
    # [1, 1, 1, 1, 0, 0, 0, 0, 0, 0] - > edge type.
    # num_nodes = 5, num_n1 = 2, num_n2 = 3
    ntypes = {"n1": 0, "n2": 1, "n3": 2}
    etypes = {"n2:e1:n3": 0, "n3:e2:n2": 1}
    indptr = torch.LongTensor([0, 0, 2, 4, 6, 8, 10])
    indices = torch.LongTensor([3, 5, 3, 4, 1, 2, 2, 1, 1, 2])
    type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    edge_attributes = {
        "weight": torch.FloatTensor(
            [2.5, 0, 8.4, 0, 0.4, 1.2, 2.5, 0, 8.4, 0.5]
        ),
        "mask": torch.BoolTensor([1, 0, 1, 0, 1, 1, 1, 0, 1, 1]),
    }
    if include_original_edge_ids:
        # arange(10, 0, -1) - 1 == [9, 8, ..., 0]
        edge_attributes[gb.ORIGINAL_EDGE_ID] = (
            torch.arange(indices.size(0), 0, -1) - 1
        )
    node_type_offset = torch.LongTensor([0, 1, 3, 6])
    return gb.fused_csc_sampling_graph(
        indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=ntypes,
        edge_type_to_id=etypes,
        edge_attributes=edge_attributes,
    )


@unittest.skipIf(F._default_context_str != "gpu", reason="Enabled only on GPU.")
@pytest.mark.parametrize("hetero", [False, True])
@pytest.mark.parametrize("prob_name", [None, "weight", "mask"])
@pytest.mark.parametrize("sorted", [False, True])
@pytest.mark.parametrize("num_cached_edges", [0, 10])
@pytest.mark.parametrize("is_pinned", [False, True])
@pytest.mark.parametrize("has_orig_edge_ids", [False, True])
def test_NeighborSampler_GraphFetch(
    hetero, prob_name, sorted, num_cached_edges, is_pinned, has_orig_edge_ids
):
    """Compare three one-layer neighbor-sampling pipelines under one seed.

    A reference result is produced with ``gb.SamplePerLayer`` (last
    positional argument ``False``). It is then compared, via ``repr``, with
    (a) the ``sample_per_layer`` datapipe called with that argument set to
    ``True`` and (b) ``sample_neighbor(..., overlap_fetch=True)`` run through
    a ``gb.DataLoader``.
    """
    # NOTE: the parameter name ``sorted`` shadows the builtin inside this
    # function; it must match the parametrize id above.
    if sorted:
        items = torch.arange(3)
    else:
        items = torch.tensor([2, 0, 1])
    names = "seeds"
    itemset = gb.ItemSet(items, names=names)
    graph = get_hetero_graph(has_orig_edge_ids)
    graph = graph.pin_memory_() if is_pinned else graph.to(F.ctx())
    if hetero:
        itemset = gb.HeteroItemSet({"n3": itemset})
    else:
        # Dropping the per-edge types makes the graph behave as homogeneous.
        graph.type_per_edge = None
    item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx())
    fanout = torch.LongTensor([2])
    preprocess_fn = partial(
        gb.SubgraphSampler._preprocess, cooperative=False, async_op=False
    )
    datapipe = item_sampler.map(preprocess_fn)
    datapipe = datapipe.map(
        partial(gb.NeighborSampler._prepare, graph.node_type_to_id)
    )
    # Reference pipeline.
    sample_per_layer = gb.SamplePerLayer(
        datapipe, graph.sample_neighbors, fanout, False, prob_name, False
    )
    compact_per_layer = sample_per_layer.compact_per_layer(True)
    gb.seed(123)
    expected_results = list(compact_per_layer)
    if num_cached_edges > 0:
        graph._initialize_gpu_graph_cache(num_cached_edges, 1, prob_name)
    # Same pipeline with the final flag switched to True.
    datapipe = datapipe.sample_per_layer(
        graph.sample_neighbors, fanout, False, prob_name, True
    )
    datapipe = datapipe.compact_per_layer(True)
    gb.seed(123)
    new_results = list(datapipe)
    assert len(expected_results) == len(new_results)
    for a, b in zip(expected_results, new_results):
        assert repr(a) == repr(b)

    def remove_input_nodes(minibatch):
        # Clear ``input_nodes`` so the repr comparison below ignores it.
        minibatch.input_nodes = None
        return minibatch

    datapipe = item_sampler.sample_neighbor(
        graph, [fanout], False, prob_name=prob_name, overlap_fetch=True
    )
    datapipe = datapipe.transform(remove_input_nodes)
    dataloader = gb.DataLoader(datapipe)
    gb.seed(123)
    new_results = list(dataloader)
    assert len(expected_results) == len(new_results)
    for a, b in zip(expected_results, new_results):
        assert repr(a) == repr(b)


@pytest.mark.parametrize("layer_dependency", [False, True])
@pytest.mark.parametrize("overlap_graph_fetch", [False, True])
def test_labor_dependent_minibatching(layer_dependency, overlap_graph_fetch):
    """Check layer/batch dependency options of ``sample_layer_neighbor``.

    The graph has 201 nodes; node 0 owns all 200 edges (to nodes 1..200).
    Node 0 is sampled ``batch_dependency + 1`` times with fanouts ``[5, 5]``
    and the overlap between consecutive minibatches is checked.
    """
    if F._default_context_str != "gpu" and overlap_graph_fetch:
        pytest.skip("overlap_graph_fetch is only available for GPU.")
    num_edges = 200
    # indptr == [0, 200, 200, ..., 200]: only node 0 has edges.
    csc_indptr = torch.cat(
        (
            torch.zeros(1, dtype=torch.int64),
            torch.ones(num_edges + 1, dtype=torch.int64) * num_edges,
        )
    )
    indices = torch.arange(1, num_edges + 1)
    graph = gb.fused_csc_sampling_graph(
        csc_indptr.int(),
        indices.int(),
    ).to(F.ctx())
    torch.random.set_rng_state(torch.manual_seed(123).get_state())
    batch_dependency = 100
    itemset = gb.ItemSet(torch.zeros(batch_dependency + 1).int(), names="seeds")
    datapipe = gb.ItemSampler(itemset, batch_size=1).copy_to(F.ctx())
    fanouts = [5, 5]
    datapipe = datapipe.sample_layer_neighbor(
        graph,
        fanouts,
        overlap_fetch=overlap_graph_fetch,
        layer_dependency=layer_dependency,
        batch_dependency=batch_dependency,
    )
    dataloader = gb.DataLoader(datapipe)
    res = list(dataloader)
    assert len(res) == batch_dependency + 1
    if layer_dependency:
        # With layer dependency the input nodes equal the row nodes of
        # sampled_subgraphs[1].
        assert torch.equal(
            res[0].input_nodes,
            res[0].sampled_subgraphs[1].original_row_node_ids,
        )
    else:
        assert res[0].input_nodes.size(0) > res[0].sampled_subgraphs[
            1
        ].original_row_node_ids.size(0)
    # Consecutive batches must share at least fanouts[-1] row nodes, yet the
    # accumulated drift over all batches must reach fanouts[-1].
    delta = 0
    for i in range(batch_dependency):
        res_current = (
            res[i].sampled_subgraphs[-1].original_row_node_ids.tolist()
        )
        res_next = (
            res[i + 1].sampled_subgraphs[-1].original_row_node_ids.tolist()
        )
        intersect_len = len(set(res_current).intersection(set(res_next)))
        assert intersect_len >= fanouts[-1]
        delta += 1 + fanouts[-1] - intersect_len
    assert delta >= fanouts[-1]


================================================ FILE: tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py ================================================ import os import pickle import random import re import tempfile import unittest import warnings import numpy as np import pandas as pd import pydantic import pytest import torch import yaml from dgl import graphbolt as gb from dgl.graphbolt import GBWarning from .. 
import gb_test_utils as gbt


def write_yaml_file(yaml_content, dir):
    """Write ``yaml_content`` to ``<dir>/preprocessed/metadata.yaml``.

    The ``preprocessed`` sub-directory is created if it does not exist.
    """
    os.makedirs(os.path.join(dir, "preprocessed"), exist_ok=True)
    yaml_file = os.path.join(dir, "preprocessed/metadata.yaml")
    with open(yaml_file, "w") as f:
        f.write(yaml_content)


def load_dataset(dataset):
    """Return ``dataset.load()`` with ``UserWarning`` suppressed."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        return dataset.load()


def write_yaml_and_load_dataset(yaml_content, dir, force_preprocess=False):
    """Write the metadata YAML into ``dir`` and load it as an OnDiskDataset."""
    write_yaml_file(yaml_content, dir)
    return load_dataset(
        gb.OnDiskDataset(dir, force_preprocess=force_preprocess)
    )


def load_sampling_graph(test_dir, processed_dataset):
    """Load the object stored at ``processed_dataset["graph_topology"]["path"]``.

    The path is resolved relative to ``test_dir``.
    """
    # NOTE(review): weights_only=False unpickles arbitrary objects; fine for
    # files the tests wrote themselves, not for untrusted input.
    return torch.load(
        os.path.join(test_dir, processed_dataset["graph_topology"]["path"]),
        weights_only=False,
    )


def test_OnDiskDataset_TVTSet_exceptions():
    """Test exceptions thrown when parsing TVTSet."""
    # NOTE(review): YAML literals in this section are reproduced exactly as
    # they appear in this dump, where newlines/indentation are collapsed.
    with tempfile.TemporaryDirectory() as test_dir:
        # Case 1: ``format`` is invalid.
        yaml_content = """ tasks: - name: node_classification train_set: - type: paper data: - format: torch_invalid path: set/paper-train.pt """
        write_yaml_file(yaml_content, test_dir)
        with pytest.raises(pydantic.ValidationError):
            _ = gb.OnDiskDataset(test_dir, force_preprocess=False).load()

        # Case 2: ``type`` is not specified while multiple TVT sets are
        # specified.
        yaml_content = """ tasks: - name: node_classification train_set: - type: null data: - format: numpy path: set/train.npy - type: null data: - format: numpy path: set/train.npy """
        write_yaml_file(yaml_content, test_dir)
        with pytest.raises(
            AssertionError,
            match=r"Only one TVT set is allowed if type is not specified.",
        ):
            _ = gb.OnDiskDataset(test_dir, force_preprocess=False).load()


def test_OnDiskDataset_multiple_tasks():
    """Test multiple tasks are supported."""
    with tempfile.TemporaryDirectory() as test_dir:
        train_ids = np.arange(1000)
        train_ids_path = os.path.join(test_dir, "train_ids.npy")
        np.save(train_ids_path, train_ids)
        train_labels = np.random.randint(0, 10, size=1000)
        train_labels_path = os.path.join(test_dir, "train_labels.npy")
        np.save(train_labels_path, train_labels)

        # Two tasks with identical train sets; the third data entry of each
        # has no ``name``.
        yaml_content = f""" tasks: - name: node_classification_1 num_classes: 10 train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_ids_path} - name: labels format: numpy in_memory: true path: {train_labels_path} - format: numpy in_memory: true path: {train_labels_path} - name: node_classification_2 num_classes: 10 train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_ids_path} - name: labels format: numpy in_memory: true path: {train_labels_path} - format: numpy in_memory: true path: {train_labels_path} """
        dataset = write_yaml_and_load_dataset(yaml_content, test_dir)
        assert len(dataset.tasks) == 2

        for task_id in range(2):
            assert (
                dataset.tasks[task_id].metadata["name"]
                == f"node_classification_{task_id + 1}"
            )
            assert dataset.tasks[task_id].metadata["num_classes"] == 10
            # Verify train set.
            train_set = dataset.tasks[task_id].train_set
            assert len(train_set) == 1000
            assert isinstance(train_set, gb.ItemSet)
            for i, (id, label, _) in enumerate(train_set):
                assert id == train_ids[i]
                assert label == train_labels[i]
            # The unnamed data entry shows up as ``None`` in ``names``.
            assert train_set.names == ("seeds", "labels", None)
            train_set = None
        dataset = None


def test_OnDiskDataset_TVTSet_ItemSet_names():
    """Test TVTSet which returns ItemSet with IDs, labels and corresponding names."""
    with tempfile.TemporaryDirectory() as test_dir:
        train_ids = np.arange(1000)
        train_ids_path = os.path.join(test_dir, "train_ids.npy")
        np.save(train_ids_path, train_ids)
        train_labels = np.random.randint(0, 10, size=1000)
        train_labels_path = os.path.join(test_dir, "train_labels.npy")
        np.save(train_labels_path, train_labels)

        yaml_content = f""" tasks: - name: node_classification num_classes: 10 train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_ids_path} - name: labels format: numpy in_memory: true path: {train_labels_path} - format: numpy in_memory: true path: {train_labels_path} """
        dataset = write_yaml_and_load_dataset(yaml_content, test_dir)

        # Verify train set.
        train_set = dataset.tasks[0].train_set
        assert len(train_set) == 1000
        assert isinstance(train_set, gb.ItemSet)
        for i, (id, label, _) in enumerate(train_set):
            assert id == train_ids[i]
            assert label == train_labels[i]
        # The unnamed third data entry shows up as ``None`` in ``names``.
        assert train_set.names == ("seeds", "labels", None)
        train_set = None


def test_OnDiskDataset_TVTSet_HeteroItemSet_names(): """Test TVTSet which returns ItemSet with IDs, labels and corresponding names.""" with tempfile.TemporaryDirectory() as test_dir: train_ids = np.arange(1000) train_ids_path = os.path.join(test_dir, "train_ids.npy") np.save(train_ids_path, train_ids) train_labels = np.random.randint(0, 10, size=1000) train_labels_path = os.path.join(test_dir, "train_labels.npy") np.save(train_labels_path, train_labels) yaml_content = f""" tasks: - name: node_classification num_classes: 10 train_set: - type: "author:writes:paper" data: - name: seeds format: numpy in_memory: true path: {train_ids_path} - name: labels format: numpy in_memory: true path: {train_labels_path} - format: numpy in_memory: true path: {train_labels_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) # Verify train set. 
train_set = dataset.tasks[0].train_set assert len(train_set) == 1000 assert isinstance(train_set, gb.HeteroItemSet) for i, item in enumerate(train_set): assert isinstance(item, dict) assert "author:writes:paper" in item id, label, _ = item["author:writes:paper"] assert id == train_ids[i] assert label == train_labels[i] assert train_set.names == ("seeds", "labels", None) train_set = None


def test_OnDiskDataset_TVTSet_ItemSet_id_label():
    """Test TVTSet which returns ItemSet with IDs and labels."""
    # NOTE(review): YAML literals are reproduced exactly as they appear in
    # this dump, where newlines/indentation are collapsed.
    with tempfile.TemporaryDirectory() as test_dir:
        # Each split gets 1000 consecutive IDs and random labels in [0, 10).
        train_ids = np.arange(1000)
        train_ids_path = os.path.join(test_dir, "train_ids.npy")
        np.save(train_ids_path, train_ids)
        train_labels = np.random.randint(0, 10, size=1000)
        train_labels_path = os.path.join(test_dir, "train_labels.npy")
        np.save(train_labels_path, train_labels)

        validation_ids = np.arange(1000, 2000)
        validation_ids_path = os.path.join(test_dir, "validation_ids.npy")
        np.save(validation_ids_path, validation_ids)
        validation_labels = np.random.randint(0, 10, size=1000)
        validation_labels_path = os.path.join(test_dir, "validation_labels.npy")
        np.save(validation_labels_path, validation_labels)

        test_ids = np.arange(2000, 3000)
        test_ids_path = os.path.join(test_dir, "test_ids.npy")
        np.save(test_ids_path, test_ids)
        test_labels = np.random.randint(0, 10, size=1000)
        test_labels_path = os.path.join(test_dir, "test_labels.npy")
        np.save(test_labels_path, test_labels)

        # Case 1:
        #   all TVT sets are specified.
        #   ``type`` is not specified or specified as ``null``.
        #   ``in_memory`` could be ``true`` and ``false``.
        yaml_content = f""" tasks: - name: node_classification num_classes: 10 train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_ids_path} - name: labels format: numpy in_memory: true path: {train_labels_path} validation_set: - data: - name: seeds format: numpy in_memory: true path: {validation_ids_path} - name: labels format: numpy in_memory: true path: {validation_labels_path} test_set: - type: null data: - name: seeds format: numpy in_memory: true path: {test_ids_path} - name: labels format: numpy in_memory: true path: {test_labels_path} """
        dataset = write_yaml_and_load_dataset(yaml_content, test_dir)

        # Verify tasks.
        assert len(dataset.tasks) == 1
        assert dataset.tasks[0].metadata["name"] == "node_classification"
        assert dataset.tasks[0].metadata["num_classes"] == 10

        # Verify train set.
        train_set = dataset.tasks[0].train_set
        assert len(train_set) == 1000
        assert isinstance(train_set, gb.ItemSet)
        for i, (id, label) in enumerate(train_set):
            assert id == train_ids[i]
            assert label == train_labels[i]
        assert train_set.names == ("seeds", "labels")
        train_set = None

        # Verify validation set.
        validation_set = dataset.tasks[0].validation_set
        assert len(validation_set) == 1000
        assert isinstance(validation_set, gb.ItemSet)
        for i, (id, label) in enumerate(validation_set):
            assert id == validation_ids[i]
            assert label == validation_labels[i]
        assert validation_set.names == ("seeds", "labels")
        validation_set = None

        # Verify test set.
        test_set = dataset.tasks[0].test_set
        assert len(test_set) == 1000
        assert isinstance(test_set, gb.ItemSet)
        for i, (id, label) in enumerate(test_set):
            assert id == test_ids[i]
            assert label == test_labels[i]
        assert test_set.names == ("seeds", "labels")
        test_set = None
        dataset = None

        # Case 2: Some TVT sets are None.
        yaml_content = f""" tasks: - name: node_classification train_set: - type: null data: - format: numpy path: {train_ids_path} """
        dataset = write_yaml_and_load_dataset(yaml_content, test_dir)
        assert dataset.tasks[0].train_set is not None
        assert dataset.tasks[0].validation_set is None
        assert dataset.tasks[0].test_set is None
        dataset = None


def test_OnDiskDataset_TVTSet_ItemSet_node_pairs_labels(): """Test TVTSet which returns ItemSet with node pairs and labels.""" with tempfile.TemporaryDirectory() as test_dir: train_seeds = np.arange(2000).reshape(1000, 2) train_seeds_path = os.path.join(test_dir, "train_seeds.npy") np.save(train_seeds_path, train_seeds) train_labels = np.random.randint(0, 10, size=1000) train_labels_path = os.path.join(test_dir, "train_labels.npy") np.save(train_labels_path, train_labels) validation_seeds = np.arange(2000, 4000).reshape(1000, 2) validation_seeds_path = os.path.join(test_dir, "validation_seeds.npy") np.save(validation_seeds_path, validation_seeds) validation_labels = np.random.randint(0, 10, size=1000) validation_labels_path = os.path.join(test_dir, "validation_labels.npy") np.save(validation_labels_path, validation_labels) test_seeds = np.arange(4000, 6000).reshape(1000, 2) test_seeds_path = os.path.join(test_dir, "test_seeds.npy") np.save(test_seeds_path, test_seeds) test_labels = np.random.randint(0, 10, size=1000) test_labels_path = os.path.join(test_dir, "test_labels.npy") np.save(test_labels_path, test_labels) yaml_content = f""" tasks: - name: link_prediction train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_seeds_path} - name: labels format: numpy in_memory: true path: {train_labels_path} validation_set: - data: - name: seeds format: numpy in_memory: true path: {validation_seeds_path} - name: labels format: numpy in_memory: true path: {validation_labels_path} test_set: - type: null data: - name: seeds format: numpy in_memory: true path: {test_seeds_path} - name: labels format: numpy 
in_memory: true path: {test_labels_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) # Verify train set. train_set = dataset.tasks[0].train_set assert len(train_set) == 1000 assert isinstance(train_set, gb.ItemSet) for i, (node_pair, label) in enumerate(train_set): assert node_pair[0] == train_seeds[i][0] assert node_pair[1] == train_seeds[i][1] assert label == train_labels[i] assert train_set.names == ("seeds", "labels") train_set = None # Verify validation set. validation_set = dataset.tasks[0].validation_set assert len(validation_set) == 1000 assert isinstance(validation_set, gb.ItemSet) for i, (node_pair, label) in enumerate(validation_set): assert node_pair[0] == validation_seeds[i][0] assert node_pair[1] == validation_seeds[i][1] assert label == validation_labels[i] assert validation_set.names == ("seeds", "labels") validation_set = None # Verify test set. test_set = dataset.tasks[0].test_set assert len(test_set) == 1000 assert isinstance(test_set, gb.ItemSet) for i, (node_pair, label) in enumerate(test_set): assert node_pair[0] == test_seeds[i][0] assert node_pair[1] == test_seeds[i][1] assert label == test_labels[i] assert test_set.names == ("seeds", "labels") test_set = None dataset = None def test_OnDiskDataset_TVTSet_ItemSet_node_pairs_labels_indexes(): """Test TVTSet which returns ItemSet with node pairs and negative ones.""" with tempfile.TemporaryDirectory() as test_dir: train_seeds = np.arange(2000).reshape(1000, 2) train_neg_dst = np.random.choice(1000 * 10, size=1000 * 10) train_neg_src = train_seeds[:, 0].repeat(10) train_neg_seeds = ( np.concatenate((train_neg_dst, train_neg_src)).reshape(2, -1).T ) train_seeds = np.concatenate((train_seeds, train_neg_seeds)) train_seeds_path = os.path.join(test_dir, "train_seeds.npy") np.save(train_seeds_path, train_seeds) train_labels = torch.empty(1000 * 11) train_labels[:1000] = 1 train_labels[1000:] = 0 train_labels_path = os.path.join(test_dir, "train_labels.pt") 
torch.save(train_labels, train_labels_path) train_indexes = torch.arange(0, 1000) train_indexes = np.concatenate( (train_indexes, train_indexes.repeat_interleave(10)) ) train_indexes_path = os.path.join(test_dir, "train_indexes.pt") torch.save(train_indexes, train_indexes_path) validation_seeds = np.arange(2000, 4000).reshape(1000, 2) validation_neg_seeds = train_neg_seeds + 1 validation_seeds = np.concatenate( (validation_seeds, validation_neg_seeds) ) validation_seeds_path = os.path.join(test_dir, "validation_seeds.npy") np.save(validation_seeds_path, validation_seeds) validation_labels = train_labels validation_labels_path = os.path.join(test_dir, "validation_labels.pt") torch.save(validation_labels, validation_labels_path) validation_indexes = train_indexes validation_indexes_path = os.path.join( test_dir, "validation_indexes.pt" ) torch.save(validation_indexes, validation_indexes_path) test_seeds = np.arange(4000, 6000).reshape(1000, 2) test_neg_seeds = train_neg_seeds + 2 test_seeds = np.concatenate((test_seeds, test_neg_seeds)) test_seeds_path = os.path.join(test_dir, "test_seeds.npy") np.save(test_seeds_path, test_seeds) test_labels = train_labels test_labels_path = os.path.join(test_dir, "test_labels.pt") torch.save(test_labels, test_labels_path) test_indexes = train_indexes test_indexes_path = os.path.join(test_dir, "test_indexes.pt") torch.save(test_indexes, test_indexes_path) yaml_content = f""" tasks: - name: link_prediction train_set: - type: null data: - name: seeds format: numpy in_memory: true path: {train_seeds_path} - name: labels format: torch in_memory: true path: {train_labels_path} - name: indexes format: torch in_memory: true path: {train_indexes_path} validation_set: - data: - name: seeds format: numpy in_memory: true path: {validation_seeds_path} - name: labels format: torch in_memory: true path: {validation_labels_path} - name: indexes format: torch in_memory: true path: {validation_indexes_path} test_set: - type: null data: - name: seeds 
format: numpy in_memory: true path: {test_seeds_path} - name: labels format: torch in_memory: true path: {test_labels_path} - name: indexes format: torch in_memory: true path: {test_indexes_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) # Verify train set. train_set = dataset.tasks[0].train_set assert len(train_set) == 1000 * 11 assert isinstance(train_set, gb.ItemSet) for i, (node_pair, label, index) in enumerate(train_set): assert node_pair[0] == train_seeds[i][0] assert node_pair[1] == train_seeds[i][1] assert label == train_labels[i] assert index == train_indexes[i] assert train_set.names == ("seeds", "labels", "indexes") train_set = None # Verify validation set. validation_set = dataset.tasks[0].validation_set assert len(validation_set) == 1000 * 11 assert isinstance(validation_set, gb.ItemSet) for i, (node_pair, label, index) in enumerate(validation_set): assert node_pair[0] == validation_seeds[i][0] assert node_pair[1] == validation_seeds[i][1] assert label == validation_labels[i] assert index == validation_indexes[i] assert validation_set.names == ("seeds", "labels", "indexes") validation_set = None # Verify test set. 
test_set = dataset.tasks[0].test_set assert len(test_set) == 1000 * 11 assert isinstance(test_set, gb.ItemSet) for i, (node_pair, label, index) in enumerate(test_set): assert node_pair[0] == test_seeds[i][0] assert label == test_labels[i] assert index == test_indexes[i] assert test_set.names == ("seeds", "labels", "indexes") test_set = None dataset = None def test_OnDiskDataset_TVTSet_HeteroItemSet_id_label(): """Test TVTSet which returns HeteroItemSet with IDs and labels.""" with tempfile.TemporaryDirectory() as test_dir: train_ids = np.arange(1000) train_labels = np.random.randint(0, 10, size=1000) train_data = np.vstack([train_ids, train_labels]).T train_path = os.path.join(test_dir, "train.npy") np.save(train_path, train_data) validation_ids = np.arange(1000, 2000) validation_labels = np.random.randint(0, 10, size=1000) validation_data = np.vstack([validation_ids, validation_labels]).T validation_path = os.path.join(test_dir, "validation.npy") np.save(validation_path, validation_data) test_ids = np.arange(2000, 3000) test_labels = np.random.randint(0, 10, size=1000) test_data = np.vstack([test_ids, test_labels]).T test_path = os.path.join(test_dir, "test.npy") np.save(test_path, test_data) yaml_content = f""" tasks: - name: node_classification train_set: - type: paper data: - name: seeds format: numpy in_memory: true path: {train_path} - type: author data: - name: seeds format: numpy path: {train_path} validation_set: - type: paper data: - name: seeds format: numpy path: {validation_path} - type: author data: - name: seeds format: numpy path: {validation_path} test_set: - type: paper data: - name: seeds format: numpy in_memory: false path: {test_path} - type: author data: - name: seeds format: numpy path: {test_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) # Verify train set. 
train_set = dataset.tasks[0].train_set assert len(train_set) == 2000 assert isinstance(train_set, gb.HeteroItemSet) for i, item in enumerate(train_set): assert isinstance(item, dict) assert len(item) == 1 key = list(item.keys())[0] assert key in ["paper", "author"] id, label = item[key] assert id == train_ids[i % 1000] assert label == train_labels[i % 1000] assert train_set.names == ("seeds",) train_set = None # Verify validation set. validation_set = dataset.tasks[0].validation_set assert len(validation_set) == 2000 assert isinstance(validation_set, gb.HeteroItemSet) for i, item in enumerate(validation_set): assert isinstance(item, dict) assert len(item) == 1 key = list(item.keys())[0] assert key in ["paper", "author"] id, label = item[key] assert id == validation_ids[i % 1000] assert label == validation_labels[i % 1000] assert validation_set.names == ("seeds",) validation_set = None # Verify test set. test_set = dataset.tasks[0].test_set assert len(test_set) == 2000 assert isinstance(test_set, gb.HeteroItemSet) for i, item in enumerate(test_set): assert isinstance(item, dict) assert len(item) == 1 key = list(item.keys())[0] assert key in ["paper", "author"] id, label = item[key] assert id == test_ids[i % 1000] assert label == test_labels[i % 1000] assert test_set.names == ("seeds",) test_set = None dataset = None


def test_OnDiskDataset_TVTSet_HeteroItemSet_node_pairs_labels():
    """Test TVTSet which returns HeteroItemSet with node pairs and labels."""
    # NOTE(review): the YAML literal is reproduced exactly as it appears in
    # this dump, where newlines/indentation are collapsed.
    with tempfile.TemporaryDirectory() as test_dir:
        train_seeds = np.arange(2000).reshape(1000, 2)
        train_seeds_path = os.path.join(test_dir, "train_seeds.npy")
        np.save(train_seeds_path, train_seeds)
        train_labels = np.random.randint(0, 10, size=1000)
        train_labels_path = os.path.join(test_dir, "train_labels.npy")
        np.save(train_labels_path, train_labels)

        validation_seeds = np.arange(2000, 4000).reshape(1000, 2)
        validation_seeds_path = os.path.join(test_dir, "validation_seeds.npy")
        np.save(validation_seeds_path, validation_seeds)
        validation_labels = np.random.randint(0, 10, size=1000)
        validation_labels_path = os.path.join(test_dir, "validation_labels.npy")
        np.save(validation_labels_path, validation_labels)

        test_seeds = np.arange(4000, 6000).reshape(1000, 2)
        test_seeds_path = os.path.join(test_dir, "test_seeds.npy")
        np.save(test_seeds_path, test_seeds)
        test_labels = np.random.randint(0, 10, size=1000)
        test_labels_path = os.path.join(test_dir, "test_labels.npy")
        np.save(test_labels_path, test_labels)

        # Both edge types of every split point at the same files, so each
        # split holds the same 1000 rows twice (hence the ``i % 1000`` below).
        yaml_content = f""" tasks: - name: edge_classification train_set: - type: paper:cites:paper data: - name: seeds format: numpy in_memory: true path: {train_seeds_path} - name: labels format: numpy in_memory: true path: {train_labels_path} - type: author:writes:paper data: - name: seeds format: numpy path: {train_seeds_path} - name: labels format: numpy path: {train_labels_path} validation_set: - type: paper:cites:paper data: - name: seeds format: numpy path: {validation_seeds_path} - name: labels format: numpy path: {validation_labels_path} - type: author:writes:paper data: - name: seeds format: numpy path: {validation_seeds_path} - name: labels format: numpy path: {validation_labels_path} test_set: - type: paper:cites:paper data: - name: seeds format: numpy in_memory: true path: {test_seeds_path} - name: labels format: numpy in_memory: true path: {test_labels_path} - type: author:writes:paper data: - name: seeds format: numpy in_memory: true path: {test_seeds_path} - name: labels format: numpy in_memory: true path: {test_labels_path} """
        dataset = write_yaml_and_load_dataset(yaml_content, test_dir)

        # Verify train set.
        train_set = dataset.tasks[0].train_set
        assert len(train_set) == 2000
        assert isinstance(train_set, gb.HeteroItemSet)
        for i, item in enumerate(train_set):
            assert isinstance(item, dict)
            assert len(item) == 1
            key = list(item.keys())[0]
            assert key in ["paper:cites:paper", "author:writes:paper"]
            node_pair, label = item[key]
            assert node_pair[0] == train_seeds[i % 1000][0]
            assert node_pair[1] == train_seeds[i % 1000][1]
            assert label == train_labels[i % 1000]
        assert train_set.names == ("seeds", "labels")
        train_set = None

        # Verify validation set.
        validation_set = dataset.tasks[0].validation_set
        assert len(validation_set) == 2000
        assert isinstance(validation_set, gb.HeteroItemSet)
        for i, item in enumerate(validation_set):
            assert isinstance(item, dict)
            assert len(item) == 1
            key = list(item.keys())[0]
            assert key in ["paper:cites:paper", "author:writes:paper"]
            node_pair, label = item[key]
            assert node_pair[0] == validation_seeds[i % 1000][0]
            assert node_pair[1] == validation_seeds[i % 1000][1]
            assert label == validation_labels[i % 1000]
        assert validation_set.names == ("seeds", "labels")
        validation_set = None

        # Verify test set.
        test_set = dataset.tasks[0].test_set
        assert len(test_set) == 2000
        assert isinstance(test_set, gb.HeteroItemSet)
        for i, item in enumerate(test_set):
            assert isinstance(item, dict)
            assert len(item) == 1
            key = list(item.keys())[0]
            assert key in ["paper:cites:paper", "author:writes:paper"]
            node_pair, label = item[key]
            assert node_pair[0] == test_seeds[i % 1000][0]
            assert node_pair[1] == test_seeds[i % 1000][1]
            assert label == test_labels[i % 1000]
        assert test_set.names == ("seeds", "labels")
        test_set = None
        dataset = None


def test_OnDiskDataset_Feature_heterograph():
    """Test Feature storage for typed (heterogeneous) node/edge features."""
    # NOTE(review): the YAML literal is reproduced exactly as it appears in
    # this dump, where newlines/indentation are collapsed.
    with tempfile.TemporaryDirectory() as test_dir:
        # Generate node data.
        node_data_paper = np.random.rand(1000, 10)
        node_data_paper_path = os.path.join(test_dir, "node_data_paper.npy")
        np.save(node_data_paper_path, node_data_paper)
        # random.randint is inclusive on both ends: labels fall in [0, 10].
        node_data_label = torch.tensor(
            [[random.randint(0, 10)] for _ in range(1000)]
        )
        node_data_label_path = os.path.join(test_dir, "node_data_label.npy")
        np.save(node_data_label_path, node_data_label)

        # Generate edge data.
        edge_data_writes = np.random.rand(1000, 10)
        edge_data_writes_path = os.path.join(test_dir, "edge_writes_paper.npy")
        np.save(edge_data_writes_path, edge_data_writes)
        edge_data_label = torch.tensor(
            [[random.randint(0, 10)] for _ in range(1000)]
        )
        edge_data_label_path = os.path.join(test_dir, "edge_data_label.npy")
        np.save(edge_data_label_path, edge_data_label)

        # Generate YAML.
        yaml_content = f""" feature_data: - domain: node type: paper name: feat format: numpy in_memory: false path: {node_data_paper_path} num_categories: 10 - domain: node type: paper name: labels format: numpy in_memory: true path: {node_data_label_path} - domain: edge type: "author:writes:paper" name: feat format: numpy in_memory: false path: {edge_data_writes_path} num_categories: 10 - domain: edge type: "author:writes:paper" name: labels format: numpy in_memory: true path: {edge_data_label_path} """
        dataset = write_yaml_and_load_dataset(yaml_content, test_dir)

        # Verify feature data storage.
        feature_data = dataset.feature
        assert len(feature_data) == 4

        # Verify node feature data.
        assert torch.equal(
            feature_data.read("node", "paper", "feat"),
            torch.tensor(node_data_paper),
        )
        # Extra YAML keys such as ``num_categories`` surface as metadata.
        assert (
            feature_data.metadata("node", "paper", "feat")["num_categories"]
            == 10
        )
        assert torch.equal(
            feature_data.read("node", "paper", "labels"),
            node_data_label.clone().detach(),
        )
        assert len(feature_data.metadata("node", "paper", "labels")) == 0

        # Verify edge feature data.
        assert torch.equal(
            feature_data.read("edge", "author:writes:paper", "feat"),
            torch.tensor(edge_data_writes),
        )
        assert (
            feature_data.metadata("edge", "author:writes:paper", "feat")[
                "num_categories"
            ]
            == 10
        )
        assert torch.equal(
            feature_data.read("edge", "author:writes:paper", "labels"),
            edge_data_label.clone().detach(),
        )
        assert (
            len(feature_data.metadata("edge", "author:writes:paper", "labels"))
            == 0
        )
        feature_data = None
        dataset = None


def test_OnDiskDataset_Feature_homograph(): """Test Feature storage.""" with tempfile.TemporaryDirectory() as test_dir: # Generate node data. node_data_feat = np.random.rand(1000, 10) node_data_feat_path = os.path.join(test_dir, "node_data_feat.npy") np.save(node_data_feat_path, node_data_feat) node_data_label = torch.tensor( [[random.randint(0, 10)] for _ in range(1000)] ) node_data_label_path = os.path.join(test_dir, "node_data_label.npy") np.save(node_data_label_path, node_data_label) # Generate edge data. edge_data_feat = np.random.rand(1000, 10) edge_data_feat_path = os.path.join(test_dir, "edge_data_feat.npy") np.save(edge_data_feat_path, edge_data_feat) edge_data_label = torch.tensor( [[random.randint(0, 10)] for _ in range(1000)] ) edge_data_label_path = os.path.join(test_dir, "edge_data_label.npy") np.save(edge_data_label_path, edge_data_label) # Generate YAML. # ``type`` is not specified in the YAML. yaml_content = f""" feature_data: - domain: node name: feat format: numpy in_memory: false path: {node_data_feat_path} num_categories: 10 - domain: node name: labels format: numpy in_memory: true path: {node_data_label_path} - domain: edge name: feat format: numpy in_memory: false path: {edge_data_feat_path} num_categories: 10 - domain: edge name: labels format: numpy in_memory: true path: {edge_data_label_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) # Verify feature data storage. feature_data = dataset.feature assert len(feature_data) == 4 # Verify node feature data. 
def test_OnDiskDataset_Graph_Exceptions():
    """Test exceptions in parsing graph topology.

    A ``graph_topology`` section whose ``type`` is ``CSRSamplingGraph`` must
    be rejected with a ``pydantic.ValidationError`` when the dataset is
    loaded.
    """
    # Invalid graph type.
    invalid_topology_yaml = """
            graph_topology:
              type: CSRSamplingGraph
              path: /path/to/graph
        """
    with tempfile.TemporaryDirectory() as test_dir:
        write_yaml_file(invalid_topology_yaml, test_dir)

        expected_error = pytest.raises(
            pydantic.ValidationError,
            match="1 validation error for OnDiskMetaData",
        )
        with expected_error:
            gb.OnDiskDataset(test_dir, force_preprocess=False).load()
def test_OnDiskDataset_Graph_homogeneous():
    """Test homogeneous graph topology.

    A random homogeneous graph is saved with ``torch.save``, referenced from
    ``graph_topology`` in the YAML, and the graph exposed by the loaded
    dataset is compared with the one that was saved.
    """
    csc_indptr, indices = gbt.random_homo_graph(1000, 10 * 1000)
    saved = gb.fused_csc_sampling_graph(csc_indptr, indices)

    with tempfile.TemporaryDirectory() as test_dir:
        graph_path = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
        torch.save(saved, graph_path)

        yaml_content = f"""
            graph_topology:
              type: FusedCSCSamplingGraph
              path: {graph_path}
        """
        loaded = write_yaml_and_load_dataset(yaml_content, test_dir).graph

        # Sizes and structure tensors must match.
        for attr in ("total_num_nodes", "total_num_edges"):
            assert getattr(saved, attr) == getattr(loaded, attr)
        for attr in ("csc_indptr", "indices"):
            assert torch.equal(getattr(saved, attr), getattr(loaded, attr))

        # No type information was given, so it must be absent on both sides.
        for attr in (
            "node_type_offset",
            "type_per_edge",
            "node_type_to_id",
            "edge_type_to_id",
        ):
            assert getattr(saved, attr) is None
            assert getattr(loaded, attr) is None


def test_OnDiskDataset_Graph_heterogeneous():
    """Test heterogeneous graph topology.

    Same round trip as the homogeneous test, but the graph is built with
    node/edge type information, which must also be preserved.
    """
    (
        csc_indptr,
        indices,
        node_type_offset,
        type_per_edge,
        node_type_to_id,
        edge_type_to_id,
    ) = gbt.random_hetero_graph(1000, 10 * 1000, 3, 4)
    type_info = {
        "node_type_offset": node_type_offset,
        "type_per_edge": type_per_edge,
        "node_type_to_id": node_type_to_id,
        "edge_type_to_id": edge_type_to_id,
    }
    saved = gb.fused_csc_sampling_graph(csc_indptr, indices, **type_info)

    with tempfile.TemporaryDirectory() as test_dir:
        graph_path = os.path.join(test_dir, "fused_csc_sampling_graph.pt")
        torch.save(saved, graph_path)

        yaml_content = f"""
            graph_topology:
              type: FusedCSCSamplingGraph
              path: {graph_path}
        """
        loaded = write_yaml_and_load_dataset(yaml_content, test_dir).graph

        for attr in ("total_num_nodes", "total_num_edges"):
            assert getattr(saved, attr) == getattr(loaded, attr)
        # Tensor-valued attributes are compared element-wise.
        for attr in (
            "csc_indptr",
            "indices",
            "node_type_offset",
            "type_per_edge",
        ):
            assert torch.equal(getattr(saved, attr), getattr(loaded, attr))
        # The type-name -> id mappings are compared with ``==``.
        for attr in ("node_type_to_id", "edge_type_to_id"):
            assert getattr(saved, attr) == getattr(loaded, attr)
def test_OnDiskDataset_Metadata():
    """Test metadata of OnDiskDataset.

    A YAML that only contains ``dataset_name`` is written and loaded twice
    into the same directory; after each load the dataset must report that
    name.
    """
    dataset_name = "graphbolt_test"
    with tempfile.TemporaryDirectory() as test_dir:
        yaml_content = f"""
            dataset_name: {dataset_name}
        """
        # Two identical passes: the second one writes and loads the same
        # content again on top of the first result.
        for _ in range(2):
            loaded = write_yaml_and_load_dataset(yaml_content, test_dir)
            assert loaded.dataset_name == dataset_name
@pytest.mark.parametrize("edge_fmt", ["csv", "numpy"])
def test_OnDiskDataset_preprocess_homogeneous(edge_fmt):
    """Test preprocess of OnDiskDataset on a random homogeneous graph.

    The dataset is generated and preprocessed twice in separate temporary
    directories: once without and once with original edge IDs.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes, num_edges, num_classes = 4000, 20000, 10

    def _generate_and_preprocess(root, include_original_edge_id):
        # Generate a random graph under ``root``, preprocess it and return
        # the parsed preprocessed metadata.
        metadata = gbt.random_homo_graphbolt_graph(
            root,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
            edge_fmt=edge_fmt,
        )
        with open(os.path.join(root, "metadata.yaml"), "w") as f:
            f.write(metadata)
        output_file = gb.ondisk_dataset.preprocess_ondisk_dataset(
            root, include_original_edge_id=include_original_edge_id
        )
        with open(output_file, "rb") as f:
            return yaml.load(f, Loader=yaml.Loader)

    with tempfile.TemporaryDirectory() as test_dir:
        processed = _generate_and_preprocess(test_dir, False)

        assert processed["dataset_name"] == dataset_name
        assert processed["tasks"][0]["num_classes"] == num_classes
        # The raw ``graph`` section is replaced by ``graph_topology``.
        assert "graph" not in processed
        assert "graph_topology" in processed

        graph = load_sampling_graph(test_dir, processed)
        assert graph.total_num_nodes == num_nodes
        assert graph.total_num_edges == num_edges

        node_attrs = graph.node_attributes
        assert node_attrs is not None and "feat" in node_attrs
        edge_attrs = graph.edge_attributes
        assert edge_attrs is not None
        assert gb.ORIGINAL_EDGE_ID not in edge_attrs
        assert "feat" in edge_attrs

        # With fanout 1, no more edges than seeds can be sampled.
        num_samples = 100
        fanout = 1
        seeds = torch.arange(0, num_samples, dtype=graph.indices.dtype)
        subgraph = graph.sample_neighbors(seeds, torch.tensor([fanout]))
        assert len(subgraph.sampled_csc.indices) <= num_samples

    with tempfile.TemporaryDirectory() as test_dir:
        # Test generating original_edge_id.
        processed = _generate_and_preprocess(test_dir, True)
        graph = load_sampling_graph(test_dir, processed)
        edge_attrs = graph.edge_attributes
        assert edge_attrs is not None and gb.ORIGINAL_EDGE_ID in edge_attrs
        # Drop the references before the temporary directory is removed.
        edge_attrs = None
        graph = None
@pytest.mark.parametrize("auto_cast", [False, True])
def test_OnDiskDataset_preprocess_homogeneous_hardcode(
    auto_cast, edge_fmt="numpy"
):
    """Test preprocess of OnDiskDataset on a small hand-written graph.

    Original graph in COO (row = source, column = destination):
        0 1 1 0 0
        0 0 1 1 0
        0 0 0 1 1
        1 0 0 0 1
        1 1 0 0 0

    node_feats: [0.0, 1.9, 2.8, 3.7, 4.6]
    edge_feats: [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]
    """
    dataset_name = "graphbolt_test"
    num_nodes = 5
    num_edges = 10
    num_classes = 1
    node_feat_values = [0.0, 1.9, 2.8, 3.7, 4.6]
    edge_feat_values = [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]

    with tempfile.TemporaryDirectory() as test_dir:
        for sub_dir in ("edges", "data", "set"):
            os.makedirs(os.path.join(test_dir, sub_dir), exist_ok=True)

        # Generate edges: first row holds sources, second row destinations.
        edge_path = os.path.join("edges", "edge.npy")
        np.save(
            os.path.join(test_dir, edge_path),
            np.array(
                [
                    [0, 0, 1, 1, 2, 2, 3, 3, 4, 4],
                    [1, 2, 2, 3, 3, 4, 4, 0, 0, 1],
                ],
                dtype=np.int64,
            ),
        )

        # Generate graph edge-feats.
        edge_feat_path = os.path.join("data", "edge-feat.npy")
        np.save(
            os.path.join(test_dir, edge_feat_path),
            np.array(edge_feat_values, dtype=np.float64),
        )

        # Generate node-feats.
        node_feat_path = os.path.join("data", "node-feat.npy")
        np.save(
            os.path.join(test_dir, node_feat_path),
            np.array(node_feat_values, dtype=np.float64),
        )

        # Generate train/test/valid set; every split lists all five nodes.
        train_path = os.path.join("set", "train.npy")
        valid_path = os.path.join("set", "valid.npy")
        test_path = os.path.join("set", "test.npy")
        for split_path in (train_path, valid_path, test_path):
            np.save(
                os.path.join(test_dir, split_path), np.array([0, 1, 2, 3, 4])
            )

        yaml_content = (
            f"dataset_name: {dataset_name}\n"
            f"graph:\n"
            f"  nodes:\n"
            f"    - num: {num_nodes}\n"
            f"  edges:\n"
            f"    - format: {edge_fmt}\n"
            f"      path: {edge_path}\n"
            f"  feature_data:\n"
            f"    - domain: node\n"
            f"      type: null\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {node_feat_path}\n"
            f"    - domain: edge\n"
            f"      type: null\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {edge_feat_path}\n"
            f"feature_data:\n"
            f"  - domain: node\n"
            f"    type: null\n"
            f"    name: feat\n"
            f"    format: numpy\n"
            f"    in_memory: true\n"
            f"    path: {node_feat_path}\n"
            f"  - domain: edge\n"
            f"    type: null\n"
            f"    name: feat\n"
            f"    format: numpy\n"
            f"    path: {edge_feat_path}\n"
            f"tasks:\n"
            f"  - name: node_classification\n"
            f"    num_classes: {num_classes}\n"
            f"    train_set:\n"
            f"      - type: null\n"
            f"        data:\n"
            f"          - name: seeds\n"
            f"            format: numpy\n"
            f"            in_memory: true\n"
            f"            path: {train_path}\n"
            f"    validation_set:\n"
            f"      - type: null\n"
            f"        data:\n"
            f"          - name: seeds\n"
            f"            format: numpy\n"
            f"            in_memory: true\n"
            f"            path: {valid_path}\n"
            f"    test_set:\n"
            f"      - type: null\n"
            f"        data:\n"
            f"          - name: seeds\n"
            f"            format: numpy\n"
            f"            in_memory: true\n"
            f"            path: {test_path}\n"
        )
        with open(os.path.join(test_dir, "metadata.yaml"), "w") as f:
            f.write(yaml_content)

        output_file = gb.ondisk_dataset.preprocess_ondisk_dataset(
            test_dir,
            include_original_edge_id=True,
            auto_cast_to_optimal_dtype=auto_cast,
        )
        with open(output_file, "rb") as f:
            processed = yaml.load(f, Loader=yaml.Loader)

        assert processed["dataset_name"] == dataset_name
        assert processed["tasks"][0]["num_classes"] == num_classes
        assert "graph" not in processed
        assert "graph_topology" in processed

        graph = load_sampling_graph(test_dir, processed)
        assert graph.total_num_nodes == num_nodes
        assert graph.total_num_edges == num_edges

        # Expected CSC layout: every node has exactly two in-edges.
        assert torch.equal(
            graph.csc_indptr, torch.tensor([0, 2, 4, 6, 8, 10])
        )
        assert torch.equal(
            graph.indices, torch.tensor([3, 4, 0, 4, 0, 1, 1, 2, 2, 3])
        )
        assert torch.equal(
            graph.node_attributes["feat"],
            torch.tensor(node_feat_values, dtype=torch.float64),
        )
        assert torch.equal(
            graph.edge_attributes["feat"],
            torch.tensor(edge_feat_values, dtype=torch.float64),
        )
        assert torch.equal(
            graph.edge_attributes[gb.ORIGINAL_EDGE_ID],
            torch.tensor([7, 8, 0, 9, 1, 2, 3, 4, 5, 6]),
        )

        # Index tensors are int32 with auto-casting and int64 without.
        expected_dtype = torch.int32 if auto_cast else torch.int64
        for index_tensor in (
            graph.csc_indptr,
            graph.indices,
            graph.edge_attributes[gb.ORIGINAL_EDGE_ID],
        ):
            assert index_tensor.dtype == expected_dtype

        num_samples = 5
        fanout = 1
        seeds = torch.arange(0, num_samples, dtype=graph.indices.dtype)
        subgraph = graph.sample_neighbors(seeds, torch.tensor([fanout]))
        assert len(subgraph.sampled_csc.indices) <= num_samples
@pytest.mark.parametrize("auto_cast", [False, True])
def test_OnDiskDataset_preprocess_heterogeneous_hardcode(
    auto_cast, edge_fmt="numpy"
):
    """Test preprocess of OnDiskDataset on a hand-written heterogeneous graph.

    Original graph in COO over the homogenized node IDs
    (row = source, column = destination):
        0 1 1 0 0
        0 0 1 1 0
        0 0 0 1 1
        1 0 0 0 1
        1 1 0 0 0

    Node type A owns homogenized IDs [0, 1]; node type B owns [2, 3, 4].
    Edge types as referenced by the expected ``type_per_edge`` below:
        0: A:a_a:A, 1: A:a_b:B, 2: B:b_a:A, 3: B:b_b:B

    node_feats: [0.0, 1.9, 2.8, 3.7, 4.6]
    edge_feats: [0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]

    Parameters
    ----------
    auto_cast : bool
        Forwarded as ``auto_cast_to_optimal_dtype``; decides the dtypes
        expected of the index tensors.
    edge_fmt : str
        ``format`` written for every edge file in the YAML.
    """
    dataset_name = "graphbolt_test"
    # Per-type counts. They are written into the YAML and summed for the
    # total-size assertions so that they cannot drift from the data below.
    num_nodes = {
        "A": 2,
        "B": 3,
    }
    num_edges = {
        ("A", "a_a", "A"): 1,
        ("A", "a_b", "B"): 3,
        ("B", "b_b", "B"): 3,
        ("B", "b_a", "A"): 3,
    }

    with tempfile.TemporaryDirectory() as test_dir:
        # Generate edges (first row: sources, second row: destinations,
        # both as per-type node IDs).
        os.makedirs(os.path.join(test_dir, "edges"), exist_ok=True)
        np.save(
            os.path.join(test_dir, "edges", "a_a.npy"),
            np.array([[0], [1]], dtype=np.int64),
        )
        np.save(
            os.path.join(test_dir, "edges", "a_b.npy"),
            np.array([[0, 1, 1], [0, 0, 1]], dtype=np.int64),
        )
        np.save(
            os.path.join(test_dir, "edges", "b_b.npy"),
            np.array([[0, 0, 1], [1, 2, 2]], dtype=np.int64),
        )
        np.save(
            os.path.join(test_dir, "edges", "b_a.npy"),
            np.array([[1, 2, 2], [0, 0, 1]], dtype=np.int64),
        )

        # Generate node features.
        os.makedirs(os.path.join(test_dir, "data"), exist_ok=True)
        np.save(
            os.path.join(test_dir, "data", "A-feat.npy"),
            np.array([0.0, 1.9], dtype=np.float64),
        )
        np.save(
            os.path.join(test_dir, "data", "B-feat.npy"),
            np.array([2.8, 3.7, 4.6], dtype=np.float64),
        )

        # Generate edge features.
        np.save(
            os.path.join(test_dir, "data", "a_a-feat.npy"),
            np.array([0.0], dtype=np.float64),
        )
        np.save(
            os.path.join(test_dir, "data", "a_b-feat.npy"),
            np.array([1.1, 2.2, 3.3], dtype=np.float64),
        )
        np.save(
            os.path.join(test_dir, "data", "b_b-feat.npy"),
            np.array([4.4, 5.5, 6.6], dtype=np.float64),
        )
        np.save(
            os.path.join(test_dir, "data", "b_a-feat.npy"),
            np.array([7.7, 8.8, 9.9], dtype=np.float64),
        )

        yaml_content = (
            f"dataset_name: {dataset_name}\n"
            f"graph:\n"
            f"  nodes:\n"
            f"    - type: A\n"
            f"      num: {num_nodes['A']}\n"
            f"    - type: B\n"
            f"      num: {num_nodes['B']}\n"
            f"  edges:\n"
            f"    - type: A:a_a:A\n"
            f"      format: {edge_fmt}\n"
            f"      path: {os.path.join('edges', 'a_a.npy')}\n"
            f"    - type: A:a_b:B\n"
            f"      format: {edge_fmt}\n"
            f"      path: {os.path.join('edges', 'a_b.npy')}\n"
            f"    - type: B:b_b:B\n"
            f"      format: {edge_fmt}\n"
            f"      path: {os.path.join('edges', 'b_b.npy')}\n"
            f"    - type: B:b_a:A\n"
            f"      format: {edge_fmt}\n"
            f"      path: {os.path.join('edges', 'b_a.npy')}\n"
            f"  feature_data:\n"
            f"    - domain: node\n"
            f"      type: A\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {os.path.join(test_dir, 'data', 'A-feat.npy')}\n"
            f"    - domain: node\n"
            f"      type: B\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {os.path.join(test_dir, 'data', 'B-feat.npy')}\n"
            f"    - domain: edge\n"
            f"      type: A:a_a:A\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {os.path.join(test_dir, 'data', 'a_a-feat.npy')}\n"
            f"    - domain: edge\n"
            f"      type: A:a_b:B\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {os.path.join(test_dir, 'data', 'a_b-feat.npy')}\n"
            f"    - domain: edge\n"
            f"      type: B:b_b:B\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {os.path.join(test_dir, 'data', 'b_b-feat.npy')}\n"
            f"    - domain: edge\n"
            f"      type: B:b_a:A\n"
            f"      name: feat\n"
            f"      format: numpy\n"
            f"      in_memory: true\n"
            f"      path: {os.path.join(test_dir, 'data', 'b_a-feat.npy')}\n"
        )
        yaml_file = os.path.join(test_dir, "metadata.yaml")
        with open(yaml_file, "w") as f:
            f.write(yaml_content)

        output_file = gb.ondisk_dataset.preprocess_ondisk_dataset(
            test_dir,
            include_original_edge_id=True,
            auto_cast_to_optimal_dtype=auto_cast,
        )

        # NOTE: ``yaml.Loader`` is only acceptable here because the file was
        # produced by the preprocessing call right above.
        with open(output_file, "rb") as f:
            processed_dataset = yaml.load(f, Loader=yaml.Loader)

        assert processed_dataset["dataset_name"] == dataset_name
        assert "graph" not in processed_dataset
        assert "graph_topology" in processed_dataset

        fused_csc_sampling_graph = load_sampling_graph(
            test_dir, processed_dataset
        )
        assert fused_csc_sampling_graph.total_num_nodes == sum(
            num_nodes.values()
        )
        assert fused_csc_sampling_graph.total_num_edges == sum(
            num_edges.values()
        )
        assert torch.equal(
            fused_csc_sampling_graph.csc_indptr,
            torch.tensor([0, 2, 4, 6, 8, 10]),
        )
        assert torch.equal(
            fused_csc_sampling_graph.indices,
            torch.tensor([3, 4, 0, 4, 0, 1, 1, 2, 2, 3]),
        )
        assert torch.equal(
            fused_csc_sampling_graph.node_attributes["feat"],
            torch.tensor([0.0, 1.9, 2.8, 3.7, 4.6], dtype=torch.float64),
        )
        assert torch.equal(
            fused_csc_sampling_graph.edge_attributes["feat"],
            torch.tensor(
                [0.0, 1.1, 2.2, 3.3, 7.7, 8.8, 9.9, 4.4, 5.5, 6.6],
                dtype=torch.float64,
            ),
        )
        assert torch.equal(
            fused_csc_sampling_graph.type_per_edge,
            torch.tensor([2, 2, 0, 2, 1, 1, 1, 3, 3, 3]),
        )
        # Original edge IDs are per edge type, hence the repeated values.
        assert torch.equal(
            fused_csc_sampling_graph.edge_attributes[gb.ORIGINAL_EDGE_ID],
            torch.tensor([0, 1, 0, 2, 0, 1, 2, 0, 1, 2]),
        )

        # Index tensors are int32 with auto-casting and int64 without.
        expected_dtype = torch.int32 if auto_cast else torch.int64
        assert fused_csc_sampling_graph.csc_indptr.dtype == expected_dtype
        assert fused_csc_sampling_graph.indices.dtype == expected_dtype
        assert (
            fused_csc_sampling_graph.edge_attributes[gb.ORIGINAL_EDGE_ID].dtype
            == expected_dtype
        )
        assert fused_csc_sampling_graph.node_type_offset.dtype == expected_dtype
        # Edge type IDs are stored as uint8 when auto-casting is on.
        expected_etype_dtype = torch.uint8 if auto_cast else torch.int64
        assert (
            fused_csc_sampling_graph.type_per_edge.dtype == expected_etype_dtype
        )
def test_OnDiskDataset_preprocess_path():
    """Test if the preprocess function can catch the path error.

    Three invalid arguments are passed to ``gb.OnDiskDataset``: the YAML file
    itself, a directory that does not exist, and an existing directory
    without ``metadata.yaml``. Each must raise ``RuntimeError``.
    """
    dataset_name = "graphbolt_test"
    with tempfile.TemporaryDirectory() as test_dir:
        yaml_content = f"""
            dataset_name: {dataset_name}
        """
        yaml_file = os.path.join(test_dir, "metadata.yaml")
        with open(yaml_file, "w") as f:
            f.write(yaml_content)
        fake_dir = os.path.join(test_dir, "fake_dir")

        # Case1. Test the passed in is the yaml file path.
        file_instead_of_dir = pytest.raises(
            RuntimeError,
            match="The dataset must be a directory. "
            rf"But got {re.escape(yaml_file)}",
        )
        with file_instead_of_dir:
            gb.OnDiskDataset(yaml_file)

        # Case2. Test the passed in is a fake directory.
        missing_dir = pytest.raises(
            RuntimeError,
            match=rf"Invalid dataset path: {re.escape(fake_dir)}",
        )
        with missing_dir:
            gb.OnDiskDataset(fake_dir)

        # Case3. Test the passed in is the dataset directory.
        # But the metadata.yaml is not in the directory.
        os.makedirs(fake_dir, exist_ok=True)
        missing_metadata = pytest.raises(
            RuntimeError,
            match=r"metadata.yaml does not exist.",
        )
        with missing_metadata:
            gb.OnDiskDataset(fake_dir)
def test_OnDiskDataset_preprocess_yaml_content():
    """Test if the preprocessed metadata.yaml is correct.

    A dataset with CSV edges, one edge feature inside ``graph``, one
    top-level node feature and three task splits is preprocessed. The
    resulting YAML is compared with a hand-written expectation, and every
    file it references must exist.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes = 4000
    num_edges = 20000
    num_classes = 10

    with tempfile.TemporaryDirectory() as test_dir:
        for sub_dir in ("edges", "data", "set"):
            os.makedirs(os.path.join(test_dir, sub_dir), exist_ok=True)

        # Generate random edges: five outgoing edges per node.
        sources = np.repeat(np.arange(num_nodes), 5)
        destinations = np.random.randint(0, num_nodes, size=(num_edges))
        edge_table = pd.DataFrame(
            np.stack([sources, destinations], axis=1), columns=["src", "dst"]
        )
        # Write into edges/edge.csv
        edge_path = os.path.join("edges", "edge.csv")
        edge_table.to_csv(
            os.path.join(test_dir, edge_path),
            index=False,
            header=False,
        )

        # Generate random graph edge-feats.
        feature_edge = os.path.join("data", "edge-feat.npy")
        np.save(
            os.path.join(test_dir, feature_edge), np.random.rand(num_edges, 5)
        )

        # Generate random node-feats.
        feature_node = os.path.join("data", "node-feat.npy")
        np.save(
            os.path.join(test_dir, feature_node), np.random.rand(num_nodes, 10)
        )

        # Generate train/test/valid set. Each split pairs 1000 consecutive
        # node IDs with the following 1000 and adds a random label column.
        split_paths = {}
        for offset, split in enumerate(("train", "validation", "test")):
            first = np.arange(offset * 1000, (offset + 1) * 1000)
            second = np.arange((offset + 1) * 1000, (offset + 2) * 1000)
            labels = np.random.randint(0, 10, size=1000)
            split_data = np.vstack([(first, second), labels]).T
            split_paths[split] = os.path.join("set", f"{split}.npy")
            np.save(os.path.join(test_dir, split_paths[split]), split_data)
        train_path = split_paths["train"]
        validation_path = split_paths["validation"]
        test_path = split_paths["test"]

        yaml_content = f"""
            dataset_name: {dataset_name}
            graph: # graph structure and required attributes.
              nodes:
                - num: {num_nodes}
              edges:
                - format: csv
                  path: {edge_path}
              feature_data:
                - domain: edge
                  type: null
                  name: feat
                  format: numpy
                  in_memory: true
                  path: {feature_edge}
            feature_data:
              - domain: node
                type: null
                name: feat
                format: numpy
                in_memory: false
                path: {feature_node}
            tasks:
              - name: node_classification
                num_classes: {num_classes}
                train_set:
                  - type: null
                    data:
                      - format: numpy
                        path: {train_path}
                validation_set:
                  - type: null
                    data:
                      - format: numpy
                        path: {validation_path}
                test_set:
                  - type: null
                    data:
                      - format: numpy
                        path: {test_path}
        """
        with open(os.path.join(test_dir, "metadata.yaml"), "w") as f:
            f.write(yaml_content)

        preprocessed_metadata_path = gb.preprocess_ondisk_dataset(test_dir)
        with open(preprocessed_metadata_path, "r") as f:
            yaml_data = yaml.safe_load(f)

        # In the expected output every path lives under ``preprocessed``.
        topo_path = os.path.join("preprocessed", "fused_csc_sampling_graph.pt")
        out_feature_node = os.path.join("preprocessed", feature_node)
        out_train_path = os.path.join("preprocessed", train_path)
        out_validation_path = os.path.join("preprocessed", validation_path)
        out_test_path = os.path.join("preprocessed", test_path)
        target_yaml_content = f"""
            dataset_name: {dataset_name}
            graph_topology:
              type: FusedCSCSamplingGraph
              path: {topo_path}
            feature_data:
              - domain: node
                type: null
                name: feat
                format: numpy
                in_memory: false
                path: {out_feature_node}
            tasks:
              - name: node_classification
                num_classes: {num_classes}
                train_set:
                  - type: null
                    data:
                      - format: numpy
                        path: {out_train_path}
                validation_set:
                  - type: null
                    data:
                      - format: numpy
                        path: {out_validation_path}
                test_set:
                  - type: null
                    data:
                      - format: numpy
                        path: {out_test_path}
            include_original_edge_id: False
        """
        target_yaml_data = yaml.safe_load(target_yaml_content)

        # Check yaml content.
        assert (
            yaml_data == target_yaml_data
        ), "The preprocessed metadata.yaml is not correct."

        # Check file existence.
        referenced = [
            yaml_data["graph_topology"]["path"],
            yaml_data["feature_data"][0]["path"],
        ]
        referenced.extend(
            yaml_data["tasks"][0][set_name][0]["data"][0]["path"]
            for set_name in ["train_set", "validation_set", "test_set"]
        )
        for relative_path in referenced:
            assert os.path.exists(os.path.join(test_dir, relative_path))
def test_OnDiskDataset_preprocess_force_preprocess(capsys):
    """Test force preprocess of OnDiskDataset.

    The dataset is preprocessed three times: initially, again with
    ``force_preprocess=False`` after the task name was edited, and finally
    with ``force_preprocess=True``. The printed messages and the task name
    in the preprocessed YAML are checked after each run.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes = 4000
    num_edges = 20000
    num_classes = 10

    with tempfile.TemporaryDirectory() as test_dir:

        def _preprocess(force):
            # Run preprocessing and return (output path, printed lines).
            output_path = gb.ondisk_dataset.preprocess_ondisk_dataset(
                test_dir, include_original_edge_id=False, force_preprocess=force
            )
            return output_path, capsys.readouterr().out.split("\n")

        def _task_name(metadata_path):
            with open(metadata_path, "r") as f:
                return yaml.safe_load(f)["tasks"][0]["name"]

        # Generate random graph.
        yaml_content = gbt.random_homo_graphbolt_graph(
            test_dir,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
        )
        yaml_file = os.path.join(test_dir, "metadata.yaml")
        with open(yaml_file, "w") as f:
            f.write(yaml_content)

        # First preprocess on-disk dataset.
        output_path, printed = _preprocess(False)
        assert printed == [
            "Start to preprocess the on-disk dataset.",
            "Finish preprocessing the on-disk dataset.",
            "",
        ]
        assert _task_name(output_path) == "link_prediction"

        # Change yaml_data, but do not force preprocess on-disk dataset.
        with open(yaml_file, "r") as f:
            yaml_data = yaml.safe_load(f)
        yaml_data["tasks"][0]["name"] = "fake_name"
        with open(yaml_file, "w") as f:
            yaml.dump(yaml_data, f)
        output_path, printed = _preprocess(False)
        assert printed == ["The dataset is already preprocessed.", ""]
        # The old preprocessed result is still in place.
        assert _task_name(output_path) == "link_prediction"

        # Force preprocess on-disk dataset.
        output_path, printed = _preprocess(True)
        assert printed == [
            "The on-disk dataset is re-preprocessing, so the existing "
            + "preprocessed dataset has been removed.",
            "Start to preprocess the on-disk dataset.",
            "Finish preprocessing the on-disk dataset.",
            "",
        ]
        assert _task_name(output_path) == "fake_name"
def test_OnDiskDataset_preprocess_auto_force_preprocess(capsys):
    """Test that preprocessing reruns by itself when its inputs change.

    After an initial preprocessing run, the dataset is preprocessed again
    (without ``force_preprocess``) after each of these changes:

    1. the task name in ``metadata.yaml`` is edited,
    2. the edge feature file is overwritten,
    3. ``include_original_edge_id`` is flipped,
    4. nothing is changed.

    Changes 1-3 must print the re-preprocessing messages; step 4 must report
    that the dataset is already preprocessed.
    """
    first_run_output = [
        "Start to preprocess the on-disk dataset.",
        "Finish preprocessing the on-disk dataset.",
        "",
    ]
    rerun_output = [
        "The on-disk dataset is re-preprocessing, so the existing "
        + "preprocessed dataset has been removed.",
        "Start to preprocess the on-disk dataset.",
        "Finish preprocessing the on-disk dataset.",
        "",
    ]

    with tempfile.TemporaryDirectory() as test_dir:
        # All metadata fields are specified.
        dataset_name = "graphbolt_test"
        num_nodes = 4000
        num_edges = 20000
        num_classes = 10

        def _preprocess(include_original_edge_id):
            # Run preprocessing and return (output path, printed lines).
            output_path = gb.ondisk_dataset.preprocess_ondisk_dataset(
                test_dir, include_original_edge_id=include_original_edge_id
            )
            return output_path, capsys.readouterr().out.split("\n")

        def _read_yaml(path):
            with open(path, "r") as f:
                return yaml.safe_load(f)

        # Generate random graph.
        yaml_content = gbt.random_homo_graphbolt_graph(
            test_dir,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
        )
        yaml_file = os.path.join(test_dir, "metadata.yaml")
        with open(yaml_file, "w") as f:
            f.write(yaml_content)

        # First preprocess on-disk dataset.
        preprocessed_metadata_path, captured = _preprocess(False)
        assert captured == first_run_output
        target_yaml_data = _read_yaml(preprocessed_metadata_path)
        assert target_yaml_data["tasks"][0]["name"] == "link_prediction"

        # 1. Change yaml_data.
        yaml_data = _read_yaml(yaml_file)
        yaml_data["tasks"][0]["name"] = "fake_name"
        with open(yaml_file, "w") as f:
            yaml.dump(yaml_data, f)
        preprocessed_metadata_path, captured = _preprocess(False)
        assert captured == rerun_output
        target_yaml_data = _read_yaml(preprocessed_metadata_path)
        assert target_yaml_data["tasks"][0]["name"] == "fake_name"

        # 2. Change edge feature.
        edge_feats = np.random.rand(num_edges, num_classes)
        edge_feat_path = os.path.join("data", "edge-feat.npy")
        np.save(os.path.join(test_dir, edge_feat_path), edge_feats)
        preprocessed_metadata_path, captured = _preprocess(False)
        assert captured == rerun_output
        preprocessed_edge_feat = np.load(
            os.path.join(test_dir, "preprocessed", edge_feat_path)
        )
        # Compare the array contents. ``a.all() == b.all()`` would only
        # compare two booleans and pass for almost any pair of arrays.
        assert np.array_equal(preprocessed_edge_feat, edge_feats)
        target_yaml_data = _read_yaml(preprocessed_metadata_path)
        assert target_yaml_data["include_original_edge_id"] is False

        # 3. Change include_original_edge_id.
        preprocessed_metadata_path, captured = _preprocess(True)
        assert captured == rerun_output
        target_yaml_data = _read_yaml(preprocessed_metadata_path)
        assert target_yaml_data["include_original_edge_id"] is True

        # 4. Change nothing.
        _, captured = _preprocess(True)
        assert captured == ["The dataset is already preprocessed.", ""]
def test_OnDiskDataset_preprocess_not_include_eids():
    """Test the warning raised when edge IDs are dropped.

    Preprocessing the random dataset with ``include_original_edge_id=False``
    must emit a ``GBWarning`` about edge features being stored without edge
    IDs.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes, num_edges, num_classes = 4000, 20000, 10

    with tempfile.TemporaryDirectory() as test_dir:
        # Generate random graph.
        metadata = gbt.random_homo_graphbolt_graph(
            test_dir,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
        )
        with open(os.path.join(test_dir, "metadata.yaml"), "w") as f:
            f.write(metadata)

        expected_warning = pytest.warns(
            GBWarning,
            match="Edge feature is stored, but edge IDs are not saved.",
        )
        with expected_warning:
            gb.ondisk_dataset.preprocess_ondisk_dataset(
                test_dir, include_original_edge_id=False
            )
@pytest.mark.parametrize("edge_fmt", ["csv", "numpy"])
def test_OnDiskDataset_load_name(edge_fmt):
    """Test that an edited ``dataset_name`` is picked up by ``load()``.

    ``yaml_data["dataset_name"]`` is overwritten on a constructed, not yet
    loaded dataset; after ``load()`` the dataset must report the new name.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes, num_edges, num_classes = 4000, 20000, 10

    with tempfile.TemporaryDirectory() as test_dir:
        # Generate random graph.
        metadata = gbt.random_homo_graphbolt_graph(
            test_dir,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
            edge_fmt=edge_fmt,
        )
        with open(os.path.join(test_dir, "metadata.yaml"), "w") as f:
            f.write(metadata)

        # Check modify `dataset_name` field.
        dataset = gb.OnDiskDataset(test_dir)
        dataset.yaml_data["dataset_name"] = "fake_name"
        dataset.load()
        assert dataset.dataset_name == "fake_name"
        # Drop the reference before the temporary directory is removed.
        dataset = None
@pytest.mark.parametrize("edge_fmt", ["csv", "numpy"])
def test_OnDiskDataset_load_feature(edge_fmt):
    """Test loading after editing the first ``feature_data`` entry.

    The ``in_memory``, ``format`` and ``path`` fields of
    ``yaml_data["feature_data"][0]`` are modified in turn, and the outcome of
    ``load_dataset`` (equal features or a specific exception) is checked.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes, num_edges, num_classes = 4000, 20000, 10

    with tempfile.TemporaryDirectory() as test_dir:
        # Generate random graph.
        metadata = gbt.random_homo_graphbolt_graph(
            test_dir,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
            edge_fmt=edge_fmt,
        )
        with open(os.path.join(test_dir, "metadata.yaml"), "w") as f:
            f.write(metadata)

        # Case1. Test modify the `in_memory` field.
        dataset = gb.OnDiskDataset(test_dir).load()
        before = dataset.feature
        dataset.yaml_data["feature_data"][0]["in_memory"] = True
        load_dataset(dataset)
        after = dataset.feature
        # After modify the `in_memory` field, the feature data should be
        # equal.
        assert torch.equal(
            before.read("node", None, "feat"),
            after.read("node", None, "feat"),
        )

        # Case2. Test modify the `format` field.
        # If `format` is torch and `in_memory` is False, it will
        # raise an AssertionError.
        dataset = gb.OnDiskDataset(test_dir)
        entry = dataset.yaml_data["feature_data"][0]
        entry["in_memory"] = False
        entry["format"] = "torch"
        with pytest.raises(
            AssertionError,
            match="^Pytorch tensor can only be loaded in memory,",
        ):
            load_dataset(dataset)

        # If `format` is torch and `in_memory` is True, it will
        # raise an UnpicklingError.
        dataset = gb.OnDiskDataset(test_dir)
        entry = dataset.yaml_data["feature_data"][0]
        entry["in_memory"] = True
        entry["format"] = "torch"
        with pytest.raises(pickle.UnpicklingError):
            load_dataset(dataset)

        # Case3. Test modify the `path` field.
        # Use invalid path will raise an FileNotFoundError.
        dataset = gb.OnDiskDataset(test_dir)
        dataset.yaml_data["feature_data"][0]["path"] = "fake_path"
        with pytest.raises(
            FileNotFoundError,
            match=r"\[Errno 2\] No such file or directory:",
        ):
            load_dataset(dataset)

        # Modifying the `path` field to an absolute path should work.
        # In os.path.join, if a segment is an absolute path (which
        # on Windows requires both a drive and a root), then all
        # previous segments are ignored and joining continues from
        # the absolute path segment.
        dataset = load_dataset(gb.OnDiskDataset(test_dir))
        before = dataset.feature
        entry = dataset.yaml_data["feature_data"][0]
        entry["path"] = os.path.join(test_dir, entry["path"])
        load_dataset(dataset)
        after = dataset.feature
        assert torch.equal(
            before.read("node", None, "feat"),
            after.read("node", None, "feat"),
        )

        # Drop the references before the temporary directory is removed.
        before = None
        after = None
        dataset = None
@pytest.mark.parametrize("edge_fmt", ["csv", "numpy"])
def test_OnDiskDataset_load_graph(edge_fmt):
    """Test loading the graph after editing ``graph_topology``.

    The first temporary dataset is loaded with original edge IDs and then
    reloaded with an invalid ``type``, an invalid ``path`` and an absolute
    ``path``. The second one checks that no original edge IDs are stored
    when ``include_original_edge_id=False``.
    """
    # All metadata fields are specified.
    dataset_name = "graphbolt_test"
    num_nodes, num_edges, num_classes = 4000, 20000, 10

    def _write_random_dataset(root):
        # Generate random graph and write its metadata.yaml into ``root``.
        metadata = gbt.random_homo_graphbolt_graph(
            root,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
            edge_fmt=edge_fmt,
        )
        with open(os.path.join(root, "metadata.yaml"), "w") as f:
            f.write(metadata)

    with tempfile.TemporaryDirectory() as test_dir:
        _write_random_dataset(test_dir)

        # Check the different original_edge_id option to load edge_attributes.
        dataset = gb.OnDiskDataset(
            test_dir, include_original_edge_id=True
        ).load()
        edge_attrs = dataset.graph.edge_attributes
        assert edge_attrs is not None and gb.ORIGINAL_EDGE_ID in edge_attrs
        edge_attrs = None

        # Case1. Test modify the `type` field.
        dataset = gb.OnDiskDataset(test_dir)
        dataset.yaml_data["graph_topology"]["type"] = "fake_type"
        with pytest.raises(
            pydantic.ValidationError,
            # As error message diffs in pydantic 1.x and 2.x, we just match
            # keyword only.
            match="'FusedCSCSamplingGraph'",
        ):
            dataset.load()

        # Case2. Test modify the `path` field.
        dataset = gb.OnDiskDataset(test_dir)
        dataset.yaml_data["graph_topology"]["path"] = "fake_path"
        with pytest.raises(
            FileNotFoundError,
            match=r"\[Errno 2\] No such file or directory:",
        ):
            dataset.load()

        # Modifying the `path` field to an absolute path should work.
        # In os.path.join, if a segment is an absolute path (which
        # on Windows requires both a drive and a root), then all
        # previous segments are ignored and joining continues from
        # the absolute path segment.
        dataset = gb.OnDiskDataset(test_dir).load()
        before = dataset.graph
        topology = dataset.yaml_data["graph_topology"]
        topology["path"] = os.path.join(test_dir, topology["path"])
        dataset.load()
        after = dataset.graph
        assert torch.equal(
            before.csc_indptr,
            after.csc_indptr,
        )

        # Drop the references before the temporary directory is removed.
        before = None
        after = None
        dataset = None

    with tempfile.TemporaryDirectory() as test_dir:
        _write_random_dataset(test_dir)

        # Test do not generate original_edge_id.
        dataset = gb.OnDiskDataset(
            test_dir, include_original_edge_id=False
        ).load()
        edge_attrs = dataset.graph.edge_attributes
        assert edge_attrs is None or gb.ORIGINAL_EDGE_ID not in edge_attrs
        edge_attrs = None
        dataset = None
dataset = gb.OnDiskDataset(test_dir) dataset.yaml_data["tasks"][0]["name"] = "fake_name" dataset.load() assert dataset.tasks[0].metadata["name"] == "fake_name" # Case2. Test modify the `num_classes` field. dataset = gb.OnDiskDataset(test_dir) dataset.yaml_data["tasks"][0]["num_classes"] = 100 dataset.load() assert dataset.tasks[0].metadata["num_classes"] == 100 # Case3. Test modify the `format` field. dataset = gb.OnDiskDataset(test_dir) # Change the `format` field to torch. dataset.yaml_data["tasks"][0]["train_set"][0]["data"][0][ "format" ] = "torch" with pytest.raises(pickle.UnpicklingError): dataset.load() dataset = gb.OnDiskDataset(test_dir) dataset.yaml_data["tasks"][0]["train_set"][0]["data"][0][ "format" ] = "torch" # Change the `in_memory` field to False will also raise an # UnpicklingError. Unlike the case of testing `feature_data`. dataset.yaml_data["tasks"][0]["train_set"][0]["data"][0][ "in_memory" ] = False with pytest.raises(pickle.UnpicklingError): dataset.load() # Case4. Test modify the `path` field. dataset = gb.OnDiskDataset(test_dir) # Use invalid path will raise an FileNotFoundError. dataset.yaml_data["tasks"][0]["train_set"][0]["data"][0][ "path" ] = "fake_path" with pytest.raises( FileNotFoundError, match=r"\[Errno 2\] No such file or directory:", ): dataset.load() # Modifying the `path` field to an absolute path should work. # In os.path.join, if a segment is an absolute path (which # on Windows requires both a drive and a root), then all # previous segments are ignored and joining continues from # the absolute path segment. 
dataset = gb.OnDiskDataset(test_dir).load() original_train_set = dataset.tasks[0].train_set._items dataset.yaml_data["tasks"][0]["train_set"][0]["data"][0][ "path" ] = os.path.join( test_dir, dataset.yaml_data["tasks"][0]["train_set"][0]["data"][0]["path"], ) dataset.load() modify_train_set = dataset.tasks[0].train_set._items assert torch.equal( original_train_set[0], modify_train_set[0], ) original_train_set = None modify_train_set = None dataset = None def test_OnDiskDataset_all_nodes_set_homo(): """Test homograph's all nodes set of OnDiskDataset.""" csc_indptr, indices = gbt.random_homo_graph(1000, 10 * 1000) graph = gb.fused_csc_sampling_graph(csc_indptr, indices) with tempfile.TemporaryDirectory() as test_dir: graph_path = os.path.join(test_dir, "fused_csc_sampling_graph.pt") torch.save(graph, graph_path) yaml_content = f""" graph_topology: type: FusedCSCSamplingGraph path: {graph_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) all_nodes_set = dataset.all_nodes_set assert isinstance(all_nodes_set, gb.ItemSet) assert all_nodes_set.names == ("seeds",) for i, item in enumerate(all_nodes_set): assert i == item dataset = None def test_OnDiskDataset_all_nodes_set_hetero(): """Test heterograph's all nodes set of OnDiskDataset.""" ( csc_indptr, indices, node_type_offset, type_per_edge, node_type_to_id, edge_type_to_id, ) = gbt.random_hetero_graph(1000, 10 * 1000, 3, 4) graph = gb.fused_csc_sampling_graph( csc_indptr, indices, node_type_offset=node_type_offset, type_per_edge=type_per_edge, node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, edge_attributes=None, ) with tempfile.TemporaryDirectory() as test_dir: graph_path = os.path.join(test_dir, "fused_csc_sampling_graph.pt") torch.save(graph, graph_path) yaml_content = f""" graph_topology: type: FusedCSCSamplingGraph path: {graph_path} """ dataset = write_yaml_and_load_dataset(yaml_content, test_dir) all_nodes_set = dataset.all_nodes_set assert isinstance(all_nodes_set, 
gb.HeteroItemSet) assert all_nodes_set.names == ("seeds",) for i, item in enumerate(all_nodes_set): assert len(item) == 1 assert isinstance(item, dict) dataset = None @pytest.mark.parametrize("fmt", ["numpy", "torch"]) def test_OnDiskDataset_load_1D_feature(fmt): with tempfile.TemporaryDirectory() as test_dir: # All metadata fields are specified. dataset_name = "graphbolt_test" num_nodes = 4 num_edges = 20 num_classes = 1 type_name = "npy" if fmt == "numpy" else "pt" # Generate random edges. nodes = np.repeat(np.arange(num_nodes), 5) neighbors = np.random.randint(0, num_nodes, size=(num_edges)) edges = np.stack([nodes, neighbors], axis=1) # Write into edges/edge.csv os.makedirs(os.path.join(test_dir, "edges"), exist_ok=True) edges = pd.DataFrame(edges, columns=["src", "dst"]) edge_path = os.path.join("edges", "edge.csv") edges.to_csv( os.path.join(test_dir, edge_path), index=False, header=False, ) # Generate random graph edge-feats. edge_feats = np.random.rand(num_edges, 5) os.makedirs(os.path.join(test_dir, "data"), exist_ok=True) edge_feat_path = os.path.join("data", f"edge-feat.{type_name}") # Generate random 1-D node-feats. node_feats = np.random.rand(num_nodes) node_feat_path = os.path.join("data", f"node-feat.{type_name}") assert node_feats.ndim == 1 # Generate 1-D train set. os.makedirs(os.path.join(test_dir, "set"), exist_ok=True) train_path = os.path.join("set", f"train.{type_name}") if fmt == "numpy": np.save(os.path.join(test_dir, edge_feat_path), edge_feats) np.save(os.path.join(test_dir, node_feat_path), node_feats) np.save(os.path.join(test_dir, train_path), np.array([0, 1, 0])) else: torch.save( torch.from_numpy(edge_feats), os.path.join(test_dir, edge_feat_path), ) torch.save( torch.from_numpy(node_feats), os.path.join(test_dir, node_feat_path), ) torch.save( torch.tensor([0, 1, 0]), os.path.join(test_dir, train_path) ) yaml_content = f""" dataset_name: {dataset_name} graph: # graph structure and required attributes. 
def test_BuiltinDataset():
    """Test downloading, re-loading and rejecting names with BuiltinDataset.

    A test-only dataset name is registered in the class-level list
    ``gb.BuiltinDataset._all_datasets`` for the duration of the test and is
    removed again on exit (also when an assertion fails), so the shared list
    is not left modified for other tests.
    """
    dataset_name = "test-dataset-231207"
    # Add dataset to the builtin dataset list for testing only. The `seeds`
    # suffix is added to datasets when downloading, so the name is registered
    # with the `-seeds` suffix here.
    registered_name = dataset_name + "-seeds"
    gb.BuiltinDataset._all_datasets.append(registered_name)
    try:
        with tempfile.TemporaryDirectory() as test_dir:
            # Case 1: download from DGL S3 storage.
            dataset = gb.BuiltinDataset(
                name=dataset_name, root=test_dir
            ).load()
            assert dataset.graph is not None
            assert dataset.feature is not None
            assert dataset.tasks is not None
            assert dataset.dataset_name == dataset_name

            # Case 2: dataset is already downloaded.
            dataset = gb.BuiltinDataset(
                name=dataset_name, root=test_dir
            ).load()
            assert dataset.graph is not None
            assert dataset.feature is not None
            assert dataset.tasks is not None
            assert dataset.dataset_name == dataset_name

            dataset = None

            # Case 3: dataset is not available.
            missing_name = "fake_name-seeds"
            with pytest.raises(
                RuntimeError,
                match=rf"Dataset {missing_name} is not available.*",
            ):
                _ = gb.BuiltinDataset(name=missing_name, root=test_dir).load()
    finally:
        # Undo the registration so the global list does not grow across runs.
        gb.BuiltinDataset._all_datasets.remove(registered_name)
@pytest.mark.parametrize("auto_cast", [True, False])
@pytest.mark.parametrize("include_original_edge_id", [True, False])
@pytest.mark.parametrize("edge_fmt", ["csv", "numpy"])
def test_OnDiskDataset_heterogeneous(
    auto_cast, include_original_edge_id, edge_fmt
):
    """Preprocess and instantiate OnDiskDataset for heterogeneous graph.

    Raw data with two node types and two edge types is generated, loaded
    through ``OnDiskDataset`` and checked for node/edge totals, index dtype,
    stored attributes, task metadata and feature sizes. Finally every item
    set is pushed through a sampling + feature-fetching data pipeline to make
    sure iteration works.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        dataset_name = "OnDiskDataset_hetero"
        num_nodes = {
            "user": 1000,
            "item": 2000,
        }
        num_edges = {
            ("user", "follow", "user"): 10000,
            ("user", "click", "item"): 20000,
        }
        num_classes = 10
        gbt.generate_raw_data_for_hetero_dataset(
            test_dir,
            dataset_name,
            num_nodes,
            num_edges,
            num_classes,
            edge_fmt=edge_fmt,
        )

        dataset = gb.OnDiskDataset(
            test_dir,
            include_original_edge_id=include_original_edge_id,
            auto_cast_to_optimal_dtype=auto_cast,
        ).load()

        assert dataset.dataset_name == dataset_name

        graph = dataset.graph
        assert isinstance(graph, gb.FusedCSCSamplingGraph)
        # Totals are summed over all node / edge types. Summing the dict
        # values directly avoids a generator variable that shadows the dict.
        assert graph.total_num_nodes == sum(num_nodes.values())
        assert graph.total_num_edges == sum(num_edges.values())
        # With auto casting the indices are expected as int32, else int64.
        expected_dtype = torch.int32 if auto_cast else torch.int64
        assert graph.indices.dtype == expected_dtype
        assert (
            graph.node_attributes is not None
            and "feat" in graph.node_attributes
        )
        assert (
            graph.edge_attributes is not None
            and "feat" in graph.edge_attributes
        )
        # Original edge IDs must be present whenever they were requested.
        assert (
            not include_original_edge_id
        ) or gb.ORIGINAL_EDGE_ID in graph.edge_attributes

        tasks = dataset.tasks
        assert len(tasks) == 1
        assert isinstance(tasks[0].train_set, gb.HeteroItemSet)
        assert isinstance(tasks[0].validation_set, gb.HeteroItemSet)
        assert isinstance(tasks[0].test_set, gb.HeteroItemSet)
        assert tasks[0].metadata["num_classes"] == num_classes
        assert tasks[0].metadata["name"] == "node_classification"

        assert dataset.feature.size("node", "user", "feat")[0] == num_classes
        assert dataset.feature.size("node", "item", "feat")[0] == num_classes

        # Smoke-test the data pipeline on every item set; only successful
        # iteration is checked, not the produced batches.
        for itemset in [
            tasks[0].train_set,
            tasks[0].validation_set,
            tasks[0].test_set,
            dataset.all_nodes_set,
        ]:
            datapipe = gb.ItemSampler(itemset, batch_size=10)
            datapipe = datapipe.sample_neighbor(graph, [-1])
            datapipe = datapipe.fetch_feature(
                dataset.feature, node_feature_keys={"user": ["feat"]}
            )
            dataloader = gb.DataLoader(datapipe)
            for _ in dataloader:
                pass

        graph = None
        tasks = None
        dataset = None
def test_OnDiskDataset_auto_force_preprocess(capsys):
    """Check when OnDiskDataset re-runs preprocessing without being forced.

    No ``force_preprocess`` argument is passed anywhere in this test. The
    printed messages are used to tell whether preprocessing ran again after
    the metadata file, an edge feature file or ``include_original_edge_id``
    changed, and that it is skipped when nothing changed.
    """
    fresh_output = [
        "Start to preprocess the on-disk dataset.",
        "Finish preprocessing the on-disk dataset.",
        "",
    ]
    redo_output = [
        "The on-disk dataset is re-preprocessing, so the existing "
        + "preprocessed dataset has been removed.",
    ] + fresh_output
    cached_output = ["The dataset is already preprocessed.", ""]

    def _load_and_capture(root_dir, keep_edge_ids):
        # Load the dataset and return it together with the stdout lines
        # printed since the previous capture.
        loaded = gb.OnDiskDataset(
            root_dir, include_original_edge_id=keep_edge_ids
        ).load()
        printed = capsys.readouterr().out.split("\n")
        return loaded, printed

    with tempfile.TemporaryDirectory() as test_dir:
        # All metadata fields are specified.
        dataset_name = "graphbolt_test"
        num_nodes = 4000
        num_edges = 20000
        num_classes = 10

        # Generate random graph and write its metadata file.
        yaml_file = os.path.join(test_dir, "metadata.yaml")
        with open(yaml_file, "w") as f:
            f.write(
                gbt.random_homo_graphbolt_graph(
                    test_dir,
                    dataset_name,
                    num_nodes,
                    num_edges,
                    num_classes,
                )
            )

        # First preprocess on-disk dataset.
        dataset, captured = _load_and_capture(test_dir, False)
        assert captured == fresh_output
        tasks = dataset.tasks
        assert tasks[0].metadata["name"] == "link_prediction"

        # 1. Change yaml_data.
        with open(yaml_file, "r") as f:
            yaml_data = yaml.safe_load(f)
        yaml_data["tasks"][0]["name"] = "fake_name"
        with open(yaml_file, "w") as f:
            yaml.dump(yaml_data, f)
        dataset, captured = _load_and_capture(test_dir, False)
        assert captured == redo_output
        tasks = dataset.tasks
        assert tasks[0].metadata["name"] == "fake_name"

        # 2. Change edge feature.
        edge_feats = np.random.rand(num_edges, num_classes)
        edge_feat_path = os.path.join("data", "edge-feat.npy")
        np.save(os.path.join(test_dir, edge_feat_path), edge_feats)
        dataset, captured = _load_and_capture(test_dir, False)
        assert captured == redo_output
        # The freshly written features must be the ones that get loaded.
        assert torch.equal(
            dataset.feature.read("edge", None, "feat"),
            torch.from_numpy(edge_feats),
        )
        graph = dataset.graph
        assert gb.ORIGINAL_EDGE_ID not in graph.edge_attributes

        # 3. Change include_original_edge_id.
        dataset, captured = _load_and_capture(test_dir, True)
        assert captured == redo_output
        graph = dataset.graph
        assert gb.ORIGINAL_EDGE_ID in graph.edge_attributes

        # 4. Change Nothing.
        dataset, captured = _load_and_capture(test_dir, True)
        assert captured == cached_output

        graph = None
        tasks = None
        dataset = None
yaml_content = gbt.random_homo_graphbolt_graph( test_dir, dataset_name, num_nodes, num_edges, num_classes, ) yaml_file = os.path.join(test_dir, "metadata.yaml") with open(yaml_file, "w") as f: f.write(yaml_content) with pytest.warns( GBWarning, match="Edge feature is stored, but edge IDs are not saved.", ): gb.OnDiskDataset(test_dir, include_original_edge_id=False) def test_OnDiskTask_repr_heterogeneous(): item_set = gb.HeteroItemSet( { "user": gb.ItemSet(torch.arange(0, 5), names="seeds"), "item": gb.ItemSet(torch.arange(5, 10), names="seeds"), } ) metadata = {"name": "node_classification"} task = gb.OnDiskTask(metadata, item_set, item_set, item_set) expected_str = ( "OnDiskTask(validation_set=HeteroItemSet(\n" " itemsets={'user': ItemSet(\n" " items=(tensor([0, 1, 2, 3, 4]),),\n" " names=('seeds',),\n" " ), 'item': ItemSet(\n" " items=(tensor([5, 6, 7, 8, 9]),),\n" " names=('seeds',),\n" " )},\n" " names=('seeds',),\n" " ),\n" " train_set=HeteroItemSet(\n" " itemsets={'user': ItemSet(\n" " items=(tensor([0, 1, 2, 3, 4]),),\n" " names=('seeds',),\n" " ), 'item': ItemSet(\n" " items=(tensor([5, 6, 7, 8, 9]),),\n" " names=('seeds',),\n" " )},\n" " names=('seeds',),\n" " ),\n" " test_set=HeteroItemSet(\n" " itemsets={'user': ItemSet(\n" " items=(tensor([0, 1, 2, 3, 4]),),\n" " names=('seeds',),\n" " ), 'item': ItemSet(\n" " items=(tensor([5, 6, 7, 8, 9]),),\n" " names=('seeds',),\n" " )},\n" " names=('seeds',),\n" " ),\n" " metadata={'name': 'node_classification'},)" ) assert repr(task) == expected_str, task def test_OnDiskDataset_load_tasks_selectively(): """Test preprocess of OnDiskDataset.""" with tempfile.TemporaryDirectory() as test_dir: # All metadata fields are specified. dataset_name = "graphbolt_test" num_nodes = 4000 num_edges = 20000 num_classes = 10 # Generate random graph. 
yaml_content = gbt.random_homo_graphbolt_graph( test_dir, dataset_name, num_nodes, num_edges, num_classes, ) train_path = os.path.join("set", "train.npy") yaml_content += f""" - name: node_classification num_classes: {num_classes} train_set: - type: null data: - format: numpy path: {train_path} """ yaml_file = os.path.join(test_dir, "metadata.yaml") with open(yaml_file, "w") as f: f.write(yaml_content) # Case1. Test load all tasks. dataset = gb.OnDiskDataset(test_dir).load() assert len(dataset.tasks) == 2 # Case2. Test load tasks selectively. dataset = gb.OnDiskDataset(test_dir).load(tasks="link_prediction") assert len(dataset.tasks) == 1 assert dataset.tasks[0].metadata["name"] == "link_prediction" dataset = gb.OnDiskDataset(test_dir).load(tasks=["link_prediction"]) assert len(dataset.tasks) == 1 assert dataset.tasks[0].metadata["name"] == "link_prediction" # Case3. Test load tasks with non-existent task name. with pytest.warns( GBWarning, match="Below tasks are not found in YAML: {'fake-name'}. Skipped.", ): dataset = gb.OnDiskDataset(test_dir).load(tasks=["fake-name"]) assert len(dataset.tasks) == 0 # Case4. Test load tasks selectively with incorrect task type. with pytest.raises(TypeError): dataset = gb.OnDiskDataset(test_dir).load(tasks=2) dataset = None def test_OnDiskDataset_preprocess_graph_with_single_type(): """Test for graph with single node/edge type.""" with tempfile.TemporaryDirectory() as test_dir: # All metadata fields are specified. dataset_name = "graphbolt_test" num_nodes = 4000 num_edges = 20000 # Generate random edges. nodes = np.repeat(np.arange(num_nodes), 5) neighbors = np.random.randint(0, num_nodes, size=(num_edges)) edges = np.stack([nodes, neighbors], axis=1) # Write into edges/edge.csv os.makedirs(os.path.join(test_dir, "edges/"), exist_ok=True) edges = pd.DataFrame(edges, columns=["src", "dst"]) edges.to_csv( os.path.join(test_dir, "edges/edge.csv"), index=False, header=False, ) # Generate random graph edge-feats. 
edge_feats = np.random.rand(num_edges, 5) os.makedirs(os.path.join(test_dir, "data/"), exist_ok=True) np.save(os.path.join(test_dir, "data/edge-feat.npy"), edge_feats) # Generate random node-feats. node_feats = np.random.rand(num_nodes, 10) np.save(os.path.join(test_dir, "data/node-feat.npy"), node_feats) yaml_content = f""" dataset_name: {dataset_name} graph: # graph structure and required attributes. nodes: - num: {num_nodes} type: author edges: - type: author:collab:author format: csv path: edges/edge.csv feature_data: - domain: edge type: author:collab:author name: feat format: numpy path: data/edge-feat.npy - domain: node type: author name: feat format: numpy path: data/node-feat.npy """ yaml_file = os.path.join(test_dir, "metadata.yaml") with open(yaml_file, "w") as f: f.write(yaml_content) dataset = gb.OnDiskDataset(test_dir).load() assert dataset.dataset_name == dataset_name graph = dataset.graph assert isinstance(graph, gb.FusedCSCSamplingGraph) assert graph.total_num_nodes == num_nodes assert graph.total_num_edges == num_edges assert ( graph.node_attributes is not None and "feat" in graph.node_attributes ) assert ( graph.edge_attributes is not None and "feat" in graph.edge_attributes ) assert torch.equal(graph.node_type_offset, torch.tensor([0, num_nodes])) assert torch.equal( graph.type_per_edge, torch.zeros(num_edges), ) assert graph.edge_type_to_id == {"author:collab:author": 0} assert graph.node_type_to_id == {"author": 0} ================================================ FILE: tests/python/pytorch/graphbolt/impl/test_sampled_subgraph_impl.py ================================================ import unittest import backend as F import dgl import dgl.graphbolt as gb import pytest import torch from dgl.graphbolt.impl.sampled_subgraph_impl import SampledSubgraphImpl def _assert_container_equal(lhs, rhs): if isinstance(lhs, torch.Tensor): assert isinstance(rhs, torch.Tensor) assert torch.equal(lhs, rhs) elif isinstance(lhs, tuple): assert isinstance(rhs, 
tuple) assert len(lhs) == len(rhs) for l, r in zip(lhs, rhs): _assert_container_equal(l, r) elif isinstance(lhs, gb.CSCFormatBase): assert isinstance(rhs, gb.CSCFormatBase) assert len(lhs.indptr) == len(rhs.indptr) assert len(lhs.indices) == len(rhs.indices) _assert_container_equal(lhs.indptr, rhs.indptr) _assert_container_equal(lhs.indices, rhs.indices) elif isinstance(lhs, dict): assert isinstance(rhs, dict) assert len(lhs) == len(rhs) for key, value in lhs.items(): assert key in rhs _assert_container_equal(value, rhs[key]) @pytest.mark.parametrize("reverse_row", [True, False]) @pytest.mark.parametrize("reverse_column", [True, False]) def test_exclude_edges_homo_deduplicated(reverse_row, reverse_column): csc_formats = gb.CSCFormatBase( indptr=torch.tensor([0, 0, 1, 2, 2, 3]), indices=torch.tensor([0, 3, 2]) ) if reverse_row: original_row_node_ids = torch.tensor([10, 15, 11, 24, 9]) src_to_exclude = torch.tensor([11]) else: original_row_node_ids = None src_to_exclude = torch.tensor([2]) if reverse_column: original_column_node_ids = torch.tensor([10, 15, 11, 24, 9]) dst_to_exclude = torch.tensor([9]) else: original_column_node_ids = None dst_to_exclude = torch.tensor([4]) original_edge_ids = torch.Tensor([5, 9, 10]) subgraph = SampledSubgraphImpl( csc_formats, original_column_node_ids, original_row_node_ids, original_edge_ids, ) edges_to_exclude = torch.cat((src_to_exclude, dst_to_exclude)).view(2, -1).T result = subgraph.exclude_edges(edges_to_exclude) expected_csc_formats = gb.CSCFormatBase( indptr=torch.tensor([0, 0, 1, 2, 2, 2]), indices=torch.tensor([0, 3]) ) if reverse_row: expected_row_node_ids = torch.tensor([10, 15, 11, 24, 9]) else: expected_row_node_ids = None if reverse_column: expected_column_node_ids = torch.tensor([10, 15, 11, 24, 9]) else: expected_column_node_ids = None expected_edge_ids = torch.Tensor([5, 9]) _assert_container_equal(result.sampled_csc, expected_csc_formats) _assert_container_equal( result.original_column_node_ids, 
@pytest.mark.parametrize("reverse_row", [True, False])
@pytest.mark.parametrize("reverse_column", [True, False])
def test_exclude_edges_homo_duplicated(reverse_row, reverse_column):
    """Exclude an edge that occurs twice in a homogeneous sampled subgraph.

    The sampled CSC stores the edge (row 3 -> column 2) twice. It is named
    by original IDs (24 / 11) on whichever side has a node-ID mapping and by
    local IDs otherwise. Both copies, and their edge IDs, are expected to be
    removed while the node-ID mappings stay untouched.
    """

    def _node_ids():
        # Build a new tensor on every call so that the inputs and the
        # expected values are independent objects.
        return torch.tensor([10, 15, 11, 24, 9])

    row_ids = _node_ids() if reverse_row else None
    column_ids = _node_ids() if reverse_column else None
    src_to_exclude = torch.tensor([24 if reverse_row else 3])
    dst_to_exclude = torch.tensor([11 if reverse_column else 2])

    # NOTE(review): `torch.Tensor(...)` builds a float tensor for the edge
    # IDs; kept as in the original test.
    subgraph = SampledSubgraphImpl(
        gb.CSCFormatBase(
            indptr=torch.tensor([0, 0, 1, 3, 3, 5]),
            indices=torch.tensor([0, 3, 3, 2, 2]),
        ),
        column_ids,
        row_ids,
        torch.Tensor([5, 9, 9, 10, 10]),
    )

    # A single (src, dst) pair laid out as one row of an N x 2 tensor.
    stacked = torch.cat((src_to_exclude, dst_to_exclude))
    edges_to_exclude = stacked.view(2, -1).T
    result = subgraph.exclude_edges(edges_to_exclude)

    _assert_container_equal(
        result.sampled_csc,
        gb.CSCFormatBase(
            indptr=torch.tensor([0, 0, 1, 1, 1, 3]),
            indices=torch.tensor([0, 2, 2]),
        ),
    )
    _assert_container_equal(
        result.original_column_node_ids,
        _node_ids() if reverse_column else None,
    )
    _assert_container_equal(
        result.original_row_node_ids,
        _node_ids() if reverse_row else None,
    )
    _assert_container_equal(
        result.original_edge_ids, torch.Tensor([5, 10, 10])
    )
@pytest.mark.parametrize("reverse_row", [True, False])
@pytest.mark.parametrize("reverse_column", [True, False])
def test_exclude_edges_hetero_duplicated(reverse_row, reverse_column):
    """Exclude edges from a heterogeneous subgraph with duplicated edges.

    For edge type ``A:relation:B`` the pairs (2, 0) and (0, 2) are excluded
    (given as original IDs on a side that has a node-ID mapping). Only the
    two copies of the edge (1 -> 1), with edge ID 20, are expected to remain
    and the node-ID mappings must be returned unchanged.
    """
    etype = "A:relation:B"

    def _row_ids():
        # Fresh mapping per call: inputs and expectations stay independent.
        return {"A": torch.tensor([13, 14, 15])}

    def _column_ids():
        return {"B": torch.tensor([10, 11, 12])}

    row_ids = _row_ids() if reverse_row else None
    column_ids = _column_ids() if reverse_column else None
    src_to_exclude = torch.tensor([15, 13] if reverse_row else [2, 0])
    dst_to_exclude = torch.tensor([10, 12] if reverse_column else [0, 2])

    subgraph = SampledSubgraphImpl(
        sampled_csc={
            etype: gb.CSCFormatBase(
                indptr=torch.tensor([0, 2, 4, 5]),
                indices=torch.tensor([2, 2, 1, 1, 0]),
            )
        },
        original_column_node_ids=column_ids,
        original_row_node_ids=row_ids,
        original_edge_ids={etype: torch.tensor([19, 19, 20, 20, 21])},
    )

    # One (src, dst) pair per row, keyed by edge type.
    stacked = torch.cat((src_to_exclude, dst_to_exclude))
    result = subgraph.exclude_edges({etype: stacked.view(2, -1).T})

    _assert_container_equal(
        result.sampled_csc,
        {
            etype: gb.CSCFormatBase(
                indptr=torch.tensor([0, 0, 2, 2]),
                indices=torch.tensor([1, 1]),
            )
        },
    )
    _assert_container_equal(
        result.original_column_node_ids,
        _column_ids() if reverse_column else None,
    )
    _assert_container_equal(
        result.original_row_node_ids,
        _row_ids() if reverse_row else None,
    )
    _assert_container_equal(
        result.original_edge_ids, {etype: torch.tensor([20, 20])}
    )
@pytest.mark.parametrize("reverse_row", [True, False])
@pytest.mark.parametrize("reverse_column", [True, False])
def test_exclude_edges_homo_duplicated_tensor(reverse_row, reverse_column):
    """Exclude a tensor-specified edge from a homogeneous subgraph that stores
    that edge twice.

    The sampled CSC holds the edges 0->1, 3->2 (twice) and 2->4 (twice).  The
    single pair (3, 2) is excluded, so both copies of 3->2 (edge ID 9) must
    disappear.  When ``reverse_row`` / ``reverse_column`` is set, original
    node IDs are attached to that side and the pair to exclude is written
    with those original IDs (24 and 11) instead of the local indices.
    """
    csc_formats = gb.CSCFormatBase(
        indptr=torch.tensor([0, 0, 1, 3, 3, 5]),
        indices=torch.tensor([0, 3, 3, 2, 2]),
    )
    if reverse_row:
        original_row_node_ids = torch.tensor([10, 15, 11, 24, 9])
        src_to_exclude = torch.tensor([24])
    else:
        original_row_node_ids = None
        src_to_exclude = torch.tensor([3])
    if reverse_column:
        original_column_node_ids = torch.tensor([10, 15, 11, 24, 9])
        dst_to_exclude = torch.tensor([11])
    else:
        original_column_node_ids = None
        dst_to_exclude = torch.tensor([2])
    # Edge IDs are integers: build them with ``torch.tensor`` (int64) rather
    # than the legacy ``torch.Tensor`` constructor, which produces float32.
    original_edge_ids = torch.tensor([5, 9, 9, 10, 10])
    subgraph = SampledSubgraphImpl(
        csc_formats,
        original_column_node_ids,
        original_row_node_ids,
        original_edge_ids,
    )
    # A single (src, dst) row.
    edges_to_exclude = torch.cat((src_to_exclude, dst_to_exclude)).view(1, -1)
    result = subgraph.exclude_edges(edges_to_exclude)

    expected_csc_formats = gb.CSCFormatBase(
        indptr=torch.tensor([0, 0, 1, 1, 1, 3]),
        indices=torch.tensor([0, 2, 2]),
    )
    # Node ID mappings are expected to pass through unchanged.
    if reverse_row:
        expected_row_node_ids = torch.tensor([10, 15, 11, 24, 9])
    else:
        expected_row_node_ids = None
    if reverse_column:
        expected_column_node_ids = torch.tensor([10, 15, 11, 24, 9])
    else:
        expected_column_node_ids = None
    expected_edge_ids = torch.tensor([5, 10, 10])

    _assert_container_equal(result.sampled_csc, expected_csc_formats)
    _assert_container_equal(
        result.original_column_node_ids, expected_column_node_ids
    )
    _assert_container_equal(result.original_row_node_ids, expected_row_node_ids)
    _assert_container_equal(result.original_edge_ids, expected_edge_ids)
indptr=torch.tensor([0, 0, 1, 1]), indices=torch.tensor([1]), ) } if reverse_row: expected_row_node_ids = { "A": torch.tensor([13, 14, 15]), } else: expected_row_node_ids = None if reverse_column: expected_column_node_ids = { "B": torch.tensor([10, 11, 12]), } else: expected_column_node_ids = None expected_edge_ids = {"A:relation:B": torch.tensor([20])} _assert_container_equal(result.sampled_csc, expected_csc_formats) _assert_container_equal( result.original_column_node_ids, expected_column_node_ids ) _assert_container_equal(result.original_row_node_ids, expected_row_node_ids) _assert_container_equal(result.original_edge_ids, expected_edge_ids) @pytest.mark.parametrize("reverse_row", [True, False]) @pytest.mark.parametrize("reverse_column", [True, False]) def test_exclude_edges_hetero_duplicated_tensor(reverse_row, reverse_column): csc_formats = { "A:relation:B": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4, 5]), indices=torch.tensor([2, 2, 1, 1, 0]), ) } if reverse_row: original_row_node_ids = { "A": torch.tensor([13, 14, 15]), } src_to_exclude = torch.tensor([15, 13]) else: original_row_node_ids = None src_to_exclude = torch.tensor([2, 0]) if reverse_column: original_column_node_ids = { "B": torch.tensor([10, 11, 12]), } dst_to_exclude = torch.tensor([10, 12]) else: original_column_node_ids = None dst_to_exclude = torch.tensor([0, 2]) original_edge_ids = {"A:relation:B": torch.tensor([19, 19, 20, 20, 21])} subgraph = SampledSubgraphImpl( sampled_csc=csc_formats, original_column_node_ids=original_column_node_ids, original_row_node_ids=original_row_node_ids, original_edge_ids=original_edge_ids, ) edges_to_exclude = { "A:relation:B": torch.cat((src_to_exclude, dst_to_exclude)) .view(2, -1) .T } result = subgraph.exclude_edges(edges_to_exclude) expected_csc_formats = { "A:relation:B": gb.CSCFormatBase( indptr=torch.tensor([0, 0, 2, 2]), indices=torch.tensor([1, 1]), ) } if reverse_row: expected_row_node_ids = { "A": torch.tensor([13, 14, 15]), } else: 
def test_to_pyg_homo():
    """Check ``SampledSubgraph.to_pyg`` on a homogeneous two-layer sample.

    For every sampled layer the returned source features must be the input
    features, the destination features must be their leading ``dst_size``
    rows, ``sizes`` must be ``(src_size, dst_size)``, and ``edge_index`` must
    hold the CSC ``indices`` and the expanded ``indptr``.
    """
    dgl_graph = dgl.graph(([5, 0, 7, 7, 2, 4], [0, 1, 2, 2, 3, 4]))
    graph = gb.from_dglgraph(dgl_graph, is_homogeneous=True).to(F.ctx())

    itemset = gb.ItemSet(torch.LongTensor([[0, 3], [4, 4]]), names="seeds")
    item_sampler = gb.ItemSampler(itemset, batch_size=4).copy_to(F.ctx())

    num_layer = 2
    fanouts = [torch.LongTensor([-1]) for _ in range(num_layer)]
    datapipe = gb.NeighborSampler(
        item_sampler,
        graph,
        fanouts,
        deduplicate=True,
    )

    for minibatch in datapipe:
        num_nodes = minibatch.node_ids().size(0)
        x = torch.randn((num_nodes, 2), dtype=torch.float32)
        for subgraph in minibatch.sampled_subgraphs:
            (x_src, x_dst), edge_index, sizes = subgraph.to_pyg(x)
            csc = subgraph.sampled_csc

            assert torch.equal(x_src, x)
            dst_size = subgraph.original_column_node_ids.size(0)
            assert torch.equal(x_dst, x[:dst_size])
            src_size = subgraph.original_row_node_ids.size(0)
            assert dst_size == sizes[1]
            assert src_size == sizes[0]

            assert torch.equal(edge_index[0], csc.indices)
            expected_dst = gb.expand_indptr(csc.indptr, csc.indices.dtype)
            assert torch.equal(edge_index[1], expected_dst)

            # The destination features feed the next layer.
            x = x_dst
@unittest.skipIf(
    F._default_context_str == "cpu",
    reason="`to` function needs GPU to test.",
)
def test_sampled_subgraph_to_device():
    """Check that ``to("cuda")`` moves every tensor of a sampled subgraph.

    A small heterogeneous subgraph is built, passed through ``exclude_edges``
    and copied to CUDA; the CSC tensors and all original-ID tensors of the
    result must then report a ``cuda`` device.
    """
    etype = "A:relation:B"

    # Build the subgraph.
    subgraph = SampledSubgraphImpl(
        sampled_csc={
            etype: gb.CSCFormatBase(
                indptr=torch.tensor([0, 1, 2, 3]),
                indices=torch.tensor([0, 1, 2]),
            )
        },
        original_column_node_ids={"B": torch.tensor([10, 11, 12])},
        original_row_node_ids={"A": torch.tensor([13, 14, 15])},
        original_edge_ids={etype: torch.tensor([19, 20, 21])},
    )
    src_to_exclude = torch.tensor([15, 13])
    dst_to_exclude = torch.tensor([10, 12])
    pairs = torch.cat((src_to_exclude, dst_to_exclude)).view(2, -1).T
    excluded = subgraph.exclude_edges({etype: pairs})

    # Copy to device.
    moved = excluded.to("cuda")

    # Verify every tensor followed.
    for csc in moved.sampled_csc.values():
        assert csc.indices.device.type == "cuda"
        assert csc.indptr.device.type == "cuda"
    id_containers = (
        moved.original_column_node_ids,
        moved.original_row_node_ids,
        moved.original_edge_ids,
    )
    for container in id_containers:
        for ids in container.values():
            assert ids.device.type == "cuda"
84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109]), ), original_row_node_ids=tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109]), original_edge_ids=None, original_column_node_ids=tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), )""" ) assert str(sampled_subgraph_impl) == expected_result, print( sampled_subgraph_impl ) def test_sampled_subgraph_impl_representation_hetero(): sampled_subgraph_impl = SampledSubgraphImpl( sampled_csc={ "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([4, 5, 6, 7]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4, 6, 8]), indices=torch.tensor([2, 3, 4, 5, 6, 7, 8, 9]), ), }, original_column_node_ids={ "n1": torch.tensor([1, 0, 0, 1]), "n2": torch.tensor([1, 2]), }, original_row_node_ids={ "n1": torch.tensor([1, 0, 0, 1, 1, 0, 0, 1]), "n2": torch.tensor([1, 2, 0, 1, 0, 2, 0, 2, 0, 1]), }, original_edge_ids=None, ) expected_result = str( """SampledSubgraphImpl(sampled_csc={'n1:e1:n2': CSCFormatBase(indptr=tensor([0, 2, 4]), indices=tensor([4, 5, 6, 7]), ), 'n2:e2:n1': CSCFormatBase(indptr=tensor([0, 2, 4, 6, 8]), indices=tensor([2, 3, 4, 5, 6, 7, 8, 9]), )}, original_row_node_ids={'n1': tensor([1, 0, 0, 1, 1, 0, 0, 1]), 'n2': tensor([1, 2, 0, 1, 0, 2, 0, 2, 0, 1])}, original_edge_ids=None, original_column_node_ids={'n1': tensor([1, 0, 0, 1]), 'n2': tensor([1, 2])}, )""" ) assert str(sampled_subgraph_impl) == expected_result, print( sampled_subgraph_impl ) ================================================ FILE: 
def to_on_disk_tensor(test_dir, name, t):
    """Store ``t`` as ``<name>.npy`` inside ``test_dir`` and return a tensor
    backed by the memory-mapped file.

    The file is reopened with ``mmap_mode="r+"`` and wrapped with
    ``torch.as_tensor``, so the returned tensor is a view over the on-disk
    array rather than an in-memory copy.
    """
    file_name = name + ".npy"
    npy_path = os.path.join(test_dir, file_name)
    np.save(npy_path, t.numpy())
    mapped = np.load(npy_path, mmap_mode="r+")
    return torch.as_tensor(mapped)
def is_feature_store_on_cuda(store):
    """Assert that every feature tensor held by ``store`` reports ``is_cuda``.

    Raises ``AssertionError`` at the first feature whose tensor is not on
    CUDA; an empty store passes.
    """
    tensors = (feature._tensor for feature in store._features.values())
    assert all(tensor.is_cuda for tensor in tensors)
@unittest.skipIf(
    F._default_context_str == "cpu",
    reason="Tests for pinned memory are only meaningful on GPU.",
)
@pytest.mark.parametrize("device", ["pinned", "cuda"])
def test_feature_store_to_device(device):
    """Check ``TorchBasedFeatureStore.to(device)`` for pinned and CUDA targets.

    The moved store must be pinned (``"pinned"``) or hold CUDA tensors
    (``"cuda"``), while the store it was created from must not be on CUDA.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        node_feat = torch.tensor([[1, 2, 4], [2, 5, 3]])
        edge_feat = torch.tensor([[[1, 2], [3, 4]], [[2, 5], [3, 4]]])
        write_tensor_to_disk(test_dir, "a", node_feat, fmt="torch")
        write_tensor_to_disk(test_dir, "b", edge_feat, fmt="numpy")

        node_spec = gb.OnDiskFeatureData(
            domain="node",
            type="paper",
            name="a",
            format="torch",
            path=os.path.join(test_dir, "a.pt"),
        )
        edge_spec = gb.OnDiskFeatureData(
            domain="edge",
            type="paper:cites:paper",
            name="b",
            format="numpy",
            path=os.path.join(test_dir, "b.npy"),
        )
        feature_store = gb.TorchBasedFeatureStore([node_spec, edge_spec])
        moved_store = feature_store.to(device)

        if device == "pinned":
            assert moved_store.is_pinned()
        elif device == "cuda":
            is_feature_store_on_cuda(moved_store)

        # ``to`` must leave the source store where it was.
        is_feature_store_on_cpu(feature_store)
def write_tensor_to_disk(dir, name, t, fmt="torch"):
    """Write tensor ``t`` into directory ``dir`` under the base name ``name``.

    ``fmt="torch"`` saves ``<name>.pt`` with ``torch.save``; ``fmt="numpy"``
    converts the tensor to a NumPy array and saves ``<name>.npy``.  Any other
    ``fmt`` raises ``ValueError``.
    """
    if fmt == "torch":
        target = os.path.join(dir, name + ".pt")
        torch.save(t, target)
        return
    if fmt == "numpy":
        target = os.path.join(dir, name + ".npy")
        np.save(target, t.numpy())
        return
    raise ValueError(f"Unsupported format: {fmt}")
We need to delete # it before closing the temporary directory. a = b = None feature_store = None # ``domain`` should be enum. with pytest.raises(pydantic.ValidationError): _ = gb.OnDiskFeatureData( domain="invalid", type="paper", name="a", format="torch", path=os.path.join(test_dir, "a.pt"), in_memory=True, ) # ``type`` could be null. feature_data = [ gb.OnDiskFeatureData( domain="node", name="a", format="torch", path=os.path.join(test_dir, "a.pt"), in_memory=True, ), ] feature_store = gb.TorchBasedFeatureStore(feature_data) # Test read the entire feature. assert torch.equal( feature_store.read("node", None, "a"), torch.tensor([[1, 2, 4], [2, 5, 3]]), ) # Test get the size of the entire feature. assert feature_store.size("node", None, "a") == torch.Size([3]) feature_store = None @pytest.mark.parametrize("in_memory", [True, False]) def test_torch_based_feature_repr(in_memory): with tempfile.TemporaryDirectory() as test_dir: a = torch.tensor([[1, 2, 3], [4, 5, 6]]) b = torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]]) metadata = {"max_value": 3} if not in_memory: a = to_on_disk_tensor(test_dir, "a", a) b = to_on_disk_tensor(test_dir, "b", b) feature_a = gb.TorchBasedFeature(a, metadata=metadata) feature_b = gb.TorchBasedFeature(b) expected_str_feature_a = ( "TorchBasedFeature(\n" " feature=tensor([[1, 2, 3],\n" " [4, 5, 6]]),\n" " metadata={'max_value': 3},\n" ")" ) expected_str_feature_b = ( "TorchBasedFeature(\n" " feature=tensor([[[1, 2],\n" " [3, 4]],\n" "\n" " [[4, 5],\n" " [6, 7]]]),\n" " metadata={},\n" ")" ) assert repr(feature_a) == expected_str_feature_a, feature_a assert repr(feature_b) == expected_str_feature_b, feature_b a = b = metadata = None feature_a = feature_b = None expected_str_feature_a = expected_str_feature_b = None @pytest.mark.parametrize("in_memory", [True, False]) def test_torch_based_feature_store_repr(in_memory): with tempfile.TemporaryDirectory() as test_dir: a = torch.tensor([[1, 2, 4], [2, 5, 3]]) b = torch.tensor([[[1, 2], [3, 4]], 
def test_unique_and_compact_hetero():
    """Check ``gb.unique_and_compact`` on a dict of per-type node ID chunks.

    Each node type contributes one ID tensor split into several chunks.  The
    returned unique IDs and the compacted chunks are compared against
    hand-written expectations.  On CUDA devices whose compute capability
    major version is below 7, the expected unique IDs are sorted and the
    compacted IDs are mapped through the matching sort permutation before
    comparison.
    """
    device = F.ctx()

    def on_device(values):
        return torch.tensor(values, device=device)

    n1 = on_device([0, 5, 2, 7, 12, 7, 9, 5, 6, 2, 3, 4, 1, 0, 9])
    n2 = on_device([0, 3, 3, 5, 2, 7, 2, 8, 4, 9, 2, 3])
    n3 = on_device([1, 2, 6, 6, 1, 8, 3, 6, 3, 2])

    expected_unique = {
        "n1": on_device([0, 5, 2, 7, 12, 9, 6, 3, 4, 1]),
        "n2": on_device([0, 3, 5, 2, 7, 8, 4, 9]),
        "n3": on_device([1, 2, 6, 8, 3]),
    }
    if n1.is_cuda and torch.cuda.get_device_capability()[0] < 7:
        # Expect sorted unique IDs; remember the permutation for later.
        expected_reverse_id = {}
        sorted_unique = {}
        for ntype, ids in expected_unique.items():
            sorted_unique[ntype], expected_reverse_id[ntype] = ids.sort()
        expected_unique = sorted_unique
    else:
        # First-occurrence order: the mapping back is the identity.
        expected_reverse_id = {
            ntype: torch.arange(0, ids.shape[0], device=device)
            for ntype, ids in expected_unique.items()
        }

    nodes_dict = {
        "n1": n1.split(5),
        "n2": n2.split(4),
        "n3": n3.split(2),
    }
    expected_nodes_dict = {
        "n1": [
            on_device([0, 1, 2, 3, 4]),
            on_device([3, 5, 1, 6, 2]),
            on_device([7, 8, 9, 0, 5]),
        ],
        "n2": [
            on_device([0, 1, 1, 2]),
            on_device([3, 4, 3, 5]),
            on_device([6, 7, 3, 1]),
        ],
        "n3": [
            on_device([0, 1]),
            on_device([2, 2]),
            on_device([0, 3]),
            on_device([4, 2]),
            on_device([4, 1]),
        ],
    }

    unique, compacted, _ = gb.unique_and_compact(nodes_dict)

    for ntype, unique_ids in unique.items():
        assert torch.equal(unique_ids, expected_unique[ntype])

    for ntype, chunks in compacted.items():
        assert isinstance(chunks, list)
        reverse_id = expected_reverse_id[ntype]
        for expected_chunk, chunk in zip(expected_nodes_dict[ntype], chunks):
            assert torch.equal(expected_chunk, reverse_id[chunk])
def test_unique_and_compact_csc_formats_hetero():
    """Check ``gb.unique_and_compact_csc_formats`` on heterogeneous input.

    Three edge types share the destination nodes in ``dst_nodes``.  The
    returned per-type unique node IDs and the compacted CSC structures are
    compared against hand-written expectations; ``indptr`` is expected to be
    unchanged for every edge type.
    """

    def csc(indptr, indices):
        return gb.CSCFormatBase(
            indptr=torch.tensor(indptr),
            indices=torch.tensor(indices),
        )

    dst_nodes = {
        "n2": torch.tensor([2, 4, 1, 3]),
        "n3": torch.tensor([1, 3, 2, 7]),
    }
    csc_formats = {
        "n1:e1:n2": csc([0, 3, 4, 7, 10], [1, 3, 4, 6, 2, 7, 9, 4, 2, 6]),
        "n1:e2:n3": csc([0, 1, 4, 7, 10], [5, 2, 6, 4, 7, 2, 8, 1, 3, 0]),
        "n2:e3:n3": csc([0, 2, 4, 6, 8], [2, 5, 4, 1, 4, 3, 6, 0]),
    }

    expected_unique_nodes = {
        "n1": torch.tensor([1, 3, 4, 6, 2, 7, 9, 5, 8, 0]),
        "n2": torch.tensor([2, 4, 1, 3, 5, 6, 0]),
        "n3": torch.tensor([1, 3, 2, 7]),
    }
    expected_csc_formats = {
        "n1:e1:n2": csc([0, 3, 4, 7, 10], [0, 1, 2, 3, 4, 5, 6, 2, 4, 3]),
        "n1:e2:n3": csc([0, 1, 4, 7, 10], [7, 4, 3, 2, 5, 4, 8, 0, 1, 9]),
        "n2:e3:n3": csc([0, 2, 4, 6, 8], [0, 4, 1, 2, 1, 3, 5, 6]),
    }

    unique_nodes, compacted_csc_formats, _ = gb.unique_and_compact_csc_formats(
        csc_formats, dst_nodes
    )

    for ntype, node_ids in unique_nodes.items():
        assert torch.equal(node_ids, expected_unique_nodes[ntype])
    for etype, compacted in compacted_csc_formats.items():
        expected = expected_csc_formats[etype]
        assert torch.equal(compacted.indices, expected.indices)
        assert torch.equal(compacted.indptr, expected.indptr)
def test_unique_and_compact_incorrect_indptr():
    """``unique_and_compact_csc_formats`` must reject mismatched seeds.

    ``indptr`` describes five columns but six seeds are supplied, and the
    call is expected to raise ``AssertionError``.
    """
    csc_formats = gb.CSCFormatBase(
        indptr=torch.tensor([0, 2, 4, 6, 7, 11]),
        indices=torch.tensor([2, 3, 1, 4, 5, 2, 5, 1, 4, 4, 6]),
    )
    # One seed more than ``indptr`` has columns.
    seeds = torch.tensor([1, 3, 5, 2, 6, 7])
    with pytest.raises(AssertionError):
        gb.unique_and_compact_csc_formats(csc_formats, seeds)
def test_compact_csc_format_homo():
    """Check ``gb.compact_csc_format`` on a homogeneous CSC.

    The expected original row IDs are the seeds followed by the input
    ``indices``; the expected compacted ``indices`` are consecutive integers
    starting right after the seeds, and ``indptr`` is expected unchanged.
    """
    seeds = torch.tensor([1, 3, 5, 2, 6])
    input_indptr = torch.tensor([0, 2, 4, 6, 7, 11])
    input_indices = torch.tensor([2, 3, 1, 4, 5, 2, 5, 1, 4, 4, 6])

    expected_original_row_ids = torch.tensor(
        [1, 3, 5, 2, 6, 2, 3, 1, 4, 5, 2, 5, 1, 4, 4, 6]
    )
    num_seeds = seeds.numel()
    expected_indices = torch.arange(num_seeds, num_seeds + len(input_indices))

    original_row_ids, compacted = gb.compact_csc_format(
        gb.CSCFormatBase(indptr=input_indptr, indices=input_indices), seeds
    )

    assert torch.equal(compacted.indptr, input_indptr)
    assert torch.equal(compacted.indices, expected_indices)
    assert torch.equal(original_row_ids, expected_original_row_ids)
@pytest.mark.parametrize("fmt", ["torch", "numpy"])
def test_read_data(fmt):
    """Round-trip a small 2x3 array through ``internal.read_data``.

    The array is written with the saver matching ``fmt`` and must come back
    as an equal torch tensor.
    """
    array = np.array([[1, 2, 4], [2, 5, 3]])
    extension = "pt" if fmt == "torch" else "npy"
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, f"save_data.{extension}")
        if fmt == "torch":
            torch.save(torch.from_numpy(array), path)
        elif fmt == "numpy":
            np.save(path, array)
        loaded = internal.read_data(path, fmt)
        assert torch.equal(torch.from_numpy(array), loaded)
@pytest.mark.parametrize("fmt", ["torch", "numpy"])
def test_get_npy_dim(fmt):
    """``get_npy_dim`` reports 2 for a 2-D ``.npy`` file and rejects ``.pt``.

    A file written by ``torch.save`` is expected to raise ``ValueError``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        array = np.array([[1, 2, 4], [2, 5, 3]])
        extension = "npy" if fmt != "torch" else "pt"
        path = os.path.join(tmp_dir, f"save_data.{extension}")
        if fmt == "torch":
            torch.save(torch.from_numpy(array), path)
            with pytest.raises(ValueError):
                internal.get_npy_dim(path)
        elif fmt == "numpy":
            np.save(path, array)
            assert internal.get_npy_dim(path) == 2
        # Drop the reference before the temporary directory is removed.
        array = None
@pytest.mark.parametrize("edge_fmt", ["csv", "numpy"])
def test_read_edges(edge_fmt):
    """Write an edge list to disk and check ``read_edges`` returns it intact.

    200 edges are generated (every one of 40 nodes appears five times as a
    source, destinations are random), stored either as a header-less CSV or
    as a ``(2, N)`` ``.npy`` array, and read back.

    The source and destination arrays are compared element-wise. The
    previous ``src.all() == nodes.all()`` form only compared two booleans
    and therefore could not detect wrong edge data.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        num_nodes = 40
        num_edges = 200
        nodes = np.repeat(np.arange(num_nodes), 5)
        neighbors = np.random.randint(0, num_nodes, size=num_edges)
        edges = np.stack([nodes, neighbors], axis=1)
        os.makedirs(os.path.join(test_dir, "edges"), exist_ok=True)
        if edge_fmt == "csv":
            # Write into edges/edge.csv
            edges = pd.DataFrame(edges, columns=["src", "dst"])
            edge_path = os.path.join("edges", "edge.csv")
            edges.to_csv(
                os.path.join(test_dir, edge_path),
                index=False,
                header=False,
            )
        else:
            # Write into edges/edge.npy; the reader expects shape (2, N).
            edges = edges.T
            edge_path = os.path.join("edges", "edge.npy")
            np.save(os.path.join(test_dir, edge_path), edges)
        src, dst = internal.read_edges(test_dir, edge_fmt, edge_path)
        # np.asarray accepts both numpy arrays and CPU tensors, so the check
        # does not depend on which of the two the reader hands back.
        assert np.array_equal(np.asarray(src), nodes)
        assert np.array_equal(np.asarray(dst), neighbors)
def test_calculate_dir_hash():
    """Hash a directory with two files and check both MD5 digests.

    The digests are compared as sorted lists so that each expected hash must
    appear exactly once. A plain membership test would also pass if both
    files were reported with the same hash.
    """
    with tempfile.TemporaryDirectory() as test_dir:
        test_file_path_1 = os.path.join(test_dir, "test_1.txt")
        test_file_path_2 = os.path.join(test_dir, "test_2.txt")
        with open(test_file_path_1, "w") as file:
            file.write("test content")
        with open(test_file_path_2, "w") as file:
            file.write("test contents of directory")
        hash_value = internal.calculate_dir_hash(test_dir, hash_algo="md5")
        expected_hash_value = [
            "56e708a2bdf92887d4a7f25cbc13c555",
            "9473fdd0d880a43c21b7778d34872157",
        ]
        assert len(hash_value) == 2
        assert sorted(hash_value.values()) == sorted(expected_hash_value)
def test_numpy_save_aligned():
    """``numpy_save_aligned`` stores the same data as ``np.save``, padded.

    A 1024-element float32 tensor (4096 bytes) is written with both savers;
    the loaded arrays must match exactly and the aligned file must be
    exactly 8192 bytes.
    """
    payload = torch.randn(1024, dtype=torch.float32)  # 4096 bytes
    with tempfile.TemporaryDirectory() as tmp_dir:
        aligned_file = os.path.join(tmp_dir, "aligned.npy")
        plain_file = os.path.join(tmp_dir, "nonaligned.npy")

        gb.numpy_save_aligned(aligned_file, payload.numpy())
        np.save(plain_file, payload.numpy())

        # Zero tolerances: the two files must decode to identical values.
        torch.testing.assert_close(
            np.load(aligned_file), np.load(plain_file), rtol=0, atol=0
        )
        # The size of the file should be 4K (aligned header) + 4K (tensor).
        assert os.path.getsize(aligned_file) == 2 * 4096
@unittest.skipIf(F._default_context_str != "gpu", "CopyTo needs GPU to test")
@pytest.mark.parametrize("non_blocking", [False, True])
def test_CopyTo(non_blocking):
    """Every way of invoking CopyTo must place ``seeds`` on the CUDA device.

    Covers the class constructor (with and without the ``non_blocking``
    argument) and the functional ``copy_to`` form. When ``non_blocking`` is
    set, batches are pinned first.
    """
    sampler = gb.ItemSampler(gb.ItemSet(torch.arange(20), names="seeds"), 4)
    if non_blocking:
        sampler = sampler.transform(lambda x: x.pin_memory())

    # Each pipeline is built lazily, right before it is consumed, in the
    # order: constructor, constructor + non_blocking, functional form.
    pipeline_builders = (
        lambda: gb.CopyTo(sampler, "cuda"),
        lambda: gb.CopyTo(sampler, "cuda", non_blocking),
        lambda: sampler.copy_to("cuda", non_blocking),
    )
    for build in pipeline_builders:
        for batch in build():
            assert batch.seeds.device.type == "cuda"
def test_etype_tuple_to_str():
    """Check tuple-to-string etype conversion and its input validation."""
    # Well-formed canonical etype.
    assert gb.etype_tuple_to_str(("user", "like", "item")) == "user:like:item"

    # Malformed inputs paired with the exact assertion message they trigger:
    # first a plain string, then a tuple of the wrong length.
    rejected = (
        (
            "user:like:item",
            "Passed-in canonical etype should be in format of (str, str, str). "
            "But got user:like:item.",
        ),
        (
            ("user", "like"),
            "Passed-in canonical etype should be in format of (str, str, str). "
            "But got ('user', 'like').",
        ),
    )
    for bad_etype, message in rejected:
        with pytest.raises(AssertionError, match=re.escape(message)):
            _ = gb.etype_tuple_to_str(bad_etype)
def test_seed_type_str_to_ntypes():
    """Check conversion of a seed type string into a list of node types.

    Covers a seed size of 2, a seed size of 3, and rejection of a
    non-string seed type.
    """
    # Seeds made of two nodes: only the first and last fields are node types.
    seed_type_str = "user:like:item"
    seed_size = 2
    node_type = gb.seed_type_str_to_ntypes(seed_type_str, seed_size)
    assert node_type == ["user", "item"]

    # Seeds made of three nodes: every field is a node type.
    seed_type_str = "user:item:user"
    seed_size = 3
    node_type = gb.seed_type_str_to_ntypes(seed_type_str, seed_size)
    assert node_type == ["user", "item", "user"]

    # Unexpected input: a list instead of a string.
    seed_type_str = ["user", "item"]
    with pytest.raises(
        AssertionError,
        match=re.escape(
            "Passed-in seed type should be string, but got "
        ),
    ):
        _ = gb.seed_type_str_to_ntypes(seed_type_str, 2)
def torch_expand_indptr(indptr, dtype, nodes=None):
    """Pure-PyTorch reference for expanding an ``indptr`` into per-edge ids.

    Each node id is repeated as many times as the length of its interval in
    ``indptr`` (``indptr[i + 1] - indptr[i]``). When ``nodes`` is omitted,
    the ids ``0 .. len(indptr) - 2`` are used. The result has dtype
    ``dtype``.
    """
    if nodes is not None:
        node_ids = nodes
    else:
        node_ids = torch.arange(
            len(indptr) - 1, dtype=dtype, device=indptr.device
        )
    repeats = indptr.diff()
    return node_ids.to(dtype).repeat_interleave(repeats)
nodes) gb_result = gb.expand_indptr(indptr, dtype, nodes) assert torch.equal(torch_result, gb_result) gb_result = gb.expand_indptr(indptr, dtype, nodes, indptr[-1].item()) assert torch.equal(torch_result, gb_result) if TorchVersion(torch.__version__) >= TorchVersion("2.2.0a0"): import torch._dynamo as dynamo from torch.testing._internal.optests import opcheck # Tests torch.compile compatibility for output_size in [None, indptr[-1].item()]: kwargs = {"node_ids": nodes, "output_size": output_size} opcheck( torch.ops.graphbolt.expand_indptr, (indptr, dtype), kwargs, test_utils=[ "test_schema", "test_autograd_registration", "test_faketensor", "test_aot_dispatch_dynamic", ], raise_exception=True, ) explanation = dynamo.explain(gb.expand_indptr)( indptr, dtype, nodes, output_size ) expected_breaks = -1 if output_size is None else 0 assert explanation.graph_break_count == expected_breaks @unittest.skipIf( F._default_context_str != "gpu", "Only GPU implementation is available." ) @pytest.mark.parametrize("offset", [None, True]) @pytest.mark.parametrize("dtype", [torch.int32, torch.int64]) def test_indptr_edge_ids(offset, dtype): indptr = torch.tensor([0, 2, 2, 7, 10, 12], device=F.ctx()) if offset: offset = indptr[:-1] ref_result = torch.arange( 0, indptr[-1].item(), dtype=dtype, device=F.ctx() ) else: ref_result = torch.tensor( [0, 1, 0, 1, 2, 3, 4, 0, 1, 2, 0, 1], dtype=dtype, device=F.ctx() ) gb_result = gb.indptr_edge_ids(indptr, dtype, offset) assert torch.equal(ref_result, gb_result) gb_result = gb.indptr_edge_ids(indptr, dtype, offset, indptr[-1].item()) assert torch.equal(ref_result, gb_result) if TorchVersion(torch.__version__) >= TorchVersion("2.2.0a0"): import torch._dynamo as dynamo from torch.testing._internal.optests import opcheck # Tests torch.compile compatibility for output_size in [None, indptr[-1].item()]: kwargs = {"offset": offset, "output_size": output_size} opcheck( torch.ops.graphbolt.indptr_edge_ids, (indptr, dtype), kwargs, test_utils=[ 
def test_csc_format_base_incorrect_indptr():
    """Constructing a CSCFormatBase with inconsistent inputs must fail.

    The last ``indptr`` entry is 11 while ``indices`` holds only 10 values.
    """
    indptr = torch.tensor([0, 2, 4, 6, 7, 11])
    indices = torch.tensor([2, 3, 1, 4, 5, 2, 5, 1, 4, 4])
    with pytest.raises(AssertionError):
        # The value of last element in indptr is not corresponding to indices.
        # The constructed object is never used, so it is not bound to a name.
        gb.CSCFormatBase(indptr=indptr, indices=indices)
@pytest.mark.parametrize("overlap_feature_fetch", [False, True])
def test_DataLoader(overlap_feature_fetch):
    """Run sampler -> fetcher -> CopyTo under a multi-worker DataLoader.

    Checks that node features "a"/"b" and edge feature "c" are present in
    every minibatch and that exactly ``N // B`` minibatches are produced.
    """
    N = 40
    B = 4
    gbolt = dgl.graphbolt

    seeds = gbolt.ItemSet(torch.arange(N), names="seeds")
    # The graph is created before any feature tensor, keeping the order of
    # random draws the same as it has always been in this test.
    graph = gb_test_utils.rand_csc_graph(200, 0.15, bidirection_edge=True)
    feature_store = gbolt.BasicFeatureStore(
        {
            ("node", None, "a"): gbolt.TorchBasedFeature(torch.randn(200, 4)),
            ("node", None, "b"): gbolt.TorchBasedFeature(torch.randn(200, 4)),
            ("edge", None, "c"): gbolt.TorchBasedFeature(
                torch.randn(graph.total_num_edges, 1)
            ),
        }
    )

    pipeline = gbolt.ItemSampler(seeds, batch_size=B)
    pipeline = gbolt.NeighborSampler(
        pipeline,
        graph,
        fanouts=[torch.LongTensor([2]) for _ in range(2)],
    )
    pipeline = gbolt.FeatureFetcher(
        pipeline,
        feature_store,
        ["a", "b"],
        ["c"],
        overlap_fetch=overlap_feature_fetch,
    )
    pipeline = gbolt.CopyTo(pipeline, F.ctx())

    loader = gbolt.DataLoader(pipeline, num_workers=4)
    for last_index, batch in enumerate(loader):
        assert "a" in batch.node_features
        assert "b" in batch.node_features
        for layer in range(batch.num_layers()):
            assert "c" in batch.edge_features[layer]
    assert last_index + 1 == N // B
test_gpu_sampling_DataLoader( sampler_name, enable_feature_fetch, overlap_feature_fetch, overlap_graph_fetch, cooperative, asynchronous, num_gpu_cached_edges, gpu_cache_threshold, ): if cooperative and not thd.is_initialized(): # On Windows, the init method can only be file. init_method = ( f"file:///{os.path.join(os.getcwd(), 'dis_tempfile')}" if platform == "win32" else "tcp://127.0.0.1:12345" ) thd.init_process_group( init_method=init_method, world_size=1, rank=0, ) N = 40 B = 4 num_layers = 2 itemset = dgl.graphbolt.ItemSet(torch.arange(N), names="seeds") graph = gb_test_utils.rand_csc_graph(200, 0.15, bidirection_edge=True) graph = graph.pin_memory_() if overlap_graph_fetch else graph.to(F.ctx()) features = {} keys = [ ("node", None, "a"), ("node", None, "b"), ("node", None, "c"), ("edge", None, "d"), ] features[keys[0]] = dgl.graphbolt.TorchBasedFeature( torch.randn(200, 4, pin_memory=True) ) features[keys[1]] = dgl.graphbolt.TorchBasedFeature( torch.randn(200, 4, pin_memory=True) ) features[keys[2]] = dgl.graphbolt.TorchBasedFeature( torch.randn(200, 4, device=F.ctx()) ) features[keys[3]] = dgl.graphbolt.TorchBasedFeature( torch.randn(graph.total_num_edges, 1, device=F.ctx()) ) feature_store = dgl.graphbolt.BasicFeatureStore(features) dataloaders = [] for i in range(2): datapipe = dgl.graphbolt.ItemSampler(itemset, batch_size=B) datapipe = datapipe.copy_to(F.ctx()) kwargs = { "overlap_fetch": overlap_graph_fetch, "num_gpu_cached_edges": num_gpu_cached_edges, "gpu_cache_threshold": gpu_cache_threshold, "cooperative": cooperative, "asynchronous": asynchronous, } if i != 0: kwargs = {} datapipe = getattr(dgl.graphbolt, sampler_name)( datapipe, graph, fanouts=[torch.LongTensor([2]) for _ in range(num_layers)], **kwargs, ) if enable_feature_fetch: datapipe = dgl.graphbolt.FeatureFetcher( datapipe, feature_store, ["a", "b", "c"], ["d"], overlap_fetch=overlap_feature_fetch and i == 0, cooperative=asynchronous and cooperative and i == 0, ) 
dataloaders.append(dgl.graphbolt.DataLoader(datapipe)) dataloader, dataloader2 = dataloaders bufferer_cnt = int(enable_feature_fetch and overlap_feature_fetch) if overlap_graph_fetch: bufferer_cnt += num_layers if num_gpu_cached_edges > 0: bufferer_cnt += 2 * num_layers if asynchronous: bufferer_cnt += 2 * num_layers + 1 # _preprocess stage has 1. if cooperative: bufferer_cnt += 3 * num_layers if enable_feature_fetch: bufferer_cnt += 1 # feature fetch has 1. if cooperative: # _preprocess stage. bufferer_cnt += 4 datapipe_graph = traverse_dps(dataloader) bufferers = find_dps( datapipe_graph, dgl.graphbolt.Bufferer, ) assert len(bufferers) == bufferer_cnt # Fixes the randomness of LayerNeighborSampler torch.manual_seed(1) minibatches = list(dataloader) assert len(minibatches) == N // B for i, _ in enumerate(dataloader): if i >= 1: break torch.manual_seed(1) for minibatch, minibatch2 in zip(minibatches, dataloader2): if enable_feature_fetch: assert "a" in minibatch.node_features assert "b" in minibatch.node_features assert "c" in minibatch.node_features if sampler_name == "LayerNeighborSampler": assert torch.equal( minibatch.node_features["a"], minibatch2.node_features["a"] ) for layer_id in range(minibatch.num_layers()): assert "d" in minibatch.edge_features[layer_id] edge_feature = minibatch.edge_features[layer_id]["d"] edge_feature_ref = minibatch2.edge_features[layer_id]["d"] if sampler_name == "LayerNeighborSampler": assert torch.equal(edge_feature, edge_feature_ref) assert len(list(dataloader)) == N // B if asynchronous and cooperative: for minibatch in minibatches: x = torch.ones((minibatch.node_ids().size(0), 1), device=F.ctx()) for subgraph in minibatch.sampled_subgraphs: x = gb.CooperativeConvFunction.apply(subgraph, x) x, edge_index, size = subgraph.to_pyg(x) x = x[0] one = torch.ones( edge_index.shape[1], dtype=x.dtype, device=x.device ) coo = torch.sparse_coo_tensor( edge_index.flipud(), one, size=(size[1], size[0]) ) x = torch.sparse.mm(coo, x) assert 
def test_Dataset():
    """Every property of the base ``gb.Dataset`` raises NotImplementedError."""
    base_dataset = gb.Dataset()
    for attribute in ("tasks", "graph", "feature", "dataset_name"):
        with pytest.raises(NotImplementedError):
            _ = getattr(base_dataset, attribute)
def test_FeatureFetcher_homo():
    """FeatureFetcher over a sampled homogeneous graph yields 5 minibatches.

    Ten seeds are batched in pairs, sampled over two layers with fanout 2,
    and node feature "a" plus edge feature "b" are fetched.
    """
    graph = gb_test_utils.rand_csc_graph(20, 0.15, bidirection_edge=True)
    # Node values are drawn before edge values, one random integer per row.
    node_values = torch.tensor(
        [[random.randint(0, 10)] for _ in range(graph.total_num_nodes)]
    )
    edge_values = torch.tensor(
        [[random.randint(0, 10)] for _ in range(graph.total_num_edges)]
    )
    store = gb.BasicFeatureStore(
        {
            ("node", None, "a"): gb.TorchBasedFeature(node_values),
            ("edge", None, "b"): gb.TorchBasedFeature(edge_values),
        }
    )

    batches = gb.ItemSampler(
        gb.ItemSet(torch.arange(10), names="seeds"), batch_size=2
    )
    layer_count = 2
    fanouts = [torch.LongTensor([2]) for _ in range(layer_count)]
    sampled = gb.NeighborSampler(batches, graph, fanouts)
    fetched = gb.FeatureFetcher(sampled, store, ["a"], ["b"])

    assert len(list(fetched)) == 5
def get_hetero_graph():
    """Build the small two-type graph shared by the hetero fetcher tests.

    The graph has 5 nodes (2 of type "n1", 3 of type "n2") and 10 edges of
    types "n1:e1:n2" (id 0) and "n2:e2:n1" (id 1).

    Returns the object created by ``gb.fused_csc_sampling_graph``.
    """
    # COO graph (one column per edge, 10 edges in total):
    # [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
    # [2, 4, 2, 3, 0, 1, 1, 0, 0, 1]
    # [1, 1, 1, 1, 0, 0, 0, 0, 0, 0] - > edge type.
    # num_nodes = 5, num_n1 = 2, num_n2 = 3
    ntypes = {"n1": 0, "n2": 1}
    etypes = {"n1:e1:n2": 0, "n2:e2:n1": 1}
    indptr = torch.LongTensor([0, 2, 4, 6, 8, 10])
    indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 0, 1])
    type_per_edge = torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
    # Offsets [0, 2, 5]: nodes 0-1 are "n1", nodes 2-4 are "n2".
    node_type_offset = torch.LongTensor([0, 2, 5])
    return gb.fused_csc_sampling_graph(
        indptr,
        indices,
        node_type_offset=node_type_offset,
        type_per_edge=type_per_edge,
        node_type_to_id=ntypes,
        edge_type_to_id=etypes,
    )
# Continuation of test_FeatureFetcher_hetero (its header is on the previous line).
    node_feature_keys = {"n1": ["a"], "n2": ["a"], "n3": ["a"]}
    fetcher_dp = gb.FeatureFetcher(
        sampler_dp, feature_store, node_feature_keys=node_feature_keys
    )
    # Five seeds (two "n1" + three "n2") batched by two -> three minibatches.
    assert len(list(fetcher_dp)) == 3

    # Do not fetch feature for "n1".
    node_feature_keys = {"n2": ["a"]}
    fetcher_dp = gb.FeatureFetcher(
        sampler_dp, feature_store, node_feature_keys=node_feature_keys
    )
    for mini_batch in fetcher_dp:
        assert ("n1", "a") not in mini_batch.node_features


def test_FeatureFetcher_with_edges_hetero():
    """Fetch node and edge features for hand-built heterogeneous subgraphs.

    The assertions for this test follow the pipeline construction below.
    """
    a = torch.tensor([[random.randint(0, 10)] for _ in range(20)])
    b = torch.tensor([[random.randint(0, 10)] for _ in range(50)])

    def add_node_and_edge_ids(minibatch):
        # Replace the incoming minibatch with one that carries three synthetic
        # subgraphs sharing the same random original node/edge ID dicts.
        seeds = minibatch.seeds
        subgraphs = []
        original_edge_ids = {
            "n1:e1:n2": torch.randint(0, 50, (10,)),
            "n2:e2:n1": torch.randint(0, 50, (10,)),
        }
        original_column_node_ids = {
            "n1": torch.randint(0, 20, (10,)),
            "n2": torch.randint(0, 20, (10,)),
        }
        original_row_node_ids = {
            "n1": torch.randint(0, 20, (10,)),
            "n2": torch.randint(0, 20, (10,)),
        }
        for _ in range(3):
            subgraphs.append(
                gb.SampledSubgraphImpl(
                    sampled_csc={
                        "n1:e1:n2": gb.CSCFormatBase(
                            indptr=torch.arange(11),
                            indices=torch.arange(10),
                        ),
                        "n2:e2:n1": gb.CSCFormatBase(
                            indptr=torch.arange(11),
                            indices=torch.arange(10),
                        ),
                    },
                    original_column_node_ids=original_column_node_ids,
                    original_row_node_ids=original_row_node_ids,
                    original_edge_ids=original_edge_ids,
                )
            )
        data = gb.MiniBatch(input_nodes=seeds, sampled_subgraphs=subgraphs)
        return data

    features = {}
    keys = [
        ("node", "n1", "a"),
        ("edge", "n1:e1:n2", "a"),
        ("edge", "n2:e2:n1", "a"),
    ]
    # NOTE(review): only keys[0] and keys[1] are registered; keys[2] is never
    # added to the store.
    features[keys[0]] = gb.TorchBasedFeature(a)
    features[keys[1]] = gb.TorchBasedFeature(b)
    feature_store = gb.BasicFeatureStore(features)

    itemset = gb.HeteroItemSet(
        {
            "n1": gb.ItemSet(torch.randint(0, 20, (10,)), names="seeds"),
        }
    )
    item_sampler_dp = gb.ItemSampler(itemset, batch_size=2)
    fn = partial(_func, add_node_and_edge_ids)
    converter_dp = Mapper(item_sampler_dp, fn)
    # "n3:e3:n3" is not in the sampled edges.
# Do not fetch feature for "n2:e2:n1". node_feature_keys = {"n1": ["a"]} edge_feature_keys = {"n1:e1:n2": ["a"], "n3:e3:n3": ["a"]} fetcher_dp = gb.FeatureFetcher( converter_dp, feature_store, node_feature_keys=node_feature_keys, edge_feature_keys=edge_feature_keys, ) assert len(list(fetcher_dp)) == 5 for data in fetcher_dp: assert data.node_features[("n1", "a")].size(0) == 2 assert len(data.edge_features) == 3 for edge_feature in data.edge_features: assert edge_feature[("n1:e1:n2", "a")].size(0) == 10 assert ("n2:e2:n1", "a") not in edge_feature ================================================ FILE: tests/python/pytorch/graphbolt/test_graphbolt_utils.py ================================================ import backend as F import dgl.graphbolt as gb import pytest import torch def test_find_reverse_edges_homo(): edges = torch.tensor([[1, 3, 5], [2, 4, 5]]).T edges = gb.add_reverse_edges(edges) expected_edges = torch.tensor([[1, 3, 5, 2, 4, 5], [2, 4, 5, 1, 3, 5]]).T assert torch.equal(edges, expected_edges) assert torch.equal(edges[1], expected_edges[1]) def test_find_reverse_edges_hetero(): edges = { "A:r:B": torch.tensor([[1, 5], [2, 5]]).T, "B:rr:A": torch.tensor([[3], [3]]).T, } edges = gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}) expected_edges = { "A:r:B": torch.tensor([[1, 5], [2, 5]]).T, "B:rr:A": torch.tensor([[3, 2, 5], [3, 1, 5]]).T, } assert torch.equal(edges["A:r:B"], expected_edges["A:r:B"]) assert torch.equal(edges["B:rr:A"], expected_edges["B:rr:A"]) def test_find_reverse_edges_bi_reverse_types(): edges = { "A:r:B": torch.tensor([[1, 5], [2, 5]]).T, "B:rr:A": torch.tensor([[3], [3]]).T, } edges = gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A", "B:rr:A": "A:r:B"}) expected_edges = { "A:r:B": torch.tensor([[1, 5, 3], [2, 5, 3]]).T, "B:rr:A": torch.tensor([[3, 2, 5], [3, 1, 5]]).T, } assert torch.equal(edges["A:r:B"], expected_edges["A:r:B"]) assert torch.equal(edges["B:rr:A"], expected_edges["B:rr:A"]) def 
test_find_reverse_edges_circual_reverse_types(): edges = { "A:r1:B": torch.tensor([[1, 1]]), "B:r2:C": torch.tensor([[2, 2]]), "C:r3:A": torch.tensor([[3, 3]]), } edges = gb.add_reverse_edges( edges, {"A:r1:B": "B:r2:C", "B:r2:C": "C:r3:A", "C:r3:A": "A:r1:B"} ) expected_edges = { "A:r1:B": torch.tensor([[1, 3], [1, 3]]).T, "B:r2:C": torch.tensor([[2, 1], [2, 1]]).T, "C:r3:A": torch.tensor([[3, 2], [3, 2]]).T, } assert torch.equal(edges["A:r1:B"], expected_edges["A:r1:B"]) assert torch.equal(edges["B:r2:C"], expected_edges["B:r2:C"]) assert torch.equal(edges["A:r1:B"], expected_edges["A:r1:B"]) assert torch.equal(edges["C:r3:A"], expected_edges["C:r3:A"]) ================================================ FILE: tests/python/pytorch/graphbolt/test_integration.py ================================================ import dgl import dgl.graphbolt as gb import dgl.sparse as dglsp import torch def test_integration_link_prediction(): torch.manual_seed(926) indptr = torch.tensor([0, 0, 1, 3, 6, 8, 10]) indices = torch.tensor([5, 3, 3, 3, 3, 4, 4, 0, 5, 4]) matrix_a = dglsp.from_csc(indptr, indices) seeds = torch.t(torch.stack(matrix_a.coo())) node_feature_data = torch.tensor( [ [0.9634, 0.2294], [0.6172, 0.7865], [0.2109, 0.1089], [0.8672, 0.2276], [0.5503, 0.8223], [0.5160, 0.2486], ] ) edge_feature_data = torch.tensor( [ [0.5123, 0.1709, 0.6150], [0.1476, 0.1902, 0.1314], [0.2582, 0.5203, 0.6228], [0.3708, 0.7631, 0.2683], [0.2126, 0.7878, 0.7225], [0.7885, 0.3414, 0.5485], [0.4088, 0.8200, 0.1851], [0.0056, 0.9469, 0.4432], [0.8972, 0.7511, 0.3617], [0.5773, 0.2199, 0.3366], ] ) item_set = gb.ItemSet(seeds, names="seeds") graph = gb.fused_csc_sampling_graph(indptr, indices) node_feature = gb.TorchBasedFeature(node_feature_data) edge_feature = gb.TorchBasedFeature(edge_feature_data) features = { ("node", None, "feat"): node_feature, ("edge", None, "feat"): edge_feature, } feature_store = gb.BasicFeatureStore(features) datapipe = gb.ItemSampler(item_set, batch_size=4) 
datapipe = datapipe.sample_uniform_negative(graph, 2) fanouts = torch.LongTensor([1]) datapipe = datapipe.sample_neighbor(graph, [fanouts, fanouts], replace=True) datapipe = datapipe.transform(gb.exclude_seed_edges) datapipe = datapipe.fetch_feature( feature_store, node_feature_keys=["feat"], edge_feature_keys=["feat"] ) dataloader = gb.DataLoader( datapipe, ) expected = [ str( """MiniBatch(seeds=tensor([[5, 1], [3, 2], [3, 2], [3, 3], [5, 2], [5, 1], [3, 4], [3, 3], [3, 5], [3, 2], [3, 0], [3, 4]]), sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1, 1, 1, 2, 2], dtype=torch.int32), indices=tensor([4, 5], dtype=torch.int32), ), original_row_node_ids=tensor([5, 1, 3, 2, 4, 0]), original_edge_ids=tensor([9, 7]), original_column_node_ids=tensor([5, 1, 3, 2, 4, 0]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1, 1, 1, 2, 2], dtype=torch.int32), indices=tensor([0, 5], dtype=torch.int32), ), original_row_node_ids=tensor([5, 1, 3, 2, 4, 0]), original_edge_ids=tensor([8, 7]), original_column_node_ids=tensor([5, 1, 3, 2, 4, 0]), )], node_features={'feat': tensor([[0.5160, 0.2486], [0.6172, 0.7865], [0.8672, 0.2276], [0.2109, 0.1089], [0.5503, 0.8223], [0.9634, 0.2294]])}, labels=tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]), input_nodes=tensor([5, 1, 3, 2, 4, 0]), indexes=tensor([0, 1, 2, 3, 0, 0, 1, 1, 2, 2, 3, 3]), edge_features=[{'feat': tensor([[0.5773, 0.2199, 0.3366], [0.0056, 0.9469, 0.4432]])}, {'feat': tensor([[0.8972, 0.7511, 0.3617], [0.0056, 0.9469, 0.4432]])}], compacted_seeds=tensor([[0, 1], [2, 3], [2, 3], [2, 2], [0, 3], [0, 1], [2, 4], [2, 2], [2, 0], [2, 3], [2, 5], [2, 4]]), blocks=[Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2), Block(num_src_nodes=6, num_dst_nodes=6, num_edges=2)], )""" ), str( """MiniBatch(seeds=tensor([[3, 3], [4, 3], [4, 4], [0, 4], [3, 4], [3, 5], [4, 1], [4, 4], [4, 4], [4, 5], [0, 1], [0, 3]]), 
sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 0, 0, 0, 1], dtype=torch.int32), indices=tensor([3], dtype=torch.int32), ), original_row_node_ids=tensor([3, 4, 0, 5, 1]), original_edge_ids=tensor([0]), original_column_node_ids=tensor([3, 4, 0, 5, 1]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 0, 0, 1, 2], dtype=torch.int32), indices=tensor([3, 3], dtype=torch.int32), ), original_row_node_ids=tensor([3, 4, 0, 5, 1]), original_edge_ids=tensor([8, 0]), original_column_node_ids=tensor([3, 4, 0, 5, 1]), )], node_features={'feat': tensor([[0.8672, 0.2276], [0.5503, 0.8223], [0.9634, 0.2294], [0.5160, 0.2486], [0.6172, 0.7865]])}, labels=tensor([1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.]), input_nodes=tensor([3, 4, 0, 5, 1]), indexes=tensor([0, 1, 2, 3, 0, 0, 1, 1, 2, 2, 3, 3]), edge_features=[{'feat': tensor([[0.5123, 0.1709, 0.6150]])}, {'feat': tensor([[0.8972, 0.7511, 0.3617], [0.5123, 0.1709, 0.6150]])}], compacted_seeds=tensor([[0, 0], [1, 0], [1, 1], [2, 1], [0, 1], [0, 3], [1, 4], [1, 1], [1, 1], [1, 3], [2, 4], [2, 0]]), blocks=[Block(num_src_nodes=5, num_dst_nodes=5, num_edges=1), Block(num_src_nodes=5, num_dst_nodes=5, num_edges=2)], )""" ), str( """MiniBatch(seeds=tensor([[5, 5], [4, 5], [5, 5], [5, 5], [4, 0], [4, 0]]), sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 1, 1], dtype=torch.int32), indices=tensor([1], dtype=torch.int32), ), original_row_node_ids=tensor([5, 4, 0]), original_edge_ids=tensor([6]), original_column_node_ids=tensor([5, 4, 0]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 0, 1, 1], dtype=torch.int32), indices=tensor([2], dtype=torch.int32), ), original_row_node_ids=tensor([5, 4, 0]), original_edge_ids=tensor([7]), original_column_node_ids=tensor([5, 4, 0]), )], node_features={'feat': tensor([[0.5160, 0.2486], [0.5503, 0.8223], [0.9634, 0.2294]])}, labels=tensor([1., 1., 0., 0., 0., 0.]), 
input_nodes=tensor([5, 4, 0]), indexes=tensor([0, 1, 0, 0, 1, 1]), edge_features=[{'feat': tensor([[0.4088, 0.8200, 0.1851]])}, {'feat': tensor([[0.0056, 0.9469, 0.4432]])}], compacted_seeds=tensor([[0, 0], [1, 0], [0, 0], [0, 0], [1, 2], [1, 2]]), blocks=[Block(num_src_nodes=3, num_dst_nodes=3, num_edges=1), Block(num_src_nodes=3, num_dst_nodes=3, num_edges=1)], )""" ), ] for step, data in enumerate(dataloader): assert expected[step] == str(data), print(step, data) def test_integration_node_classification(): torch.manual_seed(926) indptr = torch.tensor([0, 0, 1, 3, 6, 8, 10]) indices = torch.tensor([5, 3, 3, 3, 3, 4, 4, 0, 5, 4]) seeds = torch.tensor([5, 1, 2, 4, 3, 0]) node_feature_data = torch.tensor( [ [0.9634, 0.2294], [0.6172, 0.7865], [0.2109, 0.1089], [0.8672, 0.2276], [0.5503, 0.8223], [0.5160, 0.2486], ] ) edge_feature_data = torch.tensor( [ [0.5123, 0.1709, 0.6150], [0.1476, 0.1902, 0.1314], [0.2582, 0.5203, 0.6228], [0.3708, 0.7631, 0.2683], [0.2126, 0.7878, 0.7225], [0.7885, 0.3414, 0.5485], [0.4088, 0.8200, 0.1851], [0.0056, 0.9469, 0.4432], [0.8972, 0.7511, 0.3617], [0.5773, 0.2199, 0.3366], ] ) item_set = gb.ItemSet(seeds, names="seeds") graph = gb.fused_csc_sampling_graph(indptr, indices) node_feature = gb.TorchBasedFeature(node_feature_data) edge_feature = gb.TorchBasedFeature(edge_feature_data) features = { ("node", None, "feat"): node_feature, ("edge", None, "feat"): edge_feature, } feature_store = gb.BasicFeatureStore(features) datapipe = gb.ItemSampler(item_set, batch_size=2) fanouts = torch.LongTensor([1]) datapipe = datapipe.sample_neighbor(graph, [fanouts, fanouts], replace=True) datapipe = datapipe.fetch_feature( feature_store, node_feature_keys=["feat"], edge_feature_keys=["feat"] ) dataloader = gb.DataLoader( datapipe, ) expected = [ str( """MiniBatch(seeds=tensor([5, 1]), sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2], dtype=torch.int32), indices=tensor([0, 0], dtype=torch.int32), ), 
original_row_node_ids=tensor([5, 1]), original_edge_ids=tensor([8, 0]), original_column_node_ids=tensor([5, 1]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2], dtype=torch.int32), indices=tensor([0, 0], dtype=torch.int32), ), original_row_node_ids=tensor([5, 1]), original_edge_ids=tensor([8, 0]), original_column_node_ids=tensor([5, 1]), )], node_features={'feat': tensor([[0.5160, 0.2486], [0.6172, 0.7865]])}, labels=None, input_nodes=tensor([5, 1]), indexes=None, edge_features=[{'feat': tensor([[0.8972, 0.7511, 0.3617], [0.5123, 0.1709, 0.6150]])}, {'feat': tensor([[0.8972, 0.7511, 0.3617], [0.5123, 0.1709, 0.6150]])}], compacted_seeds=None, blocks=[Block(num_src_nodes=2, num_dst_nodes=2, num_edges=2), Block(num_src_nodes=2, num_dst_nodes=2, num_edges=2)], )""" ), str( """MiniBatch(seeds=tensor([2, 4]), sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2, 3], dtype=torch.int32), indices=tensor([2, 1, 2], dtype=torch.int32), ), original_row_node_ids=tensor([2, 4, 3]), original_edge_ids=tensor([1, 6, 3]), original_column_node_ids=tensor([2, 4, 3]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 2], dtype=torch.int32), indices=tensor([2, 1], dtype=torch.int32), ), original_row_node_ids=tensor([2, 4, 3]), original_edge_ids=tensor([2, 6]), original_column_node_ids=tensor([2, 4]), )], node_features={'feat': tensor([[0.2109, 0.1089], [0.5503, 0.8223], [0.8672, 0.2276]])}, labels=None, input_nodes=tensor([2, 4, 3]), indexes=None, edge_features=[{'feat': tensor([[0.1476, 0.1902, 0.1314], [0.4088, 0.8200, 0.1851], [0.3708, 0.7631, 0.2683]])}, {'feat': tensor([[0.2582, 0.5203, 0.6228], [0.4088, 0.8200, 0.1851]])}], compacted_seeds=None, blocks=[Block(num_src_nodes=3, num_dst_nodes=3, num_edges=3), Block(num_src_nodes=3, num_dst_nodes=2, num_edges=2)], )""" ), str( """MiniBatch(seeds=tensor([3, 0]), sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1], 
dtype=torch.int32), indices=tensor([0], dtype=torch.int32), ), original_row_node_ids=tensor([3, 0]), original_edge_ids=tensor([3]), original_column_node_ids=tensor([3, 0]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 1], dtype=torch.int32), indices=tensor([0], dtype=torch.int32), ), original_row_node_ids=tensor([3, 0]), original_edge_ids=tensor([3]), original_column_node_ids=tensor([3, 0]), )], node_features={'feat': tensor([[0.8672, 0.2276], [0.9634, 0.2294]])}, labels=None, input_nodes=tensor([3, 0]), indexes=None, edge_features=[{'feat': tensor([[0.3708, 0.7631, 0.2683]])}, {'feat': tensor([[0.3708, 0.7631, 0.2683]])}], compacted_seeds=None, blocks=[Block(num_src_nodes=2, num_dst_nodes=2, num_edges=1), Block(num_src_nodes=2, num_dst_nodes=2, num_edges=1)], )""" ), ] for step, data in enumerate(dataloader): assert expected[step] == str(data), print(step, data) ================================================ FILE: tests/python/pytorch/graphbolt/test_item_sampler.py ================================================ import os import re import unittest from collections import defaultdict from sys import platform import backend as F import dgl import pytest import torch import torch.distributed as dist import torch.multiprocessing as mp from dgl import graphbolt as gb def test_ItemSampler_minibatcher(): # Default minibatcher is used if not specified. # Warning message is raised if names are not specified. item_set = gb.ItemSet(torch.arange(0, 10)) item_sampler = gb.ItemSampler(item_set, batch_size=4) with pytest.warns( UserWarning, match=re.escape( "Failed to map item list to `MiniBatch` as the names of items are " "not provided. Please provide a customized `MiniBatcher`. The " "item list is returned as is." ), ): minibatch = next(iter(item_sampler)) assert not isinstance(minibatch, gb.MiniBatch) # Default minibatcher is used if not specified. # Warning message is raised if unrecognized names are specified. 
# Continuation of test_ItemSampler_minibatcher (its header is on the previous line).
    item_set = gb.ItemSet(torch.arange(0, 10), names="unknown_name")
    item_sampler = gb.ItemSampler(item_set, batch_size=4)
    with pytest.warns(
        UserWarning,
        match=re.escape(
            "Unknown item name 'unknown_name' is detected and added into "
            "`MiniBatch`. You probably need to provide a customized "
            "`MiniBatcher`."
        ),
    ):
        minibatch = next(iter(item_sampler))
        # The unknown name still ends up as an attribute on the MiniBatch.
        assert isinstance(minibatch, gb.MiniBatch)
        assert minibatch.unknown_name is not None

    # Default minibatcher is used if not specified.
    # `MiniBatch` is returned if expected names are specified.
    item_set = gb.ItemSet(torch.arange(0, 10), names="seeds")
    item_sampler = gb.ItemSampler(item_set, batch_size=4)
    minibatch = next(iter(item_sampler))
    assert isinstance(minibatch, gb.MiniBatch)
    assert minibatch.seeds is not None
    assert len(minibatch.seeds) == 4

    # Customized minibatcher is used if specified.
    def minibatcher(batch, names):
        # Ignores `names` and stores the whole batch as seeds.
        return gb.MiniBatch(seeds=batch)

    item_sampler = gb.ItemSampler(
        item_set, batch_size=4, minibatcher=minibatcher
    )
    minibatch = next(iter(item_sampler))
    assert isinstance(minibatch, gb.MiniBatch)
    assert minibatch.seeds is not None
    assert len(minibatch.seeds) == 4


@pytest.mark.parametrize("batch_size", [1, 4])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_ItemSet_integer(batch_size, shuffle, drop_last):
    """Batch an ItemSet built from a plain integer and check batch sizes."""
    # Node IDs.
num_ids = 103 item_set = gb.ItemSet(num_ids, names="seeds") item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) minibatch_ids = [] for i, minibatch in enumerate(item_sampler): assert isinstance(minibatch, gb.MiniBatch) assert minibatch.seeds is not None assert minibatch.labels is None is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: assert len(minibatch.seeds) == batch_size else: if not drop_last: assert len(minibatch.seeds) == num_ids % batch_size else: assert False minibatch_ids.append(minibatch.seeds) minibatch_ids = torch.cat(minibatch_ids) assert torch.all(minibatch_ids[:-1] <= minibatch_ids[1:]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_seed_nodes(batch_size, shuffle, drop_last): # Node IDs. num_ids = 103 seed_nodes = torch.arange(0, num_ids) item_set = gb.ItemSet(seed_nodes, names="seeds") item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) minibatch_ids = [] for i, minibatch in enumerate(item_sampler): assert isinstance(minibatch, gb.MiniBatch) assert minibatch.seeds is not None assert minibatch.labels is None is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: assert len(minibatch.seeds) == batch_size else: if not drop_last: assert len(minibatch.seeds) == num_ids % batch_size else: assert False minibatch_ids.append(minibatch.seeds) minibatch_ids = torch.cat(minibatch_ids) assert torch.all(minibatch_ids[:-1] <= minibatch_ids[1:]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_seed_nodes_labels(batch_size, shuffle, drop_last): # Node IDs. 
num_ids = 103 seed_nodes = torch.arange(0, num_ids) labels = torch.arange(0, num_ids) item_set = gb.ItemSet((seed_nodes, labels), names=("seeds", "labels")) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) minibatch_ids = [] minibatch_labels = [] for i, minibatch in enumerate(item_sampler): assert isinstance(minibatch, gb.MiniBatch) assert minibatch.seeds is not None assert minibatch.labels is not None assert len(minibatch.seeds) == len(minibatch.labels) is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: assert len(minibatch.seeds) == batch_size else: if not drop_last: assert len(minibatch.seeds) == num_ids % batch_size else: assert False minibatch_ids.append(minibatch.seeds) minibatch_labels.append(minibatch.labels) minibatch_ids = torch.cat(minibatch_ids) minibatch_labels = torch.cat(minibatch_labels) assert torch.all(minibatch_ids[:-1] <= minibatch_ids[1:]) is not shuffle assert ( torch.all(minibatch_labels[:-1] <= minibatch_labels[1:]) is not shuffle ) @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_node_pairs(batch_size, shuffle, drop_last): # Node pairs. 
num_ids = 103 node_pairs = torch.arange(0, 2 * num_ids).reshape(-1, 2) item_set = gb.ItemSet(node_pairs, names="seeds") item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) src_ids = [] dst_ids = [] for i, minibatch in enumerate(item_sampler): assert minibatch.seeds is not None assert isinstance(minibatch.seeds, torch.Tensor) assert minibatch.labels is None src, dst = minibatch.seeds.T is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: expected_batch_size = num_ids % batch_size else: assert False assert len(src) == expected_batch_size assert len(dst) == expected_batch_size # Verify src and dst IDs match. assert torch.equal(src + 1, dst) # Archive batch. src_ids.append(src) dst_ids.append(dst) src_ids = torch.cat(src_ids) dst_ids = torch.cat(dst_ids) assert torch.all(src_ids[:-1] <= src_ids[1:]) is not shuffle assert torch.all(dst_ids[:-1] <= dst_ids[1:]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_node_pairs_labels(batch_size, shuffle, drop_last): # Node pairs and labels num_ids = 103 node_pairs = torch.arange(0, 2 * num_ids).reshape(-1, 2) labels = node_pairs[:, 0] item_set = gb.ItemSet((node_pairs, labels), names=("seeds", "labels")) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) src_ids = [] dst_ids = [] labels = [] for i, minibatch in enumerate(item_sampler): assert minibatch.seeds is not None assert isinstance(minibatch.seeds, torch.Tensor) assert minibatch.labels is not None src, dst = minibatch.seeds.T label = minibatch.labels assert len(src) == len(dst) assert len(src) == len(label) is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: 
expected_batch_size = num_ids % batch_size else: assert False assert len(src) == expected_batch_size assert len(dst) == expected_batch_size assert len(label) == expected_batch_size # Verify src/dst IDs and labels match. assert torch.equal(src + 1, dst) assert torch.equal(src, label) # Archive batch. src_ids.append(src) dst_ids.append(dst) labels.append(label) src_ids = torch.cat(src_ids) dst_ids = torch.cat(dst_ids) labels = torch.cat(labels) assert torch.all(src_ids[:-1] <= src_ids[1:]) is not shuffle assert torch.all(dst_ids[:-1] <= dst_ids[1:]) is not shuffle assert torch.all(labels[:-1] <= labels[1:]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_node_pairs_labels_indexes(batch_size, shuffle, drop_last): # Node pairs and negative destinations. num_ids = 103 num_negs = 2 node_pairs = torch.arange(0, 2 * num_ids).reshape(-1, 2) neg_srcs = node_pairs[:, 0].repeat_interleave(num_negs) neg_dsts = torch.arange(2 * num_ids, 2 * num_ids + num_ids * num_negs) neg_node_pairs = torch.cat((neg_srcs, neg_dsts)).reshape(2, -1).T labels = torch.empty(num_ids * 3) labels[:num_ids] = 1 labels[num_ids:] = 0 indexes = torch.cat( ( torch.arange(0, num_ids), torch.arange(0, num_ids).repeat_interleave(num_negs), ) ) node_pairs = torch.cat((node_pairs, neg_node_pairs)) item_set = gb.ItemSet( (node_pairs, labels, indexes), names=("seeds", "labels", "indexes") ) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) src_ids = [] dst_ids = [] negs_ids = [] final_labels = [] final_indexes = [] for i, minibatch in enumerate(item_sampler): assert minibatch.seeds is not None assert isinstance(minibatch.seeds, torch.Tensor) assert minibatch.labels is not None assert minibatch.indexes is not None src, dst = minibatch.seeds.T negs_src = src[~minibatch.labels.to(bool)] negs_dst = dst[~minibatch.labels.to(bool)] 
is_last = (i + 1) * batch_size >= num_ids * 3 if not is_last or num_ids * 3 % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: expected_batch_size = num_ids * 3 % batch_size else: assert False assert len(src) == expected_batch_size assert len(dst) == expected_batch_size assert negs_src.dim() == 1 assert negs_dst.dim() == 1 assert torch.equal((negs_dst - 2 * num_ids) // 2 * 2, negs_src) # Archive batch. src_ids.append(src) dst_ids.append(dst) negs_ids.append(negs_dst) final_labels.append(minibatch.labels) final_indexes.append(minibatch.indexes) src_ids = torch.cat(src_ids) dst_ids = torch.cat(dst_ids) negs_ids = torch.cat(negs_ids) final_labels = torch.cat(final_labels) final_indexes = torch.cat(final_indexes) assert torch.all(src_ids[:-1] <= src_ids[1:]) is not shuffle assert torch.all(dst_ids[:-1] <= dst_ids[1:]) is not shuffle assert torch.all(negs_ids[:-1] <= negs_ids[1:]) is not shuffle assert torch.all(final_labels[:-1] >= final_labels[1:]) is not shuffle if not drop_last: assert final_labels.sum() == num_ids assert torch.equal(final_indexes, indexes) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_hyperlink(batch_size, shuffle, drop_last): # Node pairs. 
num_ids = 103 seeds = torch.arange(0, 3 * num_ids).reshape(-1, 3) item_set = gb.ItemSet(seeds, names="seeds") item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) seeds_ids = [] for i, minibatch in enumerate(item_sampler): assert minibatch.seeds is not None assert isinstance(minibatch.seeds, torch.Tensor) assert minibatch.labels is None is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: expected_batch_size = num_ids % batch_size else: assert False assert minibatch.seeds.shape == (expected_batch_size, 3) # Verify seeds match. assert torch.equal(minibatch.seeds[:, 0] + 1, minibatch.seeds[:, 1]) assert torch.equal(minibatch.seeds[:, 1] + 1, minibatch.seeds[:, 2]) # Archive batch. seeds_ids.append(minibatch.seeds) seeds_ids = torch.cat(seeds_ids) assert torch.all(seeds_ids[:-1, 0] <= seeds_ids[1:, 0]) is not shuffle assert torch.all(seeds_ids[:-1, 1] <= seeds_ids[1:, 1]) is not shuffle assert torch.all(seeds_ids[:-1, 2] <= seeds_ids[1:, 2]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_ItemSet_seeds_labels(batch_size, shuffle, drop_last): # Node pairs and labels num_ids = 103 seeds = torch.arange(0, 3 * num_ids).reshape(-1, 3) labels = seeds[:, 0] item_set = gb.ItemSet((seeds, labels), names=("seeds", "labels")) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) seeds_ids = [] labels = [] for i, minibatch in enumerate(item_sampler): assert minibatch.seeds is not None assert isinstance(minibatch.seeds, torch.Tensor) assert minibatch.labels is not None label = minibatch.labels assert len(minibatch.seeds) == len(label) is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: expected_batch_size = batch_size else: if not 
drop_last: expected_batch_size = num_ids % batch_size else: assert False assert minibatch.seeds.shape == (expected_batch_size, 3) assert len(label) == expected_batch_size # Verify seeds and labels match. assert torch.equal(minibatch.seeds[:, 0] + 1, minibatch.seeds[:, 1]) assert torch.equal(minibatch.seeds[:, 1] + 1, minibatch.seeds[:, 2]) # Archive batch. seeds_ids.append(minibatch.seeds) labels.append(label) seeds_ids = torch.cat(seeds_ids) labels = torch.cat(labels) assert torch.all(seeds_ids[:-1, 0] <= seeds_ids[1:, 0]) is not shuffle assert torch.all(seeds_ids[:-1, 1] <= seeds_ids[1:, 1]) is not shuffle assert torch.all(seeds_ids[:-1, 2] <= seeds_ids[1:, 2]) is not shuffle assert torch.all(labels[:-1] <= labels[1:]) is not shuffle def test_append_with_other_datapipes(): num_ids = 100 batch_size = 4 item_set = gb.ItemSet(torch.arange(0, num_ids), names="seeds") data_pipe = gb.ItemSampler(item_set, batch_size) for i, data in enumerate(data_pipe): expected = torch.full((batch_size,), i * batch_size) expected = expected + torch.tensor([0, 1, 2, 3]) assert torch.equal(data.seeds, expected) @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_HeteroItemSet_seed_nodes(batch_size, shuffle, drop_last): # Node IDs. 
num_ids = 205 ids = { "user": gb.ItemSet(torch.arange(0, 99), names="seeds"), "item": gb.ItemSet(torch.arange(99, num_ids), names="seeds"), } chained_ids = [] for key, value in ids.items(): chained_ids += [(key, v) for v in value] item_set = gb.HeteroItemSet(ids) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) minibatch_ids = [] for i, minibatch in enumerate(item_sampler): is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: expected_batch_size = num_ids % batch_size else: assert False assert isinstance(minibatch, gb.MiniBatch) assert minibatch.seeds is not None ids = [] for _, v in minibatch.seeds.items(): ids.append(v) ids = torch.cat(ids) assert len(ids) == expected_batch_size minibatch_ids.append(ids) minibatch_ids = torch.cat(minibatch_ids) assert torch.all(minibatch_ids[:-1] <= minibatch_ids[1:]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_HeteroItemSet_seed_nodes_labels(batch_size, shuffle, drop_last): # Node IDs. 
num_ids = 205 ids = { "user": gb.ItemSet( (torch.arange(0, 99), torch.arange(0, 99)), names=("seeds", "labels"), ), "item": gb.ItemSet( (torch.arange(99, num_ids), torch.arange(99, num_ids)), names=("seeds", "labels"), ), } chained_ids = [] for key, value in ids.items(): chained_ids += [(key, v) for v in value] item_set = gb.HeteroItemSet(ids) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) minibatch_ids = [] minibatch_labels = [] for i, minibatch in enumerate(item_sampler): assert isinstance(minibatch, gb.MiniBatch) assert minibatch.seeds is not None assert minibatch.labels is not None is_last = (i + 1) * batch_size >= num_ids if not is_last or num_ids % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: expected_batch_size = num_ids % batch_size else: assert False ids = [] for _, v in minibatch.seeds.items(): ids.append(v) ids = torch.cat(ids) assert len(ids) == expected_batch_size minibatch_ids.append(ids) labels = [] for _, v in minibatch.labels.items(): labels.append(v) labels = torch.cat(labels) assert len(labels) == expected_batch_size minibatch_labels.append(labels) minibatch_ids = torch.cat(minibatch_ids) minibatch_labels = torch.cat(minibatch_labels) assert torch.all(minibatch_ids[:-1] <= minibatch_ids[1:]) is not shuffle assert ( torch.all(minibatch_labels[:-1] <= minibatch_labels[1:]) is not shuffle ) @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_HeteroItemSet_node_pairs(batch_size, shuffle, drop_last): # Node pairs. 
num_ids = 103 total_pairs = 2 * num_ids node_pairs_like = torch.arange(0, num_ids * 2).reshape(-1, 2) node_pairs_follow = torch.arange(num_ids * 2, num_ids * 4).reshape(-1, 2) node_pairs_dict = { "user:like:item": gb.ItemSet(node_pairs_like, names="seeds"), "user:follow:user": gb.ItemSet(node_pairs_follow, names="seeds"), } item_set = gb.HeteroItemSet(node_pairs_dict) item_sampler = gb.ItemSampler( item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last ) src_ids = [] dst_ids = [] for i, minibatch in enumerate(item_sampler): assert isinstance(minibatch, gb.MiniBatch) assert minibatch.seeds is not None assert minibatch.labels is None is_last = (i + 1) * batch_size >= total_pairs if not is_last or total_pairs % batch_size == 0: expected_batch_size = batch_size else: if not drop_last: expected_batch_size = total_pairs % batch_size else: assert False src = [] dst = [] for _, (seeds) in minibatch.seeds.items(): assert isinstance(seeds, torch.Tensor) src.append(seeds[:, 0]) dst.append(seeds[:, 1]) src = torch.cat(src) dst = torch.cat(dst) assert len(src) == expected_batch_size assert len(dst) == expected_batch_size src_ids.append(src) dst_ids.append(dst) assert torch.equal(src + 1, dst) src_ids = torch.cat(src_ids) dst_ids = torch.cat(dst_ids) assert torch.all(src_ids[:-1] <= src_ids[1:]) is not shuffle assert torch.all(dst_ids[:-1] <= dst_ids[1:]) is not shuffle @pytest.mark.parametrize("batch_size", [1, 4]) @pytest.mark.parametrize("shuffle", [True, False]) @pytest.mark.parametrize("drop_last", [True, False]) def test_HeteroItemSet_node_pairs_labels(batch_size, shuffle, drop_last): # Node pairs and labels num_ids = 103 total_ids = 2 * num_ids node_pairs_like = torch.arange(0, num_ids * 2).reshape(-1, 2) node_pairs_follow = torch.arange(num_ids * 2, num_ids * 4).reshape(-1, 2) labels = torch.arange(0, num_ids) node_pairs_dict = { "user:like:item": gb.ItemSet( (node_pairs_like, node_pairs_like[:, 0]), names=("seeds", "labels"), ), "user:follow:user": 
@pytest.mark.parametrize("batch_size", [1, 4])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_HeteroItemSet_node_pairs_labels(batch_size, shuffle, drop_last):
    """Sample labelled node pairs of two edge types through an ``ItemSampler``.

    The label of each pair is its source ID, so seeds and labels can be
    cross-checked per minibatch. Sources, destinations and labels must be in
    ascending order exactly when ``shuffle`` is disabled.
    """
    # Node pairs and labels
    num_ids = 103
    total_ids = 2 * num_ids
    node_pairs_like = torch.arange(0, num_ids * 2).reshape(-1, 2)
    node_pairs_follow = torch.arange(num_ids * 2, num_ids * 4).reshape(-1, 2)
    node_pairs_dict = {
        "user:like:item": gb.ItemSet(
            (node_pairs_like, node_pairs_like[:, 0]),
            names=("seeds", "labels"),
        ),
        "user:follow:user": gb.ItemSet(
            (node_pairs_follow, node_pairs_follow[:, 0]),
            names=("seeds", "labels"),
        ),
    }
    item_set = gb.HeteroItemSet(node_pairs_dict)
    item_sampler = gb.ItemSampler(
        item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last
    )
    src_ids = []
    dst_ids = []
    labels = []
    for i, minibatch in enumerate(item_sampler):
        assert isinstance(minibatch, gb.MiniBatch)
        assert minibatch.seeds is not None
        assert minibatch.labels is not None
        is_last = (i + 1) * batch_size >= total_ids
        if not is_last or total_ids % batch_size == 0:
            expected_batch_size = batch_size
        else:
            # A partial trailing batch may only show up when it is not dropped.
            assert not drop_last, "Partial batch yielded despite drop_last."
            expected_batch_size = total_ids % batch_size
        src = []
        dst = []
        for seeds in minibatch.seeds.values():
            assert isinstance(seeds, torch.Tensor)
            src.append(seeds[:, 0])
            dst.append(seeds[:, 1])
        src = torch.cat(src)
        dst = torch.cat(dst)
        label = torch.cat(list(minibatch.labels.values()))
        assert len(src) == expected_batch_size
        assert len(dst) == expected_batch_size
        assert len(label) == expected_batch_size
        src_ids.append(src)
        dst_ids.append(dst)
        labels.append(label)
        assert torch.equal(src + 1, dst)
        assert torch.equal(src, label)
    src_ids = torch.cat(src_ids)
    dst_ids = torch.cat(dst_ids)
    labels = torch.cat(labels)
    # torch.all() returns a Tensor; convert it to a Python bool, otherwise the
    # identity comparison with ``shuffle`` is always true.
    assert bool(torch.all(src_ids[:-1] <= src_ids[1:])) is not shuffle
    assert bool(torch.all(dst_ids[:-1] <= dst_ids[1:])) is not shuffle
    assert bool(torch.all(labels[:-1] <= labels[1:])) is not shuffle
@pytest.mark.parametrize("batch_size", [1, 4])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_HeteroItemSet_node_pairs_labels_indexes(
    batch_size, shuffle, drop_last
):
    """Sample positive and negative node pairs with labels and indexes.

    For each edge type the item set holds ``num_ids`` positive pairs followed
    by ``num_ids * num_negs`` negative pairs, so the source/destination columns
    are *not* globally sorted even without shuffling. The order check therefore
    compares the sampled pairs with the original item order instead of testing
    for sortedness.
    """
    # Head, tail and negative tails.
    num_ids = 103
    total_ids = 6 * num_ids
    num_negs = 2
    node_pairs_like = torch.arange(0, num_ids * 2).reshape(-1, 2)
    node_pairs_follow = torch.arange(num_ids * 2, num_ids * 4).reshape(-1, 2)
    neg_dsts_like = torch.arange(num_ids * 4, num_ids * 4 + num_ids * num_negs)
    neg_node_pairs_like = (
        torch.cat(
            (node_pairs_like[:, 0].repeat_interleave(num_negs), neg_dsts_like)
        )
        .view(2, -1)
        .T
    )
    all_node_pairs_like = torch.cat((node_pairs_like, neg_node_pairs_like))
    labels_like = torch.empty(num_ids * 3)
    labels_like[:num_ids] = 1
    labels_like[num_ids:] = 0
    indexes_like = torch.cat(
        (
            torch.arange(0, num_ids),
            torch.arange(0, num_ids).repeat_interleave(num_negs),
        )
    )
    neg_dsts_follow = torch.arange(
        num_ids * 4 + num_ids * num_negs, num_ids * 4 + num_ids * num_negs * 2
    )
    neg_node_pairs_follow = (
        torch.cat(
            (
                node_pairs_follow[:, 0].repeat_interleave(num_negs),
                neg_dsts_follow,
            )
        )
        .view(2, -1)
        .T
    )
    all_node_pairs_follow = torch.cat(
        (node_pairs_follow, neg_node_pairs_follow)
    )
    labels_follow = torch.empty(num_ids * 3)
    labels_follow[:num_ids] = 1
    labels_follow[num_ids:] = 0
    indexes_follow = torch.cat(
        (
            torch.arange(0, num_ids),
            torch.arange(0, num_ids).repeat_interleave(num_negs),
        )
    )
    data_dict = {
        "user:like:item": gb.ItemSet(
            (all_node_pairs_like, labels_like, indexes_like),
            names=("seeds", "labels", "indexes"),
        ),
        "user:follow:user": gb.ItemSet(
            (all_node_pairs_follow, labels_follow, indexes_follow),
            names=("seeds", "labels", "indexes"),
        ),
    }
    # Reference data used by the order checks after sampling.
    expected_pairs = torch.cat((all_node_pairs_like, all_node_pairs_follow))
    expected_indexes = {
        "user:like:item": indexes_like,
        "user:follow:user": indexes_follow,
    }

    item_set = gb.HeteroItemSet(data_dict)
    item_sampler = gb.ItemSampler(
        item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last
    )
    src_ids = []
    dst_ids = []
    negs_ids = []
    final_labels = defaultdict(list)
    final_indexes = defaultdict(list)
    for i, minibatch in enumerate(item_sampler):
        assert isinstance(minibatch, gb.MiniBatch)
        assert minibatch.seeds is not None
        assert minibatch.labels is not None
        assert minibatch.indexes is not None
        is_last = (i + 1) * batch_size >= total_ids
        if not is_last or total_ids % batch_size == 0:
            expected_batch_size = batch_size
        else:
            # A partial trailing batch may only show up when it is not dropped.
            assert not drop_last, "Partial batch yielded despite drop_last."
            expected_batch_size = total_ids % batch_size
        src = []
        dst = []
        negs_src = []
        negs_dst = []
        for etype, seeds in minibatch.seeds.items():
            assert isinstance(seeds, torch.Tensor)
            src_etype = seeds[:, 0]
            dst_etype = seeds[:, 1]
            src.append(src_etype)
            dst.append(dst_etype)
            # Label 0 marks a negative pair.
            is_negative = ~minibatch.labels[etype].to(bool)
            negs_src.append(src_etype[is_negative])
            negs_dst.append(dst_etype[is_negative])
            final_labels[etype].append(minibatch.labels[etype])
            final_indexes[etype].append(minibatch.indexes[etype])
        src = torch.cat(src)
        dst = torch.cat(dst)
        negs_src = torch.cat(negs_src)
        negs_dst = torch.cat(negs_dst)
        assert len(src) == expected_batch_size
        assert len(dst) == expected_batch_size
        src_ids.append(src)
        dst_ids.append(dst)
        negs_ids.append(negs_dst)
        assert negs_src.dim() == 1
        assert negs_dst.dim() == 1
        # The k-th negative destination (num_ids * 4 + k) pairs with the
        # source of positive pair k // num_negs.
        assert torch.equal(negs_src, (negs_dst - num_ids * 4) // 2 * 2)
    src_ids = torch.cat(src_ids)
    dst_ids = torch.cat(dst_ids)
    negs_ids = torch.cat(negs_ids)
    # Without shuffling, items come out in their original order (possibly
    # truncated by drop_last); with shuffling they must not.
    sampled_pairs = torch.stack((src_ids, dst_ids), dim=1)
    in_original_order = torch.equal(
        sampled_pairs, expected_pairs[: len(sampled_pairs)]
    )
    assert in_original_order is not shuffle
    # Negative destinations are globally increasing in the original data.
    # Compare neighbouring elements (not the tensor with itself) and convert
    # the Tensor result to bool before comparing with ``shuffle``.
    assert bool(torch.all(negs_ids[:-1] <= negs_ids[1:])) is not shuffle
    for etype in data_dict.keys():
        final_labels_etype = torch.cat(final_labels[etype])
        final_indexes_etype = torch.cat(final_indexes[etype])
        # Positives (label 1) precede negatives (label 0) when not shuffled.
        assert (
            bool(torch.all(final_labels_etype[:-1] >= final_labels_etype[1:]))
            is not shuffle
        )
        if not drop_last:
            assert final_labels_etype.sum() == num_ids
            assert (
                torch.equal(final_indexes_etype, expected_indexes[etype])
                is not shuffle
            )
@pytest.mark.parametrize("batch_size", [1, 4])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_HeteroItemSet_hyperlink(batch_size, shuffle, drop_last):
    """Sample 3-column seeds of two edge types through an ``ItemSampler``.

    Every minibatch must carry seeds only and have shape
    ``(expected_batch_size, 3)``. Each seed column must be in ascending order
    exactly when ``shuffle`` is disabled.
    """
    # Node pairs.
    num_ids = 103
    total_pairs = 2 * num_ids
    seeds_like = torch.arange(0, num_ids * 3).reshape(-1, 3)
    seeds_follow = torch.arange(num_ids * 3, num_ids * 6).reshape(-1, 3)
    seeds_dict = {
        "user:like:item": gb.ItemSet(seeds_like, names="seeds"),
        "user:follow:user": gb.ItemSet(seeds_follow, names="seeds"),
    }
    item_set = gb.HeteroItemSet(seeds_dict)
    item_sampler = gb.ItemSampler(
        item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last
    )
    seeds_ids = []
    for i, minibatch in enumerate(item_sampler):
        assert isinstance(minibatch, gb.MiniBatch)
        assert minibatch.seeds is not None
        assert minibatch.labels is None
        assert minibatch.indexes is None
        is_last = (i + 1) * batch_size >= total_pairs
        if not is_last or total_pairs % batch_size == 0:
            expected_batch_size = batch_size
        else:
            # A partial trailing batch may only show up when it is not dropped.
            assert not drop_last, "Partial batch yielded despite drop_last."
            expected_batch_size = total_pairs % batch_size
        seeds_lst = []
        for seeds in minibatch.seeds.values():
            assert isinstance(seeds, torch.Tensor)
            seeds_lst.append(seeds)
        seeds_lst = torch.cat(seeds_lst)
        assert seeds_lst.shape == (expected_batch_size, 3)
        seeds_ids.append(seeds_lst)
        # Each row was built as (3k, 3k + 1, 3k + 2).
        assert torch.equal(seeds_lst[:, 0] + 1, seeds_lst[:, 1])
        assert torch.equal(seeds_lst[:, 1] + 1, seeds_lst[:, 2])
    seeds_ids = torch.cat(seeds_ids)
    # torch.all() returns a Tensor; convert it to a Python bool, otherwise the
    # identity comparison with ``shuffle`` is always true.
    for column in range(3):
        is_sorted = bool(
            torch.all(seeds_ids[:-1, column] <= seeds_ids[1:, column])
        )
        assert is_sorted is not shuffle
@pytest.mark.parametrize("batch_size", [1, 4])
@pytest.mark.parametrize("shuffle", [True, False])
@pytest.mark.parametrize("drop_last", [True, False])
def test_HeteroItemSet_hyperlink_labels(batch_size, shuffle, drop_last):
    """Sample labelled 3-column seeds of two edge types.

    The label of each row is its first column, so seeds and labels can be
    cross-checked per minibatch. Every seed column and the labels must be in
    ascending order exactly when ``shuffle`` is disabled.
    """
    # Node pairs and labels
    num_ids = 103
    total_ids = 2 * num_ids
    seeds_like = torch.arange(0, num_ids * 3).reshape(-1, 3)
    seeds_follow = torch.arange(num_ids * 3, num_ids * 6).reshape(-1, 3)
    seeds_dict = {
        "user:like:item": gb.ItemSet(
            (seeds_like, seeds_like[:, 0]),
            names=("seeds", "labels"),
        ),
        "user:follow:user": gb.ItemSet(
            (seeds_follow, seeds_follow[:, 0]),
            names=("seeds", "labels"),
        ),
    }
    item_set = gb.HeteroItemSet(seeds_dict)
    item_sampler = gb.ItemSampler(
        item_set, batch_size=batch_size, shuffle=shuffle, drop_last=drop_last
    )
    seeds_ids = []
    labels = []
    for i, minibatch in enumerate(item_sampler):
        assert isinstance(minibatch, gb.MiniBatch)
        assert minibatch.seeds is not None
        assert minibatch.labels is not None
        assert minibatch.indexes is None
        is_last = (i + 1) * batch_size >= total_ids
        if not is_last or total_ids % batch_size == 0:
            expected_batch_size = batch_size
        else:
            # A partial trailing batch may only show up when it is not dropped.
            assert not drop_last, "Partial batch yielded despite drop_last."
            expected_batch_size = total_ids % batch_size
        seeds_lst = []
        for seeds in minibatch.seeds.values():
            assert isinstance(seeds, torch.Tensor)
            seeds_lst.append(seeds)
        seeds_lst = torch.cat(seeds_lst)
        label = torch.cat(list(minibatch.labels.values()))
        assert seeds_lst.shape == (expected_batch_size, 3)
        assert len(label) == expected_batch_size
        seeds_ids.append(seeds_lst)
        labels.append(label)
        assert torch.equal(seeds_lst[:, 0] + 1, seeds_lst[:, 1])
        assert torch.equal(seeds_lst[:, 1] + 1, seeds_lst[:, 2])
        assert torch.equal(seeds_lst[:, 0], label)
    seeds_ids = torch.cat(seeds_ids)
    labels = torch.cat(labels)
    # torch.all() returns a Tensor; convert it to a Python bool, otherwise the
    # identity comparison with ``shuffle`` is always true.
    for column in range(3):
        is_sorted = bool(
            torch.all(seeds_ids[:-1, column] <= seeds_ids[1:, column])
        )
        assert is_sorted is not shuffle
    assert bool(torch.all(labels[:-1] <= labels[1:])) is not shuffle
def distributed_item_sampler_subprocess(
    proc_id,
    nprocs,
    item_set,
    num_ids,
    num_workers,
    batch_size,
    drop_last,
    drop_uneven_inputs,
):
    """Worker body run in each spawned process of the distributed sampler test.

    Joins a gloo process group as rank ``proc_id`` of ``nprocs``, iterates a
    shuffled ``DistributedItemSampler`` through a ``DataLoader`` and checks
    that:

    * every batch has ``batch_size`` seeds when ``drop_last`` is set,
    * the batch count equals the one broadcast from rank 0 when
      ``drop_uneven_inputs`` is set,
    * after summing the per-item counters onto rank 0, no item was sampled
      more than once.

    The process group is destroyed even if one of the assertions fails.
    """
    # On Windows, the init method can only be file.
    init_method = (
        f"file:///{os.path.join(os.getcwd(), 'dis_tempfile')}"
        if platform == "win32"
        else "tcp://127.0.0.1:12345"
    )
    dist.init_process_group(
        backend="gloo",  # Use Gloo backend for CPU multiprocessing
        init_method=init_method,
        world_size=nprocs,
        rank=proc_id,
    )
    try:
        # Create a DistributedItemSampler.
        item_sampler = gb.DistributedItemSampler(
            item_set,
            batch_size=batch_size,
            shuffle=True,
            drop_last=drop_last,
            drop_uneven_inputs=drop_uneven_inputs,
        )
        feature_fetcher = gb.FeatureFetcher(
            item_sampler,
            gb.BasicFeatureStore({}),
            [],
        )
        data_loader = gb.DataLoader(feature_fetcher, num_workers=num_workers)
        # Count how many times each item is sampled.
        sampled_count = torch.zeros(num_ids, dtype=torch.int32)
        for minibatch in data_loader:
            sampled_count[minibatch.seeds] += 1
            if drop_last:
                assert minibatch.seeds.size(0) == batch_size
        num_batches = len(list(item_sampler))
        if drop_uneven_inputs:
            num_batches_tensor = torch.tensor(num_batches)
            dist.broadcast(num_batches_tensor, 0)
            # Test if the number of batches are the same for all processes.
            assert num_batches_tensor == num_batches
        # Add up results from all processes.
        dist.reduce(sampled_count, 0)
        # Make sure no item is sampled more than once.
        assert sampled_count.max() <= 1
    finally:
        dist.destroy_process_group()
@pytest.mark.parametrize(
    "params",
    [
        ((24, 4, 0, 4, False, False), [(8, 8), (8, 8), (4, 4), (4, 4)]),
        ((30, 4, 0, 4, False, False), [(8, 8), (8, 8), (8, 8), (6, 6)]),
        ((30, 4, 0, 4, True, False), [(8, 8), (8, 8), (8, 8), (6, 4)]),
        ((30, 4, 0, 4, False, True), [(8, 8), (8, 8), (8, 8), (6, 6)]),
        ((30, 4, 0, 4, True, True), [(8, 4), (8, 4), (8, 4), (6, 4)]),
        (
            (53, 4, 2, 4, False, False),
            [(8, 8), (8, 8), (8, 8), (5, 5), (8, 8), (4, 4), (8, 8), (4, 4)],
        ),
        (
            (53, 4, 2, 4, True, False),
            [(8, 8), (8, 8), (9, 8), (4, 4), (8, 8), (4, 4), (8, 8), (4, 4)],
        ),
        (
            (53, 4, 2, 4, False, True),
            [(10, 8), (6, 4), (9, 8), (4, 4), (8, 8), (4, 4), (8, 8), (4, 4)],
        ),
        (
            (53, 4, 2, 4, True, True),
            [(10, 8), (6, 4), (9, 8), (4, 4), (8, 8), (4, 4), (8, 8), (4, 4)],
        ),
        (
            (63, 4, 2, 4, False, False),
            [(8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (7, 7)],
        ),
        (
            (63, 4, 2, 4, True, False),
            [(8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (10, 8), (5, 4)],
        ),
        (
            (63, 4, 2, 4, False, True),
            [(8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (7, 7)],
        ),
        (
            (63, 4, 2, 4, True, True),
            [
                (10, 8),
                (6, 4),
                (10, 8),
                (6, 4),
                (10, 8),
                (6, 4),
                (10, 8),
                (5, 4),
            ],
        ),
        (
            (65, 4, 2, 4, False, False),
            [(9, 9), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8)],
        ),
        (
            (65, 4, 2, 4, True, True),
            [(9, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8), (8, 8)],
        ),
    ],
)
def test_RangeCalculation(params):
    """Check ``gb.internal.calculate_range`` for every (rank, worker) pair.

    ``params`` is ``(config, key)``. ``config`` is ``(total, num_replicas,
    num_workers, batch_size, drop_last, drop_uneven_inputs)`` and ``key`` is
    the expected list of ``(result[1], result[2])`` pairs in rank-major,
    worker-minor order. The ranges must also be contiguous: each ``result[0]``
    has to equal the running sum of the preceding ``result[1]`` values.
    """
    (
        (
            total,
            num_replicas,
            num_workers,
            batch_size,
            drop_last,
            drop_uneven_inputs,
        ),
        key,
    ) = params
    answer = []
    # Running start offset; named so that it does not shadow builtin sum().
    next_start = 0
    for rank in range(num_replicas):
        # num_workers == 0 still has one (main-process) worker.
        for worker_id in range(max(num_workers, 1)):
            result = gb.internal.calculate_range(
                True,
                total,
                num_replicas,
                rank,
                num_workers,
                worker_id,
                batch_size,
                drop_last,
                drop_uneven_inputs,
            )
            assert next_start == result[0]
            next_start += result[1]
            answer.append((result[1], result[2]))
    assert key == answer
@unittest.skipIf(F._default_context_str != "cpu", reason="GPU not required.")
@pytest.mark.parametrize("num_ids", [24, 30, 32, 34, 36])
@pytest.mark.parametrize("num_workers", [0, 2])
@pytest.mark.parametrize("drop_last", [False, True])
@pytest.mark.parametrize("drop_uneven_inputs", [False, True])
def test_DistributedItemSampler(
    num_ids, num_workers, drop_last, drop_uneven_inputs
):
    """Spawn four processes that each run the distributed sampler checks."""
    nprocs = 4
    batch_size = 4
    seeds = torch.arange(0, num_ids)
    item_set = gb.ItemSet(seeds, names="seeds")

    if platform == "win32":
        # A leftover process-group init file may make the run hang on
        # Windows, so remove it up front; a missing file is fine.
        try:
            os.remove(os.path.join(os.getcwd(), "dis_tempfile"))
        except FileNotFoundError:
            pass

    # mp.spawn() passes the process index as the first argument itself.
    subprocess_args = (
        nprocs,
        item_set,
        num_ids,
        num_workers,
        batch_size,
        drop_last,
        drop_uneven_inputs,
    )
    mp.spawn(
        distributed_item_sampler_subprocess,
        args=subprocess_args,
        nprocs=nprocs,
        join=True,
    )
def test_ItemSet_names():
    """Check the ``names`` property of ``ItemSet`` and its validation."""
    # ItemSet with single name.
    item_set = gb.ItemSet(torch.arange(0, 5), names="seeds")
    assert item_set.names == ("seeds",)

    # ItemSet with multiple names.
    item_set = gb.ItemSet(
        (torch.arange(0, 5), torch.arange(5, 10)),
        names=("seeds", "labels"),
    )
    assert item_set.names == ("seeds", "labels")

    # ItemSet without name.
    item_set = gb.ItemSet(torch.arange(0, 5))
    assert item_set.names is None

    # Integer-initiated ItemSet with excessive names.
    with pytest.raises(
        AssertionError,
        match=re.escape("Number of items (1) and names (2) don't match."),
    ):
        _ = gb.ItemSet(5, names=("seeds", "labels"))

    # ItemSet with mismatched items and names.
    with pytest.raises(
        AssertionError,
        match=re.escape("Number of items (1) and names (2) don't match."),
    ):
        _ = gb.ItemSet(torch.arange(0, 5), names=("seeds", "labels"))


@pytest.mark.parametrize("dtype", [torch.int32, torch.int64])
def test_ItemSet_scalar_dtype(dtype):
    """An ItemSet built from a 0-dim tensor yields items of the same dtype."""
    item_set = gb.ItemSet(torch.tensor(5, dtype=dtype), names="seeds")
    for i, item in enumerate(item_set):
        assert i == item
        assert item.dtype == dtype
    assert item_set[2] == torch.tensor(2, dtype=dtype)
    assert torch.equal(
        item_set[slice(1, 4, 2)], torch.arange(1, 4, 2, dtype=dtype)
    )


def test_ItemSet_length():
    """Check ``len()`` and iteration of ``ItemSet`` for several item kinds."""
    # Integer with valid length
    num = 10
    item_set = gb.ItemSet(num)
    assert len(item_set) == 10
    # Test __iter__() method. Same as below.
    for i, item in enumerate(item_set):
        assert i == item

    # Single iterable with valid length.
    ids = torch.arange(0, 5)
    item_set = gb.ItemSet(ids)
    assert len(item_set) == 5
    for i, item in enumerate(item_set):
        assert i == item.item()

    # Tuple of iterables with valid length.
    item_set = gb.ItemSet((torch.arange(0, 5), torch.arange(5, 10)))
    assert len(item_set) == 5
    for i, (item1, item2) in enumerate(item_set):
        assert i == item1.item()
        assert i + 5 == item2.item()

    class InvalidLength:
        # Iterable on purpose without __len__.
        def __iter__(self):
            return iter([0, 1, 2])

    # ``match`` is a regular expression: escape the message so that "()" is
    # matched literally instead of as an empty group.
    no_len_message = re.escape("object of type 'InvalidLength' has no len()")

    # Single iterable with invalid length.
    with pytest.raises(TypeError, match=no_len_message):
        gb.ItemSet(InvalidLength())

    # Tuple of iterables with invalid length.
    with pytest.raises(TypeError, match=no_len_message):
        gb.ItemSet((InvalidLength(), InvalidLength()))
def test_ItemSet_seed_nodes():
    """Exercise iteration and indexing of a seeds-only ``ItemSet``.

    The same checks are run for an item set built from a tensor of node IDs
    and for one built from a single integer; integer indexing and error
    cases are then checked on the integer-built one.
    """
    # First a tensor of node IDs, then a single integer.
    for source in (torch.arange(0, 5), 5):
        item_set = gb.ItemSet(source, names="seeds")
        assert item_set.names == ("seeds",)
        # Iteration and one-by-one indexing agree with the position.
        for position, element in enumerate(item_set):
            assert position == element.item()
            assert position == item_set[position]
        # Slice indexing.
        every_other = item_set[::2]
        assert torch.equal(every_other, torch.tensor([0, 2, 4]))
        # Tensor (iterable) indexing.
        everything = item_set[torch.arange(0, 5)]
        assert torch.equal(everything, torch.arange(0, 5))

    # ``item_set`` is now the integer-initiated one.
    # Plain integer indexing, including a negative index.
    assert item_set[0] == 0
    assert item_set[-1] == 4
    # Out-of-range integers on either side.
    for bad_index in (5, -10):
        with pytest.raises(IndexError, match="ItemSet index out of range."):
            _ = item_set[bad_index]
    # Unsupported index type.
    with pytest.raises(
        TypeError,
        match="ItemSet indices must be int, slice, or torch.Tensor, not .",
    ):
        _ = item_set[1.5]


def test_ItemSet_seed_nodes_labels():
    """Exercise iteration and indexing of an ``ItemSet`` of (seed, label)."""
    seed_nodes = torch.arange(0, 5)
    labels = torch.randint(0, 3, (5,))
    item_set = gb.ItemSet((seed_nodes, labels), names=("seeds", "labels"))
    assert item_set.names == ("seeds", "labels")

    # Each yielded pair matches both the inputs and one-by-one indexing.
    for position, (node, node_label) in enumerate(item_set):
        assert node == seed_nodes[position]
        assert node_label == labels[position]
        assert node == item_set[position][0]
        assert node_label == item_set[position][1]

    # Full slice, then a tensor holding every index.
    for column, expected in enumerate((seed_nodes, labels)):
        assert torch.equal(item_set[:][column], expected)
    for column, expected in enumerate((seed_nodes, labels)):
        assert torch.equal(item_set[torch.arange(0, 5)][column], expected)
def test_ItemSet_node_pairs():
    """Exercise iteration and indexing of an ``ItemSet`` of node pairs."""
    node_pairs = torch.arange(0, 10).reshape(-1, 2)
    item_set = gb.ItemSet(node_pairs, names="seeds")
    assert item_set.names == ("seeds",)

    # Every yielded (src, dst) matches the input row and item_set[row].
    for row, (src, dst) in enumerate(item_set):
        expected = node_pairs[row]
        assert expected[0] == src
        assert expected[1] == dst
        assert expected[0] == item_set[row][0]
        assert expected[1] == item_set[row][1]

    # A full slice and a tensor of all indices both return every pair.
    assert torch.equal(item_set[:], node_pairs)
    all_rows = torch.arange(0, 5)
    assert torch.equal(item_set[all_rows], node_pairs)


def test_ItemSet_node_pairs_labels():
    """Exercise iteration and indexing of an ``ItemSet`` of (pair, label)."""
    node_pairs = torch.arange(0, 10).reshape(-1, 2)
    labels = torch.randint(0, 3, (5,))
    item_set = gb.ItemSet((node_pairs, labels), names=("seeds", "labels"))
    assert item_set.names == ("seeds", "labels")

    # Every yielded item matches the inputs and one-by-one indexing.
    for row, (pair, pair_label) in enumerate(item_set):
        assert torch.equal(node_pairs[row], pair)
        assert labels[row] == pair_label
        assert torch.equal(node_pairs[row], item_set[row][0])
        assert labels[row] == item_set[row][1]

    # Full slice, then a tensor holding every index.
    for column, expected in enumerate((node_pairs, labels)):
        assert torch.equal(item_set[:][column], expected)
    for column, expected in enumerate((node_pairs, labels)):
        assert torch.equal(item_set[torch.arange(0, 5)][column], expected)
def test_ItemSet_node_pairs_labels_indexes():
    """Exercise an ``ItemSet`` of (node pair, label, index) triples."""
    node_pairs = torch.arange(0, 10).reshape(-1, 2)
    labels = torch.tensor([1, 1, 0, 0, 0])
    indexes = torch.tensor([0, 1, 0, 0, 1])
    columns = (node_pairs, labels, indexes)
    item_set = gb.ItemSet(columns, names=("seeds", "labels", "indexes"))
    assert item_set.names == ("seeds", "labels", "indexes")

    # Every yielded triple matches the inputs ...
    for row, (pair, pair_label, pair_index) in enumerate(item_set):
        assert torch.equal(node_pairs[row], pair)
        assert torch.equal(labels[row], pair_label)
        assert torch.equal(indexes[row], pair_index)
        # ... and so does one-by-one indexing.
        for position, column in enumerate(columns):
            assert torch.equal(column[row], item_set[row][position])

    # Full slice, then a tensor holding every index.
    for position, column in enumerate(columns):
        assert torch.equal(item_set[:][position], column)
    for position, column in enumerate(columns):
        assert torch.equal(item_set[torch.arange(0, 5)][position], column)


def test_ItemSet_graphs():
    """An ``ItemSet`` can wrap a plain list of graphs."""
    graphs = [dgl.rand_graph(10, 20) for _ in range(5)]
    item_set = gb.ItemSet(graphs)
    assert item_set.names is None
    # Iteration and one-by-one indexing return the stored graphs.
    for position, graph in enumerate(item_set):
        assert graphs[position] == graph
        assert graphs[position] == item_set[position]
    # A full slice returns the whole list.
    assert item_set[:] == graphs


def test_HeteroItemSet_names():
    """Check the ``names`` property of ``HeteroItemSet`` and its validation."""
    # One name shared by both item sets.
    single_name = gb.HeteroItemSet(
        {
            "user": gb.ItemSet(torch.arange(0, 5), names="seeds"),
            "item": gb.ItemSet(torch.arange(5, 10), names="seeds"),
        }
    )
    assert single_name.names == ("seeds",)

    # Two names shared by both item sets.
    two_names = gb.HeteroItemSet(
        {
            "user": gb.ItemSet(
                (torch.arange(0, 5), torch.arange(5, 10)),
                names=("seeds", "labels"),
            ),
            "item": gb.ItemSet(
                (torch.arange(5, 10), torch.arange(10, 15)),
                names=("seeds", "labels"),
            ),
        }
    )
    assert two_names.names == ("seeds", "labels")

    # No names at all.
    unnamed = gb.HeteroItemSet(
        {
            "user": gb.ItemSet(torch.arange(0, 5)),
            "item": gb.ItemSet(torch.arange(5, 10)),
        }
    )
    assert unnamed.names is None

    # Item sets whose names differ are rejected.
    with pytest.raises(
        AssertionError,
        match=re.escape("All itemsets must have the same names."),
    ):
        _ = gb.HeteroItemSet(
            {
                "user": gb.ItemSet(
                    (torch.arange(0, 5), torch.arange(5, 10)),
                    names=("seeds", "labels"),
                ),
                "item": gb.ItemSet((torch.arange(5, 10),), names=("seeds",)),
            }
        )
def test_HeteroItemSet_length():
    """``len(HeteroItemSet)`` is the sum of the lengths of its item sets."""
    # Single iterable with valid length.
    user_ids = torch.arange(0, 5)
    item_ids = torch.arange(0, 5)
    item_set = gb.HeteroItemSet(
        {
            "user": gb.ItemSet(user_ids),
            "item": gb.ItemSet(item_ids),
        }
    )
    assert len(item_set) == len(user_ids) + len(item_ids)

    # Tuple of iterables with valid length.
    node_pairs_like = torch.arange(0, 10).reshape(-1, 2)
    neg_dsts_like = torch.arange(10, 20).reshape(-1, 2)
    node_pairs_follow = torch.arange(0, 10).reshape(-1, 2)
    neg_dsts_follow = torch.arange(10, 20).reshape(-1, 2)
    item_set = gb.HeteroItemSet(
        {
            "user:like:item": gb.ItemSet((node_pairs_like, neg_dsts_like)),
            "user:follow:user": gb.ItemSet(
                (node_pairs_follow, neg_dsts_follow)
            ),
        }
    )
    assert len(item_set) == node_pairs_like.size(0) + node_pairs_follow.size(0)

    class InvalidLength:
        # Iterable on purpose without __len__.
        def __iter__(self):
            return iter([0, 1, 2])

    # ``match`` is a regular expression: escape the message so that "()" is
    # matched literally instead of as an empty group.
    no_len_message = re.escape("object of type 'InvalidLength' has no len()")

    # Single iterable with invalid length.
    with pytest.raises(TypeError, match=no_len_message):
        gb.HeteroItemSet(
            {
                "user": gb.ItemSet(InvalidLength()),
                "item": gb.ItemSet(InvalidLength()),
            }
        )

    # Tuple of iterables with invalid length.
    with pytest.raises(TypeError, match=no_len_message):
        gb.HeteroItemSet(
            {
                "user:like:item": gb.ItemSet(
                    (InvalidLength(), InvalidLength())
                ),
                "user:follow:user": gb.ItemSet(
                    (InvalidLength(), InvalidLength())
                ),
            }
        )
def test_HeteroItemSet_iteration_seed_nodes():
    """Exercise iteration and every indexing mode of a seeds-only
    ``HeteroItemSet`` with a "user" and an "item" node type."""
    user_ids = torch.arange(0, 5)
    item_ids = torch.arange(5, 10)
    ids = {
        "user": gb.ItemSet(user_ids, names="seeds"),
        "item": gb.ItemSet(item_ids, names="seeds"),
    }
    # Expected (type, id) sequence: all users first, then all items.
    chained_ids = [(key, v) for key, value in ids.items() for v in value]
    item_set = gb.HeteroItemSet(ids)
    assert item_set.names == ("seeds",)

    # Iteration yields single-entry dicts that agree with integer indexing,
    # for both the positive and the equivalent negative index.
    total = len(item_set)
    for position, entry in enumerate(item_set):
        expected_key, expected_value = chained_ids[position]
        assert len(entry) == 1
        assert isinstance(entry, dict)
        assert expected_key in entry
        assert entry[expected_key] == expected_value
        assert item_set[position] == entry
        assert item_set[position - total] == entry

    # Full slice.
    assert torch.equal(item_set[:]["user"], user_ids)
    assert torch.equal(item_set[:]["item"], item_ids)

    # Slices touching only users, only items, then both with a step.
    head = item_set[:3]
    assert len(list(head.keys())) == 1
    assert torch.equal(head["user"], user_ids[:3])
    tail = item_set[7:]
    assert len(list(tail.keys())) == 1
    assert torch.equal(tail["item"], item_ids[2:])
    strided = item_set[3:8:2]
    assert len(list(strided.keys())) == 2
    assert torch.equal(strided["user"], user_ids[3:-1:2])
    assert torch.equal(strided["item"], item_ids[0:3:2])

    # Tensor of indices: users only, items only, then a mix of both.
    picked = item_set[torch.tensor([1, 0, 4])]
    assert len(list(picked.keys())) == 1
    assert torch.equal(picked["user"], torch.tensor([1, 0, 4]))
    picked = item_set[torch.tensor([9, 8, 5])]
    assert len(list(picked.keys())) == 1
    assert torch.equal(picked["item"], torch.tensor([9, 8, 5]))
    picked = item_set[torch.tensor([8, 1, 0, 9, 7, 5])]
    assert len(list(picked.keys())) == 2
    assert torch.equal(picked["user"], torch.tensor([1, 0]))
    assert torch.equal(picked["item"], torch.tensor([8, 9, 7, 5]))

    # Slices whose start is not before their stop are rejected.
    for bad_slice in (slice(5, 3), slice(-1, 3)):
        with pytest.raises(
            AssertionError, match="Start must be smaller than stop."
        ):
            _ = item_set[bad_slice]
    # Out-of-range integers on either side.
    for bad_index in (20, -20):
        with pytest.raises(
            IndexError, match="HeteroItemSet index out of range."
        ):
            _ = item_set[bad_index]
    # Unsupported index type.
    with pytest.raises(
        TypeError,
        match="HeteroItemSet indices must be int, slice, or iterable of int, not .",
    ):
        _ = item_set[1.5]
def test_HeteroItemSet_iteration_seed_nodes_labels():
    """Exercise iteration and slicing of a ``HeteroItemSet`` whose item sets
    hold (seed, label) pairs."""
    user_ids = torch.arange(0, 5)
    user_labels = torch.randint(0, 3, (5,))
    item_ids = torch.arange(5, 10)
    item_labels = torch.randint(0, 3, (5,))
    ids_labels = {
        "user": gb.ItemSet((user_ids, user_labels), names=("seeds", "labels")),
        "item": gb.ItemSet((item_ids, item_labels), names=("seeds", "labels")),
    }
    # Expected (type, (id, label)) sequence: users first, then items.
    chained_ids = [
        (key, v) for key, value in ids_labels.items() for v in value
    ]
    item_set = gb.HeteroItemSet(ids_labels)
    assert item_set.names == ("seeds", "labels")

    # Iteration yields single-entry dicts that agree with integer indexing.
    for position, entry in enumerate(item_set):
        expected_key, expected_value = chained_ids[position]
        assert len(entry) == 1
        assert isinstance(entry, dict)
        assert expected_key in entry
        assert entry[expected_key] == expected_value
        assert item_set[position] == entry

    # A full slice returns every column of every type.
    assert torch.equal(item_set[:]["user"][0], user_ids)
    assert torch.equal(item_set[:]["user"][1], user_labels)
    assert torch.equal(item_set[:]["item"][0], item_ids)
    assert torch.equal(item_set[:]["item"][1], item_labels)
def test_HeteroItemSet_iteration_node_pairs():
    """Exercise iteration and slicing of a ``HeteroItemSet`` of node pairs."""
    node_pairs = torch.arange(0, 10).reshape(-1, 2)
    node_pairs_dict = {
        "user:like:item": gb.ItemSet(node_pairs, names="seeds"),
        "user:follow:user": gb.ItemSet(node_pairs, names="seeds"),
    }
    # Expected (edge type, pair) sequence in dictionary order.
    expected_data = [
        (key, v) for key, value in node_pairs_dict.items() for v in value
    ]
    item_set = gb.HeteroItemSet(node_pairs_dict)
    assert item_set.names == ("seeds",)

    # Iteration yields single-entry dicts that agree with integer indexing.
    for position, entry in enumerate(item_set):
        expected_key, expected_pair = expected_data[position]
        assert len(entry) == 1
        assert isinstance(entry, dict)
        assert expected_key in entry
        assert torch.equal(entry[expected_key], expected_pair)
        assert item_set[position].keys() == entry.keys()
        only_key = list(entry.keys())[0]
        assert torch.equal(item_set[position][only_key], entry[only_key])

    # A full slice returns all pairs of every edge type.
    assert torch.equal(item_set[:]["user:like:item"], node_pairs)
    assert torch.equal(item_set[:]["user:follow:user"], node_pairs)


def test_HeteroItemSet_iteration_node_pairs_labels():
    """Exercise iteration and slicing of a ``HeteroItemSet`` whose item sets
    hold (node pair, label) tuples."""
    node_pairs = torch.arange(0, 10).reshape(-1, 2)
    labels = torch.randint(0, 3, (5,))
    node_pairs_labels = {
        "user:like:item": gb.ItemSet(
            (node_pairs, labels), names=("seeds", "labels")
        ),
        "user:follow:user": gb.ItemSet(
            (node_pairs, labels), names=("seeds", "labels")
        ),
    }
    # Expected (edge type, (pair, label)) sequence in dictionary order.
    expected_data = [
        (key, v) for key, value in node_pairs_labels.items() for v in value
    ]
    item_set = gb.HeteroItemSet(node_pairs_labels)
    assert item_set.names == ("seeds", "labels")

    # Iteration yields single-entry dicts that agree with integer indexing.
    for position, entry in enumerate(item_set):
        assert len(entry) == 1
        assert isinstance(entry, dict)
        expected_key, expected_value = expected_data[position]
        assert expected_key in entry
        assert torch.equal(entry[expected_key][0], expected_value[0])
        assert entry[expected_key][1] == expected_value[1]
        assert item_set[position].keys() == entry.keys()
        only_key = list(entry.keys())[0]
        assert torch.equal(item_set[position][only_key][0], entry[only_key][0])
        assert torch.equal(item_set[position][only_key][1], entry[only_key][1])

    # A full slice returns every column of every edge type.
    assert torch.equal(item_set[:]["user:like:item"][0], node_pairs)
    assert torch.equal(item_set[:]["user:like:item"][1], labels)
    assert torch.equal(item_set[:]["user:follow:user"][0], node_pairs)
    assert torch.equal(item_set[:]["user:follow:user"][1], labels)
def test_HeteroItemSet_iteration_node_pairs_labels_indexes():
    """Exercise iteration and slicing of a ``HeteroItemSet`` whose item sets
    hold (node pair, label, index) triples."""
    node_pairs = torch.arange(0, 10).reshape(-1, 2)
    labels = torch.tensor([1, 1, 0, 0, 0])
    indexes = torch.tensor([0, 1, 0, 0, 1])
    node_pairs_neg_dsts = {
        "user:like:item": gb.ItemSet(
            (node_pairs, labels, indexes), names=("seeds", "labels", "indexes")
        ),
        "user:follow:user": gb.ItemSet(
            (node_pairs, labels, indexes), names=("seeds", "labels", "indexes")
        ),
    }
    # Expected (edge type, triple) sequence in dictionary order.
    expected_data = [
        (key, v) for key, value in node_pairs_neg_dsts.items() for v in value
    ]
    item_set = gb.HeteroItemSet(node_pairs_neg_dsts)
    assert item_set.names == ("seeds", "labels", "indexes")

    # Iteration yields single-entry dicts that agree with integer indexing.
    for position, entry in enumerate(item_set):
        assert len(entry) == 1
        assert isinstance(entry, dict)
        expected_key, expected_value = expected_data[position]
        assert expected_key in entry
        for column in range(3):
            assert torch.equal(
                entry[expected_key][column], expected_value[column]
            )
        assert item_set[position].keys() == entry.keys()
        only_key = list(entry.keys())[0]
        for column in range(3):
            assert torch.equal(
                item_set[position][only_key][column], entry[only_key][column]
            )

    # A full slice returns every column of every edge type.
    assert torch.equal(item_set[:]["user:like:item"][0], node_pairs)
    assert torch.equal(item_set[:]["user:like:item"][1], labels)
    assert torch.equal(item_set[:]["user:like:item"][2], indexes)
    assert torch.equal(item_set[:]["user:follow:user"][0], node_pairs)
    assert torch.equal(item_set[:]["user:follow:user"][1], labels)
    assert torch.equal(item_set[:]["user:follow:user"][2], indexes)
def test_ItemSet_repr():
    """Compare ``str(ItemSet)`` with a hand-written expected string.

    The item set itself is passed as the assertion message so that a failure
    shows the actual representation.
    """
    # NOTE(review): whitespace inside the expected string literals may have
    # been collapsed in this copy of the file; confirm the indentation
    # against the real ItemSet.__repr__ output before relying on them.
    # ItemSet with single name.
    item_set = gb.ItemSet(torch.arange(0, 5), names="seeds")
    expected_str = (
        "ItemSet(\n"
        " items=(tensor([0, 1, 2, 3, 4]),),\n"
        " names=('seeds',),\n"
        ")"
    )
    assert str(item_set) == expected_str, item_set
    # ItemSet with multiple names.
    item_set = gb.ItemSet(
        (torch.arange(0, 5), torch.arange(5, 10)),
        names=("seeds", "labels"),
    )
    expected_str = (
        "ItemSet(\n"
        " items=(tensor([0, 1, 2, 3, 4]), tensor([5, 6, 7, 8, 9])),\n"
        " names=('seeds', 'labels'),\n"
        ")"
    )
    assert str(item_set) == expected_str, item_set


def test_HeteroItemSet_repr():
    """Compare ``str(HeteroItemSet)`` with a hand-written expected string.

    The expected text nests the representation of each contained ItemSet
    under its key, followed by the shared names.
    """
    # NOTE(review): whitespace inside the expected string literals may have
    # been collapsed in this copy of the file; confirm the indentation
    # against the real HeteroItemSet.__repr__ output before relying on them.
    # HeteroItemSet with single name.
    item_set = gb.HeteroItemSet(
        {
            "user": gb.ItemSet(torch.arange(0, 5), names="seeds"),
            "item": gb.ItemSet(torch.arange(5, 10), names="seeds"),
        }
    )
    expected_str = (
        "HeteroItemSet(\n"
        " itemsets={'user': ItemSet(\n"
        " items=(tensor([0, 1, 2, 3, 4]),),\n"
        " names=('seeds',),\n"
        " ), 'item': ItemSet(\n"
        " items=(tensor([5, 6, 7, 8, 9]),),\n"
        " names=('seeds',),\n"
        " )},\n"
        " names=('seeds',),\n"
        ")"
    )
    assert str(item_set) == expected_str, item_set
    # HeteroItemSet with multiple names.
    item_set = gb.HeteroItemSet(
        {
            "user": gb.ItemSet(
                (torch.arange(0, 5), torch.arange(5, 10)),
                names=("seeds", "labels"),
            ),
            "item": gb.ItemSet(
                (torch.arange(5, 10), torch.arange(10, 15)),
                names=("seeds", "labels"),
            ),
        }
    )
    expected_str = (
        "HeteroItemSet(\n"
        " itemsets={'user': ItemSet(\n"
        " items=(tensor([0, 1, 2, 3, 4]), tensor([5, 6, 7, 8, 9])),\n"
        " names=('seeds', 'labels'),\n"
        " ), 'item': ItemSet(\n"
        " items=(tensor([5, 6, 7, 8, 9]), tensor([10, 11, 12, 13, 14])),\n"
        " names=('seeds', 'labels'),\n"
        " )},\n"
        " names=('seeds', 'labels'),\n"
        ")"
    )
    assert str(item_set) == expected_str, item_set
Please use HeteroItemSet instead.", ): item_set_dict = gb.ItemSetDict(ids) hetero_item_set = gb.HeteroItemSet(ids) assert len(item_set_dict) == len(hetero_item_set) assert item_set_dict.names == hetero_item_set.names assert item_set_dict._keys == hetero_item_set._keys assert torch.equal(item_set_dict._offsets, hetero_item_set._offsets) assert ( repr(item_set_dict)[len("ItemSetDict") :] == repr(hetero_item_set)[len("HeteroItemSet") :] ) # Indexing all with a slice. assert torch.equal(item_set_dict[:]["user"], hetero_item_set[:]["user"]) assert torch.equal(item_set_dict[:]["item"], hetero_item_set[:]["item"]) # Indexing partial with a slice. partial_data = item_set_dict[:3] assert len(list(partial_data.keys())) == 1 assert torch.equal(partial_data["user"], hetero_item_set[:3]["user"]) partial_data = item_set_dict[7:] assert len(list(partial_data.keys())) == 1 assert torch.equal(partial_data["item"], hetero_item_set[7:]["item"]) partial_data = item_set_dict[3:8:2] assert len(list(partial_data.keys())) == 2 assert torch.equal(partial_data["user"], hetero_item_set[3:8:2]["user"]) assert torch.equal(partial_data["item"], hetero_item_set[3:8:2]["item"]) # Indexing with an iterable of int. 
partial_data = item_set_dict[torch.tensor([1, 0, 4])] assert len(list(partial_data.keys())) == 1 assert torch.equal(partial_data["user"], hetero_item_set[1, 0, 4]["user"]) partial_data = item_set_dict[torch.tensor([9, 8, 5])] assert len(list(partial_data.keys())) == 1 assert torch.equal(partial_data["item"], hetero_item_set[9, 8, 5]["item"]) partial_data = item_set_dict[torch.tensor([8, 1, 0, 9, 7, 5])] assert len(list(partial_data.keys())) == 2 assert torch.equal(partial_data["user"], hetero_item_set[1, 0]["user"]) assert torch.equal( partial_data["item"], hetero_item_set[8, 9, 7, 5]["item"] ) ================================================ FILE: tests/python/pytorch/graphbolt/test_minibatch.py ================================================ import dgl import dgl.graphbolt as gb import pytest import torch relation = "A:r:B" reverse_relation = "B:rr:A" @pytest.mark.parametrize("indptr_dtype", [torch.int32, torch.int64]) @pytest.mark.parametrize("indices_dtype", [torch.int32, torch.int64]) def test_minibatch_representation_homo(indptr_dtype, indices_dtype): seeds = torch.tensor([10, 11]) csc_formats = [ gb.CSCFormatBase( indptr=torch.tensor([0, 1, 3, 5, 6], dtype=indptr_dtype), indices=torch.tensor([0, 1, 2, 2, 1, 2], dtype=indices_dtype), ), gb.CSCFormatBase( indptr=torch.tensor([0, 2, 3], dtype=indptr_dtype), indices=torch.tensor([1, 2, 0], dtype=indices_dtype), ), ] original_column_node_ids = [ torch.tensor([10, 11, 12, 13]), torch.tensor([10, 11]), ] original_row_node_ids = [ torch.tensor([10, 11, 12, 13]), torch.tensor([10, 11, 12]), ] original_edge_ids = [ torch.tensor([19, 20, 21, 22, 25, 30]), torch.tensor([10, 15, 17]), ] node_features = {"x": torch.tensor([5, 0, 2, 1])} edge_features = [ {"x": torch.tensor([9, 0, 1, 1, 7, 4])}, {"x": torch.tensor([0, 2, 2])}, ] subgraphs = [] for i in range(2): subgraphs.append( gb.SampledSubgraphImpl( sampled_csc=csc_formats[i], original_column_node_ids=original_column_node_ids[i], 
original_row_node_ids=original_row_node_ids[i], original_edge_ids=original_edge_ids[i], ) ) input_nodes = torch.tensor([8, 1, 6, 5, 9, 0, 2, 4]) compacted_seeds = torch.tensor([0, 1]) labels = torch.tensor([1.0, 2.0]) # Test minibatch without data. minibatch = gb.MiniBatch() expect_result = str( """MiniBatch(seeds=None, sampled_subgraphs=None, node_features=None, labels=None, input_nodes=None, indexes=None, edge_features=None, compacted_seeds=None, blocks=None, )""" ) result = str(minibatch) assert result == expect_result, print(expect_result, result) # Test minibatch with all attributes. minibatch = gb.MiniBatch( seeds=seeds, sampled_subgraphs=subgraphs, labels=labels, node_features=node_features, edge_features=edge_features, compacted_seeds=compacted_seeds, input_nodes=input_nodes, ) expect_result = str( """MiniBatch(seeds=tensor([10, 11]), sampled_subgraphs=[SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 1, 3, 5, 6], dtype=torch.int32), indices=tensor([0, 1, 2, 2, 1, 2], dtype=torch.int32), ), original_row_node_ids=tensor([10, 11, 12, 13]), original_edge_ids=tensor([19, 20, 21, 22, 25, 30]), original_column_node_ids=tensor([10, 11, 12, 13]), ), SampledSubgraphImpl(sampled_csc=CSCFormatBase(indptr=tensor([0, 2, 3], dtype=torch.int32), indices=tensor([1, 2, 0], dtype=torch.int32), ), original_row_node_ids=tensor([10, 11, 12]), original_edge_ids=tensor([10, 15, 17]), original_column_node_ids=tensor([10, 11]), )], node_features={'x': tensor([5, 0, 2, 1])}, labels=tensor([1., 2.]), input_nodes=tensor([8, 1, 6, 5, 9, 0, 2, 4]), indexes=None, edge_features=[{'x': tensor([9, 0, 1, 1, 7, 4])}, {'x': tensor([0, 2, 2])}], compacted_seeds=tensor([0, 1]), blocks=[Block(num_src_nodes=4, num_dst_nodes=4, num_edges=6), Block(num_src_nodes=3, num_dst_nodes=2, num_edges=3)], )""" ) result = str(minibatch) assert result == expect_result, print(expect_result, result) @pytest.mark.parametrize("indptr_dtype", [torch.int32, torch.int64]) 
@pytest.mark.parametrize("indices_dtype", [torch.int32, torch.int64]) def test_minibatch_representation_hetero(indptr_dtype, indices_dtype): seeds = {relation: torch.tensor([10, 11])} csc_formats = [ { relation: gb.CSCFormatBase( indptr=torch.tensor([0, 1, 2, 3], dtype=indptr_dtype), indices=torch.tensor([0, 1, 1], dtype=indices_dtype), ), reverse_relation: gb.CSCFormatBase( indptr=torch.tensor([0, 0, 0, 1, 2], dtype=indptr_dtype), indices=torch.tensor([1, 0], dtype=indices_dtype), ), }, { relation: gb.CSCFormatBase( indptr=torch.tensor([0, 1, 2], dtype=indptr_dtype), indices=torch.tensor([1, 0], dtype=indices_dtype), ), reverse_relation: gb.CSCFormatBase( indptr=torch.tensor([0, 2], dtype=indptr_dtype), indices=torch.tensor([1, 0], dtype=indices_dtype), ), }, ] original_column_node_ids = [ {"B": torch.tensor([10, 11, 12]), "A": torch.tensor([5, 7, 9, 11])}, {"B": torch.tensor([10, 11]), "A": torch.tensor([5])}, ] original_row_node_ids = [ { "A": torch.tensor([5, 7, 9, 11]), "B": torch.tensor([10, 11, 12]), }, { "A": torch.tensor([5, 7]), "B": torch.tensor([10, 11]), }, ] original_edge_ids = [ { relation: torch.tensor([19, 20, 21]), reverse_relation: torch.tensor([23, 26]), }, {relation: torch.tensor([10, 12])}, ] node_features = { ("A", "x"): torch.tensor([6, 4, 0, 1]), } edge_features = [ {(relation, "x"): torch.tensor([4, 2, 4])}, {(relation, "x"): torch.tensor([0, 6])}, ] subgraphs = [] for i in range(2): subgraphs.append( gb.SampledSubgraphImpl( sampled_csc=csc_formats[i], original_column_node_ids=original_column_node_ids[i], original_row_node_ids=original_row_node_ids[i], original_edge_ids=original_edge_ids[i], ) ) compacted_seeds = {relation: torch.tensor([0, 1])} # Test minibatch with all attributes. 
minibatch = gb.MiniBatch( seeds=seeds, sampled_subgraphs=subgraphs, node_features=node_features, edge_features=edge_features, labels={"B": torch.tensor([2, 5])}, compacted_seeds=compacted_seeds, input_nodes={ "A": torch.tensor([5, 7, 9, 11]), "B": torch.tensor([10, 11, 12]), }, ) expect_result = str( """MiniBatch(seeds={'A:r:B': tensor([10, 11])}, sampled_subgraphs=[SampledSubgraphImpl(sampled_csc={'A:r:B': CSCFormatBase(indptr=tensor([0, 1, 2, 3], dtype=torch.int32), indices=tensor([0, 1, 1], dtype=torch.int32), ), 'B:rr:A': CSCFormatBase(indptr=tensor([0, 0, 0, 1, 2], dtype=torch.int32), indices=tensor([1, 0], dtype=torch.int32), )}, original_row_node_ids={'A': tensor([ 5, 7, 9, 11]), 'B': tensor([10, 11, 12])}, original_edge_ids={'A:r:B': tensor([19, 20, 21]), 'B:rr:A': tensor([23, 26])}, original_column_node_ids={'B': tensor([10, 11, 12]), 'A': tensor([ 5, 7, 9, 11])}, ), SampledSubgraphImpl(sampled_csc={'A:r:B': CSCFormatBase(indptr=tensor([0, 1, 2], dtype=torch.int32), indices=tensor([1, 0], dtype=torch.int32), ), 'B:rr:A': CSCFormatBase(indptr=tensor([0, 2], dtype=torch.int32), indices=tensor([1, 0], dtype=torch.int32), )}, original_row_node_ids={'A': tensor([5, 7]), 'B': tensor([10, 11])}, original_edge_ids={'A:r:B': tensor([10, 12])}, original_column_node_ids={'B': tensor([10, 11]), 'A': tensor([5])}, )], node_features={('A', 'x'): tensor([6, 4, 0, 1])}, labels={'B': tensor([2, 5])}, input_nodes={'A': tensor([ 5, 7, 9, 11]), 'B': tensor([10, 11, 12])}, indexes=None, edge_features=[{('A:r:B', 'x'): tensor([4, 2, 4])}, {('A:r:B', 'x'): tensor([0, 6])}], compacted_seeds={'A:r:B': tensor([0, 1])}, blocks=[Block(num_src_nodes={'A': 4, 'B': 3}, num_dst_nodes={'A': 4, 'B': 3}, num_edges={('A', 'r', 'B'): 3, ('B', 'rr', 'A'): 2}, metagraph=[('A', 'B', 'r'), ('B', 'A', 'rr')]), Block(num_src_nodes={'A': 2, 'B': 2}, num_dst_nodes={'A': 1, 'B': 2}, num_edges={('A', 'r', 'B'): 2, ('B', 'rr', 'A'): 2}, metagraph=[('A', 'B', 'r'), ('B', 'A', 'rr')])], )""" ) result = 
@pytest.mark.parametrize("indptr_dtype", [torch.int32, torch.int64])
@pytest.mark.parametrize("indices_dtype", [torch.int32, torch.int64])
def test_get_dgl_blocks_homo(indptr_dtype, indices_dtype):
    """Check the string form of `MiniBatch.blocks` for two homogeneous layers.

    The minibatch is built from sampled subgraphs only; `indptr` and
    `indices` use the parametrized dtypes.
    """
    # One row per layer:
    # (indptr, indices, column node ids, row node ids, edge ids).
    layers = [
        (
            [0, 1, 3, 5, 6],
            [0, 1, 2, 2, 1, 2],
            [10, 11, 12, 13],
            [10, 11, 12, 13],
            [19, 20, 21, 22, 25, 30],
        ),
        (
            [0, 1, 3],
            [0, 1, 2],
            [10, 11],
            [10, 11, 12],
            [10, 15, 17],
        ),
    ]
    subgraphs = [
        gb.SampledSubgraphImpl(
            sampled_csc=gb.CSCFormatBase(
                indptr=torch.tensor(indptr, dtype=indptr_dtype),
                indices=torch.tensor(indices, dtype=indices_dtype),
            ),
            original_column_node_ids=torch.tensor(column_ids),
            original_row_node_ids=torch.tensor(row_ids),
            original_edge_ids=torch.tensor(edge_ids),
        )
        for indptr, indices, column_ids, row_ids, edge_ids in layers
    ]

    minibatch = gb.MiniBatch(sampled_subgraphs=subgraphs)

    expected = "[Block(num_src_nodes=4, num_dst_nodes=4, num_edges=6), Block(num_src_nodes=3, num_dst_nodes=2, num_edges=3)]"
    assert str(minibatch.blocks) == expected
minibatch = gb.MiniBatch( sampled_subgraphs=subgraphs, ) dgl_blocks = minibatch.blocks expect_result = str( """[Block(num_src_nodes={'A': 4, 'B': 3}, num_dst_nodes={'A': 4, 'B': 3}, num_edges={('A', 'r', 'B'): 3, ('B', 'rr', 'A'): 2}, metagraph=[('A', 'B', 'r'), ('B', 'A', 'rr')]), Block(num_src_nodes={'A': 2, 'B': 2}, num_dst_nodes={'A': 1, 'B': 2}, num_edges={('A', 'r', 'B'): 2, ('B', 'rr', 'A'): 1}, metagraph=[('A', 'B', 'r'), ('B', 'A', 'rr')])]""" ) result = str(dgl_blocks) assert result == expect_result def test_get_dgl_blocks_hetero_partial_empty_edges(): hg = dgl.heterograph( { ("n1", "e1", "n1"): ([0, 1, 1], [1, 2, 0]), ("n1", "e2", "n2"): ([0, 1, 2], [1, 0, 2]), } ) gb_g = gb.from_dglgraph(hg, is_homogeneous=False) train_set = gb.HeteroItemSet( {"n1:e2:n2": gb.ItemSet(torch.LongTensor([[0, 1]]), names="seeds")} ) datapipe = gb.ItemSampler(train_set, batch_size=1) datapipe = datapipe.sample_neighbor(gb_g, fanouts=[-1, -1]) dataloader = gb.DataLoader(datapipe) blocks_str = str(next(iter(dataloader)).blocks) expected_str = """[Block(num_src_nodes={'n1': 2, 'n2': 0}, num_dst_nodes={'n1': 2, 'n2': 0}, num_edges={('n1', 'e1', 'n1'): 2, ('n1', 'e2', 'n2'): 0}, metagraph=[('n1', 'n1', 'e1'), ('n1', 'n2', 'e2')]), Block(num_src_nodes={'n1': 2, 'n2': 0}, num_dst_nodes={'n1': 1, 'n2': 1}, num_edges={('n1', 'e1', 'n1'): 1, ('n1', 'e2', 'n2'): 1}, metagraph=[('n1', 'n1', 'e1'), ('n1', 'n2', 'e2')])]""" assert expected_str == blocks_str def test_get_dgl_blocks_hetero_empty_edges(): hg = dgl.heterograph( { ("n3", "e1", "n1"): ([0, 1, 1], [1, 2, 0]), ("n3", "e2", "n2"): ([0, 1, 2], [1, 0, 2]), } ) gb_g = gb.from_dglgraph(hg, is_homogeneous=False) train_set = gb.HeteroItemSet( {"n3:e1:n1": gb.ItemSet(torch.LongTensor([[2, 1]]), names="seeds")} ) datapipe = gb.ItemSampler(train_set, batch_size=1) datapipe = datapipe.sample_neighbor(gb_g, fanouts=[-1, -1]) dataloader = gb.DataLoader(datapipe) blocks_str = str(next(iter(dataloader)).blocks) expected_str = 
"""[Block(num_src_nodes={'n1': 0, 'n2': 0, 'n3': 2}, num_dst_nodes={'n1': 0, 'n2': 0, 'n3': 2}, num_edges={('n3', 'e1', 'n1'): 0, ('n3', 'e2', 'n2'): 0}, metagraph=[('n3', 'n1', 'e1'), ('n3', 'n2', 'e2')]), Block(num_src_nodes={'n1': 0, 'n2': 0, 'n3': 2}, num_dst_nodes={'n1': 1, 'n2': 0, 'n3': 1}, num_edges={('n3', 'e1', 'n1'): 1, ('n3', 'e2', 'n2'): 0}, metagraph=[('n3', 'n1', 'e1'), ('n3', 'n2', 'e2')])]""" assert expected_str == blocks_str def test_get_dgl_blocks_homo_empty_edges(): g = dgl.graph(([2, 3, 4], [3, 4, 5])) gb_g = gb.from_dglgraph(g, is_homogeneous=True) train_set = gb.ItemSet(torch.LongTensor([[0, 1]]), names="seeds") datapipe = gb.ItemSampler(train_set, batch_size=1) datapipe = datapipe.sample_neighbor(gb_g, fanouts=[-1, -1]) dataloader = gb.DataLoader(datapipe) blocks_str = str(next(iter(dataloader)).blocks) expected_str = "[Block(num_src_nodes=2, num_dst_nodes=2, num_edges=0), Block(num_src_nodes=2, num_dst_nodes=2, num_edges=0)]" assert expected_str == blocks_str def test_seeds_ntype_being_passed(): hg = dgl.heterograph({("n1", "e1", "n2"): ([0, 1, 2], [2, 0, 1])}) gb_g = gb.from_dglgraph(hg, is_homogeneous=False) train_set = gb.HeteroItemSet( {"n2": gb.ItemSet(torch.LongTensor([0, 1]), names="seeds")} ) datapipe = gb.ItemSampler(train_set, batch_size=2) datapipe = datapipe.sample_neighbor(gb_g, [-1, -1, -1]) dataloader = gb.DataLoader(datapipe) blocks = next(iter(dataloader)).blocks for block in blocks: assert "n2" in block.srctypes def create_homo_minibatch(): csc_formats = [ gb.CSCFormatBase( indptr=torch.tensor([0, 1, 3, 5, 6]), indices=torch.tensor([0, 1, 2, 2, 1, 2]), ), gb.CSCFormatBase( indptr=torch.tensor([0, 2, 3]), indices=torch.tensor([1, 2, 0]), ), ] original_column_node_ids = [ torch.tensor([10, 11, 12, 13]), torch.tensor([10, 11]), ] original_row_node_ids = [ torch.tensor([10, 11, 12, 13]), torch.tensor([10, 11, 12]), ] original_edge_ids = [ torch.tensor([19, 20, 21, 22, 25, 30]), torch.tensor([10, 15, 17]), ] node_features = 
def create_hetero_minibatch():
    """Build a two-layer heterogeneous `gb.MiniBatch` fixture.

    The first layer carries both `relation` and `reverse_relation`; the
    second carries `relation` only. Feature values are drawn with
    `torch.randint`, so they differ between calls.
    """

    def _csc(indptr, indices):
        return gb.CSCFormatBase(
            indptr=torch.tensor(indptr), indices=torch.tensor(indices)
        )

    sampled_csc = [
        {
            relation: _csc([0, 1, 2, 3], [0, 1, 1]),
            reverse_relation: _csc([0, 0, 0, 1, 2], [1, 0]),
        },
        {relation: _csc([0, 1, 2], [1, 0])},
    ]
    column_ids = [
        {"B": torch.tensor([10, 11, 12]), "A": torch.tensor([5, 7, 9, 11])},
        {"B": torch.tensor([10, 11])},
    ]
    row_ids = [
        {"A": torch.tensor([5, 7, 9, 11]), "B": torch.tensor([10, 11, 12])},
        {"A": torch.tensor([5, 7]), "B": torch.tensor([10, 11])},
    ]
    edge_ids = [
        {
            relation: torch.tensor([19, 20, 21]),
            reverse_relation: torch.tensor([23, 26]),
        },
        {relation: torch.tensor([10, 12])},
    ]
    # Random draws stay in the original order: node features, then one draw
    # per layer of edge features.
    node_features = {("A", "x"): torch.randint(0, 10, (4,))}
    edge_features = [
        {(relation, "x"): torch.randint(0, 10, (num_edges,))}
        for num_edges in (3, 2)
    ]
    subgraphs = [
        gb.SampledSubgraphImpl(
            sampled_csc=csc,
            original_column_node_ids=cols,
            original_row_node_ids=rows,
            original_edge_ids=eids,
        )
        for csc, cols, rows, eids in zip(
            sampled_csc, column_ids, row_ids, edge_ids
        )
    ]
    return gb.MiniBatch(
        sampled_subgraphs=subgraphs,
        node_features=node_features,
        edge_features=edge_features,
        input_nodes={
            "A": torch.tensor([5, 7, 9, 11]),
            "B": torch.tensor([10, 11, 12]),
        },
    )


def _expected_dst_nodes(csc):
    """Expand a CSC `indptr` into one destination index per stored edge."""
    num_dst = len(csc.indptr) - 1
    return torch.arange(0, num_dst).repeat_interleave(csc.indptr.diff())


def check_dgl_blocks_hetero(minibatch, blocks):
    """Assert that heterogeneous `blocks` match `minibatch.sampled_subgraphs`.

    For every block the `relation` edges and their `dgl.EID` data are
    compared with the subgraph of the same index. For the first block only,
    the `reverse_relation` edges and the source `dgl.NID` of node types "A"
    and "B" are compared as well.
    """
    layers = minibatch.sampled_subgraphs
    forward_etype = gb.etype_str_to_tuple(relation)

    for layer, block in enumerate(blocks):
        subgraph = layers[layer]
        forward_csc = subgraph.sampled_csc[relation]
        src, dst = block.edges(etype=forward_etype)[:2]
        assert torch.equal(src, forward_csc.indices)
        assert torch.equal(dst, _expected_dst_nodes(forward_csc))
        assert torch.equal(
            block.edges[forward_etype].data[dgl.EID],
            subgraph.original_edge_ids[relation],
        )

    first_block, first_layer = blocks[0], layers[0]
    backward_csc = first_layer.sampled_csc[reverse_relation]
    backward_edges = first_block.edges(
        etype=gb.etype_str_to_tuple(reverse_relation)
    )
    assert torch.equal(backward_edges[0], backward_csc.indices)
    assert torch.equal(backward_edges[1], _expected_dst_nodes(backward_csc))
    for ntype in ("A", "B"):
        assert torch.equal(
            first_block.srcdata[dgl.NID][ntype],
            first_layer.original_row_node_ids[ntype],
        )


def check_dgl_blocks_homo(minibatch, blocks):
    """Assert that homogeneous `blocks` match `minibatch.sampled_subgraphs`.

    Every block's edges and `dgl.EID` data are compared with the subgraph of
    the same index; the source `dgl.NID` is checked for the first block only.
    """
    layers = minibatch.sampled_subgraphs

    for layer, block in enumerate(blocks):
        subgraph = layers[layer]
        csc = subgraph.sampled_csc
        expected_dst = _expected_dst_nodes(csc)
        assert torch.equal(block.edges()[0], csc.indices)
        assert torch.equal(block.edges()[1], expected_dst)
        assert torch.equal(block.edata[dgl.EID], subgraph.original_edge_ids)

    assert torch.equal(
        blocks[0].srcdata[dgl.NID], layers[0].original_row_node_ids
    )
def test_dgl_node_classification_homo():
    """Blocks of a homogeneous minibatch with node seeds and labels."""
    # Arrange
    batch = create_homo_minibatch()
    batch.seeds = torch.tensor([10, 15])
    batch.labels = torch.tensor([2, 5])

    # Act
    blocks = batch.blocks

    # Assert
    assert len(blocks) == 2
    check_dgl_blocks_homo(batch, blocks)


def test_dgl_node_classification_hetero():
    """Blocks of a heterogeneous minibatch with per-type seeds and labels."""
    # Arrange
    batch = create_hetero_minibatch()
    batch.labels = {"B": torch.tensor([2, 5])}
    batch.seeds = {"B": torch.tensor([10, 15])}

    # Act
    blocks = batch.blocks

    # Assert
    assert len(blocks) == 2
    check_dgl_blocks_hetero(batch, blocks)


def test_dgl_link_predication_homo():
    """Blocks of a homogeneous minibatch carrying compacted seed pairs."""
    # Arrange
    batch = create_homo_minibatch()
    seed_pairs = torch.tensor([[0, 1, 0, 0, 1, 1], [1, 0, 1, 1, 0, 0]]).T
    batch.compacted_seeds = (seed_pairs,)
    batch.labels = torch.tensor([1, 1, 0, 0, 0, 0])

    # Act
    blocks = batch.blocks

    # Assert
    assert len(blocks) == 2
    check_dgl_blocks_homo(batch, blocks)


def test_dgl_link_predication_hetero():
    """Blocks of a heterogeneous minibatch with per-relation seed pairs."""
    # Arrange
    batch = create_hetero_minibatch()
    forward_pairs = torch.tensor(
        [[1, 1, 2, 0, 1, 2], [1, 0, 1, 1, 0, 0]]
    ).T
    backward_pairs = torch.tensor(
        [[0, 1, 1, 2, 0, 2], [1, 0, 1, 1, 0, 0]]
    ).T
    batch.compacted_seeds = {
        relation: (forward_pairs,),
        reverse_relation: (backward_pairs,),
    }
    batch.labels = {
        relation: (torch.tensor([1, 1, 0, 0, 0, 0]),),
        reverse_relation: (torch.tensor([1, 1, 0, 0, 0, 0]),),
    }

    # Act
    blocks = batch.blocks

    # Assert
    assert len(blocks) == 2
    check_dgl_blocks_hetero(batch, blocks)
3]] ) expected_node_features = next(iter(test_minibatch.node_features.values())) expected_labels = torch.tensor([7, 8]) expected_batch_size = 2 expected_n_id = torch.tensor([10, 11, 12, 13]) pyg_data = test_minibatch.to_pyg_data() pyg_data.validate() assert torch.equal(pyg_data.edge_index, expected_edge_index) assert torch.equal(pyg_data.x, expected_node_features) assert torch.equal(pyg_data.y, expected_labels) assert pyg_data.batch_size == expected_batch_size assert torch.equal(pyg_data.n_id, expected_n_id) test_minibatch.seeds = torch.tensor([[0, 1], [2, 3]]) assert pyg_data.batch_size == expected_batch_size test_minibatch.seeds = {"A": torch.tensor([0, 1])} assert pyg_data.batch_size == expected_batch_size test_minibatch.seeds = {"A": torch.tensor([[0, 1], [2, 3]])} assert pyg_data.batch_size == expected_batch_size subgraph = test_minibatch.sampled_subgraphs[0] # Test with sampled_csc as None. test_minibatch = gb.MiniBatch( sampled_subgraphs=None, node_features={"feat": expected_node_features}, labels=expected_labels, ) pyg_data = test_minibatch.to_pyg_data() assert pyg_data.edge_index is None, "Edge index should be none." # Test with node_features as None. test_minibatch = gb.MiniBatch( sampled_subgraphs=[subgraph], node_features=None, labels=expected_labels, ) pyg_data = test_minibatch.to_pyg_data() assert pyg_data.x is None, "Node features should be None." # Test with labels as None. test_minibatch = gb.MiniBatch( sampled_subgraphs=[subgraph], node_features={"feat": expected_node_features}, labels=None, ) pyg_data = test_minibatch.to_pyg_data() assert pyg_data.y is None, "Labels should be None." # Test with multiple features. test_minibatch = gb.MiniBatch( sampled_subgraphs=[subgraph], node_features={ "feat": expected_node_features, "extra_feat": torch.tensor([[3], [4]]), }, labels=expected_labels, ) try: pyg_data = test_minibatch.to_pyg_data() assert ( pyg_data.x is None ), "Multiple features case should raise an error." 
def _check_sampler_len(sampler, lenExp):
    """Assert that iterating `sampler` yields exactly `lenExp` items.

    `UserWarning`s emitted while the sampler is consumed are suppressed.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        assert len(list(sampler)) == lenExp


class SamplerType(Enum):
    """Sampler variants the parametrized tests iterate over."""

    Normal = 0
    Layer = 1
    Temporal = 2
    TemporalLayer = 3


def _get_sampler(sampler_type):
    """Return the sampler constructor that corresponds to `sampler_type`.

    Parameters
    ----------
    sampler_type : SamplerType
        Which sampler to build.

    Returns
    -------
    callable
        `gb.NeighborSampler` or `gb.LayerNeighborSampler` for the
        non-temporal types; for the temporal types, a `functools.partial`
        of the temporal sampler class with both timestamp attribute names
        bound to "timestamp".

    Raises
    ------
    ValueError
        If `sampler_type` is not a member of `SamplerType`. Previously any
        unrecognised value silently selected the temporal layer sampler.
    """
    if sampler_type == SamplerType.Normal:
        return gb.NeighborSampler
    if sampler_type == SamplerType.Layer:
        return gb.LayerNeighborSampler
    if sampler_type == SamplerType.Temporal:
        temporal_cls = gb.TemporalNeighborSampler
    elif sampler_type == SamplerType.TemporalLayer:
        temporal_cls = gb.TemporalLayerNeighborSampler
    else:
        raise ValueError(f"Unknown sampler type: {sampler_type!r}")
    return partial(
        temporal_cls,
        node_timestamp_attr_name="timestamp",
        edge_timestamp_attr_name="timestamp",
    )


def _is_temporal(sampler_type):
    """Return True when `sampler_type` is one of the temporal variants."""
    return sampler_type in (SamplerType.Temporal, SamplerType.TemporalLayer)
def _assert_hetero_values(
    datapipe, original_row_node_ids, original_column_node_ids, csc_formats
):
    """Compare every sampled heterogeneous subgraph with expected values.

    For each minibatch in `datapipe` and each layer `step`, the row/column
    node ids of node types "n1" and "n2" and the CSC `indices`/`indptr` of
    edge types "n1:e1:n2" and "n2:e2:n1" must equal the `step`-th expected
    entry. Expected tensors are moved to `F.ctx()` before comparison.
    """
    device = F.ctx()
    for data in datapipe:
        for step, sampled_subgraph in enumerate(data.sampled_subgraphs):
            for ntype in ["n1", "n2"]:
                assert torch.equal(
                    sampled_subgraph.original_row_node_ids[ntype],
                    original_row_node_ids[step][ntype].to(device),
                )
                assert torch.equal(
                    sampled_subgraph.original_column_node_ids[ntype],
                    original_column_node_ids[step][ntype].to(device),
                )
            for etype in ["n1:e1:n2", "n2:e2:n1"]:
                assert torch.equal(
                    sampled_subgraph.sampled_csc[etype].indices,
                    csc_formats[step][etype].indices.to(device),
                )
                assert torch.equal(
                    sampled_subgraph.sampled_csc[etype].indptr,
                    csc_formats[step][etype].indptr.to(device),
                )


def _assert_homo_values(
    datapipe, original_row_node_ids, compacted_indices, indptr, seeds
):
    """Compare every sampled homogeneous subgraph with expected values.

    For each minibatch in `datapipe` and each layer `step`, the row node
    ids, CSC `indices`, CSC `indptr` and column node ids must equal the
    `step`-th expected entry. Expected tensors are moved to `F.ctx()` first,
    matching `_assert_hetero_values`, so callers may pass tensors that live
    on another device; this is a no-op for tensors already on that device.
    """
    device = F.ctx()
    for data in datapipe:
        for step, sampled_subgraph in enumerate(data.sampled_subgraphs):
            assert torch.equal(
                sampled_subgraph.original_row_node_ids,
                original_row_node_ids[step].to(device),
            )
            assert torch.equal(
                sampled_subgraph.sampled_csc.indices,
                compacted_indices[step].to(device),
            )
            assert torch.equal(
                sampled_subgraph.sampled_csc.indptr, indptr[step].to(device)
            )
            assert torch.equal(
                sampled_subgraph.original_column_node_ids,
                seeds[step].to(device),
            )
@pytest.mark.parametrize("labor", [False, True])
def test_NeighborSampler_invoke(labor):
    """Build the neighbor sampler both ways and count the minibatches.

    With `labor` set, `gb.LayerNeighborSampler` / `sample_layer_neighbor`
    are used; otherwise `gb.NeighborSampler` / `sample_neighbor`. Ten seeds
    in batches of two must give five minibatches either way.
    """
    device = F.ctx()
    graph = gb_test_utils.rand_csc_graph(20, 0.15, bidirection_edge=True).to(
        device
    )
    seeds = gb.ItemSet(torch.arange(10), names="seeds")
    item_sampler = gb.ItemSampler(seeds, batch_size=2).copy_to(device)
    fanouts = [torch.LongTensor([2]) for _ in range(2)]

    # Invoke via class constructor.
    sampler_cls = gb.LayerNeighborSampler if labor else gb.NeighborSampler
    minibatches = list(sampler_cls(item_sampler, graph, fanouts))
    assert len(minibatches) == 5

    # Invoke via functional form.
    if labor:
        functional_dp = item_sampler.sample_layer_neighbor(graph, fanouts)
    else:
        functional_dp = item_sampler.sample_neighbor(graph, fanouts)
    minibatches = list(functional_dp)
    assert len(minibatches) == 5
@pytest.mark.parametrize("sampler_type", list(SamplerType))
def test_SubgraphSampler_Node(sampler_type):
    """Sample around node seeds with every sampler type.

    Temporal sampler types additionally get "timestamp" node/edge attributes
    on the graph and a "timestamp" column in the item set. Ten seeds in
    batches of two must give five minibatches.
    """
    device = F.ctx()
    graph = gb_test_utils.rand_csc_graph(20, 0.15, bidirection_edge=True).to(
        device
    )
    seed_nodes = torch.arange(10)
    if _is_temporal(sampler_type):
        graph.node_attributes = {"timestamp": torch.arange(20).to(device)}
        graph.edge_attributes = {
            "timestamp": torch.arange(len(graph.indices)).to(device)
        }
        itemset = gb.ItemSet(
            (seed_nodes, torch.arange(10)), names=("seeds", "timestamp")
        )
    else:
        itemset = gb.ItemSet(seed_nodes, names="seeds")

    item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(device)
    fanouts = [torch.LongTensor([2]) for _ in range(2)]
    make_sampler = _get_sampler(sampler_type)
    _check_sampler_len(make_sampler(item_sampler, graph, fanouts), 5)
@pytest.mark.parametrize("sampler_type", list(SamplerType))
def test_SubgraphSampler_Link_With_Negative(sampler_type):
    """Sample around seed edges after a uniform negative sampler.

    The pipeline is item sampler -> `gb.UniformNegativeSampler` -> neighbor
    sampler -> `gb.exclude_seed_edges`. Temporal sampler types additionally
    get "timestamp" attributes and a "timestamp" item column. Ten seed pairs
    in batches of two must give five minibatches.
    """
    device = F.ctx()
    graph = gb_test_utils.rand_csc_graph(20, 0.15, bidirection_edge=True).to(
        device
    )
    seed_pairs = torch.arange(20).reshape(-1, 2)
    if _is_temporal(sampler_type):
        graph.node_attributes = {"timestamp": torch.arange(20).to(device)}
        graph.edge_attributes = {
            "timestamp": torch.arange(len(graph.indices)).to(device)
        }
        itemset = gb.ItemSet(
            (seed_pairs, torch.arange(10)), names=("seeds", "timestamp")
        )
    else:
        itemset = gb.ItemSet(seed_pairs, names="seeds")

    fanouts = [torch.LongTensor([2]) for _ in range(2)]
    pipeline = gb.ItemSampler(itemset, batch_size=2).copy_to(device)
    pipeline = gb.UniformNegativeSampler(pipeline, graph, 1)
    pipeline = _get_sampler(sampler_type)(pipeline, graph, fanouts)
    pipeline = pipeline.transform(partial(gb.exclude_seed_edges))
    _check_sampler_len(pipeline, 5)
assert torch.equal( data.compacted_seeds, torch.tensor([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]).to(F.ctx()), ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_Node_Hetero(sampler_type): graph = get_hetero_graph().to(F.ctx()) items = torch.arange(3) names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } items = (items, torch.randint(0, 10, (3,))) names = (names, "timestamp") itemset = gb.HeteroItemSet({"n2": gb.ItemSet(items, names=names)}) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) sampler_dp = sampler(item_sampler, graph, fanouts) _check_sampler_len(sampler_dp, 2) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) for minibatch in sampler_dp: assert len(minibatch.sampled_subgraphs) == num_layer @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_Link_Hetero(sampler_type): graph = get_hetero_graph().to(F.ctx()) first_items = torch.LongTensor([[0, 0, 1, 1], [0, 2, 0, 1]]).T first_names = "seeds" second_items = torch.LongTensor([[0, 0, 1, 1, 2, 2], [0, 1, 1, 0, 0, 1]]).T second_names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } first_items = (first_items, torch.randint(0, 10, (4,))) first_names = (first_names, "timestamp") second_items = (second_items, torch.randint(0, 10, (6,))) second_names = (second_names, "timestamp") itemset = 
gb.HeteroItemSet( { "n1:e1:n2": gb.ItemSet( first_items, names=first_names, ), "n2:e2:n1": gb.ItemSet( second_items, names=second_names, ), } ) datapipe = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) datapipe = sampler(datapipe, graph, fanouts) datapipe = datapipe.transform(partial(gb.exclude_seed_edges)) _check_sampler_len(datapipe, 5) for data in datapipe: for compacted_seeds in data.compacted_seeds.values(): if _is_temporal(sampler_type): assert torch.equal( compacted_seeds, torch.tensor([[0, 0], [1, 1]]).to(F.ctx()) ) else: assert torch.equal( torch.sort(compacted_seeds.T, dim=1)[0].T, torch.tensor([[0, 0], [0, 1]]).to(F.ctx()), ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_Link_Hetero_With_Negative(sampler_type): graph = get_hetero_graph().to(F.ctx()) first_items = torch.LongTensor([[0, 0, 1, 1], [0, 2, 0, 1]]).T first_names = "seeds" second_items = torch.LongTensor([[0, 0, 1, 1, 2, 2], [0, 1, 1, 0, 0, 1]]).T second_names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } first_items = (first_items, torch.randint(0, 10, (4,))) first_names = (first_names, "timestamp") second_items = (second_items, torch.randint(0, 10, (6,))) second_names = (second_names, "timestamp") itemset = gb.HeteroItemSet( { "n1:e1:n2": gb.ItemSet( first_items, names=first_names, ), "n2:e2:n1": gb.ItemSet( second_items, names=second_names, ), } ) datapipe = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] datapipe = gb.UniformNegativeSampler(datapipe, graph, 1) sampler = _get_sampler(sampler_type) 
datapipe = sampler(datapipe, graph, fanouts) datapipe = datapipe.transform(partial(gb.exclude_seed_edges)) _check_sampler_len(datapipe, 5) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_Link_Hetero_Unknown_Etype(sampler_type): graph = get_hetero_graph().to(F.ctx()) first_items = torch.LongTensor([[0, 0, 1, 1], [0, 2, 0, 1]]).T first_names = "seeds" second_items = torch.LongTensor([[0, 0, 1, 1, 2, 2], [0, 1, 1, 0, 0, 1]]).T second_names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } first_items = (first_items, torch.randint(0, 10, (4,))) first_names = (first_names, "timestamp") second_items = (second_items, torch.randint(0, 10, (6,))) second_names = (second_names, "timestamp") # "e11" and "e22" are not valid edge types. 
itemset = gb.HeteroItemSet( { "n1:e11:n2": gb.ItemSet( first_items, names=first_names, ), "n2:e22:n1": gb.ItemSet( second_items, names=second_names, ), } ) datapipe = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) datapipe = sampler(datapipe, graph, fanouts) datapipe = datapipe.transform(partial(gb.exclude_seed_edges)) _check_sampler_len(datapipe, 5) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_Link_Hetero_With_Negative_Unknown_Etype(sampler_type): graph = get_hetero_graph().to(F.ctx()) first_items = torch.LongTensor([[0, 0, 1, 1], [0, 2, 0, 1]]).T first_names = "seeds" second_items = torch.LongTensor([[0, 0, 1, 1, 2, 2], [0, 1, 1, 0, 0, 1]]).T second_names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } first_items = (first_items, torch.randint(0, 10, (4,))) first_names = (first_names, "timestamp") second_items = (second_items, torch.randint(0, 10, (6,))) second_names = (second_names, "timestamp") # "e11" and "e22" are not valid edge types. 
itemset = gb.HeteroItemSet( { "n1:e11:n2": gb.ItemSet( first_items, names=first_names, ), "n2:e22:n1": gb.ItemSet( second_items, names=second_names, ), } ) datapipe = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] datapipe = gb.UniformNegativeSampler(datapipe, graph, 1) sampler = _get_sampler(sampler_type) datapipe = sampler(datapipe, graph, fanouts) datapipe = datapipe.transform(partial(gb.exclude_seed_edges)) _check_sampler_len(datapipe, 5) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_HyperLink_Hetero(sampler_type): graph = get_hetero_graph().to(F.ctx()) items = torch.LongTensor([[2, 0, 1, 1, 2], [0, 1, 1, 0, 0]]) names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } items = (items, torch.randint(0, 10, (2,))) names = (names, "timestamp") itemset = gb.HeteroItemSet( { "n2:n1:n2:n1:n2": gb.ItemSet( items, names=names, ), } ) datapipe = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) datapipe = sampler(datapipe, graph, fanouts) _check_sampler_len(datapipe, 1) for data in datapipe: for compacted_seeds in data.compacted_seeds.values(): if _is_temporal(sampler_type): assert torch.equal( compacted_seeds, torch.tensor([[0, 0, 2, 2, 4], [1, 1, 3, 3, 5]]).to( F.ctx() ), ) else: assert torch.equal( compacted_seeds, torch.tensor([[0, 0, 2, 1, 0], [1, 1, 2, 0, 1]]).to( F.ctx() ), ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) @pytest.mark.parametrize( "replace", [False, True], ) def 
test_SubgraphSampler_Random_Hetero_Graph(sampler_type, replace): if F._default_context_str == "gpu" and replace == True: pytest.skip("Sampling with replacement not yet supported on GPU.") num_nodes = 5 num_edges = 9 num_ntypes = 3 num_etypes = 3 ( csc_indptr, indices, node_type_offset, type_per_edge, node_type_to_id, edge_type_to_id, ) = gb_test_utils.random_hetero_graph( num_nodes, num_edges, num_ntypes, num_etypes ) node_attributes = {} edge_attributes = { "A1": torch.randn(num_edges), "A2": torch.randn(num_edges), } if _is_temporal(sampler_type): node_attributes["timestamp"] = torch.randint(0, 10, (num_nodes,)) edge_attributes["timestamp"] = torch.randint(0, 10, (num_edges,)) graph = gb.fused_csc_sampling_graph( csc_indptr, indices, node_type_offset=node_type_offset, type_per_edge=type_per_edge, node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, node_attributes=node_attributes, edge_attributes=edge_attributes, ).to(F.ctx()) first_items = torch.tensor([0]) first_names = "seeds" second_items = torch.tensor([0]) second_names = "seeds" if _is_temporal(sampler_type): first_items = (first_items, torch.randint(0, 10, (1,))) first_names = (first_names, "timestamp") second_items = (second_items, torch.randint(0, 10, (1,))) second_names = (second_names, "timestamp") itemset = gb.HeteroItemSet( { "n2": gb.ItemSet(first_items, names=first_names), "n1": gb.ItemSet(second_items, names=second_names), } ) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) sampler_dp = sampler(item_sampler, graph, fanouts, replace=replace) with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) for data in sampler_dp: for sampledsubgraph in data.sampled_subgraphs: for _, value in sampledsubgraph.sampled_csc.items(): assert torch.equal( torch.ge( value.indices, torch.zeros(len(value.indices)).to(F.ctx()), ), 
torch.ones(len(value.indices)).to(F.ctx()), ) assert torch.equal( torch.ge( value.indptr, torch.zeros(len(value.indptr)).to(F.ctx()), ), torch.ones(len(value.indptr)).to(F.ctx()), ) for ( _, value, ) in sampledsubgraph.original_column_node_ids.items(): assert torch.equal( torch.ge(value, torch.zeros(len(value)).to(F.ctx())), torch.ones(len(value)).to(F.ctx()), ) for _, value in sampledsubgraph.original_row_node_ids.items(): assert torch.equal( torch.ge(value, torch.zeros(len(value)).to(F.ctx())), torch.ones(len(value)).to(F.ctx()), ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_without_deduplication_Homo_Node(sampler_type): graph = dgl.graph( ([5, 0, 1, 5, 6, 7, 2, 2, 4], [0, 1, 2, 2, 2, 2, 3, 4, 4]) ) graph = gb.from_dglgraph(graph, True).to(F.ctx()) seed_nodes = torch.LongTensor([0, 3, 4]) items = seed_nodes names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.zeros( graph.csc_indptr.numel() - 1, dtype=torch.int64 ).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.zeros( graph.indices.numel(), dtype=torch.int64 ).to(F.ctx()) } items = (items, torch.randint(1, 10, (3,))) names = (names, "timestamp") itemset = gb.ItemSet(items, names=names) item_sampler = gb.ItemSampler(itemset, batch_size=len(seed_nodes)).copy_to( F.ctx() ) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) if _is_temporal(sampler_type): datapipe = sampler(item_sampler, graph, fanouts) else: datapipe = sampler(item_sampler, graph, fanouts, deduplicate=False) length = [17, 7] compacted_indices = [ (torch.arange(0, 10) + 7).to(F.ctx()), (torch.arange(0, 4) + 3).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 4, 4, 6, 8, 10]).to(F.ctx()), torch.tensor([0, 1, 2, 4]).to(F.ctx()), ] seeds = [ torch.tensor([0, 2, 2, 3, 4, 4, 5]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] 
with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) for data in datapipe: for step, sampled_subgraph in enumerate(data.sampled_subgraphs): assert ( len(sampled_subgraph.original_row_node_ids) == length[step] ) assert torch.equal( sampled_subgraph.sampled_csc.indices, compacted_indices[step], ) assert torch.equal( sampled_subgraph.sampled_csc.indptr, indptr[step] ) assert torch.equal( torch.sort(sampled_subgraph.original_column_node_ids)[0], seeds[step], ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_without_deduplication_Hetero_Node(sampler_type): graph = get_hetero_graph().to(F.ctx()) items = torch.arange(2) names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.zeros( graph.csc_indptr.numel() - 1, dtype=torch.int64, device=F.ctx() ) } graph.edge_attributes = { "timestamp": torch.zeros( graph.indices.numel(), dtype=torch.int64, device=F.ctx() ) } items = (items, torch.randint(1, 10, (2,))) names = (names, "timestamp") itemset = gb.HeteroItemSet({"n2": gb.ItemSet(items, names=names)}) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) if _is_temporal(sampler_type): datapipe = sampler(item_sampler, graph, fanouts) else: datapipe = sampler(item_sampler, graph, fanouts, deduplicate=False) csc_formats = [ { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([4, 5, 6, 7]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4, 6, 8]), indices=torch.tensor([2, 3, 4, 5, 6, 7, 8, 9]), ), }, { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([0, 1, 2, 3]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0]), indices=torch.tensor([], dtype=torch.int64), ), }, ] 
original_column_node_ids = [ { "n1": torch.tensor([0, 1, 1, 0]), "n2": torch.tensor([0, 1]), }, { "n1": torch.tensor([], dtype=torch.int64), "n2": torch.tensor([0, 1]), }, ] original_row_node_ids = [ { "n1": torch.tensor([0, 1, 1, 0, 0, 1, 1, 0]), "n2": torch.tensor([0, 1, 0, 2, 0, 1, 0, 1, 0, 2]), }, { "n1": torch.tensor([0, 1, 1, 0]), "n2": torch.tensor([0, 1]), }, ] with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) _assert_hetero_values( datapipe, original_row_node_ids, original_column_node_ids, csc_formats, ) @unittest.skipIf( F._default_context_str == "gpu", reason="Fails due to different result on the GPU.", ) @pytest.mark.parametrize("labor", [False, True]) def test_SubgraphSampler_unique_csc_format_Homo_Node_cpu(labor): torch.manual_seed(1205) graph = dgl.graph(([5, 0, 6, 7, 2, 2, 4], [0, 1, 2, 2, 3, 4, 4])) graph = gb.from_dglgraph(graph, True).to(F.ctx()) seed_nodes = torch.LongTensor([0, 3, 4]) itemset = gb.ItemSet(seed_nodes, names="seeds") item_sampler = gb.ItemSampler(itemset, batch_size=len(seed_nodes)).copy_to( F.ctx() ) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] Sampler = gb.LayerNeighborSampler if labor else gb.NeighborSampler datapipe = Sampler( item_sampler, graph, fanouts, deduplicate=True, ) original_row_node_ids = [ torch.tensor([0, 3, 4, 5, 2, 6, 7]).to(F.ctx()), torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), ] compacted_indices = [ torch.tensor([3, 4, 4, 2, 5, 6]).to(F.ctx()), torch.tensor([3, 4, 4, 2]).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 4, 4, 6]).to(F.ctx()), torch.tensor([0, 1, 2, 4]).to(F.ctx()), ] seeds = [ torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] _assert_homo_values( datapipe, original_row_node_ids, compacted_indices, indptr, seeds ) @unittest.skipIf( F._default_context_str == "cpu", reason="Fails due to different result on the CPU.", ) @pytest.mark.parametrize("labor", [False, True]) def 
test_SubgraphSampler_unique_csc_format_Homo_Node_gpu(labor): torch.manual_seed(1205) graph = dgl.graph(([5, 0, 7, 7, 2, 4], [0, 1, 2, 2, 3, 4])) graph = gb.from_dglgraph(graph, is_homogeneous=True).to(F.ctx()) seed_nodes = torch.LongTensor([0, 3, 4]) itemset = gb.ItemSet(seed_nodes, names="seeds") item_sampler = gb.ItemSampler(itemset, batch_size=len(seed_nodes)).copy_to( F.ctx() ) num_layer = 2 fanouts = [torch.LongTensor([-1]) for _ in range(num_layer)] Sampler = gb.LayerNeighborSampler if labor else gb.NeighborSampler datapipe = Sampler( item_sampler, graph, fanouts, deduplicate=True, ) if torch.cuda.get_device_capability()[0] < 7: original_row_node_ids = [ torch.tensor([0, 3, 4, 2, 5, 7]).to(F.ctx()), torch.tensor([0, 3, 4, 2, 5]).to(F.ctx()), ] compacted_indices = [ torch.tensor([4, 3, 2, 5, 5]).to(F.ctx()), torch.tensor([4, 3, 2]).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 3, 5, 5]).to(F.ctx()), torch.tensor([0, 1, 2, 3]).to(F.ctx()), ] seeds = [ torch.tensor([0, 3, 4, 2, 5]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] else: original_row_node_ids = [ torch.tensor([0, 3, 4, 5, 2, 7]).to(F.ctx()), torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), ] compacted_indices = [ torch.tensor([3, 4, 2, 5, 5]).to(F.ctx()), torch.tensor([3, 4, 2]).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 3, 3, 5]).to(F.ctx()), torch.tensor([0, 1, 2, 3]).to(F.ctx()), ] seeds = [ torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] _assert_homo_values( datapipe, original_row_node_ids, compacted_indices, indptr, seeds ) @pytest.mark.parametrize("labor", [False, True]) def test_SubgraphSampler_unique_csc_format_Hetero_Node(labor): graph = get_hetero_graph().to(F.ctx()) itemset = gb.HeteroItemSet( {"n2": gb.ItemSet(torch.arange(2), names="seeds")} ) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] Sampler = gb.LayerNeighborSampler if labor else 
gb.NeighborSampler datapipe = Sampler( item_sampler, graph, fanouts, deduplicate=True, ) csc_formats = [ { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([0, 1, 1, 0]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([0, 2, 0, 1]), ), }, { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([0, 1, 1, 0]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0]), indices=torch.tensor([], dtype=torch.int64), ), }, ] original_column_node_ids = [ { "n1": torch.tensor([0, 1]), "n2": torch.tensor([0, 1]), }, { "n1": torch.tensor([], dtype=torch.int64), "n2": torch.tensor([0, 1]), }, ] original_row_node_ids = [ { "n1": torch.tensor([0, 1]), "n2": torch.tensor([0, 1, 2]), }, { "n1": torch.tensor([0, 1]), "n2": torch.tensor([0, 1]), }, ] _assert_hetero_values( datapipe, original_row_node_ids, original_column_node_ids, csc_formats ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_Hetero_multifanout_per_layer(sampler_type): graph = get_hetero_graph().to(F.ctx()) items_n1 = torch.tensor([0]) items_n2 = torch.tensor([1]) names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.arange(graph.csc_indptr.numel() - 1).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.arange(graph.indices.numel()).to(F.ctx()) } # All edges can be sampled. items_n1 = (items_n1, torch.tensor([10])) items_n2 = (items_n2, torch.tensor([10])) names = (names, "timestamp") itemset = gb.HeteroItemSet( { "n1": gb.ItemSet(items=items_n1, names=names), "n2": gb.ItemSet(items=items_n2, names=names), } ) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 # The number of edges to be sampled for each edge types of each node. 
fanouts = [torch.LongTensor([2, 1]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) sampler_dp = sampler(item_sampler, graph, fanouts) if _is_temporal(sampler_type): indices_len = [ { "n1:e1:n2": 4, "n2:e2:n1": 3, }, { "n1:e1:n2": 2, "n2:e2:n1": 1, }, ] else: indices_len = [ { "n1:e1:n2": 4, "n2:e2:n1": 2, }, { "n1:e1:n2": 2, "n2:e2:n1": 1, }, ] with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) for minibatch in sampler_dp: for step, sampled_subgraph in enumerate( minibatch.sampled_subgraphs ): assert ( len(sampled_subgraph.sampled_csc["n1:e1:n2"].indices) == indices_len[step]["n1:e1:n2"] ) assert ( len(sampled_subgraph.sampled_csc["n2:e2:n1"].indices) == indices_len[step]["n2:e2:n1"] ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_without_deduplication_Homo_Link(sampler_type): graph = dgl.graph( ([5, 0, 1, 5, 6, 7, 2, 2, 4], [0, 1, 2, 2, 2, 2, 3, 4, 4]) ) graph = gb.from_dglgraph(graph, True).to(F.ctx()) seed_nodes = torch.LongTensor([[0, 1], [3, 5]]) items = seed_nodes names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.zeros( graph.csc_indptr.numel() - 1, dtype=torch.int64 ).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.zeros( graph.indices.numel(), dtype=torch.int64 ).to(F.ctx()) } items = (items, torch.randint(1, 10, (2,))) names = (names, "timestamp") itemset = gb.ItemSet(items, names=names) item_sampler = gb.ItemSampler(itemset, batch_size=4).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) if _is_temporal(sampler_type): datapipe = sampler(item_sampler, graph, fanouts) else: datapipe = sampler(item_sampler, graph, fanouts, deduplicate=False) length = [13, 7] compacted_indices = [ (torch.arange(0, 6) + 7).to(F.ctx()), (torch.arange(0, 3) + 4).to(F.ctx()), ] indptr = 
[ torch.tensor([0, 1, 2, 3, 3, 3, 4, 6]).to(F.ctx()), torch.tensor([0, 1, 2, 3, 3]).to(F.ctx()), ] seeds = [ torch.tensor([0, 0, 1, 2, 3, 5, 5]).to(F.ctx()), torch.tensor([0, 1, 3, 5]).to(F.ctx()), ] for data in datapipe: for step, sampled_subgraph in enumerate(data.sampled_subgraphs): assert len(sampled_subgraph.original_row_node_ids) == length[step] assert torch.equal( sampled_subgraph.sampled_csc.indices, compacted_indices[step] ) assert torch.equal( sampled_subgraph.sampled_csc.indptr, indptr[step] ) assert torch.equal( torch.sort(sampled_subgraph.original_column_node_ids)[0], seeds[step], ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_without_deduplication_Hetero_Link(sampler_type): graph = get_hetero_graph().to(F.ctx()) items = torch.arange(2).view(1, 2) names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.zeros( graph.csc_indptr.numel() - 1, dtype=torch.int64 ).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.zeros( graph.indices.numel(), dtype=torch.int64 ).to(F.ctx()) } items = (items, torch.randint(1, 10, (1,))) names = (names, "timestamp") itemset = gb.HeteroItemSet({"n1:e1:n2": gb.ItemSet(items, names=names)}) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) if _is_temporal(sampler_type): datapipe = sampler(item_sampler, graph, fanouts) else: datapipe = sampler(item_sampler, graph, fanouts, deduplicate=False) csc_formats = [ { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4, 6]), indices=torch.tensor([3, 4, 5, 6, 7, 8]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4, 6]), indices=torch.tensor([3, 4, 5, 6, 7, 8]), ), }, { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2]), indices=torch.tensor([1, 2]), ), "n2:e2:n1": 
gb.CSCFormatBase( indptr=torch.tensor([0, 2]), indices=torch.tensor([1, 2], dtype=torch.int64), ), }, ] original_column_node_ids = [ { "n1": torch.tensor([0, 1, 0]), "n2": torch.tensor([1, 0, 2]), }, { "n1": torch.tensor([0]), "n2": torch.tensor([1]), }, ] original_row_node_ids = [ { "n1": torch.tensor([0, 1, 0, 1, 0, 0, 1, 0, 1]), "n2": torch.tensor([1, 0, 2, 0, 2, 0, 1, 0, 2]), }, { "n1": torch.tensor([0, 1, 0]), "n2": torch.tensor([1, 0, 2]), }, ] for data in datapipe: for step, sampled_subgraph in enumerate(data.sampled_subgraphs): for ntype in ["n1", "n2"]: assert torch.equal( sampled_subgraph.original_row_node_ids[ntype], original_row_node_ids[step][ntype].to(F.ctx()), ) assert torch.equal( sampled_subgraph.original_column_node_ids[ntype], original_column_node_ids[step][ntype].to(F.ctx()), ) for etype in ["n1:e1:n2", "n2:e2:n1"]: assert torch.equal( sampled_subgraph.sampled_csc[etype].indices, csc_formats[step][etype].indices.to(F.ctx()), ) assert torch.equal( sampled_subgraph.sampled_csc[etype].indptr, csc_formats[step][etype].indptr.to(F.ctx()), ) @unittest.skipIf( F._default_context_str == "gpu", reason="Fails due to different result on the GPU.", ) @pytest.mark.parametrize("labor", [False, True]) def test_SubgraphSampler_unique_csc_format_Homo_Link_cpu(labor): torch.manual_seed(1205) graph = dgl.graph(([5, 0, 6, 7, 2, 2, 4], [0, 1, 2, 2, 3, 4, 4])) graph = gb.from_dglgraph(graph, True).to(F.ctx()) seed_nodes = torch.LongTensor([[0, 3], [4, 4]]) itemset = gb.ItemSet(seed_nodes, names="seeds") item_sampler = gb.ItemSampler(itemset, batch_size=4).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] Sampler = gb.LayerNeighborSampler if labor else gb.NeighborSampler datapipe = Sampler( item_sampler, graph, fanouts, deduplicate=True, ) original_row_node_ids = [ torch.tensor([0, 3, 4, 5, 2, 6, 7]).to(F.ctx()), torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), ] compacted_indices = [ torch.tensor([3, 4, 4, 2, 5, 6]).to(F.ctx()), 
torch.tensor([3, 4, 4, 2]).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 4, 4, 6]).to(F.ctx()), torch.tensor([0, 1, 2, 4]).to(F.ctx()), ] seeds = [ torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] for data in datapipe: for step, sampled_subgraph in enumerate(data.sampled_subgraphs): assert torch.equal( sampled_subgraph.original_row_node_ids, original_row_node_ids[step], ) assert torch.equal( sampled_subgraph.sampled_csc.indices, compacted_indices[step] ) assert torch.equal( sampled_subgraph.sampled_csc.indptr, indptr[step] ) assert torch.equal( sampled_subgraph.original_column_node_ids, seeds[step] ) @unittest.skipIf( F._default_context_str == "cpu", reason="Fails due to different result on the CPU.", ) @pytest.mark.parametrize("labor", [False, True]) def test_SubgraphSampler_unique_csc_format_Homo_Link_gpu(labor): torch.manual_seed(1205) graph = dgl.graph(([5, 0, 7, 7, 2, 4], [0, 1, 2, 2, 3, 4])) graph = gb.from_dglgraph(graph, is_homogeneous=True).to(F.ctx()) seed_nodes = torch.LongTensor([[0, 3], [4, 4]]) itemset = gb.ItemSet(seed_nodes, names="seeds") item_sampler = gb.ItemSampler(itemset, batch_size=4).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([-1]) for _ in range(num_layer)] Sampler = gb.LayerNeighborSampler if labor else gb.NeighborSampler datapipe = Sampler( item_sampler, graph, fanouts, deduplicate=True, ) if torch.cuda.get_device_capability()[0] < 7: original_row_node_ids = [ torch.tensor([0, 3, 4, 2, 5, 7]).to(F.ctx()), torch.tensor([0, 3, 4, 2, 5]).to(F.ctx()), ] compacted_indices = [ torch.tensor([4, 3, 2, 5, 5]).to(F.ctx()), torch.tensor([4, 3, 2]).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 3, 5, 5]).to(F.ctx()), torch.tensor([0, 1, 2, 3]).to(F.ctx()), ] seeds = [ torch.tensor([0, 3, 4, 2, 5]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] else: original_row_node_ids = [ torch.tensor([0, 3, 4, 5, 2, 7]).to(F.ctx()), torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), ] compacted_indices = [ 
torch.tensor([3, 4, 2, 5, 5]).to(F.ctx()), torch.tensor([3, 4, 2]).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 3, 3, 5]).to(F.ctx()), torch.tensor([0, 1, 2, 3]).to(F.ctx()), ] seeds = [ torch.tensor([0, 3, 4, 5, 2]).to(F.ctx()), torch.tensor([0, 3, 4]).to(F.ctx()), ] for data in datapipe: for step, sampled_subgraph in enumerate(data.sampled_subgraphs): assert torch.equal( sampled_subgraph.original_row_node_ids, original_row_node_ids[step], ) assert torch.equal( sampled_subgraph.sampled_csc.indices, compacted_indices[step] ) assert torch.equal( sampled_subgraph.sampled_csc.indptr, indptr[step] ) assert torch.equal( sampled_subgraph.original_column_node_ids, seeds[step] ) @pytest.mark.parametrize("labor", [False, True]) def test_SubgraphSampler_unique_csc_format_Hetero_Link(labor): graph = get_hetero_graph().to(F.ctx()) itemset = gb.HeteroItemSet( {"n1:e1:n2": gb.ItemSet(torch.tensor([[0, 1]]), names="seeds")} ) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] Sampler = gb.LayerNeighborSampler if labor else gb.NeighborSampler datapipe = Sampler( item_sampler, graph, fanouts, deduplicate=True, ) csc_formats = [ { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4, 6]), indices=torch.tensor([1, 0, 0, 1, 0, 1]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2, 4]), indices=torch.tensor([1, 2, 1, 0]), ), }, { "n1:e1:n2": gb.CSCFormatBase( indptr=torch.tensor([0, 2]), indices=torch.tensor([1, 0]), ), "n2:e2:n1": gb.CSCFormatBase( indptr=torch.tensor([0, 2]), indices=torch.tensor([1, 2], dtype=torch.int64), ), }, ] original_column_node_ids = [ { "n1": torch.tensor([0, 1]), "n2": torch.tensor([0, 1, 2]), }, { "n1": torch.tensor([0]), "n2": torch.tensor([1]), }, ] original_row_node_ids = [ { "n1": torch.tensor([0, 1]), "n2": torch.tensor([0, 1, 2]), }, { "n1": torch.tensor([0, 1]), "n2": torch.tensor([0, 1, 2]), }, ] for data in datapipe: for step, 
sampled_subgraph in enumerate(data.sampled_subgraphs): for ntype in ["n1", "n2"]: assert torch.equal( torch.sort(sampled_subgraph.original_row_node_ids[ntype])[ 0 ], original_row_node_ids[step][ntype].to(F.ctx()), ) assert torch.equal( torch.sort( sampled_subgraph.original_column_node_ids[ntype] )[0], original_column_node_ids[step][ntype].to(F.ctx()), ) for etype in ["n1:e1:n2", "n2:e2:n1"]: assert torch.equal( sampled_subgraph.sampled_csc[etype].indices, csc_formats[step][etype].indices.to(F.ctx()), ) assert torch.equal( sampled_subgraph.sampled_csc[etype].indptr, csc_formats[step][etype].indptr.to(F.ctx()), ) @pytest.mark.parametrize( "sampler_type", [ SamplerType.Normal, SamplerType.Layer, SamplerType.Temporal, SamplerType.TemporalLayer, ], ) def test_SubgraphSampler_without_deduplication_Homo_HyperLink(sampler_type): graph = dgl.graph( ([5, 0, 1, 5, 6, 7, 2, 2, 4], [0, 1, 2, 2, 2, 2, 3, 4, 4]) ) graph = gb.from_dglgraph(graph, True).to(F.ctx()) items = torch.LongTensor([[0, 1, 4], [3, 5, 6]]) names = "seeds" if _is_temporal(sampler_type): graph.node_attributes = { "timestamp": torch.zeros( graph.csc_indptr.numel() - 1, dtype=torch.int64 ).to(F.ctx()) } graph.edge_attributes = { "timestamp": torch.zeros( graph.indices.numel(), dtype=torch.int64 ).to(F.ctx()) } items = (items, torch.randint(1, 10, (2,))) names = (names, "timestamp") itemset = gb.ItemSet(items, names=names) item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(F.ctx()) num_layer = 2 fanouts = [torch.LongTensor([2]) for _ in range(num_layer)] sampler = _get_sampler(sampler_type) if _is_temporal(sampler_type): datapipe = sampler(item_sampler, graph, fanouts) else: datapipe = sampler(item_sampler, graph, fanouts, deduplicate=False) length = [23, 11] compacted_indices = [ (torch.arange(0, 12) + 11).to(F.ctx()), (torch.arange(0, 5) + 6).to(F.ctx()), ] indptr = [ torch.tensor([0, 1, 2, 4, 5, 5, 5, 5, 6, 8, 10, 12]).to(F.ctx()), torch.tensor([0, 1, 2, 4, 5, 5, 5]).to(F.ctx()), ] seeds = [ 
@pytest.mark.parametrize(
    "sampler_type",
    [
        SamplerType.Normal,
        SamplerType.Layer,
        SamplerType.Temporal,
        SamplerType.TemporalLayer,
    ],
)
def test_SubgraphSampler_without_deduplication_Hetero_HyperLink(sampler_type):
    """Check two-layer hyperlink sampling on a hetero graph without dedup.

    One ``n2:n1:n2`` hyperlink seed ``[0, 1, 2]`` is sampled with fanout 2 per
    layer. For temporal sampler types the graph receives all-zero node and
    edge timestamps and the seed carries a random timestamp; the other
    sampler types are called with ``deduplicate=False``. The sampled CSC
    structures and original node IDs are compared with hard-coded values.
    """
    device = F.ctx()
    graph = get_hetero_graph().to(device)
    items = torch.arange(3).view(1, 3)
    names = "seeds"
    if _is_temporal(sampler_type):
        num_nodes = graph.csc_indptr.numel() - 1
        num_edges = graph.indices.numel()
        graph.node_attributes = {
            "timestamp": torch.zeros(num_nodes, dtype=torch.int64).to(device)
        }
        graph.edge_attributes = {
            "timestamp": torch.zeros(num_edges, dtype=torch.int64).to(device)
        }
        items = (items, torch.randint(1, 10, (1,)))
        names = (names, "timestamp")
    itemset = gb.HeteroItemSet({"n2:n1:n2": gb.ItemSet(items, names=names)})
    item_sampler = gb.ItemSampler(itemset, batch_size=2).copy_to(device)
    fanouts = [torch.LongTensor([2]) for _ in range(2)]
    sampler = _get_sampler(sampler_type)
    # The temporal samplers are called without the ``deduplicate`` argument.
    extra_kwargs = {} if _is_temporal(sampler_type) else {"deduplicate": False}
    datapipe = sampler(item_sampler, graph, fanouts, **extra_kwargs)

    # One entry per sampled layer, in the order the subgraphs are yielded.
    expected = [
        {
            "csc": {
                "n1:e1:n2": gb.CSCFormatBase(
                    indptr=torch.tensor([0, 2, 4, 6, 8]),
                    indices=torch.tensor([5, 6, 7, 8, 9, 10, 11, 12]),
                ),
                "n2:e2:n1": gb.CSCFormatBase(
                    indptr=torch.tensor([0, 2, 4, 6, 8, 10]),
                    indices=torch.tensor([4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
                ),
            },
            "columns": {
                "n1": torch.tensor([1, 0, 1, 0, 1]),
                "n2": torch.tensor([0, 2, 0, 1]),
            },
            "rows": {
                "n1": torch.tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0]),
                "n2": torch.tensor([0, 2, 0, 1, 0, 1, 0, 2, 0, 1, 0, 2, 0, 1]),
            },
        },
        {
            "csc": {
                "n1:e1:n2": gb.CSCFormatBase(
                    indptr=torch.tensor([0, 2, 4]),
                    indices=torch.tensor([1, 2, 3, 4]),
                ),
                "n2:e2:n1": gb.CSCFormatBase(
                    indptr=torch.tensor([0, 2]),
                    indices=torch.tensor([2, 3], dtype=torch.int64),
                ),
            },
            "columns": {
                "n1": torch.tensor([1]),
                "n2": torch.tensor([0, 2]),
            },
            "rows": {
                "n1": torch.tensor([1, 0, 1, 0, 1]),
                "n2": torch.tensor([0, 2, 0, 1]),
            },
        },
    ]

    for minibatch in datapipe:
        for layer, subgraph in enumerate(minibatch.sampled_subgraphs):
            want = expected[layer]
            # Without deduplication the node IDs are compared in exact order.
            for ntype in ("n1", "n2"):
                assert torch.equal(
                    subgraph.original_row_node_ids[ntype],
                    want["rows"][ntype].to(device),
                )
                assert torch.equal(
                    subgraph.original_column_node_ids[ntype],
                    want["columns"][ntype].to(device),
                )
            for etype in ("n1:e1:n2", "n2:e2:n1"):
                sampled = subgraph.sampled_csc[etype]
                reference = want["csc"][etype]
                assert torch.equal(
                    sampled.indices, reference.indices.to(device)
                )
                assert torch.equal(sampled.indptr, reference.indptr.to(device))
@unittest.skipIf(
    F._default_context_str == "cpu",
    reason="Fails due to different result on the CPU.",
)
@pytest.mark.parametrize("labor", [False, True])
def test_SubgraphSampler_unique_csc_format_Homo_HyperLink_gpu(labor):
    """Check deduplicated hyperlink sampling on a homogeneous graph (GPU).

    Two hyperlink seeds are sampled over two layers with fanout ``-1`` using
    ``gb.LayerNeighborSampler`` when ``labor`` is true and
    ``gb.NeighborSampler`` otherwise. The expected node ordering is selected
    by the major CUDA compute capability (below 7 versus 7 and above).
    """
    torch.manual_seed(1205)
    device = F.ctx()
    dgl_graph = dgl.graph(([5, 0, 7, 7, 2, 4], [0, 1, 2, 2, 3, 4]))
    graph = gb.from_dglgraph(dgl_graph, is_homogeneous=True).to(device)
    itemset = gb.ItemSet(
        torch.LongTensor([[0, 3, 4], [4, 4, 3]]), names="seeds"
    )
    item_sampler = gb.ItemSampler(itemset, batch_size=4).copy_to(device)
    fanouts = [torch.LongTensor([-1]) for _ in range(2)]
    sampler_cls = gb.LayerNeighborSampler if labor else gb.NeighborSampler
    datapipe = sampler_cls(item_sampler, graph, fanouts, deduplicate=True)

    # Expected values per layer; the two variants differ only in node order.
    if torch.cuda.get_device_capability()[0] < 7:
        raw_expected = [
            {
                "rows": [0, 3, 4, 2, 5, 7],
                "indices": [4, 3, 2, 5, 5],
                "indptr": [0, 1, 2, 3, 5, 5],
                "columns": [0, 3, 4, 2, 5],
            },
            {
                "rows": [0, 3, 4, 2, 5],
                "indices": [4, 3, 2],
                "indptr": [0, 1, 2, 3],
                "columns": [0, 3, 4],
            },
        ]
    else:
        raw_expected = [
            {
                "rows": [0, 3, 4, 5, 2, 7],
                "indices": [3, 4, 2, 5, 5],
                "indptr": [0, 1, 2, 3, 3, 5],
                "columns": [0, 3, 4, 5, 2],
            },
            {
                "rows": [0, 3, 4, 5, 2],
                "indices": [3, 4, 2],
                "indptr": [0, 1, 2, 3],
                "columns": [0, 3, 4],
            },
        ]
    expected = [
        {key: torch.tensor(values).to(device) for key, values in layer.items()}
        for layer in raw_expected
    ]

    for minibatch in datapipe:
        for layer, subgraph in enumerate(minibatch.sampled_subgraphs):
            want = expected[layer]
            assert torch.equal(subgraph.original_row_node_ids, want["rows"])
            assert torch.equal(subgraph.sampled_csc.indices, want["indices"])
            assert torch.equal(subgraph.sampled_csc.indptr, want["indptr"])
            assert torch.equal(
                subgraph.original_column_node_ids, want["columns"]
            )
def test_add_reverse_edges_homo():
    """Exercise ``gb.add_reverse_edges`` on a homogeneous edge tensor.

    Covers the normal N*2 case, where the reversed edges are expected after
    the original ones, and the rejection of a 1-D tensor with an
    ``AssertionError`` carrying the documented message.
    """
    forward_edges = torch.tensor([[0, 1, 2, 3], [4, 5, 6, 7]]).T
    expected = torch.tensor(
        [[0, 1, 2, 3, 4, 5, 6, 7], [4, 5, 6, 7, 0, 1, 2, 3]]
    ).T
    assert torch.equal(gb.add_reverse_edges(forward_edges), expected)

    # A tensor with incorrect dimensions must be rejected.
    bad_shape_pattern = re.escape(
        "Only tensor with shape N*2 is supported now, but got torch.Size([4])."
    )
    with pytest.raises(AssertionError, match=bad_shape_pattern):
        gb.add_reverse_edges(torch.tensor([0, 1, 2, 3]))
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Fails due to different result on the GPU.",
)
@pytest.mark.parametrize("use_datapipe", [False, True])
def test_exclude_seed_edges_homo_cpu(use_datapipe):
    """Check seed-edge exclusion after neighbor sampling on a homo graph.

    Two link seeds are sampled over two layers with fanout 2, then seed edges
    are removed either through the ``exclude_seed_edges`` datapipe method
    (``use_datapipe`` true) or by applying ``gb.exclude_seed_edges`` via
    ``transform``. The remaining structure is compared with fixed values.
    """
    device = F.ctx()
    dgl_graph = dgl.graph(([5, 0, 6, 7, 2, 2, 4], [0, 1, 2, 2, 3, 4, 4]))
    graph = gb.from_dglgraph(dgl_graph, True).to(device)
    itemset = gb.ItemSet(torch.LongTensor([[0, 3], [4, 4]]), names="seeds")
    datapipe = gb.ItemSampler(itemset, batch_size=2).copy_to(device)
    fanouts = [torch.LongTensor([2]) for _ in range(2)]
    datapipe = gb.NeighborSampler(datapipe, graph, fanouts)
    if use_datapipe:
        datapipe = datapipe.exclude_seed_edges()
    else:
        datapipe = datapipe.transform(partial(gb.exclude_seed_edges))

    # Expected values per sampled layer, in the order they are yielded.
    raw_expected = [
        {
            "rows": [0, 3, 4, 5, 2, 6, 7],
            "indices": [3, 4, 4, 5, 6],
            "indptr": [0, 1, 2, 3, 3, 5],
            "columns": [0, 3, 4, 5, 2],
        },
        {
            "rows": [0, 3, 4, 5, 2],
            "indices": [3, 4, 4],
            "indptr": [0, 1, 2, 3],
            "columns": [0, 3, 4],
        },
    ]
    expected = [
        {key: torch.tensor(values).to(device) for key, values in layer.items()}
        for layer in raw_expected
    ]

    for minibatch in datapipe:
        for layer, subgraph in enumerate(minibatch.sampled_subgraphs):
            want = expected[layer]
            assert torch.equal(subgraph.original_row_node_ids, want["rows"])
            assert torch.equal(subgraph.sampled_csc.indices, want["indices"])
            assert torch.equal(subgraph.sampled_csc.indptr, want["indptr"])
            assert torch.equal(
                subgraph.original_column_node_ids, want["columns"]
            )
def get_hetero_graph():
    """Build the small heterogeneous sampling graph used by the hetero tests.

    The graph has 5 nodes and 10 edges and is passed to
    ``gb.fused_csc_sampling_graph`` in CSC form:

        expanded indptr: [0, 0, 1, 1, 2, 2, 3, 3, 4, 4]
        indices:         [2, 4, 2, 3, 0, 1, 1, 0, 0, 1]
        edge type:       [1, 1, 1, 1, 0, 0, 0, 0, 0, 0]

    The node type offsets ``[0, 2, 5]`` assign nodes 0-1 to ``n1`` and
    nodes 2-4 to ``n2`` (num_n1 = 2, num_n2 = 3).
    """
    csc_indptr = torch.LongTensor([0, 2, 4, 6, 8, 10])
    csc_indices = torch.LongTensor([2, 4, 2, 3, 0, 1, 1, 0, 0, 1])
    return gb.fused_csc_sampling_graph(
        csc_indptr,
        csc_indices,
        node_type_offset=torch.LongTensor([0, 2, 5]),
        type_per_edge=torch.LongTensor([1, 1, 1, 1, 0, 0, 0, 0, 0, 0]),
        node_type_to_id={"n1": 0, "n2": 1},
        edge_type_to_id={"n1:e1:n2": 0, "n2:e2:n1": 1},
    )
@parametrize_idtype
@pytest.mark.parametrize("feat_size", [(5,), ()])
def test_copy_u(idtype, feat_size):
    """Compare ``dgl.copy_u`` with plain source-node indexing.

    Both the forward result and the gradient with respect to the node
    features are checked against ``x[u]``, where ``u`` holds the source node
    of every edge of a random graph with 30 nodes and 100 edges.
    """
    ctx = F.ctx()
    g = dgl.rand_graph(30, 100)
    g = g.astype(idtype).to(ctx)
    x = torch.randn(
        (g.num_nodes(),) + feat_size, requires_grad=True, device=ctx
    )

    y = dgl.copy_u(g, x)
    y.sum().backward()
    # Snapshot the gradient. ``x.grad`` is zeroed and then accumulated into
    # in place below, so keeping a bare reference would make the final
    # gradient comparison check a tensor against itself.
    x_grad = x.grad.clone()
    x.grad.zero_()

    u, _ = g.edges()
    y_true = x[u.long()]
    y_true.sum().backward()
    x_grad_true = x.grad

    assert torch.allclose(y, y_true)
    assert torch.allclose(x_grad, x_grad_true)
def pad_shape(x, y, x_size, y_size):
    """Reshape two per-edge tensors so their feature dims broadcast.

    Parameters
    ----------
    x, y : torch.Tensor
        Tensors whose trailing dimensions are ``x_size`` and ``y_size``.
    x_size, y_size : tuple of int
        Feature shapes of ``x`` and ``y``; they must be broadcastable.

    Returns
    -------
    tuple of torch.Tensor
        Views of ``x`` and ``y`` with one flattened leading dimension, each
        followed by its feature shape left-padded with ones up to the rank
        of the broadcast feature shape.
    """
    rank = len(torch.broadcast_shapes(x_size, y_size))
    # Prepend singleton dims so both feature shapes have the same rank.
    padded_x, padded_y = (
        (1,) * (rank - len(size)) + size for size in (x_size, y_size)
    )
    return x.view(-1, *padded_x), y.view(-1, *padded_y)
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
def test_gatedgcn_conv(g, idtype):
    """Run ``nn.GatedGCNConv`` on homogeneous graphs and check output shapes.

    The layer is built as ``GatedGCNConv(10, 10, 5)`` and fed random node and
    edge features of width 10; both outputs must have a last dimension of 5.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)
    layer = nn.GatedGCNConv(10, 10, 5)
    node_feat = F.randn((g.num_nodes(), 10))
    edge_feat = F.randn((g.num_edges(), 10))
    layer = layer.to(device)
    node_out, edge_out = layer(g, node_feat, edge_feat)
    # Currently only the output shapes are checked.
    assert node_out.shape == (g.number_of_dst_nodes(), 5)
    assert edge_out.shape == (g.number_of_edges(), 5)
def _AXWb(A, X, W, b):
    """Reference computation ``A @ (X @ W) + b`` used to check graph convs.

    ``X @ W`` is flattened to 2-D (first dimension kept) before being
    multiplied by ``A``, then restored to the shape of ``X @ W`` so inputs
    with extra feature dimensions are supported.
    """
    projected = th.matmul(X, W)
    flattened = projected.view(projected.shape[0], -1)
    aggregated = th.matmul(A, flattened).view_as(projected)
    return aggregated + b
@parametrize_idtype
@pytest.mark.parametrize(
    "g",
    get_cases(["has_scalar_e_feature"], exclude=["zero-degree", "dglgraph"]),
)
@pytest.mark.parametrize("norm", ["none", "both", "right"])
@pytest.mark.parametrize("weight", [True, False])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv_e_weight(idtype, g, norm, weight, bias, out_dim):
    """Run ``nn.GraphConv`` with scalar edge weights and check output shape.

    The edge weights come from ``g.edata["scalar_w"]``. When the layer is
    built without its own weight (``weight`` false), an external weight
    matrix is passed to the forward call instead.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)
    conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias)
    conv = conv.to(device)
    # The external weight is always drawn, even when it ends up unused.
    external_weight = F.randn((5, out_dim)).to(device)
    num_src = g.number_of_src_nodes()
    num_dst = g.number_of_dst_nodes()
    feat = F.randn((num_src, 5)).to(device)

    forward_kwargs = {"edge_weight": g.edata["scalar_w"]}
    if not weight:
        forward_kwargs["weight"] = external_weight
    out = conv(g, feat, **forward_kwargs)
    assert out.shape == (num_dst, out_dim)
def _S2AXWb(A, N, X, W, b):
    """Reference two-hop computation used to check ``nn.TAGConv``.

    Each hop scales the features by ``N``, multiplies the flattened result by
    ``A`` and scales by ``N`` again. The input, the one-hop and the two-hop
    features are concatenated along the last dimension, multiplied by
    ``W.rot90()`` and offset by ``b``.
    """

    def _hop(feat):
        # One normalized propagation step: N * (A @ (N * feat)).
        scaled = feat * N
        return th.matmul(A, scaled.view(scaled.shape[0], -1)) * N

    one_hop = _hop(X)
    two_hop = _hop(one_hop)
    stacked = th.cat([X, one_hop, two_hop], dim=-1)
    return th.matmul(stacked, W.rot90()) + b
def test_set2set():
    """Check ``nn.Set2Set`` readout shapes on a single and a batched graph.

    The readout of a width-5 input must have width 10 and one row per graph
    in the batch.
    """
    device = F.ctx()
    single = graph_with_nodes(10, device)

    # Set2Set with hidden size 5, 3 iterations and 3 layers.
    readout = nn.Set2Set(5, 3, 3)
    readout = readout.to(device)
    print(readout)

    # Case 1: a single graph yields one output row.
    feat = F.randn((single.num_nodes(), 5))
    out = readout(single, feat)
    assert out.shape[0] == 1 and out.shape[1] == 10 and out.dim() == 2

    # Case 2: a batch of three graphs yields three output rows.
    second = graph_with_nodes(11, device)
    third = graph_with_nodes(5, device)
    batched = dgl.batch([single, second, third])
    feat = F.randn((batched.num_nodes(), 5))
    out = readout(batched, feat)
    assert out.shape[0] == 3 and out.shape[1] == 10 and out.dim() == 2
test#2: batched graph g_ = graph_with_nodes(5, ctx) bg = dgl.batch([g, g_, g, g_, g]) h0 = F.randn((bg.num_nodes(), 5)) h1 = sum_pool(bg, h0) truth = th.stack( [ F.sum(h0[:15], 0), F.sum(h0[15:20], 0), F.sum(h0[20:35], 0), F.sum(h0[35:40], 0), F.sum(h0[40:55], 0), ], 0, ) assert F.allclose(h1, truth) h1 = avg_pool(bg, h0) truth = th.stack( [ F.mean(h0[:15], 0), F.mean(h0[15:20], 0), F.mean(h0[20:35], 0), F.mean(h0[35:40], 0), F.mean(h0[40:55], 0), ], 0, ) assert F.allclose(h1, truth) h1 = max_pool(bg, h0) truth = th.stack( [ F.max(h0[:15], 0), F.max(h0[15:20], 0), F.max(h0[20:35], 0), F.max(h0[35:40], 0), F.max(h0[40:55], 0), ], 0, ) assert F.allclose(h1, truth) h1 = sort_pool(bg, h0) assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.dim() == 2 def test_set_trans(): ctx = F.ctx() g = graph_with_nodes(15) st_enc_0 = nn.SetTransformerEncoder(50, 5, 10, 100, 2, "sab") st_enc_1 = nn.SetTransformerEncoder(50, 5, 10, 100, 2, "isab", 3) st_dec = nn.SetTransformerDecoder(50, 5, 10, 100, 2, 4) st_enc_0 = st_enc_0.to(ctx) st_enc_1 = st_enc_1.to(ctx) st_dec = st_dec.to(ctx) print(st_enc_0, st_enc_1, st_dec) # test#1: basic h0 = F.randn((g.num_nodes(), 50)) h1 = st_enc_0(g, h0) assert h1.shape == h0.shape h1 = st_enc_1(g, h0) assert h1.shape == h0.shape h2 = st_dec(g, h1) assert h2.shape[0] == 1 and h2.shape[1] == 200 and h2.dim() == 2 # test#2: batched graph g1 = graph_with_nodes(5) g2 = graph_with_nodes(10) bg = dgl.batch([g, g1, g2]) h0 = F.randn((bg.num_nodes(), 50)) h1 = st_enc_0(bg, h0) assert h1.shape == h0.shape h1 = st_enc_1(bg, h0) assert h1.shape == h0.shape h2 = st_dec(bg, h1) assert h2.shape[0] == 3 and h2.shape[1] == 200 and h2.dim() == 2 @parametrize_idtype @pytest.mark.parametrize("O", [1, 8, 32]) def test_rgcn(idtype, O): ctx = F.ctx() etype = [] g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.1)) g = g.astype(idtype).to(F.ctx()) # 5 etypes R = 5 for i in range(g.num_edges()): etype.append(i % 5) B = 2 I = 10 h = th.randn((100, I)).to(ctx) r = 
@parametrize_idtype
@pytest.mark.parametrize("O", [1, 10, 40])
def test_rgcn_default_nbasis(idtype, O):
    """Exercise ``nn.RelGraphConv`` without an explicit number of bases.

    Covers the default, "basis" and (when ``O`` is divisible by ``R``) "bdd"
    variants: pickling, output shape, agreement between unsorted input and
    pre-sorted input, and the optional per-edge ``norm`` argument.
    """
    ctx = F.ctx()
    g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.1))
    g = g.astype(idtype).to(ctx)
    # 5 etypes, assigned round-robin over the edge ids
    R = 5
    etype = [i % R for i in range(g.num_edges())]
    I = 10

    h = th.randn((100, I)).to(ctx)
    r = th.tensor(etype).to(ctx)
    norm = th.rand((g.num_edges(), 1)).to(ctx)
    # Permute the edges so that edge types are sorted; used by the
    # ``presorted=True`` calls below.
    sorted_r, idx = th.sort(r)
    sorted_g = dgl.reorder_graph(
        g,
        edge_permute_algo="custom",
        permute_config={"edges_perm": idx.to(idtype)},
    )

    # The "bdd" module is only built when O is a multiple of R.
    use_bdd = O % R == 0

    rgc = nn.RelGraphConv(I, O, R).to(ctx)
    th.save(rgc, tmp_buffer)  # test pickle
    rgc_basis = nn.RelGraphConv(I, O, R, "basis").to(ctx)
    th.save(rgc_basis, tmp_buffer)  # test pickle
    if use_bdd:
        rgc_bdd = nn.RelGraphConv(I, O, R, "bdd").to(ctx)
        th.save(rgc_bdd, tmp_buffer)  # test pickle

    # basic usage
    h_new = rgc(g, h, r)
    assert h_new.shape == (100, O)
    h_new_basis = rgc_basis(g, h, r)
    assert h_new_basis.shape == (100, O)
    if use_bdd:
        h_new_bdd = rgc_bdd(g, h, r)
        assert h_new_bdd.shape == (100, O)

    # sorted input
    h_new_sorted = rgc(sorted_g, h, sorted_r, presorted=True)
    assert th.allclose(h_new, h_new_sorted, atol=1e-4, rtol=1e-4)
    h_new_basis_sorted = rgc_basis(sorted_g, h, sorted_r, presorted=True)
    assert th.allclose(h_new_basis, h_new_basis_sorted, atol=1e-4, rtol=1e-4)
    if use_bdd:
        h_new_bdd_sorted = rgc_bdd(sorted_g, h, sorted_r, presorted=True)
        assert th.allclose(h_new_bdd, h_new_bdd_sorted, atol=1e-4, rtol=1e-4)

    # norm input
    h_new = rgc(g, h, r, norm)
    assert h_new.shape == (100, O)
    h_new = rgc_basis(g, h, r, norm)
    assert h_new.shape == (100, O)
    if use_bdd:
        h_new = rgc_bdd(g, h, r, norm)
        assert h_new.shape == (100, O)
@parametrize_idtype
@pytest.mark.parametrize(
    "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 5])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_gatv2_conv(g, idtype, out_dim, num_heads):
    """Shape-check ``nn.GATv2Conv`` with and without a residual connection.

    Verifies the per-head output shape, the attention shape returned with
    ``get_attention=True``, and that the module can be pickled.
    """
    ctx = F.ctx()
    g = g.astype(idtype).to(ctx)
    gat = nn.GATv2Conv(5, out_dim, num_heads)
    feat = F.randn((g.number_of_src_nodes(), 5))
    gat = gat.to(ctx)
    h = gat(g, feat)

    # test pickle
    th.save(gat, tmp_buffer)

    assert h.shape == (g.number_of_dst_nodes(), num_heads, out_dim)
    _, a = gat(g, feat, get_attention=True)
    assert a.shape == (g.num_edges(), num_heads, 1)

    # test residual connection
    # This must build GATv2Conv: the previous code instantiated GATConv here,
    # so the GATv2 residual path was never exercised.
    gat = nn.GATv2Conv(5, out_dim, num_heads, residual=True)
    gat = gat.to(ctx)
    h = gat(g, feat)
    assert h.shape == (g.number_of_dst_nodes(), num_heads, out_dim)
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_node_feats", [1, 5])
@pytest.mark.parametrize("out_edge_feats", [1, 5])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_egat_conv(g, idtype, out_node_feats, out_edge_feats, num_heads):
    """Shape-check ``nn.EGATConv`` on homogeneous graphs.

    Checks the node output, the edge output and the attention tensor returned
    with ``get_attention=True``; also pickles the module.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)
    n_nodes, n_edges = g.num_nodes(), g.num_edges()

    layer = nn.EGATConv(
        in_node_feats=10,
        in_edge_feats=5,
        out_node_feats=out_node_feats,
        out_edge_feats=out_edge_feats,
        num_heads=num_heads,
    )
    node_in = F.randn((n_nodes, 10))
    edge_in = F.randn((n_edges, 5))
    layer = layer.to(device)
    node_out, edge_out = layer(g, node_in, edge_in)

    # test pickle
    th.save(layer, tmp_buffer)

    assert node_out.shape == (n_nodes, num_heads, out_node_feats)
    assert edge_out.shape == (n_edges, num_heads, out_edge_feats)

    # With get_attention=True a third value (the attention) is returned.
    outputs = layer(g, node_in, edge_in, get_attention=True)
    _, _, attn = outputs
    assert attn.shape == (n_edges, num_heads, 1)
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_feats", [1, 5])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_edgegat_conv(g, idtype, out_feats, num_heads):
    """Shape-check ``nn.EdgeGATConv`` on homogeneous graphs.

    Checks the node output shape and the attention shape returned when the
    fourth positional argument is ``True``; also pickles the module.
    """
    ctx = F.ctx()
    g = g.astype(idtype).to(ctx)
    edgegat = nn.EdgeGATConv(
        in_feats=10, edge_feats=5, out_feats=out_feats, num_heads=num_heads
    )
    # Use the num_nodes()/num_edges() spelling, as the other tests in this
    # file do, rather than the number_of_* variants.
    nfeat = F.randn((g.num_nodes(), 10))
    efeat = F.randn((g.num_edges(), 5))
    edgegat = edgegat.to(ctx)
    h = edgegat(g, nfeat, efeat)

    # test pickle
    th.save(edgegat, tmp_buffer)

    assert h.shape == (g.num_nodes(), num_heads, out_feats)
    # The trailing True requests the attention values as a second output.
    _, attn = edgegat(g, nfeat, efeat, True)
    assert attn.shape == (g.num_edges(), num_heads, 1)
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"]))
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn", "lstm"])
def test_sage_conv(idtype, g, aggre_type):
    """Check ``nn.SAGEConv`` pickles and yields 10 output features.

    Runs once per aggregator type on homogeneous and block-bipartite graphs.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)

    layer = nn.SAGEConv(5, 10, aggre_type)
    src_feat = F.randn((g.number_of_src_nodes(), 5))
    layer = layer.to(device)

    # test pickle
    th.save(layer, tmp_buffer)

    out = layer(g, src_feat)
    assert out.shape[-1] == 10
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sgc_conv(g, idtype, out_dim):
    """Check ``nn.SGConv`` with and without caching.

    The non-cached layer is pickled and shape-checked. The cached layer must
    return the same result for a second call even though the input features
    were shifted by one.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)

    # not cached
    plain = nn.SGConv(5, out_dim, 3)

    # test pickle
    th.save(plain, tmp_buffer)

    feat = F.randn((g.num_nodes(), 5))
    plain = plain.to(device)
    out = plain(g, feat)
    assert out.shape[-1] == out_dim

    # cached
    cached = nn.SGConv(5, out_dim, 3, True).to(device)
    first = cached(g, feat)
    second = cached(g, feat + 1)
    assert F.allclose(first, second)
    assert first.shape[-1] == out_dim
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
def test_tagconv_e_weight(g, idtype):
    """Check ``nn.TAGConv`` accepts ``edge_weight`` and keeps 5 output features."""
    ctx = F.ctx()
    g = g.astype(idtype).to(ctx)
    # Move the layer to the test context once; the original did this twice.
    conv = nn.TAGConv(5, 5, bias=True).to(ctx)
    feat = F.randn((g.num_nodes(), 5))
    # Unit weight on every edge.
    eweight = F.ones((g.num_edges(),))
    h = conv(g, feat, edge_weight=eweight)
    assert h.shape[-1] == 5
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
def test_gated_graph_conv(g, idtype):
    """Shape-check ``nn.GatedGraphConv`` with three edge types."""
    device = F.ctx()
    g = g.astype(idtype).to(device)

    layer = nn.GatedGraphConv(5, 10, 5, 3)
    # Edge types cycle 0, 1, 2 over the edge ids.
    edge_types = th.arange(g.num_edges()) % 3
    node_feat = F.randn((g.num_nodes(), 5))
    layer = layer.to(device)
    edge_types = edge_types.to(device)

    out = layer(g, node_feat, edge_types)
    # currently we only do a shape check
    assert out.shape[-1] == 10
@parametrize_idtype
@pytest.mark.parametrize(
    "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
def test_nn_conv(g, idtype):
    """Shape-check ``nn.NNConv`` with a linear edge function and "mean"."""
    device = F.ctx()
    g = g.astype(idtype).to(device)

    # The edge function takes 4-d edge features and emits 5 * 10 values.
    edge_net = th.nn.Linear(4, 5 * 10)
    layer = nn.NNConv(5, 10, edge_net, "mean")
    node_feat = F.randn((g.number_of_src_nodes(), 5))
    edge_feat = F.randn((g.num_edges(), 4))
    layer = layer.to(device)

    out = layer(g, node_feat, edge_feat)
    # currently we only do shape check
    assert out.shape[-1] == 10
@parametrize_idtype
@pytest.mark.parametrize("norm_type", ["both", "right", "none"])
@pytest.mark.parametrize(
    "g", get_cases(["homo", "bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_graph_conv(norm_type, g, idtype, out_dim):
    """Check ``nn.DenseGraphConv`` agrees with ``nn.GraphConv``.

    Both layers are given the same weight and bias. The sparse layer runs on
    the graph and the dense layer on its dense adjacency matrix; the outputs
    must match.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)
    # TODO(minjie): enable the following option after #1385
    dense_adj = g.adj_external(transpose=True, ctx=device).to_dense()

    sparse_layer = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
    dense_layer = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
    # Share parameters before the layers are moved to the device.
    dense_layer.weight.data = sparse_layer.weight.data
    dense_layer.bias.data = sparse_layer.bias.data

    src_feat = F.randn((g.number_of_src_nodes(), 5))
    sparse_layer = sparse_layer.to(device)
    dense_layer = dense_layer.to(device)

    sparse_out = sparse_layer(g, src_feat)
    dense_out = dense_layer(dense_adj, src_feat)
    assert F.allclose(sparse_out, dense_out)
@parametrize_idtype
@pytest.mark.parametrize(
    "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_dotgat_conv(g, idtype, out_dim, num_heads):
    """Shape-check ``nn.DotGatConv`` output and attention; pickle the module."""
    device = F.ctx()
    g = g.astype(idtype).to(device)

    layer = nn.DotGatConv(5, out_dim, num_heads)
    src_feat = F.randn((g.number_of_src_nodes(), 5))
    layer = layer.to(device)

    # test pickle
    th.save(layer, tmp_buffer)

    out = layer(g, src_feat)
    assert out.shape == (g.number_of_dst_nodes(), num_heads, out_dim)

    # With get_attention=True the attention is returned as a second value.
    _, attn = layer(g, src_feat, get_attention=True)
    assert attn.shape == (g.num_edges(), num_heads, 1)
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_cheb_conv(out_dim):
    """Check ``nn.DenseChebConv`` agrees with ``nn.ChebConv`` for k = 1, 2, 3.

    For each ``k`` a fresh random 100-node graph is built, the dense layer is
    given the sparse layer's parameters, and both outputs are compared.
    """
    device = F.ctx()
    for order in (1, 2, 3):
        graph = dgl.from_scipy(sp.sparse.random(100, 100, density=0.1))
        graph = graph.to(device)
        dense_adj = graph.adj_external(transpose=True, ctx=device).to_dense()

        sparse_layer = nn.ChebConv(5, out_dim, order, None)
        dense_layer = nn.DenseChebConv(5, out_dim, order)

        # Copy the sparse layer's linear weight into the dense layer's W,
        # reshaped to (order, 5, out_dim).
        shared_weight = sparse_layer.linear.weight.data.transpose(-1, -2)
        dense_layer.W.data = shared_weight.view(order, 5, out_dim)
        if sparse_layer.linear.bias is not None:
            dense_layer.bias.data = sparse_layer.linear.bias.data

        node_feat = F.randn((100, 5))
        sparse_layer = sparse_layer.to(device)
        dense_layer = dense_layer.to(device)

        # The last argument is passed as a list to the sparse layer and as a
        # scalar to the dense layer.
        sparse_out = sparse_layer(graph, node_feat, [2.0])
        dense_out = dense_layer(dense_adj, node_feat, 2.0)
        print(order, sparse_out, dense_out)
        assert F.allclose(sparse_out, dense_out)
def myagg(alist, dsttype):
    """Combine ``alist`` into a position-weighted sum.

    The element at index ``i`` is weighted by ``i + 1``, so the result is
    ``alist[0] + 2 * alist[1] + 3 * alist[2] + ...``. ``dsttype`` is accepted
    but not used.
    """
    total = alist[0]
    # Index 0 was taken with weight 1 above; continue from weight 2.
    for weight, item in enumerate(alist[1:], start=2):
        total = total + weight * item
    return total
def test_hetero_conv(agg, idtype, canonical_keys): g = dgl.heterograph( { ("user", "follows", "user"): ([0, 0, 2, 1], [1, 2, 1, 3]), ("user", "plays", "game"): ([0, 0, 0, 1, 2], [0, 2, 3, 0, 2]), ("store", "sells", "game"): ([0, 0, 1, 1], [0, 3, 1, 2]), }, idtype=idtype, device=F.ctx(), ) if not canonical_keys: conv = nn.HeteroGraphConv( { "follows": nn.GraphConv(2, 3, allow_zero_in_degree=True), "plays": nn.GraphConv(2, 4, allow_zero_in_degree=True), "sells": nn.GraphConv(3, 4, allow_zero_in_degree=True), }, agg, ) else: conv = nn.HeteroGraphConv( { ("user", "follows", "user"): nn.GraphConv( 2, 3, allow_zero_in_degree=True ), ("user", "plays", "game"): nn.GraphConv( 2, 4, allow_zero_in_degree=True ), ("store", "sells", "game"): nn.GraphConv( 3, 4, allow_zero_in_degree=True ), }, agg, ) conv = conv.to(F.ctx()) # test pickle th.save(conv, tmp_buffer) uf = F.randn((4, 2)) gf = F.randn((4, 4)) sf = F.randn((2, 3)) h = conv(g, {"user": uf, "game": gf, "store": sf}) assert set(h.keys()) == {"user", "game"} if agg != "stack": assert h["user"].shape == (4, 3) assert h["game"].shape == (4, 4) else: assert h["user"].shape == (4, 1, 3) assert h["game"].shape == (4, 2, 4) block = dgl.to_block( g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []} ).to(F.ctx()) h = conv( block, ( {"user": uf, "game": gf, "store": sf}, {"user": uf, "game": gf, "store": sf[0:0]}, ), ) assert set(h.keys()) == {"user", "game"} if agg != "stack": assert h["user"].shape == (4, 3) assert h["game"].shape == (4, 4) else: assert h["user"].shape == (4, 1, 3) assert h["game"].shape == (4, 2, 4) h = conv(block, {"user": uf, "game": gf, "store": sf}) assert set(h.keys()) == {"user", "game"} if agg != "stack": assert h["user"].shape == (4, 3) assert h["game"].shape == (4, 4) else: assert h["user"].shape == (4, 1, 3) assert h["game"].shape == (4, 2, 4) # test with mod args class MyMod(th.nn.Module): def __init__(self, s1, s2): super(MyMod, self).__init__() self.carg1 = 0 self.carg2 = 0 
@pytest.mark.parametrize("out_dim", [1, 2, 100])
def test_hetero_linear(out_dim):
    """Shape check for HeteroLinear with a string key and a tuple key.

    Each key has its own input width; every output must keep the row count
    of its input and have ``out_dim`` columns.
    """
    follows = ("user", "follows", "user")
    user_feat = F.randn((2, 1))
    follows_feat = F.randn((3, 2))
    inputs = {"user": user_feat, follows: follows_feat}

    in_sizes = {"user": 1, follows: 2}
    layer = nn.HeteroLinear(in_sizes, out_dim).to(F.ctx())

    outputs = layer(inputs)
    assert outputs["user"].shape == (2, out_dim)
    assert outputs[follows].shape == (3, out_dim)
@parametrize_idtype
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
def test_gnnexplainer(g, idtype, out_dim):
    """Smoke-test GNNExplainer for node-level and graph-level explanations.

    A tiny model (linear layer plus one round of sum aggregation, optionally
    followed by average pooling) is explained with ``explain_node`` and
    ``explain_graph``; the test only requires both calls to complete.
    """
    g = g.astype(idtype).to(F.ctx())
    feat = F.randn((g.num_nodes(), 5))

    class Model(th.nn.Module):
        def __init__(self, in_feats, out_feats, graph=False):
            super().__init__()
            self.linear = th.nn.Linear(in_feats, out_feats)
            # Only the graph-level variant pools node outputs.
            self.pool = nn.AvgPooling() if graph else None

        def forward(self, graph, feat, eweight=None):
            with graph.local_scope():
                graph.ndata["h"] = self.linear(feat)
                if eweight is not None:
                    # Scale each message by its edge weight.
                    graph.edata["w"] = eweight
                    message_fn = fn.u_mul_e("h", "w", "m")
                else:
                    message_fn = fn.copy_u("h", "m")
                graph.update_all(message_fn, fn.sum("m", "h"))

                if not self.pool:
                    return graph.ndata["h"]
                return self.pool(graph, graph.ndata["h"])

    # Explain node prediction
    node_model = Model(5, out_dim).to(F.ctx())
    explainer = nn.GNNExplainer(node_model, num_hops=1)
    new_center, sg, feat_mask, edge_mask = explainer.explain_node(0, g, feat)

    # Explain graph prediction
    graph_model = Model(5, out_dim, graph=True).to(F.ctx())
    explainer = nn.GNNExplainer(graph_model, num_hops=1)
    feat_mask, edge_mask = explainer.explain_graph(g, feat)
@parametrize_idtype
@pytest.mark.parametrize(
    "g",
    get_cases(
        ["homo"],
        exclude=[
            "zero-degree",
            "homo-zero-degree",
            "has_feature",
            "has_scalar_e_feature",
            "row_sorted",
            "col_sorted",
            "batched",
        ],
    ),
)
@pytest.mark.parametrize("n_classes", [2])
def test_subgraphx(g, idtype, n_classes):
    """Smoke-test SubgraphX graph explanation on homogeneous graph cases.

    The explained model is a single GraphConv followed by ReLU and average
    pooling; the test only requires ``explain_graph`` to complete.
    """
    device = F.ctx()
    g = g.astype(idtype).to(device)
    feat = F.randn((g.num_nodes(), 5))

    class Model(th.nn.Module):
        def __init__(self, in_dim, n_classes):
            super().__init__()
            self.conv = nn.GraphConv(in_dim, n_classes)
            self.pool = nn.AvgPooling()

        def forward(self, g, h):
            activated = th.nn.functional.relu(self.conv(g, h))
            return self.pool(g, activated)

    in_dim = feat.shape[1]
    model = Model(in_dim, n_classes).to(device)
    explainer = nn.SubgraphX(
        model,
        num_hops=1,
        shapley_steps=20,
        num_rollouts=5,
        coef=2.0,
    )
    explainer.explain_graph(g, feat, target_class=0)
nn.HeteroSubgraphX( model, num_hops=1, shapley_steps=20, num_rollouts=5, coef=2.0 ) explainer.explain_graph(g, feat, target_class=0) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases( ["homo"], exclude=[ "zero-degree", "homo-zero-degree", "has_feature", "has_scalar_e_feature", "row_sorted", "col_sorted", ], ), ) @pytest.mark.parametrize("n_classes", [2]) def test_pgexplainer(g, idtype, n_classes): ctx = F.ctx() g = g.astype(idtype).to(ctx) feat = F.randn((g.num_nodes(), 5)) g.ndata["attr"] = feat # add reverse edges transform = dgl.transforms.AddReverse(copy_edata=True) g = transform(g) class Model(th.nn.Module): def __init__(self, in_feats, out_feats, graph=False): super(Model, self).__init__() self.graph = graph self.conv = nn.GraphConv(in_feats, out_feats) self.fc = th.nn.Linear(out_feats, out_feats) th.nn.init.xavier_uniform_(self.fc.weight) def forward(self, g, h, embed=False, edge_weight=None): h = self.conv(g, h, edge_weight=edge_weight) if not self.graph or embed: return h with g.local_scope(): g.ndata["h"] = h hg = dgl.mean_nodes(g, "h") return self.fc(hg) # graph explainer model = Model(feat.shape[1], n_classes, graph=True) model = model.to(ctx) explainer = nn.PGExplainer(model, n_classes) explainer.train_step(g, g.ndata["attr"], 5.0) probs, edge_weight = explainer.explain_graph(g, feat) # node explainer model = Model(feat.shape[1], n_classes, graph=False) model = model.to(ctx) explainer = nn.PGExplainer( model, n_classes, num_hops=1, explain_graph=False ) explainer.train_step_node(0, g, g.ndata["attr"], 5.0) explainer.train_step_node([0, 1], g, g.ndata["attr"], 5.0) explainer.train_step_node(th.tensor(0), g, g.ndata["attr"], 5.0) explainer.train_step_node(th.tensor([0, 1]), g, g.ndata["attr"], 5.0) probs, edge_weight, bg, inverse_indices = explainer.explain_node(0, g, feat) probs, edge_weight, bg, inverse_indices = explainer.explain_node( [0, 1], g, feat ) probs, edge_weight, bg, inverse_indices = explainer.explain_node( th.tensor(0), g, feat 
) probs, edge_weight, bg, inverse_indices = explainer.explain_node( th.tensor([0, 1]), g, feat ) @pytest.mark.parametrize("g", get_cases(["hetero"])) @pytest.mark.parametrize("idtype", [F.int64]) @pytest.mark.parametrize("input_dim", [5]) @pytest.mark.parametrize("n_classes", [2]) def test_heteropgexplainer(g, idtype, input_dim, n_classes): ctx = F.ctx() g = g.astype(idtype).to(ctx) feat = { ntype: F.randn((g.num_nodes(ntype), input_dim)) for ntype in g.ntypes } # add self-loop and reverse edges transform1 = dgl.transforms.AddSelfLoop(new_etypes=True) g = transform1(g) transform2 = dgl.transforms.AddReverse(copy_edata=True) g = transform2(g) class Model(th.nn.Module): def __init__( self, in_feats, embed_dim, out_feats, canonical_etypes, graph=True ): super(Model, self).__init__() self.graph = graph self.conv = nn.HeteroGraphConv( { c_etype: nn.GraphConv(in_feats, embed_dim) for c_etype in canonical_etypes } ) self.fc = th.nn.Linear(embed_dim, out_feats) def forward(self, g, h, embed=False, edge_weight=None): if edge_weight is not None: mod_kwargs = { etype: {"edge_weight": mask} for etype, mask in edge_weight.items() } h = self.conv(g, h, mod_kwargs=mod_kwargs) else: h = self.conv(g, h) if not self.graph or embed: return h with g.local_scope(): g.ndata["h"] = h hg = 0 for ntype in g.ntypes: hg = hg + dgl.mean_nodes(g, "h", ntype=ntype) return self.fc(hg) embed_dim = input_dim # graph explainer model = Model( input_dim, embed_dim, n_classes, g.canonical_etypes, graph=True ) model = model.to(ctx) explainer = nn.HeteroPGExplainer(model, embed_dim) explainer.train_step(g, feat, 5.0) probs, edge_weight = explainer.explain_graph(g, feat) # node explainer model = Model( input_dim, embed_dim, n_classes, g.canonical_etypes, graph=False ) model = model.to(ctx) explainer = nn.HeteroPGExplainer( model, embed_dim, num_hops=1, explain_graph=False ) explainer.train_step_node({g.ntypes[0]: [0]}, g, feat, 5.0) explainer.train_step_node({g.ntypes[0]: th.tensor([0, 1])}, g, feat, 
def test_ke_score_funcs():
    """Check that TransE and TransR return one score per input triplet.

    Random head/tail embeddings and relation IDs are scored by both modules
    and the result must be a 1-D tensor of length ``num_edges``.
    """
    ctx = F.ctx()
    num_edges = 30
    num_rels = 3
    nfeats = 4

    h_src = th.randn((num_edges, nfeats)).to(ctx)
    h_dst = th.randn((num_edges, nfeats)).to(ctx)
    rels = th.randint(low=0, high=num_rels, size=(num_edges,)).to(ctx)

    score_func = nn.TransE(num_rels=num_rels, feats=nfeats).to(ctx)
    score_func.reset_parameters()
    # The comparison used to be a bare expression against the int
    # ``(num_edges)``, so it never verified anything; assert against a
    # proper 1-tuple instead.
    assert score_func(h_src, h_dst, rels).shape == (num_edges,)

    score_func = nn.TransR(
        num_rels=num_rels, rfeats=nfeats - 1, nfeats=nfeats
    ).to(ctx)
    score_func.reset_parameters()
    assert score_func(h_src, h_dst, rels).shape == (num_edges,)
@pytest.mark.parametrize("feat_size", [4, 32])
@pytest.mark.parametrize(
    "regularizer,num_bases", [(None, None), ("basis", 4), ("bdd", 4)]
)
def test_typed_linear(feat_size, regularizer, num_bases):
    """Check TypedLinear on unsorted and type-sorted inputs.

    The layer is applied to 100 rows with random type IDs, once in the
    original order and once sorted by type with ``sorted_by_type=True``.
    Both outputs must have shape ``(100, 2 * feat_size)`` and agree after
    the sorted result is permuted back to the original order.
    """
    dev = F.ctx()
    # Single source of truth for the number of types: it sizes the layer and
    # bounds the sampled type IDs (previously a repeated literal 5).
    num_types = 5
    lin = nn.TypedLinear(
        feat_size,
        feat_size * 2,
        num_types,
        regularizer=regularizer,
        num_bases=num_bases,
    ).to(dev)
    print(lin)

    x = th.randn(100, feat_size).to(dev)
    x_type = th.randint(0, num_types, (100,)).to(dev)
    x_type_sorted, idx = th.sort(x_type)
    # Sorting the permutation yields its inverse, which maps sorted rows
    # back to their original positions.
    _, rev_idx = th.sort(idx)
    x_sorted = x[idx]

    # test unsorted
    y = lin(x, x_type)
    assert y.shape == (100, feat_size * 2)

    # test sorted
    y_sorted = lin(x_sorted, x_type_sorted, sorted_by_type=True)
    assert y_sorted.shape == (100, feat_size * 2)

    assert th.allclose(y, y_sorted[rev_idx], atol=1e-4, rtol=1e-4)
sorted_g.ndata["x"] sorted_y = m( sorted_g, sorted_x, sorted_ntype, sorted_etype, presorted=False ) assert sorted_y.shape == (g.num_nodes(), head_size * num_heads) # mini-batch train_idx = th.randperm(100, dtype=idtype)[:10] sampler = dgl.dataloading.NeighborSampler([-1]) train_loader = dgl.dataloading.DataLoader( g, train_idx.to(dev), sampler, batch_size=8, device=dev, shuffle=True ) (input_nodes, output_nodes, block) = next(iter(train_loader)) block = block[0] x = x[input_nodes.to(th.long)] ntype = ntype[input_nodes.to(th.long)] edge = block.edata[dgl.EID] etype = etype[edge.to(th.long)] y = m(block, x, ntype, etype) assert y.shape == (block.number_of_dst_nodes(), head_size * num_heads) # TODO(minjie): enable the following check # assert th.allclose(y, sorted_y[rev_idx], atol=1e-4, rtol=1e-4) @pytest.mark.parametrize("self_loop", [True, False]) @pytest.mark.parametrize("get_distances", [True, False]) def test_radius_graph(self_loop, get_distances): pos = th.tensor( [ [0.1, 0.3, 0.4], [0.5, 0.2, 0.1], [0.7, 0.9, 0.5], [0.3, 0.2, 0.5], [0.2, 0.8, 0.2], [0.9, 0.2, 0.1], [0.7, 0.4, 0.4], [0.2, 0.1, 0.6], [0.5, 0.3, 0.5], [0.4, 0.2, 0.6], ] ) rg = nn.RadiusGraph(0.3, self_loop=self_loop) if get_distances: g, dists = rg(pos, get_distances=get_distances) else: g = rg(pos) if self_loop: src_target = th.tensor( [ 0, 0, 1, 2, 3, 3, 3, 3, 3, 4, 5, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, ] ) dst_target = th.tensor( [ 0, 3, 1, 2, 0, 3, 7, 8, 9, 4, 5, 6, 8, 3, 7, 9, 3, 6, 8, 9, 3, 7, 8, 9, ] ) if get_distances: dists_target = th.tensor( [ [0.0000], [0.2449], [0.0000], [0.0000], [0.2449], [0.0000], [0.1732], [0.2236], [0.1414], [0.0000], [0.0000], [0.0000], [0.2449], [0.1732], [0.0000], [0.2236], [0.2236], [0.2449], [0.0000], [0.1732], [0.1414], [0.2236], [0.1732], [0.0000], ] ) else: src_target = th.tensor([0, 3, 3, 3, 3, 6, 7, 7, 8, 8, 8, 9, 9, 9]) dst_target = th.tensor([3, 0, 7, 8, 9, 8, 3, 9, 3, 6, 9, 3, 7, 8]) if get_distances: dists_target = th.tensor( [ [0.2449], 
@parametrize_idtype
def test_group_rev_res(idtype):
    """Smoke-test GroupRevRes wrapping a GraphConv, including backward.

    The input features are split into ``groups`` chunks, so the wrapped
    convolution is built for ``feats // groups`` channels. The test only
    requires the forward and backward passes to complete.
    """
    dev = F.ctx()

    num_nodes = 5
    num_edges = 20
    feats = 32
    groups = 2
    # Cast the graph to the parametrized index dtype; it was previously
    # ignored, so every parametrization ran with the default dtype.
    g = dgl.rand_graph(num_nodes, num_edges).astype(idtype).to(dev)
    h = th.randn(num_nodes, feats).to(dev)
    conv = nn.GraphConv(feats // groups, feats // groups)
    model = nn.GroupRevRes(conv, groups).to(dev)
    result = model(g, h)
    result.sum().backward()
@pytest.mark.parametrize("k", [3, 5])
@pytest.mark.parametrize("alpha", [0.0, 0.5, 1.0])
@pytest.mark.parametrize("norm_type", ["sym", "row"])
@pytest.mark.parametrize("clamp", [True, False])
@pytest.mark.parametrize("normalize", [True, False])
@pytest.mark.parametrize("reset", [True, False])
def test_label_prop(k, alpha, norm_type, clamp, normalize, reset):
    """Smoke-test LabelPropagation with single-label and multi-label inputs.

    The test only requires both forward calls to complete on a small random
    graph; no output values are checked.
    """
    dev = F.ctx()
    num_nodes, num_edges, num_classes = 5, 20, 4

    g = dgl.rand_graph(num_nodes, num_edges).to(dev)
    class_ids = th.tensor([0, 2, 1, 3, 0]).long().to(dev)
    # Boolean indicator matrix for the multi-label case.
    multi_hot = th.rand(num_nodes, num_classes).to(dev) > 0.7
    known = th.tensor([0, 1, 1, 1, 0]).bool().to(dev)

    propagate = nn.LabelPropagation(
        k, alpha, norm_type, clamp, normalize, reset
    )
    propagate(g, class_ids, known)
    # multi-label case
    propagate(g, multi_hot, known)
@pytest.mark.parametrize("max_degree", [2, 6])
@pytest.mark.parametrize("embedding_dim", [8, 16])
@pytest.mark.parametrize("direction", ["in", "out", "both"])
def test_degree_encoder(max_degree, embedding_dim, direction):
    """Shape check for DegreeEncoder on a padded batch of two graphs.

    The batch holds a 4-node and a 2-node graph; their degree vectors are
    padded to a common length, so the encoding must have shape
    ``(2, 4, embedding_dim)``.
    """
    big_graph = dgl.graph(
        (
            th.tensor([0, 0, 0, 1, 1, 2, 3, 3]),
            th.tensor([1, 2, 3, 0, 3, 0, 0, 1]),
        )
    )
    small_graph = dgl.graph((th.tensor([0, 1]), th.tensor([1, 0])))
    graphs = (big_graph, small_graph)

    in_degree = pad_sequence(
        [graph.in_degrees() for graph in graphs], batch_first=True
    )
    out_degree = pad_sequence(
        [graph.out_degrees() for graph in graphs], batch_first=True
    )

    model = nn.DegreeEncoder(max_degree, embedding_dim, direction=direction)
    # Choose the degree tensor that matches the configured direction; for
    # "both", in- and out-degrees are stacked along a new leading axis.
    if direction == "both":
        degrees = th.stack((in_degree, out_degree))
    elif direction == "in":
        degrees = in_degree
    elif direction == "out":
        degrees = out_degree
    de_g = model(degrees)
    assert de_g.shape == (2, 4, embedding_dim)
"company"): ( [0, 2, 3, 0, 2, 1], [0, 0, 0, 1, 2, 3], ), }, idtype=idtype, device=dev, ) model = nn.MetaPath2Vec(g, ["uc", "cu"], window_size=1) model = model.to(dev) embeds = model.node_embed.weight assert embeds.shape[0] == g.num_nodes() @pytest.mark.parametrize("num_layer", [1, 4]) @pytest.mark.parametrize("k", [3, 5]) @pytest.mark.parametrize("lpe_dim", [4, 16]) @pytest.mark.parametrize("n_head", [2, 4]) @pytest.mark.parametrize("batch_norm", [True, False]) @pytest.mark.parametrize("num_post_layer", [0, 1, 2]) def test_LapPosEncoder( num_layer, k, lpe_dim, n_head, batch_norm, num_post_layer ): ctx = F.ctx() num_nodes = 4 EigVals = th.randn((num_nodes, k)).to(ctx) EigVecs = th.randn((num_nodes, k)).to(ctx) model = nn.LapPosEncoder( "Transformer", num_layer, k, lpe_dim, n_head, batch_norm, num_post_layer ).to(ctx) assert model(EigVals, EigVecs).shape == (num_nodes, lpe_dim) model = nn.LapPosEncoder( "DeepSet", num_layer, k, lpe_dim, batch_norm=batch_norm, num_post_layer=num_post_layer, ).to(ctx) assert model(EigVals, EigVecs).shape == (num_nodes, lpe_dim) @pytest.mark.parametrize("feat_size", [128, 512]) @pytest.mark.parametrize("num_heads", [8, 16]) @pytest.mark.parametrize("bias", [True, False]) @pytest.mark.parametrize("attn_bias_type", ["add", "mul"]) @pytest.mark.parametrize("attn_drop", [0.1, 0.5]) def test_BiasedMHA(feat_size, num_heads, bias, attn_bias_type, attn_drop): ndata = th.rand(16, 100, feat_size) attn_bias = th.rand(16, 100, 100, num_heads) attn_mask = th.rand(16, 100, 100) < 0.5 net = nn.BiasedMHA(feat_size, num_heads, bias, attn_bias_type, attn_drop) out = net(ndata, attn_bias, attn_mask) assert out.shape == (16, 100, feat_size) @pytest.mark.parametrize("edge_update", [True, False]) def test_EGTLayer(edge_update): batch_size = 16 num_nodes = 100 feat_size, edge_feat_size = 128, 32 nfeat = th.rand(batch_size, num_nodes, feat_size) efeat = th.rand(batch_size, num_nodes, num_nodes, edge_feat_size) mask = (th.rand(batch_size, num_nodes, num_nodes) 
@pytest.mark.parametrize("max_len", [1, 2])
@pytest.mark.parametrize("feat_dim", [16])
@pytest.mark.parametrize("num_heads", [1, 8])
def test_PathEncoder(max_len, feat_dim, num_heads):
    """Shape check for PathEncoder on a single 4-node graph.

    Edge features along the shortest paths (truncated to ``max_len`` hops)
    are encoded into a per-head attention bias, which must have shape
    ``(1, num_nodes, num_nodes, num_heads)``.
    """
    dev = F.ctx()
    g = dgl.graph(
        (
            th.tensor([0, 0, 0, 1, 1, 2, 3, 3]),
            th.tensor([1, 2, 3, 0, 3, 0, 0, 1]),
        )
    ).to(dev)
    num_nodes = g.num_nodes()

    edge_feat = th.rand(g.num_edges(), feat_dim).to(dev)
    # Append one all-zero row sized by ``feat_dim`` (previously a literal
    # 16, which only matched the single parametrized value).
    # NOTE(review): presumably ``shortest_dist`` pads missing path entries
    # with -1, so indexing with -1 picks this zero row -- confirm.
    edge_feat = th.cat((edge_feat, th.zeros(1, feat_dim).to(dev)), dim=0)

    dist, path = shortest_dist(g, root=None, return_paths=True)
    path_data = edge_feat[path[:, :, :max_len]]

    model = nn.PathEncoder(max_len, feat_dim, num_heads=num_heads).to(dev)
    bias = model(dist.unsqueeze(0), path_data.unsqueeze(0))
    assert bias.shape == (1, num_nodes, num_nodes, num_heads)
@pytest.mark.parametrize("residual", [True, False])
def test_conv_with_zero_nodes_bugfix_7894(residual):
    """Regression test for DGL PR #7894.

    HeteroGraphConv with zero nodes in a specific node type would cause an
    error due to empty tensors. This test ensures that GATConv, GATv2Conv,
    and EdgeGATConv can handle such cases without raising errors and return
    correctly shaped (possibly empty) outputs.
    """
    # Sizes shared by the features, the modules and the assertions, so they
    # cannot drift apart.
    feat_dim = 16
    out_dim = 2
    num_heads = 2
    buys = ("user", "buys", "item")
    likes = ("user", "likes", "tag")

    # Create a heterogeneous graph with zero nodes in the "tag" type
    num_nodes_dict = {
        "user": 5,
        "item": 10,
        "tag": 0,
    }
    data_dict = {
        buys: (torch.tensor([0, 1, 2]), torch.tensor([4, 5, 6])),
        likes: (
            torch.tensor([], dtype=torch.int64),
            torch.tensor([], dtype=torch.int64),
        ),
    }
    g = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)

    node_features = {
        ntype: torch.randn(count, feat_dim)
        for ntype, count in num_nodes_dict.items()
    }
    edge_features = {
        c_etype: torch.randn(g.num_edges(c_etype), feat_dim)
        for c_etype in (buys, likes)
    }

    def check_output(out):
        # Only destination node types appear in the result; "user" is a
        # pure source type here.
        assert out["item"].shape == (
            num_nodes_dict["item"],
            num_heads,
            out_dim,
        )
        assert out["tag"].shape == (num_nodes_dict["tag"], num_heads, out_dim)
        assert "user" not in out

    # Test GATConv and GATv2Conv with zero nodes in "tag" type
    for conv_cls in (nn.GATConv, nn.GATv2Conv):
        conv = nn.HeteroGraphConv(
            {
                c_etype: conv_cls(
                    feat_dim, out_dim, num_heads=num_heads, residual=residual
                )
                for c_etype in (buys, likes)
            },
            aggregate="sum",
        )
        check_output(conv(g, node_features))

    # Test EdgeGATConv with zero nodes in "tag" type
    edge_conv = nn.HeteroGraphConv(
        {
            c_etype: nn.EdgeGATConv(
                feat_dim,
                feat_dim,
                out_dim,
                num_heads=num_heads,
                residual=residual,
            )
            for c_etype in (buys, likes)
        },
        aggregate="sum",
    )
    mod_kwargs = {
        "buys": {"edge_feat": edge_features[buys]},
        "likes": {"edge_feat": edge_features[likes]},
    }
    check_output(edge_conv(g, node_features, mod_kwargs=mod_kwargs))
reason="Do not support windows yet") @pytest.mark.parametrize("num_workers", [1, 2, 3]) def test_multiprocess_sparse_emb_get_set(num_workers): if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") worker_list = [] init_emb = th.rand([1000, 8]) ctx = mp.get_context("spawn") for i in range(num_workers): p = ctx.Process( target=start_sparse_worker, args=(i, num_workers, check_all_set_all_get_emb, (init_emb,)), ) p.start() worker_list.append(p) for p in worker_list: p.join() for p in worker_list: assert p.exitcode == 0 @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @pytest.mark.parametrize("num_workers", [1, 2, 3]) def test_multiprocess_sparse_emb_get_set_optm_state(num_workers): if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") worker_list = [] num_embs, emb_dim = 1000, 8 state_step = th.randint(1000, (num_embs,)) state_mem = th.rand((num_embs, emb_dim)) state_power = th.rand((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): p = ctx.Process( target=start_sparse_worker, args=( i, num_workers, check_all_set_all_get_optm_state, (state_step, state_mem, state_power), ), ) p.start() worker_list.append(p) for p in worker_list: p.join() for p in worker_list: assert p.exitcode == 0 if __name__ == "__main__": # test_multiprocess_sparse_emb_get_set(1) # test_multiprocess_sparse_emb_get_set(2) # test_multiprocess_sparse_emb_get_set(3) test_multiprocess_sparse_emb_get_set_optm_state(1) # test_multiprocess_sparse_emb_get_set_optm_state(2) # test_multiprocess_sparse_emb_get_set_optm_state(3) ================================================ FILE: tests/python/pytorch/optim/test_optim.py ================================================ import os import unittest import backend as F import pytest import torch as th import torch.multiprocessing as mp from dgl.nn import NodeEmbedding from dgl.optim import 
SparseAdagrad, SparseAdam @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @pytest.mark.parametrize("emb_dim", [1, 4, 101, 1024]) def test_sparse_adam(emb_dim): num_embs = 10 device = F.ctx() dgl_emb = NodeEmbedding(num_embs, emb_dim, "test") torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True) th.manual_seed(0) th.nn.init.uniform_(torch_emb.weight, 0, 1.0) th.manual_seed(0) th.nn.init.uniform_(dgl_emb.weight, 0, 1.0) dgl_adam = SparseAdam(params=[dgl_emb], lr=0.01) torch_adam = th.optim.SparseAdam(list(torch_emb.parameters()), lr=0.01) # first step idx = th.randint(0, num_embs, size=(4,)) dgl_value = dgl_emb(idx, device).to(th.device("cpu")) torch_value = torch_emb(idx) labels = th.zeros((4,)).long() print("dgl_value = {}".format(dgl_value)) print("labels = {}".format(labels)) dgl_adam.zero_grad() torch_adam.zero_grad() dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) torch_loss = th.nn.functional.cross_entropy(torch_value, labels) dgl_loss.backward() torch_loss.backward() dgl_adam.step() torch_adam.step() assert F.allclose(dgl_emb.weight, torch_emb.weight) # Can not test second step # Pytorch sparseAdam maintains a global step # DGL sparseAdam use a per embedding step @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @pytest.mark.parametrize("use_uva", [False, True, None]) @pytest.mark.parametrize("emb_dim", [1, 4, 101, 1024]) def test_sparse_adam_uva(use_uva, emb_dim): if F.ctx().type == "cpu" and use_uva == True: # we want to only test values of False and None when not using GPU pytest.skip("UVA cannot be used without GPUs.") num_embs = 10 device = F.ctx() dgl_emb = NodeEmbedding(num_embs, emb_dim, "test_uva{}".format(use_uva)) torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True) th.manual_seed(0) th.nn.init.uniform_(torch_emb.weight, 0, 1.0) th.manual_seed(0) th.nn.init.uniform_(dgl_emb.weight, 0, 1.0) dgl_adam = SparseAdam(params=[dgl_emb], lr=0.01, use_uva=use_uva) torch_adam = 
th.optim.SparseAdam(list(torch_emb.parameters()), lr=0.01) # first step idx = th.randint(0, num_embs, size=(4,)) dgl_value = dgl_emb(idx, device).to(th.device("cpu")) torch_value = torch_emb(idx) labels = th.zeros((4,)).long() dgl_adam.zero_grad() torch_adam.zero_grad() dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) torch_loss = th.nn.functional.cross_entropy(torch_value, labels) dgl_loss.backward() torch_loss.backward() dgl_adam.step() torch_adam.step() assert F.allclose(dgl_emb.weight, torch_emb.weight) # Can not test second step # Pytorch sparseAdam maintains a global step # DGL sparseAdam use a per embedding step @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @pytest.mark.parametrize("dtype", [th.float32, th.float16]) @pytest.mark.parametrize("emb_dim", [1, 4, 101, 1024]) def test_sparse_adam_dtype(dtype, emb_dim): num_embs = 10 device = F.ctx() dgl_emb = NodeEmbedding(num_embs, emb_dim, "test_dtype{}".format(dtype)) torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True) th.manual_seed(0) th.nn.init.uniform_(torch_emb.weight, 0, 1.0) th.manual_seed(0) th.nn.init.uniform_(dgl_emb.weight, 0, 1.0) dgl_adam = SparseAdam(params=[dgl_emb], lr=0.01, dtype=dtype) torch_adam = th.optim.SparseAdam(list(torch_emb.parameters()), lr=0.01) # first step idx = th.randint(0, num_embs, size=(4,)) dgl_value = dgl_emb(idx, device).to(th.device("cpu")) torch_value = torch_emb(idx) labels = th.zeros((4,)).long() dgl_adam.zero_grad() torch_adam.zero_grad() dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) torch_loss = th.nn.functional.cross_entropy(torch_value, labels) dgl_loss.backward() torch_loss.backward() dgl_adam.step() torch_adam.step() assert F.allclose(dgl_emb.weight, torch_emb.weight) # Can not test second step # Pytorch sparseAdam maintains a global step # DGL sparseAdam use a per embedding step @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") def test_sparse_adam_zero_step(): num_embs = 10 emb_dim 
= 4 device = F.ctx() dgl_emb = NodeEmbedding(num_embs, emb_dim, "test") torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True) dgl_emb_zero = NodeEmbedding(num_embs, emb_dim, "test2") torch_emb_zero = th.nn.Embedding(num_embs, emb_dim, sparse=True) th.manual_seed(0) th.nn.init.uniform_(torch_emb.weight, 0, 1.0) th.nn.init.uniform_(torch_emb_zero.weight, 0, 1.0) th.manual_seed(0) th.nn.init.uniform_(dgl_emb.weight, 0, 1.0) th.nn.init.uniform_(dgl_emb_zero.weight, 0, 1.0) dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01) torch_adam = th.optim.SparseAdam( list(torch_emb.parameters()) + list(torch_emb_zero.parameters()), lr=0.01, ) # first step idx = th.randint(0, num_embs, size=(4,)) dgl_value = dgl_emb(idx, device).to(th.device("cpu")) torch_value = torch_emb(idx) labels = th.ones((4,)).long() dgl_adam.zero_grad() torch_adam.zero_grad() dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) torch_loss = th.nn.functional.cross_entropy(torch_value, labels) dgl_loss.backward() torch_loss.backward() dgl_adam.step() torch_adam.step() assert F.allclose(dgl_emb.weight, torch_emb.weight) def initializer(emb): th.manual_seed(0) emb.uniform_(-1.0, 1.0) return emb def start_sparse_adam_worker( rank, device, world_size, weight, tensor_dev="cpu", has_zero_grad=False, backend="gloo", num_embs=128, emb_dim=10, zero_comm=True, ): print("start sparse worker for adam {}".format(rank)) dist_init_method = "tcp://{master_ip}:{master_port}".format( master_ip="127.0.0.1", master_port="12345" ) if device.type == "cuda": th.cuda.set_device(device) th.distributed.init_process_group( backend=backend, init_method=dist_init_method, world_size=world_size, rank=rank, ) init_weight = th.empty((num_embs, emb_dim)) th.manual_seed(0) th.nn.init.uniform_(init_weight, -1.0, 1.0) dgl_emb = NodeEmbedding( num_embs, emb_dim, "test", init_func=initializer, device=tensor_dev ) dgl_emb.all_set_embedding(init_weight) if has_zero_grad: dgl_emb_zero = NodeEmbedding( num_embs, emb_dim, 
"zero", init_func=initializer, device=tensor_dev ) dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01) else: dgl_adam = SparseAdam(params=[dgl_emb], lr=0.01) th.manual_seed(rank) if zero_comm: start = (num_embs // world_size) * rank end = (num_embs // world_size) * (rank + 1) idx = th.randint(start, end, size=(4,)).to(tensor_dev) else: idx = th.randint(0, num_embs, size=(4,)).to(tensor_dev) dgl_value = dgl_emb(idx, device) labels = th.ones((4,)).long().to(device) dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) dgl_adam.zero_grad() dgl_loss.backward() dgl_adam.step() th.distributed.barrier() dgl_weight = dgl_emb.all_get_embedding().detach() after_step = dgl_emb(idx, device).cpu() if rank == 0: dgl_value = dgl_value.detach().cpu() assert F.allclose(dgl_value, after_step) is False weight[:] = dgl_weight[:] th.distributed.barrier() def start_torch_adam_worker( rank, world_size, weight, has_zero_grad=False, num_embs=128, emb_dim=10, zero_comm=True, ): print("start sparse worker for adam {}".format(rank)) dist_init_method = "tcp://{master_ip}:{master_port}".format( master_ip="127.0.0.1", master_port="12345" ) backend = "gloo" th.distributed.init_process_group( backend=backend, init_method=dist_init_method, world_size=world_size, rank=rank, ) torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True) th.manual_seed(0) th.nn.init.uniform_(torch_emb.weight, -1.0, 1.0) torch_emb = th.nn.parallel.DistributedDataParallel(torch_emb) if has_zero_grad: torch_emb_zero = th.nn.Embedding(num_embs, emb_dim, sparse=True) torch_emb_zero = torch_emb_zero.to(tensor_dev) th.manual_seed(0) th.nn.init.uniform_(torch_emb_zero.weight, -1.0, 1.0) torch_emb_zero = th.nn.parallel.DistributedDataParallel(torch_emb_zero) torch_adam = th.optim.SparseAdam( list(torch_emb.module.parameters()) + list(torch_emb_zero.module.parameters()), lr=0.01, ) else: torch_adam = th.optim.SparseAdam( list(torch_emb.module.parameters()), lr=0.01 ) th.manual_seed(rank) if zero_comm: start = 
(num_embs // world_size) * rank end = (num_embs // world_size) * (rank + 1) idx = th.randint(start, end, size=(4,)) else: idx = th.randint(0, num_embs, size=(4,)) labels = th.ones((4,)).long() torch_value = torch_emb(idx) torch_loss = th.nn.functional.cross_entropy(torch_value, labels) torch_adam.zero_grad() torch_loss.backward() torch_adam.step() th.distributed.barrier() if rank == 0: weight[:] = torch_emb.module.weight.cpu()[:] th.distributed.barrier() @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf(F.ctx().type != "cpu", reason="cpu only test") @pytest.mark.parametrize("num_workers", [2, 4]) def test_multiprocess_cpu_sparse_adam(num_workers): backend = "gloo" worker_list = [] num_embs = 128 emb_dim = 10 dgl_weight = th.empty((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): device = F.ctx() p = ctx.Process( target=start_sparse_adam_worker, args=( i, device, num_workers, dgl_weight, th.device("cpu"), True, backend, ), ) p.start() worker_list.append(p) for p in worker_list: p.join() worker_list = [] torch_weight = th.empty((num_embs, emb_dim)) for i in range(num_workers): p = ctx.Process( target=start_torch_adam_worker, args=(i, num_workers, torch_weight, False), ) p.start() worker_list.append(p) for p in worker_list: p.join() assert F.allclose(dgl_weight, torch_weight) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf(F.ctx().type == "cpu", reason="gpu only test") @pytest.mark.parametrize("num_workers", [2, 4, 8]) @pytest.mark.parametrize("backend", ["nccl", "gloo"]) @pytest.mark.parametrize("zero_comm", [True, False]) def test_multiprocess_sparse_adam(num_workers, backend, zero_comm): if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") worker_list = [] num_embs = 128 emb_dim = 10 dgl_weight = th.empty((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): device = F.ctx() 
if device.type == "cuda": # make sure each process has a unique GPU device = th.device(i) p = ctx.Process( target=start_sparse_adam_worker, args=( i, device, num_workers, dgl_weight, th.device("cpu"), True, backend, num_embs, emb_dim, zero_comm, ), ) p.start() worker_list.append(p) for p in worker_list: p.join() worker_list = [] torch_weight = th.empty((num_embs, emb_dim)) for i in range(num_workers): p = ctx.Process( target=start_torch_adam_worker, args=( i, num_workers, torch_weight, False, num_embs, emb_dim, zero_comm, ), ) p.start() worker_list.append(p) for p in worker_list: p.join() assert F.allclose(dgl_weight, torch_weight) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf( F.ctx().type == "cpu", reason="cuda tensor is not supported for cpu" ) @pytest.mark.parametrize("num_workers", [2, 4, 8]) def test_multiprocess_sparse_adam_cuda_tensor(num_workers): if F.ctx().type == "cpu": pytest.skip("Do not test CPU") if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") backend = "nccl" worker_list = [] num_embs = 128 emb_dim = 10 dgl_weight = th.empty((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): device = th.device(i) p = ctx.Process( target=start_sparse_adam_worker, args=(i, device, num_workers, dgl_weight, device, False, backend), ) p.start() worker_list.append(p) for p in worker_list: p.join() worker_list = [] torch_weight = th.empty((num_embs, emb_dim)) for i in range(num_workers): p = ctx.Process( target=start_torch_adam_worker, args=(i, num_workers, torch_weight, False), ) p.start() worker_list.append(p) for p in worker_list: p.join() assert F.allclose(dgl_weight, torch_weight) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf(F.ctx().type != "cpu", reason="cpu only test") @pytest.mark.parametrize("num_workers", [2, 4]) def test_multiprocess_sparse_adam_cpu_zero_step(num_workers): backend = 
"gloo" worker_list = [] num_embs = 128 emb_dim = 10 dgl_weight = th.empty((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): device = F.ctx() p = ctx.Process( target=start_sparse_adam_worker, args=( i, device, num_workers, dgl_weight, th.device("cpu"), True, backend, ), ) p.start() worker_list.append(p) for p in worker_list: p.join() worker_list = [] torch_weight = th.empty((num_embs, emb_dim)) for i in range(num_workers): p = ctx.Process( target=start_torch_adam_worker, args=(i, num_workers, torch_weight, False), ) p.start() worker_list.append(p) for p in worker_list: p.join() assert F.allclose(dgl_weight, torch_weight) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf(F.ctx().type == "cpu", reason="gpu only test") @pytest.mark.parametrize("num_workers", [2, 4, 8]) @pytest.mark.parametrize("backend", ["nccl", "gloo"]) def test_multiprocess_sparse_adam_zero_step(num_workers, backend): if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") worker_list = [] num_embs = 128 emb_dim = 10 dgl_weight = th.empty((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): device = F.ctx() if device.type == "cuda": # make sure each process has a unique GPU device = th.device(i) p = ctx.Process( target=start_sparse_adam_worker, args=( i, device, num_workers, dgl_weight, th.device("cpu"), True, backend, ), ) p.start() worker_list.append(p) for p in worker_list: p.join() worker_list = [] torch_weight = th.empty((num_embs, emb_dim)) for i in range(num_workers): p = ctx.Process( target=start_torch_adam_worker, args=(i, num_workers, torch_weight, False), ) p.start() worker_list.append(p) for p in worker_list: p.join() assert F.allclose(dgl_weight, torch_weight) @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf( F.ctx().type == "cpu", reason="cuda tensor is not supported for cpu" ) 
@pytest.mark.parametrize("num_workers", [2, 4, 8]) def test_multiprocess_sparse_adam_zero_step_cuda_tensor(num_workers): if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") backend = "nccl" worker_list = [] num_embs = 128 emb_dim = 10 dgl_weight = th.empty((num_embs, emb_dim)) ctx = mp.get_context("spawn") for i in range(num_workers): device = th.device(i) p = ctx.Process( target=start_sparse_adam_worker, args=(i, device, num_workers, dgl_weight, device, True, backend), ) p.start() worker_list.append(p) for p in worker_list: p.join() worker_list = [] torch_weight = th.empty((num_embs, emb_dim)) for i in range(num_workers): p = ctx.Process( target=start_torch_adam_worker, args=(i, num_workers, torch_weight, False), ) p.start() worker_list.append(p) for p in worker_list: p.join() assert F.allclose(dgl_weight, torch_weight) def start_sparse_adam_state_dict_worker( rank, world_size, init_weight, backend, num_embs, emb_dim, ): print("start sparse worker for adam {}".format(rank)) dist_init_method = "tcp://{master_ip}:{master_port}".format( master_ip="127.0.0.1", master_port="12345" ) device = th.device(f"cuda:{rank}") th.cuda.set_device(device) tensor_dev = device if backend == "nccl" else th.device("cpu") th.distributed.init_process_group( backend=backend, init_method=dist_init_method, world_size=world_size, rank=rank, ) th.manual_seed(0) dgl_emb = NodeEmbedding( num_embs, emb_dim, "test", init_func=initializer, device=tensor_dev ) dgl_emb.all_set_embedding(init_weight) dgl_adam = SparseAdam(params=[dgl_emb], lr=0.01) start = (num_embs // world_size) * rank end = (num_embs // world_size) * (rank + 1) th.manual_seed(rank) idx = th.randint(start, end, size=(4,)).to(tensor_dev) dgl_value = dgl_emb(idx, device) labels = th.ones((4,)).long().to(device) dgl_loss = th.nn.functional.cross_entropy(dgl_value, labels) dgl_adam.zero_grad() dgl_loss.backward() dgl_adam.step() th.distributed.barrier() worker_state_dict = 
[t.detach().clone() for t in dgl_emb.optm_state] state_dict = dgl_adam.state_dict() for t in dgl_emb.optm_state: t.zero_() dgl_adam.load_state_dict(state_dict) for i, j in zip(worker_state_dict, dgl_emb.optm_state): F.allclose(i, j) th.distributed.barrier() @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") @unittest.skipIf(F.ctx().type == "cpu", reason="gpu only test") @pytest.mark.parametrize("num_workers", [1, 2, 4, 8]) @pytest.mark.parametrize("backend", ["nccl", "gloo"]) def test_multiprocess_sparse_adam_state_dict(num_workers, backend): if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers: pytest.skip("Not enough GPUs to run test.") num_embs = 128 emb_dim = 10 init_weight = th.rand((num_embs, emb_dim)) mp.spawn( start_sparse_adam_state_dict_worker, ( num_workers, init_weight, backend, num_embs, emb_dim, ), nprocs=num_workers, ) if __name__ == "__main__": test_sparse_adam(1) test_sparse_adam(4) test_sparse_adam(101) test_sparse_adam(1024) test_sparse_adam_zero_step() test_multiprocess_cpu_sparse_adam(2) test_multiprocess_cpu_sparse_adam(4) test_multiprocess_cpu_sparse_adam(8) test_multiprocess_sparse_adam_cpu_zero_step(2) test_multiprocess_sparse_adam(2, backend="gloo") test_multiprocess_sparse_adam(4, backend="gloo") test_multiprocess_sparse_adam(8, backend="gloo") test_multiprocess_sparse_adam(2, backend="nccl") test_multiprocess_sparse_adam(4, backend="nccl") test_multiprocess_sparse_adam(8, backend="nccl") test_multiprocess_sparse_adam_zero_step(2, backend="gloo") test_multiprocess_sparse_adam_zero_step(4, backend="nccl") test_multiprocess_sparse_adam_cuda_tensor(2) test_multiprocess_sparse_adam_zero_step_cuda_tensor(4) test_multiprocess_sparse_adam_state_dict(2, "nccl") test_multiprocess_sparse_adam_state_dict(2, "gloo") ================================================ FILE: tests/python/pytorch/sparse/__init__.py ================================================ """ DGL sparse tests""" 
================================================
FILE: tests/python/pytorch/sparse/test_broadcast.py
================================================
import operator

import backend as F
import pytest
import torch

from dgl.sparse import sp_broadcast_v

from .utils import rand_coo


@pytest.mark.parametrize("shape", [(3, 4), (1, 5), (5, 1)])
@pytest.mark.parametrize("nnz", [1, 4])
@pytest.mark.parametrize("nz_dim", [None, 2])
@pytest.mark.parametrize("op", ["add", "sub", "mul", "truediv"])
def test_sp_broadcast_v(shape, nnz, nz_dim, op):
    """Compare sp_broadcast_v with applying ``op`` to A.val directly.

    Three vector layouts are checked: ``(ncols,)`` and ``(1, ncols)`` are
    gathered by column index, ``(nrows, 1)`` is gathered by row index.
    """
    dev = F.ctx()
    A = rand_coo(shape, nnz, dev, nz_dim)

    v = torch.randn(A.shape[1], device=dev)
    res1 = sp_broadcast_v(A, v, op)
    if A.val.dim() == 1:
        rhs = v[A.col]
    else:
        # A.val has an extra dimension: make rhs a column so it broadcasts.
        rhs = v[A.col].view(-1, 1)
    res2 = getattr(operator, op)(A.val, rhs)
    assert torch.allclose(res1.val, res2)

    v = torch.randn(1, A.shape[1], device=dev)
    res1 = sp_broadcast_v(A, v, op)
    if A.val.dim() == 1:
        rhs = v.view(-1)[A.col]
    else:
        rhs = v.view(-1)[A.col].view(-1, 1)
    res2 = getattr(operator, op)(A.val, rhs)
    assert torch.allclose(res1.val, res2)

    v = torch.randn(A.shape[0], 1, device=dev)
    res1 = sp_broadcast_v(A, v, op)
    if A.val.dim() == 1:
        rhs = v.view(-1)[A.row]
    else:
        rhs = v.view(-1)[A.row].view(-1, 1)
    res2 = getattr(operator, op)(A.val, rhs)
    assert torch.allclose(res1.val, res2)

================================================
FILE: tests/python/pytorch/sparse/test_elementwise_op.py
================================================
import operator

import backend as F

import dgl.sparse as dglsp
import pytest
import torch

from dgl.sparse import diag, power


@pytest.mark.parametrize("opname", ["add", "sub", "mul", "truediv"])
def test_diag_op_diag(opname):
    """``op`` on two diagonal matrices equals ``op`` on their value vectors."""
    op = getattr(operator, opname)
    ctx = F.ctx()
    shape = (3, 4)
    D1 = diag(torch.arange(1, 4).to(ctx), shape=shape)
    D2 = diag(torch.arange(10, 13).to(ctx), shape=shape)
    result = op(D1, D2)
    assert torch.allclose(result.val, op(D1.val, D2.val), rtol=1e-4, atol=1e-4)
    assert result.shape == D1.shape


@pytest.mark.parametrize(
    "v_scalar", [2, 2.5, torch.tensor(2), torch.tensor(2.5)]
)
def test_diag_op_scalar(v_scalar):
    """Diagonal matrix with a scalar: mul, div and pow work, add/sub raise."""
    ctx = F.ctx()
    shape = (3, 4)
    D1 = diag(torch.arange(1, 4).to(ctx), shape=shape)

    # D * v
    D2 = D1 * v_scalar
    assert torch.allclose(D1.val * v_scalar, D2.val, rtol=1e-4, atol=1e-4)
    assert D1.shape == D2.shape

    # v * D
    D2 = v_scalar * D1
    assert torch.allclose(v_scalar * D1.val, D2.val, rtol=1e-4, atol=1e-4)
    assert D1.shape == D2.shape

    # D / v
    D2 = D1 / v_scalar
    assert torch.allclose(D1.val / v_scalar, D2.val, rtol=1e-4, atol=1e-4)
    assert D1.shape == D2.shape

    # D ^ v
    # D1 is rebuilt here without an explicit shape.
    D1 = diag(torch.arange(1, 4).to(ctx))
    D2 = D1**v_scalar
    assert torch.allclose(D1.val**v_scalar, D2.val, rtol=1e-4, atol=1e-4)
    assert D1.shape == D2.shape

    # pow(D, v)
    D2 = power(D1, v_scalar)
    assert torch.allclose(D1.val**v_scalar, D2.val, rtol=1e-4, atol=1e-4)
    assert D1.shape == D2.shape

    with pytest.raises(TypeError):
        D1 + v_scalar
    with pytest.raises(TypeError):
        v_scalar + D1

    with pytest.raises(TypeError):
        D1 - v_scalar
    with pytest.raises(TypeError):
        v_scalar - D1


@pytest.mark.parametrize("val_shape", [(), (2,)])
@pytest.mark.parametrize("opname", ["add", "sub"])
def test_addsub_coo(val_shape, opname):
    """add/sub of two COO matrices matches the dense result; scalars raise."""
    op = getattr(operator, opname)
    func = getattr(dglsp, opname)
    ctx = F.ctx()
    row = torch.tensor([1, 0, 2]).to(ctx)
    col = torch.tensor([0, 3, 2]).to(ctx)
    val = torch.randn(row.shape + val_shape).to(ctx)
    A = dglsp.from_coo(row, col, val)

    row = torch.tensor([1, 0]).to(ctx)
    col = torch.tensor([0, 2]).to(ctx)
    val = torch.randn(row.shape + val_shape).to(ctx)
    B = dglsp.from_coo(row, col, val, shape=A.shape)

    # Both the Python operator and the dglsp function are exercised.
    C1 = op(A, B).to_dense()
    C2 = func(A, B).to_dense()
    dense_C = op(A.to_dense(), B.to_dense())

    assert torch.allclose(dense_C, C1)
    assert torch.allclose(dense_C, C2)

    with pytest.raises(TypeError):
        op(A, 2)
    with pytest.raises(TypeError):
        op(2, A)


@pytest.mark.parametrize("val_shape", [(), (2,)])
@pytest.mark.parametrize("opname", ["add", "sub"])
def test_addsub_csr(val_shape, opname):
    """add/sub of two CSR-built matrices matches the dense result."""
    op = getattr(operator, opname)
    func = getattr(dglsp, opname)
    ctx = F.ctx()
    indptr = torch.tensor([0, 1, 2, 3]).to(ctx)
    indices = torch.tensor([3, 0, 2]).to(ctx)
    val = torch.randn(indices.shape + val_shape).to(ctx)
    A = dglsp.from_csr(indptr, indices, val)

    indptr = torch.tensor([0, 1, 2, 2]).to(ctx)
    indices = torch.tensor([2, 0]).to(ctx)
    val = torch.randn(indices.shape + val_shape).to(ctx)
    B = dglsp.from_csr(indptr, indices, val, shape=A.shape)

    C1 = op(A, B).to_dense()
    C2 = func(A, B).to_dense()
    dense_C = op(A.to_dense(), B.to_dense())

    assert torch.allclose(dense_C, C1)
    assert torch.allclose(dense_C, C2)

    with pytest.raises(TypeError):
        op(A, 2)
    with pytest.raises(TypeError):
        op(2, A)


@pytest.mark.parametrize("val_shape", [(), (2,)])
@pytest.mark.parametrize("opname", ["add", "sub"])
def test_addsub_csc(val_shape, opname):
    """add/sub of two CSC-built matrices matches the dense result."""
    op = getattr(operator, opname)
    func = getattr(dglsp, opname)
    ctx = F.ctx()
    indptr = torch.tensor([0, 1, 1, 2, 3]).to(ctx)
    indices = torch.tensor([1, 2, 0]).to(ctx)
    val = torch.randn(indices.shape + val_shape).to(ctx)
    A = dglsp.from_csc(indptr, indices, val)

    indptr = torch.tensor([0, 1, 1, 2, 2]).to(ctx)
    indices = torch.tensor([1, 0]).to(ctx)
    val = torch.randn(indices.shape + val_shape).to(ctx)
    B = dglsp.from_csc(indptr, indices, val, shape=A.shape)

    C1 = op(A, B).to_dense()
    C2 = func(A, B).to_dense()
    dense_C = op(A.to_dense(), B.to_dense())

    assert torch.allclose(dense_C, C1)
    assert torch.allclose(dense_C, C2)

    with pytest.raises(TypeError):
        op(A, 2)
    with pytest.raises(TypeError):
        op(2, A)


@pytest.mark.parametrize("val_shape", [(), (2,)])
@pytest.mark.parametrize("opname", ["add", "sub"])
def test_addsub_diag(val_shape, opname):
    """add/sub of two diagonal matrices matches the dense result."""
    op = getattr(operator, opname)
    func = getattr(dglsp, opname)
    ctx = F.ctx()
    shape = (3, 4)
    # One value (scalar or length-2 vector) per row of the matrix.
    val_shape = (shape[0],) + val_shape
    D1 = dglsp.diag(torch.randn(val_shape).to(ctx), shape=shape)
    D2 = dglsp.diag(torch.randn(val_shape).to(ctx), shape=shape)

    C1 = op(D1, D2).to_dense()
    C2 = func(D1, D2).to_dense()
    dense_C = op(D1.to_dense(), D2.to_dense())

    assert torch.allclose(dense_C, C1)
    assert torch.allclose(dense_C, C2)

    with pytest.raises(TypeError):
        op(D1, 2)
    with pytest.raises(TypeError):
        op(2, D1)


@pytest.mark.parametrize("val_shape", [(), (2,)])
def test_add_sparse_diag(val_shape):
    """COO + diagonal, in either order and via dglsp.add, matches dense."""
    ctx = F.ctx()
    row = torch.tensor([1, 0, 2]).to(ctx)
    col = torch.tensor([0, 3, 2]).to(ctx)
    val = torch.randn(row.shape + val_shape).to(ctx)
    A = dglsp.from_coo(row, col, val)

    shape = (3, 4)
    val_shape = (shape[0],) + val_shape
    D = dglsp.diag(torch.randn(val_shape).to(ctx), shape=shape)

    sum1 = (A + D).to_dense()
    sum2 = (D + A).to_dense()
    sum3 = dglsp.add(A, D).to_dense()
    sum4 = dglsp.add(D, A).to_dense()
    dense_sum = A.to_dense() + D.to_dense()

    assert torch.allclose(dense_sum, sum1)
    assert torch.allclose(dense_sum, sum2)
    assert torch.allclose(dense_sum, sum3)
    assert torch.allclose(dense_sum, sum4)


@pytest.mark.parametrize("val_shape", [(), (2,)])
def test_sub_sparse_diag(val_shape):
    """COO - diagonal matches dense; the reversed order gives the negation."""
    ctx = F.ctx()
    row = torch.tensor([1, 0, 2]).to(ctx)
    col = torch.tensor([0, 3, 2]).to(ctx)
    val = torch.randn(row.shape + val_shape).to(ctx)
    A = dglsp.from_coo(row, col, val)

    shape = (3, 4)
    val_shape = (shape[0],) + val_shape
    D = dglsp.diag(torch.randn(val_shape).to(ctx), shape=shape)

    diff1 = (A - D).to_dense()
    diff2 = (D - A).to_dense()
    diff3 = dglsp.sub(A, D).to_dense()
    diff4 = dglsp.sub(D, A).to_dense()
    dense_diff = A.to_dense() - D.to_dense()

    assert torch.allclose(dense_diff, diff1)
    assert torch.allclose(dense_diff, -diff2)
    assert torch.allclose(dense_diff, diff3)
    assert torch.allclose(dense_diff, -diff4)


@pytest.mark.parametrize("op", ["pow"])
def test_error_op_sparse_diag(op):
    """``op`` between a COO matrix and a diagonal matrix raises TypeError."""
    ctx = F.ctx()
    row = torch.tensor([1, 0, 2]).to(ctx)
    col = torch.tensor([0, 3, 2]).to(ctx)
    val = torch.randn(row.shape).to(ctx)
    A = dglsp.from_coo(row, col, val)

    shape = (3, 4)
    D = dglsp.diag(torch.randn(row.shape[0]).to(ctx), shape=shape)

    with pytest.raises(TypeError):
        getattr(operator, op)(A, D)
    with pytest.raises(TypeError):
        getattr(operator, op)(D, A)
================================================ FILE: tests/python/pytorch/sparse/test_elementwise_op_sp.py ================================================ import sys import backend as F import pytest import torch from dgl.sparse import div, from_coo, mul, power, spmatrix, val_like from .utils import ( rand_coo, rand_csc, rand_csr, rand_diag, sparse_matrix_to_dense, ) def all_close_sparse(A, row, col, val, shape): rowA, colA = A.coo() valA = A.val assert torch.allclose(rowA, row) assert torch.allclose(colA, col) assert torch.allclose(valA, val) assert A.shape == shape @pytest.mark.parametrize( "v_scalar", [2, 2.5, torch.tensor(2), torch.tensor(2.5)] ) def test_muldiv_scalar(v_scalar): ctx = F.ctx() row = torch.tensor([1, 0, 2]).to(ctx) col = torch.tensor([0, 3, 2]).to(ctx) val = torch.randn(len(row)).to(ctx) A1 = from_coo(row, col, val, shape=(3, 4)) # A * v A2 = A1 * v_scalar assert torch.allclose(A1.val * v_scalar, A2.val, rtol=1e-4, atol=1e-4) assert A1.shape == A2.shape # v * A A2 = v_scalar * A1 assert torch.allclose(A1.val * v_scalar, A2.val, rtol=1e-4, atol=1e-4) assert A1.shape == A2.shape # A / v A2 = A1 / v_scalar assert torch.allclose(A1.val / v_scalar, A2.val, rtol=1e-4, atol=1e-4) assert A1.shape == A2.shape # v / A with pytest.raises(TypeError): v_scalar / A1 @pytest.mark.parametrize("val_shape", [(3,), (3, 2)]) def test_pow(val_shape): # A ** v ctx = F.ctx() row = torch.tensor([1, 0, 2]).to(ctx) col = torch.tensor([0, 3, 2]).to(ctx) val = torch.randn(val_shape).to(ctx) A = from_coo(row, col, val, shape=(3, 4)) exponent = 2 A_new = A**exponent assert torch.allclose(A_new.val, val**exponent) assert A_new.shape == A.shape new_row, new_col = A_new.coo() assert torch.allclose(new_row, row) assert torch.allclose(new_col, col) # power(A, v) A_new = power(A, exponent) assert torch.allclose(A_new.val, val**exponent) assert A_new.shape == A.shape new_row, new_col = A_new.coo() assert torch.allclose(new_row, row) assert torch.allclose(new_col, col) 
@pytest.mark.parametrize("op", ["add", "sub"]) @pytest.mark.parametrize( "v_scalar", [2, 2.5, torch.tensor(2), torch.tensor(2.5)] ) def test_error_op_scalar(op, v_scalar): ctx = F.ctx() row = torch.tensor([1, 0, 2]).to(ctx) col = torch.tensor([0, 3, 2]).to(ctx) val = torch.randn(len(row)).to(ctx) A = from_coo(row, col, val, shape=(3, 4)) with pytest.raises(TypeError): A + v_scalar with pytest.raises(TypeError): v_scalar + A with pytest.raises(TypeError): A - v_scalar with pytest.raises(TypeError): v_scalar - A @pytest.mark.parametrize( "create_func1", [rand_coo, rand_csr, rand_csc, rand_diag] ) @pytest.mark.parametrize( "create_func2", [rand_coo, rand_csr, rand_csc, rand_diag] ) @pytest.mark.parametrize("shape", [(5, 5), (5, 3)]) @pytest.mark.parametrize("nnz1", [5, 15]) @pytest.mark.parametrize("nnz2", [1, 14]) @pytest.mark.parametrize("nz_dim", [None, 3]) def test_spspmul(create_func1, create_func2, shape, nnz1, nnz2, nz_dim): dev = F.ctx() A = create_func1(shape, nnz1, dev, nz_dim) B = create_func2(shape, nnz2, dev, nz_dim) C = mul(A, B) assert not C.has_duplicate() DA = sparse_matrix_to_dense(A) DB = sparse_matrix_to_dense(B) DC = DA * DB grad = torch.rand_like(C.val) C.val.backward(grad) DC_grad = sparse_matrix_to_dense(val_like(C, grad)) DC.backward(DC_grad) assert torch.allclose(sparse_matrix_to_dense(C), DC, atol=1e-05) assert torch.allclose( val_like(A, A.val.grad).to_dense(), DA.grad, atol=1e-05 ) assert torch.allclose( val_like(B, B.val.grad).to_dense(), DB.grad, atol=1e-05 ) @pytest.mark.parametrize( "create_func", [rand_coo, rand_csr, rand_csc, rand_diag] ) @pytest.mark.parametrize("shape", [(5, 5), (5, 3)]) @pytest.mark.parametrize("nnz", [1, 14]) @pytest.mark.parametrize("nz_dim", [None, 3]) def test_spspdiv(create_func, nnz, shape, nz_dim): dev = F.ctx() A = create_func(shape, nnz, dev, nz_dim) perm = torch.randperm(A.nnz, device=dev) rperm = torch.argsort(perm) B = spmatrix(A.indices()[:, perm], A.val[perm], A.shape) C = div(A, B) assert not 
C.has_duplicate() assert torch.allclose(C.val, A.val / B.val[rperm], atol=1e-05) assert torch.allclose(C.indices(), A.indices(), atol=1e-05) # No need to test backward here, since it is handled by Pytorch ================================================ FILE: tests/python/pytorch/sparse/test_matmul.py ================================================ import warnings import backend as F import pytest import torch from dgl.sparse import bspmm, diag, from_coo, val_like from dgl.sparse.matmul import matmul from .utils import ( clone_detach_and_grad, dense_mask, rand_coo, rand_csc, rand_csr, rand_stride, sparse_matrix_to_dense, sparse_matrix_to_torch_sparse, ) def _torch_sparse_mm(torch_A1, torch_A2): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) return torch.sparse.mm(torch_A1, torch_A2) @pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("shape", [(2, 7), (5, 2)]) @pytest.mark.parametrize("nnz", [1, 10]) @pytest.mark.parametrize("out_dim", [None, 10]) def test_spmm(create_func, shape, nnz, out_dim): dev = F.ctx() A = create_func(shape, nnz, dev) if out_dim is not None: X = torch.randn(shape[1], out_dim, requires_grad=True, device=dev) else: X = torch.randn(shape[1], requires_grad=True, device=dev) X = rand_stride(X) sparse_result = matmul(A, X) grad = torch.randn_like(sparse_result) sparse_result.backward(grad) adj = sparse_matrix_to_dense(A) XX = clone_detach_and_grad(X) dense_result = torch.matmul(adj, XX) if out_dim is None: dense_result = dense_result.view(-1) dense_result.backward(grad) assert torch.allclose(sparse_result, dense_result, atol=1e-05) assert torch.allclose(X.grad, XX.grad, atol=1e-05) assert torch.allclose( dense_mask(adj.grad, A), sparse_matrix_to_dense(val_like(A, A.val.grad)), atol=1e-05, ) @pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("shape", [(2, 7), (5, 2)]) @pytest.mark.parametrize("nnz", [1, 10]) def 
test_bspmm(create_func, shape, nnz): dev = F.ctx() A = create_func(shape, nnz, dev, 2) X = torch.randn(shape[1], 10, 2, requires_grad=True, device=dev) X = rand_stride(X) sparse_result = matmul(A, X) grad = torch.randn_like(sparse_result) sparse_result.backward(grad) XX = clone_detach_and_grad(X) torch_A = A.to_dense().clone().detach().requires_grad_() torch_result = torch_A.permute(2, 0, 1) @ XX.permute(2, 0, 1) torch_result.backward(grad.permute(2, 0, 1)) assert torch.allclose( sparse_result.permute(2, 0, 1), torch_result, atol=1e-05 ) assert torch.allclose(X.grad, XX.grad, atol=1e-05) assert torch.allclose( dense_mask(torch_A.grad, A), sparse_matrix_to_dense(val_like(A, A.val.grad)), atol=1e-05, ) @pytest.mark.parametrize("create_func1", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("create_func2", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("shape_n_m", [(5, 5), (5, 6)]) @pytest.mark.parametrize("shape_k", [3, 4]) @pytest.mark.parametrize("nnz1", [1, 10]) @pytest.mark.parametrize("nnz2", [1, 10]) def test_spspmm(create_func1, create_func2, shape_n_m, shape_k, nnz1, nnz2): dev = F.ctx() shape1 = shape_n_m shape2 = (shape_n_m[1], shape_k) A1 = create_func1(shape1, nnz1, dev) A2 = create_func2(shape2, nnz2, dev) A3 = matmul(A1, A2) grad = torch.randn_like(A3.val) A3.val.backward(grad) torch_A1 = sparse_matrix_to_torch_sparse(A1) torch_A2 = sparse_matrix_to_torch_sparse(A2) torch_A3 = _torch_sparse_mm(torch_A1, torch_A2) torch_A3_grad = sparse_matrix_to_torch_sparse(A3, grad) torch_A3.backward(torch_A3_grad) with torch.no_grad(): assert torch.allclose(A3.to_dense(), torch_A3.to_dense(), atol=1e-05) assert torch.allclose( val_like(A1, A1.val.grad).to_dense(), torch_A1.grad.to_dense(), atol=1e-05, ) assert torch.allclose( val_like(A2, A2.val.grad).to_dense(), torch_A2.grad.to_dense(), atol=1e-05, ) def test_spspmm_duplicate(): dev = F.ctx() row = torch.tensor([1, 0, 0, 0, 1]).to(dev) col = torch.tensor([1, 1, 1, 2, 2]).to(dev) val = 
torch.randn(len(row)).to(dev) shape = (4, 4) A1 = from_coo(row, col, val, shape) row = torch.tensor([1, 0, 0, 1]).to(dev) col = torch.tensor([1, 1, 2, 2]).to(dev) val = torch.randn(len(row)).to(dev) shape = (4, 4) A2 = from_coo(row, col, val, shape) try: matmul(A1, A2) except: pass else: assert False, "Should raise error." try: matmul(A2, A1) except: pass else: assert False, "Should raise error." @pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("sparse_shape", [(5, 5), (5, 6)]) @pytest.mark.parametrize("nnz", [1, 10]) def test_sparse_diag_mm(create_func, sparse_shape, nnz): dev = F.ctx() diag_shape = sparse_shape[1], sparse_shape[1] A = create_func(sparse_shape, nnz, dev) diag_val = torch.randn(sparse_shape[1], device=dev, requires_grad=True) D = diag(diag_val, diag_shape) B = matmul(A, D) grad = torch.randn_like(B.val) B.val.backward(grad) torch_A = sparse_matrix_to_torch_sparse(A) torch_D = sparse_matrix_to_torch_sparse(D) torch_B = _torch_sparse_mm(torch_A, torch_D) torch_B_grad = sparse_matrix_to_torch_sparse(B, grad) torch_B.backward(torch_B_grad) with torch.no_grad(): assert torch.allclose(B.to_dense(), torch_B.to_dense(), atol=1e-05) assert torch.allclose( val_like(A, A.val.grad).to_dense(), torch_A.grad.to_dense(), atol=1e-05, ) assert torch.allclose( diag(D.val.grad, D.shape).to_dense(), torch_D.grad.to_dense(), atol=1e-05, ) @pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("sparse_shape", [(5, 5), (5, 6)]) @pytest.mark.parametrize("nnz", [1, 10]) def test_diag_sparse_mm(create_func, sparse_shape, nnz): dev = F.ctx() diag_shape = sparse_shape[0], sparse_shape[0] A = create_func(sparse_shape, nnz, dev) diag_val = torch.randn(sparse_shape[0], device=dev, requires_grad=True) D = diag(diag_val, diag_shape) B = matmul(D, A) grad = torch.randn_like(B.val) B.val.backward(grad) torch_A = sparse_matrix_to_torch_sparse(A) torch_D = sparse_matrix_to_torch_sparse(D) torch_B 
= _torch_sparse_mm(torch_D, torch_A) torch_B_grad = sparse_matrix_to_torch_sparse(B, grad) torch_B.backward(torch_B_grad) with torch.no_grad(): assert torch.allclose(B.to_dense(), torch_B.to_dense(), atol=1e-05) assert torch.allclose( val_like(A, A.val.grad).to_dense(), torch_A.grad.to_dense(), atol=1e-05, ) assert torch.allclose( diag(D.val.grad, D.shape).to_dense(), torch_D.grad.to_dense(), atol=1e-05, ) ================================================ FILE: tests/python/pytorch/sparse/test_matrix_op.py ================================================ import backend as F import pytest import torch from .utils import ( rand_coo, rand_csc, rand_csr, rand_diag, sparse_matrix_to_dense, ) @pytest.mark.parametrize( "create_func", [rand_diag, rand_csr, rand_csc, rand_coo] ) @pytest.mark.parametrize("dim", [0, 1]) @pytest.mark.parametrize("index", [None, (1, 3), (4, 0, 2)]) def test_compact(create_func, dim, index): ctx = F.ctx() shape = (5, 5) ans_idx = [] if index is not None: ans_idx = list(dict.fromkeys(index)) index = torch.tensor(index).to(ctx) A = create_func(shape, 8, ctx) A_compact, ret_id = A.compact(dim, index) A_compact_dense = sparse_matrix_to_dense(A_compact) A_dense = sparse_matrix_to_dense(A) for i in range(shape[dim]): if dim == 0: row = list(A_dense[i, :].nonzero().reshape(-1)) else: row = list(A_dense[:, i].nonzero().reshape(-1)) if (i not in list(ans_idx)) and len(row) > 0: ans_idx.append(i) if len(ans_idx): ans_idx = torch.tensor(ans_idx).to(ctx) A_dense_select = sparse_matrix_to_dense(A.index_select(dim, ans_idx)) assert A_compact_dense.shape == A_dense_select.shape assert torch.allclose(A_compact_dense, A_dense_select) assert torch.allclose(ans_idx, ret_id) ================================================ FILE: tests/python/pytorch/sparse/test_reduction.py ================================================ import doctest import operator import sys import backend as F import dgl.sparse as dglsp import pytest import torch dgl_op_map = { "sum": "sum", 
"amin": "smin", "amax": "smax", "mean": "smean", "prod": "sprod", } default_entry = { "sum": 0, "amin": float("inf"), "amax": float("-inf"), "mean": 0, "prod": 1, } binary_op_map = { "sum": operator.add, "amin": torch.min, "amax": torch.max, "mean": operator.add, "prod": operator.mul, } NUM_ROWS = 10 NUM_COLS = 15 def _coalesce_dense(row, col, val, nrows, ncols, op): # Sparse matrix coalescing on a dense matrix. # # It is done by stacking every non-zero entry on an individual slice # of an (nrows x ncols x nnz), that is, construct a tensor A with # shape (nrows, ncols, len(val)) where # # A[row[i], col[i], i] = val[i] # # and then reducing on the third "nnz" dimension. # # The mask matrix M has the same sparsity pattern as A with 1 being # the non-zero entries. This is used for division if the reduce # operator is mean. M = torch.zeros(NUM_ROWS, NUM_COLS, device=F.ctx()) A = torch.full( (NUM_ROWS, NUM_COLS, 20) + val.shape[1:], default_entry[op], device=F.ctx(), dtype=val.dtype, ) A = torch.index_put(A, (row, col, torch.arange(20)), val) for i in range(20): M[row[i], col[i]] += 1 if op == "mean": A = A.sum(2) else: A = getattr(A, op)(2) M = M.view(NUM_ROWS, NUM_COLS, *([1] * (val.dim() - 1))) return A, M # Add docstring tests of dglsp.reduction to unit tests @pytest.mark.parametrize( "func", ["reduce", "sum", "smin", "smax", "sprod", "smean"] ) def test_docstring(func): globs = {"torch": torch, "dglsp": dglsp} runner = doctest.DebugRunner() finder = doctest.DocTestFinder() obj = getattr(dglsp, func) for test in finder.find(obj, func, globs=globs): runner.run(test) @pytest.mark.parametrize("shape", [(20,), (20, 20)]) @pytest.mark.parametrize("op", ["sum", "amin", "amax", "mean", "prod"]) @pytest.mark.parametrize("use_reduce", [False, True]) def test_reduce_all(shape, op, use_reduce): row = torch.randint(0, NUM_ROWS, (20,), device=F.ctx()) col = torch.randint(0, NUM_COLS, (20,), device=F.ctx()) val = torch.randn(*shape, device=F.ctx()) val2 = val.clone() val = 
val.requires_grad_() val2 = val2.requires_grad_() A = dglsp.from_coo(row, col, val, shape=(NUM_ROWS, NUM_COLS)) A2, M = _coalesce_dense(row, col, val2, NUM_ROWS, NUM_COLS, op) if not use_reduce: output = getattr(A, dgl_op_map[op])() else: output = A.reduce(rtype=dgl_op_map[op]) if op == "mean": output2 = A2.sum((0, 1)) / M.sum() elif op == "prod": output2 = A2.prod(0).prod(0) # prod() does not support tuple of dims else: output2 = getattr(A2, op)((0, 1)) assert (output - output2).abs().max() < 1e-4 head = torch.randn(*output.shape).to(val) if output.dim() > 0 else None output.backward(head) output2.backward(head) assert (val.grad - val2.grad).abs().max() < 1e-4 @pytest.mark.parametrize("shape", [(20,), (20, 20)]) @pytest.mark.parametrize("dim", [0, 1]) @pytest.mark.parametrize("empty_nnz", [False, True]) @pytest.mark.parametrize("op", ["sum", "amin", "amax", "mean", "prod"]) @pytest.mark.parametrize("use_reduce", [False, True]) def test_reduce_along(shape, dim, empty_nnz, op, use_reduce): row = torch.randint(0, NUM_ROWS, (20,), device=F.ctx()) col = torch.randint(0, NUM_COLS, (20,), device=F.ctx()) if dim == 0: mask = torch.bincount(col, minlength=NUM_COLS) == 0 else: mask = torch.bincount(row, minlength=NUM_ROWS) == 0 val = torch.randn(*shape, device=F.ctx()) val2 = val.clone() val = val.requires_grad_() val2 = val2.requires_grad_() # empty_nnz controls whether at least one column or one row has no # non-zero entry. 
if empty_nnz: row[row == 0] = 1 col[col == 0] = 1 A = dglsp.from_coo(row, col, val, shape=(NUM_ROWS, NUM_COLS)) A2, M = _coalesce_dense(row, col, val2, NUM_ROWS, NUM_COLS, op) if not use_reduce: output = getattr(A, dgl_op_map[op])(dim) else: output = A.reduce(dim=dim, rtype=dgl_op_map[op]) if op == "mean": output2 = A2.sum(dim) / M.sum(dim) else: output2 = getattr(A2, op)(dim) zero_entry_idx = (M.sum(dim) != 0).nonzero(as_tuple=True)[0] output3 = torch.index_put( torch.zeros_like(output2), (zero_entry_idx,), output2[zero_entry_idx] ) assert (output - output3).abs().max() < 1e-4 head = torch.randn(*output.shape).to(val) if output.dim() > 0 else None output.backward(head) output3.backward(head) assert (val.grad - val2.grad).abs().max() < 1e-4 ================================================ FILE: tests/python/pytorch/sparse/test_sddmm.py ================================================ import sys import backend as F import pytest import torch from dgl.sparse import bsddmm, sddmm from .utils import ( clone_detach_and_grad, rand_coo, rand_csc, rand_csr, rand_stride, ) @pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("shape", [(5, 5), (5, 4)]) @pytest.mark.parametrize("nnz", [2, 10]) @pytest.mark.parametrize("hidden", [1, 5]) def test_sddmm(create_func, shape, nnz, hidden): dev = F.ctx() A = create_func(shape, nnz, dev) if hidden > 1: B = torch.rand(shape[0], hidden, requires_grad=True, device=dev) C = torch.rand(hidden, shape[1], requires_grad=True, device=dev) else: B = torch.rand(shape[0], requires_grad=True, device=dev) C = torch.rand(shape[1], requires_grad=True, device=dev) B = rand_stride(B) C = rand_stride(C) A_val_clone = clone_detach_and_grad(A.val) dense_B = clone_detach_and_grad(B) dense_C = clone_detach_and_grad(C) sparse_result = sddmm(A, B, C) grad = torch.rand_like(sparse_result.val) sparse_result.val.backward(grad) if hidden == 1: dense_result = dense_B.view(-1, 1) @ dense_C.view(1, -1) else: dense_result 
= dense_B @ dense_C row, col = A.coo() dense_val = dense_result[row, col] * A_val_clone dense_val.backward(grad) assert torch.allclose(dense_val, sparse_result.val, atol=1e-05) assert torch.allclose(dense_C.grad, C.grad, atol=1e-05) assert torch.allclose(dense_B.grad, B.grad, atol=1e-05) assert torch.allclose(A_val_clone.grad, A.val.grad, atol=1e-05) @pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc]) @pytest.mark.parametrize("shape", [(5, 5), (5, 4)]) @pytest.mark.parametrize("nnz", [2, 10]) @pytest.mark.parametrize("nz_dim", [2, 10]) def test_bsddmm(create_func, shape, nnz, nz_dim): dev = F.ctx() hidden = 2 A = create_func(shape, nnz, dev, nz_dim) B = torch.rand(shape[0], hidden, nz_dim, requires_grad=True, device=dev) C = torch.rand(hidden, shape[1], nz_dim, requires_grad=True, device=dev) B = rand_stride(B) C = rand_stride(C) A_val_clone = clone_detach_and_grad(A.val) dense_B = clone_detach_and_grad(B) dense_C = clone_detach_and_grad(C) sparse_result = bsddmm(A, B, C) grad = torch.rand_like(sparse_result.val) sparse_result.val.backward(grad) dense_result = dense_B.permute(2, 0, 1) @ dense_C.permute(2, 0, 1) dense_result = dense_result.permute(1, 2, 0) row, col = A.coo() dense_val = dense_result[row, col] * A_val_clone dense_val.backward(grad) assert torch.allclose(dense_val, sparse_result.val, atol=1e-05) assert torch.allclose(dense_C.grad, C.grad, atol=1e-05) assert torch.allclose(dense_B.grad, B.grad, atol=1e-05) assert torch.allclose(A_val_clone.grad, A.val.grad, atol=1e-05) ================================================ FILE: tests/python/pytorch/sparse/test_softmax.py ================================================ import sys import backend as F import dgl import pytest import torch from dgl.sparse import from_coo, softmax @pytest.mark.parametrize("val_D", [None, 2]) @pytest.mark.parametrize("csr", [True, False]) @pytest.mark.parametrize("dim", [0, 1]) def test_softmax(val_D, csr, dim): dev = F.ctx() row = torch.tensor([0, 0, 1, 
1]).to(dev) col = torch.tensor([0, 2, 1, 2]).to(dev) nnz = len(row) if val_D is None: val = torch.randn(nnz).to(dev) else: val = torch.randn(nnz, val_D).to(dev) val_sparse = val.clone().requires_grad_() A = from_coo(row, col, val_sparse) if csr: # Test CSR A.csr() A_max = softmax(A, dim) if dim == 1: g = dgl.graph((col, row), num_nodes=max(A.shape)) else: g = dgl.graph((row, col), num_nodes=max(A.shape)) val_g = val.clone().requires_grad_() score = dgl.nn.functional.edge_softmax(g, val_g) assert torch.allclose(A_max.val, score, atol=1e-05) grad = torch.randn_like(score).to(dev) A_max.val.backward(grad) score.backward(grad) assert torch.allclose(A.val.grad, val_g.grad, atol=1e-05) ================================================ FILE: tests/python/pytorch/sparse/test_sparse_matrix.py ================================================ import unittest import warnings import backend as F import pytest import torch from dgl.sparse import ( diag, from_coo, from_csc, from_csr, from_torch_sparse, identity, to_torch_sparse_coo, to_torch_sparse_csc, to_torch_sparse_csr, val_like, ) from .utils import ( rand_coo, rand_csc, rand_csr, rand_diag, sparse_matrix_to_dense, ) def _torch_sparse_csr_tensor(indptr, indices, val, torch_sparse_shape): with warnings.catch_warnings(): warnings.simplefilter("ignore", category=UserWarning) return torch.sparse_csr_tensor(indptr, indices, val, torch_sparse_shape) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("row", [(0, 0, 1, 2), (0, 1, 2, 4)]) @pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)]) @pytest.mark.parametrize("shape", [None, (5, 5), (5, 6)]) def test_from_coo(dense_dim, row, col, shape): val_shape = (len(row),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) row = torch.tensor(row).to(ctx) col = torch.tensor(col).to(ctx) mat = from_coo(row, col, val, shape) if shape is None: shape = (torch.max(row).item() + 1, torch.max(col).item() + 1) 
mat_row, mat_col = mat.coo() mat_val = mat.val assert mat.shape == shape assert mat.nnz == row.numel() assert mat.dtype == val.dtype assert torch.allclose(mat_val, val) assert torch.allclose(mat_row, row) assert torch.allclose(mat_col, col) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 2, 3, 4)]) @pytest.mark.parametrize("shape", [None, (3, 5)]) def test_from_csr(dense_dim, indptr, indices, shape): val_shape = (len(indices),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) indptr = torch.tensor(indptr).to(ctx) indices = torch.tensor(indices).to(ctx) mat = from_csr(indptr, indices, val, shape) if shape is None: shape = (indptr.numel() - 1, torch.max(indices).item() + 1) assert mat.device == val.device assert mat.shape == shape assert mat.nnz == indices.numel() assert mat.dtype == val.dtype mat_indptr, mat_indices, value_indices = mat.csr() mat_val = mat.val if value_indices is None else mat.val[value_indices] assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indices, indices) assert torch.allclose(mat_val, val) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 2, 3, 4)]) @pytest.mark.parametrize("shape", [None, (5, 3)]) def test_from_csc(dense_dim, indptr, indices, shape): val_shape = (len(indices),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) indptr = torch.tensor(indptr).to(ctx) indices = torch.tensor(indices).to(ctx) mat = from_csc(indptr, indices, val, shape) if shape is None: shape = (torch.max(indices).item() + 1, indptr.numel() - 1) assert mat.device == val.device assert mat.shape == shape assert mat.nnz == indices.numel() assert mat.dtype == val.dtype mat_indptr, mat_indices, value_indices = 
mat.csc() mat_val = mat.val if value_indices is None else mat.val[value_indices] assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indices, indices) assert torch.allclose(mat_val, val) @pytest.mark.parametrize("val_shape", [(3), (3, 2)]) def test_dense(val_shape): ctx = F.ctx() row = torch.tensor([1, 1, 2]).to(ctx) col = torch.tensor([2, 4, 3]).to(ctx) val = torch.randn(val_shape).to(ctx) A = from_coo(row, col, val) A_dense = A.to_dense() shape = A.shape + val.shape[1:] mat = torch.zeros(shape, device=ctx) mat[row, col] = val assert torch.allclose(A_dense, mat) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 4, 3, 2)]) @pytest.mark.parametrize("shape", [None, (3, 5)]) def test_csr_to_coo(dense_dim, indptr, indices, shape): ctx = F.ctx() val_shape = (len(indices),) if dense_dim is not None: val_shape += (dense_dim,) val = torch.randn(val_shape).to(ctx) indptr = torch.tensor(indptr).to(ctx) indices = torch.tensor(indices).to(ctx) mat = from_csr(indptr, indices, val, shape) if shape is None: shape = (indptr.numel() - 1, torch.max(indices).item() + 1) row = ( torch.arange(0, indptr.shape[0] - 1) .to(ctx) .repeat_interleave(torch.diff(indptr)) ) col = indices mat_row, mat_col = mat.coo() mat_val = mat.val assert mat.shape == shape assert mat.nnz == row.numel() assert mat.device == row.device assert mat.dtype == val.dtype assert torch.allclose(mat_val, val) assert torch.allclose(mat_row, row) assert torch.allclose(mat_col, col) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 4, 3, 2)]) @pytest.mark.parametrize("shape", [None, (5, 3)]) def test_csc_to_coo(dense_dim, indptr, indices, shape): ctx = F.ctx() val_shape = (len(indices),) if dense_dim is not None: val_shape += (dense_dim,) val = 
torch.randn(val_shape).to(ctx) indptr = torch.tensor(indptr).to(ctx) indices = torch.tensor(indices).to(ctx) mat = from_csc(indptr, indices, val, shape) if shape is None: shape = (torch.max(indices).item() + 1, indptr.numel() - 1) col = ( torch.arange(0, indptr.shape[0] - 1) .to(ctx) .repeat_interleave(torch.diff(indptr)) ) row = indices mat_row, mat_col = mat.coo() mat_val = mat.val assert mat.shape == shape assert mat.nnz == row.numel() assert mat.device == row.device assert mat.dtype == val.dtype assert torch.allclose(mat_val, val) assert torch.allclose(mat_row, row) assert torch.allclose(mat_col, col) def _scatter_add(a, index, v=1): index = index.tolist() for i in index: a[i] += v return a @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("row", [(0, 0, 1, 2), (0, 1, 2, 4)]) @pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)]) @pytest.mark.parametrize("shape", [None, (5, 5), (5, 6)]) def test_coo_to_csr(dense_dim, row, col, shape): val_shape = (len(row),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) row = torch.tensor(row).to(ctx) col = torch.tensor(col).to(ctx) mat = from_coo(row, col, val, shape) if shape is None: shape = (torch.max(row).item() + 1, torch.max(col).item() + 1) mat_indptr, mat_indices, value_indices = mat.csr() mat_val = mat.val if value_indices is None else mat.val[value_indices] indptr = torch.zeros(shape[0] + 1).to(ctx) indptr = _scatter_add(indptr, row + 1) indptr = torch.cumsum(indptr, 0).long() indices = col assert mat.shape == shape assert mat.nnz == row.numel() assert mat.dtype == val.dtype assert torch.allclose(mat_val, val) assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indices, indices) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 4, 3, 2)]) @pytest.mark.parametrize("shape", [None, (5, 3)]) def 
test_csc_to_csr(dense_dim, indptr, indices, shape): ctx = F.ctx() val_shape = (len(indices),) if dense_dim is not None: val_shape += (dense_dim,) val = torch.randn(val_shape).to(ctx) indptr = torch.tensor(indptr).to(ctx) indices = torch.tensor(indices).to(ctx) mat = from_csc(indptr, indices, val, shape) mat_indptr, mat_indices, value_indices = mat.csr() mat_val = mat.val if value_indices is None else mat.val[value_indices] if shape is None: shape = (torch.max(indices).item() + 1, indptr.numel() - 1) col = ( torch.arange(0, indptr.shape[0] - 1) .to(ctx) .repeat_interleave(torch.diff(indptr)) ) row = indices row, sort_index = row.sort(stable=True) col = col[sort_index] val = val[sort_index] indptr = torch.zeros(shape[0] + 1).to(ctx) indptr = _scatter_add(indptr, row + 1) indptr = torch.cumsum(indptr, 0).long() indices = col assert mat.shape == shape assert mat.nnz == row.numel() assert mat.device == row.device assert mat.dtype == val.dtype assert torch.allclose(mat_val, val) assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indices, indices) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("row", [(0, 0, 1, 2), (0, 1, 2, 4)]) @pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)]) @pytest.mark.parametrize("shape", [None, (5, 5), (5, 6)]) def test_coo_to_csc(dense_dim, row, col, shape): val_shape = (len(row),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) row = torch.tensor(row).to(ctx) col = torch.tensor(col).to(ctx) mat = from_coo(row, col, val, shape) if shape is None: shape = (torch.max(row).item() + 1, torch.max(col).item() + 1) mat_indptr, mat_indices, value_indices = mat.csc() mat_val = mat.val if value_indices is None else mat.val[value_indices] indptr = torch.zeros(shape[1] + 1).to(ctx) _scatter_add(indptr, col + 1) indptr = torch.cumsum(indptr, 0).long() indices = row assert mat.shape == shape assert mat.nnz == row.numel() assert mat.dtype == val.dtype 
assert torch.allclose(mat_val, val) assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indices, indices) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 2, 3, 4)]) @pytest.mark.parametrize("shape", [None, (3, 5)]) def test_csr_to_csc(dense_dim, indptr, indices, shape): val_shape = (len(indices),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) indptr = torch.tensor(indptr).to(ctx) indices = torch.tensor(indices).to(ctx) mat = from_csr(indptr, indices, val, shape) mat_indptr, mat_indices, value_indices = mat.csc() mat_val = mat.val if value_indices is None else mat.val[value_indices] if shape is None: shape = (indptr.numel() - 1, torch.max(indices).item() + 1) row = ( torch.arange(0, indptr.shape[0] - 1) .to(ctx) .repeat_interleave(torch.diff(indptr)) ) col = indices col, sort_index = col.sort(stable=True) row = row[sort_index] val = val[sort_index] indptr = torch.zeros(shape[1] + 1).to(ctx) indptr = _scatter_add(indptr, col + 1) indptr = torch.cumsum(indptr, 0).long() indices = row assert mat.shape == shape assert mat.nnz == row.numel() assert mat.device == row.device assert mat.dtype == val.dtype assert torch.allclose(mat_val, val) assert torch.allclose(mat_indptr, indptr) assert torch.allclose(mat_indices, indices) @pytest.mark.parametrize("shape", [(3, 5), (5, 5), (5, 4)]) def test_diag_conversions(shape): n_rows, n_cols = shape nnz = min(shape) ctx = F.ctx() val = torch.randn(nnz).to(ctx) D = diag(val, shape) row, col = D.coo() assert torch.allclose(row, torch.arange(nnz).to(ctx)) assert torch.allclose(col, torch.arange(nnz).to(ctx)) indptr, indices, _ = D.csr() exp_indptr = list(range(0, nnz + 1)) + [nnz] * (n_rows - nnz) assert torch.allclose(indptr, torch.tensor(exp_indptr).to(ctx)) assert torch.allclose(indices, torch.arange(nnz).to(ctx)) indptr, indices, _ = D.csc() 
exp_indptr = list(range(0, nnz + 1)) + [nnz] * (n_cols - nnz) assert torch.allclose(indptr, torch.tensor(exp_indptr).to(ctx)) assert torch.allclose(indices, torch.arange(nnz).to(ctx)) @pytest.mark.parametrize("val_shape", [(3), (3, 2)]) @pytest.mark.parametrize("shape", [(3, 5), (5, 5)]) def test_val_like(val_shape, shape): def check_val_like(A, B): assert A.shape == B.shape assert A.nnz == B.nnz assert torch.allclose(torch.stack(A.coo()), torch.stack(B.coo())) assert A.val.device == B.val.device ctx = F.ctx() # COO row = torch.tensor([1, 1, 2]).to(ctx) col = torch.tensor([2, 4, 3]).to(ctx) val = torch.randn(3).to(ctx) coo_A = from_coo(row, col, val, shape) new_val = torch.randn(val_shape).to(ctx) coo_B = val_like(coo_A, new_val) check_val_like(coo_A, coo_B) # CSR indptr, indices, _ = coo_A.csr() csr_A = from_csr(indptr, indices, val, shape) csr_B = val_like(csr_A, new_val) check_val_like(csr_A, csr_B) # CSC indptr, indices, _ = coo_A.csc() csc_A = from_csc(indptr, indices, val, shape) csc_B = val_like(csc_A, new_val) check_val_like(csc_A, csc_B) def test_coalesce(): ctx = F.ctx() row = torch.tensor([1, 0, 0, 0, 1]).to(ctx) col = torch.tensor([1, 1, 1, 2, 2]).to(ctx) val = torch.arange(len(row)).to(ctx) A = from_coo(row, col, val, (4, 4)) assert A.has_duplicate() A_coalesced = A.coalesce() assert A_coalesced.nnz == 4 assert A_coalesced.shape == (4, 4) assert list(A_coalesced.row) == [0, 0, 1, 1] assert list(A_coalesced.col) == [1, 2, 1, 2] # Values of duplicate indices are added together. 
assert list(A_coalesced.val) == [3, 3, 0, 4] assert not A_coalesced.has_duplicate() def test_has_duplicate(): ctx = F.ctx() row = torch.tensor([1, 0, 0, 0, 1]).to(ctx) col = torch.tensor([1, 1, 1, 2, 2]).to(ctx) val = torch.arange(len(row)).to(ctx) shape = (4, 4) # COO coo_A = from_coo(row, col, val, shape) assert coo_A.has_duplicate() # CSR indptr, indices, _ = coo_A.csr() csr_A = from_csr(indptr, indices, val, shape) assert csr_A.has_duplicate() # CSC indptr, indices, _ = coo_A.csc() csc_A = from_csc(indptr, indices, val, shape) assert csc_A.has_duplicate() @pytest.mark.parametrize( "create_func", [rand_diag, rand_csr, rand_csc, rand_coo] ) @pytest.mark.parametrize("shape", [(5, 5), (6, 4)]) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("select_dim", [0, 1]) @pytest.mark.parametrize("index", [(0, 1, 3), (1, 2)]) def test_index_select(create_func, shape, dense_dim, select_dim, index): ctx = F.ctx() A = create_func(shape, 20, ctx, dense_dim) index = torch.tensor(index).to(ctx) A_select = A.index_select(select_dim, index) dense = sparse_matrix_to_dense(A) dense_select = torch.index_select(dense, select_dim, index) A_select_to_dense = sparse_matrix_to_dense(A_select) assert A_select_to_dense.shape == dense_select.shape assert torch.allclose(A_select_to_dense, dense_select) @pytest.mark.parametrize( "create_func", [rand_diag, rand_csr, rand_csc, rand_coo] ) @pytest.mark.parametrize("shape", [(5, 5), (6, 4)]) @pytest.mark.parametrize("dense_dim", [None, 4]) @pytest.mark.parametrize("select_dim", [0, 1]) @pytest.mark.parametrize("rang", [slice(0, 2), slice(1, 3)]) def test_range_select(create_func, shape, dense_dim, select_dim, rang): ctx = F.ctx() A = create_func(shape, 20, ctx, dense_dim) A_select = A.range_select(select_dim, rang) dense = sparse_matrix_to_dense(A) if select_dim == 0: dense_select = dense[rang, :] else: dense_select = dense[:, rang] A_select_to_dense = sparse_matrix_to_dense(A_select) assert A_select_to_dense.shape == 
dense_select.shape assert torch.allclose(A_select_to_dense, dense_select) @pytest.mark.parametrize( "create_func", [rand_diag, rand_csr, rand_csc, rand_coo] ) @pytest.mark.parametrize("index", [(0, 1, 2, 3, 4), (0, 1, 3), (1, 1, 2)]) @pytest.mark.parametrize("replace", [False, True]) @pytest.mark.parametrize("bias", [False, True]) def test_sample_rowwise(create_func, index, replace, bias): ctx = F.ctx() shape = (5, 5) sample_dim = 0 sample_num = 3 A = create_func(shape, 10, ctx) A = val_like(A, torch.abs(A.val)) index = torch.tensor(index).to(ctx) A_sample = A.sample(sample_dim, sample_num, index, replace, bias) A_dense = sparse_matrix_to_dense(A) A_sample_to_dense = sparse_matrix_to_dense(A_sample) ans_shape = (index.size(0), shape[1]) # Verify sample elements in origin rows for i, row in enumerate(list(index)): ans_ele = list(A_dense[row, :].nonzero().reshape(-1)) ret_ele = list(A_sample_to_dense[i, :].nonzero().reshape(-1)) for e in ret_ele: assert e in ans_ele if replace: # The number of sample elements in one row should be equal to # 'sample_num' if the row is not empty otherwise should be # equal to 0. 
assert list(A_sample.row).count(torch.tensor(i)) == ( sample_num if len(ans_ele) != 0 else 0 ) else: assert len(ret_ele) == min(sample_num, len(ans_ele)) assert A_sample.shape == ans_shape if not replace: assert not A_sample.has_duplicate() @pytest.mark.parametrize( "create_func", [rand_diag, rand_csr, rand_csc, rand_coo] ) @pytest.mark.parametrize("index", [(0, 1, 2, 3, 4), (0, 1, 3), (1, 1, 2)]) @pytest.mark.parametrize("replace", [False, True]) @pytest.mark.parametrize("bias", [False, True]) def test_sample_columnwise(create_func, index, replace, bias): ctx = F.ctx() shape = (5, 5) sample_dim = 1 sample_num = 3 A = create_func(shape, 10, ctx) A = val_like(A, torch.abs(A.val)) index = torch.tensor(index).to(ctx) A_sample = A.sample(sample_dim, sample_num, index, replace, bias) A_dense = sparse_matrix_to_dense(A) A_sample_to_dense = sparse_matrix_to_dense(A_sample) ans_shape = (shape[0], index.size(0)) # Verify sample elements in origin columns for i, col in enumerate(list(index)): ans_ele = list(A_dense[:, col].nonzero().reshape(-1)) ret_ele = list(A_sample_to_dense[:, i].nonzero().reshape(-1)) for e in ret_ele: assert e in ans_ele if replace: # The number of sample elements in one column should be equal to # 'sample_num' if the column is not empty otherwise should be # equal to 0. 
assert list(A_sample.col).count(torch.tensor(i)) == ( sample_num if len(ans_ele) != 0 else 0 ) else: assert len(ret_ele) == min(sample_num, len(ans_ele)) assert A_sample.shape == ans_shape if not replace: assert not A_sample.has_duplicate() def test_print(): ctx = F.ctx() # basic row = torch.tensor([1, 1, 3]).to(ctx) col = torch.tensor([2, 1, 3]).to(ctx) val = torch.tensor([1.0, 1.0, 2.0]).to(ctx) A = from_coo(row, col, val) expected = ( str( """SparseMatrix(indices=tensor([[1, 1, 3], [2, 1, 3]]), values=tensor([1., 1., 2.]), shape=(4, 4), nnz=3)""" ) if str(ctx) == "cpu" else str( """SparseMatrix(indices=tensor([[1, 1, 3], [2, 1, 3]], device='cuda:0'), values=tensor([1., 1., 2.], device='cuda:0'), shape=(4, 4), nnz=3)""" ) ) assert str(A) == expected, print(A, expected) # vector-shape non zero row = torch.tensor([1, 1, 3]).to(ctx) col = torch.tensor([2, 1, 3]).to(ctx) val = torch.tensor( [[1.3080, 1.5984], [-0.4126, 0.7250], [-0.5416, -0.7022]] ).to(ctx) A = from_coo(row, col, val) expected = ( str( """SparseMatrix(indices=tensor([[1, 1, 3], [2, 1, 3]]), values=tensor([[ 1.3080, 1.5984], [-0.4126, 0.7250], [-0.5416, -0.7022]]), shape=(4, 4), nnz=3, val_size=(2,))""" ) if str(ctx) == "cpu" else str( """SparseMatrix(indices=tensor([[1, 1, 3], [2, 1, 3]], device='cuda:0'), values=tensor([[ 1.3080, 1.5984], [-0.4126, 0.7250], [-0.5416, -0.7022]], device='cuda:0'), shape=(4, 4), nnz=3, val_size=(2,))""" ) ) assert str(A) == expected, print(A, expected) @unittest.skipIf( F._default_context_str == "cpu", reason="Device conversions don't need to be tested on CPU.", ) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_to_device(device): row = torch.tensor([1, 1, 2]) col = torch.tensor([1, 2, 0]) mat = from_coo(row, col, shape=(3, 4)) target_row = row.to(device) target_col = col.to(device) target_val = mat.val.to(device) mat2 = mat.to(device=device) assert mat2.shape == mat.shape assert torch.allclose(mat2.row, target_row) assert torch.allclose(mat2.col, 
target_col) assert torch.allclose(mat2.val, target_val) mat2 = getattr(mat, device)() assert mat2.shape == mat.shape assert torch.allclose(mat2.row, target_row) assert torch.allclose(mat2.col, target_col) assert torch.allclose(mat2.val, target_val) @pytest.mark.parametrize( "dtype", [torch.float, torch.double, torch.int, torch.long] ) def test_to_dtype(dtype): row = torch.tensor([1, 1, 2]) col = torch.tensor([1, 2, 0]) mat = from_coo(row, col, shape=(3, 4)) target_val = mat.val.to(dtype=dtype) mat2 = mat.to(dtype=dtype) assert mat2.shape == mat.shape assert torch.allclose(mat2.val, target_val) func_name = { torch.float: "float", torch.double: "double", torch.int: "int", torch.long: "long", } mat2 = getattr(mat, func_name[dtype])() assert mat2.shape == mat.shape assert torch.allclose(mat2.val, target_val) @pytest.mark.parametrize("dense_dim", [None, 2]) @pytest.mark.parametrize("row", [[0, 0, 1, 2], (0, 1, 2, 4)]) @pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)]) @pytest.mark.parametrize("extra_shape", [(0, 1), (2, 1)]) def test_sparse_matrix_transpose(dense_dim, row, col, extra_shape): mat_shape = (max(row) + 1 + extra_shape[0], max(col) + 1 + extra_shape[1]) val_shape = (len(row),) if dense_dim is not None: val_shape += (dense_dim,) ctx = F.ctx() val = torch.randn(val_shape).to(ctx) row = torch.tensor(row).to(ctx) col = torch.tensor(col).to(ctx) mat = from_coo(row, col, val, mat_shape).transpose() mat_row, mat_col = mat.coo() mat_val = mat.val assert mat.shape == mat_shape[::-1] assert torch.allclose(mat_val, val) assert torch.allclose(mat_row, col) assert torch.allclose(mat_col, row) @pytest.mark.parametrize("row", [[0, 0, 1, 2], (0, 1, 2, 4)]) @pytest.mark.parametrize("col", [(0, 1, 2, 2), (1, 3, 3, 4)]) @pytest.mark.parametrize("nz_dim", [None, 2]) @pytest.mark.parametrize("shape", [(5, 5), (6, 7)]) def test_torch_sparse_coo_conversion(row, col, nz_dim, shape): dev = F.ctx() row = torch.tensor(row).to(dev) col = torch.tensor(col).to(dev) indices = 
torch.stack([row, col]) torch_sparse_shape = shape val_shape = (row.shape[0],) if nz_dim is not None: torch_sparse_shape += (nz_dim,) val_shape += (nz_dim,) val = torch.randn(val_shape).to(dev) torch_sparse_coo = torch.sparse_coo_tensor(indices, val, torch_sparse_shape) spmat = from_torch_sparse(torch_sparse_coo) def _assert_spmat_equal_to_torch_sparse_coo(spmat, torch_sparse_coo): assert torch_sparse_coo.layout == torch.sparse_coo # Use .data_ptr() to check whether indices and values are on the same # memory address assert ( spmat.indices().data_ptr() == torch_sparse_coo._indices().data_ptr() ) assert spmat.val.data_ptr() == torch_sparse_coo._values().data_ptr() assert spmat.shape == torch_sparse_coo.shape[:2] _assert_spmat_equal_to_torch_sparse_coo(spmat, torch_sparse_coo) torch_sparse_coo = to_torch_sparse_coo(spmat) _assert_spmat_equal_to_torch_sparse_coo(spmat, torch_sparse_coo) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 2, 3, 4)]) @pytest.mark.parametrize("shape", [(3, 5), (3, 7)]) def test_torch_sparse_csr_conversion(indptr, indices, shape): dev = F.ctx() indptr = torch.tensor(indptr).to(dev) indices = torch.tensor(indices).to(dev) torch_sparse_shape = shape val_shape = (indices.shape[0],) val = torch.randn(val_shape).to(dev) torch_sparse_csr = _torch_sparse_csr_tensor( indptr, indices, val, torch_sparse_shape ) spmat = from_torch_sparse(torch_sparse_csr) def _assert_spmat_equal_to_torch_sparse_csr(spmat, torch_sparse_csr): indptr, indices, value_indices = spmat.csr() assert torch_sparse_csr.layout == torch.sparse_csr assert value_indices is None # Use .data_ptr() to check whether indices and values are on the same # memory address assert indptr.data_ptr() == torch_sparse_csr.crow_indices().data_ptr() assert indices.data_ptr() == torch_sparse_csr.col_indices().data_ptr() assert spmat.val.data_ptr() == torch_sparse_csr.values().data_ptr() assert spmat.shape == 
torch_sparse_csr.shape[:2] _assert_spmat_equal_to_torch_sparse_csr(spmat, torch_sparse_csr) torch_sparse_csr = to_torch_sparse_csr(spmat) _assert_spmat_equal_to_torch_sparse_csr(spmat, torch_sparse_csr) @pytest.mark.parametrize("indptr", [(0, 0, 1, 4), (0, 1, 2, 4)]) @pytest.mark.parametrize("indices", [(0, 1, 2, 3), (1, 2, 3, 4)]) @pytest.mark.parametrize("shape", [(8, 3), (5, 3)]) def test_torch_sparse_csc_conversion(indptr, indices, shape): dev = F.ctx() indptr = torch.tensor(indptr).to(dev) indices = torch.tensor(indices).to(dev) torch_sparse_shape = shape val_shape = (indices.shape[0],) val = torch.randn(val_shape).to(dev) torch_sparse_csc = torch.sparse_csc_tensor( indptr, indices, val, torch_sparse_shape ) spmat = from_torch_sparse(torch_sparse_csc) def _assert_spmat_equal_to_torch_sparse_csc(spmat, torch_sparse_csc): indptr, indices, value_indices = spmat.csc() assert torch_sparse_csc.layout == torch.sparse_csc assert value_indices is None # Use .data_ptr() to check whether indices and values are on the same # memory address assert indptr.data_ptr() == torch_sparse_csc.ccol_indices().data_ptr() assert indices.data_ptr() == torch_sparse_csc.row_indices().data_ptr() assert spmat.val.data_ptr() == torch_sparse_csc.values().data_ptr() assert spmat.shape == torch_sparse_csc.shape[:2] _assert_spmat_equal_to_torch_sparse_csc(spmat, torch_sparse_csc) torch_sparse_csc = to_torch_sparse_csc(spmat) _assert_spmat_equal_to_torch_sparse_csc(spmat, torch_sparse_csc) ### Diag foramt related tests ### @pytest.mark.parametrize("val_shape", [(3,), (3, 2)]) @pytest.mark.parametrize("mat_shape", [None, (3, 5), (5, 3)]) def test_diag(val_shape, mat_shape): ctx = F.ctx() # creation val = torch.randn(val_shape).to(ctx) mat = diag(val, mat_shape) # val, shape attributes assert torch.allclose(mat.val, val) if mat_shape is None: mat_shape = (val_shape[0], val_shape[0]) assert mat.shape == mat_shape val = torch.randn(val_shape).to(ctx) # nnz assert mat.nnz == val.shape[0] # dtype 
assert mat.dtype == val.dtype # device assert mat.device == val.device # row, col, val edge_index = torch.arange(len(val)).to(mat.device) row, col = mat.coo() val = mat.val assert torch.allclose(row, edge_index) assert torch.allclose(col, edge_index) assert torch.allclose(val, val) @pytest.mark.parametrize("shape", [(3, 3), (3, 5), (5, 3)]) @pytest.mark.parametrize("d", [None, 2]) def test_identity(shape, d): ctx = F.ctx() # creation mat = identity(shape, d) # shape assert mat.shape == shape # val len_val = min(shape) if d is None: val_shape = len_val else: val_shape = (len_val, d) val = torch.ones(val_shape) assert torch.allclose(val, mat.val) @pytest.mark.parametrize("val_shape", [(3,), (3, 2)]) @pytest.mark.parametrize("mat_shape", [None, (3, 5), (5, 3)]) def test_diag_matrix_transpose(val_shape, mat_shape): ctx = F.ctx() val = torch.randn(val_shape).to(ctx) mat = diag(val, mat_shape).transpose() assert torch.allclose(mat.val, val) if mat_shape is None: mat_shape = (val_shape[0], val_shape[0]) assert mat.shape == mat_shape[::-1] ================================================ FILE: tests/python/pytorch/sparse/test_unary_op.py ================================================ import sys import backend as F import torch from dgl.sparse import diag, spmatrix def test_neg(): ctx = F.ctx() row = torch.tensor([1, 1, 3]).to(ctx) col = torch.tensor([1, 2, 3]).to(ctx) val = torch.tensor([1.0, 1.0, 2.0]).to(ctx) A = spmatrix(torch.stack([row, col]), val) neg_A = -A assert A.shape == neg_A.shape assert A.nnz == neg_A.nnz assert torch.allclose(-A.val, neg_A.val) assert torch.allclose(torch.stack(A.coo()), torch.stack(neg_A.coo())) assert A.val.device == neg_A.val.device def test_diag_neg(): ctx = F.ctx() val = torch.arange(3).float().to(ctx) D = diag(val) neg_D = -D assert D.shape == neg_D.shape assert torch.allclose(-D.val, neg_D.val) assert D.val.device == neg_D.val.device def test_diag_inv(): ctx = F.ctx() val = torch.arange(1, 4).float().to(ctx) D = diag(val) inv_D = 
D.inv() assert D.shape == inv_D.shape assert torch.allclose(1.0 / D.val, inv_D.val) assert D.val.device == inv_D.val.device ================================================ FILE: tests/python/pytorch/sparse/utils.py ================================================ import numpy as np import torch from dgl.sparse import diag, from_csc, from_csr, SparseMatrix, spmatrix np.random.seed(42) torch.random.manual_seed(42) def clone_detach_and_grad(t): t = t.clone().detach() t.requires_grad_() return t def rand_stride(t): """Add stride to the last dimension of a tensor.""" stride = np.random.randint(2, 4) ret = torch.stack([t] * stride, dim=-1)[..., 0] ret = ret.detach() if torch.is_floating_point(t): ret.requires_grad_() return ret def rand_coo(shape, nnz, dev, nz_dim=None): # Create a sparse matrix without duplicate entries. nnzid = np.random.choice(shape[0] * shape[1], nnz, replace=False) nnzid = torch.tensor(nnzid, device=dev).long() row = torch.div(nnzid, shape[1], rounding_mode="floor") col = nnzid % shape[1] if nz_dim is None: val = torch.randn(nnz, device=dev, requires_grad=True) else: val = torch.randn(nnz, nz_dim, device=dev, requires_grad=True) indices = torch.stack([row, col]) indices = rand_stride(indices) val = rand_stride(val) return spmatrix(indices, val, shape) def rand_csr(shape, nnz, dev, nz_dim=None): # Create a sparse matrix without duplicate entries. 
nnzid = np.random.choice(shape[0] * shape[1], nnz, replace=False) nnzid = torch.tensor(nnzid, device=dev).long() row = torch.div(nnzid, shape[1], rounding_mode="floor") col = nnzid % shape[1] if nz_dim is None: val = torch.randn(nnz, device=dev, requires_grad=True) else: val = torch.randn(nnz, nz_dim, device=dev, requires_grad=True) indptr = torch.zeros(shape[0] + 1, device=dev, dtype=torch.int64) for r in row.tolist(): indptr[r + 1] += 1 indptr = torch.cumsum(indptr, 0) row_sorted, row_sorted_idx = torch.sort(row) indices = col[row_sorted_idx] indptr = rand_stride(indptr) indices = rand_stride(indices) val = rand_stride(val) return from_csr(indptr, indices, val, shape=shape) def rand_csc(shape, nnz, dev, nz_dim=None): # Create a sparse matrix without duplicate entries. nnzid = np.random.choice(shape[0] * shape[1], nnz, replace=False) nnzid = torch.tensor(nnzid, device=dev).long() row = torch.div(nnzid, shape[1], rounding_mode="floor") col = nnzid % shape[1] if nz_dim is None: val = torch.randn(nnz, device=dev, requires_grad=True) else: val = torch.randn(nnz, nz_dim, device=dev, requires_grad=True) indptr = torch.zeros(shape[1] + 1, device=dev, dtype=torch.int64) for c in col.tolist(): indptr[c + 1] += 1 indptr = torch.cumsum(indptr, 0) col_sorted, col_sorted_idx = torch.sort(col) indices = row[col_sorted_idx] indptr = rand_stride(indptr) indices = rand_stride(indices) val = rand_stride(val) return from_csc(indptr, indices, val, shape=shape) def rand_diag(shape, nnz, dev, nz_dim=None): nnz = min(shape) if nz_dim is None: val = torch.randn(nnz, device=dev, requires_grad=True) else: val = torch.randn(nnz, nz_dim, device=dev, requires_grad=True) return diag(val, shape) def rand_coo_uncoalesced(shape, nnz, dev): # Create a sparse matrix with possible duplicate entries. 
row = torch.randint(shape[0], (nnz,), device=dev) col = torch.randint(shape[1], (nnz,), device=dev) val = torch.randn(nnz, device=dev, requires_grad=True) indices = torch.stack([row, col]) indices = rand_stride(indices) return spmatrix(indices, val, shape) def rand_csr_uncoalesced(shape, nnz, dev): # Create a sparse matrix with possible duplicate entries. row = torch.randint(shape[0], (nnz,), device=dev) col = torch.randint(shape[1], (nnz,), device=dev) val = torch.randn(nnz, device=dev, requires_grad=True) indptr = torch.zeros(shape[0] + 1, device=dev, dtype=torch.int64) for r in row.tolist(): indptr[r + 1] += 1 indptr = torch.cumsum(indptr, 0) row_sorted, row_sorted_idx = torch.sort(row) indices = col[row_sorted_idx] indptr = rand_stride(indptr) indices = rand_stride(indices) val = rand_stride(val) return from_csr(indptr, indices, val, shape=shape) def rand_csc_uncoalesced(shape, nnz, dev): # Create a sparse matrix with possible duplicate entries. row = torch.randint(shape[0], (nnz,), device=dev) col = torch.randint(shape[1], (nnz,), device=dev) val = torch.randn(nnz, device=dev, requires_grad=True) indptr = torch.zeros(shape[1] + 1, device=dev, dtype=torch.int64) for c in col.tolist(): indptr[c + 1] += 1 indptr = torch.cumsum(indptr, 0) col_sorted, col_sorted_idx = torch.sort(col) indices = row[col_sorted_idx] indptr = rand_stride(indptr) indices = rand_stride(indices) val = rand_stride(val) return from_csc(indptr, indices, val, shape=shape) def sparse_matrix_to_dense(A: SparseMatrix): dense = A.to_dense() return clone_detach_and_grad(dense) def sparse_matrix_to_torch_sparse(A: SparseMatrix, val=None): row, col = A.coo() edge_index = torch.cat((row.unsqueeze(0), col.unsqueeze(0)), 0) shape = A.shape if val is None: val = A.val val = val.clone().detach() if len(A.val.shape) > 1: shape += (A.val.shape[-1],) ret = torch.sparse_coo_tensor(edge_index, val, shape).coalesce() ret.requires_grad_() return ret def dense_mask(dense, sparse): ret = torch.zeros_like(dense) 
row, col = sparse.coo() for r, c in zip(row, col): ret[r, c] = dense[r, c] return ret ================================================ FILE: tests/python/pytorch/test_ffi-stream.py ================================================ import unittest from statistics import mean import backend as F import dgl import dgl.ndarray as nd import dgl.ops as OPS import numpy as np import torch from dgl import rand_graph from dgl._ffi.streams import _dgl_get_stream, to_dgl_stream_handle from dgl.utils import to_dgl_context # borrowed from PyTorch, torch/testing/_internal/common_utils.py def _get_cycles_per_ms() -> float: """Measure and return approximate number of cycles per millisecond for torch.cuda._sleep""" def measure() -> float: start = torch.cuda.Event(enable_timing=True) end = torch.cuda.Event(enable_timing=True) start.record() torch.cuda._sleep(1000000) end.record() end.synchronize() cycles_per_ms = 1000000 / start.elapsed_time(end) return cycles_per_ms # Get 10 values and remove the 2 max and 2 min and return the avg. # This is to avoid system disturbance that skew the results, e.g. # the very first cuda call likely does a bunch of init, which takes # much longer than subsequent calls. num = 10 vals = [] for _ in range(num): vals.append(measure()) vals = sorted(vals) return mean(vals[2 : num - 2]) @unittest.skipIf( F._default_context_str == "cpu", reason="stream only runs on GPU." 
)
def test_basics():
    """Run ``copy_u_sum`` on the default stream and on a new CUDA stream and
    compare both with the result computed from the CPU graph."""
    g = rand_graph(10, 20, device=F.cpu())
    x = torch.ones(g.num_nodes(), 10)
    result = OPS.copy_u_sum(g, x).to(F.ctx())

    # launch on default stream used in DGL
    xx = x.to(device=F.ctx())
    gg = g.to(device=F.ctx())
    OPS.copy_u_sum(gg, xx)
    assert torch.equal(OPS.copy_u_sum(gg, xx), result)

    # launch on new stream created via torch.cuda
    s = torch.cuda.Stream(device=F.ctx())
    with torch.cuda.stream(s):
        xx = x.to(device=F.ctx(), non_blocking=True)
        gg = g.to(device=F.ctx())
        OPS.copy_u_sum(gg, xx)
    s.synchronize()
    assert torch.equal(OPS.copy_u_sum(gg, xx), result)


@unittest.skipIf(
    F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_set_get_stream():
    """After ``torch.cuda.set_stream(s)``, DGL must report the same stream
    handle for the test context."""
    current_stream = torch.cuda.current_stream()
    # test setting another stream
    s = torch.cuda.Stream(device=F.ctx())
    torch.cuda.set_stream(s)
    assert (
        to_dgl_stream_handle(s).value
        == _dgl_get_stream(to_dgl_context(F.ctx())).value
    )
    # revert to default stream
    torch.cuda.set_stream(current_stream)


@unittest.skipIf(
    F._default_context_str == "cpu", reason="stream only runs on GPU."
)
# borrowed from PyTorch, test/test_cuda.py: test_record_stream()
def test_record_stream_ndarray():
    """Check that an NDArray recorded on a stream is not handed out again
    until that stream has finished with it."""
    cycles_per_ms = _get_cycles_per_ms()

    t = nd.array(np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), ctx=nd.cpu())
    t.pin_memory_()
    result = nd.empty([4], ctx=nd.gpu(0))
    stream = torch.cuda.Stream()
    # One-element list so the nested function can report the data pointer.
    ptr = [None]

    # Performs the CPU->GPU copy in a background stream
    def perform_copy():
        with torch.cuda.stream(stream):
            tmp = t.copyto(nd.gpu(0))
            ptr[0] = F.from_dgl_nd(tmp).data_ptr()
        torch.cuda.current_stream().wait_stream(stream)
        tmp.record_stream(to_dgl_stream_handle(torch.cuda.current_stream()))
        torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the copy
        result.copyfrom(tmp)

    perform_copy()
    with torch.cuda.stream(stream):
        tmp2 = nd.empty([4], ctx=nd.gpu(0))
        assert (
            F.from_dgl_nd(tmp2).data_ptr() != ptr[0]
        ), "allocation re-used too soon"

    assert torch.equal(
        F.from_dgl_nd(result).cpu(), torch.tensor([1.0, 2.0, 3.0, 4.0])
    )

    # Check that the block will be re-used after the main stream finishes
    torch.cuda.current_stream().synchronize()
    with torch.cuda.stream(stream):
        tmp3 = nd.empty([4], ctx=nd.gpu(0))
        assert (
            F.from_dgl_nd(tmp3).data_ptr() == ptr[0]
        ), "allocation not re-used"


@unittest.skipIf(
    F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_record_stream_graph_positive():
    """With ``record_stream`` called on the graph, the delayed computation
    must give the same result as the reference computation."""
    cycles_per_ms = _get_cycles_per_ms()

    g = rand_graph(10, 20, device=F.cpu())
    g.create_formats_()
    x = torch.ones(g.num_nodes(), 10).to(F.ctx())
    g1 = g.to(F.ctx())

    # this is necessary to initialize the cusparse handle
    result = OPS.copy_u_sum(g1, x)
    torch.cuda.current_stream().synchronize()

    stream = torch.cuda.Stream()
    results2 = torch.zeros_like(result)

    # Performs the computing in a background stream
    def perform_computing():
        with torch.cuda.stream(stream):
            g2 = g.to(F.ctx())
        torch.cuda.current_stream().wait_stream(stream)
        g2.record_stream(torch.cuda.current_stream())
        torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the computing
        results2.copy_(OPS.copy_u_sum(g2, x))

    perform_computing()
    with torch.cuda.stream(stream):
        # since we have called record stream for g2, g3 won't reuse its memory
        g3 = rand_graph(10, 20, device=F.ctx())
        g3.create_formats_()
    torch.cuda.current_stream().synchronize()
    assert torch.equal(result, results2)


@unittest.skipIf(
    F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_record_stream_graph_negative():
    """Without ``record_stream``, the delayed computation is expected to
    differ from the reference computation."""
    cycles_per_ms = _get_cycles_per_ms()

    g = rand_graph(10, 20, device=F.cpu())
    g.create_formats_()
    x = torch.ones(g.num_nodes(), 10).to(F.ctx())
    g1 = g.to(F.ctx())

    # this is necessary to initialize the cusparse handle
    result = OPS.copy_u_sum(g1, x)
    torch.cuda.current_stream().synchronize()

    stream = torch.cuda.Stream()
    results2 = torch.zeros_like(result)

    # Performs the computing in a background stream
    def perform_computing():
        with torch.cuda.stream(stream):
            g2 = g.to(F.ctx())
        torch.cuda.current_stream().wait_stream(stream)
        # omitting record_stream will produce a wrong result
        # g2.record_stream(torch.cuda.current_stream())
        torch.cuda._sleep(int(50 * cycles_per_ms))  # delay the computing
        results2.copy_(OPS.copy_u_sum(g2, x))

    perform_computing()
    with torch.cuda.stream(stream):
        # g3 will reuse g2's memory block, resulting in a wrong result
        g3 = rand_graph(10, 20, device=F.ctx())
        g3.create_formats_()
    torch.cuda.current_stream().synchronize()
    assert not torch.equal(result, results2)


if __name__ == "__main__":
    test_basics()
    test_set_get_stream()
    test_record_stream_ndarray()
    test_record_stream_graph_positive()
    test_record_stream_graph_negative()


================================================
FILE: tests/python/pytorch/test_heterograph-pickle.py
================================================
import io
import pickle

import dgl
import networkx as nx
import torch


def _reconstruct_pickle(obj):
    """Pickle ``obj`` into an in-memory buffer and return the unpickled copy."""
    f = io.BytesIO()
    pickle.dump(obj, f)
    f.seek(0)
    obj = pickle.load(f)
    f.close()

    return obj


def test_pickling_batched_graph():
    """Pickle and unpickle a batched graph that carries node and edge data."""
    # NOTE: this is a test for a weird bug mentioned in
    #   https://github.com/dmlc/dgl/issues/438
    glist = [nx.path_graph(i + 5) for i in range(5)]
    glist = [dgl.from_networkx(g) for g in glist]
    bg = dgl.batch(glist)
    bg.ndata["x"] = torch.randn((35, 5))
    bg.edata["y"] = torch.randn((60, 3))
    new_bg = _reconstruct_pickle(bg)


if __name__ == "__main__":
    test_pickling_batched_graph()


================================================ FILE:
tests/python/pytorch/test_multiprocessing-ipc.py ================================================ import os import unittest import dgl import torch as th import torch.multiprocessing as mp def sub_ipc(g): print(g) return g @unittest.skipIf(os.name == "nt", reason="Do not support windows yet") def test_torch_ipc(): g = dgl.graph(([0, 1, 2], [1, 2, 3])) ctx = mp.get_context("spawn") p = ctx.Process(target=sub_ipc, args=(g,)) p.start() p.join() if __name__ == "__main__": test_torch_ipc() ================================================ FILE: tests/python/pytorch/utils/test_pin_memory.py ================================================ import backend as F import dgl import pytest import torch @pytest.mark.skipif( F._default_context_str == "cpu", reason="Need gpu for this test." ) def test_pin_noncontiguous(): t = torch.empty([10, 100]).transpose(0, 1) assert not t.is_contiguous() assert not F.is_pinned(t) with pytest.raises(dgl.DGLError): dgl.utils.pin_memory_inplace(t) @pytest.mark.skipif( F._default_context_str == "cpu", reason="Need gpu for this test." ) def test_pin_view(): t = torch.empty([100, 10]) v = t[10:20] assert v.is_contiguous() assert not F.is_pinned(t) with pytest.raises(dgl.DGLError): dgl.utils.pin_memory_inplace(v) # make sure an empty view does not generate an error u = t[10:10] u = dgl.utils.pin_memory_inplace(u) @pytest.mark.skipif( F._default_context_str == "cpu", reason="Need gpu for this test." ) def test_unpin_automatically(): # run a sufficient number of iterations such that the memory pool should be # re-used for j in range(10): t = torch.ones(10000, 10) assert not F.is_pinned(t) nd = dgl.utils.pin_memory_inplace(t) assert F.is_pinned(t) del nd # dgl.ndarray will unpin its data upon destruction assert not F.is_pinned(t) del t @pytest.mark.skipif( F._default_context_str == "cpu", reason="Need gpu for this test." 
) def test_pin_unpin_column(): g = dgl.graph(([1, 2, 3, 4], [0, 0, 0, 0])) g.ndata["x"] = torch.randn(g.num_nodes()) g.pin_memory_() assert g.is_pinned() assert g.ndata["x"].is_pinned() for col in g._node_frames[0].values(): assert col.pinned_by_dgl assert col._data_nd is not None g.ndata["x"] = torch.randn(g.num_nodes()) # unpin the old ndata['x'] assert g.is_pinned() for col in g._node_frames[0].values(): assert not col.pinned_by_dgl assert col._data_nd is None assert not g.ndata["x"].is_pinned() @pytest.mark.skipif( F._default_context_str == "cpu", reason="Need gpu for this test." ) def test_pin_empty(): t = torch.tensor([]) assert not t.is_pinned() # Empty tensors will not be pinned or unpinned. It's a no-op. # This is also the default behavior in PyTorch. # We just check that it won't raise an error. nd = dgl.utils.pin_memory_inplace(t) assert not t.is_pinned() if __name__ == "__main__": test_pin_noncontiguous() test_pin_view() test_unpin_automatically() test_pin_unpin_column() ================================================ FILE: tests/python/tensorflow/test_basic.py ================================================ def test(): pass if __name__ == "__main__": test() ================================================ FILE: tests/python/tensorflow/test_nn.py ================================================ from copy import deepcopy import backend as F import dgl import dgl.function as fn import dgl.nn.tensorflow as nn import networkx as nx import numpy as np import pytest import scipy as sp import tensorflow as tf from tensorflow.keras import layers from utils import parametrize_idtype from utils.graph_cases import ( get_cases, random_bipartite, random_dglgraph, random_graph, ) def _AXWb(A, X, W, b): X = tf.matmul(X, W) Y = tf.reshape(tf.matmul(A, tf.reshape(X, (X.shape[0], -1))), X.shape) return Y + b @pytest.mark.parametrize("out_dim", [1, 2]) def test_graph_conv(out_dim): g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx()) ctx = F.ctx() adj = tf.sparse.to_dense( 
tf.sparse.reorder(g.adj_external(transpose=True, ctx=ctx)) ) conv = nn.GraphConv(5, out_dim, norm="none", bias=True) # conv = conv print(conv) # test#1: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 assert F.allclose(h1, _AXWb(adj, h0, conv.weight, conv.bias)) # test#2: more-dim h0 = F.ones((3, 5, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 assert F.allclose(h1, _AXWb(adj, h0, conv.weight, conv.bias)) conv = nn.GraphConv(5, out_dim) # conv = conv # test#3: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 # test#4: basic h0 = F.ones((3, 5, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 conv = nn.GraphConv(5, out_dim) # conv = conv # test#3: basic h0 = F.ones((3, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 # test#4: basic h0 = F.ones((3, 5, 5)) h1 = conv(g, h0) assert len(g.ndata) == 0 assert len(g.edata) == 0 # test rest_parameters # old_weight = deepcopy(conv.weight.data) # conv.reset_parameters() # new_weight = conv.weight.data # assert not F.allclose(old_weight, new_weight) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree", "dglgraph"]), ) @pytest.mark.parametrize("norm", ["none", "both", "right", "left"]) @pytest.mark.parametrize("weight", [True, False]) @pytest.mark.parametrize("bias", [True, False]) @pytest.mark.parametrize("out_dim", [1, 2]) def test_graph_conv2(idtype, g, norm, weight, bias, out_dim): g = g.astype(idtype).to(F.ctx()) conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias) ext_w = F.randn((5, out_dim)) nsrc = g.number_of_src_nodes() ndst = g.number_of_dst_nodes() h = F.randn((nsrc, 5)) h_dst = F.randn((ndst, out_dim)) if weight: h_out = conv(g, h) else: h_out = conv(g, h, weight=ext_w) assert h_out.shape == (ndst, out_dim) @parametrize_idtype @pytest.mark.parametrize( "g", get_cases(["bipartite"], 
def test_simple_pool():
    """Check Sum/Avg/Max/Sort pooling on a single graph and on a batched graph."""
    ctx = F.ctx()
    g = dgl.DGLGraph(nx.path_graph(15)).to(ctx)

    sum_pool = nn.SumPooling()
    avg_pool = nn.AvgPooling()
    max_pool = nn.MaxPooling()
    sort_pool = nn.SortPooling(10)  # k = 10
    print(sum_pool, avg_pool, max_pool, sort_pool)

    # Each pooling layer paired with the backend reduction it is compared to.
    reductions = [(sum_pool, F.sum), (avg_pool, F.mean), (max_pool, F.max)]

    # test#1: basic
    h0 = F.randn((g.num_nodes(), 5))
    for pool, reduce_fn in reductions:
        pooled = pool(g, h0)
        assert F.allclose(F.squeeze(pooled, 0), reduce_fn(h0, 0))
    pooled = sort_pool(g, h0)
    assert pooled.shape[0] == 1 and pooled.shape[1] == 10 * 5 and pooled.ndim == 2

    # test#2: batched graph
    g_ = dgl.DGLGraph(nx.path_graph(5)).to(ctx)
    bg = dgl.batch([g, g_, g, g_, g])
    h0 = F.randn((bg.num_nodes(), 5))
    # Node-index range of every member graph (15, 5, 15, 5 and 15 nodes).
    segments = [(0, 15), (15, 20), (20, 35), (35, 40), (40, 55)]
    for pool, reduce_fn in reductions:
        pooled = pool(bg, h0)
        truth = tf.stack([reduce_fn(h0[lo:hi], 0) for lo, hi in segments], 0)
        assert F.allclose(pooled, truth)
    pooled = sort_pool(bg, h0)
    assert pooled.shape[0] == 5 and pooled.shape[1] == 10 * 5 and pooled.ndim == 2
@parametrize_idtype
@pytest.mark.parametrize(
    "g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_gat_conv(g, idtype, out_dim, num_heads):
    """Check the output and attention shapes of ``nn.GATConv``."""
    ctx = F.ctx()
    g = g.astype(idtype).to(ctx)
    feat = F.randn((g.number_of_src_nodes(), 5))

    gat = nn.GATConv(5, out_dim, num_heads)
    out = gat(g, feat)
    assert out.shape == (g.number_of_dst_nodes(), num_heads, out_dim)

    # With get_attention=True the layer also returns per-edge attention.
    _, attention = gat(g, feat, get_attention=True)
    assert attention.shape == (g.num_edges(), num_heads, 1)

    # test residual connection
    gat = nn.GATConv(5, out_dim, num_heads, residual=True)
    out = gat(g, feat)
@parametrize_idtype
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sage_conv_bi_empty(idtype, aggre_type, out_dim):
    """Check ``nn.SAGEConv`` output shapes on a bipartite graph without edges.

    The graph has 5 source nodes and 3 destination nodes. The aggregators that
    are exercised are listed explicitly below; the parametrized ``aggre_type``
    argument is not consulted and is kept only so the signature and the
    generated test IDs stay unchanged.
    """
    # Test the case for graphs without edges
    g = dgl.heterograph({("_U", "_E", "_V"): ([], [])}, {"_U": 5, "_V": 3})
    g = g.astype(idtype).to(F.ctx())

    # (aggregator, destination feature width) pairs, "gcn" first. The loop
    # variable is named differently from the test argument on purpose so it
    # no longer shadows it.
    cases = [("gcn", 3)] + [(name, 1) for name in ("mean", "pool", "lstm")]
    for aggregator, dst_dim in cases:
        sage = nn.SAGEConv((3, dst_dim), out_dim, aggregator)
        feat = (F.randn((5, 3)), F.randn((3, dst_dim)))
        h = sage(g, feat)
        assert h.shape[-1] == out_dim
        assert h.shape[0] == 3
def myagg(alist, dsttype):
    """Combine per-relation results into a position-weighted sum.

    The first entry has weight 1, the second weight 2, and so on.
    ``dsttype`` is accepted but not used.
    """
    total = alist[0]
    # Entries after the first carry their 1-based position as weight.
    for weight, item in enumerate(alist[1:], start=2):
        total = total + weight * item
    return total
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_cheb_conv(out_dim):
    """Check that ``nn.DenseChebConv`` matches ``nn.ChebConv`` given the same weights."""
    ctx = F.ctx()
    for k in range(3, 4):
        sparse_adj = sp.sparse.random(100, 100, density=0.1, random_state=42)
        g = dgl.DGLGraph(sparse_adj).to(ctx)
        adj_sparse = tf.sparse.reorder(g.adj_external(transpose=True, ctx=ctx))
        adj = tf.sparse.to_dense(adj_sparse)

        cheb = nn.ChebConv(5, out_dim, k, None, bias=True)
        dense_cheb = nn.DenseChebConv(5, out_dim, k, bias=True)

        # init cheb modules
        feat = F.ones((100, 5))
        out_cheb = cheb(g, feat, [2.0])

        # Copy the sparse layer's parameters into the dense layer.
        dense_cheb.W = tf.reshape(cheb.linear.weights[0], (k, 5, out_dim))
        if cheb.linear.bias is not None:
            dense_cheb.bias = cheb.linear.bias

        out_dense_cheb = dense_cheb(adj, feat, 2.0)
        print(out_cheb - out_dense_cheb)
        assert F.allclose(out_cheb, out_dense_cheb)
POPD CALL workon %BUILD_TAG% PUSHD python DEL /S /Q build *.egg-info dist pip install -e . || EXIT /B 1 POPD ENDLOCAL EXIT /B ================================================ FILE: tests/scripts/build_dgl.sh ================================================ #!/bin/bash set -e . /opt/conda/etc/profile.d/conda.sh if [ $# -ne 1 ]; then echo "Device argument required, can be cpu, gpu or cugraph" exit -1 fi if [[ $1 != "cpu" ]]; then # CI is now running on g4dn instance. Specify target arch to avoid below # error: Unknown CUDA Architecture Name 9.0a in CUDA_SELECT_NVCC_ARCH_FLAGS export TORCH_CUDA_ARCH_LIST=7.5 # For dgl_sparse and tensoradaptor. CMAKE_VARS="$CMAKE_VARS -DUSE_CUDA=ON -DCUDA_ARCH_NAME=Turing" # For graphbolt. fi # This is a semicolon-separated list of Python interpreters containing PyTorch. # The value here is for CI. Replace it with your own or comment this whole # statement for default Python interpreter. if [ "$1" != "cugraph" ]; then # We do not build pytorch for cugraph because currently building # pytorch against all the supported cugraph versions is not supported # See issue: https://github.com/rapidsai/cudf/issues/8510 CMAKE_VARS="$CMAKE_VARS -DTORCH_PYTHON_INTERPS=/opt/conda/envs/pytorch-ci/bin/python" else # Disable sparse build as cugraph docker image lacks cuDNN. CMAKE_VARS="$CMAKE_VARS -DBUILD_TORCH=OFF -DBUILD_SPARSE=OFF" fi if [ -d build ]; then rm -rf build fi mkdir build rm -rf _download pushd build cmake $CMAKE_VARS .. 
@enum.unique
class JobStatus(enum.Enum):
    """Outcome categories used when reporting a job node."""

    SUCCESS = 0
    FAIL = 1
    SKIP = 2
def get_node_full_name(node, node_dict):
    """Return a "/"-prefixed path of node names, walking up the parent chain.

    Starting at ``node``, the name of every visited node that has a
    ``"parentNodes"`` entry is appended as ``"/<name>"``; the walk then moves
    to the first listed parent. It stops at a node without ``"parentNodes"``
    (whose name is not included), or when the first parent is missing from
    ``node_dict`` or the parent list is empty.

    Parameters
    ----------
    node : dict
        Node record with a ``"name"`` and, optionally, ``"parentNodes"``.
    node_dict : dict
        Mapping from node id to node record, used to resolve parents.

    Returns
    -------
    str
        The concatenated path, or ``""`` if ``node`` has no ``"parentNodes"``.
    """
    parts = []
    while "parentNodes" in node:
        parts.append("/" + node["name"])
        parents = node["parentNodes"]
        # Resolve parents through the mapping passed in, not a module global,
        # and tolerate an empty parent list.
        if not parents or parents[0] not in node_dict:
            break
        node = node_dict[parents[0]]
    return "".join(parts)
Please see the log at {}".format(prefix + filename) ) elif final_dict[test_name]["status"] == JobStatus.SKIP: pytest.skip( "Test skipped. Please see the log at {}".format(prefix + filename) ) ================================================ FILE: tests/scripts/ci_report/status.py ================================================ import argparse import os import requests parser = argparse.ArgumentParser() parser.add_argument( "--result", type=str, default="FAILURE", ) args = parser.parse_args() JOB_NAME = os.getenv("JOB_NAME") BUILD_NUMBER = os.getenv("BUILD_NUMBER") BUILD_ID = os.getenv("BUILD_ID") COMMIT = os.getenv("GIT_COMMIT") # List of status of entire job. # https://javadoc.jenkins.io/hudson/model/Result.html if args.result == "SUCCESS": status_output = "✅ CI test succeeded." elif args.result == "NOT_BUILT": status_output = "⚪️ CI test cancelled due to overrun." elif args.result in ["FAILURE", "ABORTED"]: status_output = "❌ CI test failed." JOB_LINK = os.environ["BUILD_URL"] response = requests.get("{}wfapi".format(JOB_LINK), verify=False).json() for stage in response["stages"]: # List of status of individual stage. # https://javadoc.jenkins.io/plugin/pipeline-graph-analysis/org/jenkinsci/plugins/workflow/pipelinegraphanalysis/GenericStatus.html if stage["status"] in ["FAILED", "ABORTED"]: stage_name = stage["name"] status_output = f"❌ CI test failed in Stage [{stage_name}]." break else: status_output = f"[Debug Only] CI test with result [{args.result}]." comment = f""" Commit ID: {COMMIT}\n Build ID: {BUILD_ID}\n Status: {status_output}\n Report path: [link](https://dgl-ci-result.s3.us-west-2.amazonaws.com/{JOB_NAME}/{BUILD_NUMBER}/{BUILD_ID}/logs/report.html)\n Full logs path: [link](https://dgl-ci-result.s3.us-west-2.amazonaws.com/{JOB_NAME}/{BUILD_NUMBER}/{BUILD_ID}/logs/cireport.log) """ print(comment) ================================================ FILE: tests/scripts/cugraph_unit_test.sh ================================================ #!/bin/bash . 
#!/bin/bash
# Runs the C++ unit-test binary from inside the build directory.

# Print a failure message and abort the script with a non-zero status.
function fail {
    echo FAIL: "$@"
    exit -1
}

echo "$PWD"
# Stop right away if the build directory is missing instead of trying to run
# the test binary from the wrong place.
pushd build || fail "enter build directory"
ls -lh
# Let the test binary find the shared libraries that sit next to it.
export LD_LIBRARY_PATH="$PWD:$LD_LIBRARY_PATH"
./runUnitTests || fail "CPP unit test"
popd
${SSH_KEY_LINE} "${SSH_SETUP_LINE}python3 -m pip list | grep -i ${pkg} ") || fail "${pkg} not installed in ${line}" done done < ${DIST_DGL_TEST_IP_CONFIG} python3 -m pytest -v --capture=tee-sys --junitxml=pytest_dist.xml --durations=100 tests/dist/test_*.py || fail "dist across machines" ================================================ FILE: tests/scripts/task_distributed_test.sh ================================================ #!/bin/bash . /opt/conda/etc/profile.d/conda.sh function fail { echo FAIL: $@ exit -1 } function usage { echo "Usage: $0 backend device" } if [ $# -ne 2 ]; then usage fail "Error: must specify backend and device" fi [ $1 == "pytorch" ] || fail "Distrbuted tests run on pytorch backend only." [ $2 == "cpu" ] || fail "Distrbuted tests run on cpu only." export DGLBACKEND=$1 export DGLTESTDEV=$2 export DGL_LIBRARY_PATH=${PWD}/build export PYTHONPATH=tests:${PWD}/python:$PYTHONPATH export DGL_DOWNLOAD_DIR=${PWD}/_download unset TORCH_ALLOW_TF32_CUBLAS_OVERRIDE export CUDA_VISIBLE_DEVICES=-1 conda activate ${DGLBACKEND}-ci export PYTHONUNBUFFERED=1 export OMP_NUM_THREADS=1 export DMLC_LOG_DEBUG=1 # Tests for distributed except test_partition.py are skipped due to glitch @2024.06.27. python3 -m pytest -v --capture=tee-sys --junitxml=pytest_distributed.xml --durations=100 tests/distributed/test_partition.py || fail "distributed" # Tests for tools are skipped due to glitch. 
#PYTHONPATH=tools:tools/distpartitioning:$PYTHONPATH python3 -m pytest -v --capture=tee-sys --junitxml=pytest_tools.xml --durations=100 tests/tools/*.py || fail "tools" ================================================ FILE: tests/scripts/task_example_test.bat ================================================ @ECHO OFF SETLOCAL EnableDelayedExpansion SET GCN_EXAMPLE_DIR=.\examples\pytorch IF x%1x==xx ( ECHO Must supply CPU or GPU GOTO :FAIL ) ELSE IF x%1x==xcpux ( SET DEV=-1 ) ELSE IF x%1x==xgpux ( SET DEV=0 SET CUDA_VISIBLE_DEVICES=0 ) ELSE ( ECHO Must supply CPU or GPU GOTO :FAIL ) CALL workon %BUILD_TAG% SET DGLBACKEND=pytorch SET DGL_LIBRARY_PATH=!CD!\build SET PYTHONPATH=!CD!\python;!PYTHONPATH! SET DGL_DOWNLOAD_DIR=!CD!\_download python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\examples || GOTO :FAIL PUSHD !GCN_EXAMPLE_DIR! python pagerank.py || GOTO :FAIL python gcn\train.py --dataset cora || GOTO :FAIL POPD ENDLOCAL EXIT /B :FAIL ECHO Example test failed ENDLOCAL EXIT /B 1 ================================================ FILE: tests/scripts/task_example_test.sh ================================================ #!/bin/bash . 
/opt/conda/etc/profile.d/conda.sh conda activate pytorch-ci GCN_EXAMPLE_DIR="./examples/pytorch/" function fail { echo FAIL: $@ exit -1 } function usage { echo "Usage: $0 [cpu|gpu]" } # check arguments if [ $# -ne 1 ]; then usage fail "Error: must specify device" fi if [ "$1" == "cpu" ]; then dev=-1 elif [ "$1" == "gpu" ]; then export CUDA_VISIBLE_DEVICES=0 dev=0 else usage fail "Unknown device $1" fi export DGLBACKEND=pytorch export DGL_LIBRARY_PATH=${PWD}/build export PYTHONPATH=${PWD}/python:$PYTHONPATH export DGL_DOWNLOAD_DIR=${PWD}/_download # test python3 -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests/examples || fail "sparse examples on $1" pushd $GCN_EXAMPLE_DIR> /dev/null python3 pagerank.py || fail "run pagerank.py on $1" python3 gcn/train.py --dataset cora || fail "run gcn/train.py on $1" python3 lda/lda_model.py || fail "run lda/lda_model.py on $1" popd > /dev/null ================================================ FILE: tests/scripts/task_go_test.sh ================================================ #!/bin/bash . /opt/conda/etc/profile.d/conda.sh function fail { echo FAIL: $@ exit -1 } export DGLBACKEND=pytorch export DGL_LIBRARY_PATH=${PWD}/build export PYTHONPATH=tests:${PWD}/python:$PYTHONPATH export DGL_DOWNLOAD_DIR=${PWD}/_download conda activate pytorch-ci pushd dglgo rm -rf build *.egg-info dist pip uninstall -y dglgo python3 setup.py install popd export LC_ALL=C.UTF-8 export LANG=C.UTF-8 # Skip go tests due to ImportError: cannot import name 'cached_property' from 'functools' in python3.7 #python3 -m pytest -v --junitxml=pytest_go.xml --durations=100 tests/go/test_model.py || fail "go" ================================================ FILE: tests/scripts/task_lint.sh ================================================ #!/bin/bash # cpplint echo 'Checking code style of C++ codes...' 
python3 tests/lint/lint.py dgl cpp include src || exit 1 python3 tests/lint/lint.py dgl_sparse cpp dgl_sparse/include dgl_sparse/src || exit 1 # pylint echo 'Checking code style of python codes...' python3 -m pylint --reports=y -v --rcfile=tests/lint/pylintrc python/dgl || exit 1 ================================================ FILE: tests/scripts/task_pytorch_tutorial_test.sh ================================================ #!/bin/bash # The working directory for this script will be "tests/scripts" . /opt/conda/etc/profile.d/conda.sh conda activate pytorch-ci TUTORIAL_ROOT="./tutorials" function fail { echo FAIL: $@ exit -1 } export MPLBACKEND=Agg export DGLBACKEND=pytorch export DGL_LIBRARY_PATH=${PWD}/build export PYTHONPATH=${PWD}/python:$PYTHONPATH export DGL_DOWNLOAD_DIR=${PWD}/_download pushd ${TUTORIAL_ROOT} > /dev/null # Install requirements pip install -r requirements.txt || fail "installing requirements" # Test for f in $(find . -path ./dist -prune -false -o -name "*.py" ! -name "*_mx.py") do echo "Running tutorial ${f} ..." python3 $f || fail "run ${f}" done popd > /dev/null ================================================ FILE: tests/scripts/task_unit_test.bat ================================================ @ECHO OFF SETLOCAL EnableDelayedExpansion IF x%1x==xx ( ECHO Specify backend EXIT /B 1 ) ELSE ( SET BACKEND=%1 ) CALL workon %BUILD_TAG% SET PYTHONPATH=tests;!CD!\python;!PYTHONPATH! SET DGLBACKEND=!BACKEND! SET DGL_LIBRARY_PATH=!CD!\build SET DGL_DOWNLOAD_DIR=!CD!\_download python -m pip install pytest psutil pandas pyyaml pydantic rdflib torchmetrics expecttest || EXIT /B 1 python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\python\!DGLBACKEND! || EXIT /B 1 python -m pytest -v --junitxml=pytest_common.xml --durations=100 tests\python\common || EXIT /B 1 ENDLOCAL EXIT /B ================================================ FILE: tests/scripts/task_unit_test.sh ================================================ #!/bin/bash . 
/opt/conda/etc/profile.d/conda.sh function fail { echo FAIL: $@ exit -1 } function usage { echo "Usage: $0 backend device" } if [ $# -ne 2 ]; then usage fail "Error: must specify backend and device" fi export DGLBACKEND=$1 export DGLTESTDEV=$2 export DGL_LIBRARY_PATH=${PWD}/build export PYTHONPATH=tests:${PWD}/python:$PYTHONPATH export DGL_DOWNLOAD_DIR=${PWD}/_download export TF_FORCE_GPU_ALLOW_GROWTH=true unset TORCH_ALLOW_TF32_CUBLAS_OVERRIDE if [ $2 == "gpu" ] then export CUDA_VISIBLE_DEVICES=0 else export CUDA_VISIBLE_DEVICES=-1 fi conda activate ${DGLBACKEND}-ci python3 -m pip install expecttest if [ $DGLBACKEND == "mxnet" ] then python3 -m pytest -v --junitxml=pytest_compute.xml --durations=100 --ignore=tests/python/common/test_ffi.py tests/python/common || fail "common" else python3 -m pytest -v --junitxml=pytest_dgl_import.xml tests/python/test_dgl_import.py || fail "dgl_import" python3 -m pytest -v --junitxml=pytest_common.xml --durations=100 tests/python/common || fail "common" fi python3 -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests/python/$DGLBACKEND || fail "backend-specific" ================================================ FILE: tests/tools/pytest_utils.py ================================================ import json import logging import os import dgl import numpy as np import torch from distpartitioning import array_readwriter from distpartitioning.array_readwriter.parquet import ParquetArrayParser from files import setdir def _chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt, vector_rows=False): paths = [] offset = 0 for j, n in enumerate(chunk_sizes): path = os.path.abspath(path_fmt % j) arr_chunk = arr[offset : offset + n] shape = arr_chunk.shape logging.info("Chunking %d-%d" % (offset, offset + n)) # If requested we write multi-column arrays as single-column vector Parquet files array_parser = array_readwriter.get_array_parser(**fmt_meta) if ( isinstance(array_parser, ParquetArrayParser) and len(shape) > 1 and shape[1] > 
def _even_chunk_sizes(total, n_chunks):
    """Return ``n_chunks`` sizes that sum to ``total`` and differ by at most one.

    The first ``total % n_chunks`` chunks receive one extra item. A
    non-positive ``n_chunks`` yields an empty list.
    """
    return [
        total // n_chunks + (i < total % n_chunks) for i in range(n_chunks)
    ]


def _chunk_graph(
    g,
    name,
    ndata_paths,
    edata_paths,
    num_chunks,
    data_fmt,
    edges_format,
    vector_rows=False,
    **kwargs,
):
    """Chunk the structure and features of ``g`` and write ``metadata.json``.

    Everything is written relative to the current working directory, inside
    ``with setdir(...)`` blocks for ``edge_index``, ``node_data`` and
    ``edge_data`` (``setdir`` is imported from ``files``; presumably it
    creates and enters the directory -- confirm).

    Parameters
    ----------
    g : DGLGraph
        Graph to be chunked.
    name : str
        Stored as ``graph_name`` in the metadata.
    ndata_paths : dict
        ``{ntype: {key: path}}``. For a graph with a single node type a flat
        ``{key: path}`` dict is accepted as well.
    edata_paths : dict
        ``{etype: {key: path}}``; every key is passed through
        ``g.to_canonical_etype``. For a graph with a single edge type a flat
        ``{key: path}`` dict is accepted as well.
    num_chunks : int
        Default number of chunks; per-entity overrides come from ``kwargs``.
    data_fmt : str
        Writer format name for node/edge data. ``"numpy"`` selects the
        ``npy`` file suffix, anything else the ``parquet`` suffix.
    edges_format : str
        ``"csv"`` or ``"parquet"``.
    vector_rows : bool
        Forwarded to ``_chunk_numpy_array`` for node/edge data.
    kwargs : dict
        Forwarded to ``_initialize_num_chunks``.

    Raises
    ------
    RuntimeError
        If ``edges_format`` is neither ``"csv"`` nor ``"parquet"``.
    """
    # Homogeneous graphs may pass flat ``{key: path}`` dicts; wrap them into
    # the per-type layout used below. Empty dicts are left untouched, which
    # also avoids calling ``next`` on an exhausted iterator.
    if (
        len(g.ntypes) == 1
        and ndata_paths
        and not isinstance(next(iter(ndata_paths.values())), dict)
    ):
        ndata_paths = {g.ntypes[0]: ndata_paths}
    if (
        len(g.etypes) == 1
        and edata_paths
        and not isinstance(next(iter(edata_paths.values())), dict)
    ):
        # Wrap the *edge* paths here. Wrapping ``ndata_paths`` (as this code
        # used to do) silently replaced the edge features with node features.
        edata_paths = {g.etypes[0]: edata_paths}

    # Then convert all edge types to canonical edge types.
    canonical_etypes = g.canonical_etypes
    etypestrs = {etype: ":".join(etype) for etype in canonical_etypes}
    edata_paths = {
        ":".join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()
    }

    metadata = {
        "graph_name": name,
        "node_type": g.ntypes,
        "num_nodes_per_type": [g.num_nodes(ntype) for ntype in g.ntypes],
    }

    # Resolve the number of chunks for every node/edge type and data key.
    num_chunks_details = _initialize_num_chunks(g, num_chunks, kwargs=kwargs)

    # Number of nodes per chunk per node type.
    num_chunks_nodes = num_chunks_details["num_chunks_nodes"]
    metadata["num_nodes_per_chunk"] = [
        _even_chunk_sizes(g.num_nodes(ntype), num_chunks_nodes[ntype])
        for ntype in g.ntypes
    ]

    metadata["edge_type"] = [etypestrs[etype] for etype in canonical_etypes]
    metadata["num_edges_per_type"] = [
        g.num_edges(etype) for etype in canonical_etypes
    ]

    # Number of edges per chunk per edge type.
    num_chunks_edges = num_chunks_details["num_chunks_edges"]
    num_edges_per_chunk = [
        _even_chunk_sizes(g.num_edges(etype), num_chunks_edges[etype])
        for etype in canonical_etypes
    ]
    metadata["num_edges_per_chunk"] = num_edges_per_chunk
    num_edges_per_chunk_dict = dict(zip(canonical_etypes, num_edges_per_chunk))

    # Split edge index.
    metadata["edges"] = {}
    with setdir("edge_index"):
        if edges_format == "csv":
            fmt_meta = {"name": edges_format, "delimiter": " "}
        elif edges_format == "parquet":
            fmt_meta = {"name": edges_format}
        else:
            raise RuntimeError(f"Invalid edges_fmt: {edges_format}")

        # Edge types are visited in shuffled order; the metadata is keyed by
        # the edge type string, so the order does not change its content.
        order = np.arange(len(canonical_etypes))
        np.random.shuffle(order)
        for idx in order:
            etype = canonical_etypes[idx]
            etypestr = etypestrs[etype]
            logging.info("Chunking edge index for %s", etypestr)
            srcdst = torch.stack(g.edges(etype=etype), 1)
            metadata["edges"][etypestr] = {
                "format": fmt_meta,
                # The ``.txt`` suffix is used for every edges format.
                "data": _chunk_numpy_array(
                    srcdst.numpy(),
                    fmt_meta,
                    num_edges_per_chunk_dict[etype],
                    etypestr + "%d.txt",
                ),
            }

    # Input features are always read with the numpy parser and re-written
    # in ``data_fmt``.
    reader_fmt_meta, writer_fmt_meta = {"name": "numpy"}, {"name": data_fmt}
    file_suffix = "npy" if data_fmt == "numpy" else "parquet"

    # Chunk node data.
    metadata["node_data"] = {}
    num_chunks_node_data = num_chunks_details["num_chunks_node_data"]
    with setdir("node_data"):
        for ntype, ndata_per_type in ndata_paths.items():
            ndata_meta = {}
            with setdir(ntype):
                for key, path in ndata_per_type.items():
                    logging.info(
                        "Chunking node data for type %s key %s", ntype, key
                    )
                    n_chunks = num_chunks_node_data[ntype]
                    if isinstance(n_chunks, dict):
                        # Per-key override; fall back to the default.
                        n_chunks = n_chunks.get(key, num_chunks)
                    assert isinstance(n_chunks, int), (
                        f"num_chunks for {ntype}/{key} should be int while "
                        f"{type(n_chunks)} is got."
                    )
                    chunk_sizes = _even_chunk_sizes(
                        g.num_nodes(ntype), n_chunks
                    )
                    arr = array_readwriter.get_array_parser(
                        **reader_fmt_meta
                    ).read(path)
                    ndata_meta[key] = {
                        "format": writer_fmt_meta,
                        "data": _chunk_numpy_array(
                            arr,
                            writer_fmt_meta,
                            chunk_sizes,
                            key + "-%d." + file_suffix,
                            vector_rows=vector_rows,
                        ),
                    }
            metadata["node_data"][ntype] = ndata_meta

    # Chunk edge data.
    metadata["edge_data"] = {}
    num_chunks_edge_data = num_chunks_details["num_chunks_edge_data"]
    with setdir("edge_data"):
        for etypestr, edata_per_type in edata_paths.items():
            edata_meta = {}
            # Chunk settings are keyed by the canonical etype tuple.
            etype = tuple(etypestr.split(":"))
            with setdir(etypestr):
                for key, path in edata_per_type.items():
                    logging.info(
                        "Chunking edge data for type %s key %s", etypestr, key
                    )
                    n_chunks = num_chunks_edge_data[etype]
                    if isinstance(n_chunks, dict):
                        n_chunks = n_chunks.get(key, num_chunks)
                    assert isinstance(n_chunks, int), (
                        f"num_chunks for {etype}/{key} should be int while "
                        f"{type(n_chunks)} is got."
                    )
                    chunk_sizes = _even_chunk_sizes(
                        g.num_edges(etype), n_chunks
                    )
                    arr = array_readwriter.get_array_parser(
                        **reader_fmt_meta
                    ).read(path)
                    edata_meta[key] = {
                        "format": writer_fmt_meta,
                        "data": _chunk_numpy_array(
                            arr,
                            writer_fmt_meta,
                            chunk_sizes,
                            key + "-%d." + file_suffix,
                            vector_rows=vector_rows,
                        ),
                    }
            metadata["edge_data"][etypestr] = edata_meta

    metadata_path = "metadata.json"
    with open(metadata_path, "w") as f:
        json.dump(metadata, f, sort_keys=True, indent=4)
    logging.info("Saved metadata in %s", os.path.abspath(metadata_path))
def create_chunked_dataset(
    root_dir,
    num_chunks,
    data_fmt="numpy",
    edges_fmt="csv",
    vector_rows=False,
    **kwargs,
):
    """
    Build a small synthetic MAG240-like graph and chunk it to disk.

    The raw feature arrays are saved below ``<root_dir>/data_test`` and the
    chunked dataset is written to ``<root_dir>/chunked-data`` by
    ``chunk_graph``.

    Parameters:
    -----------
    root_dir : string
        directory in which all the files for the chunked dataset will be
        stored.
    num_chunks : int
        forwarded to ``chunk_graph``.
    data_fmt, edges_fmt, vector_rows, kwargs
        forwarded unchanged to ``chunk_graph``.

    Returns:
    --------
    DGLGraph
        The generated heterograph with all features attached.
    """
    # A synthetic mini MAG240.
    n_inst = 1200
    n_author = 1200
    n_paper = 1200

    def rand_edges(num_src, num_dst, num_edges):
        # Sample distinct (src, dst) pairs by drawing flat pair indices.
        flat = np.random.choice(num_src * num_dst, num_edges, replace=False)
        return torch.from_numpy(flat // num_dst), torch.from_numpy(
            flat % num_dst
        )

    n_cite = 24 * 1000
    n_write = 12 * 1000
    n_affil = 2400

    author_writes = ("author", "writes", "paper")
    inst_writes = ("institution", "writes", "paper")

    # Structure.
    data_dict = {
        ("paper", "cites", "paper"): rand_edges(n_paper, n_paper, n_cite),
        author_writes: rand_edges(n_author, n_paper, n_write),
        ("author", "affiliated_with", "institution"): rand_edges(
            n_author, n_inst, n_affil
        ),
        inst_writes: rand_edges(n_inst, n_paper, n_write),
    }
    w_src, w_dst = data_dict[author_writes]
    data_dict[("paper", "rev_writes", "author")] = (w_dst, w_src)
    g = dgl.heterograph(data_dict)

    # paper feat, label, year
    paper_feat = np.random.randn(n_paper, 3)
    paper_label = np.random.choice(4, n_paper)
    paper_year = np.random.choice(2022, n_paper)
    paper_orig_ids = np.arange(0, n_paper)
    writes_orig_ids = np.arange(0, n_write)

    mask_names = ("train_mask", "test_mask", "val_mask")

    def draw_masks(count):
        # One random boolean mask per split, drawn in train/test/val order.
        return {
            split: np.random.choice([True, False], count)
            for split in mask_names
        }

    # masks.
    paper_masks = draw_masks(n_paper)
    author_masks = draw_masks(n_author)
    inst_masks = draw_masks(n_inst)
    write_masks = draw_masks(n_write)

    # Edge features.
    cite_count = np.random.choice(10, n_cite)
    write_year = np.random.choice(2022, n_write)
    write2_year = np.random.choice(2022, n_write)

    # Save features.
    input_dir = os.path.join(root_dir, "data_test")
    os.makedirs(input_dir)
    for sub_d in ["paper", "cites", "writes", "writes2"]:
        os.makedirs(os.path.join(input_dir, sub_d))

    def dump(rel_path, array):
        # Save ``array`` under ``input_dir`` and hand back the file path.
        full_path = os.path.join(input_dir, rel_path)
        with open(full_path, "wb") as f:
            np.save(f, array)
        return full_path

    paper_paths = {}
    for key, arr in [
        ("feat", paper_feat),
        ("label", paper_label),
        ("year", paper_year),
        ("orig_ids", paper_orig_ids),
    ]:
        paper_paths[key] = dump(f"paper/{key}.npy", arr)
        g.nodes["paper"].data[key] = torch.from_numpy(arr)

    cite_count_path = dump("cites/count.npy", cite_count)
    g.edges["cites"].data["count"] = torch.from_numpy(cite_count)

    write_year_path = dump("writes/year.npy", write_year)
    g.edges[author_writes].data["year"] = torch.from_numpy(write_year)
    g.edges["rev_writes"].data["year"] = torch.from_numpy(write_year)

    writes_orig_ids_path = dump("writes/orig_ids.npy", writes_orig_ids)
    g.edges[author_writes].data["orig_ids"] = torch.from_numpy(
        writes_orig_ids
    )

    write2_year_path = dump("writes2/year.npy", write2_year)
    g.edges[inst_writes].data["year"] = torch.from_numpy(write2_year)

    write_mask_paths = {}
    for split, mask in write_masks.items():
        write_mask_paths[split] = dump(f"writes/{split}.npy", mask)
        g.edges[author_writes].data[split] = torch.from_numpy(mask)

    for sub_d in ["author", "institution"]:
        os.makedirs(os.path.join(input_dir, sub_d))

    node_mask_paths = {}
    for ntype, masks in [
        ("paper", paper_masks),
        ("author", author_masks),
        ("institution", inst_masks),
    ]:
        node_mask_paths[ntype] = {}
        for split, mask in masks.items():
            node_mask_paths[ntype][split] = dump(f"{ntype}/{split}.npy", mask)
            g.nodes[ntype].data[split] = torch.from_numpy(mask)

    node_data = {
        "paper": {
            "feat": paper_paths["feat"],
            **node_mask_paths["paper"],
            "label": paper_paths["label"],
            "year": paper_paths["year"],
            "orig_ids": paper_paths["orig_ids"],
        },
        "author": dict(node_mask_paths["author"]),
        "institution": dict(node_mask_paths["institution"]),
    }

    edge_data = {
        "cites": {"count": cite_count_path},
        author_writes: {
            "year": write_year_path,
            "orig_ids": writes_orig_ids_path,
            **write_mask_paths,
        },
        "rev_writes": {"year": write_year_path},
        inst_writes: {"year": write2_year_path},
    }

    output_dir = os.path.join(root_dir, "chunked-data")
    chunk_graph(
        g,
        "mag240m",
        node_data,
        edge_data,
        num_chunks=num_chunks,
        output_path=output_dir,
        data_fmt=data_fmt,
        edges_fmt=edges_fmt,
        vector_rows=vector_rows,
        **kwargs,
    )
    logging.debug("Done with creating chunked graph")

    return g
def create_random_hetero(type_n, node_n):
    """Create a random heterograph with ``type_n`` node types.

    Every node type ``n1`` .. ``n<type_n>`` gets ``node_n`` nodes, and every
    ordered pair ``(n_i, n_j)`` with ``i < j`` is connected by its own
    relation ``r<k>`` holding a sparse random edge set.

    Returns the graph together with the ``"src:rel:dst"`` strings of all
    canonical edge types.
    """
    num_nodes = {f"n{i}": node_n for i in range(1, type_n + 1)}

    # Relations are numbered from 1 in the order the pairs are enumerated.
    type_pairs = [
        (i, j) for i in range(1, type_n) for j in range(i + 1, type_n + 1)
    ]
    c_etypes = [
        (f"n{i}", f"r{rel_id}", f"n{j}")
        for rel_id, (i, j) in enumerate(type_pairs, start=1)
    ]

    def sample_edges(src_ntype, dst_ntype):
        # Fixed random_state: every relation uses the same seed.
        coo = spsp.random(
            num_nodes[src_ntype],
            num_nodes[dst_ntype],
            density=0.001,
            format="coo",
            random_state=100,
        )
        return coo.row, coo.col

    edges = {
        c_etype: sample_edges(c_etype[0], c_etype[2]) for c_etype in c_etypes
    }
    etype_strs = [":".join(c_etype) for c_etype in c_etypes]
    return dgl.heterograph(edges, num_nodes), etype_strs
@pytest.mark.parametrize(
    "num_nodes, num_edges, nid_begin, nid_end",
    [
        [4000, 40000, 0, 1000],
        [4000, 40000, 1000, 2000],
        [4000, 40000, 2000, 3000],
        [4000, 40000, 3000, 4000],
        [4000, 100, 0, 1000],
        [4000, 100, 1000, 2000],
        [4000, 100, 2000, 3000],
        [4000, 100, 3000, 4000],
        [1, 1, 0, 1],
    ],
)
def test_get_unique_invidx_with_numpy(num_nodes, num_edges, nid_begin, nid_end):
    """Cross-check ``_get_unique_invidx`` against ``np.unique``.

    The sorted comparisons use ``np.sort``. ``ndarray.sort()`` sorts in place
    and returns ``None``, so comparing two ``.sort()`` results is always
    ``None == None`` and can never fail.
    """
    # Generate synthetic edges; the upper bounds are exclusive.
    if num_edges > 0:
        srcids = np.random.randint(0, num_nodes, (num_edges,))
        dstids = np.random.randint(nid_begin, nid_end, (num_edges,))
    else:
        # Integer dtype so that the arrays remain usable as indices.
        srcids = np.array([], dtype=np.int64)
        dstids = np.array([], dtype=np.int64)
    assert nid_begin <= nid_end

    # Generate the sorted, unique node ids of a partition. This is
    # equivalent to shuffle_nids in a partition.
    unique_nids = np.arange(nid_begin, nid_end)

    # Keep the inputs: the call below rebinds srcids/dstids to inverse
    # indices.
    orig_srcids = srcids.copy()
    orig_dstids = dstids.copy()
    num_src = len(orig_srcids)
    num_dst = len(orig_dstids)
    input_arr = np.concatenate([srcids, dstids, unique_nids])

    # Invoke the test target.
    uniques, idxes, srcids, dstids = _get_unique_invidx(
        srcids, dstids, unique_nids
    )

    assert len(uniques) == len(idxes)
    assert np.all(srcids < len(uniques))
    assert np.all(dstids < len(uniques))
    assert np.array_equal(np.sort(uniques[srcids]), np.sort(orig_srcids))
    assert np.all(uniques[dstids] == orig_dstids)
    assert np.all(uniques == input_arr[idxes])

    # Reference result from numpy.
    np_uniques, np_idxes, np_inv_idxes = np.unique(
        np.concatenate([orig_srcids, orig_dstids, unique_nids]),
        return_index=True,
        return_inverse=True,
    )

    # Test uniques.
    assert np.all(np_uniques == uniques)

    # Test idxes array: the positions may differ, the selected values
    # must not.
    assert np.array_equal(
        np.sort(input_arr[idxes]), np.sort(input_arr[np_idxes])
    )

    # Test srcids against the src part of the numpy inverse indices.
    assert np.array_equal(
        np.sort(uniques[srcids]),
        np.sort(np_uniques[np_inv_idxes[:num_src]]),
    )

    # Test dstids against the dst part only; everything after it belongs to
    # unique_nids and must not be part of the comparison.
    assert np.array_equal(
        np.sort(uniques[dstids]),
        np.sort(np_uniques[np_inv_idxes[num_src : num_src + num_dst]]),
    )
of nodes [4000, 100, 0, 1000], [4000, 100, 1000, 2000], [4000, 100, 2000, 3000], [4000, 100, 3000, 4000], # corner case [1, 1, 0, 1], ], ) def test_get_unique_invidx(num_nodes, num_edges, nid_begin, nid_end): # prepare data for the function # generate synthetic edges if num_edges > 0: srcids = np.random.randint(0, num_nodes, (num_edges,)) dstids = np.random.randint(nid_begin, nid_end, (num_edges,)) else: srcids = np.array([]) dstids = np.array([]) assert nid_begin <= nid_end # generate unique node-ids for any # partition. This list should be sorted. # This is equivilant to shuffle_nids in a partition unique_nids = np.arange(nid_begin, nid_end) # invoke the test target uniques, idxes, src_ids, dst_ids = _get_unique_invidx( srcids, dstids, unique_nids ) # validate the outputs of this function # array uniques should be sorted list of integers. assert np.all( np.diff(uniques) >= 0 ), f"Output parameter uniques assert failing." # idxes are list of integers # these are indices in the concatenated list (srcids, dstids, unique_nids) max_idx = len(src_ids) + len(dst_ids) + len(unique_nids) assert np.all(idxes >= 0), f"Output parameter idxes has negative values." assert np.all( idxes < max_idx ), f"Output parameter idxes has invalid maximum value." # srcids and dstids will be inverse indices in the uniques list min_src = np.amin(src_ids) max_src = np.amax(src_ids) min_dst = np.amin(dst_ids) max_dst = np.amax(dst_ids) assert ( len(uniques) > max_src ), f"Inverse idx, src_ids, has invalid max value." assert min_src >= 0, f"Inverse idx, src_ids has negative values." assert len(uniques) > max_dst, f"Inverse idx, dst_ids, invalid max value." assert max_dst >= 0, f"Inverse idx, dst_ids has negative values." 
def test_get_unique_invidx_low_mem():
    """Pin the exact outputs of ``_get_unique_invidx`` with ``low_mem=True``."""
    src_in = np.array([14, 0, 3, 3, 0, 3, 9, 5, 14, 12])
    dst_in = np.array([10, 16, 12, 13, 10, 17, 16, 13, 14, 16])
    owned_nids = np.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

    uniques, idxes, src_inv, dst_inv = _get_unique_invidx(
        src_in,
        dst_in,
        owned_nids,
        low_mem=True,
    )

    # (label used in the failure message, actual value, expected value)
    checks = (
        (
            "unique",
            uniques,
            np.array([0, 3, 5, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]),
        ),
        (
            "indices",
            idxes,
            np.array([1, 2, 7, 6, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]),
        ),
        ("srcids", src_inv, np.array([8, 0, 1, 1, 0, 1, 3, 2, 8, 6])),
        ("dstdis", dst_inv, np.array([4, 10, 6, 7, 4, 11, 10, 7, 8, 10])),
    )
    for label, got, want in checks:
        assert np.all(got == want), f"{label} is not expected. {got} != {want}"
def test_get_unique_invidx_low_high_mem():
    """Compare the ``low_mem=True`` and ``low_mem=False`` code paths.

    Unique values and both inverse-index arrays must agree, while the
    ``idxes`` output is expected to differ between the two paths.
    """
    srcids = np.array([14, 0, 3, 3, 0, 3, 9, 5, 14, 12])
    dstids = np.array([10, 16, 12, 13, 10, 17, 16, 13, 14, 16])
    unique_nids = np.array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

    uniq_lo, idx_lo, src_lo, dst_lo = _get_unique_invidx(
        srcids,
        dstids,
        unique_nids,
        low_mem=True,
    )
    uniq_hi, idx_hi, src_hi, dst_hi = _get_unique_invidx(
        srcids,
        dstids,
        unique_nids,
        low_mem=False,
    )

    same_uniques = np.all(uniq_lo == uniq_hi)
    assert same_uniques, f"unique is not expected. {uniq_lo} != {uniq_hi}"

    same_idxes = np.all(idx_lo == idx_hi)
    assert not same_idxes, f"indices is not expected. {idx_lo} == {idx_hi}"

    same_src = np.all(src_lo == src_hi)
    assert same_src, f"srcids is not expected. {src_lo} != {src_hi}"

    same_dst = np.all(dst_lo == dst_hi)
    assert same_dst, f"dstdis is not expected. {dst_lo} != {dst_hi}"
def _create_lookup_service(
    partitions_dir, ntypes, id_map, rank, world_size, num_parts
):
    """Build a ``DistLookupService`` for ``rank`` and attach ``id_map`` to it."""
    service = dist_lookup.DistLookupService(
        partitions_dir, ntypes, rank, world_size, num_parts
    )
    service.set_idMap(id_map)

    print(f"[Rank: {rank}] Done with Dist Lookup Service initialization...")
    return service
def _single_machine_run(
    num_parts, world_size, partitions_dir, ntypes, id_map, test_data
):
    """Run ``_run`` in ``world_size`` spawned processes and verify they succeed.

    All workers share one randomly chosen port in ``[10000, 20000)``, which
    is passed to ``_run`` together with the rank.

    Raises
    ------
    AssertionError
        If any worker process exits with a non-zero exit code. Without this
        check, assertion failures raised inside the workers would go
        unnoticed by the parent test.
    """
    port_num = int(np.random.randint(10000, 20000, size=(1,), dtype=int)[0])
    ctx = mp.get_context("spawn")

    processes = []
    for rank in range(world_size):
        p = ctx.Process(
            target=_run,
            args=(
                port_num,
                rank,
                num_parts,
                world_size,
                partitions_dir,
                ntypes,
                id_map,
                test_data,
            ),
        )
        p.start()
        processes.append(p)

    exit_codes = []
    for p in processes:
        p.join()
        # Read the exit code before close(): a closed Process object no
        # longer exposes its attributes.
        exit_codes.append(p.exitcode)
        p.close()

    failed = {
        rank: code for rank, code in enumerate(exit_codes) if code != 0
    }
    assert not failed, f"Worker processes failed (rank -> exit code): {failed}"
@pytest.mark.parametrize(
    "num_chunks, num_parts, world_size",
    [[4, 4, 4], [8, 4, 2], [8, 4, 4], [9, 6, 3], [11, 11, 1], [11, 4, 1]],
)
def test_lookup_service(
    num_chunks,
    num_parts,
    world_size,
    num_chunks_nodes=None,
    num_chunks_edges=None,
    num_chunks_node_data=None,
    num_chunks_edge_data=None,
):
    """End-to-end test of the distributed lookup service.

    Creates a chunked dataset, partitions it with the random partitioning
    script and then runs the lookup checks in ``world_size`` processes.
    """
    # Local import: only this test needs it.
    import subprocess

    with tempfile.TemporaryDirectory() as root_dir:
        create_chunked_dataset(
            root_dir,
            num_chunks,
            data_fmt="numpy",
            num_chunks_nodes=num_chunks_nodes,
            num_chunks_edges=num_chunks_edges,
            num_chunks_node_data=num_chunks_node_data,
            num_chunks_edge_data=num_chunks_edge_data,
        )

        # Step1: graph partition
        in_dir = os.path.join(root_dir, "chunked-data")
        output_dir = os.path.join(root_dir, "parted_data")
        # check=True makes a failing partition script fail the test right
        # here, instead of surfacing later as missing partition files.
        subprocess.run(
            [
                "python3",
                "tools/partition_algo/random_partition.py",
                "--in_dir",
                in_dir,
                "--out_dir",
                output_dir,
                "--num_partitions",
                str(num_parts),
            ],
            check=True,
        )

        # metadata for original graph
        orig_config = os.path.join(in_dir, "metadata.json")
        orig_schema = read_json(orig_config)
        ntypes = orig_schema[constants.STR_NODE_TYPE]

        _, global_nid_ranges = get_idranges(
            ntypes,
            get_ntype_counts_map(
                ntypes,
                orig_schema[constants.STR_NUM_NODES_PER_TYPE],
            ),
            num_chunks=num_parts,
        )

        id_map = dgl.distributed.id_map.IdMap(global_nid_ranges)

        # run the test
        _single_machine_run(
            num_parts,
            world_size,
            output_dir,
            ntypes,
            id_map,
            _prepare_test_data(
                output_dir, ntypes, global_nid_ranges, world_size
            ),
        )
= os.path.join(output_dir, "edge_index") for c_etype in g.canonical_etypes: c_etype_str = _etype_tuple_to_str(c_etype) if num_chunks_edges is None: n_chunks = num_chunks else: n_chunks = num_chunks_edges for i in range(n_chunks): fname = os.path.join( output_edge_index_dir, f"{c_etype_str}{i}.txt" ) assert os.path.isfile(fname) if edges_fmt == "csv": with open(fname, "r") as f: header = f.readline() num1, num2 = header.rstrip().split(" ") assert isinstance(int(num1), int) assert isinstance(int(num2), int) elif edges_fmt == "parquet": metadata = pq.read_metadata(fname) assert metadata.num_columns == 2 else: assert False, f"Invalid edges_fmt: {edges_fmt}" # check node/edge_data suffix = "npy" if data_fmt == "numpy" else "parquet" reader_fmt_meta = {"name": data_fmt} def test_data(sub_dir, feat, expected_data, expected_shape, num_chunks): data = [] for i in range(num_chunks): fname = os.path.join(sub_dir, f"{feat}-{i}.{suffix}") assert os.path.isfile(fname), f"{fname} cannot be found." feat_array = array_readwriter.get_array_parser( **reader_fmt_meta ).read(fname) assert feat_array.shape[0] == expected_shape data.append(feat_array) data = np.concatenate(data, 0) assert torch.equal(torch.from_numpy(data), expected_data) output_node_data_dir = os.path.join(output_dir, "node_data") for ntype in g.ntypes: sub_dir = os.path.join(output_node_data_dir, ntype) if isinstance(num_chunks_node_data, int): chunks_data = num_chunks_node_data elif isinstance(num_chunks_node_data, dict): chunks_data = num_chunks_node_data.get(ntype, num_chunks) else: chunks_data = num_chunks for feat, data in g.nodes[ntype].data.items(): if isinstance(chunks_data, dict): n_chunks = chunks_data.get(feat, num_chunks) else: n_chunks = chunks_data test_data( sub_dir, feat, data, g.num_nodes(ntype) // n_chunks, n_chunks, ) output_edge_data_dir = os.path.join(output_dir, "edge_data") for c_etype in g.canonical_etypes: c_etype_str = _etype_tuple_to_str(c_etype) sub_dir = os.path.join(output_edge_data_dir, 
c_etype_str) if isinstance(num_chunks_edge_data, int): chunks_data = num_chunks_edge_data elif isinstance(num_chunks_edge_data, dict): chunks_data = num_chunks_edge_data.get(c_etype, num_chunks) else: chunks_data = num_chunks for feat, data in g.edges[c_etype].data.items(): if isinstance(chunks_data, dict): n_chunks = chunks_data.get(feat, num_chunks) else: n_chunks = chunks_data test_data( sub_dir, feat, data, g.num_edges(c_etype) // n_chunks, n_chunks, ) @pytest.mark.parametrize("num_chunks", [1, 8]) @pytest.mark.parametrize("data_fmt", ["numpy", "parquet"]) @pytest.mark.parametrize("edges_fmt", ["csv", "parquet"]) def test_chunk_graph_basics(num_chunks, data_fmt, edges_fmt): _test_chunk_graph(num_chunks, data_fmt=data_fmt, edges_fmt=edges_fmt) @pytest.mark.parametrize("num_chunks", [1, 8]) @pytest.mark.parametrize("vector_rows", [True, False]) def test_chunk_graph_vector_rows(num_chunks, vector_rows): _test_chunk_graph( num_chunks, data_fmt="parquet", edges_fmt="parquet", vector_rows=vector_rows, ) @pytest.mark.parametrize( "num_chunks, " "num_chunks_nodes, " "num_chunks_edges, " "num_chunks_node_data, " "num_chunks_edge_data", [ [1, None, None, None, None], [8, None, None, None, None], [4, 4, 4, 8, 12], [4, 4, 4, {"paper": 10}, {("author", "writes", "paper"): 24}], [ 4, 4, 4, {"paper": {"feat": 10}}, {("author", "writes", "paper"): {"year": 24}}, ], ], ) def test_chunk_graph_arbitrary_chunks( num_chunks, num_chunks_nodes, num_chunks_edges, num_chunks_node_data, num_chunks_edge_data, ): _test_chunk_graph( num_chunks, num_chunks_nodes=num_chunks_nodes, num_chunks_edges=num_chunks_edges, num_chunks_node_data=num_chunks_node_data, num_chunks_edge_data=num_chunks_edge_data, ) def create_mini_chunked_dataset( root_dir, num_chunks, data_fmt, edges_fmt, vector_rows, few_entity="node", **kwargs, ): num_nodes = {"n1": 1000, "n2": 1010, "n3": 1020} etypes = [ ("n1", "r1", "n2"), ("n2", "r1", "n1"), ("n1", "r2", "n3"), ("n2", "r3", "n3"), ] node_items = ["n1", "n2", "n3"] 
edges_coo = {} for etype in etypes: src_ntype, _, dst_ntype = etype arr = spsp.random( num_nodes[src_ntype], num_nodes[dst_ntype], density=0.001, format="coo", random_state=100, ) edges_coo[etype] = (arr.row, arr.col) edge_items = [] if few_entity == "edge": edges_coo[("n1", "a0", "n2")] = ( torch.tensor([0, 1]), torch.tensor([1, 0]), ) edges_coo[("n1", "a1", "n3")] = ( torch.tensor([0, 1]), torch.tensor([1, 0]), ) edge_items.append(("n1", "a0", "n2")) edge_items.append(("n1", "a1", "n3")) elif few_entity == "node": edges_coo[("n1", "r_few", "n_few")] = ( torch.tensor([0, 1]), torch.tensor([1, 0]), ) edges_coo[("a0", "a01", "n_1")] = ( torch.tensor([0, 1]), torch.tensor([1, 0]), ) edge_items.append(("n1", "r_few", "n_few")) edge_items.append(("a0", "a01", "n_1")) node_items.append("n_few") node_items.append("n_1") num_nodes["n_few"] = 2 num_nodes["n_1"] = 2 g = dgl.heterograph(edges_coo) node_data = {} edge_data = {} # save feature input_dir = os.path.join(root_dir, "data_test") for ntype in node_items: os.makedirs(os.path.join(input_dir, ntype)) feat = np.random.randn(num_nodes[ntype], 3) feat_path = os.path.join(input_dir, f"{ntype}/feat.npy") with open(feat_path, "wb") as f: np.save(f, feat) g.nodes[ntype].data["feat"] = torch.from_numpy(feat) node_data[ntype] = {"feat": feat_path} for etype in set(edge_items): os.makedirs(os.path.join(input_dir, etype[1])) num_edge = len(edges_coo[etype][0]) feat = np.random.randn(num_edge, 4) feat_path = os.path.join(input_dir, f"{etype[1]}/feat.npy") with open(feat_path, "wb") as f: np.save(f, feat) g.edges[etype].data["feat"] = torch.from_numpy(feat) edge_data[etype] = {"feat": feat_path} output_dir = os.path.join(root_dir, "chunked-data") chunk_graph( g, "mag240m", node_data, edge_data, num_chunks=num_chunks, output_path=output_dir, data_fmt=data_fmt, edges_fmt=edges_fmt, vector_rows=vector_rows, **kwargs, ) return g def _test_pipeline( num_chunks, num_parts, world_size, graph_formats=None, data_fmt="numpy", 
num_chunks_nodes=None, num_chunks_edges=None, num_chunks_node_data=None, num_chunks_edge_data=None, use_verify_partitions=False, ): if num_parts % world_size != 0: # num_parts should be a multiple of world_size return with tempfile.TemporaryDirectory() as root_dir: g = create_chunked_dataset( root_dir, num_chunks, data_fmt=data_fmt, num_chunks_nodes=num_chunks_nodes, num_chunks_edges=num_chunks_edges, num_chunks_node_data=num_chunks_node_data, num_chunks_edge_data=num_chunks_edge_data, ) # Step1: graph partition in_dir = os.path.join(root_dir, "chunked-data") output_dir = os.path.join(root_dir, "parted_data") os.system( "python3 tools/partition_algo/random_partition.py " "--in_dir {} --out_dir {} --num_partitions {}".format( in_dir, output_dir, num_parts ) ) for ntype in ["author", "institution", "paper"]: fname = os.path.join(output_dir, "{}.txt".format(ntype)) with open(fname, "r") as f: header = f.readline().rstrip() assert isinstance(int(header), int) # Step2: data dispatch partition_dir = os.path.join(root_dir, "parted_data") out_dir = os.path.join(root_dir, "partitioned") ip_config = os.path.join(root_dir, "ip_config.txt") with open(ip_config, "w") as f: for i in range(world_size): f.write(f"127.0.0.{i + 1}\n") cmd = "python3 tools/dispatch_data.py" cmd += f" --in-dir {in_dir}" cmd += f" --partitions-dir {partition_dir}" cmd += f" --out-dir {out_dir}" cmd += f" --ip-config {ip_config}" cmd += " --ssh-port 22" cmd += " --process-group-timeout 60" cmd += " --save-orig-nids" cmd += " --save-orig-eids" cmd += f" --graph-formats {graph_formats}" if graph_formats else "" os.system(cmd) # check if verify_partitions.py is used for validation. 
if use_verify_partitions: cmd = "python3 tools/verify_partitions.py " cmd += f" --orig-dataset-dir {in_dir}" cmd += f" --part-graph {out_dir}" cmd += f" --partitions-dir {output_dir}" os.system(cmd) return # read original node/edge IDs def read_orig_ids(fname): orig_ids = {} for i in range(num_parts): ids_path = os.path.join(out_dir, f"part{i}", fname) part_ids = load_tensors(ids_path) for type, data in part_ids.items(): if type not in orig_ids: orig_ids[type] = data else: orig_ids[type] = torch.cat((orig_ids[type], data)) return orig_ids orig_nids = read_orig_ids("orig_nids.dgl") orig_eids = read_orig_ids("orig_eids.dgl") # load partitions and verify part_config = os.path.join(out_dir, "metadata.json") for i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, i ) verify_partition_data_types(part_g) verify_partition_formats(part_g, graph_formats) verify_graph_feats( g, gpb, part_g, node_feats, edge_feats, orig_nids, orig_eids ) @pytest.mark.parametrize( "num_chunks, num_parts, world_size", [[4, 4, 4], [8, 4, 2], [8, 4, 4], [9, 6, 3], [11, 11, 1], [11, 4, 1]], ) def test_pipeline_basics(num_chunks, num_parts, world_size): _test_pipeline(num_chunks, num_parts, world_size) _test_pipeline( num_chunks, num_parts, world_size, use_verify_partitions=False ) @pytest.mark.parametrize( "graph_formats", [None, "csc", "coo,csc", "coo,csc,csr"] ) def test_pipeline_formats(graph_formats): _test_pipeline(4, 4, 4, graph_formats) @pytest.mark.parametrize( "num_chunks, " "num_parts, " "world_size, " "num_chunks_node_data, " "num_chunks_edge_data", [ # Test cases where no. of chunks more than # no. of partitions [8, 4, 4, 8, 8], [8, 4, 2, 8, 8], [9, 7, 5, 9, 9], [8, 8, 4, 8, 8], # Test cases where no. of chunks smaller # than no. 
@pytest.mark.parametrize(
    "num_chunks, num_parts, world_size",
    [[4, 4, 4], [8, 4, 2], [8, 4, 4], [9, 6, 3], [11, 11, 1], [11, 4, 1]],
)
@pytest.mark.parametrize("few_entity", ["node", "edge"])
def test_partition_hetero_few_entity(
    num_chunks,
    num_parts,
    world_size,
    few_entity,
    graph_formats=None,
    data_fmt="numpy",
    edges_fmt="csv",
    vector_rows=False,
    num_chunks_nodes=None,
    num_chunks_edges=None,
    num_chunks_node_data=None,
    num_chunks_edge_data=None,
):
    """Run the partition pipeline on the mini dataset built by
    ``create_mini_chunked_dataset`` and verify every partition.

    ``few_entity`` is forwarded to the dataset builder; the remaining
    keyword arguments keep their defaults under pytest and are forwarded to
    the builder as well. The tool scripts are launched with ``os.system``
    and their exit statuses are not inspected.
    """
    with tempfile.TemporaryDirectory() as root_dir:
        g = create_mini_chunked_dataset(
            root_dir,
            num_chunks,
            few_entity=few_entity,
            data_fmt=data_fmt,
            edges_fmt=edges_fmt,
            vector_rows=vector_rows,
            num_chunks_nodes=num_chunks_nodes,
            num_chunks_edges=num_chunks_edges,
            num_chunks_node_data=num_chunks_node_data,
            num_chunks_edge_data=num_chunks_edge_data,
        )

        # Step1: graph partition
        in_dir = os.path.join(root_dir, "chunked-data")
        output_dir = os.path.join(root_dir, "parted_data")
        os.system(
            "python3 tools/partition_algo/random_partition.py "
            f"--in_dir {in_dir} --out_dir {output_dir} "
            f"--num_partitions {num_parts}"
        )

        # Step2: data dispatch
        partition_dir = os.path.join(root_dir, "parted_data")
        out_dir = os.path.join(root_dir, "partitioned")
        ip_config = os.path.join(root_dir, "ip_config.txt")
        with open(ip_config, "w") as f:
            for i in range(world_size):
                f.write(f"127.0.0.{i + 1}\n")

        cmd = "python3 tools/dispatch_data.py"
        cmd += f" --in-dir {in_dir}"
        cmd += f" --partitions-dir {partition_dir}"
        cmd += f" --out-dir {out_dir}"
        cmd += f" --ip-config {ip_config}"
        cmd += " --ssh-port 22"
        cmd += " --process-group-timeout 60"
        cmd += " --save-orig-nids"
        cmd += " --save-orig-eids"
        if graph_formats:
            cmd += f" --graph-formats {graph_formats}"
        os.system(cmd)

        # read original node/edge IDs
        def read_orig_ids(fname):
            """Concatenate the per-partition tensors stored in ``fname``."""
            orig_ids = {}
            for i in range(num_parts):
                ids_path = os.path.join(out_dir, f"part{i}", fname)
                part_ids = load_tensors(ids_path)
                # ``key`` rather than ``type`` so the builtin is not shadowed.
                for key, data in part_ids.items():
                    if key not in orig_ids:
                        orig_ids[key] = data
                    else:
                        orig_ids[key] = torch.cat((orig_ids[key], data))
            return orig_ids

        orig_nids = read_orig_ids("orig_nids.dgl")
        orig_eids = read_orig_ids("orig_eids.dgl")

        # load partitions and verify
        part_config = os.path.join(out_dir, "metadata.json")
        for i in range(num_parts):
            part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition(
                part_config, i
            )
            verify_partition_data_types(part_g)
            verify_partition_formats(part_g, graph_formats)
            verify_graph_feats(
                g, gpb, part_g, node_feats, edge_feats, orig_nids, orig_eids
            )
_get_inner_node_mask, load_partition, RESERVED_FIELD_DTYPE, ) from distpartitioning import array_readwriter from distpartitioning.utils import generate_read_list from pytest_utils import create_chunked_dataset def _verify_metadata_gb(gpb, g, num_parts, part_id, part_sizes): """ check list: make sure the number of nodes and edges is correct. make sure the number of parts is correct. make sure the number of nodes and edges in each part is corrcet. """ assert gpb._num_nodes() == g.num_nodes() assert gpb._num_edges() == g.num_edges() assert gpb.num_partitions() == num_parts gpb_meta = gpb.metadata() assert len(gpb_meta) == num_parts assert len(gpb.partid2nids(part_id)) == gpb_meta[part_id]["num_nodes"] assert len(gpb.partid2eids(part_id)) == gpb_meta[part_id]["num_edges"] part_sizes.append( (gpb_meta[part_id]["num_nodes"], gpb_meta[part_id]["num_edges"]) ) def _verify_local_id_gb(part_g, part_id, gpb): """ check list: make sure the type of local id is correct. make sure local id have a right order. """ nid = F.boolean_mask( part_g.node_attributes[dgl.NID], part_g.node_attributes["inner_node"], ) local_nid = gpb.nid2localnid(nid, part_id) assert F.dtype(local_nid) in (F.int64, F.int32) assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) eid = F.boolean_mask( part_g.edge_attributes[dgl.EID], part_g.edge_attributes["inner_edge"], ) local_eid = gpb.eid2localeid(eid, part_id) assert F.dtype(local_eid) in (F.int64, F.int32) assert np.all(np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid))) return local_nid, local_eid def _verify_map_gb( part_g, part_id, gpb, ): """ check list: make sure the map node and its data type is correct. """ # Check the node map. 
local_nodes = F.boolean_mask( part_g.node_attributes[dgl.NID], part_g.node_attributes["inner_node"], ) inner_node_index = F.nonzero_1d(part_g.node_attributes["inner_node"]) mapping_nodes = gpb.partid2nids(part_id) assert F.dtype(mapping_nodes) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(mapping_nodes)) ) assert np.all( F.asnumpy(inner_node_index) == np.arange(len(inner_node_index)) ) # Check the edge map. local_edges = F.boolean_mask( part_g.edge_attributes[dgl.EID], part_g.edge_attributes["inner_edge"], ) inner_edge_index = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) mapping_edges = gpb.partid2eids(part_id) assert F.dtype(mapping_edges) in (F.int32, F.int64) assert np.all( np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(mapping_edges)) ) assert np.all( F.asnumpy(inner_edge_index) == np.arange(len(inner_edge_index)) ) return local_nodes, local_edges def _verify_local_and_map_id_gb( part_g, part_id, gpb, store_inner_node, store_inner_edge, store_eids, ): """ check list: make sure local id are correct. make sure mapping id are correct. """ if store_inner_node and store_inner_edge and store_eids: _verify_local_id_gb(part_g, part_id, gpb) _verify_map_gb(part_g, part_id, gpb) def _get_part_IDs(part_g): # These are partition-local IDs. num_columns = part_g.csc_indptr.diff() part_src_ids = part_g.indices part_dst_ids = torch.arange(part_g.total_num_nodes).repeat_interleave( num_columns ) # These are reshuffled global homogeneous IDs. part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) return part_src_ids, part_dst_ids def _verify_node_type_ID_gb(part_g, gpb): """ check list: make sure ntype id have correct data type """ part_src_ids, part_dst_ids = _get_part_IDs(part_g) # These are reshuffled per-type IDs. 
def _verify_orig_edge_IDs_gb(
    g,
    orig_nids,
    orig_eids,
    part_eids,
    part_src_ids,
    part_dst_ids,
    src_ntype=None,
    dst_ntype=None,
    etype=None,
):
    """
    check list:
        the original edge IDs looked up through ``orig_eids`` equal the
        edge IDs ``g`` reports for the original (src, dst) endpoints.

    When both ``src_ntype`` and ``dst_ntype`` are given, ``orig_nids`` is
    indexed by node type first; when ``etype`` is given, ``orig_eids`` is
    indexed by it first.
    """
    has_ntypes = src_ntype is not None and dst_ntype is not None
    src_table = orig_nids[src_ntype] if has_ntypes else orig_nids
    dst_table = orig_nids[dst_ntype] if has_ntypes else orig_nids

    # Translate the partition endpoints back to original node IDs.
    src_in_orig = F.gather_row(src_table, part_src_ids)
    dst_in_orig = F.gather_row(dst_table, part_dst_ids)

    eid_table = orig_eids if etype is None else orig_eids[etype]
    eids_from_map = F.gather_row(eid_table, part_eids)
    eids_from_graph = g.edge_ids(src_in_orig, dst_in_orig, etype=etype)

    assert len(eids_from_map) == len(eids_from_graph)
    assert np.all(F.asnumpy(eids_from_map) == F.asnumpy(eids_from_graph))
""" part_eids = part_g.edge_attributes[dgl.EID] if is_homo: _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, part_eids, part_src_ids, part_dst_ids ) local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] part_g.node_attributes["feats"] = F.gather_row( g.ndata["feats"], local_orig_nids ) part_g.edge_attributes["feats"] = F.gather_row( g.edata["feats"], local_orig_eids ) else: etype_ids, part_eids = gpb.map_to_per_etype(part_eids) # `IdMap` is in int64 by default. assert etype_ids.dtype == F.int64 # These are original per-type IDs. for etype_id, etype in enumerate(g.canonical_etypes): part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) src_ntype_ids1 = F.boolean_mask( src_ntype_ids, etype_ids == etype_id ) part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) dst_ntype_ids1 = F.boolean_mask( dst_ntype_ids, etype_ids == etype_id ) part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, part_eids1, part_src_ids1, part_dst_ids1, src_ntype, dst_ntype, etype, ) def _verify_constructed_id_gb(part_sizes, gpb): """ verify the part id of each node by constructed nids. 
check list: make sure each node' part id and its type are corect """ node_map = [] edge_map = [] for part_i, (num_nodes, num_edges) in enumerate(part_sizes): node_map.append(np.ones(num_nodes) * part_i) edge_map.append(np.ones(num_edges) * part_i) node_map = np.concatenate(node_map) edge_map = np.concatenate(edge_map) nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) assert F.dtype(nid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(nid2pid) == node_map) eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) assert F.dtype(eid2pid) in (F.int32, F.int64) assert np.all(F.asnumpy(eid2pid) == edge_map) def _verify_IDs_gb( g, part_g, part_id, gpb, part_sizes, orig_nids, orig_eids, store_inner_node, store_inner_edge, store_eids, is_homo, ): # verify local id and mapping id _verify_local_and_map_id_gb( part_g, part_id, gpb, store_inner_node, store_inner_edge, store_eids, ) # Verify the mapping between the reshuffled IDs and the original IDs. ( part_src_ids, part_dst_ids, src_ntype_ids, part_src_ids, dst_ntype_ids, ) = _verify_node_type_ID_gb(part_g, gpb) if store_eids: _verify_orig_IDs_gb( part_g, gpb, g, part_src_ids=part_src_ids, part_dst_ids=part_dst_ids, src_ntype_ids=src_ntype_ids, dst_ntype_ids=dst_ntype_ids, orig_nids=orig_nids, orig_eids=orig_eids, is_homo=is_homo, ) _verify_constructed_id_gb(part_sizes, gpb) def _collect_data_gb( parts, part_g, gpbs, gpb, tot_node_feats, node_feats, tot_edge_feats, edge_feats, shuffled_labels, shuffled_edata, test_ntype, test_etype, ): if test_ntype != None: shuffled_labels.append(node_feats[test_ntype + "/label"]) shuffled_edata.append( edge_feats[_etype_tuple_to_str(test_etype) + "/count"] ) else: shuffled_labels.append(node_feats["_N/labels"]) shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) parts.append(part_g) gpbs.append(gpb) tot_node_feats.append(node_feats) tot_edge_feats.append(edge_feats) def _verify_node_feats(g, part, gpb, orig_nids, node_feats, is_homo=False): for ntype in g.ntypes: ndata = ( 
def _verify_edge_feats(g, part, gpb, orig_eids, edge_feats, is_homo=False):
    """Check, per canonical edge type, that the stored edge features of the
    partition's inner edges equal the features of ``g`` at the
    corresponding original edge IDs.

    ``orig_eids`` is indexed directly when ``is_homo`` is true and by edge
    type first otherwise.
    """
    for etype in g.canonical_etypes:
        if isinstance(part, gb.FusedCSCSamplingGraph):
            edge_attrs = part.edge_attributes
        else:
            edge_attrs = part.edata

        etype_id = g.get_etype_id(etype)
        inner_mask = _get_inner_edge_mask(part, etype_id)
        inner_eids = F.boolean_mask(edge_attrs[dgl.EID], inner_mask)

        # Split the homogeneous IDs into (edge type ID, per-type ID).
        etype_ids, per_type_eids = gpb.map_to_per_etype(inner_eids)
        owner = gpb.eid2partid(per_type_eids, etype)
        assert np.all(F.asnumpy(etype_ids) == etype_id)
        assert np.all(F.asnumpy(owner) == gpb.partid)

        if is_homo:
            orig_id = orig_eids[per_type_eids]
        else:
            orig_id = orig_eids[etype][per_type_eids]
        local_eids = gpb.eid2localeid(per_type_eids, gpb.partid, etype)

        etype_str = None
        for name in g.edges[etype].data:
            if name in (dgl.EID, "inner_edge"):
                continue
            expected = F.gather_row(g.edges[etype].data[name], orig_id)
            etype_str = _etype_tuple_to_str(etype)
            stored = F.gather_row(
                edge_feats[etype_str + "/" + name], local_eids
            )
            assert np.all(F.asnumpy(stored == expected))
def verify_graph_feats_gb(
    g,
    gpbs,
    parts,
    tot_node_feats,
    tot_edge_feats,
    orig_nids,
    orig_eids,
    shuffled_labels,
    shuffled_edata,
    test_ntype,
    test_etype,
    store_inner_node=False,
    store_inner_edge=False,
    store_eids=False,
    is_homo=False,
):
    """
    check list:
        node and edge features of every partition are correct.
        the collected (shuffled) labels and edge data are correct.

    Node features are checked only when ``store_inner_node`` is set; edge
    features only when both ``store_inner_edge`` and ``store_eids`` are set.
    """
    check_edges = store_inner_edge and store_eids
    for part_id, part in enumerate(parts):
        gpb = gpbs[part_id]
        node_feats = tot_node_feats[part_id]
        edge_feats = tot_edge_feats[part_id]
        if store_inner_node:
            _verify_node_feats(
                g,
                part,
                gpb,
                orig_nids,
                node_feats,
                is_homo=is_homo,
            )
        if check_edges:
            _verify_edge_feats(
                g,
                part,
                gpb,
                orig_eids,
                edge_feats,
                is_homo=is_homo,
            )

    _verify_shuffled_labels_gb(
        g,
        shuffled_labels,
        shuffled_edata,
        orig_nids,
        orig_eids,
        test_ntype,
        test_etype,
    )
""" for part in parts: assert store_inner_edge == ("inner_edge" in part.edge_attributes) assert store_inner_node == ("inner_node" in part.node_attributes) assert store_eids == (dgl.EID in part.edge_attributes) def _verify_graphbolt_part( g, test_dir, orig_nids, orig_eids, graph_name, num_parts, store_inner_node, store_inner_edge, store_eids, part_config=None, test_ntype=None, test_etype=None, is_homo=False, ): """ check list: _verify_metadata_gb: data type, ID's order and ID's number of edges and nodes _verify_IDs_gb: local id, mapping id,node type id, orig edge, hetero ntype id verify_graph_feats_gb: nodes and edges' feats _verify_graphbolt_attributes: arguments """ parts = [] tot_node_feats = [] tot_edge_feats = [] shuffled_labels = [] shuffled_edata = [] part_sizes = [] gpbs = [] if part_config is None: part_config = os.path.join(test_dir, f"{graph_name}.json") # test each part for part_id in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=True ) # verify metadata _verify_metadata_gb( gpb, g, num_parts, part_id, part_sizes, ) # verify eid and nid _verify_IDs_gb( g, part_g, part_id, gpb, part_sizes, orig_nids, orig_eids, store_inner_node, store_inner_edge, store_eids, is_homo, ) # collect shuffled data and parts _collect_data_gb( parts, part_g, gpbs, gpb, tot_node_feats, node_feats, tot_edge_feats, edge_feats, shuffled_labels, shuffled_edata, test_ntype, test_etype, ) # verify graph feats verify_graph_feats_gb( g, gpbs, parts, tot_node_feats, tot_edge_feats, orig_nids, orig_eids, shuffled_labels=shuffled_labels, shuffled_edata=shuffled_edata, test_ntype=test_ntype, test_etype=test_etype, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, store_eids=store_eids, is_homo=is_homo, ) _verify_graphbolt_attributes( parts, store_inner_node, store_inner_edge, store_eids ) return parts def _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge, debug_mode, ): 
""" check list: make sure edge type are correct. make sure the number of nodes in each node type are correct. make sure the number of nodes in each node type are correct. """ num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} for part in parts: edata = ( part.edge_attributes if isinstance(part, gb.FusedCSCSamplingGraph) else part.edata ) if dgl.ETYPE in edata: assert len(g.canonical_etypes) == len(F.unique(edata[dgl.ETYPE])) if debug_mode or isinstance(part, dgl.DGLGraph): for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask(part, ntype_id) num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) num_nodes[ntype] += num_inner_nodes if store_inner_edge or isinstance(part, dgl.DGLGraph): for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask(part, etype_id) num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) num_edges[etype] += num_inner_edges # Verify the number of nodes are correct. if debug_mode or isinstance(part, dgl.DGLGraph): for ntype in g.ntypes: print( "node {}: {}, {}".format( ntype, g.num_nodes(ntype), num_nodes[ntype] ) ) assert g.num_nodes(ntype) == num_nodes[ntype] # Verify the number of edges are correct. if store_inner_edge or isinstance(part, dgl.DGLGraph): for etype in g.canonical_etypes: print( "edge {}: {}, {}".format( etype, g.num_edges(etype), num_edges[etype] ) ) assert g.num_edges(etype) == num_edges[etype] def _verify_edge_id_range_hetero( g, part, eids, ): """ check list: make sure inner_eids fall into a range. make sure all edges are included. 
def _verify_node_id_range_hetero(g, part, nids):
    """
    check list:
        for every node type, the inner node IDs of ``part`` form one
        consecutive run from the first ID to the last.

    The inner node IDs are appended to ``nids[ntype]`` and ``nids`` is
    returned.
    """
    for ntype in g.ntypes:
        ntype_id = g.get_ntype_id(ntype)
        mask = _get_inner_node_mask(part, ntype_id)
        inner_nids = F.boolean_mask(part.node_attributes[dgl.NID], mask)

        # Make sure inner nodes have Ids fall into a range.
        first_id = F.as_scalar(inner_nids[0])
        last_id = F.as_scalar(inner_nids[-1])
        expected_run = F.arange(first_id, last_id + 1)
        assert np.all(F.asnumpy(inner_nids == expected_run))

        nids[ntype].append(inner_nids)
    return nids
def _test_pipeline_graphbolt(
    num_chunks,
    num_parts,
    world_size,
    graph_formats=None,
    data_fmt="numpy",
    num_chunks_nodes=None,
    num_chunks_edges=None,
    num_chunks_node_data=None,
    num_chunks_edge_data=None,
    use_verify_partitions=False,
    store_eids=True,
    store_inner_edge=True,
    store_inner_node=True,
):
    """Run the chunk -> partition -> dispatch pipeline with GraphBolt output.

    A chunked dataset is created in a temporary directory, nodes are
    assigned to partitions by ``tools/partition_algo/random_partition.py``
    and the data is dispatched by ``tools/dispatch_data.py --use-graphbolt``.
    The result is then validated either by ``tools/verify_partitions.py``
    or by the in-process ``_verify_graphbolt_part`` and
    ``_verify_hetero_graph`` helpers.

    Parameters
    ----------
    num_chunks : int
        Number of chunks passed to ``create_chunked_dataset``.
    num_parts : int
        Number of partitions to create. The test is silently skipped when
        it is not a multiple of ``world_size``.
    world_size : int
        Number of addresses written to the IP config file.
    graph_formats : str, optional
        Forwarded as ``--graph-formats`` when truthy.
    data_fmt : str
        Feature file format forwarded to ``create_chunked_dataset``.
    num_chunks_nodes, num_chunks_edges : optional
        Forwarded to ``create_chunked_dataset``.
    num_chunks_node_data, num_chunks_edge_data : optional
        Forwarded to ``create_chunked_dataset``.
    use_verify_partitions : bool
        If True, validate with ``tools/verify_partitions.py`` only.
    store_eids, store_inner_edge, store_inner_node : bool
        Toggle the matching ``--store-*`` flags of the dispatch tool and
        are forwarded to the verification helpers.
    """
    if num_parts % world_size != 0:
        # num_parts should be a multiple of world_size
        return

    with tempfile.TemporaryDirectory() as root_dir:
        g = create_chunked_dataset(
            root_dir,
            num_chunks,
            data_fmt=data_fmt,
            num_chunks_nodes=num_chunks_nodes,
            num_chunks_edges=num_chunks_edges,
            num_chunks_node_data=num_chunks_node_data,
            num_chunks_edge_data=num_chunks_edge_data,
        )
        graph_name = "test"
        test_ntype = "paper"
        test_etype = ("paper", "cites", "paper")

        # Step1: graph partition
        in_dir = os.path.join(root_dir, "chunked-data")
        output_dir = os.path.join(root_dir, "parted_data")
        status = os.system(
            "python3 tools/partition_algo/random_partition.py "
            "--in_dir {} --out_dir {} --num_partitions {}".format(
                in_dir, output_dir, num_parts
            )
        )
        # Fail right here instead of on a confusing missing-file error later.
        assert status == 0, f"random_partition.py exited with status {status}"
        for ntype in ["author", "institution", "paper"]:
            fname = os.path.join(output_dir, "{}.txt".format(ntype))
            with open(fname, "r") as f:
                header = f.readline().rstrip()
                # int() raises ValueError when the header is not an integer.
                assert isinstance(int(header), int)

        # Step2: data dispatch
        partition_dir = os.path.join(root_dir, "parted_data")
        out_dir = os.path.join(root_dir, "partitioned")
        ip_config = os.path.join(root_dir, "ip_config.txt")
        with open(ip_config, "w") as f:
            for i in range(world_size):
                f.write(f"127.0.0.{i + 1}\n")

        cmd = "python3 tools/dispatch_data.py "
        cmd += f" --in-dir {in_dir} "
        cmd += f" --partitions-dir {partition_dir} "
        cmd += f" --out-dir {out_dir} "
        cmd += f" --ip-config {ip_config} "
        cmd += " --ssh-port 22 "
        cmd += " --process-group-timeout 60 "
        cmd += " --save-orig-nids "
        cmd += " --save-orig-eids "
        cmd += " --use-graphbolt "
        if graph_formats:
            cmd += f" --graph-formats {graph_formats} "
        if store_eids:
            cmd += " --store-eids "
        if store_inner_edge:
            cmd += " --store-inner-edge "
        if store_inner_node:
            cmd += " --store-inner-node "
        status = os.system(cmd)
        assert status == 0, f"dispatch_data.py exited with status {status}"

        # check if verify_partitions.py is used for validation.
        if use_verify_partitions:
            cmd = "python3 tools/verify_partitions.py "
            cmd += f" --orig-dataset-dir {in_dir}"
            cmd += f" --part-graph {out_dir}"
            cmd += f" --partitions-dir {output_dir}"
            status = os.system(cmd)
            # Without this check this validation path could never fail.
            assert (
                status == 0
            ), f"verify_partitions.py exited with status {status}"
            return

        # read original node/edge IDs
        def read_orig_ids(fname):
            """Concatenate the per-type ID tensors of all partitions."""
            orig_ids = {}
            for i in range(num_parts):
                ids_path = os.path.join(out_dir, f"part{i}", fname)
                part_ids = load_tensors(ids_path)
                for type_name, data in part_ids.items():
                    if type_name not in orig_ids:
                        orig_ids[type_name] = data
                    else:
                        orig_ids[type_name] = torch.cat(
                            (orig_ids[type_name], data)
                        )
            return orig_ids

        orig_nids = read_orig_ids("orig_nids.dgl")
        orig_eids_str = read_orig_ids("orig_eids.dgl")

        # transmit etype from string to tuple.
        orig_eids = {
            _etype_str_to_tuple(etype): eids
            for etype, eids in orig_eids_str.items()
        }

        # load partitions and verify
        part_config = os.path.join(out_dir, "metadata.json")
        parts = _verify_graphbolt_part(
            g,
            root_dir,
            orig_nids,
            orig_eids,
            graph_name,
            num_parts,
            store_inner_node,
            store_inner_edge,
            store_eids,
            test_ntype=test_ntype,
            test_etype=test_etype,
            part_config=part_config,
            is_homo=False,
        )
        # NOTE(review): store_inner_node is not forwarded here, so the node
        # ID check only runs for DGLGraph partitions -- confirm that this is
        # intended before changing it.
        _verify_hetero_graph(
            g,
            parts,
            store_eids=store_eids,
            store_inner_edge=store_inner_edge,
        )
class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
    """wrap_udf_in_torch_dist_launcher()"""

    # Launcher settings shared by every case in this class.
    _LAUNCH_KWARGS = {
        "num_trainers": 2,
        "num_nodes": 2,
        "node_rank": 1,
        "master_addr": "127.0.0.1",
        "master_port": 1234,
    }
    # Everything expected to follow the python binary in the wrapped command.
    _EXPECTED_TAIL = (
        "-m torch.distributed.run "
        "--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
        "--master_port=1234 path/to/some/trainer.py arg1 arg2"
    )

    def _wrap(self, udf_command):
        """Wrap ``udf_command`` using the shared launcher settings."""
        return wrap_udf_in_torch_dist_launcher(
            udf_command=udf_command, **self._LAUNCH_KWARGS
        )

    def test_simple(self):
        # a plain udf_command gets the launcher inserted after the binary
        wrapped = self._wrap("python3.7 path/to/some/trainer.py arg1 arg2")
        self.assertEqual(wrapped, "python3.7 " + self._EXPECTED_TAIL)

    def test_chained_udf(self):
        # only the python invocation of a chained command is rewritten
        wrapped = self._wrap(
            "cd path/to && python3.7 path/to/some/trainer.py arg1 arg2"
        )
        self.assertEqual(
            wrapped, "cd path/to && python3.7 " + self._EXPECTED_TAIL
        )

    def test_py_versions(self):
        # every flavour of python binary must be recognised
        for py_bin in (
            "python3.7",
            "python3.8",
            "python3.9",
            "python3",
            "python",
        ):
            wrapped = self._wrap(
                "{python_bin} path/to/some/trainer.py arg1 arg2".format(
                    python_bin=py_bin
                )
            )
            self.assertEqual(wrapped, py_bin + " " + self._EXPECTED_TAIL)
"DGL_IP_CONFIG=path/to/ip.config " "DGL_NUM_SERVER=3 " "DGL_GRAPH_FORMAT=csc " "OMP_NUM_THREADS=4 " "DGL_GROUP_ID=0 " "PYTHONPATH=some/pythonpath/ " ), ) # without pythonpath self.assertEqual( construct_dgl_client_env_vars( num_samplers=1, tot_num_clients=2, part_config="path/to/part.config", ip_config="path/to/ip.config", num_servers=3, graph_format="csc", num_omp_threads=4, group_id=0, ), ( "DGL_DIST_MODE=distributed " "DGL_ROLE=client " "DGL_NUM_SAMPLER=1 " "DGL_NUM_CLIENT=2 " "DGL_CONF_PATH=path/to/part.config " "DGL_IP_CONFIG=path/to/ip.config " "DGL_NUM_SERVER=3 " "DGL_GRAPH_FORMAT=csc " "OMP_NUM_THREADS=4 " "DGL_GROUP_ID=0 " ), ) def test_submit_jobs(): class Args: pass args = Args() with tempfile.TemporaryDirectory() as test_dir: num_machines = 8 ip_config = os.path.join(test_dir, "ip_config.txt") with open(ip_config, "w") as f: for i in range(num_machines): f.write("{} {}\n".format("127.0.0." + str(i), 30050)) part_config = os.path.join(test_dir, "ogb-products.json") with open(part_config, "w") as f: json.dump({"num_parts": num_machines}, f) args.num_trainers = 8 args.num_samplers = 1 args.num_servers = 4 args.workspace = test_dir args.part_config = "ogb-products.json" args.ip_config = "ip_config.txt" args.num_server_threads = 1 args.graph_format = "csc" args.extra_envs = ["NCCL_DEBUG=INFO"] args.num_omp_threads = 1 udf_command = "python3 train_dist.py --num_epochs 10" clients_cmd, servers_cmd = submit_jobs(args, udf_command, dry_run=True) def common_checks(): assert "cd " + test_dir in cmd assert "export " + args.extra_envs[0] in cmd assert f"DGL_NUM_SAMPLER={args.num_samplers}" in cmd assert ( f"DGL_NUM_CLIENT={args.num_trainers*(args.num_samplers+1)*num_machines}" in cmd ) assert f"DGL_CONF_PATH={args.part_config}" in cmd assert f"DGL_IP_CONFIG={args.ip_config}" in cmd assert f"DGL_NUM_SERVER={args.num_servers}" in cmd assert f"DGL_GRAPH_FORMAT={args.graph_format}" in cmd assert f"OMP_NUM_THREADS={args.num_omp_threads}" in cmd assert 
@unittest.skipIf(True, reason="mpi is not available in CI test framework.")
def test_parmetis_preprocessing():
    """Run the ParMETIS pre-processing tool and validate its output files.

    The node and edge files listed in ``parmetis_nfiles.txt`` and
    ``parmetis_efiles.txt`` must exist, and their total line counts must
    match the node and edge counts recorded in the dataset's
    ``metadata.json``.
    """
    with tempfile.TemporaryDirectory() as root_dir:
        num_chunks = 2
        create_chunked_dataset(root_dir, num_chunks)

        # Trigger ParMETIS pre-processing here.
        input_dir = os.path.join(root_dir, "chunked-data")
        results_dir = os.path.join(root_dir, "parmetis-data")
        # The schema is the metadata.json of the chunked dataset; the same
        # file is re-read below to obtain the expected counts.
        schema_file = os.path.join(input_dir, "metadata.json")
        os.system(
            f"mpirun -np {num_chunks} python3 tools/distpartitioning/parmetis_preprocess.py "
            f"--schema {schema_file} "
            f"--input_dir {input_dir} "
            f"--output_dir {results_dir} "
            f"--num_parts {num_chunks}"
        )

        # Now add all the tests and check whether the test has passed or failed.
        # Read parmetis_nfiles and ensure all files are present.
        parmetis_data_dir = os.path.join(root_dir, "parmetis-data")
        assert os.path.isdir(parmetis_data_dir)
        parmetis_nodes_file = os.path.join(
            parmetis_data_dir, "parmetis_nfiles.txt"
        )
        assert os.path.isfile(parmetis_nodes_file)

        # Each line of `parmetis_nfiles.txt` has three space-separated
        # tokens: a node file path, the running node count before that file
        # and the running node count after it.
        total_node_count = 0
        with open(parmetis_nodes_file, "r") as nodes_metafile:
            for line in nodes_metafile:
                tokens = line.split(" ")
                assert len(tokens) == 3
                assert os.path.isfile(tokens[0])
                assert int(tokens[1]) == total_node_count

                # Each line of a node file counts as one node.
                with open(tokens[0], "r") as nodes_file:
                    total_node_count += sum(1 for _ in nodes_file)
                assert int(tokens[2]) == total_node_count

        # Meta_data object.
        assert os.path.isfile(schema_file)
        with open(schema_file, "rb") as f:
            meta_data = json.load(f)

        # Count the total no. of nodes.
        true_node_count = sum(
            sum(nodes_per_part)
            for nodes_per_part in meta_data["num_nodes_per_chunk"]
        )
        assert total_node_count == true_node_count

        # Read parmetis_efiles and ensure all files are present.
        # This file contains a list of filenames.
        parmetis_edges_file = os.path.join(
            parmetis_data_dir, "parmetis_efiles.txt"
        )
        assert os.path.isfile(parmetis_edges_file)

        total_edge_count = 0
        with open(parmetis_edges_file, "r") as edges_metafile:
            for line in edges_metafile:
                edges_filename = line.strip()
                assert os.path.isfile(edges_filename)

                with open(edges_filename, "r") as edges_file:
                    for edge_line in edges_file:
                        total_edge_count += 1
                        # Every edge line holds exactly two tokens.
                        assert len(edge_line.split(" ")) == 2

        # Count the total no. of edges
        true_edge_count = sum(
            sum(edges_per_part)
            for edges_per_part in meta_data["num_edges_per_chunk"]
        )
        assert true_edge_count == total_edge_count
@unittest.skipIf(True, reason="mpi is not available in CI test framework.")
def test_parmetis_wrapper():
    """Run the ParMETIS wrapper tool and check the partition id files.

    For every node type the wrapper must produce ``<ntype>.txt`` with one
    partition id per node, and the ids must span ``0 .. num_chunks - 1``.
    """
    with tempfile.TemporaryDirectory() as root_dir:
        num_chunks = 2
        graph_name = "mag240m"
        g = create_chunked_dataset(root_dir, num_chunks)
        num_constraints = len(g.ntypes) + 3
        # Expected number of partition ids per node type.
        ntype_count = {
            "author": g.num_nodes("author"),
            "paper": g.num_nodes("paper"),
            "institution": g.num_nodes("institution"),
        }

        # Trigger ParMETIS.
        schema_file = os.path.join(root_dir, "chunked-data/metadata.json")
        preproc_input_dir = os.path.join(root_dir, "chunked-data")
        preproc_output_dir = os.path.join(
            root_dir, "chunked-data/preproc_output_dir"
        )
        parmetis_output_file = os.path.join(
            os.getcwd(), f"{graph_name}_part.{num_chunks}"
        )
        partitions_dir = os.path.join(root_dir, "chunked-data/partitions_dir")

        hostfile = os.path.join(root_dir, "ip_config.txt")
        with open(hostfile, "w") as f:
            f.write("127.0.0.1\n")
            f.write("127.0.0.1\n")

        # The stats file is written to the current working directory.
        stats_file = f"{graph_name}_stats.txt"
        with open(stats_file, "w") as f:
            f.write(f"{g.num_nodes()} {g.num_edges()} {num_constraints}")

        command = (
            f"python3 tools/distpartitioning/parmetis_wrapper.py "
            f"--schema_file {schema_file} "
            f"--preproc_input_dir {preproc_input_dir} "
            f"--preproc_output_dir {preproc_output_dir} "
            f"--hostfile {hostfile} "
            f"--num_parts {num_chunks} "
            f"--parmetis_output_file {parmetis_output_file} "
            f"--partitions_dir {partitions_dir} "
        )
        os.system(command)
        print("Executing Done.")

        for ntype_name in ["author", "paper", "institution"]:
            fname = os.path.join(partitions_dir, f"{ntype_name}.txt")
            print(fname)
            assert os.path.isfile(fname)

            # Load and check the partition ids in this file.
            part_ids = np.loadtxt(fname)
            assert part_ids.shape[0] == ntype_count[ntype_name]
            assert np.min(part_ids) == 0
            assert np.max(part_ids) == (num_chunks - 1)
of files to create for each edge type edge_fmt : str, optional to specify whether this file is csv or parquet edge_fmt_del : str optional delimiter to use in the edges file Returns: -------- dict : dictionary created which represents the schema used for creating the input dataset """ schema = {} schema["num_nodes_per_type"] = [10] schema["edge_type"] = [EDGE_TYPE] schema["node_type"] = [NODE_TYPE] edges = {} edges[EDGE_TYPE] = {} edges[EDGE_TYPE]["format"] = {} edges[EDGE_TYPE]["format"]["name"] = edge_fmt edges[EDGE_TYPE]["format"]["delimiter"] = edge_fmt_del os.makedirs(edges_dir, exist_ok=True) fmt_meta = {"name": edge_fmt} if edge_fmt == "csv": fmt_meta["delimiter"] = edge_fmt_del edge_files = [] for idx in range(num_chunks): path = os.path.join(edges_dir, f"test_file_{idx}.{fmt_meta['name']}") array_parser = array_readwriter.get_array_parser(**fmt_meta) edge_data = ( np.array([np.arange(10), np.arange(10)]).reshape(10, 2) + 10 * idx ) array_parser.write(path, edge_data) edge_files.append(path) edges[EDGE_TYPE]["data"] = edge_files schema["edges"] = edges return schema @pytest.mark.parametrize("num_chunks, num_parts", [[4, 1], [4, 2], [4, 4]]) @pytest.mark.parametrize("edges_fmt", ["csv", "parquet"]) @pytest.mark.parametrize("edges_delimiter", [" ", ","]) def test_gen_edge_files(num_chunks, num_parts, edges_fmt, edges_delimiter): """Unit test case for the function tools/distpartitioning/parmetis_preprocess.py::gen_edge_files Parameters: ----------- num_chunks : int no. of chunks the input graph needs to be partititioned into num_parts : int no. 
of partitions edges_fmt : string specifying the storage format for the edge files edges_delimiter : string specifying the delimiter used in the edge files """ # Create the input dataset with tempfile.TemporaryDirectory() as root_dir: # Create expected environment for test input_dir = os.path.join(root_dir, "chunked-data") output_dir = os.path.join(root_dir, "preproc_dir") # Mock a parser object fn_params = namedtuple("fn_params", "input_dir output_dir num_parts") fn_params.input_dir = input_dir fn_params.output_dir = output_dir fn_params.num_parts = num_parts # Create test files and get corresponding file schema schema_map = _get_test_data( input_dir, num_chunks, edges_fmt, edges_delimiter ) edges_file_list = schema_map["edges"][EDGE_TYPE]["data"] # This is breaking encapsulation, but no other good way to get file list rank_assignments = generate_roundrobin_read_list( len(edges_file_list), num_parts ) # Get the global node id offsets for each node type # There is only one node-type in the test graph # which range from 0 thru 9. ntype_gnid_offset = {} ntype_gnid_offset[NODE_TYPE] = np.array([0, 10 * num_chunks]).reshape( 1, 2 ) # Iterate over no. of partitions for rank in range(num_parts): actual_results = gen_edge_files(rank, schema_map, fn_params) # Get the original files original_files = [ edges_file_list[file_idx] for file_idx in rank_assignments[rank] ] # Validate the results with the baseline results # Test 1. no. of files should have the same count per rank assert len(original_files) == len(actual_results) assert len(actual_results) > 0 # Test 2. Check the contents of each file and verify the # file contents match with the expected results. 
def check_fail(fn, *args, **kwargs):
    """Return True if ``fn(*args, **kwargs)`` raises, False otherwise.

    Parameters
    ----------
    fn : callable
        The callable to invoke.
    *args, **kwargs
        Forwarded to ``fn`` unchanged.

    Returns
    -------
    bool
        True when the call raised an ``Exception``; False when it returned
        normally.
    """
    try:
        fn(*args, **kwargs)
    except Exception:
        # Only ordinary errors count as a "failure". KeyboardInterrupt and
        # SystemExit must propagate so a test run can still be aborted.
        return True
    return False
assert_is_identical_hetero(g, g2, ignore_internal_data=False): assert g.ntypes == g2.ntypes assert g.canonical_etypes == g2.canonical_etypes # check if two metagraphs are identical for edges, features in g.metagraph().edges(keys=True).items(): assert g2.metagraph().edges(keys=True)[edges] == features # check if node ID spaces and feature spaces are equal for ntype in g.ntypes: assert g.num_nodes(ntype) == g2.num_nodes(ntype) if ignore_internal_data: for k in list(g.nodes[ntype].data.keys()): if is_internal_column(k): del g.nodes[ntype].data[k] for k in list(g2.nodes[ntype].data.keys()): if is_internal_column(k): del g2.nodes[ntype].data[k] assert len(g.nodes[ntype].data) == len(g2.nodes[ntype].data) for k in g.nodes[ntype].data: assert F.allclose(g.nodes[ntype].data[k], g2.nodes[ntype].data[k]) # check if edge ID spaces and feature spaces are equal for etype in g.canonical_etypes: src, dst = g.all_edges(etype=etype, order="eid") src2, dst2 = g2.all_edges(etype=etype, order="eid") assert F.array_equal(src, src2) assert F.array_equal(dst, dst2) if ignore_internal_data: for k in list(g.edges[etype].data.keys()): if is_internal_column(k): del g.edges[etype].data[k] for k in list(g2.edges[etype].data.keys()): if is_internal_column(k): del g2.edges[etype].data[k] assert len(g.edges[etype].data) == len(g2.edges[etype].data) for k in g.edges[etype].data: assert F.allclose(g.edges[etype].data[k], g2.edges[etype].data[k]) def check_graph_equal(g1, g2, *, check_idtype=True, check_feature=True): assert g1.device == g2.device if check_idtype: assert g1.idtype == g2.idtype assert g1.ntypes == g2.ntypes assert g1.etypes == g2.etypes assert g1.srctypes == g2.srctypes assert g1.dsttypes == g2.dsttypes assert g1.canonical_etypes == g2.canonical_etypes assert g1.batch_size == g2.batch_size # check if two metagraphs are identical for edges, features in g1.metagraph().edges(keys=True).items(): assert g2.metagraph().edges(keys=True)[edges] == features for nty in g1.ntypes: assert 
def get_cases(labels=None, exclude=()):
    """Get all graph instances of the given labels.

    Parameters
    ----------
    labels : iterable of str, optional
        Labels to select. All registered labels are used when None.
    exclude : container of str, optional
        A case is dropped when any of its labels is in this container.

    Returns
    -------
    list
        One freshly built graph per selected case. Each case appears once
        even if it matches several labels, and the order follows the order
        in which cases are first encountered.
    """
    if labels is None:
        # get all the cases
        labels = case_registry.keys()
    # A dict keeps insertion order, so it de-duplicates the cases while
    # giving a deterministic result; a set would order them by hash.
    selected = {}
    for lbl in labels:
        for case in case_registry[lbl]:
            if not any(l in exclude for l in case.__labels__):
                selected[case] = None
    return [fn() for fn in selected]
@register_case(["hetero", "has_feature"])
def heterograph0():
    """Build a small CPU heterograph with a random "h" feature everywhere.

    The graph has "user", "game" and "developer" nodes connected by "plays"
    and "develops" edges; every node and edge type gets an "h" feature.
    """
    relations = {
        ("user", "plays", "game"): ([0, 1, 1, 2], [0, 0, 1, 1]),
        ("developer", "develops", "game"): ([0, 1], [0, 1]),
    }
    g = dgl.heterograph(relations, device=F.cpu())

    # (type name, feature width); the order fixes the sequence of random
    # draws, so it must stay as listed.
    node_feat_dims = (("user", 3), ("game", 2), ("developer", 3))
    edge_feat_dims = (("plays", 1), ("develops", 5))

    for ntype, width in node_feat_dims:
        feat = F.randn((g.num_nodes(ntype), width))
        g.nodes[ntype].data["h"] = F.copy_to(feat, F.cpu())
    for etype, width in edge_feat_dims:
        feat = F.randn((g.num_edges(etype), width))
        g.edges[etype].data["h"] = F.copy_to(feat, F.cpu())
    return g
@register_case(["batched", "hetero"])
def batched_heterograph0():
    """Batch three small user/developer/game heterographs into one graph."""
    follows_user = ("user", "follows", "user")
    follows_dev = ("user", "follows", "developer")
    plays = ("user", "plays", "game")

    # One edge dictionary per component graph, in batching order.
    edge_specs = [
        {
            follows_user: ([0, 1], [1, 2]),
            follows_dev: ([0, 1], [1, 2]),
            plays: ([0, 1, 2, 3], [0, 0, 1, 1]),
        },
        {
            follows_user: ([0, 1], [1, 2]),
            follows_dev: ([0, 1], [1, 2]),
            plays: ([0, 1, 2], [0, 0, 1]),
        },
        {
            follows_user: ([1], [2]),
            follows_dev: ([0, 1, 2], [0, 2, 2]),
            plays: ([0, 1], [0, 0]),
        },
    ]
    return dgl.batch([dgl.heterograph(spec) for spec in edge_specs])
* The cache is thread-safe: multiple workers (CPU threads) can concurrently call the API of a single cache object with well-defined behavior.
* The cache implements a least recently used (LRU) replacement algorithm so that it caches the most recently queried embeddings.
* The embeddings stored inside the cache are unique: there are no duplicated embedding IDs in the cache.

## Project Structure

This project is a stand-alone module in the HugeCTR project.
The root folder of this project is the `gpu_cache` folder under the HugeCTR root directory.

The `include` folder contains the headers for the cache library and the `src` folder contains the implementations and Makefile for the cache library.
The `test` folder contains a test that checks the correctness and performance of the GPU embedding cache.
The test also acts as sample code that shows how to use the cache.

The `nv_gpu_cache.hpp` file contains the definition of the main class, `gpu_cache`, that implements the GPU embedding cache.
The `nv_gpu_cache.cu` file contains the implementation.

As a module of HugeCTR, this project is built with and used by the HugeCTR project.

## Supported Data Types

* The cache supports 32-bit and 64-bit scalar integer types for the key (embedding ID) type.
  For example, the data type declarations `unsigned int` and `long long` match these integer types.
* The cache supports a vector of floats for the value (embedding vector) type.
* You need to specify an empty key to indicate the empty bucket.
  Do not use the empty key to represent any real key.
* Refer to the instantiation code at the end of the `nv_gpu_cache.cu` file for template parameters.

## Requirements

* NVIDIA GPU >= Volta (SM 70).
* CUDA environment >= 11.0.
* (Optional) libcu++ library >= 1.1.0.
  The CUDA Toolkit 11.0 (Early Access) and above meet the required library version.
  Using the libcu++ library provides better performance and more precisely-defined behavior.
  You can enable the libcu++ library by defining the `LIBCUDACXX_VERSION` macro when you compile.
  Otherwise, the libcu++ library is not enabled.
* The default build option for HugeCTR is to disable the libcu++ library.

## Usage Overview

```c++
template<typename key_type,
         typename ref_counter_type,
         key_type empty_key,
         int set_associativity,
         int warp_size,
         typename set_hasher = MurmurHash3_32<key_type>,
         typename slab_hasher = Mod_Hash<key_type, size_t>>
class gpu_cache{
public:
    //Ctor
    gpu_cache(const size_t capacity_in_set, const size_t embedding_vec_size);

    //Dtor
    ~gpu_cache();

    // Query API, i.e. A single read from the cache
    void Query(const key_type* d_keys,
               const size_t len,
               float* d_values,
               uint64_t* d_missing_index,
               key_type* d_missing_keys,
               size_t* d_missing_len,
               cudaStream_t stream,
               const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO);

    // Replace API, i.e. Follow the Query API to update the content of the cache to Most Recent
    void Replace(const key_type* d_keys,
                 const size_t len,
                 const float* d_values,
                 cudaStream_t stream,
                 const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO);

    // Update API, i.e. update the embeddings which exist in the cache
    void Update(const key_type* d_keys,
                const size_t len,
                const float* d_values,
                cudaStream_t stream,
                const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO);

    // Dump API, i.e. dump some slabsets' keys from the cache
    void Dump(key_type* d_keys,
              size_t* d_dump_counter,
              const size_t start_set_index,
              const size_t end_set_index,
              cudaStream_t stream);
};
```

## API

`Constructor`

To create a new embedding cache, you need to provide the following:

* Template parameters:
  + key_type: the data type of the embedding ID.
  + ref_counter_type: the data type of the internal counter. This data type should be a 64-bit unsigned integer (i.e. uint64_t); a 32-bit integer has the risk of overflow.
  + empty_key: the key value that indicates an empty bucket (i.e. the empty key). Users should never use the empty key value to represent any real key.
  + set_associativity: the hyper-parameter that indicates how many slabs there are per cache set. (See the `Performance Hint` section below.)
  + warp_size: the hyper-parameter that indicates how many [key, value] pairs there are per slab. Acceptable values are 1/2/4/8/16/32. (See the `Performance Hint` section below.)
  + For other template parameters just use the default value.
* Parameters:
  + capacity_in_set: # of cache sets in the embedding cache. So the total capacity of the embedding cache is `warp_size * set_associativity * capacity_in_set` [key, value] pairs.
  + embedding_vec_size: # of floats per embedding vector.
* The host thread will wait for the GPU kernels to complete before returning from the API, thus this API is synchronous with the CPU thread. When it returns, the initialization process of the cache is already done.
* The embedding cache will be created on the GPU where the user calls the constructor. Thus, the user should set the host thread to the target CUDA device before creating the embedding cache. All resources (i.e. device-side buffers, CUDA streams) used later for this embedding cache should be allocated on the same CUDA device as the embedding cache.
* The constructor can be called only once, thus it is not thread-safe.

`Destructor`

* The destructor cleans up the embedding cache. This API should be called only once, when the user needs to delete the embedding cache object, thus it is not thread-safe.

`Query`

* Search `len` elements from the device-side buffer `d_keys` in the cache and return the result in the device-side buffer `d_values` if a key is hit in the cache.
* If a key is missing, the missing key and its index in the `d_keys` buffer will be returned in the device-side buffers `d_missing_keys` and `d_missing_index`. The # of missing keys will be returned in the device-side buffer `d_missing_len`. For simplicity, these buffers should have the same length as `d_keys` to avoid out-of-bound access.
* The GPU kernels will be launched in the `stream` CUDA stream.
* The host thread will return from the API immediately after the kernels are launched, thus this API is asynchronous with the CPU thread.
* The keys to be queried in the `d_keys` buffer can contain duplicates. In this case, the user will get duplicated returned values or missing information.
* This API is thread-safe and can be called concurrently with other APIs.
* For the hyper-parameter `task_per_warp_tile`, see the `Performance Hint` section below.

`Replace`

* The API will replace `len` [key, value] pairs listed in `d_keys` and `d_values` into the embedding cache using the LRU replacement algorithm.
* The GPU kernels will be launched in the `stream` CUDA stream.
* The host thread will return from the API immediately after the kernels are launched, thus this API is asynchronous with the CPU thread.
* The keys to be replaced in the `d_keys` buffer can contain duplicates and can already be stored inside the cache. In these cases, the cache will detect any possible duplication and maintain the uniqueness of all the [key, value] pairs stored in the cache.
* This API is thread-safe and can be called concurrently with other APIs.
* This API will first try to insert the [key, value] pairs into the cache if there is any empty slot. If the cache is full, it will do the replacement.
* For the hyper-parameter `task_per_warp_tile`, see the `Performance Hint` section below.

`Update`

* The API will search for the `len` keys listed in the `d_keys` buffer within the cache. If a key is found in the cache, this API will update the value associated with the key to the corresponding value provided in the `d_values` buffer. If a key is not found in the cache, this API will do nothing for this key.
* The GPU kernels will be launched in the `stream` CUDA stream.
* The host thread will return from the API immediately after the kernels are launched, thus this API is asynchronous with the CPU thread.
* If the keys to be updated in the `d_keys` buffer contain duplicates, all values associated with such a key in the `d_values` buffer will be written to the cache atomically. The final result depends on the order in which the values are written.
* This API is thread-safe and can be called concurrently with other APIs.
* For the hyper-parameter `task_per_warp_tile`, see the `Performance Hint` section below.

`Dump`

* The API will dump all the keys stored in the cache sets [`start_set_index`, `end_set_index`) to the `d_keys` buffer as a linear array (the key order is not guaranteed). The total # of keys dumped will be reported in the `d_dump_counter` variable.
* The GPU kernels will be launched in the `stream` CUDA stream.
* The host thread will return from the API immediately after the kernels are launched, thus this API is asynchronous with the CPU thread.
* This API is thread-safe and can be called concurrently with other APIs.

## More Information

* The detailed introduction of the GPU embedding cache data structure is presented at GTC China 2020: https://on-demand-gtc.gputechconf.com/gtcnew/sessionview.php?sessionName=cns20626-%e4%bd%bf%e7%94%a8+gpu+embedding+cache+%e5%8a%a0%e9%80%9f+ctr+%e6%8e%a8%e7%90%86%e8%bf%87%e7%a8%8b
* The `test` folder contains an example of using the GPU embedding cache.
* This project is used by the `embedding_cache` class in `HugeCTR/include/inference/embedding_cache.hpp`, which can be used as an example.

## Performance Hint

* The hyper-parameter `warp_size` should be kept at 32 by default. When the length for Query or Replace operations is small (~1-50k), the user can choose a smaller warp_size and increase the total # of cache sets (while maintaining the same cache size) to increase the parallelism and improve the performance.
* The hyper-parameter `set_associativity` is critical to performance:
  + If set too small, it may cause load imbalance between different cache sets (lowering the effective capacity of the cache and lowering the hit rate).
To prevent this, the embedding cache uses a highly random hash function to hash the keys to different cache sets, and thus achieves load balance statistically. However, larger cache sets tend to have better load balance.
  + If set too large, the search space for a single key will be very large, and the performance of the embedding cache API will drop dramatically. Also, each set is accessed exclusively, so the more cache sets there are, the higher the parallelism that can be achieved.
  + Recommend setting `set_associativity` to 2 or 4.
* The runtime hyper-parameter `task_per_warp_tile` defaults to 1, so users don't need to change their code to accommodate this interface change. This hyper-parameter determines how many keys are queried/replaced/updated by a single warp tile. The acceptable value is in the range [1, `warp_size`]. For small to medium size operations on the cache, fewer tasks per warp tile can increase the total # of warp tiles running concurrently on the GPU chip, and thus can bring significant performance improvement. For large size operations on the cache, the increased # of warp tiles will not bring any performance improvement (and may even cause a small performance regression, ~5%). The user can choose the value for this parameter based on the value of the `len` parameter.
* The GPU is designed for optimizing throughput. Always try to batch up the inference tasks and try to have a larger `query_size`.
* Because the APIs of the embedding cache are asynchronous with respect to host threads, try to optimize the E2E inference pipeline by overlapping asynchronous tasks on the GPU or between the CPU and GPU. For example, after retrieving the missing values from the parameter server, the user can combine the missing values with the hit values and run the rest of the inference pipeline at the same time as the `Replace` API. Replacement does not necessarily have to happen together with every Query; the user can query multiple times and then do a replacement if the hit rate is acceptable.
* Try different cache capacity and evaluate the hit rate. If the capacity of embedding cache can be larger than actual embedding footprint, the hit rate can be as high as 99%+.

================================================
FILE: third_party/HugeCTR/gpu_cache/include/gpu_cache_api.hpp
================================================
/*
 * Copyright (c) 2021, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once
// NOTE(review): the header name after `#include` is missing in this copy of the file; restore it
// from upstream.
#include

// Default value of the `task_per_warp_tile` argument of Query/Replace/Update.
#define TASK_PER_WARP_TILE_MACRO 1

namespace gpu_cache {

///////////////////////////////////////////////////////////////////////////////////////////////////

// GPU Cache API
// Abstract interface of a GPU embedding cache: every operation below is pure virtual.
// NOTE(review): the template parameter list after `template` is missing in this copy of the file;
// `key_type` is used below without a visible declaration. Restore it from upstream.
template
class gpu_cache_api {
 public:
  // Declared noexcept(false), so this destructor is allowed to throw.
  virtual ~gpu_cache_api() noexcept(false) {}

  // Query API, i.e. A single read from the cache
  virtual void Query(const key_type* d_keys, const size_t len, float* d_values,
                     uint64_t* d_missing_index, key_type* d_missing_keys, size_t* d_missing_len,
                     cudaStream_t stream,
                     const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO) = 0;

  // Replace API, i.e. Follow the Query API to update the content of the cache to Most Recent
  virtual void Replace(const key_type* d_keys, const size_t len, const float* d_values,
                       cudaStream_t stream,
                       const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO) = 0;

  // Update API, i.e. update the embeddings which exist in the cache
  virtual void Update(const key_type* d_keys, const size_t len, const float* d_values,
                      cudaStream_t stream,
                      const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO) = 0;

  // Dump API, i.e. dump some slabsets' keys from the cache
  virtual void Dump(key_type* d_keys, size_t* d_dump_counter, const size_t start_set_index,
                    const size_t end_set_index, cudaStream_t stream) = 0;

  // Record all the lookup stream of a specific cache for Update/Replace sync
  virtual void Record(cudaStream_t stream) = 0;
};

}  // namespace gpu_cache

================================================
FILE: third_party/HugeCTR/gpu_cache/include/hash_functions.cuh
================================================
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once
// NOTE(review): the header name after `#include` is missing in this copy of the file; restore it
// from upstream.
#include

// MurmurHash3_32 implementation from
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
// 32-bit MurmurHash3 over the raw bytes of a `Key` object, seeded with `m_seed`.
// NOTE(review): the template parameter list after `template` is missing in this copy of the file;
// `Key` and `m_seed` are used below without a visible declaration. Restore it from upstream.
template
struct MurmurHash3_32 {
  using argument_type = Key;
  using result_type = uint32_t;

  /*__forceinline__
  __host__ __device__
  MurmurHash3_32() : m_seed( 0 ) {}*/

  // Rotate `x` left by `r` bits. The callers in this struct only pass 13 and 15; r == 0 would
  // shift by 32 bits in the second term.
  __forceinline__ __host__ __device__ static uint32_t rotl32(uint32_t x, int8_t r) {
    return (x << r) | (x >> (32 - r));
  }

  // Final mixing step applied to the accumulated hash state (xor-shift / multiply rounds).
  __forceinline__ __host__ __device__ static uint32_t fmix32(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;
    return h;
  }

  /* --------------------------------------------------------------------------*/
  /**
   * @Synopsis  Combines two hash values into a new single hash value. Called
   * repeatedly to create a hash value from several variables.
   * Taken from the Boost hash_combine function
   * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html
   *
   * @Param lhs The first hash value to combine
   * @Param rhs The second hash value to combine
   *
   * @Returns A hash value that intelligently combines the lhs and rhs hash values
   */
  /* ----------------------------------------------------------------------------*/
  __host__ __device__ static result_type hash_combine(result_type lhs, result_type rhs) {
    result_type combined{lhs};

    combined ^= rhs + 0x9e3779b9 + (combined << 6) + (combined >> 2);

    return combined;
  }

  // Hash the sizeof(Key) bytes of `key`: whole 4-byte blocks first, then the 1-3 leftover bytes,
  // then the length is mixed in and fmix32 is applied.
  __forceinline__ __host__ __device__ static result_type hash(const Key& key) {
    constexpr int len = sizeof(argument_type);
    const uint8_t* const data = (const uint8_t*)&key;
    constexpr int nblocks = len / 4;
    uint32_t h1 = m_seed;
    constexpr uint32_t c1 = 0xcc9e2d51;
    constexpr uint32_t c2 = 0x1b873593;
    //----------
    // body
    // `blocks` points just past the last whole block; the loop indexes it with negative offsets
    // -nblocks .. -1.
    const uint32_t* const blocks = (const uint32_t*)(data + nblocks * 4);
    for (int i = -nblocks; i; i++) {
      uint32_t k1 = blocks[i];  // getblock32(blocks,i);
      k1 *= c1;
      k1 = rotl32(k1, 15);
      k1 *= c2;
      h1 ^= k1;
      h1 = rotl32(h1, 13);
      h1 = h1 * 5 + 0xe6546b64;
    }
    //----------
    // tail
    const uint8_t* tail = (const uint8_t*)(data + nblocks * 4);
    uint32_t k1 = 0;
    // The cases have no `break`: case 3 runs into case 2 and then case 1, so all leftover bytes
    // are folded into k1 before it is mixed into h1.
    switch (len & 3) {
      case 3:
        k1 ^= tail[2] << 16;
      case 2:
        k1 ^= tail[1] << 8;
      case 1:
        k1 ^= tail[0];
        k1 *= c1;
        k1 = rotl32(k1, 15);
        k1 *= c2;
        h1 ^= k1;
    };
    //----------
    // finalization
    h1 ^= len;
    h1 = fmix32(h1);
    return h1;
  }

  // Function-call form; forwards to the static hash().
  __host__ __device__ __forceinline__ result_type operator()(const Key& key) const {
    return this->hash(key);
  }
};

// Hasher whose hash() ignores the key and always returns `result`.
// NOTE(review): the template parameter list after `template` is missing in this copy of the file;
// `key_type`, `index_type` and `result` are used without a visible declaration.
template
struct Fix_Hash {
  using result_type = index_type;

  __forceinline__ __host__ __device__ static index_type hash(const key_type& key) { return result; }
};

// Hasher whose hash() is a plain cast of the key to `result_type`; no modulo is applied here.
// NOTE(review): the template parameter list after `template` is missing in this copy of the file;
// `key_type` and `result_type` are used without a visible declaration.
template
struct Mod_Hash {
  __forceinline__ __host__ __device__ static result_type hash(const key_type& key) {
    return (result_type)key;
  }
};

================================================
FILE: third_party/HugeCTR/gpu_cache/include/nv_gpu_cache.hpp
================================================
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

#pragma once
// NOTE(review): the header names after the bare `#include` directives below are missing in this
// copy of the file; restore them from upstream.
#include
#include
#include
#include

#include "gpu_cache_api.hpp"
#ifdef LIBCUDACXX_VERSION
#include
#include
#endif

#define SET_ASSOCIATIVITY 2
#define SLAB_SIZE 32
// Same value as the definition in gpu_cache_api.hpp, which is included above.
#define TASK_PER_WARP_TILE_MACRO 1

namespace gpu_cache {

// slab for static slab list
// Holds `warp_size` keys.
// NOTE(review): the template parameter list after `template` is missing in this copy of the file.
template
struct static_slab {
  key_type slab_[warp_size];
};

// Static slablist(slabset) for GPU Cache
// Holds `set_associativity` slabs.
// NOTE(review): the template parameter list after `template` and the template arguments of
// `static_slab` are missing in this copy of the file.
template
struct slab_set {
  static_slab set_[set_associativity];
};

///////////////////////////////////////////////////////////////////////////////////////////////////

// GPU Cache
// Implementation of the gpu_cache_api interface; the member functions declared here are defined
// out of line, except Record(), which is an empty override.
// NOTE(review): most of the template parameter list and the template arguments of the base class
// are missing in this copy of the file; only the tail of the list is left. Restore from upstream.
template ,
          typename slab_hasher = Mod_Hash>
class gpu_cache : public gpu_cache_api {
 public:
  // Ctor
  gpu_cache(const size_t capacity_in_set, const size_t embedding_vec_size);

  // Dtor
  ~gpu_cache();

  // Query API, i.e. A single read from the cache
  void Query(const key_type* d_keys, const size_t len, float* d_values, uint64_t* d_missing_index,
             key_type* d_missing_keys, size_t* d_missing_len, cudaStream_t stream,
             const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO) override;

  // Replace API, i.e. Follow the Query API to update the content of the cache to Most Recent
  void Replace(const key_type* d_keys, const size_t len, const float* d_values, cudaStream_t stream,
               const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO) override;

  // Update API, i.e. update the embeddings which exist in the cache
  void Update(const key_type* d_keys, const size_t len, const float* d_values, cudaStream_t stream,
              const size_t task_per_warp_tile = TASK_PER_WARP_TILE_MACRO) override;

  // Dump API, i.e. dump some slabsets' keys from the cache
  void Dump(key_type* d_keys, size_t* d_dump_counter, const size_t start_set_index,
            const size_t end_set_index, cudaStream_t stream) override;

  // This implementation does nothing when asked to record a stream.
  void Record(cudaStream_t stream) override {}

 public:
  // NOTE(review): the template arguments of `slab_set`, `cuda::atomic` and
  // `cuda::binary_semaphore` are missing in this copy of the file.
  using slabset = slab_set;
#ifdef LIBCUDACXX_VERSION
  using atomic_ref_counter_type = cuda::atomic;
  using mutex = cuda::binary_semaphore;
#endif

 private:
  // NOTE(review): presumably the CUDA thread-block size used for kernel launches -- confirm in
  // nv_gpu_cache.cu.
  static const size_t BLOCK_SIZE_ = 64;

  // Cache data
  slabset* keys_;
  float* vals_;
  ref_counter_type* slot_counter_;

  // Global counter
#ifdef LIBCUDACXX_VERSION
  atomic_ref_counter_type* global_counter_;
#else
  ref_counter_type* global_counter_;
#endif
  // CUDA device
  int dev_;

  // Cache capacity
  size_t capacity_in_set_;
  size_t num_slot_;

  // Embedding vector size
  size_t embedding_vec_size_;

#ifdef LIBCUDACXX_VERSION
  // Array of mutex to protect (sub-)warp-level data structure, each mutex protect 1 slab set
  mutex* set_mutex_;
#else
  // Array of flag to protect (sub-)warp-level data structure, each flag act as a mutex and protect
  // 1 slab set 1 for unlock, 0 for lock
  int* set_mutex_;
#endif
};

}  // namespace gpu_cache

================================================
FILE: third_party/HugeCTR/gpu_cache/include/nv_util.h
================================================
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

#pragma once
// NOTE(review): the header names after the bare `#include` directives below are missing in this
// copy of the file; restore them from upstream.
#include
#include
#include
#include
#include

// Wraps a CUDA runtime call: forwards the returned status together with the call site's file and
// line to nv::cuda_check_, which throws on failure.
#define CUDA_CHECK(val) \
  { nv::cuda_check_((val), __FILE__, __LINE__); }

namespace nv {

// Type trait: true only for the two FP8 types specialised below.
// NOTE(review): the template parameter list of the primary template is missing in this copy of
// the file.
template
struct is_fp8 : std::false_type {};

template <>
struct is_fp8<__nv_fp8_e4m3> : std::true_type {};

template <>
struct is_fp8<__nv_fp8_e5m2> : std::true_type {};

// Exception type thrown by cuda_check_ when a CUDA call does not return cudaSuccess.
class CudaException : public std::runtime_error {
 public:
  CudaException(const std::string& what) : runtime_error(what) {}
};

// Throws CudaException with "<file>:<line>: CUDA error <code>: <message>" unless `val` is
// cudaSuccess.
inline void cuda_check_(cudaError_t val, const char* file, int line) {
  if (val != cudaSuccess) {
    throw CudaException(std::string(file) + ":" + std::to_string(line) + ": CUDA error " +
                        std::to_string(val) + ": " + cudaGetErrorString(val));
  }
}

// Scope guard: remembers the current CUDA device on construction and sets it again on
// destruction.
class CudaDeviceRestorer {
 public:
  CudaDeviceRestorer() { CUDA_CHECK(cudaGetDevice(&dev_)); }
  // NOTE(review): CUDA_CHECK throws on failure, and this destructor has no noexcept(false)
  // specification; destructors are noexcept by default, so a failing cudaSetDevice here would
  // end in std::terminate. Confirm that this is the intended behaviour.
  ~CudaDeviceRestorer() { CUDA_CHECK(cudaSetDevice(dev_)); }
  // Throws std::runtime_error when `device` differs from the device remembered at construction.
  void check_device(int device) const {
    if (device != dev_) {
      throw std::runtime_error(
          std::string(__FILE__) + ":" + std::to_string(__LINE__) +
          ": Runtime Error: The device id in the context is not consistent with configuration");
    }
  }

 private:
  int dev_;
};

// Returns the device that owns `ptr` when the pointer attributes report device memory, otherwise
// -1. A failing cudaPointerGetAttributes call is turned into an exception by CUDA_CHECK.
inline int get_dev(const void* ptr) {
  cudaPointerAttributes attr;
  CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr));
  int dev = -1;

  // The attribute member is read as `type` for CUDART_VERSION >= 10000 and as `memoryType`
  // otherwise.
#if CUDART_VERSION >= 10000
  if (attr.type == cudaMemoryTypeDevice)
#else
  if (attr.memoryType == cudaMemoryTypeDevice)
#endif
  {
    dev = attr.device;
  }
  return dev;
}

// Makes the device that owns `ptr` current; does nothing when get_dev() reports -1.
inline void switch_to_dev(const void* ptr) {
  int dev = get_dev(ptr);
  if (dev >= 0) {
    CUDA_CHECK(cudaSetDevice(dev));
  }
}

}  // namespace nv

================================================
FILE: third_party/HugeCTR/gpu_cache/src/nv_gpu_cache.cu
================================================
/*
 * Copyright (c) 2023, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// NOTE(review): the header names after the bare `#include` directives below are missing in this
// copy of the file; restore them from upstream.
#include

#include

namespace cg = cooperative_groups;

// Overload CUDA atomic for other 64bit unsigned/signed integer type
// Each overload reinterprets the address as `unsigned long long*` and forwards to that atomicAdd.
// NOTE(review): the `long` / `unsigned long` overloads assume those types are 64 bits wide on the
// target platform -- confirm.
__forceinline__ __device__ long atomicAdd(long* address, long val) {
  return (long)atomicAdd((unsigned long long*)address, (unsigned long long)val);
}

__forceinline__ __device__ long long atomicAdd(long long* address, long long val) {
  return (long long)atomicAdd((unsigned long long*)address, (unsigned long long)val);
}

__forceinline__ __device__ unsigned long atomicAdd(unsigned long* address, unsigned long val) {
  return (unsigned long)atomicAdd((unsigned long long*)address, (unsigned long long)val);
}

namespace gpu_cache {

// Copy `emb_vec_size_in_float` floats from d_src to d_dst: the calling lane handles elements
// lane_idx, lane_idx + warp_size, lane_idx + 2 * warp_size, ...
// NOTE(review): the template parameter list after `template` is missing in this copy of the file;
// `warp_size` is used without a visible declaration.
#ifdef LIBCUDACXX_VERSION
template
__forceinline__ __device__ void warp_tile_copy(const size_t lane_idx,
                                               const size_t emb_vec_size_in_float, float* d_dst,
                                               const float* d_src) {
#pragma unroll
  for (size_t i = lane_idx; i < emb_vec_size_in_float; i += warp_size) {
    d_dst[i] = d_src[i];
  }
}
#else
// Same copy as above; in this build both pointers are volatile-qualified.
template
__forceinline__ __device__ void warp_tile_copy(const size_t lane_idx,
                                               const size_t emb_vec_size_in_float,
                                               volatile float* d_dst, volatile float* d_src) {
#pragma unroll
  for (size_t i = lane_idx; i < emb_vec_size_in_float; i += warp_size) {
    d_dst[i] = d_src[i];
  }
}
#endif

#ifdef LIBCUDACXX_VERSION
// Will be called by multiple thread_block_tile((sub-)warp) on the same mutex
// Expect only one thread_block_tile return to execute critical section at any time
// NOTE(review): the template parameter lists and the template arguments of cg::thread_block_tile
// are missing for all four lock/unlock helpers in this copy of the file.
template
__forceinline__ __device__ void warp_lock_mutex(const cg::thread_block_tile& warp_tile,
                                                mutex& set_mutex) {
  // The first thread of this (sub-)warp to acquire the lock
  if (warp_tile.thread_rank() == 0) {
    set_mutex.acquire();
  }
  warp_tile.sync();  // Synchronize the threads in the (sub-)warp. Execution barrier + memory fence
}

// The (sub-)warp holding the mutex will unlock the mutex after finishing the critical section on a
// set Expect any following (sub-)warp that acquire the mutex can see its modification done in the
// critical section
template
__forceinline__ __device__ void warp_unlock_mutex(const cg::thread_block_tile& warp_tile,
                                                  mutex& set_mutex) {
  warp_tile.sync();  // Synchronize the threads in the (sub-)warp. Execution barrier + memory fence
  // The first thread of this (sub-)warp to release the lock
  if (warp_tile.thread_rank() == 0) {
    set_mutex.release();
  }
}
#else
// Will be called by multiple thread_block_tile((sub-)warp) on the same mutex
// Expect only one thread_block_tile return to execute critical section at any time
template
__forceinline__ __device__ void warp_lock_mutex(const cg::thread_block_tile& warp_tile,
                                                volatile int& set_mutex) {
  // The first thread of this (sub-)warp to acquire the lock
  if (warp_tile.thread_rank() == 0) {
    // The flag is 1 when unlocked and 0 when locked. atomicCAS swaps 1 -> 0 and returns the old
    // value, so the loop keeps spinning while the old value was 0 (someone else holds the lock).
    while (0 == atomicCAS((int*)&set_mutex, 1, 0))
      ;
  }
  // Every thread of the tile issues the fence, not only the thread that took the lock.
  __threadfence();
  warp_tile.sync();  // Synchronize the threads in the (sub-)warp. Execution barrier + memory fence
}

// The (sub-)warp holding the mutex will unlock the mutex after finishing the critical section on a
// set Expect any following (sub-)warp that acquire the mutex can see its modification done in the
// critical section
template
__forceinline__ __device__ void warp_unlock_mutex(const cg::thread_block_tile& warp_tile,
                                                  volatile int& set_mutex) {
  // Fence and tile-wide sync happen before the flag is written back, so the release comes last.
  __threadfence();
  warp_tile.sync();  // Synchronize the threads in the (sub-)warp. Execution barrier + memory fence
  // The first thread of this (sub-)warp to release the lock
  if (warp_tile.thread_rank() == 0) {
    atomicExch((int*)&set_mutex, 1);
  }
}
#endif

// The (sub-)warp doing all reduction to find the slot with min slot_counter
// The slot with min slot_counter is the LR slot.
// XOR-shuffle all-reduction over the (sub-)warp tile that picks the slot with the smallest
// slot counter, i.e. the least-recently-used (LR) slot.
//
// On entry every lane supplies its own candidate (`min_slot_counter_val`, `slab_distance`);
// `slot_distance` is overwritten with the lane's rank before the reduction starts.
// Candidates are ordered lexicographically by (counter, slab distance, slot distance), so ties
// on the counter go to the smaller slab distance and then to the smaller slot distance.
// On return the three reference arguments hold the winning candidate.
// NOTE(review): the template parameter list and the tile-size argument of
// cg::thread_block_tile are not visible in this copy of the file - confirm against upstream.
template __forceinline__ __device__ void warp_min_reduction(
    const cg::thread_block_tile& warp_tile, ref_counter_type& min_slot_counter_val,
    size_t& slab_distance, size_t& slot_distance) {
  // Each lane initially nominates the slot at its own lane position.
  slot_distance = warp_tile.thread_rank();

  size_t stride = (warp_tile.size() >> 1);
  while (stride > 0) {
    const int xor_mask = (int)stride;

    // Fetch the candidate currently held by the partner lane (rank ^ stride).
    const ref_counter_type peer_counter = warp_tile.shfl_xor(min_slot_counter_val, xor_mask);
    const size_t peer_slab = warp_tile.shfl_xor(slab_distance, xor_mask);
    const size_t peer_slot = warp_tile.shfl_xor(slot_distance, xor_mask);

    // Lexicographic "strictly smaller" test on (counter, slab distance, slot distance).
    bool peer_wins = (peer_counter < min_slot_counter_val);
    if (peer_counter == min_slot_counter_val) {
      peer_wins = (peer_slab == slab_distance) ? (peer_slot < slot_distance)
                                               : (peer_slab < slab_distance);
    }

    // Fields that compared equal are overwritten with equal values, so adopting the whole
    // triple is the same as updating only the fields that differ.
    if (peer_wins) {
      min_slot_counter_val = peer_counter;
      slab_distance = peer_slab;
      slot_distance = peer_slot;
    }

    stride = stride >> 1;
  }
}
// Kernel to read from cache
// Also update locality information for touched slot
//
// Work distribution: each (sub-)warp tile owns up to `task_per_warp_tile` consecutive keys of
// `d_keys`; lane i of the tile is assigned key (tile_global_idx * task_per_warp_tile + i).
// The tile then processes its assigned keys one at a time, cooperatively, under the mutex of
// the key's slabset.
//
// For every key:
//   * hit  - the slot counter of the matching slot is set to the current global counter value
//            and the embedding vector is copied from `vals` into
//            `d_values + key_index * embedding_vec_size`;
//   * miss - the key and its index in `d_keys` are remembered and, after the tile has finished
//            all its keys, appended to `d_missing_keys` / `d_missing_index` at a position
//            reserved through an atomic add on `*d_missing_len`.
// A key is treated as missing either when all `set_associativity` slabs of its set were probed
// without a match or as soon as a probed slab contains an empty slot.
// NOTE(review): stopping at the first empty slot presumably relies on insertion filling the
// first empty slot along the same probe sequence - confirm against insert_replace_kernel.
// NOTE(review): the template parameter list and the size arguments of cg::thread_block_tile /
// cg::tiled_partition are not visible in this copy of the file - confirm against upstream.
template __global__ void get_kernel(const key_type* d_keys, const size_t len, float* d_values,
                           const size_t embedding_vec_size, uint64_t* d_missing_index,
                           key_type* d_missing_keys, size_t* d_missing_len,
                           const atomic_ref_counter_type* global_counter,
                           ref_counter_type* slot_counter, const size_t capacity_in_set,
                           const slabset* keys, const float* vals, mutex* set_mutex,
                           const size_t task_per_warp_tile) {
  // Lane(thread) ID within a warp_tile
  cg::thread_block_tile warp_tile = cg::tiled_partition(cg::this_thread_block());
  const size_t lane_idx = warp_tile.thread_rank();
  // Warp tile global ID
  const size_t warp_tile_global_idx =
      (blockIdx.x * (blockDim.x / warp_size)) + warp_tile.meta_group_rank();
  // The index of key for this thread
  const size_t key_idx = (warp_tile_global_idx * task_per_warp_tile) + lane_idx;
  // The assigned key for this lane(thread)
  key_type key;
  // The source slabset and the first slab to probe inside this set
  size_t src_set;
  size_t src_slab;
  // The variable that contains the missing key
  key_type missing_key;
  // The variable that contains the index for the missing key
  uint64_t missing_index;
  // The counter for counting the missing keys in this warp tile
  uint8_t warp_missing_counter = 0;
  // Active flag: whether current lane(thread) has unfinished task
  bool active = false;
  if (lane_idx < task_per_warp_tile) {
    if (key_idx < len) {
      active = true;
      key = d_keys[key_idx];
      src_set = set_hasher::hash(key) % capacity_in_set;
      src_slab = slab_hasher::hash(key) % set_associativity;
    }
  }

  // Lanes participate in a warp_tile ballot to produce the warp-level work queue
  unsigned active_mask = warp_tile.ballot(active);

  // The warp-level outer loop: finish all the tasks within the work queue
  while (active_mask != 0) {
    // Next task in the work queue, starting from the lowest-index active lane(thread)
    int next_lane = __ffs(active_mask) - 1;

    // Broadcast the task and the global index to all lanes in the warp_tile
    key_type next_key = warp_tile.shfl(key, next_lane);
    size_t next_idx = warp_tile.shfl(key_idx, next_lane);
    size_t next_set = warp_tile.shfl(src_set, next_lane);
    size_t next_slab = warp_tile.shfl(src_slab, next_lane);

    // Counter to record how many slabs have been searched
    size_t counter = 0;

    // Working queue before task started; the inner loop ends once the queue changes
    const unsigned old_active_mask = active_mask;

    // Lock the slabset before operating the slabset
    warp_lock_mutex(warp_tile, set_mutex[next_set]);

    // The warp-level inner loop: finish a single task in the work queue
    while (active_mask == old_active_mask) {
      // When all the slabs inside a slabset have been searched, mark the task as missing;
      // the task is completed
      if (counter >= set_associativity) {
        // The n-th miss of this tile is stored by lane n, so the misses end up compacted
        // across the lanes 0..warp_missing_counter-1
        if (lane_idx == warp_missing_counter) {
          missing_key = next_key;
          missing_index = next_idx;
        }

        if (lane_idx == (size_t)next_lane) {
          active = false;
        }

        warp_missing_counter++;
        active_mask = warp_tile.ballot(active);
        break;
      }

      // The warp_tile reads out the slab, one slot per lane
      key_type read_key = keys[next_set].set_[next_slab].slab_[lane_idx];

      // Compare the slab data with the target key
      int found_lane = __ffs(warp_tile.ballot(read_key == next_key)) - 1;

      // If found, mark the task as a hit, copy the found data; the task is completed
      if (found_lane >= 0) {
        size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane;
        if (lane_idx == (size_t)next_lane) {
          // Refresh the locality information of the touched slot
          slot_counter[found_offset] = global_counter->load(cuda::std::memory_order_relaxed);
          active = false;
        }

        // All lanes of the tile cooperate in copying the embedding vector to the output
        warp_tile_copy(lane_idx, embedding_vec_size, d_values + next_idx * embedding_vec_size,
                       vals + found_offset * embedding_vec_size);

        active_mask = warp_tile.ballot(active);
        break;
      }

      // Compare the slab data with the empty key; if an empty key is found, mark the task as
      // missing; the task is completed
      if (warp_tile.ballot(read_key == empty_key) != 0) {
        if (lane_idx == warp_missing_counter) {
          missing_key = next_key;
          missing_index = next_idx;
        }

        if (lane_idx == (size_t)next_lane) {
          active = false;
        }

        warp_missing_counter++;
        active_mask = warp_tile.ballot(active);
        break;
      }

      // Not found in this slab, the task is not completed, go on to search the next slab
      counter++;
      next_slab = (next_slab + 1) % set_associativity;
    }

    // Unlock the slabset after operating the slabset
    warp_unlock_mutex(warp_tile, set_mutex[next_set]);
  }

  // After the warp_tile completes the working queue, save the result for output
  // First thread of the warp_tile accumulates the missing length to the global variable and
  // thereby reserves a contiguous output range for this tile
  size_t warp_position;
  if (lane_idx == 0) {
    warp_position = atomicAdd(d_missing_len, (size_t)warp_missing_counter);
  }
  warp_position = warp_tile.shfl(warp_position, 0);

  // Lanes holding a recorded miss write it into the reserved range
  if (lane_idx < warp_missing_counter) {
    d_missing_keys[warp_position + lane_idx] = missing_key;
    d_missing_index[warp_position + lane_idx] = missing_index;
  }
}
set_hasher::hash(key) % capacity_in_set; src_slab = slab_hasher::hash(key) % set_associativity; } } // Lane participate in warp_tile ballot to produce warp-level work queue unsigned active_mask = warp_tile.ballot(active); // The warp-level outer loop: finish all the tasks within the work queue while (active_mask != 0) { // Next task in the work quere, start from lower index lane(thread) int next_lane = __ffs(active_mask) - 1; // Broadcast the task and the global index to all lane in the warp_tile key_type next_key = warp_tile.shfl(key, next_lane); size_t next_idx = warp_tile.shfl(key_idx, next_lane); size_t next_set = warp_tile.shfl(src_set, next_lane); size_t next_slab = warp_tile.shfl(src_slab, next_lane); // Counter to record how many slab have been searched size_t counter = 0; // Working queue before task started const unsigned old_active_mask = active_mask; // Lock the slabset before operating the slabset warp_lock_mutex(warp_tile, set_mutex[next_set]); // The warp-level inner loop: finish a single task in the work queue while (active_mask == old_active_mask) { // When all the slabs inside a slabset have been searched, mark missing task, task is // completed if (counter >= set_associativity) { if (lane_idx == warp_missing_counter) { missing_key = next_key; missing_index = next_idx; } if (lane_idx == (size_t)next_lane) { active = false; } warp_missing_counter++; active_mask = warp_tile.ballot(active); break; } // The warp_tile read out the slab key_type read_key = ((volatile key_type*)(keys[next_set].set_[next_slab].slab_))[lane_idx]; // Compare the slab data with the target key int found_lane = __ffs(warp_tile.ballot(read_key == next_key)) - 1; // If found, mark hit task, copy the founded data, the task is completed if (found_lane >= 0) { size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane; if (lane_idx == (size_t)next_lane) { slot_counter[found_offset] = atomicAdd(global_counter, 0); active = false; } 
// Kernel to insert or replace the <key, value> pairs into the cache
//
// Work distribution is per (sub-)warp tile: lane i of a tile is assigned key
// (tile_global_idx * task_per_warp_tile + i) of `d_keys`, and the tile processes its keys one
// at a time under the mutex of the key's slabset.
//
// For every key the slabs of its set are probed starting at the hashed slab:
//   * key already present - only the slot counter is refreshed with the current global
//                           counter value (the stored value is left untouched);
//   * empty slot found    - key and embedding vector are written into that slot;
//   * neither, after all `set_associativity` slabs - the slot with the smallest slot counter
//                           (found by warp_min_reduction) is overwritten.
// NOTE(review): part of the template parameter list and the size arguments of
// cg::thread_block_tile / cg::tiled_partition / std::numeric_limits are not visible in this
// copy of the file - confirm against upstream.
template ::max(),
          size_t max_slab_distance = std::numeric_limits::max()>
__global__ void insert_replace_kernel(const key_type* d_keys, const float* d_values,
                                      const size_t embedding_vec_size, const size_t len,
                                      slabset* keys, float* vals, ref_counter_type* slot_counter,
                                      mutex* set_mutex,
                                      const atomic_ref_counter_type* global_counter,
                                      const size_t capacity_in_set,
                                      const size_t task_per_warp_tile) {
  // Lane(thread) ID within a warp_tile
  cg::thread_block_tile warp_tile = cg::tiled_partition(cg::this_thread_block());
  const size_t lane_idx = warp_tile.thread_rank();
  // Warp tile global ID
  const size_t warp_tile_global_idx =
      (blockIdx.x * (blockDim.x / warp_size)) + warp_tile.meta_group_rank();
  // The index of key for this thread
  const size_t key_idx = (warp_tile_global_idx * task_per_warp_tile) + lane_idx;
  // The assigned key for this lane(thread)
  key_type key;
  // The destination slabset and the first slab to probe inside this set
  size_t src_set;
  size_t src_slab;
  // Active flag: whether current lane(thread) has unfinished task
  bool active = false;
  if (lane_idx < task_per_warp_tile) {
    if (key_idx < len) {
      active = true;
      key = d_keys[key_idx];
      src_set = set_hasher::hash(key) % capacity_in_set;
      src_slab = slab_hasher::hash(key) % set_associativity;
    }
  }

  // Lanes participate in a warp_tile ballot to produce the warp-level work queue
  unsigned active_mask = warp_tile.ballot(active);

  // The warp-level outer loop: finish all the tasks within the work queue
  while (active_mask != 0) {
    // Next task in the work queue, starting from the lowest-index active lane(thread)
    int next_lane = __ffs(active_mask) - 1;

    // Broadcast the task, the global index and the src slabset and slab to all lanes in a
    // warp_tile
    key_type next_key = warp_tile.shfl(key, next_lane);
    size_t next_idx = warp_tile.shfl(key_idx, next_lane);
    size_t next_set = warp_tile.shfl(src_set, next_lane);
    size_t next_slab = warp_tile.shfl(src_slab, next_lane);
    // Remember where probing started; slab distances are measured from here
    size_t first_slab = next_slab;

    // Counter to record how many slabs have been searched
    size_t counter = 0;

    // Variable to keep the min slot counter during the probing
    ref_counter_type min_slot_counter_val = max_ref_counter_type;
    // Variable to keep the slab distance for the slot with min counter
    size_t slab_distance = max_slab_distance;
    // Variable to keep the slot distance for the slot with min counter within the slab
    // (assigned inside warp_min_reduction)
    size_t slot_distance;
    // Working queue before task started; the inner loop ends once the queue changes
    const unsigned old_active_mask = active_mask;

    // Lock the slabset before operating the slabset
    warp_lock_mutex(warp_tile, set_mutex[next_set]);

    // The warp-level inner loop: finish a single task in the work queue
    while (active_mask == old_active_mask) {
      // When all the slabs inside a slabset have been searched
      // and no empty slots or target slots are found. Replace with LRU
      if (counter >= set_associativity) {
        // (sub)Warp all-reduction, the reduction result is stored in all threads
        warp_min_reduction(warp_tile, min_slot_counter_val, slab_distance, slot_distance);

        // Calculate the position of the LR slot
        size_t target_slab = (first_slab + slab_distance) % set_associativity;
        size_t slot_index =
            (next_set * set_associativity + target_slab) * warp_size + slot_distance;

        // Replace the LR slot; only the lane owning the task writes key and counter
        // (for that lane `key` equals the broadcast `next_key`)
        if (lane_idx == (size_t)next_lane) {
          keys[next_set].set_[target_slab].slab_[slot_distance] = key;
          slot_counter[slot_index] = global_counter->load(cuda::std::memory_order_relaxed);
        }

        // All lanes of the tile cooperate in copying the embedding vector into the cache
        warp_tile_copy(lane_idx, embedding_vec_size, vals + slot_index * embedding_vec_size,
                       d_values + next_idx * embedding_vec_size);

        // Replace complete, mark this task completed
        if (lane_idx == (size_t)next_lane) {
          active = false;
        }

        active_mask = warp_tile.ballot(active);
        break;
      }

      // The warp_tile reads out the slab, one slot per lane
      key_type read_key = keys[next_set].set_[next_slab].slab_[lane_idx];

      // Compare the slab data with the target key
      int found_lane = __ffs(warp_tile.ballot(read_key == next_key)) - 1;

      // If found target key, the insertion/replace is no longer needed.
      // Refresh the slot, the task is completed
      if (found_lane >= 0) {
        size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane;
        if (lane_idx == (size_t)next_lane) {
          slot_counter[found_offset] = global_counter->load(cuda::std::memory_order_relaxed);
          active = false;
        }

        active_mask = warp_tile.ballot(active);
        break;
      }

      // Compare the slab data with empty key.
      // If found empty key, do insertion, the task is complete
      found_lane = __ffs(warp_tile.ballot(read_key == empty_key)) - 1;
      if (found_lane >= 0) {
        size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane;

        if (lane_idx == (size_t)next_lane) {
          keys[next_set].set_[next_slab].slab_[found_lane] = key;
          slot_counter[found_offset] = global_counter->load(cuda::std::memory_order_relaxed);
        }

        // All lanes of the tile cooperate in copying the embedding vector into the cache
        warp_tile_copy(lane_idx, embedding_vec_size, vals + found_offset * embedding_vec_size,
                       d_values + next_idx * embedding_vec_size);

        if (lane_idx == (size_t)next_lane) {
          active = false;
        }

        active_mask = warp_tile.ballot(active);
        break;
      }

      // If no target or unused slot found in this slab,
      // Refresh LR info, continue probing
      // Each lane tracks the smallest counter seen at its own slot position across slabs
      ref_counter_type read_slot_counter =
          slot_counter[(next_set * set_associativity + next_slab) * warp_size + lane_idx];
      if (read_slot_counter < min_slot_counter_val) {
        min_slot_counter_val = read_slot_counter;
        slab_distance = counter;
      }

      counter++;
      next_slab = (next_slab + 1) % set_associativity;
    }

    // Unlock the slabset after operating the slabset
    warp_unlock_mutex(warp_tile, set_mutex[next_set]);
  }
}
* task_per_warp_tile) + lane_idx; // The assigned key for this lane(thread) key_type key; // The dst slabset and the dst slab inside this set size_t src_set; size_t src_slab; // Active flag: whether current lane(thread) has unfinished task bool active = false; if (lane_idx < task_per_warp_tile) { if (key_idx < len) { active = true; key = d_keys[key_idx]; src_set = set_hasher::hash(key) % capacity_in_set; src_slab = slab_hasher::hash(key) % set_associativity; } } // Lane participate in warp_tile ballot to produce warp-level work queue unsigned active_mask = warp_tile.ballot(active); // The warp-level outer loop: finish all the tasks within the work queue while (active_mask != 0) { // Next task in the work quere, start from lower index lane(thread) int next_lane = __ffs(active_mask) - 1; // Broadcast the task, the global index and the src slabset and slab to all lane in a warp_tile key_type next_key = warp_tile.shfl(key, next_lane); size_t next_idx = warp_tile.shfl(key_idx, next_lane); size_t next_set = warp_tile.shfl(src_set, next_lane); size_t next_slab = warp_tile.shfl(src_slab, next_lane); size_t first_slab = next_slab; // Counter to record how many slab have been searched size_t counter = 0; // Variable to keep the min slot counter during the probing ref_counter_type min_slot_counter_val = max_ref_counter_type; // Variable to keep the slab distance for slot with min counter size_t slab_distance = max_slab_distance; // Variable to keep the slot distance for slot with min counter within the slab size_t slot_distance; // Working queue before task started const unsigned old_active_mask = active_mask; // Lock the slabset before operating the slabset warp_lock_mutex(warp_tile, set_mutex[next_set]); // The warp-level inner loop: finish a single task in the work queue while (active_mask == old_active_mask) { // When all the slabs inside a slabset have been searched // and no empty slots or target slots are found. 
Replace with LRU if (counter >= set_associativity) { // (sub)Warp all-reduction, the reduction result store in all threads warp_min_reduction(warp_tile, min_slot_counter_val, slab_distance, slot_distance); // Calculate the position of LR slot size_t target_slab = (first_slab + slab_distance) % set_associativity; size_t slot_index = (next_set * set_associativity + target_slab) * warp_size + slot_distance; // Replace the LR slot if (lane_idx == (size_t)next_lane) { ((volatile key_type*)(keys[next_set].set_[target_slab].slab_))[slot_distance] = key; slot_counter[slot_index] = atomicAdd(global_counter, 0); } warp_tile_copy(lane_idx, embedding_vec_size, (volatile float*)(vals + slot_index * embedding_vec_size), (volatile float*)(d_values + next_idx * embedding_vec_size)); // Replace complete, mark this task completed if (lane_idx == (size_t)next_lane) { active = false; } active_mask = warp_tile.ballot(active); break; } // The warp_tile read out the slab key_type read_key = ((volatile key_type*)(keys[next_set].set_[next_slab].slab_))[lane_idx]; // Compare the slab data with the target key int found_lane = __ffs(warp_tile.ballot(read_key == next_key)) - 1; // If found target key, the insertion/replace is no longer needed. // Refresh the slot, the task is completed if (found_lane >= 0) { size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane; if (lane_idx == (size_t)next_lane) { slot_counter[found_offset] = atomicAdd(global_counter, 0); active = false; } active_mask = warp_tile.ballot(active); break; } // Compare the slab data with empty key. 
// Kernel to update the existing keys in the cache
// Will not change the locality information
//
// Work distribution is per (sub-)warp tile: lane i of a tile is assigned key
// (tile_global_idx * task_per_warp_tile + i) of `d_keys`, and the tile processes its keys one
// at a time under the mutex of the key's slabset.
// If a key is found, its embedding vector in `vals` is overwritten with the vector at
// `d_values + key_index * embedding_vec_size`. Keys that are not found are skipped: nothing is
// inserted and neither `keys` nor any slot counter is written by this kernel.
// NOTE(review): the template parameter list and the size arguments of cg::thread_block_tile /
// cg::tiled_partition are not visible in this copy of the file - confirm against upstream.
template __global__ void update_kernel(const key_type* d_keys, const size_t len,
                              const float* d_values, const size_t embedding_vec_size,
                              const size_t capacity_in_set, const slabset* keys, float* vals,
                              mutex* set_mutex, const size_t task_per_warp_tile) {
  // Lane(thread) ID within a warp_tile
  cg::thread_block_tile warp_tile = cg::tiled_partition(cg::this_thread_block());
  const size_t lane_idx = warp_tile.thread_rank();
  // Warp tile global ID
  const size_t warp_tile_global_idx =
      (blockIdx.x * (blockDim.x / warp_size)) + warp_tile.meta_group_rank();
  // The index of key for this thread
  const size_t key_idx = (warp_tile_global_idx * task_per_warp_tile) + lane_idx;
  // The assigned key for this lane(thread)
  key_type key;
  // The target slabset and the first slab to probe inside this set
  size_t src_set;
  size_t src_slab;
  // Active flag: whether current lane(thread) has unfinished task
  bool active = false;
  if (lane_idx < task_per_warp_tile) {
    if (key_idx < len) {
      active = true;
      key = d_keys[key_idx];
      src_set = set_hasher::hash(key) % capacity_in_set;
      src_slab = slab_hasher::hash(key) % set_associativity;
    }
  }

  // Lanes participate in a warp_tile ballot to produce the warp-level work queue
  unsigned active_mask = warp_tile.ballot(active);

  // The warp-level outer loop: finish all the tasks within the work queue
  while (active_mask != 0) {
    // Next task in the work queue, starting from the lowest-index active lane(thread)
    int next_lane = __ffs(active_mask) - 1;

    // Broadcast the task and the global index to all lanes in the warp_tile
    key_type next_key = warp_tile.shfl(key, next_lane);
    size_t next_idx = warp_tile.shfl(key_idx, next_lane);
    size_t next_set = warp_tile.shfl(src_set, next_lane);
    size_t next_slab = warp_tile.shfl(src_slab, next_lane);

    // Counter to record how many slabs have been searched
    size_t counter = 0;

    // Working queue before task started; the inner loop ends once the queue changes
    const unsigned old_active_mask = active_mask;

    // Lock the slabset before operating the slabset
    warp_lock_mutex(warp_tile, set_mutex[next_set]);

    // The warp-level inner loop: finish a single task in the work queue
    while (active_mask == old_active_mask) {
      // When all the slabs inside a slabset have been searched, the key is missing: do
      // nothing, the task is complete
      if (counter >= set_associativity) {
        if (lane_idx == (size_t)next_lane) {
          active = false;
        }
        active_mask = warp_tile.ballot(active);
        break;
      }

      // The warp_tile reads out the slab, one slot per lane
      key_type read_key = keys[next_set].set_[next_slab].slab_[lane_idx];

      // Compare the slab data with the target key
      int found_lane = __ffs(warp_tile.ballot(read_key == next_key)) - 1;

      // If found, mark the task as a hit, update the value; the task is completed
      if (found_lane >= 0) {
        size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane;
        if (lane_idx == (size_t)next_lane) {
          active = false;
        }

        // All lanes of the tile cooperate in overwriting the cached embedding vector
        warp_tile_copy(lane_idx, embedding_vec_size, vals + found_offset * embedding_vec_size,
                       d_values + next_idx * embedding_vec_size);

        active_mask = warp_tile.ballot(active);
        break;
      }

      // Compare the slab data with the empty key; if an empty key is found, the key is
      // missing: do nothing, the task is completed
      if (warp_tile.ballot(read_key == empty_key) != 0) {
        if (lane_idx == (size_t)next_lane) {
          active = false;
        }
        active_mask = warp_tile.ballot(active);
        break;
      }

      // Not found in this slab, the task is not completed, go on to search the next slab
      counter++;
      next_slab = (next_slab + 1) % set_associativity;
    }

    // Unlock the slabset after operating the slabset
    warp_unlock_mutex(warp_tile, set_mutex[next_set]);
  }
}
set_associativity; } } // Lane participate in warp_tile ballot to produce warp-level work queue unsigned active_mask = warp_tile.ballot(active); // The warp-level outer loop: finish all the tasks within the work queue while (active_mask != 0) { // Next task in the work quere, start from lower index lane(thread) int next_lane = __ffs(active_mask) - 1; // Broadcast the task and the global index to all lane in the warp_tile key_type next_key = warp_tile.shfl(key, next_lane); size_t next_idx = warp_tile.shfl(key_idx, next_lane); size_t next_set = warp_tile.shfl(src_set, next_lane); size_t next_slab = warp_tile.shfl(src_slab, next_lane); // Counter to record how many slab have been searched size_t counter = 0; // Working queue before task started const unsigned old_active_mask = active_mask; // Lock the slabset before operating the slabset warp_lock_mutex(warp_tile, set_mutex[next_set]); // The warp-level inner loop: finish a single task in the work queue while (active_mask == old_active_mask) { // When all the slabs inside a slabset have been searched, mark missing task, do nothing, task // complete if (counter >= set_associativity) { if (lane_idx == (size_t)next_lane) { active = false; } active_mask = warp_tile.ballot(active); break; } // The warp_tile read out the slab key_type read_key = ((volatile key_type*)(keys[next_set].set_[next_slab].slab_))[lane_idx]; // Compare the slab data with the target key int found_lane = __ffs(warp_tile.ballot(read_key == next_key)) - 1; // If found, mark hit task, update the value, the task is completed if (found_lane >= 0) { size_t found_offset = (next_set * set_associativity + next_slab) * warp_size + found_lane; if (lane_idx == (size_t)next_lane) { active = false; } warp_tile_copy(lane_idx, embedding_vec_size, (volatile float*)(vals + found_offset * embedding_vec_size), (volatile float*)(d_values + next_idx * embedding_vec_size)); active_mask = warp_tile.ballot(active); break; } // Compare the slab data with empty key, if found 
// Kernel to dump the non-empty keys of the slabsets [start_set_index, end_set_index) into
// `d_keys`.
//
// Each (sub-)warp tile handles one slabset: it reads all `set_associativity` slabs under the
// set's mutex (one slot per lane and slab), then the output positions are assigned in three
// levels:
//   * lane offset  - rank of the key among the tile's non-empty keys (ballot + popcount);
//   * tile offset  - reserved from the block-level shared counter `block_acc`;
//   * block offset - reserved from the global counter `*d_dump_counter`.
// After the kernel, `*d_dump_counter` has been advanced by the number of keys written.
// The order of keys in `d_keys` depends on the order in which the atomic adds are served.
// NOTE(review): the template parameter list and the size arguments of cg::thread_block_tile /
// cg::tiled_partition are not visible in this copy of the file - confirm against upstream.
template __global__ void dump_kernel(key_type* d_keys, size_t* d_dump_counter, const slabset* keys,
                            mutex* set_mutex, const size_t start_set_index,
                            const size_t end_set_index) {
  // Block-level counter used by all warp tiles within a block
  __shared__ uint32_t block_acc;
  // Initialize block-level counter
  if (threadIdx.x == 0) {
    block_acc = 0;
  }
  __syncthreads();
  // Lane(thread) ID within a warp tile
  cg::thread_block_tile warp_tile = cg::tiled_partition(cg::this_thread_block());
  const size_t lane_idx = warp_tile.thread_rank();
  // Warp tile target slabset id
  const size_t set_idx =
      ((blockIdx.x * (blockDim.x / warp_size)) + warp_tile.meta_group_rank()) + start_set_index;
  // Keys dumped from the cache: one key per slab for this lane
  key_type read_key[set_associativity];
  // Lane(thread) offset for storing each key
  uint32_t thread_key_offset[set_associativity];
  // Warp offset for storing each key
  uint32_t warp_key_offset;
  // Block offset for storing each key
  __shared__ size_t block_key_offset;

  // Warp tile dumps the target slabset
  if (set_idx < end_set_index) {
    // Lock the slabset before operating the slabset
    warp_lock_mutex(warp_tile, set_mutex[set_idx]);

    // The warp tile reads out the slabset
    for (unsigned slab_id = 0; slab_id < set_associativity; slab_id++) {
      // The warp tile reads out a slab
      read_key[slab_id] = keys[set_idx].set_[slab_id].slab_[lane_idx];
    }

    // Finish dumping the slabset, unlock the slabset
    warp_unlock_mutex(warp_tile, set_mutex[set_idx]);

    // Each lane(thread) within the warp tile calculates the offset to store its keys
    uint32_t warp_tile_total_keys = 0;
    for (unsigned slab_id = 0; slab_id < set_associativity; slab_id++) {
      unsigned valid_mask = warp_tile.ballot(read_key[slab_id] != empty_key);
      // Number of valid keys held by lower-ranked lanes in this slab, plus all valid keys of
      // the previous slabs
      thread_key_offset[slab_id] =
          __popc(valid_mask & ((1U << lane_idx) - 1U)) + warp_tile_total_keys;
      warp_tile_total_keys = warp_tile_total_keys + __popc(valid_mask);
    }

    // Each warp tile requests a unique place from the block-level counter
    if (lane_idx == 0) {
      warp_key_offset = atomicAdd(&block_acc, warp_tile_total_keys);
    }
    warp_key_offset = warp_tile.shfl(warp_key_offset, 0);
  }

  // Each block requests a unique place in the global memory output buffer.
  // The first barrier makes sure every tile has added its count to block_acc; the second one
  // publishes block_key_offset to all threads of the block.
  __syncthreads();
  if (threadIdx.x == 0) {
    block_key_offset = atomicAdd(d_dump_counter, (size_t)block_acc);
  }
  __syncthreads();

  // Warp tile stores the (non-empty) keys back to the output buffer
  if (set_idx < end_set_index) {
    for (unsigned slab_id = 0; slab_id < set_associativity; slab_id++) {
      if (read_key[slab_id] != empty_key) {
        d_keys[block_key_offset + warp_key_offset + thread_key_offset[slab_id]] =
            read_key[slab_id];
      }
    }
  }
}
// Construct a cache with capacity_in_set slabsets holding embedding vectors of
// embedding_vec_size floats each.
//
// After validating the arguments and the compile-time parameters, the
// constructor records the current CUDA device, allocates the key, value,
// per-slot counter, global counter and per-set mutex buffers on it, launches
// init_cache on the default stream and waits for it to finish.
//
// NOTE(review): on an invalid argument the constructor only prints a message
// and returns, i.e. the object is still constructed but none of the device
// pointers are assigned in this function. Confirm that the members have safe
// default values before the destructor frees them.
// NOTE(review): the template parameter list / class template arguments appear
// to have been stripped from this copy of the file; code tokens are untouched.
template gpu_cache::gpu_cache(const size_t capacity_in_set,
                              const size_t embedding_vec_size)
    : capacity_in_set_(capacity_in_set), embedding_vec_size_(embedding_vec_size) {
  // Check parameter
  if (capacity_in_set_ == 0) {
    printf("Error: Invalid value for capacity_in_set.\n");
    return;
  }
  if (embedding_vec_size_ == 0) {
    printf("Error: Invalid value for embedding_vec_size.\n");
    return;
  }
  if (set_associativity <= 0) {
    printf("Error: Invalid value for set_associativity.\n");
    return;
  }
  // Only power-of-two tile sizes up to 32 are accepted
  if (warp_size != 1 && warp_size != 2 && warp_size != 4 && warp_size != 8 &&
      warp_size != 16 && warp_size != 32) {
    printf("Error: Invalid value for warp_size.\n");
    return;
  }

  // Get the current CUDA dev
  CUDA_CHECK(cudaGetDevice(&dev_));

  // Calculate # of slot: sets * slabs per set * slots per slab
  num_slot_ = capacity_in_set_ * set_associativity * warp_size;

  // Allocate GPU memory for cache
  CUDA_CHECK(cudaMalloc((void**)&keys_, sizeof(slabset) * capacity_in_set_));
  CUDA_CHECK(cudaMalloc((void**)&vals_, sizeof(float) * embedding_vec_size_ * num_slot_));
  CUDA_CHECK(cudaMalloc((void**)&slot_counter_, sizeof(ref_counter_type) * num_slot_));
  CUDA_CHECK(cudaMalloc((void**)&global_counter_, sizeof(atomic_ref_counter_type)));

  // Allocate GPU memory for set mutex
  CUDA_CHECK(cudaMalloc((void**)&set_mutex_, sizeof(mutex) * capacity_in_set_));

  // Initialize the cache, set all entry to unused
  // (one thread per slot, rounded up to whole blocks)
  init_cache<<<((num_slot_ - 1) / BLOCK_SIZE_) + 1, BLOCK_SIZE_>>>(
      keys_, slot_counter_, global_counter_, num_slot_, empty_key, set_mutex_,
      capacity_in_set_);

  // Wait for initialization to finish
  CUDA_CHECK(cudaStreamSynchronize(0));
  CUDA_CHECK(cudaGetLastError());
}
// Destroy the cache: run destruct_kernel over the global counter and the
// per-set mutexes, wait for it on the default stream, then free every device
// buffer allocated by the constructor.
//
// The device restorer is created first and check_device(dev_) is called with
// the device recorded at construction time.
// NOTE(review): presumably dev_restorer restores the caller's current device
// when it goes out of scope - confirm against nv::CudaDeviceRestorer.
// NOTE(review): the template parameter list / class template arguments appear
// to have been stripped from this copy of the file; code tokens are untouched.
template gpu_cache::~gpu_cache() {
  // Device Restorer
  nv::CudaDeviceRestorer dev_restorer;

  // Check device
  dev_restorer.check_device(dev_);

  // Destruct CUDA std object
  // (grid sized for one thread per slabset, rounded up to whole blocks)
  destruct_kernel<<<((capacity_in_set_ - 1) / BLOCK_SIZE_) + 1, BLOCK_SIZE_>>>(
      global_counter_, set_mutex_, capacity_in_set_);

  // Wait for destruction to finish
  CUDA_CHECK(cudaStreamSynchronize(0));

  // Free GPU memory for cache
  CUDA_CHECK(cudaFree(keys_));
  CUDA_CHECK(cudaFree(vals_));
  CUDA_CHECK(cudaFree(slot_counter_));
  CUDA_CHECK(cudaFree(global_counter_));

  // Free GPU memory for set mutex
  CUDA_CHECK(cudaFree(set_mutex_));
}
// Look up len keys (d_keys) in the cache on the given stream.
//
// The arguments are forwarded to get_kernel: d_values receives the embedding
// output, while d_missing_index / d_missing_keys / d_missing_len describe the
// keys that were not found.
// For len == 0 only *d_missing_len is cleared (asynchronously on stream).
// All work is enqueued on stream; this function does not synchronize, so the
// final CUDA_CHECK only reports errors that are already visible at that point.
//
// NOTE(review): update_kernel_overflow_ignore also receives d_missing_len;
// presumably it resets it before get_kernel runs - confirm against the kernel.
// NOTE(review): the template parameter list and the launch configuration of
// get_kernel ("<<>>") appear to have been stripped from this copy of the file;
// grid_size is computed for that launch. Code tokens are untouched.
template void gpu_cache::Query(const key_type* d_keys, const size_t len,
                               float* d_values, uint64_t* d_missing_index,
                               key_type* d_missing_keys, size_t* d_missing_len,
                               cudaStream_t stream,
                               const size_t task_per_warp_tile) {
  // Device Restorer
  nv::CudaDeviceRestorer dev_restorer;

  // Check device
  dev_restorer.check_device(dev_);

  // Check if it is a valid query
  if (len == 0) {
    // Set the d_missing_len to 0 before return
    CUDA_CHECK(cudaMemsetAsync(d_missing_len, 0, sizeof(size_t), stream));
    return;
  }

  // Update the global counter as user perform a new(most recent) read operation to the cache
  // Resolve distance overflow issue as well.
  update_kernel_overflow_ignore <<<1, 1, 0, stream>>>(global_counter_, d_missing_len);

  // Read from the cache
  // Touch and refresh the hitting slot
  // Each block has BLOCK_SIZE_ / warp_size warp tiles and every tile handles
  // task_per_warp_tile keys; the grid is rounded up to cover all len keys.
  const size_t keys_per_block = (BLOCK_SIZE_ / warp_size) * task_per_warp_tile;
  const size_t grid_size = ((len - 1) / keys_per_block) + 1;
  get_kernel<<>>(
      d_keys, len, d_values, embedding_vec_size_, d_missing_index, d_missing_keys,
      d_missing_len, global_counter_, slot_counter_, capacity_in_set_, keys_, vals_,
      set_mutex_, task_per_warp_tile);

  // Check for GPU error before return
  CUDA_CHECK(cudaGetLastError());
}
// Insert len (key, value) pairs into the cache on the given stream by
// launching insert_replace_kernel. A request with len == 0 returns immediately
// without touching the device.
//
// All work is enqueued on stream; this function does not synchronize.
// NOTE(review): the template parameter list and the launch configuration of
// insert_replace_kernel ("<<>>") appear to have been stripped from this copy
// of the file; grid_size is computed for that launch. Code tokens are
// untouched.
template void gpu_cache::Replace(const key_type* d_keys, const size_t len,
                                 const float* d_values, cudaStream_t stream,
                                 const size_t task_per_warp_tile) {
  // Check if it is a valid replacement
  if (len == 0) {
    return;
  }

  // Device Restorer
  nv::CudaDeviceRestorer dev_restorer;

  // Check device
  dev_restorer.check_device(dev_);

  // Try to insert the pairs into the cache as long as there are unused slot
  // Then replace the pairs into the cache
  // Each block has BLOCK_SIZE_ / warp_size warp tiles and every tile handles
  // task_per_warp_tile keys; the grid is rounded up to cover all len keys.
  const size_t keys_per_block = (BLOCK_SIZE_ / warp_size) * task_per_warp_tile;
  const size_t grid_size = ((len - 1) / keys_per_block) + 1;
  insert_replace_kernel <<>>(d_keys, d_values, embedding_vec_size_, len, keys_,
                             vals_, slot_counter_, set_mutex_, global_counter_,
                             capacity_in_set_, task_per_warp_tile);

  // Check for GPU error before return
  CUDA_CHECK(cudaGetLastError());
}
// gpu_cache::Update - overwrite the cached values of the input keys that
// already exist in the cache (see the in-body comment) by launching
// update_kernel on the given stream. A request with len == 0 returns
// immediately. Neither variant synchronizes the stream.
//
// The two preprocessor branches are textually identical except for the
// spacing of the kernel launch; they differ only through the types they are
// instantiated with elsewhere in the file.
// NOTE(review): the template parameter lists and the launch configuration of
// update_kernel ("<<>>") appear to have been stripped from this copy of the
// file; grid_size is computed for that launch. Code tokens are untouched.
#ifdef LIBCUDACXX_VERSION
template void gpu_cache::Update(const key_type* d_keys, const size_t len,
                                const float* d_values, cudaStream_t stream,
                                const size_t task_per_warp_tile) {
  // Check if it is a valid update request
  if (len == 0) {
    return;
  }

  // Device Restorer
  nv::CudaDeviceRestorer dev_restorer;

  // Check device
  dev_restorer.check_device(dev_);

  // Update the value of input keys that are existed in the cache
  // Each block has BLOCK_SIZE_ / warp_size warp tiles and every tile handles
  // task_per_warp_tile keys; the grid is rounded up to cover all len keys.
  const size_t keys_per_block = (BLOCK_SIZE_ / warp_size) * task_per_warp_tile;
  const size_t grid_size = ((len - 1) / keys_per_block) + 1;
  update_kernel<<>>(
      d_keys, len, d_values, embedding_vec_size_, capacity_in_set_, keys_, vals_,
      set_mutex_, task_per_warp_tile);

  // Check for GPU error before return
  CUDA_CHECK(cudaGetLastError());
}
#else
template void gpu_cache::Update(const key_type* d_keys, const size_t len,
                                const float* d_values, cudaStream_t stream,
                                const size_t task_per_warp_tile) {
  // Check if it is a valid update request
  if (len == 0) {
    return;
  }

  // Device Restorer
  nv::CudaDeviceRestorer dev_restorer;

  // Check device
  dev_restorer.check_device(dev_);

  // Update the value of input keys that are existed in the cache
  // Each block has BLOCK_SIZE_ / warp_size warp tiles and every tile handles
  // task_per_warp_tile keys; the grid is rounded up to cover all len keys.
  const size_t keys_per_block = (BLOCK_SIZE_ / warp_size) * task_per_warp_tile;
  const size_t grid_size = ((len - 1) / keys_per_block) + 1;
  update_kernel <<>>(d_keys, len, d_values, embedding_vec_size_, capacity_in_set_,
                     keys_, vals_, set_mutex_, task_per_warp_tile);

  // Check for GPU error before return
  CUDA_CHECK(cudaGetLastError());
}
#endif
// Dump the keys stored in slabsets [start_set_index, end_set_index) into
// d_keys on the given stream; *d_dump_counter is cleared first and then
// receives the number of dumped keys (accumulated by dump_kernel).
//
// Valid requests satisfy
//   start_set_index < end_set_index <= capacity_in_set_.
// Anything else prints an error and returns without touching d_keys or
// d_dump_counter. The function does not synchronize the stream.
//
// NOTE(review): the template parameter list and the launch configuration of
// dump_kernel ("<<>>") appear to have been stripped from this copy of the
// file; grid_size is computed for that launch. Code tokens are untouched.
template void gpu_cache::Dump(key_type* d_keys, size_t* d_dump_counter,
                              const size_t start_set_index,
                              const size_t end_set_index, cudaStream_t stream) {
  // Check if it is a valid dump request
  if (start_set_index >= capacity_in_set_) {
    printf("Error: Invalid value for start_set_index. Nothing dumped.\n");
    return;
  }
  if (end_set_index <= start_set_index || end_set_index > capacity_in_set_) {
    printf("Error: Invalid value for end_set_index. Nothing dumped.\n");
    return;
  }

  // Device Restorer
  nv::CudaDeviceRestorer dev_restorer;

  // Check device
  dev_restorer.check_device(dev_);

  // Set the global counter to 0 first
  CUDA_CHECK(cudaMemsetAsync(d_dump_counter, 0, sizeof(size_t), stream));

  // Dump keys from the cache
  // One warp tile per slabset: BLOCK_SIZE_ / warp_size slabsets per block,
  // rounded up to cover the whole requested range.
  const size_t grid_size =
      (((end_set_index - start_set_index) - 1) / (BLOCK_SIZE_ / warp_size)) + 1;
  dump_kernel <<>>(d_keys, d_dump_counter, keys_, set_mutex_, start_set_index,
                   end_set_index);

  // Check for GPU error before return
  CUDA_CHECK(cudaGetLastError());
}
An example of chunking the OGB MAG240M dataset: ```python import ogb.lsc dataset = ogb.lsc.MAG240MDataset('.') etypes = [ ('paper', 'cites', 'paper'), ('author', 'writes', 'paper'), ('author', 'affiliated_with', 'institution')] g = dgl.heterograph({k: tuple(dataset.edge_index(*k)) for k in etypes}) chunk_graph( g, 'mag240m', {'paper': { 'feat': 'mag240m_kddcup2021/processed/paper/node_feat.npy', 'label': 'mag240m_kddcup2021/processed/paper/node_label.npy', 'year': 'mag240m_kddcup2021/processed/paper/node_year.npy'}}, {}, 4, 'output') ``` The output chunked graph metadata will look as follows (assuming the current directory is `/home/user`): ```json { "graph_name": "mag240m", "node_type": [ "author", "institution", "paper" ], "num_nodes_per_chunk": [ [ 30595778, 30595778, 30595778, 30595778 ], [ 6431, 6430, 6430, 6430 ], [ 30437917, 30437917, 30437916, 30437916 ] ], "edge_type": [ "author:affiliated_with:institution", "author:writes:paper", "paper:cites:paper" ], "num_edges_per_chunk": [ [ 11148147, 11148147, 11148146, 11148146 ], [ 96505680, 96505680, 96505680, 96505680 ], [ 324437232, 324437232, 324437231, 324437231 ] ], "edges": { "author:affiliated_with:institution": { "format": { "name": "csv", "delimiter": " " }, "data": [ "/home/user/output/edge_index/author:affiliated_with:institution0.txt", "/home/user/output/edge_index/author:affiliated_with:institution1.txt", "/home/user/output/edge_index/author:affiliated_with:institution2.txt", "/home/user/output/edge_index/author:affiliated_with:institution3.txt" ] }, "author:writes:paper": { "format": { "name": "csv", "delimiter": " " }, "data": [ "/home/user/output/edge_index/author:writes:paper0.txt", "/home/user/output/edge_index/author:writes:paper1.txt", "/home/user/output/edge_index/author:writes:paper2.txt", "/home/user/output/edge_index/author:writes:paper3.txt" ] }, "paper:cites:paper": { "format": { "name": "csv", "delimiter": " " }, "data": [ "/home/user/output/edge_index/paper:cites:paper0.txt", 
"/home/user/output/edge_index/paper:cites:paper1.txt", "/home/user/output/edge_index/paper:cites:paper2.txt", "/home/user/output/edge_index/paper:cites:paper3.txt" ] } }, "node_data": { "paper": { "feat": { "format": { "name": "numpy" }, "data": [ "/home/user/output/node_data/paper/feat-0.npy", "/home/user/output/node_data/paper/feat-1.npy", "/home/user/output/node_data/paper/feat-2.npy", "/home/user/output/node_data/paper/feat-3.npy" ] }, "label": { "format": { "name": "numpy" }, "data": [ "/home/user/output/node_data/paper/label-0.npy", "/home/user/output/node_data/paper/label-1.npy", "/home/user/output/node_data/paper/label-2.npy", "/home/user/output/node_data/paper/label-3.npy" ] }, "year": { "format": { "name": "numpy" }, "data": [ "/home/user/output/node_data/paper/year-0.npy", "/home/user/output/node_data/paper/year-1.npy", "/home/user/output/node_data/paper/year-2.npy", "/home/user/output/node_data/paper/year-3.npy" ] } } }, "edge_data": {} } ``` ## Change edge type to canonical edge type for partition configuration json In the upcoming DGL v1.0, we will require the partition configuration file to contain only canonical edge type. This tool is designed to help migrating existing configuration files from old style to new one. ### Sample Usage ``` python tools/change_etype_to_canonical_etype.py --part_config "{configuration file path}" ``` ### Requirement Partition algorithms produce one configuration file and multiple data folders, and each data folder corresponds to a partition. 
**This tool needs to read from the partition configuration file (specified by the command-line argument) *and* the graph structure data (stored in `graph.dgl` under the data folder) of the first partition.** They can be local files or files shared over the network. If you follow this [official tutorial](https://docs.dgl.ai/en/latest/tutorials/dist/1_node_classification.html#sphx-glr-tutorials-dist-1-node-classification-py) for distributed training, you don't need to worry about this, as all files are shared with every participant through NFS. **For example, below is a typical data folder expected by this tool:** ``` data_root_dir/ |-- graph_name.json # specified by part_config |-- part0/ ... |-- graph.dgl ... ``` For more information about the partition algorithm, see https://docs.dgl.ai/en/latest/generated/dgl.distributed.partition.partition_graph.html. ### Input arguments 1. *part_config*: The path of the partition JSON file. <**Required**> ### Result This tool changes the keys of ``etypes`` and ``edge_map`` from the format ``str`` to ``str:str:str``, and it overwrites the original file instead of creating a new one. E.g. **File content before running the script** ```json { "edge_map": { "r1": [ [ 0, 6 ], [ 16, 20 ] ], "r2": [ [ 6, 11 ], [ 20, 25 ] ], "r3": [ [ 11, 16 ], [ 25, 30 ] ] }, "etypes": { "r1": 0, "r2": 1, "r3": 2 }, ... } ``` **After running** ```json { "edge_map": { "n1:r1:n2": [ [ 0, 6 ], [ 16, 20 ] ], "n1:r2:n3": [ [ 6, 11 ], [ 20, 25 ] ], "n2:r3:n3": [ [ 11, 16 ], [ 25, 30 ] ] }, "etypes": { "n1:r1:n2": 0, "n1:r2:n3": 1, "n2:r3:n3": 2 } ... 
# Keys of the partition configuration JSON that this tool reads or rewrites.
ETYPES_KEY = "etypes"
EDGE_MAP_KEY = "edge_map"
NTYPES_KEY = "ntypes"
NUM_PARTS_KEY = "num_parts"
# Separator between source node type, relation and destination node type.
CANONICAL_ETYPE_DELIMITER = ":"


def convert_conf(part_config):
    """Rewrite a partition config in place so that it uses canonical etypes.

    The keys of ``etypes`` and ``edge_map`` are changed from ``etype`` to
    ``src_ntype:etype:dst_ntype``. Files that already use canonical edge
    types (or contain no edge types at all) are left untouched.

    Parameters
    ----------
    part_config : str
        Path of the partition configuration JSON file. The file is
        overwritten.
    """
    with open(part_config, "r+", encoding="utf-8") as f:
        config = json.load(f)
        logging.info("Checking if the provided json file need to be changed.")
        if not is_old_version(config):
            return

        logging.info("Changing the partition configuration file.")
        if len(config[NTYPES_KEY]) == 1:
            # With a single node type, both end points of every edge type are
            # known without looking at the graph data.
            ntype = next(iter(config[NTYPES_KEY]))
            canonical_etypes = {
                CANONICAL_ETYPE_DELIMITER.join((ntype, etype, ntype)): eid
                for etype, eid in config[ETYPES_KEY].items()
            }
        else:
            canonical_etypes = etype2canonical_etype(part_config, config)
        reverse_c_etypes = {v: k for k, v in canonical_etypes.items()}

        # Convert edge_map keys from etype -> c_etype.
        new_edge_map = {}
        for e_type, bounds in config[EDGE_MAP_KEY].items():
            eid = config[ETYPES_KEY][e_type]
            new_edge_map[reverse_c_etypes[eid]] = bounds
        config[EDGE_MAP_KEY] = new_edge_map
        config[ETYPES_KEY] = canonical_etypes

        logging.info("Dumping the content to disk.")
        # Overwrite from the start and drop whatever is left of the old text.
        f.seek(0)
        json.dump(config, f, indent=4)
        f.truncate()


def etype2canonical_etype(part_config, config):
    """Infer the canonical edge types from the partitioned graph data.

    For every edge type, the first partition that owns at least one edge of
    that type is used as its "seed" partition; the node types of the first
    such edge give the canonical edge type.

    Parameters
    ----------
    part_config : str
        Path of the partition configuration file; the ``part<i>`` folders are
        looked up next to it.
    config : dict
        The parsed (old style) partition configuration.

    Returns
    -------
    dict[str, int]
        Mapping from ``src_ntype:etype:dst_ntype`` to the edge type ID.

    Raises
    ------
    ValueError
        If an edge type has no edges in any partition, in which case its end
        point node types cannot be inferred.
    """
    num_parts = config[NUM_PARTS_KEY]
    edge_map = config[EDGE_MAP_KEY]
    etypes = list(edge_map)

    # Group the edge types by their seed partition. Grouping by name (rather
    # than by position in a parallel list) keeps edge types and partitions
    # aligned even when an edge type is empty.
    seeds_by_part = {}
    for etype, bounds in edge_map.items():
        for part_id in range(num_parts):
            if bounds[part_id][1] > bounds[part_id][0]:
                seeds_by_part.setdefault(part_id, []).append(etype)
                break
        else:
            raise ValueError(
                f"Edge type {etype!r} has no edges in any partition; its "
                "canonical edge type cannot be inferred."
            )

    # Get starting index of each partition.
    shifts = torch.tensor(
        [edge_map[etypes[0]][part_id][0] for part_id in range(num_parts)]
    )

    canonical_etypes = {}
    for part_id in sorted(seeds_by_part):
        canonical_etypes.update(
            _find_c_etypes_in_partition(
                part_id,
                seeds_by_part[part_id],
                config[ETYPES_KEY],
                config[NTYPES_KEY],
                edge_map,
                shifts,
                part_config,
            )
        )
    return canonical_etypes


def _find_c_etypes_in_partition(
    part_id, seed_etypes, etypes, ntypes, edge_map, shifts, config_path
):
    """Resolve the canonical edge types of ``seed_etypes`` from one partition.

    Loads ``part<part_id>/graph.dgl`` next to ``config_path``, looks up the
    first edge of every seed edge type and reads the node types of its end
    points.

    Returns
    -------
    dict[str, int]
        Mapping from ``src_ntype:etype:dst_ntype`` to the edge type ID.

    Raises
    ------
    DGLError
        Re-raised after logging when DGL reports an error (for example when
        the graph data of the partition is missing), so that the caller fails
        with the real cause.
    """
    try:
        folder = os.path.dirname(os.path.realpath(config_path))
        local_g = load_graphs(f"{folder}/part{part_id}/graph.dgl")[0][0]
        # First edge of each seed edge type, as an ID local to this partition.
        local_eids = [
            edge_map[etype][part_id][0] - shifts[part_id]
            for etype in seed_etypes
        ]
        local_eids = toindex(torch.tensor(local_eids))
        local_eids = local_eids.tousertensor()
        local_src, local_dst = local_g.find_edges(local_eids)
        src_ntids, dst_ntids = (
            local_g.ndata[dgl.NTYPE][local_src],
            local_g.ndata[dgl.NTYPE][local_dst],
        )
        # Invert the name -> ID mapping to translate node type IDs to names.
        ntypes = {v: k for k, v in ntypes.items()}
        src_ntypes = [ntypes[ntid.item()] for ntid in src_ntids]
        dst_ntypes = [ntypes[ntid.item()] for ntid in dst_ntids]
        c_etypes = [
            CANONICAL_ETYPE_DELIMITER.join(c_etype)
            for c_etype in zip(src_ntypes, seed_etypes, dst_ntypes)
        ]
        return {k: etypes[v] for (k, v) in zip(c_etypes, seed_etypes)}
    except DGLError as e:
        print(e)
        logging.fatal(
            f"Graph data of partition {part_id} is requested but not found."
        )
        # Returning None here would only surface later as an unrelated
        # TypeError in the caller's dict.update().
        raise


def is_old_version(config):
    """Return True if ``config`` still uses non-canonical edge type names.

    Only the first edge type is inspected. A configuration without any edge
    type has nothing to convert and is reported as not old.
    """
    etypes = config[ETYPES_KEY]
    if not etypes:
        return False
    first_etype = next(iter(etypes))
    return CANONICAL_ETYPE_DELIMITER not in first_etype


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Change edge type in config file from format (str)"
        " to (str,str,str), the original file will be overwritten",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--part_config", type=str, help="The file of the partition config"
    )
    args, _ = parser.parse_known_args()
    if args.part_config is None:
        parser.error(
            "A user has to specify a partition config file with --part_config."
        )
    # Without a configured handler the INFO messages below are never shown.
    logging.basicConfig(level=logging.INFO)
    start = time.time()
    convert_conf(args.part_config)
    end = time.time()
    logging.info(f"elplased time in seconds: {end - start}")
def _split_evenly(total, num_chunks):
    """Split ``total`` items into ``num_chunks`` sizes that differ by at most one.

    The first ``total % num_chunks`` chunks receive the extra item.
    """
    base, extra = divmod(total, num_chunks)
    return [base + (i < extra) for i in range(num_chunks)]


def _is_flat_paths(paths):
    """Return True if ``paths`` maps data keys directly to paths.

    An empty dict is not flat: there is nothing to wrap.
    """
    return bool(paths) and not isinstance(next(iter(paths.values())), dict)


def _abspath_data_paths(paths):
    """Return a copy of ``paths`` with every path made absolute.

    Works for both the flat ``{key: path}`` and the nested
    ``{type: {key: path}}`` form; the input is not modified.
    """
    return {
        key: _abspath_data_paths(value)
        if isinstance(value, dict)
        else os.path.abspath(value)
        for key, value in paths.items()
    }


def _chunk_graph(
    g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt
):
    """Write the chunked graph into the current working directory.

    Creates ``edge_index``, ``node_data`` and ``edge_data`` sub-directories
    via ``setdir`` plus a ``metadata.json`` describing all chunks. All data
    paths must be absolute (or valid after ``setdir`` took effect).

    ``output_path`` is accepted for symmetry with :func:`chunk_graph` but is
    not used here.
    """
    # First deal with ndata and edata that are homogeneous (i.e. not a
    # dict-of-dict).
    if len(g.ntypes) == 1 and _is_flat_paths(ndata_paths):
        ndata_paths = {g.ntypes[0]: ndata_paths}
    if len(g.etypes) == 1 and _is_flat_paths(edata_paths):
        # Wrap the *edge* data paths (the node data paths were used here
        # before, which chunked the wrong files as edge data).
        edata_paths = {g.etypes[0]: edata_paths}
    # Then convert all edge types to canonical edge types
    etypestrs = {etype: ":".join(etype) for etype in g.canonical_etypes}
    edata_paths = {
        ":".join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()
    }

    metadata = {}
    metadata["graph_name"] = name
    metadata["node_type"] = g.ntypes

    # Compute the number of nodes per chunk per node type
    num_nodes_per_chunk = [
        _split_evenly(g.num_nodes(ntype), num_chunks) for ntype in g.ntypes
    ]
    metadata["num_nodes_per_chunk"] = num_nodes_per_chunk
    num_nodes_per_chunk_dict = dict(zip(g.ntypes, num_nodes_per_chunk))

    metadata["edge_type"] = [etypestrs[etype] for etype in g.canonical_etypes]

    # Compute the number of edges per chunk per edge type
    num_edges_per_chunk = [
        _split_evenly(g.num_edges(etype), num_chunks)
        for etype in g.canonical_etypes
    ]
    metadata["num_edges_per_chunk"] = num_edges_per_chunk
    num_edges_per_chunk_dict = dict(
        zip(g.canonical_etypes, num_edges_per_chunk)
    )

    # Split edge index
    metadata["edges"] = {}
    with setdir("edge_index"):
        for etype in g.canonical_etypes:
            etypestr = etypestrs[etype]
            logging.info("Chunking edge index for %s" % etypestr)
            edges_meta = {}
            fmt_meta = {"name": "csv", "delimiter": " "}
            edges_meta["format"] = fmt_meta

            # One row per edge: (source ID, destination ID).
            srcdst = torch.stack(g.edges(etype=etype), 1)
            edges_meta["data"] = chunk_numpy_array(
                srcdst.numpy(),
                fmt_meta,
                num_edges_per_chunk_dict[etype],
                etypestr + "%d.txt",
            )
            metadata["edges"][etypestr] = edges_meta

    # Chunk node data. The inputs are always read as numpy files; only the
    # output format is configurable.
    reader_fmt_meta, writer_fmt_meta = {"name": "numpy"}, {"name": data_fmt}
    file_suffix = "npy" if data_fmt == "numpy" else "parquet"
    metadata["node_data"] = {}
    with setdir("node_data"):
        for ntype, ndata_per_type in ndata_paths.items():
            ndata_meta = {}
            with setdir(ntype):
                for key, path in ndata_per_type.items():
                    logging.info(
                        "Chunking node data for type %s key %s" % (ntype, key)
                    )
                    ndata_key_meta = {}
                    arr = array_readwriter.get_array_parser(
                        **reader_fmt_meta
                    ).read(path)
                    ndata_key_meta["format"] = writer_fmt_meta
                    ndata_key_meta["data"] = chunk_numpy_array(
                        arr,
                        writer_fmt_meta,
                        num_nodes_per_chunk_dict[ntype],
                        key + "-%d." + file_suffix,
                    )
                    ndata_meta[key] = ndata_key_meta

            metadata["node_data"][ntype] = ndata_meta

    # Chunk edge data
    metadata["edge_data"] = {}
    with setdir("edge_data"):
        for etypestr, edata_per_type in edata_paths.items():
            edata_meta = {}
            with setdir(etypestr):
                for key, path in edata_per_type.items():
                    logging.info(
                        "Chunking edge data for type %s key %s"
                        % (etypestr, key)
                    )
                    edata_key_meta = {}
                    arr = array_readwriter.get_array_parser(
                        **reader_fmt_meta
                    ).read(path)
                    edata_key_meta["format"] = writer_fmt_meta
                    # The chunk sizes are keyed by canonical etype tuples.
                    etype = tuple(etypestr.split(":"))
                    edata_key_meta["data"] = chunk_numpy_array(
                        arr,
                        writer_fmt_meta,
                        num_edges_per_chunk_dict[etype],
                        key + "-%d." + file_suffix,
                    )
                    edata_meta[key] = edata_key_meta

            metadata["edge_data"][etypestr] = edata_meta

    metadata_path = "metadata.json"
    with open(metadata_path, "w") as f:
        json.dump(metadata, f, sort_keys=True, indent=4)
    logging.info("Saved metadata in %s" % os.path.abspath(metadata_path))


def chunk_graph(
    g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt="numpy"
):
    """
    Split the graph into multiple chunks.

    A directory will be created at :attr:`output_path` with the metadata and
    chunked edge list as well as the node/edge data.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    name : str
        The name of the graph, to be used later in DistDGL training.
    ndata_paths : dict[str, pathlike] or dict[ntype, dict[str, pathlike]]
        The dictionary of paths pointing to the corresponding numpy array file
        for each node data key.
    edata_paths : dict[etype, pathlike] or dict[etype, dict[str, pathlike]]
        The dictionary of paths pointing to the corresponding numpy array file
        for each edge data key. ``etype`` could be canonical or non-canonical.
    num_chunks : int
        The number of chunks
    output_path : pathlike
        The output directory saving the chunked graph.
    data_fmt : str, optional
        Name of the format used to write the node/edge data chunks. With
        ``"numpy"`` (default) the chunks get the ``npy`` suffix, with any
        other value the ``parquet`` suffix.

    Notes
    -----
    The given dictionaries are not modified; absolute paths are computed on
    copies.
    """
    # Resolve the paths before entering the output directory, where relative
    # input paths would no longer point to the right files.
    ndata_paths = _abspath_data_paths(ndata_paths)
    edata_paths = _abspath_data_paths(edata_paths)
    with setdir(output_path):
        _chunk_graph(
            g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt
        )


if __name__ == "__main__":
    logging.basicConfig(level="INFO")
    input_dir = "/data"
    output_dir = "/chunked-data"
    (g,), _ = dgl.load_graphs(os.path.join(input_dir, "graph.dgl"))
    chunk_graph(
        g,
        "mag240m",
        {
            "paper": {
                "feat": os.path.join(input_dir, "paper/feat.npy"),
                "label": os.path.join(input_dir, "paper/label.npy"),
                "year": os.path.join(input_dir, "paper/year.npy"),
            }
        },
        {
            "cites": {"count": os.path.join(input_dir, "cites/count.npy")},
            "writes": {"year": os.path.join(input_dir, "writes/year.npy")},
            # you can put the same data file if they indeed share the features.
            "rev_writes": {"year": os.path.join(input_dir, "writes/year.npy")},
        },
        4,
        output_dir,
    )
    # The generated metadata goes as in tools/sample-config/mag240m-metadata.json.
================================================ FILE: tools/copy_files.py ================================================ """Copy the partitions to a cluster of machines.""" import argparse import copy import json import logging import os import signal import stat import subprocess import sys def copy_file(file_name, ip, workspace, param=""): print("copy {} to {}".format(file_name, ip + ":" + workspace + "/")) cmd = "scp " + param + " " + file_name + " " + ip + ":" + workspace + "/" subprocess.check_call(cmd, shell=True) def exec_cmd(ip, cmd): cmd = "ssh -o StrictHostKeyChecking=no " + ip + " '" + cmd + "'" subprocess.check_call(cmd, shell=True) def main(): parser = argparse.ArgumentParser(description="Copy data to the servers.") parser.add_argument( "--workspace", type=str, required=True, help="Path of user directory of distributed tasks. \ This is used to specify a destination location where \ data are copied to on remote machines.", ) parser.add_argument( "--rel_data_path", type=str, required=True, help="Relative path in workspace to store the partition data.", ) parser.add_argument( "--part_config", type=str, required=True, help="The partition config file. The path is on the local machine.", ) parser.add_argument( "--script_folder", type=str, required=True, help="The folder contains all the user code scripts.", ) parser.add_argument( "--ip_config", type=str, required=True, help="The file of IP configuration for servers. \ The path is on the local machine.", ) args = parser.parse_args() hosts = [] with open(args.ip_config) as f: for line in f: res = line.strip().split(" ") ip = res[0] hosts.append(ip) # We need to update the partition config file so that the paths are relative to # the workspace in the remote machines. 
with open(args.part_config) as conf_f: part_metadata = json.load(conf_f) tmp_part_metadata = copy.deepcopy(part_metadata) num_parts = part_metadata["num_parts"] assert num_parts == len( hosts ), "The number of partitions needs to be the same as the number of hosts." graph_name = part_metadata["graph_name"] node_map = part_metadata["node_map"] edge_map = part_metadata["edge_map"] if not isinstance(node_map, dict): assert ( node_map[-4:] == ".npy" ), "node map should be stored in a NumPy array." tmp_part_metadata["node_map"] = "{}/{}/node_map.npy".format( args.workspace, args.rel_data_path ) if not isinstance(edge_map, dict): assert ( edge_map[-4:] == ".npy" ), "edge map should be stored in a NumPy array." tmp_part_metadata["edge_map"] = "{}/{}/edge_map.npy".format( args.workspace, args.rel_data_path ) for part_id in range(num_parts): part_files = tmp_part_metadata["part-{}".format(part_id)] part_files["edge_feats"] = "{}/part{}/edge_feat.dgl".format( args.rel_data_path, part_id ) part_files["node_feats"] = "{}/part{}/node_feat.dgl".format( args.rel_data_path, part_id ) part_files["part_graph"] = "{}/part{}/graph.dgl".format( args.rel_data_path, part_id ) tmp_part_config = "/tmp/{}.json".format(graph_name) with open(tmp_part_config, "w") as outfile: json.dump(tmp_part_metadata, outfile, sort_keys=True, indent=4) # Copy ip config. 
def signal_handler(signal, frame):
    """Handle SIGINT: log that copying was interrupted, then exit with status 0.

    Both arguments are supplied by the interpreter when the signal fires and
    are not used. Note that the first parameter shadows the ``signal`` module
    inside this function.
    """
    logging.info("Stop copying")
    # Same effect as sys.exit(0): unwinds the interpreter with exit code 0.
    raise SystemExit(0)
def submit_jobs(args) -> None:
    """Assemble the data-dispatch pipeline command and run it via the launcher.

    The graph name is read from the schema JSON (``args.metadata_filename``
    inside ``args.in_dir``) and the number of partitions from
    ``partition_meta.json`` inside ``args.partitions_dir``. The partition
    count must be a non-zero multiple of the number of lines in
    ``args.ip_config``. The resulting command line is printed and executed
    with ``os.system``.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line options of this script.

    Raises
    ------
    AssertionError
        If the partition count is zero/unavailable, the ip config is empty,
        or the partition count is not a multiple of the number of hosts.
    """
    # Read the json file and get the remaining arguments here.
    schema_path = args.metadata_filename
    with open(os.path.join(args.in_dir, schema_path)) as schema:
        schema_map = json.load(schema)
    graph_name = schema_map["graph_name"]

    # Retrieve num_parts; it stays 0 when the partition metadata is missing.
    num_parts = 0
    partition_path = os.path.join(args.partitions_dir, "partition_meta.json")
    if os.path.isfile(partition_path):
        part_meta = load_partition_meta(partition_path)
        num_parts = part_meta.num_parts
    assert (
        num_parts != 0
    ), "Invalid value for no. of partitions. Please check partition_meta.json file."

    # Verify ip_config.
    with open(args.ip_config, "r") as f:
        num_ips = len(f.readlines())
    # An empty file would otherwise surface as a ZeroDivisionError below.
    assert num_ips > 0, f"No ip addresses found in [{args.ip_config}]."
    # The message uses the local ``num_parts``: the parser defines no
    # ``--num-parts`` option, so formatting ``args.num_parts`` raised
    # AttributeError and hid the real failure.
    assert num_parts % num_ips == 0, (
        f"The num_parts[{num_parts}] should be a multiple of number of "
        f"lines(ip addresses)[{num_ips}] in [{args.ip_config}]."
    )

    # Every piece carries its own trailing space so the final string is the
    # same as one built by repeated concatenation.
    pieces = [
        "--world-size {} ".format(num_ips),
        "--partitions-dir {} ".format(os.path.abspath(args.partitions_dir)),
        "--input-dir {} ".format(os.path.abspath(args.in_dir)),
        "--graph-name {} ".format(graph_name),
        "--schema {} ".format(schema_path),
        "--num-parts {} ".format(num_parts),
        "--output {} ".format(os.path.abspath(args.out_dir)),
        "--process-group-timeout {} ".format(args.process_group_timeout),
        "--log-level {} ".format(args.log_level),
        "--save-orig-nids " if args.save_orig_nids else "",
        "--save-orig-eids " if args.save_orig_eids else "",
        "--use-graphbolt " if args.use_graphbolt else "",
        "--store-eids " if args.store_eids else "",
        "--store-inner-node " if args.store_inner_node else "",
        "--store-inner-edge " if args.store_inner_edge else "",
        f"--graph-formats {args.graph_formats} " if args.graph_formats else "",
    ]
    argslist = "".join(pieces)

    # (BarclayII) Is it safe to assume all the workers have the Python
    # executable at the same path?
    pipeline_cmd = os.path.join(INSTALL_DIR, PIPELINE_SCRIPT)
    udf_cmd = f"{args.python_path} {pipeline_cmd} {argslist}"
    launch_cmd = get_launch_cmd(args)
    launch_cmd += '"' + udf_cmd + '"'
    print(launch_cmd)
    status = os.system(launch_cmd)
    if status != 0:
        # Report the failure without changing the best-effort control flow.
        logging.error("Launch command exited with non-zero status %s", status)
parser.add_argument( "--use-graphbolt", action="store_true", help="Use GraphBolt for distributed partition.", ) parser.add_argument( "--store-inner-node", action="store_true", default=False, help="Store inner nodes.", ) parser.add_argument( "--store-inner-edge", action="store_true", default=False, help="Store inner edges.", ) parser.add_argument( "--store-eids", action="store_true", default=False, help="Store edge IDs.", ) parser.add_argument( "--graph-formats", type=str, default=None, help="Save partitions in specified formats. It could be any combination(joined with ``,``) " "of ``coo``, ``csc`` and ``csr``. If not specified, save one format only according to " "what format is available. If multiple formats are available, selection priority " "from high to low is ``coo``, ``csc``, ``csr``.", ) args, _ = parser.parse_known_args() fmt = "%(asctime)s %(levelname)s %(message)s" logging.basicConfig( format=fmt, level=getattr(logging, args.log_level, None), ) assert os.path.isdir(args.in_dir) assert os.path.isdir(args.partitions_dir) assert os.path.isfile(args.ip_config) assert isinstance(args.master_port, int) submit_jobs(args) if __name__ == "__main__": main() ================================================ FILE: tools/distgraphlaunch.py ================================================ """Launching tool for DGL distributed training""" import argparse import json import logging import multiprocessing import os import re import signal import stat import subprocess import sys import time from functools import partial from threading import Thread from typing import Optional DEFAULT_PORT = 30050 def cleanup_proc(get_all_remote_pids, conn): """This process tries to clean up the remote training tasks.""" print("cleanupu process runs") # This process should not handle SIGINT. signal.signal(signal.SIGINT, signal.SIG_IGN) data = conn.recv() # If the launch process exits normally, this process doesn't need to do anything. 
def execute_remote(
    cmd: str, ip: str, port: int, username: Optional[str] = ""
) -> Thread:
    """Execute command line on remote machine via ssh.

    Args:
        cmd: User-defined command (udf) to execute on the remote host.
        ip: The ip-address of the host to run the command on.
        port: Port number that the host is listening on.
        username: Optional. If given, this will specify a username to use when issuing commands over SSH.
            Useful when your infra requires you to explicitly specify a username to avoid permission issues.

    Returns:
        thread: The already-started daemon Thread that runs `cmd` on the remote host. This function
            returns immediately; join the thread to wait for the command to complete.
    """
    ip_prefix = f"{username}@" if username else ""

    # Construct ssh command that executes `cmd` on the remote host
    ssh_cmd = "ssh -o StrictHostKeyChecking=no -p {port} {ip_prefix}{ip} '{cmd}'".format(
        port=str(port),
        ip_prefix=ip_prefix,
        ip=ip,
        cmd=cmd,
    )

    # thread func to run the job
    def run(ssh_cmd):
        subprocess.check_call(ssh_cmd, shell=True)

    # `daemon=True` replaces Thread.setDaemon(), deprecated since Python 3.10.
    thread = Thread(target=run, args=(ssh_cmd,), daemon=True)
    thread.start()
    return thread
def construct_torch_dist_launcher_cmd(
    num_trainers: int,
    num_nodes: int,
    node_rank: int,
    master_addr: str,
    master_port: int,
) -> str:
    """Constructs the torch distributed launcher command.
    Helper function.

    Args:
        num_trainers: Value for ``--nproc_per_node``.
        num_nodes: Value for ``--nnodes``.
        node_rank: Value for ``--node_rank``.
        master_addr: Value for ``--master_addr``.
        master_port: Value for ``--master_port``.

    Returns:
        cmd_str: The ``-m torch.distributed.launch ...`` argument string (without the python binary).
    """
    return (
        "-m torch.distributed.launch "
        f"--nproc_per_node={num_trainers} "
        f"--nnodes={num_nodes} "
        f"--node_rank={node_rank} "
        f"--master_addr={master_addr} "
        f"--master_port={master_port}"
    )


def wrap_udf_in_torch_dist_launcher(
    udf_command: str,
    num_trainers: int,
    num_nodes: int,
    node_rank: int,
    master_addr: str,
    master_port: int,
) -> str:
    """Wraps the user-defined function (udf_command) with the torch.distributed.launch module.

    Example: if udf_command is "python3 run/some/trainer.py arg1 arg2", then the result is:
        "python3 -m torch.distributed.launch [DIST TORCH ARGS] run/some/trainer.py arg1 arg2"

    udf_command is assumed to consist of pre-commands (optional) followed by the python launcher script (required):
    Examples:
        # simple
        python3.7 path/to/some/trainer.py arg1 arg2

        # multi-commands
        (cd some/dir && python3.7 path/to/some/trainer.py arg1 arg2)

    IMPORTANT: If udf_command consists of multiple python commands, then this will result in undefined behavior.

    Args:
        udf_command: The user command line to wrap.
        num_trainers: Passed through to the launcher as ``--nproc_per_node``.
        num_nodes: Passed through to the launcher as ``--nnodes``.
        node_rank: Passed through to the launcher as ``--node_rank``.
        master_addr: Passed through to the launcher as ``--master_addr``.
        master_port: Passed through to the launcher as ``--master_port``.

    Returns:
        new_udf_command: ``udf_command`` with the launcher arguments inserted right after the python
            interpreter, or ``udf_command`` unchanged when no interpreter token is found.
    """
    torch_dist_cmd = construct_torch_dist_launcher_cmd(
        num_trainers=num_trainers,
        num_nodes=num_nodes,
        node_rank=node_rank,
        master_addr=master_addr,
        master_port=master_port,
    )

    # An interpreter invocation is a whole token named python, pythonN or pythonN.M (optionally
    # at the end of a path). Matching on token boundaries instead of raw substrings keeps
    # "python3.10" in one piece and ignores path components such as ".../python38/bin/...".
    token_start = r"(?<![\w.\-])"
    token_end = r"(?=[\s;&|)]|$)"
    found = re.search(
        token_start + r"python\d*(?:\.\d+)*" + token_end, udf_command
    )
    if found is None:
        return udf_command
    python_bin = found.group(0)

    # transforms the udf_command from:
    #     python path/to/dist_trainer.py arg0 arg1
    # to:
    #     python -m torch.distributed.launch [DIST TORCH ARGS] path/to/dist_trainer.py arg0 arg1
    # Note: if there are multiple python commands in `udf_command`, this may do the Wrong Thing, eg launch each
    #       python command within the torch distributed launcher.
    same_bin = re.compile(token_start + re.escape(python_bin) + token_end)
    # A callable replacement avoids any backslash/group interpretation of the inserted text.
    return same_bin.sub(lambda _m: f"{python_bin} {torch_dist_cmd}", udf_command)
def wrap_cmd_with_extra_envvars(cmd: str, env_vars: list) -> str:
    """Wraps a CLI command with extra env vars given as a list.

    The list entries are joined with single spaces and the result is handed to
    `wrap_cmd_with_local_envvars`, which produces the final command string.

    Example:
        >>> cmd = "ls && pwd"
        >>> env_vars = ["VAR1=value1", "VAR2=value2"]
        >>> wrap_cmd_with_extra_envvars(cmd, env_vars)
        "(export VAR1=value1 VAR2=value2; ls && pwd)"

    Args:
        cmd: The command line to wrap.
        env_vars: A list of strings containing env vars, e.g., ["VAR1=value1", "VAR2=value2"]

    Returns:
        cmd_with_env_vars: `cmd` wrapped with the given env vars.
    """
    return wrap_cmd_with_local_envvars(cmd, " ".join(env_vars))
def main():
    """Parse the launcher options, validate them and submit the jobs.

    Options this parser does not recognise are collected as the user command;
    exactly one such argument (the whole command line as a single string) is
    required.

    Raises:
        AssertionError: If no single user command is given, if
            --num_proc_per_machine is missing or not positive, or if
            --ip_config is missing.
        RuntimeError: If the user command does not contain "python".
    """
    parser = argparse.ArgumentParser(description="Launch a distributed job")
    parser.add_argument("--ssh_port", type=int, default=22, help="SSH Port.")
    parser.add_argument(
        "--ssh_username",
        default="",
        help="Optional. When issuing commands (via ssh) to cluster, use the provided username in the ssh cmd. "
        "Example: If you provide --ssh_username=bob, then the ssh command will be like: 'ssh bob@1.2.3.4 CMD' "
        "instead of 'ssh 1.2.3.4 CMD'",
    )
    parser.add_argument(
        "--num_proc_per_machine",
        type=int,
        help="The number of server processes per machine",
    )
    # NOTE(review): --master_port has no default and is not validated here;
    # confirm that callers always pass it.
    parser.add_argument(
        "--master_port",
        type=int,
        help="This port is used to form gloo group (randevouz server)",
    )
    parser.add_argument(
        "--ip_config",
        type=str,
        help="The file (in workspace) of IP configuration for server processes",
    )
    # Everything argparse does not recognise ends up in `udf_command`.
    args, udf_command = parser.parse_known_args()
    assert len(udf_command) == 1, "Please provide user command line."
    assert (
        args.num_proc_per_machine is not None and args.num_proc_per_machine > 0
    ), "--num_proc_per_machine must be a positive number."
    assert (
        args.ip_config is not None
    ), "A user has to specify an IP configuration file with --ip_config."
    udf_command = str(udf_command[0])
    # Only a substring check: the command just has to mention "python".
    if "python" not in udf_command:
        raise RuntimeError(
            "DGL launching script can only support Python executable file."
        )
    submit_jobs(args, udf_command)
Similarly, if a variable is named `part_local_node_id`, it means that this node_id is assigned after the data shuffling is complete and is unique within a given partition.

### High level description of the algorithm

#### Single file format for graph input files
Here we assume that all the node-related data is present in one single file and, similarly, all the edges are in one single file.

In this case the following steps are executed to write DGL objects for each partition, as assigned by any partitioning algorithm, for example METIS.

##### Step 1 (Data Loading):
Rank-0 process reads in all the graph files, which are xxx_nodes.txt, xxx_edges.txt, node_feats.dgl, edge_feats.dgl and xxx_removed_edges.txt.

Rank-0 process determines the ownership of nodes by using the output of the partitioning algorithm (here, we expect the output of the partitioning step to be a mapping between a node and its partition id for the entire graph). Edge ownership is determined by the `destination` node-id of that edge. Each edge belongs to the partition of its destination node-id.

##### Step 2 (Data Shuffling):
Rank-0 process will send node-data, edge-data, node-features and edge-features to their respective processes by using the ownership rules described in Step 1. Non-Rank-0 processes will receive their own nodes, edges, node-features and edge-features and store them in local data structures. Upon completion of sending this information, Rank-0 process will delete the nodes, edges, node-features and edge-features which are not owned by rank-0.

##### Step 3 (ID assignment and resolution):
At this time all the ranks will have their own local information in their respective data structures. Then each process will perform the following steps:
a) Assign shuffle_global_xxx (here xxx is node_ids and edge_ids) to nodes and edges by performing a prefix sum across all ranks.
b) Assign part_local_xxx (here xxx is node_ids and edge_ids) to nodes and edges so that they can be used to index into the node and edge features, and
c) Retrieve shuffle_global_node_ids by using global_node_ids to determine the ownership of any given node. This step is done for the node_ids (present locally on any given rank) whose shuffle_global_node_ids were assigned by a process of a different rank.

##### Step 4 (Serialization):
After every rank has the global-ids, shuffle_global-ids and part_local-ids for all the nodes and edges present locally, it proceeds to create the DGL objects. Finally, Rank-0 process will aggregate graph-level metadata and create a json file with graph-level information.

### How to use this tool
To run this code on a single machine using multiple processes, use the following command:
```
python3 data_proc_pipeline.py --world-size 2 --nodes-file mag_nodes.txt --edges-file mag_edges.txt --node-feats-file node_feat.dgl --metis-partitions mag_part.2 --input-dir /home/ubuntu/data --graph-name mag --schema mag.json --num-parts 2 --num-node-weights 4 --workspace /home/ubuntu/data --node-attr-dtype float --output /home/ubuntu/data/outputs --removed-edges mag_removed_edges.txt
```
The above command assumes that there are `2` partitions and that the number of node weights is `4`. All other command line arguments are self-explanatory.
================================================ FILE: tools/distpartitioning/array_readwriter/__init__.py ================================================ from .
@register_array_parser("csv")
class CSVArrayParser(object):
    """Array reader/writer for header-less delimited text files (via pyarrow).

    Parameters
    ----------
    delimiter : str, optional
        Field separator used both when parsing and when writing.
    """

    def __init__(self, delimiter=","):
        self.delimiter = delimiter

    def read(self, path):
        """Parse the file at ``path`` and return its contents as a numpy array."""
        logging.debug(
            "Reading from %s using CSV format with configuration %s",
            path,
            self.__dict__,
        )
        # Column names are auto-generated so that the first row is treated
        # as data instead of being consumed as a header.
        table = pyarrow.csv.read_csv(
            path,
            read_options=pyarrow.csv.ReadOptions(
                autogenerate_column_names=True
            ),
            parse_options=pyarrow.csv.ParseOptions(delimiter=self.delimiter),
        )
        logging.debug("Done reading from %s", path)
        frame = table.to_pandas()
        return frame.to_numpy()

    def write(self, path, arr):
        """Write ``arr`` to ``path`` as delimited text without a header row."""
        logging.debug(
            "Writing to %s using CSV format with configuration %s",
            path,
            self.__dict__,
        )
        options = pyarrow.csv.WriteOptions(
            include_header=False, delimiter=self.delimiter
        )
        # Go through a DataFrame so pyarrow receives a columnar table.
        table = pyarrow.Table.from_pandas(pd.DataFrame(arr))
        pyarrow.csv.write_csv(table, path, write_options=options)
        logging.debug("Done writing to %s", path)
@register_array_parser("parquet")
class ParquetArrayParser(object):
    """Array reader/writer backed by parquet files.

    Parquet is tabular, so arrays with more than two dimensions are
    flattened to 2-D on write and the original shape is recorded as a
    ``shape`` entry in the schema metadata. ``read`` uses that entry, when
    present, to restore the shape.
    """

    def __init__(self):
        pass

    def read(self, path):
        """Load the parquet file at ``path`` into a numpy array.

        Parameters
        ----------
        path : str
            Location of the parquet file.

        Returns
        -------
        numpy.ndarray
            The stored data, reshaped to the ``shape`` metadata entry if the
            file carries one; otherwise the array as produced by pyarrow.

        Raises
        ------
        ValueError, SyntaxError
            If the ``shape`` metadata entry is not a plain Python literal.
        """
        # Function-scope import: `ast` is only needed here.
        import ast

        logging.debug("Reading from %s using parquet format" % path)
        metadata = pyarrow.parquet.read_metadata(path)
        metadata = metadata.schema.to_arrow_schema().metadata

        # As parquet data are tabularized, we assume the dim of ndarray is 2.
        # If not, it should be explicitly specified in the file as metadata.
        shape = metadata.get(b"shape", None) if metadata else None
        table = pyarrow.parquet.read_table(path, memory_map=True)

        data_types = table.schema.types
        # Spark ML feature processing produces single-column parquet files
        # where each row is a vector object.
        if len(data_types) == 1 and isinstance(
            data_types[0], pyarrow.ListType
        ):
            arr = np.array(table.to_pandas().iloc[:, 0].to_list())
            logging.debug(
                f"Parquet data under {path} converted from single vector per row to ndarray"
            )
        else:
            arr = table.to_pandas().to_numpy()

        if not shape:
            logging.debug(
                "Shape information not found in the metadata, read the data as "
                "a 2 dim array."
            )
        logging.debug("Done reading from %s" % path)

        # The metadata comes from the file, i.e. from outside this process.
        # Parse it strictly as a literal instead of evaluating it as code.
        if shape:
            shape = tuple(ast.literal_eval(shape.decode()))
        else:
            shape = arr.shape
        return arr.reshape(shape)

    def write(self, path, array, vector_rows=False):
        """Store ``array`` as a parquet file at ``path``.

        Parameters
        ----------
        path : str
            Destination file.
        array : numpy.ndarray
            Data to store; trailing dimensions beyond the first are
            flattened before writing.
        vector_rows : bool, optional
            If ``True``, write a single column named ``vector`` whose rows
            are lists. Otherwise write one column per array column and
            record the original shape in the schema metadata.
        """
        logging.debug("Writing to %s using parquet format" % path)
        shape = array.shape
        if len(shape) > 2:
            array = array.reshape(shape[0], -1)
        if vector_rows:
            table = pyarrow.table(
                [pyarrow.array(array.tolist())], names=["vector"]
            )
            logging.debug("Writing to %s using single-vector rows..." % path)
        else:
            table = pyarrow.Table.from_pandas(pd.DataFrame(array))
            table = table.replace_schema_metadata({"shape": str(shape)})

        pyarrow.parquet.write_table(table, path)
        logging.debug("Done writing to %s" % path)
"graph_name" STR_NODE_FEATURES = "node_features" STR_EDGE_FEATURES = "edge_features" STR_NUM_NODES_PER_TYPE = "num_nodes_per_type" STR_NUM_EDGES_PER_TYPE = "num_edges_per_type" STR_NTYPES = "ntypes" ================================================ FILE: tools/distpartitioning/convert_partition.py ================================================ import copy import gc import logging import os import constants import dgl import dgl.backend as F import dgl.graphbolt as gb import numpy as np import torch as th import torch.distributed as dist from dgl import EID, ETYPE, NID, NTYPE from dgl.distributed.constants import DGL2GB_EID, GB_DST_ID from dgl.distributed.partition import ( _cast_to_minimum_dtype, _etype_str_to_tuple, _etype_tuple_to_str, cast_various_to_minimum_dtype_gb, RESERVED_FIELD_DTYPE, ) from utils import get_idranges, memory_snapshot def _get_unique_invidx(srcids, dstids, nids, low_mem=True): """This function is used to compute a list of unique elements, and their indices in the input list, which is the concatenation of srcids, dstids and uniq_nids. In addition, this function will also compute inverse indices, in the list of unique elements, for the elements in srcids, dstids and nids arrays. srcids, dstids will be over-written to contain the inverse indices. Basically, this function is mimicing the functionality of numpy's unique function call. The problem with numpy's unique function call is its high memory requirement. For an input list of 3 billion edges it consumes about 550GB of systems memory, which is limiting the capability of the partitioning pipeline. Note: This function is a workaround solution for the high memory requirement of numpy's unique function call. This function is not a general purpose function and is only used in the context of the partitioning pipeline. What's more, this function does not behave exactly the same as numpy's unique function call. 
Namely, this function does not return the exact same inverse indices as numpy's unique function call. However, for the current use case, this function is sufficient. Current numpy uniques function returns 3 return parameters, which are . list of unique elements . list of indices, in the input argument list, which are first occurance of the corresponding element in the uniques list . list of inverse indices, which are indices from the uniques list and can be used to rebuild the original input array Compared to the above numpy's return parameters, this work around solution returns 4 values . list of unique elements, . list of indices, which may not be the first occurance of the corresponding element from the uniques . list of inverse indices, here we only build the inverse indices for srcids and dstids input arguments. For the current use case, only these two inverse indices are needed. Parameters: ----------- srcids : numpy array a list of numbers, which are the src-ids of the edges dstids : numpy array a list of numbers, which are the dst-ids of the edges nids : numpy array a list of numbers, a list of unique shuffle-global-nids. This list is guaranteed to be a list of sorted consecutive unique list of numbers. Also, this list will be a `super set` for the list of dstids. Current implementation of the pipeline guarantees this assumption and is used to simplify the current implementation of the workaround solution. low_mem : bool, optional Indicates whether to use the low memory version of the function. If ``False``, the function will use numpy's native ``unique`` function. Otherwise, the function will use the low memory version of the function. Returns: -------- numpy array : a list of unique, sorted elements, computed from the input arguments numpy array : a list of integers. These are indices in the concatenated list [srcids, dstids, uniq_nids], which are the input arguments to this function numpy array : a list of integers. 
These are inverse indices, which will be indices from the unique elements list specifying the elements from the input array, srcids numpy array : a list of integers. These are inverse indices, which will be indices from the unique elements list specifying the elements from the input array, dstids """ assert len(srcids) == len( dstids ), f"Please provide the correct input parameters" assert len(srcids) != 0, f"Please provide a non-empty edge-list." if not low_mem: logging.warning( "Calling numpy's native function unique. This functions memory " "overhead will limit size of the partitioned graph objects " "processed by each node in the cluster." ) uniques, idxes, inv_idxes = np.unique( np.concatenate([srcids, dstids, nids]), return_index=True, return_inverse=True, ) src_len = len(srcids) dst_len = len(dstids) return ( uniques, idxes, inv_idxes[:src_len], inv_idxes[src_len : (src_len + dst_len)], ) # find uniqes which appear only in the srcids list mask = np.isin(srcids, nids, invert=True, kind="table") srcids_only = srcids[mask] srcids_idxes = np.where(mask == 1)[0] # sort uniques, unique_srcids_idx = np.unique(srcids_only, return_index=True) idxes = srcids_idxes[unique_srcids_idx] # build uniques and idxes, first and second return parameters uniques = np.concatenate([uniques, nids]) idxes = np.concatenate( [idxes, len(srcids) + len(dstids) + np.arange(len(nids))] ) # sort and idxes sort_idx = np.argsort(uniques) uniques = uniques[sort_idx] idxes = idxes[sort_idx] # uniques and idxes are built assert len(uniques) == len(idxes), f"Error building the idxes array." srcids = np.searchsorted(uniques, srcids, side="left") # process dstids now. # dstids is guaranteed to be a subset of the `nids` list # here we are computing index in the list of uniqes for # each element in the list of dstids, in a two step process # 1. 
locate the position of first element from nids in the # list of uniques - dstids cannot appear to the left # of this number, they are guaranteed to be on the right # side of this number. # 2. dstids = dstids - nids[0] # By subtracting nids[0] from the list of dstids will make # the list of dstids to be in the range of [0, max(nids)-1] # 3. dstids = dstids - nids[0] + offset # Now we move the list of dstids by `offset` which will be # the starting position of the nids[0] element. Note that # nids will ALWAYS be a SUPERSET of dstids. offset = np.searchsorted(uniques, nids[0], side="left") dstids = dstids - nids[0] + offset # return the values return uniques, idxes, srcids, dstids # Utility functions. def _is_homogeneous(ntypes, etypes): """Checks if the provided ntypes and etypes form a homogeneous graph.""" return len(ntypes) == 1 and len(etypes) == 1 def _coo2csc(src_ids, dst_ids): src_ids, dst_ids = th.tensor(src_ids, dtype=th.int64), th.tensor( dst_ids, dtype=th.int64 ) num_nodes = th.max(th.stack([src_ids, dst_ids], dim=0)).item() + 1 dst, idx = dst_ids.sort() indptr = th.searchsorted(dst, th.arange(num_nodes + 1)) indices = src_ids[idx] return indptr, indices, idx def _create_edge_data(edgeid_offset, etype_ids, num_edges): eid = th.arange( edgeid_offset, edgeid_offset + num_edges, dtype=RESERVED_FIELD_DTYPE[dgl.EID], ) etype = th.as_tensor(etype_ids, dtype=RESERVED_FIELD_DTYPE[dgl.ETYPE]) inner_edge = th.ones(num_edges, dtype=RESERVED_FIELD_DTYPE["inner_edge"]) return eid, etype, inner_edge def _create_node_data(ntype, uniq_ids, reshuffle_nodes, inner_nodes): node_type = th.as_tensor(ntype, dtype=RESERVED_FIELD_DTYPE[dgl.NTYPE]) node_id = th.as_tensor(uniq_ids[reshuffle_nodes]) inner_node = th.as_tensor( inner_nodes[reshuffle_nodes], dtype=RESERVED_FIELD_DTYPE["inner_node"], ) return node_type, node_id, inner_node def _compute_node_ntype( global_src_id, global_dst_id, global_homo_nid, idx, reshuffle_nodes, id_map ): global_ids = np.concatenate([global_src_id, 
def _graph_orig_ids(
    return_orig_nids,
    return_orig_eids,
    ntypes_map,
    etypes_map,
    node_attr,
    edge_attr,
    per_type_ids,
    type_per_edge,
    global_edge_id,
):
    """Collect the original ids of the inner nodes / edges, grouped by type.

    Parameters
    ----------
    return_orig_nids, return_orig_eids : bool
        Select which of the two mappings is built; a mapping that is not
        requested is returned as ``None``.
    ntypes_map : dict
        Node type name -> node type id.
    etypes_map : dict
        Edge type (tuple form) -> edge type id.
    node_attr, edge_attr : mapping
        Must provide ``dgl.NTYPE`` / ``"inner_node"`` and ``"inner_edge"``
        respectively when the corresponding mapping is requested.
    per_type_ids
        Indexed with a boolean node mask to obtain per-type node ids.
    type_per_edge
        Edge type id of every edge.
    global_edge_id
        Indexed with a boolean edge mask to obtain the edge ids.

    Returns
    -------
    tuple
        ``(orig_nids, orig_eids)``; each is either ``None`` or a dict of
        tensors keyed by node type name / edge type string.
    """
    orig_nids = None
    if return_orig_nids:
        # Keep only nodes of the given type that this partition owns.
        orig_nids = {
            name: th.as_tensor(
                per_type_ids[
                    th.logical_and(
                        node_attr[dgl.NTYPE] == type_id,
                        node_attr["inner_node"],
                    )
                ]
            )
            for name, type_id in ntypes_map.items()
        }

    orig_eids = None
    if return_orig_eids:
        # Same selection for edges; keys use the string form of the etype.
        orig_eids = {
            _etype_tuple_to_str(canonical): th.as_tensor(
                global_edge_id[
                    th.logical_and(
                        type_per_edge == type_id,
                        edge_attr["inner_edge"],
                    )
                ]
            )
            for canonical, type_id in etypes_map.items()
        }

    return orig_nids, orig_eids
def _process_partition_gb(
    node_attr,
    edge_attr,
    type_per_edge,
    src_ids,
    dst_ids,
    sort_etypes,
):
    """Prepare a partition for storage as a CSC graph.

    1. Cast every reserved field present in ``node_attr`` / ``edge_attr`` to
       the dtype listed in ``RESERVED_FIELD_DTYPE`` (both dicts are updated
       in place).
    2. Convert the COO edge list to CSC.
    3. If ``sort_etypes`` is true, order the neighbours of every destination
       node by edge type.

    Parameters
    ----------
    node_attr, edge_attr : dict
        Attribute dictionaries; modified in place by the dtype cast.
    type_per_edge : tensor
        Edge type id of every edge, in the same (COO) order as ``src_ids``
        and ``dst_ids``.
    src_ids, dst_ids : array-like
        End points of the edges in COO form.
    sort_etypes : bool
        Whether to sort each destination's neighbours by edge type.

    Returns
    -------
    tuple
        ``(indptr, indices, edge_ids)`` where ``edge_ids[p]`` is the COO
        position of the edge stored at CSC position ``p``.
    """
    for k, dtype in RESERVED_FIELD_DTYPE.items():
        if k in node_attr:
            node_attr[k] = F.astype(node_attr[k], dtype)
        if k in edge_attr:
            edge_attr[k] = F.astype(edge_attr[k], dtype)

    indptr, indices, edge_ids = _coo2csc(src_ids, dst_ids)
    if not sort_etypes:
        # Nothing to reorder. Previously this path reached the final
        # indexing with `sorted_idx` never assigned.
        return indptr, indices, edge_ids

    # `type_per_edge` follows the COO order, while the per-destination
    # segments described by `indptr` follow the CSC order. Bring the types
    # into CSC order before splitting them by segment.
    csc_types = type_per_edge[edge_ids]
    split_size = th.diff(indptr)
    per_dst_types = th.split(csc_types, split_size.tolist(), dim=0)

    # A stable sort keeps edges of the same type in their existing order.
    sorted_idx = th.cat(
        [chunk.sort(stable=True)[1] for chunk in per_dst_types], dim=0
    )
    # Turn segment-local positions into global CSC positions.
    sorted_idx = (
        th.repeat_interleave(indptr[:-1], split_size, dim=0) + sorted_idx
    )

    return indptr, indices[sorted_idx], edge_ids[sorted_idx]
def create_graph_object(
    tot_node_count,
    tot_edge_count,
    node_count,
    edge_count,
    num_parts,
    schema,
    part_id,
    node_data,
    edge_data,
    edgeid_offset,
    node_typecounts,
    edge_typecounts,
    last_ids={},
    return_orig_nids=False,
    return_orig_eids=False,
    use_graphbolt=False,
    **kwargs,
):
    """Build the graph object (DGLGraph or GraphBolt CSC graph) of one partition.

    The function derives the node/edge type orderings from ``schema``,
    computes the per-type id ranges owned by this partition, relabels the
    edge end points with partition-local ids (locally owned nodes first,
    other nodes after them) and finally creates either a ``dgl.graph`` or a
    ``gb.fused_csc_sampling_graph``.

    It calls ``dist.get_world_size()`` and ``dist.all_gather``, so it is a
    collective operation across the ranks of the process group.

    Parameters:
    -----------
    tot_node_count : int
        the number of all nodes
    tot_edge_count : int
        the number of all edges
    node_count : int
        the number of nodes in partition
    edge_count : int
        the number of edges in partition
    num_parts : int
        the number of parts.
        The five counts above are only forwarded to
        ``cast_various_to_minimum_dtype_gb`` (GraphBolt path).
    schema : json object
        json object created by reading the graph metadata json file. The
        entries ``schema[constants.STR_NODE_TYPE]`` and
        ``schema[constants.STR_EDGE_TYPE]`` are read and handed to
        ``get_idranges``.
    part_id : int
        partition id of the graph partition for which the object is to be
        created
    node_data : dict
        node data of this partition. The entries
        ``constants.SHUFFLE_GLOBAL_NID``, ``constants.NTYPE_ID`` and
        ``constants.GLOBAL_TYPE_NID`` are read and popped, i.e. the caller's
        dictionary is modified. The shuffle-global node ids are asserted to
        be consecutive.
    edge_data : dict
        edge data of this partition. The entries
        ``constants.SHUFFLE_GLOBAL_SRC_ID``,
        ``constants.SHUFFLE_GLOBAL_DST_ID``, ``constants.GLOBAL_SRC_ID``,
        ``constants.GLOBAL_DST_ID``, ``constants.GLOBAL_TYPE_EID`` and
        ``constants.ETYPE_ID`` are read and popped.
    edgeid_offset : int
        offset to be used when assigning edge global ids in the current
        partition
    node_typecounts, edge_typecounts
        forwarded to ``get_idranges`` together with the schema entries.
    last_ids : dict, optional
        maps a partition id to the value returned by ``_update_node_map``
        for that partition; the entry for ``part_id - 1`` is read and the
        entry for ``part_id`` is written.
        NOTE(review): the default is a mutable dict, so state persists
        across calls that rely on the default - this looks intentional,
        confirm before changing.
    return_orig_nids : bool, optional
        Indicates whether to return original node IDs.
    return_orig_eids : bool, optional
        Indicates whether to return original edge IDs.
    use_graphbolt : bool, optional
        Create a GraphBolt ``fused_csc_sampling_graph`` instead of a
        ``dgl.graph``.
    **kwargs
        Only used when ``use_graphbolt`` is true: forwarded to
        ``remove_attr_gb`` (``store_inner_node``, ``store_inner_edge``,
        ``store_eids``).

    Returns:
    --------
    graph object
        ``dgl.graph`` or GraphBolt graph created for the current partition
    dictionary
        map between node types and the range of global node ids used
    dictionary
        map between edge types and the range of global edge ids used
    dictionary
        map between node type(string) and node_type_id(int)
    dictionary
        map between edge type(tuple) and edge_type_id(int)
    dict of tensors
        If `return_orig_nids=True`, return a dict of 1D tensors whose key is
        the node type and value is a 1D tensor mapping between shuffled node
        IDs and the original node IDs for each node type. Otherwise,
        ``None`` is returned.
    dict of tensors
        If `return_orig_eids=True`, return a dict of 1D tensors whose key is
        the edge type and value is a 1D tensor mapping between shuffled edge
        IDs and the original edge IDs for each edge type. Otherwise,
        ``None`` is returned.
    """
    # create auxiliary data structures from the schema object
    memory_snapshot("CreateDGLObj_Begin", part_id)
    _, global_nid_ranges = get_idranges(
        schema[constants.STR_NODE_TYPE], node_typecounts
    )
    _, global_eid_ranges = get_idranges(
        schema[constants.STR_EDGE_TYPE], edge_typecounts
    )

    id_map = dgl.distributed.id_map.IdMap(global_nid_ranges)

    # Order node / edge types by the first global id of their range; the
    # position in that order is used as the type id.
    ntypes = [(key, global_nid_ranges[key][0, 0]) for key in global_nid_ranges]
    ntypes.sort(key=lambda e: e[1])
    ntype_offset_np = np.array([e[1] for e in ntypes])
    ntypes = [e[0] for e in ntypes]
    ntypes_map = {e: i for i, e in enumerate(ntypes)}
    etypes = [(key, global_eid_ranges[key][0, 0]) for key in global_eid_ranges]
    etypes.sort(key=lambda e: e[1])
    etypes = [e[0] for e in etypes]
    etypes_map = {_etype_str_to_tuple(e): i for i, e in enumerate(etypes)}

    node_map_val = {ntype: [] for ntype in ntypes}
    edge_map_val = {_etype_str_to_tuple(etype): [] for etype in etypes}

    # Take each column out of node_data and drop it from the dictionary
    # right away so the memory can be reclaimed as early as possible.
    memory_snapshot("CreateDGLObj_AssignNodeData", part_id)
    shuffle_global_nids = node_data[constants.SHUFFLE_GLOBAL_NID]
    node_data.pop(constants.SHUFFLE_GLOBAL_NID)
    gc.collect()

    ntype_ids = node_data[constants.NTYPE_ID]
    node_data.pop(constants.NTYPE_ID)
    gc.collect()

    global_type_nid = node_data[constants.GLOBAL_TYPE_NID]
    node_data.pop(constants.GLOBAL_TYPE_NID)
    node_data = None
    gc.collect()

    # Homogeneous global id = first global id of the node's type + per-type id.
    global_homo_nid = ntype_offset_np[ntype_ids] + global_type_nid
    # The locally owned shuffle-global ids must form one consecutive run.
    assert np.all(shuffle_global_nids[1:] - shuffle_global_nids[:-1] == 1)
    shuffle_global_nid_range = (shuffle_global_nids[0], shuffle_global_nids[-1])

    # Determine the node ID ranges of different node types.
    prev_last_id = last_ids.get(part_id - 1, 0)
    for ntype_name in global_nid_ranges:
        ntype_id = ntypes_map[ntype_name]
        type_nids = shuffle_global_nids[ntype_ids == ntype_id]
        if len(type_nids) == 0:
            # [-1, -1] marks a node type without nodes in this partition;
            # it is replaced with an empty contiguous range by
            # _update_node_map below.
            node_map_val[ntype_name].append([-1, -1])
        else:
            node_map_val[ntype_name].append(
                [int(type_nids[0]), int(type_nids[-1]) + 1]
            )
            # NOTE(review): last_id is only bound when at least one node
            # type has nodes in this partition - confirm that is guaranteed.
            last_id = th.tensor(
                [max(prev_last_id, int(type_nids[-1]) + 1)], dtype=th.int64
            )
    id_ntypes = list(global_nid_ranges.keys())

    # Exchange the last used node id of every rank so that empty ranges can
    # be placed right after the preceding rank's / type's range.
    gather_last_ids = [
        th.zeros(1, dtype=th.int64) for _ in range(dist.get_world_size())
    ]

    dist.all_gather(gather_last_ids, last_id)
    prev_last_id = _update_node_map(
        node_map_val, gather_last_ids, id_ntypes, prev_last_id
    )
    last_ids[part_id] = prev_last_id

    # process edges
    memory_snapshot("CreateDGLObj_AssignEdgeData: ", part_id)
    shuffle_global_src_id = edge_data[constants.SHUFFLE_GLOBAL_SRC_ID]
    edge_data.pop(constants.SHUFFLE_GLOBAL_SRC_ID)
    gc.collect()

    shuffle_global_dst_id = edge_data[constants.SHUFFLE_GLOBAL_DST_ID]
    edge_data.pop(constants.SHUFFLE_GLOBAL_DST_ID)
    gc.collect()

    global_src_id = edge_data[constants.GLOBAL_SRC_ID]
    edge_data.pop(constants.GLOBAL_SRC_ID)
    gc.collect()

    global_dst_id = edge_data[constants.GLOBAL_DST_ID]
    edge_data.pop(constants.GLOBAL_DST_ID)
    gc.collect()

    global_edge_id = edge_data[constants.GLOBAL_TYPE_EID]
    edge_data.pop(constants.GLOBAL_TYPE_EID)
    gc.collect()

    etype_ids = edge_data[constants.ETYPE_ID]
    edge_data.pop(constants.ETYPE_ID)
    edge_data = None
    gc.collect()
    logging.info(
        f"There are {len(shuffle_global_src_id)} edges in partition {part_id}"
    )

    # It's not guaranteed that the edges are sorted based on edge type.
    # Let's sort edges and all attributes on the edges.
    if not np.all(np.diff(etype_ids) >= 0):
        sort_idx = np.argsort(etype_ids)
        (
            shuffle_global_src_id,
            shuffle_global_dst_id,
            global_src_id,
            global_dst_id,
            global_edge_id,
            etype_ids,
        ) = (
            shuffle_global_src_id[sort_idx],
            shuffle_global_dst_id[sort_idx],
            global_src_id[sort_idx],
            global_dst_id[sort_idx],
            global_edge_id[sort_idx],
            etype_ids[sort_idx],
        )
        assert np.all(np.diff(etype_ids) >= 0)
    else:
        print(f"[Rank: {part_id} Edge data is already sorted !!!")

    # Determine the edge ID range of different edge types.
    edge_id_start = edgeid_offset
    for etype_name in global_eid_ranges:
        etype = _etype_str_to_tuple(etype_name)
        assert len(etype) == 3
        etype_id = etypes_map[etype]
        edge_map_val[etype].append(
            [edge_id_start, edge_id_start + np.sum(etype_ids == etype_id)]
        )
        edge_id_start += np.sum(etype_ids == etype_id)
    memory_snapshot("CreateDGLObj_UniqueNodeIds: ", part_id)

    # get the edge list in some order and then reshuffle.
    # Here the order of nodes is defined by the sorted order.
    uniq_ids, idx, part_local_src_id, part_local_dst_id = _get_unique_invidx(
        shuffle_global_src_id,
        shuffle_global_dst_id,
        np.arange(shuffle_global_nid_range[0], shuffle_global_nid_range[1] + 1),
    )

    # A node is "inner" when its shuffle-global id lies inside the id range
    # owned by this partition.
    inner_nodes = th.as_tensor(
        np.logical_and(
            uniq_ids >= shuffle_global_nid_range[0],
            uniq_ids <= shuffle_global_nid_range[1],
        )
    )

    # get the list of indices, from inner_nodes, which will sort inner_nodes as [True, True, ...., False, False, ...]
    # essentially local nodes will be placed before non-local nodes.
    reshuffle_nodes = th.arange(len(uniq_ids))
    reshuffle_nodes = th.cat(
        [reshuffle_nodes[inner_nodes.bool()], reshuffle_nodes[inner_nodes == 0]]
    )

    """
    Following procedure is used to map the part_local_src_id, part_local_dst_id to account for
    reshuffling of nodes (to order localy owned nodes prior to non-local nodes in a partition)
    1. Form a node_map, in this case a numpy array, which will be used to map old node-ids (pre-reshuffling)
    to post-reshuffling ids.
    2. Once the map is created, use this map to map all the node-ids in the part_local_src_id
    and part_local_dst_id list to their appropriate `new` node-ids (post-reshuffle order).
    3. Since only the node's order is changed, we will have to re-order nodes related information when
    creating dgl object: this includes dgl.NTYPE, dgl.NID and inner_node.
    4. Edge's order is not changed. At this point in the execution path edges are still ordered by their etype-ids.
    5. Create the dgl object appropriately and return the dgl object.

    Here is a simple example to understand the above flow better.

    part_local_nids = [0, 1, 2, 3, 4, 5]
    part_local_src_ids = [0, 0, 0, 0, 2, 3, 4]
    part_local_dst_ids = [1, 2, 3, 4, 4, 4, 5]

    Assume that nodes {1, 5} are halo-nodes, which are not owned by this partition.

    reshuffle_nodes = [0, 2, 3, 4, 1, 5]

    A node_map, which maps node-ids from old to reshuffled order is as follows:
    node_map = np.zeros((len(reshuffle_nodes,)))
    node_map[reshuffle_nodes] = np.arange(len(reshuffle_nodes))

    Using the above map, we have mapped part_local_src_ids and part_local_dst_ids as follows:
    part_local_src_ids = [0, 0, 0, 0, 1, 2, 3]
    part_local_dst_ids = [4, 1, 2, 3, 3, 3, 5]

    In this graph above, note that nodes {0, 1, 2, 3} are inner_nodes and {4, 5} are NON-inner-nodes

    Since the edge are re-ordered in any way, there is no reordering required for edge related data
    during the DGL object creation.
    """
    # create the mappings to generate mapped part_local_src_id and part_local_dst_id
    # This map will map from unshuffled node-ids to reshuffled-node-ids (which are ordered to prioritize
    # locally owned nodes).
    # NOTE(review): np.zeros is called without a dtype, so nid_map and the
    # remapped ids below are floating point. _coo2csc converts them to int64
    # on the GraphBolt path; confirm dgl.graph handles them on the other path.
    nid_map = np.zeros(
        (
            len(
                reshuffle_nodes,
            )
        )
    )
    nid_map[reshuffle_nodes] = np.arange(len(reshuffle_nodes))

    # Now map the edge end points to reshuffled_values.
    part_local_src_id, part_local_dst_id = (
        nid_map[part_local_src_id],
        nid_map[part_local_dst_id],
    )

    """
    Creating attributes for graphbolt and DGLGraph is as follows.

    node attributes:
        this part is implemented in _create_node_attr.
        compute the ntype and per type ids for each node with global node type id.
        create ntype, nid and inner node with orig ntype and inner nodes
    this part is shared by graphbolt and DGLGraph.

    the attributes created for graphbolt are as follows:

    edge attributes:
        this part is implemented in _create_edge_attr_gb.
        create eid, type per edge and inner edge with edgeid_offset.
        create edge_type_to_id with etypes_map.

    The process to remove extra attribute is implemented in remove_attr_gb.
    the unused attributes like inner_node, inner_edge, eids will be removed following the arguments in kwargs.
    edge_attr, node_attr are the variable that have removed extra attributes to construct csc_graph.
    edata, ndata are the variable that reserve extra attributes to be used to generate orig_nid and orig_eid.

    the src_ids and dst_ids will be transformed into indptr and indices in _coo2csc.

    all variable mentioned above will be casted to minimum data type in cast_various_to_minimum_dtype_gb.

    orig_nids and orig_eids will be generated in _graph_orig_ids with ndata and edata.
    """
    # create the graph here now.
    ndata, per_type_ids = _create_node_attr(
        idx,
        global_src_id,
        global_dst_id,
        global_homo_nid,
        uniq_ids,
        reshuffle_nodes,
        id_map,
        inner_nodes,
    )
    if use_graphbolt:
        edata, type_per_edge, edge_type_to_id = _create_edge_attr_gb(
            part_local_dst_id,
            edgeid_offset,
            etype_ids,
            ntypes,
            etypes,
            etypes_map,
        )

        assert edata is not None
        assert ndata is not None

        # Neighbours only need sorting by type when several types exist.
        sort_etypes = len(etypes_map) > 1
        indptr, indices, csc_edge_ids = _process_partition_gb(
            ndata,
            edata,
            type_per_edge,
            part_local_src_id,
            part_local_dst_id,
            sort_etypes,
        )
        # remove_attr_gb works on deep copies, so ndata / edata keep every
        # attribute for _graph_orig_ids below.
        edge_attr, node_attr = remove_attr_gb(
            edge_attr=edata, node_attr=ndata, **kwargs
        )
        # Reorder the stored edge attributes into the CSC edge order.
        edge_attr = {
            attr: edge_attr[attr][csc_edge_ids] for attr in edge_attr.keys()
        }
        cast_various_to_minimum_dtype_gb(
            node_count=node_count,
            edge_count=edge_count,
            tot_node_count=tot_node_count,
            tot_edge_count=tot_edge_count,
            num_parts=num_parts,
            indptr=indptr,
            indices=indices,
            type_per_edge=type_per_edge,
            etypes=etypes,
            ntypes=ntypes,
            node_attributes=node_attr,
            edge_attributes=edge_attr,
        )
        part_graph = gb.fused_csc_sampling_graph(
            csc_indptr=indptr,
            indices=indices,
            node_type_offset=None,
            type_per_edge=type_per_edge[csc_edge_ids],
            node_attributes=node_attr,
            edge_attributes=edge_attr,
            node_type_to_id=ntypes_map,
            edge_type_to_id=edge_type_to_id,
        )
    else:
        num_edges = len(part_local_dst_id)
        part_graph = dgl.graph(
            data=(part_local_src_id, part_local_dst_id), num_nodes=len(uniq_ids)
        )
        # create edge data in graph.
        (
            part_graph.edata[dgl.EID],
            part_graph.edata[dgl.ETYPE],
            part_graph.edata["inner_edge"],
        ) = _create_edge_data(edgeid_offset, etype_ids, num_edges)

        # NOTE(review): same call with the same arguments as the one made
        # before the branch; it appears to be redundant.
        ndata, per_type_ids = _create_node_attr(
            idx,
            global_src_id,
            global_dst_id,
            global_homo_nid,
            uniq_ids,
            reshuffle_nodes,
            id_map,
            inner_nodes,
        )
        for attr_name, node_attributes in ndata.items():
            part_graph.ndata[attr_name] = node_attributes
        type_per_edge = part_graph.edata[dgl.ETYPE]
        ndata, edata = part_graph.ndata, part_graph.edata

    # get the original node ids and edge ids from original graph.
    orig_nids, orig_eids = _graph_orig_ids(
        return_orig_nids,
        return_orig_eids,
        ntypes_map,
        etypes_map,
        ndata,
        edata,
        per_type_ids,
        type_per_edge,
        global_edge_id,
    )
    return (
        part_graph,
        node_map_val,
        edge_map_val,
        ntypes_map,
        etypes_map,
        orig_nids,
        orig_eids,
    )
"edge_feats": edge_feat_file, part_graph_type: part_graph_file, } return part_metadata ================================================ FILE: tools/distpartitioning/data_proc_pipeline.py ================================================ import argparse import logging import os import platform import numpy as np import torch.multiprocessing as mp from data_shuffle import multi_machine_run, single_machine_run def log_params(params): """Print all the command line arguments for debugging purposes. Parameters: ----------- params: argparse object Argument Parser structure listing all the pre-defined parameters """ print("Input Dir: ", params.input_dir) print("Graph Name: ", params.graph_name) print("Schema File: ", params.schema) print("No. partitions: ", params.num_parts) print("Output Dir: ", params.output) print("WorldSize: ", params.world_size) print("Metis partitions: ", params.partitions_dir) if __name__ == "__main__": """ Start of execution from this point. Invoke the appropriate function to begin execution """ # arguments which are already needed by the existing implementation of convert_partition.py parser = argparse.ArgumentParser(description="Construct graph partitions") parser.add_argument( "--input-dir", required=True, type=str, help="The directory path that contains the partition results.", ) parser.add_argument( "--graph-name", required=True, type=str, help="The graph name" ) parser.add_argument( "--schema", required=True, type=str, help="The schema of the graph" ) parser.add_argument( "--num-parts", required=True, type=int, help="The number of partitions" ) parser.add_argument( "--output", required=True, type=str, help="The output directory of the partitioned results", ) parser.add_argument( "--partitions-dir", help="directory of the partition-ids for each node type", default=None, type=str, ) parser.add_argument( "--log-level", type=str, default="info", help="To enable log level for debugging purposes. 
Available options: \ (Critical, Error, Warning, Info, Debug, Notset), default value \ is: Info", ) # arguments needed for the distributed implementation parser.add_argument( "--world-size", help="no. of processes to spawn", default=1, type=int, required=True, ) parser.add_argument( "--process-group-timeout", required=True, type=int, help="timeout[seconds] for operations executed against the process group " "(see torch.distributed.init_process_group)", ) parser.add_argument( "--save-orig-nids", action="store_true", help="Save original node IDs into files", ) parser.add_argument( "--save-orig-eids", action="store_true", help="Save original edge IDs into files", ) parser.add_argument( "--use-graphbolt", action="store_true", help="Use GraphBolt for distributed partition.", ) parser.add_argument( "--store-inner-node", action="store_true", default=False, help="Store inner nodes.", ) parser.add_argument( "--store-inner-edge", action="store_true", default=False, help="Store inner edges.", ) parser.add_argument( "--store-eids", action="store_true", default=False, help="Store edge IDs.", ) parser.add_argument( "--graph-formats", default=None, type=str, help="Save partitions in specified formats.", ) params = parser.parse_args() # invoke the pipeline function numeric_level = getattr(logging, params.log_level.upper(), None) logging.basicConfig( level=numeric_level, format=f"[{platform.node()} %(levelname)s %(asctime)s PID:%(process)d] %(message)s", ) multi_machine_run(params) ================================================ FILE: tools/distpartitioning/data_shuffle.py ================================================ import gc import logging import math import os import sys from datetime import timedelta from timeit import default_timer as timer import constants import dgl import numpy as np import torch import torch.distributed as dist import torch.multiprocessing as mp from convert_partition import create_graph_object, create_metadata_json from dataset_utils import 
def gen_node_data(
    rank, world_size, num_parts, id_lookup, ntid_ntype_map, schema_map
):
    """
    Generate the node data owned by the current process.

    For this data processing pipeline, reading node files is not needed.
    All the needed information about the nodes can be found in the metadata
    json file. This function generates the nodes owned by a given process,
    using metis partitions.

    Parameters:
    -----------
    rank : int
        rank of the process
    world_size : int
        total no. of processes
    num_parts : int
        total no. of partitions
    id_lookup : instance of class DistLookupService
        Distributed lookup service used to map global-nids to respective
        partition-ids and shuffle-global-nids
    ntid_ntype_map :
        a dictionary where keys are node_type ids(integers) and values are
        node_type names(strings).
    schema_map:
        dictionary formed by reading the input metadata json file for the
        input dataset.

        It is assumed that for the input graph files, the nodes of a
        particular node-type are split into `p` files (because `p`
        partitions are to be generated). On a similar note, edges of a
        particular edge-type are split into `p` files as well.

        #assuming m nodetypes present in the input graph
        "num_nodes_per_chunk" : [
            [a0, a1, a2, ... a(p-1)],
            [b0, b1, b2, ... b(p-1)],
            ...
            [m0, m1, m2, ... m(p-1)]
        ]
        Here, each sub-list, corresponding to a nodetype in the input
        graph, has `p` elements, each being the number of nodes to be
        processed by one process during distributed partitioning.

        Node features are described by the "node_data" key-value pair.
        Its keys are nodetype names and its values are dictionaries which
        capture all the features present for that node-type, for example:

        "node_data" : {
            "paper": {       # node type
                "feat": {   # feature key
                    "format": {"name": "numpy"},
                    "data": ["node_data/paper-feat-part1.npy",
                             "node_data/paper-feat-part2.npy"]
                },
                "label": {   # feature key
                    "format": {"name": "numpy"},
                    "data": ["node_data/paper-label-part1.npy",
                             "node_data/paper-label-part2.npy"]
                }
            }
        }
        Each feature has `p` files whose locations are listed under the
        key "data"; "format" describes the storage format.

    Returns:
    --------
    dictionary :
        dictionary where keys are column names suffixed with
        ``/<local_part_id>`` and values are numpy arrays; these arrays are
        generated by using information present in the metadata json file.
        A column for which no node type contributed any data is an empty
        int64 array.
    """
    parts_per_rank = num_parts // world_size

    # One list of per-node-type chunks for every (column, local partition).
    local_node_data = {}
    for local_part_id in range(parts_per_rank):
        suffix = "/" + str(local_part_id)
        local_node_data[constants.GLOBAL_NID + suffix] = []
        local_node_data[constants.NTYPE_ID + suffix] = []
        local_node_data[constants.GLOBAL_TYPE_NID + suffix] = []

    # Note that `get_idranges` always returns two dictionaries. Keys in these
    # dictionaries are type names for nodes and edges and values are
    # `num_parts` number of tuples indicating the range of type-ids in first
    # dictionary and range of global-nids in the second dictionary.
    type_nid_dict, global_nid_dict = get_idranges(
        schema_map[constants.STR_NODE_TYPE],
        get_ntype_counts_map(
            schema_map[constants.STR_NODE_TYPE],
            schema_map[constants.STR_NUM_NODES_PER_TYPE],
        ),
        num_chunks=num_parts,
    )

    for ntype_id, ntype_name in ntid_ntype_map.items():
        # No. of nodes in each process can differ significantly in lopsided
        # distributions. Synchronize on a per ntype basis.
        dist.barrier()

        type_ranges = type_nid_dict[ntype_name]
        type_start, type_end = type_ranges[0][0], type_ranges[-1][1]
        gnid_range = global_nid_dict[ntype_name]
        gnid_start, gnid_end = gnid_range[0, 0], gnid_range[0, 1]

        # The lookup receives its own freshly built array (end is
        # exclusive), exactly as before, so nothing is assumed about
        # whether the service modifies its argument.
        node_partid_slice = id_lookup.get_partition_ids(
            np.arange(gnid_start, gnid_end, dtype=np.int64)
        )

        # These id ranges do not depend on the local partition, so they are
        # built once per node type instead of once per local partition.
        all_gnids = np.arange(gnid_start, gnid_end, dtype=np.int64)
        all_tnids = np.arange(type_start, type_end, dtype=np.int64)

        for local_part_id in range(parts_per_rank):
            suffix = "/" + str(local_part_id)
            # Local partitions map to global partition ids cyclically.
            cond = node_partid_slice == (rank + local_part_id * world_size)

            # Boolean indexing copies, so the shared ranges stay untouched.
            own_gnids = all_gnids[cond]
            own_tnids = all_tnids[cond]

            local_node_data[constants.NTYPE_ID + suffix].append(
                np.full(own_gnids.shape, ntype_id, dtype=np.int64)
            )
            local_node_data[constants.GLOBAL_NID + suffix].append(own_gnids)
            local_node_data[constants.GLOBAL_TYPE_NID + suffix].append(
                own_tnids
            )

    # Stitch the per-node-type chunks into one array per column.
    # `np.concatenate` rejects an empty sequence, which happens when there
    # are no node types at all.
    for column, chunks in local_node_data.items():
        if chunks:
            local_node_data[column] = np.concatenate(chunks)
        else:
            local_node_data[column] = np.empty((0,), dtype=np.int64)

    return local_node_data
id_lookup : DistLookupService instance this object will be used to retrieve ownership information of nodes Returns: -------- dictionary : the input argument, edge_data, is updated with the edge data received by other processes in the world. """ # Synchronize at the beginning of this function dist.barrier() # Prepare data for each rank in the cluster. timer_start = timer() CHUNK_SIZE = 100 * 1000 * 1000 # 100 * 8 * 5 = 1 * 4 = 8 GB/message/node num_edges = edge_data[constants.GLOBAL_SRC_ID].shape[0] all_counts = allgather_sizes( [num_edges], world_size, num_parts, return_sizes=True ) max_edges = np.amax(all_counts) all_edges = np.sum(all_counts) num_chunks = (max_edges // CHUNK_SIZE) + ( 0 if (max_edges % CHUNK_SIZE == 0) else 1 ) LOCAL_CHUNK_SIZE = (num_edges // num_chunks) + ( 0 if (num_edges % num_chunks == 0) else 1 ) logging.debug( f"[Rank: {rank} Edge Data Shuffle - max_edges: {max_edges}, \ local_edges: {num_edges} and num_chunks: {num_chunks} \ Total edges: {all_edges} Local_CHUNK_SIZE: {LOCAL_CHUNK_SIZE}" ) for local_part_id in range(num_parts // world_size): local_src_ids = [] local_dst_ids = [] local_type_eids = [] local_etype_ids = [] local_eids = [] for chunk in range(num_chunks): chunk_start = chunk * LOCAL_CHUNK_SIZE chunk_end = (chunk + 1) * LOCAL_CHUNK_SIZE logging.debug( f"[Rank: {rank}] EdgeData Shuffle: processing \ local_part_id: {local_part_id} and chunkid: {chunk}" ) cur_src_id = edge_data[constants.GLOBAL_SRC_ID][ chunk_start:chunk_end ] cur_dst_id = edge_data[constants.GLOBAL_DST_ID][ chunk_start:chunk_end ] cur_type_eid = edge_data[constants.GLOBAL_TYPE_EID][ chunk_start:chunk_end ] cur_etype_id = edge_data[constants.ETYPE_ID][chunk_start:chunk_end] cur_eid = edge_data[constants.GLOBAL_EID][chunk_start:chunk_end] input_list = [] owner_ids = id_lookup.get_partition_ids(cur_dst_id) for idx in range(world_size): send_idx = owner_ids == (idx + local_part_id * world_size) send_idx = send_idx.reshape(cur_src_id.shape[0]) filt_data = 
np.column_stack( ( cur_src_id[send_idx == 1], cur_dst_id[send_idx == 1], cur_type_eid[send_idx == 1], cur_etype_id[send_idx == 1], cur_eid[send_idx == 1], ) ) if filt_data.shape[0] <= 0: input_list.append(torch.empty((0, 5), dtype=torch.int64)) else: input_list.append(torch.from_numpy(filt_data)) # Now send newly formed chunk to others. dist.barrier() output_list = alltoallv_cpu( rank, world_size, input_list, retain_nones=False ) # Replace the values of the edge_data, with the received data from all the other processes. rcvd_edge_data = torch.cat(output_list).numpy() local_src_ids.append(rcvd_edge_data[:, 0]) local_dst_ids.append(rcvd_edge_data[:, 1]) local_type_eids.append(rcvd_edge_data[:, 2]) local_etype_ids.append(rcvd_edge_data[:, 3]) local_eids.append(rcvd_edge_data[:, 4]) edge_data[ constants.GLOBAL_SRC_ID + "/" + str(local_part_id) ] = np.concatenate(local_src_ids) edge_data[ constants.GLOBAL_DST_ID + "/" + str(local_part_id) ] = np.concatenate(local_dst_ids) edge_data[ constants.GLOBAL_TYPE_EID + "/" + str(local_part_id) ] = np.concatenate(local_type_eids) edge_data[ constants.ETYPE_ID + "/" + str(local_part_id) ] = np.concatenate(local_etype_ids) edge_data[ constants.GLOBAL_EID + "/" + str(local_part_id) ] = np.concatenate(local_eids) # Check if the data was exchanged correctly local_edge_count = 0 for local_part_id in range(num_parts // world_size): local_edge_count += edge_data[ constants.GLOBAL_SRC_ID + "/" + str(local_part_id) ].shape[0] shuffle_edge_counts = allgather_sizes( [local_edge_count], world_size, num_parts, return_sizes=True ) shuffle_edge_total = np.sum(shuffle_edge_counts) assert shuffle_edge_total == all_edges timer_end = timer() logging.info( f"[Rank: {rank}] Time to send/rcv edge data: {timedelta(seconds=timer_end-timer_start)}" ) # Clean up. 
def exchange_feature(
    rank,
    data,
    id_lookup,
    feat_type,
    feat_key,
    featdata_key,
    gid_start,
    gid_end,
    type_id_start,
    type_id_end,
    local_part_id,
    world_size,
    num_parts,
    cur_features,
    cur_global_ids,
):
    """This function is used to send/receive one feature for either nodes or
    edges of the input graph dataset.

    Parameters:
    -----------
    rank : int
        integer, unique id assigned to the current process
    data: dictionary
        dictionary holding the edge data read by the current process. It is
        only read in the edge-feature case, where it must not be None; it
        supplies the global edge ids and the destination node ids used to
        determine ownership.
    id_lookup : instance of DistLookupService
        instance of an implementation of dist. lookup service to retrieve
        values for keys
    feat_type : string
        this is used to distinguish which features are being exchanged.
        Please note that for nodes ownership is clearly defined and for
        edges it is always assumed that destination end point of the edge
        defines the ownership of that particular edge
    feat_key : string
        key of the form ``type_name/feature_name/<suffix>``; the last token
        is replaced by `local_part_id` to form the key used in the output
        dictionaries
    featdata_key : tensor or None
        features associated with this feature key being processed, or None
        when the current process holds no data for it
    gid_start : int
        starting global_id, of either node or edge, for the feature data
    gid_end : int
        ending global_id (exclusive), of either node or edge, for the
        feature data
    type_id_start : int
        starting type_id for the feature data
    type_id_end : int
        ending type_id for the feature data
    local_part_id : int
        integer used to identify the local partition id used to locate
        data belonging to this partition
    world_size : int
        total number of processes created
    num_parts : int
        total number of partitions
    cur_features : dictionary
        dictionary to store the feature data which belongs to the current
        process; updated in place
    cur_global_ids : dictionary
        dictionary to store global ids, of either nodes or edges, for which
        the features are stored in the cur_features dictionary; updated in
        place

    Returns:
    -------
    dictionary :
        `cur_features`, extended with the feature rows received in this call
    dictionary :
        `cur_global_ids`, extended with the global ids, of either nodes or
        edges, whose features were received in this call
    """
    # Global ids covered by this feature subset on the current rank and the
    # matching row indices into `featdata_key`.
    gids_feat = np.arange(gid_start, gid_end)
    local_idx = np.arange(0, type_id_end - type_id_start)

    feats_per_rank = []
    global_id_per_rank = []

    tokens = feat_key.split("/")
    assert len(tokens) == 3
    local_feat_key = "/".join(tokens[:-1]) + "/" + str(local_part_id)

    logging.debug(
        f"[Rank: {rank} feature: {feat_key}, gid_start - {gid_start} and gid_end - {gid_end}"
    )

    # Get the partition ids for the range of global nids.
    if feat_type == constants.STR_NODE_FEATURES:
        # Retrieve the partition ids for the node features.
        # Each partition id will be in the range [0, num_parts).
        partid_slice = id_lookup.get_partition_ids(
            np.arange(gid_start, gid_end, dtype=np.int64)
        )
    else:
        # Edge data case.
        # Ownership is determined by the destination node.
        assert data is not None
        global_eids = np.arange(gid_start, gid_end, dtype=np.int64)
        if data[constants.GLOBAL_EID].shape[0] > 0:
            logging.debug(
                f"[Rank: {rank} disk read global eids - min - {np.amin(data[constants.GLOBAL_EID])}, max - {np.amax(data[constants.GLOBAL_EID])}, count - {data[constants.GLOBAL_EID].shape}"
            )

        # Now use `data` to extract destination nodes' global id
        # and use that to get the ownership
        common, idx1, idx2 = np.intersect1d(
            data[constants.GLOBAL_EID], global_eids, return_indices=True
        )
        assert (
            common.shape[0] == idx2.shape[0]
        ), f"Rank {rank}: {common.shape[0]} != {idx2.shape[0]}"
        assert (
            common.shape[0] == global_eids.shape[0]
        ), f"Rank {rank}: {common.shape[0]} != {global_eids.shape[0]}"

        global_dst_nids = data[constants.GLOBAL_DST_ID][idx1]
        assert np.all(global_eids == data[constants.GLOBAL_EID][idx1])
        partid_slice = id_lookup.get_partition_ids(global_dst_nids)

    # Determine the number of dimensions of the feature-data. This is needed
    # so that ranks where feature-data is not present use the correct shape
    # for sending the padded vector. Exchange the lengths here.
    feat_dim_len = 0
    if featdata_key is not None:
        feat_dim_len = len(featdata_key.shape)
    all_lens = allgather_sizes(
        [feat_dim_len], world_size, num_parts, return_sizes=True
    )
    # NOTE(review): only rank 0's entry is inspected although the message
    # says that no process has data -- confirm that rank 0 always holds
    # data whenever any other rank does.
    if all_lens[0] <= 0:
        logging.debug(
            f"[Rank: {rank} No process has any feature data to shuffle for {local_feat_key}"
        )
        return cur_features, cur_global_ids

    rank0_shape_len = all_lens[0]
    for idx in range(1, world_size):
        assert (all_lens[idx] == 0) or (all_lens[idx] == rank0_shape_len), (
            f"feature: {local_feat_key} shapes does not match "
            f"at rank - {idx} and rank - 0"
        )

    # Describe the local feature data as [shape..., dtype-id].
    if featdata_key is not None:
        logging.debug(f"Rank: {rank} {featdata_key.shape=}")
        feat_dims_dtype = list(featdata_key.shape)
        assert (
            len(featdata_key.shape) == 2 or len(featdata_key.shape) == 1
        ), f"We expect 1D or 2D tensors for features, got shape {featdata_key.shape}"

        # When a feature is 2-dim, the shape should match the feature dimension.
        if len(featdata_key.shape) == 2:
            feature_dimension = feat_dims_dtype[1]
        else:
            feature_dimension = 0
        feat_dims_dtype.append(DATA_TYPE_ID[featdata_key.dtype])
    else:
        # No local data: advertise an all-zero shape with rank 0's number of
        # dimensions and a float32 placeholder dtype.
        feat_dims_dtype = [0] * int(rank0_shape_len)
        feat_dims_dtype.append(DATA_TYPE_ID[torch.float32])
        feature_dimension = 0

    # Agree on the number of feature columns across all ranks.
    feature_dimension_tensor = torch.tensor([feature_dimension])
    dist.all_reduce(feature_dimension_tensor, op=dist.ReduceOp.MAX)
    feature_dimension = feature_dimension_tensor.item()

    logging.debug(f"Sending the feature shape information - {feat_dims_dtype}")
    all_dims_dtype = allgather_sizes(
        feat_dims_dtype, world_size, num_parts, return_sizes=True
    )

    for idx in range(world_size):
        cond = partid_slice == (idx + local_part_id * world_size)
        gids_per_partid = gids_feat[cond]
        local_idx_partid = local_idx[cond]

        if gids_per_partid.shape[0] == 0:
            # Nothing to send to this rank: send a zero-row tensor whose
            # rank and dtype follow the first gathered descriptor.
            assert len(all_dims_dtype) % world_size == 0
            dim_len = len(all_dims_dtype) // world_size
            rank0_shape = [0] * (dim_len - 1)
            assert (
                len(rank0_shape) == 2 or len(rank0_shape) == 1
            ), f"We expect 1D or 2D tensors for features, got shape {rank0_shape}"
            # When a feature is 2-dim, the shape[1] (number of columns)
            # should match the feature dimension.
            if len(rank0_shape) == 2:
                rank0_shape[1] = feature_dimension
            rank0_dtype = REV_DATA_TYPE_ID[all_dims_dtype[dim_len - 1]]
            # Named `padding` so that the `data` argument is not shadowed.
            padding = torch.empty(rank0_shape, dtype=rank0_dtype)
            feats_per_rank.append(padding)
            global_id_per_rank.append(torch.empty((0,), dtype=torch.int64))
        else:
            feats_per_rank.append(featdata_key[local_idx_partid])
            global_id_per_rank.append(
                torch.from_numpy(gids_per_partid).type(torch.int64)
            )
    for idx, tt in enumerate(feats_per_rank):
        logging.debug(
            f"[Rank: {rank} features shape - {tt.shape} and ids - {global_id_per_rank[idx].shape}"
        )

    # features (and global nids) per rank to be sent out are ready
    # for transmission, perform alltoallv here.
    output_feat_list = alltoallv_cpu(
        rank, world_size, feats_per_rank, retain_nones=False
    )
    output_id_list = alltoallv_cpu(
        rank, world_size, global_id_per_rank, retain_nones=False
    )
    logging.debug(
        f"[Rank : {rank} feats - {output_feat_list}, ids - {output_id_list}"
    )
    assert len(output_feat_list) == len(output_id_list), (
        "Length of feature list and id list are expected to be equal while "
        f"got {len(output_feat_list)} and {len(output_id_list)}."
    )

    # Stitch the received features together with what earlier calls already
    # stored under the same key.
    if len(output_feat_list) > 0:
        rcvd_feats = torch.cat(output_feat_list)
        rcvd_ids = torch.cat(output_id_list)
        prev_feats = cur_features.get(local_feat_key)
        if prev_feats is not None and prev_feats.shape[0] > 0:
            cur_features[local_feat_key] = torch.cat([prev_feats, rcvd_feats])
            cur_global_ids[local_feat_key] = torch.cat(
                [cur_global_ids[local_feat_key], rcvd_ids]
            )
        else:
            # Either the first data for this key, or only the zero-row
            # float32 placeholder was stored so far. The placeholder is
            # replaced rather than concatenated so that it cannot change
            # the dtype or clash with the rank of the received features.
            cur_features[local_feat_key] = rcvd_feats
            cur_global_ids[local_feat_key] = rcvd_ids
    elif local_feat_key not in cur_features:
        # Nothing received and nothing stored yet: record an empty
        # placeholder. An existing entry is left alone so that rows
        # accumulated by earlier calls for this key are not discarded.
        cur_features[local_feat_key] = torch.empty(
            (0, feature_dimension), dtype=torch.float32
        )
        cur_global_ids[local_feat_key] = torch.empty((0,), dtype=torch.int64)

    return cur_features, cur_global_ids
of participating processes. feature_tids : dictionary dictionary with keys as node-type names with suffixes as feature names and value is a dictionary. This dictionary contains information about node-features associated with a given node-type and value is a list. This list contains a of indexes, like [starting-idx, ending-idx) which can be used to index into the node feature tensors read from corresponding input files. type_id_map : dictionary mapping between type names and global_ids, of either nodes or edges, which belong to the keys in this dictionary id_lookup : instance of class DistLookupService Distributed lookup service used to map global-nids to respective partition-ids and shuffle-global-nids feat_type : string this is used to distinguish which features are being exchanged. Please note that for nodes ownership is clearly defined and for edges it is always assumed that destination end point of the edge defines the ownership of that particular edge data: dicitonary dictionry in which node or edge features are stored and this information is read from the appropriate node features file which belongs to the current process Returns: -------- dictionary : a dictionary is returned where keys are type names and feature data are the values list : a dictionary of global_ids either nodes or edges whose features are received during the data shuffle process """ start = timer() own_features = {} own_global_ids = {} # To iterate over the node_types and associated node_features for feat_key, type_info in feature_tids.items(): # To iterate over the feature data, of a given (node or edge )type # type_info is a list of 3 elements (as shown below): # [feature-name, starting-idx, ending-idx] # feature-name is the name given to the feature-data, # read from the input metadata file # [starting-idx, ending-idx) specifies the range of indexes # associated with the features data # Determine the owner process for these features. 
# Note that the keys in the node features (and similarly edge features) # dictionary is of the following format: # `node_type/feature_name/local_part_id`: # where node_type and feature_name are self-explanatory and # local_part_id denotes the partition-id, in the local process, # which will be used a suffix to store all the information of a # given partition which is processed by the current process. Its # values start from 0 onwards, for instance 0, 1, 2 ... etc. # local_part_id can be easily mapped to global partition id very # easily, using cyclic ordering. All local_part_ids = 0 from all # processes will form global partition-ids between 0 and world_size-1. # Similarly all local_part_ids = 1 from all processes will form # global partition ids in the range [world_size, 2*world_size-1] and # so on. tokens = feat_key.split("/") assert len(tokens) == 3 type_name = tokens[0] feat_name = tokens[1] logging.debug(f"[Rank: {rank}] processing feature: {feat_key}") for feat_info in type_info: # Compute the global_id range for this feature data type_id_start = int(feat_info[0]) type_id_end = int(feat_info[1]) begin_global_id = type_id_map[type_name][0] gid_start = begin_global_id + type_id_start gid_end = begin_global_id + type_id_end # Check if features exist for this type_name + feat_name. # This check should always pass, because feature_tids are built # by reading the input metadata json file for existing features. 
assert feat_key in feature_data for local_part_id in range(num_parts // world_size): featdata_key = feature_data[feat_key] # Synchronize for each feature dist.barrier() own_features, own_global_ids = exchange_feature( rank, data, id_lookup, feat_type, feat_key, featdata_key, gid_start, gid_end, type_id_start, type_id_end, local_part_id, world_size, num_parts, own_features, own_global_ids, ) end = timer() logging.info( f"[Rank: {rank}] Total time for feature exchange: {timedelta(seconds = end - start)}" ) for k, v in own_features.items(): logging.debug(f"Rank: {rank}] Key - {k} Value - {v.shape}") return own_features, own_global_ids def exchange_graph_data( rank, world_size, num_parts, node_features, edge_features, node_feat_tids, edge_feat_tids, edge_data, id_lookup, ntypes_ntypeid_map, ntypes_gnid_range_map, etypes_geid_range_map, ntid_ntype_map, schema_map, ): """ Wrapper function which is used to shuffle graph data on all the processes. Parameters: ----------- rank : int rank of the current process world_size : int total no. of participating processes. num_parts : int total no. of graph partitions. node_feautres : dicitonary dictionry where node_features are stored and this information is read from the appropriate node features file which belongs to the current process edge_features : dictionary dictionary where edge_features are stored. This information is read from the appropriate edge feature files whose ownership is assigned to the current process node_feat_tids: dictionary in which keys are node-type names and values are triplets. Each triplet has node-feature name and the starting and ending type ids of the node-feature data read from the corresponding node feature data file read by current process. Each node type may have several features and hence each key may have several triplets. edge_feat_tids : dictionary a dictionary in which keys are edge-type names and values are triplets of the format . 
This triplet is used to identify the chunk of feature data for which current process is responsible for edge_data : dictionary dictionary which is used to store edge information as read from appropriate files assigned to each process. id_lookup : instance of class DistLookupService Distributed lookup service used to map global-nids to respective partition-ids and shuffle-global-nids ntypes_ntypeid_map : dictionary mappings between node type names and node type ids ntypes_gnid_range_map : dictionary mapping between node type names and global_nids which belong to the keys in this dictionary etypes_geid_range_map : dictionary mapping between edge type names and global_eids which are assigned to the edges of this edge_type ntid_ntype_map : dictionary mapping between node type id and no of nodes which belong to each node_type_id schema_map : dictionary is the data structure read from the metadata json file for the input graph Returns: -------- dictionary : the input argument, node_data dictionary, is updated with the node data received from other processes in the world. The node data is received by each rank in the process of data shuffling. dictionary : node features dictionary which has node features for the nodes which are owned by the current process dictionary : list of global_nids for the nodes whose node features are received when node features shuffling was performed in the `exchange_features` function call dictionary : the input argument, edge_data dictionary, is updated with the edge data received from other processes in the world. The edge data is received by each rank in the process of data shuffling. dictionary : edge features dictionary which has edge features. 
def read_dataset(rank, world_size, id_lookup, params, schema_map, ntype_counts):
    """
    Read the input dataset assigned to this process and augment its edge
    data so that it is ready for data shuffling.

    The dataset is loaded with `get_dataset` and the edge data is then
    passed through `augment_edge_data`. All processes synchronize after
    each of these two steps.

    Parameters:
    -----------
    rank : int
        rank of the current process
    world_size : int
        total no. of processes instantiated
    id_lookup : instance of class DistLookupService
        Distributed lookup service used to map global-nids to respective
        partition-ids and shuffle-global-nids
    params : argparser object
        argument parser object to access command line arguments; the
        attributes input_dir, graph_name and num_parts are read here
    schema_map : dictionary
        dictionary created by reading the input graph metadata json file
    ntype_counts :
        forwarded unchanged to `get_dataset`

    Returns :
    ---------
    dictionary
        node features which is a dictionary where keys are feature names
        and values are feature data as multi-dimensional tensors
    dictionary
        in which keys are node-type names and values are triplets. Each
        triplet has node-feature name and the starting and ending type ids
        of the node-feature data read from the corresponding node feature
        data file read by current process. Each node type may have several
        features and hence each key may have several triplets.
    dictionary
        edge data, as returned by `augment_edge_data`
    dictionary
        edge type counts, as returned by `get_dataset`
    dictionary
        edge features which is also a dictionary, similar to node features
        dictionary
    dictionary
        a dictionary in which keys are edge-type names and values are
        triplets, (edge-feature-name, start_type_id, end_type_id). These
        type_ids are indices in the edge-features read by the current
        process. Note that each edge-type may have several edge-features.
    """
    (
        node_features,
        node_feat_tids,
        raw_edge_data,
        edge_typecounts,
        edge_tids,
        edge_features,
        edge_feat_tids,
    ) = get_dataset(
        params.input_dir,
        params.graph_name,
        rank,
        world_size,
        params.num_parts,
        schema_map,
        ntype_counts,
    )
    # Wait until every process has finished reading its share from disk.
    dist.barrier()
    logging.info(f"[Rank: {rank}] Done reading dataset {params.input_dir}")

    augmented_edges = augment_edge_data(
        raw_edge_data, id_lookup, edge_tids, rank, world_size, params.num_parts
    )
    # Wait until every process has finished augmenting its edge data.
    dist.barrier()
    logging.debug(
        f"[Rank: {rank}] Done augmenting edge_data: {len(augmented_edges)}, {augmented_edges[constants.GLOBAL_SRC_ID].shape}"
    )

    return (
        node_features,
        node_feat_tids,
        augmented_edges,
        edge_typecounts,
        edge_features,
        edge_feat_tids,
    )


def reorder_data(num_parts, world_size, data, key):
    """
    Auxiliary function used to sort node and edge data for the input graph.

    Parameters:
    -----------
    num_parts : int
        total no. of partitions
    world_size : int
        total number of nodes used in this execution
    data : dictionary
        which is used to store the node and edge data for the input graph.
        Every key must have the form ``<column>/<local_part_id>``.
    key : string
        specifies the column which is used to determine the sort order for
        the remaining columns

    Returns:
    --------
    dictionary
        the same dictionary object as the input, with the columns of each
        local partition reordered as per the argsort results on the column
        specified by the ``key`` column of that partition
    """
    for part in range(num_parts // world_size):
        suffix = str(part)
        order = data[key + "/" + suffix].argsort()
        # Only existing keys are reassigned, so the key set is stable.
        for column in list(data):
            pieces = column.split("/")
            assert len(pieces) == 2
            if pieces[1] != suffix:
                continue
            data[column] = data[column][order]
        # Drop the permutation before handling the next partition.
        order = None
    gc.collect()
    return data
Input dataset and its file structure is described in metadata json file which is also part of the input dataset. On a high-level, this metadata json file contains information about the following items a) Nodes metadata, It is assumed that nodes which belong to each node-type are split into p files (wherer `p` is no. of partitions). b) Similarly edge metadata contains information about edges which are split into p-files. c) Node and Edge features, it is also assumed that each node (and edge) feature, if present, is also split into `p` files. For example, a sample metadata json file might be as follows: : (In this toy example, we assume that we have "m" node-types, "k" edge types, and for node_type = ntype0-name we have two features namely feat0-name and feat1-name. Please note that the node-features are also split into `p` files. This will help in load-balancing during data-shuffling phase). Terminology used to identify any particular "id" assigned to nodes, edges or node features. Prefix "global" is used to indicate that this information is either read from the input dataset or autogenerated based on the information read from input dataset files. Prefix "type" is used to indicate a unique id assigned to either nodes or edges. For instance, type_node_id means that a unique id, with a given node type, assigned to a node. And prefix "shuffle" will be used to indicate a unique id, across entire graph, assigned to either a node or an edge. For instance, SHUFFLE_GLOBAL_NID means a unique id which is assigned to a node after the data shuffle is completed. Some high-level notes on the structure of the metadata json file. 1. path(s) mentioned in the entries for nodes, edges and node-features files can be either absolute or relative. if these paths are relative, then it is assumed that they are relative to the folder from which the execution is launched. 2. The id_startx and id_endx represent the type_node_id and type_edge_id respectively for nodes and edge data. 
This means that these ids should match the no. of nodes/edges read from any given file. Since these are type_ids for the nodes and edges in any given file, their global_ids can be easily computed as well. { "graph_name" : xyz, "node_type" : ["ntype0-name", "ntype1-name", ....], #m node types "num_nodes_per_chunk" : [ [a0, a1, ...a], #p partitions [b0, b1, ... b], .... [c0, c1, ..., c] #no, of node types ], "edge_type" : ["src_ntype:edge_type:dst_ntype", ....], #k edge types "num_edges_per_chunk" : [ [a0, a1, ...a], #p partitions [b0, b1, ... b], .... [c0, c1, ..., c] #no, of edge types ], "node_data" : { "ntype0-name" : { "feat0-name" : { "format" : {"name": "numpy"}, "data" : [ #list of lists ["/feat-0.npy", 0, id_end0], ["/feat-1.npy", id_start1, id_end1], .... ["/feat-.npy", id_start, id_end] ] }, "feat1-name" : { "format" : {"name": "numpy"}, "data" : [ #list of lists ["/feat-0.npy", 0, id_end0], ["/feat-1.npy", id_start1, id_end1], .... ["/feat-.npy", id_start, id_end] ] } } }, "edges": { #k edge types "src_ntype:etype0-name:dst_ntype" : { "format": {"name" : "csv", "delimiter" : " "}, "data" : [ ["/etype0-name-0.txt", 0, id_end0], #These are type_edge_ids for edges of this type ["/etype0-name-1.txt", id_start1, id_end1], ..., ["/etype0-name-.txt", id_start, id_end] ] }, ..., "src_ntype:etype-name:dst_ntype" : { "format": {"name" : "csv", "delimiter" : " "}, "data" : [ ["/etype-name-0.txt", 0, id_end0], ["/etype-name-1.txt", id_start1, id_end1], ..., ["/etype-name-.txt", id_start, id_end] ] }, }, } The function performs the following steps: 1. Reads the metis partitions to identify the owner process of all the nodes in the entire graph. 2. Reads the input data set, each partitipating process will map to a single file for the edges, node-features and edge-features for each node-type and edge-types respectively. Using nodes metadata information, nodes which are owned by a given process are generated to optimize communication to some extent. 3. 
Now each process shuffles the data by identifying the respective owner processes using metis partitions. a. To identify owner processes for nodes, metis partitions will be used. b. For edges, the owner process of the destination node will be the owner of the edge as well. c. For node and edge features, identifying the owner process is a little bit involved. For this purpose, graph metadata json file is used to first map the locally read node features to their global_nids. Now owner process is identified using metis partitions for these global_nids to retrieve shuffle_global_nids. A similar process is used for edge_features as well. d. After all the data shuffling is done, the order of node-features may be different when compared to their global_type_nids. Node- and edge-data are ordered by node-type and edge-type respectively. And now node features and edge features are re-ordered to match the order of their node- and edge-types. 4. Last step is to create the DGL objects with the data present on each of the processes. a. DGL objects for nodes, edges, node- and edge- features. b. Metadata is gathered from each process to create the global metadata json file, by process rank = 0. Parameters: ---------- rank : int integer representing the rank of the current process in a typical distributed implementation world_size : int integer representing the total no. of participating processes in a typical distributed implementation params : argparser object this object, key value pairs, provides access to the command line arguments from the runtime environment """ global_start = timer() logging.info( f"[Rank: {rank}] Starting distributed data processing pipeline..." ) memory_snapshot("Pipeline Begin: ", rank) # init processing schema_map = read_json(os.path.join(params.input_dir, params.schema)) # The resources, which are node-id to partition-id mappings, are split # into `world_size` number of parts, where each part can be mapped to # each physical node. 
id_lookup = DistLookupService( os.path.join(params.input_dir, params.partitions_dir), schema_map[constants.STR_NODE_TYPE], rank, world_size, params.num_parts, ) # get the id to name mappings here. ntypes_ntypeid_map, ntypes, ntypeid_ntypes_map = get_node_types(schema_map) etypes_etypeid_map, etypes, etypeid_etypes_map = get_edge_types(schema_map) logging.info( f"[Rank: {rank}] Initialized metis partitions and node_types map..." ) # Initialize distributed lookup service for partition-id and shuffle-global-nids mappings # for global-nids _, global_nid_ranges = get_idranges( schema_map[constants.STR_NODE_TYPE], get_ntype_counts_map( schema_map[constants.STR_NODE_TYPE], schema_map[constants.STR_NUM_NODES_PER_TYPE], ), ) id_map = dgl.distributed.id_map.IdMap(global_nid_ranges) id_lookup.set_idMap(id_map) # read input graph files and augment these datastructures with # appropriate information (global_nid and owner process) for node and edge data ( node_features, node_feat_tids, edge_data, edge_typecounts, edge_features, edge_feat_tids, ) = read_dataset( rank, world_size, id_lookup, params, schema_map, get_ntype_counts_map( schema_map[constants.STR_NODE_TYPE], schema_map[constants.STR_NUM_NODES_PER_TYPE], ), ) logging.info( f"[Rank: {rank}] Done augmenting file input data with auxilary columns" ) memory_snapshot("DatasetReadComplete: ", rank) # send out node and edge data --- and appropriate features. 
# this function will also stitch the data recvd from other processes # and return the aggregated data # ntypes_gnid_range_map = get_gnid_range_map(node_tids) # etypes_geid_range_map = get_gnid_range_map(edge_tids) ntypes_gnid_range_map = get_gid_offsets( schema_map[constants.STR_NODE_TYPE], get_ntype_counts_map( schema_map[constants.STR_NODE_TYPE], schema_map[constants.STR_NUM_NODES_PER_TYPE], ), ) etypes_geid_range_map = get_gid_offsets( schema_map[constants.STR_EDGE_TYPE], edge_typecounts ) ( node_data, rcvd_node_features, rcvd_global_nids, edge_data, rcvd_edge_features, rcvd_global_eids, ) = exchange_graph_data( rank, world_size, params.num_parts, node_features, edge_features, node_feat_tids, edge_feat_tids, edge_data, id_lookup, ntypes_ntypeid_map, ntypes_gnid_range_map, etypes_geid_range_map, ntypeid_ntypes_map, schema_map, ) gc.collect() logging.debug(f"[Rank: {rank}] Done with data shuffling...") memory_snapshot("DataShuffleComplete: ", rank) # sort node_data by ntype node_data = reorder_data( params.num_parts, world_size, node_data, constants.NTYPE_ID ) logging.debug(f"[Rank: {rank}] Sorted node_data by node_type") memory_snapshot("NodeDataSortComplete: ", rank) # resolve global_ids for nodes # Synchronize before assigning shuffle-global-ids to nodes dist.barrier() assign_shuffle_global_nids_nodes( rank, world_size, params.num_parts, node_data ) logging.debug(f"[Rank: {rank}] Done assigning global-ids to nodes...") memory_snapshot("ShuffleGlobalID_Nodes_Complete: ", rank) # shuffle node feature according to the node order on each rank. for ntype_name in ntypes: featnames = get_ntype_featnames(ntype_name, schema_map) for featname in featnames: # if a feature name exists for a node-type, then it should also have # feature data as well. Hence using the assert statement. 
for local_part_id in range(params.num_parts // world_size): feature_key = ( ntype_name + "/" + featname + "/" + str(local_part_id) ) assert feature_key in rcvd_global_nids global_nids = rcvd_global_nids[feature_key] _, idx1, _ = np.intersect1d( node_data[constants.GLOBAL_NID + "/" + str(local_part_id)], global_nids, return_indices=True, ) shuffle_global_ids = node_data[ constants.SHUFFLE_GLOBAL_NID + "/" + str(local_part_id) ][idx1] feature_idx = shuffle_global_ids.argsort() rcvd_node_features[feature_key] = rcvd_node_features[ feature_key ][feature_idx] memory_snapshot("ReorderNodeFeaturesComplete: ", rank) # Sort edge_data by etype edge_data = reorder_data( params.num_parts, world_size, edge_data, constants.ETYPE_ID ) logging.debug(f"[Rank: {rank}] Sorted edge_data by edge_type") memory_snapshot("EdgeDataSortComplete: ", rank) # Synchronize before assigning shuffle-global-nids for edges end points. dist.barrier() shuffle_global_eid_offsets = assign_shuffle_global_nids_edges( rank, world_size, params.num_parts, edge_data ) logging.debug(f"[Rank: {rank}] Done assigning global_ids to edges ...") memory_snapshot("ShuffleGlobalID_Edges_Complete: ", rank) # Shuffle edge features according to the edge order on each rank. 
for etype_name in etypes: featnames = get_etype_featnames(etype_name, schema_map) for featname in featnames: for local_part_id in range(params.num_parts // world_size): feature_key = ( etype_name + "/" + featname + "/" + str(local_part_id) ) assert feature_key in rcvd_global_eids global_eids = rcvd_global_eids[feature_key] _, idx1, _ = np.intersect1d( edge_data[constants.GLOBAL_EID + "/" + str(local_part_id)], global_eids, return_indices=True, ) shuffle_global_ids = edge_data[ constants.SHUFFLE_GLOBAL_EID + "/" + str(local_part_id) ][idx1] feature_idx = shuffle_global_ids.argsort() rcvd_edge_features[feature_key] = rcvd_edge_features[ feature_key ][feature_idx] # determine global-ids for edge end-points # Synchronize before retrieving shuffle-global-nids for edges end points. dist.barrier() edge_data = lookup_shuffle_global_nids_edges( rank, world_size, params.num_parts, edge_data, id_lookup, node_data ) logging.debug( f"[Rank: {rank}] Done resolving orig_node_id for local node_ids..." ) memory_snapshot("ShuffleGlobalID_Lookup_Complete: ", rank) def prepare_local_data(src_data, local_part_id): local_data = {} for k, v in src_data.items(): tokens = k.split("/") if tokens[len(tokens) - 1] == str(local_part_id): local_data["/".join(tokens[:-1])] = v return local_data # create dgl objects here output_meta_json = {} start = timer() graph_formats = None if params.graph_formats: graph_formats = params.graph_formats.split(",") prev_last_ids = {} for local_part_id in range(params.num_parts // world_size): # Synchronize for each local partition of the graph object. 
dist.barrier() num_edges = shuffle_global_eid_offsets[local_part_id] node_count = len( node_data[constants.NTYPE_ID + "/" + str(local_part_id)] ) edge_count = len( edge_data[constants.ETYPE_ID + "/" + str(local_part_id)] ) local_node_data = prepare_local_data(node_data, local_part_id) local_edge_data = prepare_local_data(edge_data, local_part_id) tot_node_count = sum(schema_map["num_nodes_per_type"]) tot_edge_count = sum(schema_map["num_edges_per_type"]) ( graph_obj, ntypes_map_val, etypes_map_val, ntypes_map, etypes_map, orig_nids, orig_eids, ) = create_graph_object( tot_node_count, tot_edge_count, node_count, edge_count, params.num_parts, schema_map, rank + local_part_id * world_size, local_node_data, local_edge_data, num_edges, get_ntype_counts_map( schema_map[constants.STR_NODE_TYPE], schema_map[constants.STR_NUM_NODES_PER_TYPE], ), edge_typecounts, prev_last_ids, return_orig_nids=params.save_orig_nids, return_orig_eids=params.save_orig_eids, use_graphbolt=params.use_graphbolt, store_inner_node=params.store_inner_node, store_inner_edge=params.store_inner_edge, store_eids=params.store_eids, ) sort_etypes = len(etypes_map) > 1 local_node_features = prepare_local_data( rcvd_node_features, local_part_id ) local_edge_features = prepare_local_data( rcvd_edge_features, local_part_id ) write_dgl_objects( graph_obj, local_node_features, local_edge_features, params.output, rank + (local_part_id * world_size), orig_nids, orig_eids, graph_formats, sort_etypes, params.use_graphbolt, ) if params.use_graphbolt: memory_snapshot("DiskWriteGrapgboltObjectsComplete: ", rank) else: memory_snapshot("DiskWriteDGLObjectsComplete: ", rank) # get the meta-data json_metadata = create_metadata_json( params.graph_name, node_count, edge_count, local_part_id * world_size + rank, params.num_parts, ntypes_map_val, etypes_map_val, ntypes_map, etypes_map, params.output, params.use_graphbolt, ) output_meta_json[ "local-part-id-" + str(local_part_id * world_size + rank) ] = json_metadata 
def run(rank, world_size, func_exec, params, backend="gloo"):
    """
    Set up the process group for one process and hand control to the
    pipeline function.

    Parameters:
    -----------
    rank : integer
        rank of the process
    world_size : integer
        number of processes configured in the Process Group
    func_exec : function
        function holding the per-process logic; it is invoked as
        ``func_exec(rank, world_size, params)``
    params : argparser object
        argument parser object to access the command line arguments
    backend : string
        string specifying the type of backend to use for communication
    """
    # Rendezvous endpoint for the process group: local host, fixed port.
    rendezvous = {"MASTER_ADDR": "127.0.0.1", "MASTER_PORT": "29500"}
    os.environ.update(rendezvous)

    # Create the process group with a five-minute timeout.
    group_options = dict(
        rank=rank,
        world_size=world_size,
        timeout=timedelta(seconds=5 * 60),
    )
    dist.init_process_group(backend, **group_options)

    # Kick off the actual work for this process.
    func_exec(rank, world_size, params)
def _broadcast_shape(
    data, rank, world_size, num_parts, is_feat_data, feat_name
):
    """Auxiliary function to broadcast the shape of a feature data.
    This information is used to figure out the type-ids for the
    local features.

    Every rank contributes the shape of its local ``data`` through an
    all-gather. Ranks that report zero rows are dropped, and the row counts
    of the remaining ranks are turned into consecutive ``(start, end)``
    ranges by cumulative summation.

    Parameters:
    -----------
    data : tensor
        feature data read from the disk; must be 1-D or 2-D
    rank : integer
        which represents the id of the process in the process group
    world_size : integer
        represents the total no. of process in the process group
    num_parts : integer
        specifying the no. of partitions (not used by this function)
    is_feat_data : bool
        flag used to separate feature data and edge data; when set, the
        dtype id of ``data`` is exchanged as well and checked for
        consistency across the ranks that hold rows
    feat_name : string
        name of the feature, used in the dtype-mismatch error message

    Returns:
    -------
    list of tuples :
        one ``(start, end)`` range of type-ids per rank that reported a
        non-zero number of rows, in rank order. The list is empty when no
        rank holds any rows.
    """
    assert len(data.shape) in [
        1,
        2,
    ], f"Data is expected to be 1-D or 2-D but got {data.shape}."
    data_shape = list(data.shape)
    # Normalise 1-D data to (rows, 1) so every rank sends equally sized shapes.
    if len(data_shape) == 1:
        data_shape.append(1)

    if is_feat_data:
        data_shape.append(DATA_TYPE_ID[data.dtype])

    data_shape = torch.tensor(data_shape, dtype=torch.int64)
    data_shape_output = [
        torch.zeros_like(data_shape) for _ in range(world_size)
    ]
    dist.all_gather(data_shape_output, data_shape)
    logging.debug(
        f"[Rank: {rank} Received shapes from all ranks: {data_shape_output}"
    )

    # Keep only the ranks which actually hold rows.
    shapes = [x.numpy() for x in data_shape_output if x[0] != 0]
    if not shapes:
        # np.vstack() raises ValueError on an empty sequence; with no rows
        # anywhere there are simply no type-id ranges to report.
        logging.debug(
            f"[Rank: {rank}] No rank holds any rows for {feat_name}"
        )
        return []
    shapes = np.vstack(shapes)

    if is_feat_data:
        logging.debug(
            f"shapes: {shapes}, condition: {all(shapes[0,2] == s for s in shapes[:,2])}"
        )
        assert all(
            shapes[0, 2] == s for s in shapes[:, 2]
        ), f"dtypes for {feat_name} does not match on all ranks"

    # compute tids here.
    type_counts = list(shapes[:, 0])
    tid_start = np.cumsum([0] + type_counts[:-1])
    tid_end = np.cumsum(type_counts)
    tid_ranges = list(zip(tid_start, tid_end))
    logging.debug(f"starts -> {tid_start} ... end -> {tid_end}")

    return tid_ranges
Dictionary built using this data has keys as node feature names and values as tensor data representing node features dictionary in which keys are node-type and values are a triplet. This triplet has node-feature name, and range of tids for the node feature data read from files by the current process. Each node-type may have mutiple feature(s) and associated tensor data. dictionary Data read from edges.txt file and used to build a dictionary with keys as column names and values as columns in the csv file. dictionary in which keys are edge-type names and values are triplets. This triplet has edge-feature name, and range of tids for theedge feature data read from the files by the current process. Each edge-type may have several edge features and associated tensor data. dictionary Data read from numpy files for all the edge features in this dataset. This dictionary's keys are feature names and values are tensors data representing edge feature data. dictionary This dictionary is used for identifying the global-id range for the associated edge features present in the previous return value. The keys are edge-type names and values are triplets. Each triplet consists of edge-feature name and starting and ending points of the range of tids representing the corresponding edge feautres. """ # node features dictionary # TODO: With the new file format, It is guaranteed that the input dataset will have # no. of nodes with features (node-features) files and nodes metadata will always be the same. # This means the dimension indicating the no. of nodes in any node-feature files and the no. of # nodes in the corresponding nodes metadata file will always be the same. With this guarantee, # we can eliminate the `node_feature_tids` dictionary since the same information is also populated # in the `node_tids` dictionary. This will be remnoved in the next iteration of code changes. 
node_features = {} node_feature_tids = {} """ The structure of the node_data is as follows, which is present in the input metadata json file. "node_data" : { "ntype0-name" : { "feat0-name" : { "format" : {"name": "numpy"}, "data" : [ #list "/feat-0.npy", "/feat-1.npy", .... "/feat-.npy" ] }, "feat1-name" : { "format" : {"name": "numpy"}, "data" : [ #list "/feat-0.npy", "/feat-1.npy", .... "/feat-.npy" ] } } } As shown above, the value for the key "node_data" is a dictionary object, which is used to describe the feature data for each of the node-type names. Keys in this top-level dictionary are node-type names and value is a dictionary which captures all the features for the current node-type. Feature data is captured with keys being the feature-names and value is a dictionary object which has 2 keys namely format and data. Format entry is used to mention the format of the storage used by the node features themselves and "data" is used to mention all the files present for this given node feature. Data read from each of the node features file is a multi-dimensional tensor data and is read in numpy or parquet format, which is also the storage format of node features on the permanent storage. "node_type" : ["ntype0-name", "ntype1-name", ....], #m node types "num_nodes_per_chunk" : [ [a0, a1, ...a], #p partitions [b0, b1, ... b], .... [c0, c1, ..., c] #no, of node types ], The "node_type" points to a list of all the node names present in the graph And "num_nodes_per_chunk" is used to mention no. of nodes present in each of the input nodes files. These node counters are used to compute the type_node_ids as well as global node-ids by using a simple cumulative summation and maitaining an offset counter to store the end of the current. Since nodes are NOT actually associated with any additional metadata, w.r.t to the processing involved in this pipeline this information is not needed to be stored in files. 
This optimization saves a considerable amount of time when loading massively large datasets for paritioning. As opposed to reading from files and performing shuffling process each process/rank generates nodes which are owned by that particular rank. And using the "num_nodes_per_chunk" information each process can easily compute any nodes per-type node_id and global node_id. The node-ids are treated as int64's in order to support billions of nodes in the input graph. """ # read my nodes for each node type """ node_tids, ntype_gnid_offset = get_idranges( schema_map[constants.STR_NODE_TYPE], schema_map[constants.STR_NUM_NODES_PER_CHUNK], num_chunks=num_parts, ) """ logging.debug(f"[Rank: {rank} ntype_counts: {ntype_counts}") ntype_gnid_offset = get_gid_offsets( schema_map[constants.STR_NODE_TYPE], ntype_counts ) logging.debug(f"[Rank: {rank} - ntype_gnid_offset = {ntype_gnid_offset}") # iterate over the "node_data" dictionary in the schema_map # read the node features if exists # also keep track of the type_nids for which the node_features are read. dataset_features = schema_map[constants.STR_NODE_DATA] if (dataset_features is not None) and (len(dataset_features) > 0): for ntype_name, ntype_feature_data in dataset_features.items(): for feat_name, feat_data in ntype_feature_data.items(): assert feat_data[constants.STR_FORMAT][constants.STR_NAME] in [ constants.STR_NUMPY, constants.STR_PARQUET, ] # It is guaranteed that num_chunks is always greater # than num_partitions. 
node_data = [] num_files = len(feat_data[constants.STR_DATA]) if num_files == 0: continue reader_fmt_meta = { "name": feat_data[constants.STR_FORMAT][constants.STR_NAME] } read_list = generate_read_list(num_files, world_size) for idx in read_list[rank]: data_file = feat_data[constants.STR_DATA][idx] if not os.path.isabs(data_file): data_file = os.path.join(input_dir, data_file) node_data.append( array_readwriter.get_array_parser( **reader_fmt_meta ).read(data_file) ) if len(node_data) > 0: node_data = np.concatenate(node_data) else: node_data = np.array([]) node_data = torch.from_numpy(node_data) cur_tids = _broadcast_shape( node_data, rank, world_size, num_parts, True, f"{ntype_name}/{feat_name}", ) logging.debug(f"[Rank: {rank} - cur_tids: {cur_tids}") # collect data on current rank. for local_part_id in range(num_parts): data_key = ( f"{ntype_name}/{feat_name}/{local_part_id//world_size}" ) if map_partid_rank(local_part_id, world_size) == rank: if len(cur_tids) > local_part_id: start, end = cur_tids[local_part_id] assert node_data.shape[0] == ( end - start ), f"Node feature data, {data_key}, shape = {node_data.shape} does not match with tids = ({start},{end})" node_features[data_key] = node_data node_feature_tids[data_key] = [(start, end)] else: node_features[data_key] = None node_feature_tids[data_key] = [(0, 0)] # done building node_features locally. if len(node_features) <= 0: logging.debug( f"[Rank: {rank}] This dataset does not have any node features" ) else: assert len(node_features) == len(node_feature_tids) # Note that the keys in the node_features dictionary are as follows: # `ntype_name/feat_name/local_part_id`. # where ntype_name and feat_name are self-explanatory, and # local_part_id indicates the partition-id, in the context of current # process which take the values 0, 1, 2, .... 
for feat_name, feat_info in node_features.items(): if feat_info == None: continue logging.debug( f"[Rank: {rank}] node feature name: {feat_name}, feature data shape: {feat_info.size()}" ) tokens = feat_name.split("/") assert len(tokens) == 3 # Get the range of type ids which are mapped to the current node. tids = node_feature_tids[feat_name] # Iterate over the range of type ids for the current node feature # and count the number of features for this feature name. count = tids[0][1] - tids[0][0] assert ( count == feat_info.size()[0] ), f"{feat_name}, {count} vs {feat_info.size()[0]}." """ Reading edge features now. The structure of the edge_data is as follows, which is present in the input metadata json file. "edge_data" : { "etype0-name" : { "feat0-name" : { "format" : {"name": "numpy"}, "data" : [ #list "/feat-0.npy", "/feat-1.npy", .... "/feat-.npy" ] }, "feat1-name" : { "format" : {"name": "numpy"}, "data" : [ #list "/feat-0.npy", "/feat-1.npy", .... "/feat-.npy" ] } } } As shown above, the value for the key "edge_data" is a dictionary object, which is used to describe the feature data for each of the edge-type names. Keys in this top-level dictionary are edge-type names and value is a dictionary which captures all the features for the current edge-type. Feature data is captured with keys being the feature-names and value is a dictionary object which has 2 keys namely `format` and `data`. Format entry is used to mention the format of the storage used by the node features themselves and "data" is used to mention all the files present for this given node feature. Data read from each of the node features file is a multi-dimensional tensor data and is read in numpy format, which is also the storage format of node features on the permanent storage. """ edge_features = {} edge_feature_tids = {} # Iterate over the "edge_data" dictionary in the schema_map. # Read the edge features if exists. # Also keep track of the type_eids for which the edge_features are read. 
dataset_features = schema_map[constants.STR_EDGE_DATA] if dataset_features and (len(dataset_features) > 0): for etype_name, etype_feature_data in dataset_features.items(): for feat_name, feat_data in etype_feature_data.items(): assert feat_data[constants.STR_FORMAT][constants.STR_NAME] in [ constants.STR_NUMPY, constants.STR_PARQUET, ] edge_data = [] num_files = len(feat_data[constants.STR_DATA]) if num_files == 0: continue reader_fmt_meta = { "name": feat_data[constants.STR_FORMAT][constants.STR_NAME] } read_list = generate_read_list(num_files, world_size) for idx in read_list[rank]: data_file = feat_data[constants.STR_DATA][idx] if not os.path.isabs(data_file): data_file = os.path.join(input_dir, data_file) logging.debug( f"[Rank: {rank}] Loading edges-feats of {etype_name}[{feat_name}] from {data_file}" ) edge_data.append( array_readwriter.get_array_parser( **reader_fmt_meta ).read(data_file) ) if len(edge_data) > 0: edge_data = np.concatenate(edge_data) else: edge_data = np.array([]) edge_data = torch.from_numpy(edge_data) # exchange the amount of data read from the disk. edge_tids = _broadcast_shape( edge_data, rank, world_size, num_parts, True, f"{etype_name}/{feat_name}", ) # collect data on current rank. for local_part_id in range(num_parts): data_key = ( f"{etype_name}/{feat_name}/{local_part_id//world_size}" ) if map_partid_rank(local_part_id, world_size) == rank: if len(edge_tids) > local_part_id: start, end = edge_tids[local_part_id] assert edge_data.shape[0] == ( end - start ), f"Edge Feature data, for {data_key}, of shape = {edge_data.shape} does not match with tids = ({start}, {end})" edge_features[data_key] = edge_data edge_feature_tids[data_key] = [(start, end)] else: edge_features[data_key] = None edge_feature_tids[data_key] = [(0, 0)] # Done with building node_features locally. 
if len(edge_features) <= 0: logging.debug( f"[Rank: {rank}] This dataset does not have any edge features" ) else: assert len(edge_features) == len(edge_feature_tids) for k, v in edge_features.items(): if v == None: continue logging.debug( f"[Rank: {rank}] edge feature name: {k}, feature data shape: {v.shape}" ) tids = edge_feature_tids[k] count = tids[0][1] - tids[0][0] assert count == v.size()[0] """ Code below is used to read edges from the input dataset with the help of the metadata json file for the input graph dataset. In the metadata json file, we expect the following key-value pairs to help read the edges of the input graph. "edge_type" : [ # a total of n edge types canonical_etype_0, canonical_etype_1, ..., canonical_etype_n-1 ] The value for the key is a list of strings, each string is associated with an edgetype in the input graph. Note that these strings are in canonical edgetypes format. This means, these edge type strings follow the following naming convention: src_ntype:etype:dst_ntype. src_ntype and dst_ntype are node type names of the src and dst end points of this edge type, and etype is the relation name between src and dst ntypes. The files in which edges are present and their storage format are present in the following key-value pair: "edges" : { "canonical_etype_0" : { "format" : { "name" : "csv", "delimiter" : " " }, "data" : [ filename_0, filename_1, filename_2, .... filename_ ] }, } As shown above the "edges" dictionary value has canonical edgetypes as keys and for each canonical edgetype we have "format" and "data" which describe the storage format of the edge files and actual filenames respectively. Please note that each edgetype data is split in to `p` files, where p is the no. of partitions to be made of the input graph. Each edge file contains two columns representing the source per-type node_ids and destination per-type node_ids of any given edge. Since these are node-ids as well they are read in as int64's. 
""" # read my edges for each edge type etype_names = schema_map[constants.STR_EDGE_TYPE] etype_name_idmap = {e: idx for idx, e in enumerate(etype_names)} edge_tids = {} edge_typecounts = {} edge_datadict = {} edge_data = schema_map[constants.STR_EDGES] # read the edges files and store this data in memory. for col in [ constants.GLOBAL_SRC_ID, constants.GLOBAL_DST_ID, constants.GLOBAL_TYPE_EID, constants.ETYPE_ID, ]: edge_datadict[col] = [] for etype_name, etype_id in etype_name_idmap.items(): etype_info = edge_data[etype_name] edge_info = etype_info[constants.STR_DATA] # edgetype strings are in canonical format, src_node_type:edge_type:dst_node_type tokens = etype_name.split(":") assert len(tokens) == 3 src_ntype_name = tokens[0] dst_ntype_name = tokens[2] num_chunks = len(edge_info) read_list = generate_read_list(num_chunks, world_size) src_ids = [] dst_ids = [] """ curr_partids = [] for part_id in range(num_parts): if map_partid_rank(part_id, world_size) == rank: curr_partids.append(read_list[part_id]) for idx in np.concatenate(curr_partids): """ for idx in read_list[rank]: edge_file = edge_info[idx] if not os.path.isabs(edge_file): edge_file = os.path.join(input_dir, edge_file) logging.debug( f"[Rank: {rank}] Loading edges of etype[{etype_name}] from {edge_file}" ) if ( etype_info[constants.STR_FORMAT][constants.STR_NAME] == constants.STR_CSV ): read_options = pyarrow.csv.ReadOptions( use_threads=True, block_size=4096, autogenerate_column_names=True, ) parse_options = pyarrow.csv.ParseOptions(delimiter=" ") if os.path.getsize(edge_file) == 0: # if getsize() == 0, the file is empty, indicating that the partition doesn't have this attribute. # The src_ids and dst_ids should remain empty. 
continue with pyarrow.csv.open_csv( edge_file, read_options=read_options, parse_options=parse_options, ) as reader: for next_chunk in reader: if next_chunk is None: break next_table = pyarrow.Table.from_batches([next_chunk]) src_ids.append(next_table["f0"].to_numpy()) dst_ids.append(next_table["f1"].to_numpy()) elif ( etype_info[constants.STR_FORMAT][constants.STR_NAME] == constants.STR_PARQUET ): data_df = pq.read_table(edge_file) data_df = data_df.rename_columns(["f0", "f1"]) src_ids.append(data_df["f0"].to_numpy()) dst_ids.append(data_df["f1"].to_numpy()) else: raise ValueError( f"Unknown edge format {etype_info[constants.STR_FORMAT][constants.STR_NAME]} for edge type {etype_name}" ) if len(src_ids) > 0: src_ids = np.concatenate(src_ids) dst_ids = np.concatenate(dst_ids) # currently these are just type_edge_ids... which will be converted to global ids edge_datadict[constants.GLOBAL_SRC_ID].append( src_ids + ntype_gnid_offset[src_ntype_name][0] ) edge_datadict[constants.GLOBAL_DST_ID].append( dst_ids + ntype_gnid_offset[dst_ntype_name][0] ) edge_datadict[constants.ETYPE_ID].append( etype_name_idmap[etype_name] * np.ones(shape=(src_ids.shape), dtype=np.int64) ) else: src_ids = np.array([]) # broadcast shape to compute the etype_id, and global_eid's later. 
cur_tids = _broadcast_shape( src_ids, rank, world_size, num_parts, False, None ) edge_typecounts[etype_name] = cur_tids[-1][1] edge_tids[etype_name] = cur_tids for local_part_id in range(num_parts): if map_partid_rank(local_part_id, world_size) == rank: if len(cur_tids) > local_part_id: edge_datadict[constants.GLOBAL_TYPE_EID].append( np.arange( cur_tids[local_part_id][0], cur_tids[local_part_id][1], dtype=np.int64, ) ) # edge_tids[etype_name] = [(cur_tids[local_part_id][0], cur_tids[local_part_id][1])] assert len(edge_datadict[constants.GLOBAL_SRC_ID]) == len( edge_datadict[constants.GLOBAL_TYPE_EID] ), f"Error while reading edges from the disk, local_part_id = {local_part_id}, num_parts = {num_parts}, world_size = {world_size} cur_tids = {cur_tids}" # stitch together to create the final data on the local machine for col in [ constants.GLOBAL_SRC_ID, constants.GLOBAL_DST_ID, constants.GLOBAL_TYPE_EID, constants.ETYPE_ID, ]: if len(edge_datadict[col]) > 0: edge_datadict[col] = np.concatenate(edge_datadict[col]) if len(edge_datadict[constants.GLOBAL_SRC_ID]) > 0: assert ( edge_datadict[constants.GLOBAL_SRC_ID].shape == edge_datadict[constants.GLOBAL_DST_ID].shape ) assert ( edge_datadict[constants.GLOBAL_DST_ID].shape == edge_datadict[constants.GLOBAL_TYPE_EID].shape ) assert ( edge_datadict[constants.GLOBAL_TYPE_EID].shape == edge_datadict[constants.ETYPE_ID].shape ) logging.debug( f"[Rank: {rank}] Done reading edge_file: {len(edge_datadict)}, {edge_datadict[constants.GLOBAL_SRC_ID].shape}" ) else: assert edge_datadict[constants.GLOBAL_SRC_ID] == [] assert edge_datadict[constants.GLOBAL_DST_ID] == [] assert edge_datadict[constants.GLOBAL_TYPE_EID] == [] edge_datadict[constants.GLOBAL_SRC_ID] = np.array([], dtype=np.int64) edge_datadict[constants.GLOBAL_DST_ID] = np.array([], dtype=np.int64) edge_datadict[constants.GLOBAL_TYPE_EID] = np.array([], dtype=np.int64) edge_datadict[constants.ETYPE_ID] = np.array([], dtype=np.int64) logging.debug(f"Rank: {rank} 
class DistLookupService:
    """
    Distributed lookup service used by the graph partitioning pipeline.

    The service answers two kinds of queries:
        1) global node-ids -> partition-ids (`get_partition_ids`), and
        2) global node-ids -> shuffle-global node-ids (`get_shuffle_nids`).

    For every node type the node-id to partition-id mapping is read from the
    file `<input_dir>/<ntype>.txt`. The mappings of a node type are divided
    into contiguous chunks of `ceil(count / world_size)` entries and the
    process with rank `r` keeps the r-th chunk. Because the split is
    deterministic, the process owning the mapping of any per-type node-id
    can be computed locally by every process.

    An id-map has to be registered with `set_idMap` before any lookup is
    issued; it is not a constructor argument.

    Parameters:
    -----------
    input_dir : string
        directory which contains one `<ntype>.txt` file per node type
    ntype_names : list of strings
        node type names; the position of a name in this list is used as its
        node-type-id when indexing the lookup tables
    rank : integer
        rank of the current process
    world_size : integer
        total no. of processes
    num_parts : integer
        no. of partitions
    """

    def __init__(self, input_dir, ntype_names, rank, world_size, num_parts):
        assert os.path.isdir(input_dir)
        assert ntype_names is not None
        assert len(ntype_names) > 0

        # These lists are indexed by ntype_ids.
        type_nid_begin = []
        type_nid_end = []
        partid_list = []
        ntype_count = []
        ntypes = []

        # The reader configuration is the same for every node type file.
        read_options = pyarrow.csv.ReadOptions(
            use_threads=True,
            block_size=4096,
            autogenerate_column_names=True,
        )
        parse_options = pyarrow.csv.ParseOptions(delimiter=" ")

        # Iterate over the node types and extract the partition id mappings.
        for ntype in ntype_names:
            file_path = os.path.join(input_dir, f"{ntype}.txt")
            logging.debug(f"[Rank: {rank}] Reading file: {file_path}")

            ntype_partids = []
            with pyarrow.csv.open_csv(
                file_path,
                read_options=read_options,
                parse_options=parse_options,
            ) as reader:
                for next_chunk in reader:
                    if next_chunk is None:
                        break
                    next_table = pyarrow.Table.from_batches([next_chunk])
                    ntype_partids.append(next_table["f0"].to_numpy())

            ntype_partids = np.concatenate(ntype_partids)
            count = len(ntype_partids)
            ntype_count.append(count)
            ntypes.append(ntype)

            # Each rank owns a contiguous range of per-type node-ids. The
            # ranges have equal size, except that the last rank's range is
            # clamped to the total count.
            split_size = np.ceil(count / np.int64(world_size)).astype(np.int64)
            start, end = (
                np.int64(rank) * split_size,
                np.int64(rank + 1) * split_size,
            )
            if rank == (world_size - 1):
                end = count
            type_nid_begin.append(start)
            type_nid_end.append(end)

            # Keep a private copy of the slice owned by this rank so that the
            # complete array read from the file can be released right away.
            partid_list.append(copy.deepcopy(ntype_partids[start:end]))
            del ntype_partids

        logging.debug(
            f"[Rank: {rank}] ntypeid begin - {type_nid_begin} - {type_nid_end}"
        )

        # Store all the information in the object instance variables.
        self.type_nid_begin = np.array(type_nid_begin, dtype=np.int64)
        self.type_nid_end = np.array(type_nid_end, dtype=np.int64)
        self.partid_list = partid_list
        self.ntype_count = np.array(ntype_count, dtype=np.int64)
        self.ntypes = ntypes
        self.rank = rank
        self.world_size = world_size
        self.num_parts = num_parts

    def set_idMap(self, id_map):
        """
        Register the id-map used by the lookups.

        Parameters:
        -----------
        id_map : callable
            called with an array of global node-ids; it must return the pair
            (ntype_ids, type_nids) whose items provide a `.numpy()` method
        """
        self.id_map = id_map

    def get_partition_ids(self, agg_global_nids):
        """
        Retrieve the partition-ids for a given set of global node-ids.

        The global node-ids are processed in splits, so that a single
        alltoallv message carries a bounded no. of ids. For every split:
            1) the rank which stores the mapping of each global node-id is
               computed from its node type and per-type node-id,
            2) the ids are sent to these ranks with an alltoallv,
            3) every rank answers the requests it received by using its
               locally stored lookup tables, and a second alltoallv returns
               the partition-ids to the requesting ranks,
            4) the answers are re-ordered to match the order of the request.

        This is a collective operation, every process of the service has to
        call it, even when it has no node-ids to look up.

        Parameters:
        -----------
        agg_global_nids : numpy array
            global node-ids for which the partition-ids are to be retrieved

        Returns:
        --------
        numpy array :
            partition-ids, in the same order as `agg_global_nids`. An empty
            int64 array is returned when no process has node-ids to look up.
        """
        CHUNK_SIZE = 200 * 1000 * 1000

        # Determine the no. of times each process has to send alltoall messages.
        local_rows = agg_global_nids.shape[0]
        all_sizes = allgather_sizes(
            [local_rows], self.world_size, self.num_parts, return_sizes=True
        )
        max_count = np.amax(all_sizes)
        if max_count <= 0:
            logging.debug(
                f"[Rank: {self.rank}] No process has global_nids to process !!!"
            )
            # Return an empty array, like the regular path does for a rank
            # without any rows, so that callers can treat the result as an
            # array without a special case.
            return np.array([], dtype=np.int64)

        # Plain python integers are used here, a fixed-width unsigned type
        # could silently wrap around for a very large no. of splits.
        num_splits = int(np.ceil(max_count / CHUNK_SIZE))
        LOCAL_CHUNK_SIZE = int(np.ceil(local_rows / num_splits))
        agg_partition_ids = []

        logging.debug(
            f"[Rank: {self.rank}] BatchSize: {CHUNK_SIZE}, \
            max_count: {max_count}, \
            splits: {num_splits}, \
            rows: {agg_global_nids.shape}, \
            local batch_size: {LOCAL_CHUNK_SIZE}"
        )

        for split in range(num_splits):
            # Compute the global_nids for this iteration
            global_nids = agg_global_nids[
                split * LOCAL_CHUNK_SIZE : (split + 1) * LOCAL_CHUNK_SIZE
            ]

            # Find the process where global_nid --> partition-id(owner) is stored.
            if len(global_nids) > 0:
                ntype_ids, type_nids = self.id_map(global_nids)
                ntype_ids, type_nids = ntype_ids.numpy(), type_nids.numpy()
            else:
                ntype_ids = np.array([], dtype=np.int64)
                type_nids = np.array([], dtype=np.int64)
            assert len(ntype_ids) == len(global_nids)

            # The mappings of a node type are stored as contiguous chunks of
            # ceil(count / world_size) entries, one chunk per rank, so the
            # owner rank is the index of the chunk the type_nid falls into.
            typeid_counts = self.ntype_count[ntype_ids]
            chunk_sizes = np.ceil(typeid_counts / self.world_size).astype(
                np.int64
            )
            service_owners = np.floor_divide(type_nids, chunk_sizes).astype(
                np.int64
            )

            # Split the global_nids into one request per rank. The positions
            # of the requested ids are remembered as well, they are needed to
            # restore the input order of the answers.
            send_list = []
            indices_list = []
            for idx in range(self.world_size):
                positions = np.where(service_owners == idx)[0]
                send_list.append(torch.from_numpy(global_nids[positions]))
                indices_list.append(positions)
            assert len(np.concatenate(indices_list)) == len(global_nids)
            assert np.all(
                np.sort(np.concatenate(indices_list))
                == np.arange(len(global_nids))
            )

            # Send the requests. In return the current process receives the
            # global node-ids whose mappings are stored by itself.
            owner_req_list = alltoallv_cpu(
                self.rank, self.world_size, send_list
            )

            # Build the response for each of the received requests.
            out_list = []
            for idx in range(self.world_size):
                if owner_req_list[idx] is None:
                    out_list.append(torch.empty((0,), dtype=torch.int64))
                    continue

                # Get the node_type_ids and per_type_nids for the incoming global_nids.
                ntype_ids, type_nids = self.id_map(owner_req_list[idx].numpy())
                ntype_ids, type_nids = ntype_ids.numpy(), type_nids.numpy()

                # The partition-ids are collected node type by node type,
                # `local_order_idx` records the request positions in that
                # same order so the request order can be restored below.
                type_id_lookups = []
                local_order_idx = []
                for tid in range(len(self.partid_list)):
                    cond = ntype_ids == tid
                    local_order_idx.append(np.where(cond)[0])
                    global_type_nids = type_nids[cond]
                    if len(global_type_nids) <= 0:
                        continue

                    local_type_nids = (
                        global_type_nids - self.type_nid_begin[tid]
                    )
                    assert np.all(local_type_nids >= 0)
                    # Valid offsets are strictly smaller than the no. of
                    # mappings which are stored locally for this node type.
                    assert np.all(
                        local_type_nids < len(self.partid_list[tid])
                    ), (
                        f"[Rank: {self.rank}] Received node-ids of node type "
                        f"{tid} which are not owned by this process."
                    )

                    cur_owners = self.partid_list[tid][local_type_nids]
                    type_id_lookups.append(cur_owners)

                # Reorder the partition-ids, so that it agrees with the order
                # in which the incoming message is received.
                if len(type_id_lookups) <= 0:
                    out_list.append(torch.empty((0,), dtype=torch.int64))
                else:
                    sort_order_idx = np.argsort(np.concatenate(local_order_idx))
                    lookups = np.concatenate(type_id_lookups)[sort_order_idx]
                    out_list.append(torch.from_numpy(lookups))

            # Send the partition-ids to their respective requesting processes.
            owner_resp_list = alltoallv_cpu(
                self.rank, self.world_size, out_list
            )

            # The responses arrive grouped by answering rank, which is the
            # order of the requests in `indices_list`.
            owner_ids = [x for x in owner_resp_list if x is not None]
            if len(owner_ids) > 0:
                owner_ids = torch.cat(owner_ids).numpy()
            else:
                owner_ids = np.array([], dtype=np.int64)
            assert len(owner_ids) == len(global_nids)

            # Restore the order of the global_nids of this split.
            global_nids_order = np.concatenate(indices_list)
            sort_order_idx = np.argsort(global_nids_order)
            owner_ids = owner_ids[sort_order_idx]
            global_nids_order = global_nids_order[sort_order_idx]
            assert np.all(np.arange(len(global_nids)) == global_nids_order)

            if len(owner_ids) > 0:
                # Store the partition-ids for the current split
                agg_partition_ids.append(owner_ids)

        # Stitch the list of partition-ids and return to the caller
        if len(agg_partition_ids) > 0:
            agg_partition_ids = np.concatenate(agg_partition_ids)
        else:
            agg_partition_ids = np.array([], dtype=np.int64)
        assert agg_global_nids.shape[0] == agg_partition_ids.shape[0]

        return agg_partition_ids

    def get_shuffle_nids(
        self, global_nids, my_global_nids, my_shuffle_global_nids, world_size
    ):
        """
        Retrieve the shuffle_global_nids for a given set of global_nids.
        The incoming global_nids may be in any order and may contain
        duplicates.

        The partition-ids of the global_nids are retrieved first and are
        mapped to ranks with `map_partid_rank`. The global_nids are sent to
        these ranks with an alltoallv. Every rank looks up the received ids
        in `my_global_nids`, picks the matching entries of
        `my_shuffle_global_nids` and returns them with a second alltoallv.
        The answers are finally re-ordered to match the order of the
        incoming global_nids.

        This is a collective operation, every process of the service has to
        call it.

        Parameters:
        -----------
        global_nids : numpy array
            global node-ids for which shuffle_global_nids are to be retrieved
        my_global_nids : numpy ndarray
            global_nids which are owned by the current process
        my_shuffle_global_nids : numpy ndarray
            shuffle_global_nids assigned by the current process, aligned
            with `my_global_nids`
        world_size : int
            total no. of processes, used to map partition-ids to ranks

        Returns:
        --------
        numpy array :
            shuffle_global_nids which correspond to the incoming global_nids,
            in the same order.
        """
        # Get the partition-ids of the nodes and map them to the ranks which
        # own the global_nid <-> shuffle_global_nid mappings.
        owner_ids = self.get_partition_ids(global_nids)
        owner_ids = map_partid_rank(owner_ids, world_size)

        # Ask these owners to supply the shuffle_global_nids.
        send_list = []
        id_list = []
        for idx in range(self.world_size):
            positions = np.where(owner_ids == idx)[0]
            send_list.append(torch.from_numpy(global_nids[positions]))
            id_list.append(positions)

        assert len(np.concatenate(id_list)) == len(global_nids)
        cur_global_nids = alltoallv_cpu(self.rank, self.world_size, send_list)

        # `cur_global_nids` holds, per requesting rank, the global_nids whose
        # shuffle_global_nids are located in the current process.
        shuffle_nids_list = []
        for idx in range(self.world_size):
            if cur_global_nids[idx] is None:
                shuffle_nids_list.append(torch.empty((0,), dtype=torch.int64))
                continue

            # Requests may contain duplicates; look up the unique ids and
            # expand the result again with the inverse index.
            requested = cur_global_nids[idx].numpy()
            uniq_ids, inverse_idx = np.unique(requested, return_inverse=True)
            common, _, owned_pos = np.intersect1d(
                uniq_ids,
                my_global_nids,
                assume_unique=True,
                return_indices=True,
            )
            # Every requested node-id must be owned by this process.
            assert len(common) == len(uniq_ids)

            req_shuffle_global_nids = my_shuffle_global_nids[owned_pos][
                inverse_idx
            ]
            assert len(req_shuffle_global_nids) == len(requested)
            shuffle_nids_list.append(torch.from_numpy(req_shuffle_global_nids))

        # Send the shuffle-global-nids to their respective ranks.
        mapped_global_nids = alltoallv_cpu(
            self.rank, self.world_size, shuffle_nids_list
        )
        # Missing responses are replaced by empty tensors. An identity test
        # is used, `==` on a tensor is an element-wise comparison.
        mapped_global_nids = [
            torch.empty((0,), dtype=torch.int64) if resp is None else resp
            for resp in mapped_global_nids
        ]

        # Reorder to match global_nids (function parameter).
        global_nids_order = np.concatenate(id_list)
        shuffle_global_nids = torch.cat(mapped_global_nids).numpy()
        assert len(shuffle_global_nids) == len(global_nids)

        sorted_idx = np.argsort(global_nids_order)
        shuffle_global_nids = shuffle_global_nids[sorted_idx]
        global_nids_ordered = global_nids_order[sorted_idx]
        assert np.all(global_nids_ordered == np.arange(len(global_nids)))

        return shuffle_global_nids
send_nodes = [] for proc_i_nodes in recv_nodes: # list of node-ids to lookup if proc_i_nodes is not None: global_nids = proc_i_nodes.numpy() if len(global_nids) != 0: common, ind1, ind2 = np.intersect1d( node_data[constants.GLOBAL_NID], global_nids, return_indices=True, ) shuffle_global_nids = node_data[constants.SHUFFLE_GLOBAL_NID][ ind1 ] send_nodes.append( torch.from_numpy(shuffle_global_nids).type( dtype=torch.int64 ) ) else: send_nodes.append(torch.empty((0), dtype=torch.int64)) else: send_nodes.append(torch.empty((0), dtype=torch.int64)) # send receive global-ids recv_shuffle_global_nids = alltoallv_cpu(rank, world_size, send_nodes) shuffle_global_nids = np.concatenate( [x.numpy() if x is not None else [] for x in recv_shuffle_global_nids] ) global_nids = np.concatenate([x for x in global_nids_ranks]) ret_val = np.column_stack([global_nids, shuffle_global_nids]) return ret_val def lookup_shuffle_global_nids_edges( rank, world_size, num_parts, edge_data, id_lookup, node_data ): """ This function is a helper function used to lookup shuffle-global-nids for a given set of global-nids using a distributed lookup service. 
Parameters: ----------- rank : integer rank of the process world_size : integer total number of processes used in the process group num_parts : integer total number of output graph partitions edge_data : dictionary edge_data is a dicitonary with keys as column names and values as numpy arrays representing all the edges present in the current graph partition id_lookup : instance of DistLookupService class instance of a distributed lookup service class which is used to retrieve partition-ids and shuffle-global-nids for any given set of global-nids node_data : dictionary node_data is a dictionary with keys as column names and values as numpy arrays representing all the nodes owned by the current process Returns: -------- dictionary : dictionary where keys are column names and values are numpy arrays representing all the edges present in the current graph partition """ # Make sure that the outgoing message size does not exceed 2GB in size. # Even though gloo can handle upto 10GB size of data in the outgoing messages, # it needs additional memory to store temporary information into the buffers which will increase # the memory needs of the process. MILLION = 1000 * 1000 BATCH_SIZE = 250 * MILLION memory_snapshot("GlobalToShuffleIDMapBegin: ", rank) local_nids = [] local_shuffle_nids = [] for local_part_id in range(num_parts // world_size): local_nids.append( node_data[constants.GLOBAL_NID + "/" + str(local_part_id)] ) local_shuffle_nids.append( node_data[constants.SHUFFLE_GLOBAL_NID + "/" + str(local_part_id)] ) local_nids = np.concatenate(local_nids) local_shuffle_nids = np.concatenate(local_shuffle_nids) for local_part_id in range(num_parts // world_size): node_list = edge_data[ constants.GLOBAL_SRC_ID + "/" + str(local_part_id) ] # Determine the no. of times each process has to send alltoall messages. 
def assign_shuffle_global_nids_nodes(rank, world_size, num_parts, node_data):
    """
    Assign contiguous shuffle-global node ids to the nodes held by this rank.

    For every local partition a new ``SHUFFLE_GLOBAL_NID/<local_part_id>``
    column is added to ``node_data``. The ids of one partition form a
    contiguous ``int64`` range whose bounds come from the prefix sum of the
    node counts of all partitions on all ranks.

    Parameters:
    -----------
    rank : integer
        rank of the process
    world_size : integer
        total number of processes used in the process group
    num_parts : integer
        total number of output graph partitions
    node_data : dictionary
        dictionary with keys as column names (suffixed by
        ``/<local_part_id>``) and values as numpy arrays. It is updated
        in place; nothing is returned.
    """
    parts_per_rank = num_parts // world_size

    # No. of nodes held by this rank in each of its local partitions.
    row_counts = [
        node_data[f"{constants.GLOBAL_NID}/{part}"].shape[0]
        for part in range(parts_per_rank)
    ]

    # Prefix sum over the node counts of every partition on every rank.
    offsets = allgather_sizes(row_counts, world_size, num_parts)

    for part in range(parts_per_rank):
        # Position of (rank, local partition) in the prefix-sum array.
        slot = rank + part * world_size
        node_data[f"{constants.SHUFFLE_GLOBAL_NID}/{part}"] = np.arange(
            offsets[slot], offsets[slot + 1], dtype=np.int64
        )
def allgather_sizes(send_data, world_size, num_parts, return_sizes=False):
    """
    All-gather per-partition sizes from every rank and, optionally, turn them
    into prefix sums. The prefix sums are used to determine the offsets from
    which global ids for edges/nodes are allocated on each rank.

    Parameters
    ----------
    send_data : list or numpy array of integers
        Sizes contributed by the current process, one entry per local
        partition.
    world_size : integer
        No. of processes configured for execution
    num_parts : integer
        No. of output graph partitions, must be a multiple of ``world_size``
    return_sizes : bool
        When True the raw sizes gathered from all the processes are returned
        (concatenated rank after rank) and no prefix sum is computed.

    Returns :
    ---------
    numpy array
        Either the raw gathered sizes (``return_sizes=True``) or an array of
        ``num_parts + 1`` entries holding the prefix sum, starting at 0.
    """
    # Assert on the world_size, num_parts
    assert (num_parts % world_size) == 0

    # One receive buffer per rank, each as long as the local contribution.
    local_sizes = torch.as_tensor(send_data, dtype=torch.int64)
    gathered = [
        torch.zeros(len(send_data), dtype=torch.int64)
        for _ in range(world_size)
    ]
    dist.all_gather(gathered, local_sizes)
    part_counts = torch.cat(gathered).numpy()

    # Return on the raw sizes from each process
    if return_sizes:
        return part_counts

    # The gathered sizes are laid out rank after rank, whereas the prefix sum
    # walks over local partition ids first and over ranks second.
    parts_per_rank = num_parts // world_size
    visit_order = np.asarray(
        [
            r * parts_per_rank + local_part_id
            for local_part_id in range(parts_per_rank)
            for r in range(world_size)
        ],
        dtype=np.int64,
    )

    rank_sizes = np.zeros(num_parts + 1, dtype=np.int64)
    rank_sizes[1 : len(visit_order) + 1] = np.cumsum(part_counts[visit_order])
    return rank_sizes
def alltoallv_cpu(rank, world_size, input_tensor_list, retain_nones=True):
    """
    Wrapper function providing the alltoallv functionality on top of the
    underlying alltoall messaging primitive.

    The tensors to send may differ along their first dimension only. Every
    tensor is padded with zeros along the first dimension up to the longest
    one so that all the messages have the same size. Before the payload is
    exchanged a small vector ``[a, b, ...]`` is sent to every process where
    ``a`` is the useful (un-padded) message size along the first dimension,
    ``b`` is the padded size used for communication and the remaining
    elements are the remaining dimensions of the tensor. The receiver uses it
    to allocate its buffers and to strip the padding again.

    Parameters:
    -----------
    rank : int
        The rank of current worker
    world_size : int
        The total number of processes taking part in the exchange
    input_tensor_list : List of tensor
        The tensors to exchange, one per process. All of them must have the
        same dtype, the same number of dimensions and identical sizes in all
        but the first dimension.
    retain_nones : bool
        Indicates whether to retain ``None`` data in returned value. When
        False, empty messages are dropped and the returned list may be
        shorter than ``world_size``.

    Returns:
    --------
    list :
        list of tensors received from other processes during alltoall
        message; an empty message is represented by ``None`` (or omitted,
        see ``retain_nones``).
    """
    # ensure len of input_tensor_list is same as the world_size.
    assert input_tensor_list is not None
    assert len(input_tensor_list) == world_size

    # ensure that all the tensors in the input_tensor_list are compatible.
    sizes = [list(x.size()) for x in input_tensor_list]
    for idx in range(1, len(sizes)):
        # no. of dimensions should be same
        assert len(sizes[idx - 1]) == len(sizes[idx])
        # dtype should be same
        assert input_tensor_list[idx - 1].dtype == input_tensor_list[idx].dtype
        # except first dimension remaining dimensions should all be the same
        assert sizes[idx - 1][1:] == sizes[idx][1:]

    # Always use the first dimension for padding; every message is padded up
    # to the longest one. Computed once instead of once per tensor.
    max_rows = max(size[0] for size in sizes)

    # pad the actual message; the padding is allocated directly in the
    # message dtype rather than as a float tensor that is cast afterwards.
    padded_inputs = [
        torch.cat(
            (x, torch.zeros([max_rows - size[0]] + size[1:], dtype=x.dtype))
        )
        for x, size in zip(input_tensor_list, sizes)
    ]

    # send useful message sizes to all: [useful rows, padded rows, other dims]
    send_counts = [
        torch.tensor([size[0], max_rows] + size[1:], dtype=torch.int64)
        for size in sizes
    ]
    recv_counts = [
        torch.zeros(1 + len(size), dtype=torch.int64) for size in sizes
    ]
    __alltoall_cpu(rank, world_size, recv_counts, send_counts)

    # allocate buffers for receiving message; the first received element is
    # the useful size, the rest is the shape of the padded incoming message.
    recv_shapes = [counts.tolist() for counts in recv_counts]
    output_tensor_list = [
        torch.zeros(tuple(shape[1:]), dtype=x.dtype)
        for shape, x in zip(recv_shapes, padded_inputs)
    ]

    # send actual message itself.
    __alltoall_cpu(rank, world_size, output_tensor_list, padded_inputs)

    # extract un-padded message from the output_tensor_list and return it
    return_vals = []
    for shape, received in zip(recv_shapes, output_tensor_list):
        useful_rows = shape[0]
        if useful_rows == 0:
            if retain_nones:
                return_vals.append(None)
        else:
            return_vals.append(received[0:useful_rows])
    return return_vals
Parameters: ----------- params : argparser object argparser object to capture command line options passed to the executable """ logging.info("Starting to process parmetis output.") logging.info(params.postproc_input_dir) logging.info(params.schema_file) logging.info(params.parmetis_output_file) assert os.path.isfile( os.path.join(params.postproc_input_dir, params.schema_file) ) assert os.path.isfile(params.parmetis_output_file) schema = read_json( os.path.join(params.postproc_input_dir, params.schema_file) ) metis_df = csv.read_csv( params.parmetis_output_file, read_options=pyarrow.csv.ReadOptions(autogenerate_column_names=True), parse_options=pyarrow.csv.ParseOptions(delimiter=" "), ) global_nids = metis_df["f0"].to_numpy() partition_ids = metis_df["f1"].to_numpy() num_parts = np.unique(partition_ids).size sort_idx = np.argsort(global_nids) global_nids = global_nids[sort_idx] partition_ids = partition_ids[sort_idx] ntypes_ntypeid_map, ntypes, ntid_ntype_map = get_node_types(schema) type_nid_dict, ntype_gnid_offset = get_idranges( schema[constants.STR_NODE_TYPE], dict( zip( schema[constants.STR_NODE_TYPE], schema[constants.STR_NUM_NODES_PER_TYPE], ) ), ) outdir = Path(params.partitions_dir) os.makedirs(outdir, exist_ok=True) for ntype_id, ntype_name in ntid_ntype_map.items(): start = ntype_gnid_offset[ntype_name][0, 0] end = ntype_gnid_offset[ntype_name][0, 1] out_data = partition_ids[start:end] out_file = os.path.join(outdir, f"{ntype_name}.txt") options = csv.WriteOptions(include_header=False, delimiter=" ") csv.write_csv( pyarrow.Table.from_arrays([out_data], names=["partition-ids"]), out_file, options, ) logging.info(f"Generated {out_file}") # generate partition meta file. 
part_meta = PartitionMeta( version="1.0.0", num_parts=num_parts, algo_name="metis" ) dump_partition_meta(part_meta, os.path.join(outdir, "partition_meta.json")) logging.info("Done processing parmetis output") if __name__ == "__main__": """Main function to convert the output of parmetis into metis partitions which are accepted by graph partitioning pipeline. ParMETIS currently generates one output file, which is in the following format: Graph partitioing pipeline, per the new dataset file format rules expects the metis partitions to be in the following format: No. of files will be equal to the no. of node-types in the graph Each file will have one-number/line which is . Example usage: -------------- python parmetis_postprocess.py --input_file --output-dir --schema """ parser = argparse.ArgumentParser( description="PostProcessing the ParMETIS\ output for partitioning pipeline" ) parser.add_argument( "--postproc_input_dir", required=True, type=str, help="Base directory for post processing step.", ) parser.add_argument( "--schema_file", required=True, type=str, help="The schema of the input graph", ) parser.add_argument( "--parmetis_output_file", required=True, type=str, help="ParMETIS output file", ) parser.add_argument( "--partitions_dir", required=True, type=str, help="The output\ will be files (with metis partition ids) and each file corresponds to\ a node-type in the input graph dataset.", ) params = parser.parse_args() # Configure logging. 
def get_proc_info():
    """Helper function to get the rank from the environment when `mpirun`
    is used to run this python program.

    The rank is looked up in ``PMI_RANK`` (mpich) first and in
    ``OMPI_COMM_WORLD_RANK`` (openmpi) second.

    Returns:
    --------
    integer :
        Rank of the current process, 0 when none of the variables is set.
    """
    # mpich first, openmpi second
    for var_name in ("PMI_RANK", "OMPI_COMM_WORLD_RANK"):
        value = os.environ.get(var_name)
        if value is not None:
            return int(value)
    return 0


def get_world_size():
    """Helper function to get the world size from the environment when
    `mpirun` is used to run this python program.

    The size is looked up in ``PMI_SIZE`` (mpich) first and in
    ``OMPI_COMM_WORLD_SIZE`` (openmpi) second.

    Returns:
    --------
    integer :
        Number of processes created by the executor that created this
        process, 1 when none of the variables is set.
    """
    # mpich first, openmpi second
    for var_name in ("PMI_SIZE", "OMPI_COMM_WORLD_SIZE"):
        value = os.environ.get(var_name)
        if value is not None:
            return int(value)
    return 1
This function creates the edge files and each of these will have the following format (meaning each line of these file is of the following format) Here ``global`` prefix means that globally unique identifier assigned each node in the input graph. In this context globally unique means unique across all the nodes in the input graph. Parameters: ----------- rank : int rank of the current process schema_map : json dictionary Dictionary created by reading the metadata.json file for the input dataset. output : string Location of storing the node-weights and edge files for ParMETIS. """ _, ntype_gnid_offset = get_idranges( schema_map[constants.STR_NODE_TYPE], dict( zip( schema_map[constants.STR_NODE_TYPE], schema_map[constants.STR_NUM_NODES_PER_TYPE], ) ), ) # Regenerate edge files here. edge_data = schema_map[constants.STR_EDGES] outdir = Path(params.output_dir) os.makedirs(outdir, exist_ok=True) def process_and_write_back(data_df, idx): data_f0 = data_df[:, 0] data_f1 = data_df[:, 1] global_src_id = data_f0 + ntype_gnid_offset[src_ntype_name][0, 0] global_dst_id = data_f1 + ntype_gnid_offset[dst_ntype_name][0, 0] cols = [global_src_id, global_dst_id] col_names = ["global_src_id", "global_dst_id"] out_file_name = Path(edge_data_files[idx]).stem.split(".")[0] out_file = os.path.join( outdir, etype_name, f"edges_{out_file_name}.csv" ) os.makedirs(os.path.dirname(out_file), exist_ok=True) options = csv.WriteOptions(include_header=False, delimiter=" ") csv.write_csv( pyarrow.Table.from_arrays(cols, names=col_names), out_file, options, ) return out_file edge_files = [] for etype_name, etype_info in edge_data.items(): edge_data_files = etype_info[constants.STR_DATA] # ``edgetype`` strings are in canonical format, src_node_type:edge_type:dst_node_type tokens = etype_name.split(":") assert len(tokens) == 3 src_ntype_name = tokens[0] dst_ntype_name = tokens[2] rank_assignments = generate_roundrobin_read_list( len(edge_data_files), params.num_parts ) for file_idx in 
def gen_node_weights_files(schema_map, params):
    """Function to create node weight files for ParMETIS along with the edge files.

    This function generates node-data files, which will be read by the ParMETIS
    executable for partitioning purposes. For every node type the current rank
    writes one space-delimited file named ``node_weights_<ntype>_<rank>.txt``
    whose columns are, in this order:

    ntype - the id assigned to the node-type to which a given node belongs
    w0 ... w<n-1> - a one-hot vector in which the number in the location of
        the current nodes' node-type is set to `1` and the others to `0`
    type_nid - the id assigned to the node in the context of its node-type,
        always the very last column

    The nodes of a node type are divided among ``params.num_parts`` ranks as
    evenly as possible; the first ``total % num_parts`` ranks get one extra
    row.

    Parameters:
    -----------
    schema_map : json dictionary
        Dictionary created by reading the metadata.json file for the input
        dataset.
    params : argparser object
        Command line arguments; ``output_dir`` (location of the generated
        files) and ``num_parts`` are used here.

    Returns:
    --------
    list :
        One ``(global_nid_start, global_nid_end, filename)`` tuple per node
        type describing the file written by the current rank, where the end
        is exclusive.
    """
    rank = get_proc_info()
    _, ntypes, ntid_ntype_map = get_node_types(schema_map)
    _, ntype_gnid_offset = get_idranges(
        schema_map[constants.STR_NODE_TYPE],
        dict(
            zip(
                schema_map[constants.STR_NODE_TYPE],
                schema_map[constants.STR_NUM_NODES_PER_TYPE],
            )
        ),
    )

    node_files = []
    outdir = Path(params.output_dir)
    os.makedirs(outdir, exist_ok=True)

    # Loop invariants: the same write options and no. of weight columns are
    # used for every node type.
    write_options = csv.WriteOptions(include_header=False, delimiter=" ")
    num_ntypes = len(ntypes)

    for ntype_id, ntype_name in ntid_ntype_map.items():
        # Each rank will generate (almost) equal no. of rows for this node type.
        total_count = schema_map[constants.STR_NUM_NODES_PER_TYPE][ntype_id]
        per_rank_range = np.ones((params.num_parts,), dtype=np.int64) * (
            total_count // params.num_parts
        )
        per_rank_range[: total_count % params.num_parts] += 1

        tid_end = np.cumsum(per_rank_range)
        tid_start = tid_end - per_rank_range
        local_tid_start = tid_start[rank]
        local_tid_end = tid_end[rank]
        sz = local_tid_end - local_tid_start

        # ntype-id
        cols = [
            pyarrow.array(np.ones(sz, dtype=np.int64) * np.int64(ntype_id))
        ]
        col_names = ["ntype"]

        # one-hot vector for ntype-id here.
        for i in range(num_ntypes):
            fill = np.ones if i == ntype_id else np.zeros
            cols.append(pyarrow.array(fill(sz, dtype=np.int64)))
            col_names.append("w{}".format(i))

        # `type_nid` should be the very last column in the node weights files.
        cols.append(
            pyarrow.array(
                np.arange(local_tid_start, local_tid_end, dtype=np.int64)
            )
        )
        col_names.append("type_nid")

        out_file = os.path.join(
            outdir, "node_weights_{}_{}.txt".format(ntype_name, rank)
        )
        csv.write_csv(
            pyarrow.Table.from_arrays(cols, names=col_names),
            out_file,
            write_options,
        )

        # Translate the per-type id range into the global node-id space.
        global_offset = ntype_gnid_offset[ntype_name][0, 0]
        node_files.append(
            (
                global_offset + local_tid_start,
                global_offset + local_tid_end,
                out_file,
            )
        )

    return node_files
of node types + train_mask + test_mask + val_mask # Here, (train/test/val) masks will be set to 1 if these masks exist for # all the node types in the graph, otherwise these flags will be set to 0 assert ( constants.STR_GRAPH_NAME in schema_map ), "Graph name is not present in the json file" graph_name = schema_map[constants.STR_GRAPH_NAME] if not os.path.isfile( os.path.join(params.input_dir, f"{graph_name}_stats.txt") ): num_nodes = np.sum(schema_map[constants.STR_NUM_NODES_PER_TYPE]) num_edges = np.sum(schema_map[constants.STR_NUM_EDGES_PER_TYPE]) num_ntypes = len(schema_map[constants.STR_NODE_TYPE]) num_constraints = num_ntypes with open( os.path.join(params.input_dir, f"{graph_name}_stats.txt"), "w" ) as sf: sf.write(f"{num_nodes} {num_edges} {num_constraints}") node_files = [] outdir = Path(params.output_dir) os.makedirs(outdir, exist_ok=True) for ntype_id, ntype_name in ntid_ntype_map.items(): global_nid_offset = ntype_gnid_offset[ntype_name][0, 0] total_count = schema_map[constants.STR_NUM_NODES_PER_TYPE][ntype_id] per_rank_range = np.ones((params.num_parts,), dtype=np.int64) * ( total_count // params.num_parts ) for i in range(total_count % params.num_parts): per_rank_range[i] += 1 tid_start = np.cumsum([0] + list(per_rank_range[:-1])) tid_end = np.cumsum(per_rank_range) logging.info(f" tid-start = {tid_start}, tid-end = {tid_end}") logging.info(f" per_rank_range - {per_rank_range}") for part_idx in range(params.num_parts): local_tid_start = tid_start[part_idx] local_tid_end = tid_end[part_idx] out_file = os.path.join( outdir, "node_weights_{}_{}.txt".format(ntype_name, part_idx) ) node_files.append( ( out_file, global_nid_offset + local_tid_start, global_nid_offset + local_tid_end, ) ) with open( os.path.join(params.output_dir, "parmetis_nfiles.txt"), "w" ) as parmetis_nf: for node_file in node_files: # format: filename global_node_id_start global_node_id_end(exclusive) parmetis_nf.write( "{} {} {}\n".format(node_file[0], node_file[1], node_file[2]) ) # 
def run_preprocess_data(params):
    """Main function which will help create graph files for ParMETIS processing

    Every rank generates its node weight files and its edge files. Rank 0
    additionally writes the files which list the generated node/edge files and
    which are passed to ParMETIS as input arguments.

    Parameters:
    -----------
    params : argparser object
        An instance of argparser class which stores command line arguments.
        ``input_dir`` must be an existing directory and ``schema_file`` is
        resolved relative to it.

    Raises:
    -------
    AssertionError
        If ``params.input_dir`` is not an existing directory.
    """
    logging.info("Starting to generate ParMETIS files...")

    # Validate the input location before doing any other work. The message
    # must reference `input_dir`; a misspelled attribute here would raise an
    # AttributeError and hide the actual problem from the user.
    assert os.path.isdir(
        params.input_dir
    ), f"Please check `input_dir` argument: {params.input_dir}."

    rank = get_proc_info()
    schema_map = read_json(os.path.join(params.input_dir, params.schema_file))

    gen_node_weights_files(schema_map, params)
    logging.info("Done with node weights....")

    gen_edge_files(rank, schema_map, params)
    logging.info("Done with edge weights...")

    # The ParMETIS argument files are shared, only one rank writes them.
    if rank == 0:
        gen_parmetis_input_args(params, schema_map)
    logging.info("Done generating files for ParMETIS run ..")
Example usage: -------------- mpirun -np 4 python3 parmetis_preprocess.py --schema --output """ parser = argparse.ArgumentParser( description="Generate ParMETIS files for input dataset" ) parser.add_argument( "--schema_file", required=True, type=str, help="The schema of the input graph", ) parser.add_argument( "--input_dir", required=True, type=str, help="This directory will be used as the relative directory to locate files, if absolute paths are not used", ) parser.add_argument( "--output_dir", required=True, type=str, help="The output directory for the node weights files and auxiliary files for ParMETIS.", ) parser.add_argument( "--num_parts", required=True, type=int, help="Total no. of output graph partitions.", ) parser.add_argument( "--log_level", required=False, type=str, help="Log level to use for execution.", default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], ) params = parser.parse_args() # Configure logging. logging.basicConfig( level=getattr(logging, params.log_level, None), format=f"[{platform.node()} \ %(levelname)s %(asctime)s PID:%(process)d] %(message)s", ) # Invoke the function to generate files for parmetis run_preprocess_data(params) ================================================ FILE: tools/distpartitioning/parmetis_wrapper.py ================================================ import argparse import logging import os import platform import sys from pathlib import Path import constants from utils import read_json def check_dependencies(): """Check if all the dependencies needed for the execution of this file are installed. """ exec_path = os.get_exec_path() mpi_install = False for x in exec_path: if os.path.isfile(os.path.join(x, "mpirun")): mpi_install = True break assert ( mpi_install ), "Could not locate the following dependency: MPI. Please install it and try again." dgl_path = os.environ.get("DGL_HOME", "") assert os.path.isdir( dgl_path ), "Environment variable DGL_HOME not found. 
def _run_shell_step(step_name, cmd):
    """Run ``cmd`` through the shell and fail loudly on a non-zero status."""
    logging.info(f"Executing {step_name}: {cmd}")
    status = os.system(cmd)
    if status != 0:
        raise RuntimeError(
            f"{step_name} failed with status {status} while running: {cmd}"
        )


def run_parmetis_wrapper(params):
    """Function to execute all the steps needed to run ParMETIS

    Three shell commands are executed one after the other:
    1. ``parmetis_preprocess.py`` (through ``mpirun``) to generate the input
       files for ParMETIS,
    2. the ParMETIS executable ``pm_dglpart3`` (through ``mpirun``),
    3. ``parmetis_postprocess.py`` to convert the ParMETIS output into the
       form accepted by the dist. graph partitioning pipeline.

    Each step consumes the files produced by the previous one, therefore the
    execution stops as soon as one of the commands reports a failure.

    Parameters:
    -----------
    params : argparser object
        an instance of argparser class to capture command-line arguments

    Raises:
    -------
    RuntimeError
        If any of the three commands exits with a non-zero status.
    """
    schema = read_json(
        os.path.join(params.preproc_input_dir, params.schema_file)
    )
    graph_name = schema[constants.STR_GRAPH_NAME]
    num_partitions = params.num_parts

    # Check if parmetis_preprocess.py exists.
    assert os.path.isfile(
        os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "parmetis_preprocess.py"
        )
    ), "Please check DGL Installation, parmetis_preprocess.py file does not exist."

    # Trigger pre-processing step to generate input files for ParMETIS.
    preproc_cmd = (
        f"mpirun -np {num_partitions} -hostfile {params.hostfile} "
        f"python3 $DGL_HOME/tools/distpartitioning/parmetis_preprocess.py "
        f"--schema_file {params.schema_file} "
        f"--input_dir {params.preproc_input_dir} "
        f"--output_dir {params.preproc_output_dir} "
        f"--num_parts {num_partitions}"
    )
    _run_shell_step("Preprocessing Step", preproc_cmd)
    logging.info("Done Preprocessing Step")

    # Trigger ParMETIS for creating metis partitions for the input graph.
    # Without an install path the executable is resolved by the shell.
    parmetis_install_path = "pm_dglpart3"
    if params.parmetis_install_path is not None:
        parmetis_install_path = os.path.join(
            params.parmetis_install_path, parmetis_install_path
        )
    parmetis_nfiles = os.path.join(
        params.preproc_output_dir, "parmetis_nfiles.txt"
    )
    parmetis_efiles = os.path.join(
        params.preproc_output_dir, "parmetis_efiles.txt"
    )
    parmetis_cmd = (
        f"mpirun -np {num_partitions} -hostfile {params.hostfile} "
        f"{parmetis_install_path} {graph_name} {num_partitions} "
        f"{parmetis_nfiles} {parmetis_efiles}"
    )
    _run_shell_step("ParMETIS", parmetis_cmd)
    logging.info("Done ParMETIS execution step")

    # Trigger post-processing step to convert parmetis output to the form
    # acceptable by dist. graph partitioning pipeline.
    # NOTE: the ParMETIS output is looked up in the current working
    # directory; `params.parmetis_output_file` is not consulted here.
    parmetis_output_file = os.path.join(
        os.getcwd(), f"{graph_name}_part.{num_partitions}"
    )
    postproc_cmd = (
        f"python3 $DGL_HOME/tools/distpartitioning/parmetis_postprocess.py "
        f"--postproc_input_dir {params.preproc_input_dir} "
        f"--schema_file {params.schema_file} "
        f"--parmetis_output_file {parmetis_output_file} "
        f"--partitions_dir {params.partitions_dir}"
    )
    _run_shell_step("PostProcessing", postproc_cmd)
    logging.info("Done Executing ParMETIS...")
# Lookup tables between torch dtypes and small integer codes. A dtype's code
# is its position in the tuple below, so the order must not change.
DATA_TYPE_ID = {
    dtype: code
    for code, dtype in enumerate(
        (
            torch.float32,
            torch.float64,
            torch.float16,
            torch.uint8,
            torch.int8,
            torch.int16,
            torch.int32,
            torch.int64,
            torch.bool,
        )
    )
}

# Inverse table: integer code -> torch dtype.
REV_DATA_TYPE_ID = dict(zip(DATA_TYPE_ID.values(), DATA_TYPE_ID.keys()))
def read_json(json_file):
    """
    Utility method to read a json file schema

    Parameters:
    -----------
    json_file : string
        file name for the json schema

    Returns:
    --------
    dictionary, as serialized in the json_file
    """
    # Decode explicitly as UTF-8 so that the result does not depend on the
    # locale encoding of the machine running the pipeline.
    with open(json_file, encoding="utf-8") as schema:
        return json.load(schema)
def get_node_types(schema_map):
    """
    Build the lookup tables between node type names and node type ids.

    The id of a node type is its position in the list stored under
    ``constants.STR_NODE_TYPE`` in the schema.

    Parameters:
    -----------
    schema_map : dictionary
        Input schema from which the node type list is read.

    Returns:
    --------
    dictionary
        with keys as node type names and values as ids (integers)
    list
        list of ntype name strings, exactly as stored in the schema
    dictionary
        with keys as ntype ids (integers) and values as node type names
    """
    ntypes = schema_map[constants.STR_NODE_TYPE]
    id_to_name = dict(enumerate(ntypes))
    name_to_id = {name: type_id for type_id, name in id_to_name.items()}
    return name_to_id, ntypes, id_to_name
def get_gnid_range_map(node_tids):
    """
    Compute the global node-id range occupied by every node type.

    Node types are laid out back to back in the iteration order of
    ``node_tids``; each type is shifted by the accumulated end values of the
    types that precede it.

    Parameters:
    -----------
    node_tids: dictionary
        Keys are node type names and values are lists of entries. For each
        entry, element 0 is read as the starting type_node_id and element 1
        as the ending type_node_id. Only the first entry's start and the last
        entry's end are used.

    Returns:
    --------
    dictionary :
        a dictionary where keys are node_type names and values are
        two-element lists [global_start, global_end].
    """
    gid_ranges = {}
    base = 0
    for ntype, tid_ranges in node_tids.items():
        first_start = int(tid_ranges[0][0])
        last_end = int(tid_ranges[-1][1])
        gid_ranges[ntype] = [base + first_start, base + last_end]
        # The next node type starts right after this one's last id.
        base += last_end
    return gid_ranges
Parameters: ----------- metadata_list : list of json (dictionaries) a list of json dictionaries to merge on rank-0 output_dir : string output directory path in which results are stored (as a json file) graph-name : string a string specifying the graph name """ # Preprocess the input_list, a list of dictionaries # each dictionary will contain num_parts/world_size metadata json # which correspond to local partitions on the respective ranks. metadata_list = [] for local_part_id in range(num_parts // world_size): for idx in range(world_size): metadata_list.append( input_list[idx][ "local-part-id-" + str(local_part_id * world_size + idx) ] ) # Initialize global metadata graph_metadata = {} # Merge global_edge_ids from each json object in the input list edge_map = {} x = metadata_list[0]["edge_map"] for k in x: edge_map[k] = [] for idx in range(len(metadata_list)): edge_map[k].append( [ int(metadata_list[idx]["edge_map"][k][0][0]), int(metadata_list[idx]["edge_map"][k][0][1]), ] ) graph_metadata["edge_map"] = edge_map graph_metadata["etypes"] = metadata_list[0]["etypes"] graph_metadata["graph_name"] = metadata_list[0]["graph_name"] graph_metadata["halo_hops"] = metadata_list[0]["halo_hops"] # Merge global_nodeids from each of json object in the input list node_map = {} x = metadata_list[0]["node_map"] for k in x: node_map[k] = [] for idx in range(len(metadata_list)): node_map[k].append( [ int(metadata_list[idx]["node_map"][k][0][0]), int(metadata_list[idx]["node_map"][k][0][1]), ] ) graph_metadata["node_map"] = node_map graph_metadata["ntypes"] = metadata_list[0]["ntypes"] graph_metadata["num_edges"] = int( sum([metadata_list[i]["num_edges"] for i in range(len(metadata_list))]) ) graph_metadata["num_nodes"] = int( sum([metadata_list[i]["num_nodes"] for i in range(len(metadata_list))]) ) graph_metadata["num_parts"] = metadata_list[0]["num_parts"] graph_metadata["part_method"] = metadata_list[0]["part_method"] for i in range(len(metadata_list)): 
def augment_edge_data(
    edge_data, lookup_service, edge_tids, rank, world_size, num_parts
):
    """
    Add a global edge-id column (constants.GLOBAL_EID) to the edge_data.

    For every edge type, the per-partition type-edge-id ranges whose
    partition id maps to this rank are converted to global edge ids and
    concatenated in (edge type, partition id) order.

    Parameters:
    -----------
    edge_data : dictionary
        Edge columns keyed by column name; the constants.ETYPE_ID column is
        used to validate the number of generated ids
    lookup_service : instance of class DistLookupService
        Not used by this function; kept in the signature for callers
    edge_tids: dictionary
        dictionary where keys are canonical edge types and values are list of
        ranges (start, end) which indicate the range of edges assigned to
        each of the partitions
    rank : integer
        rank of the current process
    world_size : integer
        total no. of process participating in the communication primitives
    num_parts : integer
        total no. of partitions requested for the input graph

    Returns:
    --------
    dictionary :
        the same edge_data object, with the constants.GLOBAL_EID column set
        to a numpy int64 array.
    """
    id_chunks = []
    running_offset = 0
    for etype_name, tid_range in edge_tids.items():
        # Global id at which this edge type starts.
        etype_base = running_offset + int(tid_range[0][0])
        running_offset += int(tid_range[-1][1])

        for part_id in range(num_parts):
            # Skip partitions owned by other ranks and partitions for which
            # this edge type has no range entry.
            if map_partid_rank(part_id, world_size) != rank:
                continue
            if part_id >= len(tid_range):
                continue
            first = etype_base + int(tid_range[part_id][0])
            last = etype_base + int(tid_range[part_id][1])
            id_chunks.append(np.arange(first, last, dtype=np.int64))

    if len(id_chunks) > 0:
        global_eids = np.concatenate(id_chunks)
    else:
        global_eids = np.array([], dtype=np.int64)

    # One global id must have been produced for every locally held edge.
    assert global_eids.shape[0] == edge_data[constants.ETYPE_ID].shape[0]
    edge_data[constants.GLOBAL_EID] = global_eids
    return edge_data
def read_node_features_file(nodes_features_file):
    """
    Load the node feature tensors stored in a file.

    Thin wrapper around ``dgl.data.utils.load_tensors``, called with its
    second positional argument set to False.

    Parameters:
    -----------
    nodes_features_file : string
        Features file for nodes in the graph

    Returns:
    --------
    dictionary
        mappings between ntype and list of features
    """
    return dgl.data.utils.load_tensors(nodes_features_file, False)
def _write_graph(
    part_dir, graph_obj, formats=None, sort_etypes=None, use_graphbolt=False
):
    """
    Serialize a partition's graph object into its partition directory.

    Parameters:
    -----------
    part_dir : string
        directory of the partition in which the graph file is created
    graph_obj : graph object
        graph to serialize
    formats : str or list[str], optional
        forwarded to write_graph_dgl; ignored when use_graphbolt is set
    sort_etypes : bool, optional
        forwarded to write_graph_dgl; ignored when use_graphbolt is set
    use_graphbolt : bool
        when true the graph is written to "fused_csc_sampling_graph.pt",
        otherwise to "graph.dgl"
    """
    if not use_graphbolt:
        dgl_path = os.path.join(part_dir, "graph.dgl")
        write_graph_dgl(dgl_path, graph_obj, formats, sort_etypes)
        return

    graphbolt_path = os.path.join(part_dir, "fused_csc_sampling_graph.pt")
    write_graph_graghbolt(graphbolt_path, graph_obj)
def get_idranges(names, counts, num_chunks=None):
    """
    Compute type-id and global-id ranges for a sequence of node/edge types.

    Types are assigned consecutive global-id ranges in the order in which
    they appear in ``names``.

    Parameters:
    -----------
    names : list of strings
        which are either node-types or edge-types
    counts : mapping indexed by type name
        total no. of nodes or edges for a given node or edge type
    num_chunks : int, optional
        not used by this function

    Returns:
    --------
    dictionary
        dictionary where the keys are node-/edge-type names and values are a
        single-entry list [[0, count]] giving the range of type-ids.
    dictionary
        dictionary where the keys are node-/edge-type names and value is a
        numpy array of shape (1, 2) holding the [start, end] global-ids for
        the associated node-/edge-type.
    """
    tid_dict, gid_dict = {}, {}
    running_total = 0
    for typename in names:
        type_count = counts[typename]
        range_start = running_total
        running_total = running_total + type_count
        tid_dict[typename] = [[0, type_count]]
        gid_dict[typename] = np.array([[range_start, running_total]])
    return tid_dict, gid_dict
def memory_snapshot(tag, rank):
    """
    Log, at debug level, a snapshot of the memory usage of the system.

    The message reports total, peak, used and available memory. The psutil
    figures are converted from bytes to MB here.

    Parameters:
    -----------
    tag : string
        string provided by the user for bookmarking purposes
    rank : integer
        process id of the participating process
    """
    bytes_per_mb = 1024 * 1024

    # NOTE(review): the peak value is scaled by 1024 and printed as MB, which
    # presumably means dgl.partition.get_peak_mem() reports GB; confirm.
    peak = dgl.partition.get_peak_mem() * 1024

    vm = psutil.virtual_memory()
    total = vm.total / bytes_per_mb
    used = vm.used / bytes_per_mb
    avail = vm.available / bytes_per_mb

    logging.debug(
        f"[Rank: {rank} MEMORY_SNAPSHOT] "
        f"{total:.0f} (MB) total, {peak:.0f} (MB) peak, "
        f"{used:.0f} (MB) used, {avail:.0f} (MB) avail"
        f" - {tag}"
    )
@contextmanager
def setdir(path):
    """Context manager that creates ``path`` if needed and makes it the cwd.

    On exit, whether normal or through an exception, the working directory
    that was current on entry is restored.

    Parameters:
    -----------
    path : string
        directory to create (if missing) and switch into

    Raises:
    -------
    OSError
        if the directory cannot be created or entered.
    """
    # Remember the starting directory before anything can fail. If it were
    # assigned inside the try block, a failing os.makedirs() would make the
    # finally clause raise UnboundLocalError and hide the real error.
    cwd = os.getcwd()
    try:
        os.makedirs(path, exist_ok=True)
        logging.info("Changing directory to %s", path)
        logging.info("Previously: %s", cwd)
        os.chdir(path)
        yield
    finally:
        logging.info("Restoring directory to %s", cwd)
        os.chdir(cwd)
cleanup_proc(get_all_remote_pids, conn): """This process tries to clean up the remote training tasks.""" print("cleanup process runs") # This process should not handle SIGINT. signal.signal(signal.SIGINT, signal.SIG_IGN) data = conn.recv() # If the launch process exits normally, this process doesn't need to do anything. if data == "exit": sys.exit(0) else: remote_pids = get_all_remote_pids() # Otherwise, we need to ssh to each machine and kill the training jobs. for (ip, port), pids in remote_pids.items(): kill_process(ip, port, pids) print("cleanup process exits") def kill_process(ip, port, pids): """ssh to a remote machine and kill the specified processes.""" curr_pid = os.getpid() killed_pids = [] # If we kill child processes first, the parent process may create more again. This happens # to Python's process pool. After sorting, we always kill parent processes first. pids.sort() for pid in pids: assert curr_pid != pid print("kill process {} on {}:{}".format(pid, ip, port), flush=True) kill_cmd = ( "ssh -o StrictHostKeyChecking=no -p " + str(port) + " " + ip + " 'kill {}'".format(pid) ) subprocess.run(kill_cmd, shell=True) killed_pids.append(pid) # It's possible that some of the processes are not killed. Let's try again. 
def execute_remote(
    cmd: str,
    state_q: queue.Queue,
    ip: str,
    port: int,
    username: Optional[str] = "",
) -> Thread:
    """Execute command line on remote machine via ssh.

    The command is run from a daemon thread; its exit state is pushed onto
    `state_q`: 0 on success, the ssh return code when the command fails, and
    -1 for any other error raised while launching it.

    Args:
        cmd: User-defined command (udf) to execute on the remote host.
        state_q: A queue collecting Thread exit states.
        ip: The ip-address of the host to run the command on.
        port: Port number that the host is listening on.
        username: Optional. If given, this will specify a username to use when issuing commands over SSH.
            Useful when your infra requires you to explicitly specify a username to avoid permission issues.

    Returns:
        thread: The Thread whose run() is to run the `cmd` on the remote host. Returns when the cmd completes
            on the remote host.
    """
    ip_prefix = ""
    if username:
        ip_prefix += "{username}@".format(username=username)

    # Construct ssh command that executes `cmd` on the remote host
    ssh_cmd = "ssh -o StrictHostKeyChecking=no -p {port} {ip_prefix}{ip} '{cmd}'".format(
        port=str(port),
        ip_prefix=ip_prefix,
        ip=ip,
        cmd=cmd,
    )

    # thread func to run the job
    def run(ssh_cmd, state_q):
        try:
            subprocess.check_call(ssh_cmd, shell=True)
            state_q.put(0)
        except subprocess.CalledProcessError as err:
            print(f"Called process error {err}")
            state_q.put(err.returncode)
        except Exception as err:
            # Report the failure instead of swallowing it silently; the -1
            # state is still what consumers of the queue receive.
            print(f"Unexpected error while running remote command: {err}")
            state_q.put(-1)

    # Thread.setDaemon() is deprecated; pass the daemon flag to the
    # constructor instead.
    thread = Thread(
        target=run,
        args=(
            ssh_cmd,
            state_q,
        ),
        daemon=True,
    )
    thread.start()
    # sleep for a while in case of ssh is rejected by peer due to busy connection
    time.sleep(0.2)
    return thread
def wrap_udf_in_torch_dist_launcher(
    udf_command: str,
    num_trainers: int,
    num_nodes: int,
    node_rank: int,
    master_addr: str,
    master_port: int,
) -> str:
    """Wraps the user-defined function (udf_command) with the torch.distributed.run module.

     Example: if udf_command is "python3 run/some/trainer.py arg1 arg2", then new_df_command becomes:
         "python3 -m torch.distributed.run <TORCH DIST ARGS> run/some/trainer.py arg1 arg2

    udf_command is assumed to consist of pre-commands (optional) followed by the python launcher script (required):
    Examples:
        # simple
        python3.7 path/to/some/trainer.py arg1 arg2

        # multi-commands
        (cd some/dir && python3.7 path/to/some/trainer.py arg1 arg2)

    A python binary name is only recognised when it is not immediately followed by further version digits,
    so "python3" is not matched inside "python3.10". Versioned binaries that are missing from the allowlist
    (eg "python3.10") are detected as a whole.

    IMPORTANT: If udf_command consists of multiple python commands, then this will result in undefined behavior.

    Args:
        udf_command: The user command line to wrap.
        num_trainers: Forwarded to the launcher as --nproc_per_node.
        num_nodes: Forwarded to the launcher as --nnodes.
        node_rank: Forwarded to the launcher as --node_rank.
        master_addr: Forwarded to the launcher as --master_addr.
        master_port: Forwarded to the launcher as --master_port.

    Returns:
        new_udf_command: `udf_command` with the torch.distributed.run arguments inserted right after every
            occurrence of the detected python binary.
    """
    torch_dist_cmd = construct_torch_dist_launcher_cmd(
        num_trainers=num_trainers,
        num_nodes=num_nodes,
        node_rank=node_rank,
        master_addr=master_addr,
        master_port=master_port,
    )

    def _full_name_pattern(name):
        # Match `name` only when no version digits follow it (optionally
        # after a dot), so a short name never matches the prefix of a longer
        # versioned binary such as python3.10.
        return re.escape(name) + r"(?!\.?\d)"

    # Auto-detect the python binary that kicks off the distributed trainer code.
    # Note: This allowlist order matters, this will match with the FIRST matching entry. Thus, please add names to this
    #       from most-specific to least-specific order eg:
    #           (python3.7, python3.8) -> (python3)
    # The allowed python versions are from this: https://www.dgl.ai/pages/start.html
    python_bin_allowlist = (
        "python3.6",
        "python3.7",
        "python3.8",
        "python3.9",
        "python3",
        # for backwards compatibility, accept python2 but technically DGL is a py3 library, so this is not recommended
        "python2.7",
        "python2",
    )
    python_bin = None
    for candidate_python_bin in python_bin_allowlist:
        if re.search(_full_name_pattern(candidate_python_bin), udf_command):
            python_bin = candidate_python_bin
            break

    if python_bin is None:
        # Not in the allowlist: pick up any other versioned binary name, and
        # otherwise fall back to the default `python`.
        versioned = re.search(r"python\d+(?:\.\d+)*", udf_command)
        python_bin = versioned.group(0) if versioned else "python"

    # transforms the udf_command from:
    #     python path/to/dist_trainer.py arg0 arg1
    # to:
    #     python -m torch.distributed.run [DIST TORCH ARGS] path/to/dist_trainer.py arg0 arg1
    # Note: if there are multiple python commands in `udf_command`, this may do the Wrong Thing, eg launch each
    #       python command within the torch distributed launcher.
    # A callable replacement is used so the inserted text is taken literally.
    new_udf_command = re.sub(
        _full_name_pattern(python_bin),
        lambda _match: f"{python_bin} {torch_dist_cmd}",
        udf_command,
    )

    return new_udf_command
Convenience function. Args: num_samplers: tot_num_clients: part_config: Partition config. Relative path to workspace. ip_config: IP config file containing IP addresses of cluster hosts. Relative path to workspace. num_servers: graph_format: num_omp_threads: group_id: Used in client processes to indicate which group it belongs to. pythonpath: Optional. If given, this will pass this as PYTHONPATH. Returns: client_env_vars: The client-specific env-vars in a string format, friendly for CLI execution. """ client_env_vars_template = ( "DGL_DIST_MODE={DGL_DIST_MODE} " "DGL_ROLE={DGL_ROLE} " "DGL_NUM_SAMPLER={DGL_NUM_SAMPLER} " "DGL_NUM_CLIENT={DGL_NUM_CLIENT} " "DGL_CONF_PATH={DGL_CONF_PATH} " "DGL_IP_CONFIG={DGL_IP_CONFIG} " "DGL_NUM_SERVER={DGL_NUM_SERVER} " "DGL_GRAPH_FORMAT={DGL_GRAPH_FORMAT} " "OMP_NUM_THREADS={OMP_NUM_THREADS} " "DGL_GROUP_ID={DGL_GROUP_ID} " "{suffix_optional_envvars}" ) # append optional additional env-vars suffix_optional_envvars = "" if pythonpath: suffix_optional_envvars += f"PYTHONPATH={pythonpath} " return client_env_vars_template.format( DGL_DIST_MODE="distributed", DGL_ROLE="client", DGL_NUM_SAMPLER=num_samplers, DGL_NUM_CLIENT=tot_num_clients, DGL_CONF_PATH=part_config, DGL_IP_CONFIG=ip_config, DGL_NUM_SERVER=num_servers, DGL_GRAPH_FORMAT=graph_format, OMP_NUM_THREADS=num_omp_threads, DGL_GROUP_ID=group_id, suffix_optional_envvars=suffix_optional_envvars, ) def wrap_cmd_with_local_envvars(cmd: str, env_vars: str) -> str: """Wraps a CLI command with desired env vars with the following properties: (1) env vars persist for the entire `cmd`, even if it consists of multiple "chained" commands like: cmd = "ls && pwd && python run/something.py" (2) env vars don't pollute the environment after `cmd` completes. 
def get_available_port(ip, start_port=1234, end_port=65535):
    """Return the first port on `ip` that refuses a TCP connection.

    Ports are probed in increasing order by attempting a TCP connect. A port
    is treated as available when the connect attempt fails, i.e. nothing
    accepted the connection.

    Args:
        ip: Host address to probe.
        start_port: First port to probe (inclusive). Defaults to 1234.
        end_port: Port at which probing stops (exclusive). Defaults to 65535.

    Returns:
        The first probed port whose connect attempt failed.

    Raises:
        RuntimeError: If every port in the range accepted the connection
            (or the range is empty).
    """
    import socket

    for port in range(start_port, end_port):
        # Use a fresh socket per probe: once connect() succeeds, the same
        # socket cannot be connected again, so reusing it would make the
        # *next* port look free without ever testing it. The context
        # manager also guarantees the descriptor is closed.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            try:
                sock.connect((ip, port))
            except OSError:
                # Connection refused / unreachable / timed out: nobody is
                # listening on this port.
                return port
    raise RuntimeError("Failed to get available port for ip~{}".format(ip))
ip_config = os.path.join(args.workspace, args.ip_config) with open(ip_config) as f: for line in f: result = line.strip().split() if len(result) == 2: ip = result[0] port = int(result[1]) hosts.append((ip, port)) elif len(result) == 1: ip = result[0] port = get_available_port(ip) hosts.append((ip, port)) else: raise RuntimeError("Format error of ip_config.") server_count_per_machine = args.num_servers # Get partition info of the graph data part_config = os.path.join(args.workspace, args.part_config) with open(part_config) as conf_f: part_metadata = json.load(conf_f) assert "num_parts" in part_metadata, "num_parts does not exist." # The number of partitions must match the number of machines in the cluster. assert part_metadata["num_parts"] == len( hosts ), "The number of graph partitions has to match the number of machines in the cluster." state_q = queue.Queue() tot_num_clients = args.num_trainers * (1 + args.num_samplers) * len(hosts) # launch server tasks server_env_vars = construct_dgl_server_env_vars( num_samplers=args.num_samplers, num_server_threads=args.num_server_threads, tot_num_clients=tot_num_clients, part_config=args.part_config, ip_config=args.ip_config, num_servers=args.num_servers, graph_format=args.graph_format, pythonpath=os.environ.get("PYTHONPATH", ""), ) for i in range(len(hosts) * server_count_per_machine): ip, _ = hosts[int(i / server_count_per_machine)] server_env_vars_cur = f"{server_env_vars} DGL_SERVER_ID={i}" cmd = wrap_cmd_with_local_envvars(udf_command, server_env_vars_cur) cmd = ( wrap_cmd_with_extra_envvars(cmd, args.extra_envs) if len(args.extra_envs) > 0 else cmd ) cmd = "cd " + str(args.workspace) + "; " + cmd servers_cmd.append(cmd) if not dry_run: thread_list.append( execute_remote( cmd, state_q, ip, args.ssh_port, username=args.ssh_username, ) ) # launch client tasks client_env_vars = construct_dgl_client_env_vars( num_samplers=args.num_samplers, tot_num_clients=tot_num_clients, part_config=args.part_config, 
ip_config=args.ip_config, num_servers=args.num_servers, graph_format=args.graph_format, num_omp_threads=os.environ.get( "OMP_NUM_THREADS", str(args.num_omp_threads) ), group_id=0, pythonpath=os.environ.get("PYTHONPATH", ""), ) master_addr = hosts[0][0] master_port = get_available_port(master_addr) for node_id, host in enumerate(hosts): ip, _ = host # Transform udf_command to follow torch's dist launcher format: `PYTHON_BIN -m torch.distributed.run ... UDF` torch_dist_udf_command = wrap_udf_in_torch_dist_launcher( udf_command=udf_command, num_trainers=args.num_trainers, num_nodes=len(hosts), node_rank=node_id, master_addr=master_addr, master_port=master_port, ) cmd = wrap_cmd_with_local_envvars( torch_dist_udf_command, client_env_vars ) cmd = ( wrap_cmd_with_extra_envvars(cmd, args.extra_envs) if len(args.extra_envs) > 0 else cmd ) cmd = "cd " + str(args.workspace) + "; " + cmd clients_cmd.append(cmd) if not dry_run: thread_list.append( execute_remote( cmd, state_q, ip, args.ssh_port, username=args.ssh_username ) ) # return commands of clients/servers directly if in dry run mode if dry_run: return clients_cmd, servers_cmd # Start a cleanup process dedicated for cleaning up remote training jobs. conn1, conn2 = multiprocessing.Pipe() func = partial(get_all_remote_pids, hosts, args.ssh_port, udf_command) process = multiprocessing.Process(target=cleanup_proc, args=(func, conn1)) process.start() def signal_handler(signal, frame): logging.info("Stop launcher") # We need to tell the cleanup process to kill remote training jobs. conn2.send("cleanup") sys.exit(0) signal.signal(signal.SIGINT, signal_handler) err = 0 for thread in thread_list: thread.join() err_code = state_q.get() if err_code != 0: # Record err_code # We record one of the error if there are multiple err = err_code # The training processes complete. We should tell the cleanup process to exit. 
conn2.send("exit") process.join() if err != 0: print("Task failed") sys.exit(-1) def main(): parser = argparse.ArgumentParser(description="Launch a distributed job") parser.add_argument("--ssh_port", type=int, default=22, help="SSH Port.") parser.add_argument( "--ssh_username", default="", help="Optional. When issuing commands (via ssh) to cluster, use the provided username in the ssh cmd. " "Example: If you provide --ssh_username=bob, then the ssh command will be like: 'ssh bob@1.2.3.4 CMD' " "instead of 'ssh 1.2.3.4 CMD'", ) parser.add_argument( "--workspace", type=str, help="Path of user directory of distributed tasks. \ This is used to specify a destination location where \ the contents of current directory will be rsyncd", ) parser.add_argument( "--num_trainers", type=int, help="The number of trainer processes per machine", ) parser.add_argument( "--num_omp_threads", type=int, help="The number of OMP threads per trainer", ) parser.add_argument( "--num_samplers", type=int, default=0, help="The number of sampler processes per trainer process", ) parser.add_argument( "--num_servers", type=int, help="The number of server processes per machine", ) parser.add_argument( "--part_config", type=str, help="The file (in workspace) of the partition config", ) parser.add_argument( "--ip_config", type=str, help="The file (in workspace) of IP configuration for server processes", ) parser.add_argument( "--num_server_threads", type=int, default=1, help="The number of OMP threads in the server process. \ It should be small if server processes and trainer processes run on \ the same machine. By default, it is 1.", ) parser.add_argument( "--graph_format", type=str, default="csc", help='The format of the graph structure of each partition. \ The allowed formats are csr, csc and coo. A user can specify multiple \ formats, separated by ",". 
For example, the graph format is "csr,csc".', ) parser.add_argument( "--extra_envs", nargs="+", type=str, default=[], help="Extra environment parameters need to be set. For example, \ you can set the LD_LIBRARY_PATH and NCCL_DEBUG by adding: \ --extra_envs LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH NCCL_DEBUG=INFO ", ) args, udf_command = parser.parse_known_args() assert len(udf_command) == 1, "Please provide user command line." assert ( args.num_trainers is not None and args.num_trainers > 0 ), "--num_trainers must be a positive number." assert ( args.num_samplers is not None and args.num_samplers >= 0 ), "--num_samplers must be a non-negative number." assert ( args.num_servers is not None and args.num_servers > 0 ), "--num_servers must be a positive number." assert ( args.num_server_threads > 0 ), "--num_server_threads must be a positive number." assert ( args.workspace is not None ), "A user has to specify a workspace with --workspace." assert ( args.part_config is not None ), "A user has to specify a partition configuration file with --part_config." assert ( args.ip_config is not None ), "A user has to specify an IP configuration file with --ip_config." if args.num_omp_threads is None: # Here we assume all machines have the same number of CPU cores as the machine # where the launch script runs. args.num_omp_threads = max( multiprocessing.cpu_count() // 2 // args.num_trainers, 1 ) print( "The number of OMP threads per trainer is set to", args.num_omp_threads, ) udf_command = str(udf_command[0]) if "python" not in udf_command: raise RuntimeError( "DGL launching script can only support Python executable file." 
def load_partition_meta(meta_file):
    """Load partition metadata and do sanity check.

    Parameters
    ----------
    meta_file : str
        The path of the partition metadata file.

    Returns
    -------
    PartitionMeta
        The partition metadata.

    Raises
    ------
    DGLError
        If the JSON does not validate as a ``PartitionMeta``, the version is
        not ``'1.0.0'``, ``num_parts`` is not positive, or ``algo_name`` is
        neither ``'random'`` nor ``'metis'``.
    """
    with open(meta_file) as f:
        try:
            part_meta = PartitionMeta(**(json.load(f)))
        except dt.ValidationError as e:
            # Chain the original error so the pydantic traceback is kept.
            raise DGLError(
                f"Invalid partition metadata JSON. Error details: {e.json()}"
            ) from e
    if part_meta.version != "1.0.0":
        raise DGLError(
            f"Invalid version[{part_meta.version}]. Supported versions: '1.0.0'"
        )
    if part_meta.num_parts <= 0:
        raise DGLError(
            f"num_parts[{part_meta.num_parts}] should be greater than 0."
        )
    if part_meta.algo_name not in ("random", "metis"):
        # Report the offending algorithm name, not the partition count.
        raise DGLError(
            f"algo_name[{part_meta.algo_name}] is not supported."
        )
    return part_meta
def read_file(fname, ftype):
    """Read a file from disk

    Parameters:
    -----------
    fname : string
        specifying the absolute path to the file to read
    ftype : string
        name of the array parser to use; supported formats are
        `numpy`, `parquet`, `csv`

    Returns:
    --------
    numpy ndarray :
        file contents are returned as numpy array
    """
    # Imported locally because this module's import block does not bring
    # `array_readwriter` into scope.
    from distpartitioning import array_readwriter

    reader_fmt_meta = {"name": ftype}
    # Keep the parser's result; it used to be discarded, leaving `data`
    # undefined at the return statement.
    data = array_readwriter.get_array_parser(**reader_fmt_meta).read(fname)
    return data
def verify_graph_feats(
    g, gpb, part, node_feats, edge_feats, orig_nids, orig_eids
):
    """Check that a partition's node/edge features match the original graph

    For every node type and every canonical edge type of `g`, the inner
    (owned) ids of `part` are mapped to per-type ids, asserted to belong to
    this partition, and then each feature stored for the partition is
    compared element-wise against the same rows of the original graph.

    Parameters:
    -----------
    g : DGL Graph Object
        of the original graph
    gpb : global partition book
        created for the partitioned graph object
    part : partition graph object
        whose ``ndata``/``edata`` hold ``dgl.NID``/``dgl.EID``
    node_feats : dictionary
        keyed by "<node-type>/<feature-name>", values are the partition's
        node features
    edge_feats : dictionary
        keyed by "<edge-type-string>/<feature-name>", values are the
        partition's edge features
    orig_nids : dictionary
        with key, value pairs as node-types and (global) nids from the
        original graph
    orig_eids : dictionary
        with key, value pairs as edge-types and (global) eids from the
        original graph
    """
    # Bookkeeping fields that are not user features and must not be compared.
    skipped_ndata = (dgl.NID, "inner_node")
    skipped_edata = (dgl.EID, "inner_edge")

    for ntype in g.ntypes:
        type_id = g.get_ntype_id(ntype)
        owned_nids = part.ndata[dgl.NID][_get_inner_node_mask(part, type_id)]
        mapped_type_ids, per_type_nids = gpb.map_to_per_ntype(owned_nids)
        owner_parts = gpb.nid2partid(per_type_nids, ntype)
        assert np.all(mapped_type_ids.numpy() == type_id)
        assert np.all(owner_parts.numpy() == gpb.partid)

        source_rows = orig_nids[ntype][per_type_nids]
        local_rows = gpb.nid2localnid(per_type_nids, gpb.partid, ntype)
        for feat_name in g.nodes[ntype].data:
            if feat_name not in skipped_ndata:
                expected = g.nodes[ntype].data[feat_name][source_rows]
                actual = node_feats[ntype + "/" + feat_name][local_rows]
                assert np.array_equal(actual.numpy(), expected.numpy())

    for etype in g.canonical_etypes:
        type_id = g.get_etype_id(etype)
        owned_eids = part.edata[dgl.EID][_get_inner_edge_mask(part, type_id)]
        mapped_type_ids, per_type_eids = gpb.map_to_per_etype(owned_eids)
        owner_parts = gpb.eid2partid(per_type_eids, etype)
        assert np.all(mapped_type_ids.numpy() == type_id)
        assert np.all(owner_parts.numpy() == gpb.partid)

        source_rows = orig_eids[_etype_tuple_to_str(etype)][per_type_eids]
        local_rows = gpb.eid2localeid(per_type_eids, gpb.partid, etype)
        for feat_name in g.edges[etype].data:
            if feat_name not in skipped_edata:
                expected = g.edges[etype].data[feat_name][source_rows]
                feat_key = _etype_tuple_to_str(etype) + "/" + feat_name
                actual = edge_feats[feat_key][local_rows]
                assert np.array_equal(actual.numpy(), expected.numpy())
partition ids from the disk Parameters: ---------- partitions_dir : string directory path where metis/random partitions are located graph_schema : json object which is created by reading the metadata.json file for the original graph Returns: -------- dictionary : where keys are node-types and value is a list of partition-ids for all the nodes of that particular node-type. """ assert os.path.isdir( partitions_dir ), f"Please provide a valid directory to read nodes to partition-id mappings." _, gid_dict = get_idranges( graph_schema[constants.STR_NODE_TYPE], dict( zip( graph_schema[constants.STR_NODE_TYPE], graph_schema[constants.STR_NODE_TYPE_COUNTS], ) ), ) node_partids = {} for ntype_id, ntype in enumerate(graph_schema[constants.STR_NODE_TYPE]): node_partids[ntype] = read_file( os.path.join(partitions_dir, f"{ntype}.txt"), constants.STR_CSV ) assert ( len(node_partids[ntype]) == graph_schema[constants.STR_NODE_TYPE_COUNTS][ntype_id] ), f"Node count for {ntype} = {len(node_partids[ntype])} in the partitions_dir while it should be {graph_schema[constants.STR_NTYPE_COUNTS][ntype_id]} (from graph schema)." 
def read_orig_ids(out_dir, fname, num_parts):
    """Read original id files for the partitioned graph objects

    The file `fname` is loaded from each ``part<i>`` sub-directory of
    `out_dir` (for ``i`` in ``0 .. num_parts - 1``) and the per-type id
    arrays are concatenated in partition order.

    Parameters:
    -----------
    out_dir : string
        specifying the directory where the files are located
    fname : string
        file name to read from
    num_parts : integer
        no. of partitions

    Returns:
    --------
    dictionary :
        where keys are node/edge types and values are numpy arrays of the
        original node or edge ids from the original graph
    """
    # Collect every partition's chunk first and concatenate once per type;
    # re-concatenating inside the loop copies the growing array each time.
    chunks_per_type = {}
    for i in range(num_parts):
        ids_path = os.path.join(out_dir, f"part{i}", fname)
        for type_name, data in load_tensors(ids_path).items():
            # Always convert to numpy so that only ndarrays are concatenated
            # (the later partitions used to be appended as raw tensors).
            chunks_per_type.setdefault(type_name, []).append(data.numpy())

    return {
        type_name: np.concatenate(chunks)
        for type_name, chunks in chunks_per_type.items()
    }
Before graph neural networks, many proposed methods are # using either connectivity alone (such as DeepWalk or node2vec), or simple # combinations of connectivity and the node's own features. GNNs, by # contrast, offer an opportunity to obtain node representations by # combining the connectivity and features of a *local neighborhood*. # # `Kipf et # al., <https://arxiv.org/abs/1609.02907>`__ is an example that formulates # the node classification problem as a semi-supervised node classification # task. With the help of only a small portion of labeled nodes, a graph # neural network (GNN) can accurately predict the node category of the # others. # # This tutorial will show how to build such a GNN for semi-supervised node # classification with only a small number of labels on the Cora # dataset, # a citation network with papers as nodes and citations as edges. The task # is to predict the category of a given paper. Each paper node contains a # word count vector as its features, normalized so that they sum up to one, # as described in Section 5.2 of # `the paper <https://arxiv.org/abs/1609.02907>`__. # # Loading Cora Dataset # -------------------- # dataset = dgl.data.CoraGraphDataset() print(f"Number of categories: {dataset.num_classes}") ###################################################################### # A DGL Dataset object may contain one or multiple graphs. The Cora # dataset used in this tutorial only consists of one single graph. # g = dataset[0] ###################################################################### # A DGL graph can store node features and edge features in two # dictionary-like attributes called ``ndata`` and ``edata``. # In the DGL Cora dataset, the graph contains the following node features: # # - ``train_mask``: A boolean tensor indicating whether the node is in the # training set. # # - ``val_mask``: A boolean tensor indicating whether the node is in the # validation set. # # - ``test_mask``: A boolean tensor indicating whether the node is in the # test set.
def train(g, model):
    """Train `model` on graph `g` for 100 epochs with Adam (lr=0.01).

    Reads ``feat``, ``label``, ``train_mask``, ``val_mask`` and ``test_mask``
    from ``g.ndata``. Every fifth epoch it prints the loss together with the
    current and best validation/test accuracy. Returns nothing; the model is
    updated in place.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    node_data = g.ndata
    features = node_data["feat"]
    labels = node_data["label"]
    train_mask = node_data["train_mask"]
    val_mask = node_data["val_mask"]
    test_mask = node_data["test_mask"]

    def masked_accuracy(pred, mask):
        # Fraction of correctly predicted nodes among those selected by mask.
        return (pred[mask] == labels[mask]).float().mean()

    best_val_acc = 0
    best_test_acc = 0
    for e in range(100):
        # Forward
        logits = model(g, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that you should only compute the losses of the nodes in the training set.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = masked_accuracy(pred, train_mask)
        val_acc = masked_accuracy(pred, val_mask)
        test_acc = masked_accuracy(pred, test_mask)

        # Save the best validation accuracy and the corresponding test accuracy.
        if val_acc > best_val_acc:
            best_val_acc, best_test_acc = val_acc, test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 5 == 0:
            print(
                f"In epoch {e}, loss: {loss:.3f}, val acc: {val_acc:.3f} (best {best_val_acc:.3f}), test acc: {test_acc:.3f} (best {best_test_acc:.3f})"
            )
=============================== By the end of this tutorial you will be able to: - Construct a graph in DGL from scratch. - Assign node and edge features to a graph. - Query properties of a DGL graph such as node degrees and connectivity. - Transform a DGL graph into another graph. - Load and save DGL graphs. (Time estimate: 16 minutes) """ ###################################################################### # DGL Graph Construction # ---------------------- # # DGL represents a directed graph as a ``DGLGraph`` object. You can # construct a graph by specifying the number of nodes in the graph as well # as the list of source and destination nodes. Nodes in the graph have # consecutive IDs starting from 0. # # For instance, the following code constructs a directed star graph with 5 # leaves. The center node's ID is 0. The edges go from the # center node to the leaves. # import os os.environ["DGLBACKEND"] = "pytorch" import dgl import numpy as np import torch g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6) # Equivalently, PyTorch LongTensors also work. g = dgl.graph( (torch.LongTensor([0, 0, 0, 0, 0]), torch.LongTensor([1, 2, 3, 4, 5])), num_nodes=6, ) # You can omit the number of nodes argument if you can tell the number of nodes from the edge list alone. g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5])) ###################################################################### # Edges in the graph have consecutive IDs starting from 0, and are # in the same order as the list of source and destination nodes during # creation. # # Print the source and destination nodes of every edge. print(g.edges()) ###################################################################### # .. note:: # # ``DGLGraph``'s are always directed to best fit the computation # pattern of graph neural networks, where the messages sent # from one node to the other are often different between both # directions. 
If you want to handle undirected graphs, you may consider # treating it as a bidirectional graph. See `Graph # Transformations`_ for an example of making # a bidirectional graph. # ###################################################################### # Assigning Node and Edge Features to Graph # ----------------------------------------- # # Many graph data contain attributes on nodes and edges. # Although the types of node and edge attributes can be arbitrary in real # world, ``DGLGraph`` only accepts attributes stored in tensors (with # numerical contents). Consequently, an attribute of all the nodes or # edges must have the same shape. In the context of deep learning, those # attributes are often called *features*. # # You can assign and retrieve node and edge features via ``ndata`` and # ``edata`` interface. # # Assign a 3-dimensional node feature vector for each node. g.ndata["x"] = torch.randn(6, 3) # Assign a 4-dimensional edge feature vector for each edge. g.edata["a"] = torch.randn(5, 4) # Assign a 5x4 node feature matrix for each node. Node and edge features in DGL can be multi-dimensional. g.ndata["y"] = torch.randn(6, 5, 4) print(g.edata["a"]) ###################################################################### # .. note:: # # The vast development of deep learning has provided us many # ways to encode various types of attributes into numerical features. # Here are some general suggestions: # # - For categorical attributes (e.g. gender, occupation), consider # converting them to integers or one-hot encoding. # - For variable length string contents (e.g. news article, quote), # consider applying a language model. # - For images, consider applying a vision model such as CNNs. # # You can find plenty of materials on how to encode such attributes # into a tensor in the `PyTorch Deep Learning # Tutorials `__. 
# ###################################################################### # Querying Graph Structures # ------------------------- # # ``DGLGraph`` object provides various methods to query a graph structure. # print(g.num_nodes()) print(g.num_edges()) # Out degrees of the center node print(g.out_degrees(0)) # In degrees of the center node - note that the graph is directed so the in degree should be 0. print(g.in_degrees(0)) ###################################################################### # Graph Transformations # --------------------- # ###################################################################### # DGL provides many APIs to transform a graph to another such as # extracting a subgraph: # # Induce a subgraph from node 0, node 1 and node 3 from the original graph. sg1 = g.subgraph([0, 1, 3]) # Induce a subgraph from edge 0, edge 1 and edge 3 from the original graph. sg2 = g.edge_subgraph([0, 1, 3]) ###################################################################### # You can obtain the node/edge mapping from the subgraph to the original # graph by looking into the node feature ``dgl.NID`` or edge feature # ``dgl.EID`` in the new graph. 
# # The original IDs of each node in sg1 print(sg1.ndata[dgl.NID]) # The original IDs of each edge in sg1 print(sg1.edata[dgl.EID]) # The original IDs of each node in sg2 print(sg2.ndata[dgl.NID]) # The original IDs of each edge in sg2 print(sg2.edata[dgl.EID]) ###################################################################### # ``subgraph`` and ``edge_subgraph`` also copy the original features # to the subgraph: # # The original node feature of each node in sg1 print(sg1.ndata["x"]) # The original edge feature of each edge in sg1 print(sg1.edata["a"]) # The original node feature of each node in sg2 print(sg2.ndata["x"]) # The original edge feature of each edge in sg2 print(sg2.edata["a"]) ###################################################################### # Another common transformation is to add a reverse edge for each edge in # the original graph with ``dgl.add_reverse_edges``. # # .. note:: # # If you have an undirected graph, it is better to convert it # into a bidirectional graph first via adding reverse edges. # newg = dgl.add_reverse_edges(g) print(newg.edges()) ###################################################################### # Loading and Saving Graphs # ------------------------- # # You can save a graph or a list of graphs via ``dgl.save_graphs`` and # load them back with ``dgl.load_graphs``. # # Save graphs dgl.save_graphs("graph.dgl", g) dgl.save_graphs("graphs.dgl", [g, sg1, sg2]) # Load graphs (g,), _ = dgl.load_graphs("graph.dgl") print(g) (g, sg1, sg2), _ = dgl.load_graphs("graphs.dgl") print(g) print(sg1) print(sg2) ###################################################################### # What’s next? # ------------ # # - See # :ref:`here ` # for a list of graph structure query APIs. # - See # :ref:`here ` # for a list of subgraph extraction routines. # - See # :ref:`here ` # for a list of graph transformation routines.
# - API reference of :func:`dgl.save_graphs` # and # :func:`dgl.load_graphs` # # Thumbnail credits: Wikipedia # sphinx_gallery_thumbnail_path = '_static/blitz_2_dglgraph.png' ================================================ FILE: tutorials/blitz/3_message_passing.py ================================================ """ Write your own GNN module ========================= Sometimes, your model goes beyond simply stacking existing GNN modules. For example, you would like to invent a new way of aggregating neighbor information by considering node importance or edge weights. By the end of this tutorial you will be able to - Understand DGL’s message passing APIs. - Implement GraphSAGE convolution module by your own. This tutorial assumes that you already know :doc:`the basics of training a GNN for node classification <1_introduction>`. (Time estimate: 10 minutes) """ import os os.environ["DGLBACKEND"] = "pytorch" import dgl import dgl.function as fn import torch import torch.nn as nn import torch.nn.functional as F ###################################################################### # Message passing and GNNs # ------------------------ # # DGL follows the *message passing paradigm* inspired by the Message # Passing Neural Network proposed by `Gilmer et # al. `__ Essentially, they found many # GNN models can fit into the following framework: # # .. math:: # # # m_{u\to v}^{(l)} = M^{(l)}\left(h_v^{(l-1)}, h_u^{(l-1)}, e_{u\to v}^{(l-1)}\right) # # .. math:: # # # m_{v}^{(l)} = \sum_{u\in\mathcal{N}(v)}m_{u\to v}^{(l)} # # .. math:: # # # h_v^{(l)} = U^{(l)}\left(h_v^{(l-1)}, m_v^{(l)}\right) # # where DGL calls :math:`M^{(l)}` the *message function*, :math:`\sum` the # *reduce function* and :math:`U^{(l)}` the *update function*. Note that # :math:`\sum` here can represent any function and is not necessarily a # summation. 
#

######################################################################
# For example, the `GraphSAGE convolution (Hamilton et al.,
# 2017) `__
# takes the following mathematical form:
#
# .. math::
#
#
#    h_{\mathcal{N}(v)}^k\leftarrow \text{Average}\{h_u^{k-1},\forall u\in\mathcal{N}(v)\}
#
# .. math::
#
#
#    h_v^k\leftarrow \text{ReLU}\left(W^k\cdot \text{CONCAT}(h_v^{k-1}, h_{\mathcal{N}(v)}^k) \right)
#
# You can see that message passing is directional: the message sent from
# one node :math:`u` to other node :math:`v` is not necessarily the same
# as the other message sent from node :math:`v` to node :math:`u` in the
# opposite direction.
#
# Although DGL has builtin support of GraphSAGE via
# :class:`dgl.nn.SAGEConv `,
# here is how you can implement GraphSAGE convolution in DGL on your own.
#


class SAGEConv(nn.Module):
    """GraphSAGE convolution layer with mean aggregation.

    Parameters
    ----------
    in_feat : int
        Size of each input node feature.
    out_feat : int
        Size of each output node feature.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # The projection consumes CONCAT(h_v, h_N(v)), i.e. twice the input width.
        self.linear = nn.Linear(2 * in_feat, out_feat)

    def forward(self, g, h):
        """Compute the layer output for every node of ``g``.

        Parameters
        ----------
        g : Graph
            The input graph.
        h : Tensor
            The input node feature.

        Returns
        -------
        Tensor
            ``self.linear`` applied to each node's own feature concatenated
            with the mean of its incoming neighbors' features.
        """
        # local_scope keeps the temporary 'h' / 'h_N' entries off the caller's graph.
        with g.local_scope():
            g.ndata["h"] = h
            copy_source_feature = fn.copy_u("h", "m")
            average_messages = fn.mean("m", "h_N")
            # update_all is a message passing API.
            g.update_all(
                message_func=copy_source_feature,
                reduce_func=average_messages,
            )
            neighbor_mean = g.ndata["h_N"]
            return self.linear(torch.cat((h, neighbor_mean), dim=1))


######################################################################
# The central piece in this code is the
# :func:`g.update_all `
# function, which gathers and averages the neighbor features. There are
# three concepts here:
#
# * Message function ``fn.copy_u('h', 'm')`` that
#   copies the node feature under name ``'h'`` as *messages* with name
#   ``'m'`` sent to neighbors.
#
# * Reduce function ``fn.mean('m', 'h_N')`` that averages
#   all the received messages under name ``'m'`` and saves the result as a
#   new node feature ``'h_N'``.
#
# * ``update_all`` tells DGL to trigger the
#   message and reduce functions for all the nodes and edges.
#

######################################################################
# Afterwards, you can stack your own GraphSAGE convolution layers to form
# a multi-layer GraphSAGE network.
#


class Model(nn.Module):
    """Two-layer GraphSAGE network built from the custom ``SAGEConv``."""

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        self.conv1 = SAGEConv(in_feats, h_feats)
        self.conv2 = SAGEConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h


######################################################################
# Training loop
# ~~~~~~~~~~~~~
# The following code for data loading and training loop is adapted
# from the introduction tutorial.
#

import dgl.data

dataset = dgl.data.CoraGraphDataset()
g = dataset[0]


def train(g, model):
    """Train ``model`` for node classification on ``g`` and print progress.

    Parameters
    ----------
    g : Graph
        The input graph. Its ``ndata`` must provide ``feat``, ``label``,
        ``train_mask``, ``val_mask`` and ``test_mask``.
    model : nn.Module
        Called as ``model(g, features)``; its output is used as per-node
        classification logits.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    best_val_acc = 0
    best_test_acc = 0

    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    val_mask = g.ndata["val_mask"]
    test_mask = g.ndata["test_mask"]
    for e in range(200):
        # Forward
        logits = model(g, features)

        # Compute prediction
        pred = logits.argmax(1)

        # Compute loss
        # Note that we should only compute the losses of the nodes in the training set,
        # i.e. with train_mask 1.
        loss = F.cross_entropy(logits[train_mask], labels[train_mask])

        # Compute accuracy on training/validation/test
        train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
        val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
        test_acc = (pred[test_mask] == labels[test_mask]).float().mean()

        # Save the best validation accuracy and the corresponding test accuracy.
        if best_val_acc < val_acc:
            best_val_acc = val_acc
            best_test_acc = test_acc

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The per-epoch logits are not kept: nothing reads them afterwards, so
        # storing one tensor per epoch would only hold on to memory.

        if e % 5 == 0:
            print(
                "In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})".format(
                    e, loss, val_acc, best_val_acc, test_acc, best_test_acc
                )
            )


model = Model(g.ndata["feat"].shape[1], 16, dataset.num_classes)
train(g, model)


######################################################################
# More customization
# ------------------
#
# In DGL, we provide many built-in message and reduce functions under the
# ``dgl.function`` package. You can find more details in :ref:`the API
# doc `.
#

######################################################################
# These APIs allow one to quickly implement new graph convolution modules.
# For example, the following implements a new ``SAGEConv`` that aggregates
# neighbor representations using a weighted average. Note that ``edata``
# member can hold edge features which can also take part in message
# passing.
#


class WeightedSAGEConv(nn.Module):
    """Graph convolution module used by the GraphSAGE model with edge weights.

    Parameters
    ----------
    in_feat : int
        Input feature size.
    out_feat : int
        Output feature size.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # A linear submodule for projecting the input and neighbor feature to the output.
        self.linear = nn.Linear(in_feat * 2, out_feat)

    def forward(self, g, h, w):
        """Forward computation

        Parameters
        ----------
        g : Graph
            The input graph.
        h : Tensor
            The input node feature.
        w : Tensor
            The edge weight.
        """
        with g.local_scope():
            g.ndata["h"] = h
            g.edata["w"] = w
            # Each message is the source feature scaled by the edge weight;
            # the messages arriving at a node are then averaged.
            g.update_all(
                message_func=fn.u_mul_e("h", "w", "m"),
                reduce_func=fn.mean("m", "h_N"),
            )
            h_N = g.ndata["h_N"]
            h_total = torch.cat([h, h_N], dim=1)
            return self.linear(h_total)


######################################################################
# Because the graph in this dataset does not have edge weights, we
# manually assign all edge weights to one in the ``forward()`` function of
# the model. You can replace it with your own edge weights.
#


class Model(nn.Module):
    """Two-layer network built from ``WeightedSAGEConv`` with unit edge weights."""

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        self.conv1 = WeightedSAGEConv(in_feats, h_feats)
        self.conv2 = WeightedSAGEConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        # Build the all-ones edge weight once, directly on the graph's device,
        # and share it between both layers.
        w = torch.ones(g.num_edges(), 1, device=g.device)
        h = self.conv1(g, in_feat, w)
        h = F.relu(h)
        h = self.conv2(g, h, w)
        return h


model = Model(g.ndata["feat"].shape[1], 16, dataset.num_classes)
train(g, model)


######################################################################
# Even more customization by user-defined function
# ------------------------------------------------
#
# DGL allows user-defined message and reduce function for the maximal
# expressiveness. Here is a user-defined message function that is
# equivalent to ``fn.u_mul_e('h', 'w', 'm')``.
#


def u_mul_e_udf(edges):
    """Return message ``'m'``: source feature ``'h'`` times edge feature ``'w'``."""
    return {"m": edges.src["h"] * edges.data["w"]}


######################################################################
# ``edges`` has three members: ``src``, ``data`` and ``dst``, representing
# the source node feature, edge feature, and destination node feature for
# all edges.
#

######################################################################
# You can also write your own reduce function.
For example, the following # is equivalent to the builtin ``fn.mean('m', 'h_N')`` function that averages # the incoming messages: # def mean_udf(nodes): return {"h_N": nodes.mailbox["m"].mean(1)} ###################################################################### # In short, DGL will group the nodes by their in-degrees, and for each # group DGL stacks the incoming messages along the second dimension. You # can then perform a reduction along the second dimension to aggregate # messages. # # For more details on customizing message and reduce function with # user-defined function, please refer to the :ref:`API # reference `. # ###################################################################### # Best practice of writing custom GNN modules # ------------------------------------------- # # DGL recommends the following practice ranked by preference: # # - Use ``dgl.nn`` modules. # - Use ``dgl.nn.functional`` functions which contain lower-level complex # operations such as computing a softmax for each node over incoming # edges. # - Use ``update_all`` with builtin message and reduce functions. # - Use user-defined message or reduce functions. # ###################################################################### # What’s next? # ------------ # # - :ref:`Writing Efficient Message Passing # Code `. # # Thumbnail credits: Representation Learning on Networks, Jure Leskovec, WWW 2018 # sphinx_gallery_thumbnail_path = '_static/blitz_3_message_passing.png' ================================================ FILE: tutorials/blitz/4_link_predict.py ================================================ """ Link Prediction using Graph Neural Networks =========================================== In the :doc:`introduction <1_introduction>`, you have already learned the basic workflow of using GNNs for node classification, i.e. predicting the category of a node in a graph. This tutorial will teach you how to train a GNN for link prediction, i.e. 
predicting the existence of an edge between two arbitrary nodes in a graph. By the end of this tutorial you will be able to - Build a GNN-based link prediction model. - Train and evaluate the model on a small DGL-provided dataset. (Time estimate: 28 minutes) """ import itertools import os os.environ["DGLBACKEND"] = "pytorch" import dgl import dgl.data import numpy as np import scipy.sparse as sp import torch import torch.nn as nn import torch.nn.functional as F ###################################################################### # Overview of Link Prediction with GNN # ------------------------------------ # # Many applications such as social recommendation, item recommendation, # knowledge graph completion, etc., can be formulated as link prediction, # which predicts whether an edge exists between two particular nodes. This # tutorial shows an example of predicting whether a citation relationship, # either citing or being cited, between two papers exists in a citation # network. # # This tutorial formulates the link prediction problem as a binary classification # problem as follows: # # - Treat the edges in the graph as *positive examples*. # - Sample a number of non-existent edges (i.e. node pairs with no edges # between them) as *negative* examples. # - Divide the positive examples and negative examples into a training # set and a test set. # - Evaluate the model with any binary classification metric such as Area # Under Curve (AUC). # # .. note:: # # The practice comes from # `SEAL `__, # although the model here does not use their idea of node labeling. # # In some domains such as large-scale recommender systems or information # retrieval, you may favor metrics that emphasize good performance of # top-K predictions. In these cases you may want to consider other metrics # such as mean average precision, and use other negative sampling methods, # which are beyond the scope of this tutorial. 
#
# Loading graph and features
# --------------------------
#
# Following the :doc:`introduction <1_introduction>`, this tutorial
# first loads the Cora dataset.
#

dataset = dgl.data.CoraGraphDataset()
g = dataset[0]

######################################################################
# Prepare training and testing sets
# ---------------------------------
#
# This tutorial randomly picks 10% of the edges for positive examples in
# the test set, and leaves the rest for the training set. It then samples
# the same number of edges for negative examples in both sets.
#

# Split edge set for training and testing
u, v = g.edges()

eids = np.arange(g.num_edges())
eids = np.random.permutation(eids)
test_size = int(len(eids) * 0.1)
train_size = g.num_edges() - test_size
test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
train_pos_u, train_pos_v = u[eids[test_size:]], v[eids[test_size:]]

# Find all negative edges and split them for training and testing.
# The shape is given explicitly: without it SciPy sizes the matrix from the
# largest node ID that appears in an edge, which is smaller than
# ``np.eye(g.num_nodes())`` whenever the highest-ID nodes have no edges.
num_nodes = g.num_nodes()
adj = sp.coo_matrix(
    (np.ones(len(u)), (u.numpy(), v.numpy())), shape=(num_nodes, num_nodes)
)
# Non-zero entries of adj_neg are node pairs that are neither connected nor
# a node paired with itself.
adj_neg = 1 - adj.todense() - np.eye(num_nodes)
neg_u, neg_v = np.where(adj_neg != 0)

neg_eids = np.random.choice(len(neg_u), g.num_edges())
test_neg_u, test_neg_v = (
    neg_u[neg_eids[:test_size]],
    neg_v[neg_eids[:test_size]],
)
train_neg_u, train_neg_v = (
    neg_u[neg_eids[test_size:]],
    neg_v[neg_eids[test_size:]],
)

######################################################################
# When training, you will need to remove the edges in the test set from
# the original graph. You can do this via ``dgl.remove_edges``.
#
# .. note::
#
#    ``dgl.remove_edges`` works by creating a subgraph from the
#    original graph, resulting in a copy and therefore could be slow for
#    large graphs. If so, you could save the training and test graph to
#    disk, as you would do for preprocessing.
#

train_g = dgl.remove_edges(g, eids[:test_size])

######################################################################
# Define a GraphSAGE model
# ------------------------
#
# This tutorial builds a model consisting of two
# `GraphSAGE `__ layers, each computes
# new node representations by averaging neighbor information. DGL provides
# ``dgl.nn.SAGEConv`` that conveniently creates a GraphSAGE layer.
#

from dgl.nn import SAGEConv


# ----------- 2. create model -------------- #
# build a two-layer GraphSAGE model
class GraphSAGE(nn.Module):
    """Two stacked mean-aggregator ``SAGEConv`` layers with a ReLU in between."""

    def __init__(self, in_feats, h_feats):
        super().__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, "mean")
        self.conv2 = SAGEConv(h_feats, h_feats, "mean")

    def forward(self, g, in_feat):
        """Return the node representations of ``g`` computed from ``in_feat``."""
        hidden = F.relu(self.conv1(g, in_feat))
        return self.conv2(g, hidden)


######################################################################
# The model then predicts the probability of existence of an edge by
# computing a score between the representations of both incident nodes
# with a function (e.g. an MLP or a dot product), which you will see in
# the next section.
#
# .. math::
#
#
#    \hat{y}_{u\sim v} = f(h_u, h_v)
#

######################################################################
# Positive graph, negative graph, and ``apply_edges``
# ---------------------------------------------------
#
# In previous tutorials you have learned how to compute node
# representations with a GNN. However, link prediction requires you to
# compute representation of *pairs of nodes*.
#
# DGL recommends you to treat the pairs of nodes as another graph, since
# you can describe a pair of nodes with an edge. In link prediction, you
# will have a *positive graph* consisting of all the positive examples as
# edges, and a *negative graph* consisting of all the negative examples.
# The *positive graph* and the *negative graph* will contain the same set
# of nodes as the original graph.
# This makes it easier to pass node
# features among multiple graphs for computation. As you will see later,
# you can directly feed the node representations computed on the entire
# graph to the positive and the negative graphs for computing pair-wise
# scores.
#
# The following code constructs the positive graph and the negative graph
# for the training set and the test set respectively.
#

train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.num_nodes())
train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.num_nodes())

test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.num_nodes())
test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.num_nodes())


######################################################################
# The benefit of treating the pairs of nodes as a graph is that you can
# use the ``DGLGraph.apply_edges`` method, which conveniently computes new
# edge features based on the incident nodes’ features and the original
# edge features (if applicable).
#
# DGL provides a set of optimized builtin functions to compute new
# edge features based on the original node/edge features. For example,
# ``dgl.function.u_dot_v`` computes a dot product of the incident nodes’
# representations for each edge.
#

import dgl.function as fn


class DotPredictor(nn.Module):
    """Score each edge by the dot product of its two endpoint representations."""

    def forward(self, g, h):
        """Return one score per edge of ``g`` given node representations ``h``."""
        with g.local_scope():
            g.ndata["h"] = h
            # Store, under the edge feature 'score', the dot product of the
            # source node's 'h' and the destination node's 'h'.
            dot_product = fn.u_dot_v("h", "h", "score")
            g.apply_edges(dot_product)
            edge_scores = g.edata["score"]
            # u_dot_v yields a 1-element vector per edge; keep only that element.
            return edge_scores[:, 0]


######################################################################
# You can also write your own function if it is complex.
# For instance, the following module produces a scalar score on each edge
# by concatenating the incident nodes’ features and passing it to an MLP.
#


class MLPPredictor(nn.Module):
    """Score each edge with a two-layer MLP over its concatenated endpoint features."""

    def __init__(self, h_feats):
        super().__init__()
        # W1 reads [source ; destination], hence twice the node feature size.
        self.W1 = nn.Linear(2 * h_feats, h_feats)
        self.W2 = nn.Linear(h_feats, 1)

    def apply_edges(self, edges):
        """
        Computes a scalar score for each edge of the given graph.

        Parameters
        ----------
        edges :
            Has three members ``src``, ``dst`` and ``data``, each of
            which is a dictionary representing the features of the
            source nodes, the destination nodes, and the edges
            themselves.

        Returns
        -------
        dict
            A dictionary of new edge features.
        """
        endpoint_pair = torch.cat((edges.src["h"], edges.dst["h"]), 1)
        hidden = F.relu(self.W1(endpoint_pair))
        raw_score = self.W2(hidden)
        return {"score": raw_score.squeeze(1)}

    def forward(self, g, h):
        """Return the per-edge ``'score'`` of ``g`` for node representations ``h``."""
        with g.local_scope():
            g.ndata["h"] = h
            g.apply_edges(self.apply_edges)
            edge_scores = g.edata["score"]
            return edge_scores


######################################################################
# .. note::
#
#    The builtin functions are optimized for both speed and memory.
#    We recommend using builtin functions whenever possible.
#
# .. note::
#
#    If you have read the :doc:`message passing
#    tutorial <3_message_passing>`, you will notice that the
#    argument ``apply_edges`` takes has exactly the same form as a message
#    function in ``update_all``.
#

######################################################################
# Training loop
# -------------
#
# After you defined the node representation computation and the edge score
# computation, you can go ahead and define the overall model, loss
# function, and evaluation metric.
#
# The loss function is simply binary cross entropy loss.
#
# .. math::
#
#
#    \mathcal{L} = -\sum_{u\sim v\in \mathcal{D}}\left( y_{u\sim v}\log(\hat{y}_{u\sim v}) + (1-y_{u\sim v})\log(1-\hat{y}_{u\sim v}) \right)
#
# The evaluation metric in this tutorial is AUC.
#

model = GraphSAGE(train_g.ndata["feat"].shape[1], 16)
# You can replace DotPredictor with MLPPredictor.
# pred = MLPPredictor(16)
pred = DotPredictor()


def compute_loss(pos_score, neg_score):
    """Binary cross-entropy (with logits) over positive and negative edge scores.

    Parameters
    ----------
    pos_score : Tensor
        Raw scores of the positive examples; their target label is 1.
    neg_score : Tensor
        Raw scores of the negative examples; their target label is 0.

    Returns
    -------
    Tensor
        The mean loss over all examples.
    """
    scores = torch.cat([pos_score, neg_score])
    # Build the labels from the scores themselves so that they share the
    # scores' device, dtype and shape (labels fixed on the CPU would break
    # as soon as the scores live on another device).
    labels = torch.cat(
        [torch.ones_like(pos_score), torch.zeros_like(neg_score)]
    )
    return F.binary_cross_entropy_with_logits(scores, labels)


def compute_auc(pos_score, neg_score):
    """Area under the ROC curve of positive versus negative edge scores.

    ``roc_auc_score`` is imported from ``sklearn.metrics`` further down in
    this script, before the first call to this function.
    """
    # detach().cpu() makes the NumPy conversion valid for tensors that
    # require grad or live on another device.
    scores = torch.cat([pos_score, neg_score]).detach().cpu().numpy()
    labels = torch.cat(
        [torch.ones(pos_score.shape[0]), torch.zeros(neg_score.shape[0])]
    ).numpy()
    return roc_auc_score(labels, scores)


######################################################################
# The training loop goes as follows:
#
# .. note::
#
#    This tutorial does not include evaluation on a validation
#    set. In practice you should save and evaluate the best model based on
#    performance on the validation set.
#

# ----------- 3. set up loss and optimizer -------------- #
# in this case, the loss is computed inside the training loop
optimizer = torch.optim.Adam(
    itertools.chain(model.parameters(), pred.parameters()), lr=0.01
)

# ----------- 4. training -------------------------------- #
all_logits = []
for e in range(100):
    # forward
    h = model(train_g, train_g.ndata["feat"])
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)

    # backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % 5 == 0:
        print("In epoch {}, loss: {}".format(e, loss))

# ----------- 5.
check results ------------------------ # from sklearn.metrics import roc_auc_score with torch.no_grad(): pos_score = pred(test_pos_g, h) neg_score = pred(test_neg_g, h) print("AUC", compute_auc(pos_score, neg_score)) # Thumbnail credits: Link Prediction with Neo4j, Mark Needham # sphinx_gallery_thumbnail_path = '_static/blitz_4_link_predict.png' ================================================ FILE: tutorials/blitz/5_graph_classification.py ================================================ """ Training a GNN for Graph Classification ======================================= By the end of this tutorial, you will be able to - Load a DGL-provided graph classification dataset. - Understand what a *readout* function does. - Understand how to create and use a minibatch of graphs. - Build a GNN-based graph classification model. - Train and evaluate the model on a DGL-provided dataset. (Time estimate: 18 minutes) """ import os os.environ["DGLBACKEND"] = "pytorch" import dgl import dgl.data import torch import torch.nn as nn import torch.nn.functional as F ###################################################################### # Overview of Graph Classification with GNN # ----------------------------------------- # # Graph classification or regression requires a model to predict certain # graph-level properties of a single graph given its node and edge # features. Molecular property prediction is one particular application. # # This tutorial shows how to train a graph classification model for a # small dataset from the paper `How Powerful Are Graph Neural # Networks `__. # # Loading Data # ------------ # # Load the PROTEINS graph classification dataset through ``GINDataset``, with the ``self_loop`` option enabled. dataset = dgl.data.GINDataset("PROTEINS", self_loop=True) ###################################################################### # The dataset is a set of graphs, each with node features and a single # label.
One can see the node feature dimensionality and the number of # possible graph categories of ``GINDataset`` objects in ``dim_nfeats`` # and ``gclasses`` attributes. # print("Node feature dimensionality:", dataset.dim_nfeats) print("Number of graph categories:", dataset.gclasses) from dgl.dataloading import GraphDataLoader ###################################################################### # Defining Data Loader # -------------------- # # A graph classification dataset usually contains two types of elements: a # set of graphs, and their graph-level labels. Similar to an image # classification task, when the dataset is large enough, we need to train # with mini-batches. When you train a model for image classification or # language modeling, you will use a ``DataLoader`` to iterate over the # dataset. In DGL, you can use the ``GraphDataLoader``. # # You can also use various dataset samplers provided in # `torch.utils.data.sampler `__. # For example, this tutorial creates a training ``GraphDataLoader`` and # test ``GraphDataLoader``, using ``SubsetRandomSampler`` to tell PyTorch # to sample from only a subset of the dataset. 
# from torch.utils.data.sampler import SubsetRandomSampler num_examples = len(dataset) num_train = int(num_examples * 0.8) train_sampler = SubsetRandomSampler(torch.arange(num_train)) test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples)) train_dataloader = GraphDataLoader( dataset, sampler=train_sampler, batch_size=5, drop_last=False ) test_dataloader = GraphDataLoader( dataset, sampler=test_sampler, batch_size=5, drop_last=False ) ###################################################################### # You can try to iterate over the created ``GraphDataLoader`` and see what it # gives: # it = iter(train_dataloader) batch = next(it) print(batch) ###################################################################### # As each element in ``dataset`` has a graph and a label, the # ``GraphDataLoader`` will return two objects for each iteration. The # first element is the batched graph, and the second element is simply a # label vector representing the category of each graph in the mini-batch. # Next, we’ll talk about the batched graph. # # A Batched Graph in DGL # ---------------------- # # In each mini-batch, the sampled graphs are combined into a single bigger # batched graph via ``dgl.batch``. The single bigger batched graph merges # all original graphs as separate connected components, with the node # and edge features concatenated. This bigger graph is also a ``DGLGraph`` # instance (so you can # still treat it as a normal ``DGLGraph`` object as in # `here <2_dglgraph.ipynb>`__). It however contains the information # necessary for recovering the original graphs, such as the number of # nodes and edges of each graph element.
# batched_graph, labels = batch print( "Number of nodes for each graph element in the batch:", batched_graph.batch_num_nodes(), ) print( "Number of edges for each graph element in the batch:", batched_graph.batch_num_edges(), ) # Recover the original graph elements from the minibatch graphs = dgl.unbatch(batched_graph) print("The original graphs in the minibatch:") print(graphs) ###################################################################### # Define Model # ------------ # # This tutorial will build a two-layer `Graph Convolutional Network # (GCN) `__. Each of # its layers computes new node representations by aggregating neighbor # information. If you have gone through the # :doc:`introduction <1_introduction>`, you will notice two # differences: # # - Since the task is to predict a single category for the *entire graph* # instead of for every node, you will need to aggregate the # representations of all the nodes and potentially the edges to form a # graph-level representation. Such a process is more commonly referred to as # a *readout*. A simple choice is to average the node features of a # graph with ``dgl.mean_nodes()``. # # - The input graph to the model will be a batched graph yielded by the # ``GraphDataLoader``. The readout functions provided by DGL can handle # batched graphs so that they will return one representation for each # minibatch element.
#
from dgl.nn import GraphConv


class GCN(nn.Module):
    """Two-layer GCN followed by a mean readout over the nodes of each graph.

    Parameters
    ----------
    in_feats : int
        Input node feature size.
    h_feats : int
        Hidden feature size.
    num_classes : int
        Number of graph categories; the size of the output per graph.
    """

    def __init__(self, in_feats, h_feats, num_classes):
        super().__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        """Return one representation per graph in the (batched) graph ``g``."""
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        # local_scope keeps the temporary 'h' feature from being left on the
        # caller's graph after the readout.
        with g.local_scope():
            g.ndata["h"] = h
            return dgl.mean_nodes(g, "h")


######################################################################
# Training Loop
# -------------
#
# The training loop iterates over the training set with the
# ``GraphDataLoader`` object and computes the gradients, just like
# image classification or language modeling.
#

# Create the model with given dimensions
model = GCN(dataset.dim_nfeats, 16, dataset.gclasses)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(20):
    for batched_graph, labels in train_dataloader:
        pred = model(batched_graph, batched_graph.ndata["attr"].float())
        loss = F.cross_entropy(pred, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

num_correct = 0
num_tests = 0
# Evaluation needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    for batched_graph, labels in test_dataloader:
        pred = model(batched_graph, batched_graph.ndata["attr"].float())
        num_correct += (pred.argmax(1) == labels).sum().item()
        num_tests += len(labels)

print("Test accuracy:", num_correct / num_tests)


######################################################################
# What’s next
# -----------
#
# - See `GIN
#   example `__
#   for an end-to-end graph classification model.
.. code-block:: shell

    pip install "scikit-optimize>=0.9"
2.1. Add the initialization function at the top of the training function, and wrap the ``model`` with the DDP wrapper
.. code-block:: shell

    python <your_code>.py
The tutorial aims at gaining insights into the paper, with code as a means of explanation.
class GCNLayer(nn.Module):
    """One GCN layer: message passing over all nodes, then a linear layer.

    Parameters
    ----------
    in_feats
        Input feature size of the linear projection.
    out_feats
        Output feature size of the linear projection.
    """

    def __init__(self, in_feats, out_feats):
        super().__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        """Aggregate ``feature`` with ``gcn_msg``/``gcn_reduce`` and project it."""
        # The local scope pops the temporary `'h'` ndata (and anything else
        # stored inside it) automatically when the block exits, so the
        # caller's graph is left untouched.
        with g.local_scope():
            g.ndata["h"] = feature
            g.update_all(gcn_msg, gcn_reduce)
            aggregated = g.ndata["h"]
        return self.linear(aggregated)
def evaluate(model, g, features, labels, mask):
    """Return the fraction of masked nodes that ``model`` classifies correctly.

    The model is switched to evaluation mode and run without gradient
    tracking; the predicted class of a node is the index of its largest
    logit along dimension 1.

    Parameters
    ----------
    model
        Callable as ``model(g, features)``, returning per-node logits.
    g
        Graph forwarded unchanged to ``model``.
    features
        Node features forwarded unchanged to ``model``.
    labels
        Ground-truth class index per node.
    mask
        Index/mask selecting the nodes to score.

    Returns
    -------
    float
        Number of correct predictions divided by the number of selected nodes.
    """
    model.eval()
    with th.no_grad():
        masked_logits = model(g, features)[mask]
        masked_labels = labels[mask]
        # th.max over dim=1 yields (values, indices); only indices matter.
        predictions = th.max(masked_logits, dim=1)[1]
        num_correct = (predictions == masked_labels).sum()
    return num_correct.item() * 1.0 / len(masked_labels)
# :math:`\tilde{D}` and :math:`\tilde{A}` are the degree and adjacency
# matrices of the graph, respectively.
The challenge in training is that simply padding sentences to the maximum length no longer works.
# **generative models** of graphs. In addition to learning
# node and edge features, you would need to model the distribution of arbitrary graphs.
# For this tutorial, you use
# `Deep Generative Models of Graphs <https://arxiv.org/abs/1803.03324>`__
# (DGMG) to implement a graph generative model using DGL. Its algorithmic
# framework is general but also challenging to parallelize.
def forward_inference(self):
    """Generate a graph with the two-level DGMG loop and return ``self.g``.

    Outer loop: keep adding nodes until ``add_node_and_update`` reports a
    stop or the graph already holds ``v_max + 1`` nodes. Inner loop: for the
    node just added, keep choosing destinations while ``add_edge_or_not``
    says so, making at most ``num_nodes - 1`` attempts.
    """
    done = self.add_node_and_update()
    while not done:
        # Size cap on the generated graph.
        if self.g.num_nodes() >= self.v_max + 1:
            break

        edges_tried = 0
        while self.add_edge_or_not():
            # The newest node has at most num_nodes - 1 other nodes to reach.
            if edges_tried >= self.g.num_nodes() - 1:
                break
            self.choose_dest_and_update()
            edges_tried += 1

        done = self.add_node_and_update()

    return self.g
#     def animate(i):
#         ax.cla()
#         g_t = evolution[i]
#         nx.draw_circular(g_t, with_labels=True, ax=ax,
#                          node_color=['#FEBD69'] * g_t.number_of_nodes())
class DGMGSkeleton(nn.Module):
    """Skeleton of the DGMG model.

    Only ``forward`` is implemented here: it creates a fresh graph and
    dispatches to ``forward_train`` or ``forward_inference`` depending on
    ``self.training``. Every other method is a placeholder that raises
    ``NotImplementedError`` until a subclass overrides it.
    """

    def __init__(self, v_max):
        """
        Parameters
        ----------
        v_max: int
            Max number of nodes considered
        """
        super(DGMGSkeleton, self).__init__()

        # Graph configuration
        self.v_max = v_max

    def add_node_and_update(self, a=None):
        """Decide if to add a new node.
        If a new node should be added, update the graph."""
        # Raise (not return) so that a missing override fails loudly instead
        # of handing the exception class back as a truthy "result".
        raise NotImplementedError

    def add_edge_or_not(self, a=None):
        """Decide if a new edge should be added."""
        raise NotImplementedError

    def choose_dest_and_update(self, a=None):
        """Choose destination and connect it to the latest node.
        Add edges for both directions and update the graph."""
        raise NotImplementedError

    def forward_train(self, actions):
        """Forward at training time. It records the probability
        of generating a ground truth graph following the actions."""
        raise NotImplementedError

    def forward_inference(self):
        """Forward at inference time.
        It generates graphs on the fly."""
        raise NotImplementedError

    def forward(self, actions=None):
        """Start from an empty graph and run the train or inference path.

        Parameters
        ----------
        actions: list, optional
            Oracle actions; only forwarded to ``forward_train`` when the
            module is in training mode.
        """
        # The graph you will work on
        self.g = dgl.DGLGraph()

        # If there are some features for nodes and edges,
        # zero tensors will be set for those of new nodes and edges.
        self.g.set_n_initializer(dgl.frame.zero_initializer)
        self.g.set_e_initializer(dgl.frame.zero_initializer)

        if self.training:
            return self.forward_train(actions=actions)
        else:
            return self.forward_inference()
class GraphEmbed(nn.Module):
    """Graph embedding as a gated sum of projected node embeddings.

    Each node embedding ``hv`` is mapped to the graph-embedding space by
    ``node_to_graph`` and weighted by the sigmoid gate ``node_gating``; the
    weighted vectors are summed over all nodes.
    """

    def __init__(self, node_hidden_size):
        super().__init__()

        # Setting from the paper: graph embedding is twice the node size.
        self.graph_hidden_size = 2 * node_hidden_size

        # Per-node scalar gate in (0, 1).
        gate_layers = [nn.Linear(node_hidden_size, 1), nn.Sigmoid()]
        self.node_gating = nn.Sequential(*gate_layers)
        # Node space -> graph space projection.
        self.node_to_graph = nn.Linear(node_hidden_size, self.graph_hidden_size)

    def forward(self, g):
        """Return a ``(1, graph_hidden_size)`` embedding of ``g``.

        An empty graph maps to the all-zero embedding.
        """
        if g.num_nodes() == 0:
            return torch.zeros(1, self.graph_hidden_size)

        # Node features are stored as hv in ndata.
        node_feats = g.ndata["hv"]
        gates = self.node_gating(node_feats)
        projected = self.node_to_graph(node_feats)
        return (gates * projected).sum(0, keepdim=True)
class GraphProp(nn.Module):
    """Rounds of DGMG graph propagation that update ``hv`` on every node.

    Round ``t`` owns one linear message layer and one ``GRUCell``; the
    reduce function for that round is ``dgmg_reduce`` bound to ``round=t``.
    """

    def __init__(self, num_prop_rounds, node_hidden_size):
        super().__init__()

        self.num_prop_rounds = num_prop_rounds

        # Setting from the paper
        self.node_activation_hidden_size = 2 * node_hidden_size

        self.reduce_funcs = []
        self.message_funcs = nn.ModuleList()
        self.node_update_funcs = nn.ModuleList()

        # Layers are created round by round (message layer, then GRU cell)
        # so parameter initialisation order matches the round order.
        for round_idx in range(num_prop_rounds):
            # input being [hv, hu, xuv]
            message_layer = nn.Linear(
                2 * node_hidden_size + 1, self.node_activation_hidden_size
            )
            self.message_funcs.append(message_layer)

            self.reduce_funcs.append(partial(self.dgmg_reduce, round=round_idx))

            update_cell = nn.GRUCell(
                self.node_activation_hidden_size, node_hidden_size
            )
            self.node_update_funcs.append(update_cell)

    def dgmg_msg(self, edges):
        """For an edge u->v, return concat([h_u, x_uv])"""
        src_feats = edges.src["hv"]
        edge_feats = edges.data["he"]
        return {"m": torch.cat([src_feats, edge_feats], dim=1)}

    def dgmg_reduce(self, nodes, round):
        """Turn the mailbox of each node into its activation ``a``.

        The receiving node's own ``hv`` is prepended to every incoming
        message, the round's linear layer is applied to each concatenated
        message, and the results are summed over the message dimension.
        """
        own_feats = nodes.data["hv"]
        mailbox = nodes.mailbox["m"]
        # Repeat h_v once per incoming message so it can be concatenated.
        own_per_message = own_feats.unsqueeze(1).expand(-1, mailbox.size(1), -1)
        full_message = torch.cat([own_per_message, mailbox], dim=2)
        transformed = self.message_funcs[round](full_message)
        return {"a": transformed.sum(1)}

    def forward(self, g):
        """Run all propagation rounds on ``g``; a no-op for edgeless graphs."""
        if not g.num_edges() > 0:
            return

        for round_idx in range(self.num_prop_rounds):
            g.update_all(
                message_func=self.dgmg_msg,
                reduce_func=self.reduce_funcs[round_idx],
            )
            # GRUCell(input=a, hidden=hv) gives the new node embedding.
            update_cell = self.node_update_funcs[round_idx]
            g.ndata["hv"] = update_cell(g.ndata["a"], g.ndata["hv"])
def bernoulli_action_log_prob(logit, action):
    """Return log p(action) under a Bernoulli parametrised by ``logit``.

    Works on the logit rather than the probability for numerical
    stability: ``log p(1) = logsigmoid(logit)`` and
    ``log p(0) = logsigmoid(-logit)``.
    """
    signed_logit = -logit if action == 0 else logit
    return F.logsigmoid(signed_logit)
self.log_prob = [] def forward(self, g, action=None): graph_embed = self.graph_op["embed"](g) logit = self.add_node(graph_embed) prob = torch.sigmoid(logit) if not self.training: action = Bernoulli(prob).sample().item() stop = bool(action == self.stop) if not stop: g.add_nodes(1) self._initialize_node_repr(g, action, graph_embed) if self.training: sample_log_prob = bernoulli_action_log_prob(logit, action) self.log_prob.append(sample_log_prob) return stop ####################################################################################### # Action 2: Add edges # '''''''''''''''''''''''''' # # Given the graph embedding vector :math:`\textbf{h}_{G}` and the node # embedding vector :math:`\textbf{h}_{v}` for the latest node :math:`v`, # you evaluate # # .. math:: # # \text{Sigmoid}(\textbf{W}_{\text{add edge}}\text{concat}([\textbf{h}_{G}, \textbf{h}_{v}])+b_{\text{add edge}}),\\ # # which is then used to parametrize a Bernoulli distribution for deciding # whether to add a new edge starting from :math:`v`. # class AddEdge(nn.Module): def __init__(self, graph_embed_func, node_hidden_size): super(AddEdge, self).__init__() self.graph_op = {"embed": graph_embed_func} self.add_edge = nn.Linear( graph_embed_func.graph_hidden_size + node_hidden_size, 1 ) def prepare_training(self): self.log_prob = [] def forward(self, g, action=None): graph_embed = self.graph_op["embed"](g) src_embed = g.nodes[g.num_nodes() - 1].data["hv"] logit = self.add_edge(torch.cat([graph_embed, src_embed], dim=1)) prob = torch.sigmoid(logit) if self.training: sample_log_prob = bernoulli_action_log_prob(logit, action) self.log_prob.append(sample_log_prob) else: action = Bernoulli(prob).sample().item() to_add_edge = bool(action == 0) return to_add_edge ####################################################################################### # Action 3: Choose a destination # ''''''''''''''''''''''''''''''''' # # When action 2 returns `True`, choose a destination for the # latest node :math:`v`. 
#
# For each possible destination :math:`u\in\{0, \cdots, v-1\}`, the
# probability of choosing it is given by
#
# .. math::
#
#    \frac{\text{exp}(\textbf{W}_{\text{dest}}\text{concat}([\textbf{h}_{u}, \textbf{h}_{v}])+\textbf{b}_{\text{dest}})}{\sum_{i=0}^{v-1}\text{exp}(\textbf{W}_{\text{dest}}\text{concat}([\textbf{h}_{i}, \textbf{h}_{v}])+\textbf{b}_{\text{dest}})}\\
#

from torch.distributions import Categorical


class ChooseDestAndUpdate(nn.Module):
    """Pick a destination for the latest node, connect it, then propagate."""

    def __init__(self, graph_prop_func, node_hidden_size):
        super().__init__()
        self.graph_op = {"prop": graph_prop_func}
        self.choose_dest = nn.Linear(2 * node_hidden_size, 1)

    def _initialize_edge_repr(self, g, src_list, dest_list):
        """Set the ``"he"`` feature of the given edges to 1."""
        # For untyped edges, only add 1 to indicate its existence.
        # For multiple edge types, use a one-hot representation
        # or an embedding module.
        g.edges[src_list, dest_list].data["he"] = torch.ones(len(src_list), 1)

    def prepare_training(self):
        """Reset the list of log-probabilities collected during training."""
        self.log_prob = []

    def forward(self, g, dest):
        """Connect the latest node to ``dest`` (sampled when not training)."""
        src = g.num_nodes() - 1
        possible_dests = range(src)

        # Score every (candidate destination, latest node) pair.
        src_embed_expand = g.nodes[src].data["hv"].expand(src, -1)
        possible_dests_embed = g.nodes[possible_dests].data["hv"]
        pair_embed = torch.cat([possible_dests_embed, src_embed_expand], dim=1)
        dests_scores = self.choose_dest(pair_embed).view(1, -1)

        if not self.training:
            dests_probs = F.softmax(dests_scores, dim=1)
            dest = Categorical(dests_probs).sample().item()

        if not g.has_edges_between(src, dest):
            # For undirected graphs, add edges for both directions
            # so that you can perform graph propagation.
            src_list, dest_list = [src, dest], [dest, src]
            g.add_edges(src_list, dest_list)
            self._initialize_edge_repr(g, src_list, dest_list)
            self.graph_op["prop"](g)

        # With a single candidate there is no choice, so nothing is recorded.
        if self.training and dests_scores.nelement() > 1:
            log_probs = F.log_softmax(dests_scores, dim=1)
            self.log_prob.append(log_probs[:, dest : dest + 1])


#######################################################################################
# Putting it together
# ``````````````````````````
#
# You are now ready to have a complete implementation of the model class.
#


class DGMG(DGMGSkeleton):
    """Complete model: graph embedding, propagation and the three actions."""

    def __init__(self, v_max, node_hidden_size, num_prop_rounds):
        super().__init__(v_max)

        # Graph embedding module
        self.graph_embed = GraphEmbed(node_hidden_size)

        # Graph propagation module
        self.graph_prop = GraphProp(num_prop_rounds, node_hidden_size)

        # Actions
        self.add_node_agent = AddNode(self.graph_embed, node_hidden_size)
        self.add_edge_agent = AddEdge(self.graph_embed, node_hidden_size)
        self.choose_dest_agent = ChooseDestAndUpdate(
            self.graph_prop, node_hidden_size
        )

        # Forward functions
        self.forward_train = partial(forward_train, self=self)
        self.forward_inference = partial(forward_inference, self=self)

    @property
    def action_step(self):
        """Return the current step counter, then advance it by one."""
        current = self.step_count
        self.step_count = current + 1
        return current

    def prepare_for_train(self):
        """Reset the step counter and the log-probabilities of every agent."""
        self.step_count = 0

        for agent in (
            self.add_node_agent,
            self.add_edge_agent,
            self.choose_dest_agent,
        ):
            agent.prepare_training()

    def add_node_and_update(self, a=None):
        """Decide whether to add a node; update the graph when one is added."""
        return self.add_node_agent(self.g, a)

    def add_edge_or_not(self, a=None):
        """Decide whether a new edge should be added."""
        return self.add_edge_agent(self.g, a)

    def choose_dest_and_update(self, a=None):
        """Choose a destination and connect it to the latest node.

        Edges are added for both directions and the graph is updated."""
        self.choose_dest_agent(self.g, a)

    def get_log_prob(self):
        """Return the summed log-probability of all recorded actions."""

        def _total(agent):
            return torch.cat(agent.log_prob).sum()

        return (
            _total(self.add_node_agent)
            + _total(self.add_edge_agent)
            + _total(self.choose_dest_agent)
        )


#######################################################################################
# Below is an animation where a graph is generated on the fly
# after every 10 batches of training for the first 400 batches. You
# can see how the model improves over time and begins generating cycles.
#
# .. figure:: https://user-images.githubusercontent.com/19576924/48929291-60fe3880-ef22-11e8-832a-fbe56656559a.gif
#    :alt:
#
# For generative models, you can evaluate performance by checking the percentage
# of valid graphs among the graphs it generates on the fly.

import torch.utils.model_zoo as model_zoo

# Download a pre-trained model state dict for generating cycles with 10-20 nodes.
state_dict = model_zoo.load_url(
    "https://data.dgl.ai/model/dgmg_cycles-5a0c40be.pth"
)
model = DGMG(v_max=20, node_hidden_size=16, num_prop_rounds=2)
model.load_state_dict(state_dict)
model.eval()


def is_valid(g):
    """Check if g is a cycle having 10-20 nodes.

    Every node must have exactly two successors: the previous and the next
    node id, with wrap-around at both ends of the id range.
    """
    size = g.num_nodes()
    if not 10 <= size <= 20:
        return False

    for node in range(size):
        neighbors = g.successors(node)
        if len(neighbors) != 2:
            return False

        # Modular arithmetic gives the wrap-around: node 0 is preceded by
        # node ``size - 1`` and node ``size - 1`` is followed by node 0.
        previous_node = (node - 1) % size
        next_node = (node + 1) % size
        if previous_node not in neighbors or next_node not in neighbors:
            return False

    return True


num_valid = 0
for i in range(100):
    g = model()
    num_valid += is_valid(g)

del model
print("Among 100 graphs generated, {}% are valid.".format(num_valid))

#######################################################################################
# For the complete implementation, see the `DGL DGMG example
# `__.
# ================================================ FILE: tutorials/models/3_generative_model/README.txt ================================================ [File too large to display: 681 B] ================================================ FILE: tutorials/models/4_old_wines/2_capsule.py ================================================ [File too large to display: 10.0 KB] ================================================ FILE: tutorials/models/4_old_wines/7_transformer.py ================================================ """ .. _model-transformer: Transformer as a Graph Neural Network ====================================== **Author**: Zihao Ye, Jinjing Zhou, Qipeng Guo, Quan Gan, Zheng Zhang .. warning:: The tutorial aims at gaining insights into the paper, with code as a mean of explanation. The implementation thus is NOT optimized for running efficiency. For recommended implementation, please refer to the `official examples `_. """ ################################################################################################ # In this tutorial, you learn about a simplified implementation of the Transformer model. # You can see highlights of the most important design points. For instance, there is # only single-head attention.
The complete code can be found # `here `__. # # The overall structure is similar to the one from the research paper `Annotated # Transformer `__. # # The Transformer model, as a replacement of CNN/RNN architecture for # sequence modeling, was introduced in the research paper: `Attention is All # You Need `__. It improved the # state of the art for machine translation as well as natural language # inference tasks # (`GPT `__). # Recent work on pre-training Transformer with large scale corpus # (`BERT `__) supports that it is # capable of learning high-quality semantic representation. # # The interesting part of Transformer is its extensive employment of # attention. The classic use of attention comes from machine translation # model, where the output token attends to all input tokens. # # Transformer additionally applies *self-attention* in both decoder and # encoder. This process forces words that relate to each other to combine # together, irrespective of their positions in the sequence. This is # different from RNN-based model, where words (in the source sentence) are # combined along the chain, which is thought to be too constrained. # # Attention layer of Transformer # ------------------------------ # # In the attention layer of Transformer, for each node the module learns to # assign weights on its in-coming edges. For node pair :math:`(i, j)` # (from :math:`i` to :math:`j`) with node representations # :math:`x_i, x_j \in \mathbb{R}^n`, the score of their connection is # defined as follows: # # .. math:: # # # q_j = W_q\cdot x_j \\ # k_i = W_k\cdot x_i\\ # v_i = W_v\cdot x_i\\ # \textrm{score} = q_j^T k_i # # where :math:`W_q, W_k, W_v \in \mathbb{R}^{n\times d_k}` map the # representations :math:`x` to “query”, “key”, and “value” space # respectively. # # There are other possibilities to implement the score function.
The dot # product measures the similarity of a given query :math:`q_j` and a key # :math:`k_i`: if :math:`j` needs the information stored in :math:`i`, the # query vector at position :math:`j` (:math:`q_j`) is supposed to be close # to key vector at position :math:`i` (:math:`k_i`). # # The score is then used to compute the sum of the incoming values, # normalized over the weights of edges, stored in :math:`\textrm{wv}`. # Then apply an affine layer to :math:`\textrm{wv}` to get the output # :math:`o`: # # .. math:: # # # w_{ji} = \frac{\exp\{\textrm{score}_{ji} \}}{\sum\limits_{(k, i)\in E}\exp\{\textrm{score}_{ki} \}} \\ # \textrm{wv}_i = \sum_{(k, i)\in E} w_{ki} v_k \\ # o = W_o\cdot \textrm{wv} \\ # # Multi-head attention layer # ~~~~~~~~~~~~~~~~~~~~~~~~~~ # # In Transformer, attention is *multi-headed*. A head is very much like a # channel in a convolutional network. The multi-head attention consists of # multiple attention heads, in which each head refers to a single # attention module. :math:`\textrm{wv}^{(i)}` for all the heads are # concatenated and mapped to output :math:`o` with an affine layer: # # .. math:: # # # o = W_o \cdot \textrm{concat}\left(\left[\textrm{wv}^{(0)}, \textrm{wv}^{(1)}, \cdots, \textrm{wv}^{(h)}\right]\right) # # The code below wraps necessary components for multi-head attention, and # provides two interfaces. # # - ``get`` maps state ‘x’, to query, key and value, which is required by # following steps(\ ``propagate_attention``). # - ``get_o`` maps the updated value after attention to the output # :math:`o` for post-processing. # # .. 
code:: # # class MultiHeadAttention(nn.Module): # "Multi-Head Attention" # def __init__(self, h, dim_model): # "h: number of heads; dim_model: hidden dimension" # super(MultiHeadAttention, self).__init__() # self.d_k = dim_model // h # self.h = h # # W_q, W_k, W_v, W_o # self.linears = clones(nn.Linear(dim_model, dim_model), 4) # # def get(self, x, fields='qkv'): # "Return a dict of queries / keys / values." # batch_size = x.shape[0] # ret = {} # if 'q' in fields: # ret['q'] = self.linears[0](x).view(batch_size, self.h, self.d_k) # if 'k' in fields: # ret['k'] = self.linears[1](x).view(batch_size, self.h, self.d_k) # if 'v' in fields: # ret['v'] = self.linears[2](x).view(batch_size, self.h, self.d_k) # return ret # # def get_o(self, x): # "get output of the multi-head attention" # batch_size = x.shape[0] # return self.linears[3](x.view(batch_size, -1)) # # # How DGL implements Transformer with a graph neural network # ---------------------------------------------------------- # # You get a different perspective of Transformer by treating the # attention as edges in a graph and adopt message passing on the edges to # induce the appropriate processing. # # Graph structure # ~~~~~~~~~~~~~~~ # # Construct the graph by mapping tokens of the source and target # sentence to nodes. The complete Transformer graph is made up of three # subgraphs: # # **Source language graph**. This is a complete graph, each # token :math:`s_i` can attend to any other token :math:`s_j` (including # self-loops). |image0| # **Target language graph**. The graph is # half-complete, in that :math:`t_i` attends only to :math:`t_j` if # :math:`i > j` (an output token can not depend on future words). |image1| # **Cross-language graph**. This is a bi-partitie graph, where there is # an edge from every source token :math:`s_i` to every target token # :math:`t_j`, meaning every target token can attend on source tokens. 
# |image2| # # The full picture looks like this: |image3| # # Pre-build the graphs in dataset preparation stage. # # Message passing # ~~~~~~~~~~~~~~~ # # Once you define the graph structure, move on to defining the # computation for message passing. # # Assume that you have already computed all the queries :math:`q_i`, keys # :math:`k_i` and values :math:`v_i`. Then, for each node :math:`i` (no matter # whether it is a source token or target token), you can decompose the # attention computation into two steps: # # 1. **Message computation:** Compute attention score # :math:`\mathrm{score}_{ij}` between :math:`i` and all nodes :math:`j` # to be attended over, by taking the scaled-dot product between # :math:`q_i` and :math:`k_j`. The message sent from :math:`j` to # :math:`i` will consist of the score :math:`\mathrm{score}_{ij}` and # the value :math:`v_j`. # 2. **Message aggregation:** Aggregate the values :math:`v_j` from all # :math:`j` according to the scores :math:`\mathrm{score}_{ij}`. # # Simple implementation # ^^^^^^^^^^^^^^^^^^^^ # # Message computation # ''''''''''''''''''' # # Compute ``score`` and send source node’s ``v`` to destination’s mailbox # # .. code:: # # def message_func(edges): # return {'score': ((edges.src['k'] * edges.dst['q']) # .sum(-1, keepdim=True)), # 'v': edges.src['v']} # # Message aggregation # ''''''''''''''''''' # # Normalize over all in-edges and weighted sum to get output # # .. code:: # # import torch as th # import torch.nn.functional as F # # def reduce_func(nodes, d_k=64): # v = nodes.mailbox['v'] # # d_k is a plain Python number, so use ** 0.5 (th.sqrt needs a tensor) # att = F.softmax(nodes.mailbox['score'] / d_k ** 0.5, 1) # return {'dx': (att * v).sum(1)} # # Execute on specific edges # ''''''''''''''''''''''''' # # .. 
code:: # # from functools import partial # def naive_propagate_attention(self, g, eids): # g.send_and_recv(eids, message_func, partial(reduce_func, d_k=self.d_k)) # # Speeding up with built-in functions # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # # To speed up the message passing process, use DGL’s built-in # functions, including: # # - ``fn.u_mul_e(src_field, edges_field, out_field)`` multiplies # source’s attribute and edges attribute, and sends the result to the # destination node’s mailbox keyed by ``out_field``. # - ``fn.copy_e(edges_field, out_field)`` copies edge’s attribute to # destination node’s mailbox. # - ``fn.sum(msg_field, out_field)`` sums up the messages in the # destination node’s mailbox and stores the result in the node’s ``out_field``. # # Here, you assemble those built-in functions into ``propagate_attention``, # which is also the main graph operation function in the final # implementation. To accelerate it, break the ``softmax`` operation into # the following steps. Recall that for each head there are two phases. # # 1. Compute attention score by multiplying src node’s ``k`` and dst node’s # ``q`` # # - ``g.apply_edges(src_dot_dst('k', 'q', 'score'), eids)`` # # 2. Scaled Softmax over all dst nodes’ in-coming edges # # - Step 1: Exponentiate the score after dividing it by the scaling constant # # - ``g.apply_edges(scaled_exp('score', np.sqrt(self.d_k)))`` # # .. math:: \textrm{score}_{ij}\leftarrow\exp{\left(\frac{\textrm{score}_{ij}}{ \sqrt{d_k}}\right)} # # - Step 2: Get the “values” on associated nodes weighted by “scores” # on in-coming edges of each node; get the sum of “scores” on # in-coming edges of each node for normalization. Note that here # :math:`\textrm{wv}` is not normalized. # # - ``msg: fn.u_mul_e('v', 'score', 'v'), reduce: fn.sum('v', 'wv')`` # # .. math:: \textrm{wv}_j=\sum_{i=1}^{N} \textrm{score}_{ij} \cdot v_i # # - ``msg: fn.copy_e('score', 'score'), reduce: fn.sum('score', 'z')`` # # .. 
math:: \textrm{z}_j=\sum_{i=1}^{N} \textrm{score}_{ij} # # The normalization of :math:`\textrm{wv}` is left to post processing. # # .. code:: # # def src_dot_dst(src_field, dst_field, out_field): # def func(edges): # return {out_field: (edges.src[src_field] * edges.dst[dst_field]).sum(-1, keepdim=True)} # # return func # # def scaled_exp(field, scale_constant): # def func(edges): # # clamp for softmax numerical stability # return {field: th.exp((edges.data[field] / scale_constant).clamp(-5, 5))} # # return func # # # def propagate_attention(self, g, eids): # # Compute attention score # g.apply_edges(src_dot_dst('k', 'q', 'score'), eids) # g.apply_edges(scaled_exp('score', np.sqrt(self.d_k))) # # Update node state # g.send_and_recv(eids, # [fn.u_mul_e('v', 'score', 'v'), fn.copy_e('score', 'score')], # [fn.sum('v', 'wv'), fn.sum('score', 'z')]) # # Preprocessing and postprocessing # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # In Transformer, data needs to be pre- and post-processed before and # after the ``propagate_attention`` function. # # **Preprocessing** The preprocessing function ``pre_func`` first # normalizes the node representations and then map them to a set of # queries, keys and values, using self-attention as an example: # # .. math:: # # # x \leftarrow \textrm{LayerNorm}(x) \\ # [q, k, v] \leftarrow [W_q, W_k, W_v ]\cdot x # # **Postprocessing** The postprocessing function ``post_funcs`` completes # the whole computation correspond to one layer of the transformer: 1. # Normalize :math:`\textrm{wv}` and get the output of Multi-Head Attention # Layer :math:`o`. # # .. math:: # # # \textrm{wv} \leftarrow \frac{\textrm{wv}}{z} \\ # o \leftarrow W_o\cdot \textrm{wv} + b_o # # add residual connection: # # .. math:: # # # x \leftarrow x + o # # 2. Applying a two layer position-wise feed forward layer on :math:`x` # then add residual connection: # # .. 
math:: # # # x \leftarrow x + \textrm{LayerNorm}(\textrm{FFN}(x)) # # where :math:`\textrm{FFN}` refers to the feed forward function. # # .. code:: # # class Encoder(nn.Module): # def __init__(self, layer, N): # super(Encoder, self).__init__() # self.N = N # self.layers = clones(layer, N) # self.norm = LayerNorm(layer.size) # # def pre_func(self, i, fields='qkv'): # layer = self.layers[i] # def func(nodes): # x = nodes.data['x'] # norm_x = layer.sublayer[0].norm(x) # return layer.self_attn.get(norm_x, fields=fields) # return func # # def post_func(self, i): # layer = self.layers[i] # def func(nodes): # x, wv, z = nodes.data['x'], nodes.data['wv'], nodes.data['z'] # o = layer.self_attn.get_o(wv / z) # x = x + layer.sublayer[0].dropout(o) # x = layer.sublayer[1](x, layer.feed_forward) # return {'x': x if i < self.N - 1 else self.norm(x)} # return func # # class Decoder(nn.Module): # def __init__(self, layer, N): # super(Decoder, self).__init__() # self.N = N # self.layers = clones(layer, N) # self.norm = LayerNorm(layer.size) # # def pre_func(self, i, fields='qkv', l=0): # layer = self.layers[i] # def func(nodes): # x = nodes.data['x'] # if fields == 'kv': # norm_x = x # In enc-dec attention, x has already been normalized. # else: # norm_x = layer.sublayer[l].norm(x) # return layer.self_attn.get(norm_x, fields) # return func # # def post_func(self, i, l=0): # layer = self.layers[i] # def func(nodes): # x, wv, z = nodes.data['x'], nodes.data['wv'], nodes.data['z'] # o = layer.self_attn.get_o(wv / z) # x = x + layer.sublayer[l].dropout(o) # if l == 1: # x = layer.sublayer[2](x, layer.feed_forward) # return {'x': x if i < self.N - 1 else self.norm(x)} # return func # # This completes all procedures of one layer of encoder and decoder in # Transformer. # # .. note:: # # The sublayer connection part is little bit different from the # original paper. However, this implementation is the same as `The Annotated # Transformer `__ # and # `OpenNMT `__. 
# # Main class of Transformer graph # ------------------------------- # # The processing flow of Transformer can be seen as a 2-stage # message-passing within the complete graph (adding pre- and post- # processing appropriately): 1) self-attention in encoder, 2) # self-attention in decoder followed by cross-attention between encoder # and decoder, as shown below. |image4| # # .. code:: python # # class Transformer(nn.Module): # def __init__(self, encoder, decoder, src_embed, tgt_embed, pos_enc, generator, h, d_k): # super(Transformer, self).__init__() # self.encoder, self.decoder = encoder, decoder # self.src_embed, self.tgt_embed = src_embed, tgt_embed # self.pos_enc = pos_enc # self.generator = generator # self.h, self.d_k = h, d_k # # def propagate_attention(self, g, eids): # # Compute attention score # g.apply_edges(src_dot_dst('k', 'q', 'score'), eids) # g.apply_edges(scaled_exp('score', np.sqrt(self.d_k))) # # Send weighted values to target nodes # g.send_and_recv(eids, # [fn.u_mul_e('v', 'score', 'v'), fn.copy_e('score', 'score')], # [fn.sum('v', 'wv'), fn.sum('score', 'z')]) # # def update_graph(self, g, eids, pre_pairs, post_pairs): # "Update the node states and edge states of the graph." # # # Pre-compute queries and key-value pairs. 
# for pre_func, nids in pre_pairs: # g.apply_nodes(pre_func, nids) # self.propagate_attention(g, eids) # # Further calculation after attention mechanism # for post_func, nids in post_pairs: # g.apply_nodes(post_func, nids) # # def forward(self, graph): # g = graph.g # nids, eids = graph.nids, graph.eids # # # Word Embedding and Position Embedding # src_embed, src_pos = self.src_embed(graph.src[0]), self.pos_enc(graph.src[1]) # tgt_embed, tgt_pos = self.tgt_embed(graph.tgt[0]), self.pos_enc(graph.tgt[1]) # g.nodes[nids['enc']].data['x'] = self.pos_enc.dropout(src_embed + src_pos) # g.nodes[nids['dec']].data['x'] = self.pos_enc.dropout(tgt_embed + tgt_pos) # # for i in range(self.encoder.N): # # Step 1: Encoder Self-attention # pre_func = self.encoder.pre_func(i, 'qkv') # post_func = self.encoder.post_func(i) # nodes, edges = nids['enc'], eids['ee'] # self.update_graph(g, edges, [(pre_func, nodes)], [(post_func, nodes)]) # # for i in range(self.decoder.N): # # Step 2: Decoder Self-attention # pre_func = self.decoder.pre_func(i, 'qkv') # post_func = self.decoder.post_func(i) # nodes, edges = nids['dec'], eids['dd'] # self.update_graph(g, edges, [(pre_func, nodes)], [(post_func, nodes)]) # # Step 3: Encoder-Decoder attention # pre_q = self.decoder.pre_func(i, 'q', 1) # pre_kv = self.decoder.pre_func(i, 'kv', 1) # post_func = self.decoder.post_func(i, 1) # nodes_e, nodes_d, edges = nids['enc'], nids['dec'], eids['ed'] # self.update_graph(g, edges, [(pre_q, nodes_d), (pre_kv, nodes_e)], [(post_func, nodes_d)]) # # return self.generator(g.ndata['x'][nids['dec']]) # # # .. note:: # # By calling the ``update_graph`` function, you can create your own # Transformer on any subgraphs with nearly the same code. This # flexibility enables us to discover new, sparse structures (c.f. local attention # mentioned `here `__). Note in this # implementation you don't use mask or padding, which makes the logic # clearer and saves memory.
The trade-off is that the implementation is # slower. # # Training # -------- # # This tutorial does not cover several other techniques such as Label # Smoothing and Noam Optimizations mentioned in the original paper. For # detailed description about these modules, read `The # Annotated # Transformer `__ # written by Harvard NLP team. # # Task and the dataset # ~~~~~~~~~~~~~~~~~~~~ # # The Transformer is a general framework for a variety of NLP tasks. This tutorial focuses # on the sequence to sequence learning: it’s a typical case to illustrate how it works. # # As for the dataset, there are two example tasks: copy and sort, together # with two real-world translation tasks: multi30k en-de task and wmt14 # en-de task. # # - **copy dataset**: copy input sequences to output. (train/valid/test: # 9000, 1000, 1000) # - **sort dataset**: sort input sequences as output. (train/valid/test: # 9000, 1000, 1000) # - **Multi30k en-de**, translate sentences from En to De. # (train/valid/test: 29000, 1000, 1000) # - **WMT14 en-de**, translate sentences from En to De. # (Train/Valid/Test: 4500966/3000/3003) # # .. note:: # Training with wmt14 requires multi-GPU support and is not available. Contributions are welcome! # # Graph building # ~~~~~~~~~~~~~~ # # **Batching** This is similar to the way you handle Tree-LSTM. Build a graph pool in # advance, including all possible combination of input lengths and output # lengths. Then for each sample in a batch, call ``dgl.batch`` to batch # graphs of their sizes together in to a single large graph. # # You can wrap the process of creating graph pool and building # BatchedGraph in ``dataset.GraphPool`` and # ``dataset.TranslationDataset``. # # .. 
code:: python # # graph_pool = GraphPool() # # data_iter = dataset(graph_pool, mode='train', batch_size=1, devices=devices) # for graph in data_iter: # print(graph.nids['enc']) # encoder node ids # print(graph.nids['dec']) # decoder node ids # print(graph.eids['ee']) # encoder-encoder edge ids # print(graph.eids['ed']) # encoder-decoder edge ids # print(graph.eids['dd']) # decoder-decoder edge ids # print(graph.src[0]) # Input word index list # print(graph.src[1]) # Input positions # print(graph.tgt[0]) # Output word index list # print(graph.tgt[1]) # Ouptut positions # break # # Output: # # .. code:: # # tensor([0, 1, 2, 3, 4, 5, 6, 7, 8], device='cuda:0') # tensor([ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], device='cuda:0') # tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, # 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, # 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, # 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, # 72, 73, 74, 75, 76, 77, 78, 79, 80], device='cuda:0') # tensor([ 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, # 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, # 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, # 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, # 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, # 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, # 165, 166, 167, 168, 169, 170], device='cuda:0') # tensor([171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, # 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, # 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, # 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225], # device='cuda:0') # tensor([28, 25, 7, 26, 6, 4, 5, 9, 18], device='cuda:0') # tensor([0, 1, 2, 3, 4, 5, 6, 7, 8], device='cuda:0') # tensor([ 0, 28, 
25, 7, 26, 6, 4, 5, 9, 18], device='cuda:0') # tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], device='cuda:0') # # Put it all together # ------------------- # # Train a one-head transformer with one layer, 128 dimension on copy # task. Set other parameters to the default. # # Inference module is not included in this tutorial. It # requires beam search. For a full implementation, see the `GitHub # repo `__. # # .. code:: python # # from tqdm.auto import tqdm # import torch as th # import numpy as np # # from loss import LabelSmoothing, SimpleLossCompute # from modules import make_model # from optims import NoamOpt # from dgl.contrib.transformer import get_dataset, GraphPool # # def run_epoch(data_iter, model, loss_compute, is_train=True): # for i, g in tqdm(enumerate(data_iter)): # with th.set_grad_enabled(is_train): # output = model(g) # loss = loss_compute(output, g.tgt_y, g.n_tokens) # print('average loss: {}'.format(loss_compute.avg_loss)) # print('accuracy: {}'.format(loss_compute.accuracy)) # # N = 1 # batch_size = 128 # devices = ['cuda' if th.cuda.is_available() else 'cpu'] # # dataset = get_dataset("copy") # V = dataset.vocab_size # criterion = LabelSmoothing(V, padding_idx=dataset.pad_id, smoothing=0.1) # dim_model = 128 # # # Create model # model = make_model(V, V, N=N, dim_model=128, dim_ff=128, h=1) # # # Sharing weights between Encoder & Decoder # model.src_embed.lut.weight = model.tgt_embed.lut.weight # model.generator.proj.weight = model.tgt_embed.lut.weight # # model, criterion = model.to(devices[0]), criterion.to(devices[0]) # model_opt = NoamOpt(dim_model, 1, 400, # th.optim.Adam(model.parameters(), lr=1e-3, betas=(0.9, 0.98), eps=1e-9)) # loss_compute = SimpleLossCompute # # att_maps = [] # for epoch in range(4): # train_iter = dataset(graph_pool, mode='train', batch_size=batch_size, devices=devices) # valid_iter = dataset(graph_pool, mode='valid', batch_size=batch_size, devices=devices) # print('Epoch: {} Training...'.format(epoch)) # model.train(True) 
# run_epoch(train_iter, model, # loss_compute(criterion, model_opt), is_train=True) # print('Epoch: {} Evaluating...'.format(epoch)) # model.att_weight_map = None # model.eval() # run_epoch(valid_iter, model, # loss_compute(criterion, None), is_train=False) # att_maps.append(model.att_weight_map) # # Visualization # ------------- # # After training, you can visualize the attention that the Transformer generates # on copy task. # # .. code:: python # # src_seq = dataset.get_seq_by_id(VIZ_IDX, mode='valid', field='src') # tgt_seq = dataset.get_seq_by_id(VIZ_IDX, mode='valid', field='tgt')[:-1] # # visualize head 0 of encoder-decoder attention # att_animation(att_maps, 'e2d', src_seq, tgt_seq, 0) # # |image5| from the figure you see the decoder nodes gradually learns to # attend to corresponding nodes in input sequence, which is the expected # behavior. # # Multi-head attention # ~~~~~~~~~~~~~~~~~~~~ # # Besides the attention of a one-head attention trained on toy task. We # also visualize the attention scores of Encoder’s Self Attention, # Decoder’s Self Attention and the Encoder-Decoder attention of an # one-Layer Transformer network trained on multi-30k dataset. # # From the visualization you see the diversity of different heads, which is what you would # expect. Different heads learn different relations between word pairs. # # - **Encoder Self-Attention** |image6| # # - **Encoder-Decoder Attention** Most words in target sequence attend on # their related words in source sequence, for example: when generating # “See” (in De), several heads attend on “lake”; when generating # “Eisfischerhütte”, several heads attend on “ice”. |image7| # # - **Decoder Self-Attention** Most words attend on their previous few # words. |image8| # # Adaptive Universal Transformer # ------------------------------ # # A recent research paper by Google, `Universal # Transformer `__, is an example to # show how ``update_graph`` adapts to more complex updating rules. 
# # The Universal Transformer was proposed to address the problem that the # vanilla Transformer is not computationally universal, by introducing # recurrence into the Transformer: # # - The basic idea of the Universal Transformer is to repeatedly revise its # representations of all symbols in the sequence with each recurrent # step by applying a Transformer layer on the representations. # - Compared to the vanilla Transformer, the Universal Transformer shares weights # among its layers, and it does not fix the recurrence time (that is, # the number of layers in the Transformer). # # A further optimization employs an `adaptive computation time # (ACT) <https://arxiv.org/abs/1603.08983>`__ mechanism to allow the # model to dynamically adjust the number of times the representation of # each position in a sequence is revised (referred to as **step** # hereafter). This model is also known as the Adaptive Universal # Transformer (AUT). # # In AUT, you maintain an active nodes list. In each step :math:`t`, you # compute a halting probability :math:`h (0<h<1)` for all nodes in this # list by: # # .. math:: h^t_i = \sigma(W_h x^t_i + b_h) # # then dynamically decide which nodes are still active. A node :math:`i` is halted # at time :math:`T` if and only if # :math:`\sum_{t=1}^{T-1} h^t_i < 1 - \varepsilon \leq \sum_{t=1}^{T} h^t_i`. # Halted nodes are removed from the list. The procedure proceeds until the # list is empty or a pre-defined maximum step is reached. # # The final state of a node :math:`s_i` is a weighted average of # :math:`x_i^t` by :math:`h_i^t`: # # .. math:: s_i = \sum_{t=1}^{T} h_i^t\cdot x_i^t # # In DGL, this is implemented by calling ``update_graph`` only on the nodes # that are still active and the edges associated with them. # For the full implementation, see the `GitHub # repo <https://github.com/dmlc/dgl/tree/master/examples/pytorch/transformer>`__. # # The figure below shows the effect of Adaptive Computation # Time. Different positions of a sentence are revised a different number of times. # # |image9| # # You can also visualize the dynamics of the step distribution on nodes during the # training of AUT on a sort task (reaching 99.7% accuracy), which demonstrates # how AUT learns to reduce recurrence steps during training. |image10| # # .. |image0| image:: https://i.imgur.com/zV5LmTX.png # .. |image1| image:: https://i.imgur.com/dETQMMx.png # .. |image2| image:: https://i.imgur.com/hnGP229.png # .. |image3| image:: https://i.imgur.com/Hj2rRGT.png # .. |image4| image:: https://i.imgur.com/zlUpJ41.png # .. |image5| image:: https://s1.ax1x.com/2018/12/06/F126xI.gif # .. |image6| image:: https://i.imgur.com/HjYb7F2.png # .. |image7| image:: https://i.imgur.com/383J5O5.png # .. |image8| image:: https://i.imgur.com/c0UWB1V.png # .. |image9| image:: https://s1.ax1x.com/2018/12/06/F1sGod.png # .. 
|image10| image:: https://s1.ax1x.com/2018/12/06/F1r8Cq.gif # # .. note:: # The notebook itself is not executable due to many dependencies. # Download `7_transformer.py <https://data.dgl.ai/tutorial/7_transformer.py>`__, # copy the Python script to the directory ``examples/pytorch/transformer``, # and then run ``python 7_transformer.py`` to see how it works. ================================================ FILE: tutorials/models/4_old_wines/README.txt ================================================ [File too large to display: 1.4 KB] ================================================ FILE: tutorials/models/README.txt ================================================ [File too large to display: 63 B] ================================================ FILE: tutorials/multi/1_graph_classification.py ================================================ [File too large to display: 8.8 KB] ================================================ FILE: tutorials/multi/2_node_classification.py ================================================ [File too large to display: 11.1 KB] ================================================ FILE: tutorials/multi/README.txt ================================================ [File too large to display: 52 B] ================================================ FILE: tutorials/requirements.txt ================================================ [File too large to display: 79 B]